diff --git a/.gitignore b/.gitignore
index 39cae458dd..f9ebe06576 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ Gemfile.lock
 
 # Statically generated HTML
 public/**
+output/**
 
 # Files maintained from the middleman build
 archived_docs.basho.com.tar.bz2
diff --git a/Gemfile b/Gemfile
index 28ea97b140..4f28cca67e 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,5 +1,5 @@
 source "https://rubygems.org"
-ruby "2.2.5"
+ruby ">2.2.5"
 
 #TOOD: This file is now out of date.
 # We need to walk the gems and make sure that,
@@ -11,12 +11,13 @@ gem "autoprefixer-rails", "~>6.3.6"
 gem "sprockets", "~>3.7"
 gem "coffee-script", "~>2.4.1"
 gem "uglifier", "~>3.0.0"
-gem "therubyracer"
+gem "mini_racer" # included via the docker image
-gem "aws-sdk", "~> 2.4.3"
-gem "aws-s3", "~> 0.6.3"
-gem "simple-cloudfront-invalidator", "~>1.1.0"
-gem "progressbar", "~> 1.8.0"
+# deprecated as new files are on files.tiot.jp
+#gem "aws-sdk", "~> 2.4.3"
+#gem "aws-s3", "~> 0.6.3"
+#gem "simple-cloudfront-invalidator", "~>1.1.0"
+#gem "progressbar", "~> 1.8.0"
 
 gem "rake", "~>11.2.2"
 gem "guard", "~>2.14.0"
@@ -24,3 +25,8 @@ gem "guard-rake", "~>1.0.0"
 gem "pry", "~>0.10.3"
 gem "pry-byebug", "~>3.4.0"
+
+ # for the SFTP download metadata generator
+gem "net-sftp"
+#gem "json"
+gem "uri"
\ No newline at end of file
diff --git a/README.md b/README.md
index f464e50daf..b155019246 100644
--- a/README.md
+++ b/README.md
@@ -1,129 +1,150 @@
-[basho docs]: http://docs.basho.com/
-[task list]: https://github.com/basho/private_basho_docs/issues/11
-[middleman]: https://middlemanapp.com/
-[rvm]: https://rvm.io/
-
-# Basho's Documentation Generation
-
-This repository contains all the bits and pieces, large and small required to
-render and deploy Basho's documentation.
-
-### http://docs.basho.com/
-
-This is a Work In Progress!
-Please let us know if you'd like to help out!
-
-## Building The HTML Locally
-
-1. Install [Hugo][hugo] by checking out [Hugo's Installing][installing hugo] page.
-
-1. Clone the repository with:
-
-    ```
-    git clone https://github.com/basho/basho_docs.git
-    cd basho_docs
-    ```
-
-1. Run Hugo with `hugo server` and wait a couple of seconds for the site to
-   build.
-
-1. Play by visiting <http://localhost:1313>.
-
->**Heads-up**
->
-> When running a local instance of the site, you can't navigate from the splash page (the first page when you navigate to localhost:1313) to the index page of KV, TS, or CS. You will need to manually enter the version in the address bar of your browser. So, for instance, http://localhost:1313/riak/kv/2.2.0/ rather than http://localhost:1313/riak/kv/latest/.
-
-[hugo]: http://gohugo.io/
-[installing hugo]: http://gohugo.io/overview/installing/
-[homebrew]: http://brew.sh/
-
-### No Really, _Go_ Play
-
-See what we did there?
-
-At this point, any changes you make to the markdown files in the `content/`
-directory will be automatically detected and rendered live in your local browser.
-Change some stuff! Have fun!
-
-If you want to modify the [content templates][hugo content templates] that
-define how each pages' HTML is generated, modifying the [Go Templates][hugo go template primer]
-in `layouts/_default/` and the [partial templates][hugo partial templates] in
-`layouts/partials/` will also be automatically detected and rendered live in your browser.
-
-[hugo content templates]: https://gohugo.io/templates/content/
-[hugo go template primer]: https://gohugo.io/templates/go-templates/
-[hugo partial templates]: https://gohugo.io/templates/partials/
-[hugo shortcodes]: https://gohugo.io/extras/shortcodes/
-
-## Modifying the `.js` and `.css` Files
-
->**Note:** Generally, unless you're helping us out with a specific task or project that you've discussed with us, you should not be altering the .js or .css files in this repo.
-
-If you want to mess with the scripts and CSS that this site uses, it's not
-_quite_ as easy as modifying the HTML.
-
-The scripts and CSS files used to render Hugo content are expected to live in
-the `static/` directory. We use a lot of [Coffee Script][coffee] and [Sass][sass]
-for our scripting and styling needs, and we convert those files to `.js` and
-`.css` as a pre-render step. We put those `.coffee` and `.scss` files into the
-`dynamic/` directory.
-
->**Note:** For files manually generated, place the source of the generation in
-a directory parallel to the generated file(s), rooted in `public_src/`. If
-possible, include a script to generate the output. For example, the uml
-deployment diagram images in `static/images/redis/` were generated by the .uml
-files in `public_src/images/redis/` via the script `gen_diagrams.sh` w/ the list
-of source files for generation explicitly listed in `diagrams.lst`.
-
-To convert the Coffee and Sass into `.js` and `.css` files, you'll need to:
-
-1. **Install [RVM][rvm]** or equivalent.
-   You might need to restart your shell to get the `rvm` command to be recognized.
-1. **Install Ruby.**
-   Use the following command: ``rvm install `cat .ruby-version` `` or manually
-   install the current version specified in our .ruby-version and Gemfile files.
-1. **Install [Bundler]** with `gem install bundler`.
-1. **Install the rest of the dependencies** with `bundle install`.
-1. **Use [Rake] to do everything else**, like rebuild a copy of everything that
-   should live in `static/`. You can use `rake build` for that. For a more
-   debug-friendly version of everything, run `rake build:debug`.
-
-   In case you want any changes you make to `.coffee` and `.scss` files to be
-   automatically detected and rendered live in your browser, you can run
-   `rake watch`.
-
-   For a list of some of the useful commands, just run `rake`.
-
-[coffee]: coffeescript.org
-[sass]: http://sass-lang.com/
-[rvm]: https://rvm.io/
-[bundler]: http://bundler.io/
-[rake]: http://docs.seattlerb.org/rake/
-
-## Would You Like to Contribute?
-
-Awesome! (We're assuming you said yes. Because you're reading this. And you're _awesome_.)
-
-This repository operates just like any other open source repo, and only thrives
-through the efforts of everyone who contributes to it. If you see something wrong,
-something that could be improved, or something that's simply missing please
-don't hesitate to:
-
-* **Open Up a [New Issue]**
-  and let us know what you think should change.
-
-* **[Find the File] You Want to Change**
-  and use GitHub's online editor to open a Pull Request right here.
-
-* **[Fork] This Repository**
-  so you can make (and see) your changes locally.
-
-Don't forget to check out our [Contributing Guidelines][contributing] so you
-can read up on all our weird little quirks, like how we
-[don't want you to use `<h1>` headers][contributing_headers].
-
-[new issue]: https://github.com/basho/basho_docs/issues/new
-[find the file]: https://github.com/basho/basho_docs/find/master
-[fork]: https://github.com/basho/basho_docs/#fork-destination-box
-[contributing]: CONTRIBUTING.md
-[contributing_headers]: CONTRIBUTING.md
+[basho docs]: http://docs.basho.com/
+[task list]: https://github.com/basho/private_basho_docs/issues/11
+[middleman]: https://middlemanapp.com/
+[rvm]: https://rvm.io/
+
+# Riak's Documentation Generation
+
+This repository contains all the bits and pieces, large and small, required to
+render and deploy Basho's documentation.
+
+### https://docs.riak.com/
+
+This is updated for each new version of Riak once reviewed.
+
+This is a Work In Progress!
+Please let us know if you'd like to help out!
+
+### https://www.tiot.jp/riak-docs/
+
+This is updated for each new version of Riak as soon as written, and gets regular small updates.
+
+### https://www.tiot.jp/riak-docs-beta/
+
+This is updated for each new version of Riak as each doc section is updated, and often will be a WIP.
+
+## Building The HTML Locally
+
+We moved to a Docker image to build the docs, to avoid the problems of getting the various tool versions to work together.
+
+1. Install [Docker](https://docs.docker.com/engine/install/)
+
+1. Clone the repository with:
+
+    ```
+    git clone https://github.com/ti-tokyo/riak-docs-fork.git
+    cd riak-docs-fork
+    ```
+
+    Or:
+
+    ```
+    git clone https://github.com/basho/basho_docs.git
+    cd basho_docs
+    ```
+
+1. Build the Docker image:
+
+    ```
+    ./docker/docker-build-image.titokyo.sh
+    ```
+
+1. Run the Docker image as a local server to test that it all works:
+
+    ```
+    docker-compose -f ./docker/docker-compose.localhost-preview.yaml up riakdocs
+    ```
+
+1. Play by visiting <http://localhost:1313>.
+
+
+### No Really, _Go_ Play
+
+See what we did there?
+
+At this point, any changes you make to the markdown files in the `content/`
+directory will be automatically detected and rendered live in your local browser.
+Change some stuff! Have fun!
+
+If you want to modify the [content templates][hugo content templates] that
+define how each page's HTML is generated, modifying the [Go Templates][hugo go template primer]
+in `layouts/_default/` and the [partial templates][hugo partial templates] in
+`layouts/partials/` will also be automatically detected and rendered live in your browser.
+
+[hugo content templates]: https://gohugo.io/templates/content/
+[hugo go template primer]: https://gohugo.io/templates/go-templates/
+[hugo partial templates]: https://gohugo.io/templates/partials/
+[hugo shortcodes]: https://gohugo.io/extras/shortcodes/
+
+## Modifying the `.js` and `.css` Files
+
+>**Note:** Generally, unless you're helping us out with a specific task or project that you've discussed with us, you should not be altering the .js or .css files in this repo.
+
+If you want to mess with the scripts and CSS that this site uses, it's not
+_quite_ as easy as modifying the HTML.
+
+The scripts and CSS files used to render Hugo content are expected to live in
+the `static/` directory. We use a lot of [CoffeeScript][coffee] and [Sass][sass]
+for our scripting and styling needs, and we convert those files to `.js` and
+`.css` as a pre-render step. We put those `.coffee` and `.scss` files into the
+`dynamic/` directory.
+
+>**Note:** For files manually generated, place the source of the generation in
+a directory parallel to the generated file(s), rooted in `public_src/`. If
+possible, include a script to generate the output. For example, the UML
+deployment diagram images in `static/images/redis/` were generated by the .uml
+files in `public_src/images/redis/` via the script `gen_diagrams.sh` with the list
+of source files for generation explicitly listed in `diagrams.lst`.
+
+To convert the Coffee and Sass into `.js` and `.css` files, you'll need to:
+
+1. **Install [RVM][rvm]** or equivalent.
+   You might need to restart your shell to get the `rvm` command to be recognized.
+1. **Install Ruby.**
+   Use the following command: ``rvm install `cat .ruby-version` `` or manually
+   install the current version specified in our .ruby-version and Gemfile files.
+1. **Install [Bundler]** with `gem install bundler`.
+1. **Install the rest of the dependencies** with `bundle install`.
+1. **Use [Rake] to do everything else**, like rebuild a copy of everything that
+   should live in `static/`. You can use `rake build` for that. For a more
+   debug-friendly version of everything, run `rake build:debug`.
+
+   If you want any changes you make to `.coffee` and `.scss` files to be
+   automatically detected and rendered live in your browser, you can run
+   `rake watch`.
+
+   For a list of some of the useful commands, just run `rake`.
+
+[coffee]: http://coffeescript.org/
+[sass]: http://sass-lang.com/
+[rvm]: https://rvm.io/
+[bundler]: http://bundler.io/
+[rake]: http://docs.seattlerb.org/rake/
+
+## Would You Like to Contribute?
+
+Awesome! (We're assuming you said yes. Because you're reading this. And you're _awesome_.)
+
+This repository operates just like any other open source repo, and only thrives
+through the efforts of everyone who contributes to it. If you see something wrong,
+something that could be improved, or something that's simply missing, please
+don't hesitate to:
+
+* **Open Up a [New Issue]**
+  and let us know what you think should change.
+
+* **[Find the File] You Want to Change**
+  and use GitHub's online editor to open a Pull Request right here.
+
+* **[Fork] This Repository**
+  so you can make (and see) your changes locally.
+
+Don't forget to check out our [Contributing Guidelines][contributing] so you
+can read up on all our weird little quirks, like how we
+[don't want you to use `<h1>` headers][contributing_headers].
+
+[new issue]: https://github.com/basho/basho_docs/issues/new
+[find the file]: https://github.com/basho/basho_docs/find/master
+[fork]: https://github.com/basho/basho_docs/#fork-destination-box
+[contributing]: CONTRIBUTING.md
+[contributing_headers]: CONTRIBUTING.md
diff --git a/Rakefile b/Rakefile
index b2928bbed6..5bb79921bc 100644
--- a/Rakefile
+++ b/Rakefile
@@ -19,7 +19,8 @@
 require_relative 'rake_libs/compile_js'
 require_relative 'rake_libs/compile_css'
 require_relative 'rake_libs/s3_deploy'
-require_relative 'rake_libs/downloads_metadata_generator'
+require_relative 'rake_libs/downloads_metadata_generator_sftp'
+#require_relative 'rake_libs/downloads_metadata_generator'
 require_relative 'rake_libs/projects_metadata_generator'
 
 $css_source = "./dynamic/css"
@@ -27,7 +28,7 @@ $css_dest = "./static/css"
 $js_source = "./dynamic/js"
 $js_dest = "./static/js"
 $cache_dir = "./dynamic/.cache"
-$hugo_dest = "./public" # Should always be set to `publishdir` from config.yml
+$hugo_dest = "./output" # Should always be set to `publishdir` from config.yml
 
 ### Rake directory definitions
 directory "#{$js_dest}"
@@ -48,7 +49,6 @@ if Gem::Version.new(min_ruby_version) > Gem::Version.new(RUBY_VERSION)
               "#{min_ruby_version}.\n")
 end
-
 # Check if Hugo is installed, and confirm it's up to date.
 if (`which hugo`.empty?)
   Kernel.abort("ERROR: No version of Hugo is installed.\n"\
@@ -73,7 +73,7 @@ end
 # Default
 Rake::TaskManager.record_task_metadata = true
 task :default do
-  puts("Basho Documentation Generate System Usage:")
+  puts("Riak Documentation Generate System Usage:")
   puts("")
   Rake::application.options.show_tasks = :tasks # this solves sidewaysmilk problem
   Rake::application.options.show_task_pattern = //
@@ -119,22 +119,23 @@ end
 ########
 # Build
 desc "Compile compressed JS and compressed CSS"
-task :build => ['clean', 'build:js', 'build:css']
+task :build => ['clean', 'build:css', 'build:js']
 namespace :build do
-  task :js => ["#{$js_dest}", 'clean:js'] do compile_js(debug: false); end
   task :css => ["#{$css_dest}", 'clean:css'] do compile_css(debug: false); end
+  task :js => ["#{$js_dest}", 'clean:js'] do compile_js(debug: false); end
 
   ################
   # Build : Debug
   desc "Compile human-readable JS and compile human-readable CSS"
-  task :debug => ["#{$js_dest}", "#{$css_dest}",
-                  'build:debug:js', 'build:debug:css']
+  task :debug => ["#{$css_dest}", "#{$js_dest}",
+                  'build:debug:css', 'build:debug:js']
 
   namespace :debug do
+    desc "Compile human-readable CSS"
+    task :css => ["#{$css_dest}"] do compile_css(debug: true); end
+    desc "Compile human-readable JS"
     task :js => ["#{$js_dest}"] do compile_js(debug: true); end
-    desc "Compile human-readable CSS"
-    task :css => ["#{$css_dest}"] do compile_css(debug: true); end
   end
 end
@@ -200,7 +201,7 @@ namespace :metadata do
   task :all => ['metadata:generate_downloads', 'metadata:generate_projects']
 
   desc "Generate package URI information"
-  task :generate_downloads do generate_downloads_metadata(); end
+  task :generate_downloads do generate_downloads_metadata_sftp(); end
 
   desc "Generate JavaScript-readable project descriptions"
   task :generate_projects do generate_projects_metadata(); end
diff --git a/config.yaml b/config.yaml
index cbe07bfb68..3fe247c78f 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,8 +1,8 @@
 ---
 # General Hugo Configuration Options
 # ==================================
-title: "Basho Documentation"
-baseURL: "https://docs.basho.com/"
+title: "Riak Docs"
+baseURL: "https://docs.riak.com/"
 languageCode: "en-us"
 
 # Use YAML for front matter, rather than TOML
@@ -20,7 +20,7 @@ ignoreFiles: [
 ]
 
 # Default directory to build the completed site
-publishdir: "public"
+publishdir: "output"
 
 params:
   # Project Descriptions
@@ -49,8 +49,9 @@ params:
   #     - ["", "", ...]
   #     - ["", "", ...]
   #   latest: ""       /* 5 */
-  #   lts: ""          /* 6 */
-  #   archived_url: "" /* 7 */
+  #   lts: ["",""]      /* 6 */
+  #   archive_below: "" /* 7 */
+  #   archived_url: ""  /* 8 */
   #
   # 1. The project descriptor must match some contents' `project:` front matter.
   #    This should be all lowercase with underscores for spaces---code, really.
@@ -78,11 +79,14 @@ params:
   # 5. `latest` is the current specific version that is currently att the head-
   #    of-development.
   #    `latest` is required.
-  # 6. `lts` is the Release Series (ex; "1.4", or "2.0", ect. -- it is expected
-  #    to be the "X.Y" of a semantic "X.Y.Z" version string) that is currently
-  #    held as the Long Term Support series.
+  # 6. `lts` is the set of Release Series (e.g. ["2.9"], or ["2.9","3.0"], etc.
+  #    -- it is expected to be the ["X.Y"] of a semantic "X.Y.Z" version string
+  #    that is currently held as the Long Term Support series.
   #    `lts` is optional.
-  # 7. If the project was moved from the Middleman site to Hugo, the
+  # 7. To make the versions menu smaller, set `archive_below` to a "X.Y" version
+  #    to place earlier versions under a collapsed "other" item.
+  #    `archive_below` is optional.
+  # 8. If the project was moved from the Middleman site to Hugo, the
   #    `archived_url` is the URL to the newest of the deprecated pages.
   #    `archived_url` is optional.
   project_descriptions:
@@ -91,28 +95,34 @@ params:
     riak_kv:
       project_name: "Riak KV"
       project_name_html: 'RiakKV'
      path: "/riak/kv"
       archived_path: "/riak"
+      github_path: "https://github.com/TI-Tokyo/riak-docs-fork/tree/master/content/"
       releases:
         - ["2.0.0", "2.0.1", "2.0.2", "2.0.4", "2.0.5", "2.0.6", "2.0.7", "2.0.8", "2.0.9"]
         - ["2.1.1", "2.1.3", "2.1.4"]
-        - ["2.2.0", "2.2.1", "2.2.2", "2.2.3"]
-      latest: "2.2.3"
-      lts: "2.0"
-      archived_url: "http://docs.basho.com/riak/1.4.12/"
+        - ["2.2.0", "2.2.1", "2.2.2", "2.2.3", "2.2.6"]
+        - ["2.9.0p5", "2.9.1", "2.9.2", "2.9.4", "2.9.7", "2.9.8", "2.9.9", "2.9.10"]
+        - ["3.0.1", "3.0.2", "3.0.3", "3.0.4"]
+      latest: "3.0.4"
+      lts: ["2.9", "3.0"]
+      archive_below: "2.2"
+      #archived_url: "http://docs.riak.com/riak/1.4.12/"
     riak_cs:
       project_name: "Riak CS"
       project_name_html: 'RiakCS'
       path: "/riak/cs"
       archived_path: "/riakcs"
+      github_path: "https://github.com/TI-Tokyo/riak-docs-fork/tree/master/content/"
       releases:
         - ["2.0.0", "2.0.1"]
-        - ["2.1.0", "2.1.1"]
-      latest: "2.1.1"
-      lts: "2.0"
-      archived_url: "http://docs.basho.com/riakcs/1.5.4/"
+        - ["2.1.0", "2.1.1", "2.1.2"]
+      latest: "2.1.2"
+      lts: ["2.1"]
+      #archived_url: "http://docs.riak.com/riakcs/1.5.4/"
     riak_ts:
       project_name: "Riak TS"
       project_name_html: 'RiakTS'
       path: "/riak/ts"
+      github_path: "https://github.com/TI-Tokyo/riak-docs-fork/tree/master/content/"
       archived_path: "/riakts"
       releases:
         - ["1.0.0"]
@@ -122,6 +132,8 @@
         - ["1.4.0"]
         - ["1.5.0", "1.5.1", "1.5.2"]
       latest: "1.5.2"
+      lts: ["1.5"]
+      archive_below: "1.3"
     dataplatform:
       project_name: "DataPlatform"
       path: "/dataplatform"
diff --git a/content/community/index.md b/content/community/index.md
index 88db3ab612..52b9ecb891 100644
--- a/content/community/index.md
+++ b/content/community/index.md
@@ -30,9 +30,9 @@ aliases:
   - /riak/kv/2.0.0/community
 ---
 
-[community projects]: /community/projects
-[reporting bugs]: /community/reporting-bugs
-[taishi]: /community/taishi
+[community projects]: {{< baseurl >}}community/projects
+[reporting bugs]: {{< baseurl >}}community/reporting-bugs
+[taishi]: {{< baseurl >}}community/taishi
 
 Our Community is an eclectic group of engineers, architects, academics and enthusiasts who care about solving the hardest problems of distributed systems with open source software. Riak, written in Erlang, tackles the continuing complexity of persisting unstructured data with an architecture uniquely designed to scale.
@@ -66,7 +66,7 @@ The [#riak IRC room on irc.freenode.net](http://irc.lc/freenode/riak) is a great
 ### The Basho Blog
 
 Members of the Basho team write frequently on [The Basho
-Blog](http://basho.com/blog/).
+Blog](https://riak.com/blog/).
 
 ### Videos
diff --git a/content/community/productadvisories.md b/content/community/productadvisories.md
index 80636c9e8f..023692fbac 100644
--- a/content/community/productadvisories.md
+++ b/content/community/productadvisories.md
@@ -20,16 +20,16 @@ aliases:
 
 This page contains product advisories that are periodically issued for Basho products.
 
-* [Slight chance that AAE could stall itself or crash a Riak node](/community/productadvisories/aaestall/)
-* [Default Configuration For Handoff May Cause Data Loss in TS](/community/productadvisories/130-dataloss/)
-* [LevelDB SEGV in Riak KV 2.1.3](/community/productadvisories/leveldbsegfault/)
-* [Possibility of Code Injection on Riak Init File](/community/productadvisories/codeinjectioninitfiles/)
-* [Socket reuse issue with Riak Golang client 1.5.1](/community/productadvisories/golang151socket/)
-* [Potential data loss on restart with LevelDB tiered storage](/community/productadvisories/leveldbrestart/)
-* [Incompatibility between Dotted Version Vectors and Last Write Wins](/community/productadvisories/dvvlastwritewins/)
-* [Default Configuration For Handoff May Cause Data Loss](/community/productadvisories/210-dataloss/)
-* [SSL 3.0 Vulnerability and POODLE Attack](/community/productadvisories/sslpoodle/)
-* [Map Data Type Disk Incompatibility](/community/productadvisories/maps-204/)
+* [Slight chance that AAE could stall itself or crash a Riak node]({{< baseurl >}}community/productadvisories/aaestall/)
+* [Default Configuration For Handoff May Cause Data Loss in TS]({{< baseurl >}}community/productadvisories/130-dataloss/)
+* [LevelDB SEGV in Riak KV 2.1.3]({{< baseurl >}}community/productadvisories/leveldbsegfault/)
+* [Possibility of Code Injection on Riak Init File]({{< baseurl >}}community/productadvisories/codeinjectioninitfiles/)
+* [Socket reuse issue with Riak Golang client 1.5.1]({{< baseurl >}}community/productadvisories/golang151socket/)
+* [Potential data loss on restart with LevelDB tiered storage]({{< baseurl >}}community/productadvisories/leveldbrestart/)
+* [Incompatibility between Dotted Version Vectors and Last Write Wins]({{< baseurl >}}community/productadvisories/dvvlastwritewins/)
+* [Default Configuration For Handoff May Cause Data Loss]({{< baseurl >}}community/productadvisories/210-dataloss/)
+* [SSL 3.0 Vulnerability and POODLE Attack]({{< baseurl >}}community/productadvisories/sslpoodle/)
+* [Map Data Type Disk Incompatibility]({{< baseurl >}}community/productadvisories/maps-204/)
 
 ## Questions
diff --git a/content/community/productadvisories/130-dataloss.md b/content/community/productadvisories/130-dataloss.md
index 95cccf8730..eb2f798cd9 100644
--- a/content/community/productadvisories/130-dataloss.md
+++ b/content/community/productadvisories/130-dataloss.md
@@ -17,17 +17,17 @@ Date issued | June 30, 2016
 Product | Riak TS (Open Source)
 Affected versions | 1.3.0
 
-{{% note title="UPDATE" %}} Riak TS 1.3.1 has been released. Please download it [here](http://docs.basho.com/riak/ts/1.3.1/downloads/)
+{{% note title="UPDATE" %}} Riak TS 1.3.1 has been released. Please download it [here]({{< baseurl >}}riak/ts/1.3.1/downloads/)
 {{% /note %}}
 
 ## Overview
 
-Default configuration for handoff may cause data loss in the OSS release of Riak TS 1.3.0. If you are using Riak TS Enterprise, you are not impacted by this bug but you **SHOULD** upgrade to [Riak TS Enterprise 1.3.1](http://docs.basho.com/riak/ts/1.3.1/downloads/) for other handoff bug fixes.
+Default configuration for handoff may cause data loss in the OSS release of Riak TS 1.3.0. If you are using Riak TS Enterprise, you are not impacted by this bug, but you **SHOULD** upgrade to [Riak TS Enterprise 1.3.1]({{< baseurl >}}riak/ts/1.3.1/downloads/) for other handoff bug fixes.
 
 ## Description
 
-In Riak TS 1.3.0, the default configuration for handoff.ip causes vnodes marked for transfer during handoff to be removed without transferring data to their new destination nodes. A mandatory change to configuration (in riak.conf) will resolve this issue. All open source users are impacted by this issue and we strongly recommend that all 1.3.0 users [upgrade to 1.3.1](http://docs.basho.com/riak/ts/1.3.1/downloads/).
+In Riak TS 1.3.0, the default configuration for handoff.ip causes vnodes marked for transfer during handoff to be removed without transferring data to their new destination nodes. A mandatory change to the configuration (in riak.conf) will resolve this issue. All open source users are impacted by this issue, and we strongly recommend that all 1.3.0 users [upgrade to 1.3.1]({{< baseurl >}}riak/ts/1.3.1/downloads/).
 
 **NOTE:** This is known to occur for ownership handoff and fallback transfers (hinted handoffs).
@@ -65,7 +65,7 @@ riak-admin transfer-limit 0
 
 Then configure handoff.ip in riak.conf to an external IP address or 0.0.0.0 on all nodes.
 
-Perform a [rolling restart](http://docs.basho.com/riak/kv/2.1.4/using/repair-recovery/rolling-restart/) of Riak across your cluster to activate the new setting.
+Perform a [rolling restart]({{< baseurl >}}riak/kv/2.1.4/using/repair-recovery/rolling-restart/) of Riak across your cluster to activate the new setting.
 
 For additional repair work, you will need to have Riak TS 1.3.1 or higher installed across your cluster.
@@ -77,6 +77,6 @@ For additional repair work, you will need to have Riak TS 1.3.1 or higher instal
 Handoffs should remain disabled until that point in time. Do not add or remove nodes until you have upgraded your cluster to Riak TS 1.3.1 or higher.
 {{% /note %}}
 
-You should run Riak repair on each cluster member as documented at [http://docs.basho.com/riak/latest/ops/running/recovery/repairing-partitions/](http://docs.basho.com/riak/latest/ops/running/recovery/repairing-partitions/) to recreate any missing replicas from available replicas elsewhere in the cluster. We recommend performing the Riak repair in a round-robin fashion on each node of your cluster (node0, node1, node2, etc). Repeat this round-robin repair “n_val - 1” times. For example: the default configuration for n_val is 3, which means you would run Riak repair twice across the entire cluster.
+You should run Riak repair on each cluster member as documented at [{{< baseurl >}}riak/latest/ops/running/recovery/repairing-partitions/]({{< baseurl >}}riak/latest/ops/running/recovery/repairing-partitions/) to recreate any missing replicas from available replicas elsewhere in the cluster. We recommend performing the Riak repair in a round-robin fashion on each node of your cluster (node0, node1, node2, etc.). Repeat this round-robin repair “n_val - 1” times. For example, the default configuration for n_val is 3, which means you would run Riak repair twice across the entire cluster.
 
 > **NOTE:** It is important to ensure that you execute in a round-robin fashion: node0, node1, node2 and then repeat.
diff --git a/content/community/productadvisories/210-dataloss.md b/content/community/productadvisories/210-dataloss.md
index c6ce7ccdd6..e910074e54 100644
--- a/content/community/productadvisories/210-dataloss.md
+++ b/content/community/productadvisories/210-dataloss.md
@@ -65,7 +65,7 @@ Then configure handoff.ip in riak.conf to an external IP address or 0.0.0.0 on a
 
 Perform a rolling restart of Riak across your cluster to activate the new setting.
 
-After correcting the configuration and restarting the nodes, you should run Riak KV repair on each cluster member as documented at [http://docs.basho.com/riak/latest/ops/running/recovery/repairing-partitions/](http://docs.basho.com/riak/latest/ops/running/recovery/repairing-partitions/) to recreate any missing replicas from available replicas elsewhere in the cluster. We recommend performing the Riak KV repair in a round-robin fashion on each node of your cluster (node0, node1, node2, etc). Repeat this round-robin repair “n_val - 1” times. For example: the default configuration for n_val is 3, which means you would run Riak KV repair twice across the entire cluster.
+After correcting the configuration and restarting the nodes, you should run Riak KV repair on each cluster member as documented at [{{< baseurl >}}riak/latest/ops/running/recovery/repairing-partitions/]({{< baseurl >}}riak/latest/ops/running/recovery/repairing-partitions/) to recreate any missing replicas from available replicas elsewhere in the cluster. We recommend performing the Riak KV repair in a round-robin fashion on each node of your cluster (node0, node1, node2, etc.). Repeat this round-robin repair “n_val - 1” times. For example, the default configuration for n_val is 3, which means you would run Riak KV repair twice across the entire cluster.
 
 > NOTE: It is important to ensure that you execute in a round-robin fashion: node0, node1, node2 and then repeat.
 
 A forthcoming 2.1.1 release will provide an updated default configuration.
diff --git a/content/community/productadvisories/dvvlastwritewins.md b/content/community/productadvisories/dvvlastwritewins.md
index 5ee25b47b6..10266d79a6 100644
--- a/content/community/productadvisories/dvvlastwritewins.md
+++ b/content/community/productadvisories/dvvlastwritewins.md
@@ -75,7 +75,7 @@ If you see any output of the form: `!! Fixing <<"foo">>: - resetting dvv_enabled
 
 When AAE is enabled, the next AAE run will repair any keys with the issue. Also, once the bucket properties are fixed, issuing a GET (for example, your application reading the key) will result in both a successful read and will repair that particular object.
 
-If AAE is disabled in your cluster, infrequently accessed data affected by this issue will be in a damaged state until the next request. More information about AAE, and the risks associated with disabling it, is available at [https://docs.basho.com/riak/2.1.1/learn/concepts/active-anti-entropy/](/riak/kv/2.1.1/learn/concepts/active-anti-entropy/).
+If AAE is disabled in your cluster, infrequently accessed data affected by this issue will be in a damaged state until the next request. More information about AAE, and the risks associated with disabling it, is available at [{{< baseurl >}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/).
 
 In a future release we will ensure that dotted version vectors and `last_write_wins` cannot be enabled simultaneously.
diff --git a/content/community/productadvisories/leveldbsegfault.md b/content/community/productadvisories/leveldbsegfault.md
index 76c034acaf..f5150b6924 100644
--- a/content/community/productadvisories/leveldbsegfault.md
+++ b/content/community/productadvisories/leveldbsegfault.md
@@ -3,7 +3,7 @@ title: "LevelDB SEGV in Riak KV 2.1.3"
 description: ""
 menu:
   community:
-    name: "LeveDB Segfault"
+    name: "LevelDB Segfault"
     identifier: "leveldbsegv"
     weight: 120
     parent: "productadvisories"
@@ -60,7 +60,7 @@ If you are unable to upgrade/patch, the occurrence can be reduced by temporarily
 
 Download and update Riak KV using the downloaded package or the package cloud repo.
 
-Packages can be downloaded by going to [http://docs.basho.com/riak/latest/downloads/](http://docs.basho.com/riak/latest/downloads/) and selecting **2.1.4**, or from PackageCloud at [https://packagecloud.io/basho/riak](https://packagecloud.io/basho/riak).
+Packages can be downloaded by going to [{{< baseurl >}}riak/kv/latest/downloads/]({{< baseurl >}}riak/kv/latest/downloads/) and selecting **2.1.4**, or from PackageCloud at [https://packagecloud.io/basho/riak](https://packagecloud.io/basho/riak).
 
 ## Patch eleveldb.so
diff --git a/content/community/productadvisories/maps-204.md b/content/community/productadvisories/maps-204.md
index b24b68b541..6d01ff77da 100644
--- a/content/community/productadvisories/maps-204.md
+++ b/content/community/productadvisories/maps-204.md
@@ -12,10 +12,10 @@ toc: true
 ---
 
 [reported]: http://lists.basho.com/pipermail/riak-users_lists.basho.com/2015-January/016568.html
-[dev data types]: /riak/kv/2.0.4/developing/data-types
-[dev data types maps]: /riak/kv/2.0.4/developing/data-types/#maps
-[cluster ops log]: /riak/kv/2.1.3/using/cluster-operations/logging
-[downloads]: http://docs.basho.com/riak/latest/downloads/
+[dev data types]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types
+[dev data types maps]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types/maps
+[cluster ops log]: {{< baseurl >}}riak/kv/2.1.3/using/cluster-operations/logging
+[Riak 2.0.5 or later]: {{< baseurl >}}riak/latest/downloads/
 
 
 Info | Value
@@ -61,4 +61,4 @@ normal.
 ## Mitigation Strategy
 
 The recommended mitigation strategy is to upgrade to [Riak 2.0.5 or
-later][downloads].
+later].
diff --git a/content/community/productadvisories/sslpoodle.md b/content/community/productadvisories/sslpoodle.md
index da628468d6..7383081424 100644
--- a/content/community/productadvisories/sslpoodle.md
+++ b/content/community/productadvisories/sslpoodle.md
@@ -14,8 +14,8 @@ toc: true
 
 [POODLE]: https://www.us-cert.gov/ncas/alerts/TA14-290A
 [ZIP file]: https://github.com/basho/basho_docs/raw/master/extras/binaries/poodle-1.x.zip
-[use admin riak cli]: /riak/kv/2.0.2/using/admin/riak-cli/#attach
-[riak cs cli]: /riak/cs/2.1.1/cookbooks/command-line-tools/#riak-cs
+[use admin riak cli]: {{< baseurl >}}riak/kv/2.0.2/using/admin/riak-cli/#attach
+[riak cs cli]: {{< baseurl >}}riak/cs/2.1.1/cookbooks/command-line-tools/#riak-cs
 
 
 Info | Value
diff --git a/content/community/release-and-maintenance.md b/content/community/release-and-maintenance.md
index 5d29fd8fd0..4ab456ce28 100644
--- a/content/community/release-and-maintenance.md
+++ b/content/community/release-and-maintenance.md
@@ -19,7 +19,7 @@ aliases:
 
 Basho may change or supplement this Policy in its sole discretion. Basho will exercise commercially reasonable efforts to notify then-current customers of any such changes provided that, in all cases, the revised Policy is effective upon posting.
 
-## Definitions 
+## Definitions
 
 The following words and/or phrases having the meanings ascribed to them below:
 
@@ -37,7 +37,7 @@ The following words and/or phrases having the meanings ascribed to them below:
 
 Basho’s Standard Support and Maintenance includes the resolution of defects via Patch Releases for a period of time starting at the initial release date of a Feature Release and ending with the next Feature Release. During that time, Basho will release as many Patch Releases as it determines to be appropriate to support the Feature Release. Once a new Feature Release has been made, no further Patch Releases will be made available for any prior Feature Release.
 
-## Long Term Support Window 
+## Long Term Support Window
 
 Basho will promote at least one (1) Feature Release per calendar year to LTS status. From the date a Feature Release is promoted to LTS status, Basho will, in its discretion, provide defect resolution either via Patch Releases or Software Patches for the longer of : i) 2 years, or ii) until the next Feature Release is made available (if longer than 2 years). After the LTS Window for a specific release has expired, no further Patch Releases or Software Patches will be made available for that Feature Release.
diff --git a/content/community/taishi.md b/content/community/taishi.md
index da838f66b5..6e71fbe42b 100644
--- a/content/community/taishi.md
+++ b/content/community/taishi.md
@@ -17,7 +17,7 @@ The team at Basho knows that our community is more than any one product or one p
 
 We call this group **Taishi**: Basho’s community recognition program.
 
-Members of Taishi will be based on past contribution, current commitment and upholding [our code of conduct](https://github.com/basho-labs/the-basho-community/blob/master/code-of-conduct.md) (which has a TL;DR of **be kind to each other**). We will thank people through membership, some fun giveaways, and exclusive opportunities. [Nominations](http://bit.ly/taishi-nomination) are throughout the year, with memberships occurring from [RICON to RICON](ricon.io).
+Members of Taishi will be based on past contribution, current commitment and upholding [our code of conduct](https://github.com/basho-labs/the-basho-community/blob/master/code-of-conduct.md) (which has a TL;DR of **be kind to each other**). We will thank people through membership, some fun giveaways, and exclusive opportunities. [Nominations](http://bit.ly/taishi-nomination) are accepted throughout the year, with memberships occurring from [RICON to RICON](http://ricon.io/).
 
 Nominations are always welcome at http://bit.ly/taishi-nomination
 
@@ -75,7 +75,7 @@ Riak and the tools around it are open source. This public strategy, however, sha
 
 ## Is membership forever?
 
-Membership to Taishi is for a year, measured by our conference, [RICON](ricon.io). From RICON to RICON, members have all the exclusive access and privileges provided through the program.
+Membership to Taishi is for a year, measured by our conference, [RICON](http://ricon.io/). From RICON to RICON, members have all the exclusive access and privileges provided through the program.
 
 ## Are Basho employees eligible?
diff --git a/content/dataplatform/1.0.0/configuring.md b/content/dataplatform/1.0.0/configuring.md
index 7e9a886267..87b0688980 100644
--- a/content/dataplatform/1.0.0/configuring.md
+++ b/content/dataplatform/1.0.0/configuring.md
@@ -10,12 +10,14 @@ menu:
     weight: 103
     pre: configure
 toc: true
+aliases:
+  - /dataplatform/latest/configuring/
 ---
 
-[learn bdp index]: /dataplatform/1.0.0/learn/
-[bdp config]: /dataplatform/1.0.0/configuring/setup-a-cluster/
-[bdp cluster manager]: /dataplatform/1.0.0/configuring/replace-spark-cluster-manager/
-[cache proxy config]: /dataplatform/1.0.0/configuring/cache-proxy/
+[learn bdp index]: {{< baseurl >}}dataplatform/1.0.0/learn/
+[bdp config]: {{< baseurl >}}dataplatform/1.0.0/configuring/setup-a-cluster/
+[bdp cluster manager]: {{< baseurl >}}dataplatform/1.0.0/configuring/replace-spark-cluster-manager/
+[cache proxy config]: {{< baseurl >}}dataplatform/1.0.0/configuring/cache-proxy/
 
 ## In This Section
diff --git a/content/dataplatform/1.0.0/configuring/cache-proxy.md b/content/dataplatform/1.0.0/configuring/cache-proxy.md
index 67b2b5edc6..3a22283391 100644
--- a/content/dataplatform/1.0.0/configuring/cache-proxy.md
+++ b/content/dataplatform/1.0.0/configuring/cache-proxy.md
@@ -12,11 +12,12 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/using-dataplatform/configuration/getting-started-with-cache-proxy/
+  - /dataplatform/latest/configuring/cache-proxy/
 ---
 
-[bdp install]: /dataplatform/1.0.0/installing/
-[bdp configure]: /dataplatform/1.0.0/configuring/setup-a-cluster/
-[bdp configure add services]: /dataplatform/1.0.0/configuring/setup-a-cluster/#add-services
+[bdp install]: {{< baseurl >}}dataplatform/1.0.0/installing/
+[bdp configure]: {{< baseurl >}}dataplatform/1.0.0/configuring/setup-a-cluster/
+[bdp configure add services]: {{< baseurl >}}dataplatform/1.0.0/configuring/setup-a-cluster/#add-services
 [ee]: http://info.basho.com/Wiki_Riak_Enterprise_Request.html
diff --git a/content/dataplatform/1.0.0/configuring/default-ports.md b/content/dataplatform/1.0.0/configuring/default-ports.md
index 2eda28370a..494bcbf58e 100644
--- a/content/dataplatform/1.0.0/configuring/default-ports.md
+++ b/content/dataplatform/1.0.0/configuring/default-ports.md
@@ -12,6 +12,7 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/using-dataplatform/configuration/reference/default-ports/
+  - /dataplatform/latest/configuring/default-ports/
 ---
 
 The following is a list of default network ports used by Basho Data Platform. Your environment should be configured to allow traffic for these network ports.
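To make the default-ports requirement above concrete: a minimal sketch of opening ports between nodes, assuming `firewalld` (adjust for iptables/ufw). The two numbers used here are only Riak KV's well-known protobuf and HTTP defaults standing in for the full BDP list, which lives in the default-ports table itself:

```bash
#!/usr/bin/env bash
# Open illustrative Riak KV ports (8087 protobuf, 8098 HTTP) between nodes.
# Substitute the full port list from the default-ports table for your setup.
for port in 8087 8098; do
    sudo firewall-cmd --permanent --zone=public --add-port="${port}/tcp"
done
sudo firewall-cmd --reload
```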
diff --git a/content/dataplatform/1.0.0/configuring/replace-spark-cluster-manager.md b/content/dataplatform/1.0.0/configuring/replace-spark-cluster-manager.md
index f259595a13..8b2bc2c27b 100644
--- a/content/dataplatform/1.0.0/configuring/replace-spark-cluster-manager.md
+++ b/content/dataplatform/1.0.0/configuring/replace-spark-cluster-manager.md
@@ -12,13 +12,14 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/using-dataplatform/configuration/replace-spark-cluster-manager/
+  - /dataplatform/latest/configuring/replace-spark-cluster-manager/
 ---
 
-[bdp install]: /dataplatform/1.0.0/installing/
-[bdp configure]: /dataplatform/1.0.0/configuring/setup-a-cluster/
-[bdp configure spark master]: /dataplatform/1.0.0/configuring/setup-a-cluster/#set-up-spark-cluster-metadata
+[bdp install]: {{< baseurl >}}dataplatform/1.0.0/installing/
+[bdp configure]: {{< baseurl >}}dataplatform/1.0.0/configuring/setup-a-cluster/
+[bdp configure spark master]: {{< baseurl >}}dataplatform/1.0.0/configuring/setup-a-cluster/#set-up-spark-cluster-metadata
 [ee]: http://info.basho.com/Wiki_Riak_Enterprise_Request.html
-[riak data types]: /riak/kv/2.1.3/developing/data-types/
+[riak data types]: {{< baseurl >}}riak/kv/2.1.3/developing/data-types/
 
 > The Basho Data Platform cluster manager is available to [Enterprise users only][ee].
 
@@ -56,8 +57,8 @@ To replace your Spark Cluster Manager with the BDP cluster manager, you will do
 
 ```bash
-sudo data-platform-admin add-service-config my-spark-master spark-master \
-LEAD_ELECT_SERVICE_HOSTS="»IP:PORTS from `listener.leader_latch.internal` in riak.conf«" \
+sudo data-platform-admin add-service-config my-spark-master spark-master /
+LEAD_ELECT_SERVICE_HOSTS="»IP:PORTS from `listener.leader_latch.internal` in riak.conf«" /
 RIAK_HOSTS="»IP:PORTS from `listener.protobuf.internal` in riak.conf«"
 ```
diff --git a/content/dataplatform/1.0.0/configuring/setup-a-cluster.md b/content/dataplatform/1.0.0/configuring/setup-a-cluster.md
index 7a07d9aea0..8b163c6c83 100644
--- a/content/dataplatform/1.0.0/configuring/setup-a-cluster.md
+++ b/content/dataplatform/1.0.0/configuring/setup-a-cluster.md
@@ -12,17 +12,18 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/using-dataplatform/configuration/setup-a-cluster/
+  - /dataplatform/latest/configuring/setup-a-cluster/
 ---
 
-[bdp install]: /dataplatform/1.0.0/installing/
-[riak cluster setup]: /riak/kv/2.1.3/using/running-a-cluster/
-[riak configure]: /riak/kv/2.1.3/configuring/
+[bdp install]: {{< baseurl >}}dataplatform/1.0.0/installing/
+[riak cluster setup]: {{< baseurl >}}riak/kv/2.1.3/using/running-a-cluster/
+[riak configure]: {{< baseurl >}}riak/kv/2.1.3/configuring/
 [riak_ensemble]: https://github.com/basho/riak_ensemble
-[riak kv]: /riak/kv/2.1.3/
-[riak strong consistency]: /riak/kv/2.1.3/using/reference/strong-consistency
-[aws marketplace]: /riak/kv/2.1.3/setup/installing/amazon-web-services/
-[set spark ip]: /dataplatform/1.0.0/configuring/spark-ip-address/
-[default ports]: /dataplatform/1.0.0/configuring/default-ports/
+[riak kv]: {{< baseurl >}}riak/kv/2.1.3/
+[riak strong consistency]: {{< baseurl >}}riak/kv/2.1.3/using/reference/strong-consistency
+[aws marketplace]: {{< baseurl >}}riak/kv/2.1.3/setup/installing/amazon-web-services/
+[set spark ip]: {{< baseurl >}}dataplatform/1.0.0/configuring/spark-ip-address/
+[default ports]: {{< baseurl >}}dataplatform/1.0.0/configuring/default-ports/
 
 Now that you've [installed Basho Data Platform][bdp install], you're ready to set up a Basho Data Platform (BDP) cluster. This page will guide you through this process.
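The `add-service-config` command in the hunk above asks readers to fill in values from `listener.leader_latch.internal` and `listener.protobuf.internal` in riak.conf. A small sketch for pulling those values out, assuming the usual packaged location `/etc/riak/riak.conf`:

```bash
#!/usr/bin/env bash
# Print the IP:PORT values the BDP cluster-manager command needs.
# /etc/riak/riak.conf is the common packaged path; adjust if yours differs.
RIAK_CONF="${RIAK_CONF:-/etc/riak/riak.conf}"
grep -E '^listener\.(leader_latch|protobuf)\.internal' "$RIAK_CONF" \
    | awk -F' = ' '{printf "%s -> %s\n", $1, $2}'
```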
diff --git a/content/dataplatform/1.0.0/configuring/spark-ip-address.md b/content/dataplatform/1.0.0/configuring/spark-ip-address.md
index 4cf25c36ae..a8e26ac2da 100644
--- a/content/dataplatform/1.0.0/configuring/spark-ip-address.md
+++ b/content/dataplatform/1.0.0/configuring/spark-ip-address.md
@@ -12,6 +12,7 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/using-dataplatform/configuration/advanced/set-spark-ip-address/
+  - /dataplatform/latest/configuring/spark-ip-address/
 ---
 
 To bind Spark Master to a specific host you can manually set the Spark Master IP Address with:
diff --git a/content/dataplatform/1.0.0/downloads.md b/content/dataplatform/1.0.0/downloads.md
index d2cafa2061..a26630f3da 100644
--- a/content/dataplatform/1.0.0/downloads.md
+++ b/content/dataplatform/1.0.0/downloads.md
@@ -18,4 +18,6 @@ listed_projects:
   - project: "dataplatform_extras"
     version: "1.0.0"
     title: "Basho Data Platform Extras"
+aliases:
+  - /dataplatform/latest/downloads/
 ---
diff --git a/content/dataplatform/1.0.0/index.md b/content/dataplatform/1.0.0/index.md
index aa74e77974..180eff70c9 100644
--- a/content/dataplatform/1.0.0/index.md
+++ b/content/dataplatform/1.0.0/index.md
@@ -10,10 +10,12 @@ menu:
     weight: 100
     pre: riak
 toc: false
+aliases:
+  - /dataplatform/latest/
 ---
 
-[bdp install]: /dataplatform/1.0.0/installing/
-[bdp reference]: /dataplatform/1.0.0/learn/service-manager/
+[bdp install]: {{< baseurl >}}dataplatform/1.0.0/installing/
+[bdp reference]: {{< baseurl >}}dataplatform/1.0.0/learn/service-manager/
 [ee]: http://info.basho.com/Wiki_Riak_Enterprise_Request.html
 
 Basho Data Platform (BDP) builds on Riak KV (Riak) to support your data-centric services. Ensure your application is highly available and scalable by leveraging BDP features such as:
diff --git a/content/dataplatform/1.0.0/installing.md b/content/dataplatform/1.0.0/installing.md
index 54be5e06f1..23e7081c00 100644
--- a/content/dataplatform/1.0.0/installing.md
+++ b/content/dataplatform/1.0.0/installing.md
@@ -10,11 +10,13 @@ menu:
     weight: 101
     parent: "index"
 toc: true
+aliases:
+  - /dataplatform/latest/installing/
 ---
 
-[bdp compatibility]: /dataplatform/1.0.0/#supported-operating-systems
-[bdp configure]: /dataplatform/1.0.0/configuring/setup-a-cluster/
-[bdp download]: /dataplatform/1.0.0/downloads/
+[bdp compatibility]: {{< baseurl >}}dataplatform/1.0.0/#supported-operating-systems
+[bdp configure]: {{< baseurl >}}dataplatform/1.0.0/configuring/setup-a-cluster/
+[bdp download]: {{< baseurl >}}dataplatform/1.0.0/downloads/
 
 Basho Data Platform (BDP) enables you to extend Riak with Spark and Redis. This page will guide you through the process of installing BDP on most supported operating systems.
@@ -33,7 +35,7 @@ You need to have root or sudo access on the nodes you will be installing BDP on.
 
 Before you can install BDP, both the total open-files limit and the per-user open-files limit must be high enough to allow BDP to function.
 
-For a fuller guide on changing limits for Riak, see [Changing the limit](/riak/kv/2.1.3/using/performance/open-files-limit) .
+For a fuller guide on changing limits for Riak, see [Changing the limit]({{< baseurl >}}riak/kv/2.1.3/using/performance/open-files-limit).
 
 On most Linux distributions, the total limit for open files is controlled by `sysctl`.
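To make the open-files discussion above concrete, here is a minimal sketch of checking and raising both limits on a typical Linux node. The `65536` value is illustrative, not a BDP recommendation, and the `riak` user name assumes a packaged install:

```bash
#!/usr/bin/env bash
# Inspect the current limits.
sysctl fs.file-max    # system-wide cap on open file handles
ulimit -n             # per-process soft limit for the current shell

# Raise the system-wide cap (illustrative value) and persist it.
sudo sysctl -w fs.file-max=65536
echo 'fs.file-max = 65536' | sudo tee -a /etc/sysctl.d/99-bdp.conf

# Raise the per-user limit for the riak user via limits.conf.
echo 'riak soft nofile 65536' | sudo tee -a /etc/security/limits.conf
echo 'riak hard nofile 65536' | sudo tee -a /etc/security/limits.conf
```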
diff --git a/content/dataplatform/1.0.0/learn.md b/content/dataplatform/1.0.0/learn.md
index f0f9f4a3dd..d4c50c3c6c 100644
--- a/content/dataplatform/1.0.0/learn.md
+++ b/content/dataplatform/1.0.0/learn.md
@@ -12,13 +12,15 @@ menu:
 toc: false
 aliases:
   - /dataplatform/1.0.0/learn-about-dataplatform/learn-about-dataplatform/
+  - /dataplatform/latest/learn/
 ---
 
-[using bdp index]: /dataplatform/1.0.0/using/
-[cache proxy features]: /dataplatform/1.0.0/learn/cache-proxy/
-[service manager features]: /dataplatform/1.0.0/learn/service-manager/
-[leader election features]: /dataplatform/1.0.0/learn/leader-election-service/
-[spark manager features]: /dataplatform/1.0.0/learn/spark-cluster-manager/
+
+[using bdp index]: {{< baseurl >}}dataplatform/1.0.0/using/
+[cache proxy features]: {{< baseurl >}}dataplatform/1.0.0/learn/cache-proxy/
+[service manager features]: {{< baseurl >}}dataplatform/1.0.0/learn/service-manager/
+[leader election features]: {{< baseurl >}}dataplatform/1.0.0/learn/leader-election-service/
+[spark manager features]: {{< baseurl >}}dataplatform/1.0.0/learn/spark-cluster-manager/
 
 ## In This Section
diff --git a/content/dataplatform/1.0.0/learn/cache-proxy.md b/content/dataplatform/1.0.0/learn/cache-proxy.md
index ee50dab16e..47abf66e31 100644
--- a/content/dataplatform/1.0.0/learn/cache-proxy.md
+++ b/content/dataplatform/1.0.0/learn/cache-proxy.md
@@ -12,11 +12,12 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/learn-about-dataplatform/cache-proxy-features/
+  - /dataplatform/latest/learn/cache-proxy/
 ---
 
 [ee]: http://info.basho.com/Wiki_Riak_Enterprise_Request.html
-[readthrough-strategy]: /images/readthrough-strategy.png
-[writethrough-sequence]: /images/writethrough-sequence.png
+[readthrough-strategy]: {{< baseurl >}}images/readthrough-strategy.png
+[writethrough-sequence]: {{< baseurl >}}images/writethrough-sequence.png
 
 >Cache proxy is available to [Enterprise users only][ee].
diff --git a/content/dataplatform/1.0.0/learn/leader-election-service.md b/content/dataplatform/1.0.0/learn/leader-election-service.md
index e71374de82..9c55d644ac 100644
--- a/content/dataplatform/1.0.0/learn/leader-election-service.md
+++ b/content/dataplatform/1.0.0/learn/leader-election-service.md
@@ -12,6 +12,7 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/learn-about-dataplatform/leader-election-service/
+  - /dataplatform/latest/learn/leader-election-service/
 ---
 
 [ee]: http://info.basho.com/Wiki_Riak_Enterprise_Request.html
diff --git a/content/dataplatform/1.0.0/learn/service-manager.md b/content/dataplatform/1.0.0/learn/service-manager.md
index 68189b9872..99e60c1d5e 100644
--- a/content/dataplatform/1.0.0/learn/service-manager.md
+++ b/content/dataplatform/1.0.0/learn/service-manager.md
@@ -12,9 +12,10 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/learn-about-dataplatform/service-manager-features/
+  - /dataplatform/latest/learn/service-manager/
 ---
 
-[bdp cli]: /dataplatform/1.0.0/using/commands/
+[bdp cli]: {{< baseurl >}}dataplatform/1.0.0/using/commands/
 
 ## Overview
diff --git a/content/dataplatform/1.0.0/learn/spark-cluster-manager.md b/content/dataplatform/1.0.0/learn/spark-cluster-manager.md
index 5957d08b98..d0721cd8eb 100644
--- a/content/dataplatform/1.0.0/learn/spark-cluster-manager.md
+++ b/content/dataplatform/1.0.0/learn/spark-cluster-manager.md
@@ -12,10 +12,11 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/learn-about-dataplatform/spark-cluster-manager-features/
+  - /dataplatform/latest/learn/spark-cluster-manager/
 ---
 
-[bdp leader election]: /dataplatform/1.0.0/learn/leader-election-service/
-[bdp cluster manager]: /dataplatform/1.0.0/configuring/replace-spark-cluster-manager/
+[bdp leader election]: {{< baseurl >}}dataplatform/1.0.0/learn/leader-election-service/
+[bdp cluster manager]: {{< baseurl >}}dataplatform/1.0.0/configuring/replace-spark-cluster-manager/
 [ee]: http://info.basho.com/Wiki_Riak_Enterprise_Request.html
 
 > The Spark cluster manager is available to [Enterprise users only][ee].
diff --git a/content/dataplatform/1.0.0/release-notes.md b/content/dataplatform/1.0.0/release-notes.md
index 12b8031b25..9b684c1ccc 100644
--- a/content/dataplatform/1.0.0/release-notes.md
+++ b/content/dataplatform/1.0.0/release-notes.md
@@ -10,10 +10,12 @@ menu:
     weight: 103
     parent: "index"
 toc: true
+aliases:
+  - /dataplatform/latest/release-notes/
 ---
 
-[bdp downloads]: /dataplatform/1.0.0/downloads/
-[bdp install]: /dataplatform/1.0.0/installing/
+[bdp downloads]: {{< baseurl >}}dataplatform/1.0.0/downloads/
+[bdp install]: {{< baseurl >}}dataplatform/1.0.0/installing/
 
 Released August 27, 2015.
diff --git a/content/dataplatform/1.0.0/upgrading.md b/content/dataplatform/1.0.0/upgrading.md
index d2cd95bb3c..d3021e7131 100644
--- a/content/dataplatform/1.0.0/upgrading.md
+++ b/content/dataplatform/1.0.0/upgrading.md
@@ -10,6 +10,8 @@ menu:
     weight: 102
     parent: "index"
 toc: true
+aliases:
+  - /dataplatform/latest/upgrading/
 ---
 
 > **NOTE:**
diff --git a/content/dataplatform/1.0.0/using.md b/content/dataplatform/1.0.0/using.md
index 8e6341fd7a..36a07673b6 100644
--- a/content/dataplatform/1.0.0/using.md
+++ b/content/dataplatform/1.0.0/using.md
@@ -12,12 +12,13 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/using-dataplatform/using-dataplatform/
+  - /dataplatform/latest/using/
 ---
 
-[bdp install]: /dataplatform/1.0.0/installing/
-[start services]: /dataplatform/1.0.0/using/start-services/
-[bdp cli]: /dataplatform/1.0.0/using/dataplatform-commands/
-[learn bdp index]: /dataplatform/1.0.0/learn/
+[bdp install]: {{< baseurl >}}dataplatform/1.0.0/installing/
+[start services]: {{< baseurl >}}dataplatform/1.0.0/using/start-services/
+[bdp cli]: {{< baseurl >}}dataplatform/1.0.0/using/dataplatform-commands/
+[learn bdp index]: {{< baseurl >}}dataplatform/1.0.0/learn/
 
 ## In This Section
diff --git a/content/dataplatform/1.0.0/using/commands.md b/content/dataplatform/1.0.0/using/commands.md
index bf6cefb735..944e87dccc 100644
--- a/content/dataplatform/1.0.0/using/commands.md
+++ b/content/dataplatform/1.0.0/using/commands.md
@@ -12,12 +12,13 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/using-dataplatform/dataplatform-commands/
+  - /dataplatform/latest/using/commands/
 ---
 
-[bdp configure]: /dataplatform/1.0.0/configuring/setup-a-cluster/
-[bdp configure add services]: /dataplatform/1.0.0/configuring/setup-a-cluster/#add-services
-[bdp install]: /dataplatform/1.0.0/installing/
-[bdp reference]: /dataplatform/1.0.0/learn/service-manager/
+[bdp configure]: {{< baseurl >}}dataplatform/1.0.0/configuring/setup-a-cluster/
+[bdp configure add services]: {{< baseurl >}}dataplatform/1.0.0/configuring/setup-a-cluster/#add-services
+[bdp install]: {{< baseurl >}}dataplatform/1.0.0/installing/
+[bdp reference]: {{< baseurl >}}dataplatform/1.0.0/learn/service-manager/
 
 Basho Data Platform (BDP) comes with a command line tool (`data-platform-admin`) that allows you to perform various operations on your BDP cluster. The following reference outlines available commands and their uses.
diff --git a/content/dataplatform/1.0.0/using/spark-riak-connector.md b/content/dataplatform/1.0.0/using/spark-riak-connector.md
index a58298a2a8..21e35fc8ce 100644
--- a/content/dataplatform/1.0.0/using/spark-riak-connector.md
+++ b/content/dataplatform/1.0.0/using/spark-riak-connector.md
@@ -12,6 +12,7 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/using-dataplatform/using-sparkconnector/
+  - /dataplatform/latest/using/spark-riak-connector/
 ---
 
 This is a quick, practical guide on how to use the Spark Riak connector.
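As a sketch of the shell side of "using the connector" for the guide above, one would typically hand the connector to Spark at launch and point it at a Riak protobuf endpoint. The Maven coordinates and the `spark.riak.connection.host` property below are assumptions to verify against the connector's own documentation for your release:

```bash
#!/usr/bin/env bash
# Launch a Spark shell with the Spark-Riak connector on the classpath.
# Package coordinates and the riak connection property are assumptions --
# confirm both against the connector docs before relying on them.
spark-shell \
  --packages com.basho.riak:spark-riak-connector_2.10:1.0.0 \
  --conf spark.riak.connection.host=127.0.0.1:8087
```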
diff --git a/content/dataplatform/1.0.0/using/start-services.md b/content/dataplatform/1.0.0/using/start-services.md
index 81ec57285c..cd2232e595 100644
--- a/content/dataplatform/1.0.0/using/start-services.md
+++ b/content/dataplatform/1.0.0/using/start-services.md
@@ -12,12 +12,13 @@ menu:
 toc: true
 aliases:
   - /dataplatform/1.0.0/using-dataplatform/using-bdp/
+  - /dataplatform/latest/using/start-services/
 ---
 
-[bdp configure]: /dataplatform/1.0.0/configuring/setup-a-cluster/
-[bdp configure add services]: /dataplatform/1.0.0/configuring/setup-a-cluster/#add-services
-[bdp install]: /dataplatform/1.0.0/installing/
-[bdp reference]: /dataplatform/1.0.0/learn/service-manager/
+[bdp configure]: {{< baseurl >}}dataplatform/1.0.0/configuring/setup-a-cluster/
+[bdp configure add services]: {{< baseurl >}}dataplatform/1.0.0/configuring/setup-a-cluster/#add-services
+[bdp install]: {{< baseurl >}}dataplatform/1.0.0/installing/
+[bdp reference]: {{< baseurl >}}dataplatform/1.0.0/learn/service-manager/
 
 You've [installed][bdp install] Basho Data Platform (BDP), [configured][bdp configure] your cluster, and [added services][bdp configure add services] to your nodes. The setup of your BDP cluster is complete! Now you can begin using your BDP cluster.
diff --git a/content/riak/cs/2.0.0/cookbooks/access-control-lists.md b/content/riak/cs/2.0.0/cookbooks/access-control-lists.md
index bdaec2b63c..8b8fa36c12 100644
--- a/content/riak/cs/2.0.0/cookbooks/access-control-lists.md
+++ b/content/riak/cs/2.0.0/cookbooks/access-control-lists.md
@@ -82,9 +82,9 @@ Riak CS permissions are split into two types: **bucket permissions** and
 
 ## Buckets
 
-Bucket names **must** be [globally unique](/riak/cs/2.0.0/theory/stanchion/#globally-unique-entities). To avoid conflicts, all
+Bucket names **must** be [globally unique]({{< baseurl >}}riak/cs/2.0.0/theory/stanchion/#globally-unique-entities). To avoid conflicts, all
 bucket creation requests are made to an application called
-[Stanchion](/riak/cs/2.0.0/cookbooks/configuration/stanchion). This means that all requests for modification of a bucket ACL should be serialized through Stanchion. While this may cause undesirable serialization of these requests, we
+[Stanchion]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/stanchion). This means that all requests for modification of a bucket ACL should be serialized through Stanchion. While this may cause undesirable serialization of these requests, we
 believe it is appropriate based on the following statement from this
 [documentation on bucket restrictions](http://docs.amazonwebservices.com/AmazonS3/latest/dev/BucketRestrictions.html) from Amazon regarding restrictions on bucket
 operations:
@@ -105,4 +105,4 @@ created granting the creator both ownership and full access control and denying
 access to all other parties. For information on specifying an ACL when
 making a `PUT` request, see
-[Riak CS PUT Object ACL](/riak/cs/2.0.0/references/apis/storage/s3/put-object-acl).
+[Riak CS PUT Object ACL]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-object-acl).
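To ground the ACL behaviour described above, a hedged example using s3cmd (any S3 client works; the endpoint and bucket name are placeholders) of creating a bucket and granting public read at PUT time rather than accepting the default creator-only ACL:

```bash
#!/usr/bin/env bash
# Placeholder endpoint/bucket -- point these at your own Riak CS cluster.
# Remember that bucket names must be globally unique across the system.
s3cmd --host=data.example.com --host-bucket='%(bucket)s.data.example.com' \
      mb s3://my-globally-unique-bucket

# Upload an object with a public-read ACL; without --acl-public the default
# ACL grants the creator full control and denies everyone else.
s3cmd --host=data.example.com --host-bucket='%(bucket)s.data.example.com' \
      put --acl-public ./report.csv s3://my-globally-unique-bucket/report.csv
```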
diff --git a/content/riak/cs/2.0.0/cookbooks/authentication.md b/content/riak/cs/2.0.0/cookbooks/authentication.md
index b4747f28e5..b1c42379fd 100644
--- a/content/riak/cs/2.0.0/cookbooks/authentication.md
+++ b/content/riak/cs/2.0.0/cookbooks/authentication.md
@@ -111,4 +111,4 @@ http://bucket.data.basho.com/document?AWSAccessKeyId=8EE3UE-UMW1YTPMBC3EB&Expire
 ## Keystone Authentication
 
 More information on using Keystone for authentication with Riak CS can
-be found in [using Riak CS with Keystone](/riak/cs/2.0.0/cookbooks/using-with-keystone).
+be found in [using Riak CS with Keystone]({{< baseurl >}}riak/cs/2.0.0/cookbooks/using-with-keystone).
diff --git a/content/riak/cs/2.0.0/cookbooks/command-line-tools.md b/content/riak/cs/2.0.0/cookbooks/command-line-tools.md
index 0d48361aae..2a8b3665a5 100644
--- a/content/riak/cs/2.0.0/cookbooks/command-line-tools.md
+++ b/content/riak/cs/2.0.0/cookbooks/command-line-tools.md
@@ -206,7 +206,7 @@ More information about Erlang's etop tool can be found in the
 
 ## riak-cs-admin gc
 
-This command controls Riak CS's [garbage collection](/riak/cs/2.0.0/cookbooks/garbage-collection) system.
+This command controls Riak CS's [garbage collection]({{< baseurl >}}riak/cs/2.0.0/cookbooks/garbage-collection) system.
 
 ```bash
 riak-cs-admin gc
@@ -312,7 +312,7 @@ undergirding Riak CS.
 
 Temporarily changes the host and/or port used by Stanchion. This change is
 effective until the node is restarted, at which point Stanchion will
-begin listening on the host and port specified in your [configuration files](/riak/cs/2.0.0/cookbooks/configuration/reference).
+begin listening on the host and port specified in your [configuration files]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/reference).
 
 ```bash
 riak-cs-stanchion switch HOST PORT
@@ -521,7 +521,7 @@ documented [above](#riak-cs-admin-access).
 
 Riak CS version 1.5 offers support for supercluster operations. The
 `supercluster` command interface enables you to interact with that system.
-More information can be found in [Riak CS Supercluster Support](/riak/cs/2.0.0/cookbooks/supercluster).
+More information can be found in [Riak CS Supercluster Support]({{< baseurl >}}riak/cs/2.0.0/cookbooks/supercluster).
 
 {{% note title="Note: technical preview" %}}
 Riak CS supercluster support is available only as a technical preview for
@@ -635,7 +635,7 @@ Fetches all current weights from the master member.
 
 ```bash
 riak-cs-supercluster refresh
 ```
 
-When a member's weight is updated, that weight is stored in the [master member](/riak/cs/2.0.0/cookbooks/supercluster/#the-master-member) and cached in Riak CS. Riak CS fetches weights from the master member only periodically. The
+When a member's weight is updated, that weight is stored in the [master member]({{< baseurl >}}riak/cs/2.0.0/cookbooks/supercluster/#the-master-member) and cached in Riak CS. Riak CS fetches weights from the master member only periodically. The
 `refresh` command syncs the weights stored in the master member with the
 weights cached in Riak CS so that there is no discrepancy.
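A hedged sketch of the weight-then-refresh workflow described above. The `weight MEMBER WEIGHT` form and the member name are assumptions based on this page's command listing; verify against the tool's own usage output:

```bash
#!/usr/bin/env bash
# Set a new weight for a supercluster member (subcommand form assumed --
# check `riak-cs-supercluster` usage on your install).
riak-cs-supercluster weight member-a 40

# Sync the weights cached in Riak CS with those stored in the master
# member, so the two cannot disagree.
riak-cs-supercluster refresh
```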
diff --git a/content/riak/cs/2.0.0/cookbooks/configuration.md b/content/riak/cs/2.0.0/cookbooks/configuration.md index 5a1552cac6..76b7b5e8a0 100644 --- a/content/riak/cs/2.0.0/cookbooks/configuration.md +++ b/content/riak/cs/2.0.0/cookbooks/configuration.md @@ -16,8 +16,8 @@ aliases: In a Riak CS storage system, three components work in conjunction with one another, which means that you must configure each component to work with the others: * Riak --- The database system that acts as the backend storage -* Riak CS --- The cloud storage layer over Riak which exposes the storage and 
billing APIs, storing files and metadata in Riak, and streaming them back to 
users -* Stanchion --- Manages requests involving globally unique system entities, such as 
buckets and users sent to a Riak instance, for example, to create users or to create or delete buckets +* Riak CS --- The cloud storage layer over Riak which exposes the storage and billing APIs, storing files and metadata in Riak, and streaming them back to users +* Stanchion --- Manages requests involving globally unique system entities, such as buckets and users sent to a Riak instance, for example, to create users or to create or delete buckets In addition, you must also configure the S3 client you use to communicate with your Riak CS system. @@ -27,7 +27,7 @@ If your system consists of several nodes, configuration primarily represents set ## Configuration of System Components -* [Configuring Riak](/riak/cs/2.0.0/cookbooks/configuration/riak-for-cs) -* [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs) -* [Configuring Stanchion](/riak/cs/2.0.0/cookbooks/configuration/stanchion) -* [Configuring an S3 client](/riak/cs/2.0.0/cookbooks/configuration/s3-client) +* [Configuring Riak]({{}}riak/cs/2.0.0/cookbooks/configuration/riak-for-cs) +* [configuring Riak CS]({{}}riak/cs/2.0.0/cookbooks/configuration/riak-cs) +* [Configuring Stanchion]({{}}riak/cs/2.0.0/cookbooks/configuration/stanchion) +* [Configuring an S3 client]({{}}riak/cs/2.0.0/cookbooks/configuration/s3-client) diff --git a/content/riak/cs/2.0.0/cookbooks/configuration/dragondisk.md b/content/riak/cs/2.0.0/cookbooks/configuration/dragondisk.md index fe0721f986..1c45ad8949 100644 --- a/content/riak/cs/2.0.0/cookbooks/configuration/dragondisk.md +++ b/content/riak/cs/2.0.0/cookbooks/configuration/dragondisk.md @@ -29,7 +29,7 @@ other Linux distributions. This is the main DragonDisk window as it appears upon starting the application. -![DragonDisk screenshot](/images/dragondisk_linux0.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux0.png) ## Create an account @@ -38,16 +38,16 @@ save an account. The following describes the process for doing so. * From the **File** menu, select **Accounts**. -![DragonDisk screenshot](/images/dragondisk_linux1.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux1.png) * Click **New**. -![DragonDisk screenshot](/images/dragondisk_linux2.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux2.png) * In the **Account** dialog window, choose **Other S3 compatible service** under the **Provider** drop down menu. -![DragonDisk screenshot](/images/dragondisk_linux3.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux3.png) * Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field. @@ -66,12 +66,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTS** check box. * Click **OK** to save the account configuration. -![DragonDisk screenshot](/images/dragondisk_linux4.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux4.png) * Click **Close** to complete account creation and to continue to attempt connecting to Riak CS. -![DragonDisk screenshot](/images/dragondisk_linux5.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux5.png) ### Connect to Riak CS @@ -84,7 +84,7 @@ configuration. right pane, then you're connected to Riak CS and can proceed to creating a bucket. -![DragonDisk screenshot](/images/dragondisk_linux6.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux6.png) ### Create a bucket @@ -93,7 +93,7 @@ Riak CS. * Click the **Create bucket** icon to open the bucket creation dialog. 
-![DragonDisk screenshot](/images/dragondisk_linux7.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux7.png) * In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket `dragondisklinux`. @@ -101,7 +101,7 @@ Riak CS. * The bucket should now appear in the right pane and you can now proceed with copying some test files into the bucket. -![DragonDisk screenshot](/images/dragondisk_linux8.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux8.png) ### Copy files to bucket @@ -109,13 +109,13 @@ Finally, navigate your local computer in the left pane and select a file or files to copy from your local computer to the newly created Riak CS bucket. -![DragonDisk screenshot](/images/dragondisk_linux9.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux9.png) * After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin. * After the files are copied, they'll appear in the bucket. -![DragonDisk screenshot](/images/dragondisk_linux10.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux10.png) * You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk. @@ -131,7 +131,7 @@ This section describes configuration of DragonDisk for Mac OS X. * This is the main DragonDisk window as it appears upon starting the application. -![DragonDisk screenshot](/images/dragondisk_osx0.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx0.png) ### Create an account @@ -140,16 +140,16 @@ save an account. The following describes the process for doing so. * From the **File** menu, select **Accounts**. -![DragonDisk screenshot](/images/dragondisk_osx1.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx1.png) * Click **New**. -![DragonDisk screenshot](/images/dragondisk_osx2.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx2.png) * In the **Account** dialog window, choose **Other S3 compatible service** under the **Provider** drop down menu. -![DragonDisk screenshot](/images/dragondisk_osx3.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx3.png) * Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field. @@ -168,12 +168,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTS** check box. * Click **OK** to save the account configuration. -![DragonDisk screenshot](/images/dragondisk_osx4.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx4.png) * Click **Close** to complete account creation and continue try connecting to Riak CS. -![DragonDisk screenshot](/images/dragondisk_osx5.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx5.png) ### Connect to Riak CS @@ -186,7 +186,7 @@ configuration. right pane, then you're connected to Riak CS and can proceed to creating a bucket. -![DragonDisk screenshot](/images/dragondisk_osx6.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx6.png) ### Create a bucket @@ -195,7 +195,7 @@ Riak CS. * Click the **Create bucket** icon to open the bucket creation dialog. -![DragonDisk screenshot](/images/dragondisk_osx7.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx7.png) * In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket *dragondiskosx*. @@ -203,7 +203,7 @@ Riak CS. * The bucket should now appear in the right pane and you can now proceed with copying some test files into the bucket. 
-![DragonDisk screenshot](/images/dragondisk_osx8.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx8.png) ### Copy files to bucket @@ -211,14 +211,14 @@ Finally, navigate your local computer in the left pane and select a file or files to copy from your local computer to the newly created Riak CS bucket. -![DragonDisk screenshot](/images/dragondisk_osx9.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx9.png) * After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin. * After the files are copied, they'll appear in the bucket. -![DragonDisk screenshot](/images/dragondisk_osx10.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx10.png) * You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk. @@ -234,7 +234,7 @@ This section describes configuration of DragonDisk for Windows. * This is the main DragonDisk window as it appears upon starting the application. -![DragonDisk screenshot](/images/dragondisk_windows0.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows0.png) ### Create an account @@ -243,16 +243,16 @@ save an account. The following describes the process for doing so. * From the **File** menu, select **Accounts**. -![DragonDisk screenshot](/images/dragondisk_windows1.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows1.png) * Click **New**. -![DragonDisk screenshot](/images/dragondisk_windows2.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows2.png) * In the **Account** dialog window, choose **Other S3-compatible service** under the **Provider** drop down menu. -![DragonDisk screenshot](/images/dragondisk_windows3.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows3.png) * Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field. @@ -271,12 +271,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTS** check box. * Click **OK** to save the account configuration. -![DragonDisk screenshot](/images/dragondisk_windows4.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows4.png) * Click **Close** to complete account creation and continue try connecting to Riak CS. -![DragonDisk screenshot](/images/dragondisk_windows5.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows5.png) ### Connect to Riak CS @@ -289,7 +289,7 @@ configuration. right pane, then you're connected to Riak CS and can proceed to creating a bucket. -![DragonDisk screenshot](/images/dragondisk_windows6.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows6.png) ### Create a bucket @@ -297,7 +297,7 @@ configuration. with Riak CS. * Click the **Create bucket** icon to open the bucket creation dialog. -![DragonDisk screenshot](/images/dragondisk_windows7.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows7.png) * In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket *dragonbucket*. @@ -305,7 +305,7 @@ configuration. * The bucket should now appear in the right pane and you can now proceed with copying some test files into the bucket. -![DragonDisk screenshot](/images/dragondisk_windows8.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows8.png) ### Copy files to bucket @@ -313,13 +313,13 @@ Finally, navigate your local computer in the left pane and select a file or files to copy from your local computer to the newly created Riak CS bucket. 
-![DragonDisk screenshot](/images/dragondisk_windows9.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows9.png) * After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin. * After the files are copied, they'll appear in the bucket. -![DragonDisk screenshot](/images/dragondisk_windows10.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows10.png) * You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk. diff --git a/content/riak/cs/2.0.0/cookbooks/configuration/load-balancing-proxy.md b/content/riak/cs/2.0.0/cookbooks/configuration/load-balancing-proxy.md index 373772cc34..4c8937dd5d 100644 --- a/content/riak/cs/2.0.0/cookbooks/configuration/load-balancing-proxy.md +++ b/content/riak/cs/2.0.0/cookbooks/configuration/load-balancing-proxy.md @@ -51,7 +51,7 @@ act as a load balancer to a Riak CS installation. > **Note on open files limits** > > The operating system's open files limits need to be greater than 256000 -for the example configuration that follows. Consult the [Open Files Limit](/riak/kv/2.1.3/using/performance/open-files-limit) documentation for details on configuring the value for different +for the example configuration that follows. Consult the [Open Files Limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit) documentation for details on configuring the value for different operating systems. ```config diff --git a/content/riak/cs/2.0.0/cookbooks/configuration/multi-datacenter.md b/content/riak/cs/2.0.0/cookbooks/configuration/multi-datacenter.md index a38bdac1de..796198a8f4 100644 --- a/content/riak/cs/2.0.0/cookbooks/configuration/multi-datacenter.md +++ b/content/riak/cs/2.0.0/cookbooks/configuration/multi-datacenter.md @@ -28,7 +28,7 @@ CS cluster. As of Riak release 1.4.0, there are two different MDC replication modes that Riak CS can use to request data from remote clusters. Please see -the [comparison](/riak/kv/2.1.3/using/reference/multi-datacenter/comparison) doc for more information. +the [comparison]({{}}riak/kv/2.1.3/using/reference/multi-datacenter/comparison) doc for more information. ### Replication Version 3 Configuration @@ -75,9 +75,9 @@ configured **sink cluster**. See also: -* [Upgrading from v2 to v3](/riak/kv/2.1.3/setup/upgrading/multi-datacenter) -* [Comparing v2 and v3](/riak/kv/2.1.3/using/reference/multi-datacenter/comparison) -* [Multi-Datacenter Operations](/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter) + +* [Comparing v2 and v3]({{}}riak/kv/2.1.3/using/reference/multi-datacenter/comparison) +* [Multi-Datacenter Operations]({{}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter) ## Riak CS Configuration @@ -119,10 +119,10 @@ Replace the `node` variable above with the nodename specified in the ## Stanchion Configuration -Though there is no specific configuration for [Stanchion](/riak/cs/2.0.0/theory/stanchion), note that +Though there is no specific configuration for [Stanchion]({{}}riak/cs/2.0.0/theory/stanchion), note that Stanchion should be a single, globally unique process to which every Riak CS node sends requests, even if there are multiple replicated sites. Unlike Riak and Riak CS, Stanchion should run on _only one node in a given cluster_, perhaps on its own, dedicated hardware if you wish. 
Stanchion runs on only one node because it manages strongly consistent -updates to [globally unique entities](/riak/cs/2.0.0/theory/stanchion/#globally-unique-entities) like users and buckets. +updates to [globally unique entities]({{< baseurl >}}riak/cs/2.0.0/theory/stanchion/#globally-unique-entities) like users and buckets. diff --git a/content/riak/cs/2.0.0/cookbooks/configuration/reference.md b/content/riak/cs/2.0.0/cookbooks/configuration/reference.md index 4798528a29..fd61fd7390 100644 --- a/content/riak/cs/2.0.0/cookbooks/configuration/reference.md +++ b/content/riak/cs/2.0.0/cookbooks/configuration/reference.md @@ -53,12 +53,12 @@ aliases: ]}, ``` - and so on. More details can be found at [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs). + and so on. More details can be found at [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs). {{% /note %}} This document is intended as a reference listing of all configurable parameters for Riak CS. For a more narrative-style walkthrough of configuring Riak CS, we -recommend consulting the [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs) tutorial. +recommend consulting the [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs) tutorial. The configuration for Riak CS is handled through either the `riak-cs.conf` and `advanced.config` file pair, which were introduced in Riak CS 2.0.0, or the two @@ -130,7 +130,7 @@ The tables below will show settings for both `riak-cs.conf` and riak_host The IP address/port for the Riak CS node's corresponding Riak node (used by -Riak's Protocol Buffers interface) +Riak's Protocol Buffers interface) 127.0.0.1:8087 @@ -157,7 +157,7 @@ the corresponding HTTP host). riak_host The TCP IP/port for the Riak CS node's corresponding Riak node (used by -Riak's Protocol Buffers interface) +Riak's Protocol Buffers interface) {"127.0.0.1", 8087} @@ -298,7 +298,7 @@ tasks use the IP and port as all other Riak CS traffic. The admin key used for administrative access to Riak CS, e.g. usage of the /riak-cs/stats endpoint. Please note that both admin.key and admin.secret must match the -corresponding settings in the Stanchion node's stanchion.conf. +corresponding settings in the Stanchion node's stanchion.conf. admin-key @@ -326,7 +326,7 @@ this setting unless you implement a custom authentication scheme. rewrite_module A rewrite module contains a set of rules for translating requests made using -a particular API to requests in the the native Riak CS storage API. We do +a particular API to requests in the native Riak CS storage API. We do not recommend changing this setting unless you implement a custom module. riak_cs_s3_rewrite @@ -351,7 +351,7 @@ tasks use the IP and port as all other Riak CS traffic. The admin key used for administrative access to Riak CS, e.g. usage of the /riak-cs/stats endpoint. Please note that both admin_key and admin_secret must match the -corresponding settings in the Stanchion node's +corresponding settings in the Stanchion node's app.config. @@ -387,7 +387,7 @@ actions, including bucket deletion. rewrite_module A rewrite module contains a set of rules for translating requests -made using a particular API to requests in the the native Riak CS storage API. We do not recommend changing this setting unless you +made using a particular API to requests in the native Riak CS storage API. We do not recommend changing this setting unless you implement a custom module. riak_cs_s3_rewrite @@ -396,7 +396,7 @@ implement a custom module.
## Usage Recording -These settings relate to Riak CS's [access logs](/riak/cs/2.0.0/cookbooks/usage-and-billing-data). +These settings relate to Riak CS's [access logs]({{}}riak/cs/2.0.0/cookbooks/usage-and-billing-data). ### `riak-cs.conf` @@ -519,7 +519,7 @@ of 86400 translates to 1 day. ## Garbage Collection -Settings related to Riak CS's [garbage collection](/riak/cs/2.0.0/cookbooks/garbage-collection) \(GC) process. +Settings related to Riak CS's [garbage collection]({{}}riak/cs/2.0.0/cookbooks/garbage-collection) \(GC) process. ### `riak-cs.conf` @@ -661,7 +661,7 @@ blocks to Riak. cs_version The Riak CS version number. This number is used to selectively enable new -features for the current version to better support rolling upgrades. New +features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set @@ -696,7 +696,7 @@ source IP address as an input (which is the default). cs_version The Riak CS version number. This number is used to selectively -enable new features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set to the new value. +enable new features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set to the new value. @@ -928,7 +928,7 @@ lager. sasl_error_lager Whether to enable , Erlang's built-in +href="http://www.erlang.org/doc/man/sasl_app.html">sasl, Erlang's built-in error logger. false @@ -958,4 +958,4 @@ error logger. --> -[config_your_code]: http://docs.basho.com/riak/1.4.12/ops/advanced/configs/configuration-files/#Configuring-Your-code-vm-args-code- +[config_your_code]: {{< baseurl >}}riak/kv/2.0.0/configuring/basic/#erlang-vm-tunings diff --git a/content/riak/cs/2.0.0/cookbooks/configuration/riak-cs.md b/content/riak/cs/2.0.0/cookbooks/configuration/riak-cs.md index b200e80b38..4f32805ef2 100644 --- a/content/riak/cs/2.0.0/cookbooks/configuration/riak-cs.md +++ b/content/riak/cs/2.0.0/cookbooks/configuration/riak-cs.md @@ -10,8 +10,8 @@ menu: project: "riak_cs" project_version: "2.0.0" aliases: - - /riakcs/2.0.0/cookbooks/configuration/Configuring-Riak-CS/ - - /riak/cs/2.0.0/cookbooks/configuration/Configuring-Riak-CS/ + - /riakcs/2.0.0/cookbooks/configuration/riak-cs/ + - /riak/cs/2.0.0/cookbooks/configuration/riak-cs/ --- For Riak CS to operate properly it must know how to connect to Riak. @@ -51,9 +51,9 @@ files. If an `app.config` file is present, neither the `riak-cs.config` nor the to continue usage of the legacy `app.config` file, please note that some configuration options have changed names. Most notably, the IP/Port format has changed in 2.0 for Stanchion, Riak, and Riak CS. To view these changes, -please review the [Rolling Upgrades](/riak/cs/2.0.0/cookbooks/rolling-upgrades) Document. +please review the [Rolling Upgrades]({{}}riak/cs/2.0.0/cookbooks/rolling-upgrades) Document. 
> -> For a comprehensive listing of available parameters and a full list of `app.config` parameters, see the [Full Configuration Reference](/riak/cs/2.0.0/cookbooks/configuration/reference). +> For a comprehensive listing of available parameters and a full list of `app.config` parameters, see the [Full Configuration Reference]({{}}riak/cs/2.0.0/cookbooks/configuration/reference). The sections below walk you through some of the main configuration categories that you will likely encounter while operating Riak CS. @@ -81,12 +81,12 @@ is required. {{% /note %}} After making any changes to the `riak-cs.conf` file in Riak CS, -[restart](/riak/cs/2.0.0/cookbooks/command-line-tools/#riak-cs) the node if it is already running. +[restart]({{}}riak/cs/2.0.0/cookbooks/command-line-tools/#riak-cs) the node if it is already running. ## Specifying the Stanchion Node If you're running a single Riak CS node, you don't have to change the -[Stanchion](/riak/cs/2.0.0/cookbooks/configuration/stanchion) settings because Stanchion runs on the local host. If your Riak CS system has multiple nodes, however, you must specify the IP address and port for the Stanchion node and whether or not SSL is enabled. +[Stanchion]({{}}riak/cs/2.0.0/cookbooks/configuration/stanchion) settings because Stanchion runs on the local host. If your Riak CS system has multiple nodes, however, you must specify the IP address and port for the Stanchion node and whether or not SSL is enabled. The Stanchion settings reside in the Riak CS `riak-cs.conf` file, which is located in the `/etc/riak-cs` directory of each Riak CS node. @@ -264,7 +264,7 @@ particular use case. ### Tuning We strongly recommend that you take care when setting the value of the -[`pb_backlog` setting](/riak/cs/2.0.0/cookbooks/configuration/riak-for-cs/#setting-up-riak-to-use-protocol-buffers) in Riak. When a Riak CS node is +[`pb_backlog` setting]({{}}riak/cs/2.0.0/cookbooks/configuration/riak-for-cs/#setting-up-riak-to-use-protocol-buffers) in Riak. When a Riak CS node is started, each connection pool begins to establish connections to Riak. This can result in a [thundering herd problem](http://en.wikipedia.org/wiki/Thundering_herd_problem) in which connections in the pool believe they are connected to Riak, but in reality some of the connections have been reset. Due to TCP `RST` packet rate limiting (controlled by `net.inet.icmp.icmplim`) some of the connections may not receive notification until they are used to service a user's request. This manifests itself as an `{error, disconnected}` message in the Riak CS logs and an error returned to the user. @@ -354,7 +354,7 @@ data.riakcs.net The following options are available to make adjustments to the Riak CS garbage collection system. More details about garbage collection in Riak CS are -available in [Garbage Collection](/riak/cs/2.0.0/cookbooks/garbage-collection). +available in [Garbage Collection]({{}}riak/cs/2.0.0/cookbooks/garbage-collection). * `gc.leeway_period` (`leeway_seconds` in `advanced.config` or `app.config`) --- The amount of time that must elapse before an object version that has been @@ -420,4 +420,4 @@ been deprecated, and _will be removed_ in the next major release. ## Other Riak CS Settings For a complete listing of configurable parameters for Riak CS, see the -[configuration reference](/riak/cs/2.0.0/cookbooks/configuration/reference) document. +[configuration reference]({{}}riak/cs/2.0.0/cookbooks/configuration/reference) document. 
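The `riak-cs.md` hunks above note that multi-node deployments must point every Riak CS node at the shared Stanchion node and restart the node after editing `riak-cs.conf`. A minimal sketch of that step, assuming a 2.0-style `stanchion_host` key (the key name and the address shown are assumptions, not values taken from this diff):

```bash
# Minimal sketch: point this node at a dedicated Stanchion node.
# stanchion_host is assumed to be the riak-cs.conf key; 192.0.2.20:8085
# is a placeholder address.
sudo sed -i 's|^stanchion_host = .*|stanchion_host = 192.0.2.20:8085|' \
  /etc/riak-cs/riak-cs.conf
sudo riak-cs restart   # restart so the new setting takes effect
```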
diff --git a/content/riak/cs/2.0.0/cookbooks/configuration/riak-for-cs.md b/content/riak/cs/2.0.0/cookbooks/configuration/riak-for-cs.md index b50a799fbc..d16e60a21c 100644 --- a/content/riak/cs/2.0.0/cookbooks/configuration/riak-for-cs.md +++ b/content/riak/cs/2.0.0/cookbooks/configuration/riak-for-cs.md @@ -21,21 +21,21 @@ reference document listing important configurable parameters. ## The Proper Backends for Riak CS -The default backend used by Riak is the [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) backend, but the +The default backend used by Riak is the [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) backend, but the Riak CS package includes a special backend that should be used by the Riak cluster that is part of the Riak CS system. It is a custom version -of the standard [Multi](/riak/kv/2.1.3/setup/planning/backend/multi) backend that ships with Riak. +of the standard [Multi]({{}}riak/kv/2.1.3/setup/planning/backend/multi) backend that ships with Riak. Some of the Riak buckets used internally by Riak CS use secondary -indexes, which currently requires the [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb) backend. Other parts +indexes, which currently requires the [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb) backend. Other parts of the Riak CS system can benefit from the use of the Bitcask backend. -The use of the custom [Multi](/riak/kv/2.1.3/setup/planning/backend/multi) backend enables Riak CS to take +The use of the custom [Multi]({{}}riak/kv/2.1.3/setup/planning/backend/multi) backend enables Riak CS to take advantage of the strengths of both of these backends to achieve the best blend of performance and features. The next section covers how to properly set up Riak to use this Multi backend. Additionally, the Riak CS storage calculation system uses Riak's -[MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce) to sum the files in a bucket. This means that you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage. +[MapReduce]({{}}riak/kv/2.1.3/developing/usage/mapreduce) to sum the files in a bucket. This means that you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage. A few other settings must be modified to configure a Riak node as part of a Riak CS system, such as the node IP address and the IP address and @@ -46,7 +46,7 @@ configure a Riak node to work as part of a Riak CS system. ## Setting up the Proper Riak Backend First, edit Riak's `riak.conf`, or the old-style `advanced.config` or -`app.config` [configuration file](/riak/kv/2.1.3/configuring/reference). These files can be found in the `/etc/riak` or `/opt/riak/etc` directories. By default, Riak uses the [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) backend. The first thing we need to do is to change that by removing +`app.config` [configuration file]({{}}riak/kv/2.1.3/configuring/reference). These files can be found in the `/etc/riak` or `/opt/riak/etc` directories. By default, Riak uses the [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) backend. The first thing we need to do is to change that by removing the following line: ```riakconf @@ -117,7 +117,7 @@ to use the custom backend provided by Riak CS. We need to use either the ``` It's important to note that many of these values will depend on various -directories specific to your [operating system](/riak/kv/2.1.3/setup/installing), so make sure to adjust them accordingly. 
The `add_paths` +directories specific to your [operating system]({{}}riak/kv/2.1.3/setup/installing), so make sure to adjust them accordingly. The `add_paths` parameter, for example, assumes that Riak CS is installed in `/usr/lib/riak-cs`, while the `data_root` parameters assume that Riak is installed in `/var/lib/`. @@ -152,7 +152,7 @@ buckets.default.allow_mult = true ]} ``` -This will enable Riak to create [siblings](/riak/kv/2.1.3/learn/concepts/causal-context/#siblings), which is necessary for Riak CS to function. If you are connecting to Riak CS from a [client library](/riak/kv/2.1.3/developing/client-libraries), don't worry: you will not have to manage [conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), as all Riak CS +This will enable Riak to create [siblings]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#siblings), which is necessary for Riak CS to function. If you are connecting to Riak CS from a [client library]({{}}riak/kv/2.1.3/developing/client-libraries), don't worry: you will not have to manage [conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), as all Riak CS operations are strongly consistent by definition. {{% note title="Note on `allow_mult`" %}} @@ -214,7 +214,7 @@ sure that you do not change the backend from `riak_cs_kv_multi_backend` to ## Setting Up Riak to Use Protocol Buffers -The Riak [Protocol Buffers](/riak/kv/2.1.3/developing/api/protocol-buffers) settings reside in the Riak `riak.conf`, +The Riak [Protocol Buffers]({{}}riak/kv/2.1.3/developing/api/protocol-buffers) settings reside in the Riak `riak.conf`, or in the `riak_api` section of the the old-style `advanced.config` or `app.config` files, which is located in the `/etc/riak/` folder. The default host is `127.0.0.1` and the default port is `8087`. You will need to change this if @@ -343,4 +343,4 @@ javascript.hook_pool_size = 0 ``` -[riak_conf_files]: http://docs.basho.com/riak/2.0.5/ops/advanced/configs/configuration-files/ +[riak_conf_files]: {{< baseurl >}}riak/kv/2.0.5/ops/advanced/configs/configuration-files/ diff --git a/content/riak/cs/2.0.0/cookbooks/configuration/stanchion.md b/content/riak/cs/2.0.0/cookbooks/configuration/stanchion.md index 504b7790c8..0718496179 100644 --- a/content/riak/cs/2.0.0/cookbooks/configuration/stanchion.md +++ b/content/riak/cs/2.0.0/cookbooks/configuration/stanchion.md @@ -86,7 +86,7 @@ ssl.keyfile = "./etc/key.pem" ## Specifying the Admin User -The admin user is created during the [configuration of Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs/#specifying-the-admin-user). +The admin user is created during the [configuration of Riak CS]({{}}riak/cs/2.0.0/cookbooks/configuration/riak-cs/#specifying-the-admin-user). The same user credentials must be added to each Stanchion used in the cluster. This is set in the `stanchion.conf` file, which is located in the `/etc/stanchion` directory. 
Enter the same `admin.key` and `admin.secret` as diff --git a/content/riak/cs/2.0.0/cookbooks/configuration/transmit.md b/content/riak/cs/2.0.0/cookbooks/configuration/transmit.md index fb44ba4d38..0084ec5f6f 100644 --- a/content/riak/cs/2.0.0/cookbooks/configuration/transmit.md +++ b/content/riak/cs/2.0.0/cookbooks/configuration/transmit.md @@ -38,11 +38,11 @@ dialog as follows: Defining a connection looks like this: -![Trasmit screenshot](/images/riak_cs_transmit0.jpg) +![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit0.jpg) > **Note** > -> Transmit expects a secure connection, so ensure that your Riak CS proxy server is configured with SSL support. For information on configuring a software solution like HAProxy with SSL for use with Riak CS, see [Load Balancing and Proxy Configuration](/riak/cs/2.0.0/cookbooks/configuration/load-balancing-proxy). +> Transmit expects a secure connection, so ensure that your Riak CS proxy server is configured with SSL support. For information on configuring a software solution like HAProxy with SSL for use with Riak CS, see [Load Balancing and Proxy Configuration]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/load-balancing-proxy). Finally, test the connection to Riak CS by clicking **Connect**. @@ -56,11 +56,11 @@ After successfully connecting to Riak CS, verify that you can create a bucket. The new bucket creation dialog looks like this: -![Trasmit screenshot](/images/riak_cs_transmit1.jpg) +![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit1.jpg) The newly created bucket is listed in the right hand pane of the Transmit interface: -![Trasmit screenshot](/images/riak_cs_transmit2.jpg) +![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit2.jpg) ## Copy Files @@ -74,7 +74,7 @@ copying of the files to the bucket. After copying, the files will appear in the bucket: -![Trasmit screenshot](/images/riak_cs_transmit3.jpg) +![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit3.jpg) You have now successfully configured a Transmit connection to Riak CS and verified basic file copying capabilities. diff --git a/content/riak/cs/2.0.0/cookbooks/faqs/riak-cs.md b/content/riak/cs/2.0.0/cookbooks/faqs/riak-cs.md index dfd8543040..3aba8d2cdb 100644 --- a/content/riak/cs/2.0.0/cookbooks/faqs/riak-cs.md +++ b/content/riak/cs/2.0.0/cookbooks/faqs/riak-cs.md @@ -15,7 +15,7 @@ aliases: Q: What is Riak CS? A: - Riak CS is [multi-tenant](http://en.wikipedia.org/wiki/Multitenancy) cloud storage software for public and private clouds. Built on Basho's distributed database [Riak KV](/riak/kv/2.1.3), Riak CS is commercial software designed to provide simple, available, distributed cloud storage at any scale. Riak CS is S3 API compatible and supports per-tenant reporting for billing and metering use cases. + Riak CS is [multi-tenant](http://en.wikipedia.org/wiki/Multitenancy) cloud storage software for public and private clouds. Built on Basho's distributed database [Riak KV]({{< baseurl >}}riak/kv/2.1.3), Riak CS is commercial software designed to provide simple, available, distributed cloud storage at any scale. Riak CS is S3 API compatible and supports per-tenant reporting for billing and metering use cases. Q: Can users share data? A: @@ -27,7 +27,7 @@ Q: Is it possible to specify a filesystem where my Riak CS buckets will live? A: You can specify the location of **all** Riak CS bucket data by changing the settings for Riak's backends to a path on a particular filesystem. If this is your goal, you can configure Riak to suit your environment.
If you look at our example Riak `advanced.config`/`app.config` backend - definition from the [Configuring Riak for CS](/riak/cs/2.0.0/cookbooks/configuration/riak-for-cs) section, it looks like this: + definition from the [Configuring Riak for CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-for-cs) section, it looks like this: ```advanced.config {riak_kv, [ diff --git a/content/riak/cs/2.0.0/cookbooks/garbage-collection.md b/content/riak/cs/2.0.0/cookbooks/garbage-collection.md index 6e81fbf8f6..cacbd67692 100644 --- a/content/riak/cs/2.0.0/cookbooks/garbage-collection.md +++ b/content/riak/cs/2.0.0/cookbooks/garbage-collection.md @@ -15,7 +15,7 @@ aliases: This document describes some of the implementation details behind Riak CS's garbage collection process. For information on configuring this -system, please see our documentation on [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs). +system, please see our documentation on [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs). ## Versions and Manifests @@ -192,7 +192,7 @@ We recommend using only _one_ active garbage collection daemon in any Riak CS cluster. If multiple daemons are currently being used, you can disable the others by setting the `gc.interval` parameter to `infinity` on those nodes. More information on how to do that can be found in the -[CS configuration doc](/riak/cs/2.0.0/cookbooks/configuration/riak-cs/#garbage-collection-settings). +[CS configuration doc]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs/#garbage-collection-settings). ## Controlling the GC Daemon @@ -212,7 +212,7 @@ Command | Description `set-interval` | Set or update the garbage collection interval. This setting uses a unit of seconds. `set-leeway` | Set or update the garbage collection leeway time. This setting indicates how many seconds must elapse after an object is deleted or overwritten before the garbage collection system may reap the object. This setting uses a unit of seconds. -For more information, see our documentation on [Riak CS command-line tools](/riak/cs/2.0.0/cookbooks/command-line-tools). +For more information, see our documentation on [Riak CS command-line tools]({{< baseurl >}}riak/cs/2.0.0/cookbooks/command-line-tools). ## Manifest Updates @@ -256,7 +256,7 @@ manifest keys that could linger indefinitely. Riak CS's garbage collection implementation gives the deployer several knobs to adjust for fine-tuning system performance. More information -can be found in our documentation on [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs/#garbage-collection-settings). +can be found in our documentation on [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs/#garbage-collection-settings). ## More Information diff --git a/content/riak/cs/2.0.0/cookbooks/installing.md b/content/riak/cs/2.0.0/cookbooks/installing.md index 3e89fff24e..983ce76340 100644 --- a/content/riak/cs/2.0.0/cookbooks/installing.md +++ b/content/riak/cs/2.0.0/cookbooks/installing.md @@ -22,16 +22,16 @@ You can install Riak CS on a single node (for development purposes) or using an automated deployment tool.
Any Riak CS installation involves three components, all of which must be installed separately: -* [Riak KV](/riak/2.0.7/) --- The distributed database on top of which Riak CS +* [Riak KV]({{< baseurl >}}riak/kv/2.0.7/) --- The distributed database on top of which Riak CS is built * Riak CS itself -* [Stanchion](/riak/cs/2.0.0/theory/stanchion) --- An application used to manage [globally unique entities](/riak/cs/2.0.0/theory/stanchion/#globally-unique-entities) such as users and buckets. +* [Stanchion]({{< baseurl >}}riak/cs/2.0.0/theory/stanchion) --- An application used to manage [globally unique entities]({{< baseurl >}}riak/cs/2.0.0/theory/stanchion/#globally-unique-entities) such as users and buckets. [Riak KV](#installing-riak) and [Riak CS](#installing-riak-cs-on-a-node) must be installed on each node in your cluster. [Stanchion](#installing-stanchion-on-a-node), however, needs to be installed on only one node. ## Version Compatibility -We strongly recommend using one of the documented [version combinations](/riak/cs/2.0.0/cookbooks/version-compatibility/) +We strongly recommend using one of the documented [version combinations]({{< baseurl >}}riak/cs/2.0.0/cookbooks/version-compatibility/) when installing and running Riak CS. ## Installing Riak KV Before installing Riak CS, Riak KV must be installed on each node in your cluster. You can install Riak KV either as part of an OS-specific package or from source. - * [Debian and Ubuntu](/riak/kv/2.0.7/setup/installing/debian-ubuntu) - * [RHEL and CentOS](/riak/kv/2.0.7/setup/installing/rhel-centos) - * [Mac OS X](/riak/kv/2.0.7/setup/installing/mac-osx) - * [FreeBSD](/riak/kv/2.0.7/setup/installing/freebsd) - * [SUSE](/riak/kv/2.0.7/setup/installing/suse) - * [From Source](/riak/kv/2.0.7/setup/installing/source) + * [Debian and Ubuntu]({{< baseurl >}}riak/kv/2.0.7/setup/installing/debian-ubuntu) + * [RHEL and CentOS]({{< baseurl >}}riak/kv/2.0.7/setup/installing/rhel-centos) + * [Mac OS X]({{< baseurl >}}riak/kv/2.0.7/setup/installing/mac-osx) + * [FreeBSD]({{< baseurl >}}riak/kv/2.0.7/setup/installing/freebsd) + * [SUSE]({{< baseurl >}}riak/kv/2.0.7/setup/installing/suse) + * [From Source]({{< baseurl >}}riak/kv/2.0.7/setup/installing/source) Riak KV is also officially supported on the following public cloud infrastructures: - * [Windows Azure](/riak/kv/2.0.7/setup/installing/windows-azure) - * [AWS Marketplace](/riak/kv/2.0.7/setup/installing/amazon-web-services) + * [Windows Azure]({{< baseurl >}}riak/kv/2.0.7/setup/installing/windows-azure) + * [AWS Marketplace]({{< baseurl >}}riak/kv/2.0.7/setup/installing/amazon-web-services) Remember that you must repeat this installation process on each node in your cluster. For future reference, you should make note of the Riak KV installation directory. If you want to fully configure Riak KV prior to installing Riak CS, see our -documentation on [configuring Riak KV for CS](/riak/cs/2.0.0/cookbooks/configuration/riak-for-cs/). +documentation on [configuring Riak KV for CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-for-cs/). ## Installing Riak CS on a Node -Riak CS and Stanchion packages are available on the [Download Riak CS](/riak/cs/2.0.0/downloads/) -page. Similarly, Riak packages are available on the [Download Riak KV](/riak/kv/2.0.7/downloads/) page. +Riak CS and Stanchion packages are available on the [Download Riak CS]({{< baseurl >}}riak/cs/2.0.0/downloads/) +page. Similarly, Riak packages are available on the [Download Riak KV]({{< baseurl >}}riak/kv/2.0.7/downloads/) page. After downloading Riak CS, Stanchion, and Riak, install them using your operating system's package management commands.
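The `installing.md` hunks above draw one operational distinction worth keeping in mind: Riak KV and Riak CS go on every node, while Stanchion goes on exactly one. A hypothetical Debian/Ubuntu ordering, with package names matching the repositories referenced in the sections that follow:

```bash
# Hypothetical install order; run the first two on every node,
# the third on a single node only.
sudo apt-get install riak        # every node in the cluster
sudo apt-get install riak-cs     # every node in the cluster
sudo apt-get install stanchion   # exactly one node
```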
@@ -78,7 +78,7 @@ such as a dedicated device [HAProxy](http://haproxy.1wt.eu) or [Nginx](http://wi ### Installing Riak CS on Mac OS X To install Riak CS on OS X, first download the appropriate package from -the [downloads](/riak/cs/2.0.0/downloads) page: +the [downloads]({{< baseurl >}}riak/cs/2.0.0/downloads) page: ```bash curl -O http://s3.amazonaws.com/downloads.basho.com/riak-cs/1.5/2.0.0/osx/10.8/riak-cs-2.0.0-OSX-x86_64.tar.gz @@ -90,7 +90,7 @@ Then, unpack the downloaded tarball: tar -xvzf riak-cs-2.0.0-OSX-x86_64.tar.gz ``` -At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs/). +At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs/). ### Installing Riak CS on Debian or Ubuntu @@ -255,11 +255,11 @@ can achieve this by specifying a load balancer IP as the Stanchion IP in each Riak CS node's `riak-cs.conf`. This load balancer must be configured to send all requests to a single Stanchion node, failing over to a secondary Stanchion node if the primary is unavailable. More -details can be found in [Specifying the Stanchion Node](/riak/cs/2.0.0/cookbooks/configuration/#specifying-the-stanchion-node). +details can be found in [Specifying the Stanchion Node]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/#specifying-the-stanchion-node). ### Installing Stanchion on Mac OS X -First, download the appropriate package from the [downloads](/riak/cs/2.0.0/downloads/#stanchion-1-4-3) page. +First, download the appropriate package from the [downloads]({{< baseurl >}}riak/cs/2.0.0/downloads/#stanchion-1-4-3) page. ```bash curl -O http://s3.amazonaws.com/downloads.basho.com/stanchion/1.4/1.4.3/osx/10.8/stanchion-2.0.0-OSX-x86_64.tar.gz @@ -271,7 +271,7 @@ Then, unpack the downloaded tarball: tar -xvzf stanchion-2.0.0-OSX-x86_64.tar.gz ``` -At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs). +At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs). ### Installing Stanchion on Debian or Ubuntu @@ -300,7 +300,7 @@ Now, install the `stanchion` package: sudo apt-get install stanchion ``` -At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs). +At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs). #### Installing the `.deb` Package Manually (not recommended) @@ -311,7 +311,7 @@ sudo dpkg -i Replace `` with the actual filename for the package you are installing. -At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs). +At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs). ### Installing Stanchion on RHEL or CentOS @@ -338,7 +338,7 @@ Once the `.rpm` package has been installed, install Stanchion: sudo yum install stanchion ``` -At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs). +At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs). #### Installing the `.rpm` Package Manually (not recommended) @@ -349,7 +349,7 @@ sudo rpm -Uvh Replace `` with the actual filename for the package you are installing. -At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs). +At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs).
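Once the packages above are installed, a quick smoke test can confirm that all three components respond before moving on to configuration. This assumes the standard runner scripts shipped with each package; the `ping` subcommand for Stanchion in particular is an assumption, not something documented in this diff:

```bash
# Assumed post-install smoke test; each command should print "pong".
sudo riak ping
sudo riak-cs ping
sudo stanchion ping
```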
> **Note on SELinux** > @@ -359,4 +359,4 @@ encounter errors during installation, try disabling SELinux. ## What's Next? Once you've completed installation of Riak CS and Riak, you're ready to -learn more about [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs). +learn more about [configuring Riak CS]({{}}riak/cs/2.0.0/cookbooks/configuration/riak-cs). diff --git a/content/riak/cs/2.0.0/cookbooks/installing/chef.md b/content/riak/cs/2.0.0/cookbooks/installing/chef.md index 6cbc3ef768..2a75d706ec 100644 --- a/content/riak/cs/2.0.0/cookbooks/installing/chef.md +++ b/content/riak/cs/2.0.0/cookbooks/installing/chef.md @@ -138,8 +138,8 @@ default['stanchion']['args']['-env']['ERL_CRASH_DUMP'] = "/var/log/stanchion/erl #### Storage Backends -Riak CS uses a specific combination of storage backends. [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) is used to -store blocks and [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb) to store manifests. The `riak_cs_kv_multi_backend` must be specified in the Riak configuration file for Riak CS to work: +Riak CS uses a specific combination of storage backends. [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) is used to +store blocks and [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb) to store manifests. The `riak_cs_kv_multi_backend` must be specified in the Riak configuration file for Riak CS to work: ```ruby default['riak']['config']['riak_kv']['storage_backend'] = "riak_cs_kv_multi_backend" @@ -183,5 +183,5 @@ default['stanchion']['config']['lager']['error_logger_redirect'] = true More information related to cluster configuration and building development environments is available in our documentation. -* [Building a Local Test Environment](/riak/cs/2.0.0/tutorials/fast-track/local-testing-environment) -* [Building a Virtual Testing Environment](/riak/cs/2.0.0/tutorials/fast-track/virtual-test-environment) +* [Building a Local Test Environment]({{}}riak/cs/2.0.0/tutorials/fast-track/local-testing-environment) +* [Building a Virtual Testing Environment]({{}}riak/cs/2.0.0/tutorials/fast-track/virtual-test-environment) diff --git a/content/riak/cs/2.0.0/cookbooks/keystone-setup.md b/content/riak/cs/2.0.0/cookbooks/keystone-setup.md index 5cd6d6762c..28e923cb81 100644 --- a/content/riak/cs/2.0.0/cookbooks/keystone-setup.md +++ b/content/riak/cs/2.0.0/cookbooks/keystone-setup.md @@ -73,7 +73,7 @@ pip install -r tools/pip-requires The next step is to select the appropriate options in the `keystone.conf` configuration file. A sample configuration that is -useful for local testing with Riak CS can be found [here](/riak/cs/2.0.0/cookbooks/keystone-conf-sample/). This configuration file sets up logging to +useful for local testing with Riak CS can be found [here]({{}}riak/cs/2.0.0/cookbooks/keystone-conf-sample/). This configuration file sets up logging to `./log/keystone/keystone.log` and uses the templated catalog backend to set up the Riak CS object store service. This catalog backend uses a local file to populate the service catalog. diff --git a/content/riak/cs/2.0.0/cookbooks/logging.md b/content/riak/cs/2.0.0/cookbooks/logging.md index b4e1e5df74..6a3e562279 100644 --- a/content/riak/cs/2.0.0/cookbooks/logging.md +++ b/content/riak/cs/2.0.0/cookbooks/logging.md @@ -41,4 +41,4 @@ That section looks something like this: ``` A full description of all available parameters can be found in the -[configuration files](/riak/kv/2.1.3/configuring/reference) document for Riak. 
+[configuration files]({{}}riak/kv/2.1.3/configuring/reference) document for Riak. diff --git a/content/riak/cs/2.0.0/cookbooks/monitoring-and-metrics.md b/content/riak/cs/2.0.0/cookbooks/monitoring-and-metrics.md index db8cf732c3..7cd97d36f1 100644 --- a/content/riak/cs/2.0.0/cookbooks/monitoring-and-metrics.md +++ b/content/riak/cs/2.0.0/cookbooks/monitoring-and-metrics.md @@ -10,11 +10,11 @@ menu: project: "riak_cs" project_version: "2.0.0" aliases: - - /riakcs/2.0.0/cookbooks/Monitoring-and-Metrics/ + - /riakcs/2.0.0/cookbooks/monitoring-and-metrics/ --- [amazon]: http://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html -[s3 api]: http://docs.basho.com/riakcs/latest/references/apis/storage/s3/ +[s3 api]: {{< baseurl >}}riak/cs/latest/references/apis/storage/s3/ Riak S2 (CS) includes metrics and operational statistics to help you monitor your system in more detail and diagnose system issues more easily. There are three major categories of metrics: diff --git a/content/riak/cs/2.0.0/cookbooks/querying-access-statistics.md b/content/riak/cs/2.0.0/cookbooks/querying-access-statistics.md index d3a9c92854..08b43fc024 100644 --- a/content/riak/cs/2.0.0/cookbooks/querying-access-statistics.md +++ b/content/riak/cs/2.0.0/cookbooks/querying-access-statistics.md @@ -24,7 +24,7 @@ and access. {{% /note %}} For information about how access statistics are logged, please read -[Usage and Billing Data](/riak/cs/2.0.0/cookbooks/usage-and-billing-data). +[Usage and Billing Data]({{}}riak/cs/2.0.0/cookbooks/usage-and-billing-data). The following sections discuss accessing the access statistics using bare HTTP requests. Query parameters are used to specify the types and @@ -81,7 +81,7 @@ HTTP/1.1 404 Object Not Found > **Authentication Required** > > Queries to the usage resources described here must be authenticated as -described in the [Authentication documentation](/riak/cs/2.0.0/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files. +described in the [Authentication documentation]({{}}riak/cs/2.0.0/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files. The usage HTTP resource provides both access and storage statistics. Since each of these queries can be taxing in its own right, they are @@ -234,7 +234,7 @@ the amount of time that may be retrieved in any request is limited. The limit is configured by the `riak_cs` application environment variable `usage_request_limit`. The value is expressed as an integer -number of archive intervals (see [Usage and Billing Data](/riak/cs/2.0.0/cookbooks/usage-and-billing-data) for a +number of archive intervals (see [Usage and Billing Data]({{}}riak/cs/2.0.0/cookbooks/usage-and-billing-data) for a description of archive intervals). The default value is `744`, which is 31 days at the default archive diff --git a/content/riak/cs/2.0.0/cookbooks/querying-storage-statistics.md b/content/riak/cs/2.0.0/cookbooks/querying-storage-statistics.md index ed87c8cd66..7d302f8e37 100644 --- a/content/riak/cs/2.0.0/cookbooks/querying-storage-statistics.md +++ b/content/riak/cs/2.0.0/cookbooks/querying-storage-statistics.md @@ -26,9 +26,9 @@ and access. > **Note**: > -> Storage statistics are not calculated by default. 
Please read [Usage and Billing Data](/riak/cs/2.0.0/cookbooks/usage-and-billing-data) for details about how to enable storage calculation archiving. +> Storage statistics are not calculated by default. Please read [Usage and Billing Data]({{}}riak/cs/2.0.0/cookbooks/usage-and-billing-data) for details about how to enable storage calculation archiving. -The basics of querying storage statistics, including the URL used and the parameters for specifying the time slice, are the same as they are for [Querying Access Statistics](/riak/cs/2.0.0/cookbooks/querying-access-statistics). +The basics of querying storage statistics, including the URL used and the parameters for specifying the time slice, are the same as they are for [Querying Access Statistics]({{}}riak/cs/2.0.0/cookbooks/querying-access-statistics). Please refer to the descriptions there for more details. @@ -39,7 +39,7 @@ been configured to something other than default CS port of `8080`. > **Authentication Required** > -> Queries to the usage resources described here must be authenticated as described in the [Authentication documentation](/riak/cs/2.0.0/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files. +> Queries to the usage resources described here must be authenticated as described in the [Authentication documentation]({{}}riak/cs/2.0.0/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files. The usage HTTP resource provides both access and storage statistics. Since each of these queries can be taxing in its own right, they are both omitted from the result by default: @@ -97,7 +97,7 @@ There are no statistics included in this report because the default time span is ### S3 Object-style Access -As described in [Querying Access Statistics](/riak/cs/2.0.0/cookbooks/querying-access-statistics), these statistics are also available as S3 objects. To add storage statistics to the result, add the character `b` to the `Options` portion of the object's path. For example, the following command would produce storage statistics in XML format: +As described in [Querying Access Statistics]({{}}riak/cs/2.0.0/cookbooks/querying-access-statistics), these statistics are also available as S3 objects. To add storage statistics to the result, add the character `b` to the `Options` portion of the object's path. 
For example, the following command would produce storage statistics in XML format: ```bash s3cmd get s3://riak-cs/usage/8NK4FH2SGKJJM8JIP2GU/bx/20120315T140000Z/20120315T160000Z diff --git a/content/riak/cs/2.0.0/cookbooks/release-notes.md b/content/riak/cs/2.0.0/cookbooks/release-notes.md index 9b11af61d4..8448312161 100644 --- a/content/riak/cs/2.0.0/cookbooks/release-notes.md +++ b/content/riak/cs/2.0.0/cookbooks/release-notes.md @@ -13,7 +13,7 @@ aliases: - /riakcs/2.0.0/cookbooks/Riak-CS-Release-Notes/ - /riak/cs/2.0.0/cookbooks/Riak-CS-Release-Notes/ --- -[riak_cs_multibag_support]: /riak/cs/2.0.0/cookbooks/supercluster +[riak_cs_multibag_support]: {{}}riak/cs/2.0.0/cookbooks/supercluster [riak_cs_1.5_release_notes_upgrading]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#notes-on-upgrading [riak_cs_1.5_release_notes_upgrading_1]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#notes-on-upgrading-1 @@ -54,7 +54,7 @@ New metrics have been added that enable you to determine the health of your Riak * Memory information about the riak-cs virtual machine * HTTP listener information: active sockets and waiting acceptors -**Note:** stats item names from prior to 2.0.x are not preserved; they have been renamed or removed. No backward consistency is maintained. Please see [the documentation](docs.basho.com/riakcs/latest/cookbooks/Monitoring-and-Metrics/) for more information. +**Note:** stats item names from prior to 2.0.x are not preserved; they have been renamed or removed. No backward consistency is maintained. Please see [the documentation]({{}}riak/cs/latest/cookbooks/monitoring-and-metrics/) for more information. * [[PR 1189](https://github.com/basho/riak_cs/pull/1189)] * [[PR 1180](https://github.com/basho/riak_cs/pull/1180)] @@ -69,7 +69,7 @@ Additional storage usage metrics are also available. . These metrics are gathere * [[PR 1120](https://github.com/basho/riak_cs/pull/1120)] #### `riak-cs-admin` -The following administration CLIs have been replaced by the [`riak-cs-admin` command](http://docs.basho.com/riakcs/latest/cookbooks/command-line-tools/): +The following administration CLIs have been replaced by the [`riak-cs-admin` command]({{< baseurl >}}riak/cs/latest/cookbooks/command-line-tools/): * `riak-cs-storage` * `riak-cs-gc` @@ -89,7 +89,7 @@ Several new options have been added to the `riak-cs-admin gc` command: * Riak S2 2.0 (and older) has a race condition where fullsync replication and garbage collection may resurrect deleted blocks without any way to delete them again. When real-time replication and replication of a garbage collection bucket entry object being dropped from the real-time queue are combined, blocks may remain on the sink side without being collected. Riak S2 2.1 introduces deterministic garbage collection to avoid fullsync replication. Additionally, garbage collection and fullsync replication run concurrently, and work on the same blocks and manifests. You can now specify the range of time using the `--start` and `--end` flags with `riak-cs-admin gc batch` for garbage collector in order to collect deleted objects synchronously on both sink and source sides. [[PR 1147 ](https://github.com/basho/riak_cs/pull/1147)] * `riak-cs-admin gc earliest-keys` is available so you can find the oldest entry after `epoch_start` in garbage collection. With this option, you can stay informed of garbage collection progress. 
[[PR 1160](https://github.com/basho/riak_cs/pull/1160)]

-More information on garbage collection can be found in the [documentation](http://docs.basho.com/riakcs/latest/cookbooks/garbage-collection/).
+More information on garbage collection can be found in the [documentation]({{< baseurl >}}riak/cs/latest/cookbooks/garbage-collection/).

### Additions

@@ -113,7 +113,7 @@ More information on garbage collection can be found in the [documentation](http:
* An option has been added to replace the `PR=all user GET` option with `PR=one` just before authentication. This option improves latency, especially in the presence of slow (or actually-failing) nodes blocking the whole request flow because of PR=all. When enabled, a user's owned-bucket list is never pruned after a bucket is deleted; instead it is just marked as deleted. [[PR 1191](https://github.com/basho/riak_cs/pull/1191)]
* An info log has been added when starting a storage calculation batch. [[PR 1238](https://github.com/basho/riak_cs/pull/1238)]
* `GET Bucket` requests now have clearer responses. A 501 stub for Bucket lifecycle and a simple stub for Bucket requestPayment have been added. [[PR 1223](https://github.com/basho/riak_cs/pull/1223)]
-* Several user-friendly features have been added to [`riak-cs-debug`](http://docs.basho.com/riakcs/latest/cookbooks/command-line-tools/): fine-grained information gathering options, user-defined filtering for configuration files, and verbose output for failed commands. [[PR 1236](https://github.com/basho/riak_cs/pull/1236)]
+* Several user-friendly features have been added to [`riak-cs-debug`]({{< baseurl >}}riak/cs/latest/cookbooks/command-line-tools/): fine-grained information gathering options, user-defined filtering for configuration files, and verbose output for failed commands. [[PR 1236](https://github.com/basho/riak_cs/pull/1236)]

#### Enterprise
* MDC has `proxy_get`, which makes block objects propagate to site clusters when they are requested. Now, multibag configuration with MDC supports `proxy_get`. [[PR 1171](https://github.com/basho/riak_cs/pull/1171) and [PR 25](https://github.com/basho/riak_cs_multibag/pull/25)]

@@ -526,7 +526,7 @@ None

### Download
Please see the [Riak CS Downloads
-Page](http://docs.basho.com/riakcs/latest/riakcs-downloads/).
+Page]({{< baseurl >}}riak/cs/latest/downloads/).

### Feedback

@@ -544,7 +544,7 @@ venues:

### Changes
* Improve logging around failures with Riak
- [riak_cs/#987](http://docs.basho.com/riak/latest/dev/using/libraries/)
+ [riak_cs/#987](https://github.com/basho/riak_cs/pull/987)
* Add amendment log output when storing access stats into Riak failed
 [riak_cs/#988](https://github.com/basho/riak_cs/pull/988). This change
 prevents losing access stats logs in cases of temporary connection

@@ -572,7 +572,7 @@ None

### Download
Please see the [Riak CS Downloads
-Page](http://docs.basho.com/riakcs/latest/riakcs-downloads)
+Page]({{< baseurl >}}riak/cs/latest/downloads)

### Feedback

@@ -589,7 +589,7 @@ venues:

### Additions

-* Bucket restrictions --- Similar to S3, you can now limit the number of buckets created per user to prevent users from creating an unusually large number of buckets. More details are included [here](http://docs.basho.com/riakcs/latest/cookbooks/configuration/Configuring-Riak-CS/).
+* Bucket restrictions --- Similar to S3, you can now limit the number of buckets created per user to prevent users from creating an unusually large number of buckets.
More details are included [here]({{< baseurl >}}riak/cs/latest/cookbooks/configuration/riak-cs).

### Changes

@@ -646,7 +646,7 @@ To avoid having a limit, set `max_buckets_per_user` to `unlimited`.

### Download

-Please see the [Riak CS Downloads Page](http://docs.basho.com/riakcs/latest/riakcs-downloads/).
+Please see the [Riak CS Downloads Page]({{< baseurl >}}riak/cs/latest/downloads/).

### Feedback

@@ -662,7 +662,7 @@ Or via email at **info@basho.com**.

### Additions

-* Added Multibag Technical Preview to Riak CS. More info is available [here](http://docs.basho.com/riakcs/latest/cookbooks/multibag/)
+* Added Multibag Technical Preview to Riak CS. More info is available [here]({{< baseurl >}}riak/cs/latest/cookbooks/multibag/)
* A new command `riak-cs-debug` including `cluster-info` [riak_cs/#769](https://github.com/basho/riak_cs/pull/769), [riak_cs/#832](https://github.com/basho/riak_cs/pull/832)
* Tie up all existing commands into a new command `riak-cs-admin` [riak_cs/#839](https://github.com/basho/riak_cs/pull/839)
* Add a command `riak-cs-admin stanchion` to switch Stanchion IP and port manually [riak_cs/#657](https://github.com/basho/riak_cs/pull/657)

@@ -1003,7 +1003,7 @@ they will all share the name "struct".

#### Additions

-* Support query parameter authentication as specified in [http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html](Signing and Authenticating REST Requests).
+* Support query parameter authentication as specified in [Signing and Authenticating REST Requests](http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html).

## Riak CS 1.0.1

diff --git a/content/riak/cs/2.0.0/cookbooks/rolling-upgrades.md b/content/riak/cs/2.0.0/cookbooks/rolling-upgrades.md
index fcfdbbf0f0..564e9c3529 100644
--- a/content/riak/cs/2.0.0/cookbooks/rolling-upgrades.md
+++ b/content/riak/cs/2.0.0/cookbooks/rolling-upgrades.md
@@ -18,7 +18,7 @@ Each node in a Riak CS cluster contains settings that define its operating
modes and API coverage. The following steps outline the process of upgrading
Riak CS in a rolling fashion.

-Be sure to check the Riak CS [Version Compatibility](/riak/cs/2.0.0/cookbooks/version-compatibility) chart to ensure that your version of Riak, Riak CS, and Stanchion have been tested to work together. As Basho supports upgrades from the previous two major versions, this document will cover upgrades from Riak CS 1.4.x and Riak CS 1.5.x.
+Be sure to check the Riak CS [Version Compatibility]({{< baseurl >}}riak/cs/2.0.0/cookbooks/version-compatibility) chart to ensure that your version of Riak, Riak CS, and Stanchion have been tested to work together. As Basho supports upgrades from the previous two major versions, this document will cover upgrades from Riak CS 1.4.x and Riak CS 1.5.x.

As Riak CS 2.0.0 only works with Riak 2.0.5, the underlying Riak installation
*must* be upgraded to Riak 2.0.5.

@@ -80,9 +80,9 @@ detailed description.

4. Upgrade Riak, Riak CS, and Stanchion. See the <a
+    href="{{< baseurl >}}riak/cs/latest/downloads">Riak CS Downloads</a>
    and <a
+    href="{{< baseurl >}}riak/kv/latest/downloads">Riak Downloads</a>
    pages to find the appropriate packages.

**Debian** / **Ubuntu**

@@ -160,7 +160,7 @@ detailed description.
 ]},
```
- and so on. More details can be found at [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs).
+ and so on. More details can be found at [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs).
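The per-node package steps referred to above boil down to a stop, upgrade, start cycle. A minimal sketch for one Debian/Ubuntu node, assuming locally downloaded packages; the package file names are illustrative placeholders, not exact versions:

```bash
# Hedged sketch of one node's rolling-upgrade cycle (Debian/Ubuntu).
# Stop Riak CS before Riak, upgrade Riak first, then bring both back up.
riak-cs stop
riak stop
sudo dpkg -i riak_2.0.5-1_amd64.deb      # illustrative package file name
sudo dpkg -i riak-cs_2.0.0-1_amd64.deb   # illustrative package file name
riak start
riak-cs start
```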
Note on Memory Sizing
diff --git a/content/riak/cs/2.0.0/cookbooks/supercluster.md b/content/riak/cs/2.0.0/cookbooks/supercluster.md
index cd638b0617..5f3d77a6cf 100644
--- a/content/riak/cs/2.0.0/cookbooks/supercluster.md
+++ b/content/riak/cs/2.0.0/cookbooks/supercluster.md
@@ -21,15 +21,15 @@ customers. It is not yet suitable for production use.

While [Riak CS Enterprise](http://basho.com/riak-enterprise) enables you to
distribute Riak CS objects across multiple data centers in a
-[source/sink pattern](/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture), all linked clusters are treated the same. In Riak CS version 1.5.0, however, Basho has added **supercluster** support to Riak CS Enterprise.
+[source/sink pattern]({{< baseurl >}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture), all linked clusters are treated the same. In Riak CS version 1.5.0, however, Basho has added **supercluster** support to Riak CS Enterprise.

With supercluster support, you can store object manifests and blocks in separate clusters or groups of clusters, a.k.a. **a set of supercluster members**, enhancing the scalability and overall storage capabilities of a Riak CS installation.

## Supercluster members

-A supercluster member is a set of clusters linked together via [Multi-Datacenter Replication](/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture) (MDC).
-Without MDC support, a supercluster member consists of a single cluster. With MDC support, however, a supercluster member can consist of several linked clusters. You can assign members **weights** that determine the likelihood that objects, blocks, and manifests will be stored there. For example, if you expect to use one supercluster member more heavily than another you can increase the weight of that member using the interface described in [Riak CS Command-line Tools](/riak/cs/2.0.0/cookbooks/command-line-tools).
+A supercluster member is a set of clusters linked together via [Multi-Datacenter Replication]({{< baseurl >}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture) (MDC).
+Without MDC support, a supercluster member consists of a single cluster. With MDC support, however, a supercluster member can consist of several linked clusters. You can assign members **weights** that determine the likelihood that objects, blocks, and manifests will be stored there. For example, if you expect to use one supercluster member more heavily than another you can increase the weight of that member using the interface described in [Riak CS Command-line Tools]({{< baseurl >}}riak/cs/2.0.0/cookbooks/command-line-tools).

## The Master Member

@@ -134,7 +134,7 @@ That involves performing the following steps on each node:

1. Stop the node
2. Upgrade Stanchion to a version that supports Riak CS supercluster, i.e. Riak CS 1.5.0 and later
-3. Set your desired Stanchion [configuration](/riak/cs/2.0.0/cookbooks/configuration/stanchion)
+3. Set your desired Stanchion [configuration]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/stanchion)
4. Start Stanchion on each node

### Add Clusters

@@ -145,7 +145,7 @@ connection information as explained above in the [supercluster Configuration](#s

### Set Weights

-When a new supercluster member is added, you must first set the weight of that member to zero using the [`riak-cs-supercluster`](/riak/cs/2.0.0/cookbooks/command-line-tools) command-line interface.
+When a new supercluster member is added, you must first set the weight of that member to zero using the [`riak-cs-supercluster`]({{< baseurl >}}riak/cs/2.0.0/cookbooks/command-line-tools) command-line interface.
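The hunk below references an example that the diff elides. A minimal sketch of the zero-weight command it describes, assuming the `weight` subcommand syntax from the command-line tools page; `Alpha` is the member name used in that example:

```bash
# Hedged sketch: give a newly added member no load until its weight is
# raised deliberately. Syntax assumed from the riak-cs-supercluster
# section of the command-line tools docs.
riak-cs-supercluster weight Alpha 0
```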
The example below sets the weight of the recently added supercluster member `Alpha` to zero:

@@ -177,9 +177,9 @@ supercluster feature.

## Command Line Interface

Complete documentation for the `riak-cs-supercluster` interface can be found
-in our documentation on [Riak CS Command Line Tools](/riak/cs/2.0.0/cookbooks/command-line-tools/#riak-cs-supercluster).
+in our documentation on [Riak CS Command Line Tools]({{< baseurl >}}riak/cs/2.0.0/cookbooks/command-line-tools/#riak-cs-supercluster).

## Limitations

-Riak CS supercluster does not currently support [proxy gets](/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter/#riak-cs-mdc-gets) from
+Riak CS supercluster does not currently support [proxy gets]({{< baseurl >}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter/#riak-cs-mdc-gets) from
sink clusters.

diff --git a/content/riak/cs/2.0.0/cookbooks/system-features.md b/content/riak/cs/2.0.0/cookbooks/system-features.md
index 84678b7d25..9d898d810a 100644
--- a/content/riak/cs/2.0.0/cookbooks/system-features.md
+++ b/content/riak/cs/2.0.0/cookbooks/system-features.md
@@ -9,9 +9,9 @@ aliases:

The following pages detail Riak CS's system features.

-* [Access Control Lists](/riak/cs/2.0.0/cookbooks/access-control-lists)
-* [Authentication](/riak/cs/2.0.0/cookbooks/authentication)
-* [Monitoring and Metrics](/riak/cs/2.0.0/cookbooks/monitoring-and-metrics)
-* [Querying Access Statistics](/riak/cs/2.0.0/cookbooks/querying-access-statistics)
-* [Querying Storage Statistics](/riak/cs/2.0.0/cookbooks/querying-storage-statistics)
-* [Usage and Billing Data](/riak/cs/2.0.0/cookbooks/usage-and-billing-data)
+* [Access Control Lists]({{< baseurl >}}riak/cs/2.0.0/cookbooks/access-control-lists)
+* [Authentication]({{< baseurl >}}riak/cs/2.0.0/cookbooks/authentication)
+* [Monitoring and Metrics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/monitoring-and-metrics)
+* [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/querying-access-statistics)
+* [Querying Storage Statistics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/querying-storage-statistics)
+* [Usage and Billing Data]({{< baseurl >}}riak/cs/2.0.0/cookbooks/usage-and-billing-data)

diff --git a/content/riak/cs/2.0.0/cookbooks/usage-and-billing-data.md b/content/riak/cs/2.0.0/cookbooks/usage-and-billing-data.md
index b99fea8f72..9d28f1e261 100644
--- a/content/riak/cs/2.0.0/cookbooks/usage-and-billing-data.md
+++ b/content/riak/cs/2.0.0/cookbooks/usage-and-billing-data.md
@@ -28,7 +28,7 @@ and access.

Access stats are tracked on a per-user basis, as rollups for slices
of time. They are stored just like other Riak CS data, in the `cs.access`
bucket in particular. For information about querying access statistics,
-please read [Querying Access Statistics](/riak/cs/2.0.0/cookbooks/querying-access-statistics).
+please read [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/querying-access-statistics).

## Overview

@@ -71,7 +71,7 @@ logger determines the operation type by comparing the method, resource
module, and path to a known table. For example, it knows that a `GET`
on the *key* module with the `acl` query parameter in the path is a
`KeyReadACL` operation. A `PUT` to the same resource without the `acl`
-query parameter is a `KeyWrite` operation. See [Querying Access Statistics](/riak/cs/2.0.0/cookbooks/querying-access-statistics) for a list of all operation types.
+query parameter is a `KeyWrite` operation. See [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/querying-access-statistics) for a list of all operation types.

### Log Accumulation

@@ -181,7 +181,7 @@ slices of time.
They are stored in the same Riak cluster as other Riak CS data, in the
`cs.storage` bucket. For detailed information about querying storage
statistics, please read
-[Querying Storage Statistics](/riak/cs/2.0.0/cookbooks/querying-storage-statistics).
+[Querying Storage Statistics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/querying-storage-statistics).

### High Level

@@ -201,7 +201,7 @@ The storage calculation system uses MapReduce to sum the files in a
bucket. This means you must tell all of your Riak nodes where to find
Riak CS's compiled files before calculating storage.

-See [Configuring Riak for CS](/riak/cs/2.0.0/cookbooks/configuration/riak-for-cs) for directions on setting this up.
+See [Configuring Riak for CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-for-cs) for directions on setting this up.

### Scheduling and Manual Triggering

diff --git a/content/riak/cs/2.0.0/cookbooks/using-with-keystone.md b/content/riak/cs/2.0.0/cookbooks/using-with-keystone.md
index 764fcea5b7..40ff026b8b 100644
--- a/content/riak/cs/2.0.0/cookbooks/using-with-keystone.md
+++ b/content/riak/cs/2.0.0/cookbooks/using-with-keystone.md
@@ -291,7 +291,7 @@ section of the Riak CS `advanced.config` or `app.config` files:

### Keystone Setup

-Follow the procedures documented in [Keystone Setup](/riak/cs/2.0.0/cookbooks/keystone-setup) to set up and run
+Follow the procedures documented in [Keystone Setup]({{< baseurl >}}riak/cs/2.0.0/cookbooks/keystone-setup) to set up and run
Keystone.

1. Create a tenant called `test`:

diff --git a/content/riak/cs/2.0.0/index.md b/content/riak/cs/2.0.0/index.md
index bcdaa67d2e..4e6c1e516c 100644
--- a/content/riak/cs/2.0.0/index.md
+++ b/content/riak/cs/2.0.0/index.md
@@ -23,17 +23,17 @@ API is [Amazon S3 compatible](http://docs.aws.amazon.com/AmazonS3/latest/API/API
and supports per-tenant reporting for use cases involving billing and
metering.

-Riak CS is open source and [free for download](/riak/cs/2.0.0/downloads).
+Riak CS is open source and [free for download]({{< baseurl >}}riak/cs/2.0.0/downloads).

## Notable Riak CS Features

### Amazon S3-API Compatibility

-Riak CS has a built-in S3 interface with S3 Access Control List ([ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html)) support, which means that you can both use existing S3 tools and frameworks to manage your data and also import and extract data from Amazon directly. The HTTP REST API supports service, bucket, and object-level operations to easily store and retrieve data. There is also support for the [OpenStack Swift API](/riak/cs/2.0.0/references/appendices/comparisons/swift/)
+Riak CS has a built-in S3 interface with S3 Access Control List ([ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html)) support, which means that you can both use existing S3 tools and frameworks to manage your data and also import and extract data from Amazon directly. The HTTP REST API supports service, bucket, and object-level operations to easily store and retrieve data. There is also support for the [OpenStack Swift API]({{< baseurl >}}riak/cs/2.0.0/references/appendices/comparisons/swift/)

### Per-Tenant Visibility

-With the Riak CS [Reporting API](/riak/cs/2.0.0/cookbooks/monitoring-and-metrics), you can access per-tenant usage data and statistics over network I/O. This reporting functionality supports use cases including accounting,
+With the Riak CS [Reporting API]({{< baseurl >}}riak/cs/2.0.0/cookbooks/monitoring-and-metrics), you can access per-tenant usage data and statistics over network I/O.
This reporting functionality supports use cases including accounting,
subscription, chargebacks, plugins with billing systems, efficient
multi-department utilization, and much more.

### Supports Large Objects of Arbitrary Content Type, Plus Metadata

diff --git a/content/riak/cs/2.0.0/references/apis/storage.md b/content/riak/cs/2.0.0/references/apis/storage.md
index 61c72fbfe7..d9097d8f95 100644
--- a/content/riak/cs/2.0.0/references/apis/storage.md
+++ b/content/riak/cs/2.0.0/references/apis/storage.md
@@ -55,30 +55,30 @@ Multipart Uploads {{1.3.0-}} | Coming Soon | Planned for future release |

## Service-level Operations

-* [GET Service](/riak/cs/2.0.0/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request
+* [GET Service]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request

## Bucket-level Operations

-* [GET Bucket](/riak/cs/2.0.0/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket
-* [GET Bucket ACL](/riak/cs/2.0.0/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket
-* [GET Bucket policy](/riak/cs/2.0.0/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket
-* [PUT Bucket](/riak/cs/2.0.0/references/apis/storage/s3/put-bucket) --- Creates a new bucket
-* [PUT Bucket ACL](/riak/cs/2.0.0/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions for a bucket
+* [GET Bucket]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket
+* [GET Bucket ACL]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket
+* [GET Bucket policy]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket
+* [PUT Bucket]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-bucket) --- Creates a new bucket
+* [PUT Bucket ACL]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions for a bucket
-* [PUT Bucket policy](/riak/cs/2.0.0/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket
-* [DELETE Bucket](/riak/cs/2.0.0/references/apis/storage/s3/delete-bucket) --- Deletes a bucket
-* [DELETE Bucket policy](/riak/cs/2.0.0/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket
+* [PUT Bucket policy]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket
+* [DELETE Bucket]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/delete-bucket) --- Deletes a bucket
+* [DELETE Bucket policy]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket

## Object-level Operations

-* [GET Object](/riak/cs/2.0.0/references/apis/storage/s3/get-object) --- Retrieves an object
-* [GET Object ACL](/riak/cs/2.0.0/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object
-* [PUT Object](/riak/cs/2.0.0/references/apis/storage/s3/put-object) --- Stores an object to a bucket
-* [PUT Object (Copy)](/riak/cs/2.0.0/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object
-* [PUT Object ACL](/riak/cs/2.0.0/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object
-* [HEAD
Object](/riak/cs/2.0.0/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object)
-* [DELETE Object](/riak/cs/2.0.0/references/apis/storage/s3/delete-object) --- Deletes an object
+* [GET Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-object) --- Retrieves an object
+* [GET Object ACL]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object
+* [PUT Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-object) --- Stores an object to a bucket
+* [PUT Object (Copy)]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object
+* [PUT Object ACL]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object
+* [HEAD Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object)
+* [DELETE Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/delete-object) --- Deletes an object

## Multipart Upload

@@ -87,19 +87,19 @@ Object parts can be uploaded independently and in any order. After all
parts are uploaded, Riak CS assembles an object out of the parts. When
your object size reaches 100MB, you should consider using multipart
uploads instead of uploading the object in a single operation. Read more
-about multipart uploads on the [overview page](/riak/cs/2.0.0/cookbooks/multipart-upload-overview).
+about multipart uploads on the [overview page]({{< baseurl >}}riak/cs/2.0.0/cookbooks/multipart-upload-overview).

-* [Initiate Multipart Upload](/riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID
-* [Upload Part](/riak/cs/2.0.0/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload
-* [Complete Multipart Upload](/riak/cs/2.0.0/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts
-* [Abort Multipart Upload](/riak/cs/2.0.0/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts
-* [List Parts](/riak/cs/2.0.0/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload.
-* [List Multipart Uploads](/riak/cs/2.0.0/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
+* [Initiate Multipart Upload]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID
+* [Upload Part]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload
+* [Complete Multipart Upload]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts
+* [Abort Multipart Upload]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts
+* [List Parts]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload.
+* [List Multipart Uploads]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
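As a practical aside, `s3cmd` can exercise the multipart operations listed above without hand-rolling requests. A minimal sketch, with the bucket, file name, and 15 MB part size all illustrative:

```bash
# Hedged sketch: s3cmd splits the upload into parts automatically and
# completes the multipart upload itself once all parts are in.
s3cmd put --multipart-chunk-size-mb=15 ./large.iso s3://my-bucket/large.iso
```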
## Common Headers

-* [Common Riak CS Request Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-request-headers)
-* [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers)
+* [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-request-headers)
+* [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers)

There are two storage API options for Riak CS. The first and most fully
featured is the S3 API. There is also limited but improving support for

@@ -118,13 +118,13 @@ resource modules.

* Module: `riak_cs_s3_rewrite`
* [Documentation](http://docs.aws.amazon.com/AmazonS3/latest/API/APIRest.html)
-* [Mapping](/riak/cs/2.0.0/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api)
+* [Mapping]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api)

### Openstack Object Storage API (v1)

* Module: `riak_cs_oos_rewrite`
* [Documentation](http://docs.openstack.org/api/openstack-object-storage/1.0/content/index.html)
-* [Mapping](/riak/cs/2.0.0/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api)
+* [Mapping]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api)

Selecting an API is done by adding or changing the `rewrite_module` key in the
Riak CS `riak-cs.conf` file, or the old-style `advanced.config` or `app.config`

@@ -157,5 +157,5 @@ included when installing a Riak CS package or building from source. More
details for each option can be found by following one of the following links:

-* [S3 API](/riak/cs/2.0.0/references/apis/storage/s3/)
-* [OpenStack API](/riak/cs/2.0.0/references/apis/storage/openstack/)
+* [S3 API]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/)
+* [OpenStack API]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/openstack/)

diff --git a/content/riak/cs/2.0.0/references/apis/storage/openstack.md b/content/riak/cs/2.0.0/references/apis/storage/openstack.md
index 520cc896bb..b96a114fac 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/openstack.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/openstack.md
@@ -47,16 +47,16 @@ Update Object Metadata | Coming Soon | Planned for future release |

## Storage Account Services

-* [List Containers](/riak/cs/2.0.0/references/apis/storage/openstack/list-containers) --- Lists the containers owned by an account
+* [List Containers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/openstack/list-containers) --- Lists the containers owned by an account

## Storage Container Services

-* [List Objects](/riak/cs/2.0.0/references/apis/storage/openstack/list-objects) --- Lists the objects in a container
-* [Create Container](/riak/cs/2.0.0/references/apis/storage/openstack/create-container) --- Creates a new container
-* [Delete Container](/riak/cs/2.0.0/references/apis/storage/openstack/delete-container) --- Deletes a container
+* [List Objects]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/openstack/list-objects) --- Lists the objects in a container
+* [Create Container]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/openstack/create-container) --- Creates a new container
+* [Delete Container]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/openstack/delete-container) --- Deletes a container

## Storage Object Services

-* [Get Object](/riak/cs/2.0.0/references/apis/storage/openstack/get-object) --- Retrieves an object
-* [Create or Update Object](/riak/cs/2.0.0/references/apis/storage/openstack/create-object) --- Write
an object in a container
-* [Delete Object](/riak/cs/2.0.0/references/apis/storage/openstack/delete-object) --- Delete an object from a container
+* [Get Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/openstack/get-object) --- Retrieves an object
+* [Create or Update Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/openstack/create-object) --- Write an object in a container
+* [Delete Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/openstack/delete-object) --- Delete an object from a container

diff --git a/content/riak/cs/2.0.0/references/apis/storage/s3.md b/content/riak/cs/2.0.0/references/apis/storage/s3.md
index 61cf740962..5f0b5b574f 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/s3.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/s3.md
@@ -57,30 +57,30 @@ Multipart Uploads {{1.5.0+}} | ✓

## Service-level Operations

-* [GET Service](/riak/cs/2.0.0/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request
+* [GET Service]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request

## Bucket-level Operations

-* [GET Bucket](/riak/cs/2.0.0/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket
-* [GET Bucket ACL](/riak/cs/2.0.0/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket
-* [GET Bucket policy](/riak/cs/2.0.0/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket
-* [PUT Bucket](/riak/cs/2.0.0/references/apis/storage/s3/put-bucket) --- Creates a new bucket
-* [PUT Bucket ACL](/riak/cs/2.0.0/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions for a bucket
+* [GET Bucket]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket
+* [GET Bucket ACL]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket
+* [GET Bucket policy]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket
+* [PUT Bucket]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-bucket) --- Creates a new bucket
+* [PUT Bucket ACL]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions for a bucket
-* [PUT Bucket policy](/riak/cs/2.0.0/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket
-* [DELETE Bucket](/riak/cs/2.0.0/references/apis/storage/s3/delete-bucket) --- Deletes a bucket
-* [DELETE Bucket policy](/riak/cs/2.0.0/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket
+* [PUT Bucket policy]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket
+* [DELETE Bucket]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/delete-bucket) --- Deletes a bucket
+* [DELETE Bucket policy]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket

## Object-level Operations

-* [GET Object](/riak/cs/2.0.0/references/apis/storage/s3/get-object) --- Retrieves an object
-* [GET Object ACL](/riak/cs/2.0.0/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object
-* [PUT Object](/riak/cs/2.0.0/references/apis/storage/s3/put-object) --- Stores an object to a bucket
-* [PUT Object (Copy)](/riak/cs/2.0.0/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object
-* [PUT Object ACL](/riak/cs/2.0.0/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated
with an object
-* [HEAD Object](/riak/cs/2.0.0/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object)
-* [DELETE Object](/riak/cs/2.0.0/references/apis/storage/s3/delete-object) --- Deletes an object
+* [GET Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-object) --- Retrieves an object
+* [GET Object ACL]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object
+* [PUT Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-object) --- Stores an object to a bucket
+* [PUT Object (Copy)]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object
+* [PUT Object ACL]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object
+* [HEAD Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object)
+* [DELETE Object]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/delete-object) --- Deletes an object

## Multipart Upload

@@ -89,16 +89,16 @@ Object parts can be uploaded independently and in any order. After all
parts are uploaded, Riak CS assembles an object out of the parts. When
your object size reaches 100MB, you should consider using multipart
uploads instead of uploading the object in a single operation. Read more
-about multipart uploads on the [overview page](/riak/cs/2.0.0/cookbooks/multipart-upload-overview).
+about multipart uploads on the [overview page]({{< baseurl >}}riak/cs/2.0.0/cookbooks/multipart-upload-overview).

-* [Initiate Multipart Upload](/riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID
-* [Upload Part](/riak/cs/2.0.0/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload
-* [Complete Multipart Upload](/riak/cs/2.0.0/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts
-* [Abort Multipart Upload](/riak/cs/2.0.0/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts
-* [List Parts](/riak/cs/2.0.0/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload.
-* [List Multipart Uploads](/riak/cs/2.0.0/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
+* [Initiate Multipart Upload]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID
+* [Upload Part]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload
+* [Complete Multipart Upload]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts
+* [Abort Multipart Upload]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts
+* [List Parts]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload.
+* [List Multipart Uploads]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
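For the list and abort operations above, `s3cmd` also offers `multipart` and `abortmp` subcommands (syntax assumed from s3cmd's usage text). A minimal sketch with illustrative bucket and key names; the upload ID reuses the value from the Upload Part sample elsewhere in this diff:

```bash
# Hedged sketch: list unfinished multipart uploads for a bucket, then
# abort one by key and upload ID. All identifiers are illustrative.
s3cmd multipart s3://my-bucket
s3cmd abortmp s3://my-bucket/large.iso VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA
```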
## Common Headers

-* [Common Riak CS Request Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-request-headers)
-* [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers)
+* [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-request-headers)
+* [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers)

diff --git a/content/riak/cs/2.0.0/references/apis/storage/s3/abort-multipart-upload.md b/content/riak/cs/2.0.0/references/apis/storage/s3/abort-multipart-upload.md
index 7a1b89e3bb..d6bc7013e4 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/s3/abort-multipart-upload.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/s3/abort-multipart-upload.md
@@ -27,7 +27,7 @@ Authorization: signatureValue

### Request Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Request Elements

@@ -37,7 +37,7 @@ This operation does not use request elements.

### Response Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Response Elements

diff --git a/content/riak/cs/2.0.0/references/apis/storage/s3/complete-multipart-upload.md b/content/riak/cs/2.0.0/references/apis/storage/s3/complete-multipart-upload.md
index cb6826002c..c83dc87eff 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/s3/complete-multipart-upload.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/s3/complete-multipart-upload.md
@@ -45,7 +45,7 @@ Authorization: signatureValue

### Request Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Request Elements

@@ -75,7 +75,7 @@ This implementation of the operation uses only response headers that are common

### Response Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
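For the Request Elements section of this operation: Complete Multipart Upload expects one `PartNumber`/`ETag` pair per uploaded part, as in the S3 API. A minimal sketch of that body, held in a shell variable; the ETag value is an illustrative placeholder, not real output:

```bash
# Hedged sketch of the Complete Multipart Upload request body. Each
# <Part> pairs a part number with the ETag returned by Upload Part.
COMPLETE_BODY=$(cat <<'EOF'
<CompleteMultipartUpload>
  <Part>
    <PartNumber>1</PartNumber>
    <ETag>"b54357faf0632cce46e942fa68356b38"</ETag>
  </Part>
</CompleteMultipartUpload>
EOF
)
```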
### Response Elements

diff --git a/content/riak/cs/2.0.0/references/apis/storage/s3/delete-bucket-policy.md b/content/riak/cs/2.0.0/references/apis/storage/s3/delete-bucket-policy.md
index 5594e951f3..ee8931b18a 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/s3/delete-bucket-policy.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/s3/delete-bucket-policy.md
@@ -29,7 +29,7 @@ This operation does not use request parameters.

### Request Headers

-This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-request-headers).
+This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-request-headers).

### Request Elements

@@ -39,7 +39,7 @@ No body should be appended.

### Response Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Response Elements

diff --git a/content/riak/cs/2.0.0/references/apis/storage/s3/get-bucket-policy.md b/content/riak/cs/2.0.0/references/apis/storage/s3/get-bucket-policy.md
index 5bb8e91192..52b93b4893 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/s3/get-bucket-policy.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/s3/get-bucket-policy.md
@@ -31,7 +31,7 @@ This operation does not use request parameters.

### Request Headers

-This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-request-headers).
+This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-request-headers).

### Request Elements

@@ -41,7 +41,7 @@ No body should be appended.

### Response Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Response Elements

diff --git a/content/riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload.md b/content/riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload.md
index 54d8569005..70ac225ab2 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload.md
@@ -55,7 +55,7 @@ This operation does not use request elements.

### Response Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Response Elements

diff --git a/content/riak/cs/2.0.0/references/apis/storage/s3/list-multipart-uploads.md b/content/riak/cs/2.0.0/references/apis/storage/s3/list-multipart-uploads.md
index f427db3b0a..3f9acf33ec 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/s3/list-multipart-uploads.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/s3/list-multipart-uploads.md
@@ -54,7 +54,7 @@ Authorization: signatureValue

### Request Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Request Elements

@@ -64,7 +64,7 @@ This operation does not use request elements.

### Response Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Response Elements

diff --git a/content/riak/cs/2.0.0/references/apis/storage/s3/list-parts.md b/content/riak/cs/2.0.0/references/apis/storage/s3/list-parts.md
index 61780ed4dc..a604728e96 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/s3/list-parts.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/s3/list-parts.md
@@ -43,7 +43,7 @@ Authorization: signatureValue

### Request Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Request Elements

@@ -53,7 +53,7 @@ This operation does not use request elements.

### Response Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
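As with the other multipart operations, `s3cmd` can drive List Parts directly via its `listmp` subcommand (syntax assumed from s3cmd's usage text). Bucket, key, and upload ID below are illustrative:

```bash
# Hedged sketch: list the parts uploaded so far for one multipart upload.
s3cmd listmp s3://my-bucket/large.iso VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA
```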
### Response Elements

diff --git a/content/riak/cs/2.0.0/references/apis/storage/s3/put-bucket-policy.md b/content/riak/cs/2.0.0/references/apis/storage/s3/put-bucket-policy.md
index 805b0a9e1f..b4de4b7b40 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/s3/put-bucket-policy.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/s3/put-bucket-policy.md
@@ -37,7 +37,7 @@ This operation does not use request parameters.

### Request Headers

-This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-request-headers).
+This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-request-headers).

### Request Elements

@@ -88,7 +88,7 @@ More information on S3 Policies can be found in Amazon's [Permissions And Polici

### Response Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Response Elements

diff --git a/content/riak/cs/2.0.0/references/apis/storage/s3/upload-part.md b/content/riak/cs/2.0.0/references/apis/storage/s3/upload-part.md
index 5f1fdb97b4..e7e792eb0b 100644
--- a/content/riak/cs/2.0.0/references/apis/storage/s3/upload-part.md
+++ b/content/riak/cs/2.0.0/references/apis/storage/s3/upload-part.md
@@ -9,7 +9,7 @@ aliases:
 - /riak/cs/2.0.0/references/apis/storage/s3/RiakCS-Upload-Part/
---

-This operation uploads a part in a multipart upload. You must [initiate a multipart upload](/riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload) before you can upload any part. In this operation you provide part data in your request.
+This operation uploads a part in a multipart upload. You must [initiate a multipart upload]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload) before you can upload any part. In this operation you provide part data in your request.

## Requests

@@ -54,7 +54,7 @@ This operation does not use request elements.

### Response Headers

-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/common-response-headers).

### Response Elements

@@ -64,7 +64,7 @@ This operation does not use response elements.

### Sample Request

-The following `PUT` request uploads part number 1 in a multipart upload. This request includes the upload ID from an [Initiate Multipart Upload](/riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload) request.
+The following `PUT` request uploads part number 1 in a multipart upload. This request includes the upload ID from an [Initiate Multipart Upload]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/initiate-multipart-upload) request.
```
PUT /large.iso?partNumber=1&uploadId=VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA HTTP/1.1

@@ -79,7 +79,7 @@ Authorization: AWS AKIAIOSFODNN7EXAMPLE:VGhpcyBtZXNzYWdlIHNpZ25lZGGieSRlbHZpbmc=

### Sample Response

-The response includes the `ETag` header. This value must be retained for when you send the [Complete Multipart Upload](/riak/cs/2.0.0/references/apis/storage/s3/complete-multipart-upload) request.
+The response includes the `ETag` header. This value must be retained for when you send the [Complete Multipart Upload]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage/s3/complete-multipart-upload) request.

```
HTTP/1.1 200 OK

diff --git a/content/riak/cs/2.0.0/references/appendices/http-admin.md b/content/riak/cs/2.0.0/references/appendices/http-admin.md
index 43e1e24fa0..80add6a1a2 100644
--- a/content/riak/cs/2.0.0/references/appendices/http-admin.md
+++ b/content/riak/cs/2.0.0/references/appendices/http-admin.md
@@ -19,10 +19,10 @@ above and beyond those associated with Riak itself:

Task | CS URI | Further reading
:----|:-------|:---------------
-User management | `/riak-cs/user` | [Account Management](/riak/cs/2.0.0/cookbooks/account-management)
-User access statistics | `/riak-cs/usage` | [Querying Access Statistics](/riak/cs/2.0.0/cookbooks/querying-access-statistics)
-Storage statistics | `/riak-cs/usage` | [Querying Storage Statistics](/riak/cs/2.0.0/cookbooks/querying-storage-statistics)
-Global statistics | `/riak-cs/stats` | [Monitoring and Metrics](/riak/cs/2.0.0/cookbooks/monitoring-and-metrics)
+User management | `/riak-cs/user` | [Account Management]({{< baseurl >}}riak/cs/2.0.0/cookbooks/account-management)
+User access statistics | `/riak-cs/usage` | [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/querying-access-statistics)
+Storage statistics | `/riak-cs/usage` | [Querying Storage Statistics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/querying-storage-statistics)
+Global statistics | `/riak-cs/stats` | [Monitoring and Metrics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/monitoring-and-metrics)

By default, these are accessible over the same IP/port as the rest of
the CS API, but they can be configured to run elsewhere, with or without

@@ -52,13 +52,13 @@ details.
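A minimal sketch of hitting the global statistics resource from the table above. Host and port are the defaults and illustrative, and these resources normally require signed admin credentials, so a plain `curl` like this only works if admin authentication is disabled in your configuration:

```bash
# Hedged sketch: fetch the aggregate stats JSON from a local node.
# Requires admin authentication to be satisfied or disabled.
curl -s http://127.0.0.1:8080/riak-cs/stats
```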
## Related Resources

-* [configuring Riak CS](/riak/cs/2.0.0/cookbooks/configuration/riak-cs)
-* [Querying Access Statistics](/riak/cs/2.0.0/cookbooks/querying-access-statistics)
-  * [Usage and Billing Data](/riak/cs/2.0.0/cookbooks/usage-and-billing-data)
+* [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/riak-cs)
+* [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/querying-access-statistics)
+  * [Usage and Billing Data]({{< baseurl >}}riak/cs/2.0.0/cookbooks/usage-and-billing-data)
  * [Github wiki](https://github.com/basho/riak_cs/wiki/Querying-Access-Stats)
-* [Querying Storage Statistics](/riak/cs/2.0.0/cookbooks/querying-storage-statistics)
+* [Querying Storage Statistics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/querying-storage-statistics)
  * [Enabling storage statistics](https://github.com/basho/riak_cs/wiki/Logging-Storage-Stats)
  * [Github wiki](https://github.com/basho/riak_cs/wiki/Logging-Storage-Stats)
-* [Account Management](/riak/cs/2.0.0/cookbooks/account-management)
+* [Account Management]({{< baseurl >}}riak/cs/2.0.0/cookbooks/account-management)
  * [Github wiki](https://github.com/basho/riak_cs/wiki/User-Management)
-* [Monitoring and Metrics](/riak/cs/2.0.0/cookbooks/monitoring-and-metrics)
+* [Monitoring and Metrics]({{< baseurl >}}riak/cs/2.0.0/cookbooks/monitoring-and-metrics)

diff --git a/content/riak/cs/2.0.0/references/appendices/riak-cs-control.md b/content/riak/cs/2.0.0/references/appendices/riak-cs-control.md
index 31e1914fa2..799b66c937 100644
--- a/content/riak/cs/2.0.0/references/appendices/riak-cs-control.md
+++ b/content/riak/cs/2.0.0/references/appendices/riak-cs-control.md
@@ -20,7 +20,7 @@ managing users in a Riak CS Cluster.

## Installing Riak CS Control

-Riak CS Control [is maintained as a separate application](https://github.com/basho/riak_cs_control) and can be installed via [source or package](/riak/cs/2.0.0/downloads).
+Riak CS Control [is maintained as a separate application](https://github.com/basho/riak_cs_control) and can be installed via [source or package]({{< baseurl >}}riak/cs/2.0.0/downloads).

## Setting Up Riak CS Control

@@ -67,7 +67,7 @@ riak-cs-control start

When you first navigate to the Riak CS Control UI, you will land on the
Users page:

-![Users Page](/images/cs_control_users.png)
+![Users Page]({{< baseurl >}}images/cs_control_users.png)

On this page you can quickly see all current Riak CS users along with
their status, e-mail address, and credentials. From here you can filter,

diff --git a/content/riak/cs/2.0.0/theory/stanchion.md b/content/riak/cs/2.0.0/theory/stanchion.md
index a3188476d4..604dd6d1f5 100644
--- a/content/riak/cs/2.0.0/theory/stanchion.md
+++ b/content/riak/cs/2.0.0/theory/stanchion.md
@@ -25,9 +25,9 @@ Riak CS cluster at any time. Correspondingly, your Stanchion installation
must be managed and configured separately. For more information, see the
following documents:

-* [Configuring Stanchion](/riak/cs/2.0.0/cookbooks/configuration/stanchion)
-* [Installing Stanchion](/riak/cs/2.0.0/cookbooks/installing#installing-stanchion-on-a-node)
-* [The Stanchion Command-line Interface](/riak/cs/2.0.0/cookbooks/command-line-tools#stanchion)
+* [Configuring Stanchion]({{< baseurl >}}riak/cs/2.0.0/cookbooks/configuration/stanchion)
+* [Installing Stanchion]({{< baseurl >}}riak/cs/2.0.0/cookbooks/installing#installing-stanchion-on-a-node)
+* [The Stanchion Command-line Interface]({{< baseurl >}}riak/cs/2.0.0/cookbooks/command-line-tools#stanchion)

For a more in-depth discussion of implementation details, see the
project's

@@ -51,7 +51,7 @@ rejected.
The uniqueness of these entities is enforced by serializing any
creation or modification requests that involve them. This process is
handled by Stanchion. What happens under the hood is essentially that Stanchion
-mandates that all [vnodes](/riak/kv/2.1.3/learn/glossary#vnode) in the underlying Riak cluster that are responsible for the user or bucket being created must be available at creation time.
+mandates that all [vnodes]({{< baseurl >}}riak/kv/2.1.3/learn/glossary#vnode) in the underlying Riak cluster that are responsible for the user or bucket being created must be available at creation time.

One result of this enforcement is that user creation requests and
bucket creation or modification, i.e. deletion, requests are not highly

diff --git a/content/riak/cs/2.0.0/tutorials/fast-track/local-testing-environment.md b/content/riak/cs/2.0.0/tutorials/fast-track/local-testing-environment.md
index e503f6704e..3c3f089a67 100644
--- a/content/riak/cs/2.0.0/tutorials/fast-track/local-testing-environment.md
+++ b/content/riak/cs/2.0.0/tutorials/fast-track/local-testing-environment.md
@@ -20,7 +20,7 @@ does not attempt to optimize your installation for your particular
architecture.

If you want to build a testing environment with a minimum of
-configuration, there is an option for [Building a Virtual Testing Environment](/riak/cs/2.0.0/tutorials/fast-track/virtual-test-environment).
+configuration, there is an option for [Building a Virtual Testing Environment]({{< baseurl >}}riak/cs/2.0.0/tutorials/fast-track/virtual-test-environment).

## Installing Your First Node

@@ -30,7 +30,7 @@ and running Riak and Riak CS.

### Step 1: Raise your system's open file limits

Riak can consume a large number of open file handles during normal
-operation. See the [Open Files Limit](/riak/kv/2.1.3/using/performance/open-files-limit) document for more information on
+operation. See the [Open Files Limit]({{< baseurl >}}riak/kv/2.1.3/using/performance/open-files-limit) document for more information on
how to increase your system's open files limit.

If you are the root user, you can increase the system's open files limit

@@ -52,7 +52,7 @@
riak soft nofile 65536
riak hard nofile 65536
```

-For Mac OS X, consult the [open files limit](/riak/kv/2.1.3/using/performance/open-files-limit/#mac-os-x) documentation.
+For Mac OS X, consult the [open files limit]({{< baseurl >}}riak/kv/2.1.3/using/performance/open-files-limit/#mac-os-x) documentation.

### Step 2: Download and install packages

@@ -67,14 +67,14 @@ sudo apt-get install -y curl

substitute the appropriate CLI commands.

If you are running Ubuntu 11.10 or later, you will also need the
-`libssl0.9.8` package. See [Installing on Debian and Ubuntu](/riak/kv/2.1.3/setup/installing/debian-ubuntu) for more information.
+`libssl0.9.8` package. See [Installing on Debian and Ubuntu]({{< baseurl >}}riak/kv/2.1.3/setup/installing/debian-ubuntu) for more information.

```bash
sudo apt-get install -y libssl0.9.8
```

Now, grab the appropriate packages: Riak, Riak CS, and Stanchion. See
-[Download Riak](/riak/kv/2.1.3/downloads/) and [Download Riak CS](/riak/cs/2.0.0/downloads).
+[Download Riak]({{< baseurl >}}riak/kv/2.1.3/downloads/) and [Download Riak CS]({{< baseurl >}}riak/cs/2.0.0/downloads).
You can skip Riak CS Control for now. Once you have the packages, install
them per the instructions below.

@@ -87,14 +87,14 @@ installing Riak.
**Do not attempt to configure or start Riak until step 3 in this
document.**

- * [Debian and Ubuntu](/riak/kv/2.1.3/setup/installing/debian-ubuntu)
- * [RHEL and CentOS](/riak/kv/2.1.3/setup/installing/rhel-centos)
- * [Mac OS X](/riak/kv/2.1.3/setup/installing/mac-osx)
- * [FreeBSD](/riak/kv/2.1.3/setup/installing/freebsd)
- * [SUSE](/riak/kv/2.1.3/setup/installing/suse)
- * [Windows Azure](/riak/kv/2.1.3/setup/installing/windows-azure)
- * [AWS Marketplace](/riak/kv/2.1.3/setup/installing/amazon-web-services)
- * [From Source](/riak/kv/2.1.3/setup/installing/source)
+ * [Debian and Ubuntu]({{< baseurl >}}riak/kv/2.1.3/setup/installing/debian-ubuntu)
+ * [RHEL and CentOS]({{< baseurl >}}riak/kv/2.1.3/setup/installing/rhel-centos)
+ * [Mac OS X]({{< baseurl >}}riak/kv/2.1.3/setup/installing/mac-osx)
+ * [FreeBSD]({{< baseurl >}}riak/kv/2.1.3/setup/installing/freebsd)
+ * [SUSE]({{< baseurl >}}riak/kv/2.1.3/setup/installing/suse)
+ * [Windows Azure]({{< baseurl >}}riak/kv/2.1.3/setup/installing/windows-azure)
+ * [AWS Marketplace]({{< baseurl >}}riak/kv/2.1.3/setup/installing/amazon-web-services)
+ * [From Source]({{< baseurl >}}riak/kv/2.1.3/setup/installing/source)

#### Next, install Riak CS

@@ -458,7 +458,7 @@ your first node with two exceptions:

You will then need to verify the cluster plan with the `riak-admin
cluster plan` command, and commit the cluster changes with `riak-admin
cluster commit` to complete the join process. More information is
-available in the [Command Line Tools](/riak/kv/2.1.3/using/admin/riak-admin/#cluster) documentation.
+available in the [Command Line Tools]({{< baseurl >}}riak/kv/2.1.3/using/admin/riak-admin/#cluster) documentation.

> **Note**
>
@@ -469,4 +469,4 @@ such as a dedicated device, [HAProxy](http://haproxy.1wt.eu), or
[Nginx](http://wiki.nginx.org/Main) between Riak CS and the outside
world.

-Once you have completed this step, You can progress to [testing the Riak CS installation](/riak/cs/2.0.0/tutorials/fast-track/test-installation) using s3cmd.
+Once you have completed this step, you can progress to [testing the Riak CS installation]({{< baseurl >}}riak/cs/2.0.0/tutorials/fast-track/test-installation) using s3cmd.
diff --git a/content/riak/cs/2.0.0/tutorials/fast-track/test-installation.md b/content/riak/cs/2.0.0/tutorials/fast-track/test-installation.md
index 77c24e9d24..258833a147 100644
--- a/content/riak/cs/2.0.0/tutorials/fast-track/test-installation.md
+++ b/content/riak/cs/2.0.0/tutorials/fast-track/test-installation.md
@@ -141,6 +141,6 @@ bit of learning to be done, so make sure and check out the
Reference section (click "Reference" on the nav on the left side of
this page).
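Before digging into the reference material, the whole round trip this fast track describes can be smoke-tested from the command line. The sketch below is illustrative only: it assumes `s3cmd` is installed and that `~/.s3cfg` already points at your Riak CS endpoint with valid credentials; the bucket name is a placeholder.

```bash
# Hedged smoke test for a fresh Riak CS install; assumes ~/.s3cfg is
# already configured with your CS host and credentials.
s3cmd mb s3://cs-smoke-test                # create a test bucket
echo "hello riak cs" > hello.txt
s3cmd put hello.txt s3://cs-smoke-test     # upload an object
s3cmd ls s3://cs-smoke-test                # list the bucket contents
s3cmd get s3://cs-smoke-test/hello.txt out.txt && cat out.txt
```

If the final `cat` echoes the uploaded text, the API, authentication, and storage paths are all working.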
A few items that may be of particular interest:

-* [Details about API operations](/riak/cs/2.0.0/references/apis/storage)
-* [Information about the Ruby Fog client](/riak/cs/2.0.0/cookbooks/fog)
-* [Release Notes](/riak/cs/2.0.0/cookbooks/release-notes)
+* [Details about API operations]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage)
+* [Information about the Ruby Fog client]({{< baseurl >}}riak/cs/2.0.0/cookbooks/fog)
+* [Release Notes]({{< baseurl >}}riak/cs/2.0.0/cookbooks/release-notes)
diff --git a/content/riak/cs/2.0.0/tutorials/fast-track/virtual-test-environment.md b/content/riak/cs/2.0.0/tutorials/fast-track/virtual-test-environment.md
index af56437b97..bca5bae63c 100644
--- a/content/riak/cs/2.0.0/tutorials/fast-track/virtual-test-environment.md
+++ b/content/riak/cs/2.0.0/tutorials/fast-track/virtual-test-environment.md
@@ -22,7 +22,7 @@ want to tune the OS or node/memory count, you'll have to edit the

If you want to build a testing environment with more flexibility in
configuration and durability across environment resets, there are
-instructions for [Building a Local Test Environment](/riak/cs/2.0.0/tutorials/fast-track/local-testing-environment).
+instructions for [Building a Local Test Environment]({{< baseurl >}}riak/cs/2.0.0/tutorials/fast-track/local-testing-environment).

## Configuration

@@ -87,7 +87,7 @@ Secret key: RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw==

## Next Steps

Congratulations! You have deployed a virtualized environment of Riak CS.
-You are ready to progress to [Testing the Riak CS Installation](/riak/cs/2.0.0/tutorials/fast-track/test-installation).
+You are ready to progress to [Testing the Riak CS Installation]({{< baseurl >}}riak/cs/2.0.0/tutorials/fast-track/test-installation).

### Stopping Your Virtual Environment

diff --git a/content/riak/cs/2.0.0/tutorials/fast-track/what-is-riak-cs.md b/content/riak/cs/2.0.0/tutorials/fast-track/what-is-riak-cs.md
index 355bb0ed44..78f55708b8 100644
--- a/content/riak/cs/2.0.0/tutorials/fast-track/what-is-riak-cs.md
+++ b/content/riak/cs/2.0.0/tutorials/fast-track/what-is-riak-cs.md
@@ -35,11 +35,11 @@ automatically take over the responsibility of failed or
non-communicative nodes, data remains available even in the event of
node failure or network partition.

-When an object is uploaded via the [storage API](/riak/cs/2.0.0/references/apis/storage), Riak CS breaks the object into smaller chunks that are streamed,
+When an object is uploaded via the [storage API]({{< baseurl >}}riak/cs/2.0.0/references/apis/storage), Riak CS breaks the object into smaller chunks that are streamed,
written, and replicated in Riak. Each chunk is associated with metadata
for later retrieval. The diagram below provides a visualization.

-![Riak CS Chunking](/images/Riak-CS-Overview.png)
+![Riak CS Chunking]({{< baseurl >}}images/Riak-CS-Overview.png)

## Riak CS Enterprise

diff --git a/content/riak/cs/2.0.1/cookbooks/access-control-lists.md b/content/riak/cs/2.0.1/cookbooks/access-control-lists.md
index 2facb570a2..5e69272b78 100644
--- a/content/riak/cs/2.0.1/cookbooks/access-control-lists.md
+++ b/content/riak/cs/2.0.1/cookbooks/access-control-lists.md
@@ -82,9 +82,9 @@ Riak CS permissions are split into two types: **bucket permissions** and

## Buckets

-Bucket names **must** be [globally unique](/riak/cs/2.0.1/theory/stanchion/#globally-unique-entities). To avoid conflicts, all
+Bucket names **must** be [globally unique]({{< baseurl >}}riak/cs/2.0.1/theory/stanchion/#globally-unique-entities).
To avoid conflicts, all
bucket creation requests are made to an application called
-[Stanchion](/riak/cs/2.0.1/cookbooks/configuration/stanchion). This means that all requests for modification of a bucket ACL should be serialized through Stanchion. While this may cause undesirable serialization of these requests, we
+[Stanchion]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/stanchion). This means that all requests for modification of a bucket ACL should be serialized through Stanchion. While this may cause undesirable serialization of these requests, we
believe it is appropriate based on the following statement from this
[documentation on bucket restrictions](http://docs.amazonwebservices.com/AmazonS3/latest/dev/BucketRestrictions.html) from Amazon regarding restrictions on
bucket operations:

@@ -105,4 +105,4 @@ created granting the creator both ownership and full access control and
denying access to all other parties.

For information on specifying an ACL when making a `PUT` request, see
-[Riak CS PUT Object ACL](/riak/cs/2.0.1/references/apis/storage/s3/put-object-acl).
+[Riak CS PUT Object ACL]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-object-acl).
diff --git a/content/riak/cs/2.0.1/cookbooks/authentication.md b/content/riak/cs/2.0.1/cookbooks/authentication.md
index 318cb05282..cdc466db15 100644
--- a/content/riak/cs/2.0.1/cookbooks/authentication.md
+++ b/content/riak/cs/2.0.1/cookbooks/authentication.md
@@ -111,4 +111,4 @@ http://bucket.data.basho.com/document?AWSAccessKeyId=8EE3UE-UMW1YTPMBC3EB&Expire

## Keystone Authentication

More information on using Keystone for authentication with Riak CS can
-be found in [using Riak CS with Keystone](/riak/cs/2.0.1/cookbooks/using-with-keystone).
+be found in [using Riak CS with Keystone]({{< baseurl >}}riak/cs/2.0.1/cookbooks/using-with-keystone).
diff --git a/content/riak/cs/2.0.1/cookbooks/command-line-tools.md b/content/riak/cs/2.0.1/cookbooks/command-line-tools.md
index 680f285ca0..ba3a8bc481 100644
--- a/content/riak/cs/2.0.1/cookbooks/command-line-tools.md
+++ b/content/riak/cs/2.0.1/cookbooks/command-line-tools.md
@@ -206,7 +206,7 @@ More information about Erlang's etop tool can be found in the

## riak-cs-admin gc

-This command controls Riak CS's [garbage collection](/riak/cs/2.0.1/cookbooks/garbage-collection) system.
+This command controls Riak CS's [garbage collection]({{< baseurl >}}riak/cs/2.0.1/cookbooks/garbage-collection) system.

```bash
riak-cs-admin gc
@@ -312,7 +312,7 @@ undergirding Riak CS.

Temporarily changes the host and/or port used by Stanchion. This change is
effective until the node is restarted, at which point Stanchion will
-begin listening on the host and port specified in your [configuration files](/riak/cs/2.0.1/cookbooks/configuration/reference).
+begin listening on the host and port specified in your [configuration files]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/reference).

```bash
riak-cs-stanchion switch HOST PORT
```

@@ -521,7 +521,7 @@ documented [above](#riak-cs-admin-access).

Riak CS version 1.5 offers support for supercluster operations. The
`supercluster` command interface enables you to interact with that system.
-More information can be found in [Riak CS Supercluster Support](/riak/cs/2.0.1/cookbooks/supercluster).
+More information can be found in [Riak CS Supercluster Support]({{< baseurl >}}riak/cs/2.0.1/cookbooks/supercluster).

{{% note title="Note: technical preview" %}}
Riak CS supercluster support is available only as a technical preview for
@@ -635,7 +635,7 @@ Fetches all current weights from the master member.
riak-cs-supercluster refresh
```

-When a member's weight is updated, that weight is stored in the [master member](/riak/cs/2.0.1/cookbooks/supercluster/#the-master-member) and cached in Riak CS. Riak CS fetches weights from the master member only periodically. The
+When a member's weight is updated, that weight is stored in the [master member]({{< baseurl >}}riak/cs/2.0.1/cookbooks/supercluster/#the-master-member) and cached in Riak CS. Riak CS fetches weights from the master member only periodically. The
`refresh` command syncs the weights stored in the master member with the
weights cached in Riak CS so that there is no discrepancy.
diff --git a/content/riak/cs/2.0.1/cookbooks/configuration.md b/content/riak/cs/2.0.1/cookbooks/configuration.md
index db97286c23..ea2288c61b 100644
--- a/content/riak/cs/2.0.1/cookbooks/configuration.md
+++ b/content/riak/cs/2.0.1/cookbooks/configuration.md
@@ -27,7 +27,7 @@ If your system consists of several nodes, configuration primarily represents set

## Configuration of System Components

-* [Configuring Riak](/riak/cs/2.0.1/cookbooks/configuration/riak-for-cs)
-* [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs)
-* [Configuring Stanchion](/riak/cs/2.0.1/cookbooks/configuration/stanchion)
-* [Configuring an S3 client](/riak/cs/2.0.1/cookbooks/configuration/s3-client)
+* [Configuring Riak]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-for-cs)
+* [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs)
+* [Configuring Stanchion]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/stanchion)
+* [Configuring an S3 client]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/s3-client)
diff --git a/content/riak/cs/2.0.1/cookbooks/configuration/dragondisk.md b/content/riak/cs/2.0.1/cookbooks/configuration/dragondisk.md
index eee5f6101a..1ec02abe3e 100644
--- a/content/riak/cs/2.0.1/cookbooks/configuration/dragondisk.md
+++ b/content/riak/cs/2.0.1/cookbooks/configuration/dragondisk.md
@@ -29,7 +29,7 @@ other Linux distributions.

This is the main DragonDisk window as it appears upon starting the
application.

-![DragonDisk screenshot](/images/dragondisk_linux0.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux0.png)

## Create an account

@@ -38,16 +38,16 @@ save an account. The following describes the process for doing so.

* From the **File** menu, select **Accounts**.

-![DragonDisk screenshot](/images/dragondisk_linux1.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux1.png)

* Click **New**.

-![DragonDisk screenshot](/images/dragondisk_linux2.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux2.png)

* In the **Account** dialog window, choose **Other S3 compatible service**
under the **Provider** drop down menu.

-![DragonDisk screenshot](/images/dragondisk_linux3.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux3.png)

* Enter the hostname or IP address of your Riak CS cluster's public
interface into the **Service Endpoint** field.

@@ -66,12 +66,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTS** check box.

* Click **OK** to save the account configuration.

-![DragonDisk screenshot](/images/dragondisk_linux4.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux4.png)

* Click **Close** to complete account creation and to continue to attempt
connecting to Riak CS.

-![DragonDisk screenshot](/images/dragondisk_linux5.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux5.png)

### Connect to Riak CS

@@ -84,7 +84,7 @@ configuration.
right pane, then you're connected to Riak CS and can proceed to
creating a bucket.

-![DragonDisk screenshot](/images/dragondisk_linux6.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux6.png)

### Create a bucket

@@ -93,7 +93,7 @@ Riak CS.

* Click the **Create bucket** icon to open the bucket creation dialog.

-![DragonDisk screenshot](/images/dragondisk_linux7.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux7.png)

* In the **Create bucket** dialog, give the bucket a name. In this
example we're naming the bucket `dragondisklinux`.

* The bucket should now appear in the right pane and you can now
proceed with copying some test files into the bucket.

-![DragonDisk screenshot](/images/dragondisk_linux8.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux8.png)

### Copy files to bucket

@@ -109,13 +109,13 @@ Finally, navigate your local computer in the left pane and select a
file or files to copy from your local computer to the newly created
Riak CS bucket.

-![DragonDisk screenshot](/images/dragondisk_linux9.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux9.png)

* After selecting a file or files, you can drag them to the bucket you
created in the right pane and the copy operation will begin.

* After the files are copied, they'll appear in the bucket.

-![DragonDisk screenshot](/images/dragondisk_linux10.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_linux10.png)

* You have now successfully verified connectivity, bucket creation, and
file copying operations for your Riak CS installation with DragonDisk.

@@ -131,7 +131,7 @@ This section describes configuration of DragonDisk for Mac OS X.

* This is the main DragonDisk window as it appears upon starting the
application.

-![DragonDisk screenshot](/images/dragondisk_osx0.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx0.png)

### Create an account

@@ -140,16 +140,16 @@ save an account. The following describes the process for doing so.

* From the **File** menu, select **Accounts**.

-![DragonDisk screenshot](/images/dragondisk_osx1.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx1.png)

* Click **New**.

-![DragonDisk screenshot](/images/dragondisk_osx2.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx2.png)

* In the **Account** dialog window, choose **Other S3 compatible service**
under the **Provider** drop down menu.

-![DragonDisk screenshot](/images/dragondisk_osx3.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx3.png)

* Enter the hostname or IP address of your Riak CS cluster's public
interface into the **Service Endpoint** field.

@@ -168,12 +168,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTS** check box.

* Click **OK** to save the account configuration.

-![DragonDisk screenshot](/images/dragondisk_osx4.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx4.png)

* Click **Close** to complete account creation and continue try
connecting to Riak CS.

-![DragonDisk screenshot](/images/dragondisk_osx5.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx5.png)

### Connect to Riak CS

@@ -186,7 +186,7 @@ configuration.

right pane, then you're connected to Riak CS and can proceed to
creating a bucket.

-![DragonDisk screenshot](/images/dragondisk_osx6.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx6.png)

### Create a bucket

@@ -195,7 +195,7 @@ Riak CS.

* Click the **Create bucket** icon to open the bucket creation dialog.
-![DragonDisk screenshot](/images/dragondisk_osx7.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx7.png)

* In the **Create bucket** dialog, give the bucket a name. In this
example we're naming the bucket *dragondiskosx*.

* The bucket should now appear in the right pane and you can now
proceed with copying some test files into the bucket.

-![DragonDisk screenshot](/images/dragondisk_osx8.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx8.png)

### Copy files to bucket

@@ -211,14 +211,14 @@ Finally, navigate your local computer in the left pane and select a
file or files to copy from your local computer to the newly created
Riak CS bucket.

-![DragonDisk screenshot](/images/dragondisk_osx9.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx9.png)

* After selecting a file or files, you can drag them to the bucket you
created in the right pane and the copy operation will begin.

* After the files are copied, they'll appear in the bucket.

-![DragonDisk screenshot](/images/dragondisk_osx10.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_osx10.png)

* You have now successfully verified connectivity, bucket creation, and
file copying operations for your Riak CS installation with DragonDisk.

@@ -234,7 +234,7 @@ This section describes configuration of DragonDisk for Windows.

* This is the main DragonDisk window as it appears upon starting the
application.

-![DragonDisk screenshot](/images/dragondisk_windows0.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows0.png)

### Create an account

@@ -243,16 +243,16 @@ save an account. The following describes the process for doing so.

* From the **File** menu, select **Accounts**.

-![DragonDisk screenshot](/images/dragondisk_windows1.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows1.png)

* Click **New**.

-![DragonDisk screenshot](/images/dragondisk_windows2.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows2.png)

* In the **Account** dialog window, choose **Other S3-compatible service**
under the **Provider** drop down menu.

-![DragonDisk screenshot](/images/dragondisk_windows3.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows3.png)

* Enter the hostname or IP address of your Riak CS cluster's public
interface into the **Service Endpoint** field.

@@ -271,12 +271,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTS** check box.

* Click **OK** to save the account configuration.

-![DragonDisk screenshot](/images/dragondisk_windows4.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows4.png)

* Click **Close** to complete account creation and continue try
connecting to Riak CS.

-![DragonDisk screenshot](/images/dragondisk_windows5.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows5.png)

### Connect to Riak CS

@@ -289,7 +289,7 @@ configuration.

right pane, then you're connected to Riak CS and can proceed to
creating a bucket.

-![DragonDisk screenshot](/images/dragondisk_windows6.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows6.png)

### Create a bucket

@@ -297,7 +297,7 @@ configuration.
with Riak CS.

* Click the **Create bucket** icon to open the bucket creation dialog.
-![DragonDisk screenshot](/images/dragondisk_windows7.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows7.png)

* In the **Create bucket** dialog, give the bucket a name. In this
example we're naming the bucket *dragonbucket*.

@@ -305,7 +305,7 @@ configuration.
* The bucket should now appear in the right pane and you can now
proceed with copying some test files into the bucket.

-![DragonDisk screenshot](/images/dragondisk_windows8.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows8.png)

### Copy files to bucket

@@ -313,13 +313,13 @@ Finally, navigate your local computer in the left pane and select a
file or files to copy from your local computer to the newly created
Riak CS bucket.

-![DragonDisk screenshot](/images/dragondisk_windows9.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows9.png)

* After selecting a file or files, you can drag them to the bucket you
created in the right pane and the copy operation will begin.

* After the files are copied, they'll appear in the bucket.

-![DragonDisk screenshot](/images/dragondisk_windows10.png)
+![DragonDisk screenshot]({{< baseurl >}}images/dragondisk_windows10.png)

* You have now successfully verified connectivity, bucket creation, and
file copying operations for your Riak CS installation with DragonDisk.
diff --git a/content/riak/cs/2.0.1/cookbooks/configuration/load-balancing-proxy.md b/content/riak/cs/2.0.1/cookbooks/configuration/load-balancing-proxy.md
index 1130f694a5..ec6607510d 100644
--- a/content/riak/cs/2.0.1/cookbooks/configuration/load-balancing-proxy.md
+++ b/content/riak/cs/2.0.1/cookbooks/configuration/load-balancing-proxy.md
@@ -51,7 +51,7 @@ act as a load balancer to a Riak CS installation.

> **Note on open files limits**
>
> The operating system's open files limits need to be greater than 256000
-for the example configuration that follows. Consult the [Open Files Limit](/riak/kv/2.1.3/using/performance/open-files-limit) documentation for details on configuring the value for different
+for the example configuration that follows. Consult the [Open Files Limit]({{< baseurl >}}riak/kv/2.1.3/using/performance/open-files-limit) documentation for details on configuring the value for different
operating systems.

```config
diff --git a/content/riak/cs/2.0.1/cookbooks/configuration/multi-datacenter.md b/content/riak/cs/2.0.1/cookbooks/configuration/multi-datacenter.md
index 719cfb9a7f..0c6ec804a3 100644
--- a/content/riak/cs/2.0.1/cookbooks/configuration/multi-datacenter.md
+++ b/content/riak/cs/2.0.1/cookbooks/configuration/multi-datacenter.md
@@ -28,7 +28,7 @@ CS cluster.

As of Riak release 1.4.0, there are two different MDC replication modes
that Riak CS can use to request data from remote clusters. Please see
-the [comparison](/riak/kv/2.1.3/using/reference/multi-datacenter/comparison) doc for more information.
+the [comparison]({{< baseurl >}}riak/kv/2.1.3/using/reference/multi-datacenter/comparison) doc for more information.

### Replication Version 3 Configuration

@@ -75,9 +75,9 @@ configured **sink cluster**.
See also:

-* [Upgrading from v2 to v3](/riak/kv/2.1.3/setup/upgrading/multi-datacenter)
-* [Comparing v2 and v3](/riak/kv/2.1.3/using/reference/multi-datacenter/comparison)
-* [Multi-Datacenter Operations](/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter)
+
+* [Comparing v2 and v3]({{< baseurl >}}riak/kv/2.1.3/using/reference/multi-datacenter/comparison)
+* [Multi-Datacenter Operations]({{< baseurl >}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter)

## Riak CS Configuration

@@ -119,10 +119,10 @@ Replace the `node` variable above with the nodename specified in the

## Stanchion Configuration

-Though there is no specific configuration for [Stanchion](/riak/cs/2.0.1/theory/stanchion), note that
+Though there is no specific configuration for [Stanchion]({{< baseurl >}}riak/cs/2.0.1/theory/stanchion), note that
Stanchion should be a single, globally unique process to which every
Riak CS node sends requests, even if there are multiple replicated
sites. Unlike Riak and Riak CS, Stanchion should run on _only one node
in a given cluster_, perhaps on its own, dedicated hardware if you wish.
Stanchion runs on only one node because it manages strongly consistent
-updates to [globally unique entities](/riak/cs/2.0.1/theory/stanchion/#globally-unique-entities) like users and buckets.
+updates to [globally unique entities]({{< baseurl >}}riak/cs/2.0.1/theory/stanchion/#globally-unique-entities) like users and buckets.
diff --git a/content/riak/cs/2.0.1/cookbooks/configuration/reference.md b/content/riak/cs/2.0.1/cookbooks/configuration/reference.md
index f1e4ed05ee..d9a7e7ebb9 100644
--- a/content/riak/cs/2.0.1/cookbooks/configuration/reference.md
+++ b/content/riak/cs/2.0.1/cookbooks/configuration/reference.md
@@ -53,12 +53,12 @@ aliases:

]},
```

- and so on. More details can be found at [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs).
+ and so on. More details can be found at [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs).
{{% /note %}}

This document is intended as a reference listing of all configurable parameters
for Riak CS. For a more narrative-style walkthrough of configuring Riak CS, we
-recommend consulting the [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs) tutorial.
+recommend consulting the [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs) tutorial.

The configuration for Riak CS is handled through either the `riak-cs.conf` and
`advanced.config` file pair, which were introduced in Riak CS 2.0.0, or the two

@@ -130,7 +130,7 @@ The tables below will show settings for both `riak-cs.conf` and

riak_host
The IP address/port for the Riak CS node's corresponding Riak node (used by
-Riak's Protocol Buffers interface)
+Riak's Protocol Buffers interface)
127.0.0.1:8087

@@ -157,7 +157,7 @@ the corresponding HTTP host).

riak_host
The TCP IP/port for the Riak CS node's corresponding Riak node (used by
-Riak's Protocol Buffers interface)
+Riak's Protocol Buffers interface)
{"127.0.0.1", 8087}

@@ -298,7 +298,7 @@ tasks use the IP and port as all other Riak CS traffic.

The admin key used for administrative access to Riak CS, e.g. usage
of the /riak-cs/stats endpoint. Please note that both
admin.key and admin.secret must match the
-corresponding settings in the Stanchion node's stanchion.conf.
+corresponding settings in the Stanchion node's stanchion.conf.

admin-key

@@ -326,7 +326,7 @@ this setting unless you implement a custom authentication scheme.
rewrite_module
A rewrite module contains a set of rules for translating requests made using
-a particular API to requests in the the native Riak CS storage API. We do
+a particular API to requests in the native Riak CS storage API. We do
not recommend changing this setting unless you implement a custom module.
riak_cs_s3_rewrite

@@ -351,7 +351,7 @@ tasks use the IP and port as all other Riak CS traffic.

The admin key used for administrative access to Riak CS, e.g. usage
of the /riak-cs/stats endpoint. Please note that both
admin_key and admin_secret must match the
-corresponding settings in the Stanchion node's
+corresponding settings in the Stanchion node's
app.config.

@@ -387,7 +387,7 @@ actions, including bucket deletion.

rewrite_module
A rewrite module contains a set of rules for translating requests
-made using a particular API to requests in the the native Riak CS storage API. We do not recommend changing this setting unless you
+made using a particular API to requests in the native Riak CS storage API. We do not recommend changing this setting unless you
implement a custom module.
riak_cs_s3_rewrite

@@ -396,7 +396,7 @@ implement a custom module.

## Usage Recording

-These settings relate to Riak CS's [access logs](/riak/cs/2.0.1/cookbooks/usage-and-billing-data).
+These settings relate to Riak CS's [access logs]({{< baseurl >}}riak/cs/2.0.1/cookbooks/usage-and-billing-data).

### `riak-cs.conf`

@@ -519,7 +519,7 @@ of 86400 translates to 1 day.

## Garbage Collection

-Settings related to Riak CS's [garbage collection](/riak/cs/2.0.1/cookbooks/garbage-collection) \(GC) process.
+Settings related to Riak CS's [garbage collection]({{< baseurl >}}riak/cs/2.0.1/cookbooks/garbage-collection) \(GC) process.

### `riak-cs.conf`

@@ -661,7 +661,7 @@ blocks to Riak.

cs_version
The Riak CS version number. This number is used to selectively enable new
-features for the current version to better support rolling upgrades. New
+features for the current version to better support rolling upgrades. New
installs shouldn't need to modify this. If you're performing a rolling
upgrade, keep the original value (if not defined, Riak CS uses 0) of the
old app.config until all nodes have been upgraded. At that point, set

@@ -696,7 +696,7 @@ source IP address as an input (which is the default).

cs_version
The Riak CS version number. This number is used to selectively
-enable new features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set to the new value.
+enable new features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set to the new value.

@@ -958,4 +958,4 @@ error logger.
-->

-[config_your_code]: http://docs.basho.com/riak/1.4.12/ops/advanced/configs/configuration-files/#Configuring-Your-code-vm-args-code-
+[config_your_code]: {{< baseurl >}}riak/kv/2.0.1/configuring/basic/#erlang-vm-tunings
diff --git a/content/riak/cs/2.0.1/cookbooks/configuration/riak-cs.md b/content/riak/cs/2.0.1/cookbooks/configuration/riak-cs.md
index 806bf3905d..b9ae62fde1 100644
--- a/content/riak/cs/2.0.1/cookbooks/configuration/riak-cs.md
+++ b/content/riak/cs/2.0.1/cookbooks/configuration/riak-cs.md
@@ -10,8 +10,8 @@ menu:
project: "riak_cs"
project_version: "2.0.1"
aliases:
- - /riakcs/2.0.1/cookbooks/configuration/Configuring-Riak-CS/
- - /riak/cs/2.0.1/cookbooks/configuration/Configuring-Riak-CS/
+ - /riakcs/2.0.1/cookbooks/configuration/riak-cs/
+ - /riak/cs/2.0.1/cookbooks/configuration/riak-cs/
---

For Riak CS to operate properly it must know how to connect to Riak.
@@ -51,9 +51,9 @@ files. If an `app.config` file is present, neither the `riak-cs.config` nor the
to continue usage of the legacy `app.config` file, please note that some
configuration options have changed names. Most notably, the IP/Port format
has changed in 2.0 for Stanchion, Riak, and Riak CS. To view these changes,
-please review the [Rolling Upgrades](/riak/cs/2.0.1/cookbooks/rolling-upgrades) Document.
+please review the [Rolling Upgrades]({{< baseurl >}}riak/cs/2.0.1/cookbooks/rolling-upgrades) Document.
>
-> For a comprehensive listing of available parameters and a full list of `app.config` parameters, see the [Full Configuration Reference](/riak/cs/2.0.1/cookbooks/configuration/reference).
+> For a comprehensive listing of available parameters and a full list of `app.config` parameters, see the [Full Configuration Reference]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/reference).

The sections below walk you through some of the main configuration categories
that you will likely encounter while operating Riak CS.

@@ -81,12 +81,12 @@ is required.
{{% /note %}}

After making any changes to the `riak-cs.conf` file in Riak CS,
-[restart](/riak/cs/2.0.1/cookbooks/command-line-tools/#riak-cs) the node if it is already running.
+[restart]({{< baseurl >}}riak/cs/2.0.1/cookbooks/command-line-tools/#riak-cs) the node if it is already running.

## Specifying the Stanchion Node

If you're running a single Riak CS node, you don't have to change the
-[Stanchion](/riak/cs/2.0.1/cookbooks/configuration/stanchion) settings because Stanchion runs on the local host. If your Riak CS system has multiple nodes, however, you must specify the IP address and port for the Stanchion node and whether or not SSL is enabled.
+[Stanchion]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/stanchion) settings because Stanchion runs on the local host. If your Riak CS system has multiple nodes, however, you must specify the IP address and port for the Stanchion node and whether or not SSL is enabled.

The Stanchion settings reside in the Riak CS `riak-cs.conf` file, which is
located in the `/etc/riak-cs` directory of each Riak CS node.

@@ -264,7 +264,7 @@ particular use case.

### Tuning

We strongly recommend that you take care when setting the value of the
-[`pb_backlog` setting](/riak/cs/2.0.1/cookbooks/configuration/riak-for-cs/#setting-up-riak-to-use-protocol-buffers) in Riak. When a Riak CS node is
+[`pb_backlog` setting]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-for-cs/#setting-up-riak-to-use-protocol-buffers) in Riak. When a Riak CS node is
started, each connection pool begins to establish connections to Riak.
This can result in a [thundering herd problem](http://en.wikipedia.org/wiki/Thundering_herd_problem) in which connections in the pool believe they are connected to Riak, but in reality some of the connections have been reset. Due to TCP `RST` packet rate limiting (controlled by `net.inet.icmp.icmplim`) some of the connections may not receive notification until they are used to service a user's request. This manifests itself as an `{error, disconnected}` message in the Riak CS logs and an error returned to the user.

@@ -354,7 +354,7 @@ data.riakcs.net

The following options are available to make adjustments to the Riak CS
garbage collection system. More details about garbage collection in Riak CS are
-available in [Garbage Collection](/riak/cs/2.0.1/cookbooks/garbage-collection).
+available in [Garbage Collection]({{< baseurl >}}riak/cs/2.0.1/cookbooks/garbage-collection).

* `gc.leeway_period` (`leeway_seconds` in `advanced.config` or `app.config`)
--- The amount of time that must elapse before an object version that has been

@@ -420,4 +420,4 @@ been deprecated, and _will be removed_ in the next major release.

## Other Riak CS Settings

For a complete listing of configurable parameters for Riak CS, see the
-[configuration reference](/riak/cs/2.0.1/cookbooks/configuration/reference) document.
+[configuration reference]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/reference) document.
diff --git a/content/riak/cs/2.0.1/cookbooks/configuration/riak-for-cs.md b/content/riak/cs/2.0.1/cookbooks/configuration/riak-for-cs.md
index 1175b3002f..21b6a96f57 100644
--- a/content/riak/cs/2.0.1/cookbooks/configuration/riak-for-cs.md
+++ b/content/riak/cs/2.0.1/cookbooks/configuration/riak-for-cs.md
@@ -21,21 +21,21 @@ reference document listing important configurable parameters.

## The Proper Backends for Riak CS

-The default backend used by Riak is the [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) backend, but the
+The default backend used by Riak is the [Bitcask]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/bitcask) backend, but the
Riak CS package includes a special backend that should be used by the
Riak cluster that is part of the Riak CS system. It is a custom version
-of the standard [Multi](/riak/kv/2.1.3/setup/planning/backend/multi) backend that ships with Riak.
+of the standard [Multi]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/multi) backend that ships with Riak.

Some of the Riak buckets used internally by Riak CS use secondary
-indexes, which currently requires the [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb) backend. Other parts
+indexes, which currently requires the [LevelDB]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/leveldb) backend. Other parts
of the Riak CS system can benefit from the use of the Bitcask backend.
-The use of the custom [Multi](/riak/kv/2.1.3/setup/planning/backend/multi) backend enables Riak CS to take
+The use of the custom [Multi]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/multi) backend enables Riak CS to take
advantage of the strengths of both of these backends to achieve the
best blend of performance and features. The next section covers how to
properly set up Riak to use this Multi backend.

Additionally, the Riak CS storage calculation system uses Riak's
-[MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce) to sum the files in a bucket. This means that you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage.
+[MapReduce]({{< baseurl >}}riak/kv/2.1.3/developing/usage/mapreduce) to sum the files in a bucket.
This means that you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage.

A few other settings must be modified to configure a Riak node as part
of a Riak CS system, such as the node IP address and the IP address and
@@ -46,7 +46,7 @@ configure a Riak node to work as part of a Riak CS system.

## Setting up the Proper Riak Backend

First, edit Riak's `riak.conf`, or the old-style `advanced.config` or
-`app.config` [configuration file](/riak/kv/2.1.3/configuring/reference). These files can be found in the `/etc/riak` or `/opt/riak/etc` directories. By default, Riak uses the [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) backend. The first thing we need to do is to change that by removing
+`app.config` [configuration file]({{< baseurl >}}riak/kv/2.1.3/configuring/reference). These files can be found in the `/etc/riak` or `/opt/riak/etc` directories. By default, Riak uses the [Bitcask]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/bitcask) backend. The first thing we need to do is to change that by removing
the following line:

```riakconf
@@ -117,7 +117,7 @@ to use the custom backend provided by Riak CS. We need to use either the
```

It's important to note that many of these values will depend on various
-directories specific to your [operating system](/riak/kv/2.1.3/setup/installing), so make sure to adjust them accordingly. The `add_paths`
+directories specific to your [operating system]({{< baseurl >}}riak/kv/2.1.3/setup/installing), so make sure to adjust them accordingly. The `add_paths`
parameter, for example, assumes that Riak CS is installed in
`/usr/lib/riak-cs`, while the `data_root` parameters assume that Riak is
installed in `/var/lib/`.

@@ -152,7 +152,7 @@ buckets.default.allow_mult = true
]}
```

-This will enable Riak to create [siblings](/riak/kv/2.1.3/learn/concepts/causal-context/#siblings), which is necessary for Riak CS to function. If you are connecting to Riak CS from a [client library](/riak/kv/2.1.3/developing/client-libraries), don't worry: you will not have to manage [conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), as all Riak CS
+This will enable Riak to create [siblings]({{< baseurl >}}riak/kv/2.1.3/learn/concepts/causal-context/#siblings), which is necessary for Riak CS to function. If you are connecting to Riak CS from a [client library]({{< baseurl >}}riak/kv/2.1.3/developing/client-libraries), don't worry: you will not have to manage [conflict resolution]({{< baseurl >}}riak/kv/2.1.3/developing/usage/conflict-resolution), as all Riak CS
operations are strongly consistent by definition.

{{% note title="Note on `allow_mult`" %}}
@@ -214,7 +214,7 @@ sure that you do not change the backend from `riak_cs_kv_multi_backend` to

## Setting Up Riak to Use Protocol Buffers

-The Riak [Protocol Buffers](/riak/kv/2.1.3/developing/api/protocol-buffers) settings reside in the Riak `riak.conf`,
+The Riak [Protocol Buffers]({{< baseurl >}}riak/kv/2.1.3/developing/api/protocol-buffers) settings reside in the Riak `riak.conf`,
or in the `riak_api` section of the the old-style `advanced.config` or
`app.config` files, which is located in the `/etc/riak/` folder. The
default host is `127.0.0.1` and the default port is `8087`.
You will need to change this if
@@ -343,4 +343,4 @@ javascript.hook_pool_size = 0
```

-[riak_conf_files]: http://docs.basho.com/riak/2.0.5/ops/advanced/configs/configuration-files/
+[riak_conf_files]: {{< baseurl >}}riak/kv/2.0.5/ops/advanced/configs/configuration-files/
diff --git a/content/riak/cs/2.0.1/cookbooks/configuration/stanchion.md b/content/riak/cs/2.0.1/cookbooks/configuration/stanchion.md
index b3c6f63447..85d33ba052 100644
--- a/content/riak/cs/2.0.1/cookbooks/configuration/stanchion.md
+++ b/content/riak/cs/2.0.1/cookbooks/configuration/stanchion.md
@@ -86,7 +86,7 @@ ssl.keyfile = "./etc/key.pem"

## Specifying the Admin User

-The admin user is created during the [configuration of Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs/#specifying-the-admin-user).
+The admin user is created during the [configuration of Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs/#specifying-the-admin-user).
The same user credentials must be added to each Stanchion used in the
cluster. This is set in the `stanchion.conf` file, which is located in the
`/etc/stanchion` directory. Enter the same `admin.key` and `admin.secret` as
diff --git a/content/riak/cs/2.0.1/cookbooks/configuration/transmit.md b/content/riak/cs/2.0.1/cookbooks/configuration/transmit.md
index 5aa6872d5f..e1c9772cbb 100644
--- a/content/riak/cs/2.0.1/cookbooks/configuration/transmit.md
+++ b/content/riak/cs/2.0.1/cookbooks/configuration/transmit.md
@@ -38,11 +38,11 @@ dialog as follows:

Defining a connection looks like this:

-![Trasmit screenshot](/images/riak_cs_transmit0.jpg)
+![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit0.jpg)

> **Note**
>
-> Transmit expects a secure connection, so ensure that your Riak CS proxy server is configured with SSL support. For information on configuring a software solution like HAProxy with SSL for use with Riak CS, see [Load Balancing and Proxy Configuration](/riak/cs/2.0.1/cookbooks/configuration/load-balancing-proxy).
+> Transmit expects a secure connection, so ensure that your Riak CS proxy server is configured with SSL support. For information on configuring a software solution like HAProxy with SSL for use with Riak CS, see [Load Balancing and Proxy Configuration]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/load-balancing-proxy).

Finally, test the connection to Riak CS by clicking **Connect**.

@@ -56,11 +56,11 @@ After successfully connecting to Riak CS, verify that you can create a
bucket.

The new bucket creation dialog looks like this:

-![Trasmit screenshot](/images/riak_cs_transmit1.jpg)
+![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit1.jpg)

The newly created bucket is listed in the right hand pane of the
Transmit interface:

-![Trasmit screenshot](/images/riak_cs_transmit2.jpg)
+![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit2.jpg)

## Copy Files

@@ -74,7 +74,7 @@ copying of the files to the bucket.

After copying, the files will appear in the bucket:

-![Trasmit screenshot](/images/riak_cs_transmit3.jpg)
+![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit3.jpg)

You have now successfully configured a Transmit connection to Riak CS
and verified basic file copying capabilities.
diff --git a/content/riak/cs/2.0.1/cookbooks/faqs/riak-cs.md b/content/riak/cs/2.0.1/cookbooks/faqs/riak-cs.md
index 427835fc3e..0985509bc9 100644
--- a/content/riak/cs/2.0.1/cookbooks/faqs/riak-cs.md
+++ b/content/riak/cs/2.0.1/cookbooks/faqs/riak-cs.md
@@ -15,7 +15,7 @@ aliases:

Q: What is Riak CS?
A:
- Riak CS is [multi-tenant](http://en.wikipedia.org/wiki/Multitenancy) cloud storage software for public and private clouds. Built on Basho's distributed database [Riak KV](/riak/kv/2.1.3), Riak CS is commercial software designed to provide simple, available, distributed cloud storage at any scale. Riak CS is S3 API compatible and supports per-tenant reporting for billing and metering use cases.
+ Riak CS is [multi-tenant](http://en.wikipedia.org/wiki/Multitenancy) cloud storage software for public and private clouds. Built on Basho's distributed database [Riak KV]({{< baseurl >}}riak/kv/2.1.3), Riak CS is commercial software designed to provide simple, available, distributed cloud storage at any scale. Riak CS is S3 API compatible and supports per-tenant reporting for billing and metering use cases.

Q: Can users share data?
A:
@@ -27,7 +27,7 @@ Q: Is it possible to specify a filesystem where my Riak CS buckets will live?
A:
You can specify the location of **all** Riak CS bucket data by changing the settings for Riak's backends to a path on a particular filesystem. If this is your goal, you can configure Riak to suit your environment. If you look at our example Riak `advanced.config`/`app.config` backend
- definition from the [Configuring Riak for CS](/riak/cs/2.0.1/cookbooks/configuration/riak-for-cs) section, it looks like this:
+ definition from the [Configuring Riak for CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-for-cs) section, it looks like this:

```advanced.config
{riak_kv, [
diff --git a/content/riak/cs/2.0.1/cookbooks/garbage-collection.md b/content/riak/cs/2.0.1/cookbooks/garbage-collection.md
index 7e062c03fe..94e79951dd 100644
--- a/content/riak/cs/2.0.1/cookbooks/garbage-collection.md
+++ b/content/riak/cs/2.0.1/cookbooks/garbage-collection.md
@@ -15,7 +15,7 @@ aliases:

This document describes some of the implementation details behind Riak
CS's garbage collection process. For information on configuring this
-system, please see our documentation on [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs).
+system, please see our documentation on [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs).

## Versions and Manifests

@@ -192,7 +192,7 @@ We recommend using only _one_ active garbage collection daemon in any
Riak CS cluster. If multiple daemons are currently being used, you can
disable the others by setting the `gc.interval` parameter to `infinity` on
those nodes. More information on how to do that can be found in the
-[CS configuration doc](/riak/cs/2.0.1/cookbooks/configuration/riak-cs/#garbage-collection-settings).
+[CS configuration doc]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs/#garbage-collection-settings).

## Controlling the GC Daemon

@@ -212,7 +212,7 @@ Command | Description
`set-interval` | Set or update the garbage collection interval. This setting uses a unit of seconds.
`set-leeway` | Set or update the garbage collection leeway time. This setting indicates how many seconds must elapse after an object is deleted or overwritten before the garbage collection system may reap the object. This setting uses a unit of seconds.

-For more information, see our documentation on [Riak CS command-line tools](/riak/cs/2.0.1/cookbooks/command-line-tools).
+For more information, see our documentation on [Riak CS command-line tools]({{< baseurl >}}riak/cs/2.0.1/cookbooks/command-line-tools).

## Manifest Updates

@@ -256,7 +256,7 @@ manifest keys that could linger indefinitely.
Riak CS's garbage collection implementation gives the deployer several
knobs to adjust for fine-tuning system performace. More information
-can be found in our documentation on [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs/#garbage-collection-settings).
+can be found in our documentation on [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs/#garbage-collection-settings).

## More Information

diff --git a/content/riak/cs/2.0.1/cookbooks/installing.md b/content/riak/cs/2.0.1/cookbooks/installing.md
index 5e1b71704d..b1488bb8f4 100644
--- a/content/riak/cs/2.0.1/cookbooks/installing.md
+++ b/content/riak/cs/2.0.1/cookbooks/installing.md
@@ -22,16 +22,16 @@ You can install Riak CS on a single node (for development purposes) or
using an automated deployment tool. Any Riak CS installation involves
three components, all of which must be installed separately:

-* [Riak KV](/riak/2.0.7/) --- The distributed database on top of which Riak CS
+* [Riak KV]({{< baseurl >}}riak/kv/2.0.7/) --- The distributed database on top of which Riak CS
is built
* Riak CS itself
-* [Stanchion](/riak/cs/2.0.1/theory/stanchion) --- An application used to manage [globally unique entities](/riak/cs/2.0.1/theory/stanchion/#globally-unique-entities) such as users and buckets.
+* [Stanchion]({{< baseurl >}}riak/cs/2.0.1/theory/stanchion) --- An application used to manage [globally unique entities]({{< baseurl >}}riak/cs/2.0.1/theory/stanchion/#globally-unique-entities) such as users and buckets.

[Riak KV](#installing-riak) and [Riak CS](#installing-riak-cs-on-a-node) must be installed on each node in your cluster. [Stanchion](#installing-stanchion-on-a-node), however, needs to be installed on only one node.

## Version Compatibility

-We strongly recommend using one of the documented [version combinations](/riak/cs/2.0.1/cookbooks/version-compatibility/)
+We strongly recommend using one of the documented [version combinations]({{< baseurl >}}riak/cs/2.0.1/cookbooks/version-compatibility/)
when installing and running Riak CS.

## Installing Riak KV

Before installing Riak CS, Riak KV must be installed on each node in
your cluster. You can install Riak KV either as part of an
OS-specific package or from source.

- * [Debian and Ubuntu](/riak/kv/2.0.7/setup/installing/debian-ubuntu)
- * [RHEL and CentOS](/riak/kv/2.0.7/setup/installing/rhel-centos)
- * [Mac OS X](/riak/kv/2.0.7/setup/installing/mac-osx)
- * [FreeBSD](/riak/kv/2.0.7/setup/installing/freebsd)
- * [SUSE](/riak/kv/2.0.7/setup/installing/suse)
- * [From Source](/riak/kv/2.0.7/setup/installing/source)
+ * [Debian and Ubuntu]({{< baseurl >}}riak/kv/2.0.7/setup/installing/debian-ubuntu)
+ * [RHEL and CentOS]({{< baseurl >}}riak/kv/2.0.7/setup/installing/rhel-centos)
+ * [Mac OS X]({{< baseurl >}}riak/kv/2.0.7/setup/installing/mac-osx)
+ * [FreeBSD]({{< baseurl >}}riak/kv/2.0.7/setup/installing/freebsd)
+ * [SUSE]({{< baseurl >}}riak/kv/2.0.7/setup/installing/suse)
+ * [From Source]({{< baseurl >}}riak/kv/2.0.7/setup/installing/source)

Riak KV is also officially supported on the following public cloud
infrastructures:

- * [Windows Azure](/riak/kv/2.0.7/setup/installing/windows-azure)
- * [AWS Marketplace](/riak/kv/2.0.7/setup/installing/amazon-web-services)
+ * [Windows Azure]({{< baseurl >}}riak/kv/2.0.7/setup/installing/windows-azure)
+ * [AWS Marketplace]({{< baseurl >}}riak/kv/2.0.7/setup/installing/amazon-web-services)

Remember that you must repeat this installation process on each node in
your cluster. For future reference, you should make note of the Riak KV
installation directory.
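As a concrete illustration of the per-node install step, here is a hedged sketch for Debian/Ubuntu; package names and repository setup vary by platform, so treat the platform links above as authoritative.

```bash
# Illustrative per-node install on Debian/Ubuntu; repeat on every node
# in the cluster.
sudo apt-get update
sudo apt-get install -y riak

# Record where the package landed for future reference; the paths shown
# by dpkg are typical for package installs, not guaranteed.
dpkg -L riak | head
```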
If you want to fully configure Riak KV prior to installing Riak CS, see our
-documentation on [configuring Riak KV for CS](/riak/cs/2.0.1/cookbooks/configuration/riak-for-cs/).
+documentation on [configuring Riak KV for CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-for-cs/).

## Installing Riak CS on a Node

-Riak CS and Stanchion packages are available on the [Download Riak CS](/riak/cs/2.0.1/downloads/)
-page. Similarly, Riak packages are available on the [Download Riak KV](/riak/kv/2.0.7/downloads/) page.
+Riak CS and Stanchion packages are available on the [Download Riak CS]({{< baseurl >}}riak/cs/2.0.1/downloads/)
+page. Similarly, Riak packages are available on the [Download Riak KV]({{< baseurl >}}riak/kv/2.0.7/downloads/) page.

After downloading Riak CS, Stanchion, and Riak, install them using your
operating system's package management commands.

@@ -78,7 +78,7 @@ such as a dedicated device [HAProxy](http://haproxy.1wt.eu) or [Nginx](http://wi

### Installing Riak CS on Mac OS X

To install Riak CS on OS X, first download the appropriate package from
-the [downloads](/riak/cs/2.0.1/downloads) page:
+the [downloads]({{< baseurl >}}riak/cs/2.0.1/downloads) page:

```bash
curl -O http://s3.amazonaws.com/downloads.basho.com/riak-cs/1.5/2.0.1/osx/10.8/riak-cs-2.0.1-OSX-x86_64.tar.gz
```

Then, unpack the downloaded tarball:

```bash
tar -xvzf riak-cs-2.0.1-OSX-x86_64.tar.gz
```

-At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs/).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs/).

### Installing Riak CS on Debian or Ubuntu

@@ -255,11 +255,11 @@ can achieve this by specifying a load balancer IP as the Stanchion IP
in each Riak CS node's `riak-cs.conf`. This load balancer must be
configured to send all requests to a single Stanchion node, failing
over to a secondary Stanchion node if the primary is unavailable. More
-details can be found in [Specifying the Stanchion Node](/riak/cs/2.0.1/cookbooks/configuration/#specifying-the-stanchion-node).
+details can be found in [Specifying the Stanchion Node]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/#specifying-the-stanchion-node).

### Installing Stanchion on Mac OS X

-First, download the appropriate package from the [downloads](/riak/cs/2.0.1/downloads/#stanchion-1-4-3) page.
+First, download the appropriate package from the [downloads]({{< baseurl >}}riak/cs/2.0.1/downloads/#stanchion-1-4-3) page.

```bash
curl -O http://s3.amazonaws.com/downloads.basho.com/stanchion/1.4/1.4.3/osx/10.8/stanchion-2.0.0-OSX-x86_64.tar.gz
```

Then, unpack the downloaded tarball:

```bash
stanchion-2.0.0-OSX-x86_64.tar.gz
```

-At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs).

### Installing Stanchion on Debian or Ubuntu

@@ -300,7 +300,7 @@ Now, install the `stanchion` package:

sudo apt-get install stanchion
```

-At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs).

#### Installing the `.deb` Package Manually (not recommended)

```bash
sudo dpkg -i 
```

Replace `` with the actual filename for the package you are
installing.

-At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs).

### Installing Stanchion on RHEL or CentOS

@@ -338,7 +338,7 @@ Once the `.rpm` package has been installed, install Stanchion:

sudo yum install stanchion
```

-At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs).

#### Installing the `.rpm` Package Manually (not recommended)

```bash
sudo rpm -Uvh 
```

Replace `` with the actual filename for the package you are
installing.

-At this point, you can move on to [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs).

> **Note on SELinux**
>
@@ -359,4 +359,4 @@ encounter errors during installation, try disabling SELinux.

## What's Next?

Once you've completed installation of Riak CS and Riak, you're ready to
-learn more about [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs).
+learn more about [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs).
diff --git a/content/riak/cs/2.0.1/cookbooks/installing/chef.md b/content/riak/cs/2.0.1/cookbooks/installing/chef.md
index e3a55b718b..6f930d224e 100644
--- a/content/riak/cs/2.0.1/cookbooks/installing/chef.md
+++ b/content/riak/cs/2.0.1/cookbooks/installing/chef.md
@@ -138,8 +138,8 @@ default['stanchion']['args']['-env']['ERL_CRASH_DUMP'] = "/var/log/stanchion/erl

#### Storage Backends

-Riak CS uses a specific combination of storage backends. [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) is used to
-store blocks and [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb) to store manifests. The `riak_cs_kv_multi_backend` must be specified in the Riak configuration file for Riak CS to work:
+Riak CS uses a specific combination of storage backends. [Bitcask]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/bitcask) is used to
+store blocks and [LevelDB]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/leveldb) to store manifests. The `riak_cs_kv_multi_backend` must be specified in the Riak configuration file for Riak CS to work:

```ruby
default['riak']['config']['riak_kv']['storage_backend'] = "riak_cs_kv_multi_backend"
@@ -183,5 +183,5 @@ default['stanchion']['config']['lager']['error_logger_redirect'] = true

More information related to cluster configuration and building development
environments is available in our documentation.

-* [Building a Local Test Environment](/riak/cs/2.0.1/tutorials/fast-track/local-testing-environment)
-* [Building a Virtual Testing Environment](/riak/cs/2.0.1/tutorials/fast-track/virtual-test-environment)
+* [Building a Local Test Environment]({{< baseurl >}}riak/cs/2.0.1/tutorials/fast-track/local-testing-environment)
+* [Building a Virtual Testing Environment]({{< baseurl >}}riak/cs/2.0.1/tutorials/fast-track/virtual-test-environment)
diff --git a/content/riak/cs/2.0.1/cookbooks/keystone-setup.md b/content/riak/cs/2.0.1/cookbooks/keystone-setup.md
index 5aff43edf0..1ba43ae169 100644
--- a/content/riak/cs/2.0.1/cookbooks/keystone-setup.md
+++ b/content/riak/cs/2.0.1/cookbooks/keystone-setup.md
@@ -73,7 +73,7 @@ pip install -r tools/pip-requires

The next step is to select the appropriate options in the
`keystone.conf` configuration file.
A sample configuration that is
-useful for local testing with Riak CS can be found [here](/riak/cs/2.0.1/cookbooks/keystone-conf-sample/). This configuration file sets up logging to
+useful for local testing with Riak CS can be found [here]({{< baseurl >}}riak/cs/2.0.1/cookbooks/keystone-conf-sample/). This configuration file sets up logging to
`./log/keystone/keystone.log` and uses the templated catalog backend to
set up the Riak CS object store service. This catalog backend uses a
local file to populate the service catalog.
diff --git a/content/riak/cs/2.0.1/cookbooks/logging.md b/content/riak/cs/2.0.1/cookbooks/logging.md
index bab5fcf89c..c279f1373e 100644
--- a/content/riak/cs/2.0.1/cookbooks/logging.md
+++ b/content/riak/cs/2.0.1/cookbooks/logging.md
@@ -41,4 +41,4 @@ That section looks something like this:
```

A full description of all available parameters can be found in the
-[configuration files](/riak/kv/2.1.3/configuring/reference) document for Riak.
+[configuration files]({{< baseurl >}}riak/kv/2.1.3/configuring/reference) document for Riak.
diff --git a/content/riak/cs/2.0.1/cookbooks/monitoring-and-metrics.md b/content/riak/cs/2.0.1/cookbooks/monitoring-and-metrics.md
index f130a6333f..20125930e1 100644
--- a/content/riak/cs/2.0.1/cookbooks/monitoring-and-metrics.md
+++ b/content/riak/cs/2.0.1/cookbooks/monitoring-and-metrics.md
@@ -10,11 +10,11 @@ menu:
project: "riak_cs"
project_version: "2.0.1"
aliases:
- - /riakcs/2.0.1/cookbooks/Monitoring-and-Metrics/
+ - /riakcs/2.0.1/cookbooks/monitoring-and-metrics/
---

[amazon]: http://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html
-[s3 api]: http://docs.basho.com/riakcs/latest/references/apis/storage/s3/
+[s3 api]: {{< baseurl >}}riak/cs/latest/references/apis/storage/s3/

Riak S2 (CS) includes metrics and operational statistics to help you monitor your system in more detail and diagnose system issues more easily. There are three major categories of metrics:
diff --git a/content/riak/cs/2.0.1/cookbooks/querying-access-statistics.md b/content/riak/cs/2.0.1/cookbooks/querying-access-statistics.md
index 598aaa4324..11c49ddb67 100644
--- a/content/riak/cs/2.0.1/cookbooks/querying-access-statistics.md
+++ b/content/riak/cs/2.0.1/cookbooks/querying-access-statistics.md
@@ -24,7 +24,7 @@ and access.
{{% /note %}}

For information about how access statistics are logged, please read
-[Usage and Billing Data](/riak/cs/2.0.1/cookbooks/usage-and-billing-data).
+[Usage and Billing Data]({{< baseurl >}}riak/cs/2.0.1/cookbooks/usage-and-billing-data).

The following sections discuss accessing the access statistics using
bare HTTP requests. Query parameters are used to specify the types and

@@ -81,7 +81,7 @@ HTTP/1.1 404 Object Not Found

> **Authentication Required**
>
> Queries to the usage resources described here must be authenticated as
-described in the [Authentication documentation](/riak/cs/2.0.1/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files.
+described in the [Authentication documentation]({{< baseurl >}}riak/cs/2.0.1/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files.

The usage HTTP resource provides both access and storage statistics.
Since each of these queries can be taxing in its own right, they are

@@ -234,7 +234,7 @@ the amount of time that may be retrieved in any request is limited.
The limit is configured by the `riak_cs` application environment variable `usage_request_limit`. The value is expressed as an integer -number of archive intervals (see [Usage and Billing Data](/riak/cs/2.0.1/cookbooks/usage-and-billing-data) for a +number of archive intervals (see [Usage and Billing Data]({{< baseurl >}}riak/cs/2.0.1/cookbooks/usage-and-billing-data) for a description of archive intervals). The default value is `744`, which is 31 days at the default archive diff --git a/content/riak/cs/2.0.1/cookbooks/querying-storage-statistics.md b/content/riak/cs/2.0.1/cookbooks/querying-storage-statistics.md index 0001ce550e..88699e061e 100644 --- a/content/riak/cs/2.0.1/cookbooks/querying-storage-statistics.md +++ b/content/riak/cs/2.0.1/cookbooks/querying-storage-statistics.md @@ -26,9 +26,9 @@ and access. > **Note**: > -> Storage statistics are not calculated by default. Please read [Usage and Billing Data](/riak/cs/2.0.1/cookbooks/usage-and-billing-data) for details about how to enable storage calculation archiving. +> Storage statistics are not calculated by default. Please read [Usage and Billing Data]({{< baseurl >}}riak/cs/2.0.1/cookbooks/usage-and-billing-data) for details about how to enable storage calculation archiving. -The basics of querying storage statistics, including the URL used and the parameters for specifying the time slice, are the same as they are for [Querying Access Statistics](/riak/cs/2.0.1/cookbooks/querying-access-statistics). +The basics of querying storage statistics, including the URL used and the parameters for specifying the time slice, are the same as they are for [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/querying-access-statistics). Please refer to the descriptions there for more details. @@ -39,7 +39,7 @@ been configured to something other than the default CS port of `8080`. > **Authentication Required** > -> Queries to the usage resources described here must be authenticated as described in the [Authentication documentation](/riak/cs/2.0.1/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files. +> Queries to the usage resources described here must be authenticated as described in the [Authentication documentation]({{< baseurl >}}riak/cs/2.0.1/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files. The usage HTTP resource provides both access and storage statistics. Since each of these queries can be taxing in its own right, they are both omitted from the result by default: @@ -97,7 +97,7 @@ There are no statistics included in this report because the default time span is ### S3 Object-style Access -As described in [Querying Access Statistics](/riak/cs/2.0.1/cookbooks/querying-access-statistics), these statistics are also available as S3 objects. To add storage statistics to the result, add the character `b` to the `Options` portion of the object's path. For example, the following command would produce storage statistics in XML format: +As described in [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/querying-access-statistics), these statistics are also available as S3 objects. To add storage statistics to the result, add the character `b` to the `Options` portion of the object's path.
For example, the following command would produce storage statistics in XML format: ```bash s3cmd get s3://riak-cs/usage/8NK4FH2SGKJJM8JIP2GU/bx/20120315T140000Z/20120315T160000Z diff --git a/content/riak/cs/2.0.1/cookbooks/release-notes.md b/content/riak/cs/2.0.1/cookbooks/release-notes.md index d730145bff..80e93c8671 100644 --- a/content/riak/cs/2.0.1/cookbooks/release-notes.md +++ b/content/riak/cs/2.0.1/cookbooks/release-notes.md @@ -13,7 +13,7 @@ aliases: - /riakcs/2.0.1/cookbooks/Riak-CS-Release-Notes/ - /riak/cs/2.0.1/cookbooks/Riak-CS-Release-Notes/ --- -[riak_cs_multibag_support]: /riak/cs/2.0.1/cookbooks/supercluster +[riak_cs_multibag_support]: {{< baseurl >}}riak/cs/2.0.1/cookbooks/supercluster [riak_cs_1.5_release_notes_upgrading]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#notes-on-upgrading [riak_cs_1.5_release_notes_upgrading_1]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#notes-on-upgrading-1 @@ -54,7 +54,7 @@ New metrics have been added that enable you to determine the health of your Riak * Memory information about the riak-cs virtual machine * HTTP listener information: active sockets and waiting acceptors -**Note:** stats item names from prior to 2.0.x are not preserved; they have been renamed or removed. No backward consistency is maintained. Please see [the documentation](docs.basho.com/riakcs/latest/cookbooks/Monitoring-and-Metrics/) for more information. +**Note:** stats item names from prior to 2.0.x are not preserved; they have been renamed or removed. No backward consistency is maintained. Please see [the documentation]({{< baseurl >}}riak/cs/latest/cookbooks/monitoring-and-metrics/) for more information. * [[PR 1189](https://github.com/basho/riak_cs/pull/1189)] * [[PR 1180](https://github.com/basho/riak_cs/pull/1180)] @@ -69,7 +69,7 @@ Additional storage usage metrics are also available. These metrics are gathere * [[PR 1120](https://github.com/basho/riak_cs/pull/1120)] #### `riak-cs-admin` -The following administration CLIs have been replaced by the [`riak-cs-admin` command](http://docs.basho.com/riakcs/latest/cookbooks/command-line-tools/): +The following administration CLIs have been replaced by the [`riak-cs-admin` command]({{< baseurl >}}riak/cs/latest/cookbooks/command-line-tools/): * `riak-cs-storage` * `riak-cs-gc` @@ -89,7 +89,7 @@ Several new options have been added to the `riak-cs-admin gc` command: * Riak S2 2.0 (and older) has a race condition where fullsync replication and garbage collection may resurrect deleted blocks without any way to delete them again. When real-time replication and replication of a garbage collection bucket entry object being dropped from the real-time queue are combined, blocks may remain on the sink side without being collected. Riak S2 2.1 introduces deterministic garbage collection to avoid fullsync replication. Additionally, garbage collection and fullsync replication run concurrently, and work on the same blocks and manifests. You can now specify the range of time using the `--start` and `--end` flags with `riak-cs-admin gc batch` for garbage collector in order to collect deleted objects synchronously on both sink and source sides. [[PR 1147](https://github.com/basho/riak_cs/pull/1147)] * `riak-cs-admin gc earliest-keys` is available so you can find the oldest entry after `epoch_start` in garbage collection. With this option, you can stay informed of garbage collection progress.
[[PR 1160](https://github.com/basho/riak_cs/pull/1160)] -More information on garbage collection can be found in the [documentation](http://docs.basho.com/riakcs/latest/cookbooks/garbage-collection/). +More information on garbage collection can be found in the [documentation]({{< baseurl >}}riak/cs/latest/cookbooks/garbage-collection/). ### Additions @@ -113,7 +113,7 @@ More information on garbage collection can be found in the [documentation](http: * An option has been added to replace the `PR=all user GET` option with `PR=one` just before authentication. This option improves latency, especially in the presence of slow (or actually-failing) nodes blocking the whole request flow because of PR=all. When enabled, a user's owned-bucket list is never pruned after a bucket is deleted, instead it is just marked as deleted. [[PR 1191](https://github.com/basho/riak_cs/pull/1191)] * An info log has been added when starting a storage calculation batch. [[PR 1238](https://github.com/basho/riak_cs/pull/1238)] * `GET Bucket` requests now have clearer responses. A 501 stub for Bucket lifecycle and a simple stub for Bucket requestPayment have been added. [[PR 1223](https://github.com/basho/riak_cs/pull/1223)] -* Several user-friendly features have been added to [`riak-cs-debug`](http://docs.basho.com/riakcs/latest/cookbooks/command-line-tools/): fine-grained information gathering options, user-defined filtering for configuration files, and verbose output for failed commands. [[PR 1236](https://github.com/basho/riak_cs/pull/1236)] +* Several user-friendly features have been added to [`riak-cs-debug`]({{< baseurl >}}riak/cs/latest/cookbooks/command-line-tools/): fine-grained information gathering options, user-defined filtering for configuration files, and verbose output for failed commands. [[PR 1236](https://github.com/basho/riak_cs/pull/1236)] #### Enterprise * MDC has `proxy_get`, which makes block objects propagate to site clusters when they are requested. Now, multibag configuration with MDC supports `proxy_get`. [[PR 1171](https://github.com/basho/riak_cs/pull/1171) and [PR 25](https://github.com/basho/riak_cs_multibag/pull/25)] @@ -526,7 +526,7 @@ None ### Download Please see the [Riak CS Downloads -Page](http://docs.basho.com/riakcs/latest/riakcs-downloads/). +Page]({{< baseurl >}}riak/cs/latest/downloads/). ### Feedback @@ -544,7 +544,7 @@ venues: ### Changes * Improve logging around failures with Riak - [riak_cs/#987](http://docs.basho.com/riak/latest/dev/using/libraries/) + [riak_cs/#987](https://github.com/basho/riak_cs/pull/987) * Add amendment log output when storing access stats into Riak failed [riak_cs/#988](https://github.com/basho/riak_cs/pull/988). This change prevents losing access stats logs in cases of temporary connection @@ -572,7 +572,7 @@ None ### Download Please see the [Riak CS Downloads -Page](http://docs.basho.com/riakcs/latest/riakcs-downloads) +Page]({{< baseurl >}}riak/cs/latest/downloads) ### Feedback @@ -589,7 +589,7 @@ venues: ### Additions -* Bucket restrictions --- Similar to S3, you can now limit the number of buckets created per user to prevent users from creating an unusually large number of buckets.
More details are included [here]({{< baseurl >}}riak/cs/latest/cookbooks/configuration/riak-cs/). ### Changes @@ -646,7 +646,7 @@ To avoid having a limit, set `max_buckets_per_user` to `unlimited`. ### Download -Please see the [Riak CS Downloads Page](http://docs.basho.com/riakcs/latest/riakcs-downloads/). +Please see the [Riak CS Downloads Page]({{< baseurl >}}riak/cs/latest/downloads/). ### Feedback @@ -662,7 +662,7 @@ Or via email at **info@basho.com**. ### Additions -* Added Multibag Technical Preview to Riak CS. More info is available [here](http://docs.basho.com/riakcs/latest/cookbooks/multibag/) +* Added Multibag Technical Preview to Riak CS. More info is available [here]({{< baseurl >}}riak/cs/latest/cookbooks/multibag/) * A new command `riak-cs-debug` including `cluster-info` [riak_cs/#769](https://github.com/basho/riak_cs/pull/769), [riak_cs/#832](https://github.com/basho/riak_cs/pull/832) * Tie up all existing commands into a new command `riak-cs-admin` [riak_cs/#839](https://github.com/basho/riak_cs/pull/839) * Add a command `riak-cs-admin stanchion` to switch Stanchion IP and port manually [riak_cs/#657](https://github.com/basho/riak_cs/pull/657) @@ -1003,7 +1003,7 @@ they will all share the name "struct". #### Additions -* Support query parameter authentication as specified in [http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html](Signing and Authenticating REST Requests). +* Support query parameter authentication as specified in [Signing and Authenticating REST Requests](http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html). ## Riak CS 1.0.1 diff --git a/content/riak/cs/2.0.1/cookbooks/rolling-upgrades.md b/content/riak/cs/2.0.1/cookbooks/rolling-upgrades.md index 61ed600c70..9fa8db4022 100644 --- a/content/riak/cs/2.0.1/cookbooks/rolling-upgrades.md +++ b/content/riak/cs/2.0.1/cookbooks/rolling-upgrades.md @@ -18,7 +18,7 @@ Each node in a Riak CS cluster contains settings that define its operating modes and API coverage. The following steps outline the process of upgrading Riak CS in a rolling fashion. -Be sure to check the Riak CS [Version Compatibility](/riak/cs/2.0.1/cookbooks/version-compatibility) chart to ensure that your version of Riak, Riak CS, and Stanchion have been tested to work together. As Basho supports upgrades from the previous two major versions, this document will cover upgrades from Riak CS 1.4.x and Riak CS 1.5.x. +Be sure to check the Riak CS [Version Compatibility]({{< baseurl >}}riak/cs/2.0.1/cookbooks/version-compatibility) chart to ensure that your version of Riak, Riak CS, and Stanchion have been tested to work together. As Basho supports upgrades from the previous two major versions, this document will cover upgrades from Riak CS 1.4.x and Riak CS 1.5.x. As Riak CS 2.0.0 only works with Riak 2.0.5, the underlying Riak installation *must* be upgraded to Riak 2.0.5. @@ -80,9 +80,9 @@ detailed description.
4. Upgrade Riak, Riak CS, and Stanchion. See the <a href="{{< baseurl >}}riak/cs/latest/downloads">Riak CS Downloads</a> and <a href="{{< baseurl >}}riak/kv/latest/downloads">Riak Downloads</a> pages to find the appropriate packages. **Debian** / **Ubuntu** @@ -160,7 +160,7 @@ detailed description. ]}, ``` - and so on. More details can be found at [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs). + and so on. More details can be found at [configuring Riak CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-cs).
Note on Memory Sizing
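For readers following the rolling-upgrade steps above, the per-node sequence can be sketched in shell form. This is a minimal sketch for Debian/Ubuntu, assuming a configured Basho package repository and the package names `riak` and `riak-cs`; it is not a substitute for the full procedure above.

```bash
# Stop Riak CS before Riak so the node stops taking S3 requests first
riak-cs stop
riak stop

# Upgrade the packages (repository setup and exact versions vary by site)
sudo apt-get update
sudo apt-get install riak riak-cs

# Bring the node back: Riak first, wait for KV, then Riak CS
# (the node name riak@127.0.0.1 is illustrative)
riak start
riak-admin wait-for-service riak_kv riak@127.0.0.1
riak-cs start
```

Repeat on each node in turn, verifying the node is healthy before moving on.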
diff --git a/content/riak/cs/2.0.1/cookbooks/supercluster.md b/content/riak/cs/2.0.1/cookbooks/supercluster.md index ed99c9644e..1f39b9543e 100644 --- a/content/riak/cs/2.0.1/cookbooks/supercluster.md +++ b/content/riak/cs/2.0.1/cookbooks/supercluster.md @@ -21,15 +21,15 @@ customers. It is not yet suitable for production use. While [Riak CS Enterprise](http://basho.com/riak-enterprise) enables you to distribute Riak CS objects across multiple data centers in a -[source/sink pattern](/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture), all linked clusters are treated the same. In Riak CS version 1.5.0, however, Basho has added **supercluster** support to Riak CS Enterprise. +[source/sink pattern]({{< baseurl >}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture), all linked clusters are treated the same. In Riak CS version 1.5.0, however, Basho has added **supercluster** support to Riak CS Enterprise. With supercluster support, you can store object manifests and blocks in separate clusters or groups of clusters, a.k.a. **a set of supercluster members**, enhancing the scalability and overall storage capabilities of a Riak CS installation. ## Supercluster members -A supercluster member is a set of clusters linked together via [Multi-Datacenter Replication](/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture)\(MDC). -Without MDC support, a supercluster member consists of a single cluster. With MDC support, however, a supercluster member can consist of several linked clusters. You can assign members **weights** that determine the likelihood that objects, blocks, and manifests will be stored there. For example, if you expect to use one supercluster member more heavily than another you can increase the weight of that member using the interface described in [Riak CS Command-line Tools](/riak/cs/2.0.1/cookbooks/command-line-tools). +A supercluster member is a set of clusters linked together via [Multi-Datacenter Replication]({{< baseurl >}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture) (MDC). +Without MDC support, a supercluster member consists of a single cluster. With MDC support, however, a supercluster member can consist of several linked clusters. You can assign members **weights** that determine the likelihood that objects, blocks, and manifests will be stored there. For example, if you expect to use one supercluster member more heavily than another you can increase the weight of that member using the interface described in [Riak CS Command-line Tools]({{< baseurl >}}riak/cs/2.0.1/cookbooks/command-line-tools). ## The Master Member @@ -134,7 +134,7 @@ That involves performing the following steps on each node: 1. Stop the node 2. Upgrade Stanchion to a version that supports Riak CS supercluster, i.e. Riak CS 1.5.0 and later -3. Set your desired Stanchion [configuration](/riak/cs/2.0.1/cookbooks/configuration/stanchion) +3. Set your desired Stanchion [configuration]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/stanchion) 4. Start Stanchion on each node ### Add Clusters @@ -145,7 +145,7 @@ connection information as explained above in the [supercluster Configuration](#s ### Set Weights -When a new supercluster member is added, you must first set the weight of that member to zero using the [`riak-cs-supercluster`](/riak/cs/2.0.1/cookbooks/command-line-tools) command-line interface. +When a new supercluster member is added, you must first set the weight of that member to zero using the [`riak-cs-supercluster`]({{< baseurl >}}riak/cs/2.0.1/cookbooks/command-line-tools) command-line interface.
The example below sets the weight of the recently added supercluster member `Alpha` to zero: @@ -177,9 +177,9 @@ supercluster feature. ## Command Line Interface Complete documentation for the `riak-cs-supercluster` interface can be found -in our documentation on [Riak CS Command Line Tools](/riak/cs/2.0.1/cookbooks/command-line-tools/#riak-cs-supercluster). +in our documentation on [Riak CS Command Line Tools]({{< baseurl >}}riak/cs/2.0.1/cookbooks/command-line-tools/#riak-cs-supercluster). ## Limitations -Riak CS supercluster does not currently support [proxy gets](/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter/#riak-cs-mdc-gets) from +Riak CS supercluster does not currently support [proxy gets]({{< baseurl >}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter/#riak-cs-mdc-gets) from sink clusters. diff --git a/content/riak/cs/2.0.1/cookbooks/system-features.md b/content/riak/cs/2.0.1/cookbooks/system-features.md index a4e9e20857..b3c7c64884 100644 --- a/content/riak/cs/2.0.1/cookbooks/system-features.md +++ b/content/riak/cs/2.0.1/cookbooks/system-features.md @@ -9,9 +9,9 @@ aliases: The following pages detail Riak CS's system features. -* [Access Control Lists](/riak/cs/2.0.1/cookbooks/access-control-lists) -* [Authentication](/riak/cs/2.0.1/cookbooks/authentication) -* [Monitoring and Metrics](/riak/cs/2.0.1/cookbooks/monitoring-and-metrics) -* [Querying Access Statistics](/riak/cs/2.0.1/cookbooks/querying-access-statistics) -* [Querying Storage Statistics](/riak/cs/2.0.1/cookbooks/querying-storage-statistics) -* [Usage and Billing Data](/riak/cs/2.0.1/cookbooks/usage-and-billing-data) +* [Access Control Lists]({{< baseurl >}}riak/cs/2.0.1/cookbooks/access-control-lists) +* [Authentication]({{< baseurl >}}riak/cs/2.0.1/cookbooks/authentication) +* [Monitoring and Metrics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/monitoring-and-metrics) +* [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/querying-access-statistics) +* [Querying Storage Statistics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/querying-storage-statistics) +* [Usage and Billing Data]({{< baseurl >}}riak/cs/2.0.1/cookbooks/usage-and-billing-data) diff --git a/content/riak/cs/2.0.1/cookbooks/usage-and-billing-data.md b/content/riak/cs/2.0.1/cookbooks/usage-and-billing-data.md index e86285a7bf..4c5f653110 100644 --- a/content/riak/cs/2.0.1/cookbooks/usage-and-billing-data.md +++ b/content/riak/cs/2.0.1/cookbooks/usage-and-billing-data.md @@ -28,7 +28,7 @@ and access. Access stats are tracked on a per-user basis, as rollups for slices of time. They are stored just like other Riak CS data, in the `cs.access` bucket in particular. For information about querying access statistics, -please read [Querying Access Statistics](/riak/cs/2.0.1/cookbooks/querying-access-statistics). +please read [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/querying-access-statistics). ## Overview @@ -71,7 +71,7 @@ logger determines the operation type by comparing the method, resource module, and path to a known table. For example, it knows that a `GET` on the *key* module with the `acl` query parameter in the path is a `KeyReadACL` operation. A `PUT` to the same resource without the `acl` -query parameter is a `KeyWrite` operation. See [Querying Access Statistics](/riak/cs/2.0.1/cookbooks/querying-access-statistics) for a list of all operation types. +query parameter is a `KeyWrite` operation. See [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/querying-access-statistics) for a list of all operation types. ### Log Accumulation @@ -181,7 +181,7 @@ slices of time.
They are stored in the same Riak cluster as other Riak CS data, in the `cs.storage` bucket. For detailed information about querying storage statistics, please read -[Querying Storage Statistics](/riak/cs/2.0.1/cookbooks/querying-storage-statistics). +[Querying Storage Statistics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/querying-storage-statistics). ### High Level @@ -201,7 +201,7 @@ The storage calculation system uses MapReduce to sum the files in a bucket. This means you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage. -See [Configuring Riak for CS](/riak/cs/2.0.1/cookbooks/configuration/riak-for-cs) for directions on setting this up. +See [Configuring Riak for CS]({{< baseurl >}}riak/cs/2.0.1/cookbooks/configuration/riak-for-cs) for directions on setting this up. ### Scheduling and Manual Triggering diff --git a/content/riak/cs/2.0.1/cookbooks/using-with-keystone.md b/content/riak/cs/2.0.1/cookbooks/using-with-keystone.md index 7023275dc4..ef1b6a001d 100644 --- a/content/riak/cs/2.0.1/cookbooks/using-with-keystone.md +++ b/content/riak/cs/2.0.1/cookbooks/using-with-keystone.md @@ -291,7 +291,7 @@ section of the Riak CS `advanced.config` or `app.config` files: ### Keystone Setup -Follow the procedures documented in [Keystone Setup](/riak/cs/2.0.1/cookbooks/keystone-setup) to set up and run +Follow the procedures documented in [Keystone Setup]({{< baseurl >}}riak/cs/2.0.1/cookbooks/keystone-setup) to set up and run Keystone. 1. Create a tenant called `test`: diff --git a/content/riak/cs/2.0.1/index.md b/content/riak/cs/2.0.1/index.md index c09bef5ae3..809f34349b 100644 --- a/content/riak/cs/2.0.1/index.md +++ b/content/riak/cs/2.0.1/index.md @@ -23,17 +23,17 @@ API is [Amazon S3 compatible](http://docs.aws.amazon.com/AmazonS3/latest/API/API and supports per-tenant reporting for use cases involving billing and metering. -Riak CS is open source and [free for download](/riak/cs/2.0.1/downloads). +Riak CS is open source and [free for download]({{< baseurl >}}riak/cs/2.0.1/downloads). ## Notable Riak CS Features ### Amazon S3-API Compatibility -Riak CS has a built-in S3 interface with S3 Access Control List ([ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html)) support, which means that you can both use existing S3 tools and frameworks to manage your data and also import and extract data from Amazon directly. The HTTP REST API supports service, bucket, and object-level operations to easily store and retrieve data. There is also support for the [OpenStack Swift API](/riak/cs/2.0.1/references/appendices/comparisons/swift/) +Riak CS has a built-in S3 interface with S3 Access Control List ([ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html)) support, which means that you can both use existing S3 tools and frameworks to manage your data and also import and extract data from Amazon directly. The HTTP REST API supports service, bucket, and object-level operations to easily store and retrieve data. There is also support for the [OpenStack Swift API]({{< baseurl >}}riak/cs/2.0.1/references/appendices/comparisons/swift/). ### Per-Tenant Visibility -With the Riak CS [Reporting API](/riak/cs/2.0.1/cookbooks/monitoring-and-metrics), you can access per-tenant usage data and statistics over network I/O. This reporting functionality supports use cases including accounting, +With the Riak CS [Reporting API]({{< baseurl >}}riak/cs/2.0.1/cookbooks/monitoring-and-metrics), you can access per-tenant usage data and statistics over network I/O.
This reporting functionality supports use cases including accounting, subscription, chargebacks, plugins with billing systems, efficient multi-department utilization, and much more. ### Supports Large Objects of Arbitrary Content Type, Plus Metadata diff --git a/content/riak/cs/2.0.1/references/apis/storage.md b/content/riak/cs/2.0.1/references/apis/storage.md index aa789091f7..132286d8c9 100644 --- a/content/riak/cs/2.0.1/references/apis/storage.md +++ b/content/riak/cs/2.0.1/references/apis/storage.md @@ -55,30 +55,30 @@ Multipart Uploads {{1.3.0-}} | Coming Soon | Planned for future release | ## Service-level Operations -* [GET Service](/riak/cs/2.0.1/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request +* [GET Service]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request ## Bucket-level Operations -* [GET Bucket](/riak/cs/2.0.1/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket -* [GET Bucket ACL](/riak/cs/2.0.1/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket -* [GET Bucket policy](/riak/cs/2.0.1/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket -* [PUT Bucket](/riak/cs/2.0.1/references/apis/storage/s3/put-bucket) --- Creates a new bucket -* [PUT Bucket ACL](/riak/cs/2.0.1/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions +* [GET Bucket]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket +* [GET Bucket ACL]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket +* [GET Bucket policy]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket +* [PUT Bucket]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-bucket) --- Creates a new bucket +* [PUT Bucket ACL]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions for a bucket -* [PUT Bucket policy](/riak/cs/2.0.1/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket -* [DELETE Bucket](/riak/cs/2.0.1/references/apis/storage/s3/delete-bucket) --- Deletes a bucket -* [DELETE Bucket policy](/riak/cs/2.0.1/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket +* [PUT Bucket policy]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket +* [DELETE Bucket]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/delete-bucket) --- Deletes a bucket +* [DELETE Bucket policy]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket ## Object-level Operations -* [GET Object](/riak/cs/2.0.1/references/apis/storage/s3/get-object) --- Retrieves an object -* [GET Object ACL](/riak/cs/2.0.1/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object -* [PUT Object](/riak/cs/2.0.1/references/apis/storage/s3/put-object) --- Stores an object to a bucket -* [PUT Object (Copy)](/riak/cs/2.0.1/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object -* [PUT Object ACL](/riak/cs/2.0.1/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object -* [HEAD
Object](/riak/cs/2.0.1/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object) -* [DELETE Object](/riak/cs/2.0.1/references/apis/storage/s3/delete-object) --- Deletes an object +* [GET Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-object) --- Retrieves an object +* [GET Object ACL]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object +* [PUT Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-object) --- Stores an object to a bucket +* [PUT Object (Copy)]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object +* [PUT Object ACL]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object +* [HEAD Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object) +* [DELETE Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/delete-object) --- Deletes an object ## Multipart Upload @@ -87,19 +87,19 @@ Object parts can be uploaded independently and in any order. After all parts are uploaded, Riak CS assembles an object out of the parts. When your object size reaches 100MB, you should consider using multipart uploads instead of uploading the object in a single operation. Read more -about multipart uploads on the [overview page](/riak/cs/2.0.1/cookbooks/multipart-upload-overview). +about multipart uploads on the [overview page]({{< baseurl >}}riak/cs/2.0.1/cookbooks/multipart-upload-overview). -* [Initiate Multipart Upload](/riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID -* [Upload Part](/riak/cs/2.0.1/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload -* [Complete Multipart Upload](/riak/cs/2.0.1/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts -* [Abort Multipart Upload](/riak/cs/2.0.1/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts -* [List Parts](/riak/cs/2.0.1/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload. -* [List Multipart Uploads](/riak/cs/2.0.1/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted. +* [Initiate Multipart Upload]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID +* [Upload Part]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload +* [Complete Multipart Upload]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts +* [Abort Multipart Upload]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts +* [List Parts]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload. +* [List Multipart Uploads]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
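To make the multipart flow above concrete, most S3 clients drive these operations automatically. A minimal sketch using `s3cmd`, assuming an s3cmd release new enough to support multipart (1.5+) configured for your Riak CS endpoint, and a hypothetical bucket `my-bucket`:

```bash
# s3cmd initiates the upload, sends each part, and completes the upload;
# the chunk size shown here is illustrative
s3cmd put large.iso s3://my-bucket/large.iso --multipart-chunk-size-mb=100

# List multipart uploads that are still in progress for the bucket
s3cmd multipart s3://my-bucket
```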
## Common Headers -* [Common Riak CS Request Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-request-headers) -* [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers) +* [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-request-headers) +* [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers) There are two storage API options for Riak CS. The first and most fully featured is the S3 API. There is also limited but improving support for @@ -118,13 +118,13 @@ resource modules. * Module: `riak_cs_s3_rewrite` * [Documentation](http://docs.aws.amazon.com/AmazonS3/latest/API/APIRest.html) -* [Mapping](/riak/cs/2.0.1/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api) +* [Mapping]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api) ### Openstack Object Storage API (v1) * Module: `riak_cs_oos_rewrite` * [Documentation](http://docs.openstack.org/api/openstack-object-storage/1.0/content/index.html) -* [Mapping](/riak/cs/2.0.1/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api) +* [Mapping]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api) Selecting an API is done by adding or changing the `rewrite_module` key in the Riak CS `riak-cs.conf` file, or the old-style `advanced.config` or `app.config` @@ -157,5 +157,5 @@ included when installing a Riak CS package or building from source. More details for each option can be found by following one of the links below: -* [S3 API](/riak/cs/2.0.1/references/apis/storage/s3/) -* [OpenStack API](/riak/cs/2.0.1/references/apis/storage/openstack/) +* [S3 API]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/) +* [OpenStack API]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/openstack/) diff --git a/content/riak/cs/2.0.1/references/apis/storage/openstack.md b/content/riak/cs/2.0.1/references/apis/storage/openstack.md index 1ec687e782..50c118fedf 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/openstack.md +++ b/content/riak/cs/2.0.1/references/apis/storage/openstack.md @@ -47,16 +47,16 @@ Update Object Metadata | Coming Soon | Planned for future release | ## Storage Account Services -* [List Containers](/riak/cs/2.0.1/references/apis/storage/openstack/list-containers) --- Lists the containers owned by an account +* [List Containers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/openstack/list-containers) --- Lists the containers owned by an account ## Storage Container Services -* [List Objects](/riak/cs/2.0.1/references/apis/storage/openstack/list-objects) --- Lists the objects in a container -* [Create Container](/riak/cs/2.0.1/references/apis/storage/openstack/create-container) --- Creates a new container -* [Delete Container](/riak/cs/2.0.1/references/apis/storage/openstack/delete-container) --- Deletes a container +* [List Objects]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/openstack/list-objects) --- Lists the objects in a container +* [Create Container]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/openstack/create-container) --- Creates a new container +* [Delete Container]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/openstack/delete-container) --- Deletes a container ## Storage Object Services -* [Get Object](/riak/cs/2.0.1/references/apis/storage/openstack/get-object) --- Retrieves an object -* [Create or Update Object](/riak/cs/2.0.1/references/apis/storage/openstack/create-object) --- Write
an object in a container -* [Delete Object](/riak/cs/2.0.1/references/apis/storage/openstack/delete-object) --- Delete an object from a container +* [Get Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/openstack/get-object) --- Retrieves an object +* [Create or Update Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/openstack/create-object) --- Write an object in a container +* [Delete Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/openstack/delete-object) --- Delete an object from a container diff --git a/content/riak/cs/2.0.1/references/apis/storage/s3.md b/content/riak/cs/2.0.1/references/apis/storage/s3.md index f8c4ae12fa..79e5ec90f5 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/s3.md +++ b/content/riak/cs/2.0.1/references/apis/storage/s3.md @@ -57,30 +57,30 @@ Multipart Uploads {{1.5.0+}} | ✓ | | ## Service-level Operations -* [GET Service](/riak/cs/2.0.1/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request +* [GET Service]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request ## Bucket-level Operations -* [GET Bucket](/riak/cs/2.0.1/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket -* [GET Bucket ACL](/riak/cs/2.0.1/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket -* [GET Bucket policy](/riak/cs/2.0.1/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket -* [PUT Bucket](/riak/cs/2.0.1/references/apis/storage/s3/put-bucket) --- Creates a new bucket -* [PUT Bucket ACL](/riak/cs/2.0.1/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions +* [GET Bucket]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket +* [GET Bucket ACL]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket +* [GET Bucket policy]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket +* [PUT Bucket]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-bucket) --- Creates a new bucket +* [PUT Bucket ACL]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions for a bucket -* [PUT Bucket policy](/riak/cs/2.0.1/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket -* [DELETE Bucket](/riak/cs/2.0.1/references/apis/storage/s3/delete-bucket) --- Deletes a bucket -* [DELETE Bucket policy](/riak/cs/2.0.1/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket +* [PUT Bucket policy]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket +* [DELETE Bucket]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/delete-bucket) --- Deletes a bucket +* [DELETE Bucket policy]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket ## Object-level Operations -* [GET Object](/riak/cs/2.0.1/references/apis/storage/s3/get-object) --- Retrieves an object -* [GET Object ACL](/riak/cs/2.0.1/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object -* [PUT Object](/riak/cs/2.0.1/references/apis/storage/s3/put-object) --- Stores an object to a bucket -* [PUT Object (Copy)](/riak/cs/2.0.1/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object -* [PUT Object ACL](/riak/cs/2.0.1/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated
with an object -* [HEAD Object](/riak/cs/2.0.1/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object) -* [DELETE Object](/riak/cs/2.0.1/references/apis/storage/s3/delete-object) --- Deletes an object +* [GET Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-object) --- Retrieves an object +* [GET Object ACL]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object +* [PUT Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-object) --- Stores an object to a bucket +* [PUT Object (Copy)]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object +* [PUT Object ACL]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object +* [HEAD Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object) +* [DELETE Object]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/delete-object) --- Deletes an object ## Multipart Upload @@ -89,16 +89,16 @@ Object parts can be uploaded independently and in any order. After all parts are uploaded, Riak CS assembles an object out of the parts. When your object size reaches 100MB, you should consider using multipart uploads instead of uploading the object in a single operation. Read more -about multipart uploads on the [overview page](/riak/cs/2.0.1/cookbooks/multipart-upload-overview). +about multipart uploads on the [overview page]({{< baseurl >}}riak/cs/2.0.1/cookbooks/multipart-upload-overview). -* [Initiate Multipart Upload](/riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID -* [Upload Part](/riak/cs/2.0.1/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload -* [Complete Multipart Upload](/riak/cs/2.0.1/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts -* [Abort Multipart Upload](/riak/cs/2.0.1/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts -* [List Parts](/riak/cs/2.0.1/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload. -* [List Multipart Uploads](/riak/cs/2.0.1/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted. +* [Initiate Multipart Upload]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID +* [Upload Part]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload +* [Complete Multipart Upload]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts +* [Abort Multipart Upload]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts +* [List Parts]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload. +* [List Multipart Uploads]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
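One operational point worth illustrating: the parts of an interrupted upload keep consuming storage until the upload is aborted. A hedged sketch of cleaning up a stale upload with `s3cmd` (the bucket, key, and `UPLOAD_ID` are placeholders; `abortmp` requires s3cmd 1.5 or later):

```bash
# Find the IDs of unfinished uploads in the bucket
s3cmd multipart s3://my-bucket

# Abort one of them so Riak CS can eventually reclaim the parts' storage
s3cmd abortmp s3://my-bucket/large.iso UPLOAD_ID
```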
## Common Headers -* [Common Riak CS Request Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-request-headers) -* [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers) +* [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-request-headers) +* [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers) diff --git a/content/riak/cs/2.0.1/references/apis/storage/s3/abort-multipart-upload.md b/content/riak/cs/2.0.1/references/apis/storage/s3/abort-multipart-upload.md index a9993dfddf..2bd80138f0 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/s3/abort-multipart-upload.md +++ b/content/riak/cs/2.0.1/references/apis/storage/s3/abort-multipart-upload.md @@ -27,7 +27,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Request Elements @@ -37,7 +37,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.0.1/references/apis/storage/s3/complete-multipart-upload.md b/content/riak/cs/2.0.1/references/apis/storage/s3/complete-multipart-upload.md index 140d5885b9..9474a9a5cc 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/s3/complete-multipart-upload.md +++ b/content/riak/cs/2.0.1/references/apis/storage/s3/complete-multipart-upload.md @@ -45,7 +45,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Request Elements @@ -75,7 +75,7 @@ This implementation of the operation uses only response headers that are common ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers).
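To make the Complete Multipart Upload exchange documented above concrete, here is a minimal sketch with `curl`. The host, object key, upload ID, and ETag are placeholders, and the `Authorization` header a real request needs is omitted for brevity:

```bash
# Request body listing each uploaded part and the ETag returned for it
cat > parts.xml <<'EOF'
<CompleteMultipartUpload>
  <Part>
    <PartNumber>1</PartNumber>
    <ETag>"b54357faf0632cce46e942fa68356b38"</ETag>
  </Part>
</CompleteMultipartUpload>
EOF

# Completing the upload is a POST to the object key with the uploadId parameter
curl -X POST "http://cs.example.com/large.iso?uploadId=UPLOAD_ID" --data @parts.xml
```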
### Response Elements diff --git a/content/riak/cs/2.0.1/references/apis/storage/s3/delete-bucket-policy.md b/content/riak/cs/2.0.1/references/apis/storage/s3/delete-bucket-policy.md index fc61123352..67c07061aa 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/s3/delete-bucket-policy.md +++ b/content/riak/cs/2.0.1/references/apis/storage/s3/delete-bucket-policy.md @@ -29,7 +29,7 @@ This operation does not use request parameters. ### Request Headers -This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-request-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -39,7 +39,7 @@ No body should be appended. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.0.1/references/apis/storage/s3/get-bucket-policy.md b/content/riak/cs/2.0.1/references/apis/storage/s3/get-bucket-policy.md index bb93be0983..e3da5376b2 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/s3/get-bucket-policy.md +++ b/content/riak/cs/2.0.1/references/apis/storage/s3/get-bucket-policy.md @@ -31,7 +31,7 @@ This operation does not use request parameters. ### Request Headers -This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-request-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -41,7 +41,7 @@ No body should be appended. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload.md b/content/riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload.md index 3ddb1aedf6..e14c84f66b 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload.md +++ b/content/riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload.md @@ -55,7 +55,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.0.1/references/apis/storage/s3/list-multipart-uploads.md b/content/riak/cs/2.0.1/references/apis/storage/s3/list-multipart-uploads.md index 2d33958bbe..26cb4ed8d0 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/s3/list-multipart-uploads.md +++ b/content/riak/cs/2.0.1/references/apis/storage/s3/list-multipart-uploads.md @@ -54,7 +54,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Request Elements @@ -64,7 +64,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.0.1/references/apis/storage/s3/list-parts.md b/content/riak/cs/2.0.1/references/apis/storage/s3/list-parts.md index 212188aacb..6c46ff5922 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/s3/list-parts.md +++ b/content/riak/cs/2.0.1/references/apis/storage/s3/list-parts.md @@ -43,7 +43,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Request Elements @@ -53,7 +53,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers).
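For comparison, the List Parts request documented above is a plain `GET` on the object key with the `uploadId` query parameter. A sketch with placeholder host, key, and upload ID, again omitting the required `Authorization` header:

```bash
# Returns an XML ListPartsResult describing each uploaded part
curl "http://cs.example.com/large.iso?uploadId=UPLOAD_ID"
```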
### Response Elements diff --git a/content/riak/cs/2.0.1/references/apis/storage/s3/put-bucket-policy.md b/content/riak/cs/2.0.1/references/apis/storage/s3/put-bucket-policy.md index b9a889c2b4..4c21dc2e58 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/s3/put-bucket-policy.md +++ b/content/riak/cs/2.0.1/references/apis/storage/s3/put-bucket-policy.md @@ -37,7 +37,7 @@ This operation does not use request parameters. ### Request Headers -This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-request-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -88,7 +88,7 @@ More information on S3 Policies can be found in Amazon's [Permissions And Polici ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.0.1/references/apis/storage/s3/upload-part.md b/content/riak/cs/2.0.1/references/apis/storage/s3/upload-part.md index 848af90e9d..d902f066e8 100644 --- a/content/riak/cs/2.0.1/references/apis/storage/s3/upload-part.md +++ b/content/riak/cs/2.0.1/references/apis/storage/s3/upload-part.md @@ -9,7 +9,7 @@ aliases: - /riak/cs/2.0.1/references/apis/storage/s3/RiakCS-Upload-Part/ --- -This operation uploads a part in a multipart upload. You must [initiate a multipart upload](/riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload) before you can upload any part. In this operation you provide part data in your request. +This operation uploads a part in a multipart upload. You must [initiate a multipart upload]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload) before you can upload any part. In this operation you provide part data in your request. ## Requests @@ -54,7 +54,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/common-response-headers). ### Response Elements @@ -64,7 +64,7 @@ This operation does not use response elements. ### Sample Request -The following `PUT` request uploads part number 1 in a multipart upload. This request includes the upload ID from an [Initiate Multipart Upload](/riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload) request. +The following `PUT` request uploads part number 1 in a multipart upload. This request includes the upload ID from an [Initiate Multipart Upload]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/initiate-multipart-upload) request.
``` PUT /large.iso?partNumber=1&uploadId=VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA HTTP/1.1 @@ -79,7 +79,7 @@ Authorization: AWS AKIAIOSFODNN7EXAMPLE:VGhpcyBtZXNzYWdlIHNpZ25lZGGieSRlbHZpbmc= ### Sample Response -The response includes the `ETag` header. This value must be retained for when you send the [Complete Multipart Upload](/riak/cs/2.0.1/references/apis/storage/s3/complete-multipart-upload) request. +The response includes the `ETag` header. This value must be retained for when you send the [Complete Multipart Upload]({{< baseurl >}}riak/cs/2.0.1/references/apis/storage/s3/complete-multipart-upload) request. ``` HTTP/1.1 200 OK diff --git a/content/riak/cs/2.0.1/references/appendices/http-admin.md b/content/riak/cs/2.0.1/references/appendices/http-admin.md index 8947b03aec..e6d0763ab2 100644 --- a/content/riak/cs/2.0.1/references/appendices/http-admin.md +++ b/content/riak/cs/2.0.1/references/appendices/http-admin.md @@ -19,10 +19,10 @@ above and beyond those associated with Riak itself: Task | CS URI | Further reading :----|:-------|:--------------- -User management | `/riak-cs/user` | [Account Management](/riak/cs/2.0.1/cookbooks/account-management) -User access statistics | `/riak-cs/usage` | [Querying Access Statistics](/riak/cs/2.0.1/cookbooks/querying-access-statistics) -Storage statistics | `/riak-cs/usage` | [Querying Storage Statistics](/riak/cs/2.0.1/cookbooks/querying-storage-statistics) -Global statistics | `/riak-cs/stats` | [Monitoring and Metrics](/riak/cs/2.0.1/cookbooks/monitoring-and-metrics) +User management | `/riak-cs/user` | [Account Management]({{< baseurl >}}riak/cs/2.0.1/cookbooks/account-management) +User access statistics | `/riak-cs/usage` | [Querying Access Statistics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/querying-access-statistics) +Storage statistics | `/riak-cs/usage` | [Querying Storage Statistics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/querying-storage-statistics) +Global statistics | `/riak-cs/stats` | [Monitoring and Metrics]({{< baseurl >}}riak/cs/2.0.1/cookbooks/monitoring-and-metrics) By default, these are accessible over the same IP/port as the rest of the CS API, but they can be configured to run elsewhere, with or without @@ -52,13 +52,13 @@ details.
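Given the table of admin resources above, a quick smoke test is to request the global statistics resource directly. A sketch assuming Riak CS is listening on the default `127.0.0.1:8080` and that the request is either signed or admin authentication is disabled in your test configuration; `KEY_ID`, the options string, and the timestamps are placeholders:

```bash
# Global node statistics as JSON
curl -s http://127.0.0.1:8080/riak-cs/stats

# Access statistics for one user over a time range (options string is
# illustrative; see Querying Access Statistics for the exact format)
curl -s "http://127.0.0.1:8080/riak-cs/usage/KEY_ID/aj/20120315T140000Z/20120315T160000Z"
```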
## Related Resources -* [configuring Riak CS](/riak/cs/2.0.1/cookbooks/configuration/riak-cs) -* [Querying Access Statistics](/riak/cs/2.0.1/cookbooks/querying-access-statistics) - * [Usage and Billing Data](/riak/cs/2.0.1/cookbooks/usage-and-billing-data) +* [configuring Riak CS]({{}}riak/cs/2.0.1/cookbooks/configuration/riak-cs) +* [Querying Access Statistics]({{}}riak/cs/2.0.1/cookbooks/querying-access-statistics) + * [Usage and Billing Data]({{}}riak/cs/2.0.1/cookbooks/usage-and-billing-data) * [Github wiki](https://github.com/basho/riak_cs/wiki/Querying-Access-Stats) -* [Querying Storage Statistics](/riak/cs/2.0.1/cookbooks/querying-storage-statistics) +* [Querying Storage Statistics]({{}}riak/cs/2.0.1/cookbooks/querying-storage-statistics) * [Enabling storage statistics](https://github.com/basho/riak_cs/wiki/Logging-Storage-Stats) * [Github wiki](https://github.com/basho/riak_cs/wiki/Logging-Storage-Stats) -* [Account Management](/riak/cs/2.0.1/cookbooks/account-management) +* [Account Management]({{}}riak/cs/2.0.1/cookbooks/account-management) * [Github wiki](https://github.com/basho/riak_cs/wiki/User-Management) -* [Monitoring and Metrics](/riak/cs/2.0.1/cookbooks/monitoring-and-metrics) +* [Monitoring and Metrics]({{}}riak/cs/2.0.1/cookbooks/monitoring-and-metrics) diff --git a/content/riak/cs/2.0.1/references/appendices/riak-cs-control.md b/content/riak/cs/2.0.1/references/appendices/riak-cs-control.md index 81e74191af..0602ef2ce8 100644 --- a/content/riak/cs/2.0.1/references/appendices/riak-cs-control.md +++ b/content/riak/cs/2.0.1/references/appendices/riak-cs-control.md @@ -20,7 +20,7 @@ managing users in a Riak CS Cluster. ## Installing Riak CS Control -Riak CS Control [is maintained as a separate application](https://github.com/basho/riak_cs_control) and can be installed via [source or package](/riak/cs/2.0.1/downloads). +Riak CS Control [is maintained as a separate application](https://github.com/basho/riak_cs_control) and can be installed via [source or package]({{}}riak/cs/2.0.1/downloads). ## Setting Up Riak CS Control @@ -67,7 +67,7 @@ riak-cs-control start When you first navigate to the Riak CS Control UI, you will land on the Users page: -![Users Page](/images/cs_control_users.png) +![Users Page]({{}}images/cs_control_users.png) On this page you can quickly see all current Riak CS users along with their status, e-mail address, and credentials. From here you can filter, diff --git a/content/riak/cs/2.0.1/theory/stanchion.md b/content/riak/cs/2.0.1/theory/stanchion.md index 57e511b5ce..ca19c0a283 100644 --- a/content/riak/cs/2.0.1/theory/stanchion.md +++ b/content/riak/cs/2.0.1/theory/stanchion.md @@ -25,9 +25,9 @@ Riak CS cluster at any time. Correspondingly, your Stanchion installation must be managed and configured separately. For more information, see the following documents: -* [Configuring Stanchion](/riak/cs/2.0.1/cookbooks/configuration/stanchion) -* [Installing Stanchion](/riak/cs/2.0.1/cookbooks/installing#installing-stanchion-on-a-node) -* [The Stantion Command-line Interface](/riak/cs/2.0.1/cookbooks/command-line-tools#stanchion) +* [Configuring Stanchion]({{}}riak/cs/2.0.1/cookbooks/configuration/stanchion) +* [Installing Stanchion]({{}}riak/cs/2.0.1/cookbooks/installing#installing-stanchion-on-a-node) +* [The Stanchion Command-line Interface]({{}}riak/cs/2.0.1/cookbooks/command-line-tools#stanchion) For a more in-depth discussion of implementation details, see the project's @@ -51,7 +51,7 @@ rejected.
The uniqueness of these entities is enforced by serializing any creation or modification requests that involve them. This process is handled by Stanchion. What happens under the hood is essentially that Stanchion -mandates that all [vnodes](/riak/kv/2.1.3/learn/glossary#vnode) in the underlying Riak cluster that are responsible for the user or bucket being created must be available at creation time. +mandates that all [vnodes]({{}}riak/kv/2.1.3/learn/glossary#vnode) in the underlying Riak cluster that are responsible for the user or bucket being created must be available at creation time. One result of this enforcement is that user creation requests and bucket creation or modification, i.e. deletion, requests are not highly diff --git a/content/riak/cs/2.0.1/tutorials/fast-track/local-testing-environment.md b/content/riak/cs/2.0.1/tutorials/fast-track/local-testing-environment.md index e00de5ab4c..da43379069 100644 --- a/content/riak/cs/2.0.1/tutorials/fast-track/local-testing-environment.md +++ b/content/riak/cs/2.0.1/tutorials/fast-track/local-testing-environment.md @@ -20,7 +20,7 @@ does not attempt to optimize your installation for your particular architecture. If you want to build a testing environment with a minimum of -configuration, there is an option for [Building a Virtual Testing Environment](/riak/cs/2.0.1/tutorials/fast-track/virtual-test-environment). +configuration, there is an option for [Building a Virtual Testing Environment]({{}}riak/cs/2.0.1/tutorials/fast-track/virtual-test-environment). ## Installing Your First Node @@ -30,7 +30,7 @@ and running Riak and Riak CS. ### Step 1: Raise your system's open file limits Riak can consume a large number of open file handles during normal -operation. See the [Open Files Limit](/riak/kv/2.1.3/using/performance/open-files-limit) document for more information on +operation. See the [Open Files Limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit) document for more information on how to increase your system's open files limit. If you are the root user, you can increase the system's open files limit @@ -52,7 +52,7 @@ riak soft nofile 65536 riak hard nofile 65536 ``` -For Mac OS X, consult the [open files limit](/riak/kv/2.1.3/using/performance/open-files-limit/#mac-os-x) documentation. +For Mac OS X, consult the [open files limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit/#mac-os-x) documentation. ### Step 2: Download and install packages @@ -67,14 +67,14 @@ sudo apt-get install -y curl substitute the appropriate CLI commands. If you are running Ubuntu 11.10 or later, you will also need the -`libssl0.9.8` package. See [Installing on Debian and Ubuntu](/riak/kv/2.1.3/setup/installing/debian-ubuntu) for more information. +`libssl0.9.8` package. See [Installing on Debian and Ubuntu]({{}}riak/kv/2.1.3/setup/installing/debian-ubuntu) for more information. ```bash sudo apt-get install -y libssl0.9.8 ``` Now, grab the appropriate packages: Riak, Riak CS, and Stanchion. See -[Download Riak](/riak/kv/2.1.3/downloads/) and [Download Riak CS](/riak/cs/2.0.1/downloads). +[Download Riak]({{}}riak/kv/2.1.3/downloads/) and [Download Riak CS]({{}}riak/cs/2.0.1/downloads). You can skip Riak CS Control for now. Once you have the packages, install them per the instructions below. @@ -87,14 +87,14 @@ installing Riak. 
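On a Debian or Ubuntu host, the package installation step described above might look like the following sketch. The package file names are illustrative assumptions; substitute the exact files you downloaded:

```bash
# Hypothetical package file names -- use the versions you actually downloaded.
sudo dpkg -i riak_2.1.3-1_amd64.deb
sudo dpkg -i riak-cs_2.0.1-1_amd64.deb

# Stanchion only needs to be installed on ONE node in the cluster.
sudo dpkg -i stanchion_2.0.0-1_amd64.deb
```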
**Do not attempt to configure or start Riak until step 3 in this document.** - * [Debian and Ubuntu](/riak/kv/2.1.3/setup/installing/debian-ubuntu) - * [RHEL and CentOS](/riak/kv/2.1.3/setup/installing/rhel-centos) - * [Mac OS X](/riak/kv/2.1.3/setup/installing/mac-osx) - * [FreeBSD](/riak/kv/2.1.3/setup/installing/freebsd) - * [SUSE](/riak/kv/2.1.3/setup/installing/suse) - * [Windows Azure](/riak/kv/2.1.3/setup/installing/windows-azure) - * [AWS Marketplace](/riak/kv/2.1.3/setup/installing/amazon-web-services) - * [From Source](/riak/kv/2.1.3/setup/installing/source) + * [Debian and Ubuntu]({{}}riak/kv/2.1.3/setup/installing/debian-ubuntu) + * [RHEL and CentOS]({{}}riak/kv/2.1.3/setup/installing/rhel-centos) + * [Mac OS X]({{}}riak/kv/2.1.3/setup/installing/mac-osx) + * [FreeBSD]({{}}riak/kv/2.1.3/setup/installing/freebsd) + * [SUSE]({{}}riak/kv/2.1.3/setup/installing/suse) + * [Windows Azure]({{}}riak/kv/2.1.3/setup/installing/windows-azure) + * [AWS Marketplace]({{}}riak/kv/2.1.3/setup/installing/amazon-web-services) + * [From Source]({{}}riak/kv/2.1.3/setup/installing/source) #### Next, install Riak CS @@ -458,7 +458,7 @@ your first node with two exceptions: You will then need to verify the cluster plan with the `riak-admin cluster plan` command, and commit the cluster changes with `riak-admin cluster commit` to complete the join process. More information is -available in the [Command Line Tools](/riak/kv/2.1.3/using/admin/riak-admin/#cluster) documentation. +available in the [Command Line Tools]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster) documentation. > **Note** > @@ -469,4 +469,4 @@ such as a dedicated device, [HAProxy](http://haproxy.1wt.eu), or [Nginx](http://wiki.nginx.org/Main) between Riak CS and the outside world. -Once you have completed this step, You can progress to [testing the Riak CS installation](/riak/cs/2.0.1/tutorials/fast-track/test-installation) using s3cmd. +Once you have completed this step, you can progress to [testing the Riak CS installation]({{}}riak/cs/2.0.1/tutorials/fast-track/test-installation) using s3cmd. diff --git a/content/riak/cs/2.0.1/tutorials/fast-track/test-installation.md b/content/riak/cs/2.0.1/tutorials/fast-track/test-installation.md index 15707cae7d..11522853d2 100644 --- a/content/riak/cs/2.0.1/tutorials/fast-track/test-installation.md +++ b/content/riak/cs/2.0.1/tutorials/fast-track/test-installation.md @@ -141,6 +141,6 @@ bit of learning to be done, so make sure and check out the Reference section (click "Reference" on the nav on the left side of this page).
A few items that may be of particular interest: -* [Details about API operations](/riak/cs/2.0.1/references/apis/storage) -* [Information about the Ruby Fog client](/riak/cs/2.0.1/cookbooks/fog) -* [Release Notes](/riak/cs/2.0.1/cookbooks/release-notes) +* [Details about API operations]({{}}riak/cs/2.0.1/references/apis/storage) +* [Information about the Ruby Fog client]({{}}riak/cs/2.0.1/cookbooks/fog) +* [Release Notes]({{}}riak/cs/2.0.1/cookbooks/release-notes) diff --git a/content/riak/cs/2.0.1/tutorials/fast-track/virtual-test-environment.md b/content/riak/cs/2.0.1/tutorials/fast-track/virtual-test-environment.md index 9b52505756..843935006e 100644 --- a/content/riak/cs/2.0.1/tutorials/fast-track/virtual-test-environment.md +++ b/content/riak/cs/2.0.1/tutorials/fast-track/virtual-test-environment.md @@ -22,7 +22,7 @@ want to tune the OS or node/memory count, you'll have to edit the If you want to build a testing environment with more flexibility in configuration and durability across environment resets, there are -instructions for [Building a Local Test Environment](/riak/cs/2.0.1/tutorials/fast-track/local-testing-environment). +instructions for [Building a Local Test Environment]({{}}riak/cs/2.0.1/tutorials/fast-track/local-testing-environment). ## Configuration @@ -87,7 +87,7 @@ Secret key: RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw== ## Next Steps Congratulations! You have deployed a virtualized environment of Riak CS. -You are ready to progress to [Testing the Riak CS Installation](/riak/cs/2.0.1/tutorials/fast-track/test-installation). +You are ready to progress to [Testing the Riak CS Installation]({{}}riak/cs/2.0.1/tutorials/fast-track/test-installation). ### Stopping Your Virtual Environment diff --git a/content/riak/cs/2.0.1/tutorials/fast-track/what-is-riak-cs.md b/content/riak/cs/2.0.1/tutorials/fast-track/what-is-riak-cs.md index 009a6ed8b3..d6654a5845 100644 --- a/content/riak/cs/2.0.1/tutorials/fast-track/what-is-riak-cs.md +++ b/content/riak/cs/2.0.1/tutorials/fast-track/what-is-riak-cs.md @@ -35,11 +35,11 @@ automatically take over the responsibility of failed or non-communicative nodes, data remains available even in the event of node failure or network partition. -When an object is uploaded via the [storage API](/riak/cs/2.0.1/references/apis/storage), Riak CS breaks the object into smaller chunks that are streamed, +When an object is uploaded via the [storage API]({{}}riak/cs/2.0.1/references/apis/storage), Riak CS breaks the object into smaller chunks that are streamed, written, and replicated in Riak. Each chunk is associated with metadata for later retrieval. The diagram below provides a visualization. -![Riak CS Chunking](/images/Riak-CS-Overview.png) +![Riak CS Chunking]({{}}images/Riak-CS-Overview.png) ## Riak CS Enterprise diff --git a/content/riak/cs/2.1.0/cookbooks/access-control-lists.md b/content/riak/cs/2.1.0/cookbooks/access-control-lists.md index 8514d051e7..03bc8422c4 100644 --- a/content/riak/cs/2.1.0/cookbooks/access-control-lists.md +++ b/content/riak/cs/2.1.0/cookbooks/access-control-lists.md @@ -82,9 +82,9 @@ Riak CS permissions are split into two types: **bucket permissions** and ## Buckets -Bucket names **must** be [globally unique](/riak/cs/2.1.0/theory/stanchion/#globally-unique-entities). To avoid conflicts, all +Bucket names **must** be [globally unique]({{}}riak/cs/2.1.0/theory/stanchion/#globally-unique-entities). 
To avoid conflicts, all bucket creation requests are made to an application called -[Stanchion](/riak/cs/2.1.0/cookbooks/configuration/stanchion). This means that all requests for modification of a bucket ACL should be serialized through Stanchion. While this may cause undesirable serialization of these requests, we +[Stanchion]({{}}riak/cs/2.1.0/cookbooks/configuration/stanchion). This means that all requests for modification of a bucket ACL should be serialized through Stanchion. While this may cause undesirable serialization of these requests, we believe it is appropriate based on the following statement from this [documentation on bucket restrictions](http://docs.amazonwebservices.com/AmazonS3/latest/dev/BucketRestrictions.html) from Amazon regarding restrictions on bucket operations: @@ -105,4 +105,4 @@ created granting the creator both ownership and full access control and denying access to all other parties. For information on specifying an ACL when making a `PUT` request, see -[Riak CS PUT Object ACL](/riak/cs/2.1.0/references/apis/storage/s3/put-object-acl). +[Riak CS PUT Object ACL]({{}}riak/cs/2.1.0/references/apis/storage/s3/put-object-acl). diff --git a/content/riak/cs/2.1.0/cookbooks/authentication.md b/content/riak/cs/2.1.0/cookbooks/authentication.md index 3027687f3d..d9a465f783 100644 --- a/content/riak/cs/2.1.0/cookbooks/authentication.md +++ b/content/riak/cs/2.1.0/cookbooks/authentication.md @@ -111,4 +111,4 @@ http://bucket.data.basho.com/document?AWSAccessKeyId=8EE3UE-UMW1YTPMBC3EB&Expire ## Keystone Authentication More information on using Keystone for authentication with Riak CS can -be found in [using Riak CS with Keystone](/riak/cs/2.1.0/cookbooks/using-with-keystone). +be found in [using Riak CS with Keystone]({{}}riak/cs/2.1.0/cookbooks/using-with-keystone). diff --git a/content/riak/cs/2.1.0/cookbooks/command-line-tools.md b/content/riak/cs/2.1.0/cookbooks/command-line-tools.md index 9657dafdaf..3cdb240db5 100644 --- a/content/riak/cs/2.1.0/cookbooks/command-line-tools.md +++ b/content/riak/cs/2.1.0/cookbooks/command-line-tools.md @@ -206,7 +206,7 @@ More information about Erlang's etop tool can be found in the ## riak-cs-admin gc -This command controls Riak CS's [garbage collection](/riak/cs/2.1.0/cookbooks/garbage-collection) system. +This command controls Riak CS's [garbage collection]({{}}riak/cs/2.1.0/cookbooks/garbage-collection) system. ```bash riak-cs-admin gc @@ -312,7 +312,7 @@ undergirding Riak CS. Temporarily changes the host and/or port used by Stanchion. This change is effective until the node is restarted, at which point Stanchion will -begin listening on the host and port specified in your [configuration files](/riak/cs/2.1.0/cookbooks/configuration/reference). +begin listening on the host and port specified in your [configuration files]({{}}riak/cs/2.1.0/cookbooks/configuration/reference). ```bash riak-cs-stanchion switch HOST PORT @@ -521,7 +521,7 @@ documented [above](#riak-cs-admin-access). Riak CS version 1.5 offers support for supercluster operations. The `supercluster` command interface enables you to interact with that system. -More information can be found in [Riak CS Supercluster Support](/riak/cs/2.1.0/cookbooks/supercluster). +More information can be found in [Riak CS Supercluster Support]({{}}riak/cs/2.1.0/cookbooks/supercluster). {{% note title="Note: technical preview" %}} Riak CS supercluster support is available only as a technical preview for @@ -635,7 +635,7 @@ Fetches all current weights from the master member. 
riak-cs-supercluster refresh ``` -When a member's weight is updated, that weight is stored in the [master member](/riak/cs/2.1.0/cookbooks/supercluster/#the-master-member) and cached in Riak CS. Riak CS fetches weights from the master member only periodically. The +When a member's weight is updated, that weight is stored in the [master member]({{}}riak/cs/2.1.0/cookbooks/supercluster/#the-master-member) and cached in Riak CS. Riak CS fetches weights from the master member only periodically. The `refresh` command syncs the weights stored in the master member with the weights cached in Riak CS so that there is no discrepancy. diff --git a/content/riak/cs/2.1.0/cookbooks/configuration.md b/content/riak/cs/2.1.0/cookbooks/configuration.md index 9139eaff90..0b0b1abcf1 100644 --- a/content/riak/cs/2.1.0/cookbooks/configuration.md +++ b/content/riak/cs/2.1.0/cookbooks/configuration.md @@ -27,7 +27,7 @@ If your system consists of several nodes, configuration primarily represents set ## Configuration of System Components -* [Configuring Riak](/riak/cs/2.1.0/cookbooks/configuration/riak-for-cs) -* [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs) -* [Configuring Stanchion](/riak/cs/2.1.0/cookbooks/configuration/stanchion) -* [Configuring an S3 client](/riak/cs/2.1.0/cookbooks/configuration/s3-client) +* [Configuring Riak]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-for-cs) +* [configuring Riak CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-cs) +* [Configuring Stanchion]({{}}riak/cs/2.1.0/cookbooks/configuration/stanchion) +* [Configuring an S3 client]({{}}riak/cs/2.1.0/cookbooks/configuration/s3-client) diff --git a/content/riak/cs/2.1.0/cookbooks/configuration/dragondisk.md b/content/riak/cs/2.1.0/cookbooks/configuration/dragondisk.md index 3706175d4d..74dca7c415 100644 --- a/content/riak/cs/2.1.0/cookbooks/configuration/dragondisk.md +++ b/content/riak/cs/2.1.0/cookbooks/configuration/dragondisk.md @@ -29,7 +29,7 @@ other Linux distributions. This is the main DragonDisk window as it appears upon starting the application. -![DragonDisk screenshot](/images/dragondisk_linux0.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux0.png) ## Create an account @@ -38,16 +38,16 @@ save an account. The following describes the process for doing so. * From the **File** menu, select **Accounts**. -![DragonDisk screenshot](/images/dragondisk_linux1.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux1.png) * Click **New**. -![DragonDisk screenshot](/images/dragondisk_linux2.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux2.png) * In the **Account** dialog window, choose **Other S3 compatible service** under the **Provider** drop down menu. -![DragonDisk screenshot](/images/dragondisk_linux3.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux3.png) * Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field. @@ -66,12 +66,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTS** check box. * Click **OK** to save the account configuration. -![DragonDisk screenshot](/images/dragondisk_linux4.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux4.png) * Click **Close** to complete account creation and to continue to attempt connecting to Riak CS. -![DragonDisk screenshot](/images/dragondisk_linux5.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux5.png) ### Connect to Riak CS @@ -84,7 +84,7 @@ configuration. 
right pane, then you're connected to Riak CS and can proceed to creating a bucket. -![DragonDisk screenshot](/images/dragondisk_linux6.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux6.png) ### Create a bucket @@ -93,7 +93,7 @@ Riak CS. * Click the **Create bucket** icon to open the bucket creation dialog. -![DragonDisk screenshot](/images/dragondisk_linux7.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux7.png) * In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket `dragondisklinux`. @@ -101,7 +101,7 @@ Riak CS. * The bucket should now appear in the right pane and you can now proceed with copying some test files into the bucket. -![DragonDisk screenshot](/images/dragondisk_linux8.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux8.png) ### Copy files to bucket @@ -109,13 +109,13 @@ Finally, navigate your local computer in the left pane and select a file or files to copy from your local computer to the newly created Riak CS bucket. -![DragonDisk screenshot](/images/dragondisk_linux9.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux9.png) * After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin. * After the files are copied, they'll appear in the bucket. -![DragonDisk screenshot](/images/dragondisk_linux10.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux10.png) * You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk. @@ -131,7 +131,7 @@ This section describes configuration of DragonDisk for Mac OS X. * This is the main DragonDisk window as it appears upon starting the application. -![DragonDisk screenshot](/images/dragondisk_osx0.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx0.png) ### Create an account @@ -140,16 +140,16 @@ save an account. The following describes the process for doing so. * From the **File** menu, select **Accounts**. -![DragonDisk screenshot](/images/dragondisk_osx1.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx1.png) * Click **New**. -![DragonDisk screenshot](/images/dragondisk_osx2.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx2.png) * In the **Account** dialog window, choose **Other S3 compatible service** under the **Provider** drop down menu. -![DragonDisk screenshot](/images/dragondisk_osx3.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx3.png) * Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field. @@ -168,12 +168,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTS** check box. * Click **OK** to save the account configuration. -![DragonDisk screenshot](/images/dragondisk_osx4.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx4.png) * Click **Close** to complete account creation and continue try connecting to Riak CS. -![DragonDisk screenshot](/images/dragondisk_osx5.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx5.png) ### Connect to Riak CS @@ -186,7 +186,7 @@ configuration. right pane, then you're connected to Riak CS and can proceed to creating a bucket. -![DragonDisk screenshot](/images/dragondisk_osx6.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx6.png) ### Create a bucket @@ -195,7 +195,7 @@ Riak CS. * Click the **Create bucket** icon to open the bucket creation dialog. 
-![DragonDisk screenshot](/images/dragondisk_osx7.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx7.png) * In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket *dragondiskosx*. @@ -203,7 +203,7 @@ Riak CS. * The bucket should now appear in the right pane and you can now proceed with copying some test files into the bucket. -![DragonDisk screenshot](/images/dragondisk_osx8.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx8.png) ### Copy files to bucket @@ -211,14 +211,14 @@ Finally, navigate your local computer in the left pane and select a file or files to copy from your local computer to the newly created Riak CS bucket. -![DragonDisk screenshot](/images/dragondisk_osx9.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx9.png) * After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin. * After the files are copied, they'll appear in the bucket. -![DragonDisk screenshot](/images/dragondisk_osx10.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx10.png) * You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk. @@ -234,7 +234,7 @@ This section describes configuration of DragonDisk for Windows. * This is the main DragonDisk window as it appears upon starting the application. -![DragonDisk screenshot](/images/dragondisk_windows0.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows0.png) ### Create an account @@ -243,16 +243,16 @@ save an account. The following describes the process for doing so. * From the **File** menu, select **Accounts**. -![DragonDisk screenshot](/images/dragondisk_windows1.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows1.png) * Click **New**. -![DragonDisk screenshot](/images/dragondisk_windows2.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows2.png) * In the **Account** dialog window, choose **Other S3-compatible service** under the **Provider** drop down menu. -![DragonDisk screenshot](/images/dragondisk_windows3.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows3.png) * Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field. @@ -271,12 +271,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTS** check box. * Click **OK** to save the account configuration. -![DragonDisk screenshot](/images/dragondisk_windows4.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows4.png) * Click **Close** to complete account creation and continue try connecting to Riak CS. -![DragonDisk screenshot](/images/dragondisk_windows5.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows5.png) ### Connect to Riak CS @@ -289,7 +289,7 @@ configuration. right pane, then you're connected to Riak CS and can proceed to creating a bucket. -![DragonDisk screenshot](/images/dragondisk_windows6.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows6.png) ### Create a bucket @@ -297,7 +297,7 @@ configuration. with Riak CS. * Click the **Create bucket** icon to open the bucket creation dialog. -![DragonDisk screenshot](/images/dragondisk_windows7.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows7.png) * In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket *dragonbucket*. @@ -305,7 +305,7 @@ configuration. 
* The bucket should now appear in the right pane and you can now proceed with copying some test files into the bucket. -![DragonDisk screenshot](/images/dragondisk_windows8.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows8.png) ### Copy files to bucket @@ -313,13 +313,13 @@ Finally, navigate your local computer in the left pane and select a file or files to copy from your local computer to the newly created Riak CS bucket. -![DragonDisk screenshot](/images/dragondisk_windows9.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows9.png) * After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin. * After the files are copied, they'll appear in the bucket. -![DragonDisk screenshot](/images/dragondisk_windows10.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows10.png) * You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk. diff --git a/content/riak/cs/2.1.0/cookbooks/configuration/load-balancing-proxy.md b/content/riak/cs/2.1.0/cookbooks/configuration/load-balancing-proxy.md index 3d894db4a7..6f8e753d55 100644 --- a/content/riak/cs/2.1.0/cookbooks/configuration/load-balancing-proxy.md +++ b/content/riak/cs/2.1.0/cookbooks/configuration/load-balancing-proxy.md @@ -51,7 +51,7 @@ act as a load balancer to a Riak CS installation. > **Note on open files limits** > > The operating system's open files limits need to be greater than 256000 -for the example configuration that follows. Consult the [Open Files Limit](/riak/kv/2.1.3/using/performance/open-files-limit) documentation for details on configuring the value for different +for the example configuration that follows. Consult the [Open Files Limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit) documentation for details on configuring the value for different operating systems. ```config diff --git a/content/riak/cs/2.1.0/cookbooks/configuration/multi-datacenter.md b/content/riak/cs/2.1.0/cookbooks/configuration/multi-datacenter.md index 4c4b3eca08..42b75d4bfb 100644 --- a/content/riak/cs/2.1.0/cookbooks/configuration/multi-datacenter.md +++ b/content/riak/cs/2.1.0/cookbooks/configuration/multi-datacenter.md @@ -28,7 +28,7 @@ CS cluster. As of Riak release 1.4.0, there are two different MDC replication modes that Riak CS can use to request data from remote clusters. Please see -the [comparison](/riak/kv/2.1.3/using/reference/multi-datacenter/comparison) doc for more information. +the [comparison]({{}}riak/kv/2.1.3/using/reference/multi-datacenter/comparison) doc for more information. ### Replication Version 3 Configuration @@ -75,9 +75,9 @@ configured **sink cluster**. 
See also: -* [Upgrading from v2 to v3](/riak/kv/2.1.3/setup/upgrading/multi-datacenter) -* [Comparing v2 and v3](/riak/kv/2.1.3/using/reference/multi-datacenter/comparison) -* [Multi-Datacenter Operations](/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter) + +* [Comparing v2 and v3]({{}}riak/kv/2.1.3/using/reference/multi-datacenter/comparison) +* [Multi-Datacenter Operations]({{}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter) ## Riak CS Configuration @@ -119,10 +119,10 @@ Replace the `node` variable above with the nodename specified in the ## Stanchion Configuration -Though there is no specific configuration for [Stanchion](/riak/cs/2.1.0/theory/stanchion), note that +Though there is no specific configuration for [Stanchion]({{}}riak/cs/2.1.0/theory/stanchion), note that Stanchion should be a single, globally unique process to which every Riak CS node sends requests, even if there are multiple replicated sites. Unlike Riak and Riak CS, Stanchion should run on _only one node in a given cluster_, perhaps on its own, dedicated hardware if you wish. Stanchion runs on only one node because it manages strongly consistent -updates to [globally unique entities](/riak/cs/2.1.0/theory/stanchion/#globally-unique-entities) like users and buckets. +updates to [globally unique entities]({{}}riak/cs/2.1.0/theory/stanchion/#globally-unique-entities) like users and buckets. diff --git a/content/riak/cs/2.1.0/cookbooks/configuration/reference.md b/content/riak/cs/2.1.0/cookbooks/configuration/reference.md index 89b880cf4b..090732501e 100644 --- a/content/riak/cs/2.1.0/cookbooks/configuration/reference.md +++ b/content/riak/cs/2.1.0/cookbooks/configuration/reference.md @@ -53,12 +53,12 @@ aliases: ]}, ``` - and so on. More details can be found at [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs). + and so on. More details can be found at [configuring Riak CS]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-cs). {{% /note %}} This document is intended as a reference listing of all configurable parameters for Riak CS. For a more narrative-style walkthrough of configuring Riak CS, we -recommend consulting the [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs) tutorial. +recommend consulting the [configuring Riak CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-cs) tutorial. The configuration for Riak CS is handled through either the `riak-cs.conf` and `advanced.config` file pair, which were introduced in Riak CS 2.0.0, or the two @@ -130,7 +130,7 @@ The tables below will show settings for both `riak-cs.conf` and riak_host The IP address/port for the Riak CS node's corresponding Riak node (used by -Riak's Protocol Buffers interface) +Riak's Protocol Buffers interface) 127.0.0.1:8087 @@ -157,7 +157,7 @@ the corresponding HTTP host). riak_host The TCP IP/port for the Riak CS node's corresponding Riak node (used by -Riak's Protocol Buffers interface) +Riak's Protocol Buffers interface) {"127.0.0.1", 8087} @@ -298,7 +298,7 @@ tasks use the IP and port as all other Riak CS traffic. The admin key used for administrative access to Riak CS, e.g. usage of the /riak-cs/stats endpoint. Please note that both admin.key and admin.secret must match the -corresponding settings in the Stanchion node's stanchion.conf. +corresponding settings in the Stanchion node's stanchion.conf. admin-key @@ -326,7 +326,7 @@ this setting unless you implement a custom authentication scheme. 
rewrite_module A rewrite module contains a set of rules for translating requests made using -a particular API to requests in the the native Riak CS storage API. We do +a particular API to requests in the native Riak CS storage API. We do not recommend changing this setting unless you implement a custom module. riak_cs_s3_rewrite @@ -351,7 +351,7 @@ tasks use the IP and port as all other Riak CS traffic. The admin key used for administrative access to Riak CS, e.g. usage of the /riak-cs/stats endpoint. Please note that both admin_key and admin_secret must match the -corresponding settings in the Stanchion node's +corresponding settings in the Stanchion node's app.config. @@ -387,7 +387,7 @@ actions, including bucket deletion. rewrite_module A rewrite module contains a set of rules for translating requests -made using a particular API to requests in the the native Riak CS storage API. We do not recommend changing this setting unless you +made using a particular API to requests in the native Riak CS storage API. We do not recommend changing this setting unless you implement a custom module. riak_cs_s3_rewrite @@ -396,7 +396,7 @@ implement a custom module. ## Usage Recording -These settings relate to Riak CS's [access logs](/riak/cs/2.1.0/cookbooks/usage-and-billing-data). +These settings relate to Riak CS's [access logs]({{}}riak/cs/2.1.0/cookbooks/usage-and-billing-data). ### `riak-cs.conf` @@ -519,7 +519,7 @@ of 86400 translates to 1 day. ## Garbage Collection -Settings related to Riak CS's [garbage collection](/riak/cs/2.1.0/cookbooks/garbage-collection) \(GC) process. +Settings related to Riak CS's [garbage collection]({{}}riak/cs/2.1.0/cookbooks/garbage-collection) \(GC) process. ### `riak-cs.conf` @@ -661,7 +661,7 @@ blocks to Riak. cs_version The Riak CS version number. This number is used to selectively enable new -features for the current version to better support rolling upgrades. New +features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set @@ -696,7 +696,7 @@ source IP address as an input (which is the default). cs_version The Riak CS version number. This number is used to selectively -enable new features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set to the new value. +enable new features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set to the new value. @@ -958,4 +958,4 @@ error logger.
--> -[config_your_code]: http://docs.basho.com/riak/1.4.12/ops/advanced/configs/configuration-files/#Configuring-Your-code-vm-args-code- +[config_your_code]: {{< baseurl >}}riak/kv/2.1.0/configuring/basic/#erlang-vm-tunings diff --git a/content/riak/cs/2.1.0/cookbooks/configuration/riak-cs.md b/content/riak/cs/2.1.0/cookbooks/configuration/riak-cs.md index 644e4553fd..419d21e6b4 100644 --- a/content/riak/cs/2.1.0/cookbooks/configuration/riak-cs.md +++ b/content/riak/cs/2.1.0/cookbooks/configuration/riak-cs.md @@ -10,8 +10,8 @@ menu: project: "riak_cs" project_version: "2.1.0" aliases: - - /riakcs/2.1.0/cookbooks/configuration/Configuring-Riak-CS/ - - /riak/cs/2.1.0/cookbooks/configuration/Configuring-Riak-CS/ + - /riakcs/2.1.0/cookbooks/configuration/riak-cs/ + - /riak/cs/2.1.0/cookbooks/configuration/riak-cs/ --- For Riak CS to operate properly it must know how to connect to Riak. @@ -51,9 +51,9 @@ files. If an `app.config` file is present, neither the `riak-cs.config` nor the to continue usage of the legacy `app.config` file, please note that some configuration options have changed names. Most notably, the IP/Port format has changed in 2.0 for Stanchion, Riak, and Riak CS. To view these changes, -please review the [Rolling Upgrades](/riak/cs/2.1.0/cookbooks/rolling-upgrades) Document. +please review the [Rolling Upgrades]({{}}riak/cs/2.1.0/cookbooks/rolling-upgrades) Document. > -> For a comprehensive listing of available parameters and a full list of `app.config` parameters, see the [Full Configuration Reference](/riak/cs/2.1.0/cookbooks/configuration/reference). +> For a comprehensive listing of available parameters and a full list of `app.config` parameters, see the [Full Configuration Reference]({{}}riak/cs/2.1.0/cookbooks/configuration/reference). The sections below walk you through some of the main configuration categories that you will likely encounter while operating Riak CS. @@ -81,12 +81,12 @@ is required. {{% /note %}} After making any changes to the `riak-cs.conf` file in Riak CS, -[restart](/riak/cs/2.1.0/cookbooks/command-line-tools/#riak-cs) the node if it is already running. +[restart]({{}}riak/cs/2.1.0/cookbooks/command-line-tools/#riak-cs) the node if it is already running. ## Specifying the Stanchion Node If you're running a single Riak CS node, you don't have to change the -[Stanchion](/riak/cs/2.1.0/cookbooks/configuration/stanchion) settings because Stanchion runs on the local host. If your Riak CS system has multiple nodes, however, you must specify the IP address and port for the Stanchion node and whether or not SSL is enabled. +[Stanchion]({{}}riak/cs/2.1.0/cookbooks/configuration/stanchion) settings because Stanchion runs on the local host. If your Riak CS system has multiple nodes, however, you must specify the IP address and port for the Stanchion node and whether or not SSL is enabled. The Stanchion settings reside in the Riak CS `riak-cs.conf` file, which is located in the `/etc/riak-cs` directory of each Riak CS node. @@ -264,7 +264,7 @@ particular use case. ### Tuning We strongly recommend that you take care when setting the value of the -[`pb_backlog` setting](/riak/cs/2.1.0/cookbooks/configuration/riak-for-cs/#setting-up-riak-to-use-protocol-buffers) in Riak. When a Riak CS node is +[`pb_backlog` setting]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-for-cs/#setting-up-riak-to-use-protocol-buffers) in Riak. When a Riak CS node is started, each connection pool begins to establish connections to Riak. 
This can result in a [thundering herd problem](http://en.wikipedia.org/wiki/Thundering_herd_problem) in which connections in the pool believe they are connected to Riak, but in reality some of the connections have been reset. Due to TCP `RST` packet rate limiting (controlled by `net.inet.icmp.icmplim`) some of the connections may not receive notification until they are used to service a user's request. This manifests itself as an `{error, disconnected}` message in the Riak CS logs and an error returned to the user. @@ -354,7 +354,7 @@ data.riakcs.net The following options are available to make adjustments to the Riak CS garbage collection system. More details about garbage collection in Riak CS are -available in [Garbage Collection](/riak/cs/2.1.0/cookbooks/garbage-collection). +available in [Garbage Collection]({{}}riak/cs/2.1.0/cookbooks/garbage-collection). * `gc.leeway_period` (`leeway_seconds` in `advanced.config` or `app.config`) --- The amount of time that must elapse before an object version that has been @@ -420,4 +420,4 @@ been deprecated, and _will be removed_ in the next major release. ## Other Riak CS Settings For a complete listing of configurable parameters for Riak CS, see the -[configuration reference](/riak/cs/2.1.0/cookbooks/configuration/reference) document. +[configuration reference]({{}}riak/cs/2.1.0/cookbooks/configuration/reference) document. diff --git a/content/riak/cs/2.1.0/cookbooks/configuration/riak-for-cs.md b/content/riak/cs/2.1.0/cookbooks/configuration/riak-for-cs.md index 69deb87c34..358ea19f75 100644 --- a/content/riak/cs/2.1.0/cookbooks/configuration/riak-for-cs.md +++ b/content/riak/cs/2.1.0/cookbooks/configuration/riak-for-cs.md @@ -21,21 +21,21 @@ reference document listing important configurable parameters. ## The Proper Backends for Riak CS -The default backend used by Riak is the [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) backend, but the +The default backend used by Riak is the [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) backend, but the Riak CS package includes a special backend that should be used by the Riak cluster that is part of the Riak CS system. It is a custom version -of the standard [Multi](/riak/kv/2.1.3/setup/planning/backend/multi) backend that ships with Riak. +of the standard [Multi]({{}}riak/kv/2.1.3/setup/planning/backend/multi) backend that ships with Riak. Some of the Riak buckets used internally by Riak CS use secondary -indexes, which currently requires the [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb) backend. Other parts +indexes, which currently requires the [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb) backend. Other parts of the Riak CS system can benefit from the use of the Bitcask backend. -The use of the custom [Multi](/riak/kv/2.1.3/setup/planning/backend/multi) backend enables Riak CS to take +The use of the custom [Multi]({{}}riak/kv/2.1.3/setup/planning/backend/multi) backend enables Riak CS to take advantage of the strengths of both of these backends to achieve the best blend of performance and features. The next section covers how to properly set up Riak to use this Multi backend. Additionally, the Riak CS storage calculation system uses Riak's -[MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce) to sum the files in a bucket. This means that you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage. +[MapReduce]({{}}riak/kv/2.1.3/developing/usage/mapreduce) to sum the files in a bucket. 
This means that you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage. A few other settings must be modified to configure a Riak node as part of a Riak CS system, such as the node IP address and the IP address and @@ -46,7 +46,7 @@ configure a Riak node to work as part of a Riak CS system. ## Setting up the Proper Riak Backend First, edit Riak's `riak.conf`, or the old-style `advanced.config` or -`app.config` [configuration file](/riak/kv/2.1.3/configuring/reference). These files can be found in the `/etc/riak` or `/opt/riak/etc` directories. By default, Riak uses the [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) backend. The first thing we need to do is to change that by removing +`app.config` [configuration file]({{}}riak/kv/2.1.3/configuring/reference). These files can be found in the `/etc/riak` or `/opt/riak/etc` directories. By default, Riak uses the [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) backend. The first thing we need to do is to change that by removing the following line: ```riakconf @@ -117,7 +117,7 @@ to use the custom backend provided by Riak CS. We need to use either the ``` It's important to note that many of these values will depend on various -directories specific to your [operating system](/riak/kv/2.1.3/setup/installing), so make sure to adjust them accordingly. The `add_paths` +directories specific to your [operating system]({{}}riak/kv/2.1.3/setup/installing), so make sure to adjust them accordingly. The `add_paths` parameter, for example, assumes that Riak CS is installed in `/usr/lib/riak-cs`, while the `data_root` parameters assume that Riak is installed in `/var/lib/`. @@ -152,7 +152,7 @@ buckets.default.allow_mult = true ]} ``` -This will enable Riak to create [siblings](/riak/kv/2.1.3/learn/concepts/causal-context/#siblings), which is necessary for Riak CS to function. If you are connecting to Riak CS from a [client library](/riak/kv/2.1.3/developing/client-libraries), don't worry: you will not have to manage [conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), as all Riak CS +This will enable Riak to create [siblings]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#siblings), which is necessary for Riak CS to function. If you are connecting to Riak CS from a [client library]({{}}riak/kv/2.1.3/developing/client-libraries), don't worry: you will not have to manage [conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), as all Riak CS operations are strongly consistent by definition. {{% note title="Note on `allow_mult`" %}} @@ -214,7 +214,7 @@ sure that you do not change the backend from `riak_cs_kv_multi_backend` to ## Setting Up Riak to Use Protocol Buffers -The Riak [Protocol Buffers](/riak/kv/2.1.3/developing/api/protocol-buffers) settings reside in the Riak `riak.conf`, +The Riak [Protocol Buffers]({{}}riak/kv/2.1.3/developing/api/protocol-buffers) settings reside in the Riak `riak.conf`, or in the `riak_api` section of the the old-style `advanced.config` or `app.config` files, which is located in the `/etc/riak/` folder. The default host is `127.0.0.1` and the default port is `8087`. 
You will need to change this if @@ -343,4 +343,4 @@ javascript.hook_pool_size = 0 ``` -[riak_conf_files]: http://docs.basho.com/riak/2.0.5/ops/advanced/configs/configuration-files/ +[riak_conf_files]: {{< baseurl >}}riak/kv/2.0.5/ops/advanced/configs/configuration-files/ diff --git a/content/riak/cs/2.1.0/cookbooks/configuration/stanchion.md b/content/riak/cs/2.1.0/cookbooks/configuration/stanchion.md index ff52ccd65b..942a407ec5 100644 --- a/content/riak/cs/2.1.0/cookbooks/configuration/stanchion.md +++ b/content/riak/cs/2.1.0/cookbooks/configuration/stanchion.md @@ -86,7 +86,7 @@ ssl.keyfile = "./etc/key.pem" ## Specifying the Admin User -The admin user is created during the [configuration of Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs/#specifying-the-admin-user). +The admin user is created during the [configuration of Riak CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-cs/#specifying-the-admin-user). The same user credentials must be added to each Stanchion used in the cluster. This is set in the `stanchion.conf` file, which is located in the `/etc/stanchion` directory. Enter the same `admin.key` and `admin.secret` as diff --git a/content/riak/cs/2.1.0/cookbooks/configuration/transmit.md b/content/riak/cs/2.1.0/cookbooks/configuration/transmit.md index db11f9fc77..5d27cbda61 100644 --- a/content/riak/cs/2.1.0/cookbooks/configuration/transmit.md +++ b/content/riak/cs/2.1.0/cookbooks/configuration/transmit.md @@ -38,11 +38,11 @@ dialog as follows: Defining a connection looks like this: -![Trasmit screenshot](/images/riak_cs_transmit0.jpg) +![Transmit screenshot]({{}}images/riak_cs_transmit0.jpg) > **Note** > -> Transmit expects a secure connection, so ensure that your Riak CS proxy server is configured with SSL support. For information on configuring a software solution like HAProxy with SSL for use with Riak CS, see [Load Balancing and Proxy Configuration](/riak/cs/2.1.0/cookbooks/configuration/load-balancing-proxy). +> Transmit expects a secure connection, so ensure that your Riak CS proxy server is configured with SSL support. For information on configuring a software solution like HAProxy with SSL for use with Riak CS, see [Load Balancing and Proxy Configuration]({{}}riak/cs/2.1.0/cookbooks/configuration/load-balancing-proxy). Finally, test the connection to Riak CS by clicking **Connect**. @@ -56,11 +56,11 @@ After successfully connecting to Riak CS, verify that you can create a bucket. The new bucket creation dialog looks like this: -![Trasmit screenshot](/images/riak_cs_transmit1.jpg) +![Transmit screenshot]({{}}images/riak_cs_transmit1.jpg) The newly created bucket is listed in the right hand pane of the Transmit interface: -![Trasmit screenshot](/images/riak_cs_transmit2.jpg) +![Transmit screenshot]({{}}images/riak_cs_transmit2.jpg) ## Copy Files @@ -74,7 +74,7 @@ copying of the files to the bucket. After copying, the files will appear in the bucket: -![Trasmit screenshot](/images/riak_cs_transmit3.jpg) +![Transmit screenshot]({{}}images/riak_cs_transmit3.jpg) You have now successfully configured a Transmit connection to Riak CS and verified basic file copying capabilities. diff --git a/content/riak/cs/2.1.0/cookbooks/faqs/riak-cs.md b/content/riak/cs/2.1.0/cookbooks/faqs/riak-cs.md index 6d0768ee84..3b6c000f5c 100644 --- a/content/riak/cs/2.1.0/cookbooks/faqs/riak-cs.md +++ b/content/riak/cs/2.1.0/cookbooks/faqs/riak-cs.md @@ -15,7 +15,7 @@ aliases: Q: What is Riak CS?
A: - Riak CS is [multi-tenant](http://en.wikipedia.org/wiki/Multitenancy) cloud storage software for public and private clouds. Built on Basho's distributed database [Riak KV](/riak/kv/2.1.3), Riak CS is commercial software designed to provide simple, available, distributed cloud storage at any scale. Riak CS is S3 API compatible and supports per-tenant reporting for billing and metering use cases. + Riak CS is [multi-tenant](http://en.wikipedia.org/wiki/Multitenancy) cloud storage software for public and private clouds. Built on Basho's distributed database [Riak KV]({{}}riak/kv/2.1.3), Riak CS is commercial software designed to provide simple, available, distributed cloud storage at any scale. Riak CS is S3 API compatible and supports per-tenant reporting for billing and metering use cases. Q: Can users share data? A: @@ -27,7 +27,7 @@ Q: Is it possible to specify a filesystem where my Riak CS buckets will live? A: You can specify the location of **all** Riak CS bucket data by changing the settings for Riak's backends to a path on a particular filesystem. If this is your goal, you can configure Riak to suit your environment. If you look at our example Riak `advanced.config`/`app.config` backend - definition from the [Configuring Riak for CS](/riak/cs/2.1.0/cookbooks/configuration/riak-for-cs) section, it looks like this: + definition from the [Configuring Riak for CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-for-cs) section, it looks like this: ```advanced.config {riak_kv, [ diff --git a/content/riak/cs/2.1.0/cookbooks/garbage-collection.md b/content/riak/cs/2.1.0/cookbooks/garbage-collection.md index 8fa00ea876..16acc03a02 100644 --- a/content/riak/cs/2.1.0/cookbooks/garbage-collection.md +++ b/content/riak/cs/2.1.0/cookbooks/garbage-collection.md @@ -15,7 +15,7 @@ aliases: This document describes some of the implementation details behind Riak CS's garbage collection process. For information on configuring this -system, please see our documentation on [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs). +system, please see our documentation on [configuring Riak CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-cs). ## Versions and Manifests @@ -192,7 +192,7 @@ We recommend using only _one_ active garbage collection daemon in any Riak CS cluster. If multiple daemons are currently being used, you can disable the others by setting the `gc.interval` parameter to `infinity` on those nodes. More information on how to do that can be found in the -[CS configuration doc](/riak/cs/2.1.0/cookbooks/configuration/riak-cs/#garbage-collection-settings). +[CS configuration doc]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-cs/#garbage-collection-settings). ## Controlling the GC Daemon @@ -212,7 +212,7 @@ Command | Description `set-interval` | Set or update the garbage collection interval. This setting uses a unit of seconds. `set-leeway` | Set or update the garbage collection leeway time. This setting indicates how many seconds must elapse after an object is deleted or overwritten before the garbage collection system may reap the object. This setting uses a unit of seconds. -For more information, see our documentation on [Riak CS command-line tools](/riak/cs/2.1.0/cookbooks/command-line-tools). +For more information, see our documentation on [Riak CS command-line tools]({{}}riak/cs/2.1.0/cookbooks/command-line-tools). ## Manifest Updates @@ -256,7 +256,7 @@ manifest keys that could linger indefinitely. 
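Tying the daemon controls described above together, a typical operator session might look like this sketch. The interval and leeway values are arbitrary examples rather than recommendations, and the `batch` subcommand is assumed to be present in your Riak CS version:

```bash
# Run garbage collection once an hour, and never reap an object until
# 24 hours after it has been deleted or overwritten.
riak-cs-admin gc set-interval 3600
riak-cs-admin gc set-leeway 86400

# Start a collection run immediately rather than waiting for the interval.
riak-cs-admin gc batch
```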
Riak CS's garbage collection implementation gives the deployer several knobs to adjust for fine-tuning system performace. More information -can be found in our documentation on [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs/#garbage-collection-settings). +can be found in our documentation on [configuring Riak CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-cs/#garbage-collection-settings). ## More Information diff --git a/content/riak/cs/2.1.0/cookbooks/installing.md b/content/riak/cs/2.1.0/cookbooks/installing.md index 7da03dbce2..320d82ca67 100644 --- a/content/riak/cs/2.1.0/cookbooks/installing.md +++ b/content/riak/cs/2.1.0/cookbooks/installing.md @@ -22,16 +22,16 @@ You can install Riak CS on a single node (for development purposes) or using an automated deployment tool. Any Riak CS installation involves three components, all of which must be installed separately: -* [Riak KV](/riak/2.1.4/) --- The distributed database on top of which Riak CS +* [Riak KV]({{}}riak/kv/2.1.4/) --- The distributed database on top of which Riak CS is built * Riak CS itself -* [Stanchion](/riak/cs/2.1.0/theory/stanchion) --- An application used to manage [globally unique entities](/riak/cs/2.1.0/theory/stanchion/#globally-unique-entities) such as users and buckets. +* [Stanchion]({{}}riak/cs/2.1.0/theory/stanchion) --- An application used to manage [globally unique entities]({{}}riak/cs/2.1.0/theory/stanchion/#globally-unique-entities) such as users and buckets. [Riak KV](#installing-riak) and [Riak CS](#installing-riak-cs-on-a-node) must be installed on each node in your cluster. [Stanchion](#installing-stanchion-on-a-node), however, needs to be installed on only one node. ## Version Compatibility -We strongly recommend using one of the documented [version combinations](/riak/cs/2.1.0/cookbooks/version-compatibility/) +We strongly recommend using one of the documented [version combinations]({{}}riak/cs/2.1.0/cookbooks/version-compatibility/) when installing and running Riak CS. ## Installing Riak KV @@ -40,30 +40,30 @@ Before installing Riak CS, Riak KV must be installed on each node in your cluster. You can install Riak KV either as part of an OS-specific package or from source. - * [Debian and Ubuntu](/riak/kv/2.1.4/setup/installing/debian-ubuntu) - * [RHEL and CentOS](/riak/kv/2.1.4/setup/installing/rhel-centos) - * [Mac OS X](/riak/kv/2.1.4/setup/installing/mac-osx) - * [FreeBSD](/riak/kv/2.1.4/setup/installing/freebsd) - * [SUSE](/riak/kv/2.1.4/setup/installing/suse) - * [From Source](/riak/kv/2.1.4/setup/installing/source) + * [Debian and Ubuntu]({{}}riak/kv/2.1.4/setup/installing/debian-ubuntu) + * [RHEL and CentOS]({{}}riak/kv/2.1.4/setup/installing/rhel-centos) + * [Mac OS X]({{}}riak/kv/2.1.4/setup/installing/mac-osx) + * [FreeBSD]({{}}riak/kv/2.1.4/setup/installing/freebsd) + * [SUSE]({{}}riak/kv/2.1.4/setup/installing/suse) + * [From Source]({{}}riak/kv/2.1.4/setup/installing/source) Riak is also officially supported on the following public cloud infrastructures: - * [Windows Azure](/riak/kv/2.1.4/setup/installing/windows-azure) - * [AWS Marketplace](/riak/kv/2.1.4/setup/installing/amazon-web-services) + * [Windows Azure]({{}}riak/kv/2.1.4/setup/installing/windows-azure) + * [AWS Marketplace]({{}}riak/kv/2.1.4/setup/installing/amazon-web-services) Remember that you must repeat this installation process on each node in your cluster. For future reference, you should make note of the Riak KV installation directory. 
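Before layering Riak CS on top, it is worth confirming that each freshly installed Riak KV node starts and responds. A minimal sanity check, assuming the `riak` control script is on your `PATH`:

```bash
# Run on every node that will also host Riak CS.
riak start
riak ping       # should print "pong" once the node is up
riak version    # confirm this matches a supported version combination
```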
If you want to fully configure Riak KV prior to installing Riak CS, see our -documentation on [configuring Riak KV for CS](/riak/cs/2.1.0/cookbooks/configuration/riak-for-cs/). +documentation on [configuring Riak KV for CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-for-cs/). ## Installing Riak CS on a Node -Riak CS and Stanchion packages are available on the [Download Riak CS](/riak/cs/2.1.0/downloads/) -page. Similarly, Riak packages are available on the [Download Riak KV](/riak/kv/2.1.4/downloads/) page. +Riak CS and Stanchion packages are available on the [Download Riak CS]({{}}riak/cs/2.1.0/downloads/) +page. Similarly, Riak packages are available on the [Download Riak KV]({{}}riak/kv/2.1.4/downloads/) page. After downloading Riak CS, Stanchion, and Riak, install them using your operating system's package management commands. @@ -78,7 +78,7 @@ such as a dedicated device [HAProxy](http://haproxy.1wt.eu) or [Nginx](http://wi ### Installing Riak CS on Mac OS X To install Riak CS on OS X, first download the appropriate package from -the [downloads](/riak/cs/2.1.0/downloads) page: +the [downloads]({{}}riak/cs/2.1.0/downloads) page: ```bash curl -O http://s3.amazonaws.com/downloads.basho.com/riak-cs/1.5/2.1.0/osx/10.8/riak-cs-2.1.0-OSX-x86_64.tar.gz @@ -90,7 +90,7 @@ Then, unpack the downloaded tarball: tar -xvzf riak-cs-2.1.0-OSX-x86_64.tar.gz ``` -At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs/). +At this point, you can move on to [configuring Riak CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-cs/). ### Installing Riak CS on Debian or Ubuntu @@ -255,11 +255,11 @@ can achieve this by specifying a load balancer IP as the Stanchion IP in each Riak CS node's `riak-cs.conf`. This load balancer must be configured to send all requests to a single Stanchion node, failing over to a secondary Stanchion node if the primary is unavailable. More -details can be found in [Specifying the Stanchion Node](/riak/cs/2.1.0/cookbooks/configuration/#specifying-the-stanchion-node). +details can be found in [Specifying the Stanchion Node]({{}}riak/cs/2.1.0/cookbooks/configuration/#specifying-the-stanchion-node). ### Installing Stanchion on Mac OS X -First, download the appropriate package from the [downloads](/riak/cs/2.1.0/downloads/#stanchion-1-4-3) page. +First, download the appropriate package from the [downloads]({{}}riak/cs/2.1.0/downloads/#stanchion-1-4-3) page. ```bash curl -O http://s3.amazonaws.com/downloads.basho.com/stanchion/1.4/1.4.3/osx/10.8/stanchion-2.0.0-OSX-x86_64.tar.gz @@ -271,7 +271,7 @@ Then, unpack the downloaded tarball: stanchion-2.0.0-OSX-x86_64.tar.gz ``` -At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs). +At this point, you can move on to [configuring Riak CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-cs). ### Installing Stanchion on Debian or Ubuntu @@ -300,7 +300,7 @@ Now, install the `stanchion` package: sudo apt-get install stanchion ``` -At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs). +At this point, you can move on to [configuring Riak CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-cs). #### Installing the `.deb` Package Manually (not recommended) @@ -311,7 +311,7 @@ sudo dpkg -i Replace `` with the actual filename for the package you are installing. -At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs). 
+At this point, you can move on to [configuring Riak CS]({{<baseurl>}}riak/cs/2.1.0/cookbooks/configuration/riak-cs). ### Installing Stanchion on RHEL or CentOS @@ -338,7 +338,7 @@ Once the `.rpm` package has been installed, install Stanchion: ``` sudo yum install stanchion ``` -At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs). +At this point, you can move on to [configuring Riak CS]({{<baseurl>}}riak/cs/2.1.0/cookbooks/configuration/riak-cs). #### Installing the `.rpm` Package Manually (not recommended) ``` sudo rpm -Uvh Replace `` with the actual filename for the package you are installing. -At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs). +At this point, you can move on to [configuring Riak CS]({{<baseurl>}}riak/cs/2.1.0/cookbooks/configuration/riak-cs). > **Note on SELinux** > @@ -359,4 +359,4 @@ encounter errors during installation, try disabling SELinux. ## What's Next? Once you've completed installation of Riak CS and Riak, you're ready to -learn more about [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs). +learn more about [configuring Riak CS]({{<baseurl>}}riak/cs/2.1.0/cookbooks/configuration/riak-cs). diff --git a/content/riak/cs/2.1.0/cookbooks/installing/chef.md b/content/riak/cs/2.1.0/cookbooks/installing/chef.md index e461530127..1040a79ec0 100644 --- a/content/riak/cs/2.1.0/cookbooks/installing/chef.md +++ b/content/riak/cs/2.1.0/cookbooks/installing/chef.md @@ -138,8 +138,8 @@ default['stanchion']['args']['-env']['ERL_CRASH_DUMP'] = "/var/log/stanchion/erl #### Storage Backends -Riak CS uses a specific combination of storage backends. [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) is used to -store blocks and [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb) to store manifests. The `riak_cs_kv_multi_backend` must be specified in the Riak configuration file for Riak CS to work: +Riak CS uses a specific combination of storage backends. [Bitcask]({{<baseurl>}}riak/kv/2.1.3/setup/planning/backend/bitcask) is used to +store blocks and [LevelDB]({{<baseurl>}}riak/kv/2.1.3/setup/planning/backend/leveldb) to store manifests. The `riak_cs_kv_multi_backend` must be specified in the Riak configuration file for Riak CS to work: ```ruby default['riak']['config']['riak_kv']['storage_backend'] = "riak_cs_kv_multi_backend" @@ -183,5 +183,5 @@ default['stanchion']['config']['lager']['error_logger_redirect'] = true More information related to cluster configuration and building development environments is available in our documentation. -* [Building a Local Test Environment](/riak/cs/2.1.0/tutorials/fast-track/local-testing-environment) -* [Building a Virtual Testing Environment](/riak/cs/2.1.0/tutorials/fast-track/virtual-test-environment) +* [Building a Local Test Environment]({{<baseurl>}}riak/cs/2.1.0/tutorials/fast-track/local-testing-environment) +* [Building a Virtual Testing Environment]({{<baseurl>}}riak/cs/2.1.0/tutorials/fast-track/virtual-test-environment) diff --git a/content/riak/cs/2.1.0/cookbooks/keystone-setup.md b/content/riak/cs/2.1.0/cookbooks/keystone-setup.md index ea040a7e08..65b059cd16 100644 --- a/content/riak/cs/2.1.0/cookbooks/keystone-setup.md +++ b/content/riak/cs/2.1.0/cookbooks/keystone-setup.md @@ -73,7 +73,7 @@ pip install -r tools/pip-requires The next step is to select the appropriate options in the `keystone.conf` configuration file.
A sample configuration that is -useful for local testing with Riak CS can be found [here](/riak/cs/2.1.0/cookbooks/keystone-conf-sample/). This configuration file sets up logging to +useful for local testing with Riak CS can be found [here]({{<baseurl>}}riak/cs/2.1.0/cookbooks/keystone-conf-sample/). This configuration file sets up logging to `./log/keystone/keystone.log` and uses the templated catalog backend to set up the Riak CS object store service. This catalog backend uses a local file to populate the service catalog. diff --git a/content/riak/cs/2.1.0/cookbooks/logging.md b/content/riak/cs/2.1.0/cookbooks/logging.md index 46904c20da..6aaf520a74 100644 --- a/content/riak/cs/2.1.0/cookbooks/logging.md +++ b/content/riak/cs/2.1.0/cookbooks/logging.md @@ -41,4 +41,4 @@ That section looks something like this: ``` A full description of all available parameters can be found in the -[configuration files](/riak/kv/2.1.3/configuring/reference) document for Riak. +[configuration files]({{<baseurl>}}riak/kv/2.1.3/configuring/reference) document for Riak. diff --git a/content/riak/cs/2.1.0/cookbooks/monitoring-and-metrics.md b/content/riak/cs/2.1.0/cookbooks/monitoring-and-metrics.md index 44acc71f12..1a17c44bca 100644 --- a/content/riak/cs/2.1.0/cookbooks/monitoring-and-metrics.md +++ b/content/riak/cs/2.1.0/cookbooks/monitoring-and-metrics.md @@ -10,11 +10,11 @@ menu: project: "riak_cs" project_version: "2.1.0" aliases: - - /riakcs/2.1.0/cookbooks/Monitoring-and-Metrics/ + - /riakcs/2.1.0/cookbooks/monitoring-and-metrics/ --- [amazon]: http://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html -[s3 api]: http://docs.basho.com/riakcs/latest/references/apis/storage/s3/ +[s3 api]: {{< baseurl >}}riak/cs/latest/references/apis/storage/s3/ Riak S2 (CS) includes metrics and operational statistics to help you monitor your system in more detail and diagnose system issues more easily. There are three major categories of metrics: diff --git a/content/riak/cs/2.1.0/cookbooks/querying-access-statistics.md b/content/riak/cs/2.1.0/cookbooks/querying-access-statistics.md index 204013656f..9fec9eaad4 100644 --- a/content/riak/cs/2.1.0/cookbooks/querying-access-statistics.md +++ b/content/riak/cs/2.1.0/cookbooks/querying-access-statistics.md @@ -24,7 +24,7 @@ and access. {{% /note %}} For information about how access statistics are logged, please read -[Usage and Billing Data](/riak/cs/2.1.0/cookbooks/usage-and-billing-data). +[Usage and Billing Data]({{<baseurl>}}riak/cs/2.1.0/cookbooks/usage-and-billing-data). The following sections discuss accessing the access statistics using bare HTTP requests. Query parameters are used to specify the types and @@ -81,7 +81,7 @@ HTTP/1.1 404 Object Not Found > **Authentication Required** > > Queries to the usage resources described here must be authenticated as -described in the [Authentication documentation](/riak/cs/2.1.0/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files. +described in the [Authentication documentation]({{<baseurl>}}riak/cs/2.1.0/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files. The usage HTTP resource provides both access and storage statistics. Since each of these queries can be taxing in its own right, they are @@ -234,7 +234,7 @@ the amount of time that may be retrieved in any request is limited.
The limit is configured by the `riak_cs` application environment variable `usage_request_limit`. The value is expressed as an integer -number of archive intervals (see [Usage and Billing Data](/riak/cs/2.1.0/cookbooks/usage-and-billing-data) for a +number of archive intervals (see [Usage and Billing Data]({{<baseurl>}}riak/cs/2.1.0/cookbooks/usage-and-billing-data) for a description of archive intervals). The default value is `744`, which is 31 days at the default archive interval of one hour (24 x 31 = 744). diff --git a/content/riak/cs/2.1.0/cookbooks/querying-storage-statistics.md b/content/riak/cs/2.1.0/cookbooks/querying-storage-statistics.md index 7bbf1ed6a7..68098eb585 100644 --- a/content/riak/cs/2.1.0/cookbooks/querying-storage-statistics.md +++ b/content/riak/cs/2.1.0/cookbooks/querying-storage-statistics.md @@ -26,9 +26,9 @@ and access. > **Note**: > -> Storage statistics are not calculated by default. Please read [Usage and Billing Data](/riak/cs/2.1.0/cookbooks/usage-and-billing-data) for details about how to enable storage calculation archiving. +> Storage statistics are not calculated by default. Please read [Usage and Billing Data]({{<baseurl>}}riak/cs/2.1.0/cookbooks/usage-and-billing-data) for details about how to enable storage calculation archiving. -The basics of querying storage statistics, including the URL used and the parameters for specifying the time slice, are the same as they are for [Querying Access Statistics](/riak/cs/2.1.0/cookbooks/querying-access-statistics). +The basics of querying storage statistics, including the URL used and the parameters for specifying the time slice, are the same as they are for [Querying Access Statistics]({{<baseurl>}}riak/cs/2.1.0/cookbooks/querying-access-statistics). Please refer to the descriptions there for more details. @@ -39,7 +39,7 @@ been configured to something other than the default CS port of `8080`. > **Authentication Required** > -> Queries to the usage resources described here must be authenticated as described in the [Authentication documentation](/riak/cs/2.1.0/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files. +> Queries to the usage resources described here must be authenticated as described in the [Authentication documentation]({{<baseurl>}}riak/cs/2.1.0/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files. The usage HTTP resource provides both access and storage statistics. Since each of these queries can be taxing in its own right, they are both omitted from the result by default: @@ -97,7 +97,7 @@ There are no statistics included in this report because the default time span is ### S3 Object-style Access -As described in [Querying Access Statistics](/riak/cs/2.1.0/cookbooks/querying-access-statistics), these statistics are also available as S3 objects. To add storage statistics to the result, add the character `b` to the `Options` portion of the object's path. For example, the following command would produce storage statistics in XML format: +As described in [Querying Access Statistics]({{<baseurl>}}riak/cs/2.1.0/cookbooks/querying-access-statistics), these statistics are also available as S3 objects. To add storage statistics to the result, add the character `b` to the `Options` portion of the object's path.
For example, the following command would produce storage statistics in XML format: ```bash s3cmd get s3://riak-cs/usage/8NK4FH2SGKJJM8JIP2GU/bx/20120315T140000Z/20120315T160000Z ``` diff --git a/content/riak/cs/2.1.0/cookbooks/release-notes.md b/content/riak/cs/2.1.0/cookbooks/release-notes.md index 0db0aef0c6..b9e4ec2299 100644 --- a/content/riak/cs/2.1.0/cookbooks/release-notes.md +++ b/content/riak/cs/2.1.0/cookbooks/release-notes.md @@ -13,7 +13,7 @@ aliases: - /riakcs/2.1.0/cookbooks/Riak-CS-Release-Notes/ - /riak/cs/2.1.0/cookbooks/Riak-CS-Release-Notes/ --- -[riak_cs_multibag_support]: /riak/cs/2.1.0/cookbooks/supercluster +[riak_cs_multibag_support]: {{<baseurl>}}riak/cs/2.1.0/cookbooks/supercluster [riak_cs_1.5_release_notes_upgrading]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#notes-on-upgrading [riak_cs_1.5_release_notes_upgrading_1]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#notes-on-upgrading-1 @@ -54,7 +54,7 @@ New metrics have been added that enable you to determine the health of your Riak... * Memory information about the riak-cs virtual machine * HTTP listener information: active sockets and waiting acceptors -**Note:** stats item names from prior to 2.0.x are not preserved; they have been renamed or removed. No backward consistency is maintained. Please see [the documentation](docs.basho.com/riakcs/latest/cookbooks/Monitoring-and-Metrics/) for more information. +**Note:** stats item names from prior to 2.0.x are not preserved; they have been renamed or removed. No backward consistency is maintained. Please see [the documentation]({{<baseurl>}}riak/cs/latest/cookbooks/monitoring-and-metrics/) for more information. * [[PR 1189](https://github.com/basho/riak_cs/pull/1189)] * [[PR 1180](https://github.com/basho/riak_cs/pull/1180)] @@ -69,7 +69,7 @@ Additional storage usage metrics are also available. These metrics are gathered * [[PR 1120](https://github.com/basho/riak_cs/pull/1120)] #### `riak-cs-admin` The following administration CLIs have been replaced by the [`riak-cs-admin` command]({{< baseurl >}}riak/cs/latest/cookbooks/command-line-tools/): * `riak-cs-storage` * `riak-cs-gc` @@ -89,7 +89,7 @@ Several new options have been added to the `riak-cs-admin gc` command: * Riak S2 2.0 (and older) has a race condition where fullsync replication and garbage collection may resurrect deleted blocks without any way to delete them again. When real-time replication and replication of a garbage collection bucket entry object being dropped from the real-time queue are combined, blocks may remain on the sink side without being collected. Riak S2 2.1 introduces deterministic garbage collection to avoid fullsync replication. Additionally, garbage collection and fullsync replication run concurrently, and work on the same blocks and manifests. You can now specify the range of time using the `--start` and `--end` flags with `riak-cs-admin gc batch` so that the garbage collector collects deleted objects synchronously on both sink and source sides. [[PR 1147](https://github.com/basho/riak_cs/pull/1147)] * `riak-cs-admin gc earliest-keys` is available so you can find the oldest entry after `epoch_start` in garbage collection. With this option, you can stay informed of garbage collection progress. [[PR 1160](https://github.com/basho/riak_cs/pull/1160)]
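The `--start`/`--end` flags described above suggest invocations like the following sketch. The command and flag names come from the release notes themselves; the ISO 8601 timestamp format is an assumption borrowed from the usage-query examples elsewhere in these docs.

```bash
# Hedged sketch: collect garbage for a bounded window of deletions, so
# source and sink clusters can be swept over the same range.
riak-cs-admin gc batch --start 20150801T000000Z --end 20150831T235959Z

# Report the oldest uncollected entry after epoch_start.
riak-cs-admin gc earliest-keys
```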
-More information on garbage collection can be found in the [documentation](http://docs.basho.com/riakcs/latest/cookbooks/garbage-collection/). +More information on garbage collection can be found in the [documentation]({{< baseurl >}}riak/cs/latest/cookbooks/garbage-collection/). ### Additions @@ -113,7 +113,7 @@ More information on garbage collection can be found in the [documentation](http: * An option has been added to replace the `PR=all user GET` option with `PR=one` just before authentication. This option improves latency, especially in the presence of slow (or actually-failing) nodes blocking the whole request flow because of PR=all. When enabled, a user's owned-bucket list is never pruned after a bucket is deleted; instead, it is just marked as deleted. [[PR 1191](https://github.com/basho/riak_cs/pull/1191)] * An info log has been added when starting a storage calculation batch. [[PR 1238](https://github.com/basho/riak_cs/pull/1238)] * `GET Bucket` requests now have clearer responses. A 501 stub for Bucket lifecycle and a simple stub for Bucket requestPayment have been added. [[PR 1223](https://github.com/basho/riak_cs/pull/1223)] * Several user-friendly features have been added to [`riak-cs-debug`]({{< baseurl >}}riak/cs/latest/cookbooks/command-line-tools/): fine-grained information gathering options, user-defined filtering for configuration files, and verbose output for failed commands. [[PR 1236](https://github.com/basho/riak_cs/pull/1236)] #### Enterprise * MDC has `proxy_get`, which makes block objects propagate to site clusters when they are requested. Now, multibag configuration with MDC supports `proxy_get`. [[PR 1171](https://github.com/basho/riak_cs/pull/1171) and [PR 25](https://github.com/basho/riak_cs_multibag/pull/25)]
More details are included [here]({{< baseurl >}}riak/cs/latest/cookbooks/configuration/riak-cs/). ### Changes @@ -646,7 +646,7 @@ To avoid having a limit, set `max_buckets_per_user_user` to `unlimited`. ### Download -Please see the [Riak CS Downloads Page](http://docs.basho.com/riakcs/latest/riakcs-downloads/). +Please see the [Riak CS Downloads Page]({{< baseurl >}}riak/cs/latest/downloads/). ### Feedback @@ -662,7 +662,7 @@ Or via email at **info@basho.com**. ### Additions -* Added Multibag Technical Preview to Riak CS. More info is available [here](http://docs.basho.com/riakcs/latest/cookbooks/multibag/) +* Added Multibag Technical Preview to Riak CS. More info is available [here]({{< baseurl >}}riak/cs/latest/cookbooks/multibag/) * A new command `riak-cs-debug` including `cluster-info` [riak_cs/#769](https://github.com/basho/riak_cs/pull/769), [riak_cs/#832](https://github.com/basho/riak_cs/pull/832) * Tie up all existing commands into a new command `riak-cs-admin` [riak_cs/#839](https://github.com/basho/riak_cs/pull/839) * Add a command `riak-cs-admin stanchion` to switch Stanchion IP and port manually [riak_cs/#657](https://github.com/basho/riak_cs/pull/657) @@ -1003,7 +1003,7 @@ they will all share the name "struct". #### Additions -* Support query parameter authentication as specified in [http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html](Signing and Authenticating REST Requests). +* Support query parameter authentication as specified in [Signing and Authenticating REST Requests](http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html). ## Riak CS 1.0.1 diff --git a/content/riak/cs/2.1.0/cookbooks/rolling-upgrades.md b/content/riak/cs/2.1.0/cookbooks/rolling-upgrades.md index 653f6a26e0..b67d701ed4 100644 --- a/content/riak/cs/2.1.0/cookbooks/rolling-upgrades.md +++ b/content/riak/cs/2.1.0/cookbooks/rolling-upgrades.md @@ -18,7 +18,7 @@ Each node in a Riak CS cluster contains settings that define its operating modes and API coverage. The following steps outline the process of upgrading Riak CS in a rolling fashion. -Be sure to check the Riak CS [Version Compatibility](/riak/cs/2.1.0/cookbooks/version-compatibility) chart to ensure that your version of Riak, Riak CS, and Stanchion have been tested to work together. As Basho supports upgrades from the previous two major versions, this document will cover upgrades from Riak CS 1.4.x and Riak CS 1.5.x. +Be sure to check the Riak CS [Version Compatibility]({{}}riak/cs/2.1.0/cookbooks/version-compatibility) chart to ensure that your version of Riak, Riak CS, and Stanchion have been tested to work together. As Basho supports upgrades from the previous two major versions, this document will cover upgrades from Riak CS 1.4.x and Riak CS 1.5.x. As Riak CS 2.0.0 only works with Riak 2.0.5, the underlying Riak installation *must* be upgraded to Riak 2.0.5. @@ -80,9 +80,9 @@ detailed description.
4. Upgrade Riak, Riak CS, and Stanchion. See the <a href="{{< baseurl >}}riak/cs/latest/downloads">Riak CS Downloads</a> and <a href="{{< baseurl >}}riak/kv/latest/downloads">Riak Downloads</a> pages to find the appropriate packages. **Debian** / **Ubuntu** @@ -160,7 +160,7 @@ detailed description. ]}, ``` - and so on. More details can be found at [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs). + and so on. More details can be found at [configuring Riak CS]({{<baseurl>}}riak/cs/2.1.0/cookbooks/configuration/riak-cs).
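Condensed, the rolling-upgrade steps above amount to roughly the following per-node sequence. This is a sketch assuming Debian/Ubuntu packages and the stock `riak`/`riak-cs` service scripts; production nodes would also be drained from the load balancer first, and the Stanchion host gets the equivalent `stanchion` steps.

```bash
# Hedged per-node rolling upgrade (one node at a time).
riak-cs stop
riak stop

# Riak itself must be moved to 2.0.5 before Riak CS 2.x will run on it.
sudo apt-get install -y riak riak-cs

riak start
riak-cs start
riak-cs ping   # confirm the node answers before upgrading the next one
```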
Note on Memory Sizing
diff --git a/content/riak/cs/2.1.0/cookbooks/supercluster.md b/content/riak/cs/2.1.0/cookbooks/supercluster.md index 77300b4ac5..8705d09286 100644 --- a/content/riak/cs/2.1.0/cookbooks/supercluster.md +++ b/content/riak/cs/2.1.0/cookbooks/supercluster.md @@ -21,15 +21,15 @@ customers. It is not yet suitable for production use. While [Riak CS Enterprise](http://basho.com/riak-enterprise) enables you to distribute Riak CS objects across multiple data centers in a -[source/sink pattern](/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture), all linked clusters are treated the same. In Riak CS version 1.5.0, however, Basho has added **supercluster** support to Riak CS Enterprise. +[source/sink pattern]({{<baseurl>}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture), all linked clusters are treated the same. In Riak CS version 1.5.0, however, Basho has added **supercluster** support to Riak CS Enterprise. With supercluster support, you can store object manifests and blocks in separate clusters or groups of clusters, a.k.a. **a set of supercluster members**, enhancing the scalability and overall storage capabilities of a Riak CS installation. ## Supercluster members -A supercluster member is a set of clusters linked together via [Multi-Datacenter Replication](/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture)\(MDC). -Without MDC support, a supercluster member consists of a single cluster. With MDC support, however, a supercluster member can consist of several linked clusters. You can assign members **weights** that determine the likelihood that objects, blocks, and manifests will be stored there. For example, if you expect to use one supercluster member more heavily than another you can increase the weight of that member using the interface described in [Riak CS Command-line Tools](/riak/cs/2.1.0/cookbooks/command-line-tools). +A supercluster member is a set of clusters linked together via [Multi-Datacenter Replication]({{<baseurl>}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture)\(MDC). +Without MDC support, a supercluster member consists of a single cluster. With MDC support, however, a supercluster member can consist of several linked clusters. You can assign members **weights** that determine the likelihood that objects, blocks, and manifests will be stored there. For example, if you expect to use one supercluster member more heavily than another you can increase the weight of that member using the interface described in [Riak CS Command-line Tools]({{<baseurl>}}riak/cs/2.1.0/cookbooks/command-line-tools). ## The Master Member @@ -134,7 +134,7 @@ That involves performing the following steps on each node: 1. Stop the node 2. Upgrade Stanchion to a version that supports Riak CS supercluster, i.e. Riak CS 1.5.0 and later -3. Set your desired Stanchion [configuration](/riak/cs/2.1.0/cookbooks/configuration/stanchion) +3. Set your desired Stanchion [configuration]({{<baseurl>}}riak/cs/2.1.0/cookbooks/configuration/stanchion) 4. Start Stanchion on each node ### Add Clusters @@ -145,7 +145,7 @@ connection information as explained above in the [supercluster Configuration](#s ### Set Weights -When a new supercluster member is added, you must first set the weight of that member to zero using the [`riak-cs-supercluster`](/riak/cs/2.1.0/cookbooks/command-line-tools) command-line interface. +When a new supercluster member is added, you must first set the weight of that member to zero using the [`riak-cs-supercluster`]({{<baseurl>}}riak/cs/2.1.0/cookbooks/command-line-tools) command-line interface.
The example below sets the weight of the recently added supercluster member `Alpha` to zero: @@ -177,9 +177,9 @@ supercluster feature. ## Command Line Interface Complete documentation for the `riak-cs-supercluster` interface can be found -in our documentation on [Riak CS Command Line Tools](/riak/cs/2.1.0/cookbooks/command-line-tools/#riak-cs-supercluster). +in our documentation on [Riak CS Command Line Tools]({{<baseurl>}}riak/cs/2.1.0/cookbooks/command-line-tools/#riak-cs-supercluster). ## Limitations -Riak CS supercluster does not currently support [proxy gets](/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter/#riak-cs-mdc-gets) from +Riak CS supercluster does not currently support [proxy gets]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter/#riak-cs-mdc-gets) from sink clusters. diff --git a/content/riak/cs/2.1.0/cookbooks/system-features.md b/content/riak/cs/2.1.0/cookbooks/system-features.md index 320babbd18..42cd8f91b0 100644 --- a/content/riak/cs/2.1.0/cookbooks/system-features.md +++ b/content/riak/cs/2.1.0/cookbooks/system-features.md @@ -9,9 +9,9 @@ aliases: The following pages detail Riak CS's system features. -* [Access Control Lists](/riak/cs/2.1.0/cookbooks/access-control-lists) -* [Authentication](/riak/cs/2.1.0/cookbooks/authentication) -* [Monitoring and Metrics](/riak/cs/2.1.0/cookbooks/monitoring-and-metrics) -* [Querying Access Statistics](/riak/cs/2.1.0/cookbooks/querying-access-statistics) -* [Querying Storage Statistics](/riak/cs/2.1.0/cookbooks/querying-storage-statistics) -* [Usage and Billing Data](/riak/cs/2.1.0/cookbooks/usage-and-billing-data) +* [Access Control Lists]({{<baseurl>}}riak/cs/2.1.0/cookbooks/access-control-lists) +* [Authentication]({{<baseurl>}}riak/cs/2.1.0/cookbooks/authentication) +* [Monitoring and Metrics]({{<baseurl>}}riak/cs/2.1.0/cookbooks/monitoring-and-metrics) +* [Querying Access Statistics]({{<baseurl>}}riak/cs/2.1.0/cookbooks/querying-access-statistics) +* [Querying Storage Statistics]({{<baseurl>}}riak/cs/2.1.0/cookbooks/querying-storage-statistics) +* [Usage and Billing Data]({{<baseurl>}}riak/cs/2.1.0/cookbooks/usage-and-billing-data) diff --git a/content/riak/cs/2.1.0/cookbooks/usage-and-billing-data.md b/content/riak/cs/2.1.0/cookbooks/usage-and-billing-data.md index c50b8ffeb9..e67ba8523d 100644 --- a/content/riak/cs/2.1.0/cookbooks/usage-and-billing-data.md +++ b/content/riak/cs/2.1.0/cookbooks/usage-and-billing-data.md @@ -28,7 +28,7 @@ and access. Access stats are tracked on a per-user basis, as rollups for slices of time. They are stored just like other Riak CS data, in the `cs.access` bucket in particular. For information about querying access statistics, -please read [Querying Access Statistics](/riak/cs/2.1.0/cookbooks/querying-access-statistics). +please read [Querying Access Statistics]({{<baseurl>}}riak/cs/2.1.0/cookbooks/querying-access-statistics). ## Overview @@ -71,7 +71,7 @@ logger determines the operation type by comparing the method, resource module, and path to a known table. For example, it knows that a `GET` on the *key* module with the `acl` query parameter in the path is a `KeyReadACL` operation. A `PUT` to the same resource without the `acl` -query parameter is a `KeyWrite` operation. See [Querying Access Statistics](/riak/cs/2.1.0/cookbooks/querying-access-statistics) for a list of all operation types. +query parameter is a `KeyWrite` operation. See [Querying Access Statistics]({{<baseurl>}}riak/cs/2.1.0/cookbooks/querying-access-statistics) for a list of all operation types. ### Log Accumulation @@ -181,7 +181,7 @@ slices of time.
They are stored in the same Riak cluster as other Riak CS data, in the `cs.storage` bucket. For detailed information about querying storage statistics, please read -[Querying Storage Statistics](/riak/cs/2.1.0/cookbooks/querying-storage-statistics). +[Querying Storage Statistics]({{<baseurl>}}riak/cs/2.1.0/cookbooks/querying-storage-statistics). ### High Level @@ -201,7 +201,7 @@ The storage calculation system uses MapReduce to sum the files in a bucket. This means you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage. -See [Configuring Riak for CS](/riak/cs/2.1.0/cookbooks/configuration/riak-for-cs) for directions on setting this up. +See [Configuring Riak for CS]({{<baseurl>}}riak/cs/2.1.0/cookbooks/configuration/riak-for-cs) for directions on setting this up. ### Scheduling and Manual Triggering diff --git a/content/riak/cs/2.1.0/cookbooks/using-with-keystone.md b/content/riak/cs/2.1.0/cookbooks/using-with-keystone.md index b58fccf560..e9a66354b9 100644 --- a/content/riak/cs/2.1.0/cookbooks/using-with-keystone.md +++ b/content/riak/cs/2.1.0/cookbooks/using-with-keystone.md @@ -291,7 +291,7 @@ section of the Riak CS `advanced.config` or `app.config` files: ### Keystone Setup -Follow the procedures documented in [Keystone Setup](/riak/cs/2.1.0/cookbooks/keystone-setup) to set up and run +Follow the procedures documented in [Keystone Setup]({{<baseurl>}}riak/cs/2.1.0/cookbooks/keystone-setup) to set up and run Keystone. 1. Create a tenant called `test`: diff --git a/content/riak/cs/2.1.0/index.md b/content/riak/cs/2.1.0/index.md index dc2b7a33bf..2fa0cb908c 100644 --- a/content/riak/cs/2.1.0/index.md +++ b/content/riak/cs/2.1.0/index.md @@ -27,11 +27,11 @@ and metering. ### Amazon S3-API Compatibility -Riak CS has a built-in S3 interface with S3 Access Control List ([ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html)) support, which means that you can both use existing S3 tools and frameworks to manage your data and also import and extract data from Amazon directly. The HTTP REST API supports service, bucket, and object-level operations to easily store and retrieve data. There is also support for the [OpenStack Swift API](/riak/cs/2.1.0/references/appendices/comparisons/swift/) +Riak CS has a built-in S3 interface with S3 Access Control List ([ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html)) support, which means that you can both use existing S3 tools and frameworks to manage your data and also import and extract data from Amazon directly. The HTTP REST API supports service, bucket, and object-level operations to easily store and retrieve data. There is also support for the [OpenStack Swift API]({{<baseurl>}}riak/cs/2.1.0/references/appendices/comparisons/swift/) ### Per-Tenant Visibility -With the Riak CS [Reporting API](/riak/cs/2.1.0/cookbooks/monitoring-and-metrics), you can access per-tenant usage data and statistics over network I/O. This reporting functionality supports use cases including accounting, +With the Riak CS [Reporting API]({{<baseurl>}}riak/cs/2.1.0/cookbooks/monitoring-and-metrics), you can access per-tenant usage data and statistics over network I/O. This reporting functionality supports use cases including accounting, subscription, chargebacks, plugins with billing systems, efficient multi-department utilization, and much more.
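As a concrete illustration of the Reporting API mentioned above, the sketch below requests one user's usage rollup over plain HTTP. The URL shape and key ID reuse the usage-query examples earlier in this document; the option letters (`a` access, `b` storage, `j` JSON) and the assumption that admin authentication is disabled for local testing are mine.

```bash
# Hedged sketch: per-tenant access (a) and storage (b) stats as JSON (j)
# for a two-hour slice, against a local development node.
KEY_ID="8NK4FH2SGKJJM8JIP2GU"   # example key ID reused from this document
curl -s "http://localhost:8080/riak-cs/usage/${KEY_ID}/abj/20120315T140000Z/20120315T160000Z"
```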
### Supports Large Objects of Arbitrary Content Type, Plus Metadata diff --git a/content/riak/cs/2.1.0/references/apis/storage.md b/content/riak/cs/2.1.0/references/apis/storage.md index ab3cf75391..fbea66fa7c 100644 --- a/content/riak/cs/2.1.0/references/apis/storage.md +++ b/content/riak/cs/2.1.0/references/apis/storage.md @@ -55,30 +55,30 @@ Multipart Uploads {{1.3.0-}} | Coming Soon | Planned for future release | ## Service-level Operations -* [GET Service](/riak/cs/2.1.0/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request +* [GET Service]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request ## Bucket-level Operations -* [GET Bucket](/riak/cs/2.1.0/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket -* [GET Bucket ACL](/riak/cs/2.1.0/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket -* [GET Bucket policy](/riak/cs/2.1.0/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket -* [PUT Bucket](/riak/cs/2.1.0/references/apis/storage/s3/put-bucket) --- Creates a new bucket -* [PUT Bucket ACL](/riak/cs/2.1.0/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions for a bucket -* [PUT Bucket policy](/riak/cs/2.1.0/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket -* [DELETE Bucket](/riak/cs/2.1.0/references/apis/storage/s3/delete-bucket) --- Deletes a bucket -* [DELETE Bucket policy](/riak/cs/2.1.0/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket +* [GET Bucket]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket +* [GET Bucket ACL]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket +* [GET Bucket policy]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket +* [PUT Bucket]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-bucket) --- Creates a new bucket +* [PUT Bucket ACL]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions for a bucket +* [PUT Bucket policy]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket +* [DELETE Bucket]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/delete-bucket) --- Deletes a bucket +* [DELETE Bucket policy]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket ## Object-level Operations -* [GET Object](/riak/cs/2.1.0/references/apis/storage/s3/get-object) --- Retrieves an object -* [GET Object ACL](/riak/cs/2.1.0/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object -* [PUT Object](/riak/cs/2.1.0/references/apis/storage/s3/put-object) --- Stores an object to a bucket -* [PUT Object (Copy)](/riak/cs/2.1.0/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object -* [PUT Object ACL](/riak/cs/2.1.0/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object -* [HEAD Object](/riak/cs/2.1.0/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object) -* [DELETE Object](/riak/cs/2.1.0/references/apis/storage/s3/delete-object) --- Deletes an object
+* [GET Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-object) --- Retrieves an object +* [GET Object ACL]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object +* [PUT Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-object) --- Stores an object to a bucket +* [PUT Object (Copy)]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object +* [PUT Object ACL]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object +* [HEAD Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object) +* [DELETE Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/delete-object) --- Deletes an object ## Multipart Upload @@ -87,19 +87,19 @@ Object parts can be uploaded independently and in any order. After all parts are uploaded, Riak CS assembles an object out of the parts. When your object size reaches 100MB, you should consider using multipart uploads instead of uploading the object in a single operation. Read more -about multipart uploads on the [overview page](/riak/cs/2.1.0/cookbooks/multipart-upload-overview). +about multipart uploads on the [overview page]({{<baseurl>}}riak/cs/2.1.0/cookbooks/multipart-upload-overview). -* [Initiate Multipart Upload](/riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID -* [Upload Part](/riak/cs/2.1.0/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload -* [Complete Multipart Upload](/riak/cs/2.1.0/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts -* [Abort Multipart Upload](/riak/cs/2.1.0/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts -* [List Parts](/riak/cs/2.1.0/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload. -* [List Multipart Uploads](/riak/cs/2.1.0/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted. +* [Initiate Multipart Upload]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID +* [Upload Part]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload +* [Complete Multipart Upload]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts +* [Abort Multipart Upload]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts +* [List Parts]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload. +* [List Multipart Uploads]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
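In practice an S3 client drives these calls for you. As a hedged illustration, `s3cmd` (already used elsewhere in this document) will issue Initiate, Upload Part, and Complete automatically when told to chunk a large upload; the flag below is s3cmd's own, not something specified by the Riak CS docs.

```bash
# Upload a large object in 15 MB parts; s3cmd performs the multipart
# Initiate, Upload Part, and Complete requests against the S3 API.
s3cmd put --multipart-chunk-size-mb=15 large.iso s3://my-bucket/large.iso
```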
## Common Headers -* [Common Riak CS Request Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-request-headers) -* [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers) +* [Common Riak CS Request Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-request-headers) +* [Common Riak CS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers) There are two storage API options for Riak CS. The first and most fully featured is the S3 API. There is also limited but improving support for @@ -118,13 +118,13 @@ resource modules. * Module: `riak_cs_s3_rewrite` * [Documentation](http://docs.aws.amazon.com/AmazonS3/latest/API/APIRest.html) -* [Mapping](/riak/cs/2.1.0/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api) +* [Mapping]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api) ### Openstack Object Storage API (v1) * Module: `riak_cs_oos_rewrite` * [Documentation](http://docs.openstack.org/api/openstack-object-storage/1.0/content/index.html) -* [Mapping](/riak/cs/2.1.0/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api) +* [Mapping]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api) Selecting an API is done by adding or changing the `rewrite_module` key in the Riak CS `riak-cs.conf` file, or the old-style `advanced.config` or `app.config` @@ -157,5 +157,5 @@ included when installing a Riak CS package or building from source. More details for each option can be found by following one of the following links: -* [S3 API](/riak/cs/2.1.0/references/apis/storage/s3/) -* [OpenStack API](/riak/cs/2.1.0/references/apis/storage/openstack/) +* [S3 API]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/) +* [OpenStack API]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/openstack/) diff --git a/content/riak/cs/2.1.0/references/apis/storage/openstack.md b/content/riak/cs/2.1.0/references/apis/storage/openstack.md index 4d4a5c4bc2..e83c4fab4a 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/openstack.md +++ b/content/riak/cs/2.1.0/references/apis/storage/openstack.md @@ -47,16 +47,16 @@ Update Object Metadata | Coming Soon | Planned for future release | ## Storage Account Services -* [List Containers](/riak/cs/2.1.0/references/apis/storage/openstack/list-containers) --- Lists the containers owned by an account +* [List Containers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/openstack/list-containers) --- Lists the containers owned by an account ## Storage Container Services -* [List Objects](/riak/cs/2.1.0/references/apis/storage/openstack/list-objects) --- Lists the objects in a container -* [Create Container](/riak/cs/2.1.0/references/apis/storage/openstack/create-container) --- Creates a new container -* [Delete Container](/riak/cs/2.1.0/references/apis/storage/openstack/delete-container) --- Deletes a container +* [List Objects]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/openstack/list-objects) --- Lists the objects in a container +* [Create Container]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/openstack/create-container) --- Creates a new container +* [Delete Container]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/openstack/delete-container) --- Deletes a container ## Storage Object Services -* [Get Object](/riak/cs/2.1.0/references/apis/storage/openstack/get-object) --- Retrieves an object -* [Create or Update Object](/riak/cs/2.1.0/references/apis/storage/openstack/create-object) --- Write an object in a container
-* [Delete Object](/riak/cs/2.1.0/references/apis/storage/openstack/delete-object) --- Delete an object from a container +* [Get Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/openstack/get-object) --- Retrieves an object +* [Create or Update Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/openstack/create-object) --- Write an object in a container +* [Delete Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/openstack/delete-object) --- Delete an object from a container diff --git a/content/riak/cs/2.1.0/references/apis/storage/s3.md b/content/riak/cs/2.1.0/references/apis/storage/s3.md index 8ffd0a0826..6785d1d753 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/s3.md +++ b/content/riak/cs/2.1.0/references/apis/storage/s3.md @@ -57,30 +57,30 @@ Multipart Uploads {{1.5.0+}} | ✓ | | ## Service-level Operations -* [GET Service](/riak/cs/2.1.0/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request +* [GET Service]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request ## Bucket-level Operations -* [GET Bucket](/riak/cs/2.1.0/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket -* [GET Bucket ACL](/riak/cs/2.1.0/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket -* [GET Bucket policy](/riak/cs/2.1.0/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket -* [PUT Bucket](/riak/cs/2.1.0/references/apis/storage/s3/put-bucket) --- Creates a new bucket -* [PUT Bucket ACL](/riak/cs/2.1.0/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions for a bucket -* [PUT Bucket policy](/riak/cs/2.1.0/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket -* [DELETE Bucket](/riak/cs/2.1.0/references/apis/storage/s3/delete-bucket) --- Deletes a bucket -* [DELETE Bucket policy](/riak/cs/2.1.0/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket +* [GET Bucket]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-bucket) --- Returns a list of the objects within a bucket +* [GET Bucket ACL]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket +* [GET Bucket policy]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket +* [PUT Bucket]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-bucket) --- Creates a new bucket +* [PUT Bucket ACL]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions for a bucket +* [PUT Bucket policy]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket +* [DELETE Bucket]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/delete-bucket) --- Deletes a bucket +* [DELETE Bucket policy]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket ## Object-level Operations -* [GET Object](/riak/cs/2.1.0/references/apis/storage/s3/get-object) --- Retrieves an object -* [GET Object ACL](/riak/cs/2.1.0/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object -* [PUT Object](/riak/cs/2.1.0/references/apis/storage/s3/put-object) --- Stores an object to a bucket -* [PUT Object (Copy)](/riak/cs/2.1.0/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object -* [PUT Object ACL](/riak/cs/2.1.0/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object
-* [HEAD Object](/riak/cs/2.1.0/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object) -* [DELETE Object](/riak/cs/2.1.0/references/apis/storage/s3/delete-object) --- Deletes an object +* [GET Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-object) --- Retrieves an object +* [GET Object ACL]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object +* [PUT Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-object) --- Stores an object to a bucket +* [PUT Object (Copy)]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object +* [PUT Object ACL]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object +* [HEAD Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object) +* [DELETE Object]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/delete-object) --- Deletes an object ## Multipart Upload @@ -89,16 +89,16 @@ Object parts can be uploaded independently and in any order. After all parts are uploaded, Riak CS assembles an object out of the parts. When your object size reaches 100MB, you should consider using multipart uploads instead of uploading the object in a single operation. Read more -about multipart uploads on the [overview page](/riak/cs/2.1.0/cookbooks/multipart-upload-overview). +about multipart uploads on the [overview page]({{<baseurl>}}riak/cs/2.1.0/cookbooks/multipart-upload-overview). -* [Initiate Multipart Upload](/riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID -* [Upload Part](/riak/cs/2.1.0/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload -* [Complete Multipart Upload](/riak/cs/2.1.0/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts -* [Abort Multipart Upload](/riak/cs/2.1.0/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts -* [List Parts](/riak/cs/2.1.0/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload. -* [List Multipart Uploads](/riak/cs/2.1.0/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted. +* [Initiate Multipart Upload]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID +* [Upload Part]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload +* [Complete Multipart Upload]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts +* [Abort Multipart Upload]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts +* [List Parts]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload. +* [List Multipart Uploads]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
## Common Headers -* [Common Riak CS Request Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-request-headers) -* [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers) +* [Common Riak CS Request Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-request-headers) +* [Common Riak CS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers) diff --git a/content/riak/cs/2.1.0/references/apis/storage/s3/abort-multipart-upload.md b/content/riak/cs/2.1.0/references/apis/storage/s3/abort-multipart-upload.md index 05bb97c796..216b6cbe22 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/s3/abort-multipart-upload.md +++ b/content/riak/cs/2.1.0/references/apis/storage/s3/abort-multipart-upload.md @@ -27,7 +27,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Request Elements @@ -37,7 +37,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.0/references/apis/storage/s3/complete-multipart-upload.md b/content/riak/cs/2.1.0/references/apis/storage/s3/complete-multipart-upload.md index a86838a185..081a9f6e91 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/s3/complete-multipart-upload.md +++ b/content/riak/cs/2.1.0/references/apis/storage/s3/complete-multipart-upload.md @@ -45,7 +45,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Request Elements @@ -75,7 +75,7 @@ This implementation of the operation uses only response headers that are common ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers).
### Response Elements diff --git a/content/riak/cs/2.1.0/references/apis/storage/s3/delete-bucket-policy.md b/content/riak/cs/2.1.0/references/apis/storage/s3/delete-bucket-policy.md index ffe6fb264a..d77b3cf2d8 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/s3/delete-bucket-policy.md +++ b/content/riak/cs/2.1.0/references/apis/storage/s3/delete-bucket-policy.md @@ -29,7 +29,7 @@ This operation does not use request parameters. ### Request Headers -This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-request-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -39,7 +39,7 @@ No body should be appended. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.0/references/apis/storage/s3/get-bucket-policy.md b/content/riak/cs/2.1.0/references/apis/storage/s3/get-bucket-policy.md index ec899b3adf..77237c3ebf 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/s3/get-bucket-policy.md +++ b/content/riak/cs/2.1.0/references/apis/storage/s3/get-bucket-policy.md @@ -31,7 +31,7 @@ This operation does not use request parameters. ### Request Headers -This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-request-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -41,7 +41,7 @@ No body should be appended. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload.md b/content/riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload.md index 0dca1cde8b..3bc63d804b 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload.md +++ b/content/riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload.md @@ -55,7 +55,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.0/references/apis/storage/s3/list-multipart-uploads.md b/content/riak/cs/2.1.0/references/apis/storage/s3/list-multipart-uploads.md index c92c026337..19bea1a087 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/s3/list-multipart-uploads.md +++ b/content/riak/cs/2.1.0/references/apis/storage/s3/list-multipart-uploads.md @@ -54,7 +54,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Request Elements @@ -64,7 +64,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.0/references/apis/storage/s3/list-parts.md b/content/riak/cs/2.1.0/references/apis/storage/s3/list-parts.md index c442345562..6c18edb508 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/s3/list-parts.md +++ b/content/riak/cs/2.1.0/references/apis/storage/s3/list-parts.md @@ -43,7 +43,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Request Elements @@ -53,7 +53,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{<baseurl>}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers).
### Response Elements diff --git a/content/riak/cs/2.1.0/references/apis/storage/s3/put-bucket-policy.md b/content/riak/cs/2.1.0/references/apis/storage/s3/put-bucket-policy.md index bc1b0cefb9..0a94ff40b4 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/s3/put-bucket-policy.md +++ b/content/riak/cs/2.1.0/references/apis/storage/s3/put-bucket-policy.md @@ -37,7 +37,7 @@ This operation does not use request parameters. ### Request Headers -This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-request-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.0/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -88,7 +88,7 @@ More information on S3 Policies can be found in Amazon's [Permissions And Polici ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.0/references/apis/storage/s3/upload-part.md b/content/riak/cs/2.1.0/references/apis/storage/s3/upload-part.md index e1c72f6e70..01d74ff108 100644 --- a/content/riak/cs/2.1.0/references/apis/storage/s3/upload-part.md +++ b/content/riak/cs/2.1.0/references/apis/storage/s3/upload-part.md @@ -9,7 +9,7 @@ aliases: - /riak/cs/2.1.0/references/apis/storage/s3/RiakCS-Upload-Part/ --- -This operation uploads a part in a multipart upload. You must [initiate a multipart upload](/riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload) before you can upload any part. In this operation you provide part data in your request. +This operation uploads a part in a multipart upload. You must [initiate a multipart upload]({{}}riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload) before you can upload any part. In this operation you provide part data in your request. ## Requests @@ -54,7 +54,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.0/references/apis/storage/s3/common-response-headers). ### Response Elements @@ -64,7 +64,7 @@ This operation does not use response elements. ### Sample Request -The following `PUT` request uploads part number 1 in a multipart upload. This request includes the upload ID from an [Initiate Multipart Upload](/riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload) request. +The following `PUT` request uploads part number 1 in a multipart upload. This request includes the upload ID from an [Initiate Multipart Upload]({{}}riak/cs/2.1.0/references/apis/storage/s3/initiate-multipart-upload) request. 
``` PUT /large.iso?partNumber=1&uploadId=VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA HTTP/1.1 @@ -79,7 +79,7 @@ Authorization: AWS AKIAIOSFODNN7EXAMPLE:VGhpcyBtZXNzYWdlIHNpZ25lZGGieSRlbHZpbmc= ### Sample Response -The response includes the `ETag` header. This value must be retained for when you send the [Complete Multipart Upload](/riak/cs/2.1.0/references/apis/storage/s3/complete-multipart-upload) request. +The response includes the `ETag` header. This value must be retained for when you send the [Complete Multipart Upload]({{}}riak/cs/2.1.0/references/apis/storage/s3/complete-multipart-upload) request. ``` HTTP/1.1 200 OK diff --git a/content/riak/cs/2.1.0/references/appendices/http-admin.md b/content/riak/cs/2.1.0/references/appendices/http-admin.md index 1f18e04e8b..c89b25a232 100644 --- a/content/riak/cs/2.1.0/references/appendices/http-admin.md +++ b/content/riak/cs/2.1.0/references/appendices/http-admin.md @@ -19,10 +19,10 @@ above and beyond those associated with Riak itself: Task | CS URI | Further reading :----|:-------|:--------------- -User management | `/riak-cs/user` | [Account Management](/riak/cs/2.1.0/cookbooks/account-management) -User access statistics | `/riak-cs/usage` | [Querying Access Statistics](/riak/cs/2.1.0/cookbooks/querying-access-statistics) -Storage statistics | `/riak-cs/usage` | [Querying Storage Statistics](/riak/cs/2.1.0/cookbooks/querying-storage-statistics) -Global statistics | `/riak-cs/stats` | [Monitoring and Metrics](/riak/cs/2.1.0/cookbooks/monitoring-and-metrics) +User management | `/riak-cs/user` | [Account Management]({{}}riak/cs/2.1.0/cookbooks/account-management) +User access statistics | `/riak-cs/usage` | [Querying Access Statistics]({{}}riak/cs/2.1.0/cookbooks/querying-access-statistics) +Storage statistics | `/riak-cs/usage` | [Querying Storage Statistics]({{}}riak/cs/2.1.0/cookbooks/querying-storage-statistics) +Global statistics | `/riak-cs/stats` | [Monitoring and Metrics]({{}}riak/cs/2.1.0/cookbooks/monitoring-and-metrics) By default, these are accessible over the same IP/port as the rest of the CS API, but they can be configured to run elsewhere, with or without @@ -52,13 +52,13 @@ details. 
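As a rough sketch of querying one of these endpoints (assuming a Riak CS listener on the default `127.0.0.1:8080` and a development setup in which admin authentication has been disabled; in production, such requests must be signed with the admin credentials):

```bash
# Fetch the aggregate node statistics as JSON; the host, port, and
# unauthenticated access are assumptions for a local dev setup.
curl -s http://127.0.0.1:8080/riak-cs/stats | python -m json.tool
```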
## Related Resources -* [configuring Riak CS](/riak/cs/2.1.0/cookbooks/configuration/riak-cs) -* [Querying Access Statistics](/riak/cs/2.1.0/cookbooks/querying-access-statistics) - * [Usage and Billing Data](/riak/cs/2.1.0/cookbooks/usage-and-billing-data) +* [configuring Riak CS]({{}}riak/cs/2.1.0/cookbooks/configuration/riak-cs) +* [Querying Access Statistics]({{}}riak/cs/2.1.0/cookbooks/querying-access-statistics) + * [Usage and Billing Data]({{}}riak/cs/2.1.0/cookbooks/usage-and-billing-data) * [Github wiki](https://github.com/basho/riak_cs/wiki/Querying-Access-Stats) -* [Querying Storage Statistics](/riak/cs/2.1.0/cookbooks/querying-storage-statistics) +* [Querying Storage Statistics]({{}}riak/cs/2.1.0/cookbooks/querying-storage-statistics) * [Enabling storage statistics](https://github.com/basho/riak_cs/wiki/Logging-Storage-Stats) * [Github wiki](https://github.com/basho/riak_cs/wiki/Logging-Storage-Stats) -* [Account Management](/riak/cs/2.1.0/cookbooks/account-management) +* [Account Management]({{}}riak/cs/2.1.0/cookbooks/account-management) * [Github wiki](https://github.com/basho/riak_cs/wiki/User-Management) -* [Monitoring and Metrics](/riak/cs/2.1.0/cookbooks/monitoring-and-metrics) +* [Monitoring and Metrics]({{}}riak/cs/2.1.0/cookbooks/monitoring-and-metrics) diff --git a/content/riak/cs/2.1.0/references/appendices/riak-cs-control.md b/content/riak/cs/2.1.0/references/appendices/riak-cs-control.md index a9f28a0fac..48679c879e 100644 --- a/content/riak/cs/2.1.0/references/appendices/riak-cs-control.md +++ b/content/riak/cs/2.1.0/references/appendices/riak-cs-control.md @@ -20,7 +20,7 @@ managing users in a Riak CS Cluster. ## Installing Riak CS Control -Riak CS Control [is maintained as a separate application](https://github.com/basho/riak_cs_control) and can be installed via [source or package](/riak/cs/2.1.0/downloads). +Riak CS Control [is maintained as a separate application](https://github.com/basho/riak_cs_control) and can be installed via [source or package]({{}}riak/cs/2.1.0/downloads). ## Setting Up Riak CS Control @@ -67,7 +67,7 @@ riak-cs-control start When you first navigate to the Riak CS Control UI, you will land on the Users page: -![Users Page](/images/cs_control_users.png) +![Users Page]({{}}images/cs_control_users.png) On this page you can quickly see all current Riak CS users along with their status, e-mail address, and credentials. From here you can filter, diff --git a/content/riak/cs/2.1.0/theory/stanchion.md b/content/riak/cs/2.1.0/theory/stanchion.md index 61be552c9e..4b95b49569 100644 --- a/content/riak/cs/2.1.0/theory/stanchion.md +++ b/content/riak/cs/2.1.0/theory/stanchion.md @@ -25,9 +25,9 @@ Riak CS cluster at any time. Correspondingly, your Stanchion installation must be managed and configured separately. For more information, see the following documents: -* [Configuring Stanchion](/riak/cs/2.1.0/cookbooks/configuration/stanchion) -* [Installing Stanchion](/riak/cs/2.1.0/cookbooks/installing#installing-stanchion-on-a-node) -* [The Stantion Command-line Interface](/riak/cs/2.1.0/cookbooks/command-line-tools#stanchion) +* [Configuring Stanchion]({{}}riak/cs/2.1.0/cookbooks/configuration/stanchion) +* [Installing Stanchion]({{}}riak/cs/2.1.0/cookbooks/installing#installing-stanchion-on-a-node) +* [The Stanchion Command-line Interface]({{}}riak/cs/2.1.0/cookbooks/command-line-tools#stanchion) For a more in-depth discussion of implementation details, see the project's @@ -51,7 +51,7 @@ rejected.
The uniqueness of these entities is enforced by serializing any creation or modification requests that involve them. This process is handled by Stanchion. What happens under the hood is essentially that Stanchion -mandates that all [vnodes](/riak/kv/2.1.3/learn/glossary#vnode) in the underlying Riak cluster that are responsible for the user or bucket being created must be available at creation time. +mandates that all [vnodes]({{}}riak/kv/2.1.3/learn/glossary#vnode) in the underlying Riak cluster that are responsible for the user or bucket being created must be available at creation time. One result of this enforcement is that user creation requests and bucket creation or modification, i.e. deletion, requests are not highly diff --git a/content/riak/cs/2.1.0/tutorials/fast-track/local-testing-environment.md b/content/riak/cs/2.1.0/tutorials/fast-track/local-testing-environment.md index 1c26659802..a1df868587 100644 --- a/content/riak/cs/2.1.0/tutorials/fast-track/local-testing-environment.md +++ b/content/riak/cs/2.1.0/tutorials/fast-track/local-testing-environment.md @@ -20,7 +20,7 @@ does not attempt to optimize your installation for your particular architecture. If you want to build a testing environment with a minimum of -configuration, there is an option for [Building a Virtual Testing Environment](/riak/cs/2.1.0/tutorials/fast-track/virtual-test-environment). +configuration, there is an option for [Building a Virtual Testing Environment]({{}}riak/cs/2.1.0/tutorials/fast-track/virtual-test-environment). ## Installing Your First Node @@ -30,7 +30,7 @@ and running Riak and Riak CS. ### Step 1: Raise your system's open file limits Riak can consume a large number of open file handles during normal -operation. See the [Open Files Limit](/riak/kv/2.1.3/using/performance/open-files-limit) document for more information on +operation. See the [Open Files Limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit) document for more information on how to increase your system's open files limit. If you are the root user, you can increase the system's open files limit @@ -52,7 +52,7 @@ riak soft nofile 65536 riak hard nofile 65536 ``` -For Mac OS X, consult the [open files limit](/riak/kv/2.1.3/using/performance/open-files-limit/#mac-os-x) documentation. +For Mac OS X, consult the [open files limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit/#mac-os-x) documentation. ### Step 2: Download and install packages @@ -67,14 +67,14 @@ sudo apt-get install -y curl substitute the appropriate CLI commands. If you are running Ubuntu 11.10 or later, you will also need the -`libssl0.9.8` package. See [Installing on Debian and Ubuntu](/riak/kv/2.1.3/setup/installing/debian-ubuntu) for more information. +`libssl0.9.8` package. See [Installing on Debian and Ubuntu]({{}}riak/kv/2.1.3/setup/installing/debian-ubuntu) for more information. ```bash sudo apt-get install -y libssl0.9.8 ``` Now, grab the appropriate packages: Riak, Riak CS, and Stanchion. See -[Download Riak](/riak/kv/2.1.3/downloads/) and [Download Riak CS](/riak/cs/2.1.0/downloads). +[Download Riak]({{}}riak/kv/2.1.3/downloads/) and [Download Riak CS]({{}}riak/cs/2.1.0/downloads). You can skip Riak CS Control for now. Once you have the packages, install them per the instructions below. @@ -87,14 +87,14 @@ installing Riak. 
**Do not attempt to configure or start Riak until step 3 in this document.** - * [Debian and Ubuntu](/riak/kv/2.1.3/setup/installing/debian-ubuntu) - * [RHEL and CentOS](/riak/kv/2.1.3/setup/installing/rhel-centos) - * [Mac OS X](/riak/kv/2.1.3/setup/installing/mac-osx) - * [FreeBSD](/riak/kv/2.1.3/setup/installing/freebsd) - * [SUSE](/riak/kv/2.1.3/setup/installing/suse) - * [Windows Azure](/riak/kv/2.1.3/setup/installing/windows-azure) - * [AWS Marketplace](/riak/kv/2.1.3/setup/installing/amazon-web-services) - * [From Source](/riak/kv/2.1.3/setup/installing/source) + * [Debian and Ubuntu]({{}}riak/kv/2.1.3/setup/installing/debian-ubuntu) + * [RHEL and CentOS]({{}}riak/kv/2.1.3/setup/installing/rhel-centos) + * [Mac OS X]({{}}riak/kv/2.1.3/setup/installing/mac-osx) + * [FreeBSD]({{}}riak/kv/2.1.3/setup/installing/freebsd) + * [SUSE]({{}}riak/kv/2.1.3/setup/installing/suse) + * [Windows Azure]({{}}riak/kv/2.1.3/setup/installing/windows-azure) + * [AWS Marketplace]({{}}riak/kv/2.1.3/setup/installing/amazon-web-services) + * [From Source]({{}}riak/kv/2.1.3/setup/installing/source) #### Next, install Riak CS @@ -458,7 +458,7 @@ your first node with two exceptions: You will then need to verify the cluster plan with the `riak-admin cluster plan` command, and commit the cluster changes with `riak-admin cluster commit` to complete the join process. More information is -available in the [Command Line Tools](/riak/kv/2.1.3/using/admin/riak-admin/#cluster) documentation. +available in the [Command Line Tools]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster) documentation. > **Note** > @@ -469,4 +469,4 @@ such as a dedicated device, [HAProxy](http://haproxy.1wt.eu), or [Nginx](http://wiki.nginx.org/Main) between Riak CS and the outside world. -Once you have completed this step, You can progress to [testing the Riak CS installation](/riak/cs/2.1.0/tutorials/fast-track/test-installation) using s3cmd. +Once you have completed this step, you can progress to [testing the Riak CS installation]({{}}riak/cs/2.1.0/tutorials/fast-track/test-installation) using s3cmd. diff --git a/content/riak/cs/2.1.0/tutorials/fast-track/test-installation.md b/content/riak/cs/2.1.0/tutorials/fast-track/test-installation.md index 38dc725290..eeeac74327 100644 --- a/content/riak/cs/2.1.0/tutorials/fast-track/test-installation.md +++ b/content/riak/cs/2.1.0/tutorials/fast-track/test-installation.md @@ -141,6 +141,6 @@ bit of learning to be done, so make sure and check out the Reference section (click "Reference" on the nav on the left side of this page).
A few items that may be of particular interest: -* [Details about API operations](/riak/cs/2.1.0/references/apis/storage) -* [Information about the Ruby Fog client](/riak/cs/2.1.0/cookbooks/fog) -* [Release Notes](/riak/cs/2.1.0/cookbooks/release-notes) +* [Details about API operations]({{}}riak/cs/2.1.0/references/apis/storage) +* [Information about the Ruby Fog client]({{}}riak/cs/2.1.0/cookbooks/fog) +* [Release Notes]({{}}riak/cs/2.1.0/cookbooks/release-notes) diff --git a/content/riak/cs/2.1.0/tutorials/fast-track/virtual-test-environment.md b/content/riak/cs/2.1.0/tutorials/fast-track/virtual-test-environment.md index ba630837de..f1936c20e4 100644 --- a/content/riak/cs/2.1.0/tutorials/fast-track/virtual-test-environment.md +++ b/content/riak/cs/2.1.0/tutorials/fast-track/virtual-test-environment.md @@ -22,7 +22,7 @@ want to tune the OS or node/memory count, you'll have to edit the If you want to build a testing environment with more flexibility in configuration and durability across environment resets, there are -instructions for [Building a Local Test Environment](/riak/cs/2.1.0/tutorials/fast-track/local-testing-environment). +instructions for [Building a Local Test Environment]({{}}riak/cs/2.1.0/tutorials/fast-track/local-testing-environment). ## Configuration @@ -87,7 +87,7 @@ Secret key: RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw== ## Next Steps Congratulations! You have deployed a virtualized environment of Riak CS. -You are ready to progress to [Testing the Riak CS Installation](/riak/cs/2.1.0/tutorials/fast-track/test-installation). +You are ready to progress to [Testing the Riak CS Installation]({{}}riak/cs/2.1.0/tutorials/fast-track/test-installation). ### Stopping Your Virtual Environment diff --git a/content/riak/cs/2.1.0/tutorials/fast-track/what-is-riak-cs.md b/content/riak/cs/2.1.0/tutorials/fast-track/what-is-riak-cs.md index 52dfb52188..44907cd80f 100644 --- a/content/riak/cs/2.1.0/tutorials/fast-track/what-is-riak-cs.md +++ b/content/riak/cs/2.1.0/tutorials/fast-track/what-is-riak-cs.md @@ -35,11 +35,11 @@ automatically take over the responsibility of failed or non-communicative nodes, data remains available even in the event of node failure or network partition. -When an object is uploaded via the [storage API](/riak/cs/2.1.0/references/apis/storage), Riak CS breaks the object into smaller chunks that are streamed, +When an object is uploaded via the [storage API]({{}}riak/cs/2.1.0/references/apis/storage), Riak CS breaks the object into smaller chunks that are streamed, written, and replicated in Riak. Each chunk is associated with metadata for later retrieval. The diagram below provides a visualization. -![Riak CS Chunking](/images/Riak-CS-Overview.png) +![Riak CS Chunking]({{}}images/Riak-CS-Overview.png) ## Riak CS Enterprise diff --git a/content/riak/cs/2.1.1/cookbooks/access-control-lists.md b/content/riak/cs/2.1.1/cookbooks/access-control-lists.md index fd64d333a2..84a48b703e 100644 --- a/content/riak/cs/2.1.1/cookbooks/access-control-lists.md +++ b/content/riak/cs/2.1.1/cookbooks/access-control-lists.md @@ -82,9 +82,9 @@ Riak CS permissions are split into two types: **bucket permissions** and ## Buckets -Bucket names **must** be [globally unique](/riak/cs/2.1.1/theory/stanchion/#globally-unique-entities). To avoid conflicts, all +Bucket names **must** be [globally unique]({{}}riak/cs/2.1.1/theory/stanchion/#globally-unique-entities). 
To avoid conflicts, all bucket creation requests are made to an application called -[Stanchion](/riak/cs/2.1.1/cookbooks/configuration/stanchion). This means that all requests for modification of a bucket ACL should be serialized through Stanchion. While this may cause undesirable serialization of these requests, we +[Stanchion]({{}}riak/cs/2.1.1/cookbooks/configuration/stanchion). This means that all requests for modification of a bucket ACL should be serialized through Stanchion. While this may cause undesirable serialization of these requests, we believe it is appropriate based on the following statement from this [documentation on bucket restrictions](http://docs.amazonwebservices.com/AmazonS3/latest/dev/BucketRestrictions.html) from Amazon regarding restrictions on bucket operations: @@ -105,4 +105,4 @@ created granting the creator both ownership and full access control and denying access to all other parties. For information on specifying an ACL when making a `PUT` request, see -[Riak CS PUT Object ACL](/riak/cs/2.1.1/references/apis/storage/s3/put-object-acl). +[Riak CS PUT Object ACL]({{}}riak/cs/2.1.1/references/apis/storage/s3/put-object-acl). diff --git a/content/riak/cs/2.1.1/cookbooks/authentication.md b/content/riak/cs/2.1.1/cookbooks/authentication.md index a6a8e54709..3a9f20915f 100644 --- a/content/riak/cs/2.1.1/cookbooks/authentication.md +++ b/content/riak/cs/2.1.1/cookbooks/authentication.md @@ -111,4 +111,4 @@ http://bucket.data.basho.com/document?AWSAccessKeyId=8EE3UE-UMW1YTPMBC3EB&Expire ## Keystone Authentication More information on using Keystone for authentication with Riak CS can -be found in [using Riak CS with Keystone](/riak/cs/2.1.1/cookbooks/using-with-keystone). +be found in [using Riak CS with Keystone]({{}}riak/cs/2.1.1/cookbooks/using-with-keystone). diff --git a/content/riak/cs/2.1.1/cookbooks/command-line-tools.md b/content/riak/cs/2.1.1/cookbooks/command-line-tools.md index ff58f3c680..7531eed555 100644 --- a/content/riak/cs/2.1.1/cookbooks/command-line-tools.md +++ b/content/riak/cs/2.1.1/cookbooks/command-line-tools.md @@ -205,7 +205,7 @@ More information about Erlang's etop tool can be found in the ## riak-cs-admin gc -This command controls Riak CS's [garbage collection](/riak/cs/2.1.1/cookbooks/garbage-collection) system. +This command controls Riak CS's [garbage collection]({{}}riak/cs/2.1.1/cookbooks/garbage-collection) system. ```bash riak-cs-admin gc @@ -311,7 +311,7 @@ undergirding Riak CS. Temporarily changes the host and/or port used by Stanchion. This change is effective until the node is restarted, at which point Stanchion will -begin listening on the host and port specified in your [configuration files](/riak/cs/2.1.1/cookbooks/configuration/reference). +begin listening on the host and port specified in your [configuration files]({{}}riak/cs/2.1.1/cookbooks/configuration/reference). ```bash riak-cs-stanchion switch HOST PORT @@ -520,7 +520,7 @@ documented [above](#riak-cs-admin-access). Riak CS version 1.5 offers support for supercluster operations. The `supercluster` command interface enables you to interact with that system. -More information can be found in [Riak CS Supercluster Support](/riak/cs/2.1.1/cookbooks/supercluster). +More information can be found in [Riak CS Supercluster Support]({{}}riak/cs/2.1.1/cookbooks/supercluster). {{% note title="Note: technical preview" %}} Riak CS supercluster support is available only as a technical preview for @@ -634,7 +634,7 @@ Fetches all current weights from the master member. 
riak-cs-supercluster refresh ``` -When a member's weight is updated, that weight is stored in the [master member](/riak/cs/2.1.1/cookbooks/supercluster/#the-master-member) and cached in Riak CS. Riak CS fetches weights from the master member only periodically. The +When a member's weight is updated, that weight is stored in the [master member]({{}}riak/cs/2.1.1/cookbooks/supercluster/#the-master-member) and cached in Riak CS. Riak CS fetches weights from the master member only periodically. The `refresh` command syncs the weights stored in the master member with the weights cached in Riak CS so that there is no discrepancy. diff --git a/content/riak/cs/2.1.1/cookbooks/configuration.md b/content/riak/cs/2.1.1/cookbooks/configuration.md index 11b9bed636..cf73729aaf 100644 --- a/content/riak/cs/2.1.1/cookbooks/configuration.md +++ b/content/riak/cs/2.1.1/cookbooks/configuration.md @@ -27,7 +27,7 @@ If your system consists of several nodes, configuration primarily represents set ## Configuration of System Components -* [Configuring Riak](/riak/cs/2.1.1/cookbooks/configuration/riak-for-cs) -* [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs) -* [Configuring Stanchion](/riak/cs/2.1.1/cookbooks/configuration/stanchion) -* [Configuring an S3 client](/riak/cs/2.1.1/cookbooks/configuration/s3-client) +* [Configuring Riak]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-for-cs) +* [configuring Riak CS]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-cs) +* [Configuring Stanchion]({{}}riak/cs/2.1.1/cookbooks/configuration/stanchion) +* [Configuring an S3 client]({{}}riak/cs/2.1.1/cookbooks/configuration/s3-client) diff --git a/content/riak/cs/2.1.1/cookbooks/configuration/dragondisk.md b/content/riak/cs/2.1.1/cookbooks/configuration/dragondisk.md index 09002f1e76..8674d6c470 100644 --- a/content/riak/cs/2.1.1/cookbooks/configuration/dragondisk.md +++ b/content/riak/cs/2.1.1/cookbooks/configuration/dragondisk.md @@ -29,7 +29,7 @@ other Linux distributions. This is the main DragonDisk window as it appears upon starting the application. -![DragonDisk screenshot](/images/dragondisk_linux0.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux0.png) ## Create an account @@ -38,16 +38,16 @@ save an account. The following describes the process for doing so. * From the **File** menu, select **Accounts**. -![DragonDisk screenshot](/images/dragondisk_linux1.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux1.png) * Click **New**. -![DragonDisk screenshot](/images/dragondisk_linux2.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux2.png) * In the **Account** dialog window, choose **Other S3 compatible service** under the **Provider** drop down menu. -![DragonDisk screenshot](/images/dragondisk_linux3.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux3.png) * Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field. @@ -66,12 +66,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTPS** check box. * Click **OK** to save the account configuration. -![DragonDisk screenshot](/images/dragondisk_linux4.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux4.png) * Click **Close** to complete account creation and to continue to attempt connecting to Riak CS. -![DragonDisk screenshot](/images/dragondisk_linux5.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux5.png) ### Connect to Riak CS @@ -84,7 +84,7 @@ configuration.
right pane, then you're connected to Riak CS and can proceed to creating a bucket. -![DragonDisk screenshot](/images/dragondisk_linux6.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux6.png) ### Create a bucket @@ -93,7 +93,7 @@ Riak CS. * Click the **Create bucket** icon to open the bucket creation dialog. -![DragonDisk screenshot](/images/dragondisk_linux7.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux7.png) * In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket `dragondisklinux`. @@ -101,7 +101,7 @@ Riak CS. * The bucket should now appear in the right pane and you can now proceed with copying some test files into the bucket. -![DragonDisk screenshot](/images/dragondisk_linux8.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux8.png) ### Copy files to bucket @@ -109,13 +109,13 @@ Finally, navigate your local computer in the left pane and select a file or files to copy from your local computer to the newly created Riak CS bucket. -![DragonDisk screenshot](/images/dragondisk_linux9.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux9.png) * After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin. * After the files are copied, they'll appear in the bucket. -![DragonDisk screenshot](/images/dragondisk_linux10.png) +![DragonDisk screenshot]({{}}images/dragondisk_linux10.png) * You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk. @@ -131,7 +131,7 @@ This section describes configuration of DragonDisk for Mac OS X. * This is the main DragonDisk window as it appears upon starting the application. -![DragonDisk screenshot](/images/dragondisk_osx0.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx0.png) ### Create an account @@ -140,16 +140,16 @@ save an account. The following describes the process for doing so. * From the **File** menu, select **Accounts**. -![DragonDisk screenshot](/images/dragondisk_osx1.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx1.png) * Click **New**. -![DragonDisk screenshot](/images/dragondisk_osx2.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx2.png) * In the **Account** dialog window, choose **Other S3 compatible service** under the **Provider** drop down menu. -![DragonDisk screenshot](/images/dragondisk_osx3.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx3.png) * Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field. @@ -168,12 +168,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTPS** check box. * Click **OK** to save the account configuration. -![DragonDisk screenshot](/images/dragondisk_osx4.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx4.png) * Click **Close** to complete account creation and continue to try connecting to Riak CS. -![DragonDisk screenshot](/images/dragondisk_osx5.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx5.png) ### Connect to Riak CS @@ -186,7 +186,7 @@ configuration. right pane, then you're connected to Riak CS and can proceed to creating a bucket. -![DragonDisk screenshot](/images/dragondisk_osx6.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx6.png) ### Create a bucket @@ -195,7 +195,7 @@ Riak CS. * Click the **Create bucket** icon to open the bucket creation dialog.
-![DragonDisk screenshot](/images/dragondisk_osx7.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx7.png) * In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket *dragondiskosx*. @@ -203,7 +203,7 @@ Riak CS. * The bucket should now appear in the right pane and you can now proceed with copying some test files into the bucket. -![DragonDisk screenshot](/images/dragondisk_osx8.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx8.png) ### Copy files to bucket @@ -211,14 +211,14 @@ Finally, navigate your local computer in the left pane and select a file or files to copy from your local computer to the newly created Riak CS bucket. -![DragonDisk screenshot](/images/dragondisk_osx9.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx9.png) * After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin. * After the files are copied, they'll appear in the bucket. -![DragonDisk screenshot](/images/dragondisk_osx10.png) +![DragonDisk screenshot]({{}}images/dragondisk_osx10.png) * You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk. @@ -234,7 +234,7 @@ This section describes configuration of DragonDisk for Windows. * This is the main DragonDisk window as it appears upon starting the application. -![DragonDisk screenshot](/images/dragondisk_windows0.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows0.png) ### Create an account @@ -243,16 +243,16 @@ save an account. The following describes the process for doing so. * From the **File** menu, select **Accounts**. -![DragonDisk screenshot](/images/dragondisk_windows1.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows1.png) * Click **New**. -![DragonDisk screenshot](/images/dragondisk_windows2.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows2.png) * In the **Account** dialog window, choose **Other S3-compatible service** under the **Provider** drop down menu. -![DragonDisk screenshot](/images/dragondisk_windows3.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows3.png) * Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field. @@ -271,12 +271,12 @@ the **HTTPS Port** field and click the **Connect using SSL/HTTPS** check box. * Click **OK** to save the account configuration. -![DragonDisk screenshot](/images/dragondisk_windows4.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows4.png) * Click **Close** to complete account creation and continue to try connecting to Riak CS. -![DragonDisk screenshot](/images/dragondisk_windows5.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows5.png) ### Connect to Riak CS @@ -289,7 +289,7 @@ configuration. right pane, then you're connected to Riak CS and can proceed to creating a bucket. -![DragonDisk screenshot](/images/dragondisk_windows6.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows6.png) ### Create a bucket @@ -297,7 +297,7 @@ configuration. with Riak CS. * Click the **Create bucket** icon to open the bucket creation dialog. -![DragonDisk screenshot](/images/dragondisk_windows7.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows7.png) * In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket *dragonbucket*. @@ -305,7 +305,7 @@ configuration.
* The bucket should now appear in the right pane and you can now proceed with copying some test files into the bucket. -![DragonDisk screenshot](/images/dragondisk_windows8.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows8.png) ### Copy files to bucket @@ -313,13 +313,13 @@ Finally, navigate your local computer in the left pane and select a file or files to copy from your local computer to the newly created Riak CS bucket. -![DragonDisk screenshot](/images/dragondisk_windows9.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows9.png) * After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin. * After the files are copied, they'll appear in the bucket. -![DragonDisk screenshot](/images/dragondisk_windows10.png) +![DragonDisk screenshot]({{}}images/dragondisk_windows10.png) * You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk. diff --git a/content/riak/cs/2.1.1/cookbooks/configuration/load-balancing-proxy.md b/content/riak/cs/2.1.1/cookbooks/configuration/load-balancing-proxy.md index 088c012a92..e95e728fb4 100644 --- a/content/riak/cs/2.1.1/cookbooks/configuration/load-balancing-proxy.md +++ b/content/riak/cs/2.1.1/cookbooks/configuration/load-balancing-proxy.md @@ -51,7 +51,7 @@ act as a load balancer to a Riak CS installation. > **Note on open files limits** > > The operating system's open files limits need to be greater than 256000 -for the example configuration that follows. Consult the [Open Files Limit](/riak/kv/2.1.3/using/performance/open-files-limit) documentation for details on configuring the value for different +for the example configuration that follows. Consult the [Open Files Limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit) documentation for details on configuring the value for different operating systems. ```config diff --git a/content/riak/cs/2.1.1/cookbooks/configuration/multi-datacenter.md b/content/riak/cs/2.1.1/cookbooks/configuration/multi-datacenter.md index ff9756e9d4..92082d53a9 100644 --- a/content/riak/cs/2.1.1/cookbooks/configuration/multi-datacenter.md +++ b/content/riak/cs/2.1.1/cookbooks/configuration/multi-datacenter.md @@ -28,7 +28,7 @@ CS cluster. As of Riak release 1.4.0, there are two different MDC replication modes that Riak CS can use to request data from remote clusters. Please see -the [comparison](/riak/kv/2.1.3/using/reference/multi-datacenter/comparison) doc for more information. +the [comparison]({{}}riak/kv/2.1.3/using/reference/multi-datacenter/comparison) doc for more information. ### Replication Version 3 Configuration @@ -75,9 +75,9 @@ configured **sink cluster**. 
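A minimal sketch of naming and connecting clusters under v3 replication follows (the cluster names, hostname, and port are placeholders; `riak-repl` is Riak Enterprise's replication CLI):

```bash
# On a node in the source cluster: name the cluster, connect to the
# sink's cluster manager, then enable and start realtime replication.
# "sink_cluster" and sink.example.com:9080 are example values.
riak-repl clustername source_cluster
riak-repl connect sink.example.com:9080
riak-repl realtime enable sink_cluster
riak-repl realtime start sink_cluster
```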
See also: -* [Upgrading from v2 to v3](/riak/kv/2.1.3/setup/upgrading/multi-datacenter) -* [Comparing v2 and v3](/riak/kv/2.1.3/using/reference/multi-datacenter/comparison) -* [Multi-Datacenter Operations](/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter) + +* [Comparing v2 and v3]({{}}riak/kv/2.1.3/using/reference/multi-datacenter/comparison) +* [Multi-Datacenter Operations]({{}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter) ## Riak CS Configuration @@ -119,10 +119,10 @@ Replace the `node` variable above with the nodename specified in the ## Stanchion Configuration -Though there is no specific configuration for [Stanchion](/riak/cs/2.1.1/theory/stanchion), note that +Though there is no specific configuration for [Stanchion]({{}}riak/cs/2.1.1/theory/stanchion), note that Stanchion should be a single, globally unique process to which every Riak CS node sends requests, even if there are multiple replicated sites. Unlike Riak and Riak CS, Stanchion should run on _only one node in a given cluster_, perhaps on its own, dedicated hardware if you wish. Stanchion runs on only one node because it manages strongly consistent -updates to [globally unique entities](/riak/cs/2.1.1/theory/stanchion/#globally-unique-entities) like users and buckets. +updates to [globally unique entities]({{}}riak/cs/2.1.1/theory/stanchion/#globally-unique-entities) like users and buckets. diff --git a/content/riak/cs/2.1.1/cookbooks/configuration/reference.md b/content/riak/cs/2.1.1/cookbooks/configuration/reference.md index 90ce509ee4..7bde8451e8 100644 --- a/content/riak/cs/2.1.1/cookbooks/configuration/reference.md +++ b/content/riak/cs/2.1.1/cookbooks/configuration/reference.md @@ -53,12 +53,12 @@ aliases: ]}, ``` - and so on. More details can be found at [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs). + and so on. More details can be found at [configuring Riak CS]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-cs). {{% /note %}} This document is intended as a reference listing of all configurable parameters for Riak CS. For a more narrative-style walkthrough of configuring Riak CS, we -recommend consulting the [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs) tutorial. +recommend consulting the [configuring Riak CS]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-cs) tutorial. The configuration for Riak CS is handled through either the `riak-cs.conf` and `advanced.config` file pair, which were introduced in Riak CS 2.0.0, or the two @@ -130,7 +130,7 @@ The tables below will show settings for both `riak-cs.conf` and riak_host The IP address/port for the Riak CS node's corresponding Riak node (used by -Riak's Protocol Buffers interface) +Riak's Protocol Buffers interface) 127.0.0.1:8087 @@ -157,7 +157,7 @@ the corresponding HTTP host). riak_host The TCP IP/port for the Riak CS node's corresponding Riak node (used by -Riak's Protocol Buffers interface) +Riak's Protocol Buffers interface) {"127.0.0.1", 8087} @@ -298,7 +298,7 @@ tasks use the IP and port as all other Riak CS traffic. The admin key used for administrative access to Riak CS, e.g. usage of the /riak-cs/stats endpoint. Please note that both admin.key and admin.secret must match the -corresponding settings in the Stanchion node's stanchion.conf. +corresponding settings in the Stanchion node's stanchion.conf. admin-key @@ -326,7 +326,7 @@ this setting unless you implement a custom authentication scheme. 
rewrite_module A rewrite module contains a set of rules for translating requests made using -a particular API to requests in the the native Riak CS storage API. We do +a particular API to requests in the native Riak CS storage API. We do not recommend changing this setting unless you implement a custom module. riak_cs_s3_rewrite @@ -351,7 +351,7 @@ tasks use the IP and port as all other Riak CS traffic. The admin key used for administrative access to Riak CS, e.g. usage of the /riak-cs/stats endpoint. Please note that both admin_key and admin_secret must match the -corresponding settings in the Stanchion node's +corresponding settings in the Stanchion node's app.config. @@ -387,7 +387,7 @@ actions, including bucket deletion. rewrite_module A rewrite module contains a set of rules for translating requests -made using a particular API to requests in the the native Riak CS storage API. We do not recommend changing this setting unless you +made using a particular API to requests in the native Riak CS storage API. We do not recommend changing this setting unless you implement a custom module. riak_cs_s3_rewrite @@ -396,7 +396,7 @@ implement a custom module. ## Usage Recording -These settings relate to Riak CS's [access logs](/riak/cs/2.1.1/cookbooks/usage-and-billing-data). +These settings relate to Riak CS's [access logs]({{}}riak/cs/2.1.1/cookbooks/usage-and-billing-data). ### `riak-cs.conf` @@ -519,7 +519,7 @@ of 86400 translates to 1 day. ## Garbage Collection -Settings related to Riak CS's [garbage collection](/riak/cs/2.1.1/cookbooks/garbage-collection) \(GC) process. +Settings related to Riak CS's [garbage collection]({{}}riak/cs/2.1.1/cookbooks/garbage-collection) \(GC) process. ### `riak-cs.conf` @@ -661,7 +661,7 @@ blocks to Riak. cs_version The Riak CS version number. This number is used to selectively enable new -features for the current version to better support rolling upgrades. New +features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set @@ -696,7 +696,7 @@ source IP address as an input (which is the default). cs_version The Riak CS version number. This number is used to selectively -enable new features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set to the new value. +enable new features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses 0) of the old app.config until all nodes have been upgraded. At that point, set to the new value. @@ -958,4 +958,4 @@ error logger.
--> -[config_your_code]: http://docs.basho.com/riak/1.4.12/ops/advanced/configs/configuration-files/#Configuring-Your-code-vm-args-code- +[config_your_code]: {{< baseurl >}}riak/kv/2.1.1/configuring/basic/#erlang-vm-tunings diff --git a/content/riak/cs/2.1.1/cookbooks/configuration/riak-cs.md b/content/riak/cs/2.1.1/cookbooks/configuration/riak-cs.md index 524008ef0b..be68dc4ce9 100644 --- a/content/riak/cs/2.1.1/cookbooks/configuration/riak-cs.md +++ b/content/riak/cs/2.1.1/cookbooks/configuration/riak-cs.md @@ -10,8 +10,8 @@ menu: project: "riak_cs" project_version: "2.1.1" aliases: - - /riakcs/2.1.1/cookbooks/configuration/Configuring-Riak-CS/ - - /riak/cs/2.1.1/cookbooks/configuration/Configuring-Riak-CS/ + - /riakcs/2.1.1/cookbooks/configuration/riak-cs/ + - /riak/cs/2.1.1/cookbooks/configuration/riak-cs/ --- For Riak CS to operate properly it must know how to connect to Riak. @@ -51,9 +51,9 @@ files. If an `app.config` file is present, neither the `riak-cs.config` nor the to continue usage of the legacy `app.config` file, please note that some configuration options have changed names. Most notably, the IP/Port format has changed in 2.0 for Stanchion, Riak, and Riak CS. To view these changes, -please review the [Rolling Upgrades](/riak/cs/2.1.1/cookbooks/rolling-upgrades) Document. +please review the [Rolling Upgrades]({{}}riak/cs/2.1.1/cookbooks/rolling-upgrades) Document. > -> For a comprehensive listing of available parameters and a full list of `app.config` parameters, see the [Full Configuration Reference](/riak/cs/2.1.1/cookbooks/configuration/reference). +> For a comprehensive listing of available parameters and a full list of `app.config` parameters, see the [Full Configuration Reference]({{}}riak/cs/2.1.1/cookbooks/configuration/reference). The sections below walk you through some of the main configuration categories that you will likely encounter while operating Riak CS. @@ -81,12 +81,12 @@ is required. {{% /note %}} After making any changes to the `riak-cs.conf` file in Riak CS, -[restart](/riak/cs/2.1.1/cookbooks/command-line-tools/#riak-cs) the node if it is already running. +[restart]({{}}riak/cs/2.1.1/cookbooks/command-line-tools/#riak-cs) the node if it is already running. ## Specifying the Stanchion Node If you're running a single Riak CS node, you don't have to change the -[Stanchion](/riak/cs/2.1.1/cookbooks/configuration/stanchion) settings because Stanchion runs on the local host. If your Riak CS system has multiple nodes, however, you must specify the IP address and port for the Stanchion node and whether or not SSL is enabled. +[Stanchion]({{}}riak/cs/2.1.1/cookbooks/configuration/stanchion) settings because Stanchion runs on the local host. If your Riak CS system has multiple nodes, however, you must specify the IP address and port for the Stanchion node and whether or not SSL is enabled. The Stanchion settings reside in the Riak CS `riak-cs.conf` file, which is located in the `/etc/riak-cs` directory of each Riak CS node. @@ -264,7 +264,7 @@ particular use case. ### Tuning We strongly recommend that you take care when setting the value of the -[`pb_backlog` setting](/riak/cs/2.1.1/cookbooks/configuration/riak-for-cs/#setting-up-riak-to-use-protocol-buffers) in Riak. When a Riak CS node is +[`pb_backlog` setting]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-for-cs/#setting-up-riak-to-use-protocol-buffers) in Riak. When a Riak CS node is started, each connection pool begins to establish connections to Riak. 
This can result in a [thundering herd problem](http://en.wikipedia.org/wiki/Thundering_herd_problem) in which connections in the pool believe they are connected to Riak, but in reality some of the connections have been reset. Due to TCP `RST` packet rate limiting (controlled by `net.inet.icmp.icmplim`) some of the connections may not receive notification until they are used to service a user's request. This manifests itself as an `{error, disconnected}` message in the Riak CS logs and an error returned to the user. @@ -354,7 +354,7 @@ data.riakcs.net The following options are available to make adjustments to the Riak CS garbage collection system. More details about garbage collection in Riak CS are -available in [Garbage Collection](/riak/cs/2.1.1/cookbooks/garbage-collection). +available in [Garbage Collection]({{}}riak/cs/2.1.1/cookbooks/garbage-collection). * `gc.leeway_period` (`leeway_seconds` in `advanced.config` or `app.config`) --- The amount of time that must elapse before an object version that has been @@ -420,4 +420,4 @@ been deprecated, and _will be removed_ in the next major release. ## Other Riak CS Settings For a complete listing of configurable parameters for Riak CS, see the -[configuration reference](/riak/cs/2.1.1/cookbooks/configuration/reference) document. +[configuration reference]({{}}riak/cs/2.1.1/cookbooks/configuration/reference) document. diff --git a/content/riak/cs/2.1.1/cookbooks/configuration/riak-for-cs.md b/content/riak/cs/2.1.1/cookbooks/configuration/riak-for-cs.md index dcc0b0dd19..69f9ded8fc 100644 --- a/content/riak/cs/2.1.1/cookbooks/configuration/riak-for-cs.md +++ b/content/riak/cs/2.1.1/cookbooks/configuration/riak-for-cs.md @@ -21,21 +21,21 @@ reference document listing important configurable parameters. ## The Proper Backends for Riak CS -The default backend used by Riak is the [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) backend, but the +The default backend used by Riak is the [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) backend, but the Riak CS package includes a special backend that should be used by the Riak cluster that is part of the Riak CS system. It is a custom version -of the standard [Multi](/riak/kv/2.1.3/setup/planning/backend/multi) backend that ships with Riak. +of the standard [Multi]({{}}riak/kv/2.1.3/setup/planning/backend/multi) backend that ships with Riak. Some of the Riak buckets used internally by Riak CS use secondary -indexes, which currently requires the [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb) backend. Other parts +indexes, which currently requires the [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb) backend. Other parts of the Riak CS system can benefit from the use of the Bitcask backend. -The use of the custom [Multi](/riak/kv/2.1.3/setup/planning/backend/multi) backend enables Riak CS to take +The use of the custom [Multi]({{}}riak/kv/2.1.3/setup/planning/backend/multi) backend enables Riak CS to take advantage of the strengths of both of these backends to achieve the best blend of performance and features. The next section covers how to properly set up Riak to use this Multi backend. Additionally, the Riak CS storage calculation system uses Riak's -[MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce) to sum the files in a bucket. This means that you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage. +[MapReduce]({{}}riak/kv/2.1.3/developing/usage/mapreduce) to sum the files in a bucket. 
This means that you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage. A few other settings must be modified to configure a Riak node as part of a Riak CS system, such as the node IP address and the IP address and @@ -46,7 +46,7 @@ configure a Riak node to work as part of a Riak CS system. ## Setting up the Proper Riak Backend First, edit Riak's `riak.conf`, or the old-style `advanced.config` or -`app.config` [configuration file](/riak/kv/2.1.3/configuring/reference). These files can be found in the `/etc/riak` or `/opt/riak/etc` directories. By default, Riak uses the [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) backend. The first thing we need to do is to change that by removing +`app.config` [configuration file]({{}}riak/kv/2.1.3/configuring/reference). These files can be found in the `/etc/riak` or `/opt/riak/etc` directories. By default, Riak uses the [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) backend. The first thing we need to do is to change that by removing the following line: ```riakconf @@ -117,7 +117,7 @@ to use the custom backend provided by Riak CS. We need to use either the ``` It's important to note that many of these values will depend on various -directories specific to your [operating system](/riak/kv/2.1.3/setup/installing), so make sure to adjust them accordingly. The `add_paths` +directories specific to your [operating system]({{}}riak/kv/2.1.3/setup/installing), so make sure to adjust them accordingly. The `add_paths` parameter, for example, assumes that Riak CS is installed in `/usr/lib/riak-cs`, while the `data_root` parameters assume that Riak is installed in `/var/lib/`. @@ -152,7 +152,7 @@ buckets.default.allow_mult = true ]} ``` -This will enable Riak to create [siblings](/riak/kv/2.1.3/learn/concepts/causal-context/#siblings), which is necessary for Riak CS to function. If you are connecting to Riak CS from a [client library](/riak/kv/2.1.3/developing/client-libraries), don't worry: you will not have to manage [conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), as all Riak CS +This will enable Riak to create [siblings]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#siblings), which is necessary for Riak CS to function. If you are connecting to Riak CS from a [client library]({{}}riak/kv/2.1.3/developing/client-libraries), don't worry: you will not have to manage [conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), as all Riak CS operations are strongly consistent by definition. {{% note title="Note on `allow_mult`" %}} @@ -214,7 +214,7 @@ sure that you do not change the backend from `riak_cs_kv_multi_backend` to ## Setting Up Riak to Use Protocol Buffers -The Riak [Protocol Buffers](/riak/kv/2.1.3/developing/api/protocol-buffers) settings reside in the Riak `riak.conf`, +The Riak [Protocol Buffers]({{}}riak/kv/2.1.3/developing/api/protocol-buffers) settings reside in the Riak `riak.conf`, or in the `riak_api` section of the old-style `advanced.config` or `app.config` files, which are located in the `/etc/riak/` folder. The default host is `127.0.0.1` and the default port is `8087`.
You will need to change this if @@ -343,4 +343,4 @@ javascript.hook_pool_size = 0 ``` -[riak_conf_files]: http://docs.basho.com/riak/2.0.5/ops/advanced/configs/configuration-files/ +[riak_conf_files]: {{< baseurl >}}riak/kv/2.0.5/ops/advanced/configs/configuration-files/ diff --git a/content/riak/cs/2.1.1/cookbooks/configuration/stanchion.md b/content/riak/cs/2.1.1/cookbooks/configuration/stanchion.md index 10b46d0bc9..1d068f12e9 100644 --- a/content/riak/cs/2.1.1/cookbooks/configuration/stanchion.md +++ b/content/riak/cs/2.1.1/cookbooks/configuration/stanchion.md @@ -86,7 +86,7 @@ ssl.keyfile = "./etc/key.pem" ## Specifying the Admin User -The admin user is created during the [configuration of Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs/#specifying-the-admin-user). +The admin user is created during the [configuration of Riak CS]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-cs/#specifying-the-admin-user). The same user credentials must be added to each Stanchion used in the cluster. This is set in the `stanchion.conf` file, which is located in the `/etc/stanchion` directory. Enter the same `admin.key` and `admin.secret` as diff --git a/content/riak/cs/2.1.1/cookbooks/configuration/transmit.md b/content/riak/cs/2.1.1/cookbooks/configuration/transmit.md index d1e0ce3839..5a89dec81a 100644 --- a/content/riak/cs/2.1.1/cookbooks/configuration/transmit.md +++ b/content/riak/cs/2.1.1/cookbooks/configuration/transmit.md @@ -38,11 +38,11 @@ dialog as follows: Defining a connection looks like this: -![Trasmit screenshot](/images/riak_cs_transmit0.jpg) +![Transmit screenshot]({{}}images/riak_cs_transmit0.jpg) > **Note** > -> Transmit expects a secure connection, so ensure that your Riak CS proxy server is configured with SSL support. For information on configuring a software solution like HAProxy with SSL for use with Riak CS, see [Load Balancing and Proxy Configuration](/riak/cs/2.1.1/cookbooks/configuration/load-balancing-proxy). +> Transmit expects a secure connection, so ensure that your Riak CS proxy server is configured with SSL support. For information on configuring a software solution like HAProxy with SSL for use with Riak CS, see [Load Balancing and Proxy Configuration]({{}}riak/cs/2.1.1/cookbooks/configuration/load-balancing-proxy). Finally, test the connection to Riak CS by clicking **Connect**. @@ -56,11 +56,11 @@ After successfully connecting to Riak CS, verify that you can create a bucket. The new bucket creation dialog looks like this: -![Trasmit screenshot](/images/riak_cs_transmit1.jpg) +![Transmit screenshot]({{}}images/riak_cs_transmit1.jpg) The newly created bucket is listed in the right hand pane of the Transmit interface: -![Trasmit screenshot](/images/riak_cs_transmit2.jpg) +![Transmit screenshot]({{}}images/riak_cs_transmit2.jpg) ## Copy Files @@ -74,7 +74,7 @@ copying of the files to the bucket. After copying, the files will appear in the bucket: -![Trasmit screenshot](/images/riak_cs_transmit3.jpg) +![Transmit screenshot]({{}}images/riak_cs_transmit3.jpg) You have now successfully configured a Transmit connection to Riak CS and verified basic file copying capabilities. diff --git a/content/riak/cs/2.1.1/cookbooks/faqs/riak-cs.md b/content/riak/cs/2.1.1/cookbooks/faqs/riak-cs.md index b47a3dfe3c..0fcd9c19a4 100644 --- a/content/riak/cs/2.1.1/cookbooks/faqs/riak-cs.md +++ b/content/riak/cs/2.1.1/cookbooks/faqs/riak-cs.md @@ -15,7 +15,7 @@ aliases: Q: What is Riak CS?
A: - Riak CS is [multi-tenant](http://en.wikipedia.org/wiki/Multitenancy) cloud storage software for public and private clouds. Built on Basho's distributed database [Riak KV](/riak/kv/2.1.3), Riak CS is commercial software designed to provide simple, available, distributed cloud storage at any scale. Riak CS is S3 API compatible and supports per-tenant reporting for billing and metering use cases. + Riak CS is [multi-tenant](http://en.wikipedia.org/wiki/Multitenancy) cloud storage software for public and private clouds. Built on Basho's distributed database [Riak KV]({{}}riak/kv/2.1.3), Riak CS is commercial software designed to provide simple, available, distributed cloud storage at any scale. Riak CS is S3 API compatible and supports per-tenant reporting for billing and metering use cases. Q: Can users share data? A: @@ -27,7 +27,7 @@ Q: Is it possible to specify a filesystem where my Riak CS buckets will live? A: You can specify the location of **all** Riak CS bucket data by changing the settings for Riak's backends to a path on a particular filesystem. If this is your goal, you can configure Riak to suit your environment. If you look at our example Riak `advanced.config`/`app.config` backend - definition from the [Configuring Riak for CS](/riak/cs/2.1.1/cookbooks/configuration/riak-for-cs) section, it looks like this: + definition from the [Configuring Riak for CS]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-for-cs) section, it looks like this: ```advanced.config {riak_kv, [ diff --git a/content/riak/cs/2.1.1/cookbooks/garbage-collection.md b/content/riak/cs/2.1.1/cookbooks/garbage-collection.md index 8051f9eed1..5772d2f2c0 100644 --- a/content/riak/cs/2.1.1/cookbooks/garbage-collection.md +++ b/content/riak/cs/2.1.1/cookbooks/garbage-collection.md @@ -15,7 +15,7 @@ aliases: This document describes some of the implementation details behind Riak CS's garbage collection process. For information on configuring this -system, please see our documentation on [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs). +system, please see our documentation on [configuring Riak CS]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-cs). ## Versions and Manifests @@ -192,7 +192,7 @@ We recommend using only _one_ active garbage collection daemon in any Riak CS cluster. If multiple daemons are currently being used, you can disable the others by setting the `gc.interval` parameter to `infinity` on those nodes. More information on how to do that can be found in the -[CS configuration doc](/riak/cs/2.1.1/cookbooks/configuration/riak-cs/#garbage-collection-settings). +[CS configuration doc]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-cs/#garbage-collection-settings). ## Controlling the GC Daemon @@ -212,7 +212,7 @@ Command | Description `set-interval` | Set or update the garbage collection interval. This setting uses a unit of seconds. `set-leeway` | Set or update the garbage collection leeway time. This setting indicates how many seconds must elapse after an object is deleted or overwritten before the garbage collection system may reap the object. This setting uses a unit of seconds. -For more information, see our documentation on [Riak CS command-line tools](/riak/cs/2.1.1/cookbooks/command-line-tools). +For more information, see our documentation on [Riak CS command-line tools]({{}}riak/cs/2.1.1/cookbooks/command-line-tools). ## Manifest Updates @@ -256,7 +256,7 @@ manifest keys that could linger indefinitely. 
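To make the daemon controls listed above concrete, a short sketch (run on a Riak CS node; the interval value is only an example):

```bash
# Inspect the garbage collection daemon's current state.
riak-cs-admin gc status

# Collect eligible objects once per hour instead of the default,
# then kick off a manual collection immediately.
riak-cs-admin gc set-interval 3600
riak-cs-admin gc batch
```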
Riak CS's garbage collection implementation gives the deployer several
knobs to adjust for fine-tuning system performance. More information
-can be found in our documentation on [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs/#garbage-collection-settings).
+can be found in our documentation on [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs/#garbage-collection-settings).
## More Information
diff --git a/content/riak/cs/2.1.1/cookbooks/installing.md b/content/riak/cs/2.1.1/cookbooks/installing.md
index f9918390bf..3d73a7e657 100644
--- a/content/riak/cs/2.1.1/cookbooks/installing.md
+++ b/content/riak/cs/2.1.1/cookbooks/installing.md
@@ -22,16 +22,16 @@ You can install Riak CS on a single node (for development purposes) or
using an automated deployment tool. Any Riak CS installation involves
three components, all of which must be installed separately:
-* [Riak KV](/riak/2.1.4/) --- The distributed database on top of which Riak CS
+* [Riak KV]({{< baseurl >}}riak/kv/2.1.4/) --- The distributed database on top of which Riak CS
is built
* Riak CS itself
-* [Stanchion](/riak/cs/2.1.1/theory/stanchion) --- An application used to manage [globally unique entities](/riak/cs/2.1.1/theory/stanchion/#globally-unique-entities) such as users and buckets.
+* [Stanchion]({{< baseurl >}}riak/cs/2.1.1/theory/stanchion) --- An application used to manage [globally unique entities]({{< baseurl >}}riak/cs/2.1.1/theory/stanchion/#globally-unique-entities) such as users and buckets.
[Riak KV](#installing-riak) and [Riak CS](#installing-riak-cs-on-a-node) must be installed on each node in your cluster. [Stanchion](#installing-stanchion-on-a-node), however, needs to be installed on only one node.
## Version Compatibility
-We strongly recommend using one of the documented [version combinations](/riak/cs/2.1.1/cookbooks/version-compatibility/)
+We strongly recommend using one of the documented [version combinations]({{< baseurl >}}riak/cs/2.1.1/cookbooks/version-compatibility/)
when installing and running Riak CS.
## Installing Riak KV
@@ -40,30 +40,30 @@ Before installing Riak CS, Riak KV must be installed on each node in
your cluster. You can install Riak KV either as part of an
OS-specific package or from source.
- * [Debian and Ubuntu](/riak/kv/2.1.4/setup/installing/debian-ubuntu)
- * [RHEL and CentOS](/riak/kv/2.1.4/setup/installing/rhel-centos)
- * [Mac OS X](/riak/kv/2.1.4/setup/installing/mac-osx)
- * [FreeBSD](/riak/kv/2.1.4/setup/installing/freebsd)
- * [SUSE](/riak/kv/2.1.4/setup/installing/suse)
- * [From Source](/riak/kv/2.1.4/setup/installing/source)
+ * [Debian and Ubuntu]({{< baseurl >}}riak/kv/2.1.4/setup/installing/debian-ubuntu)
+ * [RHEL and CentOS]({{< baseurl >}}riak/kv/2.1.4/setup/installing/rhel-centos)
+ * [Mac OS X]({{< baseurl >}}riak/kv/2.1.4/setup/installing/mac-osx)
+ * [FreeBSD]({{< baseurl >}}riak/kv/2.1.4/setup/installing/freebsd)
+ * [SUSE]({{< baseurl >}}riak/kv/2.1.4/setup/installing/suse)
+ * [From Source]({{< baseurl >}}riak/kv/2.1.4/setup/installing/source)
Riak is also officially supported on the following public cloud
infrastructures:
- * [Windows Azure](/riak/kv/2.1.4/setup/installing/windows-azure)
- * [AWS Marketplace](/riak/kv/2.1.4/setup/installing/amazon-web-services)
+ * [Windows Azure]({{< baseurl >}}riak/kv/2.1.4/setup/installing/windows-azure)
+ * [AWS Marketplace]({{< baseurl >}}riak/kv/2.1.4/setup/installing/amazon-web-services)
Remember that you must repeat this installation process on each node in
your cluster. For future reference, you should make note of the Riak KV
installation directory.
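Before moving on, a quick hedged sanity check can confirm that each Riak KV node is actually running; this sketch assumes the default HTTP listener on `127.0.0.1:8098`:

```bash
# Confirm the local Riak KV node responds before layering Riak CS on it.
riak ping

# Optionally spot-check the HTTP stats endpoint on the default port.
curl -s http://127.0.0.1:8098/stats
```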
If you want to fully configure Riak KV prior to installing Riak CS, see our
-documentation on [configuring Riak KV for CS](/riak/cs/2.1.1/cookbooks/configuration/riak-for-cs/).
+documentation on [configuring Riak KV for CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-for-cs/).
## Installing Riak CS on a Node
-Riak CS and Stanchion packages are available on the [Download Riak CS](/riak/cs/2.1.1/downloads/)
-page. Similarly, Riak packages are available on the [Download Riak KV](/riak/kv/2.1.4/downloads/) page.
+Riak CS and Stanchion packages are available on the [Download Riak CS]({{< baseurl >}}riak/cs/2.1.1/downloads/)
+page. Similarly, Riak packages are available on the [Download Riak KV]({{< baseurl >}}riak/kv/2.1.4/downloads/) page.
After downloading Riak CS, Stanchion, and Riak, install them using your
operating system's package management commands.
@@ -78,7 +78,7 @@ such as a dedicated device [HAProxy](http://haproxy.1wt.eu) or [Nginx](http://wi
### Installing Riak CS on Mac OS X
To install Riak CS on OS X, first download the appropriate package from
-the [downloads](/riak/cs/2.1.1/downloads) page:
+the [downloads]({{< baseurl >}}riak/cs/2.1.1/downloads) page:
```bash
curl -O http://s3.amazonaws.com/downloads.basho.com/riak-cs/1.5/2.1.1/osx/10.8/riak-cs-2.1.1-OSX-x86_64.tar.gz
@@ -90,7 +90,7 @@ Then, unpack the downloaded tarball:
tar -xvzf riak-cs-2.1.1-OSX-x86_64.tar.gz
```
-At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs/).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs/).
### Installing Riak CS on Debian or Ubuntu
@@ -255,11 +255,11 @@ can achieve this by specifying a load balancer IP as the Stanchion IP
in each Riak CS node's `riak-cs.conf`. This load balancer must be
configured to send all requests to a single Stanchion node, failing
over to a secondary Stanchion node if the primary is unavailable. More
-details can be found in [Specifying the Stanchion Node](/riak/cs/2.1.1/cookbooks/configuration/#specifying-the-stanchion-node).
+details can be found in [Specifying the Stanchion Node]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/#specifying-the-stanchion-node).
### Installing Stanchion on Mac OS X
-First, download the appropriate package from the [downloads](/riak/cs/2.1.1/downloads/#stanchion-1-4-3) page.
+First, download the appropriate package from the [downloads]({{< baseurl >}}riak/cs/2.1.1/downloads/#stanchion-1-4-3) page.
```bash
curl -O http://s3.amazonaws.com/downloads.basho.com/stanchion/1.4/1.4.3/osx/10.8/stanchion-2.0.0-OSX-x86_64.tar.gz
@@ -271,7 +271,7 @@ Then, unpack the downloaded tarball:
tar -xvzf stanchion-2.0.0-OSX-x86_64.tar.gz
```
-At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs).
### Installing Stanchion on Debian or Ubuntu
@@ -300,7 +300,7 @@ Now, install the `stanchion` package:
sudo apt-get install stanchion
```
-At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs).
#### Installing the `.deb` Package Manually (not recommended)
@@ -311,7 +311,7 @@ sudo dpkg -i <package-name>
Replace `<package-name>` with the actual filename for the package you
are installing.
-At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs).
### Installing Stanchion on RHEL or CentOS
@@ -338,7 +338,7 @@ Once the `.rpm` package has been installed, install Stanchion:
sudo yum install stanchion
```
-At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs).
#### Installing the `.rpm` Package Manually (not recommended)
@@ -349,7 +349,7 @@ sudo rpm -Uvh <package-name>
Replace `<package-name>` with the actual filename for the package you
are installing.
-At this point, you can move on to [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs).
+At this point, you can move on to [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs).
> **Note on SELinux**
>
@@ -359,4 +359,4 @@ encounter errors during installation, try disabling SELinux.
## What's Next?
Once you've completed installation of Riak CS and Riak, you're ready to
-learn more about [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs).
+learn more about [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs).
diff --git a/content/riak/cs/2.1.1/cookbooks/installing/chef.md b/content/riak/cs/2.1.1/cookbooks/installing/chef.md
index 64fa92242f..c25ff693f5 100644
--- a/content/riak/cs/2.1.1/cookbooks/installing/chef.md
+++ b/content/riak/cs/2.1.1/cookbooks/installing/chef.md
@@ -138,8 +138,8 @@ default['stanchion']['args']['-env']['ERL_CRASH_DUMP'] = "/var/log/stanchion/erl
#### Storage Backends
-Riak CS uses a specific combination of storage backends. [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) is used to
-store blocks and [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb) to store manifests. The `riak_cs_kv_multi_backend` must be specified in the Riak configuration file for Riak CS to work:
+Riak CS uses a specific combination of storage backends. [Bitcask]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/bitcask) is used to
+store blocks and [LevelDB]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/leveldb) to store manifests. The `riak_cs_kv_multi_backend` must be specified in the Riak configuration file for Riak CS to work:
```ruby
default['riak']['config']['riak_kv']['storage_backend'] = "riak_cs_kv_multi_backend"
@@ -183,5 +183,5 @@ default['stanchion']['config']['lager']['error_logger_redirect'] = true
More information related to cluster configuration and building development environments is available in our documentation.
-* [Building a Local Test Environment](/riak/cs/2.1.1/tutorials/fast-track/local-testing-environment)
-* [Building a Virtual Testing Environment](/riak/cs/2.1.1/tutorials/fast-track/virtual-test-environment)
+* [Building a Local Test Environment]({{< baseurl >}}riak/cs/2.1.1/tutorials/fast-track/local-testing-environment)
+* [Building a Virtual Testing Environment]({{< baseurl >}}riak/cs/2.1.1/tutorials/fast-track/virtual-test-environment)
diff --git a/content/riak/cs/2.1.1/cookbooks/keystone-setup.md b/content/riak/cs/2.1.1/cookbooks/keystone-setup.md
index c8b53b8a55..ced186b3e4 100644
--- a/content/riak/cs/2.1.1/cookbooks/keystone-setup.md
+++ b/content/riak/cs/2.1.1/cookbooks/keystone-setup.md
@@ -73,7 +73,7 @@ pip install -r tools/pip-requires
The next step is to select the appropriate options in the
`keystone.conf` configuration file.
A sample configuration that is
-useful for local testing with Riak CS can be found [here](/riak/cs/2.1.1/cookbooks/keystone-conf-sample/). This configuration file sets up logging to
+useful for local testing with Riak CS can be found [here]({{< baseurl >}}riak/cs/2.1.1/cookbooks/keystone-conf-sample/). This configuration file sets up logging to
`./log/keystone/keystone.log` and uses the templated catalog backend to
set up the Riak CS object store service. This catalog backend uses a
local file to populate the service catalog.
diff --git a/content/riak/cs/2.1.1/cookbooks/logging.md b/content/riak/cs/2.1.1/cookbooks/logging.md
index 60ea0ce31e..70a535adde 100644
--- a/content/riak/cs/2.1.1/cookbooks/logging.md
+++ b/content/riak/cs/2.1.1/cookbooks/logging.md
@@ -41,4 +41,4 @@ That section looks something like this:
```
A full description of all available parameters can be found in the
-[configuration files](/riak/kv/2.1.3/configuring/reference) document for Riak.
+[configuration files]({{< baseurl >}}riak/kv/2.1.3/configuring/reference) document for Riak.
diff --git a/content/riak/cs/2.1.1/cookbooks/monitoring-and-metrics.md b/content/riak/cs/2.1.1/cookbooks/monitoring-and-metrics.md
index 187bd549ea..2ccf8ad796 100644
--- a/content/riak/cs/2.1.1/cookbooks/monitoring-and-metrics.md
+++ b/content/riak/cs/2.1.1/cookbooks/monitoring-and-metrics.md
@@ -10,11 +10,11 @@ menu:
project: "riak_cs"
project_version: "2.1.1"
aliases:
- /riakcs/2.1.1/cookbooks/Monitoring-and-Metrics/
+ - /riakcs/2.1.1/cookbooks/monitoring-and-metrics/
---
[amazon]: http://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html
-[s3 api]: http://docs.basho.com/riakcs/latest/references/apis/storage/s3/
+[s3 api]: {{< baseurl >}}riak/cs/latest/references/apis/storage/s3/
Riak S2 (CS) includes metrics and operational statistics to help you monitor your system in more detail and diagnose system issues more easily. There are three major categories of metrics:
diff --git a/content/riak/cs/2.1.1/cookbooks/querying-access-statistics.md b/content/riak/cs/2.1.1/cookbooks/querying-access-statistics.md
index 4e66d768a7..9fec715c88 100644
--- a/content/riak/cs/2.1.1/cookbooks/querying-access-statistics.md
+++ b/content/riak/cs/2.1.1/cookbooks/querying-access-statistics.md
@@ -24,7 +24,7 @@ and access.
{{% /note %}}
For information about how access statistics are logged, please read
-[Usage and Billing Data](/riak/cs/2.1.1/cookbooks/usage-and-billing-data).
+[Usage and Billing Data]({{< baseurl >}}riak/cs/2.1.1/cookbooks/usage-and-billing-data).
The following sections discuss accessing the access statistics using
bare HTTP requests. Query parameters are used to specify the types and
@@ -81,7 +81,7 @@ HTTP/1.1 404 Object Not Found
> **Authentication Required**
>
> Queries to the usage resources described here must be authenticated as
-described in the [Authentication documentation](/riak/cs/2.1.1/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files.
+described in the [Authentication documentation]({{< baseurl >}}riak/cs/2.1.1/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files.
The usage HTTP resource provides both access and storage statistics. Since
each of these queries can be taxing in its own right, they are
@@ -234,7 +234,7 @@ the amount of time that may be retrieved in any request is limited.
The limit is configured by the `riak_cs` application environment
variable `usage_request_limit`. The value is expressed as an integer
-number of archive intervals (see [Usage and Billing Data](/riak/cs/2.1.1/cookbooks/usage-and-billing-data) for a
+number of archive intervals (see [Usage and Billing Data]({{< baseurl >}}riak/cs/2.1.1/cookbooks/usage-and-billing-data) for a
description of archive intervals).
The default value is `744`, which is 31 days at the default archive
diff --git a/content/riak/cs/2.1.1/cookbooks/querying-storage-statistics.md b/content/riak/cs/2.1.1/cookbooks/querying-storage-statistics.md
index 787a63fcc9..421fcb0b72 100644
--- a/content/riak/cs/2.1.1/cookbooks/querying-storage-statistics.md
+++ b/content/riak/cs/2.1.1/cookbooks/querying-storage-statistics.md
@@ -26,9 +26,9 @@ and access.
> **Note**:
>
-> Storage statistics are not calculated by default. Please read [Usage and Billing Data](/riak/cs/2.1.1/cookbooks/usage-and-billing-data) for details about how to enable storage calculation archiving.
+> Storage statistics are not calculated by default. Please read [Usage and Billing Data]({{< baseurl >}}riak/cs/2.1.1/cookbooks/usage-and-billing-data) for details about how to enable storage calculation archiving.
-The basics of querying storage statistics, including the URL used and the parameters for specifying the time slice, are the same as they are for [Querying Access Statistics](/riak/cs/2.1.1/cookbooks/querying-access-statistics).
+The basics of querying storage statistics, including the URL used and the parameters for specifying the time slice, are the same as they are for [Querying Access Statistics]({{< baseurl >}}riak/cs/2.1.1/cookbooks/querying-access-statistics).
Please refer to the descriptions there for more details.
@@ -39,7 +39,7 @@ been configured to something other than default CS port of `8080`.
> **Authentication Required**
>
-> Queries to the usage resources described here must be authenticated as described in the [Authentication documentation](/riak/cs/2.1.1/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files.
+> Queries to the usage resources described here must be authenticated as described in the [Authentication documentation]({{< baseurl >}}riak/cs/2.1.1/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files.
The usage HTTP resource provides both access and storage statistics. Since each of these queries can be taxing in its own right, they are both omitted from the result by default:
@@ -97,7 +97,7 @@ There are no statistics included in this report because the default time span is
### S3 Object-style Access
-As described in [Querying Access Statistics](/riak/cs/2.1.1/cookbooks/querying-access-statistics), these statistics are also available as S3 objects. To add storage statistics to the result, add the character `b` to the `Options` portion of the object's path. For example, the following command would produce storage statistics in XML format:
+As described in [Querying Access Statistics]({{< baseurl >}}riak/cs/2.1.1/cookbooks/querying-access-statistics), these statistics are also available as S3 objects. To add storage statistics to the result, add the character `b` to the `Options` portion of the object's path.
For example, the following command would produce storage statistics in XML format:
```bash
s3cmd get s3://riak-cs/usage/8NK4FH2SGKJJM8JIP2GU/bx/20120315T140000Z/20120315T160000Z
diff --git a/content/riak/cs/2.1.1/cookbooks/release-notes.md b/content/riak/cs/2.1.1/cookbooks/release-notes.md
index 28403be80b..9eaff5c9a5 100644
--- a/content/riak/cs/2.1.1/cookbooks/release-notes.md
+++ b/content/riak/cs/2.1.1/cookbooks/release-notes.md
@@ -13,7 +13,7 @@ aliases:
- /riakcs/2.1.1/cookbooks/Riak-CS-Release-Notes/
- /riak/cs/2.1.1/cookbooks/Riak-CS-Release-Notes/
---
-[riak_cs_multibag_support]: /riak/cs/2.1.1/cookbooks/supercluster
+[riak_cs_multibag_support]: {{< baseurl >}}riak/cs/2.1.1/cookbooks/supercluster
[riak_cs_1.5_release_notes_upgrading]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#notes-on-upgrading
[riak_cs_1.5_release_notes_upgrading_1]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#notes-on-upgrading-1
@@ -54,7 +54,7 @@ New metrics have been added that enable you to determine the health of your Riak
* Memory information about the riak-cs virtual machine
* HTTP listener information: active sockets and waiting acceptors
-**Note:** stats item names from prior to 2.0.x are not preserved; they have been renamed or removed. No backward consistency is maintained. Please see [the documentation](docs.basho.com/riakcs/latest/cookbooks/Monitoring-and-Metrics/) for more information.
+**Note:** stats item names from prior to 2.0.x are not preserved; they have been renamed or removed. No backward consistency is maintained. Please see [the documentation]({{< baseurl >}}riak/cs/latest/cookbooks/monitoring-and-metrics/) for more information.
* [[PR 1189](https://github.com/basho/riak_cs/pull/1189)]
* [[PR 1180](https://github.com/basho/riak_cs/pull/1180)]
@@ -69,7 +69,7 @@ Additional storage usage metrics are also available.
* [[PR 1120](https://github.com/basho/riak_cs/pull/1120)]
#### `riak-cs-admin`
-The following administration CLIs have been replaced by the [`riak-cs-admin` command](http://docs.basho.com/riakcs/latest/cookbooks/command-line-tools/):
+The following administration CLIs have been replaced by the [`riak-cs-admin` command]({{< baseurl >}}riak/cs/latest/cookbooks/command-line-tools/):
* `riak-cs-storage`
* `riak-cs-gc`
@@ -89,7 +89,7 @@ Several new options have been added to the `riak-cs-admin gc` command:
* Riak S2 2.0 (and older) has a race condition where fullsync replication and garbage collection may resurrect deleted blocks without any way to delete them again. When real-time replication and replication of a garbage collection bucket entry object being dropped from the real-time queue are combined, blocks may remain on the sink side without being collected. Riak S2 2.1 introduces deterministic garbage collection to avoid fullsync replication. Additionally, garbage collection and fullsync replication run concurrently, and work on the same blocks and manifests. You can now specify a range of time using the `--start` and `--end` flags with `riak-cs-admin gc batch`, so that the garbage collector collects deleted objects synchronously on both sink and source sides. [[PR 1147](https://github.com/basho/riak_cs/pull/1147)]
* `riak-cs-admin gc earliest-keys` is available so you can find the oldest entry after `epoch_start` in garbage collection. With this option, you can stay informed of garbage collection progress.
[[PR 1160](https://github.com/basho/riak_cs/pull/1160)]
-More information on garbage collection can be found in the [documentation](http://docs.basho.com/riakcs/latest/cookbooks/garbage-collection/).
+More information on garbage collection can be found in the [documentation]({{< baseurl >}}riak/cs/latest/cookbooks/garbage-collection/).
### Additions
@@ -113,7 +113,7 @@ More information on garbage collection can be found in the [documentation](http:
* An option has been added to replace the `PR=all user GET` option with `PR=one` just before authentication. This option improves latency, especially in the presence of slow (or actually-failing) nodes blocking the whole request flow because of PR=all. When enabled, a user's owned-bucket list is never pruned after a bucket is deleted, instead it is just marked as deleted. [[PR 1191](https://github.com/basho/riak_cs/pull/1191)]
* An info log has been added when starting a storage calculation batch. [[PR 1238](https://github.com/basho/riak_cs/pull/1238)]
* `GET Bucket` requests now have clearer responses. A 501 stub for Bucket lifecycle and a simple stub for Bucket requestPayment have been added. [[PR 1223](https://github.com/basho/riak_cs/pull/1223)]
-* Several user-friendly features have been added to [`riak-cs-debug`](http://docs.basho.com/riakcs/latest/cookbooks/command-line-tools/): fine-grained information gathering options, user-defined filtering for configuration files, and verbose output for failed commands. [[PR 1236](https://github.com/basho/riak_cs/pull/1236)]
+* Several user-friendly features have been added to [`riak-cs-debug`]({{< baseurl >}}riak/cs/latest/cookbooks/command-line-tools/): fine-grained information gathering options, user-defined filtering for configuration files, and verbose output for failed commands. [[PR 1236](https://github.com/basho/riak_cs/pull/1236)]
#### Enterprise
* MDC has `proxy_get`, which makes block objects propagate to site clusters when they are requested. Now, multibag configuration with MDC supports `proxy_get`. [[PR 1171](https://github.com/basho/riak_cs/pull/1171) and [PR 25](https://github.com/basho/riak_cs_multibag/pull/25)]
@@ -526,7 +526,7 @@ None
### Download
Please see the [Riak CS Downloads
-Page](http://docs.basho.com/riakcs/latest/riakcs-downloads/).
+Page]({{< baseurl >}}riak/cs/latest/downloads/).
### Feedback
@@ -544,7 +544,7 @@ venues:
### Changes
* Improve logging around failures with Riak
- [riak_cs/#987](http://docs.basho.com/riak/latest/dev/using/libraries/)
+ [riak_cs/#987](https://github.com/basho/riak_cs/pull/987)
* Add amendment log output when storing access stats into Riak failed
[riak_cs/#988](https://github.com/basho/riak_cs/pull/988). This
change prevents losing access stats logs in cases of temporary connection
@@ -572,7 +572,7 @@ None
### Download
Please see the [Riak CS Downloads
-Page](http://docs.basho.com/riakcs/latest/riakcs-downloads)
+Page]({{< baseurl >}}riak/cs/latest/downloads)
### Feedback
@@ -589,7 +589,7 @@ venues:
### Additions
-* Bucket restrictions --- Similar to S3, you can now limit the number of buckets created per user to prevent users from creating an unusually large number of buckets. More details are included [here](http://docs.basho.com/riakcs/latest/cookbooks/configuration/Configuring-Riak-CS/).
+* Bucket restrictions --- Similar to S3, you can now limit the number of buckets created per user to prevent users from creating an unusually large number of buckets.
More details are included [here]({{< baseurl >}}riak/cs/latest/cookbooks/configuration/riak-cs/).
### Changes
@@ -646,7 +646,7 @@ To avoid having a limit, set `max_buckets_per_user` to `unlimited`.
### Download
-Please see the [Riak CS Downloads Page](http://docs.basho.com/riakcs/latest/riakcs-downloads/).
+Please see the [Riak CS Downloads Page]({{< baseurl >}}riak/cs/latest/downloads/).
### Feedback
@@ -662,7 +662,7 @@ Or via email at **info@basho.com**.
### Additions
-* Added Multibag Technical Preview to Riak CS. More info is available [here](http://docs.basho.com/riakcs/latest/cookbooks/multibag/)
+* Added Multibag Technical Preview to Riak CS. More info is available [here]({{< baseurl >}}riak/cs/latest/cookbooks/multibag/)
* A new command `riak-cs-debug` including `cluster-info` [riak_cs/#769](https://github.com/basho/riak_cs/pull/769), [riak_cs/#832](https://github.com/basho/riak_cs/pull/832)
* Tie up all existing commands into a new command `riak-cs-admin` [riak_cs/#839](https://github.com/basho/riak_cs/pull/839)
* Add a command `riak-cs-admin stanchion` to switch Stanchion IP and port manually [riak_cs/#657](https://github.com/basho/riak_cs/pull/657)
@@ -1003,7 +1003,7 @@ they will all share the name "struct".
#### Additions
-* Support query parameter authentication as specified in [http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html](Signing and Authenticating REST Requests).
+* Support query parameter authentication as specified in [Signing and Authenticating REST Requests](http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html).
## Riak CS 1.0.1
diff --git a/content/riak/cs/2.1.1/cookbooks/rolling-upgrades.md b/content/riak/cs/2.1.1/cookbooks/rolling-upgrades.md
index 2b38eeba68..d5960212f3 100644
--- a/content/riak/cs/2.1.1/cookbooks/rolling-upgrades.md
+++ b/content/riak/cs/2.1.1/cookbooks/rolling-upgrades.md
@@ -18,7 +18,7 @@ Each node in a Riak CS cluster contains settings that define its
operating modes and API coverage. The following steps outline the
process of upgrading Riak CS in a rolling fashion.
-Be sure to check the Riak CS [Version Compatibility](/riak/cs/2.1.1/cookbooks/version-compatibility) chart to ensure that your version of Riak, Riak CS, and Stanchion have been tested to work together. As Basho supports upgrades from the previous two major versions, this document will cover upgrades from Riak CS 1.4.x and Riak CS 1.5.x.
+Be sure to check the Riak CS [Version Compatibility]({{< baseurl >}}riak/cs/2.1.1/cookbooks/version-compatibility) chart to ensure that your version of Riak, Riak CS, and Stanchion have been tested to work together. As Basho supports upgrades from the previous two major versions, this document will cover upgrades from Riak CS 1.4.x and Riak CS 1.5.x.
As Riak CS 2.0.0 only works with Riak 2.0.5, the underlying Riak installation *must* be upgraded to Riak 2.0.5.
@@ -78,9 +78,9 @@ for a more detailed description.{{% /note %}}
4. Upgrade Riak, Riak CS, and Stanchion. See the
+   <a href="{{< baseurl >}}riak/cs/latest/downloads">Riak CS Downloads</a>
and
+   <a href="{{< baseurl >}}riak/kv/latest/downloads">Riak Downloads</a>
pages to find the appropriate packages.
**Debian** / **Ubuntu**
@@ -160,7 +160,7 @@
]},
```
-and so on. More details can be found at [configuring Riak CS](/riak/cs/2.1. 1/cookbooks/configuration/riak-cs).
+and so on. More details can be found at [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-cs).
{{% /note %}}
{{% note title="Note on Memory Sizing" %}}
diff --git a/content/riak/cs/2.1.1/cookbooks/supercluster.md b/content/riak/cs/2.1.1/cookbooks/supercluster.md
index e22bad02a5..c1e942c584 100644
--- a/content/riak/cs/2.1.1/cookbooks/supercluster.md
+++ b/content/riak/cs/2.1.1/cookbooks/supercluster.md
@@ -21,15 +21,15 @@ customers. It is not yet suitable for production use.
While [Riak CS Enterprise](http://basho.com/riak-enterprise) enables you
to distribute Riak CS objects across multiple data centers in a
-[source/sink pattern](/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture), all linked clusters are treated the same. In Riak CS version 1.5.0, however, Basho has added **supercluster** support to Riak CS Enterprise.
+[source/sink pattern]({{< baseurl >}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture), all linked clusters are treated the same. In Riak CS version 1.5.0, however, Basho has added **supercluster** support to Riak CS Enterprise.
With supercluster support, you can store object manifests and blocks in separate clusters or groups of clusters, a.k.a. **a set of supercluster members**, enhancing the scalability and overall storage capabilities of a Riak CS installation.
## Supercluster members
-A supercluster member is a set of clusters linked together via [Multi-Datacenter Replication](/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture)\(MDC).
-Without MDC support, a supercluster member consists of a single cluster. With MDC support, however, a supercluster member can consist of several linked clusters. You can assign members **weights** that determine the likelihood that objects, blocks, and manifests will be stored there. For example, if you expect to use one supercluster member more heavily than another you can increase the weight of that member using the interface described in [Riak CS Command-line Tools](/riak/cs/2.1.1/cookbooks/command-line-tools).
+A supercluster member is a set of clusters linked together via [Multi-Datacenter Replication]({{< baseurl >}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture)\(MDC).
+Without MDC support, a supercluster member consists of a single cluster. With MDC support, however, a supercluster member can consist of several linked clusters. You can assign members **weights** that determine the likelihood that objects, blocks, and manifests will be stored there. For example, if you expect to use one supercluster member more heavily than another you can increase the weight of that member using the interface described in [Riak CS Command-line Tools]({{< baseurl >}}riak/cs/2.1.1/cookbooks/command-line-tools).
## The Master Member
@@ -134,7 +134,7 @@ That involves performing the following steps on each node:
1. Stop the node
2. Upgrade Stanchion to a version that supports Riak CS supercluster, i.e. Riak CS 1.5.0 and later
-3.
Set your desired Stanchion [configuration]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/stanchion)
4. Start Stanchion on each node
### Add Clusters
@@ -145,7 +145,7 @@ connection information as explained above in the [supercluster Configuration](#s
### Set Weights
-When a new supercluster member is added, you must first set the weight of that member to zero using the [`riak-cs-supercluster`](/riak/cs/2.1.1/cookbooks/command-line-tools) command-line interface.
+When a new supercluster member is added, you must first set the weight of that member to zero using the [`riak-cs-supercluster`]({{< baseurl >}}riak/cs/2.1.1/cookbooks/command-line-tools) command-line interface.
The example below sets the weight of the recently added supercluster member `Alpha` to zero:
@@ -177,9 +177,9 @@ supercluster feature.
## Command Line Interface
Complete documentation for the `riak-cs-supercluster` interface can be found
-in our documentation on [Riak CS Command Line Tools](/riak/cs/2.1.1/cookbooks/command-line-tools/#riak-cs-supercluster).
+in our documentation on [Riak CS Command Line Tools]({{< baseurl >}}riak/cs/2.1.1/cookbooks/command-line-tools/#riak-cs-supercluster).
## Limitations
-Riak CS supercluster does not currently support [proxy gets](/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter/#riak-cs-mdc-gets) from
+Riak CS supercluster does not currently support [proxy gets]({{< baseurl >}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter/#riak-cs-mdc-gets) from
sink clusters.
diff --git a/content/riak/cs/2.1.1/cookbooks/system-features.md b/content/riak/cs/2.1.1/cookbooks/system-features.md
index 49805271a6..32707bea7c 100644
--- a/content/riak/cs/2.1.1/cookbooks/system-features.md
+++ b/content/riak/cs/2.1.1/cookbooks/system-features.md
@@ -9,9 +9,9 @@ aliases:
The following pages detail Riak CS's system features.
-* [Access Control Lists](/riak/cs/2.1.1/cookbooks/access-control-lists)
-* [Authentication](/riak/cs/2.1.1/cookbooks/authentication)
-* [Monitoring and Metrics](/riak/cs/2.1.1/cookbooks/monitoring-and-metrics)
-* [Querying Access Statistics](/riak/cs/2.1.1/cookbooks/querying-access-statistics)
-* [Querying Storage Statistics](/riak/cs/2.1.1/cookbooks/querying-storage-statistics)
-* [Usage and Billing Data](/riak/cs/2.1.1/cookbooks/usage-and-billing-data)
+* [Access Control Lists]({{< baseurl >}}riak/cs/2.1.1/cookbooks/access-control-lists)
+* [Authentication]({{< baseurl >}}riak/cs/2.1.1/cookbooks/authentication)
+* [Monitoring and Metrics]({{< baseurl >}}riak/cs/2.1.1/cookbooks/monitoring-and-metrics)
+* [Querying Access Statistics]({{< baseurl >}}riak/cs/2.1.1/cookbooks/querying-access-statistics)
+* [Querying Storage Statistics]({{< baseurl >}}riak/cs/2.1.1/cookbooks/querying-storage-statistics)
+* [Usage and Billing Data]({{< baseurl >}}riak/cs/2.1.1/cookbooks/usage-and-billing-data)
diff --git a/content/riak/cs/2.1.1/cookbooks/usage-and-billing-data.md b/content/riak/cs/2.1.1/cookbooks/usage-and-billing-data.md
index 0504c71df3..94322b8d5f 100644
--- a/content/riak/cs/2.1.1/cookbooks/usage-and-billing-data.md
+++ b/content/riak/cs/2.1.1/cookbooks/usage-and-billing-data.md
@@ -28,7 +28,7 @@ and access.
Access stats are tracked on a per-user basis, as rollups for slices of
time. They are stored just like other Riak CS data, in the `cs.access`
bucket in particular. For information about querying access statistics,
-please read [Querying Access Statistics](/riak/cs/2.1.1/cookbooks/querying-access-statistics).
+please read [Querying Access Statistics]({{< baseurl >}}riak/cs/2.1.1/cookbooks/querying-access-statistics).
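As a hedged aside, access rollups can be fetched the same way the `s3cmd` storage-statistics example shown earlier fetches storage data, reusing its placeholder key ID and time slice; `a` is assumed to select access data and `j` JSON output in the `Options` portion of the path:

```bash
# Fetch access (a) statistics as JSON (j) for one key ID and time slice;
# the key ID and timestamps below are placeholders.
s3cmd get s3://riak-cs/usage/8NK4FH2SGKJJM8JIP2GU/aj/20120315T140000Z/20120315T160000Z
```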
## Overview
@@ -71,7 +71,7 @@ logger determines the operation type by comparing the method, resource
module, and path to a known table. For example, it knows that a `GET`
on the *key* module with the `acl` query parameter in the path is a
`KeyReadACL` operation. A `PUT` to the same resource without the `acl`
-query parameter is a `KeyWrite` operation. See [Querying Access Statistics](/riak/cs/2.1.1/cookbooks/querying-access-statistics) for a list of all operation types.
+query parameter is a `KeyWrite` operation. See [Querying Access Statistics]({{< baseurl >}}riak/cs/2.1.1/cookbooks/querying-access-statistics) for a list of all operation types.
### Log Accumulation
@@ -181,7 +181,7 @@ slices of time. They are stored in the same Riak cluster as other
Riak CS data, in the `cs.storage` bucket.
For detailed information about querying storage statistics, please read
-[Querying Storage Statistics](/riak/cs/2.1.1/cookbooks/querying-storage-statistics).
+[Querying Storage Statistics]({{< baseurl >}}riak/cs/2.1.1/cookbooks/querying-storage-statistics).
### High Level
@@ -201,7 +201,7 @@ The storage calculation system uses MapReduce to sum the files in a
bucket. This means you must tell all of your Riak nodes where to find
Riak CS's compiled files before calculating storage.
-See [Configuring Riak for CS](/riak/cs/2.1.1/cookbooks/configuration/riak-for-cs) for directions on setting this up.
+See [Configuring Riak for CS]({{< baseurl >}}riak/cs/2.1.1/cookbooks/configuration/riak-for-cs) for directions on setting this up.
### Scheduling and Manual Triggering
diff --git a/content/riak/cs/2.1.1/cookbooks/using-with-keystone.md b/content/riak/cs/2.1.1/cookbooks/using-with-keystone.md
index 0b05a9cff3..a1aa43d0a6 100644
--- a/content/riak/cs/2.1.1/cookbooks/using-with-keystone.md
+++ b/content/riak/cs/2.1.1/cookbooks/using-with-keystone.md
@@ -290,7 +290,7 @@ section of the Riak CS `advanced.config` or `app.config` files:
### Keystone Setup
-Follow the procedures documented in [Keystone Setup](/riak/cs/2.1.1/cookbooks/keystone-setup) to set up and run
+Follow the procedures documented in [Keystone Setup]({{< baseurl >}}riak/cs/2.1.1/cookbooks/keystone-setup) to set up and run
Keystone.
1. Create a tenant called `test`:
diff --git a/content/riak/cs/2.1.1/index.md b/content/riak/cs/2.1.1/index.md
index d1193b431a..1cdd0c2e29 100644
--- a/content/riak/cs/2.1.1/index.md
+++ b/content/riak/cs/2.1.1/index.md
@@ -22,17 +22,17 @@ API is [Amazon S3 compatible](http://docs.aws.amazon.com/AmazonS3/latest/API/API
and supports per-tenant reporting for use cases involving billing and
metering.
-Riak CS is open source and [free for download](/riak/cs/2.1.1/downloads).
+Riak CS is open source and [free for download]({{< baseurl >}}riak/cs/2.1.1/downloads).
## Notable Riak CS Features
### Amazon S3-API Compatibility
-Riak CS has a built-in S3 interface with S3 Access Control List ([ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html)) support, which means that you can both use existing S3 tools and frameworks to manage your data and also import and extract data from Amazon directly. The HTTP REST API supports service, bucket, and object-level operations to easily store and retrieve data.
There is also support for the [OpenStack Swift API](/riak/cs/2.1.1/references/appendices/comparisons/swift/)
+Riak CS has a built-in S3 interface with S3 Access Control List ([ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html)) support, which means that you can both use existing S3 tools and frameworks to manage your data and also import and extract data from Amazon directly. The HTTP REST API supports service, bucket, and object-level operations to easily store and retrieve data. There is also support for the [OpenStack Swift API]({{< baseurl >}}riak/cs/2.1.1/references/appendices/comparisons/swift/).
### Per-Tenant Visibility
-With the Riak CS [Reporting API](/riak/cs/2.1.1/cookbooks/monitoring-and-metrics), you can access per-tenant usage data and statistics over network I/O. This reporting functionality supports use cases including accounting,
+With the Riak CS [Reporting API]({{< baseurl >}}riak/cs/2.1.1/cookbooks/monitoring-and-metrics), you can access per-tenant usage data and statistics over network I/O. This reporting functionality supports use cases including accounting,
subscription, chargebacks, plugins with billing systems, efficient multi-department utilization, and much more.
### Supports Large Objects of Arbitrary Content Type, Plus Metadata
diff --git a/content/riak/cs/2.1.1/references/apis/storage.md b/content/riak/cs/2.1.1/references/apis/storage.md
index 26ba140fa0..8b49d77478 100644
--- a/content/riak/cs/2.1.1/references/apis/storage.md
+++ b/content/riak/cs/2.1.1/references/apis/storage.md
@@ -55,30 +55,30 @@ Multipart Uploads {{1.3.0-}} | Coming Soon | Planned for future release |
## Service-level Operations
-* [GET Service](/riak/cs/2.1.1/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request
+* [GET Service]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request
## Bucket-level Operations
-* [GET Bucket](/riak/cs/2.1.1/references/apis/storage/s3/get-bucket) --- Returns a list of the objects
within a bucket
-* [GET Bucket ACL](/riak/cs/2.1.1/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket
-* [GET Bucket policy](/riak/cs/2.1.1/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket
-* [PUT Bucket](/riak/cs/2.1.1/references/apis/storage/s3/put-bucket) --- Creates a new bucket
-* [PUT Bucket ACL](/riak/cs/2.1.1/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions
+* [GET Bucket]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-bucket) --- Returns a list of the objects
within a bucket
+* [GET Bucket ACL]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket
+* [GET Bucket policy]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket
+* [PUT Bucket]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-bucket) --- Creates a new bucket
+* [PUT Bucket ACL]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions
for a bucket
-* [PUT Bucket policy](/riak/cs/2.1.1/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket
-* [DELETE Bucket](/riak/cs/2.1.1/references/apis/storage/s3/delete-bucket) --- Deletes a bucket
-* [DELETE Bucket
policy](/riak/cs/2.1.1/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket
+* [PUT Bucket policy]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket
+* [DELETE Bucket]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/delete-bucket) --- Deletes a bucket
+* [DELETE Bucket policy]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket
## Object-level Operations
-* [GET Object](/riak/cs/2.1.1/references/apis/storage/s3/get-object) --- Retrieves an object
-* [GET Object ACL](/riak/cs/2.1.1/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object
-* [PUT Object](/riak/cs/2.1.1/references/apis/storage/s3/put-object) --- Stores an object to a bucket
-* [PUT Object (Copy)](/riak/cs/2.1.1/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object
-* [PUT Object ACL](/riak/cs/2.1.1/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object
-* [HEAD Object](/riak/cs/2.1.1/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object)
-* [DELETE Object](/riak/cs/2.1.1/references/apis/storage/s3/delete-object) --- Deletes an object
+* [GET Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-object) --- Retrieves an object
+* [GET Object ACL]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object
+* [PUT Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-object) --- Stores an object to a bucket
+* [PUT Object (Copy)]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object
+* [PUT Object ACL]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object
+* [HEAD Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object)
+* [DELETE Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/delete-object) --- Deletes an object
## Multipart Upload
@@ -87,19 +87,19 @@ Object parts can be uploaded independently and in any order. After all
parts are uploaded, Riak CS assembles an object out of the parts. When
your object size reaches 100MB, you should consider using multipart
uploads instead of uploading the object in a single operation. Read more
-about multipart uploads on the [overview page](/riak/cs/2.1.1/cookbooks/multipart-upload-overview).
+about multipart uploads on the [overview page]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multipart-upload-overview).
-* [Initiate Multipart Upload](/riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID
-* [Upload Part](/riak/cs/2.1.1/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload
-* [Complete Multipart Upload](/riak/cs/2.1.1/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts
-* [Abort Multipart Upload](/riak/cs/2.1.1/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts
-* [List Parts](/riak/cs/2.1.1/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload.
-* [List Multipart Uploads](/riak/cs/2.1.1/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
+* [Initiate Multipart Upload]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID
+* [Upload Part]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload
+* [Complete Multipart Upload]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts
+* [Abort Multipart Upload]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts
+* [List Parts]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload.
+* [List Multipart Uploads]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
## Common Headers
-* [Common Riak CS Request Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-request-headers)
-* [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers)
+* [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/common-request-headers)
+* [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers)
There are two storage API options for Riak CS. The first and most fully
featured is the S3 API. There is also limited but improving support for
@@ -118,13 +118,13 @@ resource modules.
* Module: `riak_cs_s3_rewrite`
* [Documentation](http://docs.aws.amazon.com/AmazonS3/latest/API/APIRest.html)
-* [Mapping](/riak/cs/2.1.1/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api)
+* [Mapping]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api)
### Openstack Object Storage API (v1)
* Module: `riak_cs_oos_rewrite`
* [Documentation](http://docs.openstack.org/api/openstack-object-storage/1.0/content/index.html)
-* [Mapping](/riak/cs/2.1.1/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api)
+* [Mapping]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api)
Selecting an API is done by adding or changing the `rewrite_module` key in
the Riak CS `riak-cs.conf` file, or the old-style `advanced.config` or `app.config`
@@ -157,5 +157,5 @@ included when installing a Riak CS package or building from source.
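A hedged sketch of that switch: inspect and set the `rewrite_module` key in `riak-cs.conf`, then restart the node. The configuration path below is the conventional package default and may differ on your system:

```bash
# Show the currently selected rewrite module, if any.
grep rewrite_module /etc/riak-cs/riak-cs.conf

# Switch to the OpenStack rewrite module named above, then restart.
sudo sed -i 's/^rewrite_module = .*/rewrite_module = riak_cs_oos_rewrite/' /etc/riak-cs/riak-cs.conf
sudo riak-cs restart
```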
More details for each option can be found by following one of the following links:
-* [S3 API](/riak/cs/2.1.1/references/apis/storage/s3/)
-* [OpenStack API](/riak/cs/2.1.1/references/apis/storage/openstack/)
+* [S3 API]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/)
+* [OpenStack API]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/openstack/)
diff --git a/content/riak/cs/2.1.1/references/apis/storage/openstack.md b/content/riak/cs/2.1.1/references/apis/storage/openstack.md
index 4eb9e15759..a2f0b3226e 100644
--- a/content/riak/cs/2.1.1/references/apis/storage/openstack.md
+++ b/content/riak/cs/2.1.1/references/apis/storage/openstack.md
@@ -47,16 +47,16 @@ Update Object Metadata | Coming Soon | Planned for future release |
## Storage Account Services
-* [List Containers](/riak/cs/2.1.1/references/apis/storage/openstack/list-containers) --- Lists the containers owned by an account
+* [List Containers]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/openstack/list-containers) --- Lists the containers owned by an account
## Storage Container Services
-* [List Objects](/riak/cs/2.1.1/references/apis/storage/openstack/list-objects) --- Lists the objects in a container
-* [Create Container](/riak/cs/2.1.1/references/apis/storage/openstack/create-container) --- Creates a new container
-* [Delete Container](/riak/cs/2.1.1/references/apis/storage/openstack/delete-container) --- Deletes a container
+* [List Objects]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/openstack/list-objects) --- Lists the objects in a container
+* [Create Container]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/openstack/create-container) --- Creates a new container
+* [Delete Container]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/openstack/delete-container) --- Deletes a container
## Storage Object Services
-* [Get Object](/riak/cs/2.1.1/references/apis/storage/openstack/get-object) --- Retrieves an object
-* [Create or Update Object](/riak/cs/2.1.1/references/apis/storage/openstack/create-object) --- Write an object in a container
-* [Delete Object](/riak/cs/2.1.1/references/apis/storage/openstack/delete-object) --- Delete an object from a container
+* [Get Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/openstack/get-object) --- Retrieves an object
+* [Create or Update Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/openstack/create-object) --- Write an object in a container
+* [Delete Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/openstack/delete-object) --- Delete an object from a container
diff --git a/content/riak/cs/2.1.1/references/apis/storage/s3.md b/content/riak/cs/2.1.1/references/apis/storage/s3.md
index ee78abf0da..e41361dcb6 100644
--- a/content/riak/cs/2.1.1/references/apis/storage/s3.md
+++ b/content/riak/cs/2.1.1/references/apis/storage/s3.md
@@ -57,30 +57,30 @@ Multipart Uploads {{1.5.0+}} | ✓ |
## Service-level Operations
-* [GET Service](/riak/cs/2.1.1/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request
+* [GET Service]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request
## Bucket-level Operations
-* [GET Bucket](/riak/cs/2.1.1/references/apis/storage/s3/get-bucket) --- Returns a list of the objects
within a bucket
-* [GET Bucket ACL](/riak/cs/2.1.1/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket
-* [GET Bucket policy](/riak/cs/2.1.1/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket
-* [PUT
Bucket](/riak/cs/2.1.1/references/apis/storage/s3/put-bucket) --- Creates a new bucket
-* [PUT Bucket ACL](/riak/cs/2.1.1/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions
+* [GET Bucket]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-bucket) --- Returns a list of the objects
within a bucket
+* [GET Bucket ACL]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket
+* [GET Bucket policy]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket
+* [PUT Bucket]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-bucket) --- Creates a new bucket
+* [PUT Bucket ACL]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions
for a bucket
-* [PUT Bucket policy](/riak/cs/2.1.1/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket
-* [DELETE Bucket](/riak/cs/2.1.1/references/apis/storage/s3/delete-bucket) --- Deletes a bucket
-* [DELETE Bucket policy](/riak/cs/2.1.1/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket
+* [PUT Bucket policy]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket
+* [DELETE Bucket]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/delete-bucket) --- Deletes a bucket
+* [DELETE Bucket policy]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket
## Object-level Operations
-* [GET Object](/riak/cs/2.1.1/references/apis/storage/s3/get-object) --- Retrieves an object
-* [GET Object ACL](/riak/cs/2.1.1/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object
-* [PUT Object](/riak/cs/2.1.1/references/apis/storage/s3/put-object) --- Stores an object to a bucket
-* [PUT Object (Copy)](/riak/cs/2.1.1/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object
-* [PUT Object ACL](/riak/cs/2.1.1/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object
-* [HEAD Object](/riak/cs/2.1.1/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object)
-* [DELETE Object](/riak/cs/2.1.1/references/apis/storage/s3/delete-object) --- Deletes an object
+* [GET Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-object) --- Retrieves an object
+* [GET Object ACL]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object
+* [PUT Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-object) --- Stores an object to a bucket
+* [PUT Object (Copy)]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object
+* [PUT Object ACL]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object
+* [HEAD Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object)
+* [DELETE Object]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/delete-object) --- Deletes an object
## Multipart Upload
@@ -89,16 +89,16 @@ Object parts can be uploaded independently and in any order. After all
parts are uploaded, Riak CS assembles an object out of the parts. When
your object size reaches 100MB, you should consider using multipart
uploads instead of uploading the object in a single operation. Read more
-about multipart uploads on the [overview page](/riak/cs/2.1.1/cookbooks/multipart-upload-overview).
+about multipart uploads on the [overview page]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multipart-upload-overview).
-* [Initiate Multipart Upload](/riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID
-* [Upload Part](/riak/cs/2.1.1/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload
-* [Complete Multipart Upload](/riak/cs/2.1.1/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts
-* [Abort Multipart Upload](/riak/cs/2.1.1/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts
-* [List Parts](/riak/cs/2.1.1/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload.
-* [List Multipart Uploads](/riak/cs/2.1.1/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
+* [Initiate Multipart Upload]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID
+* [Upload Part]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload
+* [Complete Multipart Upload]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts
+* [Abort Multipart Upload]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts
+* [List Parts]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload.
+* [List Multipart Uploads]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
## Common Headers
-* [Common Riak CS Request Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-request-headers)
-* [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers)
+* [Common Riak CS Request Headers]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/common-request-headers)
+* [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers)
diff --git a/content/riak/cs/2.1.1/references/apis/storage/s3/abort-multipart-upload.md b/content/riak/cs/2.1.1/references/apis/storage/s3/abort-multipart-upload.md
index 664c5cd2fc..8ddaec2b82 100644
--- a/content/riak/cs/2.1.1/references/apis/storage/s3/abort-multipart-upload.md
+++ b/content/riak/cs/2.1.1/references/apis/storage/s3/abort-multipart-upload.md
@@ -27,7 +27,7 @@ Authorization: signatureValue
### Request Headers
-This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers).
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{< baseurl >}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers).
### Request Elements
@@ -37,7 +37,7 @@ This operation does not use request elements.
### Response Headers
-This implementation of the operation uses only response headers that are common to most responses.
For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.1/references/apis/storage/s3/complete-multipart-upload.md b/content/riak/cs/2.1.1/references/apis/storage/s3/complete-multipart-upload.md index 84f8d4513e..4a797a6f0f 100644 --- a/content/riak/cs/2.1.1/references/apis/storage/s3/complete-multipart-upload.md +++ b/content/riak/cs/2.1.1/references/apis/storage/s3/complete-multipart-upload.md @@ -45,7 +45,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -75,7 +75,7 @@ This implementation of the operation uses only response headers that are common ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.1/references/apis/storage/s3/delete-bucket-policy.md b/content/riak/cs/2.1.1/references/apis/storage/s3/delete-bucket-policy.md index 2c99f74df4..b6e9554590 100644 --- a/content/riak/cs/2.1.1/references/apis/storage/s3/delete-bucket-policy.md +++ b/content/riak/cs/2.1.1/references/apis/storage/s3/delete-bucket-policy.md @@ -29,7 +29,7 @@ This operation does not use request parameters. ### Request Headers -This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-request-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -39,7 +39,7 @@ No body should be appended. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers).
### Response Elements diff --git a/content/riak/cs/2.1.1/references/apis/storage/s3/get-bucket-policy.md b/content/riak/cs/2.1.1/references/apis/storage/s3/get-bucket-policy.md index 3819caed7a..f49e479968 100644 --- a/content/riak/cs/2.1.1/references/apis/storage/s3/get-bucket-policy.md +++ b/content/riak/cs/2.1.1/references/apis/storage/s3/get-bucket-policy.md @@ -31,7 +31,7 @@ This operation does not use request parameters. ### Request Headers -This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-request-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -41,7 +41,7 @@ No body should be appended. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload.md b/content/riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload.md index f7567b71fa..f675392cf1 100644 --- a/content/riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload.md +++ b/content/riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload.md @@ -55,7 +55,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.1/references/apis/storage/s3/list-multipart-uploads.md b/content/riak/cs/2.1.1/references/apis/storage/s3/list-multipart-uploads.md index 493c67b1f2..79121cec52 100644 --- a/content/riak/cs/2.1.1/references/apis/storage/s3/list-multipart-uploads.md +++ b/content/riak/cs/2.1.1/references/apis/storage/s3/list-multipart-uploads.md @@ -54,7 +54,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common RiakCS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -64,7 +64,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses.
For more information, see [Common RiakCS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.1/references/apis/storage/s3/list-parts.md b/content/riak/cs/2.1.1/references/apis/storage/s3/list-parts.md index f2f8d29a0a..a5aa7f92e1 100644 --- a/content/riak/cs/2.1.1/references/apis/storage/s3/list-parts.md +++ b/content/riak/cs/2.1.1/references/apis/storage/s3/list-parts.md @@ -43,7 +43,7 @@ Authorization: signatureValue ### Request Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -53,7 +53,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). ### Response Elements diff --git a/content/riak/cs/2.1.1/references/apis/storage/s3/put-bucket-policy.md b/content/riak/cs/2.1.1/references/apis/storage/s3/put-bucket-policy.md index a8cd2ea84a..89e39426e7 100644 --- a/content/riak/cs/2.1.1/references/apis/storage/s3/put-bucket-policy.md +++ b/content/riak/cs/2.1.1/references/apis/storage/s3/put-bucket-policy.md @@ -37,7 +37,7 @@ This operation does not use request parameters. ### Request Headers -This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-request-headers). +This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-request-headers). ### Request Elements @@ -88,7 +88,7 @@ More information on S3 Policies can be found in Amazon's [Permissions And Polici ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers).
### Response Elements diff --git a/content/riak/cs/2.1.1/references/apis/storage/s3/upload-part.md b/content/riak/cs/2.1.1/references/apis/storage/s3/upload-part.md index 3a3e51c403..9b9990fe95 100644 --- a/content/riak/cs/2.1.1/references/apis/storage/s3/upload-part.md +++ b/content/riak/cs/2.1.1/references/apis/storage/s3/upload-part.md @@ -9,7 +9,7 @@ aliases: - /riak/cs/2.1.1/references/apis/storage/s3/RiakCS-Upload-Part/ --- -This operation uploads a part in a multipart upload. You must [initiate a multipart upload](/riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload) before you can upload any part. In this operation you provide part data in your request. +This operation uploads a part in a multipart upload. You must [initiate a multipart upload]({{}}riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload) before you can upload any part. In this operation you provide part data in your request. ## Requests @@ -54,7 +54,7 @@ This operation does not use request elements. ### Response Headers -This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers](/riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). +This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.1/references/apis/storage/s3/common-response-headers). ### Response Elements @@ -64,7 +64,7 @@ This operation does not use response elements. ### Sample Request -The following `PUT` request uploads part number 1 in a multipart upload. This request includes the upload ID from an [Initiate Multipart Upload](/riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload) request. +The following `PUT` request uploads part number 1 in a multipart upload. This request includes the upload ID from an [Initiate Multipart Upload]({{}}riak/cs/2.1.1/references/apis/storage/s3/initiate-multipart-upload) request. ``` PUT /large.iso?partNumber=1&uploadId=VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA HTTP/1.1 @@ -79,7 +79,7 @@ Authorization: AWS AKIAIOSFODNN7EXAMPLE:VGhpcyBtZXNzYWdlIHNpZ25lZGGieSRlbHZpbmc= ### Sample Response -The response includes the `ETag` header. This value must be retained for when you send the [Complete Multipart Upload](/riak/cs/2.1.1/references/apis/storage/s3/complete-multipart-upload) request. +The response includes the `ETag` header. This value must be retained for when you send the [Complete Multipart Upload]({{}}riak/cs/2.1.1/references/apis/storage/s3/complete-multipart-upload) request. 
``` HTTP/1.1 200 OK diff --git a/content/riak/cs/2.1.1/references/appendices/http-admin.md b/content/riak/cs/2.1.1/references/appendices/http-admin.md index 62867d6bd5..d6e800255b 100644 --- a/content/riak/cs/2.1.1/references/appendices/http-admin.md +++ b/content/riak/cs/2.1.1/references/appendices/http-admin.md @@ -19,10 +19,10 @@ above and beyond those associated with Riak itself: Task | CS URI | Further reading :----|:-------|:--------------- -User management | `/riak-cs/user` | [Account Management](/riak/cs/2.1.1/cookbooks/account-management) -User access statistics | `/riak-cs/usage` | [Querying Access Statistics](/riak/cs/2.1.1/cookbooks/querying-access-statistics) -Storage statistics | `/riak-cs/usage` | [Querying Storage Statistics](/riak/cs/2.1.1/cookbooks/querying-storage-statistics) -Global statistics | `/riak-cs/stats` | [Monitoring and Metrics](/riak/cs/2.1.1/cookbooks/monitoring-and-metrics) +User management | `/riak-cs/user` | [Account Management]({{}}riak/cs/2.1.1/cookbooks/account-management) +User access statistics | `/riak-cs/usage` | [Querying Access Statistics]({{}}riak/cs/2.1.1/cookbooks/querying-access-statistics) +Storage statistics | `/riak-cs/usage` | [Querying Storage Statistics]({{}}riak/cs/2.1.1/cookbooks/querying-storage-statistics) +Global statistics | `/riak-cs/stats` | [Monitoring and Metrics]({{}}riak/cs/2.1.1/cookbooks/monitoring-and-metrics) By default, these are accessible over the same IP/port as the rest of the CS API, but they can be configured to run elsewhere, with or without @@ -52,13 +52,13 @@ details. ## Related Resources -* [configuring Riak CS](/riak/cs/2.1.1/cookbooks/configuration/riak-cs) -* [Querying Access Statistics](/riak/cs/2.1.1/cookbooks/querying-access-statistics) - * [Usage and Billing Data](/riak/cs/2.1.1/cookbooks/usage-and-billing-data) +* [configuring Riak CS]({{}}riak/cs/2.1.1/cookbooks/configuration/riak-cs) +* [Querying Access Statistics]({{}}riak/cs/2.1.1/cookbooks/querying-access-statistics) + * [Usage and Billing Data]({{}}riak/cs/2.1.1/cookbooks/usage-and-billing-data) * [Github wiki](https://github.com/basho/riak_cs/wiki/Querying-Access-Stats) -* [Querying Storage Statistics](/riak/cs/2.1.1/cookbooks/querying-storage-statistics) +* [Querying Storage Statistics]({{}}riak/cs/2.1.1/cookbooks/querying-storage-statistics) * [Enabling storage statistics](https://github.com/basho/riak_cs/wiki/Logging-Storage-Stats) * [Github wiki](https://github.com/basho/riak_cs/wiki/Logging-Storage-Stats) -* [Account Management](/riak/cs/2.1.1/cookbooks/account-management) +* [Account Management]({{}}riak/cs/2.1.1/cookbooks/account-management) * [Github wiki](https://github.com/basho/riak_cs/wiki/User-Management) -* [Monitoring and Metrics](/riak/cs/2.1.1/cookbooks/monitoring-and-metrics) +* [Monitoring and Metrics]({{}}riak/cs/2.1.1/cookbooks/monitoring-and-metrics) diff --git a/content/riak/cs/2.1.1/references/appendices/riak-cs-control.md b/content/riak/cs/2.1.1/references/appendices/riak-cs-control.md index 05401133ec..0380b615ca 100644 --- a/content/riak/cs/2.1.1/references/appendices/riak-cs-control.md +++ b/content/riak/cs/2.1.1/references/appendices/riak-cs-control.md @@ -20,7 +20,7 @@ managing users in a Riak CS Cluster. ## Installing Riak CS Control -Riak CS Control [is maintained as a separate application](https://github.com/basho/riak_cs_control) and can be installed via [source or package](/riak/cs/2.1.1/downloads). 
+Riak CS Control [is maintained as a separate application](https://github.com/basho/riak_cs_control) and can be installed via [source or package]({{}}riak/cs/2.1.1/downloads). ## Setting Up Riak CS Control @@ -67,7 +67,7 @@ riak-cs-control start When you first navigate to the Riak CS Control UI, you will land on the Users page: -![Users Page](/images/cs_control_users.png) +![Users Page]({{}}images/cs_control_users.png) On this page you can quickly see all current Riak CS users along with their status, e-mail address, and credentials. From here you can filter, diff --git a/content/riak/cs/2.1.1/theory/stanchion.md b/content/riak/cs/2.1.1/theory/stanchion.md index 99ac53bdaf..fe9a276116 100644 --- a/content/riak/cs/2.1.1/theory/stanchion.md +++ b/content/riak/cs/2.1.1/theory/stanchion.md @@ -25,9 +25,9 @@ Riak CS cluster at any time. Correspondingly, your Stanchion installation must be managed and configured separately. For more information, see the following documents: -* [Configuring Stanchion](/riak/cs/2.1.1/cookbooks/configuration/stanchion) -* [Installing Stanchion](/riak/cs/2.1.1/cookbooks/installing#installing-stanchion-on-a-node) -* [The Stantion Command-line Interface](/riak/cs/2.1.1/cookbooks/command-line-tools#stanchion) +* [Configuring Stanchion]({{}}riak/cs/2.1.1/cookbooks/configuration/stanchion) +* [Installing Stanchion]({{}}riak/cs/2.1.1/cookbooks/installing#installing-stanchion-on-a-node) +* [The Stanchion Command-line Interface]({{}}riak/cs/2.1.1/cookbooks/command-line-tools#stanchion) For a more in-depth discussion of implementation details, see the project's @@ -51,7 +51,7 @@ rejected. The uniqueness of these entities is enforced by serializing any creation or modification requests that involve them. This process is handled by Stanchion. What happens under the hood is essentially that Stanchion -mandates that all [vnodes](/riak/kv/2.1.3/learn/glossary#vnode) in the underlying Riak cluster that are responsible for the user or bucket being created must be available at creation time. +mandates that all [vnodes]({{}}riak/kv/2.1.3/learn/glossary#vnode) in the underlying Riak cluster that are responsible for the user or bucket being created must be available at creation time. One result of this enforcement is that user creation requests and bucket creation or modification, i.e. deletion, requests are not highly diff --git a/content/riak/cs/2.1.1/tutorials/fast-track/local-testing-environment.md b/content/riak/cs/2.1.1/tutorials/fast-track/local-testing-environment.md index 176bc2ed58..e6081b54b3 100644 --- a/content/riak/cs/2.1.1/tutorials/fast-track/local-testing-environment.md +++ b/content/riak/cs/2.1.1/tutorials/fast-track/local-testing-environment.md @@ -20,7 +20,7 @@ does not attempt to optimize your installation for your particular architecture. If you want to build a testing environment with a minimum of -configuration, there is an option for [Building a Virtual Testing Environment](/riak/cs/2.1.1/tutorials/fast-track/virtual-test-environment). +configuration, there is an option for [Building a Virtual Testing Environment]({{}}riak/cs/2.1.1/tutorials/fast-track/virtual-test-environment). ## Installing Your First Node @@ -30,7 +30,7 @@ and running Riak and Riak CS. ### Step 1: Raise your system's open file limits Riak can consume a large number of open file handles during normal -operation.
See the [Open Files Limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit) document for more information on how to increase your system's open files limit. If you are the root user, you can increase the system's open files limit @@ -52,7 +52,7 @@ riak soft nofile 65536 riak hard nofile 65536 ``` -For Mac OS X, consult the [open files limit](/riak/kv/2.1.3/using/performance/open-files-limit/#mac-os-x) documentation. +For Mac OS X, consult the [open files limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit/#mac-os-x) documentation. ### Step 2: Download and install packages @@ -67,14 +67,14 @@ sudo apt-get install -y curl substitute the appropriate CLI commands. If you are running Ubuntu 11.10 or later, you will also need the -`libssl0.9.8` package. See [Installing on Debian and Ubuntu](/riak/kv/2.1.3/setup/installing/debian-ubuntu) for more information. +`libssl0.9.8` package. See [Installing on Debian and Ubuntu]({{}}riak/kv/2.1.3/setup/installing/debian-ubuntu) for more information. ```bash sudo apt-get install -y libssl0.9.8 ``` Now, grab the appropriate packages: Riak, Riak CS, and Stanchion. See -[Download Riak](/riak/kv/2.1.3/downloads/) and [Download Riak CS](/riak/cs/2.1.1/downloads). +[Download Riak]({{}}riak/kv/2.1.3/downloads/) and [Download Riak CS]({{}}riak/cs/2.1.1/downloads). You can skip Riak CS Control for now. Once you have the packages, install them per the instructions below. @@ -87,14 +87,14 @@ installing Riak. **Do not attempt to configure or start Riak until step 3 in this document.** - * [Debian and Ubuntu](/riak/kv/2.1.3/setup/installing/debian-ubuntu) - * [RHEL and CentOS](/riak/kv/2.1.3/setup/installing/rhel-centos) - * [Mac OS X](/riak/kv/2.1.3/setup/installing/mac-osx) - * [FreeBSD](/riak/kv/2.1.3/setup/installing/freebsd) - * [SUSE](/riak/kv/2.1.3/setup/installing/suse) - * [Windows Azure](/riak/kv/2.1.3/setup/installing/windows-azure) - * [AWS Marketplace](/riak/kv/2.1.3/setup/installing/amazon-web-services) - * [From Source](/riak/kv/2.1.3/setup/installing/source) + * [Debian and Ubuntu]({{}}riak/kv/2.1.3/setup/installing/debian-ubuntu) + * [RHEL and CentOS]({{}}riak/kv/2.1.3/setup/installing/rhel-centos) + * [Mac OS X]({{}}riak/kv/2.1.3/setup/installing/mac-osx) + * [FreeBSD]({{}}riak/kv/2.1.3/setup/installing/freebsd) + * [SUSE]({{}}riak/kv/2.1.3/setup/installing/suse) + * [Windows Azure]({{}}riak/kv/2.1.3/setup/installing/windows-azure) + * [AWS Marketplace]({{}}riak/kv/2.1.3/setup/installing/amazon-web-services) + * [From Source]({{}}riak/kv/2.1.3/setup/installing/source) #### Next, install Riak CS @@ -458,7 +458,7 @@ your first node with two exceptions: You will then need to verify the cluster plan with the `riak-admin cluster plan` command, and commit the cluster changes with `riak-admin cluster commit` to complete the join process. More information is -available in the [Command Line Tools](/riak/kv/2.1.3/using/admin/riak-admin/#cluster) documentation. +available in the [Command Line Tools]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster) documentation. > **Note** > @@ -469,4 +469,4 @@ such as a dedicated device, [HAProxy](http://haproxy.1wt.eu), or [Nginx](http://wiki.nginx.org/Main) between Riak CS and the outside world. -Once you have completed this step, You can progress to [testing the Riak CS installation](/riak/cs/2.1.1/tutorials/fast-track/test-installation) using s3cmd. +Once you have completed this step, you can progress to [testing the Riak CS installation]({{}}riak/cs/2.1.1/tutorials/fast-track/test-installation) using s3cmd.
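Before moving on to testing, it helps to see where those s3cmd credentials end up. Below is a minimal sketch of an `~/.s3cfg` for a single local node; it assumes Riak CS is listening on `localhost:8080`, and the key values are placeholders for the credentials generated when you created your Riak CS user.

```bash
# Sketch: write a minimal ~/.s3cfg pointing s3cmd at a local Riak CS node.
# The access/secret keys are placeholders -- substitute the credentials
# returned when you created your Riak CS user.
cat > ~/.s3cfg <<'EOF'
[default]
access_key = YOUR_ACCESS_KEY
secret_key = YOUR_SECRET_KEY
proxy_host = localhost
proxy_port = 8080
use_https = False
EOF

# Quick smoke test once the cluster is up: make a bucket and copy a file in.
s3cmd mb s3://test-bucket
s3cmd put /etc/hosts s3://test-bucket/hosts
s3cmd ls s3://test-bucket
```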
diff --git a/content/riak/cs/2.1.1/tutorials/fast-track/test-installation.md b/content/riak/cs/2.1.1/tutorials/fast-track/test-installation.md index 15f4f4ac76..7b1b83fed8 100644 --- a/content/riak/cs/2.1.1/tutorials/fast-track/test-installation.md +++ b/content/riak/cs/2.1.1/tutorials/fast-track/test-installation.md @@ -141,6 +141,6 @@ bit of learning to be done, so make sure and check out the Reference section (click "Reference" on the nav on the left side of this page). A few items that may be of particular interest: -* [Details about API operations](/riak/cs/2.1.1/references/apis/storage) -* [Information about the Ruby Fog client](/riak/cs/2.1.1/cookbooks/fog) -* [Release Notes](/riak/cs/2.1.1/cookbooks/release-notes) +* [Details about API operations]({{}}riak/cs/2.1.1/references/apis/storage) +* [Information about the Ruby Fog client]({{}}riak/cs/2.1.1/cookbooks/fog) +* [Release Notes]({{}}riak/cs/2.1.1/cookbooks/release-notes) diff --git a/content/riak/cs/2.1.1/tutorials/fast-track/virtual-test-environment.md b/content/riak/cs/2.1.1/tutorials/fast-track/virtual-test-environment.md index 0afed470bc..55d848fb91 100644 --- a/content/riak/cs/2.1.1/tutorials/fast-track/virtual-test-environment.md +++ b/content/riak/cs/2.1.1/tutorials/fast-track/virtual-test-environment.md @@ -22,7 +22,7 @@ want to tune the OS or node/memory count, you'll have to edit the If you want to build a testing environment with more flexibility in configuration and durability across environment resets, there are -instructions for [Building a Local Test Environment](/riak/cs/2.1.1/tutorials/fast-track/local-testing-environment). +instructions for [Building a Local Test Environment]({{}}riak/cs/2.1.1/tutorials/fast-track/local-testing-environment). ## Configuration @@ -87,7 +87,7 @@ Secret key: RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw== ## Next Steps Congratulations! You have deployed a virtualized environment of Riak CS. -You are ready to progress to [Testing the Riak CS Installation](/riak/cs/2.1.1/tutorials/fast-track/test-installation). +You are ready to progress to [Testing the Riak CS Installation]({{}}riak/cs/2.1.1/tutorials/fast-track/test-installation). ### Stopping Your Virtual Environment diff --git a/content/riak/cs/2.1.1/tutorials/fast-track/what-is-riak-cs.md b/content/riak/cs/2.1.1/tutorials/fast-track/what-is-riak-cs.md index 3f745c59f9..a11889f4ce 100644 --- a/content/riak/cs/2.1.1/tutorials/fast-track/what-is-riak-cs.md +++ b/content/riak/cs/2.1.1/tutorials/fast-track/what-is-riak-cs.md @@ -35,11 +35,11 @@ automatically take over the responsibility of failed or non-communicative nodes, data remains available even in the event of node failure or network partition. -When an object is uploaded via the [storage API](/riak/cs/2.1.1/references/apis/storage), Riak CS breaks the object into smaller chunks that are streamed, +When an object is uploaded via the [storage API]({{}}riak/cs/2.1.1/references/apis/storage), Riak CS breaks the object into smaller chunks that are streamed, written, and replicated in Riak. Each chunk is associated with metadata for later retrieval. The diagram below provides a visualization. 
-![Riak CS Chunking](/images/Riak-CS-Overview.png) +![Riak CS Chunking]({{}}images/Riak-CS-Overview.png) ## Riak CS Enterprise diff --git a/content/riak/cs/2.1.2/cookbooks/access-control-lists.md b/content/riak/cs/2.1.2/cookbooks/access-control-lists.md new file mode 100644 index 0000000000..616ef4583a --- /dev/null +++ b/content/riak/cs/2.1.2/cookbooks/access-control-lists.md @@ -0,0 +1,109 @@ +--- +title: "Access Control Lists" +description: "" +menu: + riak_cs-2.1.2: + name: "Access Control Lists" + identifier: "admin_access_control" + weight: 101 + parent: "http_admin" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/cookbooks/Access-Control-Lists/ + - /riak/cs/latest/cookbooks/access-control-lists/ +--- + +Access Control Lists (ACLs) are a means of granting and denying access +to buckets and objects. Each bucket and object in a Riak CS cluster will +have an ACL associated with it. When a bucket or object is created, a +default ACL will be created alongside it that grants full control to the +creating party and denies access to all other parties. + +Riak CS ACLs are modeled after S3 ACLs. For more information, see the +Amazon [Access Control List Overview](http://docs.amazonwebservices.com/AmazonS3/latest/dev/ACLOverview.html) +documentation. + +> **ACL Limit** +> +> An ACL can have up to 100 grants. + +## Representations + +XML is the only supported external format for ACLs. In the future, other +formats such as [JSON](http://www.json.org) may be supported. + +Example XML representation of an ACL: + +```xml +<?xml version="1.0" encoding="UTF-8"?> +<AccessControlPolicy> + <Owner> + <ID>abcd123</ID> + <DisplayName>joebob</DisplayName> + </Owner> + <AccessControlList> + <Grant> + <Grantee xsi:type="CanonicalUser"> + <ID>abcd123</ID> + <DisplayName>joebob</DisplayName> + </Grantee> + <Permission>FULL_CONTROL</Permission> + </Grant> + </AccessControlList> +</AccessControlPolicy> +``` + +## Permissions + +Riak CS permissions are split into two types: **bucket permissions** and +**object permissions**. + +### Bucket Permissions + +* `READ` --- Grantee may list the objects in the bucket +* `READ_ACP` --- Grantee may read the bucket ACL +* `WRITE` --- Grantee may create, overwrite, and delete any object in + the bucket +* `WRITE_ACP` --- Grantee may write the ACL for the applicable bucket +* `FULL_CONTROL` --- Grantee has `READ`, `WRITE`, `READ_ACP`, and + `WRITE_ACP` permissions on the bucket + +### Object Permissions + +* `READ` --- Grantee may read the object data and its metadata +* `READ_ACP` --- Grantee may read the object ACL. **Note:** The object + owner may read the object ACL even if not explicitly granted + `READ_ACP` permission. +* `WRITE_ACP` --- Grantee may write the ACL for the applicable object. + **Note:** The object owner may write the object ACL even if not + explicitly granted `WRITE_ACP` permission. +* `FULL_CONTROL` --- Grantee has `READ`, `READ_ACP`, and `WRITE_ACP` + permissions on the object. + +## Buckets + +Bucket names **must** be [globally unique]({{}}riak/cs/2.1.2/theory/stanchion/#globally-unique-entities). To avoid conflicts, all +bucket creation requests are made to an application called +[Stanchion]({{}}riak/cs/2.1.2/cookbooks/configuration/stanchion). This means that all requests for modification of a bucket ACL should be serialized through Stanchion.
While this may cause undesirable serialization of these requests, we +believe it is appropriate based on the following statement from this +[documentation on bucket restrictions](http://docs.amazonwebservices.com/AmazonS3/latest/dev/BucketRestrictions.html) from Amazon regarding restrictions on bucket operations: + +> Because bucket operations work against a centralized, global +resource space, it is not appropriate to make bucket create or delete +calls on the high availability code path of your +application. + +This statement only directly references create or delete calls, but we +have taken a more broad interpretation to include requests that modify +the ACL. + +## Objects + +The object ACL is stored with each object as a metadata field. If no ACL +information is present in the object creation request, a default ACL is +created granting the creator both ownership and full access control and +denying access to all other parties. + +For information on specifying an ACL when making a `PUT` request, see +[Riak CS PUT Object ACL]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-object-acl). diff --git a/content/riak/cs/2.1.2/cookbooks/account-management.md b/content/riak/cs/2.1.2/cookbooks/account-management.md new file mode 100644 index 0000000000..a7a4b1693e --- /dev/null +++ b/content/riak/cs/2.1.2/cookbooks/account-management.md @@ -0,0 +1,229 @@ +--- +title: "Account Management" +description: "" +menu: + riak_cs-2.1.2: + name: "Account Management" + identifier: "admin_account_management" + weight: 100 + parent: "http_admin" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/cookbooks/Account-Management/ + - /riak/cs/latest/cookbooks/account-management/ +--- + +## Creating a User Account + +Create a user account by performing an HTTP `POST` or `PUT` with a +unique email address and username. Here's an example: + +```curl +curl -X POST http://localhost:8080/riak-cs/user \ + -H 'Content-Type: application/json' \ + --data '{"email":"foobar@example.com", "name":"foo bar"}' +``` + +{{% note title="Note on admin users" %}} +By default, only the admin user may create new user accounts. If you need to +create a user account without authenticating yourself, you must set +`{anonymous_user_creation, true}` in the Riak CS `app.config`. +{{% /note %}} + +The submitted user document may be either JSON or XML, but the type +should match the value of the `Content-Type` header used. Here are some +examples for JSON and XML input formats. + +```json +{ + "email": "foobar@example.com", + "name": "foo bar" +} +``` + +```xml +<User> + <Email>foobar@example.com</Email> + <Name>foo bar</Name> +</User> +``` + +The response will be in JSON or XML, and resembles the following examples. + +```json +{ + "email": "foobar@example.com", + "display_name": "foobar", + "key_id": "324ABC0713CD0B420EFC086821BFAE7ED81442C", + "key_secret": "5BE84D7EEA1AEEAACF070A1982DDA74DA0AA5DA7", + "name": "foo bar", + "id": "8d6f05190095117120d4449484f5d87691aa03801cc4914411ab432e6ee0fd6b", + "buckets": [] +} +``` + +```xml +<User> + <Email>foobar@example.com</Email> + <DisplayName>foobar</DisplayName> + <KeyId>324ABC0713CD0B420EFC086821BFAE7ED81442C</KeyId> + <KeySecret>5BE84D7EEA1AEEAACF070A1982DDA74DA0AA5DA7</KeySecret> + <Name>foo bar</Name> + <Id>8d6f05190095117120d4449484f5d87691aa03801cc4914411ab432e6ee0fd6b</Id> + <Buckets/> +</User> +``` + +Once the user account exists, you can use the `key_id` and `key_secret` +to authenticate requests with Riak CS.
To do that, add the `key_id` and +`key_secret` values to your s3cmd configuration file, which is located +by default at `~/.s3cfg`. + +The canonical id represented by the `id` field can be used as an +alternative to an email address for user identification when granting or +revoking ACL permissions, for example with the `--acl-grant` or +`--acl-revoke` options to `s3cmd setacl`. + +## Retrieving User Account Information + +A user may retrieve their account information by sending a properly +signed request to the `riak-cs/user` resource. Additionally, the admin +user may request the information for any individual user on the system +as part of their role as administrator. Users are only permitted to +retrieve information for their own account. + +Assuming the proper credentials were set in the `.s3cfg` file, an s3cmd +request to retrieve this information would look like this: + +```bash +s3cmd get s3://riak-cs/user - +``` + +Using the admin credentials to retrieve another user's info would look +like this: + +```bash +s3cmd -c ~/.s3cfg-admin get s3://riak-cs/user/XQKMYF4UL_MMTDFD6NCN +``` + +In this example, `XQKMYF4UL_MMTDFD6NCN` is the `key_id` of the user +whose information the administrator wishes to retrieve. + +## Modifying User Account Information + +### Changing the User Account Name and Email Address + +A user may use a `PUT` to `/riak-cs/user` to update the name and email +address associated with an account. The `PUT` must include a document +with a name and email field. JSON or XML formats are supported for this +document. Samples of each are shown below. The `Content-Type` header +should also be set appropriately. The admin user may also update a +user's account via a `PUT` to `/riak-cs/user/<key_id>`. The value +for the email field must be a valid email address and must not be +already used by another user account in the system. Violation of either +condition results in an error response. + +Sample JSON and XML update documents: + +```json +{ + "name": "foobaz", + "email": "foobaz@example.com" +} +``` + +```xml +<?xml version="1.0" encoding="UTF-8"?> +<UserUpdate> + <Name>foobaz</Name> + <Email>foobaz@example.com</Email> +</UserUpdate> +``` + +### Enabling and Disabling a User Account + +A user may use a `PUT` to `/riak-cs/user` to disable their account. The +`PUT` must include a document with a status field whose value is +disabled. JSON or XML formats are supported for this document. Samples +of each are shown below. The `Content-Type` header should also be set +appropriately. The admin user may also disable or re-enable a user's +account via a `PUT` to `/riak-cs/user/<key_id>`. Users may not +re-enable their own account once it is disabled. + +Sample JSON and XML status update documents: + +```json +{ + "status": "enabled" +} +``` + +```xml +<?xml version="1.0" encoding="UTF-8"?> +<UserUpdate> + <Status>disabled</Status> +</UserUpdate> +``` + +### Issuing New User Credentials + +The `key_secret` for a user account can be reissued by a `PUT` to +`/riak-cs/user` with the appropriate JSON or XML document. For admin +users, the `PUT` would be to `/riak-cs/user/<key_id>`. + +The documents should resemble the following examples. + +```json +{ + "new_key_secret": true +} +``` + +```xml +<?xml version="1.0" encoding="UTF-8"?> +<UserUpdate> + <NewKeySecret>true</NewKeySecret> +</UserUpdate> +``` + +{{% note title="Note on update fields" %}} +The `new_key_secret` field (or `NewKeySecret` in XML) may be combined with +other user update fields in the same request. Currently, the only other +supported field is status, but more may be added in the future. Unsupported +fields are ignored. +{{% /note %}} + +## Retrieving a List of All Users + +The admin user may retrieve a list of all user accounts on the system.
+This is accomplished via a properly signed HTTP `GET` request to the +`/riak-cs/users` resource. Any non-admin user request for the user list +is rejected and a `403 Forbidden` error is returned. This request does +not properly work with s3cmd, but can be performed using a less dogmatic +tool such as [s3-curl](http://aws.amazon.com/code/128). + +{{% note title="Note on hostname" %}} +You must modify the `@endpoints` variable in the `s3curl.pl` script to include +your Riak CS hostname so that the following example will return the list of +users. +{{% /note %}} + +A sample URL for a user listing request looks like this: + +``` +GET http://data.example.com/riak-cs/users +``` + +An example using s3-curl that assumes properly specified credentials for +the admin user in the `.s3curl` configuration file with an `id` of +`admin` is as follows: + +```bash +s3curl --id admin -- http://data.mystorage.me/riak-cs/users +``` + +By default, the listing of all users includes accounts that are both +enabled and disabled. The list can be filtered to only include enabled +or disabled accounts by using the `status` query parameter with a value of +`enabled` or `disabled`, respectively. diff --git a/content/riak/cs/2.1.2/cookbooks/authentication.md b/content/riak/cs/2.1.2/cookbooks/authentication.md new file mode 100644 index 0000000000..5f471129fa --- /dev/null +++ b/content/riak/cs/2.1.2/cookbooks/authentication.md @@ -0,0 +1,115 @@ +--- +title: "Authentication" +description: "" +menu: + riak_cs-2.1.2: + name: "Authentication" + identifier: "admin_authentication" + weight: 101 + parent: "http_admin" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/cookbooks/Authentication/ + - /riak/cs/latest/cookbooks/authentication/ +--- + +## Authentication Options + +* S3 Signature Authentication + * Module name: `riak_cs_s3_auth` + * [Documentation](http://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html) +* Keystone Authentication + * Module name: `riak_cs_keystone_auth` + * [Documentation](http://docs.openstack.org/api/openstack-identity-service/2.0/content/index.html) +* S3 Passthru Authentication + * Module name: `riak_cs_s3_passthru_auth` + * This module requires a valid user `key_id` to be included in the + `Authorization` header value, but no signature is required. For + example, a valid header using this authentication module would look + like this: `Authorization: AWS 4REM9H9ZKMXW-DZDC8RV`. + + **Warning**: This module is only intended for use in development or + testing scenarios. + +Selecting an authentication method is done by adding or changing the +`auth_module` key in the Riak CS `riak-cs.conf` file, or the old-style +`advanced.config` or `app.config` files in the `riak_cs` section. For example, +to instruct Riak CS to use S3-style request signing as the means of +authentication, ensure the following is contained in your configuration file: + +```riakcsconf +auth_module = riak_cs_s3_auth +``` + +```advancedconfig +{riak_cs, [ + %% Other configs + {auth_module, riak_cs_s3_auth}, + %% Other configs + ]} +``` + +```appconfig +{riak_cs, [ + %% Other configs + {auth_module, riak_cs_s3_auth}, + %% Other configs + ]} +``` + +S3-style authentication is used by default. + +## S3 Authentication + +### Signing and Authenticating REST Requests + +The primary authentication scheme available to use with Riak CS is the S3 +authentication scheme. A signature is calculated using several elements from +each request and the user's `key_id` and `key_secret`.
This signature is +included in the `Authorization` header of the request. Once a request is +received by the server, the server also calculates the signature for the +request and compares the result with the signature presented in the +`Authorization` header. If they match, the request is authenticated; +otherwise, authentication fails. + +Full details are available in the [S3 authentication scheme +documentation](http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html). + +### Query String Authentication + +Riak CS also supports authentication using a query parameter. This +allows issuing of pre-signed requests that can be used to grant public +access to private Riak CS data. It also supports an expiry timestamp so +that the pre-signed URL can be invalidated after a certain period of +time. + +The signature in the query string secures the request, and you can +specify any future expiration time in epoch or UNIX time. The process is as follows: + +1. Create a query +2. Specify an expiration time for the query +3. Sign it with your signature +4. Place the data in an HTTP request +5. Distribute the request to a user or embed the request in a web page + +#### Query String Parameters + +Parameter | Description | Data type +:---------|:------------|:--------- +`AWSAccessKeyId` | Your Riak CS Access Key ID | string +`Expires` | The time when the signature expires, specified as the number of seconds since the epoch | integer +`Signature` | The URL encoding of the Base64 encoding of the HMAC-SHA1 of `StringToSign` | string + +#### Example + +For example, a query URL is similar to the following example. + +```http +http://bucket.data.basho.com/document?AWSAccessKeyId=8EE3UE-UMW1YTPMBC3EB&Expires=1177363698&Signature=vjSAMPLENmGa%2ByT272YEAiv4%3D +``` + +## Keystone Authentication + +More information on using Keystone for authentication with Riak CS can +be found in [using Riak CS with Keystone]({{}}riak/cs/2.1.2/cookbooks/using-with-keystone). diff --git a/content/riak/cs/2.1.2/cookbooks/command-line-tools.md b/content/riak/cs/2.1.2/cookbooks/command-line-tools.md new file mode 100644 index 0000000000..eec1bdbb4c --- /dev/null +++ b/content/riak/cs/2.1.2/cookbooks/command-line-tools.md @@ -0,0 +1,643 @@ +--- +title: "Riak CS Command-line Tools" +description: "" +menu: + riak_cs-2.1.2: + name: "Command-line Tools" + identifier: "run_cli" + weight: 100 + parent: "run" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/cookbooks/command-line-tools/ + - /riak/cs/latest/cookbooks/command-line-tools/ +--- + +Riak CS comes equipped with a variety of command-line interfaces that +you can use to manage each node in your Riak CS cluster. The scripts for +these commands are available by default in the `/bin` directory of each +node. + +## riak-cs + +This is the primary script for controlling the processes associated with +a Riak CS node. Running the `riak-cs` command by itself will output a +listing of available commands: + +``` +Usage: riak-cs {start | stop | restart | reboot | ping | console | attach | + attach-direct | ertspath | chkconfig | escript | version | + getpid | top [-interval N] [-sort reductions|memory|msg_q] [-lines N] } +``` + +#### start + +Starts the Riak CS node. + +```bash +riak-cs start +``` + +If starting the node is successful, you will see no return output. If +the node is already running, this command will return `Node is already +running!`. + +#### stop + +Stops the Riak CS node.
+ +```bash +riak-cs stop +``` + +This command will print `ok` if the stoppage is successful. + +If you attempt to run `riak-cs stop` on a node that is not currently +running, you will see the following: + +``` +Node not responding to pings. +Node is not running! +``` + +#### restart + +Stops and then starts a running Riak CS node without exiting the Erlang +VM. + +```bash +riak-cs restart +``` + +Prints `ok` when successful. If the node is already stopped or not +responding, you will see the following output: + +``` +Node not responding to pings. +``` + +#### reboot + +Stops all applications and starts without restarting the Erlang VM. + +```bash +riak-cs reboot +``` + +{{% note title="Deprecation notice" %}} +The `riak-cs reboot` command has been deprecated. We recommend using the +`riak-cs restart` command instead. +{{% /note %}} + +#### ping + +Checks whether the Riak CS node is currently running. + +```bash +riak-cs ping +``` + +Prints `pong` when the node is running or `Node not +responding to pings` when the node is stopped or not responding. + +#### console + +Starts the Riak CS node in the foreground, providing direct access to +the node via the Erlang shell. + +```bash +riak-cs console +``` + +If the node is already running in the background, you will see the +output `Node is already running - use 'riak-cs attach' instead`. If the +command is successful, you can exit the shell by pressing **Ctrl-G q**. + +#### attach + +Attaches to the console of a Riak CS node running in the background, +providing access to the Erlang shell and to runtime messages. + +```bash +riak-cs attach +``` + +Prints `Node is not running!` when the node cannot be reached. + +#### attach-direct + +Attaches to the console of a Riak CS node running in the background +using a directly connected first-in-first-out (FIFO), providing access +to the Erlang shell and to runtime messages. + +```bash +riak-cs attach-direct +``` + +Prints `Node is not running!` when the node cannot be reached. You can +exit the shell by pressing **Ctrl-G q**. + +#### ertspath + +Outputs the path of Riak CS's Erlang runtime environment. + +```bash +riak-cs ertspath +``` + +#### chkconfig + +Checks whether the Riak CS node's configuration files are valid. + +```bash +riak-cs chkconfig +``` + +If the files are valid, `config is OK` will be included in the output. + +#### escript + +Provides a means of calling [escript](http://www.erlang.org/doc/man/escript.html) +scripts using Riak CS's Erlang runtime environment. + +```bash +riak-cs escript +``` + +#### version + +Outputs the Riak CS version identifier. + +```bash +riak-cs version +``` + +#### getpid + +Outputs the process identifier for the currently running instance of +Riak CS. + +```bash +riak-cs getpid +``` + +#### top + +The `riak-cs top` command provides information about what the Erlang +processes inside of Riak CS are doing. `top` reports process reductions +(an indicator of CPU utilization), memory used, and message queue sizes. + +```bash +riak-cs top [-interval N] [-sort reductions|memory|msg_q] [-lines N] +``` + +Options: + +* `interval` specifies the number of seconds between each update of the + `top` output and defaults to 5 +* `sort` determines the category on which `riak-cs top` sorts and + defaults to `reductions` +* `lines` specifies the number of processes to display in the `top` + output and defaults to 10 + +More information about Erlang's etop tool can be found in the +[official documentation](http://www.erlang.org/doc/man/etop.html).
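Because `riak-cs ping` prints `pong` once the node is responsive, the commands above compose naturally into small operational checks. The script below is a minimal sketch, not part of the Riak CS distribution, of a wait-for-start loop you might use in provisioning scripts; the 30-second timeout is an arbitrary choice.

```bash
#!/usr/bin/env bash
# Sketch: start Riak CS and block until it responds to pings.
# Not part of the Riak CS distribution; adjust the timeout to taste.
riak-cs start
for i in $(seq 1 30); do
    if riak-cs ping | grep -q pong; then
        echo "Riak CS is up (pid $(riak-cs getpid))"
        exit 0
    fi
    sleep 1
done
echo "Riak CS did not respond within 30 seconds" >&2
exit 1
```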
## riak-cs-admin gc + +This command controls Riak CS's [garbage collection]({{}}riak/cs/2.1.2/cookbooks/garbage-collection) system. + +```bash +riak-cs-admin gc +``` + +#### batch + +Starts garbage collection for a batch of eligible objects. + +```bash +riak-cs-admin gc batch +``` + +Optionally, you can specify the number of leeway seconds: + +```bash +riak-cs-admin gc batch <leeway_seconds> +``` + +In Riak CS 2.1 you can specify the target range of GC bucket keys with start and end timestamps. For example: + +```bash +riak-cs-admin gc batch --end=20150801T000000Z +``` + +Or + +```bash +riak-cs-admin gc batch --start=20150801T000000Z --end=20150901T000000Z +``` + +#### status + +Returns the status of the garbage collection daemon, depending on its +current state. + +```bash +riak-cs-admin gc status +``` + +#### pause + +Pauses the current batched garbage collection process and halts any +further garbage collection until the daemon is resumed. + +```bash +riak-cs-admin gc pause +``` + +#### resume + +Resumes a paused garbage collection process. This will have no effect if +there is no previously paused process. + +```bash +riak-cs-admin gc resume +``` + +#### cancel + +Cancels the current batch of garbage collection. This will have no +effect if there is no currently running garbage collection process. + +```bash +riak-cs-admin gc cancel +``` + +#### set-interval + +Sets or updates the garbage collection interval. Expressed in terms of +seconds: + +```bash +riak-cs-admin gc set-interval <interval> +``` + +#### set-leeway + +Sets or updates the garbage collection leeway time, which indicates how +many seconds must elapse after an object is deleted or overwritten +before the garbage collection system may reap the object. Expressed in +seconds. + +```bash +riak-cs-admin gc set-leeway <leeway_seconds> +``` + +#### earliest-keys + +Finds the oldest entry after `epoch_start` in garbage collection. + +```bash +riak-cs-admin gc earliest-keys +``` + +## riak-cs-stanchion + +This command interface controls aspects of the interaction between Riak +CS and Stanchion, the access control and user management platform +undergirding Riak CS. + +#### switch + +Temporarily changes the host and/or port used by Stanchion. This change +is effective until the node is restarted, at which point Stanchion will +begin listening on the host and port specified in your [configuration files]({{}}riak/cs/2.1.2/cookbooks/configuration/reference). + +```bash +riak-cs-stanchion switch HOST PORT +``` + +The following command would change the host to 100.0.0.1 and the port to +9999: + +```bash +riak-cs-stanchion switch 100.0.0.1 9999 +``` + +The following output would appear if the change were successful: + +``` +Successfully switched stanchion to 100.0.0.1:9999: This change is only effective until restart. +To make permanent change, be sure to edit app.config file. +``` + +#### show + +Shows the current host/port address for Stanchion. + +```bash +riak-cs-stanchion show +``` + +The output should look something like this: + +``` +Current Stanchion Address: http://127.0.0.1:8085 +``` + +## riak-cs-storage + +This command is the direct equivalent of `riak-cs-admin storage` +documented [above](#riak-cs-admin). + +## stanchion + +This command interface enables you to control Stanchion, the user +management and access control platform undergirding Riak CS. + +#### start + +Starts Stanchion in the background. + +```bash +stanchion start +``` + +If Stanchion is already running on the node, the message `Node is +already running!` will be returned.
#### stop + +Stops Stanchion on the node. + +```bash +stanchion stop +``` + +Prints `ok` when successful or `Node not responding to +pings` if the Stanchion node is not running. + +#### restart + +Stops and then starts the running Stanchion node without exiting the +Erlang VM. Prints `ok` when successful or `Node not +responding to pings.` when the node is stopped or not responding. + +```bash +stanchion restart +``` + +#### reboot + +Stops and then restarts the running node, exiting the Erlang VM. Prints +`ok` when successful or `Node not responding to pings.` when +the node is stopped or not responding. + +```bash +stanchion reboot +``` + +#### ping + +Checks that the Stanchion node is running. Prints `pong` when +successful or `Node not responding to pings.` when the +Stanchion node is stopped or not responding. + +```bash +stanchion ping +``` + +#### console + +Starts the Stanchion node in the foreground, providing access to the +Erlang shell and to runtime messages. + +```bash +stanchion console +``` + +Prints `Node is already running - use 'stanchion attach' instead` if +the node is already running in the background. + +#### attach + +Attaches to the console of a Stanchion node running in the background, +providing access to the Erlang shell and to runtime messages. + +```bash +stanchion attach +``` + +Prints `Node is not running!` when the node cannot be reached. + +#### attach-direct + +Attaches to the console of a Stanchion node running in the background +using a directly connected first-in-first-out (FIFO), providing access +to the Erlang shell and to runtime messages. + +```bash +stanchion attach-direct +``` + +Prints `Node is not running!` when the node cannot be reached. You can +exit the shell by typing **Ctrl-D**. + +#### ertspath + +Outputs the path of the Stanchion node's Erlang runtime environment. + +```bash +stanchion ertspath +``` + +#### chkconfig + +Checks whether Stanchion's configuration file is valid. + +```bash +stanchion chkconfig +``` + +If the file is valid, `config is OK` will be returned. If not, +appropriate error messages will be returned. + +#### escript + +Provides a means of calling [escript](http://www.erlang.org/doc/man/escript.html) +scripts using Stanchion's Erlang runtime environment. + +```bash +stanchion escript +``` + +#### version + +Outputs the Stanchion version identifier. + +```bash +stanchion version +``` + +#### getpid + +Outputs the process identifier for the currently running instance of +Stanchion. + +```bash +stanchion getpid +``` + +#### top + +The `stanchion top` command provides information about what the Erlang +processes inside of Stanchion are doing. `top` reports process +reductions (an indicator of CPU utilization), memory used, and message +queue sizes. + +```bash +stanchion top [-interval N] [-sort reductions|memory|msg_q] [-lines N] +``` + +Options: + +* `interval` specifies the number of seconds between each update of the + `top` output and defaults to 5 +* `sort` determines the category on which `stanchion top` sorts and + defaults to `reductions` +* `lines` specifies the number of processes to display in the `top` + output and defaults to 10 + +More information about Erlang's etop tool can be found in the +[official documentation](http://www.erlang.org/doc/man/etop.html). + +## riak-cs-access + +This command is the direct equivalent of `riak-cs-admin access`, +documented [above](#riak-cs-admin-access). + +## riak-cs-supercluster + +Riak CS version 1.5 offers support for supercluster operations.
The +`supercluster` command interface enables you to interact with that system. +More information can be found in [Riak CS Supercluster Support]({{}}riak/cs/2.1.2/cookbooks/supercluster). + +{{% note title="Note: technical preview" %}} +Riak CS supercluster support is available only as a technical preview for +users of Riak CS installations with support for Multi-Datacenter Replication. +{{% /note %}} + +#### list-members + +Lists the members currently available in a multi-cluster Riak CS setup. + +```bash +riak-cs-supercluster list-members +``` + +The output will list the name, host, and port for each member, as in the +following example output: + +``` +sc-member-A 127.0.0.1:10017 +sc-member-B 127.0.0.1:10027 +# and so on +``` + +#### weight + +When new buckets are created, they are randomly assigned to a member. The +weight of each member is the likelihood, expressed as a percentage, that +new buckets will be stored in a given member. You can use the commands +under the `weight` heading to set, list, and refresh weight information +stored in the master member (which is shared between all Riak nodes). + +When the `weight` command itself is used without an argument, it will +return the weights of all members. + +```bash +riak-cs-supercluster weight +``` + +You can also return the weight for a specific member on the basis of its +member ID: + +``` +riak-cs-supercluster weight +``` + +You can also set the weight for a member: + +```bash +riak-cs-supercluster weight +``` + +This command would set the weight for member `sc-member-A` to 40: + +```bash +riak-cs-supercluster weight sc-member-A 40 +``` + +Weights are assigned to members as an integer. The percentage weight +applied to a given member is a function of the total weight assigned to all +members. So if you assign 30 to member A, 30 to member B, and 60 to member C, they will bear the following weights, respectively: 25%, 25%, and 50%. +Consequently, there is no specific number to which all member weights need +to add up. + +#### weight-manifest + +Retrieves the manifest weights for all currently available members. + +```bash +riak-cs-supercluster weight-manifest +``` + +You can also retrieve the manifest weights for a specific member on the +basis of its member ID: + +```bash +riak-cs-supercluster weight-manifest +``` + +You can also set the manifest weight for a specific member: + +```bash +riak-cs-supercluster weight-manifest +``` + +#### weight-block + +Retrieves the block weights for all currently available members. + +```bash +riak-cs-supercluster weight-block +``` + +You can also retrieve the block weight for a specific member on the basis +of its member ID: + +```bash +riak-cs-supercluster weight-block +``` + +Or you can set the weight block for a specific member: + +```bash +riak-cs-supercluster weight-block +``` + +#### refresh + +Fetches all current weights from the master member. + +```bash +riak-cs-supercluster refresh +``` + +When a member's weight is updated, that weight is stored in the [master member]({{}}riak/cs/2.1.2/cookbooks/supercluster/#the-master-member) and cached in Riak CS. Riak CS fetches weights from the master member only periodically. The +`refresh` command syncs the weights stored in the master member with the +weights cached in Riak CS so that there is no discrepancy. + +This command is particularly useful immediately after any member weight +changes are made that need to be registered across all clusters. 
diff --git a/content/riak/cs/2.1.2/cookbooks/configuration.md b/content/riak/cs/2.1.2/cookbooks/configuration.md
new file mode 100644
index 0000000000..76165b643e
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/configuration.md
@@ -0,0 +1,34 @@
+---
+title: "Configuring Riak CS Overview"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Configuring"
+    identifier: "config"
+    weight: 100
+    parent: "ops"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/configuration/
+  - /riak/cs/latest/cookbooks/configuration/
+---
+
+In a Riak CS storage system, three components work in conjunction with one another, which means that you must configure each component to work with the others:
+
+* Riak --- The database system that acts as the backend storage
+* Riak CS --- The cloud storage layer over Riak which exposes the storage and billing APIs, storing files and metadata in Riak, and streaming them back to users
+* Stanchion --- Manages requests sent to a Riak instance that involve globally unique system entities, such as buckets and users, for example, to create users or to create or delete buckets
+
+In addition, you must also configure the S3 client you use to communicate with your Riak CS system.
+
+You should plan on having one Riak node for every Riak CS node in your system. Riak and Riak CS nodes can be run on separate physical machines, but in many cases it is preferable to run one Riak and one Riak CS node on the same physical machine. Assuming the single physical machine has sufficient capacity to meet the needs of both a Riak and a Riak CS node, you will typically see better performance due to reduced network latency.
+
+If your system consists of several nodes, configuration primarily represents setting up the communication between components. Other settings, such as where log files are stored, are set to default values and need to be changed only if you want to use non-default values.
+
+## Configuration of System Components
+
+* [Configuring Riak]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-for-cs)
+* [Configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs)
+* [Configuring Stanchion]({{}}riak/cs/2.1.2/cookbooks/configuration/stanchion)
+* [Configuring an S3 client]({{}}riak/cs/2.1.2/cookbooks/configuration/s3-client)
diff --git a/content/riak/cs/2.1.2/cookbooks/configuration/dragondisk.md b/content/riak/cs/2.1.2/cookbooks/configuration/dragondisk.md
new file mode 100644
index 0000000000..d8be6b05c3
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/configuration/dragondisk.md
@@ -0,0 +1,330 @@
+---
+title: "Configuring DragonDisk"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Configuring DragonDisk"
+    identifier: "config_dragondisk"
+    weight: 103
+    parent: "api_s3"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/configuration/Configuring-DragonDisk/
+  - /riak/cs/2.1.2/cookbooks/configuration/Configuring-DragonDisk/
+  - /riak/cs/latest/cookbooks/configuration/dragondisk/
+---
+
+[DragonDisk](http://www.dragondisk.com/) is a cross-platform, S3-compatible client with a graphical user interface, available for Linux, Mac OS X, and Windows. The following guide describes configuration of DragonDisk for use with Riak CS.
+
+## Configuration for Linux
+
+DragonDisk is available for multiple Linux distributions. This section specifically describes configuration for Ubuntu Linux 12.04 (Precise Pangolin) 32-bit, but the configuration will be similar for other Linux distributions.
+
This is the main DragonDisk window as it appears upon starting the application.

![DragonDisk screenshot]({{}}images/dragondisk_linux0.png)

### Create an account

Before you can connect DragonDisk to Riak CS, you'll need to create and save an account. The following describes the process for doing so.

* From the **File** menu, select **Accounts**.

![DragonDisk screenshot]({{}}images/dragondisk_linux1.png)

* Click **New**.

![DragonDisk screenshot]({{}}images/dragondisk_linux2.png)

* In the **Account** dialog window, choose **Other S3 compatible service** under the **Provider** drop down menu.

![DragonDisk screenshot]({{}}images/dragondisk_linux3.png)

* Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field.
* Enter your Riak CS username into the **Account name** field.
* Enter the access key associated with your username into the **Access Key** field.
* Enter the secret key associated with your username into the **Secret Key** field.
* Enter the Riak CS public interface HTTP port into the **HTTP Port** field.

{{% note title="Note on HTTPS" %}}
If you'll be using HTTPS, be sure to enter the correct public HTTPS port into
the **HTTPS Port** field and click the **Connect using SSL/HTTPS** check box.
{{% /note %}}

* Click **OK** to save the account configuration.

![DragonDisk screenshot]({{}}images/dragondisk_linux4.png)

* Click **Close** to complete account creation and continue on to connecting to Riak CS.

![DragonDisk screenshot]({{}}images/dragondisk_linux5.png)

### Connect to Riak CS

Now let's try to connect to Riak CS with the newly defined account configuration.

* In the **Root** drop down menu on the right pane of the DragonDisk interface, select the **Riak CS** account definition that you created.
* If no error occurs and you see the account definition appear in the right pane, then you're connected to Riak CS and can proceed to creating a bucket.

![DragonDisk screenshot]({{}}images/dragondisk_linux6.png)

### Create a bucket

Now that you're connected, create a bucket for testing file access with Riak CS.

* Click the **Create bucket** icon to open the bucket creation dialog.

![DragonDisk screenshot]({{}}images/dragondisk_linux7.png)

* In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket `dragondisklinux`.
* Click **OK** to create the bucket.
* The bucket should now appear in the right pane, and you can proceed with copying some test files into the bucket.

![DragonDisk screenshot]({{}}images/dragondisk_linux8.png)

### Copy files to bucket

Finally, navigate to a location on your local computer in the left pane and select a file or files to copy to the newly created Riak CS bucket.

![DragonDisk screenshot]({{}}images/dragondisk_linux9.png)

* After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin.
* After the files are copied, they'll appear in the bucket.

![DragonDisk screenshot]({{}}images/dragondisk_linux10.png)

* You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk.

Consult the [DragonDisk documentation](http://www.dragondisk.com/getting-started-guide.html) for details on performing other operations.
+
## Configuration for Mac OS X

This section describes configuration of DragonDisk for Mac OS X.

This is the main DragonDisk window as it appears upon starting the application.

![DragonDisk screenshot]({{}}images/dragondisk_osx0.png)

### Create an account

Before you can connect DragonDisk to Riak CS, you'll need to create and save an account. The following describes the process for doing so.

* From the **File** menu, select **Accounts**.

![DragonDisk screenshot]({{}}images/dragondisk_osx1.png)

* Click **New**.

![DragonDisk screenshot]({{}}images/dragondisk_osx2.png)

* In the **Account** dialog window, choose **Other S3 compatible service** under the **Provider** drop down menu.

![DragonDisk screenshot]({{}}images/dragondisk_osx3.png)

* Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field.
* Enter your Riak CS username into the **Account name** field.
* Enter the access key associated with your username into the **Access Key** field.
* Enter the secret key associated with your username into the **Secret Key** field.
* Enter the Riak CS public interface HTTP port into the **HTTP Port** field.

{{% note title="Note on HTTPS" %}}
If you'll be using HTTPS, be sure to enter the correct public HTTPS port into
the **HTTPS Port** field and click the **Connect using SSL/HTTPS** check box.
{{% /note %}}

* Click **OK** to save the account configuration.

![DragonDisk screenshot]({{}}images/dragondisk_osx4.png)

* Click **Close** to complete account creation and continue on to connecting to Riak CS.

![DragonDisk screenshot]({{}}images/dragondisk_osx5.png)

### Connect to Riak CS

Now let's try to connect to Riak CS with the newly defined account configuration.

* In the **Root** drop down menu on the right pane of the DragonDisk interface, select the **Riak CS** account definition that you created.
* If no error occurs and you see the account definition appear in the right pane, then you're connected to Riak CS and can proceed to creating a bucket.

![DragonDisk screenshot]({{}}images/dragondisk_osx6.png)

### Create a bucket

Now that you're connected, create a bucket for testing file access with Riak CS.

* Click the **Create bucket** icon to open the bucket creation dialog.

![DragonDisk screenshot]({{}}images/dragondisk_osx7.png)

* In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket `dragondiskosx`.
* Click **OK** to create the bucket.
* The bucket should now appear in the right pane, and you can proceed with copying some test files into the bucket.

![DragonDisk screenshot]({{}}images/dragondisk_osx8.png)

### Copy files to bucket

Finally, navigate to a location on your local computer in the left pane and select a file or files to copy to the newly created Riak CS bucket.

![DragonDisk screenshot]({{}}images/dragondisk_osx9.png)

* After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin.
* After the files are copied, they'll appear in the bucket.

![DragonDisk screenshot]({{}}images/dragondisk_osx10.png)

* You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk.

Consult the [DragonDisk documentation](http://www.dragondisk.com/getting-started-guide.html) for details on performing other operations.
+
## Configuration for Windows

This section describes configuration of DragonDisk for Windows.

This is the main DragonDisk window as it appears upon starting the application.

![DragonDisk screenshot]({{}}images/dragondisk_windows0.png)

### Create an account

Before you can connect DragonDisk to Riak CS, you'll need to create and save an account. The following describes the process for doing so.

* From the **File** menu, select **Accounts**.

![DragonDisk screenshot]({{}}images/dragondisk_windows1.png)

* Click **New**.

![DragonDisk screenshot]({{}}images/dragondisk_windows2.png)

* In the **Account** dialog window, choose **Other S3 compatible service** under the **Provider** drop down menu.

![DragonDisk screenshot]({{}}images/dragondisk_windows3.png)

* Enter the hostname or IP address of your Riak CS cluster's public interface into the **Service Endpoint** field.
* Enter your Riak CS username into the **Account name** field.
* Enter the access key associated with your username into the **Access Key** field.
* Enter the secret key associated with your username into the **Secret Key** field.
* Enter the Riak CS public interface HTTP port into the **HTTP Port** field.

{{% note title="Note on HTTPS" %}}
If you'll be using HTTPS, be sure to enter the correct public HTTPS port into
the **HTTPS Port** field and click the **Connect using SSL/HTTPS** check box.
{{% /note %}}

* Click **OK** to save the account configuration.

![DragonDisk screenshot]({{}}images/dragondisk_windows4.png)

* Click **Close** to complete account creation and continue on to connecting to Riak CS.

![DragonDisk screenshot]({{}}images/dragondisk_windows5.png)

### Connect to Riak CS

Now let's try to connect to Riak CS with the newly defined account configuration.

* In the **Root** drop down menu on the right pane of the DragonDisk interface, select the **Riak CS** account definition that you created.
* If no error occurs and you see the account definition appear in the right pane, then you're connected to Riak CS and can proceed to creating a bucket.

![DragonDisk screenshot]({{}}images/dragondisk_windows6.png)

### Create a bucket

Now that you're connected, create a bucket for testing file access with Riak CS.

* Click the **Create bucket** icon to open the bucket creation dialog.

![DragonDisk screenshot]({{}}images/dragondisk_windows7.png)

* In the **Create bucket** dialog, give the bucket a name. In this example we're naming the bucket `dragonbucket`.
* Click **OK** to create the bucket.
* The bucket should now appear in the right pane, and you can proceed with copying some test files into the bucket.

![DragonDisk screenshot]({{}}images/dragondisk_windows8.png)

### Copy files to bucket

Finally, navigate to a location on your local computer in the left pane and select a file or files to copy to the newly created Riak CS bucket.

![DragonDisk screenshot]({{}}images/dragondisk_windows9.png)

* After selecting a file or files, you can drag them to the bucket you created in the right pane and the copy operation will begin.
* After the files are copied, they'll appear in the bucket.

![DragonDisk screenshot]({{}}images/dragondisk_windows10.png)

* You have now successfully verified connectivity, bucket creation, and file copying operations for your Riak CS installation with DragonDisk.
+ +Consult the [DragonDisk +documentation](http://www.dragondisk.com/getting-started-guide.html) for +details on performing other operations. diff --git a/content/riak/cs/2.1.2/cookbooks/configuration/load-balancing-proxy.md b/content/riak/cs/2.1.2/cookbooks/configuration/load-balancing-proxy.md new file mode 100644 index 0000000000..9626ea0343 --- /dev/null +++ b/content/riak/cs/2.1.2/cookbooks/configuration/load-balancing-proxy.md @@ -0,0 +1,170 @@ +--- +title: "Load Balancing and Proxy Configuration for CS" +description: "" +menu: + riak_cs-2.1.2: + name: "Load Balancing & Proxy Configuration" + identifier: "config_load_balance" + weight: 103 + parent: "config" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/cookbooks/configuration/Load-Balancing-and-Proxy-Configuration/ + - /riak/cs/2.1.2/cookbooks/configuration/Load-Balancing-and-Proxy-Configuration/ + - /riak/cs/latest/cookbooks/configuration/load-balancing-proxy/ +--- + +If you plan on using Riak CS in production, we highly recommend that you +place Riak CS behind a load-balancing or proxy solution, be it hardware +or software based. Also note that you should *not* directly expose Riak +CS to public-facing network interfaces. + +Riak CS users have reported success in using Riak CS with a variety of +load-balancing and proxy solutions. Common solutions include proprietary +hardware-based load balancers, cloud-based load-balancing options---such +as Amazon's Elastic Load Balancer---and open-source software projects +like [HAProxy](http://haproxy.1wt.eu/) and +[Nginx](http://wiki.nginx.org/Main). + +This guide briefly explores the commonly used open-source solutions +HAProxy and Nginx and provides some configuration and operational tips +gathered from community users and operations-oriented engineers at +Basho. + +## HAProxy + +[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source +solution for load balancing and proxying of HTTP- and TCP-based +application traffic. + +Users have reported success in using HAProxy in combination with Riak CS +in a number of configurations and scenarios. Much of the information and +example configuration for this section is drawn from the experiences of +users in the Riak CS community in addition to suggestions from Basho +engineering. + +### Example Configuration + +The following is an example starting point configuration for HAProxy to +act as a load balancer to a Riak CS installation. + +> **Note on open files limits** +> +> The operating system's open files limits need to be greater than 256000 +for the example configuration that follows. Consult the [Open Files Limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit) documentation for details on configuring the value for different +operating systems. 
+ +```config +global + log 127.0.0.1 local0 + log 127.0.0.1 local1 notice + maxconn 256000 + spread-checks 5 + daemon + +defaults + log global + option dontlognull + option redispatch + option allbackups + no option httpclose + retries 3 + maxconn 256000 + timeout connect 5000 + timeout client 5000 + timeout server 5000 + +frontend riak_cs + bind 10.0.24.100:8080 + # Example bind for SSL termination + # bind 10.0.24.100:8443 ssl crt /opt/local/haproxy/etc/data.pem + mode http + option httplog + capture request header Host len 64 + acl good_ips src -f /opt/local/haproxy/etc/gip.lst + block if !good_ips + use_backend riak_cs_backend if good_ips + +backend riak_cs_backend + mode http + balance roundrobin + # Ping Riak CS to determine health + option httpchk GET /riak-cs/ping + timeout connect 60s + timeout http-request 60s + server riak1 r1s01.example.com:8081 weight 1 maxconn 1024 check + server riak2 r1s02.example.com:8081 weight 1 maxconn 1024 check + server riak3 r1s03.example.com:8081 weight 1 maxconn 1024 check + server riak4 r1s04.example.com:8081 weight 1 maxconn 1024 check + server riak5 r1s05.example.com:8081 weight 1 maxconn 1024 check +``` + +Please note that the above example is considered a starting point and is +a work in progress. You should carefully examine this configuration and +change it according to your specific environment. + +A specific configuration detail worth noting from the example is the +commented option for SSL termination. HAProxy supports SSL directly as +of version 1.5. Provided that your HAProxy instance was built with +OpenSSL support, you can enable it by uncommenting the example line and +modifying it to suit your environment. More information is available in +the [HAProxy +documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl). + +Also note the option for checking Riak CS health via the `/riak-cs/ping` +endpoint. This option is essential for checking each Riak CS node as +part of the round robin load-balancing method. + +## Nginx + +Some users have reported success in using the [Nginx](http://nginx.org/) +HTTP server to proxy requests for Riak CS. An example that provides +access to Riak CS is provided here for reference. + +### Example Configuration + +The following is an example starting-point configuration for Nginx to +act as a front-end proxy to Riak CS. + +```config +upstream riak_cs_host { + server 10.0.1.10:8080; +} + +server { + listen 80; + server_name _; + access_log /var/log/nginx/riak_cs.access.log; + + location / { + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_redirect off; + + proxy_connect_timeout 90; + proxy_send_timeout 90; + proxy_read_timeout 90; + + proxy_buffer_size 64k; # If set to a smaller value, + # nginx can complain with a + # "headers too large" error + + proxy_buffers 8 64k; # Increase from default of (8, 8k). + # If left to default with increased + # proxy_buffer_size, nginx complains + # that proxy_busy_buffers_size is too + # large. + + proxy_pass http://riak_cs_host; + } +} +``` + +Note that the directive `proxy_set_header Host $http_host` is essential +to ensure that the `HTTP Host:` header is passed to Riak CS as received +rather than being translated into the hostname or address of the Riak CS +backend server. + +It's also important to note that `proxy_pass` should _not_ end in a +slash, as this can lead to a variety of issues. 
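Whichever proxy you choose, it's worth smoke-testing the path from client to Riak CS once the proxy is up. A minimal sketch, assuming the HAProxy frontend address `10.0.24.100:8080` from the example above:

```bash
# The ping resource used by the HAProxy health check above; a healthy
# Riak CS node should answer with a 200 response
curl -i http://10.0.24.100:8080/riak-cs/ping

# Verify that the Host header reaches Riak CS unchanged (required for
# bucket-style URLs); -v prints the request and response headers
curl -v -H "Host: bucket.s3.amazonaws.com" http://10.0.24.100:8080/ > /dev/null
```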
diff --git a/content/riak/cs/2.1.2/cookbooks/configuration/multi-datacenter.md b/content/riak/cs/2.1.2/cookbooks/configuration/multi-datacenter.md
new file mode 100644
index 0000000000..bac8b441da
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/configuration/multi-datacenter.md
@@ -0,0 +1,129 @@
+---
+title: "Configuring Riak CS Multi-Datacenter"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Configuring"
+    identifier: "mdc_config"
+    weight: 100
+    parent: "mdc_overview"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/configuration/Configuring-MDC/
+  - /riak/cs/2.1.2/cookbooks/configuration/Configuring-MDC/
+  - /riak/cs/latest/cookbooks/configuration/multi-datacenter/
+---
+
+{{% note title="Riak CS Enterprise requires a separate download" %}}
+Please note that Riak CS Enterprise requires a download separate from
+open-source Riak CS; the open-source version will not work in conjunction
+with Riak Enterprise.
+{{% /note %}}
+
+Configuring Multi-Datacenter Replication in Riak CS requires the addition of a new group of settings to the `app.config` configuration file for all Riak CS and Riak Enterprise nodes that are part of the Riak CS cluster.
+
+## Riak Enterprise Configuration
+
+As of Riak release 1.4.0, there are two different MDC replication modes that Riak CS can use to request data from remote clusters. Please see the [comparison]({{}}riak/kv/2.1.3/using/reference/multi-datacenter/comparison) doc for more information.
+
+### Replication Version 3 Configuration
+
+For each Riak node in the cluster, update the `mdc.proxy_get` setting in `riak.conf`, or append the `{proxy_get, enabled}` setting to the `riak_repl` section of the old-style `advanced.config` or `app.config` files, as shown in the following examples:
+
+```riakconf
+mdc.proxy_get = on
+```
+
+```advancedconfig
+{riak_repl, [
+    %% Other configs
+    {fullsync_on_connect, true},
+    {fullsync_interval, 360},
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    {proxy_get, enabled}
+    %% Other configs
+]}
+```
+
+```appconfig
+{riak_repl, [
+    %% Other configs
+    {fullsync_on_connect, true},
+    {fullsync_interval, 360},
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    {proxy_get, enabled}
+    %% Other configs
+]}
+```
+
+Version 3 replication requires additional configuration in the **source cluster** via the command line:
+
+```bash
+riak-repl proxy_get enable <sink_cluster_name>
+```
+
+The `<sink_cluster_name>` should be replaced with the name of your configured **sink cluster**.
+
+See also:
+
+* [Comparing v2 and v3]({{}}riak/kv/2.1.3/using/reference/multi-datacenter/comparison)
+* [Multi-Datacenter Operations]({{}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter)
+
+## Riak CS Configuration
+
+For each Riak CS node in the cluster, update the `riak_cs` section of the `advanced.config` or old-style `app.config` file by appending the `proxy_get` setting, as shown in the following examples:
+
+```advancedconfig
+{riak_cs, [
+    %% Other configs
+    {proxy_get, enabled},
+    %% Other configs
+]}
+```
+
+```appconfig
+{riak_cs, [
+    %% Other configs
+    {proxy_get, enabled},
+    %% Other configs
+]}
+```
{{% note title="Note on restarting Riak nodes" %}}
Be sure that you restart cluster nodes in a rolling fashion after making
configuration changes. In particular, after restarting a node, be sure
that you wait for Riak's key/value store to become available before
restarting the next node. To check the status of `riak_kv` on a node
after restarting, execute the following command:

```bash
riak-admin wait-for-service riak_kv <node>
```

Replace `<node>` above with the nodename specified in the `vm.args`
configuration file.
{{% /note %}}
+
## Stanchion Configuration

Though there is no specific configuration for [Stanchion]({{}}riak/cs/2.1.2/theory/stanchion), note that Stanchion should be a single, globally unique process to which every Riak CS node sends requests, even if there are multiple replicated sites. Unlike Riak and Riak CS, Stanchion should run on _only one node in a given cluster_, perhaps on its own, dedicated hardware if you wish. Stanchion runs on only one node because it manages strongly consistent updates to [globally unique entities]({{}}riak/cs/2.1.2/theory/stanchion/#globally-unique-entities) like users and buckets.
diff --git a/content/riak/cs/2.1.2/cookbooks/configuration/reference.md b/content/riak/cs/2.1.2/cookbooks/configuration/reference.md
new file mode 100644
index 0000000000..ca7543cde1
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/configuration/reference.md
@@ -0,0 +1,962 @@
+---
+title: "Riak CS Configuration Reference"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Configuration Reference"
+    identifier: "config_ref"
+    weight: 104
+    parent: "config"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/configuration/configuration-reference
+  - /riak/cs/2.1.2/cookbooks/configuration/configuration-reference
+  - /riak/cs/latest/cookbooks/configuration/reference/
+---
+
+{{% note title="Note on Legacy Configuration Usage" %}}
+ **If you choose to use the legacy `app.config` files for Riak CS and/or
+ Stanchion, some parameters have changed names and must be updated**.
+
+ In particular, for the Riak CS `app.config`:
+
+ - `cs_ip` and `cs_port` have been combined into `listener`.
+ - `riak_ip` and `riak_pb_port` have been combined into `riak_host`.
+ - `stanchion_ip` and `stanchion_port` have been combined into `stanchion_host`.
+ - `admin_ip` and `admin_port` have been combined into `admin_listener`.
+ - `webmachine_log_handler` has become `webmachine_access_log_handler`.
+ - `{max_open_files, 50}` has been deprecated and should be replaced with
+   `{total_leveldb_mem_percent, 30}`.
+
+ For the Stanchion `app.config`:
+
+ - `stanchion_ip` and `stanchion_port` have been combined into `listener`.
+ - `riak_ip` and `riak_port` have been combined into `riak_host`.
+
+ Each of the above pairs follows a similar form. For example, if your legacy
+ `app.config` configuration was previously:
+
+ ```
+ {riak_cs, [
+     {cs_ip, "127.0.0.1"},
+     {cs_port, 8080 },
+     . . .
+ ]},
+ ```
+
+ It should now read:
+
+ ```
+ {riak_cs, [
+     {listener, {"127.0.0.1", 8080}},
+     . . .
+ ]},
+ ```
+
+ and so on. More details can be found at [Configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs).
+{{% /note %}}
+
+This document is intended as a reference listing of all configurable parameters for Riak CS. For a more narrative-style walkthrough of configuring Riak CS, we recommend consulting the [Configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs) tutorial.
+
+The configuration for Riak CS is handled through either the `riak-cs.conf` and `advanced.config` file pair, which were introduced in Riak CS 2.0.0, or the two old-style `app.config` and `vm.args` files. All configuration files will be located in each Riak CS node's `/etc` directory. Please note that you may only use one of these pairs at a time, as the `app.config`/`vm.args` pair will take priority over the new-style configuration files.
+
If you are using it, the `vm.args` file will house settings related to the [Erlang VM](http://www.erlang.org/) on which both Riak and Riak CS run. These settings have been folded into the `riak-cs.conf` and `riak.conf` configuration files in newer systems.

The `app.config` and `advanced.config` files share an identical format and can control all of Riak CS's behaviors. The files are divided into the following sections:

* `riak_cs` --- Most settings are housed in this section of the file
* `webmachine` --- Settings related to [Webmachine](https://github.com/basho/webmachine), the HTTP server framework that Riak CS uses for HTTP connections
* `lager` --- Settings for [lager](https://github.com/basho/lager), the Erlang logging framework used by Riak CS
* `sasl` --- There is only one setting in this section, `sasl_error_lager`, which determines whether and how Riak CS uses Erlang's [SASL error logger](http://www.erlang.org/doc/man/sasl_app.html)

Most of the settings you will need to manipulate have been ported into the newer `riak-cs.conf` configuration format, but there may be some advanced settings -- such as setting up customized `lager` streams -- that will need to be configured in `advanced.config`.

{{% note title="A Note About Time Values" %}}
In the `app.config` configuration files, time periods were generally written
as either seconds or milliseconds, with no real indication of which was being
used. With the update to `riak-cs.conf`, all values that describe a period of
time are written as an integer and a character, describing the unit of time
and the number of times that unit should be repeated for the period. For
example, `31d` represents 31 days, `6h` represents six hours, and `6000ms`
represents 6,000 milliseconds.

The full list of valid time units is as follows:

`f` -- Fortnights
`w` -- Weeks
`d` -- Days
`h` -- Hours
`m` -- Minutes
`s` -- Seconds
`ms` -- Milliseconds
{{% /note %}}

The tables below show settings for both `riak-cs.conf` and `advanced.config`/`app.config` where applicable, organized by functionality.

## Connection Information

### `riak-cs.conf`
| Config | Description | Default |
|--------|-------------|---------|
| `listener` | The IP address/port for the Riak CS node | `127.0.0.1:8080` |
| `riak_host` | The IP address/port for the Riak CS node's corresponding Riak node (used by Riak's Protocol Buffers interface) | `127.0.0.1:8087` |
| `root_host` | The root host name accepted by Riak CS. Changing this setting to, for example, `my_cs_host` would enable users to make requests to a URL such as `http://bucket.my_cs_host/object/` (or to the corresponding HTTP host). | `s3.amazonaws.com` |
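Taken together, a minimal sketch of this section of `riak-cs.conf` might look like the following (the addresses and the root host are illustrative placeholders, not defaults):

```riakcsconf
listener = 10.0.1.10:8080
riak_host = 10.0.1.10:8087
root_host = data.example.com
```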
+ +### `advanced.config`/`app.config` + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `listener` | The IP address for the Riak CS node | `{"127.0.0.1", 8080}` |
| `riak_host` | The TCP IP/port for the Riak CS node's corresponding Riak node (used by Riak's Protocol Buffers interface) | `{"127.0.0.1", 8087}` |
| `cs_root_host` | The root host name accepted by Riak CS. Changing this setting to, for example, `my_cs_host` would enable users to make requests to a URL such as `http://bucket.my_cs_host/object/` (or to the corresponding HTTP host). | `s3.amazonaws.com` |
+ +## Connection Pools + +Riak CS enables you to establish connection pools for normal requests +(such as `GET` and `PUT`) as well as for bucket listing requests. + +### `riak-cs.conf` + + + + + + + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `pool.request.size` | Fixed-size setting for the general request pool for Riak CS. Please note that we recommend setting Riak's `protobuf.backlog` setting higher than `pool.request.size`'s fixed size, i.e. higher than 128. The default for `protobuf.backlog` is 128. | `128` |
| `pool.request.overflow` | Overflow-size setting for the general request pool for Riak CS. | `0` |
| `pool.list.size` | Fixed-size setting for the bucket listing request pool for Riak CS. | `5` |
| `pool.list.overflow` | Overflow-size setting for the bucket listing request pool for Riak CS. | `0` |
+
### `advanced.config`/`app.config`

In these files, each pool is specified as a nested tuple of the following form:

```advancedconfig
{riak_cs, [
    {Name, {FixedSize, OverflowSize}}
]}
```
| Config | Description | Default |
|--------|-------------|---------|
| `request_pool` | Settings for the general request pool for Riak CS. Please note that we recommend setting Riak's `pb_backlog` setting higher than `request_pool`'s fixed size, i.e. higher than 128. The default for `pb_backlog` is 128. | `{128, 0}` |
| `bucket_list_pool` | Settings for the bucket listing request pool for Riak CS | `{5, 0}` |
+ +## Stanchion + +### `riak-cs.conf` + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `stanchion_host` | The IP address/port for the Stanchion node in the cluster. Please note that there should be only one Stanchion node in the cluster. | `127.0.0.1:8085` |
| `stanchion_ssl` | Whether SSL is enabled for connections between the Riak CS node and Stanchion | `off` |
+ +### `advanced.config`/`app.config` + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `stanchion_host` | The IP address/port for the Stanchion node in the cluster. Please note that there should be only one Stanchion node in the cluster. | `{"127.0.0.1", 8085}` |
| `stanchion_ssl` | Whether SSL is enabled for connections between the Riak CS node and Stanchion | `false` |
+ +## Admin and Authentication Settings + +### `riak-cs.conf` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `admin.listener` | You have the option to provide a special endpoint for performing system administration tasks in Riak CS. This setting sets the IP address and port for that endpoint. If you leave this setting commented out, then administrative tasks use the same IP and port as all other Riak CS traffic. | `127.0.0.1:8000` |
| `admin.key` | The admin key used for administrative access to Riak CS, e.g. usage of the `/riak-cs/stats` endpoint. Please note that both `admin.key` and `admin.secret` must match the corresponding settings in the Stanchion node's `stanchion.conf`. | `admin-key` |
| `admin.secret` | The admin secret used for administrative access to Riak CS. See the description for `admin.key` above for more information. | `admin-secret` |
| `anonymous_user_creation` | You will need to set this parameter to `on` to allow for the creation of an admin user when setting up a new Riak CS cluster. We recommend, however, that you enable anonymous user creation only temporarily, unless your use case specifically dictates that anonymous users should be able to create accounts. | `off` |
| `auth_module` | The module used by Riak CS for authentication. We do not recommend changing this setting unless you implement a custom authentication scheme. | `riak_cs_s3_auth` |
| `rewrite_module` | A rewrite module contains a set of rules for translating requests made using a particular API to requests in the native Riak CS storage API. We do not recommend changing this setting unless you implement a custom module. | `riak_cs_s3_rewrite` |
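For example, a hedged sketch of these settings in `riak-cs.conf` while bootstrapping a new cluster (the key and secret are the placeholder values used elsewhere in these docs; switch `anonymous_user_creation` back to `off` once the admin user exists):

```riakcsconf
admin.listener = 127.0.0.1:8000
admin.key = OUCXMB6I3HOZ6D0GWO2D
admin.secret = a58Mqd3qN-SqCoFIta58Mqd3qN7umE2hnunGag==
anonymous_user_creation = on
```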
+ +### `advanced.config`/`app.config` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `admin_listener` | You have the option to provide a special endpoint for performing system administration tasks in Riak CS. This setting sets the IP address and port for that endpoint. If you leave this setting commented out, then administrative tasks use the same IP and port as all other Riak CS traffic. | `{"127.0.0.1", 8000}` |
| `admin_key` | The admin key used for administrative access to Riak CS, e.g. usage of the `/riak-cs/stats` endpoint. Please note that both `admin_key` and `admin_secret` must match the corresponding settings in the Stanchion node's `app.config`. | |
| `admin_secret` | The admin secret used for administrative access to Riak CS. See the description for `admin_key` above for more information. | |
| `anonymous_user_creation` | You will need to set this parameter to `true` to allow for the creation of an admin user when setting up a new Riak CS cluster. We recommend, however, that you enable anonymous user creation only temporarily, unless your use case specifically dictates that anonymous users should be able to create accounts. | `false` |
| `auth_module` | The module used by Riak CS for authentication. We do not recommend changing this setting unless you implement a custom authentication scheme. | `riak_cs_s3_auth` |
| `max_buckets_per_user` | The number of buckets that can be created by each user. If a user exceeds the bucket creation limit, they are still able to perform other actions, including bucket deletion. | `100` |
| `rewrite_module` | A rewrite module contains a set of rules for translating requests made using a particular API to requests in the native Riak CS storage API. We do not recommend changing this setting unless you implement a custom module. | `riak_cs_s3_rewrite` |
+ +## Usage Recording + +These settings relate to Riak CS's [access logs]({{}}riak/cs/2.1.2/cookbooks/usage-and-billing-data). + +### `riak-cs.conf` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `stats.access.archive_period` | How large each access archive object is. This setting should be a multiple of `stats.access.flush_factor`. Expressed as a time value. | `1h` |
| `stats.access.archiver.max_backlog` | The number of access logs that are allowed to accumulate in the archiver's queue before it begins skipping to catch up. Expressed as an integer number of logs. | `2` |
| `stats.access.flush_factor` | How often the access log should be flushed, as a factor of `stats.access.archive_period`, where `1` means once per period, `2` means twice per period, etc. | `1` |
| `access_log_flush_size` | The additional access log flush trigger. After this many accesses have been recorded, the log will be flushed, even if the flush interval has not expired. Expressed as an integer number of accesses. | `1000000` |
| `riak_cs.usage_request_limit` | How many archive periods a user can request in one usage read, applied independently to access/usage and billing/storage. Expressed as a time value. | `31d` |
| `stats.storage.schedule.$time` | When to automatically start storage calculation batches. Expressed as an `HHMM` UTC time. For example, `0600` would calculate at 6 am UTC every day. If you would like to schedule multiple batches, change `$time` for each entry. For example, `stats.storage.schedule.2 = 1800` could be the second entry, scheduled for 6:00 pm UTC. | `0600` |
| `stats.storage.archive_period` | The size of each storage archive object. Should be chosen such that each `stats.storage.schedule`-based calculation falls in a different period. Expressed as a time value. | `1h` |
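For instance, a sketch of a `riak-cs.conf` that runs two storage-calculation batches per day (the times are illustrative):

```riakcsconf
stats.storage.schedule.1 = 0600
stats.storage.schedule.2 = 1800
stats.storage.archive_period = 1h
```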
+ +### `advanced.config`/`app.config` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `access_archive_period` | How large each access archive object is. This setting should be a multiple of `access_log_flush_factor`. Expressed as an integer number of seconds (e.g. `3600` translates to 1 hour). | `3600` |
| `access_archive_max_backlog` | The number of access logs that are allowed to accumulate in the archiver's queue before it begins skipping to catch up. Expressed as an integer number of logs. | `2` |
| `access_log_flush_factor` | How often the access log should be flushed, as a factor of `access_archive_period`, where `1` means once per period, `2` means twice per period, etc. | `1` |
| `access_log_flush_size` | The additional access log flush trigger. After this many accesses have been recorded, the log will be flushed, even if the flush interval has not expired. Expressed as an integer number of accesses. | `1000000` |
| `usage_request_limit` | How many archive periods a user can request in one usage read, applied independently to access/usage and billing/storage. Expressed as an integer number of intervals. The default of `744` thus translates to one month at one-hour intervals. | `744` |
| `storage_schedule` | When to automatically start storage calculation batches. Expressed as a list of `HHMM` UTC times. For example, `["0600"]` would calculate at 6 am UTC every day, `["0600", "1945"]` would calculate at 6 am and 7:45 pm UTC every day, and so on. | `[]` |
| `storage_archive_period` | The size of each storage archive object. Should be chosen such that each `storage_schedule`-based calculation falls in a different period. Expressed as an integer number of seconds. The default of `86400` translates to 1 day. | `86400` |
+
## Garbage Collection

Settings related to Riak CS's [garbage collection]({{}}riak/cs/2.1.2/cookbooks/garbage-collection) (GC) process.

### `riak-cs.conf`
| Config | Description | Default |
|--------|-------------|---------|
| `gc.interval` | How long the GC daemon waits between GC batch operations. Expressed as a time value. | `15m` |
| `gc.max_workers` | The maximum number of worker processes that may be started by the GC daemon for concurrent reaping of GC-eligible objects. | `2` |
| `gc.retry_interval` | How long a move to the GC to-do list can remain failed before it is re-attempted. Expressed as a time value. | `6h` |
| `gc.leeway_period` | How long to retain the blocks for an object after it has been deleted. This leeway period is set to give the delete indication enough time to propagate to all replicas. Expressed as a time value. | `24h` |
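As a sketch, a `riak-cs.conf` that runs GC batches more aggressively while keeping the default leeway might read as follows (the values are examples, not recommendations):

```riakcsconf
gc.interval = 5m
gc.max_workers = 4
gc.leeway_period = 24h
```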
+ +### `advanced.config`/`app.config` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `epoch_start` | The time that the GC daemon uses to begin collecting keys from the GC eligibility bucket. Records in this bucket use keys based on the epoch time the record is created plus `leeway_seconds`. The default is `0` and should be sufficient for general use. A case for readjusting this value is if the secondary index query run by the GC daemon continually times out. Raising the starting value can decrease the range of the query and make it more likely that the query will succeed. The value must be specified in Erlang binary format, e.g. set it to `<<10>>` to specify 10. | `0` |
| `gc_batch_size` | This option is used only when `gc_paginated_indexes` is set to `true`. It represents the size used for paginating the results of the secondary index query. | `1000` |
| `gc_interval` | How long the GC daemon waits between GC batch operations. Expressed as an integer number of seconds. | `900` (15 minutes) |
| `gc_max_workers` | The maximum number of worker processes that may be started by the GC daemon for concurrent reaping of GC-eligible objects. | `5` |
| `gc_paginated_indexes` | If you're running Riak nodes that are of a version prior to 1.4.0, set this to `false`. Otherwise, you will not need to adjust this setting. | `true` |
| `gc_retry_interval` | How long a move to the GC to-do list can remain failed before it is re-attempted. Expressed as an integer number of seconds. | `21600` (6 hours) |
| `leeway_seconds` | The number of seconds to retain the blocks for an object after it has been deleted. This leeway time is set to give the delete indication time to propagate to all replicas. Expressed as an integer number of seconds. | `86400` (24 hours) |
| `max_scheduled_delete_manifests` | The maximum number of manifests (representative of object versions) that can be in the `scheduled_delete` state for a given key. A value of `unlimited` means that there is no maximum, and that pruning will not be based on count. An example of where this option is useful is a use case involving a lot of churn on a fixed set of keys in a time frame that is relatively short compared to the `leeway_seconds` value. This can result in the manifest objects reaching a size that can negatively impact system performance. | `unlimited` |
+
## Concurrency and Buffering

### `advanced.config`/`app.config` Only

There are two parameters related to concurrency and buffering that you should consider adding to your Riak CS settings if you are having issues with PUT requests. Raising the value of both of these settings may provide higher single-client throughput.
| Config | Description | Default |
|--------|-------------|---------|
| `put_buffer_factor` | The number of blocks that will be buffered in-memory in Riak CS before it begins to slow down reading from the HTTP client. | `1` |
| `put_concurrency` | The number of threads inside of Riak CS that are used to write blocks to Riak. | `1` |
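A minimal `advanced.config` sketch that raises both values (the numbers are illustrative starting points, not recommendations):

```advancedconfig
{riak_cs, [
    %% Other configs
    {put_buffer_factor, 2},
    {put_concurrency, 4}
    %% Other configs
]}
```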
+ +## Miscellaneous Settings + +### `riak-cs.conf` + + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `cs_version` | The Riak CS version number. This number is used to selectively enable new features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses `0`) of the old `app.config` until all nodes have been upgraded. At that point, set it to the new value. | `10300` |
| `dtrace` | If your Erlang VM supports DTrace or SystemTap, set this parameter to `on`. | `off` |
| `trust_x_forwarded_for` | If your load balancer adds an `X-Forwarded-For` header and is reliable, i.e. the load balancer is able to guarantee that it is not added by a malicious user, set this option to `on`. Otherwise, Riak CS takes the source IP address as an input (which is the default). | `off` |
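For example, a node that sits behind a trusted load balancer which sets `X-Forwarded-For` might use the following sketch (only do this if the load balancer is under your control):

```riakcsconf
trust_x_forwarded_for = on
```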
+ +### `advanced.config`/`app.config` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `cs_version` | The Riak CS version number. This number is used to selectively enable new features for the current version to better support rolling upgrades. New installs shouldn't need to modify this. If you're performing a rolling upgrade, keep the original value (if not defined, Riak CS uses `0`) of the old `app.config` until all nodes have been upgraded. At that point, set it to the new value. | |
| `dtrace_support` | If your Erlang VM supports DTrace or SystemTap, set this parameter to `true`. | `false` |
| `fold_objects_for_list_keys` | If your Riak CS cluster is running Riak nodes prior to version 1.4.0, set this parameter to `false`. Otherwise, you will not need to modify it. This setting has been deprecated and will be removed in the next major version. | `true` |
| `n_val_1_get_requests` | If set to `true`, Riak CS will use a special request option when retrieving the blocks of an object. This special option instructs Riak to only send a request for the object block to a single eligible virtual node (vnode) instead of to all eligible vnodes. This differs from the standard `r` request option provided by Riak in that `r` affects how many vnode responses to wait for before returning and has no effect on how many vnodes are actually contacted. Enabling this option (the default) has the effect of greatly reducing the intra-cluster bandwidth used by Riak when retrieving objects with Riak CS. This option is harmless when used with a version of Riak prior to 1.4.0, but the option to disable it is provided as a safety measure. This setting has been deprecated and will be removed in the next major version. | `true` |
| `trust_x_forwarded_for` | If your load balancer adds an `X-Forwarded-For` header and is reliable, i.e. the load balancer is able to guarantee that it is not added by a malicious user, set this option to `true`. Otherwise, Riak CS takes the source IP address as an input (which is the default). | `false` |
+
## Timeouts on each Riak call

Because Riak CS stores all data in the underlying Riak cluster, Riak CS processes communicate with Riak over an API using Protocol Buffers. These are typical remote calls, so, depending on system requirements, the `timeout` for each call can be configured to avoid unnecessary timeouts.

In Riak CS 1.5.3 and later, configurations under the `riakc` section are unavailable; instead, timeouts are configurable for each access case. This enables fine-grained tuning or ad-hoc reaction to issues in a production environment. These items are configurable only in the `riak_cs` section of `advanced.config`. All units in the chart below are milliseconds.
| Config | Description | Default |
|--------|-------------|---------|
| `ping_timeout` | Timeout used in the ping API | `5000` |
| `get_user_timeout` | Timeout on retrieving user information for authentication | `60000` |
| `get_bucket_timeout` | Timeout on retrieving bucket information, for ACL or policy information | `60000` |
| `get_manifest_timeout` | Timeout on retrieving the manifest of a key | `60000` |
| `get_block_timeout` | Timeout on retrieving a chunk of an object | `60000` |
| `local_block_timeout` | Timeout on retrieving a local chunk of an object | `5000` |
| `proxy_get_block_timeout` | Timeout on a proxy get request to a remote cluster (EE only) | `60000` |
| `get_access_timeout` | Timeout on retrieving a timeslot of access statistics | `60000` |
| `get_gckey_timeout` | Timeout on retrieving a key in the GC bucket | `60000` |
| `put_manifest_timeout` | Timeout on putting a new manifest | `60000` |
| `put_block_timeout` | Timeout on putting a chunk of an object | `60000` |
| `put_access_timeout` | Timeout on putting an entry into access statistics | `60000` |
| `put_gckey_timeout` | Timeout on putting an entry into the GC bucket | `60000` |
| `put_user_usage_timeout` | Timeout on storing the result of the storage calculation for each user | `60000` |
| `delete_manifest_timeout` | Timeout on deleting a manifest in garbage collection | `60000` |
| `delete_block_timeout` | Timeout on deleting a chunk of an object in garbage collection | `60000` |
| `delete_gckey_timeout` | Timeout on deleting an entry in the GC bucket | `60000` |
| `list_keys_list_objects_timeout` | Timeout on listing objects of a bucket, older version (will be removed in 2.x) | `60000` |
| `list_keys_list_users_timeout` | Timeout on listing users | `60000` |
| `storage_calc_timeout` | Timeout on running the storage calculation on a bucket | `60000` |
| `list_objects_timeout` | Timeout on listing objects of a bucket, older version (will be removed in 2.x) | `60000` |
| `fold_objects_timeout` | Timeout on listing objects of a bucket (default since 1.5.0) | `60000` |
| `get_index_range_gckeys_timeout` | Timeout on listing keys in the garbage collection bucket, overall call | `60000` |
| `get_index_range_gckeys_call_timeout` | Timeout on listing keys in the garbage collection bucket, each continuation call | `60000` |
| `get_index_list_multipart_uploads_timeout` | Timeout on listing incomplete multipart uploads of an object | `60000` |
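As an illustrative sketch, lengthening the block-related timeouts in the `riak_cs` section of `advanced.config` might look like this (the values are examples only):

```advancedconfig
{riak_cs, [
    %% Other configs
    {get_block_timeout, 120000},  %% 120 seconds
    {put_block_timeout, 120000}   %% 120 seconds
    %% Other configs
]}
```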
+ +## Webmachine + +### `advanced.config`/`app.config` Only + +Settings specific to [Webmachine](https://github.com/basho/webmachine), the web +server that handles all HTTP and HTTPS connections to Riak CS. The +`riak_cs_access_log_handler` and `webmachine_log_handler` settings are part of a +`log_handlers` sub-grouping: + +```appconfig +{webmachine, [ + %% Other configs + {log_handlers, [ + {webmachine_access_log_handler, ...}, + {riak_cs_access_log_handler, ...}, + ]}, + %% Other configs + ]} +``` + + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `server_name` | | |
| `webmachine_log_handler` | If this setting is commented out or removed, Webmachine access log handling will be disabled. | `["./log"]` |
| `riak_cs_access_log_handler` | We do not recommend changing or removing this setting. | `[]` |
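Filling the `log_handlers` structure shown above with the defaults from this table yields a sketch like the following:

```appconfig
{webmachine, [
    %% Other configs
    {log_handlers, [
        {webmachine_access_log_handler, ["./log"]},
        {riak_cs_access_log_handler, []}
    ]},
    %% Other configs
]}
```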
+ +## Logging + +### `advanced.config`/`app.config` Only + +These settings relate to [lager](https://github.com/basho/lager), the Erlang +logging framework used by Riak CS. They are included in the `lager` settings in +`app.config`. + +The `lager_console_backend` and `lager_file_backend` settings are part of a +`handlers` sub-group: + +```appconfig +{lager, [ + %% Other configs + {handlers, [ + {lager_console_backend, ...}, + {lager_file_backend, ...} + ]}, + %% Other configs + ]} +``` + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `lager_console_backend` | See the [lager documentation](https://github.com/basho/lager) for more details. | |
| `lager_file_backend` | See the [lager documentation](https://github.com/basho/lager) for more details. | |
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `crash_log` | Whether to write to a crash log, and where. If commented out, omitted, or undefined, no crash logging will take place. | `./log/crash.log` |
| `crash_log_count` | The number of crash logs to keep. Setting this parameter to `0` (the default) means that only the current log will be kept. | `0` |
| `crash_log_date` | When to rotate the crash log. The default is no time rotation. For documentation on the syntax of this parameter, see the [lager documentation](https://github.com/basho/lager). | `$D0` |
| `crash_log_msg_size` | The maximum size of events in the crash log, expressed as a number of bytes. | `65536` |
| `crash_log_size` | The maximum size of the crash log, in bytes, before it is rotated. Setting this parameter to `0` disables rotation. | `10485760` |
| `error_logger_redirect` | Whether to redirect `error_logger` messages into lager. | `true` |
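Putting the defaults from this table together, the crash-log portion of the `lager` section might read as follows (a sketch using the default values):

```appconfig
{lager, [
    %% Other configs
    {crash_log, "./log/crash.log"},
    {crash_log_msg_size, 65536},
    {crash_log_size, 10485760},
    {crash_log_date, "$D0"},
    {crash_log_count, 0},
    {error_logger_redirect, true}
    %% Other configs
]}
```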
+ +## SASL + +### `advanced.config`/`app.config` Only + + + + + + + + + + +
| Config | Description | Default |
|--------|-------------|---------|
| `sasl_error_lager` | Whether to enable SASL, Erlang's built-in error logger. | `false` |
+
[config_your_code]: {{< baseurl >}}riak/kv/2.1.2/configuring/basic/#erlang-vm-tunings
diff --git a/content/riak/cs/2.1.2/cookbooks/configuration/riak-cs.md b/content/riak/cs/2.1.2/cookbooks/configuration/riak-cs.md
new file mode 100644
index 0000000000..c9a880c365
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/configuration/riak-cs.md
@@ -0,0 +1,424 @@
+---
+title: "Configuring Riak CS"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Configuring Riak CS"
+    identifier: "config_riak_cs"
+    weight: 101
+    parent: "config"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/configuration/riak-cs/
+  - /riak/cs/2.1.2/cookbooks/configuration/riak-cs/
+  - /riak/cs/latest/cookbooks/configuration/riak-cs/
+---
+
+For Riak CS to operate properly it must know how to connect to Riak. A Riak CS node typically runs on the same server as its corresponding Riak node, which means that changes will only be necessary if Riak is configured using non-default settings.
+
+Riak CS's settings typically reside in a CS node's `riak-cs.conf` and `advanced.config` files, both of which are typically located in the `/etc/riak-cs` directory. The newer `riak-cs.conf` file is a simple list of `configuration = option` pairs, but there are some configuration options that can only be changed through the `advanced.config` file. That file looks something like this:
+
+```advancedconfig
+{riak_cs, [
+    {parameter1, value},
+    {parameter2, value},
+    %% and so on...
+]},
+```
+
+If you're updating from a version of Riak CS prior to 2.0.0 -- when the `riak-cs.conf` file was introduced -- you can still use the old-style `app.config` configuration file in place of the `riak-cs.conf`/`advanced.config` pair. The `app.config` file has identical syntax to the `advanced.config` file, so any examples that use the `advanced.config` syntax can be directly translated to the `app.config` file.
+
+Please note that the older `app.config` file supersedes the newer configuration files. If an `app.config` file is present, neither the `riak-cs.conf` nor the `advanced.config` will be used.
+
+> **Note on Legacy app.config**
+>
+> If you are upgrading to Riak CS 2.0 from a previous version of Riak CS and plan to continue usage of the legacy `app.config` file, please note that some configuration options have changed names. Most notably, the IP/Port format has changed in 2.0 for Stanchion, Riak, and Riak CS. To view these changes, please review the [Rolling Upgrades]({{}}riak/cs/2.1.2/cookbooks/rolling-upgrades) document.
+>
+> For a comprehensive listing of available parameters and a full list of `app.config` parameters, see the [Full Configuration Reference]({{}}riak/cs/2.1.2/cookbooks/configuration/reference).
+
+The sections below walk you through some of the main configuration categories that you will likely encounter while operating Riak CS.
+
+## Host and Port
+
+To connect Riak CS to Riak, make sure that the following parameter is set to the host and port used by Riak:
+
+* `riak_host` --- Replace `127.0.0.1:8087` with the IP address and port number of the Riak node you want Riak CS to connect to.
+
+You will also need to set the host listener for Riak CS:
+
+* `listener` --- Replace `127.0.0.1:8080` with the IP address and port number of the Riak CS node if you are running CS non-locally. Make sure that the port number does not conflict with the `riak_host` port number of the Riak node and the Riak CS node that are running on the same machine.
+
{{% note title="Note on IP addresses" %}}
The IP address you enter here must match the IP address specified for the
Protocol Buffers interface in the Riak `riak.conf` file unless Riak CS is
running on a completely different network, in which case address translation
is required.
{{% /note %}}

After making any changes to the `riak-cs.conf` file in Riak CS, [restart]({{}}riak/cs/2.1.2/cookbooks/command-line-tools/#riak-cs) the node if it is already running.

## Specifying the Stanchion Node

If you're running a single Riak CS node, you don't have to change the [Stanchion]({{}}riak/cs/2.1.2/cookbooks/configuration/stanchion) settings because Stanchion runs on the local host. If your Riak CS system has multiple nodes, however, you must specify the IP address and port for the Stanchion node and whether or not SSL is enabled.

The Stanchion settings reside in the Riak CS `riak-cs.conf` file, which is located in the `/etc/riak-cs` directory of each Riak CS node.

To set the host and port for Stanchion, make sure the following parameter is set to the host and port used by Stanchion:

* `stanchion_host` --- replace `127.0.0.1:8085` with the IP address and port number of the Stanchion node

## Enabling SSL

SSL is disabled by default in Stanchion, i.e. the `stanchion_ssl` variable is set to `off`. If Stanchion is configured to use SSL, change this variable to `on`. The following example configuration would set the Stanchion host to `localhost`, the port to `8085` (the default), and set up Stanchion to use SSL:

```riakcsconf
stanchion_host = 127.0.0.1:8085
stanchion_ssl = on
```

```advancedconfig
{riak_cs, [
    %% Other configs
    {stanchion_host, {"127.0.0.1", 8085}},
    {stanchion_ssl, true},
    %% Other configs
]}
```

## Specifying the Node Name

You can also set a more useful name for the Riak CS node, which is helpful to identify the node from which requests originate during troubleshooting. This setting resides in the Riak CS `riak-cs.conf` configuration file, or the old-style `vm.args` file, which is also located in the `/etc/riak-cs` directory. This would set the name of the Riak CS node to `riak_cs@127.0.0.1`:

```riakcsconf
nodename = riak_cs@127.0.0.1
```

```vmargs
-name riak_cs@127.0.0.1
```

Change `127.0.0.1` to the IP address or hostname for the server on which Riak CS is running.

## Specifying the Admin User

The admin user is authorized to perform actions such as creating users or obtaining billing statistics. An admin user account is no different from any other user account. **You must create an admin user to use Riak CS**.

{{% note title="Note on anonymous user creation" %}}
Before creating an admin user, you must first set `anonymous_user_creation =
on` in the Riak CS `riak-cs.conf` (or set `{anonymous_user_creation, true}` in
the old-style `advanced.config`/`app.config`). You may disable this again once
the admin user has been created.
{{% /note %}}

To create an account for the admin user, use an HTTP `POST` request with the username you want to use for the admin account.
+The following is an example:
+
+```curl
+curl -H 'Content-Type: application/json' \
+  -XPOST http://<host>:<port>/riak-cs/user \
+  --data '{"email":"admin@example.com", "name":"admin"}'
+```
+
+The JSON response will look something like this:
+
+```json
+{
+  "display_name" : "admin",
+  "email" : "admin@example.com",
+  "id" : "8d6f05190095117120d4449484f5d87691aa03801cc4914411ab432e6ee0fd6b",
+  "key_id" : "OUCXMB6I3HOZ6D0GWO2D",
+  "key_secret" : "a58Mqd3qN-SqCoFIta58Mqd3qN7umE2hnunGag==",
+  "name" : "admin_example",
+  "status" : "enabled"
+}
+```
+
+You can optionally send and receive XML instead by setting the `Content-Type`
+to `application/xml`.
+
+Once the admin user exists, you must specify the credentials of the admin user
+on each node in the Riak CS system. The admin user credential settings reside in
+the Riak CS `riak-cs.conf` file, which is located in the `/etc/riak-cs`
+directory. Paste the `key_id` string as the value of `admin.key`, and paste the
+`key_secret` string into the `admin.secret` variable, as shown here:
+
+```riakcsconf
+admin.key = OUCXMB6I3HOZ6D0GWO2D
+admin.secret = a58Mqd3qN-SqCoFIta58Mqd3qN7umE2hnunGag==
+```
+
+```advancedconfig
+{riak_cs, [
+    %% Admin user credentials
+    {admin_key, "OUCXMB6I3HOZ6D0GWO2D"},
+    {admin_secret, "a58Mqd3qN-SqCoFIta58Mqd3qN7umE2hnunGag=="},
+    %% Other configs
+]}
+```
+
+## Bucket Restrictions
+
+If you wish, you can limit the number of buckets created per user. The default
+maximum is 100. Please note that if a user exceeds the bucket creation limit,
+they are still able to perform other actions, including bucket deletion. You can
+change the default limit using the `max_buckets_per_user` parameter in each
+node's `advanced.config` file---there is no equivalent configuration for the
+`riak-cs.conf` file. The example configuration below would set the maximum to
+1000:
+
+```advancedconfig
+{riak_cs, [
+    %% Other configs
+    {max_buckets_per_user, 1000},
+    %% Other configs
+]}
+```
+
+If you want to avoid setting a limit on per-user bucket creation, you can set
+`max_buckets_per_user` to `unlimited`.
+
+## Connection Pools
+
+Riak CS uses two distinct connection pools for communication with Riak: a
+**primary** and a **secondary** pool.
+
+The primary connection pool is used to service the majority of API requests
+related to the upload or retrieval of objects. It is identified in the
+configuration file as `pool.request.size`. The default size of this pool is 128.
+
+The secondary connection pool is used strictly for requests to list the contents
+of buckets. The separate connection pool is maintained in order to improve
+performance. This secondary connection pool is identified in the configuration
+file as `pool.list.size`. The default size of this pool is 5.
+
+The following shows the default connection pool entries in both configuration
+formats:
+
+```riakcsconf
+pool.request.size = 128
+pool.request.overflow = 0
+pool.list.size = 5
+pool.list.overflow = 0
+```
+
+```advancedconfig
+{riak_cs, [
+    %% Other configs
+    {connection_pools,
+     [
+      {request_pool, {128, 0} },
+      {bucket_list_pool, {5, 0} }
+     ]},
+    %% Other configs
+]}
+```
+
+The value for each pool is split into a pair, with the first number representing
+the normal size of the pool, i.e. the number of concurrent requests of a
+particular type that a Riak CS node may service. The second number represents
+the number of overflow requests that are allowed.
+It is not recommended that you use any value other than 0 for the overflow
+amount unless careful analysis and testing have shown it to be beneficial for a
+particular use case.
+
+### Tuning
+
+We strongly recommend that you take care when setting the value of the
+[`pb_backlog` setting]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/riak-for-cs/#setting-up-riak-to-use-protocol-buffers) in Riak. When a Riak CS node is
+started, each connection pool begins to establish connections to Riak. This can
+result in a [thundering herd problem](http://en.wikipedia.org/wiki/Thundering_herd_problem) in which connections in the pool believe they are connected to Riak, but in reality some of the connections have been reset. Due to TCP `RST` packet rate limiting (controlled by `net.inet.icmp.icmplim`), some of the connections may not receive notification until they are used to service a user's request. This manifests itself as an `{error, disconnected}` message in the Riak CS logs and an error returned to the user.
+
+## Enabling SSL in Riak CS
+
+To enable SSL in Riak CS, set the following parameters:
+
+```riakcsconf
+ssl.certfile = "./etc/cert.pem"
+ssl.keyfile = "./etc/key.pem"
+```
+
+```advancedconfig
+{ssl, [
+    {certfile, "./etc/cert.pem"},
+    {keyfile, "./etc/key.pem"}
+]},
+```
+
+Replace the text in quotes with the path and filename for your SSL encryption
+files. By default, there's a `cert.pem` and a `key.pem` in each node's `/etc`
+directory. You're free to use those or to supply your own.
+
+Please note that you may also need to provide a [certificate
+authority](http://en.wikipedia.org/wiki/Certificate_authority), aka a CA cert.
+If you do, you must use the `advanced.config` file, and specify its location
+using the `cacertfile` parameter. Unlike `certfile` and `keyfile`, the
+`cacertfile` parameter is not present in the file by default. You will need to
+add it yourself. Here's an example configuration with this parameter included:
+
+```advancedconfig
+{ssl, [
+    {certfile, "./etc/cert.pem"},
+    {keyfile, "./etc/key.pem"},
+    {cacertfile, "./etc/cacert.pem"}
+]},
+%% Other configs
+```
+
+Instructions on creating your own CA cert can be found
+[here](http://www.akadia.com/services/ssh_test_certificate.html).
+
+## Proxy vs. Direct Configuration
+
+Riak CS can interact with S3 clients in one of two ways:
+
+* A [**proxy** configuration](http://basho.com/riak-cs-proxy-vs-direct-configuration/)
+  enables an S3 client to communicate with Riak CS as if it were Amazon S3
+  itself, i.e. using typical Amazon URLs.
+* A **direct** configuration requires that an S3 client connecting to Riak CS be
+  configured for an "S3-compatible service," i.e. with a Riak CS endpoint that
+  is not masquerading as Amazon S3. Examples of such services include
+  [Transmit](http://panic.com/transmit/), [s3cmd](http://s3tools.org/s3cmd), and
+  [DragonDisk](http://www.dragondisk.com/).
+
+### Proxy
+
+To establish a proxy configuration, configure your client's proxy settings to
+point to the Riak CS cluster's address, then configure your client with Riak CS
+credentials (a sketch for s3cmd follows below).
+
+When Riak CS receives the request to be proxied, it services the request itself
+and responds to the client as if the request went to S3.
+
+On the server side, the `root_host` configuration in the `riak-cs.conf` file
+must be set to `s3.amazonaws.com` because all of the bucket URLs requested by
+the client will be destined for `s3.amazonaws.com`. This is the default.
+
+**Note**: One issue with proxy configurations is that many GUI clients only
+allow for one proxy to be configured for all connections. For customers trying
+to connect to both S3 and Riak CS, this can prove problematic.
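+
+For instance, a proxy setup for s3cmd might look like the following `.s3cfg`
+fragment (the proxy host is a hypothetical placeholder for your Riak CS
+address; full sample files appear in [Configuring an S3 Client]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/s3-client)):
+
+```config
+# Bucket URLs keep the Amazon form; the proxy points at Riak CS
+host_base = s3.amazonaws.com
+host_bucket = %(bucket)s.s3.amazonaws.com
+proxy_host = riakcs.example.local
+proxy_port = 8080
+```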
+### Direct
+
+To establish a direct configuration, the `cs_root_host` in the
+`riak_cs` section of `app.config` must be set to the FQDN of your Riak
+CS endpoint, as all of the bucket URLs will be destined for the FQDN
+endpoint.
+
+You will also need wildcard DNS entries for any child of the endpoint to
+resolve to the endpoint itself. Here's an example:
+
+```config
+data.riakcs.net
+*.data.riakcs.net
+```
+
+## Garbage Collection Settings
+
+The following options are available to make adjustments to the Riak CS garbage
+collection system. More details about garbage collection in Riak CS are
+available in [Garbage Collection]({{< baseurl >}}riak/cs/2.1.2/cookbooks/garbage-collection).
+
+* `gc.leeway_period` (`leeway_seconds` in `advanced.config` or `app.config`) ---
+  The amount of time that must elapse before an object version that has been
+  explicitly deleted or overwritten is eligible for garbage collection. The
+  default value is `24h` (24 hours).
+* `gc.interval` (`gc_interval` in `advanced.config` or `app.config`) --- The
+  interval at which the garbage collection daemon runs to search for and reap
+  eligible object versions. The default value is `15m` (15 minutes). It is
+  important that you have only _one_ garbage collection daemon running in a
+  cluster at any point in time. To disable the daemon on a node, set the
+  `gc.interval` parameter to `infinity`.
+* `gc.retry_interval` (`gc_retry_interval` in `advanced.config` or `app.config`)
+  --- The amount of time that must elapse before another attempt is made to
+  write a record for an object manifest in the `pending_delete` state to the
+  garbage collection eligibility bucket. In general, this timeout should never
+  expire, but may if an error condition caused the original record in the
+  garbage collection eligibility bucket to be removed prior to the reaping
+  process completing. The default value is `6h` (6 hours).
+* `gc.max_workers` (`gc_max_workers` in `advanced.config` or `app.config`) ---
+  The maximum number of worker processes that may be started by the garbage
+  collection daemon to use for concurrent reaping of garbage-collection-eligible
+  objects. The default value is 2.
+* `active_delete_threshold` (`active_delete_threshold` in `advanced.config` or `app.config`) --- Blocks of objects smaller than the threshold are synchronously deleted while their manifests are marked as `scheduled_delete`. The default value is 0.
+
+There are some additional settings that may only be configured in the
+`advanced.config` or `app.config` configuration files. **None of the below
+settings are available through the `riak-cs.conf` configuration file.**
+
+* `epoch_start` --- The time that the garbage collection daemon uses
+  to begin collecting keys from the garbage collection eligibility
+  bucket. Records in this bucket use keys based on the epoch time the
+  record is created + `leeway_seconds`. The default is 0 and should be
+  sufficient for general use. A case for adjusting this value is if the
+  secondary index query run by the garbage collection daemon continually
+  times out. Raising the starting value can decrease the range of the
+  query and make it more likely the query will succeed. The value must
+  be specified in Erlang binary format. *e.g.* to set it to 10, specify
+  `<<"10">>` (see the sketch following this list).
+* `initial_gc_delay` --- The number of seconds to wait in addition to
+  the `gc_interval` value before the first execution of the garbage
+  collection daemon when the Riak CS node is started. **Note**:
+  Originally, this setting was used to stagger the execution of GC on
+  multiple nodes; we no longer recommend running multiple GC daemons.
+  Correspondingly, we do not recommend setting `initial_gc_delay`.
+* `max_scheduled_delete_manifests` --- The maximum number of
+  manifests (representative of object versions) that can be in the
+  `scheduled_delete` state for a given key. A value of `unlimited` means
+  there is no maximum, and pruning will not happen based on
+  count. An example of where this option is useful is a use case
+  involving a lot of churn on a fixed set of keys in a time frame that
+  is relatively short compared to the `leeway_seconds` value. This can
+  result in the manifest objects reaching a size that can negatively
+  impact system performance. The default value is `unlimited`.
+* `gc_batch_size` --- This option represents the size used for paginating the
+  results of the secondary index query. The default value is 1000.
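+
+As a hypothetical `advanced.config` sketch, raising the daemon's starting epoch
+to 10 (note the Erlang binary format described above) might look like this; the
+value is a placeholder, not a recommendation:
+
+```advancedconfig
+{riak_cs, [
+    %% Other configs
+    {epoch_start, <<"10">>},
+    %% Other configs
+]}
+```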
+
+{{% note title="Deprecated Configurations" %}}
+While Riak CS 2.0.0 still allows the configuration of `gc_paginated_indexes`,
+it is strongly recommended that this setting not be used. It has been
+deprecated, and _will be removed_ in the next major release.
+{{% /note %}}
+
+## Other Riak CS Settings
+
+For a complete listing of configurable parameters for Riak CS, see the
+[configuration reference]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/reference) document.
diff --git a/content/riak/cs/2.1.2/cookbooks/configuration/riak-for-cs.md b/content/riak/cs/2.1.2/cookbooks/configuration/riak-for-cs.md
new file mode 100644
index 0000000000..2d311355b2
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/configuration/riak-for-cs.md
@@ -0,0 +1,347 @@
+---
+title: "Configuring Riak KV for CS"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Configuring Riak KV for CS"
+    identifier: "config_riak_for_cs"
+    weight: 100
+    parent: "config"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/configuration/Configuring-Riak/
+  - /riak/cs/2.1.2/cookbooks/configuration/Configuring-Riak/
+  - /riak/cs/latest/cookbooks/configuration/riak-for-cs/
+---
+
+Because Riak CS is an application built on top of Riak, it's important
+to pay special attention to your Riak configuration when running Riak
+CS. This document is both a tutorial on Riak configuration and a
+reference listing important configurable parameters.
+
+## The Proper Backends for Riak CS
+
+The default backend used by Riak is the [Bitcask]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/bitcask) backend, but the
+Riak CS package includes a special backend that should be used by the
+Riak cluster that is part of the Riak CS system. It is a custom version
+of the standard [Multi]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/multi) backend that ships with Riak.
+
+Some of the Riak buckets used internally by Riak CS use secondary
+indexes, which currently require the [LevelDB]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/leveldb) backend. Other parts
+of the Riak CS system can benefit from the use of the Bitcask backend.
+The use of the custom [Multi]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/multi) backend enables Riak CS to take
+advantage of the strengths of both of these backends to achieve the best
+blend of performance and features.
+The next section covers how to
+properly set up Riak to use this Multi backend.
+
+Additionally, the Riak CS storage calculation system uses Riak's
+[MapReduce]({{< baseurl >}}riak/kv/2.1.3/developing/usage/mapreduce) to sum the files in a bucket. This means that you must tell all of your Riak nodes where to find Riak CS's compiled files before calculating storage.
+
+A few other settings must be modified to configure a Riak node as part
+of a Riak CS system, such as the node IP address and the IP address and
+port to use for communicating through Protocol Buffers. Other settings
+can be modified if necessary. The following sections describe how to
+configure a Riak node to work as part of a Riak CS system.
+
+## Setting up the Proper Riak Backend
+
+First, edit Riak's `riak.conf`, or the old-style `advanced.config` or
+`app.config` [configuration file]({{< baseurl >}}riak/kv/2.1.3/configuring/reference). These files can be found in the `/etc/riak` or `/opt/riak/etc` directories. By default, Riak uses the [Bitcask]({{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/bitcask) backend. The first thing we need to do is change that by removing
+the following line:
+
+```riakconf
+## Delete this line:
+storage_backend = bitcask
+```
+
+```advancedconfig
+{riak_kv, [
+    %% Delete this line:
+    {storage_backend, riak_kv_bitcask_backend},
+]}
+```
+
+```appconfig
+{riak_kv, [
+    %% Delete this line:
+    {storage_backend, riak_kv_bitcask_backend},
+]}
+```
+
+Next, we need to expose the necessary Riak CS modules to Riak and instruct Riak
+to use the custom backend provided by Riak CS. To do so, insert the following
+options into either the `advanced.config` or `app.config` file:
+
+```advancedconfig
+{eleveldb, [
+    {total_leveldb_mem_percent, 30}
+]},
+{riak_kv, [
+    %% Other configs
+    {add_paths, ["/usr/lib/riak-cs/lib/riak_cs-2.1.2/ebin"]},
+    {storage_backend, riak_cs_kv_multi_backend},
+    {multi_backend_prefix_list, [{<<"0b:">>, be_blocks}]},
+    {multi_backend_default, be_default},
+    {multi_backend, [
+        {be_default, riak_kv_eleveldb_backend, [
+            {data_root, "/var/lib/riak/leveldb"}
+        ]},
+        {be_blocks, riak_kv_bitcask_backend, [
+            {data_root, "/var/lib/riak/bitcask"}
+        ]}
+    ]},
+    %% Other configs
+]}
+```
+
+```appconfig
+{eleveldb, [
+    {total_leveldb_mem_percent, 30}
+]},
+{riak_kv, [
+    %% Other configs
+    {add_paths, ["/usr/lib/riak-cs/lib/riak_cs-2.1.2/ebin"]},
+    {storage_backend, riak_cs_kv_multi_backend},
+    {multi_backend_prefix_list, [{<<"0b:">>, be_blocks}]},
+    {multi_backend_default, be_default},
+    {multi_backend, [
+        {be_default, riak_kv_eleveldb_backend, [
+            {data_root, "/var/lib/riak/leveldb"}
+        ]},
+        {be_blocks, riak_kv_bitcask_backend, [
+            {data_root, "/var/lib/riak/bitcask"}
+        ]}
+    ]},
+    %% Other configs
+]}
```
+
+It's important to note that many of these values will depend on various
+directories specific to your [operating system]({{< baseurl >}}riak/kv/2.1.3/setup/installing), so make sure to adjust them accordingly. The `add_paths`
+parameter, for example, assumes that Riak CS is installed in
+`/usr/lib/riak-cs`, while the `data_root` parameters assume that Riak is
+installed in `/var/lib/`.
+
+This configuration also assumes that the Riak CS package is installed on
+the same machine as Riak. If not, the package will need to be copied
+onto the same box.
+
+## Allowing for Sibling Creation
+
+Now, we need to set the `allow_mult` parameter to `true`.
+We can add this line to either the `riak.conf` configuration file, or to the
+`riak_core` section of the old-style `advanced.config` or `app.config` files:
+
+```riakconf
+buckets.default.allow_mult = true
+```
+
+```advancedconfig
+{riak_core, [
+    %% Other configs
+    {default_bucket_props, [{allow_mult, true}]},
+    %% Other configs
+]}
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {default_bucket_props, [{allow_mult, true}]},
+    %% Other configs
+]}
+```
+
+This will enable Riak to create [siblings]({{< baseurl >}}riak/kv/2.1.3/learn/concepts/causal-context/#siblings), which is necessary for Riak CS to function. If you are connecting to Riak CS from a [client library]({{< baseurl >}}riak/kv/2.1.3/developing/client-libraries), don't worry: you will not have to manage [conflict resolution]({{< baseurl >}}riak/kv/2.1.3/developing/usage/conflict-resolution), as all Riak CS
+operations are strongly consistent by definition.
+
+{{% note title="Note on `allow_mult`" %}}
+Any Riak node that also supports Riak CS should have `allow_mult` set to
+`true` at all times. Riak CS will refuse to start if `allow_mult` is set to
+`false`.
+{{% /note %}}
+
+## Specifying the Nodename and IP Address
+
+Every Riak node has a name that can be specified in `riak.conf` using the
+`nodename` option. If you are using the old-style `app.config` configuration
+file, you will need to create a file named `vm.args` in the same directory as
+the `app.config` file, and set the node name using the `-name` flag. We
+recommend providing nodes a name of the form `<name>@<IP address>`. So if you
+have three nodes running on the host `100.0.0.1`, you could name them
+`riak1@100.0.0.1`, `riak2@100.0.0.1`, and `riak3@100.0.0.1`, or you could give
+them names that are more specific, such as `test_cluster1@100.0.0.1`,
+`user_data3@100.0.0.1`, and so on. The example below demonstrates changing a
+node's name to `riak1@127.0.0.1`, which would work for a node running on
+`localhost`:
+
+```riakconf
+nodename = riak1@127.0.0.1
+```
+
+```vmargs
+-name riak1@127.0.0.1
+```
+
+You should name _all_ nodes prior to starting them and connecting them
+to a cluster.
+
+## Testing the Configuration
+
+Now that the necessary changes have been made to the Riak node's configuration,
+we can attempt to start Riak:
+
+```bash
+riak start
+```
+
+This could take a second. We can then test whether the node is running:
+
+```bash
+riak ping
+```
+
+If the response is `pong`, then Riak is running; if the response is
+`Node not responding to pings`, then something has gone wrong.
+
+If the node has not started properly, look at the `erlang.log.1` in the
+`/log` directory of the node to see if the problem can be identified.
+One common error is `invalid_storage_backend`, which indicates that the
+path to the Riak CS library in `advanced.config` or in `app.config` is incorrect
+(or that Riak CS is not installed on the server). In spite of this error, make
+sure that you do not change the backend from `riak_cs_kv_multi_backend` to
+`riak_kv_multi_backend`.
+
+## Setting Up Riak to Use Protocol Buffers
+
+The Riak [Protocol Buffers]({{< baseurl >}}riak/kv/2.1.3/developing/api/protocol-buffers) settings reside in the Riak `riak.conf`,
+or in the `riak_api` section of the old-style `advanced.config` or
+`app.config` files, which are located in the `/etc/riak/` folder. The default
+host is `127.0.0.1` and the default port is `8087`. You will need to change this
+if you plan on running Riak and Riak CS in a non-local environment.
+Replace `127.0.0.1` with the IP address of the Riak node and `8087` with the
+appropriate port:
+
+```riakconf
+listener.protobuf.internal = 10.0.2.10:10001
+```
+
+```advancedconfig
+{riak_api, [
+    %% Other configs
+    {pb, ["10.0.2.10", 10001]},
+    %% Other configs
+]}
+```
+
+```appconfig
+{riak_api, [
+    %% Other configs
+    {pb, ["10.0.2.10", 10001]},
+    %% Other configs
+]}
+```
+
+**Note**: The `listener.protobuf.internal` values in the Riak `riak.conf` (or
+the `pb` value in `advanced.config`/`app.config`) file must match the values for
+`riak_host` in the Riak CS `riak-cs.conf` and Stanchion `stanchion.conf` (or
+`riak_host` in the corresponding `advanced.config`/`app.config`) files.
+
+{{% note title="Note on port numbers" %}}
+A different port number might be required if the port number conflicts with
+ports used by another application or if you use a load balancer or proxy
+server.
+{{% /note %}}
+
+It is also recommended that users ensure that the size of Riak's
+`protobuf.backlog` (or, in the `advanced.config`/`app.config` files, the
+`pb_backlog`) is equal to or greater than the size of the
+`pool.request.size` specified in the Riak CS `riak-cs.conf` (or
+the `request_pool` size in the `advanced.config`/`app.config` files).
+
+If the `pool.request.size` value in Riak CS is changed, the `protobuf.backlog`
+value in Riak should be updated as well.
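+
+As a hedged sketch in `riak.conf`, assuming the default Riak CS request pool
+size of 128, a backlog sized comfortably above it might look like this (the
+exact value is a placeholder to be tuned for your deployment):
+
+```riakconf
+## Keep this >= pool.request.size in riak-cs.conf
+protobuf.backlog = 256
+```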
+## Other Riak Settings
+
+The `riak.conf` and `advanced.config` files include other settings, such as
+turning on the creation of log files and specifying where to store them. These
+settings have default values that should work in most cases. For more
+information, we recommend reading our [configuration files][riak_conf_files]
+documentation.
+
+## Specifying the Riak IP Address
+
+By setting the Riak IP address you ensure that your Riak nodes have unique IP
+addresses, whether you're working with a single node or adding additional nodes
+to the system. The Riak IP address setting resides in the Riak `riak.conf` or
+-- if you're using the `app.config` file -- in the `vm.args` configuration file,
+which is located in the same `/etc/riak/` directory (or in `/opt/riak/etc/` on
+some operating systems).
+
+Initially, the line that specifies the Riak node IP address is set to the local
+host, as follows:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+Replace `127.0.0.1` with the appropriate IP address or hostname for the Riak
+node.
+
+### Performance and Capacity settings
+
+For performance reasons, we strongly recommend that you insert the following
+values into Riak's `riak.conf`, or the old-style `vm.args`, configuration file,
+located in the `/etc/riak` or `/opt/riak/etc` folder:
+
+```riakconf
+erlang.max_ports = 65536
+```
+
+```vmargs
+## This setting should already be present for recent Riak installs.
+-env ERL_MAX_PORTS 65536
+```
+
+### Disable JavaScript MapReduce
+
+It is recommended that you not use the now-deprecated JavaScript MapReduce in
+conjunction with _any_ version of Riak CS. For performance reasons, you should
+disable the VM that performs JavaScript MapReduce operations by setting the
+following in the `riak.conf` configuration file, or the `riak_kv` section of the
+old-style `advanced.config` or `app.config`:
+
+```riakconf
+javascript.map_pool_size = 0
+javascript.reduce_pool_size = 0
+javascript.hook_pool_size = 0
+```
+
+```advancedconfig
+{riak_kv, [
+    %% Other configs
+    {map_js_vm_count, 0},
+    {reduce_js_vm_count, 0},
+    {hook_js_vm_count, 0}
+    %% Other configs
+]}
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {map_js_vm_count, 0},
+    {reduce_js_vm_count, 0},
+    {hook_js_vm_count, 0}
+    %% Other configs
+]}
+```
+
+
+[riak_conf_files]: {{< baseurl >}}riak/kv/2.0.5/ops/advanced/configs/configuration-files/
diff --git a/content/riak/cs/2.1.2/cookbooks/configuration/s3-client.md b/content/riak/cs/2.1.2/cookbooks/configuration/s3-client.md
new file mode 100644
index 0000000000..c4c2efda0a
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/configuration/s3-client.md
@@ -0,0 +1,162 @@
+---
+title: "Configuring an S3 Client"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Configuring an S3 Client"
+    identifier: "config_s3_client"
+    weight: 101
+    parent: "api_s3"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/configuration/Configuring-an-S3-Client/
+  - /riak/cs/2.1.2/cookbooks/configuration/Configuring-an-S3-Client/
+  - /riak/cs/latest/cookbooks/configuration/s3-client/
+---
+
+This tutorial will show you how to use [s3cmd](http://s3tools.org/s3cmd)
+as an S3 client. While it won't cover all of the client's features, it
+will show you how to create a configuration and run some basic commands.
+
+>**Warning: s3cmd Signature Version**
+>
+> If you are using s3cmd version 1.5.0 or greater, you will need to append the
+> `--signature-v2` flag to every command that targets a Riak CS cluster to have
+> s3cmd use the AWS Signature version 2 rather than the default AWS Signature
+> version 4.
+
+## Initial Setup
+
+To use s3cmd in conjunction with Riak CS, you must configure it to
+interact with your Riak CS system. One way to do so is to create a
+`.s3cfg` file and store it in your home directory. When you run any
+s3cmd-related command, the contents of that file will be read by
+default. Alternatively, you can specify a non-default configuration
+file location using the `-c` flag. Here's an example:
+
+```bash
+s3cmd -c /PATH/TO/CONFIG/FILE
+```
+
+Another way to configure s3cmd is to run `s3cmd --configure`, which
+launches an interactive tool that will assemble a configuration file for
+you on the basis of what you enter.
+
+In the next section you'll find a few sample `.s3cfg` files that can be
+used to configure s3cmd to interact with Riak CS.
+ +## Sample s3cmd Configuration File for Local Use + +Use this `.s3cfg` configuration file example to interact with Riak CS +locally via port `8080` with s3cmd (remember to use information specific +to your Riak CS installation where necessary): + +```config +[default] +access_key = 8QON4KC7BMAYYBCEX5J+ +bucket_location = US +cloudfront_host = cloudfront.amazonaws.com +cloudfront_resource = /2010-07-15/distribution +default_mime_type = binary/octet-stream +delete_removed = False +dry_run = False +enable_multipart = False +encoding = UTF-8 +encrypt = False +follow_symlinks = False +force = False +get_continue = False +gpg_command = /usr/local/bin/gpg +gpg_decrypt = %(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s +gpg_encrypt = %(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s +gpg_passphrase = password +guess_mime_type = True +host_base = s3.amazonaws.com +host_bucket = %(bucket)s.s3.amazonaws.com +human_readable_sizes = False +list_md5 = False +log_target_prefix = +preserve_attrs = True +progress_meter = True +proxy_host = localhost +proxy_port = 8080 +recursive = False +recv_chunk = 4096 +reduced_redundancy = False +secret_key = rGyDLBi7clBuvrdrkFA6mAJkwJ3ApUVr4Pr9Aw== +send_chunk = 4096 +simpledb_host = sdb.amazonaws.com +skip_existing = False +socket_timeout = 300 +urlencoding_mode = normal +use_https = False +verbosity = WARNING +signature_v2 = True +``` + +## Sample s3cmd Configuration File for Production Use + +Use this `.s3cfg` configuration file example to interact with Riak CS +using s3cmd in a production system: + +```config +[default] +access_key = EJ8IUJX9X0F2P9HAMIB0 +bucket_location = US +cloudfront_host = cloudfront.amazonaws.com +cloudfront_resource = /2010-07-15/distribution +default_mime_type = binary/octet-stream +delete_removed = False +dry_run = False +enable_multipart = False +encoding = UTF-8 +encrypt = False +follow_symlinks = False +force = False +get_continue = False +gpg_command = /usr/local/bin/gpg +gpg_decrypt = %(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s +gpg_encrypt = %(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s +gpg_passphrase = password +guess_mime_type = True +host_base = +host_bucket = %(bucket)s. +human_readable_sizes = False +list_md5 = False +log_target_prefix = +preserve_attrs = True +progress_meter = True +proxy_host = +proxy_port = 0 +recursive = False +recv_chunk = 4096 +reduced_redundancy = False +secret_key = XOY/9IFKVEDUl6Allrkj7oyH9XW+CANnFLEVuw== +send_chunk = 4096 +simpledb_host = sdb.amazonaws.com +skip_existing = False +socket_timeout = 300 +urlencoding_mode = normal +use_https = True +verbosity = WARNING +signature_v2 = True +``` + +To configure the s3cmd client for the user, you must change the +`access_key` and `secret_key` settings. + +## Specifying Storage Location + +By default, the `.s3cfg` file uses the Amazon S3 service as the storage +backend. For a Riak CS system, change the following settings to point to +your storage system: + +* `host_base` --- Specify the domain name or the path to your data + storage, such as `data.example.com` +* `host_bucket` --- Specify the bucket location, such as + `my_cs_bucket.data.example.com`. + +## Enabling SSL in the Client + +If you are using SSL, set `use_https` equal to `True`. 
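+
+## Testing the Configuration
+
+Once your `.s3cfg` is in place, a quick sanity check might look like the
+following sketch (the bucket and file names are hypothetical placeholders;
+remember the `--signature-v2` flag on s3cmd 1.5.0 or greater):
+
+```bash
+# Create a bucket, upload a file to it, and list its contents
+s3cmd --signature-v2 mb s3://my-test-bucket
+s3cmd --signature-v2 put ./hello.txt s3://my-test-bucket
+s3cmd --signature-v2 ls s3://my-test-bucket
+```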
diff --git a/content/riak/cs/2.1.2/cookbooks/configuration/stanchion.md b/content/riak/cs/2.1.2/cookbooks/configuration/stanchion.md
new file mode 100644
index 0000000000..b4bc3b23bb
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/configuration/stanchion.md
@@ -0,0 +1,127 @@
+---
+title: "Configuring Stanchion"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Configuring Stanchion"
+    identifier: "config_stanchion"
+    weight: 102
+    parent: "config"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/configuration/Configuring-Stanchion/
+  - /riak/cs/2.1.2/cookbooks/configuration/Configuring-Stanchion/
+  - /riak/cs/latest/cookbooks/configuration/stanchion/
+---
+
+In your cluster, you must include one -- and only one -- Stanchion node. All the
+Riak CS nodes in that cluster must then be configured to communicate with that
+Stanchion node so that the cluster is able to track and negotiate
+causally-sensitive operations.
+
+All of the settings used by the Stanchion node are stored in the
+`stanchion.conf` file, which is located in the `/etc/stanchion` folder on most
+operating systems.
+
+If you're upgrading from a version of Riak CS prior to 2.0.0 -- when the
+`stanchion.conf` and `riak-cs.conf` files were introduced -- you can still use
+the old-style `app.config` configuration files. Examples for both configuration
+types will be provided.
+
+```stanchionconf
+configuration.name = value
+```
+
+```appconfig
+{stanchion, [
+    %% Configs here
+]}
+```
+
+## Specifying the Stanchion IP Address and Port
+
+If you have a single node, you don't have to change the Stanchion settings
+because Stanchion simply listens to the requests from the local host. If your
+Riak CS cluster has multiple nodes, you must set the IP address and port that
+Stanchion listens on for requests from other nodes.
+
+You can set the IP using the `listener` parameter. Replace `127.0.0.1` with the
+IP address of the Stanchion node, and `8085` with the port of the Stanchion
+node.
+
+```stanchionconf
+listener = 127.0.0.1:8085
+```
+
+```appconfig
+{stanchion, [
+    {host, {"127.0.0.1", 8085}},
+    %% Other configs
+]}
+```
+
+{{% note title="Note on matching IP addresses" %}}
+The IP address you enter here must match the IP address specified for the
+`stanchion_host` variable in the Riak CS `riak-cs.conf` file.
+{{% /note %}}
+
+If you want to use SSL, make sure the `ssl.certfile` and `ssl.keyfile` settings
+are not commented out, and have been set correctly.
+
+```stanchionconf
+ssl.certfile = "./etc/cert.pem"
+ssl.keyfile = "./etc/key.pem"
+```
+
+```appconfig
+{stanchion, [
+    {ssl, [
+        {certfile, "./etc/cert.pem"},
+        {keyfile, "./etc/key.pem"}
+    ]},
+    %% Other configs
+]}
+```
+
+## Specifying the Admin User
+
+The admin user is created during the [configuration of Riak CS]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/riak-cs/#specifying-the-admin-user).
+The same user credentials must be added to each Stanchion node used in the
+cluster. This is set in the `stanchion.conf` file, which is located in the
+`/etc/stanchion` directory.
+Enter the same `admin.key` and `admin.secret` values
+that you set in the Riak CS `riak-cs.conf` file:
+
+```stanchionconf
+admin.key = OUCXMB6I3HOZ6D0GWO2D
+admin.secret = a58Mqd3qN-SqCoFIta58Mqd3qN7umE2hnunGag==
+```
+
+```appconfig
+{stanchion, [
+    %% Admin user credentials
+    {admin_key, "OUCXMB6I3HOZ6D0GWO2D"},
+    {admin_secret, "a58Mqd3qN-SqCoFIta58Mqd3qN7umE2hnunGag=="},
+    %% Other configs
+]}
+```
+
+## Specifying Riak Information
+
+If you are running a single node for experimentation, or if a Riak node is
+running locally and configured to listen for protocol buffer traffic on
+`0.0.0.0`, the default Riak configuration for Stanchion should be fine.
+
+Otherwise, update the IP address and port for the Riak host in the Stanchion
+configuration file.
+
+```stanchionconf
+riak_host = 127.0.0.1:8087
+```
+
+```appconfig
+{stanchion, [
+    {riak_host, {"127.0.0.1", 8087}},
+    %% Other configs
+]}
+```
diff --git a/content/riak/cs/2.1.2/cookbooks/configuration/transmit.md b/content/riak/cs/2.1.2/cookbooks/configuration/transmit.md
new file mode 100644
index 0000000000..9b0be69ef6
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/configuration/transmit.md
@@ -0,0 +1,81 @@
+---
+title: "Configuring Transmit"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Configuring Transmit"
+    identifier: "config_transmit"
+    weight: 104
+    parent: "api_s3"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/configuration/Configuring-Transmit/
+  - /riak/cs/2.1.2/cookbooks/configuration/Configuring-Transmit/
+  - /riak/cs/latest/cookbooks/configuration/transmit/
+---
+
+[Transmit](https://www.panic.com/transmit/) is an S3-compatible client with a
+graphical user interface for Mac OS X. The following guide describes how to configure Transmit for use with Riak CS.
+
+{{% note title="Note" %}}
+S3 support was added in Transmit version 4.4, so ensure that you're following
+along with a version that supports S3 before continuing.
+{{% /note %}}
+
+## Define a Connection
+
+When Transmit is started, a new connection window appears. Ensure that you've
+selected the **S3** tab, then complete the details in the **Connect to S3**
+dialog as follows:
+
+* **Server** --- Enter the fully qualified domain name of the Riak CS server here. Be sure that this matches the value specified for `cs_root_host` in the Riak CS `app.config`.
+
+* **Access Key ID** --- Enter the Access Key ID (`key_id`) for the user account you will use to connect to Riak CS.
+
+* **Secret** --- Enter the Access Key Secret (`key_secret`) matching the user account you entered for the Access Key ID above.
+
+* **Initial Path** --- If you're connecting to a Riak CS instance with existing buckets to which the user account has access, you can optionally enter a specific bucket name to use for this connection here.
+
+Defining a connection looks like this:
+
+![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit0.jpg)
+
+> **Note**
+>
+> Transmit expects a secure connection, so ensure that your Riak CS proxy server is configured with SSL support. For information on configuring a software solution like HAProxy with SSL for use with Riak CS, see [Load Balancing and Proxy Configuration]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/load-balancing-proxy).
+
+Finally, test the connection to Riak CS by clicking **Connect**.
+
+## Create a Bucket
+
+After successfully connecting to Riak CS, verify that you can create a bucket.
+
+1. From the **File** menu, select **New Bucket...**
+2. In the bucket creation dialog, enter the name of the new bucket
+3. Click **Create**
+
+The new bucket creation dialog looks like this:
+
+![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit1.jpg)
+
+The newly created bucket is listed in the right hand pane of the Transmit interface:
+
+![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit2.jpg)
+
+## Copy Files
+
+Now that you've created a bucket, you can perform a basic file copy test.
+
+Double-click the bucket icon in the right hand pane of the Transmit interface
+to access the bucket.
+
+Drag and drop one or more files to the right hand pane to initiate
+copying of the files to the bucket.
+
+After copying, the files will appear in the bucket:
+
+![Transmit screenshot]({{< baseurl >}}images/riak_cs_transmit3.jpg)
+
+You have now successfully configured a Transmit connection to Riak CS and
+verified basic file copying capabilities.
diff --git a/content/riak/cs/2.1.2/cookbooks/designate-admin-user.md b/content/riak/cs/2.1.2/cookbooks/designate-admin-user.md
new file mode 100644
index 0000000000..0c0a71fdf7
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/designate-admin-user.md
@@ -0,0 +1,21 @@
+---
+title: "Designating an Admin User"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/Designating-an-Admin-User/
+  - /riak/cs/2.1.2/cookbooks/Designating-an-Admin-User/
+  - /riak/cs/latest/cookbooks/designate-admin-user/
+---
+
+Once a user has been created, you can designate that user as an admin by
+editing and replacing the `admin_key` and `admin_secret` in `app.config`
+with the user's credentials. Once this is done, do not forget to update
+the same credentials in the Stanchion `app.config` as well.
+
+{{% note title="Note on the admin role" %}}
+This is a powerful role and gives the designee administrative capabilities
+within the system. As such, caution should be used to protect the access
+credentials of the admin user.
+{{% /note %}}
diff --git a/content/riak/cs/2.1.2/cookbooks/faqs/riak-cs.md b/content/riak/cs/2.1.2/cookbooks/faqs/riak-cs.md
new file mode 100644
index 0000000000..76aca01624
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/faqs/riak-cs.md
@@ -0,0 +1,65 @@
+---
+title: "Riak CS FAQs"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "FAQs"
+    identifier: "reference_faq"
+    weight: 100
+    parent: "reference"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/faqs/riak-cs/
+  - /riak/cs/latest/cookbooks/faqs/riak-cs/
+---
+
+Q: What is Riak CS?
+A:
+  Riak CS is [multi-tenant](http://en.wikipedia.org/wiki/Multitenancy) cloud storage software for public and private clouds. Built on Basho's distributed database [Riak KV]({{< baseurl >}}riak/kv/2.1.3), Riak CS is commercial software designed to provide simple, available, distributed cloud storage at any scale. Riak CS is S3 API compatible and supports per-tenant reporting for billing and metering use cases.
+
+Q: Can users share data?
+A:
+  Data is private by default. Users can manipulate Access Control Lists (ACLs) to grant access to their buckets or objects to other users---or even to unauthenticated requesters.
+
+  For implementation details, see the [Access Control Lists](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) documentation.
+
+Q: Is it possible to specify a filesystem where my Riak CS buckets will live?
+A: You can specify the location of **all** Riak CS bucket data by changing the settings for Riak's backends to a path on a particular filesystem. If this is your goal, you can configure Riak to suit your environment.
+
+  If you look at our example Riak `advanced.config`/`app.config` backend
+  definition from the [Configuring Riak for CS]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/riak-for-cs) section, it looks like this:
+
+  ```advancedconfig
+  {riak_kv, [
+    {add_paths, ["/usr/lib/riak-cs/lib/riak_cs-2.1.2/ebin"]},
+    {storage_backend, riak_cs_kv_multi_backend},
+    {multi_backend_prefix_list, [{<<"0b:">>, be_blocks}]},
+    {multi_backend_default, be_default},
+    {multi_backend, [
+      {be_default, riak_kv_eleveldb_backend, [
+        {total_leveldb_mem_percent, 30},
+        {data_root, "/var/lib/riak/leveldb"}
+      ]},
+      {be_blocks, riak_kv_bitcask_backend, [
+        {data_root, "/var/lib/riak/bitcask"}
+      ]}
+    ]},
+    %% Other configs
+  ]},
+  %% Other sections
+  ```
+
+  You need to change the two `data_root` values, substituting `/var/lib/riak` as shown in the above example with the path to the filesystem you prefer. Please note that you should do this before starting Riak for the
+  first time; if you do make these changes *after* you've started Riak, the previous data will not be available unless you manually move it to the new location.
+
+  This will allow you to change the location of all bucket data. Riak CS does not currently support specifying per-bucket filesystem granularity.
+
+Q: Does Riak CS encrypt data at rest?
+A: No, Riak CS does not currently support encryption of data at rest.
+
+Q: Does Riak CS support compression of objects at rest?
+A: By default, the LevelDB backend used by Riak CS relies on [gzip](http://www.gzip.org/) compression for data at higher levels in LevelDB's storage system. Any additional object compression needs to be performed by clients connecting to Riak CS.
+
+Q: Does Riak CS support object search?
+A: There is currently no search functionality in Riak CS. Search functionality can be provided using an external application to read and index items stored in Riak CS (provided that they are not encrypted).
diff --git a/content/riak/cs/2.1.2/cookbooks/fog.md b/content/riak/cs/2.1.2/cookbooks/fog.md
new file mode 100644
index 0000000000..bf9486149f
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/fog.md
@@ -0,0 +1,154 @@
+---
+title: "Fog on Riak CS"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Fog on Riak CS"
+    identifier: "cookbook_fog"
+    weight: 102
+    parent: "api_s3"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/fog/
+  - /riak/cs/latest/cookbooks/fog/
+---
+
+Fog is a general cloud services library written in Ruby. It is built to
+support as many cloud providers as possible, ranging from most AWS
+services to Rackspace, Linode, Joyent, and beyond, and it includes an
+extension for Riak CS.
+
+You can install it via [RubyGems](http://rubygems.org/):
+
+```bash
+gem install fog
+```
+
+Or using [Bundler](http://gembundler.com/):
+
+```ruby
+gem "fog", "~> 1.10.1"
+```
+
+## User Management
+
+The first thing that needs to be done when using Fog is creating a new
+user. Before you can do that, however, you must create connections to
+your Riak CS server to handle communication to different services.
+
+### Setup
+
+First, create a new instance of the provisioning object (capitalized
+constants are to be set by you).
+
+```ruby
+client = Fog::RiakCS::Provisioning.new(
+  :riakcs_access_key_id => RIAK_CS_ADMIN_KEY,
+  :riakcs_secret_access_key => RIAK_CS_ADMIN_SECRET,
+  :host => RIAK_CS_HOST,
+  :port => RIAK_CS_PORT
+)
+```
+
+### Create User
+
+The following command creates a user, given an email address and name.
+This will
+either return a response object or raise an error if the operation
+fails. The response body will contain a JSON document with the
+user's information; the `key_id` it contains is required for further
+operations on the user.
+
+```ruby
+response = client.create_user(email, name)
+```
+
+### List Users
+
+You can list the users in the current Riak CS cluster, optionally
+filtering by the user's status. The response body is an array of hashes
+representing each matching user.
+
+```ruby
+users = client.list_users(:status => 'enabled')
+```
+
+### Get User
+
+With the user's `key_id` (`riakcs_access_key_id`), `get_user` either
+returns a JSON document describing the user or raises an error if the
+user doesn't exist.
+
+```ruby
+user = client.get_user(key_id)
+user.body
+# {"key_secret"=>"XXX", "display_name"=>"dizzy", "email"=>"dizzy@basho.com", "status"=>"enabled", "name"=>"Eric Redmond", "key_id"=>"YYY", "id"=>"ZZZ"}
+```
+
+### Manage User
+
+You can enable or disable users' access with the following commands.
+
+```ruby
+client.enable_user(key_id)
+client.disable_user(key_id)
+```
+
+You can also revoke users' current credentials and grant new
+credentials. The `regrant_secret` function returns a JSON document with
+the users' refreshed credentials.
+
+```ruby
+client.regrant_secret(key_id)
+```
+
+## Usage Retrieval
+
+The usage API fetches information about Riak CS requests.
+
+### Setup
+
+First, create a new instance of the Usage object.
+
+```ruby
+usage = Fog::RiakCS::Usage.new(
+  :riakcs_access_key_id => RIAK_CS_ADMIN_KEY,
+  :riakcs_secret_access_key => RIAK_CS_ADMIN_SECRET,
+  :host => RIAK_CS_HOST,
+  :port => RIAK_CS_PORT
+)
+```
+
+The example below is targeted at [riakcs.net](https://www.riakcs.net):
+
+```ruby
+usage = Fog::RiakCS::Usage.new(
+  :riakcs_access_key_id => 'XXXPRQ_MVWUC7QZ5OBHF',
+  :riakcs_secret_access_key => 'Hhti-b9YFBjYkFgFFq5PbrOs2pFgBIhu3LF6Aw==',
+  :host => 'data.riakcs.net',
+  :port => 8080
+)
+```
+
+**Note**: You may use regular (non-admin) credentials for usage
+retrieval if you are accessing your own usage.
+
+### Get Usage
+
+The `get_usage` method returns usage information for the
+`requested_key_id`. You can choose which type of usage you want via the
+`:types` attribute: `:access` or `:storage` (defaults to both). You may
+also specify a `:start_time` and an `:end_time` (this defaults to the
+previous 24-hour window). You'll receive a response object, whose `body`
+is a nested set of hashes containing usage data broken down by `type`,
+and further by `node`.
+
+```ruby
+response = usage.get_usage(requested_key_id,
+                           :types => [:access, :storage],
+                           :start_time => start_time,
+                           :end_time => end_time)
+```
+
+If user access is denied, it will return an `Excon::Errors::Forbidden`
+error.
diff --git a/content/riak/cs/2.1.2/cookbooks/garbage-collection.md b/content/riak/cs/2.1.2/cookbooks/garbage-collection.md
new file mode 100644
index 0000000000..037ba9ed2c
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/garbage-collection.md
@@ -0,0 +1,265 @@
+---
+title: "Garbage Collection"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Garbage Collection"
+    identifier: "theory_garbage_collection"
+    weight: 102
+    parent: "theory"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/garbage-collection/
+  - /riak/cs/latest/cookbooks/garbage-collection/
+---
+
+This document describes some of the implementation details behind Riak
+CS's garbage collection process.
+For information on configuring this
+system, please see our documentation on [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/riak-cs).
+
+## Versions and Manifests
+
+In Riak CS, a named object may have multiple **versions** that are stored
+in the system at any given time. These versions are not exposed to end
+users and are used only for internal purposes. Each version of the
+object is accessible via an object **manifest** that includes a
+[UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier) for
+that version.
+
+At the system level, Riak CS attempts to have only one _active_ manifest
+for a named object at any given time, although multiple active manifests
+can coexist in some cases. Even then, **only one active object
+manifest is available to users accessing Riak CS at any given time**,
+which means that Riak CS users are never exposed to multiple manifests.
+
+Garbage collection (GC) of object versions involves a variety of actions
+that can be divided into two essential phases:
+
+1. Synchronous actions that occur in the foreground while the user is
+   waiting for notification of successful command completion
+2. Asynchronous actions that occur in the background and are not
+   directly tied to user actions
+
+These two phases are described in more detail in the following sections.
+
+{{% note title="Note on manifest pruning" %}}
+A Riak CS object's manifest is updated any time a write, i.e. a `PUT` or
+`DELETE` request, is issued, which means that manifest sizes can grow
+significantly over time. This can lead to latency problems. Riak CS's GC
+subsystem will prune these manifests. If you're experiencing manifest-related
+issues, we would recommend using GC.
+{{% /note %}}
+
+## Synchronous GC Actions
+
+Riak CS users can undertake two actions to initiate garbage collection
+of an object version:
+
+1. Overwriting the object with a new version
+2. Deleting the object
+
+When an object version is overwritten, a new object manifest is written
+with the state set to `active`. This new version is then made available
+to Riak CS users. When an object is explicitly deleted, however, this
+means that no active versions remain and thus that the object is no
+longer externally available to users.
+
+Behind the scenes, overwriting or deleting an object also means that a
+set of eligible manifest versions is determined; the state of each
+eligible manifest is then changed to `pending_delete` and the
+`delete_marked_time` field is set to a time value representing the
+current time.
+
+The method for compiling the list of eligible manifests is dependent
+on the operation, i.e. whether the object is being overwritten or
+deleted.
+
+If the object is being overwritten, the previously `active` manifest
+version is selected along with any manifest versions that are in the
+`writing` state. An object is in a `writing` state if the
+`last_block_written_time` field represents a time value greater than
+`gc.leeway_period` ago (or the `write_start_time` in cases where the
+`last_block_written_time` is undefined).
+
+If a manifest version remains in the `writing` state for longer than
+`gc.leeway_period`, Riak CS assumes that that manifest version
+represents a failed upload attempt. In that case, Riak CS deems it
+acceptable to reap any object blocks that may have been written.
+
+Manifest versions in the `writing` state whose `last_block_written_time`
+has not exceeded the `gc.leeway_period` threshold are _not_ deemed
+eligible because they could represent an object version that is still in
+the process of writing its blocks.
+
+Object deletes are more straightforward. Since no object is externally
+available to the user after a delete operation, any manifest versions
+in the `active` or `writing` state are eligible to be cleaned up. In
+this case, there is no concern about reaping the object version that is
+currently being written to become the next `active` version.
+
+Once the states of the eligible manifests have been updated to
+`pending_delete`, the manifest information for any `pending_delete`
+manifest versions is collected into a CRDT set, and the set is written
+as a value to the `riak-cs-gc` bucket, keyed by a time value
+representing the current epoch time. If that write is
+successful, then the state for each manifest in the set is updated to
+`scheduled_delete`. This indicates that the blocks of the object have
+been scheduled for deletion by the garbage collection daemon and
+prevents other manifest resolution processes for the object from
+scheduling unnecessary deletions.
+
+The use of the current epoch time as the basis for the keys in the
+`riak-cs-gc` bucket is a change from previous versions of Riak
+CS. Previously, the current epoch time plus the value of `gc.leeway_period`
+was used. This change means that the `gc.leeway_period` interval is
+enforced by the garbage collection daemon process and not during the
+synchronous portion of the garbage collection process. The benefit of
+this is that the `gc.leeway_period` interval may be changed for objects
+that have already been deleted or overwritten, allowing system
+operators to reap objects sooner than the originally specified
+`gc.leeway_period` interval if necessary.
+
+Once the manifest enters the `scheduled_delete` state, it remains as a
+tombstone for a minimum of `gc.leeway_period`.
+
+After these actions have been attempted, the synchronous portion of the
+garbage collection process is concluded and a response is returned to
+the user who issued the request.
+
+## Garbage Collection Daemon
+
+The asynchronous portion of the garbage collection process is
+orchestrated by the garbage collection daemon, which wakes up at specific
+intervals and checks the `riak-cs-gc` bucket for any scheduled entries
+that are eligible for reaping.
+
+The daemon gathers the eligible keys for deletion by performing a
+secondary index range query on the `$key` index with a lower bound of
+time *0* and an upper bound of the current time. This allows the
+daemon to collect all the keys that are eligible for deletion while
+having some way of accounting for clock skew.
+
+The daemon may also be configured to use more efficient paginated
+index queries to gather the deletion-eligible keys by setting the
+`gc_paginated_indexes` configuration option to `true`. In this case the GC
+daemon requests up to `gc_batch_size` keys from the GC bucket and
+deletes the manifests associated with those keys before requesting the
+next set of keys.
+
+The initial query performed by the garbage collection daemon may
+return a subset of the eligible records if `gc_paginated_indexes` is
+`true`, or all eligible records otherwise.
+
+The daemon starts up a worker process to carry out the actual reaping
+of the records and passes it the batch of keys from the query of the
+`riak-cs-gc` bucket.
+The value for each key received by the worker
+process is a set containing one or more object manifests that must be
+reaped. The worker process removes the objects represented by each
+object manifest in the set and then notifies the garbage collection
+daemon that it has completed the task and is available for more work.
+
+Meanwhile, the daemon repeats the process of querying the `riak-cs-gc`
+bucket for more eligible records to delete and feeding the resulting
+keys to worker processes until either the maximum number of worker
+processes is reached (`gc.max_workers`) or there are no remaining
+records eligible for removal.
+
+Deletion eligibility is determined using the key values in the
+`riak-cs-gc` bucket. The keys in the `riak-cs-gc` bucket are
+representations of epoch time values with random suffixes
+appended. The purpose of the random suffix is to avoid hot keys when
+the system is dealing with high volumes of deletes or overwrites. If
+the current time according to the daemon minus the leeway interval is
+later than the time represented by a key, then the blocks for any
+object manifests stored at that key are eligible for deletion and the
+daemon passes them off to a worker process that attempts to delete
+them.
+
+There are two levels of concurrency within the garbage collection
+process. The first is the use of worker processes by the garbage
+collection daemon to allow different groups of eligible records from
+the garbage collection bucket to be processed independently. The
+second is that multiple worker processes can be employed in the
+deletion of data blocks associated with a single object. The latter is
+discussed more in the *Object Block Reaping* section below.
+
+Once all of the objects represented by manifests stored for a
+particular key in the `riak-cs-gc` bucket have been deleted, the key
+is deleted from the `riak-cs-gc` bucket.
+
+### One Daemon per Cluster
+
+We recommend using only _one_ active garbage collection daemon in any
+Riak CS cluster. If multiple daemons are currently being used, you can
+disable the others by setting the `gc.interval` parameter to `infinity`
+on those nodes. More information on how to do that can be found in the
+[CS configuration doc]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/riak-cs/#garbage-collection-settings).
+
+## Controlling the GC Daemon
+
+The garbage collection daemon may be queried and manipulated using the
+`riak-cs-gc` script. The script is installed to the `bin` or `sbin`
+directory (depending on OS) along with the primary `riak-cs` script.
+The available commands that can be used with the `riak-cs-gc` script are
+listed below. Running the script with no command provided displays a
+list of the available commands.
+
+Command | Description
+:-------|:-----------
+`batch` | Manually start garbage collection for a batch of eligible objects. This command takes an optional argument to indicate a leeway time other than the currently configured `gc.leeway_period` time for the batch.
+`status` | Get the current status of the garbage collection daemon. The output is dependent on the current state of the daemon.
+`pause` | Pause the current batch of object garbage collection. It has no effect if there is no active batch.
+`resume` | Resume a paused garbage collection batch. It has no effect if there is no previously paused batch.
+`set-interval` | Set or update the garbage collection interval. This setting uses a unit of seconds.
This setting indicates how many seconds must elapse after an object is deleted or overwritten before the garbage collection system may reap the object. This setting uses a unit of seconds. + +For more information, see our documentation on [Riak CS command-line tools]({{}}riak/cs/2.1.2/cookbooks/command-line-tools). + +## Manifest Updates + +Manifest versions are retrieved and updated by the +`riak_cs_manifest_fsm` module with very few exceptions. This module +encapsulates the logic needed to retrieve the manifests, resolve any +conflicts due to siblings, and write updated manifest versions back to +Riak. + +## Object Block Reaping + +The actual deletion of the blocks of an object is managed by the +`riak_cs_delete_fsm` module. It starts up a number of delete workers +(based on the configured delete concurrency) and passes off object +block information to those workers who in turn carry out the actual +delete operation for that block. The delete workers are instances of +the `riak_cs_block_server` module. + +Once a worker deletes a block it notifies the delete fsm and waits for +notification about another block to delete. Once all blocks of an +object are deleted then the delete fsm starts an instance of the +manifest fsm to handle deleting the manifest version from the object +manifest data structure and if there are no remaining manifest +versions to delete the entire object manifest data structure. The goal +of this final step is to avoid the cost of scanning through empty +manifest keys that could linger indefinitely. + +## Trade-offs + +1. A **slow** reader may have blocks GC'd as it is reading an object if + the read exceeds the leeway interval. +2. There is some reliance on system clocks and this could lead to object + blocks being deleted earlier or later than their intended eligibility + window dictates due to clock skew. +3. A network partition (or machine failure) lasting longer than + `gc.leeway_period` could cause a manifest to "come back to life" and + appear active, it would then continually serve requests whose blocks + could not be found. + +## Configuration + +Riak CS's garbage collection implementation gives the deployer several +knobs to adjust for fine-tuning system performace. More information +can be found in our documentation on [configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs/#garbage-collection-settings). + +## More Information + +If you'd like more in-depth material on garbage collection in Riak CS, +we recommend consulting the [Riak CS wiki](https://github.com/basho/riak_cs/wiki/Object-Chunking-and-Garbage-Collection) diff --git a/content/riak/cs/2.1.2/cookbooks/installing.md b/content/riak/cs/2.1.2/cookbooks/installing.md new file mode 100644 index 0000000000..4e50b01f6d --- /dev/null +++ b/content/riak/cs/2.1.2/cookbooks/installing.md @@ -0,0 +1,363 @@ +--- +title: "Installing Riak CS" +description: "" +menu: + riak_cs-2.1.2: + name: "Installing Riak CS" + identifier: "installing" + weight: 200 + parent: "index" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/cookbooks/installing/Installing-Riak-CS/ + - /riak/cs/2.1.2/cookbooks/installing/Installing-Riak-CS/ + - /riak/cs/latest/cookbooks/installing/ +--- + +Riak CS is supported on a variety of operating systems, including +Ubuntu, CentOS, Fedora, Solaris, SmartOS, FreeBSD, and OS X. Riak CS is +*not* supported on Microsoft Windows. + +You can install Riak CS on a single node (for development purposes) or +using an automated deployment tool. 
Any Riak CS installation involves +three components, all of which must be installed separately: + +* [Riak KV]({{}}riak/kv/2.1.4/) --- The distributed database on top of which Riak CS +is built +* Riak CS itself +* [Stanchion]({{}}riak/cs/2.1.2/theory/stanchion) --- An application used to manage [globally unique entities]({{}}riak/cs/2.1.2/theory/stanchion/#globally-unique-entities) such as users and buckets. + +[Riak KV](#installing-riak) and [Riak CS](#installing-riak-cs-on-a-node) must be installed on each node in your cluster. [Stanchion](#installing-stanchion-on-a-node), however, needs to be installed on only one node. + +## Version Compatibility + +We strongly recommend using one of the documented [version combinations]({{}}riak/cs/2.1.2/cookbooks/version-compatibility/) +when installing and running Riak CS. + +## Installing Riak KV + +Before installing Riak CS, Riak KV must be installed on each node in +your cluster. You can install Riak KV either as part of an OS-specific package +or from source. + + * [Debian and Ubuntu]({{}}riak/kv/2.1.4/setup/installing/debian-ubuntu) + * [RHEL and CentOS]({{}}riak/kv/2.1.4/setup/installing/rhel-centos) + * [Mac OS X]({{}}riak/kv/2.1.4/setup/installing/mac-osx) + * [FreeBSD]({{}}riak/kv/2.1.4/setup/installing/freebsd) + * [SUSE]({{}}riak/kv/2.1.4/setup/installing/suse) + * [From Source]({{}}riak/kv/2.1.4/setup/installing/source) + +Riak is also officially supported on the following public cloud +infrastructures: + + * [Windows Azure]({{}}riak/kv/2.1.4/setup/installing/windows-azure) + * [AWS Marketplace]({{}}riak/kv/2.1.4/setup/installing/amazon-web-services) + +Remember that you must repeat this installation process on each node in +your cluster. For future reference, you should make note of the Riak KV +installation directory. + +If you want to fully configure Riak KV prior to installing Riak CS, see our +documentation on [configuring Riak KV for CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-for-cs/). + +## Installing Riak CS on a Node + +Riak CS and Stanchion packages are available on the [Download Riak CS]({{}}riak/cs/2.1.2/downloads/) +page. Similarly, Riak packages are available on the [Download Riak KV]({{}}riak/kv/2.1.4/downloads/) page. + +After downloading Riak CS, Stanchion, and Riak, install them using your +operating system's package management commands. + +> **Note on Riak CS and public ports** +> +> **Riak CS is not designed to function directly on TCP port 80, and +it should not be operated in a manner that exposes it directly to the +public internet**. Instead, consider a load-balancing solution +such as a dedicated device [HAProxy](http://haproxy.1wt.eu) or [Nginx](http://wiki.nginx.org/Main) between Riak CS and the outside world. + +### Installing Riak CS on Mac OS X + +To install Riak CS on OS X, first download the appropriate package from +the [downloads]({{}}riak/cs/2.1.2/downloads) page: + +```bash +curl -O http://s3.amazonaws.com/downloads.basho.com/riak-cs/1.5/2.1.2/osx/10.8/riak-cs-2.1.2-OSX-x86_64.tar.gz +``` + +Then, unpack the downloaded tarball: + +```bash +tar -xvzf riak-cs-2.1.2-OSX-x86_64.tar.gz +``` + +At this point, you can move on to [configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs/). + +### Installing Riak CS on Debian or Ubuntu + +On Debian and Ubuntu, Riak CS packages are hosted on +[packagecloud.io](https://packagecloud.io/basho/riak-cs). 
Instructions +for installing via shell scripts, manual installation, Chef, and Puppet +can be found in packagecloud's [installation docs](https://packagecloud.io/basho/riak/install). + +Platform-specific pages are linked below: + +* [Lucid](https://packagecloud.io/basho/riak-cs/riak-cs_2.1.2-1_amd64.deb?distro=lucid) +* [Precise](https://packagecloud.io/basho/riak-cs/riak-cs_2.1.2-1_amd64.deb?distro=precise) +* [Squeeze](https://packagecloud.io/basho/riak-cs/riak-cs_2.1.2-1_amd64.deb?distro=squeeze) +* [Trusty](https://packagecloud.io/basho/riak-cs/riak-cs_2.1.2-1_amd64.deb?distro=trusty) +* [Wheezy](https://packagecloud.io/basho/riak-cs/riak-cs_2.1.2-1_amd64.deb?distro=wheezy) + +#### Advanced apt Installation + +For the simplest installation process on LTS (Long-Term Support) +releases, use `apt-get`. First, you must retrieve the signing key: + +```curl +curl https://packagecloud.io/gpg.key | sudo apt-key add - +``` + +Second, you must install the `apt-transport-https` package in order to +be able to fetch packages over HTTPS: + +```curl +sudo apt-get install -y apt-transport-https +``` + +With HTTPS enabled, we recommend adding the desired Riak CS package to +your `.list` file. packagecloud can autogenerate such a file on the +basis of a name that you specify, e.g. a hostname, and the desired +operating system and distribution. The following example script would +store your hostname in the variable `HOSTNAME`, send that information to +packagecloud to autogenerate a `.list` file, and then store the return +value in a file called `basho.list`, which is stored in the +`/etc/apt/sources.list.d` directory. This example script is specific to +the Precise Ubuntu distribution: + +```bash +#!/bin/bash + +HOSTNAME=`hostname -f` +FILENAME=/etc/apt/sources.list.d/basho.list +OS=ubuntu +DIST=precise +PACKAGE_CLOUD_RIAK_CS_DIR=https://packagecloud.io/install/repositories/basho/riak-cs +curl "${PACKAGE_CLOUD_RIAK_CS_DIR}/config_file.list?os=${OS}&dist=${DIST}&name=${HOSTNAME}" > $FILENAME +``` + +The `name` that you submit to packagecloud can be anything you like. The +`HOSTNAME` used above was for example purposes. The resulting file +should hold contents like the following: + +``` +# this file was generated by packagecloud.io for +# the repository at https://packagecloud.io/basho/riak + +deb https://packagecloud.io/basho/riak-cs/ubuntu/ precise main +deb-src https://packagecloud.io/basho/riak-cs/ubuntu/ precise main +``` + +With your `basho.list` file populated, you can update your apt sources +list: + +```bash +sudo apt-get update +``` + +Now install the `riak-cs` package: + +```bash +sudo apt-get install riak-cs +``` + +### Installing Riak CS on RHEL or CentOS + + +On RHEL or CentOS, Riak CS packages are hosted on +[packagecloud.io](https://packagecloud.io/basho/riak-cs). Instructions +for installing via shell scripts, manual installation, Chef, and Puppet +can be found in packagecloud's [installation +docs](https://packagecloud.io/basho/riak-cs/install). + +Platform-specific pages are linked below: + +* [el5](https://packagecloud.io/basho/riak-cs/riak-cs-2.1.2-1.x86_64.rpm?distro=5) +* [el6](https://packagecloud.io/basho/riak-cs/packages/el/6/riak-cs-2.1.2-1.el6.x86_64.rpm) + +* [Fedora 19](https://packagecloud.io/basho/riak-cs/riak-cs-2.1.2-1.fc19.x86_64.rpm?distro=19) + +#### Advanced rpm Installation + +For the simplest installation process on LTS (Long-Term Support) +releases, use yum. 
First, you must install the `pygpgme` package, which
+enables yum to handle [GPG](https://www.gnupg.org/) signatures:
+
+```bash
+sudo yum install pygpgme
+```
+
+If you wish to install using a `.repo` file, packagecloud can generate
+one for you on the basis of a name that you specify, e.g. a hostname,
+and the desired operating system and distribution. The following example
+script would store your hostname in the variable `HOSTNAME`, send that
+information to packagecloud to generate a `.repo` file, and then store
+the return value in a file called `basho.repo`, which is stored in the
+`/etc/yum.repos.d` directory:
+
+```bash
+#!/bin/bash
+
+HOSTNAME=`hostname -f`
+FILENAME=/etc/yum.repos.d/basho.repo
+OS=el
+DIST=5
+PACKAGE_CLOUD_RIAK_CS_DIR=https://packagecloud.io/install/repositories/basho/riak-cs
+curl "${PACKAGE_CLOUD_RIAK_CS_DIR}/config_file.repo?os=${OS}&dist=${DIST}&name=${HOSTNAME}" > $FILENAME
+```
+
+The `name` that you submit to packagecloud can be anything you like. The
+`HOSTNAME` used above was for example purposes. The resulting file
+should hold contents like the following:
+
+```
+[basho_riak-cs]
+name=basho_riak-cs
+baseurl=https://packagecloud.io/basho/riak-cs/el/5/$basearch
+repo_gpgcheck=1
+gpgcheck=0
+enabled=1
+gpgkey=https://packagecloud.io/gpg.key
+sslverify=1
+sslcacert=/etc/pki/tls/certs/ca-bundle.crt
+```
+
+With your `basho.repo` file populated, you can update your rpm sources
+list.
+
+## Installing Stanchion on a Node
+
+Stanchion is an application that manages globally unique entities within
+a Riak CS cluster. It performs actions such as ensuring unique user
+accounts and bucket names across the whole system. **Riak CS cannot be
+used without Stanchion**.
+
+All Riak CS nodes must be configured to communicate with a single
+Stanchion node. Although multiple Stanchion instances may be installed
+and running within a cluster, even one on each node, only one may be
+actively used by the cluster. Running multiple instances of Stanchion
+simultaneously can produce a variety of problems, such as the inability
+to create user accounts and buckets or the inability to enforce their
+uniqueness.
+
+Because only one Stanchion instance can be used at any given time, it's
+not uncommon for a load balancer to be used to handle Stanchion failover
+in the event that the primary Stanchion node becomes unavailable. You
+can achieve this by specifying a load balancer IP as the Stanchion IP
+in each Riak CS node's `riak-cs.conf`. This load balancer must be
+configured to send all requests to a single Stanchion node, failing over
+to a secondary Stanchion node if the primary is unavailable. More
+details can be found in [Specifying the Stanchion Node]({{}}riak/cs/2.1.2/cookbooks/configuration/#specifying-the-stanchion-node).
+
+### Installing Stanchion on Mac OS X
+
+First, download the appropriate package from the [downloads]({{}}riak/cs/2.1.2/downloads/#stanchion-1-4-3) page:
+
+```bash
+curl -O http://s3.amazonaws.com/downloads.basho.com/stanchion/1.4/1.4.3/osx/10.8/stanchion-2.0.0-OSX-x86_64.tar.gz
+```
+
+Then, unpack the downloaded tarball:
+
+```bash
+tar -xvzf stanchion-2.0.0-OSX-x86_64.tar.gz
+```
+
+At this point, you can move on to [configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs).
+
+### Installing Stanchion on Debian or Ubuntu
+
+On Debian or Ubuntu, you can either use `apt` or install the `.deb`
+package manually.
+
+#### Installing Using `apt` (recommended)
+
+First, install the signing key:
+
+```curl
+curl http://apt.basho.com/gpg/basho.apt.key | sudo apt-key add -
+```
+
+Once the signing key has been added, add the Basho repository to your
+`apt` sources list (and update it):
+
+```bash
+sudo bash -c "echo deb http://apt.basho.com $(lsb_release -sc) main > /etc/apt/sources.list.d/basho.list"
+sudo apt-get update
+```
+
+Now, install the `stanchion` package:
+
+```bash
+sudo apt-get install stanchion
+```
+
+At this point, you can move on to [configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs).
+
+#### Installing the `.deb` Package Manually (not recommended)
+
+```bash
+sudo dpkg -i <stanchion-package.deb>
+```
+
+Replace `<stanchion-package.deb>` with the actual filename for the
+package you are installing.
+
+At this point, you can move on to [configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs).
+
+### Installing Stanchion on RHEL or CentOS
+
+On RHEL or CentOS, you can either use `yum` or install the `.rpm`
+package manually.
+
+#### Installing Using `yum` (recommended)
+
+For CentOS/RHEL 6:
+
+```bash
+sudo yum install http://yum.basho.com/gpg/basho-release-6-1.noarch.rpm
+```
+
+For CentOS/RHEL 5:
+
+```bash
+sudo yum install http://yum.basho.com/gpg/basho-release-5-1.noarch.rpm
+```
+
+Once the `.rpm` package has been installed, install Stanchion:
+
+```bash
+sudo yum install stanchion
+```
+
+At this point, you can move on to [configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs).
+
+#### Installing the `.rpm` Package Manually (not recommended)
+
+```bash
+sudo rpm -Uvh <stanchion-package.rpm>
+```
+
+Replace `<stanchion-package.rpm>` with the actual filename for the
+package you are installing.
+
+At this point, you can move on to [configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs).
+
+> **Note on SELinux**
+>
+> CentOS enables Security-Enhanced Linux (SELinux) by default. If you
+encounter errors during installation, try disabling SELinux.
+
+## What's Next?
+
+Once you've completed installation of Riak CS and Riak, you're ready to
+learn more about [configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs).
diff --git a/content/riak/cs/2.1.2/cookbooks/installing/chef.md b/content/riak/cs/2.1.2/cookbooks/installing/chef.md
new file mode 100644
index 0000000000..e6aa9af744
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/installing/chef.md
@@ -0,0 +1,188 @@
+---
+title: "Installing Riak CS With Chef"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Installing With Chef"
+    identifier: "installing_chef"
+    weight: 201
+    parent: "index"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/installing/Riak-CS-Using-Chef/
+  - /riak/cs/2.1.2/cookbooks/installing/Riak-CS-Using-Chef/
+  - /riak/cs/latest/cookbooks/installing/chef/
+---
+
+If you manage your infrastructure with [Chef](http://www.opscode.com/chef/),
+the open-source configuration management framework, you'll be happy to know
+that we maintain a [cookbook](http://community.opscode.com/cookbooks/riak-cs)
+for installing Riak CS with Chef.
+
+## Getting Started
+
+The Riak CS cookbook can be used (alongside the Riak cookbook) by adding
+the following recipes to your run list:
+
+```ruby
+run_list(
+  "recipe[riak-cs::package]",
+  "recipe[riak]",
+  "recipe[riak-cs]",
+  "recipe[riak-cs::stanchion]"
+)
+```
+
+The default settings will cause Riak and Riak CS to be installed and
+configured via Basho-maintained package repositories.
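+
+For example, assuming a Chef server and an already-bootstrapped node
+named `cs-node-1` (a hypothetical name; substitute your own), the same
+recipes could be appended to a node's run list with `knife`:
+
+```bash
+# Hypothetical node name; use one registered with your Chef server.
+knife node run_list add cs-node-1 \
+  "recipe[riak-cs::package],recipe[riak],recipe[riak-cs],recipe[riak-cs::stanchion]"
+```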
+
+### Package Installation
+
+There are two options for installation: `package` and `enterprise_package`.
+`package` is the default; it installs the open source Riak CS and is the
+recommended option for Red Hat- and Debian-based operating systems. For
+source installations of Riak, Erlang/OTP R15B01 and above is recommended.
+
+### Enterprise Installation
+
+To install Riak CS Enterprise, populate
+`node['riak_cs']['package']['enterprise_key']` with a Basho-provided key
+for the release.
+
+Riak Enterprise installations managed through the cookbook must be
+installed via a package.
+
+### Basic Configuration
+
+All the configuration options exist within the `node['riak_cs']['config']`
+namespace. In cases where an Erlang data type is necessary, use the
+appropriate methods from
+[`erlang_template_helper`](https://github.com/basho/erlang_template_helper).
+
+#### Networking
+
+Riak CS and Stanchion communicate with Riak through the Protocol Buffers
+interface. By default, Riak listens for Protocol Buffers connections on
+port `8087`:
+
+```ruby
+# Riak CS
+default['riak_cs']['config']['riak_cs']['riak_ip'] = node['ipaddress'].to_erl_string
+default['riak_cs']['config']['riak_cs']['riak_pb_port'] = 8087
+
+# Stanchion
+default['stanchion']['config']['stanchion']['riak_ip'] = node['ipaddress'].to_erl_string
+default['stanchion']['config']['stanchion']['riak_pb_port'] = 8087
+```
+
+At the same time, Riak CS listens for HTTP requests on port `8080` and
+Stanchion on port `8085`:
+
+```ruby
+# Riak CS
+default['riak_cs']['config']['riak_cs']['cs_ip'] = node['ipaddress'].to_erl_string
+default['riak_cs']['config']['riak_cs']['cs_port'] = 8080
+
+# Stanchion
+default['stanchion']['config']['stanchion']['stanchion_ip'] = node['ipaddress'].to_erl_string
+default['stanchion']['config']['stanchion']['stanchion_port'] = 8085
+```
+
+#### Credentials
+
+Both Riak CS and Stanchion require administrative user credentials. The
+two credentials are `admin_key` and `admin_secret`:
+
+```ruby
+# Riak CS
+default['riak_cs']['config']['riak_cs']['admin_key'] = "admin-key".to_erl_string
+default['riak_cs']['config']['riak_cs']['admin_secret'] = "admin-secret".to_erl_string
+
+# Stanchion
+default['stanchion']['config']['stanchion']['admin_key'] = "admin-key".to_erl_string
+default['stanchion']['config']['stanchion']['admin_secret'] = "admin-secret".to_erl_string
+```
+
+#### Webmachine
+
+Webmachine is used to service HTTP requests in Riak CS. Its `server_name`
+and Lager `log_handlers` can be configured with the following:
+
+```ruby
+default['riak_cs']['config']['webmachine']['server_name'] = "Riak CS".to_erl_string
+default['riak_cs']['config']['webmachine']['log_handlers']['webmachine_log_handler'] = ["/var/log/riak-cs".to_erl_string].to_erl_list
+default['riak_cs']['config']['webmachine']['log_handlers']['riak_cs_access_log_handler'] = [].to_erl_list
+```
+
+#### Erlang
+
+A number of Erlang parameters may be configured through the cookbook. The
+node `-name` and `-setcookie` are most important for creating multi-node
+clusters.
+
+The rest of the parameters are primarily for performance tuning, with
+kernel polling and SMP enabled by default. A few examples follow:
+
+```ruby
+# Riak CS
+default['riak_cs']['args']['-name'] = "riak-cs@#{node['fqdn']}"
+default['riak_cs']['args']['-setcookie'] = "riak-cs"
+default['riak_cs']['args']['+K'] = true
+default['riak_cs']['args']['+A'] = 64
+default['riak_cs']['args']['+W'] = "w"
+default['riak_cs']['args']['-env']['ERL_MAX_PORTS'] = 4096
+default['riak_cs']['args']['-env']['ERL_FULLSWEEP_AFTER'] = 0
+default['riak_cs']['args']['-env']['ERL_CRASH_DUMP'] = "/var/log/riak/erl_crash.dump"
+
+# Stanchion
+default['stanchion']['args']['-name'] = "stanchion@#{node['ipaddress']}"
+default['stanchion']['args']['-setcookie'] = "stanchion"
+default['stanchion']['args']['+K'] = true
+default['stanchion']['args']['+A'] = 64
+default['stanchion']['args']['+W'] = "w"
+default['stanchion']['args']['-env']['ERL_MAX_PORTS'] = 4096
+default['stanchion']['args']['-env']['ERL_FULLSWEEP_AFTER'] = 0
+default['stanchion']['args']['-env']['ERL_CRASH_DUMP'] = "/var/log/stanchion/erl_crash.dump"
+```
+
+#### Storage Backends
+
+Riak CS uses a specific combination of storage backends. [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) is used to
+store blocks and [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb) to store manifests. The
+`riak_cs_kv_multi_backend` must be specified in the Riak configuration
+file for Riak CS to work:
+
+```ruby
+default['riak']['config']['riak_kv']['storage_backend'] = "riak_cs_kv_multi_backend"
+```
+
+The Riak cookbook takes care of populating all of the other defaults
+required for the `riak_cs_kv_multi_backend` to be configured correctly.
+
+#### Lager
+
+[Lager](https://github.com/basho/lager) is the logging framework used
+within Riak CS and Stanchion. It can also be used with Erlang/OTP.
+
+```ruby
+# Riak CS
+error_log = ["/var/log/riak-cs/error.log".to_erl_string,"error",10485760,"$D0".to_erl_string,5].to_erl_tuple
+info_log = ["/var/log/riak-cs/console.log".to_erl_string,"info",10485760,"$D0".to_erl_string,5].to_erl_tuple
+
+default['riak_cs']['config']['lager']['handlers']['lager_file_backend'] = [error_log, info_log]
+default['riak_cs']['config']['lager']['crash_log'] = "/var/log/riak-cs/crash.log".to_erl_string
+default['riak_cs']['config']['lager']['crash_log_msg_size'] = 65536
+default['riak_cs']['config']['lager']['crash_log_size'] = 10485760
+default['riak_cs']['config']['lager']['crash_log_date'] = "$D0".to_erl_string
+default['riak_cs']['config']['lager']['crash_log_count'] = 5
+default['riak_cs']['config']['lager']['error_logger_redirect'] = true
+
+# Stanchion
+error_log = ["/var/log/stanchion/error.log".to_erl_string,"error",10485760,"$D0".to_erl_string,5].to_erl_tuple
+info_log = ["/var/log/stanchion/console.log".to_erl_string,"info",10485760,"$D0".to_erl_string,5].to_erl_tuple
+
+default['stanchion']['config']['lager']['handlers']['lager_file_backend'] = [error_log, info_log]
+default['stanchion']['config']['lager']['crash_log'] = "/var/log/stanchion/crash.log".to_erl_string
+default['stanchion']['config']['lager']['crash_log_msg_size'] = 65536
+default['stanchion']['config']['lager']['crash_log_size'] = 10485760
+default['stanchion']['config']['lager']['crash_log_date'] = "$D0".to_erl_string
+default['stanchion']['config']['lager']['crash_log_count'] = 5
+default['stanchion']['config']['lager']['error_logger_redirect'] = true
+```
+
+## Additional Resources
+
+More information related to cluster configuration and building
+development environments is available in our documentation:
+ +* [Building a Local Test Environment]({{}}riak/cs/2.1.2/tutorials/fast-track/local-testing-environment) +* [Building a Virtual Testing Environment]({{}}riak/cs/2.1.2/tutorials/fast-track/virtual-test-environment) diff --git a/content/riak/cs/2.1.2/cookbooks/installing/launching-and-stopping.md b/content/riak/cs/2.1.2/cookbooks/installing/launching-and-stopping.md new file mode 100644 index 0000000000..1d11ba0b45 --- /dev/null +++ b/content/riak/cs/2.1.2/cookbooks/installing/launching-and-stopping.md @@ -0,0 +1,67 @@ +--- +title: "Launching and Stopping Riak CS" +description: "" +menu: + riak_cs-2.1.2: + name: "Launching and Stopping" + identifier: "run_launch_stop" + weight: 101 + parent: "run" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/cookbooks/installing/Launching-and-Stopping-Riak-CS/ + - /riak/cs/2.1.2/cookbooks/installing/Launching-and-Stopping-Riak-CS/ + - /riak/cs/latest/cookbooks/installing/launching-and-stopping/ +--- + +To launch Riak CS in the background: + +```bash +sudo riak-cs start +``` + +To run Riak CS with an interactive Erlang console: + +```bash +sudo riak-cs console +``` + +When Riak CS is running, the Riak CS process appears in the process +list. To check for the Riak CS process, enter: + +```bash +ps -ef | grep riak-cs +``` + +To stop Riak CS, enter: + +```bash +sudo riak-cs stop +``` + +You can use the command + +```bash +sudo riak-cs attach +``` + +to attach and obtain an interactive console to a running instance of +Riak CS. + +You can check the liveness of your Riak CS installation with the +`riak-cs ping` command, which should return `pong` if Riak CS is up and +running. + +```bash +riak-cs ping +``` + +Please note that `riak-cs ping` tests only the liveness of Riak CS and +does not test the connection between Riak CS and Riak. In order to test +that, you can run a `GET` request against the `/riak-cs/ping` endpoint of a Riak +CS node, as in the example below: + +```curl +curl http://localhost:8080/riak-cs/ping +``` diff --git a/content/riak/cs/2.1.2/cookbooks/keystone-conf-sample.md b/content/riak/cs/2.1.2/cookbooks/keystone-conf-sample.md new file mode 100644 index 0000000000..9b0f707e51 --- /dev/null +++ b/content/riak/cs/2.1.2/cookbooks/keystone-conf-sample.md @@ -0,0 +1,251 @@ +--- +title: "Keystone Configuration Sample" +description: "" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/cookbooks/Keystone-Conf-Sample/ + - /riak/cs/latest/cookbooks/keystone-conf-sample/ +--- + +The following displays the contents of a sample `keystone.conf` file +that can be used to test Riak CS with the Keystone authentication +service on a `localhost` setup. 
+ +```config +[DEFAULT] +# A "shared secret" between keystone and other openstack services +admin_token = ADMIN + +# The IP address of the network interface to listen on +bind_host = 127.0.0.1 + +# The port number which the public service listens on +public_port = 5000 + +# The port number which the public admin listens on +admin_port = 35357 + +# The port number which the OpenStack Compute service listens on +# compute_port = 8774 + +# Path to your policy definition containing identity actions +# TODO(dolph): This config method will probably be deprecated during grizzly +# policy_file = policy.json + +# Rule to check if no matching policy definition is found +# FIXME(dolph): This should really be defined as [policy] default_rule +# policy_default_rule = admin_required + +# === Logging Options === +# Print debugging output +verbose = True + +# Print more verbose output +# (includes plaintext request logging, potentially including passwords) +debug = True + +# Name of log file to output to. If not set, logging will go to stdout. +log_file = keystone.log + +# The directory to keep log files in (will be prepended to --logfile) +log_dir = log/keystone + +# Use syslog for logging. +use_syslog = False + +# syslog facility to receive log lines +# syslog_log_facility = LOG_USER + +# If this option is specified, the logging configuration file specified is +# used and overrides any other logging options specified. Please see the +# Python logging module documentation for details on logging configuration +# files. +#log_config = logging.conf + +# A logging.Formatter log message format string which may use any of the +# available logging.LogRecord attributes. +# log_format = %(asctime)s %(levelname)8s [%(name)s] %(message)s + +# Format string for %(asctime)s in log records. 
+# log_date_format = %Y-%m-%d %H:%M:%S + +# onready allows you to send a notification when the process is ready to serve +# For example, to have it notify using systemd, one could set shell command: +# onready = systemd-notify --ready +# or a module with notify() method: +# onready = keystone.common.syst/ + +[sql] +# The SQLAlchemy connection string used to connect to the database +connection = sqlite:///keystone.db + +# the timeout before idle sql connections are reaped +idle_timeout = 200 + +[identity] +driver = keystone.identity.backends.sql.Identity + +[catalog] +# dynamic, sql-based backend (supports API/CLI-based management commands) +#driver = keystone.catalog.backends.sql.Catalog + +# static, file-based backend (does *NOT* support any management commands) +driver = keystone.catalog.backends.templated.TemplatedCatalog + +template_file = ./etc/default_catalog.templates + +[token] +# driver = keystone.token.backends.kvs.Token + +# Amount of time a token should remain valid (in seconds) +# expiration = 86400 + +[policy] +driver = keystone.policy.backends.sql.Policy + +[ec2] +# driver = keystone.contrib.ec2.backends.kvs.Ec2 + +[ssl] +#enable = True +#certfile = /etc/keystone/ssl/certs/keystone.pem +#keyfile = /etc/keystone/ssl/private/keystonekey.pem +#ca_certs = /etc/keystone/ssl/certs/ca.pem +#cert_required = True + +[signing] +token_format = UUID +#certfile = /etc/keystone/ssl/certs/signing_cert.pem +#keyfile = /etc/keystone/ssl/private/signing_key.pem +#ca_certs = /etc/keystone/ssl/certs/ca.pem +#key_size = 1024 +#valid_days = 3650 +#ca_password = None + +[ldap] +# url = ldap://localhost +# user = dc=Manager,dc=example,dc=com +# password = None +# suffix = cn=example,cn=com +# use_dumb_member = False +# allow_subtree_delete = False +# dumb_member = cn=dumb,dc=example,dc=com + +# user_tree_dn = ou=Users,dc=example,dc=com +# user_filter = +# user_objectclass = inetOrgPerson +# user_id_attribute = cn +# user_name_attribute = sn +# user_mail_attribute = email +# user_pass_attribute = userPassword +# user_enabled_attribute = enabled +# user_enabled_mask = 0 +# user_enabled_default = True +# user_attribute_ignore = tenant_id,tenants +# user_allow_create = True +# user_allow_update = True +# user_allow_delete = True + +# tenant_tree_dn = ou=Groups,dc=example,dc=com +# tenant_filter = +# tenant_objectclass = groupOfNames +# tenant_id_attribute = cn +# tenant_member_attribute = member +# tenant_name_attribute = ou +# tenant_desc_attribute = desc +# tenant_enabled_attribute = enabled +# tenant_attribute_ignore = +# tenant_allow_create = True +# tenant_allow_update = True +# tenant_allow_delete = True + +# role_tree_dn = ou=Roles,dc=example,dc=com +# role_filter = +# role_objectclass = organizationalRole +# role_id_attribute = cn +# role_name_attribute = ou +# role_member_attribute = roleOccupant +# role_attribute_ignore = +# role_allow_create = True +# role_allow_update = True +# role_allow_delete = True + +[filter:debug] +paste.filter_factory = keystone.common.wsgi:Debug.factory + +[filter:token_auth] +paste.filter_factory = keystone.middleware:TokenAuthMiddleware.factory + +[filter:admin_token_auth] +paste.filter_factory = keystone.middleware:AdminTokenAuthMiddleware.factory + +[filter:xml_body] +paste.filter_factory = keystone.middleware:XmlBodyMiddleware.factory + +[filter:json_body] +paste.filter_factory = keystone.middleware:JsonBodyMiddleware.factory + +[filter:user_crud_extension] +paste.filter_factory = keystone.contrib.user_crud:CrudExtension.factory + +[filter:crud_extension] 
+paste.filter_factory = keystone.contrib.admin_crud:CrudExtension.factory
+
+[filter:ec2_extension]
+paste.filter_factory = keystone.contrib.ec2:Ec2Extension.factory
+
+[filter:s3_extension]
+paste.filter_factory = keystone.contrib.s3:S3Extension.factory
+
+[filter:url_normalize]
+paste.filter_factory = keystone.middleware:NormalizingFilter.factory
+
+[filter:stats_monitoring]
+paste.filter_factory = keystone.contrib.stats:StatsMiddleware.factory
+
+[filter:stats_reporting]
+paste.filter_factory = keystone.contrib.stats:StatsExtension.factory
+
+[app:public_service]
+paste.app_factory = keystone.service:public_app_factory
+
+[app:service_v3]
+paste.app_factory = keystone.service:v3_app_factory
+
+[app:admin_service]
+paste.app_factory = keystone.service:admin_app_factory
+
+[pipeline:public_api]
+pipeline = stats_monitoring url_normalize token_auth admin_token_auth xml_body json_body debug ec2_extension user_crud_extension public_service
+
+[pipeline:admin_api]
+pipeline = stats_monitoring url_normalize token_auth admin_token_auth xml_body json_body debug stats_reporting ec2_extension s3_extension crud_extension admin_service
+
+[pipeline:api_v3]
+pipeline = stats_monitoring url_normalize token_auth admin_token_auth xml_body json_body debug stats_reporting ec2_extension s3_extension service_v3
+
+[app:public_version_service]
+paste.app_factory = keystone.service:public_version_app_factory
+
+[app:admin_version_service]
+paste.app_factory = keystone.service:admin_version_app_factory
+
+[pipeline:public_version_api]
+pipeline = stats_monitoring url_normalize xml_body public_version_service
+
+[pipeline:admin_version_api]
+pipeline = stats_monitoring url_normalize xml_body admin_version_service
+
+[composite:main]
+use = egg:Paste#urlmap
+/v2.0 = public_api
+/v3 = api_v3
+/ = public_version_api
+
+[composite:admin]
+use = egg:Paste#urlmap
+/v2.0 = admin_api
+/v3 = api_v3
+/ = admin_version_api
+```
diff --git a/content/riak/cs/2.1.2/cookbooks/keystone-setup.md b/content/riak/cs/2.1.2/cookbooks/keystone-setup.md
new file mode 100644
index 0000000000..67b4279f41
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/keystone-setup.md
@@ -0,0 +1,134 @@
+---
+title: "Keystone Setup"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/Keystone-Setup/
+  - /riak/cs/latest/cookbooks/keystone-setup/
+---
+
+Keystone is a Python application that requires a number of dependencies
+to be installed. This document covers how to use Python,
+[pip](https://github.com/pypa/pip), and
+[virtualenv](https://github.com/pypa/virtualenv) to set up an isolated
+test environment for running and testing Keystone.
+
+## Prerequisites
+
+1. Make sure Python is installed.
+1. Install pip, the Python package tool. Installation instructions can
+   be found [here](http://guide.python-distribute.org/installation.html#installing-pip).
+   pip can also be installed via [Homebrew](http://brew.sh/) for Mac OS
+   X users.
+
+1. Install `virtualenv` and `virtualenvwrapper`:
+
+    ```bash
+    pip install virtualenv virtualenvwrapper
+    ```
+
+1. Set up `virtualenvwrapper`. Add the following lines to your
+   `.bash_profile` to get the `virtualenvwrapper` scripts in the path.
+
+    ```bash
+    export WORKON_HOME=$HOME/.virtualenvs
+    source /usr/local/bin/virtualenvwrapper.sh
+    ```
+
+1. Clone the Keystone repo:
+
+    ```bash
+    git clone https://github.com/openstack/keystone.git
+    ```
+
+1. Navigate to the Keystone repo and check out the proper tag:
+
+    ```bash
+    cd keystone
+    git checkout grizzly-2
+    ```
+
+## Create a Virtual Environment for Keystone
+
+Run the following command to create a virtual environment:
+
+```bash
+mkvirtualenv keystone-test
+```
+
+The `keystone-test` virtual environment is now created, activated, and
+ready to use.
+
+## Install the Keystone Dependencies
+
+The dependencies for running Keystone can be found in
+`tools/pip-requires`. At the time of this writing, `grizzly-2` is the
+latest tag of Keystone and the dependencies are based on versions that
+work with that tag. Use `pip` to install the dependencies as follows:
+
+```bash
+pip install -r tools/pip-requires
+```
+
+## Configure Keystone
+
+The next step is to select the appropriate options in the
+`keystone.conf` configuration file. A sample configuration that is
+useful for local testing with Riak CS can be found [here]({{}}riak/cs/2.1.2/cookbooks/keystone-conf-sample/). This configuration file sets up logging to
+`./log/keystone/keystone.log` and uses the templated catalog backend to
+set up the Riak CS object store service. This catalog backend uses a
+local file to populate the service catalog.
+
+The default file in the previously referenced sample `keystone.conf`
+file uses `etc/default_catalog.templates`, but this can be changed. Set
+the contents of `etc/default_catalog.templates` to be the following:
+
+```config
+# config for TemplatedCatalog, using camelCase
+catalog.RegionOne.identity.publicURL = http://localhost:$(public_port)s/v2.0
+catalog.RegionOne.identity.adminURL = http://localhost:$(admin_port)s/v2.0
+catalog.RegionOne.identity.internalURL = http://localhost:$(public_port)s/v2.0
+catalog.RegionOne.identity.name = Identity Service
+
+catalog.RegionOne.object-store.publicURL = http://localhost:8080/v1/AUTH_$(tenant_id)s
+catalog.RegionOne.object-store.adminURL = http://localhost:8080/
+catalog.RegionOne.object-store.internalURL = http://localhost:8080/v1/AUTH_$(tenant_id)s
+catalog.RegionOne.object-store.name = 'Object Store Service'
+```
+
+## Optional Configuration
+
+For testing, it can be easier to configure Keystone to use UUID as the
+token format. To do this, edit `keystone.conf` and set the following:
+
+```config
+token_format = UUID
+```
+
+## Prepare the Database
+
+```bash
+./bin/keystone-manage db_sync
+```
+
+## Run Keystone
+
+```bash
+./bin/keystone-all --config-file /path/to/keystone/etc/keystone.conf -d --debug
+```
+
+The following script can be used to set a number of useful environment
+variables to make using the Keystone client less cumbersome.
+
+```bash
+#!/bin/bash
+
+export OS_SERVICE_TOKEN=ADMIN
+export OS_SERVICE_ENDPOINT=http://localhost:35357/v2.0
+export OS_IDENTITY_API_VERSION=2.0
+export OS_AUTH_URL=http://localhost:5000/v2.0
+export OS_USERNAME=test
+export OS_PASSWORD=test
+export OS_TENANT_NAME=test
+```
diff --git a/content/riak/cs/2.1.2/cookbooks/logging.md b/content/riak/cs/2.1.2/cookbooks/logging.md
new file mode 100644
index 0000000000..df3a263a22
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/logging.md
@@ -0,0 +1,45 @@
+---
+title: "Riak CS Logging"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Logging"
+    identifier: "run_log"
+    weight: 102
+    parent: "run"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/logging/
+  - /riak/cs/latest/cookbooks/logging/
+---
+
+In versions 1.5.0 and later, you can use Riak CS in conjunction with
+[Lager](https://github.com/basho/lager), the logging framework used for
+Riak. By default, all Riak CS logs can be found in the `/log` directory
+of each node.
+
+You can configure Lager for Riak CS in the `advanced.config` configuration
+file in each Riak CS node, in the section of that file named `lager`.
+That section looks something like this:
+
+```advancedconfig
+{lager, [
+    {handlers, [
+        ...
+    ]},
+
+    %% Other configs
+]}
+```
+
+```appconfig
+{lager, [
+    {handlers, [
+        ...
+    ]},
+
+    %% Other configs
+]}
+```
+
+A full description of all available parameters can be found in the
+[configuration files]({{}}riak/kv/2.1.3/configuring/reference) document for Riak.
diff --git a/content/riak/cs/2.1.2/cookbooks/monitoring-and-metrics.md b/content/riak/cs/2.1.2/cookbooks/monitoring-and-metrics.md
new file mode 100644
index 0000000000..ecc97d2b7e
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/monitoring-and-metrics.md
@@ -0,0 +1,234 @@
+---
+title: "Monitoring and Metrics"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Monitoring & Metrics"
+    identifier: "advanced_monitor_metrics"
+    weight: 101
+    parent: "run_advanced"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/monitoring-and-metrics/
+  - /riak/cs/latest/cookbooks/monitoring-and-metrics/
+---
+
+[amazon]: http://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html
+[s3 api]: {{< baseurl >}}riak/cs/latest/references/apis/storage/s3/
+
+Riak S2 (CS) includes metrics and operational statistics to help you monitor your system in more detail and diagnose system issues more easily. There are three major categories of metrics:
+
+1. Frontend API performance
+2. Backend Riak performance (Stanchion)
+3. S2 internal performance
+
+Metrics are also available for Stanchion, in addition to the Stanchion-specific `stanchion-admin` command and `/stats` HTTP endpoint.
+
+Riak S2 also provides operational statistics that can be useful for
+monitoring through the Folsom statistics library, and initial probes for
+analysis of the running system with
+[DTrace](http://dtrace.org/blogs/about/).
+
+>**Note: Older Versions of Riak S2**
+>
+>All statistics available in versions of Riak S2 below 2.0.x have either been renamed or removed entirely.
+
+
+## Using Metrics
+
+Riak S2 exposes statistics on critical operations that
+are commonly used for monitoring, alerting, and trend analysis.
These +statistics can be accessed through the command line: + +```bash +riak-cs-admin status +``` + +or through HTTP requests to the following resource: + +```http +/riak-cs/stats +``` + +>**Note** +> +>In order to access statistics from the /stats endpoint, you +must issue signed requests containing the admin user's access key and +secret key. The interface used by Riak S2 is directly analogous to that +of Amazon S3. For more information on signed requests, see [Amazon's +documentation][amazon]. +> +>Unsigned requests will yield a 403 Forbidden error. + +## `riak-cs-admin status` + +Running `riak-cs-admin status` will show the names and values of all available metrics. + +There are too many metrics (over 1000) to list all of them here. The following sections provide an overview of each major statistic category, associated prefixes, and major operations for that category. + +### S3 API statistics + +S3 API statistics start with one of the following prefixes (all of which are names for S3 APIs): + +- `service` +- `bucket` +- `list` +- `multiple_delete` +- `object` +- `multipart` + +Each prefix is typically followed by operations such as: + +- `put` +- `get` +- `delete` + +Operation | Description +:---------|:----------- +`service_get` | GET Service +`bucket_(put∣head∣delete)` | PUT, HEAD, DELETE Bucket +`bucket_acl_(get∣put)` | PUT, GET Bucket ACL +`bucket_policy_(get∣put∣delete)` | PUT, GET, DELETE Bucket Policy +`bucket_location_get` | GET Bucket Location +`list_uploads` | listing all multipart uploads +`multiple_delete` | Delete Multiple Objects +`list_objects` | listing all objects in a bucket, equally GET Bucket +`object_(get∣put∣delete)` | GET, PUT, DELETE, HEAD Objects +`object_put_copy` | PUT Copy Object +`object_acl` | GET, PUT Object ACL +`multipart_post` | Initiate a multipart upload +`multipart_upload_put` | PUT Multipart Upload, putting a part of an object by copying from existing object +`multipart_upload_post` | complete a multipart upload +`multipart_upload_delete` | delete a part of a multipart upload +`multipart_upload_get` | get a list of parts in a multipart upload + +See the [S3 API documentation][s3 api] for information on all available APIs. + +### Stanchion access statistics + +Stanchion access statistics start with the prefix `velvet`. + +These statistics cover latency and counts for the Stanchion process creating/updating/deleting buckets or creating users. Stanchion access statistics can help determine if latency or slow requests are in Stanchion. + +Operation | Description +:---------|:----------- +`velvet_create_user` | requesting creating a user to Stanchion +`velvet_update_user` | requesting updating a user to Stanchion +`velvet_create_bucket` | requesting creating a bucket to Stanchion +`velvet_delete_bucket` | requesting deleting a bucket to Stanchion +`velvet_set_bucket_acl` | requesting updating a bucket ACL to Stanchion +`velvet_set_bucket_policy` | requesting putting a new bucket policy to Stanchion +`velvet_delete_bucket_policy` | requesting deleting a policy of the bucket to Stanchion + +### Riak access statistics + +Riak access statistics start with the prefix `riakc`. + +These statistics cover latency and call counts to Riak PB API. Riak access statistics are useful in determining the source of latency. For example getting a user record, bucket record, or updating manifests. + +The `riakc` prefix is typically followed by operations like: + +- `put` +- `get` + +And their targets, such as manifests or blocks. 
+
+Operation | Description
+:---------|:-----------
+`riakc_ping` | ping PB API, invoked by `/riak-cs/ping`
+`riakc_get_cs_bucket` | getting a bucket record
+`riakc_get_cs_user_strong` | getting a user record with PR=all
+`riakc_get_cs_user` | getting a user record with R=quorum and PR=one
+`riakc_put_cs_user` | putting a user record after creating/deleting a bucket
+`riakc_get_manifest` | getting a manifest
+`riakc_put_manifest` | putting a manifest
+`riakc_delete_manifest` | deleting a manifest (invoked via GC)
+`riakc_get_block_n_one` | getting a block with N=1 without sloppy quorum
+`riakc_get_block_n_all` | getting a block with N=3 after N=1 get failed
+`riakc_get_block_remote` | getting a block after N=3 get resulted in not found
+`riakc_get_block_legacy` | getting a block when N=1 get is turned off
+`riakc_put_block` | putting a block
+`riakc_put_block_resolved` | putting a block when block siblings resolution is invoked
+`riakc_head_block` | heading a block, invoked via GC
+`riakc_delete_block_constrained` | first trial to delete a block with PW=all
+`riakc_delete_block_secondary` | second trial to delete a block with PW=quorum, after PW=all failed
+`riakc_(get∣put)_gc_manifest_set` | invoked when a manifest is being moved to the GC bucket
+`riakc_(get∣delete)_gc_manifest_set` | invoked when manifests are being collected
+`riakc_(get∣put)_access` | getting access stats, putting access stats
+`riakc_(get∣put)_storage` | getting storage stats, putting storage stats
+`riakc_fold_manifest_objs` | invoked inside GET Bucket (listing objects within a bucket)
+`riakc_mapred_storage` | stats on each MapReduce job performance
+`riakc_list_all_user_keys` | all users are listed out when starting storage calculation
+`riakc_list_all_manifest_keys` | only used when deleting a bucket to verify it's empty
+`riakc_list_users_receive_chunk` | listing users, invoked via the /riak-cs/users API
+`riakc_get_uploads_by_index` | 
+`riakc_get_user_by_index` | 
+`riakc_get_gc_keys_by_index` | 
+`riakc_get_cs_buckets_by_index` | 
+`riakc_get_clusterid` | invoked the first time a proxy_get is performed
+
+
+## `/riak-cs/stats`
+
+A `GET` on this resource returns a JSON object containing a series of
+latency histograms and counters for a variety of operations, e.g.
+`object_get` and `block_put`. Alongside each operation there will be a
+list showing the count and rate for the operation, as well as a latency
+histogram showing mean, median, and 95th and 99th percentiles:
+
+```json
+"<operation_name>": [MeterCount, MeterRate, LatencyMean, LatencyMedian, Latency95, Latency99]
+```
+
+You will see a list of that form for each of the following operations:
+
+Operation | Description
+:---------|:-----------
+`block_get` | Total BLOCK GET operations performed
+`block_put` | Total BLOCK PUT operations performed
+`block_delete` | Total BLOCK DELETE operations performed
+`service_get_buckets` | Total GET BUCKETS operations performed
+`bucket_list_keys` | Total BUCKET LIST KEYS operations performed
+`bucket_create` | Total BUCKET CREATE operations performed
+`bucket_delete` | Total BUCKET DELETE operations performed
+`bucket_get_acl` | Total BUCKET GET ACL operations performed
+`bucket_put_acl` | Total BUCKET PUT ACL operations performed
+`object_get` | Total GET operations performed
+`object_put` | Total PUT operations performed
+`object_head` | Total OBJECT HEAD operations performed
+`object_delete` | Total OBJECT DELETE operations performed
+`object_get_acl` | Total OBJECT GET ACL operations performed
+`object_put_acl` | Total OBJECT PUT ACL operations performed
+
+## Stanchion
+
+## DTrace Probes
+
+Riak CS is built with some probes for use with
+[DTrace](http://dtrace.org/blogs/about/) to inspect certain operations
+in the live system, which can be helpful in diagnosing issues.
+
+### Usage Examples
+
+The following are examples of using DTrace for inspecting various
+components of a running Riak CS installation.
+
+#### Trace User Object Requests
+
+```bash
+dtrace -qn 'erlang*:::user_trace* /arg2 == 703/ {printf("pid %s: mod %s op %s: user %s bucket/file %s\n", copyinstr(arg0), copyinstr(arg6), copyinstr(arg7), copyinstr(arg8), copyinstr(arg9));}'
+```
+
+#### Trace Webmachine Resource Execution
+
+```bash
+dtrace -qn 'erlang*:::user_trace* /arg2 == 705/ {printf("pid %s: %s:%s\n", copyinstr(arg0), copyinstr(arg6), copyinstr(arg7));}'
+```
+
+{{% note title="Note on DTrace Support" %}}
+Work on packaging of Riak CS for SmartOS and other operating systems with
+DTrace support is ongoing with the goal of providing enhanced ability to
+diagnose low-level issues in instances of Riak CS running on such operating
+systems.
+{{% /note %}}
diff --git a/content/riak/cs/2.1.2/cookbooks/multi-datacenter-overview.md b/content/riak/cs/2.1.2/cookbooks/multi-datacenter-overview.md
new file mode 100644
index 0000000000..86ecdaae96
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/multi-datacenter-overview.md
@@ -0,0 +1,72 @@
+---
+title: "Riak CS Multi-Datacenter Overview"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Riak CS Enterprise"
+    identifier: "mdc_overview"
+    weight: 600
+    pre: cloud
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/mdc-overview/
+  - /riak/cs/2.1.2/cookbooks/mdc-overview/
+  - /riak/cs/latest/cookbooks/mdc-overview/
+  - /riak/cs/latest/cookbooks/multi-datacenter-overview/
+---
+
+## Riak CS Enterprise
+
+Riak CS Enterprise extends Riak CS with Multi-Datacenter Replication,
+monitoring, and 24×7 support. Customers may use Multi-Datacenter
+Replication to serve global traffic, create availability zones, maintain
+active backups, or meet disaster recovery and regulatory requirements.
+Multi-Datacenter Replication can be used in two or more sites, and data
+can be replicated across datacenters using realtime or fullsync
+synchronization.
+ +If you are interested, sign up for a [developer trial](http://info.basho.com/RiakCS1.1_DeveloperTrialRequest.html) of Riak CS Enterprise or [contact us](http://basho.com/contact/) for more information. + +{{% note title="Riak CS Enterprise requires a separate download" %}} +Please note that Riak CS Enterprise requires a download separate from the +open-source Riak CS, which will not work in conjunction with Riak Enterprise. +{{% /note %}} + +## Multi-Datacenter Replication + +Multi-Datacenter Replication in Riak CS provides two modes of object +replication: **fullsync** and **realtime sync**. Data is streamed over a +TCP connection and Multi-Datacenter Replication in Riak CS has support +for SSL so that data can be securely replicated between sites. + +In Riak CS, large objects are broken into blocks and streamed to the +underlying Riak cluster on write, where they are replicated for high +availability (3 replicas by default). A manifest for each object is +maintained so that blocks can be retrieved from the cluster and the full +object presented to clients. For multi-site replication in Riak CS, +global information for users, bucket information, and manifests are +streamed in realtime from a primary implementation (a **source** +cluster) to a secondary site (a **sink** cluster) so that global state +is maintained across locations. Objects can then be replicated in either +fullsync or realtime sync mode. + +## Fullsync Mode + +In a fullsync operation, objects are replicated from a primary Riak CS +implementation to a secondary site on a configurable interval (the +default is 6 hours). In fullsync replication, each cluster computes a +hash for each key’s block value. Key/block pairs are compared and the +primary site streams any missing blocks or updates needed to the +secondary site. + +## Realtime Mode + +Realtime sync is triggered when an update is sent from a client to a +primary Riak CS implementation. Once replicated in the first location, +the updated manifests are streamed in real time to the secondary site. +But what happens if a client requests an object from the secondary +cluster and not all of its blocks have been replicated to that cluster? +With Riak multi-site replication, the secondary cluster will request any +missing blocks via `proxy_get` from the primary cluster so that the +client can be served. diff --git a/content/riak/cs/2.1.2/cookbooks/multipart-upload-overview.md b/content/riak/cs/2.1.2/cookbooks/multipart-upload-overview.md new file mode 100644 index 0000000000..91c23f0fd4 --- /dev/null +++ b/content/riak/cs/2.1.2/cookbooks/multipart-upload-overview.md @@ -0,0 +1,134 @@ +--- +title: "Multipart Upload Overview" +description: "" +menu: + riak_cs-2.1.2: + name: "Multipart Upload Overview" + identifier: "theory_multipart_upload" + weight: 101 + parent: "theory" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/cookbooks/Multipart-Upload-Overview/ + - /riak/cs/latest/cookbooks/multipart-upload-overview/ +--- + +Multipart upload allows users of Riak CS to do the following: + +* upload large objects, potentially multiple terabytes, as a set of + smaller parts +* pause and resume the upload of a large object +* begin an upload without prior knowledge of the total size of the whole + object + +In general, multipart uploads tend to be more efficient because parts +may be uploaded in parallel. In Riak CS they are designed to both behave +like Amazon S3 multipart uploads and to utilize the same user-facing +API. 
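+
+As a minimal illustration of that compatibility, any S3 client that
+supports multipart upload should work unchanged against Riak CS. For
+example, assuming an `s3cmd` client already configured with Riak CS
+credentials and a hypothetical bucket named `my-bucket`, a large file
+can be uploaded in parts like this (recent versions of `s3cmd` switch
+to multipart automatically for files larger than the configured chunk
+size):
+
+```bash
+# Hypothetical bucket and file names; 15 MB parts are uploaded until
+# the whole file has been transferred, then the upload is completed.
+s3cmd put --multipart-chunk-size-mb=15 big-archive.tar s3://my-bucket/big-archive.tar
+```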
+ +{{% note title="Note on file size limit" %}} +The size limit on individual parts of a multipart upload is 5 gigabytes. +{{% /note %}} + +There are three phases to a multipart upload: **initiation**, **parts +upload**, and **completion**. Each phase is described in more detail +below. + +## Multipart Upload Phases + +### Initiation + +Initiation is done by sending a properly formatted multipart upload +initiation request to Riak CS. If the upload initiation is successful, +the response from Riak CS includes an upload ID. + +This ID is a unique identifier for a particular multipart upload and +*must* be included with all subsequent requests to Riak CS pertaining to +this upload. This includes any of the upload operations described in the +remainder of this document except for listing all active multipart +uploads. + +Metadata may be attached to an object uploaded using multipart upload +just like any other object stored in Riak CS. To do so, the metadata +should be included with the multipart upload initiation request. + +### Parts Upload + +A part upload must include both the upload ID received in response to an +initiation request and a part number. Part numbers should be integers +between 1 and 10,000. These numbers identify a part within the context +of the multipart upload and also specify positioning within the final +object. Uploading a part with a part number that has previously been +uploaded results in the previous part associated with that part number +being overwritten. + +For each part that is uploaded, Riak CS returns an `ETag` header as part +of the response. Each `ETag` value and the part number it corresponds to +should be reserved for use in the multipart upload completion request. + +The size of each part may be in the range of 5MB to 5GB. + +### Completion + +Once a complete multipart upload request is received, Riak CS assembles +the object from the uploaded parts. Subsequently, the object is +presented as a single entity to the user with no difference from any +other object stored in Riak CS. The uploaded parts are no longer +individually accessible. + +A complete multipart upload request *must* include the upload ID and a +list of all part numbers and their corresponding `ETag` values. Riak CS +returns another `ETag` that identifies the completed object. It should +be noted that this `ETag` value is not necessarily an MD5 hash of the +object data and that this fact may cause warnings to be issued by some +client libraries or tools. + +## Terminating an upload + +#### Abort Multipart Upload + +A multipart upload request may be aborted prior to sending an upload +completion request. The storage for all parts that have been fully +uploaded will be released. + +Part uploads that are in-progress *may not* have their storage released, +so the abort request should only be sent after all parts already in +progress have uploaded to ensure that all storage is reclaimed. + +Once a multipart upload is aborted, the upload ID is no longer valid. + +## Listing uploads + +#### Active Multipart Uploads + +Riak CS can list all of the active multipart uploads for each user +account. The number of multipart uploads included in the response is +capped at 1000. If there are more than 1000 active multipart uploads for +a particular user account, they can be listed by using multiple +requests. + +#### Completed Parts From An Active Upload + +Riak CS can list the parts that have been successfully uploaded for a +specific multipart upload. 
+
+## Terminating an upload
+
+#### Abort Multipart Upload
+
+A multipart upload request may be aborted prior to sending an upload
+completion request. The storage for all parts that have been fully
+uploaded will be released.
+
+Part uploads that are in progress *may not* have their storage released,
+so the abort request should only be sent after all part uploads already
+in progress have finished, to ensure that all storage is reclaimed.
+
+Once a multipart upload is aborted, the upload ID is no longer valid.
+
+## Listing uploads
+
+#### Active Multipart Uploads
+
+Riak CS can list all of the active multipart uploads for each user
+account. The number of multipart uploads included in the response is
+capped at 1000. If there are more than 1000 active multipart uploads for
+a particular user account, they can be listed by using multiple
+requests.
+
+#### Completed Parts From An Active Upload
+
+Riak CS can list the parts that have been successfully uploaded for a
+specific multipart upload. If a multipart upload comprises more than
+1000 parts, then the parts must be retrieved using multiple parts
+requests.
+
+The results of this request are not intended to be used when sending a
+complete multipart upload request. The proper procedure is to record the
+part numbers and the associated `ETag` values returned with part upload
+responses and use that information when completing a multipart upload.
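+
+As a rough sketch of these maintenance operations with `s3cmd` (recent
+s3cmd versions expose them as the `multipart`, `listmp`, and `abortmp`
+subcommands; names and arguments may vary by version, and the bucket,
+key, and upload ID below are placeholders):
+
+```bash
+# List all active multipart uploads in a bucket
+s3cmd multipart s3://my-bucket
+
+# List the parts uploaded so far for one upload ID
+s3cmd listmp s3://my-bucket/large-object VXBsb2FkSWQ
+
+# Abort the upload, releasing storage for its fully uploaded parts
+s3cmd abortmp s3://my-bucket/large-object VXBsb2FkSWQ
+```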
+
+## Storage Calculation
+
+As with [Amazon
+S3](http://docs.aws.amazon.com/AmazonS3/latest/dev/mpuoverview.html),
+once you initiate a multipart upload, Riak CS retains all of the parts
+of the upload until it is either completed or aborted. If the upload is
+aborted, Riak CS deletes all upload artifacts and they will no longer be
+counted toward the user's storage usage.
+
+For example, if a user has uploaded a 10 GB object via multipart upload
+without completing the request, the object won't appear in the list
+objects result but its object size _will_ be included in the user's
+usage statistics.
diff --git a/content/riak/cs/2.1.2/cookbooks/querying-access-statistics.md b/content/riak/cs/2.1.2/cookbooks/querying-access-statistics.md
new file mode 100644
index 0000000000..5fc6a783c5
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/querying-access-statistics.md
@@ -0,0 +1,409 @@
+---
+title: "Querying Access Statistics"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Access Statistics"
+    identifier: "access_stats"
+    weight: 300
+    parent: "develop"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/Querying-Access-Statistics/
+  - /riak/cs/latest/cookbooks/querying-access-statistics/
+---
+
+Access statistics are tracked on a per-user basis as rollups for slices
+of time. Querying these statistics is done via the
+`/riak-cs/usage/$USER_KEY_ID` resource.
+
+{{% note title="Note on terminology" %}}
+In this and other documents in the Riak CS documentation, the terms "storage"
+and "billing" are used interchangeably. The same goes for the terms "usage"
+and "access".
+{{% /note %}}
+
+For information about how access statistics are logged, please read
+[Usage and Billing Data]({{< baseurl >}}riak/cs/2.1.2/cookbooks/usage-and-billing-data).
+
+The following sections discuss accessing the access statistics using
+bare HTTP requests. Query parameters are used to specify the types and
+date ranges of information to include. For information on using `s3cmd`
+(or other tools) to fetch statistics as S3 objects, skip to the
+[The Magic `usage` Bucket](#the-magic-usage-bucket) section.
+
+The examples on this page assume that the `admin_port` has not
+been configured to something other than the default CS port of `8080`.
+
+## Choosing the Result Format
+
+Results are available as either JSON or XML. Request the appropriate
+format by using the HTTP `Accept` header with either `application/json`
+or `application/xml`, respectively.
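+
+For example, assuming a local Riak CS node listening on port `8080`
+(the authentication headers these requests also require, described
+later on this page, are omitted here):
+
+```curl
+# Request the JSON representation of a usage report
+curl -H "Accept: application/json" http://localhost:8080/riak-cs/usage/8NK4FH2SGKJJM8JIP2GU
+
+# Request the same report as XML
+curl -H "Accept: application/xml" http://localhost:8080/riak-cs/usage/8NK4FH2SGKJJM8JIP2GU
+```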
+
+## Specifying the User
+
+Access statistics are provided on a per-user basis. Specify which user's
+statistics you want by providing that user's `key_id` in the URL. For
+example, to get access statistics for the user key
+`8NK4FH2SGKJJM8JIP2GU`, use the URL
+`/riak-cs/usage/8NK4FH2SGKJJM8JIP2GU`.
+
+**Note**: The new user ID generator should not include non-URL-safe
+characters, but if it does, those characters will need to be escaped
+in this URL.
+
+A `404` code with an error message body will be returned if the user
+does not exist. For example, there is no `ASDF` user in my cluster, so
+fetching `http://localhost:8080/riak-cs/usage/ASDF` produces the
+following JSON/XML (reformatted for easy reading):
+
+```json
+HTTP/1.1 404 Object Not Found
+
+{
+  "Error": {
+    "Message":"Unknown user"
+  }
+}
+```
+
+```xml
+HTTP/1.1 404 Object Not Found
+
+<?xml version="1.0" encoding="UTF-8"?>
+<Error>
+  <Message>Unknown user</Message>
+</Error>
+```
+
+## Enable Access Results
+
+> **Authentication Required**
+>
+> Queries to the usage resources described here must be authenticated as
+described in the [Authentication documentation]({{< baseurl >}}riak/cs/2.1.2/cookbooks/authentication).
+Keep this in mind when using `curl`. Authentication credentials for
+`s3cmd` or `s3-curl` can be specified in their respective configuration
+files.
+
+The usage HTTP resource provides both access and storage statistics.
+Since each of these queries can be taxing in its own right, they are
+both omitted from the result by default:
+
+```curl
+curl http://localhost:8080/riak-cs/usage/8NK4FH2SGKJJM8JIP2GU
+```
+
+Sample responses (reformatted for easy reading):
+
+```json
+{
+  "Access" :"not_requested",
+  "Storage":"not_requested"
+}
+```
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<Usage>
+  <Access>not_requested</Access>
+  <Storage>not_requested</Storage>
+</Usage>
+```
+
+To request that access results be included, pass the query parameter `a`
+to the resource (any true-ish value will work, including just the bare
+`a`, `t`, `true`, `1`, `y`, and `yes`):
+
+```curl
+curl http://localhost:8080/riak-cs/usage/8NK4FH2SGKJJM8JIP2GU?a
+```
+
+Sample responses (reformatted for easy reading):
+
+```json
+{
+  "Access": [
+    { "Errors": [] }
+  ],
+  "Storage": "not_requested"
+}
+```
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<Usage>
+  <Access>
+    <Errors/>
+  </Access>
+  <Storage>not_requested</Storage>
+</Usage>
+```
+
+There are no statistics included in this report because the default time
+span is *now*, which is not available in the archives.
+
+## Specifying the Time Span to Report
+
+Request the time span you want data for by passing `s` (start) and `e`
+(end) query parameters to the resource. The slices for which data will
+be returned are all of those between `s` and `e`, as well as the slice
+including `s` and the slice including `e`.
+
+For example, for slices `A`-`I`:
+
+    A     B     C     D     E     F     G     H     I
+    |-----|-----|-----|-----|-----|-----|-----|-----|-----|
+                  s                    e
+
+Specifying an `s` that falls somewhere in slice `C` and an `e` that
+falls somewhere in slice `F` means that data for slices `C`, `D`, `E`,
+and `F` will be returned.
+
+Each should be provided in ISO 8601 format (`yyyymmddThhmmssZ`). For
+example, the following values would request the span between 2:00pm and
+4:00pm (GMT) on March 15, 2012:
+
+```curl
+http://localhost:8080/riak-cs/usage/8NK4FH2SGKJJM8JIP2GU?a&s=20120315T140000Z&e=20120315T160000Z
+```
+
+Sample responses (reformatted for easy reading):
+
+```json
+{
+  "Access": [
+    {
+      "Node": "riak_cs@127.0.0.1",
+      "Samples": [
+        {
+          "StartTime": "20120315T150000Z",
+          "EndTime":"20120315T152931Z",
+          "KeyWrite": { "BytesIn": 32505856, "Count": 1 },
+          "KeyRead": { "BytesOut": 32505856, "Count": 1 },
+          "BucketRead": { "BytesOut": 3633, "Count": 5 }
+        }
+      ]
+    },
+    {
+      "Errors": []
+    }
+  ],
+  "Storage": "not_requested"
+}
```

+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<Usage>
+  <Access>
+    <Nodes>
+      <Node name="riak_cs@127.0.0.1">
+        <Sample StartTime="20120315T150000Z" EndTime="20120315T152931Z">
+          <Operation type="KeyWrite">
+            <BytesIn>32505856</BytesIn>
+            <Count>1</Count>
+          </Operation>
+          <Operation type="KeyRead">
+            <BytesOut>32505856</BytesOut>
+            <Count>1</Count>
+          </Operation>
+          <Operation type="BucketRead">
+            <BytesOut>3633</BytesOut>
+            <Count>5</Count>
+          </Operation>
+        </Sample>
+      </Node>
+    </Nodes>
+    <Errors/>
+  </Access>
+  <Storage>not_requested</Storage>
+</Usage>
+```
+
+The behavior of the resource when the `s` or `e` parameter is omitted
+may change, but is currently as follows:
+
+* Omitting `e` will cause the resource to return only data for the slice
+  in which `s` falls
+* Omitting `s` will cause the resource to return data for all slices
+  from `e` through the current time
+
+Or, more simply, the default `s` is *now* and the default `e` is equal
+to `s`.
+
+### Time Span Limit
+
+To prevent excessive time and memory from being accidentally consumed,
+the amount of time that may be retrieved in any request is limited.
+
+The limit is configured by the `riak_cs` application environment
+variable `usage_request_limit`. The value is expressed as an integer
+number of archive intervals (see [Usage and Billing Data]({{< baseurl >}}riak/cs/2.1.2/cookbooks/usage-and-billing-data)
+for a description of archive intervals).
+
+The default value is `744`, which is 31 days at the default archive
+interval of one hour.
+
+## The Magic `usage` Bucket
+
+If you would prefer to use `s3cmd` or another S3 library to fetch access
+stats, you may do so by referencing objects in the global `usage`
+bucket. The format for objects in the usage bucket is:
+
+```bash
+s3://riak-cs/usage/UserKeyId/Options/StartTime/EndTime
+```
+
+Or, if `/` is automatically quoted (`%2f`) by your client, the `.`
+character may be used (this is also nicer for s3cmd, since it will
+automatically choose a more useful name for the file it creates):
+
+```bash
+s3://riak-cs/usage/UserKeyId.Options.StartTime.EndTime
+```
+
+That is, in the usage bucket, this is a sub-bucket named for the user's
+`key_id` (the `UserKeyId` part of the path).
+
+Inside the user's bucket is a sub-bucket named for the contents and
+their representation (the `Options` part of the path). This portion
+should be:
+
+* `aj` to receive access statistics as JSON data
+* `ax` to receive access statistics as XML data
+
+The next two portions of the path, `StartTime` and `EndTime`, are the
+start and end times for the window to report, respectively. These take
+the same ISO 8601 format that the `s` and `e` query parameters take in
+the other request method.
+
+As an example, making the same request as the last example, for
+JSON-format access statistics between 2:00pm and 4:00pm GMT on March 15,
+2012, looks like this:
+
+```bash
+s3cmd get s3://riak-cs/usage/8NK4FH2SGKJJM8JIP2GU/aj/20120315T140000Z/20120315T160000Z
+```
+
+**Note**: All objects in the `usage` bucket are read-only. `PUT` and
+`DELETE` requests will fail for them.
+
+**Note**: Regular users are only allowed to access the statistics bucket
+for their own `key_id`. The admin user is allowed to access any stat
+bucket.
+
+## Interpreting the Results
+
+Results of the access query are grouped by node. That is, within the
+`Access` field of the result there will be one entry for each Riak CS
+node that had data for the requested time span.
+
+Each node entry will contain one or more "samples" for each time slice
+in which the user accessed that Riak CS node. The sample will have a
+start time and end time describing what span the sample covers.
+
+The other entries of each sample are the operations the user performed
+during the sampled time. Operation statistics are provided as rollups
+for each operation type. The rollup includes one or more of the
+following fields:
+
+* `Count` --- the number of times this operation was used successfully
+* `UserErrorCount` --- the number of times this operation was used but
+  ended in a 400-499 response code
+* `SystemErrorCount` --- the number of times this operation was used but
+  ended in a 500-599 response code
+* `BytesIn` --- the number of bytes that were included in the request
+  bodies of successful operations
+* `UserErrorBytesIn` --- the number of bytes that were included in the
+  request bodies of operations that ended in 400-499 response codes
+* `SystemErrorBytesIn` --- the number of bytes that were included in the
+  request bodies of operations that ended in 500-599 response codes
+* `BytesOut` --- the number of bytes that were included in the response
+  bodies of successful operations
+* `UserErrorBytesOut` --- the number of bytes that were included in the
+  response bodies of operations that ended in 400-499 response codes
+* `SystemErrorBytesOut` --- the number of bytes that were included in
+  the response bodies of operations that ended in 500-599 response codes
+* `BytesOutIncomplete` --- the number of bytes that were sent in
+  response bodies before the client disconnected, if there was more that
+  could have been sent afterward (i.e. the byte count of partial
+  downloads)
+
+It is important to note that accesses are only logged when the
+Webmachine request finishes. This means that, for example, an upload
+started in one time slice but finished in another will only add to the
+`BytesIn` field for the time slice in which it finished, rather than
+splitting the statistics between the slices in which they actually
+happened.
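+
+As a post-processing sketch (this assumes the `jq` utility and is not
+part of Riak CS itself; authentication is omitted as in the examples
+above), the JSON form of a report can be reduced to a single number,
+such as the total bytes served by successful object reads across all
+nodes and samples:
+
+```bash
+# Sum the BytesOut of successful KeyRead operations over every node
+# and sample; key ID and time span are the placeholders used above.
+curl -s -H "Accept: application/json" \
+  "http://localhost:8080/riak-cs/usage/8NK4FH2SGKJJM8JIP2GU?a&s=20120315T140000Z&e=20120315T160000Z" |
+  jq '[.Access[] | .Samples // [] | .[] | .KeyRead.BytesOut // 0] | add'
+```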
+
+### Operation Types
+
+The operation types that are currently tracked are the following:
+
+Operation | Description
+:---------|:-----------
+`ListBuckets` | Lists a user's buckets (`GET /`)
+`UsageRead` | Reads a user's usage statistics (`GET /riak-cs/usage/user/*`)
+`BucketRead` | Lists the files in a bucket (`GET /bucket`)
+`BucketStat` | Checks for the existence of a bucket (`HEAD /bucket`)
+`BucketCreate` | Creates a bucket (`PUT /bucket`)
+`BucketDelete` | Deletes a bucket (`DELETE /bucket`)
+`BucketUnknown` | Unknown bucket operation (`?? /bucket`)
+`BucketReadACL` | Retrieves the ACL of a bucket (`GET /bucket?acl`)
+`BucketStatACL` | Checks for the existence of a bucket (`HEAD /bucket?acl`)
+`BucketWriteACL` | Changes the ACL of a bucket (`PUT /bucket?acl`)
+`BucketUnknownACL` | Unknown bucket ACL operation (`?? /bucket?acl`)
+`KeyRead` | Fetches an object (`GET /bucket/key`)
+`KeyStat` | Checks for the existence of an object (`HEAD /bucket/key`)
+`KeyWrite` | Uploads an object (`PUT /bucket/key`)
+`KeyDelete` | Deletes an object (`DELETE /bucket/key`)
+`KeyUnknown` | Unknown object operation (`?? /bucket/key`)
+`KeyReadACL` | Retrieves the ACL of a key (`GET /bucket/key?acl`)
+`KeyStatACL` | Checks for the existence of an object (`HEAD /bucket/key?acl`)
+`KeyWriteACL` | Changes the ACL of an object (`PUT /bucket/key?acl`)
+`KeyUnknownACL` | Unknown key ACL operation (`?? /bucket/key?acl`)
+`UnknownGET` | A `GET` was issued on an unrecognized resource, which likely means that the `riak_cs_access_logger:operation/1` function is out of date
+`UnknownHEAD` | See `UnknownGET`
+`UnknownPUT` | See `UnknownGET`
+`UnknownPOST` | See `UnknownGET`
+`UnknownDELETE` | See `UnknownGET`
+
+### Lookup Errors
+
+In addition to the node entries in the access results, there is also an
+entry for errors that Riak CS encountered while fetching access
+archives. The errors list is very similar to the samples of a node list:
+each entry will contain the start and end times of the period, as well
+as the "reason" the lookup failed.
+
+For example, if the Riak lookups that Riak CS uses end in timeout
+instead of success, the result including an errors list might look like
+the following (reformatted for easy reading):
+
+```json
+{
+  "Access": [
+    {
+      "Errors": [
+        {
+          "StartTime": "20120315T160000Z",
+          "EndTime": "20120315T170000Z",
+          "Reason": "timeout"
+        }
+      ]
+    }
+  ],
+  "Storage": "not_requested"
+}
+```
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<Usage>
+  <Access>
+    <Errors>
+      <Sample StartTime="20120315T160000Z" EndTime="20120315T170000Z">
+        <Reason>timeout</Reason>
+      </Sample>
+    </Errors>
+  </Access>
+  <Storage>not_requested</Storage>
+</Usage>
+```
diff --git a/content/riak/cs/2.1.2/cookbooks/querying-storage-statistics.md b/content/riak/cs/2.1.2/cookbooks/querying-storage-statistics.md
new file mode 100644
index 0000000000..811512dd34
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/querying-storage-statistics.md
@@ -0,0 +1,182 @@
+---
+title: "Querying Storage Statistics"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Storage Statistics"
+    identifier: "storage_stats"
+    weight: 301
+    parent: "develop"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/Querying-Storage-Statistics/
+  - /riak/cs/latest/cookbooks/querying-storage-statistics/
+---
+
+Storage statistics are tracked on a per-user basis, as rollups for
+slices of time. Querying these statistics is done via the
+`/riak-cs/usage/$USER_KEY_ID` resource.
+
+{{% note title="Note on terminology" %}}
+In this and other documents in the Riak CS documentation, the terms "storage"
+and "billing" are used interchangeably. The same goes for the terms "usage"
+and "access".
+{{% /note %}}
+
+
+> **Note**:
+>
+> Storage statistics are not calculated by default. Please read [Usage and Billing Data]({{< baseurl >}}riak/cs/2.1.2/cookbooks/usage-and-billing-data) for details about how to enable storage calculation archiving.
+
+The basics of querying storage statistics, including the URL used and
+the parameters for specifying the time slice, are the same as they are
+for [Querying Access Statistics]({{< baseurl >}}riak/cs/2.1.2/cookbooks/querying-access-statistics).
+Please refer to the descriptions there for more details.
+
+The examples on this page assume that the `admin_port` has not
+been configured to something other than the default CS port of `8080`.
+
+## Enable Storage Results
+
+> **Authentication Required**
+>
+> Queries to the usage resources described here must be authenticated as described in the [Authentication documentation]({{< baseurl >}}riak/cs/2.1.2/cookbooks/authentication). Keep this in mind when using `curl`. Authentication credentials for `s3cmd` or `s3-curl` can be specified in their respective configuration files.
+
+The usage HTTP resource provides both access and storage statistics.
+Since each of these queries can be taxing in its own right, they are
+both omitted from the result by default:
+
+```curl
+curl http://localhost:8080/riak-cs/usage/8NK4FH2SGKJJM8JIP2GU
+```
+
+Sample responses (reformatted for easy reading):
+
+```json
+{
+  "Access": "not_requested",
+  "Storage": "not_requested"
+}
+```
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<Usage>
+  <Access>not_requested</Access>
+  <Storage>not_requested</Storage>
+</Usage>
+```
+
+To request that storage results be included, pass the query parameter
+`b` to the resource (any true-ish value will work, including just the
+bare `b`, `t`, `true`, `1`, `y`, and `yes`):
+
+```curl
+curl http://localhost:8080/riak-cs/usage/8NK4FH2SGKJJM8JIP2GU?b
+```
+
+Sample responses (reformatted for easy reading):
+
+```json
+{
+  "Access": "not_requested",
+  "Storage": [
+    {
+      "Errors":[]
+    }
+  ]
+}
+```
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<Usage>
+  <Access>not_requested</Access>
+  <Storage>
+    <Errors/>
+  </Storage>
+</Usage>
+```
+
+There are no statistics included in this report because the default time
+span is *now*, which is not available in the archives.
+
+### S3 Object-style Access
+
+As described in [Querying Access Statistics]({{< baseurl >}}riak/cs/2.1.2/cookbooks/querying-access-statistics),
+these statistics are also available as S3 objects. To add storage
+statistics to the result, add the character `b` to the `Options` portion
+of the object's path. For example, the following command would produce
+storage statistics in XML format:
+
+```bash
+s3cmd get s3://riak-cs/usage/8NK4FH2SGKJJM8JIP2GU/bx/20120315T140000Z/20120315T160000Z
+```
+
+You may also pass both `b` and `a` as `Options` to fetch both types of
+stats, as in:
+
+```bash
+s3cmd get s3://riak-cs/usage/8NK4FH2SGKJJM8JIP2GU/abx/20120315T140000Z/20120315T160000Z
+```
+
+## Interpreting the Results
+
+The result of the storage query is one or more "samples" for each time
+slice in which storage was calculated for the user. The sample will have
+a start time and end time describing what span the sample covers.
+
+The other entries of each sample are the buckets the user owned during
+the sampled time. Bucket statistics are provided as rollups including
+each of the following fields:
+
+* `Objects` --- the number of active---not deleted and not incompletely
+  uploaded---files in the bucket
+* `Bytes` --- the total number of bytes stored in the files of the
+  bucket
+
+For example, a user that owns two buckets, `foo` and `bar`, where `foo`
+contains one 32MB file and `bar` contains four 32MB files, would have a
+sample similar to the following.
+
+Sample responses (reformatted for easy reading):
+
+```json
+{
+  "Access": "not_requested",
+  "Storage": [
+    {
+      "StartTime": "20120316T123318Z",
+      "EndTime": "20120316T123319Z",
+      "foo": {
+        "Objects": 1,
+        "Bytes": 32505856
+      },
+      "bar": {
+        "Objects": 4,
+        "Bytes": 130023424
+      }
+    },
+    {
+      "Errors": []
+    }
+  ]
+}
+```
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<Usage>
+  <Access>not_requested</Access>
+  <Storage>
+    <Sample StartTime="20120316T123318Z" EndTime="20120316T123319Z">
+      <Bucket name="foo">
+        <Objects>1</Objects>
+        <Bytes>32505856</Bytes>
+      </Bucket>
+      <Bucket name="bar">
+        <Objects>4</Objects>
+        <Bytes>130023424</Bytes>
+      </Bucket>
+    </Sample>
+    <Errors/>
+  </Storage>
+</Usage>
+```
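+
+Again as a small post-processing sketch (assuming the `jq` utility,
+with authentication omitted as in the examples above), the per-bucket
+rollups in the JSON form can be totalled into one byte count for the
+user:
+
+```bash
+# Add up the Bytes rollup of every bucket in every storage sample;
+# non-object fields (timestamps, error lists) are skipped.
+curl -s -H "Accept: application/json" \
+  "http://localhost:8080/riak-cs/usage/8NK4FH2SGKJJM8JIP2GU?b" |
+  jq '[.Storage[] | to_entries[] | select(.value | type == "object") | .value.Bytes // 0] | add'
+```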
+
+If any errors occurred during calculation for a bucket, the error will
+be returned (e.g., timeout) instead of that bucket's usage:
+
+```json
+ {
+   "StartTime": "20120316T123318Z",
+   "EndTime": "20120316T123319Z",
+   "baz": "{error,{timeout,[]}}",
+   "bar": {
+     "Objects": 4,
+     "Bytes": 130023424
+   }
+ },
+```
diff --git a/content/riak/cs/2.1.2/cookbooks/release-notes.md b/content/riak/cs/2.1.2/cookbooks/release-notes.md
new file mode 100644
index 0000000000..b48ee9069d
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/release-notes.md
@@ -0,0 +1,1084 @@
+---
+title: "Riak CS Release Notes"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Riak CS Release Notes"
+    identifier: "reference_release_notes"
+    weight: 102
+    parent: "reference"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/Riak-CS-Release-Notes/
+  - /riak/cs/2.1.2/cookbooks/Riak-CS-Release-Notes/
+  - /riak/cs/latest/cookbooks/release-notes/
+---
+
+[riak_cs_multibag_support]: {{< baseurl >}}riak/cs/2.1.2/cookbooks/supercluster
+
+[riak_cs_1.5_release_notes_upgrading]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#notes-on-upgrading
+[riak_cs_1.5_release_notes_upgrading_1]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#notes-on-upgrading-1
+[riak_cs_1.5_release_notes_incomplete_mutipart]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#incomplete-multipart-uploads
+[riak_cs_1.5_release_notes_leeway_and_disk]: https://github.com/basho/riak_cs/blob/release/1.5/RELEASE-NOTES.md#leeway-seconds-and-disk-space
+[riak_cs_2.0.0_release_notes]: https://github.com/basho/riak_cs/blob/develop/RELEASE-NOTES.md
+
+## Riak CS 2.1.0 Release Notes
+
+Released October 13, 2015.
+
+This is a backwards-compatible* release that introduces a new metrics
+system, garbage collection refinements, and several other new features.
+Riak S2 2.1 is designed to work with both Riak KV 2.0.5+ and 2.1.2+.
+
+**Note:** This release is backwards compatible only with the Riak S2 2.x
+series.
+
+### Riak KV 2.1.2 Usage Note
+Riak KV 2.1.2 includes a copy of `riak_cs_kv_multi_backend`, so there is
+no need to add lines specifying special `multi_backend` and `add_paths`
+configurations in advanced.config.
+
+Instead, you can set the following in riak.conf:
+
+```
+storage_backend = prefix_multi
+cs_version = 20100
+```
+
+If you need storage calculation, you will still require the `add_paths`
+config to load MapReduce code into Riak KV.
+
+
+### New Features
+#### Metrics
+New metrics have been added that enable you to determine the health of
+your Riak S2 system, as well as get reports on your storage utilization
+per bucket or user. The following stats items are available:
+
+ * All calls, latencies, and counters in the S3 API
+ * All calls, latencies, and counters in Stanchion
+ * All Riak Erlang client operations, latencies, and counters
+ * Information about the counts (active, idle, and overflow) for the
+   process pool and connection pool
+ * System information, versions, port count, and process count
+ * Memory information about the riak-cs virtual machine
+ * HTTP listener information: active sockets and waiting acceptors
+
+**Note:** Stats item names from prior to 2.0.x are not preserved; they
+have been renamed or removed. No backward compatibility is maintained.
+Please see [the documentation]({{< baseurl >}}riak/cs/latest/cookbooks/monitoring-and-metrics/)
+for more information.
+
+* [[PR 1189](https://github.com/basho/riak_cs/pull/1189)]
+* [[PR 1180](https://github.com/basho/riak_cs/pull/1180)]
+* [[PR 1214](https://github.com/basho/riak_cs/pull/1214)]
+* [[PR 1194](https://github.com/basho/riak_cs/pull/1194)]
+* [[PR 99](https://github.com/basho/stanchion/pull/99)]
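+
+A quick way to see what the new metrics system exposes on a running node
+is the stats resource. This is only a sketch: the exact set of stats
+names varies between releases, and if `admin_auth_enabled` is on, the
+request must carry admin credentials:
+
+```curl
+curl http://localhost:8080/riak-cs/stats
+```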
+
+Additional storage usage metrics are also available. These metrics are
+gathered during storage calculation. Gathering these metrics is off by
+default, but you can turn it on by setting `detailed_storage_calc` to
+`true` in advanced.config. When you enable this option, you have access
+to information about how many manifests are `writing`, `pending_delete`,
+`scheduled_delete`, and `active`, which is not visible via the API.
+
+**Note:** Metrics do not always correctly reflect actual disk usage. For
+instance, `writing` may indicate more space than is actually used. Or,
+for example, if an upload was cancelled midway, the calculation does not
+know how much actual storage space is consumed. In the same way,
+`scheduled_delete` also may not reflect the exact amount of disk usage
+because blocks might already be partially deleted by garbage collection.
+
+* [[PR 1120](https://github.com/basho/riak_cs/pull/1120)]
+
+#### `riak-cs-admin`
+The following administration CLIs have been replaced by the
+[`riak-cs-admin` command]({{< baseurl >}}riak/cs/latest/cookbooks/command-line-tools/):
+
+* `riak-cs-storage`
+* `riak-cs-gc`
+* `riak-cs-access`
+* `riak-cs-stanchion`
+
+The commands listed above are deprecated and will be removed in future
+releases.
+
+* [[PR 1175](https://github.com/basho/riak_cs/pull/1175)]
+
+#### Garbage Collection Refinements
+Several new options have been added to the `riak-cs-admin gc` command:
+
+* `active_delete_threshold` is an option to avoid delegating manifest and
+  block deletion to the garbage collector. This option relieves the
+  garbage collector from having to delete small objects, which can
+  improve performance in cases where the garbage collector does not catch
+  up with DELETE Object API calls or where its elapsed time is dominated
+  by small objects. [[PR 1174](https://github.com/basho/riak_cs/pull/1174)]
+* `--start` and `--end` options have been added to the `riak-cs-admin gc
+  batch` command to specify the start and end of a manual batch
+  execution. Note that the `--start` flag on the command line will
+  overwrite the `epoch_start` option in advanced.config.
+  [[PR 1147](https://github.com/basho/riak_cs/pull/1147)]
+* `--leeway` has been added to create a temporary leeway period whose
+  value is used only once and not repeated at the next run, and
+  `--max-workers` has been added to allow you to override the concurrency
+  value temporarily for a single run of the garbage collector.
+  [[PR 1147](https://github.com/basho/riak_cs/pull/1147)]
+* Riak S2 2.0 (and older) has a race condition where fullsync replication
+  and garbage collection may resurrect deleted blocks without any way to
+  delete them again. When real-time replication is combined with a
+  garbage collection bucket entry object being dropped from the real-time
+  queue, blocks may remain on the sink side without being collected. Riak
+  S2 2.1 introduces deterministic garbage collection to avoid this race
+  with fullsync replication. Additionally, garbage collection and
+  fullsync replication can run concurrently and work on the same blocks
+  and manifests: you can now specify a range of time using the `--start`
+  and `--end` flags of `riak-cs-admin gc batch` in order to collect
+  deleted objects synchronously on both the sink and source sides.
+  [[PR 1147](https://github.com/basho/riak_cs/pull/1147)]
+* `riak-cs-admin gc earliest-keys` is available so you can find the
+  oldest entry after `epoch_start` in garbage collection. With this
+  option, you can stay informed of garbage collection progress.
+  [[PR 1160](https://github.com/basho/riak_cs/pull/1160)]
+
+More information on garbage collection can be found in the
+[documentation]({{< baseurl >}}riak/cs/latest/cookbooks/garbage-collection/).
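+
+For instance, a manually bounded batch run might look like the sketch
+below. The timestamps, leeway, and worker count are illustrative values
+only; check the usage output of `riak-cs-admin gc` on your release for
+the exact argument syntax:
+
+```bash
+# Collect garbage created between two points in time, with a one-off
+# leeway of 0 seconds and 4 concurrent workers
+riak-cs-admin gc batch --start 20150801T000000Z --end 20150802T000000Z \
+    --leeway 0 --max-workers 4
+
+# Report the oldest garbage collection entry after epoch_start
+riak-cs-admin gc earliest-keys
+```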
+
+
+### Additions
+#### Open Source
+* A MapReduce optimisation in fetching Riak objects was introduced in
+  Riak 2.1. Now, Riak CS 2.1 introduces an option to use that
+  optimisation in storage calculation. It is off by default, but it can
+  be enabled by setting `use_2i_for_storage_calc` to `true` in
+  advanced.config. This reduces I/O in LevelDB by 50%.
+  [[PR 1089](https://github.com/basho/riak_cs/pull/1089)]
+* Erlang/OTP 17 support is now included. [[PR 1245](https://github.com/basho/riak_cs/pull/1245) and [PR 1040](https://github.com/basho/stanchion/pull/1040)]
+* A module-level hook point for limiting user access and quota usage is
+  now available with very preliminary, simple, node-wide limiting example
+  modules. Operators can make, plug in, or combine different modules for
+  quota-limiting, rate-limiting, or bandwidth-limiting depending on their
+  unique requirements. [[PR 1118](https://github.com/basho/riak_cs/pull/1118)]
+* An orphaned block scanner is now available. [[PR 1133](https://github.com/basho/riak_cs/pull/1133)]
+* `riak-cs-admin audit-bucket-ownership` is a new tool to check the
+  integrity between users and buckets. For example, it can be used in
+  cases where a bucket is visible when listing buckets but not
+  accessible, or where a bucket is visible and exists but could not be
+  deleted. [[PR 1202](https://github.com/basho/riak_cs/pull/1202)]
+* The following log rotation items have been added to cuttlefish:
+  * `log.console.size`
+  * `log.console.rotation`
+  * `log.console.rotation.keep`
+  * `log.error.rotation`
+  * `log.error.rotation.keep`
+  * `log.error.size`
+
+[[PR 1164](https://github.com/basho/riak_cs/pull/1164) and [PR 97](https://github.com/basho/stanchion/pull/97)]
+
+* `riak_cs_wm_common` now has a default callback of `multiple_choices`,
+  which prevents `code_server` from becoming a bottleneck.
+  [[PR 1181](https://github.com/basho/riak_cs/pull/1181)]
+* An option has been added to replace the `PR=all` user GET option with
+  `PR=one` just before authentication. This option improves latency,
+  especially in the presence of slow (or actually-failing) nodes blocking
+  the whole request flow because of `PR=all`. When enabled, a user's
+  owned-bucket list is never pruned after a bucket is deleted; instead it
+  is just marked as deleted. [[PR 1191](https://github.com/basho/riak_cs/pull/1191)]
+* An info log has been added when starting a storage calculation batch.
+  [[PR 1238](https://github.com/basho/riak_cs/pull/1238)]
+* `GET Bucket` requests now have clearer responses. A 501 stub for Bucket
+  lifecycle and a simple stub for Bucket requestPayment have been added.
+  [[PR 1223](https://github.com/basho/riak_cs/pull/1223)]
+* Several user-friendly features have been added to
+  [`riak-cs-debug`]({{< baseurl >}}riak/cs/latest/cookbooks/command-line-tools/):
+  fine-grained information gathering options, user-defined filtering for
+  configuration files, and verbose output for failed commands.
+  [[PR 1236](https://github.com/basho/riak_cs/pull/1236)]
+
+#### Enterprise
+* MDC has `proxy_get`, which makes block objects propagate to site
+  clusters when they are requested. Now, multibag configuration with MDC
+  supports `proxy_get`. [[PR 1171](https://github.com/basho/riak_cs/pull/1171) and [PR 25](https://github.com/basho/riak_cs_multibag/pull/25)]
+* Multibag has been renamed "Supercluster". A bag (a set of replicated
+  underlying Riak clusters) is now **a member of a supercluster**. The
+  `riak-cs-multibag` command has been renamed `riak-cs-supercluster` as
+  well. [[PR 1257](https://github.com/basho/riak_cs/pull/1257)], [[PR 1260](https://github.com/basho/riak_cs/pull/1260)], [[PR 106](https://github.com/basho/stanchion/pull/106)], [[PR 107](https://github.com/basho/stanchion/pull/107)] and [[PR 31](https://github.com/basho/riak_cs_multibag/pull/31)].
+* Several internal operation tools have been added to help diagnose or
+  address issues. [[PR 1145](https://github.com/basho/riak_cs/pull/1145),
+  [PR 1134](https://github.com/basho/riak_cs/pull/1134), and
+  [PR 1133](https://github.com/basho/riak_cs/pull/1133)]
+* Added a generic function for manual operations to resolve siblings of
+  manifests and blocks, which will assist Basho Client Service Engineers
+  with troubleshooting and solving issues. [[PR 1188](https://github.com/basho/riak_cs/pull/1188)]
+
+
+### Changes
+* Dependency versions have been updated in Riak S2 and Stanchion as
+  follows: cuttlefish 2.0.4, node_package 2.0.3, riak-erlang-client
+  2.1.2, lager 2.2.0, lager_syslog 2.1.2, eper 0.92 (Basho patched),
+  cluster_info 2.0.3, riak_repl_pb_api 2.1.2, and riak_cs_multibag 2.1.0.
+  [[PR 1190](https://github.com/basho/riak_cs/pull/1190), [PR 1197](https://github.com/basho/riak_cs/pull/1197), [PR 27](https://github.com/basho/riak_cs_multibag/pull/27), [PR 1245](https://github.com/basho/riak_cs/pull/1245), and [PR 104](https://github.com/basho/stanchion/pull/104)].
+* Riak CS has moved from Folsom to Exometer. [[PR 1165](https://github.com/basho/riak_cs/pull/1165) and [PR 1180](https://github.com/basho/riak_cs/pull/1180)]
+* Improvements have been made to error tracing for retrieving blocks from
+  client GET requests. There is complex logic to resolve blocks when a
+  GET is requested by the client. First, Riak CS tries to retrieve a
+  block with `n_val=1`. If that fails, a retry will be done using
+  `n_val=3`. If the block cannot be resolved locally, `proxy_get` is
+  enabled, and the system is configured with datacenter replication, then
+  Riak CS will try to perform a proxied GET to the remote site. The
+  fallback and retry logic is complex and hard to trace, especially in a
+  faulty or unstable situation. This improvement adds error tracing for
+  the whole sequence described above, which will help diagnose issues.
+  Specifically, for each block, the block server stacks all errors
+  returned from the Riak client and reports the reason for every error as
+  well as the type of call in which the error occurred.
+  [[PR 1177](https://github.com/basho/riak_cs/pull/1177)]
+* Using the `GET Bucket` API with a specified prefix to list objects in a
+  bucket needed optimization.
+  It had been specifying end keys for folding objects in Riak too
+  loosely. With this change, a tighter end key is specified for folding
+  objects in Riak, which omits unnecessary folds in vnodes.
+  [[PR 1233](https://github.com/basho/riak_cs/pull/1233)]
+* A limitation on the maximum length of keys has been introduced. The
+  limit is 1024 bytes by default, meaning that no key longer than 1024
+  bytes can be PUT, GET, or DELETEd unless `max_key_length` is explicitly
+  set to more than `1024` in riak-cs.conf. If you want to preserve the
+  old key-length behaviour, you may set `max_key_length` to `unlimited`.
+  [[PR 1233](https://github.com/basho/riak_cs/pull/1233)]
+* If a faulty cluster had several nodes down, the block server could
+  mistakenly conclude that a block was already deleted and issue a false
+  not-found, which could lead to a block leak. The PR default has been
+  set to `quorum` in an attempt to avoid this problem. Updates have also
+  been made to make sure at least a single replica of a block is written
+  to one of the primary nodes by setting the PW default to `1`.
+  Additionally, measures are in place to prevent the block server from
+  crashing when "not found" errors are returned due to a particular block
+  of an object not being found in the cluster. Instead, unreachable
+  blocks are skipped and the remaining blocks and manifests are
+  collected. Since the PR and PW values for blocks have been increased,
+  the availability of PUTs and the throughput of garbage collection may
+  decrease: a few unreachable Riak nodes may prevent PUT requests from
+  returning successfully and may prevent garbage collection from
+  collecting all blocks until the unreachable nodes come back.
+  [[PR 1242](https://github.com/basho/riak_cs/pull/1242)]
+* The infinity timeout option has been set so that several functions make
+  synchronous `gen_fsm` calls indefinitely, which prevents unnecessary
+  timeouts. [[PR 1249](https://github.com/basho/riak_cs/pull/1249)]
+
+
+### Bugs Fixed
+* [[Issue 1097](https://github.com/basho/riak_cs/issues/1097)/[PR 1212](https://github.com/basho/riak_cs/pull/1212)] When
+  `x-amz-metadata-directive=COPY` was specified, Riak CS did not actually
+  COPY the metadata of the original resource; instead, it would treat it
+  as a `REPLACE`. When directed to `x-amz-metadata-directive=REPLACE`,
+  Riak CS would `REPLACE` the `Content-Type`. Correct handling for the
+  `x-amz-metadata-directive` has been added to the PUT Object Copy API.
+* [[Issue 1099](https://github.com/basho/riak_cs/issues/1099)/[PR 1096](https://github.com/basho/riak_cs/pull/1096)] There
+  was an unnecessary NextMarker in Get Bucket's response if
+  `CommonPrefixes` contained the last key. Also fixed handling of
+  uploaded parts that should be deleted after a Multipart Complete
+  request.
+* [[Issue 939](https://github.com/basho/riak_cs/issues/939)/[PR 1200](https://github.com/basho/riak_cs/pull/1200)] Copy
+  requests without Content-Length request headers failed with 5xx errors.
+  Such requests are now allowed without a Content-Length header in Copy
+  API calls. Additionally, Copy API calls with Content-Lengths of more
+  than zero have been given explicit errors.
+* [[Issue 1143](https://github.com/basho/riak_cs/issues/1143)/[PR 1144](https://github.com/basho/riak_cs/pull/1144)] Manual
+  batch start caused the last batch time to appear to be in the future.
+  All temporal shifts have been fixed.
+* [[PR 1162](https://github.com/basho/riak_cs/pull/1162)/[PR 1163](https://github.com/basho/riak_cs/pull/1163)] Fix a
+  configuration system bug where Riak CS could not start if
+  `log.syslog=on` was set.
+* [[Issue 1169](https://github.com/basho/riak_cs/issues/1169)/[PR 1200](https://github.com/basho/riak_cs/pull/1200)] The
+  error response of the PUT Copy API call showed the target resource path
+  rather than the source path when the source was not found or not
+  accessible by the requesting user. It now shows the source path
+  appropriately.
+* [[PR 1178](https://github.com/basho/riak_cs/pull/1178)] Multiple IP
+  address descriptions under a single condition statement of a bucket
+  policy were not being properly parsed as lists.
+* [[PR 1185](https://github.com/basho/riak_cs/pull/1185)] If
+  `proxy_get_active` was defined in riak-cs.conf as anything other than
+  enabled or disabled, there would be excessive log output. Now,
+  `proxy_get_active` also accepts non-boolean definitions.
+* [[PR 1184](https://github.com/basho/riak_cs/pull/1184)] `put_gckey_timeout`
+  was used instead of `put_manifest_timeout` when a delete process tried
+  to update the status of manifests.
+* [[Issue 1201](https://github.com/basho/riak_cs/issues/1201)/[PR 1230](https://github.com/basho/riak_cs/pull/1230)] A
+  single slow or silently failing node caused intermittent user fetch
+  failures. A grace period has been added so `riakc_pb_socket` can
+  attempt to reconnect.
+* [[PR 1232](https://github.com/basho/riak_cs/pull/1232)] Warning logs
+  were being produced for unsatisfied primary reads. Since users are
+  objects in Riak CS, and CS tries to retrieve these objects for
+  authentication on almost every request, the retrieval option (`PR=all`)
+  would fail and produce a log entry if even one primary vnode was
+  stopped or unresponsive. Given that Riak is set up to be highly
+  available, these logs were quite noisy. Now, the "No WM route" log from
+  prior to Riak CS 2.1 has been revived. Also, the log severity has been
+  downgraded to debug, since it indicates a client error in all but the
+  development phase.
+* [[PR 1237](https://github.com/basho/riak_cs/pull/1237)] The
+  `riak-cs-admin` status command exit code was non-zero, even on
+  successful execution. It will now return zero.
+* [[Issue 1097](https://github.com/basho/riak_cs/issues/1097)/[PR 1212](https://github.com/basho/riak_cs/pull/1212) and [PR 4](https://github.com/basho/s3-tests/pull/4)] Riak
+  S2 did not copy the metadata of an original resource when the
+  `x-amz-metadata-directive=COPY` command was used, nor when no
+  `x-amz-metadata-directive` was specified. Handling of the
+  `x-amz-metadata-directive` command in the PUT Object Copy API has been
+  added.
+* [[Issue 1097](https://github.com/basho/riak_cs/issues/1097)/[PR 1212](https://github.com/basho/riak_cs/pull/1212) and [PR 4](https://github.com/basho/s3-tests/pull/4)] Riak
+  CS did not store `Content-Type` in COPY requests when the
+  `x-amz-metadata-directive=REPLACE` command was used. Handling of the
+  `x-amz-metadata-directive` command in the PUT Object Copy API has been
+  added.
+* [[Issue 1097](https://github.com/basho/riak_cs/issues/1097)/[PR 1212](https://github.com/basho/riak_cs/pull/1212) and [PR 4](https://github.com/basho/s3-tests/pull/4)] Fixed
+  the handling of uploaded parts that should be deleted after a Multipart
+  Complete request.
+* [[Issue 1214](https://github.com/basho/riak_cs/issues/1244)/[PR 1246](https://github.com/basho/riak_cs/pull/1246)] Prior
+  to Riak S2 2.1.0, a PUT Copy API command with identical source and
+  destination changed user metadata (`x-amz-meta-*` headers) but failed
+  to update the Content-Type. The Content-Type is now correctly updated
+  by the API call.
+* [[PR 1261](https://github.com/basho/riak_cs/pull/1261)/[PR 1263](https://github.com/basho/riak_cs/pull/1263)] Fix
+  `riak-cs-debug` to include `app.config` when no generated configuration
+  files are found (i.e. when `riak-cs.conf` is not used).
+
+
+## Riak CS 2.0.1 Release Notes
+
+### General Information
+
+This is a bugfix release.
+
+### Bug Fixes
+
+* [riak_cs/#1125](https://github.com/basho/riak_cs/issues/1125) - Fix
+  config item `gc.interval` not working when `infinity` is set.
+  * [riak_cs/pull/1126](https://github.com/basho/riak_cs/pull/1126)
+
+* [riak_cs/#1109](https://github.com/basho/riak_cs/issues/1109) - Add
+  `log.access` switch to disable access logging.
+  * [riak_cs/pull/1115](https://github.com/basho/riak_cs/pull/1115)
+
+* [riak_cs/#1109](https://github.com/basho/riak_cs/issues/1109) - Add
+  missing riak-cs.conf items: `max_buckets_per_user` and `gc.batch_size`.
+  * [riak_cs/pull/1115](https://github.com/basho/riak_cs/pull/1115)
+
+* [riak_cs/#1129](https://github.com/basho/riak_cs/issues/1129) - Fix
+  bugs around subsequent space characters for the Delete Multiple Objects
+  API and the user administration API with XML content.
+  * [riak_cs/pull/1135](https://github.com/basho/riak_cs/pull/1135)
+
+## Riak CS 2.0.0
+
+**For a complete set of release notes, upgrade instructions, and changed
+configuration settings, please see the
+[Full Riak CS 2.0.0 Release Notes][riak_cs_2.0.0_release_notes]**
+
+### General Information
+
+- This release updates Riak CS to work with Riak 2.0.5.
+- We have simplified the configuration system.
+- All official patches for older versions of Riak and Riak CS have been
+  included in these releases. There is no need to apply any patches
+  released for Riak CS 1.4.x or 1.5.x to the Riak CS 2.0.x series.
+  Patches released for Riak CS 1.4.x or 1.5.x cannot be directly applied
+  to Riak CS 2.0.x because the version of Erlang/OTP shipped with Riak CS
+  has been updated in version 2.0.0.
+- Please review the complete Release Notes before upgrading.
+
+### Known Issues & Limitations
+
+- None.
+
+### Changes and Additions
+
+- Changed the name of `gc_max_workers` to `gc.max_workers`, and lowered
+  the default value from 5 to 2 (#1110) to reduce the workload on the CS
+  cluster.
+- Partial support of the GET Location API (#1057)
+- Add very preliminary AWS v4 header authentication, without query string
+  authentication, object chunking, or payload checksums (#1064). There is
+  still a lot of work to do before v4 authentication can be used
+  reliably.
+- Put Enterprise deps into the dependency graph (#1065)
+- Introduce Cuttlefish (#1020, #1068, #1076, #1086, #1090)
+  (Stanchion #88, #90, #91)
+- Yessir Riak client to measure performance (#1072, #1083)
+- Inspector improvement with usage change (#1084)
+- Check signed date in S3 authentication (#1067)
+- Update `cluster_info` and various dependent libraries (#1087, #1088)
+  (Stanchion #85, #87, #93)
+- Storage calculation optimization (#1089): with Riak >= 2.1, the
+  `use_2i_for_storage_calc` flag might relieve disk reads during storage
+  calculation.
+
+### Bugfixes
+
+- Fix wrong webmachine log handler name (#1075)
+- Fix lager crash (#1038)
+- Fix hardcoded crashdump path (#1052)
+- Suppress unnecessary warnings (#1053)
+- Multibag simpler state transition (Multibag #21)
+- GC block deletion failure after transition to multibag environment
+  (Multibag #19)
+- After transition from a single-bag to a multibag configuration,
+  connection closing caused errors for objects stored before the
+  transition (Multibag #18).
+
+### Deprecation Notices
+
+- Multi-Datacenter Replication using v2 replication support has been
+  deprecated.
+- The old list-objects behavior, which required
+  `fold_objects_for_list_keys` to be `false`, has been deprecated and
+  *will be removed* in the next major version.
+- Non-paginated GC in cases where `gc_paginated_indexes` is `false` has
+  been deprecated and *will be removed* in the next major version.
+
+### General Notes on Upgrading to Riak CS 2.0.0
+
+Upgrading a Riak CS system involves upgrading the underlying Riak, Riak
+CS, and Stanchion installations. The upgrade process can be non-trivial
+depending on your existing system configurations and the combination of
+sub-system versions. This document contains general instructions and
+notices on upgrading the whole system to Riak CS 2.0.0.
+
+#### New Configuration System
+
+Riak 2.0.0 introduced a new configuration system (`riak.conf`), and as of
+Riak CS 2.0.0, Riak CS now supports the new configuration style. Both
+Riak and Riak CS still support the older style configurations through
+`app.config` and `vm.args`.
+
+**Basho recommends moving to the new unified configuration system**,
+using the files `riak.conf`, `riak-cs.conf` and `stanchion.conf`.
+
+#### Note on Legacy app.config Usage
+
+**If you choose to use the legacy `app.config` files for Riak CS and/or
+Stanchion, some parameters have changed names and must be updated**.
+
+In particular, for the Riak CS `app.config`:
+
+ - `cs_ip` and `cs_port` have been combined into `listener`.
+ - `riak_ip` and `riak_pb_port` have been combined into `riak_host`.
+ - `stanchion_ip` and `stanchion_port` have been combined into
+   `stanchion_host`.
+ - `admin_ip` and `admin_port` have been combined into `admin_listener`.
+ - `webmachine_log_handler` has become `webmachine_access_log_handler`.
+ - `{max_open_files, 50}` has been deprecated and should be replaced with
+   `{total_leveldb_mem_percent, 30}`.
+
+For the Stanchion `app.config`:
+
+ - `stanchion_ip` and `stanchion_port` have been combined into
+   `listener`.
+ - `riak_ip` and `riak_port` have been combined into `riak_host`.
+
+Each of the above pairs follows a similar form. Where the old form used
+separate IP and Port parameters, the new form combines those as
+`{new_option, {"IP", Port}}`. For example, if your legacy `app.config`
+configuration was previously:
+
+```
+{riak_cs, [
+    {cs_ip, "127.0.0.1"},
+    {cs_port, 8080},
+    . . .
+]},
+```
+
+It should now read:
+
+```
+{riak_cs, [
+    {listener, {"127.0.0.1", 8080}},
+    . . .
+]},
+```
+
+and so on.
+
+#### Note: Upgrading from Riak CS 1.5.3 or Older
+
+[Some key objects changed names][riak_cs_1.5_release_notes_upgrading]
+after the upgrade. Applications may need to change their behaviour due to
+this bugfix.
+
+#### Note: Upgrading from Riak CS 1.5.0 or Older
+
+A [bucket number limitation per user][riak_cs_1.5_release_notes_upgrading_1]
+was introduced in 1.5.1. Users who have more than 100 buckets cannot
+create any new buckets after the upgrade unless the limit is extended in
+the system configuration.
+
+#### Note: Upgrading From Riak CS 1.4.x
+
+An operational procedure [to clean up incomplete multipart uploads under
+deleted buckets][riak_cs_1.5_release_notes_incomplete_mutipart] is
+needed. Otherwise, new buckets with names that used to exist can't be
+created; the operation will fail with 409 Conflict.
+
+Leeway seconds and disk space should also be carefully watched during the
+upgrade, because timestamp management of garbage collection was changed
+in the 1.5.0 release.
+Consult the "[Leeway seconds and disk
+space][riak_cs_1.5_release_notes_leeway_and_disk]" section of the 1.5
+release notes for a more detailed description.
+
+#### Note: Upgrading From Riak CS 1.3.x or Older
+
+Basho supports upgrading from the two previous major versions to the
+latest release. Thus, this document will only cover upgrading from Riak
+CS versions 1.4.x and 1.5.x.
+
+To upgrade to Riak CS 2.0.0 from versions prior to Riak CS 1.4.0,
+operators will need to first upgrade their system to Riak CS version
+1.4.5 or 1.5.4. Upgrading to Riak CS 1.5.4 is recommended. The underlying
+Riak installation must also be upgraded to the Riak 1.4.x series,
+preferably version 1.4.12.
+
+### General Upgrade Instructions
+
+**For a complete set of release notes, upgrade instructions, and changed
+configuration settings, please see the
+[Full Riak CS 2.0.0 Release Notes][riak_cs_2.0.0_release_notes]**
+
+#### All Scenarios
+
+We recommend updating Stanchion before all other subsystems. Be careful
+not to have multiple live Stanchion nodes accessible from Riak CS nodes
+at the same time.
+
+Repeat these steps on each node running Stanchion:
+
+1. Stop Stanchion
+2. Back up all Stanchion configuration files
+3. Uninstall the current Stanchion package
+4. Install the new Stanchion 2.0.0 package
+5. Migrate the Stanchion configuration (See below)
+6. Start Stanchion
+
+#### Scenario: If Riak CS and Riak are both running on the same host.
+
+Repeat these steps on every host:
+
+1. Stop Riak CS
+2. Stop Riak
+3. Back up all Riak and Riak CS configuration files and remove all
+   patches
+4. Uninstall the current Riak CS package
+5. Uninstall the current Riak package
+6. Install the new Riak package
+7. Install the new Riak CS 2.0.0 package
+8. Migrate the Riak configuration (See below)
+9. Migrate the Riak CS configuration (See below)
+10. Start Riak
+11. Start Riak CS
+
+#### Scenario: If Riak CS and Riak are running on separate hosts.
+
+When Riak CS is not installed on the same host as Riak, Riak CS can be
+upgraded at any time while the corresponding remote Riak node is alive.
+
+Repeat these steps on every host:
+
+1. Stop Riak CS
+2. Back up all configuration files and remove all patches
+3. Uninstall the current Riak CS package
+4. Install the new Riak CS 2.0.0 package
+5. Migrate the Riak CS configuration (See below)
+6. Start Riak CS
+
+**For a complete set of release notes, upgrade instructions, and changed
+configuration settings, please see the
+[Full Riak CS 2.0.0 Release Notes][riak_cs_2.0.0_release_notes]**
+
+
+
+
+## Riak CS 1.5.4
+
+### Fixes
+
+* Disable backpressure sleep
+  [riak_cs/#1041](https://github.com/basho/riak_cs/pull/1041)
+  * **Problem**: When backpressure sleep is triggered due to the
+    presence of many siblings, this can lead to even more siblings.
+  * **Solution**: This change prevents unnecessary sibling growth in
+    cases where (a) backpressure is triggered under high upload
+    concurrency and (b) uploads are interleaved during backpressure
+    sleep. This issue does not affect multipart uploads.
+* Fix an incorrect path rewrite in the S3 API caused by unnecessary URL
+  decoding [riak_cs/#1040](https://github.com/basho/riak_cs/pull/1040)
+  * **Problem**: Due to the incorrect handling of URL
+    encoding/decoding, object keys including
+    `%[0-9a-fA-F][0-9a-fA-F]` (as a regular expression) or `+` had
+    been mistakenly decoded. As a consequence, the former case was
+    decoded to some other binary, while in the latter case `+` was
+    replaced with a space.
+    In both cases, there was a possibility of an implicit data
+    overwrite. For the latter case, an overwrite occurs between an
+    object whose key includes `+`, e.g. `foo+bar`, and a different
+    object whose key is the same except that the `+` is replaced with a
+    space, e.g. `foo bar`, and vice versa.
+  * **Solution**: Fix the incorrect handling of URL encoding/decoding.
+    This fix also addresses
+    [riak_cs/#910](https://github.com/basho/riak_cs/pull/910) and
+    [riak_cs/#977](https://github.com/basho/riak_cs/pull/977).
+
+### Notes on Upgrading
+
+After upgrading to Riak CS 1.5.4, objects including
+`%[0-9a-fA-F][0-9a-fA-F]` (as a regular expression) or `+` in their key,
+e.g. `foo+bar`, become invisible and can be seen as objects with a
+different name. For the former case, objects will be referred to with
+the unnecessarily decoded key; in the latter case, those objects will
+be referred to with keys in which `+` is replaced with a space, e.g.
+`foo bar`, by default.
+
+The table below provides examples for URLs including
+`%[0-9a-fA-F][0-9a-fA-F]` and how they will work before and after the
+upgrade.
+
+ | Before upgrade | After upgrade
+:--|:---------------|:-------------
+written as | `a%2Fkey` | `-`
+read as | `a%2Fkey` or `a/key` | `a/key`
+listed as | `a/key` | `a/key`
+
+Examples for unique objects including `+` or an empty space through the
+upgrade:
+
+ | Before upgrade | After upgrade
+:--|:---------------|:-------------
+written as | `a+key` | `-`
+read as | `a+key` or `a key` | `a key`
+listed as | `a key` | `a key`
+
+Examples for unique objects with an empty space in the URL:
+
+ | Before upgrade | After upgrade
+:--|:---------------|:-------------
+written as | `a key` | `-`
+read as | `a+key` or `a key` | `a key`
+listed as | `a key` | `a key`
+
+This fix also changes the path format in access logs from the
+single-URL-encoded style to the doubly-encoded URL style. Below is an
+example of the old style:
+
+```
+127.0.0.1 - - [07/Jan/2015:08:27:07 +0000] "PUT /buckets/test/objects/path1%2Fpath2%2Fte%2Bst.txt HTTP/1.1" 200 0 "" ""
+```
+
+And here is the analogous URL in the new style:
+
+```
+127.0.0.1 - - [07/Jan/2015:08:27:07 +0000] "PUT /buckets/test/objects/path1%2Fpath2%2Fte%252Bst.txt HTTP/1.1" 200 0 "" ""
+```
+
+Note that the object path has changed from `path1%2Fpath2%2Fte%2Bst.txt`
+to `path1%2Fpath2%2Fte%252Bst.txt` between the two examples above.
+
+If the old behavior is preferred, e.g. because applications using Riak
+CS have been written to use the older style, you can retain that
+behavior on upgrade by modifying your Riak CS configuration. Change the
+`rewrite_module` setting as follows:
+
+```appconfig
+{riak_cs, [
+    %% Other settings
+    {rewrite_module, riak_cs_s3_rewrite_legacy},
+    %% Other settings
+]}
+```
+
+**Note**: The old behavior is technically incorrect and implicitly
+overwrites data in the ways described above. Retain the old behavior
+with caution.
+
+## Riak CS 1.5.3
+
+### Changes
+
+* Add the `read_before_last_manifest_write` option to help avoid sibling
+  explosion for use cases involving high churn and concurrency on a
+  fixed set of keys. When sibling explosion occurs, the objects stored in
+  Riak can become very large and severely impair the functioning of the
+  system. The trade-off in enabling this option is the latency penalty of
+  doing an extra read before the final write of an object's manifest to
+  Riak. However, for use cases matching the description, the minor
+  latency penalty is preferable to the consequences of sibling explosion.
+  [riak_cs/#1011](https://github.com/basho/riak_cs/pull/1011)
+* Add configurable timeouts for all Riak CS interactions with Riak to
+  provide more flexibility in operational situations.
+  [riak_cs/#1021](https://github.com/basho/riak_cs/pull/1021)
+
+### Fixes
+
+* Fix storage calculation
+  [riak_cs/#996](https://github.com/basho/riak_cs/pull/996)
+  * **Problem**: Data for deleted buckets would be included in the
+    calculation results
+  * **Solution**: Storage calculations no longer include deleted buckets
+
+### Known Issues
+
+None
+
+### Download
+
+Please see the [Riak CS Downloads
+Page]({{< baseurl >}}riak/cs/latest/downloads/).
+
+### Feedback
+
+We would love to hear from you. You can reach us in any of the following
+venues:
+
+* [Basho mailing
+  list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+* [The official Basho docs](https://github.com/basho/basho_docs)
+* [Riak CS on GitHub](https://github.com/basho/riak_cs)
+* Via email at **info@basho.com**
+
+## Riak CS 1.5.2
+
+### Changes
+
+* Improve logging around failures with Riak
+  [riak_cs/#987](https://github.com/basho/riak_cs/pull/987)
+* Add amendment log output when storing access stats into Riak failed
+  [riak_cs/#988](https://github.com/basho/riak_cs/pull/988). This change
+  prevents losing access stats logs in cases of temporary connection
+  failure between Riak and Riak CS. Access logs are stored in
+  `console.log` at the `warning` level.
+* Add script to repair invalid garbage collection manifests
+  [riak_cs/#983](https://github.com/basho/riak_cs/pull/983). There is a
+  known issue where an active manifest would be stored in the GC bucket.
+  This script changes invalid state to valid state.
+
+### Fixes
+
+* Fix Protocol Buffers connection pool (`pbc_pool_master`) leak
+  [riak_cs/#986](https://github.com/basho/riak_cs/pull/986).
+  * **Problem**: Requests for non-existent buckets without an
+    authorization header and requests for listing users make connections
+    leak from the pool, causing the pool to eventually go empty. This bug
+    was introduced in release 1.5.0.
+  * **Solution**: Fix the leak by properly releasing connections.
+
+### Known Issues
+
+None
+
+### Download
+
+Please see the [Riak CS Downloads
+Page]({{< baseurl >}}riak/cs/latest/downloads/).
+
+### Feedback
+
+We would love to hear from you. You can reach us in any of the following
+venues:
+
+* [Basho mailing
+  list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+* [The official Basho docs](https://github.com/basho/basho_docs)
+* [Riak CS on GitHub](https://github.com/basho/riak_cs)
+* Via email at **info@basho.com**
+
+## Riak CS 1.5.1
+
+### Additions
+
+* Bucket restrictions --- Similar to S3, you can now limit the number of
+  buckets created per user to prevent users from creating an unusually
+  large number of buckets. More details are included
+  [here]({{< baseurl >}}riak/cs/latest/cookbooks/configuration/riak-cs/).
+
+### Changes
+
+* Add a sleep interval after updating manifests to suppress sibling
+  explosion [riak_cs/#959](https://github.com/basho/riak_cs/pull/959).
+  In order to suppress sibling explosion, a sleep interval is added after
+  updating manifests. The duration of the sleep interval depends on the
+  number of siblings. The problem is documented in more detail
+  [here](https://github.com/basho/riak_cs/pull/959).
+* Update `riak-cs-debug` to include information about bags in a multibag
+  environment [riak_cs/#930](https://github.com/basho/riak_cs/issues/882).
+  Bag listing and weight information are now included in the output of
+  the `riak-cs-debug` command in order to help in investigating issues in
+  a multibag environment.
+  Bag listing and weight information are now included in the output of the `riak-cs-debug` command to help investigate issues in a multibag environment.
+* More efficient bucket resolution [riak_cs/#951](https://github.com/basho/riak_cs/pull/951). Previously, sibling resolution logic was inefficient in cases where users had many buckets (> 1000). As an optimization, resolution is now skipped entirely when no siblings are present (i.e. when there is a single value).
+* Similar to S3, add a limitation on the part number in a multipart upload [riak_cs/#957](https://github.com/basho/riak_cs/pull/957). Part numbers can now range from 1 to 10,000 (inclusive).
+
+### Fixes
+
+* GC may stall due to `riak_cs_delete_fsm` deadlock [riak_cs/#949](https://github.com/basho/riak_cs/pull/949)
+  * **Problem** --- Garbage collection can stall when a `riak_cs_delete_fsm` worker process encounters a deadlock condition.
+  * **Solution** --- One of the requirements in an internal data structure was violated. This fix satisfies the requirement so that the deadlock does not happen.
+* Fix wrong log directory for gathering logs on `riak-cs-debug` [riak_cs/#953](https://github.com/basho/riak_cs/pull/953)
+  * **Problem** --- The directory structure of log files gathered by `riak-cs-debug` was different from that of `riak-debug`.
+  * **Solution** --- The directory structure is now the same as that of `riak-debug`.
+* Avoid DST-aware translation from local time to GMT [riak_cs/#954](https://github.com/basho/riak_cs/pull/954)
+  * **Problem** --- Transformation from local time to GMT is slow, especially when performed by multiple threads. One such transformation was in the path of the `GET Object` API call.
+  * **Solution** --- Eliminate the transformation.
+* Use a newly generated UUID instead of the secret as the seed of the canonical ID [riak_cs/#956](https://github.com/basho/riak_cs/pull/956)
+  * **Problem** --- The MD5 hash of the secret access key was used to generate the canonical ID, which is public information. Although reversing MD5 is not practical, using a secret access key for canonical ID generation is unnecessary and avoidable.
+  * **Solution** --- Use a newly generated UUID for the canonical ID.
+* Set timeout as `infinity` to replace the default of `5000ms` [riak_cs/#963](https://github.com/basho/riak_cs/pull/963)
+  * **Problem** --- In Riak CS 1.5.0, middleman process wrappers for Protocol Buffers sockets were introduced, and the call timeout to them was incorrectly left at the default of 5000 milliseconds.
+  * **Solution** --- Change the call timeout to `infinity`; the actual timeout is controlled by the Protocol Buffers processes.
+* Skip invalid-state manifests in the GC bucket [riak_cs/#964](https://github.com/basho/riak_cs/pull/964)
+  * **Problem** --- If there were active-state manifests in the GC bucket, the GC process crashed.
+  * **Solution** --- Skip active-state manifests and make the GC process collect valid manifests.
+
+### Known Issues
+
+None
+
+### Platforms Tested
+
+* Ubuntu GNU / Linux 12.04
+
+### Installation and Upgrade Notes
+
+#### Per-user bucket creation restrictions
+
+Beginning with Riak CS 1.5.1, you can limit the number of buckets that can be created per user. The default maximum number is 100. While this limitation prohibits the creation of new buckets by users, users that exceed the limit can still perform other operations, including bucket deletion. To change the default limit, add the following line to the `riak_cs` section of `app.config`:
+
+```appconfig
+{riak_cs, [
+    %% ...
+    {max_buckets_per_user, 5000},
+    %% ...
+]}
+```
+
+To avoid having a limit, set `max_buckets_per_user` to `unlimited`.
+
+### Download
+
+Please see the [Riak CS Downloads Page]({{< baseurl >}}riak/cs/latest/downloads).
+
+### Feedback
+
+We would love to hear from you. You can reach us at any of the following links:
+
+* http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
+* https://github.com/basho/basho_docs
+* https://github.com/basho/riak_cs
+
+Or via email at **info@basho.com**.
+
+## Riak CS 1.5.0
+
+### Additions
+
+* Added Multibag Technical Preview to Riak CS. More info is available [here]({{< baseurl >}}riak/cs/latest/cookbooks/supercluster/)
+* A new command `riak-cs-debug`, including `cluster-info` [riak_cs/#769](https://github.com/basho/riak_cs/pull/769), [riak_cs/#832](https://github.com/basho/riak_cs/pull/832)
+* Consolidate all existing commands into a new command, `riak-cs-admin` [riak_cs/#839](https://github.com/basho/riak_cs/pull/839)
+* Add a command `riak-cs-admin stanchion` to switch the Stanchion IP and port manually [riak_cs/#657](https://github.com/basho/riak_cs/pull/657)
+* Performance of garbage collection has been improved via concurrent GC [riak_cs/#830](https://github.com/basho/riak_cs/pull/830)
+* Iterator refresh [riak_cs/#805](https://github.com/basho/riak_cs/pull/805)
+* `fold_objects_for_list_keys` made the default in Riak CS [riak_cs/#737](https://github.com/basho/riak_cs/pull/737), [riak_cs/#785](https://github.com/basho/riak_cs/pull/785)
+* Add support for the Cache-Control header [riak_cs/#821](https://github.com/basho/riak_cs/pull/821)
+* Allow objects to be reaped sooner than the leeway interval [riak_cs/#470](https://github.com/basho/riak_cs/pull/470)
+* PUT Copy on both objects and upload parts [riak_cs/#548](https://github.com/basho/riak_cs/pull/548)
+* Update to lager 2.0.3
+* Compiles with R16B0x (releases are still built with R15B01)
+* Change the default value of `gc_paginated_index` to `true` [riak_cs/#881](https://github.com/basho/riak_cs/issues/881)
+* Add new API: Delete Multiple Objects [riak_cs/#728](https://github.com/basho/riak_cs/pull/728)
+* Add warning logs for manifests, siblings, bytes, and history [riak_cs/#915](https://github.com/basho/riak_cs/pull/915)
+
+### Bugs Fixed
+
+* Align `ERL_MAX_PORTS` with the Riak default: 64000 [riak_cs/#636](https://github.com/basho/riak_cs/pull/636)
+* Allow Riak CS admin resources to be used with the OpenStack API [riak_cs/#666](https://github.com/basho/riak_cs/pull/666)
+* Fix path substitution code to fix Solaris source builds [riak_cs/#733](https://github.com/basho/riak_cs/pull/733)
+* `sanity_check(true,false)` logs an invalid error on `riakc_pb_socket` error [riak_cs/#683](https://github.com/basho/riak_cs/pull/683)
+* Riak-CS-GC timestamp for the scheduler is in the year 0043, not 2013 [riak_cs/#713](https://github.com/basho/riak_cs/pull/713), fixed by [riak_cs/#676](https://github.com/basho/riak_cs/pull/676)
+* Excessive calls to the OTP code_server process [riak_cs/#669](https://github.com/basho/riak_cs/issues/669), fixed by [riak_cs/#675](https://github.com/basho/riak_cs/pull/675)
+* Return HTTP 400 if content-md5 does not match [riak_cs/#596](https://github.com/basho/riak_cs/pull/596)
+* `/riak-cs/stats` and `admin_auth_enabled=false` don't work together correctly.
+  [riak_cs/#719](https://github.com/basho/riak_cs/pull/719)
+* Storage calculation doesn't handle tombstones or undefined manifest.props [riak_cs/#849](https://github.com/basho/riak_cs/pull/849)
+* Multipart-initiated objects remain after bucket delete/create [riak_cs/#475](https://github.com/basho/riak_cs/issues/475), fixed by [riak_cs/#857](https://github.com/basho/riak_cs/pull/857) and [stanchion/#78](https://github.com/basho/stanchion/pull/78)
+* Handle an empty query string on multipart upload listing [riak_cs/#843](https://github.com/basho/riak_cs/pull/843)
+* Setting ACLs via headers at PUT Object creation [riak_cs/#631](https://github.com/basho/riak_cs/pull/631)
+* Improve handling of poolboy timeouts during ping requests [riak_cs/#763](https://github.com/basho/riak_cs/pull/763)
+* Remove an unnecessary log message on anonymous access [riak_cs/#876](https://github.com/basho/riak_cs/issues/876)
+* Fix inconsistent ETag on objects uploaded by multipart [riak_cs/#855](https://github.com/basho/riak_cs/issues/855)
+* Fix policy version validation in PUT Bucket Policy [riak_cs/#911](https://github.com/basho/riak_cs/issues/911)
+* Fix the return code of several commands to return 0 for success [riak_cs/#908](https://github.com/basho/riak_cs/issues/908)
+* Fix `{error, disconnected}` being masked as `notfound` [riak_cs/#929](https://github.com/basho/riak_cs/issues/929)
+
+### Notes on Upgrading
+
+#### Incomplete multipart uploads
+
+[riak_cs/#475](https://github.com/basho/riak_cs/issues/475) was a
+security issue: a newly created bucket could include unaborted or
+incomplete multipart uploads that were created in a previous epoch of
+a bucket with the same name. This was fixed by:
+
+- On bucket creation: check whether any live multipart uploads exist
+  and, if so, return a 500 failure to the client.
+
+- On bucket deletion: try to clean up all live multipart remains, and
+  check (in Stanchion) whether any still exist. If so, return a 409
+  failure to the client.
+
+Note that a few operations are needed after upgrading from 1.4.x (or
+earlier) to 1.5.0.
+
+- Run `riak-cs-admin cleanup-orphan-multipart` to clean up all
+  buckets. It is safer to specify a timestamp in ISO 8601 format,
+  such as `2014-07-30T11:09:30.000Z`, as an argument, for example the
+  time at which the upgrade of all CS nodes finished. The cleaner then
+  does not clean up multipart uploads newer than that timestamp. This
+  prevents some corner cases where multipart uploads conflict with
+  bucket deletion and this cleanup.
+
+- Until the above cleanup has finished, no client can create a bucket
+  whose name collides with a deleted bucket that still holds unfinished
+  multipart uploads. A `[critical]` log entry appears if such a bucket
+  creation is attempted.
+
+#### Leeway seconds and disk space
+
+[riak_cs/#470](https://github.com/basho/riak_cs/pull/470) changed the
+behaviour of object deletion and garbage collection. The timestamps in
+the garbage collection bucket were changed from the future time at which
+the object is to be deleted to the current time at which the object is
+deleted. The garbage collector was also changed to collect objects up to
+'now - leeway seconds', rather than up to 'now' as previously.
+
+Before (-1.4.x):
+
+```
+          t1                         t2
+-----------+--------------------------+------------------->
+           DELETE object:             GC triggered:
+           marked as                  collects objects
+           "t1+leeway"                marked as "t2"
+```
+
+After (1.5.0-):
+
+```
+          t1                         t2
+-----------+--------------------------+------------------->
+           DELETE object:             GC triggered:
+           marked as "t1"             collects objects
+           in GC bucket               marked as "t2 - leeway"
+```
+
+This means that right after the upgrade to 1.5.0 (call that time `t0`),
+there is a period, lasting until `t0 + leeway`, during which no objects
+are collected. Objects deleted just before `t0` won't be collected until
+`t0 + 2*leeway`.
+
+Also, all CS nodes which run GC should be upgraded *first*. CS nodes
+which do not run GC should be upgraded later, to let the leeway-seconds
+system work properly. Alternatively, stop GC while upgrading the whole
+cluster by running `riak-cs-admin gc set-interval infinity`.
+
+Multi-datacenter clusters should be upgraded more carefully; make sure
+GC is not running while upgrading.
+
+#### Riak CS Multibag
+
+Multibag, the ability to store object manifests and blocks in separate
+clusters or groups of clusters, has been added as an Enterprise feature,
+but it is in early preview status. `proxy_get` has not yet been
+implemented for this preview feature, so multibag is intended for a
+single DC only at this time.
+
+> **Note**: CS Multibag was renamed to CS Supercluster. More information on Supercluster Support is available [here][riak_cs_multibag_support].
+
+### Known Issues and Limitations
+
+* If a client sends another request on the same connection while
+  waiting for a copy to finish, the copy will also be aborted. This is a
+  side effect of client disconnect detection in the case of object copy.
+  See [#932](https://github.com/basho/riak_cs/pull/932) for further
+  information.
+
+* Copying objects in the OOS interface is not implemented.
+
+* Multibag is added as an Enterprise feature, but it is in early preview
+  status. `proxy_get` setup among clusters with multibag enabled is not
+  implemented yet.
+
+
+## Riak CS 1.4.5
+
+#### Bugs Fixed
+
+* Fix several 'data hiding' bugs with the v2 list objects FSM [riak_cs/788](https://github.com/basho/riak_cs/issues/788).
+* Don't count HEAD requests toward BytesOut in access statistics [riak_cs/791](https://github.com/basho/riak_cs/issues/791).
+* Handle whitespace in POST/PUT XML documents [riak_cs/795](https://github.com/basho/riak_cs/issues/795).
+* Handle unicode user-names and XML [riak_cs/807](https://github.com/basho/riak_cs/issues/807).
+* Fix missing XML fields on storage usage [riak_cs/808](https://github.com/basho/riak_cs/issues/808).
+* Adjust the fold-objects timeout [riak_cs/811](https://github.com/basho/riak_cs/issues/811).
+* Prune deleted buckets from the user record [riak_cs/812](https://github.com/basho/riak_cs/issues/812).
+* Fix a bad bucket name in storage usage [riak_cs/800](https://github.com/basho/riak_cs/issues/800).
+
+Riak CS 1.4.4 introduced
+[a bug (#800)](https://github.com/basho/riak_cs/issues/800) where
+storage calculations made while running that version would have the
+bucket name replaced by the string "struct". This version fixes the
+bug, but can't go back and retroactively fix the old storage
+calculations. Aggregations on an entire user account should still be
+accurate, but you won't be able to break down storage by bucket, as
+the buckets will all share the name "struct".
+
+
+#### Additions
+
+* Optimize the list objects v2 FSM for prefix requests [riak_cs/804](https://github.com/basho/riak_cs/issues/804).
+
+## Riak CS 1.4.4
+
+[Riak CS 1.4.4 Release Notes](https://github.com/basho/riak_cs/blob/1.4.4/RELEASE-NOTES.md)
+
+#### Bugs Fixed
+
+* Create the basho-patches directory [riak_cs/775](https://github.com/basho/riak_cs/issues/775).
+* A `sum_bucket` timeout crashing all storage calculation is fixed by [riak_cs/759](https://github.com/basho/riak_cs/issues/759).
+* Failure to throttle the access archiver is fixed by [riak_cs/758](https://github.com/basho/riak_cs/issues/758).
+* An access archiver crash is fixed by [riak_cs/747](https://github.com/basho/riak_cs/issues/747).
+
+## Riak CS 1.4.3
+
+[Riak CS 1.4.3 Release Notes](https://github.com/basho/riak_cs/blob/1.4.3/RELEASE-NOTES.org)
+
+#### Bugs Fixed
+
+* Fix a bug that reverted manifests in the `scheduled_delete` state to the `pending_delete` or active state.
+* Don't count already deleted manifests as overwritten.
+* Don't delete the current object version on overwrite with an incorrect md5.
+
+#### Additions
+
+* Improve performance of manifest pruning.
+* Optionally use paginated 2i for the GC daemon. This is to help prevent timeouts when collecting data that can be garbage collected.
+* Improve handling of Riak disconnects on block fetches.
+* Update to Lager 2.0.1.
+* Optionally prune manifests based on count, in addition to time.
+* Allow multiple access archiver processes to run concurrently.
+
+## Riak CS 1.4.2
+
+#### Bugs Fixed
+
+* Fix an issue with the Enterprise build on Debian Linux distributions.
+* Fix the source tarball build.
+* Fix an access statistics bug that caused all accesses to be treated as errors.
+* Make logging in the bucket listing map phase function lager version agnostic to avoid issues when using versions of Riak older than 1.4.
+* Handle an undefined `props` field in manifests to fix an issue accessing objects written with a version of Riak CS older than 1.3.0.
+
+#### Additions
+
+* Add an option to delay the initial GC sweep on a node using the `initial_gc_delay` configuration option.
+* Append a random suffix to GC bucket keys to avoid hot keys and improve performance during periods of frequent deletion.
+* Add a `default_proxy_cluster_id` option to provide a way to specify a default cluster id to be used when the cluster id is undefined. This is to facilitate migration from the OSS version to the Enterprise version.
+
+## Riak CS 1.4.1
+
+#### Bugs Fixed
+
+* Fix a list objects crash when more than the first 1001 keys are in the pending delete state.
+* Fix a crash in the garbage collection daemon.
+* Fix a packaging bug by updating the `node_package` dependency.
+
+## Riak CS 1.4.0
+
+#### Additions
+
+* Add preliminary support for the Swift API and Keystone authentication.
+* Improve performance of object listing when using Riak 1.4.0 or greater.
+* Add the ability to edit user account names and email addresses.
+* Add support for v3 multi-data-center replication.
+* Add configurable Riak connection timeouts.
+* Add syslog support via Lager.
+* Only contact one vnode for immutable block requests.
+
+#### Bugs Fixed
+
+* Remove unnecessary keys in the GC bucket.
+* Fix query-string authentication for multipart uploads.
+* Fix the Storage Class for multipart-uploaded objects.
+* Fix ETags for multipart uploads.
+* Support reformat indexes in the Riak CS multi-backend.
+* Fix unbounded memory growth on `GET` requests with a slow connection.
+* Reduce access-archiver memory use.
+* Fix a 500 on an object ACL `HEAD` request.
+* Fix semantics for concurrent upload and delete of the same key with a multipart upload.
+* Verify the `content-md5` header if supplied.
+* Handle transient Riak connection failures.
+
+## Riak CS 1.3.1
+
+#### Bugs Fixed
+
+* Fix a bug in the handling of active object manifests in the case of overwrite or delete that could lead to old object versions being resurrected.
+* Fix improper capitalization of user metadata header names.
+* Fix an issue where the S3 rewrite module omits any query parameters that are not S3 subresources. Also correct handling of query parameters so that parameter values are not URL-decoded twice. This primarily affects pre-signed URLs, because the access key and request signature are included as query parameters.
+* Fix an issue with init script stop.
+
+## Riak CS 1.3.0
+
+#### Additions
+
+* Support for multipart file uploads. Parts must be in the range of 5MB-5GB.
+* Support for bucket policies using a restricted set of principals and conditions.
+* Support for returning byte ranges of a file using the `Range` header.
+* Administrative commands may be segregated onto a separate interface.
+* Authentication for administrative commands may be disabled.
+* Performance and stability improvements for listing the contents of buckets.
+* Support for the prefix, delimiter, and marker options when listing the contents of a bucket.
+* Support for using Webmachine's access logging features in conjunction with the Riak CS internal access logging mechanism.
+* Moved all administrative resources under `/riak-cs`.
+* Riak CS now supports packaging for FreeBSD, SmartOS, and Solaris.
+
+#### Bugs Fixed
+
+* Fix handling of cases where buckets have siblings. Previously this resulted in 500 errors returned to the client.
+* Reduce the likelihood of sibling creation when creating a bucket.
+* Return a 404 instead of a 403 when accessing a deleted object.
+* Unquote URLs to accommodate clients that URL-encode `/` characters in URLs.
+* Deny anonymous service-level requests to avoid unnecessary error messages trying to list the buckets owned by an undefined user.
+
+## Riak CS 1.2.2
+
+#### Additions
+
+* Full support for MDC replication
+
+#### Bugs Fixed
+
+* Fix a problem where objects with `utf-8` unicode keys can be neither listed nor fetched.
+* Speed up the `bucket_empty` check and fix a process leak. This bug was originally found when a user was having trouble with `s3cmd rb s3://foo --recursive`. The operation first tries to delete the (potentially large) bucket, which triggers our bucket-empty check. If the bucket has more than 32k items, we run out of processes unless `+P` is set higher (because of the leak).
+
+## Riak CS 1.2.1
+
+#### Additions
+
+* Add a reduce phase for listing bucket contents to provide backpressure when executing the MapReduce job.
+* Use prereduce during storage calculations.
+
+#### Bugs Fixed
+
+* Return 403 instead of 404 when a user attempts to list the contents of a nonexistent bucket.
+* Do not do a bucket list for `HEAD` or `?versioning` or `?location` requests.
+
+## Riak CS 1.2.0
+
+#### Additions
+
+* Add preliminary support for MDC replication
+* Quickcheck test to exercise the erlcloud library against Riak CS
+* Basic support for riak_test integration
+
+#### Bugs Fixed
+
+* Do not expose stack traces to users on 500 errors
+* Fix an issue with sibling creation on user record updates
+* Fix a crash in the terminate state when the fsm state is not fully populated
+* Script fixes and updates in response to node_package updates
+
+## Riak CS 1.1.0
+
+#### Additions
+
+* Update user creation to accept a JSON or XML document instead of a URL-encoded text string.
+* Configuration option to allow anonymous users to create accounts. In the default mode, only the administrator is allowed to create accounts.
+* Ping resource for health checks.
+* Support for user-specified metadata headers.
+* User accounts may be disabled by the administrator.
+* A new `key_secret` can be issued for a user by the administrator.
+* The administrator can now list all system users and optionally filter by enabled or disabled account status.
+* Garbage collection for deleted and overwritten objects.
+* Separate connection pool for object listings, with a default of 5 connections.
+* Improved performance for listing all objects in a bucket.
+* Statistics collection and querying.
+* DTrace probing.
+
+#### Bugs Fixed
+
+* Check for a timeout when checking out a connection from poolboy.
+* PUT object now returns 200 instead of 204.
+* Fixes for Dialyzer errors and warnings.
+* Return a readable error message with 500 errors instead of large webmachine backtraces.
+
+## Riak CS 1.0.2
+
+#### Additions
+
+* Support query parameter authentication as specified in [Signing and Authenticating REST Requests](http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html).
+
+## Riak CS 1.0.1
+
+#### Bugs Fixed
+
+* The default `content-type` is not passed into the function that handles the `PUT` request body
+* Requests hang when a node in the Riak cluster is unavailable
+* Correct inappropriate use of `riak_cs_utils:get_user` by `riak_moss_acl_utils:get_owner_data`
+
+## Riak CS 1.0.0
+
+#### Additions
+
+* Subsystem for calculating user access and storage usage
+* Fixed-size connection pool of Riak connections
+* Use a single Riak connection per request to avoid deadlock conditions
+* Object ACLs
+* Management for multiple versions of a file manifest
+* Configurable block size and max content length
+* Support specifying a non-default ACL at bucket creation time
+
+#### Bugs Fixed
+
+* Fix PUTs for zero-byte files
+* Fix fsm initialization race conditions
+* Canonicalize the entire path if there is no host header, but there are tokens
+* Fix process and socket leaks in the get fsm
+
+## Riak CS 0.1.2
+
+#### Bugs Fixed
+
+* Return 403 instead of 503 for invalid anonymous or signed requests.
+* Properly clean up processes and connections on object requests.
+
+## Riak CS 0.1.1
+
+#### Bugs Fixed
+
+* HEAD requests always result in a `403 Forbidden`.
+* `s3cmd info` on a bucket object results in an error due to a missing ACL document.
+* Incorrect atom specified in `riak_moss_wm_utils:parse_auth_header`.
+* Bad match condition used in `riak_moss_acl:has_permission/2`.
+
+## Riak CS 0.1.0
+
+#### Additions
+
+* Bucket-level access control lists
+* User records have been modified so that a system-wide unique email address is required to create a user.
+* User creation requests are serialized through `stanchion` to be certain the email address is unique.
+* Bucket creation and deletion requests are serialized through `stanchion` to ensure bucket names are unique in the system.
+* The `stanchion` serialization service is now required to be installed and running for the system to be fully operational.
+* The concept of an administrative user has been added to the system. The credentials of the administrative user must be added to the `app.config` files for `moss` and `stanchion`.
+* User credentials are now created using a URL-safe base64 encoding module.
+
+#### Bugs Fixed
+
+* `s3cmd info` fails due to a missing `last-modified` key in the return document.
+* `s3cmd get` of a 0-byte file fails.
+* Bucket creation fails with status code `415` using the AWS Java SDK.
+
+#### Known Issues
+
+* Object-level access control lists have not yet been implemented.
+
+## Riak CS 0.0.3
+
+#### Additions
+
+* Support for the `s3cmd` subcommands `sync`, `du`, and `rb`
+* Return a valid size and checksum for each object when listing bucket objects.
+* Changes so that a bucket may be deleted if it is empty.
+* Changes so a subdirectory path can be specified when storing or retrieving files.
+* Make buckets private by default
+* Support the prefix query parameter
+* Enhance process dependencies for improved failure handling
+
+#### Bugs Fixed
+
+* URL-decode keys on put so they are represented correctly. This eliminates confusion when objects with spaces in their names are listed and when attempting to access them.
+* Properly handle zero-byte files
+* Reap all processes during file puts
+
+#### Known Issues
+
+* Buckets are marked as `private` by default, but globally unique bucket names are not enforced. This means that two users may create the same bucket and this could result in unauthorized access and unintentional overwriting of files. This will be addressed in a future release by ensuring that bucket names are unique across the system.
diff --git a/content/riak/cs/2.1.2/cookbooks/rolling-upgrades.md b/content/riak/cs/2.1.2/cookbooks/rolling-upgrades.md
new file mode 100644
index 0000000000..e8d42898b8
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/rolling-upgrades.md
@@ -0,0 +1,205 @@
+---
+title: "Rolling Upgrades For Riak CS"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Rolling Upgrades"
+    identifier: "advanced_upgrades"
+    weight: 100
+    parent: "run_advanced"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/Rolling-Upgrades-For-Riak-CS/
+  - /riak/cs/2.1.2/cookbooks/Rolling-Upgrades-For-Riak-CS/
+  - /riak/cs/latest/cookbooks/rolling-upgrades/
+---
+
+Each node in a Riak CS cluster contains settings that define its
+operating modes and API coverage. The following steps outline the
+process of upgrading Riak CS in a rolling fashion.
+
+Be sure to check the Riak CS [Version Compatibility]({{< baseurl >}}riak/cs/2.1.2/cookbooks/version-compatibility) chart to ensure that your version of Riak, Riak CS, and Stanchion have been tested to work together. As Basho supports upgrades from the previous two major versions, this document will cover upgrades from Riak CS 1.4.x and Riak CS 1.5.x.
+
+As Riak CS 2.0.0 only works with Riak 2.0.5, the underlying Riak installation
+*must* be upgraded to Riak 2.0.5.
+
+{{% note title="Note on upgrading from Riak CS < 1.5.4" %}}
+Some key objects changed names after the upgrade. Applications may need to
+change their behaviour due to this bugfix.
+{{% /note %}}
+
+
+{{% note title="Note on upgrading from Riak CS < 1.5.1" %}}
+A limit on the number of buckets per user was introduced in 1.5.1.
+Users who have more than 100 buckets cannot create any bucket after the
+upgrade unless the limit is extended in the system configuration.
+{{% /note %}}
+
+
+{{% note title="Note on upgrading from Riak CS 1.4.x" %}}
+An operational procedure to clean up incomplete multipart uploads under
+deleted buckets is needed. Otherwise, new buckets with names that existed
+in the past can't be created; the operation will fail with a `409 Conflict`
+error.

+
+Leeway seconds and disk space should also be watched carefully during the
+upgrade, because the timestamp management of garbage collection has changed
+since the 1.5.0 release. Consult the "Leeway seconds and disk space" section
+of the Riak CS 1.5 release notes for a more detailed description.
+{{% /note %}}
+
+
+1. Stop Riak, Riak CS, and Stanchion:
+
+    ```bash
+    riak stop
+    riak-cs stop
+    stanchion stop
+    ```
+
+2. Back up Riak's configuration files:
+
+    ```bash
+    sudo tar -czf riak_config_backup.tar.gz /etc/riak
+    ```
+
+3. Optionally, back up your data directories:
+
+    ```bash
+    sudo tar -czf riak_data_backup.tar.gz /var/lib/riak
+    ```
+
+{{% note title="Note on Patches" %}}
+Remember to remove all patches from the `basho-patches` directory, as the
+version of Erlang has changed in Riak CS 2.0. All official patches
+previously released by Basho have been included in this release.
+{{% /note %}}
+
+4. Upgrade Riak, Riak CS, and Stanchion. See the
+   [Riak CS Downloads]({{< baseurl >}}riak/cs/latest/downloads) and
+   [Riak Downloads]({{< baseurl >}}riak/kv/latest/downloads)
+   pages to find the appropriate packages.
+
+    **Debian** / **Ubuntu**
+
+    ```bash
+    sudo dpkg -i <riak-package>.deb
+    sudo dpkg -i <riak-cs-package>.deb
+    sudo dpkg -i <stanchion-package>.deb
+    ```
+
+    **RHEL** / **CentOS**
+
+    ```bash
+    sudo rpm -Uvh <riak-package>.rpm
+    sudo rpm -Uvh <riak-cs-package>.rpm
+    sudo rpm -Uvh <stanchion-package>.rpm
+    ```
+
+5. The `add_paths` setting in your configuration file must be updated to reflect
+   the current version's `/ebin` directory. To give an example, if the
+   previous `/ebin` directory was located at
+   `/usr/lib/riak-cs/lib/riak_cs-1.5.2/ebin` and you're upgrading to version
+   2.0.0, you will need to change the value in `add_paths`:
+
+    ```advancedconfig
+    {add_paths, ["/usr/lib/riak-cs/lib/riak_cs-2.0.0/ebin"]}
+    ```
+
+    ```appconfig
+    {add_paths, ["/usr/lib/riak-cs/lib/riak_cs-2.0.0/ebin"]}
+    ```
+
+6. Riak CS 2.0 introduces a new style of configuration known as `riak-cs.conf`.
+   You may choose to continue the use of the `app.config` file, or migrate your
+   existing configuration to `riak-cs.conf` (recommended). If you choose to
+   use `riak-cs.conf`, you should migrate all supported settings to the new
+   format, and copy all others to the new `advanced.config` file.
+
+{{% note title="Note on Legacy app.config usage" %}}
+  **If you choose to use the legacy `app.config` files for Riak CS and/or
+  Stanchion, some parameters have changed names and must be updated**.
+
+  In particular, for the Riak CS `app.config`:
+
+  - `cs_ip` and `cs_port` have been combined into `listener`.
+  - `riak_ip` and `riak_pb_port` have been combined into `riak_host`.
+  - `stanchion_ip` and `stanchion_port` have been combined into
+    `stanchion_host`.
+  - `admin_ip` and `admin_port` have been combined into `admin_listener`.
+  - `webmachine_log_handler` has become `webmachine_access_log_handler`.
+  - `{max_open_files, 50}` has been deprecated and should be replaced with
+    `{total_leveldb_mem_percent, 30}`.
+
+  For the Stanchion `app.config`:
+
+  - `stanchion_ip` and `stanchion_port` have been combined into `listener`.
+  - `riak_ip` and `riak_port` have been combined into `riak_host`.
+
+  Each of the above pairs follows a similar form. For example, if your legacy
+  `app.config` configuration was previously:
+
+  ```
+  {riak_cs, [
+      {cs_ip, "127.0.0.1"},
+      {cs_port, 8080},
+      . . .
+  ]},
+  ```
+
+  It should now read:
+
+  ```
+  {riak_cs, [
+      {listener, {"127.0.0.1", 8080}},
+      . . .
+  ]},
+  ```
+
+  and so on. More details can be found at [configuring Riak CS]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/riak-cs).
+{{% /note %}}
+
+{{% note title="Note on Memory Sizing" %}}
+Some changes have been made to both Riak and Riak CS that may warrant
+some performance tuning. Please consult the Riak CS 2.0.0 Release Notes
+for more details.
+{{% /note %}}
+
+7. Riak has also moved to the new configuration format, using a file called
+   `riak.conf`. Remember to migrate all existing Riak configurations during
+   the upgrade process. For example, the default bucket properties:
+
+    ```riakconf
+    buckets.default.allow_mult = true
+    ```
+
+    ```appconfig
+    {riak_core, [
+        ...
+        {default_bucket_props, [{allow_mult, true}]},
+        ...
+    ]}.
+    ```
+
+8. Start the node:
+
+    ```bash
+    riak start
+    stanchion start
+    riak-cs start
+    ```
+
+9. Wait for any handoff to complete (a polling sketch follows this list):
+
+    ```bash
+    riak-admin transfers
+    ```
+
+10. Move on to the next node and repeat this process throughout the
+    cluster.
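+
+For step 9, the following is a minimal sketch of polling until handoff has
+finished before moving on to the next node. It assumes that `riak-admin
+transfers` reports `No transfers active` once the node is clear, which is
+its usual output when handoff is complete:
+
+```bash
+# Poll until this node reports no active handoff transfers.
+until riak-admin transfers | grep -q 'No transfers active'; do
+    echo "Transfers still in progress; waiting..."
+    sleep 10
+done
+echo "Handoff complete; safe to move on to the next node."
+```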
diff --git a/content/riak/cs/2.1.2/cookbooks/supercluster.md b/content/riak/cs/2.1.2/cookbooks/supercluster.md
new file mode 100644
index 0000000000..64e445eaa5
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/supercluster.md
@@ -0,0 +1,187 @@
+---
+title: "Riak CS Supercluster Support"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Riak CS Supercluster Support"
+    identifier: "advanced_supercluster_support"
+    weight: 103
+    parent: "run_advanced"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/supercluster/
+  - /riak/cs/latest/cookbooks/supercluster/
+  - /riak/cs/latest/cookbooks/multibag/
+---
+
+{{% note title="Technical Preview" %}}
+Riak CS Supercluster is currently in technical preview mode and is available
+only to Riak Enterprise customers. It is not yet suitable for production use.
+{{% /note %}}
+
+While [Riak CS Enterprise](http://basho.com/riak-enterprise) enables
+you to distribute Riak CS objects across multiple data centers in a
+[source/sink pattern]({{< baseurl >}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture), all linked clusters are treated the same. In Riak CS version 1.5.0, however, Basho has added **supercluster** support to Riak CS Enterprise.
+
+With supercluster support, you can store object manifests and blocks in
+separate clusters or groups of clusters, a.k.a. **a set of supercluster members**, enhancing the scalability and overall storage capabilities of a Riak CS installation.
+
+## Supercluster Members
+
+A supercluster member is a set of clusters linked together via [Multi-Datacenter Replication]({{< baseurl >}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture) (MDC).
+Without MDC support, a supercluster member consists of a single cluster. With MDC support, however, a supercluster member can consist of several linked clusters. You can assign members **weights** that determine the likelihood that objects, blocks, and manifests will be stored there. For example, if you expect to use one supercluster member more heavily than another, you can increase the weight of that member using the interface described in [Riak CS Command-line Tools]({{< baseurl >}}riak/cs/2.1.2/cookbooks/command-line-tools).
+
+## The Master Member
+
+In a Riak CS supercluster setup, there is one special member, known as the
+**master member**, that bears a set of special responsibilities. It stores
+objects such as:
+
+* User information (for authentication and other purposes)
+* Bucket-related information, e.g. the supercluster member in which each bucket is
+  stored
+* Access statistics regarding Riak CS usage
+
+## Supercluster Configuration
+
+In order to use Riak CS supercluster, you need to modify multiple configuration
+files. First, in each Riak CS node you need to alter the node's
+`riak-cs.conf`, `advanced.config`, or `app.config` file to specify the host and port of each supercluster member.
+
+For example, if you wanted to set up supercluster members on host `127.0.0.1` with three different ports --- `10017`, `10027`, and `10037` --- you would add the following section:
+
+```riakcsconf
+supercluster.member.Alpha = 127.0.0.1:10017
+supercluster.member.Bravo = 127.0.0.1:10027
+supercluster.member.Charlie = 127.0.0.1:10037
+```
+```advancedconfig
+{riak_cs, [
+    %% Other configs
+    {supercluster_members,
+     [
+      {"Alpha", "127.0.0.1", 10017},
+      {"Bravo", "127.0.0.1", 10027},
+      {"Charlie", "127.0.0.1", 10037}
+     ]},
+    %% Other configs
+]},
+```
+```appconfig
+{riak_cs, [
+    %% Other configs
+    {supercluster_members,
+     [
+      {"Alpha", "127.0.0.1", 10017},
+      {"Bravo", "127.0.0.1", 10027},
+      {"Charlie", "127.0.0.1", 10037}
+     ]},
+    %% Other configs
+]},
+```
+
+> As with all configuration changes, each node must be restarted for the
+changes to take effect.
+
+In addition to configuring Riak CS to use supercluster support, you will need to mirror the configuration changes shown above in Stanchion. In the
+`stanchion.conf`, `advanced.config`, or `app.config` file in each Stanchion node, the following
+section would need to be inserted:
+
+```stanchionconf
+supercluster.member.Alpha = 127.0.0.1:10017
+supercluster.member.Bravo = 127.0.0.1:10027
+supercluster.member.Charlie = 127.0.0.1:10037
+```
+```advancedconfig
+{stanchion, [
+    %% Other configs
+    {supercluster_members,
+     [
+      {"Alpha", "127.0.0.1", 10017},
+      {"Bravo", "127.0.0.1", 10027},
+      {"Charlie", "127.0.0.1", 10037}
+     ]
+    }
+    %% Other configs
+]},
+```
+```appconfig
+{stanchion, [
+    %% Other configs
+    {supercluster_members,
+     [
+      {"Alpha", "127.0.0.1", 10017},
+      {"Bravo", "127.0.0.1", 10027},
+      {"Charlie", "127.0.0.1", 10037}
+     ]
+    }
+    %% Other configs
+]},
+```
+
+## Transitioning to Supercluster Support
+
+If you have an existing Riak CS installation without supercluster support
+and would like to add it, there is a series of basic steps to follow.
+
+### Stanchion
+
+Stanchion houses some of the basic functionality required for Riak CS
+supercluster support. The first step in transitioning to supercluster support
+is to upgrade Stanchion to a version that supports Riak CS supercluster.
+That involves performing the following steps on each node:
+
+1. Stop the node
+2. Upgrade Stanchion to a version that supports Riak CS supercluster, i.e.
+   Riak CS 1.5.0 and later
+3. Set your desired Stanchion [configuration]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/stanchion)
+4. Start Stanchion on each node
+
+### Add Clusters
+
+To add clusters to a supercluster installation, you must set up Riak CS and
+Stanchion to communicate with those clusters. You can specify the
+connection information as explained above in the [Supercluster Configuration](#supercluster-configuration) section.
+
+### Set Weights
+
+When a new supercluster member is added, you must first set the weight of that member to zero using the [`riak-cs-supercluster`]({{< baseurl >}}riak/cs/2.1.2/cookbooks/command-line-tools) command-line interface.
+
+The example below sets the weight of the recently added supercluster member `Alpha` to zero:
+
+```bash
+riak-cs-supercluster weight Alpha 0
+```
+
+All weights are stored in the [master member](#the-master-member) and shared with all Riak CS nodes, which means that you only have to set weights once for them to be valid throughout your cluster.
+
+All supercluster members must begin their life with a weight of zero.
+However, you can set non-zero weights once all Riak CS and Stanchion nodes are
+properly set up to recognize one another in the cluster. Let's say that we've set
+up three members, `Alpha`, `Bravo`, and `Charlie`. We want to assign them the
+weights 40, 40, and 20, respectively. The following commands would
+accomplish that:
+
+```bash
+riak-cs-supercluster weight Alpha 40
+riak-cs-supercluster weight Bravo 40
+riak-cs-supercluster weight Charlie 20
+```
+
+The weights don't need to add up to 100 or to any specific number. Each
+weight will be calculated as a percentage of the total assigned weights.
+Thus, if a fourth supercluster member were added, you could assign it a weight of 30 without changing the other weights.
+
+Congratulations! Your Riak CS installation is now ready to use the new
+supercluster feature.
+
+## Command Line Interface
+
+Complete documentation for the `riak-cs-supercluster` interface can be found
+in our documentation on [Riak CS Command Line Tools]({{< baseurl >}}riak/cs/2.1.2/cookbooks/command-line-tools/#riak-cs-supercluster).
+
+## Limitations
+
+Riak CS supercluster does not currently support [proxy gets]({{< baseurl >}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter/#riak-cs-mdc-gets) from
+sink clusters.
diff --git a/content/riak/cs/2.1.2/cookbooks/system-features.md b/content/riak/cs/2.1.2/cookbooks/system-features.md
new file mode 100644
index 0000000000..98cda7d455
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/system-features.md
@@ -0,0 +1,18 @@
+---
+title: "System Features"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/System-Features/
+  - /riak/cs/latest/cookbooks/system-features/
+---
+
+The following pages detail Riak CS's system features.
+
+* [Access Control Lists]({{< baseurl >}}riak/cs/2.1.2/cookbooks/access-control-lists)
+* [Authentication]({{< baseurl >}}riak/cs/2.1.2/cookbooks/authentication)
+* [Monitoring and Metrics]({{< baseurl >}}riak/cs/2.1.2/cookbooks/monitoring-and-metrics)
+* [Querying Access Statistics]({{< baseurl >}}riak/cs/2.1.2/cookbooks/querying-access-statistics)
+* [Querying Storage Statistics]({{< baseurl >}}riak/cs/2.1.2/cookbooks/querying-storage-statistics)
+* [Usage and Billing Data]({{< baseurl >}}riak/cs/2.1.2/cookbooks/usage-and-billing-data)
diff --git a/content/riak/cs/2.1.2/cookbooks/usage-and-billing-data.md b/content/riak/cs/2.1.2/cookbooks/usage-and-billing-data.md
new file mode 100644
index 0000000000..6c063efec7
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/usage-and-billing-data.md
@@ -0,0 +1,387 @@
+---
+title: "Usage and Billing Data"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Usage and Billing Data"
+    identifier: "admin_usage_billing"
+    weight: 101
+    parent: "http_admin"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/Usage-and-Billing-Data/
+  - /riak/cs/latest/cookbooks/usage-and-billing-data/
+---
+
+Like many other object storage systems, Riak CS gathers a variety of
+usage statistics and makes them available through its administrative
+API.
+
+{{% note title="Note on terminology" %}}
+In this and other documents in the Riak CS documentation, the terms "storage"
+and "billing" are used interchangeably. The same goes for the terms "usage"
+and "access".
+{{% /note %}}
+
+## Access Statistics
+
+Access stats are tracked on a per-user basis, as rollups for slices of
+time. They are stored just like other Riak CS data, in the `cs.access`
+bucket in particular.
+For information about querying access statistics,
+please read [Querying Access Statistics]({{< baseurl >}}riak/cs/2.1.2/cookbooks/querying-access-statistics).
+
+## Overview
+
+The basic process driving usage and billing data in Riak CS is the
+following:
+
+1. Riak CS determines who, if anyone, should be billed for each access
+2. Riak CS sends this and some statistical information about the
+   accesses to an aggregation subsystem
+3. The aggregation subsystem periodically sends its accumulated log to
+   be archived
+4. The archival subsystem sums all recorded accesses for each user and
+   stores a record for each user for the time slice
+
+Log retrieval then involves simply making a request to Riak for all
+slice objects for a user in a time period. No access data will be logged
+unless the user for the access is known.
+
+### Tracked Statistics
+
+Several statistics are logged automatically if a user is specified for
+the request:
+
+* `Count` --- the number of times this operation was used, where each
+  request counts as one (1)
+* `BytesIn` --- the number of bytes that were included in the request
+  body
+* `BytesOut` --- the number of bytes that were sent in the response body
+
+For successful requests, each of these stats is logged under the name
+given. For unsuccessful requests, they are logged under this name with a
+prefix of either `SystemError` for requests that end in response codes
+500+, or `UserError` for requests that end in response codes 400-499.
+For example, if a user tries to download a nonexistent file, it will be
+logged under `UserErrorCount` with the bytes of the message logged under
+`UserErrorBytesOut`.
+
+These three metrics are logged for each operation separately. The access
+logger determines the operation type by comparing the method, resource
+module, and path to a known table. For example, it knows that a `GET` on
+the *key* module with the `acl` query parameter in the path is a
+`KeyReadACL` operation. A `PUT` to the same resource without the `acl`
+query parameter is a `KeyWrite` operation. See [Querying Access Statistics]({{< baseurl >}}riak/cs/2.1.2/cookbooks/querying-access-statistics) for a list of all operation types.
+
+### Log Accumulation
+
+As resources finish their processing, the access logger module is called
+by Webmachine to log the access. This module implements a server that
+finds all of the access notes in the request's log data and stores them
+until the current interval ends.
+
+When the current interval ends, the access module transfers ownership of
+its accumulated data to the archiver module. The logger module then
+resets for logging the next slice's accesses.
+
+#### Interval Duration
+
+The length of the log flushing interval is configured by the application
+environment variable `access_log_flush_factor`. The value is expressed
+as an integer divisor of the `access_archive_period` setting. That is,
+if `access_log_flush_factor` is 5 and `access_archive_period` is 3600
+seconds (== 1 hour), the log will be flushed every 720 seconds (== 12
+minutes), which is 5 times per archive period.
+
+The value of `access_log_flush_factor` must be an integer factor of
+`access_archive_period`. If the factor does not divide the period
+evenly, an error will be printed in the log, and the Riak CS node will
+refuse to start.
+
+The default value for `access_log_flush_factor` is 1 (once per archive
+period). These settings may be manipulated in the Riak CS `app.config`
+file, normally located at `/etc/riak-cs/app.config`.
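+
+As a quick illustration of the divisor rule above, here is a small shell
+sketch (the variable values are just the example settings from this
+section, not defaults read from any file) that computes the flush
+interval and flags an invalid factor:
+
+```bash
+# access_log_flush_factor must evenly divide access_archive_period,
+# or the Riak CS node will refuse to start.
+period=3600   # access_archive_period, in seconds
+factor=5      # access_log_flush_factor
+if [ $((period % factor)) -eq 0 ]; then
+    echo "log flushed every $((period / factor)) seconds"
+else
+    echo "invalid: ${factor} does not evenly divide ${period}" >&2
+fi
+```
+
+With the values above, this prints "log flushed every 720 seconds",
+matching the example in the text.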
+
+#### Log Size Trigger for Archival
+
+Archival of the access log will also be triggered if the number of
+records waiting to be archived reaches a certain configured level. When
+the threshold is reached, all accumulated records are transferred to the
+archiver, which writes out a sample with *now* as the end time.
+Accumulation is then restarted with *now* as the start time, and will
+continue until either the end of the time interval or until the log
+threshold is reached again.
+
+This level is configured by the application environment variable
+`access_log_flush_size`. Its default value is `1000000` (one million).
+
+#### Backlog Caveat
+
+If the logger finds itself so far behind that it would need to schedule
+its next archival in the past---that is, after sending a log
+accumulation for interval N to the archiver, it finds that the end of
+interval N+1 has already passed---the logger will drop the backlog in
+its message box by exiting and allowing its supervisor process to
+restart it. Just before exiting, it will print an error message
+describing how far behind it was:
+
+```log
+09:56:02.584 [error] Access logger is running 302 seconds behind, skipping 0 log messages to catch up
+```
+
+With the default one-hour archive period, this case will only be
+encountered when the logger is an entire hour behind. This behavior is
+meant as a safety valve to prevent that hour of lag from growing due to
+memory pressure from the logger process's message queue.
+
+#### Manually Triggering Archival
+
+When taking a machine out of service, it may be desirable to trigger log
+archival before the end of the interval. To do so, use the
+`riak-cs-access` script with the command `flush`. It is installed in
+the same location as the `riak-cs` script. For most OS distributions this
+will be at `/usr/local/sbin`.
+
+By default, the script will wait up to 50 seconds for the logger to
+acknowledge that it has passed its accumulation to the archiver, and
+another 50 seconds for the archiver to acknowledge that it has finished
+archiving all accumulations it has received. To wait longer, use the
+`-w` parameter on the command line with an integer number of 5-second
+intervals to wait. That is, to wait for 100 seconds for each phase, use:
+
+```bash
+riak-cs-access flush -w 20
+```
+
+### Archive Retrieval
+
+When a request is received for a user's access stats over some time
+period, the objects for all intervals in that time period must be
+retrieved.
+
+It is important to note that the archival process does not attempt a
+read/modify/write cycle when writing a slice record. The `cs.access`
+bucket should have the `allow_mult=true` flag set, and so multiple Riak
+CS nodes writing the same slice record for the same user create
+siblings. Riak CS attempts to check and set the `allow_mult` bucket
+property when it starts up, and will print a warning in the log about
+being `unable to configure` or `unable to verify` bucket settings if it
+fails.
+
+Siblings should be handled at read time. Sibling resolution should be
+nothing more than a set union of all records. The HTTP resource serving
+the statistics expects to provide them on a node-accumulated basis, so
+it is important to set a **unique Erlang node name for each Riak CS
+node**.
+
+## Storage Statistics
+
+Storage statistics are also tracked on a per-user basis, as rollups for
+slices of time. They are stored in the same Riak cluster as other Riak
+CS data, in the `cs.storage` bucket.
+
+For detailed information about querying storage statistics, please read
+[Querying Storage Statistics]({{< baseurl >}}riak/cs/2.1.2/cookbooks/querying-storage-statistics).
+
+### High Level
+
+1. Storage is calculated for all users either
+   a. on a regular schedule or
+   b. when manually triggered with the `riak-cs-storage` script
+2. Each user's sum is stored in an object named for the time slice in
+   which the aggregation took place
+3. Sums are broken down by bucket
+
+Log retrieval is then simply a matter of making a request to Riak for all
+slice objects for a user in a particular time period.
+
+#### Prerequisite: Code Paths for MapReduce
+
+The storage calculation system uses MapReduce to sum the files in a
+bucket. This means you must tell all of your Riak nodes where to find
+Riak CS's compiled files before calculating storage.
+
+See [Configuring Riak for CS]({{< baseurl >}}riak/cs/2.1.2/cookbooks/configuration/riak-for-cs) for directions on setting this up.
+
+### Scheduling and Manual Triggering
+
+Triggering the storage calculation is a matter of setting up a regular
+schedule or manually starting the process via the `riak-cs-storage`
+script.
+
+#### Regular Schedules
+
+If you would like to have a Riak CS node calculate the storage used by every
+user at the same time (or times) each day, specify a schedule in that node's
+Riak CS `riak-cs.conf` file, or in the old-style `advanced.config` or
+`app.config` file.
+
+In the `riak_cs` section of the file, add an entry for
+`storage_schedule` like this:
+
+```riakcsconf
+stats.storage.schedule.1 = 0600
+```
+
+```advancedconfig
+{storage_schedule, "0600"}
+```
+
+```appconfig
+{storage_schedule, "0600"}
+```
+
+The time is given as a string of the form `HHMM`, representing the
+hour and minute GMT to start the calculation process. In this example, the node
+would start the storage calculation at 6am GMT every day.
+
+To set up multiple times, simply specify multiple entries. For example,
+to schedule the calculation to happen at both 6am and 6pm, use:
+
+```riakcsconf
+stats.storage.schedule.1 = 0600
+stats.storage.schedule.2 = 1800
+```
+
+```advancedconfig
+{storage_schedule, ["0600", "1800"]}
+```
+
+```appconfig
+{storage_schedule, ["0600", "1800"]}
+```
+
+{{% note title="Note on archive periods" %}}
+When using multiple times in a storage schedule, they must be scheduled for
+different archive periods (see details for `storage_archive_period` in the
+**Archival** section below). Extra scheduled times in the same archive period
+are skipped. This is intended to allow more than one Riak CS node to calculate
+storage statistics concurrently, as they will take notice of users already
+calculated by other nodes and skip them (see details in the Manual Triggering
+section about overriding this behavior).
+{{% /note %}}
+
+By default, no schedule is specified, so the storage calculation is
+never done automatically.
+
+#### Manual Triggering
+
+If you would rather trigger storage calculations manually, simply use
+the `batch` command in the `riak-cs-storage` script:
+
+```bash
+riak-cs-storage batch
+# Response:
+# Batch storage calculation started.
+```
+
+If there is already a calculation in progress, or if starting the
+calculation fails for some other reason, the script will print an error
+message saying so.
+
+By default, a manually triggered calculation run will skip users that
+have already been calculated in the current archive period (see the
+Archival section below for details about `storage_archive_period`).
+If you would rather calculate an additional sample for every user in this
+period, add the `--recalc` (or `-r` for short) option to the command
+line:
+
+```bash
+riak-cs-storage batch -r # force recalculation of every user
+```
+
+#### Further Control
+
+In-process batch calculations can also be paused or canceled using the
+`riak-cs-storage` script.
+
+To pause an in-process batch, use:
+
+```bash
+riak-cs-storage pause
+# Response:
+# The calculation was paused.
+```
+
+To resume a paused batch, use:
+
+```bash
+riak-cs-storage resume
+# Response:
+# The calculation was resumed.
+```
+
+To cancel an in-process batch (whether *paused* or *active*), use:
+
+```bash
+riak-cs-storage cancel
+# Response:
+# The calculation was canceled.
+```
+
+You can also retrieve the current state of the daemon by using the
+`status` command. The first line will indicate whether the daemon is
+*idle*, *active*, or *paused*, and it will be followed by further
+details based on progress. For example:
+
+```log
+A storage calculation is in progress
+Schedule: none defined
+Last run started at: 20120316T204135Z
+Current run started at: 20120316T204203Z
+Next run scheduled for: unknown/never
+Elapsed time of current run: 3
+Users completed in current run: 1
+Users left in current run: 4
+```
+
+### Results
+
+When the node finishes calculating every user's storage, it will print a
+message to the log noting how long the entire process took:
+
+```log
+08:33:19.282 [info] Finished storage calculation in 1 seconds.
+```
+
+### Process
+
+The calculation process is coordinated by a long-lived finite state
+machine process that handles both the scheduling (if a schedule is
+defined) and the running of the process.
+
+When a storage calculation starts, the first step is to obtain a list of
+known users of the system. Each user's record contains information about
+the buckets that the user owns.
+
+For each bucket that a user owns, a MapReduce query is run. The query's
+inputs are the list of the keys in the bucket (the input is
+`BucketName`, so the keys stay on the server). The query then has two
+phases: a map that produces tuples of the form `{1,
+ByteSize(File)}`---if *active*; nothing if *inactive*---and a reduce
+that sums those tuples element-wise. The result is one tuple whose first
+element is the number of files in the bucket and whose second element is
+the total number of bytes stored in the bucket.
+
+Only one bucket is calculated at a time to prevent putting too much load
+on the Riak cluster. Only one user is calculated at a time as well, to
+prevent too large a temporary list on the Riak CS node.
+
+Once the sum for each of the user's buckets is calculated, a record is
+written to the `cs.storage` Riak bucket.
+
+### Archival
+
+Records written to the `cs.storage` bucket are very similar to records
+written to the `cs.access` bucket used for logging access statistics.
+The value is a JSON object with one field per bucket. The key is a
+combination of the user's `key_id` and the timestamp of the time slice
+for which the calculation was run.
+
+The period for storage archival is separate from the period for access
+archival. The storage archival period is configured by the application
+environment variable `storage_archive_period`. The default is 86400 (one
+day). This is because storage calculations are expected to be archived
+much less frequently than access logs, and so specifying fewer possible
+keys to look up later reduces overhead at reporting time.
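+
+As a rough sketch of what reporting-time retrieval looks like, the request
+below fetches one user's storage rollups as JSON from the usage API
+described in [Querying Storage Statistics]({{< baseurl >}}riak/cs/2.1.2/cookbooks/querying-storage-statistics).
+The key ID, host, port, and time range are placeholders, and admin
+authentication is assumed to be disabled or handled separately:
+
+```bash
+# Fetch storage ("b") rollups in JSON ("j") for one user over one day.
+KEY_ID=8NK4FH2SGKJJM8JIP2GU   # placeholder user key_id
+curl "http://localhost:8080/riak-cs/usage/${KEY_ID}/bj/20120316T000000Z/20120317T000000Z"
+```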
diff --git a/content/riak/cs/2.1.2/cookbooks/using-with-keystone.md b/content/riak/cs/2.1.2/cookbooks/using-with-keystone.md
new file mode 100644
index 0000000000..669db52b2c
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/using-with-keystone.md
@@ -0,0 +1,514 @@
+---
+title: "Using Riak CS With Keystone"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Using Riak CS With Keystone"
+    identifier: "api_openstack_keystone"
+    weight: 103
+    parent: "api_openstack"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/Using-Riak-CS-With-Keystone/
+  - /riak/cs/latest/cookbooks/using-with-keystone/
+---
+
+This document shows you how to configure Riak CS to work with the
+[OpenStack Keystone](http://docs.openstack.org/developer/keystone/)
+authentication service.
+
+Riak CS can be configured to use either the OpenStack Object Storage API
+or the S3 API in conjunction with Keystone for authentication.
+
+## Terminology
+
+In a system that uses Keystone for authentication, there are three main
+entity types to be aware of: `tenants`, `users`, and `roles`.
+
+* `tenant` --- A tenant is a collection entity that can contain a number
+  of users
+* `user` --- A user represents an individual that uses the OpenStack
+  system
+* `role` --- A role is used to define a link between a user and a tenant
+  and to indicate the permissions of the user within that tenant
+
+The OpenStack `tenant_id` maps to a `key_id` to identify a user account
+in Riak CS. In OpenStack, only users who are assigned an `operator` role
+for a tenant may perform operations. Other users that belong to a tenant
+may be granted access using ACLs.
+
+Currently, Riak CS does not support OpenStack ACLs and only permits
+access to tenant operators. ACLs will be supported at a later date.
+
+By default, Riak CS recognizes `admin` and `swiftoperator` as valid
+operator roles, but that list can be configured.
+
+Riak CS does not currently support the use of multiple authentication
+servers via *reseller prefixes*, but if this turns out to be important
+based on user feedback, support may be added in the future.
+
+## Configuration
+
+#### API
+
+Set the API using the `rewrite_module` configuration in the Riak CS
+`riak-cs.conf` file, or the old-style `app.config` file in the `riak_cs`
+section.
+
+To use the S3 API, insert the following:
+
+```riakcsconf
+rewrite_module = riak_cs_s3_rewrite
+```
+
+```appconfig
+{riak_cs, [
+    %% Other configs
+    {rewrite_module, riak_cs_s3_rewrite},
+    %% Other configs
+  ]}
+```
+
+To use the OpenStack object storage API:
+
+```riakcsconf
+rewrite_module = riak_cs_oos_rewrite
+```
+
+```appconfig
+{riak_cs, [
+    %% Other configs
+    {rewrite_module, riak_cs_oos_rewrite},
+    %% Other configs
+  ]}
+```
+
+#### Authentication Module
+
+Set the authentication module using the `auth_module` configuration in the Riak
+CS `riak-cs.conf` file, or the old-style `app.config` file in the `riak_cs`
+section.
+
+To specify the Keystone authentication module:
+
+```riakcsconf
+auth_module = riak_cs_keystone_auth
+```
+
+```appconfig
+{riak_cs, [
+    %% Other configs
+    {auth_module, riak_cs_keystone_auth},
+    %% Other configs
+  ]}
+```
+
+#### Operator Roles
+
+You may optionally override the default list of valid operator roles in the
+`advanced.config` file, or the `app.config` file.
The default roles are `admin`
+and `swiftoperator`, but others may be used:
+
+```advancedconfig
+ {riak_cs, [
+   %% Other configs
+   {os_operator_roles, [<<"admin">>, <<"swiftoperator">>, <<"cinnamon">>]},
+   %% Other configs
+ ]}
+```
+
+```appconfig
+ {riak_cs, [
+   %% Other configs
+   {os_operator_roles, [<<"admin">>, <<"swiftoperator">>, <<"cinnamon">>]},
+   %% Other configs
+ ]}
+```
+
+**Note**: Each role should be formatted as shown above, with two angle
+brackets preceding and following each role value.
+
+#### Root Host
+
+Make sure that the value of the `root_host` key in the Riak CS `riak-cs.conf`
+file, or the `cs_root_host` key in the old-style `advanced.config` or
+`app.config` files, matches the root host used for the object store in the
+Keystone configuration.
+
+For example, given the following config snippet from a Keystone configuration
+file, the value for `root_host` (or `cs_root_host`) should be set to
+`object.store.host`:
+
+```config
+catalog.RegionOne.object_store.publicURL = http://object.store.host/v1/AUTH_$(tenant_id)s
+catalog.RegionOne.object_store.adminURL = http://object.store.host/
+catalog.RegionOne.object_store.internalURL = http://object.store.host/v1/AUTH_$(tenant_id)s
+```
+
+The entry in the Riak CS configuration file would be as follows:
+
+```riakcsconf
+root_host = object.store.host
+```
+
+```advancedconfig
+ {riak_cs, [
+   %% Other configs
+   {cs_root_host, "object.store.host"},
+   %% Other configs
+ ]}
+```
+
+```appconfig
+ {riak_cs, [
+   %% Other configs
+   {cs_root_host, "object.store.host"},
+   %% Other configs
+ ]}
+```
+
+#### Admin Token
+
+Riak CS needs to know the administration token so that it can successfully
+validate user tokens with Keystone. If no value for `os_admin_token` is
+specified, the default value is `ADMIN`. The value can be set by adding the
+following to the `riak_cs` section of the Riak CS `advanced.config` or
+`app.config` files:
+
+```advancedconfig
+ {riak_cs, [
+   %% Other configs
+   {os_admin_token, "SNARFSNARFSNARF"},
+   %% Other configs
+ ]}
+```
+
+```appconfig
+ {riak_cs, [
+   %% Other configs
+   {os_admin_token, "SNARFSNARFSNARF"},
+   %% Other configs
+ ]}
+```
+
+#### Auth URL
+
+Riak CS also needs to know the authentication URL to use to communicate with
+Keystone. The default value is `"http://localhost:5000/v2.0"`. To override
+this value, add the following to the `riak_cs` section of the Riak CS
+`advanced.config` or `app.config` files:
+
+```advancedconfig
+ {riak_cs, [
+   %% Other configs
+   {os_auth_url, "http://host.with.the.most.com:5000/v2.0"},
+   %% Other configs
+ ]}
+```
+
+```appconfig
+ {riak_cs, [
+   %% Other configs
+   {os_auth_url, "http://host.with.the.most.com:5000/v2.0"},
+   %% Other configs
+ ]}
+```
+
+#### Keystone Resources
+
+Riak CS needs to be aware of a few resources in order to perform
+authentication with Keystone. These resources are unlikely to need to be
+changed from their defaults, but that capability is provided in case the
+need arises.
+
+* Token Resources
+
+The default is `"tokens/"`. To override this, add the following to the `riak_cs`
+section of the Riak CS `advanced.config` or `app.config` files:
+
+```advancedconfig
+ {riak_cs, [
+   %% Other configs
+   {os_tokens_resource, "mytokens/"},
+   %% Other configs
+ ]}
+```
+
+```appconfig
+ {riak_cs, [
+   %% Other configs
+   {os_tokens_resource, "mytokens/"},
+   %% Other configs
+ ]}
+```
+
+* S3 Token Resources
+
+This resource is only used when the S3 API is used in conjunction with Keystone
+authentication. The default is `"s3tokens/"`.
To override this, add the
+following to the `riak_cs` section of the Riak CS `advanced.config` or
+`app.config` files:
+
+```advancedconfig
+ {riak_cs, [
+   %% Other configs
+   {os_s3_tokens_resource, "mys3tokens/"},
+   %% Other configs
+ ]}
+```
+
+```appconfig
+ {riak_cs, [
+   %% Other configs
+   {os_s3_tokens_resource, "mys3tokens/"},
+   %% Other configs
+ ]}
+```
+
+* User Resources
+
+The default is `"users/"`. To override this, add the following to the `riak_cs`
+section of the Riak CS `advanced.config` or `app.config` files:
+
+```advancedconfig
+ {riak_cs, [
+   %% Other configs
+   {os_users_resource, "myusers/"},
+   %% Other configs
+ ]}
+```
+
+```appconfig
+ {riak_cs, [
+   %% Other configs
+   {os_users_resource, "myusers/"},
+   %% Other configs
+ ]}
+```
+
+## Testing
+
+### Keystone Setup
+
+Follow the procedures documented in [Keystone Setup]({{}}riak/cs/2.1.2/cookbooks/keystone-setup) to set up and run
+Keystone.
+
+1. Create a tenant called `test`:
+
+    ```bash
+    keystone tenant-create --name test
+    ```
+
+1. Using the tenant id of the tenant created in the previous step,
+   create a user called `test` that is a member of the `test` tenant:
+
+    ```bash
+    keystone user-create --name test \
+      --pass test --email test@test.com \
+      --tenant-id <tenant-id> --enabled true
+    ```
+
+1. Create a role called `swiftoperator`:
+
+    ```bash
+    keystone role-create --name swiftoperator
+    ```
+
+1. Add the `swiftoperator` role for user `test`:
+
+    ```bash
+    keystone user-role-add --user-id <user-id> \
+      --role-id <role-id> --tenant-id <tenant-id>
+    ```
+
+1. Create ec2 credentials for the user `test`:
+
+    ```bash
+    keystone ec2-credentials-create --user_id <user-id> \
+      --tenant_id <tenant-id>
+    ```
+
+### Testing OpenStack API and Keystone Authentication
+
+1. Start Riak, Riak CS, and Stanchion. Make sure that the values for the
+   `rewrite_module` and `auth_module` keys in the Riak CS `riak-cs.conf` file,
+   or the old-style `advanced.config` or `app.config` file in the `riak_cs`
+   section, are set as follows:
+
+    ```riakcsconf
+    rewrite_module = riak_cs_oos_rewrite
+    auth_module = riak_cs_keystone_auth
+    ```
+
+    ```advancedconfig
+    {riak_cs, [
+      %% Other configs
+      {rewrite_module, riak_cs_oos_rewrite},
+      {auth_module, riak_cs_keystone_auth},
+      %% Other configs
+    ]}
+    ```
+
+    ```appconfig
+    {riak_cs, [
+      %% Other configs
+      {rewrite_module, riak_cs_oos_rewrite},
+      {auth_module, riak_cs_keystone_auth},
+      %% Other configs
+    ]}
+    ```
+
+1. Get an auth token for the `test` user to use in requests to Riak CS:
+
+    ```curl
+    curl -s -d '{"auth": {"tenantName": "test", "passwordCredentials": {"username": "test", "password": "test"}}}' \
+      -H 'Content-type: application/json' \
+      http://localhost:5000/v2.0/tokens | python -mjson.tool
+    ```
+
+    The value of the `id` field of the `token` object in the response is
+    used as the value for the `X-Auth-Token` header in all subsequent
+    requests to Riak CS. The `publicURL` for the `object-store` service
+    listed in the `serviceCatalog` of the response is the base URL used
+    for all API requests to Riak CS.
+
+    Now export the token and public URL, like this:
+
+    ```bash
+    export ID=20f1a9e46ebd42a3bdd03e009722eeb8
+    export URL=http://localhost:8080/v1/AUTH_8d84a17ac99d49fcb6f35c767dd562db
+    ```
+
+1. Create a bucket (S3 bucket == OpenStack container)
+
+    ```curl
+    curl -X PUT \
+      -H "X-Auth-Token: $ID" \
+      $URL/bucket1
+    ```
+
+1. List the buckets
+
+    ```curl
+    curl -H "X-Auth-Token: $ID" \
+      $URL
+    ```
+
+1. Put an object into the bucket
+
+    ```curl
+    curl -X PUT \
+      -H "X-Auth-Token: $ID" \
+      --data 'abcdefghi123456789' \
+      $URL/bucket1/object1
+    ```
+
+1. List the objects in the bucket
+
+    ```curl
+    curl -H "X-Auth-Token: $ID" \
+      $URL/bucket1
+    ```
+
+1. Fetch the object from the bucket
+
+    ```curl
+    curl -H "X-Auth-Token: $ID" \
+      $URL/bucket1/object1
+    ```
+
+1. Delete the object
+
+    ```curl
+    curl -X DELETE \
+      -H "X-Auth-Token: $ID" \
+      $URL/bucket1/object1
+    ```
+
+1. Delete the bucket
+
+    ```curl
+    curl -X DELETE \
+      -H "X-Auth-Token: $ID" \
+      $URL/bucket1
+    ```
+
+### Testing S3 API and Keystone Authentication
+
+1. If Riak and Stanchion are not already running, start them now.
+
+1. Edit the Riak CS `riak-cs.conf`, or the old-style `advanced.config` or
+   `app.config` file, and restart Riak CS. The values for `rewrite_module` and
+   `auth_module` should be set as follows:
+
+    ```riakcsconf
+    rewrite_module = riak_cs_s3_rewrite
+    auth_module = riak_cs_keystone_auth
+    ```
+
+    ```advancedconfig
+    {riak_cs, [
+      %% Other configs
+      {rewrite_module, riak_cs_s3_rewrite},
+      {auth_module, riak_cs_keystone_auth},
+      %% Other configs
+    ]}
+    ```
+
+    ```appconfig
+    {riak_cs, [
+      %% Other configs
+      {rewrite_module, riak_cs_s3_rewrite},
+      {auth_module, riak_cs_keystone_auth},
+      %% Other configs
+    ]}
+    ```
+
+1. Use the values of `access` and `secret` from the EC2 credentials
+   created for the `test` user as the `key_id` and `key_secret` for
+   signing requests. For example, if you are using `s3cmd`, use these
+   credentials for the `access_key` and `secret_key` fields of the
+   `.s3cfg` file. The subsequent examples are done using `s3cmd` since
+   it is a fairly common tool.
+
+1. Create a sample file to upload
+
+    ```bash
+    echo "ilovechickenilovelivermeowmixmeowmixwilldeliver" > upload.txt
+    ```
+
+1. Create a bucket (i.e., container)
+
+    ```bash
+    s3cmd mb s3://bucket2
+    ```
+
+1. List the buckets
+
+    ```bash
+    s3cmd ls
+    ```
+
+1. Put an object into the bucket
+
+    ```bash
+    s3cmd put upload.txt s3://bucket2
+    ```
+
+1. Fetch the object from the bucket
+
+    ```bash
+    s3cmd get s3://bucket2/upload.txt download.txt
+    ```
+
+1. Delete the object
+
+    ```bash
+    s3cmd del s3://bucket2/upload.txt
+    ```
+
+1. Delete the bucket
+
+    ```bash
+    s3cmd rb s3://bucket2
+    ```
diff --git a/content/riak/cs/2.1.2/cookbooks/version-compatibility.md b/content/riak/cs/2.1.2/cookbooks/version-compatibility.md
new file mode 100644
index 0000000000..54a1345a8a
--- /dev/null
+++ b/content/riak/cs/2.1.2/cookbooks/version-compatibility.md
@@ -0,0 +1,70 @@
+---
+title: "Version Compatibility"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Version Compatibility"
+    identifier: "reference_version_compat"
+    weight: 101
+    parent: "reference"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/Version-Compatibility/
+  - /riak/cs/latest/cookbooks/version-compatibility/
+---
+
+If you are deploying Riak CS in combination with an existing Riak
+cluster, you should verify that the version of Riak that you are using
+is compatible with the version of Riak CS that you intend to use.
+
+It is important to note that not all versions of Riak are compatible
+with Riak CS, but a number of version combinations have been tested, are
+known to function together, and can be recommended for use.
+
+The following section details combinations of Riak and Riak CS versions
+that are known to function together and provides some general tips about
+which Riak versions to use with Riak CS.
+
+## Unsupported Riak Versions
+
+Riak versions prior to 1.2.0 are known to have performance issues and are
+not tested, recommended, or supported for use with Riak CS. Additionally,
+Riak versions prior to 1.0.0 lack essential functionality required by
+Riak CS, such as Secondary Indexes and LevelDB support.
+
+## Working Version Combinations
+
+Basic functional testing has been performed with the following combinations of
+Riak and Riak CS. These combinations are also known to function in production
+environments.
+
+Riak version | Stanchion version | Riak CS version
+--------------|-------------------|----------------
+1.2.1 | 1.2.2 | 1.2.2
+1.2.1 | 1.3.0 | 1.3.0
+1.3.0 | 1.2.2 | 1.2.2
+1.3.0 | 1.3.0 | 1.3.0
+1.4.0 | 1.4.0 | 1.4.0
+1.4.1 | 1.4.0 | 1.4.0
+1.4.8 | 1.4.3 | 1.4.5
+1.4.10 | 1.5.0 | 1.5.0
+1.4.10 | 1.5.0 | 1.5.1
+1.4.10 | 1.5.0 | 1.5.2
+1.4.12 | 1.5.0 | 1.5.3
+1.4.12 | 1.5.0 | 1.5.4
+2.0.5 | 2.0.0 | 2.0.0
+2.0.5 | 2.0.0 | 2.0.1
+2.1.2 | 2.1.0 | 2.1.0
+
+**Note**: While Riak CS versions 1.5.0 and later will work with Riak
+1.4.x, we highly recommend running CS with at least Riak 1.4.8,
+preferably 1.4.10.
+
+Basic functional testing consists of account creation, object storage and
+retrieval, bucket listing operations, and Access Control List (ACL)
+enforcement verification.
+
+Note that functional testing of Riak CS clusters operating with mixed versions
+(e.g., a combination of Riak CS version 1.2.2 and version 1.3.0 nodes) has not
+been performed, and such configurations cannot be recommended at this time.
diff --git a/content/riak/cs/2.1.2/developing.md b/content/riak/cs/2.1.2/developing.md
new file mode 100644
index 0000000000..1b70045656
--- /dev/null
+++ b/content/riak/cs/2.1.2/developing.md
@@ -0,0 +1,21 @@
+---
+title: "Developing With Riak CS"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Developing"
+    identifier: "develop"
+    weight: 200
+    pre: lambda
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riak/cs/latest/developing/
+---
+
+### In This Section
+
+- [Accounts & Administration](../references/appendices/http-admin/)
+- [Riak CS APIs](../references/apis)
+- [Querying Access Statistics](../cookbooks/querying-access-statistics/)
+- [Querying Storage Statistics](../cookbooks/querying-storage-statistics/)
diff --git a/content/riak/cs/2.1.2/downloads.md b/content/riak/cs/2.1.2/downloads.md
new file mode 100644
index 0000000000..296b01ac23
--- /dev/null
+++ b/content/riak/cs/2.1.2/downloads.md
@@ -0,0 +1,28 @@
+---
+title: "Download Riak CS 2.1.2 and Other Tools"
+description: "Download some stuff!"
+menu:
+  riak_cs-2.1.2:
+    name: "Download Riak CS"
+    identifier: "download_riak_cs"
+    weight: 100
+    parent: "index"
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+layout: downloads
+listed_projects:
+  - project: "riak_cs"
+    version: "2.1.2"
+    title: "Riak CS"
+  - project: "stanchion"
+    version: "2.1.2"
+    title: "Stanchion"
+  - project: "riak_cs_control"
+    version: "1.0.2"
+    title: "Riak CS Control"
+aliases:
+  - /riakcs/2.1.2/riakcs-downloads/
+  - /riak/cs/2.1.2/riakcs-downloads/
+  - /riak/cs/latest/downloads/
+---
diff --git a/content/riak/cs/2.1.2/index.md b/content/riak/cs/2.1.2/index.md
new file mode 100644
index 0000000000..c1962c7d6c
--- /dev/null
+++ b/content/riak/cs/2.1.2/index.md
@@ -0,0 +1,47 @@
+---
+title: "Riak Cloud Storage"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Riak CS"
+    identifier: "index"
+    weight: 100
+    pre: bolt
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/
+---
+
+Riak CS (Cloud Storage) is easy-to-use object storage software built on top of
+[Riak KV](http://basho.com/riak/), Basho's distributed database. Riak CS is
+designed to provide simple, available, distributed cloud storage at any scale,
+and can be used to build cloud architectures---be they public or private---or
+as storage infrastructure for heavy-duty applications and services. Riak CS's
+API is [Amazon S3 compatible](http://docs.aws.amazon.com/AmazonS3/latest/API/APIRest.html)
+and supports per-tenant reporting for use cases involving billing
+and metering.
+
+Riak CS is open source and [free for download]({{}}riak/cs/2.1.2/downloads).
+
+## Notable Riak CS Features
+
+### Amazon S3-API Compatibility
+
+Riak CS has a built-in S3 interface with S3 Access Control List ([ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html)) support, which means that you can both use existing S3 tools and frameworks to manage your data and also import and extract data from Amazon directly. The HTTP REST API supports service, bucket, and object-level operations to easily store and retrieve data. There is also support for the [OpenStack Swift API]({{}}riak/cs/2.1.2/references/appendices/comparisons/swift/).
+
+### Per-Tenant Visibility
+
+With the Riak CS [Reporting API]({{}}riak/cs/2.1.2/cookbooks/monitoring-and-metrics), you can access per-tenant usage data and statistics over network I/O. This reporting functionality supports use cases including accounting,
+subscription, chargebacks, integration with billing systems, efficient multi-department utilization, and much more.
+
+### Supports Large Objects of Arbitrary Content Type, Plus Metadata
+
+Riak CS enables you to store any conceivable data type, such as
+images, text, video, documents, database backups, or software binaries.
+Riak CS can store objects into the terabyte size range using multipart
+file uploads. Riak CS also supports standard Amazon [metadata headers](http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html).
+
+### Multi-Datacenter Replication (Enterprise Edition Only)
+
+Riak CS [Enterprise](http://basho.com/riak-enterprise) includes Multi-Datacenter Replication for active backups, disaster recovery, and data locality. Provide low-latency storage wherever your users are and maintain availability even in the event of site failure.
diff --git a/content/riak/cs/2.1.2/operating.md b/content/riak/cs/2.1.2/operating.md new file mode 100644 index 0000000000..c06298d821 --- /dev/null +++ b/content/riak/cs/2.1.2/operating.md @@ -0,0 +1,20 @@ +--- +title: "Operating Riak CS" +description: "" +menu: + riak_cs-2.1.2: + name: "Operating" + identifier: "ops" + weight: 300 + pre: database +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riak/cs/latest/operating/ +--- + +### In This Section + +- [Configuring Riak CS](../cookbooks/configuration) +- [Running Riak CS](./running) +- [Operating Riak CS Advanced](./advanced) diff --git a/content/riak/cs/2.1.2/operating/advanced.md b/content/riak/cs/2.1.2/operating/advanced.md new file mode 100644 index 0000000000..48719161a0 --- /dev/null +++ b/content/riak/cs/2.1.2/operating/advanced.md @@ -0,0 +1,21 @@ +--- +title: "Operating Riak CS Advanced" +description: "" +menu: + riak_cs-2.1.2: + name: "Advanced" + identifier: "run_advanced" + weight: 102 + parent: "ops" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riak/cs/latest/operating/advanced/ +--- + +### In This Section + +- [Rolling Upgrades for Riak CS](../../cookbooks/rolling-upgrades/) +- [Monitoring and Metrics](../../cookbooks/monitoring-and-metrics/) +- [Riak CS Control](../../references/appendices/riak-cs-control/) +- [Riak CS Supercluster Support](../../cookbooks/supercluster/) diff --git a/content/riak/cs/2.1.2/operating/running.md b/content/riak/cs/2.1.2/operating/running.md new file mode 100644 index 0000000000..b8582dc2a3 --- /dev/null +++ b/content/riak/cs/2.1.2/operating/running.md @@ -0,0 +1,20 @@ +--- +title: "Running Riak CS Overview" +description: "" +menu: + riak_cs-2.1.2: + name: "Running" + identifier: "run" + weight: 101 + parent: "ops" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riak/cs/latest/operating/running/ +--- + +### In This Section + +- [Riak CS Command-line Tools](../../cookbooks/command-line-tools) +- [Launching and Stopping Riak CS](../../cookbooks/installing/launching-and-stopping/) +- [Riak CS Logging](../../cookbooks/logging/) diff --git a/content/riak/cs/2.1.2/reference.md b/content/riak/cs/2.1.2/reference.md new file mode 100644 index 0000000000..fd1b971ed4 --- /dev/null +++ b/content/riak/cs/2.1.2/reference.md @@ -0,0 +1,20 @@ +--- +title: "Riak CS Reference" +description: "" +menu: + riak_cs-2.1.2: + name: "Reference" + identifier: "reference" + weight: 500 + pre: references +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riak/cs/latest/reference/ +--- + +### In This Section + +- [Frequently Asked Questions](../cookbooks/faqs/riak-cs/) +- [Riak CS Version Compatibility](../cookbooks/version-compatibility/) +- [Release Notes](../cookbooks/release-notes/) diff --git a/content/riak/cs/2.1.2/references/apis.md b/content/riak/cs/2.1.2/references/apis.md new file mode 100644 index 0000000000..ceacbc598a --- /dev/null +++ b/content/riak/cs/2.1.2/references/apis.md @@ -0,0 +1,20 @@ +--- +title: "Riak CS APIs" +description: "" +menu: + riak_cs-2.1.2: + name: "APIs" + identifier: "api" + weight: 200 + parent: "develop" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riak/cs/latest/references/apis/ +--- + +### In This Section + +- [Riak CS Storage API](./storage) +- [Riak CS S3 Storage API](./storage/s3) +- [Riak CS OpenStack Storage API](./storage/openstack) diff --git a/content/riak/cs/2.1.2/references/apis/storage.md b/content/riak/cs/2.1.2/references/apis/storage.md new file mode 100644 index 0000000000..2133d196ed --- /dev/null +++ 
b/content/riak/cs/2.1.2/references/apis/storage.md @@ -0,0 +1,162 @@ +--- +title: "Riak CS Storage API" +description: "" +menu: + riak_cs-2.1.2: + name: "Riak CS Storage API" + identifier: "api_storage" + weight: 100 + parent: "api" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/references/apis/storage/ + - /riak/cs/2.1.2/references/apis/storage/ + - /riak/cs/latest/references/apis/storage/ +--- + +The Riak CS storage API is compatible with the Amazon S3 REST API, which +means that any of the operations listed can be executed using any of the +commonly available S3 libraries or tools. + +## API Feature Comparison + +The following table describes the support status for current Amazon S3 +functional features. + +Feature | Status | Remark +--------|--------|-------- +GET Service (lists all buckets for authenticated user) | | | +DELETE Bucket | | | +PUT Bucket | | | +Bucket Lifecycle | | | +Policy (Buckets, Objects) {{1.3.0+}} | | Supports the "*" principal type and the "Secure Transport" and "IP address" conditions. | +Policy (Buckets, Objects) {{1.3.0-}} | Coming Soon | Planned for future release | +Bucket Website | | | +Bucket ACLs (GET, PUT) | | | +Bucket Location | | | +Bucket Notification | | | +Bucket Object Versions | | | +GET Bucket Info (HEAD) | | | +Bucket Request Payment | | | +PUT Object | | | +Put Object (Copy) {{1.5.0+}} | | | +PUT Object (Copy) {{1.3.0-1.5.0}} | | Support is limited to a 0 byte copy from an object to itself for the purpose of updating metadata. | +PUT Object (Copy) {{1.3.0-}} | Coming Soon | Planned for future release | +DELETE Object {{1.3.0-}} | | | +DELETE Multiple Objects | | Planned for future release | +GET Object {{1.3.0+}} | | | +GET Object {{1.3.0-}} | | Range query unimplemented | +Object ACLs (GET, PUT) | | | +HEAD Object | | | +POST Object | | | +Copy Object | | Planned for future release | +Multipart Uploads {{1.3.0+}} | | UploadPartCopy unimplemented | +Multipart Uploads {{1.3.0-}} | Coming Soon | Planned for future release | + +## Service-level Operations + +* [GET Service]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request + +## Bucket-level Operations + +* [GET Bucket]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-bucket) --- Returns a list of the objects + within a bucket +* [GET Bucket ACL]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket +* [GET Bucket policy]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket +* [PUT Bucket]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-bucket) --- Creates a new bucket +* [PUT Bucket ACL]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions + for a bucket +* [PUT Bucket policy]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket +* [DELETE Bucket]({{}}riak/cs/2.1.2/references/apis/storage/s3/delete-bucket) --- Deletes a bucket +* [DELETE Bucket policy]({{}}riak/cs/2.1.2/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket + +## Object-level Operations + +* [GET Object]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-object) --- Retrieves an object +* [GET Object ACL]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object +* [PUT 
Object]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-object) --- Stores an object to a bucket +* [PUT Object (Copy)]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object +* [PUT Object ACL]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object +* [HEAD Object]({{}}riak/cs/2.1.2/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object) +* [DELETE Object]({{}}riak/cs/2.1.2/references/apis/storage/s3/delete-object) --- Deletes an object + +## Multipart Upload + +Multipart upload allows you to upload a single object as a set of parts. +Object parts can be uploaded independently and in any order. After all +parts are uploaded, Riak CS assembles an object out of the parts. When +your object size reaches 100MB, you should consider using multipart +uploads instead of uploading the object in a single operation. Read more +about multipart uploads on the [overview page]({{}}riak/cs/2.1.2/cookbooks/multipart-upload-overview). + +* [Initiate Multipart Upload]({{}}riak/cs/2.1.2/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID +* [Upload Part]({{}}riak/cs/2.1.2/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload +* [Complete Multipart Upload]({{}}riak/cs/2.1.2/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts +* [Abort Multipart Upload]({{}}riak/cs/2.1.2/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts +* [List Parts]({{}}riak/cs/2.1.2/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload. +* [List Multipart Uploads]({{}}riak/cs/2.1.2/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted. + +## Common Headers + +* [Common Riak CS Request Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-request-headers) +* [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers) + +There are two storage API options for Riak CS. The first and most fully +featured is the S3 API. There is also limited but improving support for +the OpenStack Object Storage API. + +Riak CS can present different APIs by using the URL-rewriting +capabilities of [Webmachine](https://github.com/basho/webmachine). +Configuring what API Riak CS uses is done by specifying the proper +*rewrite* module in the configuration file. A rewrite module contains a +set of rules for translating requests made using a particular API to +requests in the native Riak CS API. The native API was designed to +facilitate the organization and maintenance of the Riak CS Webmachine +resource modules. 
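+
+For intuition, here is a hypothetical sketch of the kind of translation a
+rewrite module performs; the exact internal URL forms follow the patterns
+shown in the mapping documents linked below:
+
+```
+# Incoming S3 API request (riak_cs_s3_rewrite), illustrative only:
+GET /basho-docs/basho-process.jpg HTTP/1.1
+Host: data.basho.com
+
+# ...is rewritten to the corresponding native Riak CS resource:
+GET /buckets/basho-docs/objects/basho-process.jpg
+```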
+
+### S3 API
+
+* Module: `riak_cs_s3_rewrite`
+* [Documentation](http://docs.aws.amazon.com/AmazonS3/latest/API/APIRest.html)
+* [Mapping]({{}}riak/cs/2.1.2/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api)
+
+### OpenStack Object Storage API (v1)
+
+* Module: `riak_cs_oos_rewrite`
+* [Documentation](http://docs.openstack.org/api/openstack-object-storage/1.0/content/index.html)
+* [Mapping]({{}}riak/cs/2.1.2/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api)
+
+Selecting an API is done by adding or changing the `rewrite_module` key in the
+Riak CS `riak-cs.conf` file, or the old-style `advanced.config` or `app.config`
+files in the `riak_cs` section. For example, to instruct Riak CS to present the
+S3 API, ensure the following is contained in your configuration file:
+
+```riakcsconf
+rewrite_module = riak_cs_s3_rewrite
+```
+
+```advancedconfig
+ {riak_cs, [
+   %% Other configs
+   {rewrite_module, riak_cs_s3_rewrite},
+   %% Other configs
+ ]}
+```
+
+```appconfig
+ {riak_cs, [
+   %% Other configs
+   {rewrite_module, riak_cs_s3_rewrite},
+   %% Other configs
+ ]}
+```
+
+The S3 API is the default set in the configuration included when installing a
+Riak CS package or building from source.
+
+More details for each option can be found at the following links:
+
+* [S3 API]({{}}riak/cs/2.1.2/references/apis/storage/s3/)
+* [OpenStack API]({{}}riak/cs/2.1.2/references/apis/storage/openstack/)
diff --git a/content/riak/cs/2.1.2/references/apis/storage/openstack.md b/content/riak/cs/2.1.2/references/apis/storage/openstack.md
new file mode 100644
index 0000000000..fc7bad5fc7
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/openstack.md
@@ -0,0 +1,63 @@
+---
+title: "Riak CS OpenStack Storage API"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "OpenStack API"
+    identifier: "api_openstack"
+    weight: 102
+    parent: "api"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/openstack/
+  - /riak/cs/2.1.2/references/apis/storage/openstack/
+  - /riak/cs/latest/references/apis/storage/openstack/
+---
+
+The OpenStack storage API (*v1*) provides a convenient way to integrate Riak CS for use as an object storage system in conjunction with an OpenStack deployment.
+
+## API Feature Comparison
+
+The following table describes the support status for current OpenStack Object Storage API features.
+
+Feature | Status | Remark
+--------|--------|--------
+List Containers (lists all buckets for authenticated user) | | |
+Get Account Metadata | Coming Soon | Planned for future release |
+Create or Update Account Metadata | Coming Soon | Planned for future release |
+Delete Account Metadata | Coming Soon | Planned for future release |
+List Objects | | |
+Create Container | | |
+Delete Container | | |
+Create or Update Container Metadata | | |
+Delete Container Metadata | | |
+Create Static Website | | |
+Get Object | | |
+Create or Update Object | | |
+Create Large Objects | Coming Soon | Planned for future release |
+Assigning CORS Headers to Requests | Coming Soon | Planned for future release |
+Enabling File Compression with the Content-Encoding Header | | |
+Enabling Browser Bypass with the Content-Disposition Header | | |
+Expiring Objects with the X-Delete-After and X-Delete-At Headers | | |
+Object Versioning | Coming Soon | Planned for future release |
+Copy Object | Coming Soon | Planned for future release |
+Delete Object | | |
+Get Object Metadata | Coming Soon | Planned for future release |
+Update Object Metadata | Coming Soon | Planned for future release |
+
+## Storage Account Services
+
+* [List Containers]({{}}riak/cs/2.1.2/references/apis/storage/openstack/list-containers) --- Lists the containers owned by an account
+
+## Storage Container Services
+
+* [List Objects]({{}}riak/cs/2.1.2/references/apis/storage/openstack/list-objects) --- Lists the objects in a container
+* [Create Container]({{}}riak/cs/2.1.2/references/apis/storage/openstack/create-container) --- Creates a new container
+* [Delete Container]({{}}riak/cs/2.1.2/references/apis/storage/openstack/delete-container) --- Deletes a container
+
+## Storage Object Services
+
+* [Get Object]({{}}riak/cs/2.1.2/references/apis/storage/openstack/get-object) --- Retrieves an object
+* [Create or Update Object]({{}}riak/cs/2.1.2/references/apis/storage/openstack/create-object) --- Writes an object in a container
+* [Delete Object]({{}}riak/cs/2.1.2/references/apis/storage/openstack/delete-object) --- Deletes an object from a container
diff --git a/content/riak/cs/2.1.2/references/apis/storage/openstack/create-container.md b/content/riak/cs/2.1.2/references/apis/storage/openstack/create-container.md
new file mode 100644
index 0000000000..6091d9df4d
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/openstack/create-container.md
@@ -0,0 +1,61 @@
+---
+title: "Riak CS OpenStack Create Container"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-Create-Container
+  - /riak/cs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-Create-Container
+  - /riak/cs/latest/references/apis/storage/openstack/create-container/
+---
+
+Creates a new container. The account of the user who makes the request to create the container becomes the container owner. Anonymous requests cannot create containers.
+
+## Container Naming
+
+A container name must obey the following rules, which produce a DNS-compliant container name:
+
+* Must be from 3 to 63 characters.
+* Must be one or more labels, each separated by a period (`.`). Each label:
+  * Must start with a lowercase letter or a number
+  * Must end with a lowercase letter or a number
+  * Can contain lowercase letters, numbers, and dashes
+* Must not be formatted as an IP address (e.g., `192.168.9.2`).
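+
+For illustration, here are a few hypothetical names evaluated against these
+rules:
+
+```
+basho-docs     # valid
+img.2012.q2    # valid
+Docs           # invalid: contains an uppercase letter
+db             # invalid: fewer than 3 characters
+192.168.9.2    # invalid: formatted as an IP address
+```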
+
+## Requests
+
+### Request Syntax
+
+```http
+PUT /<api version>/<account>/<container> HTTP/1.1
+Host: data.basho.com
+X-Auth-Token: auth_token
+```
+
+### Request Parameters
+
+This operation does not use request parameters.
+
+## Responses
+
+This operation does not return a response body.
+
+## Examples
+
+### Sample Request
+
+A request that creates a container named `basho-docs`.
+
+```http
+PUT /v1.0/deadbeef/basho-docs HTTP/1.1
+Host: data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+X-Auth-Token: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
+```
+
+### Sample Response
+
+```http
+HTTP/1.1 201 Created
+Date: Thu, 07 Jun 2007 18:50:19 GMT
+Server: RiakCS
+Content-Type: text/plain; charset=UTF-8
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/openstack/create-object.md b/content/riak/cs/2.1.2/references/apis/storage/openstack/create-object.md
new file mode 100644
index 0000000000..369d07080b
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/openstack/create-object.md
@@ -0,0 +1,60 @@
+---
+title: "Riak CS OpenStack Create Object"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-Create-Object
+  - /riak/cs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-Create-Object
+  - /riak/cs/latest/references/apis/storage/openstack/create-object/
+---
+
+Writes or overwrites an object's content and metadata.
+
+Riak CS is a distributed system. If it receives multiple write requests for the same object at the same time, only the object written last is kept. If necessary, you can build versioning or object locking into your application.
+
+To prevent the storage of data corrupted during transmission over a network, the `Content-MD5` header instructs Riak CS to compare the object to the MD5 value provided. If the values don't match, the operation returns an error. In addition, if the PUT Object operation calculates the MD5, you can compare the ETag that is returned to the calculated MD5 value.
+
+**Note**: You can configure an application to use the `100-continue` HTTP status code, which sends the Request Headers prior to sending the request body. Doing so prevents sending the message body when the message is rejected based on the headers, for example, due to authentication failure or redirect.
+
+## Requests
+
+### Request Syntax
+
+```http
+PUT /<api version>/<account>/<container>/<object> HTTP/1.1
+Host: data.basho.com
+X-Auth-Token: auth_token
+```
+
+## Responses
+
+This operation does not return a response body.
+
+## Examples
+
+### Sample Request
+
+A request that stores the object `basho-process.jpg` in the container `basho-docs`.
+
+```http
+PUT /v1.0/deadbeef/basho-docs/basho-process.jpg HTTP/1.1
+Host: data.basho.com
+Date: Fri, 01 Jun 2012 12:00:00 GMT
+X-Auth-Token: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
+Content-Type: text/plain
+Content-Length: 201445
+Expect: 100-continue
+[201445 bytes of object data]
+```
+
+### Sample Response
+
+```http
+HTTP/1.1 201 Created
+Date: Fri, 01 Jun 2012 12:00:00 GMT
+ETag: "32cf731c97645a398434535f271b2358"
+Content-Length: 0
+Connection: close
+Server: RiakCS
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/openstack/delete-container.md b/content/riak/cs/2.1.2/references/apis/storage/openstack/delete-container.md
new file mode 100644
index 0000000000..48782f9cfb
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/openstack/delete-container.md
@@ -0,0 +1,55 @@
+---
+title: "Riak CS OpenStack Delete Container"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-Delete-Container
+  - /riak/cs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-Delete-Container
+  - /riak/cs/latest/references/apis/storage/openstack/delete-container/
+---
+
+Deletes a container.
+
+{{% note title="Note" %}}
+All objects in the container must be deleted before you can delete the
+container.
+{{% /note %}}
+
+## Requests
+
+### Request Syntax
+
+```http
+DELETE /<api version>/<account>/<container> HTTP/1.1
+Host: data.basho.com
+X-Auth-Token: auth_token
+```
+
+## Responses
+
+This operation does not return a response body.
+
+## Examples
+
+### Sample Request
+
+A request that deletes a container named `basho-docs`.
+
+```http
+DELETE /v1.0/deadbeef/basho-docs HTTP/1.1
+Host: data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+X-Auth-Token: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
+```
+
+### Sample Response
+
+```http
+HTTP/1.1 204 No Content
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Connection: close
+Server: RiakCS
+Content-Length: 0
+Content-Type: text/plain; charset=UTF-8
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/openstack/delete-object.md b/content/riak/cs/2.1.2/references/apis/storage/openstack/delete-object.md
new file mode 100644
index 0000000000..c60c2beaf7
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/openstack/delete-object.md
@@ -0,0 +1,48 @@
+---
+title: "Riak CS OpenStack Delete Object"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-Delete-Object
+  - /riak/cs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-Delete-Object
+  - /riak/cs/latest/references/apis/storage/openstack/delete-object/
+---
+
+Removes the specified object, if it exists.
+
+## Requests
+
+### Request Syntax
+
+```http
+DELETE /<api version>/<account>/<container>/<object> HTTP/1.1
+Host: data.basho.com
+X-Auth-Token: auth_token
+```
+
+## Responses
+
+This operation does not return a response body.
+
+## Examples
+
+### Sample Request
+
+The following request deletes the object `basho-process.jpg` from the container `basho-docs`.
+
+```http
+DELETE /v1.0/deadbeef/basho-docs/basho-process.jpg HTTP/1.1
+Host: data.basho.com
+Date: Fri, 01 Jun 2012 12:00:00 GMT
+X-Auth-Token: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
+```
+
+### Sample Response
+
+```http
+HTTP/1.1 204 No Content
+Date: Wed, 06 Jun 2012 20:47:15 GMT
+Connection: close
+Server: RiakCS
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/openstack/get-object.md b/content/riak/cs/2.1.2/references/apis/storage/openstack/get-object.md
new file mode 100644
index 0000000000..277ad14f51
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/openstack/get-object.md
@@ -0,0 +1,49 @@
+---
+title: "Riak CS OpenStack Get Object"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-Get-Object
+  - /riak/cs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-Get-Object
+  - /riak/cs/latest/references/apis/storage/openstack/get-object/
+---
+
+Retrieves an object from a Riak CS container.
+
+## Requests
+
+### Request Syntax
+
+```http
+GET /<api version>/<account>/<container>/<object> HTTP/1.1
+Host: data.basho.com
+X-Auth-Token: auth_token
+```
+
+## Examples
+
+### Sample Request
+
+The following request returns the object `basho-process.jpg` from the container `basho-docs`.
+
+```http
+GET /v1.0/deadbeef/basho-docs/basho-process.jpg HTTP/1.1
+Host: data.basho.com
+Date: Fri, 01 Jun 2012 12:00:00 GMT
+X-Auth-Token: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
+```
+
+### Sample Response
+
+```http
+HTTP/1.1 200 OK
+Date: Wed, 06 Jun 2012 20:48:15 GMT
+Last-Modified: Wed, 06 Jun 2012 13:39:25 GMT
+ETag: "32cf731c97645a398434535f271b2358"
+Content-Length: 201445
+Content-Type: text/plain
+Connection: close
+Server: RiakCS
+[201445 bytes of object data]
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/openstack/list-containers.md b/content/riak/cs/2.1.2/references/apis/storage/openstack/list-containers.md
new file mode 100644
index 0000000000..49b4cb0438
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/openstack/list-containers.md
@@ -0,0 +1,53 @@
+---
+title: "Riak CS OpenStack List Containers"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-List-Containers
+  - /riak/cs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-List-Containers
+  - /riak/cs/latest/references/apis/storage/openstack/list-containers/
+---
+
+Returns a list of all containers owned by an *authenticated* account.
+
+**Note**: This operation does not list containers created by other accounts. It also does not list containers for anonymous requests.
+
+## Requests
+
+### Request Syntax
+
+```http
+GET /<api version>/<account> HTTP/1.1
+Host: data.basho.com
+X-Auth-Token: auth_token
+```
+
+## Responses
+
+A list of containers is returned in the response body, one container per line. The HTTP response's status code will be `2xx` (between `200` and `299`, inclusive).
+
+## Examples
+
+### Sample Request
+
+```http
+GET /v1.0/deadbeef HTTP/1.1
+Host: data.basho.com
+X-Auth-Token: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
+```
+
+### Sample Response
+
+```http
+HTTP/1.1 200 OK
+Date: Thu, 07 Jun 2010 18:57:07 GMT
+Server: RiakCS
+Content-Type: text/plain; charset=UTF-8
+Content-Length: 32
+
+  images
+  movies
+  documents
+  backups
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/openstack/list-objects.md b/content/riak/cs/2.1.2/references/apis/storage/openstack/list-objects.md
new file mode 100644
index 0000000000..04e0839c1e
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/openstack/list-objects.md
@@ -0,0 +1,55 @@
+---
+title: "Riak CS OpenStack List Objects"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-List-Objects
+  - /riak/cs/2.1.2/references/apis/storage/openstack/RiakCS-OpenStack-List-Objects
+  - /riak/cs/latest/references/apis/storage/openstack/list-objects/
+---
+
+Returns a list of objects (all or up to 1,000) in a container.
+
+## Requests
+
+### Request Syntax
+
+```http
+GET /<api version>/<account>/<container> HTTP/1.1
+Host: data.basho.com
+X-Auth-Token: auth_token
+```
+
+## Responses
+
+A list of objects is returned in the response body, one object name
+per line. The response will be a `200 OK` if the request succeeded. If
+the container does not exist, or if an incorrect account is specified,
+then a response with a `404` (Not Found) status code will be returned.
+
+## Examples
+
+### Sample Request
+
+A request that returns the objects in the container named `basho-docs`.
+
+```http
+GET /v1.0/deadbeef/basho-docs HTTP/1.1
+Host: data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+X-Auth-Token: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
+```
+
+### Sample Response
+
+```http
+HTTP/1.1 200 OK
+Date: Thu, 07 Jun 2010 18:50:19 GMT
+Server: RiakCS
+Content-Type: text/plain; charset=UTF-8
+Content-Length: 28
+
+  scheduleQ1.jpg
+  scheduleQ2.jpg
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api.md b/content/riak/cs/2.1.2/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api.md
new file mode 100644
index 0000000000..b594e03abf
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api.md
@@ -0,0 +1,55 @@
+---
+title: "Mapping From OOS API to Riak CS internal API"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/openstack/Mapping-From-OOS-API-to-Riak-CS-internal-API
+  - /riak/cs/2.1.2/references/apis/storage/openstack/Mapping-From-OOS-API-to-Riak-CS-internal-API
+  - /riak/cs/latest/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api/
+---
+
+## Overview
+
+This document outlines a mapping of the OpenStack Object Storage (OOS) API (version 1.0) URLs to their rewritten format, which is processed by Webmachine and Riak CS.
+
+## URL Mapping
+
+### Storage Account Services
+
+* List Containers
+  * `GET /<api version>/<account>` -> `GET /buckets`
+* Retrieve account metadata
+  * **TBD**
+* Create/Update account metadata
+  * **TBD**
+* Delete account metadata
+  * **TBD**
+
+### Storage Container Services
+
+* List Objects
+  * `GET /<api version>/<account>/<container>` -> `GET /buckets/<container>/objects`
+* Create Container
+  * `PUT /<api version>/<account>/<container>` -> `PUT /buckets/<container>`
+* Delete Container
+  * `DELETE /<api version>/<account>/<container>` -> `DELETE /buckets/<container>`
+* Retrieve Container Metadata
+  * **TBD**
+* Create/Update Container Metadata
+  * **TBD**
+* Delete Container Metadata
+  * **TBD**
+
+### Storage Object Services
+
+* Retrieve Object
+  * `GET /<api version>/<account>/<container>/<object>` -> `GET /buckets/<container>/objects/<object>`
+* Create/Update Object
+  * `PUT /<api version>/<account>/<container>/<object>` -> `PUT /buckets/<container>/objects/<object>`
+* Delete Object
+  * `DELETE /<api version>/<account>/<container>/<object>` -> `DELETE /buckets/<container>/objects/<object>`
+* Retrieve Object Metadata
+  * **TBD**
+* Update Object Metadata
+  * **TBD**
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3.md b/content/riak/cs/2.1.2/references/apis/storage/s3.md
new file mode 100644
index 0000000000..c2e47c96fc
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3.md
@@ -0,0 +1,105 @@
+---
+title: "Riak CS S3 Storage API"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "S3 API"
+    identifier: "api_s3"
+    weight: 101
+    parent: "api"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/
+  - /riak/cs/2.1.2/references/apis/storage/s3/
+  - /riak/cs/latest/references/apis/storage/s3/
+---
+
+The Riak CS storage API is compatible with the Amazon S3 REST API, which
+means that any of the operations listed below can be executed against
+Riak CS using any of the commonly available S3 libraries and tools.
+
+## API Feature Comparison
+
+The following table describes the support status for current Amazon S3
+functional features.
+
+Feature | Status | Remark
+--------|--------|--------
+GET Service (lists all buckets for authenticated user) | | |
+DELETE Bucket | | |
+PUT Bucket | | |
+Bucket Lifecycle | | |
+Policy (Buckets, Objects) {{1.3.0+}} | | Supports the "*" principal type and the "Secure Transport" and "IP address" conditions. |
+Policy (Buckets, Objects) {{1.3.0-}} | Coming Soon | Planned for future release |
+Bucket Website | | |
+Bucket ACLs (GET, PUT) | | |
+Bucket Location | | |
+Bucket Notification | | |
+Bucket Object Versions | | |
+GET Bucket Info (HEAD) | | |
+Bucket Request Payment | | |
+PUT Object | | |
+PUT Object (Copy) {{1.5.0+}} | | |
+PUT Object (Copy) {{1.3.0-1.4.5}} | | Support is limited to a 0 byte copy from an object to itself for the purpose of updating metadata. |
+PUT Object (Copy) {{1.3.0-}} | Coming Soon | Planned for future release |
+DELETE Object | | |
+DELETE Multiple Objects | | |
+GET Object {{1.3.0+}} | | |
+GET Object {{1.3.0-}} | | Range query unimplemented |
+Object ACLs (GET, PUT) | | |
+HEAD Object | | |
+POST Object | | |
+Copy Object | | |
+Multipart Uploads {{1.5.0+}} | | |
+Multipart Uploads {{1.3.0-1.4.5}} | | UploadPartCopy unimplemented |
+Multipart Uploads {{1.3.0-}} | Coming Soon | Planned for future release |
+
+## Service-level Operations
+
+* [GET Service]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-service) --- Returns a list of all buckets owned by the user who sent the request
+
+## Bucket-level Operations
+
+* [GET Bucket]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-bucket) --- Returns a list of the objects
+  within a bucket
+* [GET Bucket ACL]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-bucket-acl) --- Returns the [Access Control List (ACL)](http://docs.aws.amazon.com/AmazonS3/latest/dev/ACLOverview.html) associated with a bucket
+* [GET Bucket policy]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-bucket-policy) --- Gets the policy of a bucket
+* [PUT Bucket]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-bucket) --- Creates a new bucket
+* [PUT Bucket ACL]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-bucket-acl) --- Sets the ACL permissions
+  for a bucket
+* [PUT Bucket policy]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-bucket-policy) --- Sets the policy for a bucket
+* [DELETE Bucket]({{}}riak/cs/2.1.2/references/apis/storage/s3/delete-bucket) --- Deletes a bucket
+* [DELETE Bucket policy]({{}}riak/cs/2.1.2/references/apis/storage/s3/delete-bucket-policy) --- Deletes the policy of a bucket
+
+## Object-level Operations
+
+* [GET Object]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-object) --- Retrieves an object
+* [GET Object ACL]({{}}riak/cs/2.1.2/references/apis/storage/s3/get-object-acl) --- Returns the ACLs associated with an object
+* [PUT Object]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-object) --- Stores an object to a bucket
+* [PUT Object (Copy)]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-object-copy) --- Creates a copy of an object
+* [PUT Object ACL]({{}}riak/cs/2.1.2/references/apis/storage/s3/put-object-acl) --- Sets the ACLs associated with an object
+* [HEAD Object]({{}}riak/cs/2.1.2/references/apis/storage/s3/head-object) --- Retrieves object metadata (not the full content of the object)
+* [DELETE Object]({{}}riak/cs/2.1.2/references/apis/storage/s3/delete-object) --- Deletes an object
+
+## Multipart Upload
+
+Multipart upload allows you to upload a single object as a set of parts.
+Object parts can be uploaded independently and in any order. After all
+parts are uploaded, Riak CS assembles an object out of the parts. When
+your object size reaches 100MB, you should consider using multipart
+uploads instead of uploading the object in a single operation. Read more
+about multipart uploads on the [overview page]({{}}riak/cs/2.1.2/cookbooks/multipart-upload-overview).
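+
+As a rough, unsigned sketch of the request flow (the individual operations
+are detailed in the list below), where `<upload-id>` and the part ETags are
+values returned by the server:
+
+```
+# 1. Initiate the upload and note the UploadId in the XML response
+POST /large.iso?uploads HTTP/1.1
+
+# 2. Upload each part (at least 5 MB, except the last), saving each ETag
+PUT /large.iso?partNumber=1&uploadId=<upload-id> HTTP/1.1
+
+# 3. Complete the upload, posting the list of part numbers and ETags
+POST /large.iso?uploadId=<upload-id> HTTP/1.1
+```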
+
+* [Initiate Multipart Upload]({{}}riak/cs/2.1.2/references/apis/storage/s3/initiate-multipart-upload) --- Initiates a multipart upload and returns an upload ID
+* [Upload Part]({{}}riak/cs/2.1.2/references/apis/storage/s3/upload-part) --- Uploads a part in a multipart upload
+* [Complete Multipart Upload]({{}}riak/cs/2.1.2/references/apis/storage/s3/complete-multipart-upload) --- Completes a multipart upload and assembles previously uploaded parts
+* [Abort Multipart Upload]({{}}riak/cs/2.1.2/references/apis/storage/s3/abort-multipart-upload) --- Aborts a multipart upload and eventually frees storage consumed by previously uploaded parts
+* [List Parts]({{}}riak/cs/2.1.2/references/apis/storage/s3/list-parts) --- Lists the parts that have been uploaded for a specific multipart upload.
+* [List Multipart Uploads]({{}}riak/cs/2.1.2/references/apis/storage/s3/list-multipart-uploads) --- Lists multipart uploads that have not yet been completed or aborted.
+
+## Common Headers
+
+* [Common Riak CS Request Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-request-headers)
+* [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers)
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/abort-multipart-upload.md b/content/riak/cs/2.1.2/references/apis/storage/s3/abort-multipart-upload.md
new file mode 100644
index 0000000000..811e15f509
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/abort-multipart-upload.md
@@ -0,0 +1,72 @@
+---
+title: "Riak CS Abort Multipart Upload"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-Abort-Multipart-Upload
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-Abort-Multipart-Upload
+  - /riak/cs/latest/references/apis/storage/s3/abort-multipart-upload/
+---
+
+Aborts a multipart upload. After a multipart upload is aborted, the storage
+consumed by any previously uploaded parts will be freed.
+
+## Requests
+
+### Request Syntax
+
+This example shows the syntax for aborting a multipart upload.
+
+```
+DELETE /ObjectName?uploadId=UploadId HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signatureValue
+```
+
+### Request Headers
+
+This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-request-headers).
+
+### Request Elements
+
+This operation does not use request elements.
+
+## Response
+
+### Response Headers
+
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers).
+
+### Response Elements
+
+This operation does not use response elements.
+
+### Special Errors
+
+**NoSuchUpload** - The specified multipart upload does not exist.
+
+## Examples
+
+### Sample Request
+
+The following request aborts a multipart upload identified by its upload ID.
+
+```
+DELETE /large.iso?uploadId=VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA HTTP/1.1
+Host: os.data.basho.com
+Date: Mon, 1 Nov 2010 20:34:56 GMT
+Authorization: AWS AKIAIOSFODNN7EXAMPLE:0RQf3/cRonhpaBX5sCYVf1bNRuU=
+```
+
+### Sample Response
+
+```
+HTTP/1.1 204 No Content
+Date: Mon, 1 Nov 2010 20:34:56 GMT
+Content-Length: 0
+Connection: keep-alive
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/common-request-headers.md b/content/riak/cs/2.1.2/references/apis/storage/s3/common-request-headers.md
new file mode 100644
index 0000000000..6c5e0c37a4
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/common-request-headers.md
@@ -0,0 +1,24 @@
+---
+title: "Common Riak CS Request Headers"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/Common-RiakCS-Request-Headers
+  - /riak/cs/2.1.2/references/apis/storage/s3/Common-RiakCS-Request-Headers
+  - /riak/cs/latest/references/apis/storage/s3/common-request-headers/
+---
+
+These are the headers that are common to all Riak CS REST requests.
+
+Header | Description
+:------|:------------
+`Authorization` | Information required to authenticate the request. This header is not required for anonymous requests.
+`Cache-Control` | This header is for use by caches and intermediate proxies. It can be any string. {{1.5.0+}}
+`Content-Length` | The length of the message without headers according to [RFC 2616](https://www.ietf.org/rfc/rfc2616.txt). This header is required for PUTs and for operations that load XML.
+`Content-Type` | The content type of the resource, e.g. `application/json`.
+`Content-MD5` | The base64-encoded 128-bit MD5 digest of the message without the headers according to [RFC 1864](https://www.ietf.org/rfc/rfc1864.txt). Although this header is optional, the `Content-MD5` header can be used to confirm that the data is the same as what was originally sent.
+`Date` | The current date and time according to the requester, e.g. `Fri, 01 Jun 2012 12:00:00 GMT`. With the `Authorization` header, you must specify either the `x-amz-date` or `Date` header.
+`Expect` | When you use `100-continue` in your application, it doesn't send the request body until it receives an acknowledgment. That way, the body of the message isn't sent if the message is rejected based on the headers.
+`Host` | For path-style requests, the value is something like `data.basho.com`. For virtual-style requests, the value is something like `bucketname.data.basho.com`.
+`x-amz-date` | This header is optional for HTTP 1.0 requests but required for HTTP 1.1. Registers the current date and time according to the requester, e.g. `Fri, 01 Jun 2012 12:00:00 GMT`. With the `Authorization` header, you must specify either the `x-amz-date` or `Date` header. If you specify both, the value for this header takes precedence.
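+
+For illustration, a rough sketch of supplying some of these headers with
+`curl`; the bucket, object, and credentials are placeholders, and the
+`Authorization` signature (elided here as `<signature>`) must still be
+computed as required by the S3 API:
+
+```bash
+# Content-MD5: base64-encoded binary MD5 digest of the body (RFC 1864)
+MD5=$(openssl dgst -md5 -binary upload.txt | base64)
+
+# Date: the current date and time in RFC 2616 format
+DATE=$(date -u +"%a, %d %b %Y %H:%M:%S GMT")
+
+curl -X PUT \
+     -H "Date: $DATE" \
+     -H "Content-MD5: $MD5" \
+     -H "Content-Type: text/plain" \
+     -H "Authorization: AWS <key-id>:<signature>" \
+     --data-binary @upload.txt \
+     http://bucketname.data.basho.com/upload.txt
+```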
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/common-response-headers.md b/content/riak/cs/2.1.2/references/apis/storage/s3/common-response-headers.md
new file mode 100644
index 0000000000..8688a06087
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/common-response-headers.md
@@ -0,0 +1,20 @@
+---
+title: "Common Riak CS Response Headers"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/Common-RiakCS-Response-Headers
+  - /riak/cs/2.1.2/references/apis/storage/s3/Common-RiakCS-Response-Headers
+  - /riak/cs/latest/references/apis/storage/s3/common-response-headers/
+---
+
+These are the headers that are common to all Riak CS REST responses.
+
+Header | Description | Data type
+:------|:------------|:---------
+`Content-Length` | The length in bytes of the response body. | string |
+`Connection` | Whether the connection to the server is open or closed. | enum (`open` or `close`) |
+`Date` | The date and time that Riak CS responded, e.g. `Fri, 01 Jun 2012 12:00:00 GMT` | string |
+`ETag` | The entity tag is an MD5 hash of the object and reflects only changes to the object contents, not the object's metadata. The ETag is set when an object is created. | string |
+`Server` | The name of the server that created the response. | string |
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/complete-multipart-upload.md b/content/riak/cs/2.1.2/references/apis/storage/s3/complete-multipart-upload.md
new file mode 100644
index 0000000000..19ec9b5d46
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/complete-multipart-upload.md
@@ -0,0 +1,163 @@
+---
+title: "Riak CS Complete Multipart Upload"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-Complete-Multipart-Upload
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-Complete-Multipart-Upload
+  - /riak/cs/latest/references/apis/storage/s3/complete-multipart-upload/
+---
+
+Completes a multipart upload by assembling previously uploaded parts. Upon
+receiving this request, Riak CS concatenates all the parts in ascending order
+by part number to create a new object. The parts list (part number and ETag
+header value) must be provided in the Complete Multipart Upload request.
+
+Processing of a Complete Multipart Upload request could take several minutes
+to complete. Riak CS sends the initial `200 OK` response while processing is
+still in progress, and it then periodically sends whitespace characters to
+keep the connection from timing out. Because a request could fail after the
+initial `200 OK` response has been sent, it is important that you check the
+response body to determine whether the request succeeded.
+
+## Requests
+
+### Request Syntax
+
+This example shows the syntax for completing a multipart upload.
+
+```
+POST /ObjectName?uploadId=UploadId HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Content-Length: size
+Authorization: signatureValue
+
+<CompleteMultipartUpload>
+  <Part>
+    <PartNumber>PartNumber</PartNumber>
+    <ETag>ETag</ETag>
+  </Part>
+  ...
+</CompleteMultipartUpload>
+```
+
+### Request Headers
+
+This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-request-headers).
+
+### Request Elements
+
+**CompleteMultipartUpload** - Container for the request.
+
+* *Type*: Container
+* *Children*: One or more `Part` elements
+* *Ancestors*: None
+
+**Part** - Container for elements related to a particular previously uploaded part.
+
+* *Type*: Container
+* *Children*: `PartNumber`, `ETag`
+* *Ancestors*: `CompleteMultipartUpload`
+
+**PartNumber** - Part number that identifies the part.
+
+* *Type*: Integer
+* *Ancestors*: `Part`
+
+**ETag** - Entity tag returned when the part was uploaded.
+
+* *Type*: String
+* *Ancestors*: `Part`
+
+## Response
+
+### Response Headers
+
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers).
+
+### Response Elements
+
+**CompleteMultipartUploadResult** - Container for the response.
+
+* *Type*: Container
+* *Children*: `Location`, `Bucket`, `Key`, `ETag`
+* *Ancestors*: None
+
+**Location** - The URI that identifies the newly created object.
+
+* *Type*: URI
+* *Ancestors*: `CompleteMultipartUploadResult`
+
+**Bucket** - The name of the bucket that contains the newly created object.
+
+* *Type*: String
+* *Ancestors*: `CompleteMultipartUploadResult`
+
+**Key** - The object key of the newly created object.
+
+* *Type*: String
+* *Ancestors*: `CompleteMultipartUploadResult`
+
+**ETag** - Entity tag that identifies the newly created object's data.
+
+* *Type*: String
+* *Ancestors*: `CompleteMultipartUploadResult`
+
+### Special Errors
+
+**EntityTooSmall** - Your proposed upload is smaller than the minimum allowed object size. Each part must be at least 5 MB in size, except the last part.
+
+**InvalidPart** - One or more of the specified parts could not be found. The part might not have been uploaded, or the specified entity tag might not have matched the part's entity tag.
+
+**InvalidPartOrder** - The list of parts was not in ascending order. The parts list must be specified in ascending order by part number.
+
+**NoSuchUpload** - The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed.
+
+## Examples
+
+### Sample Request
+
+The following Complete Multipart Upload request specifies three parts in the `CompleteMultipartUpload` element; it is shown below after a brief scripted sketch.
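+
+For orientation only (boto3 is not part of this reference, and the endpoint,
+credentials, bucket, key, and upload ID below are placeholders), the same
+operation issued through an S3-compatible client might look like this sketch:
+
+```python
+import boto3
+
+s3 = boto3.client(
+    "s3",
+    endpoint_url="https://data.basho.com",  # hypothetical Riak CS endpoint
+    aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
+    aws_secret_access_key="...",
+)
+
+# Assemble the previously uploaded parts in ascending PartNumber order.
+s3.complete_multipart_upload(
+    Bucket="os",
+    Key="large.iso",
+    UploadId="VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA",
+    MultipartUpload={"Parts": [
+        {"PartNumber": 1, "ETag": '"a54357aff0632cce46d942af68356b38"'},
+        {"PartNumber": 2, "ETag": '"0c78aef83f66abc1fa1e8477f296d394"'},
+        {"PartNumber": 3, "ETag": '"acbd18db4cc2f85cedef654fccc4a4d8"'},
+    ]},
+)
+```
+
+The raw HTTP request and response for the same operation follow.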
+
+```
+POST /large.iso?uploadId=VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA HTTP/1.1
+Host: os.data.basho.com
+Date: Mon, 1 Nov 2010 20:34:56 GMT
+Content-Length: 391
+Authorization: AWS AKIAIOSFODNN7EXAMPLE:0RQf4/cRonhpaBX5sCYVf1bNRuU=
+
+<CompleteMultipartUpload>
+  <Part>
+    <PartNumber>1</PartNumber>
+    <ETag>"a54357aff0632cce46d942af68356b38"</ETag>
+  </Part>
+  <Part>
+    <PartNumber>2</PartNumber>
+    <ETag>"0c78aef83f66abc1fa1e8477f296d394"</ETag>
+  </Part>
+  <Part>
+    <PartNumber>3</PartNumber>
+    <ETag>"acbd18db4cc2f85cedef654fccc4a4d8"</ETag>
+  </Part>
+</CompleteMultipartUpload>
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Mon, 1 Nov 2010 20:34:56 GMT
+Connection: close
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+
+<CompleteMultipartUploadResult>
+  <Location>http://os.data.basho.com/large.iso</Location>
+  <Bucket>os</Bucket>
+  <Key>large.iso</Key>
+  <ETag>"3858f62230ac3c915f300c664312c11f-9"</ETag>
+</CompleteMultipartUploadResult>
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/delete-bucket-policy.md b/content/riak/cs/2.1.2/references/apis/storage/s3/delete-bucket-policy.md
new file mode 100644
index 0000000000..31954ffb23
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/delete-bucket-policy.md
@@ -0,0 +1,70 @@
+---
+title: "Riak CS DELETE Bucket policy"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-DELETE-Bucket-policy
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-DELETE-Bucket-policy
+  - /riak/cs/latest/references/apis/storage/s3/delete-bucket-policy/
+---
+
+The `DELETE Bucket policy` operation deletes the `policy` subresource of an existing bucket. To perform this operation, you must be the bucket owner.
+
+## Requests
+
+### Request Syntax
+
+```
+DELETE /?policy HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signatureValue
+
+```
+
+### Request Parameters
+
+This operation does not use request parameters.
+
+### Request Headers
+
+This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-request-headers).
+
+### Request Elements
+
+No body should be appended.
+
+## Response
+
+### Response Headers
+
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers).
+
+### Response Elements
+
+`DELETE` response elements return whether the operation succeeded or not.
+
+## Examples
+
+### Sample Request
+
+The following request deletes the policy attached to the specified bucket.
+
+```
+DELETE /?policy HTTP/1.1
+Host: bucketname.data.basho.com
+Date: Tue, 04 Apr 2010 20:34:56 GMT
+Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=
+
+```
+
+### Sample Response
+
+```
+HTTP/1.1 204 No Content
+Date: Tue, 04 Apr 2010 12:00:01 GMT
+Connection: keep-alive
+Server: Riak CS
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/delete-bucket.md b/content/riak/cs/2.1.2/references/apis/storage/s3/delete-bucket.md
new file mode 100644
index 0000000000..763ac602b6
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/delete-bucket.md
@@ -0,0 +1,54 @@
+---
+title: "Riak CS DELETE Bucket"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-DELETE-Bucket
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-DELETE-Bucket
+  - /riak/cs/latest/references/apis/storage/s3/delete-bucket/
+---
+
+The `DELETE Bucket` operation deletes the bucket specified in the URI.
+
+{{% note title="Note" %}}
+All objects in the bucket must be deleted before you can delete the bucket.
+{{% /note %}}
+
+## Requests
+
+### Request Syntax
+
+```
+DELETE / HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signature_value
+```
+
+## Responses
+
+DELETE Bucket uses only common response headers and doesn't return any response elements.
+
+## Examples
+
+### Sample Request
+
+The DELETE Bucket operation deletes the bucket named "projects".
+
+```
+DELETE / HTTP/1.1
+Host: projects.data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Authorization: AWS QMUG3D7KP5OQZRDSQWB6:4Pb+A0YT4FhZYeqMdDhYls9f9AM=
+```
+
+### Sample Response
+
+```
+HTTP/1.1 204 No Content
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Connection: close
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/delete-multi.md b/content/riak/cs/2.1.2/references/apis/storage/s3/delete-multi.md
new file mode 100644
index 0000000000..fbc2f528a8
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/delete-multi.md
@@ -0,0 +1,82 @@
+---
+title: "Riak CS DELETE Multiple Objects"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-DELETE-Multi
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-DELETE-Multi
+  - /riak/cs/latest/references/apis/storage/s3/delete-multi/
+---
+
+Multi-object `DELETE` enables you to delete multiple objects from a
+bucket at the same time if those objects exist. Multi-object `DELETE`s
+require you to `POST` an XML object to Riak CS specifying object key
+and version information, as in the example below.
+
+## Requests
+
+### Request Syntax
+
+```
+POST /?delete HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Content-Length: length
+Authorization: signature_value
+
+<?xml version="1.0" encoding="UTF-8"?>
+<Delete>
+  <Quiet>true</Quiet>
+  <Object>
+    <Key>Key</Key>
+  </Object>
+  <Object>
+    <Key>Key</Key>
+  </Object>
+  ...
+</Delete>
+```
+
+## Example
+
+### Sample Request
+
+```
+POST /?delete HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Content-Length: length
+Authorization: signature_value
+
+<?xml version="1.0" encoding="UTF-8"?>
+<Delete>
+  <Object>
+    <Key>sample1.txt</Key>
+  </Object>
+  <Object>
+    <Key>sample2.txt</Key>
+  </Object>
+</Delete>
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Wed, 06 Jun 2012 20:47:15 GMT
+Connection: close
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+
+<?xml version="1.0" encoding="UTF-8"?>
+<DeleteResult>
+  <Deleted>
+    <Key>sample1.txt</Key>
+  </Deleted>
+  <Error>
+    <Key>sample2.txt</Key>
+    <Code>AccessDenied</Code>
+    <Message>Access Denied</Message>
+  </Error>
+</DeleteResult>
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/delete-object.md b/content/riak/cs/2.1.2/references/apis/storage/s3/delete-object.md
new file mode 100644
index 0000000000..beef544133
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/delete-object.md
@@ -0,0 +1,47 @@
+---
+title: "Riak CS DELETE Object"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-DELETE-Object
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-DELETE-Object
+  - /riak/cs/latest/references/apis/storage/s3/delete-object/
+---
+
+The `DELETE Object` operation removes an object, if one exists.
+
+## Requests
+
+### Request Syntax
+
+```
+DELETE /ObjectName HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Content-Length: length
+Authorization: signature_value
+```
+
+## Examples
+
+### Sample Request
+
+The DELETE Object operation deletes the object `projects-schedule.jpg`; the raw request follows a brief scripted sketch.
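+
+As a hedged illustration (boto3 and the endpoint below are assumptions, not
+part of this reference), deleting the same object through an S3-compatible
+client could look like:
+
+```python
+import boto3
+
+s3 = boto3.client("s3", endpoint_url="https://data.basho.com")  # hypothetical endpoint
+
+# Remove projects-schedule.jpg from the bucket; the service responds 204 No Content.
+s3.delete_object(Bucket="bucketname", Key="projects-schedule.jpg")
+```
+
+The raw HTTP exchange: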
+
+```
+DELETE /projects-schedule.jpg HTTP/1.1
+Host: bucketname.data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 GMT
+Authorization: AWS QMUG3D7KP5OQZRDSQWB6:4Pb+A0YT4FhZYeqMdDhYls9f9AM=
+```
+
+### Sample Response
+
+```
+HTTP/1.1 204 No Content
+Date: Wed, 06 Jun 2012 20:47:15 GMT
+Connection: close
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/get-bucket-acl.md b/content/riak/cs/2.1.2/references/apis/storage/s3/get-bucket-acl.md
new file mode 100644
index 0000000000..7f8583de2f
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/get-bucket-acl.md
@@ -0,0 +1,163 @@
+---
+title: "Riak CS GET Bucket ACL"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-GET-Bucket-ACL
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-GET-Bucket-ACL
+  - /riak/cs/latest/references/apis/storage/s3/get-bucket-acl/
+---
+
+The `GET Bucket acl` operation uses the `acl` subresource to return the access control list (ACL) of a bucket.
+
+*Note:* You must have READ_ACP access to the bucket to use this operation. If the anonymous user has READ_ACP permission, this operation will return the ACL of the bucket without an authorization header.
+
+## Requests
+
+### Request Syntax
+
+```
+GET /?acl HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signature_value
+```
+
+### Request Parameters
+
+The GET Bucket acl operation doesn't use request parameters.
+
+## Response Elements
+
+**AccessControlList** - Container for ACL information.
+
+* *Type*: Container
+* *Ancestry*: AccessControlPolicy
+
+**AccessControlPolicy** - Container for the response.
+
+* *Type*: Container
+* *Ancestry*: None
+
+**DisplayName** - Bucket owner's display name.
+
+*Note*: The operation returns the `DisplayName` only if the owner's e-mail address can be determined from the `ID`.
+
+* *Type*: String
+* *Ancestry*: AccessControlPolicy.Owner
+
+**Grant** - Container for `Grantee` and `Permission`.
+
+* *Type*: Container
+* *Ancestry*: AccessControlPolicy.AccessControlList
+
+**Grantee** - Container for `DisplayName` and `ID` of the person who is being granted permissions.
+
+* *Type*: Container
+* *Ancestry*: AccessControlPolicy.AccessControlList.Grant
+
+**ID** - Bucket owner's ID.
+
+* *Type*: String
+* *Ancestry*: AccessControlPolicy.Owner
+
+**Owner** - Container for bucket owner information.
+
+* *Type*: Container
+* *Ancestry*: AccessControlPolicy
+
+**Permission** - Permission granted to the `Grantee` for bucket.
+
+* *Type*: String
+* *Valid Values*: FULL_CONTROL|WRITE|WRITE_ACP|READ|READ_ACP
+* *Ancestry*: AccessControlPolicy.AccessControlList.Grant
+
+## Examples
+
+### Sample Request
+
+This request returns the ACL of the specified bucket.
+
+```
+GET /?acl HTTP/1.1
+Host: bucket.data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Authorization: AWS QMUG3D7KP5OQZRDSQWB6:4Pb+A0YT4FhZYeqMdDhYls9f9AM=
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Last-Modified: Mon, 04 Jun 2012 12:00:00 GMT
+Content-Length: 124198
+Content-Type: text/plain
+Connection: close
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+
+<AccessControlPolicy>
+  <Owner>
+    <ID>24ef09aa099d10f75aa57c8caeab4f8c8e7faeebf76c078efc7c6caea54ba06a</ID>
+    <DisplayName>UserName@basho.com</DisplayName>
+  </Owner>
+  <AccessControlList>
+    <Grant>
+      <Grantee>
+        <ID>24ef09aa099d10f75aa57c8caeab4f8c8e7faeebf76c078efc7c6caea54ba06a</ID>
+        <DisplayName>UserName@basho.com</DisplayName>
+      </Grantee>
+      <Permission>FULL_CONTROL</Permission>
+    </Grant>
+  </AccessControlList>
+</AccessControlPolicy>
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/get-bucket-policy.md b/content/riak/cs/2.1.2/references/apis/storage/s3/get-bucket-policy.md
new file mode 100644
index 0000000000..0e4316adb4
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/get-bucket-policy.md
@@ -0,0 +1,92 @@
+---
+title: "Riak CS GET Bucket Policy"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-GET-Bucket-policy
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-GET-Bucket-policy
+  - /riak/cs/latest/references/apis/storage/s3/get-bucket-policy/
+---
+
+The `GET Bucket policy` operation uses the `policy` subresource to fetch the policy currently set on an existing bucket. If the bucket does not have a policy, the call returns `404 Not Found`. To perform this operation, you must be the bucket owner.
+
+## Requests
+
+### Request Syntax
+
+This example shows the syntax for fetching the bucket's current policy.
+
+```
+GET /?policy HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signatureValue
+
+```
+
+### Request Parameters
+
+This operation does not use request parameters.
+
+### Request Headers
+
+This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-request-headers).
+
+### Request Elements
+
+No body should be appended.
+
+## Response
+
+### Response Headers
+
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers).
+
+### Response Elements
+
+The `GET` response body contains the JSON policy document that was previously `PUT` on the bucket.
+
+## Examples
+
+### Sample Request
+
+The following request fetches the policy attached to the bucket; a scripted sketch is shown first.
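+
+For illustration only (boto3, the endpoint, and the bucket name are
+assumptions, not part of this reference), fetching and parsing the same
+policy through an S3-compatible client might look like:
+
+```python
+import boto3
+import json
+
+s3 = boto3.client("s3", endpoint_url="https://data.basho.com")  # hypothetical
+
+resp = s3.get_bucket_policy(Bucket="bucketname")
+policy = json.loads(resp["Policy"])  # the JSON document shown in the raw response below
+print(policy["Statement"][0]["Effect"])
+```
+
+The raw HTTP exchange: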
+
+```
+GET /?policy HTTP/1.1
+Host: bucketname.data.basho.com
+Date: Tue, 04 Apr 2010 20:34:56 GMT
+Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=
+Content-Length: 0
+
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Tue, 04 Apr 2010 12:00:01 GMT
+Connection: keep-alive
+Server: Riak CS
+Content-Length: 256
+
+{
+  "Version": "2008-10-17",
+  "Statement": [
+    {
+      "Sid": "Stmtaaa",
+      "Effect": "Allow",
+      "Principal": "*",
+      "Action": ["s3:GetObjectAcl","s3:GetObject"],
+      "Resource": "arn:aws:s3:::bucketname/*",
+      "Condition": {
+        "IpAddress": {
+          "aws:SourceIp": "127.0.0.1/32"
+        }
+      }
+    }
+  ]
+}
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/get-bucket.md b/content/riak/cs/2.1.2/references/apis/storage/s3/get-bucket.md
new file mode 100644
index 0000000000..90b7e0cbd1
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/get-bucket.md
@@ -0,0 +1,255 @@
+---
+title: "Riak CS GET Bucket"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-GET-Bucket
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-GET-Bucket
+  - /riak/cs/latest/references/apis/storage/s3/get-bucket/
+---
+
+The `GET Bucket` operation returns a list of the objects in a bucket (all of them, or up to 1,000 at a time).
+
+*Note:* You must have READ access to the bucket to use this operation.
+
+## Requests
+
+### Request Syntax
+
+```
+GET / HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signature_value
+```
+
+### Request Parameters
+
+GET Bucket uses the following parameters to return a subset of the objects in a bucket.
+
+**prefix** - A string with which keys must begin to be included in the response.
+
+You can use prefixes to separate the objects in a bucket into groupings of keys.
+
+* *Type*: String
+* *Default*: None
+
+**delimiter** - Keys that contain the same string between the `prefix` and the first occurrence of the `delimiter` are rolled up into a single result in the `CommonPrefixes` collection and aren't returned anywhere else in the response.
+
+* *Type*: String
+* *Default*: None
+
+**marker** - The starting location in the bucket for the list of objects.
+
+* *Type*: String
+* *Default*: None
+
+**max-keys** - The maximum number of keys returned in the response body.
+
+* *Type*: String
+* *Default*: 1000
+
+## Response Elements
+
+**Contents** - Metadata about each object returned in the response.
+
+* *Type*: XML metadata
+* *Ancestry*: ListBucketResult
+
+**CommonPrefixes** - Keys, if any, between the `Prefix` and the next occurrence of the `delimiter` string.
+
+A response contains `CommonPrefixes` only if the request includes a `delimiter`. `CommonPrefixes` lists keys that act like subdirectories in the directory specified by `Prefix`. If `Prefix` is *projects/* and `delimiter` is */*, the common prefix in *projects/marketing/2012* is *projects/marketing/*. The keys rolled up into a common prefix count as a single return for the calculation of the number of returns (which is limited by `max-keys`).
+
+* *Type*: String
+* *Ancestry*: ListBucketResult
+
+**Delimiter** - Keys that contain the same string between the `prefix` and the first occurrence of the `delimiter` are rolled up into a single result in the `CommonPrefixes` collection and aren't returned anywhere else in the response.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult
+
+**DisplayName** - Object owner's display name.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult.Contents.Owner
+
+**ETag** - The entity tag is an MD5 hash of the object and reflects only changes to the object contents, not the object's metadata.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult.Contents
+
+**ID** - Object owner's user ID.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult.Contents.Owner
+
+**IsTruncated** - Indicates whether all of the results were returned (`true`) or only a subset (`false`) because the number of results returned exceeded the maximum specified by `max-keys`.
+
+* *Type*: Boolean
+* *Ancestry*: ListBucketResult
+
+**Key** - The object key.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult.Contents
+
+**LastModified** - The date and time that the object was last modified.
+
+* *Type*: Date
+* *Ancestry*: ListBucketResult.Contents
+
+**Marker** - The starting location in the bucket for the list of objects.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult
+
+**MaxKeys** - The maximum number of keys returned in the response body.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult
+
+**Name** - Bucket's name.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult
+
+**Owner** - Bucket owner.
+
+* *Type*: String
+* *Children*: DisplayName, ID
+* *Ancestry*: ListBucketResult.Contents|CommonPrefixes
+
+**Prefix** - Keys that begin with the indicated prefix.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult
+
+**Size** - The object's size in bytes.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult.Contents
+
+**StorageClass** - Always STANDARD.
+
+* *Type*: String
+* *Ancestry*: ListBucketResult.Contents
+
+## Examples
+
+### Sample Request
+
+A request that returns the objects in the bucket, `projects`.
+
+```
+GET / HTTP/1.1
+Host: projects.data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Authorization: AWS QMUG3D7KP5OQZRDSQWB6:4Pb+A0YT4FhZYeqMdDhYls9f9AM=
+Content-Type: text/plain
+```
+
+### Sample Response
+
+```xml
+<ListBucketResult>
+  <Name>projects</Name>
+  <Prefix></Prefix>
+  <Marker></Marker>
+  <MaxKeys>1000</MaxKeys>
+  <IsTruncated>false</IsTruncated>
+  <Contents>
+    <Key>scheduleQ1.jpg</Key>
+    <LastModified>2012-06-01T09:20:03.000Z</LastModified>
+    <ETag>"f77127731fba39869dede5c9645a3328"</ETag>
+    <Size>519226</Size>
+    <StorageClass>STANDARD</StorageClass>
+    <Owner>
+      <ID>324ABC0713CD0B420EFC086821BFAE7ED81442C</ID>
+    </Owner>
+  </Contents>
+  <Contents>
+    <Key>scheduleQ2.jpg</Key>
+    <LastModified>2012-06-02T11:02:42</LastModified>
+    <ETag>"645a39851b2cf27731c974f535343328"</ETag>
+    <Size>990102</Size>
+    <StorageClass>STANDARD</StorageClass>
+    <Owner>
+      <ID>324ABC0713CD0B420EFC086821BFAE7ED81442C</ID>
+      <DisplayName>foobar</DisplayName>
+    </Owner>
+  </Contents>
+</ListBucketResult>
+```
+
+### Sample Request Using Request Parameters
+
+This sample request lists the keys in the `projects` bucket that begin with the prefix `IT`; a scripted sketch precedes the raw request.
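+
+For illustration only (boto3 and the endpoint are assumptions, not part of
+this reference), the same prefix-filtered listing through an S3-compatible
+client might look like:
+
+```python
+import boto3
+
+s3 = boto3.client("s3", endpoint_url="https://data.basho.com")  # hypothetical
+
+# List up to 100 keys in "projects" that begin with "IT".
+resp = s3.list_objects(Bucket="projects", Prefix="IT", MaxKeys=100)
+for obj in resp.get("Contents", []):
+    print(obj["Key"], obj["Size"])
+```
+
+The raw HTTP exchange: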
+
+```
+GET /?prefix=IT HTTP/1.1
+Host: projects.data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Authorization: AWS QMUG3D7KP5OQZRDSQWB6:4Pb+A0YT4FhZYeqMdDhYls9f9AM=
+```
+
+### Sample Response Based on Request Parameters
+
+```
+HTTP/1.1 200 OK
+x-amz-id-2: gyB+3jRPnrkN98ZajxHXr3u7EFM67bNgSAxexeEHndCX/7GRnfTXxReKUQF28IfP
+x-amz-request-id: 3B3C7C725673C630
+Date: Wed, 06 Jun 2012 20:48:15 GMT
+Content-Type: application/xml
+Content-Length: 302
+Connection: close
+Server: BashoData
+
+<ListBucketResult>
+  <Name>projects</Name>
+  <Prefix>IT</Prefix>
+  <Marker></Marker>
+  <MaxKeys>1000</MaxKeys>
+  <IsTruncated>false</IsTruncated>
+  <Contents>
+    <Key>ITdb</Key>
+    <LastModified>2012-06-01T09:20:03.000Z</LastModified>
+    <ETag>"f77127731fba39869dede5c9645a3328"</ETag>
+    <Size>29493</Size>
+    <StorageClass>STANDARD</StorageClass>
+    <Owner>
+      <ID>B420EFC086821B324ABC0713CD0FAE7ED81442C</ID>
+      <DisplayName>richardp</DisplayName>
+    </Owner>
+  </Contents>
+  <Contents>
+    <Key>ITstorage</Key>
+    <LastModified>2012-04-14T04:20:10.000Z</LastModified>
+    <ETag>"a96f00ad9f27c3828ef3fdf83fc9ac7f"</ETag>
+    <Size>4</Size>
+    <StorageClass>STANDARD</StorageClass>
+    <Owner>
+      <ID>324ABC0713CD0B420EFC086821BFAE7ED81442C</ID>
+      <DisplayName>foobar</DisplayName>
+    </Owner>
+  </Contents>
+</ListBucketResult>
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/get-object-acl.md b/content/riak/cs/2.1.2/references/apis/storage/s3/get-object-acl.md
new file mode 100644
index 0000000000..7eb836de1f
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/get-object-acl.md
@@ -0,0 +1,115 @@
+---
+title: "Riak CS GET Object ACL"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-GET-Object-ACL
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-GET-Object-ACL
+  - /riak/cs/latest/references/apis/storage/s3/get-object-acl/
+---
+
+The `GET Object acl` operation uses the `acl` subresource to return the access control list (ACL) of an object.
+
+*Note:* You must have READ_ACP access to the object to use this operation.
+
+## Requests
+
+### Request Syntax
+
+```
+GET /ObjectName?acl HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signature_value
+```
+
+### Request Parameters
+
+The GET Object acl operation doesn't use request parameters.
+
+## Response Elements
+
+**AccessControlList** - Container for ACL information (Grant, Grantee, and Permission).
+
+* *Type*: Container
+* *Ancestors*: AccessControlPolicy
+
+**AccessControlPolicy** - Contains the elements that set the ACL permissions for each grantee.
+
+* *Type*: Container
+* *Ancestors*: None
+
+**DisplayName** - Bucket owner's display name.
+
+* *Type*: String
+* *Ancestors*: AccessControlPolicy.Owner
+
+**Grant** - Container for `Grantee` and `Permission`.
+
+* *Type*: Container
+* *Ancestors*: AccessControlPolicy.AccessControlList
+
+**Grantee** - The `ID`, `Emailaddress`, or `uri` of the subject who is being granted permissions.
+
+* *Type*: String
+* *Ancestors*: AccessControlPolicy.AccessControlList.Grant
+
+**ID** - Bucket owner's ID.
+
+* *Type*: String
+* *Ancestors*: AccessControlPolicy.Owner|AccessControlPolicy.AccessControlList.Grant
+
+**Owner** - Container for bucket owner information.
+
+* *Type*: Container
+* *Ancestors*: AccessControlPolicy
+
+**Permission** - Permission granted to the `Grantee` for bucket.
+
+* *Type*: String
+* *Valid Values*: FULL_CONTROL|WRITE|READ_ACP
+* *Ancestors*: AccessControlPolicy.AccessControlList.Grant
+
+## Examples
+
+### Sample Request
+
+This request returns the ACL of the object `basho-process.jpg`; a scripted sketch is shown first.
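+
+For illustration only (boto3 and the endpoint are assumptions, not part of
+this reference), reading the same object ACL through an S3-compatible client
+might look like:
+
+```python
+import boto3
+
+s3 = boto3.client("s3", endpoint_url="https://data.basho.com")  # hypothetical
+
+acl = s3.get_object_acl(Bucket="bucket", Key="basho-process.jpg")
+for grant in acl["Grants"]:
+    # Each grant pairs a grantee with a permission such as FULL_CONTROL.
+    print(grant["Grantee"].get("DisplayName"), grant["Permission"])
+```
+
+The raw HTTP exchange: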
+
+```
+GET /basho-process.jpg?acl HTTP/1.1
+Host: bucket.data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Authorization: AWS QMUG3D7KP5OQZRDSQWB6:4Pb+A0YT4FhZYeqMdDhYls9f9AM=
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Wed, 06 Jun 2012 20:47:15 GMT
+Last-Modified: Mon, 04 Jun 2012 12:00:00 GMT
+Content-Length: 124
+Content-Type: text/plain
+Connection: close
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+
+<AccessControlPolicy>
+  <Owner>
+    <ID>24ef09aa099d10f75aa57c8caeab4f8c8e7faeebf76c078efc7c6caea54ba06a</ID>
+    <DisplayName>UserName@basho.com</DisplayName>
+  </Owner>
+  <AccessControlList>
+    <Grant>
+      <Grantee>
+        <ID>24ef09aa099d10f75aa57c8caeab4f8c8e7faeebf76c078efc7c6caea54ba06a</ID>
+        <DisplayName>UserName@basho.com</DisplayName>
+      </Grantee>
+      <Permission>FULL_CONTROL</Permission>
+    </Grant>
+  </AccessControlList>
+</AccessControlPolicy>
```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/get-object.md b/content/riak/cs/2.1.2/references/apis/storage/s3/get-object.md
new file mode 100644
index 0000000000..e53cb1ffac
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/get-object.md
@@ -0,0 +1,79 @@
+---
+title: "Riak CS GET Object"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-GET-Object
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-GET-Object
+  - /riak/cs/latest/references/apis/storage/s3/get-object/
+---
+
+The `GET Object` operation retrieves an object from Riak CS storage.
+
+*Note:* You must have READ access to the object to use this operation. If the anonymous user has READ access, you can retrieve an object without using an authorization header.
+
+## Requests
+
+### Request Syntax
+
+```
+GET /objectName HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signature_value
+```
+
+## Examples
+
+### Sample Request
+
+The following request returns the object `basho-process.jpg`; a scripted sketch is shown first.
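+
+For illustration only (boto3 and the endpoint are assumptions, not part of
+this reference), fetching the same object through an S3-compatible client
+might look like:
+
+```python
+import boto3
+
+s3 = boto3.client("s3", endpoint_url="https://data.basho.com")  # hypothetical
+
+# Full download; pass e.g. Range="bytes=0-999" to fetch a partial object,
+# as in the Range header example further below.
+obj = s3.get_object(Bucket="bucket", Key="basho-process.jpg")
+data = obj["Body"].read()  # 611892 bytes in the sample response below
+```
+
+The raw HTTP exchange: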
+
+```
+GET /basho-process.jpg HTTP/1.1
+Host: bucket.data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Authorization: AWS AKIAIOSFODNN7EXAMPLE:0RQf4/cRonhpaBX5sCYVf1bNRuU=
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Wed, 06 Jun 2012 20:48:15 GMT
+Last-Modified: Wed, 06 Jun 2012 13:39:25 GMT
+ETag: "3327731c971645a398fba9dede5f2768"
+Content-Length: 611892
+Content-Type: text/plain
+Connection: close
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+[611892 bytes of object data]
+```
+
+### Sample Request Using the Range Header
+
+This sample request asks for only the first `1000` bytes of a `1705`-byte file.
+
+```
+GET /build.sh HTTP/1.1
+Host: projects.data.basho.com
+Date: Tue, 07 Jan 2014 19:49:11 +0000
+Authorization: AWS QMUG3D7KP5OQZRDSQWB6:4Pb+A0YT4FhZYeqMdDhYls9f9AM=
+Range: bytes=0-999
+```
+
+### Sample Response Using the Range Header
+
+```
+HTTP/1.1 206 Partial Content
+Server: BashoData
+Date: Tue, 07 Jan 2014 19:49:11 GMT
+Content-Type: application/xml
+Content-Range: bytes 0-999/1705
+Content-Length: 1000
+Accept-Ranges: bytes
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/get-service.md b/content/riak/cs/2.1.2/references/apis/storage/s3/get-service.md
new file mode 100644
index 0000000000..39c26ce09c
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/get-service.md
@@ -0,0 +1,105 @@
+---
+title: "Riak CS GET Service"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-GET-Service
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-GET-Service
+  - /riak/cs/latest/references/apis/storage/s3/get-service/
+---
+
+The `GET Service` operation returns a list of all buckets owned by the *authenticated* user who sent the request.
+
+*Note:* The GET Service operation doesn't list buckets created by other users. It also doesn't list buckets for anonymous requests.
+
+## Requests
+
+### Request Syntax
+
+```
+GET / HTTP/1.1
+Host: data.basho.com
+Date: date
+Authorization: signature_value
+```
+
+## Response Elements
+
+**Bucket** - Container for bucket information.
+
+* *Type*: Container
+* *Children*: Name, CreationDate
+* *Ancestor*: ListAllMyBucketsResult.Buckets
+
+**Buckets** - Container for one or more buckets.
+
+* *Type*: Container
+* *Children*: Bucket
+* *Ancestor*: ListAllMyBucketsResult
+
+**CreationDate** - Date the bucket was created.
+
+* *Type*: date (format yyyy-mm-ddThh:mm:ss.timezone, e.g., 2012-06-03T15:45:02.000Z)
+* *Ancestor*: ListAllMyBucketsResult.Buckets.Bucket
+
+**DisplayName** - Bucket owner's display name.
+
+* *Type*: String
+* *Ancestor*: ListAllMyBucketsResult.Owner
+
+**ID** - Bucket owner's user ID.
+
+* *Type*: String
+* *Ancestor*: ListAllMyBucketsResult.Owner
+
+**ListAllMyBucketsResult** - Container for response.
+
+* *Type*: Container
+* *Children*: Owner, Buckets
+* *Ancestor*: None
+
+**Name** - Bucket's name.
+
+* *Type*: String
+* *Ancestor*: ListAllMyBucketsResult.Buckets.Bucket
+
+**Owner** - Container for bucket owner information.
+
+* *Type*: Container
+* *Ancestor*: ListAllMyBucketsResult
+
+## Examples
+
+### Sample Request
+
+The GET operation on the Service endpoint (data.basho.com in this example) returns a list of all of the buckets owned by the authenticated sender of the request; a scripted sketch is shown first.
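+
+For illustration only (boto3 and the endpoint are assumptions, not part of
+this reference), listing the same buckets through an S3-compatible client
+might look like:
+
+```python
+import boto3
+
+s3 = boto3.client("s3", endpoint_url="https://data.basho.com")  # hypothetical
+
+# Only buckets owned by the authenticated caller are returned.
+for bucket in s3.list_buckets()["Buckets"]:
+    print(bucket["Name"], bucket["CreationDate"])
+```
+
+The raw HTTP exchange: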
+
+```
+GET / HTTP/1.1
+Host: data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Authorization: AWS QMUG3D7KP5OQZRDSQWB6:4Pb+A0YT4FhZYeqMdDhYls9f9AM=
+```
+
+### Sample Response
+
+```xml
+<ListAllMyBucketsResult>
+  <Owner>
+    <ID>324ABC0713CD0B420EFC086821BFAE7ED81442C</ID>
+    <DisplayName>foobar</DisplayName>
+  </Owner>
+  <Buckets>
+    <Bucket>
+      <Name>projects</Name>
+      <CreationDate>2011-05-10T14:10:15.000Z</CreationDate>
+    </Bucket>
+    <Bucket>
+      <Name>templates</Name>
+      <CreationDate>2011-05-10T14:18:25.000Z</CreationDate>
+    </Bucket>
+  </Buckets>
+</ListAllMyBucketsResult>
```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/head-object.md b/content/riak/cs/2.1.2/references/apis/storage/s3/head-object.md
new file mode 100644
index 0000000000..61e226adce
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/head-object.md
@@ -0,0 +1,54 @@
+---
+title: "Riak CS HEAD Object"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-HEAD-Object
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-HEAD-Object
+  - /riak/cs/latest/references/apis/storage/s3/head-object/
+---
+
+The `HEAD Object` operation retrieves metadata from an object without returning the object.
+
+*Note:* You must have READ access to the object to use this operation.
+
+A HEAD request has the same options as a GET operation on an object, and the HEAD response is identical to the GET response, except that there is no response body.
+
+## Requests
+
+### Request Syntax
+
+```
+HEAD /ObjectName HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signature_value
+```
+
+## Examples
+
+### Sample Request
+
+The following request returns the metadata of an object.
+
+```
+HEAD /basho-process.jpg HTTP/1.1
+Host: bucket.data.basho.com
+Date: Wed, 06 Jun 2012 20:47:15 +0000
+Authorization: AWS AKIAIOSFODNN7EXAMPLE:0RQf4/cRonhpaBX5sCYVf1bNRuU=
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Wed, 06 Jun 2012 20:48:15 GMT
+Last-Modified: Wed, 06 Jun 2012 13:39:25 GMT
+ETag: "3327731c971645a398fba9dede5f2768"
+Content-Length: 611892
+Content-Type: text/plain
+Connection: close
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/initiate-multipart-upload.md b/content/riak/cs/2.1.2/references/apis/storage/s3/initiate-multipart-upload.md
new file mode 100644
index 0000000000..8d38a0b11b
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/initiate-multipart-upload.md
@@ -0,0 +1,113 @@
+---
+title: "Riak CS Initiate Multipart Upload"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-Initiate-Multipart-Upload/
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-Initiate-Multipart-Upload/
+  - /riak/cs/latest/references/apis/storage/s3/initiate-multipart-upload/
+---
+
+Initiates a multipart upload and returns an upload ID. The upload ID is used to associate all the parts in the specific multipart upload.
+
+## Requests
+
+### Request Syntax
+
+This example shows the syntax for initiating a multipart upload.
+
+```
+POST /ObjectName?uploads HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signatureValue
+```
+
+### Request Headers
+
+**Content-Type** - A standard MIME type that describes the content format.
+
+* *Type*: String
+* *Default*: binary/octet-stream
+* *Constraints*: None
+
+**x-amz-meta-*** - User specified metadata fields which can be stored with the object.
+
+* *Type*: String
+* *Default*: None
+* *Constraints*: None
+
+#### Permission Request Headers
+
+**x-amz-acl** - This request header specifies a predefined ACL to apply to the object being created. A predefined ACL grants specific permissions to individual accounts or predefined groups.
+
+* *Type*: String
+* *Valid Values*: private | public-read | public-read-write | authenticated-read | bucket-owner-read | bucket-owner-full-control
+* *Constraints*: None
+
+### Request Elements
+
+This operation does not use request elements.
+
+## Response
+
+### Response Headers
+
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers).
+
+### Response Elements
+
+**InitiateMultipartUploadResult** - Container for response.
+
+* *Type*: Container
+* *Children*: `Bucket`, `Key`, `UploadId`
+* *Ancestors*: None
+
+**Bucket** - Name of the bucket to which the multipart upload was initiated.
+
+* *Type*: String
+* *Ancestors*: `InitiateMultipartUploadResult`
+
+**Key** - Object key for which the multipart upload was initiated.
+
+* *Type*: String
+* *Ancestors*: `InitiateMultipartUploadResult`
+
+**UploadId** - ID for the initiated multipart upload.
+
+* *Type*: String
+* *Ancestors*: `InitiateMultipartUploadResult`
+
+## Examples
+
+### Sample Request
+
+This operation initiates a multipart upload for the `large.iso` object.
+
+```
+POST /large.iso?uploads HTTP/1.1
+Host: os.data.basho.com
+Date: Mon, 1 Nov 2010 20:34:56 GMT
+Authorization: AWS AKIAIOSFODNN7EXAMPLE:0RQf4/cRonhpaBX5sCYVf1bNRuU=
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Mon, 1 Nov 2010 20:34:56 GMT
+Content-Length: 197
+Connection: keep-alive
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+
+<InitiateMultipartUploadResult>
+  <Bucket>os</Bucket>
+  <Key>large.iso</Key>
+  <UploadId>VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA</UploadId>
+</InitiateMultipartUploadResult>
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/list-multipart-uploads.md b/content/riak/cs/2.1.2/references/apis/storage/s3/list-multipart-uploads.md
new file mode 100644
index 0000000000..c0cb1d67d6
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/list-multipart-uploads.md
@@ -0,0 +1,223 @@
+---
+title: "Riak CS List Multipart Uploads"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-List-Multipart-Uploads/
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-List-Multipart-Uploads/
+  - /riak/cs/latest/references/apis/storage/s3/list-multipart-uploads/
+---
+
+Lists multipart uploads that have not yet been completed or aborted.
+
+In the response, the uploads are sorted by key. If your application has
+initiated more than one multipart upload using the same object key, then uploads
+in the response are first sorted by key. Additionally, uploads are sorted in
+ascending order within each key by the upload initiation time.
+
+## Requests
+
+### Request Syntax
+
+This example shows the syntax for listing multipart uploads.
+
+```
+GET /?uploads HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signatureValue
+```
+
+### Request Parameters
+
+**delimiter** - Character you use to group keys.
+
+* *Type*: String
+
+**max-uploads** - Sets the maximum number of multipart uploads, from 1 to 1,000, to return in the response body.
+
+* *Type*: Integer
+* *Default*: 1,000
+
+**key-marker** - Together with `upload-id-marker`, this parameter specifies the multipart upload after which listing should begin.
+
+* *Type*: String
+
+**prefix** - Lists in-progress uploads only for those keys that begin with the specified prefix.
+
+* *Type*: String
+
+**upload-id-marker** - Together with `key-marker`, specifies the multipart upload after which listing should begin.
+
+* *Type*: String
+
+### Request Headers
+
+This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-request-headers).
+
+### Request Elements
+
+This operation does not use request elements.
+
+## Response
+
+### Response Headers
+
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers).
+
+### Response Elements
+
+**ListMultipartUploadsResult** - Container for the response.
+
+* *Type*: Container
+* *Children*: `Bucket`, `KeyMarker`, `UploadIdMarker`, `NextKeyMarker`, `NextUploadIdMarker`, `MaxUploads`, `Delimiter`, `Prefix`, `CommonPrefixes`, `IsTruncated`
+* *Ancestors*: None
+
+**Bucket** - Name of the bucket to which the multipart upload was initiated.
+
+* *Type*: String
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**KeyMarker** - The key at or after which the listing began.
+
+* *Type*: String
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**UploadIdMarker** - Upload ID after which listing began.
+
+* *Type*: String
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**NextKeyMarker** - When a list is truncated, this element specifies the value that should be used for the `key-marker` request parameter in a subsequent request.
+
+* *Type*: String
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**NextUploadIdMarker** - When a list is truncated, this element specifies the value that should be used for the `upload-id-marker` request parameter in a subsequent request.
+
+* *Type*: String
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**MaxUploads** - Maximum number of multipart uploads that could have been included in the response.
+
+* *Type*: Integer
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**IsTruncated** - Indicates whether the returned list of multipart uploads is truncated.
+
+* *Type*: Boolean
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**Upload** - Container for elements related to a particular multipart upload.
+
+* *Type*: Container
+* *Children*: `Key`, `UploadId`, `Initiator`, `Owner`, `StorageClass`, `Initiated`
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**Key** - Key of the object for which the multipart upload was initiated.
+
+* *Type*: String
+* *Ancestors*: `Upload`
+
+**UploadId** - Upload ID that identifies the multipart upload.
+
+* *Type*: String
+* *Ancestors*: `Upload`
+
+**Initiator** - Container element that identifies who initiated the multipart upload.
+
+* *Type*: Container
+* *Children*: `ID`, `DisplayName`
+* *Ancestors*: `Upload`
+
+**ID** - Canonical User ID.
+
+* *Type*: String
+* *Ancestors*: `Initiator`, `Owner`
+
+**DisplayName** - Principal's name.
+
+* *Type*: String
+* *Ancestors*: `Initiator`, `Owner`
+
+**Owner** - Container element that identifies the object owner, after the object is created.
+
+* *Type*: Container
+* *Children*: `ID`, `DisplayName`
+* *Ancestors*: `Upload`
+
+**Initiated** - Date and time at which the multipart upload was initiated.
+
+* *Type*: Date
+* *Ancestors*: `Upload`
+
+**ListMultipartUploadsResult.Prefix** - When a prefix is provided in the request, this field contains the specified prefix.
+
+* *Type*: String
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**Delimiter** - Contains the delimiter you specified in the request.
+
+* *Type*: String
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**CommonPrefixes** - If you specify a delimiter in the request, then the result returns each distinct key prefix containing the delimiter in a CommonPrefixes element.
+
+* *Type*: Container
+* *Ancestors*: `ListMultipartUploadsResult`
+
+**CommonPrefixes.Prefix** - If the request does not include the Prefix parameter, then this element shows only the substring of the key that precedes the first occurrence of the delimiter character. These keys are not returned anywhere else in the response.
+
+* *Type*: String
+* *Ancestors*: `CommonPrefixes`
+
+## Examples
+
+### Sample Request
+
+The following request lists three multipart uploads.
+
+```
+GET /?uploads HTTP/1.1
+Host: os.data.basho.com
+Date: Mon, 1 Nov 2010 20:34:56 GMT
+Authorization: AWS AKIAIOSFODNN7EXAMPLE:0RQf4/cRonhpaBX5sCYVf1bNRuU=
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Mon, 1 Nov 2010 20:34:56 GMT
+Content-Length: 1330
+Connection: keep-alive
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+
+<ListMultipartUploadsResult>
+  <Bucket>os</Bucket>
+  <KeyMarker></KeyMarker>
+  <UploadIdMarker></UploadIdMarker>
+  <NextKeyMarker>large.iso</NextKeyMarker>
+  <Upload>
+    <Key>my-divisor</Key>
+    <UploadId>VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA</UploadId>
+    <Initiator>
+      <ID>arn:aws:iam::111122223333:user/user1-11111a31-17b5-4fb7-9df5-b111111f13de</ID>
+      <DisplayName>user1-11111a31-17b5-4fb7-9df5-b111111f13de</DisplayName>
+    </Initiator>
+    <Owner>
+      <ID>75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a</ID>
+      <DisplayName>OwnerDisplayName</DisplayName>
+    </Owner>
+    <StorageClass>STANDARD</StorageClass>
+    <Initiated>2010-11-10T20:48:33.000Z</Initiated>
+  </Upload>
+  ...
+</ListMultipartUploadsResult>
```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/list-parts.md b/content/riak/cs/2.1.2/references/apis/storage/s3/list-parts.md
new file mode 100644
index 0000000000..a354817222
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/list-parts.md
@@ -0,0 +1,193 @@
+---
+title: "Riak CS List Parts"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-List-Parts/
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-List-Parts/
+  - /riak/cs/latest/references/apis/storage/s3/list-parts/
+---
+
+Lists the parts that have been uploaded for a specific multipart upload.
+
+## Requests
+
+### Request Syntax
+
+This example shows the syntax for listing parts of a multipart upload.
+
+```
+GET /ObjectName?uploadId=UploadId HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signatureValue
+```
+
+### Request Parameters
+
+**uploadId** - Upload ID identifying the multipart upload whose parts are being listed.
+
+* *Type*: String
+* *Default*: None
+
+**max-parts** - Sets the maximum number of parts to return in the response body.
+
+* *Type*: String
+* *Default*: 1,000
+
+**part-number-marker** - Specifies the part after which listing should begin. Only parts with higher part numbers will be listed.
+
+* *Type*: String
+* *Default*: None
+
+### Request Headers
+
+This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-request-headers).
+
+### Request Elements
+
+This operation does not use request elements.
+
+## Response
+
+### Response Headers
+
+This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers).
+
+### Response Elements
+
+**ListPartsResult** - Container for the response.
+
+* *Type*: Container
+* *Children*: `Bucket`, `Key`, `UploadId`, `Initiator`, `Owner`, `StorageClass`, `PartNumberMarker`, `NextPartNumberMarker`, `MaxParts`, `IsTruncated`, `Part`
+
+**Bucket** - Name of the bucket to which the multipart upload was initiated.
+
+* *Type*: String
+* *Ancestors*: `ListPartsResult`
+
+**Key** - Object key for which the multipart upload was initiated.
+
+* *Type*: String
+* *Ancestors*: `ListPartsResult`
+
+**UploadId** - Upload ID identifying the multipart upload whose parts are being listed.
+
+* *Type*: String
+* *Ancestors*: `ListPartsResult`
+
+**Initiator** - Container element that identifies who initiated the multipart upload.
+
+* *Type*: Container
+* *Children*: `ID`, `DisplayName`
+* *Ancestors*: `ListPartsResult`
+
+**ID** - Canonical User ID.
+
+* *Type*: String
+* *Ancestors*: `Initiator`
+
+**DisplayName** - Principal's name.
+
+* *Type*: String
+* *Ancestors*: `Initiator`
+
+**Owner** - Container element that identifies the object owner, after the object is created.
+
+* *Type*: Container
+* *Children*: `ID`, `DisplayName`
+* *Ancestors*: `ListPartsResult`
+
+**PartNumberMarker** - Part number after which listing begins.
+
+* *Type*: Integer
+* *Ancestors*: `ListPartsResult`
+
+**NextPartNumberMarker** - When a list is truncated, this element specifies the last part in the list, as well as the value to use for the `part-number-marker` request parameter in a subsequent request.
+
+* *Type*: Integer
+* *Ancestors*: `ListPartsResult`
+
+**MaxParts** - Maximum number of parts that were allowed in the response.
+
+* *Type*: Integer
+* *Ancestors*: `ListPartsResult`
+
+**IsTruncated** - Indicates whether the returned list of parts is truncated.
+
+* *Type*: Boolean
+* *Ancestors*: `ListPartsResult`
+
+**Part** - Container for elements related to a particular part.
+
+* *Type*: Container
+* *Children*: `PartNumber`, `LastModified`, `ETag`, `Size`
+* *Ancestors*: `ListPartsResult`
+
+**PartNumber** - Part number identifying the part.
+
+* *Type*: Integer
+* *Ancestors*: `Part`
+
+**LastModified** - Date and time at which the part was uploaded.
+
+* *Type*: Date
+* *Ancestors*: `Part`
+
+**ETag** - Entity tag returned when the part was uploaded.
+
+* *Type*: String
+* *Ancestors*: `Part`
+
+**Size** - Size of the uploaded part data.
+
+* *Type*: Integer
+* *Ancestors*: `Part`
+
+## Examples
+
+### Sample Request
+
+The following request lists multipart upload parts; a scripted sketch is shown first.
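+
+For illustration only (boto3, the endpoint, and the upload ID are assumptions,
+not part of this reference), listing the same parts through an S3-compatible
+client might look like:
+
+```python
+import boto3
+
+s3 = boto3.client("s3", endpoint_url="https://data.basho.com")  # hypothetical
+
+parts = s3.list_parts(
+    Bucket="os",
+    Key="large.iso",
+    UploadId="VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA",
+)
+for part in parts["Parts"]:
+    print(part["PartNumber"], part["ETag"], part["Size"])
+```
+
+The raw HTTP exchange: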
+
+```
+GET /large.iso?uploadId=VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA HTTP/1.1
+Host: os.data.basho.com
+Date: Mon, 1 Nov 2010 20:34:56 GMT
+Authorization: AWS AKIAIOSFODNN7EXAMPLE:0RQf4/cRonhpaBX5sCYVf1bNRuU=
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Mon, 1 Nov 2010 20:34:56 GMT
+Content-Length: 985
+Connection: keep-alive
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+
+<ListPartsResult>
+  <Bucket>os</Bucket>
+  <Key>large.iso</Key>
+  <UploadId>VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA</UploadId>
+  <Initiator>
+    <ID>arn:aws:iam::111122223333:user/some-user-11116a31-17b5-4fb7-9df5-b288870f11xx</ID>
+    <DisplayName>umat-user-11116a31-17b5-4fb7-9df5-b288870f11xx</DisplayName>
+  </Initiator>
+  <Owner>
+    <ID>75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a</ID>
+    <DisplayName>someName</DisplayName>
+  </Owner>
+  <StorageClass>STANDARD</StorageClass>
+  <Part>
+    <PartNumber>1</PartNumber>
+    <LastModified>2010-11-10T20:48:34.000Z</LastModified>
+    <ETag>"7778aef83f66abc1fa1e8477f296d394"</ETag>
+    <Size>10485760</Size>
+  </Part>
+  ...
+</ListPartsResult>
```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api.md b/content/riak/cs/2.1.2/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api.md
new file mode 100644
index 0000000000..679cdabde3
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api.md
@@ -0,0 +1,85 @@
+---
+title: "Mapping From S3 API to Riak CS internal API"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/Mapping-From-S3-API-to-Riak-CS-internal-API
+  - /riak/cs/2.1.2/references/apis/storage/s3/Mapping-From-S3-API-to-Riak-CS-internal-API
+  - /riak/cs/latest/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api/
+---
+
+## Overview
+
+This document outlines the mapping of S3 API URLs to the rewritten format
+that is processed by Webmachine.
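+
+As a rough illustration of the rewrite's shape (this sketch is not Riak CS
+source code, and the bucket-from-Host convention is the assumption described
+in the notes below), two common cases look like:
+
+```python
+# Illustrative only: rewrite an S3-style request to the internal form,
+# taking the bucket name from the Host header prefix.
+def rewrite(method: str, host: str, path: str) -> str:
+    bucket = host.split(".")[0]  # "projects.data.basho.com" -> "projects"
+    if path == "/":              # GET Bucket
+        return f"{method} /buckets/{bucket}/objects"
+    return f"{method} /buckets/{bucket}/objects{path}"  # GET/PUT/DELETE Object
+
+print(rewrite("GET", "projects.data.basho.com", "/"))
+print(rewrite("GET", "projects.data.basho.com", "/schedule.jpg"))
+```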
+
+## URL Mapping
+
+### Service Operations
+
+* `GET Service`
+  * `GET /` -> `GET /buckets`
+
+### Bucket Operations
+
+*Note*: The common method of specifying the bucket is to prefix the bucket name to the `Host` header value.
+
+* `GET Bucket`
+  * `GET /<bucket>` or `GET /` -> `GET /buckets/<bucket>/objects`
+* `HEAD Bucket`
+  * `HEAD /<bucket>` or `HEAD /` -> `HEAD /buckets/<bucket>`
+* `PUT Bucket`
+  * `PUT /<bucket>` or `PUT /` -> `PUT /buckets/<bucket>`
+* `DELETE Bucket`
+  * `DELETE /<bucket>` or `DELETE /` -> `DELETE /buckets/<bucket>`
+* `GET Bucket acl`
+  * `GET /?acl` -> `GET /buckets/<bucket>/acl`
+* `PUT Bucket acl`
+  * `PUT /?acl` -> `PUT /buckets/<bucket>/acl`
+* `GET Bucket location`
+  * `GET /?location` -> `GET /buckets/<bucket>/location`
+* `PUT Bucket location`
+  * `PUT /?location` -> `PUT /buckets/<bucket>/location`
+* `GET Bucket versioning`
+  * `GET /?versioning` -> `GET /buckets/<bucket>/versioning`
+* `PUT Bucket versioning`
+  * `PUT /?versioning` -> `PUT /buckets/<bucket>/versioning`
+* `GET Bucket policy`
+  * `GET /?policy` -> `GET /buckets/<bucket>/policy`
+* `PUT Bucket policy`
+  * `PUT /?policy` -> `PUT /buckets/<bucket>/policy`
+* `DELETE Bucket policy`
+  * `DELETE /?policy` -> `DELETE /buckets/<bucket>/policy`
+* `List Multipart Uploads`
+  * `GET /?uploads` -> `GET /buckets/<bucket>/uploads`
+* `Delete Multiple Objects` (This is listed in the S3 docs as an object operation, but it fits better here)
+  * `POST /?delete` -> `POST /buckets/<bucket>/delete`
+
+### Object Operations
+
+*Note*: The common method of specifying the bucket is to prefix the bucket name to the `Host` header value.
+
+* `GET Object`
+  * `GET /<object>` -> `GET /buckets/<bucket>/objects/<object>`
+* `HEAD Object`
+  * `HEAD /<object>` -> `HEAD /buckets/<bucket>/objects/<object>`
+* `PUT Object`
+  * `PUT /<object>` -> `PUT /buckets/<bucket>/objects/<object>`
+* `DELETE Object`
+  * `DELETE /<object>` -> `DELETE /buckets/<bucket>/objects/<object>`
+* `GET Object acl`
+  * `GET /<object>?acl` -> `GET /buckets/<bucket>/objects/<object>/acl`
+* `PUT Object acl`
+  * `PUT /<object>?acl` -> `PUT /buckets/<bucket>/objects/<object>/acl`
+* `Initiate Multipart Upload`
+  * `POST /<object>?uploads` -> `POST /buckets/<bucket>/objects/<object>/uploads`
+* `Upload Part`
+  * `PUT /<object>?partNumber=<partNum>&uploadId=<uploadId>` -> `PUT /buckets/<bucket>/objects/<object>/uploads/<uploadId>?partNumber=<partNum>`
+* `Complete Multipart Upload`
+  * `POST /<object>?uploadId=<uploadId>` -> `POST /buckets/<bucket>/objects/<object>/uploads/<uploadId>`
+* `Abort Multipart Upload`
+  * `DELETE /<object>?uploadId=<uploadId>` -> `DELETE /buckets/<bucket>/objects/<object>/uploads/<uploadId>`
+* `List Parts`
+  * `GET /<object>?uploadId=<uploadId>` -> `GET /buckets/<bucket>/objects/<object>/uploads/<uploadId>`
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/put-bucket-acl.md b/content/riak/cs/2.1.2/references/apis/storage/s3/put-bucket-acl.md
new file mode 100644
index 0000000000..cfa61b0f7f
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/put-bucket-acl.md
@@ -0,0 +1,213 @@
+---
+title: "Riak CS PUT Bucket ACL"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Bucket-ACL/
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Bucket-ACL/
+  - /riak/cs/latest/references/apis/storage/s3/put-bucket-acl/
+---
+
+The `PUT Bucket acl` operation uses the `acl` subresource to set the permissions on an existing bucket using an access control list (ACL).
+
+*Note:* You must have WRITE_ACP access to the bucket to use this operation.
+
+`PUT Bucket acl` offers two methods for setting a bucket's permissions:
+
+* Specify the ACL in the request body
+* Specify permissions using request headers
+
+*Note*: You can specify an ACL in the request body or with request headers, not both.
+
+## Requests
+
+### Request Syntax
+
+This example shows the syntax for setting the ACL in the request body. The Request Headers section contains a list of headers you can use instead; a sketch of the header-based method follows.
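+
+As a hedged aside (boto3 and the endpoint are assumptions, not part of this
+reference), the header-based alternative, sending a predefined ACL instead of
+an XML body, might look like:
+
+```python
+import boto3
+
+s3 = boto3.client("s3", endpoint_url="https://data.basho.com")  # hypothetical
+
+# Equivalent to sending the x-amz-acl request header with a predefined ACL.
+s3.put_bucket_acl(Bucket="basho_docs", ACL="public-read")
+```
+
+The body-based raw syntax is: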
+
+```
+PUT /?acl HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signatureValue
+
+<AccessControlPolicy>
+  <Owner>
+    <ID>ID</ID>
+    <DisplayName>EmailAddress</DisplayName>
+  </Owner>
+  <AccessControlList>
+    <Grant>
+      <Grantee>
+        <ID>ID</ID>
+        <DisplayName>EmailAddress</DisplayName>
+      </Grantee>
+      <Permission>Permission</Permission>
+    </Grant>
+    ...
+  </AccessControlList>
+</AccessControlPolicy>
+```
+
+### Request Parameters
+
+This operation does not use request parameters.
+
+### Request Headers
+
+`PUT Bucket acl` offers the following request headers in addition to the request headers common to all operations.
+
+**x-amz-acl** - This request header specifies a predefined ACL to apply to the bucket being created. A predefined ACL grants specific permissions to individual accounts or predefined groups.
+
+* *Type*: String
+* *Valid Values*: private | public-read | public-read-write | authenticated-read | bucket-owner-read | bucket-owner-full-control
+* *Default*: private
+
+### Request Elements
+
+If you specify the ACL using the request body, you must use the following elements:
+
+**AccessControlList** - Container for ACL information (Grant, Grantee, and Permission).
+
+* *Type*: Container
+* *Ancestors*: AccessControlPolicy
+
+**AccessControlPolicy** - Contains the elements that set the ACL permissions for each grantee.
+
+* *Type*: Container
+* *Ancestors*: None
+
+**DisplayName** - Bucket owner's display name.
+
+* *Type*: String
+* *Ancestors*: AccessControlPolicy.Owner
+
+**Grant** - Container for `Grantee` and `Permission`.
+
+* *Type*: Container
+* *Ancestors*: AccessControlPolicy.AccessControlList
+
+**Grantee** - The `ID`, `Emailaddress`, or `uri` of the subject who is being granted permissions.
+
+* *Type*: String
+* *Ancestors*: AccessControlPolicy.AccessControlList.Grant
+
+**ID** - Bucket owner's ID.
+
+* *Type*: String
+* *Ancestors*: AccessControlPolicy.Owner|AccessControlPolicy.AccessControlList.Grant
+
+**Owner** - Container for bucket owner information.
+
+* *Type*: Container
+* *Ancestors*: AccessControlPolicy
+
+**Permission** - Permission granted to the `Grantee` for bucket.
+
+* *Type*: String
+* *Valid Values*: FULL_CONTROL|WRITE|WRITE_ACP|READ|READ_ACP
+* *Ancestors*: AccessControlPolicy.AccessControlList.Grant
+
+In request elements, you can specify the grantee to whom you are granting permissions in the following ways:
+
+* *emailAddress*: The email address of an account
+
+```
+<Grantee>
+  <EmailAddress>user1@basho.com</EmailAddress>
+</Grantee>
+```
+
+From the email address, the grantee is resolved to the CanonicalUser. The response to a `GET Object acl` request displays the grantee as the CanonicalUser.
+
+* *id*: The user ID of an account
+
+```
+<Grantee>
+  <ID>ID</ID>
+  <DisplayName>GranteesEmail</DisplayName>
+</Grantee>
+```
+
+For the id method, DisplayName is optional and ignored in the request.
+
+* *uri*: The uri that defines a group
+
+```
+<Grantee>
+  <URI>http://data.basho.com/groups/AuthenticatedUsers</URI>
+</Grantee>
+```
+
+### Response Elements
+
+PUT Bucket acl does not return response elements.
+
+## Examples
+
+### Sample Request with Access Permission Specified in the Request Body
+
+This sample request grants access permission to an existing bucket, named basho_docs, by specifying the ACL in the request body. In addition to granting full control to the bucket owner, the request specifies the following grants:
+
+* Grant the AllUsers group READ permission on the bucket.
+* Grant the Dev group WRITE permission on the bucket.
+* Grant an account, identified by email address, WRITE_ACP permission.
+* Grant an account, identified by canonical user ID, READ_ACP permission.
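+
+Before the raw request, here is a hedged sketch of the same grants expressed
+through boto3's S3-compatible client (boto3, the endpoint, and the
+`AccessControlPolicy` argument shape are assumptions, not part of this
+reference):
+
+```python
+import boto3
+
+s3 = boto3.client("s3", endpoint_url="https://data.basho.com")  # hypothetical
+
+s3.put_bucket_acl(
+    Bucket="basho_docs",
+    AccessControlPolicy={
+        "Owner": {"ID": "BucketOwnerCanonicalUserID",
+                  "DisplayName": "OwnerDisplayName"},
+        "Grants": [
+            {"Grantee": {"Type": "CanonicalUser",
+                         "ID": "BucketOwnerCanonicalUserID"},
+             "Permission": "FULL_CONTROL"},
+            {"Grantee": {"Type": "Group",
+                         "URI": "http://acs.data.basho.com/groups/global/AllUsers"},
+             "Permission": "READ"},
+        ],
+    },
+)
+```
+
+The raw request carrying the full ACL document: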
+
+```
+PUT /?acl HTTP/1.1
+Host: basho_docs.data.basho.com
+Content-Length: 1660202
+x-amz-date: Fri, 01 Jun 2012 12:00:00 GMT
+Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=
+
+<AccessControlPolicy>
+  <Owner>
+    <ID>BucketOwnerCanonicalUserID</ID>
+    <DisplayName>OwnerDisplayName</DisplayName>
+  </Owner>
+  <AccessControlList>
+    <Grant>
+      <Grantee>
+        <ID>852b113e7a2f25102679df27bb0ae12b3f85be6BucketOwnerCanonicalUserID</ID>
+        <DisplayName>OwnerDisplayName</DisplayName>
+      </Grantee>
+      <Permission>FULL_CONTROL</Permission>
+    </Grant>
+    <Grant>
+      <Grantee>
+        <URI>http://acs.data.basho.com/groups/global/AllUsers</URI>
+      </Grantee>
+      <Permission>READ</Permission>
+    </Grant>
+    <Grant>
+      <Grantee>
+        <URI>http://acs.data.basho.com/groups/global/Dev</URI>
+      </Grantee>
+      <Permission>WRITE</Permission>
+    </Grant>
+    <Grant>
+      <Grantee>
+        <EmailAddress>user1@basho.com</EmailAddress>
+      </Grantee>
+      <Permission>WRITE_ACP</Permission>
+    </Grant>
+    <Grant>
+      <Grantee>
+        <ID>f30716ab7115dcb44a5ef76e9d74b8e20567f63TestAccountCanonicalUserID</ID>
+      </Grantee>
+      <Permission>READ_ACP</Permission>
+    </Grant>
+  </AccessControlList>
+</AccessControlPolicy>
+```
+
+### Sample Response
+
+```
+HTTP/1.1 200 OK
+Date: Fri, 01 Jun 2012 12:00:00 GMT
+Content-Length: 0
+Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/put-bucket-policy.md b/content/riak/cs/2.1.2/references/apis/storage/s3/put-bucket-policy.md
new file mode 100644
index 0000000000..744298b23f
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/put-bucket-policy.md
@@ -0,0 +1,136 @@
+---
+title: "Riak CS PUT Bucket Policy"
+description: ""
+project: "riak_cs"
+project_version: "2.1.2"
+toc: true
+aliases:
+  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Bucket-policy/
+  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Bucket-policy/
+  - /riak/cs/latest/references/apis/storage/s3/put-bucket-policy/
+---
+
+The `PUT Bucket policy` operation uses the `policy` subresource to add or replace the policy on an existing bucket. If the bucket already has a policy, the one in this request completely replaces it. To perform this operation, you must be the bucket owner.
+
+{{% note title="Note" %}}
+Currently only the `aws:SourceIp` and `aws:SecureTransport` policy conditions
+are supported.
+{{% /note %}}
+
+## Requests
+
+### Request Syntax
+
+This example shows the syntax for setting the policy in the request body.
+
+```
+PUT /?policy HTTP/1.1
+Host: bucketname.data.basho.com
+Date: date
+Authorization: signatureValue
+
+Policy written in JSON
+```
+
+### Request Parameters
+
+This operation does not use request parameters.
+
+### Request Headers
+
+This operation uses only request headers that are common to all operations. For more information, see [Common Riak CS Request Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-request-headers).
+
+### Request Elements
+
+The body is a JSON string containing the policy elements. The supported policy elements are detailed below.
+
+#### Version
+
+The Version element specifies the policy language version. If a version is not specified, this defaults to `2008-10-17`.
+
+#### ID
+
+The Id element specifies an optional identifier for the policy.
+
+#### Statement
+
+The Statement element is the main element of a policy and is required. It contains an array of individual statements, each a JSON block enclosed in braces, i.e. `{ ... }`. Below is a list of currently supported statement elements.
+
+**Sid** : The Sid (statement ID) is an optional identifier that you provide for the policy statement. You can assign a Sid value to each statement in a statement array.
+
+**Effect** : The Effect element is required and specifies whether the statement will result in an allow or an explicit deny. Valid values for Effect are `Allow` and `Deny`.
**Principal** : The Principal element specifies the user, account, service, or other entity that is allowed or denied access to a resource. Currently, Riak CS only supports the `*` principal type.

**Action** : The Action element describes the type of access that should be allowed or denied.

Supported actions are:

- `s3:GetObject`, `s3:PutObject`, `s3:DeleteObject`,
- `s3:GetObjectAcl`, `s3:PutObjectAcl`,
- `s3:ListMultipartUploadParts`, `s3:AbortMultipartUpload`,
- `s3:CreateBucket`, `s3:DeleteBucket`, `s3:ListBucket`, `s3:ListAllMyBuckets`,
- `s3:GetBucketAcl`, `s3:PutBucketAcl`,
- `s3:GetBucketPolicy`, `s3:DeleteBucketPolicy`, `s3:PutBucketPolicy`,
- `s3:ListBucketMultipartUploads`.

**Resource** : The Resource element specifies the object or objects that the statement covers. Currently, Riak CS only supports buckets as resources, specified as `"arn:aws:s3:::bucketname/*"`.

**Condition** : The Condition element (or Condition block) lets you specify conditions for when a policy is in effect. The Condition element is optional. Riak CS supports three condition types: `Bool`, `IpAddress`, and `NotIpAddress`.

Riak CS supports two keys for use with these conditions: `aws:SecureTransport` and `aws:SourceIp`. `aws:SecureTransport` is used with the `Bool` condition to check whether the request was sent with SSL. Accepted values for this key are `true` and `false`. `aws:SourceIp` is used with the `IpAddress` and `NotIpAddress` conditions, and represents the requester's IP address. IPv4 addresses in CIDR notation are supported.

The IP address compared against `IpAddress` or `NotIpAddress` is taken from the source IP address of the TCP connection. If Riak CS sits behind a load balancer that does not preserve the source IP address, and bucket policies based on IP addresses are in use, Riak CS can be configured to use the IP address given in the `X-Forwarded-For` request header, which is added by the load balancer. Set `trust_x_forwarded_for` to `true` in `app.config` when that header can be trusted and is secure. The default is `false`, in which case Riak CS uses the source IP address of the TCP connection.

More information on S3 policies can be found in Amazon's [Permissions And Policies](http://docs.aws.amazon.com/IAM/latest/UserGuide/PermissionsAndPolicies.html) documentation.


## Response

### Response Headers

This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers).

### Response Elements

`PUT Bucket policy` returns no response elements; the response indicates only whether the operation succeeded.

## Examples
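As a complement to the wire-level sample that follows, here is a hedged sketch of applying such a policy with Python and boto3; the endpoint, credentials, bucket name, and `Sid` value are placeholders, and the policy sticks to the supported `aws:SourceIp` condition.

```python
# A minimal sketch, assuming a local Riak CS endpoint and placeholder
# credentials; the Sid value is hypothetical.
import json

import boto3
from botocore.client import Config

s3 = boto3.client(
    "s3",
    endpoint_url="http://localhost:8080",
    aws_access_key_id="ACCESS_KEY",
    aws_secret_access_key="SECRET_KEY",
    config=Config(signature_version="s3"),   # Riak CS uses AWS signature v2
)

policy = {
    "Version": "2008-10-17",
    "Statement": [{
        "Sid": "read-only-from-office-range",           # hypothetical identifier
        "Effect": "Allow",
        "Principal": "*",                               # the only supported principal
        "Action": ["s3:GetObject", "s3:GetObjectAcl"],
        "Resource": "arn:aws:s3:::bucketname/*",
        "Condition": {"IpAddress": {"aws:SourceIp": "192.0.72.1/24"}},
    }],
}

# The policy document travels as a JSON string in the request body.
s3.put_bucket_policy(Bucket="bucketname", Policy=json.dumps(policy))
```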
### Sample Request

The following request applies a policy to the bucket.

```
PUT /?policy HTTP/1.1
Host: bucketname.data.basho.com
Date: Tue, 04 Apr 2010 20:34:56 GMT
Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=

{
  "Version": "2008-10-17",
  "Statement": [
    {
      "Sid": "0xDEADBEEF",
      "Effect": "Allow",
      "Principal": "*",
      "Action": ["s3:GetObjectAcl","s3:GetObject"],
      "Resource": "arn:aws:s3:::bucketname/*",
      "Condition": {
        "IpAddress": {
          "aws:SourceIp": "192.0.72.1/24"
        }
      }
    }
  ]
}
```

### Sample Response

```
HTTP/1.1 204 No Content
Date: Tue, 04 Apr 2010 12:00:01 GMT
Connection: keep-alive
Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/put-bucket.md b/content/riak/cs/2.1.2/references/apis/storage/s3/put-bucket.md
new file mode 100644
index 0000000000..f89f3e5a4a
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/put-bucket.md
@@ -0,0 +1,121 @@
---
title: "Riak CS PUT Bucket"
description: ""
project: "riak_cs"
project_version: "2.1.2"
toc: true
aliases:
  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Bucket/
  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Bucket/
  - /riak/cs/latest/references/apis/storage/s3/put-bucket/
---

The `PUT Bucket` operation creates a new bucket. The user who sends the request to create the bucket becomes the bucket owner. Anonymous requests cannot create buckets.

*Note:* To create a bucket, you must have a valid Key ID, which is used to authenticate requests.

## Bucket Naming

A best practice is to use DNS-compliant bucket names. DNS-compliant bucket names ensure that virtual hosted-style requests can locate buckets.

A bucket name must obey the following rules, which produce a DNS-compliant bucket name:

* Must be from 3 to 63 characters long.
* Must consist of one or more labels, separated by periods (`.`). Each label:
  * Must start with a lowercase letter or a number.
  * Must end with a lowercase letter or a number.
  * Can contain lowercase letters, numbers, and dashes.
* Must not be formatted as an IP address (e.g., 192.168.9.2).

## Access Permissions

PUT Bucket offers the option to specify the permissions you want to grant to specific accounts or groups for the bucket. You can grant permissions to accounts or groups with request headers, using one of the following two methods:

* Specify a predefined ACL using the `x-amz-acl` request header. More information about predefined ACLs is available [here](http://docs.amazonwebservices.com/AmazonS3/latest/dev/ACLOverview.html#CannedACL).
* Specify access permissions explicitly using the `x-amz-grant-read`, `x-amz-grant-write`, `x-amz-grant-read-acp`, `x-amz-grant-write-acp`, and `x-amz-grant-full-control` headers, which map to the set of ACL permissions supported by Amazon S3.

*Note*: You can use either a predefined ACL or specify access permissions explicitly, not both.

## Requests

### Request Syntax

```
PUT / HTTP/1.1
Host: bucketname.data.basho.com
Content-Length: length
Date: date
Authorization: signature_value

<CreateBucketConfiguration>
  <LocationConstraint>BucketRegion</LocationConstraint>
</CreateBucketConfiguration>
```
{{% note title="Note" %}}
This example includes some request headers. The Request Headers section
contains the complete list of headers.
{{% /note %}}

### Request Parameters

This operation does not use request parameters.

### Request Headers

PUT Bucket offers the following request headers in addition to the request headers common to all operations.
**x-amz-acl** - This request header specifies a predefined ACL to apply to the bucket being created. A predefined ACL grants specific permissions to individual accounts or predefined groups.

* *Type*: String
* *Valid Values*: private | public-read | public-read-write | authenticated-read | bucket-owner-read | bucket-owner-full-control

### Response Elements

PUT Bucket does not return response elements.

## Examples

### Sample Request

A request that creates a bucket named `basho_docs`.

```
PUT / HTTP/1.1
Host: basho_docs.data.basho.com
Content-Length: 0
Date: Fri, 01 Jun 2012 12:00:00 GMT
Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=
```

### Sample Response

```
HTTP/1.1 200 OK
Date: Fri, 01 Jun 2012 12:00:00 GMT
Content-Length: 0
Connection: close
Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
```

### Sample Request to Configure Access Permission Using Predefined ACL

This request creates a bucket named `basho_docs` and sets the ACL to private.

```
PUT / HTTP/1.1
Host: basho_docs.data.basho.com
Content-Length: 0
x-amz-acl: private
Date: Fri, 01 Jun 2012 12:00:00 GMT
Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=
```

### Sample Response For Bucket with Predefined ACL

```
HTTP/1.1 200 OK
Date: Fri, 01 Jun 2012 12:00:00 GMT
Location: /basho_docs
Content-Length: 0
Connection: close
Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/put-object-acl.md b/content/riak/cs/2.1.2/references/apis/storage/s3/put-object-acl.md
new file mode 100644
index 0000000000..d5f6209867
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/put-object-acl.md
@@ -0,0 +1,215 @@
---
title: "Riak CS PUT Object ACL"
description: ""
project: "riak_cs"
project_version: "2.1.2"
toc: true
aliases:
  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Object-ACL/
  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Object-ACL/
  - /riak/cs/latest/references/apis/storage/s3/put-object-acl/
---

The `PUT Object acl` operation uses the `acl` subresource to set the access control list (ACL) permissions for an existing object in a bucket.

*Note:* You must have WRITE_ACP access to the object to use this operation.

`PUT Object acl` offers two methods for setting an object's permissions:

* Specify the ACL in the request body
* Specify permissions using request headers

*Note*: You can specify an ACL in the request body or with request headers, not both.

## Requests

### Request Syntax

This example shows the syntax for setting the ACL in the request body. The Request Headers section contains a list of headers you can use instead.

```
PUT /ObjectName?acl HTTP/1.1
Host: bucketname.data.basho.com
Date: date
Authorization: signatureValue

<AccessControlPolicy>
  <Owner>
    <ID>ID</ID>
    <DisplayName>EmailAddress</DisplayName>
  </Owner>
  <AccessControlList>
    <Grant>
      <Grantee>
        <ID>ID</ID>
        <DisplayName>EmailAddress</DisplayName>
      </Grantee>
      <Permission>Permission</Permission>
    </Grant>
    ...
  </AccessControlList>
</AccessControlPolicy>
```

### Request Parameters

This operation does not use request parameters.

### Request Headers

`PUT Object acl` offers the following request headers in addition to the request headers common to all operations.

**x-amz-acl** - This request header specifies a predefined ACL to apply to the object being created. A predefined ACL grants specific permissions to individual accounts or predefined groups.
* *Type*: String
* *Valid Values*: private | public-read | public-read-write | authenticated-read | bucket-owner-read | bucket-owner-full-control
* *Default*: private

### Request Elements

If you specify the ACL using the request body, you must use the following elements:

**AccessControlList** - Container for ACL information (Grant, Grantee, and Permission).

* *Type*: Container
* *Ancestors*: AccessControlPolicy

**AccessControlPolicy** - Contains the elements that set the ACL permissions for each grantee.

* *Type*: Container
* *Ancestors*: None

**DisplayName** - Object owner's display name.

* *Type*: String
* *Ancestors*: AccessControlPolicy.Owner

**Grant** - Container for `Grantee` and `Permission`.

* *Type*: Container
* *Ancestors*: AccessControlPolicy.AccessControlList

**Grantee** - The subject who is being granted permissions.

* *Type*: String
* *Valid Values*: DisplayName|EmailAddress|AuthenticatedUser
* *Ancestors*: AccessControlPolicy.AccessControlList.Grant

**ID** - Object owner's ID.

* *Type*: String
* *Ancestors*: AccessControlPolicy.Owner|AccessControlPolicy.AccessControlList.Grant

**Owner** - Container for object owner information.

* *Type*: Container
* *Ancestors*: AccessControlPolicy

**Permission** - Permission granted to the `Grantee`.

* *Type*: String
* *Valid Values*: FULL_CONTROL|WRITE_ACP|READ|READ_ACP
* *Ancestors*: AccessControlPolicy.AccessControlList.Grant

In request elements, you can specify the grantee to whom you are granting permissions in the following ways:

* *emailAddress*: The email address of an account

```
<Grantee>
  <EmailAddress>user1@basho.com</EmailAddress>
</Grantee>
```

From the email address, the grantee is resolved to the CanonicalUser. The response to a `GET Object acl` request displays the grantee as the CanonicalUser.

* *id*: The user ID of an account

```
<Grantee>
  <ID>ID</ID>
  <DisplayName>GranteesEmail</DisplayName>
</Grantee>
```

For the id method, DisplayName is optional and ignored in the request.

* *uri*: The URI that defines a group

```
<Grantee>
  <URI>http://data.basho.com/groups/AuthenticatedUsers</URI>
</Grantee>
```

### Response Elements

`PUT Object acl` does not return response elements.

## Examples

### Sample Request with Access Permission Specified in Request Body

This sample request grants access permission to an existing object, named `basho-process.jpg`, by specifying the ACL in the request body. In addition to granting full control to the bucket owner, the request grants full control to an account identified by its canonical user ID.

```
PUT /basho-process.jpg?acl HTTP/1.1
Host: basho_docs.data.basho.com
Date: Fri, 01 Jun 2012 12:00:00 GMT
Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=
Content-Length: 124

<AccessControlPolicy>
  <Owner>
    <ID>75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a</ID>
    <DisplayName>user1@basho.com</DisplayName>
  </Owner>
  <AccessControlList>
    <Grant>
      <Grantee>
        <ID>75aa57f09aa0c8caeab4f8c24e99d10f8e7faeeExampleCanonicalUserID</ID>
        <DisplayName>user2@basho.com</DisplayName>
      </Grantee>
      <Permission>FULL_CONTROL</Permission>
    </Grant>
  </AccessControlList>
</AccessControlPolicy>
```

### Sample Response

This is the sample response when versioning is enabled.

```
HTTP/1.1 200 OK
Date: Fri, 01 Jun 2012 12:00:00 GMT
Last-Modified: Fri, 01 Jun 2012 10:30:15 GMT
Content-Length: 0
Connection: close
Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
```
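The same effect as the headers-based sample that follows can be had from an SDK. A hedged sketch with Python and boto3 (the endpoint, credentials, and bucket name are placeholders; signature version 2 is pinned as in the earlier sketches):

```python
# A minimal sketch: the canned-ACL path needs no XML body at all.
import boto3
from botocore.client import Config

s3 = boto3.client(
    "s3",
    endpoint_url="http://localhost:8080",    # assumed Riak CS listener
    aws_access_key_id="ACCESS_KEY",
    aws_secret_access_key="SECRET_KEY",
    config=Config(signature_version="s3"),   # Riak CS uses AWS signature v2
)

# Equivalent to sending "x-amz-acl: public-read", as in the sample below.
s3.put_object_acl(Bucket="examplebucket", Key="basho-process.jpg",
                  ACL="public-read")
```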
### Sample Request Setting Access Permissions with Headers

The following request uses the ACL-specific request header `x-amz-acl` to specify a predefined ACL (`public-read`) that grants object read access to everyone.

```
PUT /basho-process.jpg?acl HTTP/1.1
Host: examplebucket.data.basho.com
x-amz-acl: public-read
Accept: */*
Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=
Connection: Keep-Alive
```

### Sample Response to Setting Permissions with Headers

```
HTTP/1.1 200 OK
x-amz-id-2: ZDsjJI9E3ke4WK56w5YegkbG6RWPxNQHIQ0CjrjyRVFZhEbabXnBO9w5G7Dmxsgk
x-amz-request-id: 827BD84C13B255B1
Date: Fri, 01 Jun 2012 12:00:00 GMT
Content-Length: 0
Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/put-object-copy.md b/content/riak/cs/2.1.2/references/apis/storage/s3/put-object-copy.md
new file mode 100644
index 0000000000..cf825637d2
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/put-object-copy.md
@@ -0,0 +1,95 @@
---
title: "Riak CS PUT Object (Copy)"
description: ""
project: "riak_cs"
project_version: "2.1.2"
toc: true
aliases:
  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Object-Copy/
  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Object-Copy/
  - /riak/cs/latest/references/apis/storage/s3/put-object-copy/
---

The `PUT Object (Copy)` operation creates a copy of an object that is already stored in Riak CS. Adding the `x-amz-copy-source` HTTP header makes the PUT operation copy the source object into the destination bucket.

## Access Permissions

PUT Object (Copy) offers the option to specify the permissions you want to grant to specific accounts or groups for the copied object. You can grant permissions to accounts or groups with request headers, using one of the following two methods:

* Specify a predefined ACL using the `x-amz-acl` request header. More information about predefined ACLs is available [here](http://docs.amazonwebservices.com/AmazonS3/latest/dev/ACLOverview.html#CannedACL).
* Specify access permissions explicitly using the `x-amz-grant-read`, `x-amz-grant-write`, `x-amz-grant-read-acp`, `x-amz-grant-write-acp`, `x-amz-grant-full-control` headers, which map to the set of ACL permissions supported by Amazon S3.

{{% note title="Note" %}}
You can use either a predefined ACL or specify access permissions explicitly,
not both.
{{% /note %}}

*Note*: You can configure an application to use the `100-continue` HTTP status code, which sends the request headers prior to sending the request body. Doing so prevents the message body from being sent when the message is rejected based on the headers (for example, due to authentication failure or redirect).


## Requests

### Request Syntax

```
PUT /ObjectNameCopy HTTP/1.1
Host: bucketname.data.example.com
x-amz-copy-source: /ObjectName
Date: date
Authorization: signature_value
```

### Request Headers

PUT Object (Copy) offers the following request headers in addition to request headers common to all operations:

**Expect** - When you use `100-continue` in your application, it doesn't send the request body until it receives an acknowledgment. That way, the body of the message isn't sent if the message is rejected based on the headers.

* *Type*: String
* *Default*: None
* *Valid Values*: 100-continue
* *Constraints*: None

**x-amz-copy-source** - Path to source object (object to be copied).

* *Type*: String
* *Default*: None
* *Constraints*: None

#### Permission Request Headers

**x-amz-acl** - This request header specifies a predefined ACL to apply to the object being created.
A predefined ACL grants specific permissions to individual accounts or predefined groups.

* *Type*: String
* *Valid Values*: private | public-read | public-read-write | authenticated-read | bucket-owner-read | bucket-owner-full-control
* *Constraints*: None

## Examples

### Sample Request

A request that copies the object `basho-process.jpg` within the bucket `basho_docs`.

```
PUT /basho-process.jpg HTTP/1.1
Host: basho_docs.data.basho.com
x-amz-copy-source: /basho-process.jpg
Date: Mon, 18 Feb 2013 16:38:49 GMT
Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=
Content-Type: text/plain
Content-Length: 0
Expect: 100-continue
```

### Sample Response

```
HTTP/1.1 100 Continue
HTTP/1.1 200 OK
Server: Riak CS
ETag: "d41d8cd98f00b204e9800998ecf8427e"
Date: Mon, 18 Feb 2013 16:38:49 GMT
Content-Type: text/plain
Content-Length: 0
Connection: close
```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/put-object.md b/content/riak/cs/2.1.2/references/apis/storage/s3/put-object.md
new file mode 100644
index 0000000000..2f8b29a642
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/put-object.md
@@ -0,0 +1,143 @@
---
title: "Riak CS PUT Object"
description: ""
project: "riak_cs"
project_version: "2.1.2"
toc: true
aliases:
  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Object/
  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-PUT-Object/
  - /riak/cs/latest/references/apis/storage/s3/put-object/
---

The `PUT Object` operation adds an object to a bucket. The PUT Object operation does not add partial objects, so a success response indicates that the entire object was added to the bucket.

*Note:* You must have WRITE permission on a bucket to use this operation.

Riak CS is a distributed system. If it receives multiple write requests for the same object at the same time, only the last object written is kept. If necessary, you can build versioning or object locking into your application.

To prevent the storage of data corrupted during transmission over a network, the Content-MD5 header instructs Riak CS to compare the object to the MD5 value provided. If the values don't match, the operation returns an error. In addition, you can compare the ETag returned by the PUT Object operation to the MD5 value you calculated for the object.

*Note*: You can configure an application to use the `100-continue` HTTP status code, which sends the request headers prior to sending the request body. Doing so prevents the message body from being sent when the message is rejected based on the headers (for example, due to authentication failure or redirect).

## Access Permissions

PUT Object offers the option to specify the permissions you want to grant to specific accounts or groups for the object. You can grant permissions to accounts or groups with request headers, using one of the following two methods:

* Specify a predefined ACL using the `x-amz-acl` request header. More information about predefined ACLs is available [here](http://docs.amazonwebservices.com/AmazonS3/latest/dev/ACLOverview.html#CannedACL).
* Specify access permissions explicitly using the `x-amz-grant-read`, `x-amz-grant-write`, `x-amz-grant-read-acp`, `x-amz-grant-write-acp`, `x-amz-grant-full-control` headers, which map to the set of ACL permissions supported by Amazon S3.

{{% note title="Note" %}}
You can use either a predefined ACL or specify access permissions explicitly,
not both.
{{% /note %}}
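The Content-MD5/ETag check described above is easy to exercise from an SDK. A hedged sketch with Python and boto3 (endpoint, credentials, bucket, key, and metadata names are placeholders; signature version 2 is pinned as in the earlier sketches):

```python
# A minimal sketch: send the base64-encoded MD5 with the upload, then
# compare the returned ETag with the hex digest of the body.
import base64
import hashlib

import boto3
from botocore.client import Config

s3 = boto3.client(
    "s3",
    endpoint_url="http://localhost:8080",    # assumed Riak CS listener
    aws_access_key_id="ACCESS_KEY",
    aws_secret_access_key="SECRET_KEY",
    config=Config(signature_version="s3"),   # Riak CS uses AWS signature v2
)

body = b"example object data"
resp = s3.put_object(
    Bucket="basho_docs",
    Key="basho-process.jpg",
    Body=body,
    ContentMD5=base64.b64encode(hashlib.md5(body).digest()).decode("ascii"),
    Metadata={"origin": "docs-example"},     # sent as x-amz-meta-origin
)

# For a simple (non-multipart) PUT, the ETag is the hex MD5 of the body.
assert resp["ETag"].strip('"') == hashlib.md5(body).hexdigest()
```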
## Requests

### Request Syntax

```
PUT /ObjectName HTTP/1.1
Host: bucketname.data.example.com
Date: date
Authorization: signature_value
```

### Request Headers

PUT Object offers the following request headers in addition to request headers common to all operations:

**Content-Length** - The size of the object in bytes. This header is required.

* *Type*: String
* *Default*: None
* *Constraints*: None

**Content-MD5** - The base64-encoded 128-bit MD5 digest of the message without the headers according to RFC 1864. Although this header is optional, the Content-MD5 header can be used to confirm that the data is the same as what was originally sent.

* *Type*: String
* *Default*: None
* *Constraints*: None

**Content-Type** - A standard MIME type that describes the content format.

* *Type*: String
* *Default*: binary/octet-stream
* *Constraints*: None

**Expect** - When you use `100-continue` in your application, it doesn't send the request body until it receives an acknowledgment. That way, the body of the message isn't sent if the message is rejected based on the headers.

* *Type*: String
* *Default*: None
* *Valid Values*: 100-continue
* *Constraints*: None

**x-amz-meta-*** - User-specified metadata fields which can be stored with the object.

* *Type*: String
* *Default*: None
* *Constraints*: None

#### Permission Request Headers

**x-amz-acl** - This request header specifies a predefined ACL to apply to the object being created. A predefined ACL grants specific permissions to individual accounts or predefined groups.

* *Type*: String
* *Valid Values*: private | public-read | public-read-write | authenticated-read | bucket-owner-read | bucket-owner-full-control
* *Constraints*: None

## Examples

### Sample Request

A request that stores the object `basho-process.jpg` in the bucket `basho_docs`.

```
PUT /basho-process.jpg HTTP/1.1
Host: basho_docs.data.basho.com
Date: Fri, 01 Jun 2012 12:00:00 GMT
Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=
Content-Type: text/plain
Content-Length: 201445
Expect: 100-continue
[201445 bytes of object data]
```

### Sample Response

```
HTTP/1.1 200 OK
Date: Fri, 01 Jun 2012 12:00:00 GMT
ETag: "32cf731c97645a398434535f271b2358"
Content-Length: 0
Connection: close
Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
```

### Sample Request with Predefined Access Permissions

This request uses an `x-amz-acl` header to specify a predefined ACL that grants READ permission to the public.

```
PUT /draftschedule.jpg HTTP/1.1
Host: myBucket.data.basho.com
x-amz-date: Fri, 01 Jun 2012 12:00:00 GMT
x-amz-acl: public-read
Authorization: AWS AKIAIOSFODNN7EXAMPLE:xQE0diMbLRepdf3YB+FIEXAMPLE=
Content-Length: 300
Expect: 100-continue
Connection: Keep-Alive

...Object data in the body...
```

### Sample Response for Predefined Access Permissions

```
HTTP/1.1 200 OK
Date: Fri, 01 Jun 2012 12:00:00 GMT
ETag: "b24cf9553547f8b395dd038b34a81474"
Content-Length: 0
Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
```
diff --git a/content/riak/cs/2.1.2/references/apis/storage/s3/upload-part.md b/content/riak/cs/2.1.2/references/apis/storage/s3/upload-part.md
new file mode 100644
index 0000000000..db5a05cb62
--- /dev/null
+++ b/content/riak/cs/2.1.2/references/apis/storage/s3/upload-part.md
@@ -0,0 +1,92 @@
---
title: "Riak CS Upload Part"
description: ""
project: "riak_cs"
project_version: "2.1.2"
toc: true
aliases:
  - /riakcs/2.1.2/references/apis/storage/s3/RiakCS-Upload-Part/
  - /riak/cs/2.1.2/references/apis/storage/s3/RiakCS-Upload-Part/
  - /riak/cs/latest/references/apis/storage/s3/upload-part/
---

This operation uploads a part in a multipart upload. You must [initiate a multipart upload]({{}}riak/cs/2.1.2/references/apis/storage/s3/initiate-multipart-upload) before you can upload any part. In this operation you provide part data in your request.

## Requests

### Request Syntax

This example shows the syntax for uploading a part in a multipart upload.

```
PUT /ObjectName?partNumber=PartNumber&uploadId=UploadId HTTP/1.1
Host: bucketname.data.basho.com
Date: date
Content-Length: size
Authorization: signatureValue
```

### Request Headers

**Content-Length** - The size of the object in bytes. This header is required.

* *Type*: String
* *Default*: None
* *Constraints*: None

**Content-MD5** - The base64-encoded 128-bit MD5 digest of the message without the headers according to RFC 1864. Although this header is optional, the Content-MD5 header can be used to confirm that the data is the same as what was originally sent.

* *Type*: String
* *Default*: None
* *Constraints*: None

**Expect** - When you use `100-continue` in your application, it doesn't send the request body until it receives an acknowledgment. That way, the body of the message isn't sent if the message is rejected based on the headers.

* *Type*: String
* *Default*: None
* *Valid Values*: 100-continue
* *Constraints*: None

### Request Elements

This operation does not use request elements.

## Response

### Response Headers

This implementation of the operation uses only response headers that are common to most responses. For more information, see [Common Riak CS Response Headers]({{}}riak/cs/2.1.2/references/apis/storage/s3/common-response-headers).

### Response Elements

This operation does not use response elements.

## Examples

### Sample Request

The following `PUT` request uploads part number 1 in a multipart upload. This request includes the upload ID from an [Initiate Multipart Upload]({{}}riak/cs/2.1.2/references/apis/storage/s3/initiate-multipart-upload) request.

```
PUT /large.iso?partNumber=1&uploadId=VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA HTTP/1.1
Host: os.data.basho.com
Date: Mon, 1 Nov 2010 20:34:56 GMT
Content-Length: 10485760
Content-MD5: pUNXr/BjKK5G2UKvaRRrOA==
Authorization: AWS AKIAIOSFODNN7EXAMPLE:VGhpcyBtZXNzYWdlIHNpZ25lZGGieSRlbHZpbmc=

[10485760 bytes of object data]
```

### Sample Response

The response includes the `ETag` header. This value must be retained, as you provide it in the [Complete Multipart Upload]({{}}riak/cs/2.1.2/references/apis/storage/s3/complete-multipart-upload) request.
+ +``` +HTTP/1.1 200 OK +Date: Mon, 1 Nov 2010 20:34:56 GMT +ETag: "b54357faf0632cce46e942fa68356b38" +Content-Length: 0 +Connection: keep-alive +Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue) +``` diff --git a/content/riak/cs/2.1.2/references/appendices/comparisons/atmos.md b/content/riak/cs/2.1.2/references/appendices/comparisons/atmos.md new file mode 100644 index 0000000000..4d6817de70 --- /dev/null +++ b/content/riak/cs/2.1.2/references/appendices/comparisons/atmos.md @@ -0,0 +1,71 @@ +--- +title: "Riak CS Compared To Atmos" +description: "" +menu: + riak_cs-2.1.2: + name: "Riak CS Compared To Atmos" + identifier: "compare_atmos" + weight: 101 + parent: "theory_comparisons" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/references/appendices/comparisons/Riak-Compared-to-Atmos/ + - /riak/cs/2.1.2/references/appendices/comparisons/Riak-Compared-to-Atmos/ + - /riak/cs/latest/references/appendices/comparisons/atmos/ +--- + +The purpose of this comparison is not to serve as an introduction to Riak CS and EMC Atmos, or their commonalities, but rather to enumerate interesting differences between the two systems. This document is intended for those who already have a basic understanding of both systems. + +If you feel that this comparison is unfair to either of these technologies, please [submit an issue on GitHub](https://github.com/basho/basho_docs/issues/new) or send an email to **docs@basho.com**. + +## Feature/Capability Comparison + +The table below gives a high-level comparison of Riak CS and Atmos features and capabilities. For low-level details, please refer to the Riak CS and [Atmos docs](https://community.emc.com/community/edn/atmos). + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Feature/Capability | Riak | Atmos
:------------------|:-----|:-----
Interfaces | Riak CS offers an S3-compatible interface that can be used with existing S3 clients and libraries. | Atmos offers a REST and SOAP API, an S3-compatible API, and an Atmos SDK, as well as interfaces to traditional storage solutions, including NFS/CIFS and CAS.
Availability and Architecture for Reads/Writes | On write, Riak CS breaks large objects into blocks. Riak CS distributes data across physical machines using consistent hashing and replicates objects a default of 3 times in the underlying Riak storage system. A manifest is maintained for each object that points to which blocks comprise the object. The manifest is used to retrieve all blocks and present them to a client on read.<br><br>Riak CS is a masterless system in that any node can receive and route client requests, making it highly resilient to failure conditions like network partition and hardware failure. Riak uses a request serializer for globally unique entities like users and buckets. This request serializer runs on a single node and, in the event of failure, a portion of write operations (specifically, creating new buckets and users) will be unavailable until service is restored.<br><br>In Riak, by default, objects (including their manifests) are replicated 3 times in the underlying system. Riak can also be configured to store more replicas in a given site. | EMC Atmos stores objects and their metadata separately. The Metadata Service is responsible for storing all of an object's metadata, including policy and user-defined data, and for providing the object layout which is required for both writes and reads to the underlying storage service. On read, the client will connect with a Resource Management Service to talk to a Metadata Location Service, which then locates the correct Metadata Service for the object.<br><br>The Metadata Location Service, responsible for finding a local Metadata Service on read, is deployed on two nodes of the first rack in an EMC Atmos implementation. The Metadata Service itself is a master/slave system with a primary and secondary node. The use of a master/slave architecture for metadata services that are required for reads and writes may compromise availability in the event of hardware failure or network partition. Additionally, Atmos stores only two copies of the metadata for an object at a site, which may also cause availability problems in certain failure conditions.
Users and Multitenancy | Riak exposes multitenancy using S3 conventions for user provisioning and management. Administrators can create users, who are then able to authenticate, create buckets, upload and download data, retrieve account information, and exercise other user privileges. | EMC Atmos implements a more complex tenant scheme. Atmos recommends implementing 1-2 tenants in a system and using multiple sub-tenants underneath each tenant. The number of tenants is limited to the number of physical nodes, as front-end nodes are assigned to a specific tenant for client access. Configuring tenants and subtenants may be operationally complex, while assigning specific tenants to specific front-end nodes may cause end-user availability issues in the event of node failure.
Hardware | Riak CS ships as software and can be installed from source or with packages, including Ubuntu and CentOS. There is no hardware lock-in to specific vendors, and Riak CS is designed to be run on commodity hardware so that enterprises can achieve economies of scale. | EMC Atmos can be deployed as a software/hardware bundle on Atmos Hardware or as a virtual edition deployed on a VMware-certified third-party storage system.
Multi-Datacenter Replication | For multi-site replication in Riak CS, global information for users, bucket information, and manifests are streamed in real time from a primary implementation to a secondary site, so that global state is maintained across locations. Objects can then be replicated in either fullsync or realtime sync mode. The secondary site will replicate the object as in normal operations. Additional datacenters can be added in order to create availability zones or additional data redundancy and locality. Riak CS can also be configured for bi-directional replication. | In EMC Atmos, object replication to secondary sites is done via synchronous or asynchronous replication configured by policies. These policies are implemented as part of the Metadata Service. A read-only copy of the metadata is maintained at secondary sites.
diff --git a/content/riak/cs/2.1.2/references/appendices/comparisons/swift.md b/content/riak/cs/2.1.2/references/appendices/comparisons/swift.md new file mode 100644 index 0000000000..29eaa25854 --- /dev/null +++ b/content/riak/cs/2.1.2/references/appendices/comparisons/swift.md @@ -0,0 +1,79 @@ +--- +title: "Riak CS Compared To Swift" +description: "" +menu: + riak_cs-2.1.2: + name: "Riak CS Compared To Swift" + identifier: "compare_swift" + weight: 100 + parent: "theory_comparisons" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/references/appendices/comparisons/Riak-Compared-to-Swift/ + - /riak/cs/2.1.2/references/appendices/comparisons/Riak-Compared-to-Swift/ + - /riak/cs/latest/references/appendices/comparisons/swift/ +--- + +Riak CS and Swift---the object storage component of OpenStack---are both cloud storage systems that hold many design and implementation details in common. The purpose of this document is not to serve as an introduction to Riak CS and Swift or to their commonalities, but rather to enumerate important differences between the two systems. This document is intended for those who already have a basic understanding of both systems. + +If you feel that this comparison is unfair to either of these technologies, please [submit an issue on GitHub](https://github.com/basho/basho_docs/issues/new) or send an email to **docs@basho.com**. + + +## Feature/Capability Comparison + +The table below gives a high-level comparison of Riak CS and Swift features and capabilities. For low-level details, please refer to the Riak CS and [Swift docs](http://docs.openstack.org/developer/swift/). + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Feature/Capability | Riak | Swift
:------------------|:-----|:-----
Anti-Entropy | Riak CS supports Active Anti-Entropy, which monitors and repairs inconsistencies between divergent replicas. Riak CS also supports "passive" read-time anti-entropy, which provides repair of inconsistencies immediately at client-read time. Swift does not perform repair at read or write time, but rather resolves such issues during its next rsync cycle. | Swift has a continuous anti-entropy process via frequent invocation of "rsync" for repairing any inconsistencies between data node file systems.
Write-Time Communication & Host Failures | Riak CS always writes to the full number of desired hosts, using fallback nodes to perform hinted handoff and stand in for any missing or failing hosts in order to immediately reach full redundancy. As soon as the primary Riak CS nodes are once again reachable, copies on the fallbacks will be sent to them, quickly repairing the state of the cluster. | Swift will write at least a majority/quorum of replicas before declaring success, and will allow anti-entropy to bring the number of replicas up to the full count later if needed due to node failures.
Quorum Models | Riak CS’s underlying quorum model is not only about availability, it also provides a latency-smoothing effect by replying to the user without the need to block on the slowest host in the replication set. This prevents brief per-host performance problems from affecting end-users. | Swift, despite replying only with the "best" single response, will wait for all relevant storage nodes to finish before sending a response to a write request. This can adversely impact latency. However, Swift's read requests do not wait for a quorum; they simply try one replica at a time at random until they get a response with a fairly short timeout before moving on to try another. There are plans to improve the latency of Swift’s write requests.
Full-Stack Integration | Riak CS stands alone as a storage service that has no specific related services for compute, VM image management, etc. | Though it can run on its own, Swift is part of the OpenStack project, a highly regarded and well-defined "stack" of services.
Languages | Riak CS is written in Erlang, a language and platform engineered for extremely high availability, making it easier to build Riak CS on industry-tested distributed systems components, and to attract engineers that specialize in such systems. | Swift is written in Python, a language with a very large, accessible developer community who could readily contribute to Swift without the need to learn a new language.
Installation | Riak CS is designed for easy installation, with a relatively small number of independent components to manage. A minimal installation requires installing just three components and editing fewer than 10 lines of configuration data. | Swift’s "toolbox" approach requires the installation and ongoing operational supervision of various components, including Memcached, SQLite, and Keystone (the OpenStack authentication server), each of which has a deep dependency tree of its own. An upside of this approach is that the system’s overall behavior is extremely modifiable by changing the behavior of any of the many dependencies.
Operations | With Riak CS, a single administrative command on a newly provisioned host tells the system to automatically integrate the new device. Well-defined underlying system components ensure correct behavior during transitions. | Swift requires a high degree of manual management. Devices are added to the definition of the ring by defining their node, name, and zone. To change the definitions, mapping must be regenerated and new definitions must be pushed out to every node with whichever means is available (rsync appears to be the most common). When these files fall out of sync, the system will experience strange behavior or cease to function altogether.
Support For Amazon S3 API | Riak CS directly and natively supports the widely adopted S3 API, including such commonly used aspects as S3-keyed ACLs, hostname-to-bucket translation, etc. | Swift has its own custom (non-S3) API with its own strengths. Optional, externally developed middleware that emulates the S3 API on top of Swift is, however, available.
Governance | Riak CS is open source and is managed by Basho. It is available under the Apache 2 License. | Swift is entirely open source and is managed by the OpenStack Foundation. No license is required in any way and no single company can either block or cause any changes to it on their own.
diff --git a/content/riak/cs/2.1.2/references/appendices/http-admin.md b/content/riak/cs/2.1.2/references/appendices/http-admin.md new file mode 100644 index 0000000000..2716ed7e3e --- /dev/null +++ b/content/riak/cs/2.1.2/references/appendices/http-admin.md @@ -0,0 +1,65 @@ +--- +title: "HTTP Administration Overview" +description: "" +menu: + riak_cs-2.1.2: + name: "Accounts & Admin" + identifier: "http_admin" + weight: 100 + parent: "develop" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/references/appendices/Http-Administration/ + - /riak/cs/2.1.2/references/appendices/Http-Administration/ + - /riak/cs/latest/references/appendices/http-admin/ +--- + +Riak CS exposes the following administrative capabilities over HTTP +above and beyond those associated with Riak itself: + +Task | CS URI | Further reading +:----|:-------|:--------------- +User management | `/riak-cs/user` | [Account Management]({{}}riak/cs/2.1.2/cookbooks/account-management) +User access statistics | `/riak-cs/usage` | [Querying Access Statistics]({{}}riak/cs/2.1.2/cookbooks/querying-access-statistics) +Storage statistics | `/riak-cs/usage` | [Querying Storage Statistics]({{}}riak/cs/2.1.2/cookbooks/querying-storage-statistics) +Global statistics | `/riak-cs/stats` | [Monitoring and Metrics]({{}}riak/cs/2.1.2/cookbooks/monitoring-and-metrics) + +By default, these are accessible over the same IP/port as the rest of +the CS API, but they can be configured to run elsewhere, with or without +authentication. + +## Output format + +For these requests, results are available as either JSON or XML. Request +the appropriate data format by using the HTTP `Accept` header with +either `application/json` or `application/xml`, respectively. + +## URLs + +Each of these requests is performed over the CS HTTP port (`8080` by +default) or administrative port if configured via `admin_port`. The +`admin_ip` configuration setting can be used to further isolate the +administrative commands. + +Only the admin user can view other users' details unless the +`admin_auth_enabled` config is set to `false`. + +## Retrieving Statistics Via S3 Objects + +As an alternative to raw HTTP requests, the administrative requests can +be issued via the S3 API. See the GitHub documents linked below for more +details. 
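For a quick illustration of the output-format negotiation described above, here is a hedged sketch that fetches the global stats endpoint with Python's `requests` library. It assumes the default listener on port 8080 and that the endpoint is reachable without request signing (for example, in a test environment with `admin_auth_enabled` set to `false`); the host and port are placeholders.

```python
# A minimal sketch: ask /riak-cs/stats for JSON via the Accept header.
import requests

resp = requests.get(
    "http://localhost:8080/riak-cs/stats",
    headers={"Accept": "application/json"},   # or "application/xml"
    timeout=10,
)
resp.raise_for_status()
print(resp.json())
```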
+ +## Related Resources + +* [configuring Riak CS]({{}}riak/cs/2.1.2/cookbooks/configuration/riak-cs) +* [Querying Access Statistics]({{}}riak/cs/2.1.2/cookbooks/querying-access-statistics) + * [Usage and Billing Data]({{}}riak/cs/2.1.2/cookbooks/usage-and-billing-data) + * [Github wiki](https://github.com/basho/riak_cs/wiki/Querying-Access-Stats) +* [Querying Storage Statistics]({{}}riak/cs/2.1.2/cookbooks/querying-storage-statistics) + * [Enabling storage statistics](https://github.com/basho/riak_cs/wiki/Logging-Storage-Stats) + * [Github wiki](https://github.com/basho/riak_cs/wiki/Logging-Storage-Stats) +* [Account Management]({{}}riak/cs/2.1.2/cookbooks/account-management) + * [Github wiki](https://github.com/basho/riak_cs/wiki/User-Management) +* [Monitoring and Metrics]({{}}riak/cs/2.1.2/cookbooks/monitoring-and-metrics) diff --git a/content/riak/cs/2.1.2/references/appendices/riak-cs-control.md b/content/riak/cs/2.1.2/references/appendices/riak-cs-control.md new file mode 100644 index 0000000000..cef7d1eac0 --- /dev/null +++ b/content/riak/cs/2.1.2/references/appendices/riak-cs-control.md @@ -0,0 +1,75 @@ +--- +title: "Riak CS Control" +description: "" +menu: + riak_cs-2.1.2: + name: "Riak CS Control" + identifier: "advanced_riak_cs_control" + weight: 102 + parent: "run_advanced" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/references/appendices/RiakCS-Control/ + - /riak/cs/2.1.2/references/appendices/RiakCS-Control/ + - /riak/cs/latest/references/appendices/riak-cs-control/ +--- + +Riak CS Control is a standalone user management application for Riak CS. +It provides a user interface for filtering, disabling, creating and +managing users in a Riak CS Cluster. + +## Installing Riak CS Control + +Riak CS Control [is maintained as a separate application](https://github.com/basho/riak_cs_control) and can be installed via [source or package]({{}}riak/cs/2.1.2/downloads). + +## Setting Up Riak CS Control + +In the `/etc/riak-cs-control/app.config` file, configure the application +with the information needed to connect to the Riak CS cluster you wish +to administer. + +### Configuring Riak CS Control + +``` erlang +{riak_cs_control, [ + %% What port to run the application on. + {port, 8000 }, + + %% Instance of Riak CS you wish to talk to. + {cs_hostname, "s3.amazonaws.com" }, + {cs_port, 80 }, + {cs_protocol, "http" }, + + %% Proxy information; necessary if you are using s3.amazonaws.com as + %% your hostname. + {cs_proxy_host, "localhost" }, + {cs_proxy_port, 8080 }, + + %% Credentials you want the application to run as. + {cs_admin_key, "admin-key" }, + {cs_admin_secret, "admin-secret" }, + + %% Specify the bucket name for administration options. + {cs_administration_bucket, "riak-cs" } +]}, +``` + +### Running Riak CS Control + +Start Riak CS Control as you would Riak or Riak CS with: + +```bash +riak-cs-control start +``` + +## The Users Page + +When you first navigate to the Riak CS Control UI, you will land on the +Users page: + +![Users Page]({{}}images/cs_control_users.png) + +On this page you can quickly see all current Riak CS users along with +their status, e-mail address, and credentials. From here you can filter, +disable, create, and manage users in a Riak CS Cluster. 
diff --git a/content/riak/cs/2.1.2/theory.md b/content/riak/cs/2.1.2/theory.md
new file mode 100644
index 0000000000..12abab091d
--- /dev/null
+++ b/content/riak/cs/2.1.2/theory.md
@@ -0,0 +1,21 @@
---
title: "Riak CS Theory & Concepts Overview"
description: ""
menu:
  riak_cs-2.1.2:
    name: "Theory & Concepts"
    identifier: "theory"
    weight: 400
    pre: beaker
project: "riak_cs"
project_version: "2.1.2"
aliases:
  - /riak/cs/latest/theory/
---

### In This Section

- [Stanchion](./stanchion)
- [Multipart Upload Overview](../cookbooks/multipart-upload-overview/)
- [Garbage Collection](../cookbooks/garbage-collection)
- [Riak CS Compared To...](./comparisons)
diff --git a/content/riak/cs/2.1.2/theory/comparisons.md b/content/riak/cs/2.1.2/theory/comparisons.md
new file mode 100644
index 0000000000..772981038e
--- /dev/null
+++ b/content/riak/cs/2.1.2/theory/comparisons.md
@@ -0,0 +1,19 @@
---
title: "Riak CS Compared To"
description: ""
menu:
  riak_cs-2.1.2:
    name: "Comparisons"
    identifier: "theory_comparisons"
    weight: 103
    parent: "theory"
project: "riak_cs"
project_version: "2.1.2"
aliases:
  - /riak/cs/latest/theory/comparisons/
---

### In This Section

- [Riak CS Compared To Swift](../../references/appendices/comparisons/swift/)
- [Riak CS Compared To Atmos](../../references/appendices/comparisons/atmos/)
diff --git a/content/riak/cs/2.1.2/theory/stanchion.md b/content/riak/cs/2.1.2/theory/stanchion.md
new file mode 100644
index 0000000000..d386555605
--- /dev/null
+++ b/content/riak/cs/2.1.2/theory/stanchion.md
@@ -0,0 +1,72 @@
---
title: "Stanchion"
description: ""
menu:
  riak_cs-2.1.2:
    name: "Stanchion"
    identifier: "theory_stanchion"
    weight: 100
    parent: "theory"
project: "riak_cs"
project_version: "2.1.2"
aliases:
  - /riakcs/2.1.2/theory/stanchion/
  - /riak/cs/2.1.2/theory/stanchion/
  - /riak/cs/latest/theory/stanchion/
---

Stanchion is an application used by Riak CS to manage the serialization of requests, which enables Riak CS to manage [globally unique entities](#globally-unique-entities) like users and bucket names. Serialization in this context means that the entire cluster agrees upon a single value for any globally unique entity at any given time; when that value is changed, the new value must be recognized throughout the entire cluster.

## The Role of Stanchion in a Riak CS Cluster

Unlike Riak and Riak CS, which both run on multiple nodes in your cluster, there should be only _one_ running Stanchion instance in your Riak CS cluster at any time. Correspondingly, your Stanchion installation must be managed and configured separately. For more information, see the following documents:

* [Configuring Stanchion]({{}}riak/cs/2.1.2/cookbooks/configuration/stanchion)
* [Installing Stanchion]({{}}riak/cs/2.1.2/cookbooks/installing#installing-stanchion-on-a-node)
* [The Stanchion Command-line Interface]({{}}riak/cs/2.1.2/cookbooks/command-line-tools#stanchion)

For a more in-depth discussion of implementation details, see the project's [README](https://github.com/basho/stanchion/blob/master/README.org) on GitHub.

## Globally Unique Entities

There are two types of entities that must be globally unique within a Riak CS system:

1. **User identifiers** --- Riak CS mandates that each user create an account using an email address as an identifier. Stanchion takes steps to ensure that an email address has not already been used before accepting a user creation request.
2.
**Bucket names** --- Bucket names must be unique within a Riak CS system (just as they must be unique in S3 and other systems), and any attempt to create a bucket with a name that is already in use is rejected.

The uniqueness of these entities is enforced by serializing any creation or modification requests that involve them. This process is handled by Stanchion. Under the hood, Stanchion mandates that all [vnodes]({{}}riak/kv/2.1.3/learn/glossary#vnode) in the underlying Riak cluster that are responsible for the user or bucket being created must be available at creation time.

One result of this enforcement is that user creation requests and bucket creation or modification (e.g., deletion) requests are not highly available like other Riak CS system operations. If the Stanchion application is unavailable or cannot be reached for whatever reason, you will not be able to carry out user- and bucket-related operations. In addition, instability in the Riak cluster may lead to user and bucket requests being disallowed. If this happens, you will see something like this in the Stanchion console or error logs:

```log
2013-01-03 05:24:24.028 [warning] <0.110.0>@stanchion_utils:bucket_available:501 Error occurred trying to check if the bucket <<"mybucket">> exists. Reason: <<"{pr_val_unsatisfied,3,2}">>
```

Because of this, user- and bucket-related operations should be used _only_ as preparation for a workflow and not included as part of a highly available workflow.
diff --git a/content/riak/cs/2.1.2/tutorials/fast-track.md b/content/riak/cs/2.1.2/tutorials/fast-track.md
new file mode 100644
index 0000000000..8c2c3176be
--- /dev/null
+++ b/content/riak/cs/2.1.2/tutorials/fast-track.md
@@ -0,0 +1,52 @@
---
title: "The Riak CS Fast Track"
description: ""
menu:
  riak_cs-2.1.2:
    name: "The Riak CS Fast Track"
    identifier: "fast_track"
    weight: 300
    parent: "index"
project: "riak_cs"
project_version: "2.1.2"
aliases:
  - /riakcs/2.1.2/cookbooks/tutorials/fast-track/
  - /riak/cs/2.1.2/cookbooks/tutorials/fast-track/
  - /riak/cs/latest/tutorials/fast-track/
---

Riak CS (Cloud Storage) is simple, open source storage software built on top of Riak. It can be used to build public or private clouds, or as reliable storage to power applications and services. It features:

* Highly available, fault-tolerant storage
* Large object support and multipart upload
* S3-compatible API and authentication
* OpenStack Swift API (not covered in this fast track)
* Multi-tenancy and per-user reporting
* Simple operational model for adding capacity
* Robust stats for monitoring and metrics

## What is the Riak CS Fast Track?

The Riak CS Fast Track aims to get you up and running with Riak CS as quickly as possible, so that you can learn by doing. It presents alternative ways to configure and deploy a local test environment and walks through basic operations that illustrate Riak CS's core concepts.

The Fast Track is designed for people with little or no experience with Riak CS, but it can still be useful for more experienced users.

## What does the Fast Track Cover?
+ +The Fast Track takes you through the following sections: + +* [What is Riak CS?](what-is-riak-cs) --- A high-level overview of Riak + CS and its architecture +* [Building a Local Test Environment](local-testing-environment) --- Instructions on setting up a + local environment on your machine +* [Building a Virtual Testing Environment](virtual-test-environment) --- Instructions on setting + up a virtual environment on your machine +* [Testing the Riak CS Installation](test-installation) --- Using s3cmd to test your + local Riak CS installation diff --git a/content/riak/cs/2.1.2/tutorials/fast-track/local-testing-environment.md b/content/riak/cs/2.1.2/tutorials/fast-track/local-testing-environment.md new file mode 100644 index 0000000000..cf5fa209f3 --- /dev/null +++ b/content/riak/cs/2.1.2/tutorials/fast-track/local-testing-environment.md @@ -0,0 +1,473 @@ +--- +title: "Building a Local Test Environment" +description: "" +menu: + riak_cs-2.1.2: + name: "Building a Local Test Environment" + identifier: "fast_track_local_test" + weight: 101 + parent: "fast_track" +project: "riak_cs" +project_version: "2.1.2" +aliases: + - /riakcs/2.1.2/cookbooks/tutorials/fast-track/Building-a-Local-Test-Environment/ + - /riak/cs/2.1.2/cookbooks/tutorials/fast-track/Building-a-Local-Test-Environment/ + - /riak/cs/latest/tutorials/fast-track/local-testing-environment/ +--- + +The following instructions will guide you through installing a Riak CS +test environment. This guide does not cover system/service tuning and it +does not attempt to optimize your installation for your particular +architecture. + +If you want to build a testing environment with a minimum of +configuration, there is an option for [Building a Virtual Testing Environment]({{}}riak/cs/2.1.2/tutorials/fast-track/virtual-test-environment). + +## Installing Your First Node + +You should complete the following preparatory steps _before_ installing +and running Riak and Riak CS. + +### Step 1: Raise your system's open file limits + +Riak can consume a large number of open file handles during normal +operation. See the [Open Files Limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit) document for more information on +how to increase your system's open files limit. + +If you are the root user, you can increase the system's open files limit +*for the current session* with this command: + +```bash +ulimit -n 65536 +``` + +For this setting to persist in most Linux distributions, you also need +to save it for the `root` and `riak` users in +`/etc/security/limits.conf`: + +```bash +# ulimit settings for Riak CS +root soft nofile 65536 +root hard nofile 65536 +riak soft nofile 65536 +riak hard nofile 65536 +``` + +For Mac OS X, consult the [open files limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit/#mac-os-x) documentation. + +### Step 2: Download and install packages + +This guide uses `curl` for downloading packages and interacting with the +Riak CS API, so let's make sure that it's installed: + +```bash +sudo apt-get install -y curl +``` + +**Note**: If you're running Riak CS on a non-Debian/Ubuntu OS, +substitute the appropriate CLI commands. + +If you are running Ubuntu 11.10 or later, you will also need the +`libssl0.9.8` package. See [Installing on Debian and Ubuntu]({{}}riak/kv/2.1.3/setup/installing/debian-ubuntu) for more information. + +```bash +sudo apt-get install -y libssl0.9.8 +``` + +Now, grab the appropriate packages: Riak, Riak CS, and Stanchion. 
See +[Download Riak]({{}}riak/kv/2.1.3/downloads/) and [Download Riak CS]({{}}riak/cs/2.1.2/downloads). +You can skip Riak CS Control for now. + +Once you have the packages, install them per the instructions below. + +#### First, install Riak + +The following links provide platform-specific instructions for +installing Riak. + +**Do not attempt to configure or start Riak until step 3 in this +document.** + + * [Debian and Ubuntu]({{}}riak/kv/2.1.3/setup/installing/debian-ubuntu) + * [RHEL and CentOS]({{}}riak/kv/2.1.3/setup/installing/rhel-centos) + * [Mac OS X]({{}}riak/kv/2.1.3/setup/installing/mac-osx) + * [FreeBSD]({{}}riak/kv/2.1.3/setup/installing/freebsd) + * [SUSE]({{}}riak/kv/2.1.3/setup/installing/suse) + * [Windows Azure]({{}}riak/kv/2.1.3/setup/installing/windows-azure) + * [AWS Marketplace]({{}}riak/kv/2.1.3/setup/installing/amazon-web-services) + * [From Source]({{}}riak/kv/2.1.3/setup/installing/source) + +#### Next, install Riak CS + +For Mac OS X: + +```bash +curl -O http://s3.amazonaws.com/downloads.basho.com/ +tar -xvzf +``` + +Replace `` with the actual filename for the package +you are installing. + +For RedHat Enterprise distributions (and similar): + +```bash +rpm -Uvh +``` + +Replace `` with the actual filename for the package +you are installing. + +Ubuntu distributions and similar: + +```bash +sudo dpkg -i +``` + +Replace `` with the actual filename for the package +you are installing. + +#### Finally, install Stanchion + +For Mac OS X: + +```bash +curl -O http://s3.amazonaws.com/downloads.basho.com/ +tar -xvzf +``` + +Replace `` with the actual filename for the +package you are installing. + +For RedHat Enterprise distributions (and similar): + +```bash +sudo rpm -Uvh +``` + +Replace `` with the actual filename for the +package you are installing. + +For Ubuntu distributions: + +```bash +sudo dpkg -i +``` + +Replace `` with the actual filename for the +package you are installing. + + +### Step 3: Set service configurations and start the services + +You will need to make changes to several configuration files. + +#### `/etc/riak/riak.conf` + +Be sure the storage backend is not set: + +```riakconf +## Delete this line +storage_backend = . . . +``` + +And that the default bucket properties allow siblings: + +```riakconf +## Append this line at the end of the file +buckets.default.allow_mult = true +``` + +Next, you need to expose the necessary Riak CS modules to Riak and instruct Riak +to use the custom backend provided by Riak CS. You'll have to use the old-style +`/etc/riak/advanced.config` for these settings. The file should look like: + +```advancedconfig +[ + {riak_kv, [ + {add_paths, ["/usr/lib/riak-cs/lib/riak_cs-2.1.2/ebin"]}, + {storage_backend, riak_cs_kv_multi_backend}, + {multi_backend_prefix_list, [{<<"0b:">>, be_blocks}]}, + {multi_backend_default, be_default}, + {multi_backend, [ + {be_default, riak_kv_eleveldb_backend, [ + {total_leveldb_mem_percent, 30}, + {data_root, "/var/lib/riak/leveldb"} + ]}, + {be_blocks, riak_kv_bitcask_backend, [ + {data_root, "/var/lib/riak/bitcask"} + ]} + ]} + ]} +]. +``` + +{{% note title="Note on OS-specific paths" %}} +The path for `add_paths` may be `/usr/lib/riak-cs` or `/usr/lib64/riak-cs` +depending on your operating system. +{{% /note %}} + +Next, set your interface IP addresses in the `riak.conf` file. In a +production environment, you'd likely have multiple NICs, but for this +test cluster, assume one NIC with an example IP address of 10.0.2.10. 
+
+Change the following lines in `/etc/riak/riak.conf`
+
+```riakconf
+listener.http.internal = 127.0.0.1:8098
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+to
+
+```riakconf
+listener.http.internal = 10.0.2.10:8098
+listener.protobuf.internal = 10.0.2.10:8087
+```
+
+#### `/etc/riak-cs/riak-cs.conf`
+
+Change the following lines in `/etc/riak-cs/riak-cs.conf`
+
+```riakcsconf
+listener = 127.0.0.1:8080
+riak_host = 127.0.0.1:8087
+stanchion_host = 127.0.0.1:8085
+```
+
+to
+
+```riakcsconf
+listener = 10.0.2.10:8080
+riak_host = 10.0.2.10:8087
+stanchion_host = 10.0.2.10:8085
+```
+
+The `listener` could also be set to `0.0.0.0` if you prefer Riak CS to listen on
+all interfaces.
+
+#### `/etc/stanchion/stanchion.conf`
+
+Change the following lines in `/etc/stanchion/stanchion.conf`
+
+```stanchionconf
+listener = 127.0.0.1:8085
+riak_host = 127.0.0.1:8087
+```
+
+to
+
+```stanchionconf
+listener = 10.0.2.10:8085
+riak_host = 10.0.2.10:8087
+```
+
+#### Service names
+
+Next, set your service names, using either the local IP address or
+hostnames. If you choose to set hostnames, you should ensure
+that the hostnames are resolvable by DNS or set in `/etc/hosts` on all
+nodes. **Note**: Service names require at least one period in the name.
+
+Change the following line in `/etc/riak/riak.conf`
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+to
+
+```riakconf
+nodename = riak@10.0.2.10
+```
+
+Then change the following line in `/etc/riak-cs/riak-cs.conf`
+
+```riakcsconf
+nodename = riak-cs@127.0.0.1
+```
+
+to
+
+```riakcsconf
+nodename = riak-cs@10.0.2.10
+```
+
+Change the following line in `/etc/stanchion/stanchion.conf`
+
+```stanchionconf
+nodename = stanchion@127.0.0.1
+```
+
+to
+
+```stanchionconf
+nodename = stanchion@10.0.2.10
+```
+
+#### Start the services
+
+That is the minimum amount of service configuration required to start a
+complete node. To start the services, run the following commands in the
+appropriate `/bin` directories:
+
+```bash
+sudo riak start
+sudo stanchion start
+sudo riak-cs start
+```
+
+The order in which you start the services is important, as each is a
+dependency for the next. Make sure that you successfully start Riak
+before Stanchion and Stanchion before Riak CS.
+
+You can check the liveness of your Riak CS installation and its
+connection to the supporting Riak node. If the Riak CS node is running,
+the following command should return `PONG`.
+
+```bash
+riak-cs ping
+```
+
+To check that the Riak CS node is communicating with its supporting Riak
+node, make a `GET` request against the `riak-cs/ping` endpoint of the
+Riak CS node. For example:
+
+```curl
+curl http://localhost:8080/riak-cs/ping
+```
+
+### Step 4: Create the admin user
+
+Creating the admin user is an optional step, but it's a good test of our
+new services. Creating a Riak CS user requires two inputs:
+
+1. **Name** --- A URL-encoded string, e.g. `admin%20user`
+2. **Email** --- A unique email address, e.g. `admin@admin.com`
+
+To create an admin user, we need to grant the `anonymous` user
+permission to create new users. This configuration setting is only
+required on a single Riak CS node.
+
+Add this entry to `/etc/riak-cs/riak-cs.conf`:
+
+```riakcsconf
+anonymous_user_creation = on
+```
+
+Then run `sudo riak-cs stop && sudo riak-cs start` to put the new config setting into
+effect.
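+
+As an optional sanity check (this is not required by the installation),
+you can confirm that the entry is in place before restarting:
+
+```bash
+# Should print: anonymous_user_creation = on
+grep anonymous_user_creation /etc/riak-cs/riak-cs.conf
+```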
+
+We can create the admin user with the following `curl` command, on the
+same Riak CS machine where the `anonymous_user_creation` configuration
+option was enabled:
+
+```curl
+curl -XPOST http://localhost:8080/riak-cs/user \
+  -H 'Content-Type: application/json' \
+  -d '{"email":"admin@admin.com", "name":"admin"}'
+```
+
+The output of this command will be a JSON object that looks something like this:
+
+```json
+{
+  "email": "admin@admin.com",
+  "display_name": "admin",
+  "name": "admin user",
+  "key_id": "5N2STDSXNV-US8BWF1TH",
+  "key_secret": "RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw==",
+  "id": "4b823566a2db0b7f50f59ad5e43119054fecf3ea47a5052d3c575ac8f990eda7"
+}
+```
+
+The user's access key and secret key are returned in the `key_id` and
+`key_secret` fields respectively. Take note of these keys as they will
+be required in the testing step.
+
+In this case, those keys are:
+
+* **Access key** --- `5N2STDSXNV-US8BWF1TH`
+* **Secret key** --- `RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw==`
+
+You can use this same process to create additional Riak CS users. To
+make this user the admin user, we set these keys in the Riak CS
+`riak-cs.conf` and `stanchion.conf` files.
+
+{{% note title="Note on admin keys" %}}
+The same admin keys will need to be set on all nodes of the cluster.
+{{% /note %}}
+
+Change the following lines in `/etc/riak-cs/riak-cs.conf` on all Riak CS
+machines:
+
+```riakcsconf
+admin.key = admin-key
+admin.secret = admin-secret
+```
+
+to
+
+```riakcsconf
+admin.key = 5N2STDSXNV-US8BWF1TH
+admin.secret = RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw==
+```
+
+**Note**: Make sure to set the `anonymous_user_creation` setting to
+`off` at this point.
+
+Change the following lines in `/etc/stanchion/stanchion.conf`
+
+```stanchionconf
+admin.key = admin-key
+admin.secret = admin-secret
+```
+
+to
+
+```stanchionconf
+admin.key = 5N2STDSXNV-US8BWF1TH
+admin.secret = RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw==
+```
+
+Now we have to restart the services for the change to take effect:
+
+```bash
+sudo stanchion stop && sudo stanchion start
+sudo riak-cs stop && sudo riak-cs start
+```
+
+## Installing Additional Nodes
+
+The process for installing additional nodes is identical to installing
+your first node with two exceptions:
+
+1. Stanchion only needs to be installed on your first node; there is no
+   need to install it again on each node. The `stanchion_host` setting in
+   your Riak CS `riak-cs.conf` files should point to the Stanchion node
+   you set up on your first machine.
+2. To add additional nodes to the Riak cluster, use the following
+   command:
+
+    ```bash
+    sudo riak-admin cluster join riak@10.0.2.10
+    ```
+
+    where `riak@10.0.2.10` is the `nodename` set in your first
+    node's `/etc/riak/riak.conf` file.
+
+You will then need to verify the cluster plan with the `riak-admin
+cluster plan` command, and commit the cluster changes with `riak-admin
+cluster commit` to complete the join process. More information is
+available in the [Command Line Tools]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster) documentation.
+
+> **Note**
+>
+> **Riak CS is not designed to function directly on TCP port 80, and
+should not be operated in a manner which exposes it directly to the
+public internet**. Instead, consider a load balancing solution,
+such as a dedicated device, [HAProxy](http://haproxy.1wt.eu),
+or [Nginx](http://wiki.nginx.org/Main) between Riak CS and
+the outside world.
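+
+For illustration only, a minimal HAProxy configuration fronting the
+single example node from this guide might look like the sketch below.
+The names (`riak_cs_http`, `riak_cs_nodes`, `riak_cs_01`) are
+placeholders, and a real deployment would need tuned timeouts, health
+checks, and one `server` line per Riak CS node:
+
+```config
+frontend riak_cs_http
+    bind *:80
+    mode http
+    default_backend riak_cs_nodes
+
+backend riak_cs_nodes
+    mode http
+    balance roundrobin
+    server riak_cs_01 10.0.2.10:8080 check
+```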
+
+Once you have completed this step, you can progress to [testing the Riak CS installation]({{}}riak/cs/2.1.2/tutorials/fast-track/test-installation) using s3cmd.
diff --git a/content/riak/cs/2.1.2/tutorials/fast-track/test-installation.md b/content/riak/cs/2.1.2/tutorials/fast-track/test-installation.md
new file mode 100644
index 0000000000..9d6bc2f306
--- /dev/null
+++ b/content/riak/cs/2.1.2/tutorials/fast-track/test-installation.md
@@ -0,0 +1,147 @@
+---
+title: "Testing the Riak CS Installation"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Testing the Installation"
+    identifier: "fast_track_test_install"
+    weight: 103
+    parent: "fast_track"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/tutorials/fast-track/Testing-the-Installation/
+  - /riak/cs/2.1.2/cookbooks/tutorials/fast-track/Testing-the-Installation/
+  - /riak/cs/latest/tutorials/fast-track/test-installation/
+---
+
+## Installing & Configuring s3cmd
+
+### Installation
+
+The simplest way to test the installation is using the `s3cmd` script.
+We can install it on Ubuntu by typing:
+
+``` bash
+sudo apt-get -y install s3cmd
+```
+
+>**s3cmd versions**
+>
+> The above command will install s3cmd version 1.1.0~beta3, which is the latest
+> version available in Debian's package repository. More current install
+> instructions suggest using Python's [pip](https://pypi.python.org/pypi/pip)
+> module to install s3cmd.
+> If this method is used, s3cmd will use AWS Signature version 4, which is
+> incompatible with Riak CS. As of s3cmd version 1.5.0, there is a
+> `--signature-v2` flag that can be used to have s3cmd use the older format and
+> correctly interact with Riak CS.
+
+For OS X users, either use the package manager of your preference or [download the s3cmd package](http://sourceforge.net/projects/s3tools/files/s3cmd/).
+You will need to extract the `.tar` file, change directories into the
+folder, and build the package. The process should look something like this:
+
+``` bash
+tar -xvzf s3cmd-1.6.1.tar.gz
+cd s3cmd-1.6.1
+sudo python setup.py install
+```
+
+You will be prompted to enter your system password. Enter it and then
+wait for the installation to complete.
+
+### Configuration
+
+We need to configure `s3cmd` to use our Riak CS server rather than S3, and
+to use the user keys we generated earlier. To do that interactively, type
+the following:
+
+``` bash
+s3cmd -c ~/.s3cfgfasttrack --configure
+```
+
+If you are already using `s3cmd` on your local machine, the `-c` switch
+allows you to specify a `.s3cfg` file without overwriting anything you
+may have presently configured.
+
+There are four default settings you should change:
+
+* Access Key --- Use the Riak CS user access key you generated above.
+* Secret Key --- Use the Riak CS user secret key you generated above.
+* Proxy Server --- Use your Riak CS IP. If you followed the virtual
+  environment configuration, use `localhost`.
+* Proxy Port --- The default Riak CS port is `8080`.
+
+You should have copied your Access Key and Secret Key from the prior
+installation steps.
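+
+After the interactive run, the relevant entries in `~/.s3cfgfasttrack`
+should look roughly like the following sketch, shown here with the
+example keys from the installation steps (your values will differ):
+
+```config
+access_key = 5N2STDSXNV-US8BWF1TH
+secret_key = RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw==
+proxy_host = localhost
+proxy_port = 8080
+use_https = False
+```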
+
+## Interacting with Riak CS via s3cmd
+
+>**Warning: s3cmd Signature Version**
+>
+> If you are using s3cmd version 1.5.0 or greater, you will need to append the
+> `--signature-v2` flag to every command that targets a Riak CS cluster to have
+> s3cmd use AWS Signature version 2 rather than the default AWS Signature
+> version 4.
+
+Once `s3cmd` is configured, we can use it to create a test bucket:
+
+``` bash
+s3cmd -c ~/.s3cfgfasttrack mb s3://test-bucket
+```
+
+We can see if it was created by typing:
+
+``` bash
+s3cmd -c ~/.s3cfgfasttrack ls
+```
+
+We can now upload a test file to that bucket:
+
+``` bash
+dd if=/dev/zero of=test_file bs=1M count=2 # Create a test file
+s3cmd -c ~/.s3cfgfasttrack put test_file s3://test-bucket
+```
+
+We can see if it was properly uploaded by typing:
+
+``` bash
+s3cmd -c ~/.s3cfgfasttrack ls s3://test-bucket
+```
+
+We can now download the test file. First, let's remove the file we
+generated previously:
+
+``` bash
+rm test_file
+```
+
+Now, we can download the `test_file` stored in Riak CS:
+
+```bash
+s3cmd -c ~/.s3cfgfasttrack get s3://test-bucket/test_file
+```
+
+We should immediately see output like this:
+
+```
+s3://test-bucket/test_file -> ./test_file  [1 of 1]
+ 2097152 of 2097152   100% in    0s    59.63 MB/s  done
+```
+
+To verify that the file has been downloaded into the current directory:
+
+```bash
+ls -lah test_file
+```
+
+## What's Next
+
+If you have made it this far, congratulations! You now have a working
+Riak CS test instance (either virtual or local). There is still a fair
+bit of learning to be done, so make sure to check out the Reference
+section (click "Reference" in the nav on the left side of this page). A
+few items that may be of particular interest:
+
+* [Details about API operations]({{}}riak/cs/2.1.2/references/apis/storage)
+* [Information about the Ruby Fog client]({{}}riak/cs/2.1.2/cookbooks/fog)
+* [Release Notes]({{}}riak/cs/2.1.2/cookbooks/release-notes)
diff --git a/content/riak/cs/2.1.2/tutorials/fast-track/virtual-test-environment.md b/content/riak/cs/2.1.2/tutorials/fast-track/virtual-test-environment.md
new file mode 100644
index 0000000000..d8b9e2f219
--- /dev/null
+++ b/content/riak/cs/2.1.2/tutorials/fast-track/virtual-test-environment.md
@@ -0,0 +1,103 @@
+---
+title: "Building a Virtual Test Environment"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "Building a Virtual Test Environment"
+    identifier: "fast_track_virtual_test"
+    weight: 102
+    parent: "fast_track"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/tutorials/fast-track/Building-a-Virtual-Test-Environment/
+  - /riak/cs/2.1.2/cookbooks/tutorials/fast-track/Building-a-Virtual-Test-Environment/
+  - /riak/cs/latest/tutorials/fast-track/virtual-test-environment/
+---
+
+This option for building an environment uses a Vagrant project powered
+by Chef to bring up a local Riak CS cluster. Each node can run either
+Ubuntu 12.04 or CentOS 6.5 64-bit with 1536MB of RAM by default. If you
+want to tune the OS or node/memory count, you'll have to edit the
+`Vagrantfile` directly.
+
+If you want to build a testing environment with more flexibility in
+configuration and durability across environment resets, there are
+instructions for [Building a Local Test Environment]({{}}riak/cs/2.1.2/tutorials/fast-track/local-testing-environment).
+
+## Configuration
+
+### Install Prerequisites
+
+* Download and install VirtualBox from [VirtualBox Downloads](https://www.virtualbox.org/wiki/Downloads).
+* Download and install Vagrant via the [Vagrant Installer](http://downloads.vagrantup.com/).
+
+**Note**: Please make sure to install Vagrant version 1.1.0 or later.
+
+### Install Vagrant Plugins
+
+Install the following Vagrant plugins:
+
+```bash
+vagrant plugin install vagrant-berkshelf
+vagrant plugin install vagrant-omnibus
+vagrant plugin install vagrant-cachier # Use RIAK_CS_USE_CACHE to enable
+```
+
+### Clone the Repository
+
+To begin, clone the GitHub repository to your local machine and change
+directories into the cloned folder:
+
+``` bash
+git clone https://github.com/basho/vagrant-riak-cs-cluster
+cd vagrant-riak-cs-cluster
+```
+
+### Launch Cluster
+
+With VirtualBox and Vagrant installed, it's time to launch our virtual
+environment. The command below will initiate the Vagrant project:
+
+``` bash
+RIAK_CS_CREATE_ADMIN_USER=1 vagrant up
+```
+
+If you haven't already downloaded the Ubuntu or CentOS Vagrant box, this
+step will download it.
+
+### Recording Admin User Credentials
+
+In the Chef provisioning output, you will see entries that look like
+this:
+
+```log
+[2013-03-27T11:59:12+00:00] INFO: Riak CS Key: 5N2STDSXNV-US8BWF1TH
+[2013-03-27T11:59:12+00:00] INFO: Riak CS Secret: RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw==
+```
+
+Take note of these keys as they will be required in the testing step.
+
+In this case, those keys are:
+
+```config
+Access key: 5N2STDSXNV-US8BWF1TH
+Secret key: RF7WD0b3RjfMK2cTaPfLkpZGbPDaeALDtqHeMw==
+```
+
+## Next Steps
+
+Congratulations! You have deployed a virtualized environment of Riak CS.
+You are ready to progress to [Testing the Riak CS Installation]({{}}riak/cs/2.1.2/tutorials/fast-track/test-installation).
+
+### Stopping Your Virtual Environment
+
+When you are done testing or simply want to start again from scratch,
+you can end the current virtualized environment:
+
+```bash
+vagrant destroy
+```
+
+**Note**: Executing this command will reset the environment to a clean
+state, removing any and all changes that you have made.
diff --git a/content/riak/cs/2.1.2/tutorials/fast-track/what-is-riak-cs.md b/content/riak/cs/2.1.2/tutorials/fast-track/what-is-riak-cs.md
new file mode 100644
index 0000000000..6b70daf83f
--- /dev/null
+++ b/content/riak/cs/2.1.2/tutorials/fast-track/what-is-riak-cs.md
@@ -0,0 +1,53 @@
+---
+title: "What Is Riak CS"
+description: ""
+menu:
+  riak_cs-2.1.2:
+    name: "What Is Riak CS?"
+    identifier: "fast_track_what"
+    weight: 100
+    parent: "fast_track"
+project: "riak_cs"
+project_version: "2.1.2"
+aliases:
+  - /riakcs/2.1.2/cookbooks/tutorials/fast-track/What-is-Riak-CS/
+  - /riak/cs/2.1.2/cookbooks/tutorials/fast-track/What-is-Riak-CS/
+  - /riak/cs/latest/tutorials/fast-track/what-is-riak-cs/
+---
+
+This page introduces the architecture behind Riak CS. If you already
+know this, you can skip it and progress to [Building a Local Test Environment](../local-testing-environment) or [Building a Virtual Testing Environment](../virtual-test-environment).
+
+## Architecture
+
+Riak CS is built on Riak. When an object is uploaded, Riak CS breaks the
+object into smaller blocks that are streamed, stored, and replicated in
+the underlying Riak cluster. Each block is associated with metadata for
+retrieval. Since data is replicated, and other nodes automatically take
+over responsibilities of nodes that go down, data remains available even
+in failure conditions.
+
+### How It Works
+
+In a Riak CS system, any node can respond to client requests --- there is
+no master node and each node has the same responsibilities. Since data
+is replicated (three replicas per object by default), and other nodes
+automatically take over the responsibility of failed or
+non-communicative nodes, data remains available even in the event of
+node failure or network partition.
+
+When an object is uploaded via the [storage API]({{}}riak/cs/2.1.2/references/apis/storage), Riak CS breaks the object into smaller chunks that are streamed,
+written, and replicated in Riak. Each chunk is associated with metadata
+for later retrieval. The diagram below provides a visualization.
+
+![Riak CS Chunking]({{}}images/Riak-CS-Overview.png)
+
+## Riak CS Enterprise
+
+Riak CS Enterprise extends Riak CS with Multi-Datacenter Replication,
+monitoring, and 24×7 support. Customers use Multi-Datacenter Replication
+to serve global traffic, create availability zones, maintain active
+backups, or meet disaster recovery and regulatory requirements.
+Multi-Datacenter Replication can be used across two or more sites. Data can
+be replicated across data centers using realtime or fullsync
+replication. To try out Riak CS Enterprise, sign up for a [developer trial](http://info.basho.com/RiakCS1.1_DeveloperTrialRequest.html).
diff --git a/content/riak/cs/latest/cookbooks/access-control-lists.md b/content/riak/cs/latest/cookbooks/access-control-lists.md
new file mode 100644
index 0000000000..214add14b0
--- /dev/null
+++ b/content/riak/cs/latest/cookbooks/access-control-lists.md
@@ -0,0 +1,18 @@
+---
+layout: latest_redirect
+project: riak_cs
+replace_text: latest
+latest_text: "{latest}"
+aliases:
+  - "/riakcs/latest/cookbooks/access-control-lists/"
+---
+
+Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath`
+where `{latest}` is the `project_descriptions.{project}.latest` value
+and `{project}` is the metadata value of `project` above.
+
+This page exists solely to redirect from the generated URL to the latest version of
+that url.
+
+
+
diff --git a/content/riak/cs/latest/cookbooks/account-management.md b/content/riak/cs/latest/cookbooks/account-management.md
new file mode 100644
index 0000000000..4c81a82c95
--- /dev/null
+++ b/content/riak/cs/latest/cookbooks/account-management.md
@@ -0,0 +1,18 @@
+---
+layout: latest_redirect
+project: riak_cs
+replace_text: latest
+latest_text: "{latest}"
+aliases:
+  - "/riakcs/latest/cookbooks/account-management/"
+---
+
+Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath`
+where `{latest}` is the `project_descriptions.{project}.latest` value
+and `{project}` is the metadata value of `project` above.
+
+This page exists solely to redirect from the generated URL to the latest version of
+that url.
+
+
+
diff --git a/content/riak/cs/latest/cookbooks/authentication.md b/content/riak/cs/latest/cookbooks/authentication.md
new file mode 100644
index 0000000000..8b1829dacc
--- /dev/null
+++ b/content/riak/cs/latest/cookbooks/authentication.md
@@ -0,0 +1,18 @@
+---
+layout: latest_redirect
+project: riak_cs
+replace_text: latest
+latest_text: "{latest}"
+aliases:
+  - "/riakcs/latest/cookbooks/authentication/"
+---
+
+Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath`
+where `{latest}` is the `project_descriptions.{project}.latest` value
+and `{project}` is the metadata value of `project` above.
+
+This page exists solely to redirect from the generated URL to the latest version of
+that url.
+ + + diff --git a/content/riak/cs/latest/cookbooks/command-line-tools.md b/content/riak/cs/latest/cookbooks/command-line-tools.md new file mode 100644 index 0000000000..a9676a154d --- /dev/null +++ b/content/riak/cs/latest/cookbooks/command-line-tools.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/command-line-tools/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/configuration.md b/content/riak/cs/latest/cookbooks/configuration.md new file mode 100644 index 0000000000..e2c0d1cda5 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/configuration.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/configuration/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/configuration/dragondisk.md b/content/riak/cs/latest/cookbooks/configuration/dragondisk.md new file mode 100644 index 0000000000..e4b58ee490 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/configuration/dragondisk.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/configuration/dragondisk/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/configuration/load-balancing-proxy.md b/content/riak/cs/latest/cookbooks/configuration/load-balancing-proxy.md new file mode 100644 index 0000000000..7793fe48cc --- /dev/null +++ b/content/riak/cs/latest/cookbooks/configuration/load-balancing-proxy.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/configuration/load-balancing-proxy/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/cookbooks/configuration/multi-datacenter.md b/content/riak/cs/latest/cookbooks/configuration/multi-datacenter.md new file mode 100644 index 0000000000..ed427f0bfb --- /dev/null +++ b/content/riak/cs/latest/cookbooks/configuration/multi-datacenter.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/configuration/multi-datacenter/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/configuration/reference.md b/content/riak/cs/latest/cookbooks/configuration/reference.md new file mode 100644 index 0000000000..03b065214d --- /dev/null +++ b/content/riak/cs/latest/cookbooks/configuration/reference.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/configuration/reference/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/configuration/riak-cs.md b/content/riak/cs/latest/cookbooks/configuration/riak-cs.md new file mode 100644 index 0000000000..94e0062e7a --- /dev/null +++ b/content/riak/cs/latest/cookbooks/configuration/riak-cs.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/configuration/riak-cs/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/configuration/riak-for-cs.md b/content/riak/cs/latest/cookbooks/configuration/riak-for-cs.md new file mode 100644 index 0000000000..1563b89c24 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/configuration/riak-for-cs.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/configuration/riak-for-cs/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/cookbooks/configuration/s3-client.md b/content/riak/cs/latest/cookbooks/configuration/s3-client.md new file mode 100644 index 0000000000..50012421a3 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/configuration/s3-client.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/configuration/s3-client/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/configuration/stanchion.md b/content/riak/cs/latest/cookbooks/configuration/stanchion.md new file mode 100644 index 0000000000..351ef44543 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/configuration/stanchion.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/configuration/stanchion/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/configuration/transmit.md b/content/riak/cs/latest/cookbooks/configuration/transmit.md new file mode 100644 index 0000000000..9c4c4217ff --- /dev/null +++ b/content/riak/cs/latest/cookbooks/configuration/transmit.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/configuration/transmit/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/designate-admin-user.md b/content/riak/cs/latest/cookbooks/designate-admin-user.md new file mode 100644 index 0000000000..0c048c5744 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/designate-admin-user.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/designate-admin-user/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/cookbooks/faqs/riak-cs.md b/content/riak/cs/latest/cookbooks/faqs/riak-cs.md new file mode 100644 index 0000000000..2c17d986e2 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/faqs/riak-cs.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/faqs/riak-cs/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/fog.md b/content/riak/cs/latest/cookbooks/fog.md new file mode 100644 index 0000000000..28cc1d6759 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/fog.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/fog/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/garbage-collection.md b/content/riak/cs/latest/cookbooks/garbage-collection.md new file mode 100644 index 0000000000..0228e78ff4 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/garbage-collection.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/garbage-collection/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/installing.md b/content/riak/cs/latest/cookbooks/installing.md new file mode 100644 index 0000000000..0774035a7e --- /dev/null +++ b/content/riak/cs/latest/cookbooks/installing.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/installing/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/installing/chef.md b/content/riak/cs/latest/cookbooks/installing/chef.md new file mode 100644 index 0000000000..10d8255e67 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/installing/chef.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/installing/chef/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/cookbooks/installing/launching-and-stopping.md b/content/riak/cs/latest/cookbooks/installing/launching-and-stopping.md new file mode 100644 index 0000000000..153a97e30f --- /dev/null +++ b/content/riak/cs/latest/cookbooks/installing/launching-and-stopping.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/installing/launching-and-stopping/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/keystone-conf-sample.md b/content/riak/cs/latest/cookbooks/keystone-conf-sample.md new file mode 100644 index 0000000000..db05b245ff --- /dev/null +++ b/content/riak/cs/latest/cookbooks/keystone-conf-sample.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/keystone-conf-sample/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/keystone-setup.md b/content/riak/cs/latest/cookbooks/keystone-setup.md new file mode 100644 index 0000000000..e3e084571c --- /dev/null +++ b/content/riak/cs/latest/cookbooks/keystone-setup.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/keystone-setup/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/logging.md b/content/riak/cs/latest/cookbooks/logging.md new file mode 100644 index 0000000000..07f1d399f6 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/logging.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/logging/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/cookbooks/monitoring-and-metrics.md b/content/riak/cs/latest/cookbooks/monitoring-and-metrics.md new file mode 100644 index 0000000000..00c443532c --- /dev/null +++ b/content/riak/cs/latest/cookbooks/monitoring-and-metrics.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/monitoring-and-metrics/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/multi-datacenter-overview.md b/content/riak/cs/latest/cookbooks/multi-datacenter-overview.md new file mode 100644 index 0000000000..2c34ff1fb7 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/multi-datacenter-overview.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/multi-datacenter-overview/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/multipart-upload-overview.md b/content/riak/cs/latest/cookbooks/multipart-upload-overview.md new file mode 100644 index 0000000000..8da6da4ddc --- /dev/null +++ b/content/riak/cs/latest/cookbooks/multipart-upload-overview.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/multipart-upload-overview/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/querying-access-statistics.md b/content/riak/cs/latest/cookbooks/querying-access-statistics.md new file mode 100644 index 0000000000..7c4c795c30 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/querying-access-statistics.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/querying-access-statistics/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/cookbooks/querying-storage-statistics.md b/content/riak/cs/latest/cookbooks/querying-storage-statistics.md new file mode 100644 index 0000000000..d73d38ab1c --- /dev/null +++ b/content/riak/cs/latest/cookbooks/querying-storage-statistics.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/querying-storage-statistics/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/release-notes.md b/content/riak/cs/latest/cookbooks/release-notes.md new file mode 100644 index 0000000000..df0e8e057e --- /dev/null +++ b/content/riak/cs/latest/cookbooks/release-notes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/release-notes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/rolling-upgrades.md b/content/riak/cs/latest/cookbooks/rolling-upgrades.md new file mode 100644 index 0000000000..12983baf22 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/rolling-upgrades.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/rolling-upgrades/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/supercluster.md b/content/riak/cs/latest/cookbooks/supercluster.md new file mode 100644 index 0000000000..0615209fa7 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/supercluster.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/supercluster/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/system-features.md b/content/riak/cs/latest/cookbooks/system-features.md new file mode 100644 index 0000000000..5da74485cd --- /dev/null +++ b/content/riak/cs/latest/cookbooks/system-features.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/system-features/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. 
+ +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/usage-and-billing-data.md b/content/riak/cs/latest/cookbooks/usage-and-billing-data.md new file mode 100644 index 0000000000..c236a6d393 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/usage-and-billing-data.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/usage-and-billing-data/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/using-with-keystone.md b/content/riak/cs/latest/cookbooks/using-with-keystone.md new file mode 100644 index 0000000000..3d496c37b4 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/using-with-keystone.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/using-with-keystone/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/cookbooks/version-compatibility.md b/content/riak/cs/latest/cookbooks/version-compatibility.md new file mode 100644 index 0000000000..21c5345742 --- /dev/null +++ b/content/riak/cs/latest/cookbooks/version-compatibility.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/cookbooks/version-compatibility/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/developing.md b/content/riak/cs/latest/developing.md new file mode 100644 index 0000000000..4716d9fcd7 --- /dev/null +++ b/content/riak/cs/latest/developing.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/developing/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/downloads.md b/content/riak/cs/latest/downloads.md new file mode 100644 index 0000000000..8da57c4f5b --- /dev/null +++ b/content/riak/cs/latest/downloads.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/downloads/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. 
+ +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/index.md b/content/riak/cs/latest/index.md new file mode 100644 index 0000000000..d5ee4a15aa --- /dev/null +++ b/content/riak/cs/latest/index.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/operating.md b/content/riak/cs/latest/operating.md new file mode 100644 index 0000000000..3aa39c71dc --- /dev/null +++ b/content/riak/cs/latest/operating.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/operating/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/operating/advanced.md b/content/riak/cs/latest/operating/advanced.md new file mode 100644 index 0000000000..811e3f9e68 --- /dev/null +++ b/content/riak/cs/latest/operating/advanced.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/operating/advanced/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/operating/running.md b/content/riak/cs/latest/operating/running.md new file mode 100644 index 0000000000..d0366ea3cc --- /dev/null +++ b/content/riak/cs/latest/operating/running.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/operating/running/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/reference.md b/content/riak/cs/latest/reference.md new file mode 100644 index 0000000000..6d3a5918b8 --- /dev/null +++ b/content/riak/cs/latest/reference.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/reference/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/apis.md b/content/riak/cs/latest/references/apis.md new file mode 100644 index 0000000000..199affe2e2 --- /dev/null +++ b/content/riak/cs/latest/references/apis.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage.md b/content/riak/cs/latest/references/apis/storage.md new file mode 100644 index 0000000000..9078a51d64 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/openstack.md b/content/riak/cs/latest/references/apis/storage/openstack.md new file mode 100644 index 0000000000..04c5035499 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/openstack.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/openstack/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/openstack/create-container.md b/content/riak/cs/latest/references/apis/storage/openstack/create-container.md new file mode 100644 index 0000000000..a4026acf21 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/openstack/create-container.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/openstack/create-container/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/apis/storage/openstack/create-object.md b/content/riak/cs/latest/references/apis/storage/openstack/create-object.md new file mode 100644 index 0000000000..543fb85aa8 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/openstack/create-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/openstack/create-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/openstack/delete-container.md b/content/riak/cs/latest/references/apis/storage/openstack/delete-container.md new file mode 100644 index 0000000000..4ad7c6ff6a --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/openstack/delete-container.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/openstack/delete-container/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/openstack/delete-object.md b/content/riak/cs/latest/references/apis/storage/openstack/delete-object.md new file mode 100644 index 0000000000..7174868b4e --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/openstack/delete-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/openstack/delete-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/openstack/get-object.md b/content/riak/cs/latest/references/apis/storage/openstack/get-object.md new file mode 100644 index 0000000000..47ea0efd16 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/openstack/get-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/openstack/get-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/apis/storage/openstack/list-containers.md b/content/riak/cs/latest/references/apis/storage/openstack/list-containers.md new file mode 100644 index 0000000000..c62e8e4984 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/openstack/list-containers.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/openstack/list-containers/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/openstack/list-objects.md b/content/riak/cs/latest/references/apis/storage/openstack/list-objects.md new file mode 100644 index 0000000000..662fe60e03 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/openstack/list-objects.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/openstack/list-objects/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api.md b/content/riak/cs/latest/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api.md new file mode 100644 index 0000000000..c627d12abf --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/openstack/mapping-from-oos-api-to-riak-cs-internal-api/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3.md b/content/riak/cs/latest/references/apis/storage/s3.md new file mode 100644 index 0000000000..4f2f7e7596 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/abort-multipart-upload.md b/content/riak/cs/latest/references/apis/storage/s3/abort-multipart-upload.md new file mode 100644 index 0000000000..e315ea2a6d --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/abort-multipart-upload.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/abort-multipart-upload/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/common-request-headers.md b/content/riak/cs/latest/references/apis/storage/s3/common-request-headers.md new file mode 100644 index 0000000000..34cdeb0d1a --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/common-request-headers.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/common-request-headers/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/common-response-headers.md b/content/riak/cs/latest/references/apis/storage/s3/common-response-headers.md new file mode 100644 index 0000000000..4cee4a63bc --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/common-response-headers.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/common-response-headers/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/complete-multipart-upload.md b/content/riak/cs/latest/references/apis/storage/s3/complete-multipart-upload.md new file mode 100644 index 0000000000..1764a6b0d3 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/complete-multipart-upload.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/complete-multipart-upload/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/delete-bucket-policy.md b/content/riak/cs/latest/references/apis/storage/s3/delete-bucket-policy.md new file mode 100644 index 0000000000..680ebac5de --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/delete-bucket-policy.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/delete-bucket-policy/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/delete-bucket.md b/content/riak/cs/latest/references/apis/storage/s3/delete-bucket.md new file mode 100644 index 0000000000..19bc18fa73 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/delete-bucket.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/delete-bucket/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/delete-multi.md b/content/riak/cs/latest/references/apis/storage/s3/delete-multi.md new file mode 100644 index 0000000000..46c0263fc7 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/delete-multi.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/delete-multi/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/delete-object.md b/content/riak/cs/latest/references/apis/storage/s3/delete-object.md new file mode 100644 index 0000000000..c5873025db --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/delete-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/delete-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/get-bucket-acl.md b/content/riak/cs/latest/references/apis/storage/s3/get-bucket-acl.md new file mode 100644 index 0000000000..c9f852a958 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/get-bucket-acl.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/get-bucket-acl/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/get-bucket-policy.md b/content/riak/cs/latest/references/apis/storage/s3/get-bucket-policy.md new file mode 100644 index 0000000000..5fe97b6119 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/get-bucket-policy.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/get-bucket-policy/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/get-bucket.md b/content/riak/cs/latest/references/apis/storage/s3/get-bucket.md new file mode 100644 index 0000000000..88f7c6295b --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/get-bucket.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/get-bucket/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/get-object-acl.md b/content/riak/cs/latest/references/apis/storage/s3/get-object-acl.md new file mode 100644 index 0000000000..c02732e493 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/get-object-acl.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/get-object-acl/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/get-object.md b/content/riak/cs/latest/references/apis/storage/s3/get-object.md new file mode 100644 index 0000000000..c514f6f497 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/get-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/get-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/get-service.md b/content/riak/cs/latest/references/apis/storage/s3/get-service.md new file mode 100644 index 0000000000..f0791a17db --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/get-service.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/get-service/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/head-object.md b/content/riak/cs/latest/references/apis/storage/s3/head-object.md new file mode 100644 index 0000000000..a0bcd3b04c --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/head-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/head-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/initiate-multipart-upload.md b/content/riak/cs/latest/references/apis/storage/s3/initiate-multipart-upload.md new file mode 100644 index 0000000000..aa5904db41 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/initiate-multipart-upload.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/initiate-multipart-upload/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/list-multipart-uploads.md b/content/riak/cs/latest/references/apis/storage/s3/list-multipart-uploads.md new file mode 100644 index 0000000000..866d835c41 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/list-multipart-uploads.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/list-multipart-uploads/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/list-parts.md b/content/riak/cs/latest/references/apis/storage/s3/list-parts.md new file mode 100644 index 0000000000..4846aefdfc --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/list-parts.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/list-parts/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api.md b/content/riak/cs/latest/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api.md new file mode 100644 index 0000000000..048da1ea6a --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/mapping-from-s3-api-to-riak-cs-internal-api/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/put-bucket-acl.md b/content/riak/cs/latest/references/apis/storage/s3/put-bucket-acl.md new file mode 100644 index 0000000000..ddb8c1bc7d --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/put-bucket-acl.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/put-bucket-acl/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/put-bucket-policy.md b/content/riak/cs/latest/references/apis/storage/s3/put-bucket-policy.md new file mode 100644 index 0000000000..e0ec482226 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/put-bucket-policy.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/put-bucket-policy/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/put-bucket.md b/content/riak/cs/latest/references/apis/storage/s3/put-bucket.md new file mode 100644 index 0000000000..451446296c --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/put-bucket.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/put-bucket/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/put-object-acl.md b/content/riak/cs/latest/references/apis/storage/s3/put-object-acl.md new file mode 100644 index 0000000000..6e396cc740 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/put-object-acl.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/put-object-acl/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/put-object-copy.md b/content/riak/cs/latest/references/apis/storage/s3/put-object-copy.md new file mode 100644 index 0000000000..63f0d5b509 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/put-object-copy.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/put-object-copy/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/put-object.md b/content/riak/cs/latest/references/apis/storage/s3/put-object.md new file mode 100644 index 0000000000..a48aca0f0a --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/put-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/put-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/apis/storage/s3/upload-part.md b/content/riak/cs/latest/references/apis/storage/s3/upload-part.md new file mode 100644 index 0000000000..305f053e15 --- /dev/null +++ b/content/riak/cs/latest/references/apis/storage/s3/upload-part.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/apis/storage/s3/upload-part/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/appendices/comparisons/atmos.md b/content/riak/cs/latest/references/appendices/comparisons/atmos.md new file mode 100644 index 0000000000..d91cacdd7e --- /dev/null +++ b/content/riak/cs/latest/references/appendices/comparisons/atmos.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/appendices/comparisons/atmos/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/appendices/comparisons/swift.md b/content/riak/cs/latest/references/appendices/comparisons/swift.md new file mode 100644 index 0000000000..77847cd3dd --- /dev/null +++ b/content/riak/cs/latest/references/appendices/comparisons/swift.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/appendices/comparisons/swift/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/references/appendices/http-admin.md b/content/riak/cs/latest/references/appendices/http-admin.md new file mode 100644 index 0000000000..5d2788b7cf --- /dev/null +++ b/content/riak/cs/latest/references/appendices/http-admin.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/appendices/http-admin/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/references/appendices/riak-cs-control.md b/content/riak/cs/latest/references/appendices/riak-cs-control.md new file mode 100644 index 0000000000..5479ff5d2f --- /dev/null +++ b/content/riak/cs/latest/references/appendices/riak-cs-control.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/references/appendices/riak-cs-control/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/theory.md b/content/riak/cs/latest/theory.md new file mode 100644 index 0000000000..b5d04a35c5 --- /dev/null +++ b/content/riak/cs/latest/theory.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/theory/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/theory/comparisons.md b/content/riak/cs/latest/theory/comparisons.md new file mode 100644 index 0000000000..2edae8ad0b --- /dev/null +++ b/content/riak/cs/latest/theory/comparisons.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/theory/comparisons/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/theory/stanchion.md b/content/riak/cs/latest/theory/stanchion.md new file mode 100644 index 0000000000..975ab516c7 --- /dev/null +++ b/content/riak/cs/latest/theory/stanchion.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/theory/stanchion/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/tutorials/fast-track.md b/content/riak/cs/latest/tutorials/fast-track.md new file mode 100644 index 0000000000..2c920a6478 --- /dev/null +++ b/content/riak/cs/latest/tutorials/fast-track.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/tutorials/fast-track/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/tutorials/fast-track/local-testing-environment.md b/content/riak/cs/latest/tutorials/fast-track/local-testing-environment.md new file mode 100644 index 0000000000..148c92e641 --- /dev/null +++ b/content/riak/cs/latest/tutorials/fast-track/local-testing-environment.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/tutorials/fast-track/local-testing-environment/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/tutorials/fast-track/test-installation.md b/content/riak/cs/latest/tutorials/fast-track/test-installation.md new file mode 100644 index 0000000000..c520d56fbf --- /dev/null +++ b/content/riak/cs/latest/tutorials/fast-track/test-installation.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/tutorials/fast-track/test-installation/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/cs/latest/tutorials/fast-track/virtual-test-environment.md b/content/riak/cs/latest/tutorials/fast-track/virtual-test-environment.md new file mode 100644 index 0000000000..a34f6e815d --- /dev/null +++ b/content/riak/cs/latest/tutorials/fast-track/virtual-test-environment.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/tutorials/fast-track/virtual-test-environment/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + diff --git a/content/riak/cs/latest/tutorials/fast-track/what-is-riak-cs.md b/content/riak/cs/latest/tutorials/fast-track/what-is-riak-cs.md new file mode 100644 index 0000000000..ceb7e03faa --- /dev/null +++ b/content/riak/cs/latest/tutorials/fast-track/what-is-riak-cs.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_cs +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakcs/latest/tutorials/fast-track/what-is-riak-cs/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + diff --git a/content/riak/index.md b/content/riak/index.md index e5f02ae42c..9718f835f6 100644 --- a/content/riak/index.md +++ b/content/riak/index.md @@ -1,6 +1,8 @@ --- layout: redirect target: "riak/kv/latest/" +aliases: + - "/riak/latest/" --- This page exists solely to redirect from the generated URL to the above `target` diff --git a/content/riak/kv/2.0.0/_reference-links.md b/content/riak/kv/2.0.0/_reference-links.md index 38e8d2bf43..6399dad694 100644 --- a/content/riak/kv/2.0.0/_reference-links.md +++ b/content/riak/kv/2.0.0/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.0.0/downloads/ -[install index]: /riak/kv/2.0.0/setup/installing -[upgrade index]: /riak/kv/2.0.0/upgrading -[plan index]: /riak/kv/2.0.0/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.0.0/configuring/reference/ -[manage index]: /riak/kv/2.0.0/using/managing -[performance index]: /riak/kv/2.0.0/using/performance -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.0.0/downloads/ +[install index]: {{}}riak/kv/2.0.0/setup/installing +[upgrade index]: {{}}riak/kv/2.0.0/upgrading +[plan index]: {{}}riak/kv/2.0.0/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.0.0/configuring/reference/ +[manage index]: {{}}riak/kv/2.0.0/using/managing +[performance index]: {{}}riak/kv/2.0.0/using/performance +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.0.0/setup/planning -[plan start]: /riak/kv/2.0.0/setup/planning/start -[plan backend]: /riak/kv/2.0.0/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.0/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.0/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.0.0/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.0.0/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.0.0/setup/planning/best-practices -[plan future]: /riak/kv/2.0.0/setup/planning/future +[plan index]: {{}}riak/kv/2.0.0/setup/planning +[plan start]: {{}}riak/kv/2.0.0/setup/planning/start +[plan backend]: {{}}riak/kv/2.0.0/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.0/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.0/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/2.0.0/setup/planning/cluster-capacity 
+[plan bitcask capacity]: {{}}riak/kv/2.0.0/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.0.0/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.0.0/setup/planning/future ## Installing -[install index]: /riak/kv/2.0.0/setup/installing -[install aws]: /riak/kv/2.0.0/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.0/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.0/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.0/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.0/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.0/setup/installing/smartos -[install solaris]: /riak/kv/2.0.0/setup/installing/solaris -[install suse]: /riak/kv/2.0.0/setup/installing/suse -[install windows azure]: /riak/kv/2.0.0/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.0.0/setup/installing +[install aws]: {{}}riak/kv/2.0.0/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.0/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.0/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.0/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.0/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.0/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.0/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.0/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.0/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.0/setup/installing/source -[install source erlang]: /riak/kv/2.0.0/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.0.0/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.0.0/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.0/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.0.0/setup/installing/source/jvm -[install verify]: /riak/kv/2.0.0/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.0/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.0.0/setup/upgrading -[upgrade checklist]: /riak/kv/2.0.0/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.0.0/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.0.0/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.0.0/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.0.0/setup/downgrade +[upgrade index]: {{}}riak/kv/2.0.0/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.0.0/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.0.0/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.0.0/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.0.0/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.0.0/setup/downgrade ## Configuring -[config index]: /riak/kv/2.0.0/configuring -[config basic]: /riak/kv/2.0.0/configuring/basic -[config backend]: /riak/kv/2.0.0/configuring/backend -[config manage]: /riak/kv/2.0.0/configuring/managing -[config reference]: /riak/kv/2.0.0/configuring/reference/ -[config strong consistency]: /riak/kv/2.0.0/configuring/strong-consistency -[config load balance]: /riak/kv/2.0.0/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.0.0/configuring/mapreduce -[config search]: /riak/kv/2.0.0/configuring/search/ +[config index]: {{}}riak/kv/2.0.0/configuring +[config basic]: {{}}riak/kv/2.0.0/configuring/basic +[config backend]: {{}}riak/kv/2.0.0/configuring/backend +[config manage]: {{}}riak/kv/2.0.0/configuring/managing +[config reference]: 
{{}}riak/kv/2.0.0/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.0.0/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.0.0/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.0.0/configuring/mapreduce +[config search]: {{}}riak/kv/2.0.0/configuring/search/ -[config v3 mdc]: /riak/kv/2.0.0/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.0/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.0/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.0.0/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.0.0/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.0.0/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.0.0/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.0.0/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.0.0/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.0.0/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.0.0/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.0.0/using/ -[use admin commands]: /riak/kv/2.0.0/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.0.0/using/running-a-cluster +[use index]: {{}}riak/kv/2.0.0/using/ +[use admin commands]: {{}}riak/kv/2.0.0/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.0.0/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.0.0/using/reference/custom-code -[use ref handoff]: /riak/kv/2.0.0/using/reference/handoff -[use ref monitoring]: /riak/kv/2.0.0/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.0.0/using/reference/search -[use ref 2i]: /riak/kv/2.0.0/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.0.0/using/reference/snmp -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.0.0/using/reference/jmx -[use ref obj del]: /riak/kv/2.0.0/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.0.0/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.0.0/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.0.0/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.0.0/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.0.0/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.0.0/using/reference/search +[use ref 2i]: {{}}riak/kv/2.0.0/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.0.0/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.0.0/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.0.0/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.0.0/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.0.0/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.0.0/using/admin/ -[use admin commands]: /riak/kv/2.0.0/using/admin/commands/ -[use admin riak cli]: /riak/kv/2.0.0/using/admin/riak-cli/ -[use admin riak-admin]: 
/riak/kv/2.0.0/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.0.0/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.0.0/using/admin/ +[use admin commands]: {{}}riak/kv/2.0.0/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.0.0/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.0.0/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.0.0/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.0.0/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.0.0/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.0.0/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.0.0/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.0.0/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.0.0/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.0.0/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.0.0/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.0.0/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.0.0/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.0.0/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.0.0/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.0.0/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.0.0/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.0.0/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.0.0/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.0.0/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.0.0/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.0.0/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.0.0/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.0.0/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.0.0/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.0.0/using/repair-recovery -[repair recover index]: /riak/kv/2.0.0/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.0.0/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.0.0/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.0.0/using/security/ -[security basics]: /riak/kv/2.0.0/using/security/basics -[security managing]: /riak/kv/2.0.0/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.0.0/using/security/ +[security basics]: {{}}riak/kv/2.0.0/using/security/basics +[security managing]: {{}}riak/kv/2.0.0/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.0.0/using/performance/ -[perf benchmark]: /riak/kv/2.0.0/using/performance/benchmarking -[perf 
open files]: /riak/kv/2.0.0/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.0.0/using/performance/erlang -[perf aws]: /riak/kv/2.0.0/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.0.0/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.0.0/using/performance/ +[perf benchmark]: {{}}riak/kv/2.0.0/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.0/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.0.0/using/performance/erlang +[perf aws]: {{}}riak/kv/2.0.0/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.0.0/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.0.0/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.0.0/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.0.0/developing -[dev client libraries]: /riak/kv/2.0.0/developing/client-libraries -[dev data model]: /riak/kv/2.0.0/developing/data-modeling -[dev data types]: /riak/kv/2.0.0/developing/data-types -[dev kv model]: /riak/kv/2.0.0/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.0.0/developing +[dev client libraries]: {{}}riak/kv/2.0.0/developing/client-libraries +[dev data model]: {{}}riak/kv/2.0.0/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.0/developing/data-types +[dev kv model]: {{}}riak/kv/2.0.0/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.0.0/developing/getting-started -[getting started java]: /riak/kv/2.0.0/developing/getting-started/java -[getting started ruby]: /riak/kv/2.0.0/developing/getting-started/ruby -[getting started python]: /riak/kv/2.0.0/developing/getting-started/python -[getting started php]: /riak/kv/2.0.0/developing/getting-started/php -[getting started csharp]: /riak/kv/2.0.0/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.0.0/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.0.0/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.0.0/developing/getting-started/golang - -[obj model java]: /riak/kv/2.0.0/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.0/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.0/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.0/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.0/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.0/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.0/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.0.0/developing/getting-started +[getting started java]: {{}}riak/kv/2.0.0/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.0.0/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.0.0/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.0.0/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.0.0/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.0.0/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.0.0/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.0.0/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.0.0/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.0.0/developing/getting-started/ruby/object-modeling 
+[obj model python]: {{}}riak/kv/2.0.0/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.0/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.0/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.0/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.0/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.0.0/developing/usage -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.0/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.0/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.0.0/developing/usage/content-types -[usage create objects]: /riak/kv/2.0.0/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.0.0/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.0.0/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.0.0/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.0/developing/usage/search -[usage search schema]: /riak/kv/2.0.0/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.0/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.0.0/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.0.0/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.0.0/developing/usage +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.0/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.0/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.0.0/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.0.0/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.0.0/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.0.0/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.0.0/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.0/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.0/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.0/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.0.0/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.0.0/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.0.0/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.0.0/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.0.0/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.0.0/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.0.0/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.0.0/developing/api/backend -[dev api http]: /riak/kv/2.0.0/developing/api/http -[dev api http status]: /riak/kv/2.0.0/developing/api/http/status -[dev api pbc]: /riak/kv/2.0.0/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.0.0/developing/api/backend +[dev api http]: {{}}riak/kv/2.0.0/developing/api/http +[dev api http status]: {{}}riak/kv/2.0.0/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.0.0/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: /riak/kv/learn/new-to-nosql -[learn use cases]: 
/riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.0.0/learn/glossary/ -[glossary aae]: /riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.0.0/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.0.0/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.0.0/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode -[concept aae]: /riak/kv/2.0.0/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.0.0/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.0/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.0/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.0.0/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.0/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.0/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.0/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.0/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.0.0/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.0/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.0/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.0.0/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.0/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.0/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.0/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.0/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.0.0/add-ons.md b/content/riak/kv/2.0.0/add-ons.md index 7fbd998b68..5364d4de88 100644 --- a/content/riak/kv/2.0.0/add-ons.md +++ b/content/riak/kv/2.0.0/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.0.0/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.0.0/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.0.0/add-ons/redis/developing-rra.md b/content/riak/kv/2.0.0/add-ons/redis/developing-rra.md index 9fab006d16..70190e6037 100644 --- a/content/riak/kv/2.0.0/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.0.0/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.0.0/developing/api/http +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.0.0/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.0.0/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.0.0/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.0.0/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.0.0/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.0.0/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.0.0/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.0.0/add-ons/redis/redis-add-on-features.md index 8dca3e51c0..18031fd3db 100644 --- a/content/riak/kv/2.0.0/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.0.0/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.0.0/add-ons/redis/set-up-rra.md b/content/riak/kv/2.0.0/add-ons/redis/set-up-rra.md index 1db79201fe..ba66b14634 100644 --- a/content/riak/kv/2.0.0/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.0.0/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.0.0/setup/installing -[perf open files]: /riak/kv/2.0.0/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.0.0/setup/installing +[perf open files]: {{}}riak/kv/2.0.0/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. 
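Once RRA is set up as above, the read-through and write-around strategies described in the `redis-add-on-features.md` hunks can be exercised from any Redis client. A hedged illustration follows, assuming the cache proxy listens on port 22122 (substitute the listen port from your own cache proxy configuration; the key name is arbitrary):

```
# hypothetical session against the RRA cache proxy
redis-cli -p 22122 SET test:fruit apple   # write-around: the write goes to Riak KV, not into the cache
redis-cli -p 22122 GET test:fruit         # cache miss: read through to Riak KV, result cached
redis-cli -p 22122 GET test:fruit         # cache hit: served from Redis until CACHE_TTL expires
redis-cli -p 22122 DEL test:fruit         # write-around: deleted from Riak KV, cached entry invalidated
```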
diff --git a/content/riak/kv/2.0.0/add-ons/redis/using-rra.md b/content/riak/kv/2.0.0/add-ons/redis/using-rra.md index 996e76f1f3..24047ab71c 100644 --- a/content/riak/kv/2.0.0/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.0.0/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.0.0/developing/api/http/ +[dev api http]: {{}}riak/kv/2.0.0/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.0.0/configuring/backend.md b/content/riak/kv/2.0.0/configuring/backend.md index bb992a70b5..64f126544a 100644 --- a/content/riak/kv/2.0.0/configuring/backend.md +++ b/content/riak/kv/2.0.0/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.0/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.0/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.0/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.0/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.0.0/configuring/basic.md b/content/riak/kv/2.0.0/configuring/basic.md index 9c495fd0cb..492b25ee78 100644 --- a/content/riak/kv/2.0.0/configuring/basic.md +++ b/content/riak/kv/2.0.0/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.0.0/ops/building/configuration/ --- -[config reference]: /riak/kv/2.0.0/configuring/reference -[use running cluster]: /riak/kv/2.0.0/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.0.0/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.0.0/using/performance/erlang -[plan start]: /riak/kv/2.0.0/setup/planning/start -[plan best practices]: /riak/kv/2.0.0/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.0.0/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.0.0/setup/planning/backend -[plan backend multi]: /riak/kv/2.0.0/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.0.0/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.0.0/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.0/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.0/using/performance -[perf aws]: /riak/kv/2.0.0/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.0.0/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[use running cluster]: {{}}riak/kv/2.0.0/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.0.0/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.0.0/using/performance/erlang +[plan start]: {{}}riak/kv/2.0.0/setup/planning/start +[plan best practices]: {{}}riak/kv/2.0.0/setup/planning/best-practices +[cluster ops backup]: 
{{}}riak/kv/2.0.0/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.0.0/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.0.0/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.0/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.0.0/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.0.0/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.0/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.0/using/performance +[perf aws]: {{}}riak/kv/2.0.0/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.0.0/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.0.0/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.0/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
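For example, a sketch of forcing a single existing bucket to pick up the new defaults over HTTP; the host, port, and bucket name are illustrative:

```curl
# Reset the bucket's properties so it inherits the new defaults
curl -XDELETE http://127.0.0.1:8098/buckets/my_bucket/props
```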
## System tuning diff --git a/content/riak/kv/2.0.0/configuring/load-balancing-proxy.md b/content/riak/kv/2.0.0/configuring/load-balancing-proxy.md index 97222cb4f5..0851e20340 100644 --- a/content/riak/kv/2.0.0/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.0.0/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.0.0/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.0.0/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.0.0/configuring/managing.md b/content/riak/kv/2.0.0/configuring/managing.md index 156132dceb..51e2122074 100644 --- a/content/riak/kv/2.0.0/configuring/managing.md +++ b/content/riak/kv/2.0.0/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.0.0/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.0.0/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.0.0/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.0.0/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.0.0/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.0.0/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.0.0/configuring/mapreduce.md b/content/riak/kv/2.0.0/configuring/mapreduce.md index e46de91c16..f5c867b4ac 100644 --- a/content/riak/kv/2.0.0/configuring/mapreduce.md +++ b/content/riak/kv/2.0.0/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.0.0/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.0.0/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.0.0/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.0.0/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.0/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.0.0/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.0.0/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.0.0/configuring/reference.md b/content/riak/kv/2.0.0/configuring/reference.md index aa1cb175de..5e9b05772e 100644 --- a/content/riak/kv/2.0.0/configuring/reference.md +++ b/content/riak/kv/2.0.0/configuring/reference.md @@ -1876,8 +1876,8 @@ package) and in R14B04 via a custom repository and branch. vnode_management_timer Sets the frequency with which vnodes attempt to trigger handoff between +href="../../learn/glossary/#vnode">vnodes attempt to trigger handoff between this node and other nodes in the cluster. 
10s (10 seconds) diff --git a/content/riak/kv/2.0.0/configuring/search.md b/content/riak/kv/2.0.0/configuring/search.md index cf60c18cae..1373cb02ab 100644 --- a/content/riak/kv/2.0.0/configuring/search.md +++ b/content/riak/kv/2.0.0/configuring/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.0.0/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.0.0/developing/usage/search -[usage search schema]: /riak/kv/2.0.0/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.0/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.0.0/developing/usage/custom-extractors -[config reference]: /riak/kv/2.0.0/configuring/reference -[config reference#search]: /riak/kv/2.0.0/configuring/reference/#search -[glossary aae]: /riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.0.0/using/security/ +[usage search]: {{}}riak/kv/2.0.0/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.0/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.0/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.0.0/developing/usage/custom-extractors +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[config reference#search]: {{}}riak/kv/2.0.0/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.0.0/using/security/ > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Riak Search Settings](http://docs.basho.com/riak/1.4.8/ops/advanced/configs/search/). +Yokozuna). This document covers Riak's Search subsystem from an operational perspective. 
If you are looking for more developer-focused diff --git a/content/riak/kv/2.0.0/configuring/strong-consistency.md b/content/riak/kv/2.0.0/configuring/strong-consistency.md index b580d86178..6bfaf33792 100644 --- a/content/riak/kv/2.0.0/configuring/strong-consistency.md +++ b/content/riak/kv/2.0.0/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.0.0/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.0.0/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.0.0/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.0.0/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.0.0/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.0.0/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.0.0/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.0.0/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.0.0/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.0.0/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.0.0/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.0.0/learn/concepts/causal-context -[dev data types]: /riak/kv/2.0.0/developing/data-types -[glossary aae]: /riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.0.0/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.0.0/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.0.0/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.0.0/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.0.0/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.0.0/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.0.0/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.0.0/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.0.0/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.0.0/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.0.0/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.0.0/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.0.0/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.0.0/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.0.0/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.0.0/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.0.0/learn/concepts/causal-context +[dev data types]: 
{{}}riak/kv/2.0.0/developing/data-types +[glossary aae]: {{}}riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.0.0/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.0.0/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.0.0/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.0.0/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.0.0/configuring/v2-multi-datacenter.md b/content/riak/kv/2.0.0/configuring/v2-multi-datacenter.md index 929fa33205..57d8632d05 100644 --- a/content/riak/kv/2.0.0/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.0/configuring/v2-multi-datacenter.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.0/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.0.0/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.0/configuring/v2-multi-datacenter/ssl Riak Enterprise's Multi-Datacenter Replication capabilities offer a variety of configurable parameters. diff --git a/content/riak/kv/2.0.0/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.0.0/configuring/v2-multi-datacenter/nat.md index 4603e62965..abd6a68b12 100644 --- a/content/riak/kv/2.0.0/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.0/configuring/v2-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.0/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.0.0/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.0/configuring/v2-multi-datacenter/ssl Riak Enterprise supports replication of data on networks that use static NAT. This capability can be used for replicating data over the internet diff --git a/content/riak/kv/2.0.0/configuring/v3-multi-datacenter.md b/content/riak/kv/2.0.0/configuring/v3-multi-datacenter.md index 294dbef698..77d54b1eb1 100644 --- a/content/riak/kv/2.0.0/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.0/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.0/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.0.0/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.0.0/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/nat.md index ecfc06c66d..4eb2ac6955 100644 --- a/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.0/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. 
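As a sketch of the v3 NAT support described above, a static NAT mapping is typically registered with `riak-repl`; the addresses below are illustrative, so consult the NAT documentation for your exact topology:

```bash
# Map a public (external) address to this node's internal address,
# then verify the mapping
riak-repl nat-map add 50.16.238.123:9080 192.168.1.2
riak-repl nat-map show
```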
diff --git a/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/quick-start.md index 70034d7405..bcad58a84d 100644 --- a/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.0/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.0.0/using/performance -[config v3 mdc]: /riak/kv/2.0.0/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.0.0/using/performance +[config v3 mdc]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl.md index c1a8144166..1ffabc18bf 100644 --- a/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.0/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.0.0/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.0.0/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.0.0/developing/api/backend.md b/content/riak/kv/2.0.0/developing/api/backend.md index 8ae5dd8a3c..9e2478865a 100644 --- a/content/riak/kv/2.0.0/developing/api/backend.md +++ b/content/riak/kv/2.0.0/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/backend-api --- -[plan backend]: /riak/kv/2.0.0/setup/planning/backend +[plan backend]: {{}}riak/kv/2.0.0/setup/planning/backend Riak's storage API uniformly applies to all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.0.0/developing/api/http.md b/content/riak/kv/2.0.0/developing/api/http.md index 8c7c4bc3b9..823c895f77 100644 --- a/content/riak/kv/2.0.0/developing/api/http.md +++ b/content/riak/kv/2.0.0/developing/api/http.md @@ -29,50 +29,50 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. 
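For instance, a key named `docs/readme` would be requested with the slash encoded; the bucket and key are illustrative:

```curl
# The embedded slash in the key must be sent as %2F
curl http://localhost:8098/types/default/buckets/my_bucket/keys/docs%2Freadme
```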
Method | URL | Doc :------|:----|:--- -`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.0.0/developing/api/http/get-bucket-props) -`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.0.0/developing/api/http/set-bucket-props) -`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.0.0/developing/api/http/reset-bucket-props) -`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.0.0/developing/api/http/list-buckets) -`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.0.0/developing/api/http/list-keys) +`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.0.0/developing/api/http/get-bucket-props) +`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.0.0/developing/api/http/set-bucket-props) +`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.0/developing/api/http/reset-bucket-props) +`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.0.0/developing/api/http/list-buckets) +`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.0.0/developing/api/http/list-keys) ## Object-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.0.0/developing/api/http/fetch-object) -`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.0/developing/api/http/store-object) -`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.0/developing/api/http/store-object) -`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.0.0/developing/api/http/delete-object) +`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.0.0/developing/api/http/fetch-object) +`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.0.0/developing/api/http/store-object) +`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.0.0/developing/api/http/store-object) +`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.0.0/developing/api/http/delete-object) ## Riak-Data-Type-related Operations -For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.0.0/learn/concepts/crdts), -see the `curl` examples in [Using Data Types](/riak/kv/2.0.0/developing/data-types). +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.0.0/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.0.0/developing/data-types).
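As a quick sketch of those Data Type endpoints, following the `curl` examples on the Using Data Types page and assuming an activated bucket type named `counters` created with `datatype = counter` (the type, bucket, and key here are illustrative):

```curl
# Fetch the current value of a counter over HTTP
curl http://localhost:8098/types/counters/buckets/pages/datatypes/traffic
```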
## Query-related Operations Method | URL | Doc :------|:----|:--- -`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.0.0/developing/api/http/mapreduce) -`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<value>` | [HTTP Secondary Indexes](/riak/kv/2.0.0/developing/api/http/secondary-indexes) -`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<start>/<end>` | [HTTP Secondary Indexes](/riak/kv/2.0.0/developing/api/http/secondary-indexes) +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.0.0/developing/api/http/mapreduce) +`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.0/developing/api/http/secondary-indexes) +`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.0/developing/api/http/secondary-indexes) ## Server-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/ping` | [HTTP Ping](/riak/kv/2.0.0/developing/api/http/ping) -`GET` | `/stats` | [HTTP Status](/riak/kv/2.0.0/developing/api/http/status) -`GET` | `/` | [HTTP List Resources](/riak/kv/2.0.0/developing/api/http/list-resources) +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.0.0/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.0.0/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.0.0/developing/api/http/list-resources) ## Search-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/search/query/<index>` | [HTTP Search Query](/riak/kv/2.0.0/developing/api/http/search-query) -`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.0.0/developing/api/http/search-index-info) -`GET` | `/search/index/<index>` | [HTTP Fetch Search Index](/riak/kv/2.0.0/developing/api/http/fetch-search-index) -`PUT` | `/search/index/<index>` | [HTTP Store Search Index](/riak/kv/2.0.0/developing/api/http/store-search-index) -`DELETE` | `/search/index/<index>` | [HTTP Delete Search Index](/riak/kv/2.0.0/developing/api/http/delete-search-index) -`GET` | `/search/schema/<schema>` | [HTTP Fetch Search Schema](/riak/kv/2.0.0/developing/api/http/fetch-search-schema) -`PUT` | `/search/schema/<schema>` | [HTTP Store Search Schema](/riak/kv/2.0.0/developing/api/http/store-search-schema) +`GET` | `/search/query/<index>` | [HTTP Search Query]({{}}riak/kv/2.0.0/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.0.0/developing/api/http/search-index-info) +`GET` | `/search/index/<index>` | [HTTP Fetch Search Index]({{}}riak/kv/2.0.0/developing/api/http/fetch-search-index) +`PUT` | `/search/index/<index>` | [HTTP Store Search Index]({{}}riak/kv/2.0.0/developing/api/http/store-search-index) +`DELETE` | `/search/index/<index>` | [HTTP Delete Search Index]({{}}riak/kv/2.0.0/developing/api/http/delete-search-index) +`GET` | `/search/schema/<schema>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.0.0/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/<schema>` | [HTTP Store Search Schema]({{}}riak/kv/2.0.0/developing/api/http/store-search-schema) diff --git a/content/riak/kv/2.0.0/developing/api/http/counters.md b/content/riak/kv/2.0.0/developing/api/http/counters.md index de3904969e..77c34ff979 100644 --- a/content/riak/kv/2.0.0/developing/api/http/counters.md +++ b/content/riak/kv/2.0.0/developing/api/http/counters.md @@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY ## Response -The regular POST/PUT ([HTTP Store Object](/riak/kv/2.0.0/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.0.0/developing/api/http/fetch-object)) responses apply here.
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.0.0/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.0.0/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.0.0/developing/api/http/fetch-object.md b/content/riak/kv/2.0.0/developing/api/http/fetch-object.md index e789738c2c..3d6fc24d4b 100644 --- a/content/riak/kv/2.0.0/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.0.0/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.0.0/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.0.0/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.0.0/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.0/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.0.0/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.0/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.0.0/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.0.0/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.0/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.0/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.0.0/developing/api/http/fetch-search-index.md b/content/riak/kv/2.0.0/developing/api/http/fetch-search-index.md index 8d9164bf68..f85f079fed 100644 --- a/content/riak/kv/2.0.0/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.0.0/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.0.0/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.0.0/developing/usage/search/#simple-setup). 
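For example, fetching an index named `famous` (an illustrative name) returns its name, `n_val`, and schema:

```curl
curl http://localhost:8098/search/index/famous
```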
## Request @@ -36,7 +36,7 @@ GET /search/index/ ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.0.0/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.0.0/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.0.0/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.0.0/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.0.0/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.0.0/developing/api/http/fetch-search-schema.md index 9464ac731b..e1c9056cd1 100644 --- a/content/riak/kv/2.0.0/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.0.0/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.0.0/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.0.0/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.0.0/developing/api/http/get-bucket-props.md b/content/riak/kv/2.0.0/developing/api/http/get-bucket-props.md index 4c747b981f..33b111022b 100644 --- a/content/riak/kv/2.0.0/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.0.0/developing/api/http/get-bucket-props.md @@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.0.0/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.0.0/developing/api/http/list-keys). ## Response @@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.0.0/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{}}riak/kv/2.0.0/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.0.0/developing/api/http/link-walking.md b/content/riak/kv/2.0.0/developing/api/http/link-walking.md index 2ff65b316e..fde4f9b496 100644 --- a/content/riak/kv/2.0.0/developing/api/http/link-walking.md +++ b/content/riak/kv/2.0.0/developing/api/http/link-walking.md @@ -17,8 +17,8 @@ aliases: Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.0.0/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.0.0/learn/glossary/#links). +is a special case of [MapReduce]({{}}riak/kv/2.0.0/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.0.0/learn/glossary/#links). ## Request @@ -64,7 +64,7 @@ single object that was found. If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. 
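A hedged sketch of a link-walking request, assuming objects in a `people` bucket linked with the tag `friend`; the bucket, key, and tag are illustrative:

```curl
# Follow links tagged "friend" from the object at people/hobbes,
# keeping the results of this phase (the trailing 1)
curl http://localhost:8098/buckets/people/keys/hobbes/people,friend,1
```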
Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.0.0/developing/api/http/fetch-object), without the status +response from [fetching the object]({{}}riak/kv/2.0.0/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.0.0/developing/api/http/list-resources.md b/content/riak/kv/2.0.0/developing/api/http/list-resources.md index 602059114d..f2828b9211 100644 --- a/content/riak/kv/2.0.0/developing/api/http/list-resources.md +++ b/content/riak/kv/2.0.0/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.0.0/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.0.0/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.0.0/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.0.0/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.0.0/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.0.0/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.0.0/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.0.0/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.0.0/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.0.0/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.0.0/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.0.0/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.0.0/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.0.0/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.0.0/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.0.0/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.0.0/developing/api/http/mapreduce.md b/content/riak/kv/2.0.0/developing/api/http/mapreduce.md index 9dbd97e240..eb08f2b02a 100644 --- a/content/riak/kv/2.0.0/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.0.0/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.0.0/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{}}riak/kv/2.0.0/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.0.0/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. 
The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.0.0/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.0.0/developing/api/http/search-index-info.md b/content/riak/kv/2.0.0/developing/api/http/search-index-info.md index 0c534a42a7..96b41616dd 100644 --- a/content/riak/kv/2.0.0/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.0.0/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.0.0/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.0.0/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.0.0/developing/api/http/search-query.md b/content/riak/kv/2.0.0/developing/api/http/search-query.md index 53d9cabe9a..b91c37fc35 100644 --- a/content/riak/kv/2.0.0/developing/api/http/search-query.md +++ b/content/riak/kv/2.0.0/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.0.0/developing/usage/search) query. +Performs a [Riak KV Search]({{}}riak/kv/2.0.0/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.0.0/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{}}riak/kv/2.0.0/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.0.0/developing/api/http/secondary-indexes.md b/content/riak/kv/2.0.0/developing/api/http/secondary-indexes.md index afba892111..18a3a13bd6 100644 --- a/content/riak/kv/2.0.0/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.0.0/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.0.0/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{}}riak/kv/2.0.0/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
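For instance, an exact-match query on a binary index looks like this; the bucket, field, and value are illustrative:

```curl
# Return all keys in the "users" bucket whose twitter_bin index
# value is exactly "jsmith123"
curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith123
```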
## Request diff --git a/content/riak/kv/2.0.0/developing/api/http/set-bucket-props.md b/content/riak/kv/2.0.0/developing/api/http/set-bucket-props.md index 6f13dd98b3..576d64230a 100644 --- a/content/riak/kv/2.0.0/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.0.0/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.0.0/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.0.0/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{}}riak/kv/2.0.0/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.0.0/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.0.0/developing/api/http/status.md b/content/riak/kv/2.0.0/developing/api/http/status.md index 58ef35bf17..2727dc09f1 100644 --- a/content/riak/kv/2.0.0/developing/api/http/status.md +++ b/content/riak/kv/2.0.0/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.0.0/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.0.0/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.0.0/developing/api/http/store-object.md b/content/riak/kv/2.0.0/developing/api/http/store-object.md index 0f758e1acb..977506511b 100644 --- a/content/riak/kv/2.0.0/developing/api/http/store-object.md +++ b/content/riak/kv/2.0.0/developing/api/http/store-object.md @@ -38,8 +38,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.0.0/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.0/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{}}riak/kv/2.0.0/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.0/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -83,7 +83,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.0.0/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.0.0/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. 
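A minimal sketch of a store that asks for the stored body back; the host, bucket, and key names are illustrative:

```curl
# Store a plain-text value and have Riak echo the stored body
curl -XPUT "http://localhost:8098/types/default/buckets/my_bucket/keys/my_key?returnbody=true" \
  -H "Content-Type: text/plain" \
  -d "hello world"
```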
diff --git a/content/riak/kv/2.0.0/developing/api/http/store-search-index.md b/content/riak/kv/2.0.0/developing/api/http/store-search-index.md index 0587e3aacd..de0fea3438 100644 --- a/content/riak/kv/2.0.0/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.0.0/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.0.0/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.0.0/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.0.0/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.0.0/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.0.0/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.0.0/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.0.0/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.0.0/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.0.0/developing/api/http/store-search-schema.md b/content/riak/kv/2.0.0/developing/api/http/store-search-schema.md index 9de9b6a5d9..5f33b4eef1 100644 --- a/content/riak/kv/2.0.0/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.0.0/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.0.0/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.0.0/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/schema/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.0.0/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.0.0/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the file `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request: diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers.md index 9a7d10ae06..4f1620a6d9 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers.md @@ -139,47 +139,47 @@ message RpbErrorResp { ## Bucket Operations -* [PBC List Buckets](/riak/kv/2.0.0/developing/api/protocol-buffers/list-buckets) -* [PBC List Keys](/riak/kv/2.0.0/developing/api/protocol-buffers/list-keys) -* [PBC Get Bucket Properties](/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props) -* [PBC Set Bucket Properties](/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props) -* [PBC Reset Bucket Properties](/riak/kv/2.0.0/developing/api/protocol-buffers/reset-bucket-props) +* [PBC List Buckets]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/reset-bucket-props) ## Object/Key Operations -* [PBC Fetch Object](/riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object) -* [PBC Store Object](/riak/kv/2.0.0/developing/api/protocol-buffers/store-object) -* [PBC Delete Object](/riak/kv/2.0.0/developing/api/protocol-buffers/delete-object) +* [PBC Fetch Object]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/delete-object) ## Query Operations -* [PBC MapReduce](/riak/kv/2.0.0/developing/api/protocol-buffers/mapreduce) -* [PBC Secondary Indexes](/riak/kv/2.0.0/developing/api/protocol-buffers/secondary-indexes) -* [PBC Search](/riak/kv/2.0.0/developing/api/protocol-buffers/search) +* [PBC MapReduce]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/search) ## Server Operations -* [PBC Ping](/riak/kv/2.0.0/developing/api/protocol-buffers/ping) -* [PBC Server Info](/riak/kv/2.0.0/developing/api/protocol-buffers/server-info) +* [PBC Ping]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/server-info) ## Bucket Type Operations -* [PBC Get Bucket Type](/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-type) -* [PBC Set Bucket Type](/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-type) +* [PBC Get Bucket Type]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-type) ## Data Type Operations -* [PBC Data Type Fetch](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-fetch) -* [PBC Data Type Union](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-union) -* [PBC Data Type Store](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-store) -* [PBC Data Type Counter
Store](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/auth-req.md index 0fc28b20b2..495137ecad 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/auth-req.md @@ -27,4 +27,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.0.0/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.0.0/using/security/basics). diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/delete-object.md index a8ea5b8135..e53094b968 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/delete-object.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.0.0/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{}}riak/kv/2.0.0/using/cluster-operations/bucket-types)/bucket/key location. 
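While this page documents the protocol buffer encoding, the same operation is easy to exercise manually through the HTTP API; the names below are illustrative:

```curl
# HTTP equivalent of the PBC delete: remove the object at
# default/my_bucket/my_key
curl -XDELETE http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
```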
## Request diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-counter-store.md index 7afd10e180..a4f05f5fb7 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.0.0/developing/data-types). +An operation to update a [counter]({{}}riak/kv/2.0.0/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-fetch.md index 0072b3e4e8..54fe7ec65b 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.0.0/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.0.0/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.0.0/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{}}riak/kv/2.0.0/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. 
```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.0.0/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{< baseurl >}}riak/kv/2.0.0/learn/glossary/#vector-clock) for standard KV updates. The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store.md index 6e7327119f..35ee021db8 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-set-store.md index cd73684d97..78c10080ba 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-store.md index 9eb36f107c..abc6c31742 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.0.0/developing/data-types). +A request to update the value of a [Riak Data Type]({{< baseurl >}}riak/kv/2.0.0/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`.
The `DtOp` value specifies which Data Type-specific operation is being -performed. More on that in the [PBC Data Type Union](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-union) document. +performed. More on that in the [PBC Data Type Union]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-union) document. ```protobuf message DtUpdateReq { @@ -50,11 +50,11 @@ message DtUpdateReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored -`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.0.0/using/cluster-operations/bucket-types). +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.0.0/using/cluster-operations/bucket-types). Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a -[counter](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store). +[counter]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-counter-store), [set]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-set-store), or [map]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-map-store). ```protobuf message DtOp { @@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum` Parameter | Description :---------|:----------- `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. -`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.0.0/learn/glossary/#vector-clock) +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{< baseurl >}}riak/kv/2.0.0/learn/glossary/#vector-clock) `w` | Write quorum, i.e. how many replicas to write to before returning a successful response `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted @@ -92,7 +92,7 @@ Parameter | Description ## Response The response to a Data Type update request is analogous to -[`RpbPutResp`](/riak/kv/2.0.0/developing/api/protocol-buffers/store-object) for KV operations. If the +[`RpbPutResp`]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/store-object) for KV operations. If the `return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-union.md index e0b7662660..84da868ea2 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-union.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/dt-union.md @@ -28,4 +28,4 @@ message DtOp { ``` The included operation depends on the Data Type that is being updated.
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.0.0/developing/api/protocol-buffers/dt-store) message. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object.md index d0e9e01946..f1a089963b 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object.md @@ -47,7 +47,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or @@ -87,7 +87,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.0.0/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -114,7 +114,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `video/mp4` -* `vtag` --- The object's [vtag](/riak/kv/2.0.0/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{< baseurl >}}riak/kv/2.0.0/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -132,7 +132,7 @@ of the following optional parameters: } ``` Notice that both a key and value can be stored or just a key. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.0.0/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{< baseurl >}}riak/kv/2.0.0/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key) diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props.md index 1307822de1..76b4a6f6c0 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props.md @@ -26,7 +26,7 @@ message RpbGetBucketReq { } ``` -The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.0.0/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +The bucket's name (`bucket`) must be specified. The [bucket type]({{< baseurl >}}riak/kv/2.0.0/using/cluster-operations/bucket-types) parameter (`type`) is optional.
If it is not specified, the `default` bucket type will be used. ## Response @@ -85,7 +85,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.0.0/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{< baseurl >}}riak/kv/2.0.0/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -106,5 +106,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riakcs/latest/cookbooks/mdc-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/latest/cookbooks/mdc-overview/) {{% /note %}} diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-type.md index 0f54f9048f..c9c4107fb4 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-type.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.0.0/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{< baseurl >}}riak/kv/2.0.0/using/cluster-operations/bucket-types). ## Request @@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-client-id.md index 17fb00476e..df8dc044f2 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/get-client-id.md @@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.0.0/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/set-client-id).
## Request diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/mapreduce.md index 0fc7496b9c..6e041a5ac9 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/mapreduce.md @@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways: * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.0.0/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.0.0/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as [REST API]({{< baseurl >}}riak/kv/2.0.0/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{< baseurl >}}riak/kv/2.0.0/developing/app-guide/advanced-mapreduce/#erlang) ## Response diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/reset-bucket-props.md index 251b4a04ec..af0b57eb63 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/reset-bucket-props.md @@ -27,7 +27,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/secondary-indexes.md index 4dbedd18f6..17cc72506b 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/secondary-indexes.md @@ -61,7 +61,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.0.0/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -84,7 +84,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object).
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props.md index 362985bf0c..7afbed3eca 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props.md @@ -29,9 +29,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-type.md index 643d3c9254..506133b595 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-type.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.0/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.0.0/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types). ## Request @@ -28,4 +28,4 @@ message RpbSetBucketTypeReq { ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/store-object.md index 868259b2bc..1074cda1e6 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/store-object.md @@ -16,11 +16,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.0.0/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.0.0/learn/concepts/buckets), and [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{< baseurl >}}riak/kv/2.0.0/learn/concepts/keys-and-objects), [bucket]({{< baseurl >}}riak/kv/2.0.0/learn/concepts/buckets), and [bucket type]({{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types).
A bucket must always be specified (via `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no -[bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) is assigned, Riak will assign -`default`, which means that the [default bucket configuration](/riak/kv/2.0.0/configuring/reference/#default-bucket-properties) will be used. +[bucket type]({{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types) is assigned, Riak will assign +`default`, which means that the [default bucket configuration]({{< baseurl >}}riak/kv/2.0.0/configuring/reference/#default-bucket-properties) will be used. #### Request @@ -50,7 +50,7 @@ message RpbPutReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket, in bytes, in which the key/value is to reside -`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object) +`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object) #### Optional Parameters @@ -93,7 +93,7 @@ message RpbPutResp { If `return_body` is set to `true` on the PUT request, the `RpbPutResp` will contain the current object after the PUT completes, in `contents`, -as well as the object's [causal context](/riak/kv/2.0.0/learn/concepts/causal-context), in the `vclock` +as well as the object's [causal context]({{< baseurl >}}riak/kv/2.0.0/learn/concepts/causal-context), in the `vclock` field. The `key` will be sent only if the server generated a random key for the object. diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-get.md index 03f45e7057..d44a68a34a 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-get.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-get.md @@ -53,7 +53,7 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.0/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.0.0/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-put.md index 99107c0dea..fef017924f 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-put.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-index-put.md @@ -37,4 +37,4 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.0/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests).
An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.0.0/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-get.md index 5e97360d4c..ce52dfbfbc 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.0.0/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{< baseurl >}}riak/kv/2.0.0/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-put.md index 685de06b9e..46bc0af254 100644 --- a/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.0.0/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.0.0/developing/usage/search-schemas). +Create a new Solr [search schema]({{< baseurl >}}riak/kv/2.0.0/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.0.0/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{< baseurl >}}riak/kv/2.0.0/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.0/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{< baseurl >}}riak/kv/2.0.0/developing/api/protocol-buffers/#message-codes) code with no data on success.
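As a concrete companion to the schema-put message definitions above, here is a minimal sketch of uploading a schema with the official Erlang client; the schema name and XML file path are illustrative assumptions.

```erlang
%% Minimal sketch: create a Solr search schema via the Erlang client.
%% The schema name and XML file path are illustrative assumptions.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
{ok, SchemaXML} = file:read_file("my_schema.xml"),
ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXML).
```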
diff --git a/content/riak/kv/2.0.0/developing/app-guide.md b/content/riak/kv/2.0.0/developing/app-guide.md index ce4e9c024a..fa96910f9f 100644 --- a/content/riak/kv/2.0.0/developing/app-guide.md +++ b/content/riak/kv/2.0.0/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.0.0/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.0.0/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.0.0/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.0.0/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.0.0/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.0.0/developing/key-value-modeling -[dev data types]: /riak/kv/2.0.0/developing/data-types -[dev data types#counters]: /riak/kv/2.0.0/developing/data-types/counters -[dev data types#sets]: /riak/kv/2.0.0/developing/data-types/sets -[dev data types#maps]: /riak/kv/2.0.0/developing/data-types/maps -[usage create objects]: /riak/kv/2.0.0/developing/usage/creating-objects -[usage search]: /riak/kv/2.0.0/developing/usage/search -[use ref search]: /riak/kv/2.0.0/using/reference/search -[usage 2i]: /riak/kv/2.0.0/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.0.0/developing/client-libraries -[concept crdts]: /riak/kv/2.0.0/learn/concepts/crdts -[dev data model]: /riak/kv/2.0.0/developing/data-modeling -[usage mapreduce]: /riak/kv/2.0.0/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.0.0/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.0.0/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.0/setup/planning/backend/memory -[obj model java]: /riak/kv/2.0.0/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.0/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.0/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.0/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.0/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.0/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.0/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.0.0/using/reference/strong-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.0.0/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.0.0/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.0.0/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.0.0/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties -[install index]: /riak/kv/2.0.0/setup/installing -[getting started]: /riak/kv/2.0.0/developing/getting-started -[usage index]: /riak/kv/2.0.0/developing/usage -[glossary]: /riak/kv/2.0.0/learn/glossary +[usage conflict resolution]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/conflict-resolution +[dev data model#log]: {{< baseurl >}}riak/kv/2.0.0/developing/data-modeling/#log-data +[dev data model#sensor]: {{< baseurl >}}riak/kv/2.0.0/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.0.0/learn/concepts/eventual-consistency
+[dev data model#user]: {{< baseurl >}}riak/kv/2.0.0/developing/data-modeling/#user-data +[dev kv model]: {{< baseurl >}}riak/kv/2.0.0/developing/key-value-modeling +[dev data types]: {{< baseurl >}}riak/kv/2.0.0/developing/data-types +[dev data types#counters]: {{< baseurl >}}riak/kv/2.0.0/developing/data-types/counters +[dev data types#sets]: {{< baseurl >}}riak/kv/2.0.0/developing/data-types/sets +[dev data types#maps]: {{< baseurl >}}riak/kv/2.0.0/developing/data-types/maps +[usage create objects]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/creating-objects +[usage search]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/search +[use ref search]: {{< baseurl >}}riak/kv/2.0.0/using/reference/search +[usage 2i]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/secondary-indexes +[dev client libraries]: {{< baseurl >}}riak/kv/2.0.0/developing/client-libraries +[concept crdts]: {{< baseurl >}}riak/kv/2.0.0/learn/concepts/crdts +[dev data model]: {{< baseurl >}}riak/kv/2.0.0/developing/data-modeling +[usage mapreduce]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/mapreduce +[apps mapreduce]: {{< baseurl >}}riak/kv/2.0.0/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{< baseurl >}}riak/kv/2.0.0/using/reference/secondary-indexes +[plan backend leveldb]: {{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/leveldb +[plan backend bitcask]: {{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/bitcask +[plan backend memory]: {{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/memory +[obj model java]: {{< baseurl >}}riak/kv/2.0.0/developing/getting-started/java/object-modeling +[obj model ruby]: {{< baseurl >}}riak/kv/2.0.0/developing/getting-started/ruby/object-modeling +[obj model python]: {{< baseurl >}}riak/kv/2.0.0/developing/getting-started/python/object-modeling +[obj model csharp]: {{< baseurl >}}riak/kv/2.0.0/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{< baseurl >}}riak/kv/2.0.0/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{< baseurl >}}riak/kv/2.0.0/developing/getting-started/erlang/object-modeling +[obj model golang]: {{< baseurl >}}riak/kv/2.0.0/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{< baseurl >}}riak/kv/2.0.0/using/reference/strong-consistency +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.0.0/using/reference/strong-consistency +[cluster ops strong consistency]: {{< baseurl >}}riak/kv/2.0.0/using/cluster-operations/strong-consistency +[config strong consistency]: {{< baseurl >}}riak/kv/2.0.0/configuring/strong-consistency +[apps strong consistency]: {{< baseurl >}}riak/kv/2.0.0/developing/app-guide/strong-consistency +[usage update objects]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/updating-objects +[apps replication properties]: {{< baseurl >}}riak/kv/2.0.0/developing/app-guide/replication-properties +[install index]: {{< baseurl >}}riak/kv/2.0.0/setup/installing +[getting started]: {{< baseurl >}}riak/kv/2.0.0/developing/getting-started +[usage index]: {{< baseurl >}}riak/kv/2.0.0/developing/usage +[glossary]: {{< baseurl >}}riak/kv/2.0.0/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -118,7 +118,7 @@ Riak may not be such a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance.
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.0.0/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.0.0/developing/app-guide/advanced-mapreduce.md index af4565cbe1..4e0e87c453 100644 --- a/content/riak/kv/2.0.0/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.0.0/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.0/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.0.0/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.0.0/using/reference/custom-code -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.0/configuring/reference +[usage 2i]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/secondary-indexes +[apps replication properties]: {{< baseurl >}}riak/kv/2.0.0/developing/app-guide/replication-properties +[use ref custom code]: {{< baseurl >}}riak/kv/2.0.0/using/reference/custom-code +[usage bucket types]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types +[glossary vnode]: {{< baseurl >}}riak/kv/2.0.0/learn/glossary/#vnode +[config reference]: {{< baseurl >}}riak/kv/2.0.0/configuring/reference > **Use MapReduce sparingly** > @@ -725,7 +725,7 @@ You can use streaming with Erlang via the Riak local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{< baseurl >}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.0.0/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.0.0/developing/app-guide/cluster-metadata.md index 44c8bf0a4d..5366ee5d58 100644 --- a/content/riak/kv/2.0.0/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.0.0/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.0.0/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{< baseurl >}}riak/kv/2.0.0/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.0.0/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{< baseurl >}}riak/kv/2.0.0/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{< baseurl >}}riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts.
Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.0.0/developing/app-guide/replication-properties.md b/content/riak/kv/2.0.0/developing/app-guide/replication-properties.md index 79a8f94def..f0d3768fdb 100644 --- a/content/riak/kv/2.0.0/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.0.0/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.0/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters +[usage bucket types]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.0.0/learn/concepts/eventual-consistency +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.0.0/using/reference/strong-consistency +[concept clusters]: {{< baseurl >}}riak/kv/2.0.0/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.0/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{< baseurl >}}riak/kv/2.0.0/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.0.0/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{< baseurl >}}riak/kv/2.0.0/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.0.0/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{< baseurl >}}riak/kv/2.0.0/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.0/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{< baseurl >}}riak/kv/2.0.0/learn/concepts/buckets) that you're using.
In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.0.0/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{< baseurl >}}riak/kv/2.0.0/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.0/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. In Riak, the replication factor (N) is the number of copies of each object stored, the read quorum (R) is how many replicas must respond before a read is considered a success, and the write quorum (W) is how many replicas must respond before a write is considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.0.0/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{< baseurl >}}riak/kv/2.0.0/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis; a client-level sketch follows this hunk. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable.
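To ground those client-level settings, here is a minimal sketch of per-request overrides using the official Erlang client; the host and port are illustrative assumptions, while the bucket and key mirror the `nba_stats`/`michael_jordan` curl example used elsewhere on this page.

```erlang
%% Minimal sketch: override replication properties on a single read
%% and a single write. Host and port are illustrative assumptions;
%% the bucket and key mirror the curl example on this page.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
{ok, Obj} = riakc_pb_socket:get(Pid, <<"nba_stats">>, <<"michael_jordan">>,
                                [{r, 3}, {pr, 1}]),
ok = riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
```

Options passed this way apply only to the individual request, leaving the bucket-level defaults untouched.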
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.0/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{< baseurl >}}riak/kv/2.0.0/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.0.0/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.0.0/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.0.0/setup/planning/backend/multi). +documentation on [Bitcask]({{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/bitcask), [LevelDB]({{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/leveldb), and [multiple backends]({{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.0/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{< baseurl >}}riak/kv/2.0.0/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.0.0/developing/usage) +refer to the section on [development usage with Riak KV]({{< baseurl >}}riak/kv/2.0.0/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.0/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{< baseurl >}}riak/kv/2.0.0/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client.
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.0/developing/app-guide/strong-consistency.md b/content/riak/kv/2.0.0/developing/app-guide/strong-consistency.md index 684d0a8518..3957f79c67 100644 --- a/content/riak/kv/2.0.0/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.0.0/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.0.0/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/2.1.3/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.0.0/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.0.0/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.0.0/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.0.0/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.0.0/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.0.0/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.0.0/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.0.0/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/2.1.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.0.0/developing/client-libraries -[getting started]: /riak/kv/2.0.0/developing/getting-started -[config strong consistency#details]: /riak/kv/2.0.0/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.0.0/using/reference/strong-consistency +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.0.0/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{< baseurl >}}riak/kv/2.0.0/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{< baseurl >}}riak/kv/2.0.0/learn/glossary/#vnode +[config strong consistency#enable]: {{< baseurl >}}riak/kv/2.0.0/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types +[cluster ops bucket types]: {{< baseurl >}}riak/kv/2.0.0/using/cluster-operations/bucket-types +[apps replication properties]: {{< baseurl >}}riak/kv/2.0.0/developing/app-guide/replication-properties
+[config strong consistency]: {{< baseurl >}}riak/kv/2.0.0/configuring/strong-consistency +[config strong consistency#fault]: {{< baseurl >}}riak/kv/2.0.0/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{< baseurl >}}riak/kv/2.0.0/learn/concepts/causal-context +[concept causal context#vector]: {{< baseurl >}}riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{< baseurl >}}riak/kv/2.0.0/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/conflict-resolution +[usage update objects]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/updating-objects +[use ref strong consistency#vs]: {{< baseurl >}}riak/kv/2.0.0/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{< baseurl >}}riak/kv/2.0.0/developing/client-libraries +[getting started]: {{< baseurl >}}riak/kv/2.0.0/developing/getting-started +[config strong consistency#details]: {{< baseurl >}}riak/kv/2.0.0/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.0.0/developing/client-libraries.md b/content/riak/kv/2.0.0/developing/client-libraries.md index 18dd5e7aa3..b490ca7ca0 100644 --- a/content/riak/kv/2.0.0/developing/client-libraries.md +++ b/content/riak/kv/2.0.0/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{< baseurl >}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.0.0/developing/data-types.md b/content/riak/kv/2.0.0/developing/data-types.md index 67cc1c2c16..50bdf69c79 100644 --- a/content/riak/kv/2.0.0/developing/data-types.md +++ b/content/riak/kv/2.0.0/developing/data-types.md @@ -38,9 +38,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -261,5 +261,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries.
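To illustrate that workflow end to end, here is a minimal sketch with the official Erlang client, assuming a bucket type named `counters` has already been created with `datatype` set to `counter` and activated; the bucket and key names are illustrative assumptions.

```erlang
%% Minimal sketch: increment and fetch a counter. Assumes a bucket
%% type "counters" created with datatype = counter and activated;
%% bucket and key names are illustrative assumptions.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
Counter = riakc_counter:increment(5, riakc_counter:new()),
ok = riakc_pb_socket:update_type(Pid, {<<"counters">>, <<"page_views">>},
                                 <<"home">>, riakc_counter:to_op(Counter)),
{ok, Fetched} = riakc_pb_socket:fetch_type(Pid, {<<"counters">>, <<"page_views">>},
                                           <<"home">>).
```

Note that the client builds the update locally and ships only the operation (the `to_op/1` result) to Riak, which is what makes concurrent increments from different actors safe to merge.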
diff --git a/content/riak/kv/2.0.0/developing/faq.md b/content/riak/kv/2.0.0/developing/faq.md index 3dda13d8c7..31a5e52b34 100644 --- a/content/riak/kv/2.0.0/developing/faq.md +++ b/content/riak/kv/2.0.0/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.0.0/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.0.0/using/performance/benchmarking -[Bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.0.0/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.0.0/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.0.0/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.0.0/developing/client-libraries -[MapReduce]: /riak/kv/2.0.0/developing/usage/mapreduce -[Memory]: /riak/kv/2.0.0/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.0.0/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.0.0/learn/concepts/causal-context#vector-clocks +[[Basho Bench]: {{< baseurl >}}riak/kv/2.0.0/using/performance/benchmarking +[Bitcask]: {{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/bitcask +[Bucket Properties]: {{< baseurl >}}riak/kv/2.0.0/developing/usage +[commit hooks]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/commit-hooks +[Configuration Files]: {{< baseurl >}}riak/kv/2.0.0/configuring/reference +[Erlang Riak Client]: {{< baseurl >}}riak/kv/2.0.0/developing/client-libraries +[MapReduce]: {{< baseurl >}}riak/kv/2.0.0/developing/usage/mapreduce +[Memory]: {{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/memory +[Riak CS]: {{< baseurl >}}riak/cs/2.1.1 +[System Planning]: {{< baseurl >}}riak/kv/2.0.0/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{< baseurl >}}riak/kv/2.0.0/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.0.0/developing/getting-started.md b/content/riak/kv/2.0.0/developing/getting-started.md index f3e1b9d251..380a39464d 100644 --- a/content/riak/kv/2.0.0/developing/getting-started.md +++ b/content/riak/kv/2.0.0/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.0.0/setup/installing -[dev client libraries]: /riak/kv/2.0.0/developing/client-libraries +[install index]: {{< baseurl >}}riak/kv/2.0.0/setup/installing +[dev client libraries]: {{< baseurl >}}riak/kv/2.0.0/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.0.0/developing/getting-started/csharp.md b/content/riak/kv/2.0.0/developing/getting-started/csharp.md index bba9a70a5e..df96fc6f9c 100644 --- a/content/riak/kv/2.0.0/developing/getting-started/csharp.md +++ b/content/riak/kv/2.0.0/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.0/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.0.0/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak.
## Next Steps -[CRUD Operations](/riak/kv/2.0.0/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.0.0/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.0.0/developing/getting-started/csharp/querying.md b/content/riak/kv/2.0.0/developing/getting-started/csharp/querying.md index b3a559cf22..08d30d0d6a 100644 --- a/content/riak/kv/2.0.0/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.0.0/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.0/developing/getting-started/erlang.md b/content/riak/kv/2.0.0/developing/getting-started/erlang.md index f0452872ba..9310dbb28f 100644 --- a/content/riak/kv/2.0.0/developing/getting-started/erlang.md +++ b/content/riak/kv/2.0.0/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.0/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.0.0/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.0/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.0.0/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.0.0/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.0.0/developing/getting-started/erlang/object-modeling.md index 4874733e69..6e92cd1ef0 100644 --- a/content/riak/kv/2.0.0/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.0.0/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.0/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{< baseurl >}}riak/kv/2.0.0/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record.
Then, when a user creates a message, we will append that message
diff --git a/content/riak/kv/2.0.0/developing/getting-started/erlang/querying.md b/content/riak/kv/2.0.0/developing/getting-started/erlang/querying.md
index f5ac9b640d..cfce41962c 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/erlang/querying.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/erlang/querying.md
@@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These
querying methods will introduce you to some ways of laying out your data
in Riak, along with how to query it back.

-A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.0.0/developing/key-value-modeling).
+A more comprehensive discussion can be found in [Key/Value Modeling]({{<baseurl>}}riak/kv/2.0.0/developing/key-value-modeling).

## Denormalization

@@ -215,9 +215,9 @@ intrinsic relationships.
## Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from an SQL world, Secondary Indexes (2i) are a lot
diff --git a/content/riak/kv/2.0.0/developing/getting-started/golang.md b/content/riak/kv/2.0.0/developing/getting-started/golang.md
index 98043991a7..3595f23a43 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/golang.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/golang.md
@@ -15,7 +15,7 @@ aliases:
  - /riak/kv/2.0.0/dev/taste-of-riak/golang
---

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.0/using/running-a-cluster) first and ensure you have
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.0.0/using/running-a-cluster) first and ensure you have
[a working installation of Go](http://golang.org/doc/install).

## Client Setup
@@ -75,4 +75,4 @@ We are now ready to start interacting with Riak.

## Next Steps

-[CRUD Operations](/riak/kv/2.0.0/developing/getting-started/golang/crud-operations)
+[CRUD Operations]({{<baseurl>}}riak/kv/2.0.0/developing/getting-started/golang/crud-operations)
diff --git a/content/riak/kv/2.0.0/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.0.0/developing/getting-started/golang/object-modeling.md
index f0f4df0289..7a89795ea3 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/golang/object-modeling.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/golang/object-modeling.md
@@ -135,7 +135,7 @@ func (t *Timeline) GetId() string {
}
````

-We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.0/developing/usage/bucket-types) here, so we don't need to specify one.
+We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{<baseurl>}}riak/kv/2.0.0/developing/usage/bucket-types) here, so we don't need to specify one.

To use these records to store data, we will first have to create a user
record. Then, when a user creates a message, we will append that message
diff --git a/content/riak/kv/2.0.0/developing/getting-started/golang/querying.md b/content/riak/kv/2.0.0/developing/getting-started/golang/querying.md
index 3474414cd1..9a7addf25e 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/golang/querying.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/golang/querying.md
@@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c
### Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time:
diff --git a/content/riak/kv/2.0.0/developing/getting-started/java.md b/content/riak/kv/2.0.0/developing/getting-started/java.md
index ca6db64647..e8b98e8891 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/java.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/java.md
@@ -17,7 +17,7 @@ aliases:

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.0/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.0.0/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of Java is required.
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster);

## Next Steps

-[CRUD Operations](/riak/kv/2.0.0/developing/getting-started/java/crud-operations)
+[CRUD Operations]({{<baseurl>}}riak/kv/2.0.0/developing/getting-started/java/crud-operations)
diff --git a/content/riak/kv/2.0.0/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.0.0/developing/getting-started/java/crud-operations.md
index 40da57934d..d34948b8da 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/java/crud-operations.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/java/crud-operations.md
@@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp);
```

For more in-depth information on updating objects and sibling resolution in
-Riak, see [Updating Objects](/riak/kv/2.0.0/developing/usage/updating-objects/)
-and [Conflict Resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution/)
+Riak, see [Updating Objects]({{<baseurl>}}riak/kv/2.0.0/developing/usage/updating-objects/)
+and [Conflict Resolution]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/)
documentation.

## Deleting Objects

@@ -178,6 +178,6 @@ UpdateValue.Response response = client.execute(updateValue);
```

For more in-depth information on updating objects and sibling resolution in
-Riak, see [Updating Objects](/riak/kv/2.0.0/developing/usage/updating-objects/)
-and [Conflict Resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution/)
+Riak, see [Updating Objects]({{<baseurl>}}riak/kv/2.0.0/developing/usage/updating-objects/)
+and [Conflict Resolution]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/)
documentation.
diff --git a/content/riak/kv/2.0.0/developing/getting-started/java/querying.md b/content/riak/kv/2.0.0/developing/getting-started/java/querying.md
index 614647d7a8..8b739e4d6a 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/java/querying.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/java/querying.md
@@ -194,9 +194,9 @@ intrinsic relationships.
## Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from an SQL world, Secondary Indexes (2i) are a lot
diff --git a/content/riak/kv/2.0.0/developing/getting-started/nodejs.md b/content/riak/kv/2.0.0/developing/getting-started/nodejs.md
index a0d86a16b4..7e0c0c0c91 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/nodejs.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/nodejs.md
@@ -21,7 +21,7 @@ aliases:

[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.0/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.0.0/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of Node.js 0.12 or later is required.
@@ -97,4 +97,4 @@ We are now ready to start interacting with Riak.

## Next Steps

-[CRUD Operations](/riak/kv/2.0.0/developing/getting-started/nodejs/crud-operations)
+[CRUD Operations]({{<baseurl>}}riak/kv/2.0.0/developing/getting-started/nodejs/crud-operations)
diff --git a/content/riak/kv/2.0.0/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.0.0/developing/getting-started/nodejs/querying.md
index dd42f76e4b..f0e5fa967a 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/nodejs/querying.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/nodejs/querying.md
@@ -87,9 +87,9 @@ intrinsic relationships.
## Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from an SQL world, Secondary Indexes (2i) are a lot
diff --git a/content/riak/kv/2.0.0/developing/getting-started/php.md b/content/riak/kv/2.0.0/developing/getting-started/php.md
index cfb69c74bf..3eaba6681d 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/php.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/php.md
@@ -15,7 +15,7 @@ aliases:
  - /riak/kv/2.0.0/dev/taste-of-riak/php
---

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.0/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.0.0/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package.
@@ -73,4 +73,4 @@ We are now ready to start interacting with Riak.

## Next Steps

-[CRUD Operations](/riak/kv/2.0.0/developing/getting-started/php/crud-operations)
+[CRUD Operations]({{<baseurl>}}riak/kv/2.0.0/developing/getting-started/php/crud-operations)
diff --git a/content/riak/kv/2.0.0/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.0.0/developing/getting-started/php/crud-operations.md
index 40ca1a1660..ab836d9e64 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/php/crud-operations.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/php/crud-operations.md
@@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess:

## Next Steps

-More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter](/riak/kv/2.0.0/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents.
+More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{<baseurl>}}riak/kv/2.0.0/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents.
diff --git a/content/riak/kv/2.0.0/developing/getting-started/php/querying.md b/content/riak/kv/2.0.0/developing/getting-started/php/querying.md
index e775c04b61..c094d2db12 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/php/querying.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/php/querying.md
@@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c
## Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
diff --git a/content/riak/kv/2.0.0/developing/getting-started/python.md b/content/riak/kv/2.0.0/developing/getting-started/python.md
index 8aaeacfb3b..f0289a677e 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/python.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/python.md
@@ -17,7 +17,7 @@ aliases:

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.0/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.0.0/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of Python is
required, with Python 2.7 preferred. One of the Python package managers,
@@ -96,4 +96,4 @@ We are now ready to start interacting with Riak.
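The querying pages patched above all carry the same secondary-index (2i) note. As a minimal sketch of what 2i looks like in practice, here is the pattern using the Erlang client (`riakc`); the bucket `users`, the index name `twitter`, and the values are invented for illustration, and 2i still requires a sorted backend (Memory or LevelDB):

```erlang
%% Sketch: tag an object with a binary secondary index, then query by it.
%% Assumes a running Riak node reachable on the Protocol Buffers port.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),

%% Attach the index to the object's metadata before storing it.
Obj0 = riakc_obj:new(<<"users">>, <<"john_smith">>,
                     <<"{\"name\":\"John Smith\"}">>, "application/json"),
MD0  = riakc_obj:get_update_metadata(Obj0),
MD1  = riakc_obj:set_secondary_index(MD0, [{{binary_index, "twitter"},
                                            [<<"jsmith123">>]}]),
Obj1 = riakc_obj:update_metadata(Obj0, MD1),
ok   = riakc_pb_socket:put(Pid, Obj1),

%% Later, look the object up by the index value rather than by its key.
%% Results is an index_results record whose keys field lists matching keys.
{ok, Results} = riakc_pb_socket:get_index_eq(Pid, <<"users">>,
                                             {binary_index, "twitter"},
                                             <<"jsmith123">>).
```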
## Next Steps

-[CRUD Operations](/riak/kv/2.0.0/developing/getting-started/python/crud-operations)
+[CRUD Operations]({{<baseurl>}}riak/kv/2.0.0/developing/getting-started/python/crud-operations)
diff --git a/content/riak/kv/2.0.0/developing/getting-started/python/querying.md b/content/riak/kv/2.0.0/developing/getting-started/python/querying.md
index b88a7d0a1e..1c87b2e984 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/python/querying.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/python/querying.md
@@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c
### Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
diff --git a/content/riak/kv/2.0.0/developing/getting-started/ruby.md b/content/riak/kv/2.0.0/developing/getting-started/ruby.md
index e02aa8edf3..f157ccd309 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/ruby.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/ruby.md
@@ -17,7 +17,7 @@ aliases:

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.0/using/running-a-cluster) first. To try this flavor
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.0.0/using/running-a-cluster) first. To try this flavor
of Riak, a working installation of Ruby is required.

## Client Setup
@@ -61,4 +61,4 @@ We are now ready to start interacting with Riak.

## Next Steps

-[CRUD Operations](/riak/kv/2.0.0/developing/getting-started/ruby/crud-operations)
+[CRUD Operations]({{<baseurl>}}riak/kv/2.0.0/developing/getting-started/ruby/crud-operations)
diff --git a/content/riak/kv/2.0.0/developing/getting-started/ruby/querying.md b/content/riak/kv/2.0.0/developing/getting-started/ruby/querying.md
index 07bee6ed08..316d848a8e 100644
--- a/content/riak/kv/2.0.0/developing/getting-started/ruby/querying.md
+++ b/content/riak/kv/2.0.0/developing/getting-started/ruby/querying.md
@@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c
### Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.0.0/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.0.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
diff --git a/content/riak/kv/2.0.0/developing/key-value-modeling.md b/content/riak/kv/2.0.0/developing/key-value-modeling.md
index 405f83cfce..081ee1a571 100644
--- a/content/riak/kv/2.0.0/developing/key-value-modeling.md
+++ b/content/riak/kv/2.0.0/developing/key-value-modeling.md
@@ -16,7 +16,7 @@ aliases:
---

While Riak enables you to take advantage of a wide variety of features
-that can be useful in application development, such as [Search](/riak/kv/2.0.0/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.0.0/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.0.0/developing/data-types/), Riak almost always performs best when you
+that can be useful in application development, such as [Search]({{<baseurl>}}riak/kv/2.0.0/developing/usage/search), [secondary indexes (2i)]({{<baseurl>}}riak/kv/2.0.0/developing/usage/secondary-indexes/), and [Riak Data Types]({{<baseurl>}}riak/kv/2.0.0/developing/data-types/), Riak almost always performs best when you
build your application around basic CRUD operations (create, read,
update, and delete) on objects, i.e. when you use Riak as a "pure"
key/value store.

@@ -24,7 +24,7 @@ key/value store.
In this tutorial, we'll suggest some strategies for naming and modeling
for key/value object interactions with Riak. If you'd like to use some
of Riak's other features, we recommend checking out the documentation
-for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.0.0/developing/app-guide/) for a better sense of which features you might need.
+for each of them or consulting our guide to [building applications with Riak]({{<baseurl>}}riak/kv/2.0.0/developing/app-guide/) for a better sense of which features you might need.

## Advantages of Key/Value Operations

@@ -37,12 +37,12 @@ objects.
Instead, it interacts with objects on a one-by-one basis, using
primary key lookups.

Primary key lookups store and fetch objects in Riak on the basis of
three basic locators:

-* The object's [key](/riak/kv/2.0.0/learn/concepts/keys-and-objects#keys), which can be anything you
+* The object's [key]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/keys-and-objects#keys), which can be anything you
  want as long as it is [Unicode compliant](http://www.unicode.org/)
-* The [bucket](/riak/kv/2.0.0/learn/concepts/buckets) which houses the object and its key (bucket
+* The [bucket]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/buckets) which houses the object and its key (bucket
  names are also Unicode compliant)
-* The [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) that determines the bucket's
-  [replication](/riak/kv/2.0.0/developing/app-guide/replication-properties) and other properties
+* The [bucket type]({{<baseurl>}}riak/kv/2.0.0/developing/usage/bucket-types) that determines the bucket's
+  [replication]({{<baseurl>}}riak/kv/2.0.0/developing/app-guide/replication-properties) and other properties

It may be useful to think of this system as analogous to a nested
key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you

@@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store
JSON objects with a variety of information about every episode of the
Simpsons. We could store each season in its own bucket and each episode
in its own key within that bucket. Here's what the URL structure would
-look like (for the [HTTP API](/riak/kv/2.0.0/developing/api/http)):
+look like (for the [HTTP API]({{<baseurl>}}riak/kv/2.0.0/developing/api/http)):

```
GET/PUT/DELETE /bucket/<bucket>/keys/<key>

@@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's
username acting as the key. The problem at this point is this: how can
Riak know which user records actually exist?

-One way to determine this is to [list all keys](/riak/kv/2.0.0/developing/api/protocol-buffers/list-keys) in the
+One way to determine this is to [list all keys]({{<baseurl>}}riak/kv/2.0.0/developing/api/protocol-buffers/list-keys) in the
bucket `users`. This approach, however, is _not_ recommended, because
listing all keys in a bucket is a very expensive operation that should
not be used in production. And so another strategy must be employed.

-A better possibility is to use [Riak sets](/riak/kv/2.0.0/developing/data-types/sets) to
-store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.0.0/developing/data-types) that enable you to store lists of binaries or strings in Riak.
+A better possibility is to use [Riak sets]({{<baseurl>}}riak/kv/2.0.0/developing/data-types/sets) to
+store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{<baseurl>}}riak/kv/2.0.0/developing/data-types) that enable you to store lists of binaries or strings in Riak.
Unlike normal Riak objects, you can interact with Riak sets much like
you interact with sets in most programming languages, i.e. you can add
and remove elements at will.

@@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user
records in our `users` bucket, we could set up our application to store
each key in a set when a new record is created. We'll store this set in
the bucket `user_info_sets` (we'll keep it simple) and in the key
-`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.0.0/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called
+`usernames`. The following will also assume that we've [set up a bucket type]({{<baseurl>}}riak/kv/2.0.0/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called
`sets`.

We can interact with that set on the basis of its location:

@@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames')

> **Getting started with Riak clients**
>
-> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.0/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.0/developing/getting-started).
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.0.0/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.0.0/developing/getting-started).

Then, we can create a function that stores a user record's key in that
set every time a record is created:

@@ -434,8 +434,8 @@ def get_user_by_username(username):

## Bucket Types as Additional Namespaces

-Riak [bucket types](/riak/kv/2.0.0/developing/usage/bucket-types) have two essential functions:
-they enable you to manage [bucket configurations](/riak/kv/2.0.0/learn/concepts/buckets) in an
+Riak [bucket types]({{<baseurl>}}riak/kv/2.0.0/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/buckets) in an
efficient and streamlined way and, more importantly for our purposes
here, they act as a third namespace in Riak in addition to buckets and
keys. Thus, in Riak versions 2.0 and later you have access to a third
@@ -443,7 +443,7 @@ layer of information for locating objects if you wish.

While bucket types are typically used to assign different bucket
properties to groups of buckets, you can also create named bucket types
-that simply extend Riak's [defaults](/riak/kv/2.0.0/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/2.0.0/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
the same configuration but have different names.

Here's an example of creating four bucket types that only extend Riak's
diff --git a/content/riak/kv/2.0.0/developing/usage/commit-hooks.md b/content/riak/kv/2.0.0/developing/usage/commit-hooks.md
index 8ee44dfe74..77976a7bbc 100644
--- a/content/riak/kv/2.0.0/developing/usage/commit-hooks.md
+++ b/content/riak/kv/2.0.0/developing/usage/commit-hooks.md
@@ -15,7 +15,7 @@ aliases:
  - /riak/kv/2.0.0/dev/using/commit-hooks
---

-[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types
+[usage bucket types]: {{<baseurl>}}riak/kv/2.0.0/developing/usage/bucket-types

Pre- and post-commit hooks are functions that are invoked before or
after an object has been written to Riak. To provide a few examples,
@@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an
infinite cycle unless the hook functions are carefully written to
detect and short-circuit such cycles.

-Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.0.0/learn/concepts/buckets) level,
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/buckets) level,
[using bucket types][usage bucket types]. They are run once per successful
response to the client.

@@ -40,7 +40,7 @@ functions.
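The commit-hooks page being patched here describes hooks as ordinary Erlang functions with a three-way return contract, spelled out further down in this file's hunks: return the (possibly modified) object to allow the write, `fail` to block it, or `{fail, Reason}` to block it with a custom message. A minimal pre-commit sketch of that contract (the function name and size limit are illustrative, not part of the patch):

```erlang
%% Illustrative pre-commit hook: reject objects larger than 5 MB.
%% Returning the object lets the write proceed (hooks may modify it
%% first); returning {fail, Reason} blocks the write, with Reason
%% used as the error text returned to the client.
precommit_limit_size(Object) ->
    case erlang:byte_size(riak_object:get_value(Object)) of
        Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
        _ -> Object
    end.
```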
## Setting Commit Hooks Using Bucket Types

-Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.0.0/developing/usage/bucket-types)
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/2.0.0/developing/usage/bucket-types)
that associate one or more hooks with any bucket that bears that type.
Let's create a bucket type called `with_post_commit` that adds a
post-commit hook to operations on any bucket that bears the

@@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered
"writes," and so pre-commit hooks will be fired when a delete occurs in
the bucket as well. This means that hook functions will need to inspect
the object for the `X-Riak-Deleted` metadata entry (more on this in our
-documentation on [object deletion](/riak/kv/2.0.0/using/reference/object-deletion)) to determine whether a delete is
+documentation on [object deletion]({{<baseurl>}}riak/kv/2.0.0/using/reference/object-deletion)) to determine whether a delete is
occurring.

Erlang pre-commit functions are allowed three possible return values:

- A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written.
-- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.0.0/developing/api/http)) along with a generic error message about why the write was blocked.
+- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/2.0.0/developing/api/http)) along with a generic error message about why the write was blocked.
- `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.

Errors that occur when processing Erlang pre-commit hooks will be
diff --git a/content/riak/kv/2.0.0/developing/usage/conflict-resolution.md b/content/riak/kv/2.0.0/developing/usage/conflict-resolution.md
index 1ae2b71246..a2a6e4f56a 100644
--- a/content/riak/kv/2.0.0/developing/usage/conflict-resolution.md
+++ b/content/riak/kv/2.0.0/developing/usage/conflict-resolution.md
@@ -15,15 +15,15 @@ aliases:
  - /riak/kv/2.0.0/dev/using/conflict-resolution
---

-[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types
-[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency
+[usage bucket types]: {{<baseurl>}}riak/kv/2.0.0/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.0.0/using/reference/strong-consistency

-One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.0.0/learn/concepts/clusters) system in which any [node](/riak/kv/2.0.0/learn/glossary/#node) is capable of receiving requests without requiring that
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/2.0.0/learn/glossary/#node) is capable of receiving requests without requiring that
every node participate in each request.

-If you are using Riak in an [eventually consistent](/riak/kv/2.0.0/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
unavoidable. Often, Riak can resolve these conflicts on its own
-internally if you use causal context, i.e. [vector clocks](/riak/kv/2.0.0/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.0.0/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).

{{% note title="Important note on terminology" %}}
In versions of Riak prior to 2.0, vector clocks were the only causal context

@@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the
following two options:

1. If your data can be modeled as one of the currently available [Riak
-   Data Types](/riak/kv/2.0.0/developing/data-types), we recommend using one of these types,
+   Data Types]({{<baseurl>}}riak/kv/2.0.0/developing/data-types), we recommend using one of these types,
   because all of them have conflict resolution _built in_,
   completely relieving applications of the need to engage in conflict
   resolution.
2. If your data cannot be modeled as one of the available Data Types,

@@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak
as an _eventually_ consistent system. If you'd like to use Riak's
strong consistency feature, please refer to the following documents:
>
-> * [Using Strong Consistency](/riak/kv/2.0.0/developing/app-guide/strong-consistency) --- A guide for developers
-> * [Managing Strong Consistency](/riak/kv/2.0.0/configuring/strong-consistency) --- A guide for operators
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/2.0.0/developing/app-guide/strong-consistency) --- A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/2.0.0/configuring/strong-consistency) --- A guide for operators
> * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency

@@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents:
Riak's eventual consistency model is powerful because Riak is
fundamentally non-opinionated about how data resolution takes place.
-While Riak _does_ have a set of [defaults](/riak/kv/2.0.0/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/2.0.0/developing/app-guide/replication-properties#available-parameters), there are a variety of general
approaches to conflict resolution that are available. In Riak, you can mix and
match conflict resolution strategies at the bucket level,
-[using bucket types][usage bucket types]. The most important [bucket properties](/riak/kv/2.0.0/learn/concepts/buckets)
+[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/buckets)
to consider when reasoning about conflict resolution are the
`allow_mult` and `last_write_wins` properties.
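Since `allow_mult` and `last_write_wins` recur throughout this patch, a quick sketch of flipping them on a single bucket with the Erlang client may help (the bucket name is hypothetical; bucket types themselves are created and activated with `riak-admin`, which this sketch does not cover):

```erlang
%% Set per-bucket properties via the riakc client. With allow_mult
%% set to false and last_write_wins set to true, Riak keeps only the
%% most recent write instead of returning siblings to the client.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
ok = riakc_pb_socket:set_bucket(Pid, <<"my_bucket">>,
                                [{allow_mult, false},
                                 {last_write_wins, true}]).
```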
@@ -87,7 +87,7 @@ If the `[allow_mult](#siblings)` parameter is set to `false`, Riak
resolves all object replica conflicts internally and does not return
siblings to the client. How Riak resolves those conflicts depends on the
value that you set for a different bucket property,
-`[last_write_wins](/riak/kv/2.0.0/learn/concepts/buckets)`. If `last_write_wins` is set to `false`,
+`[last_write_wins]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/buckets)`. If `last_write_wins` is set to `false`,
Riak will resolve all conflicts on the basis of
[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are
attached to all Riak objects as metadata.

@@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use
cases. For examples of client-side sibling resolution, see the following
client-library-specific docs:

-* [Java](/riak/kv/2.0.0/developing/usage/conflict-resolution/java)
-* [Ruby](/riak/kv/2.0.0/developing/usage/conflict-resolution/ruby)
-* [Python](/riak/kv/2.0.0/developing/usage/conflict-resolution/python)
-* [C#](/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp)
-* [Node.js](/riak/kv/2.0.0/developing/usage/conflict-resolution/nodejs)
+* [Java]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/ruby)
+* [Python]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/python)
+* [C#]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/csharp)
+* [Node.js]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/nodejs)

In Riak versions 2.0 and later, `allow_mult` is set to `true` by default
-for any [bucket types](/riak/kv/2.0.0/developing/usage/bucket-types) that you create. This means
+for any [bucket types]({{<baseurl>}}riak/kv/2.0.0/developing/usage/bucket-types) that you create. This means
that if you wish to avoid client-side sibling resolution, you have a few
options:

-* Explicitly create and activate [bucket types](/riak/kv/2.0.0/developing/usage/bucket-types)
+* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/2.0.0/developing/usage/bucket-types)
  that set `allow_mult` to `false`
-* Use Riak's [Configuration Files](/riak/kv/2.0.0/configuring/reference) to change the [default bucket properties](/riak/kv/2.0.0/configuring/reference#default-bucket-properties) for your
+* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/2.0.0/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/2.0.0/configuring/reference#default-bucket-properties) for your
  cluster. If you set the `buckets.default.allow_mult` parameter to
  `false`, all bucket types that you create will have `allow_mult` set
  to `false` by default.

@@ -164,7 +164,7 @@ options:
When a value is stored in Riak, it is tagged with a piece of metadata
called a **causal context** which establishes the object's initial
version. Causal context comes in one of two possible forms, depending
-on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.0.0/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.0.0/learn/concepts/causal-context#vector-clocks) will be used.
+on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/causal-context#vector-clocks) will be used.
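The read-modify-write cycle that carries causal context forward, referenced throughout these hunks, is short enough to sketch with the Erlang client (bucket and key are invented for illustration):

```erlang
%% Fetch first so the update reuses the object's causal context.
%% riakc keeps the vector clock inside the fetched object, so putting
%% the updated object back passes the context along automatically.
{ok, Pid}     = riakc_pb_socket:start_link("127.0.0.1", 8087),
{ok, Fetched} = riakc_pb_socket:get(Pid, <<"dogs">>, <<"rufus">>),
Updated       = riakc_obj:update_value(Fetched, <<"WOOF WOOF!">>),
ok            = riakc_pb_socket:put(Pid, Updated).
```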
Causal context essentially enables Riak to compare the different values
of objects stored in Riak and to determine a number of important things

@@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context
when updating objects, _unless you are certain that no object exists
under that key_. Failing to use causal context with mutable data,
especially for objects that are frequently updated, can lead to
-[sibling explosion](/riak/kv/2.0.0/using/performance/latency-reduction#siblings), which can
+[sibling explosion]({{<baseurl>}}riak/kv/2.0.0/using/performance/latency-reduction#siblings), which can
produce a variety of problems in your cluster. Fortunately, much of the
work involved with using causal context is handled automatically by
-Basho's official [client libraries](/riak/kv/2.0.0/developing/client-libraries). Examples can be found for each
-client library in the [Object Updates](/riak/kv/2.0.0/developing/usage/updating-objects) document.
+Basho's official [client libraries]({{<baseurl>}}riak/kv/2.0.0/developing/client-libraries). Examples can be found for each
+client library in the [Object Updates]({{<baseurl>}}riak/kv/2.0.0/developing/usage/updating-objects) document.

## Siblings

@@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in
which case the object will be given a sibling. These writes could happen
on the same node or on different nodes.
2. **Stale causal context** --- Writes from any client using a stale
-[causal context](/riak/kv/2.0.0/learn/concepts/causal-context). This is a less likely scenario if a client updates
+[causal context]({{<baseurl>}}riak/kv/2.0.0/learn/concepts/causal-context). This is a less likely scenario if a client updates
the object by reading the object first, fetching the causal context
currently attached to the object, and then returning that causal context
to Riak when performing the update (fortunately, our client libraries

@@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key

> **Getting started with Riak KV clients**
>
> If you are connecting to Riak using one of Basho's official
-[client libraries](/riak/kv/2.0.0/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.0/developing/getting-started) section.
+[client libraries]({{<baseurl>}}riak/kv/2.0.0/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.0.0/developing/getting-started) section.

At this point, multiple objects have been stored in the same key without
passing any causal context to Riak. Let's see what happens if we try to

@@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. For more
information on application-side conflict resolution, see our
client-library-specific documentation for the following languages:

-* [Java](/riak/kv/2.0.0/developing/usage/conflict-resolution/java)
-* [Ruby](/riak/kv/2.0.0/developing/usage/conflict-resolution/ruby)
-* [Python](/riak/kv/2.0.0/developing/usage/conflict-resolution/python)
-* [C#](/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp)
-* [Node.js](/riak/kv/2.0.0/developing/usage/conflict-resolution/nodejs)
+* [Java]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/ruby)
+* [Python]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/python)
+* [C#]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/csharp)
+* [Node.js]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution/nodejs)

We won't deal with conflict resolution in this section. Instead, we'll
focus on how to use causal context.

@@ -611,7 +611,7 @@ once that limit has been exceeded.

Sibling explosion occurs when an object rapidly collects siblings
without being reconciled. This can lead to myriad issues. Having an
enormous object in your node can cause reads of that object to crash
-the entire node. Other issues include [increased cluster latency](/riak/kv/2.0.0/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/2.0.0/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.

### Vector Clock Explosion

@@ -666,7 +666,7 @@ Parameter | Default value | Description

This diagram shows how the values of these parameters dictate the
vector clock pruning process:

-![Vclock Pruning](/images/vclock-pruning.png)
+![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)

## More Information

diff --git a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp.md
index d9d430b311..5295117d49 100644
--- a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp.md
+++ b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp.md
@@ -16,7 +16,7 @@ aliases:
  - /riak/kv/2.0.0/dev/using/conflict-resolution/csharp
---

-For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific
criteria. Here, we'll provide a brief guide to conflict resolution using the
official [Riak .NET client][riak_dotnet_client].
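The client-specific resolution pages patched below all follow the same shape: fetch, inspect the sibling values, pick or merge one, and write the winner back. A hedged Erlang sketch of that shape (the bucket type, bucket, key, and the "keep the first sibling" rule are placeholders, not a recommended merge strategy):

```erlang
%% Fetch an object that may hold siblings and resolve them before
%% writing back. Real applications should merge siblings according
%% to use-case-specific criteria rather than picking one blindly.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
{ok, Obj} = riakc_pb_socket:get(Pid, {<<"siblings">>, <<"users">>},
                                <<"john_smith">>),
Resolved  = case riakc_obj:value_count(Obj) of
                1 -> Obj;  %% no conflict, nothing to resolve
                _ ->
                    %% select_sibling/2 marks one sibling's value and
                    %% metadata as the update to store on the next put.
                    riakc_obj:select_sibling(1, Obj)
            end,
ok = riakc_pb_socket:put(Pid, Resolved).
```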
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.0.0/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.0/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.0/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.0/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -611,7 +611,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.0.0/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.0.0/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -666,7 +666,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp.md index d9d430b311..5295117d49 100644 --- a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.0/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
diff --git a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/golang.md index 25f27b66c5..99118c6c5f 100644 --- a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.0/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to usecase-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/java.md index 26e4f58e01..5e628744a0 100644 --- a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.0/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.0/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.0/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.0/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.0/developing/data-types/) that have specific conflict resolution mechanics built in. 
If you have data that -can be modeled as a [counter](/riak/kv/2.0.0/developing/data-types/counters), [set](/riak/kv/2.0.0/developing/data-types/sets), or [map](/riak/kv/2.0.0/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.0/developing/data-types/counters), [set]({{}}riak/kv/2.0.0/developing/data-types/sets), or [map]({{}}riak/kv/2.0.0/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.0/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.0/developing/data-types/sets). diff --git a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/nodejs.md index 71df3b9cda..89374792c4 100644 --- a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.0/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/php.md index 2023d46738..4eab2c5e71 100644 --- a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.0/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.0/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.0/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.0/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.0/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.0/developing/data-types/counters), [set](/riak/kv/2.0.0/developing/data-types/sets), or [map](/riak/kv/2.0.0/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.0/developing/data-types/counters), [set]({{}}riak/kv/2.0.0/developing/data-types/sets), or [map]({{}}riak/kv/2.0.0/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.0/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.0/developing/data-types/sets). diff --git a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/python.md index b28f9762e4..ec1baeb445 100644 --- a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.0/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.0/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.0/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.0/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.0/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.0/developing/data-types/counters), [set](/riak/kv/2.0.0/developing/data-types/sets), or [map](/riak/kv/2.0.0/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.0/developing/data-types/counters), [set]({{}}riak/kv/2.0.0/developing/data-types/sets), or [map]({{}}riak/kv/2.0.0/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.0/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.0/developing/data-types/sets). diff --git a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/ruby.md index 8edf291a0f..d019675c34 100644 --- a/content/riak/kv/2.0.0/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.0.0/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.0/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.0/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.0/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.0/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.0/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.0/developing/data-types/counters), [set](/riak/kv/2.0.0/developing/data-types/sets), or [map](/riak/kv/2.0.0/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.0/developing/data-types/counters), [set]({{}}riak/kv/2.0.0/developing/data-types/sets), or [map]({{}}riak/kv/2.0.0/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.0/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.0/developing/data-types/sets). diff --git a/content/riak/kv/2.0.0/developing/usage/creating-objects.md b/content/riak/kv/2.0.0/developing/usage/creating-objects.md index f3e4b388ee..01c55352e6 100644 --- a/content/riak/kv/2.0.0/developing/usage/creating-objects.md +++ b/content/riak/kv/2.0.0/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.0.0/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.0.0/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -27,7 +27,7 @@ In the example above, our read was unsuccessful because our Riak cluster is currently empty. Let's change that by storing an object containing information about a dog named Rufus. We'll store that object in the location described above, i.e. in the key `rufus` in the bucket `dogs`, -which bears the `animals` [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types). +which bears the `animals` [bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types). 
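For orientation while reading this hunk and the one that follows: a store and read of that `rufus` object through the official Python client might look roughly like the sketch below. Host, port, and error handling are assumptions; the page's own examples are the authoritative ones.

```python
import riak

client = riak.RiakClient(pb_port=8087)
dogs = client.bucket_type('animals').bucket('dogs')

# Store a plain-text value under the key 'rufus'
obj = dogs.new('rufus', data='WOOF!', content_type='text/plain')
obj.store()

# Read it back; fetched.data should be 'WOOF!'
fetched = dogs.get('rufus')
print(fetched.data)
```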
The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store
@@ -122,7 +122,7 @@ rsp := svc.Response
Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information.
-Now, run the same read operation in [Reading Objects](/riak/kv/2.0.0/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no
+Now, run the same read operation in [Reading Objects]({{}}riak/kv/2.0.0/developing/usage/reading-objects). If the write operation was successful, you should be able to read the object. Your Riak cluster is no
longer empty!

### Store an Object

@@ -143,7 +143,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY
There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type
-[`default`](/riak/kv/2.0.0/developing/usage/bucket-types) will be applied.
+[`default`]({{}}riak/kv/2.0.0/developing/usage/bucket-types) will be applied.

#### Write Parameters

diff --git a/content/riak/kv/2.0.0/developing/usage/custom-extractors.md b/content/riak/kv/2.0.0/developing/usage/custom-extractors.md
index fe07447dd2..ce91bbea82 100644
--- a/content/riak/kv/2.0.0/developing/usage/custom-extractors.md
+++ b/content/riak/kv/2.0.0/developing/usage/custom-extractors.md
@@ -27,7 +27,7 @@ Content Type | Erlang Module
`text/xml` | `yz_xml_extractor`
No specified type | `yz_noop_extractor`
-There are also built-in extractors for [Riak Data Types](/riak/kv/2.0.0/developing/usage/searching-data-types).
+There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.0.0/developing/usage/searching-data-types).
If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search.
@@ -145,7 +145,7 @@ erlc search_test_extractor.erl
To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information
-can be found in our documentation on [advanced](/riak/kv/2.0.0/configuring/reference/#advanced-configuration)). Here's an
+can be found in our documentation on [advanced configuration]({{}}riak/kv/2.0.0/configuring/reference/#advanced-configuration)). Here's an
example:

```advancedconfig
@@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra

## Indexing and Searching HTTP Header Packet Data

Now that Solr knows how to extract HTTP header packet data, we need to
-create a schema that extends the [default schema](/riak/kv/2.0.0/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
+create a schema that extends the [default schema]({{}}riak/kv/2.0.0/developing/usage/search-schemas/#creating-a-custom-schema).
The following fields should be added to `<fields>` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file:
@@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \
-d '{"schema":"http_header_schema"}'
```
-Now, we can create and activate a [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types)
+Now, we can create and activate a [bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types)
for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`.
diff --git a/content/riak/kv/2.0.0/developing/usage/deleting-objects.md b/content/riak/kv/2.0.0/developing/usage/deleting-objects.md
index 33f0564952..617471000d 100644
--- a/content/riak/kv/2.0.0/developing/usage/deleting-objects.md
+++ b/content/riak/kv/2.0.0/developing/usage/deleting-objects.md
@@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that
-you first fetch the [causal context](/riak/kv/2.0.0/learn/concepts/causal-context) of the object prior to updating.
+you first fetch the [causal context]({{}}riak/kv/2.0.0/learn/concepts/causal-context) of the object prior to updating.
This can be done by setting the `deletedvclock` parameter to `true` as
-part of the [fetch operation](/riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object). This can also be done
+part of the [fetch operation]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/fetch-object). This can also be done
with the official Riak clients for Ruby, Java, and Erlang, as in the example below:
diff --git a/content/riak/kv/2.0.0/developing/usage/document-store.md b/content/riak/kv/2.0.0/developing/usage/document-store.md
index 65681cb64c..44d04bdaff 100644
--- a/content/riak/kv/2.0.0/developing/usage/document-store.md
+++ b/content/riak/kv/2.0.0/developing/usage/document-store.md
@@ -16,23 +16,23 @@ ---
Although Riak wasn't explicitly created as a document store, two
-features recently added to Riak---[Riak Search](/riak/kv/2.0.0/developing/usage/search/) and [Riak Data Types](/riak/kv/2.0.0/developing/data-types/)---make it possible to use Riak as a
+features recently added to Riak---[Riak Search]({{}}riak/kv/2.0.0/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.0.0/developing/data-types/)---make it possible to use Riak as a
highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using
-[Riak maps](/riak/kv/2.0.0/developing/data-types/maps).
+[Riak maps]({{}}riak/kv/2.0.0/developing/data-types/maps).

## Basic Approach

Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial,
-however, we will store data in [Riak maps](/riak/kv/2.0.0/developing/data-types/maps),
+however, we will store data in [Riak maps]({{}}riak/kv/2.0.0/developing/data-types/maps),
index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**.
Each indexed document will have an ID generated automatically by Search, and because
-we're not interested in running normal [key/value queries](/riak/kv/2.0.0/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.0.0/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects.
+we're not interested in running normal [key/value queries]({{}}riak/kv/2.0.0/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.0.0/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects.

@@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema.

## Creating a Schema and Index

-In the documentation on [search schemas](/riak/kv/2.0.0/developing/usage/search-schemas), you'll find a
+In the documentation on [search schemas]({{}}riak/kv/2.0.0/developing/usage/search-schemas), you'll find a
baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `<fields>` list:

@@ -242,7 +242,7 @@ as part of our "collection."

Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method
-described in [Data Modeling with Riak Data Types](/riak/kv/2.0.0/developing/data-modeling), we can construct a
+described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.0.0/developing/data-modeling), we can construct a
class that looks like this:

```java
diff --git a/content/riak/kv/2.0.0/developing/usage/mapreduce.md b/content/riak/kv/2.0.0/developing/usage/mapreduce.md
index 7e8ce37b20..faa3a579fc 100644
--- a/content/riak/kv/2.0.0/developing/usage/mapreduce.md
+++ b/content/riak/kv/2.0.0/developing/usage/mapreduce.md
@@ -33,9 +33,9 @@ transferring a potentially huge dataset to a client algorithm. Developers can use MapReduce for things like filtering documents by tags, counting words in documents, and extracting links to related data. In Riak, MapReduce is one method for querying that is not strictly based
-on key querying, alongside [secondary indexes](/riak/kv/2.0.0/developing/usage/secondary-indexes/)
-and [Search](/riak/kv/2.0.0/developing/usage/search/). MapReduce jobs can be submitted through the
-[HTTP API](/riak/kv/2.0.0/developing/api/http) or the [Protocol Buffers API](/riak/kv/2.0.0/developing/api/protocol-buffers/), although we
+on key querying, alongside [secondary indexes]({{}}riak/kv/2.0.0/developing/usage/secondary-indexes/)
+and [Search]({{}}riak/kv/2.0.0/developing/usage/search/). MapReduce jobs can be submitted through the
+[HTTP API]({{}}riak/kv/2.0.0/developing/api/http) or the [Protocol Buffers API]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/), although we
strongly recommend using the Protocol Buffers API for performance reasons.

@@ -49,9 +49,9 @@ reasons.

## When to Use MapReduce

* When you know the set of objects over which you want to MapReduce
- (i.e. the locations of the objects, as specified by [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types), bucket, and key)
+ (i.e. the locations of the objects, as specified by [bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types), bucket, and key)
* When you want to return actual objects or pieces of objects and not
- just the keys.
[Search](/riak/kv/2.0.0/developing/usage/search/) and [secondary indexes](/riak/kv/2.0.0/developing/usage/secondary-indexes) are other means of returning objects based on + just the keys. [Search]({{}}riak/kv/2.0.0/developing/usage/search/) and [secondary indexes]({{}}riak/kv/2.0.0/developing/usage/secondary-indexes) are other means of returning objects based on non-key-based queries, but they only return lists of keys and not whole objects. * When you need the utmost flexibility in querying your data. MapReduce @@ -86,7 +86,7 @@ Riak MapReduce queries have two components: * A list of phases The elements of the input list are object locations as specified by -[bucket type](/riak/kv/2.0.0/developing/usage/bucket-types), bucket, and key. The elements of the +[bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types), bucket, and key. The elements of the phases list are chunks of information related to a map, a reduce, or a link function. @@ -96,7 +96,7 @@ node that the client contacts to make the request becomes the above, each job consists of a list of phases, where each phase is either a map or a reduce phase. The coordinating node uses the list of phases to route the object keys and the function that will operate over the -objects stored in those keys and instruct the proper [vnode](/riak/kv/2.0.0/learn/glossary/#vnode) to +objects stored in those keys and instruct the proper [vnode]({{}}riak/kv/2.0.0/learn/glossary/#vnode) to run that function over the right objects. After running the map function, the results are sent back to the @@ -107,20 +107,20 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example In this example, we'll create four objects with the text "caremad" repeated a varying number of times and store those objects in the bucket -`training` (which does not bear a [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types)). +`training` (which does not bear a [bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types)). An Erlang MapReduce function will be used to count the occurrences of the word "caremad." ### Data object input commands For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) -in conjunction with Riak's [HTTP API](/riak/kv/2.0.0/developing/api/http) to store the objects: +in conjunction with Riak's [HTTP API]({{}}riak/kv/2.0.0/developing/api/http) to store the objects: ```curl curl -XPUT http://localhost:8098/buckets/training/keys/foo \ @@ -218,4 +218,4 @@ counting the number of instances of the word. ## Advanced MapReduce Queries For more detailed information on MapReduce queries in Riak, we recommend -checking out our [Advanced MapReduce](/riak/kv/2.0.0/developing/app-guide/advanced-mapreduce) guide. +checking out our [Advanced MapReduce]({{}}riak/kv/2.0.0/developing/app-guide/advanced-mapreduce) guide. diff --git a/content/riak/kv/2.0.0/developing/usage/reading-objects.md b/content/riak/kv/2.0.0/developing/usage/reading-objects.md index 4718d2e126..fa887f7b5f 100644 --- a/content/riak/kv/2.0.0/developing/usage/reading-objects.md +++ b/content/riak/kv/2.0.0/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. 
You specify a bucket type, bucket, and key, and Riak either returns the
-object that's stored there---including its [siblings](/riak/kv/2.0.0/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the
+object that's stored there---including its [siblings]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the
equivalent of an HTTP `404 Object Not Found`).

Here is the basic command form for retrieving a specific key from a
@@ -27,7 +27,7 @@ GET /types/<type>/buckets/<bucket>/keys/<key>
```
Here is an example of a read performed on the key `rufus` in the bucket
-`dogs`, which bears the [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) `animals`:
+`dogs`, which bears the [bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types) `animals`:

```java
// In the Java client, it is best to specify a bucket type/bucket/key
diff --git a/content/riak/kv/2.0.0/developing/usage/replication.md b/content/riak/kv/2.0.0/developing/usage/replication.md
index 8536c80743..f4c8f158ff 100644
--- a/content/riak/kv/2.0.0/developing/usage/replication.md
+++ b/content/riak/kv/2.0.0/developing/usage/replication.md
@@ -15,12 +15,12 @@ aliases:
- /riak/kv/2.0.0/dev/advanced/replication-properties
---
-[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types
-[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency
-[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb
-[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask
-[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency
-[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters
+[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency
+[plan backend leveldb]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb
+[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask
+[use ref strong consistency]: {{}}riak/kv/2.0.0/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters

Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to
@@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases.

-At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.0/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.0/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
replication levels to match your application and business needs.

{{% note title="Note on strong consistency" %}}
An option introduced in Riak version 2.0 is to use Riak as a strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the Using Strong Consistency documentation, as this option will not be covered in this tutorial.
{{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.0/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.0/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.0/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.0/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.0/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.0/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.0.0/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.0.0/setup/planning/backend/multi). 
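To make the per-request side of these properties concrete, here is a hedged Python-client sketch of overriding W and DW on a single write. The bucket and key reuse the `nba_stats`/`michael_jordan` example that appears later in this file; the `default` bucket type is an assumption.

```python
import riak

client = riak.RiakClient(pb_port=8087)
stats = client.bucket_type('default').bucket('nba_stats')

obj = stats.new('michael_jordan', data={'ppg': 30.1})
# Ask for 3 write acknowledgments, 2 of them durable on disk
obj.store(w=3, dw=2)

# Reads accept the same style of override, e.g. require 2 responses
fetched = stats.get('michael_jordan', r=2)
```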
## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.0/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.0/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.0.0/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.0.0/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.0/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.0/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)

diff --git a/content/riak/kv/2.0.0/developing/usage/search-schemas.md b/content/riak/kv/2.0.0/developing/usage/search-schemas.md
index ca22f14adf..b5bec01325 100644
--- a/content/riak/kv/2.0.0/developing/usage/search-schemas.md
+++ b/content/riak/kv/2.0.0/developing/usage/search-schemas.md
@@ -15,17 +15,17 @@ aliases:
- /riak/kv/2.0.0/dev/advanced/search-schema
---
-[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters
+[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters

> **Note on Search 2.0 vs. Legacy Search**
>
> This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed
-Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/).
+Yokozuna).

Riak Search is built for ease of use, allowing you to write values into Riak and query for values using Solr. Riak Search does a lot of work
-under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.0.0/developing/data-types/), and [more](/riak/kv/2.0.0/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.0.0/developing/data-types/), and [more]({{}}riak/kv/2.0.0/developing/usage/custom-extractors)---into something that can be indexed and searched later.
Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing an array of strings? An integer? A date? Is your text in English or Russian?
You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.0.0/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.0.0/developing/usage/search.md b/content/riak/kv/2.0.0/developing/usage/search.md index cfeaf9b281..1230aaabfa 100644 --- a/content/riak/kv/2.0.0/developing/usage/search.md +++ b/content/riak/kv/2.0.0/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.0.0/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.0.0/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.0/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.0/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.0/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.0/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.0.0/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.0.0/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.0.0/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.0.0/developing/usage/custom-extractors). 
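To see extraction end to end from a client, here is a hedged sketch with the official Python client: store a JSON value and query the Solr field Riak Search derives from it. The `famous` index follows this page's earlier hunks; the `cats` bucket, key, and `name_s`/`age_i` fields are assumptions, and the bucket-to-index wiring is assumed to already be in place.

```python
import riak

client = riak.RiakClient(pb_port=8087)
cats = client.bucket_type('animals').bucket('cats')

# JSON is the default content type, so the JSON extractor applies
obj = cats.new('liono', data={'name_s': 'Lion-o', 'age_i': 30})
obj.store()

# The extractor turned name_s and age_i into indexed Solr fields
results = client.fulltext_search('famous', 'name_s:Lion*')
print(results['num_found'], results['docs'])
```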
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.0.0/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.0.0/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.0.0/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.0.0/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.0.0/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.0.0/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.0.0/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.0.0/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.0.0/developing/usage/searching-data-types.md b/content/riak/kv/2.0.0/developing/usage/searching-data-types.md index 0d429eb18d..5ef6be7fba 100644 --- a/content/riak/kv/2.0.0/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.0.0/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.0/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.0.0/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.0.0/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.0.0/developing/data-types/counters), [sets](/riak/kv/2.0.0/developing/data-types/sets), and [maps](/riak/kv/2.0.0/developing/data-types/maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.0.0/developing/data-types/counters), [sets]({{}}riak/kv/2.0.0/developing/data-types/sets), and [maps]({{}}riak/kv/2.0.0/developing/data-types/maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
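As a hedged illustration of querying an indexed Data Type from the official Python client (the `scores` index and `counter` field anticipate the counter examples later in this file; everything else is an assumption):

```python
import riak

client = riak.RiakClient(pb_port=8087)

# Find counters with a value of at least 20 via the 'scores' index
results = client.fulltext_search('scores', 'counter:[20 TO *]')
for doc in results['docs']:
    # _yz_rk is the Riak key of the object behind each Solr document
    print(doc['_yz_rk'], doc.get('counter'))
```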
### Top-level Schemas -The default schema for [counters](/riak/kv/2.0.0/developing/data-types/counters) indexes each +The default schema for [counters]({{}}riak/kv/2.0.0/developing/data-types/counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.0.0/developing/data-types/sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.0.0/developing/data-types/sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.0.0/developing/data-types/maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.0.0/developing/data-types/maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) for [storing counters](/riak/kv/2.0.0/developing/data-types/counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.0.0/developing/data-types/counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types) for [storing sets](/riak/kv/2.0.0/developing/data-types/sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.0.0/developing/data-types/sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.0.0/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.0.0/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.0.0/developing/data-types/maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.0.0/developing/data-types/maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.0.0/developing/usage/secondary-indexes.md b/content/riak/kv/2.0.0/developing/usage/secondary-indexes.md index 0ad9d2d14e..5464de7bcf 100644 --- a/content/riak/kv/2.0.0/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.0.0/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.0.0/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.0/setup/planning/backend/memory -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.0/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.0.0/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.0/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.0.0/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.0.0/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.0.0/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.0.0/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.0.0/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.0.0/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.0.0/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.0.0/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.0.0/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.0.0/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.0.0/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.0.0/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.0.0/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.0.0/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.0.0/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.0/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.0/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.0/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.0/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.0.0/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.0.0/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.0.0/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.0.0/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
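Ahead of the exact-match examples below, a hedged Python-client sketch of the full 2i round trip, tagging at write time and querying later. The names mirror the `users`/`john_smith` example above; the `default` bucket type and the stored value are assumptions.

```python
import riak

client = riak.RiakClient(pb_port=8087)
users = client.bucket_type('default').bucket('users')

obj = users.new('john_smith', data='...', content_type='text/plain')
obj.add_index('twitter_bin', 'jsmith123')        # string index
obj.add_index('email_bin', 'jsmith@basho.com')   # another tag
obj.store()

# Exact-match query: every key tagged with this Twitter handle
page = users.get_index('twitter_bin', 'jsmith123')
print(page.results)
```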
### Exact Match

diff --git a/content/riak/kv/2.0.0/developing/usage/security.md b/content/riak/kv/2.0.0/developing/usage/security.md
index 457d213cae..015dcaead1 100644
--- a/content/riak/kv/2.0.0/developing/usage/security.md
+++ b/content/riak/kv/2.0.0/developing/usage/security.md
@@ -15,49 +15,49 @@ aliases:
- /riak/kv/2.0.0/dev/advanced/client-security
---
-Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.0.0/using/security/basics) that enables you to choose
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.0.0/using/security/basics) that enables you to choose

* which Riak users/clients are authorized to perform a wide variety of Riak operations, and
* how those users/clients are required to authenticate themselves.

-The following four authentication mechanisms, aka [security sources](/riak/kv/2.0.0/using/security/managing-sources/) are available:
+The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.0.0/using/security/managing-sources/), are available:

-* [Trust](/riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default
-* [Password](/riak/kv/2.0.0/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password
-* [Certificate](/riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients
-* [Pluggable authentication module (PAM)](/riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the
- `[riak-admin security](/riak/kv/2.0.0/using/security/managing-sources/#managing-sources)` command line interface
+* [Trust]({{}}riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default
+* [Password]({{}}riak/kv/2.0.0/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password
+* [Certificate]({{}}riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients provide a certificate
+* [Pluggable authentication module (PAM)]({{}}riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the
+ `[riak-admin security]({{}}riak/kv/2.0.0/using/security/managing-sources/#managing-sources)` command line interface

Riak's approach to security is highly flexible. If you choose to use Riak's security feature, you do not need to require that all clients authenticate via the same means. Instead, you can specify authentication sources on a client-by-client, i.e. user-by-user, basis. This means that
-you can require clients performing, say, [MapReduce](/riak/kv/2.0.0/developing/usage/mapreduce/)
-operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.0.0/developing/usage) have to use username and password. The approach
+you can require clients performing, say, [MapReduce]({{}}riak/kv/2.0.0/developing/usage/mapreduce/)
+operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.0.0/developing/usage) have to use username and password. The approach
that you adopt will depend on your security needs. This document provides a general overview of how that works.
For managing security in Riak itself, see the following documents:

-* [Authentication and Authorization](/riak/kv/2.0.0/using/security/basics)
-* [Managing Security Sources](/riak/kv/2.0.0/using/security/managing-sources/)
+* [Authentication and Authorization]({{}}riak/kv/2.0.0/using/security/basics)
+* [Managing Security Sources]({{}}riak/kv/2.0.0/using/security/managing-sources/)

We also provide client-library-specific guides for the following officially supported clients:

-* [Java](/riak/kv/2.0.0/developing/usage/security/java)
-* [Ruby](/riak/kv/2.0.0/developing/usage/security/ruby)
-* [PHP](/riak/kv/2.0.0/developing/usage/security/php)
-* [Python](/riak/kv/2.0.0/developing/usage/security/python)
-* [Erlang](/riak/kv/2.0.0/developing/usage/security/erlang)
+* [Java]({{}}riak/kv/2.0.0/developing/usage/security/java)
+* [Ruby]({{}}riak/kv/2.0.0/developing/usage/security/ruby)
+* [PHP]({{}}riak/kv/2.0.0/developing/usage/security/php)
+* [Python]({{}}riak/kv/2.0.0/developing/usage/security/python)
+* [Erlang]({{}}riak/kv/2.0.0/developing/usage/security/erlang)

## Certificates, Keys, and Authorities

@@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients.

> **HTTP not supported**
>
> Certificate-based authentication is available only through Riak's
-[Protocol Buffers](/riak/kv/2.0.0/developing/api/protocol-buffers/) interface. It is not available through the
-[HTTP API](/riak/kv/2.0.0/developing/api/http).
+[Protocol Buffers]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/) interface. It is not available through the
+[HTTP API]({{}}riak/kv/2.0.0/developing/api/http).

### Default Names

-In Riak's [configuration files](/riak/kv/2.0.0/configuring/reference/#security), the
+In Riak's [configuration files]({{}}riak/kv/2.0.0/configuring/reference/#security), the
default certificate file names are as follows:

Cert | Filename

diff --git a/content/riak/kv/2.0.0/developing/usage/security/erlang.md b/content/riak/kv/2.0.0/developing/usage/security/erlang.md
index dd1764848f..1bc87ed098 100644
--- a/content/riak/kv/2.0.0/developing/usage/security/erlang.md
+++ b/content/riak/kv/2.0.0/developing/usage/security/erlang.md
@@ -19,9 +19,9 @@ aliases:

This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak.

-If you are using [trust](/riak/kv/2.0.0/using/security/managing-sources/), [PAM-](/riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.0.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+If you are using [trust]({{}}riak/kv/2.0.0/using/security/managing-sources/)- or [PAM]({{}}riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.0.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using
+[certificate]({{}}riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.0/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.0/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.0/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.0/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.0/developing/usage/security/java.md b/content/riak/kv/2.0.0/developing/usage/security/java.md index df55708661..2a2c1545d2 100644 --- a/content/riak/kv/2.0.0/developing/usage/security/java.md +++ b/content/riak/kv/2.0.0/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.0/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.0/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.0.0/developing/usage/security/php.md b/content/riak/kv/2.0.0/developing/usage/security/php.md index 6a084b5b23..3e92682fc9 100644 --- a/content/riak/kv/2.0.0/developing/usage/security/php.md +++ b/content/riak/kv/2.0.0/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.0/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.0.0/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.0.0/developing/usage/security/python.md b/content/riak/kv/2.0.0/developing/usage/security/python.md index bcb11dfc35..ce83ada696 100644 --- a/content/riak/kv/2.0.0/developing/usage/security/python.md +++ b/content/riak/kv/2.0.0/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.0/using/security/managing-sources/) or [PAM-](/riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.0.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.0/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security +setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.0.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.0/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.0/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.0.0/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.0/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.0/using/security/basics/#user-management).
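Putting those pieces together in the Python client, a password-based connection might look roughly like the following sketch (the username, password, and CA path are placeholders for the values you configured in Riak):

```python
import riak
from riak.security import SecurityCreds

# Placeholder credentials: the user created in Riak plus the CA file
# shared with the server. Adjust names and paths to your environment.
creds = SecurityCreds(username='riakuser',
                      password='rosebud',
                      cacert_file='/ssl_dir/cacertfile.pem')

client = riak.RiakClient(pb_port=8087, credentials=creds)
client.ping()  # simple connectivity check over the secured connection
```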
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.0.0/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.0.0/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.0.0/developing/usage/security/ruby.md b/content/riak/kv/2.0.0/developing/usage/security/ruby.md index 312fb510c5..0be6885e74 100644 --- a/content/riak/kv/2.0.0/developing/usage/security/ruby.md +++ b/content/riak/kv/2.0.0/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.0/using/security/managing-sources/) or [PAM](/riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.0.0/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.0.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.0.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.0/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.0.0/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication). 
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.0/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.0/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.0.0/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.0/developing/usage/updating-objects.md b/content/riak/kv/2.0.0/developing/usage/updating-objects.md index 8103938af6..d1ad7604d6 100644 --- a/content/riak/kv/2.0.0/developing/usage/updating-objects.md +++ b/content/riak/kv/2.0.0/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/dev/using/updates --- -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode ## Using Causal Context If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.0.0/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.0.0/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.0.0/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.0.0/learn/concepts/causal-context). These context objects track the causal history of objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps: 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.0.0/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.0.0/learn/concepts/causal-context)) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.0.0/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.0.0/learn/concepts/causal-context). Although a more detailed tutorial on context objects and +object updates can be found in [Conflict Resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution), we'll walk you through a basic example here.
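As a quick sketch of that three-step cycle in the Python client (bucket and key names here are purely illustrative), note that the causal context travels with the fetched object, so the application never handles it directly:

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('nba')

# 1. Fetch the object; its causal context comes along as metadata
obj = bucket.get('champion')

# 2. Modify only the value, leaving the fetched context untouched
obj.data = {'team': 'Washington Generals'}

# 3. Write the new object back; the client hands the context to Riak
obj.store()
```

The walkthrough below traces the same pattern step by step.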
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.0.0/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.0.0/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.0.0/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.0.0/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.0.0/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.0.0/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.0.0/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.0.0/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.0.0/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.0.0/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.0.0/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.0.0/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.0.0/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.0.0/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.0.0/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.0.0/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.0.0/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.0.0/index.md b/content/riak/kv/2.0.0/index.md index 6c5474d476..e8b66265e5 100644 --- a/content/riak/kv/2.0.0/index.md +++ b/content/riak/kv/2.0.0/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.0.0/configuring -[dev index]: /riak/kv/2.0.0/developing -[downloads]: /riak/kv/2.0.0/downloads/ -[install index]: /riak/kv/2.0.0/setup/installing/ -[plan index]: /riak/kv/2.0.0/setup/planning -[perf open files]: /riak/kv/2.0.0/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.0.0/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.0.0/developing/usage/search -[getting started]: /riak/kv/2.0.0/developing/getting-started -[dev client libraries]: /riak/kv/2.0.0/developing/client-libraries +[config index]: {{}}riak/kv/2.0.0/configuring +[dev index]: {{}}riak/kv/2.0.0/developing +[downloads]: {{}}riak/kv/2.0.0/downloads/ +[install index]: {{}}riak/kv/2.0.0/setup/installing/ +[plan index]: {{}}riak/kv/2.0.0/setup/planning +[perf open files]: {{}}riak/kv/2.0.0/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.0.0/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.0.0/developing/usage/search +[getting started]: {{}}riak/kv/2.0.0/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.0.0/developing/client-libraries diff --git a/content/riak/kv/2.0.0/introduction.md b/content/riak/kv/2.0.0/introduction.md index 7059bcde15..5501e7f357 100644 --- a/content/riak/kv/2.0.0/introduction.md +++ b/content/riak/kv/2.0.0/introduction.md @@ -27,7 +27,7 @@ For more in-depth implementation details check out the If you're upgrading to Riak 2.0 from an earlier version, please be aware that all of the new features listed below are optional: -* **Riak Data Types** --- Riak's new CRDT-based [Data Types](/riak/kv/2.0.0/developing/data-types) can +* **Riak Data Types** --- Riak's new CRDT-based [Data Types]({{}}riak/kv/2.0.0/developing/data-types) can simplify modeling data in Riak, but are only used in buckets explicitly configured to use them. * **Strong Consistency, Riak Security, and the New Riak Search** --- @@ -35,16 +35,16 @@ that all of the new features listed below are optional: work. If not turned on, they will have no impact on performance. Furthermore, the older Riak Search will continue to be included with Riak. -* **Security** --- [Authentication and authorization](/riak/kv/2.0.0/using/security/basics) can be enabled +* **Security** --- [Authentication and authorization]({{}}riak/kv/2.0.0/using/security/basics) can be enabled or disabled at any time. -* **Configuration management** --- Riak's [configuration files](/riak/kv/2.0.0/configuring/reference/) have +* **Configuration management** --- Riak's [configuration files]({{}}riak/kv/2.0.0/configuring/reference/) have been streamlined into a single file named `riak.conf`. If you are upgrading, however, your existing `app.config` and `vm.args` files will still be recognized in version 2.0. -* **Bucket Types** --- While we strongly recommend [using bucket types](/riak/kv/2.0.0/using/reference/bucket-types) when creating new buckets, they are not required. +* **Bucket Types** --- While we strongly recommend [using bucket types]({{}}riak/kv/2.0.0/using/reference/bucket-types) when creating new buckets, they are not required. 
* **Dotted Version Vectors (DVVs)** --- This alternative to traditional - [vector clocks](/riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks) is enabled by default - in all [bucket types](/riak/kv/2.0.0/using/reference/bucket-types), but DVVs can be disabled + [vector clocks]({{}}riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks) is enabled by default + in all [bucket types]({{}}riak/kv/2.0.0/using/reference/bucket-types), but DVVs can be disabled by setting the `dvv_enabled` property to `false` on any bucket type. In a nutshell, upgrading to 2.0 will change how you use Riak only if you @@ -52,17 +52,17 @@ want it to. But even if you don't plan on using the new features, there are a number of improvements that make upgrading a good choice, including the following: -* [Cluster metadata](/riak/kv/2.0.0/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that +* [Cluster metadata]({{}}riak/kv/2.0.0/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that reduces the amount of inter-node gossip in Riak clusters, which can reduce network congestion. -* [Active Anti-Entropy](/riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy +* [Active Anti-Entropy]({{}}riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy (AAE) feature that is turned on by default since version 1.3, AAE performance has been improved in version 2.0. * [Bug patches](https://github.com/basho/riak/blob/2.0/RELEASE-NOTES.md) --- A variety of bugs present in earlier versions have been identified and patched. -More on upgrading can be found in our [Riak 2.0 upgrade guide](/riak/kv/2.0.0/setup/upgrading/version). +More on upgrading can be found in our [Riak 2.0 upgrade guide]({{}}riak/kv/2.0.0/setup/upgrading/version). ## Riak Data Types @@ -73,20 +73,20 @@ application is responsible for resolving conflicts between replicas of objects stored in different Riak nodes. Riak 2.0 offers a new approach to this problem for a wide range of use -cases in the form of [Riak Data Types](/riak/kv/2.0.0/developing/data-types). Instead of +cases in the form of [Riak Data Types]({{}}riak/kv/2.0.0/developing/data-types). Instead of forcing the application to resolve conflicts, Riak offers five Data Types that can reduce some of the complexities of developing using -Riak: [flags](/riak/kv/2.0.0/developing/data-types/maps#flags), [registers](/riak/kv/2.0.0/developing/data-types/maps#registers), -[counters](/riak/kv/2.0.0/developing/data-types/counters), [sets](/riak/kv/2.0.0/developing/data-types/sets), and -[maps](/riak/kv/2.0.0/developing/data-types/maps). +Riak: [flags]({{}}riak/kv/2.0.0/developing/data-types/maps#flags), [registers]({{}}riak/kv/2.0.0/developing/data-types/maps#registers), +[counters]({{}}riak/kv/2.0.0/developing/data-types/counters), [sets]({{}}riak/kv/2.0.0/developing/data-types/sets), and +[maps]({{}}riak/kv/2.0.0/developing/data-types/maps). #### Relevant Docs -* [Using Data Types](/riak/kv/2.0.0/developing/data-types) explains how to use Riak Data Types on the +* [Using Data Types]({{}}riak/kv/2.0.0/developing/data-types) explains how to use Riak Data Types on the application side, with usage examples for all five Data Types in all of Basho's officially supported clients (Java, Ruby, Python, .NET and Erlang) and for Riak's HTTP interface. 
-* [Data Types](/riak/kv/2.0.0/developing/data-types) explains some of the theoretical concerns that drive +* [Data Types]({{}}riak/kv/2.0.0/developing/data-types) explains some of the theoretical concerns that drive Riak Data Types and shares details about how they are implemented in Riak. @@ -103,11 +103,11 @@ Search, integrating Riak with [Apache Solr](https://lucene.apache.org/solr/)'s f #### Relevant Docs -* [Using Search](/riak/kv/2.0.0/developing/usage/search) provides an overview of how to use the new +* [Using Search]({{}}riak/kv/2.0.0/developing/usage/search) provides an overview of how to use the new Riak Search. -* [Search Schema](/riak/kv/2.0.0/developing/usage/search-schemas) shows you how to create and manage custom search +* [Search Schema]({{}}riak/kv/2.0.0/developing/usage/search-schemas) shows you how to create and manage custom search schemas. -* [Search Details](/riak/kv/2.0.0/using/reference/search) provides an in-depth look at the design +* [Search Details]({{}}riak/kv/2.0.0/using/reference/search) provides an in-depth look at the design considerations that went into the new Riak Search. #### Video @@ -125,13 +125,13 @@ some (or perhaps all) of your data. #### Relevant Docs -* [Using Strong Consistency](/riak/kv/2.0.0/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong +* [Using Strong Consistency]({{}}riak/kv/2.0.0/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong consistency subsystem and to apply strong consistency guarantees to data stored in specified buckets. -* [Strong Consistency](/riak/kv/2.0.0/using/reference/strong-consistency) provides a theoretical treatment of how a - strongly consistent system differs from an [eventually consistent](/riak/kv/2.0.0/learn/concepts/eventual-consistency) system, as well as details about how +* [Strong Consistency]({{}}riak/kv/2.0.0/using/reference/strong-consistency) provides a theoretical treatment of how a + strongly consistent system differs from an [eventually consistent]({{}}riak/kv/2.0.0/learn/concepts/eventual-consistency) system, as well as details about how strong consistency is implemented in Riak. -* [Managing Strong Consistency](/riak/kv/2.0.0/configuring/strong-consistency) is a guide to strong consistency for +* [Managing Strong Consistency]({{}}riak/kv/2.0.0/configuring/strong-consistency) is a guide to strong consistency for Riak operators. #### Video @@ -155,11 +155,11 @@ Riak itself and managed through a simple command-line interface. #### Relevant Docs -* [Authentication and Authorization](/riak/kv/2.0.0/using/security/basics) explains how Riak Security can be +* [Authentication and Authorization]({{}}riak/kv/2.0.0/using/security/basics) explains how Riak Security can be enabled and disabled, how users and groups are managed, how authorization to perform certain operations can be granted and revoked, how security ciphers can be chosen, and more. -* [Managing Security Sources](/riak/kv/2.0.0/using/security/managing-sources/) is an in-depth tutorial on how to +* [Managing Security Sources]({{}}riak/kv/2.0.0/using/security/managing-sources/) is an in-depth tutorial on how to implement Riak's four supported authentication sources: trusted networks, passwords, pluggable authentication modules, and certificates. @@ -194,7 +194,7 @@ override any settings from the new system. 
#### Relevant Docs -* [Configuration Files](/riak/kv/2.0.0/configuring/reference/) lists and describes all of the configurable +* [Configuration Files]({{}}riak/kv/2.0.0/configuring/reference/) lists and describes all of the configurable parameters available in Riak 2.0, from configuring your chosen storage backend(s) to setting default bucket properties to controlling Riak's logging system and much more. @@ -214,7 +214,7 @@ and keys. #### Relevant Docs -* [Using Bucket Types](/riak/kv/2.0.0/using/reference/bucket-types) explains how to create, modify, and activate +* [Using Bucket Types]({{}}riak/kv/2.0.0/using/reference/bucket-types) explains how to create, modify, and activate bucket types, as well as how the new system differs from the older, bucket properties-based system. @@ -226,20 +226,20 @@ and [Jordan West](https://github.com/jrwest). ## Dotted Version Vectors -In prior versions of Riak, [conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution) was managed using -[vector clocks](/riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks), which track object update causality. +In prior versions of Riak, [conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution) was managed using +[vector clocks]({{}}riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks), which track object update causality. Riak 2.0 has added support for dotted version vectors (DVVs). DVVs serve an analogous role to vector -clocks but are more effective at containing [sibling explosion](/riak/kv/2.0.0/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. +clocks but are more effective at containing [sibling explosion]({{}}riak/kv/2.0.0/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. #### Relevant Docs -* [Dotted Version Vectors](/riak/kv/2.0.0/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. +* [Dotted Version Vectors]({{}}riak/kv/2.0.0/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. ## New Client Libraries -While Basho offered official [client libraries](/riak/kv/2.0.0/developing/client-libraries) for Java, Ruby, +While Basho offered official [client libraries]({{}}riak/kv/2.0.0/developing/client-libraries) for Java, Ruby, Python, .NET and Erlang for versions of Riak prior to 2.0, all clients have undergone major changes in anticipation of the 2.0 release. 
diff --git a/content/riak/kv/2.0.0/learn/concepts.md b/content/riak/kv/2.0.0/learn/concepts.md index 15fda758a9..61a6611823 100644 --- a/content/riak/kv/2.0.0/learn/concepts.md +++ b/content/riak/kv/2.0.0/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.0.0/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.0.0/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.0/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.0/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.0/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.0/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.0/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.0/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.0/learn/concepts/vnodes -[config index]: /riak/kv/2.0.0/configuring -[plan index]: /riak/kv/2.0.0/setup/planning -[use index]: /riak/kv/2.0.0/using/ +[concept aae]: {{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.0.0/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.0/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.0/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.0/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.0/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.0/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.0/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.0/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.0.0/configuring +[plan index]: {{}}riak/kv/2.0.0/setup/planning +[use index]: {{}}riak/kv/2.0.0/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.0.0/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.0.0/learn/concepts/active-anti-entropy.md index 305b0e0a73..d94fb19850 100644 --- a/content/riak/kv/2.0.0/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.0.0/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.0/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.0.0/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.0.0/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.0.0/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.0.0/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.0.0/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.0.0/developing/usage/search +[usage search]: {{}}riak/kv/2.0.0/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.0.0/learn/concepts/buckets.md b/content/riak/kv/2.0.0/learn/concepts/buckets.md index af7b22362a..31450728e5 100644 --- a/content/riak/kv/2.0.0/learn/concepts/buckets.md +++ b/content/riak/kv/2.0.0/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.0.0/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.0.0/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.0.0/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.0.0/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.0.0/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.0.0/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.0.0/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.0/using/reference/strong-consistency -[config basic]: /riak/kv/2.0.0/configuring/basic -[dev api http]: /riak/kv/2.0.0/developing/api/http -[dev data types]: /riak/kv/2.0.0/developing/data-types -[glossary ring]: /riak/kv/2.0.0/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.0/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.0/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.0/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.0/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.0/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.0.0/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.0.0/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.0.0/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.0.0/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.0.0/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.0.0/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.0/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.0.0/configuring/basic +[dev api http]: {{}}riak/kv/2.0.0/developing/api/http +[dev data types]: {{}}riak/kv/2.0.0/developing/data-types +[glossary ring]: {{}}riak/kv/2.0.0/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.0/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.0/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.0/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.0/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.0/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.0.0/learn/concepts/capability-negotiation.md b/content/riak/kv/2.0.0/learn/concepts/capability-negotiation.md index e4c7856ed2..34d46e28c7 100644 --- a/content/riak/kv/2.0.0/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.0.0/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.0.0/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.0.0/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.0.0/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.0.0/developing/usage/mapreduce In versions of Riak prior to 1.2.0, [rolling upgrades][upgrade cluster] from an older version of Riak to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.0.0/learn/concepts/causal-context.md b/content/riak/kv/2.0.0/learn/concepts/causal-context.md index c6ef77485d..a3fbe98f00 100644 --- a/content/riak/kv/2.0.0/learn/concepts/causal-context.md +++ b/content/riak/kv/2.0.0/learn/concepts/causal-context.md @@ -18,18 +18,18 @@ aliases: --- -[concept aae]: /riak/kv/2.0.0/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.0.0/developing/api/http -[dev key value]: /riak/kv/2.0.0/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.0.0/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.0.0/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.0/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.0.0/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.0.0/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.0.0/developing/api/http +[dev key value]: {{}}riak/kv/2.0.0/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.0.0/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.0.0/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.0/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.0.0/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.0.0/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -80,7 +80,7 @@ sections below. In the case of outcome 2, the choice between **a** and **b** is yours to make. If you set the `allow_mult` parameter to `true` for a bucket, -[using bucket types](/riak/kv/2.0.0/developing/usage/bucket-types), all writes to that bucket will create siblings +[using bucket types]({{}}riak/kv/2.0.0/developing/usage/bucket-types), all writes to that bucket will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions).
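When siblings are being created, the application has to pick a winner on read. One way this might look in the Python client (a hedged sketch; the bucket type name and the resolution rule are illustrative only):

```python
import riak

client = riak.RiakClient(pb_port=8087)
# assumes a bucket type whose allow_mult property is set to true
bucket = client.bucket_type('siblings_allowed').bucket('customers')

obj = bucket.get('best_customer')
if len(obj.siblings) > 1:
    # naive illustrative rule: keep the sibling with the longest value
    obj.siblings = [max(obj.siblings, key=lambda s: len(str(s.data)))]
    obj.store()
```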
diff --git a/content/riak/kv/2.0.0/learn/concepts/clusters.md b/content/riak/kv/2.0.0/learn/concepts/clusters.md index 0af1c562a2..3edb18c20f 100644 --- a/content/riak/kv/2.0.0/learn/concepts/clusters.md +++ b/content/riak/kv/2.0.0/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.0/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.0.0/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.0/learn/concepts/replication -[glossary node]: /riak/kv/2.0.0/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.0.0/learn/dynamo -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.0/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.0/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.0.0/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.0.0/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.0/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.0/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.0.0/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.0/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.0/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.0.0/learn/concepts/crdts.md b/content/riak/kv/2.0.0/learn/concepts/crdts.md index d3a62d8531..744ab9ddc1 100644 --- a/content/riak/kv/2.0.0/learn/concepts/crdts.md +++ b/content/riak/kv/2.0.0/learn/concepts/crdts.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context dvv]: /riak/kv/2.0.0/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.0.0/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.0.0/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.0.0/developing/data-types -[glossary node]: /riak/kv/2.0.0/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.0.0/developing/usage/conflict-resolution +[concept causal context dvv]: {{}}riak/kv/2.0.0/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.0.0/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.0.0/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.0.0/developing/data-types +[glossary node]: {{}}riak/kv/2.0.0/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.0.0/developing/usage/conflict-resolution A pure key/value store is completely agnostic toward the data stored @@ -32,7 +32,7 @@ within it. Any key can be associated with values of any conceivable type, from short strings to large JSON objects to video files. Riak began as a pure key/value store, but over time it has become more and more aware of the data stored in it through features like [secondary -indexes](/riak/kv/2.0.0/developing/usage/secondary-indexes/) and [Search](/riak/kv/2.0.0/developing/usage/search/). +indexes]({{}}riak/kv/2.0.0/developing/usage/secondary-indexes/) and [Search]({{}}riak/kv/2.0.0/developing/usage/search/). In version 2.0, Riak continued this evolution by introducing a series of eventually convergent **Data Types**. Riak Data Types are convergent @@ -214,7 +214,7 @@ The beauty of Data Types is that Riak "knows" how to resolve value conflicts by applying Data Type-specific rules. In general, Riak does this by remembering the **history** of a value and broadcasting that history along with the current value in the form of a [context -object](/riak/kv/2.0.0/developing/data-types/#Data-Types-and-Context) that is similar to a +object]({{}}riak/kv/2.0.0/developing/data-types/#Data-Types-and-Context) that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak uses the history of each Data Type to make deterministic judgments about which value should be deemed correct.
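For instance, incrementing a counter from two clients at once never produces siblings that the application must untangle; Riak merges the updates itself. A minimal sketch with the Python client (assuming a bucket type named `counters` created with its `datatype` property set to `counter`):

```python
import riak
from riak import datatypes

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('counters').bucket('traffic')

counter = datatypes.Counter(bucket, 'page_views')
counter.increment()   # a local operation, recorded against the context
counter.store()       # sends the increment; Riak merges concurrent ones

counter.reload()
print(counter.value)
```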
diff --git a/content/riak/kv/2.0.0/learn/concepts/eventual-consistency.md b/content/riak/kv/2.0.0/learn/concepts/eventual-consistency.md index 87af53b497..f2b53eeec9 100644 --- a/content/riak/kv/2.0.0/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.0.0/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.0/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters -[concept replication]: /riak/kv/2.0.0/learn/concepts/replication -[glossary node]: /riak/kv/2.0.0/learn/glossary/#node -[glossary read rep]: /riak/kv/2.0.0/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.0/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.0.0/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.0.0/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.0/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.0.0/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.0/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.0.0/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.0.0/developing/data-types/), or you can resolve those conflicts on the application side by employing use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.0.0/developing/data-modeling/). +or models]({{}}riak/kv/2.0.0/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/).
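As one concrete example of the timestamp-based option, the official Python client ships a simple last-write-wins resolver that can be registered client-wide (a sketch; per-bucket resolvers are possible as well):

```python
import riak
from riak.resolver import last_written_resolver

client = riak.RiakClient(pb_port=8087)
# resolve sibling conflicts by keeping the most recently written value
client.resolver = last_written_resolver
```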
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.0.0/learn/concepts/keys-and-objects.md b/content/riak/kv/2.0.0/learn/concepts/keys-and-objects.md index 263a85559b..9ee6d4ec4f 100644 --- a/content/riak/kv/2.0.0/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.0.0/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.0/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.0.0/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.0.0/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.0.0/learn/concepts/replication.md b/content/riak/kv/2.0.0/learn/concepts/replication.md index 9eaaf2ae62..0dcadc8374 100644 --- a/content/riak/kv/2.0.0/learn/concepts/replication.md +++ b/content/riak/kv/2.0.0/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.0.0/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.0.0/learn/concepts/vnodes -[glossary node]: /riak/kv/2.0.0/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.0/learn/glossary/#ring -[usage replication]: /riak/kv/2.0.0/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.0.0/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.0.0/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.0/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.0.0/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.0.0/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.0.0/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.0.0/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replication]({{}}riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail.
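From the application side, you can check the replication factor a bucket is actually using, and individual requests can relax their read quorum. A rough Python sketch (bucket and key names are illustrative):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('artists')

# inspect the replication factor currently in effect for this bucket
print(bucket.get_properties()['n_val'])

# per-request override: let this read succeed once a single replica answers
obj = bucket.get('artist', r=1)
```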
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.0.0/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.0.0/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.0.0/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.0.0/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.0.0/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.0.0/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.0.0/learn/concepts/strong-consistency.md b/content/riak/kv/2.0.0/learn/concepts/strong-consistency.md index 4550a9be60..2cdb44d897 100644 --- a/content/riak/kv/2.0.0/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.0.0/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.0/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.0/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.0/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.0/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.0/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.0/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.0/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.0.0/learn/concepts/vnodes.md b/content/riak/kv/2.0.0/learn/concepts/vnodes.md index dd865eb82d..06a64c093b 100644 --- a/content/riak/kv/2.0.0/learn/concepts/vnodes.md +++ b/content/riak/kv/2.0.0/learn/concepts/vnodes.md @@ -16,16 +16,16 @@ aliases: --- -[concept causal context]: /riak/kv/2.0.0/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.0.0/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.0.0/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.0/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.0.0/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.0/learn/glossary/#ring -[perf strong consistency]: /riak/kv/2.0.0/using/performance/strong-consistency -[plan backend]: /riak/kv/2.0.0/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.0/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.0.0/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.0.0/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.0.0/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.0.0/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.0/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.0.0/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.0/learn/glossary/#ring +[perf strong consistency]: {{}}riak/kv/2.0.0/using/reference/strong-consistency +[plan backend]: {{}}riak/kv/2.0.0/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.0/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.0.0/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -81,7 +81,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.0/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -103,7 +103,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.0.0/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.0.0/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.0.0/learn/dynamo.md b/content/riak/kv/2.0.0/learn/dynamo.md index a6c499c93d..d687f52ea2 100644 --- a/content/riak/kv/2.0.0/learn/dynamo.md +++ b/content/riak/kv/2.0.0/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.0.0/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.0.0/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.0.0/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.0.0/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. 
It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. -[HTTP API]: /riak/kv/2.0.0/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.0.0/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.0.0/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.0.0/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.0.0/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.0.0/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.0.0/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.0.0/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.0.0/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.0.0/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.0.0/developing/api/http/) +>[REST API]({{}}riak/kv/2.0.0/developing/api/http/) > ->[Writing Data](/riak/kv/2.0.0/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.0.0/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.0.0/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.0.0/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.0.0/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.0.0/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.0.0/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.0.0/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.0.0/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.0.0/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. 
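To make the R and W quorum discussion above concrete, here is a minimal sketch against Riak's HTTP API; the host, port, bucket, and key are placeholders, and the `r`/`w` query parameters simply override the bucket defaults for a single request.

```
# Write with a write quorum of 2; returns once 2 of the N replicas ack
curl -XPUT "http://localhost:8098/buckets/test/keys/demo?w=2" \
  -H "Content-Type: text/plain" \
  -d "hello"

# Read with a read quorum of 2; waits for 2 replicas to respond
curl "http://localhost:8098/buckets/test/keys/demo?r=2"
```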
-[Multi Datacenter Replication]: /riak/kv/2.0.0/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.0.0/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. > This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.0.0/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.0.0/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.0.0/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.0.0/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.0.0/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.0.0/setup/planning/backend/ -[Bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.0.0/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.0.0/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.0.0/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.0.0/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.0.0/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.0.0/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.0.0/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.0.0/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. 
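As a companion to the Ring Membership notes above, here is a sketch of triggering membership changes with the `riak-admin` tool those notes reference; the node names are examples only.

```
# On each joining node, stage a join to an existing cluster member
riak-admin cluster join riak@node1.example.com

# Review and then commit the staged plan
riak-admin cluster plan
riak-admin cluster commit

# Watch the ring state converge as it is gossiped between nodes
riak-admin member-status
riak-admin ring-status
```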
Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.0.0/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.0/using/performance/benchmarking/ Dynamo is used by several services with different configurations. These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.0.0/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.0.0/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.0.0/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.0.0/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.0.0/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.0.0/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.0.0/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.0.0/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.0.0/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.0.0/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. 
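In that spirit, a sketch of a Basho Bench run, assuming a build from the GitHub repository linked earlier; the bundled example config file name is illustrative.

```
git clone https://github.com/basho/basho_bench.git
cd basho_bench && make

# Run a benchmark against your own cluster using one of the bundled configs
./basho_bench examples/riakc_pb.config

# Summarize the collected results as graphs (requires R)
make results
```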
-[Basho Bench]: /riak/kv/2.0.0/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.0/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. -[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.0.0/learn/glossary.md b/content/riak/kv/2.0.0/learn/glossary.md index a5ed0f02b1..6e0ae9b107 100644 --- a/content/riak/kv/2.0.0/learn/glossary.md +++ b/content/riak/kv/2.0.0/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.0.0/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.0.0/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.0.0/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.0/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.0/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.0/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.0/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.0.0/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.0.0/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.0.0/developing/api/http -[dev data model]: /riak/kv/2.0.0/developing/data-modeling -[dev data types]: /riak/kv/2.0.0/developing/data-types -[glossary read rep]: /riak/kv/2.0.0/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.0.0/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.0.0/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.0/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.0/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.0/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.0/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.0.0/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.0.0/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.0.0/developing/api/http +[dev data model]: {{}}riak/kv/2.0.0/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.0/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.0/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.0.0/learn/dynamo -[plan cluster 
capacity]: /riak/kv/2.0.0/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.0.0/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.0.0/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.0.0/learn/dynamo +[plan cluster capacity]: {{}}riak/kv/2.0.0/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.0.0/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.0.0/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.0.0/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.0/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.0/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.0/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.0/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.0/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.0.0/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.0.0/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. 
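A short sketch of that third namespace in practice; the type name and `n_val` mirror the `n_val` example earlier in this diff.

```
# Create, activate, and inspect a bucket type
riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
riak-admin bucket-type activate n_val_of_2
riak-admin bucket-type status n_val_of_2

# Over HTTP, the type becomes part of the key's address:
#   /types/n_val_of_2/buckets/<bucket>/keys/<key>
```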
-* [Bucket Types](/riak/kv/2.0.0/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.0.0/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.0.0/learn/use-cases.md b/content/riak/kv/2.0.0/learn/use-cases.md index 5db93466bb..82fffe14df 100644 --- a/content/riak/kv/2.0.0/learn/use-cases.md +++ b/content/riak/kv/2.0.0/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.0.0/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.0.0/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.0.0/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.0.0/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.0.0/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.0.0/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.0.0/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.0.0/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.0.0/developing/data-types -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.0.0/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.0/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.0/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.0.0/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.0.0/developing/data-modeling/#log-data +[dev data model sensor data]: {{}}riak/kv/2.0.0/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.0.0/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.0.0/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.0.0/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.0.0/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.0.0/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.0.0/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.0.0/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.0.0/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.0/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.0/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. 
In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.0.0/learn/why-riak-kv.md b/content/riak/kv/2.0.0/learn/why-riak-kv.md index 67b6cc2b4b..272ed2896b 100644 --- a/content/riak/kv/2.0.0/learn/why-riak-kv.md +++ b/content/riak/kv/2.0.0/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.0.0/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.0.0/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.0/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.0.0/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.0.0/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.0.0/developing/data-types -[glossary read rep]: /riak/kv/2.0.0/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.0.0/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.0/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.0.0/release-notes.md b/content/riak/kv/2.0.0/release-notes.md index 642c1490b9..e96ed09865 100644 --- a/content/riak/kv/2.0.0/release-notes.md +++ b/content/riak/kv/2.0.0/release-notes.md @@ -17,8 +17,8 @@ aliases: ## Major Features / Additions to 2.0 A listing and explanation of new features in version 2.0, along with -links to relevant documentation, can be found [in our official docs](http://docs.basho.com/riak/2.0.0/intro-v20/). You can find an -[Upgrading to 2.0 Guide](http://docs.basho.com/riak/2.0.0/upgrade-v20/) +links to relevant documentation, can be found [in our official docs]({{< baseurl >}}riak/kv/2.0.0/intro-v20/). You can find an +[Upgrading to 2.0 Guide]({{< baseurl >}}riak/kv/2.0.0/upgrade-v20/) there as well. The material below should be read as a more technical supplement to that material. @@ -45,7 +45,7 @@ perform these actions. However, the Bucket Properties HTTP API, Protocol Buffers messages, and supported clients have been updated to set and retrieve bucket properties for a bucket with a given bucket type. -For more details on bucket types see our [official documentation](http://docs.basho.com/riak/2.0.0/dev/advanced/bucket-types/). +For more details on bucket types see our [official documentation]({{< baseurl >}}riak/kv/2.0.0/dev/advanced/bucket-types/). ### Convergent Data Types @@ -61,9 +61,9 @@ All data types must be stored in buckets bearing a bucket type that sets the `datatype` property to one of `counter`, `set`, or `map`. Note that the bucket must have the `allow_mult` property set to `true`. See documentation on [Riak Data -Types](http://docs.basho.com/riak/2.0.0/dev/using/data-types/) and +Types]({{< baseurl >}}riak/kv/2.0.0/dev/using/data-types/) and [bucket -types](http://docs.basho.com/riak/2.0.0/dev/advanced/bucket-types/) for +types]({{< baseurl >}}riak/kv/2.0.0/dev/advanced/bucket-types/) for more details. These Data Types are wrapped in a regular `riak_object`, so size @@ -75,13 +75,13 @@ too. 
The following Data Types are currently available: Counters behave much like they do in version 1.4, except that you can use Riak's new bucket types feature to ensure no type conflicts. Documentation on counters can be found -[here](http://docs.basho.com/riak/2.0.0/dev/using/data-types/#Counters). +[here]({{< baseurl >}}riak/kv/2.0.0/developing/data-types/counters/). #### Sets Sets allow you to store multiple distinct opaque binary values against a key. See the -[documentation](http://docs.basho.com/riak/2.0.0/dev/using/data-types/#Sets) +[documentation]({{< baseurl >}}riak/kv/2.0.0/developing/data-types/sets/) for more details on usage and semantics. #### Maps @@ -91,7 +91,7 @@ as a container for composing ad hoc data structures from multiple Data Types. Inside a map you may store sets, counters, flags (similar to booleans), registers (which store binaries according to a last-write-wins logic), and even other maps. Please see the -[documentation](http://docs.basho.com/riak/2.0.0/dev/using/data-types/#Maps) +[documentation]({{< baseurl >}}riak/kv/2.0.0/developing/data-types/maps/) for usage and semantics. #### API @@ -120,7 +120,7 @@ The basic rule is "you cannot remove something you haven't seen", and the context tells Riak what you've actually seen. All of the official Basho clients, with the exception of the Java client, handle opaque contexts for you. Please see the -[documentation](http://docs.basho.com/riak/2.0.0/dev/using/data-types/#Data-Types-and-Context) +[documentation]({{< baseurl >}}riak/kv/2.0.0/dev/using/data-types/#Data-Types-and-Context) for more details. Please see **Known Issues** below for two known issues with Riak maps. @@ -144,7 +144,7 @@ siblings will only grow to the number of **truly concurrent** writes, not in relation to the number of times the object has been written, merged, or replicated to other clusters. More information can be found in our [Dotted Version -Vectors](http://docs.basho.com/riak/2.0.0/theory/concepts/dotted-version-vectors/) +Vectors]({{< baseurl >}}riak/kv/2.0.0/theory/concepts/dotted-version-vectors/) document. ### riak_control @@ -161,24 +161,24 @@ while it was being developed. Please read there for the most relevant information about Riak 2.0's new search. Additional official documentation can be found in the following three docs: -* [Using Search](http://docs.basho.com/riak/2.0.0/dev/using/search/) -* [Search Details](http://docs.basho.com/riak/2.0.0/dev/advanced/search/) -* [Search Schema](http://docs.basho.com/riak/2.0.0/dev/advanced/search-schema/) +* [Using Search]({{< baseurl >}}riak/kv/2.0.0/dev/using/search/) +* [Search Details]({{< baseurl >}}riak/kv/2.0.0/dev/advanced/search/) +* [Search Schema]({{< baseurl >}}riak/kv/2.0.0/dev/advanced/search-schema/) ### Strong Consistency Riak's new strong consistency feature is currently an open source only feature and is not yet commercially supported. 
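Tying together the Data Types notes above: each data type needs a bucket type whose `datatype` property is `counter`, `set`, or `map`, with `allow_mult` set to `true`. A hedged sketch, with arbitrary type names:

```
riak-admin bucket-type create counters '{"props":{"datatype":"counter","allow_mult":true}}'
riak-admin bucket-type create sets '{"props":{"datatype":"set","allow_mult":true}}'
riak-admin bucket-type create maps '{"props":{"datatype":"map","allow_mult":true}}'

# Types must be activated before use
riak-admin bucket-type activate counters
```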
Official documentation on this feature can be found in the following docs: -* [Using Strong Consistency](http://docs.basho.com/riak/2.0.0/dev/advanced/strong-consistency/) -* [Managing Strong Consistency](http://docs.basho.com/riak/2.0.0/ops/advanced/strong-consistency) -* [Strong Consistency](http://docs.basho.com/riak/2.0.0/theory/concepts/strong-consistency/) +* [Using Strong Consistency]({{< baseurl >}}riak/kv/2.0.0/dev/advanced/strong-consistency/) +* [Managing Strong Consistency]({{< baseurl >}}riak/kv/2.0.0/ops/advanced/strong-consistency) +* [Strong Consistency]({{< baseurl >}}riak/kv/2.0.0/theory/concepts/strong-consistency/) For more in-depth technical material, see our internal documentation [here](https://github.com/basho/riak_ensemble/blob/wip/riak-2.0-user-docs/riak_consistent_user_docs.md) and [here](https://github.com/basho/riak_ensemble/blob/wip/riak-2.0-user-docs/riak_consistent_user_docs.md). We also strongly advise you to see the list of [known -issues](http://docs.basho.com/riak/2.0.0/ops/advanced/strong-consistency/#Known-Issues). +issues]({{< baseurl >}}riak/kv/2.0.0/ops/advanced/strong-consistency/#Known-Issues). ### Security @@ -190,7 +190,7 @@ although Riak still should not be exposed directly to any unsecured network. Basho's documentation website includes [extensive coverage of the new -feature](http://docs.basho.com/riak/2.0.0/ops/running/authz/). Several +feature]({{< baseurl >}}riak/kv/2.0.0/ops/running/authz/). Several important caveats when enabling security: * There is no support yet for auditing. This is on the roadmap for a @@ -199,7 +199,7 @@ important caveats when enabling security: walking and Riak's original full-text search tool. * There are restrictions on Erlang modules exposed to MapReduce jobs when security is enabled. Those are documented - [here](http://docs.basho.com/riak/2.0.0/ops/running/authz/#Security-Checklist). + [here]({{< baseurl >}}riak/kv/2.0.0/ops/running/authz/#Security-Checklist). * Enabling security requires that applications be designed to transition gracefully based on the server response **or** applications will need to be halted before security is enabled and brought back online @@ -241,7 +241,7 @@ going forward. ## Client libraries Most [Basho-supported client -libraries](http://docs.basho.com/riak/latest/dev/using/libraries/) have +libraries]({{< baseurl >}}riak/kv/latest/dev/using/libraries/) have been updated for 2.0: * [Java](https://github.com/basho/riak-java-client) @@ -300,19 +300,19 @@ future version. **We do not recommend using these features in version few features that have already been removed in Riak 2.0. A listing can be found in the **Termination Notices** section below. -* [Link Walking](http://docs.basho.com/riak/latest/dev/using/link-walking/) +* [Link Walking]({{< baseurl >}}riak/kv/latest/dev/using/link-walking/) is deprecated and will not work if security is enabled. -* [Key Filters](http://docs.basho.com/riak/latest/dev/using/keyfilters/) +* [Key Filters]({{< baseurl >}}riak/kv/latest/dev/using/keyfilters/) are deprecated; we strongly discourage key listing in production due to the overhead involved, so it's better to maintain key indexes as values in Riak (see also our new - [set data type](http://docs.basho.com/riak/2.0.0/dev/using/data-types/#Sets) + [set data type]({{< baseurl >}}riak/kv/2.0.0/developing/data-types/sets/) as a useful tool for such indexes). 
* JavaScript MapReduce is deprecated; we have expanded our - [Erlang MapReduce](http://docs.basho.com/riak/2.0.0/dev/advanced/mapreduce/) + [Erlang MapReduce]({{< baseurl >}}riak/kv/2.0.0/dev/advanced/mapreduce/) documentation to assist with the transition. * Riak Search 1.0 is being phased out in favor of the new Solr-based - [Riak Search 2.0](http://docs.basho.com/riak/2.0.0/dev/advanced/search/). + [Riak Search 2.0]({{< baseurl >}}riak/kv/2.0.0/dev/advanced/search/). Version 1.0 will not work if security is enabled. * v2 replication (a component of Riak Enterprise) has been superseded by v3 and will be removed in the future. @@ -327,21 +327,20 @@ be found in the **Termination Notices** section below. `riak:local_client/1`); this API may change at any time, so we strongly recommend using our [Erlang client library](http://github.com/basho/riak-erlang-client/) (or [one of the - other libraries](http://docs.basho.com/riak/latest/dev/using/libraries/) + other libraries]({{< baseurl >}}riak/kv/latest/dev/using/libraries/) we support) instead. ## Termination Notices * `riak-admin backup` has been disabled; see - [our documentation](http://docs.basho.com/riak/2.0.0/ops/running/backups/) + [our documentation]({{< baseurl >}}riak/kv/2.0.0/ops/running/backups/) for a detailed look at running backup and restore operations. -* [Client ID-based vector clocks](http://docs.basho.com/riak/1.4.10/ops/advanced/configs/configuration-files/#-code-riak_kv-code-Settings) - have been removed; they were previously turned off by default in - favor of node-based vector clocks via the `vnode_vclocks` - configuration flag. +* Client ID-based vector clocks have been removed; they were previously + turned off by default in favor of node-based vector clocks via + the `vnode_vclocks` configuration flag. * LevelDB configuration values `cache_size` and `max_open_files` have been disabled in favor of `leveldb.maximum_memory.percent`. See - [Configuring eLevelDB](http://docs.basho.com/riak/2.0.0/ops/advanced/backends/leveldb/#Configuring-eLevelDB) + [Configuring eLevelDB]({{< baseurl >}}riak/kv/2.0.0/ops/advanced/backends/leveldb/#Configuring-eLevelDB) in our documentation. ## Known Issues @@ -352,14 +351,14 @@ Riak wiki page](https://github.com/basho/riak/wiki/2.0-known-issues). ## Upgrade Notes A full guide to upgrading to 2.0 can be found [in the official -docs](http://docs.basho.com/riak/2.0.0/upgrade-v20/). The information +docs]({{< baseurl >}}riak/kv/2.0.0/upgrade-v20/). The information below is supplementary. ### Downgrading After Install **Important note**: 2.0 introduces major new features which are incompatible with Riak 1.x. Those features depend on [bucket -types](http://docs.basho.com/riak/2.0.0/dev/advanced/bucket-types/); +types]({{< baseurl >}}riak/kv/2.0.0/dev/advanced/bucket-types/); once *any* bucket type has been created and activated, downgrades are no longer possible. @@ -377,7 +376,7 @@ directory, but we recommend converting your customizations into the `riak.conf` and `advanced.config` files to make configuration easier for you moving forward. More information can be found in our [configuration files -documentation](http://docs.basho.com/riak/2.0.0/ops/advanced/configs/configuration-files/). +documentation]({{< baseurl >}}riak/kv/2.0.0/ops/advanced/configs/configuration-files/). 
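For the LevelDB termination notice above, a sketch of the replacement setting in `riak.conf`; the percentage and the file path are illustrative values only.

```
cat >> /etc/riak/riak.conf <<'EOF'
## Replaces the removed cache_size and max_open_files settings
leveldb.maximum_memory.percent = 70
EOF
```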
## Bugfixes / Changes since 1.4.x diff --git a/content/riak/kv/2.0.0/setup/downgrade.md b/content/riak/kv/2.0.0/setup/downgrade.md index 3571ccc0db..bf4241a8e1 100644 --- a/content/riak/kv/2.0.0/setup/downgrade.md +++ b/content/riak/kv/2.0.0/setup/downgrade.md @@ -17,7 +17,7 @@ aliases: Downgrades of Riak are tested and supported for two feature release versions, with the general procedure being similar to that of a -[rolling upgrade](/riak/kv/2.0.0/setup/upgrading/cluster). +[rolling upgrade]({{}}riak/kv/2.0.0/setup/upgrading/cluster). {{% note title="End Of Life Warning" %}} We test downgrading for two feature release versions. However, all versions below KV 2.0 are End Of Life (EOL) and unsupported. Please be aware of that if you choose to downgrade. @@ -49,9 +49,9 @@ both 1.4 and 1.3 are performed. * Riak Control should be disabled throughout the rolling downgrade process -* [Configuration Files](/riak/kv/2.0.0/configuring/reference) must be replaced with those of the version +* [Configuration Files]({{}}riak/kv/2.0.0/configuring/reference) must be replaced with those of the version being downgraded to -* [Active anti-entropy](/riak/kv/2.0.0/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version +* [Active anti-entropy]({{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version below 1.3. ## Before Stopping a Node @@ -94,7 +94,7 @@ will need to be downgraded before the rolling downgrade begins. This can be done using the --downgrade flag with `riak-admin reformat-indexes` More information on the `riak-admin reformat-indexes` command, and downgrading indexes can be found in the -[`riak-admin`](/riak/kv/2.0.0/using/admin/riak-admin/#reformat-indexes) documentation. +[`riak-admin`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#reformat-indexes) documentation. 
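A sketch of the index downgrade step described above, run on each node before the rolling downgrade begins; the optional concurrency and batch-size arguments are omitted here.

```
riak-admin reformat-indexes --downgrade
```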
## Before Starting a Node diff --git a/content/riak/kv/2.0.0/setup/installing.md b/content/riak/kv/2.0.0/setup/installing.md index b10e25b7f8..26ac97bb72 100644 --- a/content/riak/kv/2.0.0/setup/installing.md +++ b/content/riak/kv/2.0.0/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.0.0/installing/ --- -[install aws]: /riak/kv/2.0.0/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.0/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.0/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.0/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.0/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.0/setup/installing/smartos -[install solaris]: /riak/kv/2.0.0/setup/installing/solaris -[install suse]: /riak/kv/2.0.0/setup/installing/suse -[install windows azure]: /riak/kv/2.0.0/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.0/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.0.0/setup/upgrading +[install aws]: {{}}riak/kv/2.0.0/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.0/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.0/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.0/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.0/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.0/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.0/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.0/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.0/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.0.0/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.0.0/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.0.0/setup/installing/amazon-web-services.md b/content/riak/kv/2.0.0/setup/installing/amazon-web-services.md index bf0f42e0d5..a755c85b3b 100644 --- a/content/riak/kv/2.0.0/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.0.0/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. @@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.0.0/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.0.0/using/security/). 
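Beyond EC2 security groups, the linked security docs cover Riak 2.0's internal access controls; a hedged sketch of the basic commands, with example user and network values:

```
# Enable security, then create a user and a trusted source
riak-admin security enable
riak-admin security add-user riakuser password=ExamplePass1
riak-admin security add-source riakuser 10.0.0.0/24 password

# Grant key/value permissions on any bucket
riak-admin security grant riak_kv.get,riak_kv.put on any to riakuser
```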
## Clustering Riak on AWS diff --git a/content/riak/kv/2.0.0/setup/installing/debian-ubuntu.md b/content/riak/kv/2.0.0/setup/installing/debian-ubuntu.md index 8c3e5ef830..9f99f8c247 100644 --- a/content/riak/kv/2.0.0/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.0.0/setup/installing/debian-ubuntu.md @@ -20,10 +20,10 @@ aliases: -[install source index]: /riak/kv/2.0.0/setup/installing/source/ -[security index]: /riak/kv/2.0.0/using/security/ -[install source erlang]: /riak/kv/2.0.0/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.0/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.0/setup/installing/source/ +[security index]: {{}}riak/kv/2.0.0/using/security/ +[install source erlang]: {{}}riak/kv/2.0.0/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.0/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.0.0/setup/installing/freebsd.md b/content/riak/kv/2.0.0/setup/installing/freebsd.md index 04e880d7ef..68fab28b1c 100644 --- a/content/riak/kv/2.0.0/setup/installing/freebsd.md +++ b/content/riak/kv/2.0.0/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.0.0/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.0/downloads/ -[install verify]: /riak/kv/2.0.0/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.0/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.0/downloads/ +[install verify]: {{}}riak/kv/2.0.0/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.0.0/setup/installing/mac-osx.md b/content/riak/kv/2.0.0/setup/installing/mac-osx.md index 4af44f0a27..707d0b65e4 100644 --- a/content/riak/kv/2.0.0/setup/installing/mac-osx.md +++ b/content/riak/kv/2.0.0/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.0.0/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.0.0/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.0/setup/installing/verify +[perf open files]: {{}}riak/kv/2.0.0/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.0.0/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.0/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. You can install from source or download a diff --git a/content/riak/kv/2.0.0/setup/installing/rhel-centos.md b/content/riak/kv/2.0.0/setup/installing/rhel-centos.md index 3e19331bff..d6ef21c6c2 100644 --- a/content/riak/kv/2.0.0/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.0.0/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.0.0/setup/installing/source -[install source erlang]: /riak/kv/2.0.0/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.0/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.0/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.0/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.0/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. 
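A sketch of the binary-package route on Debian/Ubuntu and RHEL/CentOS, assuming the Basho packagecloud repositories; exact repository setup may differ for your platform and release.

```
# Debian/Ubuntu
curl -s https://packagecloud.io/install/repositories/basho/riak/script.deb.sh | sudo bash
sudo apt-get install riak

# RHEL/CentOS
curl -s https://packagecloud.io/install/repositories/basho/riak/script.rpm.sh | sudo bash
sudo yum install riak
```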
The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.0.0/setup/installing/smartos.md b/content/riak/kv/2.0.0/setup/installing/smartos.md index 42a620b226..5e69ce7995 100644 --- a/content/riak/kv/2.0.0/setup/installing/smartos.md +++ b/content/riak/kv/2.0.0/setup/installing/smartos.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.0/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.0/setup/installing/verify The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. diff --git a/content/riak/kv/2.0.0/setup/installing/solaris.md b/content/riak/kv/2.0.0/setup/installing/solaris.md index 5b5bd9644e..c5d1a5bc1b 100644 --- a/content/riak/kv/2.0.0/setup/installing/solaris.md +++ b/content/riak/kv/2.0.0/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.0/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.0/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. diff --git a/content/riak/kv/2.0.0/setup/installing/source.md b/content/riak/kv/2.0.0/setup/installing/source.md index 880a7d935e..9c16ae6efe 100644 --- a/content/riak/kv/2.0.0/setup/installing/source.md +++ b/content/riak/kv/2.0.0/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.0.0/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.0/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.0.0/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.0.0/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.0.0/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.0.0/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.0.0/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.0/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.0/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.0.0/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.0.0/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.0.0/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.0.0/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.0.0/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.0.0/setup/installing/source/erlang.md b/content/riak/kv/2.0.0/setup/installing/source/erlang.md index 334c7aafba..79987f5284 100644 --- a/content/riak/kv/2.0.0/setup/installing/source/erlang.md +++ b/content/riak/kv/2.0.0/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.0/installing/source/erlang/ --- -[install index]: /riak/kv/2.0.0/setup/installing -[security basics]: /riak/kv/2.0.0/using/security/basics +[install index]: {{}}riak/kv/2.0.0/setup/installing +[security basics]: {{}}riak/kv/2.0.0/using/security/basics Pre-packaged versions of Riak include an Erlang installation. 
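Whichever install route you take, the verify-install steps referenced throughout these hunks boil down to a few liveness checks; a sketch (8098 is the default HTTP port and may differ in your configuration):

```
riak ping                           # should answer "pong"
curl -v http://localhost:8098/ping  # should return OK over HTTP
riak-admin test                     # writes and reads back a test value
```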
If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho8.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.0.0/setup/installing/source/jvm.md b/content/riak/kv/2.0.0/setup/installing/source/jvm.md index b74cda253d..0f1e0cc37b 100644 --- a/content/riak/kv/2.0.0/setup/installing/source/jvm.md +++ b/content/riak/kv/2.0.0/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.0.0/installing/source/jvm/ --- -[usage search]: /riak/kv/2.0.0/developing/usage/search +[usage search]: {{}}riak/kv/2.0.0/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.0.0/setup/installing/suse.md b/content/riak/kv/2.0.0/setup/installing/suse.md index 295cb9e450..2842e92b9b 100644 --- a/content/riak/kv/2.0.0/setup/installing/suse.md +++ b/content/riak/kv/2.0.0/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.0/installing/suse/ --- -[install verify]: /riak/kv/2.0.0/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.0/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.0.0/setup/installing/verify.md b/content/riak/kv/2.0.0/setup/installing/verify.md index 8453f184cf..cfe9c3b180 100644 --- a/content/riak/kv/2.0.0/setup/installing/verify.md +++ b/content/riak/kv/2.0.0/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.0/installing/verify-install/ --- -[client libraries]: /riak/kv/2.0.0/developing/client-libraries -[perf open files]: /riak/kv/2.0.0/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.0.0/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.0.0/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.0.0/developing/client-libraries +[perf open files]: {{}}riak/kv/2.0.0/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.0.0/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.0.0/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.0.0/setup/installing/windows-azure.md b/content/riak/kv/2.0.0/setup/installing/windows-azure.md index 562af2b94e..4479e81efd 100644 --- a/content/riak/kv/2.0.0/setup/installing/windows-azure.md +++ b/content/riak/kv/2.0.0/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. 
- ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". - ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.0.0/setup/planning/backend.md b/content/riak/kv/2.0.0/setup/planning/backend.md index bbfcb6fcfb..12a460b23d 100644 --- a/content/riak/kv/2.0.0/setup/planning/backend.md +++ b/content/riak/kv/2.0.0/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.0/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.0/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.0/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.0.0/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.0/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.0/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.0.0/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
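Backend selection itself is one line in `riak.conf`; a sketch follows (Bitcask is the default, so this only matters when you choose otherwise, and the file path is illustrative).

```
cat >> /etc/riak/riak.conf <<'EOF'
## One of: bitcask, leveldb, memory, multi
storage_backend = leveldb
EOF
```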
diff --git a/content/riak/kv/2.0.0/setup/planning/backend/bitcask.md b/content/riak/kv/2.0.0/setup/planning/backend/bitcask.md index 7e5cb7de0a..411794a8d9 100644 --- a/content/riak/kv/2.0.0/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.0.0/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.0.0/using/admin/riak-cli -[config reference]: /riak/kv/2.0.0/configuring/reference -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.0.0/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.0.0/setup/planning/backend/multi -[usage search]: /riak/kv/2.0.0/developing/usage/search - -[glossary aae]: /riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.0.0/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.0.0/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.0.0/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.0.0/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.0.0/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.0.0/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.0.0/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.0.0/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.0.0/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
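Because Bitcask holds open files per vnode, the open-files limit linked above matters in practice; a quick check plus an illustrative `limits.conf` entry (the 65536 figure is an example, not a recommendation from this diff):

```
ulimit -n   # current limit for this shell

# Raise the limit for the riak user (example values)
echo "riak soft nofile 65536" | sudo tee -a /etc/security/limits.conf
echo "riak hard nofile 65536" | sudo tee -a /etc/security/limits.conf
```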
diff --git a/content/riak/kv/2.0.0/setup/planning/backend/leveldb.md b/content/riak/kv/2.0.0/setup/planning/backend/leveldb.md index 07ae268b59..ecd0658320 100644 --- a/content/riak/kv/2.0.0/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.0.0/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.0/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.0/configuring/reference -[perf index]: /riak/kv/2.0.0/using/performance -[config reference#aae]: /riak/kv/2.0.0/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[perf index]: {{}}riak/kv/2.0.0/using/performance +[config reference#aae]: {{}}riak/kv/2.0.0/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.0.0/setup/planning/backend/memory.md b/content/riak/kv/2.0.0/setup/planning/backend/memory.md index 7f07d1db1c..fa21786f0a 100644 --- a/content/riak/kv/2.0.0/setup/planning/backend/memory.md +++ b/content/riak/kv/2.0.0/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.0/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.0.0/configuring/reference -[plan backend multi]: /riak/kv/2.0.0/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[plan backend multi]: {{}}riak/kv/2.0.0/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.0.0/setup/planning/backend/multi.md b/content/riak/kv/2.0.0/setup/planning/backend/multi.md index 335c17dbc8..e79f5df3bf 100644 --- a/content/riak/kv/2.0.0/setup/planning/backend/multi.md +++ b/content/riak/kv/2.0.0/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.0/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.0.0/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.0/setup/planning/backend/memory -[config reference]: /riak/kv/2.0.0/configuring/reference -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.0.0/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.0.0/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.0/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.0.0/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
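A hedged `riak.conf` sketch of that arrangement; the `*_mult` names and data paths are arbitrary labels, and the final command uses the documented `backend` bucket property to point a bucket type at one of them.

```
cat >> /etc/riak/riak.conf <<'EOF'
storage_backend = multi
multi_backend.default = bitcask_mult
multi_backend.bitcask_mult.storage_backend = bitcask
multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult
multi_backend.leveldb_mult.storage_backend = leveldb
multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult
EOF

riak-admin bucket-type create leveldb_type '{"props":{"backend":"leveldb_mult"}}'
```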
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.0.0/setup/planning/best-practices.md b/content/riak/kv/2.0.0/setup/planning/best-practices.md index ddd33fffdd..b6c5d77c1b 100644 --- a/content/riak/kv/2.0.0/setup/planning/best-practices.md +++ b/content/riak/kv/2.0.0/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.0/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.0.0/using/reference/handoff -[config mapreduce]: /riak/kv/2.0.0/configuring/mapreduce -[glossary aae]: /riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.0.0/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.0.0/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.0.0/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.0.0/setup/planning/bitcask-capacity-calc.md index 4d7ddba3fc..9f1261006d 100644 --- a/content/riak/kv/2.0.0/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.0.0/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
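A back-of-envelope version of what those calculators do; the per-key overhead constant below is an assumption for illustration only, so use the calculator's figure for your Riak version.

```
KEYS=50000000        # total keys (example)
AVG_KEY_BYTES=36     # average key size (example)
OVERHEAD=40          # ASSUMED per-key keydir overhead in bytes
N_VAL=3              # replicas per object

# Total keydir RAM across the cluster, in GiB (1073741824 bytes per GiB)
echo $(( KEYS * (AVG_KEY_BYTES + OVERHEAD) * N_VAL / 1073741824 )) GiB
```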
diff --git a/content/riak/kv/2.0.0/setup/planning/cluster-capacity.md b/content/riak/kv/2.0.0/setup/planning/cluster-capacity.md index d01ca54fde..012073dc3e 100644 --- a/content/riak/kv/2.0.0/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.0.0/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.0/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.0.0/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.0.0/setup/planning -[concept replication]: /riak/kv/2.0.0/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.0.0/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.0.0/configuring/reference -[perf benchmark]: /riak/kv/2.0.0/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.0.0/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.0.0/setup/planning +[concept replication]: {{}}riak/kv/2.0.0/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[perf benchmark]: {{}}riak/kv/2.0.0/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.0.0/setup/planning/operating-system.md b/content/riak/kv/2.0.0/setup/planning/operating-system.md index d7fb3fd3a3..9b00ee3554 100644 --- a/content/riak/kv/2.0.0/setup/planning/operating-system.md +++ b/content/riak/kv/2.0.0/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.0.0/downloads/ +[downloads]: {{}}riak/kv/2.0.0/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.0.0/setup/planning/start.md b/content/riak/kv/2.0.0/setup/planning/start.md index 40d654579e..c90c80d63a 100644 --- a/content/riak/kv/2.0.0/setup/planning/start.md +++ b/content/riak/kv/2.0.0/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.0/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.0.0/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.0/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.0.0/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.0.0/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.0/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.0.0/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster. 
diff --git a/content/riak/kv/2.0.0/setup/upgrading/checklist.md b/content/riak/kv/2.0.0/setup/upgrading/checklist.md index 225b4b64d2..a00b63b767 100644 --- a/content/riak/kv/2.0.0/setup/upgrading/checklist.md +++ b/content/riak/kv/2.0.0/setup/upgrading/checklist.md @@ -16,24 +16,24 @@ aliases: - /riak/kv/2.0.0/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.0.0/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.0/using/performance +[perf open files]: {{}}riak/kv/2.0.0/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.0/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.0.0/using/security/basics -[cluster ops load balance]: /riak/kv/2.0.0/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.0.0/configuring/reference -[config backend]: /riak/kv/2.0.0/configuring/backend -[usage search]: /riak/kv/2.0.0/developing/usage/search -[usage conflict resolution]: /riak/kv/2.0.0/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.0.0/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.0.0/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.0.0/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.0.0/using/admin/commands -[use admin riak control]: /riak/kv/2.0.0/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.0.0/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.0.0/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.0.0/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.0.0/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.0.0/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[config backend]: {{}}riak/kv/2.0.0/configuring/backend +[usage search]: {{}}riak/kv/2.0.0/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.0.0/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.0/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.0.0/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.0.0/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.0.0/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.0.0/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.0.0/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.0.0/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.0.0/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a production environment from a development or testing environment can be a complex process. While the specific process will depend on your environment and practices, there are some basics for you to consider and a few questions you will want to ask while making this transition. 
diff --git a/content/riak/kv/2.0.0/setup/upgrading/cluster.md b/content/riak/kv/2.0.0/setup/upgrading/cluster.md index a8e0634611..e6782d2fc6 100644 --- a/content/riak/kv/2.0.0/setup/upgrading/cluster.md +++ b/content/riak/kv/2.0.0/setup/upgrading/cluster.md @@ -11,23 +11,23 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.0.0/ops/upgrading/rolling-upgrades/ - /riak/kv/2.0.0/ops/upgrading/rolling-upgrades/ --- -[production checklist]: /riak/kv/2.0.0/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.0.0/using/admin/riak-control -[use admin commands]: /riak/kv/2.0.0/using/admin/commands -[use admin riak-admin]: /riak/kv/2.0.0/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.0.0/developing/usage/secondary-indexes -[release notes]: /riak/kv/2.0.0/release-notes/ +[production checklist]: {{}}riak/kv/2.0.0/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.0.0/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.0.0/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.0.0/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.0.0/developing/usage/secondary-indexes +[release notes]: {{}}riak/kv/2.0.0/release-notes/ [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.0.0/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.0.0/using/reference/jmx -[snmp]: /riak/kv/2.0.0/using/reference/snmp +[cluster ops mdc]: {{}}riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.0.0/using/reference/jmx +[snmp]: {{}}riak/kv/2.0.0/using/reference/snmp {{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. @@ -38,7 +38,7 @@ recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. -If you run [Riak Control](/riak/kv/2.0.0/using/admin/riak-control), you should disable it during the rolling upgrade process. +If you run [Riak Control]({{}}riak/kv/2.0.0/using/admin/riak-control), you should disable it during the rolling upgrade process. {{% /note %}} Riak KV nodes negotiate with each other to determine supported @@ -104,9 +104,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.0/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.0/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.0/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.0/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.0/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.0/release-notes/). {{% /note %}} ## RHEL/CentOS @@ -166,9 +166,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. 
Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.0/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.0/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.0/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.0/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.0/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.0/release-notes/). {{% /note %}} ## Solaris/OpenSolaris @@ -252,9 +252,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.0/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.0/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.0/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.0/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.0/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.0/release-notes/). {{% /note %}} ## Rolling Upgrade to Enterprise diff --git a/content/riak/kv/2.0.0/setup/upgrading/search.md b/content/riak/kv/2.0.0/setup/upgrading/search.md index c53896c7b2..ce71ed1f33 100644 --- a/content/riak/kv/2.0.0/setup/upgrading/search.md +++ b/content/riak/kv/2.0.0/setup/upgrading/search.md @@ -11,7 +11,7 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" --- If you're using Search in a version of Riak prior to 2.0 (1.3.0 to @@ -270,4 +270,4 @@ search property is set to false. 11. Finally, delete the merge index directories to reclaim disk space. -For any questions reach out to the [Riak community](/community). Preferably, ask your questions up front rather than during the middle of a migration. +For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. diff --git a/content/riak/kv/2.0.0/setup/upgrading/version.md b/content/riak/kv/2.0.0/setup/upgrading/version.md index fdc2abe348..bec3ab3ffd 100644 --- a/content/riak/kv/2.0.0/setup/upgrading/version.md +++ b/content/riak/kv/2.0.0/setup/upgrading/version.md @@ -20,7 +20,7 @@ explains which default Riak behaviors have changed and specific steps to take for a successful upgrade. For an overview of the new features and functionality -included in version 2.0, check out our guide to [Riak 2.0](/riak/kv/2.0.0/introduction). +included in version 2.0, check out our guide to [Riak 2.0]({{}}riak/kv/2.0.0/introduction). ## New Clients @@ -36,14 +36,14 @@ was built with those features in mind. 
There are official While we strongly recommend using the newest versions of these clients, older versions will still work with Riak 2.0, with the drawback that -those older clients will not able to take advantage of [new features](/riak/kv/2.0.0/introduction) like [data types](/riak/kv/2.0.0/developing/data-types) or the new [Riak Search](/riak/kv/2.0.0/using/reference/search). +those older clients will not be able to take advantage of [new features]({{}}riak/kv/2.0.0/introduction) like [data types]({{}}riak/kv/2.0.0/developing/data-types) or the new [Riak Search]({{}}riak/kv/2.0.0/using/reference/search). ## Bucket Types In versions of Riak prior to 2.0, the location of objects was -determined by objects' [bucket](/riak/kv/2.0.0/learn/concepts/buckets) and [key](/riak/kv/2.0.0/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties](/riak/kv/2.0.0/developing/usage/bucket-types/). +determined by objects' [bucket]({{}}riak/kv/2.0.0/learn/concepts/buckets) and [key]({{}}riak/kv/2.0.0/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties]({{}}riak/kv/2.0.0/developing/usage/bucket-types/). -In Riak 2.0, [bucket types](/riak/kv/2.0.0/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types](/riak/kv/2.0.0/using/reference/bucket-types). +In Riak 2.0, [bucket types]({{}}riak/kv/2.0.0/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types]({{}}riak/kv/2.0.0/using/reference/bucket-types). Here, we'll list some of the things to be aware of when upgrading. #### Bucket types and object location @@ -56,7 +56,7 @@ is determined by: * key This means there are 3 namespaces involved in object location instead of 2. -A full tutorial can be found in [Using Bucket Types](/riak/kv/2.0.0/using/reference/bucket-types). +A full tutorial can be found in [Using Bucket Types]({{}}riak/kv/2.0.0/using/reference/bucket-types). If your application was written using a version of Riak prior to 2.0, you should make sure that any endpoint in Riak targeting
The following URLs are equivalent in Riak 2.0: If you use object locations that don't specify a bucket type, you have three options: -* Accept Riak's [default bucket configurations](/riak/kv/2.0.0/using/reference/bucket-types/#buckets-as-namespaces) -* Change Riak's defaults using your [configuration files](/riak/kv/2.0.0/configuring/reference/#default-bucket-properties) +* Accept Riak's [default bucket configurations]({{}}riak/kv/2.0.0/using/reference/bucket-types/#buckets-as-namespaces) +* Change Riak's defaults using your [configuration files]({{}}riak/kv/2.0.0/configuring/reference/#default-bucket-properties) * Manage multiple sets of bucket properties by specifying those properties for all operations (not recommended) @@ -86,17 +86,17 @@ One reason we recommend using bucket types for Riak 2.0 and later is because many newer Riak features were built with bucket types as a precondition: -* [Strong consistency](/riak/2.0.0/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem +* [Strong consistency]({{}}riak/kv/2.0.0/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem requires you to set the `consistent` parameter on a bucket type to `true` -* [Riak Data Types](/riak/kv/2.0.0/developing/data-types) --- In order to use Riak Data - Types, you must [create bucket types](/riak/kv/2.0.0/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the +* [Riak Data Types]({{}}riak/kv/2.0.0/developing/data-types) --- In order to use Riak Data + Types, you must [create bucket types]({{}}riak/kv/2.0.0/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the Data Type you are using #### Bucket types and downgrades If you decide to use bucket types, please remember that you -cannot [downgrade](/riak/kv/2.0.0/setup/downgrade) your cluster to a version of +cannot [downgrade]({{}}riak/kv/2.0.0/setup/downgrade) your cluster to a version of Riak prior to 2.0 if you have both created and activated a bucket type. @@ -104,20 +104,20 @@ bucket type. One of the biggest changes in version 2.0 regarding application development involves Riak's default -[siblings](/riak/kv/2.0.0/learn/concepts/causal-context/#siblings) behavior. +[siblings]({{}}riak/kv/2.0.0/learn/concepts/causal-context/#siblings) behavior. In versions prior to 2.0, the `allow_mult` setting was set to `false` by default for all buckets. So Riak's default behavior was to resolve -object replica [conflicts](/riak/kv/2.0.0/developing/usage/conflict-resolution) between nodes on its +object replica [conflicts]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution) between nodes on its own; relieving connecting clients of the need to resolve those conflicts. **In 2.0, `allow_mult` is set to `true` for any bucket type that you create and activate.** -This means that the default when [using bucket types](/riak/kv/2.0.0/using/reference/bucket-types/) is to handle [conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution) on the client side using -either traditional [vector clocks](/riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors](/riak/kv/2.0.0/learn/concepts/causal-context/#dotted-version-vector). 
+This means that the default when [using bucket types]({{}}riak/kv/2.0.0/using/reference/bucket-types/) is to handle [conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution) on the client side using +either traditional [vector clocks]({{}}riak/kv/2.0.0/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors]({{}}riak/kv/2.0.0/learn/concepts/causal-context/#dotted-version-vector). If you wish to set `allow_mult` to `false` in version 2.0, you have two options: @@ -126,11 +126,11 @@ options: * Don't use bucket types. More information on handling siblings can be found in our documentation -on [conflict resolution](/riak/kv/2.0.0/developing/usage/conflict-resolution). +on [conflict resolution]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution). ## Enabling Security -The [authentication and authorization](/riak/kv/2.0.0/using/security/basics) mechanisms included with Riak 2.0 should only be turned +The [authentication and authorization]({{}}riak/kv/2.0.0/using/security/basics) mechanisms included with Riak 2.0 should only be turned on after careful testing in a non-production environment. Security changes the way all applications interact with Riak. @@ -140,12 +140,12 @@ If you decide to upgrade to version 2.0, you can still downgrade your cluster to an earlier version of Riak if you wish, _unless_ you perform one of the following actions in your cluster: -* Index data to be used in conjunction with the new [Riak Search](/riak/kv/2.0.0/using/reference/search). -* Create _and_ activate one or more [bucket types](/riak/kv/2.0.0/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: - - [Strong consistency](/riak/2.0.0/using/reference/strong-consistency) - - [Riak Data Types](/riak/kv/2.0.0/developing/data-types) +* Index data to be used in conjunction with the new [Riak Search]({{}}riak/kv/2.0.0/using/reference/search). +* Create _and_ activate one or more [bucket types]({{}}riak/kv/2.0.0/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: + - [Strong consistency]({{}}riak/kv/2.0.0/using/reference/strong-consistency) + - [Riak Data Types]({{}}riak/kv/2.0.0/developing/data-types) -If you use other new features, such as [Riak Security](/riak/kv/2.0.0/using/security/basics) or the new [configuration files](/riak/kv/2.0.0/configuring/reference/), you can still +If you use other new features, such as [Riak Security]({{}}riak/kv/2.0.0/using/security/basics) or the new [configuration files]({{}}riak/kv/2.0.0/configuring/reference/), you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. @@ -155,7 +155,7 @@ Riak 2.0 offers a new configuration system that both simplifies configuration syntax and uses one configuration file, `riak.conf`, instead of the two files, `app.config` and `vm.args`, required by the older system. Full documentation of the new system can be found in -[Configuration Files](/riak/kv/2.0.0/configuring/reference/). +[Configuration Files]({{}}riak/kv/2.0.0/configuring/reference/). If you're upgrading to Riak 2.0 from an earlier version, you have two configuration options: @@ -166,12 +166,12 @@ configuration options: recognized in Riak 2.0. 
If you choose the first option, make sure to consult the -[configuration files](/riak/kv/2.0.0/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. +[configuration files]({{}}riak/kv/2.0.0/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. If you choose the second option, Riak will automatically determine that the older configuration system is being used. You should be aware, however, that some settings must be set in an `advanced.config` file. -For a listing of those parameters, see our documentation on [advanced configuration](/riak/kv/2.0.0/configuring/reference/#advanced-configuration). +For a listing of those parameters, see our documentation on [advanced configuration]({{}}riak/kv/2.0.0/configuring/reference/#advanced-configuration). If you choose to keep the existing `app.config` files, you _must_ add the following additional settings in the `riak_core` section: @@ -209,7 +209,7 @@ default to a value of `15`, which can cause problems in some clusters. ## Upgrading Search Information on upgrading Riak Search to 2.0 can be found in our -[Search upgrade guide](/riak/kv/2.0.0/setup/upgrading/search). +[Search upgrade guide]({{}}riak/kv/2.0.0/setup/upgrading/search). ## Migrating from Short Names @@ -220,12 +220,11 @@ and `-name` in `vm.args`. If you are upgrading from a previous version of Riak to 2.0 and are using `-sname` in your `vm.args`, the below steps are required to migrate away from `-sname`. -1. Upgrade to Riak -[1.4.12](http://docs.basho.com/riak/1.4.12/downloads/). +1. Upgrade to Riak 1.4.12. 2. Back up the ring directory on each node, typically located in `/var/lib/riak/ring`. 3. Stop all nodes in your cluster. -4. Run [`riak-admin reip <old_nodename> <new_nodename>`](/riak/kv/2.0.0/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your +4. Run [`riak-admin reip <old_nodename> <new_nodename>`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your cluster. For example, in a 5 node cluster this will be run 25 total times, 5 times on each node. The `<old_nodename>` is the current shortname, and the `<new_nodename>` is the new fully qualified hostname. 
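To make step 4 concrete, here is a minimal sketch for a hypothetical 5-node cluster; the node names and hostnames are invented, and only the `riak-admin reip <old_nodename> <new_nodename>` shape is taken from the docs above:

```bash
# Run this loop on each of the 5 stopped nodes: 5 nodes x 5 renames = 25 runs total.
for i in 1 2 3 4 5; do
  riak-admin reip "riak${i}@host${i}" "riak${i}@host${i}.example.com"
done
```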
diff --git a/content/riak/kv/2.0.0/using.md b/content/riak/kv/2.0.0/using.md index e080a75053..0aa65c24ba 100644 --- a/content/riak/kv/2.0.0/using.md +++ b/content/riak/kv/2.0.0/using.md @@ -15,7 +15,7 @@ toc: true [use running cluster]: ../using/running-a-cluster [use admin index]: ../using/admin/ [cluster ops index]: ../using/cluster-operations -[repair recover index]: ../repair-recovery +[repair recover index]: ../using/repair-recovery [security index]: ../using/security [perf index]: ../using/performance [troubleshoot index]: ../using/troubleshooting diff --git a/content/riak/kv/2.0.0/using/admin/commands.md b/content/riak/kv/2.0.0/using/admin/commands.md index ad877f1446..6b05c511e6 100644 --- a/content/riak/kv/2.0.0/using/admin/commands.md +++ b/content/riak/kv/2.0.0/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.0/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.0.0/using/admin/riak-admin/#cluster -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.0.0/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.0.0/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.0.0/using/admin/riak-admin.md b/content/riak/kv/2.0.0/using/admin/riak-admin.md index 7b1aa59cd3..4ceba99add 100644 --- a/content/riak/kv/2.0.0/using/admin/riak-admin.md +++ b/content/riak/kv/2.0.0/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.0.0/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.0.0/configuring/reference -[use admin commands]: /riak/kv/2.0.0/using/admin/commands -[use admin commands#join]: /riak/kv/2.0.0/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.0.0/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.0.0/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.0.0/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.0.0/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.0.0/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.0.0/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.0.0/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.0.0/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.0.0/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.0.0/setup/downgrade -[security index]: /riak/kv/2.0.0/using/security/ -[security managing]: /riak/kv/2.0.0/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.0.0/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.0.0/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.0.0/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.0.0/using/cluster-operations/strong-consistency -[cluster 
ops handoff]: /riak/kv/2.0.0/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.0.0/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[use admin commands]: {{}}riak/kv/2.0.0/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.0.0/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.0.0/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.0.0/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.0.0/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.0.0/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.0.0/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.0.0/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.0.0/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.0.0/using/cluster-operations/inspecting-node +[use ref monitoring]: {{}}riak/kv/2.0.0/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.0.0/setup/downgrade +[security index]: {{}}riak/kv/2.0.0/using/security/ +[security managing]: {{}}riak/kv/2.0.0/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.0.0/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.0.0/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.0.0/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.0.0/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.0.0/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.0.0/using/admin/riak-admin/#stats ## riak-admin diff --git a/content/riak/kv/2.0.0/using/admin/riak-cli.md b/content/riak/kv/2.0.0/using/admin/riak-cli.md index 760736596f..641061c814 100644 --- a/content/riak/kv/2.0.0/using/admin/riak-cli.md +++ b/content/riak/kv/2.0.0/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.0/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.0.0/configuring/reference/ +[configuration file]: {{}}riak/kv/2.0.0/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.0.0/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.0.0/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.0.0/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.0.0/configuring/reference/ ## riak diff --git a/content/riak/kv/2.0.0/using/admin/riak-control.md b/content/riak/kv/2.0.0/using/admin/riak-control.md index a246c62585..aaa1b4006a 100644 --- a/content/riak/kv/2.0.0/using/admin/riak-control.md +++ b/content/riak/kv/2.0.0/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.0.0/configuring/reference +[config reference]: {{}}riak/kv/2.0.0/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.0.0/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.0.0/using/security/basics#enabling-ssl). 
Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.0.0/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.0.0/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.0.0/using/cluster-operations.md b/content/riak/kv/2.0.0/using/cluster-operations.md index d4ba04cedd..bcd5abf6e9 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations.md +++ b/content/riak/kv/2.0.0/using/cluster-operations.md @@ -20,7 +20,6 @@ toc: true [ops log]: ./logging [ops backup]: ./backing-up [ops handoff]: ./handoff -[ops obj del]: ./object-deletion [ops strong consistency]: ./strong-consistency [ops v3 mdc]: ./v3-multi-datacenter [ops v2 mdc]: ./v2-multi-datacenter @@ -84,13 +83,6 @@ Information on using the `riak-admin handoff` interface to enable and disable ha [Learn More >>][ops handoff] -#### [Object Deletion][ops obj del] - -Describes possible settings for `delete_mode`. - -[Learn More >>][ops obj del] - - #### [Monitoring Strong Consistency][ops strong consistency] Overview of the various statistics used in monitoring strong consistency. 
diff --git a/content/riak/kv/2.0.0/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.0.0/using/cluster-operations/active-anti-entropy.md index 66334f4cfe..1ccd5c4ac6 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/active-anti-entropy.md @@ -54,12 +54,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. ## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -87,7 +87,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes.md index 64095b3fe2..b81c1a4ec1 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.0.0/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.0.0/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.0.0/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.0.0/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.0.0/using/cluster-operations/backing-up.md b/content/riak/kv/2.0.0/using/cluster-operations/backing-up.md index 04c016eae7..66a84ad6b4 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.0/ops/running/backups --- -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters -[config reference]: /riak/kv/2.0.0/configuring/reference -[plan backend leveldb]: /riak/kv/2.0.0/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.0/using/reference/strong-consistency -[concept aae]: /riak/kv/2.0.0/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.0.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.0.0/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.0/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.0.0/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.0.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.0.0/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down <node>`](/riak/kv/2.0.0/using/admin/riak-admin/#down) + [`riak-admin down <node>`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join <node>`](/riak/kv/2.0.0/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join <node>`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace <original_node> <new_node>`](/riak/kv/2.0.0/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace <original_node> <new_node>`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.0.0/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.0.0/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.0.0/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.0.0/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.0.0/using/cluster-operations/bucket-types.md b/content/riak/kv/2.0.0/using/cluster-operations/bucket-types.md index f2889f86a5..f0ed9f9b5e 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.0.0/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.0.0/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. 
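For reference, that command-line setup step is the `riak-admin bucket-type` create/activate workflow; the type name and properties below are illustrative only:

```bash
# Create a bucket type (name and props are examples), then activate it
# so buckets under that type can be used.
riak-admin bucket-type create my_type '{"props":{"allow_mult":true}}'
riak-admin bucket-type activate my_type
riak-admin bucket-type status my_type   # confirm the type is active
```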
diff --git a/content/riak/kv/2.0.0/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.0.0/using/cluster-operations/changing-cluster-info.md index dfe91b3c81..04dd7f66e7 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.0.0/configuring/reference +[config reference]: {{}}riak/kv/2.0.0/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.0.0/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.0.0/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.0.0/using/cluster-operations/handoff.md b/content/riak/kv/2.0.0/using/cluster-operations/handoff.md index dd337c2faf..89bfc790f2 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.0.0/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.0.0/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.0.0/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.0.0/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.0.0/using/cluster-operations/logging.md b/content/riak/kv/2.0.0/using/cluster-operations/logging.md index b61524c6c9..9eab1f47ec 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/logging.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e. 
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.0.0/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.0.0/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.0.0/using/cluster-operations/replacing-node.md b/content/riak/kv/2.0.0/using/cluster-operations/replacing-node.md index d64b9b148a..a7441a954a 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.0.0/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.0.0/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.0.0/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.0.0/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.0.0/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.0.0/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.0.0/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.0.0/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.0.0/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.0.0/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.0.0/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.0.0/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.0.0/using/cluster-operations/strong-consistency.md index e37c793ee1..14f1e9218b 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.0.0/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.0.0/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.0.0/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.0.0/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.0.0/using/cluster-operations/v2-multi-datacenter.md index 27659ba776..87e408bb46 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/v2-multi-datacenter.md @@ -159,7 +159,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -179,7 +179,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -217,7 +217,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.0.0/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.0.0/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -238,7 +238,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter.md index d430d1d397..87db2ed8c6 100644 --- a/content/riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.0.0/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.0.0/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.0/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.0/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.0.0/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.0.0/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.0.0/using/performance.md b/content/riak/kv/2.0.0/using/performance.md index c607c93b36..5d0cd73bdd 100644 --- a/content/riak/kv/2.0.0/using/performance.md +++ b/content/riak/kv/2.0.0/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.0.0/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.0.0/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -237,12 +237,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.0.0/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.0.0/using/performance/open-files-limit/) for more details. 
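Before raising the limit, it can help to confirm what a running node actually got. A quick Linux-only check (the `beam.smp` match assumes a single Erlang VM on the host):

```bash
# Current shell limit vs. the limit the running Riak VM actually has.
ulimit -n
RIAK_PID=$(pgrep -f beam.smp | head -n 1)   # assumes one Erlang VM on this host
grep 'open files' "/proc/${RIAK_PID}/limits"
```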
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.0.0/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.0.0/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.0.0/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.0.0/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.0.0/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.0.0/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.0.0/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.0.0/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.0.0/using/performance/benchmarking.md b/content/riak/kv/2.0.0/using/performance/benchmarking.md index 165f147f71..e9a86833f2 100644 --- a/content/riak/kv/2.0.0/using/performance/benchmarking.md +++ b/content/riak/kv/2.0.0/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.0.0/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.0.0/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.0.0/using/performance/latency-reduction.md b/content/riak/kv/2.0.0/using/performance/latency-reduction.md index 79226f6c7b..38696b17a4 100644 --- a/content/riak/kv/2.0.0/using/performance/latency-reduction.md +++ b/content/riak/kv/2.0.0/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.0.0/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.0.0/using/performance/multi-datacenter-tuning.md index dd119458e6..9ca6f0ab3a 100644 --- a/content/riak/kv/2.0.0/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.0.0/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.0.0/using/performance +[perf index]: {{}}riak/kv/2.0.0/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.0.0/using/performance/open-files-limit.md b/content/riak/kv/2.0.0/using/performance/open-files-limit.md index 705bf0af33..c91e21c23e 100644 --- a/content/riak/kv/2.0.0/using/performance/open-files-limit.md +++ b/content/riak/kv/2.0.0/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/ops/tuning/open-files-limit/ --- -[plan backend bitcask]: /riak/kv/2.0.0/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.0/setup/planning/backend/bitcask Riak can consume a large number of open file handles during normal operation. 
The [Bitcask][plan backend bitcask] backend in particular may accumulate a high diff --git a/content/riak/kv/2.0.0/using/reference/bucket-types.md b/content/riak/kv/2.0.0/using/reference/bucket-types.md index 103b513d3a..99de9ae591 100644 --- a/content/riak/kv/2.0.0/using/reference/bucket-types.md +++ b/content/riak/kv/2.0.0/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.0.0/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.0.0/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.0.0/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.0.0/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. @@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.0.0/developing/data-types), and [strong consistency](/riak/kv/2.0.0/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.0.0/developing/data-types), and [strong consistency]({{}}riak/kv/2.0.0/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.0.0/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.0.0/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.0/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.0.0/developing/getting-started) section. 
+> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.0/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.0/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.0.0/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.0.0/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.0.0/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution) with the appropriate application-side business logic. To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.0.0/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.0.0/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.0.0/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.0.0/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.0.0/learn/concepts/buckets) and [keys](/riak/kv/2.0.0/learn/concepts/keys-and-objects). +because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.0.0/learn/concepts/buckets) and [keys]({{}}riak/kv/2.0.0/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes.
We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.0.0/using/reference/custom-code.md b/content/riak/kv/2.0.0/using/reference/custom-code.md index 4b9dee62d2..f33bd33cca 100644 --- a/content/riak/kv/2.0.0/using/reference/custom-code.md +++ b/content/riak/kv/2.0.0/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.0.0/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.0.0/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.0.0/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.0.0/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.0.0/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.0.0/using/admin/riak-admin/#wait-for-service). {{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.0.0/using/reference/handoff.md b/content/riak/kv/2.0.0/using/reference/handoff.md index 82604835f3..d36fb73b08 100644 --- a/content/riak/kv/2.0.0/using/reference/handoff.md +++ b/content/riak/kv/2.0.0/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.0.0/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.0.0/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.0.0/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.0.0/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.0.0/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.0.0/learn/glossary/#vnode) no longer belongs to the node on which it's running. 
This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.0.0/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.0.0/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.0.0/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.0.0/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.0.0/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.0.0/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.0.0/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.0.0/configuring/reference/#vnode_management_timer). Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.0.0/using/reference/jmx.md b/content/riak/kv/2.0.0/using/reference/jmx.md index 8529794513..49a4fdc982 100644 --- a/content/riak/kv/2.0.0/using/reference/jmx.md +++ b/content/riak/kv/2.0.0/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.0/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.0.0/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.0.0/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.0.0/using/reference/logging.md b/content/riak/kv/2.0.0/using/reference/logging.md index a3fbaf8190..4f0b1c429a 100644 --- a/content/riak/kv/2.0.0/using/reference/logging.md +++ b/content/riak/kv/2.0.0/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.0/ops/running/logging --- -[cluster ops log]: /riak/kv/2.0.0/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.0.0/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. 
A compact listing of parameters can be found in our [configuration files](/riak/kv/2.0.0/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.0.0/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -46,7 +46,7 @@ File | Significance `console.log` | Console log output `crash.log` | Crash logs `erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. -`error.log` | [Common errors](../../repair-recover/errors) emitted by Riak. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. `run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. ## Log Syntax @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.0.0/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.0.0/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. * `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.0.0/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.0.0/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.0.0/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.0.0/using/reference/multi-datacenter/comparison.md index 767991322e..af52455997 100644 --- a/content/riak/kv/2.0.0/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.0.0/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.0.0/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.0.0/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.0.0/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.0.0/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. 
-* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.0.0/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.0.0/using/reference/object-deletion.md b/content/riak/kv/2.0.0/using/reference/object-deletion.md index 58ed0ae124..9f22ab0f7d 100644 --- a/content/riak/kv/2.0.0/using/reference/object-deletion.md +++ b/content/riak/kv/2.0.0/using/reference/object-deletion.md @@ -39,7 +39,7 @@ concretely using the following example: * The object has been marked as deleted on nodes A and B, but it still lives on node C * A client attempts to read the object, Riak senses that there are - divergent replicas and initiates a repair process (either [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) or [active anti-entropy](../../../learn/concepts/active-anti-entropy/), + divergent replicas and initiates a repair process (either [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) or [active anti-entropy](../../../learn/concepts/active-anti-entropy/), depending on configuration) At this point, Riak needs to make a decision about what to do. Should diff --git a/content/riak/kv/2.0.0/using/reference/runtime-interaction.md b/content/riak/kv/2.0.0/using/reference/runtime-interaction.md index f2309f7c90..fbf7f01bc4 100644 --- a/content/riak/kv/2.0.0/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.0.0/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.0/ops/advanced/runtime --- -[config reference]: /riak/kv/2.0.0/configuring/reference -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.0/configuring/reference +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.0.0/using/reference/search.md b/content/riak/kv/2.0.0/using/reference/search.md index e5a284731a..1368974e24 100644 --- a/content/riak/kv/2.0.0/using/reference/search.md +++ b/content/riak/kv/2.0.0/using/reference/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.0.0/dev/advanced/search --- -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak Search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -Search, you should check out the [Using Search](/riak/kv/2.0.0/developing/usage/search) document. +Search, you should check out the [Using Search]({{}}riak/kv/2.0.0/developing/usage/search) document. 
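For readers who want to try Riak Search before reading the full Using Search document, here is a minimal sketch of the basic HTTP workflow; the local address, the default HTTP port `8098`, and the index name `famous` are illustrative assumptions.

```bash
# Create a search index backed by the default schema
curl -XPUT http://localhost:8098/search/index/famous

# Query the index; name_s follows the default schema's
# dynamic-field convention for string fields
curl "http://localhost:8098/search/query/famous?wt=json&q=name_s:Lion*"
```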
-![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -126,7 +126,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.0.0/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.0.0/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -288,7 +288,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on to how customize your own [search schema](/riak/kv/2.0.0/developing/usage/search-schemas). +You can also find more information on how to customize your own [search schema]({{}}riak/kv/2.0.0/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -298,7 +298,7 @@ indexed. ## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.0.0/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -353,7 +353,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.0.0/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.0.0/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.0.0/using/reference/secondary-indexes.md index b064fc593d..400b0b30ff 100644 --- a/content/riak/kv/2.0.0/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.0.0/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.0.0/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.0/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.0/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.0.0/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.0.0/developing/usage/secondary-indexes/) \(2i) feature.
+[secondary indexes]({{}}riak/kv/2.0.0/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.0.0/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.0.0/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.0.0/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.0.0/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.0.0/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.0.0/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. diff --git a/content/riak/kv/2.0.0/using/reference/statistics-monitoring.md b/content/riak/kv/2.0.0/using/reference/statistics-monitoring.md index 829c66f64a..1da2890207 100644 --- a/content/riak/kv/2.0.0/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.0.0/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.0.0/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.0.0/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.0.0/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.0.0/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. 
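As a quick sketch of both interfaces mentioned above, assuming a node with the default HTTP port `8098`:

```bash
# HTTP: fetch the full statistics document from the /stats endpoint
curl -s http://localhost:8098/stats

# Command line: the same data via riak-admin
riak-admin status

# Drill into a single statistic (path shown is one common example)
riak-admin stat show riak.riak_kv.node.gets
```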
This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.0.0/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.0.0/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.0.0/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.0.0/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.0.0/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.0.0/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.0.0/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.0.0/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -143,7 +143,7 @@ Metric | Also | Notes ## Command-line Interface -The [`riak-admin`](/riak/kv/2.0.0/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.0.0/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -168,14 +168,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.0.0/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.0.0/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.0.0/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.0.0/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -222,7 +222,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.0.0/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.0.0/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -246,7 +246,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. 
A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.0.0/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.0.0/developing/api/http/status) endpoint is also available. #### Nagios @@ -320,14 +320,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.0.0/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.0.0/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.0.0/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.0.0/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -349,9 +349,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.0.0/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.0.0/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.0.0/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.0.0/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -367,9 +367,9 @@ Docs](https://github.com/basho/basho_docs). 
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.0.0/using/reference/strong-consistency.md b/content/riak/kv/2.0.0/using/reference/strong-consistency.md index 6ab9a3aae0..a42ca77b7a 100644 --- a/content/riak/kv/2.0.0/using/reference/strong-consistency.md +++ b/content/riak/kv/2.0.0/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.0.0/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.0/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.0/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.0/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.0/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.0/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.0/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.0/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.0/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.0/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.0.0/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.0.0/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. 
Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.0.0/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.0.0/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.0.0/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.0.0/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.0.0/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.0.0/using/reference/v2-multi-datacenter/architecture.md index 187d80c83d..28ef92b400 100644 --- a/content/riak/kv/2.0.0/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.0/using/reference/v2-multi-datacenter/architecture.md @@ -78,7 +78,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.0.0/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.0.0/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -90,7 +90,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{}}images/MDC_Full-sync-small.png)
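To make the fullsync cycle above concrete, these are the `riak-repl` controls usually run on the primary (listener) cluster; a sketch only, so consult the operations pages for flags and caveats.

```bash
# Start a manual fullsync from the primary cluster
riak-repl start-fullsync

# Pause and resume a running fullsync, e.g. around peak traffic
riak-repl pause-fullsync
riak-repl resume-fullsync

# Abort the current fullsync entirely
riak-repl cancel-fullsync
```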
## Realtime Replication @@ -108,7 +108,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{}}images/MDC-real-time-sync-small.png)
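For reference, version 2 realtime replication is wired up with listeners and sites; the node name, addresses, port, and site name below are illustrative assumptions.

```bash
# On the primary (source) cluster: expose a replication listener
riak-repl add-listener riak@10.0.1.10 10.0.1.10 9010

# On the secondary (sink) cluster: register the primary as a site
riak-repl add-site 10.0.1.10 9010 newyork

# Confirm listener/site configuration and connection state
riak-repl status
```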
## Restrictions @@ -116,6 +116,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.0.0/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.0.0/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.0.0/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.0.0/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/aae.md index e4e6699019..4734f2c637 100644 --- a/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.0/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.0.0/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.0.0/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.0.0/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/architecture.md index 8f15218f1f..006188fe3c 100644 --- a/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.0/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.0.0/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.0.0/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.0.0/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.0.0/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC fullsync]({{}}images/MDC-v3-realtime1.png)
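The commands referred to above look roughly like the following; the cluster names and the cluster-manager address are illustrative assumptions.

```bash
# Name each cluster; run once on a node in each cluster
riak-repl clustername east   # on the source cluster
riak-repl clustername west   # on the sink cluster

# From the source cluster, connect to the sink's cluster manager
riak-repl connect 10.0.2.10:9080
```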
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
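The realtime queues themselves are enabled and started explicitly; a sketch, assuming the sink cluster was named `west` as above.

```bash
# Enable realtime replication to the named sink, then start it
riak-repl realtime enable west
riak-repl realtime start west
```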
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC fullsync]({{}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC fullsync]({{}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC fullsync]({{}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC fullsync]({{}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC fullsync]({{}}images/MDC-v3-realtime6.png)
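Operators can watch the queue behavior described above with `riak-repl status`, whose output includes per-sink realtime queue statistics; exact field names vary by release.

```bash
# Inspect replication state, including realtime queue statistics
# such as pending and unacknowledged object counts
riak-repl status
```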
## Restrictions diff --git a/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/cascading-writes.md index bb49158fbc..2ab94636e7 100644 --- a/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/cascading-writes.md @@ -83,7 +83,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{}}riak/kv/2.0.0/using/cluster-operations/v3-multi-datacenter) for more information. To show the current settings: diff --git a/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/scheduling-fullsync.md index 2f64788a99..18640c1c0b 100644 --- a/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.0.0/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.0/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.0.0/configuring/reference/#advanced-configuration +[config reference#advanced]: {{}}riak/kv/2.0.0/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.0.0/using/repair-recovery.md index 6557718a60..2c5fd1bd2a 100644 --- a/content/riak/kv/2.0.0/using/repair-recovery.md +++ b/content/riak/kv/2.0.0/using/repair-recovery.md @@ -15,7 +15,7 @@ toc: true [repair recover fail]: ./failure-recovery/ [repair recover errors]: ./errors/ [repair recover repairs]: ./repairs/ -[repair recover restart]: ./rolling-restarts/ +[repair recover restart]: ./rolling-restart/ ## In This Section diff --git a/content/riak/kv/2.0.0/using/repair-recovery/errors.md index ce3a3ad799..1d050bfdd3 100644 --- a/content/riak/kv/2.0.0/using/repair-recovery/errors.md +++ b/content/riak/kv/2.0.0/using/repair-recovery/errors.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.0.0/configuring/reference +[config reference]: {{}}riak/kv/2.0.0/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1.
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1. `{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See 1 +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. 
See 1 `{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.0.0/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.0.0/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.0.0/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.0.0/using/repair-recovery/failure-recovery.md index 81a4ea3a3a..5eb00d86a7 100644 --- a/content/riak/kv/2.0.0/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.0.0/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.0.0/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.0.0/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.0.0/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.0.0/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
@@ -115,7 +115,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak/kv/2.0.0/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.0.0/using/repair-recovery/repairs.md b/content/riak/kv/2.0.0/using/repair-recovery/repairs.md index 5e6c99bd21..e4ab27c968 100644 --- a/content/riak/kv/2.0.0/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.0.0/using/repair-recovery/repairs.md @@ -149,7 +149,7 @@ In the event of major hardware or filesystem problems, LevelDB can become corrup ### Checking for Compaction Errors -Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`](/riak/kv/2.0.0/configuring/reference/) configuration file. The default is `./data`. +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`]({{}}riak/kv/2.0.0/configuring/reference/) configuration file. The default is `./data`. Compaction error messages take the following form: @@ -218,23 +218,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.0.0/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.0.0/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.0.0/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/) is enabled. 
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.0.0/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.0.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.0.0/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.0.0/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.0.0/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.0.0/using/repair-recovery/rolling-restart.md index eef5e87389..a257d8c204 100644 --- a/content/riak/kv/2.0.0/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.0.0/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.0/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.0.0/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.0.0/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.0.0/using/running-a-cluster.md b/content/riak/kv/2.0.0/using/running-a-cluster.md index 2c731b9feb..67ece7cf2d 100644 --- a/content/riak/kv/2.0.0/using/running-a-cluster.md +++ b/content/riak/kv/2.0.0/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. -Most configuration changes will be applied to the [configuration file](/riak/kv/2.0.0/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.0.0/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.0.0/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. 
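For reference, `riak-admin cluster replace` is staged like other cluster changes rather than applied immediately. A minimal sketch with hypothetical node names:

```bash
# Stage the replacement, review the plan, then commit it.
riak-admin cluster replace riak@old-host.example.com riak@new-host.example.com
riak-admin cluster plan     # inspect the staged change before applying
riak-admin cluster commit   # apply the staged change
```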
## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.0.0/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.0.0/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.0.0/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.0.0/using/cluster-operations/adding-removing-nodes) from a cluster. > **Ring Creation Size** > diff --git a/content/riak/kv/2.0.0/using/security.md b/content/riak/kv/2.0.0/using/security.md index b645a92f4f..25ae6defa6 100644 --- a/content/riak/kv/2.0.0/using/security.md +++ b/content/riak/kv/2.0.0/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.0.0/ops/advanced/security --- -[config reference search]: /riak/kv/2.0.0/configuring/reference/#search -[config search enabling]: /riak/kv/2.0.0/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.0.0/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.0.0/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.0.0/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.0.0/using/security/basics -[security managing]: /riak/kv/2.0.0/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.0.0/using/security/basics +[security managing]: {{}}riak/kv/2.0.0/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.0.0/developing/usage/search +[usage search]: {{}}riak/kv/2.0.0/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.0.0/using/security/basics.md b/content/riak/kv/2.0.0/using/security/basics.md index af8c659e7a..741e92c1a3 100644 --- a/content/riak/kv/2.0.0/using/security/basics.md +++ b/content/riak/kv/2.0.0/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.0.0/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.0.0/using/security/managing-sources/). 
As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.0.0/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.0.0/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.0.0/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.0.0/using/reference/custom-code). 1. Make sure that your client software will work properly: * It must pass authentication information with each request * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.0.0/developing/api/protocol-buffers/) + It must support HTTPS or encrypted [Protocol Buffers]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.0.0/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke <permissions> on any|<bucket-type> [bucket] from all|<users> (see [bucket types]({{}}riak/kv/2.0.0/developing/usage/bucket-types)). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.0.0/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{}}riak/kv/2.0.0/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.0.0/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{}}riak/kv/2.0.0/developing/usage/bucket-types) in addition to setting bucket properties.
`riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.0.0/configuring/search/) document. +[Riak Search Settings]({{}}riak/kv/2.0.0/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.0.0/using/security/managing-sources/). +A more in-depth tutorial can be found in [Managing Security Sources]({{}}riak/kv/2.0.0/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.0.0/using/security/managing-sources/) document. +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.0.0/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.0.0/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.0.0/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.0.0/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.0.0/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.0.0/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.0.0/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.0.0/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration).
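As a concrete illustration of the Protocol Buffers case, the listener and certificate settings might look as follows in `riak.conf`. This is a sketch only; the IP address, port, and certificate paths are placeholders for your own deployment:

```riakconf
listener.protobuf.internal = 192.168.1.10:8087

ssl.certfile = /etc/riak/cert.pem
ssl.keyfile = /etc/riak/key.pem
ssl.cacertfile = /etc/riak/cacert.pem
```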
-If, however, you are using the [HTTP API](/riak/kv/2.0.0/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.0.0/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.0.0/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.0.0/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.0.0/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.0.0/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.0.0/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.0.0/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.0.0/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.0.0/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.0.0/configuring/reference/#directories). @@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.0.0/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. 
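Both of the parameters mentioned above live in `riak.conf`. A minimal sketch, assuming you have weighed the security trade-offs of disabling these checks:

```riakconf
# Allow data to be served directly from Riak over HTTP
secure_referer_check = off

# Skip certificate revocation list checks for client certificates
check_crl = off
```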
diff --git a/content/riak/kv/2.0.0/using/security/managing-sources.md b/content/riak/kv/2.0.0/using/security/managing-sources.md index 35bc49886c..de0a9ae280 100644 --- a/content/riak/kv/2.0.0/using/security/managing-sources.md +++ b/content/riak/kv/2.0.0/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.0.0/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.0.0/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.0.0/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.0.0/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.0.0/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.0.0/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.0.0/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.0.0/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.0.0/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.0.0/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.0.0/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.0.0/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.0.0/using/repair-recovery/rolling-restart/). 
Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.0.0/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.0.1/_reference-links.md b/content/riak/kv/2.0.1/_reference-links.md index 736562b9b8..f1b9368e0f 100644 --- a/content/riak/kv/2.0.1/_reference-links.md +++ b/content/riak/kv/2.0.1/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.0.1/downloads/ -[install index]: /riak/kv/2.0.1/setup/installing -[upgrade index]: /riak/kv/2.0.1/upgrading -[plan index]: /riak/kv/2.0.1/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.0.1/configuring/reference/ -[manage index]: /riak/kv/2.0.1/using/managing -[performance index]: /riak/kv/2.0.1/using/performance -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.0.1/downloads/ +[install index]: {{}}riak/kv/2.0.1/setup/installing +[upgrade index]: {{}}riak/kv/2.0.1/upgrading +[plan index]: {{}}riak/kv/2.0.1/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.0.1/configuring/reference/ +[manage index]: {{}}riak/kv/2.0.1/using/managing +[performance index]: {{}}riak/kv/2.0.1/using/performance +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.0.1/setup/planning -[plan start]: /riak/kv/2.0.1/setup/planning/start -[plan backend]: /riak/kv/2.0.1/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.1/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.0.1/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.0.1/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.0.1/setup/planning/best-practices -[plan future]: /riak/kv/2.0.1/setup/planning/future +[plan index]: {{}}riak/kv/2.0.1/setup/planning +[plan start]: {{}}riak/kv/2.0.1/setup/planning/start +[plan backend]: {{}}riak/kv/2.0.1/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.1/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/2.0.1/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.0.1/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.0.1/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.0.1/setup/planning/future ## Installing -[install index]: /riak/kv/2.0.1/setup/installing -[install aws]: /riak/kv/2.0.1/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.1/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.1/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.1/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.1/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.1/setup/installing/smartos -[install solaris]: /riak/kv/2.0.1/setup/installing/solaris -[install suse]: 
/riak/kv/2.0.1/setup/installing/suse -[install windows azure]: /riak/kv/2.0.1/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.0.1/setup/installing +[install aws]: {{}}riak/kv/2.0.1/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.1/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.1/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.1/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.1/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.1/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.1/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.1/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.1/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.1/setup/installing/source -[install source erlang]: /riak/kv/2.0.1/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.0.1/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.0.1/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.1/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.0.1/setup/installing/source/jvm -[install verify]: /riak/kv/2.0.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.1/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.0.1/setup/upgrading -[upgrade checklist]: /riak/kv/2.0.1/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.0.1/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.0.1/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.0.1/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.0.1/setup/downgrade +[upgrade index]: {{}}riak/kv/2.0.1/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.0.1/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.0.1/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.0.1/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.0.1/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.0.1/setup/downgrade ## Configuring -[config index]: /riak/kv/2.0.1/configuring -[config basic]: /riak/kv/2.0.1/configuring/basic -[config backend]: /riak/kv/2.0.1/configuring/backend -[config manage]: /riak/kv/2.0.1/configuring/managing -[config reference]: /riak/kv/2.0.1/configuring/reference/ -[config strong consistency]: /riak/kv/2.0.1/configuring/strong-consistency -[config load balance]: /riak/kv/2.0.1/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.0.1/configuring/mapreduce -[config search]: /riak/kv/2.0.1/configuring/search/ +[config index]: {{}}riak/kv/2.0.1/configuring +[config basic]: {{}}riak/kv/2.0.1/configuring/basic +[config backend]: {{}}riak/kv/2.0.1/configuring/backend +[config manage]: {{}}riak/kv/2.0.1/configuring/managing +[config reference]: {{}}riak/kv/2.0.1/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.0.1/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.0.1/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.0.1/configuring/mapreduce +[config search]: {{}}riak/kv/2.0.1/configuring/search/ -[config v3 mdc]: /riak/kv/2.0.1/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.1/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.1/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter/nat +[config v3 
quickstart]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.0.1/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.0.1/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.0.1/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.0.1/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.0.1/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.0.1/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.0.1/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.0.1/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.0.1/using/ -[use admin commands]: /riak/kv/2.0.1/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.0.1/using/running-a-cluster +[use index]: {{}}riak/kv/2.0.1/using/ +[use admin commands]: {{}}riak/kv/2.0.1/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.0.1/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.0.1/using/reference/custom-code -[use ref handoff]: /riak/kv/2.0.1/using/reference/handoff -[use ref monitoring]: /riak/kv/2.0.1/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.0.1/using/reference/search -[use ref 2i]: /riak/kv/2.0.1/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.0.1/using/reference/snmp -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.0.1/using/reference/jmx -[use ref obj del]: /riak/kv/2.0.1/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.0.1/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.0.1/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.0.1/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.0.1/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.0.1/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.0.1/using/reference/search +[use ref 2i]: {{}}riak/kv/2.0.1/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.0.1/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.0.1/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.0.1/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.0.1/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.0.1/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.0.1/using/admin/ -[use admin commands]: /riak/kv/2.0.1/using/admin/commands/ -[use admin riak cli]: /riak/kv/2.0.1/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.0.1/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.0.1/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.0.1/using/admin/ +[use admin commands]: {{}}riak/kv/2.0.1/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.0.1/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.0.1/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.0.1/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.0.1/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.0.1/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: 
/riak/kv/2.0.1/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.0.1/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.0.1/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.0.1/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.0.1/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.0.1/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.0.1/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.0.1/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.0.1/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.0.1/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.0.1/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.0.1/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.0.1/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.0.1/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.0.1/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.0.1/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.0.1/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.0.1/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.0.1/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.0.1/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.0.1/using/repair-recovery -[repair recover index]: /riak/kv/2.0.1/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.0.1/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.0.1/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.0.1/using/security/ -[security basics]: /riak/kv/2.0.1/using/security/basics -[security managing]: /riak/kv/2.0.1/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.0.1/using/security/ +[security basics]: {{}}riak/kv/2.0.1/using/security/basics +[security managing]: {{}}riak/kv/2.0.1/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.0.1/using/performance/ -[perf benchmark]: /riak/kv/2.0.1/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.1/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.0.1/using/performance/erlang -[perf aws]: /riak/kv/2.0.1/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.0.1/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.0.1/using/performance/ +[perf benchmark]: {{}}riak/kv/2.0.1/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.1/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.0.1/using/performance/erlang +[perf aws]: {{}}riak/kv/2.0.1/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.0.1/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: 
/riak/kv/2.0.1/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.0.1/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.0.1/developing -[dev client libraries]: /riak/kv/2.0.1/developing/client-libraries -[dev data model]: /riak/kv/2.0.1/developing/data-modeling -[dev data types]: /riak/kv/2.0.1/developing/data-types -[dev kv model]: /riak/kv/2.0.1/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.0.1/developing +[dev client libraries]: {{}}riak/kv/2.0.1/developing/client-libraries +[dev data model]: {{}}riak/kv/2.0.1/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.1/developing/data-types +[dev kv model]: {{}}riak/kv/2.0.1/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.0.1/developing/getting-started -[getting started java]: /riak/kv/2.0.1/developing/getting-started/java -[getting started ruby]: /riak/kv/2.0.1/developing/getting-started/ruby -[getting started python]: /riak/kv/2.0.1/developing/getting-started/python -[getting started php]: /riak/kv/2.0.1/developing/getting-started/php -[getting started csharp]: /riak/kv/2.0.1/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.0.1/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.0.1/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.0.1/developing/getting-started/golang - -[obj model java]: /riak/kv/2.0.1/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.1/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.1/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.1/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.1/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.1/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.1/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.0.1/developing/getting-started +[getting started java]: {{}}riak/kv/2.0.1/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.0.1/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.0.1/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.0.1/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.0.1/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.0.1/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.0.1/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.0.1/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.0.1/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.0.1/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.1/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.1/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.1/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.1/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.1/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.0.1/developing/usage -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.1/developing/usage/commit-hooks -[usage conflict resolution]: 
/riak/kv/2.0.1/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.0.1/developing/usage/content-types -[usage create objects]: /riak/kv/2.0.1/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.0.1/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.0.1/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.0.1/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.1/developing/usage/search -[usage search schema]: /riak/kv/2.0.1/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.1/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.0.1/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.0.1/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.0.1/developing/usage +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.1/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.1/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.0.1/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.0.1/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.0.1/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.0.1/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.0.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.1/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.1/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.0.1/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.0.1/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.0.1/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.0.1/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.0.1/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.0.1/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.0.1/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.0.1/developing/api/backend -[dev api http]: /riak/kv/2.0.1/developing/api/http -[dev api http status]: /riak/kv/2.0.1/developing/api/http/status -[dev api pbc]: /riak/kv/2.0.1/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.0.1/developing/api/backend +[dev api http]: {{}}riak/kv/2.0.1/developing/api/http +[dev api http status]: {{}}riak/kv/2.0.1/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.0.1/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: /riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.0.1/learn/glossary/ -[glossary aae]: /riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.0.1/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.0.1/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.0.1/learn/glossary/#read-repair +[glossary vnode]: 
{{}}riak/kv/2.0.1/learn/glossary/#vnode -[concept aae]: /riak/kv/2.0.1/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.0.1/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.1/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.1/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.0.1/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.1/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.1/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.0.1/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.1/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.1/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.0.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.1/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.0.1/add-ons.md b/content/riak/kv/2.0.1/add-ons.md index e63596c230..e6ef17a353 100644 --- a/content/riak/kv/2.0.1/add-ons.md +++ b/content/riak/kv/2.0.1/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.0.1/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.0.1/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.0.1/add-ons/redis/developing-rra.md b/content/riak/kv/2.0.1/add-ons/redis/developing-rra.md index 20c0844a41..6c2d94be51 100644 --- a/content/riak/kv/2.0.1/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.0.1/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.0.1/developing/api/http +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.0.1/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.0.1/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.0.1/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.0.1/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.0.1/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.0.1/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.0.1/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.0.1/add-ons/redis/redis-add-on-features.md index 1d520b482a..f9ea1a41b3 100644 --- a/content/riak/kv/2.0.1/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.0.1/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
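From a client's perspective the read-through path is transparent. A minimal sketch using `redis-cli`; the RRA host and port shown are placeholders for your own deployment:

```bash
# The first GET misses in Redis, falls through to Riak KV, and populates the
# cache; repeated GETs within CACHE_TTL are then served directly from Redis.
redis-cli -h rra.example.com -p 22122 GET test:key
redis-cli -h rra.example.com -p 22122 GET test:key
```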
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.0.1/add-ons/redis/set-up-rra.md b/content/riak/kv/2.0.1/add-ons/redis/set-up-rra.md index b675c072f0..741d1710aa 100644 --- a/content/riak/kv/2.0.1/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.0.1/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.0.1/setup/installing -[perf open files]: /riak/kv/2.0.1/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.0.1/setup/installing +[perf open files]: {{}}riak/kv/2.0.1/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. 
diff --git a/content/riak/kv/2.0.1/add-ons/redis/using-rra.md b/content/riak/kv/2.0.1/add-ons/redis/using-rra.md index fbfd4a6e73..98e4a1bceb 100644 --- a/content/riak/kv/2.0.1/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.0.1/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.0.1/developing/api/http/ +[dev api http]: {{}}riak/kv/2.0.1/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.0.1/configuring/backend.md b/content/riak/kv/2.0.1/configuring/backend.md index ab77e3df38..6d2fa2c9f7 100644 --- a/content/riak/kv/2.0.1/configuring/backend.md +++ b/content/riak/kv/2.0.1/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.1/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.1/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.0.1/configuring/basic.md b/content/riak/kv/2.0.1/configuring/basic.md index 48ae71674d..4db5d5de10 100644 --- a/content/riak/kv/2.0.1/configuring/basic.md +++ b/content/riak/kv/2.0.1/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.0.1/ops/building/configuration/ --- -[config reference]: /riak/kv/2.0.1/configuring/reference -[use running cluster]: /riak/kv/2.0.1/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.0.1/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.0.1/using/performance/erlang -[plan start]: /riak/kv/2.0.1/setup/planning/start -[plan best practices]: /riak/kv/2.0.1/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.0.1/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.0.1/setup/planning/backend -[plan backend multi]: /riak/kv/2.0.1/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.0.1/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.0.1/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.1/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.1/using/performance -[perf aws]: /riak/kv/2.0.1/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.0.1/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[use running cluster]: {{}}riak/kv/2.0.1/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.0.1/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.0.1/using/performance/erlang +[plan start]: {{}}riak/kv/2.0.1/setup/planning/start +[plan best practices]: {{}}riak/kv/2.0.1/setup/planning/best-practices +[cluster ops backup]: 
{{}}riak/kv/2.0.1/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.0.1/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.0.1/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.1/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.0.1/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.0.1/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.1/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.1/using/performance +[perf aws]: {{}}riak/kv/2.0.1/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.0.1/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.0.1/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.1/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
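For illustration, the reset mechanism is a single HTTP call per bucket. A sketch, assuming the default HTTP interface on `localhost` and a hypothetical bucket name:

```bash
# Clear mybucket's custom properties so it picks up the new defaults
curl -XDELETE http://127.0.0.1:8098/buckets/mybucket/props
```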
## System tuning diff --git a/content/riak/kv/2.0.1/configuring/load-balancing-proxy.md b/content/riak/kv/2.0.1/configuring/load-balancing-proxy.md index 753d5feef4..91ea20e390 100644 --- a/content/riak/kv/2.0.1/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.0.1/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.0.1/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.0.1/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.0.1/configuring/managing.md b/content/riak/kv/2.0.1/configuring/managing.md index 83d8a7d67b..408e8a4928 100644 --- a/content/riak/kv/2.0.1/configuring/managing.md +++ b/content/riak/kv/2.0.1/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.0.1/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.0.1/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.0.1/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.0.1/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.0.1/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.0.1/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.0.1/configuring/mapreduce.md b/content/riak/kv/2.0.1/configuring/mapreduce.md index a93b4541d2..68df630d0c 100644 --- a/content/riak/kv/2.0.1/configuring/mapreduce.md +++ b/content/riak/kv/2.0.1/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.0.1/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.0.1/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.0.1/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.0.1/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.1/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.0.1/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.0.1/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.0.1/configuring/reference.md b/content/riak/kv/2.0.1/configuring/reference.md index 0f654cee76..359eb92d4e 100644 --- a/content/riak/kv/2.0.1/configuring/reference.md +++ b/content/riak/kv/2.0.1/configuring/reference.md @@ -1876,8 +1876,8 @@ package) and in R14B04 via a custom repository and branch. 
diff --git a/content/riak/kv/2.0.1/configuring/search.md b/content/riak/kv/2.0.1/configuring/search.md index dc458a36ad..38ac0b733c 100644 --- a/content/riak/kv/2.0.1/configuring/search.md +++ b/content/riak/kv/2.0.1/configuring/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.0.1/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.0.1/developing/usage/search -[usage search schema]: /riak/kv/2.0.1/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.1/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.0.1/developing/usage/custom-extractors -[config reference]: /riak/kv/2.0.1/configuring/reference -[config reference#search]: /riak/kv/2.0.1/configuring/reference/#search -[glossary aae]: /riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.0.1/using/security/ +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.1/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.1/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.0.1/developing/usage/custom-extractors +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[config reference#search]: {{}}riak/kv/2.0.1/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.0.1/using/security/ > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Riak Search Settings](http://docs.basho.com/riak/1.4.8/ops/advanced/configs/search/). +Yokozuna). This document covers Riak's Search subsystem from an operational perspective. 
If you are looking for more developer-focused diff --git a/content/riak/kv/2.0.1/configuring/strong-consistency.md b/content/riak/kv/2.0.1/configuring/strong-consistency.md index a0afa009a6..61eac448ed 100644 --- a/content/riak/kv/2.0.1/configuring/strong-consistency.md +++ b/content/riak/kv/2.0.1/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.0.1/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.0.1/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.0.1/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.0.1/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.0.1/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.0.1/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.0.1/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.0.1/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.0.1/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.0.1/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.0.1/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.0.1/learn/concepts/causal-context -[dev data types]: /riak/kv/2.0.1/developing/data-types -[glossary aae]: /riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.0.1/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.0.1/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.0.1/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.0.1/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.0.1/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.0.1/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.0.1/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.0.1/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.0.1/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.0.1/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.0.1/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.0.1/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.0.1/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.0.1/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.0.1/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.0.1/learn/concepts/causal-context +[dev data types]: 
{{}}riak/kv/2.0.1/developing/data-types +[glossary aae]: {{}}riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.0.1/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.0.1/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.0.1/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.0.1/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.0.1/configuring/v2-multi-datacenter.md b/content/riak/kv/2.0.1/configuring/v2-multi-datacenter.md index 7685fd570f..e00c4070e7 100644 --- a/content/riak/kv/2.0.1/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.1/configuring/v2-multi-datacenter.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.1/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.0.1/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.1/configuring/v2-multi-datacenter/ssl Riak Enterprise's Multi-Datacenter Replication capabilities offer a variety of configurable parameters. diff --git a/content/riak/kv/2.0.1/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.0.1/configuring/v2-multi-datacenter/nat.md index e3084e2ba0..a92d8a0dac 100644 --- a/content/riak/kv/2.0.1/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.1/configuring/v2-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.1/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.0.1/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.1/configuring/v2-multi-datacenter/ssl Riak Enterprise supports replication of data on networks that use static NAT. This capability can be used for replicating data over the internet diff --git a/content/riak/kv/2.0.1/configuring/v3-multi-datacenter.md b/content/riak/kv/2.0.1/configuring/v3-multi-datacenter.md index 997d5da0be..8301c706f7 100644 --- a/content/riak/kv/2.0.1/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.1/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.1/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.0.1/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.0.1/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/nat.md index 1786659d9c..c091ebb875 100644 --- a/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.1/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. 
diff --git a/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/quick-start.md index 0eb3f95096..73a1112a42 100644 --- a/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.1/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.0.1/using/performance -[config v3 mdc]: /riak/kv/2.0.1/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.0.1/using/performance +[config v3 mdc]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl.md index aebb547941..0b231d064c 100644 --- a/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.1/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.0.1/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.0.1/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.0.1/developing/api/backend.md b/content/riak/kv/2.0.1/developing/api/backend.md index 9c9b750205..5e9308f591 100644 --- a/content/riak/kv/2.0.1/developing/api/backend.md +++ b/content/riak/kv/2.0.1/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/backend-api --- -[plan backend]: /riak/kv/2.0.1/setup/planning/backend +[plan backend]: {{}}riak/kv/2.0.1/setup/planning/backend Riak's storage API uniformly applies to all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.0.1/developing/api/http.md b/content/riak/kv/2.0.1/developing/api/http.md index 7ac53c5f0b..a4def76f36 100644 --- a/content/riak/kv/2.0.1/developing/api/http.md +++ b/content/riak/kv/2.0.1/developing/api/http.md @@ -29,50 +29,50 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. 
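To make the escaping rule above concrete, a small sketch (hostname, bucket, and key are hypothetical):

```curl
# The key "docs/readme" contains a slash, so it is sent as docs%2Freadme
curl http://localhost:8098/types/default/buckets/mybucket/keys/docs%2Freadme
```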
Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.0.1/developing/api/http/get-bucket-props)
-`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.0.1/developing/api/http/set-bucket-props)
-`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.0.1/developing/api/http/reset-bucket-props)
-`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.0.1/developing/api/http/list-buckets)
-`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.0.1/developing/api/http/list-keys)
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.0.1/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.0.1/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.1/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.0.1/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.0.1/developing/api/http/list-keys)

## Object-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.0.1/developing/api/http/fetch-object)
-`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.1/developing/api/http/store-object)
-`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.1/developing/api/http/store-object)
-`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.0.1/developing/api/http/delete-object)
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.0.1/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.0.1/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.0.1/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.0.1/developing/api/http/delete-object)

## Riak-Data-Type-related Operations

-For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.0.1/learn/concepts/crdts),
-see the `curl` examples in [Using Data Types](/riak/kv/2.0.1/developing/data-types).
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.0.1/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/2.0.1/developing/data-types).
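By way of example, a minimal sketch of incrementing a counter over HTTP (assuming a bucket type named `counters` has already been created and activated; the bucket and key are hypothetical):

```curl
# Hypothetical counters/my_bucket/my_counter location; the datatypes
# endpoint accepts a JSON operation payload
curl -XPOST http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_counter \
  -H "Content-Type: application/json" \
  -d '{"increment": 1}'
```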
## Query-related Operations Method | URL | Doc :------|:----|:--- -`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.0.1/developing/api/http/mapreduce) -`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes](/riak/kv/2.0.1/developing/api/http/secondary-indexes) -`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes](/riak/kv/2.0.1/developing/api/http/secondary-indexes) +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.0.1/developing/api/http/mapreduce) +`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.1/developing/api/http/secondary-indexes) +`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.1/developing/api/http/secondary-indexes) ## Server-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/ping` | [HTTP Ping](/riak/kv/2.0.1/developing/api/http/ping) -`GET` | `/stats` | [HTTP Status](/riak/kv/2.0.1/developing/api/http/status) -`GET` | `/` | [HTTP List Resources](/riak/kv/2.0.1/developing/api/http/list-resources) +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.0.1/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.0.1/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.0.1/developing/api/http/list-resources) ## Search-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/search/query/` | [HTTP Search Query](/riak/kv/2.0.1/developing/api/http/search-query) -`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.0.1/developing/api/http/search-index-info) -`GET` | `/search/index/` | [HTTP Fetch Search Index](/riak/kv/2.0.1/developing/api/http/fetch-search-index) -`PUT` | `/search/index/` | [HTTP Store Search Index](/riak/kv/2.0.1/developing/api/http/store-search-index) -`DELETE` | `/search/index/` | [HTTP Delete Search Index](/riak/kv/2.0.1/developing/api/http/delete-search-index) -`GET` | `/search/schema/` | [HTTP Fetch Search Schema](/riak/kv/2.0.1/developing/api/http/fetch-search-schema) -`PUT` | `/search/schema/` | [HTTP Store Search Schema](/riak/kv/2.0.1/developing/api/http/store-search-schema) +`GET` | `/search/query/` | [HTTP Search Query]({{}}riak/kv/2.0.1/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.0.1/developing/api/http/search-index-info) +`GET` | `/search/index/` | [HTTP Fetch Search Index]({{}}riak/kv/2.0.1/developing/api/http/fetch-search-index) +`PUT` | `/search/index/` | [HTTP Store Search Index]({{}}riak/kv/2.0.1/developing/api/http/store-search-index) +`DELETE` | `/search/index/` | [HTTP Delete Search Index]({{}}riak/kv/2.0.1/developing/api/http/delete-search-index) +`GET` | `/search/schema/` | [HTTP Fetch Search Schema]({{}}riak/kv/2.0.1/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/` | [HTTP Store Search Schema]({{}}riak/kv/2.0.1/developing/api/http/store-search-schema) diff --git a/content/riak/kv/2.0.1/developing/api/http/counters.md b/content/riak/kv/2.0.1/developing/api/http/counters.md index ccca0d48e9..674258233b 100644 --- a/content/riak/kv/2.0.1/developing/api/http/counters.md +++ b/content/riak/kv/2.0.1/developing/api/http/counters.md @@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY ## Response -The regular POST/PUT ([HTTP Store Object](/riak/kv/2.0.1/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.0.1/developing/api/http/fetch-object)) responses apply here. 
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.0.1/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.0.1/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.0.1/developing/api/http/fetch-object.md b/content/riak/kv/2.0.1/developing/api/http/fetch-object.md index c322612e23..f14a486867 100644 --- a/content/riak/kv/2.0.1/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.0.1/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.0.1/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.0.1/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.0.1/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.1/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.0.1/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.1/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.0.1/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.0.1/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.1/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.1/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.0.1/developing/api/http/fetch-search-index.md b/content/riak/kv/2.0.1/developing/api/http/fetch-search-index.md index d86c56fede..017e2369bc 100644 --- a/content/riak/kv/2.0.1/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.0.1/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.0.1/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.0.1/developing/usage/search/#simple-setup). 
## Request @@ -36,7 +36,7 @@ GET /search/index/ ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.0.1/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.0.1/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.0.1/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.0.1/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.0.1/developing/api/http/fetch-search-schema.md index 7a737ec5f7..bc67f2fee5 100644 --- a/content/riak/kv/2.0.1/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.0.1/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.0.1/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.0.1/developing/api/http/get-bucket-props.md b/content/riak/kv/2.0.1/developing/api/http/get-bucket-props.md index 43ed90938f..aa3629454c 100644 --- a/content/riak/kv/2.0.1/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.0.1/developing/api/http/get-bucket-props.md @@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.0.1/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.0.1/developing/api/http/list-keys). ## Response @@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.0.1/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{}}riak/kv/2.0.1/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.0.1/developing/api/http/link-walking.md b/content/riak/kv/2.0.1/developing/api/http/link-walking.md index 06e84f2abb..bbfe80c925 100644 --- a/content/riak/kv/2.0.1/developing/api/http/link-walking.md +++ b/content/riak/kv/2.0.1/developing/api/http/link-walking.md @@ -17,8 +17,8 @@ aliases: Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.0.1/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.0.1/learn/glossary/#links). +is a special case of [MapReduce]({{}}riak/kv/2.0.1/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.0.1/learn/glossary/#links). ## Request @@ -64,7 +64,7 @@ single object that was found. If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. 
Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.0.1/developing/api/http/fetch-object), without the status +response from [fetching the object]({{}}riak/kv/2.0.1/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.0.1/developing/api/http/list-resources.md b/content/riak/kv/2.0.1/developing/api/http/list-resources.md index 8ce7e08347..d706beac2b 100644 --- a/content/riak/kv/2.0.1/developing/api/http/list-resources.md +++ b/content/riak/kv/2.0.1/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.0.1/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.0.1/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.0.1/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.0.1/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.0.1/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.0.1/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.0.1/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.0.1/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.0.1/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.0.1/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.0.1/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.0.1/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.0.1/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.0.1/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.0.1/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.0.1/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.0.1/developing/api/http/mapreduce.md b/content/riak/kv/2.0.1/developing/api/http/mapreduce.md index 57cd2f5e7d..b1d152bdb1 100644 --- a/content/riak/kv/2.0.1/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.0.1/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.0.1/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{}}riak/kv/2.0.1/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.0.1/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. 
The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.0.1/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.0.1/developing/api/http/search-index-info.md b/content/riak/kv/2.0.1/developing/api/http/search-index-info.md index bbcfca6dc8..56164468be 100644 --- a/content/riak/kv/2.0.1/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.0.1/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.0.1/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.0.1/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.0.1/developing/api/http/search-query.md b/content/riak/kv/2.0.1/developing/api/http/search-query.md index 0ec60c5a5c..179afc386c 100644 --- a/content/riak/kv/2.0.1/developing/api/http/search-query.md +++ b/content/riak/kv/2.0.1/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.0.1/developing/usage/search) query. +Performs a [Riak KV Search]({{}}riak/kv/2.0.1/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.0.1/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{}}riak/kv/2.0.1/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.0.1/developing/api/http/secondary-indexes.md b/content/riak/kv/2.0.1/developing/api/http/secondary-indexes.md index a1d42201f1..e70f71dfd5 100644 --- a/content/riak/kv/2.0.1/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.0.1/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.0.1/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
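For example, a sketch of an exact-match 2i query (assuming objects were previously tagged with a hypothetical binary index named `twitter_bin`):

```curl
# Return all keys whose twitter_bin index field equals jsmith123
curl http://localhost:8098/types/default/buckets/mybucket/index/twitter_bin/jsmith123
```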
## Request diff --git a/content/riak/kv/2.0.1/developing/api/http/set-bucket-props.md b/content/riak/kv/2.0.1/developing/api/http/set-bucket-props.md index 7889ac0d16..473938c0b7 100644 --- a/content/riak/kv/2.0.1/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.0.1/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.0.1/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.0.1/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{}}riak/kv/2.0.1/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.0.1/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.0.1/developing/api/http/status.md b/content/riak/kv/2.0.1/developing/api/http/status.md index c4a62b949d..7512729d1a 100644 --- a/content/riak/kv/2.0.1/developing/api/http/status.md +++ b/content/riak/kv/2.0.1/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.0.1/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.0.1/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.0.1/developing/api/http/store-object.md b/content/riak/kv/2.0.1/developing/api/http/store-object.md index 1aabab9e43..dc6eccf3eb 100644 --- a/content/riak/kv/2.0.1/developing/api/http/store-object.md +++ b/content/riak/kv/2.0.1/developing/api/http/store-object.md @@ -38,8 +38,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.0.1/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.1/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{}}riak/kv/2.0.1/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.1/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -83,7 +83,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.0.1/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.0.1/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. 
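A minimal sketch of such a store request (assuming a local node; the bucket, key, and payload are hypothetical):

```curl
# Store a plain-text object; returnbody=true echoes the stored value back
curl -XPUT "http://localhost:8098/types/default/buckets/mybucket/keys/mykey?returnbody=true" \
  -H "Content-Type: text/plain" \
  -d "hello world"
```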
diff --git a/content/riak/kv/2.0.1/developing/api/http/store-search-index.md b/content/riak/kv/2.0.1/developing/api/http/store-search-index.md index e0e1f2571c..4eb1c707c7 100644 --- a/content/riak/kv/2.0.1/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.0.1/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.0.1/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.0.1/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.0.1/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.0.1/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.0.1/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.0.1/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.0.1/developing/api/http/store-search-schema.md b/content/riak/kv/2.0.1/developing/api/http/store-search-schema.md index ed858c0484..fe73d449fa 100644 --- a/content/riak/kv/2.0.1/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.0.1/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.0.1/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/schema/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.0.1/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the file `my_schema.xml`
and would like to create a new schema called `my_custom_schema`, you
would use the following HTTP request:

diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers.md
index 7e5f8f3aaf..22fc5af458 100644
--- a/content/riak/kv/2.0.1/developing/api/protocol-buffers.md
+++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers.md
@@ -139,47 +139,47 @@ message RpbErrorResp {

## Bucket Operations

-* [PBC List Buckets](/riak/kv/2.0.1/developing/api/protocol-buffers/list-buckets)
-* [PBC List Keys](/riak/kv/2.0.1/developing/api/protocol-buffers/list-keys)
-* [PBC Get Bucket Properties](/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props)
-* [PBC Set Bucket Properties](/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props)
-* [PBC Reset Bucket Properties](/riak/kv/2.0.1/developing/api/protocol-buffers/reset-bucket-props)
+* [PBC List Buckets]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/list-buckets)
+* [PBC List Keys]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/list-keys)
+* [PBC Get Bucket Properties]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props)
+* [PBC Set Bucket Properties]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props)
+* [PBC Reset Bucket Properties]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/reset-bucket-props)

## Object/Key Operations

-* [PBC Fetch Object](/riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object)
-* [PBC Store Object](/riak/kv/2.0.1/developing/api/protocol-buffers/store-object)
-* [PBC Delete Object](/riak/kv/2.0.1/developing/api/protocol-buffers/delete-object)
+* [PBC Fetch Object]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object)
+* [PBC Store Object]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/store-object)
+* [PBC Delete Object]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/delete-object)

## Query Operations

-* [PBC MapReduce](/riak/kv/2.0.1/developing/api/protocol-buffers/mapreduce)
-* [PBC Secondary Indexes](/riak/kv/2.0.1/developing/api/protocol-buffers/secondary-indexes)
-* [PBC Search](/riak/kv/2.0.1/developing/api/protocol-buffers/search)
+* [PBC MapReduce]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/mapreduce)
+* [PBC Secondary Indexes]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/secondary-indexes)
+* [PBC Search]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/search)

## Server Operations

-* [PBC Ping](/riak/kv/2.0.1/developing/api/protocol-buffers/ping)
-* [PBC Server Info](/riak/kv/2.0.1/developing/api/protocol-buffers/server-info)
+* [PBC Ping]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/ping)
+* [PBC Server Info]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/server-info)

## Bucket Type Operations

-* [PBC Get Bucket Type](/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-type)
-* [PBC Set Bucket Type](/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-type)
+* [PBC Get Bucket Type]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-type)
+* [PBC Set Bucket Type]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-type)

## Data Type Operations

-* [PBC Data Type Fetch](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-fetch)
-* [PBC Data Type Union](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-union)
-* [PBC Data Type Store](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-store)
-* [PBC Data Type Counter
Store](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/auth-req.md index bdd2ba992b..40ebdc5765 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/auth-req.md @@ -27,4 +27,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.0.1/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.0.1/using/security/basics). diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/delete-object.md index cbed65c8e8..a30c4ed495 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/delete-object.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.0.1/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{}}riak/kv/2.0.1/using/cluster-operations/bucket-types)/bucket/key location. 
## Request diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-counter-store.md index bb6b2dde4d..a6f886e4e1 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.0.1/developing/data-types). +An operation to update a [counter]({{}}riak/kv/2.0.1/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-fetch.md index 16052984ef..6bb81836fb 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.0.1/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.0.1/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.0.1/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{}}riak/kv/2.0.1/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. 
```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.0.1/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{}}riak/kv/2.0.1/learn/glossary/#vector-clock) for standard KV updates. The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store.md index c56e50ecd4..32cf3b8ca6 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-set-store.md index 2a8333b139..8e7864fdeb 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-store.md index d0189ea45f..52a0c87d39 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.0.1/developing/data-types). +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.0.1/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`. 
The `DtOp` value specifies which Data Type-specific operation is being
-performed. More on that in the [PBC Data Type Union](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-union) document.
+performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-union) document.

```protobuf
message DtUpdateReq {
@@ -50,11 +50,11 @@ message DtUpdateReq {
Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket in which the Data Type is stored
-`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.0.1/using/cluster-operations/bucket-types).
+`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.0.1/using/cluster-operations/bucket-types).

Also required is a `DtOp` message that specifies which operation is to
be performed, depending on whether the Data Type being updated is a
-[counter](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store).
+[counter]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-map-store).

```protobuf
message DtOp {
@@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum`
Parameter | Description
:---------|:-----------
`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
-`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.0.1/learn/glossary/#vector-clock)
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.0.1/learn/glossary/#vector-clock)
`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
@@ -92,7 +92,7 @@ Parameter | Description

## Response

The response to a Data Type update request is analogous to
-[`RpbPutResp`](/riak/kv/2.0.1/developing/api/protocol-buffers/store-object) for KV operations. If the
+[`RpbPutResp`]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/store-object) for KV operations. If the
`return_body` is set in the update request message (as explained above),
the message will include the opaque context of the Data Type (`context`)
and the new value of the Data Type _after_ the update has completed
diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-union.md
index cf2fe4f73b..574c22d0f1 100644
--- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-union.md
+++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/dt-union.md
@@ -28,4 +28,4 @@ message DtOp {
```
The included operation depends on the Data Type that is being updated.
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.0.1/developing/api/protocol-buffers/dt-store) message. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object.md index ff1ee02395..db7fd77125 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object.md @@ -47,7 +47,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or @@ -87,7 +87,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.0.1/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -114,7 +114,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `video/mp4` -* `vtag` --- The object's [vtag](/riak/kv/2.0.1/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{}}riak/kv/2.0.1/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -132,7 +132,7 @@ of the following optional parameters: } ``` Notice that you can store both a key and a value, or just a key. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.0.1/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key) diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props.md index b1646ae02a..4b2f0bc25b 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props.md @@ -26,7 +26,7 @@ message RpbGetBucketReq { } ``` -The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.0.1/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.0.1/using/cluster-operations/bucket-types) parameter (`type`) is optional.
If it is not specified, the `default` bucket type will be used. ## Response @@ -85,7 +85,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.0.1/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{}}riak/kv/2.0.1/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -106,5 +106,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riakcs/latest/cookbooks/mdc-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/latest/cookbooks/mdc-overview/). {{% /note %}} diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-type.md index b0cd80a5e6..841f8a6e8f 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-type.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.0.1/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.0.1/using/cluster-operations/bucket-types). ## Request @@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-client-id.md index ef5fcd3530..cb7d56bf48 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/get-client-id.md @@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution, and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.0.1/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/set-client-id).
## Request diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/mapreduce.md index 0cae4bf72a..fe4d410847 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/mapreduce.md @@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.0.1/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.0.1/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as the [REST API]({{}}riak/kv/2.0.1/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{}}riak/kv/2.0.1/developing/app-guide/advanced-mapreduce/#erlang). ## Response diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/reset-bucket-props.md index a6b4847cd2..89edd90f25 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/reset-bucket-props.md @@ -27,7 +27,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/secondary-indexes.md index 41501224fb..fd7d5b27e8 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/secondary-indexes.md @@ -61,7 +61,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.0.1/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -84,7 +84,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object).
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props.md index 1871b0e92c..06ebfbe830 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props.md @@ -29,9 +29,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-type.md index 5b48f730a2..fdfc36692a 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-type.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.1/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.0.1/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types). ## Request @@ -28,4 +28,4 @@ message RpbSetBucketTypeReq { ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/store-object.md index c28868018d..d5add395ea 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/store-object.md @@ -16,11 +16,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.0.1/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.0.1/learn/concepts/buckets), and [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{}}riak/kv/2.0.1/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.0.1/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types). 
A bucket must always be specified (via `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no -[bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) is assigned, Riak will assign -`default`, which means that the [default bucket configuration](/riak/kv/2.0.1/configuring/reference/#default-bucket-properties) will be used. +[bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) is assigned, Riak will assign +`default`, which means that the [default bucket configuration]({{}}riak/kv/2.0.1/configuring/reference/#default-bucket-properties) will be used. #### Request @@ -50,7 +50,7 @@ message RpbPutReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket, in bytes, in which the key/value is to reside -`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object) +`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object) #### Optional Parameters @@ -93,7 +93,7 @@ message RpbPutResp { If `return_body` is set to `true` on the PUT request, the `RpbPutResp` will contain the current object after the PUT completes, in `contents`, -as well as the object's [causal context](/riak/kv/2.0.1/learn/concepts/causal-context), in the `vclock` +as well as the object's [causal context]({{}}riak/kv/2.0.1/learn/concepts/causal-context), in the `vclock` field. The `key` will be sent only if the server generated a random key for the object. diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-get.md index 69f907e67d..2748126fda 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-get.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-get.md @@ -53,7 +53,7 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-put.md index 7d5fe1a231..f3fabc17d8 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-put.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-index-put.md @@ -37,4 +37,4 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests).
An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-get.md index f97bd43969..a6857dd7e1 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.0.1/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-put.md index 1633922ab9..5c2ce7a32b 100644 --- a/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.0.1/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.0.1/developing/usage/search-schemas). +Create a new Solr [search schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.0.1/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.1/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/#message-codes) code with no data on success. 
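For reference, the official clients wrap this request/response pair; below is a minimal sketch with the Python client, where the schema name and the local XML file are placeholder assumptions for the example:

```python
from riak import RiakClient

# A sketch only: "my_schema" and my_schema.xml stand in for a real
# Solr schema name and its XML content.
client = RiakClient(pb_port=8087)

with open('my_schema.xml') as f:
    schema_xml = f.read()

# Uploads the schema (the RpbYokozunaSchemaPutReq described above).
client.create_search_schema('my_schema', schema_xml)

# Fetches it back to confirm the upload.
fetched = client.get_search_schema('my_schema')
print(fetched['name'])
```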
diff --git a/content/riak/kv/2.0.1/developing/app-guide.md b/content/riak/kv/2.0.1/developing/app-guide.md index e9ddd81831..1172a8d4f5 100644 --- a/content/riak/kv/2.0.1/developing/app-guide.md +++ b/content/riak/kv/2.0.1/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.0.1/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.0.1/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.0.1/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.0.1/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.0.1/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.0.1/developing/key-value-modeling -[dev data types]: /riak/kv/2.0.1/developing/data-types -[dev data types#counters]: /riak/kv/2.0.1/developing/data-types/counters -[dev data types#sets]: /riak/kv/2.0.1/developing/data-types/sets -[dev data types#maps]: /riak/kv/2.0.1/developing/data-types/maps -[usage create objects]: /riak/kv/2.0.1/developing/usage/creating-objects -[usage search]: /riak/kv/2.0.1/developing/usage/search -[use ref search]: /riak/kv/2.0.1/using/reference/search -[usage 2i]: /riak/kv/2.0.1/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.0.1/developing/client-libraries -[concept crdts]: /riak/kv/2.0.1/learn/concepts/crdts -[dev data model]: /riak/kv/2.0.1/developing/data-modeling -[usage mapreduce]: /riak/kv/2.0.1/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.0.1/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.0.1/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.1/setup/planning/backend/memory -[obj model java]: /riak/kv/2.0.1/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.1/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.1/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.1/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.1/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.1/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.1/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.0.1/using/reference/strong-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.0.1/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.0.1/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.0.1/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.0.1/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties -[install index]: /riak/kv/2.0.1/setup/installing -[getting started]: /riak/kv/2.0.1/developing/getting-started -[usage index]: /riak/kv/2.0.1/developing/usage -[glossary]: /riak/kv/2.0.1/learn/glossary +[usage conflict resolution]: {{}}riak/kv/2.0.1/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.0.1/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.0.1/developing/data-modeling/#sensor-data +[concept eventual consistency]: 
{{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.0.1/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.0.1/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.0.1/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.0.1/developing/data-types/counters +[dev data types#sets]: {{}}riak/kv/2.0.1/developing/data-types/sets +[dev data types#maps]: {{}}riak/kv/2.0.1/developing/data-types/maps +[usage create objects]: {{}}riak/kv/2.0.1/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search +[use ref search]: {{}}riak/kv/2.0.1/using/reference/search +[usage 2i]: {{}}riak/kv/2.0.1/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.0.1/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.0.1/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.0.1/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.0.1/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.0.1/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.0.1/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.1/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.0.1/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.0.1/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.1/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.1/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.1/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.1/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.1/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[cluster ops strong consistency]: {{}}riak/kv/2.0.1/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/2.0.1/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/2.0.1/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/2.0.1/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/2.0.1/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/2.0.1/setup/installing +[getting started]: {{}}riak/kv/2.0.1/developing/getting-started +[usage index]: {{}}riak/kv/2.0.1/developing/usage +[glossary]: {{}}riak/kv/2.0.1/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -118,7 +118,7 @@ Riak may not be such a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance.
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.0.1/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.0.1/developing/app-guide/advanced-mapreduce.md index cc0ffeb54d..ec570656d2 100644 --- a/content/riak/kv/2.0.1/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.0.1/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.1/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.0.1/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.0.1/using/reference/custom-code -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.1/configuring/reference +[usage 2i]: {{}}riak/kv/2.0.1/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.0.1/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.0.1/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.1/configuring/reference > **Use MapReduce sparingly** > @@ -725,7 +725,7 @@ You can use streaming with Erlang via the Riak local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl). ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.0.1/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.0.1/developing/app-guide/cluster-metadata.md index 17eb5205f3..502f14b462 100644 --- a/content/riak/kv/2.0.1/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.0.1/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster-wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.0.1/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.0.1/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.0.1/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.0.1/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts.
Values stored as cluster metadata are opaque Erlang terms addressed by both a prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.0.1/developing/app-guide/replication-properties.md b/content/riak/kv/2.0.1/developing/app-guide/replication-properties.md index 3fd08293f2..bfaeb04b05 100644 --- a/content/riak/kv/2.0.1/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.0.1/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.1/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.0.1/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.0.1/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.0.1/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.0.1/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.1/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.1/learn/concepts/buckets) that you're using.
In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.0.1/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.0.1/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.1/developing/usage/bucket-types) +that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.0.1/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.0.1/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable.
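To make the R and W knobs concrete, here is a minimal sketch of the per-request settings (covered further under client-level replication settings below) using the official Python client; the bucket, key, and data are illustrative assumptions:

```python
from riak import RiakClient

# A sketch only: assumes a local node on PB port 8087.
client = RiakClient(pb_port=8087)
bucket = client.bucket('nba_stats')

obj = bucket.new('michael_jordan', data={'ppg': 30.1})
obj.store(w=3, dw=2)  # wait for 3 write acks, 2 of them durable

fetched = bucket.get('michael_jordan', r=1)  # fast, low-consistency read
print(fetched.data)
```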
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.1/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.1/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.0.1/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.0.1/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.0.1/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.0.1/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.0.1/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.0.1/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.1/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.1/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.0.1/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.0.1/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.1/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.1/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.1/developing/app-guide/strong-consistency.md b/content/riak/kv/2.0.1/developing/app-guide/strong-consistency.md index 8e9b77161b..6eb2c98c76 100644 --- a/content/riak/kv/2.0.1/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.0.1/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.0.1/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/2.1.3/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.0.1/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.0.1/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.0.1/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.0.1/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.0.1/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.0.1/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.0.1/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.0.1/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/2.1.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.0.1/developing/client-libraries -[getting started]: /riak/kv/2.0.1/developing/getting-started -[config strong consistency#details]: /riak/kv/2.0.1/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.0.1/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.0.1/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.0.1/using/cluster-operations/bucket-types +[apps replication properties]:
{{}}riak/kv/2.0.1/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.0.1/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.0.1/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.0.1/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.0.1/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.0.1/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.0.1/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.0.1/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.0.1/developing/client-libraries +[getting started]: {{}}riak/kv/2.0.1/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.0.1/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.0.1/developing/client-libraries.md b/content/riak/kv/2.0.1/developing/client-libraries.md index decc8741f3..008147f9c7 100644 --- a/content/riak/kv/2.0.1/developing/client-libraries.md +++ b/content/riak/kv/2.0.1/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.0.1/developing/data-types.md b/content/riak/kv/2.0.1/developing/data-types.md index 6367a6b328..68acb53787 100644 --- a/content/riak/kv/2.0.1/developing/data-types.md +++ b/content/riak/kv/2.0.1/developing/data-types.md @@ -38,9 +38,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -261,5 +261,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries.
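As a hedged illustration of working against a bucket whose type carries the `datatype` parameter described above, here is a minimal Python-client sketch; it assumes an active `maps` bucket type with `datatype = map`, and the field and key names are invented for the example:

```python
from riak import RiakClient

# A sketch only: assumes an active bucket type "maps" with
# datatype = map on a local node.
client = RiakClient(pb_port=8087)
bucket = client.bucket_type('maps').bucket('customers')

customer = bucket.new('ahmed_info')
customer.registers['first_name'].assign('Ahmed')
customer.counters['page_visits'].increment()
customer.sets['interests'].add('robots')
customer.store()  # all queued field operations ship as one update

print(bucket.get('ahmed_info').value)
```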
diff --git a/content/riak/kv/2.0.1/developing/faq.md b/content/riak/kv/2.0.1/developing/faq.md index f68eb80fe9..0d896a68c6 100644 --- a/content/riak/kv/2.0.1/developing/faq.md +++ b/content/riak/kv/2.0.1/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.0.1/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.0.1/using/performance/benchmarking -[Bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.0.1/developing/usage +[Basho Bench]: {{}}riak/kv/2.0.1/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.0.1/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.0.1/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.0.1/configuring/reference +[commit hooks]: {{}}riak/kv/2.0.1/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.0.1/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.0.1/developing/client-libraries -[MapReduce]: /riak/kv/2.0.1/developing/usage/mapreduce -[Memory]: /riak/kv/2.0.1/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.0.1/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.0.1/learn/concepts/causal-context#vector-clocks +[Erlang Riak Client]: {{}}riak/kv/2.0.1/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.0.1/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.0.1/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.0.1/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.0.1/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.0.1/developing/getting-started.md b/content/riak/kv/2.0.1/developing/getting-started.md index 5c9f21fb4d..bff34fa46a 100644 --- a/content/riak/kv/2.0.1/developing/getting-started.md +++ b/content/riak/kv/2.0.1/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.0.1/setup/installing -[dev client libraries]: /riak/kv/2.0.1/developing/client-libraries +[install index]: {{}}riak/kv/2.0.1/setup/installing +[dev client libraries]: {{}}riak/kv/2.0.1/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.0.1/developing/getting-started/csharp.md b/content/riak/kv/2.0.1/developing/getting-started/csharp.md index 03924d9364..06b92a7386 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/csharp.md +++ b/content/riak/kv/2.0.1/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak.
## Next Steps -[CRUD Operations](/riak/kv/2.0.1/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.1/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.0.1/developing/getting-started/csharp/querying.md b/content/riak/kv/2.0.1/developing/getting-started/csharp/querying.md index 0e5da5d4ae..851a99b30d 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.0.1/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.1/developing/getting-started/erlang.md b/content/riak/kv/2.0.1/developing/getting-started/erlang.md index dd4f165bc1..cbe0b1bc53 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/erlang.md +++ b/content/riak/kv/2.0.1/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.1/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.1/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.0.1/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.0.1/developing/getting-started/erlang/object-modeling.md index 24981b2406..893ac07651 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.0.1/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.1/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.1/developing/getting-started/erlang/querying.md b/content/riak/kv/2.0.1/developing/getting-started/erlang/querying.md index 39ceab20ee..b9231d2cad 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.0.1/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.0.1/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.0.1/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.1/developing/getting-started/golang.md b/content/riak/kv/2.0.1/developing/getting-started/golang.md index a57a2e97af..b29283e9de 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/golang.md +++ b/content/riak/kv/2.0.1/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.1/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.1/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.1/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.1/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.0.1/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.0.1/developing/getting-started/golang/object-modeling.md index a4f4ea0031..579cd3ae41 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.0.1/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.1/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.1/developing/getting-started/golang/querying.md b/content/riak/kv/2.0.1/developing/getting-started/golang/querying.md index 9ecf13959e..62fa4bb6d0 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.0.1/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.0.1/developing/getting-started/java.md b/content/riak/kv/2.0.1/developing/getting-started/java.md index 9a7ef09aeb..19404bb987 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/java.md +++ b/content/riak/kv/2.0.1/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.0.1/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.1/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.0.1/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.0.1/developing/getting-started/java/crud-operations.md index dea9e5793a..c78ca72ec5 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.0.1/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in-depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.1/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.1/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -178,6 +178,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in-depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.1/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.1/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/) documentation. diff --git a/content/riak/kv/2.0.1/developing/getting-started/java/querying.md b/content/riak/kv/2.0.1/developing/getting-started/java/querying.md index 2462579344..e753a4c9cb 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.0.1/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.1/developing/getting-started/nodejs.md b/content/riak/kv/2.0.1/developing/getting-started/nodejs.md index 9577ca5c59..a9a2fe7eca 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.0.1/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.1/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.1/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.1/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.0.1/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.0.1/developing/getting-started/nodejs/querying.md index 4c5f1367a6..091d67e47e 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.0.1/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.1/developing/getting-started/php.md b/content/riak/kv/2.0.1/developing/getting-started/php.md index 5d4028e070..ddf167f311 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/php.md +++ b/content/riak/kv/2.0.1/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) must be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.1/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.1/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.0.1/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.0.1/developing/getting-started/php/crud-operations.md index f5fe5cbcc9..b7c6a9f41c 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.0.1/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations.
[In the next chapter](/riak/kv/2.0.1/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.0.1/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.0.1/developing/getting-started/php/querying.md b/content/riak/kv/2.0.1/developing/getting-started/php/querying.md index 5babec52f2..bcbb56e53b 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.0.1/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.1/developing/getting-started/python.md b/content/riak/kv/2.0.1/developing/getting-started/python.md index fab9779563..571ce4e524 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/python.md +++ b/content/riak/kv/2.0.1/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak.
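Once the Python client described above is installed, a minimal connectivity check might look like this (host and port are the usual local defaults and may differ in your environment):

```python
import riak

# Connect over Protocol Buffers and confirm the node responds.
client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
print(client.ping())  # True if the node is up and reachable
```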
## Next Steps -[CRUD Operations](/riak/kv/2.0.1/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.1/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.0.1/developing/getting-started/python/querying.md b/content/riak/kv/2.0.1/developing/getting-started/python/querying.md index 0573e70f12..b42d30f676 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.0.1/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.1/developing/getting-started/ruby.md b/content/riak/kv/2.0.1/developing/getting-started/ruby.md index ac0a689f0a..141a111130 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/ruby.md +++ b/content/riak/kv/2.0.1/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.1/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.1/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.1/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.0.1/developing/getting-started/ruby/querying.md b/content/riak/kv/2.0.1/developing/getting-started/ruby/querying.md index bd6a2c3aac..fefbdd3840 100644 --- a/content/riak/kv/2.0.1/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.0.1/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.1/setup/planning/backend/leveldb).
[Bitcask](/riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.1/developing/key-value-modeling.md b/content/riak/kv/2.0.1/developing/key-value-modeling.md index 2fe8bcb6a6..46d44ca833 100644 --- a/content/riak/kv/2.0.1/developing/key-value-modeling.md +++ b/content/riak/kv/2.0.1/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.0.1/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.0.1/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.0.1/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.0.1/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.0.1/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.0.1/developing/app-guide/) for a better sense of which features you might need. +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.0.1/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects.
Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.0.1/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.0.1/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.0.1/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.0.1/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.0.1/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.0.1/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.0.1/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.0.1/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.0.1/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.0.1/developing/data-types/sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.0.1/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.0.1/developing/data-types/sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.0.1/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.0.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. 
The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.0.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.1/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.1/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.0.1/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.0.1/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.0.1/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.0.1/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.0.1/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.0.1/developing/usage/commit-hooks.md b/content/riak/kv/2.0.1/developing/usage/commit-hooks.md index ad0d2afc0d..087c08b0c9 100644 --- a/content/riak/kv/2.0.1/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.0.1/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. -Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.0.1/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.0.1/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. 
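The store-a-key-in-a-set pattern described in the key/value modeling passage above might be sketched like this in Python, mirroring the `user_info_sets`/`usernames` example (the `sets` bucket type is assumed to already be created and activated, and `store_user_record` is an illustrative helper, not from the docs):

```python
import riak
from riak.datatypes import Set

client = riak.RiakClient()
users = client.bucket('users')
set_bucket = client.bucket_type('sets').bucket('user_info_sets')

def store_user_record(username, record):
    # Store the record itself under the username key...
    users.new(username, data=record).store()
    # ...and remember that key in the 'usernames' set.
    usernames = Set(set_bucket, 'usernames')
    usernames.add(username)
    usernames.store()
```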
## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.0.1/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.0.1/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.0.1/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.0.1/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.0.1/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.0.1/developing/usage/conflict-resolution.md b/content/riak/kv/2.0.1/developing/usage/conflict-resolution.md index b7d59b96af..155985cbf7 100644 --- a/content/riak/kv/2.0.1/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.0.1/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.1/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/2.0.1/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.0.1/learn/concepts/clusters) system in which any [node](/riak/kv/2.0.1/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{}}riak/kv/2.0.1/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.0.1/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request.
-If you are using Riak in an [eventually consistent](/riak/kv/2.0.1/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.0.1/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.0.1/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.0.1/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.0.1/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.0.1/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.0.1/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.0.1/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.0.1/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.0.1/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.0.1/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.0.1/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.0.1/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.0.1/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. The most important [bucket properties](/riak/kv/2.0.1/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.0.1/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties.
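As a rough illustration of those two properties, the Python client lets you read and set them per bucket (whether you do this directly or through a bucket type depends on your deployment; `my_bucket` is a placeholder):

```python
import riak

client = riak.RiakClient()
bucket = client.bucket('my_bucket')

# Inspect the current settings, including allow_mult and last_write_wins.
print(bucket.get_properties())

# Opt out of siblings for this bucket and let timestamps pick a winner.
bucket.set_properties({'allow_mult': False, 'last_write_wins': True})
```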
@@ -87,7 +87,7 @@ If the `[allow_mult](#siblings)` parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -`[last_write_wins](/riak/kv/2.0.1/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, +`[last_write_wins]({{}}riak/kv/2.0.1/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.0.1/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.1/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.1/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.1/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.1/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.0.1/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.0.1/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.0.1/configuring/reference) to change the [default bucket properties](/riak/kv/2.0.1/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.0.1/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.0.1/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.0.1/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.0.1/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{}}riak/kv/2.0.1/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.0.1/learn/concepts/causal-context#vector-clocks) will be used. 
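Whichever form of causal context is in play, using it from a client is usually just read-modify-write: the fetch brings the context along, and the client sends it back with the store. A Python-flavored sketch, continuing from a `bucket` handle as above (the key and payload are illustrative):

```python
# Fetch first so the object's causal context comes along.
obj = bucket.get('user-42')

# Modify and store; the client returns the fetched context with the write,
# letting Riak see that this update descends from the value we read.
obj.data = {'visits': obj.data.get('visits', 0) + 1}
obj.store()
```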
Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.0.1/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.0.1/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.0.1/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.0.1/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.0.1/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.0.1/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.0.1/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.0.1/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.0.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.1/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.0.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.1/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. 
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.0.1/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.1/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.1/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.1/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.1/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -611,7 +611,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.0.1/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.0.1/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -666,7 +666,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/csharp.md index 3357237e62..c927be5f6e 100644 --- a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.1/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
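Across all of those clients the resolver has the same basic shape: a function that receives an object with siblings and collapses them down to one. A Python sketch (the last-modified rule here is only for illustration --- real applications should merge siblings according to their data model):

```python
def last_modified_resolver(riak_object):
    # Keep the most recently modified sibling and discard the rest.
    riak_object.siblings = [max(riak_object.siblings,
                                key=lambda sibling: sibling.last_modified)]

bucket = client.bucket_type('siblings').bucket('users')
bucket.resolver = last_modified_resolver  # applied on every fetch from this bucket
```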
diff --git a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/golang.md index 3e81d9c41a..7e35d48e35 100644 --- a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.1/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/java.md index fb7e18b38d..f10a54c6ad 100644 --- a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.1/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.1/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.1/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.1/developing/data-types/) that have specific conflict resolution mechanics built in.
If you have data that -can be modeled as a [counter](/riak/kv/2.0.1/developing/data-types/counters), [set](/riak/kv/2.0.1/developing/data-types/sets), or [map](/riak/kv/2.0.1/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.1/developing/data-types/counters), [set]({{}}riak/kv/2.0.1/developing/data-types/sets), or [map]({{}}riak/kv/2.0.1/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.1/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.1/developing/data-types/sets). diff --git a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/nodejs.md index 230139e147..c1192cf7ae 100644 --- a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.1/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/php.md index 28b46cb840..f1adad555c 100644 --- a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.1/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.1/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.1/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.1/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.1/developing/data-types/counters), [set](/riak/kv/2.0.1/developing/data-types/sets), or [map](/riak/kv/2.0.1/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.1/developing/data-types/counters), [set]({{}}riak/kv/2.0.1/developing/data-types/sets), or [map]({{}}riak/kv/2.0.1/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.1/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.1/developing/data-types/sets). diff --git a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/python.md index edc411efdf..3d940463d4 100644 --- a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.1/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.1/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.1/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.1/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.1/developing/data-types/counters), [set](/riak/kv/2.0.1/developing/data-types/sets), or [map](/riak/kv/2.0.1/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.1/developing/data-types/counters), [set]({{}}riak/kv/2.0.1/developing/data-types/sets), or [map]({{}}riak/kv/2.0.1/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.1/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.1/developing/data-types/sets). diff --git a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/ruby.md index 9ea5a116b2..f86f67a98e 100644 --- a/content/riak/kv/2.0.1/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.0.1/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.1/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.1/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.1/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.1/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.1/developing/data-types/counters), [set](/riak/kv/2.0.1/developing/data-types/sets), or [map](/riak/kv/2.0.1/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.1/developing/data-types/counters), [set]({{}}riak/kv/2.0.1/developing/data-types/sets), or [map]({{}}riak/kv/2.0.1/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.1/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.1/developing/data-types/sets). diff --git a/content/riak/kv/2.0.1/developing/usage/creating-objects.md b/content/riak/kv/2.0.1/developing/usage/creating-objects.md index 56dac90a83..17a3c89ba9 100644 --- a/content/riak/kv/2.0.1/developing/usage/creating-objects.md +++ b/content/riak/kv/2.0.1/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.0.1/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.0.1/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -27,7 +27,7 @@ In the example above, our read was unsuccessful because our Riak cluster is currently empty. Let's change that by storing an object containing information about a dog named Rufus. We'll store that object in the location described above, i.e. in the key `rufus` in the bucket `dogs`, -which bears the `animals` [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types). +which bears the `animals` [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types). 
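The page goes on to build that write in several client languages; in Python, for example, it might look roughly like this (assuming a `client` connection and the `animals` bucket type from the example):

```python
bucket = client.bucket_type('animals').bucket('dogs')

# Build the object with a plain-text value and store it.
obj = bucket.new('rufus', data='WOOF!', content_type='text/plain')
obj.store()

print(bucket.get('rufus').data)  # WOOF!
```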
The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -122,7 +122,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, run the same read operation in [Reading Objects](/riak/kv/2.0.1/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no +Now, run the same read operation in [Reading Objects]({{}}riak/kv/2.0.1/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no longer empty! ### Store an Object @@ -143,7 +143,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.0.1/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.0.1/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.0.1/developing/usage/custom-extractors.md b/content/riak/kv/2.0.1/developing/usage/custom-extractors.md index c1fcd3b09e..f8a39e2fd0 100644 --- a/content/riak/kv/2.0.1/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.0.1/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.0.1/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.0.1/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.0.1/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.0.1/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.0.1/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added to `<fields>` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.0.1/developing/usage/deleting-objects.md b/content/riak/kv/2.0.1/developing/usage/deleting-objects.md index 31fc0ec533..6f0c955e2c 100644 --- a/content/riak/kv/2.0.1/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.0.1/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.0.1/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.0.1/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.0.1/developing/usage/document-store.md b/content/riak/kv/2.0.1/developing/usage/document-store.md index 23b83e7be8..0897a5b196 100644 --- a/content/riak/kv/2.0.1/developing/usage/document-store.md +++ b/content/riak/kv/2.0.1/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.0.1/developing/usage/search/) and [Riak Data Types](/riak/kv/2.0.1/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.0.1/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.0.1/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.0.1/developing/data-types/maps). +[Riak maps]({{}}riak/kv/2.0.1/developing/data-types/maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.0.1/developing/data-types/maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.0.1/developing/data-types/maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**.
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.0.1/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.0.1/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.0.1/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.0.1/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.0.1/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.0.1/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `<fields>` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.0.1/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.0.1/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.0.1/developing/usage/mapreduce.md b/content/riak/kv/2.0.1/developing/usage/mapreduce.md index 060bb8a65d..eed5435c1e 100644 --- a/content/riak/kv/2.0.1/developing/usage/mapreduce.md +++ b/content/riak/kv/2.0.1/developing/usage/mapreduce.md @@ -33,9 +33,9 @@ transferring a potentially huge dataset to a client algorithm. Developers can use MapReduce for things like filtering documents by tags, counting words in documents, and extracting links to related data. In Riak, MapReduce is one method for querying that is not strictly based -on key querying, alongside [secondary indexes](/riak/kv/2.0.1/developing/usage/secondary-indexes/) -and [Search](/riak/kv/2.0.1/developing/usage/search/). MapReduce jobs can be submitted through the -[HTTP API](/riak/kv/2.0.1/developing/api/http) or the [Protocol Buffers API](/riak/kv/2.0.1/developing/api/protocol-buffers/), although we +on key querying, alongside [secondary indexes]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes/) +and [Search]({{}}riak/kv/2.0.1/developing/usage/search/). MapReduce jobs can be submitted through the +[HTTP API]({{}}riak/kv/2.0.1/developing/api/http) or the [Protocol Buffers API]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/), although we strongly recommend using the Protocol Buffers API for performance reasons. @@ -49,9 +49,9 @@ reasons. ## When to Use MapReduce * When you know the set of objects over which you want to MapReduce - (i.e. the locations of the objects, as specified by [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types), bucket, and key) + (i.e. the locations of the objects, as specified by [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types), bucket, and key) * When you want to return actual objects or pieces of objects and not - just the keys.
[Search](/riak/kv/2.0.1/developing/usage/search/) and [secondary indexes](/riak/kv/2.0.1/developing/usage/secondary-indexes) are other means of returning objects based on + just the keys. [Search]({{}}riak/kv/2.0.1/developing/usage/search/) and [secondary indexes]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) are other means of returning objects based on non-key-based queries, but they only return lists of keys and not whole objects. * When you need the utmost flexibility in querying your data. MapReduce @@ -86,7 +86,7 @@ Riak MapReduce queries have two components: * A list of phases The elements of the input list are object locations as specified by -[bucket type](/riak/kv/2.0.1/developing/usage/bucket-types), bucket, and key. The elements of the +[bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types), bucket, and key. The elements of the phases list are chunks of information related to a map, a reduce, or a link function. @@ -96,7 +96,7 @@ node that the client contacts to make the request becomes the above, each job consists of a list of phases, where each phase is either a map or a reduce phase. The coordinating node uses the list of phases to route the object keys and the function that will operate over the -objects stored in those keys and instruct the proper [vnode](/riak/kv/2.0.1/learn/glossary/#vnode) to +objects stored in those keys and instruct the proper [vnode]({{}}riak/kv/2.0.1/learn/glossary/#vnode) to run that function over the right objects. After running the map function, the results are sent back to the @@ -107,20 +107,20 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example In this example, we'll create four objects with the text "caremad" repeated a varying number of times and store those objects in the bucket -`training` (which does not bear a [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types)). +`training` (which does not bear a [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types)). An Erlang MapReduce function will be used to count the occurrences of the word "caremad." ### Data object input commands For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) -in conjunction with Riak's [HTTP API](/riak/kv/2.0.1/developing/api/http) to store the objects: +in conjunction with Riak's [HTTP API]({{}}riak/kv/2.0.1/developing/api/http) to store the objects: ```curl curl -XPUT http://localhost:8098/buckets/training/keys/foo \ @@ -218,4 +218,4 @@ counting the number of instances of the word. ## Advanced MapReduce Queries For more detailed information on MapReduce queries in Riak, we recommend -checking out our [Advanced MapReduce](/riak/kv/2.0.1/developing/app-guide/advanced-mapreduce) guide. +checking out our [Advanced MapReduce]({{}}riak/kv/2.0.1/developing/app-guide/advanced-mapreduce) guide. diff --git a/content/riak/kv/2.0.1/developing/usage/reading-objects.md b/content/riak/kv/2.0.1/developing/usage/reading-objects.md index 41469cc980..6ca0cb102b 100644 --- a/content/riak/kv/2.0.1/developing/usage/reading-objects.md +++ b/content/riak/kv/2.0.1/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. 
You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.0.1/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types/<type>/buckets/<bucket>/keys/<key> ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) `animals`: +`dogs`, which bears the [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) `animals`: ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.0.1/developing/usage/replication.md b/content/riak/kv/2.0.1/developing/usage/replication.md index 6ff8f74eb3..cb1db4245e 100644 --- a/content/riak/kv/2.0.1/developing/usage/replication.md +++ b/content/riak/kv/2.0.1/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.1/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak//2.0.1/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using Strong +Using Strong Consistency documentation, as this option will not be covered in this tutorial.
{{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.1/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.1/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.1/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.1/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.1/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.0.1/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.0.1/setup/planning/backend/multi). 
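To make the W and DW discussion concrete, here is a hedged sketch using the official Python client; the `nba_stats` bucket and `michael_jordan` key mirror the curl example shown later in this document, and the per-request values are illustrative only:

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('nba_stats')

obj = bucket.new('michael_jordan', data={'stats': {'ppg': 30.1}},
                 content_type='application/json')
# Require 3 vnodes to acknowledge the write and 2 of them to confirm
# that the write has been durably recorded on disk
obj.store(w=3, dw=2)

# Require 2 vnodes to respond to the read
fetched = bucket.get('michael_jordan', r=2)
```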
## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.1/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.1/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.0.1/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.0.1/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.1/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.1/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.1/developing/usage/search-schemas.md b/content/riak/kv/2.0.1/developing/usage/search-schemas.md index 650df65c30..479498886b 100644 --- a/content/riak/kv/2.0.1/developing/usage/search-schemas.md +++ b/content/riak/kv/2.0.1/developing/usage/search-schemas.md @@ -15,17 +15,17 @@ aliases: - /riak/kv/2.0.1/dev/advanced/search-schema --- -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). Riak Search is built for ease of use, allowing you to write values into Riak and query for values using Solr. Riak Search does a lot of work -under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.0.1/developing/data-types/), and [more](/riak/kv/2.0.1/developing/usage/custom-extractors)---into something that can be indexed and searched later. +under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.0.1/developing/data-types/), and [more]({{}}riak/kv/2.0.1/developing/usage/custom-extractors)---into something that can be indexed and searched later. Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing an array of strings? An integer? A date? Is your text in English or Russian?
You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.0.1/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.0.1/developing/usage/search.md b/content/riak/kv/2.0.1/developing/usage/search.md index 87cdd4a47a..34561cc28a 100644 --- a/content/riak/kv/2.0.1/developing/usage/search.md +++ b/content/riak/kv/2.0.1/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.0.1/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.0.1/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.1/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.1/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.0.1/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.0.1/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.0.1/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.0.1/developing/usage/custom-extractors). 
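To see the JSON extractor at work, here is a hedged Python sketch: storing a JSON value in a bucket associated with the `famous` index lets the extractor turn each JSON field into an indexable Solr field. The bucket, key, and field names here are assumptions for illustration:

```python
import riak

client = riak.RiakClient(pb_port=8087)

# Storing JSON in a bucket whose type is associated with the
# `famous` index lets the JSON extractor index each field
bucket = client.bucket_type('animals').bucket('cats')
obj = bucket.new('liono', data={'name_s': 'Lion-o', 'age_i': 30},
                 content_type='application/json')
obj.store()

# Query the Solr field that the extractor produced
results = client.fulltext_search('famous', 'name_s:"Lion-o"')
print(results['num_found'])
```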
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.0.1/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.0.1/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.0.1/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.0.1/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.0.1/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.0.1/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.0.1/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.0.1/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.0.1/developing/usage/searching-data-types.md b/content/riak/kv/2.0.1/developing/usage/searching-data-types.md index 44d0fcbad1..2391d5a53b 100644 --- a/content/riak/kv/2.0.1/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.0.1/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.1/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.0.1/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.0.1/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.0.1/developing/data-types/counters), [sets](/riak/kv/2.0.1/developing/data-types/sets), and [maps](/riak/kv/2.0.1/developing/data-types/maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.0.1/developing/data-types/counters), [sets]({{}}riak/kv/2.0.1/developing/data-types/sets), and [maps]({{}}riak/kv/2.0.1/developing/data-types/maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
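Since Data Types are indexed like other objects, querying them also looks the same. Below is a hedged Python sketch that searches a counters index; the index name `scores` matches the counters example later in this document, and the range query is illustrative:

```python
import riak

client = riak.RiakClient(pb_port=8087)

# Counters indexed under the `scores` index can be queried by value range;
# the default counter schema exposes each value in the `counter` field
results = client.fulltext_search('scores', 'counter:[20 TO *]')
print(results['num_found'])
for doc in results['docs']:
    print(doc['_yz_rk'])  # the Riak key of each matching counter
```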
### Top-level Schemas -The default schema for [counters](/riak/kv/2.0.1/developing/data-types/counters) indexes each +The default schema for [counters]({{}}riak/kv/2.0.1/developing/data-types/counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.0.1/developing/data-types/sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.0.1/developing/data-types/sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.0.1/developing/data-types/maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.0.1/developing/data-types/maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) for [storing counters](/riak/kv/2.0.1/developing/data-types/counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.0.1/developing/data-types/counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types) for [storing sets](/riak/kv/2.0.1/developing/data-types/sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.0.1/developing/data-types/sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.0.1/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.0.1/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.0.1/developing/data-types/maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.0.1/developing/data-types/maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.0.1/developing/usage/secondary-indexes.md b/content/riak/kv/2.0.1/developing/usage/secondary-indexes.md index 62852ea90a..c4488ac2cb 100644 --- a/content/riak/kv/2.0.1/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.0.1/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.0.1/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.1/setup/planning/backend/memory -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.1/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak//2.0.1/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.1/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.0.1/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.0.1/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.0.1/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.0.1/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.0.1/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.0.1/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.0.1/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.0.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.0.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.0.1/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.0.1/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.0.1/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.0.1/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.0.1/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.0.1/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.1/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.1/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.1/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.1/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.0.1/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.0.1/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.0.1/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.0.1/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
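The same tagging workflow can be sketched with the official Python client (a hedged example; index names ending in `_bin` are binary/string indexes, and the names mirror the `john_smith` example above):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('default').bucket('users')

obj = bucket.new('john_smith', data='...', content_type='text/plain')
obj.add_index('twitter_bin', 'jsmith123')       # string index
obj.add_index('email_bin', 'jsmith@basho.com')  # string index
obj.store()

# Later: find every key in the bucket tagged with that Twitter handle
page = bucket.get_index('twitter_bin', 'jsmith123')
print(page.results)
```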
### Exact Match diff --git a/content/riak/kv/2.0.1/developing/usage/security.md b/content/riak/kv/2.0.1/developing/usage/security.md index 2b41994644..fd3f283ee2 100644 --- a/content/riak/kv/2.0.1/developing/usage/security.md +++ b/content/riak/kv/2.0.1/developing/usage/security.md @@ -15,49 +15,49 @@ aliases: - /riak/kv/2.0.1/dev/advanced/client-security --- -Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.0.1/using/security/basics) that enables you to choose +Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.0.1/using/security/basics) that enables you to choose * which Riak users/clients are authorized to perform a wide variety of Riak operations, and * how those users/clients are required to authenticate themselves. -The following four authentication mechanisms, aka [security sources](/riak/kv/2.0.1/using/security/managing-sources/) are available: +The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.0.1/using/security/managing-sources/) are available: -* [Trust](/riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication)-based +* [Trust]({{}}riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default -* [Password](/riak/kv/2.0.1/using/security/managing-sources/#password-based-authentication)-based authentication requires +* [Password]({{}}riak/kv/2.0.1/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password -* [Certificate](/riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication +* [Certificate]({{}}riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients provide a certificate -* [Pluggable authentication module (PAM)](/riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication requires +* [Pluggable authentication module (PAM)]({{}}riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the - `[riak-admin security](/riak/kv/2.0.1/using/security/managing-sources/#managing-sources)` + `[riak-admin security]({{}}riak/kv/2.0.1/using/security/managing-sources/#managing-sources)` command line interface Riak's approach to security is highly flexible. If you choose to use Riak's security feature, you do not need to require that all clients authenticate via the same means. Instead, you can specify authentication sources on a client-by-client, i.e. user-by-user, basis. This means that -you can require clients performing, say, [MapReduce](/riak/kv/2.0.1/developing/usage/mapreduce/) -operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.0.1/developing/usage) have to use username and password. The approach +you can require clients performing, say, [MapReduce]({{}}riak/kv/2.0.1/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.0.1/developing/usage) have to use username and password. The approach that you adopt will depend on your security needs. This document provides a general overview of how that works.
For managing security in Riak itself, see the following documents: -* [Authentication and Authorization](/riak/kv/2.0.1/using/security/basics) -* [Managing Security Sources](/riak/kv/2.0.1/using/security/managing-sources/) +* [Authentication and Authorization]({{}}riak/kv/2.0.1/using/security/basics) +* [Managing Security Sources]({{}}riak/kv/2.0.1/using/security/managing-sources/) We also provide client-library-specific guides for the following officially supported clients: -* [Java](/riak/kv/2.0.1/developing/usage/security/java) -* [Ruby](/riak/kv/2.0.1/developing/usage/security/ruby) -* [PHP](/riak/kv/2.0.1/developing/usage/security/php) -* [Python](/riak/kv/2.0.1/developing/usage/security/python) -* [Erlang](/riak/kv/2.0.1/developing/usage/security/erlang) +* [Java]({{}}riak/kv/2.0.1/developing/usage/security/java) +* [Ruby]({{}}riak/kv/2.0.1/developing/usage/security/ruby) +* [PHP]({{}}riak/kv/2.0.1/developing/usage/security/php) +* [Python]({{}}riak/kv/2.0.1/developing/usage/security/python) +* [Erlang]({{}}riak/kv/2.0.1/developing/usage/security/erlang) ## Certificates, Keys, and Authorities @@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients. > **HTTP not supported** > > Certificate-based authentication is available only through Riak's -[Protocol Buffers](/riak/kv/2.0.1/developing/api/protocol-buffers/) interface. It is not available through the -[HTTP API](/riak/kv/2.0.1/developing/api/http). +[Protocol Buffers]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{}}riak/kv/2.0.1/developing/api/http). ### Default Names -In Riak's [configuration files](/riak/kv/2.0.1/configuring/reference/#security), the +In Riak's [configuration files]({{}}riak/kv/2.0.1/configuring/reference/#security), the default certificate file names are as follows: Cert | Filename diff --git a/content/riak/kv/2.0.1/developing/usage/security/erlang.md b/content/riak/kv/2.0.1/developing/usage/security/erlang.md index 7145bd18a1..c57e45419c 100644 --- a/content/riak/kv/2.0.1/developing/usage/security/erlang.md +++ b/content/riak/kv/2.0.1/developing/usage/security/erlang.md @@ -19,9 +19,9 @@ aliases: This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak. -If you are using [trust](/riak/kv/2.0.1/using/security/managing-sources/), [PAM-](/riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.0.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust]({{}}riak/kv/2.0.1/using/security/managing-sources/)- or [PAM]({{}}riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.0.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.1/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.1/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.1/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.1/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.1/developing/usage/security/java.md b/content/riak/kv/2.0.1/developing/usage/security/java.md index 28096934d4..57defee894 100644 --- a/content/riak/kv/2.0.1/developing/usage/security/java.md +++ b/content/riak/kv/2.0.1/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.1/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.0.1/developing/usage/security/php.md b/content/riak/kv/2.0.1/developing/usage/security/php.md index 4f3e746491..1e7d53e5fb 100644 --- a/content/riak/kv/2.0.1/developing/usage/security/php.md +++ b/content/riak/kv/2.0.1/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.0.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.0.1/developing/usage/security/python.md b/content/riak/kv/2.0.1/developing/usage/security/python.md index b04f1f2439..cfd2ea087d 100644 --- a/content/riak/kv/2.0.1/developing/usage/security/python.md +++ b/content/riak/kv/2.0.1/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.1/using/security/managing-sources/) or [PAM-](/riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.0.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.1/using/security/managing-sources/) or [PAM-]({{}}riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication)based authentication, you can use the security +setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.0.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.1/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.1/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.0.1/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.0.1/using/security/managing-sources/#Trust-based-Authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.1/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.1/using/security/basics/#user-management).
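Putting the pieces above together, here is a minimal sketch of a password-authenticated connection with the Python client; the username, password, and CA path mirror the snippets above and should be treated as assumptions:

```python
import riak
from riak.security import SecurityCreds

creds = SecurityCreds(username='riakuser',
                      password='rosebud',
                      cacert_file='/ssl_dir/cacertfile.pem')

# Pass the credentials at client instantiation, as described above
client = riak.RiakClient(pb_port=8087, credentials=creds)
print(client.ping())
```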
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.0.1/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.0.1/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.0.1/developing/usage/security/ruby.md b/content/riak/kv/2.0.1/developing/usage/security/ruby.md index dc5a0d1702..0f59bfe136 100644 --- a/content/riak/kv/2.0.1/developing/usage/security/ruby.md +++ b/content/riak/kv/2.0.1/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.1/using/security/managing-sources/) or [PAM](/riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.0.1/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.0.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.0.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.1/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.0.1/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication). 
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.1/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.1/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.0.1/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.1/developing/usage/updating-objects.md b/content/riak/kv/2.0.1/developing/usage/updating-objects.md index b3656640dd..affc76b735 100644 --- a/content/riak/kv/2.0.1/developing/usage/updating-objects.md +++ b/content/riak/kv/2.0.1/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/dev/using/updates --- -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode ## Using Causal Context If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.0.1/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.0.1/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.0.1/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.0.1/learn/concepts/causal-context). These context objects track the causal history of objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps: 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.0.1/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.0.1/learn/concepts/causal-context)) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.0.1/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.0.1/learn/concepts/causal-context). Although a more detailed tutorial on context objects and +object updates can be found in [Conflict Resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution), we'll walk you through a basic example here. 
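Editor's note: as a quick preview of the walkthrough below, those three steps map onto the official Python client roughly as follows. This is a sketch; the bucket and key names are illustrative:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('nba')  # illustrative bucket name

# 1. Fetch the object; its causal context comes back with it
obj = bucket.get('champion')

# 2. Modify only the value, leaving the fetched context untouched
obj.data = 'Washington Generals'

# 3. Write the object back; the client returns the same context to Riak
obj.store()
```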
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.0.1/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.0.1/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.0.1/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.0.1/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.0.1/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.0.1/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.0.1/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.0.1/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.0.1/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.0.1/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.0.1/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.0.1/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.0.1/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.0.1/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.0.1/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.0.1/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.0.1/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.0.1/index.md b/content/riak/kv/2.0.1/index.md index f1d59e9c47..da1c500e47 100644 --- a/content/riak/kv/2.0.1/index.md +++ b/content/riak/kv/2.0.1/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.0.1/configuring -[dev index]: /riak/kv/2.0.1/developing -[downloads]: /riak/kv/2.0.1/downloads/ -[install index]: /riak/kv/2.0.1/setup/installing/ -[plan index]: /riak/kv/2.0.1/setup/planning -[perf open files]: /riak/kv/2.0.1/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.0.1/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.0.1/developing/usage/search -[getting started]: /riak/kv/2.0.1/developing/getting-started -[dev client libraries]: /riak/kv/2.0.1/developing/client-libraries +[config index]: {{}}riak/kv/2.0.1/configuring +[dev index]: {{}}riak/kv/2.0.1/developing +[downloads]: {{}}riak/kv/2.0.1/downloads/ +[install index]: {{}}riak/kv/2.0.1/setup/installing/ +[plan index]: {{}}riak/kv/2.0.1/setup/planning +[perf open files]: {{}}riak/kv/2.0.1/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.0.1/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search +[getting started]: {{}}riak/kv/2.0.1/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.0.1/developing/client-libraries diff --git a/content/riak/kv/2.0.1/introduction.md b/content/riak/kv/2.0.1/introduction.md index 64faf0bc4b..23e41dd16d 100644 --- a/content/riak/kv/2.0.1/introduction.md +++ b/content/riak/kv/2.0.1/introduction.md @@ -27,7 +27,7 @@ For more in-depth implementation details check out the If you're upgrading to Riak 2.0 from an earlier version, please be aware that all of the new features listed below are optional: -* **Riak Data Types** --- Riak's new CRDT-based [Data Types](/riak/kv/2.0.1/developing/data-types) can +* **Riak Data Types** --- Riak's new CRDT-based [Data Types]({{}}riak/kv/2.0.1/developing/data-types) can simplify modeling data in Riak, but are only used in buckets explicitly configured to use them. * **Strong Consistency, Riak Security, and the New Riak Search** --- @@ -35,16 +35,16 @@ that all of the new features listed below are optional: work. If not turned on, they will have no impact on performance. Furthermore, the older Riak Search will continue to be included with Riak. -* **Security** --- [Authentication and authorization](/riak/kv/2.0.1/using/security/basics) can be enabled +* **Security** --- [Authentication and authorization]({{}}riak/kv/2.0.1/using/security/basics) can be enabled or disabled at any time. -* **Configuration management** --- Riak's [configuration files](/riak/kv/2.0.1/configuring/reference/) have +* **Configuration management** --- Riak's [configuration files]({{}}riak/kv/2.0.1/configuring/reference/) have been streamlined into a single file named `riak.conf`. If you are upgrading, however, your existing `app.config` and `vm.args` files will still be recognized in version 2.0. -* **Bucket Types** --- While we strongly recommend [using bucket types](/riak/kv/2.0.1/using/reference/bucket-types) when creating new buckets, they are not required. +* **Bucket Types** --- While we strongly recommend [using bucket types]({{}}riak/kv/2.0.1/using/reference/bucket-types) when creating new buckets, they are not required. 
* **Dotted Version Vectors (DVVs)** --- This alternative to traditional - [vector clocks](/riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks) is enabled by default - in all [bucket types](/riak/kv/2.0.1/using/reference/bucket-types), but DVVs can be disabled + [vector clocks]({{}}riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks) is enabled by default + in all [bucket types]({{}}riak/kv/2.0.1/using/reference/bucket-types), but DVVs can be disabled by setting the `dvv_enabled` property to `false` on any bucket type. In a nutshell, upgrading to 2.0 will change how you use Riak only if you @@ -52,17 +52,17 @@ want it to. But even if you don't plan on using the new features, there are a number of improvements that make upgrading a good choice, including the following: -* [Cluster metadata](/riak/kv/2.0.1/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that +* [Cluster metadata]({{}}riak/kv/2.0.1/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that reduces the amount of inter-node gossip in Riak clusters, which can reduce network congestion. -* [Active Anti-Entropy](/riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy +* [Active Anti-Entropy]({{}}riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy (AAE) feature that is turned on by default since version 1.3, AAE performance has been improved in version 2.0. * [Bug patches](https://github.com/basho/riak/blob/2.0/RELEASE-NOTES.md) --- A variety of bugs present in earlier versions have been identified and patched. -More on upgrading can be found in our [Riak 2.0 upgrade guide](/riak/kv/2.0.1/setup/upgrading/version). +More on upgrading can be found in our [Riak 2.0 upgrade guide]({{}}riak/kv/2.0.1/setup/upgrading/version). ## Riak Data Types @@ -73,20 +73,20 @@ application is responsible for resolving conflicts between replicas of objects stored in different Riak nodes. Riak 2.0 offers a new approach to this problem for a wide range of use -cases in the form of [Riak Data Types](/riak/kv/2.0.1/developing/data-types). Instead of +cases in the form of [Riak Data Types]({{}}riak/kv/2.0.1/developing/data-types). Instead of forcing the application to resolve conflicts, Riak offers five Data Types that can reduce some of the complexities of developing using -Riak: [flags](/riak/kv/2.0.1/developing/data-types/maps#flags), [registers](/riak/kv/2.0.1/developing/data-types/maps#registers), -[counters](/riak/kv/2.0.1/developing/data-types/counters), [sets](/riak/kv/2.0.1/developing/data-types/sets), and -[maps](/riak/kv/2.0.1/developing/data-types/maps). +Riak: [flags]({{}}riak/kv/2.0.1/developing/data-types/maps#flags), [registers]({{}}riak/kv/2.0.1/developing/data-types/maps#registers), +[counters]({{}}riak/kv/2.0.1/developing/data-types/counters), [sets]({{}}riak/kv/2.0.1/developing/data-types/sets), and +[maps]({{}}riak/kv/2.0.1/developing/data-types/maps). #### Relevant Docs -* [Using Data Types](/riak/kv/2.0.1/developing/data-types) explains how to use Riak Data Types on the +* [Using Data Types]({{}}riak/kv/2.0.1/developing/data-types) explains how to use Riak Data Types on the application side, with usage examples for all five Data Types in all of Basho's officially supported clients (Java, Ruby, Python, .NET and Erlang) and for Riak's HTTP interface. 
-* [Data Types](/riak/kv/2.0.1/developing/data-types) explains some of the theoretical concerns that drive +* [Data Types]({{}}riak/kv/2.0.1/developing/data-types) explains some of the theoretical concerns that drive Riak Data Types and shares details about how they are implemented in Riak. @@ -103,11 +103,11 @@ Search, integrating Riak with [Apache Solr](https://lucene.apache.org/solr/)'s f #### Relevant Docs -* [Using Search](/riak/kv/2.0.1/developing/usage/search) provides an overview of how to use the new +* [Using Search]({{}}riak/kv/2.0.1/developing/usage/search) provides an overview of how to use the new Riak Search. -* [Search Schema](/riak/kv/2.0.1/developing/usage/search-schemas) shows you how to create and manage custom search +* [Search Schema]({{}}riak/kv/2.0.1/developing/usage/search-schemas) shows you how to create and manage custom search schemas. -* [Search Details](/riak/kv/2.0.1/using/reference/search) provides an in-depth look at the design +* [Search Details]({{}}riak/kv/2.0.1/using/reference/search) provides an in-depth look at the design considerations that went into the new Riak Search. #### Video @@ -125,13 +125,13 @@ some (or perhaps all) of your data. #### Relevant Docs -* [Using Strong Consistency](/riak/kv/2.0.1/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong +* [Using Strong Consistency]({{}}riak/kv/2.0.1/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong consistency subsystem and to apply strong consistency guarantees to data stored in specified buckets. -* [Strong Consistency](/riak/kv/2.0.1/using/reference/strong-consistency) provides a theoretical treatment of how a - strongly consistent system differs from an [eventually consistent](/riak/kv/2.0.1/learn/concepts/eventual-consistency) system, as well as details about how +* [Strong Consistency]({{}}riak/kv/2.0.1/using/reference/strong-consistency) provides a theoretical treatment of how a + strongly consistent system differs from an [eventually consistent]({{}}riak/kv/2.0.1/learn/concepts/eventual-consistency) system, as well as details about how strong consistency is implemented in Riak. -* [Managing Strong Consistency](/riak/kv/2.0.1/configuring/strong-consistency) is a guide to strong consistency for +* [Managing Strong Consistency]({{}}riak/kv/2.0.1/configuring/strong-consistency) is a guide to strong consistency for Riak operators. #### Video @@ -155,11 +155,11 @@ Riak itself and managed through a simple command-line interface. #### Relevant Docs -* [Authentication and Authorization](/riak/kv/2.0.1/using/security/basics) explains how Riak Security can be +* [Authentication and Authorization]({{}}riak/kv/2.0.1/using/security/basics) explains how Riak Security can be enabled and disabled, how users and groups are managed, how authorization to perform certain operations can be granted and revoked, how security ciphers can be chosen, and more. -* [Managing Security Sources](/riak/kv/2.0.1/using/security/managing-sources/) is an in-depth tutorial on how to +* [Managing Security Sources]({{}}riak/kv/2.0.1/using/security/managing-sources/) is an in-depth tutorial on how to implement Riak's four supported authentication sources: trusted networks, passwords, pluggable authentication modules, and certificates. @@ -194,7 +194,7 @@ override any settings from the new system. 
#### Relevant Docs -* [Configuration Files](/riak/kv/2.0.1/configuring/reference/) lists and describes all of the configurable +* [Configuration Files]({{}}riak/kv/2.0.1/configuring/reference/) lists and describes all of the configurable parameters available in Riak 2.0, from configuring your chosen storage backend(s) to setting default bucket properties to controlling Riak's logging system and much more. @@ -214,7 +214,7 @@ and keys. #### Relevant Docs -* [Using Bucket Types](/riak/kv/2.0.1/using/reference/bucket-types) explains how to create, modify, and activate +* [Using Bucket Types]({{}}riak/kv/2.0.1/using/reference/bucket-types) explains how to create, modify, and activate bucket types, as well as how the new system differs from the older, bucket properties-based system. @@ -226,20 +226,20 @@ and [Jordan West](https://github.com/jrwest). ## Dotted Version Vectors -In prior versions of Riak, [conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution) was managed using -[vector clocks](/riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks), which track object update causality. +In prior versions of Riak, [conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution) was managed using +[vector clocks]({{}}riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks), which track object update causality. Riak 2.0 has added support for dotted version vectors (DVVs). DVVs serve an analogous role to vector -clocks but are more effective at containing [sibling explosion](/riak/kv/2.0.1/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. +clocks but are more effective at containing [sibling explosion]({{}}riak/kv/2.0.1/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. #### Relevant Docs -* [Dotted Version Vectors](/riak/kv/2.0.1/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. +* [Dotted Version Vectors]({{}}riak/kv/2.0.1/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. ## New Client Libraries -While Basho offered official [client libraries](/riak/kv/2.0.1/developing/client-libraries) for Java, Ruby, +While Basho offered official [client libraries]({{}}riak/kv/2.0.1/developing/client-libraries) for Java, Ruby, Python, .NET and Erlang for versions of Riak prior to 2.0, all clients have undergone major changes in anticipation of the 2.0 release. 
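Editor's note: since the introduction above tours the 2.0 Data Types and the reworked client libraries separately, here is a minimal counter sketch that shows them together, using the Python client. It assumes a bucket type named `counters` has already been created and activated with `datatype = counter` via `riak-admin`; all names are illustrative:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# Bucket types are created/activated server-side; this merely references one
bucket = client.bucket_type('counters').bucket('traffic')
counter = bucket.new('tickets')

counter.increment(5)  # queue an increment locally
counter.store()       # ship the queued operation to Riak

counter = bucket.get('tickets')  # re-fetch to observe the converged value
print(counter.value)
```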
diff --git a/content/riak/kv/2.0.1/learn/concepts.md b/content/riak/kv/2.0.1/learn/concepts.md index a4ec02580c..157e5517b3 100644 --- a/content/riak/kv/2.0.1/learn/concepts.md +++ b/content/riak/kv/2.0.1/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.0.1/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.0.1/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.1/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.1/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.1/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.1/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.1/learn/concepts/vnodes -[config index]: /riak/kv/2.0.1/configuring -[plan index]: /riak/kv/2.0.1/setup/planning -[use index]: /riak/kv/2.0.1/using/ +[concept aae]: {{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.0.1/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.1/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.1/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.1/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.0.1/configuring +[plan index]: {{}}riak/kv/2.0.1/setup/planning +[use index]: {{}}riak/kv/2.0.1/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.0.1/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.0.1/learn/concepts/active-anti-entropy.md index 2523eafd40..e70d3707a7 100644 --- a/content/riak/kv/2.0.1/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.0.1/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.1/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.0.1/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.0.1/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.0.1/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.0.1/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.0.1/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.0.1/developing/usage/search +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.0.1/learn/concepts/buckets.md b/content/riak/kv/2.0.1/learn/concepts/buckets.md index bc53e21245..c8311f36ef 100644 --- a/content/riak/kv/2.0.1/learn/concepts/buckets.md +++ b/content/riak/kv/2.0.1/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.0.1/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.0.1/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.0.1/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.0.1/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.0.1/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.0.1/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.0.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.1/using/reference/strong-consistency -[config basic]: /riak/kv/2.0.1/configuring/basic -[dev api http]: /riak/kv/2.0.1/developing/api/http -[dev data types]: /riak/kv/2.0.1/developing/data-types -[glossary ring]: /riak/kv/2.0.1/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.1/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.1/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.1/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.1/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.0.1/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.0.1/using/cluster-operations/bucket-types +[cluster 
ops strong consistency]: {{}}riak/kv/2.0.1/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.0.1/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.0.1/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.0.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.0.1/configuring/basic +[dev api http]: {{}}riak/kv/2.0.1/developing/api/http +[dev data types]: {{}}riak/kv/2.0.1/developing/data-types +[glossary ring]: {{}}riak/kv/2.0.1/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.1/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.1/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.1/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.1/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.0.1/learn/concepts/capability-negotiation.md b/content/riak/kv/2.0.1/learn/concepts/capability-negotiation.md index 88ef9f163e..853123cf54 100644 --- a/content/riak/kv/2.0.1/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.0.1/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.0.1/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.0.1/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.0.1/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.0.1/developing/usage/mapreduce In versions of Riak prior to 1.2.0, [rolling upgrades][upgrade cluster] from an older version of Riak to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.0.1/learn/concepts/causal-context.md b/content/riak/kv/2.0.1/learn/concepts/causal-context.md index 9211862f26..71bb44175d 100644 --- a/content/riak/kv/2.0.1/learn/concepts/causal-context.md +++ b/content/riak/kv/2.0.1/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.0.1/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.0.1/developing/api/http -[dev key value]: /riak/kv/2.0.1/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.0.1/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.0.1/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.1/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.0.1/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.0.1/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.0.1/developing/api/http +[dev key value]: {{}}riak/kv/2.0.1/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.0.1/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.0.1/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.1/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.0.1/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.0.1/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -78,7 +78,7 @@ sections below. In the case of outcome 2, the choice between **a** and **b** is yours to make. If you set the `allow_mult` parameter to `true` for a bucket, -[using bucket types](/riak/kv/2.0.1/developing/usage/bucket-types), all writes to that bucket will create siblings +[using bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types), all writes to that bucket will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions). 
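Editor's note: to make the `allow_mult` discussion above concrete, here is a hedged sketch of application-side sibling resolution with the Python client. The bucket type, bucket, and key are illustrative, and the resolution rule is deliberately naive:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# Assumes a bucket type created with allow_mult = true; names are illustrative
bucket = client.bucket_type('siblings_allowed').bucket('profiles')

obj = bucket.get('alice')
if len(obj.siblings) > 1:
    # Toy strategy: keep the sibling with the longest raw value
    winner = max(obj.siblings, key=lambda sibling: len(sibling.encoded_data))
    obj.siblings = [winner]
    obj.store()  # writes the resolved value back with the fetched context
```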
diff --git a/content/riak/kv/2.0.1/learn/concepts/clusters.md b/content/riak/kv/2.0.1/learn/concepts/clusters.md index 2dfcebd8fe..bca96565b1 100644 --- a/content/riak/kv/2.0.1/learn/concepts/clusters.md +++ b/content/riak/kv/2.0.1/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.1/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.0.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.1/learn/concepts/replication -[glossary node]: /riak/kv/2.0.1/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.0.1/learn/dynamo -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.1/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.1/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.0.1/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.0.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.1/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.1/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.0.1/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.1/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.1/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.0.1/learn/concepts/crdts.md b/content/riak/kv/2.0.1/learn/concepts/crdts.md index fd8358aabb..b16d971191 100644 --- a/content/riak/kv/2.0.1/learn/concepts/crdts.md +++ b/content/riak/kv/2.0.1/learn/concepts/crdts.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context dvv]: /riak/kv/2.0.1/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.0.1/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.0.1/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.0.1/developing/data-types -[glossary node]: /riak/kv/2.0.1/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.0.1/developing/usage/conflict-resolution +[concept causal context dvv]: {{}}riak/kv/2.0.1/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.0.1/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.0.1/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.0.1/developing/data-types +[glossary node]: {{}}riak/kv/2.0.1/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.0.1/developing/usage/conflict-resolution A pure key/value store is completely agnostic toward the data stored @@ -32,7 +32,7 @@ within it. Any key can be associated with values of any conceivable type, from short strings to large JSON objects to video files. Riak began as a pure key/value store, but over time it has become more and more aware of the data stored in it through features like [secondary -indexes](/riak/kv/2.0.1/developing/usage/secondary-indexes/) and [Search](/riak/kv/2.0.1/developing/usage/search/). +indexes]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes/) and [Search]({{}}riak/kv/2.0.1/developing/usage/search/). In version 2.0, Riak continued this evolution by introducing a series of eventually convergent **Data Types**. Riak Data Types are convergent @@ -214,7 +214,7 @@ The beauty of Data Types is that Riak "knows" how to resolve value conflicts by applying Data Type-specific rules. In general, Riak does this by remembering the **history** of a value and broadcasting that history along with the current value in the form of a [context -object](/riak/kv/2.0.1/developing/data-types/#Data-Types-and-Context) that is similar to a +object]({{}}riak/kv/2.0.1/developing/data-types/#Data-Types-and-Context) that is similar to a [vector clock][concept causal context vc] or `[dotted version vectors][concept causal context dvv]. Riak uses the history of each Data Type to make deterministic judgments about which value should be deemed correct. 
diff --git a/content/riak/kv/2.0.1/learn/concepts/eventual-consistency.md b/content/riak/kv/2.0.1/learn/concepts/eventual-consistency.md index c1ee51f803..113524f716 100644 --- a/content/riak/kv/2.0.1/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.0.1/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.1/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters -[concept replication]: /riak/kv/2.0.1/learn/concepts/replication -[glossary node]: /riak/kv/2.0.1/learn/glossary/#node -[glossary read rep]: /riak/kv/2.0.1/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.1/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.0.1/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.0.1/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.1/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.0.1/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.1/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.0.1/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.0.1/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.0.1/developing/data-modeling/). +or models]({{}}riak/kv/2.0.1/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
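Editor's note: to ground the replication-property tuning mentioned above, here is a sketch of per-request R and W values with the Python client. Names and values are illustrative; the `r`/`w` keyword arguments follow the official client's request options:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('sensor_readings')  # illustrative bucket name

# Read: return as soon as a single replica answers
obj = bucket.get('reading-001', r=1)

# Write: wait for three replicas to acknowledge the update
obj.data = {'temp_c': 21.5}
obj.store(w=3)
```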
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.0.1/learn/concepts/keys-and-objects.md b/content/riak/kv/2.0.1/learn/concepts/keys-and-objects.md index a9d8564374..5a6d56e92b 100644 --- a/content/riak/kv/2.0.1/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.0.1/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.1/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.0.1/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.0.1/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.0.1/learn/concepts/replication.md b/content/riak/kv/2.0.1/learn/concepts/replication.md index d60990af27..fbfc6c3649 100644 --- a/content/riak/kv/2.0.1/learn/concepts/replication.md +++ b/content/riak/kv/2.0.1/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.0.1/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.0.1/learn/concepts/vnodes -[glossary node]: /riak/kv/2.0.1/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.1/learn/glossary/#ring -[usage replication]: /riak/kv/2.0.1/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.0.1/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.0.1/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.1/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.0.1/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.0.1/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.0.1/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.0.1/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.0.1/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail. 
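Editor's note: before the `n_val`-setting section below, a small sketch of inspecting a bucket's effective replication factor from the Python client. The bucket type name is illustrative and would have been created with `riak-admin`:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('n_val_of_2').bucket('tweets')  # illustrative

props = bucket.get_properties()  # fetches the effective bucket properties
print(props['n_val'])            # e.g. 2 for this bucket type
```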
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.0.1/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.0.1/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.0.1/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.0.1/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.0.1/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.0.1/learn/concepts/strong-consistency.md b/content/riak/kv/2.0.1/learn/concepts/strong-consistency.md index c8c15e0dbd..9cbf5804e3 100644 --- a/content/riak/kv/2.0.1/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.0.1/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.1/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.1/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.1/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.0.1/learn/concepts/vnodes.md b/content/riak/kv/2.0.1/learn/concepts/vnodes.md index d7ad468ef4..011d4fa2c0 100644 --- a/content/riak/kv/2.0.1/learn/concepts/vnodes.md +++ b/content/riak/kv/2.0.1/learn/concepts/vnodes.md @@ -16,16 +16,16 @@ aliases: --- -[concept causal context]: /riak/kv/2.0.1/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.0.1/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.0.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.1/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.0.1/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.1/learn/glossary/#ring -[perf strong consistency]: /riak/kv/2.0.1/using/performance/strong-consistency -[plan backend]: /riak/kv/2.0.1/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.1/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.0.1/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.0.1/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.0.1/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.0.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.1/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.0.1/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.1/learn/glossary/#ring +[perf strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[plan backend]: {{}}riak/kv/2.0.1/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.1/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.0.1/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -81,7 +81,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.1/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -103,7 +103,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.0.1/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.0.1/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.0.1/learn/dynamo.md b/content/riak/kv/2.0.1/learn/dynamo.md index bb07b5aa7a..90f4cb1853 100644 --- a/content/riak/kv/2.0.1/learn/dynamo.md +++ b/content/riak/kv/2.0.1/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.0.1/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.0.1/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.0.1/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.0.1/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. 
It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. -[HTTP API]: /riak/kv/2.0.1/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.0.1/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.0.1/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.0.1/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.0.1/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.0.1/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.0.1/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.0.1/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.0.1/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.0.1/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.0.1/developing/api/http/) +>[REST API]({{}}riak/kv/2.0.1/developing/api/http/) > ->[Writing Data](/riak/kv/2.0.1/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.0.1/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.0.1/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.0.1/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.0.1/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.0.1/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.0.1/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.0.1/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.0.1/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.0.1/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. 
-[Multi Datacenter Replication]: /riak/kv/2.0.1/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.0.1/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. > This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.0.1/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.0.1/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.0.1/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.0.1/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.0.1/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.0.1/setup/planning/backend/ -[Bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.0.1/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.0.1/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.0.1/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.0.1/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.0.1/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.0.1/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.0.1/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.0.1/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. 
Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.0.1/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.1/using/performance/benchmarking/ Dynamo is used by several services with different configurations. These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.0.1/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.0.1/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.0.1/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.0.1/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.0.1/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.0.1/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.0.1/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.0.1/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.0.1/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.0.1/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. 
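To make the CRDT note above concrete: Riak 2.0 exposes counters, sets, and maps over both APIs, and concurrent updates merge automatically instead of producing siblings. A sketch against the HTTP API, assuming a bucket type named `counters` (with its `datatype` property set to `counter`) has already been created and activated:

```
# Concurrent increments from different clients converge without conflict
curl -XPOST http://localhost:8098/types/counters/buckets/traffic/datatypes/hits \
  -H "Content-Type: application/json" \
  -d '{"increment": 1}'

# Read back the converged value
curl http://localhost:8098/types/counters/buckets/traffic/datatypes/hits
```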
-[Basho Bench]: /riak/kv/2.0.1/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.1/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. -[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.0.1/learn/glossary.md b/content/riak/kv/2.0.1/learn/glossary.md index 9444bd6636..40f0de5496 100644 --- a/content/riak/kv/2.0.1/learn/glossary.md +++ b/content/riak/kv/2.0.1/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.0.1/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.0.1/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.0.1/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.1/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.1/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.0.1/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.0.1/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.0.1/developing/api/http -[dev data model]: /riak/kv/2.0.1/developing/data-modeling -[dev data types]: /riak/kv/2.0.1/developing/data-types -[glossary read rep]: /riak/kv/2.0.1/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.0.1/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.0.1/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.1/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.0.1/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.0.1/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.0.1/developing/api/http +[dev data model]: {{}}riak/kv/2.0.1/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.1/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.1/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.0.1/learn/dynamo -[plan cluster 
capacity]: /riak/kv/2.0.1/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.0.1/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.0.1/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.0.1/learn/dynamo +[plan cluster capacity]: {{}}riak/kv/2.0.1/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.0.1/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.0.1/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.0.1/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.1/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.1/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.1/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.0.1/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.0.1/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. 
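Since bucket types come up throughout the rest of this changeset, here is a minimal sketch of that third namespace in practice (the type name and property are illustrative):

```
# Create a type whose buckets keep 5 replicas, then activate it
riak-admin bucket-type create n5 '{"props":{"n_val":5}}'
riak-admin bucket-type activate n5
riak-admin bucket-type status n5
```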
-* [Bucket Types](/riak/kv/2.0.1/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.0.1/learn/use-cases.md b/content/riak/kv/2.0.1/learn/use-cases.md index 3f8de13a34..52ee1144e1 100644 --- a/content/riak/kv/2.0.1/learn/use-cases.md +++ b/content/riak/kv/2.0.1/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.0.1/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.0.1/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.0.1/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.0.1/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.0.1/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.0.1/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.0.1/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.0.1/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.0.1/developing/data-types -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.0.1/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.1/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.1/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.0.1/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.0.1/developing/data-modeling/#log-data +[dev data model sensor data]: {{}}riak/kv/2.0.1/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.0.1/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.0.1/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.0.1/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.0.1/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.0.1/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.0.1/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.0.1/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.0.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.1/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. 
In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.0.1/learn/why-riak-kv.md b/content/riak/kv/2.0.1/learn/why-riak-kv.md index 0daa83aa8c..dc705de181 100644 --- a/content/riak/kv/2.0.1/learn/why-riak-kv.md +++ b/content/riak/kv/2.0.1/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.0.1/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.0.1/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.1/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.0.1/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.0.1/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.0.1/developing/data-types -[glossary read rep]: /riak/kv/2.0.1/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.0.1/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.1/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.0.1/setup/downgrade.md b/content/riak/kv/2.0.1/setup/downgrade.md index 1c2c4596a1..cf80d1ef13 100644 --- a/content/riak/kv/2.0.1/setup/downgrade.md +++ b/content/riak/kv/2.0.1/setup/downgrade.md @@ -17,7 +17,7 @@ aliases: Downgrades of Riak are tested and supported for two feature release versions, with the general procedure being similar to that of a -[rolling upgrade](/riak/kv/2.0.1/setup/upgrading/cluster). +[rolling upgrade]({{}}riak/kv/2.0.1/setup/upgrading/cluster). {{% note title="End Of Life Warning" %}} We test downgrading for two feature release versions. However, all versions below KV 2.0 are End Of Life (EOL) and unsupported. Please be aware of that if you choose to downgrade. @@ -49,9 +49,9 @@ both 1.4 and 1.3 are performed. * Riak Control should be disabled throughout the rolling downgrade process -* [Configuration Files](/riak/kv/2.0.1/configuring/reference) must be replaced with those of the version +* [Configuration Files]({{}}riak/kv/2.0.1/configuring/reference) must be replaced with those of the version being downgraded to -* [Active anti-entropy](/riak/kv/2.0.1/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version +* [Active anti-entropy]({{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version below 1.3. ## Before Stopping a Node @@ -94,7 +94,7 @@ will need to be downgraded before the rolling downgrade begins. This can be done using the --downgrade flag with `riak-admin reformat-indexes` More information on the `riak-admin reformat-indexes` command, and downgrading indexes can be found in the -[`riak-admin`](/riak/kv/2.0.1/using/admin/riak-admin/#reformat-indexes) documentation. +[`riak-admin`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#reformat-indexes) documentation. 
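A sketch of that 2i downgrade step; `riak-admin reformat-indexes` also accepts optional concurrency and batch-size arguments, omitted here:

```
# Rewrite secondary index data into the pre-1.3.1 on-disk format
riak-admin reformat-indexes --downgrade
```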
## Before Starting a Node diff --git a/content/riak/kv/2.0.1/setup/installing.md b/content/riak/kv/2.0.1/setup/installing.md index 00b27b7124..a31d10c38c 100644 --- a/content/riak/kv/2.0.1/setup/installing.md +++ b/content/riak/kv/2.0.1/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.0.1/installing/ --- -[install aws]: /riak/kv/2.0.1/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.1/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.1/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.1/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.1/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.1/setup/installing/smartos -[install solaris]: /riak/kv/2.0.1/setup/installing/solaris -[install suse]: /riak/kv/2.0.1/setup/installing/suse -[install windows azure]: /riak/kv/2.0.1/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.1/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.0.1/setup/upgrading +[install aws]: {{}}riak/kv/2.0.1/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.1/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.1/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.1/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.1/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.1/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.1/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.1/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.1/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.0.1/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.0.1/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.0.1/setup/installing/amazon-web-services.md b/content/riak/kv/2.0.1/setup/installing/amazon-web-services.md index 7e2da45d66..602b389979 100644 --- a/content/riak/kv/2.0.1/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.0.1/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. @@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.0.1/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.0.1/using/security/). 
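Almost every hunk in this changeset, including the image links just above, is the same mechanical rewrite: root-relative URLs gain a Hugo shortcode prefix so the generated site can be served from any base path. The shortcode body is not preserved in this rendering (it shows up as the empty `{{}}`); assuming a hypothetical `baseurl`-style shortcode, the pattern is:

```
Before:  [security index]: /riak/kv/2.0.1/using/security/
After:   [security index]: {{< baseurl >}}riak/kv/2.0.1/using/security/
```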
## Clustering Riak on AWS diff --git a/content/riak/kv/2.0.1/setup/installing/debian-ubuntu.md b/content/riak/kv/2.0.1/setup/installing/debian-ubuntu.md index db452b64ed..d27243b1e5 100644 --- a/content/riak/kv/2.0.1/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.0.1/setup/installing/debian-ubuntu.md @@ -20,10 +20,10 @@ aliases: -[install source index]: /riak/kv/2.0.1/setup/installing/source/ -[security index]: /riak/kv/2.0.1/using/security/ -[install source erlang]: /riak/kv/2.0.1/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.1/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.1/setup/installing/source/ +[security index]: {{}}riak/kv/2.0.1/using/security/ +[install source erlang]: {{}}riak/kv/2.0.1/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.1/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.0.1/setup/installing/freebsd.md b/content/riak/kv/2.0.1/setup/installing/freebsd.md index 27d1c672d3..deaa84c828 100644 --- a/content/riak/kv/2.0.1/setup/installing/freebsd.md +++ b/content/riak/kv/2.0.1/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.0.1/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.1/downloads/ -[install verify]: /riak/kv/2.0.1/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.1/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.1/downloads/ +[install verify]: {{}}riak/kv/2.0.1/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.0.1/setup/installing/mac-osx.md b/content/riak/kv/2.0.1/setup/installing/mac-osx.md index 96349f50f3..c85f31842a 100644 --- a/content/riak/kv/2.0.1/setup/installing/mac-osx.md +++ b/content/riak/kv/2.0.1/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.0.1/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.0.1/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.1/setup/installing/verify +[perf open files]: {{}}riak/kv/2.0.1/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.0.1/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.1/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. You can install from source or download a diff --git a/content/riak/kv/2.0.1/setup/installing/rhel-centos.md b/content/riak/kv/2.0.1/setup/installing/rhel-centos.md index dbccb2ca7d..4c16aed4b5 100644 --- a/content/riak/kv/2.0.1/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.0.1/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.0.1/setup/installing/source -[install source erlang]: /riak/kv/2.0.1/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.1/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.1/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.1/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.1/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. 
The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.0.1/setup/installing/smartos.md b/content/riak/kv/2.0.1/setup/installing/smartos.md index e01e6c2679..ccd86c2d41 100644 --- a/content/riak/kv/2.0.1/setup/installing/smartos.md +++ b/content/riak/kv/2.0.1/setup/installing/smartos.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.1/setup/installing/verify The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. diff --git a/content/riak/kv/2.0.1/setup/installing/solaris.md b/content/riak/kv/2.0.1/setup/installing/solaris.md index 028ddf2e43..f366436863 100644 --- a/content/riak/kv/2.0.1/setup/installing/solaris.md +++ b/content/riak/kv/2.0.1/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.1/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. diff --git a/content/riak/kv/2.0.1/setup/installing/source.md b/content/riak/kv/2.0.1/setup/installing/source.md index 046680e8ff..0816eb5946 100644 --- a/content/riak/kv/2.0.1/setup/installing/source.md +++ b/content/riak/kv/2.0.1/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.0.1/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.1/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.0.1/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.0.1/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.0.1/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.0.1/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.0.1/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.1/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.1/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.0.1/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.0.1/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.0.1/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.0.1/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.0.1/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.0.1/setup/installing/source/erlang.md b/content/riak/kv/2.0.1/setup/installing/source/erlang.md index b181496518..239e81fc29 100644 --- a/content/riak/kv/2.0.1/setup/installing/source/erlang.md +++ b/content/riak/kv/2.0.1/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.1/installing/source/erlang/ --- -[install index]: /riak/kv/2.0.1/setup/installing -[security basics]: /riak/kv/2.0.1/using/security/basics +[install index]: {{}}riak/kv/2.0.1/setup/installing +[security basics]: {{}}riak/kv/2.0.1/using/security/basics Pre-packaged versions of Riak include an Erlang installation. 
If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho8.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.0.1/setup/installing/source/jvm.md b/content/riak/kv/2.0.1/setup/installing/source/jvm.md index dc43e01c42..499ae93faa 100644 --- a/content/riak/kv/2.0.1/setup/installing/source/jvm.md +++ b/content/riak/kv/2.0.1/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.0.1/installing/source/jvm/ --- -[usage search]: /riak/kv/2.0.1/developing/usage/search +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.0.1/setup/installing/suse.md b/content/riak/kv/2.0.1/setup/installing/suse.md index 1a58a7af47..ddebf97241 100644 --- a/content/riak/kv/2.0.1/setup/installing/suse.md +++ b/content/riak/kv/2.0.1/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.1/installing/suse/ --- -[install verify]: /riak/kv/2.0.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.1/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.0.1/setup/installing/verify.md b/content/riak/kv/2.0.1/setup/installing/verify.md index 932967c5ed..b0b97b1907 100644 --- a/content/riak/kv/2.0.1/setup/installing/verify.md +++ b/content/riak/kv/2.0.1/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.1/installing/verify-install/ --- -[client libraries]: /riak/kv/2.0.1/developing/client-libraries -[perf open files]: /riak/kv/2.0.1/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.0.1/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.0.1/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.0.1/developing/client-libraries +[perf open files]: {{}}riak/kv/2.0.1/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.0.1/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.0.1/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.0.1/setup/installing/windows-azure.md b/content/riak/kv/2.0.1/setup/installing/windows-azure.md index b05f94161a..ff5c853145 100644 --- a/content/riak/kv/2.0.1/setup/installing/windows-azure.md +++ b/content/riak/kv/2.0.1/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. 
- ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". - ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.0.1/setup/planning/backend.md b/content/riak/kv/2.0.1/setup/planning/backend.md index b18ddcc01e..479b8a14ba 100644 --- a/content/riak/kv/2.0.1/setup/planning/backend.md +++ b/content/riak/kv/2.0.1/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.1/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.1/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.0.1/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.1/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.0.1/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
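Backend selection itself is a one-line setting in the 2.0 configuration system; a sketch of `riak.conf`, choosing among the backends this section goes on to describe:

```
## riak.conf -- one of: bitcask, leveldb, memory, multi
storage_backend = bitcask
```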
diff --git a/content/riak/kv/2.0.1/setup/planning/backend/bitcask.md b/content/riak/kv/2.0.1/setup/planning/backend/bitcask.md index 773a5af7a3..352fe52282 100644 --- a/content/riak/kv/2.0.1/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.0.1/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.0.1/using/admin/riak-cli -[config reference]: /riak/kv/2.0.1/configuring/reference -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.0.1/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.0.1/setup/planning/backend/multi -[usage search]: /riak/kv/2.0.1/developing/usage/search - -[glossary aae]: /riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.0.1/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.0.1/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.0.1/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.0.1/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.0.1/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.0.1/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.0.1/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.0.1/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
diff --git a/content/riak/kv/2.0.1/setup/planning/backend/leveldb.md b/content/riak/kv/2.0.1/setup/planning/backend/leveldb.md index 4f8a83b97c..7ccdf76c1f 100644 --- a/content/riak/kv/2.0.1/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.0.1/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.1/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.1/configuring/reference -[perf index]: /riak/kv/2.0.1/using/performance -[config reference#aae]: /riak/kv/2.0.1/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[perf index]: {{}}riak/kv/2.0.1/using/performance +[config reference#aae]: {{}}riak/kv/2.0.1/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.0.1/setup/planning/backend/memory.md b/content/riak/kv/2.0.1/setup/planning/backend/memory.md index 2632f9f38a..8ab6682060 100644 --- a/content/riak/kv/2.0.1/setup/planning/backend/memory.md +++ b/content/riak/kv/2.0.1/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.1/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.0.1/configuring/reference -[plan backend multi]: /riak/kv/2.0.1/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[plan backend multi]: {{}}riak/kv/2.0.1/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.0.1/setup/planning/backend/multi.md b/content/riak/kv/2.0.1/setup/planning/backend/multi.md index 168151b3a4..22655a4c3e 100644 --- a/content/riak/kv/2.0.1/setup/planning/backend/multi.md +++ b/content/riak/kv/2.0.1/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.1/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.0.1/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.1/setup/planning/backend/memory -[config reference]: /riak/kv/2.0.1/configuring/reference -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.0.1/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.0.1/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.1/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.0.1/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
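A sketch of what that looks like in `riak.conf` (the backend names are illustrative; buckets are then pointed at a named backend via their `backend` bucket property):

```
storage_backend = multi

## Two named backends plus the default for unassigned buckets
multi_backend.bitcask_a.storage_backend = bitcask
multi_backend.leveldb_b.storage_backend = leveldb
multi_backend.default = bitcask_a
```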
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.0.1/setup/planning/best-practices.md b/content/riak/kv/2.0.1/setup/planning/best-practices.md index 3ce4241b21..e18a203b6a 100644 --- a/content/riak/kv/2.0.1/setup/planning/best-practices.md +++ b/content/riak/kv/2.0.1/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.1/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.0.1/using/reference/handoff -[config mapreduce]: /riak/kv/2.0.1/configuring/mapreduce -[glossary aae]: /riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.0.1/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.0.1/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.0.1/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.0.1/setup/planning/bitcask-capacity-calc.md index 57ba8937d9..4f2f37958c 100644 --- a/content/riak/kv/2.0.1/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.0.1/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
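The arithmetic behind those calculators is simple to sketch. As an illustration only, and assuming roughly 40 bytes of static keydir overhead per key (use the calculator for the authoritative figure): 100 million objects with 10-byte keys and an `n_val` of 3 need about (40 + 10) × 100,000,000 × 3 ≈ 15 GB of RAM across the cluster, since Bitcask keeps every key in memory.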
diff --git a/content/riak/kv/2.0.1/setup/planning/cluster-capacity.md b/content/riak/kv/2.0.1/setup/planning/cluster-capacity.md index cc194187f6..4d8ba53ec6 100644 --- a/content/riak/kv/2.0.1/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.0.1/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.1/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.0.1/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.0.1/setup/planning -[concept replication]: /riak/kv/2.0.1/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.0.1/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.0.1/configuring/reference -[perf benchmark]: /riak/kv/2.0.1/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.0.1/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.0.1/setup/planning +[concept replication]: {{}}riak/kv/2.0.1/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[perf benchmark]: {{}}riak/kv/2.0.1/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.0.1/setup/planning/operating-system.md b/content/riak/kv/2.0.1/setup/planning/operating-system.md index f58aff29b8..158bd65982 100644 --- a/content/riak/kv/2.0.1/setup/planning/operating-system.md +++ b/content/riak/kv/2.0.1/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.0.1/downloads/ +[downloads]: {{}}riak/kv/2.0.1/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.0.1/setup/planning/start.md b/content/riak/kv/2.0.1/setup/planning/start.md index a543c425bb..c9e58940e4 100644 --- a/content/riak/kv/2.0.1/setup/planning/start.md +++ b/content/riak/kv/2.0.1/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.1/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.0.1/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.1/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.0.1/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.0.1/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.1/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.0.1/setup/planning/bitcask-capacity-calc Here are some steps and recommendations designing and configuring your Riak cluster. 
diff --git a/content/riak/kv/2.0.1/setup/upgrading/checklist.md b/content/riak/kv/2.0.1/setup/upgrading/checklist.md index f9fefc375f..c7498625f8 100644 --- a/content/riak/kv/2.0.1/setup/upgrading/checklist.md +++ b/content/riak/kv/2.0.1/setup/upgrading/checklist.md @@ -16,24 +16,24 @@ aliases: - /riak/kv/2.0.1/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.0.1/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.1/using/performance +[perf open files]: {{}}riak/kv/2.0.1/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.1/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.0.1/using/security/basics -[cluster ops load balance]: /riak/kv/2.0.1/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.0.1/configuring/reference -[config backend]: /riak/kv/2.0.1/configuring/backend -[usage search]: /riak/kv/2.0.1/developing/usage/search -[usage conflict resolution]: /riak/kv/2.0.1/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.0.1/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.0.1/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.0.1/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.0.1/using/admin/commands -[use admin riak control]: /riak/kv/2.0.1/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.0.1/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.0.1/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.0.1/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.0.1/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.0.1/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[config backend]: {{}}riak/kv/2.0.1/configuring/backend +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.0.1/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.1/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.0.1/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.0.1/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.0.1/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.0.1/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.0.1/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.0.1/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a production environment from a development or testing environment can be a complex process. While the specific process will depend on your environment and practices, there are some basics for you to consider and a few questions you will want to ask while making this transition. 
diff --git a/content/riak/kv/2.0.1/setup/upgrading/cluster.md b/content/riak/kv/2.0.1/setup/upgrading/cluster.md index b76f2552b9..fb85e2fe51 100644 --- a/content/riak/kv/2.0.1/setup/upgrading/cluster.md +++ b/content/riak/kv/2.0.1/setup/upgrading/cluster.md @@ -11,23 +11,23 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.0.1/ops/upgrading/rolling-upgrades/ - /riak/kv/2.0.1/ops/upgrading/rolling-upgrades/ --- -[production checklist]: /riak/kv/2.0.1/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.0.1/using/admin/riak-control -[use admin commands]: /riak/kv/2.0.1/using/admin/commands -[use admin riak-admin]: /riak/kv/2.0.1/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.0.1/developing/usage/secondary-indexes -[release notes]: /riak/kv/2.0.1/release-notes/ +[production checklist]: {{}}riak/kv/2.0.1/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.0.1/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.0.1/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.0.1/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.0.1/developing/usage/secondary-indexes +[release notes]: {{}}riak/kv/2.0.1/release-notes/ [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.0.1/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.0.1/using/reference/jmx -[snmp]: /riak/kv/2.0.1/using/reference/snmp +[cluster ops mdc]: {{}}riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.0.1/using/reference/jmx +[snmp]: {{}}riak/kv/2.0.1/using/reference/snmp {{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. @@ -38,7 +38,7 @@ recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. -If you run [Riak Control](/riak/kv/2.0.1/using/admin/riak-control), you should disable it during the rolling upgrade process. +If you run [Riak Control]({{}}riak/kv/2.0.1/using/admin/riak-control), you should disable it during the rolling upgrade process. {{% /note %}} Riak KV nodes negotiate with each other to determine supported @@ -104,9 +104,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.1/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.1/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.1/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.1/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.1/release-notes/). {{% /note %}} ## RHEL/CentOS @@ -166,9 +166,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. 
Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.1/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.1/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.1/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.1/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.1/release-notes/). {{% /note %}} ## Solaris/OpenSolaris @@ -252,9 +252,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.1/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.1/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.1/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.1/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.1/release-notes/). {{% /note %}} ## Rolling Upgrade to Enterprise diff --git a/content/riak/kv/2.0.1/setup/upgrading/search.md b/content/riak/kv/2.0.1/setup/upgrading/search.md index a11ec465e3..637793bfd5 100644 --- a/content/riak/kv/2.0.1/setup/upgrading/search.md +++ b/content/riak/kv/2.0.1/setup/upgrading/search.md @@ -11,7 +11,7 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" --- If you're using Search in a version of Riak prior to 2.0 (1.3.0 to @@ -270,4 +270,4 @@ search property is set to false. 11. Finally, delete the merge index directories to reclaim disk space. -For any questions reach out to the [Riak community](/community). Preferably, ask your questions up front rather than during the middle of a migration. +For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. diff --git a/content/riak/kv/2.0.1/setup/upgrading/version.md b/content/riak/kv/2.0.1/setup/upgrading/version.md index 5c0105df48..a739d8b09a 100644 --- a/content/riak/kv/2.0.1/setup/upgrading/version.md +++ b/content/riak/kv/2.0.1/setup/upgrading/version.md @@ -20,7 +20,7 @@ explains which default Riak behaviors have changed and specific steps to take for a successful upgrade. For an overview of the new features and functionality -included in version 2.0, check out our guide to [Riak 2.0](/riak/kv/2.0.1/introduction). +included in version 2.0, check out our guide to [Riak 2.0]({{}}riak/kv/2.0.1/introduction). ## New Clients @@ -36,14 +36,14 @@ was built with those features in mind. 
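Each platform-specific pass above has the same shape; a condensed sketch for one RPM-based node (the package file and node name are placeholders):

```
riak stop
sudo rpm -Uvh riak-2.0.1-1.el6.x86_64.rpm
riak start

# Wait until KV is accepting requests before touching the next node
riak-admin wait-for-service riak_kv riak@192.168.1.11

# Confirm hinted handoff has drained
riak-admin transfers
```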
There are official While we strongly recommend using the newest versions of these clients, older versions will still work with Riak 2.0, with the drawback that -those older clients will not able to take advantage of [new features](/riak/kv/2.0.1/introduction) like [data types](/riak/kv/2.0.1/developing/data-types) or the new [Riak Search](/riak/kv/2.0.1/using/reference/search). +those older clients will not able to take advantage of [new features]({{}}riak/kv/2.0.1/introduction) like [data types]({{}}riak/kv/2.0.1/developing/data-types) or the new [Riak Search]({{}}riak/kv/2.0.1/using/reference/search). ## Bucket Types In versions of Riak prior to 2.0, the location of objects was -determined by objects' [bucket](/riak/kv/2.0.1/learn/concepts/buckets) and [key](/riak/kv/2.0.1/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties](/riak/kv/2.0.1/developing/usage/bucket-types/). +determined by objects' [bucket]({{}}riak/kv/2.0.1/learn/concepts/buckets) and [key]({{}}riak/kv/2.0.1/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties]({{}}riak/kv/2.0.1/developing/usage/bucket-types/). -In Riak 2.0, [bucket types](/riak/kv/2.0.1/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types](/riak/kv/2.0.1/using/reference/bucket-types). +In Riak 2.0, [bucket types]({{}}riak/kv/2.0.1/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types]({{}}riak/kv/2.0.1/using/reference/bucket-types). Here, we'll list some of the things to be aware of when upgrading. #### Bucket types and object location @@ -56,7 +56,7 @@ is determined by: * key This means there are 3 namespaces involved in object location instead of 2. -A full tutorial can be found in [Using Bucket Types](/riak/kv/2.0.1/using/reference/bucket-types). +A full tutorial can be found in [Using Bucket Types]({{}}riak/kv/2.0.1/using/reference/bucket-types). If your application was written using a version of Riak prior to 2.0, you should make sure that any endpoint in Riak targeting @@ -75,8 +75,8 @@ configurations. 
The following URLs are equivalent in Riak 2.0: If you use object locations that don't specify a bucket type, you have three options: -* Accept Riak's [default bucket configurations](/riak/kv/2.0.1/using/reference/bucket-types/#buckets-as-namespaces) -* Change Riak's defaults using your [configuration files](/riak/kv/2.0.1/configuring/reference/#default-bucket-properties) +* Accept Riak's [default bucket configurations]({{}}riak/kv/2.0.1/using/reference/bucket-types/#buckets-as-namespaces) +* Change Riak's defaults using your [configuration files]({{}}riak/kv/2.0.1/configuring/reference/#default-bucket-properties) * Manage multiple sets of bucket properties by specifying those properties for all operations (not recommended) @@ -86,17 +86,17 @@ One reason we recommend using bucket types for Riak 2.0 and later is because many newer Riak features were built with bucket types as a precondition: -* [Strong consistency](/riak/2.0.1/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem +* [Strong consistency]({{}}riak/kv/2.0.1/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem requires you to set the `consistent` parameter on a bucket type to `true` -* [Riak Data Types](/riak/kv/2.0.1/developing/data-types) --- In order to use Riak Data - Types, you must [create bucket types](/riak/kv/2.0.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the +* [Riak Data Types]({{}}riak/kv/2.0.1/developing/data-types) --- In order to use Riak Data + Types, you must [create bucket types]({{}}riak/kv/2.0.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the Data Type you are using #### Bucket types and downgrades If you decide to use bucket types, please remember that you -cannot [downgrade](/riak/kv/2.0.1/setup/downgrade) your cluster to a version of +cannot [downgrade]({{}}riak/kv/2.0.1/setup/downgrade) your cluster to a version of Riak prior to 2.0 if you have both created and activated a bucket type. @@ -104,20 +104,20 @@ bucket type. One of the biggest changes in version 2.0 regarding application development involves Riak's default -[siblings](/riak/kv/2.0.1/learn/concepts/causal-context/#siblings) behavior. +[siblings]({{}}riak/kv/2.0.1/learn/concepts/causal-context/#siblings) behavior. In versions prior to 2.0, the `allow_mult` setting was set to `false` by default for all buckets. So Riak's default behavior was to resolve -object replica [conflicts](/riak/kv/2.0.1/developing/usage/conflict-resolution) between nodes on its +object replica [conflicts]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution) between nodes on its own; relieving connecting clients of the need to resolve those conflicts. **In 2.0, `allow_mult` is set to `true` for any bucket type that you create and activate.** -This means that the default when [using bucket types](/riak/kv/2.0.1/using/reference/bucket-types/) is to handle [conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution) on the client side using -either traditional [vector clocks](/riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors](/riak/kv/2.0.1/learn/concepts/causal-context/#dotted-version-vector). 
+This means that the default when [using bucket types]({{}}riak/kv/2.0.1/using/reference/bucket-types/) is to handle [conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution) on the client side using +either traditional [vector clocks]({{}}riak/kv/2.0.1/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors]({{}}riak/kv/2.0.1/learn/concepts/causal-context/#dotted-version-vector). If you wish to set `allow_mult` to `false` in version 2.0, you have two options: @@ -126,11 +126,11 @@ options: * Don't use bucket types. More information on handling siblings can be found in our documentation -on [conflict resolution](/riak/kv/2.0.1/developing/usage/conflict-resolution). +on [conflict resolution]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution). ## Enabling Security -The [authentication and authorization](/riak/kv/2.0.1/using/security/basics) mechanisms included with Riak 2.0 should only be turned +The [authentication and authorization]({{}}riak/kv/2.0.1/using/security/basics) mechanisms included with Riak 2.0 should only be turned on after careful testing in a non-production environment. Security changes the way all applications interact with Riak. @@ -140,12 +140,12 @@ If you decide to upgrade to version 2.0, you can still downgrade your cluster to an earlier version of Riak if you wish, _unless_ you perform one of the following actions in your cluster: -* Index data to be used in conjunction with the new [Riak Search](/riak/kv/2.0.1/using/reference/search). -* Create _and_ activate one or more [bucket types](/riak/kv/2.0.1/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: - - [Strong consistency](/riak/2.0.1/using/reference/strong-consistency) - - [Riak Data Types](/riak/kv/2.0.1/developing/data-types) +* Index data to be used in conjunction with the new [Riak Search]({{}}riak/kv/2.0.1/using/reference/search). +* Create _and_ activate one or more [bucket types]({{}}riak/kv/2.0.1/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: + - [Strong consistency]({{}}riak/kv/2.0.1/using/reference/strong-consistency) + - [Riak Data Types]({{}}riak/kv/2.0.1/developing/data-types) -If you use other new features, such as [Riak Security](/riak/kv/2.0.1/using/security/basics) or the new [configuration files](/riak/kv/2.0.1/configuring/reference/), you can still +If you use other new features, such as [Riak Security]({{}}riak/kv/2.0.1/using/security/basics) or the new [configuration files]({{}}riak/kv/2.0.1/configuring/reference/), you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. @@ -155,7 +155,7 @@ Riak 2.0 offers a new configuration system that both simplifies configuration syntax and uses one configuration file, `riak.conf`, instead of the two files, `app.config` and `vm.args`, required by the older system. Full documentation of the new system can be found in -[Configuration Files](/riak/kv/2.0.1/configuring/reference/). +[Configuration Files]({{}}riak/kv/2.0.1/configuring/reference/). If you're upgrading to Riak 2.0 from an earlier version, you have two configuration options: @@ -166,12 +166,12 @@ configuration options: recognized in Riak 2.0. 
If you choose the first option, make sure to consult the -[configuration files](/riak/kv/2.0.1/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. +[configuration files]({{}}riak/kv/2.0.1/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. If you choose the second option, Riak will automatically determine that the older configuration system is being used. You should be aware, however, that some settings must be set in an `advanced.config` file. -For a listing of those parameters, see our documentation on [advanced configuration](/riak/kv/2.0.1/configuring/reference/#advanced-configuration). +For a listing of those parameters, see our documentation on [advanced configuration]({{}}riak/kv/2.0.1/configuring/reference/#advanced-configuration). If you choose to keep the existing `app.config` files, you _must_ add the following additional settings in the `riak_core` section: @@ -209,7 +209,7 @@ default to a value of `15`, which can cause problems in some clusters. ## Upgrading Search Information on upgrading Riak Search to 2.0 can be found in our -[Search upgrade guide](/riak/kv/2.0.1/setup/upgrading/search). +[Search upgrade guide]({{}}riak/kv/2.0.1/setup/upgrading/search). ## Migrating from Short Names @@ -220,12 +220,11 @@ and `-name` in `vm.args`. If you are upgrading from a previous version of Riak to 2.0 and are using `-sname` in your `vm.args`, the below steps are required to migrate away from `-sname`. -1. Upgrade to Riak -[1.4.12](http://docs.basho.com/riak/1.4.12/downloads/). +1. Upgrade to Riak 1.4.12. 2. Back up the ring directory on each node, typically located in `/var/lib/riak/ring`. 3. Stop all nodes in your cluster. -4. Run [`riak-admin reip `](/riak/kv/2.0.1/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your +4. Run [`riak-admin reip `]({{}}riak/kv/2.0.1/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your cluster. For example, in a 5 node cluster this will be run 25 total times, 5 times on each node. The `` is the current shortname, and the `` is the new fully qualified hostname. 
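As a hedged illustration of that step (the node names below are invented; the command is run on a stopped node, once for every node in the cluster):

```bash
# Map an old shortname to its new fully qualified name
riak-admin reip riak@node1 riak@node1.example.com
```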
diff --git a/content/riak/kv/2.0.1/using.md b/content/riak/kv/2.0.1/using.md index 3745a87980..fe1622839e 100644 --- a/content/riak/kv/2.0.1/using.md +++ b/content/riak/kv/2.0.1/using.md @@ -15,7 +15,7 @@ toc: true [use running cluster]: ../using/running-a-cluster [use admin index]: ../using/admin/ [cluster ops index]: ../using/cluster-operations -[repair recover index]: ../repair-recovery +[repair recover index]: ../using/repair-recovery [security index]: ../using/security [perf index]: ../using/performance [troubleshoot index]: ../using/troubleshooting diff --git a/content/riak/kv/2.0.1/using/admin/commands.md b/content/riak/kv/2.0.1/using/admin/commands.md index 29cdb85e79..cab66bd689 100644 --- a/content/riak/kv/2.0.1/using/admin/commands.md +++ b/content/riak/kv/2.0.1/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.1/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.0.1/using/admin/riak-admin/#cluster -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.0.1/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.0.1/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.0.1/using/admin/riak-admin.md b/content/riak/kv/2.0.1/using/admin/riak-admin.md index 5d3bd332c0..04e575cf11 100644 --- a/content/riak/kv/2.0.1/using/admin/riak-admin.md +++ b/content/riak/kv/2.0.1/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.0.1/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.0.1/configuring/reference -[use admin commands]: /riak/kv/2.0.1/using/admin/commands -[use admin commands#join]: /riak/kv/2.0.1/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.0.1/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.0.1/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.0.1/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.0.1/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.0.1/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.0.1/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.0.1/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.0.1/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.0.1/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.0.1/setup/downgrade -[security index]: /riak/kv/2.0.1/using/security/ -[security managing]: /riak/kv/2.0.1/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.0.1/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.0.1/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.0.1/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.0.1/using/cluster-operations/strong-consistency -[cluster 
ops handoff]: /riak/kv/2.0.1/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.0.1/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[use admin commands]: {{}}riak/kv/2.0.1/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.0.1/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.0.1/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.0.1/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.0.1/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.0.1/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.0.1/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.0.1/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.0.1/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.0.1/using/cluster-operations/inspecting-node +[use ref monitoring]: {{}}riak/kv/2.0.1/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.0.1/setup/downgrade +[security index]: {{}}riak/kv/2.0.1/using/security/ +[security managing]: {{}}riak/kv/2.0.1/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.0.1/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.0.1/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.0.1/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.0.1/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.0.1/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.0.1/using/admin/riak-admin/#stats ## riak-admin diff --git a/content/riak/kv/2.0.1/using/admin/riak-cli.md b/content/riak/kv/2.0.1/using/admin/riak-cli.md index 72f3c8c441..64aad3b1e9 100644 --- a/content/riak/kv/2.0.1/using/admin/riak-cli.md +++ b/content/riak/kv/2.0.1/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.1/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.0.1/configuring/reference/ +[configuration file]: {{}}riak/kv/2.0.1/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.0.1/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.0.1/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.0.1/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.0.1/configuring/reference/ ## riak diff --git a/content/riak/kv/2.0.1/using/admin/riak-control.md b/content/riak/kv/2.0.1/using/admin/riak-control.md index 7b24e2b45e..05e3ed02cd 100644 --- a/content/riak/kv/2.0.1/using/admin/riak-control.md +++ b/content/riak/kv/2.0.1/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.0.1/configuring/reference +[config reference]: {{}}riak/kv/2.0.1/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.0.1/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.0.1/using/security/basics#enabling-ssl). 
Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.0.1/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.0.1/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.0.1/using/cluster-operations.md b/content/riak/kv/2.0.1/using/cluster-operations.md index 495c077ff0..2372f50665 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations.md +++ b/content/riak/kv/2.0.1/using/cluster-operations.md @@ -20,7 +20,6 @@ toc: true [ops log]: ./logging [ops backup]: ./backing-up [ops handoff]: ./handoff -[ops obj del]: ./object-deletion [ops strong consistency]: ./strong-consistency [ops v3 mdc]: ./v3-multi-datacenter [ops v2 mdc]: ./v2-multi-datacenter @@ -84,13 +83,6 @@ Information on using the `riak-admin handoff` interface to enable and disable ha [Learn More >>][ops handoff] -#### [Object Deletion][ops obj del] - -Describes possible settings for `delete_mode`. - -[Learn More >>][ops obj del] - - #### [Monitoring Strong Consistency][ops strong consistency] Overview of the various statistics used in monitoring strong consistency. 
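If you just want a quick look at those statistics from a shell, a minimal sketch (assuming `riak-admin` is on your `PATH`; the `grep` is only a convenience filter):

```bash
# Dump node statistics and keep the strong-consistency counters,
# which are all prefixed with consistent_
riak-admin status | grep consistent_
```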
diff --git a/content/riak/kv/2.0.1/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.0.1/using/cluster-operations/active-anti-entropy.md index 4e1942bf4f..1e4865a738 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/active-anti-entropy.md @@ -54,12 +54,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. ## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -87,7 +87,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes.md index 89654e1e35..85d92d0ee6 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.0.1/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.0.1/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.0.1/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.0.1/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.0.1/using/cluster-operations/backing-up.md b/content/riak/kv/2.0.1/using/cluster-operations/backing-up.md index 8b91c38e3d..3c31e150f4 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.1/ops/running/backups --- -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters -[config reference]: /riak/kv/2.0.1/configuring/reference -[plan backend leveldb]: /riak/kv/2.0.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.1/using/reference/strong-consistency -[concept aae]: /riak/kv/2.0.1/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.0.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.0.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.1/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.0.1/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.0.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.0.1/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down `](/riak/kv/2.0.1/using/admin/riak-admin/#down) + [`riak-admin down `]({{}}riak/kv/2.0.1/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join `](/riak/kv/2.0.1/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join `]({{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace `](/riak/kv/2.0.1/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace `]({{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.0.1/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.0.1/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.0.1/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.0.1/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.0.1/using/cluster-operations/bucket-types.md b/content/riak/kv/2.0.1/using/cluster-operations/bucket-types.md index aed09231e2..db14425d12 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.0.1/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. 
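That setup step is typically a create-and-activate pair run on any one node; a minimal sketch (the type name `no_siblings` and its properties are illustrative):

```bash
# Create a bucket type with custom properties, then activate it cluster-wide
riak-admin bucket-type create no_siblings '{"props":{"allow_mult":false}}'
riak-admin bucket-type activate no_siblings

# Confirm that the type is active
riak-admin bucket-type status no_siblings
```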
diff --git a/content/riak/kv/2.0.1/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.0.1/using/cluster-operations/changing-cluster-info.md index f684552cc4..fa40d3e405 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.0.1/configuring/reference +[config reference]: {{}}riak/kv/2.0.1/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.0.1/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.0.1/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.0.1/using/cluster-operations/handoff.md b/content/riak/kv/2.0.1/using/cluster-operations/handoff.md index d136b9388e..0543146a45 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.0.1/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.0.1/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.0.1/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.0.1/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.0.1/using/cluster-operations/logging.md b/content/riak/kv/2.0.1/using/cluster-operations/logging.md index 7be2cade54..45be8089ab 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/logging.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e.
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.0.1/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.0.1/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.0.1/using/cluster-operations/replacing-node.md b/content/riak/kv/2.0.1/using/cluster-operations/replacing-node.md index b0c5594c52..1f41929fd1 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.0.1/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.0.1/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.0.1/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.0.1/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.0.1/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.0.1/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.0.1/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.0.1/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.0.1/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.0.1/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.0.1/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.0.1/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.0.1/using/cluster-operations/strong-consistency.md index 128fbbd00f..411d5b49fd 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.0.1/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.0.1/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.0.1/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.0.1/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.0.1/using/cluster-operations/v2-multi-datacenter.md index 58516cdae2..fe1b59d813 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/v2-multi-datacenter.md @@ -159,7 +159,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -179,7 +179,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -217,7 +217,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.0.1/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.0.1/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -238,7 +238,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter.md index 7a957d39ea..37efe9bfac 100644 --- a/content/riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.0.1/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.0.1/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.1/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.1/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.0.1/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.0.1/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.0.1/using/performance.md b/content/riak/kv/2.0.1/using/performance.md index 95f215ac5c..c5e1eea584 100644 --- a/content/riak/kv/2.0.1/using/performance.md +++ b/content/riak/kv/2.0.1/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.0.1/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.0.1/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -237,12 +237,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.0.1/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.0.1/using/performance/open-files-limit/) for more details.
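As a quick sanity check on any node, you can inspect and temporarily raise the limit with standard shell tools (the target of 65536 is illustrative, not an official recommendation; persistent changes belong in your OS's limits configuration, e.g. `/etc/security/limits.conf`):

```bash
# Show the current open-files limit for this shell
ulimit -n

# Raise it for the current session only
ulimit -n 65536
```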
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.0.1/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.0.1/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.0.1/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.0.1/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.0.1/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.0.1/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.0.1/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.0.1/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.0.1/using/performance/benchmarking.md b/content/riak/kv/2.0.1/using/performance/benchmarking.md index 6e0e1d40e6..20c2dda694 100644 --- a/content/riak/kv/2.0.1/using/performance/benchmarking.md +++ b/content/riak/kv/2.0.1/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.0.1/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.0.1/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.0.1/using/performance/latency-reduction.md b/content/riak/kv/2.0.1/using/performance/latency-reduction.md index 81db8e84ee..5bd5a66a40 100644 --- a/content/riak/kv/2.0.1/using/performance/latency-reduction.md +++ b/content/riak/kv/2.0.1/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.0.1/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.0.1/using/performance/multi-datacenter-tuning.md index 3a209eb34c..0a9ccd4769 100644 --- a/content/riak/kv/2.0.1/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.0.1/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.0.1/using/performance +[perf index]: {{}}riak/kv/2.0.1/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.0.1/using/performance/open-files-limit.md b/content/riak/kv/2.0.1/using/performance/open-files-limit.md index 9d1de50c31..35e6afec81 100644 --- a/content/riak/kv/2.0.1/using/performance/open-files-limit.md +++ b/content/riak/kv/2.0.1/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/ops/tuning/open-files-limit/ --- -[plan backend bitcask]: /riak/kv/2.0.1/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.1/setup/planning/backend/bitcask Riak can consume a large number of open file handles during normal operation. 
The [Bitcask][plan backend bitcask] backend in particular may accumulate a high diff --git a/content/riak/kv/2.0.1/using/reference/bucket-types.md b/content/riak/kv/2.0.1/using/reference/bucket-types.md index f3555e9a64..a3983a39a3 100644 --- a/content/riak/kv/2.0.1/using/reference/bucket-types.md +++ b/content/riak/kv/2.0.1/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.0.1/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.0.1/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.0.1/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.0.1/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. @@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.0.1/developing/data-types), and [strong consistency](/riak/kv/2.0.1/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.0.1/developing/data-types), and [strong consistency]({{}}riak/kv/2.0.1/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.0.1/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.0.1/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.1/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.0.1/developing/getting-started) section. 
+> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.1/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.1/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.0.1/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.0.1/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.0.1/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution) with the appropriate application-side business logic. To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.0.1/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.0.1/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.0.1/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.0.1/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.0.1/learn/concepts/buckets) and [keys](/riak/kv/2.0.1/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.0.1/learn/concepts/buckets) and [keys]({{}}riak/kv/2.0.1/learn/concepts/keys-and-objects).
We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.0.1/using/reference/custom-code.md b/content/riak/kv/2.0.1/using/reference/custom-code.md index d8ea5b91ce..efba91d26f 100644 --- a/content/riak/kv/2.0.1/using/reference/custom-code.md +++ b/content/riak/kv/2.0.1/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.0.1/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.0.1/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.0.1/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.0.1/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.0.1/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.0.1/using/admin/riak-admin/#wait-for-service). {{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.0.1/using/reference/handoff.md b/content/riak/kv/2.0.1/using/reference/handoff.md index 40cb2bba39..bfb03fdeb6 100644 --- a/content/riak/kv/2.0.1/using/reference/handoff.md +++ b/content/riak/kv/2.0.1/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.0.1/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.0.1/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.0.1/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.0.1/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.0.1/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.0.1/learn/glossary/#vnode) no longer belongs to the node on which it's running. 
This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.0.1/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.0.1/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.0.1/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.0.1/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.0.1/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.0.1/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.0.1/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.0.1/configuring/reference/#vnode_management_timer). Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.0.1/using/reference/jmx.md b/content/riak/kv/2.0.1/using/reference/jmx.md index 41bf46a3ba..66a2e668e7 100644 --- a/content/riak/kv/2.0.1/using/reference/jmx.md +++ b/content/riak/kv/2.0.1/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.1/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.0.1/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.0.1/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.0.1/using/reference/logging.md b/content/riak/kv/2.0.1/using/reference/logging.md index f7da7f56af..d32d6710c1 100644 --- a/content/riak/kv/2.0.1/using/reference/logging.md +++ b/content/riak/kv/2.0.1/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.1/ops/running/logging --- -[cluster ops log]: /riak/kv/2.0.1/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.0.1/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. 
A compact listing of parameters can be found in our [configuration files](/riak/kv/2.0.1/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.0.1/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -46,7 +46,7 @@ File | Significance `console.log` | Console log output `crash.log` | Crash logs `erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. -`error.log` | [Common errors](../../repair-recover/errors) emitted by Riak. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. `run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. ## Log Syntax @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.0.1/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.0.1/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. * `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.0.1/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.0.1/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.0.1/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.0.1/using/reference/multi-datacenter/comparison.md index 541ba5a787..2de1c791a1 100644 --- a/content/riak/kv/2.0.1/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.0.1/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.0.1/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.0.1/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.0.1/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.0.1/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. 
-* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.0.1/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.0.1/using/reference/object-deletion.md b/content/riak/kv/2.0.1/using/reference/object-deletion.md index a4a50898ec..aac478043d 100644 --- a/content/riak/kv/2.0.1/using/reference/object-deletion.md +++ b/content/riak/kv/2.0.1/using/reference/object-deletion.md @@ -39,7 +39,7 @@ concretely using the following example: * The object has been marked as deleted on nodes A and B, but it still lives on node C * A client attempts to read the object, Riak senses that there are - divergent replicas and initiates a repair process (either [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) or [active anti-entropy](../../../learn/concepts/active-anti-entropy/), + divergent replicas and initiates a repair process (either [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) or [active anti-entropy](../../../learn/concepts/active-anti-entropy/), depending on configuration) At this point, Riak needs to make a decision about what to do. Should diff --git a/content/riak/kv/2.0.1/using/reference/runtime-interaction.md b/content/riak/kv/2.0.1/using/reference/runtime-interaction.md index 4b009025fb..06231076ca 100644 --- a/content/riak/kv/2.0.1/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.0.1/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.1/ops/advanced/runtime --- -[config reference]: /riak/kv/2.0.1/configuring/reference -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.1/configuring/reference +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.0.1/using/reference/search.md b/content/riak/kv/2.0.1/using/reference/search.md index 2aec5b8a33..a1af5e9d78 100644 --- a/content/riak/kv/2.0.1/using/reference/search.md +++ b/content/riak/kv/2.0.1/using/reference/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.0.1/dev/advanced/search --- -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak Search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -Search, you should check out the [Using Search](/riak/kv/2.0.1/developing/usage/search) document. +Search, you should check out the [Using Search]({{}}riak/kv/2.0.1/developing/usage/search) document. 
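To make the Search workflow above concrete, here is a minimal sketch using the HTTP API, assuming Search is enabled and a node is listening on `localhost:8098`; the index name `famous`, the bucket `cats`, and the query field `name_s` are illustrative only:

```bash
# Create a Search index backed by the default schema
curl -XPUT http://localhost:8098/search/index/famous

# Associate the index with a bucket so new writes get indexed
curl -XPUT http://localhost:8098/buckets/cats/props \
  -H 'Content-Type: application/json' \
  -d '{"props":{"search_index":"famous"}}'

# Query the index through Riak's Solr passthrough
curl 'http://localhost:8098/search/query/famous?wt=json&q=name_s:Lion*'
```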
-![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{< baseurl >}}images/yokozuna.png) ## Riak Search is Erlang @@ -126,7 +126,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.0.1/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{< baseurl >}}riak/kv/2.0.1/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -288,7 +288,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on to how customize your own [search schema](/riak/kv/2.0.1/developing/usage/search-schemas). +You can also find more information on how to customize your own [search schema]({{< baseurl >}}riak/kv/2.0.1/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -298,7 +298,7 @@ indexed. ## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.0.1/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{< baseurl >}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -353,7 +353,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.0.1/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{< baseurl >}}riak/kv/2.0.1/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.0.1/using/reference/secondary-indexes.md b/content/riak/kv/2.0.1/using/reference/secondary-indexes.md index a0b053ec5b..2e88783192 100644 --- a/content/riak/kv/2.0.1/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.0.1/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.0.1/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{< baseurl >}}riak/kv/2.0.1/developing/usage/bucket-types +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.0.1/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.1/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{< baseurl >}}riak/kv/2.0.1/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's [secondary indexes](/riak/kv/2.0.1/developing/usage/secondary-indexes/) \(2i) feature.
+[secondary indexes]({{}}riak/kv/2.0.1/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.0.1/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.0.1/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.0.1/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.0.1/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.0.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.0.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. diff --git a/content/riak/kv/2.0.1/using/reference/statistics-monitoring.md b/content/riak/kv/2.0.1/using/reference/statistics-monitoring.md index f6bc483998..ebbbbc7e5f 100644 --- a/content/riak/kv/2.0.1/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.0.1/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.0.1/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.0.1/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.0.1/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.0.1/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. 
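As a quick illustration of the `/stats` endpoint just mentioned, here is a minimal sketch, assuming the HTTP interface listens on `localhost:8098`:

```bash
# Fetch the full stats blob as JSON
curl -s -H 'Accept: application/json' http://localhost:8098/stats

# Crude extraction of a single counter without any extra tooling
curl -s http://localhost:8098/stats | tr ',' '\n' | grep '"node_gets"'
```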
This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.0.1/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.0.1/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.0.1/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.0.1/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.0.1/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.0.1/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.0.1/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.0.1/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -143,7 +143,7 @@ Metric | Also | Notes ## Command-line Interface -The [`riak-admin`](/riak/kv/2.0.1/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.0.1/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -168,14 +168,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.0.1/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.0.1/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.0.1/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.0.1/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -222,7 +222,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.0.1/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.0.1/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -246,7 +246,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. 
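For the `riak-admin status` and `riak-admin stat` interfaces described above, a rough sketch follows; stat names and the dotted-path syntax vary by release, so treat both as illustrative rather than canonical:

```bash
# Filter the flat status dump for GET latency entries
riak-admin status | grep node_get_fsm_time

# The finer-grained 2.x interface addresses stats by path
riak-admin stat show riak.riak_kv.node.gets
```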
A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.0.1/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.0.1/developing/api/http/status) endpoint is also available. #### Nagios @@ -320,14 +320,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.0.1/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.0.1/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.0.1/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.0.1/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -349,9 +349,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.0.1/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.0.1/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.0.1/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.0.1/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -367,9 +367,9 @@ Docs](https://github.com/basho/basho_docs). 
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.0.1/using/reference/strong-consistency.md b/content/riak/kv/2.0.1/using/reference/strong-consistency.md index ccfd2e1263..bbb3c52bc2 100644 --- a/content/riak/kv/2.0.1/using/reference/strong-consistency.md +++ b/content/riak/kv/2.0.1/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.0.1/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.1/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.1/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.1/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.1/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.1/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.0.1/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.0.1/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. 
Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.0.1/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.0.1/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.0.1/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.0.1/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.0.1/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.0.1/using/reference/v2-multi-datacenter/architecture.md index dce7a51a7a..8b8425612c 100644 --- a/content/riak/kv/2.0.1/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.1/using/reference/v2-multi-datacenter/architecture.md @@ -78,7 +78,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.0.1/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.0.1/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -90,7 +90,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{< baseurl >}}images/MDC_Full-sync-small.png)
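A hypothetical wiring of the listener/site pair behind the fullsync cycle above; node names, IPs, and ports are placeholders for your own topology:

```bash
# On the primary cluster: expose a replication listener
riak-repl add-listener riak@10.0.1.1 10.0.1.1 9010

# On the secondary cluster: point this cluster at the primary's listener
riak-repl add-site 10.0.1.1 9010 primary_site

# Trigger a fullsync by hand and check its progress
riak-repl start-fullsync
riak-repl status
```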
## Realtime Replication @@ -108,7 +108,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{< baseurl >}}images/MDC-real-time-sync-small.png)
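The Restrictions section that follows requires matching ring sizes and `n_val`s across both clusters; a quick pre-flight sketch, with the bucket name and host as placeholders:

```bash
# Ring size must match on a node in each cluster
riak-admin status | grep ring_creation_size

# Spot-check a bucket's n_val over HTTP
curl -s http://localhost:8098/buckets/my_bucket/props | tr ',' '\n' | grep n_val
```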
## Restrictions @@ -116,6 +116,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.0.1/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.0.1/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.0.1/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.0.1/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/aae.md index f71a867a70..7ad3f09dac 100644 --- a/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.1/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.0.1/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.0.1/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.0.1/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/architecture.md index 95593cccf6..e82e10feb1 100644 --- a/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.1/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.0.1/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.0.1/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.0.1/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.0.1/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime1.png)
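A sketch of the v3 commands implied by the steps above, with cluster names, host, and port as placeholders (9080 is a common cluster-manager port, but verify your own configuration):

```bash
# Name each cluster (run once on each side)
riak-repl clustername source_cluster    # on the source
riak-repl clustername sink_cluster      # on the sink

# From the source, connect to the sink's cluster manager
riak-repl connect 10.0.2.1:9080

# Enable and start realtime replication toward the named sink
riak-repl realtime enable sink_cluster
riak-repl realtime start sink_cluster
```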
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime3.png)
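Once realtime replication is running, the queue described above can be watched on a source node; a rough sketch, noting that the exact field names in the output vary across releases:

```bash
# realtime_queue_stats reports queue fill and consumer state
riak-repl status | grep -A 8 realtime_queue_stats
```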
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime6.png)
## Restrictions diff --git a/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/cascading-writes.md index 7ef1f1ac2d..98c0b23ff4 100644 --- a/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/cascading-writes.md @@ -83,7 +83,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{}}riak/kv/2.0.1/using/cluster-operations/v3-multi-datacenter) for more information. To show current the settings: diff --git a/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md index a19922e76a..7705a569a3 100644 --- a/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.0.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.1/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.0.1/configuring/reference/#advanced-configuration +[config reference#advanced]: {{}}riak/kv/2.0.1/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.0.1/using/repair-recovery.md b/content/riak/kv/2.0.1/using/repair-recovery.md index e85b5ec06f..65a4e2dd7f 100644 --- a/content/riak/kv/2.0.1/using/repair-recovery.md +++ b/content/riak/kv/2.0.1/using/repair-recovery.md @@ -15,7 +15,7 @@ toc: true [repair recover fail]: ./failure-recovery/ [repair recover errors]: ./errors/ [repair recover repairs]: ./repairs/ -[repair recover restart]: ./rolling-restarts/ +[repair recover restart]: ./rolling-restart/ ## In This Section diff --git a/content/riak/kv/2.0.1/using/repair-recovery/errors.md b/content/riak/kv/2.0.1/using/repair-recovery/errors.md index 97f193a1f9..9da75dd74f 100644 --- a/content/riak/kv/2.0.1/using/repair-recovery/errors.md +++ b/content/riak/kv/2.0.1/using/repair-recovery/errors.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.0.1/configuring/reference +[config reference]: {{}}riak/kv/2.0.1/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1. 
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1. `{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See 1 +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. 
See 1 `{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.0.1/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.0.1/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.0.1/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.0.1/using/repair-recovery/failure-recovery.md index f8c6d18e06..ef19cef40d 100644 --- a/content/riak/kv/2.0.1/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.0.1/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.0.1/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.0.1/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.0.1/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.0.1/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
@@ -115,7 +115,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak//2.0.1/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.0.1/using/repair-recovery/repairs.md b/content/riak/kv/2.0.1/using/repair-recovery/repairs.md index 3c51575c9d..52e72db95e 100644 --- a/content/riak/kv/2.0.1/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.0.1/using/repair-recovery/repairs.md @@ -149,7 +149,7 @@ In the event of major hardware or filesystem problems, LevelDB can become corrup ### Checking for Compaction Errors -Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`](/riak/kv/2.0.1/configuring/reference/) configuration file. The default is `./data`. +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`]({{}}riak/kv/2.0.1/configuring/reference/) configuration file. The default is `./data`. Compaction error messages take the following form: @@ -218,23 +218,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.0.1/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.0.1/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.0.1/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/) is enabled. 
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.0.1/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.0.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.0.1/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.0.1/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.0.1/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.0.1/using/repair-recovery/rolling-restart.md index 1c66847a30..79d579dd32 100644 --- a/content/riak/kv/2.0.1/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.0.1/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.1/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.0.1/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.0.1/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.0.1/using/running-a-cluster.md b/content/riak/kv/2.0.1/using/running-a-cluster.md index 268f62e82e..9ac7a8a9aa 100644 --- a/content/riak/kv/2.0.1/using/running-a-cluster.md +++ b/content/riak/kv/2.0.1/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. -Most configuration changes will be applied to the [configuration file](/riak/kv/2.0.1/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.0.1/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.0.1/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. 
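The "joining all of the nodes together" step described above reduces to three staged commands; a minimal sketch with a placeholder node name:

```bash
# From each additional node, stage a join to the first node
riak-admin cluster join riak@192.168.1.10

# Review the staged plan, then commit it
riak-admin cluster plan
riak-admin cluster commit
```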
## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.0.1/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.0.1/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.0.1/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.0.1/using/cluster-operations/adding-removing-nodes) from a cluster. > **Ring Creation Size** > diff --git a/content/riak/kv/2.0.1/using/security.md b/content/riak/kv/2.0.1/using/security.md index 45121eb882..229d4cd27a 100644 --- a/content/riak/kv/2.0.1/using/security.md +++ b/content/riak/kv/2.0.1/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.0.1/ops/advanced/security --- -[config reference search]: /riak/kv/2.0.1/configuring/reference/#search -[config search enabling]: /riak/kv/2.0.1/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.0.1/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.0.1/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.0.1/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.0.1/using/security/basics -[security managing]: /riak/kv/2.0.1/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.0.1/using/security/basics +[security managing]: {{}}riak/kv/2.0.1/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.0.1/developing/usage/search +[usage search]: {{}}riak/kv/2.0.1/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.0.1/using/security/basics.md b/content/riak/kv/2.0.1/using/security/basics.md index 8de7119f61..5b3a35b7d0 100644 --- a/content/riak/kv/2.0.1/using/security/basics.md +++ b/content/riak/kv/2.0.1/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.0.1/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.0.1/using/security/managing-sources/). 
As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.0.1/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.0.1/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.0.1/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.0.1/using/reference/custom-code). 1. Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.0.1/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.0.1/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.0.1/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.0.1/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{}}riak/kv/2.0.1/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.0.1/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{}}riak/kv/2.0.1/developing/usage/bucket-types) in addition to setting bucket properties. 
`riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.0.1/configuring/search/) document. +[Riak Search Settings]({{}}riak/kv/2.0.1/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.0.1/using/security/managing-sources/). +An more in-depth tutorial can be found in [Managing Security Sources]({{}}riak/kv/2.0.1/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.0.1/using/security/managing-sources/) document. +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.0.1/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.0.1/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.0.1/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.0.1/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.0.1/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.0.1/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.0.1/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.0.1/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). 
-If, however, you are using the [HTTP API](/riak/kv/2.0.1/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.0.1/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.0.1/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.0.1/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.0.1/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.0.1/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.0.1/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.0.1/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.0.1/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.0.1/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.0.1/configuring/reference/#directories).
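For concreteness, the certificate configuration described above comes down to a handful of `riak.conf` settings. A minimal sketch, assuming the default `/etc/riak` layout; the paths are placeholders for your own generated certs:

```riakconf
## Illustrative paths only; point these at your generated certs.
platform_etc_dir = /etc/riak
ssl.certfile = /etc/riak/cert.pem
ssl.keyfile = /etc/riak/key.pem
ssl.cacertfile = /etc/riak/cacert.pem
```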
<code>vnode_management_timer</code> Sets the frequency with which <a -href="/riak/kv/2.0.1/learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between +href="../../learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between this node and other nodes in the cluster. <code>10s</code> (10 seconds)
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.0.1/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificates by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.0.1/using/security/managing-sources.md b/content/riak/kv/2.0.1/using/security/managing-sources.md index a07dee7f0a..a4b8a8e575 100644 --- a/content/riak/kv/2.0.1/using/security/managing-sources.md +++ b/content/riak/kv/2.0.1/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.0.1/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.0.1/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.0.1/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.0.1/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.0.1/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.0.1/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.0.1/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.0.1/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.0.1/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.0.1/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. 
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.0.1/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.0.1/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.0.1/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.0.1/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.0.2/_reference-links.md b/content/riak/kv/2.0.2/_reference-links.md index dedfa5455a..8101274d4f 100644 --- a/content/riak/kv/2.0.2/_reference-links.md +++ b/content/riak/kv/2.0.2/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.0.2/downloads/ -[install index]: /riak/kv/2.0.2/setup/installing -[upgrade index]: /riak/kv/2.0.2/upgrading -[plan index]: /riak/kv/2.0.2/planning -[config index]: /riak/2.0.2/using/configuring/ -[config reference]: /riak/kv/2.0.2/configuring/reference/ -[manage index]: /riak/kv/2.0.2/using/managing -[performance index]: /riak/kv/2.0.2/using/performance -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.0.2/downloads/ +[install index]: {{}}riak/kv/2.0.2/setup/installing +[upgrade index]: {{}}riak/kv/2.0.2/upgrading +[plan index]: {{}}riak/kv/2.0.2/planning +[config index]: {{}}riak/kv/2.0.2/using/configuring/ +[config reference]: {{}}riak/kv/2.0.2/configuring/reference/ +[manage index]: {{}}riak/kv/2.0.2/using/managing +[performance index]: {{}}riak/kv/2.0.2/using/performance +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.0.2/setup/planning -[plan start]: /riak/kv/2.0.2/setup/planning/start -[plan backend]: /riak/kv/2.0.2/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.2/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.2/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.0.2/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.0.2/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.0.2/setup/planning/best-practices -[plan future]: /riak/kv/2.0.2/setup/planning/future +[plan index]: {{}}riak/kv/2.0.2/setup/planning +[plan start]: {{}}riak/kv/2.0.2/setup/planning/start +[plan backend]: {{}}riak/kv/2.0.2/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.2/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.2/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.0.2/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.0.2/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.0.2/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.0.2/setup/planning/future ## Installing -[install index]: /riak/kv/2.0.2/setup/installing -[install aws]: /riak/kv/2.0.2/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.2/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.2/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.2/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.2/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.2/setup/installing/smartos -[install solaris]: /riak/kv/2.0.2/setup/installing/solaris -[install suse]: /riak/kv/2.0.2/setup/installing/suse -[install windows azure]: /riak/kv/2.0.2/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.0.2/setup/installing +[install aws]: {{}}riak/kv/2.0.2/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.2/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.2/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.2/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.2/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.2/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.2/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.2/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.2/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.2/setup/installing/source -[install source erlang]: /riak/kv/2.0.2/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.0.2/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.0.2/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.2/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.0.2/setup/installing/source/jvm -[install verify]: /riak/kv/2.0.2/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.2/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.0.2/setup/upgrading -[upgrade checklist]: /riak/kv/2.0.2/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.0.2/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.0.2/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.0.2/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.0.2/setup/downgrade +[upgrade index]: {{}}riak/kv/2.0.2/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.0.2/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.0.2/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.0.2/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.0.2/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.0.2/setup/downgrade ## Configuring -[config index]: /riak/kv/2.0.2/configuring -[config basic]: /riak/kv/2.0.2/configuring/basic -[config backend]: /riak/kv/2.0.2/configuring/backend -[config manage]: /riak/kv/2.0.2/configuring/managing -[config reference]: /riak/kv/2.0.2/configuring/reference/ -[config strong consistency]: /riak/kv/2.0.2/configuring/strong-consistency -[config load balance]: /riak/kv/2.0.2/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.0.2/configuring/mapreduce -[config search]: /riak/kv/2.0.2/configuring/search/ +[config index]: {{}}riak/kv/2.0.2/configuring +[config basic]: {{}}riak/kv/2.0.2/configuring/basic +[config backend]: {{}}riak/kv/2.0.2/configuring/backend +[config manage]: 
{{}}riak/kv/2.0.2/configuring/managing +[config reference]: {{}}riak/kv/2.0.2/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.0.2/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.0.2/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.0.2/configuring/mapreduce +[config search]: {{}}riak/kv/2.0.2/configuring/search/ -[config v3 mdc]: /riak/kv/2.0.2/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.2/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.2/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.0.2/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.0.2/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.0.2/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.0.2/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.0.2/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.0.2/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.0.2/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.0.2/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.0.2/using/ -[use admin commands]: /riak/kv/2.0.2/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.0.2/using/running-a-cluster +[use index]: {{}}riak/kv/2.0.2/using/ +[use admin commands]: {{}}riak/kv/2.0.2/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.0.2/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.0.2/using/reference/custom-code -[use ref handoff]: /riak/kv/2.0.2/using/reference/handoff -[use ref monitoring]: /riak/kv/2.0.2/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.0.2/using/reference/search -[use ref 2i]: /riak/kv/2.0.2/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.0.2/using/reference/snmp -[use ref strong consistency]: /riak/2.0.2/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.0.2/using/reference/jmx -[use ref obj del]: /riak/kv/2.0.2/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.0.2/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.0.2/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.0.2/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.0.2/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.0.2/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.0.2/using/reference/search +[use ref 2i]: {{}}riak/kv/2.0.2/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.0.2/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.0.2/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.0.2/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.0.2/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.0.2/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.0.2/using/admin/ -[use admin commands]: /riak/kv/2.0.2/using/admin/commands/ -[use admin riak cli]: 
/riak/kv/2.0.2/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.0.2/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.0.2/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.0.2/using/admin/ +[use admin commands]: {{}}riak/kv/2.0.2/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.0.2/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.0.2/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.0.2/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.0.2/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.0.2/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.0.2/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.0.2/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.0.2/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.0.2/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.0.2/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.0.2/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.0.2/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.0.2/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.0.2/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.0.2/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.0.2/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.0.2/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.0.2/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.0.2/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.0.2/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.0.2/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.0.2/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.0.2/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.0.2/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.0.2/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.0.2/using/repair-recovery -[repair recover index]: /riak/kv/2.0.2/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.0.2/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.0.2/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.0.2/using/security/ -[security basics]: /riak/kv/2.0.2/using/security/basics -[security managing]: /riak/kv/2.0.2/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.0.2/using/security/ +[security basics]: {{}}riak/kv/2.0.2/using/security/basics +[security managing]: {{}}riak/kv/2.0.2/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.0.2/using/performance/ -[perf 
benchmark]: /riak/kv/2.0.2/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.2/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.0.2/using/performance/erlang -[perf aws]: /riak/kv/2.0.2/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.0.2/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.0.2/using/performance/ +[perf benchmark]: {{}}riak/kv/2.0.2/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.2/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.0.2/using/performance/erlang +[perf aws]: {{}}riak/kv/2.0.2/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.0.2/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.0.2/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.0.2/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.0.2/developing -[dev client libraries]: /riak/kv/2.0.2/developing/client-libraries -[dev data model]: /riak/kv/2.0.2/developing/data-modeling -[dev data types]: /riak/kv/2.0.2/developing/data-types -[dev kv model]: /riak/kv/2.0.2/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.0.2/developing +[dev client libraries]: {{}}riak/kv/2.0.2/developing/client-libraries +[dev data model]: {{}}riak/kv/2.0.2/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.2/developing/data-types +[dev kv model]: {{}}riak/kv/2.0.2/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.0.2/developing/getting-started -[getting started java]: /riak/kv/2.0.2/developing/getting-started/java -[getting started ruby]: /riak/kv/2.0.2/developing/getting-started/ruby -[getting started python]: /riak/kv/2.0.2/developing/getting-started/python -[getting started php]: /riak/kv/2.0.2/developing/getting-started/php -[getting started csharp]: /riak/kv/2.0.2/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.0.2/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.0.2/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.0.2/developing/getting-started/golang - -[obj model java]: /riak/kv/2.0.2/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.2/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.2/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.2/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.2/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.2/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.2/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.0.2/developing/getting-started +[getting started java]: {{}}riak/kv/2.0.2/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.0.2/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.0.2/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.0.2/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.0.2/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.0.2/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.0.2/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.0.2/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.0.2/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.0.2/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.2/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.2/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.2/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.2/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.2/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.0.2/developing/usage -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.2/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.2/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.0.2/developing/usage/content-types -[usage create objects]: /riak/kv/2.0.2/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.0.2/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.0.2/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.0.2/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.2/developing/usage/search -[usage search schema]: /riak/kv/2.0.2/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.2/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.0.2/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.0.2/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.0.2/developing/usage +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.2/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.2/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.0.2/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.0.2/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.0.2/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.0.2/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.0.2/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.2/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.2/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.0.2/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.0.2/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.0.2/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.0.2/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.0.2/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.0.2/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.0.2/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.0.2/developing/api/backend -[dev api http]: /riak/kv/2.0.2/developing/api/http -[dev api http status]: /riak/kv/2.0.2/developing/api/http/status -[dev api pbc]: /riak/kv/2.0.2/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.0.2/developing/api/backend +[dev api http]: {{}}riak/kv/2.0.2/developing/api/http +[dev api http status]: {{}}riak/kv/2.0.2/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.0.2/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.0.2/learn/glossary/ -[glossary aae]: /riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.0.2/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.0.2/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.0.2/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode -[concept aae]: /riak/kv/2.0.2/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.0.2/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.2/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.2/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.0.2/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.2/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.2/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.2/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.2/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.0.2/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.2/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.2/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.0.2/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.2/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.2/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.0.2/add-ons.md b/content/riak/kv/2.0.2/add-ons.md index 82e6d0dd14..85dbf903a3 100644 --- a/content/riak/kv/2.0.2/add-ons.md +++ b/content/riak/kv/2.0.2/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.0.2/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.0.2/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.0.2/add-ons/redis/developing-rra.md b/content/riak/kv/2.0.2/add-ons/redis/developing-rra.md index 9ada608f7f..2a1b188469 100644 --- a/content/riak/kv/2.0.2/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.0.2/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.0.2/developing/api/http +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.0.2/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.0.2/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.0.2/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.0.2/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.0.2/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.0.2/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.0.2/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.0.2/add-ons/redis/redis-add-on-features.md index c85ef76a13..21869776a3 100644 --- a/content/riak/kv/2.0.2/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.0.2/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.0.2/add-ons/redis/set-up-rra.md b/content/riak/kv/2.0.2/add-ons/redis/set-up-rra.md index 0fbdf73199..af341ef681 100644 --- a/content/riak/kv/2.0.2/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.0.2/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.0.2/setup/installing -[perf open files]: /riak/kv/2.0.2/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.0.2/setup/installing +[perf open files]: {{}}riak/kv/2.0.2/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. 
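Because RRA speaks the Redis protocol, a stock Redis client makes a quick smoke test once setup is complete. A minimal sketch, assuming the cache proxy is listening on port 22122 (adjust to your proxy configuration) and using a throwaway key:

```bash
# SET takes the write-around path to Riak; GET exercises the read-through cache.
redis-cli -h 127.0.0.1 -p 22122 SET test:rra "hello"
redis-cli -h 127.0.0.1 -p 22122 GET test:rra
```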
diff --git a/content/riak/kv/2.0.2/add-ons/redis/using-rra.md b/content/riak/kv/2.0.2/add-ons/redis/using-rra.md index d7ce66f3e5..fafaf37473 100644 --- a/content/riak/kv/2.0.2/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.0.2/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.0.2/developing/api/http/ +[dev api http]: {{}}riak/kv/2.0.2/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.0.2/configuring/backend.md b/content/riak/kv/2.0.2/configuring/backend.md index c22f1aaf72..355f34ec99 100644 --- a/content/riak/kv/2.0.2/configuring/backend.md +++ b/content/riak/kv/2.0.2/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.2/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.2/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.2/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.2/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.0.2/configuring/basic.md b/content/riak/kv/2.0.2/configuring/basic.md index e46bd2fef1..9c0c49e80e 100644 --- a/content/riak/kv/2.0.2/configuring/basic.md +++ b/content/riak/kv/2.0.2/configuring/basic.md @@ -14,26 +14,26 @@ aliases: - /riak/2.0.2/ops/building/configuration/ --- -[config reference]: /riak/kv/2.0.2/configuring/reference -[use running cluster]: /riak/kv/2.0.2/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.0.2/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.0.2/using/performance/erlang -[plan start]: /riak/kv/2.0.2/setup/planning/start -[plan best practices]: /riak/kv/2.0.2/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.0.2/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.0.2/setup/planning/backend -[plan backend multi]: /riak/kv/2.0.2/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.0.2/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.0.2/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.2/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.2/using/performance -[perf aws]: /riak/kv/2.0.2/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.0.2/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[use running cluster]: {{}}riak/kv/2.0.2/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.0.2/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.0.2/using/performance/erlang +[plan start]: {{}}riak/kv/2.0.2/setup/planning/start +[plan best practices]: {{}}riak/kv/2.0.2/setup/planning/best-practices +[cluster ops backup]: 
{{}}riak/kv/2.0.2/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.0.2/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.0.2/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.2/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.0.2/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.0.2/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.2/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.2/using/performance +[perf aws]: {{}}riak/kv/2.0.2/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.0.2/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -207,15 +207,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.0.2/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.2/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
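As a concrete illustration of that reset mechanism, a hedged sketch, assuming a node serving HTTP on `localhost:8098` and a hypothetical bucket named `mybucket`:

```bash
# DELETE on the props resource clears any custom properties, so the bucket
# falls back to the (possibly updated) defaults on its next use.
curl -XDELETE http://localhost:8098/buckets/mybucket/props
```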
## System tuning diff --git a/content/riak/kv/2.0.2/configuring/load-balancing-proxy.md b/content/riak/kv/2.0.2/configuring/load-balancing-proxy.md index 6c3b79d274..92075e4c64 100644 --- a/content/riak/kv/2.0.2/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.0.2/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.2/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.0.2/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.0.2/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.0.2/configuring/managing.md b/content/riak/kv/2.0.2/configuring/managing.md index d684883f12..db438b32bc 100644 --- a/content/riak/kv/2.0.2/configuring/managing.md +++ b/content/riak/kv/2.0.2/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.0.2/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.0.2/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.0.2/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.0.2/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.0.2/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.0.2/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.0.2/configuring/mapreduce.md b/content/riak/kv/2.0.2/configuring/mapreduce.md index a00aaec5c9..f1e5011da1 100644 --- a/content/riak/kv/2.0.2/configuring/mapreduce.md +++ b/content/riak/kv/2.0.2/configuring/mapreduce.md @@ -14,9 +14,9 @@ aliases: - /riak/2.0.2/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.0.2/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.0.2/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.0.2/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.2/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.0.2/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.0.2/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.0.2/configuring/reference.md b/content/riak/kv/2.0.2/configuring/reference.md index 6ab0e762e8..d454287827 100644 --- a/content/riak/kv/2.0.2/configuring/reference.md +++ b/content/riak/kv/2.0.2/configuring/reference.md @@ -1875,8 +1875,8 @@ package) and in R14B04 via a custom repository and branch. 
diff --git a/content/riak/kv/2.0.2/configuring/search.md b/content/riak/kv/2.0.2/configuring/search.md index d32cd584b9..bcb25c2413 100644 --- a/content/riak/kv/2.0.2/configuring/search.md +++ b/content/riak/kv/2.0.2/configuring/search.md @@ -14,20 +14,20 @@ aliases: - /riak/2.0.2/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.0.2/developing/usage/search -[usage search schema]: /riak/kv/2.0.2/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.2/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.0.2/developing/usage/custom-extractors -[config reference]: /riak/kv/2.0.2/configuring/reference -[config reference#search]: /riak/kv/2.0.2/configuring/reference/#search -[glossary aae]: /riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.0.2/using/security/ +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.2/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.2/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.0.2/developing/usage/custom-extractors +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[config reference#search]: {{}}riak/kv/2.0.2/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.0.2/using/security/ > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Riak Search Settings](http://docs.basho.com/riak/1.4.8/ops/advanced/configs/search/). +Yokozuna). This document covers Riak's Search subsystem from an operational perspective. 
If you are looking for more developer-focused diff --git a/content/riak/kv/2.0.2/configuring/strong-consistency.md b/content/riak/kv/2.0.2/configuring/strong-consistency.md index ae49819640..a7e9928148 100644 --- a/content/riak/kv/2.0.2/configuring/strong-consistency.md +++ b/content/riak/kv/2.0.2/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.0.2/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.0.2/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.0.2/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.0.2/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.0.2/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.0.2/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.0.2/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.0.2/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.0.2/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.0.2/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.0.2/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.0.2/learn/concepts/causal-context -[dev data types]: /riak/kv/2.0.2/developing/data-types -[glossary aae]: /riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.0.2/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.0.2/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.0.2/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.0.2/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.0.2/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.0.2/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.0.2/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.0.2/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.0.2/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.0.2/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.0.2/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.0.2/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.0.2/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.0.2/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.0.2/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.0.2/learn/concepts/causal-context +[dev data types]: 
{{}}riak/kv/2.0.2/developing/data-types +[glossary aae]: {{}}riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.0.2/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.0.2/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.0.2/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.0.2/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.0.2/configuring/v2-multi-datacenter.md b/content/riak/kv/2.0.2/configuring/v2-multi-datacenter.md index 7a62174b64..95665aafdb 100644 --- a/content/riak/kv/2.0.2/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.2/configuring/v2-multi-datacenter.md @@ -16,7 +16,7 @@ aliases: - /riak/2.0.2/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.0.2/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.2/configuring/v2-multi-datacenter/ssl Riak Enterprise's Multi-Datacenter Replication capabilities offer a variety of configurable parameters. diff --git a/content/riak/kv/2.0.2/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.0.2/configuring/v2-multi-datacenter/nat.md index d87f51b238..b3a5b9cf8b 100644 --- a/content/riak/kv/2.0.2/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.2/configuring/v2-multi-datacenter/nat.md @@ -16,7 +16,7 @@ aliases: - /riak/2.0.2/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.0.2/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.2/configuring/v2-multi-datacenter/ssl Riak Enterprise supports replication of data on networks that use static NAT. This capability can be used for replicating data over the internet diff --git a/content/riak/kv/2.0.2/configuring/v3-multi-datacenter.md b/content/riak/kv/2.0.2/configuring/v3-multi-datacenter.md index adf4389462..16021f3e10 100644 --- a/content/riak/kv/2.0.2/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.2/configuring/v3-multi-datacenter.md @@ -16,8 +16,8 @@ aliases: - /riak/2.0.2/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.0.2/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.0.2/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/nat.md index a8e01d2a68..1f7661ce16 100644 --- a/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/nat.md @@ -16,7 +16,7 @@ aliases: - /riak/2.0.2/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. 
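For reference, static NAT mappings for replication are managed with the `riak-repl nat-map` subcommand. A sketch with placeholder addresses; check the v3 replication operations docs for the exact form in your release:

```bash
# Map the externally visible address (and optional port) to this node's internal address.
riak-repl nat-map add 50.16.238.123:9080 192.168.10.11
# List the mappings currently in effect.
riak-repl nat-map show
```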
diff --git a/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/quick-start.md index 18e4a0b56b..1b6825a5de 100644 --- a/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/quick-start.md @@ -16,9 +16,9 @@ aliases: - /riak/2.0.2/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.0.2/using/performance -[config v3 mdc]: /riak/kv/2.0.2/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.0.2/using/performance +[config v3 mdc]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl.md index 53b7afb347..b6fdefa833 100644 --- a/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl.md @@ -16,7 +16,7 @@ aliases: - /riak/2.0.2/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.0.2/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.0.2/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.0.2/developing/api/backend.md b/content/riak/kv/2.0.2/developing/api/backend.md index befc7a6dbe..e19bd41ad6 100644 --- a/content/riak/kv/2.0.2/developing/api/backend.md +++ b/content/riak/kv/2.0.2/developing/api/backend.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/backend-api --- -[plan backend]: /riak/kv/2.0.2/setup/planning/backend +[plan backend]: {{}}riak/kv/2.0.2/setup/planning/backend Riak's storage API uniformly applies to all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.0.2/developing/api/http.md b/content/riak/kv/2.0.2/developing/api/http.md index 34e944b32b..96ac047aae 100644 --- a/content/riak/kv/2.0.2/developing/api/http.md +++ b/content/riak/kv/2.0.2/developing/api/http.md @@ -28,50 +28,50 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. 
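To make the escaping rule above concrete, a key such as `docs/readme` (bucket and key names here are hypothetical) would be requested with the slash encoded:

```bash
# %2F stands in for the slash in the key "docs/readme".
curl http://localhost:8098/types/default/buckets/mybucket/keys/docs%2Freadme
```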
Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.0.2/developing/api/http/get-bucket-props)
-`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.0.2/developing/api/http/set-bucket-props)
-`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.0.2/developing/api/http/reset-bucket-props)
-`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.0.2/developing/api/http/list-buckets)
-`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.0.2/developing/api/http/list-keys)
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/list-keys)

## Object-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.0.2/developing/api/http/fetch-object)
-`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.2/developing/api/http/store-object)
-`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.2/developing/api/http/store-object)
-`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.0.2/developing/api/http/delete-object)
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/delete-object)

## Riak-Data-Type-related Operations

-For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.0.2/learn/concepts/crdts),
-see the `curl` examples in [Using Data Types](/riak/kv/2.0.2/developing/data-types).
+For documentation on the HTTP API for [Riak Data Types]({{< baseurl >}}riak/kv/2.0.2/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{< baseurl >}}riak/kv/2.0.2/developing/data-types).
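As a quick illustration of the object URLs tabled above, here is a minimal sketch assuming a local node on the default HTTP port 8098 and the `default` bucket type; the bucket, key, and payload are invented:

```curl
# Store, then fetch, an object. The key "docs/readme" is sent URL-escaped
# as docs%2Freadme, per the slash-escaping note earlier in this page.
curl -XPUT http://localhost:8098/types/default/buckets/site/keys/docs%2Freadme \
  -H "Content-Type: application/json" \
  -d '{"hello": "world"}'

curl http://localhost:8098/types/default/buckets/site/keys/docs%2Freadme
```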
## Query-related Operations

Method | URL | Doc
:------|:----|:---
-`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.0.2/developing/api/http/mapreduce)
-`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes](/riak/kv/2.0.2/developing/api/http/secondary-indexes)
-`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes](/riak/kv/2.0.2/developing/api/http/secondary-indexes)
+`POST` | `/mapred` | [HTTP MapReduce]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/secondary-indexes)

## Server-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/ping` | [HTTP Ping](/riak/kv/2.0.2/developing/api/http/ping)
-`GET` | `/stats` | [HTTP Status](/riak/kv/2.0.2/developing/api/http/status)
-`GET` | `/` | [HTTP List Resources](/riak/kv/2.0.2/developing/api/http/list-resources)
+`GET` | `/ping` | [HTTP Ping]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/list-resources)

## Search-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/search/query/<index_name>` | [HTTP Search Query](/riak/kv/2.0.2/developing/api/http/search-query)
-`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.0.2/developing/api/http/search-index-info)
-`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index](/riak/kv/2.0.2/developing/api/http/fetch-search-index)
-`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index](/riak/kv/2.0.2/developing/api/http/store-search-index)
-`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index](/riak/kv/2.0.2/developing/api/http/delete-search-index)
-`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema](/riak/kv/2.0.2/developing/api/http/fetch-search-schema)
-`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema](/riak/kv/2.0.2/developing/api/http/store-search-schema)
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{< baseurl >}}riak/kv/2.0.2/developing/api/http/store-search-schema)
diff --git a/content/riak/kv/2.0.2/developing/api/http/counters.md b/content/riak/kv/2.0.2/developing/api/http/counters.md
index b845105c9f..7c29cb900e 100644
--- a/content/riak/kv/2.0.2/developing/api/http/counters.md
+++ b/content/riak/kv/2.0.2/developing/api/http/counters.md
@@ -53,7 +53,7 @@ GET /buckets/BUCKET/counters/KEY

## Response

-The regular POST/PUT ([HTTP Store Object](/riak/kv/2.0.2/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.0.2/developing/api/http/fetch-object)) responses apply here.
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.0.2/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.0.2/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.0.2/developing/api/http/fetch-object.md b/content/riak/kv/2.0.2/developing/api/http/fetch-object.md index e7a8c44621..8ed31ea117 100644 --- a/content/riak/kv/2.0.2/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.0.2/developing/api/http/fetch-object.md @@ -39,14 +39,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.0.2/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.0.2/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.0.2/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.2/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.0.2/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.2/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.0.2/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.0.2/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -73,7 +73,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.2/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.2/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.0.2/developing/api/http/fetch-search-index.md b/content/riak/kv/2.0.2/developing/api/http/fetch-search-index.md index f736038ec4..adb285014a 100644 --- a/content/riak/kv/2.0.2/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.0.2/developing/api/http/fetch-search-index.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.0.2/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.0.2/developing/usage/search/#simple-setup). 
## Request @@ -35,7 +35,7 @@ GET /search/index/ ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.0.2/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.0.2/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.0.2/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.0.2/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.0.2/developing/api/http/fetch-search-schema.md index 8d5c51ef7e..1b2a8097a0 100644 --- a/content/riak/kv/2.0.2/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.0.2/developing/api/http/fetch-search-schema.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.0.2/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.0.2/developing/api/http/get-bucket-props.md b/content/riak/kv/2.0.2/developing/api/http/get-bucket-props.md index 3c917bc3a1..88fbe6d14c 100644 --- a/content/riak/kv/2.0.2/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.0.2/developing/api/http/get-bucket-props.md @@ -26,7 +26,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.0.2/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.0.2/developing/api/http/list-keys). ## Response @@ -42,7 +42,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.0.2/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{}}riak/kv/2.0.2/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.0.2/developing/api/http/link-walking.md b/content/riak/kv/2.0.2/developing/api/http/link-walking.md index 3c8e3261c5..e189b49726 100644 --- a/content/riak/kv/2.0.2/developing/api/http/link-walking.md +++ b/content/riak/kv/2.0.2/developing/api/http/link-walking.md @@ -16,8 +16,8 @@ aliases: Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.0.2/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.0.2/learn/glossary/#links). +is a special case of [MapReduce]({{}}riak/kv/2.0.2/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.0.2/learn/glossary/#links). ## Request @@ -63,7 +63,7 @@ single object that was found. If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. 
Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.0.2/developing/api/http/fetch-object), without the status +response from [fetching the object]({{}}riak/kv/2.0.2/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.0.2/developing/api/http/list-resources.md b/content/riak/kv/2.0.2/developing/api/http/list-resources.md index 2ca27bd638..03b9679de9 100644 --- a/content/riak/kv/2.0.2/developing/api/http/list-resources.md +++ b/content/riak/kv/2.0.2/developing/api/http/list-resources.md @@ -19,14 +19,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.0.2/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.0.2/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.0.2/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.0.2/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.0.2/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.0.2/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.0.2/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.0.2/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.0.2/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.0.2/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.0.2/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.0.2/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.0.2/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.0.2/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.0.2/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.0.2/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.0.2/developing/api/http/mapreduce.md b/content/riak/kv/2.0.2/developing/api/http/mapreduce.md index f81a1217da..57ad30e256 100644 --- a/content/riak/kv/2.0.2/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.0.2/developing/api/http/mapreduce.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.0.2/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{}}riak/kv/2.0.2/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -23,7 +23,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.0.2/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. 
The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.0.2/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.0.2/developing/api/http/search-index-info.md b/content/riak/kv/2.0.2/developing/api/http/search-index-info.md index 6c61bab508..0cefdaef4e 100644 --- a/content/riak/kv/2.0.2/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.0.2/developing/api/http/search-index-info.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.0.2/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.0.2/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.0.2/developing/api/http/search-query.md b/content/riak/kv/2.0.2/developing/api/http/search-query.md index 419886df9a..1ea9fe7243 100644 --- a/content/riak/kv/2.0.2/developing/api/http/search-query.md +++ b/content/riak/kv/2.0.2/developing/api/http/search-query.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.0.2/developing/usage/search) query. +Performs a [Riak KV Search]({{}}riak/kv/2.0.2/developing/usage/search) query. ## Request @@ -29,7 +29,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.0.2/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{}}riak/kv/2.0.2/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.0.2/developing/api/http/secondary-indexes.md b/content/riak/kv/2.0.2/developing/api/http/secondary-indexes.md index f4858a42d4..186592d764 100644 --- a/content/riak/kv/2.0.2/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.0.2/developing/api/http/secondary-indexes.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.0.2/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
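A minimal sketch of the 2i query URLs listed in the HTTP API table earlier, assuming a local node, the `default` bucket type, and an invented `email_bin` index on a `users` bucket:

```curl
# Exact-match query: keys whose email_bin index equals "jsmith"
curl http://localhost:8098/types/default/buckets/users/index/email_bin/jsmith

# Range query: keys whose email_bin index falls between "a" and "m"
curl http://localhost:8098/types/default/buckets/users/index/email_bin/a/m
```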
## Request diff --git a/content/riak/kv/2.0.2/developing/api/http/set-bucket-props.md b/content/riak/kv/2.0.2/developing/api/http/set-bucket-props.md index 599d11b94e..8ccb27e4ba 100644 --- a/content/riak/kv/2.0.2/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.0.2/developing/api/http/set-bucket-props.md @@ -36,8 +36,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.0.2/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.0.2/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{}}riak/kv/2.0.2/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.0.2/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.0.2/developing/api/http/status.md b/content/riak/kv/2.0.2/developing/api/http/status.md index bc5d97c214..31fe362902 100644 --- a/content/riak/kv/2.0.2/developing/api/http/status.md +++ b/content/riak/kv/2.0.2/developing/api/http/status.md @@ -134,7 +134,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.0.2/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.0.2/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.0.2/developing/api/http/store-object.md b/content/riak/kv/2.0.2/developing/api/http/store-object.md index 1987a89009..0c0074cdee 100644 --- a/content/riak/kv/2.0.2/developing/api/http/store-object.md +++ b/content/riak/kv/2.0.2/developing/api/http/store-object.md @@ -37,8 +37,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.0.2/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.2/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{}}riak/kv/2.0.2/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.2/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -82,7 +82,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.0.2/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.0.2/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. 
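For instance (a hedged sketch, assuming a local node; the bucket, key, and value are invented), `returnbody=true` makes the response carry the stored object, and possibly siblings, as described above:

```curl
# Store an object and ask Riak to echo the resulting object back.
curl -XPUT "http://localhost:8098/types/default/buckets/test/keys/doc1?returnbody=true" \
  -H "Content-Type: text/plain" \
  -d "updated value"
```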
diff --git a/content/riak/kv/2.0.2/developing/api/http/store-search-index.md b/content/riak/kv/2.0.2/developing/api/http/store-search-index.md index bd721a0ae2..a85bbf82ae 100644 --- a/content/riak/kv/2.0.2/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.0.2/developing/api/http/store-search-index.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.0.2/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.0.2/developing/usage/search/#simple-setup). ## Request @@ -25,11 +25,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.0.2/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.0.2/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -38,7 +38,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.0.2/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.0.2/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.0.2/developing/api/http/store-search-schema.md b/content/riak/kv/2.0.2/developing/api/http/store-search-schema.md index c09e2f0e52..f8f2bacc4e 100644 --- a/content/riak/kv/2.0.2/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.0.2/developing/api/http/store-search-schema.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.0.2/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas). ## Request @@ -25,7 +25,7 @@ PUT /search/schema/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.0.2/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the filed `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request: diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers.md index cad593caa5..4f315afcbc 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers.md @@ -139,47 +139,47 @@ message RpbErrorResp { ## Bucket Operations -* [PBC List Buckets](/riak/kv/2.0.2/developing/api/protocol-buffers/list-buckets) -* [PBC List Keys](/riak/kv/2.0.2/developing/api/protocol-buffers/list-keys) -* [PBC Get Bucket Properties](/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props) -* [PBC Set Bucket Properties](/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props) -* [PBC Reset Bucket Properties](/riak/kv/2.0.2/developing/api/protocol-buffers/reset-bucket-props) +* [PBC List Buckets]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/reset-bucket-props) ## Object/Key Operations -* [PBC Fetch Object](/riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object) -* [PBC Store Object](/riak/kv/2.0.2/developing/api/protocol-buffers/store-object) -* [PBC Delete Object](/riak/kv/2.0.2/developing/api/protocol-buffers/delete-object) +* [PBC Fetch Object]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/delete-object) ## Query Operations -* [PBC MapReduce](/riak/kv/2.0.2/developing/api/protocol-buffers/mapreduce) -* [PBC Secondary Indexes](/riak/kv/2.0.2/developing/api/protocol-buffers/secondary-indexes) -* [PBC Search](/riak/kv/2.0.2/developing/api/protocol-buffers/search) +* [PBC MapReduce]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/search) ## Server Operations -* [PBC Ping](/riak/kv/2.0.2/developing/api/protocol-buffers/ping) -* [PBC Server Info](/riak/kv/2.0.2/developing/api/protocol-buffers/server-info) +* [PBC Ping]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/server-info) ## Bucket Type Operations -* [PBC Get Bucket Type](/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-type) -* [PBC Set Bucket Type](/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-type) +* [PBC Get Bucket Type]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-type) ## Data Type Operations -* [PBC Data Type Fetch](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-fetch) -* [PBC Data Type Union](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-union) -* [PBC Data Type Store](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-store) -* [PBC Data Type Counter 
Store](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/auth-req.md index d280c389c3..76051994f1 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/auth-req.md @@ -26,4 +26,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.0.2/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.0.2/using/security/basics). diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/delete-object.md index ea3d803d76..19e82e968a 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/delete-object.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.0.2/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{}}riak/kv/2.0.2/using/cluster-operations/bucket-types)/bucket/key location. 
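The PBC message itself is binary, but for a quick shell-level check, the HTTP counterpart of the same delete (assuming a local node; bucket and key are invented) looks like:

```curl
# Delete the object at bucket type "default", bucket "test", key "doc1"
curl -XDELETE http://localhost:8098/types/default/buckets/test/keys/doc1
```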
## Request diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-counter-store.md index 913af6a72b..94d71314f6 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-counter-store.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.0.2/developing/data-types). +An operation to update a [counter]({{}}riak/kv/2.0.2/developing/data-types). ## Request @@ -27,4 +27,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-fetch.md index 1a81311988..4d352b016a 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-fetch.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.0.2/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.0.2/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -41,14 +41,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.0.2/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{}}riak/kv/2.0.2/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal @@ -71,7 +71,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. 
```protobuf message DtFetchResp { @@ -90,7 +90,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.0.2/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{}}riak/kv/2.0.2/learn/glossary/#vector-clock) for standard KV updates. The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store.md index d6461c803f..24eb495523 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store.md @@ -65,7 +65,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-set-store.md index aa886fe3eb..585cecd52d 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-set-store.md @@ -15,7 +15,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-store.md index d7c97f26d7..7c7a558055 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-store.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.0.2/developing/data-types). +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.0.2/developing/data-types). ## Request @@ -24,7 +24,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`. The `DtOp` value specifies which Data Type-specific operation is being -performed. 
More on that in the [PBC Data Type Union](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-union) document. +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-union) document. ```protobuf message DtUpdateReq { @@ -49,11 +49,11 @@ message DtUpdateReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored -`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.0.2/using/cluster-operations/bucket-types). +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.0.2/using/cluster-operations/bucket-types). Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a -[counter](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store). +[counter]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-map-store). ```protobuf message DtOp { @@ -78,7 +78,7 @@ a special value denoting `one` (`4294967295-1`), `quorum` Parameter | Description :---------|:----------- `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client is `return_body` is set to `true`. -`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.0.2/learn/glossary/#vector-clock) +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.0.2/learn/glossary/#vector-clock) `w` | Write quorum, i.e. how many replicas to write to before returning a successful response `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted @@ -91,7 +91,7 @@ Parameter | Description ## Response The response to a Data Type update request is analogous to -[`RpbPutResp`](/riak/kv/2.0.2/developing/api/protocol-buffers/store-object) for KV operations. If the +[`RpbPutResp`]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/store-object) for KV operations. If the `return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-union.md index 4ecedc2411..34acbb6408 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-union.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/dt-union.md @@ -27,4 +27,4 @@ message DtOp { ``` The included operation depends on the Data Type that is being updated. -`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.0.2/developing/api/protocol-buffers/dt-store) message. 
+`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object.md index 71e7445c17..b8dee54b11 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object.md @@ -46,7 +46,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or @@ -86,7 +86,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.0.2/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -113,7 +113,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `video/mp4` -* `vtag` --- The object's [vtag](/riak/kv/2.0.2/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{}}riak/kv/2.0.2/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -131,7 +131,7 @@ of the following optional parameters: } ``` Notice that both a key and value can be stored or just a key. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.0.2/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key) diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props.md index 4cdbc50cbf..e5a98aeed5 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props.md @@ -25,7 +25,7 @@ message RpbGetBucketReq { } ``` -The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.0.2/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.0.2/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, the `default` bucket type will be used. 
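Again, the request above is a protocol buffers message; the HTTP endpoint from the API table is convenient for spot-checking the same properties from a shell (assuming a local node; the bucket name is invented):

```curl
# Fetch the bucket properties that RpbGetBucketReq would return
curl http://localhost:8098/types/default/buckets/test/props
```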
## Response @@ -84,7 +84,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.0.2/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{}}riak/kv/2.0.2/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -105,5 +105,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riakcs/latest/cookbooks/mdc-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/latest/cookbooks/mdc-overview/) {{% /note %}} diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-type.md index 1d404ed4d7..07b6779ccf 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-type.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.0.2/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.0.2/using/cluster-operations/bucket-types). ## Request @@ -29,4 +29,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-client-id.md index d6a36f2a64..4a57d686c0 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/get-client-id.md @@ -23,7 +23,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.0.2/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/set-client-id). 
## Request diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/mapreduce.md index c7e37f8bba..eae8b97305 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/mapreduce.md @@ -37,8 +37,8 @@ Mapreduce jobs can be encoded in two different ways * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.0.2/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.0.2/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as [REST API]({{}}riak/kv/2.0.2/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{}}riak/kv/2.0.2/developing/app-guide/advanced-mapreduce/#erlang) ## Response diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/reset-bucket-props.md index c83e0976a1..a464796fa2 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/reset-bucket-props.md @@ -26,7 +26,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/secondary-indexes.md index 4ca965a046..49864f6a47 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/secondary-indexes.md @@ -60,7 +60,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.0.2/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -83,7 +83,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object). 
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props.md index d37b8d1abe..a2cac8fb5a 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props.md @@ -28,9 +28,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-type.md index 96eaa8d8cb..481871380e 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-type.md @@ -14,8 +14,8 @@ aliases: - /riak/2.0.2/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.0.2/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types). ## Request @@ -27,4 +27,4 @@ message RpbSetBucketTypeReq { ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/store-object.md index 88d36f7fe2..a6b8bff1a9 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/store-object.md @@ -15,11 +15,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.0.2/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.0.2/learn/concepts/buckets), and [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{}}riak/kv/2.0.2/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.0.2/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types). 
A bucket must always be specified (via `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no -[bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) is assigned, Riak will assign -`default`, which means that the [default bucket configuration](/riak/kv/2.0.2/configuring/reference/#default-bucket-properties) will be used. +[bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) is assigned, Riak will assign +`default`, which means that the [default bucket configuration]({{}}riak/kv/2.0.2/configuring/reference/#default-bucket-properties) will be used. #### Request @@ -49,7 +49,7 @@ message RpbPutReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket, in bytes, in which the key/value is to reside -`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object) +`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object) #### Optional Parameters @@ -92,7 +92,7 @@ message RpbPutResp { If `return_body` is set to `true` on the PUT request, the `RpbPutResp` will contain the current object after the PUT completes, in `contents`, -as well as the object's [causal context](/riak/kv/2.0.2/learn/concepts/causal-context), in the `vclock` +as well as the object's [causal context]({{}}riak/kv/2.0.2/learn/concepts/causal-context), in the `vclock` field. The `key` will be sent only if the server generated a random key for the object. diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-get.md index 46c6c25cc9..37e0a63eb9 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-get.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-get.md @@ -52,7 +52,7 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-put.md index 952363ce0b..87f346ade2 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-put.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-index-put.md @@ -36,4 +36,4 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). 
An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-get.md index ac06b5726c..af052d1c04 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-get.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.0.2/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-put.md index 4c1f004ffa..624711c5f0 100644 --- a/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.0.2/developing/api/protocol-buffers/yz-schema-put.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.0.2/developing/usage/search-schemas). +Create a new Solr [search schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas). ## Request @@ -33,8 +33,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.0.2/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.2/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/#message-codes) code with no data on success. 
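For reviewers of this patch, the schema and index messages above map onto single calls in the official clients. A minimal sketch with the Python client, assuming a local node with protocol buffers on port 8087; the schema/index names and `my_schema.xml` file are illustrative placeholders, not taken from these pages:

```python
# A minimal sketch, assuming a local node with protocol buffers on 8087.
# Schema and index names (and my_schema.xml) are illustrative placeholders.
import riak

client = riak.RiakClient(pb_port=8087)

# RpbYokozunaSchemaPutReq: the schema name plus its Solr schema as XML content
with open('my_schema.xml') as f:
    client.create_search_schema('my_schema', f.read())

# RpbYokozunaIndexPutReq: index name, optional schema name, optional n_val;
# the n_val must match the n_val of any bucket the index is associated with
client.create_search_index('my_index', schema='my_schema', n_val=3)

# RpbYokozunaIndexGetReq: fetch the index back to confirm it was created
print(client.get_search_index('my_index'))
```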
diff --git a/content/riak/kv/2.0.2/developing/app-guide.md b/content/riak/kv/2.0.2/developing/app-guide.md index 31a3c3af35..efc8af3f9f 100644 --- a/content/riak/kv/2.0.2/developing/app-guide.md +++ b/content/riak/kv/2.0.2/developing/app-guide.md @@ -14,47 +14,47 @@ aliases: - /riak/2.0.2/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.0.2/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.0.2/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.0.2/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.0.2/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.0.2/developing/key-value-modeling -[dev data types]: /riak/kv/2.0.2/developing/data-types -[dev data types#counters]: /riak/kv/2.0.2/developing/data-types/counters -[dev data types#sets]: /riak/kv/2.0.2/developing/data-types/sets -[dev data types#maps]: /riak/kv/2.0.2/developing/data-types/maps -[usage create objects]: /riak/kv/2.0.2/developing/usage/creating-objects -[usage search]: /riak/kv/2.0.2/developing/usage/search -[use ref search]: /riak/kv/2.0.2/using/reference/search -[usage 2i]: /riak/kv/2.0.2/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.0.2/developing/client-libraries -[concept crdts]: /riak/kv/2.0.2/learn/concepts/crdts -[dev data model]: /riak/kv/2.0.2/developing/data-modeling -[usage mapreduce]: /riak/kv/2.0.2/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.0.2/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.0.2/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.2/setup/planning/backend/memory -[obj model java]: /riak/kv/2.0.2/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.2/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.2/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.2/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.2/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.2/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.2/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.0.2/using/reference/strong-consistency -[use ref strong consistency]: /riak/2.0.2/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.0.2/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.0.2/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.0.2/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.0.2/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties -[install index]: /riak/kv/2.0.2/setup/installing -[getting started]: /riak/kv/2.0.2/developing/getting-started -[usage index]: /riak/kv/2.0.2/developing/usage -[glossary]: /riak/kv/2.0.2/learn/glossary +[usage conflict resolution]: {{}}riak/kv/2.0.2/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.0.2/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.0.2/developing/data-modeling/#sensor-data +[concept eventual consistency]: 
{{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.0.2/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.0.2/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.0.2/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.0.2/developing/data-types/counters +[dev data types#sets]: {{}}riak/kv/2.0.2/developing/data-types/sets +[dev data types#maps]: {{}}riak/kv/2.0.2/developing/data-types/maps +[usage create objects]: {{}}riak/kv/2.0.2/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search +[use ref search]: {{}}riak/kv/2.0.2/using/reference/search +[usage 2i]: {{}}riak/kv/2.0.2/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.0.2/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.0.2/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.0.2/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.0.2/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.0.2/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.0.2/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.2/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.0.2/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.0.2/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.2/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.2/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.2/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.2/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.2/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[cluster ops strong consistency]: {{}}riak/kv/2.0.2/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/2.0.2/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/2.0.2/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/2.0.2/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/2.0.2/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/2.0.2/setup/installing +[getting started]: {{}}riak/kv/2.0.2/developing/getting-started +[usage index]: {{}}riak/kv/2.0.2/developing/usage +[glossary]: {{}}riak/kv/2.0.2/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -117,7 +117,7 @@ Riak may not such be a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance. 
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.0.2/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.0.2/developing/app-guide/advanced-mapreduce.md index c237739e2e..9e4894a027 100644 --- a/content/riak/kv/2.0.2/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.0.2/developing/app-guide/advanced-mapreduce.md @@ -14,12 +14,12 @@ aliases: - /riak/2.0.2/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.0.2/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.0.2/using/reference/custom-code -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.2/configuring/reference +[usage 2i]: {{}}riak/kv/2.0.2/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.0.2/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.0.2/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.2/configuring/reference > **Use MapReduce sparingly** > @@ -724,7 +724,7 @@ You can use streaming with Erlang via the Riak local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.0.2/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.0.2/developing/app-guide/cluster-metadata.md index b88bbf634c..5b29f60fc0 100644 --- a/content/riak/kv/2.0.2/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.0.2/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.0.2/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.0.2/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.0.2/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.0.2/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. 
Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.0.2/developing/app-guide/replication-properties.md b/content/riak/kv/2.0.2/developing/app-guide/replication-properties.md index 5bb46803bc..873dad09a1 100644 --- a/content/riak/kv/2.0.2/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.0.2/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.2/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/2.0.2/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.0.2/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.0.2/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.0.2/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.0.2/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.2/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.2/learn/concepts/buckets) that you're using. 
In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.0.2/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.0.2/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.2/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.0.2/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.0.2/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. 
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.2/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.2/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.0.2/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.0.2/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.0.2/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.0.2/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.0.2/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.0.2/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.2/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.2/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.0.2/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.0.2/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.2/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.2/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.2/developing/app-guide/strong-consistency.md b/content/riak/kv/2.0.2/developing/app-guide/strong-consistency.md index 5f1a298c45..d802ba129a 100644 --- a/content/riak/kv/2.0.2/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.0.2/developing/app-guide/strong-consistency.md @@ -14,25 +14,25 @@ aliases: - /riak/2.0.2/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/2.0.2/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/2.0.2/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.0.2/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.0.2/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.0.2/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.0.2/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.0.2/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.0.2/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.0.2/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.0.2/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/2.0.2/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.0.2/developing/client-libraries -[getting started]: /riak/kv/2.0.2/developing/getting-started -[config strong consistency#details]: /riak/kv/2.0.2/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.0.2/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.0.2/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.0.2/using/cluster-operations/bucket-types +[apps replication properties]: 
{{}}riak/kv/2.0.2/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.0.2/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.0.2/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.0.2/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.0.2/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.0.2/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.0.2/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.0.2/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.0.2/developing/client-libraries +[getting started]: {{}}riak/kv/2.0.2/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.0.2/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.0.2/developing/client-libraries.md b/content/riak/kv/2.0.2/developing/client-libraries.md index d359e8555e..40c9161d66 100644 --- a/content/riak/kv/2.0.2/developing/client-libraries.md +++ b/content/riak/kv/2.0.2/developing/client-libraries.md @@ -35,7 +35,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.0.2/developing/data-types.md b/content/riak/kv/2.0.2/developing/data-types.md index 81cfbbf68d..5d387fb1f8 100644 --- a/content/riak/kv/2.0.2/developing/data-types.md +++ b/content/riak/kv/2.0.2/developing/data-types.md @@ -38,9 +38,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -261,5 +261,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. 
diff --git a/content/riak/kv/2.0.2/developing/faq.md b/content/riak/kv/2.0.2/developing/faq.md index ae8ce911c3..b2d42f1a5c 100644 --- a/content/riak/kv/2.0.2/developing/faq.md +++ b/content/riak/kv/2.0.2/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.0.2/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.0.2/using/performance/benchmarking -[Bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.0.2/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.0.2/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.0.2/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.0.2/developing/client-libraries -[MapReduce]: /riak/kv/2.0.2/developing/usage/mapreduce -[Memory]: /riak/kv/2.0.2/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.0.2/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.0.2/learn/concepts/causal-context#vector-clocks +[Basho Bench]: {{}}riak/kv/2.0.2/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.0.2/developing/usage +[commit hooks]: {{}}riak/kv/2.0.2/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.0.2/configuring/reference +[Erlang Riak Client]: {{}}riak/kv/2.0.2/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.0.2/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.0.2/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.0.2/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.0.2/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.0.2/developing/getting-started.md b/content/riak/kv/2.0.2/developing/getting-started.md index 6fee9a4e94..a09382a964 100644 --- a/content/riak/kv/2.0.2/developing/getting-started.md +++ b/content/riak/kv/2.0.2/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.0.2/setup/installing -[dev client libraries]: /riak/kv/2.0.2/developing/client-libraries +[install index]: {{}}riak/kv/2.0.2/setup/installing +[dev client libraries]: {{}}riak/kv/2.0.2/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.0.2/developing/getting-started/csharp.md b/content/riak/kv/2.0.2/developing/getting-started/csharp.md index b638ea1781..a4e4faf27b 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/csharp.md +++ b/content/riak/kv/2.0.2/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak.
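The "CRUD Operations" pages that these getting-started intros point to all demonstrate the same store/fetch/delete cycle. A rough sketch of that cycle with the official Python client, assuming a local node with protocol buffers on port 8087; the bucket, key, and data are placeholders, not taken from the docs:

```python
# A rough sketch of the CRUD cycle, assuming a local node with protocol
# buffers on 8087; bucket, key, and data are illustrative placeholders.
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('test')

# Create/update: store a JSON-serializable value under a key
obj = bucket.new('one', data={'value': 1})
obj.store(w=3, dw=2)  # per-request replication properties are optional

# Read: fetch the object back and inspect the deserialized data
fetched = bucket.get('one')
print(fetched.data)  # {'value': 1}

# Delete: remove the object
fetched.delete()
```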
## Next Steps -[CRUD Operations](/riak/kv/2.0.2/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.2/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.0.2/developing/getting-started/csharp/querying.md b/content/riak/kv/2.0.2/developing/getting-started/csharp/querying.md index 5a98bde506..c184c2b570 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.0.2/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.2/developing/getting-started/erlang.md b/content/riak/kv/2.0.2/developing/getting-started/erlang.md index 61d6743c54..7fca2dabfa 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/erlang.md +++ b/content/riak/kv/2.0.2/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.2/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.2/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.2/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.0.2/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.0.2/developing/getting-started/erlang/object-modeling.md index 52d0ba911b..526635914f 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.0.2/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.2/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.2/developing/getting-started/erlang/querying.md b/content/riak/kv/2.0.2/developing/getting-started/erlang/querying.md index a1cd0ebaae..50e82626ac 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.0.2/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.0.2/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.0.2/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.2/developing/getting-started/golang.md b/content/riak/kv/2.0.2/developing/getting-started/golang.md index 29d93e9c26..166483bbd3 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/golang.md +++ b/content/riak/kv/2.0.2/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.2/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.2/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.2/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.2/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.2/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.0.2/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.0.2/developing/getting-started/golang/object-modeling.md index 5ec04e9146..814be6e552 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.0.2/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.2/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.2/developing/getting-started/golang/querying.md b/content/riak/kv/2.0.2/developing/getting-started/golang/querying.md index 1fe004394f..ed76c175d9 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.0.2/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.0.2/developing/getting-started/java.md b/content/riak/kv/2.0.2/developing/getting-started/java.md index 9aba2eff79..a1947f798a 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/java.md +++ b/content/riak/kv/2.0.2/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
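The secondary-index notes repeated across these querying guides all describe the same pattern: tag objects with index entries at write time, then query by exact value or range. A sketch with the Python client, assuming a sorted backend (LevelDB or Memory, per the notes); the bucket, key, and index names are made up for illustration:

```python
# A sketch of writing and querying a secondary index, assuming a sorted
# backend (LevelDB or Memory); names here are illustrative placeholders.
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('tweets')

# Tag the object with a *_bin (binary) index entry at write time
obj = bucket.new('601363616335360000', data={'text': 'Hello world'})
obj.add_index('hashtags_bin', 'dadjokes')
obj.store()

# Range query over the index; matching keys come back without a bucket scan
page = bucket.get_index('hashtags_bin', 'a', 'z', max_results=10)
print(page.results)

# Pagination mirrors the continuation field in the PB index response
if page.continuation:
    page = bucket.get_index('hashtags_bin', 'a', 'z',
                            max_results=10, continuation=page.continuation)
```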
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.0.2/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.2/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.0.2/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.0.2/developing/getting-started/java/crud-operations.md index 1a19bc1db4..00d62d738f 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.0.2/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.2/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.2/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -178,6 +178,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.2/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.2/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/) documention. diff --git a/content/riak/kv/2.0.2/developing/getting-started/java/querying.md b/content/riak/kv/2.0.2/developing/getting-started/java/querying.md index 31d94a2a72..7a70ced4dd 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.0.2/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.2/developing/getting-started/nodejs.md b/content/riak/kv/2.0.2/developing/getting-started/nodejs.md index 7c03251384..f58d651f48 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.0.2/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.2/using/running-a-cluster) first. 
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.2/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.2/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.0.2/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.0.2/developing/getting-started/nodejs/querying.md index 1618569c0f..ee6b47373d 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.0.2/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.2/developing/getting-started/php.md b/content/riak/kv/2.0.2/developing/getting-started/php.md index 2abb545320..4c436d3b9e 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/php.md +++ b/content/riak/kv/2.0.2/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.2/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.2/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.2/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.0.2/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.0.2/developing/getting-started/php/crud-operations.md index 683f1b13f6..c70d9625f0 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.0.2/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. 
[In the next chapter](/riak/kv/2.0.2/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.0.2/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.0.2/developing/getting-started/php/querying.md b/content/riak/kv/2.0.2/developing/getting-started/php/querying.md index e254e0af37..7e4fd55173 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.0.2/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.2/developing/getting-started/python.md b/content/riak/kv/2.0.2/developing/getting-started/python.md index 919668c08b..4578e1da16 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/python.md +++ b/content/riak/kv/2.0.2/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.0.2/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.2/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.0.2/developing/getting-started/python/querying.md b/content/riak/kv/2.0.2/developing/getting-started/python/querying.md index f330d71869..2cec3a120e 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.0.2/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.2/developing/getting-started/ruby.md b/content/riak/kv/2.0.2/developing/getting-started/ruby.md index b094d405f2..0bbea07e13 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/ruby.md +++ b/content/riak/kv/2.0.2/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.2/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.2/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.2/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.0.2/developing/getting-started/ruby/querying.md b/content/riak/kv/2.0.2/developing/getting-started/ruby/querying.md index 9c6e2e922f..55ecae5246 100644 --- a/content/riak/kv/2.0.2/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.0.2/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.2/setup/planning/backend/leveldb). 
[Bitcask](/riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.2/developing/key-value-modeling.md b/content/riak/kv/2.0.2/developing/key-value-modeling.md index cb6bbff765..9aeb2743a9 100644 --- a/content/riak/kv/2.0.2/developing/key-value-modeling.md +++ b/content/riak/kv/2.0.2/developing/key-value-modeling.md @@ -15,7 +15,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.0.2/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.0.2/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.0.2/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.0.2/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.0.2/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -23,7 +23,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.0.2/developing/app-guide/) for a better sense of which features you might need. +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.0.2/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -36,12 +36,12 @@ objects. 
Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.0.2/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.0.2/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.0.2/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.0.2/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.0.2/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.0.2/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -78,7 +78,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.0.2/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.0.2/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -136,13 +136,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.0.2/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.0.2/developing/data-types/sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.0.2/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.0.2/developing/data-types/sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.0.2/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -151,7 +151,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.0.2/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. 
The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.0.2/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -189,7 +189,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.2/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.2/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.2/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.2/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -433,8 +433,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.0.2/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.0.2/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.0.2/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -442,7 +442,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.0.2/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.0.2/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.0.2/developing/usage/commit-hooks.md b/content/riak/kv/2.0.2/developing/usage/commit-hooks.md index e50e86583a..1fe8e6d3cc 100644 --- a/content/riak/kv/2.0.2/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.0.2/developing/usage/commit-hooks.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -30,7 +30,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. -Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.0.2/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.0.2/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -39,7 +39,7 @@ functions. 
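Circling back to the key-tracking pattern described in the key/value modeling discussion above, the sketch below shows one way the `usernames` set might be maintained from the official Python client. It assumes the `sets` bucket type has already been created and activated; the helper function and record layout are hypothetical.

```python
import riak
from riak.datatypes import Set

client = riak.RiakClient(pb_port=8087)

# The set lives at bucket type 'sets', bucket 'user_info_sets',
# key 'usernames', as in the example above.
set_bucket = client.bucket_type('sets').bucket('user_info_sets')
user_id_set = Set(set_bucket, 'usernames')

def create_user_record(username, record):
    # Store the record itself under the user's key...
    obj = client.bucket('users').new(username,
                                     data=record,
                                     content_type='application/json')
    obj.store()
    # ...and track the key in the set, so users can be enumerated
    # later without an expensive list-keys operation.
    user_id_set.add(username)
    user_id_set.store()
```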
## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.0.2/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -86,13 +86,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.0.2/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.0.2/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.0.2/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.0.2/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.0.2/developing/usage/conflict-resolution.md b/content/riak/kv/2.0.2/developing/usage/conflict-resolution.md index 3f4489740f..4561758991 100644 --- a/content/riak/kv/2.0.2/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.0.2/developing/usage/conflict-resolution.md @@ -14,15 +14,15 @@ aliases: - /riak/2.0.2/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.0.2/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.0.2/learn/concepts/clusters) system in which any [node](/riak/kv/2.0.2/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{}}riak/kv/2.0.2/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.0.2/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. 
-If you are using Riak in an [eventually consistent](/riak/kv/2.0.2/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.0.2/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.0.2/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.0.2/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.0.2/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.0.2/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -43,7 +43,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.0.2/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.0.2/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -62,8 +62,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.0.2/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.0.2/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.0.2/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.0.2/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -71,10 +71,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.0.2/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.0.2/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. The most important [bucket properties](/riak/kv/2.0.2/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.0.2/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. 
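As a rough illustration of working with those two properties from the official Python client (bucket types themselves are created and activated with `riak-admin`, which is not shown here), with a hypothetical bucket name:

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('test_bucket')

# Inspect the properties that govern conflict resolution for
# this bucket.
props = bucket.get_properties()
print(props.get('allow_mult'), props.get('last_write_wins'))

# Opt a single bucket out of siblings: conflicts are resolved
# internally by timestamp rather than surfaced to the client.
bucket.set_properties({'allow_mult': False, 'last_write_wins': True})
```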
@@ -86,7 +86,7 @@ If the `[allow_mult](#siblings)` parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -`[last_write_wins](/riak/kv/2.0.2/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, +`[last_write_wins]({{}}riak/kv/2.0.2/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -140,20 +140,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.0.2/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.2/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.2/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.2/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.2/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.0.2/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.0.2/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.0.2/configuring/reference) to change the [default bucket properties](/riak/kv/2.0.2/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.0.2/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.0.2/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -163,7 +163,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.0.2/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.0.2/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{}}riak/kv/2.0.2/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.0.2/learn/concepts/causal-context#vector-clocks) will be used. 
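The safest update pattern implied by the causal-context discussion above is read-modify-write, letting the client library carry the context for you. A minimal Python sketch, with hypothetical bucket and key names:

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('siblings_allowed').bucket('profiles')

# Fetch first, so the object carries its current causal context
# (a vector clock or dotted version vector, per dvv_enabled).
obj = bucket.get('alice')

# Mutate and store. The client returns the fetched context with
# the write, so Riak can tell this value descends from the one
# that was read rather than conflicting with it.
obj.data = {'status': 'updated'}
obj.store()
```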
Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -188,11 +188,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.0.2/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.0.2/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.0.2/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.0.2/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.0.2/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.0.2/developing/usage/updating-objects) document. ## Siblings @@ -207,7 +207,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.0.2/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.0.2/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -356,7 +356,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.0.2/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.2/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.0.2/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.2/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -477,11 +477,11 @@ by presenting the conflicting objects to the end user. 
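Following the sibling-creation walkthrough above, here is a rough Python sketch of what fetching a conflicted key looks like; the bucket and key names are illustrative stand-ins for the location used in the curl example.

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')

# Writes made without causal context accumulate as siblings.
obj = bucket.get('best_character')

# With allow_mult set to true, a conflicted object exposes every
# concurrent value rather than silently picking one of them.
for sibling in obj.siblings:
    print(sibling.data)
```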
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.0.2/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.2/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.2/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.2/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.2/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -610,7 +610,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.0.2/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.0.2/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -665,7 +665,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/csharp.md index 2a2bc3e87c..45bcdebc96 100644 --- a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/csharp.md @@ -15,7 +15,7 @@ aliases: - /riak/2.0.2/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
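Alongside those client-library guides, the following is a rough Python analogue of the client-side resolver pattern: a callable that collapses `siblings` down to a single value, registered on the bucket. The merge rule shown (keep the longest value) is purely illustrative; real applications should merge according to their own data model.

```python
import riak

def longest_value_resolver(riak_object):
    # A resolver mutates the object in place, leaving exactly one
    # sibling behind.
    riak_object.siblings = [max(riak_object.siblings,
                                key=lambda s: len(s.encoded_data or b''))]

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('siblings_allowed').bucket('profiles')
bucket.resolver = longest_value_resolver

# The resolver runs when a fetched object has siblings, so reads
# come back with a single value.
obj = bucket.get('alice')
print(obj.data)
```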
diff --git a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/golang.md index 9d58c7cd17..a29272e80b 100644 --- a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/2.0.2/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to usecase-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/java.md index 45d9ec9f0a..66415028d5 100644 --- a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/java.md @@ -15,7 +15,7 @@ aliases: - /riak/2.0.2/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -55,7 +55,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -188,7 +188,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.2/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.2/developing/usage) section. ## More Advanced Example @@ -257,9 +257,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.2/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.2/developing/data-types/) that have specific conflict resolution mechanics built in. 
If you have data that -can be modeled as a [counter](/riak/kv/2.0.2/developing/data-types/counters), [set](/riak/kv/2.0.2/developing/data-types/sets), or [map](/riak/kv/2.0.2/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.2/developing/data-types/counters), [set]({{}}riak/kv/2.0.2/developing/data-types/sets), or [map]({{}}riak/kv/2.0.2/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -268,4 +268,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.2/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.2/developing/data-types/sets). diff --git a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/nodejs.md index 8e38903bfa..03ada0e8a5 100644 --- a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/nodejs.md @@ -15,7 +15,7 @@ aliases: - /riak/2.0.2/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/php.md index d93e500d6f..566d2ecdb1 100644 --- a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/php.md @@ -15,7 +15,7 @@ aliases: - /riak/2.0.2/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -45,7 +45,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -195,7 +195,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.2/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.2/developing/usage) section. ## More Advanced Example @@ -225,9 +225,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.2/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.2/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.2/developing/data-types/counters), [set](/riak/kv/2.0.2/developing/data-types/sets), or [map](/riak/kv/2.0.2/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.2/developing/data-types/counters), [set]({{}}riak/kv/2.0.2/developing/data-types/sets), or [map]({{}}riak/kv/2.0.2/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -236,4 +236,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.2/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.2/developing/data-types/sets). diff --git a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/python.md index 8f98c752ca..97e16f349e 100644 --- a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/python.md @@ -15,7 +15,7 @@ aliases: - /riak/2.0.2/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -50,7 +50,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -182,7 +182,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.2/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.2/developing/usage) section. ## More Advanced Example @@ -237,9 +237,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.2/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.2/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.2/developing/data-types/counters), [set](/riak/kv/2.0.2/developing/data-types/sets), or [map](/riak/kv/2.0.2/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.2/developing/data-types/counters), [set]({{}}riak/kv/2.0.2/developing/data-types/sets), or [map]({{}}riak/kv/2.0.2/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -248,4 +248,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.2/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.2/developing/data-types/sets). diff --git a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/ruby.md index 3c9f42b481..985e114c0a 100644 --- a/content/riak/kv/2.0.2/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.0.2/developing/usage/conflict-resolution/ruby.md @@ -15,7 +15,7 @@ aliases: - /riak/2.0.2/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -48,7 +48,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -174,7 +174,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.2/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.2/developing/usage) section. ## More Advanced Example @@ -234,9 +234,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.2/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.2/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.2/developing/data-types/counters), [set](/riak/kv/2.0.2/developing/data-types/sets), or [map](/riak/kv/2.0.2/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.2/developing/data-types/counters), [set]({{}}riak/kv/2.0.2/developing/data-types/sets), or [map]({{}}riak/kv/2.0.2/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -246,4 +246,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.2/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.2/developing/data-types/sets). diff --git a/content/riak/kv/2.0.2/developing/usage/creating-objects.md b/content/riak/kv/2.0.2/developing/usage/creating-objects.md index 61d9e70a3f..e6daa40f1b 100644 --- a/content/riak/kv/2.0.2/developing/usage/creating-objects.md +++ b/content/riak/kv/2.0.2/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.0.2/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.0.2/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -27,7 +27,7 @@ In the example above, our read was unsuccessful because our Riak cluster is currently empty. Let's change that by storing an object containing information about a dog named Rufus. We'll store that object in the location described above, i.e. in the key `rufus` in the bucket `dogs`, -which bears the `animals` [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types). +which bears the `animals` [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types). 
The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -122,7 +122,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, run the same read operation in [Reading Objects](/riak/kv/2.0.2/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no +Now, run the same read operation in [Reading Objects]({{}}riak/kv/2.0.2/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no longer empty! ### Store an Object @@ -143,7 +143,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.0.2/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.0.2/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.0.2/developing/usage/custom-extractors.md b/content/riak/kv/2.0.2/developing/usage/custom-extractors.md index 3d4abfabe4..c9a16ee747 100644 --- a/content/riak/kv/2.0.2/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.0.2/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.0.2/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.0.2/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.0.2/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.0.2/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.0.2/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added to `` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.0.2/developing/usage/deleting-objects.md b/content/riak/kv/2.0.2/developing/usage/deleting-objects.md index df27b7d399..9ed1ea17e7 100644 --- a/content/riak/kv/2.0.2/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.0.2/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.0.2/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.0.2/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.0.2/developing/usage/document-store.md b/content/riak/kv/2.0.2/developing/usage/document-store.md index aae6037332..46f7aae7a6 100644 --- a/content/riak/kv/2.0.2/developing/usage/document-store.md +++ b/content/riak/kv/2.0.2/developing/usage/document-store.md @@ -15,23 +15,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.0.2/developing/usage/search/) and [Riak Data Types](/riak/kv/2.0.2/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.0.2/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.0.2/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.0.2/developing/data-types/maps). +[Riak maps]({{}}riak/kv/2.0.2/developing/data-types/maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.0.2/developing/data-types/maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.0.2/developing/data-types/maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**. 
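To sketch that collection idea in client terms: upload a custom schema, create an index bound to it, associate the index with a bucket type outside the client (via `riak-admin`), and query it with Solr syntax. The schema, index, and field names below are hypothetical placeholders, not the ones defined later in this tutorial.

```python
import riak

client = riak.RiakClient(pb_port=8087)

# Upload a custom schema, then create an index that uses it.
with open('blog_post_schema.xml') as f:
    client.create_search_schema('blog_post_schema', f.read())
client.create_search_index('blog_posts', 'blog_post_schema')

# Once a bucket type with search_index=blog_posts is active and
# documents have been stored, the whole "collection" can be
# queried with Solr syntax.
results = client.fulltext_search('blog_posts', 'title_register:*Riak*')
print(results['num_found'])
```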
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.0.2/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.0.2/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.0.2/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.0.2/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -68,7 +68,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.0.2/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.0.2/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `` list: @@ -241,7 +241,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.0.2/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.0.2/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.0.2/developing/usage/mapreduce.md b/content/riak/kv/2.0.2/developing/usage/mapreduce.md index 53fdfefb71..b53fbd946c 100644 --- a/content/riak/kv/2.0.2/developing/usage/mapreduce.md +++ b/content/riak/kv/2.0.2/developing/usage/mapreduce.md @@ -32,9 +32,9 @@ transferring a potentially huge dataset to a client algorithm. Developers can use MapReduce for things like filtering documents by tags, counting words in documents, and extracting links to related data. In Riak, MapReduce is one method for querying that is not strictly based -on key querying, alongside [secondary indexes](/riak/kv/2.0.2/developing/usage/secondary-indexes/) -and [Search](/riak/kv/2.0.2/developing/usage/search/). MapReduce jobs can be submitted through the -[HTTP API](/riak/kv/2.0.2/developing/api/http) or the [Protocol Buffers API](/riak/kv/2.0.2/developing/api/protocol-buffers/), although we +on key querying, alongside [secondary indexes]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes/) +and [Search]({{}}riak/kv/2.0.2/developing/usage/search/). MapReduce jobs can be submitted through the +[HTTP API]({{}}riak/kv/2.0.2/developing/api/http) or the [Protocol Buffers API]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/), although we strongly recommend using the Protocol Buffers API for performance reasons. @@ -48,9 +48,9 @@ reasons. ## When to Use MapReduce * When you know the set of objects over which you want to MapReduce - (i.e. the locations of the objects, as specified by [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types), bucket, and key) + (i.e. the locations of the objects, as specified by [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types), bucket, and key) * When you want to return actual objects or pieces of objects and not - just the keys. 
[Search](/riak/kv/2.0.2/developing/usage/search/) and [secondary indexes](/riak/kv/2.0.2/developing/usage/secondary-indexes) are other means of returning objects based on + just the keys. [Search]({{}}riak/kv/2.0.2/developing/usage/search/) and [secondary indexes]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) are other means of returning objects based on non-key-based queries, but they only return lists of keys and not whole objects. * When you need the utmost flexibility in querying your data. MapReduce @@ -85,7 +85,7 @@ Riak MapReduce queries have two components: * A list of phases The elements of the input list are object locations as specified by -[bucket type](/riak/kv/2.0.2/developing/usage/bucket-types), bucket, and key. The elements of the +[bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types), bucket, and key. The elements of the phases list are chunks of information related to a map, a reduce, or a link function. @@ -95,7 +95,7 @@ node that the client contacts to make the request becomes the above, each job consists of a list of phases, where each phase is either a map or a reduce phase. The coordinating node uses the list of phases to route the object keys and the function that will operate over the -objects stored in those keys and instruct the proper [vnode](/riak/kv/2.0.2/learn/glossary/#vnode) to +objects stored in those keys and instruct the proper [vnode]({{}}riak/kv/2.0.2/learn/glossary/#vnode) to run that function over the right objects. After running the map function, the results are sent back to the @@ -106,20 +106,20 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example In this example, we'll create four objects with the text "caremad" repeated a varying number of times and store those objects in the bucket -`training` (which does not bear a [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types)). +`training` (which does not bear a [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types)). An Erlang MapReduce function will be used to count the occurrences of the word "caremad." ### Data object input commands For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) -in conjunction with Riak's [HTTP API](/riak/kv/2.0.2/developing/api/http) to store the objects: +in conjunction with Riak's [HTTP API]({{}}riak/kv/2.0.2/developing/api/http) to store the objects: ```curl curl -XPUT http://localhost:8098/buckets/training/keys/foo \ @@ -217,4 +217,4 @@ counting the number of instances of the word. ## Advanced MapReduce Queries For more detailed information on MapReduce queries in Riak, we recommend -checking out our [Advanced MapReduce](/riak/kv/2.0.2/developing/app-guide/advanced-mapreduce) guide. +checking out our [Advanced MapReduce]({{}}riak/kv/2.0.2/developing/app-guide/advanced-mapreduce) guide. diff --git a/content/riak/kv/2.0.2/developing/usage/reading-objects.md b/content/riak/kv/2.0.2/developing/usage/reading-objects.md index 68e53286ff..a9bd746f55 100644 --- a/content/riak/kv/2.0.2/developing/usage/reading-objects.md +++ b/content/riak/kv/2.0.2/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. 
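A minimal Python counterpart to that `GET` analogy, using the same `animals`/`dogs`/`rufus` location as the example that follows:

```python
import riak

client = riak.RiakClient(pb_port=8087)

# Reads address an object by bucket type, bucket, and key.
bucket = client.bucket_type('animals').bucket('dogs')
obj = bucket.get('rufus')

if obj.exists:
    print(obj.data)     # the stored value
else:
    print('not found')  # the HTTP 404 analogue
```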
You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.0.2/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) `animals`: +`dogs`, which bears the [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) `animals`: ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.0.2/developing/usage/replication.md b/content/riak/kv/2.0.2/developing/usage/replication.md index 8ad5c40dff..5b3e03f58b 100644 --- a/content/riak/kv/2.0.2/developing/usage/replication.md +++ b/content/riak/kv/2.0.2/developing/usage/replication.md @@ -14,12 +14,12 @@ aliases: - /riak/2.0.2/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/2.0.2/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -41,17 +41,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using Strong +Using Strong Consistency documentation, as this option will not be covered in this tutorial. 
{{% /note %}}

@@ -71,7 +71,7 @@ Riak is to specify those properties

### Replication Properties Through Bucket Types

Let's say, for example, that you want to apply an `n_val` of 5, an `r`
-of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.2/learn/concepts/buckets) that
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.2/learn/concepts/buckets) that
you're using. In order to set those replication properties, you should
create a bucket type that sets those properties. Below is an example:

@@ -95,8 +95,8 @@ Parameter | Common name | Default value | Description
`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
`r` | R | `quorum` | The number of servers that must respond to a read request
`w` | W | `quorum` | The number of servers that must respond to a write request
-`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
-`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent of setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).

@@ -105,7 +105,7 @@ Parameter | Common name | Default value | Description
## A Primer on N, R, and W

The most important thing to note about Riak's replication controls is
-that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.2/developing/usage/bucket-types)
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types)
to set up bucket `A` to use a particular set of replication properties
and bucket `B` to use entirely different properties.

@@ -317,7 +317,7 @@ seeks to write the object to is unavailable.

## Primary Reads and Writes with PR and PW

-In Riak's replication model, there are N [vnodes](/riak/kv/2.0.2/learn/glossary/#vnode),
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.2/learn/glossary/#vnode),
called _primary vnodes_, that hold primary responsibility for any given
key. Riak will attempt reads and writes to primary vnodes first, but in
case of failure, those operations will go to failover nodes in order to

@@ -355,7 +355,7 @@ successful. The default value is `quorum` (more on symbolic names below).

How quickly and robustly data is written to disk depends on the
configuration of your backend or backends. For more details, see the
-documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.0.2/setup/planning/backend/multi).
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.0.2/setup/planning/backend/multi).
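To make these parameters concrete, here is a minimal sketch of a per-request
override on a read, assuming a local node on port 8098 and the
`animals`/`dogs`/`rufus` object used on the reading-objects page (writes accept
`w`, `pw`, and `dw` the same way, as the curl example near the end of this page
shows):

```curl
# Require 2 of 3 replicas to respond, at least 1 of them a primary vnode,
# and keep checking other vnodes instead of failing fast on a miss
curl "http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=2&pr=1&notfound_ok=false"
```

Query-string parameters like these apply to that request only and override the
bucket-level defaults.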
## Delete Quorum with RW @@ -530,9 +530,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.2/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.2/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.0.2/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.0.2/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -546,7 +546,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.2/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.2/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -556,8 +556,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)

diff --git a/content/riak/kv/2.0.2/developing/usage/search-schemas.md b/content/riak/kv/2.0.2/developing/usage/search-schemas.md
index 56c84fa4f2..bfabe3b014 100644
--- a/content/riak/kv/2.0.2/developing/usage/search-schemas.md
+++ b/content/riak/kv/2.0.2/developing/usage/search-schemas.md
@@ -14,17 +14,17 @@ aliases:
  - /riak/2.0.2/dev/advanced/search-schema
---

-[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters
+[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters

> **Note on Search 2.0 vs. Legacy Search**
>
> This document refers to the new Riak Search 2.0 with
[Solr](http://lucene.apache.org/solr/) integration (codenamed
-Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/).
+Yokozuna).

Riak Search is built for ease of use, allowing you to write values into
Riak and query for values using Solr. Riak Search does a lot of work
-under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.0.2/developing/data-types/), and [more](/riak/kv/2.0.2/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.0.2/developing/data-types/), and [more]({{}}riak/kv/2.0.2/developing/usage/custom-extractors)---into something that can be indexed and searched later.
Nonetheless, you must still instruct Riak/Solr how to index a value. Are
you providing an array of strings? An integer? A date? Is your text in
English or Russian?
You can provide such instructions to Riak Search by @@ -222,7 +222,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.0.2/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.0.2/developing/usage/search.md b/content/riak/kv/2.0.2/developing/usage/search.md index 685d30a4f3..efb3bd699a 100644 --- a/content/riak/kv/2.0.2/developing/usage/search.md +++ b/content/riak/kv/2.0.2/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.0.2/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.0.2/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.2/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.2/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.2/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.2/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.0.2/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.0.2/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.0.2/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.0.2/developing/usage/custom-extractors). 
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.0.2/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.0.2/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.0.2/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.0.2/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.0.2/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.0.2/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.0.2/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.0.2/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.0.2/developing/usage/searching-data-types.md b/content/riak/kv/2.0.2/developing/usage/searching-data-types.md index 5e72cddd1b..684259ecbb 100644 --- a/content/riak/kv/2.0.2/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.0.2/developing/usage/searching-data-types.md @@ -14,10 +14,10 @@ aliases: - /riak/2.0.2/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.0.2/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.0.2/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.0.2/developing/data-types/counters), [sets](/riak/kv/2.0.2/developing/data-types/sets), and [maps](/riak/kv/2.0.2/developing/data-types/maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.0.2/developing/data-types/counters), [sets]({{}}riak/kv/2.0.2/developing/data-types/sets), and [maps]({{}}riak/kv/2.0.2/developing/data-types/maps) can be indexed and have their contents searched just like other Riak objects. @@ -68,7 +68,7 @@ to search Data Types, they are provided only for reference. 
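Before looking at the default schemas themselves, it may help to see how a
Data Type bucket gets wired to a search index in the first place. Here is a
minimal sketch using the `scores` index and `counters` bucket type that appear
in the counters example later on this page (and the `$RIAK_HOST` convention
used throughout these docs):

```bash
# Create a search index, then a counter bucket type that indexes into it
curl -XPUT "$RIAK_HOST/search/index/scores"
riak-admin bucket-type create counters '{"props":{"datatype":"counter","search_index":"scores"}}'
riak-admin bucket-type activate counters
```

With that association in place, every counter written through the `counters`
type is indexed automatically.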
### Top-level Schemas -The default schema for [counters](/riak/kv/2.0.2/developing/data-types/counters) indexes each +The default schema for [counters]({{}}riak/kv/2.0.2/developing/data-types/counters) indexes each counter as an integer. ```xml @@ -85,7 +85,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.0.2/developing/data-types/sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.0.2/developing/data-types/sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -104,7 +104,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.0.2/developing/data-types/maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.0.2/developing/data-types/maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -154,7 +154,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) for [storing counters](/riak/kv/2.0.2/developing/data-types/counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.0.2/developing/data-types/counters) simply called `counters`, like so: ```bash @@ -614,7 +614,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types) for [storing sets](/riak/kv/2.0.2/developing/data-types/sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.0.2/developing/data-types/sets) simply called `sets`, like so: ```bash @@ -986,9 +986,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.0.2/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.0.2/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.0.2/developing/data-types/maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.0.2/developing/data-types/maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.0.2/developing/usage/secondary-indexes.md b/content/riak/kv/2.0.2/developing/usage/secondary-indexes.md index f781cd58b9..64152d631e 100644 --- a/content/riak/kv/2.0.2/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.0.2/developing/usage/secondary-indexes.md @@ -14,29 +14,29 @@ aliases: - /riak/2.0.2/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.2/setup/planning/backend/memory -[use ref strong consistency]: /riak/2.0.2/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.2/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.2/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.0.2/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.0.2/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.0.2/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.0.2/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.0.2/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -50,13 +50,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.0.2/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.0.2/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.0.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.0.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -74,7 +74,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.0.2/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.0.2/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -88,11 +88,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.0.2/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.0.2/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.0.2/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.0.2/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -258,8 +258,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.2/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.2/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.2/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.2/developing/getting-started) section. This has accomplished the following: @@ -1143,8 +1143,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.0.2/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.0.2/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.0.2/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.0.2/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
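Before the query walk-through begins, it may help to keep the general URL
shape of a 2i query in mind. Here is a minimal sketch against the `users`
bucket from the example above; the index names and values are illustrative:

```curl
# Exact match on a binary index
curl "localhost:8098/types/default/buckets/users/index/twitter_bin/jsmith123"

# Range query on an integer index
curl "localhost:8098/types/default/buckets/users/index/field1_int/1500/1509"
```

Both forms return a JSON object with a `keys` array rather than the objects
themselves.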
### Exact Match

diff --git a/content/riak/kv/2.0.2/developing/usage/security.md b/content/riak/kv/2.0.2/developing/usage/security.md
index 37e770a51f..fe36aed46b 100644
--- a/content/riak/kv/2.0.2/developing/usage/security.md
+++ b/content/riak/kv/2.0.2/developing/usage/security.md
@@ -14,49 +14,49 @@ aliases:
  - /riak/2.0.2/dev/advanced/client-security
---

-Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.0.2/using/security/basics) that enables you to choose
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.0.2/using/security/basics) that enables you to choose

* which Riak users/clients are authorized to perform a wide variety of
  Riak operations, and
* how those users/clients are required to authenticate themselves.

-The following four authentication mechanisms, aka [security sources](/riak/kv/2.0.2/using/security/managing-sources/) are available:
+The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.0.2/using/security/managing-sources/), are available:

-* [Trust](/riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication)-based
+* [Trust]({{}}riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication)-based
  authentication enables you to specify trusted
  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
  from which all clients will be authenticated by default
-* [Password](/riak/kv/2.0.2/using/security/managing-sources/#password-based-authentication)-based authentication requires
+* [Password]({{}}riak/kv/2.0.2/using/security/managing-sources/#password-based-authentication)-based authentication requires
  that clients provide a username and password
-* [Certificate](/riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication
+* [Certificate]({{}}riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication
  requires that clients present a certificate
-* [Pluggable authentication module (PAM)](/riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+* [Pluggable authentication module (PAM)]({{}}riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication requires
  clients to authenticate using the PAM service specified using the
-  `[riak-admin security](/riak/kv/2.0.2/using/security/managing-sources/#managing-sources)`
+  `[riak-admin security]({{}}riak/kv/2.0.2/using/security/managing-sources/#managing-sources)`
  command line interface

Riak's approach to security is highly flexible. If you choose to use
Riak's security feature, you do not need to require that all clients
authenticate via the same means. Instead, you can specify authentication
sources on a client-by-client, i.e. user-by-user, basis. This means that
-you can require clients performing, say, [MapReduce](/riak/kv/2.0.2/developing/usage/mapreduce/)
-operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.0.2/developing/usage) have to use username and password. The approach
+you can require clients performing, say, [MapReduce]({{}}riak/kv/2.0.2/developing/usage/mapreduce/)
+operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.0.2/developing/usage) have to use username and password. The approach
that you adopt will depend on your security needs. This document
provides a general overview of how that works.
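To sketch that per-user flexibility (the usernames and catch-all CIDR here are
hypothetical), an administrator could pair different security sources with
different users like so:

```bash
# One user authenticates by certificate, the other by password
riak-admin security add-user mapreduce_user
riak-admin security add-user kv_user password=rosebud
riak-admin security add-source mapreduce_user 0.0.0.0/0 certificate
riak-admin security add-source kv_user 0.0.0.0/0 password
```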
For managing security in Riak itself, see the following documents:

-* [Authentication and Authorization](/riak/kv/2.0.2/using/security/basics)
-* [Managing Security Sources](/riak/kv/2.0.2/using/security/managing-sources/)
+* [Authentication and Authorization]({{}}riak/kv/2.0.2/using/security/basics)
+* [Managing Security Sources]({{}}riak/kv/2.0.2/using/security/managing-sources/)

We also provide client-library-specific guides for the following
officially supported clients:

-* [Java](/riak/kv/2.0.2/developing/usage/security/java)
-* [Ruby](/riak/kv/2.0.2/developing/usage/security/ruby)
-* [PHP](/riak/kv/2.0.2/developing/usage/security/php)
-* [Python](/riak/kv/2.0.2/developing/usage/security/python)
-* [Erlang](/riak/kv/2.0.2/developing/usage/security/erlang)
+* [Java]({{}}riak/kv/2.0.2/developing/usage/security/java)
+* [Ruby]({{}}riak/kv/2.0.2/developing/usage/security/ruby)
+* [PHP]({{}}riak/kv/2.0.2/developing/usage/security/php)
+* [Python]({{}}riak/kv/2.0.2/developing/usage/security/python)
+* [Erlang]({{}}riak/kv/2.0.2/developing/usage/security/erlang)

## Certificates, Keys, and Authorities

@@ -81,12 +81,12 @@ keys should never be shared beyond Riak and connecting clients.

> **HTTP not supported**
>
> Certificate-based authentication is available only through Riak's
-[Protocol Buffers](/riak/kv/2.0.2/developing/api/protocol-buffers/) interface. It is not available through the
-[HTTP API](/riak/kv/2.0.2/developing/api/http).
+[Protocol Buffers]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/) interface. It is not available through the
+[HTTP API]({{}}riak/kv/2.0.2/developing/api/http).

### Default Names

-In Riak's [configuration files](/riak/kv/2.0.2/configuring/reference/#security), the
+In Riak's [configuration files]({{}}riak/kv/2.0.2/configuring/reference/#security), the
default certificate file names are as follows:

Cert | Filename

diff --git a/content/riak/kv/2.0.2/developing/usage/security/erlang.md b/content/riak/kv/2.0.2/developing/usage/security/erlang.md
index eedde80167..11e7ce5ece 100644
--- a/content/riak/kv/2.0.2/developing/usage/security/erlang.md
+++ b/content/riak/kv/2.0.2/developing/usage/security/erlang.md
@@ -18,9 +18,9 @@ aliases:

This tutorial shows you how to set up a Riak Erlang client to
authenticate itself when connecting to Riak.

-If you are using [trust](/riak/kv/2.0.2/using/security/managing-sources/), [PAM-](/riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.0.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+If you are using [trust]({{}}riak/kv/2.0.2/using/security/managing-sources/)- or [PAM]({{}}riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.0.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered
in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+[certificate]({{}}riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.2/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.2/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -67,7 +67,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -89,10 +89,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.2/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.2/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.2/developing/usage/security/java.md b/content/riak/kv/2.0.2/developing/usage/security/java.md index abfc993e93..a63f7666ab 100644 --- a/content/riak/kv/2.0.2/developing/usage/security/java.md +++ b/content/riak/kv/2.0.2/developing/usage/security/java.md @@ -18,8 +18,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -38,7 +38,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.2/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.2/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.0.2/developing/usage/security/php.md b/content/riak/kv/2.0.2/developing/usage/security/php.md index c6d1e76268..ec92acb6a3 100644 --- a/content/riak/kv/2.0.2/developing/usage/security/php.md +++ b/content/riak/kv/2.0.2/developing/usage/security/php.md @@ -18,8 +18,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -32,7 +32,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.2/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.0.2/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.0.2/developing/usage/security/python.md b/content/riak/kv/2.0.2/developing/usage/security/python.md index b9b9305c80..d2848bff35 100644 --- a/content/riak/kv/2.0.2/developing/usage/security/python.md +++ b/content/riak/kv/2.0.2/developing/usage/security/python.md @@ -18,10 +18,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.2/using/security/managing-sources/) or [PAM-](/riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.0.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+If you are using [trust-]({{}}riak/kv/2.0.2/using/security/managing-sources/) or [PAM-]({{}}riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication)based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.0.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered
in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+[certificate]({{}}riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
the instructions in the [section below](#certificate-based-authentication).

{{% note title="Note on certificate generation" %}}
@@ -50,7 +50,7 @@ instantiation by creating a `SecurityCreds` object.

If you are using Riak Security, _all_ connecting clients should have
access to the same Certificate Authority (CA) used on the server side,
-regardless of which [security source](/riak/kv/2.0.2/using/security/managing-sources/) you
+regardless of which [security source]({{}}riak/kv/2.0.2/using/security/managing-sources/) you
choose. All clients should also provide a username. The example below
sets up a client object (we'll simply call it `client`) that connects
to Riak on `localhost` and on port 8087 without any security credentials:
@@ -82,7 +82,7 @@ provided that the
[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
which the client is connecting has been specified as trusted. More on
specifying trusted CIDRs can be found in [Trust-based
-Authentication](/riak/kv/2.0.2/using/security/managing-sources/#Trust-based-Authentication).
+Authentication]({{}}riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication).

**Note**: The examples in the following sections specify certs on the
basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
@@ -105,10 +105,10 @@ creds = SecurityCreds(username='riakuser',
## PAM-based Authentication

If you have specified that a specific client be authenticated using
-[PAM](/riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication), you will
+[PAM]({{}}riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication), you will
need to provide a CA as well as the username and password that you
specified when creating the user in Riak. For more, see our
-documentation on [User Management](/riak/kv/2.0.2/using/security/basics/#user-management).
+documentation on [User Management]({{}}riak/kv/2.0.2/using/security/basics/#user-management).
## Certificate-based Authentication @@ -137,7 +137,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.0.2/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.0.2/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.0.2/developing/usage/security/ruby.md b/content/riak/kv/2.0.2/developing/usage/security/ruby.md index cd36c67e09..20da1941fc 100644 --- a/content/riak/kv/2.0.2/developing/usage/security/ruby.md +++ b/content/riak/kv/2.0.2/developing/usage/security/ruby.md @@ -18,10 +18,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.2/using/security/managing-sources/) or [PAM](/riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.0.2/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.0.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.0.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -41,7 +41,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.2/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.0.2/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -62,7 +62,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication). 
## Password-based Authentication

@@ -85,10 +85,10 @@ client = Riak::Client.new(
## PAM-based Authentication

If you have specified that a specific client be authenticated using
-[PAM](/riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication), you will
+[PAM]({{}}riak/kv/2.0.2/using/security/managing-sources/#pam-based-authentication), you will
need to provide a CA as well as the username and password that you
specified when creating the user in Riak. For more, see our
-documentation on [User Management](/riak/kv/2.0.2/using/security/basics#user-management).
+documentation on [User Management]({{}}riak/kv/2.0.2/using/security/basics#user-management).

## Certificate-based Authentication

diff --git a/content/riak/kv/2.0.2/developing/usage/updating-objects.md b/content/riak/kv/2.0.2/developing/usage/updating-objects.md
index 03b58874f3..a64288523d 100644
--- a/content/riak/kv/2.0.2/developing/usage/updating-objects.md
+++ b/content/riak/kv/2.0.2/developing/usage/updating-objects.md
@@ -14,7 +14,7 @@ aliases:
  - /riak/2.0.2/dev/using/updates
---

-[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode
+[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode

## Using Causal Context

If an object already exists under a certain key and you want to write a
new object to that key, Riak needs to know what to do, especially if
multiple writes are happening at the same time. Which of the objects
being written should be deemed correct? These kinds of scenarios can
-arise quite frequently in distributed, [eventually consistent](/riak/kv/2.0.2/learn/concepts/eventual-consistency) systems.
+arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.0.2/learn/concepts/eventual-consistency) systems.

-Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.0.2/learn/concepts/causal-context). These objects track the causal history of objects.
+Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.0.2/learn/concepts/causal-context). These context objects track the causal history of objects.
They are attached to _all_ Riak objects as metadata, and they are not
readable by humans. They may sound complex---and they are fairly complex
behind the scenes---but using them in your application is very simple.

Using causal context in an update would involve the following steps:

1. Fetch the object
-2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.0.2/learn/concepts/causal-context)
+2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.0.2/learn/concepts/causal-context))
3. Write the new object to Riak

Step 2 is the most important here. All of Basho's official Riak clients
-enable you to modify an object's value without modifying its [causal context](/riak/kv/2.0.2/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
-object updates can be found in [Conflict Resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution), we'll walk you
+enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.0.2/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution), we'll walk you
through a basic example here.
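Before the client-library walkthrough, here is what that cycle looks like at
the HTTP level, where the context travels in the `X-Riak-Vclock` header. This
is a minimal sketch; the bucket, key, values, and vclock string are all
illustrative:

```curl
# Fetch the object and note the X-Riak-Vclock header in the response
curl -i "http://localhost:8098/buckets/nba/keys/champion"

# Write the modified value back, passing the fetched context along unchanged
curl -XPUT "http://localhost:8098/buckets/nba/keys/champion" \
  -H "Content-Type: text/plain" \
  -H "X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=" \
  -d "Harlem Globetrotters"
```

The client examples below wrap exactly this fetch-modify-write pattern.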
Let's say that the current NBA champion is the Washington Generals. @@ -305,22 +305,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.0.2/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.0.2/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.0.2/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.0.2/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.0.2/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.0.2/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.0.2/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.0.2/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -332,14 +332,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.0.2/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.0.2/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.0.2/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.0.2/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.0.2/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.0.2/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -620,7 +620,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.0.2/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.0.2/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -733,7 +733,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.0.2/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.0.2/index.md b/content/riak/kv/2.0.2/index.md index 49dcd68fb1..ff0a0a2865 100644 --- a/content/riak/kv/2.0.2/index.md +++ b/content/riak/kv/2.0.2/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.0.2/configuring -[dev index]: /riak/kv/2.0.2/developing -[downloads]: /riak/kv/2.0.2/downloads/ -[install index]: /riak/kv/2.0.2/setup/installing/ -[plan index]: /riak/kv/2.0.2/setup/planning -[perf open files]: /riak/kv/2.0.2/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.0.2/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.0.2/developing/usage/search -[getting started]: /riak/kv/2.0.2/developing/getting-started -[dev client libraries]: /riak/kv/2.0.2/developing/client-libraries +[config index]: {{}}riak/kv/2.0.2/configuring +[dev index]: {{}}riak/kv/2.0.2/developing +[downloads]: {{}}riak/kv/2.0.2/downloads/ +[install index]: {{}}riak/kv/2.0.2/setup/installing/ +[plan index]: {{}}riak/kv/2.0.2/setup/planning +[perf open files]: {{}}riak/kv/2.0.2/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.0.2/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search +[getting started]: {{}}riak/kv/2.0.2/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.0.2/developing/client-libraries diff --git a/content/riak/kv/2.0.2/introduction.md b/content/riak/kv/2.0.2/introduction.md index 20ea722524..2ae25dfceb 100644 --- a/content/riak/kv/2.0.2/introduction.md +++ b/content/riak/kv/2.0.2/introduction.md @@ -27,7 +27,7 @@ For more in-depth implementation details check out the If you're upgrading to Riak 2.0 from an earlier version, please be aware that all of the new features listed below are optional: -* **Riak Data Types** --- Riak's new CRDT-based [Data Types](/riak/kv/2.0.2/developing/data-types) can +* **Riak Data Types** --- Riak's new CRDT-based [Data Types]({{}}riak/kv/2.0.2/developing/data-types) can simplify modeling data in Riak, but are only used in buckets explicitly configured to use them. * **Strong Consistency, Riak Security, and the New Riak Search** --- @@ -35,16 +35,16 @@ that all of the new features listed below are optional: work. If not turned on, they will have no impact on performance. Furthermore, the older Riak Search will continue to be included with Riak. -* **Security** --- [Authentication and authorization](/riak/kv/2.0.2/using/security/basics) can be enabled +* **Security** --- [Authentication and authorization]({{}}riak/kv/2.0.2/using/security/basics) can be enabled or disabled at any time. -* **Configuration management** --- Riak's [configuration files](/riak/kv/2.0.2/configuring/reference/) have +* **Configuration management** --- Riak's [configuration files]({{}}riak/kv/2.0.2/configuring/reference/) have been streamlined into a single file named `riak.conf`. If you are upgrading, however, your existing `app.config` and `vm.args` files will still be recognized in version 2.0. -* **Bucket Types** --- While we strongly recommend [using bucket types](/riak/kv/2.0.2/using/reference/bucket-types) when creating new buckets, they are not required. +* **Bucket Types** --- While we strongly recommend [using bucket types]({{}}riak/kv/2.0.2/using/reference/bucket-types) when creating new buckets, they are not required. 
* **Dotted Version Vectors (DVVs)** --- This alternative to traditional - [vector clocks](/riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks) is enabled by default - in all [bucket types](/riak/kv/2.0.2/using/reference/bucket-types), but DVVs can be disabled + [vector clocks]({{}}riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks) is enabled by default + in all [bucket types]({{}}riak/kv/2.0.2/using/reference/bucket-types), but DVVs can be disabled by setting the `dvv_enabled` property to `false` on any bucket type. In a nutshell, upgrading to 2.0 will change how you use Riak only if you @@ -52,17 +52,17 @@ want it to. But even if you don't plan on using the new features, there are a number of improvements that make upgrading a good choice, including the following: -* [Cluster metadata](/riak/kv/2.0.2/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that +* [Cluster metadata]({{}}riak/kv/2.0.2/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that reduces the amount of inter-node gossip in Riak clusters, which can reduce network congestion. -* [Active Anti-Entropy](/riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy +* [Active Anti-Entropy]({{}}riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy (AAE) feature that is turned on by default since version 1.3, AAE performance has been improved in version 2.0. * [Bug patches](https://github.com/basho/riak/blob/2.0/RELEASE-NOTES.md) --- A variety of bugs present in earlier versions have been identified and patched. -More on upgrading can be found in our [Riak 2.0 upgrade guide](/riak/kv/2.0.2/setup/upgrading/version). +More on upgrading can be found in our [Riak 2.0 upgrade guide]({{}}riak/kv/2.0.2/setup/upgrading/version). ## Riak Data Types @@ -73,20 +73,20 @@ application is responsible for resolving conflicts between replicas of objects stored in different Riak nodes. Riak 2.0 offers a new approach to this problem for a wide range of use -cases in the form of [Riak Data Types](/riak/kv/2.0.2/developing/data-types). Instead of +cases in the form of [Riak Data Types]({{}}riak/kv/2.0.2/developing/data-types). Instead of forcing the application to resolve conflicts, Riak offers five Data Types that can reduce some of the complexities of developing using -Riak: [flags](/riak/kv/2.0.2/developing/data-types/maps#flags), [registers](/riak/kv/2.0.2/developing/data-types/maps#registers), -[counters](/riak/kv/2.0.2/developing/data-types/counters), [sets](/riak/kv/2.0.2/developing/data-types/sets), and -[maps](/riak/kv/2.0.2/developing/data-types/maps). +Riak: [flags]({{}}riak/kv/2.0.2/developing/data-types/maps#flags), [registers]({{}}riak/kv/2.0.2/developing/data-types/maps#registers), +[counters]({{}}riak/kv/2.0.2/developing/data-types/counters), [sets]({{}}riak/kv/2.0.2/developing/data-types/sets), and +[maps]({{}}riak/kv/2.0.2/developing/data-types/maps). #### Relevant Docs -* [Using Data Types](/riak/kv/2.0.2/developing/data-types) explains how to use Riak Data Types on the +* [Using Data Types]({{}}riak/kv/2.0.2/developing/data-types) explains how to use Riak Data Types on the application side, with usage examples for all five Data Types in all of Basho's officially supported clients (Java, Ruby, Python, .NET and Erlang) and for Riak's HTTP interface. 
-* [Data Types](/riak/kv/2.0.2/developing/data-types) explains some of the theoretical concerns that drive +* [Data Types]({{}}riak/kv/2.0.2/developing/data-types) explains some of the theoretical concerns that drive Riak Data Types and shares details about how they are implemented in Riak. @@ -103,11 +103,11 @@ Search, integrating Riak with [Apache Solr](https://lucene.apache.org/solr/)'s f #### Relevant Docs -* [Using Search](/riak/kv/2.0.2/developing/usage/search) provides an overview of how to use the new +* [Using Search]({{}}riak/kv/2.0.2/developing/usage/search) provides an overview of how to use the new Riak Search. -* [Search Schema](/riak/kv/2.0.2/developing/usage/search-schemas) shows you how to create and manage custom search +* [Search Schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas) shows you how to create and manage custom search schemas. -* [Search Details](/riak/kv/2.0.2/using/reference/search) provides an in-depth look at the design +* [Search Details]({{}}riak/kv/2.0.2/using/reference/search) provides an in-depth look at the design considerations that went into the new Riak Search. #### Video @@ -125,13 +125,13 @@ some (or perhaps all) of your data. #### Relevant Docs -* [Using Strong Consistency](/riak/kv/2.0.2/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong +* [Using Strong Consistency]({{}}riak/kv/2.0.2/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong consistency subsystem and to apply strong consistency guarantees to data stored in specified buckets. -* [Strong Consistency](/riak/kv/2.0.2/using/reference/strong-consistency) provides a theoretical treatment of how a - strongly consistent system differs from an [eventually consistent](/riak/kv/2.0.2/learn/concepts/eventual-consistency) system, as well as details about how +* [Strong Consistency]({{}}riak/kv/2.0.2/using/reference/strong-consistency) provides a theoretical treatment of how a + strongly consistent system differs from an [eventually consistent]({{}}riak/kv/2.0.2/learn/concepts/eventual-consistency) system, as well as details about how strong consistency is implemented in Riak. -* [Managing Strong Consistency](/riak/kv/2.0.2/configuring/strong-consistency) is a guide to strong consistency for +* [Managing Strong Consistency]({{}}riak/kv/2.0.2/configuring/strong-consistency) is a guide to strong consistency for Riak operators. #### Video @@ -155,11 +155,11 @@ Riak itself and managed through a simple command-line interface. #### Relevant Docs -* [Authentication and Authorization](/riak/kv/2.0.2/using/security/basics) explains how Riak Security can be +* [Authentication and Authorization]({{}}riak/kv/2.0.2/using/security/basics) explains how Riak Security can be enabled and disabled, how users and groups are managed, how authorization to perform certain operations can be granted and revoked, how security ciphers can be chosen, and more. -* [Managing Security Sources](/riak/kv/2.0.2/using/security/managing-sources/) is an in-depth tutorial on how to +* [Managing Security Sources]({{}}riak/kv/2.0.2/using/security/managing-sources/) is an in-depth tutorial on how to implement Riak's four supported authentication sources: trusted networks, passwords, pluggable authentication modules, and certificates. @@ -194,7 +194,7 @@ override any settings from the new system. 
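For a concrete sense of the Using Search workflow referenced above, here is a minimal query sketch, assuming a local node's HTTP interface on port 8098 and a hypothetical index named `famous` created beforehand per the Using Search guide:

```
curl "http://localhost:8098/search/query/famous?wt=json&q=name_s:Lion*"
```

The `name_s` field and wildcard are illustrative; any field defined by the index's schema can be queried the same way.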
#### Relevant Docs -* [Configuration Files](/riak/kv/2.0.2/configuring/reference/) lists and describes all of the configurable +* [Configuration Files]({{}}riak/kv/2.0.2/configuring/reference/) lists and describes all of the configurable parameters available in Riak 2.0, from configuring your chosen storage backend(s) to setting default bucket properties to controlling Riak's logging system and much more. @@ -214,7 +214,7 @@ and keys. #### Relevant Docs -* [Using Bucket Types](/riak/kv/2.0.2/using/reference/bucket-types) explains how to create, modify, and activate +* [Using Bucket Types]({{}}riak/kv/2.0.2/using/reference/bucket-types) explains how to create, modify, and activate bucket types, as well as how the new system differs from the older, bucket properties-based system. @@ -226,20 +226,20 @@ and [Jordan West](https://github.com/jrwest). ## Dotted Version Vectors -In prior versions of Riak, [conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution) was managed using -[vector clocks](/riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks), which track object update causality. +In prior versions of Riak, [conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution) was managed using +[vector clocks]({{}}riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks), which track object update causality. Riak 2.0 has added support for dotted version vectors (DVVs). DVVs serve an analogous role to vector -clocks but are more effective at containing [sibling explosion](/riak/kv/2.0.2/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. +clocks but are more effective at containing [sibling explosion]({{}}riak/kv/2.0.2/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. #### Relevant Docs -* [Dotted Version Vectors](/riak/kv/2.0.2/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. +* [Dotted Version Vectors]({{}}riak/kv/2.0.2/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. ## New Client Libraries -While Basho offered official [client libraries](/riak/kv/2.0.2/developing/client-libraries) for Java, Ruby, +While Basho offered official [client libraries]({{}}riak/kv/2.0.2/developing/client-libraries) for Java, Ruby, Python, .NET and Erlang for versions of Riak prior to 2.0, all clients have undergone major changes in anticipation of the 2.0 release. 
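The `dvv_enabled` property discussed above is an ordinary bucket type property, so toggling it follows the usual create/activate cycle. A minimal sketch, assuming a hypothetical type name `dvv_off`:

```
riak-admin bucket-type create dvv_off '{"props":{"dvv_enabled":false}}'
riak-admin bucket-type activate dvv_off

# confirm the property took effect
riak-admin bucket-type status dvv_off
```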
diff --git a/content/riak/kv/2.0.2/learn/concepts.md b/content/riak/kv/2.0.2/learn/concepts.md index b78ea10363..d4a2a13eef 100644 --- a/content/riak/kv/2.0.2/learn/concepts.md +++ b/content/riak/kv/2.0.2/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.0.2/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.0.2/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.2/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.2/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.2/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.2/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.2/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.2/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.2/learn/concepts/vnodes -[config index]: /riak/kv/2.0.2/configuring -[plan index]: /riak/kv/2.0.2/setup/planning -[use index]: /riak/kv/2.0.2/using/ +[concept aae]: {{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.0.2/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.2/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.2/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.2/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.2/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.2/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.0.2/configuring +[plan index]: {{}}riak/kv/2.0.2/setup/planning +[use index]: {{}}riak/kv/2.0.2/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.0.2/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.0.2/learn/concepts/active-anti-entropy.md index 2263acc8cf..e6f534f81a 100644 --- a/content/riak/kv/2.0.2/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.0.2/learn/concepts/active-anti-entropy.md @@ -16,15 +16,15 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.0.2/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.0.2/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.0.2/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.0.2/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.0.2/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.0.2/developing/usage/search +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.0.2/learn/concepts/buckets.md b/content/riak/kv/2.0.2/learn/concepts/buckets.md index 57815cafd3..8d48be3f77 100644 --- a/content/riak/kv/2.0.2/learn/concepts/buckets.md +++ b/content/riak/kv/2.0.2/learn/concepts/buckets.md @@ -15,25 +15,25 @@ aliases: - /riak/2.0.2/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.0.2/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.0.2/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.0.2/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.0.2/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.0.2/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.0.2/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.2/using/reference/strong-consistency -[config basic]: /riak/kv/2.0.2/configuring/basic -[dev api http]: /riak/kv/2.0.2/developing/api/http -[dev data types]: /riak/kv/2.0.2/developing/data-types -[glossary ring]: /riak/kv/2.0.2/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.2/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.2/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.2/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.2/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.2/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.0.2/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.0.2/using/cluster-operations/bucket-types +[cluster ops strong consistency]: 
{{}}riak/kv/2.0.2/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.0.2/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.0.2/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.0.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.0.2/configuring/basic +[dev api http]: {{}}riak/kv/2.0.2/developing/api/http +[dev data types]: {{}}riak/kv/2.0.2/developing/data-types +[glossary ring]: {{}}riak/kv/2.0.2/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.2/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.2/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.2/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.2/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.2/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.0.2/learn/concepts/capability-negotiation.md b/content/riak/kv/2.0.2/learn/concepts/capability-negotiation.md index 129096ede9..2dad0eb67a 100644 --- a/content/riak/kv/2.0.2/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.0.2/learn/concepts/capability-negotiation.md @@ -15,9 +15,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.0.2/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.0.2/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.0.2/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.0.2/developing/usage/mapreduce In versions of Riak prior to 1.2.0, [rolling upgrades][upgrade cluster] from an older version of Riak to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.0.2/learn/concepts/causal-context.md b/content/riak/kv/2.0.2/learn/concepts/causal-context.md index 612e1719f3..0fdf736825 100644 --- a/content/riak/kv/2.0.2/learn/concepts/causal-context.md +++ b/content/riak/kv/2.0.2/learn/concepts/causal-context.md @@ -15,18 +15,18 @@ aliases: --- -[concept aae]: /riak/kv/2.0.2/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.0.2/developing/api/http -[dev key value]: /riak/kv/2.0.2/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.0.2/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.0.2/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.2/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.0.2/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.0.2/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.0.2/developing/api/http +[dev key value]: {{}}riak/kv/2.0.2/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.0.2/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.0.2/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.2/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.0.2/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.0.2/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -77,7 +77,7 @@ sections below. In the case of outcome 2, the choice between **a** and **b** is yours to make. If you set the `allow_mult` parameter to `true` for a bucket, -[using bucket types](/riak/kv/2.0.2/developing/usage/bucket-types), all writes to that bucket will create siblings +[using bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types), all writes to that bucket will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions). 
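To make the `allow_mult` behavior described above concrete, here is a sketch with hypothetical type, bucket, and key names: create a bucket type that keeps siblings, and note that every fetch over HTTP returns the object's causal context in the `X-Riak-Vclock` header, which should be echoed back on the subsequent write so Riak can order the update:

```
riak-admin bucket-type create keep_mult '{"props":{"allow_mult":true}}'
riak-admin bucket-type activate keep_mult

# -i prints response headers, including X-Riak-Vclock (the causal context)
curl -i "http://localhost:8098/types/keep_mult/buckets/sample/keys/sample-key"
```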
diff --git a/content/riak/kv/2.0.2/learn/concepts/clusters.md b/content/riak/kv/2.0.2/learn/concepts/clusters.md index 982b531837..b520de3434 100644 --- a/content/riak/kv/2.0.2/learn/concepts/clusters.md +++ b/content/riak/kv/2.0.2/learn/concepts/clusters.md @@ -16,15 +16,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.2/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.0.2/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.2/learn/concepts/replication -[glossary node]: /riak/kv/2.0.2/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.0.2/learn/dynamo -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.2/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.2/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.0.2/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.0.2/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.2/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.2/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.0.2/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.2/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.2/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -61,7 +61,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -93,7 +93,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
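A sketch of the `artist`/`REM` example above over the HTTP API (bucket name and port are illustrative; the client never needs to know which partitions claim the key):

```
# Write the value; Riak hashes the bucket/key pair onto the ring
# and stores it on N partitions
curl -XPUT "http://localhost:8098/buckets/artists/keys/artist" \
  -H "Content-Type: text/plain" \
  -d "REM"

# Read it back from any node in the cluster
curl "http://localhost:8098/buckets/artists/keys/artist"
```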
When a diff --git a/content/riak/kv/2.0.2/learn/concepts/crdts.md b/content/riak/kv/2.0.2/learn/concepts/crdts.md index 39e5669bc9..4679e48ef5 100644 --- a/content/riak/kv/2.0.2/learn/concepts/crdts.md +++ b/content/riak/kv/2.0.2/learn/concepts/crdts.md @@ -15,15 +15,15 @@ aliases: --- -[concept causal context dvv]: /riak/kv/2.0.2/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.0.2/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.0.2/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.0.2/developing/data-types -[glossary node]: /riak/kv/2.0.2/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.0.2/developing/usage/conflict-resolution +[concept causal context dvv]: {{}}riak/kv/2.0.2/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.0.2/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.0.2/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.0.2/developing/data-types +[glossary node]: {{}}riak/kv/2.0.2/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.0.2/developing/usage/conflict-resolution A pure key/value store is completely agnostic toward the data stored @@ -31,7 +31,7 @@ within it. Any key can be associated with values of any conceivable type, from short strings to large JSON objects to video files. Riak began as a pure key/value store, but over time it has become more and more aware of the data stored in it through features like [secondary -indexes](/riak/kv/2.0.2/developing/usage/secondary-indexes/) and [Search](/riak/kv/2.0.2/developing/usage/search/). +indexes]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes/) and [Search]({{}}riak/kv/2.0.2/developing/usage/search/). In version 2.0, Riak continued this evolution by introducing a series of eventually convergent **Data Types**. Riak Data Types are convergent @@ -213,7 +213,7 @@ The beauty of Data Types is that Riak "knows" how to resolve value conflicts by applying Data Type-specific rules. In general, Riak does this by remembering the **history** of a value and broadcasting that history along with the current value in the form of a [context -object](/riak/kv/2.0.2/developing/data-types/#Data-Types-and-Context) that is similar to a +object]({{}}riak/kv/2.0.2/developing/data-types/#Data-Types-and-Context) that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak uses the history of each Data Type to make deterministic judgments about which value should be deemed correct. 
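Because Riak tracks that history itself, Data Type operations are expressed as mutations rather than client-side read-modify-write cycles. A sketch using a counter (type, bucket, and key names are hypothetical; the bucket type must be created with its `datatype` property set to `counter` first):

```
riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
riak-admin bucket-type activate counters

# Send an increment; no fetch or context juggling is needed on the client
curl -XPOST "http://localhost:8098/types/counters/buckets/scores/datatypes/game1" \
  -H "Content-Type: application/json" \
  -d '{"increment": 1}'

# Read the converged value
curl "http://localhost:8098/types/counters/buckets/scores/datatypes/game1"
```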
diff --git a/content/riak/kv/2.0.2/learn/concepts/eventual-consistency.md b/content/riak/kv/2.0.2/learn/concepts/eventual-consistency.md index 7cbe9f5101..237c6b666b 100644 --- a/content/riak/kv/2.0.2/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.0.2/learn/concepts/eventual-consistency.md @@ -16,14 +16,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.2/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters -[concept replication]: /riak/kv/2.0.2/learn/concepts/replication -[glossary node]: /riak/kv/2.0.2/learn/glossary/#node -[glossary read rep]: /riak/kv/2.0.2/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.2/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.0.2/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.0.2/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.2/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.0.2/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.2/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -52,14 +52,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.0.2/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.0.2/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.0.2/developing/data-modeling/). +or models]({{}}riak/kv/2.0.2/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -76,7 +76,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.0.2/learn/concepts/keys-and-objects.md b/content/riak/kv/2.0.2/learn/concepts/keys-and-objects.md index 2bf0bfa8a5..27e4f74f14 100644 --- a/content/riak/kv/2.0.2/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.0.2/learn/concepts/keys-and-objects.md @@ -14,8 +14,8 @@ aliases: - /riak/2.0.2/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.0.2/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.0.2/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.0.2/learn/concepts/replication.md b/content/riak/kv/2.0.2/learn/concepts/replication.md index 982c36c34e..9ade97250e 100644 --- a/content/riak/kv/2.0.2/learn/concepts/replication.md +++ b/content/riak/kv/2.0.2/learn/concepts/replication.md @@ -16,14 +16,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.0.2/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.0.2/learn/concepts/vnodes -[glossary node]: /riak/kv/2.0.2/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.2/learn/glossary/#ring -[usage replication]: /riak/kv/2.0.2/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.0.2/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.0.2/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.2/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.0.2/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -35,13 +35,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.0.2/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.0.2/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.0.2/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.0.2/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -68,7 +68,7 @@ nodes with the data will cause the read to fail. 
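The hunk that follows walks through setting `n_val` to 2 via a bucket type; as a compact sketch of that procedure (the type name here is illustrative):

```
riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
riak-admin bucket-type activate n_val_of_2
```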
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.0.2/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.0.2/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -99,8 +99,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.0.2/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.0.2/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.0.2/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.0.2/learn/concepts/strong-consistency.md b/content/riak/kv/2.0.2/learn/concepts/strong-consistency.md index 44237d7c37..fc4da41177 100644 --- a/content/riak/kv/2.0.2/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.0.2/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.2/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.2/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.2/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.0.2/learn/concepts/vnodes.md b/content/riak/kv/2.0.2/learn/concepts/vnodes.md index de17e6d58c..e7d310b204 100644 --- a/content/riak/kv/2.0.2/learn/concepts/vnodes.md +++ b/content/riak/kv/2.0.2/learn/concepts/vnodes.md @@ -15,16 +15,16 @@ aliases: --- -[concept causal context]: /riak/kv/2.0.2/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.0.2/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.0.2/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.2/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.0.2/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.2/learn/glossary/#ring -[perf strong consistency]: /riak/kv/2.0.2/using/performance/strong-consistency -[plan backend]: /riak/kv/2.0.2/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.2/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.0.2/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.0.2/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.0.2/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.0.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.2/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.0.2/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.2/learn/glossary/#ring +[perf strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[plan backend]: {{}}riak/kv/2.0.2/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.2/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.0.2/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -80,7 +80,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -102,7 +102,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.0.2/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.0.2/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.0.2/learn/dynamo.md b/content/riak/kv/2.0.2/learn/dynamo.md index 13c8c9ace1..38da13efee 100644 --- a/content/riak/kv/2.0.2/learn/dynamo.md +++ b/content/riak/kv/2.0.2/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.0.2/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.0.2/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.0.2/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.0.2/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. 
It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. -[HTTP API]: /riak/kv/2.0.2/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.0.2/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.0.2/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.0.2/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.0.2/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.0.2/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.0.2/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.0.2/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.0.2/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.0.2/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.0.2/developing/api/http/) +>[REST API]({{}}riak/kv/2.0.2/developing/api/http/) > ->[Writing Data](/riak/kv/2.0.2/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.0.2/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.0.2/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.0.2/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.0.2/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.0.2/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.0.2/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.0.2/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.0.2/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.0.2/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. 
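The R and W quorum values described above can also be supplied per request. A sketch against the HTTP API, with illustrative bucket, key, and quorum values:

```
# Require 2 vnode acknowledgements for this write...
curl -XPUT "http://localhost:8098/buckets/sample/keys/sample-key?w=2" \
  -H "Content-Type: text/plain" \
  -d "hello"

# ...and 3 vnode replies for this read
curl "http://localhost:8098/buckets/sample/keys/sample-key?r=3"
```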
-[Multi Datacenter Replication]: /riak/kv/2.0.2/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.0.2/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. > This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.0.2/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.0.2/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.0.2/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.0.2/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.0.2/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.0.2/setup/planning/backend/ -[Bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.0.2/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.0.2/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.0.2/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.0.2/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.0.2/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.0.2/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.0.2/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.0.2/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. 
Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.0.2/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.2/using/performance/benchmarking/ Dynamo is used by several services with different configurations. These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.0.2/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.0.2/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.0.2/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.0.2/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.0.2/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.0.2/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.0.2/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.0.2/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.0.2/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.0.2/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. 
-[Basho Bench]: /riak/kv/2.0.2/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.2/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. -[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.0.2/learn/glossary.md b/content/riak/kv/2.0.2/learn/glossary.md index 7f39e45535..3c104e1f25 100644 --- a/content/riak/kv/2.0.2/learn/glossary.md +++ b/content/riak/kv/2.0.2/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.0.2/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.0.2/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.0.2/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.2/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.2/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.2/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.2/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.0.2/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.0.2/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.0.2/developing/api/http -[dev data model]: /riak/kv/2.0.2/developing/data-modeling -[dev data types]: /riak/kv/2.0.2/developing/data-types -[glossary read rep]: /riak/kv/2.0.2/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.0.2/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.0.2/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.2/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.2/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.2/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.0.2/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.0.2/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.0.2/developing/api/http +[dev data model]: {{}}riak/kv/2.0.2/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.2/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.2/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.0.2/learn/dynamo -[plan cluster 
capacity]: /riak/kv/2.0.2/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.0.2/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.0.2/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.0.2/learn/dynamo +[plan cluster capacity]: {{}}riak/kv/2.0.2/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.0.2/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.0.2/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.0.2/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.2/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.2/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.2/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.2/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.0.2/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.0.2/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. 
-* [Bucket Types](/riak/kv/2.0.2/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.0.2/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.0.2/learn/use-cases.md b/content/riak/kv/2.0.2/learn/use-cases.md index 522e2a1609..395165b3f2 100644 --- a/content/riak/kv/2.0.2/learn/use-cases.md +++ b/content/riak/kv/2.0.2/learn/use-cases.md @@ -15,20 +15,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.0.2/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.0.2/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.0.2/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.0.2/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.0.2/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.0.2/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.0.2/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.0.2/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.0.2/developing/data-types -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.0.2/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.2/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.2/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.0.2/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.0.2/developing/data-modeling/#log-data +[dev data model sensor data]: {{}}riak/kv/2.0.2/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.0.2/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.0.2/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.0.2/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.0.2/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.0.2/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.0.2/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.0.2/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.0.2/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.2/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. 
In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.0.2/learn/why-riak-kv.md b/content/riak/kv/2.0.2/learn/why-riak-kv.md index 8758d45064..9987196183 100644 --- a/content/riak/kv/2.0.2/learn/why-riak-kv.md +++ b/content/riak/kv/2.0.2/learn/why-riak-kv.md @@ -15,14 +15,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.0.2/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.0.2/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.2/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.0.2/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.0.2/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.0.2/developing/data-types -[glossary read rep]: /riak/kv/2.0.2/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.0.2/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.2/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.0.2/setup/downgrade.md b/content/riak/kv/2.0.2/setup/downgrade.md index 8015910800..c0e05b0453 100644 --- a/content/riak/kv/2.0.2/setup/downgrade.md +++ b/content/riak/kv/2.0.2/setup/downgrade.md @@ -17,7 +17,7 @@ aliases: Downgrades of Riak are tested and supported for two feature release versions, with the general procedure being similar to that of a -[rolling upgrade](/riak/kv/2.0.2/setup/upgrading/cluster). +[rolling upgrade]({{}}riak/kv/2.0.2/setup/upgrading/cluster). {{% note title="End Of Life Warning" %}} We test downgrading for two feature release versions. However, all versions below KV 2.0 are End Of Life (EOL) and unsupported. Please be aware of that if you choose to downgrade. @@ -49,9 +49,9 @@ both 1.4 and 1.3 are performed. * Riak Control should be disabled throughout the rolling downgrade process -* [Configuration Files](/riak/kv/2.0.2/configuring/reference) must be replaced with those of the version +* [Configuration Files]({{}}riak/kv/2.0.2/configuring/reference) must be replaced with those of the version being downgraded to -* [Active anti-entropy](/riak/kv/2.0.2/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version +* [Active anti-entropy]({{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version below 1.3. ## Before Stopping a Node @@ -94,7 +94,7 @@ will need to be downgraded before the rolling downgrade begins. This can be done using the --downgrade flag with `riak-admin reformat-indexes` More information on the `riak-admin reformat-indexes` command, and downgrading indexes can be found in the -[`riak-admin`](/riak/kv/2.0.2/using/admin/riak-admin/#reformat-indexes) documentation. +[`riak-admin`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#reformat-indexes) documentation. 
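A sketch of the index downgrade step mentioned above (run on each node; the optional concurrency and batch-size arguments described in the riak-admin documentation are omitted here):

```
riak-admin reformat-indexes --downgrade
```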
## Before Starting a Node diff --git a/content/riak/kv/2.0.2/setup/installing.md b/content/riak/kv/2.0.2/setup/installing.md index 9d7375a064..05fdcfec96 100644 --- a/content/riak/kv/2.0.2/setup/installing.md +++ b/content/riak/kv/2.0.2/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.0.2/installing/ --- -[install aws]: /riak/kv/2.0.2/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.2/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.2/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.2/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.2/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.2/setup/installing/smartos -[install solaris]: /riak/kv/2.0.2/setup/installing/solaris -[install suse]: /riak/kv/2.0.2/setup/installing/suse -[install windows azure]: /riak/kv/2.0.2/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.2/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.0.2/setup/upgrading +[install aws]: {{}}riak/kv/2.0.2/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.2/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.2/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.2/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.2/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.2/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.2/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.2/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.2/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.0.2/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.0.2/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.0.2/setup/installing/amazon-web-services.md b/content/riak/kv/2.0.2/setup/installing/amazon-web-services.md index 7276269c49..dfb0000175 100644 --- a/content/riak/kv/2.0.2/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.0.2/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. @@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.0.2/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.0.2/using/security/). 
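The EC2 security-group steps above can also be scripted rather than clicked through. A minimal sketch using the AWS CLI, assuming a security group named `riak-sg` (hypothetical) and Riak's default client ports (8087 for Protocol Buffers, 8098 for HTTP); restrict the CIDR to your own network, never `0.0.0.0/0`:

```bash
# open Riak's default client ports to hosts inside the VPC only
aws ec2 authorize-security-group-ingress --group-name riak-sg \
  --protocol tcp --port 8087 --cidr 10.0.0.0/16
aws ec2 authorize-security-group-ingress --group-name riak-sg \
  --protocol tcp --port 8098 --cidr 10.0.0.0/16
```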
## Clustering Riak on AWS diff --git a/content/riak/kv/2.0.2/setup/installing/debian-ubuntu.md b/content/riak/kv/2.0.2/setup/installing/debian-ubuntu.md index 1af5c900d4..efc45fd44c 100644 --- a/content/riak/kv/2.0.2/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.0.2/setup/installing/debian-ubuntu.md @@ -20,10 +20,10 @@ aliases: -[install source index]: /riak/kv/2.0.2/setup/installing/source/ -[security index]: /riak/kv/2.0.2/using/security/ -[install source erlang]: /riak/kv/2.0.2/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.2/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.2/setup/installing/source/ +[security index]: {{}}riak/kv/2.0.2/using/security/ +[install source erlang]: {{}}riak/kv/2.0.2/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.2/setup/installing/verify > **Note: 2.0.2 not currently available** > diff --git a/content/riak/kv/2.0.2/setup/installing/freebsd.md b/content/riak/kv/2.0.2/setup/installing/freebsd.md index bcfe4f8ce4..cdeb1461f4 100644 --- a/content/riak/kv/2.0.2/setup/installing/freebsd.md +++ b/content/riak/kv/2.0.2/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.0.2/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.2/downloads/ -[install verify]: /riak/kv/2.0.2/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.2/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.2/downloads/ +[install verify]: {{}}riak/kv/2.0.2/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.0.2/setup/installing/mac-osx.md b/content/riak/kv/2.0.2/setup/installing/mac-osx.md index d20e8c2b7b..aaead9abbb 100644 --- a/content/riak/kv/2.0.2/setup/installing/mac-osx.md +++ b/content/riak/kv/2.0.2/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.0.2/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.0.2/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.2/setup/installing/verify +[perf open files]: {{}}riak/kv/2.0.2/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.0.2/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.2/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. 
You can install from source or download a diff --git a/content/riak/kv/2.0.2/setup/installing/rhel-centos.md b/content/riak/kv/2.0.2/setup/installing/rhel-centos.md index 16dee2e2b1..c3140f2d12 100644 --- a/content/riak/kv/2.0.2/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.0.2/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.0.2/setup/installing/source -[install source erlang]: /riak/kv/2.0.2/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.2/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.2/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.2/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.2/setup/installing/verify > **Note: 2.0.2 not currently available** > diff --git a/content/riak/kv/2.0.2/setup/installing/smartos.md b/content/riak/kv/2.0.2/setup/installing/smartos.md index 0707b7ebe7..c03847a33a 100644 --- a/content/riak/kv/2.0.2/setup/installing/smartos.md +++ b/content/riak/kv/2.0.2/setup/installing/smartos.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.2/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.2/setup/installing/verify The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. diff --git a/content/riak/kv/2.0.2/setup/installing/solaris.md b/content/riak/kv/2.0.2/setup/installing/solaris.md index 718025d116..d45c0cc364 100644 --- a/content/riak/kv/2.0.2/setup/installing/solaris.md +++ b/content/riak/kv/2.0.2/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.2/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.2/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. 
diff --git a/content/riak/kv/2.0.2/setup/installing/source.md b/content/riak/kv/2.0.2/setup/installing/source.md index bf558a6aef..057e9cdf88 100644 --- a/content/riak/kv/2.0.2/setup/installing/source.md +++ b/content/riak/kv/2.0.2/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.0.2/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.2/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.0.2/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.0.2/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.0.2/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.0.2/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.0.2/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.2/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.2/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.0.2/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.0.2/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.0.2/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.0.2/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.0.2/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.0.2/setup/installing/source/erlang.md b/content/riak/kv/2.0.2/setup/installing/source/erlang.md index 105495fddf..b800261722 100644 --- a/content/riak/kv/2.0.2/setup/installing/source/erlang.md +++ b/content/riak/kv/2.0.2/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.2/installing/source/erlang/ --- -[install index]: /riak/kv/2.0.2/setup/installing -[security basics]: /riak/kv/2.0.2/using/security/basics +[install index]: {{}}riak/kv/2.0.2/setup/installing +[security basics]: {{}}riak/kv/2.0.2/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho8.tar.gz). 
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.0.2/setup/installing/source/jvm.md b/content/riak/kv/2.0.2/setup/installing/source/jvm.md index eb3b8ce3b1..b2efc2d16a 100644 --- a/content/riak/kv/2.0.2/setup/installing/source/jvm.md +++ b/content/riak/kv/2.0.2/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.0.2/installing/source/jvm/ --- -[usage search]: /riak/kv/2.0.2/developing/usage/search +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.0.2/setup/installing/suse.md b/content/riak/kv/2.0.2/setup/installing/suse.md index 5d93ca9798..dad9c7e8ea 100644 --- a/content/riak/kv/2.0.2/setup/installing/suse.md +++ b/content/riak/kv/2.0.2/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.2/installing/suse/ --- -[install verify]: /riak/kv/2.0.2/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.2/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.0.2/setup/installing/verify.md b/content/riak/kv/2.0.2/setup/installing/verify.md index 7c6e52ed24..a34a183d19 100644 --- a/content/riak/kv/2.0.2/setup/installing/verify.md +++ b/content/riak/kv/2.0.2/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.2/installing/verify-install/ --- -[client libraries]: /riak/kv/2.0.2/developing/client-libraries -[perf open files]: /riak/kv/2.0.2/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.0.2/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.0.2/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.0.2/developing/client-libraries +[perf open files]: {{}}riak/kv/2.0.2/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.0.2/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.0.2/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.0.2/setup/installing/windows-azure.md b/content/riak/kv/2.0.2/setup/installing/windows-azure.md index d33298ff99..d40e40c1ec 100644 --- a/content/riak/kv/2.0.2/setup/installing/windows-azure.md +++ b/content/riak/kv/2.0.2/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". 
- ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.0.2/setup/planning/backend.md b/content/riak/kv/2.0.2/setup/planning/backend.md index 44defb2bdf..c4cfb279e2 100644 --- a/content/riak/kv/2.0.2/setup/planning/backend.md +++ b/content/riak/kv/2.0.2/setup/planning/backend.md @@ -14,11 +14,11 @@ aliases: - /riak/2.0.2/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.2/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.2/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.0.2/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.2/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.2/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.0.2/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
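The backend overview above ends with choosing a storage engine; in Riak 2.x that choice is a single `riak.conf` setting. A sketch, assuming the Linux package path `/etc/riak/riak.conf` (paths vary by platform):

```bash
# select Bitcask (also valid: leveldb, memory, multi), then restart the node
sudo sed -i 's/^storage_backend = .*/storage_backend = bitcask/' /etc/riak/riak.conf
sudo riak restart
```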
diff --git a/content/riak/kv/2.0.2/setup/planning/backend/bitcask.md b/content/riak/kv/2.0.2/setup/planning/backend/bitcask.md index cdcd5f4be4..928919b997 100644 --- a/content/riak/kv/2.0.2/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.0.2/setup/planning/backend/bitcask.md @@ -16,18 +16,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.0.2/using/admin/riak-cli -[config reference]: /riak/kv/2.0.2/configuring/reference -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.0.2/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.0.2/setup/planning/backend/multi -[usage search]: /riak/kv/2.0.2/developing/usage/search - -[glossary aae]: /riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.0.2/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.0.2/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.0.2/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.0.2/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.0.2/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.0.2/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.0.2/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.0.2/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
diff --git a/content/riak/kv/2.0.2/setup/planning/backend/leveldb.md b/content/riak/kv/2.0.2/setup/planning/backend/leveldb.md index 4e62dfdc11..29639de451 100644 --- a/content/riak/kv/2.0.2/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.0.2/setup/planning/backend/leveldb.md @@ -14,11 +14,11 @@ aliases: - /riak/2.0.2/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.2/configuring/reference -[perf index]: /riak/kv/2.0.2/using/performance -[config reference#aae]: /riak/kv/2.0.2/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[perf index]: {{}}riak/kv/2.0.2/using/performance +[config reference#aae]: {{}}riak/kv/2.0.2/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.0.2/setup/planning/backend/memory.md b/content/riak/kv/2.0.2/setup/planning/backend/memory.md index 3c0e4445b6..14eca8962d 100644 --- a/content/riak/kv/2.0.2/setup/planning/backend/memory.md +++ b/content/riak/kv/2.0.2/setup/planning/backend/memory.md @@ -14,10 +14,10 @@ aliases: - /riak/2.0.2/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.0.2/configuring/reference -[plan backend multi]: /riak/kv/2.0.2/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[plan backend multi]: {{}}riak/kv/2.0.2/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.0.2/setup/planning/backend/multi.md b/content/riak/kv/2.0.2/setup/planning/backend/multi.md index a8fdd6e250..edc7bd8bd0 100644 --- a/content/riak/kv/2.0.2/setup/planning/backend/multi.md +++ b/content/riak/kv/2.0.2/setup/planning/backend/multi.md @@ -14,13 +14,13 @@ aliases: - /riak/2.0.2/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.0.2/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.2/setup/planning/backend/memory -[config reference]: /riak/kv/2.0.2/configuring/reference -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.0.2/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.0.2/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.2/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.0.2/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.0.2/setup/planning/best-practices.md b/content/riak/kv/2.0.2/setup/planning/best-practices.md index 184f3835a4..2b23c48f20 100644 --- a/content/riak/kv/2.0.2/setup/planning/best-practices.md +++ b/content/riak/kv/2.0.2/setup/planning/best-practices.md @@ -14,10 +14,10 @@ aliases: - /riak/2.0.2/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.0.2/using/reference/handoff -[config mapreduce]: /riak/kv/2.0.2/configuring/mapreduce -[glossary aae]: /riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.0.2/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.0.2/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.0.2/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.0.2/setup/planning/bitcask-capacity-calc.md index 04bee6b474..076ae182c4 100644 --- a/content/riak/kv/2.0.2/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.0.2/setup/planning/bitcask-capacity-calc.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
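Roughly speaking, the Bitcask capacity calculators referenced above estimate keydir RAM as keys × (per-key overhead + average bucket-plus-key size) × `n_val`, divided across nodes. A back-of-envelope sketch with hypothetical numbers; the ~45-byte per-key overhead is an assumption, so use the linked calculators for real planning:

```bash
# 100M keys, 36-byte avg bucket+key, ~45B assumed overhead, n_val=3, 5 nodes
echo "$(( 100000000 * (36 + 45) * 3 / 5 / 1024 / 1024 )) MiB of keydir RAM per node"
```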
diff --git a/content/riak/kv/2.0.2/setup/planning/cluster-capacity.md b/content/riak/kv/2.0.2/setup/planning/cluster-capacity.md index 2b292fa2e1..cd948ec3ec 100644 --- a/content/riak/kv/2.0.2/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.0.2/setup/planning/cluster-capacity.md @@ -14,13 +14,13 @@ aliases: - /riak/2.0.2/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.0.2/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.0.2/setup/planning -[concept replication]: /riak/kv/2.0.2/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.0.2/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.0.2/configuring/reference -[perf benchmark]: /riak/kv/2.0.2/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.0.2/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.0.2/setup/planning +[concept replication]: {{}}riak/kv/2.0.2/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[perf benchmark]: {{}}riak/kv/2.0.2/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.0.2/setup/planning/operating-system.md b/content/riak/kv/2.0.2/setup/planning/operating-system.md index 97546bd087..3b6b8b33c3 100644 --- a/content/riak/kv/2.0.2/setup/planning/operating-system.md +++ b/content/riak/kv/2.0.2/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.0.2/downloads/ +[downloads]: {{}}riak/kv/2.0.2/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.0.2/setup/planning/start.md b/content/riak/kv/2.0.2/setup/planning/start.md index 1d61fb9bba..8651d07e0b 100644 --- a/content/riak/kv/2.0.2/setup/planning/start.md +++ b/content/riak/kv/2.0.2/setup/planning/start.md @@ -14,10 +14,10 @@ aliases: - /riak/2.0.2/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.0.2/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.2/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.0.2/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.0.2/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.2/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.0.2/setup/planning/bitcask-capacity-calc Here are some steps and recommendations designing and configuring your Riak cluster. 
diff --git a/content/riak/kv/2.0.2/setup/upgrading/checklist.md b/content/riak/kv/2.0.2/setup/upgrading/checklist.md index 0a005dd067..35e7d79ab9 100644 --- a/content/riak/kv/2.0.2/setup/upgrading/checklist.md +++ b/content/riak/kv/2.0.2/setup/upgrading/checklist.md @@ -16,24 +16,24 @@ aliases: - /riak/kv/2.0.2/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.0.2/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.2/using/performance +[perf open files]: {{}}riak/kv/2.0.2/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.2/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.0.2/using/security/basics -[cluster ops load balance]: /riak/kv/2.0.2/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.0.2/configuring/reference -[config backend]: /riak/kv/2.0.2/configuring/backend -[usage search]: /riak/kv/2.0.2/developing/usage/search -[usage conflict resolution]: /riak/kv/2.0.2/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.0.2/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.0.2/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.0.2/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.0.2/using/admin/commands -[use admin riak control]: /riak/kv/2.0.2/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.0.2/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.0.2/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.0.2/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.0.2/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.0.2/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[config backend]: {{}}riak/kv/2.0.2/configuring/backend +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.0.2/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.2/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.0.2/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.0.2/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.0.2/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.0.2/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.0.2/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.0.2/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a production environment from a development or testing environment can be a complex process. While the specific process will depend on your environment and practices, there are some basics for you to consider and a few questions you will want to ask while making this transition. 
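The production checklist above leads with the open-files limit. A quick sketch for checking and raising it; the persistent location varies by OS (`/etc/security/limits.conf` on most Linux systems):

```bash
ulimit -n           # show the current per-process open-files limit
ulimit -n 65536     # raise it for this shell; Riak nodes need a high ceiling
# persist it, e.g. in /etc/security/limits.conf:
#   riak soft nofile 65536
#   riak hard nofile 65536
```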
diff --git a/content/riak/kv/2.0.2/setup/upgrading/cluster.md b/content/riak/kv/2.0.2/setup/upgrading/cluster.md index a10d83bd5f..9645451c10 100644 --- a/content/riak/kv/2.0.2/setup/upgrading/cluster.md +++ b/content/riak/kv/2.0.2/setup/upgrading/cluster.md @@ -11,23 +11,23 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.0.2/ops/upgrading/rolling-upgrades/ - /riak/kv/2.0.2/ops/upgrading/rolling-upgrades/ --- -[production checklist]: /riak/kv/2.0.2/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.0.2/using/admin/riak-control -[use admin commands]: /riak/kv/2.0.2/using/admin/commands -[use admin riak-admin]: /riak/kv/2.0.2/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.0.2/developing/usage/secondary-indexes -[release notes]: /riak/kv/2.0.2/release-notes/ +[production checklist]: {{}}riak/kv/2.0.2/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.0.2/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.0.2/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.0.2/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.0.2/developing/usage/secondary-indexes +[release notes]: {{}}riak/kv/2.0.2/release-notes/ [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.0.2/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.0.2/using/reference/jmx -[snmp]: /riak/kv/2.0.2/using/reference/snmp +[cluster ops mdc]: {{}}riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.0.2/using/reference/jmx +[snmp]: {{}}riak/kv/2.0.2/using/reference/snmp {{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. @@ -38,7 +38,7 @@ recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. -If you run [Riak Control](/riak/kv/2.0.2/using/admin/riak-control), you should disable it during the rolling upgrade process. +If you run [Riak Control]({{}}riak/kv/2.0.2/using/admin/riak-control), you should disable it during the rolling upgrade process. {{% /note %}} Riak KV nodes negotiate with each other to determine supported @@ -104,9 +104,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.2/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.2/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.2/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.2/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.2/release-notes/). {{% /note %}} ## RHEL/CentOS @@ -166,9 +166,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. 
Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.2/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.2/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.2/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.2/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.2/release-notes/). {{% /note %}} ## Solaris/OpenSolaris @@ -252,9 +252,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.2/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.2/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.2/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.2/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.2/release-notes/). {{% /note %}} ## Rolling Upgrade to Enterprise diff --git a/content/riak/kv/2.0.2/setup/upgrading/search.md b/content/riak/kv/2.0.2/setup/upgrading/search.md index 31b049886d..0ae43b2703 100644 --- a/content/riak/kv/2.0.2/setup/upgrading/search.md +++ b/content/riak/kv/2.0.2/setup/upgrading/search.md @@ -11,7 +11,7 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" --- If you're using Search in a version of Riak prior to 2.0 (1.3.0 to @@ -270,4 +270,4 @@ search property is set to false. 11. Finally, delete the merge index directories to reclaim disk space. -For any questions reach out to the [Riak community](/community). Preferably, ask your questions up front rather than during the middle of a migration. +For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. diff --git a/content/riak/kv/2.0.2/setup/upgrading/version.md b/content/riak/kv/2.0.2/setup/upgrading/version.md index 7f3714565b..838712e80c 100644 --- a/content/riak/kv/2.0.2/setup/upgrading/version.md +++ b/content/riak/kv/2.0.2/setup/upgrading/version.md @@ -20,7 +20,7 @@ explains which default Riak behaviors have changed and specific steps to take for a successful upgrade. For an overview of the new features and functionality -included in version 2.0, check out our guide to [Riak 2.0](/riak/kv/2.0.2/introduction). +included in version 2.0, check out our guide to [Riak 2.0]({{}}riak/kv/2.0.2/introduction). ## New Clients @@ -36,14 +36,14 @@ was built with those features in mind. 
There are official While we strongly recommend using the newest versions of these clients, older versions will still work with Riak 2.0, with the drawback that -those older clients will not able to take advantage of [new features](/riak/kv/2.0.2/introduction) like [data types](/riak/kv/2.0.2/developing/data-types) or the new [Riak Search](/riak/kv/2.0.2/using/reference/search). +those older clients will not able to take advantage of [new features]({{}}riak/kv/2.0.2/introduction) like [data types]({{}}riak/kv/2.0.2/developing/data-types) or the new [Riak Search]({{}}riak/kv/2.0.2/using/reference/search). ## Bucket Types In versions of Riak prior to 2.0, the location of objects was -determined by objects' [bucket](/riak/kv/2.0.2/learn/concepts/buckets) and [key](/riak/kv/2.0.2/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties](/riak/kv/2.0.2/developing/usage/bucket-types/). +determined by objects' [bucket]({{}}riak/kv/2.0.2/learn/concepts/buckets) and [key]({{}}riak/kv/2.0.2/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties]({{}}riak/kv/2.0.2/developing/usage/bucket-types/). -In Riak 2.0, [bucket types](/riak/kv/2.0.2/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types](/riak/kv/2.0.2/using/reference/bucket-types). +In Riak 2.0, [bucket types]({{}}riak/kv/2.0.2/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types]({{}}riak/kv/2.0.2/using/reference/bucket-types). Here, we'll list some of the things to be aware of when upgrading. #### Bucket types and object location @@ -56,7 +56,7 @@ is determined by: * key This means there are 3 namespaces involved in object location instead of 2. -A full tutorial can be found in [Using Bucket Types](/riak/kv/2.0.2/using/reference/bucket-types). +A full tutorial can be found in [Using Bucket Types]({{}}riak/kv/2.0.2/using/reference/bucket-types). If your application was written using a version of Riak prior to 2.0, you should make sure that any endpoint in Riak targeting @@ -75,8 +75,8 @@ configurations. 
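Because bucket types add a third namespace component, the same bucket/key pair can be addressed with or without an explicit type. A sketch over the HTTP API, assuming the default port 8098 and hypothetical names:

```bash
# default bucket type (pre-2.0 style addressing: bucket/key)
curl http://localhost:8098/buckets/my_bucket/keys/my_key

# explicit bucket type: type/bucket/key, three namespaces instead of two
curl http://localhost:8098/types/my_type/buckets/my_bucket/keys/my_key
```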
The following URLs are equivalent in Riak 2.0: If you use object locations that don't specify a bucket type, you have three options: -* Accept Riak's [default bucket configurations](/riak/kv/2.0.2/using/reference/bucket-types/#buckets-as-namespaces) -* Change Riak's defaults using your [configuration files](/riak/kv/2.0.2/configuring/reference/#default-bucket-properties) +* Accept Riak's [default bucket configurations]({{}}riak/kv/2.0.2/using/reference/bucket-types/#buckets-as-namespaces) +* Change Riak's defaults using your [configuration files]({{}}riak/kv/2.0.2/configuring/reference/#default-bucket-properties) * Manage multiple sets of bucket properties by specifying those properties for all operations (not recommended) @@ -86,17 +86,17 @@ One reason we recommend using bucket types for Riak 2.0 and later is because many newer Riak features were built with bucket types as a precondition: -* [Strong consistency](/riak/2.0.2/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem +* [Strong consistency]({{}}riak/kv/2.0.2/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem requires you to set the `consistent` parameter on a bucket type to `true` -* [Riak Data Types](/riak/kv/2.0.2/developing/data-types) --- In order to use Riak Data - Types, you must [create bucket types](/riak/kv/2.0.2/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the +* [Riak Data Types]({{}}riak/kv/2.0.2/developing/data-types) --- In order to use Riak Data + Types, you must [create bucket types]({{}}riak/kv/2.0.2/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the Data Type you are using #### Bucket types and downgrades If you decide to use bucket types, please remember that you -cannot [downgrade](/riak/kv/2.0.2/setup/downgrade) your cluster to a version of +cannot [downgrade]({{}}riak/kv/2.0.2/setup/downgrade) your cluster to a version of Riak prior to 2.0 if you have both created and activated a bucket type. @@ -104,20 +104,20 @@ bucket type. One of the biggest changes in version 2.0 regarding application development involves Riak's default -[siblings](/riak/kv/2.0.2/learn/concepts/causal-context/#siblings) behavior. +[siblings]({{}}riak/kv/2.0.2/learn/concepts/causal-context/#siblings) behavior. In versions prior to 2.0, the `allow_mult` setting was set to `false` by default for all buckets. So Riak's default behavior was to resolve -object replica [conflicts](/riak/kv/2.0.2/developing/usage/conflict-resolution) between nodes on its +object replica [conflicts]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution) between nodes on its own; relieving connecting clients of the need to resolve those conflicts. **In 2.0, `allow_mult` is set to `true` for any bucket type that you create and activate.** -This means that the default when [using bucket types](/riak/kv/2.0.2/using/reference/bucket-types/) is to handle [conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution) on the client side using -either traditional [vector clocks](/riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors](/riak/kv/2.0.2/learn/concepts/causal-context/#dotted-version-vector). 
+This means that the default when [using bucket types]({{}}riak/kv/2.0.2/using/reference/bucket-types/) is to handle [conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution) on the client side using +either traditional [vector clocks]({{}}riak/kv/2.0.2/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors]({{}}riak/kv/2.0.2/learn/concepts/causal-context/#dotted-version-vector). If you wish to set `allow_mult` to `false` in version 2.0, you have two options: @@ -126,11 +126,11 @@ options: * Don't use bucket types. More information on handling siblings can be found in our documentation -on [conflict resolution](/riak/kv/2.0.2/developing/usage/conflict-resolution). +on [conflict resolution]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution). ## Enabling Security -The [authentication and authorization](/riak/kv/2.0.2/using/security/basics) mechanisms included with Riak 2.0 should only be turned +The [authentication and authorization]({{}}riak/kv/2.0.2/using/security/basics) mechanisms included with Riak 2.0 should only be turned on after careful testing in a non-production environment. Security changes the way all applications interact with Riak. @@ -140,12 +140,12 @@ If you decide to upgrade to version 2.0, you can still downgrade your cluster to an earlier version of Riak if you wish, _unless_ you perform one of the following actions in your cluster: -* Index data to be used in conjunction with the new [Riak Search](/riak/kv/2.0.2/using/reference/search). -* Create _and_ activate one or more [bucket types](/riak/kv/2.0.2/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: - - [Strong consistency](/riak/2.0.2/using/reference/strong-consistency) - - [Riak Data Types](/riak/kv/2.0.2/developing/data-types) +* Index data to be used in conjunction with the new [Riak Search]({{}}riak/kv/2.0.2/using/reference/search). +* Create _and_ activate one or more [bucket types]({{}}riak/kv/2.0.2/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: + - [Strong consistency]({{}}riak/kv/2.0.2/using/reference/strong-consistency) + - [Riak Data Types]({{}}riak/kv/2.0.2/developing/data-types) -If you use other new features, such as [Riak Security](/riak/kv/2.0.2/using/security/basics) or the new [configuration files](/riak/kv/2.0.2/configuring/reference/), you can still +If you use other new features, such as [Riak Security]({{}}riak/kv/2.0.2/using/security/basics) or the new [configuration files]({{}}riak/kv/2.0.2/configuring/reference/), you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. @@ -155,7 +155,7 @@ Riak 2.0 offers a new configuration system that both simplifies configuration syntax and uses one configuration file, `riak.conf`, instead of the two files, `app.config` and `vm.args`, required by the older system. Full documentation of the new system can be found in -[Configuration Files](/riak/kv/2.0.2/configuring/reference/). +[Configuration Files]({{}}riak/kv/2.0.2/configuring/reference/). If you're upgrading to Riak 2.0 from an earlier version, you have two configuration options: @@ -166,12 +166,12 @@ configuration options: recognized in Riak 2.0. 
If you choose the first option, make sure to consult the -[configuration files](/riak/kv/2.0.2/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. +[configuration files]({{}}riak/kv/2.0.2/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. If you choose the second option, Riak will automatically determine that the older configuration system is being used. You should be aware, however, that some settings must be set in an `advanced.config` file. -For a listing of those parameters, see our documentation on [advanced configuration](/riak/kv/2.0.2/configuring/reference/#advanced-configuration). +For a listing of those parameters, see our documentation on [advanced configuration]({{}}riak/kv/2.0.2/configuring/reference/#advanced-configuration). If you choose to keep the existing `app.config` files, you _must_ add the following additional settings in the `riak_core` section: @@ -209,7 +209,7 @@ default to a value of `15`, which can cause problems in some clusters. ## Upgrading Search Information on upgrading Riak Search to 2.0 can be found in our -[Search upgrade guide](/riak/kv/2.0.2/setup/upgrading/search). +[Search upgrade guide]({{}}riak/kv/2.0.2/setup/upgrading/search). ## Migrating from Short Names @@ -220,12 +220,11 @@ and `-name` in `vm.args`. If you are upgrading from a previous version of Riak to 2.0 and are using `-sname` in your `vm.args`, the below steps are required to migrate away from `-sname`. -1. Upgrade to Riak -[1.4.12](http://docs.basho.com/riak/1.4.12/downloads/). +1. Upgrade to Riak 1.4.12. 2. Back up the ring directory on each node, typically located in `/var/lib/riak/ring`. 3. Stop all nodes in your cluster. -4. Run [`riak-admin reip `](/riak/kv/2.0.2/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your +4. Run [`riak-admin reip `]({{}}riak/kv/2.0.2/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your cluster. For example, in a 5 node cluster this will be run 25 total times, 5 times on each node. The `` is the current shortname, and the `` is the new fully qualified hostname. 
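Step 4 of the short-names migration above runs `riak-admin reip` once per (node, ring-entry) pair. A sketch of a single invocation, with hypothetical node names standing in for the current shortname and the new fully qualified name:

```bash
# rewrite one ring entry from the old short name to the new FQDN-based name
riak-admin reip riak@oldhost riak@node1.example.com
```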
diff --git a/content/riak/kv/2.0.2/using.md b/content/riak/kv/2.0.2/using.md index ccc7cab391..281b066ef4 100644 --- a/content/riak/kv/2.0.2/using.md +++ b/content/riak/kv/2.0.2/using.md @@ -15,7 +15,7 @@ toc: true [use running cluster]: ../using/running-a-cluster [use admin index]: ../using/admin/ [cluster ops index]: ../using/cluster-operations -[repair recover index]: ../repair-recovery +[repair recover index]: ../using/repair-recovery [security index]: ../using/security [perf index]: ../using/performance [troubleshoot index]: ../using/troubleshooting diff --git a/content/riak/kv/2.0.2/using/admin/commands.md b/content/riak/kv/2.0.2/using/admin/commands.md index de98696935..2d6ca109c4 100644 --- a/content/riak/kv/2.0.2/using/admin/commands.md +++ b/content/riak/kv/2.0.2/using/admin/commands.md @@ -14,11 +14,11 @@ aliases: - /riak/2.0.2/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.0.2/using/admin/riak-admin/#cluster -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.0.2/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.0.2/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.0.2/using/admin/riak-admin.md b/content/riak/kv/2.0.2/using/admin/riak-admin.md index b75b553808..20ac9a7750 100644 --- a/content/riak/kv/2.0.2/using/admin/riak-admin.md +++ b/content/riak/kv/2.0.2/using/admin/riak-admin.md @@ -14,27 +14,27 @@ aliases: - /riak/2.0.2/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.0.2/configuring/reference -[use admin commands]: /riak/kv/2.0.2/using/admin/commands -[use admin commands#join]: /riak/kv/2.0.2/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.0.2/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.0.2/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.0.2/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.0.2/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.0.2/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.0.2/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.0.2/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.0.2/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.0.2/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.0.2/setup/downgrade -[security index]: /riak/kv/2.0.2/using/security/ -[security managing]: /riak/kv/2.0.2/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.0.2/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.0.2/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.0.2/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.0.2/using/cluster-operations/strong-consistency -[cluster ops 
handoff]: /riak/kv/2.0.2/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.0.2/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[use admin commands]: {{}}riak/kv/2.0.2/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.0.2/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.0.2/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.0.2/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.0.2/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.0.2/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.0.2/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.0.2/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.0.2/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.0.2/using/cluster-operations/inspecting-node +[use ref monitoring]: {{}}riak/kv/2.0.2/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.0.2/setup/downgrade +[security index]: {{}}riak/kv/2.0.2/using/security/ +[security managing]: {{}}riak/kv/2.0.2/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.0.2/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.0.2/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.0.2/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.0.2/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.0.2/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.0.2/using/admin/riak-admin/#stats ## riak-admin diff --git a/content/riak/kv/2.0.2/using/admin/riak-cli.md b/content/riak/kv/2.0.2/using/admin/riak-cli.md index 83ace0043e..1bcad1c549 100644 --- a/content/riak/kv/2.0.2/using/admin/riak-cli.md +++ b/content/riak/kv/2.0.2/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.2/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.0.2/configuring/reference/ +[configuration file]: {{}}riak/kv/2.0.2/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.0.2/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.0.2/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.0.2/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.0.2/configuring/reference/ ## riak diff --git a/content/riak/kv/2.0.2/using/admin/riak-control.md b/content/riak/kv/2.0.2/using/admin/riak-control.md index f80bb94650..c0d6293e7c 100644 --- a/content/riak/kv/2.0.2/using/admin/riak-control.md +++ b/content/riak/kv/2.0.2/using/admin/riak-control.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.0.2/configuring/reference +[config reference]: {{}}riak/kv/2.0.2/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -108,7 +108,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.0.2/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.0.2/using/security/basics#enabling-ssl). 
Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -181,7 +181,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -203,29 +203,29 @@ as adding, removing, and marking nodes as down. Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.0.2/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.0.2/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.0.2/using/cluster-operations.md b/content/riak/kv/2.0.2/using/cluster-operations.md index a47c06ad75..7b3d8100ec 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations.md +++ b/content/riak/kv/2.0.2/using/cluster-operations.md @@ -20,7 +20,6 @@ toc: true [ops log]: ./logging [ops backup]: ./backing-up [ops handoff]: ./handoff -[ops obj del]: ./object-deletion [ops strong consistency]: ./strong-consistency [ops v3 mdc]: ./v3-multi-datacenter [ops v2 mdc]: ./v2-multi-datacenter @@ -84,13 +83,6 @@ Information on using the `riak-admin handoff` interface to enable and disable ha [Learn More >>][ops handoff] -#### [Object Deletion][ops obj del] - -Describes possible settings for `delete_mode`. - -[Learn More >>][ops obj del] - - #### [Monitoring Strong Consistency][ops strong consistency] Overview of the various statistics used in monitoring strong consistency. 
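For the strong-consistency monitoring entry above, the usual starting point is the ensemble status report, which summarizes each consensus ensemble, its leader, and quorum health:

```bash
# overview of all consensus ensembles in the cluster
riak-admin ensemble-status
```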
diff --git a/content/riak/kv/2.0.2/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.0.2/using/cluster-operations/active-anti-entropy.md index 6f8d471752..b64290d9d4 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/active-anti-entropy.md @@ -54,12 +54,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. ## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -87,7 +87,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes.md index fd61c34e33..55faaac947 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.2/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.0.2/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.0.2/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.0.2/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.0.2/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.0.2/using/cluster-operations/backing-up.md b/content/riak/kv/2.0.2/using/cluster-operations/backing-up.md index 61852480e3..d901fd59dd 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.2/ops/running/backups --- -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters -[config reference]: /riak/kv/2.0.2/configuring/reference -[plan backend leveldb]: /riak/kv/2.0.2/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.2/using/reference/strong-consistency -[concept aae]: /riak/kv/2.0.2/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.0.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.0.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.0.2/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.0.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.0.2/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down `](/riak/kv/2.0.2/using/admin/riak-admin/#down) + [`riak-admin down `]({{}}riak/kv/2.0.2/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join `](/riak/kv/2.0.2/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join `]({{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace `](/riak/kv/2.0.2/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace `]({{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.0.2/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.0.2/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.0.2/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.0.2/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.0.2/using/cluster-operations/bucket-types.md b/content/riak/kv/2.0.2/using/cluster-operations/bucket-types.md index 2a195e7850..e163929012 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.0.2/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. 
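That command-line setup step is the two-phase `riak-admin bucket-type` workflow: create a type, then activate it. A brief sketch, with the type name and properties below chosen purely for illustration:

```bash
# Create a bucket type carrying custom properties (example name and props)
riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'

# Check that the type is ready, then activate it so buckets can use it
riak-admin bucket-type status n_val_of_2
riak-admin bucket-type activate n_val_of_2
```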
diff --git a/content/riak/kv/2.0.2/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.0.2/using/cluster-operations/changing-cluster-info.md index 923aa1bae1..1381b61bb1 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.2/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.0.2/configuring/reference +[config reference]: {{}}riak/kv/2.0.2/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.0.2/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.0.2/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.0.2/using/cluster-operations/handoff.md b/content/riak/kv/2.0.2/using/cluster-operations/handoff.md index af4cd0c5c7..e04429ef28 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.0.2/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.0.2/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.0.2/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.0.2/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.0.2/using/cluster-operations/logging.md b/content/riak/kv/2.0.2/using/cluster-operations/logging.md index 6ab48bd1ba..9719061ed3 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/logging.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e. 
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.0.2/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.0.2/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.0.2/using/cluster-operations/replacing-node.md b/content/riak/kv/2.0.2/using/cluster-operations/replacing-node.md index aa6f527970..1db64875c8 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.0.2/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.0.2/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.0.2/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.0.2/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.0.2/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.0.2/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.0.2/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.0.2/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.0.2/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.0.2/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.0.2/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.0.2/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.0.2/using/cluster-operations/strong-consistency.md index 692f883633..c1ee3405dc 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.0.2/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.0.2/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.0.2/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.0.2/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.0.2/using/cluster-operations/v2-multi-datacenter.md index 0be4c4b71e..638cd4da12 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/v2-multi-datacenter.md @@ -158,7 +158,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -178,7 +178,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -216,7 +216,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.0.2/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.0.2/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -237,7 +237,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter.md index 8078c6a05f..d68bfdefdf 100644 --- a/content/riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter.md @@ -16,11 +16,11 @@ aliases: - /riak/2.0.2/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.0.2/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.2/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.2/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.0.2/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.0.2/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.0.2/using/performance.md b/content/riak/kv/2.0.2/using/performance.md index 4e9965abba..2a66c84730 100644 --- a/content/riak/kv/2.0.2/using/performance.md +++ b/content/riak/kv/2.0.2/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.0.2/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.0.2/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -237,12 +237,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.0.2/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.0.2/using/performance/open-files-limit/) for more details. 
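As a quick sanity check before consulting that page, you can inspect the limit in effect for the shell that launches Riak and, where the hard limit allows, raise it in place. The value below is an illustrative assumption, not a tested recommendation:

```bash
# Show the open-files limit for the current shell
ulimit -n

# Raise it for this session only (example value; persistent changes
# belong in /etc/security/limits.conf or the init script)
ulimit -n 65536
```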
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.0.2/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.0.2/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.0.2/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.0.2/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.0.2/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.0.2/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.0.2/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.0.2/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.0.2/using/performance/benchmarking.md b/content/riak/kv/2.0.2/using/performance/benchmarking.md index 262eaab3c0..676702555a 100644 --- a/content/riak/kv/2.0.2/using/performance/benchmarking.md +++ b/content/riak/kv/2.0.2/using/performance/benchmarking.md @@ -50,7 +50,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.0.2/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.0.2/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.0.2/using/performance/latency-reduction.md b/content/riak/kv/2.0.2/using/performance/latency-reduction.md index c069e2a947..21bd4afaa2 100644 --- a/content/riak/kv/2.0.2/using/performance/latency-reduction.md +++ b/content/riak/kv/2.0.2/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.0.2/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.0.2/using/performance/multi-datacenter-tuning.md index 6faca741f1..9bed06c30b 100644 --- a/content/riak/kv/2.0.2/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.0.2/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.0.2/using/performance +[perf index]: {{}}riak/kv/2.0.2/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.0.2/using/performance/open-files-limit.md b/content/riak/kv/2.0.2/using/performance/open-files-limit.md index 691e8fe783..1cb493c674 100644 --- a/content/riak/kv/2.0.2/using/performance/open-files-limit.md +++ b/content/riak/kv/2.0.2/using/performance/open-files-limit.md @@ -13,10 +13,9 @@ toc: true aliases: - /riak/2.0.2/ops/tuning/open-files-limit/ - /riak/kv/2.0.2/ops/tuning/open-files-limit/ -canonical_link: "https://docs.basho.com/riak/kv/latest/using/performance/open-files-limit" --- -[plan backend bitcask]: /riak/kv/2.0.2/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.2/setup/planning/backend/bitcask Riak can consume a large number of open file handles during normal operation. 
The [Bitcask][plan backend bitcask] backend in particular may accumulate a high diff --git a/content/riak/kv/2.0.2/using/reference/bucket-types.md b/content/riak/kv/2.0.2/using/reference/bucket-types.md index 74c040b14b..829340e257 100644 --- a/content/riak/kv/2.0.2/using/reference/bucket-types.md +++ b/content/riak/kv/2.0.2/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.0.2/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.0.2/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.0.2/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.0.2/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. @@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.0.2/developing/data-types), and [strong consistency](/riak/kv/2.0.2/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.0.2/developing/data-types), and [strong consistency]({{}}riak/kv/2.0.2/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.0.2/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.0.2/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.2/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.0.2/developing/getting-started) section. 
+> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.2/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.2/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.0.2/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.0.2/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.0.2/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution) with the appropriate application-side business logic. To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.0.2/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.0.2/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.0.2/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.0.2/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.0.2/learn/concepts/buckets) and [keys](/riak/kv/2.0.2/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.0.2/learn/concepts/buckets) and [keys]({{}}riak/kv/2.0.2/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. 
We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.0.2/using/reference/custom-code.md b/content/riak/kv/2.0.2/using/reference/custom-code.md index 0fc8bfbc5c..afcbd1e7fd 100644 --- a/content/riak/kv/2.0.2/using/reference/custom-code.md +++ b/content/riak/kv/2.0.2/using/reference/custom-code.md @@ -15,10 +15,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.0.2/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.0.2/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.0.2/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.0.2/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name as the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -122,7 +122,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.0.2/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.0.2/using/admin/riak-admin/#wait-for-service). {{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.0.2/using/reference/handoff.md b/content/riak/kv/2.0.2/using/reference/handoff.md index cee5fe08ad..0452f93abc 100644 --- a/content/riak/kv/2.0.2/using/reference/handoff.md +++ b/content/riak/kv/2.0.2/using/reference/handoff.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.0.2/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.0.2/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -34,13 +34,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.0.2/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.0.2/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.0.2/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.0.2/learn/glossary/#vnode) no longer belongs to the node on which it's running. 
This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -53,7 +53,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.0.2/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.0.2/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -120,14 +120,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.0.2/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.0.2/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.0.2/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.0.2/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.0.2/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.0.2/configuring/reference/#vnode_management_timer). Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.0.2/using/reference/jmx.md b/content/riak/kv/2.0.2/using/reference/jmx.md index 63d27e83e1..b2d1b666ae 100644 --- a/content/riak/kv/2.0.2/using/reference/jmx.md +++ b/content/riak/kv/2.0.2/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.2/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.0.2/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.0.2/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.0.2/using/reference/logging.md b/content/riak/kv/2.0.2/using/reference/logging.md index 444113d7ad..adb2fdf211 100644 --- a/content/riak/kv/2.0.2/using/reference/logging.md +++ b/content/riak/kv/2.0.2/using/reference/logging.md @@ -14,13 +14,13 @@ aliases: - /riak/2.0.2/ops/running/logging --- -[cluster ops log]: /riak/kv/2.0.2/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.0.2/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. 
A compact listing of parameters can be found in our [configuration files](/riak/kv/2.0.2/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.0.2/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -45,7 +45,7 @@ File | Significance `console.log` | Console log output `crash.log` | Crash logs `erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. -`error.log` | [Common errors](../../repair-recover/errors) emitted by Riak. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. `run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. ## Log Syntax @@ -258,11 +258,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.0.2/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.0.2/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. * `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.0.2/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.0.2/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.0.2/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.0.2/using/reference/multi-datacenter/comparison.md index 9fb4bd64ea..cf279cc798 100644 --- a/content/riak/kv/2.0.2/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.0.2/using/reference/multi-datacenter/comparison.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.0.2/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.0.2/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.0.2/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.0.2/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -88,7 +88,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. 
-* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.0.2/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.0.2/using/reference/object-deletion.md b/content/riak/kv/2.0.2/using/reference/object-deletion.md index 53398acf2e..ab1b2413e0 100644 --- a/content/riak/kv/2.0.2/using/reference/object-deletion.md +++ b/content/riak/kv/2.0.2/using/reference/object-deletion.md @@ -38,7 +38,7 @@ concretely using the following example: * The object has been marked as deleted on nodes A and B, but it still lives on node C * A client attempts to read the object, Riak senses that there are - divergent replicas and initiates a repair process (either [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) or [active anti-entropy](../../../learn/concepts/active-anti-entropy/), + divergent replicas and initiates a repair process (either [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) or [active anti-entropy](../../../learn/concepts/active-anti-entropy/), depending on configuration) At this point, Riak needs to make a decision about what to do. Should diff --git a/content/riak/kv/2.0.2/using/reference/runtime-interaction.md b/content/riak/kv/2.0.2/using/reference/runtime-interaction.md index c72511d376..1f2c6d0443 100644 --- a/content/riak/kv/2.0.2/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.0.2/using/reference/runtime-interaction.md @@ -14,8 +14,8 @@ aliases: - /riak/2.0.2/ops/advanced/runtime --- -[config reference]: /riak/kv/2.0.2/configuring/reference -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.2/configuring/reference +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.0.2/using/reference/search.md b/content/riak/kv/2.0.2/using/reference/search.md index 614cad7f30..a941208f65 100644 --- a/content/riak/kv/2.0.2/using/reference/search.md +++ b/content/riak/kv/2.0.2/using/reference/search.md @@ -14,20 +14,20 @@ aliases: - /riak/2.0.2/dev/advanced/search --- -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak Search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -Search, you should check out the [Using Search](/riak/kv/2.0.2/developing/usage/search) document. +Search, you should check out the [Using Search]({{}}riak/kv/2.0.2/developing/usage/search) document. 
+ -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -125,7 +125,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.0.2/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.0.2/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -287,7 +287,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on how to customize your own [search schema](/riak/kv/2.0.2/developing/usage/search-schemas). +You can also find more information on how to customize your own [search schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -297,7 +297,7 @@ indexed. ## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.0.2/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -352,7 +352,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.0.2/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.0.2/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.0.2/using/reference/secondary-indexes.md b/content/riak/kv/2.0.2/using/reference/secondary-indexes.md index fa5adeef0e..eb456b86ce 100644 --- a/content/riak/kv/2.0.2/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.0.2/using/reference/secondary-indexes.md @@ -14,28 +14,28 @@ aliases: - /riak/2.0.2/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.0.2/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.2/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.2/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.0.2/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.0.2/developing/usage/secondary-indexes/) \(2i) feature. 
+[secondary indexes]({{}}riak/kv/2.0.2/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.0.2/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.0.2/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -57,7 +57,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.0.2/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.0.2/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -65,7 +65,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.0.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.0.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. diff --git a/content/riak/kv/2.0.2/using/reference/statistics-monitoring.md b/content/riak/kv/2.0.2/using/reference/statistics-monitoring.md index ed35ad0bea..15a6a6169d 100644 --- a/content/riak/kv/2.0.2/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.0.2/using/reference/statistics-monitoring.md @@ -16,13 +16,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.0.2/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.0.2/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.0.2/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.0.2/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. 
This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.0.2/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.0.2/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.0.2/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.0.2/developing/api/http/status) documentation. ## System Metrics To Graph @@ -46,7 +46,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.0.2/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.0.2/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -56,7 +56,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.0.2/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.0.2/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -142,7 +142,7 @@ Metric | Also | Notes ## Command-line Interface -The [`riak-admin`](/riak/kv/2.0.2/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.0.2/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -167,14 +167,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.0.2/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.0.2/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.0.2/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.0.2/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -221,7 +221,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.0.2/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.0.2/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -245,7 +245,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. 
A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.0.2/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.0.2/developing/api/http/status) endpoint is also available. #### Nagios @@ -319,14 +319,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.0.2/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.0.2/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.0.2/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.0.2/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -348,9 +348,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.0.2/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.0.2/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.0.2/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.0.2/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -366,9 +366,9 @@ Docs](https://github.com/basho/basho_docs). 
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.0.2/using/reference/strong-consistency.md b/content/riak/kv/2.0.2/using/reference/strong-consistency.md index e9848f97c7..ad69384373 100644 --- a/content/riak/kv/2.0.2/using/reference/strong-consistency.md +++ b/content/riak/kv/2.0.2/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.0.2/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.2/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.2/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.2/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.2/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.2/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.0.2/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.0.2/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. 
Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.0.2/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.0.2/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.0.2/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.0.2/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.0.2/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.0.2/using/reference/v2-multi-datacenter/architecture.md index e1b9629fcf..531c46bb4c 100644 --- a/content/riak/kv/2.0.2/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.2/using/reference/v2-multi-datacenter/architecture.md @@ -77,7 +77,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.0.2/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.0.2/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -89,7 +89,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png)
+![MDC Fullsync]({{< baseurl >}}images/MDC_Full-sync-small.png)
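Fullsync can also be driven by hand through the v2 `riak-repl` interface; a sketch of the relevant commands, run on the listener-side cluster:

```bash
riak-repl start-fullsync    # begin a full synchronization pass
riak-repl pause-fullsync    # pause the pass without discarding progress
riak-repl resume-fullsync   # resume a paused pass
riak-repl cancel-fullsync   # abandon the pass entirely
```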
## Realtime Replication @@ -107,7 +107,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png)
+![MDC Realtime]({{< baseurl >}}images/MDC-real-time-sync-small.png)
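In v2, realtime (and fullsync) replication is wired up by pairing a listener on the primary cluster with a site on the secondary cluster. A minimal sketch, with illustrative node names and addresses:

```bash
# On a node in the primary (source) cluster: expose a listener.
riak-repl add-listener riak@10.0.1.10 10.0.1.10 9010

# On a node in the secondary (sink) cluster: register the primary as a site.
riak-repl add-site 10.0.1.10 9010 primary_site
```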
## Restrictions @@ -115,6 +115,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.0.2/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.0.2/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.0.2/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.0.2/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/aae.md index ff6937a4f4..28495a0c20 100644 --- a/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/aae.md @@ -16,9 +16,9 @@ aliases: - /riak/2.0.2/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.0.2/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.0.2/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.0.2/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/architecture.md index f56dc943d6..351814bfb4 100644 --- a/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/architecture.md @@ -16,8 +16,8 @@ aliases: - /riak/2.0.2/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.0.2/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.0.2/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.0.2/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.0.2/learn/concepts/clusters ## How Version 3 Replication Works @@ -110,7 +110,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png)
+ ![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -121,7 +121,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -135,7 +135,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime3.png)
    @@ -144,7 +144,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime4.png)
    @@ -153,7 +153,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime5.png)
    @@ -162,7 +162,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime6.png)
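The sequence illustrated above maps onto the v3 `riak-repl` commands. A sketch with illustrative cluster names and cluster-manager address (run on the source cluster; the sink names itself the same way):

```bash
riak-repl clustername source_cluster    # name this cluster
riak-repl connect 10.0.2.10:9080        # connect to the sink's cluster manager
riak-repl realtime enable sink_cluster  # start queueing objects for the sink
riak-repl realtime start sink_cluster   # begin draining the realtime queue
```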
## Restrictions diff --git a/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/cascading-writes.md index 89dc32f970..2764d11c72 100644 --- a/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/cascading-writes.md @@ -82,7 +82,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{}}riak/kv/2.0.2/using/cluster-operations/v3-multi-datacenter) for more information. To show current the settings: diff --git a/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md index f482a87826..baedbe78cd 100644 --- a/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.0.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -16,7 +16,7 @@ aliases: - /riak/2.0.2/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.0.2/configuring/reference/#advanced-configuration +[config reference#advanced]: {{}}riak/kv/2.0.2/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.0.2/using/repair-recovery.md b/content/riak/kv/2.0.2/using/repair-recovery.md index 61bfa2adc9..af87f331e3 100644 --- a/content/riak/kv/2.0.2/using/repair-recovery.md +++ b/content/riak/kv/2.0.2/using/repair-recovery.md @@ -15,7 +15,7 @@ toc: true [repair recover fail]: ./failure-recovery/ [repair recover errors]: ./errors/ [repair recover repairs]: ./repairs/ -[repair recover restart]: ./rolling-restarts/ +[repair recover restart]: ./rolling-restart/ ## In This Section diff --git a/content/riak/kv/2.0.2/using/repair-recovery/errors.md b/content/riak/kv/2.0.2/using/repair-recovery/errors.md index 7eb47a10a4..09994d98b7 100644 --- a/content/riak/kv/2.0.2/using/repair-recovery/errors.md +++ b/content/riak/kv/2.0.2/using/repair-recovery/errors.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.0.2/configuring/reference +[config reference]: {{}}riak/kv/2.0.2/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -119,8 +119,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1. 
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1. `{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -185,7 +185,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -205,8 +205,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See 1 +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. 
See 1 `{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -236,7 +236,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -320,12 +320,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.0.2/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.0.2/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.0.2/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.0.2/using/repair-recovery/failure-recovery.md index 77aa482c51..2f9e5e59f5 100644 --- a/content/riak/kv/2.0.2/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.0.2/using/repair-recovery/failure-recovery.md @@ -37,7 +37,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.0.2/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.0.2/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -91,7 +91,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.0.2/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.0.2/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
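One way to spot sibling buildup is to fetch a suspect key directly over HTTP (a sketch; the bucket, key, and default port 8098 are illustrative). An object with siblings answers `300 Multiple Choices` with a list of vtags, and a very long list is a symptom of sibling explosion:

```bash
# Each line after "Siblings:" in the response body is one sibling's vtag.
curl -i http://127.0.0.1:8098/buckets/mybucket/keys/mykey
```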
@@ -114,7 +114,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak/kv/2.0.2/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.0.2/using/repair-recovery/repairs.md b/content/riak/kv/2.0.2/using/repair-recovery/repairs.md index 7bc351778e..d53032b522 100644 --- a/content/riak/kv/2.0.2/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.0.2/using/repair-recovery/repairs.md @@ -145,7 +145,7 @@ In the event of major hardware or filesystem problems, LevelDB can become corrup ### Checking for Compaction Errors -Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`](/riak/kv/2.0.2/configuring/reference/) configuration file. The default is `./data`. +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`]({{}}riak/kv/2.0.2/configuring/reference/) configuration file. The default is `./data`. Compaction error messages take the following form: @@ -214,23 +214,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.0.2/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.0.2/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.0.2/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/) is enabled. 
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.0.2/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.0.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.0.2/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.0.2/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.0.2/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.0.2/using/repair-recovery/rolling-restart.md index 80fd06fad4..af37fa4e0b 100644 --- a/content/riak/kv/2.0.2/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.0.2/using/repair-recovery/rolling-restart.md @@ -14,7 +14,7 @@ aliases: - /riak/2.0.2/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.0.2/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.0.2/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.0.2/using/running-a-cluster.md b/content/riak/kv/2.0.2/using/running-a-cluster.md index ff81edbe35..2d856e7292 100644 --- a/content/riak/kv/2.0.2/using/running-a-cluster.md +++ b/content/riak/kv/2.0.2/using/running-a-cluster.md @@ -18,7 +18,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. -Most configuration changes will be applied to the [configuration file](/riak/kv/2.0.2/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.0.2/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -45,7 +45,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.0.2/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. 
## Configure the First Node @@ -58,7 +58,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.0.2/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -152,7 +152,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.0.2/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.0.2/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -248,7 +248,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.0.2/using/cluster-operations/adding-removing-nodes) from a cluster. > **Ring Creation Size** > diff --git a/content/riak/kv/2.0.2/using/security.md b/content/riak/kv/2.0.2/using/security.md index 03088b9971..fccdb9e049 100644 --- a/content/riak/kv/2.0.2/using/security.md +++ b/content/riak/kv/2.0.2/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.0.2/ops/advanced/security --- -[config reference search]: /riak/kv/2.0.2/configuring/reference/#search -[config search enabling]: /riak/kv/2.0.2/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.0.2/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.0.2/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.0.2/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.0.2/using/security/basics -[security managing]: /riak/kv/2.0.2/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.0.2/using/security/basics +[security managing]: {{}}riak/kv/2.0.2/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.0.2/developing/usage/search +[usage search]: {{}}riak/kv/2.0.2/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.0.2/using/security/basics.md b/content/riak/kv/2.0.2/using/security/basics.md index ec0640129f..dd0df4c69a 100644 --- a/content/riak/kv/2.0.2/using/security/basics.md +++ b/content/riak/kv/2.0.2/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.0.2/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.0.2/using/security/managing-sources/). 
As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.0.2/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.0.2/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.0.2/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.0.2/using/reference/custom-code). 1. Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.0.2/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.0.2/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.0.2/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.0.2/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{}}riak/kv/2.0.2/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.0.2/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{}}riak/kv/2.0.2/developing/usage/bucket-types) in addition to setting bucket properties. 
`riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.0.2/configuring/search/) document. +[Riak Search Settings]({{}}riak/kv/2.0.2/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.0.2/using/security/managing-sources/). +An more in-depth tutorial can be found in [Managing Security Sources]({{}}riak/kv/2.0.2/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.0.2/using/security/managing-sources/) document. +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.0.2/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.0.2/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.0.2/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.0.2/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.0.2/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.0.2/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.0.2/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.0.2/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). 
-If, however, you are using the [HTTP API](/riak/kv/2.0.2/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.0.2/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.0.2/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.0.2/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.0.2/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.0.2/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.0.2/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.0.2/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.0.2/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.0.2/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.0.2/configuring/reference/#directories).
 <td><code>vnode_management_timer</code></td>
 <td>Sets the frequency with which <a
-href="/riak/kv/2.0.2/learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
+href="../../learn/glossary#vnode">vnodes</a> attempt to trigger handoff between
 this node and other nodes in the cluster.</td>
 <td><code>10s</code> (10 seconds)</td>
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.0.2/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.0.2/using/security/managing-sources.md b/content/riak/kv/2.0.2/using/security/managing-sources.md index 8722c2309a..cc6b9d64ee 100644 --- a/content/riak/kv/2.0.2/using/security/managing-sources.md +++ b/content/riak/kv/2.0.2/using/security/managing-sources.md @@ -15,7 +15,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.0.2/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.0.2/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -25,8 +25,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.0.2/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.0.2/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.0.2/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.0.2/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -121,7 +121,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.0.2/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.0.2/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -144,7 +144,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.0.2/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.0.2/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -155,7 +155,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. 
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.0.2/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.0.2/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -164,7 +164,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.0.2/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.0.2/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.0.4/_reference-links.md b/content/riak/kv/2.0.4/_reference-links.md index 15aabb996a..f22d431fa2 100644 --- a/content/riak/kv/2.0.4/_reference-links.md +++ b/content/riak/kv/2.0.4/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.0.4/downloads/ -[install index]: /riak/kv/2.0.4/setup/installing -[upgrade index]: /riak/kv/2.0.4/upgrading -[plan index]: /riak/kv/2.0.4/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.0.4/configuring/reference/ -[manage index]: /riak/kv/2.0.4/using/managing -[performance index]: /riak/kv/2.0.4/using/performance -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.0.4/downloads/ +[install index]: {{}}riak/kv/2.0.4/setup/installing +[upgrade index]: {{}}riak/kv/2.0.4/upgrading +[plan index]: {{}}riak/kv/2.0.4/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.0.4/configuring/reference/ +[manage index]: {{}}riak/kv/2.0.4/using/managing +[performance index]: {{}}riak/kv/2.0.4/using/performance +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.0.4/setup/planning -[plan start]: /riak/kv/2.0.4/setup/planning/start -[plan backend]: /riak/kv/2.0.4/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.4/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.4/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.0.4/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.0.4/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.0.4/setup/planning/best-practices -[plan future]: /riak/kv/2.0.4/setup/planning/future +[plan index]: {{}}riak/kv/2.0.4/setup/planning +[plan start]: {{}}riak/kv/2.0.4/setup/planning/start +[plan backend]: {{}}riak/kv/2.0.4/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.4/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.4/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.4/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.0.4/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.0.4/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.0.4/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.0.4/setup/planning/future ## Installing -[install index]: /riak/kv/2.0.4/setup/installing -[install aws]: /riak/kv/2.0.4/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.4/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.4/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.4/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.4/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.4/setup/installing/smartos -[install solaris]: /riak/kv/2.0.4/setup/installing/solaris -[install suse]: /riak/kv/2.0.4/setup/installing/suse -[install windows azure]: /riak/kv/2.0.4/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.0.4/setup/installing +[install aws]: {{}}riak/kv/2.0.4/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.4/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.4/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.4/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.4/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.4/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.4/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.4/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.4/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.4/setup/installing/source -[install source erlang]: /riak/kv/2.0.4/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.0.4/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.0.4/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.4/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.0.4/setup/installing/source/jvm -[install verify]: /riak/kv/2.0.4/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.4/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.0.4/setup/upgrading -[upgrade checklist]: /riak/kv/2.0.4/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.0.4/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.0.4/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.0.4/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.0.4/setup/downgrade +[upgrade index]: {{}}riak/kv/2.0.4/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.0.4/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.0.4/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.0.4/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.0.4/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.0.4/setup/downgrade ## Configuring -[config index]: /riak/kv/2.0.4/configuring -[config basic]: /riak/kv/2.0.4/configuring/basic -[config backend]: /riak/kv/2.0.4/configuring/backend -[config manage]: /riak/kv/2.0.4/configuring/managing -[config reference]: /riak/kv/2.0.4/configuring/reference/ -[config strong consistency]: /riak/kv/2.0.4/configuring/strong-consistency -[config load balance]: /riak/kv/2.0.4/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.0.4/configuring/mapreduce -[config search]: /riak/kv/2.0.4/configuring/search/ +[config index]: {{}}riak/kv/2.0.4/configuring +[config basic]: {{}}riak/kv/2.0.4/configuring/basic +[config backend]: {{}}riak/kv/2.0.4/configuring/backend +[config manage]: 
{{}}riak/kv/2.0.4/configuring/managing +[config reference]: {{}}riak/kv/2.0.4/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.0.4/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.0.4/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.0.4/configuring/mapreduce +[config search]: {{}}riak/kv/2.0.4/configuring/search/ -[config v3 mdc]: /riak/kv/2.0.4/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.4/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.4/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.0.4/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.0.4/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.0.4/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.0.4/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.0.4/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.0.4/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.0.4/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.0.4/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.0.4/using/ -[use admin commands]: /riak/kv/2.0.4/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.0.4/using/running-a-cluster +[use index]: {{}}riak/kv/2.0.4/using/ +[use admin commands]: {{}}riak/kv/2.0.4/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.0.4/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.0.4/using/reference/custom-code -[use ref handoff]: /riak/kv/2.0.4/using/reference/handoff -[use ref monitoring]: /riak/kv/2.0.4/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.0.4/using/reference/search -[use ref 2i]: /riak/kv/2.0.4/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.0.4/using/reference/snmp -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.0.4/using/reference/jmx -[use ref obj del]: /riak/kv/2.0.4/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.0.4/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.0.4/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.0.4/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.0.4/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.0.4/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.0.4/using/reference/search +[use ref 2i]: {{}}riak/kv/2.0.4/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.0.4/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.0.4/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.0.4/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.0.4/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.0.4/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.0.4/using/admin/ -[use admin commands]: /riak/kv/2.0.4/using/admin/commands/ -[use admin riak cli]: 
/riak/kv/2.0.4/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.0.4/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.0.4/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.0.4/using/admin/ +[use admin commands]: {{}}riak/kv/2.0.4/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.0.4/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.0.4/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.0.4/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.0.4/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.0.4/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.0.4/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.0.4/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.0.4/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.0.4/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.0.4/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.0.4/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.0.4/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.0.4/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.0.4/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.0.4/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.0.4/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.0.4/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.0.4/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.0.4/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.0.4/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.0.4/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.0.4/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.0.4/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.0.4/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.0.4/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.0.4/using/repair-recovery -[repair recover index]: /riak/kv/2.0.4/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.0.4/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.0.4/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.0.4/using/security/ -[security basics]: /riak/kv/2.0.4/using/security/basics -[security managing]: /riak/kv/2.0.4/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.0.4/using/security/ +[security basics]: {{}}riak/kv/2.0.4/using/security/basics +[security managing]: {{}}riak/kv/2.0.4/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.0.4/using/performance/ -[perf 
benchmark]: /riak/kv/2.0.4/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.4/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.0.4/using/performance/erlang -[perf aws]: /riak/kv/2.0.4/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.0.4/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.0.4/using/performance/ +[perf benchmark]: {{}}riak/kv/2.0.4/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.4/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.0.4/using/performance/erlang +[perf aws]: {{}}riak/kv/2.0.4/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.0.4/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.0.4/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.0.4/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.0.4/developing -[dev client libraries]: /riak/kv/2.0.4/developing/client-libraries -[dev data model]: /riak/kv/2.0.4/developing/data-modeling -[dev data types]: /riak/kv/2.0.4/developing/data-types -[dev kv model]: /riak/kv/2.0.4/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.0.4/developing +[dev client libraries]: {{}}riak/kv/2.0.4/developing/client-libraries +[dev data model]: {{}}riak/kv/2.0.4/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.4/developing/data-types +[dev kv model]: {{}}riak/kv/2.0.4/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.0.4/developing/getting-started -[getting started java]: /riak/kv/2.0.4/developing/getting-started/java -[getting started ruby]: /riak/kv/2.0.4/developing/getting-started/ruby -[getting started python]: /riak/kv/2.0.4/developing/getting-started/python -[getting started php]: /riak/kv/2.0.4/developing/getting-started/php -[getting started csharp]: /riak/kv/2.0.4/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.0.4/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.0.4/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.0.4/developing/getting-started/golang - -[obj model java]: /riak/kv/2.0.4/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.4/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.4/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.4/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.4/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.4/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.4/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.0.4/developing/getting-started +[getting started java]: {{}}riak/kv/2.0.4/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.0.4/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.0.4/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.0.4/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.0.4/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.0.4/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.0.4/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.0.4/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.0.4/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.0.4/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.4/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.4/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.4/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.4/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.4/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.0.4/developing/usage -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.4/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.4/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.0.4/developing/usage/content-types -[usage create objects]: /riak/kv/2.0.4/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.0.4/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.0.4/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.0.4/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.4/developing/usage/search -[usage search schema]: /riak/kv/2.0.4/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.4/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.0.4/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.0.4/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.0.4/developing/usage +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.4/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.4/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.0.4/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.0.4/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.0.4/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.0.4/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.0.4/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.4/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.4/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.4/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.0.4/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.0.4/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.0.4/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.0.4/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.0.4/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.0.4/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.0.4/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.0.4/developing/api/backend -[dev api http]: /riak/kv/2.0.4/developing/api/http -[dev api http status]: /riak/kv/2.0.4/developing/api/http/status -[dev api pbc]: /riak/kv/2.0.4/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.0.4/developing/api/backend +[dev api http]: {{}}riak/kv/2.0.4/developing/api/http +[dev api http status]: {{}}riak/kv/2.0.4/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.0.4/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.0.4/learn/glossary/ -[glossary aae]: /riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.0.4/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.0.4/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.0.4/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode -[concept aae]: /riak/kv/2.0.4/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.0.4/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.4/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.4/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.0.4/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.4/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.4/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.4/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.4/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.0.4/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.4/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.4/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.0.4/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.4/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.4/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.4/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.4/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.0.4/add-ons.md b/content/riak/kv/2.0.4/add-ons.md index dabc315af5..bfb4b848d8 100644 --- a/content/riak/kv/2.0.4/add-ons.md +++ b/content/riak/kv/2.0.4/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.0.4/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.0.4/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.0.4/add-ons/redis/developing-rra.md b/content/riak/kv/2.0.4/add-ons/redis/developing-rra.md index 700076108c..8f8de02201 100644 --- a/content/riak/kv/2.0.4/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.0.4/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.0.4/developing/api/http +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.0.4/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.0.4/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.0.4/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.0.4/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.0.4/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.0.4/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.0.4/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.0.4/add-ons/redis/redis-add-on-features.md index 12a722daa4..87ab2242f3 100644 --- a/content/riak/kv/2.0.4/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.0.4/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.0.4/add-ons/redis/set-up-rra.md b/content/riak/kv/2.0.4/add-ons/redis/set-up-rra.md index 55caf3b49a..eaba3759ea 100644 --- a/content/riak/kv/2.0.4/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.0.4/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.0.4/setup/installing -[perf open files]: /riak/kv/2.0.4/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.0.4/setup/installing +[perf open files]: {{}}riak/kv/2.0.4/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. 
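+
+To make concrete what RRA automates, here is the read-through pattern from the
+previous page sketched by hand with standard tools. This is an illustration only,
+not RRA itself: it assumes a local Redis on port 6379, Riak's HTTP interface at
+`127.0.0.1:8098`, a hypothetical bucket/key of `test`/`key1`, and `300` seconds
+standing in for `CACHE_TTL`.
+
+```bash
+# Read-through GET: consult the cache first
+VALUE=$(redis-cli -p 6379 GET test:key1)
+if [ -z "$VALUE" ]; then
+  # Cache miss: fall back to Riak KV over HTTP
+  VALUE=$(curl -s http://127.0.0.1:8098/buckets/test/keys/key1)
+  # Populate the cache, expiring the entry after the TTL
+  redis-cli -p 6379 SET test:key1 "$VALUE" EX 300
+fi
+echo "$VALUE"
+```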
diff --git a/content/riak/kv/2.0.4/add-ons/redis/using-rra.md b/content/riak/kv/2.0.4/add-ons/redis/using-rra.md index 94ec31ae93..601a657352 100644 --- a/content/riak/kv/2.0.4/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.0.4/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.0.4/developing/api/http/ +[dev api http]: {{}}riak/kv/2.0.4/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.0.4/configuring/backend.md b/content/riak/kv/2.0.4/configuring/backend.md index 5c802fdf71..84a609170d 100644 --- a/content/riak/kv/2.0.4/configuring/backend.md +++ b/content/riak/kv/2.0.4/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.4/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.4/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.0.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.4/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.4/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.0.4/configuring/basic.md b/content/riak/kv/2.0.4/configuring/basic.md index 15343c636a..09e36cf35e 100644 --- a/content/riak/kv/2.0.4/configuring/basic.md +++ b/content/riak/kv/2.0.4/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.0.4/ops/building/configuration/ --- -[config reference]: /riak/kv/2.0.4/configuring/reference -[use running cluster]: /riak/kv/2.0.4/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.0.4/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.0.4/using/performance/erlang -[plan start]: /riak/kv/2.0.4/setup/planning/start -[plan best practices]: /riak/kv/2.0.4/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.0.4/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.0.4/setup/planning/backend -[plan backend multi]: /riak/kv/2.0.4/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.0.4/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.0.4/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.4/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.4/using/performance -[perf aws]: /riak/kv/2.0.4/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.0.4/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[use running cluster]: {{}}riak/kv/2.0.4/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.0.4/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.0.4/using/performance/erlang +[plan start]: {{}}riak/kv/2.0.4/setup/planning/start +[plan best practices]: {{}}riak/kv/2.0.4/setup/planning/best-practices +[cluster ops backup]: 
{{}}riak/kv/2.0.4/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.0.4/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.0.4/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.4/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.0.4/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.0.4/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.4/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.4/using/performance +[perf aws]: {{}}riak/kv/2.0.4/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.0.4/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.0.4/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.4/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
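+
+For example, assuming the HTTP interface listens on the default `127.0.0.1:8098`,
+a hypothetical existing bucket named `my_bucket` can be forced back onto the new
+defaults by resetting its stored properties and then confirming the result:
+
+```curl
+# Drop the bucket's custom properties so it inherits the new defaults
+curl -XDELETE http://127.0.0.1:8098/buckets/my_bucket/props
+
+# Verify: the returned props should now reflect the defaults
+curl http://127.0.0.1:8098/buckets/my_bucket/props
+```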
## System tuning diff --git a/content/riak/kv/2.0.4/configuring/load-balancing-proxy.md b/content/riak/kv/2.0.4/configuring/load-balancing-proxy.md index fa532d67dd..3914b8ff55 100644 --- a/content/riak/kv/2.0.4/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.0.4/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.0.4/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.0.4/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.0.4/configuring/managing.md b/content/riak/kv/2.0.4/configuring/managing.md index 70a5f3ef50..9d31a0a4b0 100644 --- a/content/riak/kv/2.0.4/configuring/managing.md +++ b/content/riak/kv/2.0.4/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.0.4/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.0.4/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.0.4/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.0.4/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.0.4/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.0.4/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.0.4/configuring/mapreduce.md b/content/riak/kv/2.0.4/configuring/mapreduce.md index 45de4ba6ea..189d367bf3 100644 --- a/content/riak/kv/2.0.4/configuring/mapreduce.md +++ b/content/riak/kv/2.0.4/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.0.4/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.0.4/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.0.4/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.0.4/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.4/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.0.4/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.0.4/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.0.4/configuring/reference.md b/content/riak/kv/2.0.4/configuring/reference.md index a593caea14..4954b18cfa 100644 --- a/content/riak/kv/2.0.4/configuring/reference.md +++ b/content/riak/kv/2.0.4/configuring/reference.md @@ -1876,8 +1876,8 @@ package) and in R14B04 via a custom repository and branch. 
diff --git a/content/riak/kv/2.0.4/configuring/search.md b/content/riak/kv/2.0.4/configuring/search.md index 3e71d3e77f..e0b1b539a5 100644 --- a/content/riak/kv/2.0.4/configuring/search.md +++ b/content/riak/kv/2.0.4/configuring/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.0.4/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.0.4/developing/usage/search -[usage search schema]: /riak/kv/2.0.4/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.4/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.0.4/developing/usage/custom-extractors -[config reference]: /riak/kv/2.0.4/configuring/reference -[config reference#search]: /riak/kv/2.0.4/configuring/reference/#search -[glossary aae]: /riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.0.4/using/security/ +[usage search]: {{}}riak/kv/2.0.4/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.4/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.4/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.0.4/developing/usage/custom-extractors +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[config reference#search]: {{}}riak/kv/2.0.4/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.0.4/using/security/ > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Riak Search Settings](http://docs.basho.com/riak/1.4.8/ops/advanced/configs/search/). +Yokozuna). This document covers Riak's Search subsystem from an operational perspective. 
If you are looking for more developer-focused diff --git a/content/riak/kv/2.0.4/configuring/strong-consistency.md b/content/riak/kv/2.0.4/configuring/strong-consistency.md index 9ab903a500..c8707ec7f8 100644 --- a/content/riak/kv/2.0.4/configuring/strong-consistency.md +++ b/content/riak/kv/2.0.4/configuring/strong-consistency.md @@ -13,29 +13,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.0.4/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.0.4/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.0.4/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.0.4/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.0.4/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.0.4/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.0.4/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.0.4/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.0.4/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.0.4/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.0.4/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.0.4/learn/concepts/causal-context -[dev data types]: /riak/kv/2.0.4/developing/data-types -[glossary aae]: /riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.0.4/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.0.4/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.0.4/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.0.4/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.0.4/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.0.4/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.0.4/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.0.4/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.0.4/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.0.4/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.0.4/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.0.4/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.0.4/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.0.4/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.0.4/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.0.4/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.0.4/learn/concepts/causal-context +[dev data types]: 
{{}}riak/kv/2.0.4/developing/data-types +[glossary aae]: {{}}riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.0.4/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.0.4/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.0.4/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.0.4/developing/client-libraries > **Please Note:** > @@ -309,11 +309,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.0.4/configuring/v2-multi-datacenter.md b/content/riak/kv/2.0.4/configuring/v2-multi-datacenter.md index 2d586a54b0..a071f2d5be 100644 --- a/content/riak/kv/2.0.4/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.4/configuring/v2-multi-datacenter.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.4/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.0.4/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.4/configuring/v2-multi-datacenter/ssl Riak Enterprise's Multi-Datacenter Replication capabilities offer a variety of configurable parameters. diff --git a/content/riak/kv/2.0.4/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.0.4/configuring/v2-multi-datacenter/nat.md index 11095bb43d..f8eeac7efc 100644 --- a/content/riak/kv/2.0.4/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.4/configuring/v2-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.4/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.0.4/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.4/configuring/v2-multi-datacenter/ssl Riak Enterprise supports replication of data on networks that use static NAT. This capability can be used for replicating data over the internet diff --git a/content/riak/kv/2.0.4/configuring/v3-multi-datacenter.md b/content/riak/kv/2.0.4/configuring/v3-multi-datacenter.md index 3a686aef0f..61db4cfeee 100644 --- a/content/riak/kv/2.0.4/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.4/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.4/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.0.4/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.0.4/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/nat.md index b162e4e9eb..ba4a0b3a20 100644 --- a/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.4/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. 
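+
+The mappings themselves are typically managed at runtime with `riak-repl nat-map`.
+A minimal sketch, using documentation-range addresses (your external and internal
+addresses will differ, and the external port is optional):
+
+```bash
+# Associate the cluster manager's public (external) address with the
+# node's internal address
+riak-repl nat-map add 203.0.113.10:9080 192.168.1.2
+
+# Review the configured mappings
+riak-repl nat-map show
+```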
diff --git a/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/quick-start.md index 05215e7a1d..6aa5a4d72c 100644 --- a/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.4/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.0.4/using/performance -[config v3 mdc]: /riak/kv/2.0.4/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.0.4/using/performance +[config v3 mdc]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl.md index ced6dfb9af..e3449f30f2 100644 --- a/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.4/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.0.4/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.0.4/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.0.4/developing/api/backend.md b/content/riak/kv/2.0.4/developing/api/backend.md index 4fb3ac215d..218970c7bf 100644 --- a/content/riak/kv/2.0.4/developing/api/backend.md +++ b/content/riak/kv/2.0.4/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/backend-api --- -[plan backend]: /riak/kv/2.0.4/setup/planning/backend +[plan backend]: {{}}riak/kv/2.0.4/setup/planning/backend Riak's storage API uniformly applies to all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.0.4/developing/api/http.md b/content/riak/kv/2.0.4/developing/api/http.md index d53912e417..966114adaa 100644 --- a/content/riak/kv/2.0.4/developing/api/http.md +++ b/content/riak/kv/2.0.4/developing/api/http.md @@ -29,50 +29,50 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. 
Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.0.4/developing/api/http/get-bucket-props)
-`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.0.4/developing/api/http/set-bucket-props)
-`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.0.4/developing/api/http/reset-bucket-props)
-`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.0.4/developing/api/http/list-buckets)
-`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.0.4/developing/api/http/list-keys)
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.0.4/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.0.4/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.4/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.0.4/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.0.4/developing/api/http/list-keys)

## Object-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.0.4/developing/api/http/fetch-object)
-`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.4/developing/api/http/store-object)
-`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.4/developing/api/http/store-object)
-`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.0.4/developing/api/http/delete-object)
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.0.4/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.0.4/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.0.4/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.0.4/developing/api/http/delete-object)

## Riak-Data-Type-related Operations

-For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.0.4/learn/concepts/crdts),
-see the `curl` examples in [Using Data Types](/riak/kv/2.0.4/developing/data-types).
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.0.4/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/2.0.4/developing/data-types).
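+
+As a quick sanity check, the object-related operations above map onto `curl`
+invocations like the following (a local node and a hypothetical
+`default`/`test`/`doc1` type/bucket/key are assumed):
+
+```curl
+# Store an object
+curl -XPUT http://127.0.0.1:8098/types/default/buckets/test/keys/doc1 \
+  -H "Content-Type: application/json" \
+  -d '{"greeting": "hello"}'
+
+# Fetch it back
+curl http://127.0.0.1:8098/types/default/buckets/test/keys/doc1
+
+# Delete it
+curl -XDELETE http://127.0.0.1:8098/types/default/buckets/test/keys/doc1
+```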
## Query-related Operations

Method | URL | Doc
:------|:----|:---
-`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.0.4/developing/api/http/mapreduce)
-`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<value>` | [HTTP Secondary Indexes](/riak/kv/2.0.4/developing/api/http/secondary-indexes)
-`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<start>/<end>` | [HTTP Secondary Indexes](/riak/kv/2.0.4/developing/api/http/secondary-indexes)
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.0.4/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.4/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.4/developing/api/http/secondary-indexes)

## Server-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/ping` | [HTTP Ping](/riak/kv/2.0.4/developing/api/http/ping)
-`GET` | `/stats` | [HTTP Status](/riak/kv/2.0.4/developing/api/http/status)
-`GET` | `/` | [HTTP List Resources](/riak/kv/2.0.4/developing/api/http/list-resources)
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.0.4/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.0.4/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.0.4/developing/api/http/list-resources)

## Search-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/search/query/<index_name>` | [HTTP Search Query](/riak/kv/2.0.4/developing/api/http/search-query)
-`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.0.4/developing/api/http/search-index-info)
-`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index](/riak/kv/2.0.4/developing/api/http/fetch-search-index)
-`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index](/riak/kv/2.0.4/developing/api/http/store-search-index)
-`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index](/riak/kv/2.0.4/developing/api/http/delete-search-index)
-`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema](/riak/kv/2.0.4/developing/api/http/fetch-search-schema)
-`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema](/riak/kv/2.0.4/developing/api/http/store-search-schema)
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{}}riak/kv/2.0.4/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.0.4/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{}}riak/kv/2.0.4/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{}}riak/kv/2.0.4/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{}}riak/kv/2.0.4/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.0.4/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{}}riak/kv/2.0.4/developing/api/http/store-search-schema)
diff --git a/content/riak/kv/2.0.4/developing/api/http/counters.md b/content/riak/kv/2.0.4/developing/api/http/counters.md
index 7a90cae88f..376b729ca0 100644
--- a/content/riak/kv/2.0.4/developing/api/http/counters.md
+++ b/content/riak/kv/2.0.4/developing/api/http/counters.md
@@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY

## Response

-The regular POST/PUT ([HTTP Store Object](/riak/kv/2.0.4/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.0.4/developing/api/http/fetch-object)) responses apply here.
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.0.4/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.0.4/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.0.4/developing/api/http/fetch-object.md b/content/riak/kv/2.0.4/developing/api/http/fetch-object.md index 2f365ea4a0..a6c0f2b9b4 100644 --- a/content/riak/kv/2.0.4/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.0.4/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.0.4/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.0.4/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.0.4/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.4/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.0.4/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.4/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.0.4/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.0.4/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.4/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.4/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.0.4/developing/api/http/fetch-search-index.md b/content/riak/kv/2.0.4/developing/api/http/fetch-search-index.md index a59e22dbcf..88ead5ea21 100644 --- a/content/riak/kv/2.0.4/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.0.4/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.0.4/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.0.4/developing/usage/search/#simple-setup). 
## Request @@ -36,7 +36,7 @@ GET /search/index/ ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.0.4/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.0.4/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.0.4/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.0.4/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.0.4/developing/api/http/fetch-search-schema.md index aa22782ff2..d6bec10b39 100644 --- a/content/riak/kv/2.0.4/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.0.4/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.0.4/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.0.4/developing/api/http/get-bucket-props.md b/content/riak/kv/2.0.4/developing/api/http/get-bucket-props.md index 63702bb6af..476355a817 100644 --- a/content/riak/kv/2.0.4/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.0.4/developing/api/http/get-bucket-props.md @@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.0.4/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.0.4/developing/api/http/list-keys). ## Response @@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.0.4/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{}}riak/kv/2.0.4/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.0.4/developing/api/http/link-walking.md b/content/riak/kv/2.0.4/developing/api/http/link-walking.md index 3888d41732..4911a6bd4b 100644 --- a/content/riak/kv/2.0.4/developing/api/http/link-walking.md +++ b/content/riak/kv/2.0.4/developing/api/http/link-walking.md @@ -17,8 +17,8 @@ aliases: Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.0.4/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.0.4/learn/glossary/#links). +is a special case of [MapReduce]({{}}riak/kv/2.0.4/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.0.4/learn/glossary/#links). ## Request @@ -64,7 +64,7 @@ single object that was found. If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. 
Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.0.4/developing/api/http/fetch-object), without the status +response from [fetching the object]({{}}riak/kv/2.0.4/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.0.4/developing/api/http/list-resources.md b/content/riak/kv/2.0.4/developing/api/http/list-resources.md index 3f56e9964d..1430cd50b2 100644 --- a/content/riak/kv/2.0.4/developing/api/http/list-resources.md +++ b/content/riak/kv/2.0.4/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.0.4/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.0.4/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.0.4/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.0.4/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.0.4/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.0.4/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.0.4/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.0.4/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.0.4/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.0.4/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.0.4/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.0.4/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.0.4/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.0.4/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.0.4/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.0.4/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.0.4/developing/api/http/mapreduce.md b/content/riak/kv/2.0.4/developing/api/http/mapreduce.md index 69bf1f60b7..67ac652c77 100644 --- a/content/riak/kv/2.0.4/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.0.4/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.0.4/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{}}riak/kv/2.0.4/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.0.4/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. 
The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.0.4/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.0.4/developing/api/http/search-index-info.md b/content/riak/kv/2.0.4/developing/api/http/search-index-info.md index 765c454e2e..b63fb550b0 100644 --- a/content/riak/kv/2.0.4/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.0.4/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.0.4/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.0.4/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.0.4/developing/api/http/search-query.md b/content/riak/kv/2.0.4/developing/api/http/search-query.md index 5da238f021..9cb2d62c63 100644 --- a/content/riak/kv/2.0.4/developing/api/http/search-query.md +++ b/content/riak/kv/2.0.4/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.0.4/developing/usage/search) query. +Performs a [Riak KV Search]({{}}riak/kv/2.0.4/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.0.4/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{}}riak/kv/2.0.4/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.0.4/developing/api/http/secondary-indexes.md b/content/riak/kv/2.0.4/developing/api/http/secondary-indexes.md index ab68ee5513..10674e6900 100644 --- a/content/riak/kv/2.0.4/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.0.4/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.0.4/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
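As a minimal sketch of that flow (hypothetical bucket, key, and index names; assumes a backend with secondary index support, such as LevelDB):

```curl
# Tag an object with a binary secondary index at write time
curl -XPUT http://localhost:8098/buckets/users/keys/john_smith \
  -H 'x-riak-index-twitter_bin: jsmith123' \
  -H 'Content-Type: application/json' \
  -d '{"name": "John Smith"}'

# Later, query the index to retrieve the list of matching keys
curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith123
```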
## Request diff --git a/content/riak/kv/2.0.4/developing/api/http/set-bucket-props.md b/content/riak/kv/2.0.4/developing/api/http/set-bucket-props.md index 4793b51849..8d48843177 100644 --- a/content/riak/kv/2.0.4/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.0.4/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.0.4/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.0.4/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{}}riak/kv/2.0.4/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.0.4/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.0.4/developing/api/http/status.md b/content/riak/kv/2.0.4/developing/api/http/status.md index 0856a3455e..fdd2c63da1 100644 --- a/content/riak/kv/2.0.4/developing/api/http/status.md +++ b/content/riak/kv/2.0.4/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.0.4/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.0.4/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.0.4/developing/api/http/store-object.md b/content/riak/kv/2.0.4/developing/api/http/store-object.md index 193660dd39..6ce2227a84 100644 --- a/content/riak/kv/2.0.4/developing/api/http/store-object.md +++ b/content/riak/kv/2.0.4/developing/api/http/store-object.md @@ -38,8 +38,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.0.4/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.4/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{}}riak/kv/2.0.4/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.4/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -83,7 +83,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.0.4/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.0.4/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. 
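A minimal sketch of a store with `returnbody=true` (hypothetical bucket and key; the response echoes the object as stored, headers included, so siblings can be detected immediately):

```curl
curl -XPUT 'http://localhost:8098/buckets/test/keys/doc?returnbody=true' \
  -H 'Content-Type: application/json' \
  -d '{"field": "value"}'
```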
diff --git a/content/riak/kv/2.0.4/developing/api/http/store-search-index.md b/content/riak/kv/2.0.4/developing/api/http/store-search-index.md index b4aff0289e..c419c27130 100644 --- a/content/riak/kv/2.0.4/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.0.4/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.0.4/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.0.4/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.0.4/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.0.4/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.0.4/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.0.4/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.0.4/developing/api/http/store-search-schema.md b/content/riak/kv/2.0.4/developing/api/http/store-search-schema.md index 6051fc8fb3..b175711763 100644 --- a/content/riak/kv/2.0.4/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.0.4/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.0.4/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/schema/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.0.4/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the file
`my_schema.xml` and would like to create a new schema called
`my_custom_schema`, you would use the following HTTP request:
diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers.md
index 76ff6d2170..8cdbfb8a93 100644
--- a/content/riak/kv/2.0.4/developing/api/protocol-buffers.md
+++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers.md
@@ -139,47 +139,47 @@ message RpbErrorResp {

## Bucket Operations

-* [PBC List Buckets](/riak/kv/2.0.4/developing/api/protocol-buffers/list-buckets)
-* [PBC List Keys](/riak/kv/2.0.4/developing/api/protocol-buffers/list-keys)
-* [PBC Get Bucket Properties](/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props)
-* [PBC Set Bucket Properties](/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props)
-* [PBC Reset Bucket Properties](/riak/kv/2.0.4/developing/api/protocol-buffers/reset-bucket-props)
+* [PBC List Buckets]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/list-buckets)
+* [PBC List Keys]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/list-keys)
+* [PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props)
+* [PBC Set Bucket Properties]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props)
+* [PBC Reset Bucket Properties]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/reset-bucket-props)

## Object/Key Operations

-* [PBC Fetch Object](/riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object)
-* [PBC Store Object](/riak/kv/2.0.4/developing/api/protocol-buffers/store-object)
-* [PBC Delete Object](/riak/kv/2.0.4/developing/api/protocol-buffers/delete-object)
+* [PBC Fetch Object]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object)
+* [PBC Store Object]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/store-object)
+* [PBC Delete Object]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/delete-object)

## Query Operations

-* [PBC MapReduce](/riak/kv/2.0.4/developing/api/protocol-buffers/mapreduce)
-* [PBC Secondary Indexes](/riak/kv/2.0.4/developing/api/protocol-buffers/secondary-indexes)
-* [PBC Search](/riak/kv/2.0.4/developing/api/protocol-buffers/search)
+* [PBC MapReduce]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/mapreduce)
+* [PBC Secondary Indexes]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/secondary-indexes)
+* [PBC Search]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/search)

## Server Operations

-* [PBC Ping](/riak/kv/2.0.4/developing/api/protocol-buffers/ping)
-* [PBC Server Info](/riak/kv/2.0.4/developing/api/protocol-buffers/server-info)
+* [PBC Ping]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/ping)
+* [PBC Server Info]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/server-info)

## Bucket Type Operations

-* [PBC Get Bucket Type](/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-type)
-* [PBC Set Bucket Type](/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-type)
+* [PBC Get Bucket Type]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-type)
+* [PBC Set Bucket Type]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-type)

## Data Type Operations

-* [PBC Data Type Fetch](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-fetch)
-* [PBC Data Type Union](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-union)
-* [PBC Data Type Store](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-store)
-* [PBC Data Type Counter Store](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-counter-store)
-* [PBC Data Type Set Store](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-set-store)
-* [PBC Data Type Map Store](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store)
+* [PBC Data Type Fetch]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-fetch)
+* [PBC Data Type Union]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-union)
+* [PBC Data Type Store]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-store)
+* [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-counter-store)
+* [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-set-store)
+* [PBC Data Type Map Store]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store)

## Yokozuna Operations

-* [PBC Yokozuna Index Get](/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-get)
-* [PBC Yokozuna Index Put](/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-put)
-* [PBC Yokozuna Index Delete](/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-delete)
-* [PBC Yokozuna Schema Get](/riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-get)
-* [PBC Yokozuna Schema Put](/riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-put)
+* [PBC Yokozuna Index Get]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-get)
+* [PBC Yokozuna Index Put]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-put)
+* [PBC Yokozuna Index Delete]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-delete)
+* [PBC Yokozuna Schema Get]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-get)
+* [PBC Yokozuna Schema Put]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-put)
diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/auth-req.md
index 1137f8181c..e9b5da0cc6 100644
--- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/auth-req.md
+++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/auth-req.md
@@ -27,4 +27,4 @@ message RpbAuthReq {
}
```

-For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.0.4/using/security/basics).
+For more on authentication, see our documentation on [Authentication and Authorization]({{< baseurl >}}riak/kv/2.0.4/using/security/basics).
diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/delete-object.md
index 747cdd509e..e631a07ae3 100644
--- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/delete-object.md
+++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/delete-object.md
@@ -15,7 +15,7 @@ aliases:
  - /riak/kv/2.0.4/dev/references/protocol-buffers/delete-object
---

-Delete an object in the specified [bucket type](/riak/kv/2.0.4/using/cluster-operations/bucket-types)/bucket/key location.
+Delete an object in the specified [bucket type]({{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/bucket-types)/bucket/key location.
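For orientation only, the HTTP API's counterpart addresses the same bucket-type/bucket/key hierarchy; a sketch with hypothetical names (this is not part of the protocol buffers interface itself):

```curl
curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
```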
## Request diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-counter-store.md index 9419e9aeed..30304f5fdc 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.0.4/developing/data-types). +An operation to update a [counter]({{}}riak/kv/2.0.4/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-fetch.md index 4fe11d44ec..21fe96734b 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.0.4/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.0.4/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.0.4/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{}}riak/kv/2.0.4/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. 
```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.0.4/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{}}riak/kv/2.0.4/learn/glossary/#vector-clock) for standard KV updates. The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store.md index 877fb5fbb5..152695efb0 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-set-store.md index 31d2348fea..c44b4b699a 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-store.md index 420b9393ae..0b98514c78 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.0.4/developing/data-types). +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.0.4/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`. 
The `DtOp` value specifies which Data Type-specific operation is being
-performed. More on that in the [PBC Data Type Union](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-union) document.
+performed. More on that in the [PBC Data Type Union]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-union) document.

```protobuf
message DtUpdateReq {
@@ -50,11 +50,11 @@ message DtUpdateReq {

Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket in which the Data Type is stored
-`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.0.4/using/cluster-operations/bucket-types).
+`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/bucket-types).

Also required is a `DtOp` message that specifies which operation is to
be performed, depending on whether the Data Type being updated is a
-[counter](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store).
+[counter]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-counter-store), [set]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-set-store), or [map]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-map-store).

```protobuf
message DtOp {
@@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum`

Parameter | Description
:---------|:-----------
`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
-`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.0.4/learn/glossary/#vector-clock)
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{< baseurl >}}riak/kv/2.0.4/learn/glossary/#vector-clock)
`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
@@ -92,7 +92,7 @@ Parameter | Description

## Response

The response to a Data Type update request is analogous to
-[`RpbPutResp`](/riak/kv/2.0.4/developing/api/protocol-buffers/store-object) for KV operations. If the
+[`RpbPutResp`]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/store-object) for KV operations. If the
`return_body` is set in the update request message (as explained above),
the message will include the opaque context of the Data Type (`context`)
and the new value of the Data Type _after_ the update has completed
diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-union.md
index f5b3b50c78..6b3a5bf504 100644
--- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-union.md
+++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/dt-union.md
@@ -28,4 +28,4 @@ message DtOp {
}
```

The included operation depends on the Data Type that is being updated. 
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.0.4/developing/api/protocol-buffers/dt-store) message. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object.md index ca76c0b9d6..0ee56bdfe6 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object.md @@ -47,7 +47,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or @@ -87,7 +87,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.0.4/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -114,7 +114,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `video/mp4` -* `vtag` --- The object's [vtag](/riak/kv/2.0.4/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{}}riak/kv/2.0.4/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -132,7 +132,7 @@ of the following optional parameters: } ``` Notice that both a key and value can be stored or just a key. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.0.4/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key) diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props.md index e9035b3ecb..09597da50e 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props.md @@ -26,7 +26,7 @@ message RpbGetBucketReq { } ``` -The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.0.4/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.0.4/using/cluster-operations/bucket-types) parameter (`type`) is optional. 
If it is not specified, the `default` bucket type will be used. ## Response @@ -85,7 +85,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.0.4/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{}}riak/kv/2.0.4/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -106,5 +106,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riakcs/latest/cookbooks/mdc-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/latest/cookbooks/mdc-overview/) {{% /note %}} diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-type.md index c9f08ca56b..76687b6a1f 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-type.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.0.4/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.0.4/using/cluster-operations/bucket-types). ## Request @@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-client-id.md index 7d239a3c97..82a832e211 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/get-client-id.md @@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.0.4/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/set-client-id). 
## Request diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/mapreduce.md index 0a0459a478..bdd7d08b78 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/mapreduce.md @@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.0.4/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.0.4/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as [REST API]({{}}riak/kv/2.0.4/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{}}riak/kv/2.0.4/developing/app-guide/advanced-mapreduce/#erlang) ## Response diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/reset-bucket-props.md index 415c6c185c..be2b3b57a4 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/reset-bucket-props.md @@ -27,7 +27,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/secondary-indexes.md index 11815b08c0..03ecd3c750 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/secondary-indexes.md @@ -61,7 +61,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.0.4/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -84,7 +84,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object). 
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props.md index 25e1cbcd18..542ba1290c 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props.md @@ -29,9 +29,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-type.md index 82481f6030..8973db6532 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-type.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.4/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.0.4/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types). ## Request @@ -28,4 +28,4 @@ message RpbSetBucketTypeReq { ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/store-object.md index bd227f692b..20cc55e201 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/store-object.md @@ -16,11 +16,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.0.4/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.0.4/learn/concepts/buckets), and [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{}}riak/kv/2.0.4/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.0.4/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types). 
A bucket must always be specified (via
`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
no key is specified, Riak will assign a random key to the object. If no
-[bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) is assigned, Riak will assign
-`default`, which means that the [default bucket configuration](/riak/kv/2.0.4/configuring/reference/#default-bucket-properties) will be used.
+[bucket type]({{< baseurl >}}riak/kv/2.0.4/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{< baseurl >}}riak/kv/2.0.4/configuring/reference/#default-bucket-properties) will be used.

#### Request

@@ -50,7 +50,7 @@ message RpbPutReq {

Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
-`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object)
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object)

#### Optional Parameters

@@ -93,7 +93,7 @@ message RpbPutResp {

If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
will contain the current object after the PUT completes, in `contents`,
-as well as the object's [causal context](/riak/kv/2.0.4/learn/concepts/causal-context), in the `vclock`
+as well as the object's [causal context]({{< baseurl >}}riak/kv/2.0.4/learn/concepts/causal-context), in the `vclock`
field. The `key` will be sent only if the server generated a random key
for the object.
diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-get.md
index 8d2d65466c..e292763ce5 100644
--- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-get.md
+++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-get.md
@@ -53,7 +53,7 @@ message RpbYokozunaIndex {
```

Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
+binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.0.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
index is stored (for GET requests) or on which you wish the index to be
stored (for PUT requests). An index's `n_val` must match the associated
bucket's `n_val`.
diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-put.md
index 8e94961cb7..ba2d3ef73d 100644
--- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-put.md
+++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-index-put.md
@@ -37,4 +37,4 @@ message RpbYokozunaIndex {
}
```

Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). 
An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-get.md index c4e693af30..5f3cf45bf8 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.0.4/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-put.md index d60b84b882..3280ba7f71 100644 --- a/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.0.4/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.0.4/developing/usage/search-schemas). +Create a new Solr [search schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.0.4/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.4/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/#message-codes) code with no data on success. 
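For comparison, the HTTP endpoint covered earlier in this changeset accomplishes the same thing; a minimal sketch, assuming a schema file `my_schema.xml` on disk:

```curl
curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
  -H 'Content-Type: application/xml' \
  --data-binary @my_schema.xml
```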
diff --git a/content/riak/kv/2.0.4/developing/app-guide.md b/content/riak/kv/2.0.4/developing/app-guide.md index 9d1842246b..709c376e3e 100644 --- a/content/riak/kv/2.0.4/developing/app-guide.md +++ b/content/riak/kv/2.0.4/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.0.4/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.0.4/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.0.4/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.0.4/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.0.4/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.0.4/developing/key-value-modeling -[dev data types]: /riak/kv/2.0.4/developing/data-types -[dev data types#counters]: /riak/kv/2.0.4/developing/data-types/counters -[dev data types#sets]: /riak/kv/2.0.4/developing/data-types/sets -[dev data types#maps]: /riak/kv/2.0.4/developing/data-types/maps -[usage create objects]: /riak/kv/2.0.4/developing/usage/creating-objects -[usage search]: /riak/kv/2.0.4/developing/usage/search -[use ref search]: /riak/kv/2.0.4/using/reference/search -[usage 2i]: /riak/kv/2.0.4/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.0.4/developing/client-libraries -[concept crdts]: /riak/kv/2.0.4/learn/concepts/crdts -[dev data model]: /riak/kv/2.0.4/developing/data-modeling -[usage mapreduce]: /riak/kv/2.0.4/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.0.4/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.0.4/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.4/setup/planning/backend/memory -[obj model java]: /riak/kv/2.0.4/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.4/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.4/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.4/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.4/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.4/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.4/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.0.4/using/reference/strong-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.0.4/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.0.4/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.0.4/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.0.4/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties -[install index]: /riak/kv/2.0.4/setup/installing -[getting started]: /riak/kv/2.0.4/developing/getting-started -[usage index]: /riak/kv/2.0.4/developing/usage -[glossary]: /riak/kv/2.0.4/learn/glossary +[usage conflict resolution]: {{}}riak/kv/2.0.4/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.0.4/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.0.4/developing/data-modeling/#sensor-data +[concept eventual consistency]: 
{{< baseurl >}}riak/kv/2.0.4/learn/concepts/eventual-consistency
+[dev data model#user]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling/#user-data
+[dev kv model]: {{< baseurl >}}riak/kv/2.0.4/developing/key-value-modeling
+[dev data types]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types
+[dev data types#counters]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types/counters
+[dev data types#sets]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types/sets
+[dev data types#maps]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types/maps
+[usage create objects]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/creating-objects
+[usage search]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/search
+[use ref search]: {{< baseurl >}}riak/kv/2.0.4/using/reference/search
+[usage 2i]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/secondary-indexes
+[dev client libraries]: {{< baseurl >}}riak/kv/2.0.4/developing/client-libraries
+[concept crdts]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/crdts
+[dev data model]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling
+[usage mapreduce]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/mapreduce
+[apps mapreduce]: {{< baseurl >}}riak/kv/2.0.4/developing/app-guide/advanced-mapreduce
+[use ref 2i]: {{< baseurl >}}riak/kv/2.0.4/using/reference/secondary-indexes
+[plan backend leveldb]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/leveldb
+[plan backend bitcask]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/bitcask
+[plan backend memory]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/memory
+[obj model java]: {{< baseurl >}}riak/kv/2.0.4/developing/getting-started/java/object-modeling
+[obj model ruby]: {{< baseurl >}}riak/kv/2.0.4/developing/getting-started/ruby/object-modeling
+[obj model python]: {{< baseurl >}}riak/kv/2.0.4/developing/getting-started/python/object-modeling
+[obj model csharp]: {{< baseurl >}}riak/kv/2.0.4/developing/getting-started/csharp/object-modeling
+[obj model nodejs]: {{< baseurl >}}riak/kv/2.0.4/developing/getting-started/nodejs/object-modeling
+[obj model erlang]: {{< baseurl >}}riak/kv/2.0.4/developing/getting-started/erlang/object-modeling
+[obj model golang]: {{< baseurl >}}riak/kv/2.0.4/developing/getting-started/golang/object-modeling
+[concept strong consistency]: {{< baseurl >}}riak/kv/2.0.4/using/reference/strong-consistency
+[use ref strong consistency]: {{< baseurl >}}riak/kv/2.0.4/using/reference/strong-consistency
+[cluster ops strong consistency]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/strong-consistency
+[config strong consistency]: {{< baseurl >}}riak/kv/2.0.4/configuring/strong-consistency
+[apps strong consistency]: {{< baseurl >}}riak/kv/2.0.4/developing/app-guide/strong-consistency
+[usage update objects]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/updating-objects
+[apps replication properties]: {{< baseurl >}}riak/kv/2.0.4/developing/app-guide/replication-properties
+[install index]: {{< baseurl >}}riak/kv/2.0.4/setup/installing
+[getting started]: {{< baseurl >}}riak/kv/2.0.4/developing/getting-started
+[usage index]: {{< baseurl >}}riak/kv/2.0.4/developing/usage
+[glossary]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary

So you've decided to build an application using Riak as a data store. We
think that this is a wise choice for a broad variety of use cases. But
@@ -118,7 +118,7 @@ Riak may not be such a good choice if you use it to store:

* **Objects that exceed 1-2MB in size** --- If you will be storing a lot
  of objects over that size, we would recommend checking
- out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak
+ out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak
  CS was built to solve this problem. Storing large objects in Riak will
  typically lead to substandard performance. 
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.0.4/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.0.4/developing/app-guide/advanced-mapreduce.md index 7c7e6f1102..454bba4000 100644 --- a/content/riak/kv/2.0.4/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.0.4/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.4/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.0.4/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.0.4/using/reference/custom-code -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.4/configuring/reference +[usage 2i]: {{}}riak/kv/2.0.4/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.0.4/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.0.4/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.4/configuring/reference > **Use MapReduce sparingly** > @@ -725,7 +725,7 @@ You can use streaming with Erlang via the Riak local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.0.4/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.0.4/developing/app-guide/cluster-metadata.md index e172d4b4b9..7494b8a992 100644 --- a/content/riak/kv/2.0.4/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.0.4/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.0.4/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.0.4/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.0.4/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.0.4/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. 
Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.0.4/developing/app-guide/replication-properties.md b/content/riak/kv/2.0.4/developing/app-guide/replication-properties.md index cc03d9dd41..b1879e4fb5 100644 --- a/content/riak/kv/2.0.4/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.0.4/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.4/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/latest/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.0.4/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.0.4/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.0.4/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.0.4/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.4/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.4/learn/concepts/buckets) that you're using. 
In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.0.4/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.0.4/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.4/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.0.4/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.0.4/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. 
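For instance, tuning R, W, and DW per request with the official Python client might look like the following sketch (a sketch only: it assumes a local node listening on the default protocol buffers port, and the bucket and key mirror the curl example later in this section):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('nba_stats')

# Write that must be acknowledged by three replicas, two of them on disk
obj = bucket.new('michael_jordan', data={'ppg': 30.1})
obj.store(w=3, dw=2)

# Read that waits for three replicas to respond before returning
fetched = bucket.get('michael_jordan', r=3)
print(fetched.data)
```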
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.4/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.4/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.0.4/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.0.4/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.0.4/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.0.4/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.0.4/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.0.4/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.4/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.4/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.0.4/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.0.4/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.4/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.4/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.4/developing/app-guide/strong-consistency.md b/content/riak/kv/2.0.4/developing/app-guide/strong-consistency.md index 1df0190e7f..6c42e88eab 100644 --- a/content/riak/kv/2.0.4/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.0.4/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.0.4/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/2.1.3/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.0.4/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.0.4/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.0.4/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.0.4/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.0.4/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.0.4/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.0.4/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.0.4/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/2.1.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.0.4/developing/client-libraries -[getting started]: /riak/kv/2.0.4/developing/getting-started -[config strong consistency#details]: /riak/kv/2.0.4/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.0.4/using/reference/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.0.4/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.0.4/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.0.4/using/cluster-operations/bucket-types +[apps replication properties]: 
{{}}riak/kv/2.0.4/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.0.4/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.0.4/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.0.4/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.0.4/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.0.4/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.0.4/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.0.4/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.0.4/developing/client-libraries +[getting started]: {{}}riak/kv/2.0.4/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.0.4/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.0.4/developing/client-libraries.md b/content/riak/kv/2.0.4/developing/client-libraries.md index 7814ab1423..d99b5fec32 100644 --- a/content/riak/kv/2.0.4/developing/client-libraries.md +++ b/content/riak/kv/2.0.4/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.0.4/developing/data-types.md b/content/riak/kv/2.0.4/developing/data-types.md index 2154d0e10e..f40f85b2a7 100644 --- a/content/riak/kv/2.0.4/developing/data-types.md +++ b/content/riak/kv/2.0.4/developing/data-types.md @@ -38,9 +38,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -261,5 +261,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. 
diff --git a/content/riak/kv/2.0.4/developing/faq.md b/content/riak/kv/2.0.4/developing/faq.md index 6dd170fd47..0493a4802f 100644 --- a/content/riak/kv/2.0.4/developing/faq.md +++ b/content/riak/kv/2.0.4/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.0.4/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.0.4/using/performance/benchmarking -[Bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.0.4/developing/usage +[[Basho Bench]: {{}}riak/kv/2.0.4/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.0.4/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.0.4/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.0.4/configuring/reference +[commit hooks]: {{}}riak/kv/2.0.4/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.0.4/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.0.4/developing/client-libraries -[MapReduce]: /riak/kv/2.0.4/developing/usage/mapreduce -[Memory]: /riak/kv/2.0.4/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.0.4/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.0.4/learn/concepts/causal-context#vector-clocks +[Erlang Riak Client]: {{}}riak/kv/2.0.4/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.0.4/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.0.4/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.0.4/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.0.4/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.0.4/developing/getting-started.md b/content/riak/kv/2.0.4/developing/getting-started.md index 5970a68b93..0720966eba 100644 --- a/content/riak/kv/2.0.4/developing/getting-started.md +++ b/content/riak/kv/2.0.4/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.0.4/setup/installing -[dev client libraries]: /riak/kv/2.0.4/developing/client-libraries +[install index]: {{}}riak/kv/2.0.4/setup/installing +[dev client libraries]: {{}}riak/kv/2.0.4/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.0.4/developing/getting-started/csharp.md b/content/riak/kv/2.0.4/developing/getting-started/csharp.md index fa69aa58eb..7c0dd20e81 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/csharp.md +++ b/content/riak/kv/2.0.4/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.4/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.0.4/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.4/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.0.4/developing/getting-started/csharp/querying.md b/content/riak/kv/2.0.4/developing/getting-started/csharp/querying.md index 7e4f4d7f51..4ade9f43de 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.0.4/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.4/developing/getting-started/erlang.md b/content/riak/kv/2.0.4/developing/getting-started/erlang.md index 5ab48b5384..0147fbc920 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/erlang.md +++ b/content/riak/kv/2.0.4/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.4/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.4/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.4/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.0.4/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.0.4/developing/getting-started/erlang/object-modeling.md index 5b4342d091..8206ec629a 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.0.4/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.4/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.4/developing/getting-started/erlang/querying.md b/content/riak/kv/2.0.4/developing/getting-started/erlang/querying.md index aa0959f8a4..947a8dd3c7 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.0.4/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.0.4/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.0.4/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.4/developing/getting-started/golang.md b/content/riak/kv/2.0.4/developing/getting-started/golang.md index 92bb1ddf2a..5bcbda9cf4 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/golang.md +++ b/content/riak/kv/2.0.4/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.4/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.4/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.4/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.4/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.0.4/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.0.4/developing/getting-started/golang/object-modeling.md index bf3014f1c6..31e0d0330a 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.0.4/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.4/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.4/developing/getting-started/golang/querying.md b/content/riak/kv/2.0.4/developing/getting-started/golang/querying.md index 91002b5ba9..76930ee5e7 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.0.4/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.0.4/developing/getting-started/java.md b/content/riak/kv/2.0.4/developing/getting-started/java.md index 9d0f460bf9..78fd28dae7 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/java.md +++ b/content/riak/kv/2.0.4/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.4/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
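The 2i pattern sketched above looks much the same in any of the official clients; a rough Python version, for instance, might read as follows (illustrative bucket, key, and index names, and assuming a LevelDB or Memory backend as the note requires):

```python
import riak

client = riak.RiakClient(pb_port=8087)  # assumes a local node, default PB port
bucket = client.bucket('people')        # illustrative bucket

# Attach secondary indexes as the object is stored: the _bin suffix
# marks a string index, _int an integer index
obj = bucket.new('casey', data={'name': 'Casey'})
obj.add_index('state_bin', 'WA')
obj.add_index('year_of_birth_int', 1990)
obj.store()

# Later, find keys by index value instead of scanning the whole dataset
page = bucket.get_index('state_bin', 'WA')
print(page.results)
```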
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.0.4/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.4/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.0.4/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.0.4/developing/getting-started/java/crud-operations.md index a94fc8ee1d..6db142c022 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.0.4/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in-depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.4/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.4/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -178,6 +178,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in-depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.4/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.4/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/) documentation. diff --git a/content/riak/kv/2.0.4/developing/getting-started/java/querying.md b/content/riak/kv/2.0.4/developing/getting-started/java/querying.md index 14c1ed66a5..86d1d7f549 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.0.4/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.4/developing/getting-started/nodejs.md b/content/riak/kv/2.0.4/developing/getting-started/nodejs.md index 4b24204787..e02bd0d738 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.0.4/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.4/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.4/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.4/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.0.4/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.0.4/developing/getting-started/nodejs/querying.md index 917dd157ed..bc4d4352cb 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.0.4/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.4/developing/getting-started/php.md b/content/riak/kv/2.0.4/developing/getting-started/php.md index 9bf635082b..cceffe7afe 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/php.md +++ b/content/riak/kv/2.0.4/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.4/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.4/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.4/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.0.4/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.0.4/developing/getting-started/php/crud-operations.md index 1254de4a8c..e1fd2abe1b 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.0.4/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. 
[In the next chapter](/riak/kv/2.0.4/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.0.4/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.0.4/developing/getting-started/php/querying.md b/content/riak/kv/2.0.4/developing/getting-started/php/querying.md index cf57bf30c4..b84aa5a327 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.0.4/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.4/developing/getting-started/python.md b/content/riak/kv/2.0.4/developing/getting-started/python.md index 152d00a48e..0aed0b1eab 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/python.md +++ b/content/riak/kv/2.0.4/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.4/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. 
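For a first taste, a complete create/read/update/delete round trip with the Python client might look like this sketch (assuming a local node on the default protocol buffers port, with an illustrative bucket and key):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('test')

obj = bucket.new('one', data={'value': 1})  # create
obj.store()

fetched = bucket.get('one')                 # read
print(fetched.data)                         # {'value': 1}

fetched.data['value'] = 2                   # update
fetched.store()

bucket.delete('one')                        # delete
```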
## Next Steps -[CRUD Operations](/riak/kv/2.0.4/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.4/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.0.4/developing/getting-started/python/querying.md b/content/riak/kv/2.0.4/developing/getting-started/python/querying.md index a7c3997223..69665049ad 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.0.4/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.4/developing/getting-started/ruby.md b/content/riak/kv/2.0.4/developing/getting-started/ruby.md index b77fe56058..b506e967db 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/ruby.md +++ b/content/riak/kv/2.0.4/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.4/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.4/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.4/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.0.4/developing/getting-started/ruby/querying.md b/content/riak/kv/2.0.4/developing/getting-started/ruby/querying.md index 8b458a40b9..abbcb849fb 100644 --- a/content/riak/kv/2.0.4/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.0.4/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.4/setup/planning/backend/leveldb). 
[Bitcask](/riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.4/developing/key-value-modeling.md b/content/riak/kv/2.0.4/developing/key-value-modeling.md index 5740ff542e..5ef18dbdc9 100644 --- a/content/riak/kv/2.0.4/developing/key-value-modeling.md +++ b/content/riak/kv/2.0.4/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.0.4/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.0.4/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.0.4/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.0.4/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.0.4/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.0.4/developing/app-guide/) for a better sense of which features you might need. +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.0.4/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects. 
Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.0.4/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.0.4/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.0.4/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.0.4/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.0.4/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.0.4/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.0.4/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.0.4/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.0.4/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.0.4/developing/data-types/sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.0.4/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.0.4/developing/data-types/sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.0.4/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.0.4/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. 
The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.0.4/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.4/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.4/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.4/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.4/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.0.4/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.0.4/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.0.4/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.0.4/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.0.4/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.0.4/developing/usage/commit-hooks.md b/content/riak/kv/2.0.4/developing/usage/commit-hooks.md index 399dca8756..1e10789b27 100644 --- a/content/riak/kv/2.0.4/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.0.4/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. -Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.0.4/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.0.4/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. 
## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.0.4/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.0.4/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.0.4/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.0.4/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.0.4/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.0.4/developing/usage/conflict-resolution.md b/content/riak/kv/2.0.4/developing/usage/conflict-resolution.md index 1f893f5f73..d066e91b1b 100644 --- a/content/riak/kv/2.0.4/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.0.4/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.4/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.4/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.0.4/learn/concepts/clusters) system in which any [node](/riak/kv/2.0.4/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{}}riak/kv/2.0.4/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.0.4/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. 
-If you are using Riak in an [eventually consistent](/riak/kv/2.0.4/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.0.4/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.0.4/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.0.4/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.0.4/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.0.4/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.0.4/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.0.4/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.0.4/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.0.4/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.0.4/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.0.4/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.0.4/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.0.4/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. The most important [bucket properties](/riak/kv/2.0.4/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.0.4/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties.
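One client-side strategy, for example, is to register a resolver function that is applied to siblings on every fetch; a minimal sketch with the Python client's bundled timestamp-based resolver might look like this (assuming the `riak` package, with an illustrative bucket and key):

```python
import riak
from riak.resolver import last_written_resolver

client = riak.RiakClient(pb_port=8087)
# Collapse siblings by timestamp on the client: roughly the read-side
# analogue of the last_write_wins bucket property
client.resolver = last_written_resolver

obj = client.bucket('users').get('some_key')
print(obj.data)  # any siblings were collapsed by the resolver
```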
@@ -87,7 +87,7 @@ If the `[allow_mult](#siblings)` parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -`[last_write_wins](/riak/kv/2.0.4/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, +`[last_write_wins]({{}}riak/kv/2.0.4/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.0.4/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.4/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.4/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.4/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.4/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.0.4/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.0.4/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.0.4/configuring/reference) to change the [default bucket properties](/riak/kv/2.0.4/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.0.4/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.0.4/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.0.4/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.0.4/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{}}riak/kv/2.0.4/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.0.4/learn/concepts/causal-context#vector-clocks) will be used. 
Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.0.4/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.0.4/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.0.4/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.0.4/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.0.4/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.0.4/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.0.4/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.0.4/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.0.4/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.4/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.0.4/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.4/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. 
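Before resolving anything, it helps to see what a conflicted read actually looks like. A sketch using the HTTP API and the `siblings_allowed` type from the example above (the key and vtag values are placeholders):

```curl
# With allow_mult set to true, a GET on a conflicted key returns
# 300 Multiple Choices and lists the sibling vtags in the body
curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/whatever

# An individual sibling can be fetched by vtag, or all siblings at
# once by accepting a multipart/mixed response
curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/whatever?vtag=<vtag>
curl -H "Accept: multipart/mixed" \
  http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/whatever
```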
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.0.4/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.4/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.4/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.4/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.4/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -611,7 +611,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.0.4/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.0.4/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -666,7 +666,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/csharp.md index 3e198bfa4f..0195e5c3ec 100644 --- a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.4/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
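Whatever the client, the resolution pattern has the same shape: fetch the siblings along with the object's current causal context, reduce them to a single value, and write that value back with the context attached. A client-agnostic sketch over the HTTP API (the type, bucket, key, and vclock values are placeholders):

```curl
# 1. Fetch the conflicted object and note the X-Riak-Vclock response header
curl -i http://localhost:8098/types/siblings/buckets/users/keys/janedoe

# 2. Write the resolved value back, echoing that vclock so Riak knows
#    the existing siblings have been superseded
curl -XPUT http://localhost:8098/types/siblings/buckets/users/keys/janedoe \
  -H "Content-Type: application/json" \
  -H "X-Riak-Vclock: <vclock-from-step-1>" \
  -d '{"friends":["user1","user2"]}'
```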
diff --git a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/golang.md index b139a06788..549676c262 100644 --- a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.4/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to usecase-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/java.md index a6fc6f53a7..eb557f8187 100644 --- a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.4/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.4/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.4/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.4/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.4/developing/data-types/) that have specific conflict resolution mechanics built in. 
If you have data that -can be modeled as a [counter](/riak/kv/2.0.4/developing/data-types/counters), [set](/riak/kv/2.0.4/developing/data-types/sets), or [map](/riak/kv/2.0.4/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.4/developing/data-types/counters), [set]({{}}riak/kv/2.0.4/developing/data-types/sets), or [map]({{}}riak/kv/2.0.4/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.4/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.4/developing/data-types/sets). diff --git a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/nodejs.md index 49b210bb81..c8a0a645c1 100644 --- a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.4/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/php.md index 93c3d9ce57..d70dec706d 100644 --- a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.4/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.4/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.4/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.4/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.4/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.4/developing/data-types/counters), [set](/riak/kv/2.0.4/developing/data-types/sets), or [map](/riak/kv/2.0.4/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.4/developing/data-types/counters), [set]({{}}riak/kv/2.0.4/developing/data-types/sets), or [map]({{}}riak/kv/2.0.4/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.4/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.4/developing/data-types/sets). diff --git a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/python.md index b4112d7546..35174d9ff2 100644 --- a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.4/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.4/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.4/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.4/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.4/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.4/developing/data-types/counters), [set](/riak/kv/2.0.4/developing/data-types/sets), or [map](/riak/kv/2.0.4/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.4/developing/data-types/counters), [set]({{}}riak/kv/2.0.4/developing/data-types/sets), or [map]({{}}riak/kv/2.0.4/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.4/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.4/developing/data-types/sets). diff --git a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/ruby.md index e9c2731d1d..21c98607da 100644 --- a/content/riak/kv/2.0.4/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.0.4/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.4/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.4/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.4/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.4/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.4/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.4/developing/data-types/counters), [set](/riak/kv/2.0.4/developing/data-types/sets), or [map](/riak/kv/2.0.4/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.4/developing/data-types/counters), [set]({{}}riak/kv/2.0.4/developing/data-types/sets), or [map]({{}}riak/kv/2.0.4/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.4/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.4/developing/data-types/sets). diff --git a/content/riak/kv/2.0.4/developing/usage/creating-objects.md b/content/riak/kv/2.0.4/developing/usage/creating-objects.md index 7919f8a66e..c2ddd07792 100644 --- a/content/riak/kv/2.0.4/developing/usage/creating-objects.md +++ b/content/riak/kv/2.0.4/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.0.4/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.0.4/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -27,7 +27,7 @@ In the example above, our read was unsuccessful because our Riak cluster is currently empty. Let's change that by storing an object containing information about a dog named Rufus. We'll store that object in the location described above, i.e. in the key `rufus` in the bucket `dogs`, -which bears the `animals` [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types). +which bears the `animals` [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types). 
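This assumes the `animals` bucket type already exists in your cluster; if it doesn't, a minimal sketch of creating and activating it first:

```bash
riak-admin bucket-type create animals '{"props":{}}'
riak-admin bucket-type activate animals
```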
The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -122,7 +122,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, run the same read operation in [Reading Objects](/riak/kv/2.0.4/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no +Now, run the same read operation in [Reading Objects]({{}}riak/kv/2.0.4/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no longer empty! ### Store an Object @@ -143,7 +143,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.0.4/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.0.4/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.0.4/developing/usage/custom-extractors.md b/content/riak/kv/2.0.4/developing/usage/custom-extractors.md index 5b4fdf0def..e04741067c 100644 --- a/content/riak/kv/2.0.4/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.0.4/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.0.4/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.0.4/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.0.4/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.0.4/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.0.4/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added to `` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.0.4/developing/usage/deleting-objects.md b/content/riak/kv/2.0.4/developing/usage/deleting-objects.md index d6b3b36b64..8929e619a9 100644 --- a/content/riak/kv/2.0.4/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.0.4/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.0.4/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.0.4/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.0.4/developing/usage/document-store.md b/content/riak/kv/2.0.4/developing/usage/document-store.md index af9fe5a5f4..0a2434ba64 100644 --- a/content/riak/kv/2.0.4/developing/usage/document-store.md +++ b/content/riak/kv/2.0.4/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.0.4/developing/usage/search/) and [Riak Data Types](/riak/kv/2.0.4/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.0.4/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.0.4/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.0.4/developing/data-types/maps). +[Riak maps]({{}}riak/kv/2.0.4/developing/data-types/maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.0.4/developing/data-types/maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.0.4/developing/data-types/maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**. 
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.0.4/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.0.4/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.0.4/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.0.4/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.0.4/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.0.4/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.0.4/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.0.4/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.0.4/developing/usage/mapreduce.md b/content/riak/kv/2.0.4/developing/usage/mapreduce.md index 2f7e0a89ce..8738d42804 100644 --- a/content/riak/kv/2.0.4/developing/usage/mapreduce.md +++ b/content/riak/kv/2.0.4/developing/usage/mapreduce.md @@ -33,9 +33,9 @@ transferring a potentially huge dataset to a client algorithm. Developers can use MapReduce for things like filtering documents by tags, counting words in documents, and extracting links to related data. In Riak, MapReduce is one method for querying that is not strictly based -on key querying, alongside [secondary indexes](/riak/kv/2.0.4/developing/usage/secondary-indexes/) -and [Search](/riak/kv/2.0.4/developing/usage/search/). MapReduce jobs can be submitted through the -[HTTP API](/riak/kv/2.0.4/developing/api/http) or the [Protocol Buffers API](/riak/kv/2.0.4/developing/api/protocol-buffers/), although we +on key querying, alongside [secondary indexes]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes/) +and [Search]({{}}riak/kv/2.0.4/developing/usage/search/). MapReduce jobs can be submitted through the +[HTTP API]({{}}riak/kv/2.0.4/developing/api/http) or the [Protocol Buffers API]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/), although we strongly recommend using the Protocol Buffers API for performance reasons. @@ -49,9 +49,9 @@ reasons. ## When to Use MapReduce * When you know the set of objects over which you want to MapReduce - (i.e. the locations of the objects, as specified by [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types), bucket, and key) + (i.e. the locations of the objects, as specified by [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types), bucket, and key) * When you want to return actual objects or pieces of objects and not - just the keys. 
[Search](/riak/kv/2.0.4/developing/usage/search/) and [secondary indexes](/riak/kv/2.0.4/developing/usage/secondary-indexes) are other means of returning objects based on + just the keys. [Search]({{}}riak/kv/2.0.4/developing/usage/search/) and [secondary indexes]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) are other means of returning objects based on non-key-based queries, but they only return lists of keys and not whole objects. * When you need the utmost flexibility in querying your data. MapReduce @@ -86,7 +86,7 @@ Riak MapReduce queries have two components: * A list of phases The elements of the input list are object locations as specified by -[bucket type](/riak/kv/2.0.4/developing/usage/bucket-types), bucket, and key. The elements of the +[bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types), bucket, and key. The elements of the phases list are chunks of information related to a map, a reduce, or a link function. @@ -96,7 +96,7 @@ node that the client contacts to make the request becomes the above, each job consists of a list of phases, where each phase is either a map or a reduce phase. The coordinating node uses the list of phases to route the object keys and the function that will operate over the -objects stored in those keys and instruct the proper [vnode](/riak/kv/2.0.4/learn/glossary/#vnode) to +objects stored in those keys and instruct the proper [vnode]({{}}riak/kv/2.0.4/learn/glossary/#vnode) to run that function over the right objects. After running the map function, the results are sent back to the @@ -107,20 +107,20 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example In this example, we'll create four objects with the text "caremad" repeated a varying number of times and store those objects in the bucket -`training` (which does not bear a [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types)). +`training` (which does not bear a [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types)). An Erlang MapReduce function will be used to count the occurrences of the word "caremad." ### Data object input commands For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) -in conjunction with Riak's [HTTP API](/riak/kv/2.0.4/developing/api/http) to store the objects: +in conjunction with Riak's [HTTP API]({{}}riak/kv/2.0.4/developing/api/http) to store the objects: ```curl curl -XPUT http://localhost:8098/buckets/training/keys/foo \ @@ -218,4 +218,4 @@ counting the number of instances of the word. ## Advanced MapReduce Queries For more detailed information on MapReduce queries in Riak, we recommend -checking out our [Advanced MapReduce](/riak/kv/2.0.4/developing/app-guide/advanced-mapreduce) guide. +checking out our [Advanced MapReduce]({{}}riak/kv/2.0.4/developing/app-guide/advanced-mapreduce) guide. diff --git a/content/riak/kv/2.0.4/developing/usage/reading-objects.md b/content/riak/kv/2.0.4/developing/usage/reading-objects.md index a31824f403..b0c1369bf7 100644 --- a/content/riak/kv/2.0.4/developing/usage/reading-objects.md +++ b/content/riak/kv/2.0.4/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. 
You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.0.4/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) `animals`: +`dogs`, which bears the [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) `animals`: ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.0.4/developing/usage/replication.md b/content/riak/kv/2.0.4/developing/usage/replication.md index f6e27b9f6d..c20ffb10b4 100644 --- a/content/riak/kv/2.0.4/developing/usage/replication.md +++ b/content/riak/kv/2.0.4/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.4/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/latest/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using Strong +Using Strong Consistency documentation, as this option will not be covered in this tutorial. 
{{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.4/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.4/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.4/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.4/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.4/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.0.4/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.0.4/setup/planning/backend/multi). 
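Like R and W, the PR, PW, and DW values can also be supplied per request rather than baked into bucket properties. A sketch against the HTTP API (the bucket and key names are placeholders):

```curl
# Read that requires 2 primary vnodes to respond
curl "http://localhost:8098/buckets/sensors/keys/sensor_1_data?pr=2"

# Write that must reach 2 primary vnodes and be durably written on 2 replicas
curl -XPUT "http://localhost:8098/buckets/sensors/keys/sensor_1_data?pw=2&dw=2" \
  -H "Content-Type: text/plain" \
  -d "72F"
```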
## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.4/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.4/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.0.4/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.0.4/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.4/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.4/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.4/developing/usage/search-schemas.md b/content/riak/kv/2.0.4/developing/usage/search-schemas.md index 8bba6caf25..e0cc8c51a1 100644 --- a/content/riak/kv/2.0.4/developing/usage/search-schemas.md +++ b/content/riak/kv/2.0.4/developing/usage/search-schemas.md @@ -15,17 +15,17 @@ aliases: - /riak/kv/2.0.4/dev/advanced/search-schema --- -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). Riak Search is built for ease of use, allowing you to write values into Riak and query for values using Solr. Riak Search does a lot of work -under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.0.4/developing/data-types/), and [more](/riak/kv/2.0.4/developing/usage/custom-extractors)---into something that can be indexed and searched later. +under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.0.4/developing/data-types/), and [more]({{}}riak/kv/2.0.4/developing/usage/custom-extractors)---into something that can be indexed and searched later. Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing and array of strings? An integer? A date? Is your text in English or Russian? 
You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.0.4/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.0.4/developing/usage/search.md b/content/riak/kv/2.0.4/developing/usage/search.md index 1aaeb882f3..9fd01a8e06 100644 --- a/content/riak/kv/2.0.4/developing/usage/search.md +++ b/content/riak/kv/2.0.4/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.0.4/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.0.4/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.4/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.4/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.4/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.4/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.0.4/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.0.4/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.0.4/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.0.4/developing/usage/custom-extractors). 
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.0.4/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.0.4/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.0.4/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.0.4/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.0.4/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.0.4/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.0.4/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.0.4/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.0.4/developing/usage/searching-data-types.md b/content/riak/kv/2.0.4/developing/usage/searching-data-types.md index 02c5e48c92..ef5b2dc996 100644 --- a/content/riak/kv/2.0.4/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.0.4/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.4/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.0.4/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.0.4/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.0.4/developing/data-types/counters), [sets](/riak/kv/2.0.4/developing/data-types/sets), and [maps](/riak/kv/2.0.4/developing/data-types/maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.0.4/developing/data-types/counters), [sets]({{}}riak/kv/2.0.4/developing/data-types/sets), and [maps]({{}}riak/kv/2.0.4/developing/data-types/maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
### Top-level Schemas -The default schema for [counters](/riak/kv/2.0.4/developing/data-types/counters) indexes each +The default schema for [counters]({{}}riak/kv/2.0.4/developing/data-types/counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.0.4/developing/data-types/sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.0.4/developing/data-types/sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.0.4/developing/data-types/maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.0.4/developing/data-types/maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) for [storing counters](/riak/kv/2.0.4/developing/data-types/counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.0.4/developing/data-types/counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types) for [storing sets](/riak/kv/2.0.4/developing/data-types/sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.0.4/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.0.4/developing/data-types/sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.0.4/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.0.4/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.0.4/developing/data-types/maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.0.4/developing/data-types/maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.0.4/developing/usage/secondary-indexes.md b/content/riak/kv/2.0.4/developing/usage/secondary-indexes.md index d474f7a317..07c8ef80c8 100644 --- a/content/riak/kv/2.0.4/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.0.4/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.0.4/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.4/setup/planning/backend/memory -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.4/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.4/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/latest/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.4/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.0.4/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.0.4/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.0.4/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.0.4/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.0.4/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.0.4/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.0.4/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.0.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.0.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.0.4/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.0.4/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.0.4/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.0.4/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.0.4/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.0.4/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.4/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.4/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.4/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.4/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.0.4/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.0.4/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.0.4/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.0.4/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
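As a condensed sketch ahead of the fuller examples in the sections that follow, here is the whole 2i round trip over HTTP: tag an object with an index entry at write time, then query it back by exact match and by paginated range (bucket, key, and index names are illustrative):

```bash
# Write an object tagged with a binary secondary index
curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \
  -H 'x-riak-index-twitter_bin: jsmith123' \
  -H 'Content-Type: application/json' \
  -d '{"name": "John Smith"}'

# Exact-match query on that index
curl localhost:8098/buckets/users/index/twitter_bin/jsmith123

# Range query, paginated to at most 5 results per page
curl "localhost:8098/buckets/users/index/twitter_bin/a/z?max_results=5"
```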
### Exact Match diff --git a/content/riak/kv/2.0.4/developing/usage/security.md b/content/riak/kv/2.0.4/developing/usage/security.md index 5057a2d330..85855e253d 100644 --- a/content/riak/kv/2.0.4/developing/usage/security.md +++ b/content/riak/kv/2.0.4/developing/usage/security.md @@ -15,49 +15,49 @@ aliases: - /riak/kv/2.0.4/dev/advanced/client-security --- -Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.0.4/using/security/basics) that enables you to choose +Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.0.4/using/security/basics) that enables you to choose * which Riak users/clients are authorized to perform a wide variety of Riak operations, and * how those users/clients are required to authenticate themselves. -The following four authentication mechanisms, aka [security sources](/riak/kv/2.0.4/using/security/managing-sources/) are available: +The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.0.4/using/security/managing-sources/) are available: -* [Trust](/riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication)-based +* [Trust]({{}}riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default -* [Password](/riak/kv/2.0.4/using/security/managing-sources/#password-based-authentication)-based authentication requires +* [Password]({{}}riak/kv/2.0.4/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password -* [Certificate](/riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication +* [Certificate]({{}}riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients -* [Pluggable authentication module (PAM)](/riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication requires +* [Pluggable authentication module (PAM)]({{}}riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the - `[riak-admin security](/riak/kv/2.0.4/using/security/managing-sources/#managing-sources)` + `[riak-admin security]({{}}riak/kv/2.0.4/using/security/managing-sources/#managing-sources)` command line interface Riak's approach to security is highly flexible. If you choose to use Riak's security feature, you do not need to require that all clients authenticate via the same means. Instead, you can specify authentication sources on a client-by-client, i.e. user-by-user, basis. This means that -you can require clients performing, say, [MapReduce](/riak/kv/2.0.4/developing/usage/mapreduce/) -operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.0.4/developing/usage) have to use username and password. The approach +you can require clients performing, say, [MapReduce]({{}}riak/kv/2.0.4/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.0.4/developing/usage) have to use username and password. The approach that you adopt will depend on your security needs. This document provides a general overview of how that works. 
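On the server side, the workflow described above boils down to a handful of `riak-admin security` commands. A minimal sketch, assuming a user named `riakuser` authenticating by password from localhost (the user name, password, and granted permissions are illustrative):

```bash
riak-admin security enable
riak-admin security add-user riakuser password=rosebud
riak-admin security add-source riakuser 127.0.0.1/32 password
riak-admin security grant riak_kv.get,riak_kv.put on any to riakuser
```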
For managing security in Riak itself, see the following documents: -* [Authentication and Authorization](/riak/kv/2.0.4/using/security/basics) -* [Managing Security Sources](/riak/kv/2.0.4/using/security/managing-sources/) +* [Authentication and Authorization]({{}}riak/kv/2.0.4/using/security/basics) +* [Managing Security Sources]({{}}riak/kv/2.0.4/using/security/managing-sources/) We also provide client-library-specific guides for the following officially supported clients: -* [Java](/riak/kv/2.0.4/developing/usage/security/java) -* [Ruby](/riak/kv/2.0.4/developing/usage/security/ruby) -* [PHP](/riak/kv/2.0.4/developing/usage/security/php) -* [Python](/riak/kv/2.0.4/developing/usage/security/python) -* [Erlang](/riak/kv/2.0.4/developing/usage/security/erlang) +* [Java]({{}}riak/kv/2.0.4/developing/usage/security/java) +* [Ruby]({{}}riak/kv/2.0.4/developing/usage/security/ruby) +* [PHP]({{}}riak/kv/2.0.4/developing/usage/security/php) +* [Python]({{}}riak/kv/2.0.4/developing/usage/security/python) +* [Erlang]({{}}riak/kv/2.0.4/developing/usage/security/erlang) ## Certificates, Keys, and Authorities @@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients. > **HTTP not supported** > > Certificate-based authentication is available only through Riak's -[Protocol Buffers](/riak/kv/2.0.4/developing/api/protocol-buffers/) interface. It is not available through the -[HTTP API](/riak/kv/2.0.4/developing/api/http). +[Protocol Buffers]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{}}riak/kv/2.0.4/developing/api/http). ### Default Names -In Riak's [configuration files](/riak/kv/2.0.4/configuring/reference/#security), the +In Riak's [configuration files]({{}}riak/kv/2.0.4/configuring/reference/#security), the default certificate file names are as follows: Cert | Filename diff --git a/content/riak/kv/2.0.4/developing/usage/security/erlang.md b/content/riak/kv/2.0.4/developing/usage/security/erlang.md index cba9f97854..c8d847b5ca 100644 --- a/content/riak/kv/2.0.4/developing/usage/security/erlang.md +++ b/content/riak/kv/2.0.4/developing/usage/security/erlang.md @@ -19,9 +19,9 @@ aliases: This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak. -If you are using [trust](/riak/kv/2.0.4/using/security/managing-sources/), [PAM-](/riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.0.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust]({{}}riak/kv/2.0.4/using/security/managing-sources/), [PAM-]({{}}riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.0.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). 
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.4/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.4/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.4/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.4/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.4/developing/usage/security/java.md b/content/riak/kv/2.0.4/developing/usage/security/java.md index a714c57167..5c0bc4f83e 100644 --- a/content/riak/kv/2.0.4/developing/usage/security/java.md +++ b/content/riak/kv/2.0.4/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.4/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.4/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.0.4/developing/usage/security/php.md b/content/riak/kv/2.0.4/developing/usage/security/php.md index bf7aa27090..bab8c86b9d 100644 --- a/content/riak/kv/2.0.4/developing/usage/security/php.md +++ b/content/riak/kv/2.0.4/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.4/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.0.4/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.0.4/developing/usage/security/python.md b/content/riak/kv/2.0.4/developing/usage/security/python.md index 7d578b0f04..d252908183 100644 --- a/content/riak/kv/2.0.4/developing/usage/security/python.md +++ b/content/riak/kv/2.0.4/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.4/using/security/managing-sources/) or [PAM-](/riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.0.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.4/using/security/managing-sources/) or [PAM-]({{}}riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication), you can use the security +setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.0.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.4/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.4/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.0.4/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.0.4/using/security/managing-sources/#Trust-based-Authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.4/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.4/using/security/basics/#user-management). 
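The PAM setup described above also has a server-side half: the user must exist in Riak and be bound to a PAM source. A hedged sketch (the `service=riak` PAM service name is an assumption; use whatever service is configured on your nodes):

```bash
riak-admin security add-user riakuser
riak-admin security add-source riakuser 127.0.0.1/32 pam service=riak
```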
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.0.4/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.0.4/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.0.4/developing/usage/security/ruby.md b/content/riak/kv/2.0.4/developing/usage/security/ruby.md index b0870a8076..47e79cf3cc 100644 --- a/content/riak/kv/2.0.4/developing/usage/security/ruby.md +++ b/content/riak/kv/2.0.4/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.4/using/security/managing-sources/) or [PAM](/riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.0.4/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.0.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.0.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.4/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.0.4/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication). 
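Looping back to the **Specifying Ciphers** section above: the client-side `ciphers` parameter has a server-side counterpart in `riak-admin`. A sketch with an illustrative (not vetted) cipher list:

```bash
# View the currently configured ciphers
riak-admin security ciphers

# Set a preferred, colon-delimited cipher list
riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256
```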
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.4/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.4/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.0.4/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.4/developing/usage/updating-objects.md b/content/riak/kv/2.0.4/developing/usage/updating-objects.md index 043a9ef680..bd8f8ef1d6 100644 --- a/content/riak/kv/2.0.4/developing/usage/updating-objects.md +++ b/content/riak/kv/2.0.4/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/dev/using/updates --- -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode ## Using Causal Context @@ -23,9 +23,9 @@ If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.0.4/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.0.4/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.0.4/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.0.4/learn/concepts/causal-context). These objects track the causal history of objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps; 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.0.4/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.0.4/learn/concepts/causal-context) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.0.4/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.0.4/learn/concepts/causal-context). Although a more detailed tutorial on context objects and +object updates can be found in [Conflict Resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution), we'll walk you through a basic example here. 
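Before the worked example that follows, here is what that three-step fetch/modify/write cycle looks like over the bare HTTP API; the client libraries perform this context round trip for you. The bucket, key, and values are illustrative:

```bash
# 1. Read the object, capturing its causal context from the response header
VCLOCK=$(curl -sI localhost:8098/buckets/nba/keys/champion \
  | grep -i '^x-riak-vclock:' | awk '{print $2}' | tr -d '\r')

# 2-3. Modify the value and write it back with the fetched context
curl -XPUT localhost:8098/buckets/nba/keys/champion \
  -H "X-Riak-Vclock: $VCLOCK" \
  -H 'Content-Type: text/plain' \
  -d 'Harlem Globetrotters'
```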
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.0.4/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.0.4/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.0.4/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.0.4/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.0.4/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.0.4/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.0.4/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.0.4/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.0.4/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.0.4/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.0.4/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.0.4/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.0.4/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.0.4/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.0.4/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.0.4/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.0.4/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.0.4/index.md b/content/riak/kv/2.0.4/index.md index 0dacdb758f..4f920384a7 100644 --- a/content/riak/kv/2.0.4/index.md +++ b/content/riak/kv/2.0.4/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.0.4/configuring -[dev index]: /riak/kv/2.0.4/developing -[downloads]: /riak/kv/2.0.4/downloads/ -[install index]: /riak/kv/2.0.4/setup/installing/ -[plan index]: /riak/kv/2.0.4/setup/planning -[perf open files]: /riak/kv/2.0.4/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.0.4/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.0.4/developing/usage/search -[getting started]: /riak/kv/2.0.4/developing/getting-started -[dev client libraries]: /riak/kv/2.0.4/developing/client-libraries +[config index]: {{}}riak/kv/2.0.4/configuring +[dev index]: {{}}riak/kv/2.0.4/developing +[downloads]: {{}}riak/kv/2.0.4/downloads/ +[install index]: {{}}riak/kv/2.0.4/setup/installing/ +[plan index]: {{}}riak/kv/2.0.4/setup/planning +[perf open files]: {{}}riak/kv/2.0.4/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.0.4/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.0.4/developing/usage/search +[getting started]: {{}}riak/kv/2.0.4/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.0.4/developing/client-libraries diff --git a/content/riak/kv/2.0.4/introduction.md b/content/riak/kv/2.0.4/introduction.md index e43be98b39..8e642c5824 100644 --- a/content/riak/kv/2.0.4/introduction.md +++ b/content/riak/kv/2.0.4/introduction.md @@ -27,7 +27,7 @@ For more in-depth implementation details check out the If you're upgrading to Riak 2.0 from an earlier version, please be aware that all of the new features listed below are optional: -* **Riak Data Types** --- Riak's new CRDT-based [Data Types](/riak/kv/2.0.4/developing/data-types) can +* **Riak Data Types** --- Riak's new CRDT-based [Data Types]({{}}riak/kv/2.0.4/developing/data-types) can simplify modeling data in Riak, but are only used in buckets explicitly configured to use them. * **Strong Consistency, Riak Security, and the New Riak Search** --- @@ -35,16 +35,16 @@ that all of the new features listed below are optional: work. If not turned on, they will have no impact on performance. Furthermore, the older Riak Search will continue to be included with Riak. -* **Security** --- [Authentication and authorization](/riak/kv/2.0.4/using/security/basics) can be enabled +* **Security** --- [Authentication and authorization]({{}}riak/kv/2.0.4/using/security/basics) can be enabled or disabled at any time. -* **Configuration management** --- Riak's [configuration files](/riak/kv/2.0.4/configuring/reference/) have +* **Configuration management** --- Riak's [configuration files]({{}}riak/kv/2.0.4/configuring/reference/) have been streamlined into a single file named `riak.conf`. If you are upgrading, however, your existing `app.config` and `vm.args` files will still be recognized in version 2.0. -* **Bucket Types** --- While we strongly recommend [using bucket types](/riak/kv/2.0.4/using/reference/bucket-types) when creating new buckets, they are not required. +* **Bucket Types** --- While we strongly recommend [using bucket types]({{}}riak/kv/2.0.4/using/reference/bucket-types) when creating new buckets, they are not required. 
* **Dotted Version Vectors (DVVs)** --- This alternative to traditional - [vector clocks](/riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks) is enabled by default - in all [bucket types](/riak/kv/2.0.4/using/reference/bucket-types), but DVVs can be disabled + [vector clocks]({{}}riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks) is enabled by default + in all [bucket types]({{}}riak/kv/2.0.4/using/reference/bucket-types), but DVVs can be disabled by setting the `dvv_enabled` property to `false` on any bucket type. In a nutshell, upgrading to 2.0 will change how you use Riak only if you @@ -52,17 +52,17 @@ want it to. But even if you don't plan on using the new features, there are a number of improvements that make upgrading a good choice, including the following: -* [Cluster metadata](/riak/kv/2.0.4/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that +* [Cluster metadata]({{}}riak/kv/2.0.4/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that reduces the amount of inter-node gossip in Riak clusters, which can reduce network congestion. -* [Active Anti-Entropy](/riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy +* [Active Anti-Entropy]({{}}riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy (AAE) feature that is turned on by default since version 1.3, AAE performance has been improved in version 2.0. * [Bug patches](https://github.com/basho/riak/blob/2.0/RELEASE-NOTES.md) --- A variety of bugs present in earlier versions have been identified and patched. -More on upgrading can be found in our [Riak 2.0 upgrade guide](/riak/kv/2.0.4/setup/upgrading/version). +More on upgrading can be found in our [Riak 2.0 upgrade guide]({{}}riak/kv/2.0.4/setup/upgrading/version). ## Riak Data Types @@ -73,20 +73,20 @@ application is responsible for resolving conflicts between replicas of objects stored in different Riak nodes. Riak 2.0 offers a new approach to this problem for a wide range of use -cases in the form of [Riak Data Types](/riak/kv/2.0.4/developing/data-types). Instead of +cases in the form of [Riak Data Types]({{}}riak/kv/2.0.4/developing/data-types). Instead of forcing the application to resolve conflicts, Riak offers five Data Types that can reduce some of the complexities of developing using -Riak: [flags](/riak/kv/2.0.4/developing/data-types/maps#flags), [registers](/riak/kv/2.0.4/developing/data-types/maps#registers), -[counters](/riak/kv/2.0.4/developing/data-types/counters), [sets](/riak/kv/2.0.4/developing/data-types/sets), and -[maps](/riak/kv/2.0.4/developing/data-types/maps). +Riak: [flags]({{}}riak/kv/2.0.4/developing/data-types/maps#flags), [registers]({{}}riak/kv/2.0.4/developing/data-types/maps#registers), +[counters]({{}}riak/kv/2.0.4/developing/data-types/counters), [sets]({{}}riak/kv/2.0.4/developing/data-types/sets), and +[maps]({{}}riak/kv/2.0.4/developing/data-types/maps). #### Relevant Docs -* [Using Data Types](/riak/kv/2.0.4/developing/data-types) explains how to use Riak Data Types on the +* [Using Data Types]({{}}riak/kv/2.0.4/developing/data-types) explains how to use Riak Data Types on the application side, with usage examples for all five Data Types in all of Basho's officially supported clients (Java, Ruby, Python, .NET and Erlang) and for Riak's HTTP interface. 
-* [Data Types](/riak/kv/2.0.4/developing/data-types) explains some of the theoretical concerns that drive +* [Data Types]({{}}riak/kv/2.0.4/developing/data-types) explains some of the theoretical concerns that drive Riak Data Types and shares details about how they are implemented in Riak. @@ -103,11 +103,11 @@ Search, integrating Riak with [Apache Solr](https://lucene.apache.org/solr/)'s f #### Relevant Docs -* [Using Search](/riak/kv/2.0.4/developing/usage/search) provides an overview of how to use the new +* [Using Search]({{}}riak/kv/2.0.4/developing/usage/search) provides an overview of how to use the new Riak Search. -* [Search Schema](/riak/kv/2.0.4/developing/usage/search-schemas) shows you how to create and manage custom search +* [Search Schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas) shows you how to create and manage custom search schemas. -* [Search Details](/riak/kv/2.0.4/using/reference/search) provides an in-depth look at the design +* [Search Details]({{}}riak/kv/2.0.4/using/reference/search) provides an in-depth look at the design considerations that went into the new Riak Search. #### Video @@ -125,13 +125,13 @@ some (or perhaps all) of your data. #### Relevant Docs -* [Using Strong Consistency](/riak/kv/2.0.4/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong +* [Using Strong Consistency]({{}}riak/kv/2.0.4/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong consistency subsystem and to apply strong consistency guarantees to data stored in specified buckets. -* [Strong Consistency](/riak/kv/2.0.4/using/reference/strong-consistency) provides a theoretical treatment of how a - strongly consistent system differs from an [eventually consistent](/riak/kv/2.0.4/learn/concepts/eventual-consistency) system, as well as details about how +* [Strong Consistency]({{}}riak/kv/2.0.4/using/reference/strong-consistency) provides a theoretical treatment of how a + strongly consistent system differs from an [eventually consistent]({{}}riak/kv/2.0.4/learn/concepts/eventual-consistency) system, as well as details about how strong consistency is implemented in Riak. -* [Managing Strong Consistency](/riak/kv/2.0.4/configuring/strong-consistency) is a guide to strong consistency for +* [Managing Strong Consistency]({{}}riak/kv/2.0.4/configuring/strong-consistency) is a guide to strong consistency for Riak operators. #### Video @@ -155,11 +155,11 @@ Riak itself and managed through a simple command-line interface. #### Relevant Docs -* [Authentication and Authorization](/riak/kv/2.0.4/using/security/basics) explains how Riak Security can be +* [Authentication and Authorization]({{}}riak/kv/2.0.4/using/security/basics) explains how Riak Security can be enabled and disabled, how users and groups are managed, how authorization to perform certain operations can be granted and revoked, how security ciphers can be chosen, and more. -* [Managing Security Sources](/riak/kv/2.0.4/using/security/managing-sources/) is an in-depth tutorial on how to +* [Managing Security Sources]({{}}riak/kv/2.0.4/using/security/managing-sources/) is an in-depth tutorial on how to implement Riak's four supported authentication sources: trusted networks, passwords, pluggable authentication modules, and certificates. @@ -194,7 +194,7 @@ override any settings from the new system. 
#### Relevant Docs -* [Configuration Files](/riak/kv/2.0.4/configuring/reference/) lists and describes all of the configurable +* [Configuration Files]({{}}riak/kv/2.0.4/configuring/reference/) lists and describes all of the configurable parameters available in Riak 2.0, from configuring your chosen storage backend(s) to setting default bucket properties to controlling Riak's logging system and much more. @@ -214,7 +214,7 @@ and keys. #### Relevant Docs -* [Using Bucket Types](/riak/kv/2.0.4/using/reference/bucket-types) explains how to create, modify, and activate +* [Using Bucket Types]({{}}riak/kv/2.0.4/using/reference/bucket-types) explains how to create, modify, and activate bucket types, as well as how the new system differs from the older, bucket properties-based system. @@ -226,20 +226,20 @@ and [Jordan West](https://github.com/jrwest). ## Dotted Version Vectors -In prior versions of Riak, [conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution) was managed using -[vector clocks](/riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks), which track object update causality. +In prior versions of Riak, [conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution) was managed using +[vector clocks]({{}}riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks), which track object update causality. Riak 2.0 has added support for dotted version vectors (DVVs). DVVs serve an analogous role to vector -clocks but are more effective at containing [sibling explosion](/riak/kv/2.0.4/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. +clocks but are more effective at containing [sibling explosion]({{}}riak/kv/2.0.4/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. #### Relevant Docs -* [Dotted Version Vectors](/riak/kv/2.0.4/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. +* [Dotted Version Vectors]({{}}riak/kv/2.0.4/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. ## New Client Libraries -While Basho offered official [client libraries](/riak/kv/2.0.4/developing/client-libraries) for Java, Ruby, +While Basho offered official [client libraries]({{}}riak/kv/2.0.4/developing/client-libraries) for Java, Ruby, Python, .NET and Erlang for versions of Riak prior to 2.0, all clients have undergone major changes in anticipation of the 2.0 release. 
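For reference alongside the **Bucket Types** and **Dotted Version Vectors** sections above, the command-line workflow is short: create a type, check it, activate it. A sketch with an illustrative type name, toggling the `dvv_enabled` property mentioned earlier:

```bash
riak-admin bucket-type create no_dvv '{"props":{"dvv_enabled":false}}'
riak-admin bucket-type status no_dvv
riak-admin bucket-type activate no_dvv
```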
diff --git a/content/riak/kv/2.0.4/learn/concepts.md b/content/riak/kv/2.0.4/learn/concepts.md index fc217a588e..4a3871afc8 100644 --- a/content/riak/kv/2.0.4/learn/concepts.md +++ b/content/riak/kv/2.0.4/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.0.4/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.0.4/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.4/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.4/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.4/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.4/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.4/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.4/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.4/learn/concepts/vnodes -[config index]: /riak/kv/2.0.4/configuring -[plan index]: /riak/kv/2.0.4/setup/planning -[use index]: /riak/kv/2.0.4/using/ +[concept aae]: {{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.0.4/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.4/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.4/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.4/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.4/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.4/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.4/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.4/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.0.4/configuring +[plan index]: {{}}riak/kv/2.0.4/setup/planning +[use index]: {{}}riak/kv/2.0.4/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.0.4/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.0.4/learn/concepts/active-anti-entropy.md index 549e5e2da6..58405cc563 100644 --- a/content/riak/kv/2.0.4/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.0.4/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.4/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.0.4/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.0.4/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.0.4/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.0.4/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.0.4/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.0.4/developing/usage/search +[usage search]: {{}}riak/kv/2.0.4/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.0.4/learn/concepts/buckets.md b/content/riak/kv/2.0.4/learn/concepts/buckets.md index da1188b809..b95d7bafde 100644 --- a/content/riak/kv/2.0.4/learn/concepts/buckets.md +++ b/content/riak/kv/2.0.4/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.0.4/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.0.4/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.0.4/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.0.4/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.0.4/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.0.4/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.0.4/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.4/using/reference/strong-consistency -[config basic]: /riak/kv/2.0.4/configuring/basic -[dev api http]: /riak/kv/2.0.4/developing/api/http -[dev data types]: /riak/kv/2.0.4/developing/data-types -[glossary ring]: /riak/kv/2.0.4/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.4/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.4/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.4/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.4/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.4/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.0.4/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.0.4/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.0.4/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.0.4/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.0.4/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.0.4/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.4/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.0.4/configuring/basic +[dev api http]: {{}}riak/kv/2.0.4/developing/api/http +[dev data types]: {{}}riak/kv/2.0.4/developing/data-types +[glossary ring]: {{}}riak/kv/2.0.4/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.0.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.4/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.4/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.4/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.4/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.4/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.0.4/learn/concepts/capability-negotiation.md b/content/riak/kv/2.0.4/learn/concepts/capability-negotiation.md index 83c1a96450..62f44055a3 100644 --- a/content/riak/kv/2.0.4/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.0.4/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.0.4/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.0.4/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.0.4/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.0.4/developing/usage/mapreduce In versions of Riak prior to 1.2.0, [rolling upgrades][upgrade cluster] from an older version of Riak to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.0.4/learn/concepts/causal-context.md b/content/riak/kv/2.0.4/learn/concepts/causal-context.md index 82c7892dfc..6ea61e7d45 100644 --- a/content/riak/kv/2.0.4/learn/concepts/causal-context.md +++ b/content/riak/kv/2.0.4/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.0.4/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.0.4/developing/api/http -[dev key value]: /riak/kv/2.0.4/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.0.4/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.0.4/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.4/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.0.4/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.0.4/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.0.4/developing/api/http +[dev key value]: {{}}riak/kv/2.0.4/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.0.4/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.0.4/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.4/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.0.4/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.0.4/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -78,7 +78,7 @@ sections below. In the case of outcome 2, the choice between **a** and **b** is yours to to make. If you set the `allow_mult` parameter to `true` for a bucket, -[using bucket types](/riak/kv/2.0.4/developing/usage/bucket-types), all writes to that bucket will create siblings +[using bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types), all writes to that bucket will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions). 
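As a quick sketch of the `allow_mult` setting discussed above (the type name is illustrative), opting a class of buckets into sibling creation looks like this:

```bash
riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
riak-admin bucket-type activate siblings_allowed
```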
diff --git a/content/riak/kv/2.0.4/learn/concepts/clusters.md b/content/riak/kv/2.0.4/learn/concepts/clusters.md index 45b5354f7f..732552a857 100644 --- a/content/riak/kv/2.0.4/learn/concepts/clusters.md +++ b/content/riak/kv/2.0.4/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.4/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.0.4/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.4/learn/concepts/replication -[glossary node]: /riak/kv/2.0.4/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.0.4/learn/dynamo -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.4/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.4/developing/usage/replication +[concept buckets]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/buckets +[concept keys objects]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/keys-and-objects +[concept replication]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/replication +[glossary node]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#node +[glossary vnode]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#vnode +[learn dynamo]: {{< baseurl >}}riak/kv/2.0.4/learn/dynamo +[usage bucket types]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/bucket-types +[usage conflict resolution]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/conflict-resolution +[usage replication]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{< baseurl >}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{< baseurl >}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions.
When a diff --git a/content/riak/kv/2.0.4/learn/concepts/crdts.md b/content/riak/kv/2.0.4/learn/concepts/crdts.md index 78d9aab9c6..8b72e05d15 100644 --- a/content/riak/kv/2.0.4/learn/concepts/crdts.md +++ b/content/riak/kv/2.0.4/learn/concepts/crdts.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context dvv]: /riak/kv/2.0.4/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.0.4/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.0.4/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.0.4/developing/data-types -[glossary node]: /riak/kv/2.0.4/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.0.4/developing/usage/conflict-resolution +[concept causal context dvv]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[concept strong consistency]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/strong-consistency +[dev data types]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types +[glossary node]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#node +[glossary vnode]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#vnode +[usage conflict resolution]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/conflict-resolution A pure key/value store is completely agnostic toward the data stored @@ -32,7 +32,7 @@ within it. Any key can be associated with values of any conceivable type, from short strings to large JSON objects to video files. Riak began as a pure key/value store, but over time it has become more and more aware of the data stored in it through features like [secondary -indexes](/riak/kv/2.0.4/developing/usage/secondary-indexes/) and [Search](/riak/kv/2.0.4/developing/usage/search/). +indexes]({{< baseurl >}}riak/kv/2.0.4/developing/usage/secondary-indexes/) and [Search]({{< baseurl >}}riak/kv/2.0.4/developing/usage/search/). In version 2.0, Riak continued this evolution by introducing a series of eventually convergent **Data Types**. Riak Data Types are convergent @@ -214,7 +214,7 @@ The beauty of Data Types is that Riak "knows" how to resolve value conflicts by applying Data Type-specific rules. In general, Riak does this by remembering the **history** of a value and broadcasting that history along with the current value in the form of a [context -object](/riak/kv/2.0.4/developing/data-types/#Data-Types-and-Context) that is similar to a +object]({{< baseurl >}}riak/kv/2.0.4/developing/data-types/#Data-Types-and-Context) that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak uses the history of each Data Type to make deterministic judgments about which value should be deemed correct.
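As an end-to-end illustration of that convergence, here is a hedged sketch of exercising a counter Data Type over the HTTP API; the type name `counters` and the bucket/key names are hypothetical, and a default install is assumed to listen on port 8098:

```
# Create and activate a bucket type backed by the counter Data Type
riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
riak-admin bucket-type activate counters

# Increment the counter; Riak merges concurrent increments deterministically
curl -XPOST http://localhost:8098/types/counters/buckets/traffic/datatypes/hits \
  -H "Content-Type: application/json" \
  -d '{"increment": 1}'

# Read back the converged value
curl http://localhost:8098/types/counters/buckets/traffic/datatypes/hits
```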
diff --git a/content/riak/kv/2.0.4/learn/concepts/eventual-consistency.md b/content/riak/kv/2.0.4/learn/concepts/eventual-consistency.md index 82bf1eeb2a..922e5e012c 100644 --- a/content/riak/kv/2.0.4/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.0.4/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.4/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters -[concept replication]: /riak/kv/2.0.4/learn/concepts/replication -[glossary node]: /riak/kv/2.0.4/learn/glossary/#node -[glossary read rep]: /riak/kv/2.0.4/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.4/developing/usage/conflict-resolution +[concept buckets]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/buckets +[concept causal context vc]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/clusters +[concept replication]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/replication +[glossary node]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#node +[glossary read rep]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#read-repair +[usage bucket types]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/bucket-types +[usage conflict resolution]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.0.4/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{< baseurl >}}riak/kv/2.0.4/developing/data-types/), or you can resolve those conflicts on the application side by employing use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.0.4/developing/data-modeling/). +or models]({{< baseurl >}}riak/kv/2.0.4/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/).
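As a quick sketch of the per-request tuning mentioned above, the HTTP API accepts quorum parameters such as `r` and `w` on individual requests; the bucket and key names here are hypothetical:

```
# Read accepting a single replica's answer (lower latency, weaker guarantee)
curl "http://localhost:8098/buckets/test/keys/doc?r=1"

# Write that waits for acknowledgement from three replicas
curl -XPUT "http://localhost:8098/buckets/test/keys/doc?w=3" \
  -H "Content-Type: text/plain" \
  -d "hello"
```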
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.0.4/learn/concepts/keys-and-objects.md b/content/riak/kv/2.0.4/learn/concepts/keys-and-objects.md index 82f93c547d..af559ef22f 100644 --- a/content/riak/kv/2.0.4/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.0.4/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.4/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.0.4/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/buckets +[concept causal context vc]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.0.4/learn/concepts/replication.md b/content/riak/kv/2.0.4/learn/concepts/replication.md index 39c59e6570..499b1d0b21 100644 --- a/content/riak/kv/2.0.4/learn/concepts/replication.md +++ b/content/riak/kv/2.0.4/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.0.4/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.0.4/learn/concepts/vnodes -[glossary node]: /riak/kv/2.0.4/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.4/learn/glossary/#ring -[usage replication]: /riak/kv/2.0.4/developing/usage/replication +[cluster ops v3 mdc]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/active-anti-entropy +[concept causal context vc]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/clusters +[concept vnodes]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/vnodes +[glossary node]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#node +[glossary ring]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#ring +[usage replication]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.0.4/developing/usage/bucket-types). +bucket's [bucket type]({{< baseurl >}}riak/kv/2.0.4/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.0.4/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replication]({{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail.
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.0.4/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{< baseurl >}}riak/kv/2.0.4/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.0.4/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.0.4/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{< baseurl >}}riak/kv/2.0.4/developing/usage/secondary-indexes/) and +[MapReduce]({{< baseurl >}}riak/kv/2.0.4/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.0.4/learn/concepts/strong-consistency.md b/content/riak/kv/2.0.4/learn/concepts/strong-consistency.md index b7ff517e8f..68dab41da9 100644 --- a/content/riak/kv/2.0.4/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.0.4/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.4/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency +[usage bucket types]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/bucket-types +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{< baseurl >}}riak/kv/2.0.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], which enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.4/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{< baseurl >}}riak/kv/2.0.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{< baseurl >}}riak/kv/2.0.4/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency.
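Operationally, applying strong consistency per bucket type might look like the following sketch, assuming `strong_consistency = on` has been set in each node's riak.conf; the type name is hypothetical:

```
# Create a bucket type whose buckets demand strongly consistent operations
riak-admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'
riak-admin bucket-type activate strongly_consistent
```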
## Strong vs. Eventual Consistency diff --git a/content/riak/kv/2.0.4/learn/concepts/vnodes.md b/content/riak/kv/2.0.4/learn/concepts/vnodes.md index badfc9fe11..f1aa111641 100644 --- a/content/riak/kv/2.0.4/learn/concepts/vnodes.md +++ b/content/riak/kv/2.0.4/learn/concepts/vnodes.md @@ -16,16 +16,16 @@ aliases: --- -[concept causal context]: /riak/kv/2.0.4/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.0.4/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.0.4/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.4/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.0.4/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.4/learn/glossary/#ring -[perf strong consistency]: /riak/kv/2.0.4/using/performance/strong-consistency -[plan backend]: /riak/kv/2.0.4/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.4/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.0.4/using/admin/riak-cli +[concept causal context]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/causal-context +[concept clusters ring]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/clusters/#the-ring +[concept replication]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/replication +[concept strong consistency]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/strong-consistency +[glossary node]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#node +[glossary ring]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#ring +[perf strong consistency]: {{< baseurl >}}riak/kv/2.0.4/using/reference/strong-consistency +[plan backend]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend +[plan cluster capacity]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/cluster-capacity +[use admin riak cli]: {{< baseurl >}}riak/kv/2.0.4/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -81,7 +81,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{< baseurl >}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -103,7 +103,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.0.4/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.0.4/learn/dynamo.md b/content/riak/kv/2.0.4/learn/dynamo.md index bb70e303de..04744de29d 100644 --- a/content/riak/kv/2.0.4/learn/dynamo.md +++ b/content/riak/kv/2.0.4/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.0.4/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.0.4/learn/glossary/#gossiping +[Consistent Hashing]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform.
It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. -[HTTP API]: /riak/kv/2.0.4/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.0.4/developing/api/protocol-buffers/ +[HTTP API]: {{< baseurl >}}riak/kv/2.0.4/developing/api/http/ +[Protocol Buffers API]: {{< baseurl >}}riak/kv/2.0.4/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.0.4/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.0.4/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.0.4/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{< baseurl >}}riak/kv/2.0.4/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.0.4/developing/api/http/) +>[REST API]({{< baseurl >}}riak/kv/2.0.4/developing/api/http/) > ->[Writing Data](/riak/kv/2.0.4/developing/usage/creating-objects/) +>[Writing Data]({{< baseurl >}}riak/kv/2.0.4/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.0.4/developing/usage/reading-objects/) +>[Reading Data]({{< baseurl >}}riak/kv/2.0.4/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.0.4/learn/concepts/replication/#read-repair). +> [Read Repair]({{< baseurl >}}riak/kv/2.0.4/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can get a glimpse of Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.0.4/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.0.4/developing/usage/replication/ +[Hinted handoff]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#hinted-handoff +[Replication]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition].
-[Multi Datacenter Replication]: /riak/kv/2.0.4/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{< baseurl >}}riak/kv/2.0.4/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. > This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.0.4/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.0.4/using/admin/riak-admin/ +[riak-admin command-line tool]: {{< baseurl >}}riak/kv/2.0.4/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.0.4/learn/glossary/#gossiping +[gossiping]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.0.4/setup/planning/backend/ -[Bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.0.4/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.0.4/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.0.4/developing/usage/secondary-indexes/ +[backend options]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/ +[Bitcask]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/bitcask/ +[LevelDB]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/leveldb/ +[Memory]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/memory/ +[secondary indexes]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.0.4/learn/concepts/replication/#read-repair +[Read Repair]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list.
Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.0.4/using/performance/benchmarking/ +[Basho Bench]: {{< baseurl >}}riak/kv/2.0.4/using/performance/benchmarking/ Dynamo is used by several services with different configurations. These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.0.4/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak uses SHA-1-based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.0.4/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.0.4/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.0.4/configuring/load-balancing-proxy/ +[Load Balancing]: {{< baseurl >}}riak/kv/2.0.4/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.0.4/developing/client-libraries/ +[client libraries]: {{< baseurl >}}riak/kv/2.0.4/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems (e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation.
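Since the annotations above repeatedly point to Basho Bench for empirical tuning, a minimal run might look like this sketch (assuming Erlang is installed; the example config name is taken from the project's `examples/` directory):

```
git clone https://github.com/basho/basho_bench.git
cd basho_bench
make

# Run a benchmark against a local cluster using a bundled example config
./basho_bench examples/riakc_pb.config

# Generate summary graphs from the collected results (requires R)
make results
```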
-[Basho Bench]: /riak/kv/2.0.4/using/performance/benchmarking/ +[Basho Bench]: {{< baseurl >}}riak/kv/2.0.4/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. -[documentation]: https://docs.basho.com +[documentation]: {{< baseurl >}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.0.4/learn/glossary.md b/content/riak/kv/2.0.4/learn/glossary.md index b535653630..8e04100fd0 100644 --- a/content/riak/kv/2.0.4/learn/glossary.md +++ b/content/riak/kv/2.0.4/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties +[apps replication properties]: {{< baseurl >}}riak/kv/2.0.4/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.0.4/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.0.4/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.4/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.4/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.4/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.4/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.0.4/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.0.4/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.0.4/developing/api/http -[dev data model]: /riak/kv/2.0.4/developing/data-modeling -[dev data types]: /riak/kv/2.0.4/developing/data-types -[glossary read rep]: /riak/kv/2.0.4/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode +[cluster ops add remove node]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/strong-consistency +[concept buckets]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/buckets +[concept causal context vc]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/clusters +[concept crdts]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/crdts +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[concept keys objects]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/keys-and-objects +[concept replication]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/replication +[concept strong consistency]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/strong-consistency +[dev kv model]: {{< baseurl >}}riak/kv/2.0.4/developing/key-value-modeling +[concept replication aae]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{< baseurl >}}riak/kv/2.0.4/developing/api/http +[dev data model]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling +[dev data types]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types +[glossary read rep]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#read-repair +[glossary vnode]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.0.4/learn/dynamo -[plan cluster
capacity]: /riak/kv/2.0.4/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.0.4/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.0.4/using/repair-recovery/repairs +[learn dynamo]: {{< baseurl >}}riak/kv/2.0.4/learn/dynamo +[plan cluster capacity]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/cluster-capacity +[repair recover failure recovery]: {{< baseurl >}}riak/kv/2.0.4/using/repair-recovery/failure-recovery +[repair recover repairs]: {{< baseurl >}}riak/kv/2.0.4/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.0.4/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.4/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.4/developing/usage/secondary-indexes +[usage mapreduce]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/mapreduce +[usage search]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/search +[usage secondary-indexes]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.0.4/using/performance/benchmarking) +* [Basho Bench]({{< baseurl >}}riak/kv/2.0.4/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys.
-* [Bucket Types](/riak/kv/2.0.4/developing/usage/bucket-types) +* [Bucket Types]({{< baseurl >}}riak/kv/2.0.4/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.0.4/learn/use-cases.md b/content/riak/kv/2.0.4/learn/use-cases.md index 445c41b6fc..b399be372a 100644 --- a/content/riak/kv/2.0.4/learn/use-cases.md +++ b/content/riak/kv/2.0.4/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.0.4/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.0.4/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.0.4/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.0.4/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.0.4/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.0.4/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.0.4/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.0.4/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.0.4/developing/data-types -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.0.4/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.4/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.4/developing/usage/secondary-indexes +[dev data model articles etc]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling/#log-data +[dev data model sensor data]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling/#session-storage +[dev data model user acct]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling/#user-accounts +[dev data model user events]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{< baseurl >}}riak/kv/2.0.4/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types +[plan backend bitcask]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/bitcask +[replication properties]: {{< baseurl >}}riak/kv/2.0.4/developing/app-guide/replication-properties +[usage mapreduce]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/mapreduce +[usage search]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/search +[usage secondary-indexes]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way.
In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.0.4/learn/why-riak-kv.md b/content/riak/kv/2.0.4/learn/why-riak-kv.md index 589480ac44..83fc37e90f 100644 --- a/content/riak/kv/2.0.4/learn/why-riak-kv.md +++ b/content/riak/kv/2.0.4/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.0.4/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.0.4/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency +[apps replication properties]: {{< baseurl >}}riak/kv/2.0.4/developing/app-guide/replication-properties +[Basho Bench]: {{< baseurl >}}riak/kv/2.0.4/using/performance/benchmarking +[cluster ops strong consistency]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.0.4/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.0.4/developing/data-types -[glossary read rep]: /riak/kv/2.0.4/learn/glossary/#read-repair +[dev data types]: {{< baseurl >}}riak/kv/2.0.4/developing/data-types +[glossary read rep]: {{< baseurl >}}riak/kv/2.0.4/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.0.4/release-notes.md b/content/riak/kv/2.0.4/release-notes.md index bcd34cb54e..e0dcd6ee97 100644 --- a/content/riak/kv/2.0.4/release-notes.md +++ b/content/riak/kv/2.0.4/release-notes.md @@ -255,7 +255,7 @@ The results for each stage of fullsync: ## Download -Please see our [downloads](http://docs.basho.com/riak/latest/downloads/) +Please see our [downloads]({{< baseurl >}}riak/kv/latest/downloads/) page. ## Feedback diff --git a/content/riak/kv/2.0.4/setup/downgrade.md b/content/riak/kv/2.0.4/setup/downgrade.md index 2a73964246..2f7f587334 100644 --- a/content/riak/kv/2.0.4/setup/downgrade.md +++ b/content/riak/kv/2.0.4/setup/downgrade.md @@ -17,7 +17,7 @@ aliases: Downgrades of Riak are tested and supported for two feature release versions, with the general procedure being similar to that of a -[rolling upgrade](/riak/kv/2.0.4/setup/upgrading/cluster). +[rolling upgrade]({{< baseurl >}}riak/kv/2.0.4/setup/upgrading/cluster). {{% note title="End Of Life Warning" %}} We test downgrading for two feature release versions. However, all versions below KV 2.0 are End Of Life (EOL) and unsupported. Please be aware of that if you choose to downgrade. @@ -49,9 +49,9 @@ both 1.4 and 1.3 are performed. * Riak Control should be disabled throughout the rolling downgrade process -* [Configuration Files](/riak/kv/2.0.4/configuring/reference) must be replaced with those of the version +* [Configuration Files]({{< baseurl >}}riak/kv/2.0.4/configuring/reference) must be replaced with those of the version being downgraded to -* [Active anti-entropy](/riak/kv/2.0.4/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version +* [Active anti-entropy]({{< baseurl >}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version below 1.3. ## Before Stopping a Node @@ -94,7 +94,7 @@ will need to be downgraded before the rolling downgrade begins. This can be done using the `--downgrade` flag with `riak-admin reformat-indexes`, as in the sketch below.
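A hedged example of that step (the concurrency and batch-size arguments are optional, and the values shown are purely illustrative):

```
# Rewrite secondary indexes in the older on-disk format before downgrading
riak-admin reformat-indexes --downgrade

# Optionally bound the work with concurrency and batch-size arguments
riak-admin reformat-indexes 2 100 --downgrade
```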
More information on the `riak-admin reformat-indexes` command and on downgrading indexes can be found in the [`riak-admin`]({{< baseurl >}}riak/kv/2.0.4/using/admin/riak-admin/#reformat-indexes) documentation. ## Before Starting a Node diff --git a/content/riak/kv/2.0.4/setup/installing.md b/content/riak/kv/2.0.4/setup/installing.md index 1284704828..d71fe3eea4 100644 --- a/content/riak/kv/2.0.4/setup/installing.md +++ b/content/riak/kv/2.0.4/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.0.4/installing/ --- -[install aws]: /riak/kv/2.0.4/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.4/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.4/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.4/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.4/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.4/setup/installing/smartos -[install solaris]: /riak/kv/2.0.4/setup/installing/solaris -[install suse]: /riak/kv/2.0.4/setup/installing/suse -[install windows azure]: /riak/kv/2.0.4/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.4/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.0.4/setup/upgrading +[install aws]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/amazon-web-services +[install debian & ubuntu]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/debian-ubuntu +[install freebsd]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/freebsd +[install mac osx]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/mac-osx +[install rhel & centos]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/rhel-centos +[install smartos]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/smartos +[install solaris]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/solaris +[install suse]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/suse +[install windows azure]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/windows-azure +[install source index]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/source +[community projects]: {{< baseurl >}}community/projects +[upgrade index]: {{< baseurl >}}riak/kv/2.0.4/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.0.4/setup/installing/amazon-web-services.md b/content/riak/kv/2.0.4/setup/installing/amazon-web-services.md index 5c4988fb43..523d04ab25 100644 --- a/content/riak/kv/2.0.4/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.0.4/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{< baseurl >}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. @@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{< baseurl >}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.0.4/using/security/).
+We also recommend that you read more about Riak's [Security and Firewalls]({{< baseurl >}}riak/kv/2.0.4/using/security/). ## Clustering Riak on AWS diff --git a/content/riak/kv/2.0.4/setup/installing/debian-ubuntu.md b/content/riak/kv/2.0.4/setup/installing/debian-ubuntu.md index cfd20338ad..e0c92becb5 100644 --- a/content/riak/kv/2.0.4/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.0.4/setup/installing/debian-ubuntu.md @@ -20,10 +20,10 @@ aliases: -[install source index]: /riak/kv/2.0.4/setup/installing/source/ -[security index]: /riak/kv/2.0.4/using/security/ -[install source erlang]: /riak/kv/2.0.4/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.4/setup/installing/verify +[install source index]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/source/ +[security index]: {{< baseurl >}}riak/kv/2.0.4/using/security/ +[install source erlang]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/source/erlang +[install verify]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.0.4/setup/installing/freebsd.md b/content/riak/kv/2.0.4/setup/installing/freebsd.md index 238e8b0b8c..40167171d2 100644 --- a/content/riak/kv/2.0.4/setup/installing/freebsd.md +++ b/content/riak/kv/2.0.4/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.0.4/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.4/downloads/ -[install verify]: /riak/kv/2.0.4/setup/installing/verify +[install source erlang]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/source/erlang +[downloads]: {{< baseurl >}}riak/kv/2.0.4/downloads/ +[install verify]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.0.4/setup/installing/mac-osx.md b/content/riak/kv/2.0.4/setup/installing/mac-osx.md index 488fbce95f..7192723c27 100644 --- a/content/riak/kv/2.0.4/setup/installing/mac-osx.md +++ b/content/riak/kv/2.0.4/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.0.4/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.0.4/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.4/setup/installing/verify +[perf open files]: {{< baseurl >}}riak/kv/2.0.4/using/performance/open-files-limit +[install source erlang]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/source/erlang +[install verify]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite.
You can install from source or download a diff --git a/content/riak/kv/2.0.4/setup/installing/rhel-centos.md b/content/riak/kv/2.0.4/setup/installing/rhel-centos.md index 665ecf70ba..0000ef9970 100644 --- a/content/riak/kv/2.0.4/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.0.4/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.0.4/setup/installing/source -[install source erlang]: /riak/kv/2.0.4/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.4/setup/installing/verify +[install source index]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/source +[install source erlang]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/source/erlang +[install verify]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/verify > **Note: 2.0.4 not currently available** > diff --git a/content/riak/kv/2.0.4/setup/installing/smartos.md b/content/riak/kv/2.0.4/setup/installing/smartos.md index a189a19cc9..8d6b0de06a 100644 --- a/content/riak/kv/2.0.4/setup/installing/smartos.md +++ b/content/riak/kv/2.0.4/setup/installing/smartos.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.4/setup/installing/verify +[install verify]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/verify The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. diff --git a/content/riak/kv/2.0.4/setup/installing/solaris.md b/content/riak/kv/2.0.4/setup/installing/solaris.md index 5ff28a7dbc..fb2d9717da 100644 --- a/content/riak/kv/2.0.4/setup/installing/solaris.md +++ b/content/riak/kv/2.0.4/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.4/setup/installing/verify +[install verify]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user.
diff --git a/content/riak/kv/2.0.4/setup/installing/source.md b/content/riak/kv/2.0.4/setup/installing/source.md index d6b32e716e..4dfafd3503 100644 --- a/content/riak/kv/2.0.4/setup/installing/source.md +++ b/content/riak/kv/2.0.4/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.0.4/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.4/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.0.4/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.0.4/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.0.4/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.0.4/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.0.4/setup/installing/verify +[install source erlang]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/source/erlang +[downloads]: {{< baseurl >}}riak/kv/2.0.4/downloads/ +[install debian & ubuntu#source]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.0.4/setup/installing/source/erlang.md b/content/riak/kv/2.0.4/setup/installing/source/erlang.md index 783cf1c7c1..12638d4eba 100644 --- a/content/riak/kv/2.0.4/setup/installing/source/erlang.md +++ b/content/riak/kv/2.0.4/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.4/installing/source/erlang/ --- -[install index]: /riak/kv/2.0.4/setup/installing -[security basics]: /riak/kv/2.0.4/using/security/basics +[install index]: {{< baseurl >}}riak/kv/2.0.4/setup/installing +[security basics]: {{< baseurl >}}riak/kv/2.0.4/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho8.tar.gz).
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.0.4/setup/installing/source/jvm.md b/content/riak/kv/2.0.4/setup/installing/source/jvm.md index a72bcd02f8..f83d6a670b 100644 --- a/content/riak/kv/2.0.4/setup/installing/source/jvm.md +++ b/content/riak/kv/2.0.4/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.0.4/installing/source/jvm/ --- -[usage search]: /riak/kv/2.0.4/developing/usage/search +[usage search]: {{< baseurl >}}riak/kv/2.0.4/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.0.4/setup/installing/suse.md b/content/riak/kv/2.0.4/setup/installing/suse.md index 59522e53ed..4d8f2daa2b 100644 --- a/content/riak/kv/2.0.4/setup/installing/suse.md +++ b/content/riak/kv/2.0.4/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.4/installing/suse/ --- -[install verify]: /riak/kv/2.0.4/setup/installing/verify +[install verify]: {{< baseurl >}}riak/kv/2.0.4/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.0.4/setup/installing/verify.md b/content/riak/kv/2.0.4/setup/installing/verify.md index 29e2727628..0005475935 100644 --- a/content/riak/kv/2.0.4/setup/installing/verify.md +++ b/content/riak/kv/2.0.4/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.4/installing/verify-install/ --- -[client libraries]: /riak/kv/2.0.4/developing/client-libraries -[perf open files]: /riak/kv/2.0.4/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.0.4/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.0.4/using/cluster-operations/inspecting-node +[client libraries]: {{< baseurl >}}riak/kv/2.0.4/developing/client-libraries +[perf open files]: {{< baseurl >}}riak/kv/2.0.4/using/performance/open-files-limit +[cluster ops bucket types]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{< baseurl >}}riak/kv/2.0.4/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.0.4/setup/installing/windows-azure.md b/content/riak/kv/2.0.4/setup/installing/windows-azure.md index 82ae227fc0..a34daa5352 100644 --- a/content/riak/kv/2.0.4/setup/installing/windows-azure.md +++ b/content/riak/kv/2.0.4/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{< baseurl >}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{< baseurl >}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{< baseurl >}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
- ![](/images/createvm_small.png) + ![]({{< baseurl >}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{< baseurl >}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{< baseurl >}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{< baseurl >}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{< baseurl >}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{< baseurl >}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.0.4/setup/planning/backend.md b/content/riak/kv/2.0.4/setup/planning/backend.md index 1f446c3744..00833bb097 100644 --- a/content/riak/kv/2.0.4/setup/planning/backend.md +++ b/content/riak/kv/2.0.4/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.4/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.4/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.4/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.0.4/developing/api/backend +[plan backend bitcask]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/bitcask +[plan backend leveldb]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/leveldb +[plan backend memory]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/memory +[plan backend multi]: {{< baseurl >}}riak/kv/2.0.4/setup/planning/backend/multi +[dev api backend]: {{< baseurl >}}riak/kv/2.0.4/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs.
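By way of illustration, the backend is selected per node in riak.conf; a minimal sketch, assuming the default configuration location `/etc/riak/riak.conf`:

```
## riak.conf -- choose exactly one storage backend per node
storage_backend = bitcask
## alternatives: leveldb, memory, multi
```

A node must be restarted for a backend change to take effect.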
diff --git a/content/riak/kv/2.0.4/setup/planning/backend/bitcask.md b/content/riak/kv/2.0.4/setup/planning/backend/bitcask.md index 57a69882b1..19d252230b 100644 --- a/content/riak/kv/2.0.4/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.0.4/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.0.4/using/admin/riak-cli -[config reference]: /riak/kv/2.0.4/configuring/reference -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.0.4/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.0.4/setup/planning/backend/multi -[usage search]: /riak/kv/2.0.4/developing/usage/search - -[glossary aae]: /riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.0.4/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.0.4/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.0.4/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.0.4/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.0.4/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.0.4/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.0.4/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.0.4/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.0.4/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
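As a sketch of what Bitcask tuning looks like in practice, the following appends a few of the Bitcask parameters discussed above to `riak.conf` (parameter names are taken from the Bitcask backend reference; the path and values are illustrative, not recommendations):

```bash
# Illustrative Bitcask settings appended to riak.conf (path assumed).
sudo tee -a /etc/riak/riak.conf <<'EOF'
storage_backend = bitcask
## roll to a new data file after ~1 GB instead of the default
bitcask.max_file_size = 1GB
## only merge between midnight and 6 AM to keep merge I/O off peak hours
bitcask.merge.policy = window
bitcask.merge.window.start = 0
bitcask.merge.window.end = 6
EOF
```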
diff --git a/content/riak/kv/2.0.4/setup/planning/backend/leveldb.md b/content/riak/kv/2.0.4/setup/planning/backend/leveldb.md index e6de0e86b0..cfeecf8f45 100644 --- a/content/riak/kv/2.0.4/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.0.4/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.4/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.4/configuring/reference -[perf index]: /riak/kv/2.0.4/using/performance -[config reference#aae]: /riak/kv/2.0.4/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[perf index]: {{}}riak/kv/2.0.4/using/performance +[config reference#aae]: {{}}riak/kv/2.0.4/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.0.4/setup/planning/backend/memory.md b/content/riak/kv/2.0.4/setup/planning/backend/memory.md index 745d164cf2..da4f99fcb0 100644 --- a/content/riak/kv/2.0.4/setup/planning/backend/memory.md +++ b/content/riak/kv/2.0.4/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.4/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.0.4/configuring/reference -[plan backend multi]: /riak/kv/2.0.4/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[plan backend multi]: {{}}riak/kv/2.0.4/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.0.4/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.0.4/setup/planning/backend/multi.md b/content/riak/kv/2.0.4/setup/planning/backend/multi.md index 93ffd467f0..ba7a8169ab 100644 --- a/content/riak/kv/2.0.4/setup/planning/backend/multi.md +++ b/content/riak/kv/2.0.4/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.4/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.0.4/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.4/setup/planning/backend/memory -[config reference]: /riak/kv/2.0.4/configuring/reference -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.0.4/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.0.4/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.4/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.4/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.0.4/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.0.4/setup/planning/best-practices.md b/content/riak/kv/2.0.4/setup/planning/best-practices.md index d02cc3f9f5..f8a8a5f190 100644 --- a/content/riak/kv/2.0.4/setup/planning/best-practices.md +++ b/content/riak/kv/2.0.4/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.4/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.0.4/using/reference/handoff -[config mapreduce]: /riak/kv/2.0.4/configuring/mapreduce -[glossary aae]: /riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.0.4/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.0.4/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.0.4/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.0.4/setup/planning/bitcask-capacity-calc.md index 09797f2378..a03c7a89cf 100644 --- a/content/riak/kv/2.0.4/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.0.4/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
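The arithmetic behind those calculators can also be roughed out by hand. A back-of-envelope sketch follows; the per-key overhead figure is an assumption used purely for illustration, not an official constant:

```bash
# Rough Bitcask RAM estimate: every key must fit in memory on each replica.
KEYS=100000000      # total distinct keys (assumed)
KEY_SIZE=36         # average key size in bytes (assumed)
OVERHEAD=45         # assumed in-memory bookkeeping bytes per key (illustrative)
N_VAL=3             # replicas per object
TOTAL=$(( KEYS * (KEY_SIZE + OVERHEAD) * N_VAL ))
echo "~$(( TOTAL / 1024 / 1024 / 1024 )) GiB of RAM across the cluster"
```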
diff --git a/content/riak/kv/2.0.4/setup/planning/cluster-capacity.md b/content/riak/kv/2.0.4/setup/planning/cluster-capacity.md index b3aa668247..8ebccf4119 100644 --- a/content/riak/kv/2.0.4/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.0.4/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.4/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.0.4/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.0.4/setup/planning -[concept replication]: /riak/kv/2.0.4/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.0.4/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.0.4/configuring/reference -[perf benchmark]: /riak/kv/2.0.4/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.0.4/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.0.4/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.0.4/setup/planning +[concept replication]: {{}}riak/kv/2.0.4/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[perf benchmark]: {{}}riak/kv/2.0.4/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.0.4/setup/planning/operating-system.md b/content/riak/kv/2.0.4/setup/planning/operating-system.md index 964ed1ac62..64e1c803ba 100644 --- a/content/riak/kv/2.0.4/setup/planning/operating-system.md +++ b/content/riak/kv/2.0.4/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.0.4/downloads/ +[downloads]: {{}}riak/kv/2.0.4/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.0.4/setup/planning/start.md b/content/riak/kv/2.0.4/setup/planning/start.md index c508a197b0..2512d23cb3 100644 --- a/content/riak/kv/2.0.4/setup/planning/start.md +++ b/content/riak/kv/2.0.4/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.4/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.0.4/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.4/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.0.4/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.0.4/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.4/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.0.4/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster.
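One early planning decision these pages call out is the ring size, which must be fixed before data is loaded. A minimal sketch, assuming the documented `ring_size` parameter and the packaged config path:

```bash
# Sketch: pin the ring size before the node's first start.
# ring_size must be a power of 2; 64 is the default.
sudo tee -a /etc/riak/riak.conf <<'EOF'
ring_size = 128
EOF
```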
diff --git a/content/riak/kv/2.0.4/setup/upgrading/checklist.md b/content/riak/kv/2.0.4/setup/upgrading/checklist.md index 53d2a3a477..b896b10e63 100644 --- a/content/riak/kv/2.0.4/setup/upgrading/checklist.md +++ b/content/riak/kv/2.0.4/setup/upgrading/checklist.md @@ -16,24 +16,24 @@ aliases: - /riak/kv/2.0.4/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.0.4/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.4/using/performance +[perf open files]: {{}}riak/kv/2.0.4/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.4/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.0.4/using/security/basics -[cluster ops load balance]: /riak/kv/2.0.4/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.0.4/configuring/reference -[config backend]: /riak/kv/2.0.4/configuring/backend -[usage search]: /riak/kv/2.0.4/developing/usage/search -[usage conflict resolution]: /riak/kv/2.0.4/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.0.4/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.0.4/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.0.4/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.0.4/using/admin/commands -[use admin riak control]: /riak/kv/2.0.4/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.0.4/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.0.4/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.0.4/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.0.4/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.0.4/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[config backend]: {{}}riak/kv/2.0.4/configuring/backend +[usage search]: {{}}riak/kv/2.0.4/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.0.4/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.4/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.0.4/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.0.4/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.0.4/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.0.4/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.0.4/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.0.4/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.0.4/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a production environment from a development or testing environment can be a complex process. While the specific process will depend on your environment and practices, there are some basics for you to consider and a few questions you will want to ask while making this transition. 
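One of the first items on that checklist, the open-files limit, is easy to verify up front; a quick sketch (the commentary values are illustrative):

```bash
# Check the per-process open-files limit on each node before going to production.
ulimit -n   # soft limit for the current shell
# A low value (e.g. the common 1024 default) should be raised per the
# open-files-limit documentation linked above before running Riak under load.
```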
diff --git a/content/riak/kv/2.0.4/setup/upgrading/cluster.md b/content/riak/kv/2.0.4/setup/upgrading/cluster.md index 78c97c1e32..738cd664b9 100644 --- a/content/riak/kv/2.0.4/setup/upgrading/cluster.md +++ b/content/riak/kv/2.0.4/setup/upgrading/cluster.md @@ -11,23 +11,23 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.0.4/ops/upgrading/rolling-upgrades/ - /riak/kv/2.0.4/ops/upgrading/rolling-upgrades/ --- -[production checklist]: /riak/kv/2.0.4/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.0.4/using/admin/riak-control -[use admin commands]: /riak/kv/2.0.4/using/admin/commands -[use admin riak-admin]: /riak/kv/2.0.4/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.0.4/developing/usage/secondary-indexes -[release notes]: /riak/kv/2.0.4/release-notes/ +[production checklist]: {{}}riak/kv/2.0.4/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.0.4/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.0.4/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.0.4/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.0.4/developing/usage/secondary-indexes +[release notes]: {{}}riak/kv/2.0.4/release-notes/ [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.0.4/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.0.4/using/reference/jmx -[snmp]: /riak/kv/2.0.4/using/reference/snmp +[cluster ops mdc]: {{}}riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.0.4/using/reference/jmx +[snmp]: {{}}riak/kv/2.0.4/using/reference/snmp {{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. @@ -38,7 +38,7 @@ recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. -If you run [Riak Control](/riak/kv/2.0.4/using/admin/riak-control), you should disable it during the rolling upgrade process. +If you run [Riak Control]({{}}riak/kv/2.0.4/using/admin/riak-control), you should disable it during the rolling upgrade process. {{% /note %}} Riak KV nodes negotiate with each other to determine supported @@ -104,9 +104,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.4/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.4/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.4/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.4/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.4/release-notes/). {{% /note %}} ## RHEL/CentOS @@ -166,9 +166,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. 
Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.4/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.4/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.4/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.4/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.4/release-notes/). {{% /note %}} ## Solaris/OpenSolaris @@ -252,9 +252,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.4/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.4/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.4/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.4/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.4/release-notes/). {{% /note %}} ## Rolling Upgrade to Enterprise diff --git a/content/riak/kv/2.0.4/setup/upgrading/search.md b/content/riak/kv/2.0.4/setup/upgrading/search.md index 38dc18c145..0591bda0b5 100644 --- a/content/riak/kv/2.0.4/setup/upgrading/search.md +++ b/content/riak/kv/2.0.4/setup/upgrading/search.md @@ -11,7 +11,7 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" --- If you're using Search in a version of Riak prior to 2.0 (1.3.0 to @@ -270,4 +270,4 @@ search property is set to false. 11. Finally, delete the merge index directories to reclaim disk space. -For any questions reach out to the [Riak community](/community). Preferably, ask your questions up front rather than during the middle of a migration. +For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. diff --git a/content/riak/kv/2.0.4/setup/upgrading/version.md b/content/riak/kv/2.0.4/setup/upgrading/version.md index 63816fd74e..33a34a9cbe 100644 --- a/content/riak/kv/2.0.4/setup/upgrading/version.md +++ b/content/riak/kv/2.0.4/setup/upgrading/version.md @@ -20,7 +20,7 @@ explains which default Riak behaviors have changed and specific steps to take for a successful upgrade. For an overview of the new features and functionality -included in version 2.0, check out our guide to [Riak 2.0](/riak/kv/2.0.4/introduction). +included in version 2.0, check out our guide to [Riak 2.0]({{}}riak/kv/2.0.4/introduction). ## New Clients @@ -36,14 +36,14 @@ was built with those features in mind. 
There are official While we strongly recommend using the newest versions of these clients, older versions will still work with Riak 2.0, with the drawback that -those older clients will not able to take advantage of [new features](/riak/kv/2.0.4/introduction) like [data types](/riak/kv/2.0.4/developing/data-types) or the new [Riak Search](/riak/kv/2.0.4/using/reference/search). +those older clients will not able to take advantage of [new features]({{}}riak/kv/2.0.4/introduction) like [data types]({{}}riak/kv/2.0.4/developing/data-types) or the new [Riak Search]({{}}riak/kv/2.0.4/using/reference/search). ## Bucket Types In versions of Riak prior to 2.0, the location of objects was -determined by objects' [bucket](/riak/kv/2.0.4/learn/concepts/buckets) and [key](/riak/kv/2.0.4/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties](/riak/kv/2.0.4/developing/usage/bucket-types/). +determined by objects' [bucket]({{}}riak/kv/2.0.4/learn/concepts/buckets) and [key]({{}}riak/kv/2.0.4/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties]({{}}riak/kv/2.0.4/developing/usage/bucket-types/). -In Riak 2.0, [bucket types](/riak/kv/2.0.4/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types](/riak/kv/2.0.4/using/reference/bucket-types). +In Riak 2.0, [bucket types]({{}}riak/kv/2.0.4/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types]({{}}riak/kv/2.0.4/using/reference/bucket-types). Here, we'll list some of the things to be aware of when upgrading. #### Bucket types and object location @@ -56,7 +56,7 @@ is determined by: * key This means there are 3 namespaces involved in object location instead of 2. -A full tutorial can be found in [Using Bucket Types](/riak/kv/2.0.4/using/reference/bucket-types). +A full tutorial can be found in [Using Bucket Types]({{}}riak/kv/2.0.4/using/reference/bucket-types). If your application was written using a version of Riak prior to 2.0, you should make sure that any endpoint in Riak targeting @@ -75,8 +75,8 @@ configurations. 
The following URLs are equivalent in Riak 2.0: If you use object locations that don't specify a bucket type, you have three options: -* Accept Riak's [default bucket configurations](/riak/kv/2.0.4/using/reference/bucket-types/#buckets-as-namespaces) -* Change Riak's defaults using your [configuration files](/riak/kv/2.0.4/configuring/reference/#default-bucket-properties) +* Accept Riak's [default bucket configurations]({{}}riak/kv/2.0.4/using/reference/bucket-types/#buckets-as-namespaces) +* Change Riak's defaults using your [configuration files]({{}}riak/kv/2.0.4/configuring/reference/#default-bucket-properties) * Manage multiple sets of bucket properties by specifying those properties for all operations (not recommended) @@ -86,17 +86,17 @@ One reason we recommend using bucket types for Riak 2.0 and later is because many newer Riak features were built with bucket types as a precondition: -* [Strong consistency](/riak/2.0.4/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem +* [Strong consistency]({{}}riak/kv/2.0.4/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem requires you to set the `consistent` parameter on a bucket type to `true` -* [Riak Data Types](/riak/kv/2.0.4/developing/data-types) --- In order to use Riak Data - Types, you must [create bucket types](/riak/kv/2.0.4/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the +* [Riak Data Types]({{}}riak/kv/2.0.4/developing/data-types) --- In order to use Riak Data + Types, you must [create bucket types]({{}}riak/kv/2.0.4/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the Data Type you are using #### Bucket types and downgrades If you decide to use bucket types, please remember that you -cannot [downgrade](/riak/kv/2.0.4/setup/downgrade) your cluster to a version of +cannot [downgrade]({{}}riak/kv/2.0.4/setup/downgrade) your cluster to a version of Riak prior to 2.0 if you have both created and activated a bucket type. @@ -104,20 +104,20 @@ bucket type. One of the biggest changes in version 2.0 regarding application development involves Riak's default -[siblings](/riak/kv/2.0.4/learn/concepts/causal-context/#siblings) behavior. +[siblings]({{}}riak/kv/2.0.4/learn/concepts/causal-context/#siblings) behavior. In versions prior to 2.0, the `allow_mult` setting was set to `false` by default for all buckets. So Riak's default behavior was to resolve -object replica [conflicts](/riak/kv/2.0.4/developing/usage/conflict-resolution) between nodes on its +object replica [conflicts]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution) between nodes on its own; relieving connecting clients of the need to resolve those conflicts. **In 2.0, `allow_mult` is set to `true` for any bucket type that you create and activate.** -This means that the default when [using bucket types](/riak/kv/2.0.4/using/reference/bucket-types/) is to handle [conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution) on the client side using -either traditional [vector clocks](/riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors](/riak/kv/2.0.4/learn/concepts/causal-context/#dotted-version-vector). 
+This means that the default when [using bucket types]({{}}riak/kv/2.0.4/using/reference/bucket-types/) is to handle [conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution) on the client side using +either traditional [vector clocks]({{}}riak/kv/2.0.4/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors]({{}}riak/kv/2.0.4/learn/concepts/causal-context/#dotted-version-vector). If you wish to set `allow_mult` to `false` in version 2.0, you have two options: @@ -126,11 +126,11 @@ options: * Don't use bucket types. More information on handling siblings can be found in our documentation -on [conflict resolution](/riak/kv/2.0.4/developing/usage/conflict-resolution). +on [conflict resolution]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution). ## Enabling Security -The [authentication and authorization](/riak/kv/2.0.4/using/security/basics) mechanisms included with Riak 2.0 should only be turned +The [authentication and authorization]({{}}riak/kv/2.0.4/using/security/basics) mechanisms included with Riak 2.0 should only be turned on after careful testing in a non-production environment. Security changes the way all applications interact with Riak. @@ -140,12 +140,12 @@ If you decide to upgrade to version 2.0, you can still downgrade your cluster to an earlier version of Riak if you wish, _unless_ you perform one of the following actions in your cluster: -* Index data to be used in conjunction with the new [Riak Search](/riak/kv/2.0.4/using/reference/search). -* Create _and_ activate one or more [bucket types](/riak/kv/2.0.4/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: - - [Strong consistency](/riak/2.0.4/using/reference/strong-consistency) - - [Riak Data Types](/riak/kv/2.0.4/developing/data-types) +* Index data to be used in conjunction with the new [Riak Search]({{}}riak/kv/2.0.4/using/reference/search). +* Create _and_ activate one or more [bucket types]({{}}riak/kv/2.0.4/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: + - [Strong consistency]({{}}riak/kv/2.0.4/using/reference/strong-consistency) + - [Riak Data Types]({{}}riak/kv/2.0.4/developing/data-types) -If you use other new features, such as [Riak Security](/riak/kv/2.0.4/using/security/basics) or the new [configuration files](/riak/kv/2.0.4/configuring/reference/), you can still +If you use other new features, such as [Riak Security]({{}}riak/kv/2.0.4/using/security/basics) or the new [configuration files]({{}}riak/kv/2.0.4/configuring/reference/), you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. @@ -155,7 +155,7 @@ Riak 2.0 offers a new configuration system that both simplifies configuration syntax and uses one configuration file, `riak.conf`, instead of the two files, `app.config` and `vm.args`, required by the older system. Full documentation of the new system can be found in -[Configuration Files](/riak/kv/2.0.4/configuring/reference/). +[Configuration Files]({{}}riak/kv/2.0.4/configuring/reference/). If you're upgrading to Riak 2.0 from an earlier version, you have two configuration options: @@ -166,12 +166,12 @@ configuration options: recognized in Riak 2.0. 
If you choose the first option, make sure to consult the -[configuration files](/riak/kv/2.0.4/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. +[configuration files]({{}}riak/kv/2.0.4/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. If you choose the second option, Riak will automatically determine that the older configuration system is being used. You should be aware, however, that some settings must be set in an `advanced.config` file. -For a listing of those parameters, see our documentation on [advanced configuration](/riak/kv/2.0.4/configuring/reference/#advanced-configuration). +For a listing of those parameters, see our documentation on [advanced configuration]({{}}riak/kv/2.0.4/configuring/reference/#advanced-configuration). If you choose to keep the existing `app.config` files, you _must_ add the following additional settings in the `riak_core` section: @@ -209,7 +209,7 @@ default to a value of `15`, which can cause problems in some clusters. ## Upgrading Search Information on upgrading Riak Search to 2.0 can be found in our -[Search upgrade guide](/riak/kv/2.0.4/setup/upgrading/search). +[Search upgrade guide]({{}}riak/kv/2.0.4/setup/upgrading/search). ## Migrating from Short Names @@ -220,12 +220,11 @@ and `-name` in `vm.args`. If you are upgrading from a previous version of Riak to 2.0 and are using `-sname` in your `vm.args`, the below steps are required to migrate away from `-sname`. -1. Upgrade to Riak -[1.4.12](http://docs.basho.com/riak/1.4.12/downloads/). +1. Upgrade to Riak 1.4.12. 2. Back up the ring directory on each node, typically located in `/var/lib/riak/ring`. 3. Stop all nodes in your cluster. -4. Run [`riak-admin reip <old_nodename> <new_nodename>`](/riak/kv/2.0.4/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your +4. Run [`riak-admin reip <old_nodename> <new_nodename>`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your cluster. For example, in a 5 node cluster this will be run 25 total times, 5 times on each node. The `<old_nodename>` is the current shortname, and the `<new_nodename>` is the new fully qualified hostname.
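To make the 25-invocation arithmetic concrete, here is a hypothetical sketch for a five-node cluster; the node names are invented for illustration, and the same loop would be run on each of the five stopped nodes:

```bash
# Hypothetical: rename riak1..riak5 from short (-sname) node names to
# fully qualified (-name) node names. Run this loop on every node.
for i in 1 2 3 4 5; do
  riak-admin reip riak@riak$i riak@riak$i.example.com
done
```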
diff --git a/content/riak/kv/2.0.4/using.md b/content/riak/kv/2.0.4/using.md index 083932f960..49336818cc 100644 --- a/content/riak/kv/2.0.4/using.md +++ b/content/riak/kv/2.0.4/using.md @@ -15,7 +15,7 @@ toc: true [use running cluster]: ../using/running-a-cluster [use admin index]: ../using/admin/ [cluster ops index]: ../using/cluster-operations -[repair recover index]: ../repair-recovery +[repair recover index]: ../using/repair-recovery [security index]: ../using/security [perf index]: ../using/performance [troubleshoot index]: ../using/troubleshooting diff --git a/content/riak/kv/2.0.4/using/admin/commands.md b/content/riak/kv/2.0.4/using/admin/commands.md index b791e23204..84ef0a5bb2 100644 --- a/content/riak/kv/2.0.4/using/admin/commands.md +++ b/content/riak/kv/2.0.4/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.4/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.0.4/using/admin/riak-admin/#cluster -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.0.4/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.0.4/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.0.4/using/admin/riak-admin.md b/content/riak/kv/2.0.4/using/admin/riak-admin.md index 998e453157..b67094193d 100644 --- a/content/riak/kv/2.0.4/using/admin/riak-admin.md +++ b/content/riak/kv/2.0.4/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.0.4/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.0.4/configuring/reference -[use admin commands]: /riak/kv/2.0.4/using/admin/commands -[use admin commands#join]: /riak/kv/2.0.4/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.0.4/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.0.4/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.0.4/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.0.4/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.0.4/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.0.4/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.0.4/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.0.4/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.0.4/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.0.4/setup/downgrade -[security index]: /riak/kv/2.0.4/using/security/ -[security managing]: /riak/kv/2.0.4/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.0.4/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.0.4/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.0.4/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.0.4/using/cluster-operations/strong-consistency -[cluster 
ops handoff]: /riak/kv/2.0.4/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.0.4/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[use admin commands]: {{}}riak/kv/2.0.4/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.0.4/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.0.4/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.0.4/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.0.4/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.0.4/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.0.4/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.0.4/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.0.4/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.0.4/using/cluster-operations/inspecting-node +[use ref monitoring]: {{}}riak/kv/2.0.4/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.0.4/setup/downgrade +[security index]: {{}}riak/kv/2.0.4/using/security/ +[security managing]: {{}}riak/kv/2.0.4/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.0.4/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.0.4/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.0.4/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.0.4/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.0.4/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.0.4/using/admin/riak-admin/#stats ## riak-admin diff --git a/content/riak/kv/2.0.4/using/admin/riak-cli.md b/content/riak/kv/2.0.4/using/admin/riak-cli.md index 925366c75b..8383f6342c 100644 --- a/content/riak/kv/2.0.4/using/admin/riak-cli.md +++ b/content/riak/kv/2.0.4/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.4/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.0.4/configuring/reference/ +[configuration file]: {{}}riak/kv/2.0.4/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.0.4/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.0.4/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.0.4/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.0.4/configuring/reference/ ## riak diff --git a/content/riak/kv/2.0.4/using/admin/riak-control.md b/content/riak/kv/2.0.4/using/admin/riak-control.md index 74ac5037c1..1c917df5d5 100644 --- a/content/riak/kv/2.0.4/using/admin/riak-control.md +++ b/content/riak/kv/2.0.4/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.0.4/configuring/reference +[config reference]: {{}}riak/kv/2.0.4/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.0.4/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.0.4/using/security/basics#enabling-ssl). 
Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.0.4/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.0.4/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.0.4/using/cluster-operations.md b/content/riak/kv/2.0.4/using/cluster-operations.md index bab1aeee96..64b10ac47a 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations.md +++ b/content/riak/kv/2.0.4/using/cluster-operations.md @@ -20,7 +20,6 @@ toc: true [ops log]: ./logging [ops backup]: ./backing-up [ops handoff]: ./handoff -[ops obj del]: ./object-deletion [ops strong consistency]: ./strong-consistency [ops v3 mdc]: ./v3-multi-datacenter [ops v2 mdc]: ./v2-multi-datacenter @@ -84,13 +83,6 @@ Information on using the `riak-admin handoff` interface to enable and disable ha [Learn More >>][ops handoff] -#### [Object Deletion][ops obj del] - -Describes possible settings for `delete_mode`. - -[Learn More >>][ops obj del] - - #### [Monitoring Strong Consistency][ops strong consistency] Overview of the various statistics used in monitoring strong consistency. 
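As a sketch of the setup the Riak Control page above describes, the following enables the console with userlist authentication; the parameter names follow the `riak.conf` documentation, while the password and config path are placeholders:

```bash
# Sketch: turn on Riak Control with a single admin user (placeholders throughout).
sudo tee -a /etc/riak/riak.conf <<'EOF'
riak_control = on
riak_control.auth.mode = userlist
riak_control.auth.user.admin.password = changeme
EOF
riak stop && riak start   # then browse to /admin on the node's HTTP(S) listener
```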
diff --git a/content/riak/kv/2.0.4/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.0.4/using/cluster-operations/active-anti-entropy.md index a96fb7ed47..33f225a1a7 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.0.4/using/cluster-operations/active-anti-entropy.md @@ -54,12 +54,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. ## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -87,7 +87,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes.md index 6618a62cf4..a1c4e317e7 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.0.4/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.0.4/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.0.4/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.0.4/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.0.4/using/cluster-operations/backing-up.md b/content/riak/kv/2.0.4/using/cluster-operations/backing-up.md index 9ccfaf1d23..9c915ca9f0 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.0.4/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.4/ops/running/backups --- -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters -[config reference]: /riak/kv/2.0.4/configuring/reference -[plan backend leveldb]: /riak/kv/2.0.4/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.4/using/reference/strong-consistency -[concept aae]: /riak/kv/2.0.4/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.0.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.0.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.4/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.0.4/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.0.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.0.4/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down <node>`](/riak/kv/2.0.4/using/admin/riak-admin/#down) + [`riak-admin down <node>`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join <node>`](/riak/kv/2.0.4/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join <node>`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace <node1> <node2>`](/riak/kv/2.0.4/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace <node1> <node2>`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.0.4/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.0.4/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.0.4/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.0.4/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.0.4/using/cluster-operations/bucket-types.md b/content/riak/kv/2.0.4/using/cluster-operations/bucket-types.md index 20dd6e42e8..d8eece3de0 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.0.4/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.0.4/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line.
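That setup step is a short command-line sequence; a minimal sketch (the type name and property value are illustrative):

```bash
# Create, activate, and verify a bucket type (name and n_val illustrative).
riak-admin bucket-type create n_val_of_5 '{"props":{"n_val":5}}'
riak-admin bucket-type activate n_val_of_5
riak-admin bucket-type status n_val_of_5   # should report the type as active
```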
diff --git a/content/riak/kv/2.0.4/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.0.4/using/cluster-operations/changing-cluster-info.md index 10efb0e768..19db7b206b 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.0.4/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.0.4/configuring/reference +[config reference]: {{}}riak/kv/2.0.4/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.0.4/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.0.4/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.0.4/using/cluster-operations/handoff.md b/content/riak/kv/2.0.4/using/cluster-operations/handoff.md index 863e4671e1..1fa576c9c0 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.0.4/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.0.4/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.0.4/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.0.4/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.0.4/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.0.4/using/cluster-operations/logging.md b/content/riak/kv/2.0.4/using/cluster-operations/logging.md index c3d42b9789..24617b7142 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations/logging.md +++ b/content/riak/kv/2.0.4/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e.
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.0.4/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.0.4/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.0.4/using/cluster-operations/replacing-node.md b/content/riak/kv/2.0.4/using/cluster-operations/replacing-node.md index 355cdef1d6..291fbcb35b 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.0.4/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.0.4/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.0.4/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.0.4/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.0.4/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.0.4/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.0.4/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.0.4/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.0.4/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.0.4/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.0.4/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.0.4/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.0.4/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.0.4/using/cluster-operations/strong-consistency.md index a5d19907dc..48326d5f3a 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.0.4/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.0.4/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.0.4/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.0.4/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.0.4/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.0.4/using/cluster-operations/v2-multi-datacenter.md index 8fd9bbe979..27173f7fb9 100644 --- a/content/riak/kv/2.0.4/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.4/using/cluster-operations/v2-multi-datacenter.md @@ -159,7 +159,7 @@ restarting Riak Enterprise. 
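The field tables that follow document output from the `riak-repl status` command. A minimal way to pull that output, as a sketch (it assumes Riak Enterprise with v2 replication already configured and `riak-repl` on the `PATH`):

```bash
# Print replication status on a node; the fields described
# in the tables below appear in this command's output
riak-repl status
```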
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -179,7 +179,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -217,7 +217,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.0.4/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.0.4/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -238,7 +238,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server

diff --git a/content/riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter.md
index 00cbfff335..4849dab757 100644
--- a/content/riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter.md
+++ b/content/riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter.md
@@ -17,11 +17,11 @@ aliases:
  - /riak/kv/2.0.4/ops/mdc/v3/operations
---

-[config v3 mdc]: /riak/kv/2.0.4/configuring/v3-multi-datacenter
-[config v3 nat]: /riak/kv/2.0.4/configuring/v3-multi-datacenter/nat
-[config v3 quickstart]: /riak/kv/2.0.4/configuring/v3-multi-datacenter/quick-start
-[config v3 ssl]: /riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl
-[ref v3 stats]: /riak/kv/2.0.4/using/reference/multi-datacenter/statistics
+[config v3 mdc]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter
+[config v3 nat]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{}}riak/kv/2.0.4/using/reference/multi-datacenter/statistics

This document explains how to manage replication with the `riak-repl`
command. Some of these commands can be set or behavior altered by

diff --git a/content/riak/kv/2.0.4/using/performance.md b/content/riak/kv/2.0.4/using/performance.md
index 819a6e989f..94f8553e89 100644
--- a/content/riak/kv/2.0.4/using/performance.md
+++ b/content/riak/kv/2.0.4/using/performance.md
@@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those
changes.

For performance and tuning recommendations specific to running Riak
-clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.0.4/using/performance/amazon-web-services).
+clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.0.4/using/performance/amazon-web-services).

{{% note title="Note on other operating systems" %}}
Unless otherwise specified, the tunings recommended below are for Linux

@@ -237,12 +237,12 @@ These settings have been tested and benchmarked by Basho in nodes with

Riak and supporting tools can consume a large number of open file
handles during normal operation. For stability, increasing the number of
-open files limit is necessary. See [Open Files Limit](/riak/kv/2.0.4/using/performance/open-files-limit/) for more
+open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.0.4/using/performance/open-files-limit/) for more
details. 
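As a quick illustration of the open-files recommendation above, a session-level check and bump might look like this (a sketch only; the value is illustrative, and making the limit persistent is platform-specific, as the Open Files Limit doc describes):

```bash
# Inspect the current per-process limit on open file descriptors
ulimit -n

# Raise it for this shell before starting Riak (value is illustrative)
ulimit -n 65536
riak start
```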
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.0.4/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.0.4/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.0.4/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.0.4/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.0.4/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.0.4/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.0.4/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.0.4/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.0.4/using/performance/benchmarking.md b/content/riak/kv/2.0.4/using/performance/benchmarking.md index 7e57a40365..fc7d545a7e 100644 --- a/content/riak/kv/2.0.4/using/performance/benchmarking.md +++ b/content/riak/kv/2.0.4/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.0.4/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.0.4/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.0.4/using/performance/latency-reduction.md b/content/riak/kv/2.0.4/using/performance/latency-reduction.md index 2037b5e390..7466ca8553 100644 --- a/content/riak/kv/2.0.4/using/performance/latency-reduction.md +++ b/content/riak/kv/2.0.4/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.0.4/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.0.4/using/performance/multi-datacenter-tuning.md index 9b76ad9faf..1470c4d9d1 100644 --- a/content/riak/kv/2.0.4/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.0.4/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.0.4/using/performance +[perf index]: {{}}riak/kv/2.0.4/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.0.4/using/performance/open-files-limit.md b/content/riak/kv/2.0.4/using/performance/open-files-limit.md index 1855884ad4..364cb78964 100644 --- a/content/riak/kv/2.0.4/using/performance/open-files-limit.md +++ b/content/riak/kv/2.0.4/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/ops/tuning/open-files-limit/ --- -[plan backend bitcask]: /riak/kv/2.0.4/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.4/setup/planning/backend/bitcask Riak can consume a large number of open file handles during normal operation. 
The [Bitcask][plan backend bitcask] backend in particular may accumulate a high diff --git a/content/riak/kv/2.0.4/using/reference/bucket-types.md b/content/riak/kv/2.0.4/using/reference/bucket-types.md index 16b2214cf2..fe3268184a 100644 --- a/content/riak/kv/2.0.4/using/reference/bucket-types.md +++ b/content/riak/kv/2.0.4/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.0.4/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.0.4/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.0.4/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.0.4/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. @@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.0.4/developing/data-types), and [strong consistency](/riak/kv/2.0.4/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.0.4/developing/data-types), and [strong consistency]({{}}riak/kv/2.0.4/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.0.4/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.0.4/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.4/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.0.4/developing/getting-started) section. 
+> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.4/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.4/developing/getting-started) section.

If creation is successful, you should see the following output:

@@ -521,7 +521,7 @@ associated with the `default` bucket type:

## Bucket Types and the `allow_mult` Setting

-Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.0.4/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`.
+Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.0.4/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`.

In version 2.0, this is changing in a subtle way. Now, there are two
different default settings for `allow_mult` in play:

@@ -534,7 +534,7 @@ different default settings for `allow_mult` in play:

The consequence is that applications that have previously ignored
conflict resolutions in certain buckets (or all buckets) can continue to
-do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.0.4/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution) with the appropriate application-side business logic.

To give an example, let's have a look at the properties associated with
the `default` bucket type:

@@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'

## Bucket Type Example

Let's say that you'd like to create a bucket type called
-`user_account_bucket` with a [pre-commit hook](/riak/kv/2.0.4/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
-hooks](/riak/kv/2.0.4/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.0.4/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{}}riak/kv/2.0.4/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps:

1. Creating a JavaScript object containing the appropriate `props`
   settings:

@@ -714,7 +714,7 @@ curl -XPUT \

In this example, the bucket `sensitive_user_data` bears the
configuration established by the `no_siblings` bucket type, and it bears
that configuration _on the basis of the query's structure_. This is
-because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.0.4/learn/concepts/buckets) and [keys](/riak/kv/2.0.4/learn/concepts/keys-and-objects).
+because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.0.4/learn/concepts/buckets) and [keys]({{}}riak/kv/2.0.4/learn/concepts/keys-and-objects).

Let's say that we're using Riak to store internet memes. 
We've been using a bucket called `current_memes` using the bucket type

diff --git a/content/riak/kv/2.0.4/using/reference/custom-code.md b/content/riak/kv/2.0.4/using/reference/custom-code.md
index ce65b8d5a4..a425c99e37 100644
--- a/content/riak/kv/2.0.4/using/reference/custom-code.md
+++ b/content/riak/kv/2.0.4/using/reference/custom-code.md
@@ -16,10 +16,10 @@ aliases:
---

Riak supports the use of Erlang named functions in compiled modules for
-[pre/post-commit hooks](/riak/kv/2.0.4/developing/usage/commit-hooks), and MapReduce operations. This
+[pre/post-commit hooks]({{}}riak/kv/2.0.4/developing/usage/commit-hooks), and MapReduce operations. This
doc contains installation steps with simple examples for each use case.

-Your developers can compile [custom erlang code](/riak/kv/2.0.4/developing/usage/commit-hooks), which
+Your developers can compile [custom erlang code]({{}}riak/kv/2.0.4/developing/usage/commit-hooks), which
they can send to you as a *beam* file. You should note that in Erlang, a
file name must have the same name as the module. So if you are given a
file named `validate_json.beam`, do not rename it.

@@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the
Riak key value store has fully initialized and become available for use.

This is done with the `riak-admin wait-for-service` command as detailed
-in the [Commands documentation](/riak/kv/2.0.4/using/admin/riak-admin/#wait-for-service).
+in the [Commands documentation]({{}}riak/kv/2.0.4/using/admin/riak-admin/#wait-for-service).

{{% note %}}
It is important that you ensure riak_kv is active before restarting the next

diff --git a/content/riak/kv/2.0.4/using/reference/handoff.md b/content/riak/kv/2.0.4/using/reference/handoff.md
index 9c298096e4..5330c33800 100644
--- a/content/riak/kv/2.0.4/using/reference/handoff.md
+++ b/content/riak/kv/2.0.4/using/reference/handoff.md
@@ -15,7 +15,7 @@ aliases:
  - /riak/kv/2.0.4/ops/running/handoff/
---

-[cluster ops handoff]: /riak/kv/2.0.4/using/cluster-operations/handoff
+[cluster ops handoff]: {{}}riak/kv/2.0.4/using/cluster-operations/handoff

Riak is a distributed system built with two essential goals in mind:

@@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply

Intra-cluster handoff typically takes one of two forms: **hinted
handoff** and **ownership transfer**.

-Hinted handoff occurs when a [vnode](/riak/kv/2.0.4/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick
+Hinted handoff occurs when a [vnode]({{}}riak/kv/2.0.4/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick
up the slack, so to speak, assuming responsibility for node C's
operations. When node C comes back online, responsibility will be
handed back to the original vnodes.

Ownership transfer is different because it is meant to be permanent.

-It occurs when a [vnode](/riak/kv/2.0.4/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very
+It occurs when a [vnode]({{}}riak/kv/2.0.4/learn/glossary/#vnode) no longer belongs to the node on which it's running. 
This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.0.4/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.0.4/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.0.4/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.0.4/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.0.4/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.0.4/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.0.4/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.0.4/configuring/reference/#vnode_management_timer). Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.0.4/using/reference/jmx.md b/content/riak/kv/2.0.4/using/reference/jmx.md index b3b7ec57ca..b246b6afa6 100644 --- a/content/riak/kv/2.0.4/using/reference/jmx.md +++ b/content/riak/kv/2.0.4/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.4/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.0.4/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.0.4/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.0.4/using/reference/logging.md b/content/riak/kv/2.0.4/using/reference/logging.md index 3e23c795c5..52ffdb295b 100644 --- a/content/riak/kv/2.0.4/using/reference/logging.md +++ b/content/riak/kv/2.0.4/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.4/ops/running/logging --- -[cluster ops log]: /riak/kv/2.0.4/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.0.4/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. 
A compact listing of parameters can be found in our [configuration files](/riak/kv/2.0.4/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.0.4/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -46,7 +46,7 @@ File | Significance `console.log` | Console log output `crash.log` | Crash logs `erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. -`error.log` | [Common errors](../../repair-recover/errors) emitted by Riak. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. `run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. ## Log Syntax @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.0.4/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.0.4/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. * `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.0.4/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.0.4/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.0.4/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.0.4/using/reference/multi-datacenter/comparison.md index 376c84a98b..8187882fb9 100644 --- a/content/riak/kv/2.0.4/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.0.4/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.0.4/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.0.4/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.0.4/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.0.4/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. 
-* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.0.4/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.0.4/using/reference/object-deletion.md b/content/riak/kv/2.0.4/using/reference/object-deletion.md index d778207838..8867fc323c 100644 --- a/content/riak/kv/2.0.4/using/reference/object-deletion.md +++ b/content/riak/kv/2.0.4/using/reference/object-deletion.md @@ -39,7 +39,7 @@ concretely using the following example: * The object has been marked as deleted on nodes A and B, but it still lives on node C * A client attempts to read the object, Riak senses that there are - divergent replicas and initiates a repair process (either [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) or [active anti-entropy](../../../learn/concepts/active-anti-entropy/), + divergent replicas and initiates a repair process (either [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) or [active anti-entropy](../../../learn/concepts/active-anti-entropy/), depending on configuration) At this point, Riak needs to make a decision about what to do. Should diff --git a/content/riak/kv/2.0.4/using/reference/runtime-interaction.md b/content/riak/kv/2.0.4/using/reference/runtime-interaction.md index 73ea5912ef..cd5a80a2d8 100644 --- a/content/riak/kv/2.0.4/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.0.4/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.4/ops/advanced/runtime --- -[config reference]: /riak/kv/2.0.4/configuring/reference -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.4/configuring/reference +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.0.4/using/reference/search.md b/content/riak/kv/2.0.4/using/reference/search.md index 6daaece142..4f42f19aed 100644 --- a/content/riak/kv/2.0.4/using/reference/search.md +++ b/content/riak/kv/2.0.4/using/reference/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.0.4/dev/advanced/search --- -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak Search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -Search, you should check out the [Using Search](/riak/kv/2.0.4/developing/usage/search) document. +Search, you should check out the [Using Search]({{}}riak/kv/2.0.4/developing/usage/search) document. 
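For a taste of what using Riak Search looks like before diving into the internals below, a query against a hypothetical index over the default HTTP port might look like the following sketch (the index name `famous`, the field `name_s`, and the port are all assumptions):

```bash
# Query the Solr-backed search endpoint for a hypothetical index "famous"
curl "http://localhost:8098/search/query/famous?wt=json&q=name_s:Lion*"
```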
-![Yokozuna](/images/yokozuna.png)
+![Yokozuna]({{}}images/yokozuna.png)

## Riak Search is Erlang

@@ -126,7 +126,7 @@ but logically partition them in KV by using a date as the bucket name.

A bucket _cannot_ be associated with many indexes---the `search_index`
property must be a single name, not a list.

-See the [main Search documentation](/riak/kv/2.0.4/developing/usage/search/#simple-setup) for details on creating an index.
+See the [main Search documentation]({{}}riak/kv/2.0.4/developing/usage/search/#simple-setup) for details on creating an index.

## Extractors

@@ -288,7 +288,7 @@ The corresponding date type is declared under `` like so.

```

-You can also find more information on to how customize your own [search schema](/riak/kv/2.0.4/developing/usage/search-schemas).
+You can also find more information on how to customize your own [search schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas).

Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
called `_yz_default`. This is an extremely general schema which makes
@@ -298,7 +298,7 @@ indexed.

## Active Anti-Entropy (AAE)

-[Active Anti-Entropy](/riak/kv/2.0.4/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+[Active Anti-Entropy]({{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
correcting entropy (divergence) between the data stored in Riak's
key-value backend and the indexes stored in Solr. The impetus for AAE is
that failures come in all shapes and sizes---disk failure, dropped
@@ -353,7 +353,7 @@ _analysis_.

Solr provides many different field types which analyze data in
different ways, and custom analyzer chains may be built by stringing
together XML in the schema file, allowing custom analysis for each
field. For more
-information on analysis, see [Search Schema](/riak/kv/2.0.4/developing/usage/search-schemas).
+information on analysis, see [Search Schema]({{}}riak/kv/2.0.4/developing/usage/search-schemas).

## Tagging

diff --git a/content/riak/kv/2.0.4/using/reference/secondary-indexes.md b/content/riak/kv/2.0.4/using/reference/secondary-indexes.md
index 4d21e77f6e..4a3fdeaee1 100644
--- a/content/riak/kv/2.0.4/using/reference/secondary-indexes.md
+++ b/content/riak/kv/2.0.4/using/reference/secondary-indexes.md
@@ -15,28 +15,28 @@ aliases:
  - /riak/kv/2.0.4/dev/advanced/2i
---

-[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types
-[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency
+[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types
+[use ref strong consistency]: {{}}riak/kv/2.0.4/using/reference/strong-consistency

> **Note: Riak Search preferred for querying**
>
> If you're interested in non-primary-key-based querying in Riak, i.e. if
you're looking to go beyond straightforward K/V operations, we now
-recommend [Riak Search](/riak/kv/2.0.4/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends.
+recommend [Riak Search]({{}}riak/kv/2.0.4/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends.

This document provides implementation and other details for Riak's
-[secondary indexes](/riak/kv/2.0.4/developing/usage/secondary-indexes/) \(2i) feature. 
+[secondary indexes]({{}}riak/kv/2.0.4/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.0.4/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.0.4/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.0.4/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.0.4/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.0.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.0.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. diff --git a/content/riak/kv/2.0.4/using/reference/statistics-monitoring.md b/content/riak/kv/2.0.4/using/reference/statistics-monitoring.md index e66ff38004..a0c4bf31b3 100644 --- a/content/riak/kv/2.0.4/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.0.4/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.0.4/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.0.4/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.0.4/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.0.4/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. 
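As a sketch of the two retrieval paths just mentioned (assuming a running local node and the default HTTP port 8098):

```bash
# Pull the full statistics blob from the HTTP API...
curl -s http://localhost:8098/stats

# ...or fetch the same counters on the node itself via riak-admin
riak-admin status
```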
This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.0.4/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.0.4/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.0.4/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.0.4/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.0.4/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.0.4/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.0.4/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.0.4/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -143,7 +143,7 @@ Metric | Also | Notes ## Command-line Interface -The [`riak-admin`](/riak/kv/2.0.4/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.0.4/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -168,14 +168,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.0.4/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.0.4/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.0.4/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.0.4/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -222,7 +222,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.0.4/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.0.4/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -246,7 +246,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. 
A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.0.4/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.0.4/developing/api/http/status) endpoint is also available. #### Nagios @@ -320,14 +320,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.0.4/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.0.4/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.0.4/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.0.4/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -349,9 +349,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.0.4/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.0.4/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.0.4/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.0.4/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -367,9 +367,9 @@ Docs](https://github.com/basho/basho_docs). 
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.0.4/using/reference/strong-consistency.md b/content/riak/kv/2.0.4/using/reference/strong-consistency.md index 12176c074c..15f3ce1f83 100644 --- a/content/riak/kv/2.0.4/using/reference/strong-consistency.md +++ b/content/riak/kv/2.0.4/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.0.4/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.4/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.4/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.4/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.4/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.4/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.0.4/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.0.4/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. 
Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.0.4/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.0.4/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.0.4/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.0.4/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.0.4/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.0.4/using/reference/v2-multi-datacenter/architecture.md index 5d841df396..7d3d49add7 100644 --- a/content/riak/kv/2.0.4/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.4/using/reference/v2-multi-datacenter/architecture.md @@ -78,7 +78,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.0.4/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.0.4/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -90,7 +90,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{}}images/MDC_Full-sync-small.png)
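Operationally, a fullsync exchange like the one above is driven from the listener (primary) side with `riak-repl`. A sketch, assuming v2 replication has already been configured via `add-listener`/`add-site`:

```bash
# Start a manual fullsync from a node in the primary cluster
riak-repl start-fullsync

# Pause and resume it if it must yield to production load
riak-repl pause-fullsync
riak-repl resume-fullsync
```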
## Realtime Replication @@ -108,7 +108,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{}}images/MDC-real-time-sync-small.png)
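The listener/site pairing that underpins both diagrams is established with `riak-repl` as well. A minimal sketch (the node name, addresses, port, and site name are all illustrative):

```bash
# On a node in the primary (listener) cluster
riak-repl add-listener riak@10.0.1.10 10.0.1.10 9010

# On a node in the secondary (site) cluster, pointing at that listener
riak-repl add-site 10.0.1.10 9010 newyork
```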
## Restrictions @@ -116,6 +116,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.0.4/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.0.4/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.0.4/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.0.4/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/aae.md index b6bbfae7ed..c1fce8a036 100644 --- a/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.4/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.0.4/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.0.4/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.0.4/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/architecture.md index e974bd486a..fc0c574c16 100644 --- a/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.4/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.0.4/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.0.4/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.0.4/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.0.4/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC fullsync]({{}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC fullsync]({{}}images/MDC-v3-realtime2.png)
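The connection and realtime start-up shown in these figures map onto a short `riak-repl` sequence run on the source cluster. A sketch with illustrative cluster names and address (the sink cluster is assumed to be named `newyork`):

```bash
# Name the source cluster and connect it to the sink cluster
riak-repl clustername boston
riak-repl connect newyork.example.com:9080

# Enable and start realtime replication toward the sink
riak-repl realtime enable newyork
riak-repl realtime start newyork
```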
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC fullsync]({{}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC fullsync]({{}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC fullsync]({{}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC fullsync]({{}}images/MDC-v3-realtime6.png)
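Realtime replication to a given sink can also be stopped and restarted without unconfiguring it, which is useful when observing the queue behavior described above (the sink name `newyork` is illustrative):

```bash
# Stop realtime replication to the named sink, then start it again later
riak-repl realtime stop newyork
riak-repl realtime start newyork
```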
## Restrictions

diff --git a/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/cascading-writes.md
index b5a305e4e0..cddfdeee0c 100644
--- a/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/cascading-writes.md
+++ b/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -83,7 +83,7 @@ cascade.

## Usage

Riak Enterprise Cascading Writes can be enabled and disabled using the
-`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter) for more information.
+`riak-repl` command. Please see the [Version 3 Operations guide]({{}}riak/kv/2.0.4/using/cluster-operations/v3-multi-datacenter) for more information.

To show the current settings:

diff --git a/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md
index f9ccd11380..8b7526472c 100644
--- a/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md
+++ b/content/riak/kv/2.0.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -17,7 +17,7 @@ aliases:
  - /riak/kv/2.0.4/ops/mdc/v3/scheduling-fullsync
---

-[config reference#advanced]: /riak/kv/2.0.4/configuring/reference/#advanced-configuration
+[config reference#advanced]: {{}}riak/kv/2.0.4/configuring/reference/#advanced-configuration

The `fullsync_interval` parameter can be configured in the `riak-repl`
section of [`advanced.config`][config reference#advanced] with either:

diff --git a/content/riak/kv/2.0.4/using/repair-recovery.md b/content/riak/kv/2.0.4/using/repair-recovery.md
index e4d17467c6..4e3fbcccea 100644
--- a/content/riak/kv/2.0.4/using/repair-recovery.md
+++ b/content/riak/kv/2.0.4/using/repair-recovery.md
@@ -15,7 +15,7 @@ toc: true

[repair recover fail]: ./failure-recovery/
[repair recover errors]: ./errors/
[repair recover repairs]: ./repairs/
-[repair recover restart]: ./rolling-restarts/
+[repair recover restart]: ./rolling-restart/

## In This Section

diff --git a/content/riak/kv/2.0.4/using/repair-recovery/errors.md b/content/riak/kv/2.0.4/using/repair-recovery/errors.md
index 20bff7fbcc..366c13994f 100644
--- a/content/riak/kv/2.0.4/using/repair-recovery/errors.md
+++ b/content/riak/kv/2.0.4/using/repair-recovery/errors.md
@@ -15,7 +15,7 @@ aliases:
  - /riak/kv/2.0.4/ops/running/recovery/errors
---

-[config reference]: /riak/kv/2.0.4/configuring/reference
+[config reference]: {{}}riak/kv/2.0.4/configuring/reference

This is not a comprehensive listing of every error that Riak may
encounter -- screws fall out all of the time, the world is an imperfect

@@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems.

Error | Description | Resolution
:-----|:------------|:----------
`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd
-`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1.
-`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1. 
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1. `{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See 1 +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. 
See 1 `{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check that you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high-bandwidth network is congested, try setting RTO_min down to 0 msec (or 1 msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause of this kind of error; check for the existence of stale `beam.smp` processes.
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.0.4/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.0.4/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.0.4/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.0.4/using/repair-recovery/failure-recovery.md index ed53129b21..d20b31a20e 100644 --- a/content/riak/kv/2.0.4/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.0.4/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.0.4/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.0.4/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.0.4/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.0.4/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
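Where sibling explosion is suspected, the per-node sibling statistics are a quick first check. A minimal sketch, assuming a default local node and the standard `riak-admin` tooling (exact stat names vary slightly by version):

```bash
# Large 99th-percentile/max sibling counts suggest the client
# application is not resolving conflicts in a timely fashion.
riak-admin status | grep -i sibling

# Object sizes grow with sibling count, so check them together.
riak-admin status | grep -i objsize
```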
@@ -115,7 +115,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak/kv/2.0.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.0.4/using/repair-recovery/repairs.md b/content/riak/kv/2.0.4/using/repair-recovery/repairs.md index fb66500b63..9e3a6a42bd 100644 --- a/content/riak/kv/2.0.4/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.0.4/using/repair-recovery/repairs.md @@ -149,7 +149,7 @@ In the event of major hardware or filesystem problems, LevelDB can become corrup ### Checking for Compaction Errors -Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`](/riak/kv/2.0.4/configuring/reference/) configuration file. The default is `./data`. +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`]({{}}riak/kv/2.0.4/configuring/reference/) configuration file. The default is `./data`. Compaction error messages take the following form: @@ -218,23 +218,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.0.4/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.0.4/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.0.4/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/) is enabled. 
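Returning to the compaction check described above, a minimal sketch for scanning every LevelDB `LOG` file at once, assuming a packaged install whose `platform_data_dir` is `/var/lib/riak` (adjust the path to your configured data directory):

```bash
# Print every vnode LOG file that records a compaction error.
grep -l "Compaction error" /var/lib/riak/leveldb/*/LOG

# Each partition listed can then be repaired with the LevelDB repair
# procedure before the node is returned to service.
```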
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.0.4/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.0.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.0.4/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.0.4/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.0.4/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.0.4/using/repair-recovery/rolling-restart.md index 2862a5d3d2..81577970f2 100644 --- a/content/riak/kv/2.0.4/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.0.4/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.4/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.0.4/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.0.4/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.0.4/using/running-a-cluster.md b/content/riak/kv/2.0.4/using/running-a-cluster.md index 4c0f41709c..ce7091aece 100644 --- a/content/riak/kv/2.0.4/using/running-a-cluster.md +++ b/content/riak/kv/2.0.4/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. -Most configuration changes will be applied to the [configuration file](/riak/kv/2.0.4/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.0.4/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.0.4/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. 
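As a sketch of the rename path mentioned above, with hypothetical node names: `riak-admin cluster replace` stages the change, which is then reviewed and committed:

```bash
# Stage the replacement of the old node name with the new one,
# inspect the planned membership changes, then commit them.
riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10
riak-admin cluster plan
riak-admin cluster commit
```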
## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.0.4/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.0.4/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.0.4/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.0.4/using/cluster-operations/adding-removing-nodes) from a cluster. > **Ring Creation Size** > diff --git a/content/riak/kv/2.0.4/using/security.md b/content/riak/kv/2.0.4/using/security.md index 1beb2d6605..dbebea6e16 100644 --- a/content/riak/kv/2.0.4/using/security.md +++ b/content/riak/kv/2.0.4/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.0.4/ops/advanced/security --- -[config reference search]: /riak/kv/2.0.4/configuring/reference/#search -[config search enabling]: /riak/kv/2.0.4/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.0.4/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.0.4/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.0.4/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.0.4/using/security/basics -[security managing]: /riak/kv/2.0.4/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.0.4/using/security/basics +[security managing]: {{}}riak/kv/2.0.4/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.0.4/developing/usage/search +[usage search]: {{}}riak/kv/2.0.4/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.0.4/using/security/basics.md b/content/riak/kv/2.0.4/using/security/basics.md index 153cd85c2d..ad0f070512 100644 --- a/content/riak/kv/2.0.4/using/security/basics.md +++ b/content/riak/kv/2.0.4/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.0.4/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.0.4/using/security/managing-sources/). 
As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.0.4/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.0.4/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.0.4/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.0.4/using/reference/custom-code). 1. Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.0.4/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.0.4/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.0.4/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.0.4/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{}}riak/kv/2.0.4/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.0.4/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{}}riak/kv/2.0.4/developing/usage/bucket-types) in addition to setting bucket properties. 
`riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.0.4/configuring/search/) document. +[Riak Search Settings]({{}}riak/kv/2.0.4/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.0.4/using/security/managing-sources/). +A more in-depth tutorial can be found in [Managing Security Sources]({{}}riak/kv/2.0.4/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.0.4/using/security/managing-sources/) document. +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.0.4/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.0.4/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.0.4/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.0.4/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.0.4/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.0.4/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.0.4/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.0.4/configuring/reference/#client-interfaces) for the node as well as a [certificate configuration](#certificate-configuration).
-If, however, you are using the [HTTP API](/riak/kv/2.0.4/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.0.4/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.0.4/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.0.4/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.0.4/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.0.4/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.0.4/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.0.4/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.0.4/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.0.4/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.0.4/configuring/reference/#directories).
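Pulling the preceding settings together, a minimal sketch of the relevant `riak.conf` entries; the paths, listener name, and address are placeholders to adapt to your own nodes:

```bash
# Append illustrative security settings to riak.conf.
cat <<'EOF' >> /etc/riak/riak.conf
# Certificate paths used for secure connections
ssl.certfile = /etc/riak/cert.pem
ssl.keyfile = /etc/riak/key.pem
ssl.cacertfile = /etc/riak/cacert.pem

# Optional HTTPS listener
listener.https.internal = 127.0.0.1:8088

# Prefer the server's cipher order and allow only TLS 1.2
honor_cipher_order = on
tls_protocols.tlsv1 = off
tls_protocols.tlsv1.1 = off
tls_protocols.tlsv1.2 = on
EOF
```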
 vnode_management_timer
 Sets the frequency with which <a
-href="/riak/kv/2.0.4/learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
+href="../../learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
 this node and other nodes in the cluster.
 10s (10 seconds)
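For context, the fragment above is a config-reference table row for `vnode_management_timer`; as a sketch, the parameter can be set in `riak.conf` (the value shown is the documented 10-second default):

```bash
# Keep the vnode management interval at its documented default.
echo "vnode_management_timer = 10s" >> /etc/riak/riak.conf
```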
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.0.4/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificates by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.0.4/using/security/managing-sources.md b/content/riak/kv/2.0.4/using/security/managing-sources.md index d3d46025ce..8efcb9b200 100644 --- a/content/riak/kv/2.0.4/using/security/managing-sources.md +++ b/content/riak/kv/2.0.4/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.0.4/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.0.4/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.0.4/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.0.4/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.0.4/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.0.4/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.0.4/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.0.4/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.0.4/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.0.4/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e.
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.0.4/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.0.4/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.0.4/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.0.4/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.0.5/_reference-links.md b/content/riak/kv/2.0.5/_reference-links.md index c6f457a1c3..684aa256b5 100644 --- a/content/riak/kv/2.0.5/_reference-links.md +++ b/content/riak/kv/2.0.5/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.0.5/downloads/ -[install index]: /riak/kv/2.0.5/setup/installing -[upgrade index]: /riak/kv/2.0.5/upgrading -[plan index]: /riak/kv/2.0.5/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.0.5/configuring/reference/ -[manage index]: /riak/kv/2.0.5/using/managing -[performance index]: /riak/kv/2.0.5/using/performance -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.0.5/downloads/ +[install index]: {{}}riak/kv/2.0.5/setup/installing +[upgrade index]: {{}}riak/kv/2.0.5/upgrading +[plan index]: {{}}riak/kv/2.0.5/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.0.5/configuring/reference/ +[manage index]: {{}}riak/kv/2.0.5/using/managing +[performance index]: {{}}riak/kv/2.0.5/using/performance +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.0.5/setup/planning -[plan start]: /riak/kv/2.0.5/setup/planning/start -[plan backend]: /riak/kv/2.0.5/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.5/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.5/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.0.5/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.0.5/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.0.5/setup/planning/best-practices -[plan future]: /riak/kv/2.0.5/setup/planning/future +[plan index]: {{}}riak/kv/2.0.5/setup/planning +[plan start]: {{}}riak/kv/2.0.5/setup/planning/start +[plan backend]: {{}}riak/kv/2.0.5/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.5/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.5/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.0.5/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.0.5/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.0.5/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.0.5/setup/planning/future ## Installing -[install index]: /riak/kv/2.0.5/setup/installing -[install aws]: /riak/kv/2.0.5/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.5/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.5/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.5/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.5/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.5/setup/installing/smartos -[install solaris]: /riak/kv/2.0.5/setup/installing/solaris -[install suse]: /riak/kv/2.0.5/setup/installing/suse -[install windows azure]: /riak/kv/2.0.5/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.0.5/setup/installing +[install aws]: {{}}riak/kv/2.0.5/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.5/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.5/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.5/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.5/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.5/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.5/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.5/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.5/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.5/setup/installing/source -[install source erlang]: /riak/kv/2.0.5/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.0.5/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.0.5/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.5/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.0.5/setup/installing/source/jvm -[install verify]: /riak/kv/2.0.5/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.5/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.0.5/setup/upgrading -[upgrade checklist]: /riak/kv/2.0.5/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.0.5/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.0.5/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.0.5/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.0.5/setup/downgrade +[upgrade index]: {{}}riak/kv/2.0.5/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.0.5/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.0.5/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.0.5/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.0.5/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.0.5/setup/downgrade ## Configuring -[config index]: /riak/kv/2.0.5/configuring -[config basic]: /riak/kv/2.0.5/configuring/basic -[config backend]: /riak/kv/2.0.5/configuring/backend -[config manage]: /riak/kv/2.0.5/configuring/managing -[config reference]: /riak/kv/2.0.5/configuring/reference/ -[config strong consistency]: /riak/kv/2.0.5/configuring/strong-consistency -[config load balance]: /riak/kv/2.0.5/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.0.5/configuring/mapreduce -[config search]: /riak/kv/2.0.5/configuring/search/ +[config index]: {{}}riak/kv/2.0.5/configuring +[config basic]: {{}}riak/kv/2.0.5/configuring/basic +[config backend]: {{}}riak/kv/2.0.5/configuring/backend +[config manage]: 
{{}}riak/kv/2.0.5/configuring/managing +[config reference]: {{}}riak/kv/2.0.5/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.0.5/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.0.5/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.0.5/configuring/mapreduce +[config search]: {{}}riak/kv/2.0.5/configuring/search/ -[config v3 mdc]: /riak/kv/2.0.5/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.5/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.5/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.0.5/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.0.5/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.0.5/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.0.5/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.0.5/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.0.5/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.0.5/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.0.5/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.0.5/using/ -[use admin commands]: /riak/kv/2.0.5/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.0.5/using/running-a-cluster +[use index]: {{}}riak/kv/2.0.5/using/ +[use admin commands]: {{}}riak/kv/2.0.5/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.0.5/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.0.5/using/reference/custom-code -[use ref handoff]: /riak/kv/2.0.5/using/reference/handoff -[use ref monitoring]: /riak/kv/2.0.5/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.0.5/using/reference/search -[use ref 2i]: /riak/kv/2.0.5/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.0.5/using/reference/snmp -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.0.5/using/reference/jmx -[use ref obj del]: /riak/kv/2.0.5/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.0.5/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.0.5/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.0.5/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.0.5/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.0.5/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.0.5/using/reference/search +[use ref 2i]: {{}}riak/kv/2.0.5/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.0.5/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.0.5/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.0.5/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.0.5/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.0.5/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.0.5/using/admin/ -[use admin commands]: /riak/kv/2.0.5/using/admin/commands/ -[use admin riak cli]: 
/riak/kv/2.0.5/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.0.5/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.0.5/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.0.5/using/admin/ +[use admin commands]: {{}}riak/kv/2.0.5/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.0.5/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.0.5/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.0.5/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.0.5/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.0.5/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.0.5/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.0.5/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.0.5/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.0.5/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.0.5/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.0.5/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.0.5/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.0.5/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.0.5/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.0.5/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.0.5/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.0.5/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.0.5/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.0.5/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.0.5/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.0.5/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.0.5/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.0.5/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.0.5/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.0.5/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.0.5/using/repair-recovery -[repair recover index]: /riak/kv/2.0.5/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.0.5/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.0.5/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.0.5/using/security/ -[security basics]: /riak/kv/2.0.5/using/security/basics -[security managing]: /riak/kv/2.0.5/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.0.5/using/security/ +[security basics]: {{}}riak/kv/2.0.5/using/security/basics +[security managing]: {{}}riak/kv/2.0.5/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.0.5/using/performance/ -[perf 
benchmark]: /riak/kv/2.0.5/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.5/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.0.5/using/performance/erlang -[perf aws]: /riak/kv/2.0.5/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.0.5/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.0.5/using/performance/ +[perf benchmark]: {{}}riak/kv/2.0.5/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.5/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.0.5/using/performance/erlang +[perf aws]: {{}}riak/kv/2.0.5/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.0.5/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.0.5/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.0.5/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.0.5/developing -[dev client libraries]: /riak/kv/2.0.5/developing/client-libraries -[dev data model]: /riak/kv/2.0.5/developing/data-modeling -[dev data types]: /riak/kv/2.0.5/developing/data-types -[dev kv model]: /riak/kv/2.0.5/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.0.5/developing +[dev client libraries]: {{}}riak/kv/2.0.5/developing/client-libraries +[dev data model]: {{}}riak/kv/2.0.5/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.5/developing/data-types +[dev kv model]: {{}}riak/kv/2.0.5/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.0.5/developing/getting-started -[getting started java]: /riak/kv/2.0.5/developing/getting-started/java -[getting started ruby]: /riak/kv/2.0.5/developing/getting-started/ruby -[getting started python]: /riak/kv/2.0.5/developing/getting-started/python -[getting started php]: /riak/kv/2.0.5/developing/getting-started/php -[getting started csharp]: /riak/kv/2.0.5/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.0.5/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.0.5/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.0.5/developing/getting-started/golang - -[obj model java]: /riak/kv/2.0.5/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.5/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.5/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.5/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.5/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.5/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.5/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.0.5/developing/getting-started +[getting started java]: {{}}riak/kv/2.0.5/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.0.5/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.0.5/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.0.5/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.0.5/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.0.5/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.0.5/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.0.5/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.0.5/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.0.5/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.5/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.5/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.5/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.5/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.5/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.0.5/developing/usage -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.5/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.5/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.0.5/developing/usage/content-types -[usage create objects]: /riak/kv/2.0.5/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.0.5/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.0.5/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.0.5/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.5/developing/usage/search -[usage search schema]: /riak/kv/2.0.5/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.5/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.0.5/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.0.5/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.0.5/developing/usage +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.5/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.5/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.0.5/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.0.5/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.0.5/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.0.5/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.0.5/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.5/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.5/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.5/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.0.5/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.0.5/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.0.5/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.0.5/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.0.5/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.0.5/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.0.5/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.0.5/developing/api/backend -[dev api http]: /riak/kv/2.0.5/developing/api/http -[dev api http status]: /riak/kv/2.0.5/developing/api/http/status -[dev api pbc]: /riak/kv/2.0.5/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.0.5/developing/api/backend +[dev api http]: {{}}riak/kv/2.0.5/developing/api/http +[dev api http status]: {{}}riak/kv/2.0.5/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.0.5/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.0.5/learn/glossary/ -[glossary aae]: /riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.0.5/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.0.5/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.0.5/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode -[concept aae]: /riak/kv/2.0.5/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.0.5/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.5/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.5/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.0.5/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.5/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.5/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.5/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.5/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.0.5/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.5/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.5/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.0.5/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.5/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.5/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.5/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.0.5/add-ons.md b/content/riak/kv/2.0.5/add-ons.md index 9df16100f5..16c2fb84ad 100644 --- a/content/riak/kv/2.0.5/add-ons.md +++ b/content/riak/kv/2.0.5/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.0.5/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.0.5/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.0.5/add-ons/redis/developing-rra.md b/content/riak/kv/2.0.5/add-ons/redis/developing-rra.md index 646cad4b16..8ea28e2cf9 100644 --- a/content/riak/kv/2.0.5/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.0.5/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.0.5/developing/api/http +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.0.5/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.0.5/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.0.5/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.0.5/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.0.5/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.0.5/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.0.5/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.0.5/add-ons/redis/redis-add-on-features.md index 26dded7984..1ab391f231 100644 --- a/content/riak/kv/2.0.5/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.0.5/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
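Because the cache proxy speaks the Redis protocol, the read-through path can be exercised with any standard Redis client. A minimal sketch with `redis-cli`; the host and port are hypothetical and should match your RRA listener configuration:

```bash
# First GET misses the cache, reads through to Riak KV, and
# populates Redis with the returned value.
redis-cli -h 127.0.0.1 -p 22122 GET test:user:1

# A second GET within CACHE_TTL is served directly from the cache.
redis-cli -h 127.0.0.1 -p 22122 GET test:user:1
```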
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.0.5/add-ons/redis/set-up-rra.md b/content/riak/kv/2.0.5/add-ons/redis/set-up-rra.md index f6e38d5a4b..bc2737416e 100644 --- a/content/riak/kv/2.0.5/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.0.5/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.0.5/setup/installing -[perf open files]: /riak/kv/2.0.5/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.0.5/setup/installing +[perf open files]: {{}}riak/kv/2.0.5/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. 
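Since the setup prerequisites point at the open-files-limit documentation, a quick sketch for verifying the limit before installing RRA (the raised value is illustrative; a persistent change belongs in `/etc/security/limits.conf` or your service manager's unit file):

```bash
# Check the open-files limit for the user that will run RRA.
ulimit -n

# Raise it for the current shell session only.
ulimit -n 65536
```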
diff --git a/content/riak/kv/2.0.5/add-ons/redis/using-rra.md b/content/riak/kv/2.0.5/add-ons/redis/using-rra.md index e12846bc4e..f109864718 100644 --- a/content/riak/kv/2.0.5/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.0.5/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.0.5/developing/api/http/ +[dev api http]: {{}}riak/kv/2.0.5/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.0.5/configuring/backend.md b/content/riak/kv/2.0.5/configuring/backend.md index 378c04c081..1ae83a713d 100644 --- a/content/riak/kv/2.0.5/configuring/backend.md +++ b/content/riak/kv/2.0.5/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.5/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.5/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.5/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.5/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.0.5/configuring/basic.md b/content/riak/kv/2.0.5/configuring/basic.md index cd72b21cc0..f794258eb2 100644 --- a/content/riak/kv/2.0.5/configuring/basic.md +++ b/content/riak/kv/2.0.5/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.0.5/ops/building/configuration/ --- -[config reference]: /riak/kv/2.0.5/configuring/reference -[use running cluster]: /riak/kv/2.0.5/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.0.5/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.0.5/using/performance/erlang -[plan start]: /riak/kv/2.0.5/setup/planning/start -[plan best practices]: /riak/kv/2.0.5/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.0.5/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.0.5/setup/planning/backend -[plan backend multi]: /riak/kv/2.0.5/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.0.5/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.0.5/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.5/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.5/using/performance -[perf aws]: /riak/kv/2.0.5/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.0.5/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[use running cluster]: {{}}riak/kv/2.0.5/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.0.5/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.0.5/using/performance/erlang +[plan start]: {{}}riak/kv/2.0.5/setup/planning/start +[plan best practices]: {{}}riak/kv/2.0.5/setup/planning/best-practices +[cluster ops backup]: 
{{}}riak/kv/2.0.5/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.0.5/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.0.5/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.5/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.0.5/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.0.5/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.5/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.5/using/performance +[perf aws]: {{}}riak/kv/2.0.5/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.0.5/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.0.5/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.5/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
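For example, resetting a single bucket's properties so that it falls back to the new defaults is a one-line HTTP call. A minimal sketch, assuming a node's HTTP interface on `localhost:8098` and a hypothetical bucket named `my_bucket`:

```curl
# Reset my_bucket's properties; it will pick up the configured defaults
curl -XDELETE http://localhost:8098/buckets/my_bucket/props
```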
## System tuning diff --git a/content/riak/kv/2.0.5/configuring/load-balancing-proxy.md b/content/riak/kv/2.0.5/configuring/load-balancing-proxy.md index 178c14023d..3beef4d9c3 100644 --- a/content/riak/kv/2.0.5/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.0.5/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.0.5/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.0.5/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.0.5/configuring/managing.md b/content/riak/kv/2.0.5/configuring/managing.md index ba58f70349..9ac9e5d852 100644 --- a/content/riak/kv/2.0.5/configuring/managing.md +++ b/content/riak/kv/2.0.5/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.0.5/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.0.5/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.0.5/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.0.5/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.0.5/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.0.5/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.0.5/configuring/mapreduce.md b/content/riak/kv/2.0.5/configuring/mapreduce.md index 6c03da026e..5d3c975171 100644 --- a/content/riak/kv/2.0.5/configuring/mapreduce.md +++ b/content/riak/kv/2.0.5/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.0.5/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.0.5/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.0.5/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.0.5/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.5/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.0.5/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.0.5/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.0.5/configuring/reference.md b/content/riak/kv/2.0.5/configuring/reference.md index a9f12ff3a9..8b399aca19 100644 --- a/content/riak/kv/2.0.5/configuring/reference.md +++ b/content/riak/kv/2.0.5/configuring/reference.md @@ -1876,8 +1876,8 @@ package) and in R14B04 via a custom repository and branch. 
diff --git a/content/riak/kv/2.0.5/configuring/search.md b/content/riak/kv/2.0.5/configuring/search.md index f8e1059fd6..39da4b09df 100644 --- a/content/riak/kv/2.0.5/configuring/search.md +++ b/content/riak/kv/2.0.5/configuring/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.0.5/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.0.5/developing/usage/search -[usage search schema]: /riak/kv/2.0.5/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.5/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.0.5/developing/usage/custom-extractors -[config reference]: /riak/kv/2.0.5/configuring/reference -[config reference#search]: /riak/kv/2.0.5/configuring/reference/#search -[glossary aae]: /riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.0.5/using/security/ +[usage search]: {{}}riak/kv/2.0.5/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.5/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.5/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.0.5/developing/usage/custom-extractors +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[config reference#search]: {{}}riak/kv/2.0.5/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.0.5/using/security/ > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Riak Search Settings](http://docs.basho.com/riak/1.4.8/ops/advanced/configs/search/). +Yokozuna). This document covers Riak's Search subsystem from an operational perspective. 
If you are looking for more developer-focused diff --git a/content/riak/kv/2.0.5/configuring/strong-consistency.md b/content/riak/kv/2.0.5/configuring/strong-consistency.md index 31ebc6717b..bf87e5cf0e 100644 --- a/content/riak/kv/2.0.5/configuring/strong-consistency.md +++ b/content/riak/kv/2.0.5/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.0.5/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.0.5/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.0.5/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.0.5/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.0.5/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.0.5/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.0.5/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.0.5/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.0.5/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.0.5/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.0.5/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.0.5/learn/concepts/causal-context -[dev data types]: /riak/kv/2.0.5/developing/data-types -[glossary aae]: /riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.0.5/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.0.5/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.0.5/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.0.5/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.0.5/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.0.5/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.0.5/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.0.5/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.0.5/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.0.5/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.0.5/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.0.5/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.0.5/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.0.5/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.0.5/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.0.5/learn/concepts/causal-context +[dev data types]: 
{{}}riak/kv/2.0.5/developing/data-types +[glossary aae]: {{}}riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.0.5/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.0.5/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.0.5/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.0.5/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
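The table above describes the output of the `riak-admin ensemble-status` command. A minimal sketch of invoking it, where the per-ensemble form is an assumption based on the ensemble IDs shown in the listing:

```bash
# Summarize the consensus subsystem across the cluster
riak-admin ensemble-status

# Drill into a single ensemble by its ID (e.g. the root ensemble)
riak-admin ensemble-status root
```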
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.0.5/configuring/v2-multi-datacenter.md b/content/riak/kv/2.0.5/configuring/v2-multi-datacenter.md index ea1a14b5be..df246f5c28 100644 --- a/content/riak/kv/2.0.5/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.5/configuring/v2-multi-datacenter.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.5/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.0.5/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.5/configuring/v2-multi-datacenter/ssl Riak Enterprise's Multi-Datacenter Replication capabilities offer a variety of configurable parameters. diff --git a/content/riak/kv/2.0.5/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.0.5/configuring/v2-multi-datacenter/nat.md index 9d8eb9f87e..d750a82944 100644 --- a/content/riak/kv/2.0.5/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.5/configuring/v2-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.5/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.0.5/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.5/configuring/v2-multi-datacenter/ssl Riak Enterprise supports replication of data on networks that use static NAT. This capability can be used for replicating data over the internet diff --git a/content/riak/kv/2.0.5/configuring/v3-multi-datacenter.md b/content/riak/kv/2.0.5/configuring/v3-multi-datacenter.md index ec190d64f1..67697c3222 100644 --- a/content/riak/kv/2.0.5/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.5/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.5/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.0.5/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.0.5/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/nat.md index 72f760f9e9..d821eb7331 100644 --- a/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.5/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. 
diff --git a/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/quick-start.md index b5a70b5734..ff5b496133 100644 --- a/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.5/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.0.5/using/performance -[config v3 mdc]: /riak/kv/2.0.5/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.0.5/using/performance +[config v3 mdc]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl.md index f4cb541a85..4a9b308741 100644 --- a/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.5/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.0.5/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.0.5/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.0.5/developing/api/backend.md b/content/riak/kv/2.0.5/developing/api/backend.md index 929943d6a0..085fd6d0e4 100644 --- a/content/riak/kv/2.0.5/developing/api/backend.md +++ b/content/riak/kv/2.0.5/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/backend-api --- -[plan backend]: /riak/kv/2.0.5/setup/planning/backend +[plan backend]: {{}}riak/kv/2.0.5/setup/planning/backend Riak's storage API uniformly applies to all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.0.5/developing/api/http.md b/content/riak/kv/2.0.5/developing/api/http.md index 81936a67eb..52e5ffee1e 100644 --- a/content/riak/kv/2.0.5/developing/api/http.md +++ b/content/riak/kv/2.0.5/developing/api/http.md @@ -29,50 +29,50 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. 
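For example, a key that contains a slash must have that slash percent-encoded before being placed in the URL. A minimal sketch, assuming a node on `localhost:8098` and a hypothetical key `docs/readme` in bucket `my_bucket` under the `default` bucket type:

```curl
# Fetch the key "docs/readme"; the embedded slash is escaped as %2F
curl http://localhost:8098/types/default/buckets/my_bucket/keys/docs%2Freadme
```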
Method | URL | Doc :------|:----|:--- -`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties](/riak/kv/2.0.5/developing/api/http/get-bucket-props) -`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties](/riak/kv/2.0.5/developing/api/http/set-bucket-props) -`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties](/riak/kv/2.0.5/developing/api/http/reset-bucket-props) -`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.0.5/developing/api/http/list-buckets) -`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys](/riak/kv/2.0.5/developing/api/http/list-keys) +`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.0.5/developing/api/http/get-bucket-props) +`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.0.5/developing/api/http/set-bucket-props) +`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.5/developing/api/http/reset-bucket-props) +`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.0.5/developing/api/http/list-buckets) +`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.0.5/developing/api/http/list-keys) ## Object-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/types//buckets//keys/` | [HTTP Fetch Object](/riak/kv/2.0.5/developing/api/http/fetch-object) -`POST` | `/types//buckets//keys/` | [HTTP Store Object](/riak/kv/2.0.5/developing/api/http/store-object) -`PUT` | `/types//buckets//keys/` | [HTTP Store Object](/riak/kv/2.0.5/developing/api/http/store-object) -`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object](/riak/kv/2.0.5/developing/api/http/delete-object) +`GET` | `/types//buckets//keys/` | [HTTP Fetch Object]({{}}riak/kv/2.0.5/developing/api/http/fetch-object) +`POST` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.0.5/developing/api/http/store-object) +`PUT` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.0.5/developing/api/http/store-object) +`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object]({{}}riak/kv/2.0.5/developing/api/http/delete-object) ## Riak-Data-Type-related Operations -For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.0.5/learn/concepts/crdts), -see the `curl` examples in [Using Data Types](/riak/kv/2.0.5/developing/data-types). +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.0.5/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.0.5/developing/data-types). 
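As a taste of those `curl` examples, a counter can be updated and read over HTTP. A minimal sketch, assuming a node on `localhost:8098` and a bucket type named `counters` whose `datatype` property is `counter`; the `/datatypes/` endpoint shown here follows the Using Data Types examples and is an assumption of this sketch:

```curl
# Increment the counter stored at my_bucket/my_counter by 5
curl -XPOST http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_counter \
  -H "Content-Type: application/json" \
  -d '{"increment": 5}'

# Read the counter's current value back
curl http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_counter
```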
## Query-related Operations Method | URL | Doc :------|:----|:--- -`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.0.5/developing/api/http/mapreduce) -`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes](/riak/kv/2.0.5/developing/api/http/secondary-indexes) -`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes](/riak/kv/2.0.5/developing/api/http/secondary-indexes) +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.0.5/developing/api/http/mapreduce) +`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.5/developing/api/http/secondary-indexes) +`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.5/developing/api/http/secondary-indexes) ## Server-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/ping` | [HTTP Ping](/riak/kv/2.0.5/developing/api/http/ping) -`GET` | `/stats` | [HTTP Status](/riak/kv/2.0.5/developing/api/http/status) -`GET` | `/` | [HTTP List Resources](/riak/kv/2.0.5/developing/api/http/list-resources) +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.0.5/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.0.5/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.0.5/developing/api/http/list-resources) ## Search-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/search/query/` | [HTTP Search Query](/riak/kv/2.0.5/developing/api/http/search-query) -`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.0.5/developing/api/http/search-index-info) -`GET` | `/search/index/` | [HTTP Fetch Search Index](/riak/kv/2.0.5/developing/api/http/fetch-search-index) -`PUT` | `/search/index/` | [HTTP Store Search Index](/riak/kv/2.0.5/developing/api/http/store-search-index) -`DELETE` | `/search/index/` | [HTTP Delete Search Index](/riak/kv/2.0.5/developing/api/http/delete-search-index) -`GET` | `/search/schema/` | [HTTP Fetch Search Schema](/riak/kv/2.0.5/developing/api/http/fetch-search-schema) -`PUT` | `/search/schema/` | [HTTP Store Search Schema](/riak/kv/2.0.5/developing/api/http/store-search-schema) +`GET` | `/search/query/` | [HTTP Search Query]({{}}riak/kv/2.0.5/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.0.5/developing/api/http/search-index-info) +`GET` | `/search/index/` | [HTTP Fetch Search Index]({{}}riak/kv/2.0.5/developing/api/http/fetch-search-index) +`PUT` | `/search/index/` | [HTTP Store Search Index]({{}}riak/kv/2.0.5/developing/api/http/store-search-index) +`DELETE` | `/search/index/` | [HTTP Delete Search Index]({{}}riak/kv/2.0.5/developing/api/http/delete-search-index) +`GET` | `/search/schema/` | [HTTP Fetch Search Schema]({{}}riak/kv/2.0.5/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/` | [HTTP Store Search Schema]({{}}riak/kv/2.0.5/developing/api/http/store-search-schema) diff --git a/content/riak/kv/2.0.5/developing/api/http/counters.md b/content/riak/kv/2.0.5/developing/api/http/counters.md index 28ed1134ab..6d5eb39331 100644 --- a/content/riak/kv/2.0.5/developing/api/http/counters.md +++ b/content/riak/kv/2.0.5/developing/api/http/counters.md @@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY ## Response -The regular POST/PUT ([HTTP Store Object](/riak/kv/2.0.5/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.0.5/developing/api/http/fetch-object)) responses apply here. 
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.0.5/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.0.5/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.0.5/developing/api/http/fetch-object.md b/content/riak/kv/2.0.5/developing/api/http/fetch-object.md index c217e44f64..b7180e84fc 100644 --- a/content/riak/kv/2.0.5/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.0.5/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.0.5/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.0.5/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.0.5/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.5/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.0.5/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.5/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.0.5/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.0.5/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.5/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.5/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.0.5/developing/api/http/fetch-search-index.md b/content/riak/kv/2.0.5/developing/api/http/fetch-search-index.md index acc5d9d092..7e235b1d81 100644 --- a/content/riak/kv/2.0.5/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.0.5/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.0.5/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.0.5/developing/usage/search/#simple-setup). 
## Request @@ -36,7 +36,7 @@ GET /search/index/ ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.0.5/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.0.5/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.0.5/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.0.5/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.0.5/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.0.5/developing/api/http/fetch-search-schema.md index 8c7bc7e4a8..2fac97b311 100644 --- a/content/riak/kv/2.0.5/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.0.5/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.0.5/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.0.5/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.0.5/developing/api/http/get-bucket-props.md b/content/riak/kv/2.0.5/developing/api/http/get-bucket-props.md index e98e43085d..609ef93e5e 100644 --- a/content/riak/kv/2.0.5/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.0.5/developing/api/http/get-bucket-props.md @@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.0.5/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.0.5/developing/api/http/list-keys). ## Response @@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.0.5/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{}}riak/kv/2.0.5/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.0.5/developing/api/http/link-walking.md b/content/riak/kv/2.0.5/developing/api/http/link-walking.md index 9620067e7e..d591afe51e 100644 --- a/content/riak/kv/2.0.5/developing/api/http/link-walking.md +++ b/content/riak/kv/2.0.5/developing/api/http/link-walking.md @@ -17,8 +17,8 @@ aliases: Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.0.5/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.0.5/learn/glossary/#links). +is a special case of [MapReduce]({{}}riak/kv/2.0.5/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.0.5/learn/glossary/#links). ## Request @@ -64,7 +64,7 @@ single object that was found. If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. 
Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.0.5/developing/api/http/fetch-object), without the status +response from [fetching the object]({{}}riak/kv/2.0.5/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.0.5/developing/api/http/list-resources.md b/content/riak/kv/2.0.5/developing/api/http/list-resources.md index 753c6514bf..8747e65c0b 100644 --- a/content/riak/kv/2.0.5/developing/api/http/list-resources.md +++ b/content/riak/kv/2.0.5/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.0.5/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.0.5/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.0.5/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.0.5/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.0.5/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.0.5/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.0.5/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.0.5/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.0.5/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.0.5/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.0.5/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.0.5/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.0.5/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.0.5/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.0.5/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.0.5/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.0.5/developing/api/http/mapreduce.md b/content/riak/kv/2.0.5/developing/api/http/mapreduce.md index d6e301a888..62c20b0a1a 100644 --- a/content/riak/kv/2.0.5/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.0.5/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.0.5/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{}}riak/kv/2.0.5/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.0.5/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. 
The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.0.5/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.0.5/developing/api/http/search-index-info.md b/content/riak/kv/2.0.5/developing/api/http/search-index-info.md index a4b0ed9f18..3ebbb56f6f 100644 --- a/content/riak/kv/2.0.5/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.0.5/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.0.5/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.0.5/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.0.5/developing/api/http/search-query.md b/content/riak/kv/2.0.5/developing/api/http/search-query.md index f502c4357f..8b3c316a77 100644 --- a/content/riak/kv/2.0.5/developing/api/http/search-query.md +++ b/content/riak/kv/2.0.5/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.0.5/developing/usage/search) query. +Performs a [Riak KV Search]({{}}riak/kv/2.0.5/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.0.5/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{}}riak/kv/2.0.5/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.0.5/developing/api/http/secondary-indexes.md b/content/riak/kv/2.0.5/developing/api/http/secondary-indexes.md index 792682ad33..d3248cf1f9 100644 --- a/content/riak/kv/2.0.5/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.0.5/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.0.5/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
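For instance, an object can be tagged at write time with an `X-Riak-Index-*` header and the index queried later to recover matching keys. A minimal sketch, assuming a node on `localhost:8098` and a hypothetical `users` bucket under the `default` bucket type:

```curl
# Store a user object tagged with a binary email index
curl -XPUT http://localhost:8098/types/default/buckets/users/keys/john \
  -H "Content-Type: application/json" \
  -H "X-Riak-Index-email_bin: john@example.com" \
  -d '{"name": "John"}'

# Query the index for all keys tagged with that email
curl http://localhost:8098/types/default/buckets/users/index/email_bin/john@example.com
```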
## Request diff --git a/content/riak/kv/2.0.5/developing/api/http/set-bucket-props.md b/content/riak/kv/2.0.5/developing/api/http/set-bucket-props.md index 0315570c9a..d0fe32c17b 100644 --- a/content/riak/kv/2.0.5/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.0.5/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.0.5/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.0.5/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{}}riak/kv/2.0.5/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.0.5/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.0.5/developing/api/http/status.md b/content/riak/kv/2.0.5/developing/api/http/status.md index fe394ee043..6cd3372bf7 100644 --- a/content/riak/kv/2.0.5/developing/api/http/status.md +++ b/content/riak/kv/2.0.5/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.0.5/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.0.5/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.0.5/developing/api/http/store-object.md b/content/riak/kv/2.0.5/developing/api/http/store-object.md index 7dfb7ae3e6..4300681d86 100644 --- a/content/riak/kv/2.0.5/developing/api/http/store-object.md +++ b/content/riak/kv/2.0.5/developing/api/http/store-object.md @@ -38,8 +38,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.0.5/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.5/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{}}riak/kv/2.0.5/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.5/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -83,7 +83,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.0.5/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.0.5/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. 
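The `returnbody` behavior described above is easy to observe from the command line. A minimal sketch, assuming a node on `localhost:8098` and a hypothetical plain-text object:

```curl
# Store an object and ask Riak to echo the stored value and headers back
curl -v -XPUT "http://localhost:8098/types/default/buckets/my_bucket/keys/greeting?returnbody=true" \
  -H "Content-Type: text/plain" \
  -d "hello riak"
```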
diff --git a/content/riak/kv/2.0.5/developing/api/http/store-search-index.md b/content/riak/kv/2.0.5/developing/api/http/store-search-index.md index c25a1b0f06..211e873ae7 100644 --- a/content/riak/kv/2.0.5/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.0.5/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.0.5/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.0.5/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.0.5/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.0.5/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.0.5/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.0.5/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.0.5/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.0.5/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.0.5/developing/api/http/store-search-schema.md b/content/riak/kv/2.0.5/developing/api/http/store-search-schema.md index 86811507fa..a339fc57a7 100644 --- a/content/riak/kv/2.0.5/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.0.5/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.0.5/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.0.5/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/schema/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.0.5/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.0.5/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the filed `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request: diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers.md index 4355f6e1b9..6ac9d1f3be 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers.md @@ -139,47 +139,47 @@ message RpbErrorResp { ## Bucket Operations -* [PBC List Buckets](/riak/kv/2.0.5/developing/api/protocol-buffers/list-buckets) -* [PBC List Keys](/riak/kv/2.0.5/developing/api/protocol-buffers/list-keys) -* [PBC Get Bucket Properties](/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props) -* [PBC Set Bucket Properties](/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props) -* [PBC Reset Bucket Properties](/riak/kv/2.0.5/developing/api/protocol-buffers/reset-bucket-props) +* [PBC List Buckets]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/reset-bucket-props) ## Object/Key Operations -* [PBC Fetch Object](/riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object) -* [PBC Store Object](/riak/kv/2.0.5/developing/api/protocol-buffers/store-object) -* [PBC Delete Object](/riak/kv/2.0.5/developing/api/protocol-buffers/delete-object) +* [PBC Fetch Object]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/delete-object) ## Query Operations -* [PBC MapReduce](/riak/kv/2.0.5/developing/api/protocol-buffers/mapreduce) -* [PBC Secondary Indexes](/riak/kv/2.0.5/developing/api/protocol-buffers/secondary-indexes) -* [PBC Search](/riak/kv/2.0.5/developing/api/protocol-buffers/search) +* [PBC MapReduce]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/search) ## Server Operations -* [PBC Ping](/riak/kv/2.0.5/developing/api/protocol-buffers/ping) -* [PBC Server Info](/riak/kv/2.0.5/developing/api/protocol-buffers/server-info) +* [PBC Ping]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/server-info) ## Bucket Type Operations -* [PBC Get Bucket Type](/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-type) -* [PBC Set Bucket Type](/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-type) +* [PBC Get Bucket Type]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-type) ## Data Type Operations -* [PBC Data Type Fetch](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-fetch) -* [PBC Data Type Union](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-union) -* [PBC Data Type Store](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-store) -* [PBC Data Type Counter 
Store](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/auth-req.md index 5d33db823f..444ae8116d 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/auth-req.md @@ -27,4 +27,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.0.5/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.0.5/using/security/basics). diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/delete-object.md index da39512f99..50716b59f6 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/delete-object.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.0.5/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{}}riak/kv/2.0.5/using/cluster-operations/bucket-types)/bucket/key location. 
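This page covers the protocol buffers message; for comparison, here is a minimal sketch of the same delete expressed through the HTTP API, assuming a node on `localhost:8098` and a hypothetical object under the `default` bucket type:

```curl
# Delete the object at my_bucket/my_key (HTTP equivalent of the PBC delete)
curl -XDELETE http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
```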
## Request diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-counter-store.md index d5eed1b9c7..81e72cf2ff 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.0.5/developing/data-types). +An operation to update a [counter]({{}}riak/kv/2.0.5/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-fetch.md index 5a973efd1c..aab8941f18 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.0.5/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.0.5/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.0.5/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{}}riak/kv/2.0.5/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. 
```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.0.5/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{< baseurl >}}riak/kv/2.0.5/learn/glossary/#vector-clock) for standard KV updates. The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store.md index cb50d56965..5543828cca 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-set-store.md index 41e42305c8..8696473f5e 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-store.md index 329e87a414..276485c201 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.0.5/developing/data-types). +A request to update the value of a [Riak Data Type]({{< baseurl >}}riak/kv/2.0.5/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`.
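Before looking at the individual fields of the update request, here is how the counter messages above come together from a client's perspective. A hedged sketch with the official Python client, assuming a bucket type named `counters` created with `datatype = counter` and activated; the bucket and key names are illustrative:

```python
# Minimal sketch; the client composes the CounterOp and update request.
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('counters').bucket('page_hits')

counter = bucket.new('homepage')  # a riak.datatypes.Counter
counter.increment(5)              # becomes CounterOp { increment: 5 }
counter.store()                   # sends the update request

print(bucket.get('homepage').value)  # the fetch issues a DtFetchReq
```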
The `DtOp` value specifies which Data Type-specific operation is being -performed. More on that in the [PBC Data Type Union](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-union) document. +performed. More on that in the [PBC Data Type Union]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-union) document. ```protobuf message DtUpdateReq { @@ -50,11 +50,11 @@ message DtUpdateReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored -`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.0.5/using/cluster-operations/bucket-types). +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.0.5/using/cluster-operations/bucket-types). Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a -[counter](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store). +[counter]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-counter-store), [set]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-set-store), or [map]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-map-store). ```protobuf message DtOp { @@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum` Parameter | Description :---------|:----------- `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. -`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.0.5/learn/glossary/#vector-clock) +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{< baseurl >}}riak/kv/2.0.5/learn/glossary/#vector-clock) `w` | Write quorum, i.e. how many replicas to write to before returning a successful response `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted @@ -92,7 +92,7 @@ Parameter | Description ## Response The response to a Data Type update request is analogous to -[`RpbPutResp`](/riak/kv/2.0.5/developing/api/protocol-buffers/store-object) for KV operations. If the +[`RpbPutResp`]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/store-object) for KV operations. If the `return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-union.md index 4c97c31f07..9f54d29a07 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-union.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/dt-union.md @@ -28,4 +28,4 @@ message DtOp { ``` The included operation depends on the Data Type that is being updated.
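Because a `DtOp` travels only inside an update request, one map update can batch several field operations, each encoded as the `MapUpdate`/`FlagOp` messages shown earlier. A hedged Python sketch, assuming a hypothetical `maps` bucket type created with `datatype = map`; all other names are illustrative:

```python
# Minimal sketch; the client builds the nested DtOp/MapOp messages.
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('maps').bucket('users')

user = bucket.new('ahmed_info')          # a riak.datatypes.Map
user.registers['name'].assign('Ahmed')   # a MapUpdate with a register op
user.counters['logins'].increment()      # a MapUpdate carrying a CounterOp
user.flags['active'].enable()            # FlagOp ENABLE
user.store()                             # one update request, one DtOp
```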
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.0.5/developing/api/protocol-buffers/dt-store) message. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object.md index 25c44c471a..424010b97b 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object.md @@ -47,7 +47,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or @@ -87,7 +87,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.0.5/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -114,7 +114,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `video/mp4` -* `vtag` --- The object's [vtag](/riak/kv/2.0.5/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{< baseurl >}}riak/kv/2.0.5/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -132,7 +132,7 @@ of the following optional parameters: } ``` Notice that both a key and value can be stored or just a key. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.0.5/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{< baseurl >}}riak/kv/2.0.5/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key) diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props.md index 16f289480d..774ed3afe3 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props.md @@ -26,7 +26,7 @@ message RpbGetBucketReq { } ``` -The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.0.5/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +The bucket's name (`bucket`) must be specified. The [bucket type]({{< baseurl >}}riak/kv/2.0.5/using/cluster-operations/bucket-types) parameter (`type`) is optional.
If it is not specified, the `default` bucket type will be used. ## Response @@ -85,7 +85,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.0.5/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{}}riak/kv/2.0.5/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -106,5 +106,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riakcs/latest/cookbooks/mdc-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/latest/cookbooks/mdc-overview/) {{% /note %}} diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-type.md index fdab1c2104..f77e2f6529 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-type.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.0.5/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.0.5/using/cluster-operations/bucket-types). ## Request @@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-client-id.md index cb65d232bf..7d8d0aecf8 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/get-client-id.md @@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.0.5/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/set-client-id). 
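Stepping back to the bucket-properties messages above: most clients expose the get/set pair as simple property helpers. A sketch under the same assumptions as the earlier examples (official Python client, local node, illustrative bucket name):

```python
# Minimal sketch of reading and writing bucket properties.
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('my_bucket')

props = bucket.get_properties()  # the decoded bucket properties
print(props.get('n_val'), props.get('allow_mult'))

bucket.set_properties({'allow_mult': True})  # sends the updated properties
```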
## Request diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/mapreduce.md index 76443729d9..edd927dbc0 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/mapreduce.md @@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.0.5/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.0.5/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as [REST API]({{< baseurl >}}riak/kv/2.0.5/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{< baseurl >}}riak/kv/2.0.5/developing/app-guide/advanced-mapreduce/#erlang) ## Response diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/reset-bucket-props.md index 84c6f7882d..b1c0309554 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/reset-bucket-props.md @@ -27,7 +27,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{< baseurl >}}riak/kv/2.0.5/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/secondary-indexes.md index cda734574c..28777edb3e 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/secondary-indexes.md @@ -61,7 +61,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.0.5/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.0.5/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -84,7 +84,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object).
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props.md index c50836bd69..97899c7c2f 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props.md @@ -29,9 +29,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{< baseurl >}}riak/kv/2.0.5/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-type.md index 02f54cca66..aa604f96c8 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-type.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.5/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.0.5/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{< baseurl >}}riak/kv/2.0.5/developing/usage/bucket-types). ## Request @@ -28,4 +28,4 @@ message RpbSetBucketTypeReq { } ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/store-object.md index 1237471ac9..260628513a 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/store-object.md @@ -16,11 +16,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.0.5/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.0.5/learn/concepts/buckets), and [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{< baseurl >}}riak/kv/2.0.5/learn/concepts/keys-and-objects), [bucket]({{< baseurl >}}riak/kv/2.0.5/learn/concepts/buckets), and [bucket type]({{< baseurl >}}riak/kv/2.0.5/developing/usage/bucket-types).
A bucket must always be specified (via `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no -[bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) is assigned, Riak will assign -`default`, which means that the [default bucket configuration](/riak/kv/2.0.5/configuring/reference/#default-bucket-properties) will be used. +[bucket type]({{< baseurl >}}riak/kv/2.0.5/developing/usage/bucket-types) is assigned, Riak will assign +`default`, which means that the [default bucket configuration]({{< baseurl >}}riak/kv/2.0.5/configuring/reference/#default-bucket-properties) will be used. #### Request @@ -50,7 +50,7 @@ message RpbPutReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket, in bytes, in which the key/value is to reside -`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object) +`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object) #### Optional Parameters @@ -93,7 +93,7 @@ message RpbPutResp { If `return_body` is set to `true` on the PUT request, the `RpbPutResp` will contain the current object after the PUT completes, in `contents`, -as well as the object's [causal context](/riak/kv/2.0.5/learn/concepts/causal-context), in the `vclock` +as well as the object's [causal context]({{< baseurl >}}riak/kv/2.0.5/learn/concepts/causal-context), in the `vclock` field. The `key` will be sent only if the server generated a random key for the object. diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-get.md index e01974edaf..ef5bab675a 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-get.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-get.md @@ -53,7 +53,7 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.5/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.0.5/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-put.md index c5f4443bf7..f2d14454c3 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-put.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-index-put.md @@ -37,4 +37,4 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.5/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests).
An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.0.5/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-get.md index 8b3ac85378..babf4c48b3 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.0.5/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{< baseurl >}}riak/kv/2.0.5/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-put.md index 9ac834a197..b75635ce33 100644 --- a/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.0.5/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.0.5/developing/usage/search-schemas). +Create a new Solr [search schema]({{< baseurl >}}riak/kv/2.0.5/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.0.5/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{< baseurl >}}riak/kv/2.0.5/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.5/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{< baseurl >}}riak/kv/2.0.5/developing/api/protocol-buffers/#message-codes) code with no data on success.
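Seen from a client, the schema and index messages above reduce to a pair of calls. A hedged Python sketch; `my_schema.xml`, the schema name, and the index name are placeholders, and the `n_val` shown must match the `n_val` of any bucket the index is later associated with:

```python
# Minimal sketch of uploading a Solr schema and creating an index from it.
from riak import RiakClient

client = RiakClient(pb_port=8087)

with open('my_schema.xml') as f:
    schema_xml = f.read()  # a complete Solr <schema> document

client.create_search_schema('my_schema', schema_xml)  # schema put
client.create_search_index('my_index', schema='my_schema', n_val=3)
```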
diff --git a/content/riak/kv/2.0.5/developing/app-guide.md b/content/riak/kv/2.0.5/developing/app-guide.md index e8bf7773a3..3bbdfa9983 100644 --- a/content/riak/kv/2.0.5/developing/app-guide.md +++ b/content/riak/kv/2.0.5/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.0.5/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.0.5/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.0.5/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.0.5/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.0.5/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.0.5/developing/key-value-modeling -[dev data types]: /riak/kv/2.0.5/developing/data-types -[dev data types#counters]: /riak/kv/2.0.5/developing/data-types/counters -[dev data types#sets]: /riak/kv/2.0.5/developing/data-types/sets -[dev data types#maps]: /riak/kv/2.0.5/developing/data-types/maps -[usage create objects]: /riak/kv/2.0.5/developing/usage/creating-objects -[usage search]: /riak/kv/2.0.5/developing/usage/search -[use ref search]: /riak/kv/2.0.5/using/reference/search -[usage 2i]: /riak/kv/2.0.5/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.0.5/developing/client-libraries -[concept crdts]: /riak/kv/2.0.5/learn/concepts/crdts -[dev data model]: /riak/kv/2.0.5/developing/data-modeling -[usage mapreduce]: /riak/kv/2.0.5/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.0.5/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.0.5/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.5/setup/planning/backend/memory -[obj model java]: /riak/kv/2.0.5/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.5/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.5/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.5/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.5/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.5/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.5/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.0.5/using/reference/strong-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.0.5/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.0.5/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.0.5/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.0.5/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties -[install index]: /riak/kv/2.0.5/setup/installing -[getting started]: /riak/kv/2.0.5/developing/getting-started -[usage index]: /riak/kv/2.0.5/developing/usage -[glossary]: /riak/kv/2.0.5/learn/glossary +[usage conflict resolution]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/conflict-resolution +[dev data model#log]: {{< baseurl >}}riak/kv/2.0.5/developing/data-modeling/#log-data +[dev data model#sensor]: {{< baseurl >}}riak/kv/2.0.5/developing/data-modeling/#sensor-data +[concept eventual consistency]:
{{< baseurl >}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[dev data model#user]: {{< baseurl >}}riak/kv/2.0.5/developing/data-modeling/#user-data +[dev kv model]: {{< baseurl >}}riak/kv/2.0.5/developing/key-value-modeling +[dev data types]: {{< baseurl >}}riak/kv/2.0.5/developing/data-types +[dev data types#counters]: {{< baseurl >}}riak/kv/2.0.5/developing/data-types/counters +[dev data types#sets]: {{< baseurl >}}riak/kv/2.0.5/developing/data-types/sets +[dev data types#maps]: {{< baseurl >}}riak/kv/2.0.5/developing/data-types/maps +[usage create objects]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/creating-objects +[usage search]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/search +[use ref search]: {{< baseurl >}}riak/kv/2.0.5/using/reference/search +[usage 2i]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/secondary-indexes +[dev client libraries]: {{< baseurl >}}riak/kv/2.0.5/developing/client-libraries +[concept crdts]: {{< baseurl >}}riak/kv/2.0.5/learn/concepts/crdts +[dev data model]: {{< baseurl >}}riak/kv/2.0.5/developing/data-modeling +[usage mapreduce]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/mapreduce +[apps mapreduce]: {{< baseurl >}}riak/kv/2.0.5/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{< baseurl >}}riak/kv/2.0.5/using/reference/secondary-indexes +[plan backend leveldb]: {{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/leveldb +[plan backend bitcask]: {{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/bitcask +[plan backend memory]: {{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/memory +[obj model java]: {{< baseurl >}}riak/kv/2.0.5/developing/getting-started/java/object-modeling +[obj model ruby]: {{< baseurl >}}riak/kv/2.0.5/developing/getting-started/ruby/object-modeling +[obj model python]: {{< baseurl >}}riak/kv/2.0.5/developing/getting-started/python/object-modeling +[obj model csharp]: {{< baseurl >}}riak/kv/2.0.5/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{< baseurl >}}riak/kv/2.0.5/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{< baseurl >}}riak/kv/2.0.5/developing/getting-started/erlang/object-modeling +[obj model golang]: {{< baseurl >}}riak/kv/2.0.5/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{< baseurl >}}riak/kv/2.0.5/using/reference/strong-consistency +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.0.5/using/reference/strong-consistency +[cluster ops strong consistency]: {{< baseurl >}}riak/kv/2.0.5/using/cluster-operations/strong-consistency +[config strong consistency]: {{< baseurl >}}riak/kv/2.0.5/configuring/strong-consistency +[apps strong consistency]: {{< baseurl >}}riak/kv/2.0.5/developing/app-guide/strong-consistency +[usage update objects]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/updating-objects +[apps replication properties]: {{< baseurl >}}riak/kv/2.0.5/developing/app-guide/replication-properties +[install index]: {{< baseurl >}}riak/kv/2.0.5/setup/installing +[getting started]: {{< baseurl >}}riak/kv/2.0.5/developing/getting-started +[usage index]: {{< baseurl >}}riak/kv/2.0.5/developing/usage +[glossary]: {{< baseurl >}}riak/kv/2.0.5/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -118,7 +118,7 @@ Riak may not be such a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance.
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.0.5/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.0.5/developing/app-guide/advanced-mapreduce.md index 0d7b1b9d6b..b66c9d8521 100644 --- a/content/riak/kv/2.0.5/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.0.5/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.5/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.0.5/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.0.5/using/reference/custom-code -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.5/configuring/reference +[usage 2i]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/secondary-indexes +[apps replication properties]: {{< baseurl >}}riak/kv/2.0.5/developing/app-guide/replication-properties +[use ref custom code]: {{< baseurl >}}riak/kv/2.0.5/using/reference/custom-code +[usage bucket types]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/bucket-types +[glossary vnode]: {{< baseurl >}}riak/kv/2.0.5/learn/glossary/#vnode +[config reference]: {{< baseurl >}}riak/kv/2.0.5/configuring/reference > **Use MapReduce sparingly** > @@ -725,7 +725,7 @@ You can use streaming with Erlang via the Riak local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{< baseurl >}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.0.5/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.0.5/developing/app-guide/cluster-metadata.md index 466dc0c9b4..476571720b 100644 --- a/content/riak/kv/2.0.5/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.0.5/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.0.5/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{< baseurl >}}riak/kv/2.0.5/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.0.5/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{< baseurl >}}riak/kv/2.0.5/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{< baseurl >}}riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.0.5/developing/app-guide/replication-properties.md b/content/riak/kv/2.0.5/developing/app-guide/replication-properties.md index cfa496556d..b5501769d3 100644 --- a/content/riak/kv/2.0.5/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.0.5/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.5/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.5/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.5/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.0.5/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.0.5/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.0.5/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.0.5/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.5/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.5/learn/concepts/buckets) that you're using. 
In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.0.5/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.0.5/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.5/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.5/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.0.5/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.0.5/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. 
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.5/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{< baseurl >}}riak/kv/2.0.5/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.0.5/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.0.5/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.0.5/setup/planning/backend/multi). +documentation on [Bitcask]({{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/bitcask), [LevelDB]({{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/leveldb), and [multiple backends]({{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.5/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{< baseurl >}}riak/kv/2.0.5/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.0.5/developing/usage) +refer to the section on [development usage with Riak KV]({{< baseurl >}}riak/kv/2.0.5/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.5/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{< baseurl >}}riak/kv/2.0.5/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client.
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.5/developing/app-guide/strong-consistency.md b/content/riak/kv/2.0.5/developing/app-guide/strong-consistency.md index 4891784013..434194913c 100644 --- a/content/riak/kv/2.0.5/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.0.5/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.0.5/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/2.1.3/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.0.5/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.0.5/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.0.5/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.0.5/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.0.5/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.0.5/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.0.5/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.0.5/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/2.1.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.0.5/developing/client-libraries -[getting started]: /riak/kv/2.0.5/developing/getting-started -[config strong consistency#details]: /riak/kv/2.0.5/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.0.5/using/reference/strong-consistency +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{< baseurl >}}riak/kv/2.0.5/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{< baseurl >}}riak/kv/2.0.5/learn/glossary/#vnode +[config strong consistency#enable]: {{< baseurl >}}riak/kv/2.0.5/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/bucket-types +[cluster ops bucket types]: {{< baseurl >}}riak/kv/2.0.5/using/cluster-operations/bucket-types +[apps replication properties]:
{{< baseurl >}}riak/kv/2.0.5/developing/app-guide/replication-properties +[config strong consistency]: {{< baseurl >}}riak/kv/2.0.5/configuring/strong-consistency +[config strong consistency#fault]: {{< baseurl >}}riak/kv/2.0.5/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{< baseurl >}}riak/kv/2.0.5/learn/concepts/causal-context +[concept causal context#vector]: {{< baseurl >}}riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{< baseurl >}}riak/kv/2.0.5/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/conflict-resolution +[usage update objects]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/updating-objects +[use ref strong consistency#vs]: {{< baseurl >}}riak/kv/2.0.5/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{< baseurl >}}riak/kv/2.0.5/developing/client-libraries +[getting started]: {{< baseurl >}}riak/kv/2.0.5/developing/getting-started +[config strong consistency#details]: {{< baseurl >}}riak/kv/2.0.5/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.0.5/developing/client-libraries.md b/content/riak/kv/2.0.5/developing/client-libraries.md index 8eabc88498..75b79b116c 100644 --- a/content/riak/kv/2.0.5/developing/client-libraries.md +++ b/content/riak/kv/2.0.5/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{< baseurl >}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.0.5/developing/data-types.md b/content/riak/kv/2.0.5/developing/data-types.md index 0c1c9cbf7f..1a7cd82bdb 100644 --- a/content/riak/kv/2.0.5/developing/data-types.md +++ b/content/riak/kv/2.0.5/developing/data-types.md @@ -38,9 +38,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -261,5 +261,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries.
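As a companion to the search pointers above, querying an index that covers a data-type bucket looks like any ordinary search from a client. A hedged Python sketch; the index name is illustrative, and the `counter` field name assumes the default Yokozuna schema for indexed counters, so treat both as assumptions:

```python
# Minimal sketch of searching indexed data types.
from riak import RiakClient

client = RiakClient(pb_port=8087)

# Find objects whose counter value is 10 or greater (assumed field name).
results = client.fulltext_search('scores', 'counter:[10 TO *]')
for doc in results['docs']:
    print(doc['_yz_rk'])  # the Riak key of each matching object
```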
diff --git a/content/riak/kv/2.0.5/developing/faq.md b/content/riak/kv/2.0.5/developing/faq.md index 47aaeb001d..c886f80eb3 100644 --- a/content/riak/kv/2.0.5/developing/faq.md +++ b/content/riak/kv/2.0.5/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.0.5/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.0.5/using/performance/benchmarking -[Bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.0.5/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.0.5/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.0.5/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.0.5/developing/client-libraries -[MapReduce]: /riak/kv/2.0.5/developing/usage/mapreduce -[Memory]: /riak/kv/2.0.5/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.0.5/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.0.5/learn/concepts/causal-context#vector-clocks +[Basho Bench]: {{< baseurl >}}riak/kv/2.0.5/using/performance/benchmarking +[Bitcask]: {{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/bitcask +[Bucket Properties]: {{< baseurl >}}riak/kv/2.0.5/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js +[commit hooks]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/commit-hooks +[Configuration Files]: {{< baseurl >}}riak/kv/2.0.5/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib +[Erlang Riak Client]: {{< baseurl >}}riak/kv/2.0.5/developing/client-libraries +[MapReduce]: {{< baseurl >}}riak/kv/2.0.5/developing/usage/mapreduce +[Memory]: {{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/memory +[Riak CS]: {{< baseurl >}}riak/cs/2.1.1 +[System Planning]: {{< baseurl >}}riak/kv/2.0.5/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{< baseurl >}}riak/kv/2.0.5/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.0.5/developing/getting-started.md b/content/riak/kv/2.0.5/developing/getting-started.md index 9fafe48ba1..3dc9a9bf5e 100644 --- a/content/riak/kv/2.0.5/developing/getting-started.md +++ b/content/riak/kv/2.0.5/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.0.5/setup/installing -[dev client libraries]: /riak/kv/2.0.5/developing/client-libraries +[install index]: {{< baseurl >}}riak/kv/2.0.5/setup/installing +[dev client libraries]: {{< baseurl >}}riak/kv/2.0.5/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.0.5/developing/getting-started/csharp.md b/content/riak/kv/2.0.5/developing/getting-started/csharp.md index 011197584f..083582a3c1 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/csharp.md +++ b/content/riak/kv/2.0.5/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.5/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.0.5/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak.
## Next Steps -[CRUD Operations](/riak/kv/2.0.5/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.0.5/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.0.5/developing/getting-started/csharp/querying.md b/content/riak/kv/2.0.5/developing/getting-started/csharp/querying.md index db9e8d4a15..cecf5e501c 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.0.5/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.5/developing/getting-started/erlang.md b/content/riak/kv/2.0.5/developing/getting-started/erlang.md index c13ee3f004..0e64971f35 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/erlang.md +++ b/content/riak/kv/2.0.5/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.5/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.0.5/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.5/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.0.5/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.0.5/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.0.5/developing/getting-started/erlang/object-modeling.md index 81eef0e0d6..26c9e3882f 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.0.5/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.5/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{< baseurl >}}riak/kv/2.0.5/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record.
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.5/developing/getting-started/erlang/querying.md b/content/riak/kv/2.0.5/developing/getting-started/erlang/querying.md index 59161518c4..66f1e4b2ef 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.0.5/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.0.5/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.0.5/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.5/developing/getting-started/golang.md b/content/riak/kv/2.0.5/developing/getting-started/golang.md index 76fbccf751..5e8a1eaeee 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/golang.md +++ b/content/riak/kv/2.0.5/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.5/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.5/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.5/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.5/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.0.5/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.0.5/developing/getting-started/golang/object-modeling.md index 4e8a9ef915..0e9ae5cac1 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.0.5/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.5/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.0.5/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.5/developing/getting-started/golang/querying.md b/content/riak/kv/2.0.5/developing/getting-started/golang/querying.md index 56be89b6fe..226cff1678 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.0.5/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.0.5/developing/getting-started/java.md b/content/riak/kv/2.0.5/developing/getting-started/java.md index 9eadc4f1f4..65c3dd346f 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/java.md +++ b/content/riak/kv/2.0.5/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.5/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.5/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
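The secondary-index notes repeated above describe 2i as a way to look up objects by a secondary key without scanning the whole dataset. As a rough illustration of that pattern (not part of this changeset), here is a sketch using the official Riak Python client; the bucket, key, and index names are invented for the example:

```python
import riak

# Assumes a local node listening on the default Protocol Buffers port,
# with a sorted backend (LevelDB or Memory) so 2i is available.
client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('users')

# Tag the object with an index entry at write time. Index names end in
# _bin (binary/string) or _int (integer).
obj = bucket.new('john_smith', data={'name': 'John Smith'},
                 content_type='application/json')
obj.add_index('email_bin', 'jsmith@example.com')
obj.store()

# Later, find keys by the indexed value instead of scanning the bucket.
for key in bucket.get_index('email_bin', 'jsmith@example.com'):
    print(bucket.get(key).data)
```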
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.0.5/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.5/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.0.5/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.0.5/developing/getting-started/java/crud-operations.md index 65e8905b28..004c023639 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.0.5/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.5/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.5/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -178,6 +178,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.5/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.5/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/) documentation. diff --git a/content/riak/kv/2.0.5/developing/getting-started/java/querying.md b/content/riak/kv/2.0.5/developing/getting-started/java/querying.md index 19f24c8c30..8be47d52d0 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.0.5/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.5/developing/getting-started/nodejs.md b/content/riak/kv/2.0.5/developing/getting-started/nodejs.md index bff6aa24f8..6cc38ff26a 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.0.5/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.5/using/running-a-cluster) first. 
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.5/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.5/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.5/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.0.5/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.0.5/developing/getting-started/nodejs/querying.md index bec2777dc8..c8af4a97bf 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.0.5/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.5/developing/getting-started/php.md b/content/riak/kv/2.0.5/developing/getting-started/php.md index 730a162d94..886e6d7c13 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/php.md +++ b/content/riak/kv/2.0.5/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.5/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.5/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.5/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.5/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.0.5/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.0.5/developing/getting-started/php/crud-operations.md index 13ce7e4315..e6843cb49c 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.0.5/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. 
[In the next chapter](/riak/kv/2.0.5/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.0.5/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.0.5/developing/getting-started/php/querying.md b/content/riak/kv/2.0.5/developing/getting-started/php/querying.md index 15ec3e033c..088defa0ff 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.0.5/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.5/developing/getting-started/python.md b/content/riak/kv/2.0.5/developing/getting-started/python.md index 615f8a774f..994f693dc5 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/python.md +++ b/content/riak/kv/2.0.5/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.5/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.5/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. 
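Since the hunk above closes the Python getting-started page at "We are now ready to start interacting with Riak," a minimal smoke test of that claim may be useful context. A sketch, assuming the client is installed (e.g. `pip install riak`) and a node is listening locally:

```python
import riak

# Connect over Protocol Buffers; adjust host/port to match your node.
client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

# ping() is a cheap liveness check before attempting real operations.
print(client.ping())  # True if the node is reachable
```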
## Next Steps -[CRUD Operations](/riak/kv/2.0.5/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.5/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.0.5/developing/getting-started/python/querying.md b/content/riak/kv/2.0.5/developing/getting-started/python/querying.md index 69d651f4be..60f7497425 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.0.5/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.5/developing/getting-started/ruby.md b/content/riak/kv/2.0.5/developing/getting-started/ruby.md index 06d61367d7..99dfa73e5b 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/ruby.md +++ b/content/riak/kv/2.0.5/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.5/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.5/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.5/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.5/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.0.5/developing/getting-started/ruby/querying.md b/content/riak/kv/2.0.5/developing/getting-started/ruby/querying.md index 7e5d03ebb3..30c1fa86ce 100644 --- a/content/riak/kv/2.0.5/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.0.5/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.5/setup/planning/backend/leveldb). 
[Bitcask](/riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.5/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.5/developing/key-value-modeling.md b/content/riak/kv/2.0.5/developing/key-value-modeling.md index 8b690f4853..c000c30fc9 100644 --- a/content/riak/kv/2.0.5/developing/key-value-modeling.md +++ b/content/riak/kv/2.0.5/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.0.5/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.0.5/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.0.5/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.0.5/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.0.5/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.0.5/developing/app-guide/) for a better sense of which features you might need. +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.0.5/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects. 
Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.0.5/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.0.5/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.0.5/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.0.5/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.0.5/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.0.5/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.0.5/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.0.5/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.0.5/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.0.5/developing/data-types/sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.0.5/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.0.5/developing/data-types/sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.0.5/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.0.5/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. 
The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.0.5/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.5/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.5/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.5/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.5/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.0.5/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.0.5/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.0.5/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.0.5/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.0.5/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.0.5/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.0.5/developing/usage/commit-hooks.md b/content/riak/kv/2.0.5/developing/usage/commit-hooks.md index 0637dc9713..d63cf0c037 100644 --- a/content/riak/kv/2.0.5/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.0.5/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. -Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.0.5/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.0.5/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. 
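Stepping back to the key/value-modeling hunk above, which tracks user keys in the set `user_info_sets/usernames`: the "store the key in a set whenever a record is created" function it describes might be sketched like this in Python (the helper name and record layout are invented; it assumes a `sets` bucket type whose datatype property is `set`, as in the excerpt):

```python
import riak
from riak.datatypes import Set

client = riak.RiakClient(pb_port=8087)
users = client.bucket('users')
sets_bucket = client.bucket_type('sets').bucket('user_info_sets')

def create_user(username, info):
    # Store the record itself under its username key...
    users.new(username, data=info, content_type='application/json').store()
    # ...and record the key in the 'usernames' set, so keys can be
    # enumerated later without an expensive list-keys operation.
    usernames = Set(sets_bucket, 'usernames')
    usernames.add(username)
    usernames.store()
```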
## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.0.5/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.0.5/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.0.5/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.0.5/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.0.5/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.0.5/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.0.5/developing/usage/conflict-resolution.md b/content/riak/kv/2.0.5/developing/usage/conflict-resolution.md index 7013176679..38151d7b50 100644 --- a/content/riak/kv/2.0.5/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.0.5/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.5/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.0.5/learn/concepts/clusters) system in which any [node](/riak/kv/2.0.5/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{}}riak/kv/2.0.5/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.0.5/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. 
-If you are using Riak in an [eventually consistent](/riak/kv/2.0.5/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.0.5/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.0.5/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.0.5/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.0.5/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.0.5/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.0.5/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.0.5/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.0.5/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.0.5/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.0.5/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.0.5/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.0.5/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.0.5/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. The most important [bucket properties](/riak/kv/2.0.5/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.0.5/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. 
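The client-side resolution these hunks keep pointing to can be quite compact. A sketch in Python, echoing the pattern used in the conflict-resolution pages touched later in this changeset (the merge rule, bucket names, and key are illustrative only):

```python
import riak

def longest_friends_list_resolver(riak_object):
    # Keep the sibling with the longest 'friends' list; any deterministic,
    # use-case-specific merge rule could be substituted here.
    riak_object.siblings = [max(riak_object.siblings,
                                key=lambda s: len(s.data['friends']))]

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('siblings').bucket('users')
# The resolver runs automatically on fetches from this bucket.
bucket.resolver = longest_friends_list_resolver

obj = bucket.get('some_user')  # siblings, if any, arrive resolved
```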
@@ -87,7 +87,7 @@ If the `[allow_mult](#siblings)` parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -`[last_write_wins](/riak/kv/2.0.5/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, +`[last_write_wins]({{}}riak/kv/2.0.5/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.0.5/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.5/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.5/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.5/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.5/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.0.5/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.0.5/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.0.5/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.0.5/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.0.5/configuring/reference) to change the [default bucket properties](/riak/kv/2.0.5/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.0.5/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.0.5/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.0.5/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.0.5/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{}}riak/kv/2.0.5/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.0.5/learn/concepts/causal-context#vector-clocks) will be used. 
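In day-to-day use the official clients shuttle this context back and forth automatically across a read-modify-write cycle. A sketch in Python (the bucket and key are illustrative):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('dogs')

# Fetching pulls down the object's causal context; storing the same
# RiakObject returns that context to Riak, marking this write as a
# descendant of what was read rather than a blind concurrent update.
obj = bucket.get('rufus')
obj.data = 'WOOF! WOOF!'
obj.store()
```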
Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.0.5/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.0.5/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.0.5/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.0.5/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.0.5/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.0.5/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.0.5/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.0.5/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.0.5/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.5/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.0.5/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.5/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. 
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.0.5/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.5/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.5/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.5/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.5/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -611,7 +611,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.0.5/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.0.5/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -666,7 +666,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/csharp.md index 3a550935c8..42b2881112 100644 --- a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.5/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
diff --git a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/golang.md index badede5172..4f8a4ed5fe 100644 --- a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.5/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to usecase-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/java.md index 1ff00bbf16..e232ae5e25 100644 --- a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.5/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.5/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.5/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.5/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.5/developing/data-types/) that have specific conflict resolution mechanics built in. 
If you have data that -can be modeled as a [counter](/riak/kv/2.0.5/developing/data-types/counters), [set](/riak/kv/2.0.5/developing/data-types/sets), or [map](/riak/kv/2.0.5/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.5/developing/data-types/counters), [set]({{}}riak/kv/2.0.5/developing/data-types/sets), or [map]({{}}riak/kv/2.0.5/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.5/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.5/developing/data-types/sets). diff --git a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/nodejs.md index 1add471608..3aeafea879 100644 --- a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.5/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/php.md index 7634d8eb33..3380fc83b4 100644 --- a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.5/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.5/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.5/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.5/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.5/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.5/developing/data-types/counters), [set](/riak/kv/2.0.5/developing/data-types/sets), or [map](/riak/kv/2.0.5/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.5/developing/data-types/counters), [set]({{}}riak/kv/2.0.5/developing/data-types/sets), or [map]({{}}riak/kv/2.0.5/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.5/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.5/developing/data-types/sets). diff --git a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/python.md index 59bd949276..089a1a5060 100644 --- a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.5/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.5/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.5/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.5/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.5/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.5/developing/data-types/counters), [set](/riak/kv/2.0.5/developing/data-types/sets), or [map](/riak/kv/2.0.5/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.5/developing/data-types/counters), [set]({{}}riak/kv/2.0.5/developing/data-types/sets), or [map]({{}}riak/kv/2.0.5/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.5/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.5/developing/data-types/sets). diff --git a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/ruby.md index 83ae31038c..91b1abb265 100644 --- a/content/riak/kv/2.0.5/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.0.5/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.5/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.5/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.5/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.5/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.5/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.5/developing/data-types/counters), [set](/riak/kv/2.0.5/developing/data-types/sets), or [map](/riak/kv/2.0.5/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.5/developing/data-types/counters), [set]({{}}riak/kv/2.0.5/developing/data-types/sets), or [map]({{}}riak/kv/2.0.5/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.5/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.5/developing/data-types/sets). diff --git a/content/riak/kv/2.0.5/developing/usage/creating-objects.md b/content/riak/kv/2.0.5/developing/usage/creating-objects.md index eb7553304c..15ccc719c6 100644 --- a/content/riak/kv/2.0.5/developing/usage/creating-objects.md +++ b/content/riak/kv/2.0.5/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.0.5/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.0.5/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -27,7 +27,7 @@ In the example above, our read was unsuccessful because our Riak cluster is currently empty. Let's change that by storing an object containing information about a dog named Rufus. We'll store that object in the location described above, i.e. in the key `rufus` in the bucket `dogs`, -which bears the `animals` [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types). +which bears the `animals` [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types). 
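For orientation, a write like the one just described looks roughly as follows in the official Python client (a sketch; the connection details are assumptions):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('animals').bucket('dogs')

obj = bucket.new('rufus', data='WOOF!', content_type='text/plain')
obj.store()  # stores under /types/animals/buckets/dogs/keys/rufus
```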
The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -122,7 +122,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, run the same read operation in [Reading Objects](/riak/kv/2.0.5/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no +Now, run the same read operation in [Reading Objects]({{}}riak/kv/2.0.5/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no longer empty! ### Store an Object @@ -143,7 +143,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.0.5/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.0.5/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.0.5/developing/usage/custom-extractors.md b/content/riak/kv/2.0.5/developing/usage/custom-extractors.md index 581a80b73e..71bcb7c3da 100644 --- a/content/riak/kv/2.0.5/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.0.5/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.0.5/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.0.5/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.0.5/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.0.5/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.0.5/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.0.5/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added to `` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.0.5/developing/usage/deleting-objects.md b/content/riak/kv/2.0.5/developing/usage/deleting-objects.md index b47499a27a..80f23a1c74 100644 --- a/content/riak/kv/2.0.5/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.0.5/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.0.5/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.0.5/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.0.5/developing/usage/document-store.md b/content/riak/kv/2.0.5/developing/usage/document-store.md index 365d0bc8ab..f478bfc223 100644 --- a/content/riak/kv/2.0.5/developing/usage/document-store.md +++ b/content/riak/kv/2.0.5/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.0.5/developing/usage/search/) and [Riak Data Types](/riak/kv/2.0.5/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.0.5/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.0.5/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.0.5/developing/data-types/maps). +[Riak maps]({{}}riak/kv/2.0.5/developing/data-types/maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.0.5/developing/data-types/maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.0.5/developing/data-types/maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**. 
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.0.5/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.0.5/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.0.5/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.0.5/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.0.5/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.0.5/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.0.5/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.0.5/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.0.5/developing/usage/mapreduce.md b/content/riak/kv/2.0.5/developing/usage/mapreduce.md index 2685fbc53d..86b03ad60c 100644 --- a/content/riak/kv/2.0.5/developing/usage/mapreduce.md +++ b/content/riak/kv/2.0.5/developing/usage/mapreduce.md @@ -33,9 +33,9 @@ transferring a potentially huge dataset to a client algorithm. Developers can use MapReduce for things like filtering documents by tags, counting words in documents, and extracting links to related data. In Riak, MapReduce is one method for querying that is not strictly based -on key querying, alongside [secondary indexes](/riak/kv/2.0.5/developing/usage/secondary-indexes/) -and [Search](/riak/kv/2.0.5/developing/usage/search/). MapReduce jobs can be submitted through the -[HTTP API](/riak/kv/2.0.5/developing/api/http) or the [Protocol Buffers API](/riak/kv/2.0.5/developing/api/protocol-buffers/), although we +on key querying, alongside [secondary indexes]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes/) +and [Search]({{}}riak/kv/2.0.5/developing/usage/search/). MapReduce jobs can be submitted through the +[HTTP API]({{}}riak/kv/2.0.5/developing/api/http) or the [Protocol Buffers API]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/), although we strongly recommend using the Protocol Buffers API for performance reasons. @@ -49,9 +49,9 @@ reasons. ## When to Use MapReduce * When you know the set of objects over which you want to MapReduce - (i.e. the locations of the objects, as specified by [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types), bucket, and key) + (i.e. the locations of the objects, as specified by [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types), bucket, and key) * When you want to return actual objects or pieces of objects and not - just the keys. 
[Search](/riak/kv/2.0.5/developing/usage/search/) and [secondary indexes](/riak/kv/2.0.5/developing/usage/secondary-indexes) are other means of returning objects based on + just the keys. [Search]({{}}riak/kv/2.0.5/developing/usage/search/) and [secondary indexes]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) are other means of returning objects based on non-key-based queries, but they only return lists of keys and not whole objects. * When you need the utmost flexibility in querying your data. MapReduce @@ -86,7 +86,7 @@ Riak MapReduce queries have two components: * A list of phases The elements of the input list are object locations as specified by -[bucket type](/riak/kv/2.0.5/developing/usage/bucket-types), bucket, and key. The elements of the +[bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types), bucket, and key. The elements of the phases list are chunks of information related to a map, a reduce, or a link function. @@ -96,7 +96,7 @@ node that the client contacts to make the request becomes the above, each job consists of a list of phases, where each phase is either a map or a reduce phase. The coordinating node uses the list of phases to route the object keys and the function that will operate over the -objects stored in those keys and instruct the proper [vnode](/riak/kv/2.0.5/learn/glossary/#vnode) to +objects stored in those keys and instruct the proper [vnode]({{}}riak/kv/2.0.5/learn/glossary/#vnode) to run that function over the right objects. After running the map function, the results are sent back to the @@ -107,20 +107,20 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example In this example, we'll create four objects with the text "caremad" repeated a varying number of times and store those objects in the bucket -`training` (which does not bear a [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types)). +`training` (which does not bear a [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types)). An Erlang MapReduce function will be used to count the occurrences of the word "caremad." ### Data object input commands For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) -in conjunction with Riak's [HTTP API](/riak/kv/2.0.5/developing/api/http) to store the objects: +in conjunction with Riak's [HTTP API]({{}}riak/kv/2.0.5/developing/api/http) to store the objects: ```curl curl -XPUT http://localhost:8098/buckets/training/keys/foo \ @@ -218,4 +218,4 @@ counting the number of instances of the word. ## Advanced MapReduce Queries For more detailed information on MapReduce queries in Riak, we recommend -checking out our [Advanced MapReduce](/riak/kv/2.0.5/developing/app-guide/advanced-mapreduce) guide. +checking out our [Advanced MapReduce]({{}}riak/kv/2.0.5/developing/app-guide/advanced-mapreduce) guide. diff --git a/content/riak/kv/2.0.5/developing/usage/reading-objects.md b/content/riak/kv/2.0.5/developing/usage/reading-objects.md index 102607097c..75e2811ba7 100644 --- a/content/riak/kv/2.0.5/developing/usage/reading-objects.md +++ b/content/riak/kv/2.0.5/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. 
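A hedged sketch of such a read with the official Python client, using the `rufus` object from the write examples (connection details are assumptions):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('animals').bucket('dogs')

fetched = bucket.get('rufus')
if fetched.exists:
    print(fetched.data)  # 'WOOF!' if the earlier write succeeded
```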
You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.0.5/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) `animals`: +`dogs`, which bears the [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types) `animals`: ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.0.5/developing/usage/replication.md b/content/riak/kv/2.0.5/developing/usage/replication.md index 944a91427b..55a506fe1b 100644 --- a/content/riak/kv/2.0.5/developing/usage/replication.md +++ b/content/riak/kv/2.0.5/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.5/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.5/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.5/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using Strong +Using Strong Consistency documentation, as this option will not be covered in this tutorial. 
{{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.5/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.5/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.5/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.5/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.5/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.5/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.0.5/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.0.5/setup/planning/backend/multi). 
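Replication properties can also be supplied per request rather than baked into a bucket type. A hedged sketch with the official Python client (the bucket type, bucket, and key names here are placeholders):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('replicated').bucket('users')  # placeholder names

obj = bucket.get('some_key', r=3)  # require 3 vnode responses for the read
obj.data = 'updated value'
obj.store(w=3, dw=2)               # require 3 write acks, 2 durable writes
```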
## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.5/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.5/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.0.5/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.0.5/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.5/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.5/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)

diff --git a/content/riak/kv/2.0.5/developing/usage/search-schemas.md b/content/riak/kv/2.0.5/developing/usage/search-schemas.md
index 947dc82c7c..f901c051d3 100644
--- a/content/riak/kv/2.0.5/developing/usage/search-schemas.md
+++ b/content/riak/kv/2.0.5/developing/usage/search-schemas.md
@@ -15,17 +15,17 @@ aliases:
   - /riak/kv/2.0.5/dev/advanced/search-schema
 ---

-[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters
+[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters

 > **Note on Search 2.0 vs. Legacy Search**
 >
 > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed
-Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/).
+Yokozuna).

 Riak Search is built for ease of use, allowing you to write values into
 Riak and query for values using Solr. Riak Search does a lot of work
-under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.0.5/developing/data-types/), and [more](/riak/kv/2.0.5/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.0.5/developing/data-types/), and [more]({{}}riak/kv/2.0.5/developing/usage/custom-extractors)---into something that can be indexed and searched later.
 Nonetheless, you must still instruct Riak/Solr how to index a value. Are
 you providing an array of strings? An integer? A date? Is your text in
 English or Russian? 
You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.0.5/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.0.5/developing/usage/search.md b/content/riak/kv/2.0.5/developing/usage/search.md index d468297e12..248e48b103 100644 --- a/content/riak/kv/2.0.5/developing/usage/search.md +++ b/content/riak/kv/2.0.5/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.0.5/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.0.5/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.5/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.5/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.5/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.5/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.0.5/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.0.5/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.0.5/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.0.5/developing/usage/custom-extractors). 
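Stepping back to the querying side for a moment: once an index such as `famous` (created above) is populated, it can be queried from the official Python client as well as via `curl`. A hedged sketch:

```python
import riak

client = riak.RiakClient(pb_port=8087)
client.create_search_index('famous')  # uses the default schema

# ... after writing objects to a bucket associated with the index ...
results = client.fulltext_search('famous', 'name_s:Lion*')
print(results['num_found'], results['docs'])
```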
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.0.5/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.0.5/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.0.5/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.0.5/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.0.5/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.0.5/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.0.5/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.0.5/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.0.5/developing/usage/searching-data-types.md b/content/riak/kv/2.0.5/developing/usage/searching-data-types.md index 4756033033..761385d3f1 100644 --- a/content/riak/kv/2.0.5/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.0.5/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.5/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.0.5/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.0.5/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.0.5/developing/data-types/counters), [sets](/riak/kv/2.0.5/developing/data-types/sets), and [maps](/riak/kv/2.0.5/developing/data-types/maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.0.5/developing/data-types/counters), [sets]({{}}riak/kv/2.0.5/developing/data-types/sets), and [maps]({{}}riak/kv/2.0.5/developing/data-types/maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
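To make the point that Data Types are just indexable objects concrete, here is a hedged sketch of writing a counter from the official Python client; once the counter's bucket type is associated with a search index, its value becomes queryable as described in the schema sections below (the type, bucket, and key names are assumptions):

```python
import riak
from riak.datatypes import Counter

client = riak.RiakClient(pb_port=8087)
# Assumption: bucket type 'counters' exists with datatype = counter
# and is associated with a search index.
bucket = client.bucket_type('counters').bucket('people')

counter = Counter(bucket, 'visits')  # placeholder key
counter.increment(10)
counter.store()
```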
### Top-level Schemas -The default schema for [counters](/riak/kv/2.0.5/developing/data-types/counters) indexes each +The default schema for [counters]({{}}riak/kv/2.0.5/developing/data-types/counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.0.5/developing/data-types/sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.0.5/developing/data-types/sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.0.5/developing/data-types/maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.0.5/developing/data-types/maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) for [storing counters](/riak/kv/2.0.5/developing/data-types/counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.0.5/developing/data-types/counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types) for [storing sets](/riak/kv/2.0.5/developing/data-types/sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.0.5/developing/data-types/sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.0.5/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.0.5/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.0.5/developing/data-types/maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.0.5/developing/data-types/maps), and we'd suggest that you familiarize yourself with that tutorial first. 
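Before the field-by-field breakdown that follows, here is a hedged sketch of what creating such a user map looks like in the official Python client (the type, bucket, key, and field names are assumptions):

```python
import riak
from riak.datatypes import Map

client = riak.RiakClient(pb_port=8087)
# Assumption: bucket type 'maps' exists with datatype = map.
bucket = client.bucket_type('maps').bucket('customers')

user_map = Map(bucket, 'ahmed_info')                # placeholder key
user_map.registers['first_name'].assign('Ahmed')    # register field
user_map.flags['enterprise_customer'].disable()     # boolean flag field
user_map.counters['page_visits'].increment(1)       # embedded counter field
user_map.store()
```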
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.0.5/developing/usage/secondary-indexes.md b/content/riak/kv/2.0.5/developing/usage/secondary-indexes.md index 5a636c655b..82f45375d7 100644 --- a/content/riak/kv/2.0.5/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.0.5/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.0.5/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.5/setup/planning/backend/memory -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.5/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.5/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.0.5/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.0.5/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.0.5/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.0.5/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.0.5/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.0.5/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.0.5/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.0.5/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.0.5/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.0.5/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.0.5/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.0.5/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.0.5/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.0.5/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.0.5/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.5/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.5/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.5/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.5/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.0.5/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.0.5/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.0.5/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.0.5/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
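Setting aside the R=1 caveat above, tagging and querying by index from the official Python client looks roughly like this (a sketch; the index name and values mirror the `john_smith` example):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('default').bucket('users')

obj = bucket.new('john_smith', data='...user data...',
                 content_type='text/plain')
obj.add_index('twitter_bin', 'jsmith123')  # tag with a binary index
obj.store()

# Later: fetch every key in 'users' tagged with that Twitter handle
page = bucket.get_index('twitter_bin', 'jsmith123')
for key in page.results:
    print(key)
```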
### Exact Match

diff --git a/content/riak/kv/2.0.5/developing/usage/security.md b/content/riak/kv/2.0.5/developing/usage/security.md
index c1dad5e451..36f7ca71f2 100644
--- a/content/riak/kv/2.0.5/developing/usage/security.md
+++ b/content/riak/kv/2.0.5/developing/usage/security.md
@@ -15,49 +15,49 @@ aliases:
   - /riak/kv/2.0.5/dev/advanced/client-security
 ---

-Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.0.5/using/security/basics) that enables you to choose
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.0.5/using/security/basics) that enables you to choose

 * which Riak users/clients are authorized to perform a wide variety of
   Riak operations, and
 * how those users/clients are required to authenticate themselves.

-The following four authentication mechanisms, aka [security sources](/riak/kv/2.0.5/using/security/managing-sources/) are available:
+The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.0.5/using/security/managing-sources/) are available:

-* [Trust](/riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication)-based
+* [Trust]({{}}riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication)-based
   authentication enables you to specify trusted
   [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
   from which all clients will be authenticated by default
-* [Password](/riak/kv/2.0.5/using/security/managing-sources/#password-based-authentication)-based authentication requires
+* [Password]({{}}riak/kv/2.0.5/using/security/managing-sources/#password-based-authentication)-based authentication requires
   that clients provide a username and password
-* [Certificate](/riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication
+* [Certificate]({{}}riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication
   requires that clients present a certificate
-* [Pluggable authentication module (PAM)](/riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+* [Pluggable authentication module (PAM)]({{}}riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication)-based authentication requires
   clients to authenticate using the PAM service specified using the
-  `[riak-admin security](/riak/kv/2.0.5/using/security/managing-sources/#managing-sources)`
+  `[riak-admin security]({{}}riak/kv/2.0.5/using/security/managing-sources/#managing-sources)`
   command line interface

 Riak's approach to security is highly flexible. If you choose to use
 Riak's security feature, you do not need to require that all clients
 authenticate via the same means. Instead, you can specify authentication
 sources on a client-by-client, i.e. user-by-user, basis. This means that
-you can require clients performing, say, [MapReduce](/riak/kv/2.0.5/developing/usage/mapreduce/)
-operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.0.5/developing/usage) have to use username and password. The approach
+you can require clients performing, say, [MapReduce]({{}}riak/kv/2.0.5/developing/usage/mapreduce/)
+operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.0.5/developing/usage) have to use username and password. The approach
 that you adopt will depend on your security needs.

 This document provides a general overview of how that works. 
For managing security in Riak itself, see the following documents: -* [Authentication and Authorization](/riak/kv/2.0.5/using/security/basics) -* [Managing Security Sources](/riak/kv/2.0.5/using/security/managing-sources/) +* [Authentication and Authorization]({{}}riak/kv/2.0.5/using/security/basics) +* [Managing Security Sources]({{}}riak/kv/2.0.5/using/security/managing-sources/) We also provide client-library-specific guides for the following officially supported clients: -* [Java](/riak/kv/2.0.5/developing/usage/security/java) -* [Ruby](/riak/kv/2.0.5/developing/usage/security/ruby) -* [PHP](/riak/kv/2.0.5/developing/usage/security/php) -* [Python](/riak/kv/2.0.5/developing/usage/security/python) -* [Erlang](/riak/kv/2.0.5/developing/usage/security/erlang) +* [Java]({{}}riak/kv/2.0.5/developing/usage/security/java) +* [Ruby]({{}}riak/kv/2.0.5/developing/usage/security/ruby) +* [PHP]({{}}riak/kv/2.0.5/developing/usage/security/php) +* [Python]({{}}riak/kv/2.0.5/developing/usage/security/python) +* [Erlang]({{}}riak/kv/2.0.5/developing/usage/security/erlang) ## Certificates, Keys, and Authorities @@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients. > **HTTP not supported** > > Certificate-based authentication is available only through Riak's -[Protocol Buffers](/riak/kv/2.0.5/developing/api/protocol-buffers/) interface. It is not available through the -[HTTP API](/riak/kv/2.0.5/developing/api/http). +[Protocol Buffers]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{}}riak/kv/2.0.5/developing/api/http). ### Default Names -In Riak's [configuration files](/riak/kv/2.0.5/configuring/reference/#security), the +In Riak's [configuration files]({{}}riak/kv/2.0.5/configuring/reference/#security), the default certificate file names are as follows: Cert | Filename diff --git a/content/riak/kv/2.0.5/developing/usage/security/erlang.md b/content/riak/kv/2.0.5/developing/usage/security/erlang.md index 21a65625a5..35ca83d5af 100644 --- a/content/riak/kv/2.0.5/developing/usage/security/erlang.md +++ b/content/riak/kv/2.0.5/developing/usage/security/erlang.md @@ -19,9 +19,9 @@ aliases: This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak. -If you are using [trust](/riak/kv/2.0.5/using/security/managing-sources/), [PAM-](/riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.0.5/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust]({{}}riak/kv/2.0.5/using/security/managing-sources/), [PAM-]({{}}riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.0.5/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). 
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.5/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.5/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.5/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.5/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.5/developing/usage/security/java.md b/content/riak/kv/2.0.5/developing/usage/security/java.md index 37fe6a3565..f01cdbb282 100644 --- a/content/riak/kv/2.0.5/developing/usage/security/java.md +++ b/content/riak/kv/2.0.5/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.5/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.5/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.0.5/developing/usage/security/php.md b/content/riak/kv/2.0.5/developing/usage/security/php.md index df675278da..b5be8f384a 100644 --- a/content/riak/kv/2.0.5/developing/usage/security/php.md +++ b/content/riak/kv/2.0.5/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.5/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.0.5/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.0.5/developing/usage/security/python.md b/content/riak/kv/2.0.5/developing/usage/security/python.md index 7ec9db4d97..2001b12e62 100644 --- a/content/riak/kv/2.0.5/developing/usage/security/python.md +++ b/content/riak/kv/2.0.5/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.5/using/security/managing-sources/) or [PAM-](/riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.0.5/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.5/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security +setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.0.5/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.5/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.5/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.0.5/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.5/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.5/using/security/basics/#user-management).
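For readers following the python.md hunks above, the password/PAM flow they describe can be sketched end to end. This is a minimal, illustrative sketch only: the CA path, username, and password are placeholders, and it assumes a Riak node with security enabled listening on `localhost:8087`:

```python
from riak import RiakClient
from riak.security import SecurityCreds

# Placeholder credentials: substitute whatever CA file, username, and
# password you actually configured on the Riak side.
creds = SecurityCreds(username='riakuser',
                      password='rosebud',
                      cacert_file='/ssl_dir/cacertfile.pem')

# The client presents these credentials on every connection.
client = RiakClient(pb_port=8087, credentials=creds)
```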
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.0.5/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.0.5/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.0.5/developing/usage/security/ruby.md b/content/riak/kv/2.0.5/developing/usage/security/ruby.md index 7d86832157..481d5bcb5a 100644 --- a/content/riak/kv/2.0.5/developing/usage/security/ruby.md +++ b/content/riak/kv/2.0.5/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.5/using/security/managing-sources/) or [PAM](/riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.0.5/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.0.5/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.0.5/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.5/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.0.5/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication). 
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.5/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.5/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.0.5/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.5/developing/usage/updating-objects.md b/content/riak/kv/2.0.5/developing/usage/updating-objects.md index 1d2c3bfb53..ca5fc23ac5 100644 --- a/content/riak/kv/2.0.5/developing/usage/updating-objects.md +++ b/content/riak/kv/2.0.5/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/dev/using/updates --- -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode ## Using Causal Context @@ -23,9 +23,9 @@ If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.0.5/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.0.5/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.0.5/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.0.5/learn/concepts/causal-context). These objects track the causal history of objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps: 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.0.5/learn/concepts/causal-context) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.0.5/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.0.5/learn/concepts/causal-context). Although a more detailed tutorial on context objects and +object updates can be found in [Conflict Resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution), we'll walk you through a basic example here.
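Ahead of the worked example that follows, here is a rough sketch of that three-step fetch/modify/write cycle using the Python client (bucket and key names are invented for illustration). The point of step 2 is that only `.data` changes; the causal context fetched in step 1 rides along untouched when the object is stored:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('nba')            # illustrative bucket name

# 1. Fetch the object (its causal context comes back with it)
champion = bucket.get('champion')

# 2. Modify the value only; leave the fetched context alone
champion.data = 'Washington Generals'

# 3. Write it back; the client resends the stored context so Riak
#    can order this write after the version we fetched
champion.store()
```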
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.0.5/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.0.5/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.0.5/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.0.5/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.0.5/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.0.5/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.0.5/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.0.5/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.0.5/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.0.5/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.0.5/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.0.5/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.0.5/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.0.5/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.0.5/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.0.5/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.0.5/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.0.5/index.md b/content/riak/kv/2.0.5/index.md index 7eeedfcf85..0bf0884d09 100644 --- a/content/riak/kv/2.0.5/index.md +++ b/content/riak/kv/2.0.5/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.0.5/configuring -[dev index]: /riak/kv/2.0.5/developing -[downloads]: /riak/kv/2.0.5/downloads/ -[install index]: /riak/kv/2.0.5/setup/installing/ -[plan index]: /riak/kv/2.0.5/setup/planning -[perf open files]: /riak/kv/2.0.5/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.0.5/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.0.5/developing/usage/search -[getting started]: /riak/kv/2.0.5/developing/getting-started -[dev client libraries]: /riak/kv/2.0.5/developing/client-libraries +[config index]: {{}}riak/kv/2.0.5/configuring +[dev index]: {{}}riak/kv/2.0.5/developing +[downloads]: {{}}riak/kv/2.0.5/downloads/ +[install index]: {{}}riak/kv/2.0.5/setup/installing/ +[plan index]: {{}}riak/kv/2.0.5/setup/planning +[perf open files]: {{}}riak/kv/2.0.5/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.0.5/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.0.5/developing/usage/search +[getting started]: {{}}riak/kv/2.0.5/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.0.5/developing/client-libraries diff --git a/content/riak/kv/2.0.5/introduction.md b/content/riak/kv/2.0.5/introduction.md index 8925b5d33a..a64fc39df7 100644 --- a/content/riak/kv/2.0.5/introduction.md +++ b/content/riak/kv/2.0.5/introduction.md @@ -27,7 +27,7 @@ For more in-depth implementation details check out the If you're upgrading to Riak 2.0 from an earlier version, please be aware that all of the new features listed below are optional: -* **Riak Data Types** --- Riak's new CRDT-based [Data Types](/riak/kv/2.0.5/developing/data-types) can +* **Riak Data Types** --- Riak's new CRDT-based [Data Types]({{}}riak/kv/2.0.5/developing/data-types) can simplify modeling data in Riak, but are only used in buckets explicitly configured to use them. * **Strong Consistency, Riak Security, and the New Riak Search** --- @@ -35,16 +35,16 @@ that all of the new features listed below are optional: work. If not turned on, they will have no impact on performance. Furthermore, the older Riak Search will continue to be included with Riak. -* **Security** --- [Authentication and authorization](/riak/kv/2.0.5/using/security/basics) can be enabled +* **Security** --- [Authentication and authorization]({{}}riak/kv/2.0.5/using/security/basics) can be enabled or disabled at any time. -* **Configuration management** --- Riak's [configuration files](/riak/kv/2.0.5/configuring/reference/) have +* **Configuration management** --- Riak's [configuration files]({{}}riak/kv/2.0.5/configuring/reference/) have been streamlined into a single file named `riak.conf`. If you are upgrading, however, your existing `app.config` and `vm.args` files will still be recognized in version 2.0. -* **Bucket Types** --- While we strongly recommend [using bucket types](/riak/kv/2.0.5/using/reference/bucket-types) when creating new buckets, they are not required. +* **Bucket Types** --- While we strongly recommend [using bucket types]({{}}riak/kv/2.0.5/using/reference/bucket-types) when creating new buckets, they are not required. 
* **Dotted Version Vectors (DVVs)** --- This alternative to traditional - [vector clocks](/riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks) is enabled by default - in all [bucket types](/riak/kv/2.0.5/using/reference/bucket-types), but DVVs can be disabled + [vector clocks]({{}}riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks) is enabled by default + in all [bucket types]({{}}riak/kv/2.0.5/using/reference/bucket-types), but DVVs can be disabled by setting the `dvv_enabled` property to `false` on any bucket type. In a nutshell, upgrading to 2.0 will change how you use Riak only if you @@ -52,17 +52,17 @@ want it to. But even if you don't plan on using the new features, there are a number of improvements that make upgrading a good choice, including the following: -* [Cluster metadata](/riak/kv/2.0.5/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that +* [Cluster metadata]({{}}riak/kv/2.0.5/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that reduces the amount of inter-node gossip in Riak clusters, which can reduce network congestion. -* [Active Anti-Entropy](/riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy +* [Active Anti-Entropy]({{}}riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy (AAE) feature that is turned on by default since version 1.3, AAE performance has been improved in version 2.0. * [Bug patches](https://github.com/basho/riak/blob/2.0/RELEASE-NOTES.md) --- A variety of bugs present in earlier versions have been identified and patched. -More on upgrading can be found in our [Riak 2.0 upgrade guide](/riak/kv/2.0.5/setup/upgrading/version). +More on upgrading can be found in our [Riak 2.0 upgrade guide]({{}}riak/kv/2.0.5/setup/upgrading/version). ## Riak Data Types @@ -73,20 +73,20 @@ application is responsible for resolving conflicts between replicas of objects stored in different Riak nodes. Riak 2.0 offers a new approach to this problem for a wide range of use -cases in the form of [Riak Data Types](/riak/kv/2.0.5/developing/data-types). Instead of +cases in the form of [Riak Data Types]({{}}riak/kv/2.0.5/developing/data-types). Instead of forcing the application to resolve conflicts, Riak offers five Data Types that can reduce some of the complexities of developing using -Riak: [flags](/riak/kv/2.0.5/developing/data-types/maps#flags), [registers](/riak/kv/2.0.5/developing/data-types/maps#registers), -[counters](/riak/kv/2.0.5/developing/data-types/counters), [sets](/riak/kv/2.0.5/developing/data-types/sets), and -[maps](/riak/kv/2.0.5/developing/data-types/maps). +Riak: [flags]({{}}riak/kv/2.0.5/developing/data-types/maps#flags), [registers]({{}}riak/kv/2.0.5/developing/data-types/maps#registers), +[counters]({{}}riak/kv/2.0.5/developing/data-types/counters), [sets]({{}}riak/kv/2.0.5/developing/data-types/sets), and +[maps]({{}}riak/kv/2.0.5/developing/data-types/maps). #### Relevant Docs -* [Using Data Types](/riak/kv/2.0.5/developing/data-types) explains how to use Riak Data Types on the +* [Using Data Types]({{}}riak/kv/2.0.5/developing/data-types) explains how to use Riak Data Types on the application side, with usage examples for all five Data Types in all of Basho's officially supported clients (Java, Ruby, Python, .NET and Erlang) and for Riak's HTTP interface. 
-* [Data Types](/riak/kv/2.0.5/developing/data-types) explains some of the theoretical concerns that drive +* [Data Types]({{}}riak/kv/2.0.5/developing/data-types) explains some of the theoretical concerns that drive Riak Data Types and shares details about how they are implemented in Riak. @@ -103,11 +103,11 @@ Search, integrating Riak with [Apache Solr](https://lucene.apache.org/solr/)'s f #### Relevant Docs -* [Using Search](/riak/kv/2.0.5/developing/usage/search) provides an overview of how to use the new +* [Using Search]({{}}riak/kv/2.0.5/developing/usage/search) provides an overview of how to use the new Riak Search. -* [Search Schema](/riak/kv/2.0.5/developing/usage/search-schemas) shows you how to create and manage custom search +* [Search Schema]({{}}riak/kv/2.0.5/developing/usage/search-schemas) shows you how to create and manage custom search schemas. -* [Search Details](/riak/kv/2.0.5/using/reference/search) provides an in-depth look at the design +* [Search Details]({{}}riak/kv/2.0.5/using/reference/search) provides an in-depth look at the design considerations that went into the new Riak Search. #### Video @@ -125,13 +125,13 @@ some (or perhaps all) of your data. #### Relevant Docs -* [Using Strong Consistency](/riak/kv/2.0.5/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong +* [Using Strong Consistency]({{}}riak/kv/2.0.5/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong consistency subsystem and to apply strong consistency guarantees to data stored in specified buckets. -* [Strong Consistency](/riak/kv/2.0.5/using/reference/strong-consistency) provides a theoretical treatment of how a - strongly consistent system differs from an [eventually consistent](/riak/kv/2.0.5/learn/concepts/eventual-consistency) system, as well as details about how +* [Strong Consistency]({{}}riak/kv/2.0.5/using/reference/strong-consistency) provides a theoretical treatment of how a + strongly consistent system differs from an [eventually consistent]({{}}riak/kv/2.0.5/learn/concepts/eventual-consistency) system, as well as details about how strong consistency is implemented in Riak. -* [Managing Strong Consistency](/riak/kv/2.0.5/configuring/strong-consistency) is a guide to strong consistency for +* [Managing Strong Consistency]({{}}riak/kv/2.0.5/configuring/strong-consistency) is a guide to strong consistency for Riak operators. #### Video @@ -155,11 +155,11 @@ Riak itself and managed through a simple command-line interface. #### Relevant Docs -* [Authentication and Authorization](/riak/kv/2.0.5/using/security/basics) explains how Riak Security can be +* [Authentication and Authorization]({{}}riak/kv/2.0.5/using/security/basics) explains how Riak Security can be enabled and disabled, how users and groups are managed, how authorization to perform certain operations can be granted and revoked, how security ciphers can be chosen, and more. -* [Managing Security Sources](/riak/kv/2.0.5/using/security/managing-sources/) is an in-depth tutorial on how to +* [Managing Security Sources]({{}}riak/kv/2.0.5/using/security/managing-sources/) is an in-depth tutorial on how to implement Riak's four supported authentication sources: trusted networks, passwords, pluggable authentication modules, and certificates. @@ -194,7 +194,7 @@ override any settings from the new system. 
#### Relevant Docs -* [Configuration Files](/riak/kv/2.0.5/configuring/reference/) lists and describes all of the configurable +* [Configuration Files]({{}}riak/kv/2.0.5/configuring/reference/) lists and describes all of the configurable parameters available in Riak 2.0, from configuring your chosen storage backend(s) to setting default bucket properties to controlling Riak's logging system and much more. @@ -214,7 +214,7 @@ and keys. #### Relevant Docs -* [Using Bucket Types](/riak/kv/2.0.5/using/reference/bucket-types) explains how to create, modify, and activate +* [Using Bucket Types]({{}}riak/kv/2.0.5/using/reference/bucket-types) explains how to create, modify, and activate bucket types, as well as how the new system differs from the older, bucket properties-based system. @@ -226,20 +226,20 @@ and [Jordan West](https://github.com/jrwest). ## Dotted Version Vectors -In prior versions of Riak, [conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution) was managed using -[vector clocks](/riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks), which track object update causality. +In prior versions of Riak, [conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution) was managed using +[vector clocks]({{}}riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks), which track object update causality. Riak 2.0 has added support for dotted version vectors (DVVs). DVVs serve an analogous role to vector -clocks but are more effective at containing [sibling explosion](/riak/kv/2.0.5/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. +clocks but are more effective at containing [sibling explosion]({{}}riak/kv/2.0.5/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. #### Relevant Docs -* [Dotted Version Vectors](/riak/kv/2.0.5/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. +* [Dotted Version Vectors]({{}}riak/kv/2.0.5/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. ## New Client Libraries -While Basho offered official [client libraries](/riak/kv/2.0.5/developing/client-libraries) for Java, Ruby, +While Basho offered official [client libraries]({{}}riak/kv/2.0.5/developing/client-libraries) for Java, Ruby, Python, .NET and Erlang for versions of Riak prior to 2.0, all clients have undergone major changes in anticipation of the 2.0 release. 
diff --git a/content/riak/kv/2.0.5/learn/concepts.md b/content/riak/kv/2.0.5/learn/concepts.md index 77896d182c..d7e9fbea17 100644 --- a/content/riak/kv/2.0.5/learn/concepts.md +++ b/content/riak/kv/2.0.5/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.0.5/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.0.5/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.5/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.5/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.5/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.5/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.5/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.5/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.5/learn/concepts/vnodes -[config index]: /riak/kv/2.0.5/configuring -[plan index]: /riak/kv/2.0.5/setup/planning -[use index]: /riak/kv/2.0.5/using/ +[concept aae]: {{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.0.5/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.5/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.5/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.5/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.5/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.5/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.5/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.0.5/configuring +[plan index]: {{}}riak/kv/2.0.5/setup/planning +[use index]: {{}}riak/kv/2.0.5/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.0.5/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.0.5/learn/concepts/active-anti-entropy.md index 48bd68a6b1..4c2dfef474 100644 --- a/content/riak/kv/2.0.5/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.0.5/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.5/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.0.5/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.0.5/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.0.5/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.0.5/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.0.5/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.0.5/developing/usage/search +[usage search]: {{}}riak/kv/2.0.5/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.0.5/learn/concepts/buckets.md b/content/riak/kv/2.0.5/learn/concepts/buckets.md index 920ca896e5..93e742fb94 100644 --- a/content/riak/kv/2.0.5/learn/concepts/buckets.md +++ b/content/riak/kv/2.0.5/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.0.5/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.0.5/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.0.5/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.0.5/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.0.5/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.0.5/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.0.5/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.5/using/reference/strong-consistency -[config basic]: /riak/kv/2.0.5/configuring/basic -[dev api http]: /riak/kv/2.0.5/developing/api/http -[dev data types]: /riak/kv/2.0.5/developing/data-types -[glossary ring]: /riak/kv/2.0.5/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.5/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.5/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.5/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.5/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.5/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.0.5/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.0.5/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.0.5/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.0.5/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.0.5/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.0.5/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.0.5/configuring/basic +[dev api http]: {{}}riak/kv/2.0.5/developing/api/http +[dev data types]: {{}}riak/kv/2.0.5/developing/data-types +[glossary ring]: {{}}riak/kv/2.0.5/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.5/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.5/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.5/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.5/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.5/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.0.5/learn/concepts/capability-negotiation.md b/content/riak/kv/2.0.5/learn/concepts/capability-negotiation.md index c8be2b8e77..4a9ff50970 100644 --- a/content/riak/kv/2.0.5/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.0.5/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.0.5/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.0.5/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.0.5/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.0.5/developing/usage/mapreduce In versions of Riak prior to 1.2.0, [rolling upgrades][upgrade cluster] from an older version of Riak to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
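As a quick aside on the buckets.md hunk above: the "virtual keyspace" that buckets provide is easiest to see in client code, where the same key names unrelated objects in different buckets. A small, purely illustrative sketch in Python (bucket and key names invented):

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# The same key, 'alice', addresses two independent objects:
users = client.bucket('users')
sessions = client.bucket('sessions')

users.new('alice', data={'name': 'Alice'}).store()
sessions.new('alice', data={'token': 'abc123'}).store()
```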
diff --git a/content/riak/kv/2.0.5/learn/concepts/causal-context.md b/content/riak/kv/2.0.5/learn/concepts/causal-context.md index abb0482b40..6b0f812625 100644 --- a/content/riak/kv/2.0.5/learn/concepts/causal-context.md +++ b/content/riak/kv/2.0.5/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.0.5/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.0.5/developing/api/http -[dev key value]: /riak/kv/2.0.5/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.0.5/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.0.5/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.5/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.0.5/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.0.5/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.0.5/developing/api/http +[dev key value]: {{}}riak/kv/2.0.5/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.0.5/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.0.5/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.5/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.0.5/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.0.5/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -78,7 +78,7 @@ sections below. In the case of outcome 2, the choice between **a** and **b** is yours to make. If you set the `allow_mult` parameter to `true` for a bucket, -[using bucket types](/riak/kv/2.0.5/developing/usage/bucket-types), all writes to that bucket will create siblings +[using bucket types]({{}}riak/kv/2.0.5/developing/usage/bucket-types), all writes to that bucket will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions).
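Since the hunk above touches the `allow_mult` discussion: with `allow_mult` set to `true`, concurrent writes surface as siblings that the application resolves on read. A hedged sketch with the Python client follows; the bucket type name and the "longest value wins" rule are purely illustrative:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('siblings_allowed').bucket('profiles')

obj = bucket.get('bashobunny')

if len(obj.siblings) > 1:
    # Illustrative rule only: keep the sibling with the longest payload.
    winner = max(obj.siblings, key=lambda s: len(s.encoded_data or b''))
    obj.siblings = [winner]
    obj.store()  # writes the resolved value back with its causal context
```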
diff --git a/content/riak/kv/2.0.5/learn/concepts/clusters.md b/content/riak/kv/2.0.5/learn/concepts/clusters.md index 0253358305..402b22e038 100644 --- a/content/riak/kv/2.0.5/learn/concepts/clusters.md +++ b/content/riak/kv/2.0.5/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.5/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.0.5/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.5/learn/concepts/replication -[glossary node]: /riak/kv/2.0.5/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.0.5/learn/dynamo -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.5/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.5/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.0.5/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.0.5/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.5/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.5/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.0.5/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.5/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.5/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.0.5/learn/concepts/crdts.md b/content/riak/kv/2.0.5/learn/concepts/crdts.md index 893379bd92..b027e1724c 100644 --- a/content/riak/kv/2.0.5/learn/concepts/crdts.md +++ b/content/riak/kv/2.0.5/learn/concepts/crdts.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context dvv]: /riak/kv/2.0.5/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.0.5/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.0.5/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.0.5/developing/data-types -[glossary node]: /riak/kv/2.0.5/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.0.5/developing/usage/conflict-resolution +[concept causal context dvv]: {{}}riak/kv/2.0.5/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.0.5/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.0.5/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.0.5/developing/data-types +[glossary node]: {{}}riak/kv/2.0.5/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.0.5/developing/usage/conflict-resolution A pure key/value store is completely agnostic toward the data stored @@ -32,7 +32,7 @@ within it. Any key can be associated with values of any conceivable type, from short strings to large JSON objects to video files. Riak began as a pure key/value store, but over time it has become more and more aware of the data stored in it through features like [secondary -indexes](/riak/kv/2.0.5/developing/usage/secondary-indexes/) and [Search](/riak/kv/2.0.5/developing/usage/search/). +indexes]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes/) and [Search]({{}}riak/kv/2.0.5/developing/usage/search/). In version 2.0, Riak continued this evolution by introducing a series of eventually convergent **Data Types**. Riak Data Types are convergent @@ -214,7 +214,7 @@ The beauty of Data Types is that Riak "knows" how to resolve value conflicts by applying Data Type-specific rules. In general, Riak does this by remembering the **history** of a value and broadcasting that history along with the current value in the form of a [context -object](/riak/kv/2.0.5/developing/data-types/#Data-Types-and-Context) that is similar to a +object]({{}}riak/kv/2.0.5/developing/data-types/#Data-Types-and-Context) that is similar to a [vector clock][concept causal context vc] or `[dotted version vectors][concept causal context dvv]. Riak uses the history of each Data Type to make deterministic judgments about which value should be deemed correct. 
diff --git a/content/riak/kv/2.0.5/learn/concepts/eventual-consistency.md b/content/riak/kv/2.0.5/learn/concepts/eventual-consistency.md index 7c4b628d6d..7063f97fce 100644 --- a/content/riak/kv/2.0.5/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.0.5/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.5/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters -[concept replication]: /riak/kv/2.0.5/learn/concepts/replication -[glossary node]: /riak/kv/2.0.5/learn/glossary/#node -[glossary read rep]: /riak/kv/2.0.5/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.5/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.0.5/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.0.5/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.5/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.0.5/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.5/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.0.5/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.0.5/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.0.5/developing/data-modeling/). +or models]({{}}riak/kv/2.0.5/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
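Returning to the application-side strategies the eventual-consistency.md hunk mentions: the Python client can install a resolver callable per client or per bucket. The built-in last-write-wins resolver below trades safety for simplicity, so treat this as an illustrative sketch rather than a recommendation:

```python
from riak import RiakClient
from riak.resolver import last_written_resolver

client = RiakClient(pb_port=8087)

# Collapse siblings by timestamp, client-side, whenever objects are read.
client.resolver = last_written_resolver

obj = client.bucket('animals').get('bashobunny')
print(obj.data)  # any siblings were resolved before this point
```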
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.0.5/learn/concepts/keys-and-objects.md b/content/riak/kv/2.0.5/learn/concepts/keys-and-objects.md index 64db87d0b8..695beb9ef6 100644 --- a/content/riak/kv/2.0.5/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.0.5/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.5/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.0.5/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.0.5/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.0.5/learn/concepts/replication.md b/content/riak/kv/2.0.5/learn/concepts/replication.md index 36923deca4..17a7b61b14 100644 --- a/content/riak/kv/2.0.5/learn/concepts/replication.md +++ b/content/riak/kv/2.0.5/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.0.5/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.0.5/learn/concepts/vnodes -[glossary node]: /riak/kv/2.0.5/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.5/learn/glossary/#ring -[usage replication]: /riak/kv/2.0.5/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.0.5/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.0.5/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.5/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.0.5/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.0.5/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.0.5/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.0.5/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replication]({{}}riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail.
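Tying the `n_val` discussion above to client code: once a bucket type with the desired `n_val` has been created and activated on the operator side (the `n_val_of_2` name below is hypothetical), applications simply address buckets through that type. A minimal Python sketch:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# Assumes an operator has already run something like
# `riak-admin bucket-type create n_val_of_2 ...` and activated it.
bucket = client.bucket_type('n_val_of_2').bucket('sensor_readings')

bucket.new('reading-0001', data={'celsius': 21.4}).store()
```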
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.0.5/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.0.5/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.0.5/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.0.5/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.0.5/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.0.5/learn/concepts/strong-consistency.md b/content/riak/kv/2.0.5/learn/concepts/strong-consistency.md index fff9935e7d..746dab67c2 100644 --- a/content/riak/kv/2.0.5/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.0.5/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.5/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.5/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.5/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.5/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.5/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.5/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.5/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.0.5/learn/concepts/vnodes.md b/content/riak/kv/2.0.5/learn/concepts/vnodes.md index 8625cd4300..37fa801d03 100644 --- a/content/riak/kv/2.0.5/learn/concepts/vnodes.md +++ b/content/riak/kv/2.0.5/learn/concepts/vnodes.md @@ -16,16 +16,16 @@ aliases: --- -[concept causal context]: /riak/kv/2.0.5/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.0.5/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.0.5/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.5/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.0.5/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.5/learn/glossary/#ring -[perf strong consistency]: /riak/kv/2.0.5/using/performance/strong-consistency -[plan backend]: /riak/kv/2.0.5/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.5/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.0.5/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.0.5/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.0.5/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.0.5/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.5/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.0.5/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.5/learn/glossary/#ring +[perf strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency +[plan backend]: {{}}riak/kv/2.0.5/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.5/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.0.5/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -81,7 +81,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -103,7 +103,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.0.5/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.0.5/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.0.5/learn/dynamo.md b/content/riak/kv/2.0.5/learn/dynamo.md index 75d27327cb..b19bb2c955 100644 --- a/content/riak/kv/2.0.5/learn/dynamo.md +++ b/content/riak/kv/2.0.5/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.0.5/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.0.5/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.0.5/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.0.5/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. 
It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. -[HTTP API]: /riak/kv/2.0.5/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.0.5/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.0.5/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.0.5/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.0.5/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.0.5/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.0.5/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.0.5/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.0.5/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.0.5/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.0.5/developing/api/http/) +>[REST API]({{}}riak/kv/2.0.5/developing/api/http/) > ->[Writing Data](/riak/kv/2.0.5/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.0.5/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.0.5/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.0.5/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.0.5/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.0.5/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.0.5/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.0.5/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.0.5/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.0.5/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. 
-[Multi Datacenter Replication]: /riak/kv/2.0.5/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.0.5/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. > This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.0.5/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.0.5/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.0.5/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.0.5/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.0.5/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.0.5/setup/planning/backend/ -[Bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.0.5/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.0.5/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.0.5/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.0.5/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.0.5/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.0.5/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.0.5/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.0.5/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list.
Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.0.5/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.5/using/performance/benchmarking/ Dynamo is used by several services with different configurations. These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.0.5/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.0.5/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.0.5/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.0.5/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.0.5/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.0.5/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.0.5/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.0.5/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.0.5/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.0.5/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. 
-[Basho Bench]: /riak/kv/2.0.5/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.5/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. -[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.0.5/learn/glossary.md b/content/riak/kv/2.0.5/learn/glossary.md index a3afe61d2d..1e598fb0e0 100644 --- a/content/riak/kv/2.0.5/learn/glossary.md +++ b/content/riak/kv/2.0.5/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.0.5/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.0.5/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.0.5/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.5/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.5/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.5/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.5/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.0.5/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.0.5/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.0.5/developing/api/http -[dev data model]: /riak/kv/2.0.5/developing/data-modeling -[dev data types]: /riak/kv/2.0.5/developing/data-types -[glossary read rep]: /riak/kv/2.0.5/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.0.5/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.0.5/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.5/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.5/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.5/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.5/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.0.5/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.0.5/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.0.5/developing/api/http +[dev data model]: {{}}riak/kv/2.0.5/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.5/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.5/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.0.5/learn/dynamo -[plan cluster 
capacity]: /riak/kv/2.0.5/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.0.5/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.0.5/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.0.5/learn/dynamo +[plan cluster capacity]: {{}}riak/kv/2.0.5/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.0.5/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.0.5/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.0.5/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.5/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.5/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.5/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.5/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.5/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.0.5/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.0.5/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. 
-* [Bucket Types](/riak/kv/2.0.5/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.0.5/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.0.5/learn/use-cases.md b/content/riak/kv/2.0.5/learn/use-cases.md index f08ea33c11..035935695c 100644 --- a/content/riak/kv/2.0.5/learn/use-cases.md +++ b/content/riak/kv/2.0.5/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.0.5/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.0.5/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.0.5/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.0.5/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.0.5/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.0.5/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.0.5/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.0.5/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.0.5/developing/data-types -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.0.5/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.5/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.5/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.0.5/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.0.5/developing/data-modeling/#log-data +[dev data model sensor data]: {{}}riak/kv/2.0.5/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.0.5/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.0.5/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.0.5/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.0.5/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.0.5/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.0.5/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.0.5/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.0.5/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.5/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.5/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. 
In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.0.5/learn/why-riak-kv.md b/content/riak/kv/2.0.5/learn/why-riak-kv.md index e82dacd41d..2021a82684 100644 --- a/content/riak/kv/2.0.5/learn/why-riak-kv.md +++ b/content/riak/kv/2.0.5/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.0.5/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.0.5/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.5/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.0.5/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.0.5/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.0.5/developing/data-types -[glossary read rep]: /riak/kv/2.0.5/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.0.5/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.5/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.0.5/setup/downgrade.md b/content/riak/kv/2.0.5/setup/downgrade.md index c2abba6e24..600bce20a3 100644 --- a/content/riak/kv/2.0.5/setup/downgrade.md +++ b/content/riak/kv/2.0.5/setup/downgrade.md @@ -17,7 +17,7 @@ aliases: Downgrades of Riak are tested and supported for two feature release versions, with the general procedure being similar to that of a -[rolling upgrade](/riak/kv/2.0.5/setup/upgrading/cluster). +[rolling upgrade]({{}}riak/kv/2.0.5/setup/upgrading/cluster). {{% note title="End Of Life Warning" %}} We test downgrading for two feature release versions. However, all versions below KV 2.0 are End Of Life (EOL) and unsupported. Please be aware of that if you choose to downgrade. @@ -49,9 +49,9 @@ both 1.4 and 1.3 are performed. * Riak Control should be disabled throughout the rolling downgrade process -* [Configuration Files](/riak/kv/2.0.5/configuring/reference) must be replaced with those of the version +* [Configuration Files]({{}}riak/kv/2.0.5/configuring/reference) must be replaced with those of the version being downgraded to -* [Active anti-entropy](/riak/kv/2.0.5/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version +* [Active anti-entropy]({{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version below 1.3. ## Before Stopping a Node @@ -94,7 +94,7 @@ will need to be downgraded before the rolling downgrade begins. This can be done using the --downgrade flag with `riak-admin reformat-indexes` More information on the `riak-admin reformat-indexes` command, and downgrading indexes can be found in the -[`riak-admin`](/riak/kv/2.0.5/using/admin/riak-admin/#reformat-indexes) documentation. +[`riak-admin`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#reformat-indexes) documentation. 
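For reference, the 2i downgrade step described above reduces to a single command per node. A minimal sketch (optional concurrency and batch-size arguments are omitted here; check the `riak-admin` usage output on your version before running):

```
# Reformat secondary indexes to the older on-disk format before
# beginning the rolling downgrade, as described above:
riak-admin reformat-indexes --downgrade
```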
## Before Starting a Node diff --git a/content/riak/kv/2.0.5/setup/installing.md b/content/riak/kv/2.0.5/setup/installing.md index 3410243cc2..83566f38e2 100644 --- a/content/riak/kv/2.0.5/setup/installing.md +++ b/content/riak/kv/2.0.5/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.0.5/installing/ --- -[install aws]: /riak/kv/2.0.5/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.5/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.5/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.5/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.5/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.5/setup/installing/smartos -[install solaris]: /riak/kv/2.0.5/setup/installing/solaris -[install suse]: /riak/kv/2.0.5/setup/installing/suse -[install windows azure]: /riak/kv/2.0.5/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.5/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.0.5/setup/upgrading +[install aws]: {{}}riak/kv/2.0.5/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.5/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.5/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.5/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.5/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.5/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.5/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.5/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.5/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.0.5/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.0.5/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.0.5/setup/installing/amazon-web-services.md b/content/riak/kv/2.0.5/setup/installing/amazon-web-services.md index f492186d6b..d3a2d3ded5 100644 --- a/content/riak/kv/2.0.5/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.0.5/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. @@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.0.5/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.0.5/using/security/). 
## Clustering Riak on AWS diff --git a/content/riak/kv/2.0.5/setup/installing/debian-ubuntu.md b/content/riak/kv/2.0.5/setup/installing/debian-ubuntu.md index 5ea8259f22..8f34e909b5 100644 --- a/content/riak/kv/2.0.5/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.0.5/setup/installing/debian-ubuntu.md @@ -20,10 +20,10 @@ aliases: -[install source index]: /riak/kv/2.0.5/setup/installing/source/ -[security index]: /riak/kv/2.0.5/using/security/ -[install source erlang]: /riak/kv/2.0.5/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.5/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.5/setup/installing/source/ +[security index]: {{}}riak/kv/2.0.5/using/security/ +[install source erlang]: {{}}riak/kv/2.0.5/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.5/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.0.5/setup/installing/freebsd.md b/content/riak/kv/2.0.5/setup/installing/freebsd.md index 4ee2a0360c..360815e8dd 100644 --- a/content/riak/kv/2.0.5/setup/installing/freebsd.md +++ b/content/riak/kv/2.0.5/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.0.5/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.5/downloads/ -[install verify]: /riak/kv/2.0.5/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.5/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.5/downloads/ +[install verify]: {{}}riak/kv/2.0.5/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.0.5/setup/installing/mac-osx.md b/content/riak/kv/2.0.5/setup/installing/mac-osx.md index 59116dd401..e3e1edccb5 100644 --- a/content/riak/kv/2.0.5/setup/installing/mac-osx.md +++ b/content/riak/kv/2.0.5/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.0.5/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.0.5/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.5/setup/installing/verify +[perf open files]: {{}}riak/kv/2.0.5/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.0.5/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.5/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. You can install from source or download a diff --git a/content/riak/kv/2.0.5/setup/installing/rhel-centos.md b/content/riak/kv/2.0.5/setup/installing/rhel-centos.md index 3eb92b4173..6933460819 100644 --- a/content/riak/kv/2.0.5/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.0.5/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.0.5/setup/installing/source -[install source erlang]: /riak/kv/2.0.5/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.5/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.5/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.5/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.5/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. 
The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.0.5/setup/installing/smartos.md b/content/riak/kv/2.0.5/setup/installing/smartos.md index cac9c71b8a..370c84a830 100644 --- a/content/riak/kv/2.0.5/setup/installing/smartos.md +++ b/content/riak/kv/2.0.5/setup/installing/smartos.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.5/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.5/setup/installing/verify The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. diff --git a/content/riak/kv/2.0.5/setup/installing/solaris.md b/content/riak/kv/2.0.5/setup/installing/solaris.md index 99ae84674f..ba44919ec9 100644 --- a/content/riak/kv/2.0.5/setup/installing/solaris.md +++ b/content/riak/kv/2.0.5/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.5/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.5/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. diff --git a/content/riak/kv/2.0.5/setup/installing/source.md b/content/riak/kv/2.0.5/setup/installing/source.md index 0b892d97f1..955dd309c8 100644 --- a/content/riak/kv/2.0.5/setup/installing/source.md +++ b/content/riak/kv/2.0.5/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.0.5/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.5/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.0.5/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.0.5/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.0.5/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.0.5/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.0.5/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.5/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.5/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.0.5/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.0.5/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.0.5/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.0.5/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.0.5/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.0.5/setup/installing/source/erlang.md b/content/riak/kv/2.0.5/setup/installing/source/erlang.md index 20709e5bab..b46b1e6269 100644 --- a/content/riak/kv/2.0.5/setup/installing/source/erlang.md +++ b/content/riak/kv/2.0.5/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.5/installing/source/erlang/ --- -[install index]: /riak/kv/2.0.5/setup/installing -[security basics]: /riak/kv/2.0.5/using/security/basics +[install index]: {{}}riak/kv/2.0.5/setup/installing +[security basics]: {{}}riak/kv/2.0.5/using/security/basics Pre-packaged versions of Riak include an Erlang installation. 
If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho8.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.0.5/setup/installing/source/jvm.md b/content/riak/kv/2.0.5/setup/installing/source/jvm.md index 4fdf313710..691da1936d 100644 --- a/content/riak/kv/2.0.5/setup/installing/source/jvm.md +++ b/content/riak/kv/2.0.5/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.0.5/installing/source/jvm/ --- -[usage search]: /riak/kv/2.0.5/developing/usage/search +[usage search]: {{}}riak/kv/2.0.5/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.0.5/setup/installing/suse.md b/content/riak/kv/2.0.5/setup/installing/suse.md index 260393d842..8e74bfd3a0 100644 --- a/content/riak/kv/2.0.5/setup/installing/suse.md +++ b/content/riak/kv/2.0.5/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.5/installing/suse/ --- -[install verify]: /riak/kv/2.0.5/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.5/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.0.5/setup/installing/verify.md b/content/riak/kv/2.0.5/setup/installing/verify.md index 91e077755b..b23ac2f206 100644 --- a/content/riak/kv/2.0.5/setup/installing/verify.md +++ b/content/riak/kv/2.0.5/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.5/installing/verify-install/ --- -[client libraries]: /riak/kv/2.0.5/developing/client-libraries -[perf open files]: /riak/kv/2.0.5/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.0.5/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.0.5/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.0.5/developing/client-libraries +[perf open files]: {{}}riak/kv/2.0.5/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.0.5/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.0.5/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.0.5/setup/installing/windows-azure.md b/content/riak/kv/2.0.5/setup/installing/windows-azure.md index 1745ddd2d5..f25739577a 100644 --- a/content/riak/kv/2.0.5/setup/installing/windows-azure.md +++ b/content/riak/kv/2.0.5/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. 
- ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". - ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.0.5/setup/planning/backend.md b/content/riak/kv/2.0.5/setup/planning/backend.md index 6a1d689b5f..1b828463db 100644 --- a/content/riak/kv/2.0.5/setup/planning/backend.md +++ b/content/riak/kv/2.0.5/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.5/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.5/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.5/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.0.5/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.5/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.5/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.0.5/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
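Backend choice itself is a one-line setting. A minimal riak.conf sketch, assuming the 2.x configuration format (the chosen value is only an example):

```
## riak.conf (sketch): select one of the backends covered in this
## section: bitcask (the default), leveldb, memory, or multi.
storage_backend = leveldb
```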
diff --git a/content/riak/kv/2.0.5/setup/planning/backend/bitcask.md b/content/riak/kv/2.0.5/setup/planning/backend/bitcask.md index 2d25bdb42b..202cb3704a 100644 --- a/content/riak/kv/2.0.5/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.0.5/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.0.5/using/admin/riak-cli -[config reference]: /riak/kv/2.0.5/configuring/reference -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.0.5/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.0.5/setup/planning/backend/multi -[usage search]: /riak/kv/2.0.5/developing/usage/search - -[glossary aae]: /riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.0.5/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.0.5/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.0.5/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.0.5/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.0.5/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.0.5/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.0.5/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.0.5/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.0.5/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
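To pair with the Bitcask overview above, a hedged riak.conf sketch; the data path is an assumption, since packaged installs set a platform-appropriate default:

```
## riak.conf (sketch): enable Bitcask and point it at a data
## directory. The path below is illustrative only.
storage_backend = bitcask
bitcask.data_root = /var/lib/riak/bitcask
```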
diff --git a/content/riak/kv/2.0.5/setup/planning/backend/leveldb.md b/content/riak/kv/2.0.5/setup/planning/backend/leveldb.md index 9e6c6fd0a1..0df098a69b 100644 --- a/content/riak/kv/2.0.5/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.0.5/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.5/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.5/configuring/reference -[perf index]: /riak/kv/2.0.5/using/performance -[config reference#aae]: /riak/kv/2.0.5/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[perf index]: {{}}riak/kv/2.0.5/using/performance +[config reference#aae]: {{}}riak/kv/2.0.5/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.0.5/setup/planning/backend/memory.md b/content/riak/kv/2.0.5/setup/planning/backend/memory.md index 38810e39e5..dba3e1f72b 100644 --- a/content/riak/kv/2.0.5/setup/planning/backend/memory.md +++ b/content/riak/kv/2.0.5/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.5/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.0.5/configuring/reference -[plan backend multi]: /riak/kv/2.0.5/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[plan backend multi]: {{}}riak/kv/2.0.5/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.0.5/setup/planning/backend/multi.md b/content/riak/kv/2.0.5/setup/planning/backend/multi.md index 0202271c95..d06a713d6d 100644 --- a/content/riak/kv/2.0.5/setup/planning/backend/multi.md +++ b/content/riak/kv/2.0.5/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.5/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.0.5/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.5/setup/planning/backend/memory -[config reference]: /riak/kv/2.0.5/configuring/reference -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.0.5/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.0.5/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.5/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.0.5/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.0.5/setup/planning/best-practices.md b/content/riak/kv/2.0.5/setup/planning/best-practices.md index 3410601330..f508d90e41 100644 --- a/content/riak/kv/2.0.5/setup/planning/best-practices.md +++ b/content/riak/kv/2.0.5/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.5/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.0.5/using/reference/handoff -[config mapreduce]: /riak/kv/2.0.5/configuring/mapreduce -[glossary aae]: /riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.0.5/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.0.5/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.0.5/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.0.5/setup/planning/bitcask-capacity-calc.md index 9cbde90c01..d6a965ecc5 100644 --- a/content/riak/kv/2.0.5/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.0.5/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
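The calculators referenced above automate an estimate you can rough out by hand. A back-of-the-envelope sketch (every figure below, including the static per-key overhead, is an assumption for illustration; use the linked calculators for real planning):

```
# Rough Bitcask RAM estimate, in bytes (all inputs assumed):
KEYS=50000000     # total objects stored
AVG_KEY=36        # average key length
AVG_BUCKET=10     # average bucket-name length
N_VAL=3           # replicas per object
OVERHEAD=45       # approximate static Bitcask per-key overhead
echo $(( KEYS * (OVERHEAD + AVG_KEY + AVG_BUCKET) * N_VAL ))
```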
diff --git a/content/riak/kv/2.0.5/setup/planning/cluster-capacity.md b/content/riak/kv/2.0.5/setup/planning/cluster-capacity.md index 3e47a1ea4d..5da8dd7885 100644 --- a/content/riak/kv/2.0.5/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.0.5/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.5/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.0.5/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.0.5/setup/planning -[concept replication]: /riak/kv/2.0.5/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.0.5/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.0.5/configuring/reference -[perf benchmark]: /riak/kv/2.0.5/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.0.5/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.0.5/setup/planning +[concept replication]: {{}}riak/kv/2.0.5/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[perf benchmark]: {{}}riak/kv/2.0.5/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.0.5/setup/planning/operating-system.md b/content/riak/kv/2.0.5/setup/planning/operating-system.md index 6b45b8b516..fdf05cfe66 100644 --- a/content/riak/kv/2.0.5/setup/planning/operating-system.md +++ b/content/riak/kv/2.0.5/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.0.5/downloads/ +[downloads]: {{}}riak/kv/2.0.5/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.0.5/setup/planning/start.md b/content/riak/kv/2.0.5/setup/planning/start.md index 2e4d130180..149d0e7b77 100644 --- a/content/riak/kv/2.0.5/setup/planning/start.md +++ b/content/riak/kv/2.0.5/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.5/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.0.5/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.5/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.0.5/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.0.5/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.5/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.0.5/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster.
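Once capacity questions are settled, the resulting plan is carried out with the `riak-admin cluster` commands linked above. A minimal sketch, with assumed node names:

```
# On each new node, stage a join against an existing member:
riak-admin cluster join riak@node1.example.com
# Then, from any one node, review and apply the staged changes:
riak-admin cluster plan
riak-admin cluster commit
```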
diff --git a/content/riak/kv/2.0.5/setup/upgrading/checklist.md b/content/riak/kv/2.0.5/setup/upgrading/checklist.md index 6eca99d89c..fd960cdc9c 100644 --- a/content/riak/kv/2.0.5/setup/upgrading/checklist.md +++ b/content/riak/kv/2.0.5/setup/upgrading/checklist.md @@ -16,24 +16,24 @@ aliases: - /riak/kv/2.0.5/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.0.5/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.5/using/performance +[perf open files]: {{}}riak/kv/2.0.5/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.5/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.0.5/using/security/basics -[cluster ops load balance]: /riak/kv/2.0.5/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.0.5/configuring/reference -[config backend]: /riak/kv/2.0.5/configuring/backend -[usage search]: /riak/kv/2.0.5/developing/usage/search -[usage conflict resolution]: /riak/kv/2.0.5/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.0.5/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.0.5/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.0.5/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.0.5/using/admin/commands -[use admin riak control]: /riak/kv/2.0.5/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.0.5/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.0.5/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.0.5/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.0.5/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.0.5/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[config backend]: {{}}riak/kv/2.0.5/configuring/backend +[usage search]: {{}}riak/kv/2.0.5/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.0.5/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.5/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.0.5/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.0.5/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.0.5/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.0.5/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.0.5/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.0.5/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a production environment from a development or testing environment can be a complex process. While the specific process will depend on your environment and practices, there are some basics for you to consider and a few questions you will want to ask while making this transition. 
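One concrete basic from that checklist, the open-files limit referenced above, is quick to verify. A sketch (the figures are assumptions; see the open-files-limit page for recommended values):

```
# Check the limit in effect for the user that runs Riak:
ulimit -n
# On Linux, a persistent raise typically lives in
# /etc/security/limits.conf, e.g.:
#   riak soft nofile 65536
#   riak hard nofile 65536
```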
diff --git a/content/riak/kv/2.0.5/setup/upgrading/cluster.md b/content/riak/kv/2.0.5/setup/upgrading/cluster.md index dd31cca8df..c50f74264c 100644 --- a/content/riak/kv/2.0.5/setup/upgrading/cluster.md +++ b/content/riak/kv/2.0.5/setup/upgrading/cluster.md @@ -11,23 +11,23 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.0.5/ops/upgrading/rolling-upgrades/ - /riak/kv/2.0.5/ops/upgrading/rolling-upgrades/ --- -[production checklist]: /riak/kv/2.0.5/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.0.5/using/admin/riak-control -[use admin commands]: /riak/kv/2.0.5/using/admin/commands -[use admin riak-admin]: /riak/kv/2.0.5/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.0.5/developing/usage/secondary-indexes -[release notes]: /riak/kv/2.0.5/release-notes/ +[production checklist]: {{}}riak/kv/2.0.5/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.0.5/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.0.5/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.0.5/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.0.5/developing/usage/secondary-indexes +[release notes]: {{}}riak/kv/2.0.5/release-notes/ [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.0.5/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.0.5/using/reference/jmx -[snmp]: /riak/kv/2.0.5/using/reference/snmp +[cluster ops mdc]: {{}}riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.0.5/using/reference/jmx +[snmp]: {{}}riak/kv/2.0.5/using/reference/snmp {{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. @@ -38,7 +38,7 @@ recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. -If you run [Riak Control](/riak/kv/2.0.5/using/admin/riak-control), you should disable it during the rolling upgrade process. +If you run [Riak Control]({{}}riak/kv/2.0.5/using/admin/riak-control), you should disable it during the rolling upgrade process. {{% /note %}} Riak KV nodes negotiate with each other to determine supported @@ -104,9 +104,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.5/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.5/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.5/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.5/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.5/release-notes/). {{% /note %}} ## RHEL/CentOS @@ -166,9 +166,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. 
Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.5/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.5/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.5/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.5/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.5/release-notes/). {{% /note %}} ## Solaris/OpenSolaris @@ -252,9 +252,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.5/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.5/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.5/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.5/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.5/release-notes/). {{% /note %}} ## Rolling Upgrade to Enterprise diff --git a/content/riak/kv/2.0.5/setup/upgrading/search.md b/content/riak/kv/2.0.5/setup/upgrading/search.md index 110aae3fc2..c88cb877fe 100644 --- a/content/riak/kv/2.0.5/setup/upgrading/search.md +++ b/content/riak/kv/2.0.5/setup/upgrading/search.md @@ -11,7 +11,7 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" --- If you're using Search in a version of Riak prior to 2.0 (1.3.0 to @@ -270,4 +270,4 @@ search property is set to false. 11. Finally, delete the merge index directories to reclaim disk space. -For any questions reach out to the [Riak community](/community). Preferably, ask your questions up front rather than during the middle of a migration. +For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. diff --git a/content/riak/kv/2.0.5/setup/upgrading/version.md b/content/riak/kv/2.0.5/setup/upgrading/version.md index 20c081801a..a44a656e2f 100644 --- a/content/riak/kv/2.0.5/setup/upgrading/version.md +++ b/content/riak/kv/2.0.5/setup/upgrading/version.md @@ -20,7 +20,7 @@ explains which default Riak behaviors have changed and specific steps to take for a successful upgrade. For an overview of the new features and functionality -included in version 2.0, check out our guide to [Riak 2.0](/riak/kv/2.0.5/introduction). +included in version 2.0, check out our guide to [Riak 2.0]({{}}riak/kv/2.0.5/introduction). ## New Clients @@ -36,14 +36,14 @@ was built with those features in mind. 
There are official While we strongly recommend using the newest versions of these clients, older versions will still work with Riak 2.0, with the drawback that -those older clients will not be able to take advantage of [new features](/riak/kv/2.0.5/introduction) like [data types](/riak/kv/2.0.5/developing/data-types) or the new [Riak Search](/riak/kv/2.0.5/using/reference/search). +those older clients will not be able to take advantage of [new features]({{}}riak/kv/2.0.5/introduction) like [data types]({{}}riak/kv/2.0.5/developing/data-types) or the new [Riak Search]({{}}riak/kv/2.0.5/using/reference/search). ## Bucket Types In versions of Riak prior to 2.0, the location of objects was -determined by objects' [bucket](/riak/kv/2.0.5/learn/concepts/buckets) and [key](/riak/kv/2.0.5/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties](/riak/kv/2.0.5/developing/usage/bucket-types/). +determined by objects' [bucket]({{}}riak/kv/2.0.5/learn/concepts/buckets) and [key]({{}}riak/kv/2.0.5/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties]({{}}riak/kv/2.0.5/developing/usage/bucket-types/). -In Riak 2.0, [bucket types](/riak/kv/2.0.5/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types](/riak/kv/2.0.5/using/reference/bucket-types). +In Riak 2.0, [bucket types]({{}}riak/kv/2.0.5/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types]({{}}riak/kv/2.0.5/using/reference/bucket-types). Here, we'll list some of the things to be aware of when upgrading. #### Bucket types and object location @@ -56,7 +56,7 @@ is determined by: * key This means there are 3 namespaces involved in object location instead of 2. -A full tutorial can be found in [Using Bucket Types](/riak/kv/2.0.5/using/reference/bucket-types). +A full tutorial can be found in [Using Bucket Types]({{}}riak/kv/2.0.5/using/reference/bucket-types). If your application was written using a version of Riak prior to 2.0, you should make sure that any endpoint in Riak targeting @@ -75,8 +75,8 @@ configurations.
The following URLs are equivalent in Riak 2.0: If you use object locations that don't specify a bucket type, you have three options: -* Accept Riak's [default bucket configurations](/riak/kv/2.0.5/using/reference/bucket-types/#buckets-as-namespaces) -* Change Riak's defaults using your [configuration files](/riak/kv/2.0.5/configuring/reference/#default-bucket-properties) +* Accept Riak's [default bucket configurations]({{}}riak/kv/2.0.5/using/reference/bucket-types/#buckets-as-namespaces) +* Change Riak's defaults using your [configuration files]({{}}riak/kv/2.0.5/configuring/reference/#default-bucket-properties) * Manage multiple sets of bucket properties by specifying those properties for all operations (not recommended) @@ -86,17 +86,17 @@ One reason we recommend using bucket types for Riak 2.0 and later is because many newer Riak features were built with bucket types as a precondition: -* [Strong consistency](/riak/2.0.5/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem +* [Strong consistency]({{}}riak/kv/2.0.5/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem requires you to set the `consistent` parameter on a bucket type to `true` -* [Riak Data Types](/riak/kv/2.0.5/developing/data-types) --- In order to use Riak Data - Types, you must [create bucket types](/riak/kv/2.0.5/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the +* [Riak Data Types]({{}}riak/kv/2.0.5/developing/data-types) --- In order to use Riak Data + Types, you must [create bucket types]({{}}riak/kv/2.0.5/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the Data Type you are using #### Bucket types and downgrades If you decide to use bucket types, please remember that you -cannot [downgrade](/riak/kv/2.0.5/setup/downgrade) your cluster to a version of +cannot [downgrade]({{}}riak/kv/2.0.5/setup/downgrade) your cluster to a version of Riak prior to 2.0 if you have both created and activated a bucket type. @@ -104,20 +104,20 @@ bucket type. One of the biggest changes in version 2.0 regarding application development involves Riak's default -[siblings](/riak/kv/2.0.5/learn/concepts/causal-context/#siblings) behavior. +[siblings]({{}}riak/kv/2.0.5/learn/concepts/causal-context/#siblings) behavior. In versions prior to 2.0, the `allow_mult` setting was set to `false` by default for all buckets. So Riak's default behavior was to resolve -object replica [conflicts](/riak/kv/2.0.5/developing/usage/conflict-resolution) between nodes on its +object replica [conflicts]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution) between nodes on its own; relieving connecting clients of the need to resolve those conflicts. **In 2.0, `allow_mult` is set to `true` for any bucket type that you create and activate.** -This means that the default when [using bucket types](/riak/kv/2.0.5/using/reference/bucket-types/) is to handle [conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution) on the client side using -either traditional [vector clocks](/riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors](/riak/kv/2.0.5/learn/concepts/causal-context/#dotted-version-vector). 
+This means that the default when [using bucket types]({{}}riak/kv/2.0.5/using/reference/bucket-types/) is to handle [conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution) on the client side using +either traditional [vector clocks]({{}}riak/kv/2.0.5/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors]({{}}riak/kv/2.0.5/learn/concepts/causal-context/#dotted-version-vector). If you wish to set `allow_mult` to `false` in version 2.0, you have two options: @@ -126,11 +126,11 @@ options: * Don't use bucket types. More information on handling siblings can be found in our documentation -on [conflict resolution](/riak/kv/2.0.5/developing/usage/conflict-resolution). +on [conflict resolution]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution). ## Enabling Security -The [authentication and authorization](/riak/kv/2.0.5/using/security/basics) mechanisms included with Riak 2.0 should only be turned +The [authentication and authorization]({{}}riak/kv/2.0.5/using/security/basics) mechanisms included with Riak 2.0 should only be turned on after careful testing in a non-production environment. Security changes the way all applications interact with Riak. @@ -140,12 +140,12 @@ If you decide to upgrade to version 2.0, you can still downgrade your cluster to an earlier version of Riak if you wish, _unless_ you perform one of the following actions in your cluster: -* Index data to be used in conjunction with the new [Riak Search](/riak/kv/2.0.5/using/reference/search). -* Create _and_ activate one or more [bucket types](/riak/kv/2.0.5/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: - - [Strong consistency](/riak/2.0.5/using/reference/strong-consistency) - - [Riak Data Types](/riak/kv/2.0.5/developing/data-types) +* Index data to be used in conjunction with the new [Riak Search]({{}}riak/kv/2.0.5/using/reference/search). +* Create _and_ activate one or more [bucket types]({{}}riak/kv/2.0.5/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: + - [Strong consistency]({{}}riak/kv/2.0.5/using/reference/strong-consistency) + - [Riak Data Types]({{}}riak/kv/2.0.5/developing/data-types) -If you use other new features, such as [Riak Security](/riak/kv/2.0.5/using/security/basics) or the new [configuration files](/riak/kv/2.0.5/configuring/reference/), you can still +If you use other new features, such as [Riak Security]({{}}riak/kv/2.0.5/using/security/basics) or the new [configuration files]({{}}riak/kv/2.0.5/configuring/reference/), you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. @@ -155,7 +155,7 @@ Riak 2.0 offers a new configuration system that both simplifies configuration syntax and uses one configuration file, `riak.conf`, instead of the two files, `app.config` and `vm.args`, required by the older system. Full documentation of the new system can be found in -[Configuration Files](/riak/kv/2.0.5/configuring/reference/). +[Configuration Files]({{}}riak/kv/2.0.5/configuring/reference/). If you're upgrading to Riak 2.0 from an earlier version, you have two configuration options: @@ -166,12 +166,12 @@ configuration options: recognized in Riak 2.0. 
If you choose the first option, make sure to consult the -[configuration files](/riak/kv/2.0.5/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. +[configuration files]({{}}riak/kv/2.0.5/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. If you choose the second option, Riak will automatically determine that the older configuration system is being used. You should be aware, however, that some settings must be set in an `advanced.config` file. -For a listing of those parameters, see our documentation on [advanced configuration](/riak/kv/2.0.5/configuring/reference/#advanced-configuration). +For a listing of those parameters, see our documentation on [advanced configuration]({{}}riak/kv/2.0.5/configuring/reference/#advanced-configuration). If you choose to keep the existing `app.config` files, you _must_ add the following additional settings in the `riak_core` section: @@ -209,7 +209,7 @@ default to a value of `15`, which can cause problems in some clusters. ## Upgrading Search Information on upgrading Riak Search to 2.0 can be found in our -[Search upgrade guide](/riak/kv/2.0.5/setup/upgrading/search). +[Search upgrade guide]({{}}riak/kv/2.0.5/setup/upgrading/search). ## Migrating from Short Names @@ -220,12 +220,11 @@ and `-name` in `vm.args`. If you are upgrading from a previous version of Riak to 2.0 and are using `-sname` in your `vm.args`, the below steps are required to migrate away from `-sname`. -1. Upgrade to Riak -[1.4.12](http://docs.basho.com/riak/1.4.12/downloads/). +1. Upgrade to Riak 1.4.12. 2. Back up the ring directory on each node, typically located in `/var/lib/riak/ring`. 3. Stop all nodes in your cluster. -4. Run [`riak-admin reip <old_nodename> <new_nodename>`](/riak/kv/2.0.5/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your +4. Run [`riak-admin reip <old_nodename> <new_nodename>`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your cluster. For example, in a 5 node cluster this will be run 25 total times, 5 times on each node. The `<old_nodename>` is the current shortname, and the `<new_nodename>` is the new fully qualified hostname.
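To make step 4 concrete, here is a minimal sketch of the `reip` pass for a hypothetical three-node cluster. The node names (`riak@riak1`, `riak1.example.com`, and so on) are invented for illustration and are not taken from this changeset; following the rule above, the full set of commands is repeated on every node, so a three-node cluster sees nine runs in total.

```bash
# Hypothetical -sname to -name migration for a three-node cluster.
# Run this entire block on EACH of the three nodes (3 x 3 = 9 runs).
riak-admin reip riak@riak1 riak@riak1.example.com
riak-admin reip riak@riak2 riak@riak2.example.com
riak-admin reip riak@riak3 riak@riak3.example.com
```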
diff --git a/content/riak/kv/2.0.5/using.md b/content/riak/kv/2.0.5/using.md index 41a2bc631f..3987d33480 100644 --- a/content/riak/kv/2.0.5/using.md +++ b/content/riak/kv/2.0.5/using.md @@ -15,7 +15,7 @@ toc: true [use running cluster]: ../using/running-a-cluster [use admin index]: ../using/admin/ [cluster ops index]: ../using/cluster-operations -[repair recover index]: ../repair-recovery +[repair recover index]: ../using/repair-recovery [security index]: ../using/security [perf index]: ../using/performance [troubleshoot index]: ../using/troubleshooting diff --git a/content/riak/kv/2.0.5/using/admin/commands.md b/content/riak/kv/2.0.5/using/admin/commands.md index 6a73c139c0..bd4aab8c75 100644 --- a/content/riak/kv/2.0.5/using/admin/commands.md +++ b/content/riak/kv/2.0.5/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.5/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.0.5/using/admin/riak-admin/#cluster -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.0.5/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.0.5/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.0.5/using/admin/riak-admin.md b/content/riak/kv/2.0.5/using/admin/riak-admin.md index 9b7a9d1c6f..79c7c187ea 100644 --- a/content/riak/kv/2.0.5/using/admin/riak-admin.md +++ b/content/riak/kv/2.0.5/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.0.5/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.0.5/configuring/reference -[use admin commands]: /riak/kv/2.0.5/using/admin/commands -[use admin commands#join]: /riak/kv/2.0.5/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.0.5/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.0.5/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.0.5/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.0.5/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.0.5/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.0.5/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.0.5/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.0.5/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.0.5/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.0.5/setup/downgrade -[security index]: /riak/kv/2.0.5/using/security/ -[security managing]: /riak/kv/2.0.5/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.0.5/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.0.5/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.0.5/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.0.5/using/cluster-operations/strong-consistency -[cluster 
ops handoff]: /riak/kv/2.0.5/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.0.5/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[use admin commands]: {{}}riak/kv/2.0.5/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.0.5/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.0.5/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.0.5/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.0.5/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.0.5/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.0.5/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.0.5/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.0.5/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.0.5/using/cluster-operations/inspecting-node +[use ref monitoring]: {{}}riak/kv/2.0.5/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.0.5/setup/downgrade +[security index]: {{}}riak/kv/2.0.5/using/security/ +[security managing]: {{}}riak/kv/2.0.5/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.0.5/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.0.5/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.0.5/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.0.5/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.0.5/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.0.5/using/admin/riak-admin/#stats ## riak-admin diff --git a/content/riak/kv/2.0.5/using/admin/riak-cli.md b/content/riak/kv/2.0.5/using/admin/riak-cli.md index a83964788a..a3b5fd6e2e 100644 --- a/content/riak/kv/2.0.5/using/admin/riak-cli.md +++ b/content/riak/kv/2.0.5/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.5/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.0.5/configuring/reference/ +[configuration file]: {{}}riak/kv/2.0.5/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.0.5/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.0.5/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.0.5/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.0.5/configuring/reference/ ## riak diff --git a/content/riak/kv/2.0.5/using/admin/riak-control.md b/content/riak/kv/2.0.5/using/admin/riak-control.md index 308079eaed..374e2727a4 100644 --- a/content/riak/kv/2.0.5/using/admin/riak-control.md +++ b/content/riak/kv/2.0.5/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.0.5/configuring/reference +[config reference]: {{}}riak/kv/2.0.5/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.0.5/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.0.5/using/security/basics#enabling-ssl). 
Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.0.5/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.0.5/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.0.5/using/cluster-operations.md b/content/riak/kv/2.0.5/using/cluster-operations.md index 2695ac3aa8..084a4ce461 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations.md +++ b/content/riak/kv/2.0.5/using/cluster-operations.md @@ -20,7 +20,6 @@ toc: true [ops log]: ./logging [ops backup]: ./backing-up [ops handoff]: ./handoff -[ops obj del]: ./object-deletion [ops strong consistency]: ./strong-consistency [ops v3 mdc]: ./v3-multi-datacenter [ops v2 mdc]: ./v2-multi-datacenter @@ -84,13 +83,6 @@ Information on using the `riak-admin handoff` interface to enable and disable ha [Learn More >>][ops handoff] -#### [Object Deletion][ops obj del] - -Describes possible settings for `delete_mode`. - -[Learn More >>][ops obj del] - - #### [Monitoring Strong Consistency][ops strong consistency] Overview of the various statistics used in monitoring strong consistency. 
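As a quick aside on the strong-consistency statistics mentioned above, here is a hedged sketch of how they can be listed on a running node, assuming shell access to that node. The stat names that appear later in this changeset, such as `consistent_gets` and `consistent_puts`, all share the `consistent_` prefix, which makes them easy to filter:

```bash
# List only the strong-consistency stats reported by riak-admin status.
riak-admin status | grep consistent_
```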
diff --git a/content/riak/kv/2.0.5/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.0.5/using/cluster-operations/active-anti-entropy.md index 04433fd439..a9d0a36962 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/active-anti-entropy.md @@ -54,12 +54,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. ## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -87,7 +87,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes.md index d43dc0dd4e..cd96e82e54 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.0.5/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.0.5/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.0.5/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.0.5/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.0.5/using/cluster-operations/backing-up.md b/content/riak/kv/2.0.5/using/cluster-operations/backing-up.md index bf080a486a..6d32fa2311 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.5/ops/running/backups --- -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters -[config reference]: /riak/kv/2.0.5/configuring/reference -[plan backend leveldb]: /riak/kv/2.0.5/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.5/using/reference/strong-consistency -[concept aae]: /riak/kv/2.0.5/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.0.5/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.0.5/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.0.5/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.0.5/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.0.5/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down <node>`](/riak/kv/2.0.5/using/admin/riak-admin/#down) + [`riak-admin down <node>`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join <node>`](/riak/kv/2.0.5/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join <node>`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace <node1> <node2>`](/riak/kv/2.0.5/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace <node1> <node2>`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.0.5/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.0.5/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.0.5/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.0.5/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.0.5/using/cluster-operations/bucket-types.md b/content/riak/kv/2.0.5/using/cluster-operations/bucket-types.md index 4050fe7c16..9451129699 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.0.5/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.0.5/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line.
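For readers skimming this hunk, the "additional setup step on the command line" is the create-and-activate workflow. Here is a minimal sketch using an invented type name and example properties; the `riak-admin bucket-type` subcommand format matches the `update` example that appears later in this changeset:

```bash
# Create a bucket type with example properties, activate it, then verify.
riak-admin bucket-type create my_type '{"props":{"n_val":3}}'
riak-admin bucket-type activate my_type
riak-admin bucket-type status my_type
```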
diff --git a/content/riak/kv/2.0.5/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.0.5/using/cluster-operations/changing-cluster-info.md index 3affe061d4..ae4c2738d9 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.0.5/configuring/reference +[config reference]: {{}}riak/kv/2.0.5/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.0.5/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.0.5/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.0.5/using/cluster-operations/handoff.md b/content/riak/kv/2.0.5/using/cluster-operations/handoff.md index 7f5a4157b8..951fa416e0 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.0.5/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.0.5/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.0.5/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.0.5/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.0.5/using/cluster-operations/logging.md b/content/riak/kv/2.0.5/using/cluster-operations/logging.md index 797fc157ff..ce5416c781 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/logging.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e.
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.0.5/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.0.5/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.0.5/using/cluster-operations/replacing-node.md b/content/riak/kv/2.0.5/using/cluster-operations/replacing-node.md index 3ff89a40fc..ad61fe0fb2 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.0.5/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.0.5/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.0.5/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.0.5/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.0.5/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.0.5/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.0.5/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.0.5/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.0.5/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.0.5/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.0.5/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.0.5/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.0.5/using/cluster-operations/strong-consistency.md index aa9fa6fa67..7a4e99e90b 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.0.5/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.0.5/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.0.5/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.0.5/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.0.5/using/cluster-operations/v2-multi-datacenter.md index ff0f94bd3c..2a1265f9d6 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/v2-multi-datacenter.md @@ -159,7 +159,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -179,7 +179,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -217,7 +217,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.0.5/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.0.5/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -238,7 +238,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter.md index c32973bcda..5c58fc0e7a 100644 --- a/content/riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.0.5/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.0.5/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.5/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.5/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.0.5/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.0.5/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.0.5/using/performance.md b/content/riak/kv/2.0.5/using/performance.md index 6d0b97cd30..39d88a4d76 100644 --- a/content/riak/kv/2.0.5/using/performance.md +++ b/content/riak/kv/2.0.5/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.0.5/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.0.5/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -237,12 +237,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.0.5/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.0.5/using/performance/open-files-limit/) for more details.
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.0.5/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.0.5/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.0.5/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.0.5/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.0.5/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.0.5/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.0.5/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.0.5/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.0.5/using/performance/benchmarking.md b/content/riak/kv/2.0.5/using/performance/benchmarking.md index 1f06a5a1c5..2cf92d0b82 100644 --- a/content/riak/kv/2.0.5/using/performance/benchmarking.md +++ b/content/riak/kv/2.0.5/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.0.5/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.0.5/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.0.5/using/performance/latency-reduction.md b/content/riak/kv/2.0.5/using/performance/latency-reduction.md index bab637a4b8..d000ff18fe 100644 --- a/content/riak/kv/2.0.5/using/performance/latency-reduction.md +++ b/content/riak/kv/2.0.5/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.0.5/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.0.5/using/performance/multi-datacenter-tuning.md index b3f05375b6..5882948419 100644 --- a/content/riak/kv/2.0.5/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.0.5/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.0.5/using/performance +[perf index]: {{}}riak/kv/2.0.5/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.0.5/using/performance/open-files-limit.md b/content/riak/kv/2.0.5/using/performance/open-files-limit.md index 352d2ce063..5cca89634f 100644 --- a/content/riak/kv/2.0.5/using/performance/open-files-limit.md +++ b/content/riak/kv/2.0.5/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/ops/tuning/open-files-limit/ --- -[plan backend bitcask]: /riak/kv/2.0.5/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.5/setup/planning/backend/bitcask Riak can consume a large number of open file handles during normal operation. 
The [Bitcask][plan backend bitcask] backend in particular may accumulate a high diff --git a/content/riak/kv/2.0.5/using/reference/bucket-types.md b/content/riak/kv/2.0.5/using/reference/bucket-types.md index faadc1f5fe..1ccf4bb9e1 100644 --- a/content/riak/kv/2.0.5/using/reference/bucket-types.md +++ b/content/riak/kv/2.0.5/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.0.5/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.0.5/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.0.5/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.0.5/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. @@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.0.5/developing/data-types), and [strong consistency](/riak/kv/2.0.5/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.0.5/developing/data-types), and [strong consistency]({{}}riak/kv/2.0.5/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.0.5/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.0.5/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.5/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.0.5/developing/getting-started) section. 
+> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.5/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.5/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.0.5/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.0.5/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.0.5/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution) with the appropriate application-side business logic. To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.0.5/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.0.5/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.0.5/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.0.5/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.0.5/learn/concepts/buckets) and [keys](/riak/kv/2.0.5/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.0.5/learn/concepts/buckets) and [keys]({{}}riak/kv/2.0.5/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. 
We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.0.5/using/reference/custom-code.md b/content/riak/kv/2.0.5/using/reference/custom-code.md index b086b1d7dc..42d52a83ef 100644 --- a/content/riak/kv/2.0.5/using/reference/custom-code.md +++ b/content/riak/kv/2.0.5/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.0.5/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.0.5/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.0.5/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.0.5/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.0.5/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.0.5/using/admin/riak-admin/#wait-for-service). {{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.0.5/using/reference/handoff.md b/content/riak/kv/2.0.5/using/reference/handoff.md index 1aa50ca197..fce78f5bcc 100644 --- a/content/riak/kv/2.0.5/using/reference/handoff.md +++ b/content/riak/kv/2.0.5/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.0.5/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.0.5/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.0.5/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.0.5/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.0.5/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.0.5/learn/glossary/#vnode) no longer belongs to the node on which it's running. 
This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.0.5/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.0.5/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.0.5/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.0.5/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.0.5/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.0.5/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.0.5/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.0.5/configuring/reference/#vnode_management_timer). Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.0.5/using/reference/jmx.md b/content/riak/kv/2.0.5/using/reference/jmx.md index 0c3365aa08..0341e34ec2 100644 --- a/content/riak/kv/2.0.5/using/reference/jmx.md +++ b/content/riak/kv/2.0.5/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.5/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.0.5/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.0.5/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.0.5/using/reference/logging.md b/content/riak/kv/2.0.5/using/reference/logging.md index c06b153c88..dc1185ff2d 100644 --- a/content/riak/kv/2.0.5/using/reference/logging.md +++ b/content/riak/kv/2.0.5/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.5/ops/running/logging --- -[cluster ops log]: /riak/kv/2.0.5/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.0.5/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. 
A compact listing of parameters can be found in our [configuration files](/riak/kv/2.0.5/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.0.5/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -46,7 +46,7 @@ File | Significance `console.log` | Console log output `crash.log` | Crash logs `erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. -`error.log` | [Common errors](../../repair-recover/errors) emitted by Riak. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. `run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. ## Log Syntax @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.0.5/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.0.5/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. * `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.0.5/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.0.5/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.0.5/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.0.5/using/reference/multi-datacenter/comparison.md index 168970559d..101a55686e 100644 --- a/content/riak/kv/2.0.5/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.0.5/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.0.5/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.0.5/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.0.5/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.0.5/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. 
-* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.0.5/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.0.5/using/reference/runtime-interaction.md b/content/riak/kv/2.0.5/using/reference/runtime-interaction.md index 2fa4a07678..e64c02945e 100644 --- a/content/riak/kv/2.0.5/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.0.5/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.5/ops/advanced/runtime --- -[config reference]: /riak/kv/2.0.5/configuring/reference -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.5/configuring/reference +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.0.5/using/reference/search.md b/content/riak/kv/2.0.5/using/reference/search.md index 11280311d0..5b5aa8db7a 100644 --- a/content/riak/kv/2.0.5/using/reference/search.md +++ b/content/riak/kv/2.0.5/using/reference/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.0.5/dev/advanced/search --- -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak Search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -Search, you should check out the [Using Search](/riak/kv/2.0.5/developing/usage/search) document. +Search, you should check out the [Using Search]({{}}riak/kv/2.0.5/developing/usage/search) document. -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -126,7 +126,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.0.5/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.0.5/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -288,7 +288,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on to how customize your own [search schema](/riak/kv/2.0.5/developing/usage/search-schemas). +You can also find more information on to how customize your own [search schema]({{}}riak/kv/2.0.5/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -298,7 +298,7 @@ indexed. 
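As a rough sketch of the index workflow referenced above, you might create an index over HTTP and then bind it to a bucket type via the `search_index` property. The index and type names here are hypothetical, and the default HTTP port is assumed:

```bash
# Create a Search index, then associate it with a new bucket type
curl -XPUT http://localhost:8098/search/index/famous \
  -H 'Content-Type: application/json'
riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}'
riak-admin bucket-type activate animals
```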
## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.0.5/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -353,7 +353,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.0.5/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.0.5/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.0.5/using/reference/secondary-indexes.md b/content/riak/kv/2.0.5/using/reference/secondary-indexes.md index 4826a58778..58cdeeb6e0 100644 --- a/content/riak/kv/2.0.5/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.0.5/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.0.5/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.5/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.5/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.0.5/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.0.5/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.0.5/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.0.5/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.0.5/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. 
Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.0.5/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.0.5/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.0.5/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.0.5/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. diff --git a/content/riak/kv/2.0.5/using/reference/statistics-monitoring.md b/content/riak/kv/2.0.5/using/reference/statistics-monitoring.md index 1e96b60a16..fc629139a6 100644 --- a/content/riak/kv/2.0.5/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.0.5/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.0.5/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.0.5/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.0.5/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.0.5/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.0.5/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.0.5/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.0.5/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.0.5/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. 
More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.0.5/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.0.5/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.0.5/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.0.5/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -143,7 +143,7 @@ Metric | Also | Notes ## Command-line Interface -The [`riak-admin`](/riak/kv/2.0.5/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.0.5/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -168,14 +168,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.0.5/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.0.5/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.0.5/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.0.5/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -222,7 +222,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.0.5/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.0.5/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -246,7 +246,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.0.5/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.0.5/developing/api/http/status) endpoint is also available. #### Nagios @@ -320,14 +320,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.0.5/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.0.5/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. 
It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.0.5/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.0.5/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -349,9 +349,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.0.5/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.0.5/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.0.5/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.0.5/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -367,9 +367,9 @@ Docs](https://github.com/basho/basho_docs). [sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.0.5/using/reference/strong-consistency.md b/content/riak/kv/2.0.5/using/reference/strong-consistency.md index 53b7733b8d..d4ad63bb5c 100644 --- a/content/riak/kv/2.0.5/using/reference/strong-consistency.md +++ b/content/riak/kv/2.0.5/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.0.5/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.5/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.5/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.5/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.5/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.5/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.5/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.5/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.5/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.5/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.0.5/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.0.5/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.0.5/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.0.5/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.0.5/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.0.5/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.0.5/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.0.5/using/reference/v2-multi-datacenter/architecture.md index ae5d2b6ee6..0636240231 100644 --- a/content/riak/kv/2.0.5/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.5/using/reference/v2-multi-datacenter/architecture.md @@ -78,7 +78,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.0.5/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.0.5/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -90,7 +90,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png)
+![MDC Fullsync]({{< baseurl >}}images/MDC_Full-sync-small.png)
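The listener/site pairing sketched above is established from the command line with Riak Enterprise's `riak-repl` tool. A minimal sketch with hypothetical node names, addresses, and a site name:

```bash
# On the primary (source) cluster: expose a listener
riak-repl add-listener riak@10.0.1.10 10.0.1.10 9010

# On the secondary (sink) cluster: point a site at that listener
riak-repl add-site 10.0.1.10 9010 newyork

# Check replication state from either side
riak-repl status
```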
## Realtime Replication @@ -108,7 +108,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png)
+![MDC Realtime]({{< baseurl >}}images/MDC-real-time-sync-small.png)
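Realtime progress in version 2 can be spot-checked with `riak-repl status`; the exact field names vary by release, so the filter below is only a sketch:

```bash
# Look for realtime queue and connection statistics
riak-repl status | grep -E 'realtime|queue'
```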
## Restrictions @@ -116,6 +116,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.0.5/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.0.5/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.0.5/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.0.5/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/aae.md index d909e89680..6f7e5f2b96 100644 --- a/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.5/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.0.5/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.0.5/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.0.5/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/architecture.md index 809eea31af..9f91043cae 100644 --- a/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.5/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.0.5/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.0.5/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.0.5/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.0.5/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png)
+ ![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime6.png)
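Putting the queueing flow above into practice involves naming the clusters, connecting them, and enabling and starting realtime toward the sink. A sketch with hypothetical cluster names and a hypothetical cluster-manager address:

```bash
# On the source cluster: name it, then connect to the sink's cluster manager
riak-repl clustername west
riak-repl connect 10.0.2.10:9080

# Enable and start realtime replication toward the sink cluster "east"
riak-repl realtime enable east
riak-repl realtime start east

# Confirm objects are being queued and acknowledged
riak-repl status
```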
## Restrictions diff --git a/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/cascading-writes.md index f15c865329..f5fce10480 100644 --- a/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/cascading-writes.md @@ -83,7 +83,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{}}riak/kv/2.0.5/using/cluster-operations/v3-multi-datacenter) for more information. To show current the settings: diff --git a/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/scheduling-fullsync.md index 34e9650674..fbd5803a7c 100644 --- a/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.0.5/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.5/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.0.5/configuring/reference/#advanced-configuration +[config reference#advanced]: {{}}riak/kv/2.0.5/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.0.5/using/repair-recovery.md b/content/riak/kv/2.0.5/using/repair-recovery.md index 560c7e49b5..21675c30aa 100644 --- a/content/riak/kv/2.0.5/using/repair-recovery.md +++ b/content/riak/kv/2.0.5/using/repair-recovery.md @@ -15,7 +15,7 @@ toc: true [repair recover fail]: ./failure-recovery/ [repair recover errors]: ./errors/ [repair recover repairs]: ./repairs/ -[repair recover restart]: ./rolling-restarts/ +[repair recover restart]: ./rolling-restart/ ## In This Section diff --git a/content/riak/kv/2.0.5/using/repair-recovery/errors.md b/content/riak/kv/2.0.5/using/repair-recovery/errors.md index 400e3660f2..e2fc192892 100644 --- a/content/riak/kv/2.0.5/using/repair-recovery/errors.md +++ b/content/riak/kv/2.0.5/using/repair-recovery/errors.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.0.5/configuring/reference +[config reference]: {{}}riak/kv/2.0.5/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1. 
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1. `{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See 1 +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. 
See 1 `{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.0.5/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.0.5/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.0.5/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.0.5/using/repair-recovery/failure-recovery.md index 6092e730f7..e9a5f76264 100644 --- a/content/riak/kv/2.0.5/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.0.5/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.0.5/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.0.5/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.0.5/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.0.5/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
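Sibling explosion usually shows up in a node's fetch statistics before it shows up as user-visible latency. A quick check (stat names as emitted by `riak-admin status` in the 2.x series):

```bash
# Watch mean/median/high-percentile sibling counts and object sizes
riak-admin status | grep -E 'siblings|objsize'
```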
@@ -115,7 +115,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak/kv/2.0.5/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.0.5/using/repair-recovery/repairs.md b/content/riak/kv/2.0.5/using/repair-recovery/repairs.md index 1e4f4284a7..7a1a18c712 100644 --- a/content/riak/kv/2.0.5/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.0.5/using/repair-recovery/repairs.md @@ -149,7 +149,7 @@ In the event of major hardware or filesystem problems, LevelDB can become corrup ### Checking for Compaction Errors -Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`](/riak/kv/2.0.5/configuring/reference/) configuration file. The default is `./data`. +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`]({{}}riak/kv/2.0.5/configuring/reference/) configuration file. The default is `./data`. Compaction error messages take the following form: @@ -218,23 +218,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.0.5/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.0.5/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.0.5/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/) is enabled. 
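Whether AAE is enabled can be confirmed on each node before deciding to repair; a sketch, assuming a packaged install with `riak.conf` under `/etc/riak` (the `riak config effective` subcommand is available on recent 2.x releases):

```bash
# Static setting in the config file...
grep '^anti_entropy' /etc/riak/riak.conf

# ...and the effective runtime value
riak config effective | grep anti_entropy
```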
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.0.5/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.0.5/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.0.5/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.0.5/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.0.5/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.0.5/using/repair-recovery/rolling-restart.md index 35a1867043..9ae99f36c3 100644 --- a/content/riak/kv/2.0.5/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.0.5/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.5/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.0.5/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.0.5/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.0.5/using/running-a-cluster.md b/content/riak/kv/2.0.5/using/running-a-cluster.md index 3133599ae6..f2d05c26c5 100644 --- a/content/riak/kv/2.0.5/using/running-a-cluster.md +++ b/content/riak/kv/2.0.5/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. -Most configuration changes will be applied to the [configuration file](/riak/kv/2.0.5/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.0.5/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.0.5/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. 
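For reference, the `cluster replace` path mentioned above is staged and committed like any other cluster change. A sketch with hypothetical node names (remember that it requires a multi-node cluster):

```bash
# Stage, review, and commit the rename
riak-admin cluster replace riak@old.example.com riak@new.example.com
riak-admin cluster plan
riak-admin cluster commit
```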
## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.0.5/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.0.5/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.0.5/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.0.5/using/cluster-operations/adding-removing-nodes) from a cluster. > **Ring Creation Size** > diff --git a/content/riak/kv/2.0.5/using/security.md b/content/riak/kv/2.0.5/using/security.md index e1263a218e..ebdf37a36d 100644 --- a/content/riak/kv/2.0.5/using/security.md +++ b/content/riak/kv/2.0.5/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.0.5/ops/advanced/security --- -[config reference search]: /riak/kv/2.0.5/configuring/reference/#search -[config search enabling]: /riak/kv/2.0.5/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.0.5/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.0.5/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.0.5/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.0.5/using/security/basics -[security managing]: /riak/kv/2.0.5/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.0.5/using/security/basics +[security managing]: {{}}riak/kv/2.0.5/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.0.5/developing/usage/search +[usage search]: {{}}riak/kv/2.0.5/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.0.5/using/security/basics.md b/content/riak/kv/2.0.5/using/security/basics.md index 414ef847ea..b4ff91191b 100644 --- a/content/riak/kv/2.0.5/using/security/basics.md +++ b/content/riak/kv/2.0.5/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.0.5/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.0.5/using/security/managing-sources/). 
As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.0.5/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.0.5/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.0.5/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.0.5/using/reference/custom-code). 1. Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.0.5/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.0.5/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.0.5/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.0.5/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{}}riak/kv/2.0.5/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.0.5/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{}}riak/kv/2.0.5/developing/usage/bucket-types) in addition to setting bucket properties. 
`riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.0.5/configuring/search/) document. +[Riak Search Settings]({{}}riak/kv/2.0.5/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.0.5/using/security/managing-sources/). +A more in-depth tutorial can be found in [Managing Security Sources]({{}}riak/kv/2.0.5/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.0.5/using/security/managing-sources/) document. +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.0.5/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.0.5/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.0.5/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** > @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.0.5/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.0.5/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.0.5/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.0.5/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.0.5/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). 
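Tying the permission and source material above together, a minimal sketch of the `riak-admin security` workflow (`riakuser` and the `127.0.0.1/32` network follow the document's own examples; run this only after the SSL checklist is satisfied):

```bash
# Turn security on, grant riakuser read/write everywhere, then
# withdraw write access again.
riak-admin security enable
riak-admin security grant riak_kv.get,riak_kv.put on any to riakuser
riak-admin security revoke riak_kv.put on any from riakuser

# Require password authentication for riakuser on the local network,
# mirroring the del-source example above.
riak-admin security add-source riakuser 127.0.0.1/32 password
```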
-If, however, you are using the [HTTP API](/riak/kv/2.0.5/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.0.5/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.0.5/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.0.5/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.0.5/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.0.5/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.0.5/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.0.5/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.0.5/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.0.5/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.0.5/configuring/reference/#directories).
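Gathering the TLS-version toggles and certificate paths above into one place, a hedged riak.conf sketch (the file paths are placeholders; only TLS 1.2 stays enabled, matching the stated default):

```riakconf
## Allow only TLS 1.2 connections.
tls_protocols.tlsv1   = off
tls_protocols.tlsv1.1 = off
tls_protocols.tlsv1.2 = on

## Certificate paths; by default these resolve under platform_etc_dir.
ssl.certfile   = /etc/riak/cert.pem
ssl.keyfile    = /etc/riak/key.pem
ssl.cacertfile = /etc/riak/cacert.pem
```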
<td><code>vnode_management_timer</code></td> <td>Sets the frequency with which <a
-href="/riak/kv/2.0.5/learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
+href="../../learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
this node and other nodes in the cluster.</td> <td><code>10s</code> (10 seconds)</td>
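The table row above maps to a single riak.conf knob; a minimal sketch of setting the default shown in the row explicitly:

```riakconf
vnode_management_timer = 10s
```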
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.0.5/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.0.5/using/security/managing-sources.md b/content/riak/kv/2.0.5/using/security/managing-sources.md index 7d91ef4044..f3c2503263 100644 --- a/content/riak/kv/2.0.5/using/security/managing-sources.md +++ b/content/riak/kv/2.0.5/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.0.5/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.0.5/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.0.5/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.0.5/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.0.5/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.0.5/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.0.5/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.0.5/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.0.5/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.0.5/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. 
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.0.5/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.0.5/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.0.5/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.0.5/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.0.6/_reference-links.md b/content/riak/kv/2.0.6/_reference-links.md index b66d7fa1a9..a1305623e4 100644 --- a/content/riak/kv/2.0.6/_reference-links.md +++ b/content/riak/kv/2.0.6/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.0.6/downloads/ -[install index]: /riak/kv/2.0.6/setup/installing -[upgrade index]: /riak/kv/2.0.6/upgrading -[plan index]: /riak/kv/2.0.6/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.0.6/configuring/reference/ -[manage index]: /riak/kv/2.0.6/using/managing -[performance index]: /riak/kv/2.0.6/using/performance -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.0.6/downloads/ +[install index]: {{}}riak/kv/2.0.6/setup/installing +[upgrade index]: {{}}riak/kv/2.0.6/upgrading +[plan index]: {{}}riak/kv/2.0.6/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.0.6/configuring/reference/ +[manage index]: {{}}riak/kv/2.0.6/using/managing +[performance index]: {{}}riak/kv/2.0.6/using/performance +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.0.6/setup/planning -[plan start]: /riak/kv/2.0.6/setup/planning/start -[plan backend]: /riak/kv/2.0.6/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.6/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.6/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.0.6/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.0.6/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.0.6/setup/planning/best-practices -[plan future]: /riak/kv/2.0.6/setup/planning/future +[plan index]: {{}}riak/kv/2.0.6/setup/planning +[plan start]: {{}}riak/kv/2.0.6/setup/planning/start +[plan backend]: {{}}riak/kv/2.0.6/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.6/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.6/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.0.6/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.0.6/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.0.6/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.0.6/setup/planning/future ## Installing -[install index]: /riak/kv/2.0.6/setup/installing -[install aws]: /riak/kv/2.0.6/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.6/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.6/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.6/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.6/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.6/setup/installing/smartos -[install solaris]: /riak/kv/2.0.6/setup/installing/solaris -[install suse]: /riak/kv/2.0.6/setup/installing/suse -[install windows azure]: /riak/kv/2.0.6/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.0.6/setup/installing +[install aws]: {{}}riak/kv/2.0.6/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.6/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.6/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.6/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.6/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.6/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.6/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.6/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.6/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.6/setup/installing/source -[install source erlang]: /riak/kv/2.0.6/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.0.6/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.0.6/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.6/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.0.6/setup/installing/source/jvm -[install verify]: /riak/kv/2.0.6/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.6/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.0.6/setup/upgrading -[upgrade checklist]: /riak/kv/2.0.6/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.0.6/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.0.6/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.0.6/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.0.6/setup/downgrade +[upgrade index]: {{}}riak/kv/2.0.6/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.0.6/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.0.6/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.0.6/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.0.6/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.0.6/setup/downgrade ## Configuring -[config index]: /riak/kv/2.0.6/configuring -[config basic]: /riak/kv/2.0.6/configuring/basic -[config backend]: /riak/kv/2.0.6/configuring/backend -[config manage]: /riak/kv/2.0.6/configuring/managing -[config reference]: /riak/kv/2.0.6/configuring/reference/ -[config strong consistency]: /riak/kv/2.0.6/configuring/strong-consistency -[config load balance]: /riak/kv/2.0.6/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.0.6/configuring/mapreduce -[config search]: /riak/kv/2.0.6/configuring/search/ +[config index]: {{}}riak/kv/2.0.6/configuring +[config basic]: {{}}riak/kv/2.0.6/configuring/basic +[config backend]: {{}}riak/kv/2.0.6/configuring/backend +[config manage]: 
{{}}riak/kv/2.0.6/configuring/managing +[config reference]: {{}}riak/kv/2.0.6/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.0.6/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.0.6/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.0.6/configuring/mapreduce +[config search]: {{}}riak/kv/2.0.6/configuring/search/ -[config v3 mdc]: /riak/kv/2.0.6/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.6/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.6/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.0.6/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.0.6/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.0.6/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.0.6/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.0.6/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.0.6/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.0.6/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.0.6/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.0.6/using/ -[use admin commands]: /riak/kv/2.0.6/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.0.6/using/running-a-cluster +[use index]: {{}}riak/kv/2.0.6/using/ +[use admin commands]: {{}}riak/kv/2.0.6/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.0.6/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.0.6/using/reference/custom-code -[use ref handoff]: /riak/kv/2.0.6/using/reference/handoff -[use ref monitoring]: /riak/kv/2.0.6/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.0.6/using/reference/search -[use ref 2i]: /riak/kv/2.0.6/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.0.6/using/reference/snmp -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.0.6/using/reference/jmx -[use ref obj del]: /riak/kv/2.0.6/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.0.6/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.0.6/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.0.6/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.0.6/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.0.6/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.0.6/using/reference/search +[use ref 2i]: {{}}riak/kv/2.0.6/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.0.6/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.0.6/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.0.6/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.0.6/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.0.6/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.0.6/using/admin/ -[use admin commands]: /riak/kv/2.0.6/using/admin/commands/ -[use admin riak cli]: 
/riak/kv/2.0.6/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.0.6/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.0.6/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.0.6/using/admin/ +[use admin commands]: {{}}riak/kv/2.0.6/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.0.6/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.0.6/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.0.6/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.0.6/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.0.6/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.0.6/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.0.6/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.0.6/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.0.6/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.0.6/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.0.6/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.0.6/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.0.6/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.0.6/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.0.6/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.0.6/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.0.6/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.0.6/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.0.6/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.0.6/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.0.6/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.0.6/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.0.6/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.0.6/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.0.6/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.0.6/using/repair-recovery -[repair recover index]: /riak/kv/2.0.6/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.0.6/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.0.6/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.0.6/using/security/ -[security basics]: /riak/kv/2.0.6/using/security/basics -[security managing]: /riak/kv/2.0.6/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.0.6/using/security/ +[security basics]: {{}}riak/kv/2.0.6/using/security/basics +[security managing]: {{}}riak/kv/2.0.6/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.0.6/using/performance/ -[perf 
benchmark]: /riak/kv/2.0.6/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.6/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.0.6/using/performance/erlang -[perf aws]: /riak/kv/2.0.6/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.0.6/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.0.6/using/performance/ +[perf benchmark]: {{}}riak/kv/2.0.6/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.6/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.0.6/using/performance/erlang +[perf aws]: {{}}riak/kv/2.0.6/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.0.6/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.0.6/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.0.6/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.0.6/developing -[dev client libraries]: /riak/kv/2.0.6/developing/client-libraries -[dev data model]: /riak/kv/2.0.6/developing/data-modeling -[dev data types]: /riak/kv/2.0.6/developing/data-types -[dev kv model]: /riak/kv/2.0.6/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.0.6/developing +[dev client libraries]: {{}}riak/kv/2.0.6/developing/client-libraries +[dev data model]: {{}}riak/kv/2.0.6/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.6/developing/data-types +[dev kv model]: {{}}riak/kv/2.0.6/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.0.6/developing/getting-started -[getting started java]: /riak/kv/2.0.6/developing/getting-started/java -[getting started ruby]: /riak/kv/2.0.6/developing/getting-started/ruby -[getting started python]: /riak/kv/2.0.6/developing/getting-started/python -[getting started php]: /riak/kv/2.0.6/developing/getting-started/php -[getting started csharp]: /riak/kv/2.0.6/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.0.6/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.0.6/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.0.6/developing/getting-started/golang - -[obj model java]: /riak/kv/2.0.6/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.6/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.6/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.6/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.6/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.6/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.6/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.0.6/developing/getting-started +[getting started java]: {{}}riak/kv/2.0.6/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.0.6/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.0.6/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.0.6/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.0.6/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.0.6/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.0.6/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.0.6/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.0.6/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.0.6/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.6/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.6/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.6/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.6/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.6/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.0.6/developing/usage -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.6/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.6/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.0.6/developing/usage/content-types -[usage create objects]: /riak/kv/2.0.6/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.0.6/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.0.6/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.0.6/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.6/developing/usage/search -[usage search schema]: /riak/kv/2.0.6/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.6/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.0.6/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.0.6/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.0.6/developing/usage +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.6/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.6/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.0.6/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.0.6/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.0.6/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.0.6/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.0.6/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.6/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.6/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.6/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.0.6/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.0.6/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.0.6/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.0.6/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.0.6/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.0.6/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.0.6/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.0.6/developing/api/backend -[dev api http]: /riak/kv/2.0.6/developing/api/http -[dev api http status]: /riak/kv/2.0.6/developing/api/http/status -[dev api pbc]: /riak/kv/2.0.6/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.0.6/developing/api/backend +[dev api http]: {{}}riak/kv/2.0.6/developing/api/http +[dev api http status]: {{}}riak/kv/2.0.6/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.0.6/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.0.6/learn/glossary/ -[glossary aae]: /riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.0.6/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.0.6/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.0.6/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode -[concept aae]: /riak/kv/2.0.6/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.0.6/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.6/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.6/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.0.6/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.6/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.6/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.6/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.6/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.0.6/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.6/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.6/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.0.6/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.6/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.6/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.6/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.0.6/add-ons.md b/content/riak/kv/2.0.6/add-ons.md index 81ff90db0d..15c593eb0f 100644 --- a/content/riak/kv/2.0.6/add-ons.md +++ b/content/riak/kv/2.0.6/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.0.6/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.0.6/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.0.6/add-ons/redis/developing-rra.md b/content/riak/kv/2.0.6/add-ons/redis/developing-rra.md index 4a3d71c1fb..7030d09fbb 100644 --- a/content/riak/kv/2.0.6/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.0.6/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.0.6/developing/api/http +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.0.6/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.0.6/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.0.6/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.0.6/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.0.6/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.0.6/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.0.6/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.0.6/add-ons/redis/redis-add-on-features.md index 0396784707..e6d87d6f62 100644 --- a/content/riak/kv/2.0.6/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.0.6/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
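To illustrate the read-through strategy described above, a hedged redis-cli session against the cache proxy (the host, port 22122, and key are assumptions for the example, not fixed RRA values):

```bash
# First GET misses the cache, so the proxy fetches from Riak KV and
# stores the value in Redis; a repeat GET within CACHE_TTL hits Redis.
redis-cli -h 127.0.0.1 -p 22122 GET test:user:1234
redis-cli -h 127.0.0.1 -p 22122 GET test:user:1234
```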
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.0.6/add-ons/redis/set-up-rra.md b/content/riak/kv/2.0.6/add-ons/redis/set-up-rra.md index 0bf0d2d912..0427672ad4 100644 --- a/content/riak/kv/2.0.6/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.0.6/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.0.6/setup/installing -[perf open files]: /riak/kv/2.0.6/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.0.6/setup/installing +[perf open files]: {{}}riak/kv/2.0.6/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. 
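And a matching write-around sketch (same hypothetical proxy endpoint as the GET example above):

```bash
# SET and DEL are applied to Riak KV rather than to the cache; the
# cached copy is invalidated and repopulated on the next GET.
redis-cli -h 127.0.0.1 -p 22122 SET test:user:1234 '{"name":"Ana"}'
redis-cli -h 127.0.0.1 -p 22122 DEL test:user:1234
```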
diff --git a/content/riak/kv/2.0.6/add-ons/redis/using-rra.md b/content/riak/kv/2.0.6/add-ons/redis/using-rra.md index 7346f00ab2..8388fdc6f9 100644 --- a/content/riak/kv/2.0.6/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.0.6/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.0.6/developing/api/http/ +[dev api http]: {{}}riak/kv/2.0.6/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.0.6/configuring/backend.md b/content/riak/kv/2.0.6/configuring/backend.md index c91d007035..b391af86b4 100644 --- a/content/riak/kv/2.0.6/configuring/backend.md +++ b/content/riak/kv/2.0.6/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.6/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.6/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.6/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.6/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.0.6/configuring/basic.md b/content/riak/kv/2.0.6/configuring/basic.md index 11857966f0..2d631dd4b3 100644 --- a/content/riak/kv/2.0.6/configuring/basic.md +++ b/content/riak/kv/2.0.6/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.0.6/ops/building/configuration/ --- -[config reference]: /riak/kv/2.0.6/configuring/reference -[use running cluster]: /riak/kv/2.0.6/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.0.6/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.0.6/using/performance/erlang -[plan start]: /riak/kv/2.0.6/setup/planning/start -[plan best practices]: /riak/kv/2.0.6/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.0.6/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.0.6/setup/planning/backend -[plan backend multi]: /riak/kv/2.0.6/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.0.6/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.0.6/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.6/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.6/using/performance -[perf aws]: /riak/kv/2.0.6/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.0.6/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[use running cluster]: {{}}riak/kv/2.0.6/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.0.6/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.0.6/using/performance/erlang +[plan start]: {{}}riak/kv/2.0.6/setup/planning/start +[plan best practices]: {{}}riak/kv/2.0.6/setup/planning/best-practices +[cluster ops backup]: 
{{}}riak/kv/2.0.6/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.0.6/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.0.6/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.6/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.0.6/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.0.6/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.6/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.6/using/performance +[perf aws]: {{}}riak/kv/2.0.6/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.0.6/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.0.6/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.6/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
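Since the paragraph above leans on HTTP Reset Bucket Properties, a one-line sketch of forcing an existing bucket onto the new defaults (the bucket name is hypothetical):

```bash
# Resetting a bucket's properties makes it pick up the new defaults.
curl -XDELETE http://127.0.0.1:8098/buckets/my_bucket/props
```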
## System tuning diff --git a/content/riak/kv/2.0.6/configuring/load-balancing-proxy.md b/content/riak/kv/2.0.6/configuring/load-balancing-proxy.md index 2232c306d5..e134bb0f18 100644 --- a/content/riak/kv/2.0.6/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.0.6/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.0.6/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.0.6/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.0.6/configuring/managing.md b/content/riak/kv/2.0.6/configuring/managing.md index 0c3dce1d40..c1b50e36c5 100644 --- a/content/riak/kv/2.0.6/configuring/managing.md +++ b/content/riak/kv/2.0.6/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.0.6/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.0.6/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.0.6/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.0.6/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.0.6/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.0.6/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.0.6/configuring/mapreduce.md b/content/riak/kv/2.0.6/configuring/mapreduce.md index f4a066292a..1fb182be86 100644 --- a/content/riak/kv/2.0.6/configuring/mapreduce.md +++ b/content/riak/kv/2.0.6/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.0.6/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.0.6/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.0.6/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.0.6/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.6/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.0.6/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.0.6/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.0.6/configuring/reference.md b/content/riak/kv/2.0.6/configuring/reference.md index 88dcab0f6d..24cf97fba6 100644 --- a/content/riak/kv/2.0.6/configuring/reference.md +++ b/content/riak/kv/2.0.6/configuring/reference.md @@ -1876,8 +1876,8 @@ package) and in R14B04 via a custom repository and branch. 
diff --git a/content/riak/kv/2.0.6/configuring/search.md b/content/riak/kv/2.0.6/configuring/search.md index 74538ab18d..63b2421f67 100644 --- a/content/riak/kv/2.0.6/configuring/search.md +++ b/content/riak/kv/2.0.6/configuring/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.0.6/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.0.6/developing/usage/search -[usage search schema]: /riak/kv/2.0.6/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.6/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.0.6/developing/usage/custom-extractors -[config reference]: /riak/kv/2.0.6/configuring/reference -[config reference#search]: /riak/kv/2.0.6/configuring/reference/#search -[glossary aae]: /riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.0.6/using/security/ +[usage search]: {{}}riak/kv/2.0.6/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.6/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.6/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.0.6/developing/usage/custom-extractors +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[config reference#search]: {{}}riak/kv/2.0.6/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.0.6/using/security/ > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Riak Search Settings](http://docs.basho.com/riak/1.4.8/ops/advanced/configs/search/). +Yokozuna). This document covers Riak's Search subsystem from an operational perspective. 
If you are looking for more developer-focused diff --git a/content/riak/kv/2.0.6/configuring/strong-consistency.md b/content/riak/kv/2.0.6/configuring/strong-consistency.md index 8d363cb785..b3f0b8efaf 100644 --- a/content/riak/kv/2.0.6/configuring/strong-consistency.md +++ b/content/riak/kv/2.0.6/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.0.6/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.0.6/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.0.6/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.0.6/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.0.6/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.0.6/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.0.6/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.0.6/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.0.6/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.0.6/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.0.6/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.0.6/learn/concepts/causal-context -[dev data types]: /riak/kv/2.0.6/developing/data-types -[glossary aae]: /riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.0.6/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.0.6/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.0.6/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.0.6/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.0.6/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.0.6/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.0.6/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.0.6/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.0.6/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.0.6/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.0.6/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.0.6/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.0.6/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.0.6/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.0.6/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.0.6/learn/concepts/causal-context +[dev data types]: 
{{}}riak/kv/2.0.6/developing/data-types +[glossary aae]: {{}}riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.0.6/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.0.6/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.0.6/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.0.6/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.0.6/configuring/v2-multi-datacenter.md b/content/riak/kv/2.0.6/configuring/v2-multi-datacenter.md index e51949a4a0..8fcf5b7c44 100644 --- a/content/riak/kv/2.0.6/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.6/configuring/v2-multi-datacenter.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.6/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.0.6/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.6/configuring/v2-multi-datacenter/ssl Riak Enterprise's Multi-Datacenter Replication capabilities offer a variety of configurable parameters. diff --git a/content/riak/kv/2.0.6/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.0.6/configuring/v2-multi-datacenter/nat.md index 70156afcce..cd2a8bc9c4 100644 --- a/content/riak/kv/2.0.6/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.6/configuring/v2-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.6/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.0.6/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.6/configuring/v2-multi-datacenter/ssl Riak Enterprise supports replication of data on networks that use static NAT. This capability can be used for replicating data over the internet diff --git a/content/riak/kv/2.0.6/configuring/v3-multi-datacenter.md b/content/riak/kv/2.0.6/configuring/v3-multi-datacenter.md index 5aa8afd29a..67ad7d55b5 100644 --- a/content/riak/kv/2.0.6/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.6/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.6/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.0.6/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.0.6/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/nat.md index d394b6ec4d..a248f2b667 100644 --- a/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.6/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. 
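Returning to the `ensemble-status` table above, the output it describes comes from the riak-admin command; a short sketch (passing `root` is assumed here to drill into the root ensemble listed in the overview):

```bash
riak-admin ensemble-status        # cluster-wide overview table
riak-admin ensemble-status root   # detail for the root ensemble
```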
diff --git a/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/quick-start.md index cca33a2173..f016d83d18 100644 --- a/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.6/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.0.6/using/performance -[config v3 mdc]: /riak/kv/2.0.6/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter +[perf index]: {{< baseurl >}}riak/kv/2.0.6/using/performance +[config v3 mdc]: {{< baseurl >}}riak/kv/2.0.6/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{< baseurl >}}riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl.md index 653e64869c..58803060b7 100644 --- a/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.6/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.0.6/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{< baseurl >}}riak/kv/2.0.6/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.0.6/developing/api/backend.md b/content/riak/kv/2.0.6/developing/api/backend.md index 0b503999c8..3a37802b81 100644 --- a/content/riak/kv/2.0.6/developing/api/backend.md +++ b/content/riak/kv/2.0.6/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/backend-api --- -[plan backend]: /riak/kv/2.0.6/setup/planning/backend +[plan backend]: {{< baseurl >}}riak/kv/2.0.6/setup/planning/backend Riak's storage API uniformly applies to all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.0.6/developing/api/http.md b/content/riak/kv/2.0.6/developing/api/http.md index d480e64a58..36c17028ff 100644 --- a/content/riak/kv/2.0.6/developing/api/http.md +++ b/content/riak/kv/2.0.6/developing/api/http.md @@ -29,50 +29,50 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`.
Method | URL | Doc :------|:----|:--- -`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.0.6/developing/api/http/get-bucket-props) -`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.0.6/developing/api/http/set-bucket-props) -`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.0.6/developing/api/http/reset-bucket-props) -`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.0.6/developing/api/http/list-buckets) -`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.0.6/developing/api/http/list-keys) +`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/get-bucket-props) +`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/set-bucket-props) +`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/reset-bucket-props) +`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/list-buckets) +`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/list-keys) ## Object-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.0.6/developing/api/http/fetch-object) -`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.6/developing/api/http/store-object) -`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.6/developing/api/http/store-object) -`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.0.6/developing/api/http/delete-object) +`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/fetch-object) +`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/store-object) +`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/store-object) +`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/delete-object) ## Riak-Data-Type-related Operations -For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.0.6/learn/concepts/crdts), -see the `curl` examples in [Using Data Types](/riak/kv/2.0.6/developing/data-types). +For documentation on the HTTP API for [Riak Data Types]({{< baseurl >}}riak/kv/2.0.6/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{< baseurl >}}riak/kv/2.0.6/developing/data-types).
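As a quick illustration of the bucket- and object-related routes above, a minimal `curl` session might look like the following. This is a sketch only: it assumes a local node on the default HTTP port `8098`, plus a hypothetical `default` bucket type, `test` bucket, and `doc1` key.

```curl
# Store a JSON object under /types/<type>/buckets/<bucket>/keys/<key>
curl -XPUT http://localhost:8098/types/default/buckets/test/keys/doc1 \
  -H "Content-Type: application/json" \
  -d '{"name": "example"}'

# Fetch the same object back
curl http://localhost:8098/types/default/buckets/test/keys/doc1
```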
## Query-related Operations Method | URL | Doc :------|:----|:--- -`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.0.6/developing/api/http/mapreduce) -`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<index_value>` | [HTTP Secondary Indexes](/riak/kv/2.0.6/developing/api/http/secondary-indexes) -`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<start>/<end>` | [HTTP Secondary Indexes](/riak/kv/2.0.6/developing/api/http/secondary-indexes) +`POST` | `/mapred` | [HTTP MapReduce]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/mapreduce) +`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<index_value>` | [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/secondary-indexes) +`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<start>/<end>` | [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/secondary-indexes) ## Server-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/ping` | [HTTP Ping](/riak/kv/2.0.6/developing/api/http/ping) -`GET` | `/stats` | [HTTP Status](/riak/kv/2.0.6/developing/api/http/status) -`GET` | `/` | [HTTP List Resources](/riak/kv/2.0.6/developing/api/http/list-resources) +`GET` | `/ping` | [HTTP Ping]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/list-resources) ## Search-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/search/query/<index_name>` | [HTTP Search Query](/riak/kv/2.0.6/developing/api/http/search-query) -`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.0.6/developing/api/http/search-index-info) -`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index](/riak/kv/2.0.6/developing/api/http/fetch-search-index) -`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index](/riak/kv/2.0.6/developing/api/http/store-search-index) -`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index](/riak/kv/2.0.6/developing/api/http/delete-search-index) -`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema](/riak/kv/2.0.6/developing/api/http/fetch-search-schema) -`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema](/riak/kv/2.0.6/developing/api/http/store-search-schema) +`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/search-index-info) +`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/fetch-search-index) +`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/store-search-index) +`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/delete-search-index) +`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/store-search-schema) diff --git a/content/riak/kv/2.0.6/developing/api/http/counters.md b/content/riak/kv/2.0.6/developing/api/http/counters.md index 4aa2e139aa..116c13195a 100644 --- a/content/riak/kv/2.0.6/developing/api/http/counters.md +++ b/content/riak/kv/2.0.6/developing/api/http/counters.md @@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY ## Response -The regular POST/PUT ([HTTP Store Object](/riak/kv/2.0.6/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.0.6/developing/api/http/fetch-object)) responses apply here.
+The regular POST/PUT ([HTTP Store Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.0.6/developing/api/http/fetch-object.md b/content/riak/kv/2.0.6/developing/api/http/fetch-object.md index a90a3ab5b9..99fb8df669 100644 --- a/content/riak/kv/2.0.6/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.0.6/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.0.6/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.0.6/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (e.g. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.0.6/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.0.6/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.6/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{< baseurl >}}riak/kv/2.0.6/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.0.6/developing/api/http/fetch-search-index.md b/content/riak/kv/2.0.6/developing/api/http/fetch-search-index.md index c5474f7a03..b1f5e1f054 100644 --- a/content/riak/kv/2.0.6/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.0.6/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.0.6/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search/#simple-setup).
## Request @@ -36,7 +36,7 @@ GET /search/index/<index_name> ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.0.6/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.0.6/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{< baseurl >}}riak/kv/2.0.6/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.0.6/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.0.6/developing/api/http/fetch-search-schema.md index 91fc83fb22..fd2da1bf7a 100644 --- a/content/riak/kv/2.0.6/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.0.6/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.0.6/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.0.6/developing/api/http/get-bucket-props.md b/content/riak/kv/2.0.6/developing/api/http/get-bucket-props.md index db4701f7bb..43144c64ab 100644 --- a/content/riak/kv/2.0.6/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.0.6/developing/api/http/get-bucket-props.md @@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.0.6/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/list-keys). ## Response @@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.0.6/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.0.6/developing/api/http/link-walking.md b/content/riak/kv/2.0.6/developing/api/http/link-walking.md index e533d8069d..04995e619c 100644 --- a/content/riak/kv/2.0.6/developing/api/http/link-walking.md +++ b/content/riak/kv/2.0.6/developing/api/http/link-walking.md @@ -17,8 +17,8 @@ aliases: Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.0.6/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.0.6/learn/glossary/#links). +is a special case of [MapReduce]({{< baseurl >}}riak/kv/2.0.6/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{< baseurl >}}riak/kv/2.0.6/learn/glossary/#links). ## Request @@ -64,7 +64,7 @@ single object that was found. If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase.
Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.0.6/developing/api/http/fetch-object), without the status +response from [fetching the object]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.0.6/developing/api/http/list-resources.md b/content/riak/kv/2.0.6/developing/api/http/list-resources.md index 169a2c8760..6c7e299758 100644 --- a/content/riak/kv/2.0.6/developing/api/http/list-resources.md +++ b/content/riak/kv/2.0.6/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.0.6/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.0.6/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.0.6/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.0.6/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.0.6/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.0.6/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.0.6/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.0.6/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.0.6/developing/api/http/mapreduce.md b/content/riak/kv/2.0.6/developing/api/http/mapreduce.md index ddaef810ae..2a61626353 100644 --- a/content/riak/kv/2.0.6/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.0.6/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.0.6/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{< baseurl >}}riak/kv/2.0.6/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.0.6/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`.
The format of the request body is described in detail on the [MapReduce]({{< baseurl >}}riak/kv/2.0.6/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.0.6/developing/api/http/search-index-info.md b/content/riak/kv/2.0.6/developing/api/http/search-index-info.md index c153e99aec..95bc641b75 100644 --- a/content/riak/kv/2.0.6/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.0.6/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.0.6/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.0.6/developing/api/http/search-query.md b/content/riak/kv/2.0.6/developing/api/http/search-query.md index 3cf52a0106..f667e9b99f 100644 --- a/content/riak/kv/2.0.6/developing/api/http/search-query.md +++ b/content/riak/kv/2.0.6/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.0.6/developing/usage/search) query. +Performs a [Riak KV Search]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/<index_name> to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.0.6/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.0.6/developing/api/http/secondary-indexes.md b/content/riak/kv/2.0.6/developing/api/http/secondary-indexes.md index 3bd7554ca5..6e3cd74c52 100644 --- a/content/riak/kv/2.0.6/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.0.6/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.0.6/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{< baseurl >}}riak/kv/2.0.6/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
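To make the index routes shown earlier concrete, here is a minimal sketch of the two query forms. The `users` bucket and `email_bin` index are hypothetical, and 2i itself requires a backend that supports it (e.g. LevelDB).

```curl
# Exact-match query on a binary secondary index
curl http://localhost:8098/types/default/buckets/users/index/email_bin/john@example.com

# Range query over the same index
curl http://localhost:8098/types/default/buckets/users/index/email_bin/a/m
```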
## Request diff --git a/content/riak/kv/2.0.6/developing/api/http/set-bucket-props.md b/content/riak/kv/2.0.6/developing/api/http/set-bucket-props.md index 6a4e220978..26adfb56d2 100644 --- a/content/riak/kv/2.0.6/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.0.6/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.0.6/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.0.6/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{< baseurl >}}riak/kv/2.0.6/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{< baseurl >}}riak/kv/2.0.6/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.0.6/developing/api/http/status.md b/content/riak/kv/2.0.6/developing/api/http/status.md index b56a7adc04..da02bf28eb 100644 --- a/content/riak/kv/2.0.6/developing/api/http/status.md +++ b/content/riak/kv/2.0.6/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.0.6/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{< baseurl >}}riak/kv/2.0.6/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.0.6/developing/api/http/store-object.md b/content/riak/kv/2.0.6/developing/api/http/store-object.md index c5da33f53d..ddc99fc2e1 100644 --- a/content/riak/kv/2.0.6/developing/api/http/store-object.md +++ b/content/riak/kv/2.0.6/developing/api/http/store-object.md @@ -38,8 +38,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.0.6/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.6/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -83,7 +83,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.0.6/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly.
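Tying the store-object headers and parameters above together, here is a minimal sketch of a store request (all bucket, key, and index names hypothetical):

```curl
# Store an object, attach a secondary index entry via an
# x-riak-index-* header, and return the stored value in the response
curl -XPUT 'http://localhost:8098/types/default/buckets/users/keys/john?returnbody=true' \
  -H "Content-Type: application/json" \
  -H "x-riak-index-email_bin: john@example.com" \
  -d '{"name": "John"}'
```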
diff --git a/content/riak/kv/2.0.6/developing/api/http/store-search-index.md b/content/riak/kv/2.0.6/developing/api/http/store-search-index.md index 2bffc7368d..86452055f5 100644 --- a/content/riak/kv/2.0.6/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.0.6/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.0.6/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/<index_name> ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.0.6/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.0.6/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.0.6/developing/usage/search). +More information can be found in [Using Search]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.0.6/developing/api/http/store-search-schema.md b/content/riak/kv/2.0.6/developing/api/http/store-search-schema.md index c2cb2716e9..fbf7187480 100644 --- a/content/riak/kv/2.0.6/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.0.6/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.0.6/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/schema/<schema_name> ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.0.6/developing/usage/search-schemas) document. If you've created a schema and stored it in the file +formed XML schema. More information can be found in the [Search Schema]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search-schemas) document.
If you've created a schema and stored it in the file `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request: diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers.md index 625957d623..7564a365e0 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers.md @@ -139,47 +139,47 @@ message RpbErrorResp { ## Bucket Operations -* [PBC List Buckets](/riak/kv/2.0.6/developing/api/protocol-buffers/list-buckets) -* [PBC List Keys](/riak/kv/2.0.6/developing/api/protocol-buffers/list-keys) -* [PBC Get Bucket Properties](/riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-props) -* [PBC Set Bucket Properties](/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props) -* [PBC Reset Bucket Properties](/riak/kv/2.0.6/developing/api/protocol-buffers/reset-bucket-props) +* [PBC List Buckets]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/reset-bucket-props) ## Object/Key Operations -* [PBC Fetch Object](/riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object) -* [PBC Store Object](/riak/kv/2.0.6/developing/api/protocol-buffers/store-object) -* [PBC Delete Object](/riak/kv/2.0.6/developing/api/protocol-buffers/delete-object) +* [PBC Fetch Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/delete-object) ## Query Operations -* [PBC MapReduce](/riak/kv/2.0.6/developing/api/protocol-buffers/mapreduce) -* [PBC Secondary Indexes](/riak/kv/2.0.6/developing/api/protocol-buffers/secondary-indexes) -* [PBC Search](/riak/kv/2.0.6/developing/api/protocol-buffers/search) +* [PBC MapReduce]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/search) ## Server Operations -* [PBC Ping](/riak/kv/2.0.6/developing/api/protocol-buffers/ping) -* [PBC Server Info](/riak/kv/2.0.6/developing/api/protocol-buffers/server-info) +* [PBC Ping]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/server-info) ## Bucket Type Operations -* [PBC Get Bucket Type](/riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-type) -* [PBC Set Bucket Type](/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-type) +* [PBC Get Bucket Type]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-type) ## Data Type Operations -* [PBC Data Type Fetch](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-fetch) -* [PBC Data Type Union](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-union) -* [PBC Data Type Store](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-store) -* [PBC Data Type Counter
Store](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/auth-req.md index f2edd84215..38c947e432 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/auth-req.md @@ -27,4 +27,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.0.6/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{< baseurl >}}riak/kv/2.0.6/using/security/basics). diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/delete-object.md index cf383de910..a1a757e818 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/delete-object.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.0.6/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{< baseurl >}}riak/kv/2.0.6/using/cluster-operations/bucket-types)/bucket/key location.
## Request diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-counter-store.md index 2ba06b5742..a2541dbc56 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.0.6/developing/data-types). +An operation to update a [counter]({{< baseurl >}}riak/kv/2.0.6/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-fetch.md index 34fe10fb36..6358670630 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.0.6/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{< baseurl >}}riak/kv/2.0.6/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.0.6/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{< baseurl >}}riak/kv/2.0.6/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that the integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.0.6/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{< baseurl >}}riak/kv/2.0.6/learn/glossary/#vector-clock) for standard KV updates. The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store.md index ef8f9dad54..27f58cf2a8 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-set-store.md index e2c325b5bb..21d7f3204b 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-store.md index 6872cd5c9f..0327dd2735 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.0.6/developing/data-types). +A request to update the value of a [Riak Data Type]({{< baseurl >}}riak/kv/2.0.6/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`.
The `DtOp` value specifies which Data Type-specific operation is being -performed. More on that in the [PBC Data Type Union](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-union) document. +performed. More on that in the [PBC Data Type Union]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-union) document. ```protobuf message DtUpdateReq { @@ -50,11 +50,11 @@ message DtUpdateReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored -`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.0.6/using/cluster-operations/bucket-types). +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.0.6/using/cluster-operations/bucket-types). Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a -[counter](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store). +[counter]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-counter-store), [set]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-set-store), or [map]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-map-store). ```protobuf message DtOp { @@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum` Parameter | Description :---------|:----------- `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. -`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.0.6/learn/glossary/#vector-clock) +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{< baseurl >}}riak/kv/2.0.6/learn/glossary/#vector-clock) `w` | Write quorum, i.e. how many replicas to write to before returning a successful response `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted @@ -92,7 +92,7 @@ Parameter | Description ## Response The response to a Data Type update request is analogous to -[`RpbPutResp`](/riak/kv/2.0.6/developing/api/protocol-buffers/store-object) for KV operations. If the +[`RpbPutResp`]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/store-object) for KV operations. If the `return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-union.md index 8333bcba00..73e01da7a0 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-union.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/dt-union.md @@ -28,4 +28,4 @@ message DtOp { ``` The included operation depends on the Data Type that is being updated.
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.0.6/developing/api/protocol-buffers/dt-store) message. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object.md index 0d8f9f56bc..735a19f50c 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object.md @@ -47,7 +47,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that the integer value is less than or @@ -87,7 +87,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.0.6/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -114,7 +114,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `video/mp4` -* `vtag` --- The object's [vtag](/riak/kv/2.0.6/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{< baseurl >}}riak/kv/2.0.6/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -132,7 +132,7 @@ of the following optional parameters: } ``` Notice that both a key and value can be stored or just a key. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.0.6/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{< baseurl >}}riak/kv/2.0.6/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key)
If it is not specified, the `default` bucket type will be used. ## Response @@ -85,7 +85,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.0.6/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{< baseurl >}}riak/kv/2.0.6/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -106,5 +106,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riakcs/latest/cookbooks/mdc-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/latest/cookbooks/mdc-overview/) {{% /note %}} diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-type.md index b5077993c0..1c2ec7db23 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-type.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.0.6/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{< baseurl >}}riak/kv/2.0.6/using/cluster-operations/bucket-types). ## Request @@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/get-client-id.md index 075bfb1115..cebd369c41 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/get-client-id.md @@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.0.6/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/set-client-id).
## Request diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/mapreduce.md index e1b46a364a..d7cc8b2086 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/mapreduce.md @@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.0.6/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.0.6/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as [REST API]({{< baseurl >}}riak/kv/2.0.6/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{< baseurl >}}riak/kv/2.0.6/developing/app-guide/advanced-mapreduce/#erlang) ## Response diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/reset-bucket-props.md index ebd5b1ca47..e6a0b4e132 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/reset-bucket-props.md @@ -27,7 +27,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{< baseurl >}}riak/kv/2.0.6/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/secondary-indexes.md index b24605ce8e..65dd0d9cbf 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/secondary-indexes.md @@ -61,7 +61,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.0.6/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.0.6/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -84,7 +84,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object).
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props.md index 4148394de3..bb23d12ae7 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props.md @@ -29,9 +29,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{< baseurl >}}riak/kv/2.0.6/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-type.md index 3bfa3d24b8..3efef1dab5 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-type.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.6/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.0.6/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{< baseurl >}}riak/kv/2.0.6/developing/usage/bucket-types). ## Request @@ -28,4 +28,4 @@ message RpbSetBucketTypeReq { } ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/store-object.md index 25f26132fe..313be8b47e 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/store-object.md @@ -16,11 +16,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.0.6/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.0.6/learn/concepts/buckets), and [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{< baseurl >}}riak/kv/2.0.6/learn/concepts/keys-and-objects), [bucket]({{< baseurl >}}riak/kv/2.0.6/learn/concepts/buckets), and [bucket type]({{< baseurl >}}riak/kv/2.0.6/developing/usage/bucket-types).
A bucket must always be specified (via
`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
no key is specified, Riak will assign a random key to the object. If no
-[bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) is assigned, Riak will assign
-`default`, which means that the [default bucket configuration](/riak/kv/2.0.6/configuring/reference/#default-bucket-properties) will be used.
+[bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/2.0.6/configuring/reference/#default-bucket-properties) will be used.

#### Request

@@ -50,7 +50,7 @@ message RpbPutReq {
Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
-`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object)
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object)

#### Optional Parameters

@@ -93,7 +93,7 @@ message RpbPutResp {

If `return_body` is set to `true` on the PUT request, the
`RpbPutResp` will contain the current object after the PUT completes, in `contents`,
-as well as the object's [causal context](/riak/kv/2.0.6/learn/concepts/causal-context), in the `vclock`
+as well as the object's [causal context]({{}}riak/kv/2.0.6/learn/concepts/causal-context), in the `vclock`
field. The `key` will be sent only if the server generated a random key
for the object.
diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-get.md
index 7018e35207..b18e70570a 100644
--- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-get.md
+++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-get.md
@@ -53,7 +53,7 @@ message RpbYokozunaIndex {
```

Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.6/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
+binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.6/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
index is stored (for GET requests) or on which you wish the index to
be stored (for PUT requests). An index's `n_val` must match the
associated bucket's `n_val`.
diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-put.md
index c12fc0cc76..52375959b1 100644
--- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-put.md
+++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-index-put.md
@@ -37,4 +37,4 @@ message RpbYokozunaIndex {
```

Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.6/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests).
An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.6/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-get.md index d28f487af5..dd0ef16ce0 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.0.6/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{}}riak/kv/2.0.6/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-put.md index 877c68d39b..be19cfe536 100644 --- a/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.0.6/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.0.6/developing/usage/search-schemas). +Create a new Solr [search schema]({{}}riak/kv/2.0.6/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.0.6/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.0.6/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.6/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.0.6/developing/api/protocol-buffers/#message-codes) code with no data on success. 
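To make the protocol buffers flow documented above concrete, here is a minimal, hedged sketch using Basho's official Python client (which speaks PBC under the hood); the host/port, bucket, key, and index names are invented for illustration, and this is not the canonical example from the pages being edited:

```python
from riak import RiakClient

# Connect over protocol buffers; messages like RpbPutReq/RpbPutResp
# travel over this socket.
client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

bucket = client.bucket('users')  # illustrative bucket name

# Build an object and attach a secondary index entry (carried in the
# RpbContent portion of the RpbPutReq).
obj = bucket.new('john_smith', data={'name': 'John Smith'})
obj.add_index('email_bin', 'jsmith@example.com')

# return_body=True asks Riak to send the stored contents (and causal
# context) back in the RpbPutResp.
obj.store(return_body=True)
print(obj.data)

# Equality 2i query (an RpbIndexReq with qtype=eq); results is a list
# of matching keys.
page = bucket.get_index('email_bin', 'jsmith@example.com')
print(page.results)
```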
diff --git a/content/riak/kv/2.0.6/developing/app-guide.md b/content/riak/kv/2.0.6/developing/app-guide.md index d832add64f..43094357f9 100644 --- a/content/riak/kv/2.0.6/developing/app-guide.md +++ b/content/riak/kv/2.0.6/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.0.6/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.0.6/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.0.6/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.0.6/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.0.6/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.0.6/developing/key-value-modeling -[dev data types]: /riak/kv/2.0.6/developing/data-types -[dev data types#counters]: /riak/kv/2.0.6/developing/data-types/counters -[dev data types#sets]: /riak/kv/2.0.6/developing/data-types/sets -[dev data types#maps]: /riak/kv/2.0.6/developing/data-types/maps -[usage create objects]: /riak/kv/2.0.6/developing/usage/creating-objects -[usage search]: /riak/kv/2.0.6/developing/usage/search -[use ref search]: /riak/kv/2.0.6/using/reference/search -[usage 2i]: /riak/kv/2.0.6/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.0.6/developing/client-libraries -[concept crdts]: /riak/kv/2.0.6/learn/concepts/crdts -[dev data model]: /riak/kv/2.0.6/developing/data-modeling -[usage mapreduce]: /riak/kv/2.0.6/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.0.6/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.0.6/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.6/setup/planning/backend/memory -[obj model java]: /riak/kv/2.0.6/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.6/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.6/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.6/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.6/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.6/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.6/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.0.6/using/reference/strong-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.0.6/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.0.6/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.0.6/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.0.6/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties -[install index]: /riak/kv/2.0.6/setup/installing -[getting started]: /riak/kv/2.0.6/developing/getting-started -[usage index]: /riak/kv/2.0.6/developing/usage -[glossary]: /riak/kv/2.0.6/learn/glossary +[usage conflict resolution]: {{}}riak/kv/2.0.6/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.0.6/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.0.6/developing/data-modeling/#sensor-data +[concept eventual consistency]: 
{{}}riak/kv/2.0.6/learn/concepts/eventual-consistency
+[dev data model#user]: {{}}riak/kv/2.0.6/developing/data-modeling/#user-data
+[dev kv model]: {{}}riak/kv/2.0.6/developing/key-value-modeling
+[dev data types]: {{}}riak/kv/2.0.6/developing/data-types
+[dev data types#counters]: {{}}riak/kv/2.0.6/developing/data-types/counters
+[dev data types#sets]: {{}}riak/kv/2.0.6/developing/data-types/sets
+[dev data types#maps]: {{}}riak/kv/2.0.6/developing/data-types/maps
+[usage create objects]: {{}}riak/kv/2.0.6/developing/usage/creating-objects
+[usage search]: {{}}riak/kv/2.0.6/developing/usage/search
+[use ref search]: {{}}riak/kv/2.0.6/using/reference/search
+[usage 2i]: {{}}riak/kv/2.0.6/developing/usage/secondary-indexes
+[dev client libraries]: {{}}riak/kv/2.0.6/developing/client-libraries
+[concept crdts]: {{}}riak/kv/2.0.6/learn/concepts/crdts
+[dev data model]: {{}}riak/kv/2.0.6/developing/data-modeling
+[usage mapreduce]: {{}}riak/kv/2.0.6/developing/usage/mapreduce
+[apps mapreduce]: {{}}riak/kv/2.0.6/developing/app-guide/advanced-mapreduce
+[use ref 2i]: {{}}riak/kv/2.0.6/using/reference/secondary-indexes
+[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb
+[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask
+[plan backend memory]: {{}}riak/kv/2.0.6/setup/planning/backend/memory
+[obj model java]: {{}}riak/kv/2.0.6/developing/getting-started/java/object-modeling
+[obj model ruby]: {{}}riak/kv/2.0.6/developing/getting-started/ruby/object-modeling
+[obj model python]: {{}}riak/kv/2.0.6/developing/getting-started/python/object-modeling
+[obj model csharp]: {{}}riak/kv/2.0.6/developing/getting-started/csharp/object-modeling
+[obj model nodejs]: {{}}riak/kv/2.0.6/developing/getting-started/nodejs/object-modeling
+[obj model erlang]: {{}}riak/kv/2.0.6/developing/getting-started/erlang/object-modeling
+[obj model golang]: {{}}riak/kv/2.0.6/developing/getting-started/golang/object-modeling
+[concept strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency
+[use ref strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency
+[cluster ops strong consistency]: {{}}riak/kv/2.0.6/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/2.0.6/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/2.0.6/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/2.0.6/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/2.0.6/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/2.0.6/setup/installing
+[getting started]: {{}}riak/kv/2.0.6/developing/getting-started
+[usage index]: {{}}riak/kv/2.0.6/developing/usage
+[glossary]: {{}}riak/kv/2.0.6/learn/glossary

So you've decided to build an application using Riak as a data store. We
think that this is a wise choice for a broad variety of use cases. But
@@ -118,7 +118,7 @@ Riak may not be such a good choice if you use it to store:

* **Objects that exceed 1-2MB in size** --- If you will be storing a
 lot of objects over that size, we would recommend checking
- out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak
+ out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak
 CS was built to solve this problem. Storing large objects in Riak
 will typically lead to substandard performance.
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.0.6/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.0.6/developing/app-guide/advanced-mapreduce.md index eb838d258b..e499cb91fa 100644 --- a/content/riak/kv/2.0.6/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.0.6/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.6/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.0.6/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.0.6/using/reference/custom-code -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.6/configuring/reference +[usage 2i]: {{}}riak/kv/2.0.6/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.0.6/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.0.6/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.6/configuring/reference > **Use MapReduce sparingly** > @@ -725,7 +725,7 @@ You can use streaming with Erlang via the Riak local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.0.6/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.0.6/developing/app-guide/cluster-metadata.md index 5f8311e58f..bf4797b4d7 100644 --- a/content/riak/kv/2.0.6/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.0.6/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.0.6/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.0.6/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.0.6/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.0.6/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. 
Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.0.6/developing/app-guide/replication-properties.md b/content/riak/kv/2.0.6/developing/app-guide/replication-properties.md index 1132393175..37021ea0d0 100644 --- a/content/riak/kv/2.0.6/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.0.6/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.6/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.6/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.6/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.0.6/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.0.6/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.0.6/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.0.6/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.6/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.6/learn/concepts/buckets) that you're using. 
In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.0.6/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.0.6/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.6/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.0.6/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.0.6/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. 
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.6/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.6/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.0.6/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.0.6/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.0.6/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.0.6/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.0.6/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.0.6/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.6/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.6/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.0.6/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.0.6/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.6/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.6/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.6/developing/app-guide/strong-consistency.md b/content/riak/kv/2.0.6/developing/app-guide/strong-consistency.md index 4669740b5f..90ba3e756c 100644 --- a/content/riak/kv/2.0.6/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.0.6/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.0.6/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/2.1.3/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.0.6/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.0.6/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.0.6/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.0.6/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.0.6/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.0.6/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.0.6/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.0.6/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/2.1.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.0.6/developing/client-libraries -[getting started]: /riak/kv/2.0.6/developing/getting-started -[config strong consistency#details]: /riak/kv/2.0.6/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.0.6/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.0.6/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.0.6/using/cluster-operations/bucket-types +[apps replication properties]: 
{{}}riak/kv/2.0.6/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.0.6/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.0.6/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.0.6/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.0.6/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.0.6/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.0.6/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.0.6/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.0.6/developing/client-libraries +[getting started]: {{}}riak/kv/2.0.6/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.0.6/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.0.6/developing/client-libraries.md b/content/riak/kv/2.0.6/developing/client-libraries.md index 842dfb92e9..71b9b25973 100644 --- a/content/riak/kv/2.0.6/developing/client-libraries.md +++ b/content/riak/kv/2.0.6/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.0.6/developing/data-types.md b/content/riak/kv/2.0.6/developing/data-types.md index 143ae032ee..8c5e92da4b 100644 --- a/content/riak/kv/2.0.6/developing/data-types.md +++ b/content/riak/kv/2.0.6/developing/data-types.md @@ -38,9 +38,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -261,5 +261,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. 
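As a rough illustration of the three setup steps listed above (create a bucket type with the `datatype` parameter, confirm it, activate it), the following sketch assumes a bucket type named `sets` has already been created with `{"props":{"datatype":"set"}}` and activated via `riak-admin`, and uses the official Python client; the bucket and key names are invented for illustration:

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', pb_port=8087)

# Assumes a bucket type 'sets' created with datatype=set and activated,
# per the setup steps described above.
bucket = client.bucket_type('sets').bucket('travel')

# Because the bucket type carries datatype=set, new() returns a Set CRDT
# rather than a plain RiakObject.
cities = bucket.new('cities')
cities.add('Toronto')
cities.add('Montreal')
cities.store()

# Fetching the key returns the server-side merged set.
fetched = bucket.get('cities')
print(fetched.value)  # frozenset of members
```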
diff --git a/content/riak/kv/2.0.6/developing/faq.md b/content/riak/kv/2.0.6/developing/faq.md
index 78a5b77cf7..43e4b6dc55 100644
--- a/content/riak/kv/2.0.6/developing/faq.md
+++ b/content/riak/kv/2.0.6/developing/faq.md
@@ -16,19 +16,19 @@ aliases:
 - /riak/kv/2.0.6/community/faqs/developing
---

-[[Basho Bench]: /riak/kv/2.0.6/using/performance/benchmarking
-[Bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask
-[Bucket Properties]: /riak/kv/2.0.6/developing/usage
[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
-[commit hooks]: /riak/kv/2.0.6/developing/usage/commit-hooks
-[Configuration Files]: /riak/kv/2.0.6/configuring/reference
[contrib.basho.com]: https://github.com/basho/riak_function_contrib
-[Erlang Riak Client]: /riak/kv/2.0.6/developing/client-libraries
-[MapReduce]: /riak/kv/2.0.6/developing/usage/mapreduce
-[Memory]: /riak/kv/2.0.6/setup/planning/backend/memory
-[Riak CS]: /riak/cs/2.1.1
-[System Planning]: /riak/kv/2.0.6/setup/planning/start/#network-configuration-load-balancing
-[vector clocks]: /riak/kv/2.0.6/learn/concepts/causal-context#vector-clocks
+[Basho Bench]: {{}}riak/kv/2.0.6/using/performance/benchmarking
+[Bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask
+[Bucket Properties]: {{}}riak/kv/2.0.6/developing/usage
[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{}}riak/kv/2.0.6/developing/usage/commit-hooks
+[Configuration Files]: {{}}riak/kv/2.0.6/configuring/reference
[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{}}riak/kv/2.0.6/developing/client-libraries
+[MapReduce]: {{}}riak/kv/2.0.6/developing/usage/mapreduce
+[Memory]: {{}}riak/kv/2.0.6/setup/planning/backend/memory
+[Riak CS]: {{}}riak/cs/2.1.1
+[System Planning]: {{}}riak/kv/2.0.6/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{}}riak/kv/2.0.6/learn/concepts/causal-context#vector-clocks

## General

diff --git a/content/riak/kv/2.0.6/developing/getting-started.md b/content/riak/kv/2.0.6/developing/getting-started.md
index db2ee26392..f1f764b92b 100644
--- a/content/riak/kv/2.0.6/developing/getting-started.md
+++ b/content/riak/kv/2.0.6/developing/getting-started.md
@@ -12,8 +12,8 @@ menu:
 toc: true
---

-[install index]: /riak/kv/2.0.6/setup/installing
-[dev client libraries]: /riak/kv/2.0.6/developing/client-libraries
+[install index]: {{}}riak/kv/2.0.6/setup/installing
+[dev client libraries]: {{}}riak/kv/2.0.6/developing/client-libraries

Welcome, new Riak developer! This guide will get you started developing
against Riak KV with minimal fuss.
diff --git a/content/riak/kv/2.0.6/developing/getting-started/csharp.md b/content/riak/kv/2.0.6/developing/getting-started/csharp.md
index 7e56f25843..1d50b8bd25 100644
--- a/content/riak/kv/2.0.6/developing/getting-started/csharp.md
+++ b/content/riak/kv/2.0.6/developing/getting-started/csharp.md
@@ -17,7 +17,7 @@ aliases:

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.6/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.6/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of the .NET Framework or Mono is required.
@@ -79,4 +79,4 @@ We are now ready to start interacting with Riak.
## Next Steps -[CRUD Operations](/riak/kv/2.0.6/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.6/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.0.6/developing/getting-started/csharp/querying.md b/content/riak/kv/2.0.6/developing/getting-started/csharp/querying.md index 28a59a9513..ba34c0aa61 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.0.6/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.6/developing/getting-started/erlang.md b/content/riak/kv/2.0.6/developing/getting-started/erlang.md index 8e73b38537..195fff1be6 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/erlang.md +++ b/content/riak/kv/2.0.6/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.6/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.6/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.6/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.6/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.0.6/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.0.6/developing/getting-started/erlang/object-modeling.md index 3e58c6d638..a56b828e70 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.0.6/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.6/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.6/developing/getting-started/erlang/querying.md b/content/riak/kv/2.0.6/developing/getting-started/erlang/querying.md index cd4bca3a83..7e69e606c1 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.0.6/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.0.6/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.0.6/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.6/developing/getting-started/golang.md b/content/riak/kv/2.0.6/developing/getting-started/golang.md index bcd2a86054..0c77ea8465 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/golang.md +++ b/content/riak/kv/2.0.6/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.6/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.6/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.6/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.6/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.0.6/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.0.6/developing/getting-started/golang/object-modeling.md index 286b8d70c6..cf08f812b8 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.0.6/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.6/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.6/developing/getting-started/golang/querying.md b/content/riak/kv/2.0.6/developing/getting-started/golang/querying.md index 1010d94d14..dbe3750238 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.0.6/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.0.6/developing/getting-started/java.md b/content/riak/kv/2.0.6/developing/getting-started/java.md index e301955a74..9ffb858929 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/java.md +++ b/content/riak/kv/2.0.6/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.6/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.6/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
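The querying pages above repeatedly note that 2i requires a Memory or LevelDB backend and supports range queries, matched-term returns, and pagination via `max_results` and `continuation`. As a hedged, minimal sketch of that flow with the official Python client (the bucket, keys, and index field are invented for illustration, and a 2i-capable backend is assumed):

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', pb_port=8087)
bucket = client.bucket('people')  # illustrative bucket

# Index a couple of objects on an integer field (the _int suffix
# marks an integer index).
for key, salary in [('alice', 60000), ('bob', 90000)]:
    o = bucket.new(key, data={'salary': salary})
    o.add_index('salary_int', salary)
    o.store()

# Range query, returning matched terms, paginated 100 results at a time.
page = bucket.get_index('salary_int', 50000, 100000,
                        return_terms=True, max_results=100)
for term, key in page.results:
    print(term, key)

# Walk any remaining pages; the client threads the continuation
# value through for us.
while page.has_next_page():
    page = page.next_page()
    for term, key in page.results:
        print(term, key)
```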
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster);

## Next Steps

-[CRUD Operations](/riak/kv/2.0.6/developing/getting-started/java/crud-operations)
+[CRUD Operations]({{}}riak/kv/2.0.6/developing/getting-started/java/crud-operations)
diff --git a/content/riak/kv/2.0.6/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.0.6/developing/getting-started/java/crud-operations.md
index 3333e07367..42dec4dcdc 100644
--- a/content/riak/kv/2.0.6/developing/getting-started/java/crud-operations.md
+++ b/content/riak/kv/2.0.6/developing/getting-started/java/crud-operations.md
@@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp);
```

For more in-depth information on updating objects and sibling resolution in
-Riak, see [Updating Objects](/riak/kv/2.0.6/developing/usage/updating-objects/)
-and [Conflict Resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution/)
+Riak, see [Updating Objects]({{}}riak/kv/2.0.6/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/)
documentation.

## Deleting Objects
@@ -178,6 +178,6 @@ UpdateValue.Response response = client.execute(updateValue);
```

For more in-depth information on updating objects and sibling resolution in
-Riak, see [Updating Objects](/riak/kv/2.0.6/developing/usage/updating-objects/)
-and [Conflict Resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution/)
+Riak, see [Updating Objects]({{}}riak/kv/2.0.6/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/)
documentation.
diff --git a/content/riak/kv/2.0.6/developing/getting-started/java/querying.md b/content/riak/kv/2.0.6/developing/getting-started/java/querying.md
index 25a032c9af..3c78640f99 100644
--- a/content/riak/kv/2.0.6/developing/getting-started/java/querying.md
+++ b/content/riak/kv/2.0.6/developing/getting-started/java/querying.md
@@ -194,9 +194,9 @@ intrinsic relationships.

## Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from an SQL world, Secondary Indexes (2i) are a lot
diff --git a/content/riak/kv/2.0.6/developing/getting-started/nodejs.md b/content/riak/kv/2.0.6/developing/getting-started/nodejs.md
index 55c0316a56..2d22e9dc5f 100644
--- a/content/riak/kv/2.0.6/developing/getting-started/nodejs.md
+++ b/content/riak/kv/2.0.6/developing/getting-started/nodejs.md
@@ -21,7 +21,7 @@ aliases:

[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.6/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.6/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.6/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.6/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.0.6/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.0.6/developing/getting-started/nodejs/querying.md index b7ca26120f..3ead0b955f 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.0.6/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.6/developing/getting-started/php.md b/content/riak/kv/2.0.6/developing/getting-started/php.md index 9ce6517d6f..8b85a15d25 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/php.md +++ b/content/riak/kv/2.0.6/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.6/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.6/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.6/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.6/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.0.6/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.0.6/developing/getting-started/php/crud-operations.md index c9b51d5771..6bb36640e9 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.0.6/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. 
[In the next chapter](/riak/kv/2.0.6/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.0.6/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.0.6/developing/getting-started/php/querying.md b/content/riak/kv/2.0.6/developing/getting-started/php/querying.md index 59484505eb..309abbc379 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.0.6/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.6/developing/getting-started/python.md b/content/riak/kv/2.0.6/developing/getting-started/python.md index 9e1727e5d0..3a103b3636 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/python.md +++ b/content/riak/kv/2.0.6/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.6/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.6/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.0.6/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.6/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.0.6/developing/getting-started/python/querying.md b/content/riak/kv/2.0.6/developing/getting-started/python/querying.md index 42c2298137..c65a6a2c4b 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.0.6/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.6/developing/getting-started/ruby.md b/content/riak/kv/2.0.6/developing/getting-started/ruby.md index 535304646f..5e56a74e03 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/ruby.md +++ b/content/riak/kv/2.0.6/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.6/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.6/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.6/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.6/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.0.6/developing/getting-started/ruby/querying.md b/content/riak/kv/2.0.6/developing/getting-started/ruby/querying.md index fac4e6d2db..7368ca7180 100644 --- a/content/riak/kv/2.0.6/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.0.6/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.6/setup/planning/backend/leveldb). 
[Bitcask](/riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.6/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.6/developing/key-value-modeling.md b/content/riak/kv/2.0.6/developing/key-value-modeling.md index 0b3078a7f9..c7b2aea38f 100644 --- a/content/riak/kv/2.0.6/developing/key-value-modeling.md +++ b/content/riak/kv/2.0.6/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.0.6/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.0.6/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.0.6/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.0.6/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.0.6/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.0.6/developing/app-guide/) for a better sense of which features you might need. +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.0.6/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects.
Instead, it interacts with objects on a one-by-one basis, using primary key lookups. Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.0.6/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.0.6/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.0.6/developing/app-guide/replication-properties) and other properties +* The object's [key]({{}}riak/kv/2.0.6/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/2.0.6/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.0.6/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.0.6/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.0.6/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.0.6/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.0.6/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.0.6/developing/data-types/sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.0.6/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.0.6/developing/data-types/sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.0.6/developing/data-types) that enables you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`.
The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.0.6/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.6/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.6/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.6/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.6/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.0.6/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.0.6/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.0.6/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.0.6/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.0.6/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.0.6/developing/usage/commit-hooks.md b/content/riak/kv/2.0.6/developing/usage/commit-hooks.md index ba384e3b6f..605fcc8011 100644 --- a/content/riak/kv/2.0.6/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.0.6/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. -Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.0.6/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.0.6/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. 
## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.0.6/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.0.6/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.0.6/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.0.6/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.0.6/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.0.6/developing/usage/conflict-resolution.md b/content/riak/kv/2.0.6/developing/usage/conflict-resolution.md index 79ca49301c..85d92a875c 100644 --- a/content/riak/kv/2.0.6/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.0.6/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.6/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.0.6/learn/concepts/clusters) system in which any [node](/riak/kv/2.0.6/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{}}riak/kv/2.0.6/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.0.6/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. 
-If you are using Riak in an [eventually consistent](/riak/kv/2.0.6/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.0.6/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.0.6/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.0.6/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.0.6/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.0.6/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.0.6/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.0.6/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.0.6/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.0.6/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.0.6/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.0.6/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.0.6/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.0.6/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. The most important [bucket properties](/riak/kv/2.0.6/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.0.6/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties.
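To make the effect of `allow_mult` concrete for application code, here is a hedged sketch using the official Python client. The `siblings_allowed` type and `nickolodeon` bucket echo the example used later in this document; the key name and the pick-the-first resolution are purely illustrative:

```python
# Sketch: what allow_mult=true means for application code. Assumes a
# bucket type named 'siblings_allowed' has been created and activated.
from riak import RiakClient

client = RiakClient()
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
obj = bucket.get('some_key')  # illustrative key

if len(obj.siblings) > 1:
    # Riak returned every conflicting value; the application must
    # choose one (or merge them) and write the winner back.
    obj.siblings = [obj.siblings[0]]  # naive pick-the-first resolution
    obj.store()
```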
@@ -87,7 +87,7 @@ If the `[allow_mult](#siblings)` parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -`[last_write_wins](/riak/kv/2.0.6/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, +`[last_write_wins]({{}}riak/kv/2.0.6/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.0.6/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.6/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.6/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.6/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.6/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.0.6/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.0.6/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.0.6/configuring/reference) to change the [default bucket properties](/riak/kv/2.0.6/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.0.6/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.0.6/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.0.6/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.0.6/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{}}riak/kv/2.0.6/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.0.6/learn/concepts/causal-context#vector-clocks) will be used. 
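Before digging further into the mechanics, here is a rough sketch of what using causal context looks like in practice, assuming the official Python client (which fetches and returns the context for you; all names are illustrative):

```python
# Sketch: a causal-context-safe update. Fetching the object first lets
# the client hand the current context back to Riak on store(), so the
# write is treated as an update rather than a blind concurrent write.
from riak import RiakClient

client = RiakClient()
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')

obj = bucket.get('some_key')  # causal context travels with the object
obj.data = 'new value'
obj.store()                   # context is returned to Riak here
```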
Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.0.6/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.0.6/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.0.6/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.0.6/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.0.6/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.0.6/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.0.6/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.0.6/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.0.6/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.6/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.0.6/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.6/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.0.6/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.6/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.6/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.6/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.6/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -611,7 +611,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.0.6/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.0.6/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -666,7 +666,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/csharp.md index 2b9bd52b19..ebd5815c6e 100644 --- a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.6/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
diff --git a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/golang.md index 4306247767..143802cc9e 100644 --- a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.6/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/java.md index 6fba04c3ac..745e24769e 100644 --- a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.6/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.6/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.6/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.6/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.6/developing/data-types/) that have specific conflict resolution mechanics built in.
If you have data that -can be modeled as a [counter](/riak/kv/2.0.6/developing/data-types/counters), [set](/riak/kv/2.0.6/developing/data-types/sets), or [map](/riak/kv/2.0.6/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.6/developing/data-types/counters), [set]({{}}riak/kv/2.0.6/developing/data-types/sets), or [map]({{}}riak/kv/2.0.6/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.6/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.6/developing/data-types/sets). diff --git a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/nodejs.md index 8f00aa85f9..82ae193182 100644 --- a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.6/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/php.md index 0f6cec15fc..44e798e0d3 100644 --- a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.6/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.6/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.6/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.6/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.6/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.6/developing/data-types/counters), [set](/riak/kv/2.0.6/developing/data-types/sets), or [map](/riak/kv/2.0.6/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.6/developing/data-types/counters), [set]({{}}riak/kv/2.0.6/developing/data-types/sets), or [map]({{}}riak/kv/2.0.6/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.6/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.6/developing/data-types/sets). diff --git a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/python.md index f11c296f6d..d8565bd5bb 100644 --- a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.6/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.6/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.6/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.6/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.6/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.6/developing/data-types/counters), [set](/riak/kv/2.0.6/developing/data-types/sets), or [map](/riak/kv/2.0.6/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.6/developing/data-types/counters), [set]({{}}riak/kv/2.0.6/developing/data-types/sets), or [map]({{}}riak/kv/2.0.6/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.6/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.6/developing/data-types/sets). diff --git a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/ruby.md index 6f09befbea..13480005f4 100644 --- a/content/riak/kv/2.0.6/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.0.6/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.6/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.6/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.6/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.6/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.6/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.6/developing/data-types/counters), [set](/riak/kv/2.0.6/developing/data-types/sets), or [map](/riak/kv/2.0.6/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.6/developing/data-types/counters), [set]({{}}riak/kv/2.0.6/developing/data-types/sets), or [map]({{}}riak/kv/2.0.6/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.6/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.6/developing/data-types/sets). diff --git a/content/riak/kv/2.0.6/developing/usage/creating-objects.md b/content/riak/kv/2.0.6/developing/usage/creating-objects.md index 736cb76dec..f00149f593 100644 --- a/content/riak/kv/2.0.6/developing/usage/creating-objects.md +++ b/content/riak/kv/2.0.6/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.0.6/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.0.6/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -27,7 +27,7 @@ In the example above, our read was unsuccessful because our Riak cluster is currently empty. Let's change that by storing an object containing information about a dog named Rufus. We'll store that object in the location described above, i.e. in the key `rufus` in the bucket `dogs`, -which bears the `animals` [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types). +which bears the `animals` [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types). 
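As a quick preview, a write like the one just described might be sketched as follows with the official Python client (this is an illustration, not the tutorial's own listing; the multi-language examples appear below):

```python
# Sketch: store the plain-text value 'WOOF!' at animals/dogs/rufus.
from riak import RiakClient

client = RiakClient()
bucket = client.bucket_type('animals').bucket('dogs')
obj = bucket.new('rufus', data='WOOF!', content_type='text/plain')
obj.store()
```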
The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -122,7 +122,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, run the same read operation in [Reading Objects](/riak/kv/2.0.6/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no +Now, run the same read operation in [Reading Objects]({{}}riak/kv/2.0.6/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no longer empty! ### Store an Object @@ -143,7 +143,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.0.6/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.0.6/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.0.6/developing/usage/custom-extractors.md b/content/riak/kv/2.0.6/developing/usage/custom-extractors.md index 57b281e7ad..5a825e5a84 100644 --- a/content/riak/kv/2.0.6/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.0.6/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.0.6/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.0.6/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.0.6/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.0.6/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.0.6/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.0.6/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added to `<fields>` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.0.6/developing/usage/deleting-objects.md b/content/riak/kv/2.0.6/developing/usage/deleting-objects.md index 9e2447e96f..0285b29f14 100644 --- a/content/riak/kv/2.0.6/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.0.6/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.0.6/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.0.6/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.0.6/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.0.6/developing/usage/document-store.md b/content/riak/kv/2.0.6/developing/usage/document-store.md index 55ac1a3943..4d0c2ebf1a 100644 --- a/content/riak/kv/2.0.6/developing/usage/document-store.md +++ b/content/riak/kv/2.0.6/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.0.6/developing/usage/search/) and [Riak Data Types](/riak/kv/2.0.6/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.0.6/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.0.6/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.0.6/developing/data-types/maps). +[Riak maps]({{}}riak/kv/2.0.6/developing/data-types/maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.0.6/developing/data-types/maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.0.6/developing/data-types/maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**.
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.0.6/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.0.6/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.0.6/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.0.6/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.0.6/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.0.6/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `<fields>` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.0.6/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.0.6/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.0.6/developing/usage/mapreduce.md b/content/riak/kv/2.0.6/developing/usage/mapreduce.md index fd0dfbd411..0703427df4 100644 --- a/content/riak/kv/2.0.6/developing/usage/mapreduce.md +++ b/content/riak/kv/2.0.6/developing/usage/mapreduce.md @@ -33,9 +33,9 @@ transferring a potentially huge dataset to a client algorithm. Developers can use MapReduce for things like filtering documents by tags, counting words in documents, and extracting links to related data. In Riak, MapReduce is one method for querying that is not strictly based -on key querying, alongside [secondary indexes](/riak/kv/2.0.6/developing/usage/secondary-indexes/) -and [Search](/riak/kv/2.0.6/developing/usage/search/). MapReduce jobs can be submitted through the -[HTTP API](/riak/kv/2.0.6/developing/api/http) or the [Protocol Buffers API](/riak/kv/2.0.6/developing/api/protocol-buffers/), although we +on key querying, alongside [secondary indexes]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes/) +and [Search]({{}}riak/kv/2.0.6/developing/usage/search/). MapReduce jobs can be submitted through the +[HTTP API]({{}}riak/kv/2.0.6/developing/api/http) or the [Protocol Buffers API]({{}}riak/kv/2.0.6/developing/api/protocol-buffers/), although we strongly recommend using the Protocol Buffers API for performance reasons. @@ -49,9 +49,9 @@ reasons. ## When to Use MapReduce * When you know the set of objects over which you want to MapReduce - (i.e. the locations of the objects, as specified by [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types), bucket, and key) + (i.e. the locations of the objects, as specified by [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types), bucket, and key) * When you want to return actual objects or pieces of objects and not - just the keys.
[Search](/riak/kv/2.0.6/developing/usage/search/) and [secondary indexes](/riak/kv/2.0.6/developing/usage/secondary-indexes) are other means of returning objects based on + just the keys. [Search]({{}}riak/kv/2.0.6/developing/usage/search/) and [secondary indexes]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) are other means of returning objects based on non-key-based queries, but they only return lists of keys and not whole objects. * When you need the utmost flexibility in querying your data. MapReduce @@ -86,7 +86,7 @@ Riak MapReduce queries have two components: * A list of phases The elements of the input list are object locations as specified by -[bucket type](/riak/kv/2.0.6/developing/usage/bucket-types), bucket, and key. The elements of the +[bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types), bucket, and key. The elements of the phases list are chunks of information related to a map, a reduce, or a link function. @@ -96,7 +96,7 @@ node that the client contacts to make the request becomes the above, each job consists of a list of phases, where each phase is either a map or a reduce phase. The coordinating node uses the list of phases to route the object keys and the function that will operate over the -objects stored in those keys and instruct the proper [vnode](/riak/kv/2.0.6/learn/glossary/#vnode) to +objects stored in those keys and instruct the proper [vnode]({{}}riak/kv/2.0.6/learn/glossary/#vnode) to run that function over the right objects. After running the map function, the results are sent back to the @@ -107,20 +107,20 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example In this example, we'll create four objects with the text "caremad" repeated a varying number of times and store those objects in the bucket -`training` (which does not bear a [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types)). +`training` (which does not bear a [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types)). An Erlang MapReduce function will be used to count the occurrences of the word "caremad." ### Data object input commands For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) -in conjunction with Riak's [HTTP API](/riak/kv/2.0.6/developing/api/http) to store the objects: +in conjunction with Riak's [HTTP API]({{}}riak/kv/2.0.6/developing/api/http) to store the objects: ```curl curl -XPUT http://localhost:8098/buckets/training/keys/foo \ @@ -218,4 +218,4 @@ counting the number of instances of the word. ## Advanced MapReduce Queries For more detailed information on MapReduce queries in Riak, we recommend -checking out our [Advanced MapReduce](/riak/kv/2.0.6/developing/app-guide/advanced-mapreduce) guide. +checking out our [Advanced MapReduce]({{}}riak/kv/2.0.6/developing/app-guide/advanced-mapreduce) guide. diff --git a/content/riak/kv/2.0.6/developing/usage/reading-objects.md b/content/riak/kv/2.0.6/developing/usage/reading-objects.md index 353ab34046..741897b763 100644 --- a/content/riak/kv/2.0.6/developing/usage/reading-objects.md +++ b/content/riak/kv/2.0.6/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. 
You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.0.6/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) `animals`: +`dogs`, which bears the [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) `animals`: ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.0.6/developing/usage/replication.md b/content/riak/kv/2.0.6/developing/usage/replication.md index f0be6d7be0..ebbfc85346 100644 --- a/content/riak/kv/2.0.6/developing/usage/replication.md +++ b/content/riak/kv/2.0.6/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.6/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.6/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.6/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using Strong +Using Strong Consistency documentation, as this option will not be covered in this tutorial. 
{{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.6/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.6/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.6/developing/usage/bucket-types) +that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.6/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.6/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.0.6/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.0.6/setup/planning/backend/multi).
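To make the bucket-type approach discussed above concrete, here is a minimal sketch; the type name `n5_rw3` is hypothetical, and the property names are the ones from the table above:

```bash
# Create a bucket type whose buckets replicate objects to 5 nodes and
# require 3 responses for both reads and writes, then activate it
riak-admin bucket-type create n5_rw3 '{"props":{"n_val":5,"r":3,"w":3}}'
riak-admin bucket-type activate n5_rw3
```

Any bucket created under this type then inherits those replication properties without per-request tuning.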
## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.6/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.6/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.0.6/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.0.6/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.6/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.6/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.6/developing/usage/search-schemas.md b/content/riak/kv/2.0.6/developing/usage/search-schemas.md index 002b88b7d2..3a1676e328 100644 --- a/content/riak/kv/2.0.6/developing/usage/search-schemas.md +++ b/content/riak/kv/2.0.6/developing/usage/search-schemas.md @@ -15,17 +15,17 @@ aliases: - /riak/kv/2.0.6/dev/advanced/search-schema --- -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). Riak Search is built for ease of use, allowing you to write values into Riak and query for values using Solr. Riak Search does a lot of work -under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.0.6/developing/data-types/), and [more](/riak/kv/2.0.6/developing/usage/custom-extractors)---into something that can be indexed and searched later. +under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.0.6/developing/data-types/), and [more]({{}}riak/kv/2.0.6/developing/usage/custom-extractors)---into something that can be indexed and searched later. Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing an array of strings? An integer? A date? Is your text in English or Russian?
You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.0.6/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.0.6/developing/usage/search.md b/content/riak/kv/2.0.6/developing/usage/search.md index 12b3584aa4..12496f1753 100644 --- a/content/riak/kv/2.0.6/developing/usage/search.md +++ b/content/riak/kv/2.0.6/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.0.6/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.0.6/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.6/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.6/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.6/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.6/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.0.6/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.0.6/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.0.6/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.0.6/developing/usage/custom-extractors). 
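To see an extractor at work end to end, here is a hedged sketch that stores a JSON value and then queries a field Solr extracted from it. It assumes the `famous` index created earlier in this document exists and has been associated with the `animals` bucket type (the bucket and key names are illustrative), and that `RIAK_HOST` is set as described above:

```curl
# Store a JSON object; the JSON extractor turns its fields into
# indexable Solr fields such as name_s and age_i
curl -XPUT "$RIAK_HOST/types/animals/buckets/cats/keys/liono" \
  -H 'Content-Type: application/json' \
  -d '{"name_s": "Lion-o", "age_i": 30, "leader_b": true}'

# Query the index for the extracted field
curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*"
```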
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.0.6/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.0.6/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.0.6/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.0.6/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.0.6/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.0.6/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.0.6/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.0.6/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.0.6/developing/usage/searching-data-types.md b/content/riak/kv/2.0.6/developing/usage/searching-data-types.md index 2ab88c696f..8d1850436b 100644 --- a/content/riak/kv/2.0.6/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.0.6/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.6/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.0.6/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.0.6/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.0.6/developing/data-types/counters), [sets](/riak/kv/2.0.6/developing/data-types/sets), and [maps](/riak/kv/2.0.6/developing/data-types/maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.0.6/developing/data-types/counters), [sets]({{}}riak/kv/2.0.6/developing/data-types/sets), and [maps]({{}}riak/kv/2.0.6/developing/data-types/maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
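The Data Type examples that follow assume a Solr index already exists and has been tied to the relevant bucket type through its `search_index` property. A minimal sketch of that index-creation step, borrowing the `scores` index name used in the counters example below:

```curl
# Create a search index named "scores" backed by the default schema
curl -XPUT "$RIAK_HOST/search/index/scores" \
  -H 'Content-Type: application/json' \
  -d '{"schema":"_yz_default"}'
```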
### Top-level Schemas -The default schema for [counters](/riak/kv/2.0.6/developing/data-types/counters) indexes each +The default schema for [counters]({{}}riak/kv/2.0.6/developing/data-types/counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.0.6/developing/data-types/sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.0.6/developing/data-types/sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.0.6/developing/data-types/maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.0.6/developing/data-types/maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) for [storing counters](/riak/kv/2.0.6/developing/data-types/counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.0.6/developing/data-types/counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types) for [storing sets](/riak/kv/2.0.6/developing/data-types/sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.0.6/developing/data-types/sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.0.6/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.0.6/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.0.6/developing/data-types/maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.0.6/developing/data-types/maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.0.6/developing/usage/secondary-indexes.md b/content/riak/kv/2.0.6/developing/usage/secondary-indexes.md index 7479cc3cd8..c1b26c91df 100644 --- a/content/riak/kv/2.0.6/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.0.6/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.0.6/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.6/setup/planning/backend/memory -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.6/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.6/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.0.6/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.0.6/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.0.6/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.0.6/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.0.6/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.0.6/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.0.6/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.0.6/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.0.6/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.0.6/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.0.6/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.0.6/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.0.6/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.0.6/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.0.6/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.6/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.6/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.6/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.6/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.0.6/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.0.6/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.0.6/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.0.6/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
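To ground the interface description above, here is a compact sketch of tagging at write time and then querying by tag, reusing the `users` bucket and `john_smith` key from the example above; the `twitter_bin` index field and its value are illustrative:

```curl
# Tag the object with a binary secondary index at write time
curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \
  -H 'x-riak-index-twitter_bin: jsmith123' \
  -H 'Content-Type: application/json' \
  -d '{"name": "John Smith"}'

# Later, fetch every key in the bucket bearing that tag
curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
```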
### Exact Match diff --git a/content/riak/kv/2.0.6/developing/usage/security.md b/content/riak/kv/2.0.6/developing/usage/security.md index 4f8eb14086..d890cb323a 100644 --- a/content/riak/kv/2.0.6/developing/usage/security.md +++ b/content/riak/kv/2.0.6/developing/usage/security.md @@ -15,49 +15,49 @@ aliases: - /riak/kv/2.0.6/dev/advanced/client-security --- -Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.0.6/using/security/basics) that enables you to choose +Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.0.6/using/security/basics) that enables you to choose * which Riak users/clients are authorized to perform a wide variety of Riak operations, and * how those users/clients are required to authenticate themselves. -The following four authentication mechanisms, aka [security sources](/riak/kv/2.0.6/using/security/managing-sources/) are available: +The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.0.6/using/security/managing-sources/) are available: -* [Trust](/riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication)-based +* [Trust]({{}}riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default -* [Password](/riak/kv/2.0.6/using/security/managing-sources/#password-based-authentication)-based authentication requires +* [Password]({{}}riak/kv/2.0.6/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password -* [Certificate](/riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication +* [Certificate]({{}}riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients authenticate themselves using certificates -* [Pluggable authentication module (PAM)](/riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication)-based authentication requires +* [Pluggable authentication module (PAM)]({{}}riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the - `[riak-admin security](/riak/kv/2.0.6/using/security/managing-sources/#managing-sources)` + `[riak-admin security]({{}}riak/kv/2.0.6/using/security/managing-sources/#managing-sources)` command line interface Riak's approach to security is highly flexible. If you choose to use Riak's security feature, you do not need to require that all clients authenticate via the same means. Instead, you can specify authentication sources on a client-by-client, i.e. user-by-user, basis. This means that -you can require clients performing, say, [MapReduce](/riak/kv/2.0.6/developing/usage/mapreduce/) -operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.0.6/developing/usage) have to use username and password. The approach +you can require clients performing, say, [MapReduce]({{}}riak/kv/2.0.6/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.0.6/developing/usage) have to use username and password. The approach that you adopt will depend on your security needs. This document provides a general overview of how that works.
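For orientation, the server-side setup that this document presupposes looks roughly like the following sketch; the user name and choice of source are illustrative, and the linked security documents give the full picture:

```bash
# Enable the security subsystem, create a user, and assign that user
# a security source (here, password auth from one trusted network)
riak-admin security enable
riak-admin security add-user riakuser password=rosebud
riak-admin security add-source riakuser 127.0.0.1/32 password
```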
For managing security in Riak itself, see the following documents: -* [Authentication and Authorization](/riak/kv/2.0.6/using/security/basics) -* [Managing Security Sources](/riak/kv/2.0.6/using/security/managing-sources/) +* [Authentication and Authorization]({{}}riak/kv/2.0.6/using/security/basics) +* [Managing Security Sources]({{}}riak/kv/2.0.6/using/security/managing-sources/) We also provide client-library-specific guides for the following officially supported clients: -* [Java](/riak/kv/2.0.6/developing/usage/security/java) -* [Ruby](/riak/kv/2.0.6/developing/usage/security/ruby) -* [PHP](/riak/kv/2.0.6/developing/usage/security/php) -* [Python](/riak/kv/2.0.6/developing/usage/security/python) -* [Erlang](/riak/kv/2.0.6/developing/usage/security/erlang) +* [Java]({{}}riak/kv/2.0.6/developing/usage/security/java) +* [Ruby]({{}}riak/kv/2.0.6/developing/usage/security/ruby) +* [PHP]({{}}riak/kv/2.0.6/developing/usage/security/php) +* [Python]({{}}riak/kv/2.0.6/developing/usage/security/python) +* [Erlang]({{}}riak/kv/2.0.6/developing/usage/security/erlang) ## Certificates, Keys, and Authorities @@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients. > **HTTP not supported** > > Certificate-based authentication is available only through Riak's -[Protocol Buffers](/riak/kv/2.0.6/developing/api/protocol-buffers/) interface. It is not available through the -[HTTP API](/riak/kv/2.0.6/developing/api/http). +[Protocol Buffers]({{}}riak/kv/2.0.6/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{}}riak/kv/2.0.6/developing/api/http). ### Default Names -In Riak's [configuration files](/riak/kv/2.0.6/configuring/reference/#security), the +In Riak's [configuration files]({{}}riak/kv/2.0.6/configuring/reference/#security), the default certificate file names are as follows: Cert | Filename diff --git a/content/riak/kv/2.0.6/developing/usage/security/erlang.md b/content/riak/kv/2.0.6/developing/usage/security/erlang.md index 010b10649f..95d2527f86 100644 --- a/content/riak/kv/2.0.6/developing/usage/security/erlang.md +++ b/content/riak/kv/2.0.6/developing/usage/security/erlang.md @@ -19,9 +19,9 @@ aliases: This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak. -If you are using [trust](/riak/kv/2.0.6/using/security/managing-sources/), [PAM-](/riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.0.6/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.6/using/security/managing-sources/) or [PAM-]({{}}riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication)based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.0.6/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.6/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.6/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.6/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.6/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.6/developing/usage/security/java.md b/content/riak/kv/2.0.6/developing/usage/security/java.md index 59f94c08ad..f968d27a7e 100644 --- a/content/riak/kv/2.0.6/developing/usage/security/java.md +++ b/content/riak/kv/2.0.6/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.6/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.6/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.0.6/developing/usage/security/php.md b/content/riak/kv/2.0.6/developing/usage/security/php.md index c51a4682d6..9abb322878 100644 --- a/content/riak/kv/2.0.6/developing/usage/security/php.md +++ b/content/riak/kv/2.0.6/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.6/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.0.6/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.0.6/developing/usage/security/python.md b/content/riak/kv/2.0.6/developing/usage/security/python.md index 14b4e6ee10..ce22c7279f 100644 --- a/content/riak/kv/2.0.6/developing/usage/security/python.md +++ b/content/riak/kv/2.0.6/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.6/using/security/managing-sources/) or [PAM-](/riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.0.6/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.6/using/security/managing-sources/) or [PAM-]({{}}riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication)based authentication, you can use the security +setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.0.6/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.6/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.6/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.0.6/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.0.6/using/security/managing-sources/#Trust-based-Authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.6/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.6/using/security/basics/#user-management).
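On the server side, the PAM source referenced above is assigned with a one-line sketch like the following; the PAM service name `riak_pam` is a placeholder for whatever service definition exists on your nodes:

```bash
# Authenticate riakuser via the host's PAM service "riak_pam"
riak-admin security add-source riakuser 127.0.0.1/32 pam service=riak_pam
```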
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.0.6/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.0.6/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.0.6/developing/usage/security/ruby.md b/content/riak/kv/2.0.6/developing/usage/security/ruby.md index af936e8a74..e362724480 100644 --- a/content/riak/kv/2.0.6/developing/usage/security/ruby.md +++ b/content/riak/kv/2.0.6/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.6/using/security/managing-sources/) or [PAM](/riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.0.6/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.0.6/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.0.6/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.6/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.0.6/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication). 
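For reference, the server-side counterpart of trust-based authentication is a single source assignment; a sketch that trusts only local connections:

```bash
# Clients connecting from 127.0.0.1 authenticate by trust alone
riak-admin security add-source all 127.0.0.1/32 trust
```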
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.6/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.6/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.0.6/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.6/developing/usage/updating-objects.md b/content/riak/kv/2.0.6/developing/usage/updating-objects.md index 98163c4e4c..74f93a0661 100644 --- a/content/riak/kv/2.0.6/developing/usage/updating-objects.md +++ b/content/riak/kv/2.0.6/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/dev/using/updates --- -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode ## Using Causal Context If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.0.6/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.0.6/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.0.6/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.0.6/learn/concepts/causal-context). These context objects track the causal history of objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps: 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.0.6/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.0.6/learn/concepts/causal-context)) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.0.6/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.0.6/learn/concepts/causal-context). Although a more detailed tutorial on context objects and +object updates can be found in [Conflict Resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution), we'll walk you through a basic example here.
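If you want to see the context object in the raw first, here is a hedged sketch of the same cycle over the HTTP API, borrowing the `animals`/`dogs`/`rufus` read example from earlier in this document; `<vclock-from-read>` stands in for the opaque value returned by the read:

```curl
# Step 1: fetch the object; the causal context is returned in the
# X-Riak-Vclock response header
curl -i localhost:8098/types/animals/buckets/dogs/keys/rufus

# Steps 2 and 3: write the modified value back, echoing the fetched
# context so Riak can order this update correctly
curl -XPUT localhost:8098/types/animals/buckets/dogs/keys/rufus \
  -H 'X-Riak-Vclock: <vclock-from-read>' \
  -H 'Content-Type: text/plain' \
  -d 'WOOF'
```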
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.0.6/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.0.6/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.0.6/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.0.6/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.0.6/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.0.6/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.0.6/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.0.6/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.0.6/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.0.6/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.0.6/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.0.6/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.0.6/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.0.6/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.0.6/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.0.6/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.0.6/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.0.6/index.md b/content/riak/kv/2.0.6/index.md index 3087598cf8..f529094864 100644 --- a/content/riak/kv/2.0.6/index.md +++ b/content/riak/kv/2.0.6/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.0.6/configuring -[dev index]: /riak/kv/2.0.6/developing -[downloads]: /riak/kv/2.0.6/downloads/ -[install index]: /riak/kv/2.0.6/setup/installing/ -[plan index]: /riak/kv/2.0.6/setup/planning -[perf open files]: /riak/kv/2.0.6/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.0.6/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.0.6/developing/usage/search -[getting started]: /riak/kv/2.0.6/developing/getting-started -[dev client libraries]: /riak/kv/2.0.6/developing/client-libraries +[config index]: {{}}riak/kv/2.0.6/configuring +[dev index]: {{}}riak/kv/2.0.6/developing +[downloads]: {{}}riak/kv/2.0.6/downloads/ +[install index]: {{}}riak/kv/2.0.6/setup/installing/ +[plan index]: {{}}riak/kv/2.0.6/setup/planning +[perf open files]: {{}}riak/kv/2.0.6/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.0.6/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.0.6/developing/usage/search +[getting started]: {{}}riak/kv/2.0.6/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.0.6/developing/client-libraries diff --git a/content/riak/kv/2.0.6/introduction.md b/content/riak/kv/2.0.6/introduction.md index f50d198d37..a7e2603db3 100644 --- a/content/riak/kv/2.0.6/introduction.md +++ b/content/riak/kv/2.0.6/introduction.md @@ -27,7 +27,7 @@ For more in-depth implementation details check out the If you're upgrading to Riak 2.0 from an earlier version, please be aware that all of the new features listed below are optional: -* **Riak Data Types** --- Riak's new CRDT-based [Data Types](/riak/kv/2.0.6/developing/data-types) can +* **Riak Data Types** --- Riak's new CRDT-based [Data Types]({{}}riak/kv/2.0.6/developing/data-types) can simplify modeling data in Riak, but are only used in buckets explicitly configured to use them. * **Strong Consistency, Riak Security, and the New Riak Search** --- @@ -35,16 +35,16 @@ that all of the new features listed below are optional: work. If not turned on, they will have no impact on performance. Furthermore, the older Riak Search will continue to be included with Riak. -* **Security** --- [Authentication and authorization](/riak/kv/2.0.6/using/security/basics) can be enabled +* **Security** --- [Authentication and authorization]({{}}riak/kv/2.0.6/using/security/basics) can be enabled or disabled at any time. -* **Configuration management** --- Riak's [configuration files](/riak/kv/2.0.6/configuring/reference/) have +* **Configuration management** --- Riak's [configuration files]({{}}riak/kv/2.0.6/configuring/reference/) have been streamlined into a single file named `riak.conf`. If you are upgrading, however, your existing `app.config` and `vm.args` files will still be recognized in version 2.0. -* **Bucket Types** --- While we strongly recommend [using bucket types](/riak/kv/2.0.6/using/reference/bucket-types) when creating new buckets, they are not required. +* **Bucket Types** --- While we strongly recommend [using bucket types]({{}}riak/kv/2.0.6/using/reference/bucket-types) when creating new buckets, they are not required. 
* **Dotted Version Vectors (DVVs)** --- This alternative to traditional - [vector clocks](/riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks) is enabled by default - in all [bucket types](/riak/kv/2.0.6/using/reference/bucket-types), but DVVs can be disabled + [vector clocks]({{}}riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks) is enabled by default + in all [bucket types]({{}}riak/kv/2.0.6/using/reference/bucket-types), but DVVs can be disabled by setting the `dvv_enabled` property to `false` on any bucket type. In a nutshell, upgrading to 2.0 will change how you use Riak only if you @@ -52,17 +52,17 @@ want it to. But even if you don't plan on using the new features, there are a number of improvements that make upgrading a good choice, including the following: -* [Cluster metadata](/riak/kv/2.0.6/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that +* [Cluster metadata]({{}}riak/kv/2.0.6/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that reduces the amount of inter-node gossip in Riak clusters, which can reduce network congestion. -* [Active Anti-Entropy](/riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy +* [Active Anti-Entropy]({{}}riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy (AAE) feature that is turned on by default since version 1.3, AAE performance has been improved in version 2.0. * [Bug patches](https://github.com/basho/riak/blob/2.0/RELEASE-NOTES.md) --- A variety of bugs present in earlier versions have been identified and patched. -More on upgrading can be found in our [Riak 2.0 upgrade guide](/riak/kv/2.0.6/setup/upgrading/version). +More on upgrading can be found in our [Riak 2.0 upgrade guide]({{}}riak/kv/2.0.6/setup/upgrading/version). ## Riak Data Types @@ -73,20 +73,20 @@ application is responsible for resolving conflicts between replicas of objects stored in different Riak nodes. Riak 2.0 offers a new approach to this problem for a wide range of use -cases in the form of [Riak Data Types](/riak/kv/2.0.6/developing/data-types). Instead of +cases in the form of [Riak Data Types]({{}}riak/kv/2.0.6/developing/data-types). Instead of forcing the application to resolve conflicts, Riak offers five Data Types that can reduce some of the complexities of developing using -Riak: [flags](/riak/kv/2.0.6/developing/data-types/maps#flags), [registers](/riak/kv/2.0.6/developing/data-types/maps#registers), -[counters](/riak/kv/2.0.6/developing/data-types/counters), [sets](/riak/kv/2.0.6/developing/data-types/sets), and -[maps](/riak/kv/2.0.6/developing/data-types/maps). +Riak: [flags]({{}}riak/kv/2.0.6/developing/data-types/maps#flags), [registers]({{}}riak/kv/2.0.6/developing/data-types/maps#registers), +[counters]({{}}riak/kv/2.0.6/developing/data-types/counters), [sets]({{}}riak/kv/2.0.6/developing/data-types/sets), and +[maps]({{}}riak/kv/2.0.6/developing/data-types/maps). #### Relevant Docs -* [Using Data Types](/riak/kv/2.0.6/developing/data-types) explains how to use Riak Data Types on the +* [Using Data Types]({{}}riak/kv/2.0.6/developing/data-types) explains how to use Riak Data Types on the application side, with usage examples for all five Data Types in all of Basho's officially supported clients (Java, Ruby, Python, .NET and Erlang) and for Riak's HTTP interface. 
-* [Data Types](/riak/kv/2.0.6/developing/data-types) explains some of the theoretical concerns that drive +* [Data Types]({{}}riak/kv/2.0.6/developing/data-types) explains some of the theoretical concerns that drive Riak Data Types and shares details about how they are implemented in Riak. @@ -103,11 +103,11 @@ Search, integrating Riak with [Apache Solr](https://lucene.apache.org/solr/)'s f #### Relevant Docs -* [Using Search](/riak/kv/2.0.6/developing/usage/search) provides an overview of how to use the new +* [Using Search]({{}}riak/kv/2.0.6/developing/usage/search) provides an overview of how to use the new Riak Search. -* [Search Schema](/riak/kv/2.0.6/developing/usage/search-schemas) shows you how to create and manage custom search +* [Search Schema]({{}}riak/kv/2.0.6/developing/usage/search-schemas) shows you how to create and manage custom search schemas. -* [Search Details](/riak/kv/2.0.6/using/reference/search) provides an in-depth look at the design +* [Search Details]({{}}riak/kv/2.0.6/using/reference/search) provides an in-depth look at the design considerations that went into the new Riak Search. #### Video @@ -125,13 +125,13 @@ some (or perhaps all) of your data. #### Relevant Docs -* [Using Strong Consistency](/riak/kv/2.0.6/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong +* [Using Strong Consistency]({{}}riak/kv/2.0.6/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong consistency subsystem and to apply strong consistency guarantees to data stored in specified buckets. -* [Strong Consistency](/riak/kv/2.0.6/using/reference/strong-consistency) provides a theoretical treatment of how a - strongly consistent system differs from an [eventually consistent](/riak/kv/2.0.6/learn/concepts/eventual-consistency) system, as well as details about how +* [Strong Consistency]({{}}riak/kv/2.0.6/using/reference/strong-consistency) provides a theoretical treatment of how a + strongly consistent system differs from an [eventually consistent]({{}}riak/kv/2.0.6/learn/concepts/eventual-consistency) system, as well as details about how strong consistency is implemented in Riak. -* [Managing Strong Consistency](/riak/kv/2.0.6/configuring/strong-consistency) is a guide to strong consistency for +* [Managing Strong Consistency]({{}}riak/kv/2.0.6/configuring/strong-consistency) is a guide to strong consistency for Riak operators. #### Video @@ -155,11 +155,11 @@ Riak itself and managed through a simple command-line interface. #### Relevant Docs -* [Authentication and Authorization](/riak/kv/2.0.6/using/security/basics) explains how Riak Security can be +* [Authentication and Authorization]({{}}riak/kv/2.0.6/using/security/basics) explains how Riak Security can be enabled and disabled, how users and groups are managed, how authorization to perform certain operations can be granted and revoked, how security ciphers can be chosen, and more. -* [Managing Security Sources](/riak/kv/2.0.6/using/security/managing-sources/) is an in-depth tutorial on how to +* [Managing Security Sources]({{}}riak/kv/2.0.6/using/security/managing-sources/) is an in-depth tutorial on how to implement Riak's four supported authentication sources: trusted networks, passwords, pluggable authentication modules, and certificates. @@ -194,7 +194,7 @@ override any settings from the new system. 
#### Relevant Docs -* [Configuration Files](/riak/kv/2.0.6/configuring/reference/) lists and describes all of the configurable +* [Configuration Files]({{}}riak/kv/2.0.6/configuring/reference/) lists and describes all of the configurable parameters available in Riak 2.0, from configuring your chosen storage backend(s) to setting default bucket properties to controlling Riak's logging system and much more. @@ -214,7 +214,7 @@ and keys. #### Relevant Docs -* [Using Bucket Types](/riak/kv/2.0.6/using/reference/bucket-types) explains how to create, modify, and activate +* [Using Bucket Types]({{}}riak/kv/2.0.6/using/reference/bucket-types) explains how to create, modify, and activate bucket types, as well as how the new system differs from the older, bucket properties-based system. @@ -226,20 +226,20 @@ and [Jordan West](https://github.com/jrwest). ## Dotted Version Vectors -In prior versions of Riak, [conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution) was managed using -[vector clocks](/riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks), which track object update causality. +In prior versions of Riak, [conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution) was managed using +[vector clocks]({{}}riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks), which track object update causality. Riak 2.0 has added support for dotted version vectors (DVVs). DVVs serve an analogous role to vector -clocks but are more effective at containing [sibling explosion](/riak/kv/2.0.6/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. +clocks but are more effective at containing [sibling explosion]({{}}riak/kv/2.0.6/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. #### Relevant Docs -* [Dotted Version Vectors](/riak/kv/2.0.6/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. +* [Dotted Version Vectors]({{}}riak/kv/2.0.6/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. ## New Client Libraries -While Basho offered official [client libraries](/riak/kv/2.0.6/developing/client-libraries) for Java, Ruby, +While Basho offered official [client libraries]({{}}riak/kv/2.0.6/developing/client-libraries) for Java, Ruby, Python, .NET and Erlang for versions of Riak prior to 2.0, all clients have undergone major changes in anticipation of the 2.0 release. 
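The configuration-management bullet above is easiest to see by example. Below is a minimal sketch of the flat `riak.conf` format it describes, using a few common 2.0-era setting names; the values shown are illustrative, not recommendations:

```
## riak.conf replaces app.config/vm.args with flat key = value pairs
nodename = riak@127.0.0.1
ring_size = 64
storage_backend = bitcask
anti_entropy = active
listener.http.internal = 127.0.0.1:8098
listener.protobuf.internal = 127.0.0.1:8087
```

As noted above, clusters upgrading from 1.x can keep their existing `app.config`/`vm.args` pair; the flat format is only required for new configuration.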
diff --git a/content/riak/kv/2.0.6/learn/concepts.md b/content/riak/kv/2.0.6/learn/concepts.md index ce0535c786..dcf2a0f937 100644 --- a/content/riak/kv/2.0.6/learn/concepts.md +++ b/content/riak/kv/2.0.6/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.0.6/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.0.6/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.6/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.6/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.6/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.6/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.6/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.6/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.6/learn/concepts/vnodes -[config index]: /riak/kv/2.0.6/configuring -[plan index]: /riak/kv/2.0.6/setup/planning -[use index]: /riak/kv/2.0.6/using/ +[concept aae]: {{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.0.6/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.6/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.6/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.6/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.6/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.6/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.6/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.0.6/configuring +[plan index]: {{}}riak/kv/2.0.6/setup/planning +[use index]: {{}}riak/kv/2.0.6/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.0.6/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.0.6/learn/concepts/active-anti-entropy.md index b731ac29e5..c4c5441572 100644 --- a/content/riak/kv/2.0.6/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.0.6/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.6/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.0.6/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.0.6/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.0.6/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.0.6/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.0.6/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.0.6/developing/usage/search +[usage search]: {{}}riak/kv/2.0.6/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.0.6/learn/concepts/buckets.md b/content/riak/kv/2.0.6/learn/concepts/buckets.md index 9dc27fa7f3..40cdc56813 100644 --- a/content/riak/kv/2.0.6/learn/concepts/buckets.md +++ b/content/riak/kv/2.0.6/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.0.6/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.0.6/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.0.6/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.0.6/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.0.6/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.0.6/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.0.6/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.6/using/reference/strong-consistency -[config basic]: /riak/kv/2.0.6/configuring/basic -[dev api http]: /riak/kv/2.0.6/developing/api/http -[dev data types]: /riak/kv/2.0.6/developing/data-types -[glossary ring]: /riak/kv/2.0.6/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.6/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.6/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.6/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.6/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.6/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.0.6/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.0.6/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.0.6/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.0.6/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.0.6/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.0.6/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.0.6/configuring/basic +[dev api http]: {{}}riak/kv/2.0.6/developing/api/http +[dev data types]: {{}}riak/kv/2.0.6/developing/data-types +[glossary ring]: {{}}riak/kv/2.0.6/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.6/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.6/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.6/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.6/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.6/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.0.6/learn/concepts/capability-negotiation.md b/content/riak/kv/2.0.6/learn/concepts/capability-negotiation.md index c3254476db..1bd2446bce 100644 --- a/content/riak/kv/2.0.6/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.0.6/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.0.6/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.0.6/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.0.6/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.0.6/developing/usage/mapreduce In versions of Riak prior to 1.2.0, [rolling upgrades][upgrade cluster] from an older version of Riak to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.0.6/learn/concepts/causal-context.md b/content/riak/kv/2.0.6/learn/concepts/causal-context.md
index 742971c688..4715fe7a57 100644
--- a/content/riak/kv/2.0.6/learn/concepts/causal-context.md
+++ b/content/riak/kv/2.0.6/learn/concepts/causal-context.md
@@ -16,18 +16,18 @@ aliases:
---

-[concept aae]: /riak/kv/2.0.6/learn/concepts/active-anti-entropy
-[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters
-[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency
+[concept aae]: {{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy
+[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters
+[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency
[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management
-[dev api http]: /riak/kv/2.0.6/developing/api/http
-[dev key value]: /riak/kv/2.0.6/developing/key-value-modeling
-[glossary read rep]: /riak/kv/2.0.6/learn/glossary/#read-repair
-[perf latency reduc]: /riak/kv/2.0.6/using/performance/latency-reduction
-[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types
-[usage conflict resolution]: /riak/kv/2.0.6/developing/usage/conflict-resolution
-[usage protocol buffers]: /riak/kv/2.0.6/developing/api/protocol-buffers
-[usage updating objects]: /riak/kv/2.0.6/developing/usage/updating-objects
+[dev api http]: {{}}riak/kv/2.0.6/developing/api/http
+[dev key value]: {{}}riak/kv/2.0.6/developing/key-value-modeling
+[glossary read rep]: {{}}riak/kv/2.0.6/learn/glossary/#read-repair
+[perf latency reduc]: {{}}riak/kv/2.0.6/using/performance/latency-reduction
+[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types
+[usage conflict resolution]: {{}}riak/kv/2.0.6/developing/usage/conflict-resolution
+[usage protocol buffers]: {{}}riak/kv/2.0.6/developing/api/protocol-buffers
+[usage updating objects]: {{}}riak/kv/2.0.6/developing/usage/updating-objects
[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock
[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/
[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/
@@ -78,7 +78,7 @@ sections below.

In the case of outcome 2, the choice between **a** and **b** is yours
to make. If you set the `allow_mult` parameter to `true` for a bucket,
-[using bucket types](/riak/kv/2.0.6/developing/usage/bucket-types), all writes to that bucket will create siblings
+[using bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types), all writes to that bucket will create siblings
in the case of concurrent writes (and occasionally under other
scenarios, e.g. healed network partitions).
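To make the `allow_mult` discussion above concrete, here is a minimal sketch using the `riak-admin bucket-type` commands covered elsewhere in these docs; the type name `siblings_on` is only an example:

```
# Create a bucket type whose buckets keep siblings on concurrent writes
riak-admin bucket-type create siblings_on '{"props":{"allow_mult":true}}'

# Activate the type once it has propagated to every node in the cluster
riak-admin bucket-type activate siblings_on
```

Reads against buckets of this type can then return multiple sibling values, which the application resolves using the causal context returned with the read.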
diff --git a/content/riak/kv/2.0.6/learn/concepts/clusters.md b/content/riak/kv/2.0.6/learn/concepts/clusters.md index 44004a2027..bfa4e88206 100644 --- a/content/riak/kv/2.0.6/learn/concepts/clusters.md +++ b/content/riak/kv/2.0.6/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.6/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.0.6/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.6/learn/concepts/replication -[glossary node]: /riak/kv/2.0.6/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.0.6/learn/dynamo -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.6/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.6/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.0.6/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.0.6/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.6/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.6/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.0.6/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.6/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.6/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a
diff --git a/content/riak/kv/2.0.6/learn/concepts/crdts.md b/content/riak/kv/2.0.6/learn/concepts/crdts.md
index 7515480b9f..d629250ed9 100644
--- a/content/riak/kv/2.0.6/learn/concepts/crdts.md
+++ b/content/riak/kv/2.0.6/learn/concepts/crdts.md
@@ -16,15 +16,15 @@ aliases:
---

-[concept causal context dvv]: /riak/kv/2.0.6/learn/concepts/causal-context/#dotted-version-vectors
-[concept causal context sib]: /riak/kv/2.0.6/learn/concepts/causal-context/#siblings
-[concept causal context vc]: /riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks
-[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency
-[concept strong consistency]: /riak/kv/2.0.6/learn/concepts/strong-consistency
-[dev data types]: /riak/kv/2.0.6/developing/data-types
-[glossary node]: /riak/kv/2.0.6/learn/glossary/#node
-[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode
-[usage conflict resolution]: /riak/kv/2.0.6/developing/usage/conflict-resolution
+[concept causal context dvv]: {{}}riak/kv/2.0.6/learn/concepts/causal-context/#dotted-version-vectors
+[concept causal context sib]: {{}}riak/kv/2.0.6/learn/concepts/causal-context/#siblings
+[concept causal context vc]: {{}}riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks
+[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency
+[concept strong consistency]: {{}}riak/kv/2.0.6/learn/concepts/strong-consistency
+[dev data types]: {{}}riak/kv/2.0.6/developing/data-types
+[glossary node]: {{}}riak/kv/2.0.6/learn/glossary/#node
+[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode
+[usage conflict resolution]: {{}}riak/kv/2.0.6/developing/usage/conflict-resolution

A pure key/value store is completely agnostic toward the data stored
@@ -32,7 +32,7 @@ within it. Any key can be associated with values of any conceivable
type, from short strings to large JSON objects to video files. Riak
began as a pure key/value store, but over time it has become more and
more aware of the data stored in it through features like [secondary
-indexes](/riak/kv/2.0.6/developing/usage/secondary-indexes/) and [Search](/riak/kv/2.0.6/developing/usage/search/).
+indexes]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes/) and [Search]({{}}riak/kv/2.0.6/developing/usage/search/).

In version 2.0, Riak continued this evolution by introducing a series
of eventually convergent **Data Types**. Riak Data Types are convergent
@@ -214,7 +214,7 @@ The beauty of Data Types is that Riak "knows" how to resolve value
conflicts by applying Data Type-specific rules. In general, Riak does
this by remembering the **history** of a value and broadcasting that
history along with the current value in the form of a [context
-object](/riak/kv/2.0.6/developing/data-types/#Data-Types-and-Context) that is similar to a
+object]({{}}riak/kv/2.0.6/developing/data-types/#Data-Types-and-Context) that is similar to a
[vector clock][concept causal context vc] or [dotted version
vectors][concept causal context dvv]. Riak uses the history of each
Data Type to make deterministic judgments about which value should be
deemed correct.
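As a rough illustration of the Data Type convergence described above, over the HTTP API (this assumes a bucket type named `counters`, backed by the counter Data Type, has already been created and activated):

```
# Increment a counter; Riak merges concurrent increments on its own,
# so the client never has to ship a causal context for counters
curl -XPOST http://localhost:8098/types/counters/buckets/traffic/datatypes/home \
  -H "Content-Type: application/json" \
  -d '{"increment": 1}'

# Read back the converged value
curl http://localhost:8098/types/counters/buckets/traffic/datatypes/home
```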
diff --git a/content/riak/kv/2.0.6/learn/concepts/eventual-consistency.md b/content/riak/kv/2.0.6/learn/concepts/eventual-consistency.md index 2179b8ad3e..8f8f9e4903 100644 --- a/content/riak/kv/2.0.6/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.0.6/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.6/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters -[concept replication]: /riak/kv/2.0.6/learn/concepts/replication -[glossary node]: /riak/kv/2.0.6/learn/glossary/#node -[glossary read rep]: /riak/kv/2.0.6/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.6/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.0.6/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.0.6/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.6/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.0.6/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.6/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.0.6/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.0.6/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.0.6/developing/data-modeling/). +or models]({{}}riak/kv/2.0.6/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.0.6/learn/concepts/keys-and-objects.md b/content/riak/kv/2.0.6/learn/concepts/keys-and-objects.md index c185dcfc12..9609109b9b 100644 --- a/content/riak/kv/2.0.6/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.0.6/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.6/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.0.6/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.0.6/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.0.6/learn/concepts/replication.md b/content/riak/kv/2.0.6/learn/concepts/replication.md index 103d93eea2..51deb86608 100644 --- a/content/riak/kv/2.0.6/learn/concepts/replication.md +++ b/content/riak/kv/2.0.6/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.0.6/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.0.6/learn/concepts/vnodes -[glossary node]: /riak/kv/2.0.6/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.6/learn/glossary/#ring -[usage replication]: /riak/kv/2.0.6/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.0.6/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.0.6/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.6/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.0.6/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.0.6/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.0.6/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.0.6/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.0.6/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail. 
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.0.6/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.0.6/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.0.6/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.0.6/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.0.6/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.0.6/learn/concepts/strong-consistency.md b/content/riak/kv/2.0.6/learn/concepts/strong-consistency.md index 0fa7d7eb29..cef6507ef1 100644 --- a/content/riak/kv/2.0.6/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.0.6/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.6/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.6/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.6/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.6/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.6/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.6/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.6/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.0.6/learn/concepts/vnodes.md b/content/riak/kv/2.0.6/learn/concepts/vnodes.md index 8a552831dc..41b95c9378 100644 --- a/content/riak/kv/2.0.6/learn/concepts/vnodes.md +++ b/content/riak/kv/2.0.6/learn/concepts/vnodes.md @@ -16,16 +16,16 @@ aliases: --- -[concept causal context]: /riak/kv/2.0.6/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.0.6/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.0.6/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.6/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.0.6/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.6/learn/glossary/#ring -[perf strong consistency]: /riak/kv/2.0.6/using/performance/strong-consistency -[plan backend]: /riak/kv/2.0.6/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.6/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.0.6/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.0.6/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.0.6/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.0.6/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.6/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.0.6/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.6/learn/glossary/#ring +[perf strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency +[plan backend]: {{}}riak/kv/2.0.6/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.6/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.0.6/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -81,7 +81,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -103,7 +103,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.0.6/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.0.6/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.0.6/learn/dynamo.md b/content/riak/kv/2.0.6/learn/dynamo.md index 609896c7dc..ad7d5cc6e2 100644 --- a/content/riak/kv/2.0.6/learn/dynamo.md +++ b/content/riak/kv/2.0.6/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.0.6/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.0.6/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.0.6/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.0.6/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. 
It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. -[HTTP API]: /riak/kv/2.0.6/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.0.6/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.0.6/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.0.6/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.0.6/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.0.6/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.0.6/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.0.6/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.0.6/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.0.6/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.0.6/developing/api/http/) +>[REST API]({{}}riak/kv/2.0.6/developing/api/http/) > ->[Writing Data](/riak/kv/2.0.6/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.0.6/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.0.6/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.0.6/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.0.6/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.0.6/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.0.6/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.0.6/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.0.6/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.0.6/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. 
-[Multi Datacenter Replication]: /riak/kv/2.0.6/using/reference/v3-multi-datacenter/architecture/
+[Multi Datacenter Replication]: {{}}riak/kv/2.0.6/using/reference/v3-multi-datacenter/architecture/
[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/

@@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2.
> This section is well expressed in [Adding and Removing Nodes] and
> [Failure Scenarios].
-[Adding and Removing Nodes]: /riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes/
-[Failure Scenarios]: /riak/kv/2.0.6/learn/concepts/eventual-consistency/
+[Adding and Removing Nodes]: {{}}riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency/
#### 4.8.1 Ring Membership
> Riak KV operators can trigger node management via the
> [riak-admin command-line tool].
-[riak-admin command-line tool]: /riak/kv/2.0.6/using/admin/riak-admin/
+[riak-admin command-line tool]: {{}}riak/kv/2.0.6/using/admin/riak-admin/
In Amazon’s environment node outages (due to failures and maintenance tasks)
are often transient but may last for extended intervals. A node outage rarely
@@ -1164,7 +1164,7 @@ membership change histories.
> Riak KV's ring state holds membership information, and is propagated via
> [gossiping], including random reconciliation, defaulting to once a minute.
-[gossiping]: /riak/kv/2.0.6/learn/glossary/#gossiping
+[gossiping]: {{}}riak/kv/2.0.6/learn/glossary/#gossiping
When a node starts for the first time, it chooses its set of tokens (virtual
nodes in the consistent hash space) and maps nodes to their respective token
@@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring.
>
> See _[The Node Join Process]_ for more.
-[The Node Join Process]: /riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+[The Node Join Process]: {{}}riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
#### 4.8.3 Failure Detection
@@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store.
> supports [secondary indexes]). The Memory backend is an excellent choice when
> speed is important and durability is not. It also has TTL support.
-[backend options]: /riak/kv/2.0.6/setup/planning/backend/
-[Bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask/
-[LevelDB]: /riak/kv/2.0.6/setup/planning/backend/leveldb/
-[Memory]: /riak/kv/2.0.6/setup/planning/backend/memory/
-[secondary indexes]: /riak/kv/2.0.6/developing/usage/secondary-indexes/
+[backend options]: {{}}riak/kv/2.0.6/setup/planning/backend/
+[Bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask/
+[LevelDB]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb/
+[Memory]: {{}}riak/kv/2.0.6/setup/planning/backend/memory/
+[secondary indexes]: {{}}riak/kv/2.0.6/developing/usage/secondary-indexes/
The request coordination component is built on top of an event-driven messaging
substrate where the message processing pipeline is split into multiple stages
@@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it.
> Riak KV implements [Read Repair].
-[Read Repair]: /riak/kv/2.0.6/learn/concepts/replication/#read-repair
+[Read Repair]: {{}}riak/kv/2.0.6/learn/concepts/replication/#read-repair
As noted earlier, write requests are coordinated by one of the top N nodes in
the preference list.
Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.0.6/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.6/using/performance/benchmarking/ Dynamo is used by several services with different configurations. These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.0.6/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.0.6/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.0.6/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.0.6/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.0.6/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.0.6/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.0.6/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.0.6/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.0.6/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.0.6/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. 
-[Basho Bench]: /riak/kv/2.0.6/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.6/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. -[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.0.6/learn/glossary.md b/content/riak/kv/2.0.6/learn/glossary.md index d68ee73a3c..618baf5180 100644 --- a/content/riak/kv/2.0.6/learn/glossary.md +++ b/content/riak/kv/2.0.6/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.0.6/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.0.6/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.0.6/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.6/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.6/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.6/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.6/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.0.6/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.0.6/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.0.6/developing/api/http -[dev data model]: /riak/kv/2.0.6/developing/data-modeling -[dev data types]: /riak/kv/2.0.6/developing/data-types -[glossary read rep]: /riak/kv/2.0.6/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.0.6/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.0.6/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.6/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.6/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.6/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.6/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.0.6/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.0.6/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.0.6/developing/api/http +[dev data model]: {{}}riak/kv/2.0.6/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.6/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.6/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.0.6/learn/dynamo -[plan cluster 
capacity]: /riak/kv/2.0.6/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.0.6/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.0.6/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.0.6/learn/dynamo +[plan cluster capacity]: {{}}riak/kv/2.0.6/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.0.6/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.0.6/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.0.6/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.6/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.6/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.6/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.6/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.6/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.0.6/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.0.6/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. 
-* [Bucket Types](/riak/kv/2.0.6/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.0.6/learn/use-cases.md b/content/riak/kv/2.0.6/learn/use-cases.md index 3087457314..335fe8d90e 100644 --- a/content/riak/kv/2.0.6/learn/use-cases.md +++ b/content/riak/kv/2.0.6/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.0.6/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.0.6/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.0.6/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.0.6/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.0.6/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.0.6/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.0.6/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.0.6/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.0.6/developing/data-types -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.0.6/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.6/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.6/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.0.6/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.0.6/developing/data-modeling/#log-data +[dev data model sensor data]: {{}}riak/kv/2.0.6/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.0.6/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.0.6/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.0.6/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.0.6/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.0.6/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.0.6/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.0.6/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.0.6/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.6/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.6/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. 
In this guide, we'll list a number of use cases
diff --git a/content/riak/kv/2.0.6/learn/why-riak-kv.md b/content/riak/kv/2.0.6/learn/why-riak-kv.md
index 13f95292a8..d30f91932d 100644
--- a/content/riak/kv/2.0.6/learn/why-riak-kv.md
+++ b/content/riak/kv/2.0.6/learn/why-riak-kv.md
@@ -16,14 +16,14 @@ aliases:
---

-[apps replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties
-[Basho Bench]: /riak/kv/2.0.6/using/performance/benchmarking
-[cluster ops strong consistency]: /riak/kv/2.0.6/using/cluster-operations/strong-consistency
-[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency
+[apps replication properties]: {{}}riak/kv/2.0.6/developing/app-guide/replication-properties
+[Basho Bench]: {{}}riak/kv/2.0.6/using/performance/benchmarking
+[cluster ops strong consistency]: {{}}riak/kv/2.0.6/using/cluster-operations/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency
[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
[Datomic]: http://www.datomic.com/overview.html
-[dev data types]: /riak/kv/2.0.6/developing/data-types
-[glossary read rep]: /riak/kv/2.0.6/learn/glossary/#read-repair
+[dev data types]: {{}}riak/kv/2.0.6/developing/data-types
+[glossary read rep]: {{}}riak/kv/2.0.6/learn/glossary/#read-repair

## What is Riak?
diff --git a/content/riak/kv/2.0.6/setup/downgrade.md b/content/riak/kv/2.0.6/setup/downgrade.md
index 96821dfa10..b5934ef174 100644
--- a/content/riak/kv/2.0.6/setup/downgrade.md
+++ b/content/riak/kv/2.0.6/setup/downgrade.md
@@ -17,7 +17,7 @@ aliases:

Downgrades of Riak are tested and supported for two feature release
versions, with the general procedure being similar to that of a
-[rolling upgrade](/riak/kv/2.0.6/setup/upgrading/cluster).
+[rolling upgrade]({{}}riak/kv/2.0.6/setup/upgrading/cluster).
{{% note title="End Of Life Warning" %}}
We test downgrading for two feature release versions. However, all versions below KV 2.0 are End Of Life (EOL) and unsupported. Please be aware of that if you choose to downgrade.
@@ -50,9 +50,9 @@ both 1.4 and 1.3 are performed.

* Riak Control should be disabled throughout the rolling downgrade
process
-* [Configuration Files](/riak/kv/2.0.6/configuring/reference) must be replaced with those of the version
+* [Configuration Files]({{}}riak/kv/2.0.6/configuring/reference) must be replaced with those of the version
being downgraded to
-* [Active anti-entropy](/riak/kv/2.0.6/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version
+* [Active anti-entropy]({{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version
below 1.3.

## Before Stopping a Node
@@ -95,7 +95,7 @@ will need to be downgraded before the rolling downgrade begins. This can
be done using the --downgrade flag with `riak-admin reformat-indexes`.
More information on the `riak-admin reformat-indexes` command, and
downgrading indexes can be found in the
-[`riak-admin`](/riak/kv/2.0.6/using/admin/riak-admin/#reformat-indexes) documentation.
+[`riak-admin`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#reformat-indexes) documentation.
## Before Starting a Node diff --git a/content/riak/kv/2.0.6/setup/installing.md b/content/riak/kv/2.0.6/setup/installing.md index dea84dd4f6..3ff3f3b691 100644 --- a/content/riak/kv/2.0.6/setup/installing.md +++ b/content/riak/kv/2.0.6/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.0.6/installing/ --- -[install aws]: /riak/kv/2.0.6/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.6/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.6/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.6/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.6/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.6/setup/installing/smartos -[install solaris]: /riak/kv/2.0.6/setup/installing/solaris -[install suse]: /riak/kv/2.0.6/setup/installing/suse -[install windows azure]: /riak/kv/2.0.6/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.6/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.0.6/setup/upgrading +[install aws]: {{}}riak/kv/2.0.6/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.6/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.6/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.6/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.6/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.6/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.6/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.6/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.6/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.0.6/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.0.6/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.0.6/setup/installing/amazon-web-services.md b/content/riak/kv/2.0.6/setup/installing/amazon-web-services.md index b568abb0cf..3e95283f8e 100644 --- a/content/riak/kv/2.0.6/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.0.6/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. @@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.0.6/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.0.6/using/security/). 
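For those who script their EC2 setup rather than using the console, a hedged AWS CLI sketch of equivalent ingress rules (the security-group name and CIDR below are placeholders; 8098 and 8087 are Riak's default HTTP and Protocol Buffers ports):

```bash
# Placeholders: substitute your own security group name and source CIDR.
aws ec2 authorize-security-group-ingress --group-name riak-cluster \
  --protocol tcp --port 8098 --cidr 10.0.0.0/16   # HTTP interface
aws ec2 authorize-security-group-ingress --group-name riak-cluster \
  --protocol tcp --port 8087 --cidr 10.0.0.0/16   # Protocol Buffers interface
```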
## Clustering Riak on AWS diff --git a/content/riak/kv/2.0.6/setup/installing/debian-ubuntu.md b/content/riak/kv/2.0.6/setup/installing/debian-ubuntu.md index cdae6affb0..547d1cfb7c 100644 --- a/content/riak/kv/2.0.6/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.0.6/setup/installing/debian-ubuntu.md @@ -20,10 +20,10 @@ aliases: -[install source index]: /riak/kv/2.0.6/setup/installing/source/ -[security index]: /riak/kv/2.0.6/using/security/ -[install source erlang]: /riak/kv/2.0.6/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.6/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.6/setup/installing/source/ +[security index]: {{}}riak/kv/2.0.6/using/security/ +[install source erlang]: {{}}riak/kv/2.0.6/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.6/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.0.6/setup/installing/freebsd.md b/content/riak/kv/2.0.6/setup/installing/freebsd.md index 571e0a8b53..9026415a8c 100644 --- a/content/riak/kv/2.0.6/setup/installing/freebsd.md +++ b/content/riak/kv/2.0.6/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.0.6/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.6/downloads/ -[install verify]: /riak/kv/2.0.6/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.6/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.6/downloads/ +[install verify]: {{}}riak/kv/2.0.6/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.0.6/setup/installing/mac-osx.md b/content/riak/kv/2.0.6/setup/installing/mac-osx.md index aa1cc51f99..ed07b39a7f 100644 --- a/content/riak/kv/2.0.6/setup/installing/mac-osx.md +++ b/content/riak/kv/2.0.6/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.0.6/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.0.6/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.6/setup/installing/verify +[perf open files]: {{}}riak/kv/2.0.6/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.0.6/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.6/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. You can install from source or download a diff --git a/content/riak/kv/2.0.6/setup/installing/rhel-centos.md b/content/riak/kv/2.0.6/setup/installing/rhel-centos.md index 4d514674cc..0ef511327c 100644 --- a/content/riak/kv/2.0.6/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.0.6/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.0.6/setup/installing/source -[install source erlang]: /riak/kv/2.0.6/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.6/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.6/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.6/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.6/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. 
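As a quick orientation, the binary-package route on these platforms usually reduces to a single package-manager command once a Riak package source is available (a sketch; package naming and repository setup are assumptions that vary by platform and release):

```bash
# Debian/Ubuntu sketch (assumes a repository providing the riak package):
sudo apt-get install riak
# RHEL/CentOS equivalent sketch:
sudo yum install riak
```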
The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.0.6/setup/installing/smartos.md b/content/riak/kv/2.0.6/setup/installing/smartos.md index 55f3b2e308..83bcc21c6c 100644 --- a/content/riak/kv/2.0.6/setup/installing/smartos.md +++ b/content/riak/kv/2.0.6/setup/installing/smartos.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.6/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.6/setup/installing/verify The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. diff --git a/content/riak/kv/2.0.6/setup/installing/solaris.md b/content/riak/kv/2.0.6/setup/installing/solaris.md index 48879bfe7c..b591b5a826 100644 --- a/content/riak/kv/2.0.6/setup/installing/solaris.md +++ b/content/riak/kv/2.0.6/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.6/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.6/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. diff --git a/content/riak/kv/2.0.6/setup/installing/source.md b/content/riak/kv/2.0.6/setup/installing/source.md index a35865cc6b..3d4857b3b6 100644 --- a/content/riak/kv/2.0.6/setup/installing/source.md +++ b/content/riak/kv/2.0.6/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.0.6/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.6/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.0.6/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.0.6/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.0.6/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.0.6/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.0.6/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.6/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.6/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.0.6/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.0.6/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.0.6/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.0.6/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.0.6/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.0.6/setup/installing/source/erlang.md b/content/riak/kv/2.0.6/setup/installing/source/erlang.md index 677d96bbce..fb3347ef82 100644 --- a/content/riak/kv/2.0.6/setup/installing/source/erlang.md +++ b/content/riak/kv/2.0.6/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.6/installing/source/erlang/ --- -[install index]: /riak/kv/2.0.6/setup/installing -[security basics]: /riak/kv/2.0.6/using/security/basics +[install index]: {{}}riak/kv/2.0.6/setup/installing +[security basics]: {{}}riak/kv/2.0.6/using/security/basics Pre-packaged versions of Riak include an Erlang installation. 
If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho8.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.0.6/setup/installing/source/jvm.md b/content/riak/kv/2.0.6/setup/installing/source/jvm.md index 6115026ef9..6fdbcab3fe 100644 --- a/content/riak/kv/2.0.6/setup/installing/source/jvm.md +++ b/content/riak/kv/2.0.6/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.0.6/installing/source/jvm/ --- -[usage search]: /riak/kv/2.0.6/developing/usage/search +[usage search]: {{}}riak/kv/2.0.6/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.0.6/setup/installing/suse.md b/content/riak/kv/2.0.6/setup/installing/suse.md index 18eb817a3f..0b0f7934bd 100644 --- a/content/riak/kv/2.0.6/setup/installing/suse.md +++ b/content/riak/kv/2.0.6/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.6/installing/suse/ --- -[install verify]: /riak/kv/2.0.6/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.6/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.0.6/setup/installing/verify.md b/content/riak/kv/2.0.6/setup/installing/verify.md index be0425078c..07443c7bac 100644 --- a/content/riak/kv/2.0.6/setup/installing/verify.md +++ b/content/riak/kv/2.0.6/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.6/installing/verify-install/ --- -[client libraries]: /riak/kv/2.0.6/developing/client-libraries -[perf open files]: /riak/kv/2.0.6/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.0.6/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.0.6/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.0.6/developing/client-libraries +[perf open files]: {{}}riak/kv/2.0.6/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.0.6/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.0.6/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.0.6/setup/installing/windows-azure.md b/content/riak/kv/2.0.6/setup/installing/windows-azure.md index 57dd2c5ff5..065c7c326e 100644 --- a/content/riak/kv/2.0.6/setup/installing/windows-azure.md +++ b/content/riak/kv/2.0.6/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. 
- ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". - ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.0.6/setup/planning/backend.md b/content/riak/kv/2.0.6/setup/planning/backend.md index ec00990ca5..28e1c29e77 100644 --- a/content/riak/kv/2.0.6/setup/planning/backend.md +++ b/content/riak/kv/2.0.6/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.6/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.6/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.6/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.0.6/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.6/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.6/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.0.6/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
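The backend is selected per node in `riak.conf`; a minimal sketch (the value shown is Riak's default):

```riakconf
## Storage engine used by riak_kv on this node; one of
## bitcask, leveldb, memory, or multi. Bitcask is the default.
storage_backend = bitcask
```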
diff --git a/content/riak/kv/2.0.6/setup/planning/backend/bitcask.md b/content/riak/kv/2.0.6/setup/planning/backend/bitcask.md index 48c79b4377..b2995fd54c 100644 --- a/content/riak/kv/2.0.6/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.0.6/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.0.6/using/admin/riak-cli -[config reference]: /riak/kv/2.0.6/configuring/reference -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.0.6/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.0.6/setup/planning/backend/multi -[usage search]: /riak/kv/2.0.6/developing/usage/search - -[glossary aae]: /riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.0.6/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.0.6/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.0.6/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.0.6/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.0.6/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.0.6/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.0.6/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.0.6/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.0.6/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
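To make that concrete, a sketch of enabling Bitcask and pointing it at a data directory in `riak.conf` (the path is illustrative; the packaged default differs by platform):

```riakconf
storage_backend = bitcask
## Illustrative location for Bitcask's log-structured data files.
bitcask.data_root = /var/lib/riak/bitcask
```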
diff --git a/content/riak/kv/2.0.6/setup/planning/backend/leveldb.md b/content/riak/kv/2.0.6/setup/planning/backend/leveldb.md index 5150daa95f..ba81c5de63 100644 --- a/content/riak/kv/2.0.6/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.0.6/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.6/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.6/configuring/reference -[perf index]: /riak/kv/2.0.6/using/performance -[config reference#aae]: /riak/kv/2.0.6/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[perf index]: {{}}riak/kv/2.0.6/using/performance +[config reference#aae]: {{}}riak/kv/2.0.6/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.0.6/setup/planning/backend/memory.md b/content/riak/kv/2.0.6/setup/planning/backend/memory.md index 6c05b8c780..66a52071a5 100644 --- a/content/riak/kv/2.0.6/setup/planning/backend/memory.md +++ b/content/riak/kv/2.0.6/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.6/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.0.6/configuring/reference -[plan backend multi]: /riak/kv/2.0.6/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[plan backend multi]: {{}}riak/kv/2.0.6/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.0.6/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.0.6/setup/planning/backend/multi.md b/content/riak/kv/2.0.6/setup/planning/backend/multi.md index f011e50164..cb8b1ea846 100644 --- a/content/riak/kv/2.0.6/setup/planning/backend/multi.md +++ b/content/riak/kv/2.0.6/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.6/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.0.6/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.6/setup/planning/backend/memory -[config reference]: /riak/kv/2.0.6/configuring/reference -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.0.6/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.0.6/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.6/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.0.6/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
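A sketch of what such a multi-backend setup can look like in `riak.conf` (the instance name and data path are illustrative):

```riakconf
storage_backend = multi
## Declare a named Bitcask instance and make it the default backend;
## additional named instances can then be mapped to bucket types.
multi_backend.bitcask_mult.storage_backend = bitcask
multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/data/bitcask_mult
multi_backend.default = bitcask_mult
```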
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.0.6/setup/planning/best-practices.md b/content/riak/kv/2.0.6/setup/planning/best-practices.md index f140dc3e76..629eb20e88 100644 --- a/content/riak/kv/2.0.6/setup/planning/best-practices.md +++ b/content/riak/kv/2.0.6/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.6/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.0.6/using/reference/handoff -[config mapreduce]: /riak/kv/2.0.6/configuring/mapreduce -[glossary aae]: /riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.0.6/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.0.6/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.0.6/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.0.6/setup/planning/bitcask-capacity-calc.md index 99132558ab..f3287ceaca 100644 --- a/content/riak/kv/2.0.6/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.0.6/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
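As a rough illustration of the arithmetic behind such a calculator, consider a hypothetical cluster; the per-key RAM overhead constant below is an assumption for illustration only (the calculator carries the exact figures):

```bash
# Hypothetical workload: 100M keys, n_val=3, 5 nodes, ~25-byte bucket+key names,
# and an assumed ~45 bytes of per-key Bitcask RAM overhead (illustrative).
echo $(( 100000000 * 3 / 5 * (45 + 25) ))  # ~4.2e9 bytes of RAM per node (~4 GB)
```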
diff --git a/content/riak/kv/2.0.6/setup/planning/cluster-capacity.md b/content/riak/kv/2.0.6/setup/planning/cluster-capacity.md index 696b85d76a..63612ca068 100644 --- a/content/riak/kv/2.0.6/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.0.6/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.6/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.0.6/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.0.6/setup/planning -[concept replication]: /riak/kv/2.0.6/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.0.6/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.0.6/configuring/reference -[perf benchmark]: /riak/kv/2.0.6/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.0.6/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.0.6/setup/planning +[concept replication]: {{}}riak/kv/2.0.6/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[perf benchmark]: {{}}riak/kv/2.0.6/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.0.6/setup/planning/operating-system.md b/content/riak/kv/2.0.6/setup/planning/operating-system.md index 3126a1a8c9..66e64df909 100644 --- a/content/riak/kv/2.0.6/setup/planning/operating-system.md +++ b/content/riak/kv/2.0.6/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.0.6/downloads/ +[downloads]: {{}}riak/kv/2.0.6/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.0.6/setup/planning/start.md b/content/riak/kv/2.0.6/setup/planning/start.md index 4073026929..e53227d547 100644 --- a/content/riak/kv/2.0.6/setup/planning/start.md +++ b/content/riak/kv/2.0.6/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.6/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.0.6/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.6/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.0.6/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.0.6/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.6/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.0.6/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster. 
diff --git a/content/riak/kv/2.0.6/setup/upgrading/checklist.md b/content/riak/kv/2.0.6/setup/upgrading/checklist.md index 23456f5314..e6df3ae5fe 100644 --- a/content/riak/kv/2.0.6/setup/upgrading/checklist.md +++ b/content/riak/kv/2.0.6/setup/upgrading/checklist.md @@ -16,24 +16,24 @@ aliases: - /riak/kv/2.0.6/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.0.6/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.6/using/performance +[perf open files]: {{}}riak/kv/2.0.6/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.6/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.0.6/using/security/basics -[cluster ops load balance]: /riak/kv/2.0.6/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.0.6/configuring/reference -[config backend]: /riak/kv/2.0.6/configuring/backend -[usage search]: /riak/kv/2.0.6/developing/usage/search -[usage conflict resolution]: /riak/kv/2.0.6/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.0.6/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.0.6/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.0.6/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.0.6/using/admin/commands -[use admin riak control]: /riak/kv/2.0.6/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.0.6/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.0.6/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.0.6/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.0.6/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.0.6/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[config backend]: {{}}riak/kv/2.0.6/configuring/backend +[usage search]: {{}}riak/kv/2.0.6/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.0.6/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.0.6/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.6/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.0.6/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.0.6/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.0.6/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.0.6/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.0.6/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.0.6/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a production environment from a development or testing environment can be a complex process. While the specific process will depend on your environment and practices, there are some basics for you to consider and a few questions you will want to ask while making this transition. 
diff --git a/content/riak/kv/2.0.6/setup/upgrading/cluster.md b/content/riak/kv/2.0.6/setup/upgrading/cluster.md index 308607f6bf..e8e7e35f5f 100644 --- a/content/riak/kv/2.0.6/setup/upgrading/cluster.md +++ b/content/riak/kv/2.0.6/setup/upgrading/cluster.md @@ -11,23 +11,23 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.0.6/ops/upgrading/rolling-upgrades/ - /riak/kv/2.0.6/ops/upgrading/rolling-upgrades/ --- -[production checklist]: /riak/kv/2.0.6/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.0.6/using/admin/riak-control -[use admin commands]: /riak/kv/2.0.6/using/admin/commands -[use admin riak-admin]: /riak/kv/2.0.6/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.0.6/developing/usage/secondary-indexes -[release notes]: /riak/kv/2.0.6/release-notes/ +[production checklist]: {{}}riak/kv/2.0.6/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.0.6/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.0.6/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.0.6/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.0.6/developing/usage/secondary-indexes +[release notes]: {{}}riak/kv/2.0.6/release-notes/ [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.0.6/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.0.6/using/reference/jmx -[snmp]: /riak/kv/2.0.6/using/reference/snmp +[cluster ops mdc]: {{}}riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.0.6/using/reference/jmx +[snmp]: {{}}riak/kv/2.0.6/using/reference/snmp {{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. @@ -38,7 +38,7 @@ recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. -If you run [Riak Control](/riak/kv/2.0.6/using/admin/riak-control), you should disable it during the rolling upgrade process. +If you run [Riak Control]({{}}riak/kv/2.0.6/using/admin/riak-control), you should disable it during the rolling upgrade process. {{% /note %}} Riak KV nodes negotiate with each other to determine supported @@ -104,9 +104,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.6/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.6/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.6/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.6/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.6/release-notes/). {{% /note %}} ## RHEL/CentOS @@ -166,9 +166,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. 
Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.6/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.6/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.6/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.6/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.6/release-notes/). {{% /note %}} ## Solaris/OpenSolaris @@ -252,9 +252,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.6/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.6/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.6/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.6/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.6/release-notes/). {{% /note %}} ## Rolling Upgrade to Enterprise diff --git a/content/riak/kv/2.0.6/setup/upgrading/search.md b/content/riak/kv/2.0.6/setup/upgrading/search.md index 72195de2e4..822e02cb33 100644 --- a/content/riak/kv/2.0.6/setup/upgrading/search.md +++ b/content/riak/kv/2.0.6/setup/upgrading/search.md @@ -11,7 +11,7 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" --- If you're using Search in a version of Riak prior to 2.0 (1.3.0 to @@ -270,4 +270,4 @@ search property is set to false. 11. Finally, delete the merge index directories to reclaim disk space. -For any questions reach out to the [Riak community](/community). Preferably, ask your questions up front rather than during the middle of a migration. +For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. diff --git a/content/riak/kv/2.0.6/setup/upgrading/version.md b/content/riak/kv/2.0.6/setup/upgrading/version.md index 2ebc90f10e..e9ad5a64aa 100644 --- a/content/riak/kv/2.0.6/setup/upgrading/version.md +++ b/content/riak/kv/2.0.6/setup/upgrading/version.md @@ -20,7 +20,7 @@ explains which default Riak behaviors have changed and specific steps to take for a successful upgrade. For an overview of the new features and functionality -included in version 2.0, check out our guide to [Riak 2.0](/riak/kv/2.0.6/introduction). +included in version 2.0, check out our guide to [Riak 2.0]({{}}riak/kv/2.0.6/introduction). ## New Clients @@ -36,14 +36,14 @@ was built with those features in mind. 
There are official While we strongly recommend using the newest versions of these clients, older versions will still work with Riak 2.0, with the drawback that -those older clients will not able to take advantage of [new features](/riak/kv/2.0.6/introduction) like [data types](/riak/kv/2.0.6/developing/data-types) or the new [Riak Search](/riak/kv/2.0.6/using/reference/search). +those older clients will not be able to take advantage of [new features]({{}}riak/kv/2.0.6/introduction) like [data types]({{}}riak/kv/2.0.6/developing/data-types) or the new [Riak Search]({{}}riak/kv/2.0.6/using/reference/search). ## Bucket Types In versions of Riak prior to 2.0, the location of objects was -determined by objects' [bucket](/riak/kv/2.0.6/learn/concepts/buckets) and [key](/riak/kv/2.0.6/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties](/riak/kv/2.0.6/developing/usage/bucket-types/). +determined by objects' [bucket]({{}}riak/kv/2.0.6/learn/concepts/buckets) and [key]({{}}riak/kv/2.0.6/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties]({{}}riak/kv/2.0.6/developing/usage/bucket-types/). -In Riak 2.0, [bucket types](/riak/kv/2.0.6/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types](/riak/kv/2.0.6/using/reference/bucket-types). +In Riak 2.0, [bucket types]({{}}riak/kv/2.0.6/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types]({{}}riak/kv/2.0.6/using/reference/bucket-types). Here, we'll list some of the things to be aware of when upgrading. #### Bucket types and object location @@ -56,7 +56,7 @@ is determined by: * key This means there are 3 namespaces involved in object location instead of 2. -A full tutorial can be found in [Using Bucket Types](/riak/kv/2.0.6/using/reference/bucket-types). +A full tutorial can be found in [Using Bucket Types]({{}}riak/kv/2.0.6/using/reference/bucket-types). If your application was written using a version of Riak prior to 2.0, you should make sure that any endpoint in Riak targeting @@ -75,8 +75,8 @@ configurations. 
The following URLs are equivalent in Riak 2.0: If you use object locations that don't specify a bucket type, you have three options: -* Accept Riak's [default bucket configurations](/riak/kv/2.0.6/using/reference/bucket-types/#buckets-as-namespaces) -* Change Riak's defaults using your [configuration files](/riak/kv/2.0.6/configuring/reference/#default-bucket-properties) +* Accept Riak's [default bucket configurations]({{}}riak/kv/2.0.6/using/reference/bucket-types/#buckets-as-namespaces) +* Change Riak's defaults using your [configuration files]({{}}riak/kv/2.0.6/configuring/reference/#default-bucket-properties) * Manage multiple sets of bucket properties by specifying those properties for all operations (not recommended) @@ -86,17 +86,17 @@ One reason we recommend using bucket types for Riak 2.0 and later is because many newer Riak features were built with bucket types as a precondition: -* [Strong consistency](/riak/2.0.6/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem +* [Strong consistency]({{}}riak/kv/2.0.6/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem requires you to set the `consistent` parameter on a bucket type to `true` -* [Riak Data Types](/riak/kv/2.0.6/developing/data-types) --- In order to use Riak Data - Types, you must [create bucket types](/riak/kv/2.0.6/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the +* [Riak Data Types]({{}}riak/kv/2.0.6/developing/data-types) --- In order to use Riak Data + Types, you must [create bucket types]({{}}riak/kv/2.0.6/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the Data Type you are using #### Bucket types and downgrades If you decide to use bucket types, please remember that you -cannot [downgrade](/riak/kv/2.0.6/setup/downgrade) your cluster to a version of +cannot [downgrade]({{}}riak/kv/2.0.6/setup/downgrade) your cluster to a version of Riak prior to 2.0 if you have both created and activated a bucket type. @@ -104,20 +104,20 @@ bucket type. One of the biggest changes in version 2.0 regarding application development involves Riak's default -[siblings](/riak/kv/2.0.6/learn/concepts/causal-context/#siblings) behavior. +[siblings]({{}}riak/kv/2.0.6/learn/concepts/causal-context/#siblings) behavior. In versions prior to 2.0, the `allow_mult` setting was set to `false` by default for all buckets. So Riak's default behavior was to resolve -object replica [conflicts](/riak/kv/2.0.6/developing/usage/conflict-resolution) between nodes on its +object replica [conflicts]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution) between nodes on its own; relieving connecting clients of the need to resolve those conflicts. **In 2.0, `allow_mult` is set to `true` for any bucket type that you create and activate.** -This means that the default when [using bucket types](/riak/kv/2.0.6/using/reference/bucket-types/) is to handle [conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution) on the client side using -either traditional [vector clocks](/riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors](/riak/kv/2.0.6/learn/concepts/causal-context/#dotted-version-vector). 
+This means that the default when [using bucket types]({{}}riak/kv/2.0.6/using/reference/bucket-types/) is to handle [conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution) on the client side using +either traditional [vector clocks]({{}}riak/kv/2.0.6/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors]({{}}riak/kv/2.0.6/learn/concepts/causal-context/#dotted-version-vector). If you wish to set `allow_mult` to `false` in version 2.0, you have two options: @@ -126,11 +126,11 @@ options: * Don't use bucket types. More information on handling siblings can be found in our documentation -on [conflict resolution](/riak/kv/2.0.6/developing/usage/conflict-resolution). +on [conflict resolution]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution). ## Enabling Security -The [authentication and authorization](/riak/kv/2.0.6/using/security/basics) mechanisms included with Riak 2.0 should only be turned +The [authentication and authorization]({{}}riak/kv/2.0.6/using/security/basics) mechanisms included with Riak 2.0 should only be turned on after careful testing in a non-production environment. Security changes the way all applications interact with Riak. @@ -140,12 +140,12 @@ If you decide to upgrade to version 2.0, you can still downgrade your cluster to an earlier version of Riak if you wish, _unless_ you perform one of the following actions in your cluster: -* Index data to be used in conjunction with the new [Riak Search](/riak/kv/2.0.6/using/reference/search). -* Create _and_ activate one or more [bucket types](/riak/kv/2.0.6/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: - - [Strong consistency](/riak/2.0.6/using/reference/strong-consistency) - - [Riak Data Types](/riak/kv/2.0.6/developing/data-types) +* Index data to be used in conjunction with the new [Riak Search]({{}}riak/kv/2.0.6/using/reference/search). +* Create _and_ activate one or more [bucket types]({{}}riak/kv/2.0.6/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: + - [Strong consistency]({{}}riak/kv/2.0.6/using/reference/strong-consistency) + - [Riak Data Types]({{}}riak/kv/2.0.6/developing/data-types) -If you use other new features, such as [Riak Security](/riak/kv/2.0.6/using/security/basics) or the new [configuration files](/riak/kv/2.0.6/configuring/reference/), you can still +If you use other new features, such as [Riak Security]({{}}riak/kv/2.0.6/using/security/basics) or the new [configuration files]({{}}riak/kv/2.0.6/configuring/reference/), you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. @@ -155,7 +155,7 @@ Riak 2.0 offers a new configuration system that both simplifies configuration syntax and uses one configuration file, `riak.conf`, instead of the two files, `app.config` and `vm.args`, required by the older system. Full documentation of the new system can be found in -[Configuration Files](/riak/kv/2.0.6/configuring/reference/). +[Configuration Files]({{}}riak/kv/2.0.6/configuring/reference/). If you're upgrading to Riak 2.0 from an earlier version, you have two configuration options: @@ -166,12 +166,12 @@ configuration options: recognized in Riak 2.0. 
If you choose the first option, make sure to consult the -[configuration files](/riak/kv/2.0.6/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. +[configuration files]({{}}riak/kv/2.0.6/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. If you choose the second option, Riak will automatically determine that the older configuration system is being used. You should be aware, however, that some settings must be set in an `advanced.config` file. -For a listing of those parameters, see our documentation on [advanced configuration](/riak/kv/2.0.6/configuring/reference/#advanced-configuration). +For a listing of those parameters, see our documentation on [advanced configuration]({{}}riak/kv/2.0.6/configuring/reference/#advanced-configuration). If you choose to keep the existing `app.config` files, you _must_ add the following additional settings in the `riak_core` section: @@ -209,7 +209,7 @@ default to a value of `15`, which can cause problems in some clusters. ## Upgrading Search Information on upgrading Riak Search to 2.0 can be found in our -[Search upgrade guide](/riak/kv/2.0.6/setup/upgrading/search). +[Search upgrade guide]({{}}riak/kv/2.0.6/setup/upgrading/search). ## Migrating from Short Names @@ -220,12 +220,11 @@ and `-name` in `vm.args`. If you are upgrading from a previous version of Riak to 2.0 and are using `-sname` in your `vm.args`, the below steps are required to migrate away from `-sname`. -1. Upgrade to Riak -[1.4.12](http://docs.basho.com/riak/1.4.12/downloads/). +1. Upgrade to Riak 1.4.12. 2. Back up the ring directory on each node, typically located in `/var/lib/riak/ring`. 3. Stop all nodes in your cluster. -4. Run [`riak-admin reip <old_nodename> <new_nodename>`](/riak/kv/2.0.6/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your +4. Run [`riak-admin reip <old_nodename> <new_nodename>`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your cluster. For example, in a 5 node cluster this will be run 25 total times, 5 times on each node. The `<old_nodename>` is the current shortname, and the `<new_nodename>` is the new fully qualified hostname. 
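A sketch of step 4 with hypothetical node names (the old `-sname`-style name and the new fully qualified name; run once per node pair, on every node):

```bash
# Hypothetical names; repeat for every node's old/new pair, on each node.
riak-admin reip riak@node1 riak@node1.example.com
```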
diff --git a/content/riak/kv/2.0.6/using.md b/content/riak/kv/2.0.6/using.md index 01fdb5d950..3dde51b47c 100644 --- a/content/riak/kv/2.0.6/using.md +++ b/content/riak/kv/2.0.6/using.md @@ -15,7 +15,7 @@ toc: true [use running cluster]: ../using/running-a-cluster [use admin index]: ../using/admin/ [cluster ops index]: ../using/cluster-operations -[repair recover index]: ../repair-recovery +[repair recover index]: ../using/repair-recovery [security index]: ../using/security [perf index]: ../using/performance [troubleshoot index]: ../using/troubleshooting diff --git a/content/riak/kv/2.0.6/using/admin/commands.md b/content/riak/kv/2.0.6/using/admin/commands.md index f4b6ea81aa..0daf60bbf6 100644 --- a/content/riak/kv/2.0.6/using/admin/commands.md +++ b/content/riak/kv/2.0.6/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.6/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.0.6/using/admin/riak-admin/#cluster -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.0.6/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.0.6/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.0.6/using/admin/riak-admin.md b/content/riak/kv/2.0.6/using/admin/riak-admin.md index 32aeef1d41..5b620c94f1 100644 --- a/content/riak/kv/2.0.6/using/admin/riak-admin.md +++ b/content/riak/kv/2.0.6/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.0.6/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.0.6/configuring/reference -[use admin commands]: /riak/kv/2.0.6/using/admin/commands -[use admin commands#join]: /riak/kv/2.0.6/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.0.6/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.0.6/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.0.6/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.0.6/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.0.6/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.0.6/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.0.6/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.0.6/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.0.6/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.0.6/setup/downgrade -[security index]: /riak/kv/2.0.6/using/security/ -[security managing]: /riak/kv/2.0.6/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.0.6/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.0.6/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.0.6/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.0.6/using/cluster-operations/strong-consistency -[cluster 
ops handoff]: /riak/kv/2.0.6/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.0.6/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[use admin commands]: {{}}riak/kv/2.0.6/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.0.6/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.0.6/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.0.6/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.0.6/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.0.6/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.0.6/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.0.6/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.0.6/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.0.6/using/cluster-operations/inspecting-node +[use ref monitoring]: {{}}riak/kv/2.0.6/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.0.6/setup/downgrade +[security index]: {{}}riak/kv/2.0.6/using/security/ +[security managing]: {{}}riak/kv/2.0.6/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.0.6/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.0.6/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.0.6/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.0.6/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.0.6/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.0.6/using/admin/riak-admin/#stats ## riak-admin diff --git a/content/riak/kv/2.0.6/using/admin/riak-cli.md b/content/riak/kv/2.0.6/using/admin/riak-cli.md index 0236e8d89d..3d07d05d95 100644 --- a/content/riak/kv/2.0.6/using/admin/riak-cli.md +++ b/content/riak/kv/2.0.6/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.6/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.0.6/configuring/reference/ +[configuration file]: {{}}riak/kv/2.0.6/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.0.6/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.0.6/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.0.6/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.0.6/configuring/reference/ ## riak diff --git a/content/riak/kv/2.0.6/using/admin/riak-control.md b/content/riak/kv/2.0.6/using/admin/riak-control.md index fe9c397fb5..964a2dbee2 100644 --- a/content/riak/kv/2.0.6/using/admin/riak-control.md +++ b/content/riak/kv/2.0.6/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.0.6/configuring/reference +[config reference]: {{}}riak/kv/2.0.6/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.0.6/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.0.6/using/security/basics#enabling-ssl). 
Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.0.6/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.0.6/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.0.6/using/cluster-operations.md b/content/riak/kv/2.0.6/using/cluster-operations.md index d3bb3b70e5..c032310dd6 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations.md +++ b/content/riak/kv/2.0.6/using/cluster-operations.md @@ -20,7 +20,6 @@ toc: true [ops log]: ./logging [ops backup]: ./backing-up [ops handoff]: ./handoff -[ops obj del]: ./object-deletion [ops strong consistency]: ./strong-consistency [ops v3 mdc]: ./v3-multi-datacenter [ops v2 mdc]: ./v2-multi-datacenter @@ -84,13 +83,6 @@ Information on using the `riak-admin handoff` interface to enable and disable ha [Learn More >>][ops handoff] -#### [Object Deletion][ops obj del] - -Describes possible settings for `delete_mode`. - -[Learn More >>][ops obj del] - - #### [Monitoring Strong Consistency][ops strong consistency] Overview of the various statistics used in monitoring strong consistency. 
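One readily available entry point for that monitoring is the ensemble status command (a sketch; its output format varies by version):

```bash
# Summarize the health of the consensus ensembles backing strong consistency.
riak-admin ensemble-status
```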
diff --git a/content/riak/kv/2.0.6/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.0.6/using/cluster-operations/active-anti-entropy.md index b7306354bd..1f5f59b60e 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/active-anti-entropy.md @@ -54,12 +54,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. ## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -87,7 +87,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes.md index f8ac5305e2..f63610deb1 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.0.6/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.0.6/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.0.6/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.0.6/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.0.6/using/cluster-operations/backing-up.md b/content/riak/kv/2.0.6/using/cluster-operations/backing-up.md index 9f2fe11c6c..531c629d82 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.6/ops/running/backups --- -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters -[config reference]: /riak/kv/2.0.6/configuring/reference -[plan backend leveldb]: /riak/kv/2.0.6/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.6/using/reference/strong-consistency -[concept aae]: /riak/kv/2.0.6/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.0.6/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.0.6/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.0.6/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.0.6/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.0.6/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down `](/riak/kv/2.0.6/using/admin/riak-admin/#down) + [`riak-admin down `]({{}}riak/kv/2.0.6/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join `](/riak/kv/2.0.6/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join `]({{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace `](/riak/kv/2.0.6/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace `]({{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.0.6/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.0.6/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.0.6/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.0.6/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.0.6/using/cluster-operations/bucket-types.md b/content/riak/kv/2.0.6/using/cluster-operations/bucket-types.md index 1e60625df9..0654e23564 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.0.6/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. 
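That setup step is a create/activate pair run from any node in the cluster. A minimal sketch (the type name and property value here are illustrative, not prescriptive):

```bash
# A minimal sketch: create, activate, and verify a bucket type.
# The type name and the n_val property are illustrative values only.
riak-admin bucket-type create n_val_of_5 '{"props":{"n_val":5}}'
riak-admin bucket-type activate n_val_of_5
riak-admin bucket-type status n_val_of_5
```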
diff --git a/content/riak/kv/2.0.6/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.0.6/using/cluster-operations/changing-cluster-info.md index ec55a6d336..dbfb26cacf 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.0.6/configuring/reference +[config reference]: {{}}riak/kv/2.0.6/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Prior to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.0.6/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.0.6/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime. +Prior to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.0.6/using/cluster-operations/handoff.md b/content/riak/kv/2.0.6/using/cluster-operations/handoff.md index e06e0fe6a2..80142eb252 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.0.6/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.0.6/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.0.6/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.0.6/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.0.6/using/cluster-operations/logging.md b/content/riak/kv/2.0.6/using/cluster-operations/logging.md index 022ce086ca..1aeb6ef980 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/logging.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e.
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.0.6/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.0.6/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.0.6/using/cluster-operations/replacing-node.md b/content/riak/kv/2.0.6/using/cluster-operations/replacing-node.md index 1b65c8fb4f..691fd5cbe7 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.0.6/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.0.6/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.0.6/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.0.6/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.0.6/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.0.6/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.0.6/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.0.6/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.0.6/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.0.6/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.0.6/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.0.6/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.0.6/using/cluster-operations/strong-consistency.md index 4452e276f7..88224575cc 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.0.6/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.0.6/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.0.6/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.0.6/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.0.6/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.0.6/using/cluster-operations/v2-multi-datacenter.md index 178749e324..d4660cf043 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/v2-multi-datacenter.md @@ -159,7 +159,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -179,7 +179,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -217,7 +217,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.0.6/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.0.6/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -238,7 +238,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter.md index b688c05947..b8150e9b87 100644 --- a/content/riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.0.6/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.0.6/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.6/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.6/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.0.6/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.0.6/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.0.6/using/performance.md b/content/riak/kv/2.0.6/using/performance.md index d7bf1d754f..eb5e9849e0 100644 --- a/content/riak/kv/2.0.6/using/performance.md +++ b/content/riak/kv/2.0.6/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.0.6/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.0.6/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -237,12 +237,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.0.6/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.0.6/using/performance/open-files-limit/) for more details.
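As a quick check before consulting that page, the limits in effect for the shell that starts Riak can be inspected with standard tools. A sketch (persistent changes belong in `/etc/security/limits.conf` or your service manager, as the linked doc describes):

```bash
# A minimal sketch: inspect and temporarily raise the open files limit.
ulimit -n          # current soft limit
ulimit -Hn         # current hard limit
ulimit -n 65536    # raise for this shell only; 65536 is an illustrative value
```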
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.0.6/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.0.6/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.0.6/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.0.6/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.0.6/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.0.6/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.0.6/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.0.6/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.0.6/using/performance/benchmarking.md b/content/riak/kv/2.0.6/using/performance/benchmarking.md index 8f2622a6d8..0c5df0d1d1 100644 --- a/content/riak/kv/2.0.6/using/performance/benchmarking.md +++ b/content/riak/kv/2.0.6/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.0.6/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.0.6/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.0.6/using/performance/latency-reduction.md b/content/riak/kv/2.0.6/using/performance/latency-reduction.md index 45606d0d70..977df8c05d 100644 --- a/content/riak/kv/2.0.6/using/performance/latency-reduction.md +++ b/content/riak/kv/2.0.6/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.0.6/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.0.6/using/performance/multi-datacenter-tuning.md index 37a04cf11b..84235b3633 100644 --- a/content/riak/kv/2.0.6/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.0.6/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.0.6/using/performance +[perf index]: {{}}riak/kv/2.0.6/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.0.6/using/performance/open-files-limit.md b/content/riak/kv/2.0.6/using/performance/open-files-limit.md index b7e76af57a..1ff11ce957 100644 --- a/content/riak/kv/2.0.6/using/performance/open-files-limit.md +++ b/content/riak/kv/2.0.6/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/ops/tuning/open-files-limit/ --- -[plan backend bitcask]: /riak/kv/2.0.6/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.6/setup/planning/backend/bitcask Riak can consume a large number of open file handles during normal operation. 
The [Bitcask][plan backend bitcask] backend in particular may accumulate a high diff --git a/content/riak/kv/2.0.6/using/reference/bucket-types.md b/content/riak/kv/2.0.6/using/reference/bucket-types.md index 6202d874b8..2571b0e938 100644 --- a/content/riak/kv/2.0.6/using/reference/bucket-types.md +++ b/content/riak/kv/2.0.6/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.0.6/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.0.6/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.0.6/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.0.6/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.0.6/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. @@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.0.6/developing/data-types), and [strong consistency](/riak/kv/2.0.6/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.0.6/developing/data-types), and [strong consistency]({{}}riak/kv/2.0.6/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.0.6/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.0.6/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.6/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.0.6/developing/getting-started) section. 
+> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.6/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.6/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.0.6/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.0.6/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.0.6/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.0.6/developing/usage/conflict-resolution) with the appropriate application-side business logic. To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.0.6/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.0.6/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.0.6/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.0.6/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.0.6/learn/concepts/buckets) and [keys](/riak/kv/2.0.6/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.0.6/learn/concepts/buckets) and [keys]({{}}riak/kv/2.0.6/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes.
We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.0.6/using/reference/custom-code.md b/content/riak/kv/2.0.6/using/reference/custom-code.md index 300254a842..34f8e02849 100644 --- a/content/riak/kv/2.0.6/using/reference/custom-code.md +++ b/content/riak/kv/2.0.6/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.0.6/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.0.6/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.0.6/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.0.6/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name as the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.0.6/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.0.6/using/admin/riak-admin/#wait-for-service). {{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.0.6/using/reference/handoff.md b/content/riak/kv/2.0.6/using/reference/handoff.md index be240bd527..666f552dc0 100644 --- a/content/riak/kv/2.0.6/using/reference/handoff.md +++ b/content/riak/kv/2.0.6/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.0.6/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.0.6/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.0.6/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.0.6/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.0.6/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.0.6/learn/glossary/#vnode) no longer belongs to the node on which it's running.
This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.0.6/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.0.6/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.0.6/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.0.6/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.0.6/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.0.6/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.0.6/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.0.6/configuring/reference/#vnode_management_timer). Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.0.6/using/reference/jmx.md b/content/riak/kv/2.0.6/using/reference/jmx.md index 72afe7e819..5ae582f1e6 100644 --- a/content/riak/kv/2.0.6/using/reference/jmx.md +++ b/content/riak/kv/2.0.6/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.6/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.0.6/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.0.6/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.0.6/using/reference/logging.md b/content/riak/kv/2.0.6/using/reference/logging.md index cb038c151e..899daca092 100644 --- a/content/riak/kv/2.0.6/using/reference/logging.md +++ b/content/riak/kv/2.0.6/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.6/ops/running/logging --- -[cluster ops log]: /riak/kv/2.0.6/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.0.6/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. 
A compact listing of parameters can be found in our [configuration files](/riak/kv/2.0.6/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.0.6/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -46,7 +46,7 @@ File | Significance `console.log` | Console log output `crash.log` | Crash logs `erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. -`error.log` | [Common errors](../../repair-recover/errors) emitted by Riak. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. `run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. ## Log Syntax @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.0.6/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.0.6/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. * `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.0.6/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.0.6/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.0.6/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.0.6/using/reference/multi-datacenter/comparison.md index f5b1748d3c..ecdaf4240b 100644 --- a/content/riak/kv/2.0.6/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.0.6/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.6/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.0.6/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.0.6/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.0.6/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.0.6/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. 
-* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.0.6/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.0.6/using/reference/runtime-interaction.md b/content/riak/kv/2.0.6/using/reference/runtime-interaction.md index f9f23f15f7..9171f07b05 100644 --- a/content/riak/kv/2.0.6/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.0.6/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.6/ops/advanced/runtime --- -[config reference]: /riak/kv/2.0.6/configuring/reference -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.6/configuring/reference +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.0.6/using/reference/search.md b/content/riak/kv/2.0.6/using/reference/search.md index 3fe649c4a7..2192da67ab 100644 --- a/content/riak/kv/2.0.6/using/reference/search.md +++ b/content/riak/kv/2.0.6/using/reference/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.0.6/dev/advanced/search --- -[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.6/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak Search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -Search, you should check out the [Using Search](/riak/kv/2.0.6/developing/usage/search) document. +Search, you should check out the [Using Search]({{}}riak/kv/2.0.6/developing/usage/search) document. -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -126,7 +126,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.0.6/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.0.6/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -288,7 +288,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on how to customize your own [search schema](/riak/kv/2.0.6/developing/usage/search-schemas). +You can also find more information on how to customize your own [search schema]({{}}riak/kv/2.0.6/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes
## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.0.6/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -353,7 +353,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.0.6/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.0.6/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.0.6/using/reference/secondary-indexes.md b/content/riak/kv/2.0.6/using/reference/secondary-indexes.md index f15e2bfa53..d27b9aa936 100644 --- a/content/riak/kv/2.0.6/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.0.6/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.0.6/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.6/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.6/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.6/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.0.6/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.0.6/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.0.6/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.0.6/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.0.6/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. 
Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.0.6/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.0.6/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.0.6/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.0.6/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. diff --git a/content/riak/kv/2.0.6/using/reference/statistics-monitoring.md b/content/riak/kv/2.0.6/using/reference/statistics-monitoring.md index 7bc1e5d797..0fb3f9c4d0 100644 --- a/content/riak/kv/2.0.6/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.0.6/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.0.6/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.0.6/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.0.6/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.0.6/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.0.6/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.0.6/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.0.6/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.0.6/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. 
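On Linux, those numbers are easy to sample directly from the kernel. A sketch (what counts as a worrying value depends entirely on your hardware and workload):

```bash
# A minimal sketch: watch dirty page and writeback volumes on Linux.
grep -E 'Dirty|Writeback' /proc/meminfo
vmstat 5   # ongoing paging/IO activity, sampled every 5 seconds
```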
More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.0.6/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.0.6/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.0.6/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.0.6/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -143,7 +143,7 @@ Metric | Also | Notes ## Command-line Interface -The [`riak-admin`](/riak/kv/2.0.6/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.0.6/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -168,14 +168,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.0.6/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.0.6/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.0.6/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.0.6/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -222,7 +222,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.0.6/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.0.6/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -246,7 +246,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.0.6/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.0.6/developing/api/http/status) endpoint is also available. #### Nagios @@ -320,14 +320,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.0.6/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.0.6/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. 
It can be connected to Riak's HTTP statistics
-[`/stats`](/riak/kv/2.0.6/developing/api/http/status) endpoint.
+[`/stats`]({{< baseurl >}}riak/kv/2.0.6/developing/api/http/status) endpoint.
Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak
@@ -349,9 +349,9 @@ Docs](https://github.com/basho/basho_docs).
## References
-* [Inspecting a Node](/riak/kv/2.0.6/using/cluster-operations/inspecting-node)
+* [Inspecting a Node]({{< baseurl >}}riak/kv/2.0.6/using/cluster-operations/inspecting-node)
* [Riaknostic](http://riaknostic.basho.com)
-* [Riak Control](/riak/kv/2.0.6/using/admin/riak-control/)
+* [Riak Control]({{< baseurl >}}riak/kv/2.0.6/using/admin/riak-control/)
* [collectd](http://collectd.org)
* [Ganglia](http://ganglia.info)
* [Nagios](http://www.nagios.org)
@@ -367,9 +367,9 @@ Docs](https://github.com/basho/basho_docs).
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt
-[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters
-[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets
-[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps
+[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/
+[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/
+[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/
[riak_nagios]: https://github.com/basho/riak_nagios
[tcollector]: https://github.com/stumbleupon/tcollector
[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py
diff --git a/content/riak/kv/2.0.6/using/reference/strong-consistency.md b/content/riak/kv/2.0.6/using/reference/strong-consistency.md
index 241bee36ac..55c92b1d11 100644
--- a/content/riak/kv/2.0.6/using/reference/strong-consistency.md
+++ b/content/riak/kv/2.0.6/using/reference/strong-consistency.md
@@ -12,10 +12,10 @@ menu:
toc: true
---
-[usage bucket types]: /riak/kv/2.0.6/developing/usage/bucket-types
-[concept eventual consistency]: /riak/kv/2.0.6/learn/concepts/eventual-consistency
+[usage bucket types]: {{< baseurl >}}riak/kv/2.0.6/developing/usage/bucket-types
+[concept eventual consistency]: {{< baseurl >}}riak/kv/2.0.6/learn/concepts/eventual-consistency
-Riak was originally designed as an [eventually consistent](/riak/kv/2.0.6/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+Riak was originally designed as an [eventually consistent]({{< baseurl >}}riak/kv/2.0.6/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
(i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data
@@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis.
-Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.6/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.6/configuring/strong-consistency) looking to manage,
+Elsewhere in the documentation there are instructions for [enabling and using]({{< baseurl >}}riak/kv/2.0.6/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{< baseurl >}}riak/kv/2.0.6/configuring/strong-consistency) looking to manage,
configure, and monitor strong consistency.
## Strong vs. Eventual Consistency
@@ -134,12 +134,12 @@ tolerance.
Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the
-[operations](/riak/kv/2.0.6/configuring/strong-consistency/#fault-tolerance)
+[operations]({{< baseurl >}}riak/kv/2.0.6/configuring/strong-consistency/#fault-tolerance)
documentation.
A second trade-off regards performance. Riak's implementation of strong
-consistency involves a complex [consensus subsystem](/riak/kv/2.0.6/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+consistency involves a complex [consensus subsystem]({{< baseurl >}}riak/kv/2.0.6/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
which can entail a performance hit of varying proportions, depending on a variety of factors.
-Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.0.6/configuring/strong-consistency/#performance).
+Ways to address this issue can be found in [strong consistency and performance]({{< baseurl >}}riak/kv/2.0.6/configuring/strong-consistency/#performance).
diff --git a/content/riak/kv/2.0.6/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.0.6/using/reference/v2-multi-datacenter/architecture.md
index 7c5b7d2e6c..5b373cf95e 100644
--- a/content/riak/kv/2.0.6/using/reference/v2-multi-datacenter/architecture.md
+++ b/content/riak/kv/2.0.6/using/reference/v2-multi-datacenter/architecture.md
@@ -78,7 +78,7 @@ replication, as illustrated in the Figure below.
2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster
-3. The site and listener nodes iterate through each [vnode](/riak/kv/2.0.6/learn/glossary/#vnode) in their respective clusters and compute a hash for
+3. The site and listener nodes iterate through each [vnode]({{< baseurl >}}riak/kv/2.0.6/learn/glossary/#vnode) in their respective clusters and compute a hash for
each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its
@@ -90,7 +90,7 @@ replication, as illustrated in the Figure below.
achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png)
+![MDC Fullsync]({{< baseurl >}}images/MDC_Full-sync-small.png)
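For orientation while reading this hunk, here is a minimal sketch of how a v2 fullsync pairing is typically wired up with `riak-repl`. The node name, addresses, port, and the site name `newyork` are illustrative assumptions, not values taken from the diff:

```bash
# On a node in the primary cluster: expose a replication listener
riak-repl add-listener riak@10.0.1.1 10.0.1.1 9010

# On a node in the secondary cluster: register the primary as a named site
riak-repl add-site 10.0.1.1 9010 newyork

# Manually kick off a fullsync exchange (it also runs on its configured schedule)
riak-repl start-fullsync
```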
## Realtime Replication @@ -108,7 +108,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png)
+![MDC Realtime]({{< baseurl >}}images/MDC-real-time-sync-small.png)
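Once a listener/site pair exists, v2 realtime replication streams updates without further commands; a quick, hedged way to confirm it is running (exact output fields vary by version):

```bash
# Inspect replication state, including realtime queue and connection statistics
riak-repl status
```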
## Restrictions
@@ -116,6 +116,6 @@ replication, as illustrated in the Figure below.
It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the
-same [ring size](/riak/kv/2.0.6/learn/concepts/clusters/#the-ring); if you are using fullsync
-replication, every bucket's [`n_val`](/riak/kv/2.0.6/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+same [ring size]({{< baseurl >}}riak/kv/2.0.6/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{< baseurl >}}riak/kv/2.0.6/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
source and sink cluster.
diff --git a/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/aae.md
index 32ce75829b..a9738ed2dd 100644
--- a/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/aae.md
+++ b/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/aae.md
@@ -17,9 +17,9 @@ aliases:
- /riak/kv/2.0.6/ops/mdc/v3/aae
---
-[glossary aae]: /riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae
-[config reference#advanced]: /riak/kv/2.0.6/configuring/reference/#advanced-configuration
-[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters
+[glossary aae]: {{< baseurl >}}riak/kv/2.0.6/learn/glossary/#active-anti-entropy-aae
+[config reference#advanced]: {{< baseurl >}}riak/kv/2.0.6/configuring/reference/#advanced-configuration
+[concept clusters]: {{< baseurl >}}riak/kv/2.0.6/learn/concepts/clusters
> **Note: Technical preview**
>
diff --git a/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/architecture.md
index bd8afa5b28..eaa09abacd 100644
--- a/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/architecture.md
+++ b/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/architecture.md
@@ -17,8 +17,8 @@ aliases:
- /riak/kv/2.0.6/ops/mdc/v3/architecture
---
-[glossary vnode]: /riak/kv/2.0.6/learn/glossary/#vnode
-[concept clusters]: /riak/kv/2.0.6/learn/concepts/clusters
+[glossary vnode]: {{< baseurl >}}riak/kv/2.0.6/learn/glossary/#vnode
+[concept clusters]: {{< baseurl >}}riak/kv/2.0.6/learn/concepts/clusters
## How Version 3 Replication Works
@@ -111,7 +111,7 @@ the following commands must be issued:
cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png)
+ ![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime6.png)
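As a companion to the queueing walkthrough above, a hedged sketch of the v3 commands that produce this realtime behavior. The cluster names and the `9080` cluster-manager port are illustrative assumptions:

```bash
# Name each cluster once (run on one node in each cluster)
riak-repl clustername boston     # on the source cluster
riak-repl clustername newyork    # on the sink cluster

# From the source cluster, connect to the sink's cluster manager
riak-repl connect newyork.example.com:9080

# Enable and start realtime replication toward the named sink
riak-repl realtime enable newyork
riak-repl realtime start newyork
```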
## Restrictions
diff --git a/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/cascading-writes.md
index 24c44b1b7d..9587120f26 100644
--- a/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/cascading-writes.md
+++ b/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -83,7 +83,7 @@ cascade.
## Usage
Riak Enterprise Cascading Writes can be enabled and disabled using the
-`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter) for more information.
+`riak-repl` command. Please see the [Version 3 Operations guide]({{< baseurl >}}riak/kv/2.0.6/using/cluster-operations/v3-multi-datacenter) for more information.
To show the current settings:
diff --git a/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/scheduling-fullsync.md
index a5928176cd..6a8737732b 100644
--- a/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/scheduling-fullsync.md
+++ b/content/riak/kv/2.0.6/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -17,7 +17,7 @@ aliases:
- /riak/kv/2.0.6/ops/mdc/v3/scheduling-fullsync
---
-[config reference#advanced]: /riak/kv/2.0.6/configuring/reference/#advanced-configuration
+[config reference#advanced]: {{< baseurl >}}riak/kv/2.0.6/configuring/reference/#advanced-configuration
The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either:
diff --git a/content/riak/kv/2.0.6/using/repair-recovery.md b/content/riak/kv/2.0.6/using/repair-recovery.md
index 85f8c5c3b7..80879b3d4d 100644
--- a/content/riak/kv/2.0.6/using/repair-recovery.md
+++ b/content/riak/kv/2.0.6/using/repair-recovery.md
@@ -15,7 +15,7 @@ toc: true
[repair recover fail]: ./failure-recovery/
[repair recover errors]: ./errors/
[repair recover repairs]: ./repairs/
-[repair recover restart]: ./rolling-restarts/
+[repair recover restart]: ./rolling-restart/
## In This Section
diff --git a/content/riak/kv/2.0.6/using/repair-recovery/errors.md b/content/riak/kv/2.0.6/using/repair-recovery/errors.md
index e122054e34..47b6a42b4f 100644
--- a/content/riak/kv/2.0.6/using/repair-recovery/errors.md
+++ b/content/riak/kv/2.0.6/using/repair-recovery/errors.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.0.6/ops/running/recovery/errors
---
-[config reference]: /riak/kv/2.0.6/configuring/reference
+[config reference]: {{< baseurl >}}riak/kv/2.0.6/configuring/reference
This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect
@@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems.
Error | Description | Resolution
:-----|:------------|:----------
`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd
-`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1.
-`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1.
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1.
`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
`{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems
@@ -186,7 +186,7 @@ Error | Message | Description | Resolution
| `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
| `SSL handoff config error: property <prop>: <val>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
| `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper
- | `<Type> transfer of <Module> from <SrcNode> to <TargetNode> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1.
+ | `<Type> transfer of <Module> from <SrcNode> to <TargetNode> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1.
| `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
| `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
| `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
@@ -206,8 +206,8 @@ Error | Message | Description | Resolution
:-----|:--------|:------------|:----------
`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status
`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
-`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <Node> - <Reason>` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1
-`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See 1
+`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <Node> - <Reason>` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`.
See 1 `{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports.
gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`.
-** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.0.6/using/reference/snmp) configuration.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{< baseurl >}}riak/kv/2.0.6/using/reference/snmp) configuration.
RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
diff --git a/content/riak/kv/2.0.6/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.0.6/using/repair-recovery/failure-recovery.md
index ac0505dbd8..3ec6df2f1c 100644
--- a/content/riak/kv/2.0.6/using/repair-recovery/failure-recovery.md
+++ b/content/riak/kv/2.0.6/using/repair-recovery/failure-recovery.md
@@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually
-returns to service (also called [hinted handoff](/riak/kv/2.0.6/learn/glossary/#hinted-handoff)).
+returns to service (also called [hinted handoff]({{< baseurl >}}riak/kv/2.0.6/learn/glossary/#hinted-handoff)).
More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it.
@@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system.
-Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.0.6/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{< baseurl >}}riak/kv/2.0.6/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
the number of siblings, causing longer disk service times and slower network responses.
@@ -115,7 +115,7 @@ spreading load and increasing available CPU and IOPS.
## Cluster Recovery From Backups
-See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+See [Changing Cluster Information]({{< baseurl >}}riak/kv/2.0.6/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
{{% note title="Tip" %}}
If you are a licensed Riak Enterprise or CS customer and require assistance or
diff --git a/content/riak/kv/2.0.6/using/repair-recovery/repairs.md b/content/riak/kv/2.0.6/using/repair-recovery/repairs.md
index 5e9e9739db..2536967beb 100644
--- a/content/riak/kv/2.0.6/using/repair-recovery/repairs.md
+++ b/content/riak/kv/2.0.6/using/repair-recovery/repairs.md
@@ -149,7 +149,7 @@ In the event of major hardware or filesystem problems, LevelDB can become corrup
### Checking for Compaction Errors
-Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`](/riak/kv/2.0.6/configuring/reference/) configuration file. The default is `./data`.
+Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`]({{< baseurl >}}riak/kv/2.0.6/configuring/reference/) configuration file. The default is `./data`.
Compaction error messages take the following form:
@@ -218,23 +218,23 @@ riak start
If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data
-[partitions](/riak/kv/2.0.6/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+[partitions]({{< baseurl >}}riak/kv/2.0.6/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to
-repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.0.6/learn/concepts/active-anti-entropy/) is enabled.
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{< baseurl >}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/) is enabled.
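For readers who want the mechanics behind the repairs hunk above, a sketch of what a manual partition repair looks like in practice. The partition ID is illustrative; `riak_kv_vnode:repair/1` is the entry point the repairs page documents:

```bash
# Attach to the running node's Erlang console
riak attach

# Then, from the attached Erlang shell, repair one partition by its ID:
#   riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
```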
You will need to run a repair if the following are both true:
-* Active anti-entropy is [disabled](/riak/kv/2.0.6/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* Active anti-entropy is [disabled]({{< baseurl >}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
* You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to
- [read repair](/riak/kv/2.0.6/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+ [read repair]({{< baseurl >}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
You will most likely not need to run a repair operation if _any_ of the following is true:
-* Active anti-entropy is [enabled](/riak/kv/2.0.6/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Active anti-entropy is [enabled]({{< baseurl >}}riak/kv/2.0.6/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
* Your entire key set is accessed frequently, allowing passive read repair to repair the partitions
* Your data expires frequently
diff --git a/content/riak/kv/2.0.6/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.0.6/using/repair-recovery/rolling-restart.md
index 7586dd85f3..7b33fefb52 100644
--- a/content/riak/kv/2.0.6/using/repair-recovery/rolling-restart.md
+++ b/content/riak/kv/2.0.6/using/repair-recovery/rolling-restart.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.0.6/ops/running/recovery/rolling-restart
---
-Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.0.6/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis.
+Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{< baseurl >}}riak/kv/2.0.6/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis.
The following steps should be undertaken on each Riak node that you wish to restart:
diff --git a/content/riak/kv/2.0.6/using/running-a-cluster.md b/content/riak/kv/2.0.6/using/running-a-cluster.md
index f9f0618152..cabea8a6b3 100644
--- a/content/riak/kv/2.0.6/using/running-a-cluster.md
+++ b/content/riak/kv/2.0.6/using/running-a-cluster.md
@@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster.
-Most configuration changes will be applied to the [configuration file](/riak/kv/2.0.6/configuring/reference/) located in your `rel/riak/etc` directory (if
+Most configuration changes will be applied to the [configuration file]({{< baseurl >}}riak/kv/2.0.6/configuring/reference/) located in your `rel/riak/etc` directory (if
you compiled from source) or `/etc` (if you used a binary install of Riak).
@@ -46,7 +46,7 @@ options:
`ring` directory. This will require rejoining all nodes into a cluster again.
>
-> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.0.6/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
+> *Rename the node using the [`riak-admin cluster replace`]({{< baseurl >}}riak/kv/2.0.6/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
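A condensed sketch of the per-node rolling-restart sequence referenced in the hunk above; the node name is illustrative:

```bash
# Stop and restart Riak on the node being cycled
riak stop
riak start

# Block until the KV service is back before moving to the next node
riak-admin wait-for-service riak_kv riak@192.168.1.10

# Confirm that ownership and hinted handoffs have settled
riak-admin transfers
```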
## Configure the First Node
@@ -59,7 +59,7 @@ riak stop
#### Select an IP address and port
Let's say that the IP address for your cluster is 192.168.1.10 and that
-you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.0.6/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
interface due to performance gains), you should change your configuration file:
@@ -153,7 +153,7 @@ preferred.
>
> Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or
-[`riak-admin cluster force-replace`](/riak/kv/2.0.6/using/admin/riak-admin/#cluster-force-replace) the node.
+[`riak-admin cluster force-replace`]({{< baseurl >}}riak/kv/2.0.6/using/admin/riak-admin/#cluster-force-replace) the node.
#### Start the node
@@ -249,7 +249,7 @@ into which you can type the following command:
```
To join additional nodes to your cluster, repeat the above steps. You
-can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes) from a cluster.
+can also find more detailed instructions about [adding and removing nodes]({{< baseurl >}}riak/kv/2.0.6/using/cluster-operations/adding-removing-nodes) from a cluster.
> **Ring Creation Size**
>
diff --git a/content/riak/kv/2.0.6/using/security.md b/content/riak/kv/2.0.6/using/security.md
index 12ea3fd432..50b217d1fe 100644
--- a/content/riak/kv/2.0.6/using/security.md
+++ b/content/riak/kv/2.0.6/using/security.md
@@ -15,14 +15,14 @@ aliases:
- /riak/kv/2.0.6/ops/advanced/security
---
-[config reference search]: /riak/kv/2.0.6/configuring/reference/#search
-[config search enabling]: /riak/kv/2.0.6/configuring/search/#enabling-riak-search
-[config v3 ssl]: /riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl
+[config reference search]: {{< baseurl >}}riak/kv/2.0.6/configuring/reference/#search
+[config search enabling]: {{< baseurl >}}riak/kv/2.0.6/configuring/search/#enabling-riak-search
+[config v3 ssl]: {{< baseurl >}}riak/kv/2.0.6/configuring/v3-multi-datacenter/ssl
[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html
-[security basics]: /riak/kv/2.0.6/using/security/basics
-[security managing]: /riak/kv/2.0.6/using/security/managing-sources/
+[security basics]: {{< baseurl >}}riak/kv/2.0.6/using/security/basics
+[security managing]: {{< baseurl >}}riak/kv/2.0.6/using/security/managing-sources/
[Solr]: http://lucene.apache.org/solr/
-[usage search]: /riak/kv/2.0.6/developing/usage/search
+[usage search]: {{< baseurl >}}riak/kv/2.0.6/developing/usage/search
> **Internal security**
>
diff --git a/content/riak/kv/2.0.6/using/security/basics.md b/content/riak/kv/2.0.6/using/security/basics.md
index 16c9ce6364..4f502b01e4 100644
--- a/content/riak/kv/2.0.6/using/security/basics.md
+++ b/content/riak/kv/2.0.6/using/security/basics.md
@@ -18,7 +18,7 @@ aliases:
> **Note on Network security**
>
> This document covers only the 2.0 authentication and authorization
-features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.0.6/using/security/managing-sources/).
+features. For a look at network security in Riak, see [Security and Firewalls]({{< baseurl >}}riak/kv/2.0.6/using/security/managing-sources/).
As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing,
@@ -47,7 +47,7 @@ of the following **before** enabling security:
1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you
- will need to use the [new Search feature](/riak/kv/2.0.6/developing/usage/search/).
+ will need to use the [new Search feature]({{< baseurl >}}riak/kv/2.0.6/developing/usage/search/).
1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL
@@ -59,10 +59,10 @@ of the following **before** enabling security:
1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path`
- mechanism documented in [Installing Custom Code](/riak/kv/2.0.6/using/reference/custom-code).
+ mechanism documented in [Installing Custom Code]({{< baseurl >}}riak/kv/2.0.6/using/reference/custom-code).
1. Make sure that your client software will work properly:
* It must pass authentication information with each request
- * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.0.6/developing/api/protocol-buffers/)
+ * It must support HTTPS or encrypted [Protocol Buffers]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/)
traffic
* If using HTTPS, the proper port (presumably 443) is open from client to server
@@ -75,7 +75,7 @@ of the following **before** enabling security:
Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted.
-Clients that use [Protocol Buffers](/riak/kv/2.0.6/developing/api/protocol-buffers/) will typically have to be
+Clients that use [Protocol Buffers]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/) will typically have to be
reconfigured/restarted with the proper credentials once security is enabled.
@@ -405,7 +405,7 @@ riak-admin security revoke <permissions> on any|<bucket-type> [bucket] from all|<users>
-If you select `any`, the permission applies to all buckets and [bucket types](/riak/kv/2.0.6/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination.
+If you select `any`, the permission applies to all buckets and [bucket types]({{< baseurl >}}riak/kv/2.0.6/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination.
@@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client
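To make the grant/revoke syntax in this hunk concrete, a short sketch; the user name, password, and the `shopping_cart` bucket type are illustrative:

```bash
# Create a user, then grant read/write on all buckets and bucket types
riak-admin security add-user riakuser password=rosebud
riak-admin security grant riak_kv.get,riak_kv.put on any to riakuser

# Scope a grant to one bucket type, then revoke part of it again
riak-admin security grant riak_kv.get on shopping_cart to riakuser
riak-admin security revoke riak_kv.put on shopping_cart from riakuser
```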
As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.0.6/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.0.6/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.0.6/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.0.6/using/reference/custom-code). 1. Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.0.6/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{}}riak/kv/2.0.6/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.0.6/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{}}riak/kv/2.0.6/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.0.6/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.0.6/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{}}riak/kv/2.0.6/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.0.6/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{}}riak/kv/2.0.6/developing/usage/bucket-types) in addition to setting bucket properties. 
security` allows you to manage the following bucket type-related permissions:
@@ -507,7 +507,7 @@ disabled, you will get the following error:
> `{error,{unknown_permission,"search.query"}}`
>
> More information on Riak Search and how to enable it can be found in the
-[Riak Search Settings](/riak/kv/2.0.6/configuring/search/) document.
+[Riak Search Settings]({{< baseurl >}}riak/kv/2.0.6/configuring/search/) document.
#### Usage Examples
@@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme
you will then need to define security sources required for authentication.
-A more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.0.6/using/security/managing-sources/).
+A more in-depth tutorial can be found in [Managing Security Sources]({{< baseurl >}}riak/kv/2.0.6/using/security/managing-sources/).
### Add Source
@@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password
This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`,
-`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.0.6/using/security/managing-sources/) document.
+`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{< baseurl >}}riak/kv/2.0.6/using/security/managing-sources/) document.
## Security Ciphers
@@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS).
### Client vs. Server Cipher Order
By default, Riak prefers the cipher order that you set on the server,
-i.e. the [`honor_cipher_order`](/riak/kv/2.0.6/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher
+i.e. the [`honor_cipher_order`]({{< baseurl >}}riak/kv/2.0.6/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher
order dictate which cipher is chosen, set `honor_cipher_order` to `off`.
> **Note on Erlang versions**
@@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`.
> Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on
-installing it can be found in [Installing Erlang](/riak/kv/2.0.6/setup/installing/source/erlang). This issue should
+installing it can be found in [Installing Erlang]({{< baseurl >}}riak/kv/2.0.6/setup/installing/source/erlang). This issue should
not affect Erlang 17.0 and later.
## Enabling SSL
In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need
-to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.0.6/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.0.6/configuring/reference/#client-interfaces) for the node
+to enable it prior to enabling security. If you are using [Protocol Buffers]({{< baseurl >}}riak/kv/2.0.6/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{< baseurl >}}riak/kv/2.0.6/configuring/reference/#client-interfaces) for the node
as well as a [certification configuration](#certificate-configuration).
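Pulling the pieces of this hunk together, a minimal `riak.conf` sketch for SSL over Protocol Buffers; the address and certificate paths are illustrative:

```riakconf
listener.protobuf.internal = 192.168.1.10:8087

ssl.certfile = /etc/riak/cert.pem
ssl.keyfile = /etc/riak/key.pem
ssl.cacertfile = /etc/riak/cacert.pem
```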
-If, however, you are using the [HTTP API](/riak/kv/2.0.6/developing/api/http) for Riak and would like to
+If, however, you are using the [HTTP API]({{< baseurl >}}riak/kv/2.0.6/developing/api/http) for Riak and would like to
configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port:
@@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088
When using Riak security, you can choose which versions of SSL/TLS are allowed. By default, only TLS 1.2 is allowed, but this version can be
-disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.0.6/configuring/reference/#security) to `on` or `off`:
+disabled and others enabled by setting the following [configurable parameters]({{< baseurl >}}riak/kv/2.0.6/configuring/reference/#security) to `on` or `off`:
* `tls_protocols.tlsv1`
* `tls_protocols.tlsv1.1`
@@ -775,16 +775,16 @@ Three things to note:
## Certificate Configuration
-If you are using any of the available [security sources](/riak/kv/2.0.6/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication), you will need to do so
+If you are using any of the available [security sources]({{< baseurl >}}riak/kv/2.0.6/using/security/managing-sources/), including [trust-based authentication]({{< baseurl >}}riak/kv/2.0.6/using/security/managing-sources/#trust-based-authentication), you will need to do so
over a secure SSL connection. In order to establish a secure connection,
-you will need to ensure that each Riak node's [configuration files](/riak/kv/2.0.6/configuring/reference/#security) point to the proper paths for your
+you will need to ensure that each Riak node's [configuration files]({{< baseurl >}}riak/kv/2.0.6/configuring/reference/#security) point to the proper paths for your
generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory.
If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation
-on [configuring directories](/riak/kv/2.0.6/configuring/reference/#directories).
+on [configuring directories]({{< baseurl >}}riak/kv/2.0.6/configuring/reference/#directories).
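For example, relocating that directory is a one-line `riak.conf` change; the path below is illustrative:

```riakconf
# Store certs and other etc files somewhere other than the default
platform_etc_dir = /opt/riak/etc
```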
<td><code>vnode_management_timer</code></td> <td>Sets the frequency with which <a
-href="/riak/kv/2.0.6/learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
+href="../../learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
this node and other nodes in the cluster.</td> <td><code>10s</code> (10 seconds)</td>
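In `riak.conf` terms, the setting in this table row looks like the following; the value is illustrative, and `10s` is the default shown above:

```riakconf
# Check for pending handoff more aggressively than the 10s default
vnode_management_timer = 5s
```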
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`.
-If you are using [certificate-based authentication](/riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by
+If you are using [certificate-based authentication]({{< baseurl >}}riak/kv/2.0.6/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by
default. To disable this behavior, set the `check_crl` parameter to `off`.
diff --git a/content/riak/kv/2.0.6/using/security/managing-sources.md b/content/riak/kv/2.0.6/using/security/managing-sources.md
index d3d73b7a8f..ec169cf7ff 100644
--- a/content/riak/kv/2.0.6/using/security/managing-sources.md
+++ b/content/riak/kv/2.0.6/using/security/managing-sources.md
@@ -16,7 +16,7 @@ aliases:
---
If you're looking for more general information on Riak Security, it may
-be best to start with our general guide to [authentication and authorization](/riak/kv/2.0.6/using/security/basics).
+be best to start with our general guide to [authentication and authorization]({{< baseurl >}}riak/kv/2.0.6/using/security/basics).
This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password,
@@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface.
The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been
-[created](/riak/kv/2.0.6/using/security/basics/#user-management) and that
-security has been [enabled](/riak/kv/2.0.6/using/security/basics/#the-basics).
+[created]({{< baseurl >}}riak/kv/2.0.6/using/security/basics/#user-management) and that
+security has been [enabled]({{< baseurl >}}riak/kv/2.0.6/using/security/basics/#the-basics).
{{% note title="Note on SSL connections" %}}
If you use _any_ of the aforementioned security sources, even `trust`, you
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can
-find the node's name in [`riak.conf`](/riak/kv/2.0.6/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is
+find the node's name in [`riak.conf`]({{< baseurl >}}riak/kv/2.0.6/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is
`riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example:
@@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1'
```
Once certificates have been properly generated and configured on all of
-the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.0.6/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client
+the nodes in your Riak cluster, you need to perform a [rolling restart]({{< baseurl >}}riak/kv/2.0.6/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client
certificate that you generated for the user `riakuser`.
How to use Riak clients in conjunction with OpenSSL and other
diff --git a/content/riak/kv/2.0.7/_reference-links.md b/content/riak/kv/2.0.7/_reference-links.md
index 936be44b5d..5d431b4604 100644
--- a/content/riak/kv/2.0.7/_reference-links.md
+++ b/content/riak/kv/2.0.7/_reference-links.md
@@ -4,245 +4,245 @@
## Common
-[downloads]: /riak/kv/2.0.7/downloads/
-[install index]: /riak/kv/2.0.7/setup/installing
-[upgrade index]: /riak/kv/2.0.7/upgrading
-[plan index]: /riak/kv/2.0.7/planning
-[config index]: /riak/2.1.3/using/configuring/
-[config reference]: /riak/kv/2.0.7/configuring/reference/
-[manage index]: /riak/kv/2.0.7/using/managing
-[performance index]: /riak/kv/2.0.7/using/performance
-[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode
+[downloads]: {{< baseurl >}}riak/kv/2.0.7/downloads/
+[install index]: {{< baseurl >}}riak/kv/2.0.7/setup/installing
+[upgrade index]: {{< baseurl >}}riak/kv/2.0.7/upgrading
+[plan index]: {{< baseurl >}}riak/kv/2.0.7/planning
+[config index]: {{< baseurl >}}riak/kv/2.1.3/using/configuring/
+[config reference]: {{< baseurl >}}riak/kv/2.0.7/configuring/reference/
+[manage index]: {{< baseurl >}}riak/kv/2.0.7/using/managing
+[performance index]: {{< baseurl >}}riak/kv/2.0.7/using/performance
+[glossary vnode]: {{< baseurl >}}riak/kv/2.0.7/learn/glossary/#vnode
[contact basho]: http://basho.com/contact/
## Planning
-[plan index]: /riak/kv/2.0.7/setup/planning
-[plan start]: /riak/kv/2.0.7/setup/planning/start
-[plan backend]: /riak/kv/2.0.7/setup/planning/backend
-[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask
-[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb
-[plan backend memory]: /riak/kv/2.0.7/setup/planning/backend/memory
-[plan backend multi]: /riak/kv/2.0.7/setup/planning/backend/multi
-[plan cluster capacity]: /riak/kv/2.0.7/setup/planning/cluster-capacity
-[plan bitcask capacity]: /riak/kv/2.0.7/setup/planning/bitcask-capacity-calc
-[plan best practices]: /riak/kv/2.0.7/setup/planning/best-practices
-[plan future]: /riak/kv/2.0.7/setup/planning/future
+[plan index]: {{< baseurl >}}riak/kv/2.0.7/setup/planning
+[plan start]: {{< baseurl >}}riak/kv/2.0.7/setup/planning/start
+[plan backend]: {{< baseurl >}}riak/kv/2.0.7/setup/planning/backend
+[plan backend bitcask]: {{< baseurl >}}riak/kv/2.0.7/setup/planning/backend/bitcask
+[plan backend leveldb]: {{< baseurl >}}riak/kv/2.0.7/setup/planning/backend/leveldb
+[plan backend memory]: {{< baseurl >}}riak/kv/2.0.7/setup/planning/backend/memory
+[plan backend multi]: {{< baseurl >}}riak/kv/2.0.7/setup/planning/backend/multi
+[plan cluster capacity]: {{< baseurl >}}riak/kv/2.0.7/setup/planning/cluster-capacity
+[plan bitcask capacity]: {{< baseurl >}}riak/kv/2.0.7/setup/planning/bitcask-capacity-calc
+[plan best practices]: {{< baseurl >}}riak/kv/2.0.7/setup/planning/best-practices
+[plan future]: {{< baseurl >}}riak/kv/2.0.7/setup/planning/future
## Installing
-[install index]: /riak/kv/2.0.7/setup/installing
-[install aws]: /riak/kv/2.0.7/setup/installing/amazon-web-services
-[install debian & ubuntu]: /riak/kv/2.0.7/setup/installing/debian-ubuntu
-[install freebsd]: /riak/kv/2.0.7/setup/installing/freebsd
-[install mac osx]: /riak/kv/2.0.7/setup/installing/mac-osx
-[install rhel & centos]: /riak/kv/2.0.7/setup/installing/rhel-centos
-[install smartos]: /riak/kv/2.0.7/setup/installing/smartos
-[install solaris]: /riak/kv/2.0.7/setup/installing/solaris
-[install suse]: /riak/kv/2.0.7/setup/installing/suse
-[install windows azure]: /riak/kv/2.0.7/setup/installing/windows-azure
+[install index]: {{< baseurl >}}riak/kv/2.0.7/setup/installing
+[install aws]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/amazon-web-services
+[install debian & ubuntu]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/debian-ubuntu
+[install freebsd]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/freebsd
+[install mac osx]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/mac-osx
+[install rhel & centos]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/rhel-centos
+[install smartos]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/smartos
+[install solaris]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/solaris
+[install suse]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/suse
+[install windows azure]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/windows-azure
-[install source index]: /riak/kv/2.0.7/setup/installing/source
-[install source erlang]: /riak/kv/2.0.7/setup/installing/source/erlang
-[install source jvm]: /riak/kv/2.0.7/setup/installing/source/jvm
+[install source index]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/source
+[install source erlang]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/source/erlang
+[install source jvm]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/source/jvm
-[install verify]: /riak/kv/2.0.7/setup/installing/verify
+[install verify]: {{< baseurl >}}riak/kv/2.0.7/setup/installing/verify
## Upgrading
-[upgrade index]: /riak/kv/2.0.7/setup/upgrading
-[upgrade checklist]: /riak/kv/2.0.7/setup/upgrading/checklist
-[upgrade version]: /riak/kv/2.0.7/setup/upgrading/version
-[upgrade cluster]: /riak/kv/2.0.7/setup/upgrading/cluster
-[upgrade mdc]: /riak/kv/2.0.7/setup/upgrading/multi-datacenter
-[upgrade downgrade]: /riak/kv/2.0.7/setup/downgrade
+[upgrade index]: {{< baseurl >}}riak/kv/2.0.7/setup/upgrading
+[upgrade checklist]: {{< baseurl >}}riak/kv/2.0.7/setup/upgrading/checklist
+[upgrade version]: {{< baseurl >}}riak/kv/2.0.7/setup/upgrading/version
+[upgrade cluster]: {{< baseurl >}}riak/kv/2.0.7/setup/upgrading/cluster
+[upgrade mdc]: {{< baseurl >}}riak/kv/2.0.7/setup/upgrading/multi-datacenter
+[upgrade downgrade]: {{< baseurl >}}riak/kv/2.0.7/setup/downgrade
## Configuring
-[config index]: /riak/kv/2.0.7/configuring
-[config basic]: /riak/kv/2.0.7/configuring/basic
-[config backend]: /riak/kv/2.0.7/configuring/backend
-[config manage]: /riak/kv/2.0.7/configuring/managing
-[config reference]: /riak/kv/2.0.7/configuring/reference/
-[config strong consistency]: /riak/kv/2.0.7/configuring/strong-consistency
-[config load balance]: /riak/kv/2.0.7/configuring/load-balancing-proxy
-[config mapreduce]: /riak/kv/2.0.7/configuring/mapreduce
-[config search]: /riak/kv/2.0.7/configuring/search/
+[config index]: {{< baseurl >}}riak/kv/2.0.7/configuring
+[config basic]: {{< baseurl >}}riak/kv/2.0.7/configuring/basic
+[config backend]: {{< baseurl >}}riak/kv/2.0.7/configuring/backend
+[config manage]: {{< baseurl >}}riak/kv/2.0.7/configuring/managing
+[config reference]: {{< baseurl >}}riak/kv/2.0.7/configuring/reference/
+[config strong consistency]: {{< baseurl >}}riak/kv/2.0.7/configuring/strong-consistency
+[config load balance]: {{< baseurl >}}riak/kv/2.0.7/configuring/load-balancing-proxy
+[config mapreduce]: {{< baseurl >}}riak/kv/2.0.7/configuring/mapreduce
+[config search]: {{< baseurl >}}riak/kv/2.0.7/configuring/search/
-[config v3 mdc]: /riak/kv/2.0.7/configuring/v3-multi-datacenter
-[config v3 nat]: /riak/kv/2.0.7/configuring/v3-multi-datacenter/nat
-[config v3 quickstart]: /riak/kv/2.0.7/configuring/v3-multi-datacenter/quick-start
-[config v3 ssl]: /riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl
+[config v3 mdc]: {{< baseurl >}}riak/kv/2.0.7/configuring/v3-multi-datacenter
+[config v3 nat]: {{< baseurl >}}riak/kv/2.0.7/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{< baseurl >}}riak/kv/2.0.7/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{< baseurl >}}riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl
-[config v2 mdc]: /riak/kv/2.0.7/configuring/v2-multi-datacenter
-[config v2 nat]: /riak/kv/2.0.7/configuring/v2-multi-datacenter/nat
-[config v2 quickstart]: /riak/kv/2.0.7/configuring/v2-multi-datacenter/quick-start
-[config v2 ssl]: /riak/kv/2.0.7/configuring/v2-multi-datacenter/ssl
+[config v2 mdc]: {{< baseurl >}}riak/kv/2.0.7/configuring/v2-multi-datacenter
+[config v2 nat]: {{< baseurl >}}riak/kv/2.0.7/configuring/v2-multi-datacenter/nat
+[config v2 quickstart]: {{< baseurl >}}riak/kv/2.0.7/configuring/v2-multi-datacenter/quick-start
+[config v2 ssl]: {{< baseurl >}}riak/kv/2.0.7/configuring/v2-multi-datacenter/ssl
## Using
-[use index]: /riak/kv/2.0.7/using/
-[use admin commands]: /riak/kv/2.0.7/using/cluster-admin-commands
-[use running cluster]: /riak/kv/2.0.7/using/running-a-cluster
+[use index]: {{< baseurl >}}riak/kv/2.0.7/using/
+[use admin commands]: {{< baseurl >}}riak/kv/2.0.7/using/cluster-admin-commands
+[use running cluster]: {{< baseurl >}}riak/kv/2.0.7/using/running-a-cluster
### Reference
-[use ref custom code]: /riak/kv/2.0.7/using/reference/custom-code
-[use ref handoff]: /riak/kv/2.0.7/using/reference/handoff
-[use ref monitoring]: /riak/kv/2.0.7/using/reference/statistics-monitoring
-[use ref search]: /riak/kv/2.0.7/using/reference/search
-[use ref 2i]: /riak/kv/2.0.7/using/reference/secondary-indexes
-[use ref snmp]: /riak/kv/2.0.7/using/reference/snmp
-[use ref strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency
-[use ref jmx]: /riak/kv/2.0.7/using/reference/jmx
-[use ref obj del]: /riak/kv/2.0.7/using/reference/object-deletion/
-[use ref v3 mdc]: /riak/kv/2.0.7/using/reference/v3-multi-datacenter
-[use ref v2 mdc]: /riak/kv/2.0.7/using/reference/v2-multi-datacenter
+[use ref custom code]: {{< baseurl >}}riak/kv/2.0.7/using/reference/custom-code
+[use ref handoff]: {{< baseurl >}}riak/kv/2.0.7/using/reference/handoff
+[use ref monitoring]: {{< baseurl >}}riak/kv/2.0.7/using/reference/statistics-monitoring
+[use ref search]: {{< baseurl >}}riak/kv/2.0.7/using/reference/search
+[use ref 2i]: {{< baseurl >}}riak/kv/2.0.7/using/reference/secondary-indexes
+[use ref snmp]: {{< baseurl >}}riak/kv/2.0.7/using/reference/snmp
+[use ref strong consistency]: {{< baseurl >}}riak/kv/2.0.7/using/reference/strong-consistency
+[use ref jmx]: {{< baseurl >}}riak/kv/2.0.7/using/reference/jmx
+[use ref obj del]: {{< baseurl >}}riak/kv/2.0.7/using/reference/object-deletion/
+[use ref v3 mdc]: {{< baseurl >}}riak/kv/2.0.7/using/reference/v3-multi-datacenter
+[use ref v2 mdc]: {{< baseurl >}}riak/kv/2.0.7/using/reference/v2-multi-datacenter
### Cluster Admin
-[use admin index]: /riak/kv/2.0.7/using/admin/
-[use admin commands]: /riak/kv/2.0.7/using/admin/commands/
-[use admin riak cli]: /riak/kv/2.0.7/using/admin/riak-cli/
/riak/kv/2.0.7/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.0.7/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.0.7/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.0.7/using/admin/ +[use admin commands]: {{}}riak/kv/2.0.7/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.0.7/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.0.7/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.0.7/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.0.7/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.0.7/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.0.7/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.0.7/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.0.7/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.0.7/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.0.7/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.0.7/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.0.7/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.0.7/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.0.7/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.0.7/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.0.7/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.0.7/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.0.7/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.0.7/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.0.7/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.0.7/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.0.7/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.0.7/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.0.7/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.0.7/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.0.7/using/repair-recovery -[repair recover index]: /riak/kv/2.0.7/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.0.7/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.0.7/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.0.7/using/security/ -[security basics]: /riak/kv/2.0.7/using/security/basics -[security managing]: /riak/kv/2.0.7/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.0.7/using/security/ +[security basics]: {{}}riak/kv/2.0.7/using/security/basics +[security managing]: {{}}riak/kv/2.0.7/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.0.7/using/performance/ -[perf 
benchmark]: /riak/kv/2.0.7/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.7/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.0.7/using/performance/erlang -[perf aws]: /riak/kv/2.0.7/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.0.7/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.0.7/using/performance/ +[perf benchmark]: {{}}riak/kv/2.0.7/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.7/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.0.7/using/performance/erlang +[perf aws]: {{}}riak/kv/2.0.7/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.0.7/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.0.7/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.0.7/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.0.7/developing -[dev client libraries]: /riak/kv/2.0.7/developing/client-libraries -[dev data model]: /riak/kv/2.0.7/developing/data-modeling -[dev data types]: /riak/kv/2.0.7/developing/data-types -[dev kv model]: /riak/kv/2.0.7/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.0.7/developing +[dev client libraries]: {{}}riak/kv/2.0.7/developing/client-libraries +[dev data model]: {{}}riak/kv/2.0.7/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.7/developing/data-types +[dev kv model]: {{}}riak/kv/2.0.7/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.0.7/developing/getting-started -[getting started java]: /riak/kv/2.0.7/developing/getting-started/java -[getting started ruby]: /riak/kv/2.0.7/developing/getting-started/ruby -[getting started python]: /riak/kv/2.0.7/developing/getting-started/python -[getting started php]: /riak/kv/2.0.7/developing/getting-started/php -[getting started csharp]: /riak/kv/2.0.7/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.0.7/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.0.7/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.0.7/developing/getting-started/golang - -[obj model java]: /riak/kv/2.0.7/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.7/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.7/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.7/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.7/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.7/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.7/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.0.7/developing/getting-started +[getting started java]: {{}}riak/kv/2.0.7/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.0.7/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.0.7/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.0.7/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.0.7/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.0.7/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.0.7/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.0.7/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.0.7/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.0.7/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.7/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.7/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.7/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.7/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.7/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.0.7/developing/usage -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.7/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.7/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.0.7/developing/usage/content-types -[usage create objects]: /riak/kv/2.0.7/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.0.7/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.0.7/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.0.7/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.7/developing/usage/search -[usage search schema]: /riak/kv/2.0.7/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.7/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.0.7/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.0.7/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.0.7/developing/usage +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.7/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.7/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.0.7/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.0.7/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.0.7/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.0.7/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.0.7/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.7/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.7/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.0.7/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.0.7/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.0.7/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.0.7/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.0.7/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.0.7/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.0.7/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.0.7/developing/api/backend -[dev api http]: /riak/kv/2.0.7/developing/api/http -[dev api http status]: /riak/kv/2.0.7/developing/api/http/status -[dev api pbc]: /riak/kv/2.0.7/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.0.7/developing/api/backend +[dev api http]: {{}}riak/kv/2.0.7/developing/api/http +[dev api http status]: {{}}riak/kv/2.0.7/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.0.7/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.0.7/learn/glossary/ -[glossary aae]: /riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.0.7/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.0.7/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.0.7/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode -[concept aae]: /riak/kv/2.0.7/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.0.7/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.7/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.7/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.0.7/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.7/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.7/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.7/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.0.7/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.7/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.7/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.0.7/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.7/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.7/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.7/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.0.7/add-ons.md b/content/riak/kv/2.0.7/add-ons.md index 2ceb30856c..a4cf7348ee 100644 --- a/content/riak/kv/2.0.7/add-ons.md +++ b/content/riak/kv/2.0.7/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.0.7/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.0.7/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.0.7/add-ons/redis/developing-rra.md b/content/riak/kv/2.0.7/add-ons/redis/developing-rra.md index fe05dd9c9b..c3bc722685 100644 --- a/content/riak/kv/2.0.7/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.0.7/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.0.7/developing/api/http +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.0.7/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.0.7/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.0.7/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.0.7/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.0.7/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.0.7/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.0.7/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.0.7/add-ons/redis/redis-add-on-features.md index ecb0d36888..f2492b1af0 100644 --- a/content/riak/kv/2.0.7/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.0.7/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.0.7/add-ons/redis/set-up-rra.md b/content/riak/kv/2.0.7/add-ons/redis/set-up-rra.md index 4376eb3486..eeec852d89 100644 --- a/content/riak/kv/2.0.7/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.0.7/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.0.7/setup/installing -[perf open files]: /riak/kv/2.0.7/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.0.7/setup/installing +[perf open files]: {{}}riak/kv/2.0.7/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. 
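The read-through and write-around behaviors described in the add-on features page above are generic caching patterns. As a rough, hypothetical sketch of the same pattern (this is an illustration only, not RRA itself), assuming a Redis on localhost:6379, Riak's HTTP API on localhost:8098, and a bucket named `test`:

```python
import redis
import requests

# Hypothetical endpoints -- adjust for your deployment.
CACHE_TTL = 15  # seconds; stands in for RRA's CACHE_TTL option
cache = redis.Redis(host="localhost", port=6379)
RIAK = "http://localhost:8098/buckets/test/keys"

def read_through(key):
    """GET: serve from the cache; on a miss, fetch from Riak and cache."""
    value = cache.get(key)
    if value is not None:
        return value
    resp = requests.get(f"{RIAK}/{key}")
    resp.raise_for_status()
    cache.setex(key, CACHE_TTL, resp.content)  # entry expires after TTL
    return resp.content

def write_around(key, value):
    """SET: write to Riak first, then invalidate the cached entry."""
    requests.put(f"{RIAK}/{key}", data=value,
                 headers={"Content-Type": "text/plain"})
    cache.delete(key)
```

As in the sequence diagrams, a cache hit never touches Riak, and a write goes around the cache so the next read repopulates it.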
diff --git a/content/riak/kv/2.0.7/add-ons/redis/using-rra.md b/content/riak/kv/2.0.7/add-ons/redis/using-rra.md index 723e7db925..9cba1b58e7 100644 --- a/content/riak/kv/2.0.7/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.0.7/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.0.7/developing/api/http/ +[dev api http]: {{}}riak/kv/2.0.7/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.0.7/configuring/backend.md b/content/riak/kv/2.0.7/configuring/backend.md index 3050a180a5..b294174e7d 100644 --- a/content/riak/kv/2.0.7/configuring/backend.md +++ b/content/riak/kv/2.0.7/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.7/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.7/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.7/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.7/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.0.7/configuring/basic.md b/content/riak/kv/2.0.7/configuring/basic.md index 867b3d938d..68d131ed2e 100644 --- a/content/riak/kv/2.0.7/configuring/basic.md +++ b/content/riak/kv/2.0.7/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.0.7/ops/building/configuration/ --- -[config reference]: /riak/kv/2.0.7/configuring/reference -[use running cluster]: /riak/kv/2.0.7/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.0.7/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.0.7/using/performance/erlang -[plan start]: /riak/kv/2.0.7/setup/planning/start -[plan best practices]: /riak/kv/2.0.7/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.0.7/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.0.7/setup/planning/backend -[plan backend multi]: /riak/kv/2.0.7/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.0.7/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.0.7/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.7/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.7/using/performance -[perf aws]: /riak/kv/2.0.7/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.0.7/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[use running cluster]: {{}}riak/kv/2.0.7/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.0.7/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.0.7/using/performance/erlang +[plan start]: {{}}riak/kv/2.0.7/setup/planning/start +[plan best practices]: {{}}riak/kv/2.0.7/setup/planning/best-practices +[cluster ops backup]: 
{{}}riak/kv/2.0.7/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.0.7/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.0.7/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.7/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.0.7/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.0.7/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.7/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.7/using/performance +[perf aws]: {{}}riak/kv/2.0.7/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.0.7/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.0.7/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.7/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
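As a quick illustration of that reset mechanism, a minimal Python sketch against the HTTP API; the node address and the bucket name `mybucket` are assumptions:

```python
import requests

# Reset (hypothetical) bucket "mybucket" so it falls back to the
# defaults now defined in riak.conf; Riak answers 204 No Content.
resp = requests.delete("http://localhost:8098/buckets/mybucket/props")
resp.raise_for_status()

# Confirm the bucket now reports the new defaults (e.g. r).
props = requests.get("http://localhost:8098/buckets/mybucket/props").json()
print(props["props"]["r"])
```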
## System tuning diff --git a/content/riak/kv/2.0.7/configuring/load-balancing-proxy.md b/content/riak/kv/2.0.7/configuring/load-balancing-proxy.md index 42e814d75f..c8e252bfb8 100644 --- a/content/riak/kv/2.0.7/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.0.7/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.0.7/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.0.7/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.0.7/configuring/managing.md b/content/riak/kv/2.0.7/configuring/managing.md index 3ef761aa00..f91e26e262 100644 --- a/content/riak/kv/2.0.7/configuring/managing.md +++ b/content/riak/kv/2.0.7/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.0.7/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.0.7/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.0.7/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.0.7/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.0.7/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.0.7/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.0.7/configuring/mapreduce.md b/content/riak/kv/2.0.7/configuring/mapreduce.md index 955a10226a..f16b283b3f 100644 --- a/content/riak/kv/2.0.7/configuring/mapreduce.md +++ b/content/riak/kv/2.0.7/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.0.7/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.0.7/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.0.7/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.0.7/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.7/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.0.7/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.0.7/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.0.7/configuring/reference.md b/content/riak/kv/2.0.7/configuring/reference.md index be25645095..476f04c5d8 100644 --- a/content/riak/kv/2.0.7/configuring/reference.md +++ b/content/riak/kv/2.0.7/configuring/reference.md @@ -199,7 +199,7 @@ executables are stored. +as active anti-entropy data, and cluster metadata. @@ -1727,7 +1727,7 @@ abandons the leader (in milliseconds). 
This must be set greater than the diff --git a/content/riak/kv/2.0.7/configuring/search.md b/content/riak/kv/2.0.7/configuring/search.md index ce1bb5a09f..13e2ccf971 100644 --- a/content/riak/kv/2.0.7/configuring/search.md +++ b/content/riak/kv/2.0.7/configuring/search.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.0.7/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.0.7/developing/usage/search -[usage search schema]: /riak/kv/2.0.7/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.7/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.0.7/developing/usage/custom-extractors -[config reference]: /riak/kv/2.0.7/configuring/reference -[config reference#search]: /riak/kv/2.0.7/configuring/reference/#search -[glossary aae]: /riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.0.7/using/security/ +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.7/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.7/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.0.7/developing/usage/custom-extractors +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[config reference#search]: {{}}riak/kv/2.0.7/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.0.7/using/security/ This document covers how to use the Riak Search (with @@ -38,7 +38,7 @@ If you are looking developer-focused docs, we recommend the following: * [Custom Search Extractors][usage custom extractors] * [Riak KV Data Types and Search][usage search data types] -##Overview +## Overview We'll be walking through: @@ -90,8 +90,8 @@ Field | Default | Valid values | Description `search.queue.high_watermark` | `10000` | Integer | The queue high water mark. If the total number of queued messages in a Solrq worker instance exceed this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak and the Riak Search batching subsystem, if writes into Solr start to fall behind. `search.queue.worker_count` | `10` | Integer | The number of Solr queue workers to instantiate. Solr queue workers are responsible for enqueing objects for insertion or update into Solr. Increasing the number of Solrq workers distributes the queuing of objects and can lead to greater throughput under high load, potentially at the expense of smaller batch sizes. `search.queue.helper_count` | `10` | Integer | The number of Solr queue helpers to instantiate. Solr queue helpers are responsible for delivering batches of data into Solr. Increasing the number of Solrq helpers will increase concurrent writes into Solr. -`search.index.error_threshold.failure_count` | `3` | Integer | The number of failures encountered while updating a search index within `search.queue.error_threshold.failure_interval` before Riak will skip updates to that index. -`search.index.error_threshold.failure_interval` | `5000` | Milliseconds | The window of time during which `search.queue.error_threshold.failure_count` failures will cause Riak to skip updates to a search index. If `search.queue.error_threshold.failure_count` errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the `search.queue.error_threshold.reset_interval` has passed. 
+`search.index.error_threshold.failure_count` | `3` | Integer | The number of failures encountered while updating a search index within `search.index.error_threshold.failure_interval` before Riak will skip updates to that index. +`search.index.error_threshold.failure_interval` | `5000` | Milliseconds | The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak to skip updates to a search index. If `search.index.error_threshold.failure_count` errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the `search.index.error_threshold.reset_interval` has passed. `search.index.error_threshold.reset_interval` | `30000` | Milliseconds | The amount of time it takes for updates to a given search index to resume/refresh once Riak has started skipping update operations. `search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, `purge_all`, or `off` | The strategy for how we handle purging when we hit the `search.queue.high_watermark`. The options:
* `purge_one` removes the oldest item on the queue from an erroring (references to fuses blown in the code) index in order to get below the `search.queue.high_watermark`,

* `purge_index` removes all items associated with one random erroring (references to fuses blown in the code) index in order to get below the `search.queue.high_watermark`,

* `purge_all` removes all items associated with all erroring (references to fuses blown in the code) indices in order to get below the `search.queue.high_watermark`, and

* `off` disables purging.
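For reference, here is how the queue and (corrected) error-threshold parameters from the table above would look in `riak.conf`; this is only a sketch using the documented default values:

```riakconf
search.queue.high_watermark = 10000
search.queue.worker_count = 10
search.queue.helper_count = 10
search.index.error_threshold.failure_count = 3
search.index.error_threshold.failure_interval = 5000
search.index.error_threshold.reset_interval = 30000
search.queue.high_watermark.purge_strategy = purge_one
```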
diff --git a/content/riak/kv/2.0.7/configuring/strong-consistency.md b/content/riak/kv/2.0.7/configuring/strong-consistency.md index 0fbcd0a95a..0a08af9a42 100644 --- a/content/riak/kv/2.0.7/configuring/strong-consistency.md +++ b/content/riak/kv/2.0.7/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.0.7/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.0.7/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.0.7/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.0.7/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.0.7/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.0.7/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.0.7/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.0.7/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.0.7/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.0.7/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.0.7/learn/concepts/causal-context -[dev data types]: /riak/kv/2.0.7/developing/data-types -[glossary aae]: /riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.0.7/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.0.7/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.0.7/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.0.7/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.0.7/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.0.7/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.0.7/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.0.7/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.0.7/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.0.7/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.0.7/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.0.7/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.0.7/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.0.7/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.0.7/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.0.7/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.0.7/developing/data-types +[glossary aae]: 
{{}}riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.0.7/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.0.7/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.0.7/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.0.7/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.0.7/configuring/v2-multi-datacenter.md b/content/riak/kv/2.0.7/configuring/v2-multi-datacenter.md index 07834b7a27..3bac1aa05b 100644 --- a/content/riak/kv/2.0.7/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.7/configuring/v2-multi-datacenter.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.7/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.0.7/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.7/configuring/v2-multi-datacenter/ssl Riak Enterprise's Multi-Datacenter Replication capabilities offer a variety of configurable parameters. diff --git a/content/riak/kv/2.0.7/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.0.7/configuring/v2-multi-datacenter/nat.md index bf1d0f779f..cb62b293ad 100644 --- a/content/riak/kv/2.0.7/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.7/configuring/v2-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.7/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.0.7/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.7/configuring/v2-multi-datacenter/ssl Riak Enterprise supports replication of data on networks that use static NAT. This capability can be used for replicating data over the internet diff --git a/content/riak/kv/2.0.7/configuring/v3-multi-datacenter.md b/content/riak/kv/2.0.7/configuring/v3-multi-datacenter.md index e9928a2844..cc848b493c 100644 --- a/content/riak/kv/2.0.7/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.7/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.7/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.0.7/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.0.7/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/nat.md index 47a5ef9c1c..7fea83b96f 100644 --- a/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.7/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. 
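Returning briefly to the strong-consistency material above: per the `ensemble-status` table, enabling the consensus subsystem comes down to a single `riak.conf` flag, sketched here:

```riakconf
## Enable the consensus subsystem; verify afterwards with
## `riak-admin ensemble-status` (it reports Active only once the
## cluster has at least three nodes).
strong_consistency = on
```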
diff --git a/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/quick-start.md index 8683965bf7..735b88b952 100644 --- a/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.7/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.0.7/using/performance -[config v3 mdc]: /riak/kv/2.0.7/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.0.7/using/performance +[config v3 mdc]: {{}}riak/kv/2.0.7/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl.md index 30b467aa70..7323006b9e 100644 --- a/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.7/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.0.7/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.0.7/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.0.7/developing/api/backend.md b/content/riak/kv/2.0.7/developing/api/backend.md index a1cd8c1029..e6f6ba08fa 100644 --- a/content/riak/kv/2.0.7/developing/api/backend.md +++ b/content/riak/kv/2.0.7/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/backend-api --- -[plan backend]: /riak/kv/2.0.7/setup/planning/backend +[plan backend]: {{}}riak/kv/2.0.7/setup/planning/backend Riak's storage API uniformly applies to all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.0.7/developing/api/http.md b/content/riak/kv/2.0.7/developing/api/http.md index 948736a516..524e1d8448 100644 --- a/content/riak/kv/2.0.7/developing/api/http.md +++ b/content/riak/kv/2.0.7/developing/api/http.md @@ -29,50 +29,50 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. 
Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.0.7/developing/api/http/get-bucket-props)
-`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.0.7/developing/api/http/set-bucket-props)
-`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.0.7/developing/api/http/reset-bucket-props)
-`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.0.7/developing/api/http/list-buckets)
-`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.0.7/developing/api/http/list-keys)
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.0.7/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.0.7/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.7/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.0.7/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.0.7/developing/api/http/list-keys)

## Object-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.0.7/developing/api/http/fetch-object)
-`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.7/developing/api/http/store-object)
-`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.0.7/developing/api/http/store-object)
-`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.0.7/developing/api/http/delete-object)
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.0.7/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.0.7/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.0.7/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.0.7/developing/api/http/delete-object)

## Riak-Data-Type-related Operations

-For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.0.7/learn/concepts/crdts),
-see the `curl` examples in [Using Data Types](/riak/kv/2.0.7/developing/data-types/#usage-examples) and subpages e.g. [sets](/riak/kv/2.0.7/developing/data-types/sets).
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.0.7/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/2.0.7/developing/data-types/#usage-examples) and subpages e.g. [sets]({{}}riak/kv/2.0.7/developing/data-types/sets).

## Query-related Operations

Method | URL | Doc
:------|:----|:---
-`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.0.7/developing/api/http/mapreduce)
-`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<value>` | [HTTP Secondary Indexes](/riak/kv/2.0.7/developing/api/http/secondary-indexes)
-`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<start>/<end>` | [HTTP Secondary Indexes](/riak/kv/2.0.7/developing/api/http/secondary-indexes)
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.0.7/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.7/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.7/developing/api/http/secondary-indexes)

## Server-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/ping` | [HTTP Ping](/riak/kv/2.0.7/developing/api/http/ping)
-`GET` | `/stats` | [HTTP Status](/riak/kv/2.0.7/developing/api/http/status)
-`GET` | `/` | [HTTP List Resources](/riak/kv/2.0.7/developing/api/http/list-resources)
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.0.7/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.0.7/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.0.7/developing/api/http/list-resources)

## Search-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/search/query/<index_name>` | [HTTP Search Query](/riak/kv/2.0.7/developing/api/http/search-query)
-`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.0.7/developing/api/http/search-index-info)
-`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index](/riak/kv/2.0.7/developing/api/http/fetch-search-index)
-`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index](/riak/kv/2.0.7/developing/api/http/store-search-index)
-`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index](/riak/kv/2.0.7/developing/api/http/delete-search-index)
-`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema](/riak/kv/2.0.7/developing/api/http/fetch-search-schema)
-`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema](/riak/kv/2.0.7/developing/api/http/store-search-schema)
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{}}riak/kv/2.0.7/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.0.7/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{}}riak/kv/2.0.7/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{}}riak/kv/2.0.7/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{}}riak/kv/2.0.7/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.0.7/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{}}riak/kv/2.0.7/developing/api/http/store-search-schema)
diff --git a/content/riak/kv/2.0.7/developing/api/http/counters.md b/content/riak/kv/2.0.7/developing/api/http/counters.md
index f92e7f969f..cf1eff1fee 100644
--- a/content/riak/kv/2.0.7/developing/api/http/counters.md
+++ b/content/riak/kv/2.0.7/developing/api/http/counters.md
@@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY

## Response

-The regular POST/PUT ([HTTP Store Object](/riak/kv/2.0.7/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.0.7/developing/api/http/fetch-object)) responses apply here.
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.0.7/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.0.7/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.0.7/developing/api/http/fetch-object.md b/content/riak/kv/2.0.7/developing/api/http/fetch-object.md index 990e2470ca..4ca25cf113 100644 --- a/content/riak/kv/2.0.7/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.0.7/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.0.7/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.0.7/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.0.7/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.7/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.0.7/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.7/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.0.7/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.0.7/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.7/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.7/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.0.7/developing/api/http/fetch-search-index.md b/content/riak/kv/2.0.7/developing/api/http/fetch-search-index.md index 1d99ac0110..df5cb2fa0a 100644 --- a/content/riak/kv/2.0.7/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.0.7/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.0.7/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.0.7/developing/usage/search/#simple-setup). 
## Request @@ -36,7 +36,7 @@ GET /search/index/ ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.0.7/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.0.7/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.0.7/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.0.7/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.0.7/developing/api/http/fetch-search-schema.md index be49150737..a68c281d6d 100644 --- a/content/riak/kv/2.0.7/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.0.7/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.0.7/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.0.7/developing/api/http/get-bucket-props.md b/content/riak/kv/2.0.7/developing/api/http/get-bucket-props.md index 86c7c3baa1..85d031209e 100644 --- a/content/riak/kv/2.0.7/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.0.7/developing/api/http/get-bucket-props.md @@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.0.7/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.0.7/developing/api/http/list-keys). ## Response @@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.0.7/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{}}riak/kv/2.0.7/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.0.7/developing/api/http/link-walking.md b/content/riak/kv/2.0.7/developing/api/http/link-walking.md index 3f5d95465b..96415af58f 100644 --- a/content/riak/kv/2.0.7/developing/api/http/link-walking.md +++ b/content/riak/kv/2.0.7/developing/api/http/link-walking.md @@ -17,8 +17,8 @@ aliases: Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.0.7/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.0.7/learn/glossary/#links). +is a special case of [MapReduce]({{}}riak/kv/2.0.7/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.0.7/learn/glossary/#links). ## Request @@ -64,7 +64,7 @@ single object that was found. If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. 
Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.0.7/developing/api/http/fetch-object), without the status +response from [fetching the object]({{}}riak/kv/2.0.7/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.0.7/developing/api/http/list-resources.md b/content/riak/kv/2.0.7/developing/api/http/list-resources.md index 0559133ed4..64417804d8 100644 --- a/content/riak/kv/2.0.7/developing/api/http/list-resources.md +++ b/content/riak/kv/2.0.7/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.0.7/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.0.7/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.0.7/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.0.7/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.0.7/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.0.7/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.0.7/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.0.7/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.0.7/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.0.7/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.0.7/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.0.7/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.0.7/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.0.7/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.0.7/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.0.7/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.0.7/developing/api/http/mapreduce.md b/content/riak/kv/2.0.7/developing/api/http/mapreduce.md index d699055d2f..45b385cff7 100644 --- a/content/riak/kv/2.0.7/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.0.7/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.0.7/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{}}riak/kv/2.0.7/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.0.7/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. 
The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.0.7/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.0.7/developing/api/http/search-index-info.md b/content/riak/kv/2.0.7/developing/api/http/search-index-info.md index bb5dacdac9..05b65ca3ac 100644 --- a/content/riak/kv/2.0.7/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.0.7/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.0.7/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.0.7/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.0.7/developing/api/http/search-query.md b/content/riak/kv/2.0.7/developing/api/http/search-query.md index 8d751727b4..34bfd7f4ac 100644 --- a/content/riak/kv/2.0.7/developing/api/http/search-query.md +++ b/content/riak/kv/2.0.7/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.0.7/developing/usage/search) query. +Performs a [Riak KV Search]({{}}riak/kv/2.0.7/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.0.7/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{}}riak/kv/2.0.7/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.0.7/developing/api/http/secondary-indexes.md b/content/riak/kv/2.0.7/developing/api/http/secondary-indexes.md index dc90fe1b41..c3f56dd60f 100644 --- a/content/riak/kv/2.0.7/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.0.7/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.0.7/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) allow an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
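As a minimal sketch of that workflow (the bucket, key, and index names are hypothetical, and a local node on the default HTTP port 8098 is assumed):

```curl
# Tag an object with a binary secondary index at write time
curl -XPUT http://localhost:8098/buckets/users/keys/john_smith \
  -H "x-riak-index-twitter_bin: jsmith123" \
  -H "Content-Type: application/json" \
  -d '{"name": "John Smith"}'

# Later, query the index to retrieve the list of matching keys
curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith123
```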
## Request diff --git a/content/riak/kv/2.0.7/developing/api/http/set-bucket-props.md b/content/riak/kv/2.0.7/developing/api/http/set-bucket-props.md index 36b13b5005..1e5ef0fcda 100644 --- a/content/riak/kv/2.0.7/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.0.7/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.0.7/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.0.7/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{}}riak/kv/2.0.7/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.0.7/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.0.7/developing/api/http/status.md b/content/riak/kv/2.0.7/developing/api/http/status.md index 6bc023d886..b82fdd7d08 100644 --- a/content/riak/kv/2.0.7/developing/api/http/status.md +++ b/content/riak/kv/2.0.7/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.0.7/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.0.7/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.0.7/developing/api/http/store-object.md b/content/riak/kv/2.0.7/developing/api/http/store-object.md index dde66a46b7..616527db77 100644 --- a/content/riak/kv/2.0.7/developing/api/http/store-object.md +++ b/content/riak/kv/2.0.7/developing/api/http/store-object.md @@ -38,8 +38,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.0.7/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.7/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{}}riak/kv/2.0.7/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.7/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -83,7 +83,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.0.7/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.0.7/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. 
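A minimal sketch of a store with `returnbody=true` (the bucket and key names are hypothetical; a local node on the default HTTP port 8098 is assumed):

```curl
# Store a plain-text object and have Riak echo the stored value,
# along with the usual fetch-style headers, back in the response
curl -XPUT "http://localhost:8098/buckets/test/keys/doc1?returnbody=true" \
  -H "Content-Type: text/plain" \
  -d "hello world"
```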
diff --git a/content/riak/kv/2.0.7/developing/api/http/store-search-index.md b/content/riak/kv/2.0.7/developing/api/http/store-search-index.md index ca22b59e27..a7b4f6293a 100644 --- a/content/riak/kv/2.0.7/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.0.7/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.0.7/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.0.7/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.0.7/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.0.7/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.0.7/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.0.7/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.0.7/developing/api/http/store-search-schema.md b/content/riak/kv/2.0.7/developing/api/http/store-search-schema.md index ff939e186d..9e4a6a0c72 100644 --- a/content/riak/kv/2.0.7/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.0.7/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.0.7/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/schema/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.0.7/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the file `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request: diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers.md index a6685c57a5..17e6a79ed0 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers.md @@ -139,47 +139,47 @@ message RpbErrorResp { ## Bucket Operations -* [PBC List Buckets](/riak/kv/2.0.7/developing/api/protocol-buffers/list-buckets) -* [PBC List Keys](/riak/kv/2.0.7/developing/api/protocol-buffers/list-keys) -* [PBC Get Bucket Properties](/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props) -* [PBC Set Bucket Properties](/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props) -* [PBC Reset Bucket Properties](/riak/kv/2.0.7/developing/api/protocol-buffers/reset-bucket-props) +* [PBC List Buckets]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/reset-bucket-props) ## Object/Key Operations -* [PBC Fetch Object](/riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object) -* [PBC Store Object](/riak/kv/2.0.7/developing/api/protocol-buffers/store-object) -* [PBC Delete Object](/riak/kv/2.0.7/developing/api/protocol-buffers/delete-object) +* [PBC Fetch Object]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/delete-object) ## Query Operations -* [PBC MapReduce](/riak/kv/2.0.7/developing/api/protocol-buffers/mapreduce) -* [PBC Secondary Indexes](/riak/kv/2.0.7/developing/api/protocol-buffers/secondary-indexes) -* [PBC Search](/riak/kv/2.0.7/developing/api/protocol-buffers/search) +* [PBC MapReduce]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/search) ## Server Operations -* [PBC Ping](/riak/kv/2.0.7/developing/api/protocol-buffers/ping) -* [PBC Server Info](/riak/kv/2.0.7/developing/api/protocol-buffers/server-info) +* [PBC Ping]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/server-info) ## Bucket Type Operations -* [PBC Get Bucket Type](/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-type) -* [PBC Set Bucket Type](/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-type) +* [PBC Get Bucket Type]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-type) ## Data Type Operations -* [PBC Data Type Fetch](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-fetch) -* [PBC Data Type Union](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-union) -* [PBC Data Type Store](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-store) -* [PBC Data Type Counter
Store](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/auth-req.md index c181ca94b6..ee487c2bd1 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/auth-req.md @@ -27,4 +27,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.0.7/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.0.7/using/security/basics). diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/delete-object.md index af3cd19c38..0af1eeda24 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/delete-object.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.0.7/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{}}riak/kv/2.0.7/using/cluster-operations/bucket-types)/bucket/key location. 
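For orientation, the same delete expressed against the HTTP interface would look roughly like the sketch below (the bucket type `animals`, bucket `dogs`, and key `rex` are hypothetical; a local node on the default HTTP port 8098 is assumed):

```curl
# HTTP analog of the PBC delete: remove the object stored under
# bucket type "animals", bucket "dogs", key "rex"
curl -XDELETE http://localhost:8098/types/animals/buckets/dogs/keys/rex
```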
## Request diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-counter-store.md index 6780dfea2d..9355a9b926 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.0.7/developing/data-types). +An operation to update a [counter]({{}}riak/kv/2.0.7/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-fetch.md index 1e1b570a37..cc67751654 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.0.7/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.0.7/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.0.7/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{}}riak/kv/2.0.7/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. 
```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.0.7/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{}}riak/kv/2.0.7/learn/glossary/#vector-clock) for standard KV updates. The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store.md index 3190dced27..a0ef5e6eb8 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-set-store.md index 6ed7ba81dd..30e0b8e68b 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-store.md index ab4fb74486..85a3b024ff 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.0.7/developing/data-types). +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.0.7/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`. 
The `DtOp` value specifies which Data Type-specific operation is being -performed. More on that in the [PBC Data Type Union](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-union) document. +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-union) document. ```protobuf message DtUpdateReq { @@ -50,11 +50,11 @@ message DtUpdateReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored -`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.0.7/using/cluster-operations/bucket-types). +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.0.7/using/cluster-operations/bucket-types). Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a -[counter](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store). +[counter]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-map-store). ```protobuf message DtOp { @@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum` Parameter | Description :---------|:----------- `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. -`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.0.7/learn/glossary/#vector-clock) +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.0.7/learn/glossary/#vector-clock) `w` | Write quorum, i.e. how many replicas to write to before returning a successful response `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted @@ -92,7 +92,7 @@ Parameter | Description ## Response The response to a Data Type update request is analogous to -[`RpbPutResp`](/riak/kv/2.0.7/developing/api/protocol-buffers/store-object) for KV operations. If the +[`RpbPutResp`]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/store-object) for KV operations. If the `return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-union.md index da0ad9977e..db33f86045 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-union.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/dt-union.md @@ -28,4 +28,4 @@ message DtOp { ``` The included operation depends on the Data Type that is being updated.
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.0.7/developing/api/protocol-buffers/dt-store) message. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object.md index 9294103e3f..ee00bd666f 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object.md @@ -47,7 +47,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or @@ -87,7 +87,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.0.7/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -114,7 +114,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `video/mp4` -* `vtag` --- The object's [vtag](/riak/kv/2.0.7/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{}}riak/kv/2.0.7/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -132,7 +132,7 @@ of the following optional parameters: } ``` Notice that both a key and value can be stored or just a key. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.0.7/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key) diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props.md index cf3fee66dc..7f729c5665 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props.md @@ -26,7 +26,7 @@ message RpbGetBucketReq { } ``` -The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.0.7/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.0.7/using/cluster-operations/bucket-types) parameter (`type`) is optional. 
If it is not specified, the `default` bucket type will be used. ## Response @@ -85,7 +85,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.0.7/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{}}riak/kv/2.0.7/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -106,5 +106,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) {{% /note %}} diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-type.md index 56e08d09ed..cec472afec 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-type.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.0.7/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.0.7/using/cluster-operations/bucket-types). ## Request @@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-client-id.md index f84d4dc177..6d9aef10a5 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/get-client-id.md @@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.0.7/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/set-client-id). 
## Request diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/mapreduce.md index 66d95d0eef..6d210937b0 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/mapreduce.md @@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.0.7/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.0.7/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as [REST API]({{}}riak/kv/2.0.7/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{}}riak/kv/2.0.7/developing/app-guide/advanced-mapreduce/#erlang) ## Response diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/reset-bucket-props.md index cab040c8c6..4fc5ef0675 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/reset-bucket-props.md @@ -27,7 +27,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/secondary-indexes.md index 714d3f2783..802e121241 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/secondary-indexes.md @@ -61,7 +61,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.0.7/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -84,7 +84,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object). 
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props.md index 48696da6f1..44bed4b9d5 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props.md @@ -29,9 +29,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-type.md index aa4e8f2b06..7e2a8ecf87 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-type.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.7/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.0.7/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types). ## Request @@ -28,4 +28,4 @@ message RpbSetBucketTypeReq { ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/store-object.md index 9295b7c41a..50e68c62d7 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/store-object.md @@ -16,11 +16,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.0.7/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.0.7/learn/concepts/buckets), and [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{}}riak/kv/2.0.7/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.0.7/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types). 
A bucket must always be specified (via `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no -[bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) is assigned, Riak will assign -`default`, which means that the [default bucket configuration](/riak/kv/2.0.7/configuring/reference/#default-bucket-properties) will be used. +[bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) is assigned, Riak will assign +`default`, which means that the [default bucket configuration]({{}}riak/kv/2.0.7/configuring/reference/#default-bucket-properties) will be used. #### Request @@ -50,7 +50,7 @@ message RpbPutReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket, in bytes, in which the key/value is to reside -`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object) +`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object) #### Optional Parameters @@ -93,7 +93,7 @@ message RpbPutResp { If `return_body` is set to `true` on the PUT request, the `RpbPutResp` will contain the current object after the PUT completes, in `contents`, -as well as the object's [causal context](/riak/kv/2.0.7/learn/concepts/causal-context), in the `vclock` +as well as the object's [causal context]({{}}riak/kv/2.0.7/learn/concepts/causal-context), in the `vclock` field. The `key` will be sent only if the server generated a random key for the object. diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-delete.md index 5280d99ffc..1d96251438 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-delete.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-delete.md @@ -29,4 +29,4 @@ message RpbYokozunaIndexDeleteReq { ## Response - Returns a [RpbDelResp](/riak/kv/2.7.0/developing/api/protocol-buffers/#message-codes) code with no data on success. + Returns a [RpbDelResp]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-get.md index fc0f28b812..6201f72c99 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-get.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-get.md @@ -53,7 +53,7 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.7/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.7/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
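For comparison, the HTTP endpoint covered earlier carries the same information; a sketch only (the index name is hypothetical, a local node on the default HTTP port 8098 is assumed, and passing `n_val` alongside `schema` in the JSON body is an assumption mirroring the `RpbYokozunaIndex` fields):

```curl
# Sketch: create an index, naming a schema and an n_val that matches
# the n_val of the buckets that will be associated with it
curl -XPUT http://localhost:8098/search/index/my_index \
  -H "Content-Type: application/json" \
  -d '{"schema": "_yz_default", "n_val": 3}'
```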
diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-put.md index 9f0da26aae..e5851aad59 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-put.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-index-put.md @@ -37,8 +37,8 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.7/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.7/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. ## Response -Returns a [RpbPutResp](/riak/kv/2.7.0/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-get.md index 6c4b89046a..729e81d318 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.0.7/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-put.md index 8dcc39ee52..dbf8443acd 100644 --- a/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.0.7/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.0.7/developing/usage/search-schemas). +Create a new Solr [search schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas). ## Request @@ -34,9 +34,9 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.0.7/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.7/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/#message-codes) code with no data on success. 
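For comparison, the HTTP route described earlier uploads the same XML payload; a sketch assuming a local node on the default HTTP port 8098 and a hypothetical schema file `my_schema.xml`:

```curl
# HTTP analog: store the XML in my_schema.xml under the schema name
# "my_custom_schema"
curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
  -H "Content-Type: application/xml" \
  --data-binary @my_schema.xml
```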
diff --git a/content/riak/kv/2.0.7/developing/app-guide.md b/content/riak/kv/2.0.7/developing/app-guide.md index 1c69e93e3a..b80b5d0960 100644 --- a/content/riak/kv/2.0.7/developing/app-guide.md +++ b/content/riak/kv/2.0.7/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.0.7/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.0.7/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.0.7/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.0.7/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.0.7/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.0.7/developing/key-value-modeling -[dev data types]: /riak/kv/2.0.7/developing/data-types -[dev data types#counters]: /riak/kv/2.0.7/developing/data-types/#counters -[dev data types#sets]: /riak/kv/2.0.7/developing/data-types/#sets -[dev data types#maps]: /riak/kv/2.0.7/developing/data-types/#maps -[usage create objects]: /riak/kv/2.0.7/developing/usage/creating-objects -[usage search]: /riak/kv/2.0.7/developing/usage/search -[use ref search]: /riak/kv/2.0.7/using/reference/search -[usage 2i]: /riak/kv/2.0.7/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.0.7/developing/client-libraries -[concept crdts]: /riak/kv/2.0.7/learn/concepts/crdts -[dev data model]: /riak/kv/2.0.7/developing/data-modeling -[usage mapreduce]: /riak/kv/2.0.7/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.0.7/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.0.7/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.7/setup/planning/backend/memory -[obj model java]: /riak/kv/2.0.7/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.7/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.7/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.7/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.7/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.7/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.7/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[use ref strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.0.7/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.0.7/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.0.7/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.0.7/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties -[install index]: /riak/kv/2.0.7/setup/installing -[getting started]: /riak/kv/2.0.7/developing/getting-started -[usage index]: /riak/kv/2.0.7/developing/usage -[glossary]: /riak/kv/2.0.7/learn/glossary +[usage conflict resolution]: {{}}riak/kv/2.0.7/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.0.7/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.0.7/developing/data-modeling/#sensor-data +[concept eventual consistency]: 
{{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.0.7/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.0.7/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.0.7/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.0.7/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.0.7/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.0.7/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.0.7/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search +[use ref search]: {{}}riak/kv/2.0.7/using/reference/search +[usage 2i]: {{}}riak/kv/2.0.7/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.0.7/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.0.7/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.0.7/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.0.7/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.0.7/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.0.7/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.7/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.0.7/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.0.7/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.7/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.7/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.7/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.7/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.7/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[cluster ops strong consistency]: {{}}riak/kv/2.0.7/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/2.0.7/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/2.0.7/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/2.0.7/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/2.0.7/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/2.0.7/setup/installing +[getting started]: {{}}riak/kv/2.0.7/developing/getting-started +[usage index]: {{}}riak/kv/2.0.7/developing/usage +[glossary]: {{}}riak/kv/2.0.7/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -118,7 +118,7 @@ Riak may not be such a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance.
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.0.7/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.0.7/developing/app-guide/advanced-mapreduce.md index e885839b9a..5b8e118372 100644 --- a/content/riak/kv/2.0.7/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.0.7/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.7/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.0.7/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.0.7/using/reference/custom-code -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.7/configuring/reference +[usage 2i]: {{}}riak/kv/2.0.7/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.0.7/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.0.7/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.7/configuring/reference > **Use MapReduce sparingly** > @@ -725,7 +725,7 @@ You can use streaming with Erlang via the Riak local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.0.7/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.0.7/developing/app-guide/cluster-metadata.md index 4edfb7dbfe..d3ab8291ed 100644 --- a/content/riak/kv/2.0.7/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.0.7/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.0.7/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.0.7/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.0.7/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.0.7/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. 
Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.0.7/developing/app-guide/replication-properties.md b/content/riak/kv/2.0.7/developing/app-guide/replication-properties.md index ecbf27b3c8..d72d949322 100644 --- a/content/riak/kv/2.0.7/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.0.7/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.7/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.7/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.7/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.0.7/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.0.7/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.0.7/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.0.7/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.7/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.7/learn/concepts/buckets) that you're using. 
In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.0.7/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.0.7/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.7/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.0.7/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.0.7/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. 
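To make the trade-offs above concrete, here is a minimal sketch of setting replication properties on a per-request basis with Basho's official Python client. The connection details, bucket, and key are illustrative only; equivalent options exist in all of the official client libraries.

```python
# A minimal sketch, assuming a local node with Protocol Buffers on the
# default port; bucket and key names are illustrative.
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('nba_stats')

# Write with a per-request W of 3 and DW of 2, overriding the
# bucket-level defaults for this single operation.
obj = bucket.new('michael_jordan', data={'points_per_game': 30.1})
obj.store(w=3, dw=2)

# Read with a per-request R of 3, i.e. wait for three vnodes to respond.
fetched = bucket.get('michael_jordan', r=3)
print(fetched.data)
```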
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.7/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.7/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.0.7/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.0.7/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.0.7/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.0.7/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.0.7/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.0.7/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.7/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.7/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.0.7/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.0.7/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.7/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.7/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.7/developing/app-guide/strong-consistency.md b/content/riak/kv/2.0.7/developing/app-guide/strong-consistency.md index f52908f325..f955cd7ffc 100644 --- a/content/riak/kv/2.0.7/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.0.7/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.0.7/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/kv/2.0.7/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.0.7/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.0.7/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.0.7/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.0.7/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.0.7/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.0.7/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.0.7/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.0.7/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/kv/2.0.7/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.0.7/developing/client-libraries -[getting started]: /riak/kv/2.0.7/developing/getting-started -[config strong consistency#details]: /riak/kv/2.0.7/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.0.7/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.0.7/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.0.7/using/cluster-operations/bucket-types +[apps replication properties]: 
{{}}riak/kv/2.0.7/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.0.7/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.0.7/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.0.7/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.0.7/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.0.7/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.0.7/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.0.7/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.0.7/developing/client-libraries +[getting started]: {{}}riak/kv/2.0.7/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.0.7/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.0.7/developing/client-libraries.md b/content/riak/kv/2.0.7/developing/client-libraries.md index bb79865a76..0320fae051 100644 --- a/content/riak/kv/2.0.7/developing/client-libraries.md +++ b/content/riak/kv/2.0.7/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.0.7/developing/data-types.md b/content/riak/kv/2.0.7/developing/data-types.md index f40edf4bac..5e12d43212 100644 --- a/content/riak/kv/2.0.7/developing/data-types.md +++ b/content/riak/kv/2.0.7/developing/data-types.md @@ -38,9 +38,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -261,5 +261,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. 
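Once a bucket type with the `datatype` property has been created, confirmed, and activated as in the steps above, working with a data type from a client is straightforward. Here is a minimal sketch with the official Python client; the `sets` type name and the `travel`/`cities` names are illustrative.

```python
# A minimal sketch, assuming an activated bucket type named 'sets'
# whose 'datatype' property is 'set'.
from riak import RiakClient
from riak.datatypes import Set

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket_type('sets').bucket('travel')

cities = Set(bucket, 'cities')
cities.add('Toronto')
cities.add('Montreal')
cities.store()

# Re-fetch to see the converged value; conflict resolution for
# data types is handled by Riak itself.
cities.reload()
print(cities.value)
```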
diff --git a/content/riak/kv/2.0.7/developing/faq.md b/content/riak/kv/2.0.7/developing/faq.md index f9395fd3f6..cc827c82a1 100644 --- a/content/riak/kv/2.0.7/developing/faq.md +++ b/content/riak/kv/2.0.7/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.0.7/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.0.7/using/performance/benchmarking -[Bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.0.7/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.0.7/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.0.7/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.0.7/developing/client-libraries -[MapReduce]: /riak/kv/2.0.7/developing/usage/mapreduce -[Memory]: /riak/kv/2.0.7/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.0.7/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.0.7/learn/concepts/causal-context#vector-clocks +[Basho Bench]: {{}}riak/kv/2.0.7/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.0.7/developing/usage +[commit hooks]: {{}}riak/kv/2.0.7/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.0.7/configuring/reference +[Erlang Riak Client]: {{}}riak/kv/2.0.7/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.0.7/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.0.7/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.0.7/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.0.7/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.0.7/developing/getting-started.md b/content/riak/kv/2.0.7/developing/getting-started.md index 45c6e9f5b5..09f7808d88 100644 --- a/content/riak/kv/2.0.7/developing/getting-started.md +++ b/content/riak/kv/2.0.7/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.0.7/setup/installing -[dev client libraries]: /riak/kv/2.0.7/developing/client-libraries +[install index]: {{}}riak/kv/2.0.7/setup/installing +[dev client libraries]: {{}}riak/kv/2.0.7/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.0.7/developing/getting-started/csharp.md b/content/riak/kv/2.0.7/developing/getting-started/csharp.md index 941830b86d..405a002fae 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/csharp.md +++ b/content/riak/kv/2.0.7/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.7/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.7/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.0.7/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.7/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.0.7/developing/getting-started/csharp/querying.md b/content/riak/kv/2.0.7/developing/getting-started/csharp/querying.md index b35f784edf..0e4da1b696 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.0.7/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.7/developing/getting-started/erlang.md b/content/riak/kv/2.0.7/developing/getting-started/erlang.md index 55b5389f01..e225465110 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/erlang.md +++ b/content/riak/kv/2.0.7/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.7/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.7/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.7/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.7/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.0.7/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.0.7/developing/getting-started/erlang/object-modeling.md index 4b6c050e88..4d1e7b72df 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.0.7/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.7/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.7/developing/getting-started/erlang/querying.md b/content/riak/kv/2.0.7/developing/getting-started/erlang/querying.md index 52e6efecba..23d1444a12 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.0.7/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.0.7/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.0.7/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.7/developing/getting-started/golang.md b/content/riak/kv/2.0.7/developing/getting-started/golang.md index 41d5400239..77b39343d6 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/golang.md +++ b/content/riak/kv/2.0.7/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.7/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.7/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.7/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.7/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.0.7/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.0.7/developing/getting-started/golang/object-modeling.md index c3f7abea4a..a1ba12ac8a 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.0.7/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.7/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.7/developing/getting-started/golang/querying.md b/content/riak/kv/2.0.7/developing/getting-started/golang/querying.md index c6d96881e2..14a1a193b6 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.0.7/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.0.7/developing/getting-started/java.md b/content/riak/kv/2.0.7/developing/getting-started/java.md index 40005a4538..2f38d7ed76 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/java.md +++ b/content/riak/kv/2.0.7/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.7/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.7/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.0.7/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.7/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.0.7/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.0.7/developing/getting-started/java/crud-operations.md index e0636850b7..a15c042f88 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.0.7/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.7/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.7/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -178,6 +178,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.7/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.7/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/) documentation. diff --git a/content/riak/kv/2.0.7/developing/getting-started/java/querying.md b/content/riak/kv/2.0.7/developing/getting-started/java/querying.md index b4764ed58f..3808254d0d 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.0.7/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.7/developing/getting-started/nodejs.md b/content/riak/kv/2.0.7/developing/getting-started/nodejs.md index c71cb2ba61..ee8afeaada 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.0.7/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.7/using/running-a-cluster) first. 
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.7/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.7/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.7/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.0.7/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.0.7/developing/getting-started/nodejs/querying.md index e9af4cea2c..6e79bcf8cf 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.0.7/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.7/developing/getting-started/php.md b/content/riak/kv/2.0.7/developing/getting-started/php.md index 26237d5d6e..885004ece7 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/php.md +++ b/content/riak/kv/2.0.7/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.7/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.7/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.7/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.7/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.0.7/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.0.7/developing/getting-started/php/crud-operations.md index 29eb308143..5babef6c2d 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.0.7/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. 
[In the next chapter](/riak/kv/2.0.7/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.0.7/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.0.7/developing/getting-started/php/querying.md b/content/riak/kv/2.0.7/developing/getting-started/php/querying.md index 22e3d1347a..910c040474 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.0.7/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.7/developing/getting-started/python.md b/content/riak/kv/2.0.7/developing/getting-started/python.md index d1d98ecd8a..5e23c816d3 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/python.md +++ b/content/riak/kv/2.0.7/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.7/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.7/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. 
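Before moving on, it can be worth confirming that the client can actually reach the node. A minimal connectivity check, assuming the client library was installed with `pip install riak` and a node is listening on the default Protocol Buffers port, might look like this:

```python
# A minimal sketch: connect to and ping a local node.
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
print(client.ping())  # True if the node is reachable
```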
## Next Steps -[CRUD Operations](/riak/kv/2.0.7/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.7/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.0.7/developing/getting-started/python/querying.md b/content/riak/kv/2.0.7/developing/getting-started/python/querying.md index 0700ba4e2a..68e8e07029 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.0.7/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.7/developing/getting-started/ruby.md b/content/riak/kv/2.0.7/developing/getting-started/ruby.md index 31b588432d..9902884c55 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/ruby.md +++ b/content/riak/kv/2.0.7/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.7/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.7/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.7/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.7/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.0.7/developing/getting-started/ruby/querying.md b/content/riak/kv/2.0.7/developing/getting-started/ruby/querying.md index c71abcf42f..8e6e5a5d19 100644 --- a/content/riak/kv/2.0.7/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.0.7/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.7/setup/planning/backend/leveldb). 
[Bitcask](/riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.7/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.7/developing/key-value-modeling.md b/content/riak/kv/2.0.7/developing/key-value-modeling.md index 24bda21d10..e767b703fe 100644 --- a/content/riak/kv/2.0.7/developing/key-value-modeling.md +++ b/content/riak/kv/2.0.7/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.0.7/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.0.7/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.0.7/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.0.7/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.0.7/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.0.7/developing/app-guide/) for a better sense of which features you might need. +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.0.7/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects. 
Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.0.7/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.0.7/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.0.7/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.0.7/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.0.7/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.0.7/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.0.7/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.0.7/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.0.7/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.0.7/developing/data-types/#sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.0.7/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.0.7/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.0.7/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.0.7/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. 
The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.0.7/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.7/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.7/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.7/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.7/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.0.7/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.0.7/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.0.7/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.0.7/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.0.7/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.0.7/developing/usage/commit-hooks.md b/content/riak/kv/2.0.7/developing/usage/commit-hooks.md index f8c680e7f4..335f1a379b 100644 --- a/content/riak/kv/2.0.7/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.0.7/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. -Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.0.7/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.0.7/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. 
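The hook functions themselves run inside Riak and are typically written in Erlang; client libraries only attach them to a bucket's properties. As a rough sketch, and assuming a hypothetical Erlang module `validate_json` exporting `precommit/1` has already been deployed to the cluster's code path, attaching it from the official Python client might look like this:

```python
# A minimal sketch; the 'validate_json' module and its exported
# 'precommit/1' function are hypothetical and must already be on
# Riak's code path. The bucket name is illustrative.
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('validated_data')

bucket.set_property('precommit', [{'mod': 'validate_json', 'fun': 'precommit'}])
print(bucket.get_property('precommit'))
```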
## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.0.7/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.0.7/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.0.7/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.0.7/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.0.7/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.0.7/developing/usage/conflict-resolution.md b/content/riak/kv/2.0.7/developing/usage/conflict-resolution.md index f908cd08f9..b9b2699dda 100644 --- a/content/riak/kv/2.0.7/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.0.7/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.7/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.0.7/learn/concepts/clusters) system in which any [node](/riak/kv/2.0.7/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{}}riak/kv/2.0.7/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.0.7/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. 
-If you are using Riak in an [eventually consistent](/riak/kv/2.0.7/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is
+If you are using Riak in an [eventually consistent]({{}}riak/kv/2.0.7/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
 unavoidable. Often, Riak can resolve these conflicts on its own
-internally if you use causal context, i.e. [vector clocks](/riak/kv/2.0.7/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.0.7/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.0.7/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.0.7/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).

{{% note title="Important note on terminology" %}}
In versions of Riak prior to 2.0, vector clocks were the only causal context
@@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following
two options:

1. If your data can be modeled as one of the currently available [Riak
-   Data Types](/riak/kv/2.0.7/developing/data-types), we recommend using one of these types,
+   Data Types]({{}}riak/kv/2.0.7/developing/data-types), we recommend using one of these types,
   because all of them have conflict resolution _built in_, completely
   relieving applications of the need to engage in conflict resolution.
2. If your data cannot be modeled as one of the available Data Types,
@@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak
as an _eventually_ consistent system. If you'd like to use Riak's
strong consistency feature, please refer to the following documents:
>
-> * [Using Strong Consistency](/riak/kv/2.0.7/developing/app-guide/strong-consistency) --- A guide for developers
-> * [Managing Strong Consistency](/riak/kv/2.0.7/configuring/strong-consistency) --- A guide for operators
+> * [Using Strong Consistency]({{}}riak/kv/2.0.7/developing/app-guide/strong-consistency) --- A guide for developers
+> * [Managing Strong Consistency]({{}}riak/kv/2.0.7/configuring/strong-consistency) --- A guide for operators
> * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency

@@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents:

Riak's eventual consistency model is powerful because Riak is
fundamentally non-opinionated about how data resolution takes place.
-While Riak _does_ have a set of [defaults](/riak/kv/2.0.7/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+While Riak _does_ have a set of [defaults]({{}}riak/kv/2.0.7/developing/app-guide/replication-properties#available-parameters), there are a variety of general
approaches to conflict resolution that are available. In Riak, you can
mix and match conflict resolution strategies at the bucket level,
-[using bucket types][usage bucket types]. The most important [bucket properties](/riak/kv/2.0.7/learn/concepts/buckets)
+[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.0.7/learn/concepts/buckets)
to consider when reasoning about conflict resolution are the
`allow_mult` and `last_write_wins` properties.
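Since `allow_mult` and `last_write_wins` are ordinary bucket properties, they can be inspected and changed from a client as well as with `riak-admin`. Here is a minimal sketch using the official Python client, assuming `get_properties`/`set_properties` behave as in the 2.x client; the `siblings_allowed`/`nickolodeon` names follow the example later in this document, while `scratch` is a placeholder.

```python
import riak

client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')

# Inspect the two properties that govern conflict handling
props = bucket.get_properties()
print(props['allow_mult'], props['last_write_wins'])

# A default-type bucket can be opted out of siblings entirely
scratch = client.bucket('scratch')
scratch.set_properties({'allow_mult': False, 'last_write_wins': True})
```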
@@ -87,7 +87,7 @@ If the [`allow_mult`](#siblings) parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -[`last_write_wins`](/riak/kv/2.0.7/learn/concepts/buckets). If `last_write_wins` is set to `false`, +[`last_write_wins`]({{}}riak/kv/2.0.7/learn/concepts/buckets). If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.0.7/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.7/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.7/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.7/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.7/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.0.7/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.0.7/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.0.7/configuring/reference) to change the [default bucket properties](/riak/kv/2.0.7/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.0.7/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.0.7/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.0.7/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.0.7/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{}}riak/kv/2.0.7/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.0.7/learn/concepts/causal-context#vector-clocks) will be used. 
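In practice, the client libraries spare you from handling the causal context directly, whichever form `dvv_enabled` selects: if you fetch an object before updating it, the fetched context rides along with the subsequent store. A sketch of that read-modify-write cycle with the Python client; the key and value here are hypothetical, not from the tutorial.

```python
import riak

client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')

# Fetching first attaches the object's current causal context...
obj = bucket.get('best_character')
obj.data = 'Stimpy'
# ...so this store tells Riak exactly which version is being superseded
obj.store()
```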
Causal context essentially enables Riak to compare the different values
of objects stored in Riak and to determine a number of important things
@@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context
when updating objects, _unless you are certain that no object exists
under that key_. Failing to use causal context with mutable data,
especially for objects that are frequently updated, can lead to
-[sibling explosion](/riak/kv/2.0.7/using/performance/latency-reduction#siblings), which can
+[sibling explosion]({{}}riak/kv/2.0.7/using/performance/latency-reduction#siblings), which can
produce a variety of problems in your cluster. Fortunately, much of the
work involved with using causal context is handled automatically by
-Basho's official [client libraries](/riak/kv/2.0.7/developing/client-libraries). Examples can be found for each
-client library in the [Object Updates](/riak/kv/2.0.7/developing/usage/updating-objects) document.
+Basho's official [client libraries]({{}}riak/kv/2.0.7/developing/client-libraries). Examples can be found for each
+client library in the [Object Updates]({{}}riak/kv/2.0.7/developing/usage/updating-objects) document.

## Siblings

@@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in
which case the object will be given a sibling. These writes could happen
on the same node or on different nodes.
2. **Stale causal context** --- Writes from any client using a stale
-[causal context](/riak/kv/2.0.7/learn/concepts/causal-context). This is a less likely scenario if a client updates
+[causal context]({{}}riak/kv/2.0.7/learn/concepts/causal-context). This is a less likely scenario if a client updates
the object by reading the object first, fetching the causal context
currently attached to the object, and then returning that causal context
to Riak when performing the update (fortunately, our client libraries
@@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key

> **Getting started with Riak KV clients**
>
> If you are connecting to Riak using one of Basho's official
-[client libraries](/riak/kv/2.0.7/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.7/developing/getting-started) section.
+[client libraries]({{}}riak/kv/2.0.7/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.7/developing/getting-started) section.

At this point, multiple objects have been stored in the same key without
passing any causal context to Riak. Let's see what happens if we try to
@@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user.
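To make the sibling mechanics above concrete, here is a rough sketch of client-side resolution with the Python client. It assumes that sibling contents expose a `last_modified` timestamp, and it uses a last-timestamp-wins rule purely for brevity; as the language-specific pages stress, real applications should merge according to their data model.

```python
import riak

client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')

obj = bucket.get('best_character')  # hypothetical key
if len(obj.siblings) > 1:
    # Keep the most recently written sibling and write it back with the
    # fetched causal context, collapsing the conflict
    newest = max(obj.siblings, key=lambda s: s.last_modified)
    obj.siblings = [newest]
    obj.store()
```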
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.0.7/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.7/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.7/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.7/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.7/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -610,7 +610,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.0.7/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.0.7/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -665,7 +665,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/csharp.md index fd29e1bc8b..4cb49da398 100644 --- a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.7/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
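Stepping back to the pruning parameters discussed above: they, too, are ordinary bucket properties, so they can be tuned per bucket from a client. A sketch with the Python client; the property names here (`small_vclock`, `big_vclock`, `young_vclock`, `old_vclock`) are Riak's standard pruning knobs, but verify them against your Riak version, and the `pruning_demo` bucket is a placeholder.

```python
import riak

client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('pruning_demo')  # hypothetical bucket

# Loosen the pruning thresholds for this bucket only
bucket.set_properties({
    'small_vclock': 50,
    'big_vclock': 50,
    'young_vclock': 20,      # seconds
    'old_vclock': 86400,     # seconds
})
```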
diff --git a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/golang.md
index 8f42906e0e..20e82893f9 100644
--- a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/golang.md
+++ b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/golang.md
@@ -16,7 +16,7 @@ aliases:
 - /riak/kv/2.0.7/dev/using/conflict-resolution/golang
---

-For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
requires applications to resolve siblings according to use-case-specific
criteria. Here, we'll provide a brief guide to conflict resolution using the
official [Riak Go client](https://github.com/basho/riak-go-client).
diff --git a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/java.md
index 503dd66b64..03915b33f3 100644
--- a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/java.md
+++ b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/java.md
@@ -16,7 +16,7 @@ aliases:
 - /riak/kv/2.0.7/dev/using/conflict-resolution/java
---

-For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a
+For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a
conflict resolution strategy that requires applications to resolve
siblings according to use-case-specific criteria. Here, we'll provide a
brief guide to conflict resolution using the official [Riak Java
@@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing
lists of usernames representing each user's "friends" in the network.
Each user will bear the class `User`, which we'll create below. All of
the data for our application will be stored in buckets that bear the
-[bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) `siblings`, and for this bucket type
+[bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) `siblings`, and for this bucket type
`allow_mult` is set to `true`, which means that Riak will generate
siblings in certain cases---siblings that our application will need to
be equipped to resolve when they arise.
@@ -189,7 +189,7 @@ step is the subject of this tutorial) made

You can find more on writing objects to Riak, including examples from
-the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.7/developing/usage) section.
+the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.7/developing/usage) section.

## More Advanced Example

@@ -258,9 +258,9 @@ always carry potential drawbacks of this sort.

## Riak Data Types

An important thing to always bear in mind when working with conflict
-resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.7/developing/data-types/) that have
+resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.7/developing/data-types/) that have
specific conflict resolution mechanics built in. If you have data that
If you have data that -can be modeled as a [counter](/riak/kv/2.0.7/developing/data-types/#counters), [set](/riak/kv/2.0.7/developing/data-types/#sets), or [map](/riak/kv/2.0.7/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.7/developing/data-types/#counters), [set]({{}}riak/kv/2.0.7/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.7/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.7/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.7/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/nodejs.md index 7ad4704fa1..85d56e870a 100644 --- a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.7/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/php.md index 73762bd19f..f7cd1b4493 100644 --- a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.7/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.7/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.7/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.7/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.7/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.7/developing/data-types/#counters), [set](/riak/kv/2.0.7/developing/data-types/#sets), or [map](/riak/kv/2.0.7/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.7/developing/data-types/#counters), [set]({{}}riak/kv/2.0.7/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.7/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.7/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.7/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/python.md index 4bafde9070..dc4c25c803 100644 --- a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.7/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
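As a rough sketch of the shape that class and its resolution logic take in the Python tutorial: the resolver below keeps the sibling with the longest friends list, which mirrors the merge rule this tutorial builds (and, as noted later, cannot distinguish an addition from a removal). Attaching it to the bucket lets the client apply it automatically whenever a fetch returns more than one sibling.

```python
import riak

class User(object):
    def __init__(self, username, friends):
        self.username = username
        self.friends = friends

    def to_json(self):
        return vars(self)

def longest_friends_list_resolver(riak_object):
    # Keep only the sibling whose friends list is longest
    lm = lambda sibling: len(sibling.data['friends'])
    riak_object.siblings = [max(riak_object.siblings, key=lm)]

client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket_type('siblings').bucket('users')
bucket.resolver = longest_friends_list_resolver
```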
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.7/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.7/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.7/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.7/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.7/developing/data-types/#counters), [set](/riak/kv/2.0.7/developing/data-types/#sets), or [map](/riak/kv/2.0.7/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.7/developing/data-types/#counters), [set]({{}}riak/kv/2.0.7/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.7/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.7/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.7/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/ruby.md index 7df15d3eed..585f09fc69 100644 --- a/content/riak/kv/2.0.7/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.0.7/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.7/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.7/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.7/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.7/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.7/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.7/developing/data-types/#counters), [set](/riak/kv/2.0.7/developing/data-types/#sets), or [map](/riak/kv/2.0.7/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.7/developing/data-types/#counters), [set]({{}}riak/kv/2.0.7/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.7/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.7/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.7/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.7/developing/usage/creating-objects.md b/content/riak/kv/2.0.7/developing/usage/creating-objects.md index b4c2fce906..15728c6209 100644 --- a/content/riak/kv/2.0.7/developing/usage/creating-objects.md +++ b/content/riak/kv/2.0.7/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.0.7/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.0.7/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -27,7 +27,7 @@ In the example above, our read was unsuccessful because our Riak cluster is currently empty. Let's change that by storing an object containing information about a dog named Rufus. We'll store that object in the location described above, i.e. in the key `rufus` in the bucket `dogs`, -which bears the `animals` [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types). 
+which bears the `animals` [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types). The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -122,7 +122,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, run the same read operation in [Reading Objects](/riak/kv/2.0.7/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no +Now, run the same read operation in [Reading Objects]({{}}riak/kv/2.0.7/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no longer empty! ### Store an Object @@ -143,7 +143,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.0.7/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.0.7/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.0.7/developing/usage/custom-extractors.md b/content/riak/kv/2.0.7/developing/usage/custom-extractors.md index ab08df6fae..8130b7ce62 100644 --- a/content/riak/kv/2.0.7/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.0.7/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.0.7/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.0.7/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.0.7/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.0.7/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.0.7/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added to `` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.0.7/developing/usage/deleting-objects.md b/content/riak/kv/2.0.7/developing/usage/deleting-objects.md index c19c18ce71..a667317e2a 100644 --- a/content/riak/kv/2.0.7/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.0.7/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.0.7/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.0.7/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.0.7/developing/usage/document-store.md b/content/riak/kv/2.0.7/developing/usage/document-store.md index 07fc0cd496..781cd46d61 100644 --- a/content/riak/kv/2.0.7/developing/usage/document-store.md +++ b/content/riak/kv/2.0.7/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.0.7/developing/usage/search/) and [Riak Data Types](/riak/kv/2.0.7/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.0.7/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.0.7/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.0.7/developing/data-types/#maps). +[Riak maps]({{}}riak/kv/2.0.7/developing/data-types/#maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.0.7/developing/data-types/#maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.0.7/developing/data-types/#maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**. 
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.0.7/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.0.7/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.0.7/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.0.7/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.0.7/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.0.7/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.0.7/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.0.7/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.0.7/developing/usage/mapreduce.md b/content/riak/kv/2.0.7/developing/usage/mapreduce.md index 202b2c8969..6818882016 100644 --- a/content/riak/kv/2.0.7/developing/usage/mapreduce.md +++ b/content/riak/kv/2.0.7/developing/usage/mapreduce.md @@ -33,9 +33,9 @@ transferring a potentially huge dataset to a client algorithm. Developers can use MapReduce for things like filtering documents by tags, counting words in documents, and extracting links to related data. In Riak, MapReduce is one method for querying that is not strictly based -on key querying, alongside [secondary indexes](/riak/kv/2.0.7/developing/usage/secondary-indexes/) -and [Search](/riak/kv/2.0.7/developing/usage/search/). MapReduce jobs can be submitted through the -[HTTP API](/riak/kv/2.0.7/developing/api/http) or the [Protocol Buffers API](/riak/kv/2.0.7/developing/api/protocol-buffers/), although we +on key querying, alongside [secondary indexes]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes/) +and [Search]({{}}riak/kv/2.0.7/developing/usage/search/). MapReduce jobs can be submitted through the +[HTTP API]({{}}riak/kv/2.0.7/developing/api/http) or the [Protocol Buffers API]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/), although we strongly recommend using the Protocol Buffers API for performance reasons. @@ -49,9 +49,9 @@ reasons. ## When to Use MapReduce * When you know the set of objects over which you want to MapReduce - (i.e. the locations of the objects, as specified by [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types), bucket, and key) + (i.e. the locations of the objects, as specified by [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types), bucket, and key) * When you want to return actual objects or pieces of objects and not - just the keys. 
[Search](/riak/kv/2.0.7/developing/usage/search/) and [secondary indexes](/riak/kv/2.0.7/developing/usage/secondary-indexes) are other means of returning objects based on + just the keys. [Search]({{}}riak/kv/2.0.7/developing/usage/search/) and [secondary indexes]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) are other means of returning objects based on non-key-based queries, but they only return lists of keys and not whole objects. * When you need the utmost flexibility in querying your data. MapReduce @@ -86,7 +86,7 @@ Riak MapReduce queries have two components: * A list of phases The elements of the input list are object locations as specified by -[bucket type](/riak/kv/2.0.7/developing/usage/bucket-types), bucket, and key. The elements of the +[bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types), bucket, and key. The elements of the phases list are chunks of information related to a map, a reduce, or a link function. @@ -96,7 +96,7 @@ node that the client contacts to make the request becomes the above, each job consists of a list of phases, where each phase is either a map or a reduce phase. The coordinating node uses the list of phases to route the object keys and the function that will operate over the -objects stored in those keys and instruct the proper [vnode](/riak/kv/2.0.7/learn/glossary/#vnode) to +objects stored in those keys and instruct the proper [vnode]({{}}riak/kv/2.0.7/learn/glossary/#vnode) to run that function over the right objects. After running the map function, the results are sent back to the @@ -107,20 +107,20 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example In this example, we'll create four objects with the text "caremad" repeated a varying number of times and store those objects in the bucket -`training` (which does not bear a [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types)). +`training` (which does not bear a [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types)). An Erlang MapReduce function will be used to count the occurrences of the word "caremad." ### Data object input commands For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) -in conjunction with Riak's [HTTP API](/riak/kv/2.0.7/developing/api/http) to store the objects: +in conjunction with Riak's [HTTP API]({{}}riak/kv/2.0.7/developing/api/http) to store the objects: ```curl curl -XPUT http://localhost:8098/buckets/training/keys/foo \ @@ -218,4 +218,4 @@ counting the number of instances of the word. ## Advanced MapReduce Queries For more detailed information on MapReduce queries in Riak, we recommend -checking out our [Advanced MapReduce](/riak/kv/2.0.7/developing/app-guide/advanced-mapreduce) guide. +checking out our [Advanced MapReduce]({{}}riak/kv/2.0.7/developing/app-guide/advanced-mapreduce) guide. diff --git a/content/riak/kv/2.0.7/developing/usage/reading-objects.md b/content/riak/kv/2.0.7/developing/usage/reading-objects.md index 228d5fde87..8542b719f5 100644 --- a/content/riak/kv/2.0.7/developing/usage/reading-objects.md +++ b/content/riak/kv/2.0.7/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. 
You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.0.7/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) `animals`: +`dogs`, which bears the [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) `animals`: ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.0.7/developing/usage/replication.md b/content/riak/kv/2.0.7/developing/usage/replication.md index 6aaed71635..b4b867bfa4 100644 --- a/content/riak/kv/2.0.7/developing/usage/replication.md +++ b/content/riak/kv/2.0.7/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.7/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.7/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.7/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using +Using Strong Consistency documentation, as this option will not be covered in this tutorial. 
{{% /note %}}

@@ -72,7 +72,7 @@ Riak is to specify those properties

### Replication Properties Through Bucket Types

Let's say, for example, that you want to apply an `n_val` of 5, an `r`
-of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.7/learn/concepts/buckets) that
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.7/learn/concepts/buckets) that
you're using. In order to set those replication properties, you should
create a bucket type that sets those properties. Below is an example:

@@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description
`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
`r` | R | `quorum` | The number of servers that must respond to a read request
`w` | W | `quorum` | Number of servers that must respond to a write request
-`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
-`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).

@@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description

## A Primer on N, R, and W

The most important thing to note about Riak's replication controls is
-that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.7/developing/usage/bucket-types)
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types)
to set up bucket `A` to use a particular set of replication properties
and bucket `B` to use entirely different properties.

@@ -318,7 +318,7 @@ seeks to write the object to is unavailable.

## Primary Reads and Writes with PR and PW

-In Riak's replication model, there are N [vnodes](/riak/kv/2.0.7/learn/glossary/#vnode),
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.7/learn/glossary/#vnode),
called _primary vnodes_, that hold primary responsibility for any given
key. Riak will attempt reads and writes to primary vnodes first, but in
case of failure, those operations will go to failover nodes in order to

@@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below).

How quickly and robustly data is written to disk depends on the
configuration of your backend or backends. For more details, see the
-documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.0.7/setup/planning/backend/multi).
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.0.7/setup/planning/backend/multi).
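These quorum values can also be supplied per request rather than baked into a bucket type. A minimal sketch with the Python client, mirroring the `nba_stats` curl example that appears later in this section; the `r`/`w`/`dw` keyword arguments follow the 2.x client's fetch and store signatures.

```python
import riak

client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('nba_stats')

# Write with W=3 and DW=2 for this request only
obj = bucket.new('michael_jordan', data={'ppg': 30.1})
obj.store(w=3, dw=2)

# Read with R=3: wait for three vnodes to respond before answering
fetched = bucket.get('michael_jordan', r=3)
```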
## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.7/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.7/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.0.7/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.0.7/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.7/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.7/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.7/developing/usage/search-schemas.md b/content/riak/kv/2.0.7/developing/usage/search-schemas.md index 61890d332e..19532900ca 100644 --- a/content/riak/kv/2.0.7/developing/usage/search-schemas.md +++ b/content/riak/kv/2.0.7/developing/usage/search-schemas.md @@ -15,17 +15,17 @@ aliases: - /riak/kv/2.0.7/dev/advanced/search-schema --- -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). Riak Search is built for ease of use, allowing you to write values into Riak and query for values using Solr. Riak Search does a lot of work -under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.0.7/developing/data-types/), and [more](/riak/kv/2.0.7/developing/usage/custom-extractors)---into something that can be indexed and searched later. +under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.0.7/developing/data-types/), and [more]({{}}riak/kv/2.0.7/developing/usage/custom-extractors)---into something that can be indexed and searched later. Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing and array of strings? An integer? A date? Is your text in English or Russian? 
You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.0.7/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.0.7/developing/usage/search.md b/content/riak/kv/2.0.7/developing/usage/search.md index f3edac4abe..0b19ff4bdf 100644 --- a/content/riak/kv/2.0.7/developing/usage/search.md +++ b/content/riak/kv/2.0.7/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.0.7/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.0.7/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.7/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.7/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.7/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.7/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.0.7/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.0.7/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.0.7/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.0.7/developing/usage/custom-extractors). 
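Returning to index creation for a moment: the `famous` index setup shown earlier with curl can also be done from the Python client, assuming `create_search_index` and the `search_index` bucket property behave as in the 2.x client. The `animals`/`cats` names here are placeholders drawn from related examples in these docs.

```python
import riak

client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

# Create the index (default schema) and associate it with a bucket
client.create_search_index('famous')
cats = client.bucket_type('animals').bucket('cats')
cats.set_properties({'search_index': 'famous'})
```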
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.0.7/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.0.7/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.0.7/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.0.7/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.0.7/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.0.7/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.0.7/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.0.7/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.0.7/developing/usage/searching-data-types.md b/content/riak/kv/2.0.7/developing/usage/searching-data-types.md index d9b4325838..6d3a35c594 100644 --- a/content/riak/kv/2.0.7/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.0.7/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.7/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.0.7/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.0.7/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.0.7/developing/data-types/#counters), [sets](/riak/kv/2.0.7/developing/data-types/#sets), and [maps](/riak/kv/2.0.7/developing/data-types/#maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.0.7/developing/data-types/#counters), [sets]({{}}riak/kv/2.0.7/developing/data-types/#sets), and [maps]({{}}riak/kv/2.0.7/developing/data-types/#maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
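To make the multi-valued `people_ss` extraction example above concrete, here is a hedged Python sketch that stores that JSON value and queries one of the extracted values. The `people` bucket and `team` key are invented for illustration, and the bucket is assumed to already be associated with a search index named `famous`.

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('people')  # assumed to be associated with 'famous'

# The JSON extractor indexes each element of the multi-valued _ss field
# separately: people_ss=Ryan, people_ss=Eric, people_ss=Brett.
obj = bucket.new('team', data={'people_ss': ['Ryan', 'Eric', 'Brett']})
obj.store()

results = client.fulltext_search('famous', 'people_ss:Ryan')
print(results['num_found'], results['docs'])
```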
### Top-level Schemas -The default schema for [counters](/riak/kv/2.0.7/developing/data-types/#counters) indexes each +The default schema for [counters]({{}}riak/kv/2.0.7/developing/data-types/#counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.0.7/developing/data-types/#sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.0.7/developing/data-types/#sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.0.7/developing/data-types/#maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.0.7/developing/data-types/#maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) for [storing counters](/riak/kv/2.0.7/developing/data-types/#counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.0.7/developing/data-types/#counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types) for [storing sets](/riak/kv/2.0.7/developing/data-types/#sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.0.7/developing/data-types/#sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.0.7/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.0.7/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.0.7/developing/data-types/#maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.0.7/developing/data-types/#maps), and we'd suggest that you familiarize yourself with that tutorial first. 
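Before digging into the map example, here is a rough Python rendering of the counters workflow above, under the assumption that the `counters` bucket type has been created and activated as shown and that the `scores` index is associated with the bucket. The `people` bucket and `player1` key are placeholders.

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

# The 'counters' bucket type (datatype = counter) is assumed to exist and
# to have the 'scores' search index associated with its buckets.
bucket = client.bucket_type('counters').bucket('people')

counter = bucket.new('player1')  # hypothetical player key
counter.increment(17)
counter.store()

# Counters are indexed as integers, so the range queries shown above apply.
results = client.fulltext_search('scores', 'counter:[10 TO *]')
print(results['num_found'])
```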
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.0.7/developing/usage/secondary-indexes.md b/content/riak/kv/2.0.7/developing/usage/secondary-indexes.md index 35e2e23285..2dd35b5096 100644 --- a/content/riak/kv/2.0.7/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.0.7/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.0.7/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.7/setup/planning/backend/memory -[use ref strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.7/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.7/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.0.7/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.0.7/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.0.7/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.0.7/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.0.7/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.0.7/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.0.7/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.0.7/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.0.7/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
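As a sketch of what 2i tagging and querying look like from Basho's official Python client, consider the following; the `users` bucket and `john_smith` key echo the examples on this page, while the `twitter_bin` and `logins_int` index names and their values are illustrative placeholders.

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('users')

# Tag the object at write time with a binary index and an integer index.
obj = bucket.new('john_smith', data={'name': 'John Smith'})
obj.add_index('twitter_bin', 'jsmith123')
obj.add_index('logins_int', 1502)
obj.store()

# Exact match on the binary index...
print(bucket.get_index('twitter_bin', 'jsmith123').results)
# ...and a range query on the integer index.
print(bucket.get_index('logins_int', 1500, 1509).results)
```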
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.0.7/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.0.7/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.0.7/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.0.7/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.0.7/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.0.7/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.7/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.7/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.7/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.7/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.0.7/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.0.7/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.0.7/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.0.7/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
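The feature list above also mentions pagination and streaming of 2i results; a hedged Python sketch of both follows, reusing the hypothetical `logins_int` index from the previous sketch.

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('users')

# Page through a range query five results at a time; each page carries a
# continuation token that is used to fetch the next page.
page = bucket.get_index('logins_int', 1500, 1509, max_results=5)
print(page.results, page.continuation)
if page.has_next_page():
    print(page.next_page().results)

# Alternatively, stream results instead of buffering them all in memory.
stream = bucket.stream_index('logins_int', 1500, 1509)
for chunk in stream:  # each iteration yields a batch of matching keys
    print(chunk)
stream.close()
```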
### Exact Match diff --git a/content/riak/kv/2.0.7/developing/usage/security.md b/content/riak/kv/2.0.7/developing/usage/security.md index 7e1f245351..76c2591581 100644 --- a/content/riak/kv/2.0.7/developing/usage/security.md +++ b/content/riak/kv/2.0.7/developing/usage/security.md @@ -15,49 +15,49 @@ aliases: - /riak/kv/2.0.7/dev/advanced/client-security --- -Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.0.7/using/security/basics) that enables you to choose +Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.0.7/using/security/basics) that enables you to choose * which Riak users/clients are authorized to perform a wide variety of Riak operations, and * how those users/clients are required to authenticate themselves. -The following four authentication mechanisms, aka [security sources](/riak/kv/2.0.7/using/security/managing-sources/) are available: +The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.0.7/using/security/managing-sources/), are available: -* [Trust](/riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication)-based +* [Trust]({{}}riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default -* [Password](/riak/kv/2.0.7/using/security/managing-sources/#password-based-authentication)-based authentication requires +* [Password]({{}}riak/kv/2.0.7/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password -* [Certificate](/riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication +* [Certificate]({{}}riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients authenticate themselves using a certificate -* [Pluggable authentication module (PAM)](/riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication)-based authentication requires +* [Pluggable authentication module (PAM)]({{}}riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the - [`riak-admin security`](/riak/kv/2.0.7/using/security/managing-sources/#managing-sources) + [`riak-admin security`]({{}}riak/kv/2.0.7/using/security/managing-sources/#managing-sources) command line interface Riak's approach to security is highly flexible. If you choose to use Riak's security feature, you do not need to require that all clients authenticate via the same means. Instead, you can specify authentication sources on a client-by-client, i.e. user-by-user, basis. This means that -you can require clients performing, say, [MapReduce](/riak/kv/2.0.7/developing/usage/mapreduce/) -operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.0.7/developing/usage) have to use username and password. The approach +you can require clients performing, say, [MapReduce]({{}}riak/kv/2.0.7/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.0.7/developing/usage) have to use username and password. The approach that you adopt will depend on your security needs. This document provides a general overview of how that works.
For managing security in Riak itself, see the following documents: -* [Authentication and Authorization](/riak/kv/2.0.7/using/security/basics) -* [Managing Security Sources](/riak/kv/2.0.7/using/security/managing-sources/) +* [Authentication and Authorization]({{}}riak/kv/2.0.7/using/security/basics) +* [Managing Security Sources]({{}}riak/kv/2.0.7/using/security/managing-sources/) We also provide client-library-specific guides for the following officially supported clients: -* [Java](/riak/kv/2.0.7/developing/usage/security/java) -* [Ruby](/riak/kv/2.0.7/developing/usage/security/ruby) -* [PHP](/riak/kv/2.0.7/developing/usage/security/php) -* [Python](/riak/kv/2.0.7/developing/usage/security/python) -* [Erlang](/riak/kv/2.0.7/developing/usage/security/erlang) +* [Java]({{}}riak/kv/2.0.7/developing/usage/security/java) +* [Ruby]({{}}riak/kv/2.0.7/developing/usage/security/ruby) +* [PHP]({{}}riak/kv/2.0.7/developing/usage/security/php) +* [Python]({{}}riak/kv/2.0.7/developing/usage/security/python) +* [Erlang]({{}}riak/kv/2.0.7/developing/usage/security/erlang) ## Certificates, Keys, and Authorities @@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients. > **HTTP not supported** > > Certificate-based authentication is available only through Riak's -[Protocol Buffers](/riak/kv/2.0.7/developing/api/protocol-buffers/) interface. It is not available through the -[HTTP API](/riak/kv/2.0.7/developing/api/http). +[Protocol Buffers]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{}}riak/kv/2.0.7/developing/api/http). ### Default Names -In Riak's [configuration files](/riak/kv/2.0.7/configuring/reference/#security), the +In Riak's [configuration files]({{}}riak/kv/2.0.7/configuring/reference/#security), the default certificate file names are as follows: Cert | Filename diff --git a/content/riak/kv/2.0.7/developing/usage/security/erlang.md b/content/riak/kv/2.0.7/developing/usage/security/erlang.md index 3ff695a40c..cfcb60b2f0 100644 --- a/content/riak/kv/2.0.7/developing/usage/security/erlang.md +++ b/content/riak/kv/2.0.7/developing/usage/security/erlang.md @@ -19,9 +19,9 @@ aliases: This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak. -If you are using [trust](/riak/kv/2.0.7/using/security/managing-sources/), [PAM-](/riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.0.7/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.0.7/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.7/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.7/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.7/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.7/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.7/developing/usage/security/java.md b/content/riak/kv/2.0.7/developing/usage/security/java.md index 794b3b9417..17f241cd14 100644 --- a/content/riak/kv/2.0.7/developing/usage/security/java.md +++ b/content/riak/kv/2.0.7/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.7/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.7/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.0.7/developing/usage/security/php.md b/content/riak/kv/2.0.7/developing/usage/security/php.md index 3ed4999157..49fd3cb496 100644 --- a/content/riak/kv/2.0.7/developing/usage/security/php.md +++ b/content/riak/kv/2.0.7/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.7/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.0.7/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.0.7/developing/usage/security/python.md b/content/riak/kv/2.0.7/developing/usage/security/python.md index 0dec52fa97..25b5464a19 100644 --- a/content/riak/kv/2.0.7/developing/usage/security/python.md +++ b/content/riak/kv/2.0.7/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.7/using/security/managing-sources/) or [PAM-](/riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.0.7/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security +setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.0.7/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.7/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.7/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.0.7/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.7/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.7/using/security/basics/#user-management).
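Piecing the fragments above together, a complete password-based setup in the Python client might look like this; the username, password, and certificate path are the placeholder values used throughout this page.

```python
from riak import RiakClient
from riak.security import SecurityCreds

# The username/password pair must match a user created with
# 'riak-admin security', and the client needs access to the same CA
# used on the server side.
creds = SecurityCreds(username='riakuser',
                      password='rosebud',
                      cacert_file='/ssl_dir/cacertfile.pem')

client = RiakClient(credentials=creds, host='127.0.0.1', pb_port=8087)
print(client.ping())
```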
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.0.7/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.0.7/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.0.7/developing/usage/security/ruby.md b/content/riak/kv/2.0.7/developing/usage/security/ruby.md index 8b16631830..7c3dd481e4 100644 --- a/content/riak/kv/2.0.7/developing/usage/security/ruby.md +++ b/content/riak/kv/2.0.7/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.7/using/security/managing-sources/) or [PAM](/riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.0.7/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.0.7/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.0.7/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.7/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.0.7/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication). 
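The Python example for the `ciphers` parameter referenced above might look like the following sketch; the cipher list itself is a placeholder, not a recommendation.

```python
from riak.security import SecurityCreds

# A colon-delimited, OpenSSL-style cipher list.
creds = SecurityCreds(username='riakuser',
                      cacert_file='/ssl_dir/cacertfile.pem',
                      ciphers='DHE-RSA-AES256-SHA:ECDHE-RSA-AES128-SHA256')
```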
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.7/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.7/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.0.7/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.7/developing/usage/updating-objects.md b/content/riak/kv/2.0.7/developing/usage/updating-objects.md index 78cb9e5ba7..ba5d0af31c 100644 --- a/content/riak/kv/2.0.7/developing/usage/updating-objects.md +++ b/content/riak/kv/2.0.7/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/dev/using/updates --- -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode ## Using Causal Context If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.0.7/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.0.7/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.0.7/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.0.7/learn/concepts/causal-context). These context objects track the causal history of Riak objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps: 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.0.7/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.0.7/learn/concepts/causal-context)) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.0.7/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.0.7/learn/concepts/causal-context). Although a more detailed tutorial on context objects and +object updates can be found in [Conflict Resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution), we'll walk you through a basic example here.
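As a sketch of those three steps in the Python client, anticipating the example below with a hypothetical `nba` bucket and `champion` key:

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('nba')  # hypothetical bucket

# 1. Fetch the object; the causal context rides along on it.
obj = bucket.get('champion')

# 2. Modify only the value, leaving the fetched context untouched.
obj.data = {'team': 'Washington Generals'}

# 3. Write it back; the context tells Riak that this update descends
#    from the value that was read.
obj.store()
```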
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.0.7/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.0.7/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.0.7/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.0.7/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.0.7/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.0.7/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.0.7/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.0.7/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.0.7/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.0.7/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.0.7/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.0.7/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
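Continuing the hypothetical `nba`/`champion` example, here is a hedged Python sketch of the sibling-resolution half of step 1; a real resolver would merge siblings or choose by business rules rather than naively keeping the first one.

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('nba')

obj = bucket.get('champion')

# With allow_mult enabled, a read may return siblings; resolve them on
# the application side before writing. Keeping the first sibling is a
# naive placeholder policy.
if len(obj.siblings) > 1:
    obj.siblings = [obj.siblings[0]]

obj.data = {'team': 'Harlem Globetrotters'}
obj.store()
```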
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.0.7/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.0.7/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.0.7/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.0.7/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.0.7/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
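The sensor-data case above is one of the few where writing without a prior read is safe, because timestamp keys are never reused; a minimal sketch follows, with a hypothetical `sensor_data` bucket and field name.

```python
import time

from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('sensor_data')  # hypothetical bucket

# Millisecond-timestamp keys are never repeated, so no object can already
# exist under this key and no prior read is needed.
key = str(int(time.time() * 1000))
bucket.new(key, data={'temperature_c': 21.4}).store()
```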
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.0.7/index.md b/content/riak/kv/2.0.7/index.md index d7a29024cc..71a229caa0 100644 --- a/content/riak/kv/2.0.7/index.md +++ b/content/riak/kv/2.0.7/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.0.7/configuring -[dev index]: /riak/kv/2.0.7/developing -[downloads]: /riak/kv/2.0.7/downloads/ -[install index]: /riak/kv/2.0.7/setup/installing/ -[plan index]: /riak/kv/2.0.7/setup/planning -[perf open files]: /riak/kv/2.0.7/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.0.7/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.0.7/developing/usage/search -[getting started]: /riak/kv/2.0.7/developing/getting-started -[dev client libraries]: /riak/kv/2.0.7/developing/client-libraries +[config index]: {{}}riak/kv/2.0.7/configuring +[dev index]: {{}}riak/kv/2.0.7/developing +[downloads]: {{}}riak/kv/2.0.7/downloads/ +[install index]: {{}}riak/kv/2.0.7/setup/installing/ +[plan index]: {{}}riak/kv/2.0.7/setup/planning +[perf open files]: {{}}riak/kv/2.0.7/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.0.7/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search +[getting started]: {{}}riak/kv/2.0.7/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.0.7/developing/client-libraries diff --git a/content/riak/kv/2.0.7/introduction.md b/content/riak/kv/2.0.7/introduction.md index 63e79bbd19..dcc6dd5208 100644 --- a/content/riak/kv/2.0.7/introduction.md +++ b/content/riak/kv/2.0.7/introduction.md @@ -27,7 +27,7 @@ For more in-depth implementation details check out the If you're upgrading to Riak 2.0 from an earlier version, please be aware that all of the new features listed below are optional: -* **Riak Data Types** --- Riak's new CRDT-based [Data Types](/riak/kv/2.0.7/developing/data-types) can +* **Riak Data Types** --- Riak's new CRDT-based [Data Types]({{}}riak/kv/2.0.7/developing/data-types) can simplify modeling data in Riak, but are only used in buckets explicitly configured to use them. * **Strong Consistency, Riak Security, and the New Riak Search** --- @@ -35,16 +35,16 @@ that all of the new features listed below are optional: work. If not turned on, they will have no impact on performance. Furthermore, the older Riak Search will continue to be included with Riak. -* **Security** --- [Authentication and authorization](/riak/kv/2.0.7/using/security/basics) can be enabled +* **Security** --- [Authentication and authorization]({{}}riak/kv/2.0.7/using/security/basics) can be enabled or disabled at any time. -* **Configuration management** --- Riak's [configuration files](/riak/kv/2.0.7/configuring/reference/) have +* **Configuration management** --- Riak's [configuration files]({{}}riak/kv/2.0.7/configuring/reference/) have been streamlined into a single file named `riak.conf`. If you are upgrading, however, your existing `app.config` and `vm.args` files will still be recognized in version 2.0. -* **Bucket Types** --- While we strongly recommend [using bucket types](/riak/kv/2.0.7/using/reference/bucket-types) when creating new buckets, they are not required. +* **Bucket Types** --- While we strongly recommend [using bucket types]({{}}riak/kv/2.0.7/using/reference/bucket-types) when creating new buckets, they are not required. 
* **Dotted Version Vectors (DVVs)** --- This alternative to traditional - [vector clocks](/riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks) is enabled by default - in all [bucket types](/riak/kv/2.0.7/using/reference/bucket-types), but DVVs can be disabled + [vector clocks]({{}}riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks) is enabled by default + in all [bucket types]({{}}riak/kv/2.0.7/using/reference/bucket-types), but DVVs can be disabled by setting the `dvv_enabled` property to `false` on any bucket type. In a nutshell, upgrading to 2.0 will change how you use Riak only if you @@ -52,17 +52,17 @@ want it to. But even if you don't plan on using the new features, there are a number of improvements that make upgrading a good choice, including the following: -* [Cluster metadata](/riak/kv/2.0.7/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that +* [Cluster metadata]({{}}riak/kv/2.0.7/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that reduces the amount of inter-node gossip in Riak clusters, which can reduce network congestion. -* [Active Anti-Entropy](/riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy +* [Active Anti-Entropy]({{}}riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy (AAE) feature that is turned on by default since version 1.3, AAE performance has been improved in version 2.0. * [Bug patches](https://github.com/basho/riak/blob/2.0/RELEASE-NOTES.md) --- A variety of bugs present in earlier versions have been identified and patched. -More on upgrading can be found in our [Riak 2.0 upgrade guide](/riak/kv/2.0.7/setup/upgrading/version). +More on upgrading can be found in our [Riak 2.0 upgrade guide]({{}}riak/kv/2.0.7/setup/upgrading/version). ## Riak Data Types @@ -73,20 +73,20 @@ application is responsible for resolving conflicts between replicas of objects stored in different Riak nodes. Riak 2.0 offers a new approach to this problem for a wide range of use -cases in the form of [Riak Data Types](/riak/kv/2.0.7/developing/data-types). Instead of +cases in the form of [Riak Data Types]({{}}riak/kv/2.0.7/developing/data-types). Instead of forcing the application to resolve conflicts, Riak offers five Data Types that can reduce some of the complexities of developing using -Riak: [flags](/riak/kv/2.0.7/developing/data-types/#flags), [registers](/riak/kv/2.0.7/developing/data-types/#registers), -[counters](/riak/kv/2.0.7/developing/data-types/#counters), [sets](/riak/kv/2.0.7/developing/data-types/#sets), and -[maps](/riak/kv/2.0.7/developing/data-types/#maps). +Riak: [flags]({{}}riak/kv/2.0.7/developing/data-types/#flags), [registers]({{}}riak/kv/2.0.7/developing/data-types/#registers), +[counters]({{}}riak/kv/2.0.7/developing/data-types/#counters), [sets]({{}}riak/kv/2.0.7/developing/data-types/#sets), and +[maps]({{}}riak/kv/2.0.7/developing/data-types/#maps). #### Relevant Docs -* [Using Data Types](/riak/kv/2.0.7/developing/data-types) explains how to use Riak Data Types on the +* [Using Data Types]({{}}riak/kv/2.0.7/developing/data-types) explains how to use Riak Data Types on the application side, with usage examples for all five Data Types in all of Basho's officially supported clients (Java, Ruby, Python, .NET and Erlang) and for Riak's HTTP interface. 
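As a quick taste of how these Data Types compose in practice, here is a hedged Python sketch of a map holding a register, flag, counter, and set. It assumes a bucket type named `maps` has been created with `datatype = map` and activated; the key and field names are illustrative.

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

# Assumes a 'maps' bucket type (datatype = map) has been activated.
bucket = client.bucket_type('maps').bucket('users')

user = bucket.new('ahmed')  # hypothetical user key
user.registers['first_name'].assign('Ahmed')
user.flags['enterprise_customer'].enable()
user.counters['page_visits'].increment()
user.sets['interests'].add('robots')
user.store()
```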
-* [Data Types](/riak/kv/2.0.7/developing/data-types) explains some of the theoretical concerns that drive +* [Data Types]({{}}riak/kv/2.0.7/developing/data-types) explains some of the theoretical concerns that drive Riak Data Types and shares details about how they are implemented in Riak. @@ -103,11 +103,11 @@ Search, integrating Riak with [Apache Solr](https://lucene.apache.org/solr/)'s f #### Relevant Docs -* [Using Search](/riak/kv/2.0.7/developing/usage/search) provides an overview of how to use the new +* [Using Search]({{}}riak/kv/2.0.7/developing/usage/search) provides an overview of how to use the new Riak Search. -* [Search Schema](/riak/kv/2.0.7/developing/usage/search-schemas) shows you how to create and manage custom search +* [Search Schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas) shows you how to create and manage custom search schemas. -* [Search Details](/riak/kv/2.0.7/using/reference/search) provides an in-depth look at the design +* [Search Details]({{}}riak/kv/2.0.7/using/reference/search) provides an in-depth look at the design considerations that went into the new Riak Search. #### Video @@ -125,13 +125,13 @@ some (or perhaps all) of your data. #### Relevant Docs -* [Using Strong Consistency](/riak/kv/2.0.7/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong +* [Using Strong Consistency]({{}}riak/kv/2.0.7/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong consistency subsystem and to apply strong consistency guarantees to data stored in specified buckets. -* [Strong Consistency](/riak/kv/2.0.7/using/reference/strong-consistency) provides a theoretical treatment of how a - strongly consistent system differs from an [eventually consistent](/riak/kv/2.0.7/learn/concepts/eventual-consistency) system, as well as details about how +* [Strong Consistency]({{}}riak/kv/2.0.7/using/reference/strong-consistency) provides a theoretical treatment of how a + strongly consistent system differs from an [eventually consistent]({{}}riak/kv/2.0.7/learn/concepts/eventual-consistency) system, as well as details about how strong consistency is implemented in Riak. -* [Managing Strong Consistency](/riak/kv/2.0.7/configuring/strong-consistency) is a guide to strong consistency for +* [Managing Strong Consistency]({{}}riak/kv/2.0.7/configuring/strong-consistency) is a guide to strong consistency for Riak operators. #### Video @@ -155,11 +155,11 @@ Riak itself and managed through a simple command-line interface. #### Relevant Docs -* [Authentication and Authorization](/riak/kv/2.0.7/using/security/basics) explains how Riak Security can be +* [Authentication and Authorization]({{}}riak/kv/2.0.7/using/security/basics) explains how Riak Security can be enabled and disabled, how users and groups are managed, how authorization to perform certain operations can be granted and revoked, how security ciphers can be chosen, and more. -* [Managing Security Sources](/riak/kv/2.0.7/using/security/managing-sources/) is an in-depth tutorial on how to +* [Managing Security Sources]({{}}riak/kv/2.0.7/using/security/managing-sources/) is an in-depth tutorial on how to implement Riak's four supported authentication sources: trusted networks, passwords, pluggable authentication modules, and certificates. @@ -194,7 +194,7 @@ override any settings from the new system. 
#### Relevant Docs -* [Configuration Files](/riak/kv/2.0.7/configuring/reference/) lists and describes all of the configurable +* [Configuration Files]({{}}riak/kv/2.0.7/configuring/reference/) lists and describes all of the configurable parameters available in Riak 2.0, from configuring your chosen storage backend(s) to setting default bucket properties to controlling Riak's logging system and much more. @@ -214,7 +214,7 @@ and keys. #### Relevant Docs -* [Using Bucket Types](/riak/kv/2.0.7/using/reference/bucket-types) explains how to create, modify, and activate +* [Using Bucket Types]({{}}riak/kv/2.0.7/using/reference/bucket-types) explains how to create, modify, and activate bucket types, as well as how the new system differs from the older, bucket properties-based system. @@ -226,20 +226,20 @@ and [Jordan West](https://github.com/jrwest). ## Dotted Version Vectors -In prior versions of Riak, [conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution) was managed using -[vector clocks](/riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks), which track object update causality. +In prior versions of Riak, [conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution) was managed using +[vector clocks]({{}}riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks), which track object update causality. Riak 2.0 has added support for dotted version vectors (DVVs). DVVs serve an analogous role to vector -clocks but are more effective at containing [sibling explosion](/riak/kv/2.0.7/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. +clocks but are more effective at containing [sibling explosion]({{}}riak/kv/2.0.7/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. #### Relevant Docs -* [Dotted Version Vectors](/riak/kv/2.0.7/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. +* [Dotted Version Vectors]({{}}riak/kv/2.0.7/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. ## New Client Libraries -While Basho offered official [client libraries](/riak/kv/2.0.7/developing/client-libraries) for Java, Ruby, +While Basho offered official [client libraries]({{}}riak/kv/2.0.7/developing/client-libraries) for Java, Ruby, Python, .NET and Erlang for versions of Riak prior to 2.0, all clients have undergone major changes in anticipation of the 2.0 release. 
diff --git a/content/riak/kv/2.0.7/learn/concepts.md b/content/riak/kv/2.0.7/learn/concepts.md index d4558c6696..ca42cb32f2 100644 --- a/content/riak/kv/2.0.7/learn/concepts.md +++ b/content/riak/kv/2.0.7/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.0.7/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.0.7/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.7/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.7/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.7/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.7/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.7/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.7/learn/concepts/vnodes -[config index]: /riak/kv/2.0.7/configuring -[plan index]: /riak/kv/2.0.7/setup/planning -[use index]: /riak/kv/2.0.7/using/ +[concept aae]: {{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.0.7/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.7/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.7/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.7/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.7/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.7/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.7/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.0.7/configuring +[plan index]: {{}}riak/kv/2.0.7/setup/planning +[use index]: {{}}riak/kv/2.0.7/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.0.7/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.0.7/learn/concepts/active-anti-entropy.md index 2ce22843c0..859b8d0d4b 100644 --- a/content/riak/kv/2.0.7/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.0.7/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.7/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.0.7/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.0.7/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.0.7/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.0.7/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.0.7/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.0.7/developing/usage/search +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.0.7/learn/concepts/buckets.md b/content/riak/kv/2.0.7/learn/concepts/buckets.md index 9d6472ad8f..1c6e8de938 100644 --- a/content/riak/kv/2.0.7/learn/concepts/buckets.md +++ b/content/riak/kv/2.0.7/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.0.7/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.0.7/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.0.7/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.0.7/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.0.7/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.0.7/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.0.7/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[config basic]: /riak/kv/2.0.7/configuring/basic -[dev api http]: /riak/kv/2.0.7/developing/api/http -[dev data types]: /riak/kv/2.0.7/developing/data-types -[glossary ring]: /riak/kv/2.0.7/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.7/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.7/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.7/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.7/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.7/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.0.7/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.0.7/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.0.7/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.0.7/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.0.7/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.0.7/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.0.7/configuring/basic +[dev api http]: {{}}riak/kv/2.0.7/developing/api/http +[dev data types]: {{}}riak/kv/2.0.7/developing/data-types +[glossary ring]: {{}}riak/kv/2.0.7/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.7/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.7/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.7/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.7/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.7/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.0.7/learn/concepts/capability-negotiation.md b/content/riak/kv/2.0.7/learn/concepts/capability-negotiation.md index 3a598a9c32..14fe4d00ad 100644 --- a/content/riak/kv/2.0.7/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.0.7/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.0.7/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.0.7/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.0.7/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.0.7/developing/usage/mapreduce In versions of Riak prior to 1.2.0, [rolling upgrades][upgrade cluster] from an older version of Riak to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.0.7/learn/concepts/causal-context.md b/content/riak/kv/2.0.7/learn/concepts/causal-context.md index d1e5568a0d..7ef2aa3698 100644 --- a/content/riak/kv/2.0.7/learn/concepts/causal-context.md +++ b/content/riak/kv/2.0.7/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.0.7/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.0.7/developing/api/http -[dev key value]: /riak/kv/2.0.7/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.0.7/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.0.7/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.7/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.0.7/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.0.7/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.0.7/developing/api/http +[dev key value]: {{}}riak/kv/2.0.7/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.0.7/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.0.7/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.7/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.0.7/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.0.7/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -78,7 +78,7 @@ sections below. In the case of outcome 2, the choice between **a** and **b** is yours to make. If you set the `allow_mult` parameter to `true` for a bucket, -[using bucket types](/riak/kv/2.0.7/developing/usage/bucket-types), all writes to that bucket will create siblings +[using bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types), all writes to that bucket will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions).
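To ground the `allow_mult` discussion above, a minimal sketch of opting a bucket type into sibling creation; it assumes a running node, and the type name `siblings_allowed` is made up for illustration.

```bash
# Sketch: allow concurrent writes to create siblings (type name is hypothetical)
riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
riak-admin bucket-type activate siblings_allowed
riak-admin bucket-type status siblings_allowed   # confirm allow_mult is true
```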
diff --git a/content/riak/kv/2.0.7/learn/concepts/clusters.md b/content/riak/kv/2.0.7/learn/concepts/clusters.md index dc76cd8a7c..d0f766226e 100644 --- a/content/riak/kv/2.0.7/learn/concepts/clusters.md +++ b/content/riak/kv/2.0.7/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.7/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.0.7/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.7/learn/concepts/replication -[glossary node]: /riak/kv/2.0.7/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.0.7/learn/dynamo -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.7/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.7/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.0.7/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.0.7/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.7/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.7/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.0.7/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.7/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.7/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.0.7/learn/concepts/crdts.md b/content/riak/kv/2.0.7/learn/concepts/crdts.md index 4ca6bfb41a..6871c6e152 100644 --- a/content/riak/kv/2.0.7/learn/concepts/crdts.md +++ b/content/riak/kv/2.0.7/learn/concepts/crdts.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context dvv]: /riak/kv/2.0.7/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.0.7/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.0.7/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.0.7/developing/data-types -[glossary node]: /riak/kv/2.0.7/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.0.7/developing/usage/conflict-resolution +[concept causal context dvv]: {{}}riak/kv/2.0.7/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.0.7/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.0.7/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.0.7/developing/data-types +[glossary node]: {{}}riak/kv/2.0.7/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.0.7/developing/usage/conflict-resolution A pure key/value store is completely agnostic toward the data stored @@ -32,7 +32,7 @@ within it. Any key can be associated with values of any conceivable type, from short strings to large JSON objects to video files. Riak began as a pure key/value store, but over time it has become more and more aware of the data stored in it through features like [secondary -indexes](/riak/kv/2.0.7/developing/usage/secondary-indexes/) and [Search](/riak/kv/2.0.7/developing/usage/search/). +indexes]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes/) and [Search]({{}}riak/kv/2.0.7/developing/usage/search/). In version 2.0, Riak continued this evolution by introducing a series of eventually convergent **Data Types**. Riak Data Types are convergent @@ -214,7 +214,7 @@ The beauty of Data Types is that Riak "knows" how to resolve value conflicts by applying Data Type-specific rules. In general, Riak does this by remembering the **history** of a value and broadcasting that history along with the current value in the form of a [context -object](/riak/kv/2.0.7/developing/data-types/#Data-Types-and-Context) that is similar to a +object]({{}}riak/kv/2.0.7/developing/data-types/#Data-Types-and-Context) that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak uses the history of each Data Type to make deterministic judgments about which value should be deemed correct.
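Since the crdts.md hunk above describes how Riak resolves Data Type conflicts for the application, a brief sketch of exercising a Data Type may help. The type, bucket, and key names are hypothetical, and the endpoint shown is the 2.0-era HTTP data types API as we understand it.

```bash
# Sketch: create a counter bucket type, then increment a counter over HTTP
riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
riak-admin bucket-type activate counters
curl -XPOST http://localhost:8098/types/counters/buckets/hits/datatypes/homepage \
  -H 'Content-Type: application/json' \
  -d '{"increment": 1}'
```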
diff --git a/content/riak/kv/2.0.7/learn/concepts/eventual-consistency.md b/content/riak/kv/2.0.7/learn/concepts/eventual-consistency.md index cbe67354e3..0c2461de0b 100644 --- a/content/riak/kv/2.0.7/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.0.7/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.7/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters -[concept replication]: /riak/kv/2.0.7/learn/concepts/replication -[glossary node]: /riak/kv/2.0.7/learn/glossary/#node -[glossary read rep]: /riak/kv/2.0.7/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.7/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.0.7/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.0.7/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.7/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.0.7/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.7/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.0.7/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.0.7/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.0.7/developing/data-modeling/). +or models]({{}}riak/kv/2.0.7/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
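The eventual-consistency hunk above mentions replication properties and request tuning; as a hedged illustration (bucket and key names are made up), quorum values can be supplied per request as query parameters on the HTTP API:

```bash
# Sketch: per-request quorum tuning via the HTTP API (names are hypothetical)
curl 'http://localhost:8098/buckets/test/keys/demo?r=1'        # read with R=1
curl -XPUT 'http://localhost:8098/buckets/test/keys/demo?w=3' \
  -H 'Content-Type: text/plain' \
  -d 'hello'                                                   # write with W=3
```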
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.0.7/learn/concepts/keys-and-objects.md b/content/riak/kv/2.0.7/learn/concepts/keys-and-objects.md index 1fd32848c1..0fd0167262 100644 --- a/content/riak/kv/2.0.7/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.0.7/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.7/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.0.7/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.0.7/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.0.7/learn/concepts/replication.md b/content/riak/kv/2.0.7/learn/concepts/replication.md index 3a8978bbe9..df902bc17a 100644 --- a/content/riak/kv/2.0.7/learn/concepts/replication.md +++ b/content/riak/kv/2.0.7/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.0.7/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.0.7/learn/concepts/vnodes -[glossary node]: /riak/kv/2.0.7/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.7/learn/glossary/#ring -[usage replication]: /riak/kv/2.0.7/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.0.7/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.0.7/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.7/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.0.7/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.0.7/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.0.7/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.0.7/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.0.7/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail. 
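The next hunk ("Setting the N value") walks through creating a bucket type with `n_val` set to 2, but the commands themselves fall outside the diff context. A sketch of what the surrounding text describes, using its own example type name `n_val_of_2`:

```bash
# Sketch: set N=2 through a bucket type, then activate it
riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
riak-admin bucket-type activate n_val_of_2
```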
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.0.7/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.0.7/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.0.7/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.0.7/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.0.7/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.0.7/learn/concepts/strong-consistency.md b/content/riak/kv/2.0.7/learn/concepts/strong-consistency.md index 4770dc7882..7222843b18 100644 --- a/content/riak/kv/2.0.7/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.0.7/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.7/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.7/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.7/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.7/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.7/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.7/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.7/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.0.7/learn/concepts/vnodes.md b/content/riak/kv/2.0.7/learn/concepts/vnodes.md index 8a42049a71..c97d627dfb 100644 --- a/content/riak/kv/2.0.7/learn/concepts/vnodes.md +++ b/content/riak/kv/2.0.7/learn/concepts/vnodes.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context]: /riak/kv/2.0.7/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.0.7/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.0.7/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.7/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.0.7/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.7/learn/glossary/#ring -[plan backend]: /riak/kv/2.0.7/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.7/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.0.7/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.0.7/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.0.7/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.0.7/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.7/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.0.7/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.7/learn/glossary/#ring +[plan backend]: {{}}riak/kv/2.0.7/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.7/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.0.7/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -80,7 +80,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -102,7 +102,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.0.7/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.0.7/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.0.7/learn/dynamo.md b/content/riak/kv/2.0.7/learn/dynamo.md index 290cf90891..35ad99e18b 100644 --- a/content/riak/kv/2.0.7/learn/dynamo.md +++ b/content/riak/kv/2.0.7/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.0.7/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.0.7/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.0.7/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.0.7/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. 
-[HTTP API]: /riak/kv/2.0.7/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.0.7/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.0.7/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.0.7/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.0.7/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.0.7/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.0.7/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.0.7/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.0.7/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.0.7/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.0.7/developing/api/http/) +>[REST API]({{}}riak/kv/2.0.7/developing/api/http/) > ->[Writing Data](/riak/kv/2.0.7/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.0.7/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.0.7/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.0.7/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.0.7/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.0.7/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.0.7/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.0.7/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.0.7/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.0.7/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. -[Multi Datacenter Replication]: /riak/kv/2.0.7/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.0.7/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. 
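Given the hinted handoff and membership discussion running through this stretch of dynamo.md, a quick operational sketch: transfers between vnodes, including handoff of hinted partitions, can be inspected from any node. These are standard 2.0-era `riak-admin` commands, shown here as an aside rather than anything the hunks themselves add.

```bash
# Sketch: observe partition transfers and handoff state on a running cluster
riak-admin transfers          # cluster-wide view of pending/active transfers
riak-admin handoff summary    # per-node handoff counts
```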
> This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.0.7/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.0.7/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.0.7/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.0.7/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.0.7/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.0.7/setup/planning/backend/ -[Bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.0.7/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.0.7/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.0.7/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.0.7/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.0.7/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.0.7/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.0.7/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.0.7/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.0.7/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.7/using/performance/benchmarking/ Dynamo is used by several services with different configurations.
These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.0.7/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.0.7/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.0.7/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.0.7/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.0.7/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.0.7/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.0.7/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.0.7/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.0.7/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.0.7/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. -[Basho Bench]: /riak/kv/2.0.7/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.7/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. 
-[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.0.7/learn/glossary.md b/content/riak/kv/2.0.7/learn/glossary.md index a85ca4f28b..96ce5c5a45 100644 --- a/content/riak/kv/2.0.7/learn/glossary.md +++ b/content/riak/kv/2.0.7/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.0.7/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.0.7/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.0.7/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.7/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.7/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.7/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.7/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.0.7/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.0.7/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.0.7/developing/api/http -[dev data model]: /riak/kv/2.0.7/developing/data-modeling -[dev data types]: /riak/kv/2.0.7/developing/data-types -[glossary read rep]: /riak/kv/2.0.7/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.0.7/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.0.7/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.7/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.7/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.7/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.7/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.0.7/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.0.7/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.0.7/developing/api/http +[dev data model]: {{}}riak/kv/2.0.7/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.7/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.7/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.0.7/learn/dynamo -[plan cluster capacity]: /riak/kv/2.0.7/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.0.7/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.0.7/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.0.7/learn/dynamo +[plan cluster capacity]: 
{{}}riak/kv/2.0.7/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.0.7/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.0.7/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.0.7/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.7/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.7/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.7/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.7/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.0.7/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.0.7/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. -* [Bucket Types](/riak/kv/2.0.7/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.0.7/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.0.7/learn/use-cases.md b/content/riak/kv/2.0.7/learn/use-cases.md index 5647dc6927..bf8df0b2a4 100644 --- a/content/riak/kv/2.0.7/learn/use-cases.md +++ b/content/riak/kv/2.0.7/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.0.7/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.0.7/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.0.7/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.0.7/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.0.7/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.0.7/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.0.7/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.0.7/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.0.7/developing/data-types -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.0.7/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.7/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.7/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.0.7/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.0.7/developing/data-modeling/#log-data +[dev data model sensor data]: 
{{}}riak/kv/2.0.7/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.0.7/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.0.7/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.0.7/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.0.7/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.0.7/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.0.7/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.0.7/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.0.7/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.7/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.0.7/learn/why-riak-kv.md b/content/riak/kv/2.0.7/learn/why-riak-kv.md index 1253559fb7..8893ebce44 100644 --- a/content/riak/kv/2.0.7/learn/why-riak-kv.md +++ b/content/riak/kv/2.0.7/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.0.7/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.0.7/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.7/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.0.7/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.0.7/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.0.7/developing/data-types -[glossary read rep]: /riak/kv/2.0.7/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.0.7/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.7/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.0.7/release-notes.md b/content/riak/kv/2.0.7/release-notes.md index a7f128949a..71c86d087e 100644 --- a/content/riak/kv/2.0.7/release-notes.md +++ b/content/riak/kv/2.0.7/release-notes.md @@ -23,8 +23,8 @@ This is an LTS feature release, including new updates to batching and Solr, Cutt This release includes fixes for two product advisories: -* [LevelDB Segfault advisory](http://docs.basho.com/community/productadvisories/leveldbsegfault/) - The AddDB() call now occurs after all object initialization is complete to eliminate a race condition that leads to segfault. [[LevelDB PR #184](https://github.com/basho/leveldb/pull/184)] -* [Code Injection on Riak Init File](http://docs.basho.com/community/productadvisories/codeinjectioninitfiles/) - A thorough review of file ownership across the Riak KV package was done and several files, including riak init, were changed to tighten the ownership to root:root instead of riak:riak to prevent a potential code injection across all supported operating systems. 
Additionally, node_package was bumped to version 3.0.1. [[node_package PR #196](https://github.com/basho/node_package/pull/196)] +* [LevelDB Segfault advisory]({{}}community/productadvisories/leveldbsegfault/) - The AddDB() call now occurs after all object initialization is complete to eliminate a race condition that leads to segfault. [[LevelDB PR #184](https://github.com/basho/leveldb/pull/184)] +* [Code Injection on Riak Init File]({{}}community/productadvisories/codeinjectioninitfiles/) - A thorough review of file ownership across the Riak KV package was done and several files, including riak init, were changed to tighten the ownership to root:root instead of riak:riak to prevent a potential code injection across all supported operating systems. Additionally, node_package was bumped to version 3.0.1. [[node_package PR #196](https://github.com/basho/node_package/pull/196)] @@ -32,7 +32,7 @@ This release includes fixes for two product advisories: * We've introduced a new batching system for Riak Search so indexing calls are no longer made synchronously when data is written to Riak. This allows Solr to process the data in chunks and Riak to move forward accepting new work at the vnode level without waiting for the call to Solr to happen. Out-of-the-box performance should be similar to Riak 2.0.6 with Search enabled. However, additional configuration options (see "Cuttlefish configurations…" below) will allow you to set the batching parameters based on your needs and have, in certain cases, led to significantly higher write throughput to Solr. * [[PR #648](https://github.com/basho/yokozuna/pull/648)] -* Cuttlefish configurations have been updated to support the Riak Search batching updates. These configs are tunable via the riak.conf file. (Note: Changes to this file require a restart of Riak). You can control the behavior of batching through various [new Cuttlefish parameters](http://docs.basho.com/riak/kv/2.1.4/configuring/reference/#search). These parameters guide Cuttlefish operation, Solr integration, and statistics on Riak performance. +* Cuttlefish configurations have been updated to support the Riak Search batching updates. These configs are tunable via the riak.conf file. (Note: Changes to this file require a restart of Riak). You can control the behavior of batching through various [new Cuttlefish parameters]({{< baseurl >}}riak/kv/2.0.7/configuring/reference/#search). These parameters guide Cuttlefish operation, Solr integration, and statistics on Riak performance. * [[PR #614](https://github.com/basho/yokozuna/pull/614)] * Our Erlang/OTP has been updated to version R16B02_basho10 and included in this release. This update includes bugfixes and improvements for ERTS, as well as bugfixes for SSL. * You can read the complete release notes for Erlang/OTP [here](https://github.com/basho/otp/blob/basho-otp-16/BASHO-RELEASES.md). diff --git a/content/riak/kv/2.0.7/setup/downgrade.md b/content/riak/kv/2.0.7/setup/downgrade.md index 07ba8626d0..bb23011372 100644 --- a/content/riak/kv/2.0.7/setup/downgrade.md +++ b/content/riak/kv/2.0.7/setup/downgrade.md @@ -17,7 +17,7 @@ aliases: Downgrades of Riak are tested and supported for two feature release versions, with the general procedure being similar to that of a -[rolling upgrade](/riak/kv/2.0.7/setup/upgrading/cluster). +[rolling upgrade]({{}}riak/kv/2.0.7/setup/upgrading/cluster). You should perform the following actions on each node: @@ -45,9 +45,9 @@ both 1.4 and 1.3 are performed. 
* Riak Control should be disabled throughout the rolling downgrade process -* [Configuration Files](/riak/kv/2.0.7/configuring/reference) must be replaced with those of the version +* [Configuration Files]({{}}riak/kv/2.0.7/configuring/reference) must be replaced with those of the version being downgraded to -* [Active anti-entropy](/riak/kv/2.0.7/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version +* [Active anti-entropy]({{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/) should be disabled if downgrading to a version below 1.3. ## Before Stopping a Node @@ -90,7 +90,7 @@ will need to be downgraded before the rolling downgrade begins. This can be done using the --downgrade flag with `riak-admin reformat-indexes` More information on the `riak-admin reformat-indexes` command, and downgrading indexes can be found in the -[`riak-admin`](/riak/kv/2.0.7/using/admin/riak-admin/#reformat-indexes) documentation. +[`riak-admin`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#reformat-indexes) documentation. ## Before Starting a Node diff --git a/content/riak/kv/2.0.7/setup/installing.md b/content/riak/kv/2.0.7/setup/installing.md index 68d674f996..d1ed8a6a2c 100644 --- a/content/riak/kv/2.0.7/setup/installing.md +++ b/content/riak/kv/2.0.7/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.0.7/installing/ --- -[install aws]: /riak/kv/2.0.7/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.7/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.7/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.7/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.7/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.7/setup/installing/smartos -[install solaris]: /riak/kv/2.0.7/setup/installing/solaris -[install suse]: /riak/kv/2.0.7/setup/installing/suse -[install windows azure]: /riak/kv/2.0.7/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.7/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.0.7/setup/upgrading +[install aws]: {{}}riak/kv/2.0.7/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.7/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.7/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.7/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.7/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.7/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.7/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.7/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.7/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.0.7/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.0.7/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.0.7/setup/installing/amazon-web-services.md b/content/riak/kv/2.0.7/setup/installing/amazon-web-services.md index 596d42f52f..0422a1adce 100644 --- a/content/riak/kv/2.0.7/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.0.7/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. 
Click the **Accept Terms and Launch with 1-Click** button. @@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.0.7/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.0.7/using/security/). ## Clustering Riak on AWS diff --git a/content/riak/kv/2.0.7/setup/installing/debian-ubuntu.md b/content/riak/kv/2.0.7/setup/installing/debian-ubuntu.md index 65adb33051..eadb379ad6 100644 --- a/content/riak/kv/2.0.7/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.0.7/setup/installing/debian-ubuntu.md @@ -20,10 +20,10 @@ aliases: -[install source index]: /riak/kv/2.0.7/setup/installing/source/ -[security index]: /riak/kv/2.0.7/using/security/ -[install source erlang]: /riak/kv/2.0.7/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.7/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.7/setup/installing/source/ +[security index]: {{}}riak/kv/2.0.7/using/security/ +[install source erlang]: {{}}riak/kv/2.0.7/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.7/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.0.7/setup/installing/freebsd.md b/content/riak/kv/2.0.7/setup/installing/freebsd.md index 4b5be53053..93db8073f4 100644 --- a/content/riak/kv/2.0.7/setup/installing/freebsd.md +++ b/content/riak/kv/2.0.7/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.0.7/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.7/downloads/ -[install verify]: /riak/kv/2.0.7/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.7/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.7/downloads/ +[install verify]: {{}}riak/kv/2.0.7/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.0.7/setup/installing/mac-osx.md b/content/riak/kv/2.0.7/setup/installing/mac-osx.md index 085638acc6..cb89a59953 100644 --- a/content/riak/kv/2.0.7/setup/installing/mac-osx.md +++ b/content/riak/kv/2.0.7/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.0.7/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.0.7/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.7/setup/installing/verify +[perf open files]: {{}}riak/kv/2.0.7/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.0.7/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.7/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. 
You can install from source or download a diff --git a/content/riak/kv/2.0.7/setup/installing/rhel-centos.md b/content/riak/kv/2.0.7/setup/installing/rhel-centos.md index ab96cf31dd..9b975cfed7 100644 --- a/content/riak/kv/2.0.7/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.0.7/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.0.7/setup/installing/source -[install source erlang]: /riak/kv/2.0.7/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.7/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.7/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.7/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.7/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.0.7/setup/installing/smartos.md b/content/riak/kv/2.0.7/setup/installing/smartos.md index 141b2a7791..b6355098dc 100644 --- a/content/riak/kv/2.0.7/setup/installing/smartos.md +++ b/content/riak/kv/2.0.7/setup/installing/smartos.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.7/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.7/setup/installing/verify The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. diff --git a/content/riak/kv/2.0.7/setup/installing/solaris.md b/content/riak/kv/2.0.7/setup/installing/solaris.md index e78507424b..03b319f46a 100644 --- a/content/riak/kv/2.0.7/setup/installing/solaris.md +++ b/content/riak/kv/2.0.7/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.7/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.7/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. 
diff --git a/content/riak/kv/2.0.7/setup/installing/source.md b/content/riak/kv/2.0.7/setup/installing/source.md index 8829234340..6a430eb99f 100644 --- a/content/riak/kv/2.0.7/setup/installing/source.md +++ b/content/riak/kv/2.0.7/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.0.7/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.7/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.0.7/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.0.7/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.0.7/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.0.7/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.0.7/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.7/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.7/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.0.7/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.0.7/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.0.7/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.0.7/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.0.7/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.0.7/setup/installing/source/erlang.md b/content/riak/kv/2.0.7/setup/installing/source/erlang.md index 771eb5781c..c32045b7ef 100644 --- a/content/riak/kv/2.0.7/setup/installing/source/erlang.md +++ b/content/riak/kv/2.0.7/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.7/installing/source/erlang/ --- -[install index]: /riak/kv/2.0.7/setup/installing -[security basics]: /riak/kv/2.0.7/using/security/basics +[install index]: {{}}riak/kv/2.0.7/setup/installing +[security basics]: {{}}riak/kv/2.0.7/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho10.tar.gz). 
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.0.7/setup/installing/source/jvm.md b/content/riak/kv/2.0.7/setup/installing/source/jvm.md index 7a1c2ef5a9..4f87c41ea3 100644 --- a/content/riak/kv/2.0.7/setup/installing/source/jvm.md +++ b/content/riak/kv/2.0.7/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.0.7/installing/source/jvm/ --- -[usage search]: /riak/kv/2.0.7/developing/usage/search +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.0.7/setup/installing/suse.md b/content/riak/kv/2.0.7/setup/installing/suse.md index b3ba59409a..6150a212ec 100644 --- a/content/riak/kv/2.0.7/setup/installing/suse.md +++ b/content/riak/kv/2.0.7/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.7/installing/suse/ --- -[install verify]: /riak/kv/2.0.7/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.7/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.0.7/setup/installing/verify.md b/content/riak/kv/2.0.7/setup/installing/verify.md index 96257a83f8..fd44588e2b 100644 --- a/content/riak/kv/2.0.7/setup/installing/verify.md +++ b/content/riak/kv/2.0.7/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.7/installing/verify-install/ --- -[client libraries]: /riak/kv/2.0.7/developing/client-libraries -[perf open files]: /riak/kv/2.0.7/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.0.7/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.0.7/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.0.7/developing/client-libraries +[perf open files]: {{}}riak/kv/2.0.7/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.0.7/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.0.7/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.0.7/setup/installing/windows-azure.md b/content/riak/kv/2.0.7/setup/installing/windows-azure.md index 23e7b3c447..1d5008bf81 100644 --- a/content/riak/kv/2.0.7/setup/installing/windows-azure.md +++ b/content/riak/kv/2.0.7/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". 
- ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.0.7/setup/planning/backend.md b/content/riak/kv/2.0.7/setup/planning/backend.md index bfd1341015..5dd9b92892 100644 --- a/content/riak/kv/2.0.7/setup/planning/backend.md +++ b/content/riak/kv/2.0.7/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.7/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.7/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.7/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.0.7/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.7/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.7/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.0.7/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
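A node's backend is selected with the `storage_backend` setting in `riak.conf`; a minimal sketch, assuming the stock configuration file:

```riakconf
## Pick exactly one backend per node; bitcask is the default.
storage_backend = bitcask
## Alternatives: leveldb, memory, or multi
# storage_backend = leveldb
```

Bitcask is the default; the alternatives named in the links above are `leveldb`, `memory`, and `multi`.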
diff --git a/content/riak/kv/2.0.7/setup/planning/backend/bitcask.md b/content/riak/kv/2.0.7/setup/planning/backend/bitcask.md index b1b7132ad1..a5617e0ad5 100644 --- a/content/riak/kv/2.0.7/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.0.7/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.0.7/using/admin/riak-cli -[config reference]: /riak/kv/2.0.7/configuring/reference -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.0.7/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.0.7/setup/planning/backend/multi -[usage search]: /riak/kv/2.0.7/developing/usage/search - -[glossary aae]: /riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.0.7/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.0.7/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.0.7/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.0.7/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.0.7/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.0.7/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.0.7/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.0.7/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
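A minimal sketch of what enabling Bitcask explicitly looks like in `riak.conf` (the data path is an assumed example; by default it lives under the platform data directory):

```riakconf
storage_backend = bitcask
## Assumed example path; the default derives from $(platform_data_dir)/bitcask
bitcask.data_root = /var/lib/riak/bitcask
```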
diff --git a/content/riak/kv/2.0.7/setup/planning/backend/leveldb.md b/content/riak/kv/2.0.7/setup/planning/backend/leveldb.md index d47ed00826..f3d4d7c54a 100644 --- a/content/riak/kv/2.0.7/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.0.7/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.7/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.7/configuring/reference -[perf index]: /riak/kv/2.0.7/using/performance -[config reference#aae]: /riak/kv/2.0.7/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[perf index]: {{}}riak/kv/2.0.7/using/performance +[config reference#aae]: {{}}riak/kv/2.0.7/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.0.7/setup/planning/backend/memory.md b/content/riak/kv/2.0.7/setup/planning/backend/memory.md index 4a2e9cd70f..add57e5a3a 100644 --- a/content/riak/kv/2.0.7/setup/planning/backend/memory.md +++ b/content/riak/kv/2.0.7/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.7/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.0.7/configuring/reference -[plan backend multi]: /riak/kv/2.0.7/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[plan backend multi]: {{}}riak/kv/2.0.7/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.0.7/setup/planning/backend/multi.md b/content/riak/kv/2.0.7/setup/planning/backend/multi.md index e2b7aa083f..9e464ca721 100644 --- a/content/riak/kv/2.0.7/setup/planning/backend/multi.md +++ b/content/riak/kv/2.0.7/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.7/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.0.7/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.7/setup/planning/backend/memory -[config reference]: /riak/kv/2.0.7/configuring/reference -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.0.7/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.0.7/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.7/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.0.7/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.0.7/setup/planning/best-practices.md b/content/riak/kv/2.0.7/setup/planning/best-practices.md index ab54f33bcb..2d0d407c61 100644 --- a/content/riak/kv/2.0.7/setup/planning/best-practices.md +++ b/content/riak/kv/2.0.7/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.7/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.0.7/using/reference/handoff -[config mapreduce]: /riak/kv/2.0.7/configuring/mapreduce -[glossary aae]: /riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.0.7/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.0.7/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.0.7/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.0.7/setup/planning/bitcask-capacity-calc.md index 5009a459e2..e7df772926 100644 --- a/content/riak/kv/2.0.7/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.0.7/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
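The estimate such calculators produce is driven by a simple per-key model, because Bitcask holds every key in memory. A back-of-envelope sketch, where each constant is an assumed illustrative value rather than the calculators' exact figures:

```bash
# Hypothetical back-of-envelope sizing; Bitcask keeps all keys in RAM.
KEYS=100000000    # assumed: 100 million objects
KEY_BYTES=36      # assumed: average bucket-plus-key length in bytes
OVERHEAD=45       # assumed: per-key in-memory overhead in bytes
echo "RAM needed ~ $(( KEYS * (KEY_BYTES + OVERHEAD) / 1024**3 )) GiB across the cluster"
```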
diff --git a/content/riak/kv/2.0.7/setup/planning/cluster-capacity.md b/content/riak/kv/2.0.7/setup/planning/cluster-capacity.md index 4189bc403c..a6abae17b0 100644 --- a/content/riak/kv/2.0.7/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.0.7/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.7/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.0.7/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.0.7/setup/planning -[concept replication]: /riak/kv/2.0.7/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.0.7/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.0.7/configuring/reference -[perf benchmark]: /riak/kv/2.0.7/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.0.7/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.0.7/setup/planning +[concept replication]: {{}}riak/kv/2.0.7/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[perf benchmark]: {{}}riak/kv/2.0.7/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.0.7/setup/planning/operating-system.md b/content/riak/kv/2.0.7/setup/planning/operating-system.md index 625b557446..41c67930a5 100644 --- a/content/riak/kv/2.0.7/setup/planning/operating-system.md +++ b/content/riak/kv/2.0.7/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.0.7/downloads/ +[downloads]: {{}}riak/kv/2.0.7/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.0.7/setup/planning/start.md b/content/riak/kv/2.0.7/setup/planning/start.md index 00e9d844a6..ba13bbd829 100644 --- a/content/riak/kv/2.0.7/setup/planning/start.md +++ b/content/riak/kv/2.0.7/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.7/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.0.7/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.7/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.0.7/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.0.7/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.7/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.0.7/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster. 
diff --git a/content/riak/kv/2.0.7/setup/upgrading/checklist.md b/content/riak/kv/2.0.7/setup/upgrading/checklist.md index af90ef82fa..edda98ca1b 100644 --- a/content/riak/kv/2.0.7/setup/upgrading/checklist.md +++ b/content/riak/kv/2.0.7/setup/upgrading/checklist.md @@ -16,24 +16,24 @@ aliases: - /riak/kv/2.0.7/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.0.7/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.7/using/performance +[perf open files]: {{}}riak/kv/2.0.7/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.7/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.0.7/using/security/basics -[cluster ops load balance]: /riak/kv/2.0.7/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.0.7/configuring/reference -[config backend]: /riak/kv/2.0.7/configuring/backend -[usage search]: /riak/kv/2.0.7/developing/usage/search -[usage conflict resolution]: /riak/kv/2.0.7/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.0.7/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.0.7/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.0.7/using/admin/commands -[use admin riak control]: /riak/kv/2.0.7/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.0.7/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.0.7/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.0.7/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.0.7/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.0.7/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[config backend]: {{}}riak/kv/2.0.7/configuring/backend +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.0.7/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.7/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.0.7/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.0.7/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.0.7/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.0.7/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.0.7/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.0.7/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a production environment from a development or testing environment can be a complex process. While the specific process will depend on your environment and practices, there are some basics for you to consider and a few questions you will want to ask while making this transition. 
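Two of the basics behind the links above are easy to check up front; a sketch, assuming a packaged install with the `riak` script on your `PATH`:

```bash
ulimit -n        # open-files limit; production nodes need far more than the OS default
riak chkconfig   # confirm the node's configuration files parse cleanly
```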
diff --git a/content/riak/kv/2.0.7/setup/upgrading/cluster.md b/content/riak/kv/2.0.7/setup/upgrading/cluster.md index 6e3ca4e53f..d0adb28cd1 100644 --- a/content/riak/kv/2.0.7/setup/upgrading/cluster.md +++ b/content/riak/kv/2.0.7/setup/upgrading/cluster.md @@ -11,23 +11,23 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.0.7/ops/upgrading/rolling-upgrades/ - /riak/kv/2.0.7/ops/upgrading/rolling-upgrades/ --- -[production checklist]: /riak/kv/2.0.7/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.0.7/using/admin/riak-control -[use admin commands]: /riak/kv/2.0.7/using/admin/commands -[use admin riak-admin]: /riak/kv/2.0.7/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.0.7/developing/usage/secondary-indexes -[release notes]: /riak/kv/2.0.7/release-notes/ +[production checklist]: {{}}riak/kv/2.0.7/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.0.7/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.0.7/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.0.7/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.0.7/developing/usage/secondary-indexes +[release notes]: {{}}riak/kv/2.0.7/release-notes/ [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.0.7/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.0.7/using/reference/jmx -[snmp]: /riak/kv/2.0.7/using/reference/snmp +[cluster ops mdc]: {{}}riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.0.7/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.0.7/using/reference/jmx +[snmp]: {{}}riak/kv/2.0.7/using/reference/snmp {{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. @@ -38,7 +38,7 @@ recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. -If you run [Riak Control](/riak/kv/2.0.7/using/admin/riak-control), you should disable it during the rolling upgrade process. +If you run [Riak Control]({{}}riak/kv/2.0.7/using/admin/riak-control), you should disable it during the rolling upgrade process. {{% /note %}} Riak KV nodes negotiate with each other to determine supported @@ -104,9 +104,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.7/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.7/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.7/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.7/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.7/release-notes/). {{% /note %}} ## RHEL/CentOS @@ -166,9 +166,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. 
Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.7/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.7/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.7/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.7/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.7/release-notes/). {{% /note %}} ## Solaris/OpenSolaris @@ -252,9 +252,9 @@ behalf. This data is transferred to the node when it becomes available. 8\. Repeat the process for the remaining nodes in the cluster. {{% note title="Note for secondary index (2i) users" %}} -If you use Riak KV's [secondary indexes](/riak/kv/2.0.7/developing/usage/secondary-indexes) and are +If you use Riak KV's [secondary indexes]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes) and are upgrading from a version prior to Riak KV version 1.3.1, you need to -reformat the indexes using the [`riak-admin reformat-indexes`](/riak/kv/2.0.7/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes](/riak/kv/2.0.7/release-notes/). +reformat the indexes using the [`riak-admin reformat-indexes`]({{}}riak/kv/2.0.7/using/admin/riak-admin) command. More details about reformatting indexes are available in the [release notes]({{}}riak/kv/2.0.7/release-notes/). {{% /note %}} ## Rolling Upgrade to Enterprise diff --git a/content/riak/kv/2.0.7/setup/upgrading/search.md b/content/riak/kv/2.0.7/setup/upgrading/search.md index dc141323c0..8f764333eb 100644 --- a/content/riak/kv/2.0.7/setup/upgrading/search.md +++ b/content/riak/kv/2.0.7/setup/upgrading/search.md @@ -11,7 +11,7 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" --- If you're using Search in a version of Riak prior to 2.0 (1.3.0 to @@ -270,4 +270,4 @@ search property is set to false. 11. Finally, delete the merge index directories to reclaim disk space. -For any questions reach out to the [Riak community](/community). Preferably, ask your questions up front rather than during the middle of a migration. +For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. diff --git a/content/riak/kv/2.0.7/setup/upgrading/version.md b/content/riak/kv/2.0.7/setup/upgrading/version.md index d97ac19764..ffc0f7e7a9 100644 --- a/content/riak/kv/2.0.7/setup/upgrading/version.md +++ b/content/riak/kv/2.0.7/setup/upgrading/version.md @@ -20,7 +20,7 @@ explains which default Riak behaviors have changed and specific steps to take for a successful upgrade. For an overview of the new features and functionality -included in version 2.0, check out our guide to [Riak 2.0](/riak/kv/2.0.7/introduction). +included in version 2.0, check out our guide to [Riak 2.0]({{}}riak/kv/2.0.7/introduction). ## New Clients @@ -36,14 +36,14 @@ was built with those features in mind. 
There are official While we strongly recommend using the newest versions of these clients, older versions will still work with Riak 2.0, with the drawback that -those older clients will not able to take advantage of [new features](/riak/kv/2.0.7/introduction) like [data types](/riak/kv/2.0.7/developing/data-types) or the new [Riak Search](/riak/kv/2.0.7/using/reference/search). +those older clients will not be able to take advantage of [new features]({{}}riak/kv/2.0.7/introduction) like [data types]({{}}riak/kv/2.0.7/developing/data-types) or the new [Riak Search]({{}}riak/kv/2.0.7/using/reference/search). ## Bucket Types In versions of Riak prior to 2.0, the location of objects was -determined by objects' [bucket](/riak/kv/2.0.7/learn/concepts/buckets) and [key](/riak/kv/2.0.7/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties](/riak/kv/2.0.7/developing/usage/bucket-types/). +determined by objects' [bucket]({{}}riak/kv/2.0.7/learn/concepts/buckets) and [key]({{}}riak/kv/2.0.7/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties]({{}}riak/kv/2.0.7/developing/usage/bucket-types/). -In Riak 2.0, [bucket types](/riak/kv/2.0.7/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types](/riak/kv/2.0.7/using/reference/bucket-types). +In Riak 2.0, [bucket types]({{}}riak/kv/2.0.7/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types]({{}}riak/kv/2.0.7/using/reference/bucket-types). Here, we'll list some of the things to be aware of when upgrading. #### Bucket types and object location @@ -56,7 +56,7 @@ is determined by: * key This means there are 3 namespaces involved in object location instead of 2. -A full tutorial can be found in [Using Bucket Types](/riak/kv/2.0.7/using/reference/bucket-types). +A full tutorial can be found in [Using Bucket Types]({{}}riak/kv/2.0.7/using/reference/bucket-types). If your application was written using a version of Riak prior to 2.0, you should make sure that any endpoint in Riak targeting @@ -75,8 +75,8 @@ configurations. 
The following URLs are equivalent in Riak 2.0: If you use object locations that don't specify a bucket type, you have three options: -* Accept Riak's [default bucket configurations](/riak/kv/2.0.7/using/reference/bucket-types/#buckets-as-namespaces) -* Change Riak's defaults using your [configuration files](/riak/kv/2.0.7/configuring/reference/#default-bucket-properties) +* Accept Riak's [default bucket configurations]({{}}riak/kv/2.0.7/using/reference/bucket-types/#buckets-as-namespaces) +* Change Riak's defaults using your [configuration files]({{}}riak/kv/2.0.7/configuring/reference/#default-bucket-properties) * Manage multiple sets of bucket properties by specifying those properties for all operations (not recommended) @@ -86,17 +86,17 @@ One reason we recommend using bucket types for Riak 2.0 and later is because many newer Riak features were built with bucket types as a precondition: -* [Strong consistency](/riak/kv/2.0.7/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem +* [Strong consistency]({{}}riak/kv/2.0.7/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem requires you to set the `consistent` parameter on a bucket type to `true` -* [Riak Data Types](/riak/kv/2.0.7/developing/data-types) --- In order to use Riak Data - Types, you must [create bucket types](/riak/kv/2.0.7/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the +* [Riak Data Types]({{}}riak/kv/2.0.7/developing/data-types) --- In order to use Riak Data + Types, you must [create bucket types]({{}}riak/kv/2.0.7/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the Data Type you are using #### Bucket types and downgrades If you decide to use bucket types, please remember that you -cannot [downgrade](/riak/kv/2.0.7/setup/downgrade) your cluster to a version of +cannot [downgrade]({{}}riak/kv/2.0.7/setup/downgrade) your cluster to a version of Riak prior to 2.0 if you have both created and activated a bucket type. @@ -104,20 +104,20 @@ bucket type. One of the biggest changes in version 2.0 regarding application development involves Riak's default -[siblings](/riak/kv/2.0.7/learn/concepts/causal-context/#siblings) behavior. +[siblings]({{}}riak/kv/2.0.7/learn/concepts/causal-context/#siblings) behavior. In versions prior to 2.0, the `allow_mult` setting was set to `false` by default for all buckets. So Riak's default behavior was to resolve -object replica [conflicts](/riak/kv/2.0.7/developing/usage/conflict-resolution) between nodes on its +object replica [conflicts]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution) between nodes on its own; relieving connecting clients of the need to resolve those conflicts. **In 2.0, `allow_mult` is set to `true` for any bucket type that you create and activate.** -This means that the default when [using bucket types](/riak/kv/2.0.7/using/reference/bucket-types/) is to handle [conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution) on the client side using -either traditional [vector clocks](/riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors](/riak/kv/2.0.7/learn/concepts/causal-context/#dotted-version-vector). 
+This means that the default when [using bucket types]({{}}riak/kv/2.0.7/using/reference/bucket-types/) is to handle [conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution) on the client side using +either traditional [vector clocks]({{}}riak/kv/2.0.7/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors]({{}}riak/kv/2.0.7/learn/concepts/causal-context/#dotted-version-vector). If you wish to set `allow_mult` to `false` in version 2.0, you have two options: @@ -126,11 +126,11 @@ options: * Don't use bucket types. More information on handling siblings can be found in our documentation -on [conflict resolution](/riak/kv/2.0.7/developing/usage/conflict-resolution). +on [conflict resolution]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution). ## Enabling Security -The [authentication and authorization](/riak/kv/2.0.7/using/security/basics) mechanisms included with Riak 2.0 should only be turned +The [authentication and authorization]({{}}riak/kv/2.0.7/using/security/basics) mechanisms included with Riak 2.0 should only be turned on after careful testing in a non-production environment. Security changes the way all applications interact with Riak. @@ -140,12 +140,12 @@ If you decide to upgrade to version 2.0, you can still downgrade your cluster to an earlier version of Riak if you wish, _unless_ you perform one of the following actions in your cluster: -* Index data to be used in conjunction with the new [Riak Search](/riak/kv/2.0.7/using/reference/search). -* Create _and_ activate one or more [bucket types](/riak/kv/2.0.7/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: - - [Strong consistency](/riak/kv/2.0.7/using/reference/strong-consistency) - - [Riak Data Types](/riak/kv/2.0.7/developing/data-types) +* Index data to be used in conjunction with the new [Riak Search]({{}}riak/kv/2.0.7/using/reference/search). +* Create _and_ activate one or more [bucket types]({{}}riak/kv/2.0.7/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: + - [Strong consistency]({{}}riak/kv/2.0.7/using/reference/strong-consistency) + - [Riak Data Types]({{}}riak/kv/2.0.7/developing/data-types) -If you use other new features, such as [Riak Security](/riak/kv/2.0.7/using/security/basics) or the new [configuration files](/riak/kv/2.0.7/configuring/reference/), you can still +If you use other new features, such as [Riak Security]({{}}riak/kv/2.0.7/using/security/basics) or the new [configuration files]({{}}riak/kv/2.0.7/configuring/reference/), you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. @@ -155,7 +155,7 @@ Riak 2.0 offers a new configuration system that both simplifies configuration syntax and uses one configuration file, `riak.conf`, instead of the two files, `app.config` and `vm.args`, required by the older system. Full documentation of the new system can be found in -[Configuration Files](/riak/kv/2.0.7/configuring/reference/). +[Configuration Files]({{}}riak/kv/2.0.7/configuring/reference/). If you're upgrading to Riak 2.0 from an earlier version, you have two configuration options: @@ -166,12 +166,12 @@ configuration options: recognized in Riak 2.0. 
If you choose the first option, make sure to consult the -[configuration files](/riak/kv/2.0.7/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. +[configuration files]({{}}riak/kv/2.0.7/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. If you choose the second option, Riak will automatically determine that the older configuration system is being used. You should be aware, however, that some settings must be set in an `advanced.config` file. -For a listing of those parameters, see our documentation on [advanced configuration](/riak/kv/2.0.7/configuring/reference/#advanced-configuration). +For a listing of those parameters, see our documentation on [advanced configuration]({{}}riak/kv/2.0.7/configuring/reference/#advanced-configuration). If you choose to keep the existing `app.config` files, you _must_ add the following additional settings in the `riak_core` section: @@ -209,7 +209,7 @@ default to a value of `15`, which can cause problems in some clusters. ## Upgrading Search Information on upgrading Riak Search to 2.0 can be found in our -[Search upgrade guide](/riak/kv/2.0.7/setup/upgrading/search). +[Search upgrade guide]({{}}riak/kv/2.0.7/setup/upgrading/search). ## Migrating from Short Names @@ -220,12 +220,11 @@ and `-name` in `vm.args`. If you are upgrading from a previous version of Riak to 2.0 and are using `-sname` in your `vm.args`, the below steps are required to migrate away from `-sname`. -1. Upgrade to Riak -[1.4.12](http://docs.basho.com/riak/1.4.12/downloads/). +1. Upgrade to Riak 1.4.12. 2. Back up the ring directory on each node, typically located in `/var/lib/riak/ring`. 3. Stop all nodes in your cluster. -4. Run [`riak-admin reip <old_nodename> <new_nodename>`](/riak/kv/2.0.7/using/admin/riak-admin/#reip) on each node in your +4. Run [`riak-admin reip <old_nodename> <new_nodename>`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your cluster. For example, in a 5 node cluster this will be run 25 total times, 5 times on each node. The `<old_nodename>` is the current shortname, and the `<new_nodename>` is the new fully qualified hostname. 
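For example, a sketch of step 4 on a hypothetical three-node cluster (node names are illustrative; the full set of commands runs on every node while the cluster is stopped):

```bash
# On EACH of the three nodes, run one reip per cluster member:
riak-admin reip riak@node1 riak@node1.example.com
riak-admin reip riak@node2 riak@node2.example.com
riak-admin reip riak@node3 riak@node3.example.com
```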
diff --git a/content/riak/kv/2.0.7/using/admin/commands.md b/content/riak/kv/2.0.7/using/admin/commands.md index fcb0b1d053..f45cbc41be 100644 --- a/content/riak/kv/2.0.7/using/admin/commands.md +++ b/content/riak/kv/2.0.7/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.7/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.0.7/using/admin/riak-admin/#cluster -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.0.7/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.0.7/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.0.7/using/admin/riak-admin.md b/content/riak/kv/2.0.7/using/admin/riak-admin.md index 2080af56bb..18d801ec26 100644 --- a/content/riak/kv/2.0.7/using/admin/riak-admin.md +++ b/content/riak/kv/2.0.7/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.0.7/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.0.7/configuring/reference -[use admin commands]: /riak/kv/2.0.7/using/admin/commands -[use admin commands#join]: /riak/kv/2.0.7/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.0.7/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.0.7/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.0.7/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.0.7/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.0.7/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.0.7/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.0.7/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.0.7/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.0.7/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.0.7/setup/downgrade -[security index]: /riak/kv/2.0.7/using/security/ -[security managing]: /riak/kv/2.0.7/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.0.7/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.0.7/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.0.7/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.0.7/using/cluster-operations/strong-consistency -[cluster ops handoff]: /riak/kv/2.0.7/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.0.7/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[use admin commands]: {{}}riak/kv/2.0.7/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.0.7/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.0.7/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.0.7/using/cluster-operations/backing-up +[config reference#node-metadata]: 
{{}}riak/kv/2.0.7/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.0.7/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.0.7/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.0.7/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.0.7/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.0.7/using/cluster-operations/inspecting-node +[use ref monitoring]: {{}}riak/kv/2.0.7/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.0.7/setup/downgrade +[security index]: {{}}riak/kv/2.0.7/using/security/ +[security managing]: {{}}riak/kv/2.0.7/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.0.7/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.0.7/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.0.7/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.0.7/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.0.7/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.0.7/using/admin/riak-admin/#stats ## riak-admin diff --git a/content/riak/kv/2.0.7/using/admin/riak-cli.md b/content/riak/kv/2.0.7/using/admin/riak-cli.md index 956e7c9e1c..3b6d1d8293 100644 --- a/content/riak/kv/2.0.7/using/admin/riak-cli.md +++ b/content/riak/kv/2.0.7/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.7/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.0.7/configuring/reference/ +[configuration file]: {{}}riak/kv/2.0.7/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.0.7/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.0.7/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.0.7/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.0.7/configuring/reference/ ## riak diff --git a/content/riak/kv/2.0.7/using/admin/riak-control.md b/content/riak/kv/2.0.7/using/admin/riak-control.md index 24d7788f59..91811a7da1 100644 --- a/content/riak/kv/2.0.7/using/admin/riak-control.md +++ b/content/riak/kv/2.0.7/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.0.7/configuring/reference +[config reference]: {{}}riak/kv/2.0.7/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.0.7/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.0.7/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. 
When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.0.7/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.0.7/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.0.7/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.0.7/using/cluster-operations/active-anti-entropy.md index 481a3d3f3d..e6ab7654ee 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/active-anti-entropy.md @@ -54,12 +54,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. ## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -87,7 +87,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. 
-Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes.md index 27af8014d3..771bd11031 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.0.7/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.0.7/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.0.7/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.0.7/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.0.7/using/cluster-operations/backing-up.md b/content/riak/kv/2.0.7/using/cluster-operations/backing-up.md index 842e809923..78dbfa6e0b 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.7/ops/running/backups --- -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters -[config reference]: /riak/kv/2.0.7/configuring/reference -[plan backend leveldb]: /riak/kv/2.0.7/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency -[concept aae]: /riak/kv/2.0.7/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.0.7/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.0.7/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. 
-Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.0.7/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.0.7/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.0.7/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down `](/riak/kv/2.0.7/using/admin/riak-admin/#down) + [`riak-admin down `]({{}}riak/kv/2.0.7/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join `](/riak/kv/2.0.7/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join `]({{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace `](/riak/kv/2.0.7/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace `]({{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.0.7/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.0.7/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.0.7/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.0.7/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). 
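Taken together, the restore steps above look roughly like the following sketch, assuming node names of the form `riak@<fqdn>`; substitute your own:

```bash
# Steps 1-5 above, with hypothetical node names:
riak-admin down riak@riak1.example.com            # 1: mark the original instance down
riak-admin cluster join riak@riak2.example.com    # 2: join via any running cluster member
riak-admin cluster force-replace riak@riak1.example.com riak@riak6.example.com  # 3
riak-admin cluster plan                           # 4: review the staged changes
riak-admin cluster commit                         # 5: commit them
```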
diff --git a/content/riak/kv/2.0.7/using/cluster-operations/bucket-types.md b/content/riak/kv/2.0.7/using/cluster-operations/bucket-types.md index e905118550..a0dcadaf28 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.0.7/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. diff --git a/content/riak/kv/2.0.7/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.0.7/using/cluster-operations/changing-cluster-info.md index 6da5dc3f91..329cca7cb6 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.0.7/configuring/reference +[config reference]: {{}}riak/kv/2.0.7/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.0.7/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.0.7/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. 
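The link rewrites in these hunks all follow one mechanical pattern: absolute `/riak/...` and `/images/...` paths become `{{< baseurl >}}`-prefixed paths. The script actually used is not part of this diff, but a hypothetical sed sketch along these lines would produce the same substitutions:

```bash
# Hypothetical helper: rewrite absolute doc links to the Hugo baseurl
# shortcode across the markdown sources (GNU sed, in place)
find content -name '*.md' -exec sed -i \
  -e 's|](/riak/|]({{< baseurl >}}riak/|g' \
  -e 's|](/images/|]({{< baseurl >}}images/|g' \
  -e 's|]: /riak/|]: {{< baseurl >}}riak/|g' \
  {} +
```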
diff --git a/content/riak/kv/2.0.7/using/cluster-operations/handoff.md b/content/riak/kv/2.0.7/using/cluster-operations/handoff.md index 3b11661073..8b2cbf7738 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.0.7/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.0.7/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.0.7/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.0.7/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.0.7/using/cluster-operations/logging.md b/content/riak/kv/2.0.7/using/cluster-operations/logging.md index 5aa3073187..d8620bf9c4 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/logging.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e. set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.0.7/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.0.7/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.0.7/using/cluster-operations/replacing-node.md b/content/riak/kv/2.0.7/using/cluster-operations/replacing-node.md index 56b4096056..7e16e82b78 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.0.7/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.0.7/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.0.7/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.0.7/using/admin/riak-cli/#start): ```bash riak start ``` 4.
Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.0.7/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.0.7/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.0.7/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.0.7/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.0.7/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.0.7/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.0.7/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.0.7/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.0.7/using/cluster-operations/strong-consistency.md index d5737a04be..b4d4b4b606 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.0.7/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. 
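Since all of the strong-consistency stats share the `consistent_` prefix, a quick way to inspect just those counters on a running node is something like the following sketch:

```bash
# Show only the strong-consistency statistics from riak-admin status;
# stats are printed one per line as "name : value"
riak-admin status | grep '^consistent_'
```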
-A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.0.7/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.0.7/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.0.7/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.0.7/using/cluster-operations/v2-multi-datacenter.md index 2f1300a6cd..59b0d5e85e 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/v2-multi-datacenter.md @@ -159,7 +159,7 @@ restarting Riak Enterprise. Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -179,7 +179,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -217,7 +217,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.0.7/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.0.7/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -238,7 +238,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter.md index 0df0d6a1f2..14960e9f38 100644 --- a/content/riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.0.7/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.0.7/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.7/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.7/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.0.7/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.0.7/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.7/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.7/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.0.7/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.0.7/using/performance.md b/content/riak/kv/2.0.7/using/performance.md index e3840e50ab..d2dd4c184f 100644 --- a/content/riak/kv/2.0.7/using/performance.md +++ b/content/riak/kv/2.0.7/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.0.7/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.0.7/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -237,12 +237,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.0.7/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.0.7/using/performance/open-files-limit/) for more details.
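As a companion to the open-files guidance in the hunk above, the current limits can be checked, and the soft limit raised for a session, roughly as follows (the value shown is illustrative, not a recommendation taken from this diff):

```bash
# Inspect the soft and hard open-file limits for the current shell
ulimit -Sn
ulimit -Hn

# Raise the soft limit for this session only (illustrative value;
# persistent limits live in /etc/security/limits.conf or the init system)
ulimit -n 65536
```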
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.0.7/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.0.7/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.0.7/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.0.7/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.0.7/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.0.7/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.0.7/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.0.7/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.0.7/using/performance/benchmarking.md b/content/riak/kv/2.0.7/using/performance/benchmarking.md index 8dba1435be..0fc1c63993 100644 --- a/content/riak/kv/2.0.7/using/performance/benchmarking.md +++ b/content/riak/kv/2.0.7/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.0.7/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.0.7/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.0.7/using/performance/latency-reduction.md b/content/riak/kv/2.0.7/using/performance/latency-reduction.md index 1a99e995b8..5d5b89e06e 100644 --- a/content/riak/kv/2.0.7/using/performance/latency-reduction.md +++ b/content/riak/kv/2.0.7/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.0.7/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.0.7/using/performance/multi-datacenter-tuning.md index 5d7e1169f1..fe7bfa71aa 100644 --- a/content/riak/kv/2.0.7/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.0.7/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.0.7/using/performance +[perf index]: {{}}riak/kv/2.0.7/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.0.7/using/performance/open-files-limit.md b/content/riak/kv/2.0.7/using/performance/open-files-limit.md index d046e7d848..b97cd29540 100644 --- a/content/riak/kv/2.0.7/using/performance/open-files-limit.md +++ b/content/riak/kv/2.0.7/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/ops/tuning/open-files-limit/ --- -[plan backend bitcask]: /riak/kv/2.0.7/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.7/setup/planning/backend/bitcask Riak can consume a large number of open file handles during normal operation. 
The [Bitcask][plan backend bitcask] backend in particular may accumulate a high diff --git a/content/riak/kv/2.0.7/using/reference/bucket-types.md b/content/riak/kv/2.0.7/using/reference/bucket-types.md index e49343e4ad..5da8150d74 100644 --- a/content/riak/kv/2.0.7/using/reference/bucket-types.md +++ b/content/riak/kv/2.0.7/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.0.7/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.0.7/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.0.7/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.0.7/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. @@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.0.7/developing/data-types), and [strong consistency](/riak/kv/2.0.7/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.0.7/developing/data-types), and [strong consistency]({{}}riak/kv/2.0.7/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.0.7/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.0.7/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.7/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.0.7/developing/getting-started) section. 
+> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.7/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.7/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.0.7/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.0.7/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.0.7/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution) with the appropriate application-side business logic. To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.0.7/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.0.7/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.0.7/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.0.7/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.0.7/learn/concepts/buckets) and [keys](/riak/kv/2.0.7/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.0.7/learn/concepts/buckets) and [keys]({{}}riak/kv/2.0.7/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. 
We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.0.7/using/reference/custom-code.md b/content/riak/kv/2.0.7/using/reference/custom-code.md index 4a97ce1d63..32a9d5869b 100644 --- a/content/riak/kv/2.0.7/using/reference/custom-code.md +++ b/content/riak/kv/2.0.7/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.0.7/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.0.7/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.0.7/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.0.7/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.0.7/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.0.7/using/admin/riak-admin/#wait-for-service). {{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.0.7/using/reference/handoff.md b/content/riak/kv/2.0.7/using/reference/handoff.md index 5ff2674149..3b4674fb74 100644 --- a/content/riak/kv/2.0.7/using/reference/handoff.md +++ b/content/riak/kv/2.0.7/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.0.7/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.0.7/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.0.7/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.0.7/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.0.7/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.0.7/learn/glossary/#vnode) no longer belongs to the node on which it's running. 
This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.0.7/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.0.7/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.0.7/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.0.7/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.0.7/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.0.7/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.0.7/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.0.7/configuring/reference/#vnode_management_timer). Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.0.7/using/reference/jmx.md b/content/riak/kv/2.0.7/using/reference/jmx.md index f5766f185d..0fee0b81cf 100644 --- a/content/riak/kv/2.0.7/using/reference/jmx.md +++ b/content/riak/kv/2.0.7/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.7/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.0.7/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.0.7/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.0.7/using/reference/logging.md b/content/riak/kv/2.0.7/using/reference/logging.md index 625fd203bd..7590416790 100644 --- a/content/riak/kv/2.0.7/using/reference/logging.md +++ b/content/riak/kv/2.0.7/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.7/ops/running/logging --- -[cluster ops log]: /riak/kv/2.0.7/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.0.7/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. 
A compact listing of parameters can be found in our [configuration files](/riak/kv/2.0.7/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.0.7/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.0.7/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.0.7/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. * `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.0.7/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.0.7/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.0.7/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.0.7/using/reference/multi-datacenter/comparison.md index 710d84a6b6..663502977c 100644 --- a/content/riak/kv/2.0.7/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.0.7/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.0.7/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.0.7/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.0.7/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.0.7/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. -* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.0.7/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. 
* Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.0.7/using/reference/runtime-interaction.md b/content/riak/kv/2.0.7/using/reference/runtime-interaction.md index 49a63c42ee..61f79edf48 100644 --- a/content/riak/kv/2.0.7/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.0.7/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.7/ops/advanced/runtime --- -[config reference]: /riak/kv/2.0.7/configuring/reference -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.7/configuring/reference +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.0.7/using/reference/search.md b/content/riak/kv/2.0.7/using/reference/search.md index 574ab3b296..2338896ad0 100644 --- a/content/riak/kv/2.0.7/using/reference/search.md +++ b/content/riak/kv/2.0.7/using/reference/search.md @@ -15,21 +15,21 @@ aliases: - /riak/kv/2.0.7/dev/advanced/search --- -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters -[configuring search]: /riak/kv/2.0.7/configuring/search +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters +[configuring search]: {{}}riak/kv/2.0.7/configuring/search > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak Search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -Search, you should check out the [Using Search](/riak/kv/2.0.7/developing/usage/search) document. +Search, you should check out the [Using Search]({{}}riak/kv/2.0.7/developing/usage/search) document. -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -127,7 +127,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.0.7/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.0.7/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -289,7 +289,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on to how customize your own [search schema](/riak/kv/2.0.7/developing/usage/search-schemas). +You can also find more information on to how customize your own [search schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -299,7 +299,7 @@ indexed. 
## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.0.7/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -354,7 +354,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.0.7/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.0.7/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.0.7/using/reference/secondary-indexes.md b/content/riak/kv/2.0.7/using/reference/secondary-indexes.md index 550c2f1575..dbc80c77d8 100644 --- a/content/riak/kv/2.0.7/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.0.7/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.0.7/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.0.7/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.7/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.7/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.0.7/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.0.7/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.0.7/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.0.7/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.0.7/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. 
Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.0.7/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.0.7/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.0.7/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.0.7/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. diff --git a/content/riak/kv/2.0.7/using/reference/statistics-monitoring.md b/content/riak/kv/2.0.7/using/reference/statistics-monitoring.md index 6fe42c1137..8b29526d2a 100644 --- a/content/riak/kv/2.0.7/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.0.7/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.0.7/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.0.7/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.0.7/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.0.7/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.0.7/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.0.7/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.0.7/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.0.7/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. 
More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.0.7/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.0.7/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.0.7/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.0.7/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -143,7 +143,7 @@ Metric | Also | Notes ## Command-line Interface -The [`riak-admin`](/riak/kv/2.0.7/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.0.7/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -168,14 +168,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.0.7/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.0.7/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.0.7/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.0.7/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -222,7 +222,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.0.7/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.0.7/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -246,7 +246,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.0.7/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.0.7/developing/api/http/status) endpoint is also available. #### Nagios @@ -320,14 +320,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.0.7/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.0.7/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. 
It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.0.7/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.0.7/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -349,9 +349,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.0.7/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.0.7/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.0.7/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.0.7/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -367,9 +367,9 @@ Docs](https://github.com/basho/basho_docs). [sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.0.7/using/reference/strong-consistency.md b/content/riak/kv/2.0.7/using/reference/strong-consistency.md index dfbb97b1cd..1f60f58bba 100644 --- a/content/riak/kv/2.0.7/using/reference/strong-consistency.md +++ b/content/riak/kv/2.0.7/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.0.7/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.7/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.7/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.7/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.7/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.7/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.7/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.7/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.7/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.7/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.0.7/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.0.7/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.0.7/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.0.7/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.0.7/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.0.7/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.0.7/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.0.7/using/reference/v2-multi-datacenter/architecture.md index a438a4a4a9..834713cf59 100644 --- a/content/riak/kv/2.0.7/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.7/using/reference/v2-multi-datacenter/architecture.md @@ -78,7 +78,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.0.7/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.0.7/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -90,7 +90,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{}}images/MDC_Full-sync-small.png) <br>
<br> ## Realtime Replication @@ -108,7 +108,7 @@ replication, as illustrated in the Figure below. the update <br>
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{}}images/MDC-real-time-sync-small.png) <br>
<br> ## Restrictions @@ -116,6 +116,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.0.7/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.0.7/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.0.7/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.0.7/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/aae.md index 5dd5a155fc..3eb4456d3a 100644 --- a/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.7/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.0.7/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.0.7/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.0.7/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/architecture.md index a184c59d65..875586fc8d 100644 --- a/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.7/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.0.7/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.0.7/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.0.7/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.0.7/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster. <br>
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime2.png)
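The hunks above elide the command listing itself, but a typical v3 bring-up looks roughly like the following sketch (the cluster names and the `10.0.2.10:9080` cluster-manager address are examples; 9080 is the commonly documented cluster manager port, so verify it against your configuration):

```bash
# Name each cluster once (run on any node of the respective cluster).
riak-repl clustername source_cluster    # on the source
riak-repl clustername sink_cluster      # on the sink

# From the source cluster, connect to the sink's cluster manager...
riak-repl connect 10.0.2.10:9080

# ...then enable and start realtime replication toward the named sink.
riak-repl realtime enable sink_cluster
riak-repl realtime start sink_cluster
```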
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime6.png)
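The bounded queue that backs this realtime mechanism is tunable. A sketch of the relevant `advanced.config` stanza, assuming the `rtq_max_bytes` setting (verify the key name against your version's configuration reference):

```bash
# advanced.config holds a single Erlang term; merge this riak_repl stanza
# into the existing list rather than adding a second top-level term.
# Shown here for reference only:
cat <<'EOF'
[
  {riak_repl, [
    %% Cap the realtime queue at ~100 MB. On overflow the oldest entries
    %% are dropped and must be reconciled later by fullsync or AAE.
    {rtq_max_bytes, 104857600}
  ]}
].
EOF
```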
## Restrictions diff --git a/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/cascading-writes.md index d2fa0529da..0395cccf3d 100644 --- a/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/cascading-writes.md @@ -83,7 +83,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{< baseurl >}}riak/kv/2.0.7/using/cluster-operations/v3-multi-datacenter) for more information. To show the current settings: diff --git a/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/scheduling-fullsync.md index e4eece9c09..7541439d4c 100644 --- a/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.0.7/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.7/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.0.7/configuring/reference/#advanced-configuration +[config reference#advanced]: {{< baseurl >}}riak/kv/2.0.7/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.0.7/using/repair-recovery/errors.md b/content/riak/kv/2.0.7/using/repair-recovery/errors.md index 5f66f02719..f859a692eb 100644 --- a/content/riak/kv/2.0.7/using/repair-recovery/errors.md +++ b/content/riak/kv/2.0.7/using/repair-recovery/errors.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.0.7/configuring/reference +[config reference]: {{< baseurl >}}riak/kv/2.0.7/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1. +`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1.
`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. 
`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.0.7/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.0.7/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.0.7/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.0.7/using/repair-recovery/failure-recovery.md index 775ab2d1da..1a2c4464d0 100644 --- a/content/riak/kv/2.0.7/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.0.7/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.0.7/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.0.7/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.0.7/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.0.7/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
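One quick way to check whether sibling explosion is in play is to watch the sibling and object-size statistics each node exposes; a sketch:

```bash
# node_get_fsm_siblings_* and node_get_fsm_objsize_* are reported by every
# node. Sustained growth in the 95th/99th percentiles suggests unresolved
# siblings are inflating objects and slowing disk and network service times.
riak-admin status | grep -E 'node_get_fsm_(siblings|objsize)'
```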
@@ -116,7 +116,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak/kv/2.0.7/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.0.7/using/repair-recovery/repairs.md b/content/riak/kv/2.0.7/using/repair-recovery/repairs.md index f461e23247..64bd331bce 100644 --- a/content/riak/kv/2.0.7/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.0.7/using/repair-recovery/repairs.md @@ -149,7 +149,7 @@ In the event of major hardware or filesystem problems, LevelDB can become corrup ### Checking for Compaction Errors -Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`](/riak/kv/2.0.7/configuring/reference/) configuration file. The default is `./data`. +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`]({{}}riak/kv/2.0.7/configuring/reference/) configuration file. The default is `./data`. Compaction error messages take the following form: @@ -218,23 +218,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.0.7/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.0.7/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.0.7/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/) is enabled. 
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.0.7/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.0.7/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.0.7/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.0.7/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.0.7/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.0.7/using/repair-recovery/rolling-replaces.md index 8f2b9b0e7d..e7380346f0 100644 --- a/content/riak/kv/2.0.7/using/repair-recovery/rolling-replaces.md +++ b/content/riak/kv/2.0.7/using/repair-recovery/rolling-replaces.md @@ -12,9 +12,9 @@ menu: toc: true --- -[upgrade]: /riak/kv/2.0.7/setup/upgrading/cluster/ -[rolling restarts]: /riak/kv/2.0.7/using/repair-recovery/rolling-restart/ -[add node]: /riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes +[upgrade]: {{}}riak/kv/2.0.7/setup/upgrading/cluster/ +[rolling restarts]: {{}}riak/kv/2.0.7/using/repair-recovery/rolling-restart/ +[add node]: {{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. diff --git a/content/riak/kv/2.0.7/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.0.7/using/repair-recovery/rolling-restart.md index 6d9ce81d63..166432b692 100644 --- a/content/riak/kv/2.0.7/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.0.7/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.7/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.0.7/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.0.7/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.0.7/using/running-a-cluster.md b/content/riak/kv/2.0.7/using/running-a-cluster.md index 10f44fe97b..602affb09c 100644 --- a/content/riak/kv/2.0.7/using/running-a-cluster.md +++ b/content/riak/kv/2.0.7/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. 
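Returning to the repair scenario above: when a repair is warranted, it is issued per partition from the node's Erlang console. A sketch, where the partition ID is only an example (take real IDs from your ring, for instance via `riak-admin vnode-status`):

```bash
# Attach to the local node's console; detach with Ctrl-C rather than
# running q()., which would stop the node itself.
riak attach

# At the Erlang prompt, repair one KV partition at a time, e.g.:
#   riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
```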
-Most configuration changes will be applied to the [configuration file](/riak/kv/2.0.7/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.0.7/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.0.7/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. ## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.0.7/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.0.7/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.0.7/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.0.7/using/cluster-operations/adding-removing-nodes) from a cluster. 
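Pulling the steps above together, a condensed sketch using the example address 192.168.1.10 from this guide:

```bash
# riak.conf on the first node -- listen on the non-local interface:
#   nodename = riak@192.168.1.10
#   listener.protobuf.internal = 192.168.1.10:8087
riak start

# On each additional node (after pointing its own riak.conf at its address):
riak-admin cluster join riak@192.168.1.10

# Once all joins are staged, review and commit the cluster plan.
riak-admin cluster plan
riak-admin cluster commit
```

Staging joins and committing them as one plan means the ring is recalculated once, rather than rebalancing after every individual join.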
> **Ring Creation Size** > diff --git a/content/riak/kv/2.0.7/using/security.md b/content/riak/kv/2.0.7/using/security.md index 8d2f069e1b..db03a6ec9a 100644 --- a/content/riak/kv/2.0.7/using/security.md +++ b/content/riak/kv/2.0.7/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.0.7/ops/advanced/security --- -[config reference search]: /riak/kv/2.0.7/configuring/reference/#search -[config search enabling]: /riak/kv/2.0.7/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.0.7/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.0.7/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.0.7/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.0.7/using/security/basics -[security managing]: /riak/kv/2.0.7/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.0.7/using/security/basics +[security managing]: {{}}riak/kv/2.0.7/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.0.7/developing/usage/search +[usage search]: {{}}riak/kv/2.0.7/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.0.7/using/security/basics.md b/content/riak/kv/2.0.7/using/security/basics.md index 74708e6f7e..054112cfce 100644 --- a/content/riak/kv/2.0.7/using/security/basics.md +++ b/content/riak/kv/2.0.7/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.0.7/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.0.7/using/security/managing-sources/). As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.0.7/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.0.7/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.0.7/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.0.7/using/reference/custom-code). 1. 
Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.0.7/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.0.7/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.0.7/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.0.7/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{}}riak/kv/2.0.7/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.0.7/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{}}riak/kv/2.0.7/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.0.7/configuring/search/) document. +[Riak Search Settings]({{}}riak/kv/2.0.7/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.0.7/using/security/managing-sources/). +An more in-depth tutorial can be found in [Managing Security Sources]({{}}riak/kv/2.0.7/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.0.7/using/security/managing-sources/) document. 
+`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.0.7/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.0.7/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.0.7/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.0.7/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.0.7/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.0.7/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.0.7/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.0.7/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). -If, however, you are using the [HTTP API](/riak/kv/2.0.7/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.0.7/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. 
By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.0.7/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.0.7/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.0.7/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.0.7/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.0.7/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.0.7/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.0.7/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.0.7/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.0.7/configuring/reference/#directories).
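Taken together, a sketch of the security-related settings discussed above (the certificate paths are placeholders, and the key names should be checked against your version's configuration reference):

```bash
# riak.conf -- allowed TLS versions and per-node certificate locations:
#   tls_protocols.tlsv1   = off
#   tls_protocols.tlsv1.1 = off
#   tls_protocols.tlsv1.2 = on
#   ssl.certfile   = /etc/riak/cert.pem
#   ssl.keyfile    = /etc/riak/key.pem
#   ssl.cacertfile = /etc/riak/cacert.pem

# Only after SSL is verified end to end should security be switched on:
riak-admin security enable
riak-admin security status
```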
platform_data_dir The directory in which Riak stores its storage backend data, as well -as active anti-entropy data, and cluster metadata. ./data
alive_tokens Determines the number of ticks the leader will wait to hear from its -associated vnode before assuming that the vnode +associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before ensemble_tick * alive_tokens milliseconds have elapsed, the leader will @@ -1876,8 +1876,8 @@ package) and in R14B04 via a custom repository and branch.
vnode_management_timer Sets the frequency with which -href="/riak/kv/2.0.7/learn/glossary/#vnode">vnodes attempt to trigger handoff between +href="{{< baseurl >}}riak/kv/2.0.7/learn/glossary/#vnode">vnodes attempt to trigger handoff between this node and other nodes in the cluster. 10s (10 seconds)
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.0.7/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.0.7/using/security/managing-sources.md b/content/riak/kv/2.0.7/using/security/managing-sources.md index ea25c9ef4b..05c2b57f7c 100644 --- a/content/riak/kv/2.0.7/using/security/managing-sources.md +++ b/content/riak/kv/2.0.7/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.0.7/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.0.7/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.0.7/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.0.7/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.0.7/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.0.7/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.0.7/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.0.7/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.0.7/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.0.7/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. 
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.0.7/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.0.7/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.0.7/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.0.7/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.0.8/_reference-links.md b/content/riak/kv/2.0.8/_reference-links.md index 5f6131e9db..2a49057217 100644 --- a/content/riak/kv/2.0.8/_reference-links.md +++ b/content/riak/kv/2.0.8/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.0.8/downloads/ -[install index]: /riak/kv/2.0.8/setup/installing -[upgrade index]: /riak/kv/2.0.8/upgrading -[plan index]: /riak/kv/2.0.8/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.0.8/configuring/reference/ -[manage index]: /riak/kv/2.0.8/using/managing -[performance index]: /riak/kv/2.0.8/using/performance -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.0.8/downloads/ +[install index]: {{}}riak/kv/2.0.8/setup/installing +[upgrade index]: {{}}riak/kv/2.0.8/upgrading +[plan index]: {{}}riak/kv/2.0.8/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.0.8/configuring/reference/ +[manage index]: {{}}riak/kv/2.0.8/using/managing +[performance index]: {{}}riak/kv/2.0.8/using/performance +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.0.8/setup/planning -[plan start]: /riak/kv/2.0.8/setup/planning/start -[plan backend]: /riak/kv/2.0.8/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.8/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.8/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.0.8/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.0.8/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.0.8/setup/planning/best-practices -[plan future]: /riak/kv/2.0.8/setup/planning/future +[plan index]: {{}}riak/kv/2.0.8/setup/planning +[plan start]: {{}}riak/kv/2.0.8/setup/planning/start +[plan backend]: {{}}riak/kv/2.0.8/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.8/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.8/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.0.8/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.0.8/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.0.8/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.0.8/setup/planning/future ## Installing -[install index]: /riak/kv/2.0.8/setup/installing -[install aws]: /riak/kv/2.0.8/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.8/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.8/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.8/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.8/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.8/setup/installing/smartos -[install solaris]: /riak/kv/2.0.8/setup/installing/solaris -[install suse]: /riak/kv/2.0.8/setup/installing/suse -[install windows azure]: /riak/kv/2.0.8/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.0.8/setup/installing +[install aws]: {{}}riak/kv/2.0.8/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.8/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.8/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.8/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.8/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.8/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.8/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.8/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.8/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.8/setup/installing/source -[install source erlang]: /riak/kv/2.0.8/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.0.8/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.0.8/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.8/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.0.8/setup/installing/source/jvm -[install verify]: /riak/kv/2.0.8/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.8/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.0.8/setup/upgrading -[upgrade checklist]: /riak/kv/2.0.8/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.0.8/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.0.8/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.0.8/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.0.8/setup/downgrade +[upgrade index]: {{}}riak/kv/2.0.8/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.0.8/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.0.8/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.0.8/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.0.8/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.0.8/setup/downgrade ## Configuring -[config index]: /riak/kv/2.0.8/configuring -[config basic]: /riak/kv/2.0.8/configuring/basic -[config backend]: /riak/kv/2.0.8/configuring/backend -[config manage]: /riak/kv/2.0.8/configuring/managing -[config reference]: /riak/kv/2.0.8/configuring/reference/ -[config strong consistency]: /riak/kv/2.0.8/configuring/strong-consistency -[config load balance]: /riak/kv/2.0.8/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.0.8/configuring/mapreduce -[config search]: /riak/kv/2.0.8/configuring/search/ +[config index]: {{}}riak/kv/2.0.8/configuring +[config basic]: {{}}riak/kv/2.0.8/configuring/basic +[config backend]: {{}}riak/kv/2.0.8/configuring/backend +[config manage]: 
{{}}riak/kv/2.0.8/configuring/managing +[config reference]: {{}}riak/kv/2.0.8/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.0.8/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.0.8/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.0.8/configuring/mapreduce +[config search]: {{}}riak/kv/2.0.8/configuring/search/ -[config v3 mdc]: /riak/kv/2.0.8/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.8/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.8/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.0.8/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.0.8/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.0.8/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.0.8/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.0.8/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.0.8/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.0.8/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.0.8/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.0.8/using/ -[use admin commands]: /riak/kv/2.0.8/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.0.8/using/running-a-cluster +[use index]: {{}}riak/kv/2.0.8/using/ +[use admin commands]: {{}}riak/kv/2.0.8/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.0.8/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.0.8/using/reference/custom-code -[use ref handoff]: /riak/kv/2.0.8/using/reference/handoff -[use ref monitoring]: /riak/kv/2.0.8/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.0.8/using/reference/search -[use ref 2i]: /riak/kv/2.0.8/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.0.8/using/reference/snmp -[use ref strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.0.8/using/reference/jmx -[use ref obj del]: /riak/kv/2.0.8/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.0.8/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.0.8/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.0.8/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.0.8/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.0.8/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.0.8/using/reference/search +[use ref 2i]: {{}}riak/kv/2.0.8/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.0.8/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.0.8/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.0.8/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.0.8/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.0.8/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.0.8/using/admin/ -[use admin commands]: /riak/kv/2.0.8/using/admin/commands/ -[use admin riak cli]: 
/riak/kv/2.0.8/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.0.8/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.0.8/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.0.8/using/admin/ +[use admin commands]: {{}}riak/kv/2.0.8/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.0.8/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.0.8/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.0.8/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.0.8/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.0.8/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.0.8/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.0.8/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.0.8/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.0.8/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.0.8/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.0.8/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.0.8/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.0.8/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.0.8/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.0.8/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.0.8/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.0.8/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.0.8/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.0.8/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.0.8/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.0.8/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.0.8/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.0.8/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.0.8/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.0.8/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.0.8/using/repair-recovery -[repair recover index]: /riak/kv/2.0.8/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.0.8/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.0.8/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.0.8/using/security/ -[security basics]: /riak/kv/2.0.8/using/security/basics -[security managing]: /riak/kv/2.0.8/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.0.8/using/security/ +[security basics]: {{}}riak/kv/2.0.8/using/security/basics +[security managing]: {{}}riak/kv/2.0.8/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.0.8/using/performance/ -[perf 
benchmark]: /riak/kv/2.0.8/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.8/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.0.8/using/performance/erlang -[perf aws]: /riak/kv/2.0.8/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.0.8/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.0.8/using/performance/ +[perf benchmark]: {{}}riak/kv/2.0.8/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.8/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.0.8/using/performance/erlang +[perf aws]: {{}}riak/kv/2.0.8/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.0.8/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.0.8/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.0.8/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.0.8/developing -[dev client libraries]: /riak/kv/2.0.8/developing/client-libraries -[dev data model]: /riak/kv/2.0.8/developing/data-modeling -[dev data types]: /riak/kv/2.0.8/developing/data-types -[dev kv model]: /riak/kv/2.0.8/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.0.8/developing +[dev client libraries]: {{}}riak/kv/2.0.8/developing/client-libraries +[dev data model]: {{}}riak/kv/2.0.8/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.8/developing/data-types +[dev kv model]: {{}}riak/kv/2.0.8/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.0.8/developing/getting-started -[getting started java]: /riak/kv/2.0.8/developing/getting-started/java -[getting started ruby]: /riak/kv/2.0.8/developing/getting-started/ruby -[getting started python]: /riak/kv/2.0.8/developing/getting-started/python -[getting started php]: /riak/kv/2.0.8/developing/getting-started/php -[getting started csharp]: /riak/kv/2.0.8/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.0.8/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.0.8/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.0.8/developing/getting-started/golang - -[obj model java]: /riak/kv/2.0.8/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.8/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.8/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.8/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.8/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.8/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.8/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.0.8/developing/getting-started +[getting started java]: {{}}riak/kv/2.0.8/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.0.8/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.0.8/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.0.8/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.0.8/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.0.8/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.0.8/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.0.8/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.0.8/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.0.8/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.8/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.8/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.8/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.8/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.8/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.0.8/developing/usage -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.8/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.8/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.0.8/developing/usage/content-types -[usage create objects]: /riak/kv/2.0.8/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.0.8/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.0.8/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.0.8/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.8/developing/usage/search -[usage search schema]: /riak/kv/2.0.8/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.8/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.0.8/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.0.8/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.0.8/developing/usage +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.8/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.8/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.0.8/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.0.8/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.0.8/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.0.8/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.0.8/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.8/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.8/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.0.8/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.0.8/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.0.8/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.0.8/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.0.8/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.0.8/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.0.8/developing/api/backend -[dev api http]: /riak/kv/2.0.8/developing/api/http -[dev api http status]: /riak/kv/2.0.8/developing/api/http/status -[dev api pbc]: /riak/kv/2.0.8/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.0.8/developing/api/backend +[dev api http]: {{}}riak/kv/2.0.8/developing/api/http +[dev api http status]: {{}}riak/kv/2.0.8/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.0.8/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.0.8/learn/glossary/ -[glossary aae]: /riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.0.8/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.0.8/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.0.8/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode -[concept aae]: /riak/kv/2.0.8/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.0.8/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.8/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.8/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.0.8/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.8/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.8/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.8/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.0.8/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.8/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.8/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.0.8/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.8/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.8/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.8/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.0.8/add-ons.md b/content/riak/kv/2.0.8/add-ons.md index a11d735907..67c31e6453 100644 --- a/content/riak/kv/2.0.8/add-ons.md +++ b/content/riak/kv/2.0.8/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
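Every hunk in this changeset applies the same mechanical rewrite: absolute documentation paths such as `/riak/kv/2.0.8/...` become shortcode-prefixed paths (shown throughout these hunks as `{{}}`). A sketch of how such a sweep could be scripted, assuming GNU sed; the exact command used for this changeset is not recorded here:

```bash
# Illustrative only (assumes GNU sed): the kind of sweep that produces hunks
# like these. Reference-style definitions ("[label]: /riak/...") and inline
# links ("](/riak/...") are rewritten; /images/ paths follow the same pattern.
find content/riak/kv/2.0.8 -name '*.md' -print0 | xargs -0 sed -i \
  -e 's#]: /riak/#]: {{}}riak/#g' \
  -e 's#](/riak/#]({{}}riak/#g' \
  -e 's#]: /images/#]: {{}}images/#g' \
  -e 's#](/images/#]({{}}images/#g'
```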
-* [Riak Redis Add-on](/riak/kv/2.0.8/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.0.8/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.0.8/add-ons/redis/developing-rra.md b/content/riak/kv/2.0.8/add-ons/redis/developing-rra.md index 09999fea0b..da30655cdc 100644 --- a/content/riak/kv/2.0.8/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.0.8/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.0.8/developing/api/http +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.0.8/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.0.8/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.0.8/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.0.8/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.0.8/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.0.8/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.0.8/add-ons/redis/redis-add-on-features.md index e5317f086d..4681bbe246 100644 --- a/content/riak/kv/2.0.8/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.0.8/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.0.8/add-ons/redis/set-up-rra.md b/content/riak/kv/2.0.8/add-ons/redis/set-up-rra.md index 225f503c99..c2b840698e 100644 --- a/content/riak/kv/2.0.8/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.0.8/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.0.8/setup/installing -[perf open files]: /riak/kv/2.0.8/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.0.8/setup/installing +[perf open files]: {{}}riak/kv/2.0.8/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. diff --git a/content/riak/kv/2.0.8/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.0.8/add-ons/redis/set-up-rra/deployment-models.md index 4d3627a694..1ffa75462b 100644 --- a/content/riak/kv/2.0.8/add-ons/redis/set-up-rra/deployment-models.md +++ b/content/riak/kv/2.0.8/add-ons/redis/set-up-rra/deployment-models.md @@ -13,9 +13,9 @@ toc: true commercial_offering: true --- -[Local-deployment]: /images/redis/rra_deployment_local.png -[Colocated-deployment]: /images/redis/rra_deployment_colocated.png -[Distributed-deployment]: /images/redis/rra_deployment_distributed.png +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png ## Deployment Models @@ -24,7 +24,7 @@ commercial_offering: true In a local cache deployment, the RRA and Redis are deployed to the application server. -![Local-deployment](/images/redis/rra_deployment_local.png) +![Local-deployment]({{}}images/redis/rra_deployment_local.png) Connections: @@ -65,7 +65,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so are not multiplied by the number of servers. -![Colocated-deployment](/images/redis/rra_deployment_colocated.png) +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) Connections: @@ -103,7 +103,7 @@ Disadvantages: In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes. 
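Whichever deployment model is chosen, the read-through and write-around strategies described above are observable from any Redis client pointed at the RRA listener. A quick smoke test, assuming a local listener on port 22122 (port and key are illustrative):

```bash
redis-cli -p 22122 SET test:key '{"hello":"world"}'  # write-around: written to Riak, cached copy invalidated
redis-cli -p 22122 GET test:key                      # read-through: cache miss, value fetched from Riak and cached
redis-cli -p 22122 GET test:key                      # served from Redis until CACHE_TTL expires
```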
-![Distributed-deployment](/images/redis/rra_deployment_distributed.png) +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) Connections: diff --git a/content/riak/kv/2.0.8/add-ons/redis/using-rra.md b/content/riak/kv/2.0.8/add-ons/redis/using-rra.md index 7809187f92..e2ba8d48ec 100644 --- a/content/riak/kv/2.0.8/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.0.8/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.0.8/developing/api/http/ +[dev api http]: {{}}riak/kv/2.0.8/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.0.8/configuring/backend.md b/content/riak/kv/2.0.8/configuring/backend.md index 9e7edab304..06e99c694d 100644 --- a/content/riak/kv/2.0.8/configuring/backend.md +++ b/content/riak/kv/2.0.8/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.8/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.8/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.8/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.8/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.0.8/configuring/basic.md b/content/riak/kv/2.0.8/configuring/basic.md index e84abde594..1412ab97be 100644 --- a/content/riak/kv/2.0.8/configuring/basic.md +++ b/content/riak/kv/2.0.8/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.0.8/ops/building/configuration/ --- -[config reference]: /riak/kv/2.0.8/configuring/reference -[use running cluster]: /riak/kv/2.0.8/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.0.8/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.0.8/using/performance/erlang -[plan start]: /riak/kv/2.0.8/setup/planning/start -[plan best practices]: /riak/kv/2.0.8/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.0.8/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.0.8/setup/planning/backend -[plan backend multi]: /riak/kv/2.0.8/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.0.8/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.0.8/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.8/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.8/using/performance -[perf aws]: /riak/kv/2.0.8/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.0.8/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[use running cluster]: {{}}riak/kv/2.0.8/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.0.8/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.0.8/using/performance/erlang +[plan start]: 
{{}}riak/kv/2.0.8/setup/planning/start +[plan best practices]: {{}}riak/kv/2.0.8/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.0.8/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.0.8/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.0.8/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.0.8/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.0.8/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.8/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.8/using/performance +[perf aws]: {{}}riak/kv/2.0.8/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.0.8/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.0.8/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.8/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
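For instance, after changing `buckets.default.r` and restarting the node, an existing bucket can be forced onto the new defaults with the reset endpoint referenced above (bucket name illustrative):

```curl
curl -XDELETE http://localhost:8098/buckets/my_bucket/props
```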
## System tuning diff --git a/content/riak/kv/2.0.8/configuring/global-object-expiration.md b/content/riak/kv/2.0.8/configuring/global-object-expiration.md index 47c4d9e2ec..be769ca36f 100644 --- a/content/riak/kv/2.0.8/configuring/global-object-expiration.md +++ b/content/riak/kv/2.0.8/configuring/global-object-expiration.md @@ -10,7 +10,6 @@ menu: project: "riak_kv" project_version: "2.0.8" toc: true -canonical_link: "https://docs.basho.com/riak/kv/latest/configuring/global-object-expiration" --- [ttl]: https://en.wikipedia.org/wiki/Time_to_live diff --git a/content/riak/kv/2.0.8/configuring/load-balancing-proxy.md b/content/riak/kv/2.0.8/configuring/load-balancing-proxy.md index fa7bb10a59..4fc37df05b 100644 --- a/content/riak/kv/2.0.8/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.0.8/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.0.8/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.0.8/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.0.8/configuring/managing.md b/content/riak/kv/2.0.8/configuring/managing.md index 4f8ddca893..5cb63620dc 100644 --- a/content/riak/kv/2.0.8/configuring/managing.md +++ b/content/riak/kv/2.0.8/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.0.8/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.0.8/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.0.8/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.0.8/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.0.8/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.0.8/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.0.8/configuring/mapreduce.md b/content/riak/kv/2.0.8/configuring/mapreduce.md index ec8de2e71d..66f01ae579 100644 --- a/content/riak/kv/2.0.8/configuring/mapreduce.md +++ b/content/riak/kv/2.0.8/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.0.8/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.0.8/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.0.8/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.0.8/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.8/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.0.8/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.0.8/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.0.8/configuring/reference.md b/content/riak/kv/2.0.8/configuring/reference.md index fd3f088d08..ec1d44cf20 100644 --- a/content/riak/kv/2.0.8/configuring/reference.md +++ b/content/riak/kv/2.0.8/configuring/reference.md @@ -200,7 +200,7 @@ executables are stored. +as active anti-entropy data, and cluster metadata. @@ -1684,7 +1684,7 @@ abandons the leader (in milliseconds). 
This must be set greater than the diff --git a/content/riak/kv/2.0.8/configuring/search.md b/content/riak/kv/2.0.8/configuring/search.md index be95619d3e..82d11e1a79 100644 --- a/content/riak/kv/2.0.8/configuring/search.md +++ b/content/riak/kv/2.0.8/configuring/search.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.8/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.0.8/developing/usage/search -[usage search schema]: /riak/kv/2.0.8/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.8/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.0.8/developing/usage/custom-extractors -[cluster-ops aae throttle]: /riak/kv/2.0.8/using/cluster-operations/active-anti-entropy/#throttling -[config reference]: /riak/kv/2.0.8/configuring/reference -[config reference#search]: /riak/kv/2.0.8/configuring/reference/#search -[glossary aae]: /riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.0.8/using/security/ +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.8/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.8/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.0.8/developing/usage/custom-extractors +[cluster-ops aae throttle]: {{}}riak/kv/2.0.8/using/cluster-operations/active-anti-entropy/#throttling +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[config reference#search]: {{}}riak/kv/2.0.8/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.0.8/using/security/ [java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads [java se docs]: http://www.oracle.com/technetwork/java/javase/documentation @@ -106,15 +106,15 @@ Valid values: `on` or `off` ### `search.index.error_threshold.failure_count` -The number of failures encountered while updating a search index within [`search.queue.error_threshold.failure_interval`](#search-queue-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. +The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. Valid values: Integer ### `search.index.error_threshold.failure_interval` -The window of time during which `search.queue.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. +The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. -If [`search.queue.error_threshold.failure_count`](#search-queue-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.queue.error_threshold.reset_interval`](search-queue-error-threshold-reset-interval) has passed. +If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed. 
Valid values: Milliseconds diff --git a/content/riak/kv/2.0.8/configuring/strong-consistency.md b/content/riak/kv/2.0.8/configuring/strong-consistency.md index f931660cbb..92a06cb6aa 100644 --- a/content/riak/kv/2.0.8/configuring/strong-consistency.md +++ b/content/riak/kv/2.0.8/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.0.8/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.0.8/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.0.8/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.0.8/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.0.8/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.0.8/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.0.8/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.0.8/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.0.8/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.0.8/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.0.8/learn/concepts/causal-context -[dev data types]: /riak/kv/2.0.8/developing/data-types -[glossary aae]: /riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.0.8/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.0.8/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.0.8/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.0.8/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.0.8/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.0.8/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.0.8/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.0.8/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.0.8/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.0.8/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.0.8/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.0.8/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.0.8/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.0.8/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.0.8/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.0.8/developing/data-types +[glossary 
aae]: {{}}riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.0.8/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.0.8/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.0.8/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.0.8/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.0.8/configuring/v2-multi-datacenter.md b/content/riak/kv/2.0.8/configuring/v2-multi-datacenter.md index bb8d1d69c9..3fcaa3ff81 100644 --- a/content/riak/kv/2.0.8/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.8/configuring/v2-multi-datacenter.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.8/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.0.8/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.8/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.8/configuring/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication capabilities offer a diff --git a/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/nat.md index a541418408..9b82928274 100644 --- a/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/nat.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.8/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.0.8/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.8/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.8/configuring/v3-multi-datacenter/nat/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/nat/) instead. {{% /note %}} Riak Enterprise supports replication of data on networks that use static diff --git a/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/quick-start.md index 27fa4e9078..f779eee7b2 100644 --- a/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/quick-start.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.8/configuring/v3-multi-datacenter/quick-start/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/quick-start/) instead. {{% /note %}} The Riak Multi-Datacenter Replication Quick Start will walk you through diff --git a/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/ssl.md index a52678dc39..f68995f1c4 100644 --- a/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.8/configuring/v2-multi-datacenter/ssl.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. 
Please use [v3]({{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl/) instead. {{% /note %}} ## Features diff --git a/content/riak/kv/2.0.8/configuring/v3-multi-datacenter.md b/content/riak/kv/2.0.8/configuring/v3-multi-datacenter.md index 738cf5c579..be93789345 100644 --- a/content/riak/kv/2.0.8/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.8/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.8/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.0.8/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.0.8/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/nat.md index c90f5ed750..3aabd2ed20 100644 --- a/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.8/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. diff --git a/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/quick-start.md index 13314ddf74..157ba0e34a 100644 --- a/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.8/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.0.8/using/performance -[config v3 mdc]: /riak/kv/2.0.8/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.0.8/using/performance +[config v3 mdc]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl.md index f15f7b6d7a..d95ed84585 100644 --- a/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.8/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.0.8/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.0.8/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.0.8/developing/api/backend.md b/content/riak/kv/2.0.8/developing/api/backend.md index cf80e3bf34..bd77df9413 100644 --- a/content/riak/kv/2.0.8/developing/api/backend.md +++ b/content/riak/kv/2.0.8/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/backend-api --- -[plan backend]: /riak/kv/2.0.8/setup/planning/backend +[plan backend]: {{}}riak/kv/2.0.8/setup/planning/backend Riak's storage API uniformly applies to 
all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.0.8/developing/api/http.md b/content/riak/kv/2.0.8/developing/api/http.md index dfe6e34295..1fe5e2fac3 100644 --- a/content/riak/kv/2.0.8/developing/api/http.md +++ b/content/riak/kv/2.0.8/developing/api/http.md @@ -29,20 +29,20 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. Method | URL | Doc :------|:----|:--- -`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties](/riak/kv/2.0.8/developing/api/http/get-bucket-props) -`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties](/riak/kv/2.0.8/developing/api/http/set-bucket-props) -`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties](/riak/kv/2.0.8/developing/api/http/reset-bucket-props) -`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.0.8/developing/api/http/list-buckets) -`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys](/riak/kv/2.0.8/developing/api/http/list-keys) +`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.0.8/developing/api/http/get-bucket-props) +`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.0.8/developing/api/http/set-bucket-props) +`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.8/developing/api/http/reset-bucket-props) +`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.0.8/developing/api/http/list-buckets) +`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.0.8/developing/api/http/list-keys) ## Object-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/types//buckets//keys/` | [HTTP Fetch Object](/riak/kv/2.0.8/developing/api/http/fetch-object) -`POST` | `/types//buckets//keys/` | [HTTP Store Object](/riak/kv/2.0.8/developing/api/http/store-object) -`PUT` | `/types//buckets//keys/` | [HTTP Store Object](/riak/kv/2.0.8/developing/api/http/store-object) -`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object](/riak/kv/2.0.8/developing/api/http/delete-object) +`GET` | `/types//buckets//keys/` | [HTTP Fetch Object]({{}}riak/kv/2.0.8/developing/api/http/fetch-object) +`POST` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.0.8/developing/api/http/store-object) +`PUT` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.0.8/developing/api/http/store-object) +`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object]({{}}riak/kv/2.0.8/developing/api/http/delete-object) ## Riak-Data-Type-related Operations @@ -52,9 +52,9 @@ Method | URL `POST` | `/types//buckets//datatypes` `POST` | `/types//buckets//datatypes/` -For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.0.8/learn/concepts/crdts), -see the `curl` examples in [Using Data Types](/riak/kv/2.0.8/developing/data-types/#usage-examples) -and subpages e.g. [sets](/riak/kv/2.0.8/developing/data-types/sets). +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.0.8/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.0.8/developing/data-types/#usage-examples) +and subpages e.g. [sets]({{}}riak/kv/2.0.8/developing/data-types/sets). Advanced users may consult the technical documentation inside the Riak KV internal module `riak_kv_wm_crdt`. @@ -63,26 +63,26 @@ KV internal module `riak_kv_wm_crdt`. 
Method | URL | Doc :------|:----|:--- -`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.0.8/developing/api/http/mapreduce) -`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes](/riak/kv/2.0.8/developing/api/http/secondary-indexes) -`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes](/riak/kv/2.0.8/developing/api/http/secondary-indexes) +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.0.8/developing/api/http/mapreduce) +`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.8/developing/api/http/secondary-indexes) +`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.8/developing/api/http/secondary-indexes) ## Server-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/ping` | [HTTP Ping](/riak/kv/2.0.8/developing/api/http/ping) -`GET` | `/stats` | [HTTP Status](/riak/kv/2.0.8/developing/api/http/status) -`GET` | `/` | [HTTP List Resources](/riak/kv/2.0.8/developing/api/http/list-resources) +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.0.8/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.0.8/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.0.8/developing/api/http/list-resources) ## Search-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/search/query/` | [HTTP Search Query](/riak/kv/2.0.8/developing/api/http/search-query) -`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.0.8/developing/api/http/search-index-info) -`GET` | `/search/index/` | [HTTP Fetch Search Index](/riak/kv/2.0.8/developing/api/http/fetch-search-index) -`PUT` | `/search/index/` | [HTTP Store Search Index](/riak/kv/2.0.8/developing/api/http/store-search-index) -`DELETE` | `/search/index/` | [HTTP Delete Search Index](/riak/kv/2.0.8/developing/api/http/delete-search-index) -`GET` | `/search/schema/` | [HTTP Fetch Search Schema](/riak/kv/2.0.8/developing/api/http/fetch-search-schema) -`PUT` | `/search/schema/` | [HTTP Store Search Schema](/riak/kv/2.0.8/developing/api/http/store-search-schema) +`GET` | `/search/query/` | [HTTP Search Query]({{}}riak/kv/2.0.8/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.0.8/developing/api/http/search-index-info) +`GET` | `/search/index/` | [HTTP Fetch Search Index]({{}}riak/kv/2.0.8/developing/api/http/fetch-search-index) +`PUT` | `/search/index/` | [HTTP Store Search Index]({{}}riak/kv/2.0.8/developing/api/http/store-search-index) +`DELETE` | `/search/index/` | [HTTP Delete Search Index]({{}}riak/kv/2.0.8/developing/api/http/delete-search-index) +`GET` | `/search/schema/` | [HTTP Fetch Search Schema]({{}}riak/kv/2.0.8/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/` | [HTTP Store Search Schema]({{}}riak/kv/2.0.8/developing/api/http/store-search-schema) diff --git a/content/riak/kv/2.0.8/developing/api/http/counters.md b/content/riak/kv/2.0.8/developing/api/http/counters.md index 60ace7e51c..8fb6e04519 100644 --- a/content/riak/kv/2.0.8/developing/api/http/counters.md +++ b/content/riak/kv/2.0.8/developing/api/http/counters.md @@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY ## Response -The regular POST/PUT ([HTTP Store Object](/riak/kv/2.0.8/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.0.8/developing/api/http/fetch-object)) responses apply here. 
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.0.8/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.0.8/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.0.8/developing/api/http/fetch-object.md b/content/riak/kv/2.0.8/developing/api/http/fetch-object.md index b43c59cb84..e4bdbed000 100644 --- a/content/riak/kv/2.0.8/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.0.8/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.0.8/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.0.8/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.0.8/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.8/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.0.8/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.8/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.0.8/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.0.8/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.8/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.8/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.0.8/developing/api/http/fetch-search-index.md b/content/riak/kv/2.0.8/developing/api/http/fetch-search-index.md index 0599896d09..dcaa7d926b 100644 --- a/content/riak/kv/2.0.8/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.0.8/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.0.8/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.0.8/developing/usage/search/#simple-setup). 
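Backing up to the counters hunk above: the documented endpoint takes the increment amount as the request body. A sketch (bucket and key are illustrative; the bucket must already have `allow_mult` enabled, as counters require):

```curl
curl -XPOST http://localhost:8098/buckets/counters_bucket/counters/visits -d "5"   # increment by 5
curl http://localhost:8098/buckets/counters_bucket/counters/visits                 # read the current value
```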
## Request @@ -36,7 +36,7 @@ GET /search/index/ ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.0.8/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.0.8/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.0.8/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.0.8/developing/api/http/fetch-search-schema.md index 9156035c77..41450245b2 100644 --- a/content/riak/kv/2.0.8/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.0.8/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.0.8/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.0.8/developing/api/http/get-bucket-props.md b/content/riak/kv/2.0.8/developing/api/http/get-bucket-props.md index 3f1862185e..a606a7c62b 100644 --- a/content/riak/kv/2.0.8/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.0.8/developing/api/http/get-bucket-props.md @@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.0.8/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.0.8/developing/api/http/list-keys). ## Response @@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.0.8/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{}}riak/kv/2.0.8/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.0.8/developing/api/http/link-walking.md b/content/riak/kv/2.0.8/developing/api/http/link-walking.md index 7c3934254f..6495257903 100644 --- a/content/riak/kv/2.0.8/developing/api/http/link-walking.md +++ b/content/riak/kv/2.0.8/developing/api/http/link-walking.md @@ -21,8 +21,8 @@ This feature is deprecated and will be removed in a future version. Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.0.8/learn/glossary/#links). +is a special case of [MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.0.8/learn/glossary/#links). ## Request @@ -68,7 +68,7 @@ single object that was found. 
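As with the index endpoint above, a stored schema can be fetched directly over HTTP; a sketch, with the schema name illustrative:

```curl
curl http://localhost:8098/search/schema/my_custom_schema   # returns the schema's XML
```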
If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.0.8/developing/api/http/fetch-object), without the status +response from [fetching the object]({{}}riak/kv/2.0.8/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.0.8/developing/api/http/list-resources.md b/content/riak/kv/2.0.8/developing/api/http/list-resources.md index c6522767a1..e154343bb7 100644 --- a/content/riak/kv/2.0.8/developing/api/http/list-resources.md +++ b/content/riak/kv/2.0.8/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.0.8/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.0.8/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.0.8/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.0.8/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.0.8/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.0.8/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.0.8/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.0.8/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.0.8/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.0.8/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.0.8/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.0.8/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.0.8/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.0.8/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.0.8/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.0.8/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.0.8/developing/api/http/mapreduce.md b/content/riak/kv/2.0.8/developing/api/http/mapreduce.md index 878bc59fe4..4ede11f77c 100644 --- a/content/riak/kv/2.0.8/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.0.8/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. 
The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.0.8/developing/api/http/search-index-info.md b/content/riak/kv/2.0.8/developing/api/http/search-index-info.md index d5fff02f16..0e3fa36fe1 100644 --- a/content/riak/kv/2.0.8/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.0.8/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.0.8/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.0.8/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.0.8/developing/api/http/search-query.md b/content/riak/kv/2.0.8/developing/api/http/search-query.md index 86c0a61897..c9350f5078 100644 --- a/content/riak/kv/2.0.8/developing/api/http/search-query.md +++ b/content/riak/kv/2.0.8/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.0.8/developing/usage/search) query. +Performs a [Riak KV Search]({{}}riak/kv/2.0.8/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.0.8/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{}}riak/kv/2.0.8/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.0.8/developing/api/http/secondary-indexes.md b/content/riak/kv/2.0.8/developing/api/http/secondary-indexes.md index 887d3dc880..095420fcb9 100644 --- a/content/riak/kv/2.0.8/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.0.8/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.0.8/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
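For orientation while reviewing the 2i hunk above: exact-match and range queries against a tagged object look like this (bucket, index names, and values are illustrative):

```curl
curl http://localhost:8098/buckets/users/index/email_bin/bob@example.com   # exact match on a binary index
curl http://localhost:8098/buckets/users/index/age_int/21/40               # range query on an integer index
```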
## Request diff --git a/content/riak/kv/2.0.8/developing/api/http/set-bucket-props.md b/content/riak/kv/2.0.8/developing/api/http/set-bucket-props.md index 702f148891..563938313f 100644 --- a/content/riak/kv/2.0.8/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.0.8/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.0.8/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.0.8/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{}}riak/kv/2.0.8/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.0.8/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.0.8/developing/api/http/status.md b/content/riak/kv/2.0.8/developing/api/http/status.md index c45e42a308..56872cd133 100644 --- a/content/riak/kv/2.0.8/developing/api/http/status.md +++ b/content/riak/kv/2.0.8/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.0.8/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.0.8/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.0.8/developing/api/http/store-object.md b/content/riak/kv/2.0.8/developing/api/http/store-object.md index d5ce0404f6..3397b03e2e 100644 --- a/content/riak/kv/2.0.8/developing/api/http/store-object.md +++ b/content/riak/kv/2.0.8/developing/api/http/store-object.md @@ -38,8 +38,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.0.8/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.8/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{}}riak/kv/2.0.8/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.8/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -83,7 +83,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.0.8/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.0.8/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. 
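Tying the store-object hunks together: a store that sets a 2i entry via an `X-Riak-Index-*` header and asks for the stored body back might look like this (type, bucket, key, and index are illustrative):

```curl
curl -XPUT "http://localhost:8098/types/default/buckets/users/keys/bob?returnbody=true" \
  -H "Content-Type: application/json" \
  -H "x-riak-index-email_bin: bob@example.com" \
  -d '{"name": "Bob"}'
```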
diff --git a/content/riak/kv/2.0.8/developing/api/http/store-search-index.md b/content/riak/kv/2.0.8/developing/api/http/store-search-index.md index 9b03987595..aebeeb45c5 100644 --- a/content/riak/kv/2.0.8/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.0.8/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.0.8/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.0.8/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.0.8/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.0.8/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.0.8/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.0.8/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.0.8/developing/api/http/store-search-schema.md b/content/riak/kv/2.0.8/developing/api/http/store-search-schema.md index 81d52447ed..563e81d294 100644 --- a/content/riak/kv/2.0.8/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.0.8/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.0.8/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/index/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.0.8/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the file `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request: diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers.md index 764ebf9b58..92190c6fce 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers.md @@ -139,47 +139,47 @@ message RpbErrorResp { ## Bucket Operations -* [PBC List Buckets](/riak/kv/2.0.8/developing/api/protocol-buffers/list-buckets) -* [PBC List Keys](/riak/kv/2.0.8/developing/api/protocol-buffers/list-keys) -* [PBC Get Bucket Properties](/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props) -* [PBC Set Bucket Properties](/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props) -* [PBC Reset Bucket Properties](/riak/kv/2.0.8/developing/api/protocol-buffers/reset-bucket-props) +* [PBC List Buckets]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/reset-bucket-props) ## Object/Key Operations -* [PBC Fetch Object](/riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object) -* [PBC Store Object](/riak/kv/2.0.8/developing/api/protocol-buffers/store-object) -* [PBC Delete Object](/riak/kv/2.0.8/developing/api/protocol-buffers/delete-object) +* [PBC Fetch Object]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/delete-object) ## Query Operations -* [PBC MapReduce](/riak/kv/2.0.8/developing/api/protocol-buffers/mapreduce) -* [PBC Secondary Indexes](/riak/kv/2.0.8/developing/api/protocol-buffers/secondary-indexes) -* [PBC Search](/riak/kv/2.0.8/developing/api/protocol-buffers/search) +* [PBC MapReduce]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/search) ## Server Operations -* [PBC Ping](/riak/kv/2.0.8/developing/api/protocol-buffers/ping) -* [PBC Server Info](/riak/kv/2.0.8/developing/api/protocol-buffers/server-info) +* [PBC Ping]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/server-info) ## Bucket Type Operations -* [PBC Get Bucket Type](/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-type) -* [PBC Set Bucket Type](/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-type) +* [PBC Get Bucket Type]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-type) ## Data Type Operations -* [PBC Data Type Fetch](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-fetch) -* [PBC Data Type Union](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-union) -* [PBC Data Type Store](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-store) -* [PBC Data Type Counter 
Store](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/auth-req.md index 8cae42bf6d..a9f8960fbe 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/auth-req.md @@ -27,4 +27,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.0.8/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.0.8/using/security/basics). diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/delete-object.md index 3f2bd33eab..b0d04e7a71 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/delete-object.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.0.8/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{}}riak/kv/2.0.8/using/cluster-operations/bucket-types)/bucket/key location. 
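For readers who want to exercise the same operation without a Protocol Buffers client, the HTTP interface offers an equivalent delete. This is a sketch only; the type, bucket, and key names are made up:

```curl
# HTTP counterpart of the PBC delete: remove the object at bucket-type/bucket/key
curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
```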
## Request diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-counter-store.md index 5280171762..860ca7c6bb 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.0.8/developing/data-types). +An operation to update a [counter]({{}}riak/kv/2.0.8/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-fetch.md index e7c4d6b201..717daff00d 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.0.8/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.0.8/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.0.8/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{}}riak/kv/2.0.8/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. 
```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.0.8/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{}}riak/kv/2.0.8/learn/glossary/#vector-clock) for standard KV updates. The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store.md index 63de61d8da..5ba791dd54 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-set-store.md index 2e92901f46..bff3c5f70f 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-store.md index 0c54a2563b..460e551443 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.0.8/developing/data-types). +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.0.8/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`. 
The `DtOp` value specifies which Data Type-specific operation is being -performed. More on that in the [PBC Data Type Union](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-union) document. +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-union) document. ```protobuf message DtUpdateReq { @@ -50,11 +50,11 @@ message DtUpdateReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored -`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.0.8/using/cluster-operations/bucket-types). +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.0.8/using/cluster-operations/bucket-types). Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a -[counter](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store). +[counter]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-map-store). ```protobuf message DtOp { @@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum` Parameter | Description :---------|:----------- `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. -`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.0.8/learn/glossary/#vector-clock) +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.0.8/learn/glossary/#vector-clock) `w` | Write quorum, i.e. how many replicas to write to before returning a successful response `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted @@ -92,7 +92,7 @@ Parameter | Description ## Response The response to a Data Type update request is analogous to -[`RpbPutResp`](/riak/kv/2.0.8/developing/api/protocol-buffers/store-object) for KV operations. If the +[`RpbPutResp`]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/store-object) for KV operations. If the `return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-union.md index 4fc1ca7ad2..7774c60404 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-union.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/dt-union.md @@ -28,4 +28,4 @@ message DtOp { ``` The included operation depends on the Data Type that is being updated. 
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.0.8/developing/api/protocol-buffers/dt-store) message. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object.md index 6a13be2622..65e3ff8688 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object.md @@ -47,7 +47,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or @@ -87,7 +87,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.0.8/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -114,7 +114,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `video/mp4` -* `vtag` --- The object's [vtag](/riak/kv/2.0.8/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{}}riak/kv/2.0.8/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -132,7 +132,7 @@ of the following optional parameters: } ``` Notice that both a key and value can be stored or just a key. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.0.8/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key) diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props.md index 3be5ff424a..529d52c5f6 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props.md @@ -26,7 +26,7 @@ message RpbGetBucketReq { } ``` -The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.0.8/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.0.8/using/cluster-operations/bucket-types) parameter (`type`) is optional. 
If it is not specified, the `default` bucket type will be used. ## Response @@ -85,7 +85,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.0.8/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{}}riak/kv/2.0.8/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -106,5 +106,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) {{% /note %}} diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-type.md index 46ad109343..dc9e605bb5 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-type.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.0.8/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.0.8/using/cluster-operations/bucket-types). ## Request @@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-client-id.md index ed4735fd31..33476b382d 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/get-client-id.md @@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.0.8/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/set-client-id). 
## Request diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/mapreduce.md index 730420a298..b684288354 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/mapreduce.md @@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.0.8/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.0.8/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as [REST API]({{}}riak/kv/2.0.8/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{}}riak/kv/2.0.8/developing/app-guide/advanced-mapreduce/#erlang) ## Response diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/reset-bucket-props.md index 695f551836..39614af59d 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/reset-bucket-props.md @@ -27,7 +27,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/secondary-indexes.md index 7e7a043663..d8f999f643 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/secondary-indexes.md @@ -61,7 +61,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.0.8/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.0.8/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -84,7 +84,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object). 
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props.md index 71d0396011..e915026879 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props.md @@ -29,9 +29,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-type.md index 68df53f0e7..8677b57438 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-type.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.8/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.0.8/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types). ## Request @@ -28,4 +28,4 @@ message RpbSetBucketTypeReq { ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/store-object.md index bc47e82f07..ffc8ef3263 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/store-object.md @@ -16,11 +16,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.0.8/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.0.8/learn/concepts/buckets), and [bucket type](/riak/kv/2.0.8/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{}}riak/kv/2.0.8/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.0.8/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types). 
A bucket must always be specified (via `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no -[bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) is assigned, Riak will assign -`default`, which means that the [default bucket configuration](/riak/kv/2.0.8/configuring/reference/#default-bucket-properties) will be used. +[bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) is assigned, Riak will assign +`default`, which means that the [default bucket configuration]({{}}riak/kv/2.0.8/configuring/reference/#default-bucket-properties) will be used. #### Request @@ -50,7 +50,7 @@ message RpbPutReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket, in bytes, in which the key/value is to reside -`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object) +`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object) #### Optional Parameters @@ -93,7 +93,7 @@ message RpbPutResp { If `return_body` is set to `true` on the PUT request, the `RpbPutResp` will contain the current object after the PUT completes, in `contents`, -as well as the object's [causal context](/riak/kv/2.0.8/learn/concepts/causal-context), in the `vclock` +as well as the object's [causal context]({{}}riak/kv/2.0.8/learn/concepts/causal-context), in the `vclock` field. The `key` will be sent only if the server generated a random key for the object. diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-delete.md index e7d9914be7..4fd2b9dec8 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-delete.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-delete.md @@ -29,5 +29,5 @@ message RpbYokozunaIndexDeleteReq { ## Response -Returns a [RpbDelResp](/riak/kv/2.0.8/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbDelResp]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-get.md index 018ae7edaf..48b49d3d56 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-get.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-get.md @@ -53,7 +53,7 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.8/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.8/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. 
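The same index metadata is also visible over HTTP, which can be handy for spot-checking what a PBC client reports. A sketch (the index name `my_index` is a placeholder):

```curl
# Fetch one Search index definition (name, schema, n_val)
curl http://localhost:8098/search/index/my_index

# Omit the name to list all indexes
curl http://localhost:8098/search/index
```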
diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-put.md index 4423dfe27b..d3dff75a49 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-put.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-index-put.md @@ -37,9 +37,9 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.8/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.8/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.8/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-get.md index 5970c405ca..5d73433c4b 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.0.8/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-put.md index cb284fabd6..ea89f24d5a 100644 --- a/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.0.8/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.0.8/developing/usage/search-schemas). +Create a new Solr [search schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.0.8/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.8/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/#message-codes) code with no data on success. 
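For comparison, the HTTP route to the same result pairs the schema name (in the URL) with the XML content (as the request body). This sketch assumes a schema file `my_schema.xml` on disk:

```curl
curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
  -H 'Content-Type: application/xml' \
  --data-binary @my_schema.xml
```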
diff --git a/content/riak/kv/2.0.8/developing/app-guide.md b/content/riak/kv/2.0.8/developing/app-guide.md index 8893f6e5ef..4061b80218 100644 --- a/content/riak/kv/2.0.8/developing/app-guide.md +++ b/content/riak/kv/2.0.8/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.0.8/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.0.8/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.0.8/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.0.8/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.0.8/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.0.8/developing/key-value-modeling -[dev data types]: /riak/kv/2.0.8/developing/data-types -[dev data types#counters]: /riak/kv/2.0.8/developing/data-types/#counters -[dev data types#sets]: /riak/kv/2.0.8/developing/data-types/#sets -[dev data types#maps]: /riak/kv/2.0.8/developing/data-types/#maps -[usage create objects]: /riak/kv/2.0.8/developing/usage/creating-objects -[usage search]: /riak/kv/2.0.8/developing/usage/search -[use ref search]: /riak/kv/2.0.8/using/reference/search -[usage 2i]: /riak/kv/2.0.8/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.0.8/developing/client-libraries -[concept crdts]: /riak/kv/2.0.8/learn/concepts/crdts -[dev data model]: /riak/kv/2.0.8/developing/data-modeling -[usage mapreduce]: /riak/kv/2.0.8/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.0.8/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.0.8/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.8/setup/planning/backend/memory -[obj model java]: /riak/kv/2.0.8/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.8/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.8/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.8/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.8/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.8/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.8/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[use ref strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.0.8/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.0.8/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.0.8/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.0.8/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties -[install index]: /riak/kv/2.0.8/setup/installing -[getting started]: /riak/kv/2.0.8/developing/getting-started -[usage index]: /riak/kv/2.0.8/developing/usage -[glossary]: /riak/kv/2.0.8/learn/glossary +[usage conflict resolution]: {{}}riak/kv/2.0.8/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.0.8/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.0.8/developing/data-modeling/#sensor-data +[concept eventual consistency]: 
{{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.0.8/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.0.8/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.0.8/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.0.8/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.0.8/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.0.8/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.0.8/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search +[use ref search]: {{}}riak/kv/2.0.8/using/reference/search +[usage 2i]: {{}}riak/kv/2.0.8/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.0.8/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.0.8/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.0.8/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.0.8/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.0.8/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.0.8/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.8/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.0.8/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.0.8/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.8/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.8/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.8/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.8/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.8/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[cluster ops strong consistency]: {{}}riak/kv/2.0.8/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/2.0.8/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/2.0.8/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/2.0.8/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/2.0.8/setup/installing +[getting started]: {{}}riak/kv/2.0.8/developing/getting-started +[usage index]: {{}}riak/kv/2.0.8/developing/usage +[glossary]: {{}}riak/kv/2.0.8/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -118,7 +118,7 @@ Riak may not be such a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance. 
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.0.8/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.0.8/developing/app-guide/advanced-mapreduce.md index 2457ad20d2..c64ec674c0 100644 --- a/content/riak/kv/2.0.8/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.0.8/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.8/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.0.8/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.0.8/using/reference/custom-code -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.8/configuring/reference +[usage 2i]: {{}}riak/kv/2.0.8/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.0.8/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.8/configuring/reference [google mr]: http://research.google.com/archive/mapreduce.html [mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map [function contrib]: https://github.com/basho/riak_function_contrib @@ -728,7 +728,7 @@ You can use streaming with Erlang via the Riak KV local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.0.8/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.0.8/developing/app-guide/cluster-metadata.md index b95f2e455a..331dff29eb 100644 --- a/content/riak/kv/2.0.8/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.0.8/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.0.8/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.0.8/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.0.8/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. 
Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.0.8/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.0.8/developing/app-guide/replication-properties.md b/content/riak/kv/2.0.8/developing/app-guide/replication-properties.md index c9b9c91c20..f50ae0fe8c 100644 --- a/content/riak/kv/2.0.8/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.0.8/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.8/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.8/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.0.8/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.0.8/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.0.8/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.0.8/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. 
## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.8/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.8/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.0.8/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.8/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.8/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. 
The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.0.8/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.8/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.8/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.0.8/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.0.8/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.0.8/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.0.8/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.0.8/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.0.8/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.8/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.8/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.0.8/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.0.8/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.8/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.8/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.8/developing/app-guide/strong-consistency.md b/content/riak/kv/2.0.8/developing/app-guide/strong-consistency.md index b6f77e2252..f349a9ec3f 100644 --- a/content/riak/kv/2.0.8/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.0.8/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.0.8/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/kv/2.0.8/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.0.8/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.0.8/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.0.8/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.0.8/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.0.8/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.0.8/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.0.8/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.0.8/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/kv/2.0.8/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.0.8/developing/client-libraries -[getting started]: /riak/kv/2.0.8/developing/getting-started -[config strong consistency#details]: /riak/kv/2.0.8/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[concept eventual consistency]: 
{{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.0.8/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.0.8/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.0.8/using/cluster-operations/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.0.8/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.0.8/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.0.8/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.0.8/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.0.8/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.0.8/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.0.8/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.0.8/developing/client-libraries +[getting started]: {{}}riak/kv/2.0.8/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.0.8/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.0.8/developing/app-guide/write-once.md b/content/riak/kv/2.0.8/developing/app-guide/write-once.md index ce334c6582..c1d1cfb66c 100644 --- a/content/riak/kv/2.0.8/developing/app-guide/write-once.md +++ b/content/riak/kv/2.0.8/developing/app-guide/write-once.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.8/dev/advanced/write-once --- -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[bucket type]: /riak/kv/2.0.8/developing/usage/bucket-types -[Riak data types]: /riak/kv/2.0.8/developing/data-types -[strong consistency]: /riak/kv/2.0.8/developing/app-guide/strong-consistency +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.0.8/developing/data-types +[strong consistency]: {{}}riak/kv/2.0.8/developing/app-guide/strong-consistency Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. @@ -98,7 +98,7 @@ The relationship between the `riak_client`, write-once workers, and vnode proxies is illustrated in the following diagram:
-![Write Once](/images/write_once.png) +![Write Once]({{}}images/write_once.png)
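To make the usage pattern concrete, here is a rough Python sketch of writing to a write-once bucket. It assumes a hypothetical bucket type named `no_siblings` that has already been created and activated with `write_once` set to `true`; all names are illustrative:

```python
import riak

# Assumed setup (run once by an operator, names hypothetical):
#   riak-admin bucket-type create no_siblings '{"props":{"write_once":true}}'
#   riak-admin bucket-type activate no_siblings
client = riak.RiakClient(pb_port=8087)
events = client.bucket_type('no_siblings').bucket('click_events')

# Entries are written exactly once and never updated, so every key is unique
obj = events.new('event-000001', data={'user': 'alice', 'action': 'login'})
obj.store()
```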
## Client Impacts @@ -149,7 +149,7 @@ LevelDB. Riak will automatically fall back to synchronous writes with all other backends. {{% note title="Note on the `multi` backend" %}} -The [Multi](/riak/kv/2.0.8/setup/planning/backend/multi) backend does not +The [Multi]({{}}riak/kv/2.0.8/setup/planning/backend/multi) backend does not support asynchronous writes. Therefore, if LevelDB is used with the Multi backend, it will be used in synchronous mode. {{% /note %}} diff --git a/content/riak/kv/2.0.8/developing/client-libraries.md b/content/riak/kv/2.0.8/developing/client-libraries.md index 1df3328fe0..3fdfb636b4 100644 --- a/content/riak/kv/2.0.8/developing/client-libraries.md +++ b/content/riak/kv/2.0.8/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.0.8/developing/data-modeling.md b/content/riak/kv/2.0.8/developing/data-modeling.md index e00b2553f7..9f8bdd3b8a 100644 --- a/content/riak/kv/2.0.8/developing/data-modeling.md +++ b/content/riak/kv/2.0.8/developing/data-modeling.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.8/dev/using/data-modeling --- -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. In this guide, we'll list @@ -28,9 +28,9 @@ provide links to videos and documentation for further exploration. How you structure your application to run on Riak should take into account the unique needs of your use case, including access patterns such as read/write distribution, latency differences between various -operations, use of Riak features including [Data Types](/riak/kv/2.0.8/developing/data-types/), -[MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce/), [Search](/riak/kv/2.0.8/developing/usage/search/), -[secondary indexes (2i)](/riak/kv/2.0.8/developing/usage/secondary-indexes/) and more. This guide +operations, use of Riak features including [Data Types]({{}}riak/kv/2.0.8/developing/data-types/), +[MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce/), [Search]({{}}riak/kv/2.0.8/developing/usage/search/), +[secondary indexes (2i)]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes/) and more. This guide is intended to be illustrative only. 
## High Read/Write, Simple Applications @@ -39,20 +39,20 @@ The following are examples of Riak use cases that require high read/write performance without necessarily utilizing complex data structures: -* [Session Storage](/riak/kv/2.0.8/developing/data-modeling/#session-storage) -* [Serving Advertisements](/riak/kv/2.0.8/developing/data-modeling/#serving-advertisements) -* [Log Data](/riak/kv/2.0.8/developing/data-modeling/#log-data) -* [Sensor Data](/riak/kv/2.0.8/developing/data-modeling/#sensor-data) +* [Session Storage]({{}}riak/kv/2.0.8/developing/data-modeling/#session-storage) +* [Serving Advertisements]({{}}riak/kv/2.0.8/developing/data-modeling/#serving-advertisements) +* [Log Data]({{}}riak/kv/2.0.8/developing/data-modeling/#log-data) +* [Sensor Data]({{}}riak/kv/2.0.8/developing/data-modeling/#sensor-data) ## Content Management, Social Applications The following application types require more subtle relationships between objects, e.g. one-to-many and many-to-many relationships. -* [User Accounts](/riak/kv/2.0.8/developing/data-modeling/#user-accounts) -* [User Settings and Preferences](/riak/kv/2.0.8/developing/data-modeling/#user-settings-and-preferences) -* [User Events and Timelines](/riak/kv/2.0.8/developing/data-modeling/#user-events-and-timelines) -* [Articles, Blog Posts, and Other Content](/riak/kv/2.0.8/developing/data-modeling/#articles-blog-posts-and-other-content) +* [User Accounts]({{}}riak/kv/2.0.8/developing/data-modeling/#user-accounts) +* [User Settings and Preferences]({{}}riak/kv/2.0.8/developing/data-modeling/#user-settings-and-preferences) +* [User Events and Timelines]({{}}riak/kv/2.0.8/developing/data-modeling/#user-events-and-timelines) +* [Articles, Blog Posts, and Other Content]({{}}riak/kv/2.0.8/developing/data-modeling/#articles-blog-posts-and-other-content) ## Session Storage @@ -70,11 +70,11 @@ administrative changes to schemas. Riak has features that allow for more complex session storage use cases. The [Bitcask][plan backend bitcask] storage backend, for example, supports automatic expiry of keys, which frees application developers from implementing manual -session expiry. Riak's [MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce/) system can also be +session expiry. Riak's [MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce/) system can also be used to perform batch processing analysis on large bodies of session data, for example to compute the average number of active users. If sessions must be retrieved using multiple keys (e.g. a UUID or email -address), [using secondary indexes](/riak/kv/2.0.8/developing/usage/secondary-indexes/) can provide an easy solution. +address), [using secondary indexes]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes/) can provide an easy solution. ### Session Storage Community Examples @@ -129,7 +129,7 @@ involves serving reads. ## Log Data A common use case for Riak is storing large amounts of log data, either -for analysis [using MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce) or as a storage system used in +for analysis [using MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce) or as a storage system used in conjunction with a secondary analytics cluster used to perform more advanced analytics tasks. To store log data, you can use a bucket called `logs` (just to give an example) and use a unique value, such as a date, @@ -177,9 +177,9 @@ and then store update data as the value. That data could then be queried on the basis of the interval. 
Alternatively, a timestamp could be attached to each object as a -[secondary index](/riak/kv/2.0.8/developing/usage/secondary-indexes/), which would allow you to +[secondary index]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes/), which would allow you to perform queries on specific time interval ranges or to perform -[MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce/) queries against the indexes. +[MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce/) queries against the indexes. ### Sensor Data Complex Case @@ -215,7 +215,7 @@ and a read request could be performed on the corresponding key. There are, however, several drawbacks to this approach. What happens if a user wants to change their username later on? The most common solution would be to use a UUID-type key for the user and store the user's -username as a [secondary index](/riak/kv/2.0.8/developing/usage/secondary-indexes/) for efficient +username as a [secondary index]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes/) for efficient lookup. ### User Accounts Complex Case @@ -224,7 +224,7 @@ For simple retrieval of a specific account, a user ID (plus perhaps a secondary index on a username or email) is enough. If you foresee the need to make queries on additional user attributes (e.g. creation time, user type, or region), plan ahead and either set up additional secondary -indexes or consider using [Riak Search](/riak/kv/2.0.8/developing/usage/search/) to index the JSON +indexes or consider using [Riak Search]({{}}riak/kv/2.0.8/developing/usage/search/) to index the JSON contents of the user account. ### User Accounts Community Examples @@ -308,9 +308,9 @@ part of a URL string, etc. In Riak, you can store content of any kind, from HTML files to plain text to JSON or XML or another document type entirely. Keep in mind that -data in Riak is opaque, with the exception of [Riak Data Types](/riak/kv/2.0.8/developing/data-types), +data in Riak is opaque, with the exception of [Riak Data Types]({{}}riak/kv/2.0.8/developing/data-types), and so Riak won't "know" about the object unless it is indexed -[using Riak Search](/riak/kv/2.0.8/developing/usage/search/) or [using secondary indexes](/riak/kv/2.0.8/developing/usage/secondary-indexes/). +[using Riak Search]({{}}riak/kv/2.0.8/developing/usage/search/) or [using secondary indexes]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes/). ### Articles et al Complex Case @@ -329,9 +329,9 @@ with comments would require your application to call from the posts and comments buckets to assemble the view. Other possible cases may involve performing operations on content beyond -key/value pairs. [Riak Search](/riak/kv/2.0.8/developing/usage/search/) is recommended for use cases +key/value pairs. [Riak Search]({{}}riak/kv/2.0.8/developing/usage/search/) is recommended for use cases involving full-text search. For lighter-weight querying, -[using secondary indexes](/riak/kv/2.0.8/developing/usage/secondary-indexes/) \(2i) enables you to add metadata to objects to +[using secondary indexes]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes/) \(2i) enables you to add metadata to objects to either query for exact matches or to perform range queries. 2i also enables you to tag posts with dates, timestamps, topic areas, or other pieces of information useful for later retrieval. 
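A short Python sketch may help illustrate the timestamp-indexing approach described above; the bucket name, key scheme, index name, and epoch values are illustrative assumptions:

```python
import riak

client = riak.RiakClient(pb_port=8087)
readings = client.bucket('sensor_readings')

# Attach an integer timestamp index (Unix epoch) when storing each reading
obj = readings.new('sensor42:1478253600', data={'temp_c': 21.4})
obj.add_index('ts_int', 1478253600)
obj.store()

# Later, query a specific time interval: all readings from that day
for key in readings.get_index('ts_int', 1478217600, 1478303999):
    print(readings.get(key).data)
```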
diff --git a/content/riak/kv/2.0.8/developing/data-types.md b/content/riak/kv/2.0.8/developing/data-types.md index e81fc5052f..557e8ef931 100644 --- a/content/riak/kv/2.0.8/developing/data-types.md +++ b/content/riak/kv/2.0.8/developing/data-types.md @@ -43,9 +43,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -268,5 +268,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. diff --git a/content/riak/kv/2.0.8/developing/faq.md b/content/riak/kv/2.0.8/developing/faq.md index 8dcdc10f97..25ca2ca2fe 100644 --- a/content/riak/kv/2.0.8/developing/faq.md +++ b/content/riak/kv/2.0.8/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.0.8/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.0.8/using/performance/benchmarking -[Bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.0.8/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.0.8/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.0.8/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.0.8/developing/client-libraries -[MapReduce]: /riak/kv/2.0.8/developing/usage/mapreduce -[Memory]: /riak/kv/2.0.8/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.0.8/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.0.8/learn/concepts/causal-context#vector-clocks +[Basho Bench]: {{}}riak/kv/2.0.8/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.0.8/developing/usage +[commit hooks]: {{}}riak/kv/2.0.8/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.0.8/configuring/reference +[Erlang Riak Client]: {{}}riak/kv/2.0.8/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.0.8/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.0.8/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.0.8/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.0.8/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.0.8/developing/getting-started.md b/content/riak/kv/2.0.8/developing/getting-started.md index 577f37a5c1..61020518c8 100644 ---
a/content/riak/kv/2.0.8/developing/getting-started.md +++ b/content/riak/kv/2.0.8/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.0.8/setup/installing -[dev client libraries]: /riak/kv/2.0.8/developing/client-libraries +[install index]: {{}}riak/kv/2.0.8/setup/installing +[dev client libraries]: {{}}riak/kv/2.0.8/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.0.8/developing/getting-started/csharp.md b/content/riak/kv/2.0.8/developing/getting-started/csharp.md index 1c683d70a3..5ab2889bbf 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/csharp.md +++ b/content/riak/kv/2.0.8/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.8/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.8/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.8/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.8/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.0.8/developing/getting-started/csharp/querying.md b/content/riak/kv/2.0.8/developing/getting-started/csharp/querying.md index b1a5f11a2c..8529512347 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.0.8/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.8/developing/getting-started/erlang.md b/content/riak/kv/2.0.8/developing/getting-started/erlang.md index c17e776b5a..a2aae544b4 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/erlang.md +++ b/content/riak/kv/2.0.8/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.8/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.8/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. 
You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.8/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.8/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.0.8/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.0.8/developing/getting-started/erlang/object-modeling.md index 85c0bf3a27..a50d38b357 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.0.8/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.8/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.0.8/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.8/developing/getting-started/erlang/querying.md b/content/riak/kv/2.0.8/developing/getting-started/erlang/querying.md index 6fa87a97c6..59ff871cdc 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.0.8/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.0.8/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.0.8/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.8/developing/getting-started/golang.md b/content/riak/kv/2.0.8/developing/getting-started/golang.md index 214b5a3f6c..59a7d9fdd1 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/golang.md +++ b/content/riak/kv/2.0.8/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.8/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.8/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.8/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.8/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.0.8/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.0.8/developing/getting-started/golang/object-modeling.md index f6d0c9c55b..df95d1928e 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.0.8/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.8/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.0.8/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.8/developing/getting-started/golang/querying.md b/content/riak/kv/2.0.8/developing/getting-started/golang/querying.md index d478f15fa4..f7d114e682 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.0.8/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. 
This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.0.8/developing/getting-started/java.md b/content/riak/kv/2.0.8/developing/getting-started/java.md index 2a1111853f..1a08a40ad1 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/java.md +++ b/content/riak/kv/2.0.8/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.8/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.8/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. @@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.0.8/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.8/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.0.8/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.0.8/developing/getting-started/java/crud-operations.md index b8cbadd820..2d0f7b7a46 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.0.8/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.8/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.8/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/) documentation. ## Updating Objects @@ -85,8 +85,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.8/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.8/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -196,6 +196,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.8/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.8/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/) documention. diff --git a/content/riak/kv/2.0.8/developing/getting-started/java/querying.md b/content/riak/kv/2.0.8/developing/getting-started/java/querying.md index 6b5ddb1562..798d55b741 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.0.8/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. 
## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.8/developing/getting-started/nodejs.md b/content/riak/kv/2.0.8/developing/getting-started/nodejs.md index 7fdae3ae6a..a7c2bc4259 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.0.8/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.8/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.8/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.8/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.8/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.0.8/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.0.8/developing/getting-started/nodejs/querying.md index 9ff7666942..369cdb843b 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.0.8/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.8/developing/getting-started/php.md b/content/riak/kv/2.0.8/developing/getting-started/php.md index 7f94501479..e6eea6a955 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/php.md +++ b/content/riak/kv/2.0.8/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.8/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.8/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.8/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.8/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.0.8/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.0.8/developing/getting-started/php/crud-operations.md index 40cc85b60d..d7729cd1a7 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.0.8/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter](/riak/kv/2.0.8/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.0.8/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.0.8/developing/getting-started/php/querying.md b/content/riak/kv/2.0.8/developing/getting-started/php/querying.md index 6a93c5c50a..94f2b1f857 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.0.8/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.8/developing/getting-started/python.md b/content/riak/kv/2.0.8/developing/getting-started/python.md index 51fc37b63a..f55dab669b 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/python.md +++ b/content/riak/kv/2.0.8/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.8/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.8/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.8/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.8/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.0.8/developing/getting-started/python/querying.md b/content/riak/kv/2.0.8/developing/getting-started/python/querying.md index 9aaea7f75f..0349000f0e 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.0.8/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. 
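In rough Python client terms, that write-time indexing pattern might look like the following sketch; the `customer_orders` bucket, keys, and index names are assumptions for illustration:

```python
import riak

client = riak.RiakClient(pb_port=8087)
orders = client.bucket('customer_orders')

# Store a new record and tag it with secondary index entries in one write
order = orders.new('order-1001', data={'customer': 'alice', 'total': 102.5})
order.add_index('customer_bin', 'alice')     # exact-match lookups
order.add_index('order_date_int', 20161104)  # integer range queries
order.store()

# Exact match on a binary index: every order placed by 'alice'
print(list(orders.get_index('customer_bin', 'alice')))
```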
diff --git a/content/riak/kv/2.0.8/developing/getting-started/ruby.md b/content/riak/kv/2.0.8/developing/getting-started/ruby.md index 77f61b8f3e..a543696f7d 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/ruby.md +++ b/content/riak/kv/2.0.8/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.8/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.8/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.8/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.8/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.0.8/developing/getting-started/ruby/querying.md b/content/riak/kv/2.0.8/developing/getting-started/ruby/querying.md index 4aca9bc6f0..2cccd5ecbd 100644 --- a/content/riak/kv/2.0.8/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.0.8/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.8/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. 
diff --git a/content/riak/kv/2.0.8/developing/key-value-modeling.md b/content/riak/kv/2.0.8/developing/key-value-modeling.md index e2817e933c..ebc29e129c 100644 --- a/content/riak/kv/2.0.8/developing/key-value-modeling.md +++ b/content/riak/kv/2.0.8/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.0.8/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.0.8/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.0.8/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.0.8/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.0.8/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.0.8/developing/app-guide/) for a better sense of which features you might need. +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.0.8/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects. Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.0.8/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.0.8/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.0.8/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.0.8/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.0.8/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. 
Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.0.8/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.0.8/developing/api/http)): ``` GET/PUT/DELETE /bucket/<bucket>/keys/<key> @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.0.8/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.0.8/developing/data-types/#sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.0.8/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.0.8/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.0.8/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.0.8/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.0.8/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.8/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.8/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.8/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.8/developing/getting-started).
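As a rough illustration, reading that set back with the official Python client might look like the following sketch (connection details are placeholders; the bucket type, bucket, and key follow the text above):

```python
import riak
from riak.datatypes import Set

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('sets').bucket('user_info_sets')

user_id_set = Set(bucket, 'usernames')
user_id_set.reload()               # fetch the current membership

usernames = user_id_set.value      # an immutable snapshot (frozenset)
print(len(usernames))              # how many user records exist
print('alice' in usernames)        # cheap existence check
```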
Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.0.8/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.0.8/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.0.8/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.0.8/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.0.8/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.0.8/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.0.8/developing/usage/commit-hooks.md b/content/riak/kv/2.0.8/developing/usage/commit-hooks.md index c8fd1c3b4e..512e34f736 100644 --- a/content/riak/kv/2.0.8/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.0.8/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. -Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.0.8/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.0.8/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. ## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.0.8/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.0.8/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. 
This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.0.8/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.0.8/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.0.8/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.0.8/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.0.8/developing/usage/conflict-resolution.md b/content/riak/kv/2.0.8/developing/usage/conflict-resolution.md index 823339da04..a3dd935ed9 100644 --- a/content/riak/kv/2.0.8/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.0.8/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.8/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.0.8/learn/concepts/clusters) system in which any [node](/riak/kv/2.0.8/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{}}riak/kv/2.0.8/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.0.8/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. -If you are using Riak in an [eventually consistent](/riak/kv/2.0.8/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.0.8/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.0.8/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.0.8/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.0.8/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.0.8/learn/concepts/causal-context#dotted-version-vectors), when updating objects.
Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.0.8/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.0.8/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.0.8/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.0.8/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.0.8/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.0.8/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.0.8/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. The most important [bucket properties](/riak/kv/2.0.8/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.0.8/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. @@ -87,7 +87,7 @@ If the [`allow_mult`](#siblings) parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -[`last_write_wins`](/riak/kv/2.0.8/learn/concepts/buckets). If `last_write_wins` is set to `false`, +[`last_write_wins`]({{}}riak/kv/2.0.8/learn/concepts/buckets). If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. 
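As a quick, hedged illustration of those two properties with the official Python client (the bucket name is hypothetical and a local node is assumed):

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)  # assumes a local node
bucket = client.bucket('user_data')  # hypothetical bucket, default type

# Inspect the two properties that govern conflict handling.
props = bucket.get_properties()
print(props['allow_mult'], props['last_write_wins'])

# Opt this bucket out of siblings: Riak then resolves conflicts
# internally (timestamp-based when last_write_wins is enabled).
bucket.set_properties({'allow_mult': False})
```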
For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.0.8/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.8/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.8/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.8/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.8/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.0.8/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.0.8/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.0.8/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.0.8/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.0.8/configuring/reference) to change the [default bucket properties](/riak/kv/2.0.8/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.0.8/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.0.8/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.0.8/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.0.8/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{}}riak/kv/2.0.8/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.0.8/learn/concepts/causal-context#vector-clocks) will be used. Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.0.8/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.0.8/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. 
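In practice, using causal context means fetch-modify-store rather than writing blind. Here is a minimal sketch with the official Python client (hypothetical bucket and key, local node assumed):

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)  # assumes a local node
bucket = client.bucket('user_data')  # hypothetical bucket

# Fetch first: the returned object carries its causal context.
obj = bucket.get('ahmed_info')

# Mutate the fetched object rather than constructing a fresh one...
obj.data = {'first_name': 'Ahmed', 'city': 'Los Angeles'}

# ...so store() returns the context to Riak, marking this write as a
# descendant of the value we read instead of a conflicting sibling.
obj.store()
```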
Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.0.8/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.0.8/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.0.8/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.0.8/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.0.8/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.0.8/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.0.8/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.8/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.0.8/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.8/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.0.8/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.8/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.8/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.8/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.8/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -610,7 +610,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.0.8/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. 
+the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.0.8/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -665,7 +665,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/csharp.md index 5186a881fa..f065730505 100644 --- a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.8/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. diff --git a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/golang.md index f3401410c8..73ce564d9a 100644 --- a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.8/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to usecase-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/java.md index 8cfe799337..9bd5510965 100644 --- a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.8/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. 
Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.8/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.8/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.8/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.8/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.8/developing/data-types/#counters), [set](/riak/kv/2.0.8/developing/data-types/#sets), or [map](/riak/kv/2.0.8/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.8/developing/data-types/#counters), [set]({{}}riak/kv/2.0.8/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.8/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.8/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.8/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/nodejs.md index c2314d36b4..fb3f6bca20 100644 --- a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.8/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. 
Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/php.md index 04eae865b4..b88f0edbab 100644 --- a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.8/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.8/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.8/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.8/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.8/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.8/developing/data-types/#counters), [set](/riak/kv/2.0.8/developing/data-types/#sets), or [map](/riak/kv/2.0.8/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.8/developing/data-types/#counters), [set]({{}}riak/kv/2.0.8/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.8/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.8/developing/data-types/#sets). 
+resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.8/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/python.md index c4b529b96d..f837bf27bc 100644 --- a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.8/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.8/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.8/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.8/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.8/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.8/developing/data-types/#counters), [set](/riak/kv/2.0.8/developing/data-types/#sets), or [map](/riak/kv/2.0.8/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.8/developing/data-types/#counters), [set]({{}}riak/kv/2.0.8/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.8/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. 
For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.8/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.8/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/ruby.md index cd3575284c..a253e88b93 100644 --- a/content/riak/kv/2.0.8/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.0.8/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.8/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.8/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.8/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.8/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.8/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.8/developing/data-types/#counters), [set](/riak/kv/2.0.8/developing/data-types/#sets), or [map](/riak/kv/2.0.8/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.8/developing/data-types/#counters), [set]({{}}riak/kv/2.0.8/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.8/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. 
The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.8/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.8/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.8/developing/usage/creating-objects.md b/content/riak/kv/2.0.8/developing/usage/creating-objects.md index c18fcc3296..9d72bd10b6 100644 --- a/content/riak/kv/2.0.8/developing/usage/creating-objects.md +++ b/content/riak/kv/2.0.8/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.0.8/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.0.8/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: ``` PUT /types/<type>/buckets/<bucket>/keys/<key> # If you're using HTTP to interact with Riak, you can also use POST ``` -As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type](/riak/kv/2.0.8/using/cluster-operations/bucket-types). +As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{}}riak/kv/2.0.8/using/cluster-operations/bucket-types). The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -118,7 +118,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, you run the same read operation as in [Reading Objects](/riak/kv/2.0.8/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types](/riak/kv/2.0.8/using/cluster-operations/bucket-types). +Now, run the same read operation as in [Reading Objects]({{}}riak/kv/2.0.8/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types]({{}}riak/kv/2.0.8/using/cluster-operations/bucket-types). ### Store an Object @@ -138,7 +138,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.0.8/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.0.8/developing/usage/bucket-types) will be applied.
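For comparison with the client snippets above, a minimal version of the same write with the official Python client might look like this (assuming a local node and an already-activated `animals` bucket type):

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)  # assumes a local node

# The 'animals' bucket type must already be created and activated,
# e.g. with riak-admin, or this write will fail.
bucket = client.bucket_type('animals').bucket('dogs')

# Build the object with an explicit value and content type...
obj = bucket.new('rufus', data='WOOF!', content_type='text/plain')

# ...and write it. Riak creates the bucket implicitly on first write.
obj.store()
```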
#### Write Parameters diff --git a/content/riak/kv/2.0.8/developing/usage/custom-extractors.md b/content/riak/kv/2.0.8/developing/usage/custom-extractors.md index 473b046099..4b7600257c 100644 --- a/content/riak/kv/2.0.8/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.0.8/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.0.8/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.0.8/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.0.8/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.0.8/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.0.8/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added to `` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.0.8/developing/usage/deleting-objects.md b/content/riak/kv/2.0.8/developing/usage/deleting-objects.md index aba4f15625..bf9efe811a 100644 --- a/content/riak/kv/2.0.8/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.0.8/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.0.8/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.0.8/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/fetch-object). 
This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.0.8/developing/usage/document-store.md b/content/riak/kv/2.0.8/developing/usage/document-store.md index a738fc884f..0bd5b7740e 100644 --- a/content/riak/kv/2.0.8/developing/usage/document-store.md +++ b/content/riak/kv/2.0.8/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.0.8/developing/usage/search/) and [Riak Data Types](/riak/kv/2.0.8/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.0.8/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.0.8/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.0.8/developing/data-types/#maps). +[Riak maps]({{}}riak/kv/2.0.8/developing/data-types/#maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.0.8/developing/data-types/#maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.0.8/developing/data-types/#maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**. Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.0.8/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.0.8/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.0.8/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.0.8/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.0.8/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.0.8/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. 
Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.0.8/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.0.8/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.0.8/developing/usage/mapreduce.md b/content/riak/kv/2.0.8/developing/usage/mapreduce.md index ee2e78b4b3..24aa97a27d 100644 --- a/content/riak/kv/2.0.8/developing/usage/mapreduce.md +++ b/content/riak/kv/2.0.8/developing/usage/mapreduce.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.8/dev/using/mapreduce --- -[usage 2i]: /riak/kv/2.0.8/developing/usage/secondary-indexes -[usage search]: /riak/kv/2.0.8/developing/usage/search -[usage types]: /riak/kv/2.0.8/developing/usage/bucket-types -[api http]: /riak/kv/2.0.8/developing/api/http -[api pb]: /riak/kv/2.0.8/developing/api/protocol-buffers -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[guide mapreduce]: /riak/kv/2.0.8/developing/app-guide/advanced-mapreduce +[usage 2i]: {{}}riak/kv/2.0.8/developing/usage/secondary-indexes +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search +[usage types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[api http]: {{}}riak/kv/2.0.8/developing/api/http +[api pb]: {{}}riak/kv/2.0.8/developing/api/protocol-buffers +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[guide mapreduce]: {{}}riak/kv/2.0.8/developing/app-guide/advanced-mapreduce {{% note title="Use MapReduce sparingly" %}} In Riak KV, MapReduce is the primary method for non-primary-key-based @@ -116,7 +116,7 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example diff --git a/content/riak/kv/2.0.8/developing/usage/reading-objects.md b/content/riak/kv/2.0.8/developing/usage/reading-objects.md index e8c2590fe6..df0d39c7d0 100644 --- a/content/riak/kv/2.0.8/developing/usage/reading-objects.md +++ b/content/riak/kv/2.0.8/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.0.8/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type](/riak/kv/2.0.8/using/cluster-operations/bucket-types) page. +`dogs`, which bears the bucket type `animals`. 
Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type]({{}}riak/kv/2.0.8/using/cluster-operations/bucket-types) page. ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.0.8/developing/usage/replication.md b/content/riak/kv/2.0.8/developing/usage/replication.md index 0dbb3603c6..20e41766d0 100644 --- a/content/riak/kv/2.0.8/developing/usage/replication.md +++ b/content/riak/kv/2.0.8/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.8/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.8/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using +Using Strong Consistency documentation, as this option will not be covered in this tutorial. {{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.8/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.8/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. 
the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.8/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.8/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.8/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.8/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.0.8/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.0.8/setup/planning/backend/multi). ## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.8/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.8/developing/client-libraries) enable you to set replication properties this way. 
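For instance, the `?w=3&dw=2` curl example above might look like this with the official Python client (hypothetical data, local node assumed):

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)  # assumes a local node
bucket = client.bucket('nba_stats')  # default bucket type

# Write with W=3 and DW=2, mirroring the ?w=3&dw=2 curl query string.
obj = bucket.new('michael_jordan', data={'titles': 6})
obj.store(w=3, dw=2)

# Read with R=3: wait for three vnode responses before returning.
print(bucket.get('michael_jordan', r=3).data)
```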
For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.0.8/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.0.8/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.8/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.8/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.8/developing/usage/search-schemas.md b/content/riak/kv/2.0.8/developing/usage/search-schemas.md index e551cb0e95..aa64506ccf 100644 --- a/content/riak/kv/2.0.8/developing/usage/search-schemas.md +++ b/content/riak/kv/2.0.8/developing/usage/search-schemas.md @@ -15,17 +15,17 @@ aliases: - /riak/kv/2.0.8/dev/advanced/search-schema --- -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). 
Riak Search is built for ease of use, allowing you to write values into Riak and query for values using Solr. Riak Search does a lot of work -under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.0.8/developing/data-types/), and [more](/riak/kv/2.0.8/developing/usage/custom-extractors)---into something that can be indexed and searched later. +under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.0.8/developing/data-types/), and [more]({{}}riak/kv/2.0.8/developing/usage/custom-extractors)---into something that can be indexed and searched later. Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing and array of strings? An integer? A date? Is your text in English or Russian? You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.0.8/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.0.8/developing/usage/search.md b/content/riak/kv/2.0.8/developing/usage/search.md index c21e9e2021..a0971ed314 100644 --- a/content/riak/kv/2.0.8/developing/usage/search.md +++ b/content/riak/kv/2.0.8/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.0.8/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.0.8/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.8/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.8/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.8/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.8/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.0.8/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.0.8/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. 
In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.0.8/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.0.8/developing/usage/custom-extractors). Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.0.8/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.0.8/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.0.8/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.0.8/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.0.8/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.0.8/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.0.8/developing/usage/searching-data-types.md b/content/riak/kv/2.0.8/developing/usage/searching-data-types.md index f7f630377e..2191c5504e 100644 --- a/content/riak/kv/2.0.8/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.0.8/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.8/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.0.8/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.0.8/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). 
Riak's [counters](/riak/kv/2.0.8/developing/data-types/#counters), [sets](/riak/kv/2.0.8/developing/data-types/#sets), and [maps](/riak/kv/2.0.8/developing/data-types/#maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.0.8/developing/data-types/#counters), [sets]({{}}riak/kv/2.0.8/developing/data-types/#sets), and [maps]({{}}riak/kv/2.0.8/developing/data-types/#maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. ### Top-level Schemas -The default schema for [counters](/riak/kv/2.0.8/developing/data-types/#counters) indexes each +The default schema for [counters]({{}}riak/kv/2.0.8/developing/data-types/#counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.0.8/developing/data-types/#sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.0.8/developing/data-types/#sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.0.8/developing/data-types/#maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.0.8/developing/data-types/#maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) for [storing counters](/riak/kv/2.0.8/developing/data-types/#counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.0.8/developing/data-types/#counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.0.8/developing/usage/bucket-types) for [storing sets](/riak/kv/2.0.8/developing/data-types/#sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.0.8/developing/data-types/#sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.0.8/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.0.8/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.0.8/developing/data-types/#maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.0.8/developing/data-types/#maps), and we'd suggest that you familiarize yourself with that tutorial first. 
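Before moving on to the maps example, here is a rough sketch of the counters example above using the HTTP data types API directly rather than a client library. It assumes the `counters` bucket type and `scores` index created earlier; the bucket and key names are made up.

```bash
# Increment a player's counter stored under the 'counters' bucket type
curl -XPOST $RIAK_HOST/types/counters/buckets/people/datatypes/ahmed \
  -H "Content-Type: application/json" \
  -d '{"increment": 5}'

# Counters are indexed as integers, so they can be queried by value
curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:5"
```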
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.0.8/developing/usage/secondary-indexes.md b/content/riak/kv/2.0.8/developing/usage/secondary-indexes.md index 8b4880a7a6..ade23e9a66 100644 --- a/content/riak/kv/2.0.8/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.0.8/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.0.8/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.8/setup/planning/backend/memory -[use ref strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.8/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.8/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.0.8/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.0.8/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.0.8/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.0.8/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.0.8/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.0.8/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.0.8/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.0.8/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.0.8/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.0.8/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.0.8/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.8/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.8/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.8/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.8/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.0.8/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.0.8/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.0.8/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
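As a compact sketch of the query interface, echoing the `john_smith` example above: tag an object with one binary and one integer index at write time, then query each. The index names and values are illustrative, and a node on `localhost:8098` is assumed.

```bash
# Indexes are attached as x-riak-index-* headers at write time
curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \
  -H "x-riak-index-field1_bin: val1" \
  -H "x-riak-index-field2_int: 1001" \
  -H "Content-Type: application/json" \
  -d '{"name": "John Smith"}'

# Exact-match query on the binary index
curl "localhost:8098/types/default/buckets/users/index/field1_bin/val1"

# Range query on the integer index
curl "localhost:8098/types/default/buckets/users/index/field2_int/1000/1500"
```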
### Exact Match diff --git a/content/riak/kv/2.0.8/developing/usage/security.md b/content/riak/kv/2.0.8/developing/usage/security.md index 081a0fb5de..ff0919b159 100644 --- a/content/riak/kv/2.0.8/developing/usage/security.md +++ b/content/riak/kv/2.0.8/developing/usage/security.md @@ -15,49 +15,49 @@ aliases: - /riak/kv/2.0.8/dev/advanced/client-security --- -Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.0.8/using/security/basics) that enables you to choose +Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.0.8/using/security/basics) that enables you to choose * which Riak users/clients are authorized to perform a wide variety of Riak operations, and * how those users/clients are required to authenticate themselves. -The following four authentication mechanisms, aka [security sources](/riak/kv/2.0.8/using/security/managing-sources/) are available: +The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.0.8/using/security/managing-sources/), are available: -* [Trust](/riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication)-based +* [Trust]({{}}riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default -* [Password](/riak/kv/2.0.8/using/security/managing-sources/#password-based-authentication)-based authentication requires +* [Password]({{}}riak/kv/2.0.8/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password -* [Certificate](/riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication +* [Certificate]({{}}riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients provide a certificate -* [Pluggable authentication module (PAM)](/riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication)-based authentication requires +* [Pluggable authentication module (PAM)]({{}}riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the - [`riak-admin security`](/riak/kv/2.0.8/using/security/managing-sources/#managing-sources) + [`riak-admin security`]({{}}riak/kv/2.0.8/using/security/managing-sources/#managing-sources) command line interface Riak's approach to security is highly flexible. If you choose to use Riak's security feature, you do not need to require that all clients authenticate via the same means. Instead, you can specify authentication sources on a client-by-client, i.e. user-by-user, basis. This means that -you can require clients performing, say, [MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce/) -operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.0.8/developing/usage) have to use username and password. The approach +you can require clients performing, say, [MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.0.8/developing/usage) have to use username and password. The approach that you adopt will depend on your security needs. This document provides a general overview of how that works.
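A quick server-side sketch of that flexibility, using the `riak-admin security` interface mentioned above; the username, password, and CIDRs are placeholders, and security must be enabled on the cluster.

```bash
# Enable security, create a user, and attach different authentication
# sources to different networks (all values are illustrative)
riak-admin security enable
riak-admin security add-user riakuser password=rosebud
riak-admin security add-source riakuser 192.168.1.0/24 password
riak-admin security add-source riakuser 10.0.0.0/8 certificate
```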
For managing security in Riak itself, see the following documents: -* [Authentication and Authorization](/riak/kv/2.0.8/using/security/basics) -* [Managing Security Sources](/riak/kv/2.0.8/using/security/managing-sources/) +* [Authentication and Authorization]({{}}riak/kv/2.0.8/using/security/basics) +* [Managing Security Sources]({{}}riak/kv/2.0.8/using/security/managing-sources/) We also provide client-library-specific guides for the following officially supported clients: -* [Java](/riak/kv/2.0.8/developing/usage/security/java) -* [Ruby](/riak/kv/2.0.8/developing/usage/security/ruby) -* [PHP](/riak/kv/2.0.8/developing/usage/security/php) -* [Python](/riak/kv/2.0.8/developing/usage/security/python) -* [Erlang](/riak/kv/2.0.8/developing/usage/security/erlang) +* [Java]({{}}riak/kv/2.0.8/developing/usage/security/java) +* [Ruby]({{}}riak/kv/2.0.8/developing/usage/security/ruby) +* [PHP]({{}}riak/kv/2.0.8/developing/usage/security/php) +* [Python]({{}}riak/kv/2.0.8/developing/usage/security/python) +* [Erlang]({{}}riak/kv/2.0.8/developing/usage/security/erlang) ## Certificates, Keys, and Authorities @@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients. > **HTTP not supported** > > Certificate-based authentication is available only through Riak's -[Protocol Buffers](/riak/kv/2.0.8/developing/api/protocol-buffers/) interface. It is not available through the -[HTTP API](/riak/kv/2.0.8/developing/api/http). +[Protocol Buffers]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{}}riak/kv/2.0.8/developing/api/http). ### Default Names -In Riak's [configuration files](/riak/kv/2.0.8/configuring/reference/#security), the +In Riak's [configuration files]({{}}riak/kv/2.0.8/configuring/reference/#security), the default certificate file names are as follows: Cert | Filename diff --git a/content/riak/kv/2.0.8/developing/usage/security/erlang.md b/content/riak/kv/2.0.8/developing/usage/security/erlang.md index 31f548b7a7..aab58e8e0e 100644 --- a/content/riak/kv/2.0.8/developing/usage/security/erlang.md +++ b/content/riak/kv/2.0.8/developing/usage/security/erlang.md @@ -19,9 +19,9 @@ aliases: This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak. -If you are using [trust](/riak/kv/2.0.8/using/security/managing-sources/), [PAM-](/riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.0.8/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust]({{}}riak/kv/2.0.8/using/security/managing-sources/)- or [PAM]({{}}riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.0.8/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.8/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.8/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.8/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.8/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.8/developing/usage/security/java.md b/content/riak/kv/2.0.8/developing/usage/security/java.md index b077443352..09c9a2cf13 100644 --- a/content/riak/kv/2.0.8/developing/usage/security/java.md +++ b/content/riak/kv/2.0.8/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.8/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.8/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.0.8/developing/usage/security/php.md b/content/riak/kv/2.0.8/developing/usage/security/php.md index ea798d4445..22ca4ac7a2 100644 --- a/content/riak/kv/2.0.8/developing/usage/security/php.md +++ b/content/riak/kv/2.0.8/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.8/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.0.8/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.0.8/developing/usage/security/python.md b/content/riak/kv/2.0.8/developing/usage/security/python.md index 5b9e626c09..569bc8381b 100644 --- a/content/riak/kv/2.0.8/developing/usage/security/python.md +++ b/content/riak/kv/2.0.8/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.8/using/security/managing-sources/) or [PAM-](/riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.0.8/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.8/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.0.8/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.8/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.8/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.0.8/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.8/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.8/using/security/basics/#user-management).
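The server-side half of that PAM setup is a single command, sketched here with a made-up service name; the client then supplies the CA, username, and password as described above.

```bash
# Require PAM authentication for this user from the given CIDR,
# delegating to the 'riak' PAM service (the service name is an example
# and must exist under /etc/pam.d on the node)
riak-admin security add-source riakuser 127.0.0.1/32 pam service=riak
```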
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.0.8/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.0.8/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.0.8/developing/usage/security/ruby.md b/content/riak/kv/2.0.8/developing/usage/security/ruby.md index 8bbb02b5d7..beb984f774 100644 --- a/content/riak/kv/2.0.8/developing/usage/security/ruby.md +++ b/content/riak/kv/2.0.8/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.8/using/security/managing-sources/) or [PAM](/riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.0.8/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.0.8/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.0.8/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.8/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.0.8/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication). 
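For the trust-based fallback just described, the connecting CIDR has to be registered as trusted on the server side; a one-line sketch that trusts only loopback connections:

```bash
# Trust all clients connecting from the loopback interface
riak-admin security add-source all 127.0.0.1/32 trust
```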
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.8/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.8/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.0.8/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.8/developing/usage/updating-objects.md b/content/riak/kv/2.0.8/developing/usage/updating-objects.md index b41993263d..6696fe18e1 100644 --- a/content/riak/kv/2.0.8/developing/usage/updating-objects.md +++ b/content/riak/kv/2.0.8/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/dev/using/updates --- -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode ## Using Causal Context If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.0.8/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.0.8/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.0.8/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.0.8/learn/concepts/causal-context). These contexts track the causal history of objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps: 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.0.8/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.0.8/learn/concepts/causal-context)) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.0.8/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.0.8/learn/concepts/causal-context). Although a more detailed tutorial on context objects and +object updates can be found in [Conflict Resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution), we'll walk you through a basic example here.
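Before the fuller client-library example below, here is a minimal sketch of those three steps over the HTTP API, where the causal context travels in the `X-Riak-Vclock` header: it is captured on the read and passed back on the write. The bucket, key, and values are placeholders.

```bash
# 1. Fetch the object and keep its causal context
VCLOCK=$(curl -sI localhost:8098/buckets/nba/keys/champion \
  | grep -i '^x-riak-vclock:' | awk '{print $2}' | tr -d '\r')

# 2. Modify the value on the application side, then
# 3. Write it back with the same context
curl -XPUT localhost:8098/buckets/nba/keys/champion \
  -H "Content-Type: text/plain" \
  -H "X-Riak-Vclock: $VCLOCK" \
  -d "Harlem Globetrotters"
```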
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.0.8/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.0.8/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.0.8/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.0.8/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.0.8/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.0.8/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.0.8/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.0.8/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.0.8/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.0.8/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.0.8/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.0.8/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.0.8/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.0.8/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.0.8/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.0.8/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.0.8/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.0.8/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.0.8/index.md b/content/riak/kv/2.0.8/index.md index 652c6e10e2..3e6c3b56c2 100644 --- a/content/riak/kv/2.0.8/index.md +++ b/content/riak/kv/2.0.8/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.0.8/configuring -[dev index]: /riak/kv/2.0.8/developing -[downloads]: /riak/kv/2.0.8/downloads/ -[install index]: /riak/kv/2.0.8/setup/installing/ -[plan index]: /riak/kv/2.0.8/setup/planning -[perf open files]: /riak/kv/2.0.8/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.0.8/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.0.8/developing/usage/search -[getting started]: /riak/kv/2.0.8/developing/getting-started -[dev client libraries]: /riak/kv/2.0.8/developing/client-libraries +[config index]: {{}}riak/kv/2.0.8/configuring +[dev index]: {{}}riak/kv/2.0.8/developing +[downloads]: {{}}riak/kv/2.0.8/downloads/ +[install index]: {{}}riak/kv/2.0.8/setup/installing/ +[plan index]: {{}}riak/kv/2.0.8/setup/planning +[perf open files]: {{}}riak/kv/2.0.8/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.0.8/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search +[getting started]: {{}}riak/kv/2.0.8/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.0.8/developing/client-libraries diff --git a/content/riak/kv/2.0.8/learn/concepts.md b/content/riak/kv/2.0.8/learn/concepts.md index b67164c28b..86049bf095 100644 --- a/content/riak/kv/2.0.8/learn/concepts.md +++ b/content/riak/kv/2.0.8/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.0.8/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.0.8/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.8/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.8/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.8/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.8/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.8/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.8/learn/concepts/vnodes -[config index]: /riak/kv/2.0.8/configuring -[plan index]: /riak/kv/2.0.8/setup/planning -[use index]: /riak/kv/2.0.8/using/ +[concept aae]: {{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.0.8/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.8/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.8/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.8/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.8/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.8/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.8/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.0.8/configuring +[plan index]: {{}}riak/kv/2.0.8/setup/planning +[use index]: {{}}riak/kv/2.0.8/using/ Riak KV has many great features, functions, and guiding 
principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. diff --git a/content/riak/kv/2.0.8/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.0.8/learn/concepts/active-anti-entropy.md index 648baa51f0..49d247d443 100644 --- a/content/riak/kv/2.0.8/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.0.8/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.8/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.0.8/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.0.8/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.0.8/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.0.8/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.0.8/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.0.8/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.0.8/developing/usage/search +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.0.8/learn/concepts/buckets.md b/content/riak/kv/2.0.8/learn/concepts/buckets.md index 4b9eaa5bcf..5c782ec9a7 100644 --- a/content/riak/kv/2.0.8/learn/concepts/buckets.md +++ b/content/riak/kv/2.0.8/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.0.8/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.0.8/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.0.8/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.0.8/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.0.8/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.0.8/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.0.8/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[config basic]: /riak/kv/2.0.8/configuring/basic -[dev api http]: /riak/kv/2.0.8/developing/api/http -[dev data types]: /riak/kv/2.0.8/developing/data-types -[glossary ring]: /riak/kv/2.0.8/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.8/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.8/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.8/developing/usage/commit-hooks -[usage conflict resolution]: 
/riak/kv/2.0.8/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.8/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.0.8/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.0.8/using/cluster-operations/bucket-types +[cluster ops strong consistency]: {{}}riak/kv/2.0.8/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.0.8/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.0.8/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.0.8/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.0.8/configuring/basic +[dev api http]: {{}}riak/kv/2.0.8/developing/api/http +[dev data types]: {{}}riak/kv/2.0.8/developing/data-types +[glossary ring]: {{}}riak/kv/2.0.8/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.8/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.8/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.8/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.8/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.8/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.0.8/learn/concepts/capability-negotiation.md b/content/riak/kv/2.0.8/learn/concepts/capability-negotiation.md index a43f70f45d..a15cc55ad8 100644 --- a/content/riak/kv/2.0.8/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.0.8/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.0.8/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.0.8/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.0.8/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.0.8/developing/usage/mapreduce In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.0.8/learn/concepts/causal-context.md b/content/riak/kv/2.0.8/learn/concepts/causal-context.md index 7ffd437a84..5108bb2578 100644 --- a/content/riak/kv/2.0.8/learn/concepts/causal-context.md +++ b/content/riak/kv/2.0.8/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.0.8/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.0.8/developing/api/http -[dev key value]: /riak/kv/2.0.8/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.0.8/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.0.8/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.8/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.0.8/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.0.8/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.0.8/developing/api/http +[dev key value]: {{}}riak/kv/2.0.8/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.0.8/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.0.8/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.8/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.0.8/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.0.8/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -73,7 +73,7 @@ Causal context comes in two forms in Riak: **vector clocks** and **dotted version vectors**. More information in both can be found in the sections below. -In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type](/riak/kv/2.0.8/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other +In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions). 
If, however, `allow_mult` is set to `false`, then Riak will not generate diff --git a/content/riak/kv/2.0.8/learn/concepts/clusters.md b/content/riak/kv/2.0.8/learn/concepts/clusters.md index d86e5df857..2c0f2e6864 100644 --- a/content/riak/kv/2.0.8/learn/concepts/clusters.md +++ b/content/riak/kv/2.0.8/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.8/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.0.8/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.8/learn/concepts/replication -[glossary node]: /riak/kv/2.0.8/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.0.8/learn/dynamo -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.8/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.8/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.0.8/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.0.8/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.8/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.8/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.0.8/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.8/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.8/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.0.8/learn/concepts/crdts.md b/content/riak/kv/2.0.8/learn/concepts/crdts.md index c78ad210d5..dd8be4b204 100644 --- a/content/riak/kv/2.0.8/learn/concepts/crdts.md +++ b/content/riak/kv/2.0.8/learn/concepts/crdts.md @@ -17,20 +17,20 @@ aliases: --- [crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf -[data types converg]: /riak/kv/2.0.8/learn/concepts/crdts/#convergence +[data types converg]: {{}}riak/kv/2.0.8/learn/concepts/crdts/#convergence [crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html -[data types impl]: /riak/kv/2.0.8/learn/concepts/crdts/#implementation -[concept causal context dvv]: /riak/kv/2.0.8/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.0.8/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.0.8/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.0.8/developing/data-types +[data types impl]: {{}}riak/kv/2.0.8/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{}}riak/kv/2.0.8/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.0.8/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.0.8/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.0.8/developing/data-types [riak_dt]: https://github.com/basho/riak_dt -[dev data types context]: /riak/kv/2.1.4/developing/data-types/#data-types-and-context -[glossary node]: /riak/kv/2.0.8/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.0.8/developing/usage/conflict-resolution +[dev data types context]: {{}}riak/kv/2.0.8/developing/data-types/#data-types-and-context +[glossary node]: {{}}riak/kv/2.0.8/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.0.8/developing/usage/conflict-resolution Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. 
Riak KV supports the following eventually-convergent data types, described in later sections: diff --git a/content/riak/kv/2.0.8/learn/concepts/eventual-consistency.md b/content/riak/kv/2.0.8/learn/concepts/eventual-consistency.md index c185e8742b..a6146c4043 100644 --- a/content/riak/kv/2.0.8/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.0.8/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.8/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters -[concept replication]: /riak/kv/2.0.8/learn/concepts/replication -[glossary node]: /riak/kv/2.0.8/learn/glossary/#node -[glossary read rep]: /riak/kv/2.0.8/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.8/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.0.8/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.0.8/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.8/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.0.8/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.8/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.0.8/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.0.8/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.0.8/developing/data-modeling/). +or models]({{}}riak/kv/2.0.8/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
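To make the request-tuning discussion above concrete: the HTTP API accepts per-request quorum parameters. A sketch, assuming a local node on the default HTTP port and illustrative bucket/key names:

```bash
# Read that returns as soon as a single replica answers (r=1)
curl "http://localhost:8098/types/default/buckets/test/keys/doc1?r=1"

# Write that waits for two replica acknowledgments (w=2)
curl -X PUT -H "Content-Type: text/plain" -d "hello" \
  "http://localhost:8098/types/default/buckets/test/keys/doc1?w=2"
```

Lower values favor latency and availability; higher values favor consistency, as the replication properties guide linked above explains.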
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.0.8/learn/concepts/keys-and-objects.md b/content/riak/kv/2.0.8/learn/concepts/keys-and-objects.md index 60cda9e536..74ac88c339 100644 --- a/content/riak/kv/2.0.8/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.0.8/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.8/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.0.8/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.0.8/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.0.8/learn/concepts/replication.md b/content/riak/kv/2.0.8/learn/concepts/replication.md index d8a687cd33..e7956e89ff 100644 --- a/content/riak/kv/2.0.8/learn/concepts/replication.md +++ b/content/riak/kv/2.0.8/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.0.8/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.0.8/learn/concepts/vnodes -[glossary node]: /riak/kv/2.0.8/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.8/learn/glossary/#ring -[usage replication]: /riak/kv/2.0.8/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.0.8/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.0.8/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.8/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.0.8/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.0.8/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.0.8/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.0.8/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.0.8/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail. 
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.0.8/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.0.8/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.0.8/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.0.8/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.0.8/learn/concepts/strong-consistency.md b/content/riak/kv/2.0.8/learn/concepts/strong-consistency.md index 0a15072fc2..29e3e4c8a9 100644 --- a/content/riak/kv/2.0.8/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.0.8/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.8/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.8/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.8/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.8/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.8/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.8/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.8/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.0.8/learn/concepts/vnodes.md b/content/riak/kv/2.0.8/learn/concepts/vnodes.md index 4ebcb93834..7a5110db31 100644 --- a/content/riak/kv/2.0.8/learn/concepts/vnodes.md +++ b/content/riak/kv/2.0.8/learn/concepts/vnodes.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context]: /riak/kv/2.0.8/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.0.8/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.0.8/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.8/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.0.8/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.8/learn/glossary/#ring -[plan backend]: /riak/kv/2.0.8/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.8/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.0.8/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.0.8/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.0.8/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.0.8/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.8/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.0.8/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.8/learn/glossary/#ring +[plan backend]: {{}}riak/kv/2.0.8/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.8/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.0.8/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -80,7 +80,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -102,7 +102,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.0.8/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.0.8/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.0.8/learn/dynamo.md b/content/riak/kv/2.0.8/learn/dynamo.md index efbce2fa5c..dccd05fadb 100644 --- a/content/riak/kv/2.0.8/learn/dynamo.md +++ b/content/riak/kv/2.0.8/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.0.8/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.0.8/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.0.8/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.0.8/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. 
-[HTTP API]: /riak/kv/2.0.8/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.0.8/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.0.8/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.0.8/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.0.8/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.0.8/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.0.8/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.0.8/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.0.8/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.0.8/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.0.8/developing/api/http/) +>[REST API]({{}}riak/kv/2.0.8/developing/api/http/) > ->[Writing Data](/riak/kv/2.0.8/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.0.8/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.0.8/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.0.8/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.0.8/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.0.8/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.0.8/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.0.8/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.0.8/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.0.8/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. -[Multi Datacenter Replication]: /riak/kv/2.0.8/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.0.8/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. 
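Riak's staged approach to the ring-membership changes described here is driven from the command line. A sketch of the workflow, with an example node name:

```bash
# On the joining node: stage a join to any existing cluster member
riak-admin cluster join riak@node1.example.com

# Review the staged ring changes, then apply them
riak-admin cluster plan
riak-admin cluster commit

# Watch partition ownership converge across members
riak-admin member-status
```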
> This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.0.8/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.0.8/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.0.8/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.0.8/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.0.8/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.0.8/setup/planning/backend/ -[Bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.0.8/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.0.8/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.0.8/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.0.8/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.0.8/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.0.8/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.0.8/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.0.8/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.0.8/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.8/using/performance/benchmarking/ Dynamo is used by several services with different configurations.
These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.0.8/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.0.8/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.0.8/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.0.8/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.0.8/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.0.8/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.0.8/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.0.8/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.0.8/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.0.8/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. -[Basho Bench]: /riak/kv/2.0.8/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.8/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. 
-[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.0.8/learn/glossary.md b/content/riak/kv/2.0.8/learn/glossary.md index 4fe83e5803..fd9af1473e 100644 --- a/content/riak/kv/2.0.8/learn/glossary.md +++ b/content/riak/kv/2.0.8/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.0.8/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.0.8/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.8/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.8/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.8/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.8/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.0.8/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.0.8/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.0.8/developing/api/http -[dev data model]: /riak/kv/2.0.8/developing/data-modeling -[dev data types]: /riak/kv/2.0.8/developing/data-types -[glossary read rep]: /riak/kv/2.0.8/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.0.8/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.0.8/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.8/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.8/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.8/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.8/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.8/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.0.8/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.0.8/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.0.8/developing/api/http +[dev data model]: {{}}riak/kv/2.0.8/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.8/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.8/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.0.8/learn/dynamo -[plan cluster capacity]: /riak/kv/2.0.8/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.0.8/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.0.8/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.0.8/learn/dynamo +[plan cluster capacity]: 
{{}}riak/kv/2.0.8/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.0.8/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.0.8/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.0.8/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.8/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.8/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.8/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.8/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.0.8/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.0.8/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. -* [Bucket Types](/riak/kv/2.0.8/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.0.8/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.0.8/learn/use-cases.md b/content/riak/kv/2.0.8/learn/use-cases.md index 3ca9c5e77f..25c55289d7 100644 --- a/content/riak/kv/2.0.8/learn/use-cases.md +++ b/content/riak/kv/2.0.8/learn/use-cases.md @@ -16,19 +16,19 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.0.8/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.0.8/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.0.8/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.0.8/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.0.8/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.0.8/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.0.8/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.0.8/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.0.8/developing/data-types -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[usage mapreduce]: /riak/kv/2.0.8/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.8/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.8/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.0.8/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.0.8/developing/data-modeling/#log-data +[dev data model sensor data]: {{}}riak/kv/2.0.8/developing/data-modeling/#sensor-data +[dev data model serve 
advertisements]: {{}}riak/kv/2.0.8/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.0.8/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.0.8/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.0.8/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.0.8/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.0.8/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[usage mapreduce]: {{}}riak/kv/2.0.8/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.8/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing @@ -115,7 +115,7 @@ based on, for example, a campaign or company ID for easy retrieval. In the advertising industry, being able to serve ads quickly to many users and platforms is often the most important factor in selecting and -tuning a database. Riak's tunable [apps replication properties](/riak/kv/2.0.8/developing/app-guide/replication-properties) can be set +tuning a database. Riak's tunable [apps replication properties]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties) can be set to favor fast read performance. By setting R to 1, only one of N replicas will need to be returned to complete a read operation, yielding lower read latency than an R value equal to the number of replicas @@ -321,7 +321,7 @@ part of a URL string, etc. In Riak, you can store content of any kind, from HTML files to plain text to JSON or XML or another document type entirely. Keep in mind that -data in Riak is opaque, with the exception of [Riak Data Types](/riak/kv/2.0.8/developing/data-types), +data in Riak is opaque, with the exception of [Riak Data Types]({{}}riak/kv/2.0.8/developing/data-types), and so Riak won't "know" about the object unless it is indexed [using Riak Search][usage search] or [using secondary indexes][usage secondary-indexes]. diff --git a/content/riak/kv/2.0.8/learn/why-riak-kv.md b/content/riak/kv/2.0.8/learn/why-riak-kv.md index b30519e359..e1119a80b5 100644 --- a/content/riak/kv/2.0.8/learn/why-riak-kv.md +++ b/content/riak/kv/2.0.8/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.0.8/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.0.8/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.0.8/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.0.8/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.0.8/developing/data-types -[glossary read rep]: /riak/kv/2.0.8/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.0.8/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.8/learn/glossary/#read-repair ## What is Riak? 
diff --git a/content/riak/kv/2.0.8/release-notes.md b/content/riak/kv/2.0.8/release-notes.md index 0e8b756dcf..a63e7fe4a7 100644 --- a/content/riak/kv/2.0.8/release-notes.md +++ b/content/riak/kv/2.0.8/release-notes.md @@ -42,13 +42,13 @@ This is an LTS (long term support) bugfix release that includes improvements to * Logging has been added to clear and exchange trees for audit of administrative operations. * All above work captured in [yokozuna PR 704](https://github.com/basho/yokozuna/pull/704). -* Additional [Cuttlefish parameters](/riak/kv/2.0.8/configuring/reference/#search) have been added to support the Riak search batching updates. These configs will allow you to set batching parameters based on your needs and have, in certain cases, led to significantly higher write throughput to Solr. +* Additional [Cuttlefish parameters]({{}}riak/kv/2.0.8/configuring/reference/#search) have been added to support the Riak search batching updates. These configs will allow you to set batching parameters based on your needs and have, in certain cases, led to significantly higher write throughput to Solr. * [[yokozuna PR 704](https://github.com/basho/yokozuna/pull/704)] ## Bugs Fixed -* LevelDB has been upgraded to version 2.0.33, which resolves the [AAE stall product advisory](http://docs.basho.com/community/productadvisories/aaestall/). +* LevelDB has been upgraded to version 2.0.33, which resolves the [AAE stall product advisory]({{}}community/productadvisories/aaestall/). * [[riak_kv PR 1527](https://github.com/basho/riak_kv/pull/1527)] A race condition was occurring where a `gen_fsm` timeout event was not reliably sent, even when the timeout was set to zero, and another message or event could preempt or unset the timeout. To fix this, a timeout event is manually sent using `gen_fsm:send_event`. * [[riak PR 886](https://github.com/basho/riak/pull/886), [riak_ee PR 412](https://github.com/basho/riak_ee/pull/412), and [node_package PR 210](https://github.com/basho/node_package/pull/210)] Atom usage in `riak` and `riak-admin` commands has been restricted to 1000. Previously, the OS PID was being used as a pseudo-random number generator, but the range was too large since each nodename used would generate an entry in the atom table. `riak-admin top` uses $$ to randomize the name used to connect to the local Riak node, and the large range of possible OS PIDs can result in atom table exhaustion on long running nodes/clusters. The nodename used by `riak top` has been changed to match `riak-admin top` convention, using `$RANDOM` with the range restricted to 1-1000. * [[riak_core Issue 855](https://github.com/basho/riak_core/issues/855)/[riak_core PR 886](https://github.com/basho/riak_core/pull/886)] If updates to the same key in the ring metadata occurred on different nodes during the same second, they were not reconciled. This could lead to nodes flip-flopping the value and many gossip messages causing extremely high message queues and heap usage by the gossip processes. Nodenames have been added to the `merge_meta` comparison to avoid this issue. 
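If you want to see which of the new search batching settings are active on a given node, one option (assuming the `riak` control script is on the PATH; the authoritative parameter names live in the Cuttlefish config reference linked above) is to filter the node's effective configuration:

```bash
# Dump the effective (defaults + overrides) configuration, keeping search settings
riak config effective | grep '^search'
```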
diff --git a/content/riak/kv/2.0.8/setup/downgrade.md b/content/riak/kv/2.0.8/setup/downgrade.md index 3178c58aff..cc74817d1d 100644 --- a/content/riak/kv/2.0.8/setup/downgrade.md +++ b/content/riak/kv/2.0.8/setup/downgrade.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.8/ops/upgrading/rolling-downgrades/ --- -[rolling upgrade]: /riak/kv/2.0.8/setup/upgrading/cluster -[config ref]: /riak/kv/2.0.8/configuring/reference -[concept aae]: /riak/kv/2.0.8/learn/concepts/active-anti-entropy/ -[aae status]: /riak/kv/2.0.8/using/admin/riak-admin/#aae-status +[rolling upgrade]: {{}}riak/kv/2.0.8/setup/upgrading/cluster +[config ref]: {{}}riak/kv/2.0.8/configuring/reference +[concept aae]: {{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/ +[aae status]: {{}}riak/kv/2.0.8/using/admin/riak-admin/#aae-status Downgrades of Riak KV are tested and generally supported for two feature release versions (see warning below), with the general procedure being similar to that of a [rolling upgrade][rolling upgrade]. diff --git a/content/riak/kv/2.0.8/setup/installing.md b/content/riak/kv/2.0.8/setup/installing.md index 92bf4c0247..a9dabfa9b0 100644 --- a/content/riak/kv/2.0.8/setup/installing.md +++ b/content/riak/kv/2.0.8/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.0.8/installing/ --- -[install aws]: /riak/kv/2.0.8/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.8/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.8/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.8/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.8/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.8/setup/installing/smartos -[install solaris]: /riak/kv/2.0.8/setup/installing/solaris -[install suse]: /riak/kv/2.0.8/setup/installing/suse -[install windows azure]: /riak/kv/2.0.8/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.8/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.0.8/setup/upgrading +[install aws]: {{}}riak/kv/2.0.8/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.8/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.8/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.8/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.8/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.8/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.8/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.8/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.8/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.0.8/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.0.8/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.0.8/setup/installing/amazon-web-services.md b/content/riak/kv/2.0.8/setup/installing/amazon-web-services.md index bb4a166ebc..12545a017c 100644 --- a/content/riak/kv/2.0.8/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.0.8/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. 
@@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.0.8/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.0.8/using/security/). ## Clustering Riak on AWS diff --git a/content/riak/kv/2.0.8/setup/installing/debian-ubuntu.md b/content/riak/kv/2.0.8/setup/installing/debian-ubuntu.md index 29e8e4db09..2ee64fbed3 100644 --- a/content/riak/kv/2.0.8/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.0.8/setup/installing/debian-ubuntu.md @@ -18,10 +18,10 @@ aliases: - /riak/kv/2.0.8/installing/debian-ubuntu/ --- -[install source index]: /riak/kv/2.0.8/setup/installing/source/ -[security index]: /riak/kv/2.0.8/using/security/ -[install source erlang]: /riak/kv/2.0.8/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.8/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.8/setup/installing/source/ +[security index]: {{}}riak/kv/2.0.8/using/security/ +[install source erlang]: {{}}riak/kv/2.0.8/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.8/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.0.8/setup/installing/freebsd.md b/content/riak/kv/2.0.8/setup/installing/freebsd.md index bab97ba277..e7a3665fcf 100644 --- a/content/riak/kv/2.0.8/setup/installing/freebsd.md +++ b/content/riak/kv/2.0.8/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.0.8/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.8/downloads/ -[install verify]: /riak/kv/2.0.8/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.8/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.8/downloads/ +[install verify]: {{}}riak/kv/2.0.8/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.0.8/setup/installing/mac-osx.md b/content/riak/kv/2.0.8/setup/installing/mac-osx.md index 5aa2199b8a..d4c4d287f4 100644 --- a/content/riak/kv/2.0.8/setup/installing/mac-osx.md +++ b/content/riak/kv/2.0.8/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.0.8/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.0.8/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.8/setup/installing/verify +[perf open files]: {{}}riak/kv/2.0.8/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.0.8/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.8/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. 
You can install from source or download a diff --git a/content/riak/kv/2.0.8/setup/installing/rhel-centos.md b/content/riak/kv/2.0.8/setup/installing/rhel-centos.md index 15ccc0b45e..8afc6108bf 100644 --- a/content/riak/kv/2.0.8/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.0.8/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.0.8/setup/installing/source -[install source erlang]: /riak/kv/2.0.8/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.8/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.8/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.8/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.8/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.0.8/setup/installing/smartos.md b/content/riak/kv/2.0.8/setup/installing/smartos.md index 7a03378b11..1d77aab534 100644 --- a/content/riak/kv/2.0.8/setup/installing/smartos.md +++ b/content/riak/kv/2.0.8/setup/installing/smartos.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.8/installing/smartos/ --- -[install verify]: /riak/kv/2.0.8/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.8/setup/installing/verify {{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.8" %}} SmartOS is no longer supported in Riak KV 2.0.8+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). diff --git a/content/riak/kv/2.0.8/setup/installing/solaris.md b/content/riak/kv/2.0.8/setup/installing/solaris.md index dda9a3c6c6..ee8b0e1578 100644 --- a/content/riak/kv/2.0.8/setup/installing/solaris.md +++ b/content/riak/kv/2.0.8/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.8/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.8/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. 
diff --git a/content/riak/kv/2.0.8/setup/installing/source.md b/content/riak/kv/2.0.8/setup/installing/source.md index 2adc31d297..4e130c1f02 100644 --- a/content/riak/kv/2.0.8/setup/installing/source.md +++ b/content/riak/kv/2.0.8/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.0.8/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.8/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.0.8/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.0.8/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.0.8/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.0.8/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.0.8/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.8/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.8/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.0.8/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.0.8/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.0.8/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.0.8/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.0.8/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.0.8/setup/installing/source/erlang.md b/content/riak/kv/2.0.8/setup/installing/source/erlang.md index dcad7edc92..a1cccd5136 100644 --- a/content/riak/kv/2.0.8/setup/installing/source/erlang.md +++ b/content/riak/kv/2.0.8/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.8/installing/source/erlang/ --- -[install index]: /riak/kv/2.0.8/setup/installing -[security basics]: /riak/kv/2.0.8/using/security/basics +[install index]: {{}}riak/kv/2.0.8/setup/installing +[security basics]: {{}}riak/kv/2.0.8/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho10.tar.gz). 
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.0.8/setup/installing/source/jvm.md b/content/riak/kv/2.0.8/setup/installing/source/jvm.md index bec4ce2706..43bee2c96a 100644 --- a/content/riak/kv/2.0.8/setup/installing/source/jvm.md +++ b/content/riak/kv/2.0.8/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.0.8/installing/source/jvm/ --- -[usage search]: /riak/kv/2.0.8/developing/usage/search +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.0.8/setup/installing/suse.md b/content/riak/kv/2.0.8/setup/installing/suse.md index 99a63c149e..280241da77 100644 --- a/content/riak/kv/2.0.8/setup/installing/suse.md +++ b/content/riak/kv/2.0.8/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.8/installing/suse/ --- -[install verify]: /riak/kv/2.0.8/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.8/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.0.8/setup/installing/verify.md b/content/riak/kv/2.0.8/setup/installing/verify.md index 583c74660f..d9cf4d9976 100644 --- a/content/riak/kv/2.0.8/setup/installing/verify.md +++ b/content/riak/kv/2.0.8/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.8/installing/verify-install/ --- -[client libraries]: /riak/kv/2.0.8/developing/client-libraries -[perf open files]: /riak/kv/2.0.8/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.0.8/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.0.8/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.0.8/developing/client-libraries +[perf open files]: {{}}riak/kv/2.0.8/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.0.8/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.0.8/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.0.8/setup/installing/windows-azure.md b/content/riak/kv/2.0.8/setup/installing/windows-azure.md index ab748561eb..658438509a 100644 --- a/content/riak/kv/2.0.8/setup/installing/windows-azure.md +++ b/content/riak/kv/2.0.8/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". 
- ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.0.8/setup/planning/backend.md b/content/riak/kv/2.0.8/setup/planning/backend.md index 284797fea4..89db6f6fb7 100644 --- a/content/riak/kv/2.0.8/setup/planning/backend.md +++ b/content/riak/kv/2.0.8/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.8/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.8/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.8/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.0.8/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.8/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.8/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.0.8/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
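Backend selection is made per node in `riak.conf`. A sketch, assuming the default package install path; a node must be restarted for a backend change to take effect:

```bash
# Check which backend this node is running; valid values are
# bitcask (the default), leveldb, memory, and multi
grep '^storage_backend' /etc/riak/riak.conf
```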
diff --git a/content/riak/kv/2.0.8/setup/planning/backend/bitcask.md b/content/riak/kv/2.0.8/setup/planning/backend/bitcask.md index 5365591668..13b4967666 100644 --- a/content/riak/kv/2.0.8/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.0.8/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.0.8/using/admin/riak-cli -[config reference]: /riak/kv/2.0.8/configuring/reference -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.0.8/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.0.8/setup/planning/backend/multi -[usage search]: /riak/kv/2.0.8/developing/usage/search - -[glossary aae]: /riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.0.8/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.0.8/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.0.8/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.0.8/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.0.8/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.0.8/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.0.8/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.0.8/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
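To make the log-structured layout concrete, you can look at a node's Bitcask data directory. The paths below are assumptions based on a packaged Linux install with the default data root; adjust them for your platform.

```bash
# Bitcask keeps one directory per vnode, each holding append-only data files
# (plus hint files that speed up startup). Paths are illustrative defaults.
ls /var/lib/riak/bitcask/
ls /var/lib/riak/bitcask/0/   # e.g. timestamped *.bitcask.data and *.bitcask.hint files
```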
diff --git a/content/riak/kv/2.0.8/setup/planning/backend/leveldb.md b/content/riak/kv/2.0.8/setup/planning/backend/leveldb.md index a98d7a5c6f..e46a18088c 100644 --- a/content/riak/kv/2.0.8/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.0.8/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.8/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.8/configuring/reference -[perf index]: /riak/kv/2.0.8/using/performance -[config reference#aae]: /riak/kv/2.0.8/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[perf index]: {{}}riak/kv/2.0.8/using/performance +[config reference#aae]: {{}}riak/kv/2.0.8/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.0.8/setup/planning/backend/memory.md b/content/riak/kv/2.0.8/setup/planning/backend/memory.md index 2a46e4926c..37d3c90b2e 100644 --- a/content/riak/kv/2.0.8/setup/planning/backend/memory.md +++ b/content/riak/kv/2.0.8/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.8/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.0.8/configuring/reference -[plan backend multi]: /riak/kv/2.0.8/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[plan backend multi]: {{}}riak/kv/2.0.8/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.0.8/setup/planning/backend/multi.md b/content/riak/kv/2.0.8/setup/planning/backend/multi.md index fd73d213e6..3709b408a2 100644 --- a/content/riak/kv/2.0.8/setup/planning/backend/multi.md +++ b/content/riak/kv/2.0.8/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.8/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.0.8/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.8/setup/planning/backend/memory -[config reference]: /riak/kv/2.0.8/configuring/reference -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.0.8/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.0.8/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.8/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.0.8/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.0.8/setup/planning/best-practices.md b/content/riak/kv/2.0.8/setup/planning/best-practices.md index 3b476274fd..e1c2489837 100644 --- a/content/riak/kv/2.0.8/setup/planning/best-practices.md +++ b/content/riak/kv/2.0.8/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.8/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.0.8/using/reference/handoff -[config mapreduce]: /riak/kv/2.0.8/configuring/mapreduce -[glossary aae]: /riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.0.8/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.0.8/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.0.8/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.0.8/setup/planning/bitcask-capacity-calc.md index b001c9ef59..baac5930a2 100644 --- a/content/riak/kv/2.0.8/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.0.8/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
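As a rough back-of-envelope version of what the calculators do, the sketch below estimates cluster-wide RAM needed for Bitcask's in-memory keydir. The per-key overhead and workload figures are illustrative assumptions; use the calculators for authoritative numbers.

```bash
# Illustrative keydir RAM estimate:
#   total_keys * (per_key_overhead_bytes + avg_bucket_plus_key_bytes) * n_val
# Assuming 100M keys, ~45 bytes of per-key overhead, 36-byte bucket+key, n_val=3:
echo $(( 100000000 * (45 + 36) * 3 / 1024 / 1024 / 1024 )) # => ~22 GiB across the cluster
```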
diff --git a/content/riak/kv/2.0.8/setup/planning/cluster-capacity.md b/content/riak/kv/2.0.8/setup/planning/cluster-capacity.md index 218c299ac1..2d35409c33 100644 --- a/content/riak/kv/2.0.8/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.0.8/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.8/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.0.8/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.0.8/setup/planning -[concept replication]: /riak/kv/2.0.8/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.0.8/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.0.8/configuring/reference -[perf benchmark]: /riak/kv/2.0.8/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.0.8/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.0.8/setup/planning +[concept replication]: {{}}riak/kv/2.0.8/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[perf benchmark]: {{}}riak/kv/2.0.8/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.0.8/setup/planning/operating-system.md b/content/riak/kv/2.0.8/setup/planning/operating-system.md index a1e45b1ef9..d190997d1e 100644 --- a/content/riak/kv/2.0.8/setup/planning/operating-system.md +++ b/content/riak/kv/2.0.8/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.0.8/downloads/ +[downloads]: {{}}riak/kv/2.0.8/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.0.8/setup/planning/start.md b/content/riak/kv/2.0.8/setup/planning/start.md index a2da521a07..8a8fa9fd71 100644 --- a/content/riak/kv/2.0.8/setup/planning/start.md +++ b/content/riak/kv/2.0.8/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.8/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.0.8/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.8/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.0.8/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.0.8/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.8/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.0.8/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster. 
diff --git a/content/riak/kv/2.0.8/setup/upgrading/checklist.md b/content/riak/kv/2.0.8/setup/upgrading/checklist.md index bed2d21f56..bffd81a819 100644 --- a/content/riak/kv/2.0.8/setup/upgrading/checklist.md +++ b/content/riak/kv/2.0.8/setup/upgrading/checklist.md @@ -15,24 +15,24 @@ aliases: - /riak/kv/2.0.8/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.0.8/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.8/using/performance +[perf open files]: {{}}riak/kv/2.0.8/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.8/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.0.8/using/security/basics -[cluster ops load balance]: /riak/kv/2.0.8/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.0.8/configuring/reference -[config backend]: /riak/kv/2.0.8/configuring/backend -[usage search]: /riak/kv/2.0.8/developing/usage/search -[usage conflict resolution]: /riak/kv/2.0.8/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.0.8/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.0.8/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.0.8/using/admin/commands -[use admin riak control]: /riak/kv/2.0.8/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.0.8/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.0.8/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.0.8/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.0.8/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.0.8/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[config backend]: {{}}riak/kv/2.0.8/configuring/backend +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.0.8/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.8/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.0.8/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.0.8/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.0.8/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.0.8/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.0.8/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.0.8/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition. 
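One low-effort first step, assuming shell access to each node, is Riak's built-in diagnostics, which flag several of the issues discussed below (such as low open-files limits and risky mount options):

```bash
# Run Riak's built-in diagnostics on each node before going to production.
riak-admin diag
```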
diff --git a/content/riak/kv/2.0.8/setup/upgrading/search.md b/content/riak/kv/2.0.8/setup/upgrading/search.md new file mode 100644 index 0000000000..f64a89ee7d --- /dev/null +++ b/content/riak/kv/2.0.8/setup/upgrading/search.md @@ -0,0 +1,273 @@ +--- +title: "Upgrading Search from 1.x to 2.x" +description: "" +project: "riak_kv" +project_version: "2.0.8" +menu: + riak_kv-2.0.8: + name: "Upgrading Search 1.x to 2.x" + identifier: "upgrading_search" + weight: 104 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +--- + +If you're using Search in a version of Riak prior to 2.0 (1.3.0 to +1.4.x), you should follow these steps to migrate your search indexes +from the legacy `merge_index` to the new Solr-backed [Yokozuna](../../../using/reference/search) indexes. The legacy version of Riak Search is now deprecated +and does not support most new 2.0 features (i.e. no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate. + +Please note that the legacy `merge_index`-based search (aka legacy +Search) will be removed in a future release of Riak. + +## Overview of an Upgrade + +The migration steps explained here are as automated as they can +reasonably be, but they do include some manual steps for safety. They +are meant to be run on a live cluster, so there's no need to take all of +your nodes down. Like all migration activities, you should undertake +these steps at a time when your cluster is relatively light on traffic, +i.e. _not_ the week before Christmas. + +The main goal of a live migration is to stand up indexes in the new Riak +Search that parallel the existing ones in legacy. New writes add entries +to both indexes, while AAE adds entries in the new indexes for existing +data. + +Parallel indexes mean more disk usage. How much more will depend on the +schema, but tests have shown Solr to generally use less disk space. A +prudent plan will expect new Search to use as much disk as legacy. You +can also expect more CPU usage, as analysis will temporarily be performed +by both systems. Finally, Solr runs in a JVM process that requires its own +RAM. A good start is 2 GB, but heavier workloads will require more. Do not +make the heap too large, however, as that can cause lengthy garbage collection pauses. + +As the new search indexes catch up with the old, incoming queries will +still be serviced by legacy Search. Once you have determined that the +new indexes are consistent with KV, you can perform a live switch to the +new system and turn off legacy Search. Finally, you can remove the old +merge index directories to reclaim disk space. + +> **Downgrading and Merge Index** +> +> It may be tempting to keep the merge index files in case of a downgrade. +We don't recommend doing that if writes are being made to these buckets +during upgrade. Once `search: false` is set on a bucket, all new KV +data written will have missing indexes in the merge index, and +overwritten data will have inconsistent indexes. At this point, a +downgrade requires a full re-index of the data, as legacy Search has no +mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search). + +> **Active Anti-Entropy (AAE) Required** +> +>Migration requires that Riak's AAE subsystem be enabled. 
It's +responsible for finding all the missing index entries for existing data +and adding them. Technically speaking, the migration can be performed +without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space, since merge index's GC +algorithm is bad at getting rid of large index files. + +## Steps to Upgrading + +1. First, you'll perform a normal [rolling upgrade](../cluster). + As you upgrade, enable `yokozuna` (the new Riak Search library) on + each node. If you're still using `app.config`, the setting is called `yokozuna`; + if you've moved to the new `riak.conf` configuration format, it's + called `search`. + + ```riakconf + search = on + ``` + ```appconfig + {yokozuna, [ + %% Other configs + {enabled, true}, + %% Other configs + ]} + ``` + +
+
Upgrade First
+ Don't proceed until all nodes have been upgraded to the newest + version. This way all nodes have new Search capabilities before + running the next steps, which require them. +
+ +2. For every schema in legacy Search, you must create a comparable +schema in new Search. If you want to use the default schema named +[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema. + + To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas) + instructions to learn how to define your XML file. Once you've created + the file, you can upload it to the cluster. + + ```curl + curl -XPUT http://localhost:8098/search/schema/my_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @my_schema.xml + ``` + +3. For every index in legacy Search, you must create a comparable index +in new Search, setting the appropriate schema that you created in the +previous step. This index can have the same name as your legacy Search +index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup). + + ```curl + curl -XPUT http://localhost:8098/search/index/my_index \ + -H 'Content-Type: application/json' \ + -d '{"schema":"my_schema"}' + ``` + +4. For each bucket that is indexed by legacy Search, you must add the +`search_index` bucket property to point to the new Search index. This +new index is what we are attempting to migrate all of our index data to. +You can find more details about this step under [Using Search](../../../developing/usage/search/#setup). + + ```curl + curl -XPUT http://localhost:8098/buckets/my_bucket/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"my_index"}}' + ``` + + Once a bucket is associated with the new Search, all objects that are + written or modified in Riak will be indexed by **both** legacy and new + Search. However, the HTTP and client query interfaces will still + continue to use the legacy Search. + +5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash +trees must be manually cleared so that AAE will notice the missing +indexes. + + Attach to one of the Riak nodes by calling `riak attach-direct`. Paste + the following code into the shell. It clears the Search hash trees for + each node in the cluster. + + ```erlang + riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity). + ``` + + Press `Ctrl-D` to exit from the attached shell. + + In the background, AAE will rebuild the hash trees and exchange them + with KV. These exchanges will notice objects are missing and index + them in new Search. + + + +6. Monitor the AAE status of every node until a full round of exchanges +has occurred on every node. + + ```bash + riak-admin search aae-status + ``` + + First, you must wait until all trees are rebuilt. This may take a + while, as each node is configured, by default, to build a maximum of + one tree per hour. You can determine when a tree is built by looking + at the `Entropy Trees` section. When a tree is not built, it will show + `--` under the `Built (ago)` column. Otherwise, it will list how long + ago the tree was built in a human-friendly format. Here is an example + of trees that are not built: + + ``` + ================================ Entropy Trees ================================ + Index Built (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 -- + 319703483166135013357056057156686910549735243776 -- + ... 
+ ``` + + Here is an example of built trees: + + ``` + ================================ Entropy Trees ================================ + Index Built (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 12.3 hr + 319703483166135013357056057156686910549735243776 5.3 hr + ... + ``` + + After all the trees are built, you then have to wait for a full + exchange round to occur for every partition on every node. That is, + the full exchange round must be **NEWER** than the time the tree was + built. That way you know the exchange was based on the latest tree. + The exchange information is found under the `Exchanges` section. + Under that section there are two columns: `Last (ago)` and `All + (ago)`. In this case, you want to wait until the `All (ago)` column is + newer than the value of `Built (ago)` in the `Entropy Trees` section. + For example, given the entropy tree output above, the following output would + indicate that both partitions have had a full exchange round since the + latest tree was built: + + ``` + ================================== Exchanges ================================== + Index Last (ago) All (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 12.1 hr 12.1 hr + 319703483166135013357056057156686910549735243776 5.1 hr 5.2 hr + ... + ``` + + Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` is newer than + `5.3 hr`. Once the exchange is newer for every partition on every + node, you know that AAE has brought all new indexes up to date. + +7. Next, run the following command, which will give HTTP and PB query +control to the new Riak Search. + + ```bash + riak-admin search switch-to-new-search + ``` + +
+
Check Results Before Switching (Optional)
+ Up until this point, all incoming queries are serviced by the legacy + Search system. After `switch-to-new-search` is run, all queries + will be handled by new Search. If you first want to verify the + results of new Search before switching, you can use its dedicated + HTTP resource at `/search/query/?q=...`, as sketched below. +
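A minimal spot check might look like this; `my_index` is the hypothetical index name used in the earlier steps, and `*:*` simply matches every indexed document.

```bash
# Query new Search directly, bypassing the legacy interface.
curl "http://localhost:8098/search/query/my_index?wt=json&q=*:*&rows=0"
# Compare numFound in the JSON response with the document count you expect.
```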
+ +8. Set the `search` bucket property to `false` for all legacy indexed +buckets. This deactivates legacy Search. + + ```curl + curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search": false}}' + ``` + +9. Disable the Riak Search process on each node by setting `riak_search` +`enabled` to `false`. + + ```appconfig + {riak_search, [ + %% Other configs + {enabled, false}, + %% Other configs + ]}, + ``` + +10. Perform a rolling restart. This is needed both to stop legacy +Search and to properly remove the legacy Search commit hooks. A bug +in the 1.4.x series allowed bucket properties to leak into what Riak +developers call the "raw ring". In the case of legacy Search, it causes +the commit hooks to persist even when legacy Search is disabled and the +search property is set to false. + + New Search has code to expunge the legacy hooks from the raw ring, but + it only runs during start-up and requires that legacy Search be + disabled in the configuration. Thus, the easiest way to fix things is + to disable legacy Search (in step 9) and then perform a rolling + restart of the cluster. + +11. Finally, delete the merge index directories to reclaim disk space. + +For any questions, reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. diff --git a/content/riak/kv/2.0.8/setup/upgrading/version.md b/content/riak/kv/2.0.8/setup/upgrading/version.md index 94d1dfbfce..66eb465eed 100644 --- a/content/riak/kv/2.0.8/setup/upgrading/version.md +++ b/content/riak/kv/2.0.8/setup/upgrading/version.md @@ -19,18 +19,18 @@ aliases: --- -[production checklist]: /riak/kv/2.0.8/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.0.8/using/admin/riak-control -[use admin commands]: /riak/kv/2.0.8/using/admin/commands -[use admin riak-admin]: /riak/kv/2.0.8/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.0.8/developing/usage/secondary-indexes -[release notes]: /riak/kv/2.0.8/release-notes +[production checklist]: {{}}riak/kv/2.0.8/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.0.8/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.0.8/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.0.8/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.0.8/developing/usage/secondary-indexes +[release notes]: {{}}riak/kv/2.0.8/release-notes [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.0.8/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.0.8/using/reference/jmx -[snmp]: /riak/kv/2.0.8/using/reference/snmp -[Release Notes]: /riak/kv/2.0.8/release-notes +[cluster ops mdc]: {{}}riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.0.8/using/reference/jmx +[snmp]: {{}}riak/kv/2.0.8/using/reference/snmp +[Release Notes]: {{}}riak/kv/2.0.8/release-notes ## Overview diff --git a/content/riak/kv/2.0.8/using/admin/commands.md b/content/riak/kv/2.0.8/using/admin/commands.md index 4080790abd..2e5805b58b 100644 --- a/content/riak/kv/2.0.8/using/admin/commands.md +++ b/content/riak/kv/2.0.8/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.8/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.0.8/using/admin/riak-admin/#cluster -[concept clusters]: 
/riak/kv/2.0.8/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.0.8/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.0.8/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.0.8/using/admin/riak-admin.md b/content/riak/kv/2.0.8/using/admin/riak-admin.md index db56910c62..e08111cad6 100644 --- a/content/riak/kv/2.0.8/using/admin/riak-admin.md +++ b/content/riak/kv/2.0.8/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.0.8/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.0.8/configuring/reference -[use admin commands]: /riak/kv/2.0.8/using/admin/commands -[use admin commands#join]: /riak/kv/2.0.8/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.0.8/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.0.8/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.0.8/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.0.8/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.0.8/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.0.8/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.0.8/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.0.8/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.0.8/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.0.8/setup/downgrade -[security index]: /riak/kv/2.0.8/using/security/ -[security managing]: /riak/kv/2.0.8/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.0.8/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.0.8/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.0.8/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.0.8/using/cluster-operations/strong-consistency -[cluster ops handoff]: /riak/kv/2.0.8/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.0.8/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[use admin commands]: {{}}riak/kv/2.0.8/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.0.8/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.0.8/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.0.8/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.0.8/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.0.8/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.0.8/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.0.8/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.0.8/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.0.8/using/cluster-operations/inspecting-node +[use ref 
monitoring]: {{}}riak/kv/2.0.8/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.0.8/setup/downgrade +[security index]: {{}}riak/kv/2.0.8/using/security/ +[security managing]: {{}}riak/kv/2.0.8/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.0.8/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.0.8/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.0.8/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.0.8/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.0.8/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.0.8/using/admin/riak-admin/#stats ## `riak-admin` diff --git a/content/riak/kv/2.0.8/using/admin/riak-cli.md b/content/riak/kv/2.0.8/using/admin/riak-cli.md index c7358dcd0e..ea62388a8e 100644 --- a/content/riak/kv/2.0.8/using/admin/riak-cli.md +++ b/content/riak/kv/2.0.8/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.8/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.0.8/configuring/reference/ +[configuration file]: {{}}riak/kv/2.0.8/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.0.8/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.0.8/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.0.8/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.0.8/configuring/reference/ ## riak diff --git a/content/riak/kv/2.0.8/using/admin/riak-control.md b/content/riak/kv/2.0.8/using/admin/riak-control.md index d50765cf16..9e486a6e46 100644 --- a/content/riak/kv/2.0.8/using/admin/riak-control.md +++ b/content/riak/kv/2.0.8/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.0.8/configuring/reference +[config reference]: {{}}riak/kv/2.0.8/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.0.8/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.0.8/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. 
Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.0.8/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.0.8/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.0.8/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.0.8/using/cluster-operations/active-anti-entropy.md index 09624a537f..0e141109c3 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/active-anti-entropy.md @@ -15,8 +15,8 @@ aliases: - /riak/2.0.8/ops/advanced/aae/ --- -[config search#throttledelay]: /riak/kv/2.0.8/configuring/search/#search-anti-entropy-throttle-$tier-delay -[config search#throttle]: riak/kv/2.0.8/configuring/search/#search-anti-entropy-throttle +[config search#throttledelay]: {{}}riak/kv/2.0.8/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{}}riak/kv/2.0.8/configuring/search/#search-anti-entropy-throttle Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. @@ -57,12 +57,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. 
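A minimal sketch of that edit-and-restart cycle, assuming the packaged `riak` script is on your `PATH`:

```bash
# After changing anti_entropy in riak.conf, restart the node and confirm
# it is serving requests again before moving on to the next node.
riak restart
riak ping   # prints "pong" once the node is back up
```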
## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -90,7 +90,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes.md index 518987b25e..25c75ffdb9 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.0.8/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.0.8/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.0.8/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.0.8/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.0.8/using/cluster-operations/backing-up.md b/content/riak/kv/2.0.8/using/cluster-operations/backing-up.md index 837022fb33..1a922ae70b 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.8/ops/running/backups --- -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters -[config reference]: /riak/kv/2.0.8/configuring/reference -[plan backend leveldb]: /riak/kv/2.0.8/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.8/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency -[concept aae]: /riak/kv/2.0.8/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.0.8/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.8/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.0.8/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.0.8/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.0.8/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down `](/riak/kv/2.0.8/using/admin/riak-admin/#down) + [`riak-admin down `]({{}}riak/kv/2.0.8/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join `](/riak/kv/2.0.8/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join `]({{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace `](/riak/kv/2.0.8/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace `]({{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.0.8/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.0.8/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.0.8/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.0.8/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.0.8/using/cluster-operations/bucket-types.md b/content/riak/kv/2.0.8/using/cluster-operations/bucket-types.md index 275a580e99..8818a1c5c7 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.0.8/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.0.8/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. 
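That setup step amounts to creating and activating a type from the command line. A minimal sketch, with a hypothetical type name and illustrative properties:

```bash
# Create a bucket type with an explicit n_val, then activate it cluster-wide.
riak-admin bucket-type create my_type '{"props":{"n_val":3}}'
riak-admin bucket-type activate my_type
riak-admin bucket-type status my_type   # verify the type shows as active
```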
diff --git a/content/riak/kv/2.0.8/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.0.8/using/cluster-operations/changing-cluster-info.md index 9ca1ab9b51..b51e90da9c 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.0.8/configuring/reference +[config reference]: {{}}riak/kv/2.0.8/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.0.8/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.0.8/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.0.8/using/cluster-operations/handoff.md b/content/riak/kv/2.0.8/using/cluster-operations/handoff.md index 1a082c63af..e20319a3da 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.0.8/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.0.8/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.0.8/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.0.8/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.0.8/using/cluster-operations/logging.md b/content/riak/kv/2.0.8/using/cluster-operations/logging.md index 546cd842ed..00a7b78bb1 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/logging.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e. 
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.0.8/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.0.8/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.0.8/using/cluster-operations/replacing-node.md b/content/riak/kv/2.0.8/using/cluster-operations/replacing-node.md index 1ce1aca757..6c37e0e200 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.0.8/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.0.8/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.0.8/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.0.8/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.0.8/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.0.8/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.0.8/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.0.8/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.0.8/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.0.8/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.0.8/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.0.8/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.0.8/using/cluster-operations/strong-consistency.md index e1e6eb34cb..c37ec1b388 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.0.8/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.0.8/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.0.8/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.0.8/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.0.8/using/cluster-operations/v2-multi-datacenter.md index 60cfe0a11b..65cb321f23 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/v2-multi-datacenter.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication system is largely @@ -163,7 +163,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -183,7 +183,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -221,7 +221,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.0.8/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.0.8/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -242,7 +242,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter.md index 8ef49bedec..99fdcb3b9b 100644 --- a/content/riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.0.8/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.0.8/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.8/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.8/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.0.8/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.0.8/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.0.8/using/performance.md b/content/riak/kv/2.0.8/using/performance.md index 225d96f55a..58d508500d 100644 --- a/content/riak/kv/2.0.8/using/performance.md +++ b/content/riak/kv/2.0.8/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.0.8/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.0.8/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -253,12 +253,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.0.8/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.0.8/using/performance/open-files-limit/) for more details.
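As a quick sketch of checking and raising that limit on a Linux host (the value shown is illustrative; set it permanently via your platform's limits configuration as described in the Open Files Limit doc):

```bash
# Check the per-process open-files limit for the current shell
ulimit -n

# Raise it for this session before starting Riak (value is illustrative)
ulimit -n 65536

# Inspect the limit of an already-running Riak node
cat /proc/$(pgrep -f beam.smp | head -1)/limits | grep "open files"
```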
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.0.8/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.0.8/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.0.8/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.0.8/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.0.8/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.0.8/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.0.8/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.0.8/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.0.8/using/performance/benchmarking.md b/content/riak/kv/2.0.8/using/performance/benchmarking.md index 1381f7089d..92fd52c5e3 100644 --- a/content/riak/kv/2.0.8/using/performance/benchmarking.md +++ b/content/riak/kv/2.0.8/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.0.8/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.0.8/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.0.8/using/performance/latency-reduction.md b/content/riak/kv/2.0.8/using/performance/latency-reduction.md index a720552bbb..6dff079b29 100644 --- a/content/riak/kv/2.0.8/using/performance/latency-reduction.md +++ b/content/riak/kv/2.0.8/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.0.8/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.0.8/using/performance/multi-datacenter-tuning.md index 0c0eca67b0..26ec03055e 100644 --- a/content/riak/kv/2.0.8/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.0.8/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.0.8/using/performance +[perf index]: {{}}riak/kv/2.0.8/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.0.8/using/performance/open-files-limit.md b/content/riak/kv/2.0.8/using/performance/open-files-limit.md index 216c45de8b..3b371d0435 100644 --- a/content/riak/kv/2.0.8/using/performance/open-files-limit.md +++ b/content/riak/kv/2.0.8/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/ops/tuning/open-files-limit/ --- -[plan backend]: /riak/kv/2.0.8/setup/planning/backend/ +[plan backend]: {{}}riak/kv/2.0.8/setup/planning/backend/ [blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files Riak KV can accumulate a large number of open file handles during operation. 
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles. diff --git a/content/riak/kv/2.0.8/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.0.8/using/performance/v2-scheduling-fullsync.md index c26acbd621..ee8b7cfe36 100644 --- a/content/riak/kv/2.0.8/using/performance/v2-scheduling-fullsync.md +++ b/content/riak/kv/2.0.8/using/performance/v2-scheduling-fullsync.md @@ -14,7 +14,7 @@ commercial_offering: true --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. {{% /note %}} diff --git a/content/riak/kv/2.0.8/using/reference/bucket-types.md b/content/riak/kv/2.0.8/using/reference/bucket-types.md index 96b7ba084c..8484175ad2 100644 --- a/content/riak/kv/2.0.8/using/reference/bucket-types.md +++ b/content/riak/kv/2.0.8/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.0.8/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.0.8/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.0.8/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.0.8/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. 
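As a minimal sketch of that per-bucket approach over HTTP (bucket name and property values are illustrative, and a local node listening on `127.0.0.1:8098` is assumed):

```bash
# Set properties on a single bucket
curl -XPUT http://127.0.0.1:8098/buckets/my_bucket/props \
  -H "Content-Type: application/json" \
  -d '{"props": {"n_val": 5, "allow_mult": false}}'

# Read the properties back to confirm
curl http://127.0.0.1:8098/buckets/my_bucket/props
```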
@@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.0.8/developing/data-types), and [strong consistency](/riak/kv/2.0.8/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.0.8/developing/data-types), and [strong consistency]({{}}riak/kv/2.0.8/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.0.8/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.0.8/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.8/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.0.8/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.8/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.8/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.0.8/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.0.8/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.0.8/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.0.8/developing/usage/conflict-resolution) with the appropriate application-side business logic. 
To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.0.8/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.0.8/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.0.8/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.0.8/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.0.8/learn/concepts/buckets) and [keys](/riak/kv/2.0.8/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.0.8/learn/concepts/buckets) and [keys]({{}}riak/kv/2.0.8/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.0.8/using/reference/custom-code.md b/content/riak/kv/2.0.8/using/reference/custom-code.md index b0a9d8c783..c0ac3ef5b4 100644 --- a/content/riak/kv/2.0.8/using/reference/custom-code.md +++ b/content/riak/kv/2.0.8/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.0.8/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.0.8/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.0.8/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.0.8/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file must have the same name as the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.0.8/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.0.8/using/admin/riak-admin/#wait-for-service).
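For example, to block until the key/value store is ready on a given node (the node name is illustrative; use the `-name` value from that node's `vm.args`):

```bash
# Returns once the riak_kv service is fully available on the named node
riak-admin wait-for-service riak_kv riak@192.168.1.10
```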
{{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.0.8/using/reference/handoff.md b/content/riak/kv/2.0.8/using/reference/handoff.md index b26411fb19..7c7644899d 100644 --- a/content/riak/kv/2.0.8/using/reference/handoff.md +++ b/content/riak/kv/2.0.8/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.0.8/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.0.8/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.0.8/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.0.8/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.0.8/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.0.8/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.0.8/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.0.8/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.0.8/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.0.8/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.0.8/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.0.8/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.0.8/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.0.8/configuring/reference/#vnode_management_timer). 
Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.0.8/using/reference/jmx.md b/content/riak/kv/2.0.8/using/reference/jmx.md index 11910a5edd..f29260f562 100644 --- a/content/riak/kv/2.0.8/using/reference/jmx.md +++ b/content/riak/kv/2.0.8/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.8/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.0.8/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.0.8/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.0.8/using/reference/logging.md b/content/riak/kv/2.0.8/using/reference/logging.md index f5c44b47a9..8b05e54c31 100644 --- a/content/riak/kv/2.0.8/using/reference/logging.md +++ b/content/riak/kv/2.0.8/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.8/ops/running/logging --- -[cluster ops log]: /riak/kv/2.0.8/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.0.8/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files](/riak/kv/2.0.8/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.0.8/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.0.8/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.0.8/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. 
* `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.0.8/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.0.8/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.0.8/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.0.8/using/reference/multi-datacenter/comparison.md index 0ceabe5e7d..04c4328e27 100644 --- a/content/riak/kv/2.0.8/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.0.8/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.0.8/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.0.8/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.0.8/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.0.8/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. -* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.0.8/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.0.8/using/reference/runtime-interaction.md b/content/riak/kv/2.0.8/using/reference/runtime-interaction.md index 6ae7b5eca0..0913e89a1b 100644 --- a/content/riak/kv/2.0.8/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.0.8/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.8/ops/advanced/runtime --- -[config reference]: /riak/kv/2.0.8/configuring/reference -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.8/configuring/reference +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.0.8/using/reference/search.md b/content/riak/kv/2.0.8/using/reference/search.md index 052b4c99ce..81c33655bf 100644 --- a/content/riak/kv/2.0.8/using/reference/search.md +++ b/content/riak/kv/2.0.8/using/reference/search.md @@ -15,21 +15,21 @@ aliases: - /riak/kv/2.0.8/dev/advanced/search --- -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters -[configuring search]: /riak/kv/2.0.8/configuring/search +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters +[configuring search]: {{}}riak/kv/2.0.8/configuring/search > **Note on search 2.0 vs. legacy search** > > This document refers to Riak search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). 
For information about the deprecated Riak search, visit [the old Using Riak search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -search, you should check out the [Using Search](/riak/kv/2.0.8/developing/usage/search) document. +search, you should check out the [Using Search]({{}}riak/kv/2.0.8/developing/usage/search) document. -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -127,7 +127,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.0.8/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.0.8/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -289,7 +289,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on how to customize your own [search schema](/riak/kv/2.0.8/developing/usage/search-schemas). +You can also find more information on how to customize your own [search schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -299,7 +299,7 @@ indexed. ## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.0.8/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -354,7 +354,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.0.8/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.0.8/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.0.8/using/reference/secondary-indexes.md b/content/riak/kv/2.0.8/using/reference/secondary-indexes.md index d4340484ec..572a294236 100644 --- a/content/riak/kv/2.0.8/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.0.8/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.0.8/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.0.8/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.8/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. 
if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.8/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.0.8/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.0.8/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.0.8/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.0.8/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.0.8/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.0.8/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.0.8/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.0.8/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. 
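As a sketch of that tagging model over HTTP (bucket, key, index name, and endpoint are all illustrative):

```bash
# Write an object tagged with a binary secondary index entry
curl -XPUT http://127.0.0.1:8098/buckets/users/keys/john_smith \
  -H "Content-Type: application/json" \
  -H "x-riak-index-twitter_bin: jsmith123" \
  -d '{"name": "John Smith"}'

# Query the index to retrieve the matching keys
curl http://127.0.0.1:8098/buckets/users/index/twitter_bin/jsmith123
```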
diff --git a/content/riak/kv/2.0.8/using/reference/statistics-monitoring.md b/content/riak/kv/2.0.8/using/reference/statistics-monitoring.md index da5c6fbb59..17eb3a398d 100644 --- a/content/riak/kv/2.0.8/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.0.8/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.0.8/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.0.8/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.0.8/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.0.8/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.0.8/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.0.8/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.0.8/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.0.8/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.0.8/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.0.8/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.0.8/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.0.8/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -156,7 +156,7 @@ Metric | Description ## Command-line Interface -The [`riak-admin`](/riak/kv/2.0.8/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.0.8/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -181,14 +181,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.0.8/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.0.8/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. 
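Before getting into `stat`'s finer-grained queries, here is a quick sketch of the two basic retrieval paths (a local node with the default HTTP port is assumed):

```bash
# Snapshot of all node statistics from the command line
riak-admin status

# The same data over HTTP, convenient for scripts and monitoring agents
curl -s http://127.0.0.1:8098/stats | python -m json.tool
```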
Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.0.8/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.0.8/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -235,7 +235,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.0.8/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.0.8/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -259,7 +259,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.0.8/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.0.8/developing/api/http/status) endpoint is also available. #### Nagios @@ -333,14 +333,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.0.8/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.0.8/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.0.8/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.0.8/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -362,9 +362,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.0.8/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.0.8/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.0.8/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.0.8/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -380,9 +380,9 @@ Docs](https://github.com/basho/basho_docs). 
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.0.8/using/reference/strong-consistency.md b/content/riak/kv/2.0.8/using/reference/strong-consistency.md index f2e11dd9f7..91c43b1ae8 100644 --- a/content/riak/kv/2.0.8/using/reference/strong-consistency.md +++ b/content/riak/kv/2.0.8/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.0.8/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.8/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.8/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.8/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.8/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.8/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], which enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.8/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.8/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.8/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.8/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble are offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.0.8/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.0.8/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. 
Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.0.8/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.0.8/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.0.8/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.0.8/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter.md index 1ce4e9a753..5b0f1a200c 100644 --- a/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter.md @@ -16,7 +16,7 @@ toc: true [v2 mdc fullsync]: ./scheduling-fullsync {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.8/using/reference/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.8/using/reference/v3-multi-datacenter/) instead. {{% /note %}} diff --git a/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter/architecture.md index ff3647f939..ba3adab127 100644 --- a/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter/architecture.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.8/using/reference/v3-multi-datacenter/architecture/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.8/using/reference/v3-multi-datacenter/architecture/) instead. {{% /note %}} @@ -83,7 +83,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.0.8/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.0.8/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -95,7 +95,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{}}images/MDC_Full-sync-small.png)
## Realtime Replication @@ -113,7 +113,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{}}images/MDC-real-time-sync-small.png)
## Restrictions @@ -121,6 +121,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.0.8/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.0.8/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.0.8/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.0.8/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter/scheduling-fullsync.md index 66cac9a842..6eee1e3fd5 100644 --- a/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.0.8/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.8/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.8/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. {{% /note %}} diff --git a/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/aae.md index 8f28c9c604..d1131bd863 100644 --- a/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.8/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.0.8/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.0.8/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/architecture.md index d1e5a13603..90a0276308 100644 --- a/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.8/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.0.8/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.0.8/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.0.8/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.0.8/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC fullsync]({{}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC fullsync]({{}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC fullsync]({{}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC fullsync]({{}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC fullsync]({{}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC fullsync]({{}}images/MDC-v3-realtime6.png)
## Restrictions diff --git a/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/cascading-writes.md index 1583c3d308..74f5ef365d 100644 --- a/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/cascading-writes.md @@ -83,7 +83,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{}}riak/kv/2.0.8/using/cluster-operations/v3-multi-datacenter) for more information. To show the current settings: diff --git a/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/scheduling-fullsync.md index 8636f1237c..2b6d97a9ef 100644 --- a/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.0.8/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.8/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.0.8/configuring/reference/#advanced-configuration +[config reference#advanced]: {{}}riak/kv/2.0.8/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.0.8/using/repair-recovery/errors.md b/content/riak/kv/2.0.8/using/repair-recovery/errors.md index 3146b44a06..8e10282854 100644 --- a/content/riak/kv/2.0.8/using/repair-recovery/errors.md +++ b/content/riak/kv/2.0.8/using/repair-recovery/errors.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.0.8/configuring/reference +[config reference]: {{}}riak/kv/2.0.8/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or if epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1. +`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1. 
`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. 
`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.0.8/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{< baseurl >}}riak/kv/2.0.8/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.0.8/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.0.8/using/repair-recovery/failure-recovery.md index 9bb0c386af..7eedf84d50 100644 --- a/content/riak/kv/2.0.8/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.0.8/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.0.8/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{< baseurl >}}riak/kv/2.0.8/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.0.8/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{< baseurl >}}riak/kv/2.0.8/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses.
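For the `eaddrinuse`/`eaddrnotavail` entries above, the usual first step is to find out what already holds the address. A minimal diagnostic sketch (8087 and 8098 are Riak's default Protocol Buffers and HTTP ports; substitute whatever your configuration uses):

```bash
# See which process, if any, is already bound to Riak's client ports.
sudo netstat -tlnp | grep -E ':(8087|8098)'
sudo lsof -i :8087 -i :8098

# Look for stale Erlang VMs left behind by an unclean shutdown.
ps aux | grep '[b]eam.smp'
```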
@@ -116,7 +116,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak/kv/2.0.8/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.0.8/using/repair-recovery/repairs.md b/content/riak/kv/2.0.8/using/repair-recovery/repairs.md index c62e785d65..70f6f7a198 100644 --- a/content/riak/kv/2.0.8/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.0.8/using/repair-recovery/repairs.md @@ -20,13 +20,13 @@ aliases: - /riak/2.0.8/ops/running/recovery/repairing-partitions - /riak/kv/2.0.8/ops/running/recovery/repairing-partitions --- -[cluster ops aae]: /riak/kv/2.0.8/using/cluster-operations/active-anti-entropy/ -[config ref]: /riak/kv/2.0.8/configuring/reference/ +[cluster ops aae]: {{}}riak/kv/2.0.8/using/cluster-operations/active-anti-entropy/ +[config ref]: {{}}riak/kv/2.0.8/configuring/reference/ [Erlang shell]: http://learnyousomeerlang.com/starting-out -[glossary AAE]: /riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae -[glossary readrep]: /riak/kv/2.0.8/learn/glossary/#read-repair -[search config]: /riak/kv/2.0.8/configuring/search/#search-config-settings -[tiered storage]: /riak/kv/2.0.8/setup/planning/backend/leveldb/#tiered-storage +[glossary AAE]: {{}}riak/kv/2.0.8/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{}}riak/kv/2.0.8/learn/glossary/#read-repair +[search config]: {{}}riak/kv/2.0.8/configuring/search/#search-config-settings +[tiered storage]: {{}}riak/kv/2.0.8/setup/planning/backend/leveldb/#tiered-storage @@ -236,23 +236,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.0.8/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.0.8/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.0.8/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/) is enabled. 
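Mechanically, a partition repair is issued from the node's Erlang console once the criteria below indicate one is warranted. A minimal sketch, with a purely illustrative partition ID:

```bash
# Attach to the running node's console (detach without stopping the
# node when finished).
riak attach

# At the Erlang prompt, repair a single KV partition; replace the ID
# below with a partition actually owned by this node:
#   riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
```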
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.0.8/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.0.8/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.0.8/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.0.8/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.0.8/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.0.8/using/repair-recovery/rolling-replaces.md index 47a9a5ceb0..5ee3a5703f 100644 --- a/content/riak/kv/2.0.8/using/repair-recovery/rolling-replaces.md +++ b/content/riak/kv/2.0.8/using/repair-recovery/rolling-replaces.md @@ -12,9 +12,9 @@ menu: toc: true --- -[upgrade]: /riak/kv/2.0.8/setup/upgrading/cluster/ -[rolling restarts]: /riak/kv/2.0.8/using/repair-recovery/rolling-restart/ -[add node]: /riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes +[upgrade]: {{}}riak/kv/2.0.8/setup/upgrading/cluster/ +[rolling restarts]: {{}}riak/kv/2.0.8/using/repair-recovery/rolling-restart/ +[add node]: {{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. diff --git a/content/riak/kv/2.0.8/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.0.8/using/repair-recovery/rolling-restart.md index 7c0dbaa510..4bfdae5778 100644 --- a/content/riak/kv/2.0.8/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.0.8/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.8/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.0.8/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.0.8/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.0.8/using/running-a-cluster.md b/content/riak/kv/2.0.8/using/running-a-cluster.md index 0b55bb11c9..9816c71ddb 100644 --- a/content/riak/kv/2.0.8/using/running-a-cluster.md +++ b/content/riak/kv/2.0.8/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. 
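Condensed into commands, one iteration of the rolling restart described above might look like the following sketch (the node name is illustrative):

```bash
# Stop the node, apply the change (upgrade, config edit, etc.), restart it.
riak stop
riak start

# Block until the KV service is accepting requests again.
riak-admin wait-for-service riak_kv riak@node1.example.com

# Confirm handoffs have settled before moving on to the next node.
riak-admin transfers
```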
-Most configuration changes will be applied to the [configuration file](/riak/kv/2.0.8/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.0.8/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.0.8/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. ## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.0.8/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.0.8/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.0.8/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.0.8/using/cluster-operations/adding-removing-nodes) from a cluster. 
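As a concrete sketch of those join steps (the IP and node names follow the examples above; substitute your own):

```bash
# On each node being added, stage a join against an existing cluster member.
riak-admin cluster join riak@192.168.1.10

# From any one node, review the staged changes, then commit them.
riak-admin cluster plan
riak-admin cluster commit
```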
> **Ring Creation Size** > diff --git a/content/riak/kv/2.0.8/using/security.md b/content/riak/kv/2.0.8/using/security.md index 7ca44f81c1..7bfe1baf61 100644 --- a/content/riak/kv/2.0.8/using/security.md +++ b/content/riak/kv/2.0.8/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.0.8/ops/advanced/security --- -[config reference search]: /riak/kv/2.1.4/configuring/reference/#search -[config search enabling]: /riak/kv/2.1.4/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.0.8/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.0.8/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.0.8/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.1.4/using/security/basics -[security managing]: /riak/kv/2.1.4/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.0.8/using/security/basics +[security managing]: {{}}riak/kv/2.0.8/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.1.4/developing/usage/search +[usage search]: {{}}riak/kv/2.0.8/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.0.8/using/security/basics.md b/content/riak/kv/2.0.8/using/security/basics.md index d5a1ad5876..c39215a854 100644 --- a/content/riak/kv/2.0.8/using/security/basics.md +++ b/content/riak/kv/2.0.8/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.0.8/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.0.8/using/security/managing-sources/). As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.0.8/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.0.8/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.0.8/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.0.8/using/reference/custom-code). 1. 
Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.0.8/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{< baseurl >}}riak/kv/2.0.8/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.0.8/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{< baseurl >}}riak/kv/2.0.8/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.0.8/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.0.8/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{< baseurl >}}riak/kv/2.0.8/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.0.8/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{< baseurl >}}riak/kv/2.0.8/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.0.8/configuring/search/) document. +[Riak Search Settings]({{< baseurl >}}riak/kv/2.0.8/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.0.8/using/security/managing-sources/). +A more in-depth tutorial can be found in [Managing Security Sources]({{< baseurl >}}riak/kv/2.0.8/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.0.8/using/security/managing-sources/) document.
+`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.0.8/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.0.8/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.0.8/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.0.8/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.0.8/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.0.8/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.0.8/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.0.8/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). -If, however, you are using the [HTTP API](/riak/kv/2.0.8/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.0.8/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. 
By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.0.8/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.0.8/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.0.8/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.0.8/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.0.8/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.0.8/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.0.8/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.0.8/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.0.8/configuring/reference/#directories).
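A quick way to vet that certificate setup on each node, assuming the stock `/etc/riak` layout and the `ssl.*` keys shown elsewhere in this document (the file names are illustrative):

```bash
# Confirm which cert paths and etc-directory riak.conf actually declares.
grep -E '^(ssl\.|platform_etc_dir)' /etc/riak/riak.conf

# Verify the riak user can read the files those settings point to.
sudo -u riak ls -l /etc/riak/cert.pem /etc/riak/key.pem /etc/riak/cacert.pem
```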
`platform_data_dir`: The directory in which Riak stores its storage backend data, as well as active anti-entropy data, and cluster metadata. Default: `./data`

`alive_tokens`: Determines the number of ticks the leader will wait to hear from its associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before ensemble_tick * alive_tokens milliseconds have elapsed, the leader will step down. @@ -1833,8 +1833,8 @@ package) and in R14B04 via a custom repository and branch.

`vnode_management_timer`: Sets the frequency with which [vnodes]({{< baseurl >}}riak/kv/2.0.8/learn/glossary/#vnode) attempt to trigger handoff between this node and other nodes in the cluster. Default: `10s` (10 seconds)
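For reference, the entries above map to ordinary `riak.conf` settings; a sketch for inspecting them on a packaged install (paths and defaults vary by platform):

```bash
# Show the current values, if they are set explicitly in riak.conf.
grep -E '^(platform_data_dir|vnode_management_timer|alive_tokens)' /etc/riak/riak.conf
```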
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.0.8/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.0.8/using/security/managing-sources.md b/content/riak/kv/2.0.8/using/security/managing-sources.md index 49b464bf28..8f1b98e55f 100644 --- a/content/riak/kv/2.0.8/using/security/managing-sources.md +++ b/content/riak/kv/2.0.8/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.0.8/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.0.8/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.0.8/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.0.8/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.0.8/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.0.8/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.0.8/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.0.8/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.0.8/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.0.8/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. 
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.0.8/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.0.8/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.0.8/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.0.8/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.0.9/_reference-links.md b/content/riak/kv/2.0.9/_reference-links.md index d613aea53c..4f6c3dfdc2 100644 --- a/content/riak/kv/2.0.9/_reference-links.md +++ b/content/riak/kv/2.0.9/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.0.9/downloads/ -[install index]: /riak/kv/2.0.9/setup/installing -[upgrade index]: /riak/kv/2.0.9/upgrading -[plan index]: /riak/kv/2.0.9/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.0.9/configuring/reference/ -[manage index]: /riak/kv/2.0.9/using/managing -[performance index]: /riak/kv/2.0.9/using/performance -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.0.9/downloads/ +[install index]: {{}}riak/kv/2.0.9/setup/installing +[upgrade index]: {{}}riak/kv/2.0.9/upgrading +[plan index]: {{}}riak/kv/2.0.9/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.0.9/configuring/reference/ +[manage index]: {{}}riak/kv/2.0.9/using/managing +[performance index]: {{}}riak/kv/2.0.9/using/performance +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.0.9/setup/planning -[plan start]: /riak/kv/2.0.9/setup/planning/start -[plan backend]: /riak/kv/2.0.9/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.9/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.9/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.0.9/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.0.9/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.0.9/setup/planning/best-practices -[plan future]: /riak/kv/2.0.9/setup/planning/future +[plan index]: {{}}riak/kv/2.0.9/setup/planning +[plan start]: {{}}riak/kv/2.0.9/setup/planning/start +[plan backend]: {{}}riak/kv/2.0.9/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.9/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.9/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.0.9/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.0.9/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.0.9/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.0.9/setup/planning/future ## Installing -[install index]: /riak/kv/2.0.9/setup/installing -[install aws]: /riak/kv/2.0.9/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.9/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.9/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.9/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.9/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.9/setup/installing/smartos -[install solaris]: /riak/kv/2.0.9/setup/installing/solaris -[install suse]: /riak/kv/2.0.9/setup/installing/suse -[install windows azure]: /riak/kv/2.0.9/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.0.9/setup/installing +[install aws]: {{}}riak/kv/2.0.9/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.9/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.9/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.9/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.9/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.9/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.9/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.9/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.9/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.9/setup/installing/source -[install source erlang]: /riak/kv/2.0.9/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.0.9/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.0.9/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.9/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.0.9/setup/installing/source/jvm -[install verify]: /riak/kv/2.0.9/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.9/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.0.9/setup/upgrading -[upgrade checklist]: /riak/kv/2.0.9/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.0.9/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.0.9/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.0.9/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.0.9/setup/downgrade +[upgrade index]: {{}}riak/kv/2.0.9/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.0.9/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.0.9/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.0.9/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.0.9/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.0.9/setup/downgrade ## Configuring -[config index]: /riak/kv/2.0.9/configuring -[config basic]: /riak/kv/2.0.9/configuring/basic -[config backend]: /riak/kv/2.0.9/configuring/backend -[config manage]: /riak/kv/2.0.9/configuring/managing -[config reference]: /riak/kv/2.0.9/configuring/reference/ -[config strong consistency]: /riak/kv/2.0.9/configuring/strong-consistency -[config load balance]: /riak/kv/2.0.9/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.0.9/configuring/mapreduce -[config search]: /riak/kv/2.0.9/configuring/search/ +[config index]: {{}}riak/kv/2.0.9/configuring +[config basic]: {{}}riak/kv/2.0.9/configuring/basic +[config backend]: {{}}riak/kv/2.0.9/configuring/backend +[config manage]: 
{{}}riak/kv/2.0.9/configuring/managing +[config reference]: {{}}riak/kv/2.0.9/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.0.9/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.0.9/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.0.9/configuring/mapreduce +[config search]: {{}}riak/kv/2.0.9/configuring/search/ -[config v3 mdc]: /riak/kv/2.0.9/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.0.9/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.0.9/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.0.9/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.0.9/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.0.9/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.0.9/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.0.9/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.0.9/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.0.9/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.0.9/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.0.9/using/ -[use admin commands]: /riak/kv/2.0.9/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.0.9/using/running-a-cluster +[use index]: {{}}riak/kv/2.0.9/using/ +[use admin commands]: {{}}riak/kv/2.0.9/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.0.9/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.0.9/using/reference/custom-code -[use ref handoff]: /riak/kv/2.0.9/using/reference/handoff -[use ref monitoring]: /riak/kv/2.0.9/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.0.9/using/reference/search -[use ref 2i]: /riak/kv/2.0.9/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.0.9/using/reference/snmp -[use ref strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.0.9/using/reference/jmx -[use ref obj del]: /riak/kv/2.0.9/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.0.9/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.0.9/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.0.9/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.0.9/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.0.9/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.0.9/using/reference/search +[use ref 2i]: {{}}riak/kv/2.0.9/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.0.9/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.0.9/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.0.9/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.0.9/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.0.9/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.0.9/using/admin/ -[use admin commands]: /riak/kv/2.0.9/using/admin/commands/ -[use admin riak cli]: 
/riak/kv/2.0.9/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.0.9/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.0.9/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.0.9/using/admin/ +[use admin commands]: {{}}riak/kv/2.0.9/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.0.9/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.0.9/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.0.9/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.0.9/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.0.9/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.0.9/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.0.9/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.0.9/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.0.9/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.0.9/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.0.9/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.0.9/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.0.9/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.0.9/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.0.9/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.0.9/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.0.9/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.0.9/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.0.9/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.0.9/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.0.9/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.0.9/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.0.9/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.0.9/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.0.9/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.0.9/using/repair-recovery -[repair recover index]: /riak/kv/2.0.9/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.0.9/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.0.9/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.0.9/using/security/ -[security basics]: /riak/kv/2.0.9/using/security/basics -[security managing]: /riak/kv/2.0.9/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.0.9/using/security/ +[security basics]: {{}}riak/kv/2.0.9/using/security/basics +[security managing]: {{}}riak/kv/2.0.9/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.0.9/using/performance/ -[perf 
benchmark]: /riak/kv/2.0.9/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.9/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.0.9/using/performance/erlang -[perf aws]: /riak/kv/2.0.9/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.0.9/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.0.9/using/performance/ +[perf benchmark]: {{}}riak/kv/2.0.9/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.9/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.0.9/using/performance/erlang +[perf aws]: {{}}riak/kv/2.0.9/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.0.9/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.0.9/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.0.9/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.0.9/developing -[dev client libraries]: /riak/kv/2.0.9/developing/client-libraries -[dev data model]: /riak/kv/2.0.9/developing/data-modeling -[dev data types]: /riak/kv/2.0.9/developing/data-types -[dev kv model]: /riak/kv/2.0.9/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.0.9/developing +[dev client libraries]: {{}}riak/kv/2.0.9/developing/client-libraries +[dev data model]: {{}}riak/kv/2.0.9/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.9/developing/data-types +[dev kv model]: {{}}riak/kv/2.0.9/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.0.9/developing/getting-started -[getting started java]: /riak/kv/2.0.9/developing/getting-started/java -[getting started ruby]: /riak/kv/2.0.9/developing/getting-started/ruby -[getting started python]: /riak/kv/2.0.9/developing/getting-started/python -[getting started php]: /riak/kv/2.0.9/developing/getting-started/php -[getting started csharp]: /riak/kv/2.0.9/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.0.9/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.0.9/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.0.9/developing/getting-started/golang - -[obj model java]: /riak/kv/2.0.9/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.9/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.9/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.9/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.9/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.9/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.9/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.0.9/developing/getting-started +[getting started java]: {{}}riak/kv/2.0.9/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.0.9/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.0.9/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.0.9/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.0.9/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.0.9/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.0.9/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.0.9/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.0.9/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.0.9/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.9/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.9/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.9/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.9/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.9/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.0.9/developing/usage -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.9/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.0.9/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.0.9/developing/usage/content-types -[usage create objects]: /riak/kv/2.0.9/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.0.9/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.0.9/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.0.9/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.9/developing/usage/search -[usage search schema]: /riak/kv/2.0.9/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.9/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.0.9/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.0.9/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.0.9/developing/usage +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.9/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.9/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.0.9/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.0.9/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.0.9/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.0.9/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.0.9/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.9/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.9/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.0.9/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.0.9/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.0.9/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.0.9/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.0.9/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.0.9/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.0.9/developing/api/backend -[dev api http]: /riak/kv/2.0.9/developing/api/http -[dev api http status]: /riak/kv/2.0.9/developing/api/http/status -[dev api pbc]: /riak/kv/2.0.9/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.0.9/developing/api/backend +[dev api http]: {{}}riak/kv/2.0.9/developing/api/http +[dev api http status]: {{}}riak/kv/2.0.9/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.0.9/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.0.9/learn/glossary/ -[glossary aae]: /riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.0.9/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.0.9/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.0.9/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode -[concept aae]: /riak/kv/2.0.9/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.0.9/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.9/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.9/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.0.9/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.9/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.9/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.9/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.0.9/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.9/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.9/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.0.9/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.9/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.9/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.9/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.0.9/add-ons.md b/content/riak/kv/2.0.9/add-ons.md index 1da00fc8c5..b60536fb9a 100644 --- a/content/riak/kv/2.0.9/add-ons.md +++ b/content/riak/kv/2.0.9/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.0.9/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.0.9/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.0.9/add-ons/redis/developing-rra.md b/content/riak/kv/2.0.9/add-ons/redis/developing-rra.md index 78e00f9c7f..4d761f48fd 100644 --- a/content/riak/kv/2.0.9/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.0.9/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.0.9/developing/api/http +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.0.9/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.0.9/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.0.9/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.0.9/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.0.9/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.0.9/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.0.9/add-ons/redis/redis-add-on-features.md index 46619592fa..fb11379374 100644 --- a/content/riak/kv/2.0.9/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.0.9/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
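To watch that read-through path work, a sketch assuming the cache proxy listens on `localhost:22122` (the port is illustrative; use your RRA listener's address):

```bash
# First read misses: the proxy fetches the value from Riak and caches it.
redis-cli -p 22122 GET test:user:1

# A repeat read within CACHE_TTL is served directly from Redis.
redis-cli -p 22122 GET test:user:1
```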
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.0.9/add-ons/redis/set-up-rra.md b/content/riak/kv/2.0.9/add-ons/redis/set-up-rra.md index e99dd80e35..d459f84354 100644 --- a/content/riak/kv/2.0.9/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.0.9/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.0.9/setup/installing -[perf open files]: /riak/kv/2.0.9/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.0.9/setup/installing +[perf open files]: {{}}riak/kv/2.0.9/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. diff --git a/content/riak/kv/2.0.9/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.0.9/add-ons/redis/set-up-rra/deployment-models.md index 5c268b41b6..d42ea34e9d 100644 --- a/content/riak/kv/2.0.9/add-ons/redis/set-up-rra/deployment-models.md +++ b/content/riak/kv/2.0.9/add-ons/redis/set-up-rra/deployment-models.md @@ -13,9 +13,9 @@ toc: true commercial_offering: true --- -[Local-deployment]: /images/redis/rra_deployment_local.png -[Colocated-deployment]: /images/redis/rra_deployment_colocated.png -[Distributed-deployment]: /images/redis/rra_deployment_distributed.png +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png ## Deployment Models @@ -24,7 +24,7 @@ commercial_offering: true In a local cache deployment, the RRA and Redis are deployed to the application server. -![Local-deployment](/images/redis/rra_deployment_local.png) +![Local-deployment]({{}}images/redis/rra_deployment_local.png) Connections: @@ -65,7 +65,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so are not multiplied by the number of servers. -![Colocated-deployment](/images/redis/rra_deployment_colocated.png) +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) Connections: @@ -103,7 +103,7 @@ Disadvantages: In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes. 
-![Distributed-deployment](/images/redis/rra_deployment_distributed.png) +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) Connections: diff --git a/content/riak/kv/2.0.9/add-ons/redis/using-rra.md b/content/riak/kv/2.0.9/add-ons/redis/using-rra.md index 7aa47dbb9e..1eb59bf6cd 100644 --- a/content/riak/kv/2.0.9/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.0.9/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.0.9/developing/api/http/ +[dev api http]: {{}}riak/kv/2.0.9/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.0.9/configuring/backend.md b/content/riak/kv/2.0.9/configuring/backend.md index 4243424865..333d1388ec 100644 --- a/content/riak/kv/2.0.9/configuring/backend.md +++ b/content/riak/kv/2.0.9/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.9/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.9/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.9/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.9/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.0.9/configuring/basic.md b/content/riak/kv/2.0.9/configuring/basic.md index 0d6f025ad5..57e477f17b 100644 --- a/content/riak/kv/2.0.9/configuring/basic.md +++ b/content/riak/kv/2.0.9/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.0.9/ops/building/configuration/ --- -[config reference]: /riak/kv/2.0.9/configuring/reference -[use running cluster]: /riak/kv/2.0.9/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.0.9/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.0.9/using/performance/erlang -[plan start]: /riak/kv/2.0.9/setup/planning/start -[plan best practices]: /riak/kv/2.0.9/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.0.9/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.0.9/setup/planning/backend -[plan backend multi]: /riak/kv/2.0.9/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.0.9/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.0.9/using/performance/benchmarking -[perf open files]: /riak/kv/2.0.9/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.9/using/performance -[perf aws]: /riak/kv/2.0.9/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.0.9/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[use running cluster]: {{}}riak/kv/2.0.9/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.0.9/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.0.9/using/performance/erlang +[plan start]: 
{{}}riak/kv/2.0.9/setup/planning/start +[plan best practices]: {{}}riak/kv/2.0.9/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.0.9/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.0.9/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.0.9/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.0.9/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.0.9/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.0.9/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.9/using/performance +[perf aws]: {{}}riak/kv/2.0.9/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.0.9/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.0.9/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.9/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
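As a minimal sketch of that reset mechanism (assuming a node listening on `localhost:8098`, the `default` bucket type, and a hypothetical bucket named `my_bucket`), a single `DELETE` against the bucket's properties resource drops its stored overrides so the bucket falls back to the new defaults:

```curl
# Hypothetical bucket; resets its properties to the configured defaults
curl -XDELETE http://localhost:8098/types/default/buckets/my_bucket/props
```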
## System tuning diff --git a/content/riak/kv/2.0.9/configuring/global-object-expiration.md b/content/riak/kv/2.0.9/configuring/global-object-expiration.md index 9f492b64de..9b2177915e 100644 --- a/content/riak/kv/2.0.9/configuring/global-object-expiration.md +++ b/content/riak/kv/2.0.9/configuring/global-object-expiration.md @@ -10,7 +10,6 @@ menu: project: "riak_kv" project_version: "2.0.9" toc: true -canonical_link: "https://docs.basho.com/riak/kv/latest/configuring/global-object-expiration" --- [ttl]: https://en.wikipedia.org/wiki/Time_to_live diff --git a/content/riak/kv/2.0.9/configuring/load-balancing-proxy.md b/content/riak/kv/2.0.9/configuring/load-balancing-proxy.md index 647fde5e4a..48d3d744cf 100644 --- a/content/riak/kv/2.0.9/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.0.9/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.0.9/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.0.9/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.0.9/configuring/managing.md b/content/riak/kv/2.0.9/configuring/managing.md index 4abbbe189b..d4263b2c1a 100644 --- a/content/riak/kv/2.0.9/configuring/managing.md +++ b/content/riak/kv/2.0.9/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.0.9/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.0.9/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.0.9/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.0.9/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.0.9/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.0.9/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.0.9/configuring/mapreduce.md b/content/riak/kv/2.0.9/configuring/mapreduce.md index 6cf8c25578..b12c84f2f7 100644 --- a/content/riak/kv/2.0.9/configuring/mapreduce.md +++ b/content/riak/kv/2.0.9/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.0.9/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.0.9/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.0.9/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.0.9/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.9/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.0.9/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.0.9/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.0.9/configuring/reference.md b/content/riak/kv/2.0.9/configuring/reference.md index 7f6ae3a154..5c5213e899 100644 --- a/content/riak/kv/2.0.9/configuring/reference.md +++ b/content/riak/kv/2.0.9/configuring/reference.md @@ -200,7 +200,7 @@ executables are stored. +as active anti-entropy data, and cluster metadata. @@ -1684,7 +1684,7 @@ abandons the leader (in milliseconds). 
This must be set greater than the diff --git a/content/riak/kv/2.0.9/configuring/search.md b/content/riak/kv/2.0.9/configuring/search.md index 8d9a3ce4d5..f76b015a81 100644 --- a/content/riak/kv/2.0.9/configuring/search.md +++ b/content/riak/kv/2.0.9/configuring/search.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.9/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.0.9/developing/usage/search -[usage search schema]: /riak/kv/2.0.9/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.0.9/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.0.9/developing/usage/custom-extractors -[cluster-ops aae throttle]: /riak/kv/2.0.9/using/cluster-operations/active-anti-entropy/#throttling -[config reference]: /riak/kv/2.0.9/configuring/reference -[config reference#search]: /riak/kv/2.0.9/configuring/reference/#search -[glossary aae]: /riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.0.9/using/security/ +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search +[usage search schema]: {{}}riak/kv/2.0.9/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.0.9/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.0.9/developing/usage/custom-extractors +[cluster-ops aae throttle]: {{}}riak/kv/2.0.9/using/cluster-operations/active-anti-entropy/#throttling +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[config reference#search]: {{}}riak/kv/2.0.9/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.0.9/using/security/ [java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads [java se docs]: http://www.oracle.com/technetwork/java/javase/documentation @@ -106,15 +106,15 @@ Valid values: `on` or `off` ### `search.index.error_threshold.failure_count` -The number of failures encountered while updating a search index within [`search.queue.error_threshold.failure_interval`](#search-queue-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. +The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. Valid values: Integer ### `search.index.error_threshold.failure_interval` -The window of time during which `search.queue.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. +The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. -If [`search.queue.error_threshold.failure_count`](#search-queue-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.queue.error_threshold.reset_interval`](search-queue-error-threshold-reset-interval) has passed. +If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed. 
Valid values: Milliseconds diff --git a/content/riak/kv/2.0.9/configuring/strong-consistency.md b/content/riak/kv/2.0.9/configuring/strong-consistency.md index e10aced62c..b0e945040c 100644 --- a/content/riak/kv/2.0.9/configuring/strong-consistency.md +++ b/content/riak/kv/2.0.9/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.0.9/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.0.9/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.0.9/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.0.9/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.0.9/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.0.9/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.0.9/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.0.9/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.0.9/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.0.9/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.0.9/learn/concepts/causal-context -[dev data types]: /riak/kv/2.0.9/developing/data-types -[glossary aae]: /riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.0.9/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.0.9/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.0.9/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.0.9/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.0.9/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.0.9/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.0.9/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.0.9/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.0.9/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.0.9/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.0.9/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.0.9/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.0.9/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.0.9/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.0.9/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.0.9/developing/data-types +[glossary 
aae]: {{}}riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.0.9/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.0.9/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.0.9/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.0.9/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
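The output described in this table comes from the `riak-admin ensemble-status` command linked above. A minimal sketch (the optional argument narrows the report to a single ensemble, such as the root ensemble discussed in the note below):

```shell
# Summary of the consensus subsystem and all ensembles
riak-admin ensemble-status

# Detail for one ensemble, e.g. the root ensemble
riak-admin ensemble-status root
```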
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.0.9/configuring/v2-multi-datacenter.md b/content/riak/kv/2.0.9/configuring/v2-multi-datacenter.md index f256e4c9bd..ad69b7e754 100644 --- a/content/riak/kv/2.0.9/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.9/configuring/v2-multi-datacenter.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.9/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.0.9/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.9/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.9/configuring/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication capabilities offer a diff --git a/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/nat.md index fd6aed6fad..979fe40079 100644 --- a/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/nat.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.9/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.0.9/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.0.9/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.9/configuring/v3-multi-datacenter/nat/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/nat/) instead. {{% /note %}} Riak Enterprise supports replication of data on networks that use static diff --git a/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/quick-start.md index d96a4ff995..2a8f16e53e 100644 --- a/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/quick-start.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.9/configuring/v3-multi-datacenter/quick-start/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/quick-start/) instead. {{% /note %}} The Riak Multi-Datacenter Replication Quick Start will walk you through diff --git a/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/ssl.md index 3c87b30187..f926dc5f78 100644 --- a/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.9/configuring/v2-multi-datacenter/ssl.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. 
Please use [v3]({{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl/) instead. {{% /note %}} ## Features diff --git a/content/riak/kv/2.0.9/configuring/v3-multi-datacenter.md b/content/riak/kv/2.0.9/configuring/v3-multi-datacenter.md index 45a187a508..afc028883a 100644 --- a/content/riak/kv/2.0.9/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.0.9/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.9/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.0.9/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.0.9/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/nat.md index 2cb49cdfcb..02f60359c1 100644 --- a/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.9/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. diff --git a/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/quick-start.md index 73dd23d4e0..de2583429c 100644 --- a/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.9/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.0.9/using/performance -[config v3 mdc]: /riak/kv/2.0.9/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.0.9/using/performance +[config v3 mdc]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl.md index 1405bc4aae..e3eca2d49c 100644 --- a/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.9/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.0.9/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.0.9/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.0.9/developing/api/backend.md b/content/riak/kv/2.0.9/developing/api/backend.md index 506ef5f3c1..d2bb31896c 100644 --- a/content/riak/kv/2.0.9/developing/api/backend.md +++ b/content/riak/kv/2.0.9/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/backend-api --- -[plan backend]: /riak/kv/2.0.9/setup/planning/backend +[plan backend]: {{}}riak/kv/2.0.9/setup/planning/backend Riak's storage API uniformly applies to 
all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.0.9/developing/api/http.md b/content/riak/kv/2.0.9/developing/api/http.md index a287cc4e81..c81031e404 100644 --- a/content/riak/kv/2.0.9/developing/api/http.md +++ b/content/riak/kv/2.0.9/developing/api/http.md @@ -29,20 +29,20 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. Method | URL | Doc :------|:----|:--- -`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties](/riak/kv/2.0.9/developing/api/http/get-bucket-props) -`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties](/riak/kv/2.0.9/developing/api/http/set-bucket-props) -`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties](/riak/kv/2.0.9/developing/api/http/reset-bucket-props) -`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.0.9/developing/api/http/list-buckets) -`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys](/riak/kv/2.0.9/developing/api/http/list-keys) +`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.0.9/developing/api/http/get-bucket-props) +`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.0.9/developing/api/http/set-bucket-props) +`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.0.9/developing/api/http/reset-bucket-props) +`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.0.9/developing/api/http/list-buckets) +`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.0.9/developing/api/http/list-keys) ## Object-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/types//buckets//keys/` | [HTTP Fetch Object](/riak/kv/2.0.9/developing/api/http/fetch-object) -`POST` | `/types//buckets//keys/` | [HTTP Store Object](/riak/kv/2.0.9/developing/api/http/store-object) -`PUT` | `/types//buckets//keys/` | [HTTP Store Object](/riak/kv/2.0.9/developing/api/http/store-object) -`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object](/riak/kv/2.0.9/developing/api/http/delete-object) +`GET` | `/types//buckets//keys/` | [HTTP Fetch Object]({{}}riak/kv/2.0.9/developing/api/http/fetch-object) +`POST` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.0.9/developing/api/http/store-object) +`PUT` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.0.9/developing/api/http/store-object) +`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object]({{}}riak/kv/2.0.9/developing/api/http/delete-object) ## Riak-Data-Type-related Operations @@ -52,9 +52,9 @@ Method | URL `POST` | `/types//buckets//datatypes` `POST` | `/types//buckets//datatypes/` -For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.0.9/learn/concepts/crdts), -see the `curl` examples in [Using Data Types](/riak/kv/2.0.9/developing/data-types/#usage-examples) -and subpages e.g. [sets](/riak/kv/2.0.9/developing/data-types/sets). +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.0.9/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.0.9/developing/data-types/#usage-examples) +and subpages e.g. [sets]({{}}riak/kv/2.0.9/developing/data-types/sets). Advanced users may consult the technical documentation inside the Riak KV internal module `riak_kv_wm_crdt`. @@ -63,26 +63,26 @@ KV internal module `riak_kv_wm_crdt`. 
Method | URL | Doc :------|:----|:--- -`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.0.9/developing/api/http/mapreduce) -`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes](/riak/kv/2.0.9/developing/api/http/secondary-indexes) -`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes](/riak/kv/2.0.9/developing/api/http/secondary-indexes) +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.0.9/developing/api/http/mapreduce) +`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.9/developing/api/http/secondary-indexes) +`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes]({{}}riak/kv/2.0.9/developing/api/http/secondary-indexes) ## Server-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/ping` | [HTTP Ping](/riak/kv/2.0.9/developing/api/http/ping) -`GET` | `/stats` | [HTTP Status](/riak/kv/2.0.9/developing/api/http/status) -`GET` | `/` | [HTTP List Resources](/riak/kv/2.0.9/developing/api/http/list-resources) +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.0.9/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.0.9/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.0.9/developing/api/http/list-resources) ## Search-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/search/query/` | [HTTP Search Query](/riak/kv/2.0.9/developing/api/http/search-query) -`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.0.9/developing/api/http/search-index-info) -`GET` | `/search/index/` | [HTTP Fetch Search Index](/riak/kv/2.0.9/developing/api/http/fetch-search-index) -`PUT` | `/search/index/` | [HTTP Store Search Index](/riak/kv/2.0.9/developing/api/http/store-search-index) -`DELETE` | `/search/index/` | [HTTP Delete Search Index](/riak/kv/2.0.9/developing/api/http/delete-search-index) -`GET` | `/search/schema/` | [HTTP Fetch Search Schema](/riak/kv/2.0.9/developing/api/http/fetch-search-schema) -`PUT` | `/search/schema/` | [HTTP Store Search Schema](/riak/kv/2.0.9/developing/api/http/store-search-schema) +`GET` | `/search/query/` | [HTTP Search Query]({{}}riak/kv/2.0.9/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.0.9/developing/api/http/search-index-info) +`GET` | `/search/index/` | [HTTP Fetch Search Index]({{}}riak/kv/2.0.9/developing/api/http/fetch-search-index) +`PUT` | `/search/index/` | [HTTP Store Search Index]({{}}riak/kv/2.0.9/developing/api/http/store-search-index) +`DELETE` | `/search/index/` | [HTTP Delete Search Index]({{}}riak/kv/2.0.9/developing/api/http/delete-search-index) +`GET` | `/search/schema/` | [HTTP Fetch Search Schema]({{}}riak/kv/2.0.9/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/` | [HTTP Store Search Schema]({{}}riak/kv/2.0.9/developing/api/http/store-search-schema) diff --git a/content/riak/kv/2.0.9/developing/api/http/counters.md b/content/riak/kv/2.0.9/developing/api/http/counters.md index 0678f00a3d..1baebfa622 100644 --- a/content/riak/kv/2.0.9/developing/api/http/counters.md +++ b/content/riak/kv/2.0.9/developing/api/http/counters.md @@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY ## Response -The regular POST/PUT ([HTTP Store Object](/riak/kv/2.0.9/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.0.9/developing/api/http/fetch-object)) responses apply here. 
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.0.9/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.0.9/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.0.9/developing/api/http/fetch-object.md b/content/riak/kv/2.0.9/developing/api/http/fetch-object.md index f4634791ae..3d365cab3c 100644 --- a/content/riak/kv/2.0.9/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.0.9/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.0.9/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.0.9/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.0.9/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.9/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.0.9/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.0.9/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.0.9/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.0.9/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.9/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.9/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.0.9/developing/api/http/fetch-search-index.md b/content/riak/kv/2.0.9/developing/api/http/fetch-search-index.md index fb004744c3..fd02d1e2c8 100644 --- a/content/riak/kv/2.0.9/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.0.9/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.0.9/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.0.9/developing/usage/search/#simple-setup). 
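As a minimal sketch of such a request (assuming a node at `localhost:8098` and an index named `my_index`):

```curl
curl http://localhost:8098/search/index/my_index
```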
## Request @@ -36,7 +36,7 @@ GET /search/index/ ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.0.9/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.0.9/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.0.9/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.0.9/developing/api/http/fetch-search-schema.md index 894b172809..60eecbc2f9 100644 --- a/content/riak/kv/2.0.9/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.0.9/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.0.9/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.0.9/developing/api/http/get-bucket-props.md b/content/riak/kv/2.0.9/developing/api/http/get-bucket-props.md index 4f1d065a1b..c81f57e2ea 100644 --- a/content/riak/kv/2.0.9/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.0.9/developing/api/http/get-bucket-props.md @@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.0.9/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.0.9/developing/api/http/list-keys). ## Response @@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.0.9/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{}}riak/kv/2.0.9/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.0.9/developing/api/http/link-walking.md b/content/riak/kv/2.0.9/developing/api/http/link-walking.md index ee93f90ebd..9894bd7745 100644 --- a/content/riak/kv/2.0.9/developing/api/http/link-walking.md +++ b/content/riak/kv/2.0.9/developing/api/http/link-walking.md @@ -21,8 +21,8 @@ This feature is deprecated and will be removed in a future version. Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.0.9/learn/glossary/#links). +is a special case of [MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.0.9/learn/glossary/#links). ## Request @@ -68,7 +68,7 @@ single object that was found. 
If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.0.9/developing/api/http/fetch-object), without the status +response from [fetching the object]({{}}riak/kv/2.0.9/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.0.9/developing/api/http/list-resources.md b/content/riak/kv/2.0.9/developing/api/http/list-resources.md index 70fa104a35..4ee9969e40 100644 --- a/content/riak/kv/2.0.9/developing/api/http/list-resources.md +++ b/content/riak/kv/2.0.9/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.0.9/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.0.9/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.0.9/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.0.9/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.0.9/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.0.9/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.0.9/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.0.9/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.0.9/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.0.9/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.0.9/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.0.9/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.0.9/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.0.9/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.0.9/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.0.9/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.0.9/developing/api/http/mapreduce.md b/content/riak/kv/2.0.9/developing/api/http/mapreduce.md index b59e71daf8..ac44657728 100644 --- a/content/riak/kv/2.0.9/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.0.9/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. 
The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.0.9/developing/api/http/search-index-info.md b/content/riak/kv/2.0.9/developing/api/http/search-index-info.md index c1c579e7ce..2e8161ba57 100644 --- a/content/riak/kv/2.0.9/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.0.9/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.0.9/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.0.9/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.0.9/developing/api/http/search-query.md b/content/riak/kv/2.0.9/developing/api/http/search-query.md index 8f246078b8..8ee1329e6f 100644 --- a/content/riak/kv/2.0.9/developing/api/http/search-query.md +++ b/content/riak/kv/2.0.9/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.0.9/developing/usage/search) query. +Performs a [Riak KV Search]({{}}riak/kv/2.0.9/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.0.9/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{}}riak/kv/2.0.9/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.0.9/developing/api/http/secondary-indexes.md b/content/riak/kv/2.0.9/developing/api/http/secondary-indexes.md index f846094350..094f6b724a 100644 --- a/content/riak/kv/2.0.9/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.0.9/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.0.9/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
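As a quick sketch of such a query (hypothetical type, bucket, index field, and value; assumes a node at `localhost:8098`), an exact-match lookup against a binary index looks like:

```curl
curl http://localhost:8098/types/default/buckets/mybucket/index/twitter_bin/jsmith123
```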
## Request diff --git a/content/riak/kv/2.0.9/developing/api/http/set-bucket-props.md b/content/riak/kv/2.0.9/developing/api/http/set-bucket-props.md index e653badacf..fa60ec241f 100644 --- a/content/riak/kv/2.0.9/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.0.9/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.0.9/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.0.9/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{}}riak/kv/2.0.9/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.0.9/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.0.9/developing/api/http/status.md b/content/riak/kv/2.0.9/developing/api/http/status.md index 3e630d4107..69ed5d4a0f 100644 --- a/content/riak/kv/2.0.9/developing/api/http/status.md +++ b/content/riak/kv/2.0.9/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.0.9/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.0.9/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.0.9/developing/api/http/store-object.md b/content/riak/kv/2.0.9/developing/api/http/store-object.md index 577b569d56..850dc53853 100644 --- a/content/riak/kv/2.0.9/developing/api/http/store-object.md +++ b/content/riak/kv/2.0.9/developing/api/http/store-object.md @@ -38,8 +38,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.0.9/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.0.9/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{}}riak/kv/2.0.9/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.0.9/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -83,7 +83,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.0.9/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.0.9/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. 
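Tying the headers and parameters above together, here is a minimal store sketch (hypothetical type, bucket, and key; `returnbody=true` asks Riak to echo the stored object back, so the fetch-style response headers described above apply):

```curl
curl -XPUT "http://localhost:8098/types/default/buckets/test/keys/doc1?returnbody=true" \
  -H "Content-Type: application/json" \
  -d '{"name": "example"}'
```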
diff --git a/content/riak/kv/2.0.9/developing/api/http/store-search-index.md b/content/riak/kv/2.0.9/developing/api/http/store-search-index.md index 517367829a..e3aa85e1fa 100644 --- a/content/riak/kv/2.0.9/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.0.9/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.0.9/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.0.9/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.0.9/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.0.9/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.0.9/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.0.9/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.0.9/developing/api/http/store-search-schema.md b/content/riak/kv/2.0.9/developing/api/http/store-search-schema.md index 9a8d380e18..24f470de5e 100644 --- a/content/riak/kv/2.0.9/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.0.9/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.0.9/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/index/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.0.9/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the file `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request: diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers.md index e271492345..04c6dd4d70 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers.md @@ -139,47 +139,47 @@ message RpbErrorResp { ## Bucket Operations -* [PBC List Buckets](/riak/kv/2.0.9/developing/api/protocol-buffers/list-buckets) -* [PBC List Keys](/riak/kv/2.0.9/developing/api/protocol-buffers/list-keys) -* [PBC Get Bucket Properties](/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props) -* [PBC Set Bucket Properties](/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props) -* [PBC Reset Bucket Properties](/riak/kv/2.0.9/developing/api/protocol-buffers/reset-bucket-props) +* [PBC List Buckets]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/reset-bucket-props) ## Object/Key Operations -* [PBC Fetch Object](/riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object) -* [PBC Store Object](/riak/kv/2.0.9/developing/api/protocol-buffers/store-object) -* [PBC Delete Object](/riak/kv/2.0.9/developing/api/protocol-buffers/delete-object) +* [PBC Fetch Object]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/delete-object) ## Query Operations -* [PBC MapReduce](/riak/kv/2.0.9/developing/api/protocol-buffers/mapreduce) -* [PBC Secondary Indexes](/riak/kv/2.0.9/developing/api/protocol-buffers/secondary-indexes) -* [PBC Search](/riak/kv/2.0.9/developing/api/protocol-buffers/search) +* [PBC MapReduce]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/search) ## Server Operations -* [PBC Ping](/riak/kv/2.0.9/developing/api/protocol-buffers/ping) -* [PBC Server Info](/riak/kv/2.0.9/developing/api/protocol-buffers/server-info) +* [PBC Ping]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/server-info) ## Bucket Type Operations -* [PBC Get Bucket Type](/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-type) -* [PBC Set Bucket Type](/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-type) +* [PBC Get Bucket Type]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-type) ## Data Type Operations -* [PBC Data Type Fetch](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-fetch) -* [PBC Data Type Union](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-union) -* [PBC Data Type Store](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-store) -* [PBC Data Type Counter
Store](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/auth-req.md index 99522bf2c6..ab2e6faaff 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/auth-req.md @@ -27,4 +27,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.0.9/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.0.9/using/security/basics). diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/delete-object.md index a74b576cc2..5219064c12 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/delete-object.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.0.9/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{}}riak/kv/2.0.9/using/cluster-operations/bucket-types)/bucket/key location. 
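For comparison, here is a minimal sketch of the same operation issued through Basho's official Python client, which speaks Protocol Buffers by default; the bucket type, bucket, and key names are illustrative only:

```python
from riak import RiakClient

# Connect over the Protocol Buffers interface (default port 8087)
client = RiakClient(pb_port=8087)

# Hypothetical bucket type/bucket/key, used purely for illustration
bucket = client.bucket_type('animals').bucket('dogs')
bucket.delete('rufus')  # sends the delete request described below
```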
## Request diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-counter-store.md index f290850883..e79974ec63 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.0.9/developing/data-types). +An operation to update a [counter]({{}}riak/kv/2.0.9/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-fetch.md index 539ae595b0..8a324055e9 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.0.9/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.0.9/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.0.9/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [bucket type]({{}}riak/kv/2.0.9/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. 
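Client libraries normally hide this round trip. As a rough sketch, assuming the official Python client and a bucket type named `counters` whose `datatype` property is `counter` (both names illustrative), a fetch looks like the following, with the `DtFetchResp` fields surfaced as attributes of the returned object:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# Assumes a 'counters' bucket type created with datatype = counter
bucket = client.bucket_type('counters').bucket('traffic')
counter = bucket.get('tickets')  # issues a DtFetchReq behind the scenes
print(counter.value)             # decoded from the DtFetchResp shown below
```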
```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.0.9/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{}}riak/kv/2.0.9/learn/glossary/#vector-clock) for standard KV updates. The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store.md index 14ba6111db..ac3fab08ce 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-set-store.md index 155cd6d750..03f8b92187 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-store.md index 0dab01536c..05f4e7c0b6 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.0.9/developing/data-types). +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.0.9/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`. 
The `DtOp` value specifies which Data Type-specific operation is being -performed. More on that in the [PBC Data Type Union](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-union) document. +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-union) document. ```protobuf message DtUpdateReq { @@ -50,11 +50,11 @@ message DtUpdateReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored -`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.0.9/using/cluster-operations/bucket-types). +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.0.9/using/cluster-operations/bucket-types). Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a -[counter](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store). +[counter]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-map-store). ```protobuf message DtOp { @@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum` Parameter | Description :---------|:----------- `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. -`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.0.9/learn/glossary/#vector-clock) +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.0.9/learn/glossary/#vector-clock) `w` | Write quorum, i.e. how many replicas to write to before returning a successful response `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted @@ -92,7 +92,7 @@ Parameter | Description ## Response The response to a Data Type update request is analogous to -[`RpbPutResp`](/riak/kv/2.0.9/developing/api/protocol-buffers/store-object) for KV operations. If the +[`RpbPutResp`]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/store-object) for KV operations. If the `return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-union.md index 8a6b5c8aa6..9ea5d2d79b 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-union.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/dt-union.md @@ -28,4 +28,4 @@ message DtOp { ``` The included operation depends on the Data Type that is being updated. 
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.0.9/developing/api/protocol-buffers/dt-store) message. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object.md index 2f1b1977a3..1d36acdc69 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object.md @@ -47,7 +47,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or @@ -87,7 +87,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.0.9/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -114,7 +114,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `gzip` -* `vtag` --- The object's [vtag](/riak/kv/2.0.9/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{}}riak/kv/2.0.9/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -132,7 +132,7 @@ of the following optional parameters: } ``` Notice that either both a key and a value or just a key can be stored. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.0.9/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key) diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props.md index df15bdd2aa..a880d2b869 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props.md @@ -26,7 +26,7 @@ message RpbGetBucketReq { } ``` -The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.0.9/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.0.9/using/cluster-operations/bucket-types) parameter (`type`) is optional. 
If it is not specified, the `default` bucket type will be used. ## Response @@ -85,7 +85,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.0.9/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{}}riak/kv/2.0.9/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -106,5 +106,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) {{% /note %}} diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-type.md index d26777f340..ec79fc94dd 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-type.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.0.9/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.0.9/using/cluster-operations/bucket-types). ## Request @@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-client-id.md index e2437dd306..1b0288410d 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/get-client-id.md @@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.0.9/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/set-client-id). 
## Request diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/mapreduce.md index 7e5a0b7eb9..82241b741e 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/mapreduce.md @@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.0.9/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.0.9/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as the [REST API]({{}}riak/kv/2.0.9/developing/usage/mapreduce/#rest), and +the external term format is the same as the [local Erlang API]({{}}riak/kv/2.0.9/developing/app-guide/advanced-mapreduce/#erlang). ## Response diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/reset-bucket-props.md index eb3f4d96f3..a643ed4869 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/reset-bucket-props.md @@ -27,7 +27,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/secondary-indexes.md index 58ead1f3a2..b453f9f5c5 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/secondary-indexes.md @@ -61,7 +61,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.0.9/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -84,7 +84,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object). 
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props.md index 959c6f2056..b6aa3c2d36 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props.md @@ -29,9 +29,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-type.md index 431f4ab854..051febf444 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-type.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.9/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.0.9/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types). ## Request @@ -28,4 +28,4 @@ message RpbSetBucketTypeReq { ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/store-object.md index a7e81c04a7..b33379527e 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/store-object.md @@ -16,11 +16,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.0.9/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.0.9/learn/concepts/buckets), and [bucket type](/riak/kv/2.0.9/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{}}riak/kv/2.0.9/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.0.9/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types). 
A bucket must always be specified (via `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no -[bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) is assigned, Riak will assign -`default`, which means that the [default bucket configuration](/riak/kv/2.0.9/configuring/reference/#default-bucket-properties) will be used. +[bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) is assigned, Riak will assign +`default`, which means that the [default bucket configuration]({{}}riak/kv/2.0.9/configuring/reference/#default-bucket-properties) will be used. #### Request @@ -50,7 +50,7 @@ message RpbPutReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket, in bytes, in which the key/value is to reside -`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object) +`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object) #### Optional Parameters @@ -93,7 +93,7 @@ message RpbPutResp { If `return_body` is set to `true` on the PUT request, the `RpbPutResp` will contain the current object after the PUT completes, in `contents`, -as well as the object's [causal context](/riak/kv/2.0.9/learn/concepts/causal-context), in the `vclock` +as well as the object's [causal context]({{}}riak/kv/2.0.9/learn/concepts/causal-context), in the `vclock` field. The `key` will be sent only if the server generated a random key for the object. diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-delete.md index ab72563b42..c172181df4 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-delete.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-delete.md @@ -29,5 +29,5 @@ message RpbYokozunaIndexDeleteReq { ## Response -Returns a [RpbDelResp](/riak/kv/2.0.9/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbDelResp]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-get.md index 81791b7319..fc127c72c8 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-get.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-get.md @@ -53,7 +53,7 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.9/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.9/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. 
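In practice you would rarely build this message by hand. As a sketch of the client-level equivalent, assuming the official Python client (the index name is illustrative):

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# Fetch a single index definition (name, schema, n_val)
index = client.get_search_index('famous')

# Or enumerate every index known to Riak Search
all_indexes = client.list_search_indexes()
```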
diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-put.md index 51666d7850..c63f97e5eb 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-put.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-index-put.md @@ -37,9 +37,9 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.0.9/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.0.9/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.9/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-get.md index 734d4ef924..df0d462531 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.0.9/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-put.md index 35fa6a959c..39aca278b8 100644 --- a/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.0.9/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.0.9/developing/usage/search-schemas). +Create a new Solr [search schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.0.9/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.0.9/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/#message-codes) code with no data on success. 
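As a reference point, uploading a schema through the official Python client might look like the following sketch; the file name and schema name are illustrative:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# Read the Solr schema XML from disk (illustrative file name)
with open('my_schema.xml', 'rb') as f:
    content = f.read()

# Sends the schema name and XML content described above
client.create_search_schema('my_custom_schema', content)
```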
diff --git a/content/riak/kv/2.0.9/developing/app-guide.md b/content/riak/kv/2.0.9/developing/app-guide.md index ac06596b54..6d8bac1b9d 100644 --- a/content/riak/kv/2.0.9/developing/app-guide.md +++ b/content/riak/kv/2.0.9/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.0.9/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.0.9/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.0.9/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.0.9/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.0.9/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.0.9/developing/key-value-modeling -[dev data types]: /riak/kv/2.0.9/developing/data-types -[dev data types#counters]: /riak/kv/2.0.9/developing/data-types/#counters -[dev data types#sets]: /riak/kv/2.0.9/developing/data-types/#sets -[dev data types#maps]: /riak/kv/2.0.9/developing/data-types/#maps -[usage create objects]: /riak/kv/2.0.9/developing/usage/creating-objects -[usage search]: /riak/kv/2.0.9/developing/usage/search -[use ref search]: /riak/kv/2.0.9/using/reference/search -[usage 2i]: /riak/kv/2.0.9/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.0.9/developing/client-libraries -[concept crdts]: /riak/kv/2.0.9/learn/concepts/crdts -[dev data model]: /riak/kv/2.0.9/developing/data-modeling -[usage mapreduce]: /riak/kv/2.0.9/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.0.9/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.0.9/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.9/setup/planning/backend/memory -[obj model java]: /riak/kv/2.0.9/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.0.9/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.0.9/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.0.9/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.0.9/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.0.9/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.0.9/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[use ref strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.0.9/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.0.9/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.0.9/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.0.9/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties -[install index]: /riak/kv/2.0.9/setup/installing -[getting started]: /riak/kv/2.0.9/developing/getting-started -[usage index]: /riak/kv/2.0.9/developing/usage -[glossary]: /riak/kv/2.0.9/learn/glossary +[usage conflict resolution]: {{}}riak/kv/2.0.9/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.0.9/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.0.9/developing/data-modeling/#sensor-data +[concept eventual consistency]: 
{{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.0.9/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.0.9/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.0.9/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.0.9/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.0.9/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.0.9/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.0.9/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search +[use ref search]: {{}}riak/kv/2.0.9/using/reference/search +[usage 2i]: {{}}riak/kv/2.0.9/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.0.9/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.0.9/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.0.9/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.0.9/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.0.9/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.0.9/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.9/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.0.9/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.0.9/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.0.9/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.0.9/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.0.9/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.0.9/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.0.9/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[cluster ops strong consistency]: {{}}riak/kv/2.0.9/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/2.0.9/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/2.0.9/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/2.0.9/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/2.0.9/setup/installing +[getting started]: {{}}riak/kv/2.0.9/developing/getting-started +[usage index]: {{}}riak/kv/2.0.9/developing/usage +[glossary]: {{}}riak/kv/2.0.9/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -118,7 +118,7 @@ Riak may not be such a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance. 
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.0.9/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.0.9/developing/app-guide/advanced-mapreduce.md index 5d09794409..689722c17e 100644 --- a/content/riak/kv/2.0.9/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.0.9/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.9/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.0.9/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.0.9/using/reference/custom-code -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.9/configuring/reference +[usage 2i]: {{}}riak/kv/2.0.9/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.0.9/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.9/configuring/reference [google mr]: http://research.google.com/archive/mapreduce.html [mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map [function contrib]: https://github.com/basho/riak_function_contrib @@ -728,7 +728,7 @@ You can use streaming with Erlang via the Riak KV local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.0.9/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.0.9/developing/app-guide/cluster-metadata.md index 4e27650200..31b64caff7 100644 --- a/content/riak/kv/2.0.9/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.0.9/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.0.9/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.0.9/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.0.9/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. 
Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.0.9/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.0.9/developing/app-guide/replication-properties.md b/content/riak/kv/2.0.9/developing/app-guide/replication-properties.md index 4bb5daa6c7..328fba249e 100644 --- a/content/riak/kv/2.0.9/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.0.9/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.9/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.9/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.0.9/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.0.9/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.0.9/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.0.9/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. 
## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.9/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.9/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.0.9/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description :---------|:------------|:--------------|:----------- `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting it to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.9/developing/usage/bucket-types) +that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. 
The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.0.9/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.9/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.9/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.0.9/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.0.9/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.0.9/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.0.9/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.0.9/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.0.9/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.9/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.9/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.0.9/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.0.9/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.9/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.9/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.9/developing/app-guide/strong-consistency.md b/content/riak/kv/2.0.9/developing/app-guide/strong-consistency.md index f63c78a7aa..5fc00eabda 100644 --- a/content/riak/kv/2.0.9/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.0.9/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.0.9/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/kv/2.0.9/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.0.9/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.0.9/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.0.9/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.0.9/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.0.9/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.0.9/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.0.9/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.0.9/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/kv/2.0.9/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.0.9/developing/client-libraries -[getting started]: /riak/kv/2.0.9/developing/getting-started -[config strong consistency#details]: /riak/kv/2.0.9/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[concept eventual consistency]: 
{{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.0.9/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.0.9/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.0.9/using/cluster-operations/bucket-types +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.0.9/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.0.9/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.0.9/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.0.9/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.0.9/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.0.9/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.0.9/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.0.9/developing/client-libraries +[getting started]: {{}}riak/kv/2.0.9/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.0.9/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.0.9/developing/app-guide/write-once.md b/content/riak/kv/2.0.9/developing/app-guide/write-once.md index 9a2c635427..78b86057fb 100644 --- a/content/riak/kv/2.0.9/developing/app-guide/write-once.md +++ b/content/riak/kv/2.0.9/developing/app-guide/write-once.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.9/dev/advanced/write-once --- -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[bucket type]: /riak/kv/2.0.9/developing/usage/bucket-types -[Riak data types]: /riak/kv/2.0.9/developing/data-types -[strong consistency]: /riak/kv/2.0.9/developing/app-guide/strong-consistency +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.0.9/developing/data-types +[strong consistency]: {{}}riak/kv/2.0.9/developing/app-guide/strong-consistency Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. @@ -98,7 +98,7 @@ The relationship between the `riak_client`, write-once workers, and vnode proxies is illustrated in the following diagram:
-![Write Once](/images/write_once.png) +![Write Once]({{}}images/write_once.png)
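To make that fast path concrete, here is a minimal sketch of a client write against such a bucket using the official Python client. The bucket-type name, key scheme, and payload are illustrative only, and the type is assumed to have already been created and activated with `write_once` set to `true`:

```python
import riak

# Assumes a bucket type created and activated out of band, e.g.:
#   riak-admin bucket-type create write-once '{"props": {"write_once": true}}'
#   riak-admin bucket-type activate write-once
client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('write-once').bucket('events')

# Entries in a write-once bucket are written exactly once and never
# updated, so Riak can skip the coordinating read before the PUT.
obj = bucket.new('event-0001', data={'type': 'login', 'user': 'mj'})
obj.store()
```

Because entries are never updated, key schemes that are unique by construction (timestamps, UUIDs, sequence numbers) are the natural fit for these buckets.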
## Client Impacts @@ -149,7 +149,7 @@ LevelDB. Riak will automatically fall back to synchronous writes with all other backends. {{% note title="Note on the `multi` backend" %}} -The [Multi](/riak/kv/2.0.9/setup/planning/backend/multi) backend does not +The [Multi]({{}}riak/kv/2.0.9/setup/planning/backend/multi) backend does not support asynchronous writes. Therefore, if LevelDB is used with the Multi backend, it will be used in synchronous mode. {{% /note %}} diff --git a/content/riak/kv/2.0.9/developing/client-libraries.md b/content/riak/kv/2.0.9/developing/client-libraries.md index 5904c93e8d..9743c863ee 100644 --- a/content/riak/kv/2.0.9/developing/client-libraries.md +++ b/content/riak/kv/2.0.9/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.0.9/developing/data-modeling.md b/content/riak/kv/2.0.9/developing/data-modeling.md index 99f8fcdd52..3388f3acf4 100644 --- a/content/riak/kv/2.0.9/developing/data-modeling.md +++ b/content/riak/kv/2.0.9/developing/data-modeling.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.9/dev/using/data-modeling --- -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. In this guide, we'll list @@ -28,9 +28,9 @@ provide links to videos and documentation for further exploration. How you structure your application to run on Riak should take into account the unique needs of your use case, including access patterns such as read/write distribution, latency differences between various -operations, use of Riak features including [Data Types](/riak/kv/2.0.9/developing/data-types/), -[MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce/), [Search](/riak/kv/2.0.9/developing/usage/search/), -[secondary indexes (2i)](/riak/kv/2.0.9/developing/usage/secondary-indexes/) and more. This guide +operations, use of Riak features including [Data Types]({{}}riak/kv/2.0.9/developing/data-types/), +[MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce/), [Search]({{}}riak/kv/2.0.9/developing/usage/search/), +[secondary indexes (2i)]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes/) and more. This guide is intended to be illustrative only. 
## High Read/Write, Simple Applications @@ -39,20 +39,20 @@ The following are examples of Riak use cases that require high read/write performance without necessarily utilizing complex data structures: -* [Session Storage](/riak/kv/2.0.9/developing/data-modeling/#session-storage) -* [Serving Advertisements](/riak/kv/2.0.9/developing/data-modeling/#serving-advertisements) -* [Log Data](/riak/kv/2.0.9/developing/data-modeling/#log-data) -* [Sensor Data](/riak/kv/2.0.9/developing/data-modeling/#sensor-data) +* [Session Storage]({{}}riak/kv/2.0.9/developing/data-modeling/#session-storage) +* [Serving Advertisements]({{}}riak/kv/2.0.9/developing/data-modeling/#serving-advertisements) +* [Log Data]({{}}riak/kv/2.0.9/developing/data-modeling/#log-data) +* [Sensor Data]({{}}riak/kv/2.0.9/developing/data-modeling/#sensor-data) ## Content Management, Social Applications The following application types require more subtle relationships between objects, e.g. one-to-many and many-to-many relationships. -* [User Accounts](/riak/kv/2.0.9/developing/data-modeling/#user-accounts) -* [User Settings and Preferences](/riak/kv/2.0.9/developing/data-modeling/#user-settings-and-preferences) -* [User Events and Timelines](/riak/kv/2.0.9/developing/data-modeling/#user-events-and-timelines) -* [Articles, Blog Posts, and Other Content](/riak/kv/2.0.9/developing/data-modeling/#articles-blog-posts-and-other-content) +* [User Accounts]({{}}riak/kv/2.0.9/developing/data-modeling/#user-accounts) +* [User Settings and Preferences]({{}}riak/kv/2.0.9/developing/data-modeling/#user-settings-and-preferences) +* [User Events and Timelines]({{}}riak/kv/2.0.9/developing/data-modeling/#user-events-and-timelines) +* [Articles, Blog Posts, and Other Content]({{}}riak/kv/2.0.9/developing/data-modeling/#articles-blog-posts-and-other-content) ## Session Storage @@ -70,11 +70,11 @@ administrative changes to schemas. Riak has features that allow for more complex session storage use cases. The [Bitcask][plan backend bitcask] storage backend, for example, supports automatic expiry of keys, which frees application developers from implementing manual -session expiry. Riak's [MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce/) system can also be +session expiry. Riak's [MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce/) system can also be used to perform batch processing analysis on large bodies of session data, for example to compute the average number of active users. If sessions must be retrieved using multiple keys (e.g. a UUID or email -address), [using secondary indexes](/riak/kv/2.0.9/developing/usage/secondary-indexes/) can provide an easy solution. +address), [using secondary indexes]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes/) can provide an easy solution. ### Session Storage Community Examples @@ -129,7 +129,7 @@ involves serving reads. ## Log Data A common use case for Riak is storing large amounts of log data, either -for analysis [using MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce) or as a storage system used in +for analysis [using MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce) or as a storage system used in conjunction with a secondary analytics cluster used to perform more advanced analytics tasks. To store log data, you can use a bucket called `logs` (just to give an example) and use a unique value, such as a date, @@ -177,9 +177,9 @@ and then store update data as the value. That data could then be queried on the basis of the interval. 
Alternatively, a timestamp could be attached to each object as a -[secondary index](/riak/kv/2.0.9/developing/usage/secondary-indexes/), which would allow you to +[secondary index]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes/), which would allow you to perform queries on specific time interval ranges or to perform -[MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce/) queries against the indexes. +[MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce/) queries against the indexes. ### Sensor Data Complex Case @@ -215,7 +215,7 @@ and a read request could be performed on the corresponding key. There are, however, several drawbacks to this approach. What happens if a user wants to change their username later on? The most common solution would be to use a UUID-type key for the user and store the user's -username as a [secondary index](/riak/kv/2.0.9/developing/usage/secondary-indexes/) for efficient +username as a [secondary index]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes/) for efficient lookup. ### User Accounts Complex Case @@ -224,7 +224,7 @@ For simple retrieval of a specific account, a user ID (plus perhaps a secondary index on a username or email) is enough. If you foresee the need to make queries on additional user attributes (e.g. creation time, user type, or region), plan ahead and either set up additional secondary -indexes or consider using [Riak Search](/riak/kv/2.0.9/developing/usage/search/) to index the JSON +indexes or consider using [Riak Search]({{}}riak/kv/2.0.9/developing/usage/search/) to index the JSON contents of the user account. ### User Accounts Community Examples @@ -308,9 +308,9 @@ part of a URL string, etc. In Riak, you can store content of any kind, from HTML files to plain text to JSON or XML or another document type entirely. Keep in mind that -data in Riak is opaque, with the exception of [Riak Data Types](/riak/kv/2.0.9/developing/data-types), +data in Riak is opaque, with the exception of [Riak Data Types]({{}}riak/kv/2.0.9/developing/data-types), and so Riak won't "know" about the object unless it is indexed -[using Riak Search](/riak/kv/2.0.9/developing/usage/search/) or [using secondary indexes](/riak/kv/2.0.9/developing/usage/secondary-indexes/). +[using Riak Search]({{}}riak/kv/2.0.9/developing/usage/search/) or [using secondary indexes]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes/). ### Articles et al Complex Case @@ -329,9 +329,9 @@ with comments would require your application to call from the posts and comments buckets to assemble the view. Other possible cases may involve performing operations on content beyond -key/value pairs. [Riak Search](/riak/kv/2.0.9/developing/usage/search/) is recommended for use cases +key/value pairs. [Riak Search]({{}}riak/kv/2.0.9/developing/usage/search/) is recommended for use cases involving full-text search. For lighter-weight querying, -[using secondary indexes](/riak/kv/2.0.9/developing/usage/secondary-indexes/) \(2i) enables you to add metadata to objects to +[using secondary indexes]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes/) \(2i) enables you to add metadata to objects to either query for exact matches or to perform range queries. 2i also enables you to tag posts with dates, timestamps, topic areas, or other pieces of information useful for later retrieval. 
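As a rough sketch of that tagging pattern with the official Python client (the bucket, key, and index names here are hypothetical, and a 2i-capable backend such as LevelDB is assumed):

```python
import riak

client = riak.RiakClient(pb_port=8087)
posts = client.bucket('posts')  # hypothetical bucket

# Tag the post at write time; the _int and _bin suffixes select
# integer and binary index types respectively.
post = posts.new('first-post', data={'title': 'First post!'})
post.add_index('date_int', 20090101)
post.add_index('topic_bin', 'riak')
post.store()
```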
diff --git a/content/riak/kv/2.0.9/developing/data-types.md b/content/riak/kv/2.0.9/developing/data-types.md index 64887d807d..da5e926ab6 100644 --- a/content/riak/kv/2.0.9/developing/data-types.md +++ b/content/riak/kv/2.0.9/developing/data-types.md @@ -43,9 +43,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -268,5 +268,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. diff --git a/content/riak/kv/2.0.9/developing/faq.md b/content/riak/kv/2.0.9/developing/faq.md index 911ab00e91..6f96687db4 100644 --- a/content/riak/kv/2.0.9/developing/faq.md +++ b/content/riak/kv/2.0.9/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.0.9/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.0.9/using/performance/benchmarking -[Bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.0.9/developing/usage +[[Basho Bench]: {{}}riak/kv/2.0.9/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.0.9/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.0.9/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.0.9/configuring/reference +[commit hooks]: {{}}riak/kv/2.0.9/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.0.9/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.0.9/developing/client-libraries -[MapReduce]: /riak/kv/2.0.9/developing/usage/mapreduce -[Memory]: /riak/kv/2.0.9/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.0.9/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.0.9/learn/concepts/causal-context#vector-clocks +[Erlang Riak Client]: {{}}riak/kv/2.0.9/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.0.9/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.0.9/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.0.9/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.0.9/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.0.9/developing/getting-started.md b/content/riak/kv/2.0.9/developing/getting-started.md index d802efbf2d..a95afd0150 100644 --- 
a/content/riak/kv/2.0.9/developing/getting-started.md +++ b/content/riak/kv/2.0.9/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.0.9/setup/installing -[dev client libraries]: /riak/kv/2.0.9/developing/client-libraries +[install index]: {{}}riak/kv/2.0.9/setup/installing +[dev client libraries]: {{}}riak/kv/2.0.9/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.0.9/developing/getting-started/csharp.md b/content/riak/kv/2.0.9/developing/getting-started/csharp.md index 3582ba7f73..e3f4849eb5 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/csharp.md +++ b/content/riak/kv/2.0.9/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.9/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.9/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.9/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.9/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.0.9/developing/getting-started/csharp/querying.md b/content/riak/kv/2.0.9/developing/getting-started/csharp/querying.md index 90dd835cb7..ce4da1e377 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.0.9/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.9/developing/getting-started/erlang.md b/content/riak/kv/2.0.9/developing/getting-started/erlang.md index c5920ff646..7a9ea7b843 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/erlang.md +++ b/content/riak/kv/2.0.9/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.9/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.9/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. 
You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.9/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.9/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.0.9/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.0.9/developing/getting-started/erlang/object-modeling.md index fe339c8add..118ae028f2 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.0.9/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.9/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.9/developing/getting-started/erlang/querying.md b/content/riak/kv/2.0.9/developing/getting-started/erlang/querying.md index d428ec0b53..5e945ac1a1 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.0.9/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.0.9/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.0.9/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.9/developing/getting-started/golang.md b/content/riak/kv/2.0.9/developing/getting-started/golang.md index 10311e2e35..ccc42cf567 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/golang.md +++ b/content/riak/kv/2.0.9/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.9/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.9/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.9/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.9/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.0.9/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.0.9/developing/getting-started/golang/object-modeling.md index ee4b97bf56..be0e2486e4 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.0.9/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.0.9/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.0.9/developing/getting-started/golang/querying.md b/content/riak/kv/2.0.9/developing/getting-started/golang/querying.md index 5a00cc9de5..5e696bcec1 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.0.9/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. 
This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.0.9/developing/getting-started/java.md b/content/riak/kv/2.0.9/developing/getting-started/java.md index 4d47f1ee29..b560ab5d1e 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/java.md +++ b/content/riak/kv/2.0.9/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.9/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.9/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. @@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.0.9/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.9/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.0.9/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.0.9/developing/getting-started/java/crud-operations.md index db5d4fa7c0..9ebf9b4784 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.0.9/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.9/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.9/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/) documentation. ## Updating Objects @@ -85,8 +85,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.9/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.9/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -196,6 +196,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.0.9/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.0.9/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/) documentation. diff --git a/content/riak/kv/2.0.9/developing/getting-started/java/querying.md b/content/riak/kv/2.0.9/developing/getting-started/java/querying.md index 997611e19a..a7580cda40 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.0.9/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships.
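Sketched in Python for brevity (the `Customers` and `Orders` buckets and their keys are assumed from the surrounding walkthrough), keeping the relationship in the data itself might look like this:

```python
import riak

client = riak.RiakClient(pb_port=8087)
customers = client.bucket('Customers')
orders = client.bucket('Orders')

# The customer record carries the keys of its orders...
customer = customers.get('1')
customer.data['order_ids'] = ['1', '2', '3']
customer.store()

# ...so a "join" is just a handful of direct key fetches.
customer_orders = [orders.get(oid).data
                   for oid in customer.data['order_ids']]
```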
## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.9/developing/getting-started/nodejs.md b/content/riak/kv/2.0.9/developing/getting-started/nodejs.md index 242094633f..6d8cb5b69b 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.0.9/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.9/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.9/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.9/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.9/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.0.9/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.0.9/developing/getting-started/nodejs/querying.md index c9bbce5487..ab9e6f6a88 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.0.9/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.0.9/developing/getting-started/php.md b/content/riak/kv/2.0.9/developing/getting-started/php.md index b9a36ed765..0ac4d488c7 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/php.md +++ b/content/riak/kv/2.0.9/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.9/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.9/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.9/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.9/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.0.9/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.0.9/developing/getting-started/php/crud-operations.md index a3030d3166..5649a3cd7d 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.0.9/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter](/riak/kv/2.0.9/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.0.9/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.0.9/developing/getting-started/php/querying.md b/content/riak/kv/2.0.9/developing/getting-started/php/querying.md index a7b25d559d..ef6ed53767 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.0.9/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.0.9/developing/getting-started/python.md b/content/riak/kv/2.0.9/developing/getting-started/python.md index 82dde1f1d5..b69516b242 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/python.md +++ b/content/riak/kv/2.0.9/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.9/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.9/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.9/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.9/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.0.9/developing/getting-started/python/querying.md b/content/riak/kv/2.0.9/developing/getting-started/python/querying.md index c67da0e72a..f8b91bbee7 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.0.9/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. 
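A condensed sketch of what those queries end up looking like with the Python client (the bucket and index names are assumed from the elided example data; exact-match and range lookups are the two 2i query forms):

```python
import riak

client = riak.RiakClient(pb_port=8087)
orders = client.bucket('Orders')  # assumed bucket

# Exact match: keys of all orders placed by one salesperson.
keys = orders.get_index('salesperson_id_int', 9000)

# Range query: keys of all orders from March 2013.
march = orders.get_index('order_date_bin', '2013-03-01', '2013-03-31')

for key in march:
    print(orders.get(key).data)
```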
diff --git a/content/riak/kv/2.0.9/developing/getting-started/ruby.md b/content/riak/kv/2.0.9/developing/getting-started/ruby.md index e7d7ec4a4c..9a7537309c 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/ruby.md +++ b/content/riak/kv/2.0.9/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.0.9/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.0.9/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.0.9/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.0.9/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.0.9/developing/getting-started/ruby/querying.md b/content/riak/kv/2.0.9/developing/getting-started/ruby/querying.md index ca902c887b..05fe578e8c 100644 --- a/content/riak/kv/2.0.9/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.0.9/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.0.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.0.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.0.9/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. 
diff --git a/content/riak/kv/2.0.9/developing/key-value-modeling.md b/content/riak/kv/2.0.9/developing/key-value-modeling.md index a2de9325d3..d64451ca56 100644 --- a/content/riak/kv/2.0.9/developing/key-value-modeling.md +++ b/content/riak/kv/2.0.9/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.0.9/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.0.9/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.0.9/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.0.9/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.0.9/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.0.9/developing/app-guide/) for a better sense of which features you might need. +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.0.9/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects. Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.0.9/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.0.9/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.0.9/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.0.9/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.0.9/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. 
Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.0.9/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.0.9/developing/api/http)): ``` GET/PUT/DELETE /bucket/<bucket>/keys/<key> ``` @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.0.9/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.0.9/developing/data-types/#sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.0.9/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.0.9/developing/data-types/#sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.0.9/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.0.9/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.0.9/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.9/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.9/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.9/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.9/developing/getting-started).
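Continuing from `user_id_set` above, a minimal sketch of keeping that set current with the Python client (the username is made up, and the `sets` bucket type is assumed to be activated as a set data type):

```python
from riak import RiakClient
from riak.datatypes import Set

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('sets').bucket('user_info_sets')
user_id_set = Set(bucket, 'usernames')

# Stage the addition locally, then send it to Riak.
user_id_set.add('cliffhuxtable')
user_id_set.store()

# Reloading fetches the converged set of known usernames.
user_id_set.reload()
print('cliffhuxtable' in user_id_set.value)
```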
Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.0.9/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.0.9/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.0.9/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.0.9/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.0.9/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.0.9/developing/usage/commit-hooks.md b/content/riak/kv/2.0.9/developing/usage/commit-hooks.md index a2f9e8ddd1..f9c27a61d0 100644 --- a/content/riak/kv/2.0.9/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.0.9/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. -Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.0.9/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.0.9/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. ## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.0.9/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. 
This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.0.9/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.0.9/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.0.9/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.0.9/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.0.9/developing/usage/conflict-resolution.md b/content/riak/kv/2.0.9/developing/usage/conflict-resolution.md index 2a722c091d..cfcef25419 100644 --- a/content/riak/kv/2.0.9/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.0.9/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.9/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.0.9/learn/concepts/clusters) system in which any [node](/riak/kv/2.0.9/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{}}riak/kv/2.0.9/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.0.9/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. -If you are using Riak in an [eventually consistent](/riak/kv/2.0.9/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.0.9/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.0.9/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.0.9/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.0.9/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.0.9/learn/concepts/causal-context#dotted-version-vectors), when updating objects. 
Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.0.9/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.0.9/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.0.9/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.0.9/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.0.9/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.0.9/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.0.9/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. The most important [bucket properties](/riak/kv/2.0.9/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.0.9/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. @@ -87,7 +87,7 @@ If the [`allow_mult`](#siblings) parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -[`last_write_wins`](/riak/kv/2.0.9/learn/concepts/buckets). If `last_write_wins` is set to `false`, +[`last_write_wins`]({{}}riak/kv/2.0.9/learn/concepts/buckets). If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. 
For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.0.9/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.9/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.9/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.9/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.9/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.0.9/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.0.9/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.0.9/configuring/reference) to change the [default bucket properties](/riak/kv/2.0.9/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.0.9/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.0.9/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.0.9/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.0.9/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{}}riak/kv/2.0.9/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.0.9/learn/concepts/causal-context#vector-clocks) will be used. Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.0.9/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.0.9/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. 
Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.0.9/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.0.9/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.0.9/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.0.9/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.0.9/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.0.9/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.0.9/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.0.9/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.0.9/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.9/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.0.9/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.0.9/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.0.9/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.0.9/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.0.9/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -610,7 +610,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.0.9/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -665,7 +665,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/csharp.md index 4d4a8e8ebc..cddeaf8b80 100644 --- a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.9/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. diff --git a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/golang.md index 5d844c8160..f86b5d7774 100644 --- a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.9/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/java.md index 93e8a68aff..7e63ba94ef 100644 --- a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.9/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria.
Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.9/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.9/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.9/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.9/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.9/developing/data-types/#counters), [set](/riak/kv/2.0.9/developing/data-types/#sets), or [map](/riak/kv/2.0.9/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.9/developing/data-types/#counters), [set]({{}}riak/kv/2.0.9/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.9/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.9/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.9/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/nodejs.md index fcfab2a841..46cbf1ce75 100644 --- a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.9/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. 
Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/php.md index aaa3f54bbd..1122bf4c8a 100644 --- a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.9/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.9/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.9/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.9/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.9/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.9/developing/data-types/#counters), [set](/riak/kv/2.0.9/developing/data-types/#sets), or [map](/riak/kv/2.0.9/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.9/developing/data-types/#counters), [set]({{}}riak/kv/2.0.9/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.9/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.9/developing/data-types/#sets). 
+resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.9/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/python.md index 33bfa2ac03..3471ef7fe8 100644 --- a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.9/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.9/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.9/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.9/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.9/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.9/developing/data-types/#counters), [set](/riak/kv/2.0.9/developing/data-types/#sets), or [map](/riak/kv/2.0.9/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.9/developing/data-types/#counters), [set]({{}}riak/kv/2.0.9/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.9/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. 
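To give a rough sense of what that built-in resolution buys you, here is a hedged sketch using the Python client's `Set` data type; it assumes a bucket type named `sets` has already been created and activated with `datatype = set`:

```python
import riak
from riak.datatypes import Set

client = riak.RiakClient()
bucket = client.bucket_type('sets').bucket('friends')

# Additions from concurrent writers are merged by Riak itself, so no
# application-side sibling resolution is required.
friends = Set(bucket, 'bashobunny')
friends.add('fred')
friends.add('barney')
friends.store()

friends.reload()
print(friends.value)  # e.g. frozenset({'barney', 'fred'})
```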
For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.9/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.9/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/ruby.md index 1283ddd6f0..f4dfc19b15 100644 --- a/content/riak/kv/2.0.9/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.0.9/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.9/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.0.9/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.0.9/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.0.9/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.0.9/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.0.9/developing/data-types/#counters), [set](/riak/kv/2.0.9/developing/data-types/#sets), or [map](/riak/kv/2.0.9/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.0.9/developing/data-types/#counters), [set]({{}}riak/kv/2.0.9/developing/data-types/#sets), or [map]({{}}riak/kv/2.0.9/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. 
The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.0.9/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.0.9/developing/data-types/#sets). diff --git a/content/riak/kv/2.0.9/developing/usage/creating-objects.md b/content/riak/kv/2.0.9/developing/usage/creating-objects.md index 053d315536..cf83eecd1f 100644 --- a/content/riak/kv/2.0.9/developing/usage/creating-objects.md +++ b/content/riak/kv/2.0.9/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.0.9/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.0.9/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -23,7 +23,7 @@ PUT /types//buckets//keys/ # If you're using HTTP to interact with Riak, you can also use POST ``` -As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type](/riak/kv/2.0.9/using/cluster-operations/bucket-types). +As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{}}riak/kv/2.0.9/using/cluster-operations/bucket-types). The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -118,7 +118,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, you run the same read operation as in [Reading Objects](/riak/kv/2.0.9/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types](/riak/kv/2.0.9/using/cluster-operations/bucket-types). +Now, you run the same read operation as in [Reading Objects]({{}}riak/kv/2.0.9/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types]({{}}riak/kv/2.0.9/using/cluster-operations/bucket-types). ### Store an Object @@ -138,7 +138,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.0.9/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.0.9/developing/usage/bucket-types) will be applied. 
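For comparison with the HTTP form above, here is a hedged sketch of the same kind of write through the official Python client; it assumes the `animals` bucket type has already been created and activated:

```python
import riak

client = riak.RiakClient()
bucket = client.bucket_type('animals').bucket('dogs')

# The content type tells Riak (and, later, Riak Search) how to
# interpret the stored value.
obj = bucket.new('rufus', data='WOOF!', content_type='text/plain')
obj.store()
```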
#### Write Parameters diff --git a/content/riak/kv/2.0.9/developing/usage/custom-extractors.md b/content/riak/kv/2.0.9/developing/usage/custom-extractors.md index 357c54d870..aab49b0730 100644 --- a/content/riak/kv/2.0.9/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.0.9/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.0.9/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.0.9/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.0.9/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.0.9/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.0.9/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added to `` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.0.9/developing/usage/deleting-objects.md b/content/riak/kv/2.0.9/developing/usage/deleting-objects.md index 6007a4f5d7..dbeed0662f 100644 --- a/content/riak/kv/2.0.9/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.0.9/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.0.9/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.0.9/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/fetch-object). 
This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.0.9/developing/usage/document-store.md b/content/riak/kv/2.0.9/developing/usage/document-store.md index 23030f51a4..8eadb03457 100644 --- a/content/riak/kv/2.0.9/developing/usage/document-store.md +++ b/content/riak/kv/2.0.9/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.0.9/developing/usage/search/) and [Riak Data Types](/riak/kv/2.0.9/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.0.9/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.0.9/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.0.9/developing/data-types/#maps). +[Riak maps]({{}}riak/kv/2.0.9/developing/data-types/#maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.0.9/developing/data-types/#maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.0.9/developing/data-types/#maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**. Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.0.9/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.0.9/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.0.9/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.0.9/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.0.9/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.0.9/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. 
Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.0.9/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.0.9/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.0.9/developing/usage/mapreduce.md b/content/riak/kv/2.0.9/developing/usage/mapreduce.md index b1b99bdb33..fc201c33ef 100644 --- a/content/riak/kv/2.0.9/developing/usage/mapreduce.md +++ b/content/riak/kv/2.0.9/developing/usage/mapreduce.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.9/dev/using/mapreduce --- -[usage 2i]: /riak/kv/2.0.9/developing/usage/secondary-indexes -[usage search]: /riak/kv/2.0.9/developing/usage/search -[usage types]: /riak/kv/2.0.9/developing/usage/bucket-types -[api http]: /riak/kv/2.0.9/developing/api/http -[api pb]: /riak/kv/2.0.9/developing/api/protocol-buffers -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[guide mapreduce]: /riak/kv/2.0.9/developing/app-guide/advanced-mapreduce +[usage 2i]: {{}}riak/kv/2.0.9/developing/usage/secondary-indexes +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search +[usage types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[api http]: {{}}riak/kv/2.0.9/developing/api/http +[api pb]: {{}}riak/kv/2.0.9/developing/api/protocol-buffers +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[guide mapreduce]: {{}}riak/kv/2.0.9/developing/app-guide/advanced-mapreduce {{% note title="Use MapReduce sparingly" %}} In Riak KV, MapReduce is the primary method for non-primary-key-based @@ -116,7 +116,7 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example diff --git a/content/riak/kv/2.0.9/developing/usage/reading-objects.md b/content/riak/kv/2.0.9/developing/usage/reading-objects.md index 4f9e62ec87..75ebcd8394 100644 --- a/content/riak/kv/2.0.9/developing/usage/reading-objects.md +++ b/content/riak/kv/2.0.9/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.0.9/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type](/riak/kv/2.0.9/using/cluster-operations/bucket-types) page. +`dogs`, which bears the bucket type `animals`. 
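For instance, here is a hedged sketch of this read using the official Python client, alongside the document's own Java example below:

```python
import riak

client = riak.RiakClient()
bucket = client.bucket_type('animals').bucket('dogs')

obj = bucket.get('rufus')
print(obj.data)  # 'WOOF!' if the earlier write succeeded
```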
Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type]({{}}riak/kv/2.0.9/using/cluster-operations/bucket-types) page. ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.0.9/developing/usage/replication.md b/content/riak/kv/2.0.9/developing/usage/replication.md index 355a8de311..51f4420db1 100644 --- a/content/riak/kv/2.0.9/developing/usage/replication.md +++ b/content/riak/kv/2.0.9/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.0.9/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.0.9/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using +Using Strong Consistency documentation, as this option will not be covered in this tutorial. {{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.0.9/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.0.9/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. 
the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.0.9/developing/usage/bucket-types) +that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.0.9/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.0.9/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.0.9/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.0.9/setup/planning/backend/multi). ## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.0.9/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.0.9/developing/client-libraries) enable you to set replication properties this way.
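For instance, here is a hedged sketch of the `w=3`, `dw=2` write above using the Python client; the stats payload is an assumption for illustration:

```python
import riak

client = riak.RiakClient()
bucket = client.bucket('nba_stats')

# Per-request overrides: 3 vnodes must acknowledge the write, 2 of
# which must confirm that it is durably on disk. Bucket-level
# defaults are left untouched.
obj = bucket.new('michael_jordan', data={'ppg': 30.1},
                 content_type='application/json')
obj.store(w=3, dw=2)

# Reads can override R for a single request in the same way.
fetched = bucket.get('michael_jordan', r=3)
```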
For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.0.9/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.0.9/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.0.9/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.0.9/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.0.9/developing/usage/search-schemas.md b/content/riak/kv/2.0.9/developing/usage/search-schemas.md index 15504d7917..3fcb74dee6 100644 --- a/content/riak/kv/2.0.9/developing/usage/search-schemas.md +++ b/content/riak/kv/2.0.9/developing/usage/search-schemas.md @@ -15,17 +15,17 @@ aliases: - /riak/kv/2.0.9/dev/advanced/search-schema --- -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). 
Riak Search is built for ease of use, allowing you to write values into Riak and query for values using Solr. Riak Search does a lot of work -under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.0.9/developing/data-types/), and [more](/riak/kv/2.0.9/developing/usage/custom-extractors)---into something that can be indexed and searched later. +under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.0.9/developing/data-types/), and [more]({{}}riak/kv/2.0.9/developing/usage/custom-extractors)---into something that can be indexed and searched later. Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing and array of strings? An integer? A date? Is your text in English or Russian? You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.0.9/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.0.9/developing/usage/search.md b/content/riak/kv/2.0.9/developing/usage/search.md index 1531b98cc2..f54fb73f99 100644 --- a/content/riak/kv/2.0.9/developing/usage/search.md +++ b/content/riak/kv/2.0.9/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.0.9/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.0.9/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.9/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.9/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.9/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.9/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.0.9/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.0.9/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. 
In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.0.9/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.0.9/developing/usage/custom-extractors). Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.0.9/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.0.9/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.0.9/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.0.9/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.0.9/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.0.9/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.0.9/developing/usage/searching-data-types.md b/content/riak/kv/2.0.9/developing/usage/searching-data-types.md index 7216988343..2272ca9d7b 100644 --- a/content/riak/kv/2.0.9/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.0.9/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.9/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.0.9/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.0.9/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). 
Riak's [counters](/riak/kv/2.0.9/developing/data-types/#counters), [sets](/riak/kv/2.0.9/developing/data-types/#sets), and [maps](/riak/kv/2.0.9/developing/data-types/#maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.0.9/developing/data-types/#counters), [sets]({{}}riak/kv/2.0.9/developing/data-types/#sets), and [maps]({{}}riak/kv/2.0.9/developing/data-types/#maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. ### Top-level Schemas -The default schema for [counters](/riak/kv/2.0.9/developing/data-types/#counters) indexes each +The default schema for [counters]({{}}riak/kv/2.0.9/developing/data-types/#counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.0.9/developing/data-types/#sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.0.9/developing/data-types/#sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.0.9/developing/data-types/#maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.0.9/developing/data-types/#maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) for [storing counters](/riak/kv/2.0.9/developing/data-types/#counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.0.9/developing/data-types/#counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.0.9/developing/usage/bucket-types) for [storing sets](/riak/kv/2.0.9/developing/data-types/#sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.0.9/developing/data-types/#sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.0.9/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.0.9/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.0.9/developing/data-types/#maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.0.9/developing/data-types/#maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.0.9/developing/usage/secondary-indexes.md b/content/riak/kv/2.0.9/developing/usage/secondary-indexes.md index 62b27aa74a..52afd004cc 100644 --- a/content/riak/kv/2.0.9/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.0.9/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.0.9/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.9/setup/planning/backend/memory -[use ref strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.9/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.9/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.0.9/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.0.9/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.0.9/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.0.9/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.0.9/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.0.9/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.0.9/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.0.9/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.0.9/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.0.9/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.0.9/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.9/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.0.9/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.9/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.9/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.0.9/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.0.9/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.0.9/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
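To make the query interface sketched above concrete, here is a minimal example, assuming a local node listening on the default HTTP port (8098) and the `twitter_bin` index from the object stored earlier; the bucket, key, and index value are illustrative only:

```bash
# Tag an object with a binary (string) secondary index at write time.
curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \
  -H 'Content-Type: application/json' \
  -H 'x-riak-index-twitter_bin: jsmith123' \
  -d '{"name": "John Smith"}'

# Later, fetch the keys of every object in the bucket bearing that tag.
curl localhost:8098/types/default/buckets/users/index/twitter_bin/jsmith123
```

The second request returns a JSON body whose `keys` field lists the matching keys.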
### Exact Match diff --git a/content/riak/kv/2.0.9/developing/usage/security.md b/content/riak/kv/2.0.9/developing/usage/security.md index 02292fde3c..e8a17363e8 100644 --- a/content/riak/kv/2.0.9/developing/usage/security.md +++ b/content/riak/kv/2.0.9/developing/usage/security.md @@ -15,49 +15,49 @@ aliases: - /riak/kv/2.0.9/dev/advanced/client-security --- -Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.0.9/using/security/basics) that enables you to choose +Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.0.9/using/security/basics) that enables you to choose * which Riak users/clients are authorized to perform a wide variety of Riak operations, and * how those users/clients are required to authenticate themselves. -The following four authentication mechanisms, aka [security sources](/riak/kv/2.0.9/using/security/managing-sources/) are available: +The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.0.9/using/security/managing-sources/) are available: -* [Trust](/riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication)-based +* [Trust]({{}}riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default -* [Password](/riak/kv/2.0.9/using/security/managing-sources/#password-based-authentication)-based authentication requires +* [Password]({{}}riak/kv/2.0.9/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password -* [Certificate](/riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication +* [Certificate]({{}}riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients present a certificate -* [Pluggable authentication module (PAM)](/riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication)-based authentication requires +* [Pluggable authentication module (PAM)]({{}}riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the - [`riak-admin security`](/riak/kv/2.0.9/using/security/managing-sources/#managing-sources) + [`riak-admin security`]({{}}riak/kv/2.0.9/using/security/managing-sources/#managing-sources) command line interface Riak's approach to security is highly flexible. If you choose to use Riak's security feature, you do not need to require that all clients authenticate via the same means. Instead, you can specify authentication sources on a client-by-client, i.e. user-by-user, basis. This means that -you can require clients performing, say, [MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce/) -operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.0.9/developing/usage) have to use username and password. The approach +you can require clients performing, say, [MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.0.9/developing/usage) have to use username and password. The approach that you adopt will depend on your security needs. This document provides a general overview of how that works.
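As a rough sketch of that per-user flexibility, using the `riak-admin security` interface (the usernames and the catch-all CIDR here are purely illustrative):

```bash
# Turn the security subsystem on; unauthenticated access is then refused.
riak-admin security enable

# A user that runs MapReduce jobs must present a certificate...
riak-admin security add-user mapreduce_user
riak-admin security add-source mapreduce_user 0.0.0.0/0 certificate

# ...while an ordinary K/V user authenticates with a password.
riak-admin security add-user kv_user password=rosebud
riak-admin security add-source kv_user 0.0.0.0/0 password
```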
For managing security in Riak itself, see the following documents: -* [Authentication and Authorization](/riak/kv/2.0.9/using/security/basics) -* [Managing Security Sources](/riak/kv/2.0.9/using/security/managing-sources/) +* [Authentication and Authorization]({{}}riak/kv/2.0.9/using/security/basics) +* [Managing Security Sources]({{}}riak/kv/2.0.9/using/security/managing-sources/) We also provide client-library-specific guides for the following officially supported clients: -* [Java](/riak/kv/2.0.9/developing/usage/security/java) -* [Ruby](/riak/kv/2.0.9/developing/usage/security/ruby) -* [PHP](/riak/kv/2.0.9/developing/usage/security/php) -* [Python](/riak/kv/2.0.9/developing/usage/security/python) -* [Erlang](/riak/kv/2.0.9/developing/usage/security/erlang) +* [Java]({{}}riak/kv/2.0.9/developing/usage/security/java) +* [Ruby]({{}}riak/kv/2.0.9/developing/usage/security/ruby) +* [PHP]({{}}riak/kv/2.0.9/developing/usage/security/php) +* [Python]({{}}riak/kv/2.0.9/developing/usage/security/python) +* [Erlang]({{}}riak/kv/2.0.9/developing/usage/security/erlang) ## Certificates, Keys, and Authorities @@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients. > **HTTP not supported** > > Certificate-based authentication is available only through Riak's -[Protocol Buffers](/riak/kv/2.0.9/developing/api/protocol-buffers/) interface. It is not available through the -[HTTP API](/riak/kv/2.0.9/developing/api/http). +[Protocol Buffers]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{}}riak/kv/2.0.9/developing/api/http). ### Default Names -In Riak's [configuration files](/riak/kv/2.0.9/configuring/reference/#security), the +In Riak's [configuration files]({{}}riak/kv/2.0.9/configuring/reference/#security), the default certificate file names are as follows: Cert | Filename diff --git a/content/riak/kv/2.0.9/developing/usage/security/erlang.md b/content/riak/kv/2.0.9/developing/usage/security/erlang.md index c29bdf04c9..2634443beb 100644 --- a/content/riak/kv/2.0.9/developing/usage/security/erlang.md +++ b/content/riak/kv/2.0.9/developing/usage/security/erlang.md @@ -19,9 +19,9 @@ aliases: This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak. -If you are using [trust](/riak/kv/2.0.9/using/security/managing-sources/), [PAM-](/riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.0.9/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.0.9/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.9/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.9/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.9/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.9/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.9/developing/usage/security/java.md b/content/riak/kv/2.0.9/developing/usage/security/java.md index 1cbdd0472e..604dff71b9 100644 --- a/content/riak/kv/2.0.9/developing/usage/security/java.md +++ b/content/riak/kv/2.0.9/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.9/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.9/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.0.9/developing/usage/security/php.md b/content/riak/kv/2.0.9/developing/usage/security/php.md index 2982859254..9264b98944 100644 --- a/content/riak/kv/2.0.9/developing/usage/security/php.md +++ b/content/riak/kv/2.0.9/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.9/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.0.9/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.0.9/developing/usage/security/python.md b/content/riak/kv/2.0.9/developing/usage/security/python.md index 181da3d41f..6c65b4e3f3 100644 --- a/content/riak/kv/2.0.9/developing/usage/security/python.md +++ b/content/riak/kv/2.0.9/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.9/using/security/managing-sources/) or [PAM-](/riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.0.9/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.0.9/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security +setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.0.9/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.9/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.0.9/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.0.9/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.9/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.0.9/using/security/basics/#user-management).
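For reference, a sketch of the server-side user setup that this client configuration assumes (the password and the PAM service name `riak_pam` are placeholders):

```bash
# Create the Riak user whose credentials the Python client will supply.
riak-admin security add-user riakuser password=rosebud

# Authenticate that user via PAM for connections from localhost.
riak-admin security add-source riakuser 127.0.0.1/32 pam service=riak_pam
```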
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.0.9/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.0.9/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.0.9/developing/usage/security/ruby.md b/content/riak/kv/2.0.9/developing/usage/security/ruby.md index 1fc769aeb7..2da1e15e23 100644 --- a/content/riak/kv/2.0.9/developing/usage/security/ruby.md +++ b/content/riak/kv/2.0.9/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.0.9/using/security/managing-sources/) or [PAM](/riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.0.9/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.0.9/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.0.9/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.0.9/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.0.9/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication). 
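For context, marking a CIDR as trusted is done on the server side, roughly like this (the CIDR is an example):

```bash
# Clients connecting from localhost are trusted: they need only
# supply a username, with no password or certificate.
riak-admin security add-source all 127.0.0.1/32 trust
```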
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.0.9/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.0.9/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.0.9/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.0.9/developing/usage/updating-objects.md b/content/riak/kv/2.0.9/developing/usage/updating-objects.md index 3477339ddd..00ea784184 100644 --- a/content/riak/kv/2.0.9/developing/usage/updating-objects.md +++ b/content/riak/kv/2.0.9/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/dev/using/updates --- -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode ## Using Causal Context If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.0.9/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.0.9/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.0.9/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.0.9/learn/concepts/causal-context). These objects track the causal history of objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps: 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.0.9/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.0.9/learn/concepts/causal-context)) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.0.9/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.0.9/learn/concepts/causal-context). Although a more detailed tutorial on context objects and +object updates can be found in [Conflict Resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution), we'll walk you through a basic example here.
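Before the client-library walkthrough, here is a rough sketch of the same fetch/modify/write cycle over the HTTP API, where the context object travels in the `X-Riak-Vclock` header (the bucket, key, and value are hypothetical):

```bash
# 1. Fetch the object and capture its causal context from the response headers.
VCLOCK=$(curl -si localhost:8098/buckets/my_bucket/keys/my_key \
  | grep -i '^x-riak-vclock' | awk '{print $2}' | tr -d '\r')

# 2. and 3. Write the modified value back, passing the fetched context along.
curl -XPUT localhost:8098/buckets/my_bucket/keys/my_key \
  -H "X-Riak-Vclock: $VCLOCK" \
  -H 'Content-Type: text/plain' \
  -d 'updated value'
```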
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.0.9/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.0.9/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.0.9/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.0.9/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.0.9/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.0.9/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.0.9/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.0.9/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.0.9/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.0.9/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.0.9/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.0.9/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.0.9/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.0.9/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.0.9/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.0.9/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.0.9/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.0.9/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.0.9/index.md b/content/riak/kv/2.0.9/index.md index 3250eb365b..b72ad19da3 100644 --- a/content/riak/kv/2.0.9/index.md +++ b/content/riak/kv/2.0.9/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.0.9/configuring -[dev index]: /riak/kv/2.0.9/developing -[downloads]: /riak/kv/2.0.9/downloads/ -[install index]: /riak/kv/2.0.9/setup/installing/ -[plan index]: /riak/kv/2.0.9/setup/planning -[perf open files]: /riak/kv/2.0.9/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.0.9/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.0.9/developing/usage/search -[getting started]: /riak/kv/2.0.9/developing/getting-started -[dev client libraries]: /riak/kv/2.0.9/developing/client-libraries +[config index]: {{}}riak/kv/2.0.9/configuring +[dev index]: {{}}riak/kv/2.0.9/developing +[downloads]: {{}}riak/kv/2.0.9/downloads/ +[install index]: {{}}riak/kv/2.0.9/setup/installing/ +[plan index]: {{}}riak/kv/2.0.9/setup/planning +[perf open files]: {{}}riak/kv/2.0.9/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.0.9/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search +[getting started]: {{}}riak/kv/2.0.9/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.0.9/developing/client-libraries diff --git a/content/riak/kv/2.0.9/learn/concepts.md b/content/riak/kv/2.0.9/learn/concepts.md index eb5cfd6aaa..fa7ad44cf8 100644 --- a/content/riak/kv/2.0.9/learn/concepts.md +++ b/content/riak/kv/2.0.9/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.0.9/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.0.9/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.0.9/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.0.9/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.9/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.9/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.9/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.0.9/learn/concepts/vnodes -[config index]: /riak/kv/2.0.9/configuring -[plan index]: /riak/kv/2.0.9/setup/planning -[use index]: /riak/kv/2.0.9/using/ +[concept aae]: {{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.0.9/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.0.9/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.0.9/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.9/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.9/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.9/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.0.9/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.0.9/configuring +[plan index]: {{}}riak/kv/2.0.9/setup/planning +[use index]: {{}}riak/kv/2.0.9/using/ Riak KV has many great features, functions, and guiding 
principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. diff --git a/content/riak/kv/2.0.9/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.0.9/learn/concepts/active-anti-entropy.md index 6454d12bb0..af16750a85 100644 --- a/content/riak/kv/2.0.9/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.0.9/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.0.9/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.0.9/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.0.9/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.0.9/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.0.9/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.0.9/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.0.9/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.0.9/developing/usage/search +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.0.9/learn/concepts/buckets.md b/content/riak/kv/2.0.9/learn/concepts/buckets.md index 71d1fb92de..585ded1cda 100644 --- a/content/riak/kv/2.0.9/learn/concepts/buckets.md +++ b/content/riak/kv/2.0.9/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.0.9/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.0.9/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.0.9/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.0.9/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.0.9/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.0.9/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.0.9/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[config basic]: /riak/kv/2.0.9/configuring/basic -[dev api http]: /riak/kv/2.0.9/developing/api/http -[dev data types]: /riak/kv/2.0.9/developing/data-types -[glossary ring]: /riak/kv/2.0.9/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.0.9/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.9/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.0.9/developing/usage/commit-hooks -[usage conflict resolution]: 
/riak/kv/2.0.9/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.9/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.0.9/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.0.9/using/cluster-operations/bucket-types +[cluster ops strong consistency]: {{}}riak/kv/2.0.9/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.0.9/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.0.9/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.0.9/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.0.9/configuring/basic +[dev api http]: {{}}riak/kv/2.0.9/developing/api/http +[dev data types]: {{}}riak/kv/2.0.9/developing/data-types +[glossary ring]: {{}}riak/kv/2.0.9/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.0.9/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.9/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.0.9/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.0.9/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.9/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.0.9/learn/concepts/capability-negotiation.md b/content/riak/kv/2.0.9/learn/concepts/capability-negotiation.md index 9f41f9bbc0..de8cbb5fef 100644 --- a/content/riak/kv/2.0.9/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.0.9/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.0.9/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.0.9/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.0.9/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.0.9/developing/usage/mapreduce In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.0.9/learn/concepts/causal-context.md b/content/riak/kv/2.0.9/learn/concepts/causal-context.md index 4d1baa5efe..6e55215ecb 100644 --- a/content/riak/kv/2.0.9/learn/concepts/causal-context.md +++ b/content/riak/kv/2.0.9/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.0.9/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.0.9/developing/api/http -[dev key value]: /riak/kv/2.0.9/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.0.9/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.0.9/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.9/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.0.9/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.0.9/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.0.9/developing/api/http +[dev key value]: {{}}riak/kv/2.0.9/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.0.9/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.0.9/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.9/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.0.9/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.0.9/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -73,7 +73,7 @@ Causal context comes in two forms in Riak: **vector clocks** and **dotted version vectors**. More information in both can be found in the sections below. -In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type](/riak/kv/2.0.9/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other +In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions). 
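As a sketch, that parameter is set when creating a bucket type (the type name here is arbitrary):

```bash
# Create and activate a bucket type whose buckets retain siblings.
riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
riak-admin bucket-type activate siblings_allowed
```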
If, however, `allow_mult` is set to `false`, then Riak will not generate diff --git a/content/riak/kv/2.0.9/learn/concepts/clusters.md b/content/riak/kv/2.0.9/learn/concepts/clusters.md index 3e7e47c53a..0f86e5c12e 100644 --- a/content/riak/kv/2.0.9/learn/concepts/clusters.md +++ b/content/riak/kv/2.0.9/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.9/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.0.9/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.9/learn/concepts/replication -[glossary node]: /riak/kv/2.0.9/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.0.9/learn/dynamo -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.9/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.0.9/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.0.9/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.0.9/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.9/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.9/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.0.9/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.9/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.0.9/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![Riak Data Distribution]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions.
When a diff --git a/content/riak/kv/2.0.9/learn/concepts/crdts.md b/content/riak/kv/2.0.9/learn/concepts/crdts.md index 398115d2b6..bb4264df81 100644 --- a/content/riak/kv/2.0.9/learn/concepts/crdts.md +++ b/content/riak/kv/2.0.9/learn/concepts/crdts.md @@ -17,20 +17,20 @@ aliases: --- [crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf -[data types converg]: /riak/kv/2.0.9/learn/concepts/crdts/#convergence +[data types converg]: {{}}riak/kv/2.0.9/learn/concepts/crdts/#convergence [crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html -[data types impl]: /riak/kv/2.0.9/learn/concepts/crdts/#implementation -[concept causal context dvv]: /riak/kv/2.0.9/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.0.9/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.0.9/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.0.9/developing/data-types +[data types impl]: {{}}riak/kv/2.0.9/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{}}riak/kv/2.0.9/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.0.9/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.0.9/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.0.9/developing/data-types [riak_dt]: https://github.com/basho/riak_dt -[dev data types context]: /riak/kv/2.1.4/developing/data-types/#data-types-and-context -[glossary node]: /riak/kv/2.0.9/learn/glossary/#node -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.0.9/developing/usage/conflict-resolution +[dev data types context]: {{}}riak/kv/2.0.9/developing/data-types/#data-types-and-context +[glossary node]: {{}}riak/kv/2.0.9/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.0.9/developing/usage/conflict-resolution Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. 
Riak KV supports the following eventually-convergent data types, described in later sections: diff --git a/content/riak/kv/2.0.9/learn/concepts/eventual-consistency.md b/content/riak/kv/2.0.9/learn/concepts/eventual-consistency.md index 1e4251c19a..8da9a5e9e0 100644 --- a/content/riak/kv/2.0.9/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.0.9/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.0.9/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters -[concept replication]: /riak/kv/2.0.9/learn/concepts/replication -[glossary node]: /riak/kv/2.0.9/learn/glossary/#node -[glossary read rep]: /riak/kv/2.0.9/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.0.9/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.0.9/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.0.9/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.0.9/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.0.9/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.0.9/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.0.9/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.0.9/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.0.9/developing/data-modeling/). +or models]({{}}riak/kv/2.0.9/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
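As a small illustration of that request-level tuning, the replication-related parameters can also be supplied per request over the HTTP API (the bucket, key, and values are hypothetical):

```bash
# Ask that 3 replicas respond before this read is considered successful.
curl "localhost:8098/buckets/animals/keys/rufus?r=3"

# Ask that 2 replicas acknowledge this write before it returns.
curl -XPUT "localhost:8098/buckets/animals/keys/rufus?w=2" \
  -H 'Content-Type: text/plain' \
  -d 'red setter'
```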
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.0.9/learn/concepts/keys-and-objects.md b/content/riak/kv/2.0.9/learn/concepts/keys-and-objects.md index bb888eca26..9b5b6eaa65 100644 --- a/content/riak/kv/2.0.9/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.0.9/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.9/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.0.9/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.0.9/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.0.9/learn/concepts/replication.md b/content/riak/kv/2.0.9/learn/concepts/replication.md index d81c096bdc..d073a5c115 100644 --- a/content/riak/kv/2.0.9/learn/concepts/replication.md +++ b/content/riak/kv/2.0.9/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.0.9/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.0.9/learn/concepts/vnodes -[glossary node]: /riak/kv/2.0.9/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.9/learn/glossary/#ring -[usage replication]: /riak/kv/2.0.9/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.0.9/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.0.9/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.9/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.0.9/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.0.9/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.0.9/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.0.9/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.0.9/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail. 
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.0.9/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.0.9/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.0.9/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.0.9/learn/concepts/strong-consistency.md b/content/riak/kv/2.0.9/learn/concepts/strong-consistency.md index 8a7b15ae25..c6e0d61266 100644 --- a/content/riak/kv/2.0.9/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.0.9/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.9/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.9/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.9/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.9/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.9/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.9/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.9/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.0.9/learn/concepts/vnodes.md b/content/riak/kv/2.0.9/learn/concepts/vnodes.md index 39f13db62d..bb98e6b265 100644 --- a/content/riak/kv/2.0.9/learn/concepts/vnodes.md +++ b/content/riak/kv/2.0.9/learn/concepts/vnodes.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context]: /riak/kv/2.0.9/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.0.9/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.0.9/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.9/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.0.9/learn/glossary/#node -[glossary ring]: /riak/kv/2.0.9/learn/glossary/#ring -[plan backend]: /riak/kv/2.0.9/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.9/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.0.9/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.0.9/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.0.9/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.0.9/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.9/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.0.9/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.0.9/learn/glossary/#ring +[plan backend]: {{}}riak/kv/2.0.9/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.9/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.0.9/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -80,7 +80,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -102,7 +102,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.0.9/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.0.9/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.0.9/learn/dynamo.md b/content/riak/kv/2.0.9/learn/dynamo.md index e34e87397f..f05cad67e8 100644 --- a/content/riak/kv/2.0.9/learn/dynamo.md +++ b/content/riak/kv/2.0.9/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.0.9/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.0.9/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.0.9/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.0.9/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. 
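For reference, a basic write/read over the HTTP API mentioned above looks roughly like this (default port 8098; bucket and key names are illustrative):

```bash
# Store an object under bucket "fruit", key "apple"
curl -XPUT http://localhost:8098/buckets/fruit/keys/apple \
  -H 'Content-Type: text/plain' -d 'delicious'

# Read it back
curl http://localhost:8098/buckets/fruit/keys/apple
```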
-[HTTP API]: /riak/kv/2.0.9/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.0.9/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.0.9/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.0.9/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.0.9/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.0.9/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.0.9/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.0.9/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.0.9/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.0.9/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.0.9/developing/api/http/) +>[REST API]({{}}riak/kv/2.0.9/developing/api/http/) > ->[Writing Data](/riak/kv/2.0.9/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.0.9/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.0.9/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.0.9/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.0.9/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.0.9/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.0.9/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.0.9/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.0.9/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.0.9/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. -[Multi Datacenter Replication]: /riak/kv/2.0.9/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.0.9/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. 
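Returning to the causal-context annotation above: over HTTP, the vector clock travels in the `X-Riak-Vclock` header. A sketch of the read-modify-write cycle (the header value shown is illustrative, not a real clock):

```bash
# Fetch the object; the response carries its causal context in X-Riak-Vclock
curl -i http://localhost:8098/buckets/fruit/keys/apple

# Send the update with that context so Riak can order it after the version read
curl -XPUT http://localhost:8098/buckets/fruit/keys/apple \
  -H 'Content-Type: text/plain' \
  -H 'X-Riak-Vclock: a85hYGBgzGDKBVIcypz/fvo=' \
  -d 'still delicious'
```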
> This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.0.9/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.0.9/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.0.9/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.0.9/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.0.9/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.0.9/setup/planning/backend/ -[Bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.0.9/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.0.9/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.0.9/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.0.9/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.0.9/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.0.9/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.0.9/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.0.9/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.0.9/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.9/using/performance/benchmarking/ Dynamo is used by several services with different configurations.
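As a companion to the ring-membership notes above, node management in Riak KV is staged and explicitly committed. A minimal sketch (nodenames are illustrative):

```bash
# On the joining node, stage a join to any existing cluster member
riak-admin cluster join riak@192.168.1.10

# Review the staged changes and the partition transfers they will trigger
riak-admin cluster plan

# Apply the plan
riak-admin cluster commit
```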
These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.0.9/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.0.9/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.0.9/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.0.9/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.0.9/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.0.9/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.0.9/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.0.9/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.0.9/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.0.9/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. -[Basho Bench]: /riak/kv/2.0.9/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.0.9/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. 
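To make the CRDT annotation above concrete, here is a sketch of a convergent counter over the HTTP API (bucket type, bucket, and key names are illustrative):

```bash
# Counters live in a bucket type created with datatype=counter
riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
riak-admin bucket-type activate counters

# Increment and read; concurrent increments from different clients converge
curl -XPOST http://localhost:8098/types/counters/buckets/stats/datatypes/page_views \
  -H 'Content-Type: application/json' -d '{"increment": 1}'
curl http://localhost:8098/types/counters/buckets/stats/datatypes/page_views
```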
-[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.0.9/learn/glossary.md b/content/riak/kv/2.0.9/learn/glossary.md index e738cbba9c..e1163d775d 100644 --- a/content/riak/kv/2.0.9/learn/glossary.md +++ b/content/riak/kv/2.0.9/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.0.9/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.0.9/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters -[concept crdts]: /riak/kv/2.0.9/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.0.9/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.0.9/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.0.9/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.0.9/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.0.9/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.0.9/developing/api/http -[dev data model]: /riak/kv/2.0.9/developing/data-modeling -[dev data types]: /riak/kv/2.0.9/developing/data-types -[glossary read rep]: /riak/kv/2.0.9/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.0.9/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.0.9/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.0.9/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.0.9/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.0.9/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.0.9/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.0.9/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.0.9/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.0.9/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.0.9/developing/api/http +[dev data model]: {{}}riak/kv/2.0.9/developing/data-modeling +[dev data types]: {{}}riak/kv/2.0.9/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.9/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.0.9/learn/dynamo -[plan cluster capacity]: /riak/kv/2.0.9/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.0.9/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.0.9/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.0.9/learn/dynamo +[plan cluster capacity]: 
{{}}riak/kv/2.0.9/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.0.9/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.0.9/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.0.9/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.9/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.9/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.0.9/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.9/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.0.9/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.0.9/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. -* [Bucket Types](/riak/kv/2.0.9/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.0.9/learn/use-cases.md b/content/riak/kv/2.0.9/learn/use-cases.md index bd37625928..b13ffc900c 100644 --- a/content/riak/kv/2.0.9/learn/use-cases.md +++ b/content/riak/kv/2.0.9/learn/use-cases.md @@ -16,19 +16,19 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.0.9/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.0.9/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.0.9/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.0.9/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.0.9/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.0.9/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.0.9/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.0.9/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.0.9/developing/data-types -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[usage mapreduce]: /riak/kv/2.0.9/developing/usage/mapreduce -[usage search]: /riak/kv/2.0.9/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.0.9/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.0.9/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.0.9/developing/data-modeling/#log-data +[dev data model sensor data]: {{}}riak/kv/2.0.9/developing/data-modeling/#sensor-data +[dev data model serve 
advertisements]: {{}}riak/kv/2.0.9/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.0.9/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.0.9/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.0.9/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.0.9/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.0.9/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[usage mapreduce]: {{}}riak/kv/2.0.9/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.0.9/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing @@ -115,7 +115,7 @@ based on, for example, a campaign or company ID for easy retrieval. In the advertising industry, being able to serve ads quickly to many users and platforms is often the most important factor in selecting and -tuning a database. Riak's tunable [apps replication properties](/riak/kv/2.0.9/developing/app-guide/replication-properties) can be set +tuning a database. Riak's tunable [apps replication properties]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties) can be set to favor fast read performance. By setting R to 1, only one of N replicas will need to be returned to complete a read operation, yielding lower read latency than an R value equal to the number of replicas @@ -321,7 +321,7 @@ part of a URL string, etc. In Riak, you can store content of any kind, from HTML files to plain text to JSON or XML or another document type entirely. Keep in mind that -data in Riak is opaque, with the exception of [Riak Data Types](/riak/kv/2.0.9/developing/data-types), +data in Riak is opaque, with the exception of [Riak Data Types]({{}}riak/kv/2.0.9/developing/data-types), and so Riak won't "know" about the object unless it is indexed [using Riak Search][usage search] or [using secondary indexes][usage secondary-indexes]. diff --git a/content/riak/kv/2.0.9/learn/why-riak-kv.md b/content/riak/kv/2.0.9/learn/why-riak-kv.md index d72a93e6cc..160ad92c62 100644 --- a/content/riak/kv/2.0.9/learn/why-riak-kv.md +++ b/content/riak/kv/2.0.9/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.0.9/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.0.9/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.0.9/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.0.9/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.0.9/developing/data-types -[glossary read rep]: /riak/kv/2.0.9/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.0.9/developing/data-types +[glossary read rep]: {{}}riak/kv/2.0.9/learn/glossary/#read-repair ## What is Riak? 
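As an aside to the ad-serving use case above, per-request tuning is just a query parameter over HTTP. A sketch, with illustrative names:

```bash
# Return as soon as 1 of the N replicas answers, trading consistency for latency
curl 'http://localhost:8098/buckets/ads/keys/campaign-123?r=1'
```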
diff --git a/content/riak/kv/2.0.9/release-notes.md b/content/riak/kv/2.0.9/release-notes.md index 23e243cffe..64e87a3f03 100644 --- a/content/riak/kv/2.0.9/release-notes.md +++ b/content/riak/kv/2.0.9/release-notes.md @@ -57,13 +57,13 @@ This is an LTS (long term support) bugfix release that includes improvements to * Logging has been added to clear and exchange trees for audit of administrative operations. * All above work captured in [yokozuna PR 704](https://github.com/basho/yokozuna/pull/704). -* Additional [Cuttlefish parameters](/riak/kv/2.0.9/configuring/reference/#search) have been added to support the Riak search batching updates. These configs will allow you to set batching parameters based on your needs and have, in certain cases, led to significantly higher write throughput to Solr. +* Additional [Cuttlefish parameters]({{}}riak/kv/2.0.9/configuring/reference/#search) have been added to support the Riak search batching updates. These configs will allow you to set batching parameters based on your needs and have, in certain cases, led to significantly higher write throughput to Solr. * [[yokozuna PR 704](https://github.com/basho/yokozuna/pull/704)] ### Bugs Fixed -* LevelDB has been upgraded to version 2.0.33, which resolves the [AAE stall product advisory](http://docs.basho.com/community/productadvisories/aaestall/). +* LevelDB has been upgraded to version 2.0.33, which resolves the [AAE stall product advisory]({{}}community/productadvisories/aaestall/). * [[riak_kv PR 1527](https://github.com/basho/riak_kv/pull/1527)] A race condition was occurring where a `gen_fsm` timeout event was not reliably sent, even when the timeout was set to zero, and another message or event could preempt or unset the timeout. To fix this, a timeout event is manually sent using `gen_fsm:send_event`. * [[riak PR 886](https://github.com/basho/riak/pull/886), [riak_ee PR 412](https://github.com/basho/riak_ee/pull/412), and [node_package PR 210](https://github.com/basho/node_package/pull/210)] Atom usage in `riak` and `riak-admin` commands has been restricted to 1000. Previously, the OS PID was being used as a pseudo-random number generator, but the range was too large since each nodename used would generate an entry in the atom table. `riak-admin top` uses $$ to randomize the name used to connect to the local Riak node, and the large range of possible OS PIDs can result in atom table exhaustion on long running nodes/clusters. The nodename used by `riak top` has been changed to match `riak-admin top` convention, using `$RANDOM` with the range restricted to 1-1000. * [[riak_core Issue 855](https://github.com/basho/riak_core/issues/855)/[riak_core PR 886](https://github.com/basho/riak_core/pull/886)] If updates to the same key in the ring metadata occurred on different nodes during the same second, they were not reconciled. This could lead to nodes flip-flopping the value and many gossip messages causing extremely high message queues and heap usage by the gossip processes. Nodenames have been added to the `merge_meta` comparison to avoid this issue. 
diff --git a/content/riak/kv/2.0.9/setup/downgrade.md b/content/riak/kv/2.0.9/setup/downgrade.md index 8d3db5dd67..a2beb7efe9 100644 --- a/content/riak/kv/2.0.9/setup/downgrade.md +++ b/content/riak/kv/2.0.9/setup/downgrade.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.9/ops/upgrading/rolling-downgrades/ --- -[rolling upgrade]: /riak/kv/2.0.9/setup/upgrading/cluster -[config ref]: /riak/kv/2.0.9/configuring/reference -[concept aae]: /riak/kv/2.0.9/learn/concepts/active-anti-entropy/ -[aae status]: /riak/kv/2.0.9/using/admin/riak-admin/#aae-status +[rolling upgrade]: {{}}riak/kv/2.0.9/setup/upgrading/cluster +[config ref]: {{}}riak/kv/2.0.9/configuring/reference +[concept aae]: {{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/ +[aae status]: {{}}riak/kv/2.0.9/using/admin/riak-admin/#aae-status Downgrades of Riak KV are tested and generally supported for two feature release versions (see warning below), with the general procedure being similar to that of a [rolling upgrade][rolling upgrade]. diff --git a/content/riak/kv/2.0.9/setup/installing.md b/content/riak/kv/2.0.9/setup/installing.md index d54c12836f..64fe3eb664 100644 --- a/content/riak/kv/2.0.9/setup/installing.md +++ b/content/riak/kv/2.0.9/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.0.9/installing/ --- -[install aws]: /riak/kv/2.0.9/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.0.9/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.0.9/setup/installing/freebsd -[install mac osx]: /riak/kv/2.0.9/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.0.9/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.0.9/setup/installing/smartos -[install solaris]: /riak/kv/2.0.9/setup/installing/solaris -[install suse]: /riak/kv/2.0.9/setup/installing/suse -[install windows azure]: /riak/kv/2.0.9/setup/installing/windows-azure -[install source index]: /riak/kv/2.0.9/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.0.9/setup/upgrading +[install aws]: {{}}riak/kv/2.0.9/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.0.9/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.0.9/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.0.9/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.0.9/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.0.9/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.0.9/setup/installing/solaris +[install suse]: {{}}riak/kv/2.0.9/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.0.9/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.0.9/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.0.9/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.0.9/setup/installing/amazon-web-services.md b/content/riak/kv/2.0.9/setup/installing/amazon-web-services.md index f027d9fdb3..d3c53492ab 100644 --- a/content/riak/kv/2.0.9/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.0.9/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. 
@@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.0.9/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.0.9/using/security/). ## Clustering Riak on AWS diff --git a/content/riak/kv/2.0.9/setup/installing/debian-ubuntu.md b/content/riak/kv/2.0.9/setup/installing/debian-ubuntu.md index e5b22c02e9..67ef2bbcc1 100644 --- a/content/riak/kv/2.0.9/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.0.9/setup/installing/debian-ubuntu.md @@ -18,10 +18,10 @@ aliases: - /riak/kv/2.0.9/installing/debian-ubuntu/ --- -[install source index]: /riak/kv/2.0.9/setup/installing/source/ -[security index]: /riak/kv/2.0.9/using/security/ -[install source erlang]: /riak/kv/2.0.9/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.9/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.9/setup/installing/source/ +[security index]: {{}}riak/kv/2.0.9/using/security/ +[install source erlang]: {{}}riak/kv/2.0.9/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.9/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.0.9/setup/installing/freebsd.md b/content/riak/kv/2.0.9/setup/installing/freebsd.md index 86c2403103..1bbce04620 100644 --- a/content/riak/kv/2.0.9/setup/installing/freebsd.md +++ b/content/riak/kv/2.0.9/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.0.9/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.9/downloads/ -[install verify]: /riak/kv/2.0.9/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.9/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.9/downloads/ +[install verify]: {{}}riak/kv/2.0.9/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.0.9/setup/installing/mac-osx.md b/content/riak/kv/2.0.9/setup/installing/mac-osx.md index e9cac49628..ff76ba4097 100644 --- a/content/riak/kv/2.0.9/setup/installing/mac-osx.md +++ b/content/riak/kv/2.0.9/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.0.9/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.0.9/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.9/setup/installing/verify +[perf open files]: {{}}riak/kv/2.0.9/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.0.9/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.9/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. 
You can install from source or download a diff --git a/content/riak/kv/2.0.9/setup/installing/rhel-centos.md b/content/riak/kv/2.0.9/setup/installing/rhel-centos.md index 89627b94e3..f9cadc5ce9 100644 --- a/content/riak/kv/2.0.9/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.0.9/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.0.9/setup/installing/source -[install source erlang]: /riak/kv/2.0.9/setup/installing/source/erlang -[install verify]: /riak/kv/2.0.9/setup/installing/verify +[install source index]: {{}}riak/kv/2.0.9/setup/installing/source +[install source erlang]: {{}}riak/kv/2.0.9/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.0.9/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.0.9/setup/installing/smartos.md b/content/riak/kv/2.0.9/setup/installing/smartos.md index 3f6315f06c..08d5fa0ab4 100644 --- a/content/riak/kv/2.0.9/setup/installing/smartos.md +++ b/content/riak/kv/2.0.9/setup/installing/smartos.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.9/installing/smartos/ --- -[install verify]: /riak/kv/2.0.9/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.9/setup/installing/verify {{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.9" %}} SmartOS is no longer supported in Riak KV 2.0.9+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). diff --git a/content/riak/kv/2.0.9/setup/installing/solaris.md b/content/riak/kv/2.0.9/setup/installing/solaris.md index d0bc5fd019..343c4e4c28 100644 --- a/content/riak/kv/2.0.9/setup/installing/solaris.md +++ b/content/riak/kv/2.0.9/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.0.9/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.9/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. 
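Whichever platform you install on, a quick liveness check afterward looks like this (a sketch; the `riak` script's location varies by install method):

```bash
riak start
riak ping                          # a healthy node answers "pong"

# The HTTP interface exposes a liveness endpoint as well
curl http://localhost:8098/ping    # a healthy node answers "OK"
```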
diff --git a/content/riak/kv/2.0.9/setup/installing/source.md b/content/riak/kv/2.0.9/setup/installing/source.md index 5428454131..90e940e38c 100644 --- a/content/riak/kv/2.0.9/setup/installing/source.md +++ b/content/riak/kv/2.0.9/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.0.9/setup/installing/source/erlang -[downloads]: /riak/kv/2.0.9/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.0.9/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.0.9/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.0.9/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.0.9/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.0.9/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.0.9/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.0.9/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.0.9/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.0.9/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.0.9/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.0.9/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.0.9/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.0.9/setup/installing/source/erlang.md b/content/riak/kv/2.0.9/setup/installing/source/erlang.md index dc13396d3c..a923aefbb2 100644 --- a/content/riak/kv/2.0.9/setup/installing/source/erlang.md +++ b/content/riak/kv/2.0.9/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.9/installing/source/erlang/ --- -[install index]: /riak/kv/2.0.9/setup/installing -[security basics]: /riak/kv/2.0.9/using/security/basics +[install index]: {{}}riak/kv/2.0.9/setup/installing +[security basics]: {{}}riak/kv/2.0.9/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho10.tar.gz). 
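A sketch of the usual source build of that tarball, assuming a standard Unix toolchain (the unpacked directory name and any platform-specific `configure` flags may differ; see the Erlang page for the authoritative steps):

```bash
wget http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho10.tar.gz
tar zxvf otp_src_R16B02-basho10.tar.gz
cd OTP_R16B02_basho10              # directory name may vary
./otp_build autoconf
./configure && make && sudo make install
```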
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.0.9/setup/installing/source/jvm.md b/content/riak/kv/2.0.9/setup/installing/source/jvm.md index 4a4a81fe8d..427962d65c 100644 --- a/content/riak/kv/2.0.9/setup/installing/source/jvm.md +++ b/content/riak/kv/2.0.9/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.0.9/installing/source/jvm/ --- -[usage search]: /riak/kv/2.0.9/developing/usage/search +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.0.9/setup/installing/suse.md b/content/riak/kv/2.0.9/setup/installing/suse.md index 88daf1ea1d..8e78058d82 100644 --- a/content/riak/kv/2.0.9/setup/installing/suse.md +++ b/content/riak/kv/2.0.9/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.0.9/installing/suse/ --- -[install verify]: /riak/kv/2.0.9/setup/installing/verify +[install verify]: {{}}riak/kv/2.0.9/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.0.9/setup/installing/verify.md b/content/riak/kv/2.0.9/setup/installing/verify.md index 42a9723fe0..8cff7efd4b 100644 --- a/content/riak/kv/2.0.9/setup/installing/verify.md +++ b/content/riak/kv/2.0.9/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.0.9/installing/verify-install/ --- -[client libraries]: /riak/kv/2.0.9/developing/client-libraries -[perf open files]: /riak/kv/2.0.9/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.0.9/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.0.9/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.0.9/developing/client-libraries +[perf open files]: {{}}riak/kv/2.0.9/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.0.9/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.0.9/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.0.9/setup/installing/windows-azure.md b/content/riak/kv/2.0.9/setup/installing/windows-azure.md index 8cb3cd81de..71ba70960b 100644 --- a/content/riak/kv/2.0.9/setup/installing/windows-azure.md +++ b/content/riak/kv/2.0.9/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". 
- ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.0.9/setup/planning/backend.md b/content/riak/kv/2.0.9/setup/planning/backend.md index 4fdbc1b4e5..9c45889665 100644 --- a/content/riak/kv/2.0.9/setup/planning/backend.md +++ b/content/riak/kv/2.0.9/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.9/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.9/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.0.9/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.0.9/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.9/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.0.9/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.0.9/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
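The backend is selected per node in `riak.conf`. A minimal sketch (the config path varies by platform):

```bash
# riak.conf (Cuttlefish format) accepts one of:
#   storage_backend = bitcask    # the default
#   storage_backend = leveldb
#   storage_backend = memory
#   storage_backend = multi      # mix engines, assigned via bucket properties
grep storage_backend /etc/riak/riak.conf
```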
diff --git a/content/riak/kv/2.0.9/setup/planning/backend/bitcask.md b/content/riak/kv/2.0.9/setup/planning/backend/bitcask.md index 86b9a29f5b..734bd9143f 100644 --- a/content/riak/kv/2.0.9/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.0.9/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.0.9/using/admin/riak-cli -[config reference]: /riak/kv/2.0.9/configuring/reference -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.0.9/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.0.9/setup/planning/backend/multi -[usage search]: /riak/kv/2.0.9/developing/usage/search - -[glossary aae]: /riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.0.9/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.0.9/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.0.9/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.0.9/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.0.9/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.0.9/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.0.9/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.0.9/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
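On disk, Bitcask keeps one numbered directory per vnode; the numbers are the partitions' positions on the ring. A sketch of what that looks like (the data path and partition ids depend on your platform and ring size):

```bash
ls /var/lib/riak/bitcask
# 0
# 22835963083295358096932575511191922182123945984
# 45671926166590716193865151022383844364247891968
# ...
```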
diff --git a/content/riak/kv/2.0.9/setup/planning/backend/leveldb.md b/content/riak/kv/2.0.9/setup/planning/backend/leveldb.md index d960f3417e..718e3d2c4a 100644 --- a/content/riak/kv/2.0.9/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.0.9/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.0.9/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[config reference]: /riak/kv/2.0.9/configuring/reference -[perf index]: /riak/kv/2.0.9/using/performance -[config reference#aae]: /riak/kv/2.0.9/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[perf index]: {{}}riak/kv/2.0.9/using/performance +[config reference#aae]: {{}}riak/kv/2.0.9/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.0.9/setup/planning/backend/memory.md b/content/riak/kv/2.0.9/setup/planning/backend/memory.md index 939732163f..a5d80f97a3 100644 --- a/content/riak/kv/2.0.9/setup/planning/backend/memory.md +++ b/content/riak/kv/2.0.9/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.9/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.0.9/configuring/reference -[plan backend multi]: /riak/kv/2.0.9/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[plan backend multi]: {{}}riak/kv/2.0.9/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.0.9/setup/planning/backend/multi.md b/content/riak/kv/2.0.9/setup/planning/backend/multi.md index ee20dd0291..3d18d50f52 100644 --- a/content/riak/kv/2.0.9/setup/planning/backend/multi.md +++ b/content/riak/kv/2.0.9/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.9/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.0.9/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.0.9/setup/planning/backend/memory -[config reference]: /riak/kv/2.0.9/configuring/reference -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.0.9/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.0.9/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.0.9/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.0.9/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.0.9/setup/planning/best-practices.md b/content/riak/kv/2.0.9/setup/planning/best-practices.md index 28456ba891..c1819d3282 100644 --- a/content/riak/kv/2.0.9/setup/planning/best-practices.md +++ b/content/riak/kv/2.0.9/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.9/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.0.9/using/reference/handoff -[config mapreduce]: /riak/kv/2.0.9/configuring/mapreduce -[glossary aae]: /riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.0.9/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.0.9/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.0.9/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.0.9/setup/planning/bitcask-capacity-calc.md index 5fdfb0a522..43fdaffe6c 100644 --- a/content/riak/kv/2.0.9/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.0.9/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
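For a back-of-envelope version of what those calculators compute, the keydir RAM estimate is roughly keys × (static per-key overhead + average bucket-plus-key length). A sketch, assuming the documented overhead of about 44.5 bytes per key on 64-bit systems and illustrative key counts (prefer the calculators for real planning):

```bash
awk 'BEGIN {
  keys     = 200000000   # total keys across the cluster (example figure)
  overhead = 44.5        # approximate static per-key overhead, 64-bit systems
  avg_name = 36          # average bucket-name + key bytes (example figure)
  printf "~%.1f GiB of keydir RAM across the cluster\n",
         keys * (overhead + avg_name) / 2^30
}'
```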
diff --git a/content/riak/kv/2.0.9/setup/planning/cluster-capacity.md b/content/riak/kv/2.0.9/setup/planning/cluster-capacity.md index 2f9f21bc2a..bd58dcb01b 100644 --- a/content/riak/kv/2.0.9/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.0.9/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.9/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.0.9/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.0.9/setup/planning -[concept replication]: /riak/kv/2.0.9/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.0.9/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.0.9/configuring/reference -[perf benchmark]: /riak/kv/2.0.9/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.0.9/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.0.9/setup/planning +[concept replication]: {{}}riak/kv/2.0.9/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[perf benchmark]: {{}}riak/kv/2.0.9/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.0.9/setup/planning/operating-system.md b/content/riak/kv/2.0.9/setup/planning/operating-system.md index 91be45ed4a..31d5b9b7fb 100644 --- a/content/riak/kv/2.0.9/setup/planning/operating-system.md +++ b/content/riak/kv/2.0.9/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.0.9/downloads/ +[downloads]: {{}}riak/kv/2.0.9/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.0.9/setup/planning/start.md b/content/riak/kv/2.0.9/setup/planning/start.md index e74f7d0e67..296995555e 100644 --- a/content/riak/kv/2.0.9/setup/planning/start.md +++ b/content/riak/kv/2.0.9/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.9/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.0.9/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.0.9/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.0.9/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.0.9/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.0.9/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.0.9/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster.
diff --git a/content/riak/kv/2.0.9/setup/upgrading/checklist.md b/content/riak/kv/2.0.9/setup/upgrading/checklist.md index 2e880e3aea..8a16668cc6 100644 --- a/content/riak/kv/2.0.9/setup/upgrading/checklist.md +++ b/content/riak/kv/2.0.9/setup/upgrading/checklist.md @@ -15,24 +15,24 @@ aliases: - /riak/kv/2.0.9/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.0.9/using/performance/open-files-limit -[perf index]: /riak/kv/2.0.9/using/performance +[perf open files]: {{}}riak/kv/2.0.9/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.0.9/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.0.9/using/security/basics -[cluster ops load balance]: /riak/kv/2.0.9/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.0.9/configuring/reference -[config backend]: /riak/kv/2.0.9/configuring/backend -[usage search]: /riak/kv/2.0.9/developing/usage/search -[usage conflict resolution]: /riak/kv/2.0.9/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.0.9/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.0.9/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.0.9/using/admin/commands -[use admin riak control]: /riak/kv/2.0.9/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.0.9/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.0.9/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.0.9/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.0.9/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.0.9/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[config backend]: {{}}riak/kv/2.0.9/configuring/backend +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.0.9/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.0.9/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.0.9/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.0.9/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.0.9/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.0.9/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.0.9/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.0.9/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition. 
diff --git a/content/riak/kv/2.0.9/setup/upgrading/search.md b/content/riak/kv/2.0.9/setup/upgrading/search.md
new file mode 100644
index 0000000000..af9e1650a8
--- /dev/null
+++ b/content/riak/kv/2.0.9/setup/upgrading/search.md
@@ -0,0 +1,273 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.0.9"
+menu:
+  riak_kv-2.0.9:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed [Yokozuna](../../../using/reference/search) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes, while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space than
+legacy Search; a prudent plan will nonetheless assume that new Search
+uses as much disk as legacy. You can also expect more CPU usage, as
+analysis will temporarily be performed by both systems. Finally, Solr
+runs in its own JVM process, which requires its own RAM. A good start
+is 2 GB, but more will be required for heavier workloads. Do not make
+the heap too large, however, as that can cause lengthy garbage
+collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index, and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data, as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+> Migration requires that Riak's AAE subsystem be enabled.
+It's responsible for finding all the missing index entries for existing
+data and adding them. Technically speaking, the migration can be
+performed without AAE, but it would then require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. That method uses more CPU and
+network, and especially more disk space under merge index, whose
+garbage collection algorithm is bad at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the section is called
+   `yokozuna`; if you've switched to the new `riak.conf` configuration
+   format, the option is called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+                %% Other configs
+                {enabled, true},
+                %% Other configs
+               ]}
+    ```
+
+    {{% note title="Upgrade First" %}}
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps, which require them.
+    {{% /note %}}
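+
+    As a quick sanity check before continuing, you can wait for the new
+    Search service on each upgraded node. This is a sketch rather than a
+    required step: it assumes the service registers under the name
+    `yokozuna` and that the node is named `riak@192.168.1.10`, so
+    substitute your own node name.
+
+    ```bash
+    riak-admin wait-for-service yokozuna riak@192.168.1.10
+    ```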
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+    To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+    instructions to learn how to define your XML schema file. Once
+    you've created the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. For each bucket that is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is the one we are attempting to migrate all of our index data
+to. You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+    Once a bucket is associated with the new Search, all objects that are
+    written or modified in Riak will be indexed by **both** legacy and new
+    Search. However, the HTTP and client query interfaces will still
+    continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+    Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+    the following code into the shell; it clears the Search hash trees for
+    each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+    ```
+
+    Press `Ctrl-D` to exit from the attached shell.
+
+    In the background, AAE will rebuild the hash trees and exchange them
+    with KV. These exchanges will notice that objects are missing and will
+    index them in new Search.
+
+6. Monitor the AAE status of every node until a full round of exchanges
+has occurred on every node.
+
+    ```bash
+    riak-admin search aae-status
+    ```
+
+    First, you must wait until all trees are rebuilt. This may take a
+    while, as each node is configured, by default, to build a maximum of
+    one tree per hour. You can determine when a tree is built by looking
+    at the `Entropy Trees` section. When a tree is not built it will show
+    `--` under the `Built (ago)` column. Otherwise, it will list how long
+    ago the tree was built in a human-friendly format. Here is an example
+    of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                                Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792     --
+    319703483166135013357056057156686910549735243776     --
+    ...
+    ```
+
+    Here is an example of built trees:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                                Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792     12.3 hr
+    319703483166135013357056057156686910549735243776     5.3 hr
+    ...
+    ```
+
+    After all the trees are built, you then have to wait for a full
+    exchange round to occur for every partition on every node. That is,
+    the full exchange round must be **NEWER** than the time the tree was
+    built. That way you know the exchange was based on the latest tree.
+    The exchange information is found under the `Exchanges` section.
+    Under that section there are two columns: `Last (ago)` and `All
+    (ago)`. In this case you want to wait until the `All (ago)` value is
+    newer than the value of `Built (ago)` in the `Entropy Trees` section.
+    For example, given the entropy tree output above, this output would
+    indicate that both partitions have had a full exchange round since the
+    latest tree was built:
+
+    ```
+    ================================== Exchanges ==================================
+    Index                                                Last (ago)    All (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792     12.1 hr       12.1 hr
+    319703483166135013357056057156686910549735243776     5.1 hr        5.2 hr
+    ...
+    ```
+
+    Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than
+    `5.3 hr`. Once the exchange is newer for every partition on every
+    node, you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command, which gives HTTP and PB query
+control to the new Riak Search.
+
+    ```bash
+    riak-admin search switch-to-new-search
+    ```
+
+    {{% note title="Check Results Before Switching (Optional)" %}}
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After `switch-to-new-search` is run, all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching, you can use its dedicated
+    HTTP resource at `/search/query/?q=...`.
+    {{% /note %}}
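+
+    For example, assuming the index created earlier is named `my_index`
+    (the index name and the catch-all query here are placeholders; any
+    Solr query string will work), you could spot-check the new index
+    like this:
+
+    ```curl
+    curl "http://localhost:8098/search/query/my_index?wt=json&q=*:*"
+    ```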
+
+8. Set the `search` bucket property to `false` for all legacy-indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting the
+`riak_search` section's `enabled` flag to `false`.
+
+    ```appconfig
+    {riak_search, [
+                   %% Other configs
+                   {enabled, false},
+                   %% Other configs
+                  ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search, this
+causes the commit hooks to persist even when legacy Search is disabled
+and the `search` property is set to `false`.
+
+    New Search has code to expunge the legacy hooks from the raw ring,
+    but this only runs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (as in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories (by default, the
+`merge_index` directory under each node's Riak data directory) to
+reclaim disk space.
+
+If you have any questions, reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than in the middle of a migration.
diff --git a/content/riak/kv/2.0.9/setup/upgrading/version.md b/content/riak/kv/2.0.9/setup/upgrading/version.md
index 8a9b3959aa..e4facb351c 100644
--- a/content/riak/kv/2.0.9/setup/upgrading/version.md
+++ b/content/riak/kv/2.0.9/setup/upgrading/version.md
@@ -19,18 +19,18 @@ aliases:
 ---
-[production checklist]: /riak/kv/2.0.9/setup/upgrading/checklist
-[use admin riak control]: /riak/kv/2.0.9/using/admin/riak-control
-[use admin commands]: /riak/kv/2.0.9/using/admin/commands
-[use admin riak-admin]: /riak/kv/2.0.9/using/admin/riak-admin
-[usage secondary-indexes]: /riak/kv/2.0.9/developing/usage/secondary-indexes
-[release notes]: /riak/kv/2.0.9/release-notes
+[production checklist]: {{}}riak/kv/2.0.9/setup/upgrading/checklist
+[use admin riak control]: {{}}riak/kv/2.0.9/using/admin/riak-control
+[use admin commands]: {{}}riak/kv/2.0.9/using/admin/commands
+[use admin riak-admin]: {{}}riak/kv/2.0.9/using/admin/riak-admin
+[usage secondary-indexes]: {{}}riak/kv/2.0.9/developing/usage/secondary-indexes
+[release notes]: {{}}riak/kv/2.0.9/release-notes
 [riak enterprise]: http://basho.com/products/riak-kv/
-[cluster ops mdc]: /riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter
-[config v3 mdc]: /riak/kv/2.0.9/configuring/v3-multi-datacenter
-[jmx monitor]: /riak/kv/2.0.9/using/reference/jmx
-[snmp]: /riak/kv/2.0.9/using/reference/snmp
-[Release Notes]: /riak/kv/2.0.9/release-notes
+[cluster ops mdc]: {{}}riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter
+[jmx monitor]: {{}}riak/kv/2.0.9/using/reference/jmx
+[snmp]: {{}}riak/kv/2.0.9/using/reference/snmp
+[Release Notes]: {{}}riak/kv/2.0.9/release-notes
 
 ## Overview
 
diff --git a/content/riak/kv/2.0.9/using/admin/commands.md b/content/riak/kv/2.0.9/using/admin/commands.md
index 62680ccbe5..1937626f8a 100644
--- a/content/riak/kv/2.0.9/using/admin/commands.md
+++ b/content/riak/kv/2.0.9/using/admin/commands.md
@@ -15,11 +15,11 @@ aliases:
 - /riak/kv/2.0.9/ops/running/cluster-admin
 ---
-[use admin riak-admin#cluster]: /riak/kv/2.0.9/using/admin/riak-admin/#cluster
-[concept clusters]:
/riak/kv/2.0.9/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.0.9/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.0.9/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.0.9/using/admin/riak-admin.md b/content/riak/kv/2.0.9/using/admin/riak-admin.md index f1c0eea423..9b79291480 100644 --- a/content/riak/kv/2.0.9/using/admin/riak-admin.md +++ b/content/riak/kv/2.0.9/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.0.9/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.0.9/configuring/reference -[use admin commands]: /riak/kv/2.0.9/using/admin/commands -[use admin commands#join]: /riak/kv/2.0.9/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.0.9/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.0.9/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.0.9/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.0.9/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.0.9/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.0.9/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.0.9/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.0.9/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.0.9/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.0.9/setup/downgrade -[security index]: /riak/kv/2.0.9/using/security/ -[security managing]: /riak/kv/2.0.9/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.0.9/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.0.9/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.0.9/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.0.9/using/cluster-operations/strong-consistency -[cluster ops handoff]: /riak/kv/2.0.9/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.0.9/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[use admin commands]: {{}}riak/kv/2.0.9/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.0.9/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.0.9/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.0.9/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.0.9/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.0.9/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.0.9/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.0.9/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.0.9/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.0.9/using/cluster-operations/inspecting-node +[use ref 
monitoring]: {{}}riak/kv/2.0.9/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.0.9/setup/downgrade +[security index]: {{}}riak/kv/2.0.9/using/security/ +[security managing]: {{}}riak/kv/2.0.9/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.0.9/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.0.9/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.0.9/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.0.9/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.0.9/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.0.9/using/admin/riak-admin/#stats ## `riak-admin` diff --git a/content/riak/kv/2.0.9/using/admin/riak-cli.md b/content/riak/kv/2.0.9/using/admin/riak-cli.md index 83c332e19a..70c5f6a3f1 100644 --- a/content/riak/kv/2.0.9/using/admin/riak-cli.md +++ b/content/riak/kv/2.0.9/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.0.9/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.0.9/configuring/reference/ +[configuration file]: {{}}riak/kv/2.0.9/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.0.9/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.0.9/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.0.9/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.0.9/configuring/reference/ ## riak diff --git a/content/riak/kv/2.0.9/using/admin/riak-control.md b/content/riak/kv/2.0.9/using/admin/riak-control.md index e74916bf58..7f34002b21 100644 --- a/content/riak/kv/2.0.9/using/admin/riak-control.md +++ b/content/riak/kv/2.0.9/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.0.9/configuring/reference +[config reference]: {{}}riak/kv/2.0.9/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.0.9/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.0.9/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. 
Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.0.9/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.0.9/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.0.9/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.0.9/using/cluster-operations/active-anti-entropy.md index b56e1d8c65..b336e8576f 100644 --- a/content/riak/kv/2.0.9/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.0.9/using/cluster-operations/active-anti-entropy.md @@ -15,8 +15,8 @@ aliases: - /riak/2.0.9/ops/advanced/aae/ --- -[config search#throttledelay]: /riak/kv/2.0.9/configuring/search/#search-anti-entropy-throttle-$tier-delay -[config search#throttle]: riak/kv/2.0.9/configuring/search/#search-anti-entropy-throttle +[config search#throttledelay]: {{}}riak/kv/2.0.9/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{}}riak/kv/2.0.9/configuring/search/#search-anti-entropy-throttle Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. @@ -57,12 +57,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. 
## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -90,7 +90,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes.md index 5d4916ddb6..32b543eb2c 100644 --- a/content/riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.0.9/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.0.9/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.0.9/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.0.9/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.0.9/using/cluster-operations/backing-up.md b/content/riak/kv/2.0.9/using/cluster-operations/backing-up.md index 609f6c2188..54100c7325 100644 --- a/content/riak/kv/2.0.9/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.0.9/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.9/ops/running/backups --- -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters -[config reference]: /riak/kv/2.0.9/configuring/reference -[plan backend leveldb]: /riak/kv/2.0.9/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.0.9/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency -[concept aae]: /riak/kv/2.0.9/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.0.9/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.0.9/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.0.9/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.0.9/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.0.9/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down `](/riak/kv/2.0.9/using/admin/riak-admin/#down) + [`riak-admin down `]({{}}riak/kv/2.0.9/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join `](/riak/kv/2.0.9/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join `]({{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace `](/riak/kv/2.0.9/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace `]({{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.0.9/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.0.9/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.0.9/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.0.9/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.0.9/using/cluster-operations/bucket-types.md b/content/riak/kv/2.0.9/using/cluster-operations/bucket-types.md index d056d60881..5feccdd675 100644 --- a/content/riak/kv/2.0.9/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.0.9/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.0.9/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. 
diff --git a/content/riak/kv/2.0.9/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.0.9/using/cluster-operations/changing-cluster-info.md index e7da56e82b..27f9140dfc 100644 --- a/content/riak/kv/2.0.9/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.0.9/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.0.9/configuring/reference +[config reference]: {{}}riak/kv/2.0.9/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.0.9/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.0.9/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.0.9/using/cluster-operations/handoff.md b/content/riak/kv/2.0.9/using/cluster-operations/handoff.md index cc8d54efc1..70649e6335 100644 --- a/content/riak/kv/2.0.9/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.0.9/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.0.9/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.0.9/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.0.9/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.0.9/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.0.9/using/cluster-operations/logging.md b/content/riak/kv/2.0.9/using/cluster-operations/logging.md index c1ef0d1647..f466fb6c7d 100644 --- a/content/riak/kv/2.0.9/using/cluster-operations/logging.md +++ b/content/riak/kv/2.0.9/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e. 
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.0.9/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.0.9/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.0.9/using/cluster-operations/replacing-node.md b/content/riak/kv/2.0.9/using/cluster-operations/replacing-node.md index d6205daec6..12f0a5ff0b 100644 --- a/content/riak/kv/2.0.9/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.0.9/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.0.9/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.0.9/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.0.9/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.0.9/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.0.9/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.0.9/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.0.9/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.0.9/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.0.9/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.0.9/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.0.9/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.0.9/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.0.9/using/cluster-operations/strong-consistency.md index 60cf90150b..bcf0af16f4 100644 --- a/content/riak/kv/2.0.9/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.0.9/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.0.9/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.0.9/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.0.9/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.0.9/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.0.9/using/cluster-operations/v2-multi-datacenter.md index ed3011b98c..249eea70c5 100644 --- a/content/riak/kv/2.0.9/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.9/using/cluster-operations/v2-multi-datacenter.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication system is largely @@ -163,7 +163,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -183,7 +183,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -221,7 +221,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.0.9/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.0.9/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -242,7 +242,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
diff --git a/content/riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter.md
index 00cf343152..9cff9b97d4 100644
--- a/content/riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter.md
+++ b/content/riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter.md
@@ -17,11 +17,11 @@ aliases:
 - /riak/kv/2.0.9/ops/mdc/v3/operations
 ---
-[config v3 mdc]: /riak/kv/2.0.9/configuring/v3-multi-datacenter
-[config v3 nat]: /riak/kv/2.0.9/configuring/v3-multi-datacenter/nat
-[config v3 quickstart]: /riak/kv/2.0.9/configuring/v3-multi-datacenter/quick-start
-[config v3 ssl]: /riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl
-[ref v3 stats]: /riak/kv/2.0.9/using/reference/multi-datacenter/statistics
+[config v3 mdc]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter
+[config v3 nat]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{}}riak/kv/2.0.9/using/reference/multi-datacenter/statistics
 
 This document explains how to manage replication with the `riak-repl`
 command. Some of these commands can be set or behavior altered by
diff --git a/content/riak/kv/2.0.9/using/performance.md b/content/riak/kv/2.0.9/using/performance.md
index 3b577fbc54..ac2e9e5788 100644
--- a/content/riak/kv/2.0.9/using/performance.md
+++ b/content/riak/kv/2.0.9/using/performance.md
@@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those
 changes.
 
 For performance and tuning recommendations specific to running Riak
-clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.0.9/using/performance/amazon-web-services).
+clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.0.9/using/performance/amazon-web-services).
 
 {{% note title="Note on other operating systems" %}}
 Unless otherwise specified, the tunings recommended below are for Linux
@@ -253,12 +253,12 @@ These settings have been tested and benchmarked by Basho in nodes with
 
 Riak and supporting tools can consume a large number of open file
 handles during normal operation. For stability, increasing the number of
-open files limit is necessary. See [Open Files Limit](/riak/kv/2.0.9/using/performance/open-files-limit/) for more
+open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.0.9/using/performance/open-files-limit/) for more
 details.
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.0.9/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.0.9/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.0.9/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.0.9/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.0.9/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.0.9/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.0.9/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.0.9/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.0.9/using/performance/benchmarking.md b/content/riak/kv/2.0.9/using/performance/benchmarking.md index 353a8611c7..b8f17a40ab 100644 --- a/content/riak/kv/2.0.9/using/performance/benchmarking.md +++ b/content/riak/kv/2.0.9/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.0.9/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.0.9/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.0.9/using/performance/latency-reduction.md b/content/riak/kv/2.0.9/using/performance/latency-reduction.md index 0608cadbfe..d7bca5beed 100644 --- a/content/riak/kv/2.0.9/using/performance/latency-reduction.md +++ b/content/riak/kv/2.0.9/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.0.9/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.0.9/using/performance/multi-datacenter-tuning.md index 12cb21c20a..0379d4a5a2 100644 --- a/content/riak/kv/2.0.9/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.0.9/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.0.9/using/performance +[perf index]: {{}}riak/kv/2.0.9/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.0.9/using/performance/open-files-limit.md b/content/riak/kv/2.0.9/using/performance/open-files-limit.md index f63fdc1eb0..0ff157cccf 100644 --- a/content/riak/kv/2.0.9/using/performance/open-files-limit.md +++ b/content/riak/kv/2.0.9/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/ops/tuning/open-files-limit/ --- -[plan backend]: /riak/kv/2.0.9/setup/planning/backend/ +[plan backend]: {{}}riak/kv/2.0.9/setup/planning/backend/ [blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files Riak KV can accumulate a large number of open file handles during operation. 
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles. diff --git a/content/riak/kv/2.0.9/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.0.9/using/performance/v2-scheduling-fullsync.md index 83812687c8..e97cc289b0 100644 --- a/content/riak/kv/2.0.9/using/performance/v2-scheduling-fullsync.md +++ b/content/riak/kv/2.0.9/using/performance/v2-scheduling-fullsync.md @@ -14,7 +14,7 @@ commercial_offering: true --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. {{% /note %}} diff --git a/content/riak/kv/2.0.9/using/reference/bucket-types.md b/content/riak/kv/2.0.9/using/reference/bucket-types.md index 1d25ce5cc5..4479477016 100644 --- a/content/riak/kv/2.0.9/using/reference/bucket-types.md +++ b/content/riak/kv/2.0.9/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.0.9/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.0.9/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.0.9/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.0.9/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. 
@@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.0.9/developing/data-types), and [strong consistency](/riak/kv/2.0.9/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.0.9/developing/data-types), and [strong consistency]({{}}riak/kv/2.0.9/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.0.9/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.0.9/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.0.9/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.0.9/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.0.9/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.0.9/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.0.9/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.0.9/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.0.9/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution) with the appropriate application-side business logic. 
To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.0.9/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.0.9/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.0.9/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.0.9/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.0.9/learn/concepts/buckets) and [keys](/riak/kv/2.0.9/learn/concepts/keys-and-objects). +because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.0.9/learn/concepts/buckets) and [keys]({{}}riak/kv/2.0.9/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.0.9/using/reference/custom-code.md b/content/riak/kv/2.0.9/using/reference/custom-code.md index 1b475b6708..d12c18bbd0 100644 --- a/content/riak/kv/2.0.9/using/reference/custom-code.md +++ b/content/riak/kv/2.0.9/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.0.9/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.0.9/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.0.9/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.0.9/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name as the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.0.9/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.0.9/using/admin/riak-admin/#wait-for-service).
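A minimal sketch of that rolling pattern, assuming the standard `riak-admin wait-for-service` syntax (the node name is illustrative):

```bash
# After restarting a node, block until its riak_kv service is available
# before moving on to the next node (node name is illustrative)
riak-admin wait-for-service riak_kv riak@192.168.1.10
```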
{{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.0.9/using/reference/handoff.md b/content/riak/kv/2.0.9/using/reference/handoff.md index 3f4af97e3e..5b3a37d3eb 100644 --- a/content/riak/kv/2.0.9/using/reference/handoff.md +++ b/content/riak/kv/2.0.9/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.0.9/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.0.9/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.0.9/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.0.9/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.0.9/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.0.9/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.0.9/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.0.9/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.0.9/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.0.9/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.0.9/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.0.9/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.0.9/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.0.9/configuring/reference/#vnode_management_timer). 
Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.0.9/using/reference/jmx.md b/content/riak/kv/2.0.9/using/reference/jmx.md index 8170c93af3..050b3f9ed3 100644 --- a/content/riak/kv/2.0.9/using/reference/jmx.md +++ b/content/riak/kv/2.0.9/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.0.9/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.0.9/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.0.9/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.0.9/using/reference/logging.md b/content/riak/kv/2.0.9/using/reference/logging.md index afef0270a8..ba5b742ee5 100644 --- a/content/riak/kv/2.0.9/using/reference/logging.md +++ b/content/riak/kv/2.0.9/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.0.9/ops/running/logging --- -[cluster ops log]: /riak/kv/2.0.9/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.0.9/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files](/riak/kv/2.0.9/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.0.9/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.0.9/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.0.9/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. 
* `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.0.9/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.0.9/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.0.9/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.0.9/using/reference/multi-datacenter/comparison.md index 4e44484114..9217a88ab6 100644 --- a/content/riak/kv/2.0.9/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.0.9/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.0.9/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.0.9/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.0.9/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.0.9/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. -* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.0.9/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.0.9/using/reference/runtime-interaction.md b/content/riak/kv/2.0.9/using/reference/runtime-interaction.md index 4471e74e74..5e359ef1de 100644 --- a/content/riak/kv/2.0.9/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.0.9/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.0.9/ops/advanced/runtime --- -[config reference]: /riak/kv/2.0.9/configuring/reference -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.0.9/configuring/reference +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.0.9/using/reference/search.md b/content/riak/kv/2.0.9/using/reference/search.md index 76b3a32cab..fb8efac94a 100644 --- a/content/riak/kv/2.0.9/using/reference/search.md +++ b/content/riak/kv/2.0.9/using/reference/search.md @@ -15,21 +15,21 @@ aliases: - /riak/kv/2.0.9/dev/advanced/search --- -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters -[configuring search]: /riak/kv/2.0.9/configuring/search +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters +[configuring search]: {{}}riak/kv/2.0.9/configuring/search > **Note on search 2.0 vs. legacy search** > > This document refers to Riak search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). 
For information about the deprecated Riak search, visit [the old Using Riak search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -search, you should check out the [Using Search](/riak/kv/2.0.9/developing/usage/search) document. +search, you should check out the [Using Search]({{}}riak/kv/2.0.9/developing/usage/search) document. -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -127,7 +127,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.0.9/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.0.9/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -289,7 +289,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on to how customize your own [search schema](/riak/kv/2.0.9/developing/usage/search-schemas). +You can also find more information on how to customize your own [search schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -299,7 +299,7 @@ indexed. ## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.0.9/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -354,7 +354,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.0.9/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.0.9/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.0.9/using/reference/secondary-indexes.md b/content/riak/kv/2.0.9/using/reference/secondary-indexes.md index 729a946aad..48cbb3ff3f 100644 --- a/content/riak/kv/2.0.9/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.0.9/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.0.9/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.0.9/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.0.9/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. 
if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.0.9/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.0.9/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.0.9/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.0.9/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.0.9/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.0.9/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.0.9/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.0.9/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.0.9/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. 
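To make the write-time tagging described above concrete, here is a hedged sketch over the HTTP interface (bucket, key, index name, and value are all illustrative; the `_bin` suffix marks a binary/string index):

```bash
# Write an object tagged with a secondary index entry
# (bucket, key, index name, and value are illustrative)
curl -XPOST http://localhost:8098/buckets/users/keys/john_smith \
  -H "Content-Type: text/plain" \
  -H "x-riak-index-email_bin: jsmith@example.com" \
  -d "object body"

# Query the index to retrieve the matching keys
curl http://localhost:8098/buckets/users/index/email_bin/jsmith@example.com
```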
diff --git a/content/riak/kv/2.0.9/using/reference/statistics-monitoring.md b/content/riak/kv/2.0.9/using/reference/statistics-monitoring.md index decd35b551..336dd898f6 100644 --- a/content/riak/kv/2.0.9/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.0.9/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.0.9/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.0.9/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.0.9/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.0.9/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.0.9/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.0.9/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.0.9/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.0.9/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.0.9/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.0.9/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.0.9/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.0.9/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -156,7 +156,7 @@ Metric | Description ## Command-line Interface -The [`riak-admin`](/riak/kv/2.0.9/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.0.9/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -181,14 +181,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.0.9/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.0.9/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. 
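For example, a quick spot-check of both interfaces (the stat name grepped for is just one of many in the output; host and port are illustrative):

```bash
# Retrieve a single statistic from the command-line interface
riak-admin status | grep node_gets

# Retrieve the same statistics over HTTP via the /stats endpoint
curl -s http://localhost:8098/stats
```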
Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.0.9/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.0.9/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -235,7 +235,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.0.9/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.0.9/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -259,7 +259,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.0.9/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.0.9/developing/api/http/status) endpoint is also available. #### Nagios @@ -333,14 +333,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.0.9/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.0.9/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.0.9/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.0.9/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -362,9 +362,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.0.9/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.0.9/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.0.9/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.0.9/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -380,9 +380,9 @@ Docs](https://github.com/basho/basho_docs). 
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.0.9/using/reference/strong-consistency.md b/content/riak/kv/2.0.9/using/reference/strong-consistency.md index 3006ce18e3..fa5b951eb8 100644 --- a/content/riak/kv/2.0.9/using/reference/strong-consistency.md +++ b/content/riak/kv/2.0.9/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.0.9/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.0.9/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.0.9/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.0.9/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.0.9/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.0.9/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.0.9/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.0.9/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.0.9/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.0.9/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.0.9/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.0.9/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. 
Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.0.9/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.0.9/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.0.9/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.0.9/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter.md index 5467fe498e..c5d5a6334c 100644 --- a/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter.md +++ b/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter.md @@ -16,7 +16,7 @@ toc: true [v2 mdc fullsync]: ./scheduling-fullsync {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.9/using/reference/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.9/using/reference/v3-multi-datacenter/) instead. {{% /note %}} diff --git a/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter/architecture.md index 34026db166..c7aff874ae 100644 --- a/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter/architecture.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.9/using/reference/v3-multi-datacenter/architecture/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.9/using/reference/v3-multi-datacenter/architecture/) instead. {{% /note %}} @@ -83,7 +83,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.0.9/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.0.9/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -95,7 +95,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{}}images/MDC_Full-sync-small.png)
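Fullsync under v2 can also be driven manually; a brief sketch using the deprecated `riak-repl` fullsync commands:

```bash
# Manually start, pause, and resume a v2 fullsync (deprecated interface)
riak-repl start-fullsync
riak-repl pause-fullsync
riak-repl resume-fullsync
```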
## Realtime Replication @@ -113,7 +113,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{}}images/MDC-real-time-sync-small.png)
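Wiring two clusters together for v2 replication follows the listener/site pattern described above; a hedged sketch (node name, addresses, port, and site name are all illustrative):

```bash
# On the primary (source) cluster: expose a replication listener
riak-repl add-listener riak@10.0.1.10 10.0.1.10 9010

# On the secondary (sink) cluster: connect to that listener as a site
riak-repl add-site 10.0.1.10 9010 site1
```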
## Restrictions @@ -121,6 +121,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.0.9/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.0.9/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.0.9/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.0.9/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter/scheduling-fullsync.md index fe38c24583..cf611ac0a5 100644 --- a/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.0.9/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.0.9/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.0.9/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. {{% /note %}} diff --git a/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/aae.md index 806a87c62d..fa6ba53795 100644 --- a/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.0.9/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.0.9/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.0.9/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/architecture.md index 540c0e88ef..a3c73fec3c 100644 --- a/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.0.9/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.0.9/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.0.9/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.0.9/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.0.9/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC fullsync]({{}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC fullsync]({{}}images/MDC-v3-realtime2.png)
## Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC fullsync]({{}}images/MDC-v3-realtime3.png)
@@ -145,7 +145,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC fullsync]({{}}images/MDC-v3-realtime4.png)
@@ -154,7 +154,7 @@ nodes.
-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC fullsync]({{}}images/MDC-v3-realtime5.png)
@@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.
-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC fullsync]({{}}images/MDC-v3-realtime6.png)
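For comparison with the diagrams above, a hedged sketch of turning on v3 realtime replication toward a named sink cluster (the cluster name and address are illustrative):

```bash
# On the source cluster: connect to the sink's cluster manager,
# then enable and start realtime replication toward it
riak-repl connect 10.0.2.10:9080
riak-repl realtime enable newyork
riak-repl realtime start newyork
```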
## Restrictions diff --git a/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/cascading-writes.md index 0de2327347..42af2320d7 100644 --- a/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/cascading-writes.md @@ -83,7 +83,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{}}riak/kv/2.0.9/using/cluster-operations/v3-multi-datacenter) for more information. To show the current settings: diff --git a/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/scheduling-fullsync.md index aab6ff2a18..aa0600e357 100644 --- a/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.0.9/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.0.9/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.0.9/configuring/reference/#advanced-configuration +[config reference#advanced]: {{}}riak/kv/2.0.9/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.0.9/using/repair-recovery/errors.md b/content/riak/kv/2.0.9/using/repair-recovery/errors.md index 0af5b5fa12..b39f6047e0 100644 --- a/content/riak/kv/2.0.9/using/repair-recovery/errors.md +++ b/content/riak/kv/2.0.9/using/repair-recovery/errors.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.0.9/configuring/reference +[config reference]: {{}}riak/kv/2.0.9/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1. +`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1. 
`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. 
`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.0.9/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.0.9/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.0.9/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.0.9/using/repair-recovery/failure-recovery.md index ba7226ab3c..2dd3acd6df 100644 --- a/content/riak/kv/2.0.9/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.0.9/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.0.9/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.0.9/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.0.9/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.0.9/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
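One way to check for this condition, assuming the standard sibling statistics in the node's status output:

```bash
# High or steadily climbing sibling counts on reads suggest that
# application-side conflict resolution is not keeping up
riak-admin status | grep node_get_fsm_siblings
```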
@@ -116,7 +116,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak/kv/2.0.9/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.0.9/using/repair-recovery/repairs.md b/content/riak/kv/2.0.9/using/repair-recovery/repairs.md index 2c01dff28b..3c1b247134 100644 --- a/content/riak/kv/2.0.9/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.0.9/using/repair-recovery/repairs.md @@ -20,13 +20,13 @@ aliases: - /riak/2.0.9/ops/running/recovery/repairing-partitions - /riak/kv/2.0.9/ops/running/recovery/repairing-partitions --- -[cluster ops aae]: /riak/kv/2.0.9/using/cluster-operations/active-anti-entropy/ -[config ref]: /riak/kv/2.0.9/configuring/reference/ +[cluster ops aae]: {{}}riak/kv/2.0.9/using/cluster-operations/active-anti-entropy/ +[config ref]: {{}}riak/kv/2.0.9/configuring/reference/ [Erlang shell]: http://learnyousomeerlang.com/starting-out -[glossary AAE]: /riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae -[glossary readrep]: /riak/kv/2.0.9/learn/glossary/#read-repair -[search config]: /riak/kv/2.0.9/configuring/search/#search-config-settings -[tiered storage]: /riak/kv/2.0.9/setup/planning/backend/leveldb/#tiered-storage +[glossary AAE]: {{}}riak/kv/2.0.9/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{}}riak/kv/2.0.9/learn/glossary/#read-repair +[search config]: {{}}riak/kv/2.0.9/configuring/search/#search-config-settings +[tiered storage]: {{}}riak/kv/2.0.9/setup/planning/backend/leveldb/#tiered-storage @@ -236,23 +236,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.0.9/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.0.9/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.0.9/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/) is enabled. 
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.0.9/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.0.9/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.0.9/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.0.9/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.0.9/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.0.9/using/repair-recovery/rolling-replaces.md index 4eaf400de2..be28b17fd3 100644 --- a/content/riak/kv/2.0.9/using/repair-recovery/rolling-replaces.md +++ b/content/riak/kv/2.0.9/using/repair-recovery/rolling-replaces.md @@ -12,9 +12,9 @@ menu: toc: true --- -[upgrade]: /riak/kv/2.0.9/setup/upgrading/cluster/ -[rolling restarts]: /riak/kv/2.0.9/using/repair-recovery/rolling-restart/ -[add node]: /riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes +[upgrade]: {{}}riak/kv/2.0.9/setup/upgrading/cluster/ +[rolling restarts]: {{}}riak/kv/2.0.9/using/repair-recovery/rolling-restart/ +[add node]: {{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. diff --git a/content/riak/kv/2.0.9/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.0.9/using/repair-recovery/rolling-restart.md index c24633a274..6474b08a6e 100644 --- a/content/riak/kv/2.0.9/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.0.9/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.0.9/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.0.9/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.0.9/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.0.9/using/running-a-cluster.md b/content/riak/kv/2.0.9/using/running-a-cluster.md index aaae8b46cf..dcb40f3639 100644 --- a/content/riak/kv/2.0.9/using/running-a-cluster.md +++ b/content/riak/kv/2.0.9/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. 
-Most configuration changes will be applied to the [configuration file](/riak/kv/2.0.9/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.0.9/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.0.9/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. ## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.0.9/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.0.9/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.0.9/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.0.9/using/cluster-operations/adding-removing-nodes) from a cluster. 
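The running-a-cluster page edited above walks through joining additional nodes; the join itself is staged and then committed, roughly as follows (the IP reuses the page's example address):

```bash
# On each additional node, stage a join to the first node:
riak-admin cluster join riak@192.168.1.10

# Then, from any node, review and commit the staged changes:
riak-admin cluster plan
riak-admin cluster commit
```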
> **Ring Creation Size** > diff --git a/content/riak/kv/2.0.9/using/security.md b/content/riak/kv/2.0.9/using/security.md index 76a9041053..85b879d7cf 100644 --- a/content/riak/kv/2.0.9/using/security.md +++ b/content/riak/kv/2.0.9/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.0.9/ops/advanced/security --- -[config reference search]: /riak/kv/2.1.4/configuring/reference/#search -[config search enabling]: /riak/kv/2.1.4/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.0.9/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.0.9/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.0.9/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.1.4/using/security/basics -[security managing]: /riak/kv/2.1.4/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.0.9/using/security/basics +[security managing]: {{}}riak/kv/2.0.9/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.1.4/developing/usage/search +[usage search]: {{}}riak/kv/2.0.9/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.0.9/using/security/basics.md b/content/riak/kv/2.0.9/using/security/basics.md index 4d2cefd5b4..7891d2eb7e 100644 --- a/content/riak/kv/2.0.9/using/security/basics.md +++ b/content/riak/kv/2.0.9/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.0.9/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.0.9/using/security/managing-sources/). As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.0.9/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.0.9/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.0.9/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.0.9/using/reference/custom-code). 1. 
Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.0.9/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.0.9/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.0.9/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.0.9/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{}}riak/kv/2.0.9/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.0.9/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{}}riak/kv/2.0.9/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.0.9/configuring/search/) document. +[Riak Search Settings]({{}}riak/kv/2.0.9/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.0.9/using/security/managing-sources/). +An more in-depth tutorial can be found in [Managing Security Sources]({{}}riak/kv/2.0.9/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.0.9/using/security/managing-sources/) document. 
+`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.0.9/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.0.9/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.0.9/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.0.9/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.0.9/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.0.9/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.0.9/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.0.9/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). -If, however, you are using the [HTTP API](/riak/kv/2.0.9/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.0.9/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. 
By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.0.9/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.0.9/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.0.9/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.0.9/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.0.9/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.0.9/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.0.9/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.0.9/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.0.9/configuring/reference/#directories).
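Once certificates are in place per the hunks above, security can be switched on and the TLS listener spot-checked. A hedged sketch; the HTTPS address reuses the `127.0.0.1:8088` example from this page:

```bash
# Enable security only after certs and SSL are configured and vetted:
riak-admin security status
riak-admin security enable

# Confirm the node negotiates TLS 1.2 on the example HTTPS listener:
openssl s_client -connect 127.0.0.1:8088 -tls1_2 < /dev/null
```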
 <tr>
 <td><code>platform_data_dir</code></td>
 <td>The directory in which Riak stores its storage backend data, as well
-as active anti-entropy data, and cluster metadata.</td>
+as active anti-entropy data, and cluster metadata.</td>
 <td><code>./data</code></td>
 </tr>

 <tr>
 <td><code>alive_tokens</code></td>
 <td>Determines the number of ticks the leader will wait to hear from its
-associated <a href="/riak/kv/2.0.9/learn/glossary/#vnode">vnode</a> before assuming that the vnode
+associated <a href="{{< baseurl >}}riak/kv/2.0.9/learn/glossary/#vnode">vnode</a> before assuming that the vnode
 is unhealthy and stepping down as leader. If the vnode does not respond
 to the leader before <code>ensemble_tick * alive_tokens</code>
 milliseconds have elapsed, the leader will
@@ -1833,8 +1833,8 @@ package) and in R14B04 via a custom repository and branch.
 <tr>
 <td><code>vnode_management_timer</code></td>
 <td>Sets the frequency with which <a
-href="/riak/kv/2.0.9/learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
+href="{{< baseurl >}}riak/kv/2.0.9/learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
 this node and other nodes in the cluster.</td>
 <td><code>10s</code> (10 seconds)</td>
 </tr>
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.0.9/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.0.9/using/security/managing-sources.md b/content/riak/kv/2.0.9/using/security/managing-sources.md index bd74ff1c3e..f8442ec621 100644 --- a/content/riak/kv/2.0.9/using/security/managing-sources.md +++ b/content/riak/kv/2.0.9/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.0.9/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.0.9/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.0.9/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.0.9/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.0.9/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.0.9/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.0.9/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.0.9/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.0.9/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.0.9/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. 
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.0.9/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.0.9/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.0.9/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.0.9/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.1.1/_reference-links.md b/content/riak/kv/2.1.1/_reference-links.md index 88e3837b03..31f80c7de4 100644 --- a/content/riak/kv/2.1.1/_reference-links.md +++ b/content/riak/kv/2.1.1/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.1.1/downloads/ -[install index]: /riak/kv/2.1.1/setup/installing -[upgrade index]: /riak/kv/2.1.1/upgrading -[plan index]: /riak/kv/2.1.1/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.1.1/configuring/reference/ -[manage index]: /riak/kv/2.1.1/using/managing -[performance index]: /riak/kv/2.1.1/using/performance -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.1.1/downloads/ +[install index]: {{}}riak/kv/2.1.1/setup/installing +[upgrade index]: {{}}riak/kv/2.1.1/upgrading +[plan index]: {{}}riak/kv/2.1.1/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.1.1/configuring/reference/ +[manage index]: {{}}riak/kv/2.1.1/using/managing +[performance index]: {{}}riak/kv/2.1.1/using/performance +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.1.1/setup/planning -[plan start]: /riak/kv/2.1.1/setup/planning/start -[plan backend]: /riak/kv/2.1.1/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.1/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.1.1/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.1.1/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.1.1/setup/planning/best-practices -[plan future]: /riak/kv/2.1.1/setup/planning/future +[plan index]: {{}}riak/kv/2.1.1/setup/planning +[plan start]: {{}}riak/kv/2.1.1/setup/planning/start +[plan backend]: {{}}riak/kv/2.1.1/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.1/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.1.1/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.1.1/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.1.1/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.1.1/setup/planning/future ## Installing -[install index]: /riak/kv/2.1.1/setup/installing -[install aws]: /riak/kv/2.1.1/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.1.1/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.1.1/setup/installing/freebsd -[install mac osx]: /riak/kv/2.1.1/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.1.1/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.1.1/setup/installing/smartos -[install solaris]: /riak/kv/2.1.1/setup/installing/solaris -[install suse]: /riak/kv/2.1.1/setup/installing/suse -[install windows azure]: /riak/kv/2.1.1/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.1.1/setup/installing +[install aws]: {{}}riak/kv/2.1.1/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.1.1/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.1.1/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.1.1/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.1.1/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.1.1/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.1.1/setup/installing/solaris +[install suse]: {{}}riak/kv/2.1.1/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.1.1/setup/installing/windows-azure -[install source index]: /riak/kv/2.1.1/setup/installing/source -[install source erlang]: /riak/kv/2.1.1/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.1.1/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.1.1/setup/installing/source +[install source erlang]: {{}}riak/kv/2.1.1/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.1.1/setup/installing/source/jvm -[install verify]: /riak/kv/2.1.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.1/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.1.1/setup/upgrading -[upgrade checklist]: /riak/kv/2.1.1/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.1.1/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.1.1/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.1.1/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.1.1/setup/downgrade +[upgrade index]: {{}}riak/kv/2.1.1/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.1.1/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.1.1/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.1.1/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.1.1/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.1.1/setup/downgrade ## Configuring -[config index]: /riak/kv/2.1.1/configuring -[config basic]: /riak/kv/2.1.1/configuring/basic -[config backend]: /riak/kv/2.1.1/configuring/backend -[config manage]: /riak/kv/2.1.1/configuring/managing -[config reference]: /riak/kv/2.1.1/configuring/reference/ -[config strong consistency]: /riak/kv/2.1.1/configuring/strong-consistency -[config load balance]: /riak/kv/2.1.1/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.1.1/configuring/mapreduce -[config search]: /riak/kv/2.1.1/configuring/search/ +[config index]: {{}}riak/kv/2.1.1/configuring +[config basic]: {{}}riak/kv/2.1.1/configuring/basic +[config backend]: {{}}riak/kv/2.1.1/configuring/backend +[config manage]: 
{{}}riak/kv/2.1.1/configuring/managing +[config reference]: {{}}riak/kv/2.1.1/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.1.1/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.1.1/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.1.1/configuring/mapreduce +[config search]: {{}}riak/kv/2.1.1/configuring/search/ -[config v3 mdc]: /riak/kv/2.1.1/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.1.1/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.1.1/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.1.1/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.1.1/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.1.1/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.1.1/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.1.1/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.1.1/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.1.1/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.1.1/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.1.1/using/ -[use admin commands]: /riak/kv/2.1.1/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.1.1/using/running-a-cluster +[use index]: {{}}riak/kv/2.1.1/using/ +[use admin commands]: {{}}riak/kv/2.1.1/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.1.1/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.1.1/using/reference/custom-code -[use ref handoff]: /riak/kv/2.1.1/using/reference/handoff -[use ref monitoring]: /riak/kv/2.1.1/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.1.1/using/reference/search -[use ref 2i]: /riak/kv/2.1.1/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.1.1/using/reference/snmp -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.1.1/using/reference/jmx -[use ref obj del]: /riak/kv/2.1.1/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.1.1/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.1.1/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.1.1/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.1.1/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.1.1/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.1.1/using/reference/search +[use ref 2i]: {{}}riak/kv/2.1.1/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.1.1/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.1.1/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.1.1/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.1.1/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.1.1/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.1.1/using/admin/ -[use admin commands]: /riak/kv/2.1.1/using/admin/commands/ -[use admin riak cli]: 
/riak/kv/2.1.1/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.1.1/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.1.1/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.1.1/using/admin/ +[use admin commands]: {{}}riak/kv/2.1.1/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.1.1/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.1.1/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.1.1/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.1.1/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.1.1/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.1.1/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.1.1/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.1.1/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.1.1/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.1.1/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.1.1/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.1.1/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.1.1/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.1.1/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.1.1/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.1.1/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.1.1/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.1.1/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.1.1/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.1.1/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.1.1/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.1.1/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.1.1/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.1.1/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.1.1/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.1.1/using/repair-recovery -[repair recover index]: /riak/kv/2.1.1/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.1.1/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.1.1/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.1.1/using/security/ -[security basics]: /riak/kv/2.1.1/using/security/basics -[security managing]: /riak/kv/2.1.1/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.1.1/using/security/ +[security basics]: {{}}riak/kv/2.1.1/using/security/basics +[security managing]: {{}}riak/kv/2.1.1/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.1.1/using/performance/ -[perf 
benchmark]: /riak/kv/2.1.1/using/performance/benchmarking -[perf open files]: /riak/kv/2.1.1/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.1.1/using/performance/erlang -[perf aws]: /riak/kv/2.1.1/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.1.1/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.1.1/using/performance/ +[perf benchmark]: {{}}riak/kv/2.1.1/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.1.1/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.1.1/using/performance/erlang +[perf aws]: {{}}riak/kv/2.1.1/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.1.1/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.1.1/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.1.1/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.1.1/developing -[dev client libraries]: /riak/kv/2.1.1/developing/client-libraries -[dev data model]: /riak/kv/2.1.1/developing/data-modeling -[dev data types]: /riak/kv/2.1.1/developing/data-types -[dev kv model]: /riak/kv/2.1.1/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.1.1/developing +[dev client libraries]: {{}}riak/kv/2.1.1/developing/client-libraries +[dev data model]: {{}}riak/kv/2.1.1/developing/data-modeling +[dev data types]: {{}}riak/kv/2.1.1/developing/data-types +[dev kv model]: {{}}riak/kv/2.1.1/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.1.1/developing/getting-started -[getting started java]: /riak/kv/2.1.1/developing/getting-started/java -[getting started ruby]: /riak/kv/2.1.1/developing/getting-started/ruby -[getting started python]: /riak/kv/2.1.1/developing/getting-started/python -[getting started php]: /riak/kv/2.1.1/developing/getting-started/php -[getting started csharp]: /riak/kv/2.1.1/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.1.1/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.1.1/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.1.1/developing/getting-started/golang - -[obj model java]: /riak/kv/2.1.1/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.1.1/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.1.1/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.1.1/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.1.1/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.1.1/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.1.1/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.1.1/developing/getting-started +[getting started java]: {{}}riak/kv/2.1.1/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.1.1/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.1.1/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.1.1/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.1.1/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.1.1/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.1.1/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.1.1/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.1.1/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.1.1/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.1.1/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.1.1/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.1.1/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.1.1/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.1.1/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.1.1/developing/usage -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.1.1/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.1.1/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.1.1/developing/usage/content-types -[usage create objects]: /riak/kv/2.1.1/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.1.1/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.1.1/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.1.1/developing/usage/mapreduce -[usage search]: /riak/kv/2.1.1/developing/usage/search -[usage search schema]: /riak/kv/2.1.1/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.1.1/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.1.1/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.1.1/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.1.1/developing/usage +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.1.1/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.1.1/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.1.1/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.1.1/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.1.1/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.1.1/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.1.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.1.1/developing/usage/search +[usage search schema]: {{}}riak/kv/2.1.1/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.1.1/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.1.1/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.1.1/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.1.1/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.1.1/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.1.1/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.1.1/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.1.1/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.1.1/developing/api/backend -[dev api http]: /riak/kv/2.1.1/developing/api/http -[dev api http status]: /riak/kv/2.1.1/developing/api/http/status -[dev api pbc]: /riak/kv/2.1.1/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.1.1/developing/api/backend +[dev api http]: {{}}riak/kv/2.1.1/developing/api/http +[dev api http status]: {{}}riak/kv/2.1.1/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.1.1/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.1.1/learn/glossary/ -[glossary aae]: /riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.1.1/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.1.1/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.1.1/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode -[concept aae]: /riak/kv/2.1.1/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.1.1/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.1.1/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.1.1/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.1.1/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.1.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.1/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.1.1/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.1.1/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.1.1/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.1.1/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.1.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.1.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.1/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.1.1/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.1.1/add-ons.md b/content/riak/kv/2.1.1/add-ons.md index 096e40c43f..9530771d64 100644 --- a/content/riak/kv/2.1.1/add-ons.md +++ b/content/riak/kv/2.1.1/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.1.1/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.1.1/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.1.1/add-ons/redis/developing-rra.md b/content/riak/kv/2.1.1/add-ons/redis/developing-rra.md index ed4f191407..e60dc74667 100644 --- a/content/riak/kv/2.1.1/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.1.1/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.1.1/developing/api/http +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.1.1/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.1.1/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.1.1/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.1.1/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.1.1/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.1.1/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.1.1/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.1.1/add-ons/redis/redis-add-on-features.md index d8002c895e..5ad8c37fc2 100644 --- a/content/riak/kv/2.1.1/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.1.1/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
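Since RRA speaks the Redis wire protocol, the GET/SET/DEL strategies diagrammed above can be exercised with a stock `redis-cli` pointed at the cache proxy. Host and port below are assumptions for illustration, not values from this changeset:

```bash
# SET is write-around: written through to Riak, cache entry invalidated:
redis-cli -h 127.0.0.1 -p 22122 SET test:key '{"example": true}'

# GET is read-through: a cache miss falls back to Riak, then populates
# the cache for CACHE_TTL:
redis-cli -h 127.0.0.1 -p 22122 GET test:key

# DEL is also write-around:
redis-cli -h 127.0.0.1 -p 22122 DEL test:key
```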
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.1.1/add-ons/redis/set-up-rra.md b/content/riak/kv/2.1.1/add-ons/redis/set-up-rra.md index c1ffaefa4b..2339e2c54e 100644 --- a/content/riak/kv/2.1.1/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.1.1/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.1.1/setup/installing -[perf open files]: /riak/kv/2.1.1/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.1.1/setup/installing +[perf open files]: {{}}riak/kv/2.1.1/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. 
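The set-up page above links to the open-files-limit doc for a reason: both Riak and the cache proxy hold many descriptors. A quick, hedged pre-flight check before installing:

```bash
# Current per-process open-file limits for this shell:
ulimit -Sn   # soft limit
ulimit -Hn   # hard limit
```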
diff --git a/content/riak/kv/2.1.1/add-ons/redis/using-rra.md b/content/riak/kv/2.1.1/add-ons/redis/using-rra.md index 44858bbba3..53f22493b4 100644 --- a/content/riak/kv/2.1.1/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.1.1/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.1.1/developing/api/http/ +[dev api http]: {{}}riak/kv/2.1.1/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.1.1/configuring/backend.md b/content/riak/kv/2.1.1/configuring/backend.md index 6c05a93d4d..34515f7dc0 100644 --- a/content/riak/kv/2.1.1/configuring/backend.md +++ b/content/riak/kv/2.1.1/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.1.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.1/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.1.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.1/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.1.1/configuring/basic.md b/content/riak/kv/2.1.1/configuring/basic.md index 0c38ea334a..496d112a2a 100644 --- a/content/riak/kv/2.1.1/configuring/basic.md +++ b/content/riak/kv/2.1.1/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.1.1/ops/building/configuration/ --- -[config reference]: /riak/kv/2.1.1/configuring/reference -[use running cluster]: /riak/kv/2.1.1/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.1.1/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.1.1/using/performance/erlang -[plan start]: /riak/kv/2.1.1/setup/planning/start -[plan best practices]: /riak/kv/2.1.1/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.1.1/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.1.1/setup/planning/backend -[plan backend multi]: /riak/kv/2.1.1/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.1.1/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.1.1/using/performance/benchmarking -[perf open files]: /riak/kv/2.1.1/using/performance/open-files-limit -[perf index]: /riak/kv/2.1.1/using/performance -[perf aws]: /riak/kv/2.1.1/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.1.1/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.1.1/configuring/reference +[use running cluster]: {{}}riak/kv/2.1.1/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.1.1/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.1.1/using/performance/erlang +[plan start]: {{}}riak/kv/2.1.1/setup/planning/start +[plan best practices]: {{}}riak/kv/2.1.1/setup/planning/best-practices +[cluster ops backup]: 
{{}}riak/kv/2.1.1/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.1.1/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.1.1/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.1.1/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.1.1/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.1.1/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.1.1/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.1.1/using/performance +[perf aws]: {{}}riak/kv/2.1.1/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.1.1/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.1.1/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.1.1/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
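The reset mechanism referenced above is a single HTTP call. A sketch, assuming the default HTTP interface on `127.0.0.1:8098` and a hypothetical bucket name:

```bash
# Reset a bucket's properties back to the configured defaults:
curl -XDELETE http://127.0.0.1:8098/buckets/my_bucket/props
```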
## System tuning diff --git a/content/riak/kv/2.1.1/configuring/load-balancing-proxy.md b/content/riak/kv/2.1.1/configuring/load-balancing-proxy.md index d59813b981..f16f37694d 100644 --- a/content/riak/kv/2.1.1/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.1.1/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.1.1/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.1.1/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.1.1/configuring/managing.md b/content/riak/kv/2.1.1/configuring/managing.md index e75ba466fc..961fa01fa3 100644 --- a/content/riak/kv/2.1.1/configuring/managing.md +++ b/content/riak/kv/2.1.1/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.1.1/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.1.1/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.1.1/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.1.1/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.1.1/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.1.1/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.1.1/configuring/mapreduce.md b/content/riak/kv/2.1.1/configuring/mapreduce.md index 938f89bd2b..3e3352724b 100644 --- a/content/riak/kv/2.1.1/configuring/mapreduce.md +++ b/content/riak/kv/2.1.1/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.1.1/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.1.1/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.1.1/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.1.1/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.1.1/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.1.1/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.1.1/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.1.1/configuring/reference.md b/content/riak/kv/2.1.1/configuring/reference.md index 17de2de70a..b5030c2811 100644 --- a/content/riak/kv/2.1.1/configuring/reference.md +++ b/content/riak/kv/2.1.1/configuring/reference.md @@ -1876,8 +1876,8 @@ package) and in R14B04 via a custom repository and branch. 
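The managing.md hunk above re-points the `chkconfig` link; the command it documents validates configuration before a restart, roughly:

```bash
# Verify that the node's configuration files parse cleanly:
riak chkconfig
```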
diff --git a/content/riak/kv/2.1.1/configuring/search.md b/content/riak/kv/2.1.1/configuring/search.md index bac0e68dc2..510837facc 100644 --- a/content/riak/kv/2.1.1/configuring/search.md +++ b/content/riak/kv/2.1.1/configuring/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.1.1/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.1.1/developing/usage/search -[usage search schema]: /riak/kv/2.1.1/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.1.1/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.1.1/developing/usage/custom-extractors -[config reference]: /riak/kv/2.1.1/configuring/reference -[config reference#search]: /riak/kv/2.1.1/configuring/reference/#search -[glossary aae]: /riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.1.1/using/security/ +[usage search]: {{}}riak/kv/2.1.1/developing/usage/search +[usage search schema]: {{}}riak/kv/2.1.1/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.1.1/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.1.1/developing/usage/custom-extractors +[config reference]: {{}}riak/kv/2.1.1/configuring/reference +[config reference#search]: {{}}riak/kv/2.1.1/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.1.1/using/security/ > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Riak Search Settings](http://docs.basho.com/riak/1.4.8/ops/advanced/configs/search/). +Yokozuna). This document covers Riak's Search subsystem from an operational perspective. 
If you are looking for more developer-focused diff --git a/content/riak/kv/2.1.1/configuring/strong-consistency.md b/content/riak/kv/2.1.1/configuring/strong-consistency.md index 2085891937..d370995a17 100644 --- a/content/riak/kv/2.1.1/configuring/strong-consistency.md +++ b/content/riak/kv/2.1.1/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.1.1/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.1.1/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.1.1/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.1.1/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.1.1/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.1.1/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.1.1/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.1.1/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.1.1/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.1.1/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.1.1/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.1.1/learn/concepts/causal-context -[dev data types]: /riak/kv/2.1.1/developing/data-types -[glossary aae]: /riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.1.1/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.1.1/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.1.1/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.1.1/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.1.1/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.1.1/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.1.1/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.1.1/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.1.1/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.1.1/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.1.1/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.1.1/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.1.1/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.1.1/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.1.1/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.1.1/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.1.1/learn/concepts/causal-context +[dev data types]: 
{{}}riak/kv/2.1.1/developing/data-types +[glossary aae]: {{}}riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.1.1/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.1.1/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.1.1/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.1.1/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.1.1/configuring/v2-multi-datacenter.md b/content/riak/kv/2.1.1/configuring/v2-multi-datacenter.md index 828c4310db..c89984dbf7 100644 --- a/content/riak/kv/2.1.1/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.1.1/configuring/v2-multi-datacenter.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.1.1/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.1.1/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.1.1/configuring/v2-multi-datacenter/ssl Riak Enterprise's Multi-Datacenter Replication capabilities offer a variety of configurable parameters. diff --git a/content/riak/kv/2.1.1/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.1.1/configuring/v2-multi-datacenter/nat.md index 12d93cd37e..724dc059ed 100644 --- a/content/riak/kv/2.1.1/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.1.1/configuring/v2-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.1.1/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.1.1/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.1.1/configuring/v2-multi-datacenter/ssl Riak Enterprise supports replication of data on networks that use static NAT. This capability can be used for replicating data over the internet diff --git a/content/riak/kv/2.1.1/configuring/v3-multi-datacenter.md b/content/riak/kv/2.1.1/configuring/v3-multi-datacenter.md index e628e44957..06e0dbde5f 100644 --- a/content/riak/kv/2.1.1/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.1.1/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.1.1/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.1.1/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.1.1/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/nat.md index efa2533e8c..444c7eda23 100644 --- a/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.1.1/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. 
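Stepping back to the `ensemble-status` output explained in the table above, the two invocations it describes, sketched under the assumption that `riak-admin` is run on a node in the cluster:

```bash
# Summarize the consensus subsystem; the Enabled, Active, Ring Ready,
# Validation, Metadata, and Ensembles fields match the table above.
riak-admin ensemble-status

# Inspect a single ensemble from the Ensembles list, e.g. the root ensemble.
riak-admin ensemble-status root
```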
diff --git a/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/quick-start.md index f0ff901ce0..60205e0987 100644 --- a/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.1.1/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.1.1/using/performance -[config v3 mdc]: /riak/kv/2.1.1/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.1.1/using/performance +[config v3 mdc]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl.md index f9842f4b0e..dc8cee9dcd 100644 --- a/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.1.1/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.1.1/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.1.1/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.1.1/developing/api/backend.md b/content/riak/kv/2.1.1/developing/api/backend.md index 414afe3287..ef4bd83a4f 100644 --- a/content/riak/kv/2.1.1/developing/api/backend.md +++ b/content/riak/kv/2.1.1/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/backend-api --- -[plan backend]: /riak/kv/2.1.1/setup/planning/backend +[plan backend]: {{}}riak/kv/2.1.1/setup/planning/backend Riak's storage API uniformly applies to all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.1.1/developing/api/http.md b/content/riak/kv/2.1.1/developing/api/http.md index 7a2a659f44..248fd71892 100644 --- a/content/riak/kv/2.1.1/developing/api/http.md +++ b/content/riak/kv/2.1.1/developing/api/http.md @@ -29,50 +29,50 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. 
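To make the escaping rule concrete, a small hedged example; the bucket and key names are invented, and `localhost:8098` assumes the default HTTP listener:

```curl
# The key "docs/readme" contains a slash, so it goes on the wire as docs%2Freadme.
curl http://localhost:8098/types/default/buckets/files/keys/docs%2Freadme
```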
Method | URL | Doc :------|:----|:--- -`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.1.1/developing/api/http/get-bucket-props) -`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.1.1/developing/api/http/set-bucket-props) -`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.1.1/developing/api/http/reset-bucket-props) -`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.1.1/developing/api/http/list-buckets) -`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.1.1/developing/api/http/list-keys) +`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.1.1/developing/api/http/get-bucket-props) +`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.1.1/developing/api/http/set-bucket-props) +`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.1.1/developing/api/http/reset-bucket-props) +`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.1.1/developing/api/http/list-buckets) +`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.1.1/developing/api/http/list-keys) ## Object-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.1.1/developing/api/http/fetch-object) -`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.1.1/developing/api/http/store-object) -`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.1.1/developing/api/http/store-object) -`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.1.1/developing/api/http/delete-object) +`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.1.1/developing/api/http/fetch-object) +`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.1.1/developing/api/http/store-object) +`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.1.1/developing/api/http/store-object) +`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.1.1/developing/api/http/delete-object) ## Riak-Data-Type-related Operations -For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.1.1/learn/concepts/crdts), -see the `curl` examples in [Using Data Types](/riak/kv/2.1.1/developing/data-types). +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.1.1/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.1.1/developing/data-types). 
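Since the table above defers Data Type operations to the `curl` examples in Using Data Types, one such example in miniature; it assumes a bucket type named `counters` that was created with `datatype = counter` and activated, and all other names are invented:

```curl
# Increment a counter stored at the key "hits" in the bucket "traffic".
curl -XPOST http://localhost:8098/types/counters/buckets/traffic/datatypes/hits \
  -H 'Content-Type: application/json' \
  -d '{"increment": 5}'

# Read the counter back; the response is JSON along the lines of
# {"type": "counter", "value": 5}.
curl http://localhost:8098/types/counters/buckets/traffic/datatypes/hits
```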
## Query-related Operations Method | URL | Doc :------|:----|:--- -`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.1.1/developing/api/http/mapreduce) -`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<value>` | [HTTP Secondary Indexes](/riak/kv/2.1.1/developing/api/http/secondary-indexes) -`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<start>/<end>` | [HTTP Secondary Indexes](/riak/kv/2.1.1/developing/api/http/secondary-indexes) +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.1.1/developing/api/http/mapreduce) +`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.1.1/developing/api/http/secondary-indexes) +`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.1.1/developing/api/http/secondary-indexes) ## Server-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/ping` | [HTTP Ping](/riak/kv/2.1.1/developing/api/http/ping) -`GET` | `/stats` | [HTTP Status](/riak/kv/2.1.1/developing/api/http/status) -`GET` | `/` | [HTTP List Resources](/riak/kv/2.1.1/developing/api/http/list-resources) +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.1.1/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.1.1/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.1.1/developing/api/http/list-resources) ## Search-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/search/query/<index>` | [HTTP Search Query](/riak/kv/2.1.1/developing/api/http/search-query) -`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.1.1/developing/api/http/search-index-info) -`GET` | `/search/index/<index>` | [HTTP Fetch Search Index](/riak/kv/2.1.1/developing/api/http/fetch-search-index) -`PUT` | `/search/index/<index>` | [HTTP Store Search Index](/riak/kv/2.1.1/developing/api/http/store-search-index) -`DELETE` | `/search/index/<index>` | [HTTP Delete Search Index](/riak/kv/2.1.1/developing/api/http/delete-search-index) -`GET` | `/search/schema/<schema>` | [HTTP Fetch Search Schema](/riak/kv/2.1.1/developing/api/http/fetch-search-schema) -`PUT` | `/search/schema/<schema>` | [HTTP Store Search Schema](/riak/kv/2.1.1/developing/api/http/store-search-schema) +`GET` | `/search/query/<index>` | [HTTP Search Query]({{}}riak/kv/2.1.1/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.1.1/developing/api/http/search-index-info) +`GET` | `/search/index/<index>` | [HTTP Fetch Search Index]({{}}riak/kv/2.1.1/developing/api/http/fetch-search-index) +`PUT` | `/search/index/<index>` | [HTTP Store Search Index]({{}}riak/kv/2.1.1/developing/api/http/store-search-index) +`DELETE` | `/search/index/<index>` | [HTTP Delete Search Index]({{}}riak/kv/2.1.1/developing/api/http/delete-search-index) +`GET` | `/search/schema/<schema>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.1.1/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/<schema>` | [HTTP Store Search Schema]({{}}riak/kv/2.1.1/developing/api/http/store-search-schema) diff --git a/content/riak/kv/2.1.1/developing/api/http/counters.md b/content/riak/kv/2.1.1/developing/api/http/counters.md index 133929bf9c..5d095f0f9a 100644 --- a/content/riak/kv/2.1.1/developing/api/http/counters.md +++ b/content/riak/kv/2.1.1/developing/api/http/counters.md @@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY ## Response -The regular POST/PUT ([HTTP Store Object](/riak/kv/2.1.1/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.1.1/developing/api/http/fetch-object)) responses apply here. 
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.1.1/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.1.1/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.1.1/developing/api/http/fetch-object.md b/content/riak/kv/2.1.1/developing/api/http/fetch-object.md index ea106fc9d1..91394d16df 100644 --- a/content/riak/kv/2.1.1/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.1.1/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.1.1/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.1.1/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.1.1/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.1.1/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.1.1/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.1.1/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.1.1/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.1.1/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.1.1/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.1.1/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.1.1/developing/api/http/fetch-search-index.md b/content/riak/kv/2.1.1/developing/api/http/fetch-search-index.md index e351c584ca..6cb805ca90 100644 --- a/content/riak/kv/2.1.1/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.1.1/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.1.1/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.1.1/developing/usage/search/#simple-setup). 
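A sketch of the fetch-search-index request described next, assuming an index named `famous` already exists and the default HTTP port is in use:

```curl
# Riak replies with the JSON object described below: the index name,
# its n_val, and the schema it uses.
curl http://localhost:8098/search/index/famous
```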
## Request @@ -36,7 +36,7 @@ GET /search/index/<index> ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.1.1/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.1.1/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.1.1/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.1.1/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.1.1/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.1.1/developing/api/http/fetch-search-schema.md index 393aa87222..ef7dee64d1 100644 --- a/content/riak/kv/2.1.1/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.1.1/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.1.1/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.1.1/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.1.1/developing/api/http/get-bucket-props.md b/content/riak/kv/2.1.1/developing/api/http/get-bucket-props.md index d6bf6111ea..ef023b0dc0 100644 --- a/content/riak/kv/2.1.1/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.1.1/developing/api/http/get-bucket-props.md @@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.1.1/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.1.1/developing/api/http/list-keys). ## Response @@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.1.1/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{}}riak/kv/2.1.1/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.1.1/developing/api/http/link-walking.md b/content/riak/kv/2.1.1/developing/api/http/link-walking.md index 7c3f44a10c..aa466221e6 100644 --- a/content/riak/kv/2.1.1/developing/api/http/link-walking.md +++ b/content/riak/kv/2.1.1/developing/api/http/link-walking.md @@ -17,8 +17,8 @@ aliases: Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.1.1/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.1.1/learn/glossary/#links). +is a special case of [MapReduce]({{}}riak/kv/2.1.1/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.1.1/learn/glossary/#links). ## Request @@ -64,7 +64,7 @@ single object that was found. If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. 
Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.1.1/developing/api/http/fetch-object), without the status +response from [fetching the object]({{}}riak/kv/2.1.1/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.1.1/developing/api/http/list-resources.md b/content/riak/kv/2.1.1/developing/api/http/list-resources.md index 13919909c5..e04c154ef0 100644 --- a/content/riak/kv/2.1.1/developing/api/http/list-resources.md +++ b/content/riak/kv/2.1.1/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.1.1/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.1.1/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.1.1/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.1.1/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.1.1/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.1.1/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.1.1/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.1.1/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.1.1/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.1.1/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.1.1/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.1.1/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.1.1/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.1.1/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.1.1/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.1.1/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.1.1/developing/api/http/mapreduce.md b/content/riak/kv/2.1.1/developing/api/http/mapreduce.md index 16d0690f11..5176868ae9 100644 --- a/content/riak/kv/2.1.1/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.1.1/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.1.1/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{}}riak/kv/2.1.1/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.1.1/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. 
The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.1.1/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.1.1/developing/api/http/search-index-info.md b/content/riak/kv/2.1.1/developing/api/http/search-index-info.md index bbee3324a4..57f0fa80ba 100644 --- a/content/riak/kv/2.1.1/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.1.1/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.1.1/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.1.1/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.1.1/developing/api/http/search-query.md b/content/riak/kv/2.1.1/developing/api/http/search-query.md index f30fb9a18a..a064350e59 100644 --- a/content/riak/kv/2.1.1/developing/api/http/search-query.md +++ b/content/riak/kv/2.1.1/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.1.1/developing/usage/search) query. +Performs a [Riak KV Search]({{}}riak/kv/2.1.1/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.1.1/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{}}riak/kv/2.1.1/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.1.1/developing/api/http/secondary-indexes.md b/content/riak/kv/2.1.1/developing/api/http/secondary-indexes.md index b2c961cb17..5bb7a1e853 100644 --- a/content/riak/kv/2.1.1/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.1.1/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.1.1/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
## Request diff --git a/content/riak/kv/2.1.1/developing/api/http/set-bucket-props.md b/content/riak/kv/2.1.1/developing/api/http/set-bucket-props.md index 72fa9acdd7..25aa844cf6 100644 --- a/content/riak/kv/2.1.1/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.1.1/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.1.1/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.1.1/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{}}riak/kv/2.1.1/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.1.1/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.1.1/developing/api/http/status.md b/content/riak/kv/2.1.1/developing/api/http/status.md index 2e561f6e5f..93254ded1b 100644 --- a/content/riak/kv/2.1.1/developing/api/http/status.md +++ b/content/riak/kv/2.1.1/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.1.1/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.1.1/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.1.1/developing/api/http/store-object.md b/content/riak/kv/2.1.1/developing/api/http/store-object.md index 8bd3bf137e..753a022c75 100644 --- a/content/riak/kv/2.1.1/developing/api/http/store-object.md +++ b/content/riak/kv/2.1.1/developing/api/http/store-object.md @@ -38,8 +38,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.1.1/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.1.1/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{}}riak/kv/2.1.1/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.1.1/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -83,7 +83,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.1.1/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.1.1/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. 
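Pulling together the store-object headers and the `returnbody` parameter covered above, a hedged example with invented names:

```curl
# Store a JSON object, attach a secondary index entry via X-Riak-Index-*,
# and ask Riak to echo the stored body back in the response.
curl -XPUT "http://localhost:8098/types/default/buckets/users/keys/jsmith?returnbody=true" \
  -H 'Content-Type: application/json' \
  -H 'X-Riak-Index-twitter_bin: jsmith123' \
  -d '{"name": "John Smith"}'
```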
diff --git a/content/riak/kv/2.1.1/developing/api/http/store-search-index.md b/content/riak/kv/2.1.1/developing/api/http/store-search-index.md index f60dbdef63..ceef0e1a73 100644 --- a/content/riak/kv/2.1.1/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.1.1/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.1.1/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.1.1/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/<index> ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.1.1/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.1.1/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.1.1/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.1.1/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.1.1/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.1.1/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.1.1/developing/api/http/store-search-schema.md b/content/riak/kv/2.1.1/developing/api/http/store-search-schema.md index 09f0eb71bd..851395499c 100644 --- a/content/riak/kv/2.1.1/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.1.1/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.1.1/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.1.1/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/schema/<schema> ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.1.1/developing/usage/search-schemas) document. If you've created a schema and stored it in the file +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.1.1/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the file `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request: diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers.md index aad709278f..c7980f1f9a 100644 --- a/content/riak/kv/2.1.1/developing/api/protocol-buffers.md +++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers.md @@ -139,47 +139,47 @@ message RpbErrorResp { ## Bucket Operations -* [PBC List Buckets](/riak/kv/2.1.1/developing/api/protocol-buffers/list-buckets) -* [PBC List Keys](/riak/kv/2.1.1/developing/api/protocol-buffers/list-keys) -* [PBC Get Bucket Properties](/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props) -* [PBC Set Bucket Properties](/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props) -* [PBC Reset Bucket Properties](/riak/kv/2.1.1/developing/api/protocol-buffers/reset-bucket-props) +* [PBC List Buckets]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/reset-bucket-props) ## Object/Key Operations -* [PBC Fetch Object](/riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object) -* [PBC Store Object](/riak/kv/2.1.1/developing/api/protocol-buffers/store-object) -* [PBC Delete Object](/riak/kv/2.1.1/developing/api/protocol-buffers/delete-object) +* [PBC Fetch Object]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/delete-object) ## Query Operations -* [PBC MapReduce](/riak/kv/2.1.1/developing/api/protocol-buffers/mapreduce) -* [PBC Secondary Indexes](/riak/kv/2.1.1/developing/api/protocol-buffers/secondary-indexes) -* [PBC Search](/riak/kv/2.1.1/developing/api/protocol-buffers/search) +* [PBC MapReduce]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/search) ## Server Operations -* [PBC Ping](/riak/kv/2.1.1/developing/api/protocol-buffers/ping) -* [PBC Server Info](/riak/kv/2.1.1/developing/api/protocol-buffers/server-info) +* [PBC Ping]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/server-info) ## Bucket Type Operations -* [PBC Get Bucket Type](/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-type) -* [PBC Set Bucket Type](/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-type) +* [PBC Get Bucket Type]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-type) ## Data Type Operations -* [PBC Data Type Fetch](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-fetch) -* [PBC Data Type Union](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-union) -* [PBC Data Type Store](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-store) -* [PBC Data Type Counter 
Store](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/auth-req.md index ca7d9b912c..7a71f4ac62 100644 --- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/auth-req.md @@ -27,4 +27,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.1.1/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.1.1/using/security/basics). diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/delete-object.md index f0e7af3a30..60b8b93cee 100644 --- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/delete-object.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.1.1/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{}}riak/kv/2.1.1/using/cluster-operations/bucket-types)/bucket/key location. 
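The PBC delete request itself travels as a binary protocol buffer, but for orientation the same operation expressed against the HTTP API from the tables earlier looks like this (names invented):

```curl
# Delete the object at the default bucket type / "users" bucket / key "jsmith".
curl -XDELETE http://localhost:8098/types/default/buckets/users/keys/jsmith
```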
## Request diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-counter-store.md index 6f44592c80..91d88da527 100644 --- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.1.1/developing/data-types). +An operation to update a [counter]({{}}riak/kv/2.1.1/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-fetch.md index 001fff5cb2..808d3f178f 100644 --- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.1.1/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.1.1/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.1.1/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{}}riak/kv/2.1.1/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. 
```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.1.1/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{}}riak/kv/2.1.1/learn/glossary/#vector-clock) for standard KV updates. The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store.md index 8e7e65c28d..5481d65b29 100644 --- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-set-store.md index 66e3cbb8e3..a957a8ddee 100644 --- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-store.md index 03b0490dc5..bae95a5d49 100644 --- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.1.1/developing/data-types). +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.1.1/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`. 
The `DtOp` value specifies which Data Type-specific operation is being -performed. More on that in the [PBC Data Type Union](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-union) document. +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-union) document. ```protobuf message DtUpdateReq { @@ -50,11 +50,11 @@ message DtUpdateReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored -`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.1.1/using/cluster-operations/bucket-types). +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.1.1/using/cluster-operations/bucket-types). Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a -[counter](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store). +[counter]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-map-store). ```protobuf message DtOp { @@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum` Parameter | Description :---------|:----------- `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. -`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.1.1/learn/glossary/#vector-clock) +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.1.1/learn/glossary/#vector-clock) `w` | Write quorum, i.e. how many replicas to write to before returning a successful response `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted @@ -92,7 +92,7 @@ Parameter | Description ## Response The response to a Data Type update request is analogous to -[`RpbPutResp`](/riak/kv/2.1.1/developing/api/protocol-buffers/store-object) for KV operations. If the +[`RpbPutResp`]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/store-object) for KV operations. If the `return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-union.md index cba3a9218a..d43f7d3ef2 100644 --- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-union.md +++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/dt-union.md @@ -28,4 +28,4 @@ message DtOp { ``` The included operation depends on the Data Type that is being updated. 
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.1.1/developing/api/protocol-buffers/dt-store) message.
+`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/dt-store) message.
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object.md
index f13bf2c7a6..51c952cf5b 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object.md
@@ -47,7 +47,7 @@ message RpbGetReq {

> **Note on defaults and special values**
>
> All of the optional parameters below have default values determined on a
-per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props) for more information.
+per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props) for more information.
>
> Furthermore, you can assign an integer value to the `r` and
`pr` parameters, provided that that integer value is less than or
@@ -87,7 +87,7 @@ Value | Description

The content entries hold the object value and any metadata. Below is the
structure of a RpbContent message, which is included in GET/PUT responses
(`RpbGetResp` (above) and
-[`RpbPutResp`](/riak/kv/2.1.1/developing/api/protocol-buffers/store-object), respectively):
+[`RpbPutResp`]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/store-object), respectively):

```protobuf
message RpbContent {
@@ -114,7 +114,7 @@ of the following optional parameters:

* `charset` --- The character encoding of the object, e.g. `utf-8`
* `content_encoding` --- The content encoding of the object, e.g. `video/mp4`
-* `vtag` --- The object's [vtag](/riak/kv/2.1.1/learn/glossary/#vector-clock)
+* `vtag` --- The object's [vtag]({{< baseurl >}}riak/kv/2.1.1/learn/glossary/#vector-clock)
* `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients
* `last_mod` --- A timestamp for when the object was last modified, in
@@ -132,7 +132,7 @@ of the following optional parameters:
 }
 ```
 Notice that both a key and value can be stored or just a key.
- `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.1.1/developing/usage/secondary-indexes) to objects (in the optional
+ `RpbPair` messages are also used to attach [secondary indexes]({{< baseurl >}}riak/kv/2.1.1/developing/usage/secondary-indexes) to objects (in the optional
 `indexes` field).
* `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key)
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props.md
index ec2c517a0d..43ebc8ec1e 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props.md
@@ -26,7 +26,7 @@ message RpbGetBucketReq {
 }
 ```

-The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.1.1/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
+The bucket's name (`bucket`) must be specified. The [bucket type]({{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
 the `default` bucket type will be used.

## Response
@@ -85,7 +85,7 @@ message RpbBucketProps {

Each `RpbBucketProps` message returns all of the properties associated
with a particular bucket. Default values for bucket properties, as well
as descriptions of all of the above properties, can be found in the
-[configuration file](/riak/kv/2.1.1/configuring/reference/#default-bucket-properties) documentation.
+[configuration file]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/#default-bucket-properties) documentation.

It should be noted that the value of an `RpbBucketProps` message may
include other message types, such as `RpbModFun` (specifying
@@ -106,5 +106,5 @@ message RpbCommitHook {
 ```

{{% note title="Note on `RpbReplMode`" %}}
-The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riakcs/latest/cookbooks/mdc-overview/)
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/latest/cookbooks/mdc-overview/)
{{% /note %}}
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-type.md
index 50a4f7781e..12eaf7d2d1 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-type.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-type.md
@@ -15,7 +15,7 @@ aliases:
 - /riak/kv/2.1.1/dev/references/protocol-buffers/get-bucket-type
---

-Gets the bucket properties associated with a [bucket type](/riak/kv/2.1.1/using/cluster-operations/bucket-types).
+Gets the bucket properties associated with a [bucket type]({{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/bucket-types).

## Request
@@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`).

## Response

A bucket type's properties will be sent to the client as part of an
-[`RpbBucketProps`](/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props) message.
+[`RpbBucketProps`]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props) message.
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-client-id.md
index 1d3949432b..a2b7b0186b 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-client-id.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/get-client-id.md
@@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs.

Get the client id used for this connection. Client ids are used for
conflict resolution and each unique actor in the system should be
assigned one. A client id is assigned randomly when the socket is
-connected and can be changed using [Set Client ID](/riak/kv/2.1.1/developing/api/protocol-buffers/set-client-id).
+connected and can be changed using [Set Client ID]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/set-client-id).
## Request
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/mapreduce.md
index 9963eae065..321f272271 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/mapreduce.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/mapreduce.md
@@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways

* `application/json` --- JSON-encoded MapReduce job
* `application/x-erlang-binary` --- Erlang external term format

-The JSON encoding is the same as [REST API](/riak/kv/2.1.1/developing/usage/mapreduce/#rest) and
-the external term format is the same as the [local Erlang API](/riak/kv/2.1.1/developing/app-guide/advanced-mapreduce/#erlang)
+The JSON encoding is the same as [REST API]({{< baseurl >}}riak/kv/2.1.1/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{< baseurl >}}riak/kv/2.1.1/developing/app-guide/advanced-mapreduce/#erlang)

## Response
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/reset-bucket-props.md
index 80b24eb1e3..a6b8ae615f 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/reset-bucket-props.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/reset-bucket-props.md
@@ -27,7 +27,7 @@ message RpbResetBucketReq {
 ```

You must specify the name of the bucket (`bucket`) and optionally a
-[bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) using the `type` value. If you do not
+[bucket type]({{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types) using the `type` value. If you do not
specify a bucket type, the `default` bucket type will be used by Riak.

## Response
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/secondary-indexes.md
index 9e835b9c68..06fdb4334d 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/secondary-indexes.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/secondary-indexes.md
@@ -61,7 +61,7 @@ Parameter | Description
`max_results` | If pagination is turned on, the number of results to be returned to the client
`continuation` | If set to `true`, values are returned in a paginated response
`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
-`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.1.1/developing/usage/bucket-types).
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types).
`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
@@ -84,7 +84,7 @@ message RpbIndexResp {

Parameter | Description
:---------|:-----------
`keys` | A list of keys that match the index request
-`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object).
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object).
`continuation` | Used for paginated responses
`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props.md
index 4352afc44b..c72bbef8f0 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props.md
@@ -29,9 +29,9 @@ message RpbSetBucketReq {

You must specify the name of the bucket (`bucket`) and include an
`RpbBucketProps` message. More on that message type can be found in the
-[PBC Get Bucket Properties](/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props) documentation.
+[PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props) documentation.

-You can also specify a [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) using the
+You can also specify a [bucket type]({{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types) using the
`type` value. If you do not specify a bucket type, the `default` bucket
type will be used by Riak.
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-type.md
index 76ee262c96..f64ae01d96 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-type.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-type.md
@@ -15,8 +15,8 @@ aliases:
 - /riak/kv/2.1.1/dev/references/protocol-buffers/set-bucket-type
---

-Assigns a set of [bucket properties](/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props) to a
-[bucket type](/riak/kv/2.1.1/developing/usage/bucket-types).
+Assigns a set of [bucket properties]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props) to a
+[bucket type]({{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types).

## Request
@@ -28,4 +28,4 @@ message RpbSetBucketTypeReq {
 }
 ```

The `type` field specifies the name of the bucket type as a binary. The
-`props` field contains an [`RpbBucketProps`](/riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props).
+`props` field contains an [`RpbBucketProps`]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/get-bucket-props).
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/store-object.md
index 2adf0812a2..730ceeacad 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/store-object.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/store-object.md
@@ -16,11 +16,11 @@ aliases:
---

Stores an object under the specified location, as determined by the
-intended [key](/riak/kv/2.1.1/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.1.1/learn/concepts/buckets), and [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types). A bucket must always be specified (via
+intended [key]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/keys-and-objects), [bucket]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/buckets), and [bucket type]({{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types). A bucket must always be specified (via
 `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no
 key is specified, Riak will assign a random key to the object. If no
-[bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) is assigned, Riak will assign
-`default`, which means that the [default bucket configuration](/riak/kv/2.1.1/configuring/reference/#default-bucket-properties) will be used.
+[bucket type]({{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/#default-bucket-properties) will be used.

#### Request
@@ -50,7 +50,7 @@ message RpbPutReq {

Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
-`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object)
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object)

#### Optional Parameters
@@ -93,7 +93,7 @@ message RpbPutResp {

If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
will contain the current object after the PUT completes, in `contents`,
-as well as the object's [causal context](/riak/kv/2.1.1/learn/concepts/causal-context), in the `vclock`
+as well as the object's [causal context]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/causal-context), in the `vclock`
field. The `key` will be sent only if the server generated a random key
for the object.
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-get.md
index 978f010db2..c8b2d265aa 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-get.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-get.md
@@ -53,7 +53,7 @@ message RpbYokozunaIndex {
 ```

Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.1.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
+binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
index is stored (for GET requests) or on which you wish the index to be
stored (for PUT requests). An index's `n_val` must match the associated
bucket's `n_val`.
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-put.md
index 923e242593..32b855e7f5 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-put.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-index-put.md
@@ -37,4 +37,4 @@ message RpbYokozunaIndex {
 ```

Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.1.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
+binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-get.md
index fb2942c2d7..1e37cf180b 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-get.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-get.md
@@ -15,7 +15,7 @@ aliases:
 - /riak/kv/2.1.1/dev/references/protocol-buffers/yz-schema-get
---

-Fetch a [search schema](/riak/kv/2.1.1/developing/usage/search-schemas) from Riak Search.
+Fetch a [search schema]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search-schemas) from Riak Search.

## Request
diff --git a/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-put.md
index 1ec32ef0f9..db03508117 100644
--- a/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-put.md
+++ b/content/riak/kv/2.1.1/developing/api/protocol-buffers/yz-schema-put.md
@@ -15,7 +15,7 @@ aliases:
 - /riak/kv/2.1.1/dev/references/protocol-buffers/yz-schema-put
---

-Create a new Solr [search schema](/riak/kv/2.1.1/developing/usage/search-schemas).
+Create a new Solr [search schema]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search-schemas).

## Request
@@ -34,8 +34,8 @@ message RpbYokozunaSchema {
 }
 ```

-This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.1.1/developing/usage/search-schemas) `content` as XML.
+This message *must* include both the schema `name` and its Solr [search schema]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search-schemas) `content` as XML.

## Response

-Returns a [RpbPutResp](/riak/kv/2.1.1/developing/api/protocol-buffers/#message-codes) code with no data on success.
+Returns a [RpbPutResp]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/#message-codes) code with no data on success.
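The schema and index messages above are rarely built by hand; the client libraries wrap them. As a hedged sketch only (the node address, schema name, and index name are assumptions, and the abbreviated schema XML would need real `<fields>` and `<types>` sections before a node would accept it), the official Riak Python client drives the same PB messages like this:

```python
# Hedged sketch: exercises the Yokozuna schema/index PB messages above via
# the official Riak Python client. Assumes `pip install riak`, a node with
# search enabled, and the default protocol buffers port; names are
# illustrative only.
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

schema_xml = """<?xml version="1.0" encoding="UTF-8" ?>
<schema name="blog_post_schema" version="1.5">
  <!-- field and type definitions elided for brevity -->
</schema>"""

# Schema PUT: a name plus its Solr schema content as XML.
client.create_search_schema('blog_post_schema', schema_xml)

# Index PUT: a name plus an optional schema and n_val; the n_val must
# match that of any bucket the index is attached to.
client.create_search_index('blog_posts', schema='blog_post_schema', n_val=3)

print(client.get_search_index('blog_posts'))
```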
diff --git a/content/riak/kv/2.1.1/developing/app-guide.md b/content/riak/kv/2.1.1/developing/app-guide.md
index 28f0e81a32..ad4f0814ce 100644
--- a/content/riak/kv/2.1.1/developing/app-guide.md
+++ b/content/riak/kv/2.1.1/developing/app-guide.md
@@ -15,48 +15,48 @@ aliases:
 - /riak/kv/2.1.1/dev/using/application-guide/
---

-[usage conflict resolution]: /riak/kv/2.1.1/developing/usage/conflict-resolution
-[dev data model#log]: /riak/kv/2.1.1/developing/data-modeling/#log-data
-[dev data model#sensor]: /riak/kv/2.1.1/developing/data-modeling/#sensor-data
-[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency
-[dev data model#user]: /riak/kv/2.1.1/developing/data-modeling/#user-data
-[dev kv model]: /riak/kv/2.1.1/developing/key-value-modeling
-[dev data types]: /riak/kv/2.1.1/developing/data-types
-[dev data types#counters]: /riak/kv/2.1.1/developing/data-types/counters
-[dev data types#sets]: /riak/kv/2.1.1/developing/data-types/sets
-[dev data types#maps]: /riak/kv/2.1.1/developing/data-types/maps
-[usage create objects]: /riak/kv/2.1.1/developing/usage/creating-objects
-[usage search]: /riak/kv/2.1.1/developing/usage/search
-[use ref search]: /riak/kv/2.1.1/using/reference/search
-[usage 2i]: /riak/kv/2.1.1/developing/usage/secondary-indexes
-[dev client libraries]: /riak/kv/2.1.1/developing/client-libraries
-[concept crdts]: /riak/kv/2.1.1/learn/concepts/crdts
-[dev data model]: /riak/kv/2.1.1/developing/data-modeling
-[usage mapreduce]: /riak/kv/2.1.1/developing/usage/mapreduce
-[apps mapreduce]: /riak/kv/2.1.1/developing/app-guide/advanced-mapreduce
-[use ref 2i]: /riak/kv/2.1.1/using/reference/secondary-indexes
-[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb
-[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask
-[plan backend memory]: /riak/kv/2.1.1/setup/planning/backend/memory
-[obj model java]: /riak/kv/2.1.1/developing/getting-started/java/object-modeling
-[obj model ruby]: /riak/kv/2.1.1/developing/getting-started/ruby/object-modeling
-[obj model python]: /riak/kv/2.1.1/developing/getting-started/python/object-modeling
-[obj model csharp]: /riak/kv/2.1.1/developing/getting-started/csharp/object-modeling
-[obj model nodejs]: /riak/kv/2.1.1/developing/getting-started/nodejs/object-modeling
-[obj model erlang]: /riak/kv/2.1.1/developing/getting-started/erlang/object-modeling
-[obj model golang]: /riak/kv/2.1.1/developing/getting-started/golang/object-modeling
-[concept strong consistency]: /riak/kv/2.1.1/using/reference/strong-consistency
-[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency
-[cluster ops strong consistency]: /riak/kv/2.1.1/using/cluster-operations/strong-consistency
-[config strong consistency]: /riak/kv/2.1.1/configuring/strong-consistency
-[apps strong consistency]: /riak/kv/2.1.1/developing/app-guide/strong-consistency
-[usage update objects]: /riak/kv/2.1.1/developing/usage/updating-objects
-[apps replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties
-[install index]: /riak/kv/2.1.1/setup/installing
-[getting started]: /riak/kv/2.1.1/developing/getting-started
-[usage index]: /riak/kv/2.1.1/developing/usage
-[glossary]: /riak/kv/2.1.1/learn/glossary
-[write-once]: /riak/kv/2.1.1/developing/app-guide/write-once
+[usage conflict resolution]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/conflict-resolution
+[dev data model#log]: {{< baseurl >}}riak/kv/2.1.1/developing/data-modeling/#log-data
+[dev data model#sensor]: {{< baseurl >}}riak/kv/2.1.1/developing/data-modeling/#sensor-data
+[concept eventual consistency]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/eventual-consistency
+[dev data model#user]: {{< baseurl >}}riak/kv/2.1.1/developing/data-modeling/#user-data
+[dev kv model]: {{< baseurl >}}riak/kv/2.1.1/developing/key-value-modeling
+[dev data types]: {{< baseurl >}}riak/kv/2.1.1/developing/data-types
+[dev data types#counters]: {{< baseurl >}}riak/kv/2.1.1/developing/data-types/counters
+[dev data types#sets]: {{< baseurl >}}riak/kv/2.1.1/developing/data-types/sets
+[dev data types#maps]: {{< baseurl >}}riak/kv/2.1.1/developing/data-types/maps
+[usage create objects]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/creating-objects
+[usage search]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/search
+[use ref search]: {{< baseurl >}}riak/kv/2.1.1/using/reference/search
+[usage 2i]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/secondary-indexes
+[dev client libraries]: {{< baseurl >}}riak/kv/2.1.1/developing/client-libraries
+[concept crdts]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/crdts
+[dev data model]: {{< baseurl >}}riak/kv/2.1.1/developing/data-modeling
+[usage mapreduce]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/mapreduce
+[apps mapreduce]: {{< baseurl >}}riak/kv/2.1.1/developing/app-guide/advanced-mapreduce
+[use ref 2i]: {{< baseurl >}}riak/kv/2.1.1/using/reference/secondary-indexes
+[plan backend leveldb]: {{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/leveldb
+[plan backend bitcask]: {{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/bitcask
+[plan backend memory]: {{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/memory
+[obj model java]: {{< baseurl >}}riak/kv/2.1.1/developing/getting-started/java/object-modeling
+[obj model ruby]: {{< baseurl >}}riak/kv/2.1.1/developing/getting-started/ruby/object-modeling
+[obj model python]: {{< baseurl >}}riak/kv/2.1.1/developing/getting-started/python/object-modeling
+[obj model csharp]: {{< baseurl >}}riak/kv/2.1.1/developing/getting-started/csharp/object-modeling
+[obj model nodejs]: {{< baseurl >}}riak/kv/2.1.1/developing/getting-started/nodejs/object-modeling
+[obj model erlang]: {{< baseurl >}}riak/kv/2.1.1/developing/getting-started/erlang/object-modeling
+[obj model golang]: {{< baseurl >}}riak/kv/2.1.1/developing/getting-started/golang/object-modeling
+[concept strong consistency]: {{< baseurl >}}riak/kv/2.1.1/using/reference/strong-consistency
+[use ref strong consistency]: {{< baseurl >}}riak/kv/2.1.1/using/reference/strong-consistency
+[cluster ops strong consistency]: {{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/strong-consistency
+[config strong consistency]: {{< baseurl >}}riak/kv/2.1.1/configuring/strong-consistency
+[apps strong consistency]: {{< baseurl >}}riak/kv/2.1.1/developing/app-guide/strong-consistency
+[usage update objects]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/updating-objects
+[apps replication properties]: {{< baseurl >}}riak/kv/2.1.1/developing/app-guide/replication-properties
+[install index]: {{< baseurl >}}riak/kv/2.1.1/setup/installing
+[getting started]: {{< baseurl >}}riak/kv/2.1.1/developing/getting-started
+[usage index]: {{< baseurl >}}riak/kv/2.1.1/developing/usage
+[glossary]: {{< baseurl >}}riak/kv/2.1.1/learn/glossary
+[write-once]: {{< baseurl >}}riak/kv/2.1.1/developing/app-guide/write-once

So you've decided to build an application using Riak as a data store. We
think that this is a wise choice for a broad variety of use cases. But
@@ -119,7 +119,7 @@ Riak may not be such a good choice if you use it to store:

* **Objects that exceed 1-2MB in size** --- If you will be storing a
 lot of objects over that size, we would recommend checking
- out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak
+ out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak
 CS was built to solve this problem. Storing large objects in Riak
 will typically lead to substandard performance.
* **Objects with complex interdependencies** --- If your data cannot be
diff --git a/content/riak/kv/2.1.1/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.1.1/developing/app-guide/advanced-mapreduce.md
index 34d5349cae..8e4b4fad4e 100644
--- a/content/riak/kv/2.1.1/developing/app-guide/advanced-mapreduce.md
+++ b/content/riak/kv/2.1.1/developing/app-guide/advanced-mapreduce.md
@@ -15,12 +15,12 @@ aliases:
 - /riak/kv/2.1.1/dev/advanced/mapreduce/
---

-[usage 2i]: /riak/kv/2.1.1/developing/usage/secondary-indexes
-[apps replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties
-[use ref custom code]: /riak/kv/2.1.1/using/reference/custom-code
-[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types
-[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode
-[config reference]: /riak/kv/2.1.1/configuring/reference
+[usage 2i]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/secondary-indexes
+[apps replication properties]: {{< baseurl >}}riak/kv/2.1.1/developing/app-guide/replication-properties
+[use ref custom code]: {{< baseurl >}}riak/kv/2.1.1/using/reference/custom-code
+[usage bucket types]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types
+[glossary vnode]: {{< baseurl >}}riak/kv/2.1.1/learn/glossary/#vnode
+[config reference]: {{< baseurl >}}riak/kv/2.1.1/configuring/reference

> **Use MapReduce sparingly**
>
@@ -725,7 +725,7 @@ You can use streaming with Erlang via the Riak local client or the
Erlang Protocol Buffers API. In either case, you will provide the call
to `mapred_stream` with a `Pid` that will receive the streaming results.

-For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl)
+For examples, see [MapReduce pbstream.erl]({{< baseurl >}}data/MapReduceExamples/pbstream.erl)

## Troubleshooting MapReduce, illustrated
diff --git a/content/riak/kv/2.1.1/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.1.1/developing/app-guide/cluster-metadata.md
index 2674a13932..245ace3146 100644
--- a/content/riak/kv/2.1.1/developing/app-guide/cluster-metadata.md
+++ b/content/riak/kv/2.1.1/developing/app-guide/cluster-metadata.md
@@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read
without blocking on communication over the network.

One notable example of a subsystem of Riak relying on cluster metadata
-is Riak's [bucket types](/riak/kv/2.1.1/using/reference/bucket-types) feature. This feature
+is Riak's [bucket types]({{< baseurl >}}riak/kv/2.1.1/using/reference/bucket-types) feature. This feature
requires that a particular form of key/value pairs, namely bucket type
names (the key) and their associated bucket properties (the value), be
asynchronously broadcast to all nodes in a Riak cluster.
@@ -54,7 +54,7 @@ already reached all nodes in the previous set of members.

All cluster metadata is eventually stored both in memory and on disk,
but it should be noted that reads are only from memory, while writes are
-made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.1.1/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
terms addressed by both prefix and a key.

## Erlang Code Interface
diff --git a/content/riak/kv/2.1.1/developing/app-guide/replication-properties.md b/content/riak/kv/2.1.1/developing/app-guide/replication-properties.md
index 8db5b93ce0..13d4ac6c81 100644
--- a/content/riak/kv/2.1.1/developing/app-guide/replication-properties.md
+++ b/content/riak/kv/2.1.1/developing/app-guide/replication-properties.md
@@ -15,10 +15,10 @@ aliases:
 - /riak/kv/2.1.1/dev/advanced/replication-properties
---

-[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types
-[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency
-[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency
-[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters
+[usage bucket types]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types
+[concept eventual consistency]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{< baseurl >}}riak/kv/2.1.1/using/reference/strong-consistency
+[concept clusters]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/clusters

Riak was built to act as a multi-node [cluster][concept clusters]. It
distributes data across multiple physical servers, which enables it to
@@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds
of fundamental choices has immense value for your applications and is one
of the features that differentiates Riak from other databases.

-At the bottom of the page, you'll find a [screencast](/riak/kv/2.1.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+At the bottom of the page, you'll find a [screencast]({{< baseurl >}}riak/kv/2.1.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
replication levels to match your application and business needs.

> **Note on strong consistency**
>
-> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.1.1/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{< baseurl >}}riak/kv/2.1.1/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
_all_ availability guarantees when necessary. Therefore, you
-should consult the [Using Strong Consistency](/riak/kv/2.1.1/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+should consult the [Using Strong Consistency]({{< baseurl >}}riak/kv/2.1.1/developing/app-guide/strong-consistency) documentation, as this option will not be covered
in this tutorial.

## How Replication Properties Work
@@ -64,7 +64,7 @@ Riak is to specify those properties

### Replication Properties Through Bucket Types

Let's say, for example, that you want to apply an `n_val` of 5, an `r`
-of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.1.1/learn/concepts/buckets) that
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/buckets) that
you're using. In order to set those replication properties, you should
create a bucket type that sets those properties. Below is an example:
@@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type

The table below lists the most frequently used replication parameters
that are available in Riak. Symbolic values like `quorum` are discussed
-[below](/riak/kv/2.1.1/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+[below]({{< baseurl >}}riak/kv/2.1.1/developing/app-guide/replication-properties#symbolic-consistency-names). Each
parameter will be explained in more detail in later sections:

Parameter | Common name | Default value | Description
:---------|:------------|:--------------|:-----------
`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
`r` | R | `quorum` | The number of servers that must respond to a read request
`w` | W | `quorum` | Number of servers that must respond to a write request
-`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
-`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
@@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description

## A Primer on N, R, and W

The most important thing to note about Riak's replication controls is
-that they can be set at the bucket level. You can use [bucket types](/riak/kv/2.1.1/developing/usage/bucket-types)
+that they can be set at the bucket level. You can use [bucket types]({{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types)
to set up bucket `A` to use a particular set of replication properties
and bucket `B` to use entirely different properties.
@@ -110,7 +110,7 @@ considered a success (W, or `w`).

In addition to the bucket level, you can also specify replication
properties on the client side for any given read or write. The examples
immediately below will deal with bucket-level replication settings, but
-check out the [section below](/riak/kv/2.1.1/developing/app-guide/replication-properties#client-level-replication-settings)
+check out the [section below]({{< baseurl >}}riak/kv/2.1.1/developing/app-guide/replication-properties#client-level-replication-settings)
for more information on setting properties on a per-operation basis.

The most general trade-off to be aware of when setting these values is
@@ -310,7 +310,7 @@ seeks to write the object to is unavailable.
## Primary Reads and Writes with PR and PW

-In Riak's replication model, there are N [vnodes](/riak/kv/2.1.1/learn/glossary/#vnode),
+In Riak's replication model, there are N [vnodes]({{< baseurl >}}riak/kv/2.1.1/learn/glossary/#vnode),
called _primary vnodes_, that hold primary responsibility for any given
key. Riak will attempt reads and writes to primary vnodes first, but in
case of failure, those operations will go to failover nodes in order to
@@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below).

How quickly and robustly data is written to disk depends on the
configuration of your backend or backends. For more details, see the
-documentation on [Bitcask](/riak/kv/2.1.1/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.1.1/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.1.1/setup/planning/backend/multi).
+documentation on [Bitcask]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/bitcask), [LevelDB]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/leveldb), and [multiple backends]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/multi).

## Delete Quorum with RW
@@ -523,9 +523,9 @@ curl -XPUT \
 http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2
```

-All of Basho's [official Riak clients](/riak/kv/2.1.1/developing/client-libraries) enable you to
+All of Basho's [official Riak clients]({{< baseurl >}}riak/kv/2.1.1/developing/client-libraries) enable you to
set replication properties this way. For more detailed information,
-refer to the section on [development usage with Riak KV](/riak/kv/2.1.1/developing/usage)
+refer to the section on [development usage with Riak KV]({{< baseurl >}}riak/kv/2.1.1/developing/usage)
or to client-specific documentation:

* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
@@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes,
the following table lays out a number of possible scenarios for reads
and writes in Riak and how Riak is likely to respond. Some of these
scenarios involve issues surrounding conflict resolution, vector clocks,
-and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.1.1/learn/concepts/causal-context#vector-clocks) documentation for more information.
+and siblings, so we recommend reading the [Vector Clocks]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/causal-context#vector-clocks) documentation for more information.

#### Read Scenarios

@@ -549,8 +549,8 @@ vnodes responsible for an object.

Scenario | What happens in Riak
:--------|:--------------------
All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
-2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
-2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
diff --git a/content/riak/kv/2.1.1/developing/app-guide/strong-consistency.md b/content/riak/kv/2.1.1/developing/app-guide/strong-consistency.md
index 2c59f1cf75..817becfff5 100644
--- a/content/riak/kv/2.1.1/developing/app-guide/strong-consistency.md
+++ b/content/riak/kv/2.1.1/developing/app-guide/strong-consistency.md
@@ -15,25 +15,25 @@ aliases:
 - /riak/kv/2.1.1/dev/advanced/strong-consistency
---

-[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency
-[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency
-[use ref strong consistency#trade-offs]: /riak/2.1.3/using/reference/strong-consistency/#trade-offs
-[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode
-[config strong consistency#enable]: /riak/kv/2.1.1/configuring/strong-consistency/#enabling-strong-consistency
-[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types
-[cluster ops bucket types]: /riak/kv/2.1.1/using/cluster-operations/bucket-types
-[apps replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties
-[config strong consistency]: /riak/kv/2.1.1/configuring/strong-consistency
-[config strong consistency#fault]: /riak/kv/2.1.1/configuring/strong-consistency/#fault-tolerance
-[concept causal context]: /riak/kv/2.1.1/learn/concepts/causal-context
-[concept causal context#vector]: /riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks
-[concept version vector]: /riak/kv/2.1.1/learn/concepts/causal-context/#dotted-version-vectors
-[usage conflict resolution]: /riak/kv/2.1.1/developing/usage/conflict-resolution
-[usage update objects]: /riak/kv/2.1.1/developing/usage/updating-objects
-[use ref strong consistency#vs]: /riak/2.1.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency
-[dev client libraries]: /riak/kv/2.1.1/developing/client-libraries
-[getting started]: /riak/kv/2.1.1/developing/getting-started
-[config strong consistency#details]: /riak/kv/2.1.1/configuring/strong-consistency/#implementation-details
+[use ref strong consistency]: {{< baseurl >}}riak/kv/2.1.1/using/reference/strong-consistency
+[concept eventual consistency]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{< baseurl >}}riak/kv/2.1.1/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{< baseurl >}}riak/kv/2.1.1/learn/glossary/#vnode
+[config strong consistency#enable]: {{< baseurl >}}riak/kv/2.1.1/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types
+[cluster ops bucket types]: {{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/bucket-types
+[apps replication properties]: {{< baseurl >}}riak/kv/2.1.1/developing/app-guide/replication-properties
+[config strong consistency]: {{< baseurl >}}riak/kv/2.1.1/configuring/strong-consistency
+[config strong consistency#fault]: {{< baseurl >}}riak/kv/2.1.1/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/causal-context
+[concept causal context#vector]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/conflict-resolution
+[usage update objects]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{< baseurl >}}riak/kv/2.1.1/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{< baseurl >}}riak/kv/2.1.1/developing/client-libraries
+[getting started]: {{< baseurl >}}riak/kv/2.1.1/developing/getting-started
+[config strong consistency#details]: {{< baseurl >}}riak/kv/2.1.1/configuring/strong-consistency/#implementation-details

> **Please Note:**
>
diff --git a/content/riak/kv/2.1.1/developing/app-guide/write-once.md b/content/riak/kv/2.1.1/developing/app-guide/write-once.md
index ee4d3e8357..3750c738cc 100644
--- a/content/riak/kv/2.1.1/developing/app-guide/write-once.md
+++ b/content/riak/kv/2.1.1/developing/app-guide/write-once.md
@@ -17,10 +17,10 @@ aliases:
 - /riak/kv/2.1.1/dev/advanced/write-once
---

-[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode
-[bucket type]: /riak/kv/2.1.1/developing/usage/bucket-types
-[Riak data types]: /riak/kv/2.1.1/developing/data-types
-[strong consistency]: /riak/kv/2.1.1/developing/app-guide/strong-consistency
+[glossary vnode]: {{< baseurl >}}riak/kv/2.1.1/learn/glossary/#vnode
+[bucket type]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types
+[Riak data types]: {{< baseurl >}}riak/kv/2.1.1/developing/data-types
+[strong consistency]: {{< baseurl >}}riak/kv/2.1.1/developing/app-guide/strong-consistency

Riak 2.1.0 introduces the concept of write-once buckets, buckets whose entries
are intended to be written exactly once and never updated or overwritten.
@@ -103,7 +103,7 @@ The relationship between the `riak_client`, write-once workers, and
vnode proxies is illustrated in the following diagram:
-![Write Once](/images/write_once.png)
+![Write Once]({{< baseurl >}}images/write_once.png)
## Client Impacts
@@ -154,7 +154,7 @@ LevelDB. Riak will automatically fall back to synchronous writes with
all other backends.

{{% note title="Note on the `multi` backend" %}}
-The [Multi](/riak/kv/2.1.1/setup/planning/backend/multi) backend does not
+The [Multi]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/multi) backend does not
support asynchronous writes. Therefore, if LevelDB is used with the Multi
backend, it will be used in synchronous mode.
{{% /note %}}
diff --git a/content/riak/kv/2.1.1/developing/client-libraries.md b/content/riak/kv/2.1.1/developing/client-libraries.md
index 98fdb84434..21a1a78823 100644
--- a/content/riak/kv/2.1.1/developing/client-libraries.md
+++ b/content/riak/kv/2.1.1/developing/client-libraries.md
@@ -36,7 +36,7 @@ GitHub for bug reporting.

In addition to the official clients, Basho provides some unofficial
client libraries, listed below. There are also many client libraries and
-related [community projects](/community/projects/).
+related [community projects]({{< baseurl >}}community/projects/).

## Community Libraries
diff --git a/content/riak/kv/2.1.1/developing/data-types.md b/content/riak/kv/2.1.1/developing/data-types.md
index a44864a8f1..39fc0de214 100644
--- a/content/riak/kv/2.1.1/developing/data-types.md
+++ b/content/riak/kv/2.1.1/developing/data-types.md
@@ -38,9 +38,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con

The following section explains how to set up a bucket that uses Riak data
types. To get started using Riak data types:

-1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type).
+1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type).
2. [Confirm the bucket was properly configured](#confirm-bucket-configuration).
-3. [Activate the bucket type](#activate-the-bucket-type).
+3. [Activate the bucket type](#activate-bucket-type).

### Creating a Bucket with a Riak Data Type
@@ -261,5 +261,5 @@ Riak data types can be searched like any other object, but with the
added benefit that your data type is indexed as a different type by Solr,
the search platform behind Riak Search.

-In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code
+In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code
samples from each of our official client libraries.
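The bucket-type workflow in the data types diff above pairs naturally with a client-side example. Purely as a hedged sketch (the `counters` type, bucket, and key names are assumptions, and the bucket type must already be created and activated on the server), the official Riak Python client exercises a counter like this:

```python
# Hedged sketch: using a counter Data Type from the official Riak Python
# client. Assumes the bucket type was created and activated first, e.g.:
#   riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
#   riak-admin bucket-type activate counters
# All names below are illustrative.
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

bucket = client.bucket_type('counters').bucket('traffic')
counter = bucket.new('tickets')  # yields a Counter because of the bucket type
counter.increment(5)
counter.store()

print(bucket.get('tickets').value)  # -> 5 on a fresh key
```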
diff --git a/content/riak/kv/2.1.1/developing/faq.md b/content/riak/kv/2.1.1/developing/faq.md
index a1e5bbfc78..06e6f9ef34 100644
--- a/content/riak/kv/2.1.1/developing/faq.md
+++ b/content/riak/kv/2.1.1/developing/faq.md
@@ -16,19 +16,19 @@ aliases:
 - /riak/kv/2.1.1/community/faqs/developing
---

-[[Basho Bench]: /riak/kv/2.1.1/using/performance/benchmarking
-[Bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask
-[Bucket Properties]: /riak/kv/2.1.1/developing/usage
 [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
-[commit hooks]: /riak/kv/2.1.1/developing/usage/commit-hooks
-[Configuration Files]: /riak/kv/2.1.1/configuring/reference
 [contrib.basho.com]: https://github.com/basho/riak_function_contrib
-[Erlang Riak Client]: /riak/kv/2.1.1/developing/client-libraries
-[MapReduce]: /riak/kv/2.1.1/developing/usage/mapreduce
-[Memory]: /riak/kv/2.1.1/setup/planning/backend/memory
-[Riak CS]: /riak/cs/2.1.1
-[System Planning]: /riak/kv/2.1.1/setup/planning/start/#network-configuration-load-balancing
-[vector clocks]: /riak/kv/2.1.1/learn/concepts/causal-context#vector-clocks
+[[Basho Bench]: {{< baseurl >}}riak/kv/2.1.1/using/performance/benchmarking
+[Bitcask]: {{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/bitcask
+[Bucket Properties]: {{< baseurl >}}riak/kv/2.1.1/developing/usage
+[commit hooks]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/commit-hooks
+[Configuration Files]: {{< baseurl >}}riak/kv/2.1.1/configuring/reference
+[Erlang Riak Client]: {{< baseurl >}}riak/kv/2.1.1/developing/client-libraries
+[MapReduce]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/mapreduce
+[Memory]: {{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/memory
+[Riak CS]: {{< baseurl >}}riak/cs/2.1.1
+[System Planning]: {{< baseurl >}}riak/kv/2.1.1/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/causal-context#vector-clocks

## General
diff --git a/content/riak/kv/2.1.1/developing/getting-started.md b/content/riak/kv/2.1.1/developing/getting-started.md
index 05cc39a44c..94084c5851 100644
--- a/content/riak/kv/2.1.1/developing/getting-started.md
+++ b/content/riak/kv/2.1.1/developing/getting-started.md
@@ -12,8 +12,8 @@ menu:
 toc: true
---

-[install index]: /riak/kv/2.1.1/setup/installing
-[dev client libraries]: /riak/kv/2.1.1/developing/client-libraries
+[install index]: {{< baseurl >}}riak/kv/2.1.1/setup/installing
+[dev client libraries]: {{< baseurl >}}riak/kv/2.1.1/developing/client-libraries

Welcome, new Riak developer! This guide will get you started developing
against Riak KV with minimal fuss.
diff --git a/content/riak/kv/2.1.1/developing/getting-started/csharp.md b/content/riak/kv/2.1.1/developing/getting-started/csharp.md
index 5d9bf1d494..2914f8892b 100644
--- a/content/riak/kv/2.1.1/developing/getting-started/csharp.md
+++ b/content/riak/kv/2.1.1/developing/getting-started/csharp.md
@@ -17,7 +17,7 @@ aliases:

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.1/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.1/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of the .NET Framework or Mono is required.
@@ -79,4 +79,4 @@ We are now ready to start interacting with Riak.
## Next Steps

-[CRUD Operations](/riak/kv/2.1.1/developing/getting-started/csharp/crud-operations)
+[CRUD Operations]({{< baseurl >}}riak/kv/2.1.1/developing/getting-started/csharp/crud-operations)
diff --git a/content/riak/kv/2.1.1/developing/getting-started/csharp/querying.md b/content/riak/kv/2.1.1/developing/getting-started/csharp/querying.md
index 5b71420a93..1230c80737 100644
--- a/content/riak/kv/2.1.1/developing/getting-started/csharp/querying.md
+++ b/content/riak/kv/2.1.1/developing/getting-started/csharp/querying.md
@@ -125,9 +125,9 @@ intrinsic relationships.

## Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from an SQL world, Secondary Indexes (2i) are a lot
diff --git a/content/riak/kv/2.1.1/developing/getting-started/erlang.md b/content/riak/kv/2.1.1/developing/getting-started/erlang.md
index 56b7424e3e..5fc45db405 100644
--- a/content/riak/kv/2.1.1/developing/getting-started/erlang.md
+++ b/content/riak/kv/2.1.1/developing/getting-started/erlang.md
@@ -15,7 +15,7 @@ aliases:
 - /riak/kv/2.1.1/dev/taste-of-riak/erlang
---

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.1/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.1/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of Erlang is
required. You can also use the `erts` Erlang installation that comes
@@ -52,4 +52,4 @@ We are now ready to start interacting with Riak.

## Next Steps

-[CRUD Operations](/riak/kv/2.1.1/developing/getting-started/erlang/crud-operations)
+[CRUD Operations]({{< baseurl >}}riak/kv/2.1.1/developing/getting-started/erlang/crud-operations)
diff --git a/content/riak/kv/2.1.1/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.1.1/developing/getting-started/erlang/object-modeling.md
index e42d36a6f3..31f0c93212 100644
--- a/content/riak/kv/2.1.1/developing/getting-started/erlang/object-modeling.md
+++ b/content/riak/kv/2.1.1/developing/getting-started/erlang/object-modeling.md
@@ -43,7 +43,7 @@ here for brevity.

 -record(timeline, {owner, msg_type, msgs}).
 ```

-We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.1.1/developing/usage/bucket-types) here, so we don't need to specify one.
+We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types) here, so we don't need to specify one.

To use these records to store data, we will first have to create a user
record. Then, when a user creates a message, we will append that message
diff --git a/content/riak/kv/2.1.1/developing/getting-started/erlang/querying.md b/content/riak/kv/2.1.1/developing/getting-started/erlang/querying.md
index ce213ec3e6..0ec80fea62 100644
--- a/content/riak/kv/2.1.1/developing/getting-started/erlang/querying.md
+++ b/content/riak/kv/2.1.1/developing/getting-started/erlang/querying.md
@@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These
querying methods will introduce you to some ways of laying out your data
in Riak, along with how to query it back.

-A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.1.1/developing/key-value-modeling).
+A more comprehensive discussion can be found in [Key/Value Modeling]({{< baseurl >}}riak/kv/2.1.1/developing/key-value-modeling).

## Denormalization
@@ -215,9 +215,9 @@ intrinsic relationships.

## Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from an SQL world, Secondary Indexes (2i) are a lot
diff --git a/content/riak/kv/2.1.1/developing/getting-started/golang.md b/content/riak/kv/2.1.1/developing/getting-started/golang.md
index f49f4ae9d9..fb3a21122b 100644
--- a/content/riak/kv/2.1.1/developing/getting-started/golang.md
+++ b/content/riak/kv/2.1.1/developing/getting-started/golang.md
@@ -15,7 +15,7 @@ aliases:
 - /riak/kv/2.1.1/dev/taste-of-riak/golang
---

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.1/using/running-a-cluster) first and ensure you have
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.1/using/running-a-cluster) first and ensure you have
[a working installation of Go](http://golang.org/doc/install).

## Client Setup
@@ -75,4 +75,4 @@ We are now ready to start interacting with Riak.

## Next Steps

-[CRUD Operations](/riak/kv/2.1.1/developing/getting-started/golang/crud-operations)
+[CRUD Operations]({{< baseurl >}}riak/kv/2.1.1/developing/getting-started/golang/crud-operations)
diff --git a/content/riak/kv/2.1.1/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.1.1/developing/getting-started/golang/object-modeling.md
index e824b4f3d0..7a9487236d 100644
--- a/content/riak/kv/2.1.1/developing/getting-started/golang/object-modeling.md
+++ b/content/riak/kv/2.1.1/developing/getting-started/golang/object-modeling.md
@@ -135,7 +135,7 @@ func (t *Timeline) GetId() string {
 }
 ````

-We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.1.1/developing/usage/bucket-types) here, so we don't need to specify one.
+We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types) here, so we don't need to specify one.

To use these records to store data, we will first have to create a user
record. Then, when a user creates a message, we will append that message
diff --git a/content/riak/kv/2.1.1/developing/getting-started/golang/querying.md b/content/riak/kv/2.1.1/developing/getting-started/golang/querying.md
index acf2943cdf..c94176f679 100644
--- a/content/riak/kv/2.1.1/developing/getting-started/golang/querying.md
+++ b/content/riak/kv/2.1.1/developing/getting-started/golang/querying.md
@@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c

### Secondary Indexes

{{% note %}}
-Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes.
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes.

-See [Using Secondary Indexes (2i)](/riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time:
diff --git a/content/riak/kv/2.1.1/developing/getting-started/java.md b/content/riak/kv/2.1.1/developing/getting-started/java.md
index 897d970669..c37caf31c1 100644
--- a/content/riak/kv/2.1.1/developing/getting-started/java.md
+++ b/content/riak/kv/2.1.1/developing/getting-started/java.md
@@ -17,7 +17,7 @@ aliases:

-If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.1/using/running-a-cluster) first.
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.1/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of Java is required.
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.1.1/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.1/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.1.1/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.1.1/developing/getting-started/java/crud-operations.md index 939ab90cb0..7c4a99efe2 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.1.1/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in-depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.1.1/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.1.1/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -178,6 +178,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in-depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.1.1/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.1.1/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/) documentation. diff --git a/content/riak/kv/2.1.1/developing/getting-started/java/querying.md b/content/riak/kv/2.1.1/developing/getting-started/java/querying.md index 16a861999c..8624eaadab 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.1.1/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.1.1/developing/getting-started/nodejs.md b/content/riak/kv/2.1.1/developing/getting-started/nodejs.md index 8fbb0e40b2..7af80a20e7 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.1.1/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.1/using/running-a-cluster) first. 
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.1/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.1/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.1.1/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.1.1/developing/getting-started/nodejs/querying.md index fcfcada91a..88265b247e 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.1.1/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.1.1/developing/getting-started/php.md b/content/riak/kv/2.1.1/developing/getting-started/php.md index 6ac7f75710..152a754cab 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/php.md +++ b/content/riak/kv/2.1.1/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) must be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.1/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.1/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.1.1/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.1.1/developing/getting-started/php/crud-operations.md index fe582cd020..494434693d 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.1.1/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. 
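Those four primitives have the same shape in every client library. As an illustrative sketch with the official Riak Python client (connection settings and names are invented for the example):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('quotes')

# Create
obj = bucket.new('oscar_wilde', data={'quote': 'I have nothing to declare but my genius'})
obj.store()

# Read
fetched = bucket.get('oscar_wilde')
print(fetched.data['quote'])

# Update: mutate the fetched object so its causal context rides along
fetched.data['quote'] = 'Be yourself; everyone else is already taken'
fetched.store()

# Delete
bucket.delete('oscar_wilde')
```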
[In the next chapter](/riak/kv/2.1.1/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.1.1/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.1.1/developing/getting-started/php/querying.md b/content/riak/kv/2.1.1/developing/getting-started/php/querying.md index 64a28e15f7..1262e04791 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.1.1/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.1.1/developing/getting-started/python.md b/content/riak/kv/2.1.1/developing/getting-started/python.md index 43dbb0c157..b2985dc3de 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/python.md +++ b/content/riak/kv/2.1.1/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.1.1/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.1/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.1.1/developing/getting-started/python/querying.md b/content/riak/kv/2.1.1/developing/getting-started/python/querying.md index f1b32368e2..1d948b1e4b 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.1.1/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.1.1/developing/getting-started/ruby.md b/content/riak/kv/2.1.1/developing/getting-started/ruby.md index a2aca2f203..e038737889 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/ruby.md +++ b/content/riak/kv/2.1.1/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.1/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.1/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.1/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.1.1/developing/getting-started/ruby/querying.md b/content/riak/kv/2.1.1/developing/getting-started/ruby/querying.md index a5a7ebf7e5..46a804d6f9 100644 --- a/content/riak/kv/2.1.1/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.1.1/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.1/setup/planning/backend/leveldb). 
[Bitcask](/riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.1.1/developing/key-value-modeling.md b/content/riak/kv/2.1.1/developing/key-value-modeling.md index f41be796d6..dbbbeb22e2 100644 --- a/content/riak/kv/2.1.1/developing/key-value-modeling.md +++ b/content/riak/kv/2.1.1/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.1.1/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.1.1/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.1.1/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.1.1/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.1.1/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.1.1/developing/app-guide/) for a better sense of which features you might need. +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.1.1/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects. 
Instead, it interacts with objects on a one-by-one basis, using **primary key lookups**. Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.1.1/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.1.1/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.1.1/developing/app-guide/replication-properties) and other properties +* The object's [key]({{}}riak/kv/2.1.1/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/2.1.1/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.1.1/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.1.1/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.1.1/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.1.1/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.1.1/developing/data-types/sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.1.1/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.1.1/developing/data-types/sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.1.1/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. 
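A sketch of that bookkeeping with the official Riak Python client might look like the following; it assumes the set-capable bucket type called `sets` that the next sentence sets up, and the helper name is invented:

```python
import riak
from riak.datatypes import Set

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('sets').bucket('user_info_sets')

def record_new_user(username):
    # Add the new record's key to the 'usernames' set so the application
    # can enumerate users later without an expensive list-keys operation.
    user_id_set = Set(bucket, 'usernames')
    user_id_set.add(username)
    user_id_set.store()

record_new_user('basho_user_1')
```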
The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.1.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.1.1/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.1/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.1.1/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.1.1/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.1.1/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.1.1/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.1.1/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.1.1/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.1.1/developing/usage/commit-hooks.md b/content/riak/kv/2.1.1/developing/usage/commit-hooks.md index e692a25a42..8960065912 100644 --- a/content/riak/kv/2.1.1/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.1.1/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. -Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.1.1/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.1.1/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. 
## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.1.1/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.1.1/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.1.1/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.1.1/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.1.1/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.1.1/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.1.1/developing/usage/conflict-resolution.md b/content/riak/kv/2.1.1/developing/usage/conflict-resolution.md index 616c791f62..85179c17e8 100644 --- a/content/riak/kv/2.1.1/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.1.1/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.1.1/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.1.1/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.1.1/learn/concepts/clusters) system in which any [node](/riak/kv/2.1.1/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{}}riak/kv/2.1.1/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.1.1/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. 
-If you are using Riak in an [eventually consistent](/riak/kv/2.1.1/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.1.1/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.1.1/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.1.1/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.1.1/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.1.1/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.1.1/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.1.1/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.1.1/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.1.1/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.1.1/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.1.1/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.1.1/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.1.1/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. The most important [bucket properties](/riak/kv/2.1.1/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.1.1/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. 
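For illustration, here is how those two properties can be inspected and set on a single bucket with the official Riak Python client; the bucket name is invented, and in production you would normally bake these settings into a bucket type instead:

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('my_bucket')

print(bucket.get_properties()['allow_mult'])

# Keep conflicting writes as siblings for the application to resolve...
bucket.set_properties({'allow_mult': True, 'last_write_wins': False})

# ...or discard conflicts in favor of the latest timestamp:
# bucket.set_properties({'allow_mult': False, 'last_write_wins': True})
```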
@@ -87,7 +87,7 @@ If the `[allow_mult](#siblings)` parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -`[last_write_wins](/riak/kv/2.1.1/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, +`[last_write_wins]({{}}riak/kv/2.1.1/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.1.1/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.1.1/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.1.1/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.1.1/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.1.1/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.1.1/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.1.1/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.1.1/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.1.1/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.1.1/configuring/reference) to change the [default bucket properties](/riak/kv/2.1.1/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.1.1/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.1.1/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.1.1/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.1.1/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{}}riak/kv/2.1.1/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.1.1/learn/concepts/causal-context#vector-clocks) will be used. 
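The client-side resolution hooks linked above all share one shape: a function receives a fetched object and collapses its siblings. A timestamp-based resolver is the simplest possible sketch (shown here with the official Riak Python client; it reproduces last-write-wins semantics and is illustrative rather than a recommendation):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('user_data')  # illustrative name

def last_written_resolver(riak_object):
    # Keep only the sibling with the newest last-modified timestamp.
    riak_object.siblings = [max(riak_object.siblings,
                                key=lambda sibling: sibling.last_modified)]

# Attach the resolver; subsequent fetches return a single resolved value.
bucket.resolver = last_written_resolver
obj = bucket.get('some_key')
```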
Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.1.1/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.1.1/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.1.1/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.1.1/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.1.1/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.1.1/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.1.1/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.1.1/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.1.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.1.1/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.1.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.1/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. 
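In practice, "always use causal context" simply means read-modify-write through the client rather than issuing blind writes. A hedged sketch of the safe update pattern with the official Riak Python client (names are invented):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('user_data')

# Fetch first: the returned object carries its causal context.
obj = bucket.get('bashobunny')

# Mutate the fetched object rather than constructing a fresh one...
obj.data = {'status': 'pensive'}

# ...so that store() hands the context back to Riak, which prevents
# the update from spawning gratuitous siblings under allow_mult=true.
obj.store()
```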
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.1.1/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.1.1/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.1.1/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.1.1/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.1.1/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -611,7 +611,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.1.1/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.1.1/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -666,7 +666,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/csharp.md index 180e629243..8b6f853076 100644 --- a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.1/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
diff --git a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/golang.md index b341fd836c..65585b6757 100644 --- a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.1/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to usecase-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/java.md index ea0f64171d..ecb1dea6ea 100644 --- a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.1/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.1/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.1/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.1/developing/data-types/) that have specific conflict resolution mechanics built in. 
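As a quick taste of what "built in" means in practice: a counter Data Type converges on its own under concurrent increments, so no merge function is ever written. A minimal sketch with the official Riak Python client, assuming a counter-capable bucket type named `counters` (an invented name):

```python
import riak
from riak.datatypes import Counter

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('counters').bucket('page_stats')

visits = Counter(bucket, 'homepage_visits')
visits.increment()  # concurrent increments from other clients merge safely
visits.store()
```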
If you have data that -can be modeled as a [counter](/riak/kv/2.1.1/developing/data-types/counters), [set](/riak/kv/2.1.1/developing/data-types/sets), or [map](/riak/kv/2.1.1/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.1/developing/data-types/counters), [set]({{}}riak/kv/2.1.1/developing/data-types/sets), or [map]({{}}riak/kv/2.1.1/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.1/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.1/developing/data-types/sets). diff --git a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/nodejs.md index 8cfcd10140..0999ec6491 100644 --- a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.1/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/php.md index 44b7009276..c40f5cac42 100644 --- a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.1/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.1/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.1/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.1/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.1.1/developing/data-types/counters), [set](/riak/kv/2.1.1/developing/data-types/sets), or [map](/riak/kv/2.1.1/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.1/developing/data-types/counters), [set]({{}}riak/kv/2.1.1/developing/data-types/sets), or [map]({{}}riak/kv/2.1.1/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.1/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.1/developing/data-types/sets). diff --git a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/python.md index a171cd9dcc..e36712e433 100644 --- a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.1/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
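Whatever the client language, the merge logic these pages build up amounts to taking the union of the sibling friends lists. A Python-flavored sketch of that core step (the `friends` field follows the surrounding example; wiring it up as a resolver mirrors the earlier sketch):

```python
def merge_friends(riak_object):
    # Union the friends lists carried by all of the siblings.
    all_friends = set()
    for sibling in riak_object.siblings:
        all_friends.update(sibling.data.get('friends', []))

    # Reuse one sibling as the base and collapse the rest into it.
    merged = riak_object.siblings[0]
    merged.data['friends'] = sorted(all_friends)
    riak_object.siblings = [merged]

# Attach with bucket.resolver = merge_friends, as in the earlier sketch.
```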
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.1/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.1/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.1/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.1.1/developing/data-types/counters), [set](/riak/kv/2.1.1/developing/data-types/sets), or [map](/riak/kv/2.1.1/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.1/developing/data-types/counters), [set]({{}}riak/kv/2.1.1/developing/data-types/sets), or [map]({{}}riak/kv/2.1.1/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.1/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.1/developing/data-types/sets). diff --git a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/ruby.md index 4baaa30c0a..b128acf659 100644 --- a/content/riak/kv/2.1.1/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.1.1/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.1/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.1/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.1/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.1/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.1.1/developing/data-types/counters), [set](/riak/kv/2.1.1/developing/data-types/sets), or [map](/riak/kv/2.1.1/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.1/developing/data-types/counters), [set]({{}}riak/kv/2.1.1/developing/data-types/sets), or [map]({{}}riak/kv/2.1.1/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.1/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.1/developing/data-types/sets). diff --git a/content/riak/kv/2.1.1/developing/usage/creating-objects.md b/content/riak/kv/2.1.1/developing/usage/creating-objects.md index 7c6dad68d9..63da2063a9 100644 --- a/content/riak/kv/2.1.1/developing/usage/creating-objects.md +++ b/content/riak/kv/2.1.1/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.1.1/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.1.1/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -27,7 +27,7 @@ In the example above, our read was unsuccessful because our Riak cluster is currently empty. Let's change that by storing an object containing information about a dog named Rufus. We'll store that object in the location described above, i.e. in the key `rufus` in the bucket `dogs`, -which bears the `animals` [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types). +which bears the `animals` [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types). 
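For orientation, the finished write that the next paragraphs build up step by step comes out roughly like this in the official Riak Python client (a sketch; the connection settings are illustrative, and the `animals`/`dogs`/`rufus` names come from the example itself):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('animals').bucket('dogs')

# Store the plain-text object under the key 'rufus'.
obj = bucket.new('rufus', encoded_data=b'WOOF!', content_type='text/plain')
obj.store()
```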
The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -122,7 +122,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, run the same read operation in [Reading Objects](/riak/kv/2.1.1/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no +Now, run the same read operation in [Reading Objects]({{}}riak/kv/2.1.1/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no longer empty! ### Store an Object @@ -143,7 +143,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.1.1/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.1.1/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.1.1/developing/usage/custom-extractors.md b/content/riak/kv/2.1.1/developing/usage/custom-extractors.md index fa577a173d..1806166278 100644 --- a/content/riak/kv/2.1.1/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.1.1/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.1.1/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.1.1/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.1.1/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.1.1/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.1.1/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.1.1/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added
to `<fields>` in the schema, which we'll name `http_header_schema` and
store in a `http_header_schema.xml` file:

@@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \
   -d '{"schema":"http_header_schema"}'
 ```
 
-Now, we can create and activate a [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types)
+Now, we can create and activate a [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types)
 for all of the HTTP header data that we plan to store. Any bucket that
 bears this type will be associated with our `header_data` search index.
 We'll call our bucket type `http_data_store`.
diff --git a/content/riak/kv/2.1.1/developing/usage/deleting-objects.md b/content/riak/kv/2.1.1/developing/usage/deleting-objects.md
index 4cb7308527..1e788948c9 100644
--- a/content/riak/kv/2.1.1/developing/usage/deleting-objects.md
+++ b/content/riak/kv/2.1.1/developing/usage/deleting-objects.md
@@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
 
 If you are updating an object that has been deleted---or if an update
 might target a deleted object---we recommend that
-you first fetch the [causal context](/riak/kv/2.1.1/learn/concepts/causal-context) of the object prior to updating.
+you first fetch the [causal context]({{}}riak/kv/2.1.1/learn/concepts/causal-context) of the object prior to updating.
 This can be done by setting the `deletedvclock` parameter to `true` as
-part of the [fetch operation](/riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object). This can also be done
+part of the [fetch operation]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/fetch-object). This can also be done
 with the official Riak clients for Ruby, Java, and Erlang, as in the
 example below:
 
diff --git a/content/riak/kv/2.1.1/developing/usage/document-store.md b/content/riak/kv/2.1.1/developing/usage/document-store.md
index f0139e8890..794b74d7e9 100644
--- a/content/riak/kv/2.1.1/developing/usage/document-store.md
+++ b/content/riak/kv/2.1.1/developing/usage/document-store.md
@@ -16,23 +16,23 @@ aliases:
 ---
 
 Although Riak wasn't explicitly created as a document store, two
-features recently added to Riak---[Riak Search](/riak/kv/2.1.1/developing/usage/search/) and [Riak Data Types](/riak/kv/2.1.1/developing/data-types/)---make it possible to use Riak as a
+features recently added to Riak---[Riak Search]({{}}riak/kv/2.1.1/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.1.1/developing/data-types/)---make it possible to use Riak as a
 highly scalable document store with rich querying capabilities. In this
 tutorial, we'll build a basic implementation of a document store using
-[Riak maps](/riak/kv/2.1.1/developing/data-types/maps).
+[Riak maps]({{}}riak/kv/2.1.1/developing/data-types/maps).
 
 ## Basic Approach
 
 Riak Search enables you to implement a document store in Riak in a
 variety of ways. You could, for example, store and query JSON objects or
 XML and then retrieve them later via Solr queries. In this tutorial,
-however, we will store data in [Riak maps](/riak/kv/2.1.1/developing/data-types/maps),
+however, we will store data in [Riak maps]({{}}riak/kv/2.1.1/developing/data-types/maps),
 index that data using Riak Search, and then run Solr queries against
 those stored objects.
 
 You can think of these Search indexes as **collections**.
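Creating one of these collection indexes follows the same pattern shown above for `header_data`. As a rough sketch, assuming a local node on port 8098 and an illustrative index named `blog_posts` backed by a hypothetical `blog_post_schema`:

```curl
# Create a Search index that will serve as our "collection"
curl -XPUT http://localhost:8098/search/index/blog_posts \
  -H 'Content-Type: application/json' \
  -d '{"schema":"blog_post_schema"}'
```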
Each indexed document will have an ID generated
automatically by Search, and because
-we're not interested in running normal [key/value queries](/riak/kv/2.1.1/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.1.1/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects.
+we're not interested in running normal [key/value queries]({{}}riak/kv/2.1.1/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.1.1/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects.
 
 ## Use Case
 
@@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema.
 
 ## Creating a Schema and Index
 
-In the documentation on [search schemas](/riak/kv/2.1.1/developing/usage/search-schemas), you'll find a
+In the documentation on [search schemas]({{}}riak/kv/2.1.1/developing/usage/search-schemas), you'll find a
 baseline schema to be used for creating custom schemas. We'll use that
 baseline schema here and add the following fields to the `<fields>`
 list:
 
@@ -242,7 +242,7 @@ as part of our "collection."
 
 Now that we know how each element of a blog post can be translated into
 one of the Riak Data Types, we can create an interface in our
 application to serve as that translation layer. Using the method
-described in [Data Modeling with Riak Data Types](/riak/kv/2.1.1/developing/data-modeling), we can construct a
+described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.1.1/developing/data-modeling), we can construct a
 class that looks like this:
 
 ```java
diff --git a/content/riak/kv/2.1.1/developing/usage/mapreduce.md b/content/riak/kv/2.1.1/developing/usage/mapreduce.md
index c5491fc9b8..11d5659547 100644
--- a/content/riak/kv/2.1.1/developing/usage/mapreduce.md
+++ b/content/riak/kv/2.1.1/developing/usage/mapreduce.md
@@ -33,9 +33,9 @@ transferring a potentially huge dataset to a client
 algorithm. Developers can use MapReduce for things like filtering
 documents by tags, counting words in documents, and extracting links to
 related data. In Riak, MapReduce is one method for querying that is not strictly based
-on key querying, alongside [secondary indexes](/riak/kv/2.1.1/developing/usage/secondary-indexes/)
-and [Search](/riak/kv/2.1.1/developing/usage/search/). MapReduce jobs can be submitted through the
-[HTTP API](/riak/kv/2.1.1/developing/api/http) or the [Protocol Buffers API](/riak/kv/2.1.1/developing/api/protocol-buffers/), although we
+on key querying, alongside [secondary indexes]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes/)
+and [Search]({{}}riak/kv/2.1.1/developing/usage/search/). MapReduce jobs can be submitted through the
+[HTTP API]({{}}riak/kv/2.1.1/developing/api/http) or the [Protocol Buffers API]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/), although we
 strongly recommend using the Protocol Buffers API for performance
 reasons.
 
@@ -49,9 +49,9 @@ reasons.
 ## When to Use MapReduce
 
 * When you know the set of objects over which you want to MapReduce
-  (i.e. the locations of the objects, as specified by [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types), bucket, and key)
+  (i.e. the locations of the objects, as specified by [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types), bucket, and key)
 * When you want to return actual objects or pieces of objects and not
-  just the keys. 
[Search](/riak/kv/2.1.1/developing/usage/search/) and [secondary indexes](/riak/kv/2.1.1/developing/usage/secondary-indexes) are other means of returning objects based on + just the keys. [Search]({{}}riak/kv/2.1.1/developing/usage/search/) and [secondary indexes]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes) are other means of returning objects based on non-key-based queries, but they only return lists of keys and not whole objects. * When you need the utmost flexibility in querying your data. MapReduce @@ -86,7 +86,7 @@ Riak MapReduce queries have two components: * A list of phases The elements of the input list are object locations as specified by -[bucket type](/riak/kv/2.1.1/developing/usage/bucket-types), bucket, and key. The elements of the +[bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types), bucket, and key. The elements of the phases list are chunks of information related to a map, a reduce, or a link function. @@ -96,7 +96,7 @@ node that the client contacts to make the request becomes the above, each job consists of a list of phases, where each phase is either a map or a reduce phase. The coordinating node uses the list of phases to route the object keys and the function that will operate over the -objects stored in those keys and instruct the proper [vnode](/riak/kv/2.1.1/learn/glossary/#vnode) to +objects stored in those keys and instruct the proper [vnode]({{}}riak/kv/2.1.1/learn/glossary/#vnode) to run that function over the right objects. After running the map function, the results are sent back to the @@ -107,20 +107,20 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example In this example, we'll create four objects with the text "caremad" repeated a varying number of times and store those objects in the bucket -`training` (which does not bear a [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types)). +`training` (which does not bear a [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types)). An Erlang MapReduce function will be used to count the occurrences of the word "caremad." ### Data object input commands For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) -in conjunction with Riak's [HTTP API](/riak/kv/2.1.1/developing/api/http) to store the objects: +in conjunction with Riak's [HTTP API]({{}}riak/kv/2.1.1/developing/api/http) to store the objects: ```curl curl -XPUT http://localhost:8098/buckets/training/keys/foo \ @@ -218,4 +218,4 @@ counting the number of instances of the word. ## Advanced MapReduce Queries For more detailed information on MapReduce queries in Riak, we recommend -checking out our [Advanced MapReduce](/riak/kv/2.1.1/developing/app-guide/advanced-mapreduce) guide. +checking out our [Advanced MapReduce]({{}}riak/kv/2.1.1/developing/app-guide/advanced-mapreduce) guide. diff --git a/content/riak/kv/2.1.1/developing/usage/reading-objects.md b/content/riak/kv/2.1.1/developing/usage/reading-objects.md index 17534c615b..87418b7410 100644 --- a/content/riak/kv/2.1.1/developing/usage/reading-objects.md +++ b/content/riak/kv/2.1.1/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. 
You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.1.1/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) `animals`: +`dogs`, which bears the [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types) `animals`: ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.1.1/developing/usage/replication.md b/content/riak/kv/2.1.1/developing/usage/replication.md index 902b176786..4408219145 100644 --- a/content/riak/kv/2.1.1/developing/usage/replication.md +++ b/content/riak/kv/2.1.1/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.1.1/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.1.1/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.1.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.1.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using Strong +Using Strong Consistency documentation, as this option will not be covered in this tutorial. 
{{% /note %}} @@ -71,7 +71,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.1.1/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.1.1/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -95,8 +95,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -105,7 +105,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.1.1/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.1.1/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -317,7 +317,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.1.1/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.1.1/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -355,7 +355,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.1.1/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.1.1/setup/planning/backend/multi). 
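To make the bucket type example from earlier in this section concrete, here is a sketch of how a type carrying those properties might be created and activated with `riak-admin` (the type name `n_val_of_5` is illustrative):

```bash
# Create a bucket type whose buckets store 5 replicas and
# require 3 vnode responses for both reads and writes
riak-admin bucket-type create n_val_of_5 '{"props":{"n_val":5,"r":3,"w":3}}'

# Activate the type so that clients can use it
riak-admin bucket-type activate n_val_of_5
```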
## Delete Quorum with RW @@ -530,9 +530,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.1.1/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.1.1/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.1.1/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.1.1/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -546,7 +546,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.1.1/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.1.1/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -556,8 +556,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
 
diff --git a/content/riak/kv/2.1.1/developing/usage/search-schemas.md b/content/riak/kv/2.1.1/developing/usage/search-schemas.md
index 017ec41e81..3a01f24f0b 100644
--- a/content/riak/kv/2.1.1/developing/usage/search-schemas.md
+++ b/content/riak/kv/2.1.1/developing/usage/search-schemas.md
@@ -15,17 +15,17 @@ aliases:
   - /riak/kv/2.1.1/dev/advanced/search-schema
 ---
 
-[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters
+[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters
 
 > **Note on Search 2.0 vs. Legacy Search**
 >
 > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed
-Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/).
+Yokozuna).
 
 Riak Search is built for ease of use, allowing you to write values into
 Riak and query for values using Solr. Riak Search does a lot of work
-under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.1.1/developing/data-types/), and [more](/riak/kv/2.1.1/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.1.1/developing/data-types/), and [more]({{}}riak/kv/2.1.1/developing/usage/custom-extractors)---into something that can be indexed and searched later.
 Nonetheless, you must still instruct Riak/Solr how to index a value. Are
 you providing an array of strings? An integer? A date? Is your text in
 English or Russian? 
You can provide such instructions to Riak Search by @@ -224,7 +224,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.1.1/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.1.1/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.1.1/developing/usage/search.md b/content/riak/kv/2.1.1/developing/usage/search.md index 18b6c2dfbe..ec256653e6 100644 --- a/content/riak/kv/2.1.1/developing/usage/search.md +++ b/content/riak/kv/2.1.1/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.1.1/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.1.1/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.1.1/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.1/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.1.1/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.1.1/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.1.1/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.1.1/developing/usage/custom-extractors). 
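To see the JSON extractor at work end to end, here is a rough curl sketch that stores a JSON object and then queries the `famous` index created above (it assumes a bucket type that has been associated with that index; the bucket, key, and field values are illustrative):

```curl
# Store a JSON object; the extractor maps name_s, age_i, and
# leader_b onto Solr fields
curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \
  -H 'Content-Type: application/json' \
  -d '{"name_s": "Lion-o", "age_i": 30, "leader_b": true}'

# Query the index for names beginning with "Lion"
curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*"
```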
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.1.1/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.1.1/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.1.1/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.1.1/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.1.1/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.1.1/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.1.1/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.1.1/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.1.1/developing/usage/searching-data-types.md b/content/riak/kv/2.1.1/developing/usage/searching-data-types.md index 56582d06f2..9bb7140989 100644 --- a/content/riak/kv/2.1.1/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.1.1/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.1/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.1.1/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.1.1/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.1.1/developing/data-types/counters), [sets](/riak/kv/2.1.1/developing/data-types/sets), and [maps](/riak/kv/2.1.1/developing/data-types/maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.1.1/developing/data-types/counters), [sets]({{}}riak/kv/2.1.1/developing/data-types/sets), and [maps]({{}}riak/kv/2.1.1/developing/data-types/maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
### Top-level Schemas -The default schema for [counters](/riak/kv/2.1.1/developing/data-types/counters) indexes each +The default schema for [counters]({{}}riak/kv/2.1.1/developing/data-types/counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.1.1/developing/data-types/sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.1.1/developing/data-types/sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.1.1/developing/data-types/maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.1.1/developing/data-types/maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) for [storing counters](/riak/kv/2.1.1/developing/data-types/counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.1.1/developing/data-types/counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types) for [storing sets](/riak/kv/2.1.1/developing/data-types/sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.1.1/developing/data-types/sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.1.1/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.1.1/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.1.1/developing/data-types/maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.1.1/developing/data-types/maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.1.1/developing/usage/secondary-indexes.md b/content/riak/kv/2.1.1/developing/usage/secondary-indexes.md index e05ca59af0..03217e343b 100644 --- a/content/riak/kv/2.1.1/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.1.1/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.1.1/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.1/setup/planning/backend/memory -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.1/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.1.1/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.1.1/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.1.1/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.1.1/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.1.1/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.1.1/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.1.1/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.1.1/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.1.1/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.1.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.1.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.1.1/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.1.1/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.1.1/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.1.1/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.1.1/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.1.1/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.1/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.1.1/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.1/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.1/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.1.1/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.1.1/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.1.1/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.1.1/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
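Before moving on to the query types below, here is a quick sketch of what a 2i query looks like over the HTTP API (it assumes the object stored above was tagged with a `twitter_bin` index; the index name and value are illustrative):

```curl
# Return all keys in the users bucket tagged with this handle
curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
```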
### Exact Match

diff --git a/content/riak/kv/2.1.1/developing/usage/security.md b/content/riak/kv/2.1.1/developing/usage/security.md
index bfba83dfab..9d28c50650 100644
--- a/content/riak/kv/2.1.1/developing/usage/security.md
+++ b/content/riak/kv/2.1.1/developing/usage/security.md
@@ -15,49 +15,49 @@ aliases:
   - /riak/kv/2.1.1/dev/advanced/client-security
 ---
 
-Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.1.1/using/security/basics) that enables you to choose
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.1.1/using/security/basics) that enables you to choose
 
 * which Riak users/clients are authorized to perform a wide variety of
   Riak operations, and
 * how those users/clients are required to authenticate themselves.
 
-The following four authentication mechanisms, aka [security sources](/riak/kv/2.1.1/using/security/managing-sources/) are available:
+The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.1.1/using/security/managing-sources/), are available:
 
-* [Trust](/riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication)-based
+* [Trust]({{}}riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication)-based
   authentication enables you to specify trusted
   [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
   from which all clients will be authenticated by default
-* [Password](/riak/kv/2.1.1/using/security/managing-sources/#password-based-authentication)-based authentication requires
+* [Password]({{}}riak/kv/2.1.1/using/security/managing-sources/#password-based-authentication)-based authentication requires
   that clients provide a username and password
-* [Certificate](/riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication
+* [Certificate]({{}}riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication
   requires that clients present an SSL certificate
-* [Pluggable authentication module (PAM)](/riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+* [Pluggable authentication module (PAM)]({{}}riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication)-based authentication requires
   clients to authenticate using the PAM service specified using the
-  `[riak-admin security](/riak/kv/2.1.1/using/security/managing-sources/#managing-sources)`
+  `[riak-admin security]({{}}riak/kv/2.1.1/using/security/managing-sources/#managing-sources)`
   command line interface
 
 Riak's approach to security is highly flexible. If you choose to use
 Riak's security feature, you do not need to require that all clients
 authenticate via the same means. Instead, you can specify authentication
 sources on a client-by-client, i.e. user-by-user, basis. This means that
-you can require clients performing, say, [MapReduce](/riak/kv/2.1.1/developing/usage/mapreduce/)
-operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.1.1/developing/usage) have to use username and password. The approach
+you can require clients performing, say, [MapReduce]({{}}riak/kv/2.1.1/developing/usage/mapreduce/)
+operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.1.1/developing/usage) have to use username and password. The approach
 that you adopt will depend on your security needs. This document
 provides a general overview of how that works.
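As a rough sketch of what that per-user setup might look like on the admin side (the username, password, and CIDR below are illustrative):

```bash
# Turn on the security subsystem; make sure your clients are
# prepared for this before running it on a production cluster
riak-admin security enable

# Create a user and require password authentication for
# connections coming from one trusted network
riak-admin security add-user riakuser password=rosebud
riak-admin security add-source riakuser 127.0.0.1/32 password
```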
For managing security in Riak itself, see the following documents:
 
-* [Authentication and Authorization](/riak/kv/2.1.1/using/security/basics)
-* [Managing Security Sources](/riak/kv/2.1.1/using/security/managing-sources/)
+* [Authentication and Authorization]({{}}riak/kv/2.1.1/using/security/basics)
+* [Managing Security Sources]({{}}riak/kv/2.1.1/using/security/managing-sources/)
 
 We also provide client-library-specific guides for the following
 officially supported clients:
 
-* [Java](/riak/kv/2.1.1/developing/usage/security/java)
-* [Ruby](/riak/kv/2.1.1/developing/usage/security/ruby)
-* [PHP](/riak/kv/2.1.1/developing/usage/security/php)
-* [Python](/riak/kv/2.1.1/developing/usage/security/python)
-* [Erlang](/riak/kv/2.1.1/developing/usage/security/erlang)
+* [Java]({{}}riak/kv/2.1.1/developing/usage/security/java)
+* [Ruby]({{}}riak/kv/2.1.1/developing/usage/security/ruby)
+* [PHP]({{}}riak/kv/2.1.1/developing/usage/security/php)
+* [Python]({{}}riak/kv/2.1.1/developing/usage/security/python)
+* [Erlang]({{}}riak/kv/2.1.1/developing/usage/security/erlang)
 
 ## Certificates, Keys, and Authorities
 
@@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients.
 
 > **HTTP not supported**
 >
 > Certificate-based authentication is available only through Riak's
-[Protocol Buffers](/riak/kv/2.1.1/developing/api/protocol-buffers/) interface. It is not available through the
-[HTTP API](/riak/kv/2.1.1/developing/api/http).
+[Protocol Buffers]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/) interface. It is not available through the
+[HTTP API]({{}}riak/kv/2.1.1/developing/api/http).
 
 ### Default Names
 
-In Riak's [configuration files](/riak/kv/2.1.1/configuring/reference/#security), the
+In Riak's [configuration files]({{}}riak/kv/2.1.1/configuring/reference/#security), the
 default certificate file names are as follows:
 
 Cert | Filename
diff --git a/content/riak/kv/2.1.1/developing/usage/security/erlang.md b/content/riak/kv/2.1.1/developing/usage/security/erlang.md
index c804d5a7d1..0e6e4242ff 100644
--- a/content/riak/kv/2.1.1/developing/usage/security/erlang.md
+++ b/content/riak/kv/2.1.1/developing/usage/security/erlang.md
@@ -19,9 +19,9 @@ aliases:
 
 This tutorial shows you how to set up a Riak Erlang client to
 authenticate itself when connecting to Riak.
 
-If you are using [trust](/riak/kv/2.1.1/using/security/managing-sources/), [PAM-](/riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.1.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+If you are using [trust]({{}}riak/kv/2.1.1/using/security/managing-sources/)- or [PAM]({{}}riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.1.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered
 in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+[certificate]({{}}riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
 the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.1/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.1.1/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.1.1/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.1.1/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.1.1/developing/usage/security/java.md b/content/riak/kv/2.1.1/developing/usage/security/java.md index 2e540ef395..24798357a6 100644 --- a/content/riak/kv/2.1.1/developing/usage/security/java.md +++ b/content/riak/kv/2.1.1/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.1/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.1.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.1.1/developing/usage/security/php.md b/content/riak/kv/2.1.1/developing/usage/security/php.md index 5319268336..55d10649ec 100644 --- a/content/riak/kv/2.1.1/developing/usage/security/php.md +++ b/content/riak/kv/2.1.1/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.1.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.1.1/developing/usage/security/python.md b/content/riak/kv/2.1.1/developing/usage/security/python.md index 6001bd7bee..2957f7d7d2 100644 --- a/content/riak/kv/2.1.1/developing/usage/security/python.md +++ b/content/riak/kv/2.1.1/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.1/using/security/managing-sources/) or [PAM-](/riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.1.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+If you are using [trust]({{}}riak/kv/2.1.1/using/security/managing-sources/)- or [PAM]({{}}riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.1.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered
 in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+[certificate]({{}}riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
 the instructions in the [section below](#certificate-based-authentication).
 
 {{% note title="Note on certificate generation" %}}
@@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object.
 
 If you are using Riak Security, _all_ connecting clients should have
 access to the same Certificate Authority (CA) used on the server side,
-regardless of which [security source](/riak/kv/2.1.1/using/security/managing-sources/) you
+regardless of which [security source]({{}}riak/kv/2.1.1/using/security/managing-sources/) you
 choose. All clients should also provide a username. The example below
 sets up a client object (we'll simply call it `client`) that connects to
 Riak on `localhost` and on port 8087 without any security credentials:
@@ -83,7 +83,7 @@ provided that the
 [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
 which the client is connecting has been specified as trusted. More on
 specifying trusted CIDRs can be found in [Trust-based
-Authentication](/riak/kv/2.1.1/using/security/managing-sources/#Trust-based-Authentication).
+Authentication]({{}}riak/kv/2.1.1/using/security/managing-sources/#Trust-based-Authentication).
 
 **Note**: The examples in the following sections specify certs on the
 basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
@@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser',
 
 ## PAM-based Authentication
 
 If you have specified that a specific client be authenticated using
-[PAM](/riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication), you will
+[PAM]({{}}riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication), you will
 need to provide a CA as well as the username and password that you
 specified when creating the user in Riak. For more, see our
-documentation on [User Management](/riak/kv/2.1.1/using/security/basics/#user-management).
+documentation on [User Management]({{}}riak/kv/2.1.1/using/security/basics/#user-management).
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.1.1/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.1.1/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.1.1/developing/usage/security/ruby.md b/content/riak/kv/2.1.1/developing/usage/security/ruby.md index 6569ad0b53..88b3046cde 100644 --- a/content/riak/kv/2.1.1/developing/usage/security/ruby.md +++ b/content/riak/kv/2.1.1/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.1/using/security/managing-sources/) or [PAM](/riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.1.1/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.1.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.1.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.1/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.1.1/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication). 
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.1.1/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.1.1/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.1.1/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.1.1/developing/usage/updating-objects.md b/content/riak/kv/2.1.1/developing/usage/updating-objects.md index 344afaba29..c8769d2fc8 100644 --- a/content/riak/kv/2.1.1/developing/usage/updating-objects.md +++ b/content/riak/kv/2.1.1/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/dev/using/updates --- -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode ## Using Causal Context @@ -23,9 +23,9 @@ If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.1.1/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.1.1/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.1.1/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.1.1/learn/concepts/causal-context). These objects track the causal history of objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps; 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.1.1/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.1.1/learn/concepts/causal-context) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.1.1/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.1.1/learn/concepts/causal-context). Although a more detailed tutorial on context objects and +object updates can be found in [Conflict Resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution), we'll walk you through a basic example here. 
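In the Python client, those three steps might look like this sketch (the bucket and key names are hypothetical, and a local node listening on port 8087 is assumed):

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('nba')          # hypothetical bucket name

obj = bucket.get('champion')           # 1. fetch; the causal context rides along
obj.data = 'Washington Generals'       # 2. modify the value, not the context
obj.store()                            # 3. write back with the fetched context
```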
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.1.1/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.1.1/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.1.1/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.1.1/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.1.1/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.1.1/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.1.1/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.1.1/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.1.1/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.1.1/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.1.1/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.1.1/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.1.1/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.1.1/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.1.1/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.1.1/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.1.1/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.1.1/index.md b/content/riak/kv/2.1.1/index.md index c0db0aab53..af5e26ad8f 100644 --- a/content/riak/kv/2.1.1/index.md +++ b/content/riak/kv/2.1.1/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.1.1/configuring -[dev index]: /riak/kv/2.1.1/developing -[downloads]: /riak/kv/2.1.1/downloads/ -[install index]: /riak/kv/2.1.1/setup/installing/ -[plan index]: /riak/kv/2.1.1/setup/planning -[perf open files]: /riak/kv/2.1.1/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.1.1/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.1.1/developing/usage/search -[getting started]: /riak/kv/2.1.1/developing/getting-started -[dev client libraries]: /riak/kv/2.1.1/developing/client-libraries +[config index]: {{}}riak/kv/2.1.1/configuring +[dev index]: {{}}riak/kv/2.1.1/developing +[downloads]: {{}}riak/kv/2.1.1/downloads/ +[install index]: {{}}riak/kv/2.1.1/setup/installing/ +[plan index]: {{}}riak/kv/2.1.1/setup/planning +[perf open files]: {{}}riak/kv/2.1.1/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.1.1/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.1.1/developing/usage/search +[getting started]: {{}}riak/kv/2.1.1/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.1.1/developing/client-libraries diff --git a/content/riak/kv/2.1.1/introduction.md b/content/riak/kv/2.1.1/introduction.md index 454d3043d5..6177a6a8de 100644 --- a/content/riak/kv/2.1.1/introduction.md +++ b/content/riak/kv/2.1.1/introduction.md @@ -27,7 +27,7 @@ For more in-depth implementation details check out the If you're upgrading to Riak 2.0 from an earlier version, please be aware that all of the new features listed below are optional: -* **Riak Data Types** --- Riak's new CRDT-based [Data Types](/riak/kv/2.1.1/developing/data-types) can +* **Riak Data Types** --- Riak's new CRDT-based [Data Types]({{}}riak/kv/2.1.1/developing/data-types) can simplify modeling data in Riak, but are only used in buckets explicitly configured to use them. * **Strong Consistency, Riak Security, and the New Riak Search** --- @@ -35,16 +35,16 @@ that all of the new features listed below are optional: work. If not turned on, they will have no impact on performance. Furthermore, the older Riak Search will continue to be included with Riak. -* **Security** --- [Authentication and authorization](/riak/kv/2.1.1/using/security/basics) can be enabled +* **Security** --- [Authentication and authorization]({{}}riak/kv/2.1.1/using/security/basics) can be enabled or disabled at any time. -* **Configuration management** --- Riak's [configuration files](/riak/kv/2.1.1/configuring/reference/) have +* **Configuration management** --- Riak's [configuration files]({{}}riak/kv/2.1.1/configuring/reference/) have been streamlined into a single file named `riak.conf`. If you are upgrading, however, your existing `app.config` and `vm.args` files will still be recognized in version 2.0. -* **Bucket Types** --- While we strongly recommend [using bucket types](/riak/kv/2.1.1/using/reference/bucket-types) when creating new buckets, they are not required. +* **Bucket Types** --- While we strongly recommend [using bucket types]({{}}riak/kv/2.1.1/using/reference/bucket-types) when creating new buckets, they are not required. 
* **Dotted Version Vectors (DVVs)** --- This alternative to traditional - [vector clocks](/riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks) is enabled by default - in all [bucket types](/riak/kv/2.1.1/using/reference/bucket-types), but DVVs can be disabled + [vector clocks]({{}}riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks) is enabled by default + in all [bucket types]({{}}riak/kv/2.1.1/using/reference/bucket-types), but DVVs can be disabled by setting the `dvv_enabled` property to `false` on any bucket type. In a nutshell, upgrading to 2.0 will change how you use Riak only if you @@ -52,17 +52,17 @@ want it to. But even if you don't plan on using the new features, there are a number of improvements that make upgrading a good choice, including the following: -* [Cluster metadata](/riak/kv/2.1.1/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that +* [Cluster metadata]({{}}riak/kv/2.1.1/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that reduces the amount of inter-node gossip in Riak clusters, which can reduce network congestion. -* [Active Anti-Entropy](/riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy +* [Active Anti-Entropy]({{}}riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy (AAE) feature that is turned on by default since version 1.3, AAE performance has been improved in version 2.0. * [Bug patches](https://github.com/basho/riak/blob/2.0/RELEASE-NOTES.md) --- A variety of bugs present in earlier versions have been identified and patched. -More on upgrading can be found in our [Riak 2.0 upgrade guide](/riak/kv/2.1.1/setup/upgrading/version). +More on upgrading can be found in our [Riak 2.0 upgrade guide]({{}}riak/kv/2.1.1/setup/upgrading/version). ## Riak Data Types @@ -73,20 +73,20 @@ application is responsible for resolving conflicts between replicas of objects stored in different Riak nodes. Riak 2.0 offers a new approach to this problem for a wide range of use -cases in the form of [Riak Data Types](/riak/kv/2.1.1/developing/data-types). Instead of +cases in the form of [Riak Data Types]({{}}riak/kv/2.1.1/developing/data-types). Instead of forcing the application to resolve conflicts, Riak offers five Data Types that can reduce some of the complexities of developing using -Riak: [flags](/riak/kv/2.1.1/developing/data-types/maps#flags), [registers](/riak/kv/2.1.1/developing/data-types/maps#registers), -[counters](/riak/kv/2.1.1/developing/data-types/counters), [sets](/riak/kv/2.1.1/developing/data-types/sets), and -[maps](/riak/kv/2.1.1/developing/data-types/maps). +Riak: [flags]({{}}riak/kv/2.1.1/developing/data-types/maps#flags), [registers]({{}}riak/kv/2.1.1/developing/data-types/maps#registers), +[counters]({{}}riak/kv/2.1.1/developing/data-types/counters), [sets]({{}}riak/kv/2.1.1/developing/data-types/sets), and +[maps]({{}}riak/kv/2.1.1/developing/data-types/maps). #### Relevant Docs -* [Using Data Types](/riak/kv/2.1.1/developing/data-types) explains how to use Riak Data Types on the +* [Using Data Types]({{}}riak/kv/2.1.1/developing/data-types) explains how to use Riak Data Types on the application side, with usage examples for all five Data Types in all of Basho's officially supported clients (Java, Ruby, Python, .NET and Erlang) and for Riak's HTTP interface. 
-* [Data Types](/riak/kv/2.1.1/developing/data-types) explains some of the theoretical concerns that drive +* [Data Types]({{}}riak/kv/2.1.1/developing/data-types) explains some of the theoretical concerns that drive Riak Data Types and shares details about how they are implemented in Riak. @@ -103,11 +103,11 @@ Search, integrating Riak with [Apache Solr](https://lucene.apache.org/solr/)'s f #### Relevant Docs -* [Using Search](/riak/kv/2.1.1/developing/usage/search) provides an overview of how to use the new +* [Using Search]({{}}riak/kv/2.1.1/developing/usage/search) provides an overview of how to use the new Riak Search. -* [Search Schema](/riak/kv/2.1.1/developing/usage/search-schemas) shows you how to create and manage custom search +* [Search Schema]({{}}riak/kv/2.1.1/developing/usage/search-schemas) shows you how to create and manage custom search schemas. -* [Search Details](/riak/kv/2.1.1/using/reference/search) provides an in-depth look at the design +* [Search Details]({{}}riak/kv/2.1.1/using/reference/search) provides an in-depth look at the design considerations that went into the new Riak Search. #### Video @@ -125,13 +125,13 @@ some (or perhaps all) of your data. #### Relevant Docs -* [Using Strong Consistency](/riak/kv/2.1.1/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong +* [Using Strong Consistency]({{}}riak/kv/2.1.1/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong consistency subsystem and to apply strong consistency guarantees to data stored in specified buckets. -* [Strong Consistency](/riak/kv/2.1.1/using/reference/strong-consistency) provides a theoretical treatment of how a - strongly consistent system differs from an [eventually consistent](/riak/kv/2.1.1/learn/concepts/eventual-consistency) system, as well as details about how +* [Strong Consistency]({{}}riak/kv/2.1.1/using/reference/strong-consistency) provides a theoretical treatment of how a + strongly consistent system differs from an [eventually consistent]({{}}riak/kv/2.1.1/learn/concepts/eventual-consistency) system, as well as details about how strong consistency is implemented in Riak. -* [Managing Strong Consistency](/riak/kv/2.1.1/configuring/strong-consistency) is a guide to strong consistency for +* [Managing Strong Consistency]({{}}riak/kv/2.1.1/configuring/strong-consistency) is a guide to strong consistency for Riak operators. #### Video @@ -155,11 +155,11 @@ Riak itself and managed through a simple command-line interface. #### Relevant Docs -* [Authentication and Authorization](/riak/kv/2.1.1/using/security/basics) explains how Riak Security can be +* [Authentication and Authorization]({{}}riak/kv/2.1.1/using/security/basics) explains how Riak Security can be enabled and disabled, how users and groups are managed, how authorization to perform certain operations can be granted and revoked, how security ciphers can be chosen, and more. -* [Managing Security Sources](/riak/kv/2.1.1/using/security/managing-sources/) is an in-depth tutorial on how to +* [Managing Security Sources]({{}}riak/kv/2.1.1/using/security/managing-sources/) is an in-depth tutorial on how to implement Riak's four supported authentication sources: trusted networks, passwords, pluggable authentication modules, and certificates. @@ -194,7 +194,7 @@ override any settings from the new system. 
#### Relevant Docs -* [Configuration Files](/riak/kv/2.1.1/configuring/reference/) lists and describes all of the configurable +* [Configuration Files]({{}}riak/kv/2.1.1/configuring/reference/) lists and describes all of the configurable parameters available in Riak 2.0, from configuring your chosen storage backend(s) to setting default bucket properties to controlling Riak's logging system and much more. @@ -214,7 +214,7 @@ and keys. #### Relevant Docs -* [Using Bucket Types](/riak/kv/2.1.1/using/reference/bucket-types) explains how to create, modify, and activate +* [Using Bucket Types]({{}}riak/kv/2.1.1/using/reference/bucket-types) explains how to create, modify, and activate bucket types, as well as how the new system differs from the older, bucket properties-based system. @@ -226,20 +226,20 @@ and [Jordan West](https://github.com/jrwest). ## Dotted Version Vectors -In prior versions of Riak, [conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution) was managed using -[vector clocks](/riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks), which track object update causality. +In prior versions of Riak, [conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution) was managed using +[vector clocks]({{}}riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks), which track object update causality. Riak 2.0 has added support for dotted version vectors (DVVs). DVVs serve an analogous role to vector -clocks but are more effective at containing [sibling explosion](/riak/kv/2.1.1/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. +clocks but are more effective at containing [sibling explosion]({{}}riak/kv/2.1.1/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. #### Relevant Docs -* [Dotted Version Vectors](/riak/kv/2.1.1/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. +* [Dotted Version Vectors]({{}}riak/kv/2.1.1/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. ## New Client Libraries -While Basho offered official [client libraries](/riak/kv/2.1.1/developing/client-libraries) for Java, Ruby, +While Basho offered official [client libraries]({{}}riak/kv/2.1.1/developing/client-libraries) for Java, Ruby, Python, .NET and Erlang for versions of Riak prior to 2.0, all clients have undergone major changes in anticipation of the 2.0 release. 
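One visible consequence in the updated clients is first-class support for bucket types. A rough Python sketch (the type and bucket names are hypothetical, and the type is assumed to have been created and activated by an operator beforehand):

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# Hypothetical names: bucket type 'no_siblings' holding a bucket 'sensors'
typed_bucket = client.bucket_type('no_siblings').bucket('sensors')

obj = typed_bucket.new('reading-001', data={'temp_c': 21.5})
obj.store()
```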
diff --git a/content/riak/kv/2.1.1/learn/concepts.md b/content/riak/kv/2.1.1/learn/concepts.md index ea40327455..97039007cf 100644 --- a/content/riak/kv/2.1.1/learn/concepts.md +++ b/content/riak/kv/2.1.1/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.1.1/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.1.1/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.1.1/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.1.1/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters -[concept crdts]: /riak/kv/2.1.1/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.1.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.1/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.1.1/learn/concepts/vnodes -[config index]: /riak/kv/2.1.1/configuring -[plan index]: /riak/kv/2.1.1/setup/planning -[use index]: /riak/kv/2.1.1/using/ +[concept aae]: {{}}riak/kv/2.1.1/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.1.1/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.1.1/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.1.1/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.1.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.1.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.1/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.1.1/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.1.1/configuring +[plan index]: {{}}riak/kv/2.1.1/setup/planning +[use index]: {{}}riak/kv/2.1.1/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.1.1/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.1.1/learn/concepts/active-anti-entropy.md index 71ce3603a3..324a1279de 100644 --- a/content/riak/kv/2.1.1/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.1.1/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.1.1/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.1.1/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.1.1/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.1.1/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.1.1/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.1.1/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.1.1/developing/usage/search +[usage search]: {{}}riak/kv/2.1.1/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.1.1/learn/concepts/buckets.md b/content/riak/kv/2.1.1/learn/concepts/buckets.md index 80c9ba8d71..166a4847bb 100644 --- a/content/riak/kv/2.1.1/learn/concepts/buckets.md +++ b/content/riak/kv/2.1.1/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.1.1/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.1.1/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.1.1/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.1.1/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.1.1/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.1.1/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.1.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.1/using/reference/strong-consistency -[config basic]: /riak/kv/2.1.1/configuring/basic -[dev api http]: /riak/kv/2.1.1/developing/api/http -[dev data types]: /riak/kv/2.1.1/developing/data-types -[glossary ring]: /riak/kv/2.1.1/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.1.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.1/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.1.1/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.1.1/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.1.1/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.1.1/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.1.1/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.1.1/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.1.1/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.1.1/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.1.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.1/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.1.1/configuring/basic +[dev api http]: {{}}riak/kv/2.1.1/developing/api/http +[dev data types]: {{}}riak/kv/2.1.1/developing/data-types +[glossary ring]: {{}}riak/kv/2.1.1/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.1.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.1/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.1.1/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.1.1/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.1.1/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.1.1/learn/concepts/capability-negotiation.md b/content/riak/kv/2.1.1/learn/concepts/capability-negotiation.md index f59db68d94..9cfa716586 100644 --- a/content/riak/kv/2.1.1/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.1.1/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.1.1/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.1.1/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.1.1/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.1.1/developing/usage/mapreduce In versions of Riak prior to 1.2.0, [rolling upgrades][upgrade cluster] from an older version of Riak to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.1.1/learn/concepts/causal-context.md b/content/riak/kv/2.1.1/learn/concepts/causal-context.md index e81b0c1cbf..ea88cf4267 100644 --- a/content/riak/kv/2.1.1/learn/concepts/causal-context.md +++ b/content/riak/kv/2.1.1/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.1.1/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.1.1/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.1.1/developing/api/http -[dev key value]: /riak/kv/2.1.1/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.1.1/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.1.1/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.1.1/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.1.1/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.1.1/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.1.1/developing/api/http +[dev key value]: {{}}riak/kv/2.1.1/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.1.1/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.1.1/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.1.1/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.1.1/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.1.1/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -78,7 +78,7 @@ sections below. In the case of outcome 2, the choice between **a** and **b** is yours to to make. If you set the `allow_mult` parameter to `true` for a bucket, -[using bucket types](/riak/kv/2.1.1/developing/usage/bucket-types), all writes to that bucket will create siblings +[using bucket types]({{}}riak/kv/2.1.1/developing/usage/bucket-types), all writes to that bucket will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions). 
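When `allow_mult` is `true`, application code has to be prepared to meet siblings on read. A naive resolution sketch in the Python client (names are hypothetical, the sibling values are assumed to be numeric, and the strategy of keeping the largest value is purely illustrative):

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
# Hypothetical names; assumes a bucket type created with allow_mult=true
bucket = client.bucket_type('siblings_allowed').bucket('scores')

obj = bucket.get('high_score')
if len(obj.siblings) > 1:
    # Keep the sibling with the largest value; writing the survivor back
    # with the fetched context resolves the conflict
    obj.siblings = [max(obj.siblings, key=lambda sibling: sibling.data)]
    obj.store()
```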
diff --git a/content/riak/kv/2.1.1/learn/concepts/clusters.md b/content/riak/kv/2.1.1/learn/concepts/clusters.md index 4b5c490d73..3b320ea417 100644 --- a/content/riak/kv/2.1.1/learn/concepts/clusters.md +++ b/content/riak/kv/2.1.1/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.1.1/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.1.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.1/learn/concepts/replication -[glossary node]: /riak/kv/2.1.1/learn/glossary/#node -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.1.1/learn/dynamo -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.1.1/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.1.1/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.1.1/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.1.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.1/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.1.1/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.1.1/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.1.1/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.1.1/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.1.1/learn/concepts/crdts.md b/content/riak/kv/2.1.1/learn/concepts/crdts.md index f0f577e7fd..0666e7338d 100644 --- a/content/riak/kv/2.1.1/learn/concepts/crdts.md +++ b/content/riak/kv/2.1.1/learn/concepts/crdts.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context dvv]: /riak/kv/2.1.1/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.1.1/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.1.1/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.1.1/developing/data-types -[glossary node]: /riak/kv/2.1.1/learn/glossary/#node -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.1.1/developing/usage/conflict-resolution +[concept causal context dvv]: {{}}riak/kv/2.1.1/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.1.1/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.1.1/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.1.1/developing/data-types +[glossary node]: {{}}riak/kv/2.1.1/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.1.1/developing/usage/conflict-resolution A pure key/value store is completely agnostic toward the data stored @@ -32,7 +32,7 @@ within it. Any key can be associated with values of any conceivable type, from short strings to large JSON objects to video files. Riak began as a pure key/value store, but over time it has become more and more aware of the data stored in it through features like [secondary -indexes](/riak/kv/2.1.1/developing/usage/secondary-indexes/) and [Search](/riak/kv/2.1.1/developing/usage/search/). +indexes]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes/) and [Search]({{}}riak/kv/2.1.1/developing/usage/search/). In version 2.0, Riak continued this evolution by introducing a series of eventually convergent **Data Types**. Riak Data Types are convergent @@ -214,7 +214,7 @@ The beauty of Data Types is that Riak "knows" how to resolve value conflicts by applying Data Type-specific rules. In general, Riak does this by remembering the **history** of a value and broadcasting that history along with the current value in the form of a [context -object](/riak/kv/2.1.1/developing/data-types/#Data-Types-and-Context) that is similar to a +object]({{}}riak/kv/2.1.1/developing/data-types/#Data-Types-and-Context) that is similar to a [vector clock][concept causal context vc] or `[dotted version vectors][concept causal context dvv]. Riak uses the history of each Data Type to make deterministic judgments about which value should be deemed correct. 
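That history-tracking is what lets Data Type operations stay merge-free on the application side. A brief counter sketch in the Python client (names are hypothetical, and a bucket type of the `counter` datatype is assumed to exist and be active):

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
# Hypothetical names; 'counters' is assumed to be an active bucket type
# created with datatype = counter
bucket = client.bucket_type('counters').bucket('page_hits')

counter = bucket.new('homepage')   # yields a Counter for datatype buckets
counter.increment(1)
counter.store()                    # Riak merges concurrent increments itself
```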
diff --git a/content/riak/kv/2.1.1/learn/concepts/eventual-consistency.md b/content/riak/kv/2.1.1/learn/concepts/eventual-consistency.md index 92a76e30c8..549f5aead7 100644 --- a/content/riak/kv/2.1.1/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.1.1/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.1.1/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters -[concept replication]: /riak/kv/2.1.1/learn/concepts/replication -[glossary node]: /riak/kv/2.1.1/learn/glossary/#node -[glossary read rep]: /riak/kv/2.1.1/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.1.1/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.1.1/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.1.1/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.1.1/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.1.1/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.1.1/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.1.1/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.1.1/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.1.1/developing/data-modeling/). +or models]({{}}riak/kv/2.1.1/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
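The request-tuning knobs from that section surface directly in the clients as per-request overrides of the bucket defaults. A Python sketch with placeholder names:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('tunable')      # hypothetical bucket name

# Ask for a reply from just one vnode on this read...
obj = bucket.get('some_key', r=1)

# ...but wait for two vnodes to acknowledge this write
obj.data = 'updated value'
obj.store(w=2)
```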
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.1.1/learn/concepts/keys-and-objects.md b/content/riak/kv/2.1.1/learn/concepts/keys-and-objects.md index c004925b43..2149d3c9e8 100644 --- a/content/riak/kv/2.1.1/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.1.1/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.1.1/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.1.1/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.1.1/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.1.1/learn/concepts/replication.md b/content/riak/kv/2.1.1/learn/concepts/replication.md index 3985be9fa5..e3cb066bc4 100644 --- a/content/riak/kv/2.1.1/learn/concepts/replication.md +++ b/content/riak/kv/2.1.1/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.1.1/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.1.1/learn/concepts/vnodes -[glossary node]: /riak/kv/2.1.1/learn/glossary/#node -[glossary ring]: /riak/kv/2.1.1/learn/glossary/#ring -[usage replication]: /riak/kv/2.1.1/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.1.1/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.1.1/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.1.1/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.1.1/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.1.1/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.1.1/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.1.1/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.1.1/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.1.1/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail. 
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.1.1/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.1.1/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.1.1/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.1.1/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.1.1/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.1.1/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.1.1/learn/concepts/strong-consistency.md b/content/riak/kv/2.1.1/learn/concepts/strong-consistency.md index 9ecf5c0977..54733023b4 100644 --- a/content/riak/kv/2.1.1/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.1.1/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.1.1/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.1.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.1.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.1.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.1.1/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.1.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.1.1/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.1.1/learn/concepts/vnodes.md b/content/riak/kv/2.1.1/learn/concepts/vnodes.md index af907f0e55..d895454e93 100644 --- a/content/riak/kv/2.1.1/learn/concepts/vnodes.md +++ b/content/riak/kv/2.1.1/learn/concepts/vnodes.md @@ -16,16 +16,16 @@ aliases: --- -[concept causal context]: /riak/kv/2.1.1/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.1.1/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.1.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.1/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.1.1/learn/glossary/#node -[glossary ring]: /riak/kv/2.1.1/learn/glossary/#ring -[perf strong consistency]: /riak/kv/2.1.1/using/performance/strong-consistency -[plan backend]: /riak/kv/2.1.1/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.1.1/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.1.1/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.1.1/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.1.1/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.1.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.1/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.1.1/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.1.1/learn/glossary/#ring +[perf strong consistency]: {{}}riak/kv/2.1.1/using/reference/strong-consistency +[plan backend]: {{}}riak/kv/2.1.1/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.1.1/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.1.1/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -81,7 +81,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -103,7 +103,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.1.1/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.1.1/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.1.1/learn/dynamo.md b/content/riak/kv/2.1.1/learn/dynamo.md index 912acd3b08..1cbeb7161d 100644 --- a/content/riak/kv/2.1.1/learn/dynamo.md +++ b/content/riak/kv/2.1.1/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.1.1/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.1.1/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.1.1/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.1.1/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. 
It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. -[HTTP API]: /riak/kv/2.1.1/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.1.1/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.1.1/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.1.1/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.1.1/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.1.1/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.1.1/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.1.1/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.1.1/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.1.1/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.1.1/developing/api/http/) +>[REST API]({{}}riak/kv/2.1.1/developing/api/http/) > ->[Writing Data](/riak/kv/2.1.1/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.1.1/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.1.1/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.1.1/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.1.1/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.1.1/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.1.1/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.1.1/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.1.1/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.1.1/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. 
-[Multi Datacenter Replication]: /riak/kv/2.1.1/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.1.1/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. > This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.1.1/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.1.1/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.1.1/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propgated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.1.1/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.1.1/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.1.1/setup/planning/backend/ -[Bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.1.1/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.1.1/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.1.1/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.1.1/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.1.1/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.1.1/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.1.1/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.1.1/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. 
Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.1.1/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.1.1/using/performance/benchmarking/ Dynamo is used by several services with different configurations. These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.1.1/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.1.1/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.1.1/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.1.1/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.1.1/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.1.1/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.1.1/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.1.1/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.1.1/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.1.1/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. 
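Since the notes above mention CRDTs for reconciling common data types like sets and counters, here is a minimal grow-only counter showing why such types reconcile automatically: each replica increments only its own slot, and merge takes an element-wise maximum, so merges commute and replicas converge. This is a toy illustration, not Riak's data type implementation:

```python
# A grow-only counter (G-counter), the textbook convergent counter:
# each replica increments only its own slot, and merge is an
# element-wise max. Merge is commutative, associative, and idempotent,
# so replicas converge no matter how updates and merges interleave.

class GCounter:
    def __init__(self, replica_id):
        self.replica_id = replica_id
        self.slots = {}                     # replica_id -> count

    def increment(self, n=1):
        self.slots[self.replica_id] = self.slots.get(self.replica_id, 0) + n

    def merge(self, other):
        for rid, count in other.slots.items():
            self.slots[rid] = max(self.slots.get(rid, 0), count)

    @property
    def value(self):
        return sum(self.slots.values())

a, b = GCounter("a"), GCounter("b")
a.increment(3)                    # concurrent updates on two replicas
b.increment(2)
a.merge(b)                        # merge in either order...
b.merge(a)
assert a.value == b.value == 5    # ...and both replicas converge
```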
-[Basho Bench]: /riak/kv/2.1.1/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.1.1/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. -[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.1.1/learn/glossary.md b/content/riak/kv/2.1.1/learn/glossary.md index 6858718ea5..02e260285f 100644 --- a/content/riak/kv/2.1.1/learn/glossary.md +++ b/content/riak/kv/2.1.1/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.1.1/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.1.1/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.1.1/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters -[concept crdts]: /riak/kv/2.1.1/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.1.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.1/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.1.1/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.1.1/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.1.1/developing/api/http -[dev data model]: /riak/kv/2.1.1/developing/data-modeling -[dev data types]: /riak/kv/2.1.1/developing/data-types -[glossary read rep]: /riak/kv/2.1.1/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.1.1/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.1.1/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.1.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.1.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.1/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.1.1/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.1.1/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.1.1/developing/api/http +[dev data model]: {{}}riak/kv/2.1.1/developing/data-modeling +[dev data types]: {{}}riak/kv/2.1.1/developing/data-types +[glossary read rep]: {{}}riak/kv/2.1.1/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.1.1/learn/dynamo -[plan cluster 
capacity]: /riak/kv/2.1.1/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.1.1/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.1.1/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.1.1/learn/dynamo +[plan cluster capacity]: {{}}riak/kv/2.1.1/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.1.1/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.1.1/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.1.1/developing/usage/mapreduce -[usage search]: /riak/kv/2.1.1/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.1.1/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.1.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.1.1/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.1.1/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.1.1/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.1.1/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. 
-* [Bucket Types](/riak/kv/2.1.1/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.1.1/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.1.1/learn/use-cases.md b/content/riak/kv/2.1.1/learn/use-cases.md index 743891fbf8..f0703865b0 100644 --- a/content/riak/kv/2.1.1/learn/use-cases.md +++ b/content/riak/kv/2.1.1/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.1.1/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.1.1/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.1.1/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.1.1/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.1.1/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.1.1/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.1.1/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.1.1/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.1.1/developing/data-types -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.1.1/developing/usage/mapreduce -[usage search]: /riak/kv/2.1.1/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.1.1/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.1.1/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.1.1/developing/data-modeling/#log-data +[dev data model sensor data]: {{}}riak/kv/2.1.1/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.1.1/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.1.1/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.1.1/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.1.1/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.1.1/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.1.1/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.1.1/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.1.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.1.1/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.1.1/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. 
In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.1.1/learn/why-riak-kv.md b/content/riak/kv/2.1.1/learn/why-riak-kv.md index d48b778f18..ca13d911d3 100644 --- a/content/riak/kv/2.1.1/learn/why-riak-kv.md +++ b/content/riak/kv/2.1.1/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.1.1/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.1.1/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.1.1/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.1.1/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.1.1/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.1.1/developing/data-types -[glossary read rep]: /riak/kv/2.1.1/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.1.1/developing/data-types +[glossary read rep]: {{}}riak/kv/2.1.1/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.1.1/release-notes.md b/content/riak/kv/2.1.1/release-notes.md index 09d0db15f7..6cade3e1e2 100644 --- a/content/riak/kv/2.1.1/release-notes.md +++ b/content/riak/kv/2.1.1/release-notes.md @@ -19,7 +19,7 @@ aliases: ## Fixes Riak 2.1.0 introduced a bug that has been fixed in Riak 2.1.1. The default configuration for handoff.ip caused vnodes marked for transfer during handoff to be removed without transferring data to their new destination nodes. A mandatory change to configuration (riak.conf) mitigates this issue for 2.1.0 users. While not all users were impacted by this issue, we recommend that all 2.1.0 users upgrade to 2.1.1. -Detailed information on the issue is available in the Basho Documentation [Product Advisories](http://docs.basho.com/community/productadvisories/210-dataloss/). +Detailed information on the issue is available in the Basho Documentation [Product Advisories]({{}}community/productadvisories/210-dataloss/). * Make default `handoff_ip` value 0.0.0.0 in vars.config. * [riak/pull/734](https://github.com/basho/riak/pull/734) diff --git a/content/riak/kv/2.1.1/setup/downgrade.md b/content/riak/kv/2.1.1/setup/downgrade.md index 6ba7f8088a..4f6fd01714 100644 --- a/content/riak/kv/2.1.1/setup/downgrade.md +++ b/content/riak/kv/2.1.1/setup/downgrade.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.1/ops/upgrading/rolling-downgrades/ --- -[rolling upgrade]: /riak/kv/2.1.1/setup/upgrading/cluster -[config ref]: /riak/kv/2.1.1/configuring/reference -[concept aae]: /riak/kv/2.1.1/learn/concepts/active-anti-entropy/ -[aae status]: /riak/kv/2.1.1/using/admin/riak-admin/#aae-status +[rolling upgrade]: {{}}riak/kv/2.1.1/setup/upgrading/cluster +[config ref]: {{}}riak/kv/2.1.1/configuring/reference +[concept aae]: {{}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/ +[aae status]: {{}}riak/kv/2.1.1/using/admin/riak-admin/#aae-status Downgrades of Riak are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade]. 
diff --git a/content/riak/kv/2.1.1/setup/installing.md b/content/riak/kv/2.1.1/setup/installing.md index 7b14d46f53..1adea6ca58 100644 --- a/content/riak/kv/2.1.1/setup/installing.md +++ b/content/riak/kv/2.1.1/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.1.1/installing/ --- -[install aws]: /riak/kv/2.1.1/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.1.1/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.1.1/setup/installing/freebsd -[install mac osx]: /riak/kv/2.1.1/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.1.1/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.1.1/setup/installing/smartos -[install solaris]: /riak/kv/2.1.1/setup/installing/solaris -[install suse]: /riak/kv/2.1.1/setup/installing/suse -[install windows azure]: /riak/kv/2.1.1/setup/installing/windows-azure -[install source index]: /riak/kv/2.1.1/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.1.1/setup/upgrading +[install aws]: {{}}riak/kv/2.1.1/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.1.1/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.1.1/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.1.1/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.1.1/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.1.1/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.1.1/setup/installing/solaris +[install suse]: {{}}riak/kv/2.1.1/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.1.1/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.1.1/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.1.1/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.1.1/setup/installing/amazon-web-services.md b/content/riak/kv/2.1.1/setup/installing/amazon-web-services.md index 08e288ec12..634fc4cf99 100644 --- a/content/riak/kv/2.1.1/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.1.1/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. @@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.1.1/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.1.1/using/security/). 
## Clustering Riak on AWS diff --git a/content/riak/kv/2.1.1/setup/installing/debian-ubuntu.md b/content/riak/kv/2.1.1/setup/installing/debian-ubuntu.md index c457429524..2443639a41 100644 --- a/content/riak/kv/2.1.1/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.1.1/setup/installing/debian-ubuntu.md @@ -20,10 +20,10 @@ aliases: -[install source index]: /riak/kv/2.1.1/setup/installing/source/ -[security index]: /riak/kv/2.1.1/using/security/ -[install source erlang]: /riak/kv/2.1.1/setup/installing/source/erlang -[install verify]: /riak/kv/2.1.1/setup/installing/verify +[install source index]: {{}}riak/kv/2.1.1/setup/installing/source/ +[security index]: {{}}riak/kv/2.1.1/using/security/ +[install source erlang]: {{}}riak/kv/2.1.1/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.1.1/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.1.1/setup/installing/freebsd.md b/content/riak/kv/2.1.1/setup/installing/freebsd.md index e7f110155f..fa78d074ff 100644 --- a/content/riak/kv/2.1.1/setup/installing/freebsd.md +++ b/content/riak/kv/2.1.1/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.1.1/setup/installing/source/erlang -[downloads]: /riak/kv/2.1.1/downloads/ -[install verify]: /riak/kv/2.1.1/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.1.1/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.1.1/downloads/ +[install verify]: {{}}riak/kv/2.1.1/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.1.1/setup/installing/mac-osx.md b/content/riak/kv/2.1.1/setup/installing/mac-osx.md index 1f79d8145d..9204b6da98 100644 --- a/content/riak/kv/2.1.1/setup/installing/mac-osx.md +++ b/content/riak/kv/2.1.1/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.1.1/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.1.1/setup/installing/source/erlang -[install verify]: /riak/kv/2.1.1/setup/installing/verify +[perf open files]: {{}}riak/kv/2.1.1/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.1.1/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.1.1/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. You can install from source or download a diff --git a/content/riak/kv/2.1.1/setup/installing/rhel-centos.md b/content/riak/kv/2.1.1/setup/installing/rhel-centos.md index ea1215eef9..732bb3ecc4 100644 --- a/content/riak/kv/2.1.1/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.1.1/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.1.1/setup/installing/source -[install source erlang]: /riak/kv/2.1.1/setup/installing/source/erlang -[install verify]: /riak/kv/2.1.1/setup/installing/verify +[install source index]: {{}}riak/kv/2.1.1/setup/installing/source +[install source erlang]: {{}}riak/kv/2.1.1/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.1.1/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. 
The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.1.1/setup/installing/smartos.md b/content/riak/kv/2.1.1/setup/installing/smartos.md index 860064b682..062c4661c8 100644 --- a/content/riak/kv/2.1.1/setup/installing/smartos.md +++ b/content/riak/kv/2.1.1/setup/installing/smartos.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.1.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.1/setup/installing/verify The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. diff --git a/content/riak/kv/2.1.1/setup/installing/solaris.md b/content/riak/kv/2.1.1/setup/installing/solaris.md index 61d74c5399..e7e1d26197 100644 --- a/content/riak/kv/2.1.1/setup/installing/solaris.md +++ b/content/riak/kv/2.1.1/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.1.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.1/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. diff --git a/content/riak/kv/2.1.1/setup/installing/source.md b/content/riak/kv/2.1.1/setup/installing/source.md index 01084a0d9b..2bfab9c270 100644 --- a/content/riak/kv/2.1.1/setup/installing/source.md +++ b/content/riak/kv/2.1.1/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.1.1/setup/installing/source/erlang -[downloads]: /riak/kv/2.1.1/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.1.1/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.1.1/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.1.1/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.1.1/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.1.1/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.1.1/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.1.1/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.1.1/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.1.1/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.1.1/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.1.1/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.1.1/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.1.1/setup/installing/source/erlang.md b/content/riak/kv/2.1.1/setup/installing/source/erlang.md index 0f40fea8fa..1cfe1d88a2 100644 --- a/content/riak/kv/2.1.1/setup/installing/source/erlang.md +++ b/content/riak/kv/2.1.1/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.1.1/installing/source/erlang/ --- -[install index]: /riak/kv/2.1.1/setup/installing -[security basics]: /riak/kv/2.1.1/using/security/basics +[install index]: {{}}riak/kv/2.1.1/setup/installing +[security basics]: {{}}riak/kv/2.1.1/using/security/basics Pre-packaged versions of Riak include an Erlang installation. 
If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho8.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.1.1/setup/installing/source/jvm.md b/content/riak/kv/2.1.1/setup/installing/source/jvm.md index db44f104bd..c15a4ae8e2 100644 --- a/content/riak/kv/2.1.1/setup/installing/source/jvm.md +++ b/content/riak/kv/2.1.1/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.1.1/installing/source/jvm/ --- -[usage search]: /riak/kv/2.1.1/developing/usage/search +[usage search]: {{}}riak/kv/2.1.1/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.1.1/setup/installing/suse.md b/content/riak/kv/2.1.1/setup/installing/suse.md index d3ed8a21a1..17e02e57ae 100644 --- a/content/riak/kv/2.1.1/setup/installing/suse.md +++ b/content/riak/kv/2.1.1/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.1.1/installing/suse/ --- -[install verify]: /riak/kv/2.1.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.1/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.1.1/setup/installing/verify.md b/content/riak/kv/2.1.1/setup/installing/verify.md index 7a1db1b388..27a59a551f 100644 --- a/content/riak/kv/2.1.1/setup/installing/verify.md +++ b/content/riak/kv/2.1.1/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.1.1/installing/verify-install/ --- -[client libraries]: /riak/kv/2.1.1/developing/client-libraries -[perf open files]: /riak/kv/2.1.1/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.1.1/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.1.1/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.1.1/developing/client-libraries +[perf open files]: {{}}riak/kv/2.1.1/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.1.1/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.1.1/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.1.1/setup/installing/windows-azure.md b/content/riak/kv/2.1.1/setup/installing/windows-azure.md index 13f31b2dd1..d2d4144925 100644 --- a/content/riak/kv/2.1.1/setup/installing/windows-azure.md +++ b/content/riak/kv/2.1.1/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. 
- ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". - ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.1.1/setup/planning/backend.md b/content/riak/kv/2.1.1/setup/planning/backend.md index d65c48dcec..55814bc6d0 100644 --- a/content/riak/kv/2.1.1/setup/planning/backend.md +++ b/content/riak/kv/2.1.1/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.1.1/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.1/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.1.1/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.1/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.1.1/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
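To make the idea of a pluggable backend concrete, here is a sketch of the kind of narrow contract a store can code against, with concrete engines selected by configuration. The names are hypothetical; Riak's actual backends are Erlang callback modules, not a Python interface:

```python
# Sketch of the narrow contract a pluggable key/value backend can
# satisfy, with engines swapped by configuration. Hypothetical names;
# Riak's real backends are Erlang callback modules.
from abc import ABC, abstractmethod
from typing import Optional

class Backend(ABC):
    @abstractmethod
    def put(self, bucket: bytes, key: bytes, value: bytes) -> None: ...

    @abstractmethod
    def get(self, bucket: bytes, key: bytes) -> Optional[bytes]: ...

    @abstractmethod
    def delete(self, bucket: bytes, key: bytes) -> None: ...

class MemoryBackend(Backend):
    """Everything in a dict: fast, but nothing survives a restart."""
    def __init__(self):
        self._data = {}

    def put(self, bucket, key, value):
        self._data[(bucket, key)] = value

    def get(self, bucket, key):
        return self._data.get((bucket, key))

    def delete(self, bucket, key):
        self._data.pop((bucket, key), None)

def make_backend(name: str) -> Backend:
    # Riak picks the backend in configuration; a registry plays
    # that role in this sketch.
    return {"memory": MemoryBackend}[name]()

store = make_backend("memory")
store.put(b"users", b"alice", b'{"admin":true}')
assert store.get(b"users", b"alice") == b'{"admin":true}'
```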
diff --git a/content/riak/kv/2.1.1/setup/planning/backend/bitcask.md b/content/riak/kv/2.1.1/setup/planning/backend/bitcask.md index 48ec18a9df..6ccb308475 100644 --- a/content/riak/kv/2.1.1/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.1.1/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.1.1/using/admin/riak-cli -[config reference]: /riak/kv/2.1.1/configuring/reference -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.1.1/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.1.1/setup/planning/backend/multi -[usage search]: /riak/kv/2.1.1/developing/usage/search - -[glossary aae]: /riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.1.1/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.1.1/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.1.1/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.1.1/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.1.1/configuring/reference +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.1.1/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.1.1/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.1.1/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.1.1/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.1.1/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
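A log-structured hash table of the kind described above can be sketched in a few lines: writes append to a data file, while an in-memory table (Bitcask calls it the keydir) maps each key to the offset of its most recent entry, so a read is one seek. This toy leaves out everything real Bitcask adds (CRCs, timestamps, hint files, file rotation, and merging of dead entries):

```python
# Toy log-structured store: writes append to a log file, an in-memory
# hash table (the "keydir") maps each key to the offset of its newest
# value, and a read is a single seek. Real Bitcask adds CRCs,
# timestamps, file rotation, hint files, and merging.
import os
import struct
import tempfile

class TinyBitcask:
    HEADER = struct.Struct(">II")              # key length, value length

    def __init__(self, path):
        self.f = open(path, "a+b")
        self.keydir = {}                       # key -> (value offset, length)

    def put(self, key: bytes, value: bytes):
        self.f.seek(0, os.SEEK_END)
        entry_start = self.f.tell()
        self.f.write(self.HEADER.pack(len(key), len(value)) + key + value)
        self.f.flush()
        value_offset = entry_start + self.HEADER.size + len(key)
        self.keydir[key] = (value_offset, len(value))

    def get(self, key: bytes):
        if key not in self.keydir:
            return None
        offset, length = self.keydir[key]
        self.f.seek(offset)
        return self.f.read(length)

with tempfile.TemporaryDirectory() as tmp:
    db = TinyBitcask(os.path.join(tmp, "data.log"))
    db.put(b"k", b"v1")
    db.put(b"k", b"v2")            # the old entry stays in the log...
    assert db.get(b"k") == b"v2"   # ...but the keydir points at the newest
    db.f.close()
```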
diff --git a/content/riak/kv/2.1.1/setup/planning/backend/leveldb.md b/content/riak/kv/2.1.1/setup/planning/backend/leveldb.md index 3fee4977e0..84b331346c 100644 --- a/content/riak/kv/2.1.1/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.1.1/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.1.1/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode -[config reference]: /riak/kv/2.1.1/configuring/reference -[perf index]: /riak/kv/2.1.1/using/performance -[config reference#aae]: /riak/kv/2.1.1/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.1.1/configuring/reference +[perf index]: {{}}riak/kv/2.1.1/using/performance +[config reference#aae]: {{}}riak/kv/2.1.1/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.1.1/setup/planning/backend/memory.md b/content/riak/kv/2.1.1/setup/planning/backend/memory.md index 36c84102dd..905496ed54 100644 --- a/content/riak/kv/2.1.1/setup/planning/backend/memory.md +++ b/content/riak/kv/2.1.1/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.1/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.1.1/configuring/reference -[plan backend multi]: /riak/kv/2.1.1/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.1.1/configuring/reference +[plan backend multi]: {{}}riak/kv/2.1.1/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.1.1/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.1.1/setup/planning/backend/multi.md b/content/riak/kv/2.1.1/setup/planning/backend/multi.md index cd009985ae..cb56fc1e27 100644 --- a/content/riak/kv/2.1.1/setup/planning/backend/multi.md +++ b/content/riak/kv/2.1.1/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.1.1/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.1.1/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.1/setup/planning/backend/memory -[config reference]: /riak/kv/2.1.1/configuring/reference -[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.1.1/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.1.1/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.1/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.1.1/configuring/reference +[usage bucket types]: {{}}riak/kv/2.1.1/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.1.1/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.1.1/setup/planning/best-practices.md b/content/riak/kv/2.1.1/setup/planning/best-practices.md index 2ff2ac58c8..db95fe19cb 100644 --- a/content/riak/kv/2.1.1/setup/planning/best-practices.md +++ b/content/riak/kv/2.1.1/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.1/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.1.1/using/reference/handoff -[config mapreduce]: /riak/kv/2.1.1/configuring/mapreduce -[glossary aae]: /riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.1.1/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.1.1/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.1.1/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.1.1/setup/planning/bitcask-capacity-calc.md index c5ef21dd1e..26116d008e 100644 --- a/content/riak/kv/2.1.1/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.1.1/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
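The general shape of such a sizing estimate is simple: keydir RAM grows with the number of keys times replicas times (per-key overhead plus key size), and disk with full entry sizes, both divided across the nodes. The sketch below uses an assumed per-key overhead constant for illustration only; rely on the linked calculators, not these numbers, for real planning:

```python
# Back-of-the-envelope sizing in the spirit of the linked calculators:
# every replica of every key costs keydir RAM, and every replica of
# every entry costs disk. PER_KEY_OVERHEAD is an assumed, illustrative
# constant, not Basho's published figure; use the real calculators.

PER_KEY_OVERHEAD = 40  # bytes of keydir bookkeeping per key (assumption)

def ram_per_node(num_keys, avg_key_size, n_val=3, nodes=5):
    total = num_keys * n_val * (PER_KEY_OVERHEAD + avg_key_size)
    return total / nodes

def disk_per_node(num_keys, avg_key_size, avg_value_size, n_val=3, nodes=5):
    total = num_keys * n_val * (avg_key_size + avg_value_size)
    return total / nodes

GIB = 1024 ** 3
print(f"RAM/node:  {ram_per_node(100_000_000, 25) / GIB:5.1f} GiB")
print(f"disk/node: {disk_per_node(100_000_000, 25, 2_000) / GIB:5.1f} GiB")
```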
diff --git a/content/riak/kv/2.1.1/setup/planning/cluster-capacity.md b/content/riak/kv/2.1.1/setup/planning/cluster-capacity.md index 9a169861bf..28dd599b2a 100644 --- a/content/riak/kv/2.1.1/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.1.1/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.1.1/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.1.1/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.1.1/setup/planning -[concept replication]: /riak/kv/2.1.1/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.1.1/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.1.1/configuring/reference -[perf benchmark]: /riak/kv/2.1.1/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.1.1/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.1.1/setup/planning +[concept replication]: {{}}riak/kv/2.1.1/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.1.1/configuring/reference +[perf benchmark]: {{}}riak/kv/2.1.1/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.1.1/setup/planning/operating-system.md b/content/riak/kv/2.1.1/setup/planning/operating-system.md index 7561857d0d..984d800dac 100644 --- a/content/riak/kv/2.1.1/setup/planning/operating-system.md +++ b/content/riak/kv/2.1.1/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.1.1/downloads/ +[downloads]: {{}}riak/kv/2.1.1/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.1.1/setup/planning/start.md b/content/riak/kv/2.1.1/setup/planning/start.md index f2fb7a7d47..4ad833d16a 100644 --- a/content/riak/kv/2.1.1/setup/planning/start.md +++ b/content/riak/kv/2.1.1/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.1/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.1.1/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.1.1/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.1.1/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.1.1/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.1.1/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.1.1/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster.
diff --git a/content/riak/kv/2.1.1/setup/upgrading/checklist.md b/content/riak/kv/2.1.1/setup/upgrading/checklist.md index 4dbc340778..20854449c2 100644 --- a/content/riak/kv/2.1.1/setup/upgrading/checklist.md +++ b/content/riak/kv/2.1.1/setup/upgrading/checklist.md @@ -16,24 +16,24 @@ aliases: - /riak/kv/2.1.1/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.1.1/using/performance/open-files-limit -[perf index]: /riak/kv/2.1.1/using/performance +[perf open files]: {{}}riak/kv/2.1.1/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.1.1/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.1.1/using/security/basics -[cluster ops load balance]: /riak/kv/2.1.1/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.1.1/configuring/reference -[config backend]: /riak/kv/2.1.1/configuring/backend -[usage search]: /riak/kv/2.1.1/developing/usage/search -[usage conflict resolution]: /riak/kv/2.1.1/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.1.1/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.1.1/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.1.1/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.1.1/using/admin/commands -[use admin riak control]: /riak/kv/2.1.1/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.1.1/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.1.1/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.1.1/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.1.1/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.1.1/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.1.1/configuring/reference +[config backend]: {{}}riak/kv/2.1.1/configuring/backend +[usage search]: {{}}riak/kv/2.1.1/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.1.1/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.1.1/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.1.1/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.1.1/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.1.1/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.1.1/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.1.1/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.1.1/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.1.1/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.1.1/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a production environment from a development or testing environment can be a complex process. While the specific process will depend on your environment and practices, there are some basics for you to consider and a few questions you will want to ask while making this transition. 
diff --git a/content/riak/kv/2.1.1/setup/upgrading/cluster.md b/content/riak/kv/2.1.1/setup/upgrading/cluster.md index 1383f8c83f..7841aa2342 100644 --- a/content/riak/kv/2.1.1/setup/upgrading/cluster.md +++ b/content/riak/kv/2.1.1/setup/upgrading/cluster.md @@ -11,23 +11,23 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.1.1/ops/upgrading/rolling-upgrades/ - /riak/kv/2.1.1/ops/upgrading/rolling-upgrades/ --- -[production checklist]: /riak/kv/2.1.1/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.1.1/using/admin/riak-control -[use admin commands]: /riak/kv/2.1.1/using/admin/commands -[use admin riak-admin]: /riak/kv/2.1.1/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.1.1/developing/usage/secondary-indexes +[production checklist]: {{}}riak/kv/2.1.1/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.1.1/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.1.1/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.1.1/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.1.1/developing/usage/secondary-indexes [release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.1.1/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.1.1/using/reference/jmx -[snmp]: /riak/kv/2.1.1/using/reference/snmp +[cluster ops mdc]: {{}}riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.1.1/using/reference/jmx +[snmp]: {{}}riak/kv/2.1.1/using/reference/snmp {{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. @@ -38,7 +38,7 @@ recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. -If you run [Riak Control](/riak/kv/2.1.1/using/admin/riak-control), you should disable it during the rolling upgrade process. +If you run [Riak Control]({{}}riak/kv/2.1.1/using/admin/riak-control), you should disable it during the rolling upgrade process. {{% /note %}} Riak KV nodes negotiate with each other to determine supported diff --git a/content/riak/kv/2.1.1/setup/upgrading/search.md b/content/riak/kv/2.1.1/setup/upgrading/search.md index b4070e0388..048c4fa7f7 100644 --- a/content/riak/kv/2.1.1/setup/upgrading/search.md +++ b/content/riak/kv/2.1.1/setup/upgrading/search.md @@ -11,7 +11,7 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" --- If you're using Search in a version of Riak prior to 2.0 (1.3.0 to @@ -270,4 +270,4 @@ search property is set to false. 11. Finally, delete the merge index directories to reclaim disk space. -For any questions reach out to the [Riak community](/community). Preferably, ask your questions up front rather than during the middle of a migration. +For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. 
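The two-feature-release rule in the note above implies a simple hop calculation for older clusters: step through intermediate releases in jumps of at most two, as in the 1.1.x to 1.3.x to 1.4.x example. The helper below is purely illustrative, with a hypothetical release list, not a supported tool:

```python
# The rule above: upgrades are tested across at most two feature
# releases, so 1.1.x reaches 1.4.x via 1.3.x. This helper just walks a
# release list in jumps of at most two; the list is hypothetical.

def upgrade_path(releases, current, target, max_jump=2):
    i, j = releases.index(current), releases.index(target)
    path = [current]
    while i < j:
        i = min(i + max_jump, j)
        path.append(releases[i])
    return path

releases = ["1.1", "1.2", "1.3", "1.4", "2.0", "2.1"]
print(upgrade_path(releases, "1.1", "1.4"))  # ['1.1', '1.3', '1.4']
```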
diff --git a/content/riak/kv/2.1.1/setup/upgrading/version.md b/content/riak/kv/2.1.1/setup/upgrading/version.md index 5ce5d4a588..f698a9d013 100644 --- a/content/riak/kv/2.1.1/setup/upgrading/version.md +++ b/content/riak/kv/2.1.1/setup/upgrading/version.md @@ -20,7 +20,7 @@ explains which default Riak behaviors have changed and specific steps to take for a successful upgrade. For an overview of the new features and functionality -included in version 2.0, check out our guide to [Riak 2.0](/riak/kv/2.1.1/introduction). +included in version 2.0, check out our guide to [Riak 2.0]({{}}riak/kv/2.1.1/introduction). ## New Clients @@ -36,14 +36,14 @@ was built with those features in mind. There are official While we strongly recommend using the newest versions of these clients, older versions will still work with Riak 2.0, with the drawback that -those older clients will not be able to take advantage of [new features](/riak/kv/2.1.1/introduction) like [data types](/riak/kv/2.1.1/developing/data-types) or the new [Riak Search](/riak/kv/2.1.1/using/reference/search). +those older clients will not be able to take advantage of [new features]({{}}riak/kv/2.1.1/introduction) like [data types]({{}}riak/kv/2.1.1/developing/data-types) or the new [Riak Search]({{}}riak/kv/2.1.1/using/reference/search). ## Bucket Types In versions of Riak prior to 2.0, the location of objects was -determined by objects' [bucket](/riak/kv/2.1.1/learn/concepts/buckets) and [key](/riak/kv/2.1.1/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties](/riak/kv/2.1.1/developing/usage/bucket-types/). +determined by objects' [bucket]({{}}riak/kv/2.1.1/learn/concepts/buckets) and [key]({{}}riak/kv/2.1.1/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties]({{}}riak/kv/2.1.1/developing/usage/bucket-types/). -In Riak 2.0, [bucket types](/riak/kv/2.1.1/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types](/riak/kv/2.1.1/using/reference/bucket-types). +In Riak 2.0, [bucket types]({{}}riak/kv/2.1.1/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types]({{}}riak/kv/2.1.1/using/reference/bucket-types). Here, we'll list some of the things to be aware of when upgrading. #### Bucket types and object location @@ -56,7 +56,7 @@ is determined by: * key This means there are 3 namespaces involved in object location instead of 2. -A full tutorial can be found in [Using Bucket Types](/riak/kv/2.1.1/using/reference/bucket-types). +A full tutorial can be found in [Using Bucket Types]({{}}riak/kv/2.1.1/using/reference/bucket-types). If your application was written using a version of Riak prior to 2.0, you should make sure that any endpoint in Riak targeting @@ -75,8 +75,8 @@ configurations.
The following URLs are equivalent in Riak 2.0: If you use object locations that don't specify a bucket type, you have three options: -* Accept Riak's [default bucket configurations](/riak/kv/2.1.1/using/reference/bucket-types/#buckets-as-namespaces) -* Change Riak's defaults using your [configuration files](/riak/kv/2.1.1/configuring/reference/#default-bucket-properties) +* Accept Riak's [default bucket configurations]({{}}riak/kv/2.1.1/using/reference/bucket-types/#buckets-as-namespaces) +* Change Riak's defaults using your [configuration files]({{}}riak/kv/2.1.1/configuring/reference/#default-bucket-properties) * Manage multiple sets of bucket properties by specifying those properties for all operations (not recommended) @@ -86,17 +86,17 @@ One reason we recommend using bucket types for Riak 2.0 and later is because many newer Riak features were built with bucket types as a precondition: -* [Strong consistency](/riak/2.1.1/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem +* [Strong consistency]({{}}riak/kv/2.1.1/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem requires you to set the `consistent` parameter on a bucket type to `true` -* [Riak Data Types](/riak/kv/2.1.1/developing/data-types) --- In order to use Riak Data - Types, you must [create bucket types](/riak/kv/2.1.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the +* [Riak Data Types]({{}}riak/kv/2.1.1/developing/data-types) --- In order to use Riak Data + Types, you must [create bucket types]({{}}riak/kv/2.1.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the Data Type you are using #### Bucket types and downgrades If you decide to use bucket types, please remember that you -cannot [downgrade](/riak/kv/2.1.1/setup/downgrade) your cluster to a version of +cannot [downgrade]({{}}riak/kv/2.1.1/setup/downgrade) your cluster to a version of Riak prior to 2.0 if you have both created and activated a bucket type. @@ -104,20 +104,20 @@ bucket type. One of the biggest changes in version 2.0 regarding application development involves Riak's default -[siblings](/riak/kv/2.1.1/learn/concepts/causal-context/#siblings) behavior. +[siblings]({{}}riak/kv/2.1.1/learn/concepts/causal-context/#siblings) behavior. In versions prior to 2.0, the `allow_mult` setting was set to `false` by default for all buckets. So Riak's default behavior was to resolve -object replica [conflicts](/riak/kv/2.1.1/developing/usage/conflict-resolution) between nodes on its +object replica [conflicts]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution) between nodes on its own; relieving connecting clients of the need to resolve those conflicts. **In 2.0, `allow_mult` is set to `true` for any bucket type that you create and activate.** -This means that the default when [using bucket types](/riak/kv/2.1.1/using/reference/bucket-types/) is to handle [conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution) on the client side using -either traditional [vector clocks](/riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors](/riak/kv/2.1.1/learn/concepts/causal-context/#dotted-version-vector). 
+This means that the default when [using bucket types]({{}}riak/kv/2.1.1/using/reference/bucket-types/) is to handle [conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution) on the client side using +either traditional [vector clocks]({{}}riak/kv/2.1.1/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors]({{}}riak/kv/2.1.1/learn/concepts/causal-context/#dotted-version-vector). If you wish to set `allow_mult` to `false` in version 2.0, you have two options: @@ -126,11 +126,11 @@ options: * Don't use bucket types. More information on handling siblings can be found in our documentation -on [conflict resolution](/riak/kv/2.1.1/developing/usage/conflict-resolution). +on [conflict resolution]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution). ## Enabling Security -The [authentication and authorization](/riak/kv/2.1.1/using/security/basics) mechanisms included with Riak 2.0 should only be turned +The [authentication and authorization]({{}}riak/kv/2.1.1/using/security/basics) mechanisms included with Riak 2.0 should only be turned on after careful testing in a non-production environment. Security changes the way all applications interact with Riak. @@ -140,12 +140,12 @@ If you decide to upgrade to version 2.0, you can still downgrade your cluster to an earlier version of Riak if you wish, _unless_ you perform one of the following actions in your cluster: -* Index data to be used in conjunction with the new [Riak Search](/riak/kv/2.1.1/using/reference/search). -* Create _and_ activate one or more [bucket types](/riak/kv/2.1.1/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: - - [Strong consistency](/riak/2.1.1/using/reference/strong-consistency) - - [Riak Data Types](/riak/kv/2.1.1/developing/data-types) +* Index data to be used in conjunction with the new [Riak Search]({{}}riak/kv/2.1.1/using/reference/search). +* Create _and_ activate one or more [bucket types]({{}}riak/kv/2.1.1/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: + - [Strong consistency]({{}}riak/kv/2.1.1/using/reference/strong-consistency) + - [Riak Data Types]({{}}riak/kv/2.1.1/developing/data-types) -If you use other new features, such as [Riak Security](/riak/kv/2.1.1/using/security/basics) or the new [configuration files](/riak/kv/2.1.1/configuring/reference/), you can still +If you use other new features, such as [Riak Security]({{}}riak/kv/2.1.1/using/security/basics) or the new [configuration files]({{}}riak/kv/2.1.1/configuring/reference/), you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. @@ -155,7 +155,7 @@ Riak 2.0 offers a new configuration system that both simplifies configuration syntax and uses one configuration file, `riak.conf`, instead of the two files, `app.config` and `vm.args`, required by the older system. Full documentation of the new system can be found in -[Configuration Files](/riak/kv/2.1.1/configuring/reference/). +[Configuration Files]({{}}riak/kv/2.1.1/configuring/reference/). If you're upgrading to Riak 2.0 from an earlier version, you have two configuration options: @@ -166,12 +166,12 @@ configuration options: recognized in Riak 2.0. 
If you choose the first option, make sure to consult the -[configuration files](/riak/kv/2.1.1/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. +[configuration files]({{}}riak/kv/2.1.1/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. If you choose the second option, Riak will automatically determine that the older configuration system is being used. You should be aware, however, that some settings must be set in an `advanced.config` file. -For a listing of those parameters, see our documentation on [advanced configuration](/riak/kv/2.1.1/configuring/reference/#advanced-configuration). +For a listing of those parameters, see our documentation on [advanced configuration]({{}}riak/kv/2.1.1/configuring/reference/#advanced-configuration). If you choose to keep the existing `app.config` files, you _must_ add the following additional settings in the `riak_core` section: @@ -209,7 +209,7 @@ default to a value of `15`, which can cause problems in some clusters. ## Upgrading Search Information on upgrading Riak Search to 2.0 can be found in our -[Search upgrade guide](/riak/kv/2.1.1/setup/upgrading/search). +[Search upgrade guide]({{}}riak/kv/2.1.1/setup/upgrading/search). ## Migrating from Short Names @@ -220,12 +220,11 @@ and `-name` in `vm.args`. If you are upgrading from a previous version of Riak to 2.0 and are using `-sname` in your `vm.args`, the below steps are required to migrate away from `-sname`. -1. Upgrade to Riak -[1.4.12](http://docs.basho.com/riak/1.4.12/downloads/). +1. Upgrade to Riak 1.4.12. 2. Back up the ring directory on each node, typically located in `/var/lib/riak/ring`. 3. Stop all nodes in your cluster. -4. Run [`riak-admin reip <old_nodename> <new_nodename>`](/riak/kv/2.1.1/using/admin/riak-admin/#reip) on each node in your +4. Run [`riak-admin reip <old_nodename> <new_nodename>`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your cluster. For example, in a 5 node cluster this will be run 25 total times, 5 times on each node. The `<old_nodename>` is the current shortname, and the `<new_nodename>` is the new fully qualified hostname.
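As a concrete sketch of step 4, the loop below covers a hypothetical 3-node cluster (3 invocations on each node, 9 in total); the shortnames and fully qualified names shown are illustrative placeholders:

```bash
# Run this same loop on every stopped node in the hypothetical 3-node
# cluster: one reip invocation per cluster member, per node.
for i in 1 2 3; do
  riak-admin reip "riak@node${i}" "riak@node${i}.example.com"
done
```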
diff --git a/content/riak/kv/2.1.1/using.md b/content/riak/kv/2.1.1/using.md index 86a9479926..68f9a6a5af 100644 --- a/content/riak/kv/2.1.1/using.md +++ b/content/riak/kv/2.1.1/using.md @@ -15,7 +15,7 @@ toc: true [use running cluster]: ../using/running-a-cluster [use admin index]: ../using/admin/ [cluster ops index]: ../using/cluster-operations -[repair recover index]: ../repair-recovery +[repair recover index]: ../using/repair-recovery [security index]: ../using/security [perf index]: ../using/performance [troubleshoot index]: ../using/troubleshooting diff --git a/content/riak/kv/2.1.1/using/admin/commands.md b/content/riak/kv/2.1.1/using/admin/commands.md index cb5ada1dcf..2e9b2d991b 100644 --- a/content/riak/kv/2.1.1/using/admin/commands.md +++ b/content/riak/kv/2.1.1/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.1.1/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.1.1/using/admin/riak-admin/#cluster -[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.1.1/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.1.1/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.1.1/using/admin/riak-admin.md b/content/riak/kv/2.1.1/using/admin/riak-admin.md index 12383202dc..c7f5ee7e5f 100644 --- a/content/riak/kv/2.1.1/using/admin/riak-admin.md +++ b/content/riak/kv/2.1.1/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.1.1/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.1.1/configuring/reference -[use admin commands]: /riak/kv/2.1.1/using/admin/commands -[use admin commands#join]: /riak/kv/2.1.1/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.1.1/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.1.1/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.1.1/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.1.1/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.1.1/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.1.1/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.1.1/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.1.1/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.1.1/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.1.1/setup/downgrade -[security index]: /riak/kv/2.1.1/using/security/ -[security managing]: /riak/kv/2.1.1/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.1.1/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.1.1/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.1.1/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.1.1/using/cluster-operations/strong-consistency -[cluster 
ops handoff]: /riak/kv/2.1.1/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.1.1/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.1.1/configuring/reference +[use admin commands]: {{}}riak/kv/2.1.1/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.1.1/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.1.1/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.1.1/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.1.1/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.1.1/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.1.1/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.1.1/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.1.1/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.1.1/using/cluster-operations/inspecting-node +[use ref monitoring]: {{}}riak/kv/2.1.1/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.1.1/setup/downgrade +[security index]: {{}}riak/kv/2.1.1/using/security/ +[security managing]: {{}}riak/kv/2.1.1/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.1.1/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.1.1/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.1.1/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.1.1/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.1.1/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.1.1/using/admin/riak-admin/#stats ## riak-admin diff --git a/content/riak/kv/2.1.1/using/admin/riak-cli.md b/content/riak/kv/2.1.1/using/admin/riak-cli.md index cdfa43fd8c..5c1a505dfc 100644 --- a/content/riak/kv/2.1.1/using/admin/riak-cli.md +++ b/content/riak/kv/2.1.1/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.1/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.1.1/configuring/reference/ +[configuration file]: {{}}riak/kv/2.1.1/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.1.1/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.1.1/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.1.1/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.1.1/configuring/reference/ ## riak @@ -301,7 +301,7 @@ riak ertspath ## chkconfig -Checks whether the [configuration file](/riak/kv/2.1.1/configuring/reference/) is valid. If so, `config is OK` will be included in the output. +Checks whether the [configuration file]({{}}riak/kv/2.1.1/configuring/reference/) is valid. If so, `config is OK` will be included in the output. ```bash riak chkconfig @@ -339,11 +339,11 @@ The `riak top` command is the direct equivalent of `riak-admin top`: riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } ``` -More detailed information can be found in the [`riak-admin`](/riak/kv/2.1.1/using/admin/riak-admin/#top) documentation. +More detailed information can be found in the [`riak-admin`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#top) documentation. ## config -Provides information about the current [configuration](/riak/kv/2.1.1/configuring/reference/) of a Riak node, i.e. the parameters and values in the node's `riak.conf` or `app.config` (depending on which configuration system is being used). 
+Provides information about the current [configuration]({{}}riak/kv/2.1.1/configuring/reference/) of a Riak node, i.e. the parameters and values in the node's `riak.conf` or `app.config` (depending on which configuration system is being used). ```bash riak config { generate | effective | describe VARIABLE } [-l debug] diff --git a/content/riak/kv/2.1.1/using/admin/riak-control.md b/content/riak/kv/2.1.1/using/admin/riak-control.md index 817f00b2f7..4eafb95a8f 100644 --- a/content/riak/kv/2.1.1/using/admin/riak-control.md +++ b/content/riak/kv/2.1.1/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.1.1/configuring/reference +[config reference]: {{}}riak/kv/2.1.1/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.1.1/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.1.1/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. 
This is where you can -see the health of each [vnode](/riak/kv/2.1.1/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.1.1/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.1.1/using/cluster-operations.md b/content/riak/kv/2.1.1/using/cluster-operations.md index 4002cee1b7..5f52df81b9 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations.md +++ b/content/riak/kv/2.1.1/using/cluster-operations.md @@ -20,7 +20,6 @@ toc: true [ops log]: ./logging [ops backup]: ./backing-up [ops handoff]: ./handoff -[ops obj del]: ./object-deletion [ops strong consistency]: ./strong-consistency [ops v3 mdc]: ./v3-multi-datacenter [ops v2 mdc]: ./v2-multi-datacenter @@ -84,13 +83,6 @@ Information on using the `riak-admin handoff` interface to enable and disable ha [Learn More >>][ops handoff] -#### [Object Deletion][ops obj del] - -Describes possible settings for `delete_mode`. - -[Learn More >>][ops obj del] - - #### [Monitoring Strong Consistency][ops strong consistency] Overview of the various statistics used in monitoring strong consistency. diff --git a/content/riak/kv/2.1.1/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.1.1/using/cluster-operations/active-anti-entropy.md index 4d193d72cc..1120ce8a5f 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/active-anti-entropy.md @@ -54,12 +54,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. ## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -87,7 +87,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. 
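A sketch of that deletion method on a single node, assuming a package install that keeps AAE data in `/var/lib/riak/anti_entropy` (adjust the path to your platform and `anti_entropy.data_dir` setting; the node name is an illustrative placeholder):

```bash
# Stop the node before touching its AAE data.
riak stop

# Delete the on-disk hash trees; Riak will rebuild them from the
# underlying K/V data once AAE runs again.
rm -rf /var/lib/riak/anti_entropy/*

# Restart and wait for the KV service before moving to the next node.
riak start
riak-admin wait-for-service riak_kv riak@node1.example.com
```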
diff --git a/content/riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes.md index 2e336715d1..0b1bc93516 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.1.1/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.1.1/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.1.1/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.1.1/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.1.1/using/cluster-operations/backing-up.md b/content/riak/kv/2.1.1/using/cluster-operations/backing-up.md index cb3092b74f..fcb8924d62 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.1.1/ops/running/backups --- -[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters -[config reference]: /riak/kv/2.1.1/configuring/reference -[plan backend leveldb]: /riak/kv/2.1.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.1.1/using/reference/strong-consistency -[concept aae]: /riak/kv/2.1.1/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.1.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.1.1/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.1.1/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.1.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.1.1/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.1.1/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.1.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). 
+Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.1.1/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down `](/riak/kv/2.1.1/using/admin/riak-admin/#down) + [`riak-admin down `]({{}}riak/kv/2.1.1/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join `](/riak/kv/2.1.1/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join `]({{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace `](/riak/kv/2.1.1/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace `]({{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.1.1/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.1.1/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.1.1/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.1.1/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.1.1/using/cluster-operations/bucket-types.md b/content/riak/kv/2.1.1/using/cluster-operations/bucket-types.md index 2047420dbe..e754dbec4d 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.1.1/developing/usage/bucket-types) to namespace and configure all buckets you use. 
Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.1.1/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. diff --git a/content/riak/kv/2.1.1/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.1.1/using/cluster-operations/changing-cluster-info.md index 16782c18db..521141ec09 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.1.1/configuring/reference +[config reference]: {{}}riak/kv/2.1.1/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.1.1/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.1.1/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.1.1/using/cluster-operations/handoff.md b/content/riak/kv/2.1.1/using/cluster-operations/handoff.md index 4e8b8dd2ef..e394dc5eca 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.1.1/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.1.1/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. 
Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.1.1/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.1.1/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.1.1/using/cluster-operations/logging.md b/content/riak/kv/2.1.1/using/cluster-operations/logging.md index 8aa74ffbaa..5ca2c6b1e3 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/logging.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e. set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.1.1/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.1.1/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.1.1/using/cluster-operations/replacing-node.md b/content/riak/kv/2.1.1/using/cluster-operations/replacing-node.md index fb347f0d0a..cf89266287 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.1.1/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.1.1/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.1.1/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.1.1/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.1.1/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.1.1/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6.
Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.1.1/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.1.1/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.1.1/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.1.1/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.1.1/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.1.1/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.1.1/using/cluster-operations/strong-consistency.md index 8a84ee72fc..ebe48c0f97 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.1.1/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.1.1/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.1.1/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.1.1/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.1.1/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.1.1/using/cluster-operations/v2-multi-datacenter.md index 34455ad247..f8eb14b95d 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/v2-multi-datacenter.md @@ -159,7 +159,7 @@ restarting Riak Enterprise. 
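As a quick sketch of pulling just the `consistent_`-prefixed statistics mentioned above (assuming `riak-admin` is on the `PATH`; the exact stats emitted vary by version and configuration):

```bash
# riak-admin status prints "name : value" lines; keep only the
# strong-consistency counters such as consistent_gets and consistent_puts.
riak-admin status | grep '^consistent_'
```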
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -179,7 +179,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -217,7 +217,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.1.1/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.1.1/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -238,7 +238,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter.md index 96c1bcf8de..e57d2370d7 100644 --- a/content/riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.1.1/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.1.1/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.1.1/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.1.1/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.1.1/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.1.1/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.1.1/using/performance.md b/content/riak/kv/2.1.1/using/performance.md index bcbc406b7f..0572f15f9c 100644 --- a/content/riak/kv/2.1.1/using/performance.md +++ b/content/riak/kv/2.1.1/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.1.1/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.1.1/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -237,12 +237,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.1.1/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.1.1/using/performance/open-files-limit/) for more details.
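As a rough sketch of checking and raising that limit on a Linux node (the user name and values are illustrative; see the Open Files Limit document for platform-specific steps):

```bash
# Inspect the soft and hard open-file limits of the shell that starts Riak.
ulimit -Sn
ulimit -Hn

# Persistently raise the limit for a hypothetical "riak" user on
# pam_limits-based distributions.
cat <<'EOF' | sudo tee -a /etc/security/limits.conf
riak soft nofile 65536
riak hard nofile 65536
EOF
```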
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.1.1/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.1.1/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.1.1/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.1.1/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.1.1/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.1.1/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.1.1/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.1.1/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.1.1/using/performance/benchmarking.md b/content/riak/kv/2.1.1/using/performance/benchmarking.md index 424d54777d..b97bca827d 100644 --- a/content/riak/kv/2.1.1/using/performance/benchmarking.md +++ b/content/riak/kv/2.1.1/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.1.1/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.1.1/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.1.1/using/performance/latency-reduction.md b/content/riak/kv/2.1.1/using/performance/latency-reduction.md index 7eb45a0f44..99df402fe9 100644 --- a/content/riak/kv/2.1.1/using/performance/latency-reduction.md +++ b/content/riak/kv/2.1.1/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.1.1/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.1.1/using/performance/multi-datacenter-tuning.md index 3293355334..8c1f3ff3e1 100644 --- a/content/riak/kv/2.1.1/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.1.1/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.1.1/using/performance +[perf index]: {{}}riak/kv/2.1.1/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.1.1/using/performance/open-files-limit.md b/content/riak/kv/2.1.1/using/performance/open-files-limit.md index 8d095fb459..2a6180a0a7 100644 --- a/content/riak/kv/2.1.1/using/performance/open-files-limit.md +++ b/content/riak/kv/2.1.1/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/ops/tuning/open-files-limit/ --- -[plan backend bitcask]: /riak/kv/2.1.1/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.1.1/setup/planning/backend/bitcask Riak can consume a large number of open file handles during normal operation. 
The [Bitcask][plan backend bitcask] backend in particular may accumulate a high diff --git a/content/riak/kv/2.1.1/using/reference/bucket-types.md b/content/riak/kv/2.1.1/using/reference/bucket-types.md index 0fb22c0015..c7e93deda0 100644 --- a/content/riak/kv/2.1.1/using/reference/bucket-types.md +++ b/content/riak/kv/2.1.1/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.1.1/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.1.1/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.1.1/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.1.1/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. @@ -39,14 +39,14 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype`, `consistent`, and `write_once` properties, related to - [Riak data types](/riak/kv/2.1.1/developing/data-types), [strong consistency](/riak/kv/2.1.1/developing/app-guide/strong-consistency), and - [write-once buckets](/riak/kv/2.1.1/developing/app-guide/write-once) respectively + [Riak data types]({{}}riak/kv/2.1.1/developing/data-types), [strong consistency]({{}}riak/kv/2.1.1/developing/app-guide/strong-consistency), and + [write-once buckets]({{}}riak/kv/2.1.1/developing/app-guide/write-once) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.1.1/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.1.1/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -82,8 +82,8 @@ system of bucket configuration, including the following: `riak-admin bucket-type` interface (discussed in depth below) enables you to manage bucket configurations on the operations side, without recourse to Riak clients.
-* Some special use cases -- [strong consistency](/riak/kv/2.1.1/configuring/strong-consistency), - [data types](/riak/kv/2.1.1/developing/data-types), and [write-once buckets](/riak/kv/2.1.1/developing/app-guide/write-once) -- are only +* Some special use cases -- [strong consistency]({{}}riak/kv/2.1.1/configuring/strong-consistency), + [data types]({{}}riak/kv/2.1.1/developing/data-types), and [write-once buckets]({{}}riak/kv/2.1.1/developing/app-guide/write-once) -- are only available through bucket properties or bucket types. For these reasons, we recommend _always_ using bucket types in versions @@ -123,7 +123,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.1/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.1.1/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.1/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.1/developing/getting-started) section. If creation is successful, you should see the following output: @@ -525,7 +525,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.1.1/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.1.1/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -538,7 +538,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.1.1/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.1.1/developing/usage/conflict-resolution) with the appropriate application-side business logic. To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -589,8 +589,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.1.1/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.1.1/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`.
This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.1.1/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.1.1/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -718,7 +718,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.1.1/learn/concepts/buckets) and [keys](/riak/kv/2.1.1/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.1.1/learn/concepts/buckets) and [keys]({{}}riak/kv/2.1.1/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.1.1/using/reference/custom-code.md b/content/riak/kv/2.1.1/using/reference/custom-code.md index 815eeeb903..a2b2f91b9a 100644 --- a/content/riak/kv/2.1.1/using/reference/custom-code.md +++ b/content/riak/kv/2.1.1/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.1.1/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.1.1/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.1.1/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.1.1/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name as the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.1.1/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.1.1/using/admin/riak-admin/#wait-for-service). {{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.1.1/using/reference/handoff.md b/content/riak/kv/2.1.1/using/reference/handoff.md index c6114c515b..409a6b17ec 100644 --- a/content/riak/kv/2.1.1/using/reference/handoff.md +++ b/content/riak/kv/2.1.1/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.1/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.1.1/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.1.1/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**.
-Hinted handoff occurs when a [vnode](/riak/kv/2.1.1/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.1.1/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.1.1/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.1.1/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.1.1/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.1.1/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.1.1/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.1.1/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.1.1/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.1.1/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.1.1/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.1.1/configuring/reference/#vnode_management_timer). Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.1.1/using/reference/jmx.md b/content/riak/kv/2.1.1/using/reference/jmx.md index 24483f37e9..d98e96d65d 100644 --- a/content/riak/kv/2.1.1/using/reference/jmx.md +++ b/content/riak/kv/2.1.1/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.1/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.1.1/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). 
diff --git a/content/riak/kv/2.1.1/using/reference/jmx.md b/content/riak/kv/2.1.1/using/reference/jmx.md
index 24483f37e9..d98e96d65d 100644
--- a/content/riak/kv/2.1.1/using/reference/jmx.md
+++ b/content/riak/kv/2.1.1/using/reference/jmx.md
@@ -16,7 +16,7 @@ aliases:
 - /riak/kv/2.1.1/ops/running/monitoring/jmx
 ---

-Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.1.1/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`).
+Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`).

 ```erlang
 {riak_jmx, [
diff --git a/content/riak/kv/2.1.1/using/reference/logging.md b/content/riak/kv/2.1.1/using/reference/logging.md
index a7912c7f1d..f694261d21 100644
--- a/content/riak/kv/2.1.1/using/reference/logging.md
+++ b/content/riak/kv/2.1.1/using/reference/logging.md
@@ -15,13 +15,13 @@ aliases:
 - /riak/kv/2.1.1/ops/running/logging
 ---

-[cluster ops log]: /riak/kv/2.1.1/using/cluster-operations/logging
+[cluster ops log]: {{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/logging

 Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager).

-lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files](/riak/kv/2.1.1/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document.
+lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document.

 ## Log Directory
@@ -46,7 +46,7 @@ File | Significance
 `console.log` | Console log output
 `crash.log` | Crash logs
 `erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs.
-`error.log` | [Common errors](../../repair-recover/errors) emitted by Riak.
+`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak.
 `run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored.

 ## Log Syntax
@@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the
 * `file` --- Console logs will be emitted to a file. This is Riak's
   default behavior. The location of that file is determined by the
   `log.console.file` parameter. The default location is
-  `./log/console.log` on an installation from [source](/riak/kv/2.1.1/setup/installing/source), but will differ on platform-specific installation,
+  `./log/console.log` on an installation from [source]({{< baseurl >}}riak/kv/2.1.1/setup/installing/source), but will differ on platform-specific installation,
   e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms.
 * `console` --- Console logs will be emitted to standard output, which
-  can be viewed by running the [`riak attach-direct`](/riak/kv/2.1.1/using/admin/riak-cli/#attach-direct) command
+  can be viewed by running the [`riak attach-direct`]({{< baseurl >}}riak/kv/2.1.1/using/admin/riak-cli/#attach-direct) command
 * `both` --- Console logs will be emitted both to a file and to standard
   output
 * `off` --- Console log messages will be disabled
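As a quick illustration of the console log destinations listed above, a minimal `riak.conf` sketch (the file path is an example; defaults differ by platform, as the hunk notes):

```riakconf
## Valid values for log.console: file, console, both, off.
log.console = both
## Only consulted when console logs are written to a file.
log.console.file = ./log/console.log
```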
diff --git a/content/riak/kv/2.1.1/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.1.1/using/reference/multi-datacenter/comparison.md
index 90063fc5ce..fdf862e28c 100644
--- a/content/riak/kv/2.1.1/using/reference/multi-datacenter/comparison.md
+++ b/content/riak/kv/2.1.1/using/reference/multi-datacenter/comparison.md
@@ -15,7 +15,7 @@ aliases:
 - /riak/kv/2.1.1/ops/mdc/comparison
 ---

-This document is a systematic comparison of [Version 2](/riak/kv/2.1.1/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.1.1/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter
+This document is a systematic comparison of [Version 2]({{< baseurl >}}riak/kv/2.1.1/using/reference/v2-multi-datacenter) and [Version 3]({{< baseurl >}}riak/kv/2.1.1/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter
 Replication capabilities.

 {{% note title="Important note on mixing versions" %}}
@@ -89,7 +89,7 @@ version 3.
   of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance.
-* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.1.1/learn/concepts/active-anti-entropy/) \(AAE)
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/) \(AAE)
   technology, which can greatly improve fullsync performance.
 * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in
diff --git a/content/riak/kv/2.1.1/using/reference/runtime-interaction.md b/content/riak/kv/2.1.1/using/reference/runtime-interaction.md
index f781e6a62c..29b1baeab2 100644
--- a/content/riak/kv/2.1.1/using/reference/runtime-interaction.md
+++ b/content/riak/kv/2.1.1/using/reference/runtime-interaction.md
@@ -15,8 +15,8 @@ aliases:
 - /riak/kv/2.1.1/ops/advanced/runtime
 ---

-[config reference]: /riak/kv/2.1.1/configuring/reference
-[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters
+[config reference]: {{< baseurl >}}riak/kv/2.1.1/configuring/reference
+[concept clusters]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/clusters

 Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements
diff --git a/content/riak/kv/2.1.1/using/reference/search.md b/content/riak/kv/2.1.1/using/reference/search.md
index 5091de0c42..e71259906d 100644
--- a/content/riak/kv/2.1.1/using/reference/search.md
+++ b/content/riak/kv/2.1.1/using/reference/search.md
@@ -15,20 +15,20 @@ aliases:
 - /riak/kv/2.1.1/dev/advanced/search
 ---

-[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters
+[concept clusters]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/clusters

 > **Note on Search 2.0 vs. Legacy Search**
 >
 > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed
-Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/).
+Yokozuna).

 This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak
-Search, you should check out the [Using Search](/riak/kv/2.1.1/developing/usage/search) document.
+Search, you should check out the [Using Search]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search) document.

-![Yokozuna](/images/yokozuna.png)
+![Yokozuna]({{< baseurl >}}images/yokozuna.png)

 ## Riak Search is Erlang
@@ -126,7 +126,7 @@ but logically partition them in KV by using a date as the bucket name.
 A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list.

-See the [main Search documentation](/riak/kv/2.1.1/developing/usage/search/#simple-setup) for details on creating an index.
+See the [main Search documentation]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search/#simple-setup) for details on creating an index.

 ## Extractors
@@ -288,7 +288,7 @@ The corresponding date type is declared under `` like so.
 ```

-You can also find more information on to how customize your own [search schema](/riak/kv/2.1.1/developing/usage/search-schemas).
+You can also find more information on how to customize your own [search schema]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search-schemas).

 Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes
@@ -298,7 +298,7 @@ indexed.

 ## Active Anti-Entropy (AAE)

-[Active Anti-Entropy](/riak/kv/2.1.1/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+[Active Anti-Entropy]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
 correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped
@@ -353,7 +353,7 @@ _analysis_.
 Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more
-information on analysis, see [Search Schema](/riak/kv/2.1.1/developing/usage/search-schemas).
+information on analysis, see [Search Schema]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search-schemas).

 ## Tagging
diff --git a/content/riak/kv/2.1.1/using/reference/secondary-indexes.md b/content/riak/kv/2.1.1/using/reference/secondary-indexes.md
index 8c496c488a..dadccf95c2 100644
--- a/content/riak/kv/2.1.1/using/reference/secondary-indexes.md
+++ b/content/riak/kv/2.1.1/using/reference/secondary-indexes.md
@@ -15,28 +15,28 @@ aliases:
 - /riak/kv/2.1.1/dev/advanced/2i
 ---

-[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types
-[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency
+[usage bucket types]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types
+[use ref strong consistency]: {{< baseurl >}}riak/kv/2.1.1/using/reference/strong-consistency

 > **Note: Riak Search preferred for querying**
 >
 > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now
-recommend [Riak Search](/riak/kv/2.1.1/developing/usage/search/) rather than secondary indexes for a variety of reasons.
+recommend [Riak Search]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search/) rather than secondary indexes for a variety of reasons.
 Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends.

 This document provides implementation and other details for Riak's
-[secondary indexes](/riak/kv/2.1.1/developing/usage/secondary-indexes/) \(2i) feature.
+[secondary indexes]({{< baseurl >}}riak/kv/2.1.1/developing/usage/secondary-indexes/) \(2i) feature.

 ## How It Works

 Secondary indexes use **document-based partitioning**, a system where
-indexes reside with each document, local to the [vnode](/riak/kv/2.1.1/learn/glossary/#vnode). This
+indexes reside with each document, local to the [vnode]({{< baseurl >}}riak/kv/2.1.1/learn/glossary/#vnode). This
 system is also known as a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys.

-![Secondary Index](/images/Secondary-index-example.png)
+![Secondary Index]({{< baseurl >}}images/Secondary-index-example.png)

 Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time
@@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the
 write operation completes.

 Riak stores 3 replicas of all objects by default, although this can be
-changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.1.1/developing/app-guide/replication-properties). The system is capable of generating a full set of results
+changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{< baseurl >}}riak/kv/2.1.1/developing/app-guide/replication-properties). The system is capable of generating a full set of results
 from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the
@@ -66,7 +66,7 @@ requesting node.

 > **Note on 2i and strong consistency**
 >
-> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.1.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{< baseurl >}}riak/kv/2.1.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
 secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored.
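One practical footnote to the document-based partitioning described above: 2i is served by the storage backend, so it is only available on backends that support secondary indexes. A hedged `riak.conf` sketch, assuming LevelDB's 2i support (a detail not restated in this hunk):

```riakconf
## 2i requires a backend with secondary-index support, such as LevelDB;
## Bitcask does not support secondary indexes.
storage_backend = leveldb
```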
diff --git a/content/riak/kv/2.1.1/using/reference/statistics-monitoring.md b/content/riak/kv/2.1.1/using/reference/statistics-monitoring.md
index 6327275b99..38f25b6e08 100644
--- a/content/riak/kv/2.1.1/using/reference/statistics-monitoring.md
+++ b/content/riak/kv/2.1.1/using/reference/statistics-monitoring.md
@@ -17,13 +17,13 @@ aliases:

 Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics
-are made available through the HTTP API via the [`/stats`](/riak/kv/2.1.1/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.1.1/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+are made available through the HTTP API via the [`/stats`]({{< baseurl >}}riak/kv/2.1.1/developing/api/http/status) endpoint, or through the [`riak-admin`]({{< baseurl >}}riak/kv/2.1.1/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.

 This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific
-Riak statistics provided in the [Inspecting a Node](/riak/kv/2.1.1/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.1.1/developing/api/http/status) documentation.
+Riak statistics provided in the [Inspecting a Node]({{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/inspecting-node) and [HTTP Status]({{< baseurl >}}riak/kv/2.1.1/developing/api/http/status) documentation.

 ## System Metrics To Graph
@@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and
 writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our
-documentation on [system tuning](/riak/kv/2.1.1/using/performance/#storage-and-file-system-tuning).
+documentation on [system tuning]({{< baseurl >}}riak/kv/2.1.1/using/performance/#storage-and-file-system-tuning).

 ## Riak Metrics to Graph
 Riak metrics fall into several general categories:

 1. Throughput metrics
 2. Latency metrics
 3. Erlang resource usage metrics
 4. General Riak load/health metrics

-If graphing all of the [available Riak metrics](/riak/kv/2.1.1/using/cluster-operations/inspecting-node) is
+If graphing all of the [available Riak metrics]({{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/inspecting-node) is
 not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below.
@@ -143,7 +143,7 @@ Metric | Also | Notes

 ## Command-line Interface

-The [`riak-admin`](/riak/kv/2.1.1/using/admin/riak-admin/) tool provides two
+The [`riak-admin`]({{< baseurl >}}riak/kv/2.1.1/using/admin/riak-admin/) tool provides two
 interfaces for retrieving statistics and other information: `status` and `stat`.
@@ -168,14 +168,14 @@ consistent_get_objsize_195 : 0
 ```

 A comprehensive list of available stats can be found in the
-[Inspecting a Node](/riak/kv/2.1.1/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+[Inspecting a Node]({{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/inspecting-node/#riak-admin-status) document.

 ### stat

 The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. Full documentation of this command can be found
-in the [Inspecting a Node](/riak/kv/2.1.1/using/cluster-operations/inspecting-node/#riak-admin-stat) document.
+in the [Inspecting a Node]({{< baseurl >}}riak/kv/2.1.1/using/admin/riak-admin/#stat) document.

 ## Statistics and Monitoring Tools
@@ -222,7 +222,7 @@ troubleshooting issues on Riak nodes.

 #### Riak Control

-[Riak Control](/riak/kv/2.1.1/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak
+[Riak Control]({{< baseurl >}}riak/kv/2.1.1/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak
 clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes.
@@ -246,7 +246,7 @@ clusters and grids.
 Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters.

 A [Riak Ganglia module][riak_ganglia] for collecting statistics from
-the Riak HTTP [`/stats`](/riak/kv/2.1.1/developing/api/http/status) endpoint is also available.
+the Riak HTTP [`/stats`]({{< baseurl >}}riak/kv/2.1.1/developing/api/http/status) endpoint is also available.

 #### Nagios
@@ -320,14 +320,14 @@ capacity planning in a Riak cluster environment.
 that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files.

-A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.1.1/developing/api/http/status) endpoint is also available.
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{< baseurl >}}riak/kv/2.1.1/developing/api/http/status) endpoint is also available.

 #### Splunk

 [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. It can be connected to Riak's HTTP statistics
-[`/stats`](/riak/kv/2.1.1/developing/api/http/status) endpoint.
+[`/stats`]({{< baseurl >}}riak/kv/2.1.1/developing/api/http/status) endpoint.

 Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak
@@ -349,9 +349,9 @@ Docs](https://github.com/basho/basho_docs).

 ## References

-* [Inspecting a Node](/riak/kv/2.1.1/using/cluster-operations/inspecting-node)
+* [Inspecting a Node]({{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/inspecting-node)
 * [Riaknostic](http://riaknostic.basho.com)
-* [Riak Control](/riak/kv/2.1.1/using/admin/riak-control/)
+* [Riak Control]({{< baseurl >}}riak/kv/2.1.1/using/admin/riak-control/)
 * [collectd](http://collectd.org)
 * [Ganglia](http://ganglia.info)
 * [Nagios](http://www.nagios.org)
@@ -367,9 +367,9 @@ Docs](https://github.com/basho/basho_docs).

 [sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt
-[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters
-[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets
-[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps
+[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/
+[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/
+[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/
 [riak_nagios]: https://github.com/basho/riak_nagios
 [tcollector]: https://github.com/stumbleupon/tcollector
 [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py
diff --git a/content/riak/kv/2.1.1/using/reference/strong-consistency.md b/content/riak/kv/2.1.1/using/reference/strong-consistency.md
index 708252317b..6677095a0a 100644
--- a/content/riak/kv/2.1.1/using/reference/strong-consistency.md
+++ b/content/riak/kv/2.1.1/using/reference/strong-consistency.md
@@ -12,10 +12,10 @@ menu:
 toc: true
 ---

-[usage bucket types]: /riak/kv/2.1.1/developing/usage/bucket-types
-[concept eventual consistency]: /riak/kv/2.1.1/learn/concepts/eventual-consistency
+[usage bucket types]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types
+[concept eventual consistency]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/eventual-consistency

-Riak was originally designed as an [eventually consistent](/riak/kv/2.1.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+Riak was originally designed as an [eventually consistent]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
 (i.e. fault) tolerance and high read and write availability.

 While this focus on high availability is a great fit for many data
@@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket
 types], which enables developers to apply strong consistency guarantees on a per-key basis.

-Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.1.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.1.1/configuring/strong-consistency) looking to manage,
+Elsewhere in the documentation there are instructions for [enabling and using]({{< baseurl >}}riak/kv/2.1.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{< baseurl >}}riak/kv/2.1.1/configuring/strong-consistency) looking to manage,
 configure, and monitor strong consistency.

 ## Strong vs. Eventual Consistency
@@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of
 replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the
-[operations](/riak/kv/2.1.1/configuring/strong-consistency/#fault-tolerance)
+[operations]({{< baseurl >}}riak/kv/2.1.1/configuring/strong-consistency/#fault-tolerance)
 documentation.

 A second trade-off regards performance. Riak's implementation of strong
-consistency involves a complex [consensus subsystem](/riak/kv/2.1.1/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+consistency involves a complex [consensus subsystem]({{< baseurl >}}riak/kv/2.1.1/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
 which can entail a performance hit of varying proportions, depending on a variety of factors.

-Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.1.1/configuring/strong-consistency/#performance).
+Ways to address this issue can be found in [strong consistency and performance]({{< baseurl >}}riak/kv/2.1.1/configuring/strong-consistency/#performance).
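These trade-offs only apply once the consensus subsystem is actually running; for orientation, a hedged `riak.conf` sketch of how it is switched on (the parameter name is taken from this Riak series' configuration reference, not from the hunk above, so treat it as an assumption to verify there):

```riakconf
## Enable the consensus subsystem that backs strongly consistent
## operations; it is disabled by default.
strong_consistency = on
```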
diff --git a/content/riak/kv/2.1.1/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.1.1/using/reference/v2-multi-datacenter/architecture.md
index debf6a443c..fba6c7c5e2 100644
--- a/content/riak/kv/2.1.1/using/reference/v2-multi-datacenter/architecture.md
+++ b/content/riak/kv/2.1.1/using/reference/v2-multi-datacenter/architecture.md
@@ -78,7 +78,7 @@ replication, as illustrated in the Figure below.
 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster
-3. The site and listener nodes iterate through each [vnode](/riak/kv/2.1.1/learn/glossary/#vnode) in their respective clusters and compute a hash for
+3. The site and listener nodes iterate through each [vnode]({{< baseurl >}}riak/kv/2.1.1/learn/glossary/#vnode) in their respective clusters and compute a hash for
   each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its
@@ -90,7 +90,7 @@ replication, as illustrated in the Figure below.
   achieve the new object values, completing the fullsync cycle

-![MDC Fullsync](/images/MDC_Full-sync-small.png)
+![MDC Fullsync]({{< baseurl >}}images/MDC_Full-sync-small.png)

 ## Realtime Replication
@@ -108,7 +108,7 @@ replication, as illustrated in the Figure below.
   the update

-![MDC Realtime](/images/MDC-real-time-sync-small.png)
+![MDC Realtime]({{< baseurl >}}images/MDC-real-time-sync-small.png)

 ## Restrictions
@@ -116,6 +116,6 @@ replication, as illustrated in the Figure below.
 It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the
-same [ring size](/riak/kv/2.1.1/learn/concepts/clusters/#the-ring); if you are using fullsync
-replication, every bucket's [`n_val`](/riak/kv/2.1.1/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+same [ring size]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{< baseurl >}}riak/kv/2.1.1/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
 source and sink cluster.
diff --git a/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/aae.md
index d69e4b64f0..60e7cfff15 100644
--- a/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/aae.md
+++ b/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/aae.md
@@ -17,9 +17,9 @@ aliases:
 - /riak/kv/2.1.1/ops/mdc/v3/aae
 ---

-[glossary aae]: /riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae
-[config reference#advanced]: /riak/kv/2.1.1/configuring/reference/#advanced-configuration
-[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters
+[glossary aae]: {{< baseurl >}}riak/kv/2.1.1/learn/glossary/#active-anti-entropy-aae
+[config reference#advanced]: {{< baseurl >}}riak/kv/2.1.1/configuring/reference/#advanced-configuration
+[concept clusters]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/clusters

 > **Note: Technical preview**
 >
diff --git a/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/architecture.md
index c4810b0970..9e0969dd66 100644
--- a/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/architecture.md
+++ b/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/architecture.md
@@ -17,8 +17,8 @@ aliases:
 - /riak/kv/2.1.1/ops/mdc/v3/architecture
 ---

-[glossary vnode]: /riak/kv/2.1.1/learn/glossary/#vnode
-[concept clusters]: /riak/kv/2.1.1/learn/concepts/clusters
+[glossary vnode]: {{< baseurl >}}riak/kv/2.1.1/learn/glossary/#vnode
+[concept clusters]: {{< baseurl >}}riak/kv/2.1.1/learn/concepts/clusters

 ## How Version 3 Replication Works
@@ -111,7 +111,7 @@ the following commands must be issued:
   cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png)
+ ![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime1.png)

 At this point realtime replication commences.
@@ -122,7 +122,7 @@ and replication begins.

-![MDC fullsync](/images/MDC-v3-realtime2.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime2.png)

 ### Realtime queueing and synchronization
@@ -136,7 +136,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime3.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime3.png)

@@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime4.png)

@@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime5.png)

@@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png)
+![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime6.png)
 ## Restrictions
diff --git a/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/cascading-writes.md
index d5e7d38830..7e8c6b2330 100644
--- a/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/cascading-writes.md
+++ b/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -83,7 +83,7 @@ cascade.
 ## Usage

 Riak Enterprise Cascading Writes can be enabled and disabled using the
-`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter) for more information.
+`riak-repl` command. Please see the [Version 3 Operations guide]({{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/v3-multi-datacenter) for more information.

 To show the current settings:
diff --git a/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md
index 376f148368..1367d7e9d5 100644
--- a/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md
+++ b/content/riak/kv/2.1.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -17,7 +17,7 @@ aliases:
 - /riak/kv/2.1.1/ops/mdc/v3/scheduling-fullsync
 ---

-[config reference#advanced]: /riak/kv/2.1.1/configuring/reference/#advanced-configuration
+[config reference#advanced]: {{< baseurl >}}riak/kv/2.1.1/configuring/reference/#advanced-configuration

 The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either:
diff --git a/content/riak/kv/2.1.1/using/repair-recovery.md b/content/riak/kv/2.1.1/using/repair-recovery.md
index 0c058ccbc4..fbb7c0223a 100644
--- a/content/riak/kv/2.1.1/using/repair-recovery.md
+++ b/content/riak/kv/2.1.1/using/repair-recovery.md
@@ -15,7 +15,7 @@ toc: true
 [repair recover fail]: ./failure-recovery/
 [repair recover errors]: ./errors/
 [repair recover repairs]: ./repairs/
-[repair recover restart]: ./rolling-restarts/
+[repair recover restart]: ./rolling-restart/

 ## In This Section
diff --git a/content/riak/kv/2.1.1/using/repair-recovery/errors.md b/content/riak/kv/2.1.1/using/repair-recovery/errors.md
index f200a7dc99..5243dddf26 100644
--- a/content/riak/kv/2.1.1/using/repair-recovery/errors.md
+++ b/content/riak/kv/2.1.1/using/repair-recovery/errors.md
@@ -15,7 +15,7 @@ aliases:
 - /riak/kv/2.1.1/ops/running/recovery/errors
 ---

-[config reference]: /riak/kv/2.1.1/configuring/reference
+[config reference]: {{< baseurl >}}riak/kv/2.1.1/configuring/reference

 This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect
@@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems.

 Error | Description | Resolution
 :-----|:------------|:----------
 `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd
-`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1.
-`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1.
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1.
 `{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
 `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
 `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems
@@ -186,7 +186,7 @@ Error | Message | Description | Resolution
 | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
 | `SSL handoff config error: property : .` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
 | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper
- | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1.
+ | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1.
 | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
 | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
 | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
@@ -206,8 +206,8 @@ Error | Message | Description | Resolution
 :-----|:--------|:------------|:----------
 `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status
 `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
-`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1
-`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See 1
+`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See 1
 `{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone
 `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value
 `{field_parsing_failed, {Field, Value}}` | `Could not parse field
@@ -237,7 +237,7 @@
 `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called
 `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value
 `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false`
-`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production.
+`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production.
 `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format
 `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format
 `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value
@@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match
 gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without)
 monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec).
 <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args`
-(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2
+(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2
 enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core.
 exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of error; check for existence of stale `beam.smp` processes.
 exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports.
 gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`.
-** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.1.1/using/reference/snmp) configuration.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{< baseurl >}}riak/kv/2.1.1/using/reference/snmp) configuration.
 RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
diff --git a/content/riak/kv/2.1.1/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.1.1/using/repair-recovery/failure-recovery.md
index 7bcbb234d2..ae0ff6e1c6 100644
--- a/content/riak/kv/2.1.1/using/repair-recovery/failure-recovery.md
+++ b/content/riak/kv/2.1.1/using/repair-recovery/failure-recovery.md
@@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are
 available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually
-returns to service (also called [hinted handoff](/riak/kv/2.1.1/learn/glossary/#hinted-handoff)).
+returns to service (also called [hinted handoff]({{< baseurl >}}riak/kv/2.1.1/learn/glossary/#hinted-handoff)).

 More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it.
@@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM
 may also improve latency because more of the active dataset will be cached by the operating system.

-Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.1.1/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{< baseurl >}}riak/kv/2.1.1/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
 the number of siblings, causing longer disk service times and slower network responses.
@@ -115,7 +115,7 @@ spreading load and increasing available CPU and IOPS.

 ## Cluster Recovery From Backups

-See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+See [Changing Cluster Information]({{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.

 {{% note title="Tip" %}}
 If you are a licensed Riak Enterprise or CS customer and require assistance or
diff --git a/content/riak/kv/2.1.1/using/repair-recovery/repairs.md b/content/riak/kv/2.1.1/using/repair-recovery/repairs.md
index 07c66b605c..4f421a61fe 100644
--- a/content/riak/kv/2.1.1/using/repair-recovery/repairs.md
+++ b/content/riak/kv/2.1.1/using/repair-recovery/repairs.md
@@ -149,7 +149,7 @@ In the event of major hardware or filesystem problems, LevelDB can become corrup
 ### Checking for Compaction Errors

-Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`](/riak/kv/2.1.1/configuring/reference/) configuration file. The default is `./data`.
+Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/) configuration file. The default is `./data`.

 Compaction error messages take the following form:
@@ -218,23 +218,23 @@ riak start

 If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data
-[partitions](/riak/kv/2.1.1/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+[partitions]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
 run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to
-repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.1.1/learn/concepts/active-anti-entropy/) is enabled.
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/) is enabled.

 You will need to run a repair if the following are both true:

-* Active anti-entropy is [disabled](/riak/kv/2.1.1/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* Active anti-entropy is [disabled]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
 * You have both non-expiring data and keys that are not accessed
   frequently (which means that they are not likely to be subject to
-  [read repair](/riak/kv/2.1.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+  [read repair]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))

 You will most likely not need to run a repair operation if _any_ of the following is true:

-* Active anti-entropy is [enabled](/riak/kv/2.1.1/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Active anti-entropy is [enabled]({{< baseurl >}}riak/kv/2.1.1/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
 * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions
 * Your data expires frequently
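For reference, whether AAE is enabled or disabled comes down to a single switch; a hedged `riak.conf` sketch (the parameter name comes from this series' configuration reference, not the hunk above, so verify it there):

```riakconf
## "active" enables AAE, making manual repairs largely unnecessary;
## "passive" disables it, leaving only passive read repair.
anti_entropy = active
```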
diff --git a/content/riak/kv/2.1.1/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.1.1/using/repair-recovery/rolling-restart.md
index 00c880292f..da5cc6e21e 100644
--- a/content/riak/kv/2.1.1/using/repair-recovery/rolling-restart.md
+++ b/content/riak/kv/2.1.1/using/repair-recovery/rolling-restart.md
@@ -15,7 +15,7 @@ aliases:
 - /riak/kv/2.1.1/ops/running/recovery/rolling-restart
 ---

-Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.1.1/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis.
+Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{< baseurl >}}riak/kv/2.1.1/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis.

 The following steps should be undertaken on each Riak node that you wish to restart:
diff --git a/content/riak/kv/2.1.1/using/running-a-cluster.md b/content/riak/kv/2.1.1/using/running-a-cluster.md
index e1151b09ed..ad98d073f8 100644
--- a/content/riak/kv/2.1.1/using/running-a-cluster.md
+++ b/content/riak/kv/2.1.1/using/running-a-cluster.md
@@ -19,7 +19,7 @@
 Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster.

-Most configuration changes will be applied to the [configuration file](/riak/kv/2.1.1/configuring/reference/) located in your `rel/riak/etc` directory (if
+Most configuration changes will be applied to the [configuration file]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/) located in your `rel/riak/etc` directory (if
 you compiled from source) or `/etc` (if you used a binary install of Riak).
@@ -46,7 +46,7 @@ options:
   `ring` directory. This will require rejoining all nodes into a cluster again.
>
-> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.1.1/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
+> *Rename the node using the [`riak-admin cluster replace`]({{< baseurl >}}riak/kv/2.1.1/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.

 ## Configure the First Node
@@ -59,7 +59,7 @@
 riak stop

 #### Select an IP address and port

 Let's say that the IP address for your cluster is 192.168.1.10 and that
-you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.1.1/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
 interface due to performance gains), you should change your configuration file:
@@ -153,7 +153,7 @@ preferred.
>
> Once a node has been started, in order to change the name you must
 either remove ring files from the `/data/ring` directory or
-[`riak-admin cluster force-replace`](/riak/kv/2.1.1/using/admin/riak-admin/#cluster-force-replace) the node.
+[`riak-admin cluster force-replace`]({{< baseurl >}}riak/kv/2.1.1/using/admin/riak-admin/#cluster-force-replace) the node.

 #### Start the node
@@ -249,7 +249,7 @@ into which you can type the following command:
 ```

 To join additional nodes to your cluster, repeat the above steps. You
-can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes) from a cluster.
+can also find more detailed instructions about [adding and removing nodes]({{< baseurl >}}riak/kv/2.1.1/using/cluster-operations/adding-removing-nodes) from a cluster.

> **Ring Creation Size**
>
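Tying the first-node walkthrough above together, the resulting Protocol Buffers listener line in `riak.conf` would look roughly like this sketch (address and port taken from the example above; the parameter name is the standard one for this series and should be verified against the configuration reference):

```riakconf
## Example values from the walkthrough above; adjust to your network.
listener.protobuf.internal = 192.168.1.10:8087
```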
diff --git a/content/riak/kv/2.1.1/using/security.md b/content/riak/kv/2.1.1/using/security.md
index cea9871ab7..0a02a092f9 100644
--- a/content/riak/kv/2.1.1/using/security.md
+++ b/content/riak/kv/2.1.1/using/security.md
@@ -15,14 +15,14 @@ aliases:
 - /riak/kv/2.1.1/ops/advanced/security
 ---

-[config reference search]: /riak/kv/2.1.4/configuring/reference/#search
-[config search enabling]: /riak/kv/2.1.4/configuring/search/#enabling-riak-search
-[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl
+[config reference search]: {{< baseurl >}}riak/kv/2.1.1/configuring/reference/#search
+[config search enabling]: {{< baseurl >}}riak/kv/2.1.1/configuring/search/#enabling-riak-search
+[config v3 ssl]: {{< baseurl >}}riak/kv/2.1.1/configuring/v3-multi-datacenter/ssl
 [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html
-[security basics]: /riak/kv/2.1.4/using/security/basics
-[security managing]: /riak/kv/2.1.4/using/security/managing-sources/
+[security basics]: {{< baseurl >}}riak/kv/2.1.1/using/security/basics
+[security managing]: {{< baseurl >}}riak/kv/2.1.1/using/security/managing-sources/
 [Solr]: http://lucene.apache.org/solr/
-[usage search]: /riak/kv/2.1.4/developing/usage/search
+[usage search]: {{< baseurl >}}riak/kv/2.1.1/developing/usage/search

> **Internal security**
>
diff --git a/content/riak/kv/2.1.1/using/security/basics.md b/content/riak/kv/2.1.1/using/security/basics.md
index 686fa63fbd..30b7e13c43 100644
--- a/content/riak/kv/2.1.1/using/security/basics.md
+++ b/content/riak/kv/2.1.1/using/security/basics.md
@@ -18,7 +18,7 @@ aliases:

> **Note on Network security**
>
> This document covers only the 2.0 authentication and authorization
-features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.1.1/using/security/managing-sources/).
+features. For a look at network security in Riak, see [Security and Firewalls]({{< baseurl >}}riak/kv/2.1.1/using/security/managing-sources/).

 As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing,
@@ -47,7 +47,7 @@ of the following **before** enabling security:

 1. Make certain that the original Riak Search (version 1) and link
    walking are not required. Enabling security will break this
    functionality. If you wish to use security and Search together, you
-   will need to use the [new Search feature](/riak/kv/2.1.1/developing/usage/search/).
+   will need to use the [new Search feature]({{< baseurl >}}riak/kv/2.1.1/developing/usage/search/).
 1. Because Riak security requires a secure SSL connection, you will
    need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL
@@ -59,10 +59,10 @@ of the following **before** enabling security:
 1. Check any Erlang MapReduce code for invocations of Riak modules
    other than `riak_kv_mapreduce`. Enabling security will prevent those
    from succeeding unless those modules are available via the `add_path`
-   mechanism documented in [Installing Custom Code](/riak/kv/2.1.1/using/reference/custom-code).
+   mechanism documented in [Installing Custom Code]({{< baseurl >}}riak/kv/2.1.1/using/reference/custom-code).
 1. Make sure that your client software will work properly:
    * It must pass authentication information with each request
-   * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.1.1/developing/api/protocol-buffers/)
+   * It must support HTTPS or encrypted [Protocol Buffers]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/)
      traffic
    * If using HTTPS, the proper port (presumably 443) is open from client to server
@@ -75,7 +75,7 @@ of the following **before** enabling security:
 Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted.

-Clients that use [Protocol Buffers](/riak/kv/2.1.1/developing/api/protocol-buffers/) will typically have to be
+Clients that use [Protocol Buffers]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/) will typically have to be
 reconfigured/restarted with the proper credentials once security is enabled.
@@ -405,7 +405,7 @@ riak-admin security revoke on from all|{{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination.
@@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client

 ### MapReduce Permissions

-Permission to perform [MapReduce](/riak/kv/2.1.1/developing/usage/mapreduce/) jobs can be assigned
+Permission to perform [MapReduce]({{< baseurl >}}riak/kv/2.1.1/developing/usage/mapreduce/) jobs can be assigned
 using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types:
@@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user

 ### Bucket Type Permissions

-In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.1.1/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin
+In versions 2.0 and later, Riak users can manage [bucket types]({{< baseurl >}}riak/kv/2.1.1/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin
 security` allows you to manage the following bucket type-related permissions:
@@ -507,7 +507,7 @@ disabled, you will get the following error:
> `{error,{unknown_permission,"search.query"}}`
>
> More information on Riak Search and how to enable it can be found in the
-[Riak Search Settings](/riak/kv/2.1.1/configuring/search/) document.
+[Riak Search Settings]({{< baseurl >}}riak/kv/2.1.1/configuring/search/) document.

 #### Usage Examples
@@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme
 you will then need to define security sources required for authentication.

-A more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.1.1/using/security/managing-sources/).
+A more in-depth tutorial can be found in [Managing Security Sources]({{< baseurl >}}riak/kv/2.1.1/using/security/managing-sources/).

 ### Add Source
@@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password

 This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`,
-`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.1.1/using/security/managing-sources/) document.
+`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{< baseurl >}}riak/kv/2.1.1/using/security/managing-sources/) document.

 ## Security Ciphers
@@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS).

 ### Client vs. Server Cipher Order

 By default, Riak prefers the cipher order that you set on the server,
-i.e. the [`honor_cipher_order`](/riak/kv/2.1.1/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher
+i.e. the [`honor_cipher_order`]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher
 order dictate which cipher is chosen, set `honor_cipher_order` to `off`.

> **Note on Erlang versions**
@@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`.
> Riak's default cipher order behavior has the potential to crash Erlang
 VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on
-installing it can be found in [Installing Erlang](/riak/kv/2.1.1/setup/installing/source/erlang). This issue should
+installing it can be found in [Installing Erlang]({{< baseurl >}}riak/kv/2.1.1/setup/installing/source/erlang). This issue should
 not affect Erlang 17.0 and later.

 ## Enabling SSL

 In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need
-to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.1.1/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.1.1/configuring/reference/#client-interfaces) for the node
+to enable it prior to enabling security. If you are using [Protocol Buffers]({{< baseurl >}}riak/kv/2.1.1/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/#client-interfaces) for the node
 as well as a [certificate configuration](#certificate-configuration).

-If, however, you are using the [HTTP API](/riak/kv/2.1.1/developing/api/http) for Riak and would like to
+If, however, you are using the [HTTP API]({{< baseurl >}}riak/kv/2.1.1/developing/api/http) for Riak and would like to
 configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port:
@@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088

 When using Riak security, you can choose which versions of SSL/TLS are allowed. By default, only TLS 1.2 is allowed, but this version can be
-disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.1.1/configuring/reference/#security) to `on` or `off`:
+disabled and others enabled by setting the following [configurable parameters]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/#security) to `on` or `off`:

 * `tls_protocols.tlsv1`
 * `tls_protocols.tlsv1.1`
@@ -775,16 +775,16 @@ Three things to note:

 ## Certificate Configuration

-If you are using any of the available [security sources](/riak/kv/2.1.1/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication), you will need to do so
+If you are using any of the available [security sources]({{< baseurl >}}riak/kv/2.1.1/using/security/managing-sources/), including [trust-based authentication]({{< baseurl >}}riak/kv/2.1.1/using/security/managing-sources/#trust-based-authentication), you will need to do so
 over a secure SSL connection. In order to establish a secure connection,
-you will need to ensure that each Riak node's [configuration files](/riak/kv/2.1.1/configuring/reference/#security) point to the proper paths for your
+you will need to ensure that each Riak node's [configuration files]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/#security) point to the proper paths for your
 generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory.

 If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation
-on [configuring directories](/riak/kv/2.1.1/configuring/reference/#directories).
+on [configuring directories]({{< baseurl >}}riak/kv/2.1.1/configuring/reference/#directories).
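To illustrate the TLS toggles listed above, a hedged `riak.conf` sketch of the documented default posture (only TLS 1.2 allowed); the `tlsv1.2` name follows the pattern of the two parameters named in the hunk and is an assumption here:

```riakconf
## Documented default posture: only TLS 1.2 enabled.
tls_protocols.tlsv1 = off
tls_protocols.tlsv1.1 = off
tls_protocols.tlsv1.2 = on
```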
vnode_management_timer Sets the frequency with which vnodes attempt to trigger handoff between +href="../../learn/glossary/#vnode">vnodes attempt to trigger handoff between this node and other nodes in the cluster. 10s (10 seconds)
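In `riak.conf` this parameter takes a duration; a sketch using the default value from the table above:

```riakconf
# How often this node checks whether handoff should be triggered
vnode_management_timer = 10s
```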
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.1.1/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.1.1/using/security/managing-sources.md b/content/riak/kv/2.1.1/using/security/managing-sources.md index f267d0ae43..c2194e6256 100644 --- a/content/riak/kv/2.1.1/using/security/managing-sources.md +++ b/content/riak/kv/2.1.1/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.1.1/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.1.1/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.1.1/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.1.1/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.1.1/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.1.1/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.1.1/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.1.1/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.1.1/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.1.1/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. 
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.1.1/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.1.1/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.1.1/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.1.1/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.1.3/_reference-links.md b/content/riak/kv/2.1.3/_reference-links.md index da0ee6a3e7..21b096169e 100644 --- a/content/riak/kv/2.1.3/_reference-links.md +++ b/content/riak/kv/2.1.3/_reference-links.md @@ -4,247 +4,247 @@ ## Common -[downloads]: /riak/kv/2.1.3/downloads/ -[install index]: /riak/kv/2.1.3/setup/installing -[upgrade index]: /riak/kv/2.1.3/upgrading -[plan index]: /riak/kv/2.1.3/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.1.3/configuring/reference/ -[manage index]: /riak/kv/2.1.3/using/managing -[performance index]: /riak/kv/2.1.3/using/performance -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.1.3/downloads/ +[install index]: {{}}riak/kv/2.1.3/setup/installing +[upgrade index]: {{}}riak/kv/2.1.3/upgrading +[plan index]: {{}}riak/kv/2.1.3/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.1.3/configuring/reference/ +[manage index]: {{}}riak/kv/2.1.3/using/managing +[performance index]: {{}}riak/kv/2.1.3/using/performance +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.1.3/setup/planning -[plan start]: /riak/kv/2.1.3/setup/planning/start -[plan backend]: /riak/kv/2.1.3/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.3/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.3/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.1.3/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.1.3/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.1.3/setup/planning/best-practices -[plan future]: /riak/kv/2.1.3/setup/planning/future +[plan index]: {{}}riak/kv/2.1.3/setup/planning +[plan start]: {{}}riak/kv/2.1.3/setup/planning/start +[plan backend]: {{}}riak/kv/2.1.3/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.3/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.3/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.1.3/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.1.3/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.1.3/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.1.3/setup/planning/future ## Installing -[install index]: /riak/kv/2.1.3/setup/installing -[install aws]: /riak/kv/2.1.3/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.1.3/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.1.3/setup/installing/freebsd -[install mac osx]: /riak/kv/2.1.3/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.1.3/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.1.3/setup/installing/smartos -[install solaris]: /riak/kv/2.1.3/setup/installing/solaris -[install suse]: /riak/kv/2.1.3/setup/installing/suse -[install windows azure]: /riak/kv/2.1.3/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.1.3/setup/installing +[install aws]: {{}}riak/kv/2.1.3/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.1.3/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.1.3/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.1.3/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.1.3/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.1.3/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.1.3/setup/installing/solaris +[install suse]: {{}}riak/kv/2.1.3/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.1.3/setup/installing/windows-azure -[install source index]: /riak/kv/2.1.3/setup/installing/source -[install source erlang]: /riak/kv/2.1.3/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.1.3/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.1.3/setup/installing/source +[install source erlang]: {{}}riak/kv/2.1.3/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.1.3/setup/installing/source/jvm -[install verify]: /riak/kv/2.1.3/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.3/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.1.3/setup/upgrading -[upgrade checklist]: /riak/kv/2.1.3/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.1.3/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.1.3/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.1.3/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.1.3/setup/downgrade +[upgrade index]: {{}}riak/kv/2.1.3/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.1.3/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.1.3/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.1.3/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.1.3/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.1.3/setup/downgrade ## Configuring -[config index]: /riak/kv/2.1.3/configuring -[config basic]: /riak/kv/2.1.3/configuring/basic -[config backend]: /riak/kv/2.1.3/configuring/backend -[config manage]: /riak/kv/2.1.3/configuring/managing -[config reference]: /riak/kv/2.1.3/configuring/reference/ -[config strong consistency]: /riak/kv/2.1.3/configuring/strong-consistency -[config load balance]: /riak/kv/2.1.3/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.1.3/configuring/mapreduce -[config search]: /riak/kv/2.1.3/configuring/search/ +[config index]: {{}}riak/kv/2.1.3/configuring +[config basic]: {{}}riak/kv/2.1.3/configuring/basic +[config backend]: {{}}riak/kv/2.1.3/configuring/backend +[config manage]: 
{{}}riak/kv/2.1.3/configuring/managing +[config reference]: {{}}riak/kv/2.1.3/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.1.3/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.1.3/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.1.3/configuring/mapreduce +[config search]: {{}}riak/kv/2.1.3/configuring/search/ -[config v3 mdc]: /riak/kv/2.1.3/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.1.3/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.1.3/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.1.3/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.1.3/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.1.3/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.1.3/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.1.3/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.1.3/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.1.3/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.1.3/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.1.3/using/ -[use admin commands]: /riak/kv/2.1.3/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.1.3/using/running-a-cluster +[use index]: {{}}riak/kv/2.1.3/using/ +[use admin commands]: {{}}riak/kv/2.1.3/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.1.3/using/running-a-cluster ### Reference -[use ref bucket types]: /riak/kv/2.1.3/using/reference/bucket-types -[use ref custom code]: /riak/kv/2.1.3/using/reference/custom-code -[use ref handoff]: /riak/kv/2.1.3/using/reference/handoff -[use ref monitoring]: /riak/kv/2.1.3/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.1.3/using/reference/search -[use ref 2i]: /riak/kv/2.1.3/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.1.3/using/reference/snmp -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.1.3/using/reference/jmx -[use ref obj del]: /riak/kv/2.1.3/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.1.3/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.1.3/using/reference/v2-multi-datacenter +[use ref bucket types]: {{}}riak/kv/2.1.3/using/reference/bucket-types +[use ref custom code]: {{}}riak/kv/2.1.3/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.1.3/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.1.3/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.1.3/using/reference/search +[use ref 2i]: {{}}riak/kv/2.1.3/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.1.3/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.1.3/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.1.3/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.1.3/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.1.3/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: 
/riak/kv/2.1.3/using/admin/ -[use admin commands]: /riak/kv/2.1.3/using/admin/commands/ -[use admin riak cli]: /riak/kv/2.1.3/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.1.3/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.1.3/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.1.3/using/admin/ +[use admin commands]: {{}}riak/kv/2.1.3/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.1.3/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.1.3/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.1.3/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.1.3/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.1.3/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.1.3/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.1.3/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.1.3/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.1.3/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.1.3/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.1.3/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.1.3/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.1.3/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.1.3/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.1.3/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.1.3/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.1.3/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.1.3/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.1.3/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.1.3/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.1.3/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.1.3/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.1.3/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.1.3/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.1.3/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.1.3/using/repair-recovery -[repair recover fail]: /riak/kv/2.1.3/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.1.3/using/repair-recovery +[repair recover fail]: {{}}riak/kv/2.1.3/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.1.3/using/security/ -[security basics]: /riak/kv/2.1.3/using/security/basics -[security managing]: /riak/kv/2.1.3/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.1.3/using/security/ +[security basics]: {{}}riak/kv/2.1.3/using/security/basics +[security managing]: 
{{}}riak/kv/2.1.3/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.1.3/using/performance/ -[perf benchmark]: /riak/kv/2.1.3/using/performance/benchmarking -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.1.3/using/performance/erlang -[perf aws]: /riak/kv/2.1.3/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.1.3/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.1.3/using/performance/ +[perf benchmark]: {{}}riak/kv/2.1.3/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.1.3/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.1.3/using/performance/erlang +[perf aws]: {{}}riak/kv/2.1.3/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.1.3/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.1.3/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.1.3/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.1.3/developing -[dev client libraries]: /riak/kv/2.1.3/developing/client-libraries -[dev data model]: /riak/kv/2.1.3/developing/data-modeling -[dev data types]: /riak/kv/2.1.3/developing/data-types -[dev kv model]: /riak/kv/2.1.3/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.1.3/developing +[dev client libraries]: {{}}riak/kv/2.1.3/developing/client-libraries +[dev data model]: {{}}riak/kv/2.1.3/developing/data-modeling +[dev data types]: {{}}riak/kv/2.1.3/developing/data-types +[dev kv model]: {{}}riak/kv/2.1.3/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.1.3/developing/getting-started -[getting started java]: /riak/kv/2.1.3/developing/getting-started/java -[getting started ruby]: /riak/kv/2.1.3/developing/getting-started/ruby -[getting started python]: /riak/kv/2.1.3/developing/getting-started/python -[getting started php]: /riak/kv/2.1.3/developing/getting-started/php -[getting started csharp]: /riak/kv/2.1.3/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.1.3/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.1.3/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.1.3/developing/getting-started/golang - -[obj model java]: /riak/kv/2.1.3/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.1.3/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.1.3/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.1.3/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.1.3/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.1.3/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.1.3/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.1.3/developing/getting-started +[getting started java]: {{}}riak/kv/2.1.3/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.1.3/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.1.3/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.1.3/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.1.3/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.1.3/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.1.3/developing/getting-started/erlang +[getting started golang]: 
{{}}riak/kv/2.1.3/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.1.3/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.1.3/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.1.3/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.1.3/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.1.3/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.1.3/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.1.3/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.1.3/developing/usage -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types/ -[usage commit hooks]: /riak/kv/2.1.3/developing/usage/commit-hooks/ -[usage conflict resolution]: /riak/kv/2.1.3/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.1.3/developing/usage/content-types -[usage create objects]: /riak/kv/2.1.3/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.1.3/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.1.3/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.1.3/developing/usage/mapreduce -[usage search]: /riak/kv/2.1.3/developing/usage/search -[usage search schema]: /riak/kv/2.1.3/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.1.3/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.1.3/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.1.3/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.1.3/developing/usage +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types/ +[usage commit hooks]: {{}}riak/kv/2.1.3/developing/usage/commit-hooks/ +[usage conflict resolution]: {{}}riak/kv/2.1.3/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.1.3/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.1.3/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.1.3/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.1.3/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.1.3/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.1.3/developing/usage/search +[usage search schema]: {{}}riak/kv/2.1.3/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.1.3/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.1.3/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.1.3/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.1.3/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.1.3/developing/app-guide/strong-consistency -[apps write once]: /riak/kv/2.1.3/developing/app-guide/write-once +[apps mapreduce]: {{}}riak/kv/2.1.3/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.1.3/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.1.3/developing/app-guide/strong-consistency +[apps write once]: {{}}riak/kv/2.1.3/developing/app-guide/write-once ### API -[dev api backend]: /riak/kv/2.1.3/developing/api/backend -[dev api http]: /riak/kv/2.1.3/developing/api/http -[dev api http status]: /riak/kv/2.1.3/developing/api/http/status -[dev api pbc]: 
/riak/kv/2.1.3/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.1.3/developing/api/backend +[dev api http]: {{}}riak/kv/2.1.3/developing/api/http +[dev api http status]: {{}}riak/kv/2.1.3/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.1.3/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: /riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.1.3/learn/glossary/ -[glossary aae]: /riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.1.3/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.1.3/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.1.3/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.1.3/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.1.3/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.1.3/learn/concepts/causal-context/ -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.1.3/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.1.3/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.3/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.3/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.1.3/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.1.3/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.1.3/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/ +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.1.3/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.1.3/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.1.3/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.1.3/add-ons.md b/content/riak/kv/2.1.3/add-ons.md index bc5c17c643..9fd26582d0 100644 --- a/content/riak/kv/2.1.3/add-ons.md +++ b/content/riak/kv/2.1.3/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.1.3/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.1.3/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.1.3/add-ons/redis/developing-rra.md b/content/riak/kv/2.1.3/add-ons/redis/developing-rra.md index d0d456d51a..686d5c89e1 100644 --- a/content/riak/kv/2.1.3/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.1.3/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.1.3/developing/api/http +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.1.3/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.1.3/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.1.3/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.1.3/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.1.3/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.1.3/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.1.3/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.1.3/add-ons/redis/redis-add-on-features.md index b5925c6190..bf03c479a2 100644 --- a/content/riak/kv/2.1.3/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.1.3/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
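To observe the read-through behavior described above, a hypothetical smoke test against the cache proxy might look like the following (the listen port and key name are assumptions; substitute the values from your RRA configuration):

```bash
# The first GET misses the cache, reads from Riak, and populates Redis;
# a repeat GET within CACHE_TTL should be served directly from Redis
redis-cli -h 127.0.0.1 -p 22122 GET test:key1
redis-cli -h 127.0.0.1 -p 22122 GET test:key1
```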
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.1.3/add-ons/redis/set-up-rra.md b/content/riak/kv/2.1.3/add-ons/redis/set-up-rra.md index 01895900e2..c0f3f22650 100644 --- a/content/riak/kv/2.1.3/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.1.3/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.1.3/setup/installing -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.1.3/setup/installing +[perf open files]: {{}}riak/kv/2.1.3/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. 
diff --git a/content/riak/kv/2.1.3/add-ons/redis/using-rra.md b/content/riak/kv/2.1.3/add-ons/redis/using-rra.md index c0606b1fb8..599901c6b1 100644 --- a/content/riak/kv/2.1.3/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.1.3/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.1.3/developing/api/http/ +[dev api http]: {{}}riak/kv/2.1.3/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.1.3/configuring/backend.md b/content/riak/kv/2.1.3/configuring/backend.md index c5cabaefae..dd620ba510 100644 --- a/content/riak/kv/2.1.3/configuring/backend.md +++ b/content/riak/kv/2.1.3/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.1.3/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.3/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.1.3/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.3/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.1.3/configuring/basic.md b/content/riak/kv/2.1.3/configuring/basic.md index bbcd756119..b0c9cce26f 100644 --- a/content/riak/kv/2.1.3/configuring/basic.md +++ b/content/riak/kv/2.1.3/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.1.3/ops/building/configuration/ --- -[config reference]: /riak/kv/2.1.3/configuring/reference -[use running cluster]: /riak/kv/2.1.3/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.1.3/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.1.3/using/performance/erlang -[plan start]: /riak/kv/2.1.3/setup/planning/start -[plan best practices]: /riak/kv/2.1.3/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.1.3/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.1.3/setup/planning/backend -[plan backend multi]: /riak/kv/2.1.3/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.1.3/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.1.3/using/performance/benchmarking -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit -[perf index]: /riak/kv/2.1.3/using/performance -[perf aws]: /riak/kv/2.1.3/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.1.3/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.1.3/configuring/reference +[use running cluster]: {{}}riak/kv/2.1.3/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.1.3/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.1.3/using/performance/erlang +[plan start]: {{}}riak/kv/2.1.3/setup/planning/start +[plan best practices]: {{}}riak/kv/2.1.3/setup/planning/best-practices +[cluster ops backup]: 
{{}}riak/kv/2.1.3/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.1.3/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.1.3/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.1.3/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.1.3/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.1.3/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.1.3/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.1.3/using/performance +[perf aws]: {{}}riak/kv/2.1.3/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.1.3/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.1.3/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.1.3/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
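For example, an existing bucket can be forced to pick up the new defaults by resetting its properties over HTTP, per the reset mechanism linked above (a sketch; the bucket name and the `default` bucket type are placeholders):

```bash
# Reset my_bucket's properties so it falls back to the configured defaults
curl -XDELETE http://localhost:8098/types/default/buckets/my_bucket/props
```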
## System tuning diff --git a/content/riak/kv/2.1.3/configuring/load-balancing-proxy.md b/content/riak/kv/2.1.3/configuring/load-balancing-proxy.md index 9c9e660e02..494fd5dbd2 100644 --- a/content/riak/kv/2.1.3/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.1.3/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.1.3/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.1.3/configuring/managing.md b/content/riak/kv/2.1.3/configuring/managing.md index 89d59d8866..1ac84ad4d0 100644 --- a/content/riak/kv/2.1.3/configuring/managing.md +++ b/content/riak/kv/2.1.3/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.1.3/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.1.3/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.1.3/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.1.3/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.1.3/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.1.3/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.1.3/configuring/mapreduce.md b/content/riak/kv/2.1.3/configuring/mapreduce.md index cfb506cc11..4ea90b34fa 100644 --- a/content/riak/kv/2.1.3/configuring/mapreduce.md +++ b/content/riak/kv/2.1.3/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.1.3/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.1.3/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.1.3/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.1.3/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.1.3/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.1.3/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.1.3/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.1.3/configuring/reference.md b/content/riak/kv/2.1.3/configuring/reference.md index cb7189a599..09493f104b 100644 --- a/content/riak/kv/2.1.3/configuring/reference.md +++ b/content/riak/kv/2.1.3/configuring/reference.md @@ -1876,8 +1876,8 @@ package) and in R14B04 via a custom repository and branch. 
diff --git a/content/riak/kv/2.1.3/configuring/search.md b/content/riak/kv/2.1.3/configuring/search.md index 09b641b375..d8b73279ef 100644 --- a/content/riak/kv/2.1.3/configuring/search.md +++ b/content/riak/kv/2.1.3/configuring/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.1.3/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.1.3/developing/usage/search -[usage search schema]: /riak/kv/2.1.3/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.1.3/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.1.3/developing/usage/custom-extractors -[config reference]: /riak/kv/2.1.3/configuring/reference -[config reference#search]: /riak/kv/2.1.3/configuring/reference/#search -[glossary aae]: /riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.1.3/using/security/ +[usage search]: {{}}riak/kv/2.1.3/developing/usage/search +[usage search schema]: {{}}riak/kv/2.1.3/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.1.3/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.1.3/developing/usage/custom-extractors +[config reference]: {{}}riak/kv/2.1.3/configuring/reference +[config reference#search]: {{}}riak/kv/2.1.3/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.1.3/using/security/ > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Riak Search Settings](http://docs.basho.com/riak/1.4.8/ops/advanced/configs/search/). +Yokozuna). This document covers Riak's Search subsystem from an operational perspective. 
If you are looking for more developer-focused diff --git a/content/riak/kv/2.1.3/configuring/strong-consistency.md b/content/riak/kv/2.1.3/configuring/strong-consistency.md index 50732dc429..f23716e17b 100644 --- a/content/riak/kv/2.1.3/configuring/strong-consistency.md +++ b/content/riak/kv/2.1.3/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.1.3/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.1.3/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.1.3/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.1.3/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.1.3/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.1.3/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.1.3/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.1.3/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.1.3/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.1.3/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.1.3/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.1.3/learn/concepts/causal-context -[dev data types]: /riak/kv/2.1.3/developing/data-types -[glossary aae]: /riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.1.3/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.1.3/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.1.3/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.1.3/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.1.3/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.1.3/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.1.3/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.1.3/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.1.3/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.1.3/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.1.3/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.1.3/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.1.3/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.1.3/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.1.3/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.1.3/learn/concepts/causal-context +[dev data types]: 
{{}}riak/kv/2.1.3/developing/data-types +[glossary aae]: {{}}riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.1.3/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.1.3/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.1.3/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.1.3/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
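To inspect these fields on a live node, a brief sketch (the bare command prints the cluster-wide summary; passing an ensemble ID such as `root` shows per-ensemble detail):

```bash
# Summarize the consensus subsystem; each field is explained in the table above
riak-admin ensemble-status

# Drill into the root ensemble
riak-admin ensemble-status root
```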
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.1.3/configuring/v2-multi-datacenter.md b/content/riak/kv/2.1.3/configuring/v2-multi-datacenter.md index 8493cbd90b..9b281154e6 100644 --- a/content/riak/kv/2.1.3/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.1.3/configuring/v2-multi-datacenter.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.1.3/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.1.3/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.1.3/configuring/v2-multi-datacenter/ssl Riak Enterprise's Multi-Datacenter Replication capabilities offer a variety of configurable parameters. diff --git a/content/riak/kv/2.1.3/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.1.3/configuring/v2-multi-datacenter/nat.md index f1348b6c51..2a058ef5da 100644 --- a/content/riak/kv/2.1.3/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.1.3/configuring/v2-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.1.3/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.1.3/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.1.3/configuring/v2-multi-datacenter/ssl Riak Enterprise supports replication of data on networks that use static NAT. This capability can be used for replicating data over the internet diff --git a/content/riak/kv/2.1.3/configuring/v3-multi-datacenter.md b/content/riak/kv/2.1.3/configuring/v3-multi-datacenter.md index 87c50f076a..159a098fae 100644 --- a/content/riak/kv/2.1.3/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.1.3/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.1.3/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.1.3/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.1.3/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/nat.md index 6b8de4c22d..9c9f86eaf5 100644 --- a/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.1.3/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. 
diff --git a/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/quick-start.md index 2bc4eca810..3035ab63cd 100644 --- a/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.1.3/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.1.3/using/performance -[config v3 mdc]: /riak/kv/2.1.3/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.1.3/using/performance +[config v3 mdc]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl.md index 6767f4c58b..6511a13c56 100644 --- a/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.1.3/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.1.3/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.1.3/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.1.3/developing/api/backend.md b/content/riak/kv/2.1.3/developing/api/backend.md index 8f7a51caa2..eeb90183be 100644 --- a/content/riak/kv/2.1.3/developing/api/backend.md +++ b/content/riak/kv/2.1.3/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/references/backend-api --- -[plan backend]: /riak/kv/2.1.3/setup/planning/backend +[plan backend]: {{}}riak/kv/2.1.3/setup/planning/backend Riak's storage API uniformly applies to all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.1.3/developing/api/http.md b/content/riak/kv/2.1.3/developing/api/http.md index 9550d6612c..28d440f0b3 100644 --- a/content/riak/kv/2.1.3/developing/api/http.md +++ b/content/riak/kv/2.1.3/developing/api/http.md @@ -29,50 +29,50 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. 
Method | URL | Doc :------|:----|:--- -`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.1.3/developing/api/http/get-bucket-props) -`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.1.3/developing/api/http/set-bucket-props) -`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.1.3/developing/api/http/reset-bucket-props) -`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.1.3/developing/api/http/list-buckets) -`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.1.3/developing/api/http/list-keys) +`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.1.3/developing/api/http/get-bucket-props) +`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.1.3/developing/api/http/set-bucket-props) +`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.1.3/developing/api/http/reset-bucket-props) +`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.1.3/developing/api/http/list-buckets) +`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.1.3/developing/api/http/list-keys) ## Object-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.1.3/developing/api/http/fetch-object) -`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.1.3/developing/api/http/store-object) -`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.1.3/developing/api/http/store-object) -`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.1.3/developing/api/http/delete-object) +`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.1.3/developing/api/http/fetch-object) +`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.1.3/developing/api/http/store-object) +`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.1.3/developing/api/http/store-object) +`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.1.3/developing/api/http/delete-object) ## Riak-Data-Type-related Operations -For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.1.3/learn/concepts/crdts), -see the `curl` examples in [Using Data Types](/riak/kv/2.1.3/developing/data-types). +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.1.3/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.1.3/developing/data-types). 
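As an illustration of the object endpoints tabulated above (the bucket type `default`, bucket `test`, and key `hello` are placeholders; 8098 is Riak's default HTTP port):

```bash
# Store a plain-text object, then fetch it back
curl -XPUT http://localhost:8098/types/default/buckets/test/keys/hello \
  -H "Content-Type: text/plain" \
  -d "world"

curl http://localhost:8098/types/default/buckets/test/keys/hello
```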
 
 ## Query-related Operations
 
 Method | URL | Doc
 :------|:----|:---
-`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.1.3/developing/api/http/mapreduce)
-`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<index_value>` | [HTTP Secondary Indexes](/riak/kv/2.1.3/developing/api/http/secondary-indexes)
-`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<start>/<end>` | [HTTP Secondary Indexes](/riak/kv/2.1.3/developing/api/http/secondary-indexes)
+`POST` | `/mapred` | [HTTP MapReduce]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<index_value>` | [HTTP Secondary Indexes]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<start>/<end>` | [HTTP Secondary Indexes]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/secondary-indexes)
 
 ## Server-related Operations
 
 Method | URL | Doc
 :------|:----|:---
-`GET` | `/ping` | [HTTP Ping](/riak/kv/2.1.3/developing/api/http/ping)
-`GET` | `/stats` | [HTTP Status](/riak/kv/2.1.3/developing/api/http/status)
-`GET` | `/` | [HTTP List Resources](/riak/kv/2.1.3/developing/api/http/list-resources)
+`GET` | `/ping` | [HTTP Ping]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/list-resources)
 
 ## Search-related Operations
 
 Method | URL | Doc
 :------|:----|:---
-`GET` | `/search/query/<index_name>` | [HTTP Search Query](/riak/kv/2.1.3/developing/api/http/search-query)
-`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.1.3/developing/api/http/search-index-info)
-`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index](/riak/kv/2.1.3/developing/api/http/fetch-search-index)
-`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index](/riak/kv/2.1.3/developing/api/http/store-search-index)
-`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index](/riak/kv/2.1.3/developing/api/http/delete-search-index)
-`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema](/riak/kv/2.1.3/developing/api/http/fetch-search-schema)
-`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema](/riak/kv/2.1.3/developing/api/http/store-search-schema)
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/store-search-schema)
diff --git a/content/riak/kv/2.1.3/developing/api/http/counters.md b/content/riak/kv/2.1.3/developing/api/http/counters.md
index cc88d9c95f..19729318a0 100644
--- a/content/riak/kv/2.1.3/developing/api/http/counters.md
+++ b/content/riak/kv/2.1.3/developing/api/http/counters.md
@@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY
 
 ## Response
 
-The regular POST/PUT ([HTTP Store Object](/riak/kv/2.1.3/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.1.3/developing/api/http/fetch-object)) responses apply here.
+The regular POST/PUT ([HTTP Store Object]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/fetch-object)) responses apply here.
 
 Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata.
diff --git a/content/riak/kv/2.1.3/developing/api/http/fetch-object.md b/content/riak/kv/2.1.3/developing/api/http/fetch-object.md
index 3360e98645..6541060c50 100644
--- a/content/riak/kv/2.1.3/developing/api/http/fetch-object.md
+++ b/content/riak/kv/2.1.3/developing/api/http/fetch-object.md
@@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc
 Optional query parameters:
 
 * `r` - (read quorum) how many replicas need to agree when retrieving the
-object ([default is defined by the bucket](/riak/kv/2.1.3/developing/api/http/set-bucket-props))
+object ([default is defined by the bucket]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/set-bucket-props))
 * `pr` - how many primary replicas need to be online when doing the read
-([default is defined by the bucket](/riak/kv/2.1.3/developing/api/http/set-bucket-props))
+([default is defined by the bucket]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/set-bucket-props))
 * `basic_quorum` - whether to return early in some failure cases (eg. when r=1
 and you get 2 errors and a success `basic_quorum=true` would return an error)
-([default is defined by the bucket](/riak/kv/2.1.3/developing/api/http/set-bucket-props))
+([default is defined by the bucket]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/set-bucket-props))
 * `notfound_ok` - whether to treat notfounds as successful reads for the
-purposes of R ([default is defined by the bucket](/riak/kv/2.1.3/developing/api/http/set-bucket-props))
+purposes of R ([default is defined by the bucket]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/set-bucket-props))
 * `vtag` - when accessing an object with siblings, which sibling to retrieve.
 Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information.
@@ -74,7 +74,7 @@ Important headers:
 and validation-based caching
 * `Last-Modified` - a timestamp for when the object was last written, in
 HTTP datetime format
-* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.1.3/learn/glossary/#links)
+* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{<baseurl>}}riak/kv/2.1.3/learn/glossary/#links)
 
 The body of the response will be the contents of the object except when
 siblings are present.
diff --git a/content/riak/kv/2.1.3/developing/api/http/fetch-search-index.md b/content/riak/kv/2.1.3/developing/api/http/fetch-search-index.md
index 19ad293ade..d1b22d946a 100644
--- a/content/riak/kv/2.1.3/developing/api/http/fetch-search-index.md
+++ b/content/riak/kv/2.1.3/developing/api/http/fetch-search-index.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/http/fetch-search-index
 ---
 
-Retrieves information about a Riak Search [index](/riak/kv/2.1.3/developing/usage/search/#simple-setup).
+Retrieves information about a Riak Search [index]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search/#simple-setup).
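Fetching index information is a single GET against this endpoint; a minimal sketch, assuming a local node on the default port 8098 and a hypothetical index named `famous`:

```curl
# Returns a JSON description of the index (name, n_val, schema)
curl http://localhost:8098/search/index/famous
```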
 
 ## Request
 
@@ -36,7 +36,7 @@ GET /search/index/<index_name>
 
 ## Response
 
 If the index is found, Riak will output a JSON object describing the
-index, including its name, the [`n_val`](/riak/kv/2.1.3/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.1.3/developing/usage/search-schemas) used by the index. Here is an example:
+index, including its name, the [`n_val`]({{<baseurl>}}riak/kv/2.1.3/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search-schemas) used by the index. Here is an example:
 
 ```json
 {
diff --git a/content/riak/kv/2.1.3/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.1.3/developing/api/http/fetch-search-schema.md
index 334457d40a..556f624800 100644
--- a/content/riak/kv/2.1.3/developing/api/http/fetch-search-schema.md
+++ b/content/riak/kv/2.1.3/developing/api/http/fetch-search-schema.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/http/fetch-search-schema
 ---
 
-Retrieves a Riak KV [search schema](/riak/kv/2.1.3/developing/usage/search-schemas).
+Retrieves a Riak KV [search schema]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search-schemas).
 
 ## Request
diff --git a/content/riak/kv/2.1.3/developing/api/http/get-bucket-props.md b/content/riak/kv/2.1.3/developing/api/http/get-bucket-props.md
index 9b51fae556..88912651af 100644
--- a/content/riak/kv/2.1.3/developing/api/http/get-bucket-props.md
+++ b/content/riak/kv/2.1.3/developing/api/http/get-bucket-props.md
@@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format):
 
 * `props` - whether to return the bucket properties (`true` is the default)
 * `keys` - whether to return the keys stored in the bucket. (`false` is the
-default). See also [HTTP List Keys](/riak/kv/2.1.3/developing/api/http/list-keys).
+default). See also [HTTP List Keys]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/list-keys).
 
 ## Response
 
@@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries,
 `"props"` and `"keys"`, which are present or missing, according to the
 optional query parameters. The default is for only `"props"` to be
 present.
 
-See [HTTP Set Bucket Properties](/riak/kv/2.1.3/developing/api/http/set-bucket-props) for more information about the available
+See [HTTP Set Bucket Properties]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/set-bucket-props) for more information about the available
 bucket properties.
 
 ## Example
diff --git a/content/riak/kv/2.1.3/developing/api/http/link-walking.md b/content/riak/kv/2.1.3/developing/api/http/link-walking.md
index 2461c9419e..592f53c2ce 100644
--- a/content/riak/kv/2.1.3/developing/api/http/link-walking.md
+++ b/content/riak/kv/2.1.3/developing/api/http/link-walking.md
@@ -17,8 +17,8 @@ aliases:
 
 Link walking (traversal) finds and returns objects by following links attached
 to them, starting from the object specified by the bucket and key portion. It
-is a special case of [MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce), and can be expressed more verbosely as such.
-[Read more about Links](/riak/kv/2.1.3/learn/glossary/#links).
+is a special case of [MapReduce]({{<baseurl>}}riak/kv/2.1.3/developing/usage/mapreduce), and can be expressed more verbosely as such.
+[Read more about Links]({{<baseurl>}}riak/kv/2.1.3/learn/glossary/#links).
 
 ## Request
 
@@ -64,7 +64,7 @@ single object that was found. If no objects were found or "keep" was not set
 on the phase, no chunks will be present in that phase.
 Objects inside phase results will include `Location` headers that can be used
 to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete
-response from [fetching the object](/riak/kv/2.1.3/developing/api/http/fetch-object), without the status
+response from [fetching the object]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/fetch-object), without the status
 code.
 
 ## Example
diff --git a/content/riak/kv/2.1.3/developing/api/http/list-resources.md b/content/riak/kv/2.1.3/developing/api/http/list-resources.md
index 11f985a98d..f9bf94d22a 100644
--- a/content/riak/kv/2.1.3/developing/api/http/list-resources.md
+++ b/content/riak/kv/2.1.3/developing/api/http/list-resources.md
@@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific
 operations.
 
 The standard resources are:
 
-* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.1.3/developing/api/http/#bucket-operations)
-* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.1.3/developing/api/http/secondary-indexes)
-* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.1.3/developing/api/http/link-walking)
-* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.1.3/developing/api/http/mapreduce)
-* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.1.3/developing/api/http/#object-key-operations)
-* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.1.3/developing/api/http/ping)
-* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.1.3/developing/api/http/set-bucket-props)
-* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.1.3/developing/api/http/status)
+* `riak_kv_wm_buckets` - [Bucket Operations]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/#bucket-operations)
+* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/secondary-indexes)
+* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/link-walking)
+* `riak_kv_wm_mapred` - [HTTP MapReduce]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/mapreduce)
+* `riak_kv_wm_object` - [Object/Key Operations]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/#object-key-operations)
+* `riak_kv_wm_ping` - [HTTP Ping]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/ping)
+* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/set-bucket-props)
+* `riak_kv_wm_stats` - [HTTP Status]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/status)
 
 ## Request
diff --git a/content/riak/kv/2.1.3/developing/api/http/mapreduce.md b/content/riak/kv/2.1.3/developing/api/http/mapreduce.md
index 53ad5dca75..b05c4d61a2 100644
--- a/content/riak/kv/2.1.3/developing/api/http/mapreduce.md
+++ b/content/riak/kv/2.1.3/developing/api/http/mapreduce.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/http/mapreduce
 ---
 
-[MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow.
+[MapReduce]({{<baseurl>}}riak/kv/2.1.3/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow.
 
 ## Request
 
@@ -24,7 +24,7 @@ POST /mapred
 ```
 
 Important headers:
-* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce) page.
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{<baseurl>}}riak/kv/2.1.3/developing/usage/mapreduce) page.
 
 Optional query parameters:
 * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
diff --git a/content/riak/kv/2.1.3/developing/api/http/search-index-info.md b/content/riak/kv/2.1.3/developing/api/http/search-index-info.md
index 86111eda78..94fbc10499 100644
--- a/content/riak/kv/2.1.3/developing/api/http/search-index-info.md
+++ b/content/riak/kv/2.1.3/developing/api/http/search-index-info.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/http/search-index-info
 ---
 
-Retrieves information about all currently available [Search indexes](/riak/kv/2.1.3/developing/usage/search) in JSON format.
+Retrieves information about all currently available [Search indexes]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search) in JSON format.
 
 ## Request
diff --git a/content/riak/kv/2.1.3/developing/api/http/search-query.md b/content/riak/kv/2.1.3/developing/api/http/search-query.md
index 06e6026599..8f3c5faa41 100644
--- a/content/riak/kv/2.1.3/developing/api/http/search-query.md
+++ b/content/riak/kv/2.1.3/developing/api/http/search-query.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/http/search-query
 ---
 
-Performs a [Riak KV Search](/riak/kv/2.1.3/developing/usage/search) query.
+Performs a [Riak KV Search]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search) query.
 
 ## Request
 
@@ -30,7 +30,7 @@ GET /search/query/<index_name>
   to be used when returning the Search payload. The currently
   available options are `json` and `xml`. The default is `xml`.
 * `q` --- The actual Search query itself. Examples can be found in
-  [Using Search](/riak/kv/2.1.3/developing/usage/search). If a query is not specified, Riak will return
+  [Using Search]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search). If a query is not specified, Riak will return
   information about the index itself, e.g. the number of documents
   indexed.
diff --git a/content/riak/kv/2.1.3/developing/api/http/secondary-indexes.md b/content/riak/kv/2.1.3/developing/api/http/secondary-indexes.md
index c61e72f1f6..6b5e283351 100644
--- a/content/riak/kv/2.1.3/developing/api/http/secondary-indexes.md
+++ b/content/riak/kv/2.1.3/developing/api/http/secondary-indexes.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/http/secondary-indexes
 ---
 
-[Secondary Indexes](/riak/kv/2.1.3/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
+[Secondary Indexes]({{<baseurl>}}riak/kv/2.1.3/developing/usage/secondary-indexes) allow an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
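A minimal sketch of the two 2i query forms (exact match and range), assuming a local node on port 8098; the bucket and index names are hypothetical:

```curl
# Exact-match query against the binary index "twitter_bin"
curl http://localhost:8098/types/default/buckets/users/index/twitter_bin/jsmith123

# Range query against the integer index "field2_int"
curl http://localhost:8098/types/default/buckets/users/index/field2_int/100/200
```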
 
 ## Request
diff --git a/content/riak/kv/2.1.3/developing/api/http/set-bucket-props.md b/content/riak/kv/2.1.3/developing/api/http/set-bucket-props.md
index 6008cd4806..c7a42f3a29 100644
--- a/content/riak/kv/2.1.3/developing/api/http/set-bucket-props.md
+++ b/content/riak/kv/2.1.3/developing/api/http/set-bucket-props.md
@@ -37,8 +37,8 @@ Available properties:
 (concurrent updates)
 * `last_write_wins` (true or false) - whether to ignore object history
 (vector clock) when writing
-* `precommit` - [precommit hooks](/riak/kv/2.1.3/developing/usage/commit-hooks)
-* `postcommit` - [postcommit hooks](/riak/kv/2.1.3/developing/usage/commit-hooks)
+* `precommit` - [precommit hooks]({{<baseurl>}}riak/kv/2.1.3/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{<baseurl>}}riak/kv/2.1.3/developing/usage/commit-hooks)
 * `r, w, dw, rw` - default quorum values for operations on keys in the bucket.
 Valid values are:
   * `"all"` - all nodes must respond
diff --git a/content/riak/kv/2.1.3/developing/api/http/status.md b/content/riak/kv/2.1.3/developing/api/http/status.md
index a9fc11d3ff..fcbdf8f995 100644
--- a/content/riak/kv/2.1.3/developing/api/http/status.md
+++ b/content/riak/kv/2.1.3/developing/api/http/status.md
@@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"
 
 ## Output Explanation
 
-The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.1.3/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application.
+The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application.
 
 Stat | Description
 ------------------------------|---------------------------------------------------
diff --git a/content/riak/kv/2.1.3/developing/api/http/store-object.md b/content/riak/kv/2.1.3/developing/api/http/store-object.md
index dc73c0164d..8bb9629ca1 100644
--- a/content/riak/kv/2.1.3/developing/api/http/store-object.md
+++ b/content/riak/kv/2.1.3/developing/api/http/store-object.md
@@ -38,8 +38,8 @@ object when read.
 * `X-Riak-Meta-*` - any additional metadata headers that should be stored with
 the object.
 * `X-Riak-Index-*` - index entries under which this object should be indexed.
-[Read more about Secondary Indexing](/riak/kv/2.1.3/developing/api/http/secondary-indexes)
-* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.1.3/developing/api/http/link-walking)
+[Read more about Secondary Indexing]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/secondary-indexes)
+* `Link` - user and system-defined links to other resources. [Read more about Links.]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/link-walking)
 
 Optional headers (only valid on `PUT`):
 
@@ -83,7 +83,7 @@ Important headers:
 * `Location` a relative URL to the newly-created object (when submitting
 without a key)
 
-If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.1.3/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices`
+If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices`
 may be returned if siblings existed or were created as part of the operation,
 and the response can be dealt with similarly.
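For example, a store that asks for the object back in the same response; a minimal sketch assuming a local node on port 8098 (the bucket `test` and payload are hypothetical):

```curl
# POST without a key lets Riak choose one; the Location header reports it,
# and returnbody=true echoes the stored object in the response body
curl -i -XPOST "http://localhost:8098/types/default/buckets/test/keys?returnbody=true" \
  -H 'Content-Type: text/plain' \
  -d 'hello'
```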
diff --git a/content/riak/kv/2.1.3/developing/api/http/store-search-index.md b/content/riak/kv/2.1.3/developing/api/http/store-search-index.md
index 13fd8ddec9..9f5bc9d46a 100644
--- a/content/riak/kv/2.1.3/developing/api/http/store-search-index.md
+++ b/content/riak/kv/2.1.3/developing/api/http/store-search-index.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/http/store-search-index
 ---
 
-Creates a new Riak Search [index](/riak/kv/2.1.3/developing/usage/search/#simple-setup).
+Creates a new Riak Search [index]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search/#simple-setup).
 
 ## Request
 
@@ -26,11 +26,11 @@ PUT /search/index/<index_name>
 ## Optional Request Body
 
 If you run a `PUT` request to this endpoint without a request body, Riak
-will create a new Search index that uses the [default Search schema](/riak/kv/2.1.3/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.
+will create a new Search index that uses the [default Search schema]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.
 
 To specify a different schema, however, you must pass Riak a JSON
 object as the request body in which the `schema` field specifies the name of
-the schema to use. If you've [stored a schema](/riak/kv/2.1.3/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
+the schema to use. If you've [stored a schema]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
 request would create an index called `my_index` that used that schema:
 
 ```curl
@@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \
   -H 'Content-Type: application/json' \
   -d '{"schema": "my_custom_schema"}'
 ```
 
-More information can be found in [Using Search](/riak/kv/2.1.3/developing/usage/search).
+More information can be found in [Using Search]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search).
 
 ## Normal Response Codes
diff --git a/content/riak/kv/2.1.3/developing/api/http/store-search-schema.md b/content/riak/kv/2.1.3/developing/api/http/store-search-schema.md
index 0f126e81e5..aeb106e20a 100644
--- a/content/riak/kv/2.1.3/developing/api/http/store-search-schema.md
+++ b/content/riak/kv/2.1.3/developing/api/http/store-search-schema.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/http/store-search-schema
 ---
 
-Creates a new Riak [Search schema](/riak/kv/2.1.3/developing/usage/search-schemas).
+Creates a new Riak [Search schema]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search-schemas).
 
 ## Request
 
@@ -26,7 +26,7 @@ PUT /search/schema/<schema_name>
 ## Required Form Data
 
 In order to create a new Search schema, you must pass Riak a properly
-formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.1.3/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed
+formed XML schema. More information can be found in the [Search Schema]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
 `my_schema.xml` and would like to create a new schema called
 `my_custom_schema`, you would use the following HTTP request:
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers.md
index 6638387085..24184e8ec4 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers.md
@@ -139,47 +139,47 @@ message RpbErrorResp {
 
 ## Bucket Operations
 
-* [PBC List Buckets](/riak/kv/2.1.3/developing/api/protocol-buffers/list-buckets)
-* [PBC List Keys](/riak/kv/2.1.3/developing/api/protocol-buffers/list-keys)
-* [PBC Get Bucket Properties](/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props)
-* [PBC Set Bucket Properties](/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props)
-* [PBC Reset Bucket Properties](/riak/kv/2.1.3/developing/api/protocol-buffers/reset-bucket-props)
+* [PBC List Buckets]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/list-buckets)
+* [PBC List Keys]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/list-keys)
+* [PBC Get Bucket Properties]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props)
+* [PBC Set Bucket Properties]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props)
+* [PBC Reset Bucket Properties]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/reset-bucket-props)
 
 ## Object/Key Operations
 
-* [PBC Fetch Object](/riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object)
-* [PBC Store Object](/riak/kv/2.1.3/developing/api/protocol-buffers/store-object)
-* [PBC Delete Object](/riak/kv/2.1.3/developing/api/protocol-buffers/delete-object)
+* [PBC Fetch Object]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object)
+* [PBC Store Object]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/store-object)
+* [PBC Delete Object]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/delete-object)
 
 ## Query Operations
 
-* [PBC MapReduce](/riak/kv/2.1.3/developing/api/protocol-buffers/mapreduce)
-* [PBC Secondary Indexes](/riak/kv/2.1.3/developing/api/protocol-buffers/secondary-indexes)
-* [PBC Search](/riak/kv/2.1.3/developing/api/protocol-buffers/search)
+* [PBC MapReduce]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/mapreduce)
+* [PBC Secondary Indexes]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/secondary-indexes)
+* [PBC Search]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/search)
 
 ## Server Operations
 
-* [PBC Ping](/riak/kv/2.1.3/developing/api/protocol-buffers/ping)
-* [PBC Server Info](/riak/kv/2.1.3/developing/api/protocol-buffers/server-info)
+* [PBC Ping]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/ping)
+* [PBC Server Info]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/server-info)
 
 ## Bucket Type Operations
 
-* [PBC Get Bucket Type](/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-type)
-* [PBC Set Bucket Type](/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-type)
+* [PBC Get Bucket Type]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-type)
+* [PBC Set Bucket Type]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-type)
 
 ## Data Type Operations
 
-* [PBC Data Type Fetch](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-fetch)
-* [PBC Data Type Union](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-union)
-* [PBC Data Type Store](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-store)
-* [PBC Data Type Counter Store](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-counter-store)
-* [PBC Data Type Set Store](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-set-store)
-* [PBC Data Type Map Store](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store)
+* [PBC Data Type Fetch]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-fetch)
+* [PBC Data Type Union]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-union)
+* [PBC Data Type Store]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-store)
+* [PBC Data Type Counter Store]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-counter-store)
+* [PBC Data Type Set Store]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-set-store)
+* [PBC Data Type Map Store]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store)
 
 ## Yokozuna Operations
 
-* [PBC Yokozuna Index Get](/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-get)
-* [PBC Yokozuna Index Put](/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-put)
-* [PBC Yokozuna Index Delete](/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-delete)
-* [PBC Yokozuna Schema Get](/riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-get)
-* [PBC Yokozuna Schema Put](/riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-put)
+* [PBC Yokozuna Index Get]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-get)
+* [PBC Yokozuna Index Put]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-put)
+* [PBC Yokozuna Index Delete]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-delete)
+* [PBC Yokozuna Schema Get]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-get)
+* [PBC Yokozuna Schema Put]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-put)
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/auth-req.md
index ca22b6c6d7..d21ebdc992 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/auth-req.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/auth-req.md
@@ -27,4 +27,4 @@ message RpbAuthReq {
 }
 ```
 
-For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.1.3/using/security/basics).
+For more on authentication, see our documentation on [Authentication and Authorization]({{<baseurl>}}riak/kv/2.1.3/using/security/basics).
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/delete-object.md
index 9b6e7da004..c6763ef077 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/delete-object.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/delete-object.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/protocol-buffers/delete-object
 ---
 
-Delete an object in the specified [bucket type](/riak/kv/2.1.3/using/cluster-operations/bucket-types)/bucket/key location.
+Delete an object in the specified [bucket type]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/bucket-types)/bucket/key location.
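PBC requests are normally issued through a client library rather than by hand, but the same operation can be sanity-checked over the HTTP API covered earlier in this changeset; a minimal sketch assuming a local node on port 8098 (bucket `test` and key `doc1` are hypothetical):

```curl
# HTTP counterpart of the PBC delete: remove the object at this location
curl -XDELETE http://localhost:8098/types/default/buckets/test/keys/doc1
```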
 
 ## Request
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-counter-store.md
index bfbc1d9a57..e0a5e575e8 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-counter-store.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-counter-store.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/protocol-buffers/dt-counter-store
 ---
 
-An operation to update a [counter](/riak/kv/2.1.3/developing/data-types).
+An operation to update a [counter]({{<baseurl>}}riak/kv/2.1.3/developing/data-types).
 
 ## Request
 
@@ -28,4 +28,4 @@ message CounterOp {
 The `increment` value specifies how much the counter will be incremented
 or decremented, depending on whether the `increment` value is positive
 or negative. This operation can be used to update counters that are
-stored on their own in a key or [within a map](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store).
+stored on their own in a key or [within a map]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store).
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-fetch.md
index 56584e7ca5..fc17074036 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-fetch.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-fetch.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/protocol-buffers/dt-fetch
 ---
 
-The equivalent of [`RpbGetReq`](/riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.1.3/developing/data-types). This request results in a `DtFetchResp`
+The equivalent of [`RpbGetReq`]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{<baseurl>}}riak/kv/2.1.3/developing/data-types). This request results in a `DtFetchResp`
 message (explained in the **Response** section below).
 
 ## Request
 
@@ -42,14 +42,14 @@ Parameter | Description
 :---------|:-----------
 `bucket` | The name of the bucket in which the Data Type is stored
 `key` | The key where the Data Type is stored
-`type` | The [Using Bucket Types](/riak/kv/2.1.3/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)
+`type` | The [Using Bucket Types]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)
 
 #### Optional Parameters
 
 > **Note on defaults and special values**
 >
 > All of the optional parameters below have default values determined on a
-per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props) for more information.
+per-bucket basis. Please refer to the documentation on [setting bucket properties]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props) for more information.
 
 Furthermore, you can assign an integer value to the `r` and
 `pr`, provided that that integer value is less than or equal
@@ -72,7 +72,7 @@ Parameter | Description
 
 ## Response
 
-The response to a fetch request ([`DtFetchReq`](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+The response to a fetch request ([`DtFetchReq`]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
 
 ```protobuf
 message DtFetchResp {
@@ -91,7 +91,7 @@ message DtFetchResp {
 If the `include_context` option is specified, an opaque "context"
 value will be returned along with the user-readable data. When sending
 an update request, the client should send this context as well, just as one
-would send a [vclock](/riak/kv/2.1.3/learn/glossary/#vector-clock) for standard KV updates.
+would send a [vclock]({{<baseurl>}}riak/kv/2.1.3/learn/glossary/#vector-clock) for standard KV updates.
 
 The type of the Data Type is specified in the `type` field, and must be
 one of the three possible values of the `DataType` enum (`COUNTER`,
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store.md
index ead52fb6cd..023ce83b7c 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store.md
@@ -66,7 +66,7 @@ message MapUpdate {
 }
 ```
 
-The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-set-store).
+The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-set-store).
 
 If you are updating a flag, you do so by including a `FlagOp` message. As shown
 in the `MapUpdate` message above, this operation takes one of two values:
 `ENABLE` and `DISABLE` (`1` and `2`, respectively).
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-set-store.md
index ff7fd3f112..04e3d78b8d 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-set-store.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-set-store.md
@@ -16,7 +16,7 @@ aliases:
 ---
 
 An operation to update a set, either on its own (at the bucket/key
-level) or [inside of a map](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store).
+level) or [inside of a map]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store).
 
 ## Request
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-store.md
index 1aa304f4d6..fb526afa54 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-store.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-store.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/protocol-buffers/dt-store
 ---
 
-A request to update the value of a [Riak Data Type](/riak/kv/2.1.3/developing/data-types).
+A request to update the value of a [Riak Data Type]({{<baseurl>}}riak/kv/2.1.3/developing/data-types).
 
 ## Request
 
@@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting
 `DtUpdateResp`.
 
 The `DtOp` value specifies which Data Type-specific operation is being
-performed. More on that in the [PBC Data Type Union](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-union) document.
+performed. More on that in the [PBC Data Type Union]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-union) document.
 
 ```protobuf
 message DtUpdateReq {
@@ -50,11 +50,11 @@ message DtUpdateReq {
 Parameter | Description
 :---------|:-----------
 `bucket` | The name of the bucket in which the Data Type is stored
-`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.1.3/using/cluster-operations/bucket-types).
+`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/bucket-types).
 
 Also required is a `DtOp` message that specifies which operation is to
 be performed, depending on whether the Data Type being updated is a
-[counter](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store).
+[counter]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-counter-store), [set]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-set-store), or [map]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-map-store).
 
 ```protobuf
 message DtOp {
@@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum`
 
 Parameter | Description
 :---------|:-----------
 `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
-`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.1.3/learn/glossary/#vector-clock)
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{<baseurl>}}riak/kv/2.1.3/learn/glossary/#vector-clock)
 `w` | Write quorum, i.e. how many replicas to write to before returning a successful response
 `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
 `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
@@ -92,7 +92,7 @@ Parameter | Description
 
 ## Response
 
 The response to a Data Type update request is analogous to
-[`RpbPutResp`](/riak/kv/2.1.3/developing/api/protocol-buffers/store-object) for KV operations. If the
+[`RpbPutResp`]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/store-object) for KV operations. If the
 `return_body` is set in the update request message (as explained above),
 the message will include the opaque context of the Data Type (`context`)
 and the new value of the Data Type _after_ the update has completed
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-union.md
index 62de1e3f49..e9671735c8 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-union.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/dt-union.md
@@ -28,4 +28,4 @@ message DtOp {
 ```
 
 The included operation depends on the Data Type that is being updated.
 
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.1.3/developing/api/protocol-buffers/dt-store) message.
+`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/dt-store) message.
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object.md
index 62ddbfc3d2..05969d6be6 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object.md
@@ -47,7 +47,7 @@ message RpbGetReq {
 > **Note on defaults and special values**
 >
 > All of the optional parameters below have default values determined on a
-per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props) for more information.
+per-bucket basis. Please refer to the documentation on [setting bucket properties]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props) for more information.
 >
 > Furthermore, you can assign an integer value to the `r` and `pr`
 parameters, provided that that integer value is less than or
@@ -87,7 +87,7 @@ Value | Description
 
 The content entries hold the object value and any metadata.
 Below is the structure of a RpbContent message, which is
 included in GET/PUT responses (`RpbGetResp` (above) and
-[`RpbPutResp`](/riak/kv/2.1.3/developing/api/protocol-buffers/store-object), respectively):
+[`RpbPutResp`]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/store-object), respectively):
 
 ```protobuf
 message RpbContent {
@@ -114,7 +114,7 @@ of the following optional parameters:
 * `charset` --- The character encoding of the object, e.g. `utf-8`
 * `content_encoding` --- The content encoding of the object, e.g.
 `video/mp4`
-* `vtag` --- The object's [vtag](/riak/kv/2.1.3/learn/glossary/#vector-clock)
+* `vtag` --- The object's [vtag]({{<baseurl>}}riak/kv/2.1.3/learn/glossary/#vector-clock)
 * `links` --- This parameter is associated with the now-deprecated link
 walking feature and should not be used by Riak clients
 * `last_mod` --- A timestamp for when the object was last modified, in
@@ -132,7 +132,7 @@ of the following optional parameters:
 }
 ```
 Notice that both a key and value can be stored or just a key.
-  `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.1.3/developing/usage/secondary-indexes) to objects (in the optional
+  `RpbPair` messages are also used to attach [secondary indexes]({{<baseurl>}}riak/kv/2.1.3/developing/usage/secondary-indexes) to objects (in the optional
 `indexes` field).
 * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone
 for the object has been found under the specified key)
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props.md
index 49912b80f7..cf261d478d 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props.md
@@ -26,7 +26,7 @@ message RpbGetBucketReq {
 }
 ```
 
-The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.1.3/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
+The bucket's name (`bucket`) must be specified. The [bucket type]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
 the `default` bucket type will be used.
 
 ## Response
 
@@ -85,7 +85,7 @@ message RpbBucketProps {
 Each `RpbBucketProps` message returns all of the properties associated
 with a particular bucket. Default values for bucket properties, as well
 as descriptions of all of the above properties, can be found in the
-[configuration file](/riak/kv/2.1.3/configuring/reference/#default-bucket-properties) documentation.
+[configuration file]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/#default-bucket-properties) documentation.
 
 It should be noted that the value of an `RpbBucketProps` message may
 include other message types, such as `RpbModFun` (specifying
@@ -106,5 +106,5 @@ message RpbCommitHook {
 ```
 
 {{% note title="Note on `RpbReplMode`" %}}
-The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riakcs/latest/cookbooks/mdc-overview/)
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/latest/cookbooks/mdc-overview/)
 {{% /note %}}
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-type.md
index f6cbb2ca4c..e33671c29d 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-type.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-type.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.1.3/dev/references/protocol-buffers/get-bucket-type
 ---
 
-Gets the bucket properties associated with a [bucket type](/riak/kv/2.1.3/using/cluster-operations/bucket-types).
+Gets the bucket properties associated with a [bucket type]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/bucket-types).
 
 ## Request
 
@@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`).
 
 ## Response
 
 A bucket type's properties will be sent to the client as part of an
-[`RpbBucketProps`](/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props) message.
+[`RpbBucketProps`]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props) message.
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-client-id.md
index ab61c062a4..8f5e82a8df 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-client-id.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/get-client-id.md
@@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs.
 Get the client id used for this connection. Client ids are used for
 conflict resolution and each unique actor in the system should be
 assigned one. A client id is assigned randomly when the socket is
-connected and can be changed using [Set Client ID](/riak/kv/2.1.3/developing/api/protocol-buffers/set-client-id).
+connected and can be changed using [Set Client ID]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/set-client-id).
 
 ## Request
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/mapreduce.md
index 6f15584cfe..06837ffe37 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/mapreduce.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/mapreduce.md
@@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways
 * `application/json` --- JSON-encoded MapReduce job
 * `application/x-erlang-binary` --- Erlang external term format
 
-The JSON encoding is the same as [REST API](/riak/kv/2.1.3/developing/usage/mapreduce/#rest) and
-the external term format is the same as the [local Erlang API](/riak/kv/2.1.3/developing/app-guide/advanced-mapreduce/#erlang)
+The JSON encoding is the same as [REST API]({{<baseurl>}}riak/kv/2.1.3/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{<baseurl>}}riak/kv/2.1.3/developing/app-guide/advanced-mapreduce/#erlang)
 
 ## Response
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/reset-bucket-props.md
index 5f9be3c38a..0162502afa 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/reset-bucket-props.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/reset-bucket-props.md
@@ -27,7 +27,7 @@ message RpbResetBucketReq {
 }
 ```
 
 You must specify the name of the bucket (`bucket`) and optionally a
-[bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) using the `type` value. If you do not
+[bucket type]({{<baseurl>}}riak/kv/2.1.3/developing/usage/bucket-types) using the `type` value. If you do not
 specify a bucket type, the `default` bucket type will be used by Riak.
 
 ## Response
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/secondary-indexes.md
index aa4944991a..05f22ad8ae 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/secondary-indexes.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/secondary-indexes.md
@@ -61,7 +61,7 @@ Parameter | Description
 `max_results` | If pagination is turned on, the number of results to be returned to the client
 `continuation` | If set to `true`, values are returned in a paginated response
 `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
-`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.1.3/developing/usage/bucket-types).
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{<baseurl>}}riak/kv/2.1.3/developing/usage/bucket-types).
 `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
 `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
 
@@ -84,7 +84,7 @@ message RpbIndexResp {
 Parameter | Description
 :---------|:-----------
 `keys` | A list of keys that match the index request
-`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object).
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object).
 `continuation` | Used for paginated responses
 `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props.md
index a78e76bf40..64823dc72d 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props.md
@@ -29,9 +29,9 @@ message RpbSetBucketReq {
 
 You must specify the name of the bucket (`bucket`) and include an
 `RpbBucketProps` message. More on that message type can be found in the
-[PBC Get Bucket Properties](/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props) documentation.
+[PBC Get Bucket Properties]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props) documentation.
 
-You can also specify a [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) using the
+You can also specify a [bucket type]({{<baseurl>}}riak/kv/2.1.3/developing/usage/bucket-types) using the
 `type` value. If you do not specify a bucket type, the `default` bucket
 type will be used by Riak.
 
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-type.md
index e07cdbced8..91148c81f9 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-type.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-type.md
@@ -15,8 +15,8 @@ aliases:
   - /riak/kv/2.1.3/dev/references/protocol-buffers/set-bucket-type
 ---
 
-Assigns a set of [bucket properties](/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props) to a
-[bucket type](/riak/kv/2.1.3/developing/usage/bucket-types).
+Assigns a set of [bucket properties]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props) to a
+[bucket type]({{<baseurl>}}riak/kv/2.1.3/developing/usage/bucket-types).
 
 ## Request
 
@@ -28,4 +28,4 @@ message RpbSetBucketTypeReq {
 ```
 
 The `type` field specifies the name of the bucket type as a binary. The
-`props` field contains an [`RpbBucketProps`](/riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props).
+`props` field contains an [`RpbBucketProps`]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/get-bucket-props).
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/store-object.md
index 7f18a78876..dba74cc552 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/store-object.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/store-object.md
@@ -16,11 +16,11 @@ aliases:
 ---
 
 Stores an object under the specified location, as determined by the
-intended [key](/riak/kv/2.1.3/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.1.3/learn/concepts/buckets), and [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types). A bucket must always be specified (via
+intended [key]({{<baseurl>}}riak/kv/2.1.3/learn/concepts/keys-and-objects), [bucket]({{<baseurl>}}riak/kv/2.1.3/learn/concepts/buckets), and [bucket type]({{<baseurl>}}riak/kv/2.1.3/developing/usage/bucket-types). A bucket must always be specified (via
 `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no
 key is specified, Riak will assign a random key to the object. If no
-[bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) is assigned, Riak will assign
-`default`, which means that the [default bucket configuration](/riak/kv/2.1.3/configuring/reference/#default-bucket-properties) will be used.
+[bucket type]({{<baseurl>}}riak/kv/2.1.3/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/#default-bucket-properties) will be used.
 
 #### Request
 
@@ -50,7 +50,7 @@ message RpbPutReq {
 
 Parameter | Description
 :---------|:-----------
 `bucket` | The name of the bucket, in bytes, in which the key/value is to reside
-`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object)
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object)
 
 #### Optional Parameters
 
@@ -93,7 +93,7 @@ message RpbPutResp {
 
 If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
 will contain the current object after the PUT completes, in `contents`,
-as well as the object's [causal context](/riak/kv/2.1.3/learn/concepts/causal-context), in the `vclock`
+as well as the object's [causal context]({{<baseurl>}}riak/kv/2.1.3/learn/concepts/causal-context), in the `vclock`
 field. The `key` will be sent only if the server generated a random key
 for the object.
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-get.md
index b946e5c7a0..c0df1f0489 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-get.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-get.md
@@ -53,7 +53,7 @@ message RpbYokozunaIndex {
 ```
 
 Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.1.3/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
+binary (as `name`). Optionally, you can specify a [`schema`]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
 index is stored (for GET requests) or on which you wish the index to be
 stored (for PUT requests). An index's `n_val` must match the associated
 bucket's `n_val`.
diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-put.md
index 7b247c71c4..125b62464e 100644
--- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-put.md
+++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-index-put.md
@@ -37,4 +37,4 @@ message RpbYokozunaIndex {
 ```
 
 Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.1.3/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.1.3/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-get.md index 0d9a7856c7..6f32bd1c65 100644 --- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.1.3/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{}}riak/kv/2.1.3/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-put.md index 8a37f606ca..4604d2e127 100644 --- a/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.1.3/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.1.3/developing/usage/search-schemas). +Create a new Solr [search schema]({{}}riak/kv/2.1.3/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.1.3/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.1.3/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.1.3/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.1.3/developing/api/protocol-buffers/#message-codes) code with no data on success. 
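To make the schema PUT/GET messages above concrete, here is a minimal sketch of the same round trip using the official Python client. The schema name, file path, and connection settings are illustrative assumptions, not part of this changeset.

```python
from riak import RiakClient

# Assumed local node settings; adjust to your cluster.
client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

# The schema content must be a complete Solr schema document (XML).
schema_xml = open('blog_post_schema.xml').read()

# Wraps the schema PUT message; raises on failure, returns on success.
client.create_search_schema('blog_post_schema', schema_xml)

# Wraps the schema GET message; the result carries the name and XML content.
schema = client.get_search_schema('blog_post_schema')
print(schema['name'], len(schema['content']))
```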
diff --git a/content/riak/kv/2.1.3/developing/app-guide.md b/content/riak/kv/2.1.3/developing/app-guide.md index 45af6fd1f9..a8051f7a92 100644 --- a/content/riak/kv/2.1.3/developing/app-guide.md +++ b/content/riak/kv/2.1.3/developing/app-guide.md @@ -16,48 +16,48 @@ aliases: - /riak/kv/2.1.3/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.1.3/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.1.3/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.1.3/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.1.3/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.1.3/developing/key-value-modeling -[dev data types]: /riak/kv/2.1.3/developing/data-types -[dev data types#counters]: /riak/kv/2.1.3/developing/data-types/counters -[dev data types#sets]: /riak/kv/2.1.3/developing/data-types/sets -[dev data types#maps]: /riak/kv/2.1.3/developing/data-types/maps -[usage create objects]: /riak/kv/2.1.3/developing/usage/creating-objects -[usage search]: /riak/kv/2.1.3/developing/usage/search -[use ref search]: /riak/kv/2.1.3/using/reference/search -[usage 2i]: /riak/kv/2.1.3/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.1.3/developing/client-libraries -[concept crdts]: /riak/kv/2.1.3/learn/concepts/crdts -[dev data model]: /riak/kv/2.1.3/developing/data-modeling -[usage mapreduce]: /riak/kv/2.1.3/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.1.3/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.1.3/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.1.3/setup/planning/backend/memory -[obj model java]: /riak/kv/2.1.3/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.1.3/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.1.3/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.1.3/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.1.3/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.1.3/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.1.3/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.1.3/using/reference/strong-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.1.3/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.1.3/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.1.3/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.1.3/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties -[install index]: /riak/kv/2.1.3/setup/installing -[getting started]: /riak/kv/2.1.3/developing/getting-started -[usage index]: /riak/kv/2.1.3/developing/usage -[glossary]: /riak/kv/2.1.3/learn/glossary -[write-once]: /riak/kv/2.1.3/developing/app-guide/write-once +[usage conflict resolution]: {{}}riak/kv/2.1.3/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.1.3/developing/data-modeling/#log-data +[dev data model#sensor]: 
{{}}riak/kv/2.1.3/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.1.3/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.1.3/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.1.3/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.1.3/developing/data-types/counters +[dev data types#sets]: {{}}riak/kv/2.1.3/developing/data-types/sets +[dev data types#maps]: {{}}riak/kv/2.1.3/developing/data-types/maps +[usage create objects]: {{}}riak/kv/2.1.3/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.1.3/developing/usage/search +[use ref search]: {{}}riak/kv/2.1.3/using/reference/search +[usage 2i]: {{}}riak/kv/2.1.3/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.1.3/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.1.3/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.1.3/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.1.3/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.1.3/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.1.3/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.1.3/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.1.3/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.1.3/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.1.3/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.1.3/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.1.3/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.1.3/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.1.3/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[cluster ops strong consistency]: {{}}riak/kv/2.1.3/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/2.1.3/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/2.1.3/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/2.1.3/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/2.1.3/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/2.1.3/setup/installing +[getting started]: {{}}riak/kv/2.1.3/developing/getting-started +[usage index]: {{}}riak/kv/2.1.3/developing/usage +[glossary]: {{}}riak/kv/2.1.3/learn/glossary +[write-once]: {{}}riak/kv/2.1.3/developing/app-guide/write-once So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -120,7 +120,7 @@ Riak may not such be a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance. 
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.1.3/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.1.3/developing/app-guide/advanced-mapreduce.md index 8c6d2911f2..bd61a7b3dc 100644 --- a/content/riak/kv/2.1.3/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.1.3/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.1.3/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.1.3/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.1.3/using/reference/custom-code -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode -[config reference]: /riak/kv/2.1.3/configuring/reference +[usage 2i]: {{}}riak/kv/2.1.3/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.1.3/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.1.3/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.1.3/configuring/reference > **Use MapReduce sparingly** > @@ -725,7 +725,7 @@ You can use streaming with Erlang via the Riak local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.1.3/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.1.3/developing/app-guide/cluster-metadata.md index cc73fadb77..64b6c3aee5 100644 --- a/content/riak/kv/2.1.3/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.1.3/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.1.3/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.1.3/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.1.3/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. 
Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.1.3/developing/app-guide/replication-properties.md b/content/riak/kv/2.1.3/developing/app-guide/replication-properties.md index 846422bf9b..abc58f54d4 100644 --- a/content/riak/kv/2.1.3/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.1.3/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.3/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.1.3/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.1.3/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.1.3/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.1.3/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.1.3/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.1.3/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.1.3/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.1.3/learn/concepts/buckets) that you're using. 
In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.1.3/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.1.3/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.1.3/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.1.3/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.1.3/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.1.3/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. 
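As a rough sketch of how the bucket-level and per-request settings discussed above combine in practice, the following uses the official Python client. The bucket type name `n_val_of_5`, the bucket, and the keys are invented for illustration, and the type is assumed to have been created and activated with `{"props":{"n_val":5,"r":3,"w":3}}` as described in the surrounding text.

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

# Bucket-level properties come from the bucket type the bucket belongs to.
bucket = client.bucket_type('n_val_of_5').bucket('sensor_data')

# Per-request override on a single write: wait for 3 write
# acknowledgments, 2 of them durable (written to disk).
obj = bucket.new('reading-2016-01-01', data={'temp_c': 22})
obj.store(w=3, dw=2)

# A read that succeeds as soon as a single vnode responds (R=1):
fetched = bucket.get('reading-2016-01-01', r=1)
print(fetched.data)
```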
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.1.3/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.1.3/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.1.3/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.1.3/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.1.3/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.1.3/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.1.3/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.1.3/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.1.3/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.1.3/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.1.3/developing/app-guide/strong-consistency.md b/content/riak/kv/2.1.3/developing/app-guide/strong-consistency.md index 36f71ef1f2..7d8a792562 100644 --- a/content/riak/kv/2.1.3/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.1.3/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.1.3/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/2.1.3/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.1.3/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.1.3/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.1.3/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.1.3/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.1.3/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.1.3/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.1.3/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.1.3/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/2.1.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.1.3/developing/client-libraries -[getting started]: /riak/kv/2.1.3/developing/getting-started -[config strong consistency#details]: /riak/kv/2.1.3/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.1.3/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.1.3/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.1.3/using/cluster-operations/bucket-types +[apps replication properties]: 
{{}}riak/kv/2.1.3/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.1.3/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.1.3/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.1.3/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.1.3/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.1.3/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.1.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.1.3/developing/client-libraries +[getting started]: {{}}riak/kv/2.1.3/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.1.3/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.1.3/developing/app-guide/write-once.md b/content/riak/kv/2.1.3/developing/app-guide/write-once.md index 6446ab01e6..c937068be7 100644 --- a/content/riak/kv/2.1.3/developing/app-guide/write-once.md +++ b/content/riak/kv/2.1.3/developing/app-guide/write-once.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.1.3/dev/advanced/write-once --- -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode -[bucket type]: /riak/kv/2.1.3/developing/usage/bucket-types -[Riak data types]: /riak/kv/2.1.3/developing/data-types -[strong consistency]: /riak/kv/2.1.3/developing/app-guide/strong-consistency +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.1.3/developing/data-types +[strong consistency]: {{}}riak/kv/2.1.3/developing/app-guide/strong-consistency Riak 2.1.0 introduces the concept of write-once buckets, buckets whose entries are intended to be written exactly once and never updated or overwritten. @@ -103,7 +103,7 @@ The relationship between the `riak_client`, write-once workers, and vnode proxies is illustrated in the following diagram:
-![Write Once](/images/write_once.png) +![Write Once]({{}}images/write_once.png)
## Client Impacts @@ -154,7 +154,7 @@ LevelDB. Riak will automatically fall back to synchronous writes with all other backends. {{% note title="Note on the `multi` backend" %}} -The [Multi](/riak/kv/2.1.3/setup/planning/backend/multi) backend does not +The [Multi]({{}}riak/kv/2.1.3/setup/planning/backend/multi) backend does not support asynchronous writes. Therefore, if LevelDB is used with the Multi backend, it will be used in synchronous mode. {{% /note %}} diff --git a/content/riak/kv/2.1.3/developing/client-libraries.md b/content/riak/kv/2.1.3/developing/client-libraries.md index a364c2c395..47ff62cca2 100644 --- a/content/riak/kv/2.1.3/developing/client-libraries.md +++ b/content/riak/kv/2.1.3/developing/client-libraries.md @@ -37,7 +37,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.1.3/developing/data-types.md b/content/riak/kv/2.1.3/developing/data-types.md index bde1822f29..1ed5724dde 100644 --- a/content/riak/kv/2.1.3/developing/data-types.md +++ b/content/riak/kv/2.1.3/developing/data-types.md @@ -38,9 +38,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -261,5 +261,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. 
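As a minimal sketch of the bucket-type setup described in the data types section above, assuming a `maps` bucket type has been created and activated with the `datatype` property (all names below are illustrative):

```python
from riak import RiakClient
from riak.datatypes import Map

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

# Assumes the type was set up with:
#   riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
#   riak-admin bucket-type activate maps
bucket = client.bucket_type('maps').bucket('customers')

customer = Map(bucket, 'ahmed_info')
customer.registers['name'].assign('Ahmed')    # register: an opaque value
customer.counters['page_visits'].increment()  # counter: increments converge
customer.sets['interests'].add('robots')      # set: adds and removes converge
customer.store()                              # ships all staged operations at once
```

Because the convergence logic lives in the data type itself, concurrent updates to fields like `page_visits` from two clients merge automatically instead of producing siblings.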
diff --git a/content/riak/kv/2.1.3/developing/faq.md b/content/riak/kv/2.1.3/developing/faq.md index 9c89511856..0bb325d6f1 100644 --- a/content/riak/kv/2.1.3/developing/faq.md +++ b/content/riak/kv/2.1.3/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.1.3/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.1.3/using/performance/benchmarking -[Bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.1.3/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.1.3/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.1.3/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.1.3/developing/client-libraries -[MapReduce]: /riak/kv/2.1.3/developing/usage/mapreduce -[Memory]: /riak/kv/2.1.3/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.1.3/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.1.3/learn/concepts/causal-context#vector-clocks +[Basho Bench]: {{}}riak/kv/2.1.3/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.1.3/developing/usage +[commit hooks]: {{}}riak/kv/2.1.3/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.1.3/configuring/reference +[Erlang Riak Client]: {{}}riak/kv/2.1.3/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.1.3/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.1.3/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.1.3/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.1.3/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.1.3/developing/getting-started.md b/content/riak/kv/2.1.3/developing/getting-started.md index d474993ba5..75e62f094c 100644 --- a/content/riak/kv/2.1.3/developing/getting-started.md +++ b/content/riak/kv/2.1.3/developing/getting-started.md @@ -13,8 +13,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.1.3/setup/installing -[dev client libraries]: /riak/kv/2.1.3/developing/client-libraries +[install index]: {{}}riak/kv/2.1.3/setup/installing +[dev client libraries]: {{}}riak/kv/2.1.3/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.1.3/developing/getting-started/csharp.md b/content/riak/kv/2.1.3/developing/getting-started/csharp.md index 17aea2eb22..ac2d0ecdf0 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/csharp.md +++ b/content/riak/kv/2.1.3/developing/getting-started/csharp.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/taste-of-riak/csharp --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -77,4 +77,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.1.3/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.3/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.1.3/developing/getting-started/csharp/querying.md b/content/riak/kv/2.1.3/developing/getting-started/csharp/querying.md index 26cf7fea2b..fea7793f8d 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.1.3/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.1.3/developing/getting-started/erlang.md b/content/riak/kv/2.1.3/developing/getting-started/erlang.md index 6d97f5a4a5..27964465f1 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/erlang.md +++ b/content/riak/kv/2.1.3/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.3/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.3/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.1.3/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.1.3/developing/getting-started/erlang/object-modeling.md index 394d2dc3ec..0cd890a540 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.1.3/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.1.3/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.1.3/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.1.3/developing/getting-started/erlang/querying.md b/content/riak/kv/2.1.3/developing/getting-started/erlang/querying.md index dcde354c2b..f0474fc248 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.1.3/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.1.3/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.1.3/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.1.3/developing/getting-started/golang.md b/content/riak/kv/2.1.3/developing/getting-started/golang.md index c29d671680..9f0ce4bfd0 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/golang.md +++ b/content/riak/kv/2.1.3/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.3/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.3/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.3/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.3/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.1.3/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.1.3/developing/getting-started/golang/object-modeling.md index 19dd14a605..4f30c2b8d8 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.1.3/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.1.3/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.1.3/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.1.3/developing/getting-started/golang/querying.md b/content/riak/kv/2.1.3/developing/getting-started/golang/querying.md index 7a196bb431..dd1b0af6a0 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.1.3/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.1.3/developing/getting-started/java.md b/content/riak/kv/2.1.3/developing/getting-started/java.md index 8edde6372a..7ea98f4c64 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/java.md +++ b/content/riak/kv/2.1.3/developing/getting-started/java.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/taste-of-riak/java --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
@@ -84,4 +84,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.1.3/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.3/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.1.3/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.1.3/developing/getting-started/java/crud-operations.md index 23409a73bf..fceae96a53 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.1.3/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.1.3/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.1.3/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -178,6 +178,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.1.3/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.1.3/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/) documention. diff --git a/content/riak/kv/2.1.3/developing/getting-started/java/querying.md b/content/riak/kv/2.1.3/developing/getting-started/java/querying.md index 28a57a3ccd..c65e23fb3c 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.1.3/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.1.3/developing/getting-started/nodejs.md b/content/riak/kv/2.1.3/developing/getting-started/nodejs.md index 884c1ae7a4..bab8d04f22 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.1.3/developing/getting-started/nodejs.md @@ -20,7 +20,7 @@ aliases: [node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.3/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.3/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.1.3/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.1.3/developing/getting-started/nodejs/querying.md index f88789a525..943b33acdb 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.1.3/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.1.3/developing/getting-started/php.md b/content/riak/kv/2.1.3/developing/getting-started/php.md index 68eca240f4..6642c35045 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/php.md +++ b/content/riak/kv/2.1.3/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.1.3/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.3/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.1.3/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.1.3/developing/getting-started/php/crud-operations.md index 7926923bf0..6dc220fbc9 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.1.3/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter](/riak/kv/2.1.3/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.1.3/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.1.3/developing/getting-started/php/querying.md b/content/riak/kv/2.1.3/developing/getting-started/php/querying.md index 99430740e2..06814b7e1c 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.1.3/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.1.3/developing/getting-started/python.md b/content/riak/kv/2.1.3/developing/getting-started/python.md index 63b93ea241..77a8a5ec98 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/python.md +++ b/content/riak/kv/2.1.3/developing/getting-started/python.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/taste-of-riak/python --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.3/using/running-a-cluster) first. 
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -94,4 +94,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.3/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.3/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.1.3/developing/getting-started/python/querying.md b/content/riak/kv/2.1.3/developing/getting-started/python/querying.md index 0c5e0cf758..4938e85999 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.1.3/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.1.3/developing/getting-started/ruby.md b/content/riak/kv/2.1.3/developing/getting-started/ruby.md index 7ff4f8cfa3..649425c3e1 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/ruby.md +++ b/content/riak/kv/2.1.3/developing/getting-started/ruby.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/taste-of-riak/ruby --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.3/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.1.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -59,4 +59,4 @@ We are now ready to start interacting with Riak. 
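The secondary-index pattern repeated in the querying guides above looks roughly like this from the official Python client: attach `_bin` or `_int` index entries when writing, then query by exact match or range. All bucket, key, and index names here are invented for illustration, and a LevelDB or Memory backend is assumed.

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('customers')

# Index entries are attached to the object and written along with it.
obj = bucket.new('customer_1001', data={'name': 'John Smith'})
obj.add_index('zip_bin', '99701')     # _bin suffix marks a binary index
obj.add_index('order_count_int', 12)  # _int suffix marks an integer index
obj.store()

# Exact match on the binary index:
matches = bucket.get_index('zip_bin', '99701')

# Range query on the integer index, with pagination:
page = bucket.get_index('order_count_int', 10, 50, max_results=100)
for key in page.results:
    print(key)
if page.has_next_page():
    page = page.next_page()  # fetch the following page of matching keys
```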
## Next Steps -[CRUD Operations](/riak/kv/2.1.3/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.1.3/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.1.3/developing/getting-started/ruby/querying.md b/content/riak/kv/2.1.3/developing/getting-started/ruby/querying.md index abaf73bf97..83de28914e 100644 --- a/content/riak/kv/2.1.3/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.1.3/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.1.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.1.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.1.3/developing/key-value-modeling.md b/content/riak/kv/2.1.3/developing/key-value-modeling.md index 2f5a9ce2de..97b6abb0b6 100644 --- a/content/riak/kv/2.1.3/developing/key-value-modeling.md +++ b/content/riak/kv/2.1.3/developing/key-value-modeling.md @@ -17,7 +17,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.1.3/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.1.3/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.1.3/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.1.3/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.1.3/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -25,7 +25,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.1.3/developing/app-guide/) for a better sense of which features you might need. 
+for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.1.3/developing/app-guide/) for a better sense of which features you might need.

## Advantages of Key/Value Operations

@@ -38,12 +38,12 @@ objects. Instead, it interacts with objects on a one-by-one basis, using

Primary key lookups store and fetch objects in Riak on the basis of
three basic locators:

-* The object's [key](/riak/kv/2.1.3/learn/concepts/keys-and-objects#keys), which can be anything you
want as long as it is [Unicode compliant](http://www.unicode.org/)
-* The [bucket](/riak/kv/2.1.3/learn/concepts/buckets) which houses the object and its key (bucket
names are also Unicode compliant)
-* The [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) that determines the bucket's
- [replication](/riak/kv/2.1.3/developing/app-guide/replication-properties) and other properties
+* The object's [key]({{}}riak/kv/2.1.3/learn/concepts/keys-and-objects#keys), which can be anything you
want as long as it is [Unicode compliant](http://www.unicode.org/)
+* The [bucket]({{}}riak/kv/2.1.3/learn/concepts/buckets) which houses the object and its key (bucket
names are also Unicode compliant)
+* The [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types) that determines the bucket's
+ [replication]({{}}riak/kv/2.1.3/developing/app-guide/replication-properties) and other properties

It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you

@@ -80,7 +80,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would
-look like (for the [HTTP API](/riak/kv/2.1.3/developing/api/http)):
+look like (for the [HTTP API]({{}}riak/kv/2.1.3/developing/api/http)):

```
GET/PUT/DELETE /bucket//keys/

@@ -138,13 +138,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist?

-One way to determine this is to [list all keys](/riak/kv/2.1.3/developing/api/protocol-buffers/list-keys) in the
+One way to determine this is to [list all keys]({{}}riak/kv/2.1.3/developing/api/protocol-buffers/list-keys) in the
bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed.

-A better possibility is to use [Riak sets](/riak/kv/2.1.3/developing/data-types/sets) to
-store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.1.3/developing/data-types) that enable you to store lists of binaries or strings in Riak.
+A better possibility is to use [Riak sets]({{}}riak/kv/2.1.3/developing/data-types/sets) to
+store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.1.3/developing/data-types) that enables you to store lists of binaries or strings in Riak.
Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will.

@@ -153,7 +153,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created.
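Before the example continues, here is a rough Python sketch of that set-based approach; it assumes the `sets` bucket type, `user_info_sets` bucket, and `usernames` key that the next hunk introduces, plus the official Python client's `riak.datatypes.Set`:

```python
# A hedged sketch of tracking keys with a Riak set; names follow the example below.
from riak import RiakClient
from riak.datatypes import Set

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('sets').bucket('user_info_sets')

def track_new_user(username):
    # Add the new record's key to the tracking set and persist the change
    usernames = Set(bucket, 'usernames')
    usernames.add(username)
    usernames.store()
```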
We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.1.3/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.1.3/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -191,7 +191,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.3/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.1.3/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.3/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.3/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -435,8 +435,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.1.3/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.1.3/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.1.3/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.1.3/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -444,7 +444,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.1.3/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.1.3/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.1.3/developing/usage/commit-hooks.md b/content/riak/kv/2.1.3/developing/usage/commit-hooks.md index 46d61f2f2e..ac9b19aaef 100644 --- a/content/riak/kv/2.1.3/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.1.3/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. 
-Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.1.3/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.1.3/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. ## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.1.3/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.1.3/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.1.3/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.1.3/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.1.3/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.1.3/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.1.3/developing/usage/conflict-resolution.md b/content/riak/kv/2.1.3/developing/usage/conflict-resolution.md index 8f9ded35d5..34c9acaac0 100644 --- a/content/riak/kv/2.1.3/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.1.3/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.1.3/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.1.3/learn/concepts/clusters) system in which any [node](/riak/kv/2.1.3/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. 
It was built as a [clustered]({{}}riak/kv/2.1.3/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.1.3/learn/glossary/#node) is capable of receiving requests without requiring that
every node participate in each request.

-If you are using Riak in an [eventually consistent](/riak/kv/2.1.3/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is
unavoidable. Often, Riak can resolve these conflicts on its own
-internally if you use causal context, i.e. [vector clocks](/riak/kv/2.1.3/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.1.3/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+If you are using Riak in an [eventually consistent]({{}}riak/kv/2.1.3/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
unavoidable. Often, Riak can resolve these conflicts on its own
internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.1.3/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.1.3/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).

{{% note title="Important note on terminology" %}}
In versions of Riak prior to 2.0, vector clocks were the only causal context
@@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options:

1. If your data can be modeled as one of the currently available [Riak
-   Data Types](/riak/kv/2.1.3/developing/data-types), we recommend using one of these types,
+   Data Types]({{}}riak/kv/2.1.3/developing/data-types), we recommend using one of these types,
   because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution.
2. If your data cannot be modeled as one of the available Data Types,

@@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents:
>
-> * [Using Strong Consistency](/riak/kv/2.1.3/developing/app-guide/strong-consistency) --- A guide for developers
-> * [Managing Strong Consistency](/riak/kv/2.1.3/configuring/strong-consistency) --- A guide for operators
+> * [Using Strong Consistency]({{}}riak/kv/2.1.3/developing/app-guide/strong-consistency) --- A guide for developers
+> * [Managing Strong Consistency]({{}}riak/kv/2.1.3/configuring/strong-consistency) --- A guide for operators
> * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency

@@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents:

Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place.
-While Riak _does_ have a set of [defaults](/riak/kv/2.1.3/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+While Riak _does_ have a set of [defaults]({{}}riak/kv/2.1.3/developing/app-guide/replication-properties#available-parameters), there are a variety of general
approaches to conflict resolution that are available. In Riak, you can
mix and match conflict resolution strategies at the bucket level,
-[using bucket types][usage bucket types]. 
The most important [bucket properties](/riak/kv/2.1.3/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.1.3/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. @@ -87,7 +87,7 @@ If the `[allow_mult](#siblings)` parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -`[last_write_wins](/riak/kv/2.1.3/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, +`[last_write_wins]({{}}riak/kv/2.1.3/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.1.3/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.1.3/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.1.3/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.1.3/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.1.3/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.1.3/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.1.3/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.1.3/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.1.3/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.1.3/configuring/reference) to change the [default bucket properties](/riak/kv/2.1.3/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.1.3/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.1.3/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.1.3/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.1.3/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. 
If set to `true`, [dotted version vectors]({{}}riak/kv/2.1.3/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.1.3/learn/concepts/causal-context#vector-clocks) will be used.

Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things

@@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to
-[sibling explosion](/riak/kv/2.1.3/using/performance/latency-reduction#siblings), which can
produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by
-Basho's official [client libraries](/riak/kv/2.1.3/developing/client-libraries). Examples can be found for each
-client library in the [Object Updates](/riak/kv/2.1.3/developing/usage/updating-objects) document.
+[sibling explosion]({{}}riak/kv/2.1.3/using/performance/latency-reduction#siblings), which can
produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by
+Basho's official [client libraries]({{}}riak/kv/2.1.3/developing/client-libraries). Examples can be found for each
+client library in the [Object Updates]({{}}riak/kv/2.1.3/developing/usage/updating-objects) document.

## Siblings

@@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes.
2. **Stale causal context** --- Writes from any client using a stale
-[causal context](/riak/kv/2.1.3/learn/concepts/causal-context). This is a less likely scenario if a client updates
+[causal context]({{}}riak/kv/2.1.3/learn/concepts/causal-context). This is a less likely scenario if a client updates
the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries

@@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key

> **Getting started with Riak KV clients**
>
> If you are connecting to Riak using one of Basho's official
-[client libraries](/riak/kv/2.1.3/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.1.3/developing/getting-started) section.
+[client libraries]({{}}riak/kv/2.1.3/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.3/developing/getting-started) section.

At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to

@@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user.
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages:

-* [Java](/riak/kv/2.1.3/developing/usage/conflict-resolution/java)
-* [Ruby](/riak/kv/2.1.3/developing/usage/conflict-resolution/ruby)
-* [Python](/riak/kv/2.1.3/developing/usage/conflict-resolution/python)
-* [C#](/riak/kv/2.1.3/developing/usage/conflict-resolution/csharp)
-* [Node.js](/riak/kv/2.1.3/developing/usage/conflict-resolution/nodejs)
+* [Java]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/java)
+* [Ruby]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/ruby)
+* [Python]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/python)
+* [C#]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/csharp)
+* [Node.js]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/nodejs)

We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context.

@@ -611,7 +611,7 @@ once that limit has been exceeded.

Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash
-the entire node. Other issues include [increased cluster latency](/riak/kv/2.1.3/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.1.3/using/performance/latency-reduction) as the object is replicated, as well as out-of-memory errors.

### Vector Clock Explosion

@@ -666,7 +666,7 @@ Parameter | Default value | Description

This diagram shows how the values of these parameters dictate the vector clock pruning process:

-![Vclock Pruning](/images/vclock-pruning.png)
+![Vclock Pruning]({{}}images/vclock-pruning.png)

## More Information

diff --git a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/csharp.md
index c128fb2202..ab5e918c53 100644
--- a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/csharp.md
+++ b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/csharp.md
@@ -16,7 +16,7 @@ aliases:
 - /riak/kv/2.1.3/dev/using/conflict-resolution/csharp
---

-For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific
+For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific
criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client].
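Since the hunks above repeatedly stress fetching and returning causal context, a minimal read-modify-write sketch in Python may be useful here; the `siblings_allowed` type comes from the curl example above, while the bucket and key names are purely illustrative:

```python
# A hedged sketch of an update that preserves causal context.
# `nickolodeon` and `best_character` are illustrative names only.
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')

# Fetching first brings the causal context back with the object; store()
# then returns that context to Riak, so the write supersedes what was
# read instead of piling up siblings.
obj = bucket.get('best_character')
obj.data = 'Stimpy'
obj.store()
```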
diff --git a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/golang.md index 1900eb7f09..0e536d9240 100644 --- a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.3/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to usecase-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/java.md index e01bba4d21..1dbbcaa0fc 100644 --- a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.3/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.3/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.3/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.3/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.3/developing/data-types/) that have specific conflict resolution mechanics built in. 
If you have data that -can be modeled as a [counter](/riak/kv/2.1.3/developing/data-types/counters), [set](/riak/kv/2.1.3/developing/data-types/sets), or [map](/riak/kv/2.1.3/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.3/developing/data-types/counters), [set]({{}}riak/kv/2.1.3/developing/data-types/sets), or [map]({{}}riak/kv/2.1.3/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.3/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.3/developing/data-types/sets). diff --git a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/nodejs.md index f401bd7905..ed639e3f6c 100644 --- a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.3/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/php.md index 19b686d5e1..a1741ce6f2 100644 --- a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.3/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.3/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.3/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.3/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.3/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.1.3/developing/data-types/counters), [set](/riak/kv/2.1.3/developing/data-types/sets), or [map](/riak/kv/2.1.3/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.3/developing/data-types/counters), [set]({{}}riak/kv/2.1.3/developing/data-types/sets), or [map]({{}}riak/kv/2.1.3/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.3/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.3/developing/data-types/sets). diff --git a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/python.md index 0d4854a2df..c66755f224 100644 --- a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.3/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.3/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.3/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.3/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.3/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.1.3/developing/data-types/counters), [set](/riak/kv/2.1.3/developing/data-types/sets), or [map](/riak/kv/2.1.3/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.3/developing/data-types/counters), [set]({{}}riak/kv/2.1.3/developing/data-types/sets), or [map]({{}}riak/kv/2.1.3/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.3/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.3/developing/data-types/sets). diff --git a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/ruby.md index 976f1c6fbe..dd5571e95e 100644 --- a/content/riak/kv/2.1.3/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.1.3/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.3/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.3/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.3/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.3/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.3/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.1.3/developing/data-types/counters), [set](/riak/kv/2.1.3/developing/data-types/sets), or [map](/riak/kv/2.1.3/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.3/developing/data-types/counters), [set]({{}}riak/kv/2.1.3/developing/data-types/sets), or [map]({{}}riak/kv/2.1.3/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.3/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.3/developing/data-types/sets). diff --git a/content/riak/kv/2.1.3/developing/usage/creating-objects.md b/content/riak/kv/2.1.3/developing/usage/creating-objects.md index 61b7634391..e46a0b6abe 100644 --- a/content/riak/kv/2.1.3/developing/usage/creating-objects.md +++ b/content/riak/kv/2.1.3/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.1.3/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.1.3/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -27,7 +27,7 @@ In the example above, our read was unsuccessful because our Riak cluster is currently empty. Let's change that by storing an object containing information about a dog named Rufus. We'll store that object in the location described above, i.e. in the key `rufus` in the bucket `dogs`, -which bears the `animals` [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types). +which bears the `animals` [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types). 
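Before the walkthrough below builds the object in each client, here is a hedged Python sketch of the same write; the `animals`/`dogs`/`rufus` location comes from the hunk above, and the `WOOF!` payload from the client examples that follow:

```python
from riak import RiakClient, RiakObject

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('animals').bucket('dogs')

# Build the object at animals/dogs/rufus and write it with a plain-text payload
obj = RiakObject(client, bucket, 'rufus')
obj.content_type = 'text/plain'
obj.data = 'WOOF!'
obj.store()
```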
The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -122,7 +122,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, run the same read operation in [Reading Objects](/riak/kv/2.1.3/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no +Now, run the same read operation in [Reading Objects]({{}}riak/kv/2.1.3/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no longer empty! ### Store an Object @@ -143,7 +143,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.1.3/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.1.3/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.1.3/developing/usage/custom-extractors.md b/content/riak/kv/2.1.3/developing/usage/custom-extractors.md index de8b103061..118773dc21 100644 --- a/content/riak/kv/2.1.3/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.1.3/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.1.3/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.1.3/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.1.3/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.1.3/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.1.3/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.1.3/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added to `` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.1.3/developing/usage/deleting-objects.md b/content/riak/kv/2.1.3/developing/usage/deleting-objects.md index 0222270ca9..de8491a773 100644 --- a/content/riak/kv/2.1.3/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.1.3/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.1.3/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.1.3/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.1.3/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.1.3/developing/usage/document-store.md b/content/riak/kv/2.1.3/developing/usage/document-store.md index 52c32938ce..c9c80d84f9 100644 --- a/content/riak/kv/2.1.3/developing/usage/document-store.md +++ b/content/riak/kv/2.1.3/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.1.3/developing/usage/search/) and [Riak Data Types](/riak/kv/2.1.3/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.1.3/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.1.3/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.1.3/developing/data-types/maps). +[Riak maps]({{}}riak/kv/2.1.3/developing/data-types/maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.1.3/developing/data-types/maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.1.3/developing/data-types/maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**. 
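For symmetry with the curl delete shown in the deleting-objects hunk above, a minimal Python sketch of the same operation might look like this; the `quotes`/`oscar_wilde`/`genius` location is taken from that curl command, and the port is a default-node assumption:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('quotes').bucket('oscar_wilde')

# Remove the object stored at quotes/oscar_wilde/genius
bucket.delete('genius')
```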
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.1.3/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.1.3/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.1.3/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.1.3/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.1.3/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.1.3/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.1.3/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.1.3/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.1.3/developing/usage/mapreduce.md b/content/riak/kv/2.1.3/developing/usage/mapreduce.md index d9f78500c6..7fdaae9ff7 100644 --- a/content/riak/kv/2.1.3/developing/usage/mapreduce.md +++ b/content/riak/kv/2.1.3/developing/usage/mapreduce.md @@ -33,9 +33,9 @@ transferring a potentially huge dataset to a client algorithm. Developers can use MapReduce for things like filtering documents by tags, counting words in documents, and extracting links to related data. In Riak, MapReduce is one method for querying that is not strictly based -on key querying, alongside [secondary indexes](/riak/kv/2.1.3/developing/usage/secondary-indexes/) -and [Search](/riak/kv/2.1.3/developing/usage/search/). MapReduce jobs can be submitted through the -[HTTP API](/riak/kv/2.1.3/developing/api/http) or the [Protocol Buffers API](/riak/kv/2.1.3/developing/api/protocol-buffers/), although we +on key querying, alongside [secondary indexes]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes/) +and [Search]({{}}riak/kv/2.1.3/developing/usage/search/). MapReduce jobs can be submitted through the +[HTTP API]({{}}riak/kv/2.1.3/developing/api/http) or the [Protocol Buffers API]({{}}riak/kv/2.1.3/developing/api/protocol-buffers/), although we strongly recommend using the Protocol Buffers API for performance reasons. @@ -49,9 +49,9 @@ reasons. ## When to Use MapReduce * When you know the set of objects over which you want to MapReduce - (i.e. the locations of the objects, as specified by [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types), bucket, and key) + (i.e. the locations of the objects, as specified by [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types), bucket, and key) * When you want to return actual objects or pieces of objects and not - just the keys. 
[Search](/riak/kv/2.1.3/developing/usage/search/) and [secondary indexes](/riak/kv/2.1.3/developing/usage/secondary-indexes) are other means of returning objects based on + just the keys. [Search]({{}}riak/kv/2.1.3/developing/usage/search/) and [secondary indexes]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes) are other means of returning objects based on non-key-based queries, but they only return lists of keys and not whole objects. * When you need the utmost flexibility in querying your data. MapReduce @@ -86,7 +86,7 @@ Riak MapReduce queries have two components: * A list of phases The elements of the input list are object locations as specified by -[bucket type](/riak/kv/2.1.3/developing/usage/bucket-types), bucket, and key. The elements of the +[bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types), bucket, and key. The elements of the phases list are chunks of information related to a map, a reduce, or a link function. @@ -96,7 +96,7 @@ node that the client contacts to make the request becomes the above, each job consists of a list of phases, where each phase is either a map or a reduce phase. The coordinating node uses the list of phases to route the object keys and the function that will operate over the -objects stored in those keys and instruct the proper [vnode](/riak/kv/2.1.3/learn/glossary/#vnode) to +objects stored in those keys and instruct the proper [vnode]({{}}riak/kv/2.1.3/learn/glossary/#vnode) to run that function over the right objects. After running the map function, the results are sent back to the @@ -107,20 +107,20 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example In this example, we'll create four objects with the text "caremad" repeated a varying number of times and store those objects in the bucket -`training` (which does not bear a [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types)). +`training` (which does not bear a [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types)). An Erlang MapReduce function will be used to count the occurrences of the word "caremad." ### Data object input commands For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) -in conjunction with Riak's [HTTP API](/riak/kv/2.1.3/developing/api/http) to store the objects: +in conjunction with Riak's [HTTP API]({{}}riak/kv/2.1.3/developing/api/http) to store the objects: ```curl curl -XPUT http://localhost:8098/buckets/training/keys/foo \ @@ -218,4 +218,4 @@ counting the number of instances of the word. ## Advanced MapReduce Queries For more detailed information on MapReduce queries in Riak, we recommend -checking out our [Advanced MapReduce](/riak/kv/2.1.3/developing/app-guide/advanced-mapreduce) guide. +checking out our [Advanced MapReduce]({{}}riak/kv/2.1.3/developing/app-guide/advanced-mapreduce) guide. diff --git a/content/riak/kv/2.1.3/developing/usage/reading-objects.md b/content/riak/kv/2.1.3/developing/usage/reading-objects.md index 79a976787d..1d1d00a3b8 100644 --- a/content/riak/kv/2.1.3/developing/usage/reading-objects.md +++ b/content/riak/kv/2.1.3/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. 
You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.1.3/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) `animals`: +`dogs`, which bears the [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types) `animals`: ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.1.3/developing/usage/replication.md b/content/riak/kv/2.1.3/developing/usage/replication.md index e01240be7b..cf094ee7f0 100644 --- a/content/riak/kv/2.1.3/developing/usage/replication.md +++ b/content/riak/kv/2.1.3/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.1.3/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.1.3/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.1.3/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using Strong +Using Strong Consistency documentation, as this option will not be covered in this tutorial. 
{{% /note %}} @@ -71,7 +71,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.1.3/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.1.3/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -95,8 +95,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -105,7 +105,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.1.3/developing/usage/bucket-types) +that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.1.3/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -317,7 +317,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.1.3/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.1.3/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -355,7 +355,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.1.3/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.1.3/setup/planning/backend/multi). 
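As a quick recap of the bucket type approach described earlier on this page, here is a minimal sketch of creating a type that pins those replication properties; the type name `n_val_of_5` is only an illustrative example, not something defined elsewhere in these docs:

```bash
# Sketch: create a bucket type that applies n_val, r, and w to every
# bucket that bears it (the type name is illustrative)
riak-admin bucket-type create n_val_of_5 '{"props":{"n_val":5,"r":3,"w":3}}'

# Activate the type, then confirm the resulting properties
riak-admin bucket-type activate n_val_of_5
riak-admin bucket-type status n_val_of_5
```

Any bucket created under that type inherits those properties, though clients can still override them on a per-request basis.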
## Delete Quorum with RW @@ -530,9 +530,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.1.3/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.1.3/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.1.3/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.1.3/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -546,7 +546,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.1.3/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.1.3/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -556,8 +556,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.1.3/developing/usage/search-schemas.md b/content/riak/kv/2.1.3/developing/usage/search-schemas.md index 7df680fb50..82d1275050 100644 --- a/content/riak/kv/2.1.3/developing/usage/search-schemas.md +++ b/content/riak/kv/2.1.3/developing/usage/search-schemas.md @@ -15,17 +15,17 @@ aliases: - /riak/kv/2.1.3/dev/advanced/search-schema --- -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). Riak Search is built for ease of use, allowing you to write values into Riak and query for values using Solr. Riak Search does a lot of work -under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.1.3/developing/data-types/), and [more](/riak/kv/2.1.3/developing/usage/custom-extractors)---into something that can be indexed and searched later. +under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.1.3/developing/data-types/), and [more]({{}}riak/kv/2.1.3/developing/usage/custom-extractors)---into something that can be indexed and searched later. Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing an array of strings? An integer? A date? Is your text in English or Russian? 
You can provide such instructions to Riak Search by @@ -224,7 +224,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.1.3/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.1.3/developing/usage/search.md b/content/riak/kv/2.1.3/developing/usage/search.md index 9cf7ac6a2f..90d68e46d0 100644 --- a/content/riak/kv/2.1.3/developing/usage/search.md +++ b/content/riak/kv/2.1.3/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.1.3/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.1.3/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.3/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.1.3/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.3/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.3/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.1.3/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.1.3/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.1.3/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.1.3/developing/usage/custom-extractors). 
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.1.3/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.1.3/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.1.3/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.1.3/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.1.3/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.1.3/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.1.3/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.1.3/developing/usage/searching-data-types.md b/content/riak/kv/2.1.3/developing/usage/searching-data-types.md index 6827fdb2b3..bcd6cf1f67 100644 --- a/content/riak/kv/2.1.3/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.1.3/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.3/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.1.3/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.1.3/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.1.3/developing/data-types/counters), [sets](/riak/kv/2.1.3/developing/data-types/sets), and [maps](/riak/kv/2.1.3/developing/data-types/maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.1.3/developing/data-types/counters), [sets]({{}}riak/kv/2.1.3/developing/data-types/sets), and [maps]({{}}riak/kv/2.1.3/developing/data-types/maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
### Top-level Schemas -The default schema for [counters](/riak/kv/2.1.3/developing/data-types/counters) indexes each +The default schema for [counters]({{}}riak/kv/2.1.3/developing/data-types/counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.1.3/developing/data-types/sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.1.3/developing/data-types/sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.1.3/developing/data-types/maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.1.3/developing/data-types/maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) for [storing counters](/riak/kv/2.1.3/developing/data-types/counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.1.3/developing/data-types/counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types) for [storing sets](/riak/kv/2.1.3/developing/data-types/sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.1.3/developing/data-types/sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.1.3/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.1.3/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.1.3/developing/data-types/maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.1.3/developing/data-types/maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.1.3/developing/usage/secondary-indexes.md b/content/riak/kv/2.1.3/developing/usage/secondary-indexes.md index a4ada2b002..bf9fb2542e 100644 --- a/content/riak/kv/2.1.3/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.1.3/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.1.3/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.3/setup/planning/backend/memory -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.3/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.1.3/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.1.3/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.1.3/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.1.3/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.1.3/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.1.3/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.1.3/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.1.3/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.1.3/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.1.3/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.1.3/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.1.3/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.1.3/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.1.3/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.3/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.1.3/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.3/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.3/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.1.3/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.1.3/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.1.3/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.1.3/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
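To tie the pieces above together before looking at each query type, here is a minimal sketch of the full tag-then-query cycle over HTTP; it assumes the `default` bucket type, a 2i-capable backend such as LevelDB, and illustrative index names and values:

```curl
# Tag an object with a binary secondary index at write time
curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \
  -H 'x-riak-index-twitter_bin: jsmith123' \
  -H 'Content-Type: application/json' \
  -d '{"user_data": "..."}'

# Later, fetch every key in the bucket that carries that tag
curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
```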
### Exact Match diff --git a/content/riak/kv/2.1.3/developing/usage/security.md b/content/riak/kv/2.1.3/developing/usage/security.md index 4c7bfd6b2d..8e3a616295 100644 --- a/content/riak/kv/2.1.3/developing/usage/security.md +++ b/content/riak/kv/2.1.3/developing/usage/security.md @@ -15,49 +15,49 @@ aliases: - /riak/kv/2.1.3/dev/advanced/client-security --- -Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.1.3/using/security/basics) that enables you to choose +Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.1.3/using/security/basics) that enables you to choose * which Riak users/clients are authorized to perform a wide variety of Riak operations, and * how those users/clients are required to authenticate themselves. -The following four authentication mechanisms, aka [security sources](/riak/kv/2.1.3/using/security/managing-sources/) are available: +The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.1.3/using/security/managing-sources/), are available: -* [Trust](/riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication)-based +* [Trust]({{}}riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default -* [Password](/riak/kv/2.1.3/using/security/managing-sources/#password-based-authentication)-based authentication requires +* [Password]({{}}riak/kv/2.1.3/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password -* [Certificate](/riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication +* [Certificate]({{}}riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients present a certificate -* [Pluggable authentication module (PAM)](/riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication)-based authentication requires +* [Pluggable authentication module (PAM)]({{}}riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the - `[riak-admin security](/riak/kv/2.1.3/using/security/managing-sources/#managing-sources)` + `[riak-admin security]({{}}riak/kv/2.1.3/using/security/managing-sources/#managing-sources)` command line interface Riak's approach to security is highly flexible. If you choose to use Riak's security feature, you do not need to require that all clients authenticate via the same means. Instead, you can specify authentication sources on a client-by-client, i.e. user-by-user, basis. This means that -you can require clients performing, say, [MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce/) -operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.1.3/developing/usage) have to use username and password. The approach +you can require clients performing, say, [MapReduce]({{}}riak/kv/2.1.3/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.1.3/developing/usage) have to use username and password. The approach that you adopt will depend on your security needs. This document provides a general overview of how that works. 
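As a rough sketch of that per-user flexibility, an operator might assign different security sources to different users with `riak-admin`; the usernames and CIDRs below are illustrative, not taken from this document:

```bash
# Password authentication for a general-purpose user connecting locally
riak-admin security add-source riakuser 127.0.0.1/32 password

# Certificate authentication for clients on an internal subnet
riak-admin security add-source mapreduce_user 10.0.0.0/24 certificate
```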
For managing security in Riak itself, see the following documents: -* [Authentication and Authorization](/riak/kv/2.1.3/using/security/basics) -* [Managing Security Sources](/riak/kv/2.1.3/using/security/managing-sources/) +* [Authentication and Authorization]({{}}riak/kv/2.1.3/using/security/basics) +* [Managing Security Sources]({{}}riak/kv/2.1.3/using/security/managing-sources/) We also provide client-library-specific guides for the following officially supported clients: -* [Java](/riak/kv/2.1.3/developing/usage/security/java) -* [Ruby](/riak/kv/2.1.3/developing/usage/security/ruby) -* [PHP](/riak/kv/2.1.3/developing/usage/security/php) -* [Python](/riak/kv/2.1.3/developing/usage/security/python) -* [Erlang](/riak/kv/2.1.3/developing/usage/security/erlang) +* [Java]({{}}riak/kv/2.1.3/developing/usage/security/java) +* [Ruby]({{}}riak/kv/2.1.3/developing/usage/security/ruby) +* [PHP]({{}}riak/kv/2.1.3/developing/usage/security/php) +* [Python]({{}}riak/kv/2.1.3/developing/usage/security/python) +* [Erlang]({{}}riak/kv/2.1.3/developing/usage/security/erlang) ## Certificates, Keys, and Authorities @@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients. > **HTTP not supported** > > Certificate-based authentication is available only through Riak's -[Protocol Buffers](/riak/kv/2.1.3/developing/api/protocol-buffers/) interface. It is not available through the -[HTTP API](/riak/kv/2.1.3/developing/api/http). +[Protocol Buffers]({{}}riak/kv/2.1.3/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{}}riak/kv/2.1.3/developing/api/http). ### Default Names -In Riak's [configuration files](/riak/kv/2.1.3/configuring/reference/#security), the +In Riak's [configuration files]({{}}riak/kv/2.1.3/configuring/reference/#security), the default certificate file names are as follows: Cert | Filename diff --git a/content/riak/kv/2.1.3/developing/usage/security/erlang.md b/content/riak/kv/2.1.3/developing/usage/security/erlang.md index 0484a53bb5..4f86a2ffb3 100644 --- a/content/riak/kv/2.1.3/developing/usage/security/erlang.md +++ b/content/riak/kv/2.1.3/developing/usage/security/erlang.md @@ -19,9 +19,9 @@ aliases: This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak. -If you are using [trust](/riak/kv/2.1.3/using/security/managing-sources/), [PAM-](/riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.1.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.1.3/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.1.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). 
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.3/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.1.3/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.1.3/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.1.3/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.1.3/developing/usage/security/java.md b/content/riak/kv/2.1.3/developing/usage/security/java.md index 0d013bba68..add998f01f 100644 --- a/content/riak/kv/2.1.3/developing/usage/security/java.md +++ b/content/riak/kv/2.1.3/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.3/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.1.3/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.1.3/developing/usage/security/php.md b/content/riak/kv/2.1.3/developing/usage/security/php.md index 5fd32ab068..13270f4657 100644 --- a/content/riak/kv/2.1.3/developing/usage/security/php.md +++ b/content/riak/kv/2.1.3/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.3/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.1.3/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.1.3/developing/usage/security/python.md b/content/riak/kv/2.1.3/developing/usage/security/python.md index 64510b7027..56cc3bc930 100644 --- a/content/riak/kv/2.1.3/developing/usage/security/python.md +++ b/content/riak/kv/2.1.3/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.3/using/security/managing-sources/) or [PAM-](/riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.1.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.1.3/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security +setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.1.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.3/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.1.3/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.1.3/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.1.3/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.1.3/using/security/basics/#user-management). 
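For reference, the server-side half of the password setup mentioned above might look like the following sketch; the credentials are illustrative only:

```bash
# Create the user with a password, then permit password-based logins
# from the local machine (username, password, and CIDR are examples)
riak-admin security add-user riakuser password=rosebud
riak-admin security add-source riakuser 127.0.0.1/32 password
```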
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.1.3/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.1.3/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.1.3/developing/usage/security/ruby.md b/content/riak/kv/2.1.3/developing/usage/security/ruby.md index 1e6d1198ec..492a7c82a3 100644 --- a/content/riak/kv/2.1.3/developing/usage/security/ruby.md +++ b/content/riak/kv/2.1.3/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.3/using/security/managing-sources/) or [PAM](/riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.1.3/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.1.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.1.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.3/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.1.3/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication). 
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.1.3/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.1.3/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.1.3/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.1.3/developing/usage/updating-objects.md b/content/riak/kv/2.1.3/developing/usage/updating-objects.md index d086732156..ed4f41143c 100644 --- a/content/riak/kv/2.1.3/developing/usage/updating-objects.md +++ b/content/riak/kv/2.1.3/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/dev/using/updates --- -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode ## Using Causal Context If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.1.3/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.1.3/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.1.3/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.1.3/learn/concepts/causal-context). These objects track the causal history of objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps: 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.1.3/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.1.3/learn/concepts/causal-context)) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.1.3/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.1.3/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), we'll walk you through a basic example here. 
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.1.3/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.1.3/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.1.3/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.1.3/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.1.3/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.1.3/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.1.3/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.1.3/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.1.3/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.1.3/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.1.3/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.1.3/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.1.3/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.1.3/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.1.3/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.1.3/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.1.3/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.1.3/index.md b/content/riak/kv/2.1.3/index.md index 572849b243..b54e615d20 100644 --- a/content/riak/kv/2.1.3/index.md +++ b/content/riak/kv/2.1.3/index.md @@ -15,18 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.1.3/configuring -[dev index]: /riak/kv/2.1.3/developing -[downloads]: /riak/kv/2.1.3/downloads/ -[install index]: /riak/kv/2.1.3/setup/installing/ -[plan index]: /riak/kv/2.1.3/setup/planning -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.1.3/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.1.3/developing/usage/search -[getting started]: /riak/kv/2.1.3/developing/getting-started -[dev client libraries]: /riak/kv/2.1.3/developing/client-libraries - - +[config index]: {{}}riak/kv/2.1.3/configuring +[dev index]: {{}}riak/kv/2.1.3/developing +[downloads]: {{}}riak/kv/2.1.3/downloads/ +[install index]: {{}}riak/kv/2.1.3/setup/installing/ +[plan index]: {{}}riak/kv/2.1.3/setup/planning +[perf open files]: {{}}riak/kv/2.1.3/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.1.3/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.1.3/developing/usage/search +[getting started]: {{}}riak/kv/2.1.3/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.1.3/developing/client-libraries Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data. diff --git a/content/riak/kv/2.1.3/introduction.md b/content/riak/kv/2.1.3/introduction.md index 432e6bde3f..a42fe418b7 100644 --- a/content/riak/kv/2.1.3/introduction.md +++ b/content/riak/kv/2.1.3/introduction.md @@ -27,7 +27,7 @@ For more in-depth implementation details check out the If you're upgrading to Riak 2.0 from an earlier version, please be aware that all of the new features listed below are optional: -* **Riak Data Types** --- Riak's new CRDT-based [Data Types](/riak/kv/2.1.3/developing/data-types) can +* **Riak Data Types** --- Riak's new CRDT-based [Data Types]({{}}riak/kv/2.1.3/developing/data-types) can simplify modeling data in Riak, but are only used in buckets explicitly configured to use them. * **Strong Consistency, Riak Security, and the New Riak Search** --- @@ -35,16 +35,16 @@ that all of the new features listed below are optional: work. If not turned on, they will have no impact on performance. Furthermore, the older Riak Search will continue to be included with Riak. -* **Security** --- [Authentication and authorization](/riak/kv/2.1.3/using/security/basics) can be enabled +* **Security** --- [Authentication and authorization]({{}}riak/kv/2.1.3/using/security/basics) can be enabled or disabled at any time. -* **Configuration management** --- Riak's [configuration files](/riak/kv/2.1.3/configuring/reference/) have +* **Configuration management** --- Riak's [configuration files]({{}}riak/kv/2.1.3/configuring/reference/) have been streamlined into a single file named `riak.conf`. If you are upgrading, however, your existing `app.config` and `vm.args` files will still be recognized in version 2.0. -* **Bucket Types** --- While we strongly recommend [using bucket types](/riak/kv/2.1.3/using/reference/bucket-types) when creating new buckets, they are not required. 
+* **Bucket Types** --- While we strongly recommend [using bucket types]({{}}riak/kv/2.1.3/using/reference/bucket-types) when creating new buckets, they are not required. * **Dotted Version Vectors (DVVs)** --- This alternative to traditional - [vector clocks](/riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks) is enabled by default - in all [bucket types](/riak/kv/2.1.3/using/reference/bucket-types), but DVVs can be disabled + [vector clocks]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks) is enabled by default + in all [bucket types]({{}}riak/kv/2.1.3/using/reference/bucket-types), but DVVs can be disabled by setting the `dvv_enabled` property to `false` on any bucket type. In a nutshell, upgrading to 2.0 will change how you use Riak only if you @@ -52,17 +52,17 @@ want it to. But even if you don't plan on using the new features, there are a number of improvements that make upgrading a good choice, including the following: -* [Cluster metadata](/riak/kv/2.1.3/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that +* [Cluster metadata]({{}}riak/kv/2.1.3/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that reduces the amount of inter-node gossip in Riak clusters, which can reduce network congestion. -* [Active Anti-Entropy](/riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy +* [Active Anti-Entropy]({{}}riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy (AAE) feature that is turned on by default since version 1.3, AAE performance has been improved in version 2.0. * [Bug patches](https://github.com/basho/riak/blob/2.0/RELEASE-NOTES.md) --- A variety of bugs present in earlier versions have been identified and patched. -More on upgrading can be found in our [Riak 2.0 upgrade guide](/riak/kv/2.1.3/setup/upgrading/version). +More on upgrading can be found in our [Riak 2.0 upgrade guide]({{}}riak/kv/2.1.3/setup/upgrading/version). ## Riak Data Types @@ -73,20 +73,20 @@ application is responsible for resolving conflicts between replicas of objects stored in different Riak nodes. Riak 2.0 offers a new approach to this problem for a wide range of use -cases in the form of [Riak Data Types](/riak/kv/2.1.3/developing/data-types). Instead of +cases in the form of [Riak Data Types]({{}}riak/kv/2.1.3/developing/data-types). Instead of forcing the application to resolve conflicts, Riak offers five Data Types that can reduce some of the complexities of developing using -Riak: [flags](/riak/kv/2.1.3/developing/data-types/maps#flags), [registers](/riak/kv/2.1.3/developing/data-types/maps#registers), -[counters](/riak/kv/2.1.3/developing/data-types/counters), [sets](/riak/kv/2.1.3/developing/data-types/sets), and -[maps](/riak/kv/2.1.3/developing/data-types/maps). +Riak: [flags]({{}}riak/kv/2.1.3/developing/data-types/maps#flags), [registers]({{}}riak/kv/2.1.3/developing/data-types/maps#registers), +[counters]({{}}riak/kv/2.1.3/developing/data-types/counters), [sets]({{}}riak/kv/2.1.3/developing/data-types/sets), and +[maps]({{}}riak/kv/2.1.3/developing/data-types/maps). 
#### Relevant Docs -* [Using Data Types](/riak/kv/2.1.3/developing/data-types) explains how to use Riak Data Types on the +* [Using Data Types]({{}}riak/kv/2.1.3/developing/data-types) explains how to use Riak Data Types on the application side, with usage examples for all five Data Types in all of Basho's officially supported clients (Java, Ruby, Python, .NET and Erlang) and for Riak's HTTP interface. -* [Data Types](/riak/kv/2.1.3/developing/data-types) explains some of the theoretical concerns that drive +* [Data Types]({{}}riak/kv/2.1.3/developing/data-types) explains some of the theoretical concerns that drive Riak Data Types and shares details about how they are implemented in Riak. @@ -103,11 +103,11 @@ Search, integrating Riak with [Apache Solr](https://lucene.apache.org/solr/)'s f #### Relevant Docs -* [Using Search](/riak/kv/2.1.3/developing/usage/search) provides an overview of how to use the new +* [Using Search]({{}}riak/kv/2.1.3/developing/usage/search) provides an overview of how to use the new Riak Search. -* [Search Schema](/riak/kv/2.1.3/developing/usage/search-schemas) shows you how to create and manage custom search +* [Search Schema]({{}}riak/kv/2.1.3/developing/usage/search-schemas) shows you how to create and manage custom search schemas. -* [Search Details](/riak/kv/2.1.3/using/reference/search) provides an in-depth look at the design +* [Search Details]({{}}riak/kv/2.1.3/using/reference/search) provides an in-depth look at the design considerations that went into the new Riak Search. #### Video @@ -125,13 +125,13 @@ some (or perhaps all) of your data. #### Relevant Docs -* [Using Strong Consistency](/riak/kv/2.1.3/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong +* [Using Strong Consistency]({{}}riak/kv/2.1.3/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong consistency subsystem and to apply strong consistency guarantees to data stored in specified buckets. -* [Strong Consistency](/riak/kv/2.1.3/using/reference/strong-consistency) provides a theoretical treatment of how a - strongly consistent system differs from an [eventually consistent](/riak/kv/2.1.3/learn/concepts/eventual-consistency) system, as well as details about how +* [Strong Consistency]({{}}riak/kv/2.1.3/using/reference/strong-consistency) provides a theoretical treatment of how a + strongly consistent system differs from an [eventually consistent]({{}}riak/kv/2.1.3/learn/concepts/eventual-consistency) system, as well as details about how strong consistency is implemented in Riak. -* [Managing Strong Consistency](/riak/kv/2.1.3/configuring/strong-consistency) is a guide to strong consistency for +* [Managing Strong Consistency]({{}}riak/kv/2.1.3/configuring/strong-consistency) is a guide to strong consistency for Riak operators. #### Video @@ -155,11 +155,11 @@ Riak itself and managed through a simple command-line interface. #### Relevant Docs -* [Authentication and Authorization](/riak/kv/2.1.3/using/security/basics) explains how Riak Security can be +* [Authentication and Authorization]({{}}riak/kv/2.1.3/using/security/basics) explains how Riak Security can be enabled and disabled, how users and groups are managed, how authorization to perform certain operations can be granted and revoked, how security ciphers can be chosen, and more. 
-* [Managing Security Sources](/riak/kv/2.1.3/using/security/managing-sources/) is an in-depth tutorial on how to +* [Managing Security Sources]({{}}riak/kv/2.1.3/using/security/managing-sources/) is an in-depth tutorial on how to implement Riak's four supported authentication sources: trusted networks, passwords, pluggable authentication modules, and certificates. @@ -194,7 +194,7 @@ override any settings from the new system. #### Relevant Docs -* [Configuration Files](/riak/kv/2.1.3/configuring/reference/) lists and describes all of the configurable +* [Configuration Files]({{}}riak/kv/2.1.3/configuring/reference/) lists and describes all of the configurable parameters available in Riak 2.0, from configuring your chosen storage backend(s) to setting default bucket properties to controlling Riak's logging system and much more. @@ -214,7 +214,7 @@ and keys. #### Relevant Docs -* [Using Bucket Types](/riak/kv/2.1.3/using/reference/bucket-types) explains how to create, modify, and activate +* [Using Bucket Types]({{}}riak/kv/2.1.3/using/reference/bucket-types) explains how to create, modify, and activate bucket types, as well as how the new system differs from the older, bucket properties-based system. @@ -226,20 +226,20 @@ and [Jordan West](https://github.com/jrwest). ## Dotted Version Vectors -In prior versions of Riak, [conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution) was managed using -[vector clocks](/riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks), which track object update causality. +In prior versions of Riak, [conflict resolution]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution) was managed using +[vector clocks]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks), which track object update causality. Riak 2.0 has added support for dotted version vectors (DVVs). DVVs serve an analogous role to vector -clocks but are more effective at containing [sibling explosion](/riak/kv/2.1.3/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. +clocks but are more effective at containing [sibling explosion]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. #### Relevant Docs -* [Dotted Version Vectors](/riak/kv/2.1.3/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. +* [Dotted Version Vectors]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. ## New Client Libraries -While Basho offered official [client libraries](/riak/kv/2.1.3/developing/client-libraries) for Java, Ruby, +While Basho offered official [client libraries]({{}}riak/kv/2.1.3/developing/client-libraries) for Java, Ruby, Python, .NET and Erlang for versions of Riak prior to 2.0, all clients have undergone major changes in anticipation of the 2.0 release. 
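To give a concrete feel for the updated client APIs, here is a hedged sketch of the basic store/fetch cycle in the Basho Python client (assuming the 2.x `riak` package); the bucket and key names are invented, and the node address is assumed:

```python
import riak

client = riak.RiakClient(pb_port=8087)  # assumes a local node
bucket = client.bucket_type('default').bucket('users')

# Store a new JSON object under an invented key.
bucket.new('user1', data={'name': 'Ada', 'visits': 1}).store()

# Fetch it back; .data holds the JSON-decoded value.
fetched = bucket.get('user1')
print(fetched.data['name'])  # => Ada
```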
diff --git a/content/riak/kv/2.1.3/learn/concepts.md b/content/riak/kv/2.1.3/learn/concepts.md index 70b451e6be..7d6e1a4d02 100644 --- a/content/riak/kv/2.1.3/learn/concepts.md +++ b/content/riak/kv/2.1.3/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.1.3/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.1.3/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.1.3/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters -[concept crdts]: /riak/kv/2.1.3/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.1.3/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.3/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.3/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.1.3/learn/concepts/vnodes -[config index]: /riak/kv/2.1.3/configuring -[plan index]: /riak/kv/2.1.3/setup/planning -[use index]: /riak/kv/2.1.3/using/ +[concept aae]: {{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.1.3/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.1.3/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.1.3/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.1.3/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.1.3/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.1.3/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.1.3/configuring +[plan index]: {{}}riak/kv/2.1.3/setup/planning +[use index]: {{}}riak/kv/2.1.3/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.1.3/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.1.3/learn/concepts/active-anti-entropy.md index 06541ad6e4..8fcefac531 100644 --- a/content/riak/kv/2.1.3/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.1.3/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.1.3/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.1.3/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.1.3/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.1.3/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.1.3/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.1.3/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.1.3/developing/usage/search +[usage search]: {{}}riak/kv/2.1.3/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.1.3/learn/concepts/buckets.md b/content/riak/kv/2.1.3/learn/concepts/buckets.md index 5321ed2a54..e8d5f90f2a 100644 --- a/content/riak/kv/2.1.3/learn/concepts/buckets.md +++ b/content/riak/kv/2.1.3/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.1.3/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.1.3/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.1.3/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.1.3/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.1.3/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.1.3/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.1.3/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.3/using/reference/strong-consistency -[config basic]: /riak/kv/2.1.3/configuring/basic -[dev api http]: /riak/kv/2.1.3/developing/api/http -[dev data types]: /riak/kv/2.1.3/developing/data-types -[glossary ring]: /riak/kv/2.1.3/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.1.3/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.3/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.1.3/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.1.3/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.1.3/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.1.3/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.1.3/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.1.3/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.1.3/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.1.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.1.3/configuring/basic +[dev api http]: {{}}riak/kv/2.1.3/developing/api/http +[dev data types]: {{}}riak/kv/2.1.3/developing/data-types +[glossary ring]: {{}}riak/kv/2.1.3/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.1.3/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.3/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.1.3/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.1.3/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.1.3/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.1.3/learn/concepts/capability-negotiation.md b/content/riak/kv/2.1.3/learn/concepts/capability-negotiation.md index 1c0ae1ad9e..05b25793be 100644 --- a/content/riak/kv/2.1.3/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.1.3/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.1.3/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.1.3/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.1.3/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.1.3/developing/usage/mapreduce In versions of Riak prior to 1.2.0, [rolling upgrades][upgrade cluster] from an older version of Riak to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
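Since the full address of every object is the triple (bucket type, bucket, key), the virtual keyspace described above is worth a quick illustration. A sketch with the Python client, using invented names; it assumes an operator has already created and activated a bucket type called `no_siblings`:

```python
import riak

client = riak.RiakClient(pb_port=8087)

# Objects are located by (bucket type, bucket, key).
typed_bucket = client.bucket_type('no_siblings').bucket('sessions')
typed_bucket.new('session-abc123', data={'user': 'ada'}).store()

# The same bucket name under the default type is a separate keyspace,
# so this key does not collide with the one stored above.
default_bucket = client.bucket('sessions')
default_bucket.new('session-abc123', data={'user': 'grace'}).store()
```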
diff --git a/content/riak/kv/2.1.3/learn/concepts/causal-context.md b/content/riak/kv/2.1.3/learn/concepts/causal-context.md index 3bf6f048bf..10a5d3b1dc 100644 --- a/content/riak/kv/2.1.3/learn/concepts/causal-context.md +++ b/content/riak/kv/2.1.3/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.1.3/developing/api/http -[dev key value]: /riak/kv/2.1.3/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.1.3/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.1.3/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.1.3/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.1.3/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.1.3/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.1.3/developing/api/http +[dev key value]: {{}}riak/kv/2.1.3/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.1.3/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.1.3/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.1.3/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.1.3/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.1.3/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -78,7 +78,7 @@ sections below. In the case of outcome 2, the choice between **a** and **b** is yours to to make. If you set the `allow_mult` parameter to `true` for a bucket, -[using bucket types](/riak/kv/2.1.3/developing/usage/bucket-types), all writes to that bucket will create siblings +[using bucket types]({{}}riak/kv/2.1.3/developing/usage/bucket-types), all writes to that bucket will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions). 
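When `allow_mult` is `true`, a fetch may therefore return several sibling values that the application must reconcile. A hedged sketch of manual resolution with the Python client; the bucket names are invented, and the choice function (keep the largest value) is purely illustrative:

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('siblings_allowed').bucket('carts')  # invented names

obj = bucket.get('cart1')
if len(obj.siblings) > 1:
    # Application-specific choice: keep the sibling with the most data.
    winner = max(obj.siblings, key=lambda s: len(s.encoded_data or b''))
    obj.siblings = [winner]
    # Storing with the fetched causal context tells Riak the conflict is resolved.
    obj.store()
```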
diff --git a/content/riak/kv/2.1.3/learn/concepts/clusters.md b/content/riak/kv/2.1.3/learn/concepts/clusters.md index d6a3347d41..864e278a2a 100644 --- a/content/riak/kv/2.1.3/learn/concepts/clusters.md +++ b/content/riak/kv/2.1.3/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.1.3/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.1.3/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.3/learn/concepts/replication -[glossary node]: /riak/kv/2.1.3/learn/glossary/#node -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.1.3/learn/dynamo -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.1.3/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.1.3/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.1.3/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.1.3/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.3/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.1.3/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.1.3/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.1.3/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.1.3/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.1.3/learn/concepts/crdts.md b/content/riak/kv/2.1.3/learn/concepts/crdts.md index 36bb9a9ed4..22ea144212 100644 --- a/content/riak/kv/2.1.3/learn/concepts/crdts.md +++ b/content/riak/kv/2.1.3/learn/concepts/crdts.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context dvv]: /riak/kv/2.1.3/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.1.3/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.1.3/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.1.3/developing/data-types -[glossary node]: /riak/kv/2.1.3/learn/glossary/#node -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.1.3/developing/usage/conflict-resolution +[concept causal context dvv]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.1.3/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.1.3/developing/data-types +[glossary node]: {{}}riak/kv/2.1.3/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.1.3/developing/usage/conflict-resolution A pure key/value store is completely agnostic toward the data stored @@ -32,7 +32,7 @@ within it. Any key can be associated with values of any conceivable type, from short strings to large JSON objects to video files. Riak began as a pure key/value store, but over time it has become more and more aware of the data stored in it through features like [secondary -indexes](/riak/kv/2.1.3/developing/usage/secondary-indexes/) and [Search](/riak/kv/2.1.3/developing/usage/search/). +indexes]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes/) and [Search]({{}}riak/kv/2.1.3/developing/usage/search/). In version 2.0, Riak continued this evolution by introducing a series of eventually convergent **Data Types**. Riak Data Types are convergent @@ -214,7 +214,7 @@ The beauty of Data Types is that Riak "knows" how to resolve value conflicts by applying Data Type-specific rules. In general, Riak does this by remembering the **history** of a value and broadcasting that history along with the current value in the form of a [context -object](/riak/kv/2.1.3/developing/data-types/#Data-Types-and-Context) that is similar to a +object]({{}}riak/kv/2.1.3/developing/data-types/#Data-Types-and-Context) that is similar to a [vector clock][concept causal context vc] or `[dotted version vectors][concept causal context dvv]. Riak uses the history of each Data Type to make deterministic judgments about which value should be deemed correct. 
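Because Riak tracks that history itself, the client-side Data Type API stays small. A sketch using the Python client's datatype support (assuming the 2.x `riak` package); it also assumes an operator has created and activated a bucket type, here called `counters`, with the `datatype = counter` property:

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('counters').bucket('page_views')  # invented names

counter = bucket.new('front-page')
counter.increment(1)
counter.store()

# Concurrent increments from other clients converge without siblings;
# re-fetching returns the merged value.
print(bucket.get('front-page').value)
```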
diff --git a/content/riak/kv/2.1.3/learn/concepts/eventual-consistency.md b/content/riak/kv/2.1.3/learn/concepts/eventual-consistency.md index f8b0b29048..aef2b76dbd 100644 --- a/content/riak/kv/2.1.3/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.1.3/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.1.3/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters -[concept replication]: /riak/kv/2.1.3/learn/concepts/replication -[glossary node]: /riak/kv/2.1.3/learn/glossary/#node -[glossary read rep]: /riak/kv/2.1.3/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.1.3/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.1.3/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.1.3/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.1.3/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.1.3/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.1.3/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.1.3/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.1.3/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.1.3/developing/data-modeling/). +or models]({{}}riak/kv/2.1.3/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
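Those tunable replication properties can also be set per request from the clients. A sketch with the Python client; the quorum values below are illustrative, not recommendations:

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('ledger')  # invented bucket name

# Require all replicas to acknowledge this write (w=3 with the default n_val),
# and require two of them to have persisted it to disk (dw=2).
entry = bucket.new('txn-1', data={'amount': 42})
entry.store(w=3, dw=2)

# Reads can be tuned independently; r=1 favors latency over freshness.
fast_read = bucket.get('txn-1', r=1)
```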
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.1.3/learn/concepts/keys-and-objects.md b/content/riak/kv/2.1.3/learn/concepts/keys-and-objects.md index c3dff8bd70..0ca0bbbaaf 100644 --- a/content/riak/kv/2.1.3/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.1.3/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.1.3/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.1.3/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.1.3/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.1.3/learn/concepts/replication.md b/content/riak/kv/2.1.3/learn/concepts/replication.md index 0b24ab11f8..d419161540 100644 --- a/content/riak/kv/2.1.3/learn/concepts/replication.md +++ b/content/riak/kv/2.1.3/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.1.3/learn/concepts/vnodes -[glossary node]: /riak/kv/2.1.3/learn/glossary/#node -[glossary ring]: /riak/kv/2.1.3/learn/glossary/#ring -[usage replication]: /riak/kv/2.1.3/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.1.3/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.1.3/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.1.3/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.1.3/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.1.3/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.1.3/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.1.3/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.1.3/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail. 
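The arithmetic behind that trade-off is simple enough to sanity-check in a few lines of plain Python (no client needed); the `n_val=2` case mirrors the example that follows:

```python
def quorum_check(n_val, r, w):
    """Basic quorum arithmetic for one object with n_val replicas."""
    return {
        'reads_tolerate_down_replicas':  n_val - r,
        'writes_tolerate_down_replicas': n_val - w,
        # R + W > N guarantees every read quorum overlaps every write quorum.
        'read_overlaps_latest_ack_write': r + w > n_val,
    }

# With n_val=2 and r=w=2, no replica may be unreachable, but any successful
# read overlaps the most recent successful write.
print(quorum_check(2, 2, 2))
print(quorum_check(3, 1, 1))  # very available, but quorums need not overlap
```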
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.1.3/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.1.3/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.1.3/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.1.3/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.1.3/learn/concepts/strong-consistency.md b/content/riak/kv/2.1.3/learn/concepts/strong-consistency.md index 6cede1e5c4..fa947d0435 100644 --- a/content/riak/kv/2.1.3/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.1.3/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.1.3/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.1.3/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.1.3/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.1.3/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.1.3/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.1.3/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.1.3/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.1.3/learn/concepts/vnodes.md b/content/riak/kv/2.1.3/learn/concepts/vnodes.md index bf5751d6b9..14cb241f6d 100644 --- a/content/riak/kv/2.1.3/learn/concepts/vnodes.md +++ b/content/riak/kv/2.1.3/learn/concepts/vnodes.md @@ -16,16 +16,16 @@ aliases: --- -[concept causal context]: /riak/kv/2.1.3/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.1.3/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.1.3/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.3/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.1.3/learn/glossary/#node -[glossary ring]: /riak/kv/2.1.3/learn/glossary/#ring -[perf strong consistency]: /riak/kv/2.1.3/using/performance/strong-consistency -[plan backend]: /riak/kv/2.1.3/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.1.3/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.1.3/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.1.3/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.1.3/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.1.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.3/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.1.3/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.1.3/learn/glossary/#ring +[perf strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[plan backend]: {{}}riak/kv/2.1.3/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.1.3/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.1.3/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -81,7 +81,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -103,7 +103,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.1.3/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.1.3/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.1.3/learn/dynamo.md b/content/riak/kv/2.1.3/learn/dynamo.md index 7e5f94252f..e8791c2bb6 100644 --- a/content/riak/kv/2.1.3/learn/dynamo.md +++ b/content/riak/kv/2.1.3/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.1.3/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.1.3/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.1.3/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.1.3/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. 
It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. -[HTTP API]: /riak/kv/2.1.3/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.1.3/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.1.3/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.1.3/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.1.3/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.1.3/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.1.3/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.1.3/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.1.3/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.1.3/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.1.3/developing/api/http/) +>[REST API]({{}}riak/kv/2.1.3/developing/api/http/) > ->[Writing Data](/riak/kv/2.1.3/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.1.3/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.1.3/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.1.3/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.1.3/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.1.3/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.1.3/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.1.3/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.1.3/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.1.3/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. 
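The preference-list mechanics behind hinted handoff (section 4.6 above) can be sketched without any Riak machinery: hash the key onto a ring, take the next N owners, and route around a down primary while remembering it as a hint so the data can be handed back later. A toy illustration in plain Python, using Riak-style SHA-1 placement; the node names are invented, and it assumes enough healthy nodes remain to place every replica:

```python
import hashlib

ring = ['A', 'B', 'C', 'D']  # invented nodes claiming equal ring slices

def preflist(key, n_val=3, down=frozenset()):
    """First n_val ring positions for the key; a down primary's replica is
    redirected to the next healthy node, tagged with a hint for handoff."""
    start = int(hashlib.sha1(key.encode()).hexdigest(), 16) % len(ring)
    walk = [ring[(start + i) % len(ring)] for i in range(len(ring))]
    fallbacks = (node for node in walk[n_val:] if node not in down)
    targets = []
    for primary in walk[:n_val]:
        if primary not in down:
            targets.append((primary, None))             # normal replica
        else:
            targets.append((next(fallbacks), primary))  # hinted replica
    return targets

print(preflist('artist'))              # three primary replicas, no hints
print(preflist('artist', down={'A'}))  # if A was a primary, its replica is hinted
```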
-[Multi Datacenter Replication]: /riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. > This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.1.3/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.1.3/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.1.3/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propgated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.1.3/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.1.3/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.1.3/setup/planning/backend/ -[Bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.1.3/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.1.3/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.1.3/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.1.3/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.1.3/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.1.3/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.1.3/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.1.3/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. 
Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.1.3/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.1.3/using/performance/benchmarking/ Dynamo is used by several services with different configurations. These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.1.3/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.1.3/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.1.3/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.1.3/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.1.3/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.1.3/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.1.3/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.1.3/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.1.3/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.1.3/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. 
-[Basho Bench]: /riak/kv/2.1.3/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.1.3/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. -[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.1.3/learn/glossary.md b/content/riak/kv/2.1.3/learn/glossary.md index afcee6349d..bb9b1f407f 100644 --- a/content/riak/kv/2.1.3/learn/glossary.md +++ b/content/riak/kv/2.1.3/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.1.3/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.1.3/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.1.3/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters -[concept crdts]: /riak/kv/2.1.3/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.1.3/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.3/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.3/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.1.3/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.1.3/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.1.3/developing/api/http -[dev data model]: /riak/kv/2.1.3/developing/data-modeling -[dev data types]: /riak/kv/2.1.3/developing/data-types -[glossary read rep]: /riak/kv/2.1.3/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.1.3/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.1.3/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.1.3/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.1.3/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.3/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.1.3/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.1.3/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.1.3/developing/api/http +[dev data model]: {{}}riak/kv/2.1.3/developing/data-modeling +[dev data types]: {{}}riak/kv/2.1.3/developing/data-types +[glossary read rep]: {{}}riak/kv/2.1.3/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.1.3/learn/dynamo -[plan cluster 
capacity]: /riak/kv/2.1.3/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.1.3/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.1.3/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.1.3/learn/dynamo +[plan cluster capacity]: {{}}riak/kv/2.1.3/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.1.3/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.1.3/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.1.3/developing/usage/mapreduce -[usage search]: /riak/kv/2.1.3/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.1.3/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.1.3/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.1.3/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.1.3/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.1.3/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.1.3/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. 
-* [Bucket Types](/riak/kv/2.1.3/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.1.3/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.1.3/learn/use-cases.md b/content/riak/kv/2.1.3/learn/use-cases.md index d55ade2856..15e1932cd8 100644 --- a/content/riak/kv/2.1.3/learn/use-cases.md +++ b/content/riak/kv/2.1.3/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.1.3/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.1.3/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.1.3/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.1.3/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.1.3/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.1.3/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.1.3/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.1.3/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.1.3/developing/data-types -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.1.3/developing/usage/mapreduce -[usage search]: /riak/kv/2.1.3/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.1.3/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.1.3/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.1.3/developing/data-modeling/#log-data +[dev data model sensor data]: {{}}riak/kv/2.1.3/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.1.3/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.1.3/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.1.3/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.1.3/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.1.3/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.1.3/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.1.3/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.1.3/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.1.3/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.1.3/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. 
In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.1.3/learn/why-riak-kv.md b/content/riak/kv/2.1.3/learn/why-riak-kv.md index b1936d32af..7a7a25ba48 100644 --- a/content/riak/kv/2.1.3/learn/why-riak-kv.md +++ b/content/riak/kv/2.1.3/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.1.3/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.1.3/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency +[apps replication properties]: {{< baseurl >}}riak/kv/2.1.3/developing/app-guide/replication-properties +[Basho Bench]: {{< baseurl >}}riak/kv/2.1.3/using/performance/benchmarking +[cluster ops strong consistency]: {{< baseurl >}}riak/kv/2.1.3/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.1.3/developing/data-types -[glossary read rep]: /riak/kv/2.1.3/learn/glossary/#read-repair +[dev data types]: {{< baseurl >}}riak/kv/2.1.3/developing/data-types +[glossary read rep]: {{< baseurl >}}riak/kv/2.1.3/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.1.3/release-notes.md b/content/riak/kv/2.1.3/release-notes.md index 3826f6dcd5..69abc0fe51 100644 --- a/content/riak/kv/2.1.3/release-notes.md +++ b/content/riak/kv/2.1.3/release-notes.md @@ -44,7 +44,7 @@ In the above example, the corrected `default_bucket_props` section would look li }] ``` -If you have been depending on this behavior unintentionally, learn more about what `allow_mult` and `dvv_enabled` imply in our [Conflict Resolution](http://docs.basho.com/riak/latest/dev/using/conflict-resolution/) documentation. +If you have been depending on this behavior unintentionally, learn more about what `allow_mult` and `dvv_enabled` imply in our [Conflict Resolution]({{< baseurl >}}riak/kv/latest/dev/using/conflict-resolution/) documentation. For more information about this change, please see [issue #727](https://github.com/basho/riak/issues/727). @@ -138,7 +138,7 @@ For more information about this change, please see [issue #727](https://github.c ## Fixes Riak 2.1.0 introduced a bug that has been fixed in Riak 2.1.1. The default configuration for handoff.ip caused vnodes marked for transfer during handoff to be removed without transferring data to their new destination nodes. A mandatory change to configuration (riak.conf) mitigates this issue for 2.1.0 users. While not all users were impacted by this issue, we recommend that all 2.1.0 users upgrade to 2.1.1. -Detailed information on the issue is available in the Basho Documentation [Product Advisories](http://docs.basho.com/community/productadvisories/210-dataloss/). +Detailed information on the issue is available in the Basho Documentation [Product Advisories]({{< baseurl >}}community/productadvisories/210-dataloss/). * Make default `handoff_ip` value 0.0.0.0 in vars.config.
* [riak/pull/734](https://github.com/basho/riak/pull/734) diff --git a/content/riak/kv/2.1.3/setup/downgrade.md b/content/riak/kv/2.1.3/setup/downgrade.md index 105ee032ee..b832cb04d2 100644 --- a/content/riak/kv/2.1.3/setup/downgrade.md +++ b/content/riak/kv/2.1.3/setup/downgrade.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.3/ops/upgrading/rolling-downgrades/ --- -[rolling upgrade]: /riak/kv/2.1.3/setup/upgrading/cluster -[config ref]: /riak/kv/2.1.3/configuring/reference -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy/ -[aae status]: /riak/kv/2.1.3/using/admin/riak-admin/#aae-status +[rolling upgrade]: {{}}riak/kv/2.1.3/setup/upgrading/cluster +[config ref]: {{}}riak/kv/2.1.3/configuring/reference +[concept aae]: {{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/ +[aae status]: {{}}riak/kv/2.1.3/using/admin/riak-admin/#aae-status Downgrades of Riak are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade]. diff --git a/content/riak/kv/2.1.3/setup/installing.md b/content/riak/kv/2.1.3/setup/installing.md index 3789c1876f..67ac2a6008 100644 --- a/content/riak/kv/2.1.3/setup/installing.md +++ b/content/riak/kv/2.1.3/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.1.3/installing/ --- -[install aws]: /riak/kv/2.1.3/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.1.3/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.1.3/setup/installing/freebsd -[install mac osx]: /riak/kv/2.1.3/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.1.3/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.1.3/setup/installing/smartos -[install solaris]: /riak/kv/2.1.3/setup/installing/solaris -[install suse]: /riak/kv/2.1.3/setup/installing/suse -[install windows azure]: /riak/kv/2.1.3/setup/installing/windows-azure -[install source index]: /riak/kv/2.1.3/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.1.3/setup/upgrading +[install aws]: {{}}riak/kv/2.1.3/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.1.3/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.1.3/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.1.3/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.1.3/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.1.3/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.1.3/setup/installing/solaris +[install suse]: {{}}riak/kv/2.1.3/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.1.3/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.1.3/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.1.3/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.1.3/setup/installing/amazon-web-services.md b/content/riak/kv/2.1.3/setup/installing/amazon-web-services.md index fc5dc2e4db..de56a8b91a 100644 --- a/content/riak/kv/2.1.3/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.1.3/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. 
@@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.1.3/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.1.3/using/security/). ## Clustering Riak on AWS diff --git a/content/riak/kv/2.1.3/setup/installing/debian-ubuntu.md b/content/riak/kv/2.1.3/setup/installing/debian-ubuntu.md index c0c06f5749..7a1d1eb006 100644 --- a/content/riak/kv/2.1.3/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.1.3/setup/installing/debian-ubuntu.md @@ -18,10 +18,10 @@ aliases: - /riak/kv/2.1.3/installing/debian-ubuntu/ --- -[install source index]: /riak/kv/2.1.3/setup/installing/source/ -[security index]: /riak/kv/2.1.3/using/security/ -[install source erlang]: /riak/kv/2.1.3/setup/installing/source/erlang -[install verify]: /riak/kv/2.1.3/setup/installing/verify +[install source index]: {{}}riak/kv/2.1.3/setup/installing/source/ +[security index]: {{}}riak/kv/2.1.3/using/security/ +[install source erlang]: {{}}riak/kv/2.1.3/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.1.3/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.1.3/setup/installing/freebsd.md b/content/riak/kv/2.1.3/setup/installing/freebsd.md index 9844393d8c..a21fa2e53f 100644 --- a/content/riak/kv/2.1.3/setup/installing/freebsd.md +++ b/content/riak/kv/2.1.3/setup/installing/freebsd.md @@ -18,9 +18,9 @@ aliases: - /riak/kv/2.1.3/installing/freebsd/ --- -[install source erlang]: /riak/kv/2.1.3/setup/installing/source/erlang -[downloads]: /riak/kv/2.1.3/downloads/ -[install verify]: /riak/kv/2.1.3/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.1.3/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.1.3/downloads/ +[install verify]: {{}}riak/kv/2.1.3/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.1.3/setup/installing/mac-osx.md b/content/riak/kv/2.1.3/setup/installing/mac-osx.md index 37798caf81..7e12221195 100644 --- a/content/riak/kv/2.1.3/setup/installing/mac-osx.md +++ b/content/riak/kv/2.1.3/setup/installing/mac-osx.md @@ -18,9 +18,9 @@ aliases: - /riak/kv/2.1.3/installing/mac-osx/ --- -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.1.3/setup/installing/source/erlang -[install verify]: /riak/kv/2.1.3/setup/installing/verify +[perf open files]: {{}}riak/kv/2.1.3/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.1.3/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.1.3/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. 
You can install from source or download a diff --git a/content/riak/kv/2.1.3/setup/installing/rhel-centos.md b/content/riak/kv/2.1.3/setup/installing/rhel-centos.md index f59d270ef9..05691f9a0e 100644 --- a/content/riak/kv/2.1.3/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.1.3/setup/installing/rhel-centos.md @@ -18,9 +18,9 @@ aliases: - /riak/kv/2.1.3/installing/rhel-centos/ --- -[install source index]: /riak/kv/2.1.3/setup/installing/source -[install source erlang]: /riak/kv/2.1.3/setup/installing/source/erlang -[install verify]: /riak/kv/2.1.3/setup/installing/verify +[install source index]: {{}}riak/kv/2.1.3/setup/installing/source +[install source erlang]: {{}}riak/kv/2.1.3/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.1.3/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.1.3/setup/installing/smartos.md b/content/riak/kv/2.1.3/setup/installing/smartos.md index e9e2c35009..b5d2314c2f 100644 --- a/content/riak/kv/2.1.3/setup/installing/smartos.md +++ b/content/riak/kv/2.1.3/setup/installing/smartos.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.1.3/installing/smartos/ --- -[install verify]: /riak/kv/2.1.3/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.3/setup/installing/verify The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. diff --git a/content/riak/kv/2.1.3/setup/installing/solaris.md b/content/riak/kv/2.1.3/setup/installing/solaris.md index 0885fc58ba..09d6b3f57d 100644 --- a/content/riak/kv/2.1.3/setup/installing/solaris.md +++ b/content/riak/kv/2.1.3/setup/installing/solaris.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.1.3/installing/solaris/ --- -[install verify]: /riak/kv/2.1.3/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.3/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. 
diff --git a/content/riak/kv/2.1.3/setup/installing/source.md b/content/riak/kv/2.1.3/setup/installing/source.md index 60b154102f..e3ed66162c 100644 --- a/content/riak/kv/2.1.3/setup/installing/source.md +++ b/content/riak/kv/2.1.3/setup/installing/source.md @@ -18,13 +18,13 @@ aliases: - /riak/kv/2.1.3/installing/source/ --- -[install source erlang]: /riak/kv/2.1.3/setup/installing/source/erlang -[downloads]: /riak/kv/2.1.3/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.1.3/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.1.3/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.1.3/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.1.3/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.1.3/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.1.3/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.1.3/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.1.3/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.1.3/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.1.3/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.1.3/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.1.3/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.1.3/setup/installing/source/erlang.md b/content/riak/kv/2.1.3/setup/installing/source/erlang.md index d69d7d1da8..76a3ec17dd 100644 --- a/content/riak/kv/2.1.3/setup/installing/source/erlang.md +++ b/content/riak/kv/2.1.3/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.1.3/installing/source/erlang/ --- -[install index]: /riak/kv/2.1.3/setup/installing -[security basics]: /riak/kv/2.1.3/using/security/basics +[install index]: {{}}riak/kv/2.1.3/setup/installing +[security basics]: {{}}riak/kv/2.1.3/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho8.tar.gz). 
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.1.3/setup/installing/source/jvm.md b/content/riak/kv/2.1.3/setup/installing/source/jvm.md index bf1f78dbbf..9016087920 100644 --- a/content/riak/kv/2.1.3/setup/installing/source/jvm.md +++ b/content/riak/kv/2.1.3/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.1.3/installing/source/jvm/ --- -[usage search]: /riak/kv/2.1.3/developing/usage/search +[usage search]: {{}}riak/kv/2.1.3/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.1.3/setup/installing/suse.md b/content/riak/kv/2.1.3/setup/installing/suse.md index 8ae9306143..966dec7bb3 100644 --- a/content/riak/kv/2.1.3/setup/installing/suse.md +++ b/content/riak/kv/2.1.3/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.1.3/installing/suse/ --- -[install verify]: /riak/kv/2.1.3/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.3/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.1.3/setup/installing/verify.md b/content/riak/kv/2.1.3/setup/installing/verify.md index 49067cb13a..446388c817 100644 --- a/content/riak/kv/2.1.3/setup/installing/verify.md +++ b/content/riak/kv/2.1.3/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.1.3/installing/verify-install/ --- -[client libraries]: /riak/kv/2.1.3/developing/client-libraries -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.1.3/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.1.3/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.1.3/developing/client-libraries +[perf open files]: {{}}riak/kv/2.1.3/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.1.3/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.1.3/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.1.3/setup/installing/windows-azure.md b/content/riak/kv/2.1.3/setup/installing/windows-azure.md index c65bdc5f12..bb0dbae61e 100644 --- a/content/riak/kv/2.1.3/setup/installing/windows-azure.md +++ b/content/riak/kv/2.1.3/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". 
- ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.1.3/setup/planning/backend.md b/content/riak/kv/2.1.3/setup/planning/backend.md index 44d1608e67..b3de17aa9a 100644 --- a/content/riak/kv/2.1.3/setup/planning/backend.md +++ b/content/riak/kv/2.1.3/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.1.3/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.3/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.3/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.1.3/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.3/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.3/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.1.3/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
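The backend is selected per node in `riak.conf`. As a minimal sketch of what that choice looks like under the 2.x configuration system (the value shown is illustrative; `bitcask`, `leveldb`, `memory`, and `multi` are the options covered by the pages above):

```riakconf
## Storage engine for this node; all nodes in a cluster normally run
## the same backend. A restart is required for the change to take effect.
storage_backend = bitcask
```

Note that switching an existing node's backend does not migrate data already written by the old backend.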
diff --git a/content/riak/kv/2.1.3/setup/planning/backend/bitcask.md b/content/riak/kv/2.1.3/setup/planning/backend/bitcask.md index d0eb9617d8..b4e458fe8f 100644 --- a/content/riak/kv/2.1.3/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.1.3/setup/planning/backend/bitcask.md @@ -17,17 +17,17 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.1.3/using/admin/riak-cli -[config reference]: /riak/kv/2.1.3/configuring/reference -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.1.3/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.1.3/setup/planning/backend/multi -[usage search]: /riak/kv/2.1.3/developing/usage/search -[glossary aae]: /riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.1.3/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.1.3/developing/usage/deleting-objects +[use admin riak cli]: {{< baseurl >}}riak/kv/2.1.3/using/admin/riak-cli +[config reference]: {{< baseurl >}}riak/kv/2.1.3/configuring/reference +[glossary vnode]: {{< baseurl >}}riak/kv/2.1.3/learn/glossary/#vnode +[learn clusters]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/clusters +[plan backend multi]: {{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/multi +[usage search]: {{< baseurl >}}riak/kv/2.1.3/developing/usage/search +[glossary aae]: {{< baseurl >}}riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{< baseurl >}}riak/kv/2.1.3/using/performance/open-files-limit + +[plan bitcask capacity]: {{< baseurl >}}riak/kv/2.1.3/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{< baseurl >}}riak/kv/2.1.3/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
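For orientation, the Bitcask settings discussed on that page live under the `bitcask.*` namespace of `riak.conf`. A sketch with illustrative values (defaults vary by package; verify against your own `riak.conf`):

```riakconf
## Where Bitcask keeps its per-vnode data files.
bitcask.data_root = $(platform_data_dir)/bitcask
## Merge scheduling: always, never, or window.
bitcask.merge.policy = always
## File I/O mode: erlang (default) or nif.
bitcask.io_mode = erlang
```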
diff --git a/content/riak/kv/2.1.3/setup/planning/backend/leveldb.md b/content/riak/kv/2.1.3/setup/planning/backend/leveldb.md index f628555eb6..a88a5fce58 100644 --- a/content/riak/kv/2.1.3/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.1.3/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.1.3/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode -[config reference]: /riak/kv/2.1.3/configuring/reference -[perf index]: /riak/kv/2.1.3/using/performance -[config reference#aae]: /riak/kv/2.1.3/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.1.3/configuring/reference +[perf index]: {{}}riak/kv/2.1.3/using/performance +[config reference#aae]: {{}}riak/kv/2.1.3/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.1.3/setup/planning/backend/memory.md b/content/riak/kv/2.1.3/setup/planning/backend/memory.md index 76366e960a..2ee4fb543e 100644 --- a/content/riak/kv/2.1.3/setup/planning/backend/memory.md +++ b/content/riak/kv/2.1.3/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.3/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.1.3/configuring/reference -[plan backend multi]: /riak/kv/2.1.3/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.1.3/configuring/reference +[plan backend multi]: {{}}riak/kv/2.1.3/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.1.3/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.1.3/setup/planning/backend/multi.md b/content/riak/kv/2.1.3/setup/planning/backend/multi.md index 90fb408161..0988041f99 100644 --- a/content/riak/kv/2.1.3/setup/planning/backend/multi.md +++ b/content/riak/kv/2.1.3/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.1.3/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.1.3/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.3/setup/planning/backend/memory -[config reference]: /riak/kv/2.1.3/configuring/reference -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.1.3/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.1.3/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.3/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.1.3/configuring/reference +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.1.3/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.1.3/setup/planning/best-practices.md b/content/riak/kv/2.1.3/setup/planning/best-practices.md index cd85e98258..cf664410bb 100644 --- a/content/riak/kv/2.1.3/setup/planning/best-practices.md +++ b/content/riak/kv/2.1.3/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.3/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.1.3/using/reference/handoff -[config mapreduce]: /riak/kv/2.1.3/configuring/mapreduce -[glossary aae]: /riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{< baseurl >}}riak/kv/2.1.3/using/reference/handoff +[config mapreduce]: {{< baseurl >}}riak/kv/2.1.3/configuring/mapreduce +[glossary aae]: {{< baseurl >}}riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{< baseurl >}}riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.1.3/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.1.3/setup/planning/bitcask-capacity-calc.md index ad9034db30..5142fb665c 100644 --- a/content/riak/kv/2.1.3/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.1.3/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask +[plan backend bitcask]: {{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end.
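If you prefer a back-of-the-envelope estimate to the calculators, the RAM side is simple arithmetic: each key in each replica consumes keydir space of roughly a fixed static overhead plus the bucket and key names. A sketch in the Erlang shell, assuming the roughly 44.5-byte static per-key figure these docs cite for 64-bit systems and an `n_val` of 3 (both are assumptions to check against your environment):

```erlang
%% 200 million keys, 10-byte bucket names, 36-byte keys, n_val = 3.
PerKey = 44.5 + 10 + 36.                  %% keydir bytes per key, per replica
TotalBytes = 200000000 * PerKey * 3.      %% each replica keeps its own keydir
TotalBytes / math:pow(1024, 3).           %% ~50.6 GB of RAM across the cluster
```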
diff --git a/content/riak/kv/2.1.3/setup/planning/cluster-capacity.md b/content/riak/kv/2.1.3/setup/planning/cluster-capacity.md index d8b607416c..1ea809e934 100644 --- a/content/riak/kv/2.1.3/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.1.3/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.1.3/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.1.3/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.1.3/setup/planning -[concept replication]: /riak/kv/2.1.3/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.1.3/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.1.3/configuring/reference -[perf benchmark]: /riak/kv/2.1.3/using/performance/benchmarking +[plan backend leveldb]: {{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/leveldb +[plan bitcask capacity]: {{< baseurl >}}riak/kv/2.1.3/setup/planning/bitcask-capacity-calc +[plan index]: {{< baseurl >}}riak/kv/2.1.3/setup/planning +[concept replication]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/replication +[use admin riak-admin#cluster]: {{< baseurl >}}riak/kv/2.1.3/using/admin/riak-admin/#cluster +[config reference]: {{< baseurl >}}riak/kv/2.1.3/configuring/reference +[perf benchmark]: {{< baseurl >}}riak/kv/2.1.3/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.1.3/setup/planning/operating-system.md b/content/riak/kv/2.1.3/setup/planning/operating-system.md index b1d248982c..242c11376d 100644 --- a/content/riak/kv/2.1.3/setup/planning/operating-system.md +++ b/content/riak/kv/2.1.3/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.1.3/downloads/ +[downloads]: {{< baseurl >}}riak/kv/2.1.3/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.1.3/setup/planning/start.md b/content/riak/kv/2.1.3/setup/planning/start.md index d44b45581f..1c458c5555 100644 --- a/content/riak/kv/2.1.3/setup/planning/start.md +++ b/content/riak/kv/2.1.3/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.3/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.1.3/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.1.3/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.1.3/setup/planning/bitcask-capacity-calc +[plan backend]: {{< baseurl >}}riak/kv/2.1.3/setup/planning/backend +[plan cluster capacity]: {{< baseurl >}}riak/kv/2.1.3/setup/planning/cluster-capacity +[plan backend bitcask]: {{< baseurl >}}riak/kv/2.1.3/setup/planning/backend/bitcask +[plan bitcask capacity]: {{< baseurl >}}riak/kv/2.1.3/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster.
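One planning decision is worth flagging because it is hard to revisit: the ring size, which is fixed in `riak.conf` before the cluster is first built. A minimal sketch (the value is illustrative; it must be a power of 2, and changing it on a live cluster requires a full ring-resize operation):

```riakconf
## Number of partitions (vnodes) in the ring; set before joining any nodes.
ring_size = 128
```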
diff --git a/content/riak/kv/2.1.3/setup/upgrading/checklist.md b/content/riak/kv/2.1.3/setup/upgrading/checklist.md index 35fac0b547..c19f9b325d 100644 --- a/content/riak/kv/2.1.3/setup/upgrading/checklist.md +++ b/content/riak/kv/2.1.3/setup/upgrading/checklist.md @@ -16,24 +16,24 @@ aliases: - /riak/kv/2.1.3/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit -[perf index]: /riak/kv/2.1.3/using/performance +[perf open files]: {{}}riak/kv/2.1.3/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.1.3/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.1.3/using/security/basics -[cluster ops load balance]: /riak/kv/2.1.3/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.1.3/configuring/reference -[config backend]: /riak/kv/2.1.3/configuring/backend -[usage search]: /riak/kv/2.1.3/developing/usage/search -[usage conflict resolution]: /riak/kv/2.1.3/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.1.3/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.1.3/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.1.3/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.1.3/using/admin/commands -[use admin riak control]: /riak/kv/2.1.3/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.1.3/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.1.3/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.1.3/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.1.3/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.1.3/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.1.3/configuring/reference +[config backend]: {{}}riak/kv/2.1.3/configuring/backend +[usage search]: {{}}riak/kv/2.1.3/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.1.3/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.1.3/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.1.3/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.1.3/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.1.3/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.1.3/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.1.3/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.1.3/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.1.3/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a production environment from a development or testing environment can be a complex process. While the specific process will depend on your environment and practices, there are some basics for you to consider and a few questions you will want to ask while making this transition. 
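Two of the checklist items above, clock synchronization and the open-files limit, can be spot-checked from a shell before go-live. A minimal sketch, assuming NTP tooling is installed and the shell runs as the same user as Riak:

```bash
# Open-files limit for this shell; the performance pages recommend
# raising it well above typical distribution defaults.
ulimit -n

# Confirm NTP is syncing against reachable peers.
ntpq -p
```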
diff --git a/content/riak/kv/2.1.3/setup/upgrading/cluster.md b/content/riak/kv/2.1.3/setup/upgrading/cluster.md index 6f7a85d78c..7632dbb307 100644 --- a/content/riak/kv/2.1.3/setup/upgrading/cluster.md +++ b/content/riak/kv/2.1.3/setup/upgrading/cluster.md @@ -11,23 +11,23 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.1.3/ops/upgrading/rolling-upgrades/ - /riak/kv/2.1.3/ops/upgrading/rolling-upgrades/ --- -[production checklist]: /riak/kv/2.1.3/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.1.3/using/admin/riak-control -[use admin commands]: /riak/kv/2.1.3/using/admin/commands -[use admin riak-admin]: /riak/kv/2.1.3/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.1.3/developing/usage/secondary-indexes +[production checklist]: {{}}riak/kv/2.1.3/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.1.3/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.1.3/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.1.3/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.1.3/developing/usage/secondary-indexes [release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.1.3/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.1.3/using/reference/jmx -[snmp]: /riak/kv/2.1.3/using/reference/snmp +[cluster ops mdc]: {{}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.1.3/using/reference/jmx +[snmp]: {{}}riak/kv/2.1.3/using/reference/snmp {{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. @@ -38,7 +38,7 @@ recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. -If you run [Riak Control](/riak/kv/2.1.3/using/admin/riak-control), you should disable it during the rolling upgrade process. +If you run [Riak Control]({{}}riak/kv/2.1.3/using/admin/riak-control), you should disable it during the rolling upgrade process. {{% /note %}} Riak KV nodes negotiate with each other to determine supported diff --git a/content/riak/kv/2.1.3/setup/upgrading/search.md b/content/riak/kv/2.1.3/setup/upgrading/search.md index 036a431b63..6df79efd4e 100644 --- a/content/riak/kv/2.1.3/setup/upgrading/search.md +++ b/content/riak/kv/2.1.3/setup/upgrading/search.md @@ -11,7 +11,7 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" --- If you're using Search in a version of Riak prior to 2.0 (1.3.0 to @@ -270,4 +270,4 @@ search property is set to false. 11. Finally, delete the merge index directories to reclaim disk space. -For any questions reach out to the [Riak community](/community). Preferably, ask your questions up front rather than during the middle of a migration. +For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. 
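Concretely, the per-node loop of a rolling upgrade looks roughly like the following sketch (the package filename and node name are hypothetical; substitute your platform's package manager and actual node names):

```bash
riak stop
sudo dpkg -i riak_2.1.3-1_amd64.deb   # hypothetical Debian/Ubuntu package
riak start
riak-admin wait-for-service riak_kv riak@node1.example.com
riak-admin transfers                  # proceed only when no handoffs remain
```

Move on to the next node only after `riak-admin transfers` reports no active transfers.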
diff --git a/content/riak/kv/2.1.3/setup/upgrading/version.md b/content/riak/kv/2.1.3/setup/upgrading/version.md index e55b9af0b9..b50e406e33 100644 --- a/content/riak/kv/2.1.3/setup/upgrading/version.md +++ b/content/riak/kv/2.1.3/setup/upgrading/version.md @@ -21,7 +21,7 @@ explains which default Riak behaviors have changed and specific steps to take for a successful upgrade. For an overview of the new features and functionality -included in version 2.0, check out our guide to [Riak 2.0](/riak/kv/2.1.3/introduction). +included in version 2.0, check out our guide to [Riak 2.0]({{< baseurl >}}riak/kv/2.1.3/introduction). ## New Clients @@ -37,14 +37,14 @@ was built with those features in mind. There are official While we strongly recommend using the newest versions of these clients, older versions will still work with Riak 2.0, with the drawback that -those older clients will not able to take advantage of [new features](/riak/kv/2.1.3/introduction) like [data types](/riak/kv/2.1.3/developing/data-types) or the new [Riak Search](/riak/kv/2.1.3/using/reference/search). +those older clients will not be able to take advantage of [new features]({{< baseurl >}}riak/kv/2.1.3/introduction) like [data types]({{< baseurl >}}riak/kv/2.1.3/developing/data-types) or the new [Riak Search]({{< baseurl >}}riak/kv/2.1.3/using/reference/search). ## Bucket Types In versions of Riak prior to 2.0, the location of objects was -determined by objects' [bucket](/riak/kv/2.1.3/learn/concepts/buckets) and [key](/riak/kv/2.1.3/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties](/riak/kv/2.1.3/developing/usage/bucket-types/). +determined by objects' [bucket]({{< baseurl >}}riak/kv/2.1.3/learn/concepts/buckets) and [key]({{< baseurl >}}riak/kv/2.1.3/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties]({{< baseurl >}}riak/kv/2.1.3/developing/usage/bucket-types/). -In Riak 2.0, [bucket types](/riak/kv/2.1.3/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types](/riak/kv/2.1.3/using/reference/bucket-types). +In Riak 2.0, [bucket types]({{< baseurl >}}riak/kv/2.1.3/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types]({{< baseurl >}}riak/kv/2.1.3/using/reference/bucket-types). Here, we'll list some of the things to be aware of when upgrading. #### Bucket types and object location @@ -57,7 +57,7 @@ is determined by: * key This means there are 3 namespaces involved in object location instead of 2. -A full tutorial can be found in [Using Bucket Types](/riak/kv/2.1.3/using/reference/bucket-types). +A full tutorial can be found in [Using Bucket Types]({{< baseurl >}}riak/kv/2.1.3/using/reference/bucket-types). If your application was written using a version of Riak prior to 2.0, you should make sure that any endpoint in Riak targeting @@ -76,8 +76,8 @@ configurations.
The following URLs are equivalent in Riak 2.0: If you use object locations that don't specify a bucket type, you have three options: -* Accept Riak's [default bucket configurations](/riak/kv/2.1.3/using/reference/bucket-types/#buckets-as-namespaces) -* Change Riak's defaults using your [configuration files](/riak/kv/2.1.3/configuring/reference/#default-bucket-properties) +* Accept Riak's [default bucket configurations]({{}}riak/kv/2.1.3/using/reference/bucket-types/#buckets-as-namespaces) +* Change Riak's defaults using your [configuration files]({{}}riak/kv/2.1.3/configuring/reference/#default-bucket-properties) * Manage multiple sets of bucket properties by specifying those properties for all operations (not recommended) @@ -87,17 +87,17 @@ One reason we recommend using bucket types for Riak 2.0 and later is because many newer Riak features were built with bucket types as a precondition: -* [Strong consistency](/riak/2.1.3/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem +* [Strong consistency]({{}}riak/kv/2.1.3/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem requires you to set the `consistent` parameter on a bucket type to `true` -* [Riak Data Types](/riak/kv/2.1.3/developing/data-types) --- In order to use Riak Data - Types, you must [create bucket types](/riak/kv/2.1.3/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the +* [Riak Data Types]({{}}riak/kv/2.1.3/developing/data-types) --- In order to use Riak Data + Types, you must [create bucket types]({{}}riak/kv/2.1.3/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the Data Type you are using #### Bucket types and downgrades If you decide to use bucket types, please remember that you -cannot [downgrade](/riak/kv/2.1.3/setup/downgrade) your cluster to a version of +cannot [downgrade]({{}}riak/kv/2.1.3/setup/downgrade) your cluster to a version of Riak prior to 2.0 if you have both created and activated a bucket type. @@ -105,20 +105,20 @@ bucket type. One of the biggest changes in version 2.0 regarding application development involves Riak's default -[siblings](/riak/kv/2.1.3/learn/concepts/causal-context/#siblings) behavior. +[siblings]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#siblings) behavior. In versions prior to 2.0, the `allow_mult` setting was set to `false` by default for all buckets. So Riak's default behavior was to resolve -object replica [conflicts](/riak/kv/2.1.3/developing/usage/conflict-resolution) between nodes on its +object replica [conflicts]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution) between nodes on its own; relieving connecting clients of the need to resolve those conflicts. **In 2.0, `allow_mult` is set to `true` for any bucket type that you create and activate.** -This means that the default when [using bucket types](/riak/kv/2.1.3/using/reference/bucket-types/) is to handle [conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution) on the client side using -either traditional [vector clocks](/riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors](/riak/kv/2.1.3/learn/concepts/causal-context/#dotted-version-vector). 
+This means that the default when [using bucket types]({{< baseurl >}}riak/kv/2.1.3/using/reference/bucket-types/) is to handle [conflict resolution]({{< baseurl >}}riak/kv/2.1.3/developing/usage/conflict-resolution) on the client side using +either traditional [vector clocks]({{< baseurl >}}riak/kv/2.1.3/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors]({{< baseurl >}}riak/kv/2.1.3/learn/concepts/causal-context/#dotted-version-vector). If you wish to set `allow_mult` to `false` in version 2.0, you have two options: @@ -127,11 +127,11 @@ options: * Don't use bucket types. More information on handling siblings can be found in our documentation -on [conflict resolution](/riak/kv/2.1.3/developing/usage/conflict-resolution). +on [conflict resolution]({{< baseurl >}}riak/kv/2.1.3/developing/usage/conflict-resolution). ## Enabling Security -The [authentication and authorization](/riak/kv/2.1.3/using/security/basics) mechanisms included with Riak 2.0 should only be turned +The [authentication and authorization]({{< baseurl >}}riak/kv/2.1.3/using/security/basics) mechanisms included with Riak 2.0 should only be turned on after careful testing in a non-production environment. Security changes the way all applications interact with Riak. @@ -141,12 +141,12 @@ If you decide to upgrade to version 2.0, you can still downgrade your cluster to an earlier version of Riak if you wish, _unless_ you perform one of the following actions in your cluster: -* Index data to be used in conjunction with the new [Riak Search](/riak/kv/2.1.3/using/reference/search). -* Create _and_ activate one or more [bucket types](/riak/kv/2.1.3/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: - - [Strong consistency](/riak/2.1.3/using/reference/strong-consistency) - - [Riak Data Types](/riak/kv/2.1.3/developing/data-types) +* Index data to be used in conjunction with the new [Riak Search]({{< baseurl >}}riak/kv/2.1.3/using/reference/search). +* Create _and_ activate one or more [bucket types]({{< baseurl >}}riak/kv/2.1.3/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: + - [Strong consistency]({{< baseurl >}}riak/kv/2.1.3/using/reference/strong-consistency) + - [Riak Data Types]({{< baseurl >}}riak/kv/2.1.3/developing/data-types) -If you use other new features, such as [Riak Security](/riak/kv/2.1.3/using/security/basics) or the new [configuration files](/riak/kv/2.1.3/configuring/reference/), you can still +If you use other new features, such as [Riak Security]({{< baseurl >}}riak/kv/2.1.3/using/security/basics) or the new [configuration files]({{< baseurl >}}riak/kv/2.1.3/configuring/reference/), you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. @@ -156,7 +156,7 @@ Riak 2.0 offers a new configuration system that both simplifies configuration syntax and uses one configuration file, `riak.conf`, instead of the two files, `app.config` and `vm.args`, required by the older system. Full documentation of the new system can be found in -[Configuration Files](/riak/kv/2.1.3/configuring/reference/). +[Configuration Files]({{< baseurl >}}riak/kv/2.1.3/configuring/reference/). If you're upgrading to Riak 2.0 from an earlier version, you have two configuration options: @@ -167,12 +167,12 @@ configuration options: recognized in Riak 2.0.
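To make the `allow_mult` discussion above concrete for clusters staying on the older configuration system, here is a sketch of an `app.config`/`advanced.config` fragment (a sketch, not the verbatim release-notes snippet):

```erlang
%% Fragment of the riak_core section: restore the pre-2.0 sibling
%% behavior for untyped buckets.
{riak_core, [
    {default_bucket_props, [
        {allow_mult, false},
        {dvv_enabled, false}
    ]}
]}
```

Bucket types you create and activate still default to `allow_mult = true` regardless of this setting.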
If you choose the first option, make sure to consult the -[configuration files](/riak/kv/2.1.3/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. +[configuration files]({{< baseurl >}}riak/kv/2.1.3/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. If you choose the second option, Riak will automatically determine that the older configuration system is being used. You should be aware, however, that some settings must be set in an `advanced.config` file. -For a listing of those parameters, see our documentation on [advanced configuration](/riak/kv/2.1.3/configuring/reference/#advanced-configuration). +For a listing of those parameters, see our documentation on [advanced configuration]({{< baseurl >}}riak/kv/2.1.3/configuring/reference/#advanced-configuration). If you choose to keep the existing `app.config` files, you _must_ add the following additional settings in the `riak_core` section: @@ -210,7 +210,7 @@ default to a value of `15`, which can cause problems in some clusters. ## Upgrading Search Information on upgrading Riak Search to 2.0 can be found in our -[Search upgrade guide](/riak/kv/2.1.3/setup/upgrading/search). +[Search upgrade guide]({{< baseurl >}}riak/kv/2.1.3/setup/upgrading/search). ## Migrating from Short Names @@ -221,12 +221,11 @@ and `-name` in `vm.args`. If you are upgrading from a previous version of Riak to 2.0 and are using `-sname` in your `vm.args`, the below steps are required to migrate away from `-sname`. -1. Upgrade to Riak -[1.4.12](http://docs.basho.com/riak/1.4.12/downloads/). +1. Upgrade to Riak 1.4.12. 2. Back up the ring directory on each node, typically located in `/var/lib/riak/ring`. 3. Stop all nodes in your cluster. -4. Run [`riak-admin reip <old_nodename> <new_nodename>`](/riak/kv/2.1.3/using/admin/riak-admin/#reip) on each node in your +4. Run [`riak-admin reip <old_nodename> <new_nodename>`]({{< baseurl >}}riak/kv/2.1.3/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your cluster. For example, in a 5 node cluster this will be run 25 total times, 5 times on each node. The `<old_nodename>` is the current shortname, and the `<new_nodename>` is the new fully qualified hostname.
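Spelled out for a hypothetical three-node cluster, step 4 expands to running the full set of mappings on every node:

```bash
# Run this same block on each of the three stopped nodes (9 runs in total).
riak-admin reip riak@node1 riak@node1.example.com
riak-admin reip riak@node2 riak@node2.example.com
riak-admin reip riak@node3 riak@node3.example.com
```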
diff --git a/content/riak/kv/2.1.3/using.md b/content/riak/kv/2.1.3/using.md index a1bd572b65..3df6f81f50 100644 --- a/content/riak/kv/2.1.3/using.md +++ b/content/riak/kv/2.1.3/using.md @@ -15,7 +15,7 @@ toc: true [use running cluster]: ../using/running-a-cluster [use admin index]: ../using/admin/ [cluster ops index]: ../using/cluster-operations -[repair recover index]: ../repair-recovery +[repair recover index]: ../using/repair-recovery [security index]: ../using/security [perf index]: ../using/performance [troubleshoot index]: ../using/troubleshooting diff --git a/content/riak/kv/2.1.3/using/admin/commands.md b/content/riak/kv/2.1.3/using/admin/commands.md index 668aa74891..a2268776c7 100644 --- a/content/riak/kv/2.1.3/using/admin/commands.md +++ b/content/riak/kv/2.1.3/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.1.3/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.1.3/using/admin/riak-admin/#cluster -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.1.3/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.1.3/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.1.3/using/admin/riak-admin.md b/content/riak/kv/2.1.3/using/admin/riak-admin.md index 54b3f02844..41af92e7e4 100644 --- a/content/riak/kv/2.1.3/using/admin/riak-admin.md +++ b/content/riak/kv/2.1.3/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.1.3/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.1.3/configuring/reference -[use admin commands]: /riak/kv/2.1.3/using/admin/commands -[use admin commands#join]: /riak/kv/2.1.3/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.1.3/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.1.3/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.1.3/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.1.3/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.1.3/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.1.3/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.1.3/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.1.3/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.1.3/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.1.3/setup/downgrade -[security index]: /riak/kv/2.1.3/using/security/ -[security managing]: /riak/kv/2.1.3/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.1.3/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.1.3/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.1.3/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.1.3/using/cluster-operations/strong-consistency -[cluster 
ops handoff]: /riak/kv/2.1.3/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.1.3/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.1.3/configuring/reference +[use admin commands]: {{}}riak/kv/2.1.3/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.1.3/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.1.3/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.1.3/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.1.3/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.1.3/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.1.3/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.1.3/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.1.3/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.1.3/using/cluster-operations/inspecting-node +[use ref monitoring]: {{}}riak/kv/2.1.3/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.1.3/setup/downgrade +[security index]: {{}}riak/kv/2.1.3/using/security/ +[security managing]: {{}}riak/kv/2.1.3/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.1.3/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.1.3/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.1.3/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.1.3/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.1.3/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.1.3/using/admin/riak-admin/#stats ## riak-admin diff --git a/content/riak/kv/2.1.3/using/admin/riak-cli.md b/content/riak/kv/2.1.3/using/admin/riak-cli.md index f39581f036..6d512ec6fa 100644 --- a/content/riak/kv/2.1.3/using/admin/riak-cli.md +++ b/content/riak/kv/2.1.3/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.3/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.1.3/configuring/reference/ +[configuration file]: {{}}riak/kv/2.1.3/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.1.3/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.1.3/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.1.3/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.1.3/configuring/reference/ ## riak diff --git a/content/riak/kv/2.1.3/using/admin/riak-control.md b/content/riak/kv/2.1.3/using/admin/riak-control.md index d60c7a663e..194ff125fd 100644 --- a/content/riak/kv/2.1.3/using/admin/riak-control.md +++ b/content/riak/kv/2.1.3/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.1.3/configuring/reference +[config reference]: {{}}riak/kv/2.1.3/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.1.3/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.1.3/using/security/basics#enabling-ssl). 
Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.1.3/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.1.3/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.1.3/using/cluster-operations.md b/content/riak/kv/2.1.3/using/cluster-operations.md index a904792ee7..11afe55245 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations.md +++ b/content/riak/kv/2.1.3/using/cluster-operations.md @@ -20,7 +20,6 @@ toc: true [ops log]: ./logging [ops backup]: ./backing-up [ops handoff]: ./handoff -[ops obj del]: ./object-deletion [ops strong consistency]: ./strong-consistency [ops v3 mdc]: ./v3-multi-datacenter [ops v2 mdc]: ./v2-multi-datacenter @@ -84,13 +83,6 @@ Information on using the `riak-admin handoff` interface to enable and disable ha [Learn More >>][ops handoff] -#### [Object Deletion][ops obj del] - -Describes possible settings for `delete_mode`. - -[Learn More >>][ops obj del] - - #### [Monitoring Strong Consistency][ops strong consistency] Overview of the various statistics used in monitoring strong consistency. 
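As the strong-consistency hunk later in this patch notes, these statistics come from `riak-admin status` and all carry a `consistent_` prefix. A minimal sketch for pulling just those counters on a node, assuming `riak-admin` is on the `PATH`:

```bash
# Strong-consistency stats share the consistent_ prefix,
# e.g. consistent_gets and consistent_puts.
riak-admin status | grep '^consistent_'
```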
diff --git a/content/riak/kv/2.1.3/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.1.3/using/cluster-operations/active-anti-entropy.md index c262cc7164..166426f8a5 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/active-anti-entropy.md @@ -54,12 +54,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. ## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -87,7 +87,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes.md index c3f79405ab..dd422c9f09 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.1.3/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.1.3/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.1.3/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.1.3/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.1.3/using/cluster-operations/backing-up.md b/content/riak/kv/2.1.3/using/cluster-operations/backing-up.md index f65e903a70..1b38b2cefa 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.1.3/ops/running/backups --- -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters -[config reference]: /riak/kv/2.1.3/configuring/reference -[plan backend leveldb]: /riak/kv/2.1.3/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.1.3/using/reference/strong-consistency -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.1.3/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.1.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.1.3/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.1.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.1.3/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down `](/riak/kv/2.1.3/using/admin/riak-admin/#down) + [`riak-admin down `]({{}}riak/kv/2.1.3/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join `](/riak/kv/2.1.3/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join `]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace `](/riak/kv/2.1.3/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace `]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.1.3/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.1.3/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.1.3/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.1.3/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.1.3/using/cluster-operations/bucket-types.md b/content/riak/kv/2.1.3/using/cluster-operations/bucket-types.md index a67dc4b305..da211ed1f8 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.1.3/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.1.3/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. 
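The "additional setup step on the command line" mentioned above is a one-time create-and-activate pass with `riak-admin bucket-type`. A minimal sketch, with a hypothetical type name and properties:

```bash
# Create a bucket type with custom properties, then activate it
# cluster-wide. The type name and props here are illustrative.
riak-admin bucket-type create shopping_carts '{"props":{"allow_mult":true}}'
riak-admin bucket-type activate shopping_carts
```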
diff --git a/content/riak/kv/2.1.3/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.1.3/using/cluster-operations/changing-cluster-info.md index c77a0aadbe..ba36cc6feb 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.1.3/configuring/reference +[config reference]: {{}}riak/kv/2.1.3/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.1.3/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.1.3/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.1.3/using/cluster-operations/handoff.md b/content/riak/kv/2.1.3/using/cluster-operations/handoff.md index 96a24c989f..af80520ddf 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.1.3/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.1.3/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.1.3/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.1.3/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.1.3/using/cluster-operations/logging.md b/content/riak/kv/2.1.3/using/cluster-operations/logging.md index 8e922c9649..9fe52aff99 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/logging.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e. 
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.1.3/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.1.3/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.1.3/using/cluster-operations/replacing-node.md b/content/riak/kv/2.1.3/using/cluster-operations/replacing-node.md index e921bf8fda..904bab81ce 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.1.3/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.1.3/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.1.3/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.1.3/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.1.3/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.1.3/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.1.3/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.1.3/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.1.3/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.1.3/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.1.3/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.1.3/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.1.3/using/cluster-operations/strong-consistency.md index 20d81b2887..3c9592e585 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.1.3/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.1.3/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.1.3/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.1.3/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.1.3/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.1.3/using/cluster-operations/v2-multi-datacenter.md index 4cf0d474e1..91548e107f 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/v2-multi-datacenter.md @@ -159,7 +159,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -179,7 +179,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -217,7 +217,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.1.3/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.1.3/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -238,7 +238,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter.md index 752907ceb9..0adfd3e43c 100644 --- a/content/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.1.3/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.1.3/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.1.3/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.1.3/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.1.3/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.1.3/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.1.3/using/performance.md b/content/riak/kv/2.1.3/using/performance.md index 01552ed9a0..0bd7c8d66b 100644 --- a/content/riak/kv/2.1.3/using/performance.md +++ b/content/riak/kv/2.1.3/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.1.3/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.1.3/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -237,12 +237,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.1.3/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit/) for more details. 
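Verifying and raising that limit uses standard shell tooling; the value below is only an illustration, and the persistent mechanism varies by platform:

```bash
# Check the open-files limit for the user that runs Riak
ulimit -n

# Raise it for the current session only; make it permanent via
# /etc/security/limits.conf (or your init system's equivalent),
# as described in the Open Files Limit doc.
ulimit -n 65536
```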
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.1.3/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.1.3/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.1.3/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.1.3/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.1.3/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.1.3/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.1.3/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.1.3/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.1.3/using/performance/benchmarking.md b/content/riak/kv/2.1.3/using/performance/benchmarking.md index f9b4e172dd..2e12ca6e27 100644 --- a/content/riak/kv/2.1.3/using/performance/benchmarking.md +++ b/content/riak/kv/2.1.3/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.1.3/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.1.3/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.1.3/using/performance/latency-reduction.md b/content/riak/kv/2.1.3/using/performance/latency-reduction.md index 4b5ed23127..a944772988 100644 --- a/content/riak/kv/2.1.3/using/performance/latency-reduction.md +++ b/content/riak/kv/2.1.3/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.1.3/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.1.3/using/performance/multi-datacenter-tuning.md index e6671813ab..621aae4313 100644 --- a/content/riak/kv/2.1.3/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.1.3/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.1.3/using/performance +[perf index]: {{}}riak/kv/2.1.3/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.1.3/using/performance/open-files-limit.md b/content/riak/kv/2.1.3/using/performance/open-files-limit.md index 608156252f..c6f2ee9e61 100644 --- a/content/riak/kv/2.1.3/using/performance/open-files-limit.md +++ b/content/riak/kv/2.1.3/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/ops/tuning/open-files-limit/ --- -[plan backend bitcask]: /riak/kv/2.1.3/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.1.3/setup/planning/backend/bitcask Riak can consume a large number of open file handles during normal operation. 
The [Bitcask][plan backend bitcask] backend in particular may accumulate a high diff --git a/content/riak/kv/2.1.3/using/reference/bucket-types.md b/content/riak/kv/2.1.3/using/reference/bucket-types.md index 9c036c6ab2..fa6b4dad93 100644 --- a/content/riak/kv/2.1.3/using/reference/bucket-types.md +++ b/content/riak/kv/2.1.3/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.1.3/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.1.3/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.1.3/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.1.3/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.1.3/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. @@ -39,14 +39,14 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype`, `consistent`, and `write_once` properties, related to - [Riak data types](/riak/kv/2.1.3/developing/data-types), [strong consistency](/riak/kv/2.1.3/developing/app-guide/strong-consistency), and - [write-once buckets](/riak/kv/2.1.3/developing/app-guide/write-once) respectively + [Riak data types]({{}}riak/kv/2.1.3/developing/data-types), [strong consistency]({{}}riak/kv/2.1.3/developing/app-guide/strong-consistency), and + [write-once buckets]({{}}riak/kv/2.1.3/developing/app-guide/write-once) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.1.3/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.1.3/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -82,8 +82,8 @@ system of bucket configuration, including the following: `riak-admin bucket-type` interface (discussed in depth below) enables you to manage bucket configurations on the operations side, without recourse to Riak clients. 
-* Some special usecases -- [strong consistency](/riak/kv/2.1.3/configuring/strong-consistency), - [data types](/riak/kv/2.1.3/developing/data-types), and [write-once buckets](/riak/kv/2.1.3/developing/app-guide/write-once) -- are only +* Some special usecases -- [strong consistency]({{}}riak/kv/2.1.3/configuring/strong-consistency), + [data types]({{}}riak/kv/2.1.3/developing/data-types), and [write-once buckets]({{}}riak/kv/2.1.3/developing/app-guide/write-once) -- are only available through bucket properties or bucket types. For these reasons, we recommend _always_ using bucket types in versions @@ -123,7 +123,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.3/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.1.3/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.3/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.3/developing/getting-started) section. If creation is successful, you should see the following output: @@ -525,7 +525,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.1.3/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.1.3/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -538,7 +538,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.1.3/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.1.3/developing/usage/conflict-resolution) with the appropriate application-side business logic. To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -589,8 +589,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.1.3/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.1.3/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. 
This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.1.3/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.1.3/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -718,7 +718,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.1.3/learn/concepts/buckets) and [keys](/riak/kv/2.1.3/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.1.3/learn/concepts/buckets) and [keys]({{}}riak/kv/2.1.3/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.1.3/using/reference/custom-code.md b/content/riak/kv/2.1.3/using/reference/custom-code.md index c85fd4b60a..fffb8f2494 100644 --- a/content/riak/kv/2.1.3/using/reference/custom-code.md +++ b/content/riak/kv/2.1.3/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.1.3/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.1.3/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.1.3/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.1.3/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.1.3/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.1.3/using/admin/riak-admin/#wait-for-service). {{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.1.3/using/reference/handoff.md b/content/riak/kv/2.1.3/using/reference/handoff.md index 5d2aa0f04d..201b6f5d47 100644 --- a/content/riak/kv/2.1.3/using/reference/handoff.md +++ b/content/riak/kv/2.1.3/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.1.3/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.1.3/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. 
-Hinted handoff occurs when a [vnode](/riak/kv/2.1.3/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.1.3/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.1.3/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.1.3/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.1.3/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.1.3/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.1.3/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.1.3/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.1.3/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.1.3/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.1.3/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.1.3/configuring/reference/#vnode_management_timer). Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.1.3/using/reference/jmx.md b/content/riak/kv/2.1.3/using/reference/jmx.md index 37778ff1f2..d032b4c060 100644 --- a/content/riak/kv/2.1.3/using/reference/jmx.md +++ b/content/riak/kv/2.1.3/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.3/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.1.3/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). 
+Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.1.3/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.1.3/using/reference/logging.md b/content/riak/kv/2.1.3/using/reference/logging.md index efe8a64562..2d9a081c77 100644 --- a/content/riak/kv/2.1.3/using/reference/logging.md +++ b/content/riak/kv/2.1.3/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.1.3/ops/running/logging --- -[cluster ops log]: /riak/kv/2.1.3/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.1.3/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files](/riak/kv/2.1.3/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.1.3/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -46,7 +46,7 @@ File | Significance `console.log` | Console log output `crash.log` | Crash logs `erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. -`error.log` | [Common errors](../../repair-recover/errors) emitted by Riak. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. `run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. ## Log Syntax @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.1.3/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.1.3/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. 
* `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.1.3/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.1.3/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.1.3/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.1.3/using/reference/multi-datacenter/comparison.md index 4a06334624..51afc3d532 100644 --- a/content/riak/kv/2.1.3/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.1.3/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.3/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.1.3/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.1.3/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.1.3/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.1.3/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. -* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.1.3/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.1.3/using/reference/runtime-interaction.md b/content/riak/kv/2.1.3/using/reference/runtime-interaction.md index f58502e22f..387b8943f0 100644 --- a/content/riak/kv/2.1.3/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.1.3/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.1.3/ops/advanced/runtime --- -[config reference]: /riak/kv/2.1.3/configuring/reference -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.1.3/configuring/reference +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements of the underlying operating system: distribution ports and OS diff --git a/content/riak/kv/2.1.3/using/reference/search.md b/content/riak/kv/2.1.3/using/reference/search.md index 9c67db4b59..4ce8a69b74 100644 --- a/content/riak/kv/2.1.3/using/reference/search.md +++ b/content/riak/kv/2.1.3/using/reference/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.1.3/dev/advanced/search --- -[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.1.3/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). 
For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak Search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -Search, you should check out the [Using Search](/riak/kv/2.1.3/developing/usage/search) document. +Search, you should check out the [Using Search]({{}}riak/kv/2.1.3/developing/usage/search) document. -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -126,7 +126,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.1.3/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.1.3/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -288,7 +288,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on to how customize your own [search schema](/riak/kv/2.1.3/developing/usage/search-schemas). +You can also find more information on to how customize your own [search schema]({{}}riak/kv/2.1.3/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -298,7 +298,7 @@ indexed. ## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.1.3/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -353,7 +353,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.1.3/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.1.3/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.1.3/using/reference/secondary-indexes.md b/content/riak/kv/2.1.3/using/reference/secondary-indexes.md index 1bbd238051..7843354efd 100644 --- a/content/riak/kv/2.1.3/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.1.3/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.1.3/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types -[use ref strong consistency]: /riak/2.1.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.1.3/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.1.3/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. 
if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.1.3/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.1.3/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.1.3/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.1.3/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.1.3/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.1.3/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.1.3/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.1.3/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.1.3/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.1.3/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. 
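Because index entries ride along as key/value metadata on the object, writing and querying them over HTTP is a two-step affair. A sketch against a node's HTTP API, assuming the default port 8098 and illustrative bucket, key, and index names:

```bash
# Attach index metadata at write time via x-riak-index-* headers
curl -XPUT http://localhost:8098/buckets/users/keys/john \
  -H 'x-riak-index-email_bin: john@example.com' \
  -H 'Content-Type: application/json' \
  -d '{"name":"John"}'

# Ask the index for all keys matching an exact value
curl http://localhost:8098/buckets/users/index/email_bin/john@example.com
```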
diff --git a/content/riak/kv/2.1.3/using/reference/statistics-monitoring.md b/content/riak/kv/2.1.3/using/reference/statistics-monitoring.md
index fff58a7495..f5772cd0e1 100644
--- a/content/riak/kv/2.1.3/using/reference/statistics-monitoring.md
+++ b/content/riak/kv/2.1.3/using/reference/statistics-monitoring.md
@@ -17,13 +17,13 @@ aliases:

Riak provides data related to current operating status, which includes
statistics in the form of counters and histograms. These statistics
-are made available through the HTTP API via the [`/stats`](/riak/kv/2.1.3/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.1.3/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/2.1.3/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.

This page presents the most commonly monitored and gathered
statistics, as well as numerous solutions for monitoring and gathering
statistics that our customers and community report using successfully
in Riak cluster environments. You can learn more about the specific
-Riak statistics provided in the [Inspecting a Node](/riak/kv/2.1.3/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.1.3/developing/api/http/status) documentation.
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/status) documentation.

## System Metrics To Graph

@@ -47,7 +47,7 @@ We also recommend tracking your system's virtual

and writebacks. Things like massive flushes of dirty pages or
steadily climbing writeback volumes can indicate poor virtual memory
tuning. More information can be found [here][sysctl_vm_txt] and in our
-documentation on [system tuning](/riak/kv/2.1.3/using/performance/#storage-and-file-system-tuning).
+documentation on [system tuning]({{<baseurl>}}riak/kv/2.1.3/using/performance/#storage-and-file-system-tuning).

## Riak Metrics to Graph

@@ -57,7 +57,7 @@ Riak metrics fall into several general categories:

 1. Throughput metrics
 2. Latency metrics
 3. Erlang resource usage metrics
 4. General Riak load/health metrics

-If graphing all of the [available Riak metrics](/riak/kv/2.1.3/using/cluster-operations/inspecting-node) is
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/inspecting-node) is
not practical, you should pick a minimum relevant subset from these
categories. Some of the most helpful metrics are discussed below.

@@ -143,7 +143,7 @@ Metric | Also | Notes

## Command-line Interface

-The [`riak-admin`](/riak/kv/2.1.3/using/admin/riak-admin/) tool provides two
+The [`riak-admin`]({{<baseurl>}}riak/kv/2.1.3/using/admin/riak-admin/) tool provides two
interfaces for retrieving statistics and other information: `status`
and `stat`.

@@ -168,14 +168,14 @@ consistent_get_objsize_195 : 0
```

A comprehensive list of available stats can be found in the
-[Inspecting a Node](/riak/kv/2.1.3/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+[Inspecting a Node]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/inspecting-node/#riak-admin-status) document.

### stat

The `riak-admin stat` command is related to the `riak-admin status`
command but provides a more fine-grained interface for interacting with
stats and information.
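As a quick sanity check from the command line, a sketch assuming the default HTTP port 8098 and `riak-admin` on the PATH:

```bash
# Pull the full stats blob from the HTTP endpoint
curl -s http://localhost:8098/stats | python -m json.tool | head -n 20

# Or inspect a few key counters directly
riak-admin status | grep -E 'node_gets|node_puts|read_repairs'
```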
Full documentation of this command can be found
-in the [Inspecting a Node](/riak/kv/2.1.3/using/cluster-operations/inspecting-node/#riak-admin-stat) document.
+in the [riak-admin]({{<baseurl>}}riak/kv/2.1.3/using/admin/riak-admin/#stat) document.

## Statistics and Monitoring Tools

@@ -222,7 +222,7 @@ troubleshooting issues on Riak nodes.

#### Riak Control

-[Riak Control](/riak/kv/2.1.3/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak
+[Riak Control]({{<baseurl>}}riak/kv/2.1.3/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak
clusters. It is designed to give you quick insight into the health of
your cluster and allow for easy management of nodes.

@@ -246,7 +246,7 @@ clusters and grids.

Customers and community members using Riak have reported success in
using Ganglia to monitor Riak clusters.

A [Riak Ganglia module][riak_ganglia] for collecting statistics from
-the Riak HTTP [`/stats`](/riak/kv/2.1.3/developing/api/http/status) endpoint is also available.
+the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/status) endpoint is also available.

#### Nagios

@@ -320,14 +320,14 @@ capacity planning in a Riak cluster environment.

that can provide information on the current and past states of Riak
nodes and visualizations of machine generated data such as log files.

-A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.1.3/developing/api/http/status) endpoint is also available.
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/status) endpoint is also available.

#### Splunk

[Splunk](http://www.splunk.com) is available as downloadable software or
as a service, and provides tools for visualization of machine generated
data such as log files. It can be connected to Riak's HTTP statistics
-[`/stats`](/riak/kv/2.1.3/developing/api/http/status) endpoint.
+[`/stats`]({{<baseurl>}}riak/kv/2.1.3/developing/api/http/status) endpoint.

Splunk can be used to aggregate all Riak cluster node operational log
files, including operating system and Riak-specific logs and Riak

@@ -349,8 +349,8 @@ Docs](https://github.com/basho/basho_docs).

## References

-* [Inspecting a Node](/riak/kv/2.1.3/using/cluster-operations/inspecting-node)
-* [Riak Control](/riak/kv/2.1.3/using/admin/riak-control/)
+* [Inspecting a Node]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/inspecting-node)
+* [Riak Control]({{<baseurl>}}riak/kv/2.1.3/using/admin/riak-control/)
 * [collectd](http://collectd.org)
 * [Ganglia](http://ganglia.info)
 * [Nagios](http://www.nagios.org)

@@ -366,9 +366,9 @@ Docs](https://github.com/basho/basho_docs).
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt
-[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters
-[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets
-[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps
+[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/
+[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/
+[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/
 [riak_nagios]: https://github.com/basho/riak_nagios
 [tcollector]: https://github.com/stumbleupon/tcollector
 [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py

diff --git a/content/riak/kv/2.1.3/using/reference/strong-consistency.md b/content/riak/kv/2.1.3/using/reference/strong-consistency.md
index c48fd18fe5..49cb2782cd 100644
--- a/content/riak/kv/2.1.3/using/reference/strong-consistency.md
+++ b/content/riak/kv/2.1.3/using/reference/strong-consistency.md
@@ -12,8 +12,8 @@ menu:
 toc: true
---

-[usage bucket types]: /riak/kv/2.1.3/developing/usage/bucket-types
-[concept eventual consistency]: /riak/kv/2.1.3/learn/concepts/eventual-consistency
+[usage bucket types]: {{<baseurl>}}riak/kv/2.1.3/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.1.3/learn/concepts/eventual-consistency

{{% note title="Please Note:" %}}
Riak KV's strong consistency is an experimental feature and may be removed

@@ -24,7 +24,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its
usage in any production environment.
{{% /note %}}

-Riak was originally designed as an [eventually consistent](/riak/kv/2.1.3/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.1.3/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
(i.e. fault) tolerance and high read and write availability.

While this focus on high availability is a great fit for many data

@@ -35,7 +35,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket
types], enables developers to apply strong consistency guarantees on a
per-key basis.

-Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.1.3/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.1.3/configuring/strong-consistency) looking to manage,
+Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.1.3/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.1.3/configuring/strong-consistency) looking to manage,
configure, and monitor strong consistency.

## Strong vs. Eventual Consistency

@@ -143,12 +143,12 @@ tolerance. Consistent operations can still succeed when a minority of
replicas in each ensemble can be offline, faulty, or unreachable. In
other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the
-[operations](/riak/kv/2.1.3/configuring/strong-consistency/#fault-tolerance)
+[operations]({{<baseurl>}}riak/kv/2.1.3/configuring/strong-consistency/#fault-tolerance)
documentation.

A second trade-off regards performance.
Riak's implementation of strong
-consistency involves a complex [consensus subsystem](/riak/kv/2.1.3/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/2.1.3/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
which can entail a performance hit of varying proportions, depending on
a variety of factors.

-Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.1.3/configuring/strong-consistency/#performance).
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/2.1.3/configuring/strong-consistency/#performance).

diff --git a/content/riak/kv/2.1.3/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.1.3/using/reference/v2-multi-datacenter/architecture.md
index 45d4ef251d..d7a5e81698 100644
--- a/content/riak/kv/2.1.3/using/reference/v2-multi-datacenter/architecture.md
+++ b/content/riak/kv/2.1.3/using/reference/v2-multi-datacenter/architecture.md
@@ -78,7 +78,7 @@ replication, as illustrated in the Figure below.

 2. The site node in the secondary cluster initiates fullsync
    replication with the primary node by sending a message to the
    listener node in the primary cluster
-3. The site and listener nodes iterate through each [vnode](/riak/kv/2.1.3/learn/glossary/#vnode) in their respective clusters and compute a hash for
+3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/2.1.3/learn/glossary/#vnode) in their respective clusters and compute a hash for
   each key's object value. The site node on the secondary cluster sends
   its complete list of key/hash pairs to the listener node in the
   primary cluster. The listener node then sequentially compares its

@@ -90,7 +90,7 @@ replication, as illustrated in the Figure below.

   achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png)
+![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png)
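Once listeners and sites are configured, a fullsync exchange can also be driven by hand with the v2 `riak-repl` commands; a sketch (scheduling is normally handled by the fullsync interval configuration):

```bash
riak-repl start-fullsync    # kick off a fullsync exchange now
riak-repl status            # watch progress counters while it runs
riak-repl cancel-fullsync   # abort the exchange if necessary
```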
## Realtime Replication @@ -108,7 +108,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png)
+![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png)
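Realtime replication begins once a listener on the primary cluster and a site on the secondary cluster are wired together. A minimal sketch, with a hypothetical node name, IP, and port:

```bash
# On a node in the primary (source) cluster
riak-repl add-listener riak@10.0.1.10 10.0.1.10 9010

# On a node in the secondary (sink) cluster
riak-repl add-site 10.0.1.10 9010 primary_site
```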
## Restrictions

@@ -116,6 +116,6 @@ replication, as illustrated in the Figure below.

It is important to note that both clusters must have certain attributes
in common for Multi-Datacenter Replication to work. If you are using
either fullsync or realtime replication, both clusters must have the
-same [ring size](/riak/kv/2.1.3/learn/concepts/clusters/#the-ring); if you are using fullsync
-replication, every bucket's [`n_val`](/riak/kv/2.1.3/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+same [ring size]({{<baseurl>}}riak/kv/2.1.3/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/2.1.3/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
source and sink cluster.

diff --git a/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/aae.md
index b835f5e588..3dad06999a 100644
--- a/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/aae.md
+++ b/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/aae.md
@@ -17,9 +17,9 @@ aliases:
  - /riak/kv/2.1.3/ops/mdc/v3/aae
---

-[glossary aae]: /riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae
-[config reference#advanced]: /riak/kv/2.1.3/configuring/reference/#advanced-configuration
-[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters
+[glossary aae]: {{<baseurl>}}riak/kv/2.1.3/learn/glossary/#active-anti-entropy-aae
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.1.3/configuring/reference/#advanced-configuration
+[concept clusters]: {{<baseurl>}}riak/kv/2.1.3/learn/concepts/clusters

> **Note: Technical preview**
>

diff --git a/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture.md
index e5af863336..ca30e12f84 100644
--- a/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture.md
+++ b/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/architecture.md
@@ -17,8 +17,8 @@ aliases:
  - /riak/kv/2.1.3/ops/mdc/v3/architecture
---

-[glossary vnode]: /riak/kv/2.1.3/learn/glossary/#vnode
-[concept clusters]: /riak/kv/2.1.3/learn/concepts/clusters
+[glossary vnode]: {{<baseurl>}}riak/kv/2.1.3/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/2.1.3/learn/concepts/clusters

## How Version 3 Replication Works

@@ -111,7 +111,7 @@ the following commands must be issued:

 cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png)
+ ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png)
+![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png)
+![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png)
+![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png)
+![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png)
+![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png)
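The full v3 command set is covered in the operations documentation; as a rough sketch, naming two clusters, connecting them, and enabling realtime replication looks like this (hypothetical cluster name and address, default cluster-manager port):

```bash
# On the source cluster
riak-repl clustername boston
riak-repl connect newyork.example.com:9080
riak-repl realtime enable newyork
riak-repl realtime start newyork
```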
## Restrictions

diff --git a/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/cascading-writes.md
index 70a14a437a..77e77c24e5 100644
--- a/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/cascading-writes.md
+++ b/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -83,7 +83,7 @@ cascade.

## Usage

Riak Enterprise Cascading Writes can be enabled and disabled using the
-`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter) for more information.
+`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/v3-multi-datacenter) for more information.

To show the current settings:

diff --git a/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md
index fffca962f0..9c7d421737 100644
--- a/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md
+++ b/content/riak/kv/2.1.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -17,7 +17,7 @@ aliases:
  - /riak/kv/2.1.3/ops/mdc/v3/scheduling-fullsync
---

-[config reference#advanced]: /riak/kv/2.1.3/configuring/reference/#advanced-configuration
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.1.3/configuring/reference/#advanced-configuration

The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either:

diff --git a/content/riak/kv/2.1.3/using/repair-recovery.md b/content/riak/kv/2.1.3/using/repair-recovery.md
index 9ec8ef136c..95cbf4d23e 100644
--- a/content/riak/kv/2.1.3/using/repair-recovery.md
+++ b/content/riak/kv/2.1.3/using/repair-recovery.md
@@ -15,7 +15,7 @@ toc: true

 [repair recover fail]: ./failure-recovery/
 [repair recover errors]: ./errors/
 [repair recover repairs]: ./repairs/
-[repair recover restart]: ./rolling-restarts/
+[repair recover restart]: ./rolling-restart/

## In This Section

diff --git a/content/riak/kv/2.1.3/using/repair-recovery/errors.md b/content/riak/kv/2.1.3/using/repair-recovery/errors.md
index 3fc0f21f82..6e2aa5888e 100644
--- a/content/riak/kv/2.1.3/using/repair-recovery/errors.md
+++ b/content/riak/kv/2.1.3/using/repair-recovery/errors.md
@@ -15,7 +15,7 @@ aliases:
  - /riak/kv/2.1.3/ops/running/recovery/errors
---

-[config reference]: /riak/kv/2.1.3/configuring/reference
+[config reference]: {{<baseurl>}}riak/kv/2.1.3/configuring/reference

This is not a comprehensive listing of every error that Riak may
encounter -- screws fall out all of the time, the world is an imperfect

@@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems.

Error | Description | Resolution
:-----|:------------|:----------
`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd
-`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1.
-`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1.
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See 1. `{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See 1 +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See 1 +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. 
See 1 `{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See 2 enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports.
gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`.
-** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.1.3/using/reference/snmp) configuration.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/2.1.3/using/reference/snmp) configuration.
RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.

diff --git a/content/riak/kv/2.1.3/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.1.3/using/repair-recovery/failure-recovery.md
index c74f01f333..d9f242c70e 100644
--- a/content/riak/kv/2.1.3/using/repair-recovery/failure-recovery.md
+++ b/content/riak/kv/2.1.3/using/repair-recovery/failure-recovery.md
@@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are
available elsewhere in the cluster. Once the node is detected as down,
other nodes in the cluster will take over its responsibilities
temporarily and transmit the updated data to it when it eventually
-returns to service (also called [hinted handoff](/riak/kv/2.1.3/learn/glossary/#hinted-handoff)).
+returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/2.1.3/learn/glossary/#hinted-handoff)).

More severe data loss scenarios usually relate to hardware failure. If
data is lost, several options are available for restoring it.

@@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM
may also improve latency because more of the active dataset will be
cached by the operating system.

-Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.1.3/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/2.1.3/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
the number of siblings, causing longer disk service times and slower
network responses.
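One way to catch this condition early is to watch the sibling statistics each node reports; a sketch using the stat names exposed by `riak-admin status`:

```bash
# Steadily climbing mean/percentile sibling counts are a warning sign
riak-admin status | grep node_get_fsm_siblings
```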
@@ -115,7 +115,7 @@ spreading load and increasing available CPU and IOPS.

## Cluster Recovery From Backups

-See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+See [Changing Cluster Information]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.

{{% note title="Tip" %}}
If you are a licensed Riak Enterprise or CS customer and require assistance or

diff --git a/content/riak/kv/2.1.3/using/repair-recovery/repairs.md b/content/riak/kv/2.1.3/using/repair-recovery/repairs.md
index 85110250df..1c799e04f1 100644
--- a/content/riak/kv/2.1.3/using/repair-recovery/repairs.md
+++ b/content/riak/kv/2.1.3/using/repair-recovery/repairs.md
@@ -149,7 +149,7 @@ In the event of major hardware or filesystem problems, LevelDB can become corrup

### Checking for Compaction Errors

-Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`](/riak/kv/2.1.3/configuring/reference/) configuration file. The default is `./data`.
+Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb//LOG`. The `platform_data_dir` can be specified in the [`riak.conf`]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/) configuration file. The default is `./data`.

Compaction error messages take the following form:

@@ -218,23 +218,23 @@ riak start

If you have experienced a loss of object replicas in your cluster, you
may need to perform a repair operation on one or more of your data
-[partitions](/riak/kv/2.1.3/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+[partitions]({{<baseurl>}}riak/kv/2.1.3/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
run in situations where partitions or whole nodes are lost due to
corruption or hardware failure. In these cases, nodes or partitions are
brought back online without any data, which means that the need to
-repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.1.3/learn/concepts/active-anti-entropy/) is enabled.
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/) is enabled.
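When a repair is warranted (see the criteria that follow), the repair itself is initiated from the node's attached console; a sketch with a hypothetical partition ID:

```bash
riak attach
# In the attached Erlang shell, repair a single partition by ID, e.g.:
#   riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
# Detach from the console afterwards; do not call q(), which would stop the node.
```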
You will need to run a repair if the following are both true:

-* Active anti-entropy is [disabled](/riak/kv/2.1.3/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
 * You have both non-expiring data and keys that are not accessed
   frequently (which means that they are not likely to be subject to
-  [read repair](/riak/kv/2.1.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+  [read repair]({{<baseurl>}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))

You will most likely not need to run a repair operation if _any_ of the
following is true:

-* Active anti-entropy is [enabled](/riak/kv/2.1.3/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/2.1.3/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
 * Your entire key set is accessed frequently, allowing passive read
   repair to repair the partitions
 * Your data expires frequently

diff --git a/content/riak/kv/2.1.3/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.1.3/using/repair-recovery/rolling-restart.md
index c660bbb670..c40b096f37 100644
--- a/content/riak/kv/2.1.3/using/repair-recovery/rolling-restart.md
+++ b/content/riak/kv/2.1.3/using/repair-recovery/rolling-restart.md
@@ -15,7 +15,7 @@ aliases:
  - /riak/kv/2.1.3/ops/running/recovery/rolling-restart
---

-Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.1.3/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis.
+Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/2.1.3/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis.

The following steps should be undertaken on each Riak node that you
wish to restart:

diff --git a/content/riak/kv/2.1.3/using/running-a-cluster.md b/content/riak/kv/2.1.3/using/running-a-cluster.md
index 2c534576d1..80dd4b3934 100644
--- a/content/riak/kv/2.1.3/using/running-a-cluster.md
+++ b/content/riak/kv/2.1.3/using/running-a-cluster.md
@@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on
a non-local interface, i.e. not `127.0.0.1`, and then joining all of
the nodes together to participate in the cluster.

-Most configuration changes will be applied to the [configuration file](/riak/kv/2.1.3/configuring/reference/) located in your `rel/riak/etc` directory (if
+Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/) located in your `rel/riak/etc` directory (if
you compiled from source) or `/etc` (if you used a binary install of
Riak).

@@ -46,7 +46,7 @@ options:

 `ring` directory. This will require rejoining all nodes into a cluster
 again.
>
-> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.1.3/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
+> *Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.1.3/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
## Configure the First Node

@@ -59,7 +59,7 @@ riak stop

#### Select an IP address and port

Let's say that the IP address for your cluster is 192.168.1.10 and that
-you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.1.3/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
interface due to performance gains), you should change your
configuration file:

@@ -153,7 +153,7 @@ preferred.
>
> Once a node has been started, in order to change the name you must
either remove ring files from the `/data/ring` directory or
-[`riak-admin cluster force-replace`](/riak/kv/2.1.3/using/admin/riak-admin/#cluster-force-replace) the node.
+[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.1.3/using/admin/riak-admin/#cluster-force-replace) the node.

#### Start the node

@@ -249,7 +249,7 @@ into which you can type the following command:

```

To join additional nodes to your cluster, repeat the above steps. You
-can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes) from a cluster.
+can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/2.1.3/using/cluster-operations/adding-removing-nodes) from a cluster.

> **Ring Creation Size**
>

diff --git a/content/riak/kv/2.1.3/using/security.md b/content/riak/kv/2.1.3/using/security.md
index ae9cabd38f..58e80ad484 100644
--- a/content/riak/kv/2.1.3/using/security.md
+++ b/content/riak/kv/2.1.3/using/security.md
@@ -15,14 +15,14 @@ aliases:
  - /riak/kv/2.1.3/ops/advanced/security
---

-[config reference search]: /riak/kv/2.1.4/configuring/reference/#search
-[config search enabling]: /riak/kv/2.1.4/configuring/search/#enabling-riak-search
-[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl
+[config reference search]: {{<baseurl>}}riak/kv/2.1.3/configuring/reference/#search
+[config search enabling]: {{<baseurl>}}riak/kv/2.1.3/configuring/search/#enabling-riak-search
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.1.3/configuring/v3-multi-datacenter/ssl
 [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html
-[security basics]: /riak/kv/2.1.4/using/security/basics
-[security managing]: /riak/kv/2.1.4/using/security/managing-sources/
+[security basics]: {{<baseurl>}}riak/kv/2.1.3/using/security/basics
+[security managing]: {{<baseurl>}}riak/kv/2.1.3/using/security/managing-sources/
 [Solr]: http://lucene.apache.org/solr/
-[usage search]: /riak/kv/2.1.4/developing/usage/search
+[usage search]: {{<baseurl>}}riak/kv/2.1.3/developing/usage/search

> **Internal security**
>

diff --git a/content/riak/kv/2.1.3/using/security/basics.md b/content/riak/kv/2.1.3/using/security/basics.md
index 2d92fa4fad..38c2210aec 100644
--- a/content/riak/kv/2.1.3/using/security/basics.md
+++ b/content/riak/kv/2.1.3/using/security/basics.md
@@ -18,7 +18,7 @@ aliases:

> **Note on Network security**
>
> This document covers only the 2.0 authentication and authorization
-features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.1.3/using/security/managing-sources/).
+features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/2.1.3/using/security/managing-sources/).
As of version 2.0, Riak administrators can selectively apportion
access to a wide variety of Riak's functionality, including accessing,

@@ -47,7 +47,7 @@ of the following **before** enabling security:

 1. Make certain that the original Riak Search (version 1) and link
    walking are not required. Enabling security will break this
    functionality. If you wish to use security and Search together, you
-   will need to use the [new Search feature](/riak/kv/2.1.3/developing/usage/search/).
+   will need to use the [new Search feature]({{<baseurl>}}riak/kv/2.1.3/developing/usage/search/).
 1. Because Riak security requires a secure SSL connection, you will
    need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you
    enable security without having established a functioning SSL

@@ -59,10 +59,10 @@ of the following **before** enabling security:

 1. Check any Erlang MapReduce code for invocations of Riak modules other
    than `riak_kv_mapreduce`. Enabling security will prevent those from
    succeeding unless those modules are available via the `add_path`
-   mechanism documented in [Installing Custom Code](/riak/kv/2.1.3/using/reference/custom-code).
+   mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/2.1.3/using/reference/custom-code).
 1. Make sure that your client software will work properly:
     * It must pass authentication information with each request
-    * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.1.3/developing/api/protocol-buffers/)
+    * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/)
      traffic
     * If using HTTPS, the proper port (presumably 443) is open from
      client to server

@@ -75,7 +75,7 @@ of the following **before** enabling security:

Security should be enabled only after all of the above steps have been
performed and your security setup has been properly vetted.

-Clients that use [Protocol Buffers](/riak/kv/2.1.3/developing/api/protocol-buffers/) will typically have to be
+Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/) will typically have to be
reconfigured/restarted with the proper credentials once security is
enabled.

@@ -405,7 +405,7 @@ riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}

-The `<bucket-type>`/`<bucket>` pair parameters are derived from [bucket types](/riak/kv/2.1.3/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination.
+The `<bucket-type>`/`<bucket>` pair parameters are derived from [bucket types]({{<baseurl>}}riak/kv/2.1.3/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination.

@@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client

### MapReduce Permissions

-Permission to perform [MapReduce](/riak/kv/2.1.3/developing/usage/mapreduce/) jobs can be assigned
+Permission to perform [MapReduce]({{<baseurl>}}riak/kv/2.1.3/developing/usage/mapreduce/) jobs can be assigned
using `riak_kv.mapreduce`. The following example grants MapReduce
permissions to the user `mapreduce-power-user` for all buckets and
bucket types:

@@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user

### Bucket Type Permissions

-In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.1.3/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin
+In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/2.1.3/developing/usage/bucket-types) in addition to setting bucket properties.
`riak-admin
security` allows you to manage the following bucket type-related
permissions:

@@ -507,7 +507,7 @@ disabled, you will get the following error:

> `{error,{unknown_permission,"search.query"}}`
>
> More information on Riak Search and how to enable it can be found in the
-[Riak Search Settings](/riak/kv/2.1.3/configuring/search/) document.
+[Riak Search Settings]({{<baseurl>}}riak/kv/2.1.3/configuring/search/) document.

#### Usage Examples

@@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme

you will then need to define security sources required for
authentication.

-An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.1.3/using/security/managing-sources/).
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/2.1.3/using/security/managing-sources/).

### Add Source

@@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password

This section provides only a very brief overview of the syntax for
working with sources. For more information on using the `trust`,
-`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.1.3/using/security/managing-sources/) document.
+`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/2.1.3/using/security/managing-sources/) document.

## Security Ciphers

@@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS).

### Client vs. Server Cipher Order

By default, Riak prefers the cipher order that you set on the server,
-i.e. the [`honor_cipher_order`](/riak/kv/2.1.3/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher
+i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher
order dictate which cipher is chosen, set `honor_cipher_order` to
`off`.

> **Note on Erlang versions**

@@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`.

> Riak's default cipher order behavior has the potential to crash Erlang
VMs that do not support it. Erlang VMs that are known to support it
include Basho's patched version of Erlang R16. Instructions on
-installing it can be found in [Installing Erlang](/riak/kv/2.1.3/setup/installing/source/erlang). This issue should
+installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/2.1.3/setup/installing/source/erlang). This issue should
not affect Erlang 17.0 and later.

## Enabling SSL

In order to use any authentication or authorization features, you must
enable SSL for Riak. **SSL is disabled by default**, but you will need
-to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.1.3/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.1.3/configuring/reference/#client-interfaces) for the node
+to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/#client-interfaces) for the node
as well as a [certificate configuration](#certificate-configuration).
-If, however, you are using the [HTTP API](/riak/kv/2.1.3/developing/api/http) for Riak and would like to
+If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/2.1.3/developing/api/http) for Riak and would like to
configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host
and port. The following configuration would establish port 8088 on
`localhost` as the HTTPS port:

@@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088

When using Riak security, you can choose which versions of SSL/TLS are
allowed. By default, only TLS 1.2 is allowed, but this version can be
-disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.1.3/configuring/reference/#security) to `on` or `off`:
+disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/#security) to `on` or `off`:

 * `tls_protocols.tlsv1`
 * `tls_protocols.tlsv1.1`

@@ -775,16 +775,16 @@ Three things to note:

## Certificate Configuration

-If you are using any of the available [security sources](/riak/kv/2.1.3/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication), you will need to do so
+If you are using any of the available [security sources]({{<baseurl>}}riak/kv/2.1.3/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/2.1.3/using/security/managing-sources/#trust-based-authentication), you will need to do so
over a secure SSL connection. In order to establish a secure connection,
-you will need to ensure that each Riak node's [configuration files](/riak/kv/2.1.3/configuring/reference/#security) point to the proper paths for your
+you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/#security) point to the proper paths for your
generated certs. By default, Riak assumes that all certs are stored in
each node's `/etc` directory.

If you are using the newer, `riak.conf`-based configuration system, you
can change the location of the `/etc` directory by modifying the
`platform_etc_dir`. More information can be found in our documentation
-on [configuring directories](/riak/kv/2.1.3/configuring/reference/#directories).
+on [configuring directories]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/#directories).
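If you have configured an HTTPS listener as in the example above, you can verify which certificate the node actually presents with a standard OpenSSL probe; a sketch assuming the port 8088 listener:

```bash
# Inspect the certificate chain served by the HTTPS listener
openssl s_client -connect 127.0.0.1:8088 -showcerts < /dev/null | head -n 20
```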
vnode_management_timer
Sets the frequency with which <a
-href="/riak/kv/2.1.3/learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
+href="../../learn/glossary/#vnode">vnodes</a> attempt to trigger handoff between
this node and other nodes in the cluster.
10s (10 seconds)
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks

make it impossible to serve data directly from Riak. To disable those
checks, set the `secure_referer_check` parameter to `off`.

-If you are using [certificate-based authentication](/riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by
+If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/2.1.3/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by
default. To disable this behavior, set the `check_crl` parameter to
`off`.

diff --git a/content/riak/kv/2.1.3/using/security/managing-sources.md b/content/riak/kv/2.1.3/using/security/managing-sources.md
index 49b8caf177..c4010dc4c6 100644
--- a/content/riak/kv/2.1.3/using/security/managing-sources.md
+++ b/content/riak/kv/2.1.3/using/security/managing-sources.md
@@ -16,7 +16,7 @@ aliases:
---

If you're looking for more general information on Riak Security, it may
-be best to start with our general guide to [authentication and authorization](/riak/kv/2.1.3/using/security/basics).
+be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/2.1.3/using/security/basics).

This document provides more granular information on the four available
authentication sources in Riak Security: trusted networks, password,
@@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface.

The examples below will assume that the network in question is
`127.0.0.1/32` and that a Riak user named `riakuser` has been
-[created](/riak/kv/2.1.3/using/security/basics/#user-management) and that
-security has been [enabled](/riak/kv/2.1.3/using/security/basics/#the-basics).
+[created]({{<baseurl>}}riak/kv/2.1.3/using/security/basics/#user-management) and that
+security has been [enabled]({{<baseurl>}}riak/kv/2.1.3/using/security/basics/#the-basics).

{{% note title="Note on SSL connections" %}}
If you use _any_ of the aforementioned security sources, even `trust`, you

@@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate).

> **Note**
>
> At this time, client certificates are not supported in Riak's HTTP
-interface, and can be used only through the [protocol buffers interface](/riak/kv/2.1.3/developing/api/protocol-buffers/).
+interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/2.1.3/developing/api/protocol-buffers/).

Let's specify that our user `riakuser` is going to be authenticated
using a certificate on `localhost`:

@@ -145,7 +145,7 @@ their `CN` and Riak username match.

On the server side, you need to configure Riak by specifying a path to
your certificates. First, copy all relevant files to your Riak cluster.
The default directory for certificates is `/etc`, though you can specify
-a different directory in your [`riak.conf`](/riak/kv/2.1.3/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself:
+a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself:

```riakconf
ssl.certfile = /path/to/cert.pem
ssl.keyfile = /path/to/key.pem
ssl.cacertfile = /path/to/cacert.pem
```

In the client-side example above, the client's `CN` and Riak username
needed to match. On the server (i.e.
Riak) side, the `CN` specified _on each node_ must match the node's name
as registered by Riak. You can
-find the node's name in [`riak.conf`](/riak/kv/2.1.3/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is
+find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/2.1.3/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is
`riak-node-1`, you would need to generate your certificate with that in
mind, as in this OpenSSL example:

@@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1'
```

Once certificates have been properly generated and configured on all of
-the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.1.3/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client
+the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/2.1.3/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client
certificate that you generated for the user `riakuser`.

How to use Riak clients in conjunction with OpenSSL and other

diff --git a/content/riak/kv/2.1.4/_reference-links.md b/content/riak/kv/2.1.4/_reference-links.md
index fd06af51ee..6aba95f375 100644
--- a/content/riak/kv/2.1.4/_reference-links.md
+++ b/content/riak/kv/2.1.4/_reference-links.md
@@ -4,250 +4,250 @@

## Common

-[downloads]: /riak/kv/2.1.4/downloads/
-[install index]: /riak/kv/2.1.4/setup/installing
-[upgrade index]: /riak/kv/2.1.4/upgrading
-[plan index]: /riak/kv/2.1.4/planning
-[config index]: /riak/2.1.4/using/configuring/
-[config reference]: /riak/kv/2.1.4/configuring/reference/
-[manage index]: /riak/kv/2.1.4/using/managing
-[performance index]: /riak/kv/2.1.4/using/performance
-[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode
+[downloads]: {{<baseurl>}}riak/kv/2.1.4/downloads/
+[install index]: {{<baseurl>}}riak/kv/2.1.4/setup/installing
+[upgrade index]: {{<baseurl>}}riak/kv/2.1.4/upgrading
+[plan index]: {{<baseurl>}}riak/kv/2.1.4/planning
+[config index]: {{<baseurl>}}riak/kv/2.1.4/using/configuring/
+[config reference]: {{<baseurl>}}riak/kv/2.1.4/configuring/reference/
+[manage index]: {{<baseurl>}}riak/kv/2.1.4/using/managing
+[performance index]: {{<baseurl>}}riak/kv/2.1.4/using/performance
+[glossary vnode]: {{<baseurl>}}riak/kv/2.1.4/learn/glossary/#vnode
 [contact basho]: http://basho.com/contact/

## Planning

-[plan index]: /riak/kv/2.1.4/setup/planning
-[plan start]: /riak/kv/2.1.4/setup/planning/start
-[plan backend]: /riak/kv/2.1.4/setup/planning/backend
-[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask
-[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb
-[plan backend memory]: /riak/kv/2.1.4/setup/planning/backend/memory
-[plan backend multi]: /riak/kv/2.1.4/setup/planning/backend/multi
-[plan cluster capacity]: /riak/kv/2.1.4/setup/planning/cluster-capacity
-[plan bitcask capacity]: /riak/kv/2.1.4/setup/planning/bitcask-capacity-calc
-[plan best practices]: /riak/kv/2.1.4/setup/planning/best-practices
-[plan future]: /riak/kv/2.1.4/setup/planning/future
+[plan index]: {{<baseurl>}}riak/kv/2.1.4/setup/planning
+[plan start]: {{<baseurl>}}riak/kv/2.1.4/setup/planning/start
+[plan backend]: {{<baseurl>}}riak/kv/2.1.4/setup/planning/backend
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.1.4/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.1.4/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.1.4/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.1.4/setup/planning/backend/multi
+[plan cluster capacity]: 
{{<baseurl>}}riak/kv/2.1.4/setup/planning/cluster-capacity
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.1.4/setup/planning/bitcask-capacity-calc
+[plan best practices]: {{<baseurl>}}riak/kv/2.1.4/setup/planning/best-practices
+[plan future]: {{<baseurl>}}riak/kv/2.1.4/setup/planning/future

## Installing

-[install index]: /riak/kv/2.1.4/setup/installing
-[install aws]: /riak/kv/2.1.4/setup/installing/amazon-web-services
-[install debian & ubuntu]: /riak/kv/2.1.4/setup/installing/debian-ubuntu
-[install freebsd]: /riak/kv/2.1.4/setup/installing/freebsd
-[install mac osx]: /riak/kv/2.1.4/setup/installing/mac-osx
-[install rhel & centos]: /riak/kv/2.1.4/setup/installing/rhel-centos
-[install smartos]: /riak/kv/2.1.4/setup/installing/smartos
-[install solaris]: /riak/kv/2.1.4/setup/installing/solaris
-[install suse]: /riak/kv/2.1.4/setup/installing/suse
-[install windows azure]: /riak/kv/2.1.4/setup/installing/windows-azure
+[install index]: {{<baseurl>}}riak/kv/2.1.4/setup/installing
+[install aws]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/amazon-web-services
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/debian-ubuntu
+[install freebsd]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/freebsd
+[install mac osx]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/mac-osx
+[install rhel & centos]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/rhel-centos
+[install smartos]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/smartos
+[install solaris]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/solaris
+[install suse]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/suse
+[install windows azure]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/windows-azure

-[install source index]: /riak/kv/2.1.4/setup/installing/source
-[install source erlang]: /riak/kv/2.1.4/setup/installing/source/erlang
-[install source jvm]: /riak/kv/2.1.4/setup/installing/source/jvm
+[install source index]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/source
+[install source erlang]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/source/erlang
+[install source jvm]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/source/jvm

-[install verify]: /riak/kv/2.1.4/setup/installing/verify
+[install verify]: {{<baseurl>}}riak/kv/2.1.4/setup/installing/verify

## Upgrading

-[upgrade index]: /riak/kv/2.1.4/setup/upgrading
-[upgrade checklist]: /riak/kv/2.1.4/setup/upgrading/checklist
-[upgrade version]: /riak/kv/2.1.4/setup/upgrading/version
-[upgrade cluster]: /riak/kv/2.1.4/setup/upgrading/cluster
-[upgrade mdc]: /riak/kv/2.1.4/setup/upgrading/multi-datacenter
+[upgrade index]: {{<baseurl>}}riak/kv/2.1.4/setup/upgrading
+[upgrade checklist]: {{<baseurl>}}riak/kv/2.1.4/setup/upgrading/checklist
+[upgrade version]: {{<baseurl>}}riak/kv/2.1.4/setup/upgrading/version
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.1.4/setup/upgrading/cluster
+[upgrade mdc]: {{<baseurl>}}riak/kv/2.1.4/setup/upgrading/multi-datacenter

-##downgrade
+## Downgrade

-[downgrade]: /riak/kv/2.1.4/setup/downgrade
+[downgrade]: {{<baseurl>}}riak/kv/2.1.4/setup/downgrade

## Configuring

-[config index]: /riak/kv/2.1.4/configuring
-[config basic]: /riak/kv/2.1.4/configuring/basic
-[config backend]: /riak/kv/2.1.4/configuring/backend
-[config manage]: /riak/kv/2.1.4/configuring/managing
-[config reference]: /riak/kv/2.1.4/configuring/reference/
-[config strong consistency]: /riak/kv/2.1.4/configuring/strong-consistency
-[config load balance]: /riak/kv/2.1.4/configuring/load-balancing-proxy
-[config mapreduce]: /riak/kv/2.1.4/configuring/mapreduce
-[config search]: /riak/kv/2.1.4/configuring/search/
+[config index]: {{<baseurl>}}riak/kv/2.1.4/configuring
+[config basic]: {{<baseurl>}}riak/kv/2.1.4/configuring/basic
+[config backend]: {{<baseurl>}}riak/kv/2.1.4/configuring/backend
+[config manage]: 
{{}}riak/kv/2.1.4/configuring/managing +[config reference]: {{}}riak/kv/2.1.4/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.1.4/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.1.4/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.1.4/configuring/mapreduce +[config search]: {{}}riak/kv/2.1.4/configuring/search/ -[config v3 mdc]: /riak/kv/2.1.4/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.1.4/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.1.4/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.1.4/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.1.4/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.1.4/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.1.4/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.1.4/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.1.4/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.1.4/using/ -[use admin commands]: /riak/kv/2.1.4/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.1.4/using/running-a-cluster +[use index]: {{}}riak/kv/2.1.4/using/ +[use admin commands]: {{}}riak/kv/2.1.4/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.1.4/using/running-a-cluster ### Reference -[use ref bucket types]: /riak/kv/2.1.4/using/reference/bucket-types -[use ref custom code]: /riak/kv/2.1.4/using/reference/custom-code -[use ref handoff]: /riak/kv/2.1.4/using/reference/handoff -[use ref monitoring]: /riak/kv/2.1.4/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.1.4/using/reference/search -[use ref 2i]: /riak/kv/2.1.4/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.1.4/using/reference/snmp -[use ref strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.1.4/using/reference/jmx -[use ref obj del]: /riak/kv/2.1.4/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.1.4/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.1.4/using/reference/v2-multi-datacenter +[use ref bucket types]: {{}}riak/kv/2.1.4/using/reference/bucket-types +[use ref custom code]: {{}}riak/kv/2.1.4/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.1.4/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.1.4/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.1.4/using/reference/search +[use ref 2i]: {{}}riak/kv/2.1.4/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.1.4/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.1.4/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.1.4/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.1.4/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.1.4/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: 
/riak/kv/2.1.4/using/admin/ -[use admin commands]: /riak/kv/2.1.4/using/admin/commands/ -[use admin riak cli]: /riak/kv/2.1.4/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.1.4/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.1.4/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.1.4/using/admin/ +[use admin commands]: {{}}riak/kv/2.1.4/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.1.4/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.1.4/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.1.4/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.1.4/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.1.4/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.1.4/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.1.4/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.1.4/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.1.4/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.1.4/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.1.4/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.1.4/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.1.4/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.1.4/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.1.4/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.1.4/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.1.4/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.1.4/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.1.4/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.1.4/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.1.4/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.1.4/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.1.4/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.1.4/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.1.4/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.1.4/using/repair-recovery -[repair recover fail]: /riak/kv/2.1.4/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.1.4/using/repair-recovery +[repair recover fail]: {{}}riak/kv/2.1.4/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.1.4/using/security/ -[security basics]: /riak/kv/2.1.4/using/security/basics -[security managing]: /riak/kv/2.1.4/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.1.4/using/security/ +[security basics]: {{}}riak/kv/2.1.4/using/security/basics +[security managing]: 
{{}}riak/kv/2.1.4/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.1.4/using/performance/ -[perf benchmark]: /riak/kv/2.1.4/using/performance/benchmarking -[perf open files]: /riak/kv/2.1.4/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.1.4/using/performance/erlang -[perf aws]: /riak/kv/2.1.4/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.1.4/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.1.4/using/performance/ +[perf benchmark]: {{}}riak/kv/2.1.4/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.1.4/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.1.4/using/performance/erlang +[perf aws]: {{}}riak/kv/2.1.4/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.1.4/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.1.4/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.1.4/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.1.4/developing -[dev client libraries]: /riak/kv/2.1.4/developing/client-libraries -[dev data model]: /riak/kv/2.1.4/developing/data-modeling -[dev data types]: /riak/kv/2.1.4/developing/data-types -[dev kv model]: /riak/kv/2.1.4/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.1.4/developing +[dev client libraries]: {{}}riak/kv/2.1.4/developing/client-libraries +[dev data model]: {{}}riak/kv/2.1.4/developing/data-modeling +[dev data types]: {{}}riak/kv/2.1.4/developing/data-types +[dev kv model]: {{}}riak/kv/2.1.4/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.1.4/developing/getting-started -[getting started java]: /riak/kv/2.1.4/developing/getting-started/java -[getting started ruby]: /riak/kv/2.1.4/developing/getting-started/ruby -[getting started python]: /riak/kv/2.1.4/developing/getting-started/python -[getting started php]: /riak/kv/2.1.4/developing/getting-started/php -[getting started csharp]: /riak/kv/2.1.4/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.1.4/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.1.4/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.1.4/developing/getting-started/golang - -[obj model java]: /riak/kv/2.1.4/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.1.4/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.1.4/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.1.4/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.1.4/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.1.4/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.1.4/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.1.4/developing/getting-started +[getting started java]: {{}}riak/kv/2.1.4/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.1.4/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.1.4/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.1.4/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.1.4/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.1.4/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.1.4/developing/getting-started/erlang +[getting started golang]: 
{{}}riak/kv/2.1.4/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.1.4/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.1.4/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.1.4/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.1.4/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.1.4/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.1.4/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.1.4/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.1.4/developing/usage -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types/ -[usage commit hooks]: /riak/kv/2.1.4/developing/usage/commit-hooks/ -[usage conflict resolution]: /riak/kv/2.1.4/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.1.4/developing/usage/content-types -[usage create objects]: /riak/kv/2.1.4/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.1.4/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.1.4/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.1.4/developing/usage/mapreduce -[usage search]: /riak/kv/2.1.4/developing/usage/search -[usage search schema]: /riak/kv/2.1.4/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.1.4/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.1.4/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.1.4/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.1.4/developing/usage +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types/ +[usage commit hooks]: {{}}riak/kv/2.1.4/developing/usage/commit-hooks/ +[usage conflict resolution]: {{}}riak/kv/2.1.4/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.1.4/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.1.4/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.1.4/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.1.4/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.1.4/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.1.4/developing/usage/search +[usage search schema]: {{}}riak/kv/2.1.4/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.1.4/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.1.4/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.1.4/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.1.4/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.1.4/developing/app-guide/strong-consistency -[apps write once]: /riak/kv/2.1.4/developing/app-guide/write-once +[apps mapreduce]: {{}}riak/kv/2.1.4/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.1.4/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.1.4/developing/app-guide/strong-consistency +[apps write once]: {{}}riak/kv/2.1.4/developing/app-guide/write-once ### API -[dev api backend]: /riak/kv/2.1.4/developing/api/backend -[dev api http]: /riak/kv/2.1.4/developing/api/http -[dev api http status]: /riak/kv/2.1.4/developing/api/http/status -[dev api pbc]: 
/riak/kv/2.1.4/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.1.4/developing/api/backend +[dev api http]: {{}}riak/kv/2.1.4/developing/api/http +[dev api http status]: {{}}riak/kv/2.1.4/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.1.4/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: /riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.1.4/learn/glossary/ -[glossary aae]: /riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.1.4/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.1.4/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.1.4/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode -[concept aae]: /riak/kv/2.1.4/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.1.4/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.1.4/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.1.4/learn/concepts/causal-context/ -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.1.4/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.1.4/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.4/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.1.4/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.1.4/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.1.4/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.1.4/learn/concepts/causal-context/ +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.1.4/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.1.4/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.4/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.1.4/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.1.4/add-ons.md b/content/riak/kv/2.1.4/add-ons.md index 2d4c6588a3..0943d7d26c 100644 --- a/content/riak/kv/2.1.4/add-ons.md +++ b/content/riak/kv/2.1.4/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.1.4/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.1.4/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.1.4/add-ons/redis/developing-rra.md b/content/riak/kv/2.1.4/add-ons/redis/developing-rra.md index 384193973f..a79981129b 100644 --- a/content/riak/kv/2.1.4/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.1.4/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.1.4/developing/api/http +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.1.4/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.1.4/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.1.4/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.1.4/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.1.4/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.1.4/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.1.4/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.1.4/add-ons/redis/redis-add-on-features.md index 9a1fe87cb3..698e69de7e 100644 --- a/content/riak/kv/2.1.4/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.1.4/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
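For illustration, here is a minimal sketch of the read-through behavior from a client's point of view. It assumes RRA is listening on `localhost:22122` and that keys follow a `bucket:key` naming convention; both the port and the key format are assumptions that depend on your cache proxy configuration.

```bash
# Minimal read-through sketch; the port 22122 and the "test:food" key
# naming are assumptions, so adjust them to your RRA configuration.

# First GET: a cache miss, so RRA reads from Riak and populates Redis.
redis-cli -h localhost -p 22122 GET test:food

# Second GET within CACHE_TTL: served directly from the Redis cache.
redis-cli -h localhost -p 22122 GET test:food
```

Once `CACHE_TTL` elapses, the next GET misses again and re-populates the cache from Riak.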
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.1.4/add-ons/redis/set-up-rra.md b/content/riak/kv/2.1.4/add-ons/redis/set-up-rra.md index 9fa9213231..c6c964f4d7 100644 --- a/content/riak/kv/2.1.4/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.1.4/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.1.4/setup/installing -[perf open files]: /riak/kv/2.1.4/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.1.4/setup/installing +[perf open files]: {{}}riak/kv/2.1.4/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. diff --git a/content/riak/kv/2.1.4/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.1.4/add-ons/redis/set-up-rra/deployment-models.md index 3ccc23c898..6365761f79 100644 --- a/content/riak/kv/2.1.4/add-ons/redis/set-up-rra/deployment-models.md +++ b/content/riak/kv/2.1.4/add-ons/redis/set-up-rra/deployment-models.md @@ -13,9 +13,9 @@ toc: true commercial_offering: true --- -[Local-deployment]: /images/redis/rra_deployment_local.png -[Colocated-deployment]: /images/redis/rra_deployment_colocated.png -[Distributed-deployment]: /images/redis/rra_deployment_distributed.png +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png ## Deployment Models @@ -24,7 +24,7 @@ commercial_offering: true In a local cache deployment, the RRA and Redis are deployed to the application server. -![Local-deployment](/images/redis/rra_deployment_local.png) +![Local-deployment]({{}}images/redis/rra_deployment_local.png) Connections: @@ -65,7 +65,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so are not multiplied by the number of servers. -![Colocated-deployment](/images/redis/rra_deployment_colocated.png) +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) Connections: @@ -103,7 +103,7 @@ Disadvantages: In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes. 
-![Distributed-deployment](/images/redis/rra_deployment_distributed.png) +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) Connections: diff --git a/content/riak/kv/2.1.4/add-ons/redis/using-rra.md b/content/riak/kv/2.1.4/add-ons/redis/using-rra.md index cb50a77dc0..0c87d67c65 100644 --- a/content/riak/kv/2.1.4/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.1.4/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.1.4/developing/api/http/ +[dev api http]: {{}}riak/kv/2.1.4/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.1.4/configuring/backend.md b/content/riak/kv/2.1.4/configuring/backend.md index 41baecca8f..62568ead42 100644 --- a/content/riak/kv/2.1.4/configuring/backend.md +++ b/content/riak/kv/2.1.4/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.1.4/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.4/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.1.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.1.4/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.4/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.1.4/configuring/basic.md b/content/riak/kv/2.1.4/configuring/basic.md index fbe6534766..c026ddd7ba 100644 --- a/content/riak/kv/2.1.4/configuring/basic.md +++ b/content/riak/kv/2.1.4/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.1.4/ops/building/configuration/ --- -[config reference]: /riak/kv/2.1.4/configuring/reference -[use running cluster]: /riak/kv/2.1.4/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.1.4/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.1.4/using/performance/erlang -[plan start]: /riak/kv/2.1.4/setup/planning/start -[plan best practices]: /riak/kv/2.1.4/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.1.4/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.1.4/setup/planning/backend -[plan backend multi]: /riak/kv/2.1.4/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.1.4/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.1.4/using/performance/benchmarking -[perf open files]: /riak/kv/2.1.4/using/performance/open-files-limit -[perf index]: /riak/kv/2.1.4/using/performance -[perf aws]: /riak/kv/2.1.4/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.1.4/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[use running cluster]: {{}}riak/kv/2.1.4/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.1.4/using/performance/erlang +[plan start]: 
{{}}riak/kv/2.1.4/setup/planning/start +[plan best practices]: {{}}riak/kv/2.1.4/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.1.4/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.1.4/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.1.4/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.1.4/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.1.4/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.1.4/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.1.4/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.1.4/using/performance +[perf aws]: {{}}riak/kv/2.1.4/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.1.4/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.1.4/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.1.4/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
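As a hedged illustration of that mechanism, the following assumes Riak's HTTP interface is reachable on `localhost:8098` and uses a hypothetical bucket named `my_bucket`:

```bash
# Reset the hypothetical bucket "my_bucket" so it picks up the new
# defaults; assumes Riak's HTTP interface on localhost:8098.
curl -XDELETE http://localhost:8098/buckets/my_bucket/props

# Verify that the bucket now reports the default properties.
curl http://localhost:8098/buckets/my_bucket/props
```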
## System tuning diff --git a/content/riak/kv/2.1.4/configuring/load-balancing-proxy.md b/content/riak/kv/2.1.4/configuring/load-balancing-proxy.md index 6bbc80af96..e67111324e 100644 --- a/content/riak/kv/2.1.4/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.1.4/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.1.4/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.1.4/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.1.4/configuring/managing.md b/content/riak/kv/2.1.4/configuring/managing.md index 4a4599d080..5b412cc4ac 100644 --- a/content/riak/kv/2.1.4/configuring/managing.md +++ b/content/riak/kv/2.1.4/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.1.4/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.1.4/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.1.4/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.1.4/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.1.4/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.1.4/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.1.4/configuring/mapreduce.md b/content/riak/kv/2.1.4/configuring/mapreduce.md index 144cf86cda..4a54d97d5d 100644 --- a/content/riak/kv/2.1.4/configuring/mapreduce.md +++ b/content/riak/kv/2.1.4/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.1.4/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.1.4/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.1.4/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.1.4/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.1.4/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.1.4/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.1.4/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.1.4/configuring/reference.md b/content/riak/kv/2.1.4/configuring/reference.md index e1a7e52836..d9ad23758d 100644 --- a/content/riak/kv/2.1.4/configuring/reference.md +++ b/content/riak/kv/2.1.4/configuring/reference.md @@ -199,7 +199,7 @@ executables are stored. +as active anti-entropy data, and cluster metadata. @@ -485,7 +485,7 @@ to be deemed successful. conflicts. The default is 2 in Riak 2.0 for typed buckets and 1 for non-typed buckets. This setting reduces sibling creation through additional metadata on each sibling (also known as Dotted +href="{{< baseurl >}}riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vectors">Dotted Version Vectors). Setting this to 1 is the default for Riak 1.4 and earlier, and may duplicate siblings that originated in the same write. @@ -1727,7 +1727,7 @@ abandons the leader (in milliseconds). 
This must be set greater than the @@ -2093,8 +2093,8 @@ There are three non-`riak_repl` settings available in diff --git a/content/riak/kv/2.1.4/configuring/search.md b/content/riak/kv/2.1.4/configuring/search.md index 56e9501113..fc229c5f55 100644 --- a/content/riak/kv/2.1.4/configuring/search.md +++ b/content/riak/kv/2.1.4/configuring/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.1.4/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.1.4/developing/usage/search -[usage search schema]: /riak/kv/2.1.4/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.1.4/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.1.4/developing/usage/custom-extractors -[config reference]: /riak/kv/2.1.4/configuring/reference -[config reference#search]: /riak/kv/2.1.4/configuring/reference/#search -[glossary aae]: /riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.1.4/using/security/ +[usage search]: {{}}riak/kv/2.1.4/developing/usage/search +[usage search schema]: {{}}riak/kv/2.1.4/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.1.4/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.1.4/developing/usage/custom-extractors +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[config reference#search]: {{}}riak/kv/2.1.4/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.1.4/using/security/ > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Riak Search Settings](http://docs.basho.com/riak/1.4.8/ops/advanced/configs/search/). +Yokozuna). This document covers Riak's Search subsystem from an operational perspective. 
If you are looking for more developer-focused diff --git a/content/riak/kv/2.1.4/configuring/strong-consistency.md b/content/riak/kv/2.1.4/configuring/strong-consistency.md index aee7d66d70..8c468696c6 100644 --- a/content/riak/kv/2.1.4/configuring/strong-consistency.md +++ b/content/riak/kv/2.1.4/configuring/strong-consistency.md @@ -15,29 +15,29 @@ aliases: - /riak/2.1.4/ops/advanced/strong-consistency/ --- -[apps strong consistency]: /riak/kv/2.1.4/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.1.4/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.1.4/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.1.4/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.1.4/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.1.4/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.1.4/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.1.4/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.1.4/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.1.4/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.1.4/learn/concepts/causal-context -[dev data types]: /riak/kv/2.1.4/developing/data-types -[glossary aae]: /riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.1.4/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.1.4/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.1.4/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.1.4/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.1.4/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.1.4/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.1.4/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.1.4/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.1.4/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.1.4/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.1.4/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.1.4/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.1.4/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.1.4/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.1.4/learn/concepts/causal-context +[dev data 
types]: {{}}riak/kv/2.1.4/developing/data-types +[glossary aae]: {{}}riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.1.4/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.1.4/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.1.4/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.1.4/developing/client-libraries > **Please Note:** > @@ -311,11 +311,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.1.4/configuring/v2-multi-datacenter.md b/content/riak/kv/2.1.4/configuring/v2-multi-datacenter.md index 7385adf604..3f509f964e 100644 --- a/content/riak/kv/2.1.4/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.1.4/configuring/v2-multi-datacenter.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.1.4/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.1.4/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.1.4/configuring/v2-multi-datacenter/ssl Riak Enterprise's Multi-Datacenter Replication capabilities offer a variety of configurable parameters. diff --git a/content/riak/kv/2.1.4/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.1.4/configuring/v2-multi-datacenter/nat.md index 181e5d0fd3..6d7f2b1409 100644 --- a/content/riak/kv/2.1.4/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.1.4/configuring/v2-multi-datacenter/nat.md @@ -16,7 +16,7 @@ aliases: - /riak/2.1.4/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.1.4/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.1.4/configuring/v2-multi-datacenter/ssl Riak Enterprise supports replication of data on networks that use static NAT. This capability can be used for replicating data over the internet diff --git a/content/riak/kv/2.1.4/configuring/v3-multi-datacenter.md b/content/riak/kv/2.1.4/configuring/v3-multi-datacenter.md index 0395e17aa3..e733b0f9ab 100644 --- a/content/riak/kv/2.1.4/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.1.4/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.1.4/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.1.4/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.1.4/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/nat.md index e7f63de121..9c34f0c13e 100644 --- a/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/nat.md @@ -16,7 +16,7 @@ aliases: - /riak/2.1.4/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. 
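As a sketch of what that looks like in practice, the `riak-repl nat-map` command associates a node's public (NAT) address with its internal address; the IP addresses below are hypothetical:

```bash
# Map a hypothetical public NAT address to a node's internal address.
riak-repl nat-map add 203.0.113.10:9080 192.168.1.10

# List the NAT mappings currently in effect.
riak-repl nat-map show
```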
diff --git a/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/quick-start.md index f08df1cb41..94d9d4a818 100644 --- a/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/quick-start.md @@ -16,9 +16,9 @@ aliases: - /riak/2.1.4/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.1.4/using/performance -[config v3 mdc]: /riak/kv/2.1.4/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.1.4/using/performance +[config v3 mdc]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl.md index de405f4491..ca63f31f4b 100644 --- a/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl.md @@ -16,7 +16,7 @@ aliases: - /riak/2.1.4/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.1.4/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.1.4/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.1.4/developing/api/backend.md b/content/riak/kv/2.1.4/developing/api/backend.md index 24e749b54c..d14fd6f01f 100644 --- a/content/riak/kv/2.1.4/developing/api/backend.md +++ b/content/riak/kv/2.1.4/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/dev/references/backend-api --- -[plan backend]: /riak/kv/2.1.4/setup/planning/backend +[plan backend]: {{}}riak/kv/2.1.4/setup/planning/backend Riak's storage API uniformly applies to all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.1.4/developing/api/http.md b/content/riak/kv/2.1.4/developing/api/http.md index 88bd0080e1..ad8b4934b0 100644 --- a/content/riak/kv/2.1.4/developing/api/http.md +++ b/content/riak/kv/2.1.4/developing/api/http.md @@ -29,50 +29,50 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. 
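For example, fetching a hypothetical key `2016/01/report` from a bucket `logs` requires escaping the slashes, assuming the default bucket type and the standard HTTP port:

```bash
# The slashes in the key must be sent as %2F, or the URL path will be
# misinterpreted; the bucket and key names here are hypothetical.
curl http://localhost:8098/types/default/buckets/logs/keys/2016%2F01%2Freport
```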
Method | URL | Doc :------|:----|:--- -`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties](/riak/kv/2.1.4/developing/api/http/get-bucket-props) -`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties](/riak/kv/2.1.4/developing/api/http/set-bucket-props) -`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties](/riak/kv/2.1.4/developing/api/http/reset-bucket-props) -`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.1.4/developing/api/http/list-buckets) -`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys](/riak/kv/2.1.4/developing/api/http/list-keys) +`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.1.4/developing/api/http/get-bucket-props) +`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.1.4/developing/api/http/set-bucket-props) +`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.1.4/developing/api/http/reset-bucket-props) +`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.1.4/developing/api/http/list-buckets) +`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.1.4/developing/api/http/list-keys) ## Object-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/types//buckets//keys/` | [HTTP Fetch Object](/riak/kv/2.1.4/developing/api/http/fetch-object) -`POST` | `/types//buckets//keys/` | [HTTP Store Object](/riak/kv/2.1.4/developing/api/http/store-object) -`PUT` | `/types//buckets//keys/` | [HTTP Store Object](/riak/kv/2.1.4/developing/api/http/store-object) -`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object](/riak/kv/2.1.4/developing/api/http/delete-object) +`GET` | `/types//buckets//keys/` | [HTTP Fetch Object]({{}}riak/kv/2.1.4/developing/api/http/fetch-object) +`POST` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.1.4/developing/api/http/store-object) +`PUT` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.1.4/developing/api/http/store-object) +`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object]({{}}riak/kv/2.1.4/developing/api/http/delete-object) ## Riak-Data-Type-related Operations -For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.1.4/learn/concepts/crdts), -see the `curl` examples in [Using Data Types](/riak/kv/2.1.4/developing/data-types/#usage-examples) and subpages e.g. [sets](/riak/kv/2.1.4/developing/data-types/sets). +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.1.4/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.1.4/developing/data-types/#usage-examples) and subpages e.g. [sets]({{}}riak/kv/2.1.4/developing/data-types/sets). 
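As a quick sketch of the `curl` usage referenced above, the following increments and then reads a counter, assuming a bucket type named `counters` was created with `datatype = counter` and activated beforehand:

```bash
# Increment a counter by 5; the "counters" bucket type and the
# bucket/key names are assumptions for illustration.
curl -XPOST http://localhost:8098/types/counters/buckets/traffic/datatypes/hits \
  -H "Content-Type: application/json" \
  -d '{"increment": 5}'

# Read the counter's current value back.
curl http://localhost:8098/types/counters/buckets/traffic/datatypes/hits
```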
## Query-related Operations Method | URL | Doc :------|:----|:--- -`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.1.4/developing/api/http/mapreduce) -`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes](/riak/kv/2.1.4/developing/api/http/secondary-indexes) -`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes](/riak/kv/2.1.4/developing/api/http/secondary-indexes) +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.1.4/developing/api/http/mapreduce) +`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes]({{}}riak/kv/2.1.4/developing/api/http/secondary-indexes) +`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes]({{}}riak/kv/2.1.4/developing/api/http/secondary-indexes) ## Server-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/ping` | [HTTP Ping](/riak/kv/2.1.4/developing/api/http/ping) -`GET` | `/stats` | [HTTP Status](/riak/kv/2.1.4/developing/api/http/status) -`GET` | `/` | [HTTP List Resources](/riak/kv/2.1.4/developing/api/http/list-resources) +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.1.4/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.1.4/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.1.4/developing/api/http/list-resources) ## Search-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/search/query/` | [HTTP Search Query](/riak/kv/2.1.4/developing/api/http/search-query) -`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.1.4/developing/api/http/search-index-info) -`GET` | `/search/index/` | [HTTP Fetch Search Index](/riak/kv/2.1.4/developing/api/http/fetch-search-index) -`PUT` | `/search/index/` | [HTTP Store Search Index](/riak/kv/2.1.4/developing/api/http/store-search-index) -`DELETE` | `/search/index/` | [HTTP Delete Search Index](/riak/kv/2.1.4/developing/api/http/delete-search-index) -`GET` | `/search/schema/` | [HTTP Fetch Search Schema](/riak/kv/2.1.4/developing/api/http/fetch-search-schema) -`PUT` | `/search/schema/` | [HTTP Store Search Schema](/riak/kv/2.1.4/developing/api/http/store-search-schema) +`GET` | `/search/query/` | [HTTP Search Query]({{}}riak/kv/2.1.4/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.1.4/developing/api/http/search-index-info) +`GET` | `/search/index/` | [HTTP Fetch Search Index]({{}}riak/kv/2.1.4/developing/api/http/fetch-search-index) +`PUT` | `/search/index/` | [HTTP Store Search Index]({{}}riak/kv/2.1.4/developing/api/http/store-search-index) +`DELETE` | `/search/index/` | [HTTP Delete Search Index]({{}}riak/kv/2.1.4/developing/api/http/delete-search-index) +`GET` | `/search/schema/` | [HTTP Fetch Search Schema]({{}}riak/kv/2.1.4/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/` | [HTTP Store Search Schema]({{}}riak/kv/2.1.4/developing/api/http/store-search-schema) diff --git a/content/riak/kv/2.1.4/developing/api/http/counters.md b/content/riak/kv/2.1.4/developing/api/http/counters.md index bf94973dd2..bed589740d 100644 --- a/content/riak/kv/2.1.4/developing/api/http/counters.md +++ b/content/riak/kv/2.1.4/developing/api/http/counters.md @@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY ## Response -The regular POST/PUT ([HTTP Store Object](/riak/kv/2.1.4/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.1.4/developing/api/http/fetch-object)) responses apply here. 
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.1.4/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.1.4/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.1.4/developing/api/http/fetch-object.md b/content/riak/kv/2.1.4/developing/api/http/fetch-object.md index 6408d2390a..f7dd0c7d03 100644 --- a/content/riak/kv/2.1.4/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.1.4/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.1.4/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.1.4/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.1.4/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.1.4/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.1.4/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.1.4/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.1.4/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.1.4/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.1.4/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.1.4/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.1.4/developing/api/http/fetch-search-index.md b/content/riak/kv/2.1.4/developing/api/http/fetch-search-index.md index 3535292070..e6fd21e72a 100644 --- a/content/riak/kv/2.1.4/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.1.4/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.1.4/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.1.4/developing/usage/search/#simple-setup). 
## Request

@@ -36,7 +36,7 @@ GET /search/index/<index_name>

## Response

If the index is found, Riak will output a JSON object describing the
-index, including its name, the [`n_val`](/riak/kv/2.1.4/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.1.4/developing/usage/search-schemas) used by the index. Here is an example:
+index, including its name, the [`n_val`]({{<baseurl>}}riak/kv/2.1.4/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas) used by the index. Here is an example:

```json {

diff --git a/content/riak/kv/2.1.4/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.1.4/developing/api/http/fetch-search-schema.md
index e73c81d86c..57bed30a88 100644
--- a/content/riak/kv/2.1.4/developing/api/http/fetch-search-schema.md
+++ b/content/riak/kv/2.1.4/developing/api/http/fetch-search-schema.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/http/fetch-search-schema
---

-Retrieves a Riak KV [search schema](/riak/kv/2.1.4/developing/usage/search-schemas).
+Retrieves a Riak KV [search schema]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas).

## Request

diff --git a/content/riak/kv/2.1.4/developing/api/http/get-bucket-props.md b/content/riak/kv/2.1.4/developing/api/http/get-bucket-props.md
index a37b4fdf79..eabbae25f7 100644
--- a/content/riak/kv/2.1.4/developing/api/http/get-bucket-props.md
+++ b/content/riak/kv/2.1.4/developing/api/http/get-bucket-props.md
@@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format):

* `props` - whether to return the bucket properties (`true` is the default)
* `keys` - whether to return the keys stored in the bucket. (`false` is the
-default). See also [HTTP List Keys](/riak/kv/2.1.4/developing/api/http/list-keys).
+default). See also [HTTP List Keys]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/list-keys).

## Response

@@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and

`"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present.

-See [HTTP Set Bucket Properties](/riak/kv/2.1.4/developing/api/http/set-bucket-props) for more information about the available
+See [HTTP Set Bucket Properties]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/set-bucket-props) for more information about the available
bucket properties.

## Example

diff --git a/content/riak/kv/2.1.4/developing/api/http/link-walking.md b/content/riak/kv/2.1.4/developing/api/http/link-walking.md
index ff7ca0efad..03b2080b4f 100644
--- a/content/riak/kv/2.1.4/developing/api/http/link-walking.md
+++ b/content/riak/kv/2.1.4/developing/api/http/link-walking.md
@@ -17,8 +17,8 @@ aliases:

Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It
-is a special case of [MapReduce](/riak/kv/2.1.4/developing/usage/mapreduce), and can be expressed more verbosely as such.
-[Read more about Links](/riak/kv/2.1.4/learn/glossary/#links).
+is a special case of [MapReduce]({{<baseurl>}}riak/kv/2.1.4/developing/usage/mapreduce), and can be expressed more verbosely as such.
+[Read more about Links]({{<baseurl>}}riak/kv/2.1.4/learn/glossary/#links).

## Request

@@ -64,7 +64,7 @@ single object that was found. If no objects were found or "keep" was not set on

the phase, no chunks will be present in that phase.
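To make the bucket-properties request above concrete, here is a minimal sketch against a hypothetical bucket named `mybucket` on a local node (keeping in mind that, per the text above, the `props`/`keys` query parameters are only valid for the old-format URL):

```curl
# Fetch the properties of a bucket
curl http://localhost:8098/buckets/mybucket/props
```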
Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete
-response from [fetching the object](/riak/kv/2.1.4/developing/api/http/fetch-object), without the status
+response from [fetching the object]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/fetch-object), without the status
code.

## Example

diff --git a/content/riak/kv/2.1.4/developing/api/http/list-resources.md b/content/riak/kv/2.1.4/developing/api/http/list-resources.md
index bcc33346f7..a2fb1c684f 100644
--- a/content/riak/kv/2.1.4/developing/api/http/list-resources.md
+++ b/content/riak/kv/2.1.4/developing/api/http/list-resources.md
@@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations.

The standard resources are:

-* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.1.4/developing/api/http/#bucket-operations)
-* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.1.4/developing/api/http/secondary-indexes)
-* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.1.4/developing/api/http/link-walking)
-* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.1.4/developing/api/http/mapreduce)
-* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.1.4/developing/api/http/#object-key-operations)
-* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.1.4/developing/api/http/ping)
-* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.1.4/developing/api/http/set-bucket-props)
-* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.1.4/developing/api/http/status)
+* `riak_kv_wm_buckets` - [Bucket Operations]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/#bucket-operations)
+* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/secondary-indexes)
+* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/link-walking)
+* `riak_kv_wm_mapred` - [HTTP MapReduce]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/mapreduce)
+* `riak_kv_wm_object` - [Object/Key Operations]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/#object-key-operations)
+* `riak_kv_wm_ping` - [HTTP Ping]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/ping)
+* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/set-bucket-props)
+* `riak_kv_wm_stats` - [HTTP Status]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/status)

## Request

diff --git a/content/riak/kv/2.1.4/developing/api/http/mapreduce.md b/content/riak/kv/2.1.4/developing/api/http/mapreduce.md
index eef64ecc05..d0d3bc4240 100644
--- a/content/riak/kv/2.1.4/developing/api/http/mapreduce.md
+++ b/content/riak/kv/2.1.4/developing/api/http/mapreduce.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/http/mapreduce
---

-[MapReduce](/riak/kv/2.1.4/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow.
+[MapReduce]({{<baseurl>}}riak/kv/2.1.4/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow.

## Request

```
POST /mapred
```

Important headers:

-* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.1.4/developing/usage/mapreduce) page.
+* `Content-Type` - must always be `application/json`.
The format of the request body is described in detail on the [MapReduce]({{<baseurl>}}riak/kv/2.1.4/developing/usage/mapreduce) page.

Optional query parameters:

* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.

diff --git a/content/riak/kv/2.1.4/developing/api/http/search-index-info.md b/content/riak/kv/2.1.4/developing/api/http/search-index-info.md
index 68899d541a..1d63b7e8ac 100644
--- a/content/riak/kv/2.1.4/developing/api/http/search-index-info.md
+++ b/content/riak/kv/2.1.4/developing/api/http/search-index-info.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/http/search-index-info
---

-Retrieves information about all currently available [Search indexes](/riak/kv/2.1.4/developing/usage/search) in JSON format.
+Retrieves information about all currently available [Search indexes]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search) in JSON format.

## Request

diff --git a/content/riak/kv/2.1.4/developing/api/http/search-query.md b/content/riak/kv/2.1.4/developing/api/http/search-query.md
index aa908dfd4d..c412358d30 100644
--- a/content/riak/kv/2.1.4/developing/api/http/search-query.md
+++ b/content/riak/kv/2.1.4/developing/api/http/search-query.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/http/search-query
---

-Performs a [Riak KV Search](/riak/kv/2.1.4/developing/usage/search) query.
+Performs a [Riak KV Search]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search) query.

## Request

@@ -30,7 +30,7 @@ GET /search/query/<index_name>

to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`.
* `q` --- The actual Search query itself. Examples can be found in
- [Using Search](/riak/kv/2.1.4/developing/usage/search). If a query is not specified, Riak will return
+ [Using Search]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search). If a query is not specified, Riak will return
information about the index itself, e.g. the number of documents indexed.

diff --git a/content/riak/kv/2.1.4/developing/api/http/secondary-indexes.md b/content/riak/kv/2.1.4/developing/api/http/secondary-indexes.md
index d457327a41..a07c5e9fa1 100644
--- a/content/riak/kv/2.1.4/developing/api/http/secondary-indexes.md
+++ b/content/riak/kv/2.1.4/developing/api/http/secondary-indexes.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/http/secondary-indexes
---

-[Secondary Indexes](/riak/kv/2.1.4/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
+[Secondary Indexes]({{<baseurl>}}riak/kv/2.1.4/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
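Given the query endpoints summarized above, a few hedged sketches may help; the index, bucket, and field names used here (`famous`, `users`, `twitter_bin`, `age_int`) are hypothetical placeholders, and the node is assumed to be on `localhost:8098`:

```curl
# Search query against a hypothetical index, requesting JSON output
curl "http://localhost:8098/search/query/famous?wt=json&q=name_s:Lion*"

# Exact-match secondary index query on a binary index
curl http://localhost:8098/types/default/buckets/users/index/twitter_bin/jsmith123

# Range query on an integer index
curl http://localhost:8098/types/default/buckets/users/index/age_int/20/30
```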
## Request

diff --git a/content/riak/kv/2.1.4/developing/api/http/set-bucket-props.md b/content/riak/kv/2.1.4/developing/api/http/set-bucket-props.md
index 29c588e9f8..f95a4da3a0 100644
--- a/content/riak/kv/2.1.4/developing/api/http/set-bucket-props.md
+++ b/content/riak/kv/2.1.4/developing/api/http/set-bucket-props.md
@@ -37,8 +37,8 @@ Available properties:

(concurrent updates)
* `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing
-* `precommit` - [precommit hooks](/riak/kv/2.1.4/developing/usage/commit-hooks)
-* `postcommit` - [postcommit hooks](/riak/kv/2.1.4/developing/usage/commit-hooks)
+* `precommit` - [precommit hooks]({{<baseurl>}}riak/kv/2.1.4/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{<baseurl>}}riak/kv/2.1.4/developing/usage/commit-hooks)
* `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are:
  * `"all"` - all nodes must respond

diff --git a/content/riak/kv/2.1.4/developing/api/http/status.md b/content/riak/kv/2.1.4/developing/api/http/status.md
index d9a508db2a..6793c0bdda 100644
--- a/content/riak/kv/2.1.4/developing/api/http/status.md
+++ b/content/riak/kv/2.1.4/developing/api/http/status.md
@@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"

## Output Explanation

-The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.1.4/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application.
+The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.1.4/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application.

Stat | Description
------------------------------|---------------------------------------------------

diff --git a/content/riak/kv/2.1.4/developing/api/http/store-object.md b/content/riak/kv/2.1.4/developing/api/http/store-object.md
index e41b9563e0..a8886c55ef 100644
--- a/content/riak/kv/2.1.4/developing/api/http/store-object.md
+++ b/content/riak/kv/2.1.4/developing/api/http/store-object.md
@@ -38,8 +38,8 @@ object when read.

* `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object.
* `X-Riak-Index-*` - index entries under which this object should be indexed.
-[Read more about Secondary Indexing](/riak/kv/2.1.4/developing/api/http/secondary-indexes)
-* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.1.4/developing/api/http/link-walking)
+[Read more about Secondary Indexing]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/secondary-indexes)
+* `Link` - user and system-defined links to other resources. [Read more about Links.]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/link-walking)

Optional headers (only valid on `PUT`):

@@ -83,7 +83,7 @@ Important headers:

* `Location` - a relative URL to the newly-created object (when submitting without a key)

-If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.1.4/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices`
+If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{<baseurl>}}riak/kv/2.1.4/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices`
may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly.
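Tying the store-object headers above together, here is a minimal sketch (hypothetical bucket, key, and index values) that stores a JSON object, tags it with a secondary index entry, and asks Riak to echo the stored body back:

```curl
curl -XPUT "http://localhost:8098/buckets/users/keys/jsmith?returnbody=true" \
  -H "Content-Type: application/json" \
  -H "X-Riak-Index-twitter_bin: jsmith123" \
  -d '{"name": "John Smith"}'
```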
diff --git a/content/riak/kv/2.1.4/developing/api/http/store-search-index.md b/content/riak/kv/2.1.4/developing/api/http/store-search-index.md
index 9ee1933341..720164fa9a 100644
--- a/content/riak/kv/2.1.4/developing/api/http/store-search-index.md
+++ b/content/riak/kv/2.1.4/developing/api/http/store-search-index.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/http/store-search-index
---

-Creates a new Riak Search [index](/riak/kv/2.1.4/developing/usage/search/#simple-setup).
+Creates a new Riak Search [index]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search/#simple-setup).

## Request

@@ -26,11 +26,11 @@ PUT /search/index/<index_name>

## Optional Request Body

If you run a `PUT` request to this endpoint without a request body, Riak
-will create a new Search index that uses the [default Search schema](/riak/kv/2.1.4/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.
+will create a new Search index that uses the [default Search schema]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.

To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of
-the schema to use. If you've [stored a schema](/riak/kv/2.1.4/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
+the schema to use. If you've [stored a schema]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
request would create an index called `my_index` that used that schema:

```curl
curl -XPUT http://localhost:8098/search/index/my_index \
  -d '{"schema": "my_custom_schema"}'
```

-More information can be found in [Using Search](/riak/kv/2.1.4/developing/usage/search).
+More information can be found in [Using Search]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search).

## Normal Response Codes

diff --git a/content/riak/kv/2.1.4/developing/api/http/store-search-schema.md b/content/riak/kv/2.1.4/developing/api/http/store-search-schema.md
index 32b423a47f..04dd52e68e 100644
--- a/content/riak/kv/2.1.4/developing/api/http/store-search-schema.md
+++ b/content/riak/kv/2.1.4/developing/api/http/store-search-schema.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/http/store-search-schema
---

-Creates a new Riak [Search schema](/riak/kv/2.1.4/developing/usage/search-schemas).
+Creates a new Riak [Search schema]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas).

## Request

@@ -26,7 +26,7 @@ PUT /search/schema/<schema_name>

## Required Form Data

In order to create a new Search schema, you must pass Riak a properly
-formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.1.4/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+formed XML schema. More information can be found in the [Search Schema]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
`my_schema.xml` and would like to create a new schema called
`my_custom_schema`, you would use the following HTTP request:

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers.md
index 9a09f77754..86d24d3661 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers.md
@@ -139,47 +139,47 @@ message RpbErrorResp {

## Bucket Operations

-* [PBC List Buckets](/riak/kv/2.1.4/developing/api/protocol-buffers/list-buckets)
-* [PBC List Keys](/riak/kv/2.1.4/developing/api/protocol-buffers/list-keys)
-* [PBC Get Bucket Properties](/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props)
-* [PBC Set Bucket Properties](/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props)
-* [PBC Reset Bucket Properties](/riak/kv/2.1.4/developing/api/protocol-buffers/reset-bucket-props)
+* [PBC List Buckets]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/list-buckets)
+* [PBC List Keys]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/list-keys)
+* [PBC Get Bucket Properties]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props)
+* [PBC Set Bucket Properties]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props)
+* [PBC Reset Bucket Properties]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/reset-bucket-props)

## Object/Key Operations

-* [PBC Fetch Object](/riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object)
-* [PBC Store Object](/riak/kv/2.1.4/developing/api/protocol-buffers/store-object)
-* [PBC Delete Object](/riak/kv/2.1.4/developing/api/protocol-buffers/delete-object)
+* [PBC Fetch Object]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object)
+* [PBC Store Object]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/store-object)
+* [PBC Delete Object]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/delete-object)

## Query Operations

-* [PBC MapReduce](/riak/kv/2.1.4/developing/api/protocol-buffers/mapreduce)
-* [PBC Secondary Indexes](/riak/kv/2.1.4/developing/api/protocol-buffers/secondary-indexes)
-* [PBC Search](/riak/kv/2.1.4/developing/api/protocol-buffers/search)
+* [PBC MapReduce]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/mapreduce)
+* [PBC Secondary Indexes]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/secondary-indexes)
+* [PBC Search]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/search)

## Server Operations

-* [PBC Ping](/riak/kv/2.1.4/developing/api/protocol-buffers/ping)
-* [PBC Server Info](/riak/kv/2.1.4/developing/api/protocol-buffers/server-info)
+* [PBC Ping]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/ping)
+* [PBC Server Info]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/server-info)

## Bucket Type Operations

-* [PBC Get Bucket Type](/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-type)
-* [PBC Set Bucket Type](/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-type)
+* [PBC Get Bucket Type]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-type)
+* [PBC Set Bucket Type]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-type)

## Data Type Operations

-* [PBC Data Type Fetch](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-fetch)
-* [PBC Data Type Union](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-union)
-* [PBC Data Type Store](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-store)
-* [PBC Data Type Counter Store](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-counter-store)
-* [PBC Data Type Set Store](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-set-store)
-* [PBC Data Type Map Store](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store)
+* [PBC Data Type Fetch]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-fetch)
+* [PBC Data Type Union]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-union)
+* [PBC Data Type Store]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-store)
+* [PBC Data Type Counter Store]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-counter-store)
+* [PBC Data Type Set Store]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-set-store)
+* [PBC Data Type Map Store]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store)

## Yokozuna Operations

-* [PBC Yokozuna Index Get](/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-get)
-* [PBC Yokozuna Index Put](/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-put)
-* [PBC Yokozuna Index Delete](/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-delete)
-* [PBC Yokozuna Schema Get](/riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-get)
-* [PBC Yokozuna Schema Put](/riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-put)
+* [PBC Yokozuna Index Get]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-get)
+* [PBC Yokozuna Index Put]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-put)
+* [PBC Yokozuna Index Delete]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-delete)
+* [PBC Yokozuna Schema Get]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-get)
+* [PBC Yokozuna Schema Put]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-put)

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/auth-req.md
index 0690506c8a..2c34f01e5b 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/auth-req.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/auth-req.md
@@ -27,4 +27,4 @@ message RpbAuthReq {
}
```

-For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.1.4/using/security/basics).
+For more on authentication, see our documentation on [Authentication and Authorization]({{<baseurl>}}riak/kv/2.1.4/using/security/basics).

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/coverage-queries.md
index 72bf100afd..b16edbb897 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/coverage-queries.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/coverage-queries.md
@@ -9,15 +9,16 @@ menu:
  identifier: "pbc_coverage_queries"
  weight: 108
  parent: "apis_pbc"
+version_history:
+  in: "2.1.4+"
toc: true
aliases:
  - /riak/2.1.4/dev/references/protocol-buffers/coverage-queries
  - /riak/kv/2.1.4/dev/references/protocol-buffers/coverage-queries
-canonical_link: "https://docs.basho.com/riak/kv/latest/developing/api/protocol-buffers/coverage-queries"
---

Prepare for parallelizable
-[secondary index queries][../secondary-indexes/] by requesting a
+[secondary index queries](../secondary-indexes/) by requesting a
coverage plan. The response will be multiple slices of the cluster, as identified by a TCP endpoint and an opaque binary to be included with each 2i query.
diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/delete-object.md
index 5661ba398b..96b0400eda 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/delete-object.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/delete-object.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/protocol-buffers/delete-object
---

-Delete an object in the specified [bucket type](/riak/kv/2.1.4/using/cluster-operations/bucket-types)/bucket/key location.
+Delete an object in the specified [bucket type]({{<baseurl>}}riak/kv/2.1.4/using/cluster-operations/bucket-types)/bucket/key location.

## Request

@@ -49,7 +49,7 @@ Parameter | Description

{{% note title="Note on defaults and special values" %}}
All of the optional parameters below have default values determined on a per-bucket basis. Please refer to the documentation on <a
-href="/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props/">setting
+href="{{< baseurl >}}riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props/">setting
bucket properties</a> for more information.

Furthermore, you can assign an integer value to the `rw`, `r`, `w`, `pr`,

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-counter-store.md
index 55a52b397a..1098027a05 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-counter-store.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-counter-store.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/protocol-buffers/dt-counter-store
---

-An operation to update a [counter](/riak/kv/2.1.4/developing/data-types).
+An operation to update a [counter]({{<baseurl>}}riak/kv/2.1.4/developing/data-types).

## Request

@@ -28,4 +28,4 @@ message CounterOp {

The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are
-stored on their own in a key or [within a map](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store).
+stored on their own in a key or [within a map]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store).

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-fetch.md
index 09daa58d8c..2d2faa010c 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-fetch.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-fetch.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/protocol-buffers/dt-fetch
---

-The equivalent of [`RpbGetReq`](/riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.1.4/developing/data-types). This request results in a `DtFetchResp`
+The equivalent of [`RpbGetReq`]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{<baseurl>}}riak/kv/2.1.4/developing/data-types). This request results in a `DtFetchResp`
message (explained in the **Response** section below).

## Request

@@ -42,14 +42,14 @@ Parameter | Description

:---------|:-----------
`bucket` | The name of the bucket in which the Data Type is stored
`key` | The key where the Data Type is stored
-`type` | The [Using Bucket Types](/riak/kv/2.1.4/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)
+`type` | The [Using Bucket Types]({{<baseurl>}}riak/kv/2.1.4/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)

#### Optional Parameters

> **Note on defaults and special values**
>
> All of the optional parameters below have default values determined on a
-per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props) for more information.
+per-bucket basis. Please refer to the documentation on [setting bucket properties]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props) for more information.

Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal

@@ -72,7 +72,7 @@ Parameter | Description

## Response

-The response to a fetch request ([`DtFetchReq`](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+The response to a fetch request ([`DtFetchReq`]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.

```protobuf message DtFetchResp {

@@ -91,7 +91,7 @@ message DtFetchResp {

If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one
-would send a [vclock](/riak/kv/2.1.4/learn/glossary/#vector-clock) for standard KV updates.
+would send a [vclock]({{<baseurl>}}riak/kv/2.1.4/learn/glossary/#vector-clock) for standard KV updates.

The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`,

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store.md
index 5a0d0762c0..bbb1d95def 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store.md
@@ -66,7 +66,7 @@ message MapUpdate {
}
```

-The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-set-store).
+The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-set-store).

If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively).
diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-set-store.md
index 25b390507d..51390bf8eb 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-set-store.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-set-store.md
@@ -16,7 +16,7 @@ aliases:
---

An operation to update a set, either on its own (at the bucket/key
-level) or [inside of a map](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store).
+level) or [inside of a map]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store).

## Request

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-store.md
index 905332ff3d..24a60e8989 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-store.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-store.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/protocol-buffers/dt-store
---

-A request to update the value of a [Riak Data Type](/riak/kv/2.1.4/developing/data-types).
+A request to update the value of a [Riak Data Type]({{<baseurl>}}riak/kv/2.1.4/developing/data-types).

## Request

@@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`.

The `DtOp` value specifies which Data Type-specific operation is being
-performed. More on that in the [PBC Data Type Union](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-union) document.
+performed. More on that in the [PBC Data Type Union]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-union) document.

```protobuf message DtUpdateReq {

@@ -50,11 +50,11 @@ message DtUpdateReq {

Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket in which the Data Type is stored
-`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.1.4/using/cluster-operations/bucket-types).
+`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{<baseurl>}}riak/kv/2.1.4/using/cluster-operations/bucket-types).

Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a
-[counter](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store).
+[counter]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-counter-store), [set]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-set-store), or [map]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-map-store).

```protobuf message DtOp {

@@ -69,7 +69,7 @@ message DtOp {

{{% note title="Note on defaults and special values" %}}
All of the optional parameters below have default values determined on a per-bucket basis. Please refer to the documentation on <a
-href="/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props/">setting
+href="{{< baseurl >}}riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props/">setting
bucket properties</a> for more information.

Furthermore, you can assign an integer value to the `w`, `dw`, and `pw`,

@@ -81,7 +81,7 @@ value denoting `one` (`4294967295-1`), `quorum` (`4294967295-2`), `all`

Parameter | Description
:---------|:-----------
`key` | The key where the Data Type is stored.
If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
-`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.1.4/learn/glossary/#vector-clock)
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{<baseurl>}}riak/kv/2.1.4/learn/glossary/#vector-clock)
`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted

@@ -94,7 +94,7 @@ Parameter | Description

## Response

The response to a Data Type update request is analogous to
-[`RpbPutResp`](/riak/kv/2.1.4/developing/api/protocol-buffers/store-object) for KV operations. If the
+[`RpbPutResp`]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/store-object) for KV operations. If the
`return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-union.md
index f90ce6e717..a06943dae5 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-union.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/dt-union.md
@@ -28,4 +28,4 @@ message DtOp {
```

The included operation depends on the Data Type that is being updated.
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.1.4/developing/api/protocol-buffers/dt-store) message.
+`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/dt-store) message.

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object.md
index fcd031d596..4de2ab6278 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object.md
@@ -47,7 +47,7 @@ message RpbGetReq {

> **Note on defaults and special values**
>
> All of the optional parameters below have default values determined on a
-per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props) for more information.
+per-bucket basis. Please refer to the documentation on [setting bucket properties]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props) for more information.
>
> Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or

@@ -87,7 +87,7 @@ Value | Description

The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and
-[`RpbPutResp`](/riak/kv/2.1.4/developing/api/protocol-buffers/store-object), respectively):
+[`RpbPutResp`]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/store-object), respectively):

```protobuf message RpbContent {

@@ -114,7 +114,7 @@ of the following optional parameters:

* `charset` --- The character encoding of the object, e.g.
`utf-8`
* `content_encoding` --- The content encoding of the object, e.g. `video/mp4`
-* `vtag` --- The object's [vtag](/riak/kv/2.1.4/learn/glossary/#vector-clock)
+* `vtag` --- The object's [vtag]({{<baseurl>}}riak/kv/2.1.4/learn/glossary/#vector-clock)
* `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients
* `last_mod` --- A timestamp for when the object was last modified, in

@@ -132,7 +132,7 @@ of the following optional parameters:

}
```
Notice that both a key and value can be stored or just a key.
-  `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.1.4/developing/usage/secondary-indexes) to objects (in the optional
+  `RpbPair` messages are also used to attach [secondary indexes]({{<baseurl>}}riak/kv/2.1.4/developing/usage/secondary-indexes) to objects (in the optional
  `indexes` field).
* `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key)

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props.md
index 037572743f..dcfebc1010 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props.md
@@ -26,7 +26,7 @@ message RpbGetBucketReq {
}
```

-The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.1.4/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
+The bucket's name (`bucket`) must be specified. The [bucket type]({{<baseurl>}}riak/kv/2.1.4/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
the `default` bucket type will be used.

## Response

@@ -85,7 +85,7 @@ message RpbBucketProps {

Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the
-[configuration file](/riak/kv/2.1.4/configuring/reference/#default-bucket-properties) documentation.
+[configuration file]({{<baseurl>}}riak/kv/2.1.4/configuring/reference/#default-bucket-properties) documentation.

It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying

@@ -106,5 +106,5 @@ message RpbCommitHook {
```

{{% note title="Note on `RpbReplMode`" %}}
-The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
{{% /note %}}

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-type.md
index d36bd63026..9d23461b1b 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-type.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-type.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/protocol-buffers/get-bucket-type
---

-Gets the bucket properties associated with a [bucket type](/riak/kv/2.1.4/using/cluster-operations/bucket-types).
+Gets the bucket properties associated with a [bucket type]({{<baseurl>}}riak/kv/2.1.4/using/cluster-operations/bucket-types).
## Request

@@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`).

## Response

A bucket type's properties will be sent to the client as part of an
-[`RpbBucketProps`](/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props) message.
+[`RpbBucketProps`]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props) message.

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-client-id.md
index b8845644da..ec33fd4276 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-client-id.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/get-client-id.md
@@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs.

Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is
-connected and can be changed using [Set Client ID](/riak/kv/2.1.4/developing/api/protocol-buffers/set-client-id).
+connected and can be changed using [Set Client ID]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/set-client-id).

## Request

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/mapreduce.md
index 925f96f812..cba16c50e4 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/mapreduce.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/mapreduce.md
@@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways

* `application/json` --- JSON-encoded MapReduce job
* `application/x-erlang-binary` --- Erlang external term format

-The JSON encoding is the same as [REST API](/riak/kv/2.1.4/developing/usage/mapreduce/#rest) and
-the external term format is the same as the [local Erlang API](/riak/kv/2.1.4/developing/app-guide/advanced-mapreduce/#erlang)
+The JSON encoding is the same as [REST API]({{<baseurl>}}riak/kv/2.1.4/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{<baseurl>}}riak/kv/2.1.4/developing/app-guide/advanced-mapreduce/#erlang)

## Response

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/reset-bucket-props.md
index 953f7d4003..192836803b 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/reset-bucket-props.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/reset-bucket-props.md
@@ -27,7 +27,7 @@ message RpbResetBucketReq {
```

You must specify the name of the bucket (`bucket`) and optionally a
-[bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) using the `type` value. If you do not
+[bucket type]({{<baseurl>}}riak/kv/2.1.4/developing/usage/bucket-types) using the `type` value. If you do not
specify a bucket type, the `default` bucket type will be used by Riak.
## Response

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/secondary-indexes.md
index 771085730f..088a2e2baa 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/secondary-indexes.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/secondary-indexes.md
@@ -63,7 +63,7 @@ Parameter | Description

`max_results` | If pagination is turned on, the number of results to be returned to the client
`continuation` | If set to `true`, values are returned in a paginated response
`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
-`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.1.4/developing/usage/bucket-types).
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{<baseurl>}}riak/kv/2.1.4/developing/usage/bucket-types).
`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
`cover_context` | Opaque binary used to target a vnode. Requested via [a coverage query][../coverage-queries/]

@@ -88,7 +88,7 @@ message RpbIndexResp {

Parameter | Description
:---------|:-----------
`keys` | A list of keys that match the index request
-`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object).
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object).
`continuation` | Used for paginated responses
`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props.md
index f7c83c989c..690b6c6c4b 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props.md
@@ -29,9 +29,9 @@ message RpbSetBucketReq {

You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the
-[PBC Get Bucket Properties](/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props) documentation.
+[PBC Get Bucket Properties]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props) documentation.

-You can also specify a [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) using the
+You can also specify a [bucket type]({{<baseurl>}}riak/kv/2.1.4/developing/usage/bucket-types) using the
`type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak.
diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-type.md
index 89652918a6..3115afabbd 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-type.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-type.md
@@ -15,8 +15,8 @@ aliases:
- /riak/kv/2.1.4/dev/references/protocol-buffers/set-bucket-type
---

-Assigns a set of [bucket properties](/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props) to a
-[bucket type](/riak/kv/2.1.4/developing/usage/bucket-types).
+Assigns a set of [bucket properties]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props) to a
+[bucket type]({{<baseurl>}}riak/kv/2.1.4/developing/usage/bucket-types).

## Request

@@ -28,4 +28,4 @@ message RpbSetBucketTypeReq {
}
```

The `type` field specifies the name of the bucket type as a binary. The
-`props` field contains an [`RpbBucketProps`](/riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props).
+`props` field contains an [`RpbBucketProps`]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/get-bucket-props).

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/store-object.md
index b99dd4d3d8..ae91ec6ed9 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/store-object.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/store-object.md
@@ -16,11 +16,11 @@ aliases:
---

Stores an object under the specified location, as determined by the
-intended [key](/riak/kv/2.1.4/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.1.4/learn/concepts/buckets), and [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types). A bucket must always be specified (via
+intended [key]({{<baseurl>}}riak/kv/2.1.4/learn/concepts/keys-and-objects), [bucket]({{<baseurl>}}riak/kv/2.1.4/learn/concepts/buckets), and [bucket type]({{<baseurl>}}riak/kv/2.1.4/developing/usage/bucket-types). A bucket must always be specified (via
`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no
-[bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) is assigned, Riak will assign
-`default`, which means that the [default bucket configuration](/riak/kv/2.1.4/configuring/reference/#default-bucket-properties) will be used.
+[bucket type]({{<baseurl>}}riak/kv/2.1.4/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{<baseurl>}}riak/kv/2.1.4/configuring/reference/#default-bucket-properties) will be used.

#### Request

@@ -50,14 +50,14 @@ message RpbPutReq {

Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
-`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object)
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object)

#### Optional Parameters

{{% note title="Note on defaults and special values" %}}
All of the optional parameters below have default values determined on a per-bucket basis.
Please refer to the documentation on <a
-href="/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props/">setting
+href="{{< baseurl >}}riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props/">setting
bucket properties</a> for more information.

Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and `pw`,

@@ -95,7 +95,7 @@ message RpbPutResp {

If `return_body` is set to `true` on the PUT request, the `RpbPutResp` will contain the current object after the PUT completes, in `contents`,
-as well as the object's [causal context](/riak/kv/2.1.4/learn/concepts/causal-context), in the `vclock`
+as well as the object's [causal context]({{<baseurl>}}riak/kv/2.1.4/learn/concepts/causal-context), in the `vclock`
field. The `key` will be sent only if the server generated a random key for the object.

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-get.md
index 3b8b3a92c1..8c1216b599 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-get.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-get.md
@@ -53,7 +53,7 @@ message RpbYokozunaIndex {
```

Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.1.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
+binary (as `name`). Optionally, you can specify a [`schema`]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-put.md
index 85d133ed33..18364a1f96 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-put.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-index-put.md
@@ -37,4 +37,4 @@ message RpbYokozunaIndex {
```

Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.1.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
+binary (as `name`). Optionally, you can specify a [`schema`]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-get.md
index 3dd8bf545d..9a6286f040 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-get.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-get.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/protocol-buffers/yz-schema-get
---

-Fetch a [search schema](/riak/kv/2.1.4/developing/usage/search-schemas) from Riak Search.
+Fetch a [search schema]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas) from Riak Search.

## Request

diff --git a/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-put.md
index 0b0badc029..2e3b6030b9 100644
--- a/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-put.md
+++ b/content/riak/kv/2.1.4/developing/api/protocol-buffers/yz-schema-put.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.1.4/dev/references/protocol-buffers/yz-schema-put
---

-Create a new Solr [search schema](/riak/kv/2.1.4/developing/usage/search-schemas).
+Create a new Solr [search schema]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas).

## Request

@@ -34,8 +34,8 @@ message RpbYokozunaSchema {
}
```

-This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.1.4/developing/usage/search-schemas) `content` as XML.
+This message *must* include both the schema `name` and its Solr [search schema]({{<baseurl>}}riak/kv/2.1.4/developing/usage/search-schemas) `content` as XML.

## Response

-Returns a [RpbPutResp](/riak/kv/2.1.4/developing/api/protocol-buffers/#message-codes) code with no data on success.
+Returns a [RpbPutResp]({{<baseurl>}}riak/kv/2.1.4/developing/api/protocol-buffers/#message-codes) code with no data on success.

diff --git a/content/riak/kv/2.1.4/developing/app-guide.md b/content/riak/kv/2.1.4/developing/app-guide.md
index 20b684cc34..bd9f5d2b65 100644
--- a/content/riak/kv/2.1.4/developing/app-guide.md
+++ b/content/riak/kv/2.1.4/developing/app-guide.md
@@ -16,48 +16,48 @@ aliases:
- /riak/kv/2.1.4/dev/using/application-guide/
---

-[usage conflict resolution]: /riak/kv/2.1.4/developing/usage/conflict-resolution
-[dev data model#log]: /riak/kv/2.1.4/developing/data-modeling/#log-data
-[dev data model#sensor]: /riak/kv/2.1.4/developing/data-modeling/#sensor-data
-[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency
-[dev data model#user]: /riak/kv/2.1.4/developing/data-modeling/#user-data
-[dev kv model]: /riak/kv/2.1.4/developing/key-value-modeling
-[dev data types]: /riak/kv/2.1.4/developing/data-types
-[dev data types#counters]: /riak/kv/2.1.4/developing/data-types/counters
-[dev data types#sets]: /riak/kv/2.1.4/developing/data-types/sets
-[dev data types#maps]: /riak/kv/2.1.4/developing/data-types/maps
-[usage create objects]: /riak/kv/2.1.4/developing/usage/creating-objects
-[usage search]: /riak/kv/2.1.4/developing/usage/search
-[use ref search]: /riak/kv/2.1.4/using/reference/search
-[usage 2i]: /riak/kv/2.1.4/developing/usage/secondary-indexes
-[dev client libraries]: /riak/kv/2.1.4/developing/client-libraries
-[concept crdts]: /riak/kv/2.1.4/learn/concepts/crdts
-[dev data model]: /riak/kv/2.1.4/developing/data-modeling
-[usage mapreduce]: /riak/kv/2.1.4/developing/usage/mapreduce
-[apps mapreduce]: /riak/kv/2.1.4/developing/app-guide/advanced-mapreduce
-[use ref 2i]: /riak/kv/2.1.4/using/reference/secondary-indexes
-[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb
-[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask
-[plan backend memory]: /riak/kv/2.1.4/setup/planning/backend/memory
-[obj model java]: /riak/kv/2.1.4/developing/getting-started/java/object-modeling
-[obj model ruby]: /riak/kv/2.1.4/developing/getting-started/ruby/object-modeling
-[obj model python]: /riak/kv/2.1.4/developing/getting-started/python/object-modeling
-[obj model csharp]: /riak/kv/2.1.4/developing/getting-started/csharp/object-modeling
-[obj model nodejs]: /riak/kv/2.1.4/developing/getting-started/nodejs/object-modeling
-[obj model
erlang]: /riak/kv/2.1.4/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.1.4/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[use ref strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.1.4/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.1.4/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.1.4/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.1.4/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties -[install index]: /riak/kv/2.1.4/setup/installing -[getting started]: /riak/kv/2.1.4/developing/getting-started -[usage index]: /riak/kv/2.1.4/developing/usage -[glossary]: /riak/kv/2.1.4/learn/glossary -[write-once]: /riak/kv/2.1.4/developing/app-guide/write-once +[usage conflict resolution]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/conflict-resolution +[dev data model#log]: {{< baseurl >}}riak/kv/2.1.4/developing/data-modeling/#log-data +[dev data model#sensor]: {{< baseurl >}}riak/kv/2.1.4/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[dev data model#user]: {{< baseurl >}}riak/kv/2.1.4/developing/data-modeling/#user-data +[dev kv model]: {{< baseurl >}}riak/kv/2.1.4/developing/key-value-modeling +[dev data types]: {{< baseurl >}}riak/kv/2.1.4/developing/data-types +[dev data types#counters]: {{< baseurl >}}riak/kv/2.1.4/developing/data-types/counters +[dev data types#sets]: {{< baseurl >}}riak/kv/2.1.4/developing/data-types/sets +[dev data types#maps]: {{< baseurl >}}riak/kv/2.1.4/developing/data-types/maps +[usage create objects]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/creating-objects +[usage search]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/search +[use ref search]: {{< baseurl >}}riak/kv/2.1.4/using/reference/search +[usage 2i]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes +[dev client libraries]: {{< baseurl >}}riak/kv/2.1.4/developing/client-libraries +[concept crdts]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/crdts +[dev data model]: {{< baseurl >}}riak/kv/2.1.4/developing/data-modeling +[usage mapreduce]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/mapreduce +[apps mapreduce]: {{< baseurl >}}riak/kv/2.1.4/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{< baseurl >}}riak/kv/2.1.4/using/reference/secondary-indexes +[plan backend leveldb]: {{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask +[plan backend memory]: {{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/memory +[obj model java]: {{< baseurl >}}riak/kv/2.1.4/developing/getting-started/java/object-modeling +[obj model ruby]: {{< baseurl >}}riak/kv/2.1.4/developing/getting-started/ruby/object-modeling +[obj model python]: {{< baseurl >}}riak/kv/2.1.4/developing/getting-started/python/object-modeling +[obj model csharp]: {{< baseurl >}}riak/kv/2.1.4/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{< baseurl >}}riak/kv/2.1.4/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{< baseurl >}}riak/kv/2.1.4/developing/getting-started/erlang/object-modeling +[obj model golang]: {{< baseurl >}}riak/kv/2.1.4/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{< baseurl >}}riak/kv/2.1.4/using/reference/strong-consistency +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.1.4/using/reference/strong-consistency +[cluster ops strong consistency]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/strong-consistency +[config strong consistency]: 
{{< baseurl >}}riak/kv/2.1.4/configuring/strong-consistency +[apps strong consistency]: {{< baseurl >}}riak/kv/2.1.4/developing/app-guide/strong-consistency +[usage update objects]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/updating-objects +[apps replication properties]: {{< baseurl >}}riak/kv/2.1.4/developing/app-guide/replication-properties +[install index]: {{< baseurl >}}riak/kv/2.1.4/setup/installing +[getting started]: {{< baseurl >}}riak/kv/2.1.4/developing/getting-started +[usage index]: {{< baseurl >}}riak/kv/2.1.4/developing/usage +[glossary]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary +[write-once]: {{< baseurl >}}riak/kv/2.1.4/developing/app-guide/write-once So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -120,7 +120,7 @@ Riak may not be such a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance. * **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.1.4/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.1.4/developing/app-guide/advanced-mapreduce.md index d856879cb6..b34a33e0a3 100644 --- a/content/riak/kv/2.1.4/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.1.4/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.1.4/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.1.4/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.1.4/using/reference/custom-code -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[config reference]: /riak/kv/2.1.4/configuring/reference +[usage 2i]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes +[apps replication properties]: {{< baseurl >}}riak/kv/2.1.4/developing/app-guide/replication-properties +[use ref custom code]: {{< baseurl >}}riak/kv/2.1.4/using/reference/custom-code +[usage bucket types]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/bucket-types +[glossary vnode]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary/#vnode +[config reference]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference > **Use MapReduce sparingly** > @@ -725,7 +725,7 @@ You can use streaming with Erlang via the Riak local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{< baseurl >}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.1.4/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.1.4/developing/app-guide/cluster-metadata.md index a7bc5e9dfa..0c21a8a718 100644 --- a/content/riak/kv/2.1.4/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.1.4/developing/app-guide/cluster-metadata.md @@ -22,7 +22,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. 
One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.1.4/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{< baseurl >}}riak/kv/2.1.4/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -57,7 +57,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. Logical clocks, namely [dotted version vectors](/riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.1.4/developing/app-guide/replication-properties.md b/content/riak/kv/2.1.4/developing/app-guide/replication-properties.md index a3611d5011..48e0c41baf 100644 --- a/content/riak/kv/2.1.4/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.1.4/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.4/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters +[usage bucket types]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/bucket-types +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.1.4/using/reference/strong-consistency +[concept clusters]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.1.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{< baseurl >}}riak/kv/2.1.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.1.4/using/reference/strong-consistency/) system for data in specified buckets. 
Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{< baseurl >}}riak/kv/2.1.4/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.1.4/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{< baseurl >}}riak/kv/2.1.4/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.1.4/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.1.4/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{< baseurl >}}riak/kv/2.1.4/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). 
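For a client's-eye view of the parameters in this table, here is a minimal sketch with the official Python client (placeholder bucket and key names, not from the original page); it sets bucket-level properties like the `n_val`/`r`/`w` example above, then overrides `r` for a single read, anticipating the client-level settings discussed below:

```python
# Minimal sketch, not from the original docs. Assumes a local node on the
# default PB port and a hypothetical 'sensor_data' bucket.
from riak import RiakClient

client = RiakClient(protocol='pbc', pb_port=8087)
bucket = client.bucket('sensor_data')

# Bucket-level defaults, mirroring the n_val/r/w example above.
bucket.set_properties({'n_val': 5, 'r': 3, 'w': 3})

# A per-request override: this one read succeeds as soon as a single
# replica responds, regardless of the bucket-level r.
obj = bucket.get('reading-0001', r=1)
```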
@@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be set at the bucket level. You can use [bucket types](/riak/kv/2.1.4/developing/usage/bucket-types) +that they can be set at the bucket level. You can use [bucket types]({{< baseurl >}}riak/kv/2.1.4/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.1.4/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{< baseurl >}}riak/kv/2.1.4/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.1.4/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{< baseurl >}}riak/kv/2.1.4/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.1.4/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.1.4/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.1.4/setup/planning/backend/multi). +documentation on [Bitcask]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask), [LevelDB]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb), and [multiple backends]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.1.4/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{< baseurl >}}riak/kv/2.1.4/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.1.4/developing/usage) +refer to the section on [development usage with Riak KV]({{< baseurl >}}riak/kv/2.1.4/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.1.4/learn/concepts/causal-context#vector-clocks) documentation for more information. 
+and siblings, so we recommend reading the [Vector Clocks]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.1.4/developing/app-guide/strong-consistency.md b/content/riak/kv/2.1.4/developing/app-guide/strong-consistency.md index c75871a46d..0fe6916a80 100644 --- a/content/riak/kv/2.1.4/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.1.4/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.1.4/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/kv/2.1.4/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.1.4/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.1.4/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.1.4/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.1.4/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.1.4/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.1.4/developing/usage/conflict-resolution -[usage update objects]: 
/riak/kv/2.1.4/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/kv/2.1.4/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.1.4/developing/client-libraries -[getting started]: /riak/kv/2.1.4/developing/getting-started -[config strong consistency#details]: /riak/kv/2.1.4/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.1.4/using/reference/strong-consistency +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{< baseurl >}}riak/kv/2.1.4/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary/#vnode +[config strong consistency#enable]: {{< baseurl >}}riak/kv/2.1.4/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/bucket-types +[cluster ops bucket types]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/bucket-types +[apps replication properties]: {{< baseurl >}}riak/kv/2.1.4/developing/app-guide/replication-properties +[config strong consistency]: {{< baseurl >}}riak/kv/2.1.4/configuring/strong-consistency +[config strong consistency#fault]: {{< baseurl >}}riak/kv/2.1.4/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/causal-context +[concept causal context#vector]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/conflict-resolution +[usage update objects]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/updating-objects +[use ref strong consistency#vs]: {{< baseurl >}}riak/kv/2.1.4/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{< baseurl >}}riak/kv/2.1.4/developing/client-libraries +[getting started]: {{< baseurl >}}riak/kv/2.1.4/developing/getting-started +[config strong consistency#details]: {{< baseurl >}}riak/kv/2.1.4/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.1.4/developing/app-guide/write-once.md b/content/riak/kv/2.1.4/developing/app-guide/write-once.md index c2414eae56..9bf8de92e9 100644 --- a/content/riak/kv/2.1.4/developing/app-guide/write-once.md +++ b/content/riak/kv/2.1.4/developing/app-guide/write-once.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.1.4/dev/advanced/write-once --- -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[bucket type]: /riak/kv/2.1.4/developing/usage/bucket-types -[Riak data types]: /riak/kv/2.1.4/developing/data-types -[strong consistency]: /riak/kv/2.1.4/developing/app-guide/strong-consistency +[glossary vnode]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary/#vnode +[bucket type]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/bucket-types +[Riak data types]: {{< baseurl >}}riak/kv/2.1.4/developing/data-types +[strong consistency]: {{< baseurl >}}riak/kv/2.1.4/developing/app-guide/strong-consistency Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. 
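To make the write-once fast path concrete, here is a minimal sketch with the official Python client; it assumes an operator has already created and activated a bucket type with `write_once = true`, and the type and bucket names below are hypothetical:

```python
# Minimal sketch, not from the original docs. Assumes an operator already ran
#   riak-admin bucket-type create write-once '{"props":{"write_once":true}}'
# and activated it; 'write-once' and 'events' are placeholder names.
from riak import RiakClient

client = RiakClient(protocol='pbc', pb_port=8087)
events = client.bucket_type('write-once').bucket('events')

# Each key is written exactly once and never updated, so the coordinated
# read-before-write is skipped on the server side.
events.new('event-0001', data={'level': 'info', 'msg': 'node started'}).store()
```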
@@ -98,7 +98,7 @@ The relationship between the `riak_client`, write-once workers, and vnode proxies is illustrated in the following diagram:
-![Write Once](/images/write_once.png) +![Write Once]({{< baseurl >}}images/write_once.png)
## Client Impacts @@ -149,7 +149,7 @@ LevelDB. Riak will automatically fall back to synchronous writes with all other backends. {{% note title="Note on the `multi` backend" %}} -The [Multi](/riak/kv/2.1.4/setup/planning/backend/multi) backend does not +The [Multi]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/multi) backend does not support asynchronous writes. Therefore, if LevelDB is used with the Multi backend, it will be used in synchronous mode. {{% /note %}} diff --git a/content/riak/kv/2.1.4/developing/client-libraries.md b/content/riak/kv/2.1.4/developing/client-libraries.md index 7ab7aed9d5..73a36b856c 100644 --- a/content/riak/kv/2.1.4/developing/client-libraries.md +++ b/content/riak/kv/2.1.4/developing/client-libraries.md @@ -37,7 +37,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{< baseurl >}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.1.4/developing/data-types.md b/content/riak/kv/2.1.4/developing/data-types.md index f700a7ef9f..ae5264fa10 100644 --- a/content/riak/kv/2.1.4/developing/data-types.md +++ b/content/riak/kv/2.1.4/developing/data-types.md @@ -38,9 +38,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -261,5 +261,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. 
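Since the setup steps above stop short of actual usage, here is a minimal sketch of working with a set once the bucket type exists, using the official Python client. It assumes an activated `sets` bucket type created with `datatype = set`; the bucket and key names are placeholders:

```python
# Minimal sketch, not from the original docs. Assumes a 'sets' bucket type
# with datatype = set has been created and activated by an operator.
from riak import RiakClient
from riak.datatypes import Set

client = RiakClient(protocol='pbc', pb_port=8087)
travel = client.bucket_type('sets').bucket('travel')  # placeholder names

cities = Set(travel, 'cities')
cities.add('Toronto')
cities.add('Montreal')
print(cities.dirty_value)  # local view including the staged additions
cities.store()             # send the staged additions to Riak
```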
diff --git a/content/riak/kv/2.1.4/developing/faq.md b/content/riak/kv/2.1.4/developing/faq.md index b82c91e508..e171a33f78 100644 --- a/content/riak/kv/2.1.4/developing/faq.md +++ b/content/riak/kv/2.1.4/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.1.4/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.1.4/using/performance/benchmarking -[Bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.1.4/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.1.4/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.1.4/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.1.4/developing/client-libraries -[MapReduce]: /riak/kv/2.1.4/developing/usage/mapreduce -[Memory]: /riak/kv/2.1.4/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.1.4/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.1.4/learn/concepts/causal-context#vector-clocks +[Basho Bench]: {{< baseurl >}}riak/kv/2.1.4/using/performance/benchmarking +[Bitcask]: {{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask +[Bucket Properties]: {{< baseurl >}}riak/kv/2.1.4/developing/usage +[commit hooks]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/commit-hooks +[Configuration Files]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference +[Erlang Riak Client]: {{< baseurl >}}riak/kv/2.1.4/developing/client-libraries +[MapReduce]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/mapreduce +[Memory]: {{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/memory +[Riak CS]: {{< baseurl >}}riak/cs/2.1.1 +[System Planning]: {{< baseurl >}}riak/kv/2.1.4/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.1.4/developing/getting-started.md b/content/riak/kv/2.1.4/developing/getting-started.md index 4f05ede750..0a1a31a439 100644 --- a/content/riak/kv/2.1.4/developing/getting-started.md +++ b/content/riak/kv/2.1.4/developing/getting-started.md @@ -16,8 +16,8 @@ aliases: - /riak/kv/2.1.4/dev/taste-of-riak/ --- -[install index]: /riak/kv/2.1.4/setup/installing -[dev client libraries]: /riak/kv/2.1.4/developing/client-libraries +[install index]: {{< baseurl >}}riak/kv/2.1.4/setup/installing +[dev client libraries]: {{< baseurl >}}riak/kv/2.1.4/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.1.4/developing/getting-started/csharp.md b/content/riak/kv/2.1.4/developing/getting-started/csharp.md index 8ef0b38c22..d3add3c3eb 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/csharp.md +++ b/content/riak/kv/2.1.4/developing/getting-started/csharp.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/dev/taste-of-riak/csharp --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.4/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -77,4 +77,4 @@ We are now ready to start interacting with Riak. 
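Before moving on to the per-language CRUD chapters, a quick way to confirm that the node from "Running A Cluster" is actually reachable is a ping. A minimal sketch with the official Python client, assuming the default Protocol Buffers port:

```python
# Minimal sketch, not from the original docs: confirm the node is up
# before trying any CRUD operations.
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
print(client.ping())  # True if the node answers
```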
## Next Steps -[CRUD Operations](/riak/kv/2.1.4/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.1.4/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.1.4/developing/getting-started/csharp/querying.md b/content/riak/kv/2.1.4/developing/getting-started/csharp/querying.md index 3fa6c49e7f..8664080c7a 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.1.4/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.1.4/developing/getting-started/erlang.md b/content/riak/kv/2.1.4/developing/getting-started/erlang.md index 8ee356edd9..9eeada0f32 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/erlang.md +++ b/content/riak/kv/2.1.4/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.4/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.4/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.1.4/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.1.4/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.1.4/developing/getting-started/erlang/object-modeling.md index 7f5929e159..306067f15a 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.1.4/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.1.4/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{< baseurl >}}riak/kv/2.1.4/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.1.4/developing/getting-started/erlang/querying.md b/content/riak/kv/2.1.4/developing/getting-started/erlang/querying.md index 1874ef25f0..fb7b0a26e4 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.1.4/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.1.4/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{< baseurl >}}riak/kv/2.1.4/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.1.4/developing/getting-started/golang.md b/content/riak/kv/2.1.4/developing/getting-started/golang.md index 781bcdc974..ee9fb87856 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/golang.md +++ b/content/riak/kv/2.1.4/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.4/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.4/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.4/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.1.4/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.1.4/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.1.4/developing/getting-started/golang/object-modeling.md index 22b8681cdb..b90223f1b6 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.1.4/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.1.4/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{< baseurl >}}riak/kv/2.1.4/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.1.4/developing/getting-started/golang/querying.md b/content/riak/kv/2.1.4/developing/getting-started/golang/querying.md index 05ce11d200..2021ff8fbc 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.1.4/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.1.4/developing/getting-started/java.md b/content/riak/kv/2.1.4/developing/getting-started/java.md index 3b50abcaf6..a490771841 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/java.md +++ b/content/riak/kv/2.1.4/developing/getting-started/java.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/dev/taste-of-riak/java --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.4/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
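Because the secondary-index note above recurs in every client guide, one concrete sketch may be worth the repetition. With the official Python client (placeholder names, and assuming a LevelDB or Memory backend as those notes require), tagging and querying an index looks like:

```python
# Minimal sketch, not from the original docs. Secondary indexes require the
# LevelDB or Memory backend; the names below are placeholders.
from riak import RiakClient

client = RiakClient(protocol='pbc', pb_port=8087)
bucket = client.bucket('customers')

obj = bucket.new('john_smith', data={'name': 'John Smith'})
obj.add_index('last_name_bin', 'smith')  # a binary (string) index entry
obj.store()

# Look up every key tagged with last_name_bin = smith, no full scan needed.
for key in bucket.get_index('last_name_bin', 'smith'):
    print(key)
```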
@@ -84,4 +84,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.1.4/developing/getting-started/java/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.1.4/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.1.4/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.1.4/developing/getting-started/java/crud-operations.md index 7c2e35d162..8af45f7943 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.1.4/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.1.4/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{< baseurl >}}riak/kv/2.1.4/developing/usage/updating-objects/) +and [Conflict Resolution]({{< baseurl >}}riak/kv/2.1.4/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -178,6 +178,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.1.4/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{< baseurl >}}riak/kv/2.1.4/developing/usage/updating-objects/) +and [Conflict Resolution]({{< baseurl >}}riak/kv/2.1.4/developing/usage/conflict-resolution/) documentation. diff --git a/content/riak/kv/2.1.4/developing/getting-started/java/querying.md b/content/riak/kv/2.1.4/developing/getting-started/java/querying.md index b5d6d19181..39ec6e6944 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.1.4/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.1.4/developing/getting-started/nodejs.md b/content/riak/kv/2.1.4/developing/getting-started/nodejs.md index 8b35a3bb20..0347370d44 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.1.4/developing/getting-started/nodejs.md @@ -20,7 +20,7 @@ aliases: [node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.4/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.4/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.1.4/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.1.4/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.1.4/developing/getting-started/nodejs/querying.md index e374aa2bc3..b94939d84e 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.1.4/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.1.4/developing/getting-started/php.md b/content/riak/kv/2.1.4/developing/getting-started/php.md index 01c5f1507e..2210e0727e 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/php.md +++ b/content/riak/kv/2.1.4/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.4/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. 
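All of these getting-started guides funnel into a CRUD chapter; as a language-neutral preview of that cycle, here is a minimal sketch with the official Python client (placeholder bucket and key names, not taken from any of the chapters):

```python
# Minimal sketch, not from the original docs: the create/read/delete
# rhythm the per-language CRUD chapters walk through.
from riak import RiakClient

client = RiakClient(protocol='pbc', pb_port=8087)
bucket = client.bucket('welcome')

obj = bucket.new('greeting', data={'message': 'hello riak'})
obj.store()                       # create
fetched = bucket.get('greeting')  # read
print(fetched.data)
fetched.delete()                  # delete
```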
## Next Steps -[CRUD Operations](/riak/kv/2.1.4/developing/getting-started/php/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.1.4/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.1.4/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.1.4/developing/getting-started/php/crud-operations.md index 3dcbe9b83e..ab63ee4d6d 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.1.4/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter](/riak/kv/2.1.4/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{< baseurl >}}riak/kv/2.1.4/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.1.4/developing/getting-started/php/querying.md b/content/riak/kv/2.1.4/developing/getting-started/php/querying.md index 5eced10f00..ae0404da2e 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.1.4/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.1.4/developing/getting-started/python.md b/content/riak/kv/2.1.4/developing/getting-started/python.md index 63bc37f36f..3d02de9673 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/python.md +++ b/content/riak/kv/2.1.4/developing/getting-started/python.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/dev/taste-of-riak/python --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.4/using/running-a-cluster) first. 
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -94,4 +94,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.1.4/developing/getting-started/python/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.1.4/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.1.4/developing/getting-started/python/querying.md b/content/riak/kv/2.1.4/developing/getting-started/python/querying.md index e01b91a3c1..7913a5dc9a 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.1.4/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.1.4/developing/getting-started/ruby.md b/content/riak/kv/2.1.4/developing/getting-started/ruby.md index 2de47ef02a..a179272e6d 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/ruby.md +++ b/content/riak/kv/2.1.4/developing/getting-started/ruby.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/dev/taste-of-riak/ruby --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.1.4/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{< baseurl >}}riak/kv/2.1.4/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -59,4 +59,4 @@ We are now ready to start interacting with Riak. 
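The querying chapters above all lean on the same denormalization idea: store the keys of related objects and follow them on read, instead of joining. A minimal sketch of that pattern with the official Python client (placeholder names, not from the original chapters):

```python
# Minimal sketch, not from the original docs: modeling a relationship by
# storing related keys, then following them on read. Placeholder names.
from riak import RiakClient

client = RiakClient(protocol='pbc', pb_port=8087)
customers = client.bucket('customers')
orders = client.bucket('orders')

orders.new('order-1', data={'total': 20.50}).store()
customers.new('customer-1',
              data={'name': 'Jane', 'order_keys': ['order-1']}).store()

# "Query" by key traversal rather than by join.
jane = customers.get('customer-1')
for key in jane.data['order_keys']:
    print(orders.get(key).data)
```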
## Next Steps -[CRUD Operations](/riak/kv/2.1.4/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{< baseurl >}}riak/kv/2.1.4/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.1.4/developing/getting-started/ruby/querying.md b/content/riak/kv/2.1.4/developing/getting-started/ruby/querying.md index 83c22d66bc..85f4e836c6 100644 --- a/content/riak/kv/2.1.4/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.1.4/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/memory) or [LevelDB]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb). [Bitcask]({{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.1.4/developing/key-value-modeling.md b/content/riak/kv/2.1.4/developing/key-value-modeling.md index 6e976ad3a0..7dbeda00e6 100644 --- a/content/riak/kv/2.1.4/developing/key-value-modeling.md +++ b/content/riak/kv/2.1.4/developing/key-value-modeling.md @@ -17,7 +17,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.1.4/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.1.4/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.1.4/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{< baseurl >}}riak/kv/2.1.4/developing/usage/search), [secondary indexes (2i)]({{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes/), and [Riak Data Types]({{< baseurl >}}riak/kv/2.1.4/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -25,7 +25,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.1.4/developing/app-guide/) for a better sense of which features you might need. 
+for each of them or consulting our guide to [building applications with Riak]({{< baseurl >}}riak/kv/2.1.4/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -38,12 +38,12 @@ objects. Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.1.4/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.1.4/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) that determines the bucket's [replication](/riak/kv/2.1.4/developing/app-guide/replication-properties) and other properties +* The object's [key]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) +* The [bucket type]({{< baseurl >}}riak/kv/2.1.4/developing/usage/bucket-types) that determines the bucket's [replication]({{< baseurl >}}riak/kv/2.1.4/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -80,7 +80,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.1.4/developing/api/http)): +look like (for the [HTTP API]({{< baseurl >}}riak/kv/2.1.4/developing/api/http)): ``` GET/PUT/DELETE /bucket/<bucket>/keys/<key> ``` @@ -138,13 +138,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.1.4/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{< baseurl >}}riak/kv/2.1.4/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.1.4/developing/data-types/sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.1.4/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{< baseurl >}}riak/kv/2.1.4/developing/data-types/sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{< baseurl >}}riak/kv/2.1.4/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -153,7 +153,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. 
We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.1.4/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.1.4/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -191,7 +191,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.4/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.1.4/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.4/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.4/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -435,8 +435,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.1.4/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.1.4/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.1.4/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.1.4/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -444,7 +444,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.1.4/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.1.4/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.1.4/developing/usage/commit-hooks.md b/content/riak/kv/2.1.4/developing/usage/commit-hooks.md index 8ac83649a7..2460a72a8b 100644 --- a/content/riak/kv/2.1.4/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.1.4/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. 
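As a rough illustration of the kind of hook being described, here is a minimal sketch of an Erlang pre-commit function that rejects empty values. The module name and validation rule are invented for the example, the module would need to be compiled onto every node's code path before a bucket type could reference it, and the return-value semantics it relies on are explained below:

```erlang
%% Sketch only: a validation-style pre-commit hook. Deletes are writes too,
%% so the hook first checks for the X-Riak-Deleted metadata entry discussed
%% in the text below and lets deletes through untouched.
-module(no_empty_values).
-export([precommit/1]).

precommit(Object) ->
    case dict:is_key(<<"X-Riak-Deleted">>, riak_object:get_metadata(Object)) of
        true ->
            Object; %% a delete; pass it through
        false ->
            case riak_object:get_value(Object) of
                <<>> -> {fail, "empty values are not allowed"};
                _    -> Object %% returning the object allows the write
            end
    end.
```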
-Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.1.4/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.1.4/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. ## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.1.4/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.1.4/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.1.4/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.1.4/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.1.4/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.1.4/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.1.4/developing/usage/conflict-resolution.md b/content/riak/kv/2.1.4/developing/usage/conflict-resolution.md index 468ca546c9..116a27fd7d 100644 --- a/content/riak/kv/2.1.4/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.1.4/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.1.4/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.1.4/learn/concepts/clusters) system in which any [node](/riak/kv/2.1.4/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. 
It was built as a [clustered]({{}}riak/kv/2.1.4/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.1.4/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. -If you are using Riak in an [eventually consistent](/riak/kv/2.1.4/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.1.4/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.1.4/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.1.4/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.1.4/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.1.4/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.1.4/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.1.4/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.1.4/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.1.4/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.1.4/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.1.4/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.1.4/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.1.4/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. 
The most important [bucket properties](/riak/kv/2.1.4/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.1.4/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. @@ -87,7 +87,7 @@ If the `[allow_mult](#siblings)` parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -`[last_write_wins](/riak/kv/2.1.4/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, +`[last_write_wins]({{}}riak/kv/2.1.4/learn/concepts/buckets)`. If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.1.4/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.1.4/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.1.4/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.1.4/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.1.4/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.1.4/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.1.4/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.1.4/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.1.4/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.1.4/configuring/reference) to change the [default bucket properties](/riak/kv/2.1.4/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.1.4/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.1.4/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.1.4/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.1.4/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. 
If set to `true`, [dotted version vectors]({{}}riak/kv/2.1.4/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.1.4/learn/concepts/causal-context#vector-clocks) will be used. Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.1.4/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.1.4/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.1.4/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.1.4/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.1.4/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.1.4/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.1.4/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.1.4/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.1.4/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.1.4/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.1.4/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.4/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. 
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.1.4/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.1.4/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.1.4/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.1.4/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.1.4/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -611,7 +611,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.1.4/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.1.4/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -666,7 +666,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/csharp.md index 37061c51b3..ccac61b281 100644 --- a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.4/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
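Whatever the client language, the underlying cycle is the same: fetch, detect siblings, resolve, write back. A rough sketch of that cycle with the official Erlang client, using placeholder bucket, key, and resolution logic (a real application would merge siblings according to its data model rather than arbitrarily keeping one):

```erlang
%% Sketch only: the generic fetch-resolve-write cycle. Picking the first
%% sibling is placeholder logic; a real resolver would merge them.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
{ok, Obj} = riakc_pb_socket:get(Pid, {<<"siblings">>, <<"users">>}, <<"some_user">>),
Resolved = case riakc_obj:value_count(Obj) of
               1 -> Obj;                             %% no conflict; nothing to do
               _ -> riakc_obj:select_sibling(1, Obj) %% keeps the causal context
           end,
ok = riakc_pb_socket:put(Pid, Resolved).
```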
diff --git a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/golang.md index 7ad0d8458c..3456a40da3 100644 --- a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.4/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to usecase-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/java.md index e357e23912..9023e6e536 100644 --- a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.4/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.4/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.4/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.4/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.4/developing/data-types/) that have specific conflict resolution mechanics built in. 
If you have data that -can be modeled as a [counter](/riak/kv/2.1.4/developing/data-types/counters), [set](/riak/kv/2.1.4/developing/data-types/sets), or [map](/riak/kv/2.1.4/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.4/developing/data-types/counters), [set]({{}}riak/kv/2.1.4/developing/data-types/sets), or [map]({{}}riak/kv/2.1.4/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.4/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.4/developing/data-types/sets). diff --git a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/nodejs.md index 1c4180a8c9..84918d50ed 100644 --- a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.4/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/php.md index 163346998b..aae50ee43c 100644 --- a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.4/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.4/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.4/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.4/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.4/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.1.4/developing/data-types/counters), [set](/riak/kv/2.1.4/developing/data-types/sets), or [map](/riak/kv/2.1.4/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.4/developing/data-types/counters), [set]({{}}riak/kv/2.1.4/developing/data-types/sets), or [map]({{}}riak/kv/2.1.4/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.4/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.4/developing/data-types/sets). diff --git a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/python.md index 8d9463b0bd..9e4b436522 100644 --- a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.4/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -183,7 +183,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.4/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.4/developing/usage) section. ## More Advanced Example @@ -238,9 +238,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.4/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.4/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.1.4/developing/data-types/counters), [set](/riak/kv/2.1.4/developing/data-types/sets), or [map](/riak/kv/2.1.4/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.4/developing/data-types/counters), [set]({{}}riak/kv/2.1.4/developing/data-types/sets), or [map]({{}}riak/kv/2.1.4/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -249,4 +249,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.4/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.4/developing/data-types/sets). diff --git a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/ruby.md index ffa1df8af6..f7c83c1d01 100644 --- a/content/riak/kv/2.1.4/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.1.4/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.4/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.1.4/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.1.4/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.1.4/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.1.4/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.1.4/developing/data-types/counters), [set](/riak/kv/2.1.4/developing/data-types/sets), or [map](/riak/kv/2.1.4/developing/data-types/maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.1.4/developing/data-types/counters), [set]({{}}riak/kv/2.1.4/developing/data-types/sets), or [map]({{}}riak/kv/2.1.4/developing/data-types/maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.1.4/developing/data-types/sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.1.4/developing/data-types/sets). diff --git a/content/riak/kv/2.1.4/developing/usage/creating-objects.md b/content/riak/kv/2.1.4/developing/usage/creating-objects.md index 063d314f7f..01dd65b9c2 100644 --- a/content/riak/kv/2.1.4/developing/usage/creating-objects.md +++ b/content/riak/kv/2.1.4/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.1.4/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.1.4/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -27,7 +27,7 @@ In the example above, our read was unsuccessful because our Riak cluster is currently empty. Let's change that by storing an object containing information about a dog named Rufus. We'll store that object in the location described above, i.e. in the key `rufus` in the bucket `dogs`, -which bears the `animals` [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types). +which bears the `animals` [bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types). 
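In Erlang-client terms, the write described here (using the `WOOF!` value and `text/plain` content type discussed just below) might look something like this sketch, assuming a node on localhost:8087 and an active `animals` bucket type:

```erlang
%% Sketch only: storing the object described above via the Erlang client.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
Obj = riakc_obj:new({<<"animals">>, <<"dogs">>}, %% {bucket type, bucket}
                    <<"rufus">>,                 %% key
                    <<"WOOF!">>,                 %% value
                    <<"text/plain">>),           %% content type
ok = riakc_pb_socket:put(Pid, Obj).
```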
The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -122,7 +122,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, run the same read operation in [Reading Objects](/riak/kv/2.1.4/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no +Now, run the same read operation in [Reading Objects]({{}}riak/kv/2.1.4/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Your Riak cluster is no longer empty! ### Store an Object @@ -143,7 +143,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.1.4/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.1.4/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.1.4/developing/usage/custom-extractors.md b/content/riak/kv/2.1.4/developing/usage/custom-extractors.md index 0f7fbeb78e..fa342844b8 100644 --- a/content/riak/kv/2.1.4/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.1.4/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.1.4/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.1.4/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.1.4/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.1.4/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.1.4/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.1.4/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added to `<fields>` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.1.4/developing/usage/deleting-objects.md b/content/riak/kv/2.1.4/developing/usage/deleting-objects.md index 110a5f123f..517a0f5e42 100644 --- a/content/riak/kv/2.1.4/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.1.4/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.1.4/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.1.4/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.1.4/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.1.4/developing/usage/document-store.md b/content/riak/kv/2.1.4/developing/usage/document-store.md index 8f97c8b798..288af40a82 100644 --- a/content/riak/kv/2.1.4/developing/usage/document-store.md +++ b/content/riak/kv/2.1.4/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.1.4/developing/usage/search/) and [Riak Data Types](/riak/kv/2.1.4/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.1.4/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.1.4/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.1.4/developing/data-types/maps). +[Riak maps]({{}}riak/kv/2.1.4/developing/data-types/maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.1.4/developing/data-types/maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.1.4/developing/data-types/maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**.
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.1.4/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.1.4/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.1.4/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.1.4/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.1.4/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.1.4/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `<fields>` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.1.4/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.1.4/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.1.4/developing/usage/mapreduce.md b/content/riak/kv/2.1.4/developing/usage/mapreduce.md index 20c5088e39..eeb116ffa0 100644 --- a/content/riak/kv/2.1.4/developing/usage/mapreduce.md +++ b/content/riak/kv/2.1.4/developing/usage/mapreduce.md @@ -33,9 +33,9 @@ transferring a potentially huge dataset to a client algorithm. Developers can use MapReduce for things like filtering documents by tags, counting words in documents, and extracting links to related data. In Riak, MapReduce is one method for querying that is not strictly based -on key querying, alongside [secondary indexes](/riak/kv/2.1.4/developing/usage/secondary-indexes/) -and [Search](/riak/kv/2.1.4/developing/usage/search/). MapReduce jobs can be submitted through the -[HTTP API](/riak/kv/2.1.4/developing/api/http) or the [Protocol Buffers API](/riak/kv/2.1.4/developing/api/protocol-buffers/), although we +on key querying, alongside [secondary indexes]({{}}riak/kv/2.1.4/developing/usage/secondary-indexes/) +and [Search]({{}}riak/kv/2.1.4/developing/usage/search/). MapReduce jobs can be submitted through the +[HTTP API]({{}}riak/kv/2.1.4/developing/api/http) or the [Protocol Buffers API]({{}}riak/kv/2.1.4/developing/api/protocol-buffers/), although we strongly recommend using the Protocol Buffers API for performance reasons. @@ -49,9 +49,9 @@ reasons. ## When to Use MapReduce * When you know the set of objects over which you want to MapReduce - (i.e. the locations of the objects, as specified by [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types), bucket, and key) + (i.e. the locations of the objects, as specified by [bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types), bucket, and key) * When you want to return actual objects or pieces of objects and not - just the keys.
[Search](/riak/kv/2.1.4/developing/usage/search/) and [secondary indexes](/riak/kv/2.1.4/developing/usage/secondary-indexes) are other means of returning objects based on + just the keys. [Search]({{}}riak/kv/2.1.4/developing/usage/search/) and [secondary indexes]({{}}riak/kv/2.1.4/developing/usage/secondary-indexes) are other means of returning objects based on non-key-based queries, but they only return lists of keys and not whole objects. * When you need the utmost flexibility in querying your data. MapReduce @@ -86,7 +86,7 @@ Riak MapReduce queries have two components: * A list of phases The elements of the input list are object locations as specified by -[bucket type](/riak/kv/2.1.4/developing/usage/bucket-types), bucket, and key. The elements of the +[bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types), bucket, and key. The elements of the phases list are chunks of information related to a map, a reduce, or a link function. @@ -96,7 +96,7 @@ node that the client contacts to make the request becomes the above, each job consists of a list of phases, where each phase is either a map or a reduce phase. The coordinating node uses the list of phases to route the object keys and the function that will operate over the -objects stored in those keys and instruct the proper [vnode](/riak/kv/2.1.4/learn/glossary/#vnode) to +objects stored in those keys and instruct the proper [vnode]({{}}riak/kv/2.1.4/learn/glossary/#vnode) to run that function over the right objects. After running the map function, the results are sent back to the @@ -107,20 +107,20 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. -![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example In this example, we'll create four objects with the text "caremad" repeated a varying number of times and store those objects in the bucket -`training` (which does not bear a [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types)). +`training` (which does not bear a [bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types)). An Erlang MapReduce function will be used to count the occurrences of the word "caremad." ### Data object input commands For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) -in conjunction with Riak's [HTTP API](/riak/kv/2.1.4/developing/api/http) to store the objects: +in conjunction with Riak's [HTTP API]({{}}riak/kv/2.1.4/developing/api/http) to store the objects: ```curl curl -XPUT http://localhost:8098/buckets/training/keys/foo \ @@ -218,4 +218,4 @@ counting the number of instances of the word. ## Advanced MapReduce Queries For more detailed information on MapReduce queries in Riak, we recommend -checking out our [Advanced MapReduce](/riak/kv/2.1.4/developing/app-guide/advanced-mapreduce) guide. +checking out our [Advanced MapReduce]({{}}riak/kv/2.1.4/developing/app-guide/advanced-mapreduce) guide. diff --git a/content/riak/kv/2.1.4/developing/usage/reading-objects.md b/content/riak/kv/2.1.4/developing/usage/reading-objects.md index d084939086..b9793605ee 100644 --- a/content/riak/kv/2.1.4/developing/usage/reading-objects.md +++ b/content/riak/kv/2.1.4/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. 
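For instance, the read of the key `rufus` shown a little further below might look like this rough sketch with the official Erlang client (the node address is an assumption):

```erlang
%% Sketch only: reading the `rufus` object introduced below.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
case riakc_pb_socket:get(Pid, {<<"animals">>, <<"dogs">>}, <<"rufus">>) of
    {ok, Obj} ->
        riakc_obj:get_value(Obj); %% => <<"WOOF!">> (assuming no siblings)
    {error, notfound} ->
        not_found %% the equivalent of the HTTP 404 described below
end.
```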
You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.1.4/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types/<type>/buckets/<bucket>/keys/<key> ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) `animals`: +`dogs`, which bears the [bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types) `animals`: ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.1.4/developing/usage/replication.md b/content/riak/kv/2.1.4/developing/usage/replication.md index 3f487bffd8..673496c3d7 100644 --- a/content/riak/kv/2.1.4/developing/usage/replication.md +++ b/content/riak/kv/2.1.4/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.1.4/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.1.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.1.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.1.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using +Using Strong Consistency documentation, as this option will not be covered in this tutorial.
{{% /note %}} @@ -71,7 +71,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.1.4/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.1.4/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -95,8 +95,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -105,7 +105,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.1.4/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.1.4/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -317,7 +317,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.1.4/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.1.4/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -355,7 +355,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.1.4/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.1.4/setup/planning/backend/multi). 
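As a hedged illustration of how the parameters in the table above are applied per request, the Erlang client accepts them as options on individual operations. The bucket, key, and value here anticipate the curl example further below and are otherwise illustrative:

```erlang
%% Sketch only: per-request write tuning using the W and DW parameters
%% from the table above. Assumes a node on localhost:8087.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
Obj = riakc_obj:new(<<"nba_stats">>, <<"michael_jordan">>,
                    <<"{\"stat\": 100}">>, <<"application/json">>),
ok = riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
```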
## Delete Quorum with RW @@ -530,9 +530,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.1.4/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.1.4/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.1.4/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.1.4/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -546,7 +546,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.1.4/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.1.4/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -556,8 +556,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)

diff --git a/content/riak/kv/2.1.4/developing/usage/search-schemas.md b/content/riak/kv/2.1.4/developing/usage/search-schemas.md
index f8f949b9a3..e1a36a8cdc 100644
--- a/content/riak/kv/2.1.4/developing/usage/search-schemas.md
+++ b/content/riak/kv/2.1.4/developing/usage/search-schemas.md
@@ -15,17 +15,17 @@ aliases:
 - /riak/kv/2.1.4/dev/advanced/search-schema
---

-[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters
+[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters

> **Note on Search 2.0 vs. Legacy Search**
>
> This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed
-Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/).
+Yokozuna).

Riak Search is built for ease of use, allowing you to write values into
Riak and query for values using Solr. Riak Search does a lot of work
-under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.1.4/developing/data-types/), and [more](/riak/kv/2.1.4/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.1.4/developing/data-types/), and [more]({{}}riak/kv/2.1.4/developing/usage/custom-extractors)---into something that can be indexed and searched later.
Nonetheless, you must still instruct Riak/Solr how to index a value. Are
you providing an array of strings? An integer? A date? Is your text in
English or Russian?
You can provide such instructions to Riak Search by @@ -224,7 +224,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.1.4/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.1.4/developing/usage/search.md b/content/riak/kv/2.1.4/developing/usage/search.md index 1a2af3d84e..51eab217d7 100644 --- a/content/riak/kv/2.1.4/developing/usage/search.md +++ b/content/riak/kv/2.1.4/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.1.4/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.1.4/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.4/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.1.4/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.4/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.4/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.1.4/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.1.4/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.1.4/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.1.4/developing/usage/custom-extractors). 
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.1.4/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.1.4/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.1.4/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.1.4/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.1.4/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.1.4/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.1.4/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.1.4/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.1.4/developing/usage/searching-data-types.md b/content/riak/kv/2.1.4/developing/usage/searching-data-types.md index 8811e12c09..f873f7b078 100644 --- a/content/riak/kv/2.1.4/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.1.4/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.4/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.1.4/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.1.4/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.1.4/developing/data-types/counters), [sets](/riak/kv/2.1.4/developing/data-types/sets), and [maps](/riak/kv/2.1.4/developing/data-types/maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.1.4/developing/data-types/counters), [sets]({{}}riak/kv/2.1.4/developing/data-types/sets), and [maps]({{}}riak/kv/2.1.4/developing/data-types/maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
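Since the schema fragments that follow are for reference only, it can be handy to pull down the schema Riak Search is actually using. A minimal sketch, assuming the same `RIAK_HOST` convention used by the other `curl` examples:

```bash
# Fetch the default schema shipped with Riak Search; the XML returned
# includes the Data Type fields discussed below
curl "$RIAK_HOST/search/schema/_yz_default"
```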
### Top-level Schemas -The default schema for [counters](/riak/kv/2.1.4/developing/data-types/counters) indexes each +The default schema for [counters]({{}}riak/kv/2.1.4/developing/data-types/counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.1.4/developing/data-types/sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.1.4/developing/data-types/sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.1.4/developing/data-types/maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.1.4/developing/data-types/maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) for [storing counters](/riak/kv/2.1.4/developing/data-types/counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.1.4/developing/data-types/counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types) for [storing sets](/riak/kv/2.1.4/developing/data-types/sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.1.4/developing/data-types/sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.1.4/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.1.4/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.1.4/developing/data-types/maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.1.4/developing/data-types/maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.1.4/developing/usage/secondary-indexes.md b/content/riak/kv/2.1.4/developing/usage/secondary-indexes.md index e583208d25..abc6eeb924 100644 --- a/content/riak/kv/2.1.4/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.1.4/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.1.4/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.4/setup/planning/backend/memory -[use ref strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.1.4/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.4/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.1.4/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.1.4/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.1.4/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.1.4/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.1.4/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.1.4/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.1.4/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.1.4/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.1.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.1.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the
same tag in a specified bucket later on. 2i is thus recommended when
your use case requires an easy-to-use search
-mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.1.4/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that
+mechanism that does not require a schema (as [Riak Search]({{}}riak/kv/2.1.4/using/reference/search/#schemas) does) and a basic query interface, i.e. an interface that
enables an application to tell Riak things like "fetch all objects
tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged
with numbers between 1500 and 1509."

@@ -89,11 +89,11 @@ piggybacks off of read-repair.

* If your ring size exceeds 512 partitions, 2i can cause performance
  issues in large clusters.
* When you need more than the exact match and range searches that 2i
-  supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.1.4/developing/usage/search/).
+  supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.1.4/developing/usage/search/).
* When you want to use composite queries. A query like
  `last_name=zezeski AND state=MD` would have to be split into two
  queries and the results merged (or it would need to involve
-  [MapReduce](/riak/kv/2.1.4/developing/usage/mapreduce/)).
+  [MapReduce]({{}}riak/kv/2.1.4/developing/usage/mapreduce/)).

## Query Interfaces and Examples

@@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \

> **Getting started with Riak clients**
>
-> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.4/developing/client-libraries), you can find more information about getting started with
-your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.1.4/developing/getting-started) section.
+> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.4/developing/client-libraries), you can find more information about getting started with
+your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.4/developing/getting-started) section.

This has accomplished the following:

@@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'.

> **Note on 2i queries and the R parameter**
>
-> For all 2i queries, the [R](/riak/kv/2.1.4/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1,
-which means that queries that are run while [handoffs](/riak/kv/2.1.4/learn/glossary/#hinted-handoff) and related operations are underway may not
+> For all 2i queries, the [R]({{}}riak/kv/2.1.4/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1,
+which means that queries that are run while [handoffs]({{}}riak/kv/2.1.4/learn/glossary/#hinted-handoff) and related operations are underway may not
return all keys as expected.
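To make the query interface concrete before the walkthroughs that follow, here is a minimal, hypothetical sketch of tagging an object with a binary index at write time and then querying on it (the field name and values are illustrative):

```bash
# Attach a binary secondary index via an x-riak-index-* header;
# the _bin suffix marks the index as binary
curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \
  -H 'x-riak-index-twitter_bin: jsmith123' \
  -H 'Content-Type: application/json' \
  -d '{"user_data": "..."}'

# Later, fetch every key in the bucket tagged with that value
curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
```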
### Exact Match

diff --git a/content/riak/kv/2.1.4/developing/usage/security.md b/content/riak/kv/2.1.4/developing/usage/security.md
index 5ffba5c4d8..ec8a0c4af4 100644
--- a/content/riak/kv/2.1.4/developing/usage/security.md
+++ b/content/riak/kv/2.1.4/developing/usage/security.md
@@ -15,49 +15,49 @@ aliases:
 - /riak/kv/2.1.4/dev/advanced/client-security
---

-Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.1.4/using/security/basics) that enables you to choose
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.1.4/using/security/basics) that enables you to choose

* which Riak users/clients are authorized to perform a wide variety of
  Riak operations, and
* how those users/clients are required to authenticate themselves.

-The following four authentication mechanisms, aka [security sources](/riak/kv/2.1.4/using/security/managing-sources/) are available:
+The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.1.4/using/security/managing-sources/), are available:

-* [Trust](/riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication)-based
+* [Trust]({{}}riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication)-based
  authentication enables you to specify trusted
  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
  from which all clients will be authenticated by default
-* [Password](/riak/kv/2.1.4/using/security/managing-sources/#password-based-authentication)-based authentication requires
+* [Password]({{}}riak/kv/2.1.4/using/security/managing-sources/#password-based-authentication)-based authentication requires
  that clients provide a username and password
-* [Certificate](/riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication
+* [Certificate]({{}}riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication
  requires that clients present an SSL certificate
-* [Pluggable authentication module (PAM)](/riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+* [Pluggable authentication module (PAM)]({{}}riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication)-based authentication requires
  clients to authenticate using the PAM service specified using the
- `[riak-admin security](/riak/kv/2.1.4/using/security/managing-sources/#managing-sources)`
+ `[riak-admin security]({{}}riak/kv/2.1.4/using/security/managing-sources/#managing-sources)`
  command line interface

Riak's approach to security is highly flexible. If you choose to use
Riak's security feature, you do not need to require that all clients
authenticate via the same means. Instead, you can specify authentication
sources on a client-by-client, i.e. user-by-user, basis. This means that
-you can require clients performing, say, [MapReduce](/riak/kv/2.1.4/developing/usage/mapreduce/)
-operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.1.4/developing/usage) have to use username and password. The approach
+you can require clients performing, say, [MapReduce]({{}}riak/kv/2.1.4/developing/usage/mapreduce/)
+operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.1.4/developing/usage) have to use username and password. The approach
that you adopt will depend on your security needs. This document
provides a general overview of how that works.
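As a server-side illustration of the above, here is a minimal sketch of enabling the security subsystem and assigning a password source to a single user (the username, password, and CIDR are hypothetical):

```bash
# Turn security on (configure SSL first, or clients will be locked out)
riak-admin security enable

# Create a user and require password authentication from localhost
riak-admin security add-user riakuser password=rosebud
riak-admin security add-source riakuser 127.0.0.1/32 password
```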
For managing security in Riak itself, see the following documents:

-* [Authentication and Authorization](/riak/kv/2.1.4/using/security/basics)
-* [Managing Security Sources](/riak/kv/2.1.4/using/security/managing-sources/)
+* [Authentication and Authorization]({{}}riak/kv/2.1.4/using/security/basics)
+* [Managing Security Sources]({{}}riak/kv/2.1.4/using/security/managing-sources/)

We also provide client-library-specific guides for the following
officially supported clients:

-* [Java](/riak/kv/2.1.4/developing/usage/security/java)
-* [Ruby](/riak/kv/2.1.4/developing/usage/security/ruby)
-* [PHP](/riak/kv/2.1.4/developing/usage/security/php)
-* [Python](/riak/kv/2.1.4/developing/usage/security/python)
-* [Erlang](/riak/kv/2.1.4/developing/usage/security/erlang)
+* [Java]({{}}riak/kv/2.1.4/developing/usage/security/java)
+* [Ruby]({{}}riak/kv/2.1.4/developing/usage/security/ruby)
+* [PHP]({{}}riak/kv/2.1.4/developing/usage/security/php)
+* [Python]({{}}riak/kv/2.1.4/developing/usage/security/python)
+* [Erlang]({{}}riak/kv/2.1.4/developing/usage/security/erlang)

## Certificates, Keys, and Authorities

@@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients.

> **HTTP not supported**
>
> Certificate-based authentication is available only through Riak's
-[Protocol Buffers](/riak/kv/2.1.4/developing/api/protocol-buffers/) interface. It is not available through the
-[HTTP API](/riak/kv/2.1.4/developing/api/http).
+[Protocol Buffers]({{}}riak/kv/2.1.4/developing/api/protocol-buffers/) interface. It is not available through the
+[HTTP API]({{}}riak/kv/2.1.4/developing/api/http).

### Default Names

-In Riak's [configuration files](/riak/kv/2.1.4/configuring/reference/#security), the
+In Riak's [configuration files]({{}}riak/kv/2.1.4/configuring/reference/#security), the
default certificate file names are as follows:

Cert | Filename
diff --git a/content/riak/kv/2.1.4/developing/usage/security/erlang.md b/content/riak/kv/2.1.4/developing/usage/security/erlang.md
index d72435334f..0d23a567f6 100644
--- a/content/riak/kv/2.1.4/developing/usage/security/erlang.md
+++ b/content/riak/kv/2.1.4/developing/usage/security/erlang.md
@@ -19,9 +19,9 @@ aliases:

This tutorial shows you how to set up a Riak Erlang client to
authenticate itself when connecting to Riak.

-If you are using [trust](/riak/kv/2.1.4/using/security/managing-sources/), [PAM-](/riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.1.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+If you are using [trust]({{}}riak/kv/2.1.4/using/security/managing-sources/)- or [PAM]({{}}riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.1.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered
in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+[certificate]({{}}riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.4/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.1.4/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.1.4/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.1.4/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.1.4/developing/usage/security/java.md b/content/riak/kv/2.1.4/developing/usage/security/java.md index 4ef3d792b8..101f173cd1 100644 --- a/content/riak/kv/2.1.4/developing/usage/security/java.md +++ b/content/riak/kv/2.1.4/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.4/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.1.4/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.1.4/developing/usage/security/php.md b/content/riak/kv/2.1.4/developing/usage/security/php.md index 0487cc2054..bf67ce75ee 100644 --- a/content/riak/kv/2.1.4/developing/usage/security/php.md +++ b/content/riak/kv/2.1.4/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.4/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.1.4/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.1.4/developing/usage/security/python.md b/content/riak/kv/2.1.4/developing/usage/security/python.md index 5e1b406ddf..116d7f8198 100644 --- a/content/riak/kv/2.1.4/developing/usage/security/python.md +++ b/content/riak/kv/2.1.4/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.4/using/security/managing-sources/) or [PAM-](/riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.1.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+If you are using [trust-]({{}}riak/kv/2.1.4/using/security/managing-sources/) or [PAM-]({{}}riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication)based authentication, you can use the security
setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.1.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered
in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+[certificate]({{}}riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
the instructions in the [section below](#certificate-based-authentication).

{{% note title="Note on certificate generation" %}}
@@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object.

If you are using Riak Security, _all_ connecting clients should have
access to the same Certificate Authority (CA) used on the server side,
-regardless of which [security source](/riak/kv/2.1.4/using/security/managing-sources/) you
+regardless of which [security source]({{}}riak/kv/2.1.4/using/security/managing-sources/) you
choose. All clients should also provide a username. The example below
sets up a client object (we'll simply call it `client`) that connects to
Riak on `localhost` and on port 8087 without any security credentials:
@@ -83,7 +83,7 @@ provided that the
[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
which the client is connecting has been specified as trusted. More on
specifying trusted CIDRs can be found in [Trust-based
-Authentication](/riak/kv/2.1.4/using/security/managing-sources/#Trust-based-Authentication).
+Authentication]({{}}riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication).

**Note**: The examples in the following sections specify certs on the
basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
@@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser',

## PAM-based Authentication

If you have specified that a specific client be authenticated using
-[PAM](/riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication), you will
+[PAM]({{}}riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication), you will
need to provide a CA as well as the username and password that you
specified when creating the user in Riak. For more, see our
-documentation on [User Management](/riak/kv/2.1.4/using/security/basics/#user-management).
+documentation on [User Management]({{}}riak/kv/2.1.4/using/security/basics/#user-management).
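For reference, the server-side counterpart to this client setup is registering a PAM source for the user; a minimal sketch, where the PAM service name `riak` and the CIDR are hypothetical:

```bash
# Authenticate riakuser against the PAM service named "riak"
riak-admin security add-source riakuser 127.0.0.1/32 pam service=riak
```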
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.1.4/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.1.4/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.1.4/developing/usage/security/ruby.md b/content/riak/kv/2.1.4/developing/usage/security/ruby.md index 5491ba56eb..6fed0966f1 100644 --- a/content/riak/kv/2.1.4/developing/usage/security/ruby.md +++ b/content/riak/kv/2.1.4/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.1.4/using/security/managing-sources/) or [PAM](/riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.1.4/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.1.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.1.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.1.4/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.1.4/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication). 
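By way of comparison, marking a network as trusted is a single server-side command; a minimal sketch with a hypothetical CIDR:

```bash
# Trust every client connecting from the local network
riak-admin security add-source all 192.168.1.0/24 trust
```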
## Password-based Authentication

@@ -86,10 +86,10 @@ client = Riak::Client.new(

## PAM-based Authentication

If you have specified that a specific client be authenticated using
-[PAM](/riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication), you will
+[PAM]({{}}riak/kv/2.1.4/using/security/managing-sources/#pam-based-authentication), you will
need to provide a CA as well as the username and password that you
specified when creating the user in Riak. For more, see our
-documentation on [User Management](/riak/kv/2.1.4/using/security/basics#user-management).
+documentation on [User Management]({{}}riak/kv/2.1.4/using/security/basics#user-management).

## Certificate-based Authentication

diff --git a/content/riak/kv/2.1.4/developing/usage/updating-objects.md b/content/riak/kv/2.1.4/developing/usage/updating-objects.md
index 838a53f860..bd32bde915 100644
--- a/content/riak/kv/2.1.4/developing/usage/updating-objects.md
+++ b/content/riak/kv/2.1.4/developing/usage/updating-objects.md
@@ -15,7 +15,7 @@ aliases:
 - /riak/kv/2.1.4/dev/using/updates
---

-[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode
+[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode

## Using Causal Context

If an object already exists under a certain key and you want to write a
new object to that key, Riak needs to know what to do, especially if
multiple writes are happening at the same time. Which of the objects
being written should be deemed correct? These kinds of scenarios can
-arise quite frequently in distributed, [eventually consistent](/riak/kv/2.1.4/learn/concepts/eventual-consistency) systems.
+arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.1.4/learn/concepts/eventual-consistency) systems.

-Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.1.4/learn/concepts/causal-context). These objects track the causal history of objects.
+Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.1.4/learn/concepts/causal-context). These context objects track the causal history of objects.
They are attached to _all_ Riak objects as metadata, and they are not
readable by humans. They may sound complex---and they are fairly complex
behind the scenes---but using them in your application is very simple.

@@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple.
Using causal context in an update would involve the following steps:

1. Fetch the object
-2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.1.4/learn/concepts/causal-context)
+2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.1.4/learn/concepts/causal-context))
3. Write the new object to Riak

Step 2 is the most important here. All of Basho's official Riak clients
-enable you to modify an object's value without modifying its [causal context](/riak/kv/2.1.4/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
-object updates can be found in [Conflict Resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution), we'll walk you
+enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.1.4/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution), we'll walk you
through a basic example here.
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.1.4/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.1.4/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.1.4/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.1.4/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.1.4/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.1.4/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.1.4/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.1.4/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.1.4/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.1.4/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.1.4/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.1.4/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.1.4/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.1.4/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.1.4/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.1.4/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.1.4/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.1.4/index.md b/content/riak/kv/2.1.4/index.md index a9e86955c1..6e26002ea4 100644 --- a/content/riak/kv/2.1.4/index.md +++ b/content/riak/kv/2.1.4/index.md @@ -15,16 +15,16 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.1.4/configuring -[dev index]: /riak/kv/2.1.4/developing -[downloads]: /riak/kv/2.1.4/downloads/ -[install index]: /riak/kv/2.1.4/setup/installing/ -[plan index]: /riak/kv/2.1.4/setup/planning -[perf open files]: /riak/kv/2.1.4/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.1.4/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.1.4/developing/usage/search -[getting started]: /riak/kv/2.1.4/developing/getting-started -[dev client libraries]: /riak/kv/2.1.4/developing/client-libraries +[config index]: {{}}riak/kv/2.1.4/configuring +[dev index]: {{}}riak/kv/2.1.4/developing +[downloads]: {{}}riak/kv/2.1.4/downloads/ +[install index]: {{}}riak/kv/2.1.4/setup/installing/ +[plan index]: {{}}riak/kv/2.1.4/setup/planning +[perf open files]: {{}}riak/kv/2.1.4/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.1.4/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.1.4/developing/usage/search +[getting started]: {{}}riak/kv/2.1.4/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.1.4/developing/client-libraries diff --git a/content/riak/kv/2.1.4/introduction.md b/content/riak/kv/2.1.4/introduction.md index a5aaf28537..4b4b2b62fc 100644 --- a/content/riak/kv/2.1.4/introduction.md +++ b/content/riak/kv/2.1.4/introduction.md @@ -15,6 +15,7 @@ version_history: aliases: - /riak/kv/2.1.4/intro-v20 - /riak/2.1.4/intro-v20 + - /riak/kv/latest/introduction --- Riak version 2.0 includes deep changes and many new features affecting @@ -27,7 +28,7 @@ For more in-depth implementation details check out the If you're upgrading to Riak 2.0 from an earlier version, please be aware that all of the new features listed below are optional: -* **Riak Data Types** --- Riak's new CRDT-based [Data Types](/riak/kv/2.1.4/developing/data-types) can +* **Riak Data Types** --- Riak's new CRDT-based [Data Types]({{}}riak/kv/2.1.4/developing/data-types) can simplify modeling data in Riak, but are only used in buckets explicitly configured to use them. * **Strong Consistency, Riak Security, and the New Riak Search** --- @@ -35,16 +36,16 @@ that all of the new features listed below are optional: work. If not turned on, they will have no impact on performance. Furthermore, the older Riak Search will continue to be included with Riak. -* **Security** --- [Authentication and authorization](/riak/kv/2.1.4/using/security/basics) can be enabled +* **Security** --- [Authentication and authorization]({{}}riak/kv/2.1.4/using/security/basics) can be enabled or disabled at any time. -* **Configuration management** --- Riak's [configuration files](/riak/kv/2.1.4/configuring/reference/) have +* **Configuration management** --- Riak's [configuration files]({{}}riak/kv/2.1.4/configuring/reference/) have been streamlined into a single file named `riak.conf`. If you are upgrading, however, your existing `app.config` and `vm.args` files will still be recognized in version 2.0. -* **Bucket Types** --- While we strongly recommend [using bucket types](/riak/kv/2.1.4/using/reference/bucket-types) when creating new buckets, they are not required. 
+* **Bucket Types** --- While we strongly recommend [using bucket types]({{}}riak/kv/2.1.4/using/reference/bucket-types) when creating new buckets, they are not required. * **Dotted Version Vectors (DVVs)** --- This alternative to traditional - [vector clocks](/riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks) is enabled by default - in all [bucket types](/riak/kv/2.1.4/using/reference/bucket-types), but DVVs can be disabled + [vector clocks]({{}}riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks) is enabled by default + in all [bucket types]({{}}riak/kv/2.1.4/using/reference/bucket-types), but DVVs can be disabled by setting the `dvv_enabled` property to `false` on any bucket type. In a nutshell, upgrading to 2.0 will change how you use Riak only if you @@ -52,17 +53,17 @@ want it to. But even if you don't plan on using the new features, there are a number of improvements that make upgrading a good choice, including the following: -* [Cluster metadata](/riak/kv/2.1.4/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that +* [Cluster metadata]({{}}riak/kv/2.1.4/developing/app-guide/cluster-metadata) --- This is a subsystem of Riak added in 2.0 that reduces the amount of inter-node gossip in Riak clusters, which can reduce network congestion. -* [Active Anti-Entropy](/riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy +* [Active Anti-Entropy]({{}}riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae) --- While Riak has had an Active Anti-Entropy (AAE) feature that is turned on by default since version 1.3, AAE performance has been improved in version 2.0. * [Bug patches](https://github.com/basho/riak/blob/2.0/RELEASE-NOTES.md) --- A variety of bugs present in earlier versions have been identified and patched. -More on upgrading can be found in our [Riak 2.0 upgrade guide](/riak/kv/2.1.4/setup/upgrading/version). +More on upgrading can be found in our [Riak 2.0 upgrade guide]({{}}riak/kv/2.1.4/setup/upgrading/version). ## Riak Data Types @@ -73,20 +74,20 @@ application is responsible for resolving conflicts between replicas of objects stored in different Riak nodes. Riak 2.0 offers a new approach to this problem for a wide range of use -cases in the form of [Riak Data Types](/riak/kv/2.1.4/developing/data-types). Instead of +cases in the form of [Riak Data Types]({{}}riak/kv/2.1.4/developing/data-types). Instead of forcing the application to resolve conflicts, Riak offers five Data Types that can reduce some of the complexities of developing using -Riak: [flags](/riak/kv/2.1.4/developing/data-types/maps#flags), [registers](/riak/kv/2.1.4/developing/data-types/maps#registers), -[counters](/riak/kv/2.1.4/developing/data-types/counters), [sets](/riak/kv/2.1.4/developing/data-types/sets), and -[maps](/riak/kv/2.1.4/developing/data-types/maps). +Riak: [flags]({{}}riak/kv/2.1.4/developing/data-types/maps#flags), [registers]({{}}riak/kv/2.1.4/developing/data-types/maps#registers), +[counters]({{}}riak/kv/2.1.4/developing/data-types/counters), [sets]({{}}riak/kv/2.1.4/developing/data-types/sets), and +[maps]({{}}riak/kv/2.1.4/developing/data-types/maps). 
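As a small taste of the Data Types API, here is a hedged sketch of incrementing and reading a counter over HTTP, assuming a bucket type named `counters` (with its `datatype` property set to `counter`) has already been created and activated; the bucket and key names are hypothetical:

```bash
# Increment a counter; no read-before-write or context is needed
curl -XPOST http://localhost:8098/types/counters/buckets/scores/datatypes/champs \
  -H 'Content-Type: application/json' \
  -d '{"increment": 5}'

# Read it back; the JSON response includes the current value
curl http://localhost:8098/types/counters/buckets/scores/datatypes/champs
```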
#### Relevant Docs -* [Using Data Types](/riak/kv/2.1.4/developing/data-types) explains how to use Riak Data Types on the +* [Using Data Types]({{}}riak/kv/2.1.4/developing/data-types) explains how to use Riak Data Types on the application side, with usage examples for all five Data Types in all of Basho's officially supported clients (Java, Ruby, Python, .NET and Erlang) and for Riak's HTTP interface. -* [Data Types](/riak/kv/2.1.4/developing/data-types) explains some of the theoretical concerns that drive +* [Data Types]({{}}riak/kv/2.1.4/developing/data-types) explains some of the theoretical concerns that drive Riak Data Types and shares details about how they are implemented in Riak. @@ -103,11 +104,11 @@ Search, integrating Riak with [Apache Solr](https://lucene.apache.org/solr/)'s f #### Relevant Docs -* [Using Search](/riak/kv/2.1.4/developing/usage/search) provides an overview of how to use the new +* [Using Search]({{}}riak/kv/2.1.4/developing/usage/search) provides an overview of how to use the new Riak Search. -* [Search Schema](/riak/kv/2.1.4/developing/usage/search-schemas) shows you how to create and manage custom search +* [Search Schema]({{}}riak/kv/2.1.4/developing/usage/search-schemas) shows you how to create and manage custom search schemas. -* [Search Details](/riak/kv/2.1.4/using/reference/search) provides an in-depth look at the design +* [Search Details]({{}}riak/kv/2.1.4/using/reference/search) provides an in-depth look at the design considerations that went into the new Riak Search. #### Video @@ -125,13 +126,13 @@ some (or perhaps all) of your data. #### Relevant Docs -* [Using Strong Consistency](/riak/kv/2.1.4/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong +* [Using Strong Consistency]({{}}riak/kv/2.1.4/using/cluster-operations/strong-consistency) shows you how to enable Riak's strong consistency subsystem and to apply strong consistency guarantees to data stored in specified buckets. -* [Strong Consistency](/riak/kv/2.1.4/using/reference/strong-consistency) provides a theoretical treatment of how a - strongly consistent system differs from an [eventually consistent](/riak/kv/2.1.4/learn/concepts/eventual-consistency) system, as well as details about how +* [Strong Consistency]({{}}riak/kv/2.1.4/using/reference/strong-consistency) provides a theoretical treatment of how a + strongly consistent system differs from an [eventually consistent]({{}}riak/kv/2.1.4/learn/concepts/eventual-consistency) system, as well as details about how strong consistency is implemented in Riak. -* [Managing Strong Consistency](/riak/kv/2.1.4/configuring/strong-consistency) is a guide to strong consistency for +* [Managing Strong Consistency]({{}}riak/kv/2.1.4/configuring/strong-consistency) is a guide to strong consistency for Riak operators. #### Video @@ -155,11 +156,11 @@ Riak itself and managed through a simple command-line interface. #### Relevant Docs -* [Authentication and Authorization](/riak/kv/2.1.4/using/security/basics) explains how Riak Security can be +* [Authentication and Authorization]({{}}riak/kv/2.1.4/using/security/basics) explains how Riak Security can be enabled and disabled, how users and groups are managed, how authorization to perform certain operations can be granted and revoked, how security ciphers can be chosen, and more. 
-* [Managing Security Sources](/riak/kv/2.1.4/using/security/managing-sources/) is an in-depth tutorial on how to +* [Managing Security Sources]({{}}riak/kv/2.1.4/using/security/managing-sources/) is an in-depth tutorial on how to implement Riak's four supported authentication sources: trusted networks, passwords, pluggable authentication modules, and certificates. @@ -194,7 +195,7 @@ override any settings from the new system. #### Relevant Docs -* [Configuration Files](/riak/kv/2.1.4/configuring/reference/) lists and describes all of the configurable +* [Configuration Files]({{}}riak/kv/2.1.4/configuring/reference/) lists and describes all of the configurable parameters available in Riak 2.0, from configuring your chosen storage backend(s) to setting default bucket properties to controlling Riak's logging system and much more. @@ -214,7 +215,7 @@ and keys. #### Relevant Docs -* [Using Bucket Types](/riak/kv/2.1.4/using/reference/bucket-types) explains how to create, modify, and activate +* [Using Bucket Types]({{}}riak/kv/2.1.4/using/reference/bucket-types) explains how to create, modify, and activate bucket types, as well as how the new system differs from the older, bucket properties-based system. @@ -226,20 +227,20 @@ and [Jordan West](https://github.com/jrwest). ## Dotted Version Vectors -In prior versions of Riak, [conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution) was managed using -[vector clocks](/riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks), which track object update causality. +In prior versions of Riak, [conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution) was managed using +[vector clocks]({{}}riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks), which track object update causality. Riak 2.0 has added support for dotted version vectors (DVVs). DVVs serve an analogous role to vector -clocks but are more effective at containing [sibling explosion](/riak/kv/2.1.4/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. +clocks but are more effective at containing [sibling explosion]({{}}riak/kv/2.1.4/learn/concepts/causal-context/#sibling-explosion) and can reduce Riak cluster latency. #### Relevant Docs -* [Dotted Version Vectors](/riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. +* [Dotted Version Vectors]({{}}riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vectors) explains some of the theoretical nuances behind the distinction between DVVs and vector clocks and offers instructions on implementing DVVs. ## New Client Libraries -While Basho offered official [client libraries](/riak/kv/2.1.4/developing/client-libraries) for Java, Ruby, +While Basho offered official [client libraries]({{}}riak/kv/2.1.4/developing/client-libraries) for Java, Ruby, Python, .NET and Erlang for versions of Riak prior to 2.0, all clients have undergone major changes in anticipation of the 2.0 release. 
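As a concrete taste of the security workflow described in the docs above, here is a minimal sketch using `riak-admin`; the user name, password, and source network are illustrative, and security should only be enabled once all clients are set up to connect over TLS:

```bash
riak-admin security enable

# Create a user and allow password authentication from localhost.
riak-admin security add-user riakuser password=Test1234
riak-admin security add-source riakuser 127.0.0.1/32 password

# Grant basic read/write permissions on all bucket types.
riak-admin security grant riak_kv.get,riak_kv.put on any to riakuser
```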
diff --git a/content/riak/kv/2.1.4/learn/concepts.md b/content/riak/kv/2.1.4/learn/concepts.md index fa2f1b5c1e..3430a95f9c 100644 --- a/content/riak/kv/2.1.4/learn/concepts.md +++ b/content/riak/kv/2.1.4/learn/concepts.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.1.4/theory/concepts/ --- -[concept aae]: /riak/kv/2.1.4/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.1.4/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.1.4/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.1.4/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters -[concept crdts]: /riak/kv/2.1.4/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.1.4/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.4/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.1.4/learn/concepts/vnodes -[config index]: /riak/kv/2.1.4/configuring -[plan index]: /riak/kv/2.1.4/setup/planning -[use index]: /riak/kv/2.1.4/using/ +[concept aae]: {{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.1.4/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.1.4/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.1.4/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.1.4/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.1.4/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.4/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.1.4/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.1.4/configuring +[plan index]: {{}}riak/kv/2.1.4/setup/planning +[use index]: {{}}riak/kv/2.1.4/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.1.4/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.1.4/learn/concepts/active-anti-entropy.md index 8d71af8323..5ad821f255 100644 --- a/content/riak/kv/2.1.4/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.1.4/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.1.4/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.1.4/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.1.4/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.1.4/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.1.4/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.1.4/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.1.4/developing/usage/search +[usage search]: {{}}riak/kv/2.1.4/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.1.4/learn/concepts/buckets.md b/content/riak/kv/2.1.4/learn/concepts/buckets.md index 97099e9ab5..8eeb696b76 100644 --- a/content/riak/kv/2.1.4/learn/concepts/buckets.md +++ b/content/riak/kv/2.1.4/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.1.4/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.1.4/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.1.4/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.1.4/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.1.4/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.1.4/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.1.4/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[config basic]: /riak/kv/2.1.4/configuring/basic -[dev api http]: /riak/kv/2.1.4/developing/api/http -[dev data types]: /riak/kv/2.1.4/developing/data-types -[glossary ring]: /riak/kv/2.1.4/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.1.4/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.4/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.1.4/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.1.4/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.1.4/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.1.4/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.1.4/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.1.4/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.1.4/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.1.4/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.1.4/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.1.4/configuring/basic +[dev api http]: {{}}riak/kv/2.1.4/developing/api/http +[dev data types]: {{}}riak/kv/2.1.4/developing/data-types +[glossary ring]: {{}}riak/kv/2.1.4/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.1.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.1.4/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.4/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.1.4/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.1.4/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.1.4/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.1.4/learn/concepts/capability-negotiation.md b/content/riak/kv/2.1.4/learn/concepts/capability-negotiation.md index 3146115833..645180cc1f 100644 --- a/content/riak/kv/2.1.4/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.1.4/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.1.4/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.1.4/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.1.4/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.1.4/developing/usage/mapreduce In versions of Riak prior to 1.2.0, [rolling upgrades][upgrade cluster] from an older version of Riak to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
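A rolling upgrade of the kind capability negotiation now makes painless looks roughly like the following on each node in turn; the package file and node name are illustrative:

```bash
riak stop
sudo dpkg -i riak_2.1.4-1_amd64.deb   # or your platform's package manager
riak start

# Wait for KV to come up, then confirm handoff has settled before
# moving on to the next node.
riak-admin wait-for-service riak_kv riak@192.168.1.11
riak-admin transfers
```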
diff --git a/content/riak/kv/2.1.4/learn/concepts/causal-context.md b/content/riak/kv/2.1.4/learn/concepts/causal-context.md index 24c5a52251..d601cf0abc 100644 --- a/content/riak/kv/2.1.4/learn/concepts/causal-context.md +++ b/content/riak/kv/2.1.4/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.1.4/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.1.4/developing/api/http -[dev key value]: /riak/kv/2.1.4/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.1.4/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.1.4/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.1.4/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.1.4/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.1.4/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.1.4/developing/api/http +[dev key value]: {{}}riak/kv/2.1.4/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.1.4/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.1.4/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.1.4/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.1.4/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.1.4/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -78,7 +78,7 @@ sections below. In the case of outcome 2, the choice between **a** and **b** is yours to make. If you set the `allow_mult` parameter to `true` for a bucket, -[using bucket types](/riak/kv/2.1.4/developing/usage/bucket-types), all writes to that bucket will create siblings +[using bucket types]({{}}riak/kv/2.1.4/developing/usage/bucket-types), all writes to that bucket will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions).
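A minimal sketch of that behavior, assuming a local node on the default HTTP port; the type, bucket, and key names are illustrative:

```bash
# Create and activate a bucket type with allow_mult set to true.
riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
riak-admin bucket-type activate siblings_allowed

# Two writes without a causal context are treated as concurrent...
curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/demo/keys/k \
  -H "Content-Type: text/plain" -d 'first write'
curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/demo/keys/k \
  -H "Content-Type: text/plain" -d 'second write'

# ...so the object now has two siblings, and a plain fetch returns
# 300 Multiple Choices listing both.
curl -i http://localhost:8098/types/siblings_allowed/buckets/demo/keys/k
```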
diff --git a/content/riak/kv/2.1.4/learn/concepts/clusters.md b/content/riak/kv/2.1.4/learn/concepts/clusters.md index 5fab66e76f..4e43ec7882 100644 --- a/content/riak/kv/2.1.4/learn/concepts/clusters.md +++ b/content/riak/kv/2.1.4/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.1.4/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.1.4/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.4/learn/concepts/replication -[glossary node]: /riak/kv/2.1.4/learn/glossary/#node -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.1.4/learn/dynamo -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.1.4/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.1.4/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.1.4/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.1.4/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.4/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.1.4/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.1.4/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.1.4/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.1.4/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.1.4/learn/concepts/crdts.md b/content/riak/kv/2.1.4/learn/concepts/crdts.md index 3cb01f005b..00bd990795 100644 --- a/content/riak/kv/2.1.4/learn/concepts/crdts.md +++ b/content/riak/kv/2.1.4/learn/concepts/crdts.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context dvv]: /riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.1.4/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.1.4/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.1.4/developing/data-types -[glossary node]: /riak/kv/2.1.4/learn/glossary/#node -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.1.4/developing/usage/conflict-resolution +[concept causal context dvv]: {{}}riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.1.4/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.1.4/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.1.4/developing/data-types +[glossary node]: {{}}riak/kv/2.1.4/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.1.4/developing/usage/conflict-resolution A pure key/value store is completely agnostic toward the data stored @@ -32,7 +32,7 @@ within it. Any key can be associated with values of any conceivable type, from short strings to large JSON objects to video files. Riak began as a pure key/value store, but over time it has become more and more aware of the data stored in it through features like [secondary -indexes](/riak/kv/2.1.4/developing/usage/secondary-indexes/) and [Search](/riak/kv/2.1.4/developing/usage/search/). +indexes]({{}}riak/kv/2.1.4/developing/usage/secondary-indexes/) and [Search]({{}}riak/kv/2.1.4/developing/usage/search/). In version 2.0, Riak continued this evolution by introducing a series of eventually convergent **Data Types**. Riak Data Types are convergent @@ -214,7 +214,7 @@ The beauty of Data Types is that Riak "knows" how to resolve value conflicts by applying Data Type-specific rules. In general, Riak does this by remembering the **history** of a value and broadcasting that history along with the current value in the form of a [context -object](/riak/kv/2.1.4/developing/data-types/#Data-Types-and-Context) that is similar to a +object]({{}}riak/kv/2.1.4/developing/data-types/#Data-Types-and-Context) that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak uses the history of each Data Type to make deterministic judgments about which value should be deemed correct.
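The analogous mechanism is visible in the plain key/value API, where the causal context travels in the `X-Riak-Vclock` header; a sketch of the read-modify-write cycle, with illustrative bucket and key names and a local node assumed on the default HTTP port:

```bash
# Fetch an object and capture its causal context from the response headers.
VCLOCK=$(curl -si http://localhost:8098/types/default/buckets/demo/keys/k \
  | grep -i '^x-riak-vclock' | awk '{print $2}' | tr -d '\r')

# Write back with the context, telling Riak this update supersedes the
# version we read rather than racing with it.
curl -XPUT http://localhost:8098/types/default/buckets/demo/keys/k \
  -H "Content-Type: text/plain" \
  -H "X-Riak-Vclock: $VCLOCK" \
  -d 'updated value'
```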
diff --git a/content/riak/kv/2.1.4/learn/concepts/eventual-consistency.md b/content/riak/kv/2.1.4/learn/concepts/eventual-consistency.md index 417d98ba23..1147c6e054 100644 --- a/content/riak/kv/2.1.4/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.1.4/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.1.4/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters -[concept replication]: /riak/kv/2.1.4/learn/concepts/replication -[glossary node]: /riak/kv/2.1.4/learn/glossary/#node -[glossary read rep]: /riak/kv/2.1.4/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.1.4/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.1.4/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.1.4/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.1.4/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.1.4/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.1.4/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.1.4/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.1.4/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.1.4/developing/data-modeling/). +or models]({{}}riak/kv/2.1.4/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
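Request tuning of the sort described above can be done on a per-request basis; over HTTP, for example, the R, W, and DW values are query parameters. A minimal sketch, assuming the default `n_val` of 3 and illustrative bucket and key names:

```bash
# A fast, weaker read: respond as soon as one replica answers.
curl "http://localhost:8098/types/default/buckets/demo/keys/k?r=1"

# A stricter write: wait for all three replicas to acknowledge,
# two of them durably.
curl -XPUT "http://localhost:8098/types/default/buckets/demo/keys/k?w=3&dw=2" \
  -H "Content-Type: text/plain" -d 'some value'
```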
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.1.4/learn/concepts/keys-and-objects.md b/content/riak/kv/2.1.4/learn/concepts/keys-and-objects.md index 31a3507584..0944b0b07b 100644 --- a/content/riak/kv/2.1.4/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.1.4/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.1.4/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.1.4/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.1.4/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.1.4/learn/concepts/replication.md b/content/riak/kv/2.1.4/learn/concepts/replication.md index 1ab808548c..1b3e1db0e0 100644 --- a/content/riak/kv/2.1.4/learn/concepts/replication.md +++ b/content/riak/kv/2.1.4/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.1.4/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.1.4/learn/concepts/vnodes -[glossary node]: /riak/kv/2.1.4/learn/glossary/#node -[glossary ring]: /riak/kv/2.1.4/learn/glossary/#ring -[usage replication]: /riak/kv/2.1.4/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.1.4/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.1.4/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.1.4/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.1.4/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.1.4/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.1.4/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.1.4/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replication]({{}}riak/kv/2.1.4/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail.
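The section that follows walks through exactly this procedure; as a compact sketch (the type name is arbitrary):

```bash
# Create, activate, and verify a bucket type with n_val set to 2.
riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
riak-admin bucket-type activate n_val_of_2
riak-admin bucket-type status n_val_of_2
```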
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.1.4/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.1.4/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.1.4/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.1.4/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.1.4/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.1.4/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.1.4/learn/concepts/strong-consistency.md b/content/riak/kv/2.1.4/learn/concepts/strong-consistency.md index dc7bd811ea..e7a18ee458 100644 --- a/content/riak/kv/2.1.4/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.1.4/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.1.4/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.1.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.1.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.1.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.1.4/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.1.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.1.4/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.1.4/learn/concepts/vnodes.md b/content/riak/kv/2.1.4/learn/concepts/vnodes.md index 811d24b81f..1fc15c0deb 100644 --- a/content/riak/kv/2.1.4/learn/concepts/vnodes.md +++ b/content/riak/kv/2.1.4/learn/concepts/vnodes.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context]: /riak/kv/2.1.4/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.1.4/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.1.4/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.4/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.1.4/learn/glossary/#node -[glossary ring]: /riak/kv/2.1.4/learn/glossary/#ring -[plan backend]: /riak/kv/2.1.4/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.1.4/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.1.4/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.1.4/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.1.4/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.1.4/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.4/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.1.4/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.1.4/learn/glossary/#ring +[plan backend]: {{}}riak/kv/2.1.4/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.1.4/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.1.4/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -80,7 +80,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -102,7 +102,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.1.4/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.1.4/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.1.4/learn/dynamo.md b/content/riak/kv/2.1.4/learn/dynamo.md index c7356b47e8..fa892e8126 100644 --- a/content/riak/kv/2.1.4/learn/dynamo.md +++ b/content/riak/kv/2.1.4/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.1.4/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.1.4/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.1.4/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.1.4/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. 
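For instance, the HTTP API makes basic reads and writes a one-liner; the bucket and key names are illustrative, and the node is assumed to be listening on the default port 8098:

```bash
# Store an object...
curl -XPUT http://localhost:8098/types/default/buckets/welcome/keys/german \
  -H "Content-Type: text/plain" \
  -d 'herzlich willkommen'

# ...and fetch it back.
curl http://localhost:8098/types/default/buckets/welcome/keys/german
```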
-[HTTP API]: /riak/kv/2.1.4/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.1.4/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.1.4/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.1.4/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.1.4/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.1.4/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.1.4/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.1.4/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.1.4/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.1.4/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.1.4/developing/api/http/) +>[REST API]({{}}riak/kv/2.1.4/developing/api/http/) > ->[Writing Data](/riak/kv/2.1.4/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.1.4/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.1.4/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.1.4/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.1.4/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.1.4/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.1.4/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.1.4/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.1.4/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.1.4/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. -[Multi Datacenter Replication]: /riak/kv/2.1.4/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.1.4/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. 
> This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.1.4/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.1.4/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.1.4/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.1.4/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.1.4/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.1.4/setup/planning/backend/ -[Bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.1.4/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.1.4/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.1.4/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.1.4/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.1.4/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.1.4/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.1.4/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.1.4/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.1.4/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.1.4/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.1.4/using/performance/benchmarking/ Dynamo is used by several services with different configurations.
These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.1.4/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.1.4/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows SHA1-based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.1.4/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.1.4/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.1.4/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.1.4/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.1.4/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.1.4/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.1.4/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.1.4/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems (e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. -[Basho Bench]: /riak/kv/2.1.4/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.1.4/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation].
-[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.1.4/learn/glossary.md b/content/riak/kv/2.1.4/learn/glossary.md index 3a9b249828..d2b94e21f0 100644 --- a/content/riak/kv/2.1.4/learn/glossary.md +++ b/content/riak/kv/2.1.4/learn/glossary.md @@ -16,39 +16,39 @@ aliases: --- -[apps replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.1.4/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.1.4/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.1.4/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters -[concept crdts]: /riak/kv/2.1.4/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.1.4/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.1.4/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.1.4/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.1.4/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.1.4/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.1.4/developing/api/http -[dev data model]: /riak/kv/2.1.4/developing/data-modeling -[dev data types]: /riak/kv/2.1.4/developing/data-types -[glossary read rep]: /riak/kv/2.1.4/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.1.4/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.1.4/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.1.4/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.1.4/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.1.4/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.1.4/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.1.4/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.1.4/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.1.4/developing/api/http +[dev data model]: {{}}riak/kv/2.1.4/developing/data-modeling +[dev data types]: {{}}riak/kv/2.1.4/developing/data-types +[glossary read rep]: {{}}riak/kv/2.1.4/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.1.4/learn/dynamo -[plan cluster capacity]: /riak/kv/2.1.4/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.1.4/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.1.4/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.1.4/learn/dynamo +[plan cluster capacity]: 
{{}}riak/kv/2.1.4/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.1.4/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.1.4/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.1.4/developing/usage/mapreduce -[usage search]: /riak/kv/2.1.4/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.1.4/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.1.4/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.1.4/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.1.4/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -76,7 +76,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.1.4/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.1.4/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -95,7 +95,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. -* [Bucket Types](/riak/kv/2.1.4/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.1.4/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.1.4/learn/use-cases.md b/content/riak/kv/2.1.4/learn/use-cases.md index 8fd48ec347..264f980728 100644 --- a/content/riak/kv/2.1.4/learn/use-cases.md +++ b/content/riak/kv/2.1.4/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.1.4/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.1.4/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.1.4/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.1.4/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.1.4/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.1.4/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.1.4/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.1.4/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.1.4/developing/data-types -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.1.4/developing/usage/mapreduce -[usage search]: /riak/kv/2.1.4/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.1.4/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.1.4/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.1.4/developing/data-modeling/#log-data +[dev data model sensor data]: 
{{}}riak/kv/2.1.4/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.1.4/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.1.4/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.1.4/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.1.4/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.1.4/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.1.4/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.1.4/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.1.4/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.1.4/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.1.4/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.1.4/learn/why-riak-kv.md b/content/riak/kv/2.1.4/learn/why-riak-kv.md index bdfe595094..38b6984125 100644 --- a/content/riak/kv/2.1.4/learn/why-riak-kv.md +++ b/content/riak/kv/2.1.4/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.1.4/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.1.4/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.1.4/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.1.4/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.1.4/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.1.4/developing/data-types -[glossary read rep]: /riak/kv/2.1.4/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.1.4/developing/data-types +[glossary read rep]: {{}}riak/kv/2.1.4/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.1.4/release-notes.md b/content/riak/kv/2.1.4/release-notes.md index c7530b17c6..a7cb13865f 100644 --- a/content/riak/kv/2.1.4/release-notes.md +++ b/content/riak/kv/2.1.4/release-notes.md @@ -18,7 +18,7 @@ toc: true Released April 11, 2016. -This is a bugfix release providing patches for the [Riak init file](http://docs.basho.com/community/productadvisories/codeinjectioninitfiles/) Product Advisory and the [leveldb segfault](http://docs.basho.com/community/productadvisories/leveldbsegfault/) Product Advisory. +This is a bugfix release providing patches for the [Riak init file]({{}}community/productadvisories/codeinjectioninitfiles/) Product Advisory and the [leveldb segfault]({{}}community/productadvisories/leveldbsegfault/) Product Advisory. 
## Upgraded Components diff --git a/content/riak/kv/2.1.4/setup/downgrade.md b/content/riak/kv/2.1.4/setup/downgrade.md index b7dbf4151e..a8d1d90aa6 100644 --- a/content/riak/kv/2.1.4/setup/downgrade.md +++ b/content/riak/kv/2.1.4/setup/downgrade.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.4/ops/upgrading/rolling-downgrades/ --- -[rolling upgrade]: /riak/kv/2.1.4/setup/upgrading/cluster -[config ref]: /riak/kv/2.1.4/configuring/reference -[concept aae]: /riak/kv/2.1.4/learn/concepts/active-anti-entropy/ -[aae status]: /riak/kv/2.1.4/using/admin/riak-admin/#aae-status +[rolling upgrade]: {{}}riak/kv/2.1.4/setup/upgrading/cluster +[config ref]: {{}}riak/kv/2.1.4/configuring/reference +[concept aae]: {{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/ +[aae status]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#aae-status Downgrades of Riak are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade]. diff --git a/content/riak/kv/2.1.4/setup/installing.md b/content/riak/kv/2.1.4/setup/installing.md index d3386a43ed..0553540026 100644 --- a/content/riak/kv/2.1.4/setup/installing.md +++ b/content/riak/kv/2.1.4/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.1.4/installing/ --- -[install aws]: /riak/kv/2.1.4/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.1.4/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.1.4/setup/installing/freebsd -[install mac osx]: /riak/kv/2.1.4/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.1.4/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.1.4/setup/installing/smartos -[install solaris]: /riak/kv/2.1.4/setup/installing/solaris -[install suse]: /riak/kv/2.1.4/setup/installing/suse -[install windows azure]: /riak/kv/2.1.4/setup/installing/windows-azure -[install source index]: /riak/kv/2.1.4/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.1.4/setup/upgrading +[install aws]: {{}}riak/kv/2.1.4/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.1.4/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.1.4/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.1.4/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.1.4/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.1.4/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.1.4/setup/installing/solaris +[install suse]: {{}}riak/kv/2.1.4/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.1.4/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.1.4/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.1.4/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.1.4/setup/installing/amazon-web-services.md b/content/riak/kv/2.1.4/setup/installing/amazon-web-services.md index 7c2ac87ae2..76ae7770c5 100644 --- a/content/riak/kv/2.1.4/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.1.4/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. 
@@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.1.4/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.1.4/using/security/). ## Clustering Riak on AWS diff --git a/content/riak/kv/2.1.4/setup/installing/debian-ubuntu.md b/content/riak/kv/2.1.4/setup/installing/debian-ubuntu.md index 28e3ca6365..be86b9fbeb 100644 --- a/content/riak/kv/2.1.4/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.1.4/setup/installing/debian-ubuntu.md @@ -18,10 +18,10 @@ aliases: - /riak/kv/2.1.4/installing/debian-ubuntu/ --- -[install source index]: /riak/kv/2.1.4/setup/installing/source/ -[security index]: /riak/kv/2.1.4/using/security/ -[install source erlang]: /riak/kv/2.1.4/setup/installing/source/erlang -[install verify]: /riak/kv/2.1.4/setup/installing/verify +[install source index]: {{}}riak/kv/2.1.4/setup/installing/source/ +[security index]: {{}}riak/kv/2.1.4/using/security/ +[install source erlang]: {{}}riak/kv/2.1.4/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.1.4/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.1.4/setup/installing/freebsd.md b/content/riak/kv/2.1.4/setup/installing/freebsd.md index bc42a6b149..5e411f9548 100644 --- a/content/riak/kv/2.1.4/setup/installing/freebsd.md +++ b/content/riak/kv/2.1.4/setup/installing/freebsd.md @@ -18,9 +18,9 @@ aliases: - /riak/kv/2.1.4/installing/freebsd/ --- -[install source erlang]: /riak/kv/2.1.4/setup/installing/source/erlang -[downloads]: /riak/kv/2.1.4/downloads/ -[install verify]: /riak/kv/2.1.4/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.1.4/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.1.4/downloads/ +[install verify]: {{}}riak/kv/2.1.4/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.1.4/setup/installing/mac-osx.md b/content/riak/kv/2.1.4/setup/installing/mac-osx.md index 633155a095..fe1a54ee52 100644 --- a/content/riak/kv/2.1.4/setup/installing/mac-osx.md +++ b/content/riak/kv/2.1.4/setup/installing/mac-osx.md @@ -18,9 +18,9 @@ aliases: - /riak/kv/2.1.4/installing/mac-osx/ --- -[perf open files]: /riak/kv/2.1.4/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.1.4/setup/installing/source/erlang -[install verify]: /riak/kv/2.1.4/setup/installing/verify +[perf open files]: {{}}riak/kv/2.1.4/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.1.4/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.1.4/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. 
You can install from source or download a diff --git a/content/riak/kv/2.1.4/setup/installing/rhel-centos.md b/content/riak/kv/2.1.4/setup/installing/rhel-centos.md index e38b92fd75..1a10535091 100644 --- a/content/riak/kv/2.1.4/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.1.4/setup/installing/rhel-centos.md @@ -18,9 +18,9 @@ aliases: - /riak/kv/2.1.4/installing/rhel-centos/ --- -[install source index]: /riak/kv/2.1.4/setup/installing/source -[install source erlang]: /riak/kv/2.1.4/setup/installing/source/erlang -[install verify]: /riak/kv/2.1.4/setup/installing/verify +[install source index]: {{}}riak/kv/2.1.4/setup/installing/source +[install source erlang]: {{}}riak/kv/2.1.4/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.1.4/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.1.4/setup/installing/smartos.md b/content/riak/kv/2.1.4/setup/installing/smartos.md index 11eafd24f7..ec58aa4c58 100644 --- a/content/riak/kv/2.1.4/setup/installing/smartos.md +++ b/content/riak/kv/2.1.4/setup/installing/smartos.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.1.4/installing/smartos/ --- -[install verify]: /riak/kv/2.1.4/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.4/setup/installing/verify The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. diff --git a/content/riak/kv/2.1.4/setup/installing/solaris.md b/content/riak/kv/2.1.4/setup/installing/solaris.md index 7b79ae3ac7..f4e9e79fd5 100644 --- a/content/riak/kv/2.1.4/setup/installing/solaris.md +++ b/content/riak/kv/2.1.4/setup/installing/solaris.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.1.4/installing/solaris/ --- -[install verify]: /riak/kv/2.1.4/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.4/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. 
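The RHEL/CentOS, SmartOS, and Solaris hunks above all describe the same binary-package pattern: add a package source, install, start. A minimal sketch for a CentOS host follows; the packagecloud repository script and bare `riak` package name are assumptions based on Basho's packaging of that era, not verified against these pages:

```bash
# Hedged sketch: add the (assumed) Basho package repository, install, start.
curl -s https://packagecloud.io/install/repositories/basho/riak/script.rpm.sh | sudo bash
sudo yum install -y riak
sudo riak start
riak ping    # a healthy node answers "pong"
```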
diff --git a/content/riak/kv/2.1.4/setup/installing/source.md b/content/riak/kv/2.1.4/setup/installing/source.md index 457a155739..3aa43a7417 100644 --- a/content/riak/kv/2.1.4/setup/installing/source.md +++ b/content/riak/kv/2.1.4/setup/installing/source.md @@ -18,13 +18,13 @@ aliases: - /riak/kv/2.1.4/installing/source/ --- -[install source erlang]: /riak/kv/2.1.4/setup/installing/source/erlang -[downloads]: /riak/kv/2.1.4/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.1.4/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.1.4/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.1.4/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.1.4/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.1.4/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.1.4/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.1.4/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.1.4/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.1.4/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.1.4/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.1.4/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.1.4/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.1.4/setup/installing/source/erlang.md b/content/riak/kv/2.1.4/setup/installing/source/erlang.md index 9019d7319e..4ccb53212c 100644 --- a/content/riak/kv/2.1.4/setup/installing/source/erlang.md +++ b/content/riak/kv/2.1.4/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.1.4/installing/source/erlang/ --- -[install index]: /riak/kv/2.1.4/setup/installing -[security basics]: /riak/kv/2.1.4/using/security/basics +[install index]: {{}}riak/kv/2.1.4/setup/installing +[security basics]: {{}}riak/kv/2.1.4/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://github.com/basho/otp/archive/OTP_R16B02_basho10.tar.gz). 
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.1.4/setup/installing/source/jvm.md b/content/riak/kv/2.1.4/setup/installing/source/jvm.md index 9554c697f9..124ac5ade5 100644 --- a/content/riak/kv/2.1.4/setup/installing/source/jvm.md +++ b/content/riak/kv/2.1.4/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.1.4/installing/source/jvm/ --- -[usage search]: /riak/kv/2.1.4/developing/usage/search +[usage search]: {{}}riak/kv/2.1.4/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.1.4/setup/installing/suse.md b/content/riak/kv/2.1.4/setup/installing/suse.md index ca23bbea36..9e6b1a26e7 100644 --- a/content/riak/kv/2.1.4/setup/installing/suse.md +++ b/content/riak/kv/2.1.4/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.1.4/installing/suse/ --- -[install verify]: /riak/kv/2.1.4/setup/installing/verify +[install verify]: {{}}riak/kv/2.1.4/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.1.4/setup/installing/verify.md b/content/riak/kv/2.1.4/setup/installing/verify.md index 1b2213bf6a..8326d4c6f3 100644 --- a/content/riak/kv/2.1.4/setup/installing/verify.md +++ b/content/riak/kv/2.1.4/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.1.4/installing/verify-install/ --- -[client libraries]: /riak/kv/2.1.4/developing/client-libraries -[perf open files]: /riak/kv/2.1.4/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.1.4/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.1.4/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.1.4/developing/client-libraries +[perf open files]: {{}}riak/kv/2.1.4/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.1.4/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.1.4/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.1.4/setup/installing/windows-azure.md b/content/riak/kv/2.1.4/setup/installing/windows-azure.md index 19103c7c34..e66fa97eb2 100644 --- a/content/riak/kv/2.1.4/setup/installing/windows-azure.md +++ b/content/riak/kv/2.1.4/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". 
- ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.1.4/setup/planning/backend.md b/content/riak/kv/2.1.4/setup/planning/backend.md index b2c9feea11..f13850aedf 100644 --- a/content/riak/kv/2.1.4/setup/planning/backend.md +++ b/content/riak/kv/2.1.4/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.1.4/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.4/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.1.4/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.1.4/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.1.4/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.4/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.1.4/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.1.4/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
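The `backend.md` changes above introduce backend selection; a hedged sketch of switching one node to LevelDB (the config file path is assumed, and `storage_backend` is the riak.conf key the configuration reference documents):

```bash
# Show the currently configured backend, switch it to LevelDB, restart.
grep '^storage_backend' /etc/riak/riak.conf    # e.g. "storage_backend = bitcask"
sudo sed -i 's/^storage_backend = .*/storage_backend = leveldb/' /etc/riak/riak.conf
sudo riak restart
```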
diff --git a/content/riak/kv/2.1.4/setup/planning/backend/bitcask.md b/content/riak/kv/2.1.4/setup/planning/backend/bitcask.md index 9e036fc31a..4f0194cffb 100644 --- a/content/riak/kv/2.1.4/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.1.4/setup/planning/backend/bitcask.md @@ -17,17 +17,17 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.1.4/using/admin/riak-cli -[config reference]: /riak/kv/2.1.4/configuring/reference -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.1.4/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.1.4/setup/planning/backend/multi -[usage search]: /riak/kv/2.1.4/developing/usage/search -[glossary aae]: /riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.1.4/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.1.4/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.1.4/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.1.4/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.1.4/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.1.4/developing/usage/search +[glossary aae]: {{}}riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.1.4/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.1.4/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.1.4/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
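Following the Bitcask introduction above, a hedged example of relocating Bitcask's data files to a dedicated volume (mount point and file ownership are invented for illustration; `bitcask.data_root` is the key the configuration reference documents):

```bash
# Move Bitcask data onto its own disk, then restart the node.
sudo mkdir -p /mnt/riak-data/bitcask
sudo chown riak:riak /mnt/riak-data/bitcask
echo 'bitcask.data_root = /mnt/riak-data/bitcask' | sudo tee -a /etc/riak/riak.conf
sudo riak restart
```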
diff --git a/content/riak/kv/2.1.4/setup/planning/backend/leveldb.md b/content/riak/kv/2.1.4/setup/planning/backend/leveldb.md index 051fc1c157..b0200156e2 100644 --- a/content/riak/kv/2.1.4/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.1.4/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.1.4/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[config reference]: /riak/kv/2.1.4/configuring/reference -[perf index]: /riak/kv/2.1.4/using/performance -[config reference#aae]: /riak/kv/2.1.4/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[perf index]: {{}}riak/kv/2.1.4/using/performance +[config reference#aae]: {{}}riak/kv/2.1.4/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.1.4/setup/planning/backend/memory.md b/content/riak/kv/2.1.4/setup/planning/backend/memory.md index e462281449..70b8eef056 100644 --- a/content/riak/kv/2.1.4/setup/planning/backend/memory.md +++ b/content/riak/kv/2.1.4/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.4/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.1.4/configuring/reference -[plan backend multi]: /riak/kv/2.1.4/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[plan backend multi]: {{}}riak/kv/2.1.4/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.1.4/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.1.4/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.1.4/setup/planning/backend/multi.md b/content/riak/kv/2.1.4/setup/planning/backend/multi.md index 898eeb4aae..c463c4a8cb 100644 --- a/content/riak/kv/2.1.4/setup/planning/backend/multi.md +++ b/content/riak/kv/2.1.4/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.1.4/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.1.4/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.1.4/setup/planning/backend/memory -[config reference]: /riak/kv/2.1.4/configuring/reference -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.1.4/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.1.4/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.1.4/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.1.4/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.1.4/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.1.4/setup/planning/best-practices.md b/content/riak/kv/2.1.4/setup/planning/best-practices.md index f5518f56ff..57abac7b33 100644 --- a/content/riak/kv/2.1.4/setup/planning/best-practices.md +++ b/content/riak/kv/2.1.4/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.4/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.1.4/using/reference/handoff -[config mapreduce]: /riak/kv/2.1.4/configuring/mapreduce -[glossary aae]: /riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.1.4/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.1.4/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.1.4/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.1.4/setup/planning/bitcask-capacity-calc.md index a3ca2b7372..0e4dcf2453 100644 --- a/content/riak/kv/2.1.4/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.1.4/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
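Because the calculators above size the Bitcask keydir, which keeps every key resident in RAM, the same arithmetic can be sketched by hand. All figures below are illustrative only; the per-key overhead varies by Riak version:

```bash
# Rough keydir RAM estimate: keys * (per-key overhead + bucket/key name bytes).
KEYS=200000000   # expected number of keys (assumed)
OVERHEAD=45      # approximate static per-key overhead in bytes (assumed)
NAMES=25         # average bucket-plus-key length in bytes (assumed)
echo "approx. $(( KEYS * (OVERHEAD + NAMES) / 1024**3 )) GiB of RAM for the full key set"
```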
diff --git a/content/riak/kv/2.1.4/setup/planning/cluster-capacity.md b/content/riak/kv/2.1.4/setup/planning/cluster-capacity.md index 6ec9cfd052..836613a68b 100644 --- a/content/riak/kv/2.1.4/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.1.4/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.1.4/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.1.4/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.1.4/setup/planning -[concept replication]: /riak/kv/2.1.4/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.1.4/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.1.4/configuring/reference -[perf benchmark]: /riak/kv/2.1.4/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.1.4/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.1.4/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.1.4/setup/planning +[concept replication]: {{}}riak/kv/2.1.4/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[perf benchmark]: {{}}riak/kv/2.1.4/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.1.4/setup/planning/operating-system.md b/content/riak/kv/2.1.4/setup/planning/operating-system.md index 850278933a..156e43deee 100644 --- a/content/riak/kv/2.1.4/setup/planning/operating-system.md +++ b/content/riak/kv/2.1.4/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.1.4/downloads/ +[downloads]: {{}}riak/kv/2.1.4/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.1.4/setup/planning/start.md b/content/riak/kv/2.1.4/setup/planning/start.md index a5aa3ef82b..92ac8a0167 100644 --- a/content/riak/kv/2.1.4/setup/planning/start.md +++ b/content/riak/kv/2.1.4/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.4/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.1.4/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.1.4/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.1.4/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.1.4/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.1.4/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.1.4/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster.
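One of the earliest such decisions is the ring size, which has to be chosen before the cluster is first built. A hedged sketch (config path assumed; `ring_size` is the riak.conf key the configuration reference documents):

```bash
# Set the ring size before the node's first start; it cannot be changed
# once the cluster has been created (ring resizing is deprecated).
echo 'ring_size = 128' | sudo tee -a /etc/riak/riak.conf
sudo riak start
```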
diff --git a/content/riak/kv/2.1.4/setup/upgrading/checklist.md b/content/riak/kv/2.1.4/setup/upgrading/checklist.md index ea0bd4a4ab..98e601976f 100644 --- a/content/riak/kv/2.1.4/setup/upgrading/checklist.md +++ b/content/riak/kv/2.1.4/setup/upgrading/checklist.md @@ -16,24 +16,24 @@ aliases: - /riak/kv/2.1.4/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.1.4/using/performance/open-files-limit -[perf index]: /riak/kv/2.1.4/using/performance +[perf open files]: {{}}riak/kv/2.1.4/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.1.4/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.1.4/using/security/basics -[cluster ops load balance]: /riak/kv/2.1.4/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.1.4/configuring/reference -[config backend]: /riak/kv/2.1.4/configuring/backend -[usage search]: /riak/kv/2.1.4/developing/usage/search -[usage conflict resolution]: /riak/kv/2.1.4/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.1.4/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.1.4/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.1.4/using/admin/commands -[use admin riak control]: /riak/kv/2.1.4/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.1.4/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.1.4/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.1.4/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.1.4/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.1.4/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[config backend]: {{}}riak/kv/2.1.4/configuring/backend +[usage search]: {{}}riak/kv/2.1.4/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.1.4/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.1.4/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.1.4/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.1.4/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.1.4/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.1.4/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.1.4/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.1.4/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a production environment from a development or testing environment can be a complex process. While the specific process will depend on your environment and practices, there are some basics for you to consider and a few questions you will want to ask while making this transition. 
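The production checklist above leads with the open-files limit; a hedged way to inspect it and persist a higher value on Linux (the threshold, user name, and paths are illustrative, following common practice rather than anything these pages prescribe):

```bash
# Inspect the limit Riak will inherit from this shell.
ulimit -n
# Persist a higher limit for the riak user (values illustrative).
sudo tee -a /etc/security/limits.conf <<'EOF'
riak soft nofile 65536
riak hard nofile 65536
EOF
```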
diff --git a/content/riak/kv/2.1.4/setup/upgrading/cluster.md b/content/riak/kv/2.1.4/setup/upgrading/cluster.md index 4c26c0451b..a07fb3d9eb 100644 --- a/content/riak/kv/2.1.4/setup/upgrading/cluster.md +++ b/content/riak/kv/2.1.4/setup/upgrading/cluster.md @@ -11,23 +11,23 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.1.4/ops/upgrading/rolling-upgrades/ - /riak/kv/2.1.4/ops/upgrading/rolling-upgrades/ --- -[production checklist]: /riak/kv/2.1.4/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.1.4/using/admin/riak-control -[use admin commands]: /riak/kv/2.1.4/using/admin/commands -[use admin riak-admin]: /riak/kv/2.1.4/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.1.4/developing/usage/secondary-indexes +[production checklist]: {{}}riak/kv/2.1.4/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.1.4/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.1.4/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.1.4/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.1.4/developing/usage/secondary-indexes [release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.1.4/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.1.4/using/reference/jmx -[snmp]: /riak/kv/2.1.4/using/reference/snmp +[cluster ops mdc]: {{}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.1.4/using/reference/jmx +[snmp]: {{}}riak/kv/2.1.4/using/reference/snmp {{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. @@ -38,7 +38,7 @@ recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. -If you run [Riak Control](/riak/kv/2.1.4/using/admin/riak-control), you should disable it during the rolling upgrade process. +If you run [Riak Control]({{}}riak/kv/2.1.4/using/admin/riak-control), you should disable it during the rolling upgrade process. {{% /note %}} Riak KV nodes negotiate with each other to determine supported diff --git a/content/riak/kv/2.1.4/setup/upgrading/search.md b/content/riak/kv/2.1.4/setup/upgrading/search.md index 1ad8219f26..ee435ec421 100644 --- a/content/riak/kv/2.1.4/setup/upgrading/search.md +++ b/content/riak/kv/2.1.4/setup/upgrading/search.md @@ -11,7 +11,7 @@ menu: parent: "upgrading" toc: true version_history: - in: "2.0.0-2.1.999" + in: "2.0.0-2.99.999" aliases: - /riak/2.1.4/ops/advanced/upgrading-search-2 - /riak/kv/2.1.4/ops/advanced/upgrading-search-2 @@ -273,4 +273,4 @@ search property is set to false. 11. Finally, delete the merge index directories to reclaim disk space. -For any questions reach out to the [Riak community](/community). Preferably, ask your questions up front rather than during the middle of a migration. +For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. 
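The rolling-upgrade procedure covered by the `cluster.md` changes above boils down to a per-node loop; a hedged sketch of one iteration (package manager, node name, and the post-restart checks are assumptions, not quoted from the page):

```bash
# Upgrade a single node, then wait for it to rejoin before moving to the next.
riak stop
sudo yum upgrade -y riak                                  # or apt-get, per platform
riak start
riak-admin wait-for-service riak_kv riak@node1.example.com
riak-admin ring-status                                     # confirm ring agreement first
```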
diff --git a/content/riak/kv/2.1.4/setup/upgrading/version.md b/content/riak/kv/2.1.4/setup/upgrading/version.md index 98fd745619..59da2570f8 100644 --- a/content/riak/kv/2.1.4/setup/upgrading/version.md +++ b/content/riak/kv/2.1.4/setup/upgrading/version.md @@ -21,7 +21,7 @@ explains which default Riak behaviors have changed and specific steps to take for a successful upgrade. For an overview of the new features and functionality -included in version 2.0, check out our guide to [Riak 2.0](/riak/kv/2.1.4/introduction). +included in version 2.0, check out our guide to [Riak 2.0]({{}}riak/kv/2.1.4/introduction). ## New Clients @@ -37,14 +37,14 @@ was built with those features in mind. There are official While we strongly recommend using the newest versions of these clients, older versions will still work with Riak 2.0, with the drawback that -those older clients will not able to take advantage of [new features](/riak/kv/2.1.4/introduction) like [data types](/riak/kv/2.1.4/developing/data-types) or the new [Riak Search](/riak/kv/2.1.4/using/reference/search). +those older clients will not be able to take advantage of [new features]({{}}riak/kv/2.1.4/introduction) like [data types]({{}}riak/kv/2.1.4/developing/data-types) or the new [Riak Search]({{}}riak/kv/2.1.4/using/reference/search). ## Bucket Types In versions of Riak prior to 2.0, the location of objects was -determined by objects' [bucket](/riak/kv/2.1.4/learn/concepts/buckets) and [key](/riak/kv/2.1.4/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties](/riak/kv/2.1.4/developing/usage/bucket-types/). +determined by objects' [bucket]({{}}riak/kv/2.1.4/learn/concepts/buckets) and [key]({{}}riak/kv/2.1.4/learn/concepts/keys-and-objects), while all bucket-level configurations were managed by setting [bucket properties]({{}}riak/kv/2.1.4/developing/usage/bucket-types/). -In Riak 2.0, [bucket types](/riak/kv/2.1.4/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types](/riak/kv/2.1.4/using/reference/bucket-types). +In Riak 2.0, [bucket types]({{}}riak/kv/2.1.4/using/cluster-operations/bucket-types) are both an additional namespace for locating objects _and_ a new way of configuring bucket properties in a systematic fashion. More comprehensive details on usage can be found in the documentation on [using bucket types]({{}}riak/kv/2.1.4/using/reference/bucket-types). Here, we'll list some of the things to be aware of when upgrading. #### Bucket types and object location @@ -57,7 +57,7 @@ is determined by: * key This means there are 3 namespaces involved in object location instead of 2. -A full tutorial can be found in [Using Bucket Types](/riak/kv/2.1.4/using/reference/bucket-types). +A full tutorial can be found in [Using Bucket Types]({{}}riak/kv/2.1.4/using/reference/bucket-types). If your application was written using a version of Riak prior to 2.0, you should make sure that any endpoint in Riak targeting @@ -76,8 +76,8 @@ configurations.
The following URLs are equivalent in Riak 2.0: If you use object locations that don't specify a bucket type, you have three options: -* Accept Riak's [default bucket configurations](/riak/kv/2.1.4/using/reference/bucket-types/#buckets-as-namespaces) -* Change Riak's defaults using your [configuration files](/riak/kv/2.1.4/configuring/reference/#default-bucket-properties) +* Accept Riak's [default bucket configurations]({{}}riak/kv/2.1.4/using/reference/bucket-types/#buckets-as-namespaces) +* Change Riak's defaults using your [configuration files]({{}}riak/kv/2.1.4/configuring/reference/#default-bucket-properties) * Manage multiple sets of bucket properties by specifying those properties for all operations (not recommended) @@ -87,17 +87,17 @@ One reason we recommend using bucket types for Riak 2.0 and later is because many newer Riak features were built with bucket types as a precondition: -* [Strong consistency](/riak/kv/2.1.4/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem +* [Strong consistency]({{}}riak/kv/2.1.4/using/reference/strong-consistency) --- Using Riak's strong consistency subsystem requires you to set the `consistent` parameter on a bucket type to `true` -* [Riak Data Types](/riak/kv/2.1.4/developing/data-types) --- In order to use Riak Data - Types, you must [create bucket types](/riak/kv/2.1.4/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the +* [Riak Data Types]({{}}riak/kv/2.1.4/developing/data-types) --- In order to use Riak Data + Types, you must [create bucket types]({{}}riak/kv/2.1.4/developing/data-types/#setting-up-buckets-to-use-riak-data-types) specific to the Data Type you are using #### Bucket types and downgrades If you decide to use bucket types, please remember that you -cannot [downgrade](/riak/kv/2.1.4/setup/downgrade) your cluster to a version of +cannot [downgrade]({{}}riak/kv/2.1.4/setup/downgrade) your cluster to a version of Riak prior to 2.0 if you have both created and activated a bucket type. @@ -105,20 +105,20 @@ bucket type. One of the biggest changes in version 2.0 regarding application development involves Riak's default -[siblings](/riak/kv/2.1.4/learn/concepts/causal-context/#siblings) behavior. +[siblings]({{}}riak/kv/2.1.4/learn/concepts/causal-context/#siblings) behavior. In versions prior to 2.0, the `allow_mult` setting was set to `false` by default for all buckets. So Riak's default behavior was to resolve -object replica [conflicts](/riak/kv/2.1.4/developing/usage/conflict-resolution) between nodes on its +object replica [conflicts]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution) between nodes on its own; relieving connecting clients of the need to resolve those conflicts. **In 2.0, `allow_mult` is set to `true` for any bucket type that you create and activate.** -This means that the default when [using bucket types](/riak/kv/2.1.4/using/reference/bucket-types/) is to handle [conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution) on the client side using -either traditional [vector clocks](/riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors](/riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vector). 
+This means that the default when [using bucket types]({{}}riak/kv/2.1.4/using/reference/bucket-types/) is to handle [conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution) on the client side using +either traditional [vector clocks]({{}}riak/kv/2.1.4/learn/concepts/causal-context/#vector-clocks) or the newer [dotted version vectors]({{}}riak/kv/2.1.4/learn/concepts/causal-context/#dotted-version-vector). If you wish to set `allow_mult` to `false` in version 2.0, you have two options: @@ -127,11 +127,11 @@ options: * Don't use bucket types. More information on handling siblings can be found in our documentation -on [conflict resolution](/riak/kv/2.1.4/developing/usage/conflict-resolution). +on [conflict resolution]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution). ## Enabling Security -The [authentication and authorization](/riak/kv/2.1.4/using/security/basics) mechanisms included with Riak 2.0 should only be turned +The [authentication and authorization]({{}}riak/kv/2.1.4/using/security/basics) mechanisms included with Riak 2.0 should only be turned on after careful testing in a non-production environment. Security changes the way all applications interact with Riak. @@ -141,12 +141,12 @@ If you decide to upgrade to version 2.0, you can still downgrade your cluster to an earlier version of Riak if you wish, _unless_ you perform one of the following actions in your cluster: -* Index data to be used in conjunction with the new [Riak Search](/riak/kv/2.1.4/using/reference/search). -* Create _and_ activate one or more [bucket types](/riak/kv/2.1.4/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: - - [Strong consistency](/riak/kv/2.1.4/using/reference/strong-consistency) - - [Riak Data Types](/riak/kv/2.1.4/developing/data-types) +* Index data to be used in conjunction with the new [Riak Search]({{}}riak/kv/2.1.4/using/reference/search). +* Create _and_ activate one or more [bucket types]({{}}riak/kv/2.1.4/using/reference/bucket-types/). By extension, you will not be able to downgrade your cluster if you have used the following features, both of which rely on bucket types: + - [Strong consistency]({{}}riak/kv/2.1.4/using/reference/strong-consistency) + - [Riak Data Types]({{}}riak/kv/2.1.4/developing/data-types) -If you use other new features, such as [Riak Security](/riak/kv/2.1.4/using/security/basics) or the new [configuration files](/riak/kv/2.1.4/configuring/reference/), you can still +If you use other new features, such as [Riak Security]({{}}riak/kv/2.1.4/using/security/basics) or the new [configuration files]({{}}riak/kv/2.1.4/configuring/reference/), you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. @@ -156,7 +156,7 @@ Riak 2.0 offers a new configuration system that both simplifies configuration syntax and uses one configuration file, `riak.conf`, instead of the two files, `app.config` and `vm.args`, required by the older system. Full documentation of the new system can be found in -[Configuration Files](/riak/kv/2.1.4/configuring/reference/). +[Configuration Files]({{}}riak/kv/2.1.4/configuring/reference/). If you're upgrading to Riak 2.0 from an earlier version, you have two configuration options: @@ -167,12 +167,12 @@ configuration options: recognized in Riak 2.0. 
If you choose the first option, make sure to consult the -[configuration files](/riak/kv/2.1.4/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. +[configuration files]({{}}riak/kv/2.1.4/configuring/reference/) documentation, as many configuration parameters have changed names, some no longer exist, and others have been added that were not previously available. If you choose the second option, Riak will automatically determine that the older configuration system is being used. You should be aware, however, that some settings must be set in an `advanced.config` file. -For a listing of those parameters, see our documentation on [advanced configuration](/riak/kv/2.1.4/configuring/reference/#advanced-configuration). +For a listing of those parameters, see our documentation on [advanced configuration]({{}}riak/kv/2.1.4/configuring/reference/#advanced-configuration). If you choose to keep the existing `app.config` files, you _must_ add the following additional settings in the `riak_core` section: @@ -210,7 +210,7 @@ default to a value of `15`, which can cause problems in some clusters. ## Upgrading Search Information on upgrading Riak Search to 2.0 can be found in our -[Search upgrade guide](/riak/kv/2.1.4/setup/upgrading/search). +[Search upgrade guide]({{}}riak/kv/2.1.4/setup/upgrading/search). ## Migrating from Short Names @@ -221,12 +221,11 @@ and `-name` in `vm.args`. If you are upgrading from a previous version of Riak to 2.0 and are using `-sname` in your `vm.args`, the below steps are required to migrate away from `-sname`. -1. Upgrade to Riak -[1.4.12](http://docs.basho.com/riak/1.4.12/downloads/). +1. Upgrade to Riak 1.4.12. 2. Back up the ring directory on each node, typically located in `/var/lib/riak/ring`. 3. Stop all nodes in your cluster. -4. Run [`riak-admin reip `](/riak/kv/2.1.4/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your +4. Run [`riak-admin reip `]({{}}riak/kv/2.1.4/using/admin/riak-admin/#reip) on each node in your cluster, for each node in your cluster. For example, in a 5 node cluster this will be run 25 total times, 5 times on each node. The `` is the current shortname, and the `` is the new fully qualified hostname. 
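As a hedged expansion of step 4 above (host names invented), the reip pass for a five-node cluster looks like this, repeated on every stopped node:

```bash
# Run on EACH stopped node: one reip per cluster member.
# Five nodes means 25 invocations in total, per the text above.
riak-admin reip riak@node1 riak@node1.example.com
riak-admin reip riak@node2 riak@node2.example.com
riak-admin reip riak@node3 riak@node3.example.com
riak-admin reip riak@node4 riak@node4.example.com
riak-admin reip riak@node5 riak@node5.example.com
```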
diff --git a/content/riak/kv/2.1.4/using/admin/commands.md b/content/riak/kv/2.1.4/using/admin/commands.md index 30fdb923af..e770db6298 100644 --- a/content/riak/kv/2.1.4/using/admin/commands.md +++ b/content/riak/kv/2.1.4/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.1.4/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.1.4/using/admin/riak-admin/#cluster -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.1.4/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.1.4/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.1.4/using/admin/riak-admin.md b/content/riak/kv/2.1.4/using/admin/riak-admin.md index a847bceb1f..3c7e4857f0 100644 --- a/content/riak/kv/2.1.4/using/admin/riak-admin.md +++ b/content/riak/kv/2.1.4/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.1.4/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.1.4/configuring/reference -[use admin commands]: /riak/kv/2.1.4/using/admin/commands -[use admin commands#join]: /riak/kv/2.1.4/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.1.4/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.1.4/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.1.4/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.1.4/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.1.4/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.1.4/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.1.4/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.1.4/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.1.4/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.1.4/setup/downgrade -[security index]: /riak/kv/2.1.4/using/security/ -[security managing]: /riak/kv/2.1.4/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.1.4/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.1.4/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.1.4/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.1.4/using/cluster-operations/strong-consistency -[cluster ops handoff]: /riak/kv/2.1.4/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.1.4/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[use admin commands]: {{}}riak/kv/2.1.4/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.1.4/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.1.4/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.1.4/using/cluster-operations/backing-up +[config reference#node-metadata]: 
{{}}riak/kv/2.1.4/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.1.4/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.1.4/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.1.4/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.1.4/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.1.4/using/cluster-operations/inspecting-node +[use ref monitoring]: {{}}riak/kv/2.1.4/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.1.4/setup/downgrade +[security index]: {{}}riak/kv/2.1.4/using/security/ +[security managing]: {{}}riak/kv/2.1.4/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.1.4/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.1.4/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.1.4/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.1.4/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.1.4/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#stats ## riak-admin diff --git a/content/riak/kv/2.1.4/using/admin/riak-cli.md b/content/riak/kv/2.1.4/using/admin/riak-cli.md index 1b9d70b3e6..de09360089 100644 --- a/content/riak/kv/2.1.4/using/admin/riak-cli.md +++ b/content/riak/kv/2.1.4/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.1.4/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.1.4/configuring/reference/ +[configuration file]: {{}}riak/kv/2.1.4/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.1.4/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.1.4/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.1.4/configuring/reference/ ## riak diff --git a/content/riak/kv/2.1.4/using/admin/riak-control.md b/content/riak/kv/2.1.4/using/admin/riak-control.md index f3696c7a22..eff2c03ca4 100644 --- a/content/riak/kv/2.1.4/using/admin/riak-control.md +++ b/content/riak/kv/2.1.4/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.1.4/configuring/reference +[config reference]: {{}}riak/kv/2.1.4/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.1.4/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.1.4/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. 
When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.1.4/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.1.4/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.1.4/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.1.4/using/cluster-operations/active-anti-entropy.md index f85eac31e1..0a510221bc 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/active-anti-entropy.md @@ -54,12 +54,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. ## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -87,7 +87,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. 
-Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes.md index 3ba955c18f..9cac9cba40 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.1.4/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.1.4/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.1.4/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.1.4/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.1.4/using/cluster-operations/backing-up.md b/content/riak/kv/2.1.4/using/cluster-operations/backing-up.md index e68aa500a4..1bbc50c1cc 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.1.4/ops/running/backups --- -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters -[config reference]: /riak/kv/2.1.4/configuring/reference -[plan backend leveldb]: /riak/kv/2.1.4/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency -[concept aae]: /riak/kv/2.1.4/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.1.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.1.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. 
-Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.1.4/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.1.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.1.4/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down `](/riak/kv/2.1.4/using/admin/riak-admin/#down) + [`riak-admin down `]({{}}riak/kv/2.1.4/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join `](/riak/kv/2.1.4/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join `]({{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace `](/riak/kv/2.1.4/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace `]({{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.1.4/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.1.4/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.1.4/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.1.4/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). 
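The node-restore steps above (down, join, force-replace, plan, commit) map onto a short command sequence. Using the text's own `riak1.example.com` to `riak6.example.com` example, and assuming `riak2.example.com` as a surviving member to join, a hedged sketch run on the restored node:

```bash
riak-admin down riak@riak1.example.com                    # 1. mark the original down
riak-admin cluster join riak@riak2.example.com            # 2. join the restored node
riak-admin cluster force-replace riak@riak1.example.com \
                                 riak@riak6.example.com   # 3. take over its partitions
riak-admin cluster plan                                   # 4. review the staged changes
riak-admin cluster commit                                 # 5. commit them
```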
diff --git a/content/riak/kv/2.1.4/using/cluster-operations/bucket-types.md b/content/riak/kv/2.1.4/using/cluster-operations/bucket-types.md index abcd70e196..f24b096454 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.1.4/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.1.4/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. diff --git a/content/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info.md index 0fafe2135f..a6a4a742b1 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.1.4/configuring/reference +[config reference]: {{}}riak/kv/2.1.4/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.1.4/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.1.4/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. 
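To make the `force-replace` flow described above concrete, a sketch of renaming a node's entry in the cluster (the node names are illustrative; run the commands on the node that carries the new name):

```bash
# Mark the old node name as down
riak-admin down riak@old.example.com

# Join the renamed node to any running cluster member
riak-admin cluster join riak@riak2.example.com

# Have the new name take over the old name's partitions
riak-admin cluster force-replace riak@old.example.com riak@new.example.com

# Review the staged transition, then commit it
riak-admin cluster plan
riak-admin cluster commit
```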
diff --git a/content/riak/kv/2.1.4/using/cluster-operations/handoff.md b/content/riak/kv/2.1.4/using/cluster-operations/handoff.md index b6897fa4bf..e92c419903 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.1.4/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.1.4/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -89,7 +89,7 @@ Header | Description `Total` | Total number of active transfers throughout the entire cluster `Ownership` | Total number of ownership exchanges `Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated) -`Hinted` | Total number of hinted handoffs +`Hinted` | Total number of hinted handoffs `Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807). ### details @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.1.4/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.1.4/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.1.4/using/cluster-operations/logging.md b/content/riak/kv/2.1.4/using/cluster-operations/logging.md index 3f000e98fb..3143e8308a 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/logging.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/logging.md @@ -17,7 +17,7 @@ aliases: If you'd like to enable debug logging on the current node, i.e. set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.1.4/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.1.4/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.1.4/using/cluster-operations/replacing-node.md b/content/riak/kv/2.1.4/using/cluster-operations/replacing-node.md index 185df51608..57f02129ed 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/replacing-node.md @@ -16,7 +16,7 @@ aliases: --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.1.4/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.1.4/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. 
In this example @@ -34,21 +34,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.1.4/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.1.4/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.1.4/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.1.4/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -63,7 +63,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.1.4/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -71,13 +71,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.1.4/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.1.4/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -85,8 +85,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.1.4/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.1.4/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.1.4/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.1.4/using/cluster-operations/strong-consistency.md index 96c2f2fe9b..d647756c1f 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. 
This data is available by running the [`riak-admin status`](/riak/kv/2.1.4/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.1.4/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.1.4/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.1.4/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.1.4/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.1.4/using/cluster-operations/v2-multi-datacenter.md index b0b32765cd..52e0ba7263 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/v2-multi-datacenter.md @@ -159,7 +159,7 @@ restarting Riak Enterprise. Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -179,7 +179,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -217,7 +217,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.1.4/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.1.4/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -238,7 +238,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter.md index e2933859e9..3f5e9b041c 100644 --- a/content/riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.1.4/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.1.4/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.1.4/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.1.4/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.1.4/using/performance.md b/content/riak/kv/2.1.4/using/performance.md index 5613257f07..87a13dbd40 100644 --- a/content/riak/kv/2.1.4/using/performance.md +++ b/content/riak/kv/2.1.4/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.1.4/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.1.4/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -237,12 +237,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.1.4/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.1.4/using/performance/open-files-limit/) for more details.
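Before tuning, it can help to see how close a node is to the limit. A minimal sketch for a typical Linux host (the limit value is illustrative, not a recommendation):

```bash
# Current soft and hard limits for this shell
ulimit -Sn
ulimit -Hn

# Approximate count of file handles held by the Riak VM
lsof -p "$(pgrep -f beam.smp | head -n 1)" | wc -l

# Raise the soft limit for this session before starting Riak
ulimit -n 65536
riak start
```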
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.1.4/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.1.4/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.1.4/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.1.4/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.1.4/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.1.4/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.1.4/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.1.4/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.1.4/using/performance/benchmarking.md b/content/riak/kv/2.1.4/using/performance/benchmarking.md index 308b453f5a..71eb12e506 100644 --- a/content/riak/kv/2.1.4/using/performance/benchmarking.md +++ b/content/riak/kv/2.1.4/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.1.4/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.1.4/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.1.4/using/performance/latency-reduction.md b/content/riak/kv/2.1.4/using/performance/latency-reduction.md index 48a64ff21f..2f57830816 100644 --- a/content/riak/kv/2.1.4/using/performance/latency-reduction.md +++ b/content/riak/kv/2.1.4/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.1.4/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.1.4/using/performance/multi-datacenter-tuning.md index 1af4b17367..30a9db513e 100644 --- a/content/riak/kv/2.1.4/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.1.4/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.1.4/using/performance +[perf index]: {{}}riak/kv/2.1.4/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.1.4/using/performance/open-files-limit.md b/content/riak/kv/2.1.4/using/performance/open-files-limit.md index d5ea893c6c..f7e6f81c23 100644 --- a/content/riak/kv/2.1.4/using/performance/open-files-limit.md +++ b/content/riak/kv/2.1.4/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/ops/tuning/open-files-limit/ --- -[plan backend bitcask]: /riak/kv/2.1.4/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.1.4/setup/planning/backend/bitcask Riak can consume a large number of open file handles during normal operation. 
The [Bitcask][plan backend bitcask] backend in particular may accumulate a high diff --git a/content/riak/kv/2.1.4/using/reference/bucket-types.md b/content/riak/kv/2.1.4/using/reference/bucket-types.md index b4da61ca4a..d53647a402 100644 --- a/content/riak/kv/2.1.4/using/reference/bucket-types.md +++ b/content/riak/kv/2.1.4/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.1.4/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.1.4/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.1.4/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.1.4/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.1.4/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. @@ -39,14 +39,14 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype`, `consistent`, and `write_once` properties, related to - [Riak data types](/riak/kv/2.1.4/developing/data-types), [strong consistency](/riak/kv/2.1.4/developing/app-guide/strong-consistency), and - [write-once buckets](/riak/kv/2.1.4/developing/app-guide/write-once) respectively + [Riak data types]({{}}riak/kv/2.1.4/developing/data-types), [strong consistency]({{}}riak/kv/2.1.4/developing/app-guide/strong-consistency), and + [write-once buckets]({{}}riak/kv/2.1.4/developing/app-guide/write-once) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.1.4/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.1.4/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -82,8 +82,8 @@ system of bucket configuration, including the following: `riak-admin bucket-type` interface (discussed in depth below) enables you to manage bucket configurations on the operations side, without recourse to Riak clients. 
-* Some special usecases -- [strong consistency](/riak/kv/2.1.4/configuring/strong-consistency), - [data types](/riak/kv/2.1.4/developing/data-types), and [write-once buckets](/riak/kv/2.1.4/developing/app-guide/write-once) -- are only +* Some special usecases -- [strong consistency]({{}}riak/kv/2.1.4/configuring/strong-consistency), + [data types]({{}}riak/kv/2.1.4/developing/data-types), and [write-once buckets]({{}}riak/kv/2.1.4/developing/app-guide/write-once) -- are only available through bucket properties or bucket types. For these reasons, we recommend _always_ using bucket types in versions @@ -123,7 +123,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.1.4/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.1.4/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.1.4/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.1.4/developing/getting-started) section. If creation is successful, you should see the following output: @@ -525,7 +525,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.1.4/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.1.4/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -538,7 +538,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.1.4/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution) with the appropriate application-side business logic. To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -589,8 +589,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.1.4/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.1.4/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. 
This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.1.4/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.1.4/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -718,7 +718,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.1.4/learn/concepts/buckets) and [keys](/riak/kv/2.1.4/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.1.4/learn/concepts/buckets) and [keys]({{}}riak/kv/2.1.4/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.1.4/using/reference/custom-code.md b/content/riak/kv/2.1.4/using/reference/custom-code.md index 8554894697..bc417f79ef 100644 --- a/content/riak/kv/2.1.4/using/reference/custom-code.md +++ b/content/riak/kv/2.1.4/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.1.4/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.1.4/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.1.4/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.1.4/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.1.4/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.1.4/using/admin/riak-admin/#wait-for-service). {{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.1.4/using/reference/handoff.md b/content/riak/kv/2.1.4/using/reference/handoff.md index 33a013c360..c6ce609dbd 100644 --- a/content/riak/kv/2.1.4/using/reference/handoff.md +++ b/content/riak/kv/2.1.4/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.1.4/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.1.4/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. 
-Hinted handoff occurs when a [vnode](/riak/kv/2.1.4/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.1.4/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.1.4/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.1.4/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.1.4/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.1.4/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.1.4/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.1.4/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.1.4/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.1.4/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.1.4/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.1.4/configuring/reference/#vnode_management_timer). Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.1.4/using/reference/jmx.md b/content/riak/kv/2.1.4/using/reference/jmx.md index d18cfb2374..a581c614dd 100644 --- a/content/riak/kv/2.1.4/using/reference/jmx.md +++ b/content/riak/kv/2.1.4/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.1.4/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.1.4/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). 
+Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.1.4/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.1.4/using/reference/logging.md b/content/riak/kv/2.1.4/using/reference/logging.md index 5b27a112e2..04eab69436 100644 --- a/content/riak/kv/2.1.4/using/reference/logging.md +++ b/content/riak/kv/2.1.4/using/reference/logging.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.1.4/ops/running/logging --- -[cluster ops log]: /riak/kv/2.1.4/using/cluster-operations/logging -[config reference lager]: /riak/kv/2.1.4/configuring/reference/#lager +[cluster ops log]: {{}}riak/kv/2.1.4/using/cluster-operations/logging +[config reference lager]: {{}}riak/kv/2.1.4/configuring/reference/#logging [Erlang]: http://www.erlang.org [facility level]: http://en.wikipedia.org/wiki/Syslog#Facility_levels -[install source index]: /riak/kv/2.1.4/setup/installing/source +[install source index]: {{}}riak/kv/2.1.4/setup/installing/source [lager]: https://github.com/basho/lager [SASL]: http://www.erlang.org/doc/man/sasl_app.html -[use admin riak cli attach direct]: /riak/kv/2.1.4/using/admin/riak-cli/#attach-direct +[use admin riak cli attach direct]: {{}}riak/kv/2.1.4/using/admin/riak-cli/#attach-direct Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang][Erlang] called [lager][lager]. @@ -51,8 +51,8 @@ File | Significance :----|:------------ `console.log` | Console log output `crash.log` | Crash logs -`erlang.log` | Logs emitted by the Erlang VM on which Riak runs. -`error.log` | Common errors emitted by Riak. +`erlang.log` | Logs emitted by the Erlang VM on which Riak runs. +`error.log` | Common errors emitted by Riak. `run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. ## Log Syntax diff --git a/content/riak/kv/2.1.4/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.1.4/using/reference/multi-datacenter/comparison.md index dd97475d7b..d604f41326 100644 --- a/content/riak/kv/2.1.4/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.1.4/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.1.4/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.1.4/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.1.4/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.1.4/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. -* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.1.4/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. 
* Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.1.4/using/reference/multi-datacenter/statistics.md b/content/riak/kv/2.1.4/using/reference/multi-datacenter/statistics.md index 364caa1b9b..61fdbca1d2 100644 --- a/content/riak/kv/2.1.4/using/reference/multi-datacenter/statistics.md +++ b/content/riak/kv/2.1.4/using/reference/multi-datacenter/statistics.md @@ -106,7 +106,7 @@ Field | Description `successful_exits` | The number of partitions successfully synced. When completed, this will be the same number as total number of partitions in the ring. `error_exits` | If a sync failed or was aborted, the partition will be queued again and try again later `running_stats` | `[{, },…]` Any running sync processes are listed here, and described in the table below -`socket` | See Socket Statistics +`socket` | See Socket Statistics `fullsync_suggested` | Realtime replication errors occurred on these nodes, a fullsync is suggested `fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync is already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes. `socket` | `{peername: `, `sockname: }` @@ -119,7 +119,7 @@ Field | Description `site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*. `strategy` | The strategy that fulfills fullsync replication. In previous versions of replication, different values could be configured. This value could be changed depending on your replication needs. `fullsync_worker` | The Erlang process id of the fullsync worker. -`socket` | See Socket Statistics +`socket` | See Socket Statistics `state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.
  • **`wait_for_partition`**
  • **`build_keylist`**
  • **`wait_keylist`**
  • **`diff_bloom`**
  • **`diff_keylist`**
`fullsync` | The partition that is currently being synchronized with the sink cluster `partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink @@ -177,8 +177,8 @@ Field | Description `leader` | Which node is the current leader of the cluster for Version 2 Replication `local_leader_message_queue_len` | The length of the object queue on the leader `local_leader_heap_size` | The amount of memory the leader is using -`client_stats` | See Client Statistics -`server_stats` | See Server Statistics +`client_stats` | See Client Statistics +`server_stats` | See Server Statistics ## Client Statistics @@ -201,7 +201,7 @@ Field | Description `site` | The connected site (sink) name configured with. *Warning: This will be renamed in a future version of Riak*. `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See Bounded Queue +`bounded_queue` | See Bounded Queue `state` | State shows what the current replication strategy is currently processing. The following definitions appear in the status output if keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • **`wait_for_partition`**
  • **`build_keylist`**
  • **`wait_keylist`**
  • **`diff_bloom`**
  • **`diff_keylist`**
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.1.4/using/reference/runtime-interaction.md b/content/riak/kv/2.1.4/using/reference/runtime-interaction.md index dbc22c893b..8c79ed0ef5 100644 --- a/content/riak/kv/2.1.4/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.1.4/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.1.4/ops/advanced/runtime --- -[config reference]: /riak/kv/2.1.4/configuring/reference -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.1.4/configuring/reference +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements of the underlying operating system: distribution ports and OS diff --git a/content/riak/kv/2.1.4/using/reference/search.md b/content/riak/kv/2.1.4/using/reference/search.md index 2c833220ca..bd249f989e 100644 --- a/content/riak/kv/2.1.4/using/reference/search.md +++ b/content/riak/kv/2.1.4/using/reference/search.md @@ -15,20 +15,20 @@ aliases: - /riak/kv/2.1.4/dev/advanced/search --- -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.1.4/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak Search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -Search, you should check out the [Using Search](/riak/kv/2.1.4/developing/usage/search) document. +Search, you should check out the [Using Search]({{}}riak/kv/2.1.4/developing/usage/search) document. -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -126,7 +126,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.1.4/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.1.4/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -288,7 +288,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on to how customize your own [search schema](/riak/kv/2.1.4/developing/usage/search-schemas). +You can also find more information on how to customize your own [search schema]({{}}riak/kv/2.1.4/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -298,7 +298,7 @@ indexed.
## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.1.4/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -353,7 +353,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.1.4/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.1.4/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.1.4/using/reference/secondary-indexes.md b/content/riak/kv/2.1.4/using/reference/secondary-indexes.md index 92e47757bd..8619da21a7 100644 --- a/content/riak/kv/2.1.4/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.1.4/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.1.4/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.1.4/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.1.4/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.1.4/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.1.4/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.1.4/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.1.4/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.1.4/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.1.4/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. 
Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.1.4/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.1.4/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.1.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.1.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. diff --git a/content/riak/kv/2.1.4/using/reference/statistics-monitoring.md b/content/riak/kv/2.1.4/using/reference/statistics-monitoring.md index 85ae63ff8d..5380c42252 100644 --- a/content/riak/kv/2.1.4/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.1.4/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.1.4/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.1.4/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.1.4/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.1.4/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.1.4/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.1.4/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.1.4/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.1.4/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. 
More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.1.4/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.1.4/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.1.4/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.1.4/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -143,7 +143,7 @@ Metric | Also | Notes ## Command-line Interface -The [`riak-admin`](/riak/kv/2.1.4/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.1.4/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -168,14 +168,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.1.4/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.1.4/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.1.4/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.1.4/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -222,7 +222,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.1.4/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.1.4/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -246,7 +246,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.1.4/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.1.4/developing/api/http/status) endpoint is also available. #### Nagios @@ -320,14 +320,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.1.4/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.1.4/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. 
It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.1.4/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.1.4/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -349,8 +349,8 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.1.4/using/cluster-operations/inspecting-node) -* [Riak Control](/riak/kv/2.1.4/using/admin/riak-control/) +* [Inspecting a Node]({{}}riak/kv/2.1.4/using/cluster-operations/inspecting-node) +* [Riak Control]({{}}riak/kv/2.1.4/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -366,9 +366,9 @@ Docs](https://github.com/basho/basho_docs). [sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.1.4/using/reference/strong-consistency.md b/content/riak/kv/2.1.4/using/reference/strong-consistency.md index 45d0446bf9..5e5096c9c7 100644 --- a/content/riak/kv/2.1.4/using/reference/strong-consistency.md +++ b/content/riak/kv/2.1.4/using/reference/strong-consistency.md @@ -12,8 +12,8 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.1.4/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.1.4/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.1.4/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.1.4/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -24,7 +24,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.1.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.1.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -35,7 +35,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. 
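As a sketch of how that per-bucket-type opt-in looks in practice, assuming `strong_consistency = on` has been set in `riak.conf` on every node (the type name is illustrative):

```bash
# Create a bucket type with the consistent property enabled
riak-admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'

# Activate the type once it has propagated to all nodes
riak-admin bucket-type activate strongly_consistent

# Confirm the type's status and properties
riak-admin bucket-type status strongly_consistent
```

Buckets under this type then use the consistent read/write path; per the experimental-feature note above, treat this as a testing configuration rather than a production one.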
-Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.1.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.1.4/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{< baseurl >}}riak/kv/2.1.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{< baseurl >}}riak/kv/2.1.4/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -143,12 +143,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.1.4/configuring/strong-consistency/#fault-tolerance) +[operations]({{< baseurl >}}riak/kv/2.1.4/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.1.4/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{< baseurl >}}riak/kv/2.1.4/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.1.4/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{< baseurl >}}riak/kv/2.1.4/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.1.4/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.1.4/using/reference/v2-multi-datacenter/architecture.md index 24b7b9a6db..08f2c2fa5f 100644 --- a/content/riak/kv/2.1.4/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.1.4/using/reference/v2-multi-datacenter/architecture.md @@ -78,7 +78,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.1.4/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{< baseurl >}}riak/kv/2.1.4/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -90,7 +90,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{< baseurl >}}images/MDC_Full-sync-small.png)
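The fullsync exchange pictured above is configured from the command line in v2 replication. A rough sketch of the setup, where the node name, addresses, port, and site name are all placeholders:

```bash
# On a node in the primary cluster: expose a replication listener
riak-repl add-listener riak@primary1 10.0.1.10 9010

# On a node in the secondary cluster: register the listener as a site,
# then trigger a fullsync
riak-repl add-site 10.0.1.10 9010 newyork
riak-repl start-fullsync
```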
## Realtime Replication @@ -108,7 +108,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{< baseurl >}}images/MDC-real-time-sync-small.png)
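Once realtime replication is running, `riak-repl status` on any node summarizes listener/site state and queue statistics; the exact counters it prints vary by version:

```bash
# Check replication health; watch for growing queues or drop counters
riak-repl status
```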
## Restrictions @@ -116,6 +116,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.1.4/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.1.4/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{< baseurl >}}riak/kv/2.1.4/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/aae.md index 9ef0f2a944..dca0e2f5ca 100644 --- a/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.1.4/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.1.4/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters +[glossary aae]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference/#advanced-configuration +[concept clusters]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/architecture.md index cb807e47fa..7ceda275eb 100644 --- a/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.1.4/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.1.4/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.1.4/learn/concepts/clusters +[glossary vnode]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary/#vnode +[concept clusters]: {{< baseurl >}}riak/kv/2.1.4/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime6.png)
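The queueing walkthrough above assumes the source cluster has already been connected to the sink. With v3 replication that wiring is done with `riak-repl`, roughly as follows (the sink address and cluster name are placeholders):

```bash
# Connect to the sink cluster, then enable and start realtime replication
riak-repl connect sink1.example.com:9080
riak-repl realtime enable sink_cluster
riak-repl realtime start sink_cluster
```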
## Restrictions diff --git a/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/cascading-writes.md index 9d0ed808b8..561c441e8f 100644 --- a/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/cascading-writes.md @@ -83,7 +83,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter) for more information. To show current the settings: diff --git a/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md index 47f5d46935..efe9faa263 100644 --- a/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.1.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.1.4/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.1.4/configuring/reference/#advanced-configuration +[config reference#advanced]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.1.4/using/repair-recovery/errors.md b/content/riak/kv/2.1.4/using/repair-recovery/errors.md index fcd1fb3a4e..063ccf6688 100644 --- a/content/riak/kv/2.1.4/using/repair-recovery/errors.md +++ b/content/riak/kv/2.1.4/using/repair-recovery/errors.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.1.4/configuring/reference +[config reference]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1. +`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1.
`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. 
`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.1.4/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.1.4/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.1.4/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.1.4/using/repair-recovery/failure-recovery.md index 174d23ace4..698eb2f9c1 100644 --- a/content/riak/kv/2.1.4/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.1.4/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.1.4/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.1.4/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.1.4/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.1.4/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
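When sibling explosion is suspected, the sibling and object-size histograms that Riak already exports are a quick first check. A minimal sketch against one node's HTTP stats endpoint, assuming the default host and port:

```bash
# node_get_fsm_siblings_* and node_get_fsm_objsize_* percentiles reveal keys
# that are accumulating siblings or growing unusually large
curl -s http://127.0.0.1:8098/stats | tr ',' '\n' | grep -E 'siblings|objsize'
```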
@@ -115,7 +115,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.1.4/using/repair-recovery/repairs.md b/content/riak/kv/2.1.4/using/repair-recovery/repairs.md index 507c78c473..6c74451ef3 100644 --- a/content/riak/kv/2.1.4/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.1.4/using/repair-recovery/repairs.md @@ -20,13 +20,13 @@ aliases: - /riak/kv/2.1.4/ops/running/recovery/repairing-leveldb - /riak/kv/2.1.4/ops/running/recovery/repairing-partitions --- -[cluster ops aae]: /riak/kv/2.1.4/using/cluster-operations/active-anti-entropy/ -[config ref]: /riak/kv/2.1.4/configuring/reference/ [Erlang shell]: http://learnyousomeerlang.com/starting-out -[glossary AAE]: /riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae -[glossary readrep]: /riak/kv/2.1.4/learn/glossary/#read-repair -[search config]: /riak/kv/2.1.4/configuring/search/#search-config-settings -[tiered storage]: /riak/kv/2.1.4/setup/planning/backend/leveldb/#tiered-storage +[cluster ops aae]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/active-anti-entropy/ +[config ref]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference/ [Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary/#read-repair +[search config]: {{< baseurl >}}riak/kv/2.1.4/configuring/search/#search-config-settings +[tiered storage]: {{< baseurl >}}riak/kv/2.1.4/setup/planning/backend/leveldb/#tiered-storage @@ -236,23 +236,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.1.4/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.1.4/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/) is enabled.
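When a repair is warranted (the criteria follow below), it is issued from the node's console rather than from `riak-admin`. A sketch, with a placeholder partition ID:

```bash
# Attach to the running node and call the repair function at the Erlang prompt
riak attach
# Then, at the console, something like:
#   riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
```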
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.1.4/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.1.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.1.4/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{< baseurl >}}riak/kv/2.1.4/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.1.4/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.1.4/using/repair-recovery/rolling-replaces.md index ac2f5657c9..87f77964bf 100644 --- a/content/riak/kv/2.1.4/using/repair-recovery/rolling-replaces.md +++ b/content/riak/kv/2.1.4/using/repair-recovery/rolling-replaces.md @@ -12,9 +12,9 @@ menu: toc: true --- -[upgrade]: /riak/kv/2.1.4/setup/upgrading/cluster/ -[rolling restarts]: /riak/kv/2.1.4/using/repair-recovery/rolling-restart/ -[add node]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes +[upgrade]: {{< baseurl >}}riak/kv/2.1.4/setup/upgrading/cluster/ +[rolling restarts]: {{< baseurl >}}riak/kv/2.1.4/using/repair-recovery/rolling-restart/ +[add node]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. diff --git a/content/riak/kv/2.1.4/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.1.4/using/repair-recovery/rolling-restart.md index 1f9f7344cd..49526b9ec0 100644 --- a/content/riak/kv/2.1.4/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.1.4/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.1.4/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.1.4/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{< baseurl >}}riak/kv/2.1.4/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis.
The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.1.4/using/running-a-cluster.md b/content/riak/kv/2.1.4/using/running-a-cluster.md index 89ea6d0f54..f4bf433878 100644 --- a/content/riak/kv/2.1.4/using/running-a-cluster.md +++ b/content/riak/kv/2.1.4/using/running-a-cluster.md @@ -14,12 +14,12 @@ aliases: - /riak/2.1.4/ops/building/basic-cluster-setup --- -[cluster ops add remove node]: /riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes -[config reference]: /riak/kv/2.1.4/configuring/reference/ -[dev api pbc]: /riak/kv/2.1.4/developing/api/protocol-buffers/ +[cluster ops add remove node]: {{}}riak/kv/2.1.4/using/cluster-operations/adding-removing-nodes +[config reference]: {{}}riak/kv/2.1.4/configuring/reference/ +[dev api pbc]: {{}}riak/kv/2.1.4/developing/api/protocol-buffers/ [FQDNs]: http://en.wikipedia.org/wiki/Fully_qualified_domain_name -[use admin riak-admin cluster replace]: /riak/kv/2.1.4/using/admin/riak-admin/#cluster-replace -[use admin riak-admin force replace]: /riak/kv/2.1.4/using/admin/riak-admin/#cluster-force-replace +[use admin riak-admin cluster replace]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster-replace +[use admin riak-admin force replace]: {{}}riak/kv/2.1.4/using/admin/riak-admin/#cluster-force-replace Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the @@ -102,7 +102,7 @@ systems. Bear in mind that you need to use either the older or the newer but never both simultaneously. More on configuring Riak can be found in the Configuration +href="{{< baseurl >}}riak/kv/2.1.4/configuring/">Configuration Files documentation. {{% /note %}} diff --git a/content/riak/kv/2.1.4/using/security.md b/content/riak/kv/2.1.4/using/security.md index d96e10f256..8ec1de66cf 100644 --- a/content/riak/kv/2.1.4/using/security.md +++ b/content/riak/kv/2.1.4/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.1.4/ops/advanced/security --- -[config reference search]: /riak/kv/2.1.4/configuring/reference/#search -[config search enabling]: /riak/kv/2.1.4/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.1.4/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.1.4/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.1.4/using/security/basics -[security managing]: /riak/kv/2.1.4/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.1.4/using/security/basics +[security managing]: {{}}riak/kv/2.1.4/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.1.4/developing/usage/search +[usage search]: {{}}riak/kv/2.1.4/developing/usage/search > **Internal security** > @@ -103,8 +103,8 @@ cluster on the following TCP ports: Protocol | Port :--------|:---- -HTTP | TCP port 8098 -Protocol Buffers | TCP port 8087 +HTTP | TCP port 8098 +Protocol Buffers | TCP port 8087 ### Riak Search Ports diff --git a/content/riak/kv/2.1.4/using/security/basics.md b/content/riak/kv/2.1.4/using/security/basics.md index 0fc506ff44..527bbe68b0 100644 --- a/content/riak/kv/2.1.4/using/security/basics.md +++ b/content/riak/kv/2.1.4/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on 
Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.1.4/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.1.4/using/security/managing-sources/). As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.1.4/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.1.4/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.1.4/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.1.4/using/reference/custom-code). 1. Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.1.4/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{}}riak/kv/2.1.4/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.1.4/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{}}riak/kv/2.1.4/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.1.4/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.1.4/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{}}riak/kv/2.1.4/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. 
The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.1.4/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{< baseurl >}}riak/kv/2.1.4/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.1.4/configuring/search/) document. +[Riak Search Settings]({{< baseurl >}}riak/kv/2.1.4/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.1.4/using/security/managing-sources/). +A more in-depth tutorial can be found in [Managing Security Sources]({{< baseurl >}}riak/kv/2.1.4/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.1.4/using/security/managing-sources/) document. +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{< baseurl >}}riak/kv/2.1.4/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.1.4/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{< baseurl >}}riak/kv/2.1.4/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** > > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.1.4/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{< baseurl >}}riak/kv/2.1.4/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security.
If you are using [Protocol Buffers](/riak/kv/2.1.4/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.1.4/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.1.4/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.1.4/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). -If, however, you are using the [HTTP API](/riak/kv/2.1.4/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.1.4/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.1.4/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.1.4/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.1.4/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.1.4/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.1.4/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.1.4/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.1.4/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.1.4/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.1.4/configuring/reference/#directories).
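With the paths configured, it is worth confirming that each node can actually read the certificates it points at. A quick sketch, assuming package-default locations under `/etc/riak` (your `platform_etc_dir` may differ):

```bash
# Confirm the files exist, then inspect the subject and validity window
ls -l /etc/riak/cert.pem /etc/riak/key.pem /etc/riak/cacert.pem
openssl x509 -in /etc/riak/cert.pem -noout -subject -dates
```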
platform_data_dir The directory in which Riak stores its storage backend data, as well -as active anti-entropy data, and cluster metadata. ./data
alive_tokens Determines the number of ticks the leader will wait to hear from its -associated vnode before assuming that the vnode +associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before ensemble_tick * alive_tokens milliseconds have elapsed, the leader will @@ -1876,8 +1876,8 @@ package) and in R14B04 via a custom repository and branch.
vnode_management_timer Sets the frequency with which vnodes attempt to trigger handoff between +href="{{< baseurl >}}riak/kv/2.1.4/learn/glossary/#vnode">vnodes attempt to trigger handoff between this node and other nodes in the cluster. 10s (10 seconds)
riak_kv If you are installing custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies +href="{{< baseurl >}}riak/kv/2.1.4/developing/usage/mapreduce/">MapReduce jobs or commit hooks, this setting specifies the paths to any compiled .beam files that you wish to use. This is expressed as a list of absolute paths on the node's filesystem, e.g. [ "/tmp", "/other" ].
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.1.4/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.1.4/using/security/managing-sources.md b/content/riak/kv/2.1.4/using/security/managing-sources.md index 884b8cfa40..48f0bf1923 100644 --- a/content/riak/kv/2.1.4/using/security/managing-sources.md +++ b/content/riak/kv/2.1.4/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.1.4/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.1.4/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.1.4/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.1.4/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.1.4/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.1.4/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.1.4/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.1.4/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.1.4/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.1.4/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. 
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.1.4/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.1.4/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.1.4/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.1.4/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.2.0/_reference-links.md b/content/riak/kv/2.2.0/_reference-links.md index e820e6a4b6..f8dd3efbf7 100644 --- a/content/riak/kv/2.2.0/_reference-links.md +++ b/content/riak/kv/2.2.0/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.2.0/downloads/ -[install index]: /riak/kv/2.2.0/setup/installing -[upgrade index]: /riak/kv/2.2.0/upgrading -[plan index]: /riak/kv/2.2.0/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.2.0/configuring/reference/ -[manage index]: /riak/kv/2.2.0/using/managing -[performance index]: /riak/kv/2.2.0/using/performance -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.2.0/downloads/ +[install index]: {{}}riak/kv/2.2.0/setup/installing +[upgrade index]: {{}}riak/kv/2.2.0/upgrading +[plan index]: {{}}riak/kv/2.2.0/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.2.0/configuring/reference/ +[manage index]: {{}}riak/kv/2.2.0/using/managing +[performance index]: {{}}riak/kv/2.2.0/using/performance +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.2.0/setup/planning -[plan start]: /riak/kv/2.2.0/setup/planning/start -[plan backend]: /riak/kv/2.2.0/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.0/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.0/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.2.0/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.2.0/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.2.0/setup/planning/best-practices -[plan future]: /riak/kv/2.2.0/setup/planning/future +[plan index]: {{}}riak/kv/2.2.0/setup/planning +[plan start]: {{}}riak/kv/2.2.0/setup/planning/start +[plan backend]: {{}}riak/kv/2.2.0/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.0/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.0/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.2.0/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.2.0/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.2.0/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.2.0/setup/planning/future ## Installing -[install index]: /riak/kv/2.2.0/setup/installing -[install aws]: /riak/kv/2.2.0/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.2.0/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.2.0/setup/installing/freebsd -[install mac osx]: /riak/kv/2.2.0/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.2.0/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.2.0/setup/installing/smartos -[install solaris]: /riak/kv/2.2.0/setup/installing/solaris -[install suse]: /riak/kv/2.2.0/setup/installing/suse -[install windows azure]: /riak/kv/2.2.0/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.2.0/setup/installing +[install aws]: {{}}riak/kv/2.2.0/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.2.0/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.2.0/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.2.0/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.2.0/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.2.0/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.2.0/setup/installing/solaris +[install suse]: {{}}riak/kv/2.2.0/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.2.0/setup/installing/windows-azure -[install source index]: /riak/kv/2.2.0/setup/installing/source -[install source erlang]: /riak/kv/2.2.0/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.2.0/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.2.0/setup/installing/source +[install source erlang]: {{}}riak/kv/2.2.0/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.2.0/setup/installing/source/jvm -[install verify]: /riak/kv/2.2.0/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.0/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.2.0/setup/upgrading -[upgrade checklist]: /riak/kv/2.2.0/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.2.0/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.2.0/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.2.0/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.2.0/setup/downgrade +[upgrade index]: {{}}riak/kv/2.2.0/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.2.0/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.2.0/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.2.0/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.2.0/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.2.0/setup/downgrade ## Configuring -[config index]: /riak/kv/2.2.0/configuring -[config basic]: /riak/kv/2.2.0/configuring/basic -[config backend]: /riak/kv/2.2.0/configuring/backend -[config manage]: /riak/kv/2.2.0/configuring/managing -[config reference]: /riak/kv/2.2.0/configuring/reference/ -[config strong consistency]: /riak/kv/2.2.0/configuring/strong-consistency -[config load balance]: /riak/kv/2.2.0/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.2.0/configuring/mapreduce -[config search]: /riak/kv/2.2.0/configuring/search/ +[config index]: {{}}riak/kv/2.2.0/configuring +[config basic]: {{}}riak/kv/2.2.0/configuring/basic +[config backend]: {{}}riak/kv/2.2.0/configuring/backend +[config manage]: 
{{}}riak/kv/2.2.0/configuring/managing +[config reference]: {{}}riak/kv/2.2.0/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.2.0/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.2.0/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.2.0/configuring/mapreduce +[config search]: {{}}riak/kv/2.2.0/configuring/search/ -[config v3 mdc]: /riak/kv/2.2.0/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.2.0/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.2.0/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.2.0/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.2.0/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.2.0/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.2.0/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.2.0/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.2.0/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.2.0/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.2.0/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.2.0/using/ -[use admin commands]: /riak/kv/2.2.0/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.2.0/using/running-a-cluster +[use index]: {{}}riak/kv/2.2.0/using/ +[use admin commands]: {{}}riak/kv/2.2.0/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.2.0/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.2.0/using/reference/custom-code -[use ref handoff]: /riak/kv/2.2.0/using/reference/handoff -[use ref monitoring]: /riak/kv/2.2.0/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.2.0/using/reference/search -[use ref 2i]: /riak/kv/2.2.0/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.2.0/using/reference/snmp -[use ref strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.2.0/using/reference/jmx -[use ref obj del]: /riak/kv/2.2.0/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.2.0/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.2.0/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.2.0/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.2.0/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.2.0/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.2.0/using/reference/search +[use ref 2i]: {{}}riak/kv/2.2.0/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.2.0/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.2.0/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.2.0/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.2.0/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.2.0/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.2.0/using/admin/ -[use admin commands]: /riak/kv/2.2.0/using/admin/commands/ -[use admin riak cli]: 
/riak/kv/2.2.0/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.2.0/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.2.0/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.2.0/using/admin/ +[use admin commands]: {{}}riak/kv/2.2.0/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.2.0/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.2.0/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.2.0/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.2.0/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.2.0/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.2.0/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.2.0/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.2.0/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.2.0/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.2.0/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.2.0/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.2.0/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.2.0/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.2.0/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.2.0/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.2.0/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.2.0/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.2.0/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.2.0/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.2.0/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.2.0/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.2.0/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.2.0/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.2.0/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.2.0/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.2.0/using/repair-recovery -[repair recover index]: /riak/kv/2.2.0/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.2.0/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.2.0/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.2.0/using/security/ -[security basics]: /riak/kv/2.2.0/using/security/basics -[security managing]: /riak/kv/2.2.0/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.2.0/using/security/ +[security basics]: {{}}riak/kv/2.2.0/using/security/basics +[security managing]: {{}}riak/kv/2.2.0/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.2.0/using/performance/ -[perf 
benchmark]: /riak/kv/2.2.0/using/performance/benchmarking -[perf open files]: /riak/kv/2.2.0/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.2.0/using/performance/erlang -[perf aws]: /riak/kv/2.2.0/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.2.0/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.2.0/using/performance/ +[perf benchmark]: {{}}riak/kv/2.2.0/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.2.0/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.2.0/using/performance/erlang +[perf aws]: {{}}riak/kv/2.2.0/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.2.0/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.2.0/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.2.0/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.2.0/developing -[dev client libraries]: /riak/kv/2.2.0/developing/client-libraries -[dev data model]: /riak/kv/2.2.0/developing/data-modeling -[dev data types]: /riak/kv/2.2.0/developing/data-types -[dev kv model]: /riak/kv/2.2.0/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.2.0/developing +[dev client libraries]: {{}}riak/kv/2.2.0/developing/client-libraries +[dev data model]: {{}}riak/kv/2.2.0/developing/data-modeling +[dev data types]: {{}}riak/kv/2.2.0/developing/data-types +[dev kv model]: {{}}riak/kv/2.2.0/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.2.0/developing/getting-started -[getting started java]: /riak/kv/2.2.0/developing/getting-started/java -[getting started ruby]: /riak/kv/2.2.0/developing/getting-started/ruby -[getting started python]: /riak/kv/2.2.0/developing/getting-started/python -[getting started php]: /riak/kv/2.2.0/developing/getting-started/php -[getting started csharp]: /riak/kv/2.2.0/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.2.0/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.2.0/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.2.0/developing/getting-started/golang - -[obj model java]: /riak/kv/2.2.0/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.2.0/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.2.0/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.2.0/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.2.0/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.2.0/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.2.0/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.2.0/developing/getting-started +[getting started java]: {{}}riak/kv/2.2.0/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.2.0/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.2.0/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.2.0/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.2.0/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.2.0/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.2.0/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.2.0/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.2.0/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.2.0/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.2.0/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.2.0/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.2.0/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.2.0/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.2.0/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.2.0/developing/usage -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.2.0/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.2.0/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.2.0/developing/usage/content-types -[usage create objects]: /riak/kv/2.2.0/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.2.0/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.2.0/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.2.0/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.0/developing/usage/search -[usage search schema]: /riak/kv/2.2.0/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.2.0/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.2.0/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.2.0/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.2.0/developing/usage +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.2.0/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.2.0/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.2.0/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.2.0/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.2.0/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.2.0/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.2.0/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search +[usage search schema]: {{}}riak/kv/2.2.0/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.2.0/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.2.0/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.2.0/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.2.0/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.2.0/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.2.0/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.2.0/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.2.0/developing/api/backend -[dev api http]: /riak/kv/2.2.0/developing/api/http -[dev api http status]: /riak/kv/2.2.0/developing/api/http/status -[dev api pbc]: /riak/kv/2.2.0/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.2.0/developing/api/backend +[dev api http]: {{}}riak/kv/2.2.0/developing/api/http +[dev api http status]: {{}}riak/kv/2.2.0/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.2.0/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.2.0/learn/glossary/ -[glossary aae]: /riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.2.0/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.2.0/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.2.0/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode -[concept aae]: /riak/kv/2.2.0/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.2.0/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.2.0/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.2.0/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.0/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.0/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.2.0/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.2.0/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.2.0/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.2.0/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.2.0/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.0/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.0/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.2.0/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.2.0/add-ons.md b/content/riak/kv/2.2.0/add-ons.md index cf2ccc0a78..9631c7434e 100644 --- a/content/riak/kv/2.2.0/add-ons.md +++ b/content/riak/kv/2.2.0/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.2.0/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.2.0/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.2.0/add-ons/redis/developing-rra.md b/content/riak/kv/2.2.0/add-ons/redis/developing-rra.md index 681557476b..966dccb910 100644 --- a/content/riak/kv/2.2.0/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.2.0/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.2.0/developing/api/http +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.2.0/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.2.0/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.2.0/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.2.0/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.2.0/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.2.0/add-ons/redis/redis-add-on-features.md index 333881048a..ae60f3b68f 100644 --- a/content/riak/kv/2.2.0/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.2.0/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
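To make the read-through sequence above concrete, here is a minimal client-side sketch in Python. It is illustrative only, not how RRA is implemented: RRA applies this logic inside the cache proxy while clients simply speak the Redis protocol. The host/port values, bucket, and key are assumptions, and it uses the `redis` and `requests` packages.

```python
import redis      # pip install redis
import requests   # pip install requests

CACHE_TTL = 15  # seconds; stands in for the cache proxy's CACHE_TTL option

cache = redis.Redis(host="localhost", port=6379)

def read_through(bucket: str, key: str) -> bytes:
    """GET path: serve from Redis on a hit; on a miss, read from Riak KV
    and prime the cache so reads within the next CACHE_TTL seconds hit."""
    cache_key = f"{bucket}:{key}"
    cached = cache.get(cache_key)
    if cached is not None:
        return cached  # cache hit: Riak KV is never contacted
    # Cache miss: fetch the object from Riak KV over its HTTP API
    resp = requests.get(f"http://localhost:8098/buckets/{bucket}/keys/{key}")
    resp.raise_for_status()
    # Prime the cache with an expiry, bounding staleness to CACHE_TTL
    cache.setex(cache_key, CACHE_TTL, resp.content)
    return resp.content
```

The key property is visible in the last step: a stale value can live for at most `CACHE_TTL` seconds before a miss forces a fresh read from Riak KV.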
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.2.0/add-ons/redis/set-up-rra.md b/content/riak/kv/2.2.0/add-ons/redis/set-up-rra.md index c4494ae48e..2f50348c4f 100644 --- a/content/riak/kv/2.2.0/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.2.0/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.2.0/setup/installing -[perf open files]: /riak/kv/2.2.0/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.2.0/setup/installing +[perf open files]: {{}}riak/kv/2.2.0/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. diff --git a/content/riak/kv/2.2.0/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.2.0/add-ons/redis/set-up-rra/deployment-models.md index 942bba42c8..3b38418341 100644 --- a/content/riak/kv/2.2.0/add-ons/redis/set-up-rra/deployment-models.md +++ b/content/riak/kv/2.2.0/add-ons/redis/set-up-rra/deployment-models.md @@ -13,9 +13,9 @@ toc: true commercial_offering: true --- -[Local-deployment]: /images/redis/rra_deployment_local.png -[Colocated-deployment]: /images/redis/rra_deployment_colocated.png -[Distributed-deployment]: /images/redis/rra_deployment_distributed.png +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png ## Deployment Models @@ -24,7 +24,7 @@ commercial_offering: true In a local cache deployment, the RRA and Redis are deployed to the application server. -![Local-deployment](/images/redis/rra_deployment_local.png) +![Local-deployment]({{}}images/redis/rra_deployment_local.png) Connections: @@ -65,7 +65,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so are not multiplied by the number of servers. -![Colocated-deployment](/images/redis/rra_deployment_colocated.png) +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) Connections: @@ -103,7 +103,7 @@ Disadvantages: In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes. 
-![Distributed-deployment](/images/redis/rra_deployment_distributed.png) +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) Connections: diff --git a/content/riak/kv/2.2.0/add-ons/redis/using-rra.md b/content/riak/kv/2.2.0/add-ons/redis/using-rra.md index 5f94ac2f9c..dc0d70ab2e 100644 --- a/content/riak/kv/2.2.0/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.2.0/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.2.0/developing/api/http/ +[dev api http]: {{}}riak/kv/2.2.0/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.2.0/configuring/backend.md b/content/riak/kv/2.2.0/configuring/backend.md index 9f7e9c7e8f..3470aa64d8 100644 --- a/content/riak/kv/2.2.0/configuring/backend.md +++ b/content/riak/kv/2.2.0/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.0/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.0/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.0/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.0/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.2.0/configuring/basic.md b/content/riak/kv/2.2.0/configuring/basic.md index c562afb612..448e094d8d 100644 --- a/content/riak/kv/2.2.0/configuring/basic.md +++ b/content/riak/kv/2.2.0/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.2.0/ops/building/configuration/ --- -[config reference]: /riak/kv/2.2.0/configuring/reference -[use running cluster]: /riak/kv/2.2.0/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.2.0/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.2.0/using/performance/erlang -[plan start]: /riak/kv/2.2.0/setup/planning/start -[plan best practices]: /riak/kv/2.2.0/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.2.0/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.2.0/setup/planning/backend -[plan backend multi]: /riak/kv/2.2.0/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.2.0/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.2.0/using/performance/benchmarking -[perf open files]: /riak/kv/2.2.0/using/performance/open-files-limit -[perf index]: /riak/kv/2.2.0/using/performance -[perf aws]: /riak/kv/2.2.0/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.2.0/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.2.0/configuring/reference +[use running cluster]: {{}}riak/kv/2.2.0/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.2.0/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.2.0/using/performance/erlang +[plan start]: 
{{}}riak/kv/2.2.0/setup/planning/start +[plan best practices]: {{}}riak/kv/2.2.0/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.2.0/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.2.0/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.2.0/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.2.0/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.2.0/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.2.0/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.2.0/using/performance +[perf aws]: {{}}riak/kv/2.2.0/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.2.0/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.2.0/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.2.0/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
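To ground the above in an example, a hedged riak.conf sketch of those default bucket properties could look like the following. The key names are recalled from the configuration reference, so verify them against your own riak.conf; the values simply restate the defaults.

```riakconf
## Default bucket properties. Changes apply to buckets that have not
## overridden them, but a restarted node will not push new defaults to
## existing buckets unless their properties are reset (see above).
buckets.default.n_val = 3
buckets.default.r = 3
buckets.default.w = 3
```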
## System tuning diff --git a/content/riak/kv/2.2.0/configuring/global-object-expiration.md b/content/riak/kv/2.2.0/configuring/global-object-expiration.md index 125801885e..1caa836941 100644 --- a/content/riak/kv/2.2.0/configuring/global-object-expiration.md +++ b/content/riak/kv/2.2.0/configuring/global-object-expiration.md @@ -10,7 +10,6 @@ menu: project: "riak_kv" project_version: "2.2.0" toc: true -canonical_link: "https://docs.basho.com/riak/kv/latest/configuring/global-object-expiration" --- [ttl]: https://en.wikipedia.org/wiki/Time_to_live diff --git a/content/riak/kv/2.2.0/configuring/load-balancing-proxy.md b/content/riak/kv/2.2.0/configuring/load-balancing-proxy.md index 7bd0d86aec..1be8ee067c 100644 --- a/content/riak/kv/2.2.0/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.2.0/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.2.0/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.2.0/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.2.0/configuring/managing.md b/content/riak/kv/2.2.0/configuring/managing.md index d7f3203056..ff06b55750 100644 --- a/content/riak/kv/2.2.0/configuring/managing.md +++ b/content/riak/kv/2.2.0/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.2.0/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.2.0/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.2.0/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.2.0/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.2.0/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.2.0/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.2.0/configuring/mapreduce.md b/content/riak/kv/2.2.0/configuring/mapreduce.md index 2614b4a7b2..7caac40a89 100644 --- a/content/riak/kv/2.2.0/configuring/mapreduce.md +++ b/content/riak/kv/2.2.0/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.2.0/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.2.0/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.2.0/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.2.0/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.2.0/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.2.0/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.2.0/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.2.0/configuring/reference.md b/content/riak/kv/2.2.0/configuring/reference.md index 61024540f8..0afe6052f5 100644 --- a/content/riak/kv/2.2.0/configuring/reference.md +++ b/content/riak/kv/2.2.0/configuring/reference.md @@ -200,7 +200,7 @@ executables are stored. +as active anti-entropy data, and cluster metadata. @@ -1684,7 +1684,7 @@ abandons the leader (in milliseconds). 
This must be set greater than the diff --git a/content/riak/kv/2.2.0/configuring/search.md b/content/riak/kv/2.2.0/configuring/search.md index 709caa1504..beeb3540ae 100644 --- a/content/riak/kv/2.2.0/configuring/search.md +++ b/content/riak/kv/2.2.0/configuring/search.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.0/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.2.0/developing/usage/search -[usage search schema]: /riak/kv/2.2.0/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.2.0/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.2.0/developing/usage/custom-extractors -[cluster-ops aae throttle]: /riak/kv/2.2.0/using/cluster-operations/active-anti-entropy/#throttling -[config reference]: /riak/kv/2.2.0/configuring/reference -[config reference#search]: /riak/kv/2.2.0/configuring/reference/#search -[glossary aae]: /riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.2.0/using/security/ +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search +[usage search schema]: {{}}riak/kv/2.2.0/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.2.0/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.2.0/developing/usage/custom-extractors +[cluster-ops aae throttle]: {{}}riak/kv/2.2.0/using/cluster-operations/active-anti-entropy/#throttling +[config reference]: {{}}riak/kv/2.2.0/configuring/reference +[config reference#search]: {{}}riak/kv/2.2.0/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.2.0/using/security/ [java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads [java se docs]: http://www.oracle.com/technetwork/java/javase/documentation @@ -150,15 +150,15 @@ Valid values: `on` or `off` ### `search.index.error_threshold.failure_count` -The number of failures encountered while updating a search index within [`search.queue.error_threshold.failure_interval`](#search-queue-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. +The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. Valid values: Integer ### `search.index.error_threshold.failure_interval` -The window of time during which `search.queue.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. +The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. -If [`search.queue.error_threshold.failure_count`](#search-queue-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.queue.error_threshold.reset_interval`](search-queue-error-threshold-reset-interval) has passed. +If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed. 
Valid values: Milliseconds diff --git a/content/riak/kv/2.2.0/configuring/strong-consistency.md b/content/riak/kv/2.2.0/configuring/strong-consistency.md index 3c58a254e2..86e8a2a5c1 100644 --- a/content/riak/kv/2.2.0/configuring/strong-consistency.md +++ b/content/riak/kv/2.2.0/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.2.0/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.2.0/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.2.0/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.2.0/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.2.0/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.2.0/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.2.0/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.2.0/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.2.0/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.2.0/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context -[dev data types]: /riak/kv/2.2.0/developing/data-types -[glossary aae]: /riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.2.0/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.2.0/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.2.0/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.2.0/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.2.0/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.2.0/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.2.0/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.2.0/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.2.0/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.2.0/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.2.0/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.2.0/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.2.0/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.2.0/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.2.0/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.2.0/developing/data-types +[glossary 
aae]: {{}}riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.2.0/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.2.0/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.2.0/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.2.0/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  * **Ensemble** --- The ID of the ensemble
  * **Quorum** --- The number of ensemble peers that are either leading or following
  * **Nodes** --- The number of nodes currently online
  * **Leader** --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.2.0/configuring/v2-multi-datacenter.md b/content/riak/kv/2.2.0/configuring/v2-multi-datacenter.md index 688735ca99..408eb0ddd4 100644 --- a/content/riak/kv/2.2.0/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.0/configuring/v2-multi-datacenter.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.0/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.2.0/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.2.0/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.0/configuring/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication capabilities offer a diff --git a/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/nat.md index 7e8abf4355..8ba614137f 100644 --- a/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/nat.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.0/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.2.0/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.2.0/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.0/configuring/v3-multi-datacenter/nat/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/nat/) instead. {{% /note %}} Riak Enterprise supports replication of data on networks that use static diff --git a/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/quick-start.md index 807a45c7f5..ae24af0dc0 100644 --- a/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/quick-start.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.0/configuring/v3-multi-datacenter/quick-start/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/quick-start/) instead. {{% /note %}} The Riak Multi-Datacenter Replication Quick Start will walk you through diff --git a/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/ssl.md index d2d397c2e5..ed8a8a4347 100644 --- a/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/ssl.md +++ b/content/riak/kv/2.2.0/configuring/v2-multi-datacenter/ssl.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. 
Please use [v3]({{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl/) instead. {{% /note %}} ## Features diff --git a/content/riak/kv/2.2.0/configuring/v3-multi-datacenter.md b/content/riak/kv/2.2.0/configuring/v3-multi-datacenter.md index b431c16b8b..a15419c67b 100644 --- a/content/riak/kv/2.2.0/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.2.0/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.0/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.2.0/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.2.0/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/nat.md index 06d2c78af1..1b38d55ded 100644 --- a/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.0/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. diff --git a/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/quick-start.md index 85b18a1569..b1e6492314 100644 --- a/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.2.0/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.2.0/using/performance -[config v3 mdc]: /riak/kv/2.2.0/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.2.0/using/performance +[config v3 mdc]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl.md index 05a0bcf333..ef4af99901 100644 --- a/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.0/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.2.0/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.2.0/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.2.0/developing/api/backend.md b/content/riak/kv/2.2.0/developing/api/backend.md index be658b618d..52110a9d2e 100644 --- a/content/riak/kv/2.2.0/developing/api/backend.md +++ b/content/riak/kv/2.2.0/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/backend-api --- -[plan backend]: /riak/kv/2.2.0/setup/planning/backend +[plan backend]: {{}}riak/kv/2.2.0/setup/planning/backend Riak's storage API uniformly applies to 
all of the [supported backends][plan backend]. This page presents the details of
diff --git a/content/riak/kv/2.2.0/developing/api/http.md b/content/riak/kv/2.2.0/developing/api/http.md
index ecd04c2a19..ba1bdcf8c9 100644
--- a/content/riak/kv/2.2.0/developing/api/http.md
+++ b/content/riak/kv/2.2.0/developing/api/http.md
@@ -29,21 +29,21 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`.
Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.2.0/developing/api/http/get-bucket-props)
-`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.2.0/developing/api/http/set-bucket-props)
-`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.2.0/developing/api/http/reset-bucket-props)
-`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.2.0/developing/api/http/list-buckets)
-`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.2.0/developing/api/http/list-keys)
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.2.0/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.2.0/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.2.0/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.2.0/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.2.0/developing/api/http/list-keys)

## Object-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.2.0/developing/api/http/fetch-object)
-`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object](/riak/kv/2.2.0/developing/api/http/store-object)
-`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.2.0/developing/api/http/store-object)
-`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.2.0/developing/api/http/store-object)
-`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.2.0/developing/api/http/delete-object)
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.2.0/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{}}riak/kv/2.2.0/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.2.0/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.2.0/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.2.0/developing/api/http/delete-object)

## Riak-Data-Type-related Operations

@@ -53,9 +53,9 @@ Method | URL
`POST` | `/types/<type>/buckets/<bucket>/datatypes`
`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`

-For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.2.0/learn/concepts/crdts),
-see the `curl` examples in [Using Data Types](/riak/kv/2.2.0/developing/data-types/#usage-examples)
-and subpages e.g. [sets](/riak/kv/2.2.0/developing/data-types/sets).
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.2.0/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/2.2.0/developing/data-types/#usage-examples)
+and subpages e.g. [sets]({{}}riak/kv/2.2.0/developing/data-types/sets).
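Those `curl` examples translate directly to any HTTP client. Below is a hedged round-trip sketch in Python against a set data type; the node address and the type/bucket/key names are assumptions, and the request shape is recalled from the Data Types docs rather than guaranteed here.

```python
import json
import requests

BASE = "http://localhost:8098"
# Placeholder names throughout; the "sets" bucket type must already
# have been created with datatype = set and activated.
url = f"{BASE}/types/sets/buckets/travel/datatypes/cities"

# Update the set: add two elements in a single operation
requests.post(
    url,
    headers={"Content-Type": "application/json"},
    data=json.dumps({"add_all": ["Toronto", "Montreal"]}),
)

# Fetch the set back; the body carries the type, value, and opaque context
print(requests.get(url).json())  # e.g. {"type": "set", "value": [...], ...}
```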
Advanced users may consult the technical documentation inside the Riak
KV internal module `riak_kv_wm_crdt`.
@@ -64,26 +64,26 @@ KV internal module `riak_kv_wm_crdt`.
Method | URL | Doc
:------|:----|:---
-`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.2.0/developing/api/http/mapreduce)
-`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes](/riak/kv/2.2.0/developing/api/http/secondary-indexes)
-`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes](/riak/kv/2.2.0/developing/api/http/secondary-indexes)
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.2.0/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.2.0/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.2.0/developing/api/http/secondary-indexes)

## Server-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/ping` | [HTTP Ping](/riak/kv/2.2.0/developing/api/http/ping)
-`GET` | `/stats` | [HTTP Status](/riak/kv/2.2.0/developing/api/http/status)
-`GET` | `/` | [HTTP List Resources](/riak/kv/2.2.0/developing/api/http/list-resources)
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.2.0/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.2.0/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.2.0/developing/api/http/list-resources)

## Search-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/search/query/<index_name>` | [HTTP Search Query](/riak/kv/2.2.0/developing/api/http/search-query)
-`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.2.0/developing/api/http/search-index-info)
-`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index](/riak/kv/2.2.0/developing/api/http/fetch-search-index)
-`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index](/riak/kv/2.2.0/developing/api/http/store-search-index)
-`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index](/riak/kv/2.2.0/developing/api/http/delete-search-index)
-`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema](/riak/kv/2.2.0/developing/api/http/fetch-search-schema)
-`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema](/riak/kv/2.2.0/developing/api/http/store-search-schema)
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{}}riak/kv/2.2.0/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.2.0/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{}}riak/kv/2.2.0/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{}}riak/kv/2.2.0/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{}}riak/kv/2.2.0/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.2.0/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{}}riak/kv/2.2.0/developing/api/http/store-search-schema)
diff --git a/content/riak/kv/2.2.0/developing/api/http/counters.md b/content/riak/kv/2.2.0/developing/api/http/counters.md
index 1994ee6bbe..de266c6235 100644
--- a/content/riak/kv/2.2.0/developing/api/http/counters.md
+++ b/content/riak/kv/2.2.0/developing/api/http/counters.md
@@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY

## Response

-The regular POST/PUT ([HTTP Store Object](/riak/kv/2.2.0/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.2.0/developing/api/http/fetch-object)) responses apply here.
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.2.0/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.2.0/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.2.0/developing/api/http/fetch-object.md b/content/riak/kv/2.2.0/developing/api/http/fetch-object.md index dfd3c04c2d..3be599d14c 100644 --- a/content/riak/kv/2.2.0/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.2.0/developing/api/http/fetch-object.md @@ -40,14 +40,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.2.0/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.2.0/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.2.0/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.2.0/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.2.0/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.2.0/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.2.0/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.2.0/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -74,7 +74,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.2.0/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.2.0/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.2.0/developing/api/http/fetch-search-index.md b/content/riak/kv/2.2.0/developing/api/http/fetch-search-index.md index c78df42599..dfa6bc464f 100644 --- a/content/riak/kv/2.2.0/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.2.0/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.2.0/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.2.0/developing/usage/search/#simple-setup). 
## Request @@ -36,7 +36,7 @@ GET /search/index/ ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.2.0/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.2.0/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.2.0/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.2.0/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.2.0/developing/api/http/fetch-search-schema.md index ecd2fc74f6..483b3c0b22 100644 --- a/content/riak/kv/2.2.0/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.2.0/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.2.0/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.2.0/developing/api/http/get-bucket-props.md b/content/riak/kv/2.2.0/developing/api/http/get-bucket-props.md index 721ccc56fd..b6f831328d 100644 --- a/content/riak/kv/2.2.0/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.2.0/developing/api/http/get-bucket-props.md @@ -27,7 +27,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.2.0/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.2.0/developing/api/http/list-keys). ## Response @@ -43,7 +43,7 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.2.0/developing/api/http/set-bucket-props) for more information about the available +See [HTTP Set Bucket Properties]({{}}riak/kv/2.2.0/developing/api/http/set-bucket-props) for more information about the available bucket properties. ## Example diff --git a/content/riak/kv/2.2.0/developing/api/http/link-walking.md b/content/riak/kv/2.2.0/developing/api/http/link-walking.md index a3f36cc989..747b7b1f3c 100644 --- a/content/riak/kv/2.2.0/developing/api/http/link-walking.md +++ b/content/riak/kv/2.2.0/developing/api/http/link-walking.md @@ -21,8 +21,8 @@ This feature is deprecated and will be removed in a future version. Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.2.0/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.2.0/learn/glossary/#links). +is a special case of [MapReduce]({{}}riak/kv/2.2.0/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.2.0/learn/glossary/#links). ## Request @@ -68,7 +68,7 @@ single object that was found. 
If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.2.0/developing/api/http/fetch-object), without the status +response from [fetching the object]({{}}riak/kv/2.2.0/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.2.0/developing/api/http/list-resources.md b/content/riak/kv/2.2.0/developing/api/http/list-resources.md index b6b7020569..a92ce0a1f8 100644 --- a/content/riak/kv/2.2.0/developing/api/http/list-resources.md +++ b/content/riak/kv/2.2.0/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.2.0/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.2.0/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.2.0/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.2.0/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.2.0/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.2.0/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.2.0/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.2.0/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.2.0/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.2.0/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.2.0/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.2.0/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.2.0/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.2.0/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.2.0/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.2.0/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.2.0/developing/api/http/mapreduce.md b/content/riak/kv/2.2.0/developing/api/http/mapreduce.md index ce3bf986ea..4320ce0ef1 100644 --- a/content/riak/kv/2.2.0/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.2.0/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.2.0/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{}}riak/kv/2.2.0/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.2.0/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. 
The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.2.0/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.2.0/developing/api/http/search-index-info.md b/content/riak/kv/2.2.0/developing/api/http/search-index-info.md index f0f7daf45b..33095c8a0a 100644 --- a/content/riak/kv/2.2.0/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.2.0/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.2.0/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.2.0/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.2.0/developing/api/http/search-query.md b/content/riak/kv/2.2.0/developing/api/http/search-query.md index 7cfd0f769e..2bc3be96df 100644 --- a/content/riak/kv/2.2.0/developing/api/http/search-query.md +++ b/content/riak/kv/2.2.0/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.2.0/developing/usage/search) query. +Performs a [Riak KV Search]({{}}riak/kv/2.2.0/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.2.0/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{}}riak/kv/2.2.0/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.2.0/developing/api/http/secondary-indexes.md b/content/riak/kv/2.2.0/developing/api/http/secondary-indexes.md index 621d2b2069..75db386ac5 100644 --- a/content/riak/kv/2.2.0/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.2.0/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.2.0/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
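As a rough sketch of that tag-then-query workflow, here is the same idea via the official Python client; the bucket, key, and index names are invented for illustration, and a local node is assumed:

```python
import riak

client = riak.RiakClient(protocol="pbc", pb_port=8087)
bucket = client.bucket("users")  # hypothetical bucket

# Tag an object with two index entries (_bin for binary, _int for integer)
obj = bucket.new("john_smith", data={"name": "John Smith"})
obj.add_index("email_bin", "jsmith@example.com")
obj.add_index("age_int", 32)
obj.store()

# Query the index later to recover the matching keys
page = bucket.get_index("email_bin", "jsmith@example.com")
print(page.results)  # e.g. ['john_smith']
```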
## Request diff --git a/content/riak/kv/2.2.0/developing/api/http/set-bucket-props.md b/content/riak/kv/2.2.0/developing/api/http/set-bucket-props.md index b231594971..cc4689945a 100644 --- a/content/riak/kv/2.2.0/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.2.0/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.2.0/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.2.0/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{}}riak/kv/2.2.0/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.2.0/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.2.0/developing/api/http/status.md b/content/riak/kv/2.2.0/developing/api/http/status.md index 05c0f00179..f3ca07d42e 100644 --- a/content/riak/kv/2.2.0/developing/api/http/status.md +++ b/content/riak/kv/2.2.0/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.2.0/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.2.0/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.2.0/developing/api/http/store-object.md b/content/riak/kv/2.2.0/developing/api/http/store-object.md index f594c39267..dc7c7c842c 100644 --- a/content/riak/kv/2.2.0/developing/api/http/store-object.md +++ b/content/riak/kv/2.2.0/developing/api/http/store-object.md @@ -38,8 +38,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.2.0/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.2.0/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{}}riak/kv/2.2.0/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.2.0/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -83,7 +83,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.2.0/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.2.0/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. 
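To make the keyless-store and `returnbody` behavior concrete, here is a small sketch with the official Python client (bucket name and local port are assumptions); storing without a key corresponds to the `POST` case in which the server picks the key:

```python
import riak

client = riak.RiakClient(protocol="http", http_port=8098)
bucket = client.bucket("scratch")  # hypothetical bucket

# No key is given, so Riak assigns one; over plain HTTP the same
# information comes back in the Location header
obj = bucket.new(data={"hello": "world"})
obj.store(return_body=True)
print(obj.key)  # the server-generated key
```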
diff --git a/content/riak/kv/2.2.0/developing/api/http/store-search-index.md b/content/riak/kv/2.2.0/developing/api/http/store-search-index.md index 0355647835..bfbce5741b 100644 --- a/content/riak/kv/2.2.0/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.2.0/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.2.0/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.2.0/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.2.0/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.2.0/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.2.0/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.2.0/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.2.0/developing/api/http/store-search-schema.md b/content/riak/kv/2.2.0/developing/api/http/store-search-schema.md index b5b45c7546..deb0ffcd72 100644 --- a/content/riak/kv/2.2.0/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.2.0/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.2.0/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/schema/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.2.0/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the file `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request: diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers.md index dc6bc2f9c7..15c4417b3c 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers.md @@ -139,47 +139,47 @@ message RpbErrorResp { ## Bucket Operations -* [PBC List Buckets](/riak/kv/2.2.0/developing/api/protocol-buffers/list-buckets) -* [PBC List Keys](/riak/kv/2.2.0/developing/api/protocol-buffers/list-keys) -* [PBC Get Bucket Properties](/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props) -* [PBC Set Bucket Properties](/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props) -* [PBC Reset Bucket Properties](/riak/kv/2.2.0/developing/api/protocol-buffers/reset-bucket-props) +* [PBC List Buckets]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/reset-bucket-props) ## Object/Key Operations -* [PBC Fetch Object](/riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object) -* [PBC Store Object](/riak/kv/2.2.0/developing/api/protocol-buffers/store-object) -* [PBC Delete Object](/riak/kv/2.2.0/developing/api/protocol-buffers/delete-object) +* [PBC Fetch Object]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/delete-object) ## Query Operations -* [PBC MapReduce](/riak/kv/2.2.0/developing/api/protocol-buffers/mapreduce) -* [PBC Secondary Indexes](/riak/kv/2.2.0/developing/api/protocol-buffers/secondary-indexes) -* [PBC Search](/riak/kv/2.2.0/developing/api/protocol-buffers/search) +* [PBC MapReduce]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/search) ## Server Operations -* [PBC Ping](/riak/kv/2.2.0/developing/api/protocol-buffers/ping) -* [PBC Server Info](/riak/kv/2.2.0/developing/api/protocol-buffers/server-info) +* [PBC Ping]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/server-info) ## Bucket Type Operations -* [PBC Get Bucket Type](/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-type) -* [PBC Set Bucket Type](/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-type) +* [PBC Get Bucket Type]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-type) ## Data Type Operations -* [PBC Data Type Fetch](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-fetch) -* [PBC Data Type Union](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-union) -* [PBC Data Type Store](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-store) -* [PBC Data Type Counter
Store](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/auth-req.md index f39b80e561..b914ea8860 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/auth-req.md @@ -27,4 +27,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.2.0/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.2.0/using/security/basics). diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..323191259e --- /dev/null +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,78 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.2.0" +menu: + riak_kv-2.2.0: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/2.2.0/dev/references/protocol-buffers/coverage-queries + - /riak/kv/2.2.0/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. 
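The official Python client ships generated bindings for these protocol messages. Assuming they are exposed at `riak.pb.riak_kv_pb2` (true for recent 2.x releases of the client), a coverage request might be built and serialized roughly as follows; the bucket name is hypothetical:

```python
# Sketch only: the module path below is an assumption about the client
# packaging, not part of the wire protocol itself
from riak.pb.riak_kv_pb2 import RpbCoverageReq

req = RpbCoverageReq()
req.bucket = b"sensor_data"   # hypothetical bucket
req.min_partitions = 16       # ask for at least 16 cluster slices

payload = req.SerializeToString()  # bytes sent after the coverage message code
```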
+ +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/delete-object.md index a8fb8a5147..9b279a545c 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/delete-object.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.2.0/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{}}riak/kv/2.2.0/using/cluster-operations/bucket-types)/bucket/key location. ## Request diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-counter-store.md index cb4e3e03c9..41fc8a4c6b 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.2.0/developing/data-types). 
+An operation to update a [counter]({{}}riak/kv/2.2.0/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-fetch.md index 522938412d..118dbaba73 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.2.0/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.2.0/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.2.0/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{}}riak/kv/2.2.0/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. ```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.2.0/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{}}riak/kv/2.2.0/learn/glossary/#vector-clock) for standard KV updates. 
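In practice, client libraries round-trip this context for you. A rough sketch with the official Python client, assuming a bucket type named `sets` that was created with `datatype = set`:

```python
import riak

client = riak.RiakClient(protocol="pbc", pb_port=8087)
bucket = client.bucket_type("sets").bucket("travel")  # hypothetical names

cities = bucket.get("cities")  # DtFetchReq under the hood; the opaque context rides along
cities.discard("Toronto")      # removals are where the context matters most
cities.store()                 # the update carries the fetched context back
```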
The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store.md index d29a30caf7..420bf8673e 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-set-store.md index 5d25ffffb7..a9f2d40c67 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-store.md index c282ead8a4..de43b30c00 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.2.0/developing/data-types). +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.2.0/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`. The `DtOp` value specifies which Data Type-specific operation is being -performed. More on that in the [PBC Data Type Union](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-union) document. +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-union) document. 
```protobuf message DtUpdateReq { @@ -50,11 +50,11 @@ message DtUpdateReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored -`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.2.0/using/cluster-operations/bucket-types). +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.2.0/using/cluster-operations/bucket-types). Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a -[counter](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store). +[counter]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-map-store). ```protobuf message DtOp { @@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum` Parameter | Description :---------|:----------- `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. -`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.2.0/learn/glossary/#vector-clock) +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.2.0/learn/glossary/#vector-clock) `w` | Write quorum, i.e. how many replicas to write to before returning a successful response `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted @@ -92,7 +92,7 @@ Parameter | Description ## Response The response to a Data Type update request is analogous to -[`RpbPutResp`](/riak/kv/2.2.0/developing/api/protocol-buffers/store-object) for KV operations. If the +[`RpbPutResp`]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/store-object) for KV operations. If the `return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-union.md index b3e9366fe2..398717e36a 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-union.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/dt-union.md @@ -28,4 +28,4 @@ message DtOp { ``` The included operation depends on the Data Type that is being updated. -`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.2.0/developing/api/protocol-buffers/dt-store) message. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/dt-store) message. 
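For instance, and again only as a sketch with hypothetical bucket-type and field names, a single map update from the official Python client bundles several field operations into the one `DtOp` carried by the `DtUpdateReq`:

```python
import riak

client = riak.RiakClient(protocol="pbc", pb_port=8087)
# Assumes a bucket type named "maps" created with datatype = map
bucket = client.bucket_type("maps").bucket("customers")

customer = bucket.new("ahmed_info")
customer.registers["name"].assign("Ahmed")  # one nested field update...
customer.counters["visits"].increment(1)    # ...and another
customer.store()  # one DtUpdateReq whose DtOp contains both field ops
```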
diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object.md index 4aa65a543d..bbd07cf2a0 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object.md @@ -47,7 +47,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or @@ -87,7 +87,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.2.0/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -114,7 +114,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `video/mp4` -* `vtag` --- The object's [vtag](/riak/kv/2.2.0/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{}}riak/kv/2.2.0/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -132,7 +132,7 @@ of the following optional parameters: } ``` Notice that both a key and value can be stored or just a key. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.2.0/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key) diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props.md index 9a8bab23a6..e485ddab7e 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props.md @@ -26,7 +26,7 @@ message RpbGetBucketReq { } ``` -The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.2.0/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.2.0/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, the `default` bucket type will be used. ## Response @@ -85,7 +85,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket. 
Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.2.0/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{}}riak/kv/2.2.0/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -106,5 +106,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) {{% /note %}} diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-type.md index ef2db1326c..4433bda139 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-type.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.2.0/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.2.0/using/cluster-operations/bucket-types). ## Request @@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-client-id.md index 1e0e6ef9bd..4ed3c3a64f 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/get-client-id.md @@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.2.0/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/set-client-id). 
## Request diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/mapreduce.md index 3ade9d71cb..19a65b0c83 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/mapreduce.md @@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.2.0/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.2.0/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as [REST API]({{}}riak/kv/2.2.0/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{}}riak/kv/2.2.0/developing/app-guide/advanced-mapreduce/#erlang) ## Response diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/reset-bucket-props.md index 8635e28320..bb01fba4c5 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/reset-bucket-props.md @@ -27,7 +27,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/secondary-indexes.md index 56894c0944..fbf6d12959 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/secondary-indexes.md @@ -61,7 +61,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.2.0/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.2.0/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -84,7 +84,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object). 
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props.md index 18cabde88c..8b686f497f 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props.md @@ -29,9 +29,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-type.md index 5a92b3bce5..b6f261e679 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-type.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.0/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.2.0/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types). ## Request @@ -28,4 +28,4 @@ message RpbSetBucketTypeReq { ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/store-object.md index cd6a720d0c..bafac30166 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/store-object.md @@ -16,11 +16,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.2.0/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.2.0/learn/concepts/buckets), and [bucket type](/riak/kv/2.2.0/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{}}riak/kv/2.2.0/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.2.0/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types). 
A bucket must always be specified (via `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no -[bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) is assigned, Riak will assign -`default`, which means that the [default bucket configuration](/riak/kv/2.2.0/configuring/reference/#default-bucket-properties) will be used. +[bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) is assigned, Riak will assign +`default`, which means that the [default bucket configuration]({{}}riak/kv/2.2.0/configuring/reference/#default-bucket-properties) will be used. #### Request @@ -50,7 +50,7 @@ message RpbPutReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket, in bytes, in which the key/value is to reside -`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object) +`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object) #### Optional Parameters @@ -93,7 +93,7 @@ message RpbPutResp { If `return_body` is set to `true` on the PUT request, the `RpbPutResp` will contain the current object after the PUT completes, in `contents`, -as well as the object's [causal context](/riak/kv/2.2.0/learn/concepts/causal-context), in the `vclock` +as well as the object's [causal context]({{}}riak/kv/2.2.0/learn/concepts/causal-context), in the `vclock` field. The `key` will be sent only if the server generated a random key for the object. diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-delete.md index 1f417736e7..e0d664f98b 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-delete.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-delete.md @@ -29,5 +29,5 @@ message RpbYokozunaIndexDeleteReq { ## Response -Returns a [RpbDelResp](/riak/kv/2.2.0/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbDelResp]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-get.md index 835504409d..eaa44f0b49 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-get.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-get.md @@ -53,7 +53,7 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.2.0/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.2.0/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-put.md index 687d8b647a..83b607bd6a 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-put.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-index-put.md @@ -37,9 +37,9 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.2.0/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.2.0/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. ## Response -Returns a [RpbPutResp](/riak/kv/2.2.0/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-get.md index 032322a718..1c3d16d2a0 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.2.0/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-put.md index a7f9a470c4..1561ce2480 100644 --- a/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.2.0/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.2.0/developing/usage/search-schemas). +Create a new Solr [search schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.2.0/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.2.0/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/#message-codes) code with no data on success. 
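As an informal companion to the reference below, creating a schema from a local XML file might look like this with the official Python client; the file and schema names are invented:

```python
import riak

client = riak.RiakClient(protocol="pbc", pb_port=8087)

# The content must be a complete Solr schema document
with open("my_schema.xml") as f:
    content = f.read()

client.create_search_schema("my_custom_schema", content)
```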
diff --git a/content/riak/kv/2.2.0/developing/app-guide.md b/content/riak/kv/2.2.0/developing/app-guide.md index 36d0eba99c..00eec7ca0b 100644 --- a/content/riak/kv/2.2.0/developing/app-guide.md +++ b/content/riak/kv/2.2.0/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.2.0/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.2.0/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.2.0/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.2.0/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.2.0/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.2.0/developing/key-value-modeling -[dev data types]: /riak/kv/2.2.0/developing/data-types -[dev data types#counters]: /riak/kv/2.2.0/developing/data-types/#counters -[dev data types#sets]: /riak/kv/2.2.0/developing/data-types/#sets -[dev data types#maps]: /riak/kv/2.2.0/developing/data-types/#maps -[usage create objects]: /riak/kv/2.2.0/developing/usage/creating-objects -[usage search]: /riak/kv/2.2.0/developing/usage/search -[use ref search]: /riak/kv/2.2.0/using/reference/search -[usage 2i]: /riak/kv/2.2.0/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.2.0/developing/client-libraries -[concept crdts]: /riak/kv/2.2.0/learn/concepts/crdts -[dev data model]: /riak/kv/2.2.0/developing/data-modeling -[usage mapreduce]: /riak/kv/2.2.0/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.2.0/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.2.0/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.0/setup/planning/backend/memory -[obj model java]: /riak/kv/2.2.0/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.2.0/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.2.0/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.2.0/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.2.0/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.2.0/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.2.0/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[use ref strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.2.0/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.2.0/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.2.0/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.2.0/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[install index]: /riak/kv/2.2.0/setup/installing -[getting started]: /riak/kv/2.2.0/developing/getting-started -[usage index]: /riak/kv/2.2.0/developing/usage -[glossary]: /riak/kv/2.2.0/learn/glossary +[usage conflict resolution]: {{}}riak/kv/2.2.0/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.2.0/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.2.0/developing/data-modeling/#sensor-data +[concept eventual consistency]: 
{{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.2.0/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.2.0/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.2.0/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.2.0/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.2.0/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.2.0/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.2.0/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search +[use ref search]: {{}}riak/kv/2.2.0/using/reference/search +[usage 2i]: {{}}riak/kv/2.2.0/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.2.0/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.2.0/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.2.0/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.2.0/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.2.0/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.2.0/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.0/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.2.0/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.2.0/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.2.0/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.2.0/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.2.0/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.2.0/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.2.0/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[cluster ops strong consistency]: {{}}riak/kv/2.2.0/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/2.2.0/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/2.2.0/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/2.2.0/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/2.2.0/setup/installing +[getting started]: {{}}riak/kv/2.2.0/developing/getting-started +[usage index]: {{}}riak/kv/2.2.0/developing/usage +[glossary]: {{}}riak/kv/2.2.0/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -118,7 +118,7 @@ Riak may not such be a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance. 
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.2.0/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.2.0/developing/app-guide/advanced-mapreduce.md index d975aab015..8a1b40dbf1 100644 --- a/content/riak/kv/2.2.0/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.2.0/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.2.0/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.2.0/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.2.0/using/reference/custom-code -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[config reference]: /riak/kv/2.2.0/configuring/reference +[usage 2i]: {{}}riak/kv/2.2.0/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.2.0/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.2.0/configuring/reference [google mr]: http://research.google.com/archive/mapreduce.html [mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map [function contrib]: https://github.com/basho/riak_function_contrib @@ -381,7 +381,7 @@ Erlang client. {{% note title="Distributing Erlang MapReduce Code" %}} Any modules and functions you use in your Erlang MapReduce calls must be available on all nodes in the cluster. Please read about -[installing custom code](/riak/kv/2.2.0/using/reference/custom-code). +[installing custom code]({{}}riak/kv/2.2.0/using/reference/custom-code). {{% /note %}} ### Erlang Example @@ -728,7 +728,7 @@ You can use streaming with Erlang via the Riak KV local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.2.0/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.2.0/developing/app-guide/cluster-metadata.md index b2d3cea96b..d36701be59 100644 --- a/content/riak/kv/2.2.0/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.2.0/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.2.0/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.2.0/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. 
Logical clocks, namely [dotted version vectors](/riak/kv/2.2.0/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.2.0/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.2.0/developing/app-guide/replication-properties.md b/content/riak/kv/2.2.0/developing/app-guide/replication-properties.md index 9c6adcf787..5f877493e7 100644 --- a/content/riak/kv/2.2.0/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.2.0/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.0/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.2.0/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.2.0/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.2.0/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.2.0/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. 
Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.2.0/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.2.0/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.2.0/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.2.0/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.2.0/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.2.0/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.2.0/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.2.0/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. 
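To make the parameters in the table above concrete, here is a minimal sketch with the official Python client, assuming a reachable local node; the `nba_stats` bucket and key mirror the curl example used elsewhere on this page:

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('nba_stats')

# Write: wait for 3 vnode acknowledgments (w) and 2 durable writes (dw).
obj = bucket.new('michael_jordan', data={'ppg': 30.1})
obj.store(w=3, dw=2)

# Read: require 3 responses (r), 2 of them from primary vnodes (pr).
fetched = bucket.get('michael_jordan', r=3, pr=2)
```

Per-request settings like these override the bucket-level defaults only for the single operation that carries them.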
The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.2.0/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.2.0/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.2.0/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.2.0/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.2.0/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.2.0/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.2.0/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.2.0/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.2.0/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.2.0/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.2.0/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.2.0/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.2.0/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.2.0/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.2.0/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.2.0/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.2.0/developing/app-guide/strong-consistency.md b/content/riak/kv/2.2.0/developing/app-guide/strong-consistency.md index efa696144c..140afc8bf9 100644 --- a/content/riak/kv/2.2.0/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.2.0/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.2.0/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/kv/2.2.0/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.2.0/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.2.0/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.2.0/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.2.0/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.2.0/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.2.0/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.2.0/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/kv/2.2.0/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.2.0/developing/client-libraries -[getting started]: /riak/kv/2.2.0/developing/getting-started -[config strong consistency#details]: /riak/kv/2.2.0/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[concept eventual consistency]: 
{{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.2.0/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.2.0/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.2.0/using/cluster-operations/bucket-types +[apps replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.2.0/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.2.0/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.2.0/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.2.0/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.2.0/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.2.0/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.2.0/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.2.0/developing/client-libraries +[getting started]: {{}}riak/kv/2.2.0/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.2.0/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.2.0/developing/app-guide/write-once.md b/content/riak/kv/2.2.0/developing/app-guide/write-once.md index aa45dc880b..0ea4646d45 100644 --- a/content/riak/kv/2.2.0/developing/app-guide/write-once.md +++ b/content/riak/kv/2.2.0/developing/app-guide/write-once.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.0/dev/advanced/write-once --- -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[bucket type]: /riak/kv/2.2.0/developing/usage/bucket-types -[Riak data types]: /riak/kv/2.2.0/developing/data-types -[strong consistency]: /riak/kv/2.2.0/developing/app-guide/strong-consistency +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.2.0/developing/data-types +[strong consistency]: {{}}riak/kv/2.2.0/developing/app-guide/strong-consistency Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. @@ -98,7 +98,7 @@ The relationship between the `riak_client`, write-once workers, and vnode proxies is illustrated in the following diagram:
-![Write Once](/images/write_once.png) +![Write Once]({{< baseurl >}}images/write_once.png)
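Because write-once is just a bucket-type property, nothing changes on the client side except the type you address. A rough Python sketch, assuming an operator has already created and activated a bucket type named `write_once` with `{"props": {"write_once": true}}` (the type, bucket, and key names here are illustrative):

```python
import riak

client = riak.RiakClient(pb_port=8087)
events = client.bucket_type('write_once').bucket('sensor_readings')

# PUTs through this type take the fast path described above: no
# coordinating read before the write. Keys should never be rewritten.
obj = events.new('sensor42-2016-12-01T00:00:00Z', data={'temp_c': 21.4})
obj.store()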
## Client Impacts @@ -149,7 +149,7 @@ LevelDB. Riak will automatically fall back to synchronous writes with all other backends. {{% note title="Note on the `multi` backend" %}} -The [Multi](/riak/kv/2.2.0/setup/planning/backend/multi) backend does not +The [Multi]({{}}riak/kv/2.2.0/setup/planning/backend/multi) backend does not support asynchronous writes. Therefore, if LevelDB is used with the Multi backend, it will be used in synchronous mode. {{% /note %}} diff --git a/content/riak/kv/2.2.0/developing/client-libraries.md b/content/riak/kv/2.2.0/developing/client-libraries.md index 77978c60a3..cec5dc14a7 100644 --- a/content/riak/kv/2.2.0/developing/client-libraries.md +++ b/content/riak/kv/2.2.0/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.2.0/developing/data-types.md b/content/riak/kv/2.2.0/developing/data-types.md index 287e5f7335..42af6331d6 100644 --- a/content/riak/kv/2.2.0/developing/data-types.md +++ b/content/riak/kv/2.2.0/developing/data-types.md @@ -43,9 +43,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -268,5 +268,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. 
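Once the three setup steps listed above are done, a data type behaves much like a native collection from the client's point of view. A rough Python sketch, assuming a bucket type named `sets` was created with `{"props": {"datatype": "set"}}` and then activated (bucket and key names are invented):

```python
import riak
from riak.datatypes import Set

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('sets').bucket('travel')

cities = Set(bucket, 'cities')
cities.add('Toronto')
cities.add('Montreal')
cities.store()    # ships the buffered additions as a single update

cities.reload()   # fetches the converged value back from Riak
```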
diff --git a/content/riak/kv/2.2.0/developing/faq.md b/content/riak/kv/2.2.0/developing/faq.md index e4e4bdc12d..e05f5440c6 100644 --- a/content/riak/kv/2.2.0/developing/faq.md +++ b/content/riak/kv/2.2.0/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.2.0/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.2.0/using/performance/benchmarking -[Bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.2.0/developing/usage +[[Basho Bench]: {{}}riak/kv/2.2.0/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.2.0/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.2.0/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.2.0/configuring/reference +[commit hooks]: {{}}riak/kv/2.2.0/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.2.0/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.2.0/developing/client-libraries -[MapReduce]: /riak/kv/2.2.0/developing/usage/mapreduce -[Memory]: /riak/kv/2.2.0/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.2.0/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.2.0/learn/concepts/causal-context#vector-clocks +[Erlang Riak Client]: {{}}riak/kv/2.2.0/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.2.0/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.2.0/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.2.0/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.2.0/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.2.0/developing/getting-started.md b/content/riak/kv/2.2.0/developing/getting-started.md index 05aff26e87..4e61adf4fa 100644 --- a/content/riak/kv/2.2.0/developing/getting-started.md +++ b/content/riak/kv/2.2.0/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.2.0/setup/installing -[dev client libraries]: /riak/kv/2.2.0/developing/client-libraries +[install index]: {{}}riak/kv/2.2.0/setup/installing +[dev client libraries]: {{}}riak/kv/2.2.0/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.2.0/developing/getting-started/csharp.md b/content/riak/kv/2.2.0/developing/getting-started/csharp.md index e99b13819a..6c86b702c4 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/csharp.md +++ b/content/riak/kv/2.2.0/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.0/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.0/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak. 
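Whichever language you pick, the "Next Steps" CRUD pages linked throughout these guides reduce to the same handful of calls. For orientation, a rough Python equivalent (bucket, key, and data are invented for the example):

```python
import riak

client = riak.RiakClient(pb_port=8087)
client.ping()  # True if the node is reachable

bucket = client.bucket('welcome')

obj = bucket.new('hello', data={'greeting': 'Hello, Riak!'})
obj.store()                                   # create

fetched = bucket.get('hello')                 # read
fetched.data['greeting'] = 'Hello again!'
fetched.store()                               # update

bucket.delete('hello')                        # delete
```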
## Next Steps -[CRUD Operations](/riak/kv/2.2.0/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.0/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.2.0/developing/getting-started/csharp/querying.md b/content/riak/kv/2.2.0/developing/getting-started/csharp/querying.md index 1146011c46..11a12b99bd 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.2.0/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.0/developing/getting-started/erlang.md b/content/riak/kv/2.2.0/developing/getting-started/erlang.md index 29b1989451..147d1a3bae 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/erlang.md +++ b/content/riak/kv/2.2.0/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.0/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.0/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.0/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.0/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.2.0/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.2.0/developing/getting-started/erlang/object-modeling.md index 081b5d90a3..1c1eeca5d9 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.2.0/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.2.0/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.2.0/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.2.0/developing/getting-started/erlang/querying.md b/content/riak/kv/2.2.0/developing/getting-started/erlang/querying.md index 027c338347..2d3c03565f 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.2.0/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.2.0/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.2.0/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.0/developing/getting-started/golang.md b/content/riak/kv/2.2.0/developing/getting-started/golang.md index f23ed76c8b..ef974b2119 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/golang.md +++ b/content/riak/kv/2.2.0/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.0/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.0/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.0/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.0/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.2.0/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.2.0/developing/getting-started/golang/object-modeling.md index eb79706cd4..e1b2869227 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.2.0/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.2.0/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.2.0/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.2.0/developing/getting-started/golang/querying.md b/content/riak/kv/2.2.0/developing/getting-started/golang/querying.md index 9ee0ef717f..3e1593a493 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.2.0/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.2.0/developing/getting-started/java.md b/content/riak/kv/2.2.0/developing/getting-started/java.md index 960de3712b..ff1941da2e 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/java.md +++ b/content/riak/kv/2.2.0/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.0/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.0/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.2.0/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.0/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.2.0/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.2.0/developing/getting-started/java/crud-operations.md index cc69422b26..e40c13dfd4 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.2.0/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.0/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.0/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/) documentation. ## Updating Objects @@ -85,8 +85,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.0/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.0/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -196,6 +196,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.0/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.0/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/) documention. diff --git a/content/riak/kv/2.2.0/developing/getting-started/java/querying.md b/content/riak/kv/2.2.0/developing/getting-started/java/querying.md index 6fb130d97c..372fed2390 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.2.0/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.0/developing/getting-started/nodejs.md b/content/riak/kv/2.2.0/developing/getting-started/nodejs.md index 116d33a533..1338e9cf80 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.2.0/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.0/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.0/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.0/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.0/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.2.0/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.2.0/developing/getting-started/nodejs/querying.md index feec1029b0..cce56adf3c 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.2.0/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.0/developing/getting-started/php.md b/content/riak/kv/2.2.0/developing/getting-started/php.md index 17bf2a4104..c6db8d28ff 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/php.md +++ b/content/riak/kv/2.2.0/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.0/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.0/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.2.0/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.0/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.2.0/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.2.0/developing/getting-started/php/crud-operations.md index 626475714c..cdb509e794 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.2.0/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter](/riak/kv/2.2.0/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.2.0/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.2.0/developing/getting-started/php/querying.md b/content/riak/kv/2.2.0/developing/getting-started/php/querying.md index a37f334126..4f1d54665a 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.2.0/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.0/developing/getting-started/python.md b/content/riak/kv/2.2.0/developing/getting-started/python.md index 5b2b1def11..0afb57a7db 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/python.md +++ b/content/riak/kv/2.2.0/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.0/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.0/using/running-a-cluster) first. 
To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.0/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.0/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.2.0/developing/getting-started/python/querying.md b/content/riak/kv/2.2.0/developing/getting-started/python/querying.md index bedeb1c6ba..c0094ea7a9 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.2.0/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.0/developing/getting-started/ruby.md b/content/riak/kv/2.2.0/developing/getting-started/ruby.md index 1301b06f1f..3381fe0d20 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/ruby.md +++ b/content/riak/kv/2.2.0/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.0/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.0/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. 
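The secondary-index note repeated across these querying guides boils down to two client calls: attach index entries when you write, then query the index instead of listing keys. A rough Python sketch (index field and key names are invented; remember that 2i needs the Memory or LevelDB backend):

```python
import riak

client = riak.RiakClient(pb_port=8087)
users = client.bucket('users')

# Tag the object at write time. The _bin suffix marks a string index
# (_int would mark an integer index).
obj = users.new('john_smith', data={'name': 'John Smith'})
obj.add_index('twitter_bin', 'jsmith123')
obj.store()

# Query the index rather than scanning the whole bucket.
for key in users.get_index('twitter_bin', 'jsmith123').results:
    print(users.get(key).data)
```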
## Next Steps -[CRUD Operations](/riak/kv/2.2.0/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.0/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.2.0/developing/getting-started/ruby/querying.md b/content/riak/kv/2.2.0/developing/getting-started/ruby/querying.md index 2a925735cb..e324c4b42b 100644 --- a/content/riak/kv/2.2.0/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.2.0/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.0/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.0/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.0/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.0/developing/key-value-modeling.md b/content/riak/kv/2.2.0/developing/key-value-modeling.md index d3db211738..e3bb85f993 100644 --- a/content/riak/kv/2.2.0/developing/key-value-modeling.md +++ b/content/riak/kv/2.2.0/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.2.0/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.2.0/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.2.0/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.2.0/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.2.0/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.2.0/developing/app-guide/) for a better sense of which features you might need. 
+for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.2.0/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects. Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.2.0/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.2.0/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.2.0/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.2.0/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.2.0/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.2.0/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.2.0/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.2.0/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.2.0/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.2.0/developing/data-types/#sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.2.0/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.2.0/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.2.0/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. 
We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.2.0/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.2.0/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.0/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.2.0/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.0/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.0/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.2.0/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.2.0/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.2.0/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.2.0/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.2.0/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.2.0/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.2.0/developing/usage/commit-hooks.md b/content/riak/kv/2.2.0/developing/usage/commit-hooks.md index 2228c28421..abaa080291 100644 --- a/content/riak/kv/2.2.0/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.2.0/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. 
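Because hooks are registered through bucket properties (as the next paragraphs explain), one hedged way to wire one up from a client is to set the `precommit` property directly. This is a sketch with the Python client; the Erlang module `user_validators` and its exported function `validate` are hypothetical and would have to be compiled and placed on the code path of every node:

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket('users')

# Attach a (hypothetical) Erlang pre-commit hook to this bucket.
# If the module is missing on any node, writes to the bucket will fail.
bucket.set_property('precommit',
                    [{'mod': 'user_validators', 'fun': 'validate'}])

print(bucket.get_property('precommit'))
```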
-Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.2.0/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.2.0/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. ## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.2.0/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.2.0/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.2.0/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.2.0/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.2.0/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.2.0/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.2.0/developing/usage/conflict-resolution.md b/content/riak/kv/2.2.0/developing/usage/conflict-resolution.md index 7e7c2ef622..de9ee029f5 100644 --- a/content/riak/kv/2.2.0/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.2.0/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.0/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.2.0/learn/concepts/clusters) system in which any [node](/riak/kv/2.2.0/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. 
It was built as a [clustered]({{}}riak/kv/2.2.0/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.2.0/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. -If you are using Riak in an [eventually consistent](/riak/kv/2.2.0/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.2.0/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.2.0/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.2.0/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.2.0/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.2.0/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.2.0/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.2.0/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.2.0/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.2.0/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.2.0/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.2.0/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.2.0/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.2.0/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types].
The most important [bucket properties](/riak/kv/2.2.0/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.2.0/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. @@ -87,7 +87,7 @@ If the [`allow_mult`](#siblings) parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -[`last_write_wins`](/riak/kv/2.2.0/learn/concepts/buckets). If `last_write_wins` is set to `false`, +[`last_write_wins`]({{}}riak/kv/2.2.0/learn/concepts/buckets). If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.2.0/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.2.0/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.2.0/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.2.0/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.2.0/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.2.0/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.2.0/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.2.0/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.2.0/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.2.0/configuring/reference) to change the [default bucket properties](/riak/kv/2.2.0/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.2.0/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.2.0/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.2.0/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.2.0/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. 
If set to `true`, [dotted version vectors]({{}}riak/kv/2.2.0/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.2.0/learn/concepts/causal-context#vector-clocks) will be used. Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.2.0/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.2.0/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.2.0/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.2.0/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.2.0/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.2.0/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.2.0/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.2.0/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.2.0/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.2.0/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.2.0/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.0/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user.
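The fetch-then-update discipline described above looks roughly like this with the Python client, which carries the causal context between the `get` and the `store` for you. This is a sketch only: the bucket-type and bucket names echo the `curl` example above, and it assumes the key already holds a JSON value.

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')

# Read first: the fetched object carries its causal context (vclock).
obj = bucket.get('best_character')

# Mutate and write back. The client returns the fetched context to
# Riak, so this update supersedes the version we read instead of
# forking the object's history into siblings.
obj.data = {'name': 'Ren'}
obj.store()
```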
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.2.0/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.2.0/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.2.0/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.2.0/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.2.0/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -610,7 +610,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.2.0/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.2.0/using/performance/latency-reduction) as the object is replicated, as well as out-of-memory errors. ### Vector Clock Explosion @@ -665,7 +665,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/csharp.md index fa1219dcde..e54b5d4df5 100644 --- a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.0/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client].
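Whatever the client language, the shape of a resolver is similar. As a sketch with the official Python client (the resolver and the JSON `friends` data layout are assumptions patterned on the per-language guides linked above):

```python
import riak

def longest_friends_list_resolver(riak_object):
    # Keep only the sibling with the longest friends list; the others
    # are discarded the next time the object is stored.
    riak_object.siblings = [max(
        riak_object.siblings,
        key=lambda sibling: len(sibling.data.get('friends', []))
    )]

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('siblings').bucket('users')
bucket.resolver = longest_friends_list_resolver

obj = bucket.get('bashobunny')  # siblings are resolved during the fetch
```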
diff --git a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/golang.md index 59e14dde63..6ad5c6fc5e 100644 --- a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.0/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/java.md index 221ca895ca..3a4e02b207 100644 --- a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.0/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.0/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.0/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.0/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.0/developing/data-types/) that have specific conflict resolution mechanics built in.
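To make those built-in mechanics concrete before the recommendation that follows, here is a sketch with the Python client's Data Types API; it assumes a bucket type named `maps` created with `datatype: map`, and the field names are illustrative:

```python
import riak
from riak.datatypes import Map

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('maps').bucket('users')

user = Map(bucket, 'ahmed_info')
user.registers['first_name'].assign('Ahmed')
user.sets['friends'].add('fatima_4life')
user.counters['page_visits'].increment()
user.store()  # concurrent updates converge without custom merge code
```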
If you have data that -can be modeled as a [counter](/riak/kv/2.2.0/developing/data-types/#counters), [set](/riak/kv/2.2.0/developing/data-types/#sets), or [map](/riak/kv/2.2.0/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.0/developing/data-types/#counters), [set]({{}}riak/kv/2.2.0/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.0/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.0/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.0/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/nodejs.md index 702ff21c0e..9351fd5442 100644 --- a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.0/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/php.md index b44c93526d..c80b639513 100644 --- a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.0/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.0/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.0/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.0/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.0/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.0/developing/data-types/#counters), [set](/riak/kv/2.2.0/developing/data-types/#sets), or [map](/riak/kv/2.2.0/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.0/developing/data-types/#counters), [set]({{}}riak/kv/2.2.0/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.0/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.0/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.0/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/python.md index c7071cebc1..268d9c0200 100644 --- a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.0/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -185,7 +185,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.0/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.0/developing/usage) section. ## More Advanced Example @@ -240,9 +240,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.0/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.0/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.0/developing/data-types/#counters), [set](/riak/kv/2.2.0/developing/data-types/#sets), or [map](/riak/kv/2.2.0/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.0/developing/data-types/#counters), [set]({{}}riak/kv/2.2.0/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.0/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -251,4 +251,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.0/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.0/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/ruby.md index 6b206fc732..e981c5942e 100644 --- a/content/riak/kv/2.2.0/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.2.0/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.0/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.0/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.0/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.0/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.0/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.0/developing/data-types/#counters), [set](/riak/kv/2.2.0/developing/data-types/#sets), or [map](/riak/kv/2.2.0/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.0/developing/data-types/#counters), [set]({{}}riak/kv/2.2.0/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.0/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.0/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.0/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.0/developing/usage/creating-objects.md b/content/riak/kv/2.2.0/developing/usage/creating-objects.md index 8b565ccccc..f4d518cb64 100644 --- a/content/riak/kv/2.2.0/developing/usage/creating-objects.md +++ b/content/riak/kv/2.2.0/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.2.0/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.2.0/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: ``` PUT /types/<type>/buckets/<bucket>/keys/<key> # If you're using HTTP to interact with Riak, you can also use POST ``` -As an example, let's store an object containing information about a dog named Rufus.
We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{}}riak/kv/2.2.0/using/cluster-operations/bucket-types). The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -118,7 +118,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, you run the same read operation as in [Reading Objects](/riak/kv/2.2.0/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types](/riak/kv/2.2.0/using/cluster-operations/bucket-types). +Now, you run the same read operation as in [Reading Objects]({{}}riak/kv/2.2.0/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types]({{}}riak/kv/2.2.0/using/cluster-operations/bucket-types). ### Store an Object @@ -138,7 +138,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.2.0/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.2.0/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.2.0/developing/usage/custom-extractors.md b/content/riak/kv/2.2.0/developing/usage/custom-extractors.md index 841e0cebb6..9ccadaf7d4 100644 --- a/content/riak/kv/2.2.0/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.2.0/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.2.0/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.2.0/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.2.0/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.2.0/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.2.0/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added to `<fields>` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.2.0/developing/usage/deleting-objects.md b/content/riak/kv/2.2.0/developing/usage/deleting-objects.md index 40334c3c4e..af1e85f317 100644 --- a/content/riak/kv/2.2.0/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.2.0/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.2.0/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.2.0/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.2.0/developing/usage/document-store.md b/content/riak/kv/2.2.0/developing/usage/document-store.md index c14a0858e0..aa44db959c 100644 --- a/content/riak/kv/2.2.0/developing/usage/document-store.md +++ b/content/riak/kv/2.2.0/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.2.0/developing/usage/search/) and [Riak Data Types](/riak/kv/2.2.0/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.2.0/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.2.0/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.2.0/developing/data-types/#maps). +[Riak maps]({{}}riak/kv/2.2.0/developing/data-types/#maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.2.0/developing/data-types/#maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.2.0/developing/data-types/#maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**.
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.2.0/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.2.0/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.2.0/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.2.0/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.2.0/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.2.0/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `<fields>` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.2.0/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.2.0/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.2.0/developing/usage/mapreduce.md b/content/riak/kv/2.2.0/developing/usage/mapreduce.md index 4b3f35c30f..e5c16b693e 100644 --- a/content/riak/kv/2.2.0/developing/usage/mapreduce.md +++ b/content/riak/kv/2.2.0/developing/usage/mapreduce.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.0/dev/using/mapreduce --- -[usage 2i]: /riak/kv/2.2.0/developing/usage/secondary-indexes -[usage search]: /riak/kv/2.2.0/developing/usage/search -[usage types]: /riak/kv/2.2.0/developing/usage/bucket-types -[api http]: /riak/kv/2.2.0/developing/api/http -[api pb]: /riak/kv/2.2.0/developing/api/protocol-buffers -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[guide mapreduce]: /riak/kv/2.2.0/developing/app-guide/advanced-mapreduce +[usage 2i]: {{}}riak/kv/2.2.0/developing/usage/secondary-indexes +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search +[usage types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[api http]: {{}}riak/kv/2.2.0/developing/api/http +[api pb]: {{}}riak/kv/2.2.0/developing/api/protocol-buffers +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[guide mapreduce]: {{}}riak/kv/2.2.0/developing/app-guide/advanced-mapreduce {{% note title="Use MapReduce sparingly" %}} In Riak KV, MapReduce is the primary method for non-primary-key-based @@ -116,7 +116,7 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job.
-![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example diff --git a/content/riak/kv/2.2.0/developing/usage/reading-objects.md b/content/riak/kv/2.2.0/developing/usage/reading-objects.md index 36f257716b..3d7b02fea1 100644 --- a/content/riak/kv/2.2.0/developing/usage/reading-objects.md +++ b/content/riak/kv/2.2.0/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.2.0/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a bucket: ``` GET /types/<type>/buckets/<bucket>/keys/<key> ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type](/riak/kv/2.2.0/using/cluster-operations/bucket-types) page. +`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type]({{}}riak/kv/2.2.0/using/cluster-operations/bucket-types) page. ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.2.0/developing/usage/replication.md b/content/riak/kv/2.2.0/developing/usage/replication.md index cdd210f50f..b8b8a9b638 100644 --- a/content/riak/kv/2.2.0/developing/usage/replication.md +++ b/content/riak/kv/2.2.0/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.2.0/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases.
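Circling back to the read pattern shown just above: fetching `rufus` with the Python client is a one-liner once the `animals` bucket type exists (a minimal sketch; connection details are assumptions):

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('animals').bucket('dogs')

obj = bucket.get('rufus')
print(obj.encoded_data)  # e.g. b'WOOF!' if such a plain-text value was stored
```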
-At the bottom of the page, you'll find a [screencast](/riak/kv/2.2.0/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.2.0/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the Using Strong Consistency documentation, as this option will not be covered in this tutorial. {{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.2.0/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.2.0/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent of setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.2.0/developing/usage/bucket-types) +that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.2.0/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable.
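To make the primer concrete before moving on to PR and PW, here is how R, W, and DW values can be supplied per request with the Python client. This is a sketch: the `n_val_of_5` bucket-type name, bucket, and key are assumptions, and the type must already exist with `n_val: 5`.

```python
import riak

client = riak.RiakClient(pb_port=8087)
bucket = client.bucket_type('n_val_of_5').bucket('sensitive_user_data')

# Per-request R: wait for three of the five replicas on this read.
obj = bucket.get('user19735', r=3)

# Per-request W and DW for this write only; requests that don't
# override them keep the bucket type's defaults.
obj.data = {'status': 'active'}
obj.store(w=3, dw=2)
```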
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.2.0/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.2.0/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.2.0/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.2.0/setup/planning/backend/multi). ## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.2.0/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.2.0/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.2.0/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.2.0/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.2.0/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.2.0/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)

diff --git a/content/riak/kv/2.2.0/developing/usage/search-schemas.md b/content/riak/kv/2.2.0/developing/usage/search-schemas.md
index 48344c673e..3ffaf206f6 100644
--- a/content/riak/kv/2.2.0/developing/usage/search-schemas.md
+++ b/content/riak/kv/2.2.0/developing/usage/search-schemas.md
@@ -15,17 +15,17 @@ aliases:
 - /riak/kv/2.2.0/dev/advanced/search-schema
---

-[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters
+[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters

> **Note on Search 2.0 vs. Legacy Search**
>
> This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed
-Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/).
+Yokozuna).

Riak Search is built for ease of use, allowing you to write values into
Riak and query for values using Solr. Riak Search does a lot of work
-under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.2.0/developing/data-types/), and [more](/riak/kv/2.2.0/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.2.0/developing/data-types/), and [more]({{}}riak/kv/2.2.0/developing/usage/custom-extractors)---into something that can be indexed and searched later.
Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing an array of strings? An integer? A date? Is your text in English or Russian?
You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.2.0/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.2.0/developing/usage/search.md b/content/riak/kv/2.2.0/developing/usage/search.md index 35643b63dc..fc8fd6da71 100644 --- a/content/riak/kv/2.2.0/developing/usage/search.md +++ b/content/riak/kv/2.2.0/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.2.0/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.2.0/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.0/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.2.0/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.0/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.0/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.2.0/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.2.0/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.2.0/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.2.0/developing/usage/custom-extractors). 
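As a rough sketch of the extractor mechanism described above (the `animals` bucket type, bucket, and key here are illustrative assumptions, not part of this example's setup): a write whose content type is `application/json` is routed to the JSON extractor, which flattens fields such as `name_s` into a Solr document.

```bash
# The application/json content type selects the JSON extractor, which
# turns each field (name_s, age_i) into an indexable Solr field
curl -XPUT "$RIAK_HOST/types/animals/buckets/cats/keys/liono" \
  -H 'Content-Type: application/json' \
  -d '{"name_s": "Lion-o", "age_i": 30}'
```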
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.2.0/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.2.0/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.2.0/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.2.0/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.2.0/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.2.0/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.2.0/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.2.0/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.2.0/developing/usage/searching-data-types.md b/content/riak/kv/2.2.0/developing/usage/searching-data-types.md index 07cfa2ba95..1d9a5b1c86 100644 --- a/content/riak/kv/2.2.0/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.2.0/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.0/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.2.0/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.2.0/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.2.0/developing/data-types/#counters), [sets](/riak/kv/2.2.0/developing/data-types/#sets), and [maps](/riak/kv/2.2.0/developing/data-types/#maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.2.0/developing/data-types/#counters), [sets]({{}}riak/kv/2.2.0/developing/data-types/#sets), and [maps]({{}}riak/kv/2.2.0/developing/data-types/#maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
### Top-level Schemas -The default schema for [counters](/riak/kv/2.2.0/developing/data-types/#counters) indexes each +The default schema for [counters]({{}}riak/kv/2.2.0/developing/data-types/#counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.2.0/developing/data-types/#sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.2.0/developing/data-types/#sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.2.0/developing/data-types/#maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.2.0/developing/data-types/#maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) for [storing counters](/riak/kv/2.2.0/developing/data-types/#counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.2.0/developing/data-types/#counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.2.0/developing/usage/bucket-types) for [storing sets](/riak/kv/2.2.0/developing/data-types/#sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.2.0/developing/data-types/#sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.2.0/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.2.0/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.2.0/developing/data-types/#maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.2.0/developing/data-types/#maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.2.0/developing/usage/secondary-indexes.md b/content/riak/kv/2.2.0/developing/usage/secondary-indexes.md index dc710654e5..db87ca1dc2 100644 --- a/content/riak/kv/2.2.0/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.2.0/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.2.0/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.0/setup/planning/backend/memory -[use ref strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.0/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.2.0/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.2.0/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.2.0/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.2.0/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.2.0/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.2.0/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.2.0/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.2.0/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.2.0/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.2.0/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.2.0/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.2.0/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.2.0/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.2.0/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.2.0/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.2.0/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.0/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.2.0/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.0/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.0/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.2.0/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.2.0/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.2.0/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.2.0/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
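As a hedged sketch of the tag-then-query flow described above (the bucket, key, and index names are illustrative):

```bash
# Tag the object with a binary secondary index at write time
curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \
  -H 'x-riak-index-twitter_bin: jsmith123' \
  -H 'Content-Type: application/json' \
  -d '{"user_data": "..."}'

# Later, fetch every key in the bucket that shares the tag
curl localhost:8098/types/default/buckets/users/index/twitter_bin/jsmith123
```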
### Exact Match

diff --git a/content/riak/kv/2.2.0/developing/usage/security.md b/content/riak/kv/2.2.0/developing/usage/security.md
index cc311e2137..bc2e694ed3 100644
--- a/content/riak/kv/2.2.0/developing/usage/security.md
+++ b/content/riak/kv/2.2.0/developing/usage/security.md
@@ -15,49 +15,49 @@ aliases:
 - /riak/kv/2.2.0/dev/advanced/client-security
---

-Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.2.0/using/security/basics) that enables you to choose
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.2.0/using/security/basics) that enables you to choose

* which Riak users/clients are authorized to perform a wide variety of
  Riak operations, and
* how those users/clients are required to authenticate themselves.

-The following four authentication mechanisms, aka [security sources](/riak/kv/2.2.0/using/security/managing-sources/) are available:
+The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.2.0/using/security/managing-sources/), are available:

-* [Trust](/riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication)-based
+* [Trust]({{}}riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication)-based
  authentication enables you to specify trusted
  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
  from which all clients will be authenticated by default
-* [Password](/riak/kv/2.2.0/using/security/managing-sources/#password-based-authentication)-based authentication requires
+* [Password]({{}}riak/kv/2.2.0/using/security/managing-sources/#password-based-authentication)-based authentication requires
  that clients provide a username and password
-* [Certificate](/riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication
+* [Certificate]({{}}riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication
  requires that clients provide a certificate
-* [Pluggable authentication module (PAM)](/riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+* [Pluggable authentication module (PAM)]({{}}riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication)-based authentication requires
  clients to authenticate using the PAM service specified using the
-  [`riak-admin security`](/riak/kv/2.2.0/using/security/managing-sources/#managing-sources)
+  [`riak-admin security`]({{}}riak/kv/2.2.0/using/security/managing-sources/#managing-sources)
  command line interface

Riak's approach to security is highly flexible. If you choose to use
Riak's security feature, you do not need to require that all clients
authenticate via the same means. Instead, you can specify
authentication sources on a client-by-client, i.e. user-by-user, basis.
This means that
-you can require clients performing, say, [MapReduce](/riak/kv/2.2.0/developing/usage/mapreduce/)
-operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.2.0/developing/usage) have to use username and password. The approach
+you can require clients performing, say, [MapReduce]({{}}riak/kv/2.2.0/developing/usage/mapreduce/)
+operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.2.0/developing/usage) have to use username and password. The approach
that you adopt will depend on your security needs.

This document provides a general overview of how that works.
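For orientation, here is a minimal sketch of turning the security subsystem on and wiring a user to a password source from the command line; the username, password, and CIDR are placeholders:

```bash
# Enable the security subsystem, create a user, and require that
# connections from the given CIDR authenticate with a password
riak-admin security enable
riak-admin security add-user riakuser password=rosebud
riak-admin security add-source riakuser 127.0.0.1/32 password
```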
For managing security in Riak itself, see the following documents:

-* [Authentication and Authorization](/riak/kv/2.2.0/using/security/basics)
-* [Managing Security Sources](/riak/kv/2.2.0/using/security/managing-sources/)
+* [Authentication and Authorization]({{}}riak/kv/2.2.0/using/security/basics)
+* [Managing Security Sources]({{}}riak/kv/2.2.0/using/security/managing-sources/)

We also provide client-library-specific guides for the following
officially supported clients:

-* [Java](/riak/kv/2.2.0/developing/usage/security/java)
-* [Ruby](/riak/kv/2.2.0/developing/usage/security/ruby)
-* [PHP](/riak/kv/2.2.0/developing/usage/security/php)
-* [Python](/riak/kv/2.2.0/developing/usage/security/python)
-* [Erlang](/riak/kv/2.2.0/developing/usage/security/erlang)
+* [Java]({{}}riak/kv/2.2.0/developing/usage/security/java)
+* [Ruby]({{}}riak/kv/2.2.0/developing/usage/security/ruby)
+* [PHP]({{}}riak/kv/2.2.0/developing/usage/security/php)
+* [Python]({{}}riak/kv/2.2.0/developing/usage/security/python)
+* [Erlang]({{}}riak/kv/2.2.0/developing/usage/security/erlang)

## Certificates, Keys, and Authorities

@@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients.

> **HTTP not supported**
>
> Certificate-based authentication is available only through Riak's
-[Protocol Buffers](/riak/kv/2.2.0/developing/api/protocol-buffers/) interface. It is not available through the
-[HTTP API](/riak/kv/2.2.0/developing/api/http).
+[Protocol Buffers]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/) interface. It is not available through the
+[HTTP API]({{}}riak/kv/2.2.0/developing/api/http).

### Default Names

-In Riak's [configuration files](/riak/kv/2.2.0/configuring/reference/#security), the
+In Riak's [configuration files]({{}}riak/kv/2.2.0/configuring/reference/#security), the
default certificate file names are as follows:

Cert | Filename

diff --git a/content/riak/kv/2.2.0/developing/usage/security/erlang.md b/content/riak/kv/2.2.0/developing/usage/security/erlang.md
index 42b51537f6..1d5ddae371 100644
--- a/content/riak/kv/2.2.0/developing/usage/security/erlang.md
+++ b/content/riak/kv/2.2.0/developing/usage/security/erlang.md
@@ -19,9 +19,9 @@ aliases:

This tutorial shows you how to set up a Riak Erlang client to
authenticate itself when connecting to Riak.

-If you are using [trust](/riak/kv/2.2.0/using/security/managing-sources/), [PAM-](/riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.2.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+If you are using [trust]({{}}riak/kv/2.2.0/using/security/managing-sources/)- or [PAM]({{}}riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.2.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered
in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+[certificate]({{}}riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.0/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.2.0/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.2.0/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.2.0/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.2.0/developing/usage/security/java.md b/content/riak/kv/2.2.0/developing/usage/security/java.md index 651a87ebaa..56fb8a4d42 100644 --- a/content/riak/kv/2.2.0/developing/usage/security/java.md +++ b/content/riak/kv/2.2.0/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.0/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.2.0/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.2.0/developing/usage/security/php.md b/content/riak/kv/2.2.0/developing/usage/security/php.md index eba4c68ccc..f03e391acc 100644 --- a/content/riak/kv/2.2.0/developing/usage/security/php.md +++ b/content/riak/kv/2.2.0/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.0/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.2.0/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.2.0/developing/usage/security/python.md b/content/riak/kv/2.2.0/developing/usage/security/python.md index a34d2df7cf..33559b431e 100644 --- a/content/riak/kv/2.2.0/developing/usage/security/python.md +++ b/content/riak/kv/2.2.0/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.0/using/security/managing-sources/) or [PAM-](/riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.2.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+If you are using [trust-]({{}}riak/kv/2.2.0/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.2.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered
in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+[certificate]({{}}riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
the instructions in the [section below](#certificate-based-authentication).

{{% note title="Note on certificate generation" %}}
@@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object.

If you are using Riak Security, _all_ connecting clients should have
access to the same Certificate Authority (CA) used on the server side,
-regardless of which [security source](/riak/kv/2.2.0/using/security/managing-sources/) you
+regardless of which [security source]({{}}riak/kv/2.2.0/using/security/managing-sources/) you
choose. All clients should also provide a username. The example below
sets up a client object (we'll simply call it `client`) that connects
to Riak on `localhost` and on port 8087 without any security credentials:
@@ -83,7 +83,7 @@ provided that the
[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
which the client is connecting has been specified as trusted. More on
specifying trusted CIDRs can be found in [Trust-based
-Authentication](/riak/kv/2.2.0/using/security/managing-sources/#Trust-based-Authentication).
+Authentication]({{}}riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication).

**Note**: The examples in the following sections specify certs on the
basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
@@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser',

## PAM-based Authentication

If you have specified that a specific client be authenticated using
-[PAM](/riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication), you will
+[PAM]({{}}riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication), you will
need to provide a CA as well as the username and password that you
specified when creating the user in Riak. For more, see our
-documentation on [User Management](/riak/kv/2.2.0/using/security/basics/#user-management).
+documentation on [User Management]({{}}riak/kv/2.2.0/using/security/basics/#user-management).
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.2.0/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.2.0/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.2.0/developing/usage/security/ruby.md b/content/riak/kv/2.2.0/developing/usage/security/ruby.md index fcb8843950..1cc6fbf3f3 100644 --- a/content/riak/kv/2.2.0/developing/usage/security/ruby.md +++ b/content/riak/kv/2.2.0/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.0/using/security/managing-sources/) or [PAM](/riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.2.0/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.2.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.2.0/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.0/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.2.0/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication). 
## Password-based Authentication

@@ -86,10 +86,10 @@ client = Riak::Client.new(

## PAM-based Authentication

If you have specified that a specific client be authenticated using
-[PAM](/riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication), you will
+[PAM]({{}}riak/kv/2.2.0/using/security/managing-sources/#pam-based-authentication), you will
need to provide a CA as well as the username and password that you
specified when creating the user in Riak. For more, see our
-documentation on [User Management](/riak/kv/2.2.0/using/security/basics#user-management).
+documentation on [User Management]({{}}riak/kv/2.2.0/using/security/basics#user-management).

## Certificate-based Authentication

diff --git a/content/riak/kv/2.2.0/developing/usage/updating-objects.md b/content/riak/kv/2.2.0/developing/usage/updating-objects.md
index f81c29cce2..34a3062f89 100644
--- a/content/riak/kv/2.2.0/developing/usage/updating-objects.md
+++ b/content/riak/kv/2.2.0/developing/usage/updating-objects.md
@@ -15,7 +15,7 @@ aliases:
 - /riak/kv/2.2.0/dev/using/updates
---

-[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode
+[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode

## Using Causal Context

If an object already exists under a certain key and you want to write a
new object to that key, Riak needs to know what to do, especially if
multiple writes are happening at the same time. Which of the objects
being written should be deemed correct? These kinds of scenarios can
-arise quite frequently in distributed, [eventually consistent](/riak/kv/2.2.0/learn/concepts/eventual-consistency) systems.
+arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.2.0/learn/concepts/eventual-consistency) systems.

-Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.2.0/learn/concepts/causal-context). These objects track the causal history of objects.
+Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.2.0/learn/concepts/causal-context). These context objects track the causal history of objects.
They are attached to _all_ Riak objects as metadata, and they are not
readable by humans. They may sound complex---and they are fairly complex
behind the scenes---but using them in your application is very simple.

Using causal context in an update would involve the following steps:

1. Fetch the object
-2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.2.0/learn/concepts/causal-context)
+2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.2.0/learn/concepts/causal-context))
3. Write the new object to Riak

Step 2 is the most important here. All of Basho's official Riak clients
-enable you to modify an object's value without modifying its [causal context](/riak/kv/2.2.0/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
-object updates can be found in [Conflict Resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution), we'll walk you
+enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.2.0/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution), we'll walk you
through a basic example here.
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.2.0/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.2.0/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.2.0/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.2.0/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.2.0/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.2.0/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.2.0/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.2.0/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.2.0/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.2.0/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.2.0/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.2.0/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.2.0/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.2.0/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.2.0/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.2.0/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.2.0/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.2.0/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
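A minimal sketch of that read/modify/write cycle over the HTTP API, assuming a plain-text object; the bucket and key are illustrative, and the causal context travels in the `X-Riak-Vclock` header:

```bash
# 1. Read the object and capture its causal context header
VCLOCK=$(curl -si localhost:8098/buckets/nba/keys/champion \
  | grep -i '^x-riak-vclock' | awk '{print $2}' | tr -d '\r')

# 2-3. Modify the value on the application side, then write it back with
# the fetched context so Riak can order this update against concurrent ones
curl -XPUT localhost:8098/buckets/nba/keys/champion \
  -H "X-Riak-Vclock: $VCLOCK" \
  -H 'Content-Type: text/plain' \
  -d 'Washington Generals'
```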
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.2.0/index.md b/content/riak/kv/2.2.0/index.md index 2d49e62c48..6898922317 100644 --- a/content/riak/kv/2.2.0/index.md +++ b/content/riak/kv/2.2.0/index.md @@ -1,5 +1,5 @@ --- -title: "Riak KV" +title: "Riak KV 2.2.0" description: "" project: "riak_kv" project_version: "2.2.0" @@ -15,15 +15,15 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.2.0/configuring -[downloads]: /riak/kv/2.2.0/downloads/ -[install index]: /riak/kv/2.2.0/setup/installing/ -[plan index]: /riak/kv/2.2.0/setup/planning -[perf open files]: /riak/kv/2.2.0/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.2.0/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.2.0/developing/usage/search -[getting started]: /riak/kv/2.2.0/developing/getting-started -[dev client libraries]: /riak/kv/2.2.0/developing/client-libraries +[config index]: {{}}riak/kv/2.2.0/configuring +[downloads]: {{}}riak/kv/2.2.0/downloads/ +[install index]: {{}}riak/kv/2.2.0/setup/installing/ +[plan index]: {{}}riak/kv/2.2.0/setup/planning +[perf open files]: {{}}riak/kv/2.2.0/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.2.0/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search +[getting started]: {{}}riak/kv/2.2.0/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.2.0/developing/client-libraries @@ -56,7 +56,7 @@ Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and the 3. [Configure Riak KV for your needs][config index] {{% note title="Developing with Riak KV" %}} -If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV](/riak/kv/2.2.0/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. +If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{}}riak/kv/2.2.0/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. 
{{% /note %}} ## Popular Docs diff --git a/content/riak/kv/2.2.0/learn/concepts.md b/content/riak/kv/2.2.0/learn/concepts.md index 07f4b94246..5c2d7e177d 100644 --- a/content/riak/kv/2.2.0/learn/concepts.md +++ b/content/riak/kv/2.2.0/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.2.0/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.2.0/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.2.0/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters -[concept crdts]: /riak/kv/2.2.0/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.0/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.0/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.2.0/learn/concepts/vnodes -[config index]: /riak/kv/2.2.0/configuring -[plan index]: /riak/kv/2.2.0/setup/planning -[use index]: /riak/kv/2.2.0/using/ +[concept aae]: {{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.2.0/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.2.0/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.2.0/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.2.0/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.0/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.0/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.2.0/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.2.0/configuring +[plan index]: {{}}riak/kv/2.2.0/setup/planning +[use index]: {{}}riak/kv/2.2.0/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.2.0/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.2.0/learn/concepts/active-anti-entropy.md index ea8acb09fb..932cafa4d1 100644 --- a/content/riak/kv/2.2.0/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.2.0/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.0/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.2.0/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.2.0/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.2.0/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.2.0/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.2.0/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.2.0/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.2.0/developing/usage/search +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.2.0/learn/concepts/buckets.md b/content/riak/kv/2.2.0/learn/concepts/buckets.md index ce58b3bf3c..451c8defdf 100644 --- a/content/riak/kv/2.2.0/learn/concepts/buckets.md +++ b/content/riak/kv/2.2.0/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.2.0/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.2.0/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.2.0/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.2.0/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.2.0/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.2.0/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[config basic]: /riak/kv/2.2.0/configuring/basic -[dev api http]: /riak/kv/2.2.0/developing/api/http -[dev data types]: /riak/kv/2.2.0/developing/data-types -[glossary ring]: /riak/kv/2.2.0/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.0/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.0/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.2.0/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.2.0/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.2.0/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.2.0/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.2.0/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.2.0/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.2.0/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.2.0/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.2.0/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.2.0/configuring/basic +[dev api http]: {{}}riak/kv/2.2.0/developing/api/http +[dev data types]: {{}}riak/kv/2.2.0/developing/data-types +[glossary ring]: {{}}riak/kv/2.2.0/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.0/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.0/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.2.0/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.2.0/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.2.0/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.2.0/learn/concepts/capability-negotiation.md b/content/riak/kv/2.2.0/learn/concepts/capability-negotiation.md index 07e70f943a..1cd242a649 100644 --- a/content/riak/kv/2.2.0/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.2.0/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.2.0/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.2.0/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.2.0/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.2.0/developing/usage/mapreduce In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.2.0/learn/concepts/causal-context.md b/content/riak/kv/2.2.0/learn/concepts/causal-context.md index a7b57c8a3d..6d93535f82 100644 --- a/content/riak/kv/2.2.0/learn/concepts/causal-context.md +++ b/content/riak/kv/2.2.0/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.2.0/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.2.0/developing/api/http -[dev key value]: /riak/kv/2.2.0/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.2.0/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.2.0/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.0/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.2.0/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.2.0/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.2.0/developing/api/http +[dev key value]: {{}}riak/kv/2.2.0/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.2.0/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.2.0/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.0/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.2.0/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.2.0/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -73,7 +73,7 @@ Causal context comes in two forms in Riak: **vector clocks** and **dotted version vectors**. More information in both can be found in the sections below. -In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type](/riak/kv/2.2.0/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other +In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions). 
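As a minimal sketch of the `allow_mult=true` case, such a bucket type could be created and activated as follows (the type name `siblings_allowed` is illustrative, not taken from these docs):

```bash
# Sketch: a bucket type whose buckets keep siblings on concurrent writes
riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
riak-admin bucket-type activate siblings_allowed
```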
If, however, `allow_mult` is set to `false`, then Riak will not generate diff --git a/content/riak/kv/2.2.0/learn/concepts/clusters.md b/content/riak/kv/2.2.0/learn/concepts/clusters.md index 8eb6571db2..c547e7853a 100644 --- a/content/riak/kv/2.2.0/learn/concepts/clusters.md +++ b/content/riak/kv/2.2.0/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.2.0/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.2.0/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.0/learn/concepts/replication -[glossary node]: /riak/kv/2.2.0/learn/glossary/#node -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.2.0/learn/dynamo -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.0/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.2.0/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.2.0/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.2.0/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.0/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.2.0/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.2.0/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.0/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.2.0/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.2.0/learn/concepts/crdts.md b/content/riak/kv/2.2.0/learn/concepts/crdts.md index 325957c860..a135fe8cef 100644 --- a/content/riak/kv/2.2.0/learn/concepts/crdts.md +++ b/content/riak/kv/2.2.0/learn/concepts/crdts.md @@ -17,20 +17,20 @@ aliases: --- [crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf -[data types converg]: /riak/kv/2.2.0/learn/concepts/crdts/#convergence +[data types converg]: {{}}riak/kv/2.2.0/learn/concepts/crdts/#convergence [crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html -[data types impl]: /riak/kv/2.2.0/learn/concepts/crdts/#implementation -[concept causal context dvv]: /riak/kv/2.2.0/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.2.0/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.2.0/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.2.0/developing/data-types +[data types impl]: {{}}riak/kv/2.2.0/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{}}riak/kv/2.2.0/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.2.0/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.2.0/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.2.0/developing/data-types [riak_dt]: https://github.com/basho/riak_dt -[dev data types context]: /riak/kv/2.1.4/developing/data-types/#data-types-and-context -[glossary node]: /riak/kv/2.2.0/learn/glossary/#node -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.2.0/developing/usage/conflict-resolution +[dev data types context]: {{}}riak/kv/2.2.0/developing/data-types/#data-types-and-context +[glossary node]: {{}}riak/kv/2.2.0/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.2.0/developing/usage/conflict-resolution Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. 
Riak KV supports the following eventually-convergent data types, described in later sections: diff --git a/content/riak/kv/2.2.0/learn/concepts/eventual-consistency.md b/content/riak/kv/2.2.0/learn/concepts/eventual-consistency.md index f5c953ae29..7cda90dc81 100644 --- a/content/riak/kv/2.2.0/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.2.0/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.2.0/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters -[concept replication]: /riak/kv/2.2.0/learn/concepts/replication -[glossary node]: /riak/kv/2.2.0/learn/glossary/#node -[glossary read rep]: /riak/kv/2.2.0/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.0/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.2.0/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.2.0/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.2.0/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.2.0/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.0/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.2.0/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.2.0/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.2.0/developing/data-modeling/). +or models]({{}}riak/kv/2.2.0/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
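Beyond bucket-level defaults, replication parameters can also be tuned on individual requests. As a rough sketch using the HTTP API (the host, bucket, and key below are illustrative assumptions):

```bash
# Sketch: fetch an object with a relaxed read quorum of 1
curl "http://localhost:8098/types/default/buckets/mybucket/keys/mykey?r=1"
```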
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.2.0/learn/concepts/keys-and-objects.md b/content/riak/kv/2.2.0/learn/concepts/keys-and-objects.md index ffa7a82c37..984844d744 100644 --- a/content/riak/kv/2.2.0/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.2.0/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.0/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.2.0/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.2.0/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.2.0/learn/concepts/replication.md b/content/riak/kv/2.2.0/learn/concepts/replication.md index 79f248ec43..b337dfab24 100644 --- a/content/riak/kv/2.2.0/learn/concepts/replication.md +++ b/content/riak/kv/2.2.0/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.2.0/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.2.0/learn/concepts/vnodes -[glossary node]: /riak/kv/2.2.0/learn/glossary/#node -[glossary ring]: /riak/kv/2.2.0/learn/glossary/#ring -[usage replication]: /riak/kv/2.2.0/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.2.0/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.2.0/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.2.0/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.2.0/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.2.0/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.2.0/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.2.0/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.2.0/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail. 
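The next section walks through setting `n_val`; as a hedged preview, a bucket type carrying an `n_val` of 2 might be created and activated like this (the type name is illustrative):

```bash
# Sketch: buckets of this type will replicate each object to 2 nodes
riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
riak-admin bucket-type activate n_val_of_2
```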
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.2.0/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.2.0/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.2.0/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.2.0/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.2.0/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.2.0/learn/concepts/strong-consistency.md b/content/riak/kv/2.2.0/learn/concepts/strong-consistency.md index 88ba390414..a3ec2bbebf 100644 --- a/content/riak/kv/2.2.0/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.2.0/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.0/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.2.0/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.2.0/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.2.0/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.2.0/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.2.0/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.2.0/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.2.0/learn/concepts/vnodes.md b/content/riak/kv/2.2.0/learn/concepts/vnodes.md index cdd1f4cb1b..5a78d47d52 100644 --- a/content/riak/kv/2.2.0/learn/concepts/vnodes.md +++ b/content/riak/kv/2.2.0/learn/concepts/vnodes.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.2.0/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.2.0/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.0/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.2.0/learn/glossary/#node -[glossary ring]: /riak/kv/2.2.0/learn/glossary/#ring -[plan backend]: /riak/kv/2.2.0/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.2.0/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.2.0/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.2.0/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.2.0/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.2.0/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.0/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.2.0/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.2.0/learn/glossary/#ring +[plan backend]: {{}}riak/kv/2.2.0/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.2.0/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.2.0/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -80,7 +80,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -102,7 +102,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.2.0/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.2.0/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.2.0/learn/dynamo.md b/content/riak/kv/2.2.0/learn/dynamo.md index 10a52e4f6b..3e6c60ea1f 100644 --- a/content/riak/kv/2.2.0/learn/dynamo.md +++ b/content/riak/kv/2.2.0/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.2.0/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.2.0/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.2.0/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.2.0/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. 
-[HTTP API]: /riak/kv/2.2.0/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.2.0/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.2.0/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.2.0/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.2.0/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.2.0/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.2.0/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.2.0/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.2.0/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.2.0/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.2.0/developing/api/http/) +>[REST API]({{}}riak/kv/2.2.0/developing/api/http/) > ->[Writing Data](/riak/kv/2.2.0/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.2.0/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.2.0/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.2.0/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.2.0/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.2.0/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.2.0/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.2.0/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.2.0/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.2.0/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. -[Multi Datacenter Replication]: /riak/kv/2.2.0/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.2.0/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. 
> This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.2.0/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.2.0/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.2.0/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.2.0/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.2.0/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.2.0/setup/planning/backend/ -[Bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.2.0/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.2.0/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.2.0/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.2.0/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.2.0/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.2.0/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.2.0/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.2.0/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.2.0/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.2.0/using/performance/benchmarking/ Dynamo is used by several services with different configurations.
These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.2.0/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.2.0/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.2.0/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.2.0/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.2.0/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.2.0/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.2.0/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.2.0/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.2.0/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.2.0/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. -[Basho Bench]: /riak/kv/2.2.0/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.2.0/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. 
-[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.2.0/learn/glossary.md b/content/riak/kv/2.2.0/learn/glossary.md index 2b4982ef17..03aaf109e8 100644 --- a/content/riak/kv/2.2.0/learn/glossary.md +++ b/content/riak/kv/2.2.0/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.2.0/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.2.0/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters -[concept crdts]: /riak/kv/2.2.0/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.0/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.0/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.0/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.2.0/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.2.0/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.2.0/developing/api/http -[dev data model]: /riak/kv/2.2.0/developing/data-modeling -[dev data types]: /riak/kv/2.2.0/developing/data-types -[glossary read rep]: /riak/kv/2.2.0/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.2.0/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.2.0/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.0/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.2.0/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.0/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.0/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.0/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.2.0/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.2.0/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.2.0/developing/api/http +[dev data model]: {{}}riak/kv/2.2.0/developing/data-modeling +[dev data types]: {{}}riak/kv/2.2.0/developing/data-types +[glossary read rep]: {{}}riak/kv/2.2.0/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.2.0/learn/dynamo -[plan cluster capacity]: /riak/kv/2.2.0/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.2.0/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.2.0/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.2.0/learn/dynamo +[plan cluster capacity]: 
{{}}riak/kv/2.2.0/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.2.0/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.2.0/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.2.0/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.0/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.2.0/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.2.0/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.2.0/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.2.0/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.2.0/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. -* [Bucket Types](/riak/kv/2.2.0/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.2.0/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.2.0/learn/use-cases.md b/content/riak/kv/2.2.0/learn/use-cases.md index c96c39eb5a..d868b439b4 100644 --- a/content/riak/kv/2.2.0/learn/use-cases.md +++ b/content/riak/kv/2.2.0/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.2.0/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.2.0/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.2.0/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.2.0/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.2.0/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.2.0/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.2.0/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.2.0/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.2.0/developing/data-types -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.2.0/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.0/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.2.0/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.2.0/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.2.0/developing/data-modeling/#log-data +[dev data model sensor data]: 
{{}}riak/kv/2.2.0/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.2.0/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.2.0/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.2.0/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.2.0/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.2.0/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.2.0/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.2.0/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.2.0/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.2.0/learn/why-riak-kv.md b/content/riak/kv/2.2.0/learn/why-riak-kv.md index 5ee082e9b4..950a846595 100644 --- a/content/riak/kv/2.2.0/learn/why-riak-kv.md +++ b/content/riak/kv/2.2.0/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.2.0/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.2.0/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.2.0/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.2.0/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.2.0/developing/data-types -[glossary read rep]: /riak/kv/2.2.0/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.2.0/developing/data-types +[glossary read rep]: {{}}riak/kv/2.2.0/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.2.0/release-notes.md b/content/riak/kv/2.2.0/release-notes.md index 1bb7de5b92..30f23509e0 100644 --- a/content/riak/kv/2.2.0/release-notes.md +++ b/content/riak/kv/2.2.0/release-notes.md @@ -35,19 +35,19 @@ New features in KV 2.2.0 include global object expiration and LZ4 compression fo ### Riak KV Enterprise Edition Only -If you are using AAE fullsync and have a very tight downgrade window, consider disabling the AAE upgrade until you have fully accepted 2.2.0 and rolled it out to all participating clusters. You can read how to disable the upgraded AAE at [Step 5 here](/riak/kv/2.2.0/setup/upgrading/version/#upgrading-process). +If you are using AAE fullsync and have a very tight downgrade window, consider disabling the AAE upgrade until you have fully accepted 2.2.0 and rolled it out to all participating clusters. You can read how to disable the upgraded AAE at [Step 5 here]({{}}riak/kv/2.2.0/setup/upgrading/version/#upgrading-process). 
AAE trees are versioned, so if you choose to enable the 2.2.0 AAE improvements, the AAE trees will need to be destroyed on downgrade and fully repopulated from the object data. During any period in which the AAE trees are invalid, AAE fullsyncs will not work. -If MDC clusters will be upgraded in stages, during the time that the cluster versions are mismatched with Riak KV versions 2.2.0 and Riak KV versions less than 2.2.0, replication will fail due to a known issue with Bucket Mismatch between the clusters documented [here](/riak/kv/2.2.0/release-notes/#replication-bucket-mismatch). +If MDC clusters will be upgraded in stages, during the time that the cluster versions are mismatched with Riak KV versions 2.2.0 and Riak KV versions less than 2.2.0, replication will fail due to a known issue with Bucket Mismatch between the clusters documented [here]({{}}riak/kv/2.2.0/release-notes/#replication-bucket-mismatch). ## Downgrading ### Riak search users -The upgrade to Solr 4.10.4 causes new data written to the cluster to be written in a format that is incompatible with earlier versions of Solr (and, therefore, earlier versions of Riak KV). The [Upgrade](/riak/kv/2.2.0/setup/upgrading/version/) and [Downgrade](/riak/kv/2.2.0/setup/downgrade/) documentation describes the steps you will need to take to reindex your data in a rolling fashion. Be aware this can make downgrades take a very long time, but will minimize exposure of the downgrading nodes to applications that utilize the Riak search feature. +The upgrade to Solr 4.10.4 causes new data written to the cluster to be written in a format that is incompatible with earlier versions of Solr (and, therefore, earlier versions of Riak KV). The [Upgrade]({{}}riak/kv/2.2.0/setup/upgrading/version/) and [Downgrade]({{}}riak/kv/2.2.0/setup/downgrade/) documentation describes the steps you will need to take to reindex your data in a rolling fashion. Be aware this can make downgrades take a very long time, but will minimize exposure of the downgrading nodes to applications that utilize the Riak search feature. @@ -66,7 +66,7 @@ The upgrade to Solr 4.10.4 causes new data written to the cluster to be written * `yz_solrq_drain_fsm` are now monitored from the queues being drained. Before, it was possible for a queue to get stuck in wait_for_drain_complete state if the drain fsm crashed before the drain complete messages were sent. * Logging has been added to clear and exchange trees for audit of administrative operations. * All above work captured in [[yokozuna PR 700](https://github.com/basho/yokozuna/pull/700)]. -* Additional [Cuttlefish parameters](/riak/kv/2.2.0/configuring/reference/#search) have been added to support the Riak search batching updates. These configs will allow you to set batching parameters based on your needs and have, in certain cases, led to significantly higher write throughput to Solr. +* Additional [Cuttlefish parameters]({{}}riak/kv/2.2.0/configuring/reference/#search) have been added to support the Riak search batching updates. These configs will allow you to set batching parameters based on your needs and have, in certain cases, led to significantly higher write throughput to Solr. * [[yokozuna PR 700](https://github.com/basho/yokozuna/pull/700)] * LevelDB global object expiration allows data to be automatically, efficiently deleted in LevelDB and brings LevelDB to feature parity with Bitcask. 
* [[eleveldb PR 211](https://github.com/basho/eleveldb/pull/211)] @@ -74,14 +74,14 @@ The upgrade to Solr 4.10.4 causes new data written to the cluster to be written * LevelDB now has LZ4 compression, which provides faster compression of data for enhanced cluster performance. * [[eleveldb PR 208](https://github.com/basho/eleveldb/pull/208)] * [[eleveldb PR 216](https://github.com/basho/eleveldb/pull/216)] -* Cluster job controls allow you to set controls over commands that might have a performance impact on the Riak cluster, for example: list keys, list buckets, secondary index(2i) queries, and MapReduce. Denied operations will be logged to file. You can read more about these [here](/riak/kv/2.2.0/configuring/reference#cluster-job-controls). +* Cluster job controls allow you to set controls over commands that might have a performance impact on the Riak cluster, for example: list keys, list buckets, secondary index(2i) queries, and MapReduce. Denied operations will be logged to file. You can read more about these [here]({{}}riak/kv/2.2.0/configuring/reference#cluster-job-controls). * [[riak PR 868](https://github.com/basho/riak/pull/868)] * [[riak_core PR 851](https://github.com/basho/riak_core/pull/851)] * [[riak_ee PR ](https://github.com/basho/riak_ee/pull/405)] * [[riak_kv PR 1459](https://github.com/basho/riak_kv/pull/1459)] * [[riak_search PR 184](https://github.com/basho/riak_search/pull/184)] * [[yokozuna PR 671](https://github.com/basho/yokozuna/pull/671)] -* The [HyperLogLog (HLL) distributed data type](/riak/kv/2.2.0/learn/concepts/crdts/#hyperloglogs) provides high-performance, approximate count of unique objects in massive sets by estimating the unique elements in a large set or stream of data. HLL keeps items at a constant size using a hash-based algorithm, which keeps memory usage low. Normally, calculating the exact cardinality of a set requires an amount of memory proportional to the cardinality when counting these unique items. With HLLs, the trade off is less memory in exchange for approximated cardinality. More of HLL usage can be found [here](/riak/kv/2.2.0/developing/data-types/hyperloglogs/). +* The [HyperLogLog (HLL) distributed data type]({{}}riak/kv/2.2.0/learn/concepts/crdts/#hyperloglogs) provides high-performance, approximate count of unique objects in massive sets by estimating the unique elements in a large set or stream of data. HLL keeps items at a constant size using a hash-based algorithm, which keeps memory usage low. Normally, calculating the exact cardinality of a set requires an amount of memory proportional to the cardinality when counting these unique items. With HLLs, the trade-off is less memory in exchange for approximated cardinality. More on HLL usage can be found [here]({{}}riak/kv/2.2.0/developing/data-types/hyperloglogs/). * [[riak_kv PR 1435](https://github.com/basho/riak_kv/pull/1435)] * Active anti-entropy (AAE) improvements remedy an issue in prior versions of Riak KV where the hashing function used for AAE could trigger unneeded read-repairs. In 2.2, hashing is improved so that unnecessary read repairs are not triggered and AAE uses less resources. AAE also upgrades automatically. (You can configure AAE not to automatically upgrade, but we do not recommend this.)
* [[yokozuna PR 662](https://github.com/basho/yokozuna/pull/662)] @@ -128,8 +128,8 @@ The upgrade to Solr 4.10.4 causes new data written to the cluster to be written ## Bugs Fixed * [[Issue 1178](https://github.com/basho/riak_kv/issues/1178)/[riak_kv PR 1420](https://github.com/basho/riak_kv/pull/1420)] riak_kv can no longer run with sidejob disabled. The removal of the non-sidejob code cuts down on risk and maintenance costs, and improves performance. Included in the code removal are `riak_kv_get_fsm` and `riak_kv_put_fsm` supervisors. The GET/PUT FSM start_link functions have been renamed to 'start', though the start_link function name is kept as an alias to avoid any potential problems during rolling upgrades. This resolves an issue where calls to `riak_kv_get_fsm_sup:start_get_fsm` leave defunct PIDs in the `riak_kv_get_fsm_sup`, which can cause extended shutdown times as the supervisor attempts to iterate through millions of dead PIDs. -* A thorough review of file ownership across the Riak KV package was done and several files, including riak init, were changed to tighten the ownership to root:root instead of riak:riak to prevent a potential code injection across all supported operating systems. You can read more about this issue [here](http://docs.basho.com/community/productadvisories/codeinjectioninitfiles/). [[node_package PR 196](https://github.com/basho/node_package/pull/196)] -* The AddDB() call now occurs after all object initialization is complete to eliminate a race condition that leads to segfault. You can read more about the issue [here](http://docs.basho.com/community/productadvisories/leveldbsegfault/).[[LevelDB PR 184](https://github.com/basho/leveldb/pull/184)] +* A thorough review of file ownership across the Riak KV package was done and several files, including riak init, were changed to tighten the ownership to root:root instead of riak:riak to prevent a potential code injection across all supported operating systems. You can read more about this issue [here]({{}}community/productadvisories/codeinjectioninitfiles/). [[node_package PR 196](https://github.com/basho/node_package/pull/196)] +* The AddDB() call now occurs after all object initialization is complete to eliminate a race condition that leads to segfault. You can read more about the issue [here]({{}}community/productadvisories/leveldbsegfault/). [[LevelDB PR 184](https://github.com/basho/leveldb/pull/184)] * [[Issue 1064](https://github.com/basho/riak_kv/issues/1064)/[riak_kv PR 1331](https://github.com/basho/riak_kv/pull/1331) & [riak_kv PR 963](https://github.com/basho/riak_kv/pull/963)] When using the `max_memory` setting in the memory backend, a list of timers is kept in ETS. In certain circumstances, these timer references were not deleted when the item had expired or when a new value was put to the table. The timer references are now appropriately deleted. * [[riak_kv PR 1282](https://github.com/basho/riak_kv/pull/1282)] Unregister per-vnode statistics when cleanly shutting down. However if the vnode crashes, the terminate callback will not be executed. * [[mochiweb PR 20](https://github.com/basho/mochiweb/pull/20)] In certain circumstances, mochiweb_http could receive an unexpected message and reply with a 400 response. When using keep-alive HTTP connections and a load balancer, it was possible for this same connection to later receive and transmit back to a client a delayed message rather than closing the connection properly. Mochiweb is now prevented from sending an erroneous 400 message.
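To make the HyperLogLog feature described above concrete, here is a rough sketch of HLL usage over the HTTP data types API; the bucket type, bucket, and key names are illustrative assumptions:

```bash
# Sketch: create an HLL bucket type, add elements, then read back the
# approximate cardinality
riak-admin bucket-type create hlls '{"props":{"datatype":"hyperloglog"}}'
riak-admin bucket-type activate hlls
curl -X POST http://localhost:8098/types/hlls/buckets/visitors/datatypes/unique_ips \
  -H "Content-Type: application/json" \
  -d '{"add_all": ["10.0.0.1", "10.0.0.2", "10.0.0.1"]}'
# The duplicate 10.0.0.1 counts once, so the value returned below should
# be approximately 2
curl http://localhost:8098/types/hlls/buckets/visitors/datatypes/unique_ips
```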
@@ -188,10 +188,10 @@ Once all of the Riak KV clusters have been upgraded to version 2.2.0 or greater, ## Deprecation Notification -* [Link Walking](/riak/kv/2.2.0/developing/api/http/link-walking/) is deprecated and will not work if security is enabled. -* Key Filters are deprecated; we strongly discourage key listing in production due to the overhead involved, so it's better to maintain key indexes as values in Riak (see our [set data type](/riak/kv/2.2.0/developing/data-types/sets/) as a useful tool for such indexes). -* JavaScript MapReduce is deprecated; we have expanded our [Erlang MapReduce](/riak/kv/2.2.0/developing/app-guide/advanced-mapreduce/#mapreduce) documentation to assist with the transition. -* Riak search 1.0 is deprecated in favor of our Solr-based [Riak search 2.0](/riak/kv/2.2.0/developing/usage/search/). Version 1.0 will not work if security is enabled. +* [Link Walking]({{}}riak/kv/2.2.0/developing/api/http/link-walking/) is deprecated and will not work if security is enabled. +* Key Filters are deprecated; we strongly discourage key listing in production due to the overhead involved, so it's better to maintain key indexes as values in Riak (see our [set data type]({{}}riak/kv/2.2.0/developing/data-types/sets/) as a useful tool for such indexes). +* JavaScript MapReduce is deprecated; we have expanded our [Erlang MapReduce]({{}}riak/kv/2.2.0/developing/app-guide/advanced-mapreduce/#mapreduce) documentation to assist with the transition. +* Riak search 1.0 is deprecated in favor of our Solr-based [Riak search 2.0]({{}}riak/kv/2.2.0/developing/usage/search/). Version 1.0 will not work if security is enabled. * v2 replication (a component of Riak KV Enterprise) is superseded by v3 and will be removed in the future. * Legacy vnode routing (an early mechanism for managing requests between servers) is deprecated. If `vnode_routing` is set to `legacy` via Riak KV's capability system, it should be removed to prevent upgrade problems in the future. -* Some users in the past have used Riak's internal API (e.g. `riak:local_client/1`); this API may change at any time, so we strongly recommend using our [Erlang client library](http://github.com/basho/riak-erlang-client/) (or [one of the other libraries](/riak/kv/2.2.0/developing/client-libraries/) we support) instead. +* Some users in the past have used Riak's internal API (e.g. `riak:local_client/1`); this API may change at any time, so we strongly recommend using our [Erlang client library](http://github.com/basho/riak-erlang-client/) (or [one of the other libraries]({{}}riak/kv/2.2.0/developing/client-libraries/) we support) instead. 
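As a hedged sketch of the key-index pattern recommended above in place of key listing, an application can maintain its known keys in a set data type (the names below are illustrative, and a bucket type with `datatype` set to `set` is assumed to exist):

```bash
# Sketch: record keys in a set as they are written...
curl -X POST http://localhost:8098/types/sets/buckets/key_indexes/datatypes/users \
  -H "Content-Type: application/json" \
  -d '{"add_all": ["user:1001", "user:1002"]}'
# ...then read the index back later instead of running a key listing
curl http://localhost:8098/types/sets/buckets/key_indexes/datatypes/users
```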
diff --git a/content/riak/kv/2.2.0/setup/downgrade.md b/content/riak/kv/2.2.0/setup/downgrade.md index 1462df8c1d..9363d23592 100644 --- a/content/riak/kv/2.2.0/setup/downgrade.md +++ b/content/riak/kv/2.2.0/setup/downgrade.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.0/ops/upgrading/rolling-downgrades/ --- -[rolling upgrade]: /riak/kv/2.2.0/setup/upgrading/cluster -[config ref]: /riak/kv/2.2.0/configuring/reference -[concept aae]: /riak/kv/2.2.0/learn/concepts/active-anti-entropy/ -[aae status]: /riak/kv/2.2.0/using/admin/riak-admin/#aae-status +[rolling upgrade]: {{}}riak/kv/2.2.0/setup/upgrading/cluster +[config ref]: {{}}riak/kv/2.2.0/configuring/reference +[concept aae]: {{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/ +[aae status]: {{}}riak/kv/2.2.0/using/admin/riak-admin/#aae-status Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade]. diff --git a/content/riak/kv/2.2.0/setup/installing.md b/content/riak/kv/2.2.0/setup/installing.md index 3de55c2a7d..858722c876 100644 --- a/content/riak/kv/2.2.0/setup/installing.md +++ b/content/riak/kv/2.2.0/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.2.0/installing/ --- -[install aws]: /riak/kv/2.2.0/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.2.0/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.2.0/setup/installing/freebsd -[install mac osx]: /riak/kv/2.2.0/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.2.0/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.2.0/setup/installing/smartos -[install solaris]: /riak/kv/2.2.0/setup/installing/solaris -[install suse]: /riak/kv/2.2.0/setup/installing/suse -[install windows azure]: /riak/kv/2.2.0/setup/installing/windows-azure -[install source index]: /riak/kv/2.2.0/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.2.0/setup/upgrading +[install aws]: {{}}riak/kv/2.2.0/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.2.0/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.2.0/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.2.0/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.2.0/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.2.0/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.2.0/setup/installing/solaris +[install suse]: {{}}riak/kv/2.2.0/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.2.0/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.2.0/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.2.0/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.2.0/setup/installing/amazon-web-services.md b/content/riak/kv/2.2.0/setup/installing/amazon-web-services.md index 83a2a66844..cda9aedb9d 100644 --- a/content/riak/kv/2.2.0/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.2.0/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. 
@@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.2.0/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.2.0/using/security/). ## Clustering Riak on AWS diff --git a/content/riak/kv/2.2.0/setup/installing/debian-ubuntu.md b/content/riak/kv/2.2.0/setup/installing/debian-ubuntu.md index 7b40b503da..61b53dbf03 100644 --- a/content/riak/kv/2.2.0/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.2.0/setup/installing/debian-ubuntu.md @@ -18,10 +18,10 @@ aliases: - /riak/kv/2.2.0/installing/debian-ubuntu/ --- -[install source index]: /riak/kv/2.2.0/setup/installing/source/ -[security index]: /riak/kv/2.2.0/using/security/ -[install source erlang]: /riak/kv/2.2.0/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.0/setup/installing/verify +[install source index]: {{}}riak/kv/2.2.0/setup/installing/source/ +[security index]: {{}}riak/kv/2.2.0/using/security/ +[install source erlang]: {{}}riak/kv/2.2.0/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.0/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.2.0/setup/installing/freebsd.md b/content/riak/kv/2.2.0/setup/installing/freebsd.md index 7b34a93ad5..14eb8fe74d 100644 --- a/content/riak/kv/2.2.0/setup/installing/freebsd.md +++ b/content/riak/kv/2.2.0/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.2.0/setup/installing/source/erlang -[downloads]: /riak/kv/2.2.0/downloads/ -[install verify]: /riak/kv/2.2.0/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.2.0/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.2.0/downloads/ +[install verify]: {{}}riak/kv/2.2.0/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.2.0/setup/installing/mac-osx.md b/content/riak/kv/2.2.0/setup/installing/mac-osx.md index e35f60f2e8..377dab38ad 100644 --- a/content/riak/kv/2.2.0/setup/installing/mac-osx.md +++ b/content/riak/kv/2.2.0/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.2.0/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.2.0/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.0/setup/installing/verify +[perf open files]: {{}}riak/kv/2.2.0/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.2.0/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.0/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. 
You can install from source or download a diff --git a/content/riak/kv/2.2.0/setup/installing/rhel-centos.md b/content/riak/kv/2.2.0/setup/installing/rhel-centos.md index 3713e564b9..b756ab537b 100644 --- a/content/riak/kv/2.2.0/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.2.0/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.2.0/setup/installing/source -[install source erlang]: /riak/kv/2.2.0/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.0/setup/installing/verify +[install source index]: {{}}riak/kv/2.2.0/setup/installing/source +[install source erlang]: {{}}riak/kv/2.2.0/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.0/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.2.0/setup/installing/smartos.md b/content/riak/kv/2.2.0/setup/installing/smartos.md index 70d85f58bd..5b1f72e033 100644 --- a/content/riak/kv/2.2.0/setup/installing/smartos.md +++ b/content/riak/kv/2.2.0/setup/installing/smartos.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.2.0/installing/smartos/ --- -[install verify]: /riak/kv/2.2.0/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.0/setup/installing/verify {{% note title="SmartOS End of Life (EOL) for Riak KV 2.2.0" %}} SmartOS is no longer supported in Riak KV 2.2.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). @@ -74,13 +74,13 @@ cat /opt/local/etc/pkgin/repositories.conf Download your version of the Riak binary package for SmartOS: ```bash -curl -o /tmp/riak-2.2.0-SmartOS-x86_64.tgz http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/smartos/1.8/riak-2.2.0-SmartOS-x86_64.tgz +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz ``` Next, install the package: ``` -pkg_add /tmp/riak-2.2.0-SmartOS-x86_64.tgz +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz ``` After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: diff --git a/content/riak/kv/2.2.0/setup/installing/solaris.md b/content/riak/kv/2.2.0/setup/installing/solaris.md index 174aafc9bc..ee6f8d63a4 100644 --- a/content/riak/kv/2.2.0/setup/installing/solaris.md +++ b/content/riak/kv/2.2.0/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.2.0/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.0/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. 
diff --git a/content/riak/kv/2.2.0/setup/installing/source.md b/content/riak/kv/2.2.0/setup/installing/source.md index b956fc1313..a71e471d37 100644 --- a/content/riak/kv/2.2.0/setup/installing/source.md +++ b/content/riak/kv/2.2.0/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.2.0/setup/installing/source/erlang -[downloads]: /riak/kv/2.2.0/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.2.0/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.2.0/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.2.0/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.2.0/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.2.0/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.2.0/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.2.0/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.2.0/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.2.0/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.2.0/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.2.0/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.2.0/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.2.0/setup/installing/source/erlang.md b/content/riak/kv/2.2.0/setup/installing/source/erlang.md index 918c0a532f..8e1f189ef1 100644 --- a/content/riak/kv/2.2.0/setup/installing/source/erlang.md +++ b/content/riak/kv/2.2.0/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.0/installing/source/erlang/ --- -[install index]: /riak/kv/2.2.0/setup/installing -[security basics]: /riak/kv/2.2.0/using/security/basics +[install index]: {{}}riak/kv/2.2.0/setup/installing +[security basics]: {{}}riak/kv/2.2.0/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho10.tar.gz). 
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.2.0/setup/installing/source/jvm.md b/content/riak/kv/2.2.0/setup/installing/source/jvm.md index b38756a893..bac55a2036 100644 --- a/content/riak/kv/2.2.0/setup/installing/source/jvm.md +++ b/content/riak/kv/2.2.0/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.2.0/installing/source/jvm/ --- -[usage search]: /riak/kv/2.2.0/developing/usage/search +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.2.0/setup/installing/suse.md b/content/riak/kv/2.2.0/setup/installing/suse.md index 2756a2c0b7..a17ff206db 100644 --- a/content/riak/kv/2.2.0/setup/installing/suse.md +++ b/content/riak/kv/2.2.0/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.2.0/installing/suse/ --- -[install verify]: /riak/kv/2.2.0/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.0/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.2.0/setup/installing/verify.md b/content/riak/kv/2.2.0/setup/installing/verify.md index 3b62f89a5e..558c15951e 100644 --- a/content/riak/kv/2.2.0/setup/installing/verify.md +++ b/content/riak/kv/2.2.0/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.0/installing/verify-install/ --- -[client libraries]: /riak/kv/2.2.0/developing/client-libraries -[perf open files]: /riak/kv/2.2.0/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.2.0/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.2.0/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.2.0/developing/client-libraries +[perf open files]: {{}}riak/kv/2.2.0/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.2.0/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.2.0/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.2.0/setup/installing/windows-azure.md b/content/riak/kv/2.2.0/setup/installing/windows-azure.md index b1f8d1fcef..15e04573b3 100644 --- a/content/riak/kv/2.2.0/setup/installing/windows-azure.md +++ b/content/riak/kv/2.2.0/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". 
- ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.2.0/setup/planning/backend.md b/content/riak/kv/2.2.0/setup/planning/backend.md index b0fc2f6f92..803f390f6a 100644 --- a/content/riak/kv/2.2.0/setup/planning/backend.md +++ b/content/riak/kv/2.2.0/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.2.0/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.0/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.0/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.2.0/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.0/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.0/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.2.0/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
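+
+For orientation, the backend is selected with a single setting in
+`riak.conf` (a sketch; `bitcask` is the default, and the pages linked
+above cover when to choose each backend):
+
+```riakconf
+## One of: bitcask, leveldb, memory, multi
+storage_backend = leveldb
+```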
diff --git a/content/riak/kv/2.2.0/setup/planning/backend/bitcask.md b/content/riak/kv/2.2.0/setup/planning/backend/bitcask.md index b4467781d4..77b273a70a 100644 --- a/content/riak/kv/2.2.0/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.2.0/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.2.0/using/admin/riak-cli -[config reference]: /riak/kv/2.2.0/configuring/reference -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.2.0/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.2.0/setup/planning/backend/multi -[usage search]: /riak/kv/2.2.0/developing/usage/search - -[glossary aae]: /riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.2.0/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.2.0/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.2.0/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.2.0/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.2.0/configuring/reference +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.2.0/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.2.0/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.2.0/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.2.0/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
diff --git a/content/riak/kv/2.2.0/setup/planning/backend/leveldb.md b/content/riak/kv/2.2.0/setup/planning/backend/leveldb.md index 0d5ff941ec..60c913c0c0 100644 --- a/content/riak/kv/2.2.0/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.2.0/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.2.0/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[config reference]: /riak/kv/2.2.0/configuring/reference -[perf index]: /riak/kv/2.2.0/using/performance -[config reference#aae]: /riak/kv/2.2.0/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.2.0/configuring/reference +[perf index]: {{}}riak/kv/2.2.0/using/performance +[config reference#aae]: {{}}riak/kv/2.2.0/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.2.0/setup/planning/backend/memory.md b/content/riak/kv/2.2.0/setup/planning/backend/memory.md index 7db3b7af28..73db74ff07 100644 --- a/content/riak/kv/2.2.0/setup/planning/backend/memory.md +++ b/content/riak/kv/2.2.0/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.0/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.2.0/configuring/reference -[plan backend multi]: /riak/kv/2.2.0/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.2.0/configuring/reference +[plan backend multi]: {{}}riak/kv/2.2.0/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.2.0/setup/planning/backend/multi.md b/content/riak/kv/2.2.0/setup/planning/backend/multi.md index aa1b78ede6..9b2c96fdc8 100644 --- a/content/riak/kv/2.2.0/setup/planning/backend/multi.md +++ b/content/riak/kv/2.2.0/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.0/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.2.0/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.0/setup/planning/backend/memory -[config reference]: /riak/kv/2.2.0/configuring/reference -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.2.0/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.2.0/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.0/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.2.0/configuring/reference +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.2.0/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.2.0/setup/planning/best-practices.md b/content/riak/kv/2.2.0/setup/planning/best-practices.md index 377504744a..55a88444fc 100644 --- a/content/riak/kv/2.2.0/setup/planning/best-practices.md +++ b/content/riak/kv/2.2.0/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.0/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.2.0/using/reference/handoff -[config mapreduce]: /riak/kv/2.2.0/configuring/mapreduce -[glossary aae]: /riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.2.0/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.2.0/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.2.0/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.2.0/setup/planning/bitcask-capacity-calc.md index 17485f4a05..f3e9091a36 100644 --- a/content/riak/kv/2.2.0/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.2.0/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
diff --git a/content/riak/kv/2.2.0/setup/planning/cluster-capacity.md b/content/riak/kv/2.2.0/setup/planning/cluster-capacity.md
index afb540cca9..c6201af28e 100644
--- a/content/riak/kv/2.2.0/setup/planning/cluster-capacity.md
+++ b/content/riak/kv/2.2.0/setup/planning/cluster-capacity.md
@@ -15,13 +15,13 @@ aliases:
   - /riak/kv/2.2.0/ops/building/planning/cluster
 ---
 
-[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb
-[plan bitcask capacity]: /riak/kv/2.2.0/setup/planning/bitcask-capacity-calc
-[plan index]: /riak/kv/2.2.0/setup/planning
-[concept replication]: /riak/kv/2.2.0/learn/concepts/replication
-[use admin riak-admin#cluster]: /riak/kv/2.2.0/using/admin/riak-admin/#cluster
-[config reference]: /riak/kv/2.2.0/configuring/reference
-[perf benchmark]: /riak/kv/2.2.0/using/performance/benchmarking
+[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{}}riak/kv/2.2.0/setup/planning/bitcask-capacity-calc
+[plan index]: {{}}riak/kv/2.2.0/setup/planning
+[concept replication]: {{}}riak/kv/2.2.0/learn/concepts/replication
+[use admin riak-admin#cluster]: {{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster
+[config reference]: {{}}riak/kv/2.2.0/configuring/reference
+[perf benchmark]: {{}}riak/kv/2.2.0/using/performance/benchmarking
 [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
 
diff --git a/content/riak/kv/2.2.0/setup/planning/operating-system.md b/content/riak/kv/2.2.0/setup/planning/operating-system.md
index fe1e9e54a0..90f115c923 100644
--- a/content/riak/kv/2.2.0/setup/planning/operating-system.md
+++ b/content/riak/kv/2.2.0/setup/planning/operating-system.md
@@ -12,7 +12,7 @@ menu:
 toc: true
 ---
 
-[downloads]: /riak/kv/2.2.0/downloads/
+[downloads]: {{}}riak/kv/2.2.0/downloads/
 
 We recommend deploying Riak KV on a mainstream Unix-like operating
 system. Mainstream distributions have larger support communities, making
diff --git a/content/riak/kv/2.2.0/setup/planning/start.md b/content/riak/kv/2.2.0/setup/planning/start.md
index 178e0416a5..01b96e6da7 100644
--- a/content/riak/kv/2.2.0/setup/planning/start.md
+++ b/content/riak/kv/2.2.0/setup/planning/start.md
@@ -15,10 +15,10 @@ aliases:
   - /riak/kv/2.2.0/ops/building/planning/system-planning
 ---
 
-[plan backend]: /riak/kv/2.2.0/setup/planning/backend
-[plan cluster capacity]: /riak/kv/2.2.0/setup/planning/cluster-capacity
-[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask
-[plan bitcask capacity]: /riak/kv/2.2.0/setup/planning/bitcask-capacity-calc
+[plan backend]: {{}}riak/kv/2.2.0/setup/planning/backend
+[plan cluster capacity]: {{}}riak/kv/2.2.0/setup/planning/cluster-capacity
+[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{}}riak/kv/2.2.0/setup/planning/bitcask-capacity-calc
 
 Here are some steps and recommendations for designing and configuring your
 Riak cluster.
diff --git a/content/riak/kv/2.2.0/setup/upgrading/checklist.md b/content/riak/kv/2.2.0/setup/upgrading/checklist.md index 1638650a31..1476cb1730 100644 --- a/content/riak/kv/2.2.0/setup/upgrading/checklist.md +++ b/content/riak/kv/2.2.0/setup/upgrading/checklist.md @@ -15,24 +15,24 @@ aliases: - /riak/kv/2.2.0/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.2.0/using/performance/open-files-limit -[perf index]: /riak/kv/2.2.0/using/performance +[perf open files]: {{}}riak/kv/2.2.0/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.2.0/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.2.0/using/security/basics -[cluster ops load balance]: /riak/kv/2.2.0/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.2.0/configuring/reference -[config backend]: /riak/kv/2.2.0/configuring/backend -[usage search]: /riak/kv/2.2.0/developing/usage/search -[usage conflict resolution]: /riak/kv/2.2.0/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.2.0/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.2.0/using/admin/commands -[use admin riak control]: /riak/kv/2.2.0/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.2.0/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.2.0/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.2.0/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.2.0/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.2.0/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.2.0/configuring/reference +[config backend]: {{}}riak/kv/2.2.0/configuring/backend +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.2.0/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.2.0/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.2.0/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.2.0/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.2.0/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.2.0/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.2.0/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.2.0/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition. 
diff --git a/content/riak/kv/2.2.0/setup/upgrading/cluster.md b/content/riak/kv/2.2.0/setup/upgrading/cluster.md
new file mode 100644
index 0000000000..8834d72978
--- /dev/null
+++ b/content/riak/kv/2.2.0/setup/upgrading/cluster.md
@@ -0,0 +1,298 @@
+---
+title: "Upgrading a Cluster"
+description: ""
+project: "riak_kv"
+project_version: "2.2.0"
+menu:
+  riak_kv-2.2.0:
+    name: "Upgrading a Cluster"
+    identifier: "upgrading_cluster"
+    weight: 102
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.2.0/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.2.0/ops/upgrading/rolling-upgrades/
+---
+
+[production checklist]: {{}}riak/kv/2.2.0/setup/upgrading/checklist
+[use admin riak control]: {{}}riak/kv/2.2.0/using/admin/riak-control
+[use admin commands]: {{}}riak/kv/2.2.0/using/admin/commands
+[use admin riak-admin]: {{}}riak/kv/2.2.0/using/admin/riak-admin
+[usage secondary-indexes]: {{}}riak/kv/2.2.0/developing/usage/secondary-indexes
+[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter
+[jmx monitor]: {{}}riak/kv/2.2.0/using/reference/jmx
+[snmp]: {{}}riak/kv/2.2.0/using/reference/snmp
+
+{{% note title="Note on upgrading Riak KV from older versions" %}}
+Riak KV upgrades are tested and supported for two feature release versions.
+For example, upgrades from 1.1.x to 1.3.x are tested and supported,
+while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new
+version of Riak KV that is more than two feature releases ahead, we
+recommend first upgrading to an intermediate version. For example, in an
+upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x
+before upgrading to 1.4.x.
+
+If you run [Riak Control]({{}}riak/kv/2.2.0/using/admin/riak-control), you should disable it during the rolling upgrade process.
+{{% /note %}}
+
+Riak KV nodes negotiate with each other to determine supported
+operating modes. This allows clusters containing mixed versions of Riak KV
+to properly interoperate without special configuration, and simplifies
+rolling upgrades.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading.
+
+## Debian/Ubuntu
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Debian/Ubuntu packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up the Riak KV node's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
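+
+Before moving on to the next node, it can also help to confirm that the
+upgraded node has rejoined the cluster cleanly. A quick sketch using
+standard `riak-admin` tooling:
+
+```bash
+# Sketch: spot-check membership and ring health after the upgrade
+riak-admin member-status
+riak-admin ring-status
+```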
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d »riak_package_name«.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7. The app.config file from `riak_ee` will be significantly different from your backed-up file.
+   While it will contain all of the same sections as your original, it will
+   have many new ones. Copy the customizations from your original app.config
+   file into the appropriate sections in the new one. Ensure that the
+   following sections are present in app.config:
+    * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_jmx` --- See [JMX Monitoring][jmx monitor] for more information.
+    * `snmp` --- See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should examine any custom patches contained in the
+`basho-patches` directory to determine whether they still apply to the
+upgraded version. If patches no longer apply, remove them from the
+`basho-patches` directory before operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is also a good idea to verify some basic configuration and the general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
diff --git a/content/riak/kv/2.2.0/setup/upgrading/search.md b/content/riak/kv/2.2.0/setup/upgrading/search.md
new file mode 100644
index 0000000000..8b400799d0
--- /dev/null
+++ b/content/riak/kv/2.2.0/setup/upgrading/search.md
@@ -0,0 +1,276 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.2.0"
+menu:
+  riak_kv-2.2.0:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.2.0/ops/advanced/upgrading-search-2
+  - /riak/kv/2.2.0/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e. no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down.
+Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage, as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM. A good start is 2 GB, but more will be required for heavier
+workloads. That said, do not make the heap too large either, as that
+could cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data, as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+> Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space from merge index, as its GC
+algorithm is bad at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the setting is called
+   `yokozuna`. If you've moved to the new `riak.conf` configuration
+   format, it's called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+      %% Other configs
+      {enabled, true},
+      %% Other configs
+    ]}
+    ```
+
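+    After a node restarts with the new setting, one way to confirm that
+    it took effect is to inspect the node's effective configuration (a
+    sketch; `riak config effective` is available on Riak 2.x nodes):
+
+    ```bash
+    # Sketch: confirm search = on in the running configuration
+    riak config effective | grep search
+    ```
+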
+    {{% note title="Upgrade First" %}}
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps, which require them.
+    {{% /note %}}
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+   To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+   instructions to learn how to define your XML file. Once you've created
+   the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+   Once a bucket is associated with the new Search, all objects that are
+   written or modified in Riak will be indexed by **both** legacy and new
+   Search. However, the HTTP and client query interfaces will still
+   continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+   Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+   the following code into the shell. It clears the Search hash trees for
+   each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+    ```
+
+   Press `Ctrl-D` to exit from the attached shell.
+
+   In the background AAE will rebuild the hash trees and exchange them
+   with KV. These exchanges will notice objects are missing and index
+   them in new Search.
+
+6. Monitor the AAE status of every node until a full round of exchanges
+have occurred on every node.
+
+    ```bash
+    riak-admin search aae-status
+    ```
+
+   First, you must wait until all trees are rebuilt. This may take a
+   while, as each node is configured, by default, to build a maximum of
+   one tree per hour. You can determine when a tree is built by looking
+   at the `Entropy Trees` section. When a tree is not built it will show
+   `--` under the `Built (ago)` column. Otherwise, it will list how long
+   ago the tree was built in a human-friendly format. Here is an example
+   of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   --
+    319703483166135013357056057156686910549735243776   --
+    ...
+ ``` + + Here is an example of built trees: + + ``` + ================================ Entropy Trees ================================ + Index Built (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 12.3 hr + 319703483166135013357056057156686910549735243776 5.3 hr + ... + ``` + + After all the trees are built you then have to wait for a full + exchange round to occur for every partition on every node. That is, + the full exchange round must be **NEWER** than the time the tree was + built. That way you know the exchange was based on the latest tree. + The exchange information is found under the `Exchanges` section. + Under that section there are two columns: `Last (ago)` and `All + (ago)`. In this was you want to wait until the `All (ago)` section is + newer than the value of `Built (ago)` in the `Entropy Trees` section. + For example, given the entropy tree output above this output would + indicate both partitions have had a full exchange round since the + latest tree was built: + + ``` + ================================== Exchanges ================================== + Index Last (ago) All (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 12.1 hr 12.1 hr + 319703483166135013357056057156686910549735243776 5.1 hr 5.2 hr + ... + ``` + + Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than + `5.3 hr`. Once the exchange is newer for every partition on every + node you know that AAE has brought all new indexes up to date. + +7. Next, call the following command that will give HTTP and PB query +control to the new Riak Search. + + ```curl + riak-admin search switch-to-new-search + ``` + +
+    {{% note title="Check Results Before Switching (Optional)" %}}
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After `switch-to-new-search` is run, all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching, you can use its dedicated
+    HTTP resource at `/search/query/?q=...`.
+    {{% /note %}}
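+
+    For example, a quick spot check of the new index before switching
+    might look like this (a sketch, assuming the `my_index` index created
+    above; `wt=json` asks Solr to return JSON):
+
+    ```curl
+    curl "http://localhost:8098/search/query/my_index?wt=json&q=*:*"
+    ```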
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+      %% Other configs
+      {enabled, false},
+      %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search, it causes
+the commit hooks to persist even when legacy Search is disabled and the
+`search` property is set to `false`.
+
+   New Search has code to expunge the legacy hooks from the raw ring, but
+   it only occurs during start-up and requires that legacy Search be
+   disabled in the configuration. Thus, the easiest way to fix things is
+   to disable legacy Search (in step 9) and then perform a rolling
+   restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration.
diff --git a/content/riak/kv/2.2.0/setup/upgrading/version.md b/content/riak/kv/2.2.0/setup/upgrading/version.md
index 3117710f15..a011d95ce7 100644
--- a/content/riak/kv/2.2.0/setup/upgrading/version.md
+++ b/content/riak/kv/2.2.0/setup/upgrading/version.md
@@ -19,18 +19,18 @@ aliases:
 ---
 
-[production checklist]: /riak/kv/2.2.0/setup/upgrading/checklist
-[use admin riak control]: /riak/kv/2.2.0/using/admin/riak-control
-[use admin commands]: /riak/kv/2.2.0/using/admin/commands
-[use admin riak-admin]: /riak/kv/2.2.0/using/admin/riak-admin
-[usage secondary-indexes]: /riak/kv/2.2.0/developing/usage/secondary-indexes
-[release notes]: /riak/kv/2.2.0/release-notes
+[production checklist]: {{}}riak/kv/2.2.0/setup/upgrading/checklist
+[use admin riak control]: {{}}riak/kv/2.2.0/using/admin/riak-control
+[use admin commands]: {{}}riak/kv/2.2.0/using/admin/commands
+[use admin riak-admin]: {{}}riak/kv/2.2.0/using/admin/riak-admin
+[usage secondary-indexes]: {{}}riak/kv/2.2.0/developing/usage/secondary-indexes
+[release notes]: {{}}riak/kv/2.2.0/release-notes
 [riak enterprise]: http://basho.com/products/riak-kv/
-[cluster ops mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter
-[config v3 mdc]: /riak/kv/2.2.0/configuring/v3-multi-datacenter
-[jmx monitor]: /riak/kv/2.2.0/using/reference/jmx
-[snmp]: /riak/kv/2.2.0/using/reference/snmp
-[Release Notes]: /riak/kv/2.2.0/release-notes
+[cluster ops mdc]: {{}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter
+[jmx monitor]: {{}}riak/kv/2.2.0/using/reference/jmx
+[snmp]: {{}}riak/kv/2.2.0/using/reference/snmp
+[Release Notes]: {{}}riak/kv/2.2.0/release-notes
 
 ## Overview
 
diff --git a/content/riak/kv/2.2.0/using/admin/commands.md b/content/riak/kv/2.2.0/using/admin/commands.md
index 75f8f1b4a8..b9c3273586 100644
--- a/content/riak/kv/2.2.0/using/admin/commands.md
+++ b/content/riak/kv/2.2.0/using/admin/commands.md
@@ -15,11 +15,11 @@ aliases:
   - /riak/kv/2.2.0/ops/running/cluster-admin
 ---
 
-[use admin riak-admin#cluster]: /riak/kv/2.2.0/using/admin/riak-admin/#cluster
-[concept clusters]:
/riak/kv/2.2.0/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.2.0/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.2.0/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.2.0/using/admin/riak-admin.md b/content/riak/kv/2.2.0/using/admin/riak-admin.md index 6ac259b879..de0b32fc14 100644 --- a/content/riak/kv/2.2.0/using/admin/riak-admin.md +++ b/content/riak/kv/2.2.0/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.2.0/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.2.0/configuring/reference -[use admin commands]: /riak/kv/2.2.0/using/admin/commands -[use admin commands#join]: /riak/kv/2.2.0/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.2.0/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.2.0/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.2.0/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.2.0/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.2.0/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.2.0/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.2.0/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.2.0/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.2.0/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.2.0/setup/downgrade -[security index]: /riak/kv/2.2.0/using/security/ -[security managing]: /riak/kv/2.2.0/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.2.0/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.2.0/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.2.0/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.2.0/using/cluster-operations/strong-consistency -[cluster ops handoff]: /riak/kv/2.2.0/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.2.0/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.2.0/configuring/reference +[use admin commands]: {{}}riak/kv/2.2.0/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.2.0/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.2.0/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.2.0/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.2.0/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.2.0/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.2.0/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.2.0/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.2.0/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.2.0/using/cluster-operations/inspecting-node +[use ref 
monitoring]: {{}}riak/kv/2.2.0/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.2.0/setup/downgrade +[security index]: {{}}riak/kv/2.2.0/using/security/ +[security managing]: {{}}riak/kv/2.2.0/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.2.0/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.2.0/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.2.0/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.2.0/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.2.0/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.2.0/using/admin/riak-admin/#stats ## `riak-admin` diff --git a/content/riak/kv/2.2.0/using/admin/riak-cli.md b/content/riak/kv/2.2.0/using/admin/riak-cli.md index 09726e675e..5fc8c4163e 100644 --- a/content/riak/kv/2.2.0/using/admin/riak-cli.md +++ b/content/riak/kv/2.2.0/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.0/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.2.0/configuring/reference/ +[configuration file]: {{}}riak/kv/2.2.0/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.2.0/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.2.0/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.2.0/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.2.0/configuring/reference/ ## riak diff --git a/content/riak/kv/2.2.0/using/admin/riak-control.md b/content/riak/kv/2.2.0/using/admin/riak-control.md index 535d55d713..49b373e24c 100644 --- a/content/riak/kv/2.2.0/using/admin/riak-control.md +++ b/content/riak/kv/2.2.0/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.2.0/configuring/reference +[config reference]: {{}}riak/kv/2.2.0/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.2.0/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.2.0/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. 
Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.2.0/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.2.0/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.2.0/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.2.0/using/cluster-operations/active-anti-entropy.md index 47dfd4d023..44b0e12b81 100644 --- a/content/riak/kv/2.2.0/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.2.0/using/cluster-operations/active-anti-entropy.md @@ -15,8 +15,8 @@ aliases: - /riak/2.2.0/ops/advanced/aae/ --- -[config search#throttledelay]: /riak/kv/2.2.0/configuring/search/#search-anti-entropy-throttle-$tier-delay -[config search#throttle]: riak/kv/2.2.0/configuring/search/#search-anti-entropy-throttle +[config search#throttledelay]: {{}}riak/kv/2.2.0/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{}}riak/kv/2.2.0/configuring/search/#search-anti-entropy-throttle Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. @@ -57,12 +57,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. 
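A minimal sketch of that restart step, assuming the `riak` script is on your `PATH` (use whichever init mechanism actually started the node):

```bash
# restart the local node so the new anti_entropy setting takes effect
riak restart

# confirm the node came back up
riak ping
```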
## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -90,7 +90,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes.md index 6a7f286e4e..d0649f50e7 100644 --- a/content/riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.2.0/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.2.0/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.2.0/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.2.0/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.2.0/using/cluster-operations/backing-up.md b/content/riak/kv/2.2.0/using/cluster-operations/backing-up.md index ad4ccf8276..b19242415a 100644 --- a/content/riak/kv/2.2.0/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.2.0/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.0/ops/running/backups --- -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters -[config reference]: /riak/kv/2.2.0/configuring/reference -[plan backend leveldb]: /riak/kv/2.2.0/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.0/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency -[concept aae]: /riak/kv/2.2.0/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.2.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.2.0/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.0/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.2.0/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.2.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.2.0/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down <node>`](/riak/kv/2.2.0/using/admin/riak-admin/#down) + [`riak-admin down <node>`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join <node>`](/riak/kv/2.2.0/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join <node>`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace <node1> <node2>`](/riak/kv/2.2.0/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace <node1> <node2>`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.2.0/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.2.0/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.2.0/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.2.0/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
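The concrete command sequence elided by the hunk above presumably follows the five numbered steps just listed. Using the node names from the example in the text, a sketch of what runs on `riak6.example.com` might look like this (the `riak@...` node-name format is an assumption; substitute your cluster's actual node names):

```bash
# run on the restored node, riak6.example.com
riak-admin down riak@riak1.example.com
riak-admin cluster join riak@riak2.example.com
riak-admin cluster force-replace riak@riak1.example.com riak@riak6.example.com
riak-admin cluster plan
riak-admin cluster commit
```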
diff --git a/content/riak/kv/2.2.0/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.2.0/using/cluster-operations/changing-cluster-info.md index 4120a8e5ab..20ce0251d7 100644 --- a/content/riak/kv/2.2.0/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.2.0/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.2.0/configuring/reference +[config reference]: {{}}riak/kv/2.2.0/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.2.0/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.2.0/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Prior to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.2.0/using/cluster-operations/handoff.md b/content/riak/kv/2.2.0/using/cluster-operations/handoff.md index a33e1f0947..e689a08f26 100644 --- a/content/riak/kv/2.2.0/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.2.0/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.2.0/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.2.0/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.2.0/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.2.0/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.2.0/using/cluster-operations/logging.md b/content/riak/kv/2.2.0/using/cluster-operations/logging.md index 5e1d8534e2..3f63d4bd23 100644 --- a/content/riak/kv/2.2.0/using/cluster-operations/logging.md +++ b/content/riak/kv/2.2.0/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e.
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.2.0/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.2.0/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.2.0/using/cluster-operations/replacing-node.md b/content/riak/kv/2.2.0/using/cluster-operations/replacing-node.md index 274b3ff912..048e680b8d 100644 --- a/content/riak/kv/2.2.0/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.2.0/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.2.0/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.2.0/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.2.0/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.2.0/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.2.0/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.2.0/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.2.0/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.2.0/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.2.0/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.2.0/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.2.0/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.2.0/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.2.0/using/cluster-operations/strong-consistency.md index ba3a1c0cad..47298aa0f8 100644 --- a/content/riak/kv/2.2.0/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.2.0/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.2.0/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.2.0/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.2.0/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.2.0/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.2.0/using/cluster-operations/v2-multi-datacenter.md index 050fb57bb3..a4f2ba7d61 100644 --- a/content/riak/kv/2.2.0/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.0/using/cluster-operations/v2-multi-datacenter.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication system is largely @@ -163,7 +163,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -183,7 +183,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -221,7 +221,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.2.0/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.2.0/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -242,7 +242,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter.md index 2f109d04d0..4c49ea8456 100644 --- a/content/riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.2.0/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.2.0/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.2.0/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.2.0/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.2.0/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.2.0/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.2.0/using/performance.md b/content/riak/kv/2.2.0/using/performance.md index 395baf89d2..81a2c8dc22 100644 --- a/content/riak/kv/2.2.0/using/performance.md +++ b/content/riak/kv/2.2.0/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.2.0/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.2.0/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -253,12 +253,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.2.0/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.2.0/using/performance/open-files-limit/) for more details.
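As a quick illustration of the open-files point above, you can check and raise the descriptor limit for the shell that launches Riak; the value below is only a common starting point (an assumption on our part), not an official recommendation:

```bash
# show the current per-process limit on open file descriptors
ulimit -n

# raise it for this shell before starting Riak (example value only)
ulimit -n 65536
riak start
```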
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.2.0/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.2.0/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.2.0/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.2.0/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.2.0/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.2.0/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.2.0/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.2.0/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.2.0/using/performance/benchmarking.md b/content/riak/kv/2.2.0/using/performance/benchmarking.md index a512225bc1..33f59b7e8c 100644 --- a/content/riak/kv/2.2.0/using/performance/benchmarking.md +++ b/content/riak/kv/2.2.0/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.2.0/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.2.0/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.2.0/using/performance/latency-reduction.md b/content/riak/kv/2.2.0/using/performance/latency-reduction.md index 76b1ea2aee..4728c9d805 100644 --- a/content/riak/kv/2.2.0/using/performance/latency-reduction.md +++ b/content/riak/kv/2.2.0/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.2.0/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.2.0/using/performance/multi-datacenter-tuning.md index 7f035614d3..77d03c1657 100644 --- a/content/riak/kv/2.2.0/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.2.0/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.2.0/using/performance +[perf index]: {{}}riak/kv/2.2.0/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.2.0/using/performance/open-files-limit.md b/content/riak/kv/2.2.0/using/performance/open-files-limit.md index c46922043b..30bc64f04f 100644 --- a/content/riak/kv/2.2.0/using/performance/open-files-limit.md +++ b/content/riak/kv/2.2.0/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/ops/tuning/open-files-limit/ --- -[plan backend]: /riak/kv/2.2.0/setup/planning/backend/ +[plan backend]: {{}}riak/kv/2.2.0/setup/planning/backend/ [blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files Riak KV can accumulate a large number of open file handles during operation. 
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles. diff --git a/content/riak/kv/2.2.0/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.2.0/using/performance/v2-scheduling-fullsync.md index bd69b75be9..665bb6fb0e 100644 --- a/content/riak/kv/2.2.0/using/performance/v2-scheduling-fullsync.md +++ b/content/riak/kv/2.2.0/using/performance/v2-scheduling-fullsync.md @@ -14,7 +14,7 @@ commercial_offering: true --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.0/using/reference/bucket-types.md b/content/riak/kv/2.2.0/using/reference/bucket-types.md index 1daf17d13d..7ef205d2f7 100644 --- a/content/riak/kv/2.2.0/using/reference/bucket-types.md +++ b/content/riak/kv/2.2.0/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.2.0/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.2.0/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.2.0/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.2.0/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond.
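For reference, the older per-bucket approach described above can be exercised directly over HTTP. A minimal sketch against a local node (host, port, and bucket name are assumptions) might be:

```curl
curl -XPUT http://localhost:8098/buckets/my_bucket/props \
  -H "Content-Type: application/json" \
  -d '{"props": {"n_val": 5, "allow_mult": true}}'
```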
@@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.2.0/developing/data-types), and [strong consistency](/riak/kv/2.2.0/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.2.0/developing/data-types), and [strong consistency]({{}}riak/kv/2.2.0/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.2.0/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.2.0/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.0/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.2.0/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.0/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.0/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.2.0/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.2.0/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolution in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.2.0/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution) with the appropriate application-side business logic.
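One way to see the two `allow_mult` defaults side by side on a test cluster is with the `bucket-type` subcommands (these are real `riak-admin` subcommands; the type name is made up for this sketch):

```bash
# a freshly created-and-activated bucket type defaults to allow_mult = true
riak-admin bucket-type create mult_demo '{"props":{}}'
riak-admin bucket-type activate mult_demo
riak-admin bucket-type status mult_demo | grep allow_mult

# buckets in the default namespace keep the legacy allow_mult = false
riak-admin bucket-type status default | grep allow_mult
```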
To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.2.0/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.2.0/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.2.0/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.2.0/developing/usage/commit-hooks/#post-commit-hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.2.0/learn/concepts/buckets) and [keys](/riak/kv/2.2.0/learn/concepts/keys-and-objects). +because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.2.0/learn/concepts/buckets) and [keys]({{}}riak/kv/2.2.0/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.2.0/using/reference/custom-code.md b/content/riak/kv/2.2.0/using/reference/custom-code.md index 1d141d4871..f06b41994a 100644 --- a/content/riak/kv/2.2.0/using/reference/custom-code.md +++ b/content/riak/kv/2.2.0/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.2.0/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.2.0/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.2.0/developing/usage/commit-hooks), which +Your developers can compile [custom Erlang code]({{}}riak/kv/2.2.0/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file must have the same name as the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.2.0/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.2.0/using/admin/riak-admin/#wait-for-service).
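A hedged sketch of that rolling pattern, using `riak-admin wait-for-service` (the node name is an assumption; substitute your own):

```bash
# after restarting a node, block until its KV service is up
# before moving on to the next node in the rolling restart
riak-admin wait-for-service riak_kv riak@192.168.1.10
```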
{{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.2.0/using/reference/handoff.md b/content/riak/kv/2.2.0/using/reference/handoff.md index 82e7b4e7cf..d281dcbf72 100644 --- a/content/riak/kv/2.2.0/using/reference/handoff.md +++ b/content/riak/kv/2.2.0/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.2.0/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.2.0/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.2.0/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.2.0/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.2.0/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.2.0/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.2.0/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.2.0/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.2.0/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.2.0/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.2.0/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.2.0/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.2.0/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.2.0/configuring/reference/#vnode_management_timer). 
Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.2.0/using/reference/jmx.md b/content/riak/kv/2.2.0/using/reference/jmx.md index 471f3d21b8..c235f50cf9 100644 --- a/content/riak/kv/2.2.0/using/reference/jmx.md +++ b/content/riak/kv/2.2.0/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.0/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.2.0/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.2.0/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.2.0/using/reference/logging.md b/content/riak/kv/2.2.0/using/reference/logging.md index 72a97e1218..65c7eec755 100644 --- a/content/riak/kv/2.2.0/using/reference/logging.md +++ b/content/riak/kv/2.2.0/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.0/ops/running/logging --- -[cluster ops log]: /riak/kv/2.2.0/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.2.0/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files](/riak/kv/2.2.0/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.2.0/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.2.0/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.2.0/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. 
* `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.2.0/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.2.0/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.2.0/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.2.0/using/reference/multi-datacenter/comparison.md index 1a4b923875..263cda2236 100644 --- a/content/riak/kv/2.2.0/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.2.0/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.2.0/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.2.0/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.2.0/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.2.0/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. -* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.2.0/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.2.0/using/reference/runtime-interaction.md b/content/riak/kv/2.2.0/using/reference/runtime-interaction.md index 8322abfe7f..908241ace9 100644 --- a/content/riak/kv/2.2.0/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.2.0/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.0/ops/advanced/runtime --- -[config reference]: /riak/kv/2.2.0/configuring/reference -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.2.0/configuring/reference +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.2.0/using/reference/search.md b/content/riak/kv/2.2.0/using/reference/search.md index 31f7536903..881686fbfc 100644 --- a/content/riak/kv/2.2.0/using/reference/search.md +++ b/content/riak/kv/2.2.0/using/reference/search.md @@ -15,21 +15,21 @@ aliases: - /riak/kv/2.2.0/dev/advanced/search --- -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters -[configuring search]: /riak/kv/2.2.0/configuring/search +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters +[configuring search]: {{}}riak/kv/2.2.0/configuring/search > **Note on search 2.0 vs. legacy search** > > This document refers to Riak search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). 
For information about the deprecated Riak search, visit [the old Using Riak search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -search, you should check out the [Using Search](/riak/kv/2.2.0/developing/usage/search) document. +search, you should check out the [Using Search]({{}}riak/kv/2.2.0/developing/usage/search) document. -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -127,7 +127,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.2.0/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.2.0/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -289,7 +289,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on to how customize your own [search schema](/riak/kv/2.2.0/developing/usage/search-schemas). +You can also find more information on how to customize your own [search schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -299,7 +299,7 @@ indexed. ## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.2.0/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -354,7 +354,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.2.0/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.2.0/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.2.0/using/reference/secondary-indexes.md b/content/riak/kv/2.2.0/using/reference/secondary-indexes.md index 8d29018d4c..b9399b8aab 100644 --- a/content/riak/kv/2.2.0/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.2.0/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.2.0/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.2.0/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.2.0/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e.
if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.2.0/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.2.0/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.2.0/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.2.0/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.2.0/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.2.0/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.2.0/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.2.0/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.2.0/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.2.0/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. 
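To make the write-time tagging described above concrete, here is a sketch against the HTTP interface (host, port, bucket, key, and index name are all assumptions; the `_bin` suffix marks a binary index):

```curl
# write an object tagged with a secondary index entry
curl -XPUT http://localhost:8098/buckets/users/keys/john \
  -H 'x-riak-index-email_bin: john@example.com' \
  -H 'Content-Type: application/json' \
  -d '{"name": "John"}'

# later, retrieve the matching keys by querying the index
curl http://localhost:8098/buckets/users/index/email_bin/john@example.com
```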
diff --git a/content/riak/kv/2.2.0/using/reference/statistics-monitoring.md b/content/riak/kv/2.2.0/using/reference/statistics-monitoring.md index cd1aa746fe..e5d69b2cef 100644 --- a/content/riak/kv/2.2.0/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.2.0/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.2.0/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.2.0/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.2.0/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.2.0/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.2.0/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.2.0/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.2.0/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.2.0/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual memory and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.2.0/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.2.0/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.2.0/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.2.0/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -156,7 +156,7 @@ Metric | Description ## Command-line Interface -The [`riak-admin`](/riak/kv/2.2.0/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.2.0/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -181,14 +181,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.2.0/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.2.0/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information.
Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.2.0/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.2.0/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -235,7 +235,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.2.0/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.2.0/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -259,7 +259,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.2.0/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.2.0/developing/api/http/status) endpoint is also available. #### Nagios @@ -333,14 +333,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.2.0/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.2.0/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.2.0/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.2.0/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -362,9 +362,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.2.0/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.2.0/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.2.0/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.2.0/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -380,9 +380,9 @@ Docs](https://github.com/basho/basho_docs). 
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.2.0/using/reference/strong-consistency.md b/content/riak/kv/2.2.0/using/reference/strong-consistency.md index b3e4e0c9fc..4e05cbdf0f 100644 --- a/content/riak/kv/2.2.0/using/reference/strong-consistency.md +++ b/content/riak/kv/2.2.0/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.0/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.2.0/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.0/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.2.0/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.2.0/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.2.0/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.2.0/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.2.0/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.2.0/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.2.0/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.2.0/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. 
Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.2.0/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.2.0/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.2.0/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.2.0/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter.md index 1677e3af54..63bc3c6eb2 100644 --- a/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter.md @@ -16,7 +16,7 @@ toc: true [v2 mdc fullsync]: ./scheduling-fullsync {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.0/using/reference/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.0/using/reference/v3-multi-datacenter/) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter/architecture.md index 87e227781a..7663f257c8 100644 --- a/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter/architecture.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.0/using/reference/v3-multi-datacenter/architecture/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.0/using/reference/v3-multi-datacenter/architecture/) instead. {{% /note %}} @@ -83,7 +83,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.2.0/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.2.0/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -95,7 +95,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{< baseurl >}}images/MDC_Full-sync-small.png)
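A fullsync exchange like the one illustrated above can be driven and observed from the shell; a minimal sketch, assuming v2 replication has already been configured between the two clusters:

```bash
# On the primary (listener) cluster: start a fullsync pass,
# then watch the replication counters as keys are compared.
riak-repl start-fullsync
riak-repl status
```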
## Realtime Replication @@ -113,7 +113,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{< baseurl >}}images/MDC-real-time-sync-small.png)
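For completeness, the listener/site pairing that both fullsync and realtime rely on is established with `riak-repl`; a sketch of a v2 setup (deprecated, per the warnings in these files), where the node name, IP, port, and site name are all assumptions:

```bash
# On a node in the primary cluster: expose a replication listener
riak-repl add-listener riak@10.0.1.1 10.0.1.1 9010

# On a node in the secondary cluster: register that listener as a site
riak-repl add-site 10.0.1.1 9010 primary_site
```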
## Restrictions @@ -121,6 +121,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.2.0/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.2.0/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.2.0/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.2.0/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter/scheduling-fullsync.md index dd99506bd1..fbc44aa976 100644 --- a/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.2.0/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.0/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.0/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/aae.md index a2afb069ec..5c2f3b971c 100644 --- a/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.2.0/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.2.0/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.2.0/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/architecture.md index f1fd5eda9b..60d5c42923 100644 --- a/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.0/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.2.0/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.2.0/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.2.0/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.2.0/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime1.png)
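The commands themselves fall outside this hunk; a typical v3 source-side sequence looks roughly like the sketch below, where the cluster names and the sink's cluster-manager address are assumptions:

```bash
# Name the local cluster, connect it to the sink, then enable
# and start realtime replication toward the sink.
riak-repl clustername source_cluster
riak-repl connect 10.0.2.10:9080
riak-repl realtime enable sink_cluster
riak-repl realtime start sink_cluster
```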
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime6.png)
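With realtime replication flowing end to end as in the figures above, cascading of realtime writes (the subject of the cascading-writes changes just below) can be inspected and toggled cluster-wide; a brief sketch:

```bash
riak-repl realtime cascades          # show the current setting
riak-repl realtime cascades never    # stop realtime writes from cascading
riak-repl realtime cascades always   # re-enable cascading
```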
## Restrictions diff --git a/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/cascading-writes.md index 4b87b7a466..d31031a97b 100644 --- a/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/cascading-writes.md @@ -83,7 +83,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{< baseurl >}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter) for more information. To show the current settings: diff --git a/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/scheduling-fullsync.md index 50bf91a525..18cbc52964 100644 --- a/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.2.0/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.0/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.2.0/configuring/reference/#advanced-configuration +[config reference#advanced]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.2.0/using/repair-recovery/errors.md b/content/riak/kv/2.2.0/using/repair-recovery/errors.md index 2f7eb322e2..2e60f56fb9 100644 --- a/content/riak/kv/2.2.0/using/repair-recovery/errors.md +++ b/content/riak/kv/2.2.0/using/repair-recovery/errors.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.2.0/configuring/reference +[config reference]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1. +`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1.
`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. 
`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.2.0/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.2.0/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.2.0/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.2.0/using/repair-recovery/failure-recovery.md index cce7661d99..7f537bf4f9 100644 --- a/content/riak/kv/2.2.0/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.2.0/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.2.0/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.2.0/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.2.0/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.2.0/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
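Sibling explosion of the kind described above usually shows up in the node's sibling statistics well before it becomes a latency problem; a quick check, assuming the default HTTP port:

```bash
# The node_get_fsm_siblings_* percentiles track how many siblings
# reads are encountering; steadily climbing values are a warning sign.
curl -s http://localhost:8098/stats | tr ',' '\n' | grep node_get_fsm_siblings
```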
@@ -116,7 +116,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak/kv/2.2.0/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.2.0/using/repair-recovery/repairs.md b/content/riak/kv/2.2.0/using/repair-recovery/repairs.md index fce5733eda..c0081af502 100644 --- a/content/riak/kv/2.2.0/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.2.0/using/repair-recovery/repairs.md @@ -21,13 +21,13 @@ aliases: - /riak/kv/2.2.0/ops/running/recovery/repairing-partitions --- -[cluster ops aae]: /riak/kv/2.2.0/using/cluster-operations/active-anti-entropy/ -[config ref]: /riak/kv/2.2.0/configuring/reference/ +[cluster ops aae]: {{}}riak/kv/2.2.0/using/cluster-operations/active-anti-entropy/ +[config ref]: {{}}riak/kv/2.2.0/configuring/reference/ [Erlang shell]: http://learnyousomeerlang.com/starting-out -[glossary AAE]: /riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae -[glossary readrep]: /riak/kv/2.2.0/learn/glossary/#read-repair -[search config]: /riak/kv/2.2.0/configuring/search/#search-config-settings -[tiered storage]: /riak/kv/2.2.0/setup/planning/backend/leveldb/#tiered-storage +[glossary AAE]: {{}}riak/kv/2.2.0/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{}}riak/kv/2.2.0/learn/glossary/#read-repair +[search config]: {{}}riak/kv/2.2.0/configuring/search/#search-config-settings +[tiered storage]: {{}}riak/kv/2.2.0/setup/planning/backend/leveldb/#tiered-storage @@ -237,23 +237,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.2.0/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.2.0/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.2.0/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/) is enabled. 
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.2.0/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.2.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.2.0/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.2.0/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.2.0/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.2.0/using/repair-recovery/rolling-replaces.md index e88db9fb10..3c124ef545 100644 --- a/content/riak/kv/2.2.0/using/repair-recovery/rolling-replaces.md +++ b/content/riak/kv/2.2.0/using/repair-recovery/rolling-replaces.md @@ -12,9 +12,9 @@ menu: toc: true --- -[upgrade]: /riak/kv/2.2.0/setup/upgrading/cluster/ -[rolling restarts]: /riak/kv/2.2.0/using/repair-recovery/rolling-restart/ -[add node]: /riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes +[upgrade]: {{}}riak/kv/2.2.0/setup/upgrading/cluster/ +[rolling restarts]: {{}}riak/kv/2.2.0/using/repair-recovery/rolling-restart/ +[add node]: {{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. diff --git a/content/riak/kv/2.2.0/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.2.0/using/repair-recovery/rolling-restart.md index 37db125cff..9ead799e5e 100644 --- a/content/riak/kv/2.2.0/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.2.0/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.0/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.2.0/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.2.0/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.2.0/using/running-a-cluster.md b/content/riak/kv/2.2.0/using/running-a-cluster.md index 2e0db1263e..368588f38d 100644 --- a/content/riak/kv/2.2.0/using/running-a-cluster.md +++ b/content/riak/kv/2.2.0/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. 
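The per-node steps that the rolling-restart document introduces above are not part of this hunk; in outline, each node is stopped, started, and then checked for service availability and pending handoff before moving on. A sketch, where the node name is an assumption:

```bash
riak stop
riak start
riak-admin wait-for-service riak_kv riak@192.168.1.10  # wait for KV to come up
riak-admin transfers                                   # proceed once no transfers remain
```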
-Most configuration changes will be applied to the [configuration file](/riak/kv/2.2.0/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.2.0/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.2.0/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. ## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.2.0/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.2.0/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.2.0/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.2.0/using/cluster-operations/adding-removing-nodes) from a cluster. 
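Joining those additional nodes is itself a short staged sequence; a minimal sketch, where the seed node's name is an assumption:

```bash
# On each new node: stage a join against any node already in the cluster
riak-admin cluster join riak@192.168.1.10

# Then, once, on any node: review and commit the staged changes
riak-admin cluster plan
riak-admin cluster commit
```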
> **Ring Creation Size** > diff --git a/content/riak/kv/2.2.0/using/security.md b/content/riak/kv/2.2.0/using/security.md index 70fd1b48ac..59346de5a3 100644 --- a/content/riak/kv/2.2.0/using/security.md +++ b/content/riak/kv/2.2.0/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.2.0/ops/advanced/security --- -[config reference search]: /riak/kv/2.1.4/configuring/reference/#search -[config search enabling]: /riak/kv/2.1.4/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.2.0/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.2.0/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.2.0/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.1.4/using/security/basics -[security managing]: /riak/kv/2.1.4/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.2.0/using/security/basics +[security managing]: {{}}riak/kv/2.2.0/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.1.4/developing/usage/search +[usage search]: {{}}riak/kv/2.2.0/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.2.0/using/security/basics.md b/content/riak/kv/2.2.0/using/security/basics.md index fa3b879816..9413e86380 100644 --- a/content/riak/kv/2.2.0/using/security/basics.md +++ b/content/riak/kv/2.2.0/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.2.0/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.2.0/using/security/managing-sources/). As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.2.0/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.2.0/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.2.0/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.2.0/using/reference/custom-code). 1. 
Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.2.0/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.2.0/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.2.0/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.2.0/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{}}riak/kv/2.2.0/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.2.0/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{}}riak/kv/2.2.0/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.2.0/configuring/search/) document. +[Riak Search Settings]({{}}riak/kv/2.2.0/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.2.0/using/security/managing-sources/). +An more in-depth tutorial can be found in [Managing Security Sources]({{}}riak/kv/2.2.0/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.2.0/using/security/managing-sources/) document. 
+`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.2.0/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.2.0/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.2.0/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.2.0/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.2.0/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.2.0/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.2.0/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.2.0/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.2.0/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). -If, however, you are using the [HTTP API](/riak/kv/2.2.0/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.2.0/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. 
By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.2.0/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.2.0/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.2.0/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.2.0/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.2.0/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.2.0/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.2.0/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.2.0/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.2.0/configuring/reference/#directories).
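Tying the certificate configuration above together: each node's `riak.conf` points at its certs, and it is worth sanity-checking the files before restarting. A sketch, with paths that are assumptions based on the `/etc` default mentioned in the text:

```bash
# riak.conf is expected to carry entries along these lines:
#   ssl.certfile   = /etc/riak/cert.pem
#   ssl.keyfile    = /etc/riak/key.pem
#   ssl.cacertfile = /etc/riak/cacert.pem

# Confirm the cert is readable, unexpired, and carries the expected subject
openssl x509 -in /etc/riak/cert.pem -noout -subject -dates
```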
platform_data_dir The directory in which Riak stores its storage backend data, as well -as active anti-entropy data, and cluster metadata. ./data
alive_tokens Determines the number of ticks the leader will wait to hear from its -associated vnode before assuming that the vnode +associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before ensemble_tick * alive_tokens milliseconds have elapsed, the leader will @@ -1833,8 +1833,8 @@ package) and in R14B04 via a custom repository and branch.
vnode_management_timer Sets the frequency with which vnodes attempt to trigger handoff between +href="{{< baseurl >}}riak/kv/2.2.0/learn/glossary/#vnode">vnodes attempt to trigger handoff between this node and other nodes in the cluster. 10s (10 seconds)
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{< baseurl >}}riak/kv/2.2.0/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificates by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.2.0/using/security/managing-sources.md b/content/riak/kv/2.2.0/using/security/managing-sources.md index 343df13a9b..e4965709b9 100644 --- a/content/riak/kv/2.2.0/using/security/managing-sources.md +++ b/content/riak/kv/2.2.0/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.2.0/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{< baseurl >}}riak/kv/2.2.0/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.2.0/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.2.0/using/security/basics/#the-basics). +[created]({{< baseurl >}}riak/kv/2.2.0/using/security/basics/#user-management) and that +security has been [enabled]({{< baseurl >}}riak/kv/2.2.0/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.2.0/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.2.0/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.2.0/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.2.0/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.2.1/_reference-links.md b/content/riak/kv/2.2.1/_reference-links.md index e68247178f..63676b3ed5 100644 --- a/content/riak/kv/2.2.1/_reference-links.md +++ b/content/riak/kv/2.2.1/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.2.1/downloads/ -[install index]: /riak/kv/2.2.1/setup/installing -[upgrade index]: /riak/kv/2.2.1/upgrading -[plan index]: /riak/kv/2.2.1/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.2.1/configuring/reference/ -[manage index]: /riak/kv/2.2.1/using/managing -[performance index]: /riak/kv/2.2.1/using/performance -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.2.1/downloads/ +[install index]: {{}}riak/kv/2.2.1/setup/installing +[upgrade index]: {{}}riak/kv/2.2.1/upgrading +[plan index]: {{}}riak/kv/2.2.1/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.2.1/configuring/reference/ +[manage index]: {{}}riak/kv/2.2.1/using/managing +[performance index]: {{}}riak/kv/2.2.1/using/performance +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.2.1/setup/planning -[plan start]: /riak/kv/2.2.1/setup/planning/start -[plan backend]: /riak/kv/2.2.1/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.1/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.2.1/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.2.1/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.2.1/setup/planning/best-practices -[plan future]: /riak/kv/2.2.1/setup/planning/future +[plan index]: {{}}riak/kv/2.2.1/setup/planning +[plan start]: {{}}riak/kv/2.2.1/setup/planning/start +[plan backend]: {{}}riak/kv/2.2.1/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.1/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.2.1/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.2.1/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.2.1/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.2.1/setup/planning/future ## Installing -[install index]: /riak/kv/2.2.1/setup/installing -[install aws]: /riak/kv/2.2.1/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.2.1/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.2.1/setup/installing/freebsd -[install mac osx]: /riak/kv/2.2.1/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.2.1/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.2.1/setup/installing/smartos -[install solaris]: /riak/kv/2.2.1/setup/installing/solaris -[install suse]: /riak/kv/2.2.1/setup/installing/suse -[install windows azure]: /riak/kv/2.2.1/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.2.1/setup/installing +[install aws]: {{}}riak/kv/2.2.1/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.2.1/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.2.1/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.2.1/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.2.1/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.2.1/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.2.1/setup/installing/solaris +[install suse]: {{}}riak/kv/2.2.1/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.2.1/setup/installing/windows-azure -[install source index]: /riak/kv/2.2.1/setup/installing/source -[install source erlang]: /riak/kv/2.2.1/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.2.1/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.2.1/setup/installing/source +[install source erlang]: {{}}riak/kv/2.2.1/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.2.1/setup/installing/source/jvm -[install verify]: /riak/kv/2.2.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.1/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.2.1/setup/upgrading -[upgrade checklist]: /riak/kv/2.2.1/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.2.1/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.2.1/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.2.1/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.2.1/setup/downgrade +[upgrade index]: {{}}riak/kv/2.2.1/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.2.1/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.2.1/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.2.1/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.2.1/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.2.1/setup/downgrade ## Configuring -[config index]: /riak/kv/2.2.1/configuring -[config basic]: /riak/kv/2.2.1/configuring/basic -[config backend]: /riak/kv/2.2.1/configuring/backend -[config manage]: /riak/kv/2.2.1/configuring/managing -[config reference]: /riak/kv/2.2.1/configuring/reference/ -[config strong consistency]: /riak/kv/2.2.1/configuring/strong-consistency -[config load balance]: /riak/kv/2.2.1/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.2.1/configuring/mapreduce -[config search]: /riak/kv/2.2.1/configuring/search/ +[config index]: {{}}riak/kv/2.2.1/configuring +[config basic]: {{}}riak/kv/2.2.1/configuring/basic +[config backend]: {{}}riak/kv/2.2.1/configuring/backend +[config manage]: 
{{}}riak/kv/2.2.1/configuring/managing +[config reference]: {{}}riak/kv/2.2.1/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.2.1/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.2.1/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.2.1/configuring/mapreduce +[config search]: {{}}riak/kv/2.2.1/configuring/search/ -[config v3 mdc]: /riak/kv/2.2.1/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.2.1/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.2.1/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.2.1/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.2.1/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.2.1/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.2.1/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.2.1/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.2.1/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.2.1/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.2.1/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.2.1/using/ -[use admin commands]: /riak/kv/2.2.1/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.2.1/using/running-a-cluster +[use index]: {{}}riak/kv/2.2.1/using/ +[use admin commands]: {{}}riak/kv/2.2.1/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.2.1/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.2.1/using/reference/custom-code -[use ref handoff]: /riak/kv/2.2.1/using/reference/handoff -[use ref monitoring]: /riak/kv/2.2.1/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.2.1/using/reference/search -[use ref 2i]: /riak/kv/2.2.1/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.2.1/using/reference/snmp -[use ref strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.2.1/using/reference/jmx -[use ref obj del]: /riak/kv/2.2.1/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.2.1/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.2.1/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.2.1/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.2.1/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.2.1/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.2.1/using/reference/search +[use ref 2i]: {{}}riak/kv/2.2.1/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.2.1/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.2.1/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.2.1/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.2.1/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.2.1/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.2.1/using/admin/ -[use admin commands]: /riak/kv/2.2.1/using/admin/commands/ -[use admin riak cli]: 
/riak/kv/2.2.1/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.2.1/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.2.1/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.2.1/using/admin/ +[use admin commands]: {{}}riak/kv/2.2.1/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.2.1/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.2.1/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.2.1/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.2.1/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.2.1/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.2.1/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.2.1/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.2.1/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.2.1/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.2.1/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.2.1/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.2.1/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.2.1/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.2.1/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.2.1/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.2.1/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.2.1/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.2.1/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.2.1/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.2.1/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.2.1/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.2.1/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.2.1/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.2.1/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.2.1/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.2.1/using/repair-recovery -[repair recover index]: /riak/kv/2.2.1/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.2.1/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.2.1/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.2.1/using/security/ -[security basics]: /riak/kv/2.2.1/using/security/basics -[security managing]: /riak/kv/2.2.1/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.2.1/using/security/ +[security basics]: {{}}riak/kv/2.2.1/using/security/basics +[security managing]: {{}}riak/kv/2.2.1/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.2.1/using/performance/ -[perf 
benchmark]: /riak/kv/2.2.1/using/performance/benchmarking -[perf open files]: /riak/kv/2.2.1/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.2.1/using/performance/erlang -[perf aws]: /riak/kv/2.2.1/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.2.1/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.2.1/using/performance/ +[perf benchmark]: {{}}riak/kv/2.2.1/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.2.1/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.2.1/using/performance/erlang +[perf aws]: {{}}riak/kv/2.2.1/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.2.1/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.2.1/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.2.1/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.2.1/developing -[dev client libraries]: /riak/kv/2.2.1/developing/client-libraries -[dev data model]: /riak/kv/2.2.1/developing/data-modeling -[dev data types]: /riak/kv/2.2.1/developing/data-types -[dev kv model]: /riak/kv/2.2.1/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.2.1/developing +[dev client libraries]: {{}}riak/kv/2.2.1/developing/client-libraries +[dev data model]: {{}}riak/kv/2.2.1/developing/data-modeling +[dev data types]: {{}}riak/kv/2.2.1/developing/data-types +[dev kv model]: {{}}riak/kv/2.2.1/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.2.1/developing/getting-started -[getting started java]: /riak/kv/2.2.1/developing/getting-started/java -[getting started ruby]: /riak/kv/2.2.1/developing/getting-started/ruby -[getting started python]: /riak/kv/2.2.1/developing/getting-started/python -[getting started php]: /riak/kv/2.2.1/developing/getting-started/php -[getting started csharp]: /riak/kv/2.2.1/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.2.1/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.2.1/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.2.1/developing/getting-started/golang - -[obj model java]: /riak/kv/2.2.1/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.2.1/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.2.1/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.2.1/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.2.1/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.2.1/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.2.1/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.2.1/developing/getting-started +[getting started java]: {{}}riak/kv/2.2.1/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.2.1/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.2.1/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.2.1/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.2.1/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.2.1/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.2.1/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.2.1/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.2.1/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.2.1/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.2.1/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.2.1/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.2.1/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.2.1/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.2.1/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.2.1/developing/usage -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.2.1/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.2.1/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.2.1/developing/usage/content-types -[usage create objects]: /riak/kv/2.2.1/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.2.1/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.2.1/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.2.1/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.1/developing/usage/search -[usage search schema]: /riak/kv/2.2.1/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.2.1/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.2.1/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.2.1/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.2.1/developing/usage +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.2.1/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.2.1/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.2.1/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.2.1/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.2.1/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.2.1/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.2.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search +[usage search schema]: {{}}riak/kv/2.2.1/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.2.1/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.2.1/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.2.1/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.2.1/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.2.1/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.2.1/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.2.1/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.2.1/developing/api/backend -[dev api http]: /riak/kv/2.2.1/developing/api/http -[dev api http status]: /riak/kv/2.2.1/developing/api/http/status -[dev api pbc]: /riak/kv/2.2.1/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.2.1/developing/api/backend +[dev api http]: {{}}riak/kv/2.2.1/developing/api/http +[dev api http status]: {{}}riak/kv/2.2.1/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.2.1/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.2.1/learn/glossary/ -[glossary aae]: /riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.2.1/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.2.1/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.2.1/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode -[concept aae]: /riak/kv/2.2.1/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.2.1/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.2.1/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.2.1/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.2.1/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.2.1/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.2.1/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.2.1/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.2.1/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.2.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.2.1/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.2.1/add-ons.md b/content/riak/kv/2.2.1/add-ons.md index 9fa39c5019..a72f47e7c8 100644 --- a/content/riak/kv/2.2.1/add-ons.md +++ b/content/riak/kv/2.2.1/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.2.1/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.2.1/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.2.1/add-ons/redis/developing-rra.md b/content/riak/kv/2.2.1/add-ons/redis/developing-rra.md index 7132dc2689..7d87fd67c0 100644 --- a/content/riak/kv/2.2.1/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.2.1/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.2.1/developing/api/http +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.2.1/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.2.1/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.2.1/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.2.1/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.2.1/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.2.1/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.2.1/add-ons/redis/redis-add-on-features.md index 486eedcc52..150cc5f703 100644 --- a/content/riak/kv/2.2.1/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.2.1/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
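Before turning to the write-around strategy, it may help to see the read-through flow from the client side. A minimal sketch, assuming RRA's Redis listener is on localhost port 22122; the port and key name are illustrative:

```bash
# First read: a cache miss. RRA fetches the value from Riak KV,
# returns it to the client, and caches it in Redis for CACHE_TTL.
redis-cli -p 22122 GET test:user:1

# Reads repeated within CACHE_TTL are cache hits served from Redis;
# after the TTL expires, the next read goes back to Riak KV.
redis-cli -p 22122 GET test:user:1
```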
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.2.1/add-ons/redis/set-up-rra.md b/content/riak/kv/2.2.1/add-ons/redis/set-up-rra.md index ff5730e40b..74395108c1 100644 --- a/content/riak/kv/2.2.1/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.2.1/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.2.1/setup/installing -[perf open files]: /riak/kv/2.2.1/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.2.1/setup/installing +[perf open files]: {{}}riak/kv/2.2.1/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. diff --git a/content/riak/kv/2.2.1/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.2.1/add-ons/redis/set-up-rra/deployment-models.md index a128aa173a..1ef34241ca 100644 --- a/content/riak/kv/2.2.1/add-ons/redis/set-up-rra/deployment-models.md +++ b/content/riak/kv/2.2.1/add-ons/redis/set-up-rra/deployment-models.md @@ -13,9 +13,9 @@ toc: true commercial_offering: true --- -[Local-deployment]: /images/redis/rra_deployment_local.png -[Colocated-deployment]: /images/redis/rra_deployment_colocated.png -[Distributed-deployment]: /images/redis/rra_deployment_distributed.png +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png ## Deployment Models @@ -24,7 +24,7 @@ commercial_offering: true In a local cache deployment, the RRA and Redis are deployed to the application server. -![Local-deployment](/images/redis/rra_deployment_local.png) +![Local-deployment]({{}}images/redis/rra_deployment_local.png) Connections: @@ -65,7 +65,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so are not multiplied by the number of servers. -![Colocated-deployment](/images/redis/rra_deployment_colocated.png) +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) Connections: @@ -103,7 +103,7 @@ Disadvantages: In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes. 
-![Distributed-deployment](/images/redis/rra_deployment_distributed.png) +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) Connections: diff --git a/content/riak/kv/2.2.1/add-ons/redis/using-rra.md b/content/riak/kv/2.2.1/add-ons/redis/using-rra.md index 3216035985..7db025ec29 100644 --- a/content/riak/kv/2.2.1/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.2.1/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.2.1/developing/api/http/ +[dev api http]: {{}}riak/kv/2.2.1/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.2.1/configuring/backend.md b/content/riak/kv/2.2.1/configuring/backend.md index 3298add3c7..bbdd43f35b 100644 --- a/content/riak/kv/2.2.1/configuring/backend.md +++ b/content/riak/kv/2.2.1/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.1/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.1/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.2.1/configuring/basic.md b/content/riak/kv/2.2.1/configuring/basic.md index 715b438ff4..7133d2325e 100644 --- a/content/riak/kv/2.2.1/configuring/basic.md +++ b/content/riak/kv/2.2.1/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.2.1/ops/building/configuration/ --- -[config reference]: /riak/kv/2.2.1/configuring/reference -[use running cluster]: /riak/kv/2.2.1/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.2.1/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.2.1/using/performance/erlang -[plan start]: /riak/kv/2.2.1/setup/planning/start -[plan best practices]: /riak/kv/2.2.1/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.2.1/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.2.1/setup/planning/backend -[plan backend multi]: /riak/kv/2.2.1/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.2.1/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.2.1/using/performance/benchmarking -[perf open files]: /riak/kv/2.2.1/using/performance/open-files-limit -[perf index]: /riak/kv/2.2.1/using/performance -[perf aws]: /riak/kv/2.2.1/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.2.1/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.2.1/configuring/reference +[use running cluster]: {{}}riak/kv/2.2.1/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.2.1/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.2.1/using/performance/erlang +[plan start]: 
{{}}riak/kv/2.2.1/setup/planning/start +[plan best practices]: {{}}riak/kv/2.2.1/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.2.1/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.2.1/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.2.1/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.2.1/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.2.1/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.2.1/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.2.1/using/performance +[perf aws]: {{}}riak/kv/2.2.1/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.2.1/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.2.1/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.2.1/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
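As a concrete illustration of that mechanism, here is a minimal sketch assuming a node listening on `127.0.0.1:8098` and an illustrative bucket named `test`:

```bash
# DELETE on the props resource resets the bucket's properties, forcing
# it to pick up the current defaults (the reset mechanism described above).
curl -XDELETE http://127.0.0.1:8098/buckets/test/props
```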
## System tuning diff --git a/content/riak/kv/2.2.1/configuring/global-object-expiration.md b/content/riak/kv/2.2.1/configuring/global-object-expiration.md index c7b6d3d38f..f42f5e7db6 100644 --- a/content/riak/kv/2.2.1/configuring/global-object-expiration.md +++ b/content/riak/kv/2.2.1/configuring/global-object-expiration.md @@ -10,7 +10,6 @@ menu: project: "riak_kv" project_version: "2.2.1" toc: true -canonical_link: "https://docs.basho.com/riak/kv/latest/configuring/global-object-expiration" --- [ttl]: https://en.wikipedia.org/wiki/Time_to_live diff --git a/content/riak/kv/2.2.1/configuring/load-balancing-proxy.md b/content/riak/kv/2.2.1/configuring/load-balancing-proxy.md index 924b2b6c39..b281a0f125 100644 --- a/content/riak/kv/2.2.1/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.2.1/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.2.1/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.2.1/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.2.1/configuring/managing.md b/content/riak/kv/2.2.1/configuring/managing.md index 0921bd338f..ad96c9e43c 100644 --- a/content/riak/kv/2.2.1/configuring/managing.md +++ b/content/riak/kv/2.2.1/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.2.1/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.2.1/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.2.1/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.2.1/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.2.1/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.2.1/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.2.1/configuring/mapreduce.md b/content/riak/kv/2.2.1/configuring/mapreduce.md index cac6802de4..d09801dfb7 100644 --- a/content/riak/kv/2.2.1/configuring/mapreduce.md +++ b/content/riak/kv/2.2.1/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.2.1/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.2.1/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.2.1/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.2.1/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.2.1/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.2.1/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.2.1/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.2.1/configuring/reference.md b/content/riak/kv/2.2.1/configuring/reference.md index 46205a9e82..594c018bb2 100644 --- a/content/riak/kv/2.2.1/configuring/reference.md +++ b/content/riak/kv/2.2.1/configuring/reference.md @@ -200,7 +200,7 @@ executables are stored. +as active anti-entropy data, and cluster metadata. @@ -1684,7 +1684,7 @@ abandons the leader (in milliseconds). 
This must be set greater than the diff --git a/content/riak/kv/2.2.1/configuring/search.md b/content/riak/kv/2.2.1/configuring/search.md index 41c0271c6d..4e149f8200 100644 --- a/content/riak/kv/2.2.1/configuring/search.md +++ b/content/riak/kv/2.2.1/configuring/search.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.1/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.2.1/developing/usage/search -[usage search schema]: /riak/kv/2.2.1/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.2.1/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.2.1/developing/usage/custom-extractors -[cluster-ops aae throttle]: /riak/kv/2.2.1/using/cluster-operations/active-anti-entropy/#throttling -[config reference]: /riak/kv/2.2.1/configuring/reference -[config reference#search]: /riak/kv/2.2.1/configuring/reference/#search -[glossary aae]: /riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.2.1/using/security/ +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search +[usage search schema]: {{}}riak/kv/2.2.1/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.2.1/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.2.1/developing/usage/custom-extractors +[cluster-ops aae throttle]: {{}}riak/kv/2.2.1/using/cluster-operations/active-anti-entropy/#throttling +[config reference]: {{}}riak/kv/2.2.1/configuring/reference +[config reference#search]: {{}}riak/kv/2.2.1/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.2.1/using/security/ [java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads [java se docs]: http://www.oracle.com/technetwork/java/javase/documentation @@ -150,15 +150,15 @@ Valid values: `on` or `off` ### `search.index.error_threshold.failure_count` -The number of failures encountered while updating a search index within [`search.queue.error_threshold.failure_interval`](#search-queue-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. +The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. Valid values: Integer ### `search.index.error_threshold.failure_interval` -The window of time during which `search.queue.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. +The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. -If [`search.queue.error_threshold.failure_count`](#search-queue-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.queue.error_threshold.reset_interval`](search-queue-error-threshold-reset-interval) has passed. +If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed. 
Valid values: Milliseconds diff --git a/content/riak/kv/2.2.1/configuring/strong-consistency.md b/content/riak/kv/2.2.1/configuring/strong-consistency.md index 0ca9ae84ec..6be9da30d7 100644 --- a/content/riak/kv/2.2.1/configuring/strong-consistency.md +++ b/content/riak/kv/2.2.1/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.2.1/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.2.1/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.2.1/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.2.1/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.2.1/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.2.1/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.2.1/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.2.1/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.2.1/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.2.1/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.2.1/learn/concepts/causal-context -[dev data types]: /riak/kv/2.2.1/developing/data-types -[glossary aae]: /riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.2.1/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.2.1/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.2.1/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.2.1/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.2.1/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.2.1/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.2.1/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.2.1/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.2.1/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.2.1/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.2.1/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.2.1/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.2.1/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.2.1/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.2.1/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.2.1/developing/data-types +[glossary 
aae]: {{}}riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.2.1/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.2.1/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.2.1/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.2.1/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble<br>  • Quorum --- The number of ensemble peers that are either leading or following<br>  • Nodes --- The number of nodes currently online<br>  • Leader --- The current leader node for the ensemble
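The fields in the table above describe the output of `riak-admin ensemble-status`. A minimal sketch of invoking it; the `root` argument (the ID of the root ensemble) is an assumed example:

```bash
# Summarize the consensus subsystem: Enabled, Active, Ring Ready,
# Validation, Metadata, and the list of ensembles.
riak-admin ensemble-status

# Pass an ensemble ID for per-ensemble detail.
riak-admin ensemble-status root
```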
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.2.1/configuring/v2-multi-datacenter.md b/content/riak/kv/2.2.1/configuring/v2-multi-datacenter.md index 190222d7c0..76fd132268 100644 --- a/content/riak/kv/2.2.1/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.1/configuring/v2-multi-datacenter.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.1/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.2.1/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.2.1/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.1/configuring/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication capabilities offer a diff --git a/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/nat.md index 524c2a44c7..4f6003ff96 100644 --- a/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/nat.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.1/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.2.1/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.2.1/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.1/configuring/v3-multi-datacenter/nat/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/nat/) instead. {{% /note %}} Riak Enterprise supports replication of data on networks that use static diff --git a/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/quick-start.md index 8f58f455f7..8e59bd9b1a 100644 --- a/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/quick-start.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.1/configuring/v3-multi-datacenter/quick-start/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/quick-start/) instead. {{% /note %}} The Riak Multi-Datacenter Replication Quick Start will walk you through diff --git a/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/ssl.md index 5240661c87..86fcce9664 100644 --- a/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/ssl.md +++ b/content/riak/kv/2.2.1/configuring/v2-multi-datacenter/ssl.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. 
Please use [v3]({{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl/) instead. {{% /note %}} ## Features diff --git a/content/riak/kv/2.2.1/configuring/v3-multi-datacenter.md b/content/riak/kv/2.2.1/configuring/v3-multi-datacenter.md index d7080525a3..6a2e7cab52 100644 --- a/content/riak/kv/2.2.1/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.2.1/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.1/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.2.1/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.2.1/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/nat.md index 6c72b1278f..7d56e38b8a 100644 --- a/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.1/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. diff --git a/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/quick-start.md index 7e72ddc7c3..0806854bc2 100644 --- a/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.2.1/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.2.1/using/performance -[config v3 mdc]: /riak/kv/2.2.1/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.2.1/using/performance +[config v3 mdc]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl.md index f94ff152ec..86ec3099a8 100644 --- a/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.1/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.2.1/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.2.1/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.2.1/developing/api/backend.md b/content/riak/kv/2.2.1/developing/api/backend.md index 8bff4acc60..f2a35b9572 100644 --- a/content/riak/kv/2.2.1/developing/api/backend.md +++ b/content/riak/kv/2.2.1/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/dev/references/backend-api --- -[plan backend]: /riak/kv/2.2.1/setup/planning/backend +[plan backend]: {{}}riak/kv/2.2.1/setup/planning/backend Riak's storage API uniformly applies to 
all of the [supported backends][plan backend]. This page presents the details of
diff --git a/content/riak/kv/2.2.1/developing/api/http.md b/content/riak/kv/2.2.1/developing/api/http.md
index 4d70fea999..a7fe6238b0 100644
--- a/content/riak/kv/2.2.1/developing/api/http.md
+++ b/content/riak/kv/2.2.1/developing/api/http.md
@@ -29,21 +29,21 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`.
Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.2.1/developing/api/http/get-bucket-props)
-`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.2.1/developing/api/http/set-bucket-props)
-`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.2.1/developing/api/http/reset-bucket-props)
-`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.2.1/developing/api/http/list-buckets)
-`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.2.1/developing/api/http/list-keys)
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.2.1/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.2.1/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.2.1/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.2.1/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.2.1/developing/api/http/list-keys)

## Object-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.2.1/developing/api/http/fetch-object)
-`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object](/riak/kv/2.2.1/developing/api/http/store-object)
-`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.2.1/developing/api/http/store-object)
-`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.2.1/developing/api/http/store-object)
-`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.2.1/developing/api/http/delete-object)
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.2.1/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{}}riak/kv/2.2.1/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.2.1/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.2.1/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.2.1/developing/api/http/delete-object)

## Riak-Data-Type-related Operations

@@ -53,9 +53,9 @@ Method | URL
`POST` | `/types/<type>/buckets/<bucket>/datatypes`
`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`

-For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.2.1/learn/concepts/crdts),
-see the `curl` examples in [Using Data Types](/riak/kv/2.2.1/developing/data-types/#usage-examples)
-and subpages e.g. [sets](/riak/kv/2.2.1/developing/data-types/sets).
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.2.1/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/2.2.1/developing/data-types/#usage-examples)
+and subpages e.g. [sets]({{}}riak/kv/2.2.1/developing/data-types/sets).
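To make the object-related operations above concrete, a minimal sketch, assuming a node listening on `127.0.0.1:8098`; the bucket type, bucket, and key names are illustrative:

```bash
# Store a plain-text object at /types/<type>/buckets/<bucket>/keys/<key>.
curl -XPUT http://127.0.0.1:8098/types/default/buckets/test/keys/hello \
  -H "Content-Type: text/plain" \
  -d "world"

# Fetch the same object back.
curl http://127.0.0.1:8098/types/default/buckets/test/keys/hello
```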
Advanced users may consult the technical documentation inside the Riak
KV internal module `riak_kv_wm_crdt`.
@@ -64,26 +64,26 @@ KV internal module `riak_kv_wm_crdt`.

## Query-related Operations

Method | URL | Doc
:------|:----|:---
-`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.2.1/developing/api/http/mapreduce)
-`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<index_value>` | [HTTP Secondary Indexes](/riak/kv/2.2.1/developing/api/http/secondary-indexes)
-`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<start>/<end>` | [HTTP Secondary Indexes](/riak/kv/2.2.1/developing/api/http/secondary-indexes)
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.2.1/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<index_value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.2.1/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.2.1/developing/api/http/secondary-indexes)

## Server-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/ping` | [HTTP Ping](/riak/kv/2.2.1/developing/api/http/ping)
-`GET` | `/stats` | [HTTP Status](/riak/kv/2.2.1/developing/api/http/status)
-`GET` | `/` | [HTTP List Resources](/riak/kv/2.2.1/developing/api/http/list-resources)
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.2.1/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.2.1/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.2.1/developing/api/http/list-resources)

## Search-related Operations

Method | URL | Doc
:------|:----|:---
-`GET` | `/search/query/<index_name>` | [HTTP Search Query](/riak/kv/2.2.1/developing/api/http/search-query)
-`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.2.1/developing/api/http/search-index-info)
-`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index](/riak/kv/2.2.1/developing/api/http/fetch-search-index)
-`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index](/riak/kv/2.2.1/developing/api/http/store-search-index)
-`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index](/riak/kv/2.2.1/developing/api/http/delete-search-index)
-`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema](/riak/kv/2.2.1/developing/api/http/fetch-search-schema)
-`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema](/riak/kv/2.2.1/developing/api/http/store-search-schema)
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{}}riak/kv/2.2.1/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.2.1/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{}}riak/kv/2.2.1/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{}}riak/kv/2.2.1/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{}}riak/kv/2.2.1/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.2.1/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{}}riak/kv/2.2.1/developing/api/http/store-search-schema)
diff --git a/content/riak/kv/2.2.1/developing/api/http/counters.md b/content/riak/kv/2.2.1/developing/api/http/counters.md
index 08a3032d2c..bb0bd52051 100644
--- a/content/riak/kv/2.2.1/developing/api/http/counters.md
+++ b/content/riak/kv/2.2.1/developing/api/http/counters.md
@@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY

## Response

-The regular POST/PUT ([HTTP Store Object](/riak/kv/2.2.1/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.2.1/developing/api/http/fetch-object)) responses apply here.
+The regular POST/PUT ([HTTP Store Object]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/fetch-object)) responses apply here.
Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata.
diff --git a/content/riak/kv/2.2.1/developing/api/http/fetch-object.md b/content/riak/kv/2.2.1/developing/api/http/fetch-object.md
index 292eefea65..bfc0218e7d 100644
--- a/content/riak/kv/2.2.1/developing/api/http/fetch-object.md
+++ b/content/riak/kv/2.2.1/developing/api/http/fetch-object.md
@@ -41,14 +41,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc
Optional query parameters:
* `r` - (read quorum) how many replicas need to agree when retrieving the
-object ([default is defined by the bucket](/riak/kv/2.2.1/developing/api/http/set-bucket-props))
+object ([default is defined by the bucket]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/set-bucket-props))
* `pr` - how many primary replicas need to be online when doing the read
-([default is defined by the bucket](/riak/kv/2.2.1/developing/api/http/set-bucket-props))
+([default is defined by the bucket]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/set-bucket-props))
* `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error)
-([default is defined by the bucket](/riak/kv/2.2.1/developing/api/http/set-bucket-props))
+([default is defined by the bucket]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/set-bucket-props))
* `notfound_ok` - whether to treat notfounds as successful reads for the
-purposes of R ([default is defined by the bucket](/riak/kv/2.2.1/developing/api/http/set-bucket-props))
+purposes of R ([default is defined by the bucket]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/set-bucket-props))
* `vtag` - when accessing an object with siblings, which sibling to retrieve.
Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information.
@@ -75,7 +75,7 @@ Important headers:
and validation-based caching
* `Last-Modified` - a timestamp for when the object was last written, in
HTTP datetime format
-* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.2.1/learn/glossary/#links)
+* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{< baseurl >}}riak/kv/2.2.1/learn/glossary/#links)
The body of the response will be the contents of the object except when siblings are present.
diff --git a/content/riak/kv/2.2.1/developing/api/http/fetch-search-index.md b/content/riak/kv/2.2.1/developing/api/http/fetch-search-index.md
index 6d264a1b55..918b42f412 100644
--- a/content/riak/kv/2.2.1/developing/api/http/fetch-search-index.md
+++ b/content/riak/kv/2.2.1/developing/api/http/fetch-search-index.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/http/fetch-search-index
---
-Retrieves information about a Riak Search [index](/riak/kv/2.2.1/developing/usage/search/#simple-setup).
+Retrieves information about a Riak Search [index]({{< baseurl >}}riak/kv/2.2.1/developing/usage/search/#simple-setup).
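Before the formal request and response details below, a quick sketch (assuming a local node on port 8098 and an existing index named `famous`; both names are illustrative):

```curl
# Returns the JSON object described under Response below, e.g.
# {"name":"famous","n_val":3,"schema":"_yz_default"}
curl http://localhost:8098/search/index/famous
```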
## Request
@@ -36,7 +36,7 @@ GET /search/index/
## Response
If the index is found, Riak will output a JSON object describing the
-index, including its name, the [`n_val`](/riak/kv/2.2.1/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.2.1/developing/usage/search-schemas) used by the index. Here is an example:
+index, including its name, the [`n_val`]({{< baseurl >}}riak/kv/2.2.1/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{< baseurl >}}riak/kv/2.2.1/developing/usage/search-schemas) used by the index. Here is an example:
```json
{
diff --git a/content/riak/kv/2.2.1/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.2.1/developing/api/http/fetch-search-schema.md
index 8012833497..768e35745d 100644
--- a/content/riak/kv/2.2.1/developing/api/http/fetch-search-schema.md
+++ b/content/riak/kv/2.2.1/developing/api/http/fetch-search-schema.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/http/fetch-search-schema
---
-Retrieves a Riak KV [search schema](/riak/kv/2.2.1/developing/usage/search-schemas).
+Retrieves a Riak KV [search schema]({{< baseurl >}}riak/kv/2.2.1/developing/usage/search-schemas).
## Request
diff --git a/content/riak/kv/2.2.1/developing/api/http/get-bucket-props.md b/content/riak/kv/2.2.1/developing/api/http/get-bucket-props.md
index 4cfb6a7518..9ae21a4156 100644
--- a/content/riak/kv/2.2.1/developing/api/http/get-bucket-props.md
+++ b/content/riak/kv/2.2.1/developing/api/http/get-bucket-props.md
@@ -33,7 +33,7 @@ Optional query parameters (only valid for the old format):
* `props` - whether to return the bucket properties (`true` is the default)
* `keys` - whether to return the keys stored in the bucket. (`false` is the
-default). See also [HTTP List Keys](/riak/kv/2.2.1/developing/api/http/list-keys).
+default). See also [HTTP List Keys]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/list-keys).
## Response
@@ -49,8 +49,8 @@ The JSON object in the response will contain up to two entries,
`"props"` and `"keys"`, which are present or missing, according to the
optional query parameters. The default is for only `"props"` to be present.
-See [HTTP Set Bucket Properties](/riak/kv/2.2.1/developing/api/http/set-bucket-props) for more information about the available
-bucket properties. See [Managing Bucket Types Through the Command Line](http://docs.basho.com/riak/kv/2.2.0/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface.
+See [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/set-bucket-props) for more information about the available
+bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.2.1/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface.
## Example
diff --git a/content/riak/kv/2.2.1/developing/api/http/link-walking.md b/content/riak/kv/2.2.1/developing/api/http/link-walking.md
index 173ea04ccd..9a368651bc 100644
--- a/content/riak/kv/2.2.1/developing/api/http/link-walking.md
+++ b/content/riak/kv/2.2.1/developing/api/http/link-walking.md
@@ -21,8 +21,8 @@ This feature is deprecated and will be removed in a future version.
Link walking (traversal) finds and returns objects by following links attached
to them, starting from the object specified by the bucket and key portion. It
-is a special case of [MapReduce](/riak/kv/2.2.1/developing/usage/mapreduce), and can be expressed more verbosely as such.
-[Read more about Links](/riak/kv/2.2.1/learn/glossary/#links).
+is a special case of [MapReduce]({{< baseurl >}}riak/kv/2.2.1/developing/usage/mapreduce), and can be expressed more verbosely as such.
+[Read more about Links]({{< baseurl >}}riak/kv/2.2.1/learn/glossary/#links).
## Request
@@ -68,7 +68,7 @@ single object that was found. If no objects were found or "keep" was not set on
the phase, no chunks will be present in that phase. Objects inside phase
results will include `Location` headers that can be used to determine bucket
and key. In fact, you can treat each object-chunk similarly to a complete
-response from [fetching the object](/riak/kv/2.2.1/developing/api/http/fetch-object), without the status
+response from [fetching the object]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/fetch-object), without the status
code.
## Example
diff --git a/content/riak/kv/2.2.1/developing/api/http/list-resources.md b/content/riak/kv/2.2.1/developing/api/http/list-resources.md
index 17b2ef5fa6..2e47092c07 100644
--- a/content/riak/kv/2.2.1/developing/api/http/list-resources.md
+++ b/content/riak/kv/2.2.1/developing/api/http/list-resources.md
@@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific
operations.
The standard resources are:
-* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.2.1/developing/api/http/#bucket-operations)
-* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.2.1/developing/api/http/secondary-indexes)
-* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.2.1/developing/api/http/link-walking)
-* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.2.1/developing/api/http/mapreduce)
-* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.2.1/developing/api/http/#object-key-operations)
-* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.2.1/developing/api/http/ping)
-* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.2.1/developing/api/http/set-bucket-props)
-* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.2.1/developing/api/http/status)
+* `riak_kv_wm_buckets` - [Bucket Operations]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/#bucket-operations)
+* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/secondary-indexes)
+* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/link-walking)
+* `riak_kv_wm_mapred` - [HTTP MapReduce]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/mapreduce)
+* `riak_kv_wm_object` - [Object/Key Operations]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/#object-key-operations)
+* `riak_kv_wm_ping` - [HTTP Ping]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/ping)
+* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/set-bucket-props)
+* `riak_kv_wm_stats` - [HTTP Status]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/status)
## Request
diff --git a/content/riak/kv/2.2.1/developing/api/http/mapreduce.md b/content/riak/kv/2.2.1/developing/api/http/mapreduce.md
index 210c6f22fc..e1d42c05b5 100644
--- a/content/riak/kv/2.2.1/developing/api/http/mapreduce.md
+++ b/content/riak/kv/2.2.1/developing/api/http/mapreduce.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/http/mapreduce
---
-[MapReduce](/riak/kv/2.2.1/developing/usage/mapreduce) is a generic way to query Riak by specifying
inputs and constructing a set of map, reduce, and link phases through which data will flow.
+[MapReduce]({{< baseurl >}}riak/kv/2.2.1/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow.
## Request
```
POST /mapred
```
Important headers:
-* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.2.1/developing/usage/mapreduce) page.
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{< baseurl >}}riak/kv/2.2.1/developing/usage/mapreduce) page.
Optional query parameters:
* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
diff --git a/content/riak/kv/2.2.1/developing/api/http/search-index-info.md b/content/riak/kv/2.2.1/developing/api/http/search-index-info.md
index 729acf690e..f2ea6d2470 100644
--- a/content/riak/kv/2.2.1/developing/api/http/search-index-info.md
+++ b/content/riak/kv/2.2.1/developing/api/http/search-index-info.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/http/search-index-info
---
-Retrieves information about all currently available [Search indexes](/riak/kv/2.2.1/developing/usage/search) in JSON format.
+Retrieves information about all currently available [Search indexes]({{< baseurl >}}riak/kv/2.2.1/developing/usage/search) in JSON format.
## Request
diff --git a/content/riak/kv/2.2.1/developing/api/http/search-query.md b/content/riak/kv/2.2.1/developing/api/http/search-query.md
index aea747a5b3..84d6c7d7d9 100644
--- a/content/riak/kv/2.2.1/developing/api/http/search-query.md
+++ b/content/riak/kv/2.2.1/developing/api/http/search-query.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/http/search-query
---
-Performs a [Riak KV Search](/riak/kv/2.2.1/developing/usage/search) query.
+Performs a [Riak KV Search]({{< baseurl >}}riak/kv/2.2.1/developing/usage/search) query.
## Request
@@ -30,7 +30,7 @@ GET /search/query/
to be used when returning the Search payload. The currently
available options are `json` and `xml`. The default is `xml`.
* `q` --- The actual Search query itself. Examples can be found in
- [Using Search](/riak/kv/2.2.1/developing/usage/search). If a query is not specified, Riak will return
+ [Using Search]({{< baseurl >}}riak/kv/2.2.1/developing/usage/search). If a query is not specified, Riak will return
information about the index itself, e.g. the number of documents indexed.
diff --git a/content/riak/kv/2.2.1/developing/api/http/secondary-indexes.md b/content/riak/kv/2.2.1/developing/api/http/secondary-indexes.md
index 9c9fade9bc..9c43e1b571 100644
--- a/content/riak/kv/2.2.1/developing/api/http/secondary-indexes.md
+++ b/content/riak/kv/2.2.1/developing/api/http/secondary-indexes.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/http/secondary-indexes
---
-[Secondary Indexes](/riak/kv/2.2.1/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
+[Secondary Indexes]({{< baseurl >}}riak/kv/2.2.1/developing/usage/secondary-indexes) allow an application to tag a Riak object with one or more field/value pairs.
The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
## Request
diff --git a/content/riak/kv/2.2.1/developing/api/http/set-bucket-props.md b/content/riak/kv/2.2.1/developing/api/http/set-bucket-props.md
index b1ff7d0beb..8085bc99ec 100644
--- a/content/riak/kv/2.2.1/developing/api/http/set-bucket-props.md
+++ b/content/riak/kv/2.2.1/developing/api/http/set-bucket-props.md
@@ -37,8 +37,8 @@ Available properties:
(concurrent updates)
* `last_write_wins` (true or false) - whether to ignore object history (vector
clock) when writing
-* `precommit` - [precommit hooks](/riak/kv/2.2.1/developing/usage/commit-hooks)
-* `postcommit` - [postcommit hooks](/riak/kv/2.2.1/developing/usage/commit-hooks)
+* `precommit` - [precommit hooks]({{< baseurl >}}riak/kv/2.2.1/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{< baseurl >}}riak/kv/2.2.1/developing/usage/commit-hooks)
* `r, w, dw, rw` - default quorum values for operations on keys in the bucket.
Valid values are:
* `"all"` - all nodes must respond
diff --git a/content/riak/kv/2.2.1/developing/api/http/status.md b/content/riak/kv/2.2.1/developing/api/http/status.md
index ec8f1dbfe8..4b9ce07432 100644
--- a/content/riak/kv/2.2.1/developing/api/http/status.md
+++ b/content/riak/kv/2.2.1/developing/api/http/status.md
@@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"
## Output Explanation
-The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.2.1/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application.
+The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{< baseurl >}}riak/kv/2.2.1/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application.
Stat | Description
------------------------------|---------------------------------------------------
diff --git a/content/riak/kv/2.2.1/developing/api/http/store-object.md b/content/riak/kv/2.2.1/developing/api/http/store-object.md
index e16ea0bdd8..f1583fb9fc 100644
--- a/content/riak/kv/2.2.1/developing/api/http/store-object.md
+++ b/content/riak/kv/2.2.1/developing/api/http/store-object.md
@@ -40,8 +40,8 @@ object when read.
* `X-Riak-Meta-*` - any additional metadata headers that should be stored with
the object.
* `X-Riak-Index-*` - index entries under which this object should be indexed.
-[Read more about Secondary Indexing](/riak/kv/2.2.1/developing/api/http/secondary-indexes)
-* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.2.1/developing/api/http/link-walking)
+[Read more about Secondary Indexing]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/secondary-indexes)
+* `Link` - user and system-defined links to other resources. [Read more about Links.]({{< baseurl >}}riak/kv/2.2.1/developing/api/http/link-walking)
Optional headers (only valid on `PUT`):
@@ -85,7 +85,7 @@ Important headers:
* `Location` a relative URL to the newly-created object (when submitting
without a key)
-If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.2.1/developing/api/http/fetch-object) may be present.
Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. diff --git a/content/riak/kv/2.2.1/developing/api/http/store-search-index.md b/content/riak/kv/2.2.1/developing/api/http/store-search-index.md index 46f0cfb6e2..0e3e997720 100644 --- a/content/riak/kv/2.2.1/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.2.1/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.2.1/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{}}riak/kv/2.2.1/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.2.1/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.2.1/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.2.1/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{}}riak/kv/2.2.1/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl @@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.2.1/developing/usage/search). +More information can be found in [Using Search]({{}}riak/kv/2.2.1/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.2.1/developing/api/http/store-search-schema.md b/content/riak/kv/2.2.1/developing/api/http/store-search-schema.md index c198d9db52..8d6b6cf5b6 100644 --- a/content/riak/kv/2.2.1/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.2.1/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.2.1/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{}}riak/kv/2.2.1/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/schema/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.2.1/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.2.1/developing/usage/search-schemas) document. 
If you've created a schema and stored it in the file `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request:
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers.md
index f2341a50d2..f1c98a4f3a 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers.md
@@ -139,47 +139,47 @@ message RpbErrorResp {
## Bucket Operations
-* [PBC List Buckets](/riak/kv/2.2.1/developing/api/protocol-buffers/list-buckets)
-* [PBC List Keys](/riak/kv/2.2.1/developing/api/protocol-buffers/list-keys)
-* [PBC Get Bucket Properties](/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props)
-* [PBC Set Bucket Properties](/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props)
-* [PBC Reset Bucket Properties](/riak/kv/2.2.1/developing/api/protocol-buffers/reset-bucket-props)
+* [PBC List Buckets]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/list-buckets)
+* [PBC List Keys]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/list-keys)
+* [PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props)
+* [PBC Set Bucket Properties]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props)
+* [PBC Reset Bucket Properties]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/reset-bucket-props)
## Object/Key Operations
-* [PBC Fetch Object](/riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object)
-* [PBC Store Object](/riak/kv/2.2.1/developing/api/protocol-buffers/store-object)
-* [PBC Delete Object](/riak/kv/2.2.1/developing/api/protocol-buffers/delete-object)
+* [PBC Fetch Object]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object)
+* [PBC Store Object]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/store-object)
+* [PBC Delete Object]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/delete-object)
## Query Operations
-* [PBC MapReduce](/riak/kv/2.2.1/developing/api/protocol-buffers/mapreduce)
-* [PBC Secondary Indexes](/riak/kv/2.2.1/developing/api/protocol-buffers/secondary-indexes)
-* [PBC Search](/riak/kv/2.2.1/developing/api/protocol-buffers/search)
+* [PBC MapReduce]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/mapreduce)
+* [PBC Secondary Indexes]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/secondary-indexes)
+* [PBC Search]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/search)
## Server Operations
-* [PBC Ping](/riak/kv/2.2.1/developing/api/protocol-buffers/ping)
-* [PBC Server Info](/riak/kv/2.2.1/developing/api/protocol-buffers/server-info)
+* [PBC Ping]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/ping)
+* [PBC Server Info]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/server-info)
## Bucket Type Operations
-* [PBC Get Bucket Type](/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-type)
-* [PBC Set Bucket Type](/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-type)
+* [PBC Get Bucket Type]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-type)
+* [PBC Set Bucket Type]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-type)
## Data Type Operations
-* [PBC Data Type Fetch](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-fetch)
-* [PBC Data Type Union](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-union)
-* [PBC Data Type Store](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-store)
-* [PBC Data Type Counter
Store](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-counter-store)
-* [PBC Data Type Set Store](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-set-store)
-* [PBC Data Type Map Store](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store)
+* [PBC Data Type Fetch]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-fetch)
+* [PBC Data Type Union]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-union)
+* [PBC Data Type Store]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-store)
+* [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-counter-store)
+* [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-set-store)
+* [PBC Data Type Map Store]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store)
## Yokozuna Operations
-* [PBC Yokozuna Index Get](/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-get)
-* [PBC Yokozuna Index Put](/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-put)
-* [PBC Yokozuna Index Delete](/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-delete)
-* [PBC Yokozuna Schema Get](/riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-get)
-* [PBC Yokozuna Schema Put](/riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-put)
+* [PBC Yokozuna Index Get]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-get)
+* [PBC Yokozuna Index Put]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-put)
+* [PBC Yokozuna Index Delete]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-delete)
+* [PBC Yokozuna Schema Get]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-get)
+* [PBC Yokozuna Schema Put]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-put)
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/auth-req.md
index 443ed2784e..95f3a594f4 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/auth-req.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/auth-req.md
@@ -27,4 +27,4 @@ message RpbAuthReq {
}
```
-For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.2.1/using/security/basics).
+For more on authentication, see our documentation on [Authentication and Authorization]({{< baseurl >}}riak/kv/2.2.1/using/security/basics).
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/coverage-queries.md
new file mode 100644
index 0000000000..2199fbaa5a
--- /dev/null
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/coverage-queries.md
@@ -0,0 +1,78 @@
+---
+title: "PBC Coverage Queries"
+description: ""
+project: "riak_kv"
+project_version: "2.2.1"
+menu:
+  riak_kv-2.2.1:
+    name: "Coverage Queries"
+    identifier: "pbc_coverage_queries"
+    weight: 108
+    parent: "apis_pbc"
+version_history:
+  in: "2.1.4+"
+toc: true
+aliases:
+  - /riak/2.2.1/dev/references/protocol-buffers/coverage-queries
+  - /riak/kv/2.2.1/dev/references/protocol-buffers/coverage-queries
+---
+
+Prepare for parallelizable
+[secondary index queries](../secondary-indexes/) by requesting a
+coverage plan. The response will be multiple slices of the cluster, as
+identified by a TCP endpoint and an opaque binary to be included with
+each 2i query.
+
+## Request
+
+```protobuf
+message RpbCoverageReq {
+    optional bytes type = 1;
+    required bytes bucket = 2;
+    optional uint32 min_partitions = 3;
+    optional bytes replace_cover = 4;
+    repeated bytes unavailable_cover = 5;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket in which the data is stored
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type.
+`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size.
+`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned.
+`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use.
+
+## Response
+
+The results of a coverage query are returned as a list of endpoints
+with opaque binaries to be included with secondary index queries.
+
+```protobuf
+message RpbCoverageResp {
+    repeated RpbCoverageEntry entries = 1;
+}
+
+message RpbCoverageEntry {
+    required bytes ip = 1;
+    required uint32 port = 2;
+    optional bytes keyspace_desc = 3;
+    required bytes cover_context = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints.
+`port` | The port to contact on the server.
+`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging.
+`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client).
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/delete-object.md
index a7245a60fa..7cdec48652 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/delete-object.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/delete-object.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/protocol-buffers/delete-object
---
-Delete an object in the specified [bucket type](/riak/kv/2.2.1/using/cluster-operations/bucket-types)/bucket/key location.
+Delete an object in the specified [bucket type]({{< baseurl >}}riak/kv/2.2.1/using/cluster-operations/bucket-types)/bucket/key location.
## Request
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-counter-store.md
index ce037b08ce..c79459ca11 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-counter-store.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-counter-store.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/protocol-buffers/dt-counter-store
---
-An operation to update a [counter](/riak/kv/2.2.1/developing/data-types).
+An operation to update a [counter]({{< baseurl >}}riak/kv/2.2.1/developing/data-types).
## Request
@@ -28,4 +28,4 @@ message CounterOp {
The `increment` value specifies how much the counter will be incremented or
decremented, depending on whether the `increment` value is positive or
negative. This operation can be used to update counters that are
-stored on their own in a key or [within a map](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store).
+stored on their own in a key or [within a map]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store).
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-fetch.md
index fa6c2a29e1..07b92ee9ac 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-fetch.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-fetch.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/protocol-buffers/dt-fetch
---
-The equivalent of [`RpbGetReq`](/riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.2.1/developing/data-types). This request results in a `DtFetchResp`
+The equivalent of [`RpbGetReq`]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{< baseurl >}}riak/kv/2.2.1/developing/data-types). This request results in a `DtFetchResp`
message (explained in the **Response** section below).
## Request
@@ -42,14 +42,14 @@ Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket in which the Data Type is stored
`key` | The key where the Data Type is stored
-`type` | The [Using Bucket Types](/riak/kv/2.2.1/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)
+`type` | The [Using Bucket Types]({{< baseurl >}}riak/kv/2.2.1/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)
#### Optional Parameters
> **Note on defaults and special values**
>
> All of the optional parameters below have default values determined on a
-per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props) for more information.
+per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props) for more information.
Furthermore, you can assign an integer value to the `r` and
`pr`, provided that that integer value is less than or equal
@@ -72,7 +72,7 @@ Parameter | Description
## Response
-The response to a fetch request ([`DtFetchReq`](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+The response to a fetch request ([`DtFetchReq`]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
```protobuf
message DtFetchResp {
@@ -91,7 +91,7 @@ message DtFetchResp {
If the `include_context` option is specified, an opaque "context" value
will be returned along with the user-readable data. When sending an
update request, the client should send this context as well, just as one
-would send a [vclock](/riak/kv/2.2.1/learn/glossary/#vector-clock) for standard KV updates.
+would send a [vclock]({{< baseurl >}}riak/kv/2.2.1/learn/glossary/#vector-clock) for standard KV updates.
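Although this page documents the Protocol Buffers interface, the opaque context round-trip is easiest to see over HTTP; a rough sketch (assuming a set stored under the `sets` bucket type at bucket `travel`, key `cities`; all names and the context string are illustrative):

```curl
# The fetch response carries the set's value along with its opaque context, e.g.
# {"type":"set","value":["Toronto"],"context":"g2wAAAAB..."}
curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
```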
The type of the Data Type is specified in the `type` field, and must be
one of the three possible values of the `DataType` enum (`COUNTER`,
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store.md
index 8d82cf2d40..f3cf98d704 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store.md
@@ -66,7 +66,7 @@ message MapUpdate {
}
```
-The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-set-store).
+The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-set-store).
If you are updating a flag, you do so by including a `FlagOp` message. As
shown in the `MapUpdate` message above, this operation takes one of two
values: `ENABLE` and `DISABLE` (`1` and `2`, respectively).
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-set-store.md
index 5e00293aef..38d0b8c7ba 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-set-store.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-set-store.md
@@ -16,7 +16,7 @@ aliases:
---
An operation to update a set, either on its own (at the bucket/key
-level) or [inside of a map](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store).
+level) or [inside of a map]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store).
## Request
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-store.md
index e6aed6dfbe..60926154d0 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-store.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-store.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/protocol-buffers/dt-store
---
-A request to update the value of a [Riak Data Type](/riak/kv/2.2.1/developing/data-types).
+A request to update the value of a [Riak Data Type]({{< baseurl >}}riak/kv/2.2.1/developing/data-types).
## Request
@@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting
`DtUpdateResp`.
The `DtOp` value specifies which Data Type-specific operation is being
-performed. More on that in the [PBC Data Type Union](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-union) document.
+performed. More on that in the [PBC Data Type Union]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-union) document.
```protobuf
message DtUpdateReq {
@@ -50,11 +50,11 @@ message DtUpdateReq {
Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket in which the Data Type is stored
-`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.2.1/using/cluster-operations/bucket-types).
+`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.2.1/using/cluster-operations/bucket-types).
Also required is a `DtOp` message that specifies which operation is to
be performed, depending on whether the Data Type being updated is a
-[counter](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store).
+[counter]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-counter-store), [set]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-set-store), or [map]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-map-store).
```protobuf
message DtOp {
@@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum`
Parameter | Description
:---------|:-----------
`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
-`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.2.1/learn/glossary/#vector-clock)
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{< baseurl >}}riak/kv/2.2.1/learn/glossary/#vector-clock)
`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
@@ -92,7 +92,7 @@ Parameter | Description
## Response
The response to a Data Type update request is analogous to
-[`RpbPutResp`](/riak/kv/2.2.1/developing/api/protocol-buffers/store-object) for KV operations. If the
+[`RpbPutResp`]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/store-object) for KV operations. If the
`return_body` is set in the update request message (as explained above),
the message will include the opaque context of the Data Type (`context`)
and the new value of the Data Type _after_ the update has completed
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-union.md
index 11d66e40ed..468ffd6dac 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-union.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/dt-union.md
@@ -28,4 +28,4 @@ message DtOp {
```
The included operation depends on the Data Type that is being updated.
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.2.1/developing/api/protocol-buffers/dt-store) message.
+`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/dt-store) message.
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object.md
index 79a5d17798..2f0947d14c 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object.md
@@ -47,7 +47,7 @@ message RpbGetReq {
> **Note on defaults and special values**
>
> All of the optional parameters below have default values determined on a
-per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props) for more information.
+per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props) for more information.
>
> Furthermore, you can assign an integer value to the `r` and
`pr` parameters, provided that that integer value is less than or
@@ -87,7 +87,7 @@ Value | Description
The content entries hold the object value and any metadata. Below is
the structure of a RpbContent message, which is included in GET/PUT
responses (`RpbGetResp` (above) and
-[`RpbPutResp`](/riak/kv/2.2.1/developing/api/protocol-buffers/store-object), respectively):
+[`RpbPutResp`]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/store-object), respectively):
```protobuf
message RpbContent {
@@ -114,7 +114,7 @@ of the following optional parameters:
* `charset` --- The character encoding of the object, e.g. `utf-8`
* `content_encoding` --- The content encoding of the object, e.g. `video/mp4`
-* `vtag` --- The object's [vtag](/riak/kv/2.2.1/learn/glossary/#vector-clock)
+* `vtag` --- The object's [vtag]({{< baseurl >}}riak/kv/2.2.1/learn/glossary/#vector-clock)
* `links` --- This parameter is associated with the now-deprecated link
walking feature and should not be used by Riak clients
* `last_mod` --- A timestamp for when the object was last modified, in
@@ -132,7 +132,7 @@ of the following optional parameters:
}
```
Notice that both a key and value can be stored or just a key.
- `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.2.1/developing/usage/secondary-indexes) to objects (in the optional
+ `RpbPair` messages are also used to attach [secondary indexes]({{< baseurl >}}riak/kv/2.2.1/developing/usage/secondary-indexes) to objects (in the optional
`indexes` field).
* `deleted` --- Whether the object has been deleted (i.e. whether a
tombstone for the object has been found under the specified key)
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props.md
index 59d0d5744a..17e46b6d7e 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props.md
@@ -26,7 +26,7 @@ message RpbGetBucketReq {
}
```
-The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.2.1/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
+The bucket's name (`bucket`) must be specified. The [bucket type]({{< baseurl >}}riak/kv/2.2.1/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
the `default` bucket type will be used.
## Response
@@ -85,7 +85,7 @@ message RpbBucketProps {
Each `RpbBucketProps` message returns all of the properties associated
with a particular bucket.
Default values for bucket properties,
as well as descriptions of all of the above properties, can be found in the
-[configuration file](/riak/kv/2.2.1/configuring/reference/#default-bucket-properties) documentation.
+[configuration file]({{< baseurl >}}riak/kv/2.2.1/configuring/reference/#default-bucket-properties) documentation.
It should be noted that the value of an `RpbBucketProps` message may
include other message types, such as `RpbModFun` (specifying
@@ -106,5 +106,5 @@ message RpbCommitHook {
```
{{% note title="Note on `RpbReplMode`" %}}
-The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
{{% /note %}}
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-type.md
index f6d4e6fcb8..1ac272c548 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-type.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-type.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/dev/references/protocol-buffers/get-bucket-type
---
-Gets the bucket properties associated with a [bucket type](/riak/kv/2.2.1/using/cluster-operations/bucket-types).
+Gets the bucket properties associated with a [bucket type]({{< baseurl >}}riak/kv/2.2.1/using/cluster-operations/bucket-types).
## Request
@@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`).
## Response
A bucket type's properties will be sent to the client as part of an
-[`RpbBucketProps`](/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props) message.
+[`RpbBucketProps`]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props) message.
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-client-id.md
index 728e8c3ace..9353c3dd8f 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-client-id.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/get-client-id.md
@@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs.
Get the client id used for this connection. Client ids are used for
conflict resolution and each unique actor in the system should be
assigned one. A client id is assigned randomly when the socket is
-connected and can be changed using [Set Client ID](/riak/kv/2.2.1/developing/api/protocol-buffers/set-client-id).
+connected and can be changed using [Set Client ID]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/set-client-id).
## Request
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/mapreduce.md
index 1af73c849b..a6fc26600c 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/mapreduce.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/mapreduce.md
@@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways
* `application/json` --- JSON-encoded MapReduce job
* `application/x-erlang-binary` --- Erlang external term format
-The JSON encoding is the same as [REST API](/riak/kv/2.2.1/developing/usage/mapreduce/#rest) and
-the external term format is the same as the [local Erlang API](/riak/kv/2.2.1/developing/app-guide/advanced-mapreduce/#erlang)
+The JSON encoding is the same as [REST API]({{< baseurl >}}riak/kv/2.2.1/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{< baseurl >}}riak/kv/2.2.1/developing/app-guide/advanced-mapreduce/#erlang)
## Response
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/reset-bucket-props.md
index 942a46ea07..df6cf44a66 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/reset-bucket-props.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/reset-bucket-props.md
@@ -27,7 +27,7 @@ message RpbResetBucketReq {
```
You must specify the name of the bucket (`bucket`) and optionally a
-[bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) using the `type` value. If you do not
+[bucket type]({{< baseurl >}}riak/kv/2.2.1/developing/usage/bucket-types) using the `type` value. If you do not
specify a bucket type, the `default` bucket type will be used by Riak.
## Response
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/secondary-indexes.md
index 08859ea013..52cb6cebf4 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/secondary-indexes.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/secondary-indexes.md
@@ -61,7 +61,7 @@ Parameter | Description
`max_results` | If pagination is turned on, the number of results to be returned to the client
`continuation` | If set to `true`, values are returned in a paginated response
`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
-`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.2.1/developing/usage/bucket-types).
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.2.1/developing/usage/bucket-types).
`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
@@ -84,7 +84,7 @@ message RpbIndexResp {
Parameter | Description
:---------|:-----------
`keys` | A list of keys that match the index request
-`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object).
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object).
`continuation` | Used for paginated responses
`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props.md
index bdda91a0cd..2287761bed 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props.md
@@ -29,9 +29,9 @@ message RpbSetBucketReq {
You must specify the name of the bucket (`bucket`) and include an
`RpbBucketProps` message. More on that message type can be found in the
-[PBC Get Bucket Properties](/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props) documentation.
+[PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props) documentation.
-You can also specify a [bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) using the
+You can also specify a [bucket type]({{< baseurl >}}riak/kv/2.2.1/developing/usage/bucket-types) using the
`type` value. If you do not specify a bucket type, the `default` bucket
type will be used by Riak.
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-type.md
index d510efa52f..e682245807 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-type.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-type.md
@@ -15,8 +15,8 @@ aliases:
- /riak/kv/2.2.1/dev/references/protocol-buffers/set-bucket-type
---
-Assigns a set of [bucket properties](/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props) to a
-[bucket type](/riak/kv/2.2.1/developing/usage/bucket-types).
+Assigns a set of [bucket properties]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props) to a
+[bucket type]({{< baseurl >}}riak/kv/2.2.1/developing/usage/bucket-types).
## Request
@@ -28,4 +28,4 @@ message RpbSetBucketTypeReq {
```
The `type` field specifies the name of the bucket type as a binary. The
-`props` field contains an [`RpbBucketProps`](/riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props).
+`props` field contains an [`RpbBucketProps`]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/get-bucket-props).
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/store-object.md
index ff89a7710d..7db98c032e 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/store-object.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/store-object.md
@@ -16,11 +16,11 @@ aliases:
---
Stores an object under the specified location, as determined by the
-intended [key](/riak/kv/2.2.1/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.2.1/learn/concepts/buckets), and [bucket type](/riak/kv/2.2.1/developing/usage/bucket-types). A bucket must always be specified (via
+intended [key]({{< baseurl >}}riak/kv/2.2.1/learn/concepts/keys-and-objects), [bucket]({{< baseurl >}}riak/kv/2.2.1/learn/concepts/buckets), and [bucket type]({{< baseurl >}}riak/kv/2.2.1/developing/usage/bucket-types).
A bucket must always be specified (via
`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no
-[bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) is assigned, Riak will assign
-`default`, which means that the [default bucket configuration](/riak/kv/2.2.1/configuring/reference/#default-bucket-properties) will be used.
+[bucket type]({{< baseurl >}}riak/kv/2.2.1/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{< baseurl >}}riak/kv/2.2.1/configuring/reference/#default-bucket-properties) will be used.
#### Request
@@ -50,7 +50,7 @@ message RpbPutReq {
Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
-`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object)
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object)
#### Optional Parameters
@@ -93,7 +93,7 @@ message RpbPutResp {
If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
will contain the current object after the PUT completes, in `contents`,
-as well as the object's [causal context](/riak/kv/2.2.1/learn/concepts/causal-context), in the `vclock`
+as well as the object's [causal context]({{< baseurl >}}riak/kv/2.2.1/learn/concepts/causal-context), in the `vclock`
field. The `key` will be sent only if the server generated a random key
for the object.
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-delete.md
index 008b816892..4d8a825e5d 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-delete.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-delete.md
@@ -29,5 +29,5 @@ message RpbYokozunaIndexDeleteReq {
## Response
-Returns a [RpbDelResp](/riak/kv/2.2.1/developing/api/protocol-buffers/#message-codes) code with no data on success.
+Returns a [RpbDelResp]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/#message-codes) code with no data on success.
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-get.md
index 6370e87bc7..f77b41ef7f 100644
--- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-get.md
+++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-get.md
@@ -53,7 +53,7 @@ message RpbYokozunaIndex {
```
Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.2.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
+binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.2.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
index is stored (for GET requests) or on which you wish the index to be
stored (for PUT requests). An index's `n_val` must match the associated
bucket's `n_val`.
diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-put.md index a40952360d..7606c5fc7d 100644 --- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-put.md +++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-index-put.md @@ -37,9 +37,9 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.2.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.2.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. ## Response -Returns a [RpbPutResp](/riak/kv/2.2.1/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.2.1/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-get.md index c4db42d5e3..9d8078d04d 100644 --- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.2.1/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{}}riak/kv/2.2.1/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-put.md index 91486bdc07..b32ed6eb1f 100644 --- a/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.2.1/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.2.1/developing/usage/search-schemas). +Create a new Solr [search schema]({{}}riak/kv/2.2.1/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.2.1/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.2.1/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.2.1/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{}}riak/kv/2.2.1/developing/api/protocol-buffers/#message-codes) code with no data on success. 
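Putting the schema PUT and index PUT requests together: a client typically uploads the schema first, then creates an index that references it. The sketch below uses the official Python client and is illustrative only; `my_schema.xml` is a hypothetical file, and a usable schema must define the fields Riak Search expects (see the search-schemas documentation):

```python
import riak

client = riak.RiakClient(pb_port=8087)  # assumes a local node on port 8087

# Hypothetical schema file containing complete Solr schema XML.
with open('my_schema.xml') as f:
    schema_xml = f.read()

# Sends the schema name plus its XML content, as in the schema PUT request.
client.create_search_schema('my_schema', schema_xml)

# Creates an index bound to that schema, as in the index PUT request.
client.create_search_index('my_index', schema='my_schema')
```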
diff --git a/content/riak/kv/2.2.1/developing/app-guide.md b/content/riak/kv/2.2.1/developing/app-guide.md index b008509c1e..f6b09855f6 100644 --- a/content/riak/kv/2.2.1/developing/app-guide.md +++ b/content/riak/kv/2.2.1/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.2.1/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.2.1/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.2.1/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.2.1/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.2.1/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.2.1/developing/key-value-modeling -[dev data types]: /riak/kv/2.2.1/developing/data-types -[dev data types#counters]: /riak/kv/2.2.1/developing/data-types/#counters -[dev data types#sets]: /riak/kv/2.2.1/developing/data-types/#sets -[dev data types#maps]: /riak/kv/2.2.1/developing/data-types/#maps -[usage create objects]: /riak/kv/2.2.1/developing/usage/creating-objects -[usage search]: /riak/kv/2.2.1/developing/usage/search -[use ref search]: /riak/kv/2.2.1/using/reference/search -[usage 2i]: /riak/kv/2.2.1/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.2.1/developing/client-libraries -[concept crdts]: /riak/kv/2.2.1/learn/concepts/crdts -[dev data model]: /riak/kv/2.2.1/developing/data-modeling -[usage mapreduce]: /riak/kv/2.2.1/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.2.1/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.2.1/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.1/setup/planning/backend/memory -[obj model java]: /riak/kv/2.2.1/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.2.1/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.2.1/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.2.1/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.2.1/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.2.1/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.2.1/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[use ref strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.2.1/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.2.1/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.2.1/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.2.1/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties -[install index]: /riak/kv/2.2.1/setup/installing -[getting started]: /riak/kv/2.2.1/developing/getting-started -[usage index]: /riak/kv/2.2.1/developing/usage -[glossary]: /riak/kv/2.2.1/learn/glossary +[usage conflict resolution]: {{}}riak/kv/2.2.1/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.2.1/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.2.1/developing/data-modeling/#sensor-data +[concept eventual consistency]: 
{{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.2.1/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.2.1/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.2.1/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.2.1/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.2.1/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.2.1/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.2.1/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search +[use ref search]: {{}}riak/kv/2.2.1/using/reference/search +[usage 2i]: {{}}riak/kv/2.2.1/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.2.1/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.2.1/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.2.1/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.2.1/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.2.1/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.2.1/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.1/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.2.1/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.2.1/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.2.1/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.2.1/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.2.1/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.2.1/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.2.1/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[cluster ops strong consistency]: {{}}riak/kv/2.2.1/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/2.2.1/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/2.2.1/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/2.2.1/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/2.2.1/setup/installing +[getting started]: {{}}riak/kv/2.2.1/developing/getting-started +[usage index]: {{}}riak/kv/2.2.1/developing/usage +[glossary]: {{}}riak/kv/2.2.1/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -118,7 +118,7 @@ Riak may not be such a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance.
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.2.1/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.2.1/developing/app-guide/advanced-mapreduce.md index 8183a0a64d..666bac107e 100644 --- a/content/riak/kv/2.2.1/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.2.1/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.2.1/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.2.1/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.2.1/using/reference/custom-code -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[config reference]: /riak/kv/2.2.1/configuring/reference +[usage 2i]: {{}}riak/kv/2.2.1/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.2.1/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.2.1/configuring/reference [google mr]: http://research.google.com/archive/mapreduce.html [mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map [function contrib]: https://github.com/basho/riak_function_contrib @@ -381,7 +381,7 @@ Erlang client. {{% note title="Distributing Erlang MapReduce Code" %}} Any modules and functions you use in your Erlang MapReduce calls must be available on all nodes in the cluster. Please read about -[installing custom code](/riak/kv/2.2.1/using/reference/custom-code). +[installing custom code]({{}}riak/kv/2.2.1/using/reference/custom-code). {{% /note %}} ### Erlang Example @@ -728,7 +728,7 @@ You can use streaming with Erlang via the Riak KV local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.2.1/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.2.1/developing/app-guide/cluster-metadata.md index 558d33677e..a823188034 100644 --- a/content/riak/kv/2.2.1/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.2.1/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.2.1/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.2.1/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. 
Logical clocks, namely [dotted version vectors](/riak/kv/2.2.1/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.2.1/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.2.1/developing/app-guide/replication-properties.md b/content/riak/kv/2.2.1/developing/app-guide/replication-properties.md index 4a6f5a5774..75aca015ea 100644 --- a/content/riak/kv/2.2.1/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.2.1/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.1/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.2.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.2.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.2.1/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.2.1/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. 
Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.2.1/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.2.1/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.2.1/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.2.1/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.2.1/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.2.1/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | The number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.2.1/developing/usage/bucket-types) +that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.2.1/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. At the most general level, Riak allows you to choose how many copies of an object you want to store in your cluster (N, or `n_val`), how many servers must respond to a read request (R, or `r`), and how many servers must respond to a write request for it to be considered a success (W, or `w`). @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. 
The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.2.1/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.2.1/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.2.1/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.2.1/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.2.1/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.2.1/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.2.1/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.2.1/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.2.1/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.2.1/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.2.1/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.2.1/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.2.1/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.2.1/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.2.1/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.2.1/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.2.1/developing/app-guide/strong-consistency.md b/content/riak/kv/2.2.1/developing/app-guide/strong-consistency.md index 6056005e36..925f26988e 100644 --- a/content/riak/kv/2.2.1/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.2.1/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.2.1/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/kv/2.2.1/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.2.1/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.2.1/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.2.1/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.2.1/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.2.1/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.2.1/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.2.1/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.2.1/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/kv/2.2.1/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.2.1/developing/client-libraries -[getting started]: /riak/kv/2.2.1/developing/getting-started -[config strong consistency#details]: /riak/kv/2.2.1/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[concept eventual consistency]: 
{{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.2.1/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.2.1/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.2.1/using/cluster-operations/bucket-types +[apps replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.2.1/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.2.1/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.2.1/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.2.1/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.2.1/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.2.1/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.2.1/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.2.1/developing/client-libraries +[getting started]: {{}}riak/kv/2.2.1/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.2.1/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.2.1/developing/app-guide/write-once.md b/content/riak/kv/2.2.1/developing/app-guide/write-once.md index 91d1f5ea3b..06e1d9a4b7 100644 --- a/content/riak/kv/2.2.1/developing/app-guide/write-once.md +++ b/content/riak/kv/2.2.1/developing/app-guide/write-once.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.1/dev/advanced/write-once --- -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[bucket type]: /riak/kv/2.2.1/developing/usage/bucket-types -[Riak data types]: /riak/kv/2.2.1/developing/data-types -[strong consistency]: /riak/kv/2.2.1/developing/app-guide/strong-consistency +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.2.1/developing/data-types +[strong consistency]: {{}}riak/kv/2.2.1/developing/app-guide/strong-consistency Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. @@ -98,7 +98,7 @@ The relationship between the `riak_client`, write-once workers, and vnode proxies is illustrated in the following diagram:
-![Write Once](/images/write_once.png) +![Write Once]({{}}images/write_once.png)
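From a client's point of view, a write-once bucket is used like any other bucket; the special behavior above comes entirely from the bucket type. A minimal sketch with the official Python client, assuming a bucket type named `w1` that was created with `'{"props":{"write_once":true}}'` and then activated via `riak-admin` (the type, bucket, and key names here are hypothetical):

```python
import riak

client = riak.RiakClient(pb_port=8087)  # assumes a local node on port 8087

# Assumption: bucket type "w1" was created with write_once set to true
# and activated beforehand.
events = client.bucket_type('w1').bucket('log_entries')

# Each entry is written exactly once and never updated, so the write
# takes the faster uncoordinated PUT path described above.
events.new('entry-0001', data={'msg': 'service started'}).store()
```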
## Client Impacts @@ -149,7 +149,7 @@ LevelDB. Riak will automatically fall back to synchronous writes with all other backends. {{% note title="Note on the `multi` backend" %}} -The [Multi](/riak/kv/2.2.1/setup/planning/backend/multi) backend does not +The [Multi]({{}}riak/kv/2.2.1/setup/planning/backend/multi) backend does not support asynchronous writes. Therefore, if LevelDB is used with the Multi backend, it will be used in synchronous mode. {{% /note %}} diff --git a/content/riak/kv/2.2.1/developing/client-libraries.md b/content/riak/kv/2.2.1/developing/client-libraries.md index 3fe37caa75..1ac87b94c9 100644 --- a/content/riak/kv/2.2.1/developing/client-libraries.md +++ b/content/riak/kv/2.2.1/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.2.1/developing/data-types.md b/content/riak/kv/2.2.1/developing/data-types.md index d2e72b5341..0f02a3a3af 100644 --- a/content/riak/kv/2.2.1/developing/data-types.md +++ b/content/riak/kv/2.2.1/developing/data-types.md @@ -43,9 +43,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -268,5 +268,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. 
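Once a bucket type with a `datatype` property has been created and activated as outlined above, client libraries expose the data type directly. A short sketch with the official Python client, assuming a bucket type named `sets` created with `'{"props":{"datatype":"set"}}'` and activated:

```python
import riak

client = riak.RiakClient(pb_port=8087)  # assumes a local node on port 8087

# Assumption: bucket type "sets" carries the datatype "set" property.
travel = client.bucket_type('sets').bucket('travel')

cities = travel.new('cities')  # a Set, because of the bucket type
cities.add('Toronto')
cities.add('Montreal')
cities.store()

# Riak merges concurrent additions on the server, so no sibling
# resolution is needed when reading the set back.
print(travel.get('cities').value)
```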
diff --git a/content/riak/kv/2.2.1/developing/faq.md b/content/riak/kv/2.2.1/developing/faq.md index 67780004e9..ac75b4ced7 100644 --- a/content/riak/kv/2.2.1/developing/faq.md +++ b/content/riak/kv/2.2.1/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.2.1/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.2.1/using/performance/benchmarking -[Bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.2.1/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.2.1/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.2.1/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.2.1/developing/client-libraries -[MapReduce]: /riak/kv/2.2.1/developing/usage/mapreduce -[Memory]: /riak/kv/2.2.1/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.2.1/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.2.1/learn/concepts/causal-context#vector-clocks +[Basho Bench]: {{}}riak/kv/2.2.1/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.2.1/developing/usage +[commit hooks]: {{}}riak/kv/2.2.1/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.2.1/configuring/reference +[Erlang Riak Client]: {{}}riak/kv/2.2.1/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.2.1/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.2.1/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.2.1/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.2.1/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.2.1/developing/getting-started.md b/content/riak/kv/2.2.1/developing/getting-started.md index d107aae930..8b9de68559 100644 --- a/content/riak/kv/2.2.1/developing/getting-started.md +++ b/content/riak/kv/2.2.1/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.2.1/setup/installing -[dev client libraries]: /riak/kv/2.2.1/developing/client-libraries +[install index]: {{}}riak/kv/2.2.1/setup/installing +[dev client libraries]: {{}}riak/kv/2.2.1/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.2.1/developing/getting-started/csharp.md b/content/riak/kv/2.2.1/developing/getting-started/csharp.md index 574e361c88..08a5522e96 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/csharp.md +++ b/content/riak/kv/2.2.1/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.2.1/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.1/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.2.1/developing/getting-started/csharp/querying.md b/content/riak/kv/2.2.1/developing/getting-started/csharp/querying.md index 2603574a0a..3556039a2a 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.2.1/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.1/developing/getting-started/erlang.md b/content/riak/kv/2.2.1/developing/getting-started/erlang.md index a8d6a02c44..1bebaf3d27 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/erlang.md +++ b/content/riak/kv/2.2.1/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.1/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.1/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.2.1/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.2.1/developing/getting-started/erlang/object-modeling.md index be54e396c3..d0ce53dc6e 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.2.1/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.2.1/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.2.1/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.2.1/developing/getting-started/erlang/querying.md b/content/riak/kv/2.2.1/developing/getting-started/erlang/querying.md index 4829d80242..319ad25448 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.2.1/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.2.1/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.2.1/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.1/developing/getting-started/golang.md b/content/riak/kv/2.2.1/developing/getting-started/golang.md index e6fbf89cad..eb738c47c5 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/golang.md +++ b/content/riak/kv/2.2.1/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.1/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.1/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.1/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.1/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.2.1/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.2.1/developing/getting-started/golang/object-modeling.md index 53298e1f5a..ac93d7ef5e 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.2.1/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.2.1/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.2.1/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.2.1/developing/getting-started/golang/querying.md b/content/riak/kv/2.2.1/developing/getting-started/golang/querying.md index 80184c0faa..3d7e45c33a 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.2.1/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.2.1/developing/getting-started/java.md b/content/riak/kv/2.2.1/developing/getting-started/java.md index 094ccc16d7..2c6ee38ebf 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/java.md +++ b/content/riak/kv/2.2.1/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.2.1/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.1/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.2.1/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.2.1/developing/getting-started/java/crud-operations.md index 840b6499a7..b6127314ef 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.2.1/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in-depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.1/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.1/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/) documentation. ## Updating Objects @@ -85,8 +85,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in-depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.1/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.1/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -196,6 +196,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in-depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.1/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.1/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/) documentation. diff --git a/content/riak/kv/2.2.1/developing/getting-started/java/querying.md b/content/riak/kv/2.2.1/developing/getting-started/java/querying.md index 922e7dfad1..5003c3c2cd 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.2.1/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.1/developing/getting-started/nodejs.md b/content/riak/kv/2.2.1/developing/getting-started/nodejs.md index 056005f0db..56ae4d46d6 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.2.1/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.1/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.1/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.2.1/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.2.1/developing/getting-started/nodejs/querying.md index 5d05885237..e35730f3dc 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.2.1/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.1/developing/getting-started/php.md b/content/riak/kv/2.2.1/developing/getting-started/php.md index 1efc9b8caa..32a8798dc4 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/php.md +++ b/content/riak/kv/2.2.1/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.2.1/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.1/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.2.1/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.2.1/developing/getting-started/php/crud-operations.md index a32e84ac16..bb8f0fd3c8 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.2.1/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter](/riak/kv/2.2.1/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.2.1/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.2.1/developing/getting-started/php/querying.md b/content/riak/kv/2.2.1/developing/getting-started/php/querying.md index 20f7d8b53e..ee660c3186 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.2.1/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.1/developing/getting-started/python.md b/content/riak/kv/2.2.1/developing/getting-started/python.md index e0fee4527a..825d0d1534 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/python.md +++ b/content/riak/kv/2.2.1/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.1/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.1/using/running-a-cluster) first. 
To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.1/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.1/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.2.1/developing/getting-started/python/querying.md b/content/riak/kv/2.2.1/developing/getting-started/python/querying.md index 1990d6d4b7..c271242724 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.2.1/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.1/developing/getting-started/ruby.md b/content/riak/kv/2.2.1/developing/getting-started/ruby.md index f8df57ba2b..0c7e58b271 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/ruby.md +++ b/content/riak/kv/2.2.1/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.1/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.1/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.2.1/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.1/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.2.1/developing/getting-started/ruby/querying.md b/content/riak/kv/2.2.1/developing/getting-started/ruby/querying.md index 8a2cd822c8..1e7542bf1d 100644 --- a/content/riak/kv/2.2.1/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.2.1/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.1/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.1/developing/key-value-modeling.md b/content/riak/kv/2.2.1/developing/key-value-modeling.md index 1136372778..a35a1f5bf2 100644 --- a/content/riak/kv/2.2.1/developing/key-value-modeling.md +++ b/content/riak/kv/2.2.1/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.2.1/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.2.1/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.2.1/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.2.1/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.2.1/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.2.1/developing/app-guide/) for a better sense of which features you might need. 
+for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.2.1/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects. Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.2.1/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.2.1/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.2.1/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.2.1/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.2.1/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.2.1/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.2.1/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.2.1/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.2.1/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.2.1/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.2.1/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.2.1/developing/data-types/#sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.2.1/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.2.1/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.2.1/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. 
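The helper function the text goes on to describe is elided from the diff; a plausible sketch with the Python client's `Set` data type might look like the following (the `sets` bucket type and the `user_info_sets`/`usernames` names follow the example the text develops next):

```python
from riak import RiakClient
from riak.datatypes import Set

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

def create_user(username, record):
    # Write the user record itself under its username key...
    client.bucket('users').new(username, data=record).store()

    # ...then register the key in a tracking set, so the application
    # can enumerate users later without an expensive list-keys call.
    set_bucket = client.bucket_type('sets').bucket('user_info_sets')
    usernames = Set(set_bucket, 'usernames')
    usernames.add(username)
    usernames.store()

create_user('alice', {'email': 'alice@example.com'})
```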
We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.2.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.2.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.2.1/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.1/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.2.1/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.2.1/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.2.1/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.2.1/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.2.1/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.2.1/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.2.1/developing/usage/commit-hooks.md b/content/riak/kv/2.2.1/developing/usage/commit-hooks.md index bf1c1f4172..50a6ad6a71 100644 --- a/content/riak/kv/2.2.1/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.2.1/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. 
-Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.2.1/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.2.1/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. ## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.2.1/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.2.1/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.2.1/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.2.1/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.2.1/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.2.1/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.2.1/developing/usage/conflict-resolution.md b/content/riak/kv/2.2.1/developing/usage/conflict-resolution.md index 830b25c020..ccc6ddb24d 100644 --- a/content/riak/kv/2.2.1/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.2.1/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.1/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.2.1/learn/concepts/clusters) system in which any [node](/riak/kv/2.2.1/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. 
It was built as a [clustered]({{}}riak/kv/2.2.1/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.2.1/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. -If you are using Riak in an [eventually consistent](/riak/kv/2.2.1/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.2.1/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.2.1/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.2.1/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.2.1/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.2.1/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.2.1/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.2.1/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.2.1/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.2.1/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.2.1/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.2.1/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.2.1/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.2.1/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. 
The most important [bucket properties](/riak/kv/2.2.1/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.2.1/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. @@ -87,7 +87,7 @@ If the [`allow_mult`](#siblings) parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -[`last_write_wins`](/riak/kv/2.2.1/learn/concepts/buckets). If `last_write_wins` is set to `false`, +[`last_write_wins`]({{}}riak/kv/2.2.1/learn/concepts/buckets). If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.2.1/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.2.1/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.2.1/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.2.1/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.2.1/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.2.1/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.2.1/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.2.1/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.2.1/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.2.1/configuring/reference) to change the [default bucket properties](/riak/kv/2.2.1/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.2.1/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.2.1/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.2.1/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.2.1/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. 
If set to `true`, [dotted version vectors]({{}}riak/kv/2.2.1/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.2.1/learn/concepts/causal-context#vector-clocks) will be used. Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.2.1/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.2.1/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.2.1/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.2.1/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.2.1/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.2.1/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.2.1/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.2.1/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.2.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.2.1/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.2.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.1/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. 
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.2.1/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.2.1/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.2.1/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.2.1/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.2.1/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -610,7 +610,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.2.1/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.2.1/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -665,7 +665,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/csharp.md index 4300eafeab..0477801708 100644 --- a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.1/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
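Across these per-language guides the workflow is the same: fetch, resolve siblings by some use-case-specific rule, then write back with the causal context the fetch carried. A sketch of both halves in the Python client (the resolution rule and the bucket/key names are assumptions for illustration):

```python
from riak import RiakClient

def longest_friends_list_resolver(riak_object):
    # A resolver must leave exactly one sibling behind; this rule
    # keeps whichever sibling has the most friends. A set-union merge
    # would be another reasonable choice.
    riak_object.siblings = [
        max(riak_object.siblings, key=lambda s: len(s.data['friends']))
    ]

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket_type('siblings').bucket('users')
bucket.resolver = longest_friends_list_resolver

# The fetched object arrives resolved and carrying its causal context;
# storing the same object hands that context back, so the update
# supersedes what we read instead of spawning yet another sibling.
obj = bucket.get('bashobunny')
obj.data['friends'].append('cliff')
obj.store()
```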
diff --git a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/golang.md index 514d9ec949..627070096a 100644 --- a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.1/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to usecase-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/java.md index 41bae5d4e9..edd94c5cbd 100644 --- a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.1/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.2.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.1/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.1/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.1/developing/data-types/) that have specific conflict resolution mechanics built in. 
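For a taste of what "built in" means here: incrementing a counter requires no merge logic at all on the application side. A sketch, assuming a bucket type named `counters` has been created and activated with `datatype = counter` (bucket and key names are illustrative):

```python
from riak import RiakClient
from riak.datatypes import Counter

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket_type('counters').bucket('site_metrics')

visits = Counter(bucket, 'page_visits')
visits.increment()   # concurrent increments from other clients merge
visits.store()       # automatically; no sibling resolution needed

print(bucket.get('page_visits').value)
```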
If you have data that -can be modeled as a [counter](/riak/kv/2.2.1/developing/data-types/#counters), [set](/riak/kv/2.2.1/developing/data-types/#sets), or [map](/riak/kv/2.2.1/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.1/developing/data-types/#counters), [set]({{}}riak/kv/2.2.1/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.1/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.1/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.1/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/nodejs.md index 8180d2501f..d4e9f2d162 100644 --- a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.1/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/php.md index 135650b11b..a5d2f4bedc 100644 --- a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.1/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.2.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.1/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.1/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.1/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.1/developing/data-types/#counters), [set](/riak/kv/2.2.1/developing/data-types/#sets), or [map](/riak/kv/2.2.1/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.1/developing/data-types/#counters), [set]({{}}riak/kv/2.2.1/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.1/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.1/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.1/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/python.md index 620d3f672f..8c50f02d27 100644 --- a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.1/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
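The tutorial's actual `User` class and merge routine live in the elided hunks; a minimal stand-in that merges sibling values by taking the union of their friends lists (mirroring the Riak set semantics discussed at the end of these guides) might be:

```python
class User:
    def __init__(self, username, friends):
        self.username = username
        self.friends = friends

def merge(sibling_values):
    # Union the friends lists from every sibling: no addition is lost,
    # which is exactly how a Riak set would resolve concurrent adds.
    merged = set()
    for value in sibling_values:
        merged.update(value['friends'])
    return User(sibling_values[0]['username'], sorted(merged))
```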
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.2.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -185,7 +185,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.1/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.1/developing/usage) section. ## More Advanced Example @@ -240,9 +240,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.1/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.1/developing/data-types/#counters), [set](/riak/kv/2.2.1/developing/data-types/#sets), or [map](/riak/kv/2.2.1/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.1/developing/data-types/#counters), [set]({{}}riak/kv/2.2.1/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.1/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -251,4 +251,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.1/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.1/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/ruby.md index 8e958df5bb..48431fa404 100644 --- a/content/riak/kv/2.2.1/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.2.1/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.1/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.2.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.1/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.1/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.1/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.1/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.1/developing/data-types/#counters), [set](/riak/kv/2.2.1/developing/data-types/#sets), or [map](/riak/kv/2.2.1/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.1/developing/data-types/#counters), [set]({{}}riak/kv/2.2.1/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.1/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.1/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.1/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.1/developing/usage/creating-objects.md b/content/riak/kv/2.2.1/developing/usage/creating-objects.md index bd4105cac4..21e46aafde 100644 --- a/content/riak/kv/2.2.1/developing/usage/creating-objects.md +++ b/content/riak/kv/2.2.1/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.2.1/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.2.1/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -23,7 +23,7 @@ PUT /types//buckets//keys/ # If you're using HTTP to interact with Riak, you can also use POST ``` -As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type](/riak/kv/2.2.1/using/cluster-operations/bucket-types). +As an example, let's store an object containing information about a dog named Rufus. 
We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{}}riak/kv/2.2.1/using/cluster-operations/bucket-types). The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -118,7 +118,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, you run the same read operation as in [Reading Objects](/riak/kv/2.2.1/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types](/riak/kv/2.2.1/using/cluster-operations/bucket-types). +Now, you run the same read operation as in [Reading Objects]({{}}riak/kv/2.2.1/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types]({{}}riak/kv/2.2.1/using/cluster-operations/bucket-types). ### Store an Object @@ -138,7 +138,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.2.1/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.2.1/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.2.1/developing/usage/custom-extractors.md b/content/riak/kv/2.2.1/developing/usage/custom-extractors.md index f3a3090a96..21e994678a 100644 --- a/content/riak/kv/2.2.1/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.2.1/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.2.1/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.2.1/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.2.1/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.2.1/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.2.1/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.2.1/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added to `` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.2.1/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.2.1/developing/usage/deleting-objects.md b/content/riak/kv/2.2.1/developing/usage/deleting-objects.md index 90e695d237..6e75048ed2 100644 --- a/content/riak/kv/2.2.1/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.2.1/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.2.1/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.2.1/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.2.1/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.2.1/developing/usage/document-store.md b/content/riak/kv/2.2.1/developing/usage/document-store.md index 392b080b1c..3c21ebb9e1 100644 --- a/content/riak/kv/2.2.1/developing/usage/document-store.md +++ b/content/riak/kv/2.2.1/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.2.1/developing/usage/search/) and [Riak Data Types](/riak/kv/2.2.1/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.2.1/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.2.1/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.2.1/developing/data-types/#maps). +[Riak maps]({{}}riak/kv/2.2.1/developing/data-types/#maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.2.1/developing/data-types/#maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.2.1/developing/data-types/#maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**. 
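As a preview of the map-based modeling the tutorial builds up to, a blog post might be sketched like this with the Python client's `Map` type (the `cms` bucket type, bucket, key, and field names are assumptions; the tutorial itself lets Search-generated IDs stand in for keys):

```python
from riak import RiakClient
from riak.datatypes import Map

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket_type('cms').bucket('blog_posts')

post = Map(bucket, 'first-post')
post.registers['title'].assign('Riak as a Document Store')
post.registers['author'].assign('basho')
post.sets['keywords'].add('riak')
post.sets['keywords'].add('search')
post.store()  # one round trip applies all of the queued field updates
```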
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.2.1/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.2.1/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.2.1/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.2.1/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.2.1/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.2.1/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.2.1/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.2.1/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.2.1/developing/usage/mapreduce.md b/content/riak/kv/2.2.1/developing/usage/mapreduce.md index 8589e73670..7cdaaf45f2 100644 --- a/content/riak/kv/2.2.1/developing/usage/mapreduce.md +++ b/content/riak/kv/2.2.1/developing/usage/mapreduce.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.1/dev/using/mapreduce --- -[usage 2i]: /riak/kv/2.2.1/developing/usage/secondary-indexes -[usage search]: /riak/kv/2.2.1/developing/usage/search -[usage types]: /riak/kv/2.2.1/developing/usage/bucket-types -[api http]: /riak/kv/2.2.1/developing/api/http -[api pb]: /riak/kv/2.2.1/developing/api/protocol-buffers -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[guide mapreduce]: /riak/kv/2.2.1/developing/app-guide/advanced-mapreduce +[usage 2i]: {{}}riak/kv/2.2.1/developing/usage/secondary-indexes +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search +[usage types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[api http]: {{}}riak/kv/2.2.1/developing/api/http +[api pb]: {{}}riak/kv/2.2.1/developing/api/protocol-buffers +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[guide mapreduce]: {{}}riak/kv/2.2.1/developing/app-guide/advanced-mapreduce {{% note title="Use MapReduce sparingly" %}} In Riak KV, MapReduce is the primary method for non-primary-key-based @@ -116,7 +116,7 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. 
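The phase structure is easier to see stripped of the client machinery. Below is a conceptual, pure-Python sketch, not the Riak API: the map function runs once per object (on the vnode that holds it), and the reduce phase folds the gathered map outputs together:

```python
# Stand-ins for the objects a bucket might hold (key -> stored value).
objects = {'a': 3, 'b': 5, 'c': 7}

def map_phase(key, value):
    # May emit zero or more results per object; here, one squared value.
    return [value * value]

def reduce_phase(values):
    # Collapses all of the map outputs into the final result set.
    return [sum(values)]

mapped = [r for k, v in objects.items() for r in map_phase(k, v)]
print(reduce_phase(mapped))  # [83]
```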
-![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example diff --git a/content/riak/kv/2.2.1/developing/usage/reading-objects.md b/content/riak/kv/2.2.1/developing/usage/reading-objects.md index 93f4ddbf39..aa850aa52a 100644 --- a/content/riak/kv/2.2.1/developing/usage/reading-objects.md +++ b/content/riak/kv/2.2.1/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.2.1/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type](/riak/kv/2.2.1/using/cluster-operations/bucket-types) page. +`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type]({{}}riak/kv/2.2.1/using/cluster-operations/bucket-types) page. ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.2.1/developing/usage/replication.md b/content/riak/kv/2.2.1/developing/usage/replication.md index 69ea27bda9..b243bfdfb6 100644 --- a/content/riak/kv/2.2.1/developing/usage/replication.md +++ b/content/riak/kv/2.2.1/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.2.1/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. 
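Besides bucket types, all of these parameters can be supplied per request. A sketch with the Python client (the `nba_stats` bucket and quorum values echo the curl example later in this document; connection details assume a local node):

```python
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
bucket = client.bucket('nba_stats')

# Write quorum for this request only: 3 acks, 2 of them durable on disk.
obj = bucket.new('michael_jordan', data={'ppg': 30.1})
obj.store(w=3, dw=2)

# Read quorum: require 3 vnodes to answer, and keep asking the rest on
# a first miss instead of returning `not found` early.
fetched = bucket.get('michael_jordan', r=3, notfound_ok=False)
print(fetched.data)
```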
-At the bottom of the page, you'll find a [screencast](/riak/kv/2.2.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.2.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using +Using Strong Consistency documentation, as this option will not be covered in this tutorial. {{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.2.1/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.2.1/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent of setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.2.1/developing/usage/bucket-types) +that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.2.1/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable.
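Replication properties can also be supplied per request from the client libraries rather than baked into a bucket type. Here is a minimal sketch with the official Python client, assuming a local node on port 8087; the bucket and key names are placeholders:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('nba_stats')  # placeholder bucket name

# Read with R=3: wait for three vnodes to respond before returning
obj = bucket.get('michael_jordan', r=3)

# Write with W=3 and DW=2: three vnodes must acknowledge the write,
# two of them durably, before the operation is considered successful
obj.data = {'stats': 'updated'}
obj.store(w=3, dw=2)
```

Per-request values like these override the bucket-level defaults for that operation only.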
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.2.1/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.2.1/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.2.1/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.2.1/setup/planning/backend/multi). ## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.2.1/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.2.1/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.2.1/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.2.1/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.2.1/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.2.1/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.2.1/developing/usage/search-schemas.md b/content/riak/kv/2.2.1/developing/usage/search-schemas.md index 946b957e22..0152a36e7b 100644 --- a/content/riak/kv/2.2.1/developing/usage/search-schemas.md +++ b/content/riak/kv/2.2.1/developing/usage/search-schemas.md @@ -15,17 +15,17 @@ aliases: - /riak/kv/2.2.1/dev/advanced/search-schema --- -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). Riak Search is built for ease of use, allowing you to write values into Riak and query for values using Solr. Riak Search does a lot of work -under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.2.1/developing/data-types/), and [more](/riak/kv/2.2.1/developing/usage/custom-extractors)---into something that can be indexed and searched later. +under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.2.1/developing/data-types/), and [more]({{}}riak/kv/2.2.1/developing/usage/custom-extractors)---into something that can be indexed and searched later. Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing an array of strings? An integer? A date? Is your text in English or Russian?
You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.2.1/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.2.1/developing/usage/search.md b/content/riak/kv/2.2.1/developing/usage/search.md index ea9b157c14..93b6a5b577 100644 --- a/content/riak/kv/2.2.1/developing/usage/search.md +++ b/content/riak/kv/2.2.1/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.2.1/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.2.1/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.2.1/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.1/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.2.1/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.2.1/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.2.1/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.2.1/developing/usage/custom-extractors). 
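Stepping back to the index created earlier on this page, here is a hedged sketch of creating an index and running a query from the official Python client; the index name `famous` follows the `curl` example above, and the query field `name_s` is illustrative:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# Create a Solr index named "famous" (equivalent to the
# PUT /search/index/famous call shown above)
client.create_search_index('famous')

# Later, once a bucket has been associated with the index and objects
# have been written, query it with standard Solr syntax
results = client.fulltext_search('famous', 'name_s:Lion*')
print(results['num_found'], results['docs'])
```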
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.2.1/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.2.1/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.2.1/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.2.1/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.2.1/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.2.1/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.2.1/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.2.1/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.2.1/developing/usage/searching-data-types.md b/content/riak/kv/2.2.1/developing/usage/searching-data-types.md index 9a84e1194e..f9fefa1527 100644 --- a/content/riak/kv/2.2.1/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.2.1/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.1/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.2.1/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.2.1/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.2.1/developing/data-types/#counters), [sets](/riak/kv/2.2.1/developing/data-types/#sets), and [maps](/riak/kv/2.2.1/developing/data-types/#maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.2.1/developing/data-types/#counters), [sets]({{}}riak/kv/2.2.1/developing/data-types/#sets), and [maps]({{}}riak/kv/2.2.1/developing/data-types/#maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
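The query strings in the tables below can be run unchanged from a client library. As a hedged sketch with the official Python client, assuming the `scores` index used in the counters example later in this document:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# Find all counters in the (assumed) "scores" index whose value falls
# between 20 and 50, using the same Solr range syntax as the tables below
results = client.fulltext_search('scores', 'counter:[20 TO 50]')

for doc in results['docs']:
    # _yz_rk is the Riak key of the matching object; the "counter"
    # field name follows the default counter schema described below
    print(doc['_yz_rk'], doc.get('counter'))
```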
### Top-level Schemas -The default schema for [counters](/riak/kv/2.2.1/developing/data-types/#counters) indexes each +The default schema for [counters]({{}}riak/kv/2.2.1/developing/data-types/#counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.2.1/developing/data-types/#sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.2.1/developing/data-types/#sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.2.1/developing/data-types/#maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.2.1/developing/data-types/#maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) for [storing counters](/riak/kv/2.2.1/developing/data-types/#counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.2.1/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.2.1/developing/data-types/#counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.2.1/developing/usage/bucket-types) for [storing sets](/riak/kv/2.2.1/developing/data-types/#sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.2.1/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.2.1/developing/data-types/#sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.2.1/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.2.1/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.2.1/developing/data-types/#maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.2.1/developing/data-types/#maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.2.1/developing/usage/secondary-indexes.md b/content/riak/kv/2.2.1/developing/usage/secondary-indexes.md index c64316add5..62c7bffca8 100644 --- a/content/riak/kv/2.2.1/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.2.1/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.2.1/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.1/setup/planning/backend/memory -[use ref strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.1/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.2.1/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.2.1/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.2.1/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.2.1/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.2.1/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.2.1/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.2.1/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.2.1/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.2.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.2.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.2.1/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.2.1/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.2.1/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.2.1/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.2.1/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.2.1/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.1/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.2.1/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.1/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.1/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.2.1/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.2.1/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.2.1/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.2.1/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
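To round out the `curl` example above, here is a hedged sketch of writing and querying a secondary index with the official Python client; field names mirror the `john_smith` example, and a 2i-capable backend (LevelDB or Memory) is assumed:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('users')  # default bucket type

# Store an object tagged with a binary (string) secondary index
obj = bucket.new('john_smith', data={'user_data': '...'})
obj.add_index('twitter_bin', 'jsmith123')
obj.store()

# Exact-match query: fetch the keys of all objects tagged jsmith123
page = bucket.get_index('twitter_bin', 'jsmith123')
print(page.results)  # e.g. ['john_smith']
```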
### Exact Match diff --git a/content/riak/kv/2.2.1/developing/usage/security.md b/content/riak/kv/2.2.1/developing/usage/security.md index 60ceb68a5b..43754e3955 100644 --- a/content/riak/kv/2.2.1/developing/usage/security.md +++ b/content/riak/kv/2.2.1/developing/usage/security.md @@ -15,49 +15,49 @@ aliases: - /riak/kv/2.2.1/dev/advanced/client-security --- -Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.2.1/using/security/basics) that enables you to choose +Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.2.1/using/security/basics) that enables you to choose * which Riak users/clients are authorized to perform a wide variety of Riak operations, and * how those users/clients are required to authenticate themselves. -The following four authentication mechanisms, aka [security sources](/riak/kv/2.2.1/using/security/managing-sources/) are available: +The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.2.1/using/security/managing-sources/), are available: -* [Trust](/riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication)-based +* [Trust]({{}}riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default -* [Password](/riak/kv/2.2.1/using/security/managing-sources/#password-based-authentication)-based authentication requires +* [Password]({{}}riak/kv/2.2.1/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password -* [Certificate](/riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication +* [Certificate]({{}}riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients authenticate by presenting an SSL certificate -* [Pluggable authentication module (PAM)](/riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication)-based authentication requires +* [Pluggable authentication module (PAM)]({{}}riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the - [`riak-admin security`](/riak/kv/2.2.1/using/security/managing-sources/#managing-sources) + [`riak-admin security`]({{}}riak/kv/2.2.1/using/security/managing-sources/#managing-sources) command line interface Riak's approach to security is highly flexible. If you choose to use Riak's security feature, you do not need to require that all clients authenticate via the same means. Instead, you can specify authentication sources on a client-by-client, i.e. user-by-user, basis. This means that -you can require clients performing, say, [MapReduce](/riak/kv/2.2.1/developing/usage/mapreduce/) -operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.2.1/developing/usage) have to use username and password. The approach +you can require clients performing, say, [MapReduce]({{}}riak/kv/2.2.1/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.2.1/developing/usage) have to use username and password. The approach that you adopt will depend on your security needs. This document provides a general overview of how that works.
For managing security in Riak itself, see the following documents: -* [Authentication and Authorization](/riak/kv/2.2.1/using/security/basics) -* [Managing Security Sources](/riak/kv/2.2.1/using/security/managing-sources/) +* [Authentication and Authorization]({{}}riak/kv/2.2.1/using/security/basics) +* [Managing Security Sources]({{}}riak/kv/2.2.1/using/security/managing-sources/) We also provide client-library-specific guides for the following officially supported clients: -* [Java](/riak/kv/2.2.1/developing/usage/security/java) -* [Ruby](/riak/kv/2.2.1/developing/usage/security/ruby) -* [PHP](/riak/kv/2.2.1/developing/usage/security/php) -* [Python](/riak/kv/2.2.1/developing/usage/security/python) -* [Erlang](/riak/kv/2.2.1/developing/usage/security/erlang) +* [Java]({{}}riak/kv/2.2.1/developing/usage/security/java) +* [Ruby]({{}}riak/kv/2.2.1/developing/usage/security/ruby) +* [PHP]({{}}riak/kv/2.2.1/developing/usage/security/php) +* [Python]({{}}riak/kv/2.2.1/developing/usage/security/python) +* [Erlang]({{}}riak/kv/2.2.1/developing/usage/security/erlang) ## Certificates, Keys, and Authorities @@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients. > **HTTP not supported** > > Certificate-based authentication is available only through Riak's -[Protocol Buffers](/riak/kv/2.2.1/developing/api/protocol-buffers/) interface. It is not available through the -[HTTP API](/riak/kv/2.2.1/developing/api/http). +[Protocol Buffers]({{}}riak/kv/2.2.1/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{}}riak/kv/2.2.1/developing/api/http). ### Default Names -In Riak's [configuration files](/riak/kv/2.2.1/configuring/reference/#security), the +In Riak's [configuration files]({{}}riak/kv/2.2.1/configuring/reference/#security), the default certificate file names are as follows: Cert | Filename diff --git a/content/riak/kv/2.2.1/developing/usage/security/erlang.md b/content/riak/kv/2.2.1/developing/usage/security/erlang.md index df31c295bd..e4c20620cb 100644 --- a/content/riak/kv/2.2.1/developing/usage/security/erlang.md +++ b/content/riak/kv/2.2.1/developing/usage/security/erlang.md @@ -19,9 +19,9 @@ aliases: This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak. -If you are using [trust](/riak/kv/2.2.1/using/security/managing-sources/), [PAM-](/riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.2.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust]({{}}riak/kv/2.2.1/using/security/managing-sources/)- or [PAM]({{}}riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.2.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.1/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.2.1/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.2.1/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.2.1/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.2.1/developing/usage/security/java.md b/content/riak/kv/2.2.1/developing/usage/security/java.md index 889045af88..fe24f52d5b 100644 --- a/content/riak/kv/2.2.1/developing/usage/security/java.md +++ b/content/riak/kv/2.2.1/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.1/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.2.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.2.1/developing/usage/security/php.md b/content/riak/kv/2.2.1/developing/usage/security/php.md index 959ebdc67d..1a946f3f2c 100644 --- a/content/riak/kv/2.2.1/developing/usage/security/php.md +++ b/content/riak/kv/2.2.1/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.2.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.2.1/developing/usage/security/python.md b/content/riak/kv/2.2.1/developing/usage/security/python.md index f56cd93410..1663e6a42e 100644 --- a/content/riak/kv/2.2.1/developing/usage/security/python.md +++ b/content/riak/kv/2.2.1/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.1/using/security/managing-sources/) or [PAM-](/riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.2.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.2.1/using/security/managing-sources/) or [PAM-]({{}}riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication)based authentication, you can use the security +setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.2.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.1/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.2.1/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.2.1/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.2.1/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.2.1/using/security/basics/#user-management).
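As a hedged sketch of the pattern just described (a CA certificate plus the username and password created in Riak), using the `SecurityCreds` object introduced above; the credentials and file path are placeholders:

```python
from riak import RiakClient
from riak.security import SecurityCreds

# Username/password pair created via riak-admin, plus the CA that
# signed the server's certificate; the path is a placeholder
creds = SecurityCreds(username='riakuser',
                      password='rosebud',
                      cacert_file='/ssl_dir/cacertfile.pem')

client = RiakClient(pb_port=8087, credentials=creds)
```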
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.2.1/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.2.1/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.2.1/developing/usage/security/ruby.md b/content/riak/kv/2.2.1/developing/usage/security/ruby.md index 455bc0f1cd..1862dab660 100644 --- a/content/riak/kv/2.2.1/developing/usage/security/ruby.md +++ b/content/riak/kv/2.2.1/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.1/using/security/managing-sources/) or [PAM](/riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.2.1/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.2.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.2.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.1/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.2.1/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication). 
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.2.1/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.2.1/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.2.1/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.2.1/developing/usage/updating-objects.md b/content/riak/kv/2.2.1/developing/usage/updating-objects.md index f25e2318f7..110df47412 100644 --- a/content/riak/kv/2.2.1/developing/usage/updating-objects.md +++ b/content/riak/kv/2.2.1/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/dev/using/updates --- -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode ## Using Causal Context If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.2.1/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.2.1/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.2.1/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.2.1/learn/concepts/causal-context). These context objects track the causal history of the object. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps: 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.2.1/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.2.1/learn/concepts/causal-context)) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.2.1/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.2.1/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution), we'll walk you through a basic example here.
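Here is a minimal sketch of those three steps using the official Python client, ahead of the fuller walkthrough below; the bucket and key names are placeholders, and the causal context travels with the fetched object automatically:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('nba_champions')  # placeholder bucket

# 1. Fetch the object (its causal context comes along as metadata)
obj = bucket.get('champion')

# 2. Modify the value only; the context on obj is left untouched
obj.data = {'team': 'Washington Generals'}

# 3. Write it back; the client sends the fetched context with the write
obj.store()
```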
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.2.1/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.2.1/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.2.1/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.2.1/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.2.1/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.2.1/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.2.1/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.2.1/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.2.1/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.2.1/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.2.1/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.2.1/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.2.1/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.2.1/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.2.1/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.2.1/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.2.1/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.2.1/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.2.1/index.md b/content/riak/kv/2.2.1/index.md index 567d7f30b5..6d6841eb21 100644 --- a/content/riak/kv/2.2.1/index.md +++ b/content/riak/kv/2.2.1/index.md @@ -1,5 +1,5 @@ --- -title: "Riak KV" +title: "Riak KV 2.2.1" description: "" project: "riak_kv" project_version: "2.2.1" @@ -15,15 +15,15 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.2.1/configuring -[downloads]: /riak/kv/2.2.1/downloads/ -[install index]: /riak/kv/2.2.1/setup/installing/ -[plan index]: /riak/kv/2.2.1/setup/planning -[perf open files]: /riak/kv/2.2.1/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.2.1/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.2.1/developing/usage/search -[getting started]: /riak/kv/2.2.1/developing/getting-started -[dev client libraries]: /riak/kv/2.2.1/developing/client-libraries +[config index]: {{}}riak/kv/2.2.1/configuring +[downloads]: {{}}riak/kv/2.2.1/downloads/ +[install index]: {{}}riak/kv/2.2.1/setup/installing/ +[plan index]: {{}}riak/kv/2.2.1/setup/planning +[perf open files]: {{}}riak/kv/2.2.1/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.2.1/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search +[getting started]: {{}}riak/kv/2.2.1/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.2.1/developing/client-libraries @@ -56,7 +56,7 @@ Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and the 3. [Configure Riak KV for your needs][config index] {{% note title="Developing with Riak KV" %}} -If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV](/riak/kv/2.2.1/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. +If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{}}riak/kv/2.2.1/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. 
{{% /note %}} ## Popular Docs diff --git a/content/riak/kv/2.2.1/learn/concepts.md b/content/riak/kv/2.2.1/learn/concepts.md index 1cd0637e5f..e30799cc15 100644 --- a/content/riak/kv/2.2.1/learn/concepts.md +++ b/content/riak/kv/2.2.1/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.2.1/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.2.1/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.2.1/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.2.1/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters -[concept crdts]: /riak/kv/2.2.1/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.2.1/learn/concepts/vnodes -[config index]: /riak/kv/2.2.1/configuring -[plan index]: /riak/kv/2.2.1/setup/planning -[use index]: /riak/kv/2.2.1/using/ +[concept aae]: {{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.2.1/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.2.1/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.2.1/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.2.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.2.1/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.2.1/configuring +[plan index]: {{}}riak/kv/2.2.1/setup/planning +[use index]: {{}}riak/kv/2.2.1/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.2.1/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.2.1/learn/concepts/active-anti-entropy.md index 13cb73a99c..1ae5c71be9 100644 --- a/content/riak/kv/2.2.1/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.2.1/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.1/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.2.1/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.2.1/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.2.1/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.2.1/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.2.1/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.2.1/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.2.1/developing/usage/search +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.2.1/learn/concepts/buckets.md b/content/riak/kv/2.2.1/learn/concepts/buckets.md index 4a850ad579..092ae615e3 100644 --- a/content/riak/kv/2.2.1/learn/concepts/buckets.md +++ b/content/riak/kv/2.2.1/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.2.1/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.2.1/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.2.1/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.2.1/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.2.1/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.2.1/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.2.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[config basic]: /riak/kv/2.2.1/configuring/basic -[dev api http]: /riak/kv/2.2.1/developing/api/http -[dev data types]: /riak/kv/2.2.1/developing/data-types -[glossary ring]: /riak/kv/2.2.1/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.1/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.2.1/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.2.1/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.2.1/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.2.1/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.2.1/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.2.1/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.2.1/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.2.1/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.2.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.2.1/configuring/basic +[dev api http]: {{}}riak/kv/2.2.1/developing/api/http +[dev data types]: {{}}riak/kv/2.2.1/developing/data-types +[glossary ring]: {{}}riak/kv/2.2.1/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.1/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.2.1/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.2.1/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.2.1/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.2.1/learn/concepts/capability-negotiation.md b/content/riak/kv/2.2.1/learn/concepts/capability-negotiation.md index 9958fd1a58..d05cf24c38 100644 --- a/content/riak/kv/2.2.1/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.2.1/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.2.1/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.2.1/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.2.1/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.2.1/developing/usage/mapreduce In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.2.1/learn/concepts/causal-context.md b/content/riak/kv/2.2.1/learn/concepts/causal-context.md index 2684cb06ba..e16bb81fc6 100644 --- a/content/riak/kv/2.2.1/learn/concepts/causal-context.md +++ b/content/riak/kv/2.2.1/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.2.1/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.2.1/developing/api/http -[dev key value]: /riak/kv/2.2.1/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.2.1/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.2.1/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.1/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.2.1/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.2.1/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.2.1/developing/api/http +[dev key value]: {{}}riak/kv/2.2.1/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.2.1/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.2.1/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.1/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.2.1/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.2.1/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -73,7 +73,7 @@ Causal context comes in two forms in Riak: **vector clocks** and **dotted version vectors**. More information on both can be found in the sections below. -In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type](/riak/kv/2.2.1/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other +In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{}}riak/kv/2.2.1/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions).
If, however, `allow_mult` is set to `false`, then Riak will not generate diff --git a/content/riak/kv/2.2.1/learn/concepts/clusters.md b/content/riak/kv/2.2.1/learn/concepts/clusters.md index f2187242df..82f5645c9b 100644 --- a/content/riak/kv/2.2.1/learn/concepts/clusters.md +++ b/content/riak/kv/2.2.1/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.2.1/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.2.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.1/learn/concepts/replication -[glossary node]: /riak/kv/2.2.1/learn/glossary/#node -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.2.1/learn/dynamo -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.1/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.2.1/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.2.1/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.2.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.1/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.2.1/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.2.1/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.1/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.2.1/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.2.1/learn/concepts/crdts.md b/content/riak/kv/2.2.1/learn/concepts/crdts.md index 682cc49857..1819e9e7b1 100644 --- a/content/riak/kv/2.2.1/learn/concepts/crdts.md +++ b/content/riak/kv/2.2.1/learn/concepts/crdts.md @@ -17,20 +17,20 @@ aliases: --- [crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf -[data types converg]: /riak/kv/2.2.1/learn/concepts/crdts/#convergence +[data types converg]: {{}}riak/kv/2.2.1/learn/concepts/crdts/#convergence [crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html -[data types impl]: /riak/kv/2.2.1/learn/concepts/crdts/#implementation -[concept causal context dvv]: /riak/kv/2.2.1/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.2.1/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.2.1/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.2.1/developing/data-types +[data types impl]: {{}}riak/kv/2.2.1/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{}}riak/kv/2.2.1/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.2.1/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.2.1/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.2.1/developing/data-types [riak_dt]: https://github.com/basho/riak_dt -[dev data types context]: /riak/kv/2.1.4/developing/data-types/#data-types-and-context -[glossary node]: /riak/kv/2.2.1/learn/glossary/#node -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.2.1/developing/usage/conflict-resolution +[dev data types context]: {{}}riak/kv/2.2.1/developing/data-types/#data-types-and-context +[glossary node]: {{}}riak/kv/2.2.1/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.2.1/developing/usage/conflict-resolution Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. 
Riak KV supports the following eventually-convergent data types, described in later sections: diff --git a/content/riak/kv/2.2.1/learn/concepts/eventual-consistency.md b/content/riak/kv/2.2.1/learn/concepts/eventual-consistency.md index 85a770ea95..efe899b210 100644 --- a/content/riak/kv/2.2.1/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.2.1/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.2.1/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters -[concept replication]: /riak/kv/2.2.1/learn/concepts/replication -[glossary node]: /riak/kv/2.2.1/learn/glossary/#node -[glossary read rep]: /riak/kv/2.2.1/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.1/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.2.1/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.2.1/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.2.1/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.2.1/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.1/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.2.1/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.2.1/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.2.1/developing/data-modeling/). +or models]({{}}riak/kv/2.2.1/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
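As a concrete illustration of the `allow_mult` behavior described above, here is a minimal sketch against a default local node. The `mult_demo` bucket type and the `demo`/`k1` names are hypothetical placeholders, not values from the docs:

```bash
# Create and activate a bucket type that keeps siblings on concurrent writes
riak-admin bucket-type create mult_demo '{"props":{"allow_mult":true}}'
riak-admin bucket-type activate mult_demo

# Two writes without causal context produce siblings...
curl -X PUT -H "Content-Type: text/plain" -d "value-one" \
  http://localhost:8098/types/mult_demo/buckets/demo/keys/k1
curl -X PUT -H "Content-Type: text/plain" -d "value-two" \
  http://localhost:8098/types/mult_demo/buckets/demo/keys/k1

# ...and a plain read returns 300 Multiple Choices listing both siblings
curl -i http://localhost:8098/types/mult_demo/buckets/demo/keys/k1
```

Reading the key back with an `Accept: multipart/mixed` header returns the sibling bodies themselves rather than just their vtags.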
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.2.1/learn/concepts/keys-and-objects.md b/content/riak/kv/2.2.1/learn/concepts/keys-and-objects.md index 956796d71e..e710e07a0e 100644 --- a/content/riak/kv/2.2.1/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.2.1/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.1/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.2.1/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.2.1/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.2.1/learn/concepts/replication.md b/content/riak/kv/2.2.1/learn/concepts/replication.md index e8519af75e..a517663fd1 100644 --- a/content/riak/kv/2.2.1/learn/concepts/replication.md +++ b/content/riak/kv/2.2.1/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.2.1/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.2.1/learn/concepts/vnodes -[glossary node]: /riak/kv/2.2.1/learn/glossary/#node -[glossary ring]: /riak/kv/2.2.1/learn/glossary/#ring -[usage replication]: /riak/kv/2.2.1/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.2.1/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.2.1/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.2.1/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.2.1/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.2.1/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.2.1/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.2.1/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.2.1/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail. 
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.2.1/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.2.1/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.2.1/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.2.1/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.2.1/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.2.1/learn/concepts/strong-consistency.md b/content/riak/kv/2.2.1/learn/concepts/strong-consistency.md index e332e57436..5813eec5e3 100644 --- a/content/riak/kv/2.2.1/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.2.1/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.1/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.2.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.2.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.2.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.2.1/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.2.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.2.1/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.2.1/learn/concepts/vnodes.md b/content/riak/kv/2.2.1/learn/concepts/vnodes.md index 49e3206b13..9830dc01e6 100644 --- a/content/riak/kv/2.2.1/learn/concepts/vnodes.md +++ b/content/riak/kv/2.2.1/learn/concepts/vnodes.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context]: /riak/kv/2.2.1/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.2.1/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.2.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.1/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.2.1/learn/glossary/#node -[glossary ring]: /riak/kv/2.2.1/learn/glossary/#ring -[plan backend]: /riak/kv/2.2.1/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.2.1/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.2.1/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.2.1/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.2.1/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.2.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.1/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.2.1/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.2.1/learn/glossary/#ring +[plan backend]: {{}}riak/kv/2.2.1/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.2.1/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.2.1/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -80,7 +80,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -102,7 +102,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.2.1/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.2.1/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.2.1/learn/dynamo.md b/content/riak/kv/2.2.1/learn/dynamo.md index da3e43342d..097f3aa9e9 100644 --- a/content/riak/kv/2.2.1/learn/dynamo.md +++ b/content/riak/kv/2.2.1/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.2.1/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.2.1/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.2.1/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.2.1/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. 
-[HTTP API]: /riak/kv/2.2.1/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.2.1/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.2.1/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.2.1/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.2.1/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.2.1/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.2.1/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.2.1/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.2.1/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.2.1/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.2.1/developing/api/http/) +>[REST API]({{}}riak/kv/2.2.1/developing/api/http/) > ->[Writing Data](/riak/kv/2.2.1/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.2.1/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.2.1/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.2.1/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.2.1/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.2.1/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.2.1/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.2.1/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.2.1/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.2.1/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. -[Multi Datacenter Replication]: /riak/kv/2.2.1/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.2.1/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. 
> This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.2.1/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.2.1/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.2.1/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.2.1/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.2.1/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.2.1/setup/planning/backend/ -[Bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.2.1/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.2.1/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.2.1/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.2.1/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.2.1/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.2.1/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.2.1/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.2.1/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.2.1/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.2.1/using/performance/benchmarking/ Dynamo is used by several services with different configurations.
These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.2.1/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.2.1/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.2.1/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.2.1/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.2.1/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.2.1/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.2.1/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.2.1/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.2.1/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.2.1/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. -[Basho Bench]: /riak/kv/2.2.1/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.2.1/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. 
-[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.2.1/learn/glossary.md b/content/riak/kv/2.2.1/learn/glossary.md index 6f925e52cf..d8c4d9e448 100644 --- a/content/riak/kv/2.2.1/learn/glossary.md +++ b/content/riak/kv/2.2.1/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.2.1/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.2.1/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters -[concept crdts]: /riak/kv/2.2.1/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.1/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.1/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.1/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.2.1/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.2.1/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.2.1/developing/api/http -[dev data model]: /riak/kv/2.2.1/developing/data-modeling -[dev data types]: /riak/kv/2.2.1/developing/data-types -[glossary read rep]: /riak/kv/2.2.1/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.2.1/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.2.1/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.2.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.1/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.2.1/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.2.1/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.2.1/developing/api/http +[dev data model]: {{}}riak/kv/2.2.1/developing/data-modeling +[dev data types]: {{}}riak/kv/2.2.1/developing/data-types +[glossary read rep]: {{}}riak/kv/2.2.1/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.2.1/learn/dynamo -[plan cluster capacity]: /riak/kv/2.2.1/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.2.1/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.2.1/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.2.1/learn/dynamo +[plan cluster capacity]: 
{{}}riak/kv/2.2.1/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.2.1/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.2.1/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.2.1/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.1/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.2.1/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.2.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.2.1/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.2.1/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.2.1/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. -* [Bucket Types](/riak/kv/2.2.1/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.2.1/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.2.1/learn/use-cases.md b/content/riak/kv/2.2.1/learn/use-cases.md index 5002de6ecd..907b1db00a 100644 --- a/content/riak/kv/2.2.1/learn/use-cases.md +++ b/content/riak/kv/2.2.1/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.2.1/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.2.1/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.2.1/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.2.1/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.2.1/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.2.1/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.2.1/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.2.1/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.2.1/developing/data-types -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.2.1/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.1/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.2.1/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.2.1/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.2.1/developing/data-modeling/#log-data +[dev data model sensor data]: 
{{}}riak/kv/2.2.1/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.2.1/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.2.1/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.2.1/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.2.1/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.2.1/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.2.1/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.2.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.2.1/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.2.1/learn/why-riak-kv.md b/content/riak/kv/2.2.1/learn/why-riak-kv.md index a11303a995..d91fa589c8 100644 --- a/content/riak/kv/2.2.1/learn/why-riak-kv.md +++ b/content/riak/kv/2.2.1/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.2.1/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.2.1/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.2.1/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.2.1/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.2.1/developing/data-types -[glossary read rep]: /riak/kv/2.2.1/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.2.1/developing/data-types +[glossary read rep]: {{}}riak/kv/2.2.1/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.2.1/release-notes.md b/content/riak/kv/2.2.1/release-notes.md index bee0c218fd..899ce7e0af 100644 --- a/content/riak/kv/2.2.1/release-notes.md +++ b/content/riak/kv/2.2.1/release-notes.md @@ -89,10 +89,10 @@ Once all of the Riak KV clusters have been upgraded to version 2.2.0 or greater, ## Deprecation Notification -* [Link Walking](/riak/kv/2.2.1/developing/api/http/link-walking/) is deprecated and will not work if security is enabled. -* Key Filters are deprecated; we strongly discourage key listing in production due to the overhead involved, so it's better to maintain key indexes as values in Riak (see our [set data type](/riak/kv/2.2.1/developing/data-types/sets/) as a useful tool for such indexes). -* JavaScript MapReduce is deprecated; we have expanded our [Erlang MapReduce](/riak/kv/2.2.1/developing/app-guide/advanced-mapreduce/#mapreduce) documentation to assist with the transition. -* Riak search 1.0 is deprecated in favor of our Solr-based [Riak search 2.0](/riak/kv/2.2.1/developing/usage/search/). 
Version 1.0 will not work if security is enabled. +* [Link Walking]({{}}riak/kv/2.2.1/developing/api/http/link-walking/) is deprecated and will not work if security is enabled. +* Key Filters are deprecated; we strongly discourage key listing in production due to the overhead involved, so it's better to maintain key indexes as values in Riak (see our [set data type]({{}}riak/kv/2.2.1/developing/data-types/sets/) as a useful tool for such indexes). +* JavaScript MapReduce is deprecated; we have expanded our [Erlang MapReduce]({{}}riak/kv/2.2.1/developing/app-guide/advanced-mapreduce/#mapreduce) documentation to assist with the transition. +* Riak search 1.0 is deprecated in favor of our Solr-based [Riak search 2.0]({{}}riak/kv/2.2.1/developing/usage/search/). Version 1.0 will not work if security is enabled. * v2 replication (a component of Riak KV Enterprise) is superseded by v3 and will be removed in the future. * Legacy vnode routing (an early mechanism for managing requests between servers) is deprecated. If `vnode_routing` is set to `legacy` via Riak KV's capability system, it should be removed to prevent upgrade problems in the future. -* Some users in the past have used Riak's internal API (e.g. `riak:local_client/1`); this API may change at any time, so we strongly recommend using our [Erlang client library](http://github.com/basho/riak-erlang-client/) (or [one of the other libraries](/riak/kv/2.2.1/developing/client-libraries/) we support) instead. +* Some users in the past have used Riak's internal API (e.g. `riak:local_client/1`); this API may change at any time, so we strongly recommend using our [Erlang client library](http://github.com/basho/riak-erlang-client/) (or [one of the other libraries]({{}}riak/kv/2.2.1/developing/client-libraries/) we support) instead. diff --git a/content/riak/kv/2.2.1/setup/downgrade.md b/content/riak/kv/2.2.1/setup/downgrade.md index ba970ecc10..9f71df2ce0 100644 --- a/content/riak/kv/2.2.1/setup/downgrade.md +++ b/content/riak/kv/2.2.1/setup/downgrade.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.1/ops/upgrading/rolling-downgrades/ --- -[rolling upgrade]: /riak/kv/2.2.1/setup/upgrading/cluster -[config ref]: /riak/kv/2.2.1/configuring/reference -[concept aae]: /riak/kv/2.2.1/learn/concepts/active-anti-entropy/ -[aae status]: /riak/kv/2.2.1/using/admin/riak-admin/#aae-status +[rolling upgrade]: {{}}riak/kv/2.2.1/setup/upgrading/cluster +[config ref]: {{}}riak/kv/2.2.1/configuring/reference +[concept aae]: {{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/ +[aae status]: {{}}riak/kv/2.2.1/using/admin/riak-admin/#aae-status Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade]. 
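As a sketch of the per-node loop implied by that rolling procedure, assuming a Debian/Ubuntu node; the package file name and node name below are placeholders, not values from the docs:

```bash
# Take one node at a time through the downgrade, then let the cluster settle
riak stop
sudo dpkg -i riak_2.1.4-1_amd64.deb                    # install the older package
riak start
riak-admin wait-for-service riak_kv riak@192.168.1.10  # node name is an example
riak-admin ring-status                                 # confirm the ring has settled
```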
diff --git a/content/riak/kv/2.2.1/setup/installing.md b/content/riak/kv/2.2.1/setup/installing.md index ba9c63b9f7..608527c91e 100644 --- a/content/riak/kv/2.2.1/setup/installing.md +++ b/content/riak/kv/2.2.1/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.2.1/installing/ --- -[install aws]: /riak/kv/2.2.1/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.2.1/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.2.1/setup/installing/freebsd -[install mac osx]: /riak/kv/2.2.1/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.2.1/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.2.1/setup/installing/smartos -[install solaris]: /riak/kv/2.2.1/setup/installing/solaris -[install suse]: /riak/kv/2.2.1/setup/installing/suse -[install windows azure]: /riak/kv/2.2.1/setup/installing/windows-azure -[install source index]: /riak/kv/2.2.1/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.2.1/setup/upgrading +[install aws]: {{}}riak/kv/2.2.1/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.2.1/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.2.1/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.2.1/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.2.1/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.2.1/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.2.1/setup/installing/solaris +[install suse]: {{}}riak/kv/2.2.1/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.2.1/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.2.1/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.2.1/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.2.1/setup/installing/amazon-web-services.md b/content/riak/kv/2.2.1/setup/installing/amazon-web-services.md index 8ffe475476..4e755a4950 100644 --- a/content/riak/kv/2.2.1/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.2.1/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. @@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.2.1/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.2.1/using/security/). 
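For the security group rules referenced above, one hedged way to script them is with the AWS CLI; the group name and CIDR below are placeholders, and 8098/8087 are Riak's default HTTP and Protocol Buffers ports:

```bash
# Open Riak's client-facing ports to your own network range only
aws ec2 authorize-security-group-ingress --group-name riak-nodes \
  --protocol tcp --port 8098 --cidr 10.0.0.0/16   # HTTP API
aws ec2 authorize-security-group-ingress --group-name riak-nodes \
  --protocol tcp --port 8087 --cidr 10.0.0.0/16   # Protocol Buffers API
```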
## Clustering Riak on AWS diff --git a/content/riak/kv/2.2.1/setup/installing/debian-ubuntu.md b/content/riak/kv/2.2.1/setup/installing/debian-ubuntu.md index 5189109feb..b691900288 100644 --- a/content/riak/kv/2.2.1/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.2.1/setup/installing/debian-ubuntu.md @@ -18,10 +18,10 @@ aliases: - /riak/kv/2.2.1/installing/debian-ubuntu/ --- -[install source index]: /riak/kv/2.2.1/setup/installing/source/ -[security index]: /riak/kv/2.2.1/using/security/ -[install source erlang]: /riak/kv/2.2.1/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.1/setup/installing/verify +[install source index]: {{}}riak/kv/2.2.1/setup/installing/source/ +[security index]: {{}}riak/kv/2.2.1/using/security/ +[install source erlang]: {{}}riak/kv/2.2.1/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.1/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.2.1/setup/installing/freebsd.md b/content/riak/kv/2.2.1/setup/installing/freebsd.md index fbdc3e07b5..a407f5d226 100644 --- a/content/riak/kv/2.2.1/setup/installing/freebsd.md +++ b/content/riak/kv/2.2.1/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.2.1/setup/installing/source/erlang -[downloads]: /riak/kv/2.2.1/downloads/ -[install verify]: /riak/kv/2.2.1/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.2.1/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.2.1/downloads/ +[install verify]: {{}}riak/kv/2.2.1/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.2.1/setup/installing/mac-osx.md b/content/riak/kv/2.2.1/setup/installing/mac-osx.md index 2dd6fb077a..d7f4a13a3b 100644 --- a/content/riak/kv/2.2.1/setup/installing/mac-osx.md +++ b/content/riak/kv/2.2.1/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.2.1/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.2.1/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.1/setup/installing/verify +[perf open files]: {{}}riak/kv/2.2.1/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.2.1/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.1/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. You can install from source or download a diff --git a/content/riak/kv/2.2.1/setup/installing/rhel-centos.md b/content/riak/kv/2.2.1/setup/installing/rhel-centos.md index b8828435ad..6f0ae4d25c 100644 --- a/content/riak/kv/2.2.1/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.2.1/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.2.1/setup/installing/source -[install source erlang]: /riak/kv/2.2.1/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.1/setup/installing/verify +[install source index]: {{}}riak/kv/2.2.1/setup/installing/source +[install source erlang]: {{}}riak/kv/2.2.1/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.1/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. 
The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.2.1/setup/installing/smartos.md b/content/riak/kv/2.2.1/setup/installing/smartos.md index 33e6e0aa4c..73c54b9763 100644 --- a/content/riak/kv/2.2.1/setup/installing/smartos.md +++ b/content/riak/kv/2.2.1/setup/installing/smartos.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.2.1/installing/smartos/ --- -[install verify]: /riak/kv/2.2.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.1/setup/installing/verify {{% note title="SmartOS End of Life (EOL) for Riak KV 2.2.1" %}} SmartOS is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). @@ -74,13 +74,13 @@ cat /opt/local/etc/pkgin/repositories.conf Download your version of the Riak binary package for SmartOS: ```bash -curl -o /tmp/riak-2.2.1-SmartOS-x86_64.tgz http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/smartos/1.8/riak-2.2.1-SmartOS-x86_64.tgz +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz ``` Next, install the package: ``` -pkg_add /tmp/riak-2.2.1-SmartOS-x86_64.tgz +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz ``` After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: diff --git a/content/riak/kv/2.2.1/setup/installing/solaris.md b/content/riak/kv/2.2.1/setup/installing/solaris.md index ad18e4b8ed..48ebeb0e91 100644 --- a/content/riak/kv/2.2.1/setup/installing/solaris.md +++ b/content/riak/kv/2.2.1/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.2.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.1/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. 
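On both SmartOS and Solaris, services are typically managed through SMF, so enabling Riak and epmd usually comes down to `svcadm`. A sketch follows, assuming the package registered services under these names.

```bash
# Service names are assumptions; list the actual FMRIs with `svcs -a | grep -i riak`.
sudo svcadm enable -r epmd
sudo svcadm enable -r riak
svcs epmd riak    # both should report "online"
```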
diff --git a/content/riak/kv/2.2.1/setup/installing/source.md b/content/riak/kv/2.2.1/setup/installing/source.md index 76acb75715..17b4d28db5 100644 --- a/content/riak/kv/2.2.1/setup/installing/source.md +++ b/content/riak/kv/2.2.1/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.2.1/setup/installing/source/erlang -[downloads]: /riak/kv/2.2.1/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.2.1/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.2.1/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.2.1/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.2.1/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.2.1/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.2.1/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.2.1/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.2.1/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.2.1/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.2.1/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.2.1/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.2.1/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.2.1/setup/installing/source/erlang.md b/content/riak/kv/2.2.1/setup/installing/source/erlang.md index c03828b937..aaca6f9814 100644 --- a/content/riak/kv/2.2.1/setup/installing/source/erlang.md +++ b/content/riak/kv/2.2.1/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.1/installing/source/erlang/ --- -[install index]: /riak/kv/2.2.1/setup/installing -[security basics]: /riak/kv/2.2.1/using/security/basics +[install index]: {{}}riak/kv/2.2.1/setup/installing +[security basics]: {{}}riak/kv/2.2.1/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho10.tar.gz). 
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.2.1/setup/installing/source/jvm.md b/content/riak/kv/2.2.1/setup/installing/source/jvm.md index 1f1ebddff6..9e32bc5c9b 100644 --- a/content/riak/kv/2.2.1/setup/installing/source/jvm.md +++ b/content/riak/kv/2.2.1/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.2.1/installing/source/jvm/ --- -[usage search]: /riak/kv/2.2.1/developing/usage/search +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.2.1/setup/installing/suse.md b/content/riak/kv/2.2.1/setup/installing/suse.md index 48789b6693..4a5bf1a813 100644 --- a/content/riak/kv/2.2.1/setup/installing/suse.md +++ b/content/riak/kv/2.2.1/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.2.1/installing/suse/ --- -[install verify]: /riak/kv/2.2.1/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.1/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.2.1/setup/installing/verify.md b/content/riak/kv/2.2.1/setup/installing/verify.md index da7ef16c1d..7e694dab24 100644 --- a/content/riak/kv/2.2.1/setup/installing/verify.md +++ b/content/riak/kv/2.2.1/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.1/installing/verify-install/ --- -[client libraries]: /riak/kv/2.2.1/developing/client-libraries -[perf open files]: /riak/kv/2.2.1/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.2.1/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.2.1/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.2.1/developing/client-libraries +[perf open files]: {{}}riak/kv/2.2.1/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.2.1/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.2.1/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.2.1/setup/installing/windows-azure.md b/content/riak/kv/2.2.1/setup/installing/windows-azure.md index fd9b4dcc53..653b359a66 100644 --- a/content/riak/kv/2.2.1/setup/installing/windows-azure.md +++ b/content/riak/kv/2.2.1/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". 
- ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.2.1/setup/planning/backend.md b/content/riak/kv/2.2.1/setup/planning/backend.md index 100864b5b5..7cfdc8c2b8 100644 --- a/content/riak/kv/2.2.1/setup/planning/backend.md +++ b/content/riak/kv/2.2.1/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.2.1/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.1/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.1/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.2.1/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.1/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.2.1/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
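To make the choice concrete: the backend is a single per-node setting in riak.conf, picked up on restart. A sketch, assuming the Debian-style config path:

```bash
# Sketch: switch a node's backend to LevelDB (config path assumed).
sudo sed -i 's/^storage_backend = .*/storage_backend = leveldb/' /etc/riak/riak.conf
riak restart   # backend changes take effect only after a restart
```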
diff --git a/content/riak/kv/2.2.1/setup/planning/backend/bitcask.md b/content/riak/kv/2.2.1/setup/planning/backend/bitcask.md index 50df256f00..895efdeb2b 100644 --- a/content/riak/kv/2.2.1/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.2.1/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.2.1/using/admin/riak-cli -[config reference]: /riak/kv/2.2.1/configuring/reference -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.2.1/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.2.1/setup/planning/backend/multi -[usage search]: /riak/kv/2.2.1/developing/usage/search - -[glossary aae]: /riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.2.1/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.2.1/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.2.1/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.2.1/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.2.1/configuring/reference +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.2.1/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.2.1/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.2.1/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.2.1/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
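As an illustration of how Bitcask is wired up in riak.conf, the sketch below selects it as the backend and touches two of its common knobs; the key names follow the configuration reference, but the values are placeholders, not recommendations.

```bash
# Sketch: enable Bitcask and set illustrative options (values are placeholders).
sudo tee -a /etc/riak/riak.conf > /dev/null <<'EOF'
storage_backend = bitcask
bitcask.data_root = $(platform_data_dir)/bitcask
bitcask.merge.policy = always
EOF
riak restart
```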
diff --git a/content/riak/kv/2.2.1/setup/planning/backend/leveldb.md b/content/riak/kv/2.2.1/setup/planning/backend/leveldb.md index e6bae3047d..9e4ced9f9b 100644 --- a/content/riak/kv/2.2.1/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.2.1/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.2.1/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[config reference]: /riak/kv/2.2.1/configuring/reference -[perf index]: /riak/kv/2.2.1/using/performance -[config reference#aae]: /riak/kv/2.2.1/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.2.1/configuring/reference +[perf index]: {{}}riak/kv/2.2.1/using/performance +[config reference#aae]: {{}}riak/kv/2.2.1/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.2.1/setup/planning/backend/memory.md b/content/riak/kv/2.2.1/setup/planning/backend/memory.md index 35d6c5606c..10109a8f57 100644 --- a/content/riak/kv/2.2.1/setup/planning/backend/memory.md +++ b/content/riak/kv/2.2.1/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.1/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.2.1/configuring/reference -[plan backend multi]: /riak/kv/2.2.1/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.2.1/configuring/reference +[plan backend multi]: {{}}riak/kv/2.2.1/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.2.1/setup/planning/backend/multi.md b/content/riak/kv/2.2.1/setup/planning/backend/multi.md index c5c37b9455..dfe9ea0d6a 100644 --- a/content/riak/kv/2.2.1/setup/planning/backend/multi.md +++ b/content/riak/kv/2.2.1/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.1/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.2.1/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.1/setup/planning/backend/memory -[config reference]: /riak/kv/2.2.1/configuring/reference -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.2.1/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.2.1/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.1/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.2.1/configuring/reference +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.2.1/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.2.1/setup/planning/best-practices.md b/content/riak/kv/2.2.1/setup/planning/best-practices.md index af41649949..519bd2c785 100644 --- a/content/riak/kv/2.2.1/setup/planning/best-practices.md +++ b/content/riak/kv/2.2.1/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.1/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.2.1/using/reference/handoff -[config mapreduce]: /riak/kv/2.2.1/configuring/mapreduce -[glossary aae]: /riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.2.1/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.2.1/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.2.1/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.2.1/setup/planning/bitcask-capacity-calc.md index 280d143341..e4e440a346 100644 --- a/content/riak/kv/2.2.1/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.2.1/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
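If you want a rough number before reaching for the calculators, the arithmetic is simple enough to sketch. Every figure below (per-key overhead, key sizes, workload) is an assumption for illustration, not an authoritative constant.

```bash
# Back-of-the-envelope Bitcask keydir sizing; all figures are assumptions.
awk 'BEGIN {
  keys       = 100e6;   # total objects
  overhead   = 44.5;    # assumed static per-key RAM overhead, in bytes
  bucket_key = 25;      # assumed average bucket + key length, in bytes
  n_val      = 3;       # replicas per object
  nodes      = 5;       # nodes in the cluster
  total = keys * (overhead + bucket_key) * n_val;
  printf "keydir RAM: %.1f GiB cluster-wide, %.1f GiB per node\n",
         total / 2^30, total / (nodes * 2^30);
}'
```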
diff --git a/content/riak/kv/2.2.1/setup/planning/cluster-capacity.md b/content/riak/kv/2.2.1/setup/planning/cluster-capacity.md
index b8500ef768..cde0e06e67 100644
--- a/content/riak/kv/2.2.1/setup/planning/cluster-capacity.md
+++ b/content/riak/kv/2.2.1/setup/planning/cluster-capacity.md
@@ -15,13 +15,13 @@ aliases:
 - /riak/kv/2.2.1/ops/building/planning/cluster
 ---
-[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb
-[plan bitcask capacity]: /riak/kv/2.2.1/setup/planning/bitcask-capacity-calc
-[plan index]: /riak/kv/2.2.1/setup/planning
-[concept replication]: /riak/kv/2.2.1/learn/concepts/replication
-[use admin riak-admin#cluster]: /riak/kv/2.2.1/using/admin/riak-admin/#cluster
-[config reference]: /riak/kv/2.2.1/configuring/reference
-[perf benchmark]: /riak/kv/2.2.1/using/performance/benchmarking
+[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{}}riak/kv/2.2.1/setup/planning/bitcask-capacity-calc
+[plan index]: {{}}riak/kv/2.2.1/setup/planning
+[concept replication]: {{}}riak/kv/2.2.1/learn/concepts/replication
+[use admin riak-admin#cluster]: {{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster
+[config reference]: {{}}riak/kv/2.2.1/configuring/reference
+[perf benchmark]: {{}}riak/kv/2.2.1/using/performance/benchmarking
 [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
diff --git a/content/riak/kv/2.2.1/setup/planning/operating-system.md b/content/riak/kv/2.2.1/setup/planning/operating-system.md
index dfc136323d..c00030be61 100644
--- a/content/riak/kv/2.2.1/setup/planning/operating-system.md
+++ b/content/riak/kv/2.2.1/setup/planning/operating-system.md
@@ -12,7 +12,7 @@ menu:
 toc: true
 ---
-[downloads]: /riak/kv/2.2.1/downloads/
+[downloads]: {{}}riak/kv/2.2.1/downloads/
 We recommend deploying Riak KV on a mainstream Unix-like operating
 system. Mainstream distributions have larger support communities, making
diff --git a/content/riak/kv/2.2.1/setup/planning/start.md b/content/riak/kv/2.2.1/setup/planning/start.md
index 2f9d305675..045c9f51dd 100644
--- a/content/riak/kv/2.2.1/setup/planning/start.md
+++ b/content/riak/kv/2.2.1/setup/planning/start.md
@@ -15,10 +15,10 @@ aliases:
 - /riak/kv/2.2.1/ops/building/planning/system-planning
 ---
-[plan backend]: /riak/kv/2.2.1/setup/planning/backend
-[plan cluster capacity]: /riak/kv/2.2.1/setup/planning/cluster-capacity
-[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask
-[plan bitcask capacity]: /riak/kv/2.2.1/setup/planning/bitcask-capacity-calc
+[plan backend]: {{}}riak/kv/2.2.1/setup/planning/backend
+[plan cluster capacity]: {{}}riak/kv/2.2.1/setup/planning/cluster-capacity
+[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{}}riak/kv/2.2.1/setup/planning/bitcask-capacity-calc
 Here are some steps and recommendations for designing and configuring your
 Riak cluster.
diff --git a/content/riak/kv/2.2.1/setup/upgrading/checklist.md b/content/riak/kv/2.2.1/setup/upgrading/checklist.md index dc473a64b6..0231b53789 100644 --- a/content/riak/kv/2.2.1/setup/upgrading/checklist.md +++ b/content/riak/kv/2.2.1/setup/upgrading/checklist.md @@ -15,24 +15,24 @@ aliases: - /riak/kv/2.2.1/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.2.1/using/performance/open-files-limit -[perf index]: /riak/kv/2.2.1/using/performance +[perf open files]: {{}}riak/kv/2.2.1/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.2.1/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.2.1/using/security/basics -[cluster ops load balance]: /riak/kv/2.2.1/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.2.1/configuring/reference -[config backend]: /riak/kv/2.2.1/configuring/backend -[usage search]: /riak/kv/2.2.1/developing/usage/search -[usage conflict resolution]: /riak/kv/2.2.1/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.2.1/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.2.1/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.2.1/using/admin/commands -[use admin riak control]: /riak/kv/2.2.1/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.2.1/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.2.1/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.2.1/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.2.1/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.2.1/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.2.1/configuring/reference +[config backend]: {{}}riak/kv/2.2.1/configuring/backend +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.2.1/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.2.1/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.2.1/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.2.1/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.2.1/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.2.1/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.2.1/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.2.1/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition. 
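A handful of commands cover several of the checklist topics linked above; treat this as a sketch, since acceptable thresholds and output depend on your deployment.

```bash
ulimit -n                 # open-files limit; raise it per the linked guide
riak-admin member-status  # every member should be listed as "valid"
riak-admin ring-status    # ring should be ready, with no pending changes
riak-admin diag           # Riaknostic's built-in health checks
```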
diff --git a/content/riak/kv/2.2.1/setup/upgrading/cluster.md b/content/riak/kv/2.2.1/setup/upgrading/cluster.md
new file mode 100644
index 0000000000..9ccc3b1718
--- /dev/null
+++ b/content/riak/kv/2.2.1/setup/upgrading/cluster.md
@@ -0,0 +1,298 @@
+---
+title: "Upgrading a Cluster"
+description: ""
+project: "riak_kv"
+project_version: "2.2.1"
+menu:
+  riak_kv-2.2.1:
+    name: "Upgrading a Cluster"
+    identifier: "upgrading_cluster"
+    weight: 102
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.2.1/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.2.1/ops/upgrading/rolling-upgrades/
+---
+
+[production checklist]: {{}}riak/kv/2.2.1/setup/upgrading/checklist
+[use admin riak control]: {{}}riak/kv/2.2.1/using/admin/riak-control
+[use admin commands]: {{}}riak/kv/2.2.1/using/admin/commands
+[use admin riak-admin]: {{}}riak/kv/2.2.1/using/admin/riak-admin
+[usage secondary-indexes]: {{}}riak/kv/2.2.1/developing/usage/secondary-indexes
+[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{}}riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter
+[jmx monitor]: {{}}riak/kv/2.2.1/using/reference/jmx
+[snmp]: {{}}riak/kv/2.2.1/using/reference/snmp
+
+{{% note title="Note on upgrading Riak KV from older versions" %}}
+Riak KV upgrades are tested and supported for two feature release versions.
+For example, upgrades from 1.1.x to 1.3.x are tested and supported,
+while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new
+version of Riak KV that is more than two feature releases ahead, we
+recommend first upgrading to an intermediate version. For example, in an
+upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x
+before upgrading to 1.4.x.
+
+If you run [Riak Control]({{}}riak/kv/2.2.1/using/admin/riak-control), you should disable it during the rolling upgrade process.
+{{% /note %}}
+
+Riak KV nodes negotiate with each other to determine supported
+operating modes. This allows clusters containing mixed versions of Riak KV
+to properly interoperate without special configuration, and simplifies
+rolling upgrades.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading.
+
+## Debian/Ubuntu
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Debian/Ubuntu packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up the Riak KV node's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node that you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
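Between nodes, it can be worth one extra sanity pass beyond steps 6 and 7; the node name below is an example.

```bash
riak-admin member-status                               # all members "valid"
riak-admin wait-for-service riak_kv riak@192.168.1.11  # upgraded node serves KV
riak-admin transfers                                   # until "No transfers active"
```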
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node that you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d »riak_package_name«.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node that you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7. The app.config file from `riak_ee` will be significantly different from your backed-up file.
While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+    * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_jmx` --- See [JMX Monitoring][jmx monitor] for more information.
+    * `snmp` --- See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should examine any custom patches contained in the
+`basho-patches` directory to determine whether they still apply to the
+upgraded version. If you find that patches no longer apply, you should
+remove them from the `basho-patches` directory prior to operating the
+node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is also a good idea to verify some basic configuration and the general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
diff --git a/content/riak/kv/2.2.1/setup/upgrading/search.md b/content/riak/kv/2.2.1/setup/upgrading/search.md
new file mode 100644
index 0000000000..751fb5916c
--- /dev/null
+++ b/content/riak/kv/2.2.1/setup/upgrading/search.md
@@ -0,0 +1,276 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.2.1"
+menu:
+  riak_kv-2.2.1:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.2.1/ops/advanced/upgrading-search-2
+  - /riak/kv/2.2.1/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed [Yokozuna](../../../using/reference/search) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e. no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+And please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down.
As with all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage, as analysis will temporarily be performed
+by both systems. Finally, Solr runs in a JVM process that requires its own
+RAM. A good start is 2 GB, but more will be required for heavier
+workloads. Conversely, do not make the heap too large, as that can
+cause lengthy garbage-collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data, as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+> Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space from merge index, as its GC
+algorithm is bad at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the option is called `yokozuna`.
+   If you've chosen to upgrade to the new `riak.conf` config format, it's
+   called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+        %% Other configs
+        {enabled, true},
+        %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+    To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+    instructions to learn how to define your XML file. Once you've created
+    the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. For each bucket that is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is where all of your index data will be migrated.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+    Once a bucket is associated with the new Search, all objects that are
+    written or modified in Riak will be indexed by **both** legacy and new
+    Search. However, the HTTP and client query interfaces will still
+    continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+    Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+    the following code into the shell. It clears the Search hash trees for
+    each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+    ```
+
+    Press `Ctrl-D` to exit from the attached shell.
+
+    In the background AAE will rebuild the hash trees and exchange them
+    with KV. These exchanges will notice objects are missing and index
+    them in new Search.
+
+
+
+6. Monitor the AAE status of every node until a full round of exchanges
+has occurred on every node.
+
+    ```bash
+    riak-admin search aae-status
+    ```
+
+    First, you must wait until all trees are rebuilt. This may take a
+    while, as each node is configured, by default, to build a maximum of
+    one tree per hour. You can determine when a tree is built by looking
+    at the `Entropy Trees` section. When a tree is not built it will show
+    `--` under the `Built (ago)` column. Otherwise, it will list how long
+    ago the tree was built in a human-friendly format. Here is an example
+    of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   --
+    319703483166135013357056057156686910549735243776   --
+    ...
+ ``` + + Here is an example of built trees: + + ``` + ================================ Entropy Trees ================================ + Index Built (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 12.3 hr + 319703483166135013357056057156686910549735243776 5.3 hr + ... + ``` + + After all the trees are built you then have to wait for a full + exchange round to occur for every partition on every node. That is, + the full exchange round must be **NEWER** than the time the tree was + built. That way you know the exchange was based on the latest tree. + The exchange information is found under the `Exchanges` section. + Under that section there are two columns: `Last (ago)` and `All + (ago)`. In this was you want to wait until the `All (ago)` section is + newer than the value of `Built (ago)` in the `Entropy Trees` section. + For example, given the entropy tree output above this output would + indicate both partitions have had a full exchange round since the + latest tree was built: + + ``` + ================================== Exchanges ================================== + Index Last (ago) All (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 12.1 hr 12.1 hr + 319703483166135013357056057156686910549735243776 5.1 hr 5.2 hr + ... + ``` + + Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than + `5.3 hr`. Once the exchange is newer for every partition on every + node you know that AAE has brought all new indexes up to date. + +7. Next, call the following command that will give HTTP and PB query +control to the new Riak Search. + + ```curl + riak-admin search switch-to-new-search + ``` + +
+    <div class="note">
+    <div class="title">Check Results Before Switching (Optional)</div>
+
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After `switch-to-new-search` is run, all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching, you can use its dedicated
+    HTTP resource at `/search/query/?q=...`.
+    </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+        %% Other configs
+        {enabled, false},
+        %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search it causes
+the commit hooks to persist even when legacy Search is disabled and the
+search property is set to false.
+
+    New Search has code to expunge the legacy hooks from the raw ring, but
+    it only runs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration.
diff --git a/content/riak/kv/2.2.1/setup/upgrading/version.md b/content/riak/kv/2.2.1/setup/upgrading/version.md
index ff20b56523..ca71509bbf 100644
--- a/content/riak/kv/2.2.1/setup/upgrading/version.md
+++ b/content/riak/kv/2.2.1/setup/upgrading/version.md
@@ -19,18 +19,18 @@ aliases:
 ---
-[production checklist]: /riak/kv/2.2.1/setup/upgrading/checklist
-[use admin riak control]: /riak/kv/2.2.1/using/admin/riak-control
-[use admin commands]: /riak/kv/2.2.1/using/admin/commands
-[use admin riak-admin]: /riak/kv/2.2.1/using/admin/riak-admin
-[usage secondary-indexes]: /riak/kv/2.2.1/developing/usage/secondary-indexes
-[release notes]: /riak/kv/2.2.1/release-notes
+[production checklist]: {{}}riak/kv/2.2.1/setup/upgrading/checklist
+[use admin riak control]: {{}}riak/kv/2.2.1/using/admin/riak-control
+[use admin commands]: {{}}riak/kv/2.2.1/using/admin/commands
+[use admin riak-admin]: {{}}riak/kv/2.2.1/using/admin/riak-admin
+[usage secondary-indexes]: {{}}riak/kv/2.2.1/developing/usage/secondary-indexes
+[release notes]: {{}}riak/kv/2.2.1/release-notes
 [riak enterprise]: http://basho.com/products/riak-kv/
-[cluster ops mdc]: /riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter
-[config v3 mdc]: /riak/kv/2.2.1/configuring/v3-multi-datacenter
-[jmx monitor]: /riak/kv/2.2.1/using/reference/jmx
-[snmp]: /riak/kv/2.2.1/using/reference/snmp
-[Release Notes]: /riak/kv/2.2.1/release-notes
+[cluster ops mdc]: {{}}riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter
+[jmx monitor]: {{}}riak/kv/2.2.1/using/reference/jmx
+[snmp]: {{}}riak/kv/2.2.1/using/reference/snmp
+[Release Notes]: {{}}riak/kv/2.2.1/release-notes
 ## Overview
diff --git a/content/riak/kv/2.2.1/using/admin/commands.md b/content/riak/kv/2.2.1/using/admin/commands.md
index d5a77ce6c2..ca3fe18ad8 100644
--- a/content/riak/kv/2.2.1/using/admin/commands.md
+++ b/content/riak/kv/2.2.1/using/admin/commands.md
@@ -15,11 +15,11 @@ aliases:
 - /riak/kv/2.2.1/ops/running/cluster-admin
 ---
-[use admin riak-admin#cluster]: /riak/kv/2.2.1/using/admin/riak-admin/#cluster
-[concept clusters]: 
/riak/kv/2.2.1/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.2.1/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.2.1/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.2.1/using/admin/riak-admin.md b/content/riak/kv/2.2.1/using/admin/riak-admin.md index 95ab330afa..985ed802a7 100644 --- a/content/riak/kv/2.2.1/using/admin/riak-admin.md +++ b/content/riak/kv/2.2.1/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.2.1/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.2.1/configuring/reference -[use admin commands]: /riak/kv/2.2.1/using/admin/commands -[use admin commands#join]: /riak/kv/2.2.1/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.2.1/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.2.1/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.2.1/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.2.1/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.2.1/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.2.1/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.2.1/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.2.1/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.2.1/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.2.1/setup/downgrade -[security index]: /riak/kv/2.2.1/using/security/ -[security managing]: /riak/kv/2.2.1/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.2.1/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.2.1/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.2.1/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.2.1/using/cluster-operations/strong-consistency -[cluster ops handoff]: /riak/kv/2.2.1/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.2.1/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.2.1/configuring/reference +[use admin commands]: {{}}riak/kv/2.2.1/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.2.1/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.2.1/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.2.1/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.2.1/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.2.1/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.2.1/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.2.1/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.2.1/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.2.1/using/cluster-operations/inspecting-node +[use ref 
monitoring]: {{}}riak/kv/2.2.1/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.2.1/setup/downgrade +[security index]: {{}}riak/kv/2.2.1/using/security/ +[security managing]: {{}}riak/kv/2.2.1/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.2.1/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.2.1/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.2.1/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.2.1/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.2.1/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.2.1/using/admin/riak-admin/#stats ## `riak-admin` diff --git a/content/riak/kv/2.2.1/using/admin/riak-cli.md b/content/riak/kv/2.2.1/using/admin/riak-cli.md index b70ae26938..2e5be75754 100644 --- a/content/riak/kv/2.2.1/using/admin/riak-cli.md +++ b/content/riak/kv/2.2.1/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.1/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.2.1/configuring/reference/ +[configuration file]: {{}}riak/kv/2.2.1/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.2.1/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.2.1/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.2.1/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.2.1/configuring/reference/ ## riak diff --git a/content/riak/kv/2.2.1/using/admin/riak-control.md b/content/riak/kv/2.2.1/using/admin/riak-control.md index 180a0b9f44..5645d6f240 100644 --- a/content/riak/kv/2.2.1/using/admin/riak-control.md +++ b/content/riak/kv/2.2.1/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.2.1/configuring/reference +[config reference]: {{}}riak/kv/2.2.1/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.2.1/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.2.1/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. 
Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.2.1/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.2.1/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.2.1/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.2.1/using/cluster-operations/active-anti-entropy.md index 5b83a3a5c0..d3631eeb14 100644 --- a/content/riak/kv/2.2.1/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.2.1/using/cluster-operations/active-anti-entropy.md @@ -15,8 +15,8 @@ aliases: - /riak/2.2.1/ops/advanced/aae/ --- -[config search#throttledelay]: /riak/kv/2.2.1/configuring/search/#search-anti-entropy-throttle-$tier-delay -[config search#throttle]: riak/kv/2.2.1/configuring/search/#search-anti-entropy-throttle +[config search#throttledelay]: {{}}riak/kv/2.2.1/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{}}riak/kv/2.2.1/configuring/search/#search-anti-entropy-throttle Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. @@ -57,12 +57,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. 
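Once the node is back up, AAE's own status report is the quickest way to confirm that the new setting took effect; a sketch follows (output format varies by release).

```bash
riak-admin aae-status   # sections: Exchanges, Entropy Trees, Keys Repaired
```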
## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -90,7 +90,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes.md index d5de11f049..cb96e0e38a 100644 --- a/content/riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.2.1/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.2.1/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.2.1/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.2.1/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.2.1/using/cluster-operations/backing-up.md b/content/riak/kv/2.2.1/using/cluster-operations/backing-up.md index c65d0baa66..1c9c4b3ccd 100644 --- a/content/riak/kv/2.2.1/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.2.1/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.1/ops/running/backups --- -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters -[config reference]: /riak/kv/2.2.1/configuring/reference -[plan backend leveldb]: /riak/kv/2.2.1/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.1/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency -[concept aae]: /riak/kv/2.2.1/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.2.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.2.1/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.1/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.2.1/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.2.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.2.1/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead.
{{% /note %}}

@@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally:

1. Mark the original instance down in the cluster using
-   [`riak-admin down <node>`](/riak/kv/2.2.1/using/admin/riak-admin/#down)
+   [`riak-admin down <node>`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#down)
2. Join the restored node to the cluster using
-   [`riak-admin cluster join <node>`](/riak/kv/2.2.1/using/admin/riak-admin/#cluster-join)
+   [`riak-admin cluster join <node>`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster-join)
3. Replace the original instance with the renamed instance with
-   [`riak-admin cluster force-replace <node1> <node2>`](/riak/kv/2.2.1/using/admin/riak-admin/#cluster-force-replace)
+   [`riak-admin cluster force-replace <node1> <node2>`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster-force-replace)
4. Plan the changes to the cluster with `riak-admin cluster plan`
5. Finally, commit the cluster changes with `riak-admin cluster commit`

{{% note %}}
-For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.2.1/using/admin/).
+For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.2.1/using/admin/).
{{% /note %}}

For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`.

@@ -264,4 +264,4 @@ and for any other nodes whose names have changed:

## Restoring a Cluster

-Restoring a cluster from backups is documented [on its own page](/riak/kv/2.2.1/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
+Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.2.1/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
diff --git a/content/riak/kv/2.2.1/using/cluster-operations/bucket-types.md b/content/riak/kv/2.2.1/using/cluster-operations/bucket-types.md
index 71b730cb50..4567866494 100644
--- a/content/riak/kv/2.2.1/using/cluster-operations/bucket-types.md
+++ b/content/riak/kv/2.2.1/using/cluster-operations/bucket-types.md
@@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the
next section.
{{% /note %}}

-In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.2.1/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.2.1/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
default bucket namespace but require an additional setup step on the
command line.
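That setup step is a two-stage create/activate sequence; a minimal sketch, using a hypothetical type name:

```bash
# Create a bucket type with custom properties, then activate it cluster-wide
riak-admin bucket-type create users '{"props":{"n_val":3}}'
riak-admin bucket-type activate users
riak-admin bucket-type status users   # verify the type reports as active
```
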
diff --git a/content/riak/kv/2.2.1/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.2.1/using/cluster-operations/changing-cluster-info.md
index f7c108c248..1b470503f1 100644
--- a/content/riak/kv/2.2.1/using/cluster-operations/changing-cluster-info.md
+++ b/content/riak/kv/2.2.1/using/cluster-operations/changing-cluster-info.md
@@ -15,7 +15,7 @@ aliases:
  - /riak/kv/2.2.1/ops/running/nodes/renaming
---

-[config reference]: /riak/kv/2.2.1/configuring/reference
+[config reference]: {{}}riak/kv/2.2.1/configuring/reference

## Change the Node Name

@@ -117,7 +117,7 @@ To rename a single-node development cluster:

For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node.

-Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.2.1/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.2.1/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime.
+Prior to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime.

There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario.

diff --git a/content/riak/kv/2.2.1/using/cluster-operations/handoff.md b/content/riak/kv/2.2.1/using/cluster-operations/handoff.md
index ea658ad9fe..a2b756f8ec 100644
--- a/content/riak/kv/2.2.1/using/cluster-operations/handoff.md
+++ b/content/riak/kv/2.2.1/using/cluster-operations/handoff.md
@@ -68,7 +68,7 @@ riak-admin handoff disable both --all
## Other Command-line Tools

In addition to enabling and disabling handoff, the
-[`riak-admin`](/riak/kv/2.2.1/using/admin/riak-admin/) interface enables you to
+[`riak-admin`]({{}}riak/kv/2.2.1/using/admin/riak-admin/) interface enables you to
retrieve a summary of handoff-related activity and other information.

### summary

@@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this:

### config

-This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.2.1/configuring/reference/#intra-cluster-handoff) on each node in
+This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.2.1/configuring/reference/#intra-cluster-handoff) on each node in
the cluster, including:

* `transfer_limit`
diff --git a/content/riak/kv/2.2.1/using/cluster-operations/logging.md b/content/riak/kv/2.2.1/using/cluster-operations/logging.md
index f847c58f25..f8544f26c8 100644
--- a/content/riak/kv/2.2.1/using/cluster-operations/logging.md
+++ b/content/riak/kv/2.2.1/using/cluster-operations/logging.md
@@ -14,7 +14,7 @@ toc: true

If you'd like to enable debug logging on the current node, i.e. 
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.2.1/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.2.1/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.2.1/using/cluster-operations/replacing-node.md b/content/riak/kv/2.2.1/using/cluster-operations/replacing-node.md index 1f72d7ec7a..b95b28f1d6 100644 --- a/content/riak/kv/2.2.1/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.2.1/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.2.1/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.2.1/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.2.1/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.2.1/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.2.1/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.2.1/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.2.1/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.2.1/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.2.1/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.2.1/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.2.1/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.2.1/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.2.1/using/cluster-operations/strong-consistency.md index 94f1f8a4b4..5403fe5743 100644 --- a/content/riak/kv/2.2.1/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.2.1/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.2.1/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.2.1/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.2.1/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.2.1/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.2.1/using/cluster-operations/v2-multi-datacenter.md index 9000304e08..a9c712b440 100644 --- a/content/riak/kv/2.2.1/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.1/using/cluster-operations/v2-multi-datacenter.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication system is largely @@ -163,7 +163,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -183,7 +183,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -221,7 +221,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.2.1/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.2.1/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -242,7 +242,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
diff --git a/content/riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter.md
index a365829380..f534c463b6 100644
--- a/content/riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter.md
+++ b/content/riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter.md
@@ -17,11 +17,11 @@ aliases:
  - /riak/kv/2.2.1/ops/mdc/v3/operations
---

-[config v3 mdc]: /riak/kv/2.2.1/configuring/v3-multi-datacenter
-[config v3 nat]: /riak/kv/2.2.1/configuring/v3-multi-datacenter/nat
-[config v3 quickstart]: /riak/kv/2.2.1/configuring/v3-multi-datacenter/quick-start
-[config v3 ssl]: /riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl
-[ref v3 stats]: /riak/kv/2.2.1/using/reference/multi-datacenter/statistics
+[config v3 mdc]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter
+[config v3 nat]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{}}riak/kv/2.2.1/using/reference/multi-datacenter/statistics

This document explains how to manage replication with the `riak-repl`
command. Some of these commands can be set or behavior altered by
diff --git a/content/riak/kv/2.2.1/using/performance.md b/content/riak/kv/2.2.1/using/performance.md
index 5e4936ea6f..fcdc0a1a64 100644
--- a/content/riak/kv/2.2.1/using/performance.md
+++ b/content/riak/kv/2.2.1/using/performance.md
@@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes.

For performance and tuning recommendations specific to running Riak
-clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.2.1/using/performance/amazon-web-services).
+clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.2.1/using/performance/amazon-web-services).

{{% note title="Note on other operating systems" %}}
Unless otherwise specified, the tunings recommended below are for Linux
@@ -253,12 +253,12 @@ These settings have been tested and benchmarked by Basho in nodes with

Riak and supporting tools can consume a large number of open file
handles during normal operation. For stability, increasing the
-open files limit is necessary. See [Open Files Limit](/riak/kv/2.2.1/using/performance/open-files-limit/) for more
+open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.2.1/using/performance/open-files-limit/) for more
details.
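As a quick check, you can inspect the limit currently in effect and raise it persistently; a sketch (the values below are illustrative, not recommendations):

```bash
# Show the open-files limit for the current user/session
ulimit -n

# Example /etc/security/limits.conf entries for a user named riak:
#   riak soft nofile 65536
#   riak hard nofile 200000
```
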
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.2.1/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.2.1/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.2.1/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.2.1/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.2.1/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.2.1/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.2.1/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.2.1/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.2.1/using/performance/benchmarking.md b/content/riak/kv/2.2.1/using/performance/benchmarking.md index d92ed8bd5e..c3355d3e88 100644 --- a/content/riak/kv/2.2.1/using/performance/benchmarking.md +++ b/content/riak/kv/2.2.1/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.2.1/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.2.1/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.2.1/using/performance/latency-reduction.md b/content/riak/kv/2.2.1/using/performance/latency-reduction.md index 679200b927..0c769988c6 100644 --- a/content/riak/kv/2.2.1/using/performance/latency-reduction.md +++ b/content/riak/kv/2.2.1/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.2.1/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.2.1/using/performance/multi-datacenter-tuning.md index 0efb4f4bd0..fa575a1c82 100644 --- a/content/riak/kv/2.2.1/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.2.1/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.2.1/using/performance +[perf index]: {{}}riak/kv/2.2.1/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.2.1/using/performance/open-files-limit.md b/content/riak/kv/2.2.1/using/performance/open-files-limit.md index 3ed6205c2e..27e5343ef9 100644 --- a/content/riak/kv/2.2.1/using/performance/open-files-limit.md +++ b/content/riak/kv/2.2.1/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/ops/tuning/open-files-limit/ --- -[plan backend]: /riak/kv/2.2.1/setup/planning/backend/ +[plan backend]: {{}}riak/kv/2.2.1/setup/planning/backend/ [blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files Riak KV can accumulate a large number of open file handles during operation. 
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
diff --git a/content/riak/kv/2.2.1/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.2.1/using/performance/v2-scheduling-fullsync.md
index 78ff1e105f..42dbf402d2 100644
--- a/content/riak/kv/2.2.1/using/performance/v2-scheduling-fullsync.md
+++ b/content/riak/kv/2.2.1/using/performance/v2-scheduling-fullsync.md
@@ -14,7 +14,7 @@ commercial_offering: true
---

{{% note title="Deprecation Warning" %}}
-v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead.
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands) instead.
{{% /note %}}

diff --git a/content/riak/kv/2.2.1/using/reference/bucket-types.md b/content/riak/kv/2.2.1/using/reference/bucket-types.md
index 340e8238c7..3dd0324d51 100644
--- a/content/riak/kv/2.2.1/using/reference/bucket-types.md
+++ b/content/riak/kv/2.2.1/using/reference/bucket-types.md
@@ -14,7 +14,7 @@ toc: true

Bucket types allow groups of buckets to share configuration details and
for Riak users to manage bucket properties more efficiently than in the
-older configuration system based on [bucket properties](/riak/kv/2.2.1/developing/usage/bucket-types/#bucket-properties-and-operations).
+older configuration system based on [bucket properties]({{}}riak/kv/2.2.1/developing/usage/bucket-types/#bucket-properties-and-operations).

{{% note title="Important note on cluster downgrades" %}}
If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
@@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version.

The older configuration system, based on bucket properties, involves
setting bucket properties for specific buckets either through
-[HTTP](/riak/kv/2.2.1/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+[HTTP]({{}}riak/kv/2.2.1/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.2.1/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
modify a wide range of properties, from `n_val` to `allow_mult` and far
beyond.
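Under that older scheme, a per-bucket change is a single HTTP call; a sketch against a default local node (bucket name and values are hypothetical):

```bash
# Set properties directly on one bucket (the pre-bucket-types approach)
curl -XPUT http://localhost:8098/buckets/my_bucket/props \
  -H "Content-Type: application/json" \
  -d '{"props":{"n_val":5,"allow_mult":true}}'
```
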
@@ -39,13 +39,13 @@ with a few crucial differences:
  previous system required configuration to be set on a per-bucket basis
* Nearly all bucket properties can be updated using bucket types, except the
  `datatype` and `consistent` properties, related to
-  [Riak data types](/riak/kv/2.2.1/developing/data-types), and [strong consistency](/riak/kv/2.2.1/developing/app-guide/strong-consistency) respectively
+  [Riak data types]({{}}riak/kv/2.2.1/developing/data-types), and [strong consistency]({{}}riak/kv/2.2.1/developing/app-guide/strong-consistency) respectively
* Bucket types are more performant than bucket properties because
  divergence from Riak's defaults doesn't have to be gossiped around the
  cluster for every bucket, which means less computational overhead

It is important to note that buckets are not assigned types in the same
-way that they are configured when using [bucket properties](/riak/kv/2.2.1/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a
+way that they are configured when using [bucket properties]({{}}riak/kv/2.2.1/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a
bucket `my_bucket` and assign it a type the way that you would, say, set
`allow_mult` to `false` or `n_val` to `5`, because there is no `type`
parameter contained within the bucket's properties (i.e.
@@ -119,7 +119,7 @@ object of the following form:

> **Getting started with Riak clients**
>
-> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.1/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.2.1/developing/getting-started) section.
+> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.1/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.1/developing/getting-started) section.

If creation is successful, you should see the following output:

@@ -521,7 +521,7 @@ associated with the `default` bucket type:

## Bucket Types and the `allow_mult` Setting

-Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.2.1/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`.
+Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.2.1/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`.

In version 2.0, this is changing in a subtle way. Now, there are two
different default settings for `allow_mult` in play:

@@ -534,7 +534,7 @@ different default settings for `allow_mult` in play:

The consequence is that applications that have previously ignored
conflict resolutions in certain buckets (or all buckets) can continue to
-do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.2.1/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution) with the appropriate application-side business logic.
To give an example, let's have a look at the properties associated with
the `default` bucket type:

@@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
## Bucket Type Example

Let's say that you'd like to create a bucket type called
-`user_account_bucket` with a [pre-commit hook](/riak/kv/2.2.1/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
-hooks](/riak/kv/2.2.1/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.2.1/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{}}riak/kv/2.2.1/developing/usage/commit-hooks/#post-commit-hooks) called `welcome_email` and `update_registry`. This would involve four steps:

1. Creating a JavaScript object containing the appropriate `props`
   settings:
@@ -714,7 +714,7 @@ curl -XPUT \

In this example, the bucket `sensitive_user_data` bears the
configuration established by the `no_siblings` bucket type, and it bears
that configuration _on the basis of the query's structure_. This is
-because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.2.1/learn/concepts/buckets) and [keys](/riak/kv/2.2.1/learn/concepts/keys-and-objects).
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.2.1/learn/concepts/buckets) and [keys]({{}}riak/kv/2.2.1/learn/concepts/keys-and-objects).

Let's say that we're using Riak to store internet memes. We've been
using a bucket called `current_memes` using the bucket type
diff --git a/content/riak/kv/2.2.1/using/reference/custom-code.md b/content/riak/kv/2.2.1/using/reference/custom-code.md
index cc15e66d93..7304ef49a1 100644
--- a/content/riak/kv/2.2.1/using/reference/custom-code.md
+++ b/content/riak/kv/2.2.1/using/reference/custom-code.md
@@ -16,10 +16,10 @@ aliases:
---

Riak supports the use of Erlang named functions in compiled modules for
-[pre/post-commit hooks](/riak/kv/2.2.1/developing/usage/commit-hooks), and MapReduce operations. This
+[pre/post-commit hooks]({{}}riak/kv/2.2.1/developing/usage/commit-hooks), and MapReduce operations. This
doc contains installation steps with simple examples for each use case.

-Your developers can compile [custom erlang code](/riak/kv/2.2.1/developing/usage/commit-hooks), which
+Your developers can compile [custom Erlang code]({{}}riak/kv/2.2.1/developing/usage/commit-hooks), which
they can send to you as a *beam* file. You should note that in Erlang, a
file must have the same name as the module. So if you are given a file
named `validate_json.beam`, do not rename it.

@@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the
Riak key value store has fully initialized and become available for use.

This is done with the `riak-admin wait-for-service` command as detailed
-in the [Commands documentation](/riak/kv/2.2.1/using/admin/riak-admin/#wait-for-service).
+in the [Commands documentation]({{}}riak/kv/2.2.1/using/admin/riak-admin/#wait-for-service).
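A sketch of that per-node check, with a hypothetical node name:

```bash
# After restarting a node, block until its KV service is available
# before moving on to the next node in the rolling restart
riak-admin wait-for-service riak_kv riak@node1.example.com
```
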
{{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.2.1/using/reference/handoff.md b/content/riak/kv/2.2.1/using/reference/handoff.md index a0dd1b61cf..096ba8ee6c 100644 --- a/content/riak/kv/2.2.1/using/reference/handoff.md +++ b/content/riak/kv/2.2.1/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.2.1/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.2.1/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.2.1/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.2.1/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.2.1/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.2.1/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.2.1/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.2.1/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.2.1/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.2.1/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.2.1/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.2.1/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.2.1/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.2.1/configuring/reference/#vnode_management_timer). 
Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.2.1/using/reference/jmx.md b/content/riak/kv/2.2.1/using/reference/jmx.md index d6170f720b..e3736c140f 100644 --- a/content/riak/kv/2.2.1/using/reference/jmx.md +++ b/content/riak/kv/2.2.1/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.1/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.2.1/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.2.1/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.2.1/using/reference/logging.md b/content/riak/kv/2.2.1/using/reference/logging.md index 8efb5ac9da..604000b7bd 100644 --- a/content/riak/kv/2.2.1/using/reference/logging.md +++ b/content/riak/kv/2.2.1/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.1/ops/running/logging --- -[cluster ops log]: /riak/kv/2.2.1/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.2.1/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files](/riak/kv/2.2.1/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.2.1/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.2.1/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.2.1/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. 
* `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.2.1/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.2.1/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.2.1/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.2.1/using/reference/multi-datacenter/comparison.md index 6414e011bf..7514d86727 100644 --- a/content/riak/kv/2.2.1/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.2.1/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.2.1/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.2.1/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.2.1/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.2.1/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. -* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.2.1/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.2.1/using/reference/runtime-interaction.md b/content/riak/kv/2.2.1/using/reference/runtime-interaction.md index a0c620287c..7999a49b1b 100644 --- a/content/riak/kv/2.2.1/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.2.1/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.1/ops/advanced/runtime --- -[config reference]: /riak/kv/2.2.1/configuring/reference -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.2.1/configuring/reference +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.2.1/using/reference/search.md b/content/riak/kv/2.2.1/using/reference/search.md index 6c81ac06b8..833fb554cb 100644 --- a/content/riak/kv/2.2.1/using/reference/search.md +++ b/content/riak/kv/2.2.1/using/reference/search.md @@ -15,21 +15,21 @@ aliases: - /riak/kv/2.2.1/dev/advanced/search --- -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters -[configuring search]: /riak/kv/2.2.1/configuring/search +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters +[configuring search]: {{}}riak/kv/2.2.1/configuring/search > **Note on search 2.0 vs. legacy search** > > This document refers to Riak search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). 
For information about the deprecated Riak search, visit [the old Using Riak search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/).
+Yokozuna).

This is a more detailed overview of the concepts and reasons behind the
design of Yokozuna, for those interested. If you're simply looking to use Riak
-search, you should check out the [Using Search](/riak/kv/2.2.1/developing/usage/search) document.
+search, you should check out the [Using Search]({{}}riak/kv/2.2.1/developing/usage/search) document.

-![Yokozuna](/images/yokozuna.png)
+![Yokozuna]({{}}images/yokozuna.png)

## Riak Search is Erlang

@@ -127,7 +127,7 @@ but logically partition them in KV by using a date as the bucket name.

A bucket _cannot_ be associated with many indexes---the `search_index`
property must be a single name, not a list.

-See the [main Search documentation](/riak/kv/2.2.1/developing/usage/search/#simple-setup) for details on creating an index.
+See the [main Search documentation]({{}}riak/kv/2.2.1/developing/usage/search/#simple-setup) for details on creating an index.

## Extractors

@@ -289,7 +289,7 @@ The corresponding date type is declared under `<types>` like so.

```
```

-You can also find more information on to how customize your own [search schema](/riak/kv/2.2.1/developing/usage/search-schemas).
+You can also find more information on how to customize your own [search schema]({{}}riak/kv/2.2.1/developing/usage/search-schemas).

Yokozuna comes bundled with a
[default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
called `_yz_default`. This is an extremely general schema which makes
@@ -299,7 +299,7 @@ indexed.

## Active Anti-Entropy (AAE)

-[Active Anti-Entropy](/riak/kv/2.2.1/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+[Active Anti-Entropy]({{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
correcting entropy (divergence) between the data stored in Riak's
key-value backend and the indexes stored in Solr. The impetus for AAE is
that failures come in all shapes and sizes---disk failure, dropped
@@ -354,7 +354,7 @@ _analysis_.

Solr provides many different field types which analyze data in different
ways, and custom analyzer chains may be built by stringing together XML
in the schema file, allowing custom analysis for each field. For more
-information on analysis, see [Search Schema](/riak/kv/2.2.1/developing/usage/search-schemas).
+information on analysis, see [Search Schema]({{}}riak/kv/2.2.1/developing/usage/search-schemas).

## Tagging

diff --git a/content/riak/kv/2.2.1/using/reference/secondary-indexes.md b/content/riak/kv/2.2.1/using/reference/secondary-indexes.md
index b3f04523ff..3c20b9cd6c 100644
--- a/content/riak/kv/2.2.1/using/reference/secondary-indexes.md
+++ b/content/riak/kv/2.2.1/using/reference/secondary-indexes.md
@@ -15,28 +15,28 @@ aliases:
  - /riak/kv/2.2.1/dev/advanced/2i
---

-[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types
-[use ref strong consistency]: /riak/kv/2.2.1/using/reference/strong-consistency
+[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types
+[use ref strong consistency]: {{}}riak/kv/2.2.1/using/reference/strong-consistency

> **Note: Riak Search preferred for querying**
>
> If you're interested in non-primary-key-based querying in Riak, i.e. 
if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.2.1/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.2.1/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.2.1/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.2.1/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.2.1/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.2.1/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.2.1/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.2.1/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.2.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.2.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. 
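For reference, an exact-match 2i query over HTTP looks like this (bucket, index name, and match value are hypothetical):

```bash
# Return all keys in the users bucket whose twitter_bin index is jsmith123
curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith123
```
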
diff --git a/content/riak/kv/2.2.1/using/reference/statistics-monitoring.md b/content/riak/kv/2.2.1/using/reference/statistics-monitoring.md
index 3b39048f31..77be29c2d2 100644
--- a/content/riak/kv/2.2.1/using/reference/statistics-monitoring.md
+++ b/content/riak/kv/2.2.1/using/reference/statistics-monitoring.md
@@ -17,13 +17,13 @@ aliases:

Riak provides data related to current operating status, which includes
statistics in the form of counters and histograms. These statistics
-are made available through the HTTP API via the [`/stats`](/riak/kv/2.2.1/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.2.1/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.2.1/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.2.1/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.

This page presents the most commonly monitored and gathered
statistics, as well as numerous solutions for monitoring and gathering
statistics that our customers and community report using successfully
in Riak cluster environments. You can learn more about the specific
-Riak statistics provided in the [Inspecting a Node](/riak/kv/2.2.1/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.2.1/developing/api/http/status) documentation.
+Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.2.1/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.2.1/developing/api/http/status) documentation.

## System Metrics To Graph

@@ -47,7 +47,7 @@ We also recommend tracking your system's virtual memory and writebacks. Things
like massive flushes of dirty pages or steadily climbing writeback
volumes can indicate poor virtual memory tuning. More information can be
found [here][sysctl_vm_txt] and in our
-documentation on [system tuning](/riak/kv/2.2.1/using/performance/#storage-and-file-system-tuning).
+documentation on [system tuning]({{}}riak/kv/2.2.1/using/performance/#storage-and-file-system-tuning).

## Riak Metrics to Graph
Riak metrics fall into several general categories:

1. Throughput metrics
2. Latency metrics
3. Erlang resource usage metrics
4. General Riak load/health metrics

-If graphing all of the [available Riak metrics](/riak/kv/2.2.1/using/cluster-operations/inspecting-node) is
+If graphing all of the [available Riak metrics]({{}}riak/kv/2.2.1/using/cluster-operations/inspecting-node) is
not practical, you should pick a minimum relevant subset from these
categories. Some of the most helpful metrics are discussed below.

@@ -156,7 +156,7 @@ Metric | Description

## Command-line Interface

-The [`riak-admin`](/riak/kv/2.2.1/using/admin/riak-admin/) tool provides two
+The [`riak-admin`]({{}}riak/kv/2.2.1/using/admin/riak-admin/) tool provides two
interfaces for retrieving statistics and other information: `status`
and `stat`.

@@ -181,14 +181,14 @@ consistent_get_objsize_195 : 0
```

A comprehensive list of available stats can be found in the
-[Inspecting a Node](/riak/kv/2.2.1/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+[Inspecting a Node]({{}}riak/kv/2.2.1/using/cluster-operations/inspecting-node/#riak-admin-status) document.

### stat

The `riak-admin stat` command is related to the `riak-admin status`
command but provides a more fine-grained interface for interacting with
stats and information. 
Full documentation of this command can be found
-in the [Inspecting a Node](/riak/kv/2.2.1/using/cluster-operations/inspecting-node/#riak-admin-stat) document.
+in the [`riak-admin`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#stat) documentation.

## Statistics and Monitoring Tools

@@ -235,7 +235,7 @@ troubleshooting issues on Riak nodes.

#### Riak Control

-[Riak Control](/riak/kv/2.2.1/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak
+[Riak Control]({{}}riak/kv/2.2.1/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak
clusters. It is designed to give you quick insight into the health of
your cluster and allow for easy management of nodes.

@@ -259,7 +259,7 @@ clusters and grids. Customers and community members using Riak have
reported success in using Ganglia to monitor Riak clusters.

A [Riak Ganglia module][riak_ganglia] for collecting statistics from
-the Riak HTTP [`/stats`](/riak/kv/2.2.1/developing/api/http/status) endpoint is also available.
+the Riak HTTP [`/stats`]({{}}riak/kv/2.2.1/developing/api/http/status) endpoint is also available.

#### Nagios

@@ -333,14 +333,14 @@ capacity planning in a Riak cluster environment.
that can provide information on the current and past states of Riak
nodes and visualizations of machine generated data such as log files.

-A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.2.1/developing/api/http/status) endpoint is also available.
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.2.1/developing/api/http/status) endpoint is also available.

#### Splunk

[Splunk](http://www.splunk.com) is available as downloadable software or
as a service, and provides tools for visualization of machine generated
data such as log files. It can be connected to Riak's HTTP statistics
-[`/stats`](/riak/kv/2.2.1/developing/api/http/status) endpoint.
+[`/stats`]({{}}riak/kv/2.2.1/developing/api/http/status) endpoint.

Splunk can be used to aggregate all Riak cluster node operational log
files, including operating system and Riak-specific logs and Riak

@@ -362,9 +362,9 @@ Docs](https://github.com/basho/basho_docs).

## References

-* [Inspecting a Node](/riak/kv/2.2.1/using/cluster-operations/inspecting-node)
+* [Inspecting a Node]({{}}riak/kv/2.2.1/using/cluster-operations/inspecting-node)
* [Riaknostic](http://riaknostic.basho.com)
-* [Riak Control](/riak/kv/2.2.1/using/admin/riak-control/)
+* [Riak Control]({{}}riak/kv/2.2.1/using/admin/riak-control/)
* [collectd](http://collectd.org)
* [Ganglia](http://ganglia.info)
* [Nagios](http://www.nagios.org)

@@ -380,9 +380,9 @@ Docs](https://github.com/basho/basho_docs).
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.2.1/using/reference/strong-consistency.md b/content/riak/kv/2.2.1/using/reference/strong-consistency.md index 6ec3a3f925..53075d5f71 100644 --- a/content/riak/kv/2.2.1/using/reference/strong-consistency.md +++ b/content/riak/kv/2.2.1/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.2.1/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.1/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.2.1/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.1/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.2.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.2.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.2.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.2.1/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.2.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.2.1/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.2.1/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.2.1/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. 
Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.2.1/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.2.1/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.2.1/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.2.1/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter.md index 3aa4e5d936..3f2a0b2f6e 100644 --- a/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter.md @@ -16,7 +16,7 @@ toc: true [v2 mdc fullsync]: ./scheduling-fullsync {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.1/using/reference/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.1/using/reference/v3-multi-datacenter/) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter/architecture.md index be4bbbf6e0..880d70fe3b 100644 --- a/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter/architecture.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.1/using/reference/v3-multi-datacenter/architecture/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.1/using/reference/v3-multi-datacenter/architecture/) instead. {{% /note %}} @@ -83,7 +83,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.2.1/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.2.1/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -95,7 +95,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{< baseurl >}}images/MDC_Full-sync-small.png)
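As a rough sketch of how the listener/site pairing described above is wired together (the node name, IPs, and port below are hypothetical placeholders):

```bash
# On the primary cluster: expose a v2 replication listener on one node.
riak-repl add-listener riak@10.0.1.1 10.0.1.1 9010

# On the secondary cluster: register a named site pointing at that listener.
riak-repl add-site 10.0.1.1 9010 primary_site

# From the secondary cluster, kick off a fullsync exchange.
riak-repl start-fullsync
```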
## Realtime Replication @@ -113,7 +113,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{< baseurl >}}images/MDC-real-time-sync-small.png)
## Restrictions @@ -121,6 +121,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.2.1/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.2.1/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.2.1/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.2.1/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter/scheduling-fullsync.md index 11954a350d..8261a387a4 100644 --- a/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.2.1/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.1/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.1/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/aae.md index 1c84e359d5..716e88d9c2 100644 --- a/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.2.1/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.2.1/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.2.1/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/architecture.md index 48c2d22296..074db36b05 100644 --- a/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.1/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.2.1/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.2.1/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.2.1/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.2.1/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime6.png)
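To make the queueing and acknowledgement cycle above concrete, a minimal v3 realtime setup might look like the following (the cluster names `east`/`west` and the `10.0.2.1:9080` cluster manager address are hypothetical):

```bash
# Name each cluster once (run on a node in each cluster).
riak-repl clustername east
# ...and on a node in the other cluster: riak-repl clustername west

# From "east", connect to "west"'s cluster manager, then enable and
# start realtime replication toward it.
riak-repl connect 10.0.2.1:9080
riak-repl realtime enable west
riak-repl realtime start west

# Watch the realtime queue fill and drain.
riak-repl status
```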
## Restrictions

diff --git a/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/cascading-writes.md
index dd6192038f..7eeeda13d0 100644
--- a/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/cascading-writes.md
+++ b/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -83,7 +83,7 @@ cascade.
## Usage

Riak Enterprise Cascading Writes can be enabled and disabled using the
-`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter) for more information.
+`riak-repl` command. Please see the [Version 3 Operations guide]({{< baseurl >}}riak/kv/2.2.1/using/cluster-operations/v3-multi-datacenter) for more information.

To show the current settings:

diff --git a/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md
index 9e2345f723..754459cb2c 100644
--- a/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md
+++ b/content/riak/kv/2.2.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -17,7 +17,7 @@ aliases:
- /riak/kv/2.2.1/ops/mdc/v3/scheduling-fullsync
---

-[config reference#advanced]: /riak/kv/2.2.1/configuring/reference/#advanced-configuration
+[config reference#advanced]: {{< baseurl >}}riak/kv/2.2.1/configuring/reference/#advanced-configuration

The `fullsync_interval` parameter can be configured in the `riak-repl`
section of [`advanced.config`][config reference#advanced] with either:

diff --git a/content/riak/kv/2.2.1/using/repair-recovery/errors.md b/content/riak/kv/2.2.1/using/repair-recovery/errors.md
index 18a556a1a4..8339f95c4b 100644
--- a/content/riak/kv/2.2.1/using/repair-recovery/errors.md
+++ b/content/riak/kv/2.2.1/using/repair-recovery/errors.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.1/ops/running/recovery/errors
---

-[config reference]: /riak/kv/2.2.1/configuring/reference
+[config reference]: {{< baseurl >}}riak/kv/2.2.1/configuring/reference

This is not a comprehensive listing of every error that Riak may
encounter -- screws fall out all of the time, the world is an imperfect
@@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems.

Error | Description | Resolution
:-----|:------------|:----------
`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd
-`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1.
-`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1.
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1.
`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. 
`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.2.1/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.2.1/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.2.1/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.2.1/using/repair-recovery/failure-recovery.md index 942e100809..967f7b6b24 100644 --- a/content/riak/kv/2.2.1/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.2.1/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.2.1/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.2.1/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.2.1/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.2.1/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
@@ -116,7 +116,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak/kv/2.2.1/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.2.1/using/repair-recovery/repairs.md b/content/riak/kv/2.2.1/using/repair-recovery/repairs.md index 32d76c0ca1..f9c9a9b288 100644 --- a/content/riak/kv/2.2.1/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.2.1/using/repair-recovery/repairs.md @@ -21,13 +21,13 @@ aliases: - /riak/kv/2.2.1/ops/running/recovery/repairing-partitions --- -[cluster ops aae]: /riak/kv/2.2.1/using/cluster-operations/active-anti-entropy/ -[config ref]: /riak/kv/2.2.1/configuring/reference/ +[cluster ops aae]: {{}}riak/kv/2.2.1/using/cluster-operations/active-anti-entropy/ +[config ref]: {{}}riak/kv/2.2.1/configuring/reference/ [Erlang shell]: http://learnyousomeerlang.com/starting-out -[glossary AAE]: /riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae -[glossary readrep]: /riak/kv/2.2.1/learn/glossary/#read-repair -[search config]: /riak/kv/2.2.1/configuring/search/#search-config-settings -[tiered storage]: /riak/kv/2.2.1/setup/planning/backend/leveldb/#tiered-storage +[glossary AAE]: {{}}riak/kv/2.2.1/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{}}riak/kv/2.2.1/learn/glossary/#read-repair +[search config]: {{}}riak/kv/2.2.1/configuring/search/#search-config-settings +[tiered storage]: {{}}riak/kv/2.2.1/setup/planning/backend/leveldb/#tiered-storage @@ -237,23 +237,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.2.1/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.2.1/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.2.1/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/) is enabled. 
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.2.1/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.2.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.2.1/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.2.1/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.2.1/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.2.1/using/repair-recovery/rolling-replaces.md index 184e8a1034..e7cd6faeb1 100644 --- a/content/riak/kv/2.2.1/using/repair-recovery/rolling-replaces.md +++ b/content/riak/kv/2.2.1/using/repair-recovery/rolling-replaces.md @@ -12,9 +12,9 @@ menu: toc: true --- -[upgrade]: /riak/kv/2.2.1/setup/upgrading/cluster/ -[rolling restarts]: /riak/kv/2.2.1/using/repair-recovery/rolling-restart/ -[add node]: /riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes +[upgrade]: {{}}riak/kv/2.2.1/setup/upgrading/cluster/ +[rolling restarts]: {{}}riak/kv/2.2.1/using/repair-recovery/rolling-restart/ +[add node]: {{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. diff --git a/content/riak/kv/2.2.1/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.2.1/using/repair-recovery/rolling-restart.md index 005a200952..5a9831e0c6 100644 --- a/content/riak/kv/2.2.1/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.2.1/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.1/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.2.1/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.2.1/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.2.1/using/running-a-cluster.md b/content/riak/kv/2.2.1/using/running-a-cluster.md index c6e9c474c9..9206ae60e4 100644 --- a/content/riak/kv/2.2.1/using/running-a-cluster.md +++ b/content/riak/kv/2.2.1/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. 
-Most configuration changes will be applied to the [configuration file](/riak/kv/2.2.1/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.2.1/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.2.1/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. ## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.2.1/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.2.1/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.2.1/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.2.1/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.2.1/using/cluster-operations/adding-removing-nodes) from a cluster. 
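The exact snippet falls outside this hunk, but as a sketch, assuming the newer `riak.conf` configuration system and its `listener.protobuf.internal` key, the change could be scripted as:

```bash
# Rebind the Protocol Buffers listener from localhost to the cluster IP.
sed -i 's/^listener.protobuf.internal = .*/listener.protobuf.internal = 192.168.1.10:8087/' \
  /etc/riak/riak.conf
```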
> **Ring Creation Size** > diff --git a/content/riak/kv/2.2.1/using/security.md b/content/riak/kv/2.2.1/using/security.md index de06b119c7..4426b68e5a 100644 --- a/content/riak/kv/2.2.1/using/security.md +++ b/content/riak/kv/2.2.1/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.2.1/ops/advanced/security --- -[config reference search]: /riak/kv/2.1.4/configuring/reference/#search -[config search enabling]: /riak/kv/2.1.4/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.2.1/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.2.1/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.2.1/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.1.4/using/security/basics -[security managing]: /riak/kv/2.1.4/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.2.1/using/security/basics +[security managing]: {{}}riak/kv/2.2.1/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.1.4/developing/usage/search +[usage search]: {{}}riak/kv/2.2.1/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.2.1/using/security/basics.md b/content/riak/kv/2.2.1/using/security/basics.md index 15a24fca65..f8b96ce726 100644 --- a/content/riak/kv/2.2.1/using/security/basics.md +++ b/content/riak/kv/2.2.1/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.2.1/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.2.1/using/security/managing-sources/). As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.2.1/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.2.1/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.2.1/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.2.1/using/reference/custom-code). 1. 
Make sure that your client software will work properly:
* It must pass authentication information with each request
- * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.2.1/developing/api/protocol-buffers/)
+ * It must support HTTPS or encrypted [Protocol Buffers]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/)
traffic
* If using HTTPS, the proper port (presumably 443) is open from client to server
@@ -75,7 +75,7 @@ of the following **before** enabling security:
Security should be enabled only after all of the above steps have been
performed and your security setup has been properly vetted.

-Clients that use [Protocol Buffers](/riak/kv/2.2.1/developing/api/protocol-buffers/) will typically have to be
+Clients that use [Protocol Buffers]({{< baseurl >}}riak/kv/2.2.1/developing/api/protocol-buffers/) will typically have to be
reconfigured/restarted with the proper credentials once security is
enabled.
@@ -405,7 +405,7 @@ riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|<users>
-granted/revoked for all buckets and [bucket types](/riak/kv/2.2.1/developing/usage/bucket-types).
+granted/revoked for all buckets and [bucket types]({{< baseurl >}}riak/kv/2.2.1/developing/usage/bucket-types).
If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination.
@@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client
### MapReduce Permissions

-Permission to perform [MapReduce](/riak/kv/2.2.1/developing/usage/mapreduce/) jobs can be assigned
+Permission to perform [MapReduce]({{< baseurl >}}riak/kv/2.2.1/developing/usage/mapreduce/) jobs can be assigned
using `riak_kv.mapreduce`. The following example grants MapReduce
permissions to the user `mapreduce-power-user` for all buckets and
bucket types:
@@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user
### Bucket Type Permissions

-In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.2.1/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin
+In versions 2.0 and later, Riak users can manage [bucket types]({{< baseurl >}}riak/kv/2.2.1/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin
security` allows you to manage the following bucket type-related
permissions:
@@ -507,7 +507,7 @@ disabled, you will get the following error:
> `{error,{unknown_permission,"search.query"}}`
>
> More information on Riak Search and how to enable it can be found in the
-[Riak Search Settings](/riak/kv/2.2.1/configuring/search/) document.
+[Riak Search Settings]({{< baseurl >}}riak/kv/2.2.1/configuring/search/) document.

#### Usage Examples
@@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme
you will then need to define security sources required for
authentication.

-An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.2.1/using/security/managing-sources/).
+A more in-depth tutorial can be found in [Managing Security Sources]({{< baseurl >}}riak/kv/2.2.1/using/security/managing-sources/).

### Add Source
@@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password
This section provides only a very brief overview of the syntax for
working with sources. For more information on using the `trust`,
-`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.2.1/using/security/managing-sources/) document.
+`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.2.1/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.2.1/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.2.1/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.2.1/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.2.1/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.2.1/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.2.1/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.2.1/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.2.1/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). -If, however, you are using the [HTTP API](/riak/kv/2.2.1/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.2.1/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. 
By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.2.1/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.2.1/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.2.1/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.2.1/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.2.1/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.2.1/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.2.1/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.2.1/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.2.1/configuring/reference/#directories).
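With certificates and paths in place, authentication sources can then be attached to users. A brief hypothetical sketch using `riak-admin security add-source` (the user and CIDR ranges below are placeholders):

```bash
# Require password auth for riakuser when connecting from localhost.
riak-admin security add-source riakuser 127.0.0.1/32 password

# Trust an internal network for all users (use with care).
riak-admin security add-source all 192.168.1.0/24 trust
```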
platform_data_dir The directory in which Riak stores its storage backend data, as well -as active anti-entropy data, and cluster metadata. ./data
alive_tokens Determines the number of ticks the leader will wait to hear from its -associated vnode before assuming that the vnode +associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before ensemble_tick * alive_tokens milliseconds have elapsed, the leader will @@ -1833,8 +1833,8 @@ package) and in R14B04 via a custom repository and branch.
vnode_management_timer Sets the frequency with which <a -href="/riak/kv/2.2.1/learn/glossary/#vnode">vnodes attempt to trigger handoff between +href="{{< baseurl >}}riak/kv/2.2.1/learn/glossary/#vnode">vnodes attempt to trigger handoff between this node and other nodes in the cluster. 10s (10 seconds)
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.2.1/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.2.1/using/security/managing-sources.md b/content/riak/kv/2.2.1/using/security/managing-sources.md index e18df16707..d6e798c112 100644 --- a/content/riak/kv/2.2.1/using/security/managing-sources.md +++ b/content/riak/kv/2.2.1/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.2.1/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.2.1/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.2.1/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.2.1/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.2.1/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.2.1/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.2.1/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.2.1/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.2.1/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.2.1/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. 
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.2.1/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.2.1/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.2.1/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.2.1/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.2.2/_reference-links.md b/content/riak/kv/2.2.2/_reference-links.md index ef75982b87..f701e5d78b 100644 --- a/content/riak/kv/2.2.2/_reference-links.md +++ b/content/riak/kv/2.2.2/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.2.2/downloads/ -[install index]: /riak/kv/2.2.2/setup/installing -[upgrade index]: /riak/kv/2.2.2/upgrading -[plan index]: /riak/kv/2.2.2/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.2.2/configuring/reference/ -[manage index]: /riak/kv/2.2.2/using/managing -[performance index]: /riak/kv/2.2.2/using/performance -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode +[downloads]: {{}}riak/kv/2.2.2/downloads/ +[install index]: {{}}riak/kv/2.2.2/setup/installing +[upgrade index]: {{}}riak/kv/2.2.2/upgrading +[plan index]: {{}}riak/kv/2.2.2/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.2.2/configuring/reference/ +[manage index]: {{}}riak/kv/2.2.2/using/managing +[performance index]: {{}}riak/kv/2.2.2/using/performance +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.2.2/setup/planning -[plan start]: /riak/kv/2.2.2/setup/planning/start -[plan backend]: /riak/kv/2.2.2/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.2/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.2/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.2.2/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.2.2/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.2.2/setup/planning/best-practices -[plan future]: /riak/kv/2.2.2/setup/planning/future +[plan index]: {{}}riak/kv/2.2.2/setup/planning +[plan start]: {{}}riak/kv/2.2.2/setup/planning/start +[plan backend]: {{}}riak/kv/2.2.2/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.2/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.2/setup/planning/backend/multi +[plan cluster capacity]: 
{{}}riak/kv/2.2.2/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.2.2/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.2.2/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.2.2/setup/planning/future ## Installing -[install index]: /riak/kv/2.2.2/setup/installing -[install aws]: /riak/kv/2.2.2/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.2.2/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.2.2/setup/installing/freebsd -[install mac osx]: /riak/kv/2.2.2/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.2.2/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.2.2/setup/installing/smartos -[install solaris]: /riak/kv/2.2.2/setup/installing/solaris -[install suse]: /riak/kv/2.2.2/setup/installing/suse -[install windows azure]: /riak/kv/2.2.2/setup/installing/windows-azure +[install index]: {{}}riak/kv/2.2.2/setup/installing +[install aws]: {{}}riak/kv/2.2.2/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.2.2/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.2.2/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.2.2/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.2.2/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.2.2/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.2.2/setup/installing/solaris +[install suse]: {{}}riak/kv/2.2.2/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.2.2/setup/installing/windows-azure -[install source index]: /riak/kv/2.2.2/setup/installing/source -[install source erlang]: /riak/kv/2.2.2/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.2.2/setup/installing/source/jvm +[install source index]: {{}}riak/kv/2.2.2/setup/installing/source +[install source erlang]: {{}}riak/kv/2.2.2/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.2.2/setup/installing/source/jvm -[install verify]: /riak/kv/2.2.2/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.2/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.2.2/setup/upgrading -[upgrade checklist]: /riak/kv/2.2.2/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.2.2/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.2.2/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.2.2/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.2.2/setup/downgrade +[upgrade index]: {{}}riak/kv/2.2.2/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.2.2/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.2.2/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.2.2/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.2.2/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.2.2/setup/downgrade ## Configuring -[config index]: /riak/kv/2.2.2/configuring -[config basic]: /riak/kv/2.2.2/configuring/basic -[config backend]: /riak/kv/2.2.2/configuring/backend -[config manage]: /riak/kv/2.2.2/configuring/managing -[config reference]: /riak/kv/2.2.2/configuring/reference/ -[config strong consistency]: /riak/kv/2.2.2/configuring/strong-consistency -[config load balance]: /riak/kv/2.2.2/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.2.2/configuring/mapreduce -[config search]: /riak/kv/2.2.2/configuring/search/ +[config index]: {{}}riak/kv/2.2.2/configuring +[config basic]: {{}}riak/kv/2.2.2/configuring/basic +[config backend]: {{}}riak/kv/2.2.2/configuring/backend +[config manage]: 
{{}}riak/kv/2.2.2/configuring/managing +[config reference]: {{}}riak/kv/2.2.2/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.2.2/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.2.2/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.2.2/configuring/mapreduce +[config search]: {{}}riak/kv/2.2.2/configuring/search/ -[config v3 mdc]: /riak/kv/2.2.2/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.2.2/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.2.2/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{}}riak/kv/2.2.2/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.2.2/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.2.2/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.2.2/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.2.2/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.2.2/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.2.2/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{}}riak/kv/2.2.2/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.2.2/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.2.2/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.2.2/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.2.2/using/ -[use admin commands]: /riak/kv/2.2.2/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.2.2/using/running-a-cluster +[use index]: {{}}riak/kv/2.2.2/using/ +[use admin commands]: {{}}riak/kv/2.2.2/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.2.2/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.2.2/using/reference/custom-code -[use ref handoff]: /riak/kv/2.2.2/using/reference/handoff -[use ref monitoring]: /riak/kv/2.2.2/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.2.2/using/reference/search -[use ref 2i]: /riak/kv/2.2.2/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.2.2/using/reference/snmp -[use ref strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.2.2/using/reference/jmx -[use ref obj del]: /riak/kv/2.2.2/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.2.2/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.2.2/using/reference/v2-multi-datacenter +[use ref custom code]: {{}}riak/kv/2.2.2/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.2.2/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.2.2/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.2.2/using/reference/search +[use ref 2i]: {{}}riak/kv/2.2.2/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.2.2/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.2.2/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.2.2/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.2.2/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.2.2/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.2.2/using/admin/ -[use admin commands]: /riak/kv/2.2.2/using/admin/commands/ -[use admin riak cli]: 
/riak/kv/2.2.2/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.2.2/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.2.2/using/admin/riak-control/ +[use admin index]: {{}}riak/kv/2.2.2/using/admin/ +[use admin commands]: {{}}riak/kv/2.2.2/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.2.2/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.2.2/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.2.2/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.2.2/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.2.2/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.2.2/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.2.2/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.2.2/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.2.2/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.2.2/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.2.2/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.2.2/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.2.2/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.2.2/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.2.2/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.2.2/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.2.2/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.2.2/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.2.2/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.2.2/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.2.2/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.2.2/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.2.2/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.2.2/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.2.2/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.2.2/using/repair-recovery -[repair recover index]: /riak/kv/2.2.2/using/repair-recovery/failure-recovery/ +[repair recover index]: {{}}riak/kv/2.2.2/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.2.2/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.2.2/using/security/ -[security basics]: /riak/kv/2.2.2/using/security/basics -[security managing]: /riak/kv/2.2.2/using/security/managing-sources/ +[security index]: {{}}riak/kv/2.2.2/using/security/ +[security basics]: {{}}riak/kv/2.2.2/using/security/basics +[security managing]: {{}}riak/kv/2.2.2/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.2.2/using/performance/ -[perf 
benchmark]: /riak/kv/2.2.2/using/performance/benchmarking -[perf open files]: /riak/kv/2.2.2/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.2.2/using/performance/erlang -[perf aws]: /riak/kv/2.2.2/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.2.2/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.2.2/using/performance/ +[perf benchmark]: {{}}riak/kv/2.2.2/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.2.2/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.2.2/using/performance/erlang +[perf aws]: {{}}riak/kv/2.2.2/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.2.2/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.2.2/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.2.2/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.2.2/developing -[dev client libraries]: /riak/kv/2.2.2/developing/client-libraries -[dev data model]: /riak/kv/2.2.2/developing/data-modeling -[dev data types]: /riak/kv/2.2.2/developing/data-types -[dev kv model]: /riak/kv/2.2.2/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.2.2/developing +[dev client libraries]: {{}}riak/kv/2.2.2/developing/client-libraries +[dev data model]: {{}}riak/kv/2.2.2/developing/data-modeling +[dev data types]: {{}}riak/kv/2.2.2/developing/data-types +[dev kv model]: {{}}riak/kv/2.2.2/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.2.2/developing/getting-started -[getting started java]: /riak/kv/2.2.2/developing/getting-started/java -[getting started ruby]: /riak/kv/2.2.2/developing/getting-started/ruby -[getting started python]: /riak/kv/2.2.2/developing/getting-started/python -[getting started php]: /riak/kv/2.2.2/developing/getting-started/php -[getting started csharp]: /riak/kv/2.2.2/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.2.2/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.2.2/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.2.2/developing/getting-started/golang - -[obj model java]: /riak/kv/2.2.2/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.2.2/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.2.2/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.2.2/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.2.2/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.2.2/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.2.2/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.2.2/developing/getting-started +[getting started java]: {{}}riak/kv/2.2.2/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.2.2/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.2.2/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.2.2/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.2.2/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.2.2/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.2.2/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.2.2/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.2.2/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.2.2/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.2.2/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.2.2/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.2.2/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.2.2/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.2.2/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.2.2/developing/usage -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.2.2/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.2.2/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.2.2/developing/usage/content-types -[usage create objects]: /riak/kv/2.2.2/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.2.2/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.2.2/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.2.2/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.2/developing/usage/search -[usage search schema]: /riak/kv/2.2.2/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.2.2/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.2.2/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.2.2/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.2.2/developing/usage +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.2.2/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.2.2/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.2.2/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.2.2/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.2.2/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.2.2/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.2.2/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search +[usage search schema]: {{}}riak/kv/2.2.2/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.2.2/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.2.2/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.2.2/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.2.2/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.2.2/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.2.2/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.2.2/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.2.2/developing/api/backend -[dev api http]: /riak/kv/2.2.2/developing/api/http -[dev api http status]: /riak/kv/2.2.2/developing/api/http/status -[dev api pbc]: /riak/kv/2.2.2/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.2.2/developing/api/backend +[dev api http]: {{}}riak/kv/2.2.2/developing/api/http +[dev api http status]: {{}}riak/kv/2.2.2/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.2.2/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.2.2/learn/glossary/ -[glossary aae]: /riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.2.2/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.2.2/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.2.2/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode -[concept aae]: /riak/kv/2.2.2/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.2.2/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.2.2/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.2.2/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.2.2/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.2/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.2/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.2.2/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.2.2/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.2.2/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.2.2/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.2.2/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.2/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.2.2/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.2.2/add-ons.md b/content/riak/kv/2.2.2/add-ons.md index 4ded4ddfe3..d6430e2a96 100644 --- a/content/riak/kv/2.2.2/add-ons.md +++ b/content/riak/kv/2.2.2/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
-* [Riak Redis Add-on](/riak/kv/2.2.2/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.2.2/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.2.2/add-ons/redis/developing-rra.md b/content/riak/kv/2.2.2/add-ons/redis/developing-rra.md index 52b4c5263b..cfa4d03f1d 100644 --- a/content/riak/kv/2.2.2/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.2.2/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.2.2/developing/api/http +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.2.2/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.2.2/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.2.2/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.2.2/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.2.2/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.2.2/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.2.2/add-ons/redis/redis-add-on-features.md index 09ff249877..229cfab4ea 100644 --- a/content/riak/kv/2.2.2/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.2.2/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
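In code, the read-through strategy is short: try the cache, fall back to Riak KV on a miss, then warm the cache for the next reader. Below is a minimal sketch using `redis-cli` and `curl` against default local ports; the bucket, key, and TTL are illustrative, and RRA performs these steps inside the proxy rather than in client code:

```bash
#!/bin/sh
# Read-through by hand (illustrative; RRA does this internally).
KEY="test:foo"
VALUE=$(redis-cli GET "$KEY")                 # 1. try the cache first
if [ -z "$VALUE" ]; then
  # 2. cache miss: fetch the object from Riak KV over HTTP
  VALUE=$(curl -s http://127.0.0.1:8098/types/default/buckets/test/keys/foo)
  # 3. warm the cache; the 300s TTL stands in for the CACHE_TTL setting
  redis-cli SETEX "$KEY" 300 "$VALUE"
fi
echo "$VALUE"
```

The write-around SET and DEL strategies described next are the mirror image: the write goes to Riak first, and the cached entry is invalidated rather than updated in place.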
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.2.2/add-ons/redis/set-up-rra.md b/content/riak/kv/2.2.2/add-ons/redis/set-up-rra.md index 7a2d83c496..e082ec9e17 100644 --- a/content/riak/kv/2.2.2/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.2.2/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.2.2/setup/installing -[perf open files]: /riak/kv/2.2.2/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.2.2/setup/installing +[perf open files]: {{}}riak/kv/2.2.2/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. diff --git a/content/riak/kv/2.2.2/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.2.2/add-ons/redis/set-up-rra/deployment-models.md index 9d13ac3d08..a4d35af0fb 100644 --- a/content/riak/kv/2.2.2/add-ons/redis/set-up-rra/deployment-models.md +++ b/content/riak/kv/2.2.2/add-ons/redis/set-up-rra/deployment-models.md @@ -13,9 +13,9 @@ toc: true commercial_offering: true --- -[Local-deployment]: /images/redis/rra_deployment_local.png -[Colocated-deployment]: /images/redis/rra_deployment_colocated.png -[Distributed-deployment]: /images/redis/rra_deployment_distributed.png +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png ## Deployment Models @@ -24,7 +24,7 @@ commercial_offering: true In a local cache deployment, the RRA and Redis are deployed to the application server. -![Local-deployment](/images/redis/rra_deployment_local.png) +![Local-deployment]({{}}images/redis/rra_deployment_local.png) Connections: @@ -65,7 +65,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so costs are not multiplied by the number of servers. -![Colocated-deployment](/images/redis/rra_deployment_colocated.png) +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) Connections: @@ -103,7 +103,7 @@ Disadvantages: In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes. 
-![Distributed-deployment](/images/redis/rra_deployment_distributed.png) +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) Connections: diff --git a/content/riak/kv/2.2.2/add-ons/redis/using-rra.md b/content/riak/kv/2.2.2/add-ons/redis/using-rra.md index 19d83e71e4..ed5c15321d 100644 --- a/content/riak/kv/2.2.2/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.2.2/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.2.2/developing/api/http/ +[dev api http]: {{}}riak/kv/2.2.2/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.2.2/configuring/backend.md b/content/riak/kv/2.2.2/configuring/backend.md index 50b1de7437..9fd9a25364 100644 --- a/content/riak/kv/2.2.2/configuring/backend.md +++ b/content/riak/kv/2.2.2/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.2/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.2/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.2/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.2/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.2.2/configuring/basic.md b/content/riak/kv/2.2.2/configuring/basic.md index 660ea903e6..3965535f2f 100644 --- a/content/riak/kv/2.2.2/configuring/basic.md +++ b/content/riak/kv/2.2.2/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.2.2/ops/building/configuration/ --- -[config reference]: /riak/kv/2.2.2/configuring/reference -[use running cluster]: /riak/kv/2.2.2/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.2.2/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.2.2/using/performance/erlang -[plan start]: /riak/kv/2.2.2/setup/planning/start -[plan best practices]: /riak/kv/2.2.2/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.2.2/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.2.2/setup/planning/backend -[plan backend multi]: /riak/kv/2.2.2/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.2.2/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.2.2/using/performance/benchmarking -[perf open files]: /riak/kv/2.2.2/using/performance/open-files-limit -[perf index]: /riak/kv/2.2.2/using/performance -[perf aws]: /riak/kv/2.2.2/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.2.2/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.2.2/configuring/reference +[use running cluster]: {{}}riak/kv/2.2.2/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.2.2/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.2.2/using/performance/erlang +[plan start]: 
{{}}riak/kv/2.2.2/setup/planning/start +[plan best practices]: {{}}riak/kv/2.2.2/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.2.2/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.2.2/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.2.2/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.2.2/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.2.2/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.2.2/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.2.2/using/performance +[perf aws]: {{}}riak/kv/2.2.2/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.2.2/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.2.2/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.2.2/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
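That reset is a single HTTP call, matching the `DELETE .../props` route in the HTTP API table elsewhere in this changeset. A minimal sketch against a local node (the bucket type and name are illustrative):

```bash
# Drop the bucket's custom properties so it falls back to the new defaults
curl -XDELETE http://127.0.0.1:8098/types/default/buckets/mybucket/props
```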
## System tuning diff --git a/content/riak/kv/2.2.2/configuring/global-object-expiration.md b/content/riak/kv/2.2.2/configuring/global-object-expiration.md index 415188f269..bafcdd66d2 100644 --- a/content/riak/kv/2.2.2/configuring/global-object-expiration.md +++ b/content/riak/kv/2.2.2/configuring/global-object-expiration.md @@ -10,7 +10,6 @@ menu: project: "riak_kv" project_version: "2.2.2" toc: true -canonical_link: "https://docs.basho.com/riak/kv/latest/configuring/global-object-expiration" --- [ttl]: https://en.wikipedia.org/wiki/Time_to_live diff --git a/content/riak/kv/2.2.2/configuring/load-balancing-proxy.md b/content/riak/kv/2.2.2/configuring/load-balancing-proxy.md index ece3b40102..0744c907f3 100644 --- a/content/riak/kv/2.2.2/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.2.2/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.2.2/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.2.2/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.2.2/configuring/managing.md b/content/riak/kv/2.2.2/configuring/managing.md index fefacdc375..bfdce51240 100644 --- a/content/riak/kv/2.2.2/configuring/managing.md +++ b/content/riak/kv/2.2.2/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.2.2/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.2.2/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.2.2/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.2.2/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.2.2/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.2.2/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.2.2/configuring/mapreduce.md b/content/riak/kv/2.2.2/configuring/mapreduce.md index 7bbe52243b..c5393460b0 100644 --- a/content/riak/kv/2.2.2/configuring/mapreduce.md +++ b/content/riak/kv/2.2.2/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.2.2/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.2.2/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.2.2/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.2.2/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.2.2/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.2.2/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.2.2/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.2.2/configuring/reference.md b/content/riak/kv/2.2.2/configuring/reference.md index 6ebd228421..cae11f0c4c 100644 --- a/content/riak/kv/2.2.2/configuring/reference.md +++ b/content/riak/kv/2.2.2/configuring/reference.md @@ -200,7 +200,7 @@ executables are stored. +as active anti-entropy data, and cluster metadata. @@ -1684,7 +1684,7 @@ abandons the leader (in milliseconds). 
This must be set greater than the diff --git a/content/riak/kv/2.2.2/configuring/search.md b/content/riak/kv/2.2.2/configuring/search.md index 8704f165f1..224e59b183 100644 --- a/content/riak/kv/2.2.2/configuring/search.md +++ b/content/riak/kv/2.2.2/configuring/search.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.2/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.2.2/developing/usage/search -[usage search schema]: /riak/kv/2.2.2/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.2.2/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.2.2/developing/usage/custom-extractors -[cluster-ops aae throttle]: /riak/kv/2.2.2/using/cluster-operations/active-anti-entropy/#throttling -[config reference]: /riak/kv/2.2.2/configuring/reference -[config reference#search]: /riak/kv/2.2.2/configuring/reference/#search -[glossary aae]: /riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.2.2/using/security/ +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search +[usage search schema]: {{}}riak/kv/2.2.2/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.2.2/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.2.2/developing/usage/custom-extractors +[cluster-ops aae throttle]: {{}}riak/kv/2.2.2/using/cluster-operations/active-anti-entropy/#throttling +[config reference]: {{}}riak/kv/2.2.2/configuring/reference +[config reference#search]: {{}}riak/kv/2.2.2/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.2.2/using/security/ [java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads [java se docs]: http://www.oracle.com/technetwork/java/javase/documentation @@ -150,15 +150,15 @@ Valid values: `on` or `off` ### `search.index.error_threshold.failure_count` -The number of failures encountered while updating a search index within [`search.queue.error_threshold.failure_interval`](#search-queue-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. +The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. Valid values: Integer ### `search.index.error_threshold.failure_interval` -The window of time during which `search.queue.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. +The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. -If [`search.queue.error_threshold.failure_count`](#search-queue-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.queue.error_threshold.reset_interval`](search-queue-error-threshold-reset-interval) has passed. +If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed. 
Valid values: Milliseconds diff --git a/content/riak/kv/2.2.2/configuring/strong-consistency.md b/content/riak/kv/2.2.2/configuring/strong-consistency.md index caaff2efe1..4fc2ef846d 100644 --- a/content/riak/kv/2.2.2/configuring/strong-consistency.md +++ b/content/riak/kv/2.2.2/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.2.2/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.2.2/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.2.2/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.2.2/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.2.2/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.2.2/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.2.2/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.2.2/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.2.2/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.2.2/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.2.2/learn/concepts/causal-context -[dev data types]: /riak/kv/2.2.2/developing/data-types -[glossary aae]: /riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.2.2/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.2.2/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.2.2/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.2.2/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.2.2/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.2.2/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.2.2/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.2.2/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.2.2/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.2.2/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.2.2/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.2.2/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.2.2/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.2.2/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.2.2/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.2.2/developing/data-types +[glossary 
aae]: {{}}riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.2.2/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.2.2/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.2.2/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.2.2/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
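For quick orientation: once `strong_consistency = on` is set in riak.conf on at least three nodes, all of the fields above come from a single command (a sketch; run it on any node in the cluster):

```bash
# Print Enabled, Active, Ring Ready, Validation, Metadata, and the
# ensemble list described in the table above
riak-admin ensemble-status
```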
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.2.2/configuring/v2-multi-datacenter.md b/content/riak/kv/2.2.2/configuring/v2-multi-datacenter.md index 3c008ee50b..f84876e722 100644 --- a/content/riak/kv/2.2.2/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.2/configuring/v2-multi-datacenter.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.2/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.2.2/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.2.2/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.2/configuring/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.2/configuring/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication capabilities offer a diff --git a/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/nat.md index 12eb7cfc05..95404ac702 100644 --- a/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/nat.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.2/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.2.2/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.2.2/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.2/configuring/v3-multi-datacenter/nat/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.2/configuring/v3-multi-datacenter/nat/) instead. {{% /note %}} Riak Enterprise supports replication of data on networks that use static diff --git a/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/quick-start.md index 193a177901..e4a2872126 100644 --- a/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/quick-start.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.2/configuring/v3-multi-datacenter/quick-start/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.2/configuring/v3-multi-datacenter/quick-start/) instead. {{% /note %}} The Riak Multi-Datacenter Replication Quick Start will walk you through diff --git a/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/ssl.md index 1da6a856c1..0079c36e23 100644 --- a/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/ssl.md +++ b/content/riak/kv/2.2.2/configuring/v2-multi-datacenter/ssl.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. 
Please use [v3]({{}}riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl/) instead. {{% /note %}} ## Features diff --git a/content/riak/kv/2.2.2/configuring/v3-multi-datacenter.md b/content/riak/kv/2.2.2/configuring/v3-multi-datacenter.md index 2582165e56..6b9428370c 100644 --- a/content/riak/kv/2.2.2/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.2.2/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.2/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.2.2/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.2.2/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/nat.md index 23276b1876..f8bc04486e 100644 --- a/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.2/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. diff --git a/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/quick-start.md index d94f1bf643..75cd01ad1f 100644 --- a/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.2.2/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.2.2/using/performance -[config v3 mdc]: /riak/kv/2.2.2/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.2.2/using/performance +[config v3 mdc]: {{}}riak/kv/2.2.2/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl.md index c7eb9811fd..8e431fd014 100644 --- a/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.2/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.2.2/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.2.2/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.2.2/developing/api/backend.md b/content/riak/kv/2.2.2/developing/api/backend.md index 5deec5f05b..8db3119a9e 100644 --- a/content/riak/kv/2.2.2/developing/api/backend.md +++ b/content/riak/kv/2.2.2/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/dev/references/backend-api --- -[plan backend]: /riak/kv/2.2.2/setup/planning/backend +[plan backend]: {{}}riak/kv/2.2.2/setup/planning/backend Riak's storage API uniformly applies to 
all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.2.2/developing/api/http.md b/content/riak/kv/2.2.2/developing/api/http.md index 9f898e03ab..6f8d8497d3 100644 --- a/content/riak/kv/2.2.2/developing/api/http.md +++ b/content/riak/kv/2.2.2/developing/api/http.md @@ -29,21 +29,21 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. Method | URL | Doc :------|:----|:--- -`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties](/riak/kv/2.2.2/developing/api/http/get-bucket-props) -`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties](/riak/kv/2.2.2/developing/api/http/set-bucket-props) -`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties](/riak/kv/2.2.2/developing/api/http/reset-bucket-props) -`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.2.2/developing/api/http/list-buckets) -`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys](/riak/kv/2.2.2/developing/api/http/list-keys) +`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.2.2/developing/api/http/get-bucket-props) +`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.2.2/developing/api/http/set-bucket-props) +`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.2.2/developing/api/http/reset-bucket-props) +`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.2.2/developing/api/http/list-buckets) +`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.2.2/developing/api/http/list-keys) ## Object-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/types//buckets//keys/` | [HTTP Fetch Object](/riak/kv/2.2.2/developing/api/http/fetch-object) -`POST` | `/types//buckets//keys` | [HTTP Store Object](/riak/kv/2.2.2/developing/api/http/store-object) -`PUT` | `/types//buckets//keys/` | [HTTP Store Object](/riak/kv/2.2.2/developing/api/http/store-object) -`POST` | `/types//buckets//keys/` | [HTTP Store Object](/riak/kv/2.2.2/developing/api/http/store-object) -`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object](/riak/kv/2.2.2/developing/api/http/delete-object) +`GET` | `/types//buckets//keys/` | [HTTP Fetch Object]({{}}riak/kv/2.2.2/developing/api/http/fetch-object) +`POST` | `/types//buckets//keys` | [HTTP Store Object]({{}}riak/kv/2.2.2/developing/api/http/store-object) +`PUT` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.2.2/developing/api/http/store-object) +`POST` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.2.2/developing/api/http/store-object) +`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object]({{}}riak/kv/2.2.2/developing/api/http/delete-object) ## Riak-Data-Type-related Operations @@ -53,9 +53,9 @@ Method | URL `POST` | `/types//buckets//datatypes` `POST` | `/types//buckets//datatypes/` -For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.2.2/learn/concepts/crdts), -see the `curl` examples in [Using Data Types](/riak/kv/2.2.2/developing/data-types/#usage-examples) -and subpages e.g. [sets](/riak/kv/2.2.2/developing/data-types/sets). +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.2.2/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.2.2/developing/data-types/#usage-examples) +and subpages e.g. [sets]({{}}riak/kv/2.2.2/developing/data-types/sets). 
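By way of example, one such `curl` call against the GET form of the `datatypes` resource (the bucket type, bucket, and key here are illustrative) looks like:

```bash
# Fetch a data type's current value as JSON, e.g. a counter
curl http://127.0.0.1:8098/types/counters/buckets/counters/datatypes/traffic_tickets
```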
Advanced users may consult the technical documentation inside the Riak KV internal module `riak_kv_wm_crdt`. @@ -64,26 +64,26 @@ KV internal module `riak_kv_wm_crdt`. Method | URL | Doc :------|:----|:--- -`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.2.2/developing/api/http/mapreduce) -`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes](/riak/kv/2.2.2/developing/api/http/secondary-indexes) -`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes](/riak/kv/2.2.2/developing/api/http/secondary-indexes) +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.2.2/developing/api/http/mapreduce) +`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes]({{}}riak/kv/2.2.2/developing/api/http/secondary-indexes) +`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes]({{}}riak/kv/2.2.2/developing/api/http/secondary-indexes) ## Server-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/ping` | [HTTP Ping](/riak/kv/2.2.2/developing/api/http/ping) -`GET` | `/stats` | [HTTP Status](/riak/kv/2.2.2/developing/api/http/status) -`GET` | `/` | [HTTP List Resources](/riak/kv/2.2.2/developing/api/http/list-resources) +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.2.2/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.2.2/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.2.2/developing/api/http/list-resources) ## Search-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/search/query/` | [HTTP Search Query](/riak/kv/2.2.2/developing/api/http/search-query) -`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.2.2/developing/api/http/search-index-info) -`GET` | `/search/index/` | [HTTP Fetch Search Index](/riak/kv/2.2.2/developing/api/http/fetch-search-index) -`PUT` | `/search/index/` | [HTTP Store Search Index](/riak/kv/2.2.2/developing/api/http/store-search-index) -`DELETE` | `/search/index/` | [HTTP Delete Search Index](/riak/kv/2.2.2/developing/api/http/delete-search-index) -`GET` | `/search/schema/` | [HTTP Fetch Search Schema](/riak/kv/2.2.2/developing/api/http/fetch-search-schema) -`PUT` | `/search/schema/` | [HTTP Store Search Schema](/riak/kv/2.2.2/developing/api/http/store-search-schema) +`GET` | `/search/query/` | [HTTP Search Query]({{}}riak/kv/2.2.2/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.2.2/developing/api/http/search-index-info) +`GET` | `/search/index/` | [HTTP Fetch Search Index]({{}}riak/kv/2.2.2/developing/api/http/fetch-search-index) +`PUT` | `/search/index/` | [HTTP Store Search Index]({{}}riak/kv/2.2.2/developing/api/http/store-search-index) +`DELETE` | `/search/index/` | [HTTP Delete Search Index]({{}}riak/kv/2.2.2/developing/api/http/delete-search-index) +`GET` | `/search/schema/` | [HTTP Fetch Search Schema]({{}}riak/kv/2.2.2/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/` | [HTTP Store Search Schema]({{}}riak/kv/2.2.2/developing/api/http/store-search-schema) diff --git a/content/riak/kv/2.2.2/developing/api/http/counters.md b/content/riak/kv/2.2.2/developing/api/http/counters.md index 243da6f6f5..cc166db16d 100644 --- a/content/riak/kv/2.2.2/developing/api/http/counters.md +++ b/content/riak/kv/2.2.2/developing/api/http/counters.md @@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY ## Response -The regular POST/PUT ([HTTP Store Object](/riak/kv/2.2.2/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.2.2/developing/api/http/fetch-object)) responses apply here. 
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.2.2/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.2.2/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.2.2/developing/api/http/fetch-object.md b/content/riak/kv/2.2.2/developing/api/http/fetch-object.md index 67a2ae9d21..07d19d019c 100644 --- a/content/riak/kv/2.2.2/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.2.2/developing/api/http/fetch-object.md @@ -41,14 +41,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.2.2/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.2.2/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.2.2/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.2.2/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.2.2/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.2.2/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.2.2/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.2.2/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -75,7 +75,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.2.2/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.2.2/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.2.2/developing/api/http/fetch-search-index.md b/content/riak/kv/2.2.2/developing/api/http/fetch-search-index.md index c08ea0ac5b..ad1c25df5d 100644 --- a/content/riak/kv/2.2.2/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.2.2/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.2.2/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.2.2/developing/usage/search/#simple-setup). 
## Request
@@ -36,7 +36,7 @@ GET /search/index/
## Response
If the index is found, Riak will output a JSON object describing the
-index, including its name, the [`n_val`](/riak/kv/2.2.2/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.2.2/developing/usage/search-schemas) used by the index. Here is an example:
+index, including its name, the [`n_val`]({{< baseurl >}}riak/kv/2.2.2/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas) used by the index. Here is an example:
```json
{
diff --git a/content/riak/kv/2.2.2/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.2.2/developing/api/http/fetch-search-schema.md
index a3e5482452..c8a1d2443f 100644
--- a/content/riak/kv/2.2.2/developing/api/http/fetch-search-schema.md
+++ b/content/riak/kv/2.2.2/developing/api/http/fetch-search-schema.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/http/fetch-search-schema
---
-Retrieves a Riak KV [search schema](/riak/kv/2.2.2/developing/usage/search-schemas).
+Retrieves a Riak KV [search schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas).
## Request
diff --git a/content/riak/kv/2.2.2/developing/api/http/get-bucket-props.md b/content/riak/kv/2.2.2/developing/api/http/get-bucket-props.md
index 04ff9721b7..66ea1ec6d6 100644
--- a/content/riak/kv/2.2.2/developing/api/http/get-bucket-props.md
+++ b/content/riak/kv/2.2.2/developing/api/http/get-bucket-props.md
@@ -33,7 +33,7 @@ Optional query parameters (only valid for the old format):
* `props` - whether to return the bucket properties (`true` is the default)
* `keys` - whether to return the keys stored in the bucket. (`false` is the
-default). See also [HTTP List Keys](/riak/kv/2.2.2/developing/api/http/list-keys).
+default). See also [HTTP List Keys]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/list-keys).
## Response
@@ -49,8 +49,8 @@ The JSON object in the response will contain up to two entries,
`"props"` and `"keys"`, which are present or missing, according to the
optional query parameters. The default is for only `"props"` to be
present.
-See [HTTP Set Bucket Properties](/riak/kv/2.2.2/developing/api/http/set-bucket-props) for more information about the available
-bucket properties. See [Managing Bucket Types Through the Command Line](http://docs.basho.com/riak/kv/2.2.0/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface.
+See [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/set-bucket-props) for more information about the available
+bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.2.2/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface.
## Example
diff --git a/content/riak/kv/2.2.2/developing/api/http/link-walking.md b/content/riak/kv/2.2.2/developing/api/http/link-walking.md
index 283ee05aa5..39ffc1731d 100644
--- a/content/riak/kv/2.2.2/developing/api/http/link-walking.md
+++ b/content/riak/kv/2.2.2/developing/api/http/link-walking.md
@@ -21,8 +21,8 @@ This feature is deprecated and will be removed in a future version.
Link walking (traversal) finds and returns objects by following links attached
to them, starting from the object specified by the bucket and key portion. It
-is a special case of [MapReduce](/riak/kv/2.2.2/developing/usage/mapreduce), and can be expressed more verbosely as such.
-[Read more about Links](/riak/kv/2.2.2/learn/glossary/#links).
+is a special case of [MapReduce]({{< baseurl >}}riak/kv/2.2.2/developing/usage/mapreduce), and can be expressed more verbosely as such.
+[Read more about Links]({{< baseurl >}}riak/kv/2.2.2/learn/glossary/#links).
## Request
@@ -68,7 +68,7 @@ single object that was found. If no objects were found or "keep" was not set on
the phase, no chunks will be present in that phase. Objects inside phase
results will include `Location` headers that can be used to determine bucket
and key. In fact, you can treat each object-chunk similarly to a complete
-response from [fetching the object](/riak/kv/2.2.2/developing/api/http/fetch-object), without the status
+response from [fetching the object]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/fetch-object), without the status
code.
## Example
diff --git a/content/riak/kv/2.2.2/developing/api/http/list-resources.md b/content/riak/kv/2.2.2/developing/api/http/list-resources.md
index 7122c4bbfa..dda2ee8567 100644
--- a/content/riak/kv/2.2.2/developing/api/http/list-resources.md
+++ b/content/riak/kv/2.2.2/developing/api/http/list-resources.md
@@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific
operations. The standard resources are:
-* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.2.2/developing/api/http/#bucket-operations)
-* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.2.2/developing/api/http/secondary-indexes)
-* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.2.2/developing/api/http/link-walking)
-* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.2.2/developing/api/http/mapreduce)
-* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.2.2/developing/api/http/#object-key-operations)
-* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.2.2/developing/api/http/ping)
-* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.2.2/developing/api/http/set-bucket-props)
-* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.2.2/developing/api/http/status)
+* `riak_kv_wm_buckets` - [Bucket Operations]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/#bucket-operations)
+* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/secondary-indexes)
+* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/link-walking)
+* `riak_kv_wm_mapred` - [HTTP MapReduce]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/mapreduce)
+* `riak_kv_wm_object`- [Object/Key Operations]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/#object-key-operations)
+* `riak_kv_wm_ping` - [HTTP Ping]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/ping)
+* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/set-bucket-props)
+* `riak_kv_wm_stats` - [HTTP Status]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/status)
## Request
diff --git a/content/riak/kv/2.2.2/developing/api/http/mapreduce.md b/content/riak/kv/2.2.2/developing/api/http/mapreduce.md
index 81b7033583..23ea990192 100644
--- a/content/riak/kv/2.2.2/developing/api/http/mapreduce.md
+++ b/content/riak/kv/2.2.2/developing/api/http/mapreduce.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/http/mapreduce
---
-[MapReduce](/riak/kv/2.2.2/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow.
+[MapReduce]({{< baseurl >}}riak/kv/2.2.2/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow.
## Request
@@ -24,7 +24,7 @@ POST /mapred
```
Important headers:
-* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.2.2/developing/usage/mapreduce) page.
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{< baseurl >}}riak/kv/2.2.2/developing/usage/mapreduce) page.
Optional query parameters:
* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
diff --git a/content/riak/kv/2.2.2/developing/api/http/search-index-info.md b/content/riak/kv/2.2.2/developing/api/http/search-index-info.md
index 1aa5a4dd51..685ee535ba 100644
--- a/content/riak/kv/2.2.2/developing/api/http/search-index-info.md
+++ b/content/riak/kv/2.2.2/developing/api/http/search-index-info.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/http/search-index-info
---
-Retrieves information about all currently available [Search indexes](/riak/kv/2.2.2/developing/usage/search) in JSON format.
+Retrieves information about all currently available [Search indexes]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search) in JSON format.
## Request
diff --git a/content/riak/kv/2.2.2/developing/api/http/search-query.md b/content/riak/kv/2.2.2/developing/api/http/search-query.md
index 47adc919da..35b77f7ca7 100644
--- a/content/riak/kv/2.2.2/developing/api/http/search-query.md
+++ b/content/riak/kv/2.2.2/developing/api/http/search-query.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/http/search-query
---
-Performs a [Riak KV Search](/riak/kv/2.2.2/developing/usage/search) query.
+Performs a [Riak KV Search]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search) query.
## Request
@@ -30,7 +30,7 @@ GET /search/query/
to be used when returning the Search payload. The currently available
options are `json` and `xml`. The default is `xml`.
* `q` --- The actual Search query itself. Examples can be found in
- [Using Search](/riak/kv/2.2.2/developing/usage/search). If a query is not specified, Riak will return
+ [Using Search]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search). If a query is not specified, Riak will return
information about the index itself, e.g. the number of documents indexed.
diff --git a/content/riak/kv/2.2.2/developing/api/http/secondary-indexes.md b/content/riak/kv/2.2.2/developing/api/http/secondary-indexes.md
index 114bc277e1..7b97b61b4d 100644
--- a/content/riak/kv/2.2.2/developing/api/http/secondary-indexes.md
+++ b/content/riak/kv/2.2.2/developing/api/http/secondary-indexes.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/http/secondary-indexes
---
-[Secondary Indexes](/riak/kv/2.2.2/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
+[Secondary Indexes]({{< baseurl >}}riak/kv/2.2.2/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
## Request
diff --git a/content/riak/kv/2.2.2/developing/api/http/set-bucket-props.md b/content/riak/kv/2.2.2/developing/api/http/set-bucket-props.md
index 88c9a4c192..11da4d9aee 100644
--- a/content/riak/kv/2.2.2/developing/api/http/set-bucket-props.md
+++ b/content/riak/kv/2.2.2/developing/api/http/set-bucket-props.md
@@ -37,8 +37,8 @@ Available properties:
(concurrent updates)
* `last_write_wins` (true or false) - whether to ignore object history
(vector clock) when writing
-* `precommit` - [precommit hooks](/riak/kv/2.2.2/developing/usage/commit-hooks)
-* `postcommit` - [postcommit hooks](/riak/kv/2.2.2/developing/usage/commit-hooks)
+* `precommit` - [precommit hooks]({{< baseurl >}}riak/kv/2.2.2/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{< baseurl >}}riak/kv/2.2.2/developing/usage/commit-hooks)
* `r, w, dw, rw` - default quorum values for operations on keys in the bucket.
Valid values are:
* `"all"` - all nodes must respond
diff --git a/content/riak/kv/2.2.2/developing/api/http/status.md b/content/riak/kv/2.2.2/developing/api/http/status.md
index 69a229e4be..9a93849fd8 100644
--- a/content/riak/kv/2.2.2/developing/api/http/status.md
+++ b/content/riak/kv/2.2.2/developing/api/http/status.md
@@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"
## Output Explanation
-The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.2.2/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application.
+The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application.
Stat | Description
------------------------------|---------------------------------------------------
diff --git a/content/riak/kv/2.2.2/developing/api/http/store-object.md b/content/riak/kv/2.2.2/developing/api/http/store-object.md
index 7d7602890f..5c747ffd65 100644
--- a/content/riak/kv/2.2.2/developing/api/http/store-object.md
+++ b/content/riak/kv/2.2.2/developing/api/http/store-object.md
@@ -40,8 +40,8 @@ object when read.
* `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object.
* `X-Riak-Index-*` - index entries under which this object should be indexed.
-[Read more about Secondary Indexing](/riak/kv/2.2.2/developing/api/http/secondary-indexes)
-* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.2.2/developing/api/http/link-walking)
+[Read more about Secondary Indexing]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/secondary-indexes)
+* `Link` - user and system-defined links to other resources. [Read more about Links.]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/link-walking)
Optional headers (only valid on `PUT`):
@@ -85,7 +85,7 @@ Important headers:
* `Location` a relative URL to the newly-created object (when submitting
without a key)
-If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.2.2/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices`
+If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices`
may be returned if siblings existed or were created as part of the
operation, and the response can be dealt with similarly.
diff --git a/content/riak/kv/2.2.2/developing/api/http/store-search-index.md b/content/riak/kv/2.2.2/developing/api/http/store-search-index.md
index 3c53c2c8dd..b41c6b986d 100644
--- a/content/riak/kv/2.2.2/developing/api/http/store-search-index.md
+++ b/content/riak/kv/2.2.2/developing/api/http/store-search-index.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/http/store-search-index
---
-Creates a new Riak Search [index](/riak/kv/2.2.2/developing/usage/search/#simple-setup).
+Creates a new Riak Search [index]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search/#simple-setup).
## Request
@@ -26,11 +26,11 @@ PUT /search/index/
## Optional Request Body
If you run a `PUT` request to this endpoint without a request body, Riak
-will create a new Search index that uses the [default Search schema](/riak/kv/2.2.2/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.
+will create a new Search index that uses the [default Search schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.
To specify a different schema, however, you must pass Riak a JSON
object as the request body in which the `schema` field specifies the name of
-the schema to use. If you've [stored a schema](/riak/kv/2.2.2/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
+the schema to use. If you've [stored a schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
request would create an index called `my_index` that used that schema:
```curl
@@ -39,7 +39,7 @@ curl -XPUT http://localhost:8098/search/index/my_index \
-d '{"schema": "my_custom_schema"}'
```
-More information can be found in [Using Search](/riak/kv/2.2.2/developing/usage/search).
+More information can be found in [Using Search]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search).
## Normal Response Codes
diff --git a/content/riak/kv/2.2.2/developing/api/http/store-search-schema.md b/content/riak/kv/2.2.2/developing/api/http/store-search-schema.md
index 0f553247f6..168e9ac956 100644
--- a/content/riak/kv/2.2.2/developing/api/http/store-search-schema.md
+++ b/content/riak/kv/2.2.2/developing/api/http/store-search-schema.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/http/store-search-schema
---
-Creates a new Riak [Search schema](/riak/kv/2.2.2/developing/usage/search-schemas).
+Creates a new Riak [Search schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas).
## Request
@@ -26,7 +26,7 @@ PUT /search/schema/
## Required Form Data
In order to create a new Search schema, you must pass Riak a properly
-formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.2.2/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed
+formed XML schema. More information can be found in the [Search Schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
`my_schema.xml` and would like to create a new schema called
`my_custom_schema`, you would use the following HTTP request:
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers.md
index 2f9b54deb5..d432c0aa75 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers.md
@@ -139,47 +139,47 @@ message RpbErrorResp {
## Bucket Operations
-* [PBC List Buckets](/riak/kv/2.2.2/developing/api/protocol-buffers/list-buckets)
-* [PBC List Keys](/riak/kv/2.2.2/developing/api/protocol-buffers/list-keys)
-* [PBC Get Bucket Properties](/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props)
-* [PBC Set Bucket Properties](/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props)
-* [PBC Reset Bucket Properties](/riak/kv/2.2.2/developing/api/protocol-buffers/reset-bucket-props)
+* [PBC List Buckets]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/list-buckets)
+* [PBC List Keys]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/list-keys)
+* [PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props)
+* [PBC Set Bucket Properties]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props)
+* [PBC Reset Bucket Properties]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/reset-bucket-props)
## Object/Key Operations
-* [PBC Fetch Object](/riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object)
-* [PBC Store Object](/riak/kv/2.2.2/developing/api/protocol-buffers/store-object)
-* [PBC Delete Object](/riak/kv/2.2.2/developing/api/protocol-buffers/delete-object)
+* [PBC Fetch Object]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object)
+* [PBC Store Object]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/store-object)
+* [PBC Delete Object]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/delete-object)
## Query Operations
-* [PBC MapReduce](/riak/kv/2.2.2/developing/api/protocol-buffers/mapreduce)
-* [PBC Secondary Indexes](/riak/kv/2.2.2/developing/api/protocol-buffers/secondary-indexes)
-* [PBC Search](/riak/kv/2.2.2/developing/api/protocol-buffers/search)
+* [PBC MapReduce]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/mapreduce)
+* [PBC Secondary Indexes]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/secondary-indexes)
+* [PBC Search]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/search)
## Server Operations
-* [PBC Ping](/riak/kv/2.2.2/developing/api/protocol-buffers/ping)
-* [PBC Server Info](/riak/kv/2.2.2/developing/api/protocol-buffers/server-info)
+* [PBC Ping]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/ping)
+* [PBC Server Info]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/server-info)
## Bucket Type Operations
-* [PBC Get Bucket Type](/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-type)
-* [PBC Set Bucket Type](/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-type)
+* [PBC Get Bucket Type]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-type)
+* [PBC Set Bucket Type]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-type)
## Data Type Operations
-* [PBC Data Type Fetch](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-fetch)
-* [PBC Data Type Union](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-union)
-* [PBC Data Type Store](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-store)
-* [PBC Data Type Counter Store](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-counter-store)
-* [PBC Data Type Set Store](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-set-store)
-* [PBC Data Type Map Store](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store)
+* [PBC Data Type Fetch]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-fetch)
+* [PBC Data Type Union]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-union)
+* [PBC Data Type Store]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-store)
+* [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-counter-store)
+* [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-set-store)
+* [PBC Data Type Map Store]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store)
## Yokozuna Operations
-* [PBC Yokozuna Index Get](/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-get)
-* [PBC Yokozuna Index Put](/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-put)
-* [PBC Yokozuna Index Delete](/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-delete)
-* [PBC Yokozuna Schema Get](/riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-get)
-* [PBC Yokozuna Schema Put](/riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-put)
+* [PBC Yokozuna Index Get]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-get)
+* [PBC Yokozuna Index Put]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-put)
+* [PBC Yokozuna Index Delete]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-delete)
+* [PBC Yokozuna Schema Get]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-get)
+* [PBC Yokozuna Schema Put]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-put)
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/auth-req.md
index 97999f11b8..1e5f7e2fd2 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/auth-req.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/auth-req.md
@@ -27,4 +27,4 @@ message RpbAuthReq {
}
```
-For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.2.2/using/security/basics).
+For more on authentication, see our documentation on [Authentication and Authorization]({{< baseurl >}}riak/kv/2.2.2/using/security/basics).
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/coverage-queries.md
new file mode 100644
index 0000000000..7a06a6a012
--- /dev/null
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/coverage-queries.md
@@ -0,0 +1,78 @@
+---
+title: "PBC Coverage Queries"
+description: ""
+project: "riak_kv"
+project_version: "2.2.2"
+menu:
+  riak_kv-2.2.2:
+    name: "Coverage Queries"
+    identifier: "pbc_coverage_queries"
+    weight: 108
+    parent: "apis_pbc"
+version_history:
+  in: "2.1.4+"
+toc: true
+aliases:
+  - /riak/2.2.2/dev/references/protocol-buffers/coverage-queries
+  - /riak/kv/2.2.2/dev/references/protocol-buffers/coverage-queries
+---
+
+Prepare for parallelizable
+[secondary index queries](../secondary-indexes/) by requesting a
+coverage plan. The response will be multiple slices of the cluster, as
+identified by a TCP endpoint and an opaque binary to be included with
+each 2i query.
+
+## Request
+
+```protobuf
+message RpbCoverageReq {
+  optional bytes type = 1;
+  required bytes bucket = 2;
+  optional uint32 min_partitions = 3;
+  optional bytes replace_cover = 4;
+  repeated bytes unavailable_cover = 5;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket in which the data is stored
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type.
+`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size.
+`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned.
+`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use.
+
+## Response
+
+The results of a coverage query are returned as a list of endpoints
+with opaque binaries to be included with secondary index queries.
+
+```protobuf
+message RpbCoverageResp {
+  repeated RpbCoverageEntry entries = 1;
+}
+
+message RpbCoverageEntry {
+  required bytes ip = 1;
+  required uint32 port = 2;
+  optional bytes keyspace_desc = 3;
+  required bytes cover_context = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints.
+`port` | The port to contact on the server.
+`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging.
+`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client).
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/delete-object.md
index 3bed4e2eb1..bb4c38fe01 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/delete-object.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/delete-object.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/protocol-buffers/delete-object
---
-Delete an object in the specified [bucket type](/riak/kv/2.2.2/using/cluster-operations/bucket-types)/bucket/key location.
+Delete an object in the specified [bucket type]({{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/bucket-types)/bucket/key location.
## Request
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-counter-store.md
index 9fa99405e7..6302c7d386 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-counter-store.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-counter-store.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/protocol-buffers/dt-counter-store
---
-An operation to update a [counter](/riak/kv/2.2.2/developing/data-types).
+An operation to update a [counter]({{< baseurl >}}riak/kv/2.2.2/developing/data-types).
## Request
@@ -28,4 +28,4 @@ message CounterOp {
The `increment` value specifies how much the counter will be incremented
or decremented, depending on whether the `increment` value is positive or
negative. This operation can be used to update counters that are
-stored on their own in a key or [within a map](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store).
+stored on their own in a key or [within a map]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store).
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-fetch.md
index 3df47c1805..d12af8f61c 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-fetch.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-fetch.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/protocol-buffers/dt-fetch
---
-The equivalent of [`RpbGetReq`](/riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.2.2/developing/data-types). This request results in a `DtFetchResp`
+The equivalent of [`RpbGetReq`]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{< baseurl >}}riak/kv/2.2.2/developing/data-types). This request results in a `DtFetchResp`
message (explained in the **Response** section below).
## Request
@@ -42,14 +42,14 @@ Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket in which the Data Type is stored
`key` | The key where the Data Type is stored
-`type` | The [Using Bucket Types](/riak/kv/2.2.2/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)
+`type` | The [Using Bucket Types]({{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)
#### Optional Parameters
> **Note on defaults and special values**
>
> All of the optional parameters below have default values determined on a
-per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props) for more information.
+per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props) for more information.
Furthermore, you can assign an integer value to the `r` and
`pr`, provided that that integer value is less than or equal
@@ -72,7 +72,7 @@ Parameter | Description
## Response
-The response to a fetch request ([`DtFetchReq`](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+The response to a fetch request ([`DtFetchReq`]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
```protobuf
message DtFetchResp {
@@ -91,7 +91,7 @@ message DtFetchResp {
If the `include_context` option is specified, an opaque "context" value
will be returned along with the user-readable data. When sending an
update request, the client should send this context as well, just as one
-would send a [vclock](/riak/kv/2.2.2/learn/glossary/#vector-clock) for standard KV updates.
+would send a [vclock]({{< baseurl >}}riak/kv/2.2.2/learn/glossary/#vector-clock) for standard KV updates.
The type of the Data Type is specified in the `type` field, and must be
one of the three possible values of the `DataType` enum (`COUNTER`,
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store.md
index dc685d1c5f..10005b7a27 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store.md
@@ -66,7 +66,7 @@ message MapUpdate {
}
```
-The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-set-store).
+The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-set-store).
If you are updating a flag, you do so by including a `FlagOp` message. As
shown in the `MapUpdate` message above, this operation takes one of two
values: `ENABLE` and `DISABLE` (`1` and `2`, respectively).
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-set-store.md
index 361c94f63f..7caf5e7257 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-set-store.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-set-store.md
@@ -16,7 +16,7 @@ aliases:
---
An operation to update a set, either on its own (at the bucket/key
-level) or [inside of a map](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store).
+level) or [inside of a map]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store).
## Request
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-store.md
index 20b84048a0..fe86b2e2bc 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-store.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-store.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/protocol-buffers/dt-store
---
-A request to update the value of a [Riak Data Type](/riak/kv/2.2.2/developing/data-types).
+A request to update the value of a [Riak Data Type]({{< baseurl >}}riak/kv/2.2.2/developing/data-types).
## Request
@@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting
`DtUpdateResp`.
The `DtOp` value specifies which Data Type-specific operation is being
-performed. More on that in the [PBC Data Type Union](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-union) document.
+performed. More on that in the [PBC Data Type Union]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-union) document.
```protobuf
message DtUpdateReq {
@@ -50,11 +50,11 @@ message DtUpdateReq {
Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket in which the Data Type is stored
-`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.2.2/using/cluster-operations/bucket-types).
+`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/bucket-types).
Also required is a `DtOp` message that specifies which operation is to
be performed, depending on whether the Data Type being updated is a
-[counter](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store).
+[counter]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-counter-store), [set]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-set-store), or [map]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-map-store).
```protobuf
message DtOp {
@@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum`
Parameter | Description
:---------|:-----------
`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
-`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.2.2/learn/glossary/#vector-clock)
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{< baseurl >}}riak/kv/2.2.2/learn/glossary/#vector-clock)
`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
@@ -92,7 +92,7 @@ Parameter | Description
## Response
The response to a Data Type update request is analogous to
-[`RpbPutResp`](/riak/kv/2.2.2/developing/api/protocol-buffers/store-object) for KV operations. If the
+[`RpbPutResp`]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/store-object) for KV operations. If the
`return_body` is set in the update request message (as explained above),
the message will include the opaque context of the Data Type (`context`)
and the new value of the Data Type _after_ the update has completed
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-union.md
index e7a8239d52..ae0b97aa36 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-union.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/dt-union.md
@@ -28,4 +28,4 @@ message DtOp {
```
The included operation depends on the Data Type that is being updated.
-`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.2.2/developing/api/protocol-buffers/dt-store) message.
+`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/dt-store) message.
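The dt-* hunks above all manipulate Data Types through Protocol Buffers messages. For readers who want to exercise an equivalent update without writing a protobuf client, the HTTP counters endpoint covered earlier in this diff offers a rough analogue of a `CounterOp` increment. A minimal sketch, assuming a local node on the default HTTP port 8098 and hypothetical bucket/key names; the request body is the amount to increment by:

```curl
curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_counter \
  -d "1"
```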
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object.md
index 6563828f1d..43d96ea8ac 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object.md
@@ -47,7 +47,7 @@ message RpbGetReq {
> **Note on defaults and special values**
>
> All of the optional parameters below have default values determined on a
-per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props) for more information.
+per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props) for more information.
>
> Furthermore, you can assign an integer value to the `r` and
`pr` parameters, provided that that integer value is less than or
@@ -87,7 +87,7 @@ Value | Description
The content entries hold the object value and any metadata. Below is
the structure of a RpbContent message, which is included in GET/PUT
responses (`RpbGetResp` (above) and
-[`RpbPutResp`](/riak/kv/2.2.2/developing/api/protocol-buffers/store-object), respectively):
+[`RpbPutResp`]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/store-object), respectively):
```protobuf
message RpbContent {
@@ -114,7 +114,7 @@ of the following optional parameters:
* `charset` --- The character encoding of the object, e.g. `utf-8`
* `content_encoding` --- The content encoding of the object, e.g.
`video/mp4`
-* `vtag` --- The object's [vtag](/riak/kv/2.2.2/learn/glossary/#vector-clock)
+* `vtag` --- The object's [vtag]({{< baseurl >}}riak/kv/2.2.2/learn/glossary/#vector-clock)
* `links` --- This parameter is associated with the now-deprecated link
walking feature and should not be used by Riak clients
* `last_mod` --- A timestamp for when the object was last modified, in
@@ -132,7 +132,7 @@ of the following optional parameters:
}
```
Notice that both a key and value can be stored or just a key.
- `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.2.2/developing/usage/secondary-indexes) to objects (in the optional
+ `RpbPair` messages are also used to attach [secondary indexes]({{< baseurl >}}riak/kv/2.2.2/developing/usage/secondary-indexes) to objects (in the optional
`indexes` field).
* `deleted` --- Whether the object has been deleted (i.e. whether a
tombstone for the object has been found under the specified key)
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props.md
index 7a3d8474a7..2fa2c0fc57 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props.md
@@ -26,7 +26,7 @@ message RpbGetBucketReq {
}
```
-The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.2.2/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
+The bucket's name (`bucket`) must be specified. The [bucket type]({{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
the `default` bucket type will be used.
## Response
@@ -85,7 +85,7 @@ message RpbBucketProps {
Each `RpbBucketProps` message returns all of the properties associated
with a particular bucket.
Default values for bucket properties, as well as descriptions
of all of the above properties, can be found in the
-[configuration file](/riak/kv/2.2.2/configuring/reference/#default-bucket-properties) documentation.
+[configuration file]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/#default-bucket-properties) documentation.
It should be noted that the value of an `RpbBucketProps` message may
include other message types, such as `RpbModFun` (specifying
@@ -106,5 +106,5 @@ message RpbCommitHook {
```
{{% note title="Note on `RpbReplMode`" %}}
-The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
{{% /note %}}
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-type.md
index 949fd3d5fd..a2156ab3a6 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-type.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-type.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/protocol-buffers/get-bucket-type
---
-Gets the bucket properties associated with a [bucket type](/riak/kv/2.2.2/using/cluster-operations/bucket-types).
+Gets the bucket properties associated with a [bucket type]({{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/bucket-types).
## Request
@@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`).
## Response
A bucket type's properties will be sent to the client as part of an
-[`RpbBucketProps`](/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props) message.
+[`RpbBucketProps`]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props) message.
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-client-id.md
index 9a4aad3bd7..036cef7e89 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-client-id.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/get-client-id.md
@@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs.
Get the client id used for this connection. Client ids are used for
conflict resolution and each unique actor in the system should be
assigned one. A client id is assigned randomly when the socket is
-connected and can be changed using [Set Client ID](/riak/kv/2.2.2/developing/api/protocol-buffers/set-client-id).
+connected and can be changed using [Set Client ID]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/set-client-id).
## Request
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/mapreduce.md
index 762f2db98d..1fece2c064 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/mapreduce.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/mapreduce.md
@@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways
* `application/json` --- JSON-encoded MapReduce job
* `application/x-erlang-binary` --- Erlang external term format
-The JSON encoding is the same as [REST API](/riak/kv/2.2.2/developing/usage/mapreduce/#rest) and
-the external term format is the same as the [local Erlang API](/riak/kv/2.2.2/developing/app-guide/advanced-mapreduce/#erlang)
+The JSON encoding is the same as [REST API]({{< baseurl >}}riak/kv/2.2.2/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{< baseurl >}}riak/kv/2.2.2/developing/app-guide/advanced-mapreduce/#erlang)
## Response
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/reset-bucket-props.md
index 1498e07487..c8b43844a8 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/reset-bucket-props.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/reset-bucket-props.md
@@ -27,7 +27,7 @@ message RpbResetBucketReq {
```
You must specify the name of the bucket (`bucket`) and optionally a
-[bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) using the `type` value. If you do not
+[bucket type]({{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types) using the `type` value. If you do not
specify a bucket type, the `default` bucket type will be used by Riak.
## Response
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/secondary-indexes.md
index 3608e73eff..a4b40d50f4 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/secondary-indexes.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/secondary-indexes.md
@@ -61,7 +61,7 @@ Parameter | Description
`max_results` | If pagination is turned on, the number of results to be returned to the client
`continuation` | If set to `true`, values are returned in a paginated response
`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
-`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.2.2/developing/usage/bucket-types).
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types).
`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
@@ -84,7 +84,7 @@ message RpbIndexResp {
Parameter | Description
:---------|:-----------
`keys` | A list of keys that match the index request
-`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object).
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object).
`continuation` | Used for paginated responses
`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props.md
index 3284f4cd1a..49ba97cbd0 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props.md
@@ -29,9 +29,9 @@ message RpbSetBucketReq {
You must specify the name of the bucket (`bucket`) and include an
`RpbBucketProps` message. More on that message type can be found in the
-[PBC Get Bucket Properties](/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props) documentation.
+[PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props) documentation.
-You can also specify a [bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) using the
+You can also specify a [bucket type]({{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types) using the
`type` value. If you do not specify a bucket type, the `default` bucket
type will be used by Riak.
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-type.md
index 828a5f7878..965018584b 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-type.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-type.md
@@ -15,8 +15,8 @@ aliases:
- /riak/kv/2.2.2/dev/references/protocol-buffers/set-bucket-type
---
-Assigns a set of [bucket properties](/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props) to a
-[bucket type](/riak/kv/2.2.2/developing/usage/bucket-types).
+Assigns a set of [bucket properties]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props) to a
+[bucket type]({{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types).
## Request
@@ -28,4 +28,4 @@ message RpbSetBucketTypeReq {
}
```
The `type` field specifies the name of the bucket type as a binary. The
-`props` field contains an [`RpbBucketProps`](/riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props).
+`props` field contains an [`RpbBucketProps`]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/get-bucket-props).
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/store-object.md
index 02e2979736..6ba90360a4 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/store-object.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/store-object.md
@@ -16,11 +16,11 @@ aliases:
---
Stores an object under the specified location, as determined by the
-intended [key](/riak/kv/2.2.2/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.2.2/learn/concepts/buckets), and [bucket type](/riak/kv/2.2.2/developing/usage/bucket-types). A bucket must always be specified (via
+intended [key]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/keys-and-objects), [bucket]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/buckets), and [bucket type]({{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types). A bucket must always be specified (via
`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
no key is specified, Riak will assign a random key to the object. If no
-[bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) is assigned, Riak will assign
-`default`, which means that the [default bucket configuration](/riak/kv/2.2.2/configuring/reference/#default-bucket-properties) will be used.
+[bucket type]({{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/#default-bucket-properties) will be used.
#### Request
@@ -50,7 +50,7 @@ message RpbPutReq {
Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
-`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object)
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object)
#### Optional Parameters
@@ -93,7 +93,7 @@ message RpbPutResp {
If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
will contain the current object after the PUT completes, in `contents`,
-as well as the object's [causal context](/riak/kv/2.2.2/learn/concepts/causal-context), in the `vclock`
+as well as the object's [causal context]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/causal-context), in the `vclock`
field. The `key` will be sent only if the server generated a random key
for the object.
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-delete.md
index b79abf6283..8e179417fb 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-delete.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-delete.md
@@ -29,5 +29,5 @@ message RpbYokozunaIndexDeleteReq {
## Response
-Returns a [RpbDelResp](/riak/kv/2.2.2/developing/api/protocol-buffers/#message-codes) code with no data on success.
+Returns a [RpbDelResp]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/#message-codes) code with no data on success.
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-get.md
index fed33f19ec..0ddbabe028 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-get.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-get.md
@@ -53,7 +53,7 @@ message RpbYokozunaIndex {
```
Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.2.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
+binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the
index is stored (for GET requests) or on which you wish the index to be
stored (for PUT requests). An index's `n_val` must match the associated
bucket's `n_val`.
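Returning to the `RpbPutReq` discussion a few hunks above: the HTTP analogue of a simple store is a `PUT` against a typed bucket/key path. A minimal sketch, assuming a local node on the default HTTP port 8098 and hypothetical bucket/key names; omitting the `types/default` segment would fall back to the `default` bucket type, mirroring the PBC behavior described above:

```curl
curl -XPUT http://localhost:8098/types/default/buckets/my_bucket/keys/my_key \
  -H "Content-Type: text/plain" \
  -d "hello"
```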
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-put.md
index 9a58f6123d..36bcc14d0d 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-put.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-index-put.md
@@ -37,9 +37,9 @@ message RpbYokozunaIndex {
```
Each message specifying an index must include the index's name as a
-binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.2.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
+binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
## Response
-Returns a [RpbPutResp](/riak/kv/2.2.2/developing/api/protocol-buffers/#message-codes) code with no data on success.
+Returns a [RpbPutResp]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/#message-codes) code with no data on success.
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-get.md
index 5a596f6a80..9c612501fd 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-get.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-get.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/protocol-buffers/yz-schema-get
---
-Fetch a [search schema](/riak/kv/2.2.2/developing/usage/search-schemas) from Riak Search.
+Fetch a [search schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas) from Riak Search.
## Request
diff --git a/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-put.md
index 9512295b6b..2de0746400 100644
--- a/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-put.md
+++ b/content/riak/kv/2.2.2/developing/api/protocol-buffers/yz-schema-put.md
@@ -15,7 +15,7 @@ aliases:
- /riak/kv/2.2.2/dev/references/protocol-buffers/yz-schema-put
---
-Create a new Solr [search schema](/riak/kv/2.2.2/developing/usage/search-schemas).
+Create a new Solr [search schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas).
## Request
@@ -34,8 +34,8 @@ message RpbYokozunaSchema {
}
```
-This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.2.2/developing/usage/search-schemas) `content` as XML.
+This message *must* include both the schema `name` and its Solr [search schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas) `content` as XML.
## Response
-Returns a [RpbPutResp](/riak/kv/2.2.2/developing/api/protocol-buffers/#message-codes) code with no data on success.
+Returns a [RpbPutResp]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/#message-codes) code with no data on success.
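The yz-schema-put hunk above requires both a schema `name` and its XML `content`; over HTTP the same pairing appears as the path segment plus the request body. A minimal sketch, assuming a local node on the default HTTP port 8098 and the hypothetical schema file `my_schema.xml` referenced earlier in this diff:

```curl
curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
  -H "Content-Type: application/xml" \
  --data-binary @my_schema.xml
```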
diff --git a/content/riak/kv/2.2.2/developing/app-guide.md b/content/riak/kv/2.2.2/developing/app-guide.md index 55e93a7cfb..d9a9d528ed 100644 --- a/content/riak/kv/2.2.2/developing/app-guide.md +++ b/content/riak/kv/2.2.2/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.2.2/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.2.2/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.2.2/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.2.2/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.2.2/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.2.2/developing/key-value-modeling -[dev data types]: /riak/kv/2.2.2/developing/data-types -[dev data types#counters]: /riak/kv/2.2.2/developing/data-types/#counters -[dev data types#sets]: /riak/kv/2.2.2/developing/data-types/#sets -[dev data types#maps]: /riak/kv/2.2.2/developing/data-types/#maps -[usage create objects]: /riak/kv/2.2.2/developing/usage/creating-objects -[usage search]: /riak/kv/2.2.2/developing/usage/search -[use ref search]: /riak/kv/2.2.2/using/reference/search -[usage 2i]: /riak/kv/2.2.2/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.2.2/developing/client-libraries -[concept crdts]: /riak/kv/2.2.2/learn/concepts/crdts -[dev data model]: /riak/kv/2.2.2/developing/data-modeling -[usage mapreduce]: /riak/kv/2.2.2/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.2.2/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.2.2/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.2/setup/planning/backend/memory -[obj model java]: /riak/kv/2.2.2/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.2.2/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.2.2/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.2.2/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.2.2/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.2.2/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.2.2/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[use ref strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.2.2/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.2.2/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.2.2/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.2.2/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties -[install index]: /riak/kv/2.2.2/setup/installing -[getting started]: /riak/kv/2.2.2/developing/getting-started -[usage index]: /riak/kv/2.2.2/developing/usage -[glossary]: /riak/kv/2.2.2/learn/glossary +[usage conflict resolution]: {{}}riak/kv/2.2.2/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.2.2/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.2.2/developing/data-modeling/#sensor-data +[concept eventual consistency]: 
{{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.2.2/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.2.2/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.2.2/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.2.2/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.2.2/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.2.2/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.2.2/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search +[use ref search]: {{}}riak/kv/2.2.2/using/reference/search +[usage 2i]: {{}}riak/kv/2.2.2/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.2.2/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.2.2/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.2.2/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.2.2/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.2.2/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.2.2/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.2/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.2.2/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.2.2/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.2.2/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.2.2/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.2.2/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.2.2/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.2.2/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[cluster ops strong consistency]: {{}}riak/kv/2.2.2/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/2.2.2/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/2.2.2/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/2.2.2/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/2.2.2/setup/installing +[getting started]: {{}}riak/kv/2.2.2/developing/getting-started +[usage index]: {{}}riak/kv/2.2.2/developing/usage +[glossary]: {{}}riak/kv/2.2.2/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -118,7 +118,7 @@ Riak may not such be a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance. 
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.2.2/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.2.2/developing/app-guide/advanced-mapreduce.md index 70d7b726a8..1b1a669247 100644 --- a/content/riak/kv/2.2.2/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.2.2/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.2.2/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.2.2/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.2.2/using/reference/custom-code -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[config reference]: /riak/kv/2.2.2/configuring/reference +[usage 2i]: {{}}riak/kv/2.2.2/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.2.2/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.2.2/configuring/reference [google mr]: http://research.google.com/archive/mapreduce.html [mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map [function contrib]: https://github.com/basho/riak_function_contrib @@ -381,7 +381,7 @@ Erlang client. {{% note title="Distributing Erlang MapReduce Code" %}} Any modules and functions you use in your Erlang MapReduce calls must be available on all nodes in the cluster. Please read about -[installing custom code](/riak/kv/2.2.2/using/reference/custom-code). +[installing custom code]({{}}riak/kv/2.2.2/using/reference/custom-code). {{% /note %}} ### Erlang Example @@ -728,7 +728,7 @@ You can use streaming with Erlang via the Riak KV local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.2.2/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.2.2/developing/app-guide/cluster-metadata.md index 36d586d684..be3f31acb7 100644 --- a/content/riak/kv/2.2.2/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.2.2/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.2.2/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.2.2/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. 
Logical clocks, namely [dotted version vectors](/riak/kv/2.2.2/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.2.2/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.2.2/developing/app-guide/replication-properties.md b/content/riak/kv/2.2.2/developing/app-guide/replication-properties.md index bafd4f2d73..cdfecfe9c8 100644 --- a/content/riak/kv/2.2.2/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.2.2/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.2/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.2.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.2.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.2.2/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.2.2/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. 
Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.2.2/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.2.2/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.2.2/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.2.2/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.2.2/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.2.2/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.2.2/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.2.2/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. 
The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.2.2/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.2.2/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.2.2/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.2.2/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.2.2/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.2.2/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.2.2/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.2.2/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.2.2/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.2.2/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.2.2/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.2.2/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.2.2/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.2.2/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.2.2/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.2.2/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.2.2/developing/app-guide/strong-consistency.md b/content/riak/kv/2.2.2/developing/app-guide/strong-consistency.md index d7e66ca31d..46be4b769c 100644 --- a/content/riak/kv/2.2.2/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.2.2/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.2.2/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/kv/2.2.2/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.2.2/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.2.2/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.2.2/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.2.2/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.2.2/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.2.2/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.2.2/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.2.2/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/kv/2.2.2/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.2.2/developing/client-libraries -[getting started]: /riak/kv/2.2.2/developing/getting-started -[config strong consistency#details]: /riak/kv/2.2.2/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[concept eventual consistency]: 
{{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.2.2/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.2.2/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.2.2/using/cluster-operations/bucket-types +[apps replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.2.2/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.2.2/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.2.2/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.2.2/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.2.2/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.2.2/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.2.2/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.2.2/developing/client-libraries +[getting started]: {{}}riak/kv/2.2.2/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.2.2/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.2.2/developing/app-guide/write-once.md b/content/riak/kv/2.2.2/developing/app-guide/write-once.md index 01e09d332e..a5b16b3e56 100644 --- a/content/riak/kv/2.2.2/developing/app-guide/write-once.md +++ b/content/riak/kv/2.2.2/developing/app-guide/write-once.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.2/dev/advanced/write-once --- -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[bucket type]: /riak/kv/2.2.2/developing/usage/bucket-types -[Riak data types]: /riak/kv/2.2.2/developing/data-types -[strong consistency]: /riak/kv/2.2.2/developing/app-guide/strong-consistency +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.2.2/developing/data-types +[strong consistency]: {{}}riak/kv/2.2.2/developing/app-guide/strong-consistency Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. @@ -98,7 +98,7 @@ The relationship between the `riak_client`, write-once workers, and vnode proxies is illustrated in the following diagram:
-![Write Once](/images/write_once.png) +![Write Once]({{}}images/write_once.png)
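For a sense of what this looks like from a client, here is a minimal sketch using the official Python client. It assumes a hypothetical bucket type named `no-edits` that has already been created and activated with `{"props": {"write_once": true}}`:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# 'no-edits' is a hypothetical bucket type assumed to have been created
# and activated with {"props": {"write_once": true}}.
bucket = client.bucket_type('no-edits').bucket('events')

# Each key in a write-once bucket should be written exactly once
# and never updated or overwritten.
obj = bucket.new('event-0001', data={'level': 'info', 'msg': 'service started'})
obj.store()
```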
## Client Impacts @@ -149,7 +149,7 @@ LevelDB. Riak will automatically fall back to synchronous writes with all other backends. {{% note title="Note on the `multi` backend" %}} -The [Multi](/riak/kv/2.2.2/setup/planning/backend/multi) backend does not +The [Multi]({{}}riak/kv/2.2.2/setup/planning/backend/multi) backend does not support asynchronous writes. Therefore, if LevelDB is used with the Multi backend, it will be used in synchronous mode. {{% /note %}} diff --git a/content/riak/kv/2.2.2/developing/client-libraries.md b/content/riak/kv/2.2.2/developing/client-libraries.md index 219fa2ae9a..71060020c6 100644 --- a/content/riak/kv/2.2.2/developing/client-libraries.md +++ b/content/riak/kv/2.2.2/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.2.2/developing/data-types.md b/content/riak/kv/2.2.2/developing/data-types.md index 306b3fbad7..13e1962eed 100644 --- a/content/riak/kv/2.2.2/developing/data-types.md +++ b/content/riak/kv/2.2.2/developing/data-types.md @@ -43,9 +43,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -268,5 +268,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. 
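As a quick illustration of the workflow above, here is a minimal sketch using the official Python client; the bucket type `counters` is hypothetical and is assumed to have been created with `{"props": {"datatype": "counter"}}` and then activated:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)

# 'counters' is a hypothetical bucket type assumed to carry
# {"props": {"datatype": "counter"}} and to have been activated.
bucket = client.bucket_type('counters').bucket('traffic')

# On a bucket whose type sets a datatype, new() returns a data type
# (here a counter) rather than a plain key/value object.
counter = bucket.new('page_hits')
counter.increment(5)
counter.store()

print(bucket.get('page_hits').value)  # 5
```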
diff --git a/content/riak/kv/2.2.2/developing/faq.md b/content/riak/kv/2.2.2/developing/faq.md index 1deb18c939..09e45da2a9 100644 --- a/content/riak/kv/2.2.2/developing/faq.md +++ b/content/riak/kv/2.2.2/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.2.2/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.2.2/using/performance/benchmarking -[Bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.2.2/developing/usage +[[Basho Bench]: {{}}riak/kv/2.2.2/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.2.2/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.2.2/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.2.2/configuring/reference +[commit hooks]: {{}}riak/kv/2.2.2/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.2.2/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.2.2/developing/client-libraries -[MapReduce]: /riak/kv/2.2.2/developing/usage/mapreduce -[Memory]: /riak/kv/2.2.2/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.2.2/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.2.2/learn/concepts/causal-context#vector-clocks +[Erlang Riak Client]: {{}}riak/kv/2.2.2/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.2.2/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.2.2/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.2.2/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.2.2/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.2.2/developing/getting-started.md b/content/riak/kv/2.2.2/developing/getting-started.md index 5c751035d0..ba913812b3 100644 --- a/content/riak/kv/2.2.2/developing/getting-started.md +++ b/content/riak/kv/2.2.2/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.2.2/setup/installing -[dev client libraries]: /riak/kv/2.2.2/developing/client-libraries +[install index]: {{}}riak/kv/2.2.2/setup/installing +[dev client libraries]: {{}}riak/kv/2.2.2/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.2.2/developing/getting-started/csharp.md b/content/riak/kv/2.2.2/developing/getting-started/csharp.md index 9260ca17b8..448bab80ac 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/csharp.md +++ b/content/riak/kv/2.2.2/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.2.2/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.2/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.2.2/developing/getting-started/csharp/querying.md b/content/riak/kv/2.2.2/developing/getting-started/csharp/querying.md index 78b8cbea63..d7e97d7838 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.2.2/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.2/developing/getting-started/erlang.md b/content/riak/kv/2.2.2/developing/getting-started/erlang.md index 5f1988311c..2ebb254d5d 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/erlang.md +++ b/content/riak/kv/2.2.2/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.2/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.2/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.2.2/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.2.2/developing/getting-started/erlang/object-modeling.md index d1437cbacd..c38841dcde 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.2.2/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.2.2/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.2.2/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.2.2/developing/getting-started/erlang/querying.md b/content/riak/kv/2.2.2/developing/getting-started/erlang/querying.md index ffeb39c64e..ac53c38290 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.2.2/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.2.2/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.2.2/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.2/developing/getting-started/golang.md b/content/riak/kv/2.2.2/developing/getting-started/golang.md index a5cc1019c3..ab0a25a5b2 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/golang.md +++ b/content/riak/kv/2.2.2/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.2/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.2/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.2/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.2/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.2.2/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.2.2/developing/getting-started/golang/object-modeling.md index 96b7a01ac8..d3df28e744 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.2.2/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.2.2/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.2.2/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.2.2/developing/getting-started/golang/querying.md b/content/riak/kv/2.2.2/developing/getting-started/golang/querying.md index 0e28dd8baf..4358d8ebe1 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.2.2/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.2.2/developing/getting-started/java.md b/content/riak/kv/2.2.2/developing/getting-started/java.md index 66a4d992a8..5e738cb264 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/java.md +++ b/content/riak/kv/2.2.2/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.2.2/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.2/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.2.2/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.2.2/developing/getting-started/java/crud-operations.md index 39f26811c3..cb99e5f771 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.2.2/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.2/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.2/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/) documentation. ## Updating Objects @@ -85,8 +85,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.2/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.2/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -196,6 +196,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.2/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.2/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/) documentation. diff --git a/content/riak/kv/2.2.2/developing/getting-started/java/querying.md b/content/riak/kv/2.2.2/developing/getting-started/java/querying.md index 400b8711e5..4e9c80554e 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.2.2/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.2/developing/getting-started/nodejs.md b/content/riak/kv/2.2.2/developing/getting-started/nodejs.md index 81e459350c..32b6b13991 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.2.2/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.2/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.2/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.2.2/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.2.2/developing/getting-started/nodejs/querying.md index ae93e67c38..f71300b7bd 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.2.2/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.2/developing/getting-started/php.md b/content/riak/kv/2.2.2/developing/getting-started/php.md index 3a007f121d..46f63c37ab 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/php.md +++ b/content/riak/kv/2.2.2/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.2.2/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.2/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.2.2/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.2.2/developing/getting-started/php/crud-operations.md index 5de6642c91..89c7f077f9 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.2.2/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter](/riak/kv/2.2.2/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.2.2/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.2.2/developing/getting-started/php/querying.md b/content/riak/kv/2.2.2/developing/getting-started/php/querying.md index e26dbc7a86..87c1ed9bd9 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.2.2/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.2/developing/getting-started/python.md b/content/riak/kv/2.2.2/developing/getting-started/python.md index 839dd0ac50..1c419bc722 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/python.md +++ b/content/riak/kv/2.2.2/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.2/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.2/using/running-a-cluster) first. 
To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.2/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.2/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.2.2/developing/getting-started/python/querying.md b/content/riak/kv/2.2.2/developing/getting-started/python/querying.md index bfec9f8f3f..e11155fe3a 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.2.2/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.2/developing/getting-started/ruby.md b/content/riak/kv/2.2.2/developing/getting-started/ruby.md index b7317906ba..c3bcbc0117 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/ruby.md +++ b/content/riak/kv/2.2.2/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.2/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.2/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. 
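The secondary-index pattern that the querying pages above keep returning to can be sketched briefly with the official Python client; the bucket, key, and `zip_bin` index field are hypothetical, and the cluster is assumed to run a sorted backend such as LevelDB or Memory:

```python
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('customers')

# Attach a hypothetical binary ("_bin") secondary index to the object
# at write time.
obj = bucket.new('customer-1001', data={'name': 'Jane Doe'})
obj.add_index('zip_bin', '99701')
obj.store()

# Look the object up again by index value rather than by key.
for key in bucket.get_index('zip_bin', '99701'):
    print(key)  # customer-1001
```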
## Next Steps -[CRUD Operations](/riak/kv/2.2.2/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.2/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.2.2/developing/getting-started/ruby/querying.md b/content/riak/kv/2.2.2/developing/getting-started/ruby/querying.md index c17c908c2b..8b5036d1ee 100644 --- a/content/riak/kv/2.2.2/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.2.2/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.2/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.2/developing/key-value-modeling.md b/content/riak/kv/2.2.2/developing/key-value-modeling.md index a319e482fa..74597212ed 100644 --- a/content/riak/kv/2.2.2/developing/key-value-modeling.md +++ b/content/riak/kv/2.2.2/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.2.2/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.2.2/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.2.2/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.2.2/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.2.2/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.2.2/developing/app-guide/) for a better sense of which features you might need. 
+for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.2.2/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects. Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.2.2/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.2.2/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.2.2/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.2.2/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.2.2/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.2.2/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.2.2/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.2.2/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.2.2/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.2.2/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.2.2/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.2.2/developing/data-types/#sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.2.2/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.2.2/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.2.2/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. 
We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.2.2/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.2.2/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.2/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.2.2/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.2/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.2/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.2.2/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.2.2/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.2.2/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.2.2/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.2.2/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.2.2/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.2.2/developing/usage/commit-hooks.md b/content/riak/kv/2.2.2/developing/usage/commit-hooks.md index a36bbf1f00..e5644b57a8 100644 --- a/content/riak/kv/2.2.2/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.2.2/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. 
-Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.2.2/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.2.2/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. ## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.2.2/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.2.2/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.2.2/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.2.2/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.2.2/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.2.2/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.2.2/developing/usage/conflict-resolution.md b/content/riak/kv/2.2.2/developing/usage/conflict-resolution.md index 2c272d63a7..30d80ae0c7 100644 --- a/content/riak/kv/2.2.2/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.2.2/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.2/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.2.2/learn/concepts/clusters) system in which any [node](/riak/kv/2.2.2/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. 
It was built as a [clustered]({{}}riak/kv/2.2.2/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.2.2/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. -If you are using Riak in an [eventually consistent](/riak/kv/2.2.2/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.2.2/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.2.2/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.2.2/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.2.2/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.2.2/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.2.2/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.2.2/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.2.2/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.2.2/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.2.2/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.2.2/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.2.2/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.2.2/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types].
The most important [bucket properties](/riak/kv/2.2.2/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.2.2/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. @@ -87,7 +87,7 @@ If the [`allow_mult`](#siblings) parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -[`last_write_wins`](/riak/kv/2.2.2/learn/concepts/buckets). If `last_write_wins` is set to `false`, +[`last_write_wins`]({{}}riak/kv/2.2.2/learn/concepts/buckets). If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.2.2/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.2.2/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.2.2/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.2.2/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.2.2/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.2.2/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.2.2/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.2.2/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.2.2/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.2.2/configuring/reference) to change the [default bucket properties](/riak/kv/2.2.2/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.2.2/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.2.2/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.2.2/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.2.2/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. 
If set to `true`, [dotted version vectors]({{}}riak/kv/2.2.2/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.2.2/learn/concepts/causal-context#vector-clocks) will be used. Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.2.2/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.2.2/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.2.2/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.2.2/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.2.2/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.2.2/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.2.2/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.2.2/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.2.2/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.2.2/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.2.2/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.2/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. 
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.2.2/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.2.2/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.2.2/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.2.2/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.2.2/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -610,7 +610,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.2.2/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.2.2/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -665,7 +665,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/csharp.md index 4d374b0da9..1194994583 100644 --- a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.2/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
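The language-specific pages that follow all implement the same basic loop: fetch, resolve siblings by some application-chosen rule, write back. As a neutral sketch of that loop, here is a version using the official Python client; the "longest friends list" rule is a hypothetical policy standing in for whatever use-case-specific criteria an application would actually choose, and the bucket type, bucket, and key names are illustrative.

```python
from riak import RiakClient

def longest_friends_list_resolver(riak_object):
    # Hypothetical policy: keep the sibling whose 'friends' list is
    # longest and discard the rest. Assumes the stored values are JSON
    # objects with a 'friends' array.
    if len(riak_object.siblings) > 1:
        riak_object.siblings = [max(
            riak_object.siblings,
            key=lambda sibling: len(sibling.data['friends'])
        )]

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('siblings').bucket('users')

obj = bucket.get('bashobunny')       # the fetch may carry several siblings
longest_friends_list_resolver(obj)   # collapse them to a single value
obj.store()                          # write back with the causal context
```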
diff --git a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/golang.md index 1c5e970d79..abe518f2cb 100644 --- a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.2/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/java.md index 832e1c82c0..43a2183ff2 100644 --- a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.2/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.2.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.2/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.2/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.2/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.2/developing/data-types/) that have specific conflict resolution mechanics built in.
If you have data that -can be modeled as a [counter](/riak/kv/2.2.2/developing/data-types/#counters), [set](/riak/kv/2.2.2/developing/data-types/#sets), or [map](/riak/kv/2.2.2/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.2/developing/data-types/#counters), [set]({{}}riak/kv/2.2.2/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.2/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.2/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.2/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/nodejs.md index b107ed6598..685a0fae53 100644 --- a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.2/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/php.md index e9529833a2..9c5d1879f4 100644 --- a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.2/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.2.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.2/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.2/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.2/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.2/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.2/developing/data-types/#counters), [set](/riak/kv/2.2.2/developing/data-types/#sets), or [map](/riak/kv/2.2.2/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.2/developing/data-types/#counters), [set]({{}}riak/kv/2.2.2/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.2/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.2/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.2/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/python.md index 46fec93a85..d83150b8be 100644 --- a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.2/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.2.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -185,7 +185,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.2/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.2/developing/usage) section. ## More Advanced Example @@ -240,9 +240,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.2/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.2/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.2/developing/data-types/#counters), [set](/riak/kv/2.2.2/developing/data-types/#sets), or [map](/riak/kv/2.2.2/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.2/developing/data-types/#counters), [set]({{}}riak/kv/2.2.2/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.2/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -251,4 +251,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.2/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.2/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/ruby.md index 9cc24fe65e..aee7183eed 100644 --- a/content/riak/kv/2.2.2/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.2.2/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.2/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.2.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.2/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.2/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.2/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.2/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.2/developing/data-types/#counters), [set](/riak/kv/2.2.2/developing/data-types/#sets), or [map](/riak/kv/2.2.2/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.2/developing/data-types/#counters), [set]({{}}riak/kv/2.2.2/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.2/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.2/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.2/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.2/developing/usage/creating-objects.md b/content/riak/kv/2.2.2/developing/usage/creating-objects.md index 74330cd0c0..1f6ad5f05c 100644 --- a/content/riak/kv/2.2.2/developing/usage/creating-objects.md +++ b/content/riak/kv/2.2.2/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.2.2/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.2.2/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: @@ -23,7 +23,7 @@ PUT /types//buckets//keys/ # If you're using HTTP to interact with Riak, you can also use POST ``` -As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type](/riak/kv/2.2.2/using/cluster-operations/bucket-types). +As an example, let's store an object containing information about a dog named Rufus. 
We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{}}riak/kv/2.2.2/using/cluster-operations/bucket-types). The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -118,7 +118,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, you run the same read operation as in [Reading Objects](/riak/kv/2.2.2/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types](/riak/kv/2.2.2/using/cluster-operations/bucket-types). +Now, you run the same read operation as in [Reading Objects]({{}}riak/kv/2.2.2/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types]({{}}riak/kv/2.2.2/using/cluster-operations/bucket-types). ### Store an Object @@ -138,7 +138,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.2.2/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.2.2/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.2.2/developing/usage/custom-extractors.md b/content/riak/kv/2.2.2/developing/usage/custom-extractors.md index 10d492a0fc..88251293be 100644 --- a/content/riak/kv/2.2.2/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.2.2/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.2.2/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.2.2/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.2.2/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.2.2/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.2.2/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.2.2/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added to `` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.2.2/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.2.2/developing/usage/deleting-objects.md b/content/riak/kv/2.2.2/developing/usage/deleting-objects.md index c7ac45182d..840b894602 100644 --- a/content/riak/kv/2.2.2/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.2.2/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.2.2/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.2.2/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.2.2/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.2.2/developing/usage/document-store.md b/content/riak/kv/2.2.2/developing/usage/document-store.md index 8eea97bbcc..a7207e2fb9 100644 --- a/content/riak/kv/2.2.2/developing/usage/document-store.md +++ b/content/riak/kv/2.2.2/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.2.2/developing/usage/search/) and [Riak Data Types](/riak/kv/2.2.2/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.2.2/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.2.2/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.2.2/developing/data-types/#maps). +[Riak maps]({{}}riak/kv/2.2.2/developing/data-types/#maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.2.2/developing/data-types/#maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.2.2/developing/data-types/#maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**. 
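As a rough sketch of what writing into such a collection looks like with the official Python client: the bucket type `cms`, the bucket `blog_posts`, and the field names below are assumptions for illustration (the type is assumed to have been created with `datatype` set to `map` and associated with a Search index), and an explicit key is used here for brevity even though the tutorial lets Riak assign keys automatically.

```python
from riak import RiakClient
from riak.datatypes import Map

client = RiakClient(pb_port=8087)
# Assumption: a bucket type 'cms' exists with datatype = map and is
# associated with a Search index; 'blog_posts' is an arbitrary bucket.
bucket = client.bucket_type('cms').bucket('blog_posts')

# Model a blog post as a Riak map: registers for single values,
# a set for the keywords.
post = Map(bucket, 'my-first-post')
post.registers['title'].assign('Riak as a document store')
post.registers['author'].assign('basho_fan')
post.sets['keywords'].add('riak')
post.sets['keywords'].add('search')
post.store()  # Search indexes the map's fields on write
```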
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.2.2/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.2.2/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.2.2/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.2.2/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.2.2/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.2.2/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.2.2/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.2.2/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.2.2/developing/usage/mapreduce.md b/content/riak/kv/2.2.2/developing/usage/mapreduce.md index 8491f6128e..511b20146c 100644 --- a/content/riak/kv/2.2.2/developing/usage/mapreduce.md +++ b/content/riak/kv/2.2.2/developing/usage/mapreduce.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.2/dev/using/mapreduce --- -[usage 2i]: /riak/kv/2.2.2/developing/usage/secondary-indexes -[usage search]: /riak/kv/2.2.2/developing/usage/search -[usage types]: /riak/kv/2.2.2/developing/usage/bucket-types -[api http]: /riak/kv/2.2.2/developing/api/http -[api pb]: /riak/kv/2.2.2/developing/api/protocol-buffers -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[guide mapreduce]: /riak/kv/2.2.2/developing/app-guide/advanced-mapreduce +[usage 2i]: {{}}riak/kv/2.2.2/developing/usage/secondary-indexes +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search +[usage types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[api http]: {{}}riak/kv/2.2.2/developing/api/http +[api pb]: {{}}riak/kv/2.2.2/developing/api/protocol-buffers +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[guide mapreduce]: {{}}riak/kv/2.2.2/developing/app-guide/advanced-mapreduce {{% note title="Use MapReduce sparingly" %}} In Riak KV, MapReduce is the primary method for non-primary-key-based @@ -116,7 +116,7 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. 
-![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example diff --git a/content/riak/kv/2.2.2/developing/usage/reading-objects.md b/content/riak/kv/2.2.2/developing/usage/reading-objects.md index 9756ccff7e..2bf55e9bbc 100644 --- a/content/riak/kv/2.2.2/developing/usage/reading-objects.md +++ b/content/riak/kv/2.2.2/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.2.2/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a @@ -27,7 +27,7 @@ GET /types//buckets//keys/ ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type](/riak/kv/2.2.2/using/cluster-operations/bucket-types) page. +`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type]({{}}riak/kv/2.2.2/using/cluster-operations/bucket-types) page. ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.2.2/developing/usage/replication.md b/content/riak/kv/2.2.2/developing/usage/replication.md index 1dcb3ed738..f317b6efe9 100644 --- a/content/riak/kv/2.2.2/developing/usage/replication.md +++ b/content/riak/kv/2.2.2/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.2.2/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. 
-At the bottom of the page, you'll find a [screencast](/riak/kv/2.2.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.2.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using +Using Strong Consistency documentation, as this option will not be covered in this tutorial. {{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.2.2/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.2.2/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.2.2/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.2.2/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable. 
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.2.2/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.2.2/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.2.2/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.2.2/setup/planning/backend/multi). ## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.2.2/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.2.2/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.2.2/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.2.2/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.2.2/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.2.2/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.2.2/developing/usage/search-schemas.md b/content/riak/kv/2.2.2/developing/usage/search-schemas.md index f844c7e83b..281fb9a1c3 100644 --- a/content/riak/kv/2.2.2/developing/usage/search-schemas.md +++ b/content/riak/kv/2.2.2/developing/usage/search-schemas.md @@ -15,17 +15,17 @@ aliases: - /riak/kv/2.2.2/dev/advanced/search-schema --- -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters > **Note on Search 2.0 vs. Legacy Search** > > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). Riak Search is built for ease of use, allowing you to write values into Riak and query for values using Solr. Riak Search does a lot of work -under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.2.2/developing/data-types/), and [more](/riak/kv/2.2.2/developing/usage/custom-extractors)---into something that can be indexed and searched later. +under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.2.2/developing/data-types/), and [more]({{}}riak/kv/2.2.2/developing/usage/custom-extractors)---into something that can be indexed and searched later. Nonetheless, you must still instruct Riak/Solr how to index a value. Are you providing an array of strings? An integer? A date? Is your text in English or Russian? 
You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.2.2/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.2.2/developing/usage/search.md b/content/riak/kv/2.2.2/developing/usage/search.md index 4a40b0b767..79376f91d9 100644 --- a/content/riak/kv/2.2.2/developing/usage/search.md +++ b/content/riak/kv/2.2.2/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.2.2/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.2.2/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.2/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.2.2/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.2/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.2/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.2.2/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.2.2/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.2.2/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.2.2/developing/usage/custom-extractors). 
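To see an extractor in action end to end, the hedged sketch below stores a JSON value, which the JSON extractor converts into indexable Solr fields, and then queries it back. It assumes the `famous` index created earlier has been associated with the `cats` bucket under the `animals` bucket type, and borrows the `name_s` field name from this document's examples.

```bash
# Store a JSON object; the JSON extractor turns name_s into a Solr field
curl -XPUT "$RIAK_HOST/types/animals/buckets/cats/keys/liono" \
  -H "Content-Type: application/json" \
  -d '{"name_s": "Lion-o", "age_i": 30}'

# Query the index for the document just stored
curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*"
```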
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.2.2/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.2.2/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.2.2/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.2.2/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.2.2/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.2.2/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.2.2/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.2.2/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.2.2/developing/usage/searching-data-types.md b/content/riak/kv/2.2.2/developing/usage/searching-data-types.md index 43ea8b0db7..a57e513353 100644 --- a/content/riak/kv/2.2.2/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.2.2/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.2/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.2.2/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.2.2/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.2.2/developing/data-types/#counters), [sets](/riak/kv/2.2.2/developing/data-types/#sets), and [maps](/riak/kv/2.2.2/developing/data-types/#maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.2.2/developing/data-types/#counters), [sets]({{}}riak/kv/2.2.2/developing/data-types/#sets), and [maps]({{}}riak/kv/2.2.2/developing/data-types/#maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
### Top-level Schemas -The default schema for [counters](/riak/kv/2.2.2/developing/data-types/#counters) indexes each +The default schema for [counters]({{}}riak/kv/2.2.2/developing/data-types/#counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]` Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.2.2/developing/data-types/#sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.2.2/developing/data-types/#sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.2.2/developing/data-types/#maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.2.2/developing/data-types/#maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) for [storing counters](/riak/kv/2.2.2/developing/data-types/#counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.2.2/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.2.2/developing/data-types/#counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.2.2/developing/usage/bucket-types) for [storing sets](/riak/kv/2.2.2/developing/data-types/#sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.2.2/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.2.2/developing/data-types/#sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.2.2/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.2.2/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.2.2/developing/data-types/#maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.2.2/developing/data-types/#maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.2.2/developing/usage/secondary-indexes.md b/content/riak/kv/2.2.2/developing/usage/secondary-indexes.md index 694add7947..7ff20a3dc4 100644 --- a/content/riak/kv/2.2.2/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.2.2/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.2.2/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.2/setup/planning/backend/memory -[use ref strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.2/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.2.2/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.2.2/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.2.2/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.2.2/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.2.2/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.2.2/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.2.2/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.2.2/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.2.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.2.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.2.2/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.2.2/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.2.2/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.2.2/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.2.2/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.2.2/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.2/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.2.2/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.2/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.2/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.2.2/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.2.2/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.2.2/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.2.2/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
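For quick reference before the detailed sections that follow, both 2i query flavors over HTTP are plain `GET`s against the index name and value. The sketch below assumes the `users` bucket and the `field1_bin`/`field2_int` index names that appear in this document's examples.

```bash
# Exact match on a binary index
curl "http://localhost:8098/buckets/users/index/field1_bin/val1"

# Range query on an integer index (all values from 1 to 10000)
curl "http://localhost:8098/buckets/users/index/field2_int/1/10000"
```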
### Exact Match diff --git a/content/riak/kv/2.2.2/developing/usage/security.md b/content/riak/kv/2.2.2/developing/usage/security.md index 72ee4c64fa..980c37621a 100644 --- a/content/riak/kv/2.2.2/developing/usage/security.md +++ b/content/riak/kv/2.2.2/developing/usage/security.md @@ -15,49 +15,49 @@ aliases: - /riak/kv/2.2.2/dev/advanced/client-security --- -Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.2.2/using/security/basics) that enables you to choose +Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.2.2/using/security/basics) that enables you to choose * which Riak users/clients are authorized to perform a wide variety of Riak operations, and * how those users/clients are required to authenticate themselves. -The following four authentication mechanisms, aka [security sources](/riak/kv/2.2.2/using/security/managing-sources/) are available: +The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.2.2/using/security/managing-sources/), are available: -* [Trust](/riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication)-based +* [Trust]({{}}riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication)-based authentication enables you to specify trusted [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s from which all clients will be authenticated by default -* [Password](/riak/kv/2.2.2/using/security/managing-sources/#password-based-authentication)-based authentication requires +* [Password]({{}}riak/kv/2.2.2/using/security/managing-sources/#password-based-authentication)-based authentication requires that clients provide a username and password -* [Certificate](/riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication +* [Certificate]({{}}riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication requires that clients provide a certificate -* [Pluggable authentication module (PAM)](/riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication)-based authentication requires +* [Pluggable authentication module (PAM)]({{}}riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication)-based authentication requires clients to authenticate using the PAM service specified using the - [`riak-admin security`](/riak/kv/2.2.2/using/security/managing-sources/#managing-sources) + [`riak-admin security`]({{}}riak/kv/2.2.2/using/security/managing-sources/#managing-sources) command line interface Riak's approach to security is highly flexible. If you choose to use Riak's security feature, you do not need to require that all clients authenticate via the same means. Instead, you can specify authentication sources on a client-by-client, i.e. user-by-user, basis. This means that -you can require clients performing, say, [MapReduce](/riak/kv/2.2.2/developing/usage/mapreduce/) -operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.2.2/developing/usage) have to use username and password. The approach +you can require clients performing, say, [MapReduce]({{}}riak/kv/2.2.2/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.2.2/developing/usage) have to use username and password. The approach that you adopt will depend on your security needs. This document provides a general overview of how that works. 
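On the server side, enabling the subsystem and wiring a user to a source boils down to a few `riak-admin security` commands. The following is a hedged sketch; the username, password, and CIDR are illustrative.

```bash
# Turn on the security subsystem (TLS must already be configured)
riak-admin security enable

# Create a user and require password authentication from localhost
riak-admin security add-user riakuser password=rosebud
riak-admin security add-source riakuser 127.0.0.1/32 password
```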
For managing security in Riak itself, see the following documents: -* [Authentication and Authorization](/riak/kv/2.2.2/using/security/basics) -* [Managing Security Sources](/riak/kv/2.2.2/using/security/managing-sources/) +* [Authentication and Authorization]({{}}riak/kv/2.2.2/using/security/basics) +* [Managing Security Sources]({{}}riak/kv/2.2.2/using/security/managing-sources/) We also provide client-library-specific guides for the following officially supported clients: -* [Java](/riak/kv/2.2.2/developing/usage/security/java) -* [Ruby](/riak/kv/2.2.2/developing/usage/security/ruby) -* [PHP](/riak/kv/2.2.2/developing/usage/security/php) -* [Python](/riak/kv/2.2.2/developing/usage/security/python) -* [Erlang](/riak/kv/2.2.2/developing/usage/security/erlang) +* [Java]({{}}riak/kv/2.2.2/developing/usage/security/java) +* [Ruby]({{}}riak/kv/2.2.2/developing/usage/security/ruby) +* [PHP]({{}}riak/kv/2.2.2/developing/usage/security/php) +* [Python]({{}}riak/kv/2.2.2/developing/usage/security/python) +* [Erlang]({{}}riak/kv/2.2.2/developing/usage/security/erlang) ## Certificates, Keys, and Authorities @@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients. > **HTTP not supported** > > Certificate-based authentication is available only through Riak's -[Protocol Buffers](/riak/kv/2.2.2/developing/api/protocol-buffers/) interface. It is not available through the -[HTTP API](/riak/kv/2.2.2/developing/api/http). +[Protocol Buffers]({{}}riak/kv/2.2.2/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{}}riak/kv/2.2.2/developing/api/http). ### Default Names -In Riak's [configuration files](/riak/kv/2.2.2/configuring/reference/#security), the +In Riak's [configuration files]({{}}riak/kv/2.2.2/configuring/reference/#security), the default certificate file names are as follows: Cert | Filename diff --git a/content/riak/kv/2.2.2/developing/usage/security/erlang.md b/content/riak/kv/2.2.2/developing/usage/security/erlang.md index 66725c22b0..bd33692882 100644 --- a/content/riak/kv/2.2.2/developing/usage/security/erlang.md +++ b/content/riak/kv/2.2.2/developing/usage/security/erlang.md @@ -19,9 +19,9 @@ aliases: This tutorial shows you how to set up a Riak Erlang client to authenticate itself when connecting to Riak. -If you are using [trust](/riak/kv/2.2.2/using/security/managing-sources/), [PAM-](/riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.2.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.2.2/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.2.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). 
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.2/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.2.2/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.2.2/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.2.2/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.2.2/developing/usage/security/java.md b/content/riak/kv/2.2.2/developing/usage/security/java.md index 0ac0682e6b..2f316e1fb8 100644 --- a/content/riak/kv/2.2.2/developing/usage/security/java.md +++ b/content/riak/kv/2.2.2/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.2/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.2.2/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.2.2/developing/usage/security/php.md b/content/riak/kv/2.2.2/developing/usage/security/php.md index 8e1984bba0..ad7848e2fc 100644 --- a/content/riak/kv/2.2.2/developing/usage/security/php.md +++ b/content/riak/kv/2.2.2/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.2/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.2.2/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.2.2/developing/usage/security/python.md b/content/riak/kv/2.2.2/developing/usage/security/python.md index 6761f9cdb3..3182e4932a 100644 --- a/content/riak/kv/2.2.2/developing/usage/security/python.md +++ b/content/riak/kv/2.2.2/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.2/using/security/managing-sources/) or [PAM-](/riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.2.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered +If you are using [trust-]({{}}riak/kv/2.2.2/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security +setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.2.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered in a [later section](#password-based-authentication). If you are using -[certificate](/riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow +[certificate]({{}}riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow the instructions in the [section below](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object. If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.2/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.2.2/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087 without any security credentials: @@ -83,7 +83,7 @@ provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More on specifying trusted CIDRs can be found in [Trust-based -Authentication](/riak/kv/2.2.2/using/security/managing-sources/#Trust-based-Authentication). +Authentication]({{}}riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication). **Note**: The examples in the following sections specify certs on the basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to @@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser', ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.2.2/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.2.2/using/security/basics/#user-management). 
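On the Riak side, pointing a user at a PAM source is done with `riak-admin security add-source`. The sketch below is an assumption-laden example: it presumes a PAM service named `riak` has already been configured on the host.

```bash
# Authenticate riakuser against the host's PAM service named "riak"
riak-admin security add-source riakuser 127.0.0.1/32 pam service=riak
```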
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.2.2/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.2.2/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.2.2/developing/usage/security/ruby.md b/content/riak/kv/2.2.2/developing/usage/security/ruby.md index b9182a96da..614070db82 100644 --- a/content/riak/kv/2.2.2/developing/usage/security/ruby.md +++ b/content/riak/kv/2.2.2/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.2/using/security/managing-sources/) or [PAM](/riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.2.2/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.2.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.2.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.2/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.2.2/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication). 
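The corresponding server-side setup for trust-based authentication is a single source entry; the CIDR below is illustrative.

```bash
# Trust every client connecting from this network; no credentials required
riak-admin security add-source all 10.0.0.0/24 trust
```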
## Password-based Authentication @@ -86,10 +86,10 @@ client = Riak::Client.new( ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.2.2/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.2.2/using/security/basics#user-management). +documentation on [User Management]({{}}riak/kv/2.2.2/using/security/basics#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.2.2/developing/usage/updating-objects.md b/content/riak/kv/2.2.2/developing/usage/updating-objects.md index 8ab2de40fd..b1c35c36e4 100644 --- a/content/riak/kv/2.2.2/developing/usage/updating-objects.md +++ b/content/riak/kv/2.2.2/developing/usage/updating-objects.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/dev/using/updates --- -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode ## Using Causal Context If an object already exists under a certain key and you want to write a new object to that key, Riak needs to know what to do, especially if multiple writes are happening at the same time. Which of the objects being written should be deemed correct? These kinds of scenarios can -arise quite frequently in distributed, [eventually consistent](/riak/kv/2.2.2/learn/concepts/eventual-consistency) systems. +arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.2.2/learn/concepts/eventual-consistency) systems. -Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.2.2/learn/concepts/causal-context). These objects track the causal history of objects. +Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.2.2/learn/concepts/causal-context). These context objects track the causal history of objects. They are attached to _all_ Riak objects as metadata, and they are not readable by humans. They may sound complex---and they are fairly complex behind the scenes---but using them in your application is very simple. @@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple. Using causal context in an update would involve the following steps: 1. Fetch the object -2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.2.2/learn/concepts/causal-context) +2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.2.2/learn/concepts/causal-context)) 3. Write the new object to Riak Step 2 is the most important here. All of Basho's official Riak clients -enable you to modify an object's value without modifying its [causal context](/riak/kv/2.2.2/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution), we'll walk you +enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.2.2/learn/concepts/causal-context). Although a more detailed tutorial on context objects and -object updates can be found in [Conflict Resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution), we'll walk you through a basic example here. 
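Before the client-library walkthrough below, here is what that fetch-modify-write cycle might look like in raw HTTP terms. This is only a sketch: the bucket and key are hypothetical, and the `X-Riak-Vclock` value shown is a truncated placeholder for the opaque causal context returned by the initial read.

```bash
# 1. Read the object; the response headers include X-Riak-Vclock,
#    the causal context for this object
curl -i "http://localhost:8098/buckets/test/keys/doc"

# 2-3. Modify the value locally, then write it back,
#      echoing the context fetched in step 1
curl -XPUT "http://localhost:8098/buckets/test/keys/doc" \
  -H "Content-Type: text/plain" \
  -H "X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cg..." \
  -d "the updated value"
```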
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.2.2/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.2.2/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.2.2/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.2.2/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.2.2/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.2.2/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.2.2/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.2.2/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.2.2/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.2.2/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.2.2/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.2.2/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.2.2/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.2.2/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.2.2/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.2.2/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.2.2/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.2.2/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.2.2/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.2.2/index.md b/content/riak/kv/2.2.2/index.md index 2fbf3ee2af..c8ce5fa326 100644 --- a/content/riak/kv/2.2.2/index.md +++ b/content/riak/kv/2.2.2/index.md @@ -1,5 +1,5 @@ --- -title: "Riak KV" +title: "Riak KV 2.2.2" description: "" project: "riak_kv" project_version: "2.2.2" @@ -15,15 +15,15 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.2.2/configuring -[downloads]: /riak/kv/2.2.2/downloads/ -[install index]: /riak/kv/2.2.2/setup/installing/ -[plan index]: /riak/kv/2.2.2/setup/planning -[perf open files]: /riak/kv/2.2.2/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.2.2/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.2.2/developing/usage/search -[getting started]: /riak/kv/2.2.2/developing/getting-started -[dev client libraries]: /riak/kv/2.2.2/developing/client-libraries +[config index]: {{}}riak/kv/2.2.2/configuring +[downloads]: {{}}riak/kv/2.2.2/downloads/ +[install index]: {{}}riak/kv/2.2.2/setup/installing/ +[plan index]: {{}}riak/kv/2.2.2/setup/planning +[perf open files]: {{}}riak/kv/2.2.2/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.2.2/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search +[getting started]: {{}}riak/kv/2.2.2/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.2.2/developing/client-libraries @@ -56,7 +56,7 @@ Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and the 3. [Configure Riak KV for your needs][config index] {{% note title="Developing with Riak KV" %}} -If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV](/riak/kv/2.2.2/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. +If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{}}riak/kv/2.2.2/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. 
{{% /note %}} ## Popular Docs diff --git a/content/riak/kv/2.2.2/learn/concepts.md b/content/riak/kv/2.2.2/learn/concepts.md index 9ae19ca1d0..86254ec716 100644 --- a/content/riak/kv/2.2.2/learn/concepts.md +++ b/content/riak/kv/2.2.2/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.2.2/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.2.2/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.2.2/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.2.2/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters -[concept crdts]: /riak/kv/2.2.2/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.2/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.2/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.2.2/learn/concepts/vnodes -[config index]: /riak/kv/2.2.2/configuring -[plan index]: /riak/kv/2.2.2/setup/planning -[use index]: /riak/kv/2.2.2/using/ +[concept aae]: {{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.2.2/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.2.2/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.2.2/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.2.2/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.2/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.2.2/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.2.2/configuring +[plan index]: {{}}riak/kv/2.2.2/setup/planning +[use index]: {{}}riak/kv/2.2.2/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.2.2/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.2.2/learn/concepts/active-anti-entropy.md index 2367402559..3c4ae67b12 100644 --- a/content/riak/kv/2.2.2/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.2.2/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.2/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.2.2/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.2.2/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.2.2/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.2.2/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.2.2/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.2.2/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.2.2/developing/usage/search +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.2.2/learn/concepts/buckets.md b/content/riak/kv/2.2.2/learn/concepts/buckets.md index 07ed37528f..f716573b67 100644 --- a/content/riak/kv/2.2.2/learn/concepts/buckets.md +++ b/content/riak/kv/2.2.2/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.2.2/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.2.2/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.2.2/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.2.2/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.2.2/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.2.2/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.2.2/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[config basic]: /riak/kv/2.2.2/configuring/basic -[dev api http]: /riak/kv/2.2.2/developing/api/http -[dev data types]: /riak/kv/2.2.2/developing/data-types -[glossary ring]: /riak/kv/2.2.2/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.2/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.2/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.2.2/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.2.2/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.2.2/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.2.2/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.2.2/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.2.2/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.2.2/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.2.2/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.2.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.2.2/configuring/basic +[dev api http]: {{}}riak/kv/2.2.2/developing/api/http +[dev data types]: {{}}riak/kv/2.2.2/developing/data-types +[glossary ring]: {{}}riak/kv/2.2.2/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.2/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.2/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.2.2/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.2.2/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.2.2/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.2.2/learn/concepts/capability-negotiation.md b/content/riak/kv/2.2.2/learn/concepts/capability-negotiation.md index 2e556a1aa0..6421d2bb77 100644 --- a/content/riak/kv/2.2.2/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.2.2/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.2.2/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.2.2/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.2.2/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.2.2/developing/usage/mapreduce In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
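To make the bucket and bucket-type namespacing above concrete, here is a minimal sketch using the official Python client; the connection settings, the `no_siblings` type, and all bucket/key names are assumptions for illustration, and a custom type must already be created and activated with `riak-admin` before use.

```python
import riak

# Connection details are assumptions; adjust for your cluster.
client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)

# Untyped buckets implicitly use the "default" bucket type.
bucket = client.bucket('users')
bucket.new('alice', data={'name': 'Alice'}).store()

# A custom bucket type acts as a third namespace above buckets and
# keys, so this 'users' bucket is distinct from the one above.
typed = client.bucket_type('no_siblings').bucket('users')
print(typed.get('alice').data)  # None here: a separate keyspace
```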
diff --git a/content/riak/kv/2.2.2/learn/concepts/causal-context.md b/content/riak/kv/2.2.2/learn/concepts/causal-context.md index d9fc761e41..b4e44b638d 100644 --- a/content/riak/kv/2.2.2/learn/concepts/causal-context.md +++ b/content/riak/kv/2.2.2/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.2.2/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.2.2/developing/api/http -[dev key value]: /riak/kv/2.2.2/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.2.2/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.2.2/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.2/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.2.2/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.2.2/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.2.2/developing/api/http +[dev key value]: {{}}riak/kv/2.2.2/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.2.2/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.2.2/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.2/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.2.2/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.2.2/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -73,7 +73,7 @@ Causal context comes in two forms in Riak: **vector clocks** and **dotted version vectors**. More information on both can be found in the sections below. -In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type](/riak/kv/2.2.2/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other +In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{}}riak/kv/2.2.2/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions).
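The vector-clock behavior described in the causal-context changes above can be sketched in a few lines. This toy version uses the textbook one-counter-per-actor definition purely for illustration; it is not Riak's internal encoding, and the actor names are invented.

```python
def increment(clock, actor):
    """Return a copy of the clock with this actor's counter bumped."""
    clock = dict(clock)
    clock[actor] = clock.get(actor, 0) + 1
    return clock

def descends(a, b):
    """True if clock a has seen every event clock b has."""
    return all(a.get(actor, 0) >= count for actor, count in b.items())

v1 = increment({}, 'client_x')   # {'client_x': 1}
v2 = increment(v1, 'client_x')   # strictly newer than v1
v3 = increment(v1, 'client_y')   # concurrent with v2

print(descends(v2, v1))                    # True: v2 supersedes v1
print(descends(v2, v3), descends(v3, v2))  # False False: concurrent, i.e. siblings
```

With `allow_mult` set to `true`, the concurrent pair corresponds to the sibling case; with `false`, one of the two writes simply overwrites the other.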
If, however, `allow_mult` is set to `false`, then Riak will not generate diff --git a/content/riak/kv/2.2.2/learn/concepts/clusters.md b/content/riak/kv/2.2.2/learn/concepts/clusters.md index be5cdb0997..eee60821f3 100644 --- a/content/riak/kv/2.2.2/learn/concepts/clusters.md +++ b/content/riak/kv/2.2.2/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.2.2/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.2.2/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.2/learn/concepts/replication -[glossary node]: /riak/kv/2.2.2/learn/glossary/#node -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.2.2/learn/dynamo -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.2/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.2.2/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.2.2/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.2.2/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.2/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.2.2/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.2.2/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.2/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.2.2/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
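The partition assignment in the `REM`/`artist` example can be approximated as follows: a hash maps the bucket/key pair into a 160-bit ring, the ring is divided into equally sized partitions, and the N replicas fall on the next N partitions. This is only a rough sketch of the idea (the `music` bucket name is hypothetical); Riak's real preflist calculation lives in riak_core.

```python
import hashlib

RING_SIZE = 32                            # partitions, as in the example above
PARTITION_WIDTH = 2 ** 160 // RING_SIZE   # SHA-1 output is a 160-bit space

def preflist(bucket, key, n_val=3):
    """Return the n_val partition indices responsible for this key."""
    digest = hashlib.sha1(f'{bucket}/{key}'.encode()).digest()
    position = int.from_bytes(digest, 'big')
    first = position // PARTITION_WIDTH
    return [(first + i) % RING_SIZE for i in range(n_val)]

print(preflist('music', 'artist'))        # three of the 32 partitions
```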
When a diff --git a/content/riak/kv/2.2.2/learn/concepts/crdts.md b/content/riak/kv/2.2.2/learn/concepts/crdts.md index 22b3ffb30a..bb65ccd442 100644 --- a/content/riak/kv/2.2.2/learn/concepts/crdts.md +++ b/content/riak/kv/2.2.2/learn/concepts/crdts.md @@ -17,20 +17,20 @@ aliases: --- [crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf -[data types converg]: /riak/kv/2.2.2/learn/concepts/crdts/#convergence +[data types converg]: {{}}riak/kv/2.2.2/learn/concepts/crdts/#convergence [crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html -[data types impl]: /riak/kv/2.2.2/learn/concepts/crdts/#implementation -[concept causal context dvv]: /riak/kv/2.2.2/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.2.2/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.2.2/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.2.2/developing/data-types +[data types impl]: {{}}riak/kv/2.2.2/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{}}riak/kv/2.2.2/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.2.2/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.2.2/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.2.2/developing/data-types [riak_dt]: https://github.com/basho/riak_dt -[dev data types context]: /riak/kv/2.1.4/developing/data-types/#data-types-and-context -[glossary node]: /riak/kv/2.2.2/learn/glossary/#node -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.2.2/developing/usage/conflict-resolution +[dev data types context]: {{}}riak/kv/2.2.2/developing/data-types/#data-types-and-context +[glossary node]: {{}}riak/kv/2.2.2/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.2.2/developing/usage/conflict-resolution Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. 
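Before the specific data types are listed, a minimal example may help pin down what "convergent" means in practice. The grow-only counter below is the textbook CRDT: each node increments only its own slot, and merging takes per-node maximums, so replicas reach the same value no matter the order in which updates arrive. This illustrates the principle only; it is not the riak_dt implementation.

```python
def merge(a, b):
    """Join two replica states by taking each node's maximum count."""
    return {node: max(a.get(node, 0), b.get(node, 0))
            for node in set(a) | set(b)}

def value(counter):
    return sum(counter.values())

replica_1 = {'node_a': 2, 'node_b': 1}   # two diverged copies
replica_2 = {'node_a': 1, 'node_b': 3}   # of the same counter

merged = merge(replica_1, replica_2)
print(merged, value(merged))   # {'node_a': 2, 'node_b': 3} 5
```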
Riak KV supports the following eventually-convergent data types, described in later sections: diff --git a/content/riak/kv/2.2.2/learn/concepts/eventual-consistency.md b/content/riak/kv/2.2.2/learn/concepts/eventual-consistency.md index f7193f94c1..faa6b7fa60 100644 --- a/content/riak/kv/2.2.2/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.2.2/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.2.2/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters -[concept replication]: /riak/kv/2.2.2/learn/concepts/replication -[glossary node]: /riak/kv/2.2.2/learn/glossary/#node -[glossary read rep]: /riak/kv/2.2.2/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.2/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.2.2/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.2.2/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.2.2/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.2.2/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.2/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.2.2/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.2.2/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.2.2/developing/data-modeling/). +or models]({{}}riak/kv/2.2.2/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
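As a concrete instance of the application-side resolution mentioned above, one common pattern is to keep `allow_mult` at `true`, read all siblings, and merge them with a deterministic rule. The sketch below unions two sibling follower lists; the data and the merge rule are invented for illustration, and real code would fetch the siblings through a client library.

```python
def resolve_siblings(siblings):
    """Merge sibling values deterministically: here, set union."""
    merged = set()
    for value in siblings:
        merged |= set(value)
    return sorted(merged)

# Two concurrent writes to a follower list produced two siblings:
siblings = [['bob', 'carol'], ['bob', 'dave']]
print(resolve_siblings(siblings))   # ['bob', 'carol', 'dave']
```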
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.2.2/learn/concepts/keys-and-objects.md b/content/riak/kv/2.2.2/learn/concepts/keys-and-objects.md index 13c041f0a8..79cccb5f27 100644 --- a/content/riak/kv/2.2.2/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.2.2/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.2/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.2.2/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.2.2/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.2.2/learn/concepts/replication.md b/content/riak/kv/2.2.2/learn/concepts/replication.md index cb81699bc8..1311a45a2f 100644 --- a/content/riak/kv/2.2.2/learn/concepts/replication.md +++ b/content/riak/kv/2.2.2/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.2.2/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.2.2/learn/concepts/vnodes -[glossary node]: /riak/kv/2.2.2/learn/glossary/#node -[glossary ring]: /riak/kv/2.2.2/learn/glossary/#ring -[usage replication]: /riak/kv/2.2.2/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.2.2/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.2.2/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.2.2/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.2.2/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.2.2/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.2.2/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.2.2/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.2.2/setup/planning/backend/multi) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail. 
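The trade-offs behind choosing `n_val` (and the related R and W request parameters covered in the replication properties docs) come down to simple arithmetic: if R + W > N, every read quorum shares at least one vnode with every write quorum, so a successful read overlaps the latest successful write. A sketch of just that check, with a few sample configurations:

```python
def quorums_overlap(n, r, w):
    """True when any R readers must include one of the last W writers."""
    return r + w > n

for n, r, w in [(3, 2, 2), (3, 1, 1), (5, 3, 3)]:
    print(f'N={n} R={r} W={w} -> overlap guaranteed: {quorums_overlap(n, r, w)}')
```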
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.2.2/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.2.2/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.2.2/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.2.2/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.2.2/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.2.2/learn/concepts/strong-consistency.md b/content/riak/kv/2.2.2/learn/concepts/strong-consistency.md index 354037e843..100767fa7c 100644 --- a/content/riak/kv/2.2.2/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.2.2/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.2/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.2.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.2.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.2.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.2.2/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.2.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.2.2/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.2.2/learn/concepts/vnodes.md b/content/riak/kv/2.2.2/learn/concepts/vnodes.md index 9e592bc5b1..f465cd7905 100644 --- a/content/riak/kv/2.2.2/learn/concepts/vnodes.md +++ b/content/riak/kv/2.2.2/learn/concepts/vnodes.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context]: /riak/kv/2.2.2/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.2.2/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.2.2/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.2/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.2.2/learn/glossary/#node -[glossary ring]: /riak/kv/2.2.2/learn/glossary/#ring -[plan backend]: /riak/kv/2.2.2/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.2.2/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.2.2/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.2.2/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.2.2/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.2.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.2/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.2.2/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.2.2/learn/glossary/#ring +[plan backend]: {{}}riak/kv/2.2.2/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.2.2/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.2.2/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -80,7 +80,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -102,7 +102,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.2.2/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.2.2/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.2.2/learn/dynamo.md b/content/riak/kv/2.2.2/learn/dynamo.md index 49d7fd5b98..56c5074750 100644 --- a/content/riak/kv/2.2.2/learn/dynamo.md +++ b/content/riak/kv/2.2.2/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.2.2/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.2.2/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.2.2/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.2.2/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. 
-[HTTP API]: /riak/kv/2.2.2/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.2.2/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.2.2/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.2.2/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.2.2/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.2.2/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.2.2/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.2.2/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.2.2/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.2.2/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.2.2/developing/api/http/) +>[REST API]({{}}riak/kv/2.2.2/developing/api/http/) > ->[Writing Data](/riak/kv/2.2.2/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.2.2/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.2.2/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.2.2/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.2.2/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.2.2/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.2.2/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.2.2/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.2.2/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.2.2/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. -[Multi Datacenter Replication]: /riak/kv/2.2.2/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.2.2/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. 
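The hinted-handoff flow described in this section is small enough to model directly: a write aimed at an unreachable primary is accepted by a fallback node along with a hint naming the intended owner, and is handed back once the primary returns. A toy model only, reusing the paper's A-and-D scenario:

```python
class Node:
    def __init__(self, name, up=True):
        self.name, self.up = name, up
        self.data = {}
        self.hints = []   # (intended_primary, key, value) tuples

def write(key, value, primary, fallback):
    """Send the write to the primary, or park it on the fallback."""
    if primary.up:
        primary.data[key] = value
    else:
        fallback.data[key] = value
        fallback.hints.append((primary, key, value))

def handoff(fallback):
    """Deliver hinted writes whose primaries are reachable again."""
    for primary, key, value in list(fallback.hints):
        if primary.up:
            primary.data[key] = value
            fallback.hints.remove((primary, key, value))

a, d = Node('A', up=False), Node('D')
write('artist', 'REM', primary=a, fallback=d)   # D keeps the hint
a.up = True
handoff(d)
print(a.data)   # {'artist': 'REM'} -- handed back to the primary
```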
> This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.2.2/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.2.2/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.2.2/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propagated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.2.2/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.2.2/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.2.2/setup/planning/backend/ -[Bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.2.2/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.2.2/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.2.2/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.2.2/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.2.2/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.2.2/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.2.2/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.2.2/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.2.2/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.2.2/using/performance/benchmarking/ Dynamo is used by several services with different configurations.
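Read repair, referenced just above, can also be reduced to a few lines: read every replica, pick the most recent version, and write it back to any replica that is stale. In this toy pass a bare version number stands in for Riak's causal context:

```python
def read_with_repair(replicas, key):
    """Return the newest value and repair stale replicas in passing."""
    newest = max(replica[key]['version'] for replica in replicas)
    value = next(r[key]['value'] for r in replicas
                 if r[key]['version'] == newest)
    for replica in replicas:
        if replica[key]['version'] < newest:        # stale copy
            replica[key] = {'version': newest, 'value': value}
    return value

replicas = [{'k': {'version': 2, 'value': 'new'}},
            {'k': {'version': 1, 'value': 'old'}},
            {'k': {'version': 2, 'value': 'new'}}]
print(read_with_repair(replicas, 'k'))   # 'new'; the stale copy is fixed
```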
These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.2.2/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.2.2/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.2.2/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.2.2/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.2.2/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.2.2/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.2.2/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.2.2/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.2.2/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.2.2/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. -[Basho Bench]: /riak/kv/2.2.2/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.2.2/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. 
-[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.2.2/learn/glossary.md b/content/riak/kv/2.2.2/learn/glossary.md index bcb0a5569b..76f45358f2 100644 --- a/content/riak/kv/2.2.2/learn/glossary.md +++ b/content/riak/kv/2.2.2/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.2.2/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.2.2/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters -[concept crdts]: /riak/kv/2.2.2/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.2/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.2/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.2/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.2.2/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.2.2/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.2.2/developing/api/http -[dev data model]: /riak/kv/2.2.2/developing/data-modeling -[dev data types]: /riak/kv/2.2.2/developing/data-types -[glossary read rep]: /riak/kv/2.2.2/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.2.2/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.2.2/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.2/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.2.2/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.2/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.2/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.2.2/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.2.2/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.2.2/developing/api/http +[dev data model]: {{}}riak/kv/2.2.2/developing/data-modeling +[dev data types]: {{}}riak/kv/2.2.2/developing/data-types +[glossary read rep]: {{}}riak/kv/2.2.2/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.2.2/learn/dynamo -[plan cluster capacity]: /riak/kv/2.2.2/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.2.2/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.2.2/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.2.2/learn/dynamo +[plan cluster capacity]: 
{{}}riak/kv/2.2.2/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.2.2/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.2.2/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.2.2/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.2/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.2.2/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.2.2/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.2.2/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.2.2/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.2.2/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. -* [Bucket Types](/riak/kv/2.2.2/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.2.2/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.2.2/learn/use-cases.md b/content/riak/kv/2.2.2/learn/use-cases.md index 9441398049..14b6c21c69 100644 --- a/content/riak/kv/2.2.2/learn/use-cases.md +++ b/content/riak/kv/2.2.2/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.2.2/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.2.2/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.2.2/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.2.2/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.2.2/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.2.2/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.2.2/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.2.2/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.2.2/developing/data-types -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.2.2/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.2/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.2.2/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.2.2/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.2.2/developing/data-modeling/#log-data +[dev data model sensor data]: 
{{}}riak/kv/2.2.2/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.2.2/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.2.2/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.2.2/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.2.2/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.2.2/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.2.2/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.2.2/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.2.2/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.2.2/learn/why-riak-kv.md b/content/riak/kv/2.2.2/learn/why-riak-kv.md index 3377786ef2..9922d787fc 100644 --- a/content/riak/kv/2.2.2/learn/why-riak-kv.md +++ b/content/riak/kv/2.2.2/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.2.2/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.2.2/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.2.2/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.2.2/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.2.2/developing/data-types -[glossary read rep]: /riak/kv/2.2.2/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.2.2/developing/data-types +[glossary read rep]: {{}}riak/kv/2.2.2/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.2.2/release-notes.md b/content/riak/kv/2.2.2/release-notes.md index ce11eed14d..60662c8d8e 100644 --- a/content/riak/kv/2.2.2/release-notes.md +++ b/content/riak/kv/2.2.2/release-notes.md @@ -106,10 +106,10 @@ Once all of the Riak KV clusters have been upgraded to version 2.2.0 or greater, ### Deprecation Notification -* [Link Walking](/riak/kv/2.2.2/developing/api/http/link-walking/) is deprecated and will not work if security is enabled. -* Key Filters are deprecated; we strongly discourage key listing in production due to the overhead involved, so it's better to maintain key indexes as values in Riak (see our [set data type](/riak/kv/2.2.2/developing/data-types/sets/) as a useful tool for such indexes). -* JavaScript MapReduce is deprecated; we have expanded our [Erlang MapReduce](/riak/kv/2.2.2/developing/app-guide/advanced-mapreduce/#mapreduce) documentation to assist with the transition. -* Riak search 1.0 is deprecated in favor of our Solr-based [Riak search 2.0](/riak/kv/2.2.2/developing/usage/search/). 
Version 1.0 will not work if security is enabled. +* [Link Walking]({{}}riak/kv/2.2.2/developing/api/http/link-walking/) is deprecated and will not work if security is enabled. +* Key Filters are deprecated; we strongly discourage key listing in production due to the overhead involved, so it's better to maintain key indexes as values in Riak (see our [set data type]({{}}riak/kv/2.2.2/developing/data-types/sets/) as a useful tool for such indexes). +* JavaScript MapReduce is deprecated; we have expanded our [Erlang MapReduce]({{}}riak/kv/2.2.2/developing/app-guide/advanced-mapreduce/#mapreduce) documentation to assist with the transition. +* Riak search 1.0 is deprecated in favor of our Solr-based [Riak search 2.0]({{}}riak/kv/2.2.2/developing/usage/search/). Version 1.0 will not work if security is enabled. * v2 replication (a component of Riak KV Enterprise) is superseded by v3 and will be removed in the future. * Legacy vnode routing (an early mechanism for managing requests between servers) is deprecated. If `vnode_routing` is set to `legacy` via Riak KV's capability system, it should be removed to prevent upgrade problems in the future. -* Some users in the past have used Riak's internal API (e.g. `riak:local_client/1`); this API may change at any time, so we strongly recommend using our [Erlang client library](http://github.com/basho/riak-erlang-client/) (or [one of the other libraries](/riak/kv/2.2.2/developing/client-libraries/) we support) instead. \ No newline at end of file +* Some users in the past have used Riak's internal API (e.g. `riak:local_client/1`); this API may change at any time, so we strongly recommend using our [Erlang client library](http://github.com/basho/riak-erlang-client/) (or [one of the other libraries]({{}}riak/kv/2.2.2/developing/client-libraries/) we support) instead. \ No newline at end of file diff --git a/content/riak/kv/2.2.2/setup/downgrade.md b/content/riak/kv/2.2.2/setup/downgrade.md index 920fd846e9..92ed3c4cdd 100644 --- a/content/riak/kv/2.2.2/setup/downgrade.md +++ b/content/riak/kv/2.2.2/setup/downgrade.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.2/ops/upgrading/rolling-downgrades/ --- -[rolling upgrade]: /riak/kv/2.2.2/setup/upgrading/cluster -[config ref]: /riak/kv/2.2.2/configuring/reference -[concept aae]: /riak/kv/2.2.2/learn/concepts/active-anti-entropy/ -[aae status]: /riak/kv/2.2.2/using/admin/riak-admin/#aae-status +[rolling upgrade]: {{}}riak/kv/2.2.2/setup/upgrading/cluster +[config ref]: {{}}riak/kv/2.2.2/configuring/reference +[concept aae]: {{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/ +[aae status]: {{}}riak/kv/2.2.2/using/admin/riak-admin/#aae-status Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade]. 
diff --git a/content/riak/kv/2.2.2/setup/installing.md b/content/riak/kv/2.2.2/setup/installing.md index 33df4871a8..5f9b5ddeab 100644 --- a/content/riak/kv/2.2.2/setup/installing.md +++ b/content/riak/kv/2.2.2/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.2.2/installing/ --- -[install aws]: /riak/kv/2.2.2/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.2.2/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.2.2/setup/installing/freebsd -[install mac osx]: /riak/kv/2.2.2/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.2.2/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.2.2/setup/installing/smartos -[install solaris]: /riak/kv/2.2.2/setup/installing/solaris -[install suse]: /riak/kv/2.2.2/setup/installing/suse -[install windows azure]: /riak/kv/2.2.2/setup/installing/windows-azure -[install source index]: /riak/kv/2.2.2/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.2.2/setup/upgrading +[install aws]: {{}}riak/kv/2.2.2/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.2.2/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.2.2/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.2.2/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.2.2/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.2.2/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.2.2/setup/installing/solaris +[install suse]: {{}}riak/kv/2.2.2/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.2.2/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.2.2/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.2.2/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.2.2/setup/installing/amazon-web-services.md b/content/riak/kv/2.2.2/setup/installing/amazon-web-services.md index fb3069bf3f..85021db2a1 100644 --- a/content/riak/kv/2.2.2/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.2.2/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. @@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.2.2/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.2.2/using/security/). 
## Clustering Riak on AWS diff --git a/content/riak/kv/2.2.2/setup/installing/debian-ubuntu.md b/content/riak/kv/2.2.2/setup/installing/debian-ubuntu.md index 7bd91073b0..312dd3e19b 100644 --- a/content/riak/kv/2.2.2/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.2.2/setup/installing/debian-ubuntu.md @@ -18,10 +18,10 @@ aliases: - /riak/kv/2.2.2/installing/debian-ubuntu/ --- -[install source index]: /riak/kv/2.2.2/setup/installing/source/ -[security index]: /riak/kv/2.2.2/using/security/ -[install source erlang]: /riak/kv/2.2.2/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.2/setup/installing/verify +[install source index]: {{}}riak/kv/2.2.2/setup/installing/source/ +[security index]: {{}}riak/kv/2.2.2/using/security/ +[install source erlang]: {{}}riak/kv/2.2.2/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.2/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.2.2/setup/installing/freebsd.md b/content/riak/kv/2.2.2/setup/installing/freebsd.md index e92de8f65b..99b7f12b5d 100644 --- a/content/riak/kv/2.2.2/setup/installing/freebsd.md +++ b/content/riak/kv/2.2.2/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.2.2/setup/installing/source/erlang -[downloads]: /riak/kv/2.2.2/downloads/ -[install verify]: /riak/kv/2.2.2/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.2.2/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.2.2/downloads/ +[install verify]: {{}}riak/kv/2.2.2/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.2.2/setup/installing/mac-osx.md b/content/riak/kv/2.2.2/setup/installing/mac-osx.md index 05eaa2b2c0..68c65b5162 100644 --- a/content/riak/kv/2.2.2/setup/installing/mac-osx.md +++ b/content/riak/kv/2.2.2/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.2.2/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.2.2/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.2/setup/installing/verify +[perf open files]: {{}}riak/kv/2.2.2/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.2.2/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.2/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. You can install from source or download a diff --git a/content/riak/kv/2.2.2/setup/installing/rhel-centos.md b/content/riak/kv/2.2.2/setup/installing/rhel-centos.md index ed9724d360..dfa60c0b1f 100644 --- a/content/riak/kv/2.2.2/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.2.2/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.2.2/setup/installing/source -[install source erlang]: /riak/kv/2.2.2/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.2/setup/installing/verify +[install source index]: {{}}riak/kv/2.2.2/setup/installing/source +[install source erlang]: {{}}riak/kv/2.2.2/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.2/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. 
The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.2.2/setup/installing/smartos.md b/content/riak/kv/2.2.2/setup/installing/smartos.md index 42f9d3f4f9..6d6d65bc07 100644 --- a/content/riak/kv/2.2.2/setup/installing/smartos.md +++ b/content/riak/kv/2.2.2/setup/installing/smartos.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.2.2/installing/smartos/ --- -[install verify]: /riak/kv/2.2.2/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.2/setup/installing/verify {{% note title="SmartOS End of Life (EOL) for Riak KV 2.2.2" %}} SmartOS is no longer supported in Riak KV 2.2.2+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). @@ -74,13 +74,13 @@ cat /opt/local/etc/pkgin/repositories.conf Download your version of the Riak binary package for SmartOS: ```bash -curl -o /tmp/riak-2.2.2-SmartOS-x86_64.tgz http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/smartos/1.8/riak-2.2.2-SmartOS-x86_64.tgz +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz ``` Next, install the package: ``` -pkg_add /tmp/riak-2.2.2-SmartOS-x86_64.tgz +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz ``` After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: diff --git a/content/riak/kv/2.2.2/setup/installing/solaris.md b/content/riak/kv/2.2.2/setup/installing/solaris.md index 7a0dc40847..d7b7d432e1 100644 --- a/content/riak/kv/2.2.2/setup/installing/solaris.md +++ b/content/riak/kv/2.2.2/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.2.2/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.2/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. 
diff --git a/content/riak/kv/2.2.2/setup/installing/source.md b/content/riak/kv/2.2.2/setup/installing/source.md index 4e6626a3f8..efc0b88c81 100644 --- a/content/riak/kv/2.2.2/setup/installing/source.md +++ b/content/riak/kv/2.2.2/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.2.2/setup/installing/source/erlang -[downloads]: /riak/kv/2.2.2/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.2.2/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.2.2/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.2.2/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.2.2/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.2.2/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.2.2/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.2.2/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.2.2/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.2.2/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.2.2/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.2.2/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.2.2/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.2.2/setup/installing/source/erlang.md b/content/riak/kv/2.2.2/setup/installing/source/erlang.md index cb898ac325..95425d8fa6 100644 --- a/content/riak/kv/2.2.2/setup/installing/source/erlang.md +++ b/content/riak/kv/2.2.2/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.2/installing/source/erlang/ --- -[install index]: /riak/kv/2.2.2/setup/installing -[security basics]: /riak/kv/2.2.2/using/security/basics +[install index]: {{}}riak/kv/2.2.2/setup/installing +[security basics]: {{}}riak/kv/2.2.2/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho10.tar.gz). 
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.2.2/setup/installing/source/jvm.md b/content/riak/kv/2.2.2/setup/installing/source/jvm.md index a70f8a9773..a6ee899c83 100644 --- a/content/riak/kv/2.2.2/setup/installing/source/jvm.md +++ b/content/riak/kv/2.2.2/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.2.2/installing/source/jvm/ --- -[usage search]: /riak/kv/2.2.2/developing/usage/search +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.2.2/setup/installing/suse.md b/content/riak/kv/2.2.2/setup/installing/suse.md index f14415e55c..445bf27567 100644 --- a/content/riak/kv/2.2.2/setup/installing/suse.md +++ b/content/riak/kv/2.2.2/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.2.2/installing/suse/ --- -[install verify]: /riak/kv/2.2.2/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.2/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.2.2/setup/installing/verify.md b/content/riak/kv/2.2.2/setup/installing/verify.md index 68f32f5f96..1336495162 100644 --- a/content/riak/kv/2.2.2/setup/installing/verify.md +++ b/content/riak/kv/2.2.2/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.2/installing/verify-install/ --- -[client libraries]: /riak/kv/2.2.2/developing/client-libraries -[perf open files]: /riak/kv/2.2.2/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.2.2/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.2.2/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.2.2/developing/client-libraries +[perf open files]: {{}}riak/kv/2.2.2/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.2.2/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.2.2/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.2.2/setup/installing/windows-azure.md b/content/riak/kv/2.2.2/setup/installing/windows-azure.md index 7262113e8a..e3c1a3d94c 100644 --- a/content/riak/kv/2.2.2/setup/installing/windows-azure.md +++ b/content/riak/kv/2.2.2/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". 
- ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.2.2/setup/planning/backend.md b/content/riak/kv/2.2.2/setup/planning/backend.md index 35e29345de..67635e923a 100644 --- a/content/riak/kv/2.2.2/setup/planning/backend.md +++ b/content/riak/kv/2.2.2/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.2.2/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.2/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.2/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.2.2/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.2/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.2/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.2.2/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
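The selection itself is a single `riak.conf` setting. A minimal sketch of moving a node from the default to LevelDB, assuming the config location used by the Linux packages; the node must be restarted afterward:

```bash
# storage_backend accepts bitcask (the default), leveldb, memory, or multi.
sudo sed -i 's/^storage_backend = .*/storage_backend = leveldb/' /etc/riak/riak.conf

# Restart so the node picks up the new backend.
riak stop
riak start
```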
diff --git a/content/riak/kv/2.2.2/setup/planning/backend/bitcask.md b/content/riak/kv/2.2.2/setup/planning/backend/bitcask.md index 17ad759509..3dd07394d0 100644 --- a/content/riak/kv/2.2.2/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.2.2/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.2.2/using/admin/riak-cli -[config reference]: /riak/kv/2.2.2/configuring/reference -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.2.2/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.2.2/setup/planning/backend/multi -[usage search]: /riak/kv/2.2.2/developing/usage/search - -[glossary aae]: /riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.2.2/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.2.2/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.2.2/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.2.2/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.2.2/configuring/reference +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.2.2/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.2.2/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.2.2/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.2.2/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
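Where Bitcask keeps its files is controlled by one setting, and pointing it at a dedicated volume is a common first tuning step. A sketch, assuming the stock Linux package paths and that `bitcask.data_root` is not already set elsewhere in the file:

```bash
# Place Bitcask data on its own volume; one subdirectory per vnode
# appears under this root once the node starts.
echo 'bitcask.data_root = /var/lib/riak/bitcask' | sudo tee -a /etc/riak/riak.conf
```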
diff --git a/content/riak/kv/2.2.2/setup/planning/backend/leveldb.md b/content/riak/kv/2.2.2/setup/planning/backend/leveldb.md index 0f4bdbdb7d..39fe249a7e 100644 --- a/content/riak/kv/2.2.2/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.2.2/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.2.2/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[config reference]: /riak/kv/2.2.2/configuring/reference -[perf index]: /riak/kv/2.2.2/using/performance -[config reference#aae]: /riak/kv/2.2.2/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.2.2/configuring/reference +[perf index]: {{}}riak/kv/2.2.2/using/performance +[config reference#aae]: {{}}riak/kv/2.2.2/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.2.2/setup/planning/backend/memory.md b/content/riak/kv/2.2.2/setup/planning/backend/memory.md index f43715e66b..e63cfc1582 100644 --- a/content/riak/kv/2.2.2/setup/planning/backend/memory.md +++ b/content/riak/kv/2.2.2/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.2/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.2.2/configuring/reference -[plan backend multi]: /riak/kv/2.2.2/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.2.2/configuring/reference +[plan backend multi]: {{}}riak/kv/2.2.2/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.2.2/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.2.2/setup/planning/backend/multi.md b/content/riak/kv/2.2.2/setup/planning/backend/multi.md index 00907dfeb2..71b6b4fd13 100644 --- a/content/riak/kv/2.2.2/setup/planning/backend/multi.md +++ b/content/riak/kv/2.2.2/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.2/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.2.2/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.2/setup/planning/backend/memory -[config reference]: /riak/kv/2.2.2/configuring/reference -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.2.2/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.2.2/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.2/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.2.2/configuring/reference +[usage bucket types]: {{}}riak/kv/2.2.2/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.2.2/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.2.2/setup/planning/best-practices.md b/content/riak/kv/2.2.2/setup/planning/best-practices.md index 118f23e9a2..4be9b8091a 100644 --- a/content/riak/kv/2.2.2/setup/planning/best-practices.md +++ b/content/riak/kv/2.2.2/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.2/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.2.2/using/reference/handoff -[config mapreduce]: /riak/kv/2.2.2/configuring/mapreduce -[glossary aae]: /riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.2.2/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.2.2/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.2.2/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.2.2/setup/planning/bitcask-capacity-calc.md index 2e25f490ae..63de07fc71 100644 --- a/content/riak/kv/2.2.2/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.2.2/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
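If you want a rough number before reaching for the calculators, the dominant RAM cost under Bitcask is the keydir: every key on every replica carries the key bytes plus a fixed in-memory overhead. A back-of-the-envelope sketch in shell; the per-key overhead constant and all workload figures below are placeholder assumptions, not measured values:

```bash
# Rough cluster-wide keydir RAM estimate for Bitcask.
TOTAL_KEYS=200000000       # distinct keys (assumed)
AVG_BUCKET_KEY_BYTES=36    # average bucket-plus-key length (assumed)
PER_KEY_OVERHEAD=45        # static keydir overhead per key, in bytes (assumed)
N_VAL=3                    # replicas per key

RAM_BYTES=$(( TOTAL_KEYS * (PER_KEY_OVERHEAD + AVG_BUCKET_KEY_BYTES) * N_VAL ))
echo "Approximate keydir RAM across the cluster: $(( RAM_BYTES / 1024 / 1024 / 1024 )) GiB"
```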
diff --git a/content/riak/kv/2.2.2/setup/planning/cluster-capacity.md b/content/riak/kv/2.2.2/setup/planning/cluster-capacity.md
index d0470f6b16..0dfb75f075 100644
--- a/content/riak/kv/2.2.2/setup/planning/cluster-capacity.md
+++ b/content/riak/kv/2.2.2/setup/planning/cluster-capacity.md
@@ -15,13 +15,13 @@ aliases:
 - /riak/kv/2.2.2/ops/building/planning/cluster
---
-[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb
-[plan bitcask capacity]: /riak/kv/2.2.2/setup/planning/bitcask-capacity-calc
-[plan index]: /riak/kv/2.2.2/setup/planning
-[concept replication]: /riak/kv/2.2.2/learn/concepts/replication
-[use admin riak-admin#cluster]: /riak/kv/2.2.2/using/admin/riak-admin/#cluster
-[config reference]: /riak/kv/2.2.2/configuring/reference
-[perf benchmark]: /riak/kv/2.2.2/using/performance/benchmarking
+[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{}}riak/kv/2.2.2/setup/planning/bitcask-capacity-calc
+[plan index]: {{}}riak/kv/2.2.2/setup/planning
+[concept replication]: {{}}riak/kv/2.2.2/learn/concepts/replication
+[use admin riak-admin#cluster]: {{}}riak/kv/2.2.2/using/admin/riak-admin/#cluster
+[config reference]: {{}}riak/kv/2.2.2/configuring/reference
+[perf benchmark]: {{}}riak/kv/2.2.2/using/performance/benchmarking
 [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
diff --git a/content/riak/kv/2.2.2/setup/planning/operating-system.md b/content/riak/kv/2.2.2/setup/planning/operating-system.md
index 058e7bdc97..d39e96bfcf 100644
--- a/content/riak/kv/2.2.2/setup/planning/operating-system.md
+++ b/content/riak/kv/2.2.2/setup/planning/operating-system.md
@@ -12,7 +12,7 @@ menu:
 toc: true
---
-[downloads]: /riak/kv/2.2.2/downloads/
+[downloads]: {{}}riak/kv/2.2.2/downloads/
 We recommend deploying Riak KV on a mainstream Unix-like operating
 system. Mainstream distributions have larger support communities, making
diff --git a/content/riak/kv/2.2.2/setup/planning/start.md b/content/riak/kv/2.2.2/setup/planning/start.md
index a2e27a008a..3e793be462 100644
--- a/content/riak/kv/2.2.2/setup/planning/start.md
+++ b/content/riak/kv/2.2.2/setup/planning/start.md
@@ -15,10 +15,10 @@ aliases:
 - /riak/kv/2.2.2/ops/building/planning/system-planning
---
-[plan backend]: /riak/kv/2.2.2/setup/planning/backend
-[plan cluster capacity]: /riak/kv/2.2.2/setup/planning/cluster-capacity
-[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask
-[plan bitcask capacity]: /riak/kv/2.2.2/setup/planning/bitcask-capacity-calc
+[plan backend]: {{}}riak/kv/2.2.2/setup/planning/backend
+[plan cluster capacity]: {{}}riak/kv/2.2.2/setup/planning/cluster-capacity
+[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{}}riak/kv/2.2.2/setup/planning/bitcask-capacity-calc
 Here are some steps and recommendations for designing and configuring your Riak cluster.
diff --git a/content/riak/kv/2.2.2/setup/upgrading/checklist.md b/content/riak/kv/2.2.2/setup/upgrading/checklist.md index daf6c3bf3b..f3378f544a 100644 --- a/content/riak/kv/2.2.2/setup/upgrading/checklist.md +++ b/content/riak/kv/2.2.2/setup/upgrading/checklist.md @@ -15,24 +15,24 @@ aliases: - /riak/kv/2.2.2/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.2.2/using/performance/open-files-limit -[perf index]: /riak/kv/2.2.2/using/performance +[perf open files]: {{}}riak/kv/2.2.2/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.2.2/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.2.2/using/security/basics -[cluster ops load balance]: /riak/kv/2.2.2/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.2.2/configuring/reference -[config backend]: /riak/kv/2.2.2/configuring/backend -[usage search]: /riak/kv/2.2.2/developing/usage/search -[usage conflict resolution]: /riak/kv/2.2.2/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.2.2/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.2.2/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.2.2/using/admin/commands -[use admin riak control]: /riak/kv/2.2.2/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.2.2/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.2.2/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.2.2/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.2.2/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.2.2/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.2.2/configuring/reference +[config backend]: {{}}riak/kv/2.2.2/configuring/backend +[usage search]: {{}}riak/kv/2.2.2/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.2.2/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.2.2/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.2.2/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.2.2/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.2.2/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.2.2/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.2.2/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.2.2/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.2.2/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition. 
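Two of the checklist items above, the open-files limit and time synchronization, can be sanity-checked from a shell before anything else. A minimal sketch, assuming the node runs ntpd so that `ntpq` is available:

```bash
# Open-files limit for the shell that will run Riak; the stock 1024 is
# far too low for a production node.
ulimit -n

# Confirm the node is actually syncing against NTP peers.
ntpq -p
```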
diff --git a/content/riak/kv/2.2.2/setup/upgrading/cluster.md b/content/riak/kv/2.2.2/setup/upgrading/cluster.md
new file mode 100644
index 0000000000..0164fee0ad
--- /dev/null
+++ b/content/riak/kv/2.2.2/setup/upgrading/cluster.md
@@ -0,0 +1,298 @@
+---
+title: "Upgrading a Cluster"
+description: ""
+project: "riak_kv"
+project_version: "2.2.2"
+menu:
+  riak_kv-2.2.2:
+    name: "Upgrading a Cluster"
+    identifier: "upgrading_cluster"
+    weight: 102
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.2.2/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.2.2/ops/upgrading/rolling-upgrades/
+---
+
+[production checklist]: {{}}riak/kv/2.2.2/setup/upgrading/checklist
+[use admin riak control]: {{}}riak/kv/2.2.2/using/admin/riak-control
+[use admin commands]: {{}}riak/kv/2.2.2/using/admin/commands
+[use admin riak-admin]: {{}}riak/kv/2.2.2/using/admin/riak-admin
+[usage secondary-indexes]: {{}}riak/kv/2.2.2/developing/usage/secondary-indexes
+[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{}}riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{}}riak/kv/2.2.2/configuring/v3-multi-datacenter
+[jmx monitor]: {{}}riak/kv/2.2.2/using/reference/jmx
+[snmp]: {{}}riak/kv/2.2.2/using/reference/snmp
+
+{{% note title="Note on upgrading Riak KV from older versions" %}}
+Riak KV upgrades are tested and supported for two feature release versions.
+For example, upgrades from 1.1.x to 1.3.x are tested and supported,
+while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new
+version of Riak KV that is more than two feature releases ahead, we
+recommend first upgrading to an intermediate version. For example, in an
+upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x
+before upgrading to 1.4.x.
+
+If you run [Riak Control]({{}}riak/kv/2.2.2/using/admin/riak-control), you should disable it during the rolling upgrade process.
+{{% /note %}}
+
+Riak KV nodes negotiate with each other to determine supported
+operating modes. This allows clusters containing mixed versions of Riak KV
+to properly interoperate without special configuration, and simplifies
+rolling upgrades.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading.
+
+## Debian/Ubuntu
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Debian/Ubuntu packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up the Riak KV node's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
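Steps 1 through 7 lend themselves to a small per-node helper when you have many nodes to walk. A minimal sketch, assuming passwordless sudo, a package path and node name passed as arguments, and that `riak-admin transfers` reports `No transfers active` once handoff has drained (confirm the exact phrasing on your version before relying on it):

```bash
#!/usr/bin/env bash
# Upgrade one Debian/Ubuntu node in place, then wait for it to rejoin
# and for hinted handoff to finish before moving on.
set -euo pipefail

PKG="$1"    # e.g. riak_2.2.2-1_amd64.deb (hypothetical file name)
NODE="$2"   # e.g. riak@192.168.1.11

riak stop
sudo tar -czf "riak_backup_$(date +%F).tar.gz" /var/lib/riak /etc/riak
sudo dpkg -i "$PKG"
riak start
riak version

riak-admin wait-for-service riak_kv "$NODE"

# Poll until hinted handoff has drained.
until riak-admin transfers | grep -q 'No transfers active'; do
  echo 'Transfers still running; waiting...'
  sleep 10
done
```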
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+riak@192.168.1.11)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d »riak_package_name«.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7. The app.config file from `riak_ee` will be significantly different from your backed-up file.
+   While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+  * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_jmx` --- See [JMX Monitoring][jmx monitor] for more information.
+  * `snmp` --- See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should ensure that any custom patches contained in
+the `basho-patches` directory are examined to determine their
+application to the upgraded version. If you find that patches no longer
+apply to the upgraded version, you should remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
diff --git a/content/riak/kv/2.2.2/setup/upgrading/search.md b/content/riak/kv/2.2.2/setup/upgrading/search.md
new file mode 100644
index 0000000000..0e7a989031
--- /dev/null
+++ b/content/riak/kv/2.2.2/setup/upgrading/search.md
@@ -0,0 +1,276 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.2.2"
+menu:
+  riak_kv-2.2.2:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.2.2/ops/advanced/upgrading-search-2
+  - /riak/kv/2.2.2/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e. no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+And please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down.
+Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM. A good start is 2 GB, but more will be required for heavier
+workloads. At the same time, do not make the heap too large, as that
+could cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+> Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space from merge index as its GC
+algorithm is bad at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config` it's called `yokozuna`.
+   If you've chosen to upgrade to the new `riak.conf` config option, it's
+   called `search`.
+
+   ```riakconf
+   search = on
+   ```
+   ```appconfig
+   {yokozuna, [
+     %% Other configs
+     {enabled, true},
+     %% Other configs
+   ]}
+   ```
+
+   {{% note title="Upgrade First" %}}
+ +2. For every schema in legacy Search, you must create a comparable +schema in new Search. If you want to use the default schema named +[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema. + + To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas) + instructions to learn how to define your xml file. Once you've created + the file, you can upload it to the cluster. + + ```curl + curl -XPUT http://localhost:8098/search/schema/my_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @my_schema.xml + ``` + +3. For every index in legacy Search, you must create a comparable index +in new Search, setting the appropriate schema that you created in the +previous step. This index can have the same name as your legacy Search +index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup). + + ```curl + curl -XPUT http://localhost:8098/search/index/my_index \ + -H 'Content-Type: application/json' \ + -d '{"schema":"my_schema"}' + ``` + +4. For each bucket which is indexed by legacy Search, you must add the +`search_index` bucket property to point to the new Search index. This +new index is what we are attempting to migrate all of our index data to. +You can find more details about this step under [Using Search](../../../developing/usage/search/#setup). + + ```curl + curl -XPUT http://localhost:8098/buckets/my_bucket/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"my_index"}}' + ``` + + Once a bucket is associated with the new Search, all objects that are + written or modified in Riak will be indexed by **both** legacy and new + Search. However, the HTTP and client query interfaces will still + continue to use the legacy Search. + +5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash +trees must be manually cleared so that AAE will notice the missing +indexes. + + Attach to one of the Riak nodes by calling `riak attach-direct`. Paste + the following code into the shell. It clears the Search hash trees for + each node in the cluster. + + ```erlang + riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity). + ``` + + Press `Ctrl-D` to exit from the attached shell. + + In the background AAE will rebuild the hash trees and exchange them + with KV. These exchanges will notice objects are missing and index + them in new Search. + + + +6. Monitor the AAE status of every node until a full round of exchanges +have occurred on every node. + + ```bash + riak-admin search aae-status + ``` + + First, you must wait until all trees are rebuilt. This may take a + while as each node is configured, by default, to build a maximum of + one tree per hour. You can determine when a tree is build by looking + at the `Entropy Trees` section. When a tree is not built it will show + `--` under the `Built (ago)` column. Otherwise, it will list how long + ago the tree was built in a human friendly format. Here is an example + of trees that are not built: + + ``` + ================================ Entropy Trees ================================ + Index Built (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 -- + 319703483166135013357056057156686910549735243776 -- + ... 
+ ``` + + Here is an example of built trees: + + ``` + ================================ Entropy Trees ================================ + Index Built (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 12.3 hr + 319703483166135013357056057156686910549735243776 5.3 hr + ... + ``` + + After all the trees are built you then have to wait for a full + exchange round to occur for every partition on every node. That is, + the full exchange round must be **NEWER** than the time the tree was + built. That way you know the exchange was based on the latest tree. + The exchange information is found under the `Exchanges` section. + Under that section there are two columns: `Last (ago)` and `All + (ago)`. In this was you want to wait until the `All (ago)` section is + newer than the value of `Built (ago)` in the `Entropy Trees` section. + For example, given the entropy tree output above this output would + indicate both partitions have had a full exchange round since the + latest tree was built: + + ``` + ================================== Exchanges ================================== + Index Last (ago) All (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 12.1 hr 12.1 hr + 319703483166135013357056057156686910549735243776 5.1 hr 5.2 hr + ... + ``` + + Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than + `5.3 hr`. Once the exchange is newer for every partition on every + node you know that AAE has brought all new indexes up to date. + +7. Next, call the following command that will give HTTP and PB query +control to the new Riak Search. + + ```curl + riak-admin search switch-to-new-search + ``` + +
+   {{% note title="Check Results Before Switching (Optional)" %}}
+   Up until this point, all incoming queries are serviced by the legacy
+   Search system. After `switch-to-new-search` is run, all queries
+   will be handled by new Search. If you first want to verify the
+   results of new Search before switching, you can use its dedicated
+   HTTP resource at `/search/query/?q=...`.
+   {{% /note %}}
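A spot check might look like the following sketch, where `my_index` is the index created earlier and `name_s:*` stands in for whatever query your application actually issues; compare the hit counts against the same query on the legacy interface:

```bash
# Query new Search directly, bypassing the legacy interface,
# and pull out the total hit count from the JSON response.
curl -s 'http://localhost:8098/search/query/my_index?wt=json&q=name_s:*' \
  | grep -o '"numFound":[0-9]*'
```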
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+   ```curl
+   curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+     -H 'Content-Type: application/json' \
+     -d '{"props":{"search": false}}'
+   ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+   ```appconfig
+   {riak_search, [
+     %% Other configs
+     {enabled, false},
+     %% Other configs
+   ]},
+   ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search it causes
+the commit hooks to persist even when legacy Search is disabled and the
+search property is set to false.
+
+    New Search has code to expunge the legacy hooks from the raw ring but
+    it only occurs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration.
diff --git a/content/riak/kv/2.2.2/setup/upgrading/version.md b/content/riak/kv/2.2.2/setup/upgrading/version.md
index d2674a7643..b5a8232c81 100644
--- a/content/riak/kv/2.2.2/setup/upgrading/version.md
+++ b/content/riak/kv/2.2.2/setup/upgrading/version.md
@@ -19,18 +19,18 @@ aliases:
 ---
-[production checklist]: /riak/kv/2.2.2/setup/upgrading/checklist
-[use admin riak control]: /riak/kv/2.2.2/using/admin/riak-control
-[use admin commands]: /riak/kv/2.2.2/using/admin/commands
-[use admin riak-admin]: /riak/kv/2.2.2/using/admin/riak-admin
-[usage secondary-indexes]: /riak/kv/2.2.2/developing/usage/secondary-indexes
-[release notes]: /riak/kv/2.2.2/release-notes
+[production checklist]: {{}}riak/kv/2.2.2/setup/upgrading/checklist
+[use admin riak control]: {{}}riak/kv/2.2.2/using/admin/riak-control
+[use admin commands]: {{}}riak/kv/2.2.2/using/admin/commands
+[use admin riak-admin]: {{}}riak/kv/2.2.2/using/admin/riak-admin
+[usage secondary-indexes]: {{}}riak/kv/2.2.2/developing/usage/secondary-indexes
+[release notes]: {{}}riak/kv/2.2.2/release-notes
 [riak enterprise]: http://basho.com/products/riak-kv/
-[cluster ops mdc]: /riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter
-[config v3 mdc]: /riak/kv/2.2.2/configuring/v3-multi-datacenter
-[jmx monitor]: /riak/kv/2.2.2/using/reference/jmx
-[snmp]: /riak/kv/2.2.2/using/reference/snmp
-[Release Notes]: /riak/kv/2.2.2/release-notes
+[cluster ops mdc]: {{}}riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{}}riak/kv/2.2.2/configuring/v3-multi-datacenter
+[jmx monitor]: {{}}riak/kv/2.2.2/using/reference/jmx
+[snmp]: {{}}riak/kv/2.2.2/using/reference/snmp
+[Release Notes]: {{}}riak/kv/2.2.2/release-notes
 ## Overview
diff --git a/content/riak/kv/2.2.2/using/admin/commands.md b/content/riak/kv/2.2.2/using/admin/commands.md
index b96e349407..afa1c1c4c1 100644
--- a/content/riak/kv/2.2.2/using/admin/commands.md
+++ b/content/riak/kv/2.2.2/using/admin/commands.md
@@ -15,11 +15,11 @@ aliases:
 - /riak/kv/2.2.2/ops/running/cluster-admin
---
-[use admin riak-admin#cluster]: /riak/kv/2.2.2/using/admin/riak-admin/#cluster
-[concept clusters]:
/riak/kv/2.2.2/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.2.2/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.2.2/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.2.2/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.2.2/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.2.2/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.2.2/using/admin/riak-admin.md b/content/riak/kv/2.2.2/using/admin/riak-admin.md index 38ecf4c75f..bacfc8e856 100644 --- a/content/riak/kv/2.2.2/using/admin/riak-admin.md +++ b/content/riak/kv/2.2.2/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.2.2/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.2.2/configuring/reference -[use admin commands]: /riak/kv/2.2.2/using/admin/commands -[use admin commands#join]: /riak/kv/2.2.2/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.2.2/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.2.2/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.2.2/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.2.2/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.2.2/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.2.2/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.2.2/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.2.2/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.2.2/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.2.2/setup/downgrade -[security index]: /riak/kv/2.2.2/using/security/ -[security managing]: /riak/kv/2.2.2/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.2.2/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.2.2/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.2.2/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.2.2/using/cluster-operations/strong-consistency -[cluster ops handoff]: /riak/kv/2.2.2/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.2.2/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.2.2/configuring/reference +[use admin commands]: {{}}riak/kv/2.2.2/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.2.2/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.2.2/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.2.2/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.2.2/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.2.2/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.2.2/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.2.2/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.2.2/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.2.2/using/cluster-operations/inspecting-node +[use ref 
monitoring]: {{}}riak/kv/2.2.2/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.2.2/setup/downgrade +[security index]: {{}}riak/kv/2.2.2/using/security/ +[security managing]: {{}}riak/kv/2.2.2/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.2.2/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.2.2/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.2.2/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.2.2/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.2.2/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.2.2/using/admin/riak-admin/#stats ## `riak-admin` diff --git a/content/riak/kv/2.2.2/using/admin/riak-cli.md b/content/riak/kv/2.2.2/using/admin/riak-cli.md index e0e643a444..85d6882773 100644 --- a/content/riak/kv/2.2.2/using/admin/riak-cli.md +++ b/content/riak/kv/2.2.2/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.2/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.2.2/configuring/reference/ +[configuration file]: {{}}riak/kv/2.2.2/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.2.2/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.2.2/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.2.2/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.2.2/configuring/reference/ ## riak diff --git a/content/riak/kv/2.2.2/using/admin/riak-control.md b/content/riak/kv/2.2.2/using/admin/riak-control.md index 96a97a632d..bb4fbe18d9 100644 --- a/content/riak/kv/2.2.2/using/admin/riak-control.md +++ b/content/riak/kv/2.2.2/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.2.2/configuring/reference +[config reference]: {{}}riak/kv/2.2.2/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.2.2/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.2.2/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. 
Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.2.2/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.2.2/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.2.2/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.2.2/using/cluster-operations/active-anti-entropy.md index 7069aab5a3..b4e5a41432 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/active-anti-entropy.md @@ -15,8 +15,8 @@ aliases: - /riak/2.2.2/ops/advanced/aae/ --- -[config search#throttledelay]: /riak/kv/2.2.2/configuring/search/#search-anti-entropy-throttle-$tier-delay -[config search#throttle]: riak/kv/2.2.2/configuring/search/#search-anti-entropy-throttle +[config search#throttledelay]: {{}}riak/kv/2.2.2/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{}}riak/kv/2.2.2/configuring/search/#search-anti-entropy-throttle Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. @@ -57,12 +57,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. 
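Across a cluster, that change is usually applied as a mini rolling restart so read coverage never drops. A minimal sketch for one node, assuming the stock config path and substituting the node's own name:

```bash
# Flip AAE on in riak.conf, bounce the node, and wait for KV to return
# before touching the next node.
sudo sed -i 's/^anti_entropy = .*/anti_entropy = active/' /etc/riak/riak.conf
riak stop
riak start
riak-admin wait-for-service riak_kv riak@192.168.1.11   # this node's name
```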
## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -90,7 +90,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes.md index 70f95b7a95..c5ce69eb83 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.2.2/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.2.2/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.2.2/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.2.2/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.2.2/using/cluster-operations/backing-up.md b/content/riak/kv/2.2.2/using/cluster-operations/backing-up.md index ec8a12cde7..5e5ea15bb1 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.2/ops/running/backups --- -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters -[config reference]: /riak/kv/2.2.2/configuring/reference -[plan backend leveldb]: /riak/kv/2.2.2/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.2/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency -[concept aae]: /riak/kv/2.2.2/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.2.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.2.2/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.2.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.2/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.2.2/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.2.2/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.2.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.2.2/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.2.2/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down <node>`](/riak/kv/2.2.2/using/admin/riak-admin/#down) + [`riak-admin down <node>`]({{< baseurl >}}riak/kv/2.2.2/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join <node>`](/riak/kv/2.2.2/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join <node>`]({{< baseurl >}}riak/kv/2.2.2/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace <node> <new-node>`](/riak/kv/2.2.2/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace <node> <new-node>`]({{< baseurl >}}riak/kv/2.2.2/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.2.2/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{< baseurl >}}riak/kv/2.2.2/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.2.2/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{< baseurl >}}riak/kv/2.2.2/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.2.2/using/cluster-operations/bucket-types.md b/content/riak/kv/2.2.2/using/cluster-operations/bucket-types.md index 8209465df2..564f9ea73f 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.2.2/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line.
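For concreteness, the rename-restore sequence from the backup hunks above, applied to the `riak1.example.com` → `riak6.example.com` example, would look roughly like this (node names come from the text; the `riak@` nodename prefix is an assumption about the cluster's naming scheme):

```bash
# Run on riak6.example.com after restoring riak1's backup onto it.
riak-admin down riak@riak1.example.com          # mark the original instance down
riak-admin cluster join riak@riak2.example.com  # join via any running member
riak-admin cluster force-replace riak@riak1.example.com riak@riak6.example.com
riak-admin cluster plan                         # review the proposed changes
riak-admin cluster commit                       # apply them
```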
diff --git a/content/riak/kv/2.2.2/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.2.2/using/cluster-operations/changing-cluster-info.md index c7a852e1e2..e06343d12a 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.2.2/configuring/reference +[config reference]: {{< baseurl >}}riak/kv/2.2.2/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.2.2/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.2.2/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{< baseurl >}}riak/kv/2.2.2/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{< baseurl >}}riak/kv/2.2.2/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.2.2/using/cluster-operations/handoff.md b/content/riak/kv/2.2.2/using/cluster-operations/handoff.md index 1387212523..ff5be368bd 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.2.2/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{< baseurl >}}riak/kv/2.2.2/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.2.2/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.2.2/using/cluster-operations/logging.md b/content/riak/kv/2.2.2/using/cluster-operations/logging.md index 6224a0707a..b08983cc81 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/logging.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e.
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.2.2/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.2.2/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.2.2/using/cluster-operations/replacing-node.md b/content/riak/kv/2.2.2/using/cluster-operations/replacing-node.md index fe604f32bd..f43ff3052a 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.2.2/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.2.2/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.2.2/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.2.2/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.2.2/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.2.2/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.2.2/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.2.2/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.2.2/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.2.2/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.2.2/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.2.2/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.2.2/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.2.2/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.2.2/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.2.2/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.2.2/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.2.2/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.2.2/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.2.2/using/cluster-operations/strong-consistency.md index 57d7e1bb25..a83bc3b767 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.2.2/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.2.2/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.2.2/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.2.2/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.2.2/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.2.2/using/cluster-operations/v2-multi-datacenter.md index a6b4c26136..ec387795c9 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/v2-multi-datacenter.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication system is largely @@ -163,7 +163,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -183,7 +183,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -221,7 +221,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.2.2/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.2.2/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -242,7 +242,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter.md index 07ce2cde5b..1b0cfaaf0d 100644 --- a/content/riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.2.2/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.2.2/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.2.2/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.2.2/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.2.2/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{< baseurl >}}riak/kv/2.2.2/configuring/v3-multi-datacenter +[config v3 nat]: {{< baseurl >}}riak/kv/2.2.2/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{< baseurl >}}riak/kv/2.2.2/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{< baseurl >}}riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{< baseurl >}}riak/kv/2.2.2/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.2.2/using/performance.md b/content/riak/kv/2.2.2/using/performance.md index 0896324275..dfb48b926b 100644 --- a/content/riak/kv/2.2.2/using/performance.md +++ b/content/riak/kv/2.2.2/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.2.2/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{< baseurl >}}riak/kv/2.2.2/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -253,12 +253,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.2.2/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{< baseurl >}}riak/kv/2.2.2/using/performance/open-files-limit/) for more details.
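Since the performance hunk above defers to the Open Files Limit doc, here is a minimal sketch of the check it recommends (the 65536 value is a common starting point, not a prescription; a persistent limit belongs in `/etc/security/limits.conf` or your service manager's unit file):

```bash
# Inspect the limit the riak user actually runs with, then raise it for this shell.
sudo -u riak sh -c 'ulimit -n'
ulimit -n 65536
```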
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.2.2/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.2.2/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.2.2/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.2.2/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.2.2/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.2.2/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.2.2/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.2.2/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.2.2/using/performance/benchmarking.md b/content/riak/kv/2.2.2/using/performance/benchmarking.md index 0991dc8b6d..7ce970a344 100644 --- a/content/riak/kv/2.2.2/using/performance/benchmarking.md +++ b/content/riak/kv/2.2.2/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.2.2/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.2.2/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.2.2/using/performance/latency-reduction.md b/content/riak/kv/2.2.2/using/performance/latency-reduction.md index c407d0a695..1f4a7b9876 100644 --- a/content/riak/kv/2.2.2/using/performance/latency-reduction.md +++ b/content/riak/kv/2.2.2/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.2.2/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.2.2/using/performance/multi-datacenter-tuning.md index cbc1fb24c7..3788c74c0b 100644 --- a/content/riak/kv/2.2.2/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.2.2/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.2.2/using/performance +[perf index]: {{}}riak/kv/2.2.2/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.2.2/using/performance/open-files-limit.md b/content/riak/kv/2.2.2/using/performance/open-files-limit.md index 91cc223e6c..bc47d7379d 100644 --- a/content/riak/kv/2.2.2/using/performance/open-files-limit.md +++ b/content/riak/kv/2.2.2/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/ops/tuning/open-files-limit/ --- -[plan backend]: /riak/kv/2.2.2/setup/planning/backend/ +[plan backend]: {{}}riak/kv/2.2.2/setup/planning/backend/ [blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files Riak KV can accumulate a large number of open file handles during operation. 
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles. diff --git a/content/riak/kv/2.2.2/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.2.2/using/performance/v2-scheduling-fullsync.md index 76f6629a42..3b953ad878 100644 --- a/content/riak/kv/2.2.2/using/performance/v2-scheduling-fullsync.md +++ b/content/riak/kv/2.2.2/using/performance/v2-scheduling-fullsync.md @@ -14,7 +14,7 @@ commercial_offering: true --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.2/using/reference/bucket-types.md b/content/riak/kv/2.2.2/using/reference/bucket-types.md index 7537ba3c8b..fa91e12f8d 100644 --- a/content/riak/kv/2.2.2/using/reference/bucket-types.md +++ b/content/riak/kv/2.2.2/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.2.2/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.2.2/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{< baseurl >}}riak/kv/2.2.2/developing/api/http/set-bucket-props) or [Protocol Buffers]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond.
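To make the older per-bucket approach concrete, setting properties over the HTTP API looks roughly like this (a sketch; host, port, and bucket name are placeholders):

```bash
# Pre-bucket-type configuration: set properties directly on one bucket.
curl -XPUT http://localhost:8098/buckets/my_bucket/props \
  -H "Content-Type: application/json" \
  -d '{"props":{"n_val":5,"allow_mult":false}}'
```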
@@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.2.2/developing/data-types), and [strong consistency](/riak/kv/2.2.2/developing/app-guide/strong-consistency) respectively + [Riak data types]({{< baseurl >}}riak/kv/2.2.2/developing/data-types), and [strong consistency]({{< baseurl >}}riak/kv/2.2.2/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.2.2/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.2/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.2.2/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{< baseurl >}}riak/kv/2.2.2/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{< baseurl >}}riak/kv/2.2.2/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.2.2/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.2.2/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{< baseurl >}}riak/kv/2.2.2/developing/usage/conflict-resolution) with the appropriate application-side business logic.
To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.2.2/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.2.2/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{< baseurl >}}riak/kv/2.2.2/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{< baseurl >}}riak/kv/2.2.2/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.2.2/learn/concepts/buckets) and [keys](/riak/kv/2.2.2/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/buckets) and [keys]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.2.2/using/reference/custom-code.md b/content/riak/kv/2.2.2/using/reference/custom-code.md index 1c8e372eda..e9900e3259 100644 --- a/content/riak/kv/2.2.2/using/reference/custom-code.md +++ b/content/riak/kv/2.2.2/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.2.2/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{< baseurl >}}riak/kv/2.2.2/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.2.2/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{< baseurl >}}riak/kv/2.2.2/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name as the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.2.2/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{< baseurl >}}riak/kv/2.2.2/using/admin/riak-admin/#wait-for-service).
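A minimal sketch of the rolling pattern that hunk describes (the nodename is a placeholder):

```bash
# Restart one node, then block until its KV service is back
# before moving on to the next node.
riak restart
riak-admin wait-for-service riak_kv riak@192.168.1.11
```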
{{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.2.2/using/reference/handoff.md b/content/riak/kv/2.2.2/using/reference/handoff.md index e6caaae8ee..16eb6620e0 100644 --- a/content/riak/kv/2.2.2/using/reference/handoff.md +++ b/content/riak/kv/2.2.2/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.2.2/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.2.2/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.2.2/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.2.2/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.2.2/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.2.2/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.2.2/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.2.2/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.2.2/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.2.2/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.2.2/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.2.2/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.2.2/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.2.2/configuring/reference/#vnode_management_timer). 
Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.2.2/using/reference/jmx.md b/content/riak/kv/2.2.2/using/reference/jmx.md index 390d75ca93..8ffaf19ddb 100644 --- a/content/riak/kv/2.2.2/using/reference/jmx.md +++ b/content/riak/kv/2.2.2/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.2/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.2.2/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.2.2/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.2.2/using/reference/logging.md b/content/riak/kv/2.2.2/using/reference/logging.md index 24711efb87..026c39988e 100644 --- a/content/riak/kv/2.2.2/using/reference/logging.md +++ b/content/riak/kv/2.2.2/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.2/ops/running/logging --- -[cluster ops log]: /riak/kv/2.2.2/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.2.2/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files](/riak/kv/2.2.2/configuring/reference/#lager) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.2.2/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -259,11 +259,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.2.2/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.2.2/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. 
* `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.2.2/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.2.2/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.2.2/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.2.2/using/reference/multi-datacenter/comparison.md index 67abe959cb..67e3587e35 100644 --- a/content/riak/kv/2.2.2/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.2.2/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.2.2/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.2.2/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.2.2/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.2.2/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. -* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.2.2/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.2.2/using/reference/runtime-interaction.md b/content/riak/kv/2.2.2/using/reference/runtime-interaction.md index 13b8aa8d85..b3d128fdfc 100644 --- a/content/riak/kv/2.2.2/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.2.2/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.2/ops/advanced/runtime --- -[config reference]: /riak/kv/2.2.2/configuring/reference -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.2.2/configuring/reference +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.2.2/using/reference/search.md b/content/riak/kv/2.2.2/using/reference/search.md index b57feb3e16..cdbdd4c10a 100644 --- a/content/riak/kv/2.2.2/using/reference/search.md +++ b/content/riak/kv/2.2.2/using/reference/search.md @@ -15,21 +15,21 @@ aliases: - /riak/kv/2.2.2/dev/advanced/search --- -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters -[configuring search]: /riak/kv/2.2.2/configuring/search +[concept clusters]: {{}}riak/kv/2.2.2/learn/concepts/clusters +[configuring search]: {{}}riak/kv/2.2.2/configuring/search > **Note on search 2.0 vs. legacy search** > > This document refers to Riak search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). 
For information about the deprecated Riak search, visit [the old Using Riak search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -search, you should check out the [Using Search](/riak/kv/2.2.2/developing/usage/search) document. +search, you should check out the [Using Search]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search) document. -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{< baseurl >}}images/yokozuna.png) ## Riak Search is Erlang @@ -127,7 +127,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.2.2/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -289,7 +289,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on how to customize your own [search schema](/riak/kv/2.2.2/developing/usage/search-schemas). +You can also find more information on how to customize your own [search schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -299,7 +299,7 @@ indexed. ## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.2.2/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -354,7 +354,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.2.2/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.2.2/using/reference/secondary-indexes.md b/content/riak/kv/2.2.2/using/reference/secondary-indexes.md index ce6460f0ca..c13b6df969 100644 --- a/content/riak/kv/2.2.2/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.2.2/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.2.2/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.2.2/using/reference/strong-consistency +[usage bucket types]: {{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.2.2/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e.
if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.2.2/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.2.2/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.2.2/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.2.2/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.2.2/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.2.2/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.2.2/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.2.2/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.2.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.2.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. 
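As a quick illustration of the document-based partitioning described above, tagging and querying a secondary index over HTTP looks roughly like this (a sketch; bucket, key, and index names are placeholders, and 2i requires the LevelDB or Memory backend):

```bash
# Write an object tagged with a binary secondary index...
curl -XPOST http://localhost:8098/buckets/users/keys/john \
  -H "x-riak-index-twitter_bin: jsmith123" \
  -H "Content-Type: application/json" \
  -d '{"name":"John Smith"}'

# ...then retrieve matching keys through the index.
curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith123
```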
diff --git a/content/riak/kv/2.2.2/using/reference/statistics-monitoring.md b/content/riak/kv/2.2.2/using/reference/statistics-monitoring.md index 9d6127b433..d4e48da860 100644 --- a/content/riak/kv/2.2.2/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.2.2/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.2.2/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.2.2/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.2.2/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.2.2/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.2.2/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.2.2/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.2.2/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.2.2/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.2.2/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.2.2/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.2.2/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.2.2/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -156,7 +156,7 @@ Metric | Description ## Command-line Interface -The [`riak-admin`](/riak/kv/2.2.2/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.2.2/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -181,14 +181,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.2.2/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.2.2/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. 
Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.2.2/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.2.2/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -235,7 +235,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.2.2/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.2.2/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -259,7 +259,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.2.2/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.2.2/developing/api/http/status) endpoint is also available. #### Nagios @@ -333,14 +333,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.2.2/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.2.2/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.2.2/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.2.2/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -362,9 +362,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.2.2/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.2.2/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.2.2/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.2.2/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -380,9 +380,9 @@ Docs](https://github.com/basho/basho_docs). 
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.2.2/using/reference/strong-consistency.md b/content/riak/kv/2.2.2/using/reference/strong-consistency.md index 069cbc5771..917bc983df 100644 --- a/content/riak/kv/2.2.2/using/reference/strong-consistency.md +++ b/content/riak/kv/2.2.2/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.2.2/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.2/learn/concepts/eventual-consistency +[usage bucket types]: {{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types +[concept eventual consistency]: {{< baseurl >}}riak/kv/2.2.2/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.2.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], which enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.2.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.2.2/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{< baseurl >}}riak/kv/2.2.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{< baseurl >}}riak/kv/2.2.2/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.2.2/configuring/strong-consistency/#fault-tolerance) +[operations]({{< baseurl >}}riak/kv/2.2.2/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance.
Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.2.2/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{< baseurl >}}riak/kv/2.2.2/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.2.2/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{< baseurl >}}riak/kv/2.2.2/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter.md index b793c7d752..e3e5b0b249 100644 --- a/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter.md @@ -16,7 +16,7 @@ toc: true [v2 mdc fullsync]: ./scheduling-fullsync {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.2/using/reference/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{< baseurl >}}riak/kv/2.2.2/using/reference/v3-multi-datacenter/) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter/architecture.md index 37cd55d024..da383a7eda 100644 --- a/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter/architecture.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.2/using/reference/v3-multi-datacenter/architecture/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{< baseurl >}}riak/kv/2.2.2/using/reference/v3-multi-datacenter/architecture/) instead. {{% /note %}} @@ -83,7 +83,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.2.2/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{< baseurl >}}riak/kv/2.2.2/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -95,7 +95,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{< baseurl >}}images/MDC_Full-sync-small.png)
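The fullsync cycle described above is driven from the listener side with the `riak-repl` command. A brief, hedged sketch of the relevant v2 commands, assuming a listener/site pair has already been configured:

```bash
# Kick off a fullsync cycle immediately rather than waiting for the schedule
riak-repl start-fullsync

# Pause and resume a running fullsync, e.g. around peak traffic windows
riak-repl pause-fullsync
riak-repl resume-fullsync

# Abort the current fullsync cycle entirely
riak-repl cancel-fullsync
```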
## Realtime Replication @@ -113,7 +113,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{< baseurl >}}images/MDC-real-time-sync-small.png)
## Restrictions @@ -121,6 +121,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.2.2/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.2.2/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{< baseurl >}}riak/kv/2.2.2/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter/scheduling-fullsync.md index 918e8542f7..e7c15f9c79 100644 --- a/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.2.2/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.2/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{< baseurl >}}riak/kv/2.2.2/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/aae.md index 3482dfb261..1ef2b31cac 100644 --- a/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.2.2/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.2.2/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters +[glossary aae]: {{< baseurl >}}riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{< baseurl >}}riak/kv/2.2.2/configuring/reference/#advanced-configuration +[concept clusters]: {{< baseurl >}}riak/kv/2.2.2/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/architecture.md index 5d334e063b..da287fd457 100644 --- a/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.2/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.2.2/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.2.2/learn/concepts/clusters +[glossary vnode]: {{< baseurl >}}riak/kv/2.2.2/learn/glossary/#vnode +[concept clusters]: {{< baseurl >}}riak/kv/2.2.2/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime2.png)
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC fullsync]({{< baseurl >}}images/MDC-v3-realtime6.png)
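The queueing walkthrough above maps onto a short `riak-repl` command sequence. A hedged sketch of enabling v3 realtime replication toward a sink cluster, assuming the sink has been named `newyork` via `riak-repl clustername` and exposes its cluster manager on the usual port:

```bash
# Connect the source cluster to the sink's cluster manager
riak-repl connect newyork.example.com:9080

# Enable the realtime queue for that sink, then start draining it
riak-repl realtime enable newyork
riak-repl realtime start newyork
```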
## Restrictions diff --git a/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/cascading-writes.md index d12d931e04..5ee566bd76 100644 --- a/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/cascading-writes.md @@ -83,7 +83,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/v3-multi-datacenter) for more information. To show the current settings: diff --git a/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md index 2fecbaa1bc..7703476a6b 100644 --- a/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.2.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.2/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.2.2/configuring/reference/#advanced-configuration +[config reference#advanced]: {{< baseurl >}}riak/kv/2.2.2/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.2.2/using/repair-recovery/errors.md b/content/riak/kv/2.2.2/using/repair-recovery/errors.md index d194397a08..5dc22dbcc2 100644 --- a/content/riak/kv/2.2.2/using/repair-recovery/errors.md +++ b/content/riak/kv/2.2.2/using/repair-recovery/errors.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.2.2/configuring/reference +[config reference]: {{< baseurl >}}riak/kv/2.2.2/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1. +`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1.
`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1.
`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes.
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.2.2/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{< baseurl >}}riak/kv/2.2.2/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.2.2/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.2.2/using/repair-recovery/failure-recovery.md index 3089432628..88a418e78a 100644 --- a/content/riak/kv/2.2.2/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.2.2/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.2.2/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{< baseurl >}}riak/kv/2.2.2/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.2.2/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{< baseurl >}}riak/kv/2.2.2/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses.
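Sibling explosion is straightforward to confirm from the same `/stats` endpoint discussed earlier, since Riak tracks per-get sibling counts. A hedged sketch, assuming a node on the default HTTP port and `jq` installed locally:

```bash
# High or steadily growing values here usually point at unresolved siblings
curl -s http://127.0.0.1:8098/stats | \
  jq '{mean: .node_get_fsm_siblings_mean, p99: .node_get_fsm_siblings_99, max: .node_get_fsm_siblings_100}'
```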
@@ -116,7 +116,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.2.2/using/repair-recovery/repairs.md b/content/riak/kv/2.2.2/using/repair-recovery/repairs.md index af8a6725d4..f409bf5d3f 100644 --- a/content/riak/kv/2.2.2/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.2.2/using/repair-recovery/repairs.md @@ -21,13 +21,13 @@ aliases: - /riak/kv/2.2.2/ops/running/recovery/repairing-partitions --- -[cluster ops aae]: /riak/kv/2.2.2/using/cluster-operations/active-anti-entropy/ -[config ref]: /riak/kv/2.2.2/configuring/reference/ [Erlang shell]: http://learnyousomeerlang.com/starting-out -[glossary AAE]: /riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae -[glossary readrep]: /riak/kv/2.2.2/learn/glossary/#read-repair -[search config]: /riak/kv/2.2.2/configuring/search/#search-config-settings -[tiered storage]: /riak/kv/2.2.2/setup/planning/backend/leveldb/#tiered-storage +[cluster ops aae]: {{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/active-anti-entropy/ +[config ref]: {{< baseurl >}}riak/kv/2.2.2/configuring/reference/ +[glossary AAE]: {{< baseurl >}}riak/kv/2.2.2/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{< baseurl >}}riak/kv/2.2.2/learn/glossary/#read-repair +[search config]: {{< baseurl >}}riak/kv/2.2.2/configuring/search/#search-config-settings +[tiered storage]: {{< baseurl >}}riak/kv/2.2.2/setup/planning/backend/leveldb/#tiered-storage @@ -237,23 +237,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.2.2/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.2.2/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/) is enabled.
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.2.2/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.2.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.2.2/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{< baseurl >}}riak/kv/2.2.2/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.2.2/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.2.2/using/repair-recovery/rolling-replaces.md index db8ee38b82..3750fdda41 100644 --- a/content/riak/kv/2.2.2/using/repair-recovery/rolling-replaces.md +++ b/content/riak/kv/2.2.2/using/repair-recovery/rolling-replaces.md @@ -12,9 +12,9 @@ menu: toc: true --- -[upgrade]: /riak/kv/2.2.2/setup/upgrading/cluster/ -[rolling restarts]: /riak/kv/2.2.2/using/repair-recovery/rolling-restart/ -[add node]: /riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes +[upgrade]: {{< baseurl >}}riak/kv/2.2.2/setup/upgrading/cluster/ +[rolling restarts]: {{< baseurl >}}riak/kv/2.2.2/using/repair-recovery/rolling-restart/ +[add node]: {{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. diff --git a/content/riak/kv/2.2.2/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.2.2/using/repair-recovery/rolling-restart.md index 87679c2fcc..4162077d5d 100644 --- a/content/riak/kv/2.2.2/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.2.2/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.2/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.2.2/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{< baseurl >}}riak/kv/2.2.2/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.2.2/using/running-a-cluster.md b/content/riak/kv/2.2.2/using/running-a-cluster.md index 5b8a9c3087..506d6c699c 100644 --- a/content/riak/kv/2.2.2/using/running-a-cluster.md +++ b/content/riak/kv/2.2.2/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster.
-Most configuration changes will be applied to the [configuration file](/riak/kv/2.2.2/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.2.2/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{< baseurl >}}riak/kv/2.2.2/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. ## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.2.2/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.2.2/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{< baseurl >}}riak/kv/2.2.2/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{< baseurl >}}riak/kv/2.2.2/using/cluster-operations/adding-removing-nodes) from a cluster.
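The join workflow this file describes boils down to a short command sequence. A hedged sketch, assuming the first node is reachable as `riak@192.168.1.10` and the commands are run on each joining node:

```bash
# On each additional node: stage a join to the first node
riak-admin cluster join riak@192.168.1.10

# Review the staged changes, then commit them once they look right
riak-admin cluster plan
riak-admin cluster commit

# Confirm that all nodes show up as valid members
riak-admin member-status
```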
> **Ring Creation Size** > diff --git a/content/riak/kv/2.2.2/using/security.md b/content/riak/kv/2.2.2/using/security.md index bfe7e1ca95..8d0b2e0b7f 100644 --- a/content/riak/kv/2.2.2/using/security.md +++ b/content/riak/kv/2.2.2/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.2.2/ops/advanced/security --- -[config reference search]: /riak/kv/2.1.4/configuring/reference/#search -[config search enabling]: /riak/kv/2.1.4/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl +[config reference search]: {{< baseurl >}}riak/kv/2.2.2/configuring/reference/#search +[config search enabling]: {{< baseurl >}}riak/kv/2.2.2/configuring/search/#enabling-riak-search +[config v3 ssl]: {{< baseurl >}}riak/kv/2.2.2/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.1.4/using/security/basics -[security managing]: /riak/kv/2.1.4/using/security/managing-sources/ +[security basics]: {{< baseurl >}}riak/kv/2.2.2/using/security/basics +[security managing]: {{< baseurl >}}riak/kv/2.2.2/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.1.4/developing/usage/search +[usage search]: {{< baseurl >}}riak/kv/2.2.2/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.2.2/using/security/basics.md b/content/riak/kv/2.2.2/using/security/basics.md index 4f37b6d74d..698a9743a2 100644 --- a/content/riak/kv/2.2.2/using/security/basics.md +++ b/content/riak/kv/2.2.2/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.2.2/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{< baseurl >}}riak/kv/2.2.2/using/security/managing-sources/). As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.2.2/developing/usage/search/). + will need to use the [new Search feature]({{< baseurl >}}riak/kv/2.2.2/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.2.2/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{< baseurl >}}riak/kv/2.2.2/using/reference/custom-code). 1.
Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.2.2/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.2.2/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke on from all|{}}riak/kv/2.2.2/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.2.2/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{< baseurl >}}riak/kv/2.2.2/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.2.2/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{< baseurl >}}riak/kv/2.2.2/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.2.2/configuring/search/) document. +[Riak Search Settings]({{< baseurl >}}riak/kv/2.2.2/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.2.2/using/security/managing-sources/). +A more in-depth tutorial can be found in [Managing Security Sources]({{< baseurl >}}riak/kv/2.2.2/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.2.2/using/security/managing-sources/) document.
+`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{< baseurl >}}riak/kv/2.2.2/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.2.2/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.2.2/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{< baseurl >}}riak/kv/2.2.2/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.2.2/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.2.2/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). -If, however, you are using the [HTTP API](/riak/kv/2.2.2/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{< baseurl >}}riak/kv/2.2.2/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed.
By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.2.2/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.2.2/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{< baseurl >}}riak/kv/2.2.2/using/security/managing-sources/), including [trust-based authentication]({{< baseurl >}}riak/kv/2.2.2/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.2.2/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.2.2/configuring/reference/#directories). +on [configuring directories]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/#directories).
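Concretely, the cert-related settings live alongside `platform_etc_dir` in `riak.conf`. A minimal sketch, assuming the stock parameter names and the `$(platform_etc_dir)` substitution that `riak.conf` supports:

```riakconf
## Where Riak looks for configuration (and, by default, certs)
platform_etc_dir = /etc/riak

## Per-node certificate, key, and CA bundle
ssl.certfile = $(platform_etc_dir)/cert.pem
ssl.keyfile = $(platform_etc_dir)/key.pem
ssl.cacertfile = $(platform_etc_dir)/cacert.pem
```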
platform_data_dir The directory in which Riak stores its storage backend data, as well -as active anti-entropy data, and cluster metadata. ./data
alive_tokens Determines the number of ticks the leader will wait to hear from its -associated vnode before assuming that the vnode +associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before ensemble_tick * alive_tokens milliseconds have elapsed, the leader will @@ -1833,8 +1833,8 @@ package) and in R14B04 via a custom repository and branch.
vnode_management_timer Sets the frequency with which vnodes attempt to trigger handoff between +href="{{< baseurl >}}riak/kv/2.2.2/learn/glossary/#vnode">vnodes attempt to trigger handoff between this node and other nodes in the cluster. 10s (10 seconds)
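Both of the parameters described in the table rows above are plain `riak.conf` entries. A hedged sketch of overriding them, assuming the parameter names shown:

```riakconf
## Move backend data, AAE data, and cluster metadata off the default ./data
platform_data_dir = /var/lib/riak

## How often vnodes check whether handoff should be triggered
vnode_management_timer = 10s
```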
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{< baseurl >}}riak/kv/2.2.2/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.2.2/using/security/managing-sources.md b/content/riak/kv/2.2.2/using/security/managing-sources.md index 392c8ecb9c..6b0a26137b 100644 --- a/content/riak/kv/2.2.2/using/security/managing-sources.md +++ b/content/riak/kv/2.2.2/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.2.2/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{< baseurl >}}riak/kv/2.2.2/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.2.2/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.2.2/using/security/basics/#the-basics). +[created]({{< baseurl >}}riak/kv/2.2.2/using/security/basics/#user-management) and that +security has been [enabled]({{< baseurl >}}riak/kv/2.2.2/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.2.2/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{< baseurl >}}riak/kv/2.2.2/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.2.2/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e.
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.2.2/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{< baseurl >}}riak/kv/2.2.2/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.2.2/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{< baseurl >}}riak/kv/2.2.2/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.2.3/_reference-links.md b/content/riak/kv/2.2.3/_reference-links.md index 9b6021bae4..6b1fd8398c 100644 --- a/content/riak/kv/2.2.3/_reference-links.md +++ b/content/riak/kv/2.2.3/_reference-links.md @@ -4,245 +4,245 @@ ## Common -[downloads]: /riak/kv/2.2.3/downloads/ -[install index]: /riak/kv/2.2.3/setup/installing -[upgrade index]: /riak/kv/2.2.3/upgrading -[plan index]: /riak/kv/2.2.3/planning -[config index]: /riak/2.1.3/using/configuring/ -[config reference]: /riak/kv/2.2.3/configuring/reference/ -[manage index]: /riak/kv/2.2.3/using/managing -[performance index]: /riak/kv/2.2.3/using/performance -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode +[downloads]: {{< baseurl >}}riak/kv/2.2.3/downloads/ +[install index]: {{< baseurl >}}riak/kv/2.2.3/setup/installing +[upgrade index]: {{< baseurl >}}riak/kv/2.2.3/upgrading +[plan index]: {{< baseurl >}}riak/kv/2.2.3/planning +[config index]: {{< baseurl >}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{< baseurl >}}riak/kv/2.2.3/configuring/reference/ +[manage index]: {{< baseurl >}}riak/kv/2.2.3/using/managing +[performance index]: {{< baseurl >}}riak/kv/2.2.3/using/performance +[glossary vnode]: {{< baseurl >}}riak/kv/2.2.3/learn/glossary/#vnode [contact basho]: http://basho.com/contact/ ## Planning -[plan index]: /riak/kv/2.2.3/setup/planning -[plan start]: /riak/kv/2.2.3/setup/planning/start -[plan backend]: /riak/kv/2.2.3/setup/planning/backend -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.3/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.3/setup/planning/backend/multi -[plan cluster capacity]: /riak/kv/2.2.3/setup/planning/cluster-capacity -[plan bitcask capacity]: /riak/kv/2.2.3/setup/planning/bitcask-capacity-calc -[plan best practices]: /riak/kv/2.2.3/setup/planning/best-practices -[plan future]: /riak/kv/2.2.3/setup/planning/future +[plan index]: {{< baseurl >}}riak/kv/2.2.3/setup/planning +[plan start]: {{< baseurl >}}riak/kv/2.2.3/setup/planning/start +[plan backend]: {{< baseurl >}}riak/kv/2.2.3/setup/planning/backend +[plan backend bitcask]: {{< baseurl >}}riak/kv/2.2.3/setup/planning/backend/bitcask +[plan backend leveldb]: {{< baseurl >}}riak/kv/2.2.3/setup/planning/backend/leveldb +[plan backend memory]: {{< baseurl >}}riak/kv/2.2.3/setup/planning/backend/memory +[plan backend multi]: {{< baseurl >}}riak/kv/2.2.3/setup/planning/backend/multi +[plan cluster capacity]:
{{< baseurl >}}riak/kv/2.2.3/setup/planning/cluster-capacity +[plan bitcask capacity]: {{< baseurl >}}riak/kv/2.2.3/setup/planning/bitcask-capacity-calc +[plan best practices]: {{< baseurl >}}riak/kv/2.2.3/setup/planning/best-practices +[plan future]: {{< baseurl >}}riak/kv/2.2.3/setup/planning/future ## Installing -[install index]: /riak/kv/2.2.3/setup/installing -[install aws]: /riak/kv/2.2.3/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.2.3/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.2.3/setup/installing/freebsd -[install mac osx]: /riak/kv/2.2.3/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.2.3/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.2.3/setup/installing/smartos -[install solaris]: /riak/kv/2.2.3/setup/installing/solaris -[install suse]: /riak/kv/2.2.3/setup/installing/suse -[install windows azure]: /riak/kv/2.2.3/setup/installing/windows-azure +[install index]: {{< baseurl >}}riak/kv/2.2.3/setup/installing +[install aws]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/amazon-web-services +[install debian & ubuntu]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/debian-ubuntu +[install freebsd]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/freebsd +[install mac osx]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/mac-osx +[install rhel & centos]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/rhel-centos +[install smartos]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/smartos +[install solaris]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/solaris +[install suse]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/suse +[install windows azure]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/windows-azure -[install source index]: /riak/kv/2.2.3/setup/installing/source -[install source erlang]: /riak/kv/2.2.3/setup/installing/source/erlang -[install source jvm]: /riak/kv/2.2.3/setup/installing/source/jvm +[install source index]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/source +[install source erlang]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/source/erlang +[install source jvm]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/source/jvm -[install verify]: /riak/kv/2.2.3/setup/installing/verify +[install verify]: {{< baseurl >}}riak/kv/2.2.3/setup/installing/verify ## Upgrading -[upgrade index]: /riak/kv/2.2.3/setup/upgrading -[upgrade checklist]: /riak/kv/2.2.3/setup/upgrading/checklist -[upgrade version]: /riak/kv/2.2.3/setup/upgrading/version -[upgrade cluster]: /riak/kv/2.2.3/setup/upgrading/cluster -[upgrade mdc]: /riak/kv/2.2.3/setup/upgrading/multi-datacenter -[upgrade downgrade]: /riak/kv/2.2.3/setup/downgrade +[upgrade index]: {{< baseurl >}}riak/kv/2.2.3/setup/upgrading +[upgrade checklist]: {{< baseurl >}}riak/kv/2.2.3/setup/upgrading/checklist +[upgrade version]: {{< baseurl >}}riak/kv/2.2.3/setup/upgrading/version +[upgrade cluster]: {{< baseurl >}}riak/kv/2.2.3/setup/upgrading/cluster +[upgrade mdc]: {{< baseurl >}}riak/kv/2.2.3/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{< baseurl >}}riak/kv/2.2.3/setup/downgrade ## Configuring -[config index]: /riak/kv/2.2.3/configuring -[config basic]: /riak/kv/2.2.3/configuring/basic -[config backend]: /riak/kv/2.2.3/configuring/backend -[config manage]: /riak/kv/2.2.3/configuring/managing -[config reference]: /riak/kv/2.2.3/configuring/reference/ -[config strong consistency]: /riak/kv/2.2.3/configuring/strong-consistency -[config load balance]: /riak/kv/2.2.3/configuring/load-balancing-proxy -[config mapreduce]: /riak/kv/2.2.3/configuring/mapreduce -[config search]: /riak/kv/2.2.3/configuring/search/ +[config index]: {{< baseurl >}}riak/kv/2.2.3/configuring +[config basic]: {{< baseurl >}}riak/kv/2.2.3/configuring/basic +[config backend]: {{< baseurl >}}riak/kv/2.2.3/configuring/backend +[config manage]:
{{< baseurl >}}riak/kv/2.2.3/configuring/managing +[config reference]: {{< baseurl >}}riak/kv/2.2.3/configuring/reference/ +[config strong consistency]: {{< baseurl >}}riak/kv/2.2.3/configuring/strong-consistency +[config load balance]: {{< baseurl >}}riak/kv/2.2.3/configuring/load-balancing-proxy +[config mapreduce]: {{< baseurl >}}riak/kv/2.2.3/configuring/mapreduce +[config search]: {{< baseurl >}}riak/kv/2.2.3/configuring/search/ -[config v3 mdc]: /riak/kv/2.2.3/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.2.3/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.2.3/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl +[config v3 mdc]: {{< baseurl >}}riak/kv/2.2.3/configuring/v3-multi-datacenter +[config v3 nat]: {{< baseurl >}}riak/kv/2.2.3/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{< baseurl >}}riak/kv/2.2.3/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{< baseurl >}}riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl -[config v2 mdc]: /riak/kv/2.2.3/configuring/v2-multi-datacenter -[config v2 nat]: /riak/kv/2.2.3/configuring/v2-multi-datacenter/nat -[config v2 quickstart]: /riak/kv/2.2.3/configuring/v2-multi-datacenter/quick-start -[config v2 ssl]: /riak/kv/2.2.3/configuring/v2-multi-datacenter/ssl +[config v2 mdc]: {{< baseurl >}}riak/kv/2.2.3/configuring/v2-multi-datacenter +[config v2 nat]: {{< baseurl >}}riak/kv/2.2.3/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{< baseurl >}}riak/kv/2.2.3/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{< baseurl >}}riak/kv/2.2.3/configuring/v2-multi-datacenter/ssl ## Using -[use index]: /riak/kv/2.2.3/using/ -[use admin commands]: /riak/kv/2.2.3/using/cluster-admin-commands -[use running cluster]: /riak/kv/2.2.3/using/running-a-cluster +[use index]: {{< baseurl >}}riak/kv/2.2.3/using/ +[use admin commands]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-admin-commands +[use running cluster]: {{< baseurl >}}riak/kv/2.2.3/using/running-a-cluster ### Reference -[use ref custom code]: /riak/kv/2.2.3/using/reference/custom-code -[use ref handoff]: /riak/kv/2.2.3/using/reference/handoff -[use ref monitoring]: /riak/kv/2.2.3/using/reference/statistics-monitoring -[use ref search]: /riak/kv/2.2.3/using/reference/search -[use ref 2i]: /riak/kv/2.2.3/using/reference/secondary-indexes -[use ref snmp]: /riak/kv/2.2.3/using/reference/snmp -[use ref strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[use ref jmx]: /riak/kv/2.2.3/using/reference/jmx -[use ref obj del]: /riak/kv/2.2.3/using/reference/object-deletion/ -[use ref v3 mdc]: /riak/kv/2.2.3/using/reference/v3-multi-datacenter -[use ref v2 mdc]: /riak/kv/2.2.3/using/reference/v2-multi-datacenter +[use ref custom code]: {{< baseurl >}}riak/kv/2.2.3/using/reference/custom-code +[use ref handoff]: {{< baseurl >}}riak/kv/2.2.3/using/reference/handoff +[use ref monitoring]: {{< baseurl >}}riak/kv/2.2.3/using/reference/statistics-monitoring +[use ref search]: {{< baseurl >}}riak/kv/2.2.3/using/reference/search +[use ref 2i]: {{< baseurl >}}riak/kv/2.2.3/using/reference/secondary-indexes +[use ref snmp]: {{< baseurl >}}riak/kv/2.2.3/using/reference/snmp +[use ref strong consistency]: {{< baseurl >}}riak/kv/2.2.3/using/reference/strong-consistency +[use ref jmx]: {{< baseurl >}}riak/kv/2.2.3/using/reference/jmx +[use ref obj del]: {{< baseurl >}}riak/kv/2.2.3/using/reference/object-deletion/ +[use ref v3 mdc]: {{< baseurl >}}riak/kv/2.2.3/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{< baseurl >}}riak/kv/2.2.3/using/reference/v2-multi-datacenter ### Cluster Admin -[use admin index]: /riak/kv/2.2.3/using/admin/ -[use admin commands]: /riak/kv/2.2.3/using/admin/commands/ -[use admin riak cli]:
/riak/kv/2.2.3/using/admin/riak-cli/ -[use admin riak-admin]: /riak/kv/2.2.3/using/admin/riak-admin/ -[use admin riak control]: /riak/kv/2.2.3/using/admin/riak-control/ +[use admin index]: {{< baseurl >}}riak/kv/2.2.3/using/admin/ +[use admin commands]: {{< baseurl >}}riak/kv/2.2.3/using/admin/commands/ +[use admin riak cli]: {{< baseurl >}}riak/kv/2.2.3/using/admin/riak-cli/ +[use admin riak-admin]: {{< baseurl >}}riak/kv/2.2.3/using/admin/riak-admin/ +[use admin riak control]: {{< baseurl >}}riak/kv/2.2.3/using/admin/riak-control/ ### Cluster Operations -[cluster ops add remove node]: /riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes -[cluster ops inspect node]: /riak/kv/2.2.3/using/cluster-operations/inspecting-node -[cluster ops change info]: /riak/kv/2.2.3/using/cluster-operations/changing-cluster-info -[cluster ops load balance]: /riak/kv/2.2.3/configuring/load-balancing-proxy -[cluster ops bucket types]: /riak/kv/2.2.3/using/cluster-operations/bucket-types -[cluster ops handoff]: /riak/kv/2.2.3/using/cluster-operations/handoff -[cluster ops log]: /riak/kv/2.2.3/using/cluster-operations/logging -[cluster ops obj del]: /riak/kv/2.2.3/using/reference/object-deletion -[cluster ops backup]: /riak/kv/2.2.3/using/cluster-operations/backing-up -[cluster ops mdc]: /riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter -[cluster ops strong consistency]: /riak/kv/2.2.3/using/cluster-operations/strong-consistency -[cluster ops 2i]: /riak/kv/2.2.3/using/reference/secondary-indexes -[cluster ops v3 mdc]: /riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter -[cluster ops v2 mdc]: /riak/kv/2.2.3/using/cluster-operations/v2-multi-datacenter +[cluster ops add remove node]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/inspecting-node +[cluster ops change info]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{< baseurl >}}riak/kv/2.2.3/configuring/load-balancing-proxy +[cluster ops bucket types]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/bucket-types +[cluster ops handoff]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/handoff +[cluster ops log]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/logging +[cluster ops obj del]: {{< baseurl >}}riak/kv/2.2.3/using/reference/object-deletion +[cluster ops backup]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/backing-up +[cluster ops mdc]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{< baseurl >}}riak/kv/2.2.3/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/v2-multi-datacenter ### Repair/Recover -[repair recover index]: /riak/kv/2.2.3/using/repair-recovery -[repair recover index]: /riak/kv/2.2.3/using/repair-recovery/failure-recovery/ +[repair recover index]: {{< baseurl >}}riak/kv/2.2.3/using/repair-recovery +[repair recover index]: {{< baseurl >}}riak/kv/2.2.3/using/repair-recovery/failure-recovery/ ### Security -[security index]: /riak/kv/2.2.3/using/security/ -[security basics]: /riak/kv/2.2.3/using/security/basics -[security managing]: /riak/kv/2.2.3/using/security/managing-sources/ +[security index]: {{< baseurl >}}riak/kv/2.2.3/using/security/ +[security basics]: {{< baseurl >}}riak/kv/2.2.3/using/security/basics +[security managing]: {{< baseurl >}}riak/kv/2.2.3/using/security/managing-sources/ ### Performance -[perf index]: /riak/kv/2.2.3/using/performance/ -[perf
benchmark]: /riak/kv/2.2.3/using/performance/benchmarking -[perf open files]: /riak/kv/2.2.3/using/performance/open-files-limit/ -[perf erlang]: /riak/kv/2.2.3/using/performance/erlang -[perf aws]: /riak/kv/2.2.3/using/performance/amazon-web-services -[perf latency checklist]: /riak/kv/2.2.3/using/performance/latency-reduction +[perf index]: {{}}riak/kv/2.2.3/using/performance/ +[perf benchmark]: {{}}riak/kv/2.2.3/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.2.3/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.2.3/using/performance/erlang +[perf aws]: {{}}riak/kv/2.2.3/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.2.3/using/performance/latency-reduction ### Troubleshooting -[troubleshoot http]: /riak/kv/2.2.3/using/troubleshooting/http-204 +[troubleshoot http]: {{}}riak/kv/2.2.3/using/troubleshooting/http-204 ## Developing -[dev index]: /riak/kv/2.2.3/developing -[dev client libraries]: /riak/kv/2.2.3/developing/client-libraries -[dev data model]: /riak/kv/2.2.3/developing/data-modeling -[dev data types]: /riak/kv/2.2.3/developing/data-types -[dev kv model]: /riak/kv/2.2.3/developing/key-value-modeling +[dev index]: {{}}riak/kv/2.2.3/developing +[dev client libraries]: {{}}riak/kv/2.2.3/developing/client-libraries +[dev data model]: {{}}riak/kv/2.2.3/developing/data-modeling +[dev data types]: {{}}riak/kv/2.2.3/developing/data-types +[dev kv model]: {{}}riak/kv/2.2.3/developing/key-value-modeling ### Getting Started -[getting started]: /riak/kv/2.2.3/developing/getting-started -[getting started java]: /riak/kv/2.2.3/developing/getting-started/java -[getting started ruby]: /riak/kv/2.2.3/developing/getting-started/ruby -[getting started python]: /riak/kv/2.2.3/developing/getting-started/python -[getting started php]: /riak/kv/2.2.3/developing/getting-started/php -[getting started csharp]: /riak/kv/2.2.3/developing/getting-started/csharp -[getting started nodejs]: /riak/kv/2.2.3/developing/getting-started/nodejs -[getting started erlang]: /riak/kv/2.2.3/developing/getting-started/erlang -[getting started golang]: /riak/kv/2.2.3/developing/getting-started/golang - -[obj model java]: /riak/kv/2.2.3/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.2.3/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.2.3/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.2.3/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.2.3/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.2.3/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.2.3/developing/getting-started/golang/object-modeling +[getting started]: {{}}riak/kv/2.2.3/developing/getting-started +[getting started java]: {{}}riak/kv/2.2.3/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.2.3/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.2.3/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.2.3/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.2.3/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.2.3/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.2.3/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.2.3/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.2.3/developing/getting-started/java/object-modeling +[obj model ruby]: 
{{}}riak/kv/2.2.3/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.2.3/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.2.3/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.2.3/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.2.3/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.2.3/developing/getting-started/golang/object-modeling ### Usage -[usage index]: /riak/kv/2.2.3/developing/usage -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.2.3/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.2.3/developing/usage/conflict-resolution -[usage content types]: /riak/kv/2.2.3/developing/usage/content-types -[usage create objects]: /riak/kv/2.2.3/developing/usage/creating-objects -[usage custom extractors]: /riak/kv/2.2.3/developing/usage/custom-extractors -[usage delete objects]: /riak/kv/2.2.3/developing/usage/deleting-objects -[usage mapreduce]: /riak/kv/2.2.3/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.3/developing/usage/search -[usage search schema]: /riak/kv/2.2.3/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.2.3/developing/usage/searching-data-types -[usage 2i]: /riak/kv/2.2.3/developing/usage/secondary-indexes -[usage update objects]: /riak/kv/2.2.3/developing/usage/updating-objects +[usage index]: {{}}riak/kv/2.2.3/developing/usage +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.2.3/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.2.3/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.2.3/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.2.3/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.2.3/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.2.3/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.2.3/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search +[usage search schema]: {{}}riak/kv/2.2.3/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.2.3/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.2.3/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.2.3/developing/usage/updating-objects ### App Guide -[apps mapreduce]: /riak/kv/2.2.3/developing/app-guide/advanced-mapreduce -[apps replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties -[apps strong consistency]: /riak/kv/2.2.3/developing/app-guide/strong-consistency +[apps mapreduce]: {{}}riak/kv/2.2.3/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.2.3/developing/app-guide/strong-consistency ### API -[dev api backend]: /riak/kv/2.2.3/developing/api/backend -[dev api http]: /riak/kv/2.2.3/developing/api/http -[dev api http status]: /riak/kv/2.2.3/developing/api/http/status -[dev api pbc]: /riak/kv/2.2.3/developing/api/protocol-buffers/ +[dev api backend]: {{}}riak/kv/2.2.3/developing/api/backend +[dev api http]: {{}}riak/kv/2.2.3/developing/api/http +[dev api http status]: {{}}riak/kv/2.2.3/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.2.3/developing/api/protocol-buffers/ ## Learn -[learn new nosql]: 
/riak/kv/learn/new-to-nosql -[learn use cases]: /riak/kv/learn/use-cases -[learn why riak]: /riak/kv/learn/why-riak-kv +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv -[glossary]: /riak/kv/2.2.3/learn/glossary/ -[glossary aae]: /riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae -[glossary read rep]: /riak/kv/2.2.3/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode +[glossary]: {{}}riak/kv/2.2.3/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.2.3/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode -[concept aae]: /riak/kv/2.2.3/learn/concepts/active-anti-entropy/ -[concept buckets]: /riak/kv/2.2.3/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.2.3/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.2.3/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters/ -[concept crdts]: /riak/kv/2.2.3/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.3/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.3/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.2.3/learn/concepts/vnodes +[concept aae]: {{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.2.3/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.2.3/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.2.3/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.2.3/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.3/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.2.3/learn/concepts/vnodes ## Community -[community]: /community -[community projects]: /community/projects -[reporting bugs]: /community/reporting-bugs -[taishi]: /community/taishi +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi diff --git a/content/riak/kv/2.2.3/add-ons.md b/content/riak/kv/2.2.3/add-ons.md index 2a858bedd8..406cb97780 100644 --- a/content/riak/kv/2.2.3/add-ons.md +++ b/content/riak/kv/2.2.3/add-ons.md @@ -16,4 +16,4 @@ toc: true Here at Basho, we've developed integrations between Riak KV and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
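The pair of changed lines below is the smallest instance of the rewrite this changeset applies everywhere: hard-coded absolute link roots become Hugo's `baseurl` shortcode, so generated links still resolve when the site is served under a path prefix. A minimal sketch of the pattern, using the spaced `{{< baseurl >}}` spelling that appears verbatim further down in this changeset:

```markdown
<!-- before: absolute path, only correct when the site lives at the domain root -->
[Riak Redis Add-on](/riak/kv/2.2.3/add-ons/redis/)

<!-- after: the site prefix is injected at build time by the shortcode -->
[Riak Redis Add-on]({{< baseurl >}}riak/kv/2.2.3/add-ons/redis/)
```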
-* [Riak Redis Add-on](/riak/kv/2.2.3/add-ons/redis/) \ No newline at end of file +* [Riak Redis Add-on]({{}}riak/kv/2.2.3/add-ons/redis/) \ No newline at end of file diff --git a/content/riak/kv/2.2.3/add-ons/redis/developing-rra.md b/content/riak/kv/2.2.3/add-ons/redis/developing-rra.md index 000b068106..0c37f9873c 100644 --- a/content/riak/kv/2.2.3/add-ons/redis/developing-rra.md +++ b/content/riak/kv/2.2.3/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.2.3/developing/api/http +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.2.3/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.2.3/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.2.3/learn/concepts/causal-context +[apps replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.2.3/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.2.3/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/kv/2.2.3/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.2.3/add-ons/redis/redis-add-on-features.md index 4d5e0ab6fd..884fb23737 100644 --- a/content/riak/kv/2.2.3/add-ons/redis/redis-add-on-features.md +++ b/content/riak/kv/2.2.3/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.2.3/add-ons/redis/set-up-rra.md b/content/riak/kv/2.2.3/add-ons/redis/set-up-rra.md index 717cc4473e..3bfb084a8e 100644 --- a/content/riak/kv/2.2.3/add-ons/redis/set-up-rra.md +++ b/content/riak/kv/2.2.3/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/kv/2.2.3/setup/installing -[perf open files]: /riak/kv/2.2.3/using/performance/open-files-limit/#changing-the-limit +[install index]: {{}}riak/kv/2.2.3/setup/installing +[perf open files]: {{}}riak/kv/2.2.3/using/performance/open-files-limit/#changing-the-limit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. diff --git a/content/riak/kv/2.2.3/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.2.3/add-ons/redis/set-up-rra/deployment-models.md index 59572e7385..db154b5d7d 100644 --- a/content/riak/kv/2.2.3/add-ons/redis/set-up-rra/deployment-models.md +++ b/content/riak/kv/2.2.3/add-ons/redis/set-up-rra/deployment-models.md @@ -13,9 +13,9 @@ toc: true commercial_offering: true --- -[Local-deployment]: /images/redis/rra_deployment_local.png -[Colocated-deployment]: /images/redis/rra_deployment_colocated.png -[Distributed-deployment]: /images/redis/rra_deployment_distributed.png +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png ## Deployment Models @@ -24,7 +24,7 @@ commercial_offering: true In a local cache deployment, the RRA and Redis are deployed to the application server. -![Local-deployment](/images/redis/rra_deployment_local.png) +![Local-deployment]({{}}images/redis/rra_deployment_local.png) Connections: @@ -65,7 +65,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so are not multiplied by the number of servers. -![Colocated-deployment](/images/redis/rra_deployment_colocated.png) +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) Connections: @@ -103,7 +103,7 @@ Disadvantages: In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes. 
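As a rough way to observe the read-through and write-around behavior described above in any of these deployments, the sketch below assumes a local cache proxy listening on port 22122 (an assumption; use the listen port from your own RRA configuration) and a stock `redis-cli`:

```bash
# Read-through: a miss in Redis falls through to Riak KV and populates the cache
redis-cli -p 22122 GET test:key1
# A repeat GET within CACHE_TTL should be served from Redis
redis-cli -p 22122 GET test:key1

# Write-around: SET and DEL are forwarded to Riak KV, invalidating the cached entry
redis-cli -p 22122 SET test:key1 "value1"
redis-cli -p 22122 DEL test:key1
```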
-![Distributed-deployment](/images/redis/rra_deployment_distributed.png) +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) Connections: diff --git a/content/riak/kv/2.2.3/add-ons/redis/using-rra.md b/content/riak/kv/2.2.3/add-ons/redis/using-rra.md index d7438a50ff..57559b80bc 100644 --- a/content/riak/kv/2.2.3/add-ons/redis/using-rra.md +++ b/content/riak/kv/2.2.3/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.2.3/developing/api/http/ +[dev api http]: {{}}riak/kv/2.2.3/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/kv/2.2.3/configuring/backend.md b/content/riak/kv/2.2.3/configuring/backend.md index 4ac21b4782..5a2c6205cf 100644 --- a/content/riak/kv/2.2.3/configuring/backend.md +++ b/content/riak/kv/2.2.3/configuring/backend.md @@ -12,10 +12,10 @@ menu: toc: true --- -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.3/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.3/setup/planning/backend/multi +[plan backend leveldb]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.3/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.3/setup/planning/backend/multi ## LevelDB diff --git a/content/riak/kv/2.2.3/configuring/basic.md b/content/riak/kv/2.2.3/configuring/basic.md index f60ef1dbc1..40f908993c 100644 --- a/content/riak/kv/2.2.3/configuring/basic.md +++ b/content/riak/kv/2.2.3/configuring/basic.md @@ -15,26 +15,26 @@ aliases: - /riak/kv/2.2.3/ops/building/configuration/ --- -[config reference]: /riak/kv/2.2.3/configuring/reference -[use running cluster]: /riak/kv/2.2.3/using/running-a-cluster -[use admin riak-admin#member-status]: /riak/kv/2.2.3/using/admin/riak-admin/#member-status -[perf erlang]: /riak/kv/2.2.3/using/performance/erlang -[plan start]: /riak/kv/2.2.3/setup/planning/start -[plan best practices]: /riak/kv/2.2.3/setup/planning/best-practices -[cluster ops backup]: /riak/kv/2.2.3/using/cluster-operations/backing-up -[cluster ops add remove node]: /riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes -[plan backend]: /riak/kv/2.2.3/setup/planning/backend -[plan backend multi]: /riak/kv/2.2.3/setup/planning/backend/multi -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[apps replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties -[concept buckets]: /riak/kv/2.2.3/learn/concepts/buckets -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[perf benchmark]: /riak/kv/2.2.3/using/performance/benchmarking -[perf open files]: /riak/kv/2.2.3/using/performance/open-files-limit -[perf index]: /riak/kv/2.2.3/using/performance -[perf aws]: /riak/kv/2.2.3/using/performance/amazon-web-services -[Cluster Capacity Planning]: /riak/kv/2.2.3/setup/planning/cluster-capacity/#ring-size-number-of-partitions +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[use running cluster]: {{}}riak/kv/2.2.3/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.2.3/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.2.3/using/performance/erlang +[plan start]: 
{{}}riak/kv/2.2.3/setup/planning/start +[plan best practices]: {{}}riak/kv/2.2.3/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.2.3/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.2.3/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.2.3/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.2.3/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.2.3/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.2.3/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.2.3/using/performance +[perf aws]: {{}}riak/kv/2.2.3/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.2.3/setup/planning/cluster-capacity/#ring-size-number-of-partitions This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed @@ -208,15 +208,15 @@ buckets.default.r = 3 For more on bucket properties, we recommend reviewing our docs on [buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." -* [Part 1](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -* [Part 2](http://basho.com/riaks-config-behaviors-part-2/) -* [Part 3](http://basho.com/riaks-config-behaviors-part-3/) -* [Part 4](http://basho.com/riaks-config-behaviors-part-4/) -* [Epilogue](http://basho.com/riaks-config-behaviors-epilogue/) +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) If the default bucket properties are modified in your configuration files and the node is restarted, any existing buckets will **not** be -directly impacted, although the mechanism described in [HTTP Reset Bucket Properties](/riak/kv/2.2.3/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.2.3/developing/api/http/reset-bucket-props) can be used to force them to pick up the new defaults. 
## System tuning diff --git a/content/riak/kv/2.2.3/configuring/global-object-expiration.md b/content/riak/kv/2.2.3/configuring/global-object-expiration.md index 15c8dca51c..851c2fab5d 100644 --- a/content/riak/kv/2.2.3/configuring/global-object-expiration.md +++ b/content/riak/kv/2.2.3/configuring/global-object-expiration.md @@ -10,7 +10,6 @@ menu: project: "riak_kv" project_version: "2.2.3" toc: true -canonical_link: "https://docs.basho.com/riak/kv/latest/configuring/global-object-expiration" --- [ttl]: https://en.wikipedia.org/wiki/Time_to_live diff --git a/content/riak/kv/2.2.3/configuring/load-balancing-proxy.md b/content/riak/kv/2.2.3/configuring/load-balancing-proxy.md index 6ac8809b14..702f76139a 100644 --- a/content/riak/kv/2.2.3/configuring/load-balancing-proxy.md +++ b/content/riak/kv/2.2.3/configuring/load-balancing-proxy.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/ops/advanced/configs/load-balanacing-proxy/ --- -[perf open files]: /riak/kv/2.2.3/using/performance/open-files-limit +[perf open files]: {{}}riak/kv/2.2.3/using/performance/open-files-limit The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- diff --git a/content/riak/kv/2.2.3/configuring/managing.md b/content/riak/kv/2.2.3/configuring/managing.md index 200c38de9d..4e863401dd 100644 --- a/content/riak/kv/2.2.3/configuring/managing.md +++ b/content/riak/kv/2.2.3/configuring/managing.md @@ -12,9 +12,9 @@ menu: toc: true --- -[use admin riak cli]: /riak/kv/2.2.3/using/admin/riak-cli -[use admin riak cli#chkconfig]: /riak/kv/2.2.3/using/admin/riak-cli/#chkconfig -[config reference#search]: /riak/kv/2.2.3/configuring/reference/#search +[use admin riak cli]: {{}}riak/kv/2.2.3/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.2.3/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.2.3/configuring/reference/#search ## Retrieving a Configuration Listing diff --git a/content/riak/kv/2.2.3/configuring/mapreduce.md b/content/riak/kv/2.2.3/configuring/mapreduce.md index 004b0e0749..d3cad9f31b 100644 --- a/content/riak/kv/2.2.3/configuring/mapreduce.md +++ b/content/riak/kv/2.2.3/configuring/mapreduce.md @@ -15,9 +15,9 @@ aliases: - /riak/kv/2.2.3/ops/advanced/configs/mapreduce/ --- -[usage mapreduce]: /riak/kv/2.2.3/developing/usage/mapreduce -[config reference#appconfig]: /riak/kv/2.2.3/configuring/reference/#app-config -[usage secondary-indexes]: /riak/kv/2.2.3/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.2.3/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.2.3/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.2.3/developing/usage/secondary-indexes ## Configuring MapReduce diff --git a/content/riak/kv/2.2.3/configuring/reference.md b/content/riak/kv/2.2.3/configuring/reference.md index 17c86ce1c7..8c571739c8 100644 --- a/content/riak/kv/2.2.3/configuring/reference.md +++ b/content/riak/kv/2.2.3/configuring/reference.md @@ -200,7 +200,7 @@ executables are stored. +as active anti-entropy data, and cluster metadata. @@ -1684,7 +1684,7 @@ abandons the leader (in milliseconds). 
This must be set greater than the diff --git a/content/riak/kv/2.2.3/configuring/search.md b/content/riak/kv/2.2.3/configuring/search.md index 72367b3222..9fab7664df 100644 --- a/content/riak/kv/2.2.3/configuring/search.md +++ b/content/riak/kv/2.2.3/configuring/search.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.3/ops/advanced/configs/search/ --- -[usage search]: /riak/kv/2.2.3/developing/usage/search -[usage search schema]: /riak/kv/2.2.3/developing/usage/search-schemas -[usage search data types]: /riak/kv/2.2.3/developing/usage/searching-data-types -[usage custom extractors]: /riak/kv/2.2.3/developing/usage/custom-extractors -[cluster-ops aae throttle]: /riak/kv/2.2.3/using/cluster-operations/active-anti-entropy/#throttling -[config reference]: /riak/kv/2.2.3/configuring/reference -[config reference#search]: /riak/kv/2.2.3/configuring/reference/#search -[glossary aae]: /riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae -[security index]: /riak/kv/2.2.3/using/security/ +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search +[usage search schema]: {{}}riak/kv/2.2.3/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.2.3/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/2.2.3/developing/usage/custom-extractors +[cluster-ops aae throttle]: {{}}riak/kv/2.2.3/using/cluster-operations/active-anti-entropy/#throttling +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[config reference#search]: {{}}riak/kv/2.2.3/configuring/reference/#search +[glossary aae]: {{}}riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/2.2.3/using/security/ [java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads [java se docs]: http://www.oracle.com/technetwork/java/javase/documentation @@ -150,15 +150,15 @@ Valid values: `on` or `off` ### `search.index.error_threshold.failure_count` -The number of failures encountered while updating a search index within [`search.queue.error_threshold.failure_interval`](#search-queue-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. +The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. Valid values: Integer ### `search.index.error_threshold.failure_interval` -The window of time during which `search.queue.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. +The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. -If [`search.queue.error_threshold.failure_count`](#search-queue-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.queue.error_threshold.reset_interval`](search-queue-error-threshold-reset-interval) has passed. +If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed. 
Valid values: Milliseconds diff --git a/content/riak/kv/2.2.3/configuring/strong-consistency.md b/content/riak/kv/2.2.3/configuring/strong-consistency.md index 6d8b5b11e6..c627d3ed37 100644 --- a/content/riak/kv/2.2.3/configuring/strong-consistency.md +++ b/content/riak/kv/2.2.3/configuring/strong-consistency.md @@ -12,29 +12,29 @@ menu: toc: true --- -[apps strong consistency]: /riak/kv/2.2.3/developing/app-guide/strong-consistency -[concept strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[cluster ops add remove node]: /riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes -[config reference#strong-cons]: /riak/kv/2.2.3/configuring/reference/#strong-consistency -[use admin riak cli]: /riak/kv/2.2.3/using/admin/riak-cli -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[concept buckets]: /riak/kv/2.2.3/learn/concepts/buckets -[cluster ops bucket types]: /riak/kv/2.2.3/using/cluster-operations/bucket-types -[use admin riak-admin#ensemble]: /riak/kv/2.2.3/using/admin/riak-admin/#riak-admin-ensemble-status -[use admin riak-admin]: /riak/kv/2.2.3/using/admin/riak-admin -[config reference#advanced]: /riak/kv/2.2.3/configuring/reference/#advanced-configuration -[plan cluster capacity]: /riak/kv/2.2.3/setup/planning/cluster-capacity -[cluster ops strong consistency]: /riak/kv/2.2.3/using/cluster-operations/strong-consistency -[apps replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties -[concept causal context]: /riak/kv/2.2.3/learn/concepts/causal-context -[dev data types]: /riak/kv/2.2.3/developing/data-types -[glossary aae]: /riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae -[cluster ops 2i]: /riak/kv/2.2.3/using/reference/secondary-indexes -[usage commit hooks]: /riak/kv/2.2.3/developing/usage/commit-hooks -[cluster ops obj del]: /riak/kv/2.2.3/using/reference/object-deletion -[dev client libraries]: /riak/kv/2.2.3/developing/client-libraries +[apps strong consistency]: {{}}riak/kv/2.2.3/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.2.3/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.2.3/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.2.3/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.2.3/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.2.3/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.2.3/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.2.3/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.2.3/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.2.3/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.2.3/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.2.3/developing/data-types +[glossary 
aae]: {{}}riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.2.3/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.2.3/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.2.3/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.2.3/developing/client-libraries > **Please Note:** > @@ -308,11 +308,11 @@ The following table provides a guide to `ensemble-status` output: Item | Meaning :----|:------- -`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. `Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. -`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. -`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. -`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. `Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
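To see these fields for a live cluster, the `ensemble-status` command referenced above can be run from any node. A short sketch, assuming a default install with `riak-admin` on the PATH and `strong_consistency = on` already set in riak.conf on at least three nodes:

```bash
# Summarize the consensus subsystem (Enabled, Active, Ring Ready, Validation, ...)
riak-admin ensemble-status

# Drill into a single ensemble by its ID, e.g. the root ensemble
riak-admin ensemble-status root
```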
**Note**: The **root ensemble**, designated by `root` in the sample diff --git a/content/riak/kv/2.2.3/configuring/v2-multi-datacenter.md b/content/riak/kv/2.2.3/configuring/v2-multi-datacenter.md index 26fa73b8cb..401acfeafc 100644 --- a/content/riak/kv/2.2.3/configuring/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.3/configuring/v2-multi-datacenter.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.3/ops/mdc/v2/configuration --- -[config v2 ssl]: /riak/kv/2.2.3/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.2.3/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.3/configuring/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.3/configuring/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication capabilities offer a diff --git a/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/nat.md index fe1452cd8e..5a15155147 100644 --- a/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/nat.md +++ b/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/nat.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.3/ops/mdc/v2/nat --- -[config v2 ssl]: /riak/kv/2.2.3/configuring/v2-multi-datacenter/ssl +[config v2 ssl]: {{}}riak/kv/2.2.3/configuring/v2-multi-datacenter/ssl {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.3/configuring/v3-multi-datacenter/nat/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.3/configuring/v3-multi-datacenter/nat/) instead. {{% /note %}} Riak Enterprise supports replication of data on networks that use static diff --git a/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/quick-start.md index 1a9ed6fa79..b57158cbaa 100644 --- a/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/quick-start.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.3/configuring/v3-multi-datacenter/quick-start/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.3/configuring/v3-multi-datacenter/quick-start/) instead. {{% /note %}} The Riak Multi-Datacenter Replication Quick Start will walk you through diff --git a/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/ssl.md index 51b4ea8802..b0df5b3301 100644 --- a/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/ssl.md +++ b/content/riak/kv/2.2.3/configuring/v2-multi-datacenter/ssl.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. 
Please use [v3]({{}}riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl/) instead. {{% /note %}} ## Features diff --git a/content/riak/kv/2.2.3/configuring/v3-multi-datacenter.md b/content/riak/kv/2.2.3/configuring/v3-multi-datacenter.md index 18ecda20ea..9f71443339 100644 --- a/content/riak/kv/2.2.3/configuring/v3-multi-datacenter.md +++ b/content/riak/kv/2.2.3/configuring/v3-multi-datacenter.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.3/ops/mdc/v3/configuration --- -[config reference#advanced]: /riak/kv/2.2.3/configuring/reference/#advanced-configuration -[config v3 ssl#verify-peer]: /riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates +[config reference#advanced]: {{}}riak/kv/2.2.3/configuring/reference/#advanced-configuration +[config v3 ssl#verify-peer]: {{}}riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates > **Note on the `cluster_mgr` setting** > diff --git a/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/nat.md index 27f396784e..41efe7ee61 100644 --- a/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/nat.md +++ b/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/nat.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.3/ops/mdc/v3/nat --- -[config v3 ssl]: /riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl +[config v3 ssl]: {{}}riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl Riak Enterprise Version 3 Replication supports replication of data on networks that use static NAT. diff --git a/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/quick-start.md index bc8104abb7..c1ba3f63a8 100644 --- a/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/quick-start.md +++ b/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/quick-start.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.2.3/ops/mdc/v3/quick-start --- -[perf index]: /riak/kv/2.2.3/using/performance -[config v3 mdc]: /riak/kv/2.2.3/configuring/v3-multi-datacenter -[cluster ops v3 mdc]: /riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter +[perf index]: {{}}riak/kv/2.2.3/using/performance +[config v3 mdc]: {{}}riak/kv/2.2.3/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter This guide will walk you through the process of configuring Riak's v3 Replication to perform replication between two sample Riak clusters on diff --git a/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl.md index 29d2595492..211aec284c 100644 --- a/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl.md +++ b/content/riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.3/ops/mdc/v3/ssl --- -[config reference#advanced.config]: /riak/kv/2.2.3/configuring/reference/#the-advanced-config-file +[config reference#advanced.config]: {{}}riak/kv/2.2.3/configuring/reference/#the-advanced-config-file ## Features diff --git a/content/riak/kv/2.2.3/developing/api/backend.md b/content/riak/kv/2.2.3/developing/api/backend.md index 26864289e5..7d701999e1 100644 --- a/content/riak/kv/2.2.3/developing/api/backend.md +++ b/content/riak/kv/2.2.3/developing/api/backend.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/backend-api --- -[plan backend]: /riak/kv/2.2.3/setup/planning/backend +[plan backend]: {{}}riak/kv/2.2.3/setup/planning/backend Riak's storage API uniformly applies to 
all of the [supported backends][plan backend]. This page presents the details of diff --git a/content/riak/kv/2.2.3/developing/api/http.md b/content/riak/kv/2.2.3/developing/api/http.md index 24a8fd093a..13b3934377 100644 --- a/content/riak/kv/2.2.3/developing/api/http.md +++ b/content/riak/kv/2.2.3/developing/api/http.md @@ -29,21 +29,21 @@ slashes. Use a URL-escaping library or replace slashes with `%2F`. Method | URL | Doc :------|:----|:--- -`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties](/riak/kv/2.2.3/developing/api/http/get-bucket-props) -`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties](/riak/kv/2.2.3/developing/api/http/set-bucket-props) -`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties](/riak/kv/2.2.3/developing/api/http/reset-bucket-props) -`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets](/riak/kv/2.2.3/developing/api/http/list-buckets) -`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys](/riak/kv/2.2.3/developing/api/http/list-keys) +`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.2.3/developing/api/http/get-bucket-props) +`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.2.3/developing/api/http/set-bucket-props) +`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.2.3/developing/api/http/reset-bucket-props) +`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.2.3/developing/api/http/list-buckets) +`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.2.3/developing/api/http/list-keys) ## Object-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object](/riak/kv/2.2.3/developing/api/http/fetch-object) -`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object](/riak/kv/2.2.3/developing/api/http/store-object) -`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.2.3/developing/api/http/store-object) -`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object](/riak/kv/2.2.3/developing/api/http/store-object) -`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object](/riak/kv/2.2.3/developing/api/http/delete-object) +`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.2.3/developing/api/http/fetch-object) +`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{}}riak/kv/2.2.3/developing/api/http/store-object) +`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.2.3/developing/api/http/store-object) +`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.2.3/developing/api/http/store-object) +`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.2.3/developing/api/http/delete-object) ## Riak-Data-Type-related Operations @@ -53,9 +53,9 @@ Method | URL `POST` | `/types/<type>/buckets/<bucket>/datatypes` `POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>` -For documentation on the HTTP API for [Riak Data Types](/riak/kv/2.2.3/learn/concepts/crdts), -see the `curl` examples in [Using Data Types](/riak/kv/2.2.3/developing/data-types/#usage-examples) -and subpages e.g. [sets](/riak/kv/2.2.3/developing/data-types/sets). +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.2.3/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.2.3/developing/data-types/#usage-examples) +and subpages e.g. [sets]({{}}riak/kv/2.2.3/developing/data-types/sets).
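As a concrete companion to the tables above, here is a minimal round trip against a single local node. The host and port are the stock HTTP listener defaults; the bucket type, bucket, and key names are invented for illustration:

```bash
# Store an object under the default bucket type
curl -XPUT http://localhost:8098/types/default/buckets/fruits/keys/apple \
  -H "Content-Type: text/plain" \
  -d "crisp"

# Fetch it back
curl http://localhost:8098/types/default/buckets/fruits/keys/apple

# Inspect the bucket's properties
curl http://localhost:8098/types/default/buckets/fruits/props
```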
Advanced users may consult the technical documentation inside the Riak KV internal module `riak_kv_wm_crdt`. @@ -64,26 +64,26 @@ KV internal module `riak_kv_wm_crdt`. Method | URL | Doc :------|:----|:--- -`POST` | `/mapred` | [HTTP MapReduce](/riak/kv/2.2.3/developing/api/http/mapreduce) -`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<value>` | [HTTP Secondary Indexes](/riak/kv/2.2.3/developing/api/http/secondary-indexes) -`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<start>/<end>` | [HTTP Secondary Indexes](/riak/kv/2.2.3/developing/api/http/secondary-indexes) +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.2.3/developing/api/http/mapreduce) +`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.2.3/developing/api/http/secondary-indexes) +`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.2.3/developing/api/http/secondary-indexes) ## Server-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/ping` | [HTTP Ping](/riak/kv/2.2.3/developing/api/http/ping) -`GET` | `/stats` | [HTTP Status](/riak/kv/2.2.3/developing/api/http/status) -`GET` | `/` | [HTTP List Resources](/riak/kv/2.2.3/developing/api/http/list-resources) +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.2.3/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.2.3/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.2.3/developing/api/http/list-resources) ## Search-related Operations Method | URL | Doc :------|:----|:--- -`GET` | `/search/query/<index_name>` | [HTTP Search Query](/riak/kv/2.2.3/developing/api/http/search-query) -`GET` | `/search/index` | [HTTP Search Index Info](/riak/kv/2.2.3/developing/api/http/search-index-info) -`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index](/riak/kv/2.2.3/developing/api/http/fetch-search-index) -`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index](/riak/kv/2.2.3/developing/api/http/store-search-index) -`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index](/riak/kv/2.2.3/developing/api/http/delete-search-index) -`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema](/riak/kv/2.2.3/developing/api/http/fetch-search-schema) -`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema](/riak/kv/2.2.3/developing/api/http/store-search-schema) +`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{}}riak/kv/2.2.3/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.2.3/developing/api/http/search-index-info) +`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{}}riak/kv/2.2.3/developing/api/http/fetch-search-index) +`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{}}riak/kv/2.2.3/developing/api/http/store-search-index) +`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{}}riak/kv/2.2.3/developing/api/http/delete-search-index) +`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.2.3/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{}}riak/kv/2.2.3/developing/api/http/store-search-schema) diff --git a/content/riak/kv/2.2.3/developing/api/http/counters.md b/content/riak/kv/2.2.3/developing/api/http/counters.md index 1da3449809..a20e46de81 100644 --- a/content/riak/kv/2.2.3/developing/api/http/counters.md +++ b/content/riak/kv/2.2.3/developing/api/http/counters.md @@ -54,7 +54,7 @@ GET /buckets/BUCKET/counters/KEY ## Response -The regular POST/PUT ([HTTP Store Object](/riak/kv/2.2.3/developing/api/http/store-object)) and GET ([HTTP Fetch Object](/riak/kv/2.2.3/developing/api/http/fetch-object)) responses apply here.
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.2.3/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.2.3/developing/api/http/fetch-object)) responses apply here. Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. diff --git a/content/riak/kv/2.2.3/developing/api/http/fetch-object.md b/content/riak/kv/2.2.3/developing/api/http/fetch-object.md index 9e0da3a9e2..e5b4a00c51 100644 --- a/content/riak/kv/2.2.3/developing/api/http/fetch-object.md +++ b/content/riak/kv/2.2.3/developing/api/http/fetch-object.md @@ -41,14 +41,14 @@ response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc Optional query parameters: * `r` - (read quorum) how many replicas need to agree when retrieving the -object ([default is defined by the bucket](/riak/kv/2.2.3/developing/api/http/set-bucket-props)) +object ([default is defined by the bucket]({{}}riak/kv/2.2.3/developing/api/http/set-bucket-props)) * `pr` - how many primary replicas need to be online when doing the read -([default is defined by the bucket](/riak/kv/2.2.3/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.2.3/developing/api/http/set-bucket-props)) * `basic_quorum` - whether to return early in some failure cases (eg. when r=1 and you get 2 errors and a success `basic_quorum=true` would return an error) -([default is defined by the bucket](/riak/kv/2.2.3/developing/api/http/set-bucket-props)) +([default is defined by the bucket]({{}}riak/kv/2.2.3/developing/api/http/set-bucket-props)) * `notfound_ok` - whether to treat notfounds as successful reads for the -purposes of R ([default is defined by the bucket](/riak/kv/2.2.3/developing/api/http/set-bucket-props)) +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.2.3/developing/api/http/set-bucket-props)) * `vtag` - when accessing an object with siblings, which sibling to retrieve. Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. @@ -75,7 +75,7 @@ Important headers: and validation-based caching * `Last-Modified` - a timestamp for when the object was last written, in HTTP datetime format -* `Link` - user- and system-defined links to other resources. [Read more about Links.](/riak/kv/2.2.3/learn/glossary/#links) +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.2.3/learn/glossary/#links) The body of the response will be the contents of the object except when siblings are present. diff --git a/content/riak/kv/2.2.3/developing/api/http/fetch-search-index.md b/content/riak/kv/2.2.3/developing/api/http/fetch-search-index.md index d6d9e42599..ca7c03744b 100644 --- a/content/riak/kv/2.2.3/developing/api/http/fetch-search-index.md +++ b/content/riak/kv/2.2.3/developing/api/http/fetch-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/http/fetch-search-index --- -Retrieves information about a Riak Search [index](/riak/kv/2.2.3/developing/usage/search/#simple-setup). +Retrieves information about a Riak Search [index]({{}}riak/kv/2.2.3/developing/usage/search/#simple-setup). 
## Request @@ -36,7 +36,7 @@ GET /search/index/ ## Response If the index is found, Riak will output a JSON object describing the -index, including its name, the [`n_val`](/riak/kv/2.2.3/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema](/riak/kv/2.2.3/developing/usage/search-schemas) used by the index. Here is an example: +index, including its name, the [`n_val`]({{}}riak/kv/2.2.3/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.2.3/developing/usage/search-schemas) used by the index. Here is an example: ```json { diff --git a/content/riak/kv/2.2.3/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.2.3/developing/api/http/fetch-search-schema.md index 9c804a77b1..e9d111cbc4 100644 --- a/content/riak/kv/2.2.3/developing/api/http/fetch-search-schema.md +++ b/content/riak/kv/2.2.3/developing/api/http/fetch-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/http/fetch-search-schema --- -Retrieves a Riak KV [search schema](/riak/kv/2.2.3/developing/usage/search-schemas). +Retrieves a Riak KV [search schema]({{}}riak/kv/2.2.3/developing/usage/search-schemas). ## Request diff --git a/content/riak/kv/2.2.3/developing/api/http/get-bucket-props.md b/content/riak/kv/2.2.3/developing/api/http/get-bucket-props.md index 23892168d1..5bf7988a6c 100644 --- a/content/riak/kv/2.2.3/developing/api/http/get-bucket-props.md +++ b/content/riak/kv/2.2.3/developing/api/http/get-bucket-props.md @@ -33,7 +33,7 @@ Optional query parameters (only valid for the old format): * `props` - whether to return the bucket properties (`true` is the default) * `keys` - whether to return the keys stored in the bucket. (`false` is the -default). See also [HTTP List Keys](/riak/kv/2.2.3/developing/api/http/list-keys). +default). See also [HTTP List Keys]({{}}riak/kv/2.2.3/developing/api/http/list-keys). ## Response @@ -49,8 +49,8 @@ The JSON object in the response will contain up to two entries, `"props"` and `"keys"`, which are present or missing, according to the optional query parameters. The default is for only `"props"` to be present. -See [HTTP Set Bucket Properties](/riak/kv/2.2.3/developing/api/http/set-bucket-props) for more information about the available -bucket properties. See [Managing Bucket Types Through the Command Line](http://docs.basho.com/riak/kv/2.2.0/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. +See [HTTP Set Bucket Properties]({{}}riak/kv/2.2.3/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.2.3/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. ## Example diff --git a/content/riak/kv/2.2.3/developing/api/http/link-walking.md b/content/riak/kv/2.2.3/developing/api/http/link-walking.md index 2180062ec0..9173511201 100644 --- a/content/riak/kv/2.2.3/developing/api/http/link-walking.md +++ b/content/riak/kv/2.2.3/developing/api/http/link-walking.md @@ -21,8 +21,8 @@ This feature is deprecated and will be removed in a future version. 
Link walking (traversal) finds and returns objects by following links attached to them, starting from the object specified by the bucket and key portion. It -is a special case of [MapReduce](/riak/kv/2.2.3/developing/usage/mapreduce), and can be expressed more verbosely as such. -[Read more about Links](/riak/kv/2.2.3/learn/glossary/#links). +is a special case of [MapReduce]({{< baseurl >}}riak/kv/2.2.3/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{< baseurl >}}riak/kv/2.2.3/learn/glossary/#links). ## Request @@ -68,7 +68,7 @@ single object that was found. If no objects were found or "keep" was not set on the phase, no chunks will be present in that phase. Objects inside phase results will include `Location` headers that can be used to determine bucket and key. In fact, you can treat each object-chunk similarly to a complete -response from [fetching the object](/riak/kv/2.2.3/developing/api/http/fetch-object), without the status +response from [fetching the object]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/fetch-object), without the status code. ## Example diff --git a/content/riak/kv/2.2.3/developing/api/http/list-resources.md b/content/riak/kv/2.2.3/developing/api/http/list-resources.md index a07d54e007..f4dcee1c9a 100644 --- a/content/riak/kv/2.2.3/developing/api/http/list-resources.md +++ b/content/riak/kv/2.2.3/developing/api/http/list-resources.md @@ -20,14 +20,14 @@ automatically recognize the location of the resources for specific operations. The standard resources are: -* `riak_kv_wm_buckets` - [Bucket Operations](/riak/kv/2.2.3/developing/api/http/#bucket-operations) -* `riak_kv_wm_index` - [HTTP Secondary Indexes](/riak/kv/2.2.3/developing/api/http/secondary-indexes) -* `riak_kv_wm_link_walker` - [HTTP Link Walking](/riak/kv/2.2.3/developing/api/http/link-walking) -* `riak_kv_wm_mapred` - [HTTP MapReduce](/riak/kv/2.2.3/developing/api/http/mapreduce) -* `riak_kv_wm_object`- [Object/Key Operations](/riak/kv/2.2.3/developing/api/http/#object-key-operations) -* `riak_kv_wm_ping` - [HTTP Ping](/riak/kv/2.2.3/developing/api/http/ping) -* `riak_kv_wm_props` - [HTTP Set Bucket Properties](/riak/kv/2.2.3/developing/api/http/set-bucket-props) -* `riak_kv_wm_stats` - [HTTP Status](/riak/kv/2.2.3/developing/api/http/status) +* `riak_kv_wm_buckets` - [Bucket Operations]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/status) ## Request diff --git a/content/riak/kv/2.2.3/developing/api/http/mapreduce.md b/content/riak/kv/2.2.3/developing/api/http/mapreduce.md index 8d3dc76148..1cddf7a818 100644 --- a/content/riak/kv/2.2.3/developing/api/http/mapreduce.md +++ b/content/riak/kv/2.2.3/developing/api/http/mapreduce.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/http/mapreduce --- -[MapReduce](/riak/kv/2.2.3/developing/usage/mapreduce) is a generic way to query Riak by specifying
inputs and constructing a set of map, reduce, and link phases through which data will flow. +[MapReduce]({{< baseurl >}}riak/kv/2.2.3/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. ## Request @@ -24,7 +24,7 @@ POST /mapred ``` Important headers: -* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce](/riak/kv/2.2.3/developing/usage/mapreduce) page. +* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{< baseurl >}}riak/kv/2.2.3/developing/usage/mapreduce) page. Optional query parameters: * `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. diff --git a/content/riak/kv/2.2.3/developing/api/http/search-index-info.md b/content/riak/kv/2.2.3/developing/api/http/search-index-info.md index 2e85390cc9..ac338805b7 100644 --- a/content/riak/kv/2.2.3/developing/api/http/search-index-info.md +++ b/content/riak/kv/2.2.3/developing/api/http/search-index-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/http/search-index-info --- -Retrieves information about all currently available [Search indexes](/riak/kv/2.2.3/developing/usage/search) in JSON format. +Retrieves information about all currently available [Search indexes]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search) in JSON format. ## Request diff --git a/content/riak/kv/2.2.3/developing/api/http/search-query.md b/content/riak/kv/2.2.3/developing/api/http/search-query.md index 1ce6ecec77..0d2682b072 100644 --- a/content/riak/kv/2.2.3/developing/api/http/search-query.md +++ b/content/riak/kv/2.2.3/developing/api/http/search-query.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/http/search-query --- -Performs a [Riak KV Search](/riak/kv/2.2.3/developing/usage/search) query. +Performs a [Riak KV Search]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search) query. ## Request @@ -30,7 +30,7 @@ GET /search/query/ to be used when returning the Search payload. The currently available options are `json` and `xml`. The default is `xml`. * `q` --- The actual Search query itself. Examples can be found in - [Using Search](/riak/kv/2.2.3/developing/usage/search). If a query is not specified, Riak will return + [Using Search]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search). If a query is not specified, Riak will return information about the index itself, e.g. the number of documents indexed. diff --git a/content/riak/kv/2.2.3/developing/api/http/secondary-indexes.md b/content/riak/kv/2.2.3/developing/api/http/secondary-indexes.md index 9e8a65502f..6337421d90 100644 --- a/content/riak/kv/2.2.3/developing/api/http/secondary-indexes.md +++ b/content/riak/kv/2.2.3/developing/api/http/secondary-indexes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/http/secondary-indexes --- -[Secondary Indexes](/riak/kv/2.2.3/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. +[Secondary Indexes]({{< baseurl >}}riak/kv/2.2.3/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs.
The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. ## Request diff --git a/content/riak/kv/2.2.3/developing/api/http/set-bucket-props.md b/content/riak/kv/2.2.3/developing/api/http/set-bucket-props.md index d66832624b..4ec43e75c6 100644 --- a/content/riak/kv/2.2.3/developing/api/http/set-bucket-props.md +++ b/content/riak/kv/2.2.3/developing/api/http/set-bucket-props.md @@ -37,8 +37,8 @@ Available properties: (concurrent updates) * `last_write_wins` (true or false) - whether to ignore object history (vector clock) when writing -* `precommit` - [precommit hooks](/riak/kv/2.2.3/developing/usage/commit-hooks) -* `postcommit` - [postcommit hooks](/riak/kv/2.2.3/developing/usage/commit-hooks) +* `precommit` - [precommit hooks]({{< baseurl >}}riak/kv/2.2.3/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{< baseurl >}}riak/kv/2.2.3/developing/usage/commit-hooks) * `r, w, dw, rw` - default quorum values for operations on keys in the bucket. Valid values are: * `"all"` - all nodes must respond diff --git a/content/riak/kv/2.2.3/developing/api/http/status.md b/content/riak/kv/2.2.3/developing/api/http/status.md index 0d399591a3..adf4b5aa13 100644 --- a/content/riak/kv/2.2.3/developing/api/http/status.md +++ b/content/riak/kv/2.2.3/developing/api/http/status.md @@ -135,7 +135,7 @@ $ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" ## Output Explanation -The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node](/riak/kv/2.2.3/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. Stat | Description ------------------------------|--------------------------------------------------- diff --git a/content/riak/kv/2.2.3/developing/api/http/store-object.md b/content/riak/kv/2.2.3/developing/api/http/store-object.md index 34b856a2b6..4d5be3adef 100644 --- a/content/riak/kv/2.2.3/developing/api/http/store-object.md +++ b/content/riak/kv/2.2.3/developing/api/http/store-object.md @@ -40,8 +40,8 @@ object when read. * `X-Riak-Meta-*` - any additional metadata headers that should be stored with the object. * `X-Riak-Index-*` - index entries under which this object should be indexed. -[Read more about Secondary Indexing](/riak/kv/2.2.3/developing/api/http/secondary-indexes) -* `Link` - user and system-defined links to other resources. [Read more about Links.](/riak/kv/2.2.3/developing/api/http/link-walking) +[Read more about Secondary Indexing]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/link-walking) Optional headers (only valid on `PUT`): @@ -85,7 +85,7 @@ Important headers: * `Location` a relative URL to the newly-created object (when submitting without a key) -If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object](/riak/kv/2.2.3/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{< baseurl >}}riak/kv/2.2.3/developing/api/http/fetch-object) may be present.
Like when fetching the object, `300 Multiple Choices` may be returned if siblings existed or were created as part of the operation, and the response can be dealt with similarly. diff --git a/content/riak/kv/2.2.3/developing/api/http/store-search-index.md b/content/riak/kv/2.2.3/developing/api/http/store-search-index.md index e1deb1389c..547e4b1f27 100644 --- a/content/riak/kv/2.2.3/developing/api/http/store-search-index.md +++ b/content/riak/kv/2.2.3/developing/api/http/store-search-index.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/http/store-search-index --- -Creates a new Riak Search [index](/riak/kv/2.2.3/developing/usage/search/#simple-setup). +Creates a new Riak Search [index]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search/#simple-setup). ## Request @@ -26,11 +26,11 @@ PUT /search/index/ ## Optional Request Body If you run a `PUT` request to this endpoint without a request body, Riak -will create a new Search index that uses the [default Search schema](/riak/kv/2.2.3/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. +will create a new Search index that uses the [default Search schema]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. To specify a different schema, however, you must pass Riak a JSON object as the request body in which the `schema` field specifies the name of -the schema to use. If you've [stored a schema](/riak/kv/2.2.3/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +the schema to use. If you've [stored a schema]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` request would create an index called `my_index` that used that schema: ```curl curl -XPUT http://localhost:8098/search/index/my_index \ -d '{"schema": "my_custom_schema"}' ``` -More information can be found in [Using Search](/riak/kv/2.2.3/developing/usage/search). +More information can be found in [Using Search]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search). ## Normal Response Codes diff --git a/content/riak/kv/2.2.3/developing/api/http/store-search-schema.md b/content/riak/kv/2.2.3/developing/api/http/store-search-schema.md index e1b296f7b7..537670aa16 100644 --- a/content/riak/kv/2.2.3/developing/api/http/store-search-schema.md +++ b/content/riak/kv/2.2.3/developing/api/http/store-search-schema.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/http/store-search-schema --- -Creates a new Riak [Search schema](/riak/kv/2.2.3/developing/usage/search-schemas). +Creates a new Riak [Search schema]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search-schemas). ## Request @@ -26,7 +26,7 @@ PUT /search/schema/ ## Required Form Data In order to create a new Search schema, you must pass Riak a properly -formed XML schema. More information can be found in the [Search Schema](/riak/kv/2.2.3/developing/usage/search-schemas) document. If you've created a schema and stored it in the filed +formed XML schema. More information can be found in the [Search Schema]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search-schemas) document.
If you've created a schema and stored it in the file `my_schema.xml` and would like to create a new schema called `my_custom_schema`, you would use the following HTTP request: diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers.md index d2ecaeab41..44471504f2 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers.md @@ -139,47 +139,47 @@ message RpbErrorResp { ## Bucket Operations -* [PBC List Buckets](/riak/kv/2.2.3/developing/api/protocol-buffers/list-buckets) -* [PBC List Keys](/riak/kv/2.2.3/developing/api/protocol-buffers/list-keys) -* [PBC Get Bucket Properties](/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props) -* [PBC Set Bucket Properties](/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props) -* [PBC Reset Bucket Properties](/riak/kv/2.2.3/developing/api/protocol-buffers/reset-bucket-props) +* [PBC List Buckets]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/reset-bucket-props) ## Object/Key Operations -* [PBC Fetch Object](/riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object) -* [PBC Store Object](/riak/kv/2.2.3/developing/api/protocol-buffers/store-object) -* [PBC Delete Object](/riak/kv/2.2.3/developing/api/protocol-buffers/delete-object) +* [PBC Fetch Object]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/delete-object) ## Query Operations -* [PBC MapReduce](/riak/kv/2.2.3/developing/api/protocol-buffers/mapreduce) -* [PBC Secondary Indexes](/riak/kv/2.2.3/developing/api/protocol-buffers/secondary-indexes) -* [PBC Search](/riak/kv/2.2.3/developing/api/protocol-buffers/search) +* [PBC MapReduce]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/search) ## Server Operations -* [PBC Ping](/riak/kv/2.2.3/developing/api/protocol-buffers/ping) -* [PBC Server Info](/riak/kv/2.2.3/developing/api/protocol-buffers/server-info) +* [PBC Ping]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/server-info) ## Bucket Type Operations -* [PBC Get Bucket Type](/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-type) -* [PBC Set Bucket Type](/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-type) +* [PBC Get Bucket Type]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-type) ## Data Type Operations -* [PBC Data Type Fetch](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-fetch) -* [PBC Data Type Union](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-union) -* [PBC Data Type Store](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-store) -* [PBC Data Type Counter
Store](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-counter-store) -* [PBC Data Type Set Store](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-set-store) -* [PBC Data Type Map Store](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store) +* [PBC Data Type Fetch]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store) ## Yokozuna Operations -* [PBC Yokozuna Index Get](/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-get) -* [PBC Yokozuna Index Put](/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-put) -* [PBC Yokozuna Index Delete](/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-delete) -* [PBC Yokozuna Schema Get](/riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-get) -* [PBC Yokozuna Schema Put](/riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-put) +* [PBC Yokozuna Index Get]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/auth-req.md index ca68f2dfb7..a14306fb4f 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/auth-req.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/auth-req.md @@ -27,4 +27,4 @@ message RpbAuthReq { } ``` -For more on authentication, see our documentation on [Authentication and Authorization](/riak/kv/2.2.3/using/security/basics). +For more on authentication, see our documentation on [Authentication and Authorization]({{< baseurl >}}riak/kv/2.2.3/using/security/basics). diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..21d4283310 --- /dev/null +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,78 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.2.3" +menu: + riak_kv-2.2.3: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: - /riak/2.2.3/dev/references/protocol-buffers/coverage-queries - /riak/kv/2.2.3/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query.
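The full message definitions follow in the Request and Response sections below. As a rough preview of how a client might drive them, here is a Python sketch that assumes the Riak `.proto` files have been compiled with `protoc` into a local `riak_pb2` module (the module name and the socket framing are assumptions; a real client must also wrap each message with Riak's length-and-message-code framing on the wire).

```python
import riak_pb2  # assumed: generated by protoc from Riak's .proto files

# Build a coverage request for a placeholder bucket; `type` may be
# omitted to target the default bucket type.
req = riak_pb2.RpbCoverageReq(
    type=b"my_type",
    bucket=b"users",
    min_partitions=64,  # rounded up to the nearest power of 2 >= ring size
)
payload = req.SerializeToString()  # wire bytes to frame and send

# Decoding a response works the same way in reverse; `received` stands
# in for bytes read off the PBC socket.
received = b""
resp = riak_pb2.RpbCoverageResp()
resp.ParseFromString(received)
for entry in resp.entries:
    # Each entry is one cluster slice: contact ip:port and attach
    # cover_context to the 2i queries sent there.
    print(entry.ip, entry.port, entry.cover_context)
```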
+ +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/delete-object.md index 9d17d0b635..9a120246dc 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/delete-object.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/delete-object.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/protocol-buffers/delete-object --- -Delete an object in the specified [bucket type](/riak/kv/2.2.3/using/cluster-operations/bucket-types)/bucket/key location. +Delete an object in the specified [bucket type]({{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/bucket-types)/bucket/key location. ## Request diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-counter-store.md index c93a2a973a..7ba05088cb 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-counter-store.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-counter-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/protocol-buffers/dt-counter-store --- -An operation to update a [counter](/riak/kv/2.2.3/developing/data-types).
+An operation to update a [counter]({{< baseurl >}}riak/kv/2.2.3/developing/data-types). ## Request @@ -28,4 +28,4 @@ message CounterOp { The `increment` value specifies how much the counter will be incremented or decremented, depending on whether the `increment` value is positive or negative. This operation can be used to update counters that are -stored on their own in a key or [within a map](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store). +stored on their own in a key or [within a map]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-fetch.md index e69996feaf..2528255d0d 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-fetch.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-fetch.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/protocol-buffers/dt-fetch --- -The equivalent of [`RpbGetReq`](/riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object) for [Riak Data Types](/riak/kv/2.2.3/developing/data-types). This request results in a `DtFetchResp` +The equivalent of [`RpbGetReq`]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{< baseurl >}}riak/kv/2.2.3/developing/data-types). This request results in a `DtFetchResp` message (explained in the **Response** section below). ## Request @@ -42,14 +42,14 @@ Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored `key` | The key where the Data Type is stored -`type` | The [Using Bucket Types](/riak/kv/2.2.3/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) +`type` | The [Using Bucket Types]({{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) #### Optional Parameters > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props) for more information. Furthermore, you can assign an integer value to the `r` and `pr`, provided that that integer value is less than or equal @@ -72,7 +72,7 @@ Parameter | Description ## Response -The response to a fetch request ([`DtFetchReq`](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. +The response to a fetch request ([`DtFetchReq`]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. ```protobuf message DtFetchResp { @@ -91,7 +91,7 @@ message DtFetchResp { If the `include_context` option is specified, an opaque "context" value will be returned along with the user-readable data. When sending an update request, the client should send this context as well, just as one -would send a [vclock](/riak/kv/2.2.3/learn/glossary/#vector-clock) for standard KV updates. +would send a [vclock]({{< baseurl >}}riak/kv/2.2.3/learn/glossary/#vector-clock) for standard KV updates.
The type of the Data Type is specified in the `type` field, and must be one of the three possible values of the `DataType` enum (`COUNTER`, diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store.md index 79735c74b2..e159d39b7a 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store.md @@ -66,7 +66,7 @@ message MapUpdate { } ``` -The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-set-store). +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-set-store). If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-set-store.md index 83ddeb0209..d141a6bbad 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-set-store.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-set-store.md @@ -16,7 +16,7 @@ aliases: --- An operation to update a set, either on its own (at the bucket/key -level) or [inside of a map](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store). +level) or [inside of a map]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store). ## Request diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-store.md index b5bf64b6c0..8151bdb481 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-store.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-store.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/protocol-buffers/dt-store --- -A request to update the value of a [Riak Data Type](/riak/kv/2.2.3/developing/data-types). +A request to update the value of a [Riak Data Type]({{< baseurl >}}riak/kv/2.2.3/developing/data-types). ## Request @@ -25,7 +25,7 @@ Data Type's opaque context should be returned in the resulting `DtUpdateResp`. The `DtOp` value specifies which Data Type-specific operation is being -performed. More on that in the [PBC Data Type Union](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-union) document. +performed. More on that in the [PBC Data Type Union]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-union) document.
```protobuf message DtUpdateReq { @@ -50,11 +50,11 @@ message DtUpdateReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket in which the Data Type is stored -`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types](/riak/kv/2.2.3/using/cluster-operations/bucket-types). +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/bucket-types). Also required is a `DtOp` message that specifies which operation is to be performed, depending on whether the Data Type being updated is a -[counter](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-counter-store), [set](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-set-store), or [map](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store). +[counter]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-counter-store), [set]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-set-store), or [map]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-map-store). ```protobuf message DtOp { @@ -79,7 +79,7 @@ a special value denoting `one` (`4294967295-1`), `quorum` Parameter | Description :---------|:----------- `key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. -`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks](/riak/kv/2.2.3/learn/glossary/#vector-clock) +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{< baseurl >}}riak/kv/2.2.3/learn/glossary/#vector-clock) `w` | Write quorum, i.e. how many replicas to write to before returning a successful response `dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response `pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted @@ -92,7 +92,7 @@ Parameter | Description ## Response The response to a Data Type update request is analogous to -[`RpbPutResp`](/riak/kv/2.2.3/developing/api/protocol-buffers/store-object) for KV operations. If the +[`RpbPutResp`]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/store-object) for KV operations. If the `return_body` is set in the update request message (as explained above), the message will include the opaque context of the Data Type (`context`) and the new value of the Data Type _after_ the update has completed diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-union.md index 63de5c7eb5..420a868e3c 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-union.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/dt-union.md @@ -28,4 +28,4 @@ message DtOp { ``` The included operation depends on the Data Type that is being updated. -`DtOp` messages are sent only as part of a [`DtUpdateReq`](/riak/kv/2.2.3/developing/api/protocol-buffers/dt-store) message. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/dt-store) message.
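Before the next message type, a quick sketch of what a `DtUpdateReq` looks like from the client's side, using the official Python client (`pip install riak`). The bucket-type name `counters` (assumed to have its `datatype` property set to `counter`), the bucket, and the key are placeholder assumptions.

```python
import riak

client = riak.RiakClient(protocol="pbc", host="127.0.0.1", pb_port=8087)

# Buckets under a counter-typed bucket type yield Counter objects.
bucket = client.bucket_type("counters").bucket("page_views")

counter = bucket.new("homepage")
counter.increment(5)  # queued locally as a CounterOp inside the DtOp union
counter.store()       # sends the DtUpdateReq described above

# Re-fetch to observe the converged value.
print(bucket.get("homepage").value)
```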
diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object.md index 185105a2db..2a624268de 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object.md @@ -47,7 +47,7 @@ message RpbGetReq { > **Note on defaults and special values** > > All of the optional parameters below have default values determined on a -per-bucket basis. Please refer to the documentation on [setting bucket properties](/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props) for more information. +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props) for more information. > > Furthermore, you can assign an integer value to the `r` and `pr` parameters, provided that that integer value is less than or @@ -87,7 +87,7 @@ Value | Description The content entries hold the object value and any metadata. Below is the structure of a RpbContent message, which is included in GET/PUT responses (`RpbGetResp` (above) and -[`RpbPutResp`](/riak/kv/2.2.3/developing/api/protocol-buffers/store-object), respectively): +[`RpbPutResp`]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/store-object), respectively): ```protobuf message RpbContent { @@ -114,7 +114,7 @@ of the following optional parameters: * `charset` --- The character encoding of the object, e.g. `utf-8` * `content_encoding` --- The content encoding of the object, e.g. `video/mp4` -* `vtag` --- The object's [vtag](/riak/kv/2.2.3/learn/glossary/#vector-clock) +* `vtag` --- The object's [vtag]({{< baseurl >}}riak/kv/2.2.3/learn/glossary/#vector-clock) * `links` --- This parameter is associated with the now-deprecated link walking feature and should not be used by Riak clients * `last_mod` --- A timestamp for when the object was last modified, in @@ -132,7 +132,7 @@ of the following optional parameters: } ``` Notice that both a key and value can be stored or just a key. - `RpbPair` messages are also used to attach [secondary indexes](/riak/kv/2.2.3/developing/usage/secondary-indexes) to objects (in the optional + `RpbPair` messages are also used to attach [secondary indexes]({{< baseurl >}}riak/kv/2.2.3/developing/usage/secondary-indexes) to objects (in the optional `indexes` field). * `deleted` --- Whether the object has been deleted (i.e. whether a tombstone for the object has been found under the specified key) diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props.md index 6cc95395ed..a94cdfeb19 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props.md @@ -26,7 +26,7 @@ message RpbGetBucketReq { } ``` -The bucket's name (`bucket`) must be specified. The [bucket type](/riak/kv/2.2.3/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +The bucket's name (`bucket`) must be specified. The [bucket type]({{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, the `default` bucket type will be used. ## Response @@ -85,7 +85,7 @@ message RpbBucketProps { Each `RpbBucketProps` message returns all of the properties associated with a particular bucket.
Default values for bucket properties, as well as descriptions of all of the above properties, can be found in the -[configuration file](/riak/kv/2.2.3/configuring/reference/#default-bucket-properties) documentation. +[configuration file]({{< baseurl >}}riak/kv/2.2.3/configuring/reference/#default-bucket-properties) documentation. It should be noted that the value of an `RpbBucketProps` message may include other message types, such as `RpbModFun` (specifying @@ -106,5 +106,5 @@ message RpbCommitHook { ``` {{% note title="Note on `RpbReplMode`" %}} -The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities](http://docs.basho.com/riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) {{% /note %}} diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-type.md index d402e81c29..e38fa5f702 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-type.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-type.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/protocol-buffers/get-bucket-type --- -Gets the bucket properties associated with a [bucket type](/riak/kv/2.2.3/using/cluster-operations/bucket-types). +Gets the bucket properties associated with a [bucket type]({{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/bucket-types). ## Request @@ -30,4 +30,4 @@ Only the name of the bucket type needs to be specified (under `name`). ## Response A bucket type's properties will be sent to the client as part of an -[`RpbBucketProps`](/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props) message. +[`RpbBucketProps`]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-client-id.md index 9a26216cb1..587c5dc774 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-client-id.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/get-client-id.md @@ -24,7 +24,7 @@ with Riak 1.4 or later, you can safely ignore client IDs. Get the client id used for this connection. Client ids are used for conflict resolution and each unique actor in the system should be assigned one. A client id is assigned randomly when the socket is -connected and can be changed using [Set Client ID](/riak/kv/2.2.3/developing/api/protocol-buffers/set-client-id). +connected and can be changed using [Set Client ID]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/set-client-id).
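Stepping back to the bucket-properties messages above: from a client's perspective, `RpbGetBucketReq` and `RpbBucketProps` are usually hidden behind higher-level calls. A minimal sketch with the official Python client follows; the bucket name is a placeholder.

```python
import riak

client = riak.RiakClient(protocol="pbc", host="127.0.0.1", pb_port=8087)
bucket = client.bucket("my_bucket")  # placeholder bucket name

# Issues an RpbGetBucketReq and decodes the RpbBucketProps reply.
print(bucket.get_properties())

# Issues an RpbSetBucketReq carrying only the changed property.
bucket.set_properties({"allow_mult": True})
```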
## Request diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/mapreduce.md index 223457a622..79634131ad 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/mapreduce.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/mapreduce.md @@ -38,8 +38,8 @@ Mapreduce jobs can be encoded in two different ways * `application/json` --- JSON-encoded MapReduce job * `application/x-erlang-binary` --- Erlang external term format -The JSON encoding is the same as [REST API](/riak/kv/2.2.3/developing/usage/mapreduce/#rest) and -the external term format is the same as the [local Erlang API](/riak/kv/2.2.3/developing/app-guide/advanced-mapreduce/#erlang) +The JSON encoding is the same as [REST API]({{< baseurl >}}riak/kv/2.2.3/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{< baseurl >}}riak/kv/2.2.3/developing/app-guide/advanced-mapreduce/#erlang) ## Response diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/reset-bucket-props.md index 2bd3255870..7024e16e30 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/reset-bucket-props.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/reset-bucket-props.md @@ -27,7 +27,7 @@ message RpbResetBucketReq { ``` You must specify the name of the bucket (`bucket`) and optionally a -[bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) using the `type` value. If you do not +[bucket type]({{< baseurl >}}riak/kv/2.2.3/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. ## Response diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/secondary-indexes.md index 249c25dbcd..ed5bdabf20 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/secondary-indexes.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/secondary-indexes.md @@ -61,7 +61,7 @@ Parameter | Description `max_results` | If pagination is turned on, the number of results to be returned to the client `continuation` | If set to `true`, values are returned in a paginated response `timeout` | The timeout duration, in milliseconds, after which Riak will return an error message -`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types](/riak/kv/2.2.3/developing/usage/bucket-types). +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{< baseurl >}}riak/kv/2.2.3/developing/usage/bucket-types). `term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query `pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key @@ -84,7 +84,7 @@ message RpbIndexResp { Parameter | Description :---------|:----------- `keys` | A list of keys that match the index request -`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object](/riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object).
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object). `continuation` | Used for paginated responses `done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props.md index 30c375a54c..e749bb4a9b 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props.md @@ -29,9 +29,9 @@ message RpbSetBucketReq { You must specify the name of the bucket (`bucket`) and include an `RpbBucketProps` message. More on that message type can be found in the -[PBC Get Bucket Properties](/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props) documentation. +[PBC Get Bucket Properties]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props) documentation. -You can also specify a [bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) using the +You can also specify a [bucket type]({{< baseurl >}}riak/kv/2.2.3/developing/usage/bucket-types) using the `type` value. If you do not specify a bucket type, the `default` bucket type will be used by Riak. diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-type.md index cbbc7b20c7..33f633b9e2 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-type.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-type.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.3/dev/references/protocol-buffers/set-bucket-type --- -Assigns a set of [bucket properties](/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props) to a -[bucket type](/riak/kv/2.2.3/developing/usage/bucket-types). +Assigns a set of [bucket properties]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{< baseurl >}}riak/kv/2.2.3/developing/usage/bucket-types). ## Request @@ -28,4 +28,4 @@ message RpbSetBucketTypeReq { ``` The `type` field specifies the name of the bucket type as a binary. The -`props` field contains an [`RpbBucketProps`](/riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props). +`props` field contains an [`RpbBucketProps`]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/store-object.md index 996aa2ddc8..4434c55795 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/store-object.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/store-object.md @@ -16,11 +16,11 @@ aliases: --- Stores an object under the specified location, as determined by the -intended [key](/riak/kv/2.2.3/learn/concepts/keys-and-objects), [bucket](/riak/kv/2.2.3/learn/concepts/buckets), and [bucket type](/riak/kv/2.2.3/developing/usage/bucket-types). A bucket must always be specified (via +intended [key]({{< baseurl >}}riak/kv/2.2.3/learn/concepts/keys-and-objects), [bucket]({{< baseurl >}}riak/kv/2.2.3/learn/concepts/buckets), and [bucket type]({{< baseurl >}}riak/kv/2.2.3/developing/usage/bucket-types).
A bucket must always be specified (via `bucket`), whereas key (`key`) and bucket type (`type`) are optional. If no key is specified, Riak will assign a random key to the object. If no -[bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) is assigned, Riak will assign -`default`, which means that the [default bucket configuration](/riak/kv/2.2.3/configuring/reference/#default-bucket-properties) will be used. +[bucket type]({{< baseurl >}}riak/kv/2.2.3/developing/usage/bucket-types) is assigned, Riak will assign -`default`, which means that the [default bucket configuration]({{< baseurl >}}riak/kv/2.2.3/configuring/reference/#default-bucket-properties) will be used. #### Request @@ -50,7 +50,7 @@ message RpbPutReq { Parameter | Description :---------|:----------- `bucket` | The name of the bucket, in bytes, in which the key/value is to reside -`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object](/riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object) +`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object) #### Optional Parameters @@ -93,7 +93,7 @@ message RpbPutResp { If `return_body` is set to `true` on the PUT request, the `RpbPutResp` will contain the current object after the PUT completes, in `contents`, -as well as the object's [causal context](/riak/kv/2.2.3/learn/concepts/causal-context), in the `vclock` +as well as the object's [causal context]({{< baseurl >}}riak/kv/2.2.3/learn/concepts/causal-context), in the `vclock` field. The `key` will be sent only if the server generated a random key for the object. diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-delete.md index ee3ad58134..a5314b419e 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-delete.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-delete.md @@ -29,5 +29,5 @@ message RpbYokozunaIndexDeleteReq { ## Response -Returns a [RpbDelResp](/riak/kv/2.2.3/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbDelResp]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-get.md index 0c7c2385a9..801fec566a 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-get.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-get.md @@ -53,7 +53,7 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.2.3/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
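For reference, creating and then fetching such an index from the official Python client might look like the following sketch; the index and schema names are placeholders, and the schema is assumed to have been stored already.

```python
import riak

client = riak.RiakClient(protocol="pbc", host="127.0.0.1", pb_port=8087)

# PUT: create the index, naming a previously stored schema and an n_val
# that matches the buckets that will use the index.
client.create_search_index("my_index", schema="my_custom_schema", n_val=3)

# GET: read the index definition back.
print(client.get_search_index("my_index"))
```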
diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-put.md index f68d144b70..ca5ea133d8 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-put.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-index-put.md @@ -37,9 +37,9 @@ message RpbYokozunaIndex { ``` Each message specifying an index must include the index's name as a -binary (as `name`). Optionally, you can specify a [`schema`](/riak/kv/2.2.3/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. +binary (as `name`). Optionally, you can specify a [`schema`]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. ## Response -Returns a [RpbPutResp](/riak/kv/2.2.3/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/#message-codes) code with no data on success. diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-get.md index 3584ebb926..66ffc58e12 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-get.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-get.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/protocol-buffers/yz-schema-get --- -Fetch a [search schema](/riak/kv/2.2.3/developing/usage/search-schemas) from Riak Search. +Fetch a [search schema]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search-schemas) from Riak Search. ## Request diff --git a/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-put.md index 812a9b18c0..cc95b3d5de 100644 --- a/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-put.md +++ b/content/riak/kv/2.2.3/developing/api/protocol-buffers/yz-schema-put.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/references/protocol-buffers/yz-schema-put --- -Create a new Solr [search schema](/riak/kv/2.2.3/developing/usage/search-schemas). +Create a new Solr [search schema]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search-schemas). ## Request @@ -34,8 +34,8 @@ message RpbYokozunaSchema { } ``` -This message *must* include both the schema `name` and its Solr [search schema](/riak/kv/2.2.3/developing/usage/search-schemas) `content` as XML. +This message *must* include both the schema `name` and its Solr [search schema]({{< baseurl >}}riak/kv/2.2.3/developing/usage/search-schemas) `content` as XML. ## Response -Returns a [RpbPutResp](/riak/kv/2.2.3/developing/api/protocol-buffers/#message-codes) code with no data on success. +Returns a [RpbPutResp]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/#message-codes) code with no data on success.
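A corresponding sketch for storing and retrieving a schema through the official Python client follows; it assumes the Solr XML lives in a local `my_schema.xml` (a placeholder path), and Riak validates the XML on PUT.

```python
import riak

client = riak.RiakClient(protocol="pbc", host="127.0.0.1", pb_port=8087)

# The request body must be a well-formed Solr schema.
with open("my_schema.xml") as f:
    xml = f.read()

client.create_search_schema("my_custom_schema", xml)
print(client.get_search_schema("my_custom_schema"))
```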
diff --git a/content/riak/kv/2.2.3/developing/app-guide.md b/content/riak/kv/2.2.3/developing/app-guide.md index 8221a2854e..4d4d321e0a 100644 --- a/content/riak/kv/2.2.3/developing/app-guide.md +++ b/content/riak/kv/2.2.3/developing/app-guide.md @@ -15,47 +15,47 @@ aliases: - /riak/kv/2.2.3/dev/using/application-guide/ --- -[usage conflict resolution]: /riak/kv/2.2.3/developing/usage/conflict-resolution -[dev data model#log]: /riak/kv/2.2.3/developing/data-modeling/#log-data -[dev data model#sensor]: /riak/kv/2.2.3/developing/data-modeling/#sensor-data -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[dev data model#user]: /riak/kv/2.2.3/developing/data-modeling/#user-data -[dev kv model]: /riak/kv/2.2.3/developing/key-value-modeling -[dev data types]: /riak/kv/2.2.3/developing/data-types -[dev data types#counters]: /riak/kv/2.2.3/developing/data-types/#counters -[dev data types#sets]: /riak/kv/2.2.3/developing/data-types/#sets -[dev data types#maps]: /riak/kv/2.2.3/developing/data-types/#maps -[usage create objects]: /riak/kv/2.2.3/developing/usage/creating-objects -[usage search]: /riak/kv/2.2.3/developing/usage/search -[use ref search]: /riak/kv/2.2.3/using/reference/search -[usage 2i]: /riak/kv/2.2.3/developing/usage/secondary-indexes -[dev client libraries]: /riak/kv/2.2.3/developing/client-libraries -[concept crdts]: /riak/kv/2.2.3/learn/concepts/crdts -[dev data model]: /riak/kv/2.2.3/developing/data-modeling -[usage mapreduce]: /riak/kv/2.2.3/developing/usage/mapreduce -[apps mapreduce]: /riak/kv/2.2.3/developing/app-guide/advanced-mapreduce -[use ref 2i]: /riak/kv/2.2.3/using/reference/secondary-indexes -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.3/setup/planning/backend/memory -[obj model java]: /riak/kv/2.2.3/developing/getting-started/java/object-modeling -[obj model ruby]: /riak/kv/2.2.3/developing/getting-started/ruby/object-modeling -[obj model python]: /riak/kv/2.2.3/developing/getting-started/python/object-modeling -[obj model csharp]: /riak/kv/2.2.3/developing/getting-started/csharp/object-modeling -[obj model nodejs]: /riak/kv/2.2.3/developing/getting-started/nodejs/object-modeling -[obj model erlang]: /riak/kv/2.2.3/developing/getting-started/erlang/object-modeling -[obj model golang]: /riak/kv/2.2.3/developing/getting-started/golang/object-modeling -[concept strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[use ref strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[cluster ops strong consistency]: /riak/kv/2.2.3/using/cluster-operations/strong-consistency -[config strong consistency]: /riak/kv/2.2.3/configuring/strong-consistency -[apps strong consistency]: /riak/kv/2.2.3/developing/app-guide/strong-consistency -[usage update objects]: /riak/kv/2.2.3/developing/usage/updating-objects -[apps replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties -[install index]: /riak/kv/2.2.3/setup/installing -[getting started]: /riak/kv/2.2.3/developing/getting-started -[usage index]: /riak/kv/2.2.3/developing/usage -[glossary]: /riak/kv/2.2.3/learn/glossary +[usage conflict resolution]: {{< baseurl >}}riak/kv/2.2.3/developing/usage/conflict-resolution +[dev data model#log]: {{< baseurl >}}riak/kv/2.2.3/developing/data-modeling/#log-data +[dev data model#sensor]: {{< baseurl >}}riak/kv/2.2.3/developing/data-modeling/#sensor-data +[concept eventual consistency]:
{{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.2.3/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.2.3/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.2.3/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.2.3/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.2.3/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.2.3/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.2.3/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search +[use ref search]: {{}}riak/kv/2.2.3/using/reference/search +[usage 2i]: {{}}riak/kv/2.2.3/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.2.3/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.2.3/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.2.3/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.2.3/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.2.3/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.2.3/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.3/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.2.3/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.2.3/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.2.3/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.2.3/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.2.3/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.2.3/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.2.3/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[cluster ops strong consistency]: {{}}riak/kv/2.2.3/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/2.2.3/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/2.2.3/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/2.2.3/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/2.2.3/setup/installing +[getting started]: {{}}riak/kv/2.2.3/developing/getting-started +[usage index]: {{}}riak/kv/2.2.3/developing/usage +[glossary]: {{}}riak/kv/2.2.3/learn/glossary So you've decided to build an application using Riak as a data store. We think that this is a wise choice for a broad variety of use cases. But @@ -118,7 +118,7 @@ Riak may not such be a good choice if you use it to store: * **Objects that exceed 1-2MB in size** --- If you will be storing a lot of objects over that size, we would recommend checking - out [Riak CS](http://docs.basho.com/riakcs/latest/) instead, as Riak + out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak CS was built to solve this problem. Storing large objects in Riak will typically lead to substandard performance. 
* **Objects with complex interdependencies** --- If your data cannot be diff --git a/content/riak/kv/2.2.3/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.2.3/developing/app-guide/advanced-mapreduce.md index 1482231ccc..9e61e1a85f 100644 --- a/content/riak/kv/2.2.3/developing/app-guide/advanced-mapreduce.md +++ b/content/riak/kv/2.2.3/developing/app-guide/advanced-mapreduce.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.2.3/dev/advanced/mapreduce/ --- -[usage 2i]: /riak/kv/2.2.3/developing/usage/secondary-indexes -[apps replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties -[use ref custom code]: /riak/kv/2.2.3/using/reference/custom-code -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[config reference]: /riak/kv/2.2.3/configuring/reference +[usage 2i]: {{}}riak/kv/2.2.3/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.2.3/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.2.3/configuring/reference [google mr]: http://research.google.com/archive/mapreduce.html [mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map [function contrib]: https://github.com/basho/riak_function_contrib @@ -381,7 +381,7 @@ Erlang client. {{% note title="Distributing Erlang MapReduce Code" %}} Any modules and functions you use in your Erlang MapReduce calls must be available on all nodes in the cluster. Please read about -[installing custom code](/riak/kv/2.2.3/using/reference/custom-code). +[installing custom code]({{}}riak/kv/2.2.3/using/reference/custom-code). {{% /note %}} ### Erlang Example @@ -728,7 +728,7 @@ You can use streaming with Erlang via the Riak KV local client or the Erlang Protocol Buffers API. In either case, you will provide the call to `mapred_stream` with a `Pid` that will receive the streaming results. -For examples, see [MapReduce pbstream.erl](/data/MapReduceExamples/pbstream.erl) +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) ## Troubleshooting MapReduce, illustrated diff --git a/content/riak/kv/2.2.3/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.2.3/developing/app-guide/cluster-metadata.md index b5b6f71a04..d035ef69d0 100644 --- a/content/riak/kv/2.2.3/developing/app-guide/cluster-metadata.md +++ b/content/riak/kv/2.2.3/developing/app-guide/cluster-metadata.md @@ -19,7 +19,7 @@ to work with information that is stored cluster wide and can be read without blocking on communication over the network. One notable example of a subsystem of Riak relying on cluster metadata -is Riak's [bucket types](/riak/kv/2.2.3/using/reference/bucket-types) feature. This feature +is Riak's [bucket types]({{}}riak/kv/2.2.3/using/reference/bucket-types) feature. This feature requires that a particular form of key/value pairs, namely bucket type names (the key) and their associated bucket properties (the value), be asynchronously broadcast to all nodes in a Riak cluster. @@ -54,7 +54,7 @@ already reached all nodes in the previous set of members. All cluster metadata is eventually stored both in memory and on disk, but it should be noted that reads are only from memory, while writes are -made both to memory and to disk. 
Logical clocks, namely [dotted version vectors](/riak/kv/2.2.3/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks](/riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang +made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.2.3/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang terms addressed by both prefix and a key. ## Erlang Code Interface diff --git a/content/riak/kv/2.2.3/developing/app-guide/replication-properties.md b/content/riak/kv/2.2.3/developing/app-guide/replication-properties.md index 6aaa421800..fd6fb9032e 100644 --- a/content/riak/kv/2.2.3/developing/app-guide/replication-properties.md +++ b/content/riak/kv/2.2.3/developing/app-guide/replication-properties.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.3/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[use ref strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[use ref strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -40,14 +40,14 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. -At the bottom of the page, you'll find a [screencast](/riak/kv/2.2.3/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.2.3/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. > **Note on strong consistency** > -> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent](/riak/kv/2.2.3/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices +> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.2.3/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. 
Therefore, you -should consult the [Using Strong Consistency](/riak/kv/2.2.3/developing/app-guide/strong-consistency) documentation, as this option will not be covered +should consult the [Using Strong Consistency]({{}}riak/kv/2.2.3/developing/app-guide/strong-consistency) documentation, as this option will not be covered in this tutorial. ## How Replication Properties Work @@ -64,7 +64,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.2.3/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.2.3/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -80,7 +80,7 @@ Now, any time you store an object in a bucket with the type The table below lists the most frequently used replication parameters that are available in Riak. Symbolic values like `quorum` are discussed -[below](/riak/kv/2.2.3/developing/app-guide/replication-properties#symbolic-consistency-names). Each +[below]({{}}riak/kv/2.2.3/developing/app-guide/replication-properties#symbolic-consistency-names). Each parameter will be explained in more detail in later sections: Parameter | Common name | Default value | Description @@ -88,8 +88,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -98,7 +98,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.2.3/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.2.3/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -110,7 +110,7 @@ considered a success (W, or `w`). In addition to the bucket level, you can also specify replication properties on the client side for any given read or write. 
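As a quick illustration of that client-level knob before the bucket-level examples that follow, here is a hedged sketch using the official Python client; the bucket and key are borrowed from the curl example later on this page, and the request parameters map directly onto the table above.

```python
# Per-request replication properties from the Python client -- a sketch.
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('nba_stats')

# Read with R=3: three replicas must respond before the read succeeds.
obj = bucket.get('michael_jordan', r=3)

# Write with W=3 and DW=2: three vnodes must acknowledge the write,
# and two of them must report it durably written to disk.
obj.data = {'points_per_game': 30.1}
obj.store(w=3, dw=2)
```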
The examples immediately below will deal with bucket-level replication settings, but -check out the [section below](/riak/kv/2.2.3/developing/app-guide/replication-properties#client-level-replication-settings) +check out the [section below]({{}}riak/kv/2.2.3/developing/app-guide/replication-properties#client-level-replication-settings) for more information on setting properties on a per-operation basis. The most general trade-off to be aware of when setting these values is @@ -310,7 +310,7 @@ seeks to write the object to is unavailable. ## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.2.3/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.2.3/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -348,7 +348,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask](/riak/kv/2.2.3/setup/planning/backend/bitcask), [LevelDB](/riak/kv/2.2.3/setup/planning/backend/leveldb), and [multiple backends](/riak/kv/2.2.3/setup/planning/backend/multi). +documentation on [Bitcask]({{}}riak/kv/2.2.3/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.2.3/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.2.3/setup/planning/backend/multi). ## Delete Quorum with RW @@ -523,9 +523,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.2.3/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.2.3/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the section on [development usage with Riak KV](/riak/kv/2.2.3/developing/usage) +refer to the section on [development usage with Riak KV]({{}}riak/kv/2.2.3/developing/usage) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -539,7 +539,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.2.3/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.2.3/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -549,8 +549,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings +2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) diff --git a/content/riak/kv/2.2.3/developing/app-guide/strong-consistency.md b/content/riak/kv/2.2.3/developing/app-guide/strong-consistency.md index c54bbdad27..38bd9cd0e4 100644 --- a/content/riak/kv/2.2.3/developing/app-guide/strong-consistency.md +++ b/content/riak/kv/2.2.3/developing/app-guide/strong-consistency.md @@ -15,25 +15,25 @@ aliases: - /riak/kv/2.2.3/dev/advanced/strong-consistency --- -[use ref strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[use ref strong consistency#trade-offs]: /riak/kv/2.2.3/using/reference/strong-consistency/#trade-offs -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[config strong consistency#enable]: /riak/kv/2.2.3/configuring/strong-consistency/#enabling-strong-consistency -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[cluster ops bucket types]: /riak/kv/2.2.3/using/cluster-operations/bucket-types -[apps replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties -[config strong consistency]: /riak/kv/2.2.3/configuring/strong-consistency -[config strong consistency#fault]: /riak/kv/2.2.3/configuring/strong-consistency/#fault-tolerance -[concept causal context]: /riak/kv/2.2.3/learn/concepts/causal-context -[concept causal context#vector]: /riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks -[concept version vector]: /riak/kv/2.2.3/learn/concepts/causal-context/#dotted-version-vectors -[usage conflict resolution]: /riak/kv/2.2.3/developing/usage/conflict-resolution -[usage update objects]: /riak/kv/2.2.3/developing/usage/updating-objects -[use ref strong consistency#vs]: /riak/kv/2.2.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency -[dev client libraries]: /riak/kv/2.2.3/developing/client-libraries -[getting started]: /riak/kv/2.2.3/developing/getting-started -[config strong consistency#details]: /riak/kv/2.2.3/configuring/strong-consistency/#implementation-details +[use ref strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[concept eventual consistency]: 
{{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[use ref strong consistency#trade-offs]: {{}}riak/kv/2.2.3/using/reference/strong-consistency/#trade-offs +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[config strong consistency#enable]: {{}}riak/kv/2.2.3/configuring/strong-consistency/#enabling-strong-consistency +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[cluster ops bucket types]: {{}}riak/kv/2.2.3/using/cluster-operations/bucket-types +[apps replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties +[config strong consistency]: {{}}riak/kv/2.2.3/configuring/strong-consistency +[config strong consistency#fault]: {{}}riak/kv/2.2.3/configuring/strong-consistency/#fault-tolerance +[concept causal context]: {{}}riak/kv/2.2.3/learn/concepts/causal-context +[concept causal context#vector]: {{}}riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks +[concept version vector]: {{}}riak/kv/2.2.3/learn/concepts/causal-context/#dotted-version-vectors +[usage conflict resolution]: {{}}riak/kv/2.2.3/developing/usage/conflict-resolution +[usage update objects]: {{}}riak/kv/2.2.3/developing/usage/updating-objects +[use ref strong consistency#vs]: {{}}riak/kv/2.2.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency +[dev client libraries]: {{}}riak/kv/2.2.3/developing/client-libraries +[getting started]: {{}}riak/kv/2.2.3/developing/getting-started +[config strong consistency#details]: {{}}riak/kv/2.2.3/configuring/strong-consistency/#implementation-details > **Please Note:** > diff --git a/content/riak/kv/2.2.3/developing/app-guide/write-once.md b/content/riak/kv/2.2.3/developing/app-guide/write-once.md index 3752943a88..04f2e4fbb3 100644 --- a/content/riak/kv/2.2.3/developing/app-guide/write-once.md +++ b/content/riak/kv/2.2.3/developing/app-guide/write-once.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.3/dev/advanced/write-once --- -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[bucket type]: /riak/kv/2.2.3/developing/usage/bucket-types -[Riak data types]: /riak/kv/2.2.3/developing/data-types -[strong consistency]: /riak/kv/2.2.3/developing/app-guide/strong-consistency +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.2.3/developing/data-types +[strong consistency]: {{}}riak/kv/2.2.3/developing/app-guide/strong-consistency Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. @@ -98,7 +98,7 @@ The relationship between the `riak_client`, write-once workers, and vnode proxies is illustrated in the following diagram:
-![Write Once](/images/write_once.png) +![Write Once]({{< baseurl >}}images/write_once.png)
## Client Impacts @@ -149,7 +149,7 @@ LevelDB. Riak will automatically fall back to synchronous writes with all other backends. {{% note title="Note on the `multi` backend" %}} -The [Multi](/riak/kv/2.2.3/setup/planning/backend/multi) backend does not +The [Multi]({{}}riak/kv/2.2.3/setup/planning/backend/multi) backend does not support asynchronous writes. Therefore, if LevelDB is used with the Multi backend, it will be used in synchronous mode. {{% /note %}} diff --git a/content/riak/kv/2.2.3/developing/client-libraries.md b/content/riak/kv/2.2.3/developing/client-libraries.md index c41aa2ba11..80743472ea 100644 --- a/content/riak/kv/2.2.3/developing/client-libraries.md +++ b/content/riak/kv/2.2.3/developing/client-libraries.md @@ -36,7 +36,7 @@ GitHub for bug reporting. In addition to the official clients, Basho provides some unofficial client libraries, listed below. There are also many client libraries and -related [community projects](/community/projects/). +related [community projects]({{}}community/projects/). ## Community Libraries diff --git a/content/riak/kv/2.2.3/developing/data-types.md b/content/riak/kv/2.2.3/developing/data-types.md index 1b0214efa0..479c22d848 100644 --- a/content/riak/kv/2.2.3/developing/data-types.md +++ b/content/riak/kv/2.2.3/developing/data-types.md @@ -43,9 +43,9 @@ For more information on how CRDTs work in Riak KV see [Concepts: Data Types][con The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: -1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-data-type). +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). 2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). -3. [Activate the bucket type](#activate-the-bucket-type). +3. [Activate the bucket type](#activate-bucket-type). ### Creating a Bucket with a Riak Data Type @@ -268,5 +268,5 @@ Riak data types can be searched like any other object, but with the added benefit that your data type is indexed as a different type by Solr, the search platform behind Riak Search. -In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/search/#data-types-and-search-examples), including code +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code samples from each of our official client libraries. 
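Once a bucket type with the `datatype` parameter has been created and activated as described in the data-types page above, working with a data type from a client is a one-liner per operation. A minimal sketch with the official Python client, assuming a bucket type named `counters` was created with `datatype = counter` and activated; the bucket and key names are illustrative.

```python
# A hedged sketch of using a Riak counter from the Python client.
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket_type('counters').bucket('page_hits')

counter = bucket.new('homepage')  # a Counter, since the type is datatype=counter
counter.increment()               # queue a local increment of 1
counter.store()                   # ship the update to Riak

counter.reload()                  # fetch the converged value back
print(counter.value)              # -> 1 on a fresh key
```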
diff --git a/content/riak/kv/2.2.3/developing/faq.md b/content/riak/kv/2.2.3/developing/faq.md index 464410be44..399d7ac0e1 100644 --- a/content/riak/kv/2.2.3/developing/faq.md +++ b/content/riak/kv/2.2.3/developing/faq.md @@ -16,19 +16,19 @@ aliases: - /riak/kv/2.2.3/community/faqs/developing --- -[[Basho Bench]: /riak/kv/2.2.3/using/performance/benchmarking -[Bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[Bucket Properties]: /riak/kv/2.2.3/developing/usage +[[Basho Bench]: {{}}riak/kv/2.2.3/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.2.3/developing/usage [built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js -[commit hooks]: /riak/kv/2.2.3/developing/usage/commit-hooks -[Configuration Files]: /riak/kv/2.2.3/configuring/reference +[commit hooks]: {{}}riak/kv/2.2.3/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.2.3/configuring/reference [contrib.basho.com]: https://github.com/basho/riak_function_contrib -[Erlang Riak Client]: /riak/kv/2.2.3/developing/client-libraries -[MapReduce]: /riak/kv/2.2.3/developing/usage/mapreduce -[Memory]: /riak/kv/2.2.3/setup/planning/backend/memory -[Riak CS]: /riak/cs/2.1.1 -[System Planning]: /riak/kv/2.2.3/setup/planning/start/#network-configuration-load-balancing -[vector clocks]: /riak/kv/2.2.3/learn/concepts/causal-context#vector-clocks +[Erlang Riak Client]: {{}}riak/kv/2.2.3/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.2.3/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.2.3/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.2.3/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.2.3/learn/concepts/causal-context#vector-clocks ## General diff --git a/content/riak/kv/2.2.3/developing/getting-started.md b/content/riak/kv/2.2.3/developing/getting-started.md index cf970df2b8..1b4b11f191 100644 --- a/content/riak/kv/2.2.3/developing/getting-started.md +++ b/content/riak/kv/2.2.3/developing/getting-started.md @@ -12,8 +12,8 @@ menu: toc: true --- -[install index]: /riak/kv/2.2.3/setup/installing -[dev client libraries]: /riak/kv/2.2.3/developing/client-libraries +[install index]: {{}}riak/kv/2.2.3/setup/installing +[dev client libraries]: {{}}riak/kv/2.2.3/developing/client-libraries Welcome, new Riak developer! This guide will get you started developing against Riak KV with minimal fuss. diff --git a/content/riak/kv/2.2.3/developing/getting-started/csharp.md b/content/riak/kv/2.2.3/developing/getting-started/csharp.md index dcaef519ab..c45fc4f4ba 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/csharp.md +++ b/content/riak/kv/2.2.3/developing/getting-started/csharp.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. @@ -79,4 +79,4 @@ We are now ready to start interacting with Riak. 
## Next Steps -[CRUD Operations](/riak/kv/2.2.3/developing/getting-started/csharp/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.3/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.2.3/developing/getting-started/csharp/querying.md b/content/riak/kv/2.2.3/developing/getting-started/csharp/querying.md index d1dcaa9942..5ec818599a 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/csharp/querying.md +++ b/content/riak/kv/2.2.3/developing/getting-started/csharp/querying.md @@ -125,9 +125,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.3/developing/getting-started/erlang.md b/content/riak/kv/2.2.3/developing/getting-started/erlang.md index 2903418339..fea7b56b40 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/erlang.md +++ b/content/riak/kv/2.2.3/developing/getting-started/erlang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/taste-of-riak/erlang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Erlang is required. You can also use the `erts` Erlang installation that comes @@ -52,4 +52,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.3/developing/getting-started/erlang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.3/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.2.3/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.2.3/developing/getting-started/erlang/object-modeling.md index 3205f79d6e..9f5e11d4f0 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/erlang/object-modeling.md +++ b/content/riak/kv/2.2.3/developing/getting-started/erlang/object-modeling.md @@ -43,7 +43,7 @@ here for brevity. -record(timeline, {owner, msg_type, msgs}). ``` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.2.3/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.2.3/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. 
Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.2.3/developing/getting-started/erlang/querying.md b/content/riak/kv/2.2.3/developing/getting-started/erlang/querying.md index 8b801c14b5..a552f64d04 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/erlang/querying.md +++ b/content/riak/kv/2.2.3/developing/getting-started/erlang/querying.md @@ -26,7 +26,7 @@ data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. -A more comprehensive discussion can be found in [Key/Value Modeling](/riak/kv/2.2.3/developing/key-value-modeling). +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.2.3/developing/key-value-modeling). ## Denormalization @@ -215,9 +215,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.3/developing/getting-started/golang.md b/content/riak/kv/2.2.3/developing/getting-started/golang.md index 6d53414c49..e4c2513485 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/golang.md +++ b/content/riak/kv/2.2.3/developing/getting-started/golang.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/taste-of-riak/golang --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.3/using/running-a-cluster) first and ensure you have +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.3/using/running-a-cluster) first and ensure you have [a working installation of Go](http://golang.org/doc/install). ## Client Setup @@ -75,4 +75,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.3/developing/getting-started/golang/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.3/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.2.3/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.2.3/developing/getting-started/golang/object-modeling.md index e2b18607df..58d2ad0fdd 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/golang/object-modeling.md +++ b/content/riak/kv/2.2.3/developing/getting-started/golang/object-modeling.md @@ -135,7 +135,7 @@ func (t *Timeline) GetId() string { } ```` -We'll be using the bucket `Users` to store our data. We won't be [using bucket types](/riak/kv/2.2.3/developing/usage/bucket-types) here, so we don't need to specify one. +We'll be using the bucket `Users` to store our data. 
We won't be [using bucket types]({{}}riak/kv/2.2.3/developing/usage/bucket-types) here, so we don't need to specify one. To use these records to store data, we will first have to create a user record. Then, when a user creates a message, we will append that message diff --git a/content/riak/kv/2.2.3/developing/getting-started/golang/querying.md b/content/riak/kv/2.2.3/developing/getting-started/golang/querying.md index ac114a5846..9c47ed2c48 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/golang/querying.md +++ b/content/riak/kv/2.2.3/developing/getting-started/golang/querying.md @@ -416,9 +416,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: diff --git a/content/riak/kv/2.2.3/developing/getting-started/java.md b/content/riak/kv/2.2.3/developing/getting-started/java.md index 09a4d849d2..e7f4a5ab61 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/java.md +++ b/content/riak/kv/2.2.3/developing/getting-started/java.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Java is required. 
@@ -86,4 +86,4 @@ RiakClient client = new RiakClient(cluster); ## Next Steps -[CRUD Operations](/riak/kv/2.2.3/developing/getting-started/java/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.3/developing/getting-started/java/crud-operations) diff --git a/content/riak/kv/2.2.3/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.2.3/developing/getting-started/java/crud-operations.md index 81a9b9f1e5..58742d67b1 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/java/crud-operations.md +++ b/content/riak/kv/2.2.3/developing/getting-started/java/crud-operations.md @@ -67,8 +67,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.3/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.3/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/) documentation. ## Updating Objects @@ -85,8 +85,8 @@ StoreValue.Response updateOpResp = client.execute(updateOp); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.3/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.3/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/) documentation. ## Deleting Objects @@ -196,6 +196,6 @@ UpdateValue.Response response = client.execute(updateValue); ``` For more in depth information on updating objects and sibling resolution in -Riak, see [Updating Objects](/riak/kv/2.2.3/developing/usage/updating-objects/) -and [Conflict Resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution/) +Riak, see [Updating Objects]({{}}riak/kv/2.2.3/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/) documention. diff --git a/content/riak/kv/2.2.3/developing/getting-started/java/querying.md b/content/riak/kv/2.2.3/developing/getting-started/java/querying.md index f762666094..2d487c5e3d 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/java/querying.md +++ b/content/riak/kv/2.2.3/developing/getting-started/java/querying.md @@ -194,9 +194,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. 
{{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.3/developing/getting-started/nodejs.md b/content/riak/kv/2.2.3/developing/getting-started/nodejs.md index 02e2a3944f..368ba138b7 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/nodejs.md +++ b/content/riak/kv/2.2.3/developing/getting-started/nodejs.md @@ -21,7 +21,7 @@ aliases: [nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Node.js 0.12 or later is required. @@ -97,4 +97,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.3/developing/getting-started/nodejs/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.3/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.2.3/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.2.3/developing/getting-started/nodejs/querying.md index 0c06593f2e..b8830bf976 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/nodejs/querying.md +++ b/content/riak/kv/2.2.3/developing/getting-started/nodejs/querying.md @@ -87,9 +87,9 @@ intrinsic relationships. ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from an SQL world, Secondary Indexes (2i) are a lot diff --git a/content/riak/kv/2.2.3/developing/getting-started/php.md b/content/riak/kv/2.2.3/developing/getting-started/php.md index 0628912008..d34e709eae 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/php.md +++ b/content/riak/kv/2.2.3/developing/getting-started/php.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/taste-of-riak/php --- -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. @@ -73,4 +73,4 @@ We are now ready to start interacting with Riak. 
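Each of these getting-started pages funnels into the same create/read/update/delete cycle on its "CRUD Operations" page. A minimal sketch of that cycle, shown in Python for consistency with the other Python snippets in these docs; the bucket, key, and payload are made up for illustration.

```python
# The basic CRUD cycle every client walks through -- a hedged sketch.
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('welcome')

obj = bucket.new('hello', data={'greeting': 'Hello, Riak!'})  # create
obj.store()

fetched = bucket.get('hello')                                 # read
fetched.data['greeting'] = 'Hello again!'                     # update
fetched.store()

fetched.delete()                                              # delete
```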
## Next Steps -[CRUD Operations](/riak/kv/2.2.3/developing/getting-started/php/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.3/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.2.3/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.2.3/developing/getting-started/php/crud-operations.md index 2dc2f3cab5..289c97af3f 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/php/crud-operations.md +++ b/content/riak/kv/2.2.3/developing/getting-started/php/crud-operations.md @@ -179,4 +179,4 @@ Now that we’ve ruined the magic of object encoding, let’s clean up our mess: ## Next Steps -More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter](/riak/kv/2.2.3/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.2.3/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.2.3/developing/getting-started/php/querying.md b/content/riak/kv/2.2.3/developing/getting-started/php/querying.md index 6de4ff3d43..7c910967c2 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/php/querying.md +++ b/content/riak/kv/2.2.3/developing/getting-started/php/querying.md @@ -298,9 +298,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ## Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.3/developing/getting-started/python.md b/content/riak/kv/2.2.3/developing/getting-started/python.md index 2411c13e5b..cb0b0aebb4 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/python.md +++ b/content/riak/kv/2.2.3/developing/getting-started/python.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.3/using/running-a-cluster) first. +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.3/using/running-a-cluster) first. 
To try this flavor of Riak, a working installation of Python is required, with Python 2.7 preferred. One of the Python package managers, @@ -96,4 +96,4 @@ We are now ready to start interacting with Riak. ## Next Steps -[CRUD Operations](/riak/kv/2.2.3/developing/getting-started/python/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.3/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.2.3/developing/getting-started/python/querying.md b/content/riak/kv/2.2.3/developing/getting-started/python/querying.md index 6d4fe0a6ec..1285aea6c3 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/python/querying.md +++ b/content/riak/kv/2.2.3/developing/getting-started/python/querying.md @@ -181,9 +181,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.3/developing/getting-started/ruby.md b/content/riak/kv/2.2.3/developing/getting-started/ruby.md index 482862dac9..de19f70e1b 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/ruby.md +++ b/content/riak/kv/2.2.3/developing/getting-started/ruby.md @@ -17,7 +17,7 @@ aliases: -If you haven't set up a Riak Node and started it, please visit [Running A Cluster](/riak/kv/2.2.3/using/running-a-cluster) first. To try this flavor +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.3/using/running-a-cluster) first. To try this flavor of Riak, a working installation of Ruby is required. ## Client Setup @@ -61,4 +61,4 @@ We are now ready to start interacting with Riak. 
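The secondary-index pattern described in the querying pages above is easy to try from the Python client as well. A hedged sketch, assuming a LevelDB or Memory backend (Bitcask does not support 2i); the index fields and values are illustrative.

```python
# Tagging an object with secondary indexes and querying them back.
from riak import RiakClient

client = RiakClient(pb_port=8087)
bucket = client.bucket('users')

obj = bucket.new('john_smith', data={'name': 'John Smith'})
obj.add_index('last_name_bin', 'smith')      # _bin -> binary (string) index
obj.add_index('signup_date_int', 20170315)   # _int -> integer index
obj.store()

# Exact-match lookup on the binary index...
smiths = bucket.get_index('last_name_bin', 'smith')
# ...and a range query on the integer index.
march = bucket.get_index('signup_date_int', 20170301, 20170331)
print(list(smiths), list(march))
```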
## Next Steps -[CRUD Operations](/riak/kv/2.2.3/developing/getting-started/ruby/crud-operations) +[CRUD Operations]({{}}riak/kv/2.2.3/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.2.3/developing/getting-started/ruby/querying.md b/content/riak/kv/2.2.3/developing/getting-started/ruby/querying.md index 05f04441a0..53b7e5f163 100644 --- a/content/riak/kv/2.2.3/developing/getting-started/ruby/querying.md +++ b/content/riak/kv/2.2.3/developing/getting-started/ruby/querying.md @@ -191,9 +191,9 @@ While this pattern is very easy and extremely fast with respect to queries and c ### Secondary Indexes {{% note %}} -Secondary indexes in Riak KV require a sorted backend: [Memory](/riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB](/riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask](/riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.3/setup/planning/backend/bitcask) does not support secondary indexes. -See [Using Secondary Indexes (2i)](/riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. {{% /note %}} If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. diff --git a/content/riak/kv/2.2.3/developing/key-value-modeling.md b/content/riak/kv/2.2.3/developing/key-value-modeling.md index 4f69519d91..5fd63b4e33 100644 --- a/content/riak/kv/2.2.3/developing/key-value-modeling.md +++ b/content/riak/kv/2.2.3/developing/key-value-modeling.md @@ -16,7 +16,7 @@ aliases: --- While Riak enables you to take advantage of a wide variety of features -that can be useful in application development, such as [Search](/riak/kv/2.2.3/developing/usage/search), [secondary indexes (2i)](/riak/kv/2.2.3/developing/usage/secondary-indexes/), and [Riak Data Types](/riak/kv/2.2.3/developing/data-types/), Riak almost always performs best when you +that can be useful in application development, such as [Search]({{}}riak/kv/2.2.3/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.2.3/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store. @@ -24,7 +24,7 @@ key/value store. In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation -for each of them or consulting our guide to [building applications with Riak](/riak/kv/2.2.3/developing/app-guide/) for a better sense of which features you might need. 
+for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.2.3/developing/app-guide/) for a better sense of which features you might need. ## Advantages of Key/Value Operations @@ -37,12 +37,12 @@ objects. Instead, it interacts with objects on a one-by-one basis, using Primary key lookups store and fetch objects in Riak on the basis of three basic locators: -* The object's [key](/riak/kv/2.2.3/learn/concepts/keys-and-objects#keys), which can be anything you +* The object's [key]({{}}riak/kv/2.2.3/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/) -* The [bucket](/riak/kv/2.2.3/learn/concepts/buckets) which houses the object and its key (bucket +* The [bucket]({{}}riak/kv/2.2.3/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant) -* The [bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) that determines the bucket's - [replication](/riak/kv/2.2.3/developing/app-guide/replication-properties) and other properties +* The [bucket type]({{}}riak/kv/2.2.3/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.2.3/developing/app-guide/replication-properties) and other properties It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you @@ -79,7 +79,7 @@ Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would -look like (for the [HTTP API](/riak/kv/2.2.3/developing/api/http)): +look like (for the [HTTP API]({{}}riak/kv/2.2.3/developing/api/http)): ``` GET/PUT/DELETE /bucket//keys/ @@ -137,13 +137,13 @@ records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist? -One way to determine this is to [list all keys](/riak/kv/2.2.3/developing/api/protocol-buffers/list-keys) in the +One way to determine this is to [list all keys]({{}}riak/kv/2.2.3/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed. -A better possibility is to use [Riak sets](/riak/kv/2.2.3/developing/data-types/#sets) to -store lists of keys in a bucket. Riak sets are a [Riak Data Type](/riak/kv/2.2.3/developing/data-types) that enable you to store lists of binaries or strings in Riak. +A better possibility is to use [Riak sets]({{}}riak/kv/2.2.3/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.2.3/developing/data-types) that enable you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will. @@ -152,7 +152,7 @@ Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. 
We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key -`usernames`. The following will also assume that we've [set up a bucket type](/riak/kv/2.2.3/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.2.3/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`. We can interact with that set on the basis of its location: @@ -190,7 +190,7 @@ user_id_set = Set(bucket, 'usernames') > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.3/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.2.3/developing/getting-started). +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.3/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.3/developing/getting-started). Then, we can create a function that stores a user record's key in that set every time a record is created: @@ -434,8 +434,8 @@ def get_user_by_username(username): ## Bucket Types as Additional Namespaces -Riak [bucket types](/riak/kv/2.2.3/developing/usage/bucket-types) have two essential functions: -they enable you to manage [bucket configurations](/riak/kv/2.2.3/learn/concepts/buckets) in an +Riak [bucket types]({{}}riak/kv/2.2.3/developing/usage/bucket-types) have two essential functions: +they enable you to manage [bucket configurations]({{}}riak/kv/2.2.3/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third @@ -443,7 +443,7 @@ layer of information for locating objects if you wish. While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types -that simply extend Riak's [defaults](/riak/kv/2.2.3/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have +that simply extend Riak's [defaults]({{}}riak/kv/2.2.3/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names. Here's an example of creating four bucket types that only extend Riak's diff --git a/content/riak/kv/2.2.3/developing/usage/commit-hooks.md b/content/riak/kv/2.2.3/developing/usage/commit-hooks.md index dbbd80b138..b00f185032 100644 --- a/content/riak/kv/2.2.3/developing/usage/commit-hooks.md +++ b/content/riak/kv/2.2.3/developing/usage/commit-hooks.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/dev/using/commit-hooks --- -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types Pre- and post-commit hooks are functions that are invoked before or after an object has been written to Riak. To provide a few examples, @@ -31,7 +31,7 @@ invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles. 
-Pre- and post-commit hooks are applied at the [bucket](/riak/kv/2.2.3/learn/concepts/buckets) level, +Pre- and post-commit hooks are applied at the [bucket]({{}}riak/kv/2.2.3/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client. @@ -40,7 +40,7 @@ functions. ## Setting Commit Hooks Using Bucket Types -Because hooks are defined at the bucket level, you can create [bucket types](/riak/kv/2.2.3/developing/usage/bucket-types) +Because hooks are defined at the bucket level, you can create [bucket types]({{}}riak/kv/2.2.3/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the @@ -87,13 +87,13 @@ Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our -documentation on [object deletion](/riak/kv/2.2.3/using/reference/object-deletion)) to determine whether a delete is +documentation on [object deletion]({{}}riak/kv/2.2.3/using/reference/object-deletion)) to determine whether a delete is occurring. Erlang pre-commit functions are allowed three possible return values: - A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written. -- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API](/riak/kv/2.2.3/developing/api/http)) along with a generic error message about why the write was blocked. +- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{}}riak/kv/2.2.3/developing/api/http)) along with a generic error message about why the write was blocked. - `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. Errors that occur when processing Erlang pre-commit hooks will be diff --git a/content/riak/kv/2.2.3/developing/usage/conflict-resolution.md b/content/riak/kv/2.2.3/developing/usage/conflict-resolution.md index 889bf574b3..423e780386 100644 --- a/content/riak/kv/2.2.3/developing/usage/conflict-resolution.md +++ b/content/riak/kv/2.2.3/developing/usage/conflict-resolution.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.3/dev/using/conflict-resolution --- -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency -One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered](/riak/kv/2.2.3/learn/concepts/clusters) system in which any [node](/riak/kv/2.2.3/learn/glossary/#node) is capable of receiving requests without requiring that +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. 
It was built as a [clustered]({{}}riak/kv/2.2.3/learn/concepts/clusters) system in which any [node]({{}}riak/kv/2.2.3/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request. -If you are using Riak in an [eventually consistent](/riak/kv/2.2.3/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is unavoidable. Often, Riak can resolve these conflicts on its own -internally if you use causal context, i.e. [vector clocks](/riak/kv/2.2.3/learn/concepts/causal-context#vector-clocks) or [dotted version vectors](/riak/kv/2.2.3/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). +If you are using Riak in an [eventually consistent]({{}}riak/kv/2.2.3/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are unavoidable. Often, Riak can resolve these conflicts on its own +internally if you use causal context, i.e. [vector clocks]({{}}riak/kv/2.2.3/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{}}riak/kv/2.2.3/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). {{% note title="Important note on terminology" %}} In versions of Riak prior to 2.0, vector clocks were the only causal context @@ -44,7 +44,7 @@ though, please bear in mind that we strongly recommend one of the following two options: 1. If your data can be modeled as one of the currently available [Riak - Data Types](/riak/kv/2.2.3/developing/data-types), we recommend using one of these types, + Data Types]({{}}riak/kv/2.2.3/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution. 2. If your data cannot be modeled as one of the available Data Types, @@ -63,8 +63,8 @@ a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents: > -> * [Using Strong Consistency](/riak/kv/2.2.3/developing/app-guide/strong-consistency) --- A guide for developers -> * [Managing Strong Consistency](/riak/kv/2.2.3/configuring/strong-consistency) --- A guide for operators +> * [Using Strong Consistency]({{}}riak/kv/2.2.3/developing/app-guide/strong-consistency) --- A guide for developers +> * [Managing Strong Consistency]({{}}riak/kv/2.2.3/configuring/strong-consistency) --- A guide for operators > * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong consistency @@ -72,10 +72,10 @@ strong consistency feature, please refer to the following documents: Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. -While Riak _does_ have a set of [defaults](/riak/kv/2.2.3/developing/app-guide/replication-properties#available-parameters), there are a variety of general +While Riak _does_ have a set of [defaults]({{}}riak/kv/2.2.3/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, -[using bucket types][usage bucket types]. 
The most important [bucket properties](/riak/kv/2.2.3/learn/concepts/buckets) +[using bucket types][usage bucket types]. The most important [bucket properties]({{}}riak/kv/2.2.3/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties. @@ -87,7 +87,7 @@ If the [`allow_mult`](#siblings) parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client. How Riak resolves those conflicts depends on the value that you set for a different bucket property, -[`last_write_wins`](/riak/kv/2.2.3/learn/concepts/buckets). If `last_write_wins` is set to `false`, +[`last_write_wins`]({{}}riak/kv/2.2.3/learn/concepts/buckets). If `last_write_wins` is set to `false`, Riak will resolve all conflicts on the basis of [timestamps](http://en.wikipedia.org/wiki/Timestamp), which are attached to all Riak objects as metadata. @@ -141,20 +141,20 @@ made in accordance with your data model(s), business needs, and use cases. For examples of client-side sibling resolution, see the following client-library-specific docs: -* [Java](/riak/kv/2.2.3/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.2.3/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.2.3/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.2.3/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.2.3/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/nodejs) In Riak versions 2.0 and later, `allow_mult` is set to `true` by default -for any [bucket types](/riak/kv/2.2.3/developing/usage/bucket-types) that you create. This means +for any [bucket types]({{}}riak/kv/2.2.3/developing/usage/bucket-types) that you create. This means that if you wish to avoid client-side sibling resolution, you have a few options: -* Explicitly create and activate [bucket types](/riak/kv/2.2.3/developing/usage/bucket-types) +* Explicitly create and activate [bucket types]({{}}riak/kv/2.2.3/developing/usage/bucket-types) that set `allow_mult` to `false` -* Use Riak's [Configuration Files](/riak/kv/2.2.3/configuring/reference) to change the [default bucket properties](/riak/kv/2.2.3/configuring/reference#default-bucket-properties) for your +* Use Riak's [Configuration Files]({{}}riak/kv/2.2.3/configuring/reference) to change the [default bucket properties]({{}}riak/kv/2.2.3/configuring/reference#default-bucket-properties) for your cluster. If you set the `buckets.default.allow_mult` parameter to `false`, all bucket types that you create will have `allow_mult` set to `false` by default. @@ -164,7 +164,7 @@ options: When a value is stored in Riak, it is tagged with a piece of metadata called a **causal context** which establishes the object's initial version. Causal context comes in one of two possible forms, depending -on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors](/riak/kv/2.2.3/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks](/riak/kv/2.2.3/learn/concepts/causal-context#vector-clocks) will be used. +on what value you set for `dvv_enabled`. 
If set to `true`, [dotted version vectors]({{}}riak/kv/2.2.3/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{}}riak/kv/2.2.3/learn/concepts/causal-context#vector-clocks) will be used. Causal context essentially enables Riak to compare the different values of objects stored in Riak and to determine a number of important things @@ -189,11 +189,11 @@ If `allow_mult` is set to `true`, you should _always_ use causal context when updating objects, _unless you are certain that no object exists under that key_. Failing to use causal context with mutable data, especially for objects that are frequently updated, can lead to -[sibling explosion](/riak/kv/2.2.3/using/performance/latency-reduction#siblings), which can +[sibling explosion]({{}}riak/kv/2.2.3/using/performance/latency-reduction#siblings), which can produce a variety of problems in your cluster. Fortunately, much of the work involved with using causal context is handled automatically by -Basho's official [client libraries](/riak/kv/2.2.3/developing/client-libraries). Examples can be found for each -client library in the [Object Updates](/riak/kv/2.2.3/developing/usage/updating-objects) document. +Basho's official [client libraries]({{}}riak/kv/2.2.3/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{}}riak/kv/2.2.3/developing/usage/updating-objects) document. ## Siblings @@ -208,7 +208,7 @@ clients, Riak may not be able to choose a single value to store, in which case the object will be given a sibling. These writes could happen on the same node or on different nodes. 2. **Stale causal context** --- Writes from any client using a stale -[causal context](/riak/kv/2.2.3/learn/concepts/causal-context). This is a less likely scenario if a client updates +[causal context]({{}}riak/kv/2.2.3/learn/concepts/causal-context). This is a less likely scenario if a client updates the object by reading the object first, fetching the causal context currently attached to the object, and then returning that causal context to Riak when performing the update (fortunately, our client libraries @@ -357,7 +357,7 @@ curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/key > **Getting started with Riak KV clients** > > If you are connecting to Riak using one of Basho's official -[client libraries](/riak/kv/2.2.3/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started](/riak/kv/2.2.3/developing/getting-started) section. +[client libraries]({{}}riak/kv/2.2.3/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.3/developing/getting-started) section. At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to @@ -478,11 +478,11 @@ by presenting the conflicting objects to the end user. 
For more information on application-side conflict resolution, see our client-library-specific documentation for the following languages: -* [Java](/riak/kv/2.2.3/developing/usage/conflict-resolution/java) -* [Ruby](/riak/kv/2.2.3/developing/usage/conflict-resolution/ruby) -* [Python](/riak/kv/2.2.3/developing/usage/conflict-resolution/python) -* [C#](/riak/kv/2.2.3/developing/usage/conflict-resolution/csharp) -* [Node.js](/riak/kv/2.2.3/developing/usage/conflict-resolution/nodejs) +* [Java]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/java) +* [Ruby]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/ruby) +* [Python]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/python) +* [C#]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/csharp) +* [Node.js]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/nodejs) We won't deal with conflict resolution in this section. Instead, we'll focus on how to use causal context. @@ -610,7 +610,7 @@ once that limit has been exceeded. Sibling explosion occurs when an object rapidly collects siblings without being reconciled. This can lead to myriad issues. Having an enormous object in your node can cause reads of that object to crash -the entire node. Other issues include [increased cluster latency](/riak/kv/2.2.3/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. +the entire node. Other issues include [increased cluster latency]({{}}riak/kv/2.2.3/using/performance/latency-reduction) as the object is replicated and out-of-memory errors. ### Vector Clock Explosion @@ -665,7 +665,7 @@ Parameter | Default value | Description This diagram shows how the values of these parameters dictate the vector clock pruning process: -![Vclock Pruning](/images/vclock-pruning.png) +![Vclock Pruning]({{}}images/vclock-pruning.png) ## More Information diff --git a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/csharp.md index e085e376e4..8a9004a028 100644 --- a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/csharp.md +++ b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/csharp.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.3/dev/using/conflict-resolution/csharp --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak .NET client][riak_dotnet_client]. 
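All of these per-client guides implement the same application-side pattern: fetch an object, inspect its siblings, and write back a single merged value. As a cross-language reference, here is a minimal sketch of that pattern using the official Riak Python client; the resolver name, the `friends`-list merge criterion, and the bucket/key names follow the social-network example used in these guides and are illustrative assumptions, not a canonical implementation.

```python
from riak import RiakClient

# Application-side sibling resolution, assuming a 'siblings' bucket type
# with allow_mult=true as described in these guides. The resolver keeps
# the sibling whose stored value contains the longest friends list.
def longest_friends_list_resolver(riak_object):
    lm = lambda sibling: len(sibling.data['friends'])
    riak_object.siblings = [max(riak_object.siblings, key=lm)]

client = RiakClient()
bucket = client.bucket_type('siblings').bucket('users')
bucket.resolver = longest_friends_list_resolver

obj = bucket.get('bashobunny')  # the resolver runs automatically on fetch
obj.store()                     # write back the resolved value
```

Because the resolver is attached to the bucket, it runs on every fetch, so application code downstream only ever sees a single resolved value.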
diff --git a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/golang.md index edb75ad0b0..73a5524db2 100644 --- a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/golang.md +++ b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/golang.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.3/dev/using/conflict-resolution/golang --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Go client](https://github.com/basho/riak-go-client). diff --git a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/java.md index 0ba2460565..732fecc13a 100644 --- a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/java.md +++ b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/java.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.3/dev/using/conflict-resolution/java --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Java @@ -56,7 +56,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.2.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -189,7 +189,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Java client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.3/developing/usage) section. +the official Java client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.3/developing/usage) section. ## More Advanced Example @@ -258,9 +258,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.3/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.3/developing/data-types/) that have specific conflict resolution mechanics built in. 
If you have data that -can be modeled as a [counter](/riak/kv/2.2.3/developing/data-types/#counters), [set](/riak/kv/2.2.3/developing/data-types/#sets), or [map](/riak/kv/2.2.3/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.3/developing/data-types/#counters), [set]({{}}riak/kv/2.2.3/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.3/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -269,4 +269,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.3/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.3/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/nodejs.md index 5903dfbfbc..4e8591ac5e 100644 --- a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/nodejs.md +++ b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/nodejs.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.3/dev/using/conflict-resolution/nodejs --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). diff --git a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/php.md index 13d14fabd0..9fc2737ee5 100644 --- a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/php.md +++ b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/php.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.3/dev/using/conflict-resolution/php --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak PHP @@ -46,7 +46,7 @@ Let's say that we're building a social network application and storing lists of usernames representing each user's "friends" in the network. Each user will bear the class `User`, which we'll create below. 
All of the data for our application will be stored in buckets that bear the -[bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) `siblings`, and for this bucket type +[bucket type]({{}}riak/kv/2.2.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when they arise. @@ -196,7 +196,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official PHP client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.3/developing/usage) section. +the official PHP client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.3/developing/usage) section. ## More Advanced Example @@ -226,9 +226,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.3/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.3/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.3/developing/data-types/#counters), [set](/riak/kv/2.2.3/developing/data-types/#sets), or [map](/riak/kv/2.2.3/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.3/developing/data-types/#counters), [set]({{}}riak/kv/2.2.3/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.3/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -237,4 +237,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.3/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.3/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/python.md index 659aecddb7..3300532f5a 100644 --- a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/python.md +++ b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/python.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.3/dev/using/conflict-resolution/python --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Python @@ -51,7 +51,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." Each user will be of the class `User`, which we'll create below. 
All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.2.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -185,7 +185,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including code examples -from the official Python client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.3/developing/usage) section. +from the official Python client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.3/developing/usage) section. ## More Advanced Example @@ -240,9 +240,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.3/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.3/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.3/developing/data-types/#counters), [set](/riak/kv/2.2.3/developing/data-types/#sets), or [map](/riak/kv/2.2.3/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.3/developing/data-types/#counters), [set]({{}}riak/kv/2.2.3/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.3/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -251,4 +251,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.3/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.3/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/ruby.md index 0deaee8086..e01335bf5d 100644 --- a/content/riak/kv/2.2.3/developing/usage/conflict-resolution/ruby.md +++ b/content/riak/kv/2.2.3/developing/usage/conflict-resolution/ruby.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.3/dev/using/conflict-resolution/ruby --- -For reasons explained in the [Introduction to conflict resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a +For reasons explained in the [Introduction to conflict resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific criteria. Here, we'll provide a brief guide to conflict resolution using the official [Riak Ruby @@ -49,7 +49,7 @@ below. Let's say that we're building a social network application and storing lists of usernames representing each user's "friends." 
Each user will be of the class `User`, which we'll create below. All of the data for our -application will be stored in buckets that bear the [bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +application will be stored in buckets that bear the [bucket type]({{}}riak/kv/2.2.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set to `true`, which means that Riak will generate siblings in certain cases---siblings that our application will need to be equipped to resolve when necessary. @@ -175,7 +175,7 @@ step is the subject of this tutorial) made You can find more on writing objects to Riak, including examples from -the official Ruby client library, in the [Developing with Riak KV: Usage](/riak/kv/2.2.3/developing/usage) section. +the official Ruby client library, in the [Developing with Riak KV: Usage]({{}}riak/kv/2.2.3/developing/usage) section. ## More Advanced Example @@ -235,9 +235,9 @@ always carry potential drawbacks of this sort. ## Riak Data Types An important thing to always bear in mind when working with conflict -resolution is that Riak offers a variety of [Data Types](/riak/kv/2.2.3/developing/data-types/) that have +resolution is that Riak offers a variety of [Data Types]({{}}riak/kv/2.2.3/developing/data-types/) that have specific conflict resolution mechanics built in. If you have data that -can be modeled as a [counter](/riak/kv/2.2.3/developing/data-types/#counters), [set](/riak/kv/2.2.3/developing/data-types/#sets), or [map](/riak/kv/2.2.3/developing/data-types/#maps), then you should seriously +can be modeled as a [counter]({{}}riak/kv/2.2.3/developing/data-types/#counters), [set]({{}}riak/kv/2.2.3/developing/data-types/#sets), or [map]({{}}riak/kv/2.2.3/developing/data-types/#maps), then you should seriously consider using those Data Types instead of creating your own application-side resolution logic. @@ -247,4 +247,4 @@ set, in particular the `friends` list associated with each `User` object. The merge operation that we built to handle conflict resolution is analogous to the resolution logic that is built into Riak sets. For more information on how you could potentially replace the client-side -resolution that we implemented above, see our [tutorial on Riak sets](/riak/kv/2.2.3/developing/data-types/#sets). +resolution that we implemented above, see our [tutorial on Riak sets]({{}}riak/kv/2.2.3/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.3/developing/usage/creating-objects.md b/content/riak/kv/2.2.3/developing/usage/creating-objects.md index 1ea1d86c40..012e3882b9 100644 --- a/content/riak/kv/2.2.3/developing/usage/creating-objects.md +++ b/content/riak/kv/2.2.3/developing/usage/creating-objects.md @@ -12,7 +12,7 @@ menu: toc: true --- -[usage content types]: /riak/kv/2.2.3/developing/usage/content-types +[usage content types]: {{}}riak/kv/2.2.3/developing/usage/content-types Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` requests. Here is the basic form of writes: ``` PUT /types/<type>/buckets/<bucket>/keys/<key> # If you're using HTTP to interact with Riak, you can also use POST ``` -As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type](/riak/kv/2.2.3/using/cluster-operations/bucket-types). +As an example, let's store an object containing information about a dog named Rufus. 
We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{}}riak/kv/2.2.3/using/cluster-operations/bucket-types). The object we're storing will be very simple, just a basic text snippet of something that Rufus might say. Let's build the object and then store @@ -118,7 +118,7 @@ rsp := svc.Response Notice that we specified both a value for the object, i.e. `WOOF!`, and a content type, `text/plain`. See [content types][usage content types] for more information. -Now, you run the same read operation as in [Reading Objects](/riak/kv/2.2.3/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types](/riak/kv/2.2.3/using/cluster-operations/bucket-types). +Now, you run the same read operation as in [Reading Objects]({{}}riak/kv/2.2.3/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types]({{}}riak/kv/2.2.3/using/cluster-operations/bucket-types). ### Store an Object @@ -138,7 +138,7 @@ PUT /types/TYPE/buckets/BUCKET/keys/KEY There is no need to intentionally create buckets in Riak. They pop into existence when keys are added to them, and disappear when all keys have been removed from them. If you don't specify a bucket's type, the type -[`default`](/riak/kv/2.2.3/developing/usage/bucket-types) will be applied. +[`default`]({{}}riak/kv/2.2.3/developing/usage/bucket-types) will be applied. #### Write Parameters diff --git a/content/riak/kv/2.2.3/developing/usage/custom-extractors.md b/content/riak/kv/2.2.3/developing/usage/custom-extractors.md index f3d016b292..6ebe4e80ab 100644 --- a/content/riak/kv/2.2.3/developing/usage/custom-extractors.md +++ b/content/riak/kv/2.2.3/developing/usage/custom-extractors.md @@ -27,7 +27,7 @@ Content Type | Erlang Module `text/xml` | `yz_xml_extractor` No specified type | `yz_noop_extractor` -There are also built-in extractors for [Riak Data Types](/riak/kv/2.2.3/developing/usage/searching-data-types). +There are also built-in extractors for [Riak Data Types]({{}}riak/kv/2.2.3/developing/usage/searching-data-types). If you're working with a data format that does not have a default Solr extractor, you can create your own and register it with Riak Search. @@ -145,7 +145,7 @@ erlc search_test_extractor.erl To instruct Riak where to find the resulting `search_test_extractor.beam` file, we'll need to add a line to an `advanced.config` file in the node's `/etc` directory (more information -can be found in our documentation on [advanced](/riak/kv/2.2.3/configuring/reference/#advanced-configuration)). Here's an +can be found in our documentation on [advanced]({{}}riak/kv/2.2.3/configuring/reference/#advanced-configuration)). Here's an example: ```advancedconfig @@ -229,7 +229,7 @@ yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extra ## Indexing and Searching HTTP Header Packet Data Now that Solr knows how to extract HTTP header packet data, we need to -create a schema that extends the [default schema](/riak/kv/2.2.3/developing/usage/search-schemas/#creating-a-custom-schema). 
The following fields should be added +create a schema that extends the [default schema]({{}}riak/kv/2.2.3/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added to `` in the schema, which we'll name `http_header_schema` and store in a `http_header_schema.xml` file: @@ -316,7 +316,7 @@ curl -XPUT $RIAK_HOST/search/index/header_data \ -d '{"schema":"http_header_schema"}' ``` -Now, we can create and activate a [bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) +Now, we can create and activate a [bucket type]({{}}riak/kv/2.2.3/developing/usage/bucket-types) for all of the HTTP header data that we plan to store. Any bucket that bears this type will be associated with our `header_data` search index. We'll call our bucket type `http_data_store`. diff --git a/content/riak/kv/2.2.3/developing/usage/deleting-objects.md b/content/riak/kv/2.2.3/developing/usage/deleting-objects.md index 0adba24f4d..1e71119329 100644 --- a/content/riak/kv/2.2.3/developing/usage/deleting-objects.md +++ b/content/riak/kv/2.2.3/developing/usage/deleting-objects.md @@ -103,9 +103,9 @@ curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius If you are updating an object that has been deleted---or if an update might target a deleted object---we recommend that -you first fetch the [causal context](/riak/kv/2.2.3/learn/concepts/causal-context) of the object prior to updating. +you first fetch the [causal context]({{}}riak/kv/2.2.3/learn/concepts/causal-context) of the object prior to updating. This can be done by setting the `deletedvclock` parameter to `true` as -part of the [fetch operation](/riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object). This can also be done +part of the [fetch operation]({{}}riak/kv/2.2.3/developing/api/protocol-buffers/fetch-object). This can also be done with the official Riak clients for Ruby, Java, and Erlang, as in the example below: diff --git a/content/riak/kv/2.2.3/developing/usage/document-store.md b/content/riak/kv/2.2.3/developing/usage/document-store.md index ebadab4c39..dff534193b 100644 --- a/content/riak/kv/2.2.3/developing/usage/document-store.md +++ b/content/riak/kv/2.2.3/developing/usage/document-store.md @@ -16,23 +16,23 @@ aliases: --- Although Riak wasn't explicitly created as a document store, two -features recently added to Riak---[Riak Search](/riak/kv/2.2.3/developing/usage/search/) and [Riak Data Types](/riak/kv/2.2.3/developing/data-types/)---make it possible to use Riak as a +features recently added to Riak---[Riak Search]({{}}riak/kv/2.2.3/developing/usage/search/) and [Riak Data Types]({{}}riak/kv/2.2.3/developing/data-types/)---make it possible to use Riak as a highly scalable document store with rich querying capabilities. In this tutorial, we'll build a basic implementation of a document store using -[Riak maps](/riak/kv/2.2.3/developing/data-types/#maps). +[Riak maps]({{}}riak/kv/2.2.3/developing/data-types/#maps). ## Basic Approach Riak Search enables you to implement a document store in Riak in a variety of ways. You could, for example, store and query JSON objects or XML and then retrieve them later via Solr queries. In this tutorial, -however, we will store data in [Riak maps](/riak/kv/2.2.3/developing/data-types/#maps), +however, we will store data in [Riak maps]({{}}riak/kv/2.2.3/developing/data-types/#maps), index that data using Riak Search, and then run Solr queries against those stored objects. You can think of these Search indexes as **collections**. 
Each indexed document will have an ID generated automatically by Search, and because -we're not interested in running normal [key/value queries](/riak/kv/2.2.3/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys](/riak/kv/2.2.3/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. +we're not interested in running normal [key/value queries]({{}}riak/kv/2.2.3/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{}}riak/kv/2.2.3/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. ## Use Case @@ -69,7 +69,7 @@ Riak Search with an appropriate index and schema. ## Creating a Schema and Index -In the documentation on [search schemas](/riak/kv/2.2.3/developing/usage/search-schemas), you'll find a +In the documentation on [search schemas]({{}}riak/kv/2.2.3/developing/usage/search-schemas), you'll find a baseline schema to be used for creating custom schemas. We'll use that baseline schema here and add the following fields to the `<fields>` list: @@ -242,7 +242,7 @@ as part of our "collection." Now that we know how each element of a blog post can be translated into one of the Riak Data Types, we can create an interface in our application to serve as that translation layer. Using the method -described in [Data Modeling with Riak Data Types](/riak/kv/2.2.3/developing/data-modeling), we can construct a +described in [Data Modeling with Riak Data Types]({{}}riak/kv/2.2.3/developing/data-modeling), we can construct a class that looks like this: ```java diff --git a/content/riak/kv/2.2.3/developing/usage/mapreduce.md b/content/riak/kv/2.2.3/developing/usage/mapreduce.md index 8d2a0ea02c..c6b0173a6c 100644 --- a/content/riak/kv/2.2.3/developing/usage/mapreduce.md +++ b/content/riak/kv/2.2.3/developing/usage/mapreduce.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.3/dev/using/mapreduce --- -[usage 2i]: /riak/kv/2.2.3/developing/usage/secondary-indexes -[usage search]: /riak/kv/2.2.3/developing/usage/search -[usage types]: /riak/kv/2.2.3/developing/usage/bucket-types -[api http]: /riak/kv/2.2.3/developing/api/http -[api pb]: /riak/kv/2.2.3/developing/api/protocol-buffers -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[guide mapreduce]: /riak/kv/2.2.3/developing/app-guide/advanced-mapreduce +[usage 2i]: {{}}riak/kv/2.2.3/developing/usage/secondary-indexes +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search +[usage types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[api http]: {{}}riak/kv/2.2.3/developing/api/http +[api pb]: {{}}riak/kv/2.2.3/developing/api/protocol-buffers +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[guide mapreduce]: {{}}riak/kv/2.2.3/developing/app-guide/advanced-mapreduce {{% note title="Use MapReduce sparingly" %}} In Riak KV, MapReduce is the primary method for non-primary-key-based @@ -116,7 +116,7 @@ assuming that the next phase in the list is a reduce phase. The diagram below provides an illustration of how a coordinating vnode orchestrates a MapReduce job. 
-![MapReduce Diagram](/images/MapReduce-diagram.png) +![MapReduce Diagram]({{}}images/MapReduce-diagram.png) ## Example diff --git a/content/riak/kv/2.2.3/developing/usage/reading-objects.md b/content/riak/kv/2.2.3/developing/usage/reading-objects.md index 43f7e78589..ec61bd1480 100644 --- a/content/riak/kv/2.2.3/developing/usage/reading-objects.md +++ b/content/riak/kv/2.2.3/developing/usage/reading-objects.md @@ -12,11 +12,11 @@ menu: toc: true --- -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode You can think of reads in Riak as analogous to HTTP `GET` requests. You specify a bucket type, bucket, and key, and Riak either returns the -object that's stored there---including its [siblings](/riak/kv/2.2.3/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the +object that's stored there---including its [siblings]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the equivalent of an HTTP `404 Object Not Found`). Here is the basic command form for retrieving a specific key from a bucket: ``` GET /types/<type>/buckets/<bucket>/keys/<key> ``` Here is an example of a read performed on the key `rufus` in the bucket -`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type](/riak/kv/2.2.3/using/cluster-operations/bucket-types) page. +`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type]({{}}riak/kv/2.2.3/using/cluster-operations/bucket-types) page. ```java // In the Java client, it is best to specify a bucket type/bucket/key diff --git a/content/riak/kv/2.2.3/developing/usage/replication.md b/content/riak/kv/2.2.3/developing/usage/replication.md index 346ae71d75..5b4a2ec456 100644 --- a/content/riak/kv/2.2.3/developing/usage/replication.md +++ b/content/riak/kv/2.2.3/developing/usage/replication.md @@ -15,12 +15,12 @@ aliases: - /riak/kv/2.2.3/dev/advanced/replication-properties --- -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[plan backend leveldb]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters Riak was built to act as a multi-node [cluster][concept clusters]. It distributes data across multiple physical servers, which enables it to @@ -42,17 +42,17 @@ manner, you can fine-tune that trade-off. The ability to make these kinds of fundamental choices has immense value for your applications and is one of the features that differentiates Riak from other databases. 
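To make that trade-off concrete before walking through the individual parameters, here is a rough sketch of per-request replication tuning using the official Python client. The `nba_stats` bucket and `michael_jordan` key mirror the curl example later in this document; the stored value is a placeholder.

```python
from riak import RiakClient

# A sketch of per-request replication tuning; the same parameters can
# also be fixed at the bucket level via bucket types, as described below.
client = RiakClient()
bucket = client.bucket('nba_stats')

obj = bucket.new('michael_jordan', data={'ppg': 30.1})
obj.store(w=3, dw=2)   # require 3 write acks, 2 of them durable on disk

fetched = bucket.get('michael_jordan', r=3)  # require 3 read responses
```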
-At the bottom of the page, you'll find a [screencast](/riak/kv/2.2.3/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.2.3/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your replication levels to match your application and business needs. {{% note title="Note on strong consistency" %}} An option introduced in Riak version 2.0 is to use Riak as a -strongly +strongly consistent system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices _all_ availability guarantees when necessary. Therefore, you should consult the -Using +Using Strong Consistency documentation, as this option will not be covered in this tutorial. {{% /note %}} @@ -72,7 +72,7 @@ Riak is to specify those properties ### Replication Properties Through Bucket Types Let's say, for example, that you want to apply an `n_val` of 5, an `r` -of 3, and a `w` of 3 to all of the data in some of the [buckets](/riak/kv/2.2.3/learn/concepts/buckets) that +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.2.3/learn/concepts/buckets) that you're using. In order to set those replication properties, you should create a bucket type that sets those properties. Below is an example: @@ -96,8 +96,8 @@ Parameter | Common name | Default value | Description `n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored `r` | R | `quorum` | The number of servers that must respond to a read request `w` | W | `quorum` | Number of servers that must respond to a write request -`pr` | PR | `0` | The number of primary vnodes that must respond to a read request -`pw` | PW | `0` | The number of primary vnodes that must respond to a write request +`pr` | PR | `0` | The number of primary vnodes that must respond to a read request +`pw` | PW | `0` | The number of primary vnodes that must respond to a write request `dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk `rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. `notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). @@ -106,7 +106,7 @@ Parameter | Common name | Default value | Description ## A Primer on N, R, and W The most important thing to note about Riak's replication controls is -that they can be at the bucket level. You can use [bucket types](/riak/kv/2.2.3/developing/usage/bucket-types) +that they can be at the bucket level. You can use [bucket types]({{}}riak/kv/2.2.3/developing/usage/bucket-types) to set up bucket `A` to use a particular set of replication properties and bucket `B` to use entirely different properties. @@ -318,7 +318,7 @@ seeks to write the object to is unavailable. 
## Primary Reads and Writes with PR and PW -In Riak's replication model, there are N [vnodes](/riak/kv/2.2.3/learn/glossary/#vnode), +In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.2.3/learn/glossary/#vnode), called _primary vnodes_, that hold primary responsibility for any given key. Riak will attempt reads and writes to primary vnodes first, but in case of failure, those operations will go to failover nodes in order to @@ -356,7 +356,7 @@ successful. The default value is `quorum` (more on symbolic names below). How quickly and robustly data is written to disk depends on the configuration of your backend or backends. For more details, see the -documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends](/riak/kv/2.2.3/setup/planning/backend/multi). +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{}}riak/kv/2.2.3/setup/planning/backend/multi). ## Delete Quorum with RW @@ -531,9 +531,9 @@ curl -XPUT \ http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 ``` -All of Basho's [official Riak clients](/riak/kv/2.2.3/developing/client-libraries) enable you to +All of Basho's [official Riak clients]({{}}riak/kv/2.2.3/developing/client-libraries) enable you to set replication properties this way. For more detailed information, -refer to the tutorial on [basic key/value operations in Riak KV](/riak/kv/2.2.3/developing/getting-started) +refer to the tutorial on [basic key/value operations in Riak KV]({{}}riak/kv/2.2.3/developing/getting-started) or to client-specific documentation: * [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) @@ -547,7 +547,7 @@ In case the above explanations were a bit too abstract for your tastes, the following table lays out a number of possible scenarios for reads and writes in Riak and how Riak is likely to respond. Some of these scenarios involve issues surrounding conflict resolution, vector clocks, -and siblings, so we recommend reading the [Vector Clocks](/riak/kv/2.2.3/learn/concepts/causal-context#vector-clocks) documentation for more information. +and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.2.3/learn/concepts/causal-context#vector-clocks) documentation for more information. #### Read Scenarios @@ -557,8 +557,8 @@ vnodes responsible for an object. Scenario | What happens in Riak :--------|:-------------------- All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client -2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings -2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. 
Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
 2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
 2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
 
diff --git a/content/riak/kv/2.2.3/developing/usage/search-schemas.md b/content/riak/kv/2.2.3/developing/usage/search-schemas.md
index a907d5f560..424779e56b 100644
--- a/content/riak/kv/2.2.3/developing/usage/search-schemas.md
+++ b/content/riak/kv/2.2.3/developing/usage/search-schemas.md
@@ -15,17 +15,17 @@ aliases:
   - /riak/kv/2.2.3/dev/advanced/search-schema
 ---
 
-[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters
+[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters
 
 > **Note on Search 2.0 vs. Legacy Search**
 >
 > This document refers to the new Riak Search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed
-Yokozuna). For information about the deprecated Riak Search, visit [the old Using Riak Search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/).
+Yokozuna).
 
 Riak Search is built for ease of use, allowing you to write values into
 Riak and query for values using Solr. Riak Search does a lot of work
-under the hood to convert your values---plain text, JSON, XML, [Riak Data Types](/riak/kv/2.2.3/developing/data-types/), and [more](/riak/kv/2.2.3/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{}}riak/kv/2.2.3/developing/data-types/), and [more]({{}}riak/kv/2.2.3/developing/usage/custom-extractors)---into something that can be indexed and searched later.
 Nonetheless, you must still instruct Riak/Solr how to index a value. Are
 you providing an array of strings? An integer? A date? Is your text in
 English or Russian?
You can provide such instructions to Riak Search by @@ -223,7 +223,7 @@ Malformed JSON or XML will cause Riak Search to index a key and set Field | Name | Description :-------|:-----|:----------- `_yz_id` | ID | Unique identifier of this Solr document -`_yz_ed` | Entropy Data | Data related to [active anti-entropy](/riak/kv/2.2.3/learn/concepts/active-anti-entropy) +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy) `_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes `_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions `_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them diff --git a/content/riak/kv/2.2.3/developing/usage/search.md b/content/riak/kv/2.2.3/developing/usage/search.md index 1cc7cf5a8d..fbce22758f 100644 --- a/content/riak/kv/2.2.3/developing/usage/search.md +++ b/content/riak/kv/2.2.3/developing/usage/search.md @@ -68,7 +68,7 @@ support. All `curl` examples in this document assume that you have set an environment variable named `RIAK_HOST`, which points to a Riak base URL, such as `http://localhost:8098`. The appropriate value for `RIAK_HOST` -will depend on your [configuration](/riak/kv/2.2.3/configuring/reference#client-interfaces). +will depend on your [configuration]({{}}riak/kv/2.2.3/configuring/reference#client-interfaces). ## Creating an Index @@ -144,7 +144,7 @@ curl -XPUT $RIAK_HOST/search/index/famous > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.3/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.2.3/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.3/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.3/developing/getting-started) section. Note that the above command is exactly the same as the following, which @@ -345,7 +345,7 @@ ingest according to your schema. ## Riak Search Security Setup -[Security](/riak/kv/2.2.3/using/security/) is a new feature as of +[Security]({{}}riak/kv/2.2.3/using/security/) is a new feature as of Riak 2.0 that lets an administrator limit access to certain resources. In the case of search, your options are to limit administration of schemas or indexes (the `search.admin` permission) to certain users, and @@ -641,7 +641,7 @@ store opaque values in Riak? For that, we employ extractors. Extractors are modules in Riak that accept a Riak value with a certain content type and convert it into a list of fields that can be indexed by Solr. This is done transparently and automatically as part of the -indexing process. You can even create your own [custom extractors](/riak/kv/2.2.3/developing/usage/custom-extractors). +indexing process. You can even create your own [custom extractors]({{}}riak/kv/2.2.3/developing/usage/custom-extractors). 
Our current example uses the JSON extractor, but Riak Search also extracts indexable fields from the following content types: @@ -649,13 +649,13 @@ extracts indexable fields from the following content types: * JSON (`application/json`) * XML (`application/xml`, `text/xml`) * Plain text (`text/plain`) -* [Riak Data Types](/riak/kv/2.2.3/developing/data-types/) +* [Riak Data Types]({{}}riak/kv/2.2.3/developing/data-types/) * counter (`application/riak_counter`) * map (`application/riak_map`) * set (`application/riak_set`) * noop (unknown content type) -More on Riak Data Types can be found in [Riak Data Types and Search](/riak/kv/2.2.3/developing/usage/searching-data-types). +More on Riak Data Types can be found in [Riak Data Types and Search]({{}}riak/kv/2.2.3/developing/usage/searching-data-types). In the examples we've seen, the JSON field `name_s` is translated to a Solr index document field insert. Solr will index any field that it @@ -689,7 +689,7 @@ The above JSON will insert a list of three values into Solr to be indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. You can also create your own custom extractors if your data doesn't fit -one of the default types. A full tutorial can be found in [Custom Search Extractors](/riak/kv/2.2.3/developing/usage/custom-extractors). +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{}}riak/kv/2.2.3/developing/usage/custom-extractors). ### Automatic Fields @@ -1406,7 +1406,7 @@ fix this shortcoming in a future version of Riak. ### MapReduce Riak Search allows for piping search results as inputs for -[MapReduce](/riak/kv/2.2.3/developing/usage/mapreduce/) jobs. This is a useful cross-section for +[MapReduce]({{}}riak/kv/2.2.3/developing/usage/mapreduce/) jobs. This is a useful cross-section for performing post-calculations of results or aggregations of ad-hoc queries. The Riak Search MapReduce integration works similarly to regular MapReduce, with the notable exception that your input is not a diff --git a/content/riak/kv/2.2.3/developing/usage/searching-data-types.md b/content/riak/kv/2.2.3/developing/usage/searching-data-types.md index 0d7f4bb2f5..d4a1f5d459 100644 --- a/content/riak/kv/2.2.3/developing/usage/searching-data-types.md +++ b/content/riak/kv/2.2.3/developing/usage/searching-data-types.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.3/dev/search/search-data-types --- -Although [Riak Data Types](/riak/kv/2.2.3/developing/data-types) function differently from other +Although [Riak Data Types]({{}}riak/kv/2.2.3/developing/data-types) function differently from other Riak objects in some respects, when you're using Search you can think of them as normal Riak objects with special metadata attached (metadata -that you don't need to worry about as a user). Riak's [counters](/riak/kv/2.2.3/developing/data-types/#counters), [sets](/riak/kv/2.2.3/developing/data-types/#sets), and [maps](/riak/kv/2.2.3/developing/data-types/#maps) +that you don't need to worry about as a user). Riak's [counters]({{}}riak/kv/2.2.3/developing/data-types/#counters), [sets]({{}}riak/kv/2.2.3/developing/data-types/#sets), and [maps]({{}}riak/kv/2.2.3/developing/data-types/#maps) can be indexed and have their contents searched just like other Riak objects. @@ -69,7 +69,7 @@ to search Data Types, they are provided only for reference. 
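As a preview of how these default schemas are queried, here is a small sketch using the Solr HTTP endpoint that later examples in this document rely on. It assumes `RIAK_HOST` is set as in the other curl examples and that a search index named `scores` exists (the index name is hypothetical):

```bash
# All counters in the index with a value of 20 or more, i.e. counter:[20 TO *]
curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:%5B20%20TO%20*%5D"

# Every counter in the index
curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:*"
```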
### Top-level Schemas -The default schema for [counters](/riak/kv/2.2.3/developing/data-types/#counters) indexes each +The default schema for [counters]({{}}riak/kv/2.2.3/developing/data-types/#counters) indexes each counter as an integer. ```xml @@ -86,7 +86,7 @@ Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 Counters with a value of 15 | `counter:15` All counters within the index | `counter:*` -The schema for [sets](/riak/kv/2.2.3/developing/data-types/#sets) indexes each element of a set as +The schema for [sets]({{}}riak/kv/2.2.3/developing/data-types/#sets) indexes each element of a set as a string and indexes the set itself as multi-valued. ```xml @@ -105,7 +105,7 @@ All sets within the index | `set:*` ### Embedded Schemas -For searching within [maps](/riak/kv/2.2.3/developing/data-types/#maps), there are four schemas +For searching within [maps]({{}}riak/kv/2.2.3/developing/data-types/#maps), there are four schemas for embedded, aka dynamic, fields. Flags are indexed as booleans: ```xml @@ -155,7 +155,7 @@ slightly more complex map example. Let's say that we're storing scores in a multiplayer online game in Riak. The game is called Boulderdash and it involves smashing digital boulders armed with nothing but witty retorts and arcane trivia -knowledge. We'll create and activate a [bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) for [storing counters](/riak/kv/2.2.3/developing/data-types/#counters) simply called +knowledge. We'll create and activate a [bucket type]({{}}riak/kv/2.2.3/developing/usage/bucket-types) for [storing counters]({{}}riak/kv/2.2.3/developing/data-types/#counters) simply called `counters`, like so: ```bash @@ -615,7 +615,7 @@ curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | jsonpp ## Sets Example Let's say that we're storing information about the hobbies of a group of -people in sets. We'll create and activate a [bucket type](/riak/kv/2.2.3/developing/usage/bucket-types) for [storing sets](/riak/kv/2.2.3/developing/data-types/#sets) simply called `sets`, +people in sets. We'll create and activate a [bucket type]({{}}riak/kv/2.2.3/developing/usage/bucket-types) for [storing sets]({{}}riak/kv/2.2.3/developing/data-types/#sets) simply called `sets`, like so: ```bash @@ -987,9 +987,9 @@ Just as expected, both sets we stored contain the element `winning`. ## Maps Example -This example will build on the example in the [Using Data Types](/riak/kv/2.2.3/developing/data-types) +This example will build on the example in the [Using Data Types]({{}}riak/kv/2.2.3/developing/data-types) tutorial. That tutorial walks you through storing CMS-style user data in -Riak [maps](/riak/kv/2.2.3/developing/data-types/#maps), and we'd suggest that you +Riak [maps]({{}}riak/kv/2.2.3/developing/data-types/#maps), and we'd suggest that you familiarize yourself with that tutorial first. 
More specifically, user data is stored in the following fields in each user's map: diff --git a/content/riak/kv/2.2.3/developing/usage/secondary-indexes.md b/content/riak/kv/2.2.3/developing/usage/secondary-indexes.md index 8927b54fe6..3e74df9a0a 100644 --- a/content/riak/kv/2.2.3/developing/usage/secondary-indexes.md +++ b/content/riak/kv/2.2.3/developing/usage/secondary-indexes.md @@ -15,29 +15,29 @@ aliases: - /riak/kv/2.2.3/dev/using/2i --- -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.3/setup/planning/backend/memory -[use ref strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency +[plan backend leveldb]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.3/setup/planning/backend/memory +[use ref strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.2.3/developing/usage/search/) rather than secondary indexes for +recommend [Riak Search]({{}}riak/kv/2.2.3/developing/usage/search/) rather than secondary indexes for a variety of reasons. Most importantly, Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, at write time, with one or more queryable values. Those values can then -be used to find multiple objects in Riak. If you're storing [user data](/riak/kv/2.2.3/developing/data-modeling/#user-accounts), for example, you could tag each object +be used to find multiple objects in Riak. If you're storing [user data]({{}}riak/kv/2.2.3/developing/data-modeling/#user-accounts), for example, you could tag each object associated with that user with a username or other unique marker. Once tagged, you could find all objects in a Riak bucket sharing that tag. Secondary indexes can be either a binary or string, such as `sensor_1_data` or `admin_user` or `click_event`, or an integer, such as `99` or `141121`. -[Riak Search](/riak/kv/2.2.3/developing/usage/search/) serves analogous purposes but is quite +[Riak Search]({{}}riak/kv/2.2.3/developing/usage/search/) serves analogous purposes but is quite different because it parses key/value data itself and builds indexes on the basis of Solr schemas. @@ -51,13 +51,13 @@ backends. * Allows querying by exact match or range on one index * Allows pagination of results * Allows streaming of results -* Query results can be used as input to a [MapReduce](/riak/kv/2.2.3/developing/usage/mapreduce/) +* Query results can be used as input to a [MapReduce]({{}}riak/kv/2.2.3/developing/usage/mapreduce/) query > **Note on 2i and strong consistency** Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in -[strongly consistent buckets](/riak/kv/2.2.3/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +[strongly consistent buckets]({{}}riak/kv/2.2.3/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. @@ -75,7 +75,7 @@ you to discover them later. 
Indexing enables you to tag those objects and find all objects with the same tag in a specified bucket later on. 2i is thus recommended when your use case requires an easy-to-use search -mechanism that does not require a schema (as does [Riak Search](/riak/kv/2.2.3/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +mechanism that does not require a schema (as does [Riak Search]({{}}riak/kv/2.2.3/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that enables an application to tell Riak things like "fetch all objects tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged with numbers between 1500 and 1509." @@ -89,11 +89,11 @@ piggybacks off of read-repair. * If your ring size exceeds 512 partitions, 2i can cause performance issues in large clusters. * When you need more than the exact match and range searches that 2i - supports. If that's the case, we recommend checking out [Riak Search](/riak/kv/2.2.3/developing/usage/search/). + supports. If that's the case, we recommend checking out [Riak Search]({{}}riak/kv/2.2.3/developing/usage/search/). * When you want to use composite queries. A query like `last_name=zezeski AND state=MD` would have to be split into two queries and the results merged (or it would need to involve - [MapReduce](/riak/kv/2.2.3/developing/usage/mapreduce/)). + [MapReduce]({{}}riak/kv/2.2.3/developing/usage/mapreduce/)). ## Query Interfaces and Examples @@ -259,8 +259,8 @@ curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.3/developing/client-libraries), you can find more information about getting started with -your client in the [Developing with Riak KV: Getting Started](/riak/kv/2.2.3/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.3/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.3/developing/getting-started) section. This has accomplished the following: @@ -1144,8 +1144,8 @@ Could not parse field 'field2_int', value 'bar'. > **Note on 2i queries and the R parameter** > -> For all 2i queries, the [R](/riak/kv/2.2.3/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, -which means that queries that are run while [handoffs](/riak/kv/2.2.3/learn/glossary/#hinted-handoff) and related operations are underway may not +> For all 2i queries, the [R]({{}}riak/kv/2.2.3/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{}}riak/kv/2.2.3/learn/glossary/#hinted-handoff) and related operations are underway may not return all keys as expected. 
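Before the interface-specific examples below, here is a minimal end-to-end sketch over HTTP: store an object with a binary index tag, then query for every key carrying that tag. It assumes `RIAK_HOST` is set, and the bucket, key, and index values are hypothetical:

```bash
# Write an object and attach a binary secondary index via a header
curl -XPOST "$RIAK_HOST/types/default/buckets/users/keys/john_smith" \
  -H "Content-Type: application/json" \
  -H "x-riak-index-twitter_bin: jsmith123" \
  -d '{"name": "John Smith"}'

# Retrieve the keys of all objects tagged with that index value
curl "$RIAK_HOST/types/default/buckets/users/index/twitter_bin/jsmith123"
```

Remember that these queries run with R set to 1, per the note above, so results gathered while handoff is underway may be incomplete.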
### Exact Match
diff --git a/content/riak/kv/2.2.3/developing/usage/security.md b/content/riak/kv/2.2.3/developing/usage/security.md
index 717801cdce..0bd185a0a1 100644
--- a/content/riak/kv/2.2.3/developing/usage/security.md
+++ b/content/riak/kv/2.2.3/developing/usage/security.md
@@ -15,49 +15,49 @@ aliases:
   - /riak/kv/2.2.3/dev/advanced/client-security
 ---
 
-Versions of Riak 2.0 and later come equipped with a [security subsystem](/riak/kv/2.2.3/using/security/basics) that enables you to choose
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{}}riak/kv/2.2.3/using/security/basics) that enables you to choose
 
 * which Riak users/clients are authorized to perform a wide variety of
   Riak operations, and
 * how those users/clients are required to authenticate themselves.
 
-The following four authentication mechanisms, aka [security sources](/riak/kv/2.2.3/using/security/managing-sources/) are available:
+The following four authentication mechanisms, aka [security sources]({{}}riak/kv/2.2.3/using/security/managing-sources/) are available:
 
-* [Trust](/riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication)-based
+* [Trust]({{}}riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication)-based
   authentication enables you to specify trusted
   [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
   from which all clients will be authenticated by default
-* [Password](/riak/kv/2.2.3/using/security/managing-sources/#password-based-authentication)-based authentication requires
+* [Password]({{}}riak/kv/2.2.3/using/security/managing-sources/#password-based-authentication)-based authentication requires
   that clients provide a username and password
-* [Certificate](/riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication
+* [Certificate]({{}}riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication
   requires that clients present an SSL certificate
-* [Pluggable authentication module (PAM)](/riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+* [Pluggable authentication module (PAM)]({{}}riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication)-based authentication requires
   clients to authenticate using the PAM service specified using the
-  [`riak-admin security`](/riak/kv/2.2.3/using/security/managing-sources/#managing-sources)
+  [`riak-admin security`]({{}}riak/kv/2.2.3/using/security/managing-sources/#managing-sources)
   command line interface
 
 Riak's approach to security is highly flexible. If you choose to use
 Riak's security feature, you do not need to require that all clients
 authenticate via the same means. Instead, you can specify authentication
 sources on a client-by-client, i.e. user-by-user, basis. This means that
-you can require clients performing, say, [MapReduce](/riak/kv/2.2.3/developing/usage/mapreduce/)
-operations to use certificate auth, while clients performing [K/V Operations](/riak/kv/2.2.3/developing/usage) have to use username and password. The approach
+you can require clients performing, say, [MapReduce]({{}}riak/kv/2.2.3/developing/usage/mapreduce/)
+operations to use certificate auth, while clients performing [K/V Operations]({{}}riak/kv/2.2.3/developing/usage) have to use username and password. The approach
 that you adopt will depend on your security needs. This document
 provides a general overview of how that works.
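As a rough sketch of what that per-user flexibility looks like on the server side (the user names and CIDR here are hypothetical), an administrator might register different security sources for different clients with `riak-admin`:

```bash
# Create two users with different authentication requirements
riak-admin security add-user mr_user
riak-admin security add-user kv_user password=rosebud

# Require certificate auth for one and password auth for the other
riak-admin security add-source mr_user 0.0.0.0/0 certificate
riak-admin security add-source kv_user 0.0.0.0/0 password
```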
For managing security in Riak itself, see the following documents:
 
-* [Authentication and Authorization](/riak/kv/2.2.3/using/security/basics)
-* [Managing Security Sources](/riak/kv/2.2.3/using/security/managing-sources/)
+* [Authentication and Authorization]({{}}riak/kv/2.2.3/using/security/basics)
+* [Managing Security Sources]({{}}riak/kv/2.2.3/using/security/managing-sources/)
 
 We also provide client-library-specific guides for the following
 officially supported clients:
 
-* [Java](/riak/kv/2.2.3/developing/usage/security/java)
-* [Ruby](/riak/kv/2.2.3/developing/usage/security/ruby)
-* [PHP](/riak/kv/2.2.3/developing/usage/security/php)
-* [Python](/riak/kv/2.2.3/developing/usage/security/python)
-* [Erlang](/riak/kv/2.2.3/developing/usage/security/erlang)
+* [Java]({{}}riak/kv/2.2.3/developing/usage/security/java)
+* [Ruby]({{}}riak/kv/2.2.3/developing/usage/security/ruby)
+* [PHP]({{}}riak/kv/2.2.3/developing/usage/security/php)
+* [Python]({{}}riak/kv/2.2.3/developing/usage/security/python)
+* [Erlang]({{}}riak/kv/2.2.3/developing/usage/security/erlang)
 
 ## Certificates, Keys, and Authorities
 
@@ -82,12 +82,12 @@ keys should never be shared beyond Riak and connecting clients.
 
 > **HTTP not supported**
 >
 > Certificate-based authentication is available only through Riak's
-[Protocol Buffers](/riak/kv/2.2.3/developing/api/protocol-buffers/) interface. It is not available through the
-[HTTP API](/riak/kv/2.2.3/developing/api/http).
+[Protocol Buffers]({{}}riak/kv/2.2.3/developing/api/protocol-buffers/) interface. It is not available through the
+[HTTP API]({{}}riak/kv/2.2.3/developing/api/http).
 
 ### Default Names
 
-In Riak's [configuration files](/riak/kv/2.2.3/configuring/reference/#security), the
+In Riak's [configuration files]({{}}riak/kv/2.2.3/configuring/reference/#security), the
 default certificate file names are as follows:
 
 Cert | Filename
diff --git a/content/riak/kv/2.2.3/developing/usage/security/erlang.md b/content/riak/kv/2.2.3/developing/usage/security/erlang.md
index 998cece76a..b4849f7cd7 100644
--- a/content/riak/kv/2.2.3/developing/usage/security/erlang.md
+++ b/content/riak/kv/2.2.3/developing/usage/security/erlang.md
@@ -19,9 +19,9 @@ aliases:
 
 This tutorial shows you how to set up a Riak Erlang client to
 authenticate itself when connecting to Riak.
 
-If you are using [trust](/riak/kv/2.2.3/using/security/managing-sources/), [PAM-](/riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication), you can use the security setup described [below](#erlang-client-basics). [Password](/riak/kv/2.2.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+If you are using [trust-]({{}}riak/kv/2.2.3/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{}}riak/kv/2.2.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered
 in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+[certificate]({{}}riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
 the instructions in the [section below](#certificate-based-authentication).
{{% note title="Note on certificate generation" %}} @@ -43,7 +43,7 @@ connection to `localhost` on port 8087: If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.3/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.2.3/using/security/managing-sources/) you choose. In addition, all clients should provide a username. The example above created a connection to Riak without specifying a username or CA. That information is specified as a list of options passed to the @@ -68,7 +68,7 @@ This client is not currently set up to use any of the available security sources, with the exception of trust-based authentication, provided that the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from which the client is connecting has been specified as trusted. More -on specifying trusted CIDRs can be found in [Trust-based Authentication](/riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication). +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{}}riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication). ## Password-based Authentication @@ -90,10 +90,10 @@ SecurityOptions = [ ## PAM-based Authentication If you have specified that a specific client be authenticated using -[PAM](/riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication), you will +[PAM]({{}}riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication), you will need to provide a CA as well as the username and password that you specified when creating the user in Riak. For more, see our -documentation on [User Management](/riak/kv/2.2.3/using/security/basics/#user-management). +documentation on [User Management]({{}}riak/kv/2.2.3/using/security/basics/#user-management). ## Certificate-based Authentication diff --git a/content/riak/kv/2.2.3/developing/usage/security/java.md b/content/riak/kv/2.2.3/developing/usage/security/java.md index b5cc26f9b7..fa0da70b63 100644 --- a/content/riak/kv/2.2.3/developing/usage/security/java.md +++ b/content/riak/kv/2.2.3/developing/usage/security/java.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak Java client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#java-client-basics). [Certificate](/riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{}}riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the Java client. {{% note title="Note on certificate generation" %}} @@ -39,7 +39,7 @@ configuration. In this document, we will be working with only one node. 
If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.3/using/security/managing-sources/) you +regardless of which [security source]({{}}riak/kv/2.2.3/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port diff --git a/content/riak/kv/2.2.3/developing/usage/security/php.md b/content/riak/kv/2.2.3/developing/usage/security/php.md index bfac94ccbf..4580722f76 100644 --- a/content/riak/kv/2.2.3/developing/usage/security/php.md +++ b/content/riak/kv/2.2.3/developing/usage/security/php.md @@ -19,8 +19,8 @@ aliases: This tutorial shows you how to set up a Riak PHP client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication) or [PAM](/riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the -security setup described [below](#php-client-basics). [Certificate](/riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +If you are using [trust-]({{}}riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication) or [PAM]({{}}riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). [Certificate]({{}}riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is not yet supported in the PHP client due to limitations of the HTTP interface of Riak. ## PHP Client Basics @@ -33,7 +33,7 @@ only one node. If you are using Riak security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.3/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +regardless of which [security source]({{}}riak/kv/2.2.3/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of security source. The example below sets up a single node object (we'll simply call it `node`) that connects to Riak on `localhost` and on port 8087 and specifies `riakuser` as a username. That object will be used to diff --git a/content/riak/kv/2.2.3/developing/usage/security/python.md b/content/riak/kv/2.2.3/developing/usage/security/python.md index 110fe0f1af..915c008926 100644 --- a/content/riak/kv/2.2.3/developing/usage/security/python.md +++ b/content/riak/kv/2.2.3/developing/usage/security/python.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Python client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.3/using/security/managing-sources/) or [PAM-](/riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication), you can use the security -setup described [below](#python-client-basics). 
[Password](/riak/kv/2.2.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+If you are using [trust-]({{}}riak/kv/2.2.3/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{}}riak/kv/2.2.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered
 in a [later section](#password-based-authentication). If you are using
-[certificate](/riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+[certificate]({{}}riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
 the instructions in the [section below](#certificate-based-authentication).
 
 {{% note title="Note on certificate generation" %}}
@@ -51,7 +51,7 @@ instantiation by creating a `SecurityCreds` object.
 
 If you are using Riak Security, _all_ connecting clients should have
 access to the same Certificate Authority (CA) used on the server side,
-regardless of which [security source](/riak/kv/2.2.3/using/security/managing-sources/) you
+regardless of which [security source]({{}}riak/kv/2.2.3/using/security/managing-sources/) you
 choose. All clients should also provide a username. The example below
 sets up a client object (we'll simply call it `client`) that connects
 to Riak on `localhost` and on port 8087 without any security credentials:
@@ -83,7 +83,7 @@ provided that the
 [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
 which the client is connecting has been specified as trusted. More on
 specifying trusted CIDRs can be found in [Trust-based
-Authentication](/riak/kv/2.2.3/using/security/managing-sources/#Trust-based-Authentication).
+Authentication]({{}}riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication).
 
 **Note**: The examples in the following sections specify certs on the
 basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
@@ -106,10 +106,10 @@ creds = SecurityCreds(username='riakuser',
 
 ## PAM-based Authentication
 
 If you have specified that a specific client be authenticated using
-[PAM](/riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication), you will
+[PAM]({{}}riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication), you will
 need to provide a CA as well as the username and password that you
 specified when creating the user in Riak. For more, see our
-documentation on [User Management](/riak/kv/2.2.3/using/security/basics/#user-management).
+documentation on [User Management]({{}}riak/kv/2.2.3/using/security/basics/#user-management).
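For reference, the server-side half of PAM-based authentication is configured with `riak-admin`; the sketch below assumes a user named `riakuser` and a PAM service named `riak` (both hypothetical, with the service defined under `/etc/pam.d`):

```bash
# Authenticate riakuser connections from localhost against the
# PAM service definition in /etc/pam.d/riak
riak-admin security add-source riakuser 127.0.0.1/32 pam service=riak
```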
## Certificate-based Authentication @@ -138,7 +138,7 @@ creds = SecurityCreds(username='riakuser', ## Specifying Ciphers -To specify a list of preferred [security ciphers](/riak/kv/2.2.3/using/security/basics/#security-ciphers), you can pass in a colon-delimited +To specify a list of preferred [security ciphers]({{}}riak/kv/2.2.3/using/security/basics/#security-ciphers), you can pass in a colon-delimited string to the `ciphers` parameter: ```python diff --git a/content/riak/kv/2.2.3/developing/usage/security/ruby.md b/content/riak/kv/2.2.3/developing/usage/security/ruby.md index 2a548c13e6..37c27eac60 100644 --- a/content/riak/kv/2.2.3/developing/usage/security/ruby.md +++ b/content/riak/kv/2.2.3/developing/usage/security/ruby.md @@ -19,10 +19,10 @@ aliases: This tutorial shows you how to set up a Riak Ruby client to authenticate itself when connecting to Riak. -If you are using [trust-](/riak/kv/2.2.3/using/security/managing-sources/) or [PAM](/riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you +If you are using [trust-]({{}}riak/kv/2.2.3/using/security/managing-sources/) or [PAM]({{}}riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. -[Password](/riak/kv/2.2.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered -in a [later section](#password-based-authentication), while [certificate](/riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication +[Password]({{}}riak/kv/2.2.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{}}riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is covered [further down](#certificate-based-authentication). {{% note title="Note on certificate generation" %}} @@ -42,7 +42,7 @@ needs to be used can be passed to the object upon instantiation in an If you are using Riak Security, _all_ connecting clients should have access to the same Certificate Authority (CA) used on the server side, -regardless of which [security source](/riak/kv/2.2.3/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +regardless of which [security source]({{}}riak/kv/2.2.3/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects to Riak on `localhost` and on port 8087, specifies `riakuser` as a username, and points the client to a CA located at `/ssl_dir/cacertfile.pem`. @@ -63,7 +63,7 @@ client = Riak::Client.new( This client object is currently not set up to use any of the available security sources, except trust-based auth, provided that the CIDR from which the client is connecting has been specified as trusted. More on -this in [Trust-based Authentication](/riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication). +this in [Trust-based Authentication]({{}}riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication). 
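The trusted-CIDR case mentioned above is configured on the server rather than in the client; a minimal sketch (the CIDR here is only an example) looks like this:

```bash
# Trust all clients connecting from the loopback interface
riak-admin security add-source all 127.0.0.1/32 trust
```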
## Password-based Authentication
 
@@ -86,10 +86,10 @@ client = Riak::Client.new(
 
 ## PAM-based Authentication
 
 If you have specified that a specific client be authenticated using
-[PAM](/riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication), you will
+[PAM]({{}}riak/kv/2.2.3/using/security/managing-sources/#pam-based-authentication), you will
 need to provide a CA as well as the username and password that you
 specified when creating the user in Riak. For more, see our
-documentation on [User Management](/riak/kv/2.2.3/using/security/basics#user-management).
+documentation on [User Management]({{}}riak/kv/2.2.3/using/security/basics#user-management).
 
 ## Certificate-based Authentication
 
diff --git a/content/riak/kv/2.2.3/developing/usage/updating-objects.md b/content/riak/kv/2.2.3/developing/usage/updating-objects.md
index 297db5ad9b..71bf93d7ae 100644
--- a/content/riak/kv/2.2.3/developing/usage/updating-objects.md
+++ b/content/riak/kv/2.2.3/developing/usage/updating-objects.md
@@ -15,7 +15,7 @@ aliases:
   - /riak/kv/2.2.3/dev/using/updates
 ---
 
-[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode
+[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode
 
 ## Using Causal Context
 
 If an object already exists under a certain key and you want to write a
 new object to that key, Riak needs to know what to do, especially if
 multiple writes are happening at the same time. Which of the objects
 being written should be deemed correct? These kinds of scenarios can
-arise quite frequently in distributed, [eventually consistent](/riak/kv/2.2.3/learn/concepts/eventual-consistency) systems.
+arise quite frequently in distributed, [eventually consistent]({{}}riak/kv/2.2.3/learn/concepts/eventual-consistency) systems.
 
-Riak decides which object to choose in case of conflict using [causal context](/riak/kv/2.2.3/learn/concepts/causal-context). These objects track the causal history of objects.
+Riak decides which object to choose in case of conflict using [causal context]({{}}riak/kv/2.2.3/learn/concepts/causal-context). These context objects track the causal history of objects.
 They are attached to _all_ Riak objects as metadata, and they are not
 readable by humans. They may sound complex---and they are fairly complex
 behind the scenes---but using them in your application is very simple.
 
@@ -33,12 +33,12 @@ behind the scenes---but using them in your application is very simple.
 Using causal context in an update would involve the following steps:
 
 1. Fetch the object
-2. Modify the object's value (without modifying the fetched [context object](/riak/kv/2.2.3/learn/concepts/causal-context)
+2. Modify the object's value (without modifying the fetched [context object]({{}}riak/kv/2.2.3/learn/concepts/causal-context))
 3. Write the new object to Riak
 
 Step 2 is the most important here. All of Basho's official Riak clients
-enable you to modify an object's value without modifying its [causal context](/riak/kv/2.2.3/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
-object updates can be found in [Conflict Resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution), we'll walk you
+enable you to modify an object's value without modifying its [causal context]({{}}riak/kv/2.2.3/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution), we'll walk you
 through a basic example here.
Let's say that the current NBA champion is the Washington Generals. @@ -306,22 +306,22 @@ recommend that you: That cycle looks something like this: 1. **Read** the object from Riak. This step is important for updates -because this enables you to fetch the object's [causal context](/riak/kv/2.2.3/learn/concepts/causal-context), which +because this enables you to fetch the object's [causal context]({{}}riak/kv/2.2.3/learn/concepts/causal-context), which is the information that Riak uses to make decisions about which object values are most recent (this is especially useful for objects that are frequently updated). This context object needs to be passed back to Riak when you update the object. This step is handled for you by Basho's client libraries as long as you perform a read prior to an update. In addition, if you have chosen to allow Riak to generate -[siblings](/riak/kv/2.2.3/developing/usage/conflict-resolution/#siblings) \(which we recommend), you +[siblings]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/#siblings) \(which we recommend), you should **resolve sibling conflicts** upon read if they exist. For more -on this, please see our documentation on [conflict resolution](/riak/kv/2.2.3/developing/usage/conflict-resolution), along +on this, please see our documentation on [conflict resolution]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution), along with examples from our official client libraries: - * [Java](/riak/kv/2.2.3/developing/usage/conflict-resolution/java) - * [Ruby](/riak/kv/2.2.3/developing/usage/conflict-resolution/ruby) - * [Python](/riak/kv/2.2.3/developing/usage/conflict-resolution/python) - * [C#](/riak/kv/2.2.3/developing/usage/conflict-resolution/csharp) - * [Go](/riak/kv/2.2.3/developing/usage/conflict-resolution/golang) + * [Java]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/java) + * [Ruby]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/ruby) + * [Python]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/python) + * [C#]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/csharp) + * [Go]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution/golang) 2. **Modify the object** on the application side. 3. **Write** the new, modified object to Riak. Because you read the object first, Riak will receive the object's causal context metadata. @@ -333,14 +333,14 @@ API. > **Note on strong consistency** > -> If you are using Riak's [strong consistency](/riak/kv/2.2.3/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the -[strong consistency documentation](/riak/kv/2.2.3/developing/app-guide/strong-consistency/#strongly-consistent-writes). +> If you are using Riak's [strong consistency]({{}}riak/kv/2.2.3/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{}}riak/kv/2.2.3/developing/app-guide/strong-consistency/#strongly-consistent-writes). ### Updating Deleted Objects You should use the read-modify-write cycle explained above at all times, _even if you're updating deleted objects_. 
The reasons for that can be -found in our documentation on [tombstones](/riak/kv/2.2.3/using/reference/object-deletion/#tombstones). +found in our documentation on [tombstones]({{}}riak/kv/2.2.3/using/reference/object-deletion/#tombstones). There are some modifications that you may need to make if you are updating objects that may have been deleted previously. If you are using @@ -621,7 +621,7 @@ of the new coach; and finally the object is written back to Riak. The most important thing to bear in mind when updating objects is this: you should always read an object prior to updating it _unless_ you are -certain that no object is stored there. If you are storing [sensor data](/riak/kv/2.2.3/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +certain that no object is stored there. If you are storing [sensor data]({{}}riak/kv/2.2.3/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If you're not certain, however, then we recommend always reading the object first. @@ -734,7 +734,7 @@ The Java client also enables you to construct **no-operation updates** that don't actually modify the object and simply write the original value back to Riak. What is the use of that, given that it isn't changing the value of the object at all? No-operation updates can be -useful because they can help Riak resolve [sibling conflicts](/riak/kv/2.2.3/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +useful because they can help Riak resolve [sibling conflicts]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that matter---with siblings, a no-operation update will fetch the object _and its causal context_ and write the object back to Riak with the same, fetched context. 
This has the effect of telling Riak that you deem this diff --git a/content/riak/kv/2.2.3/index.md b/content/riak/kv/2.2.3/index.md index 9e82b96cb7..3aa9cbf6a8 100644 --- a/content/riak/kv/2.2.3/index.md +++ b/content/riak/kv/2.2.3/index.md @@ -1,5 +1,5 @@ --- -title: "Riak KV" +title: "Riak KV 2.2.3" description: "" project: "riak_kv" project_version: "2.2.3" @@ -15,15 +15,15 @@ aliases: --- [aboutenterprise]: http://basho.com/contact/ -[config index]: /riak/kv/2.2.3/configuring -[downloads]: /riak/kv/2.2.3/downloads/ -[install index]: /riak/kv/2.2.3/setup/installing/ -[plan index]: /riak/kv/2.2.3/setup/planning -[perf open files]: /riak/kv/2.2.3/using/performance/open-files-limit -[install debian & ubuntu]: /riak/kv/2.2.3/setup/installing/debian-ubuntu -[usage search]: /riak/kv/2.2.3/developing/usage/search -[getting started]: /riak/kv/2.2.3/developing/getting-started -[dev client libraries]: /riak/kv/2.2.3/developing/client-libraries +[config index]: {{}}riak/kv/2.2.3/configuring +[downloads]: {{}}riak/kv/2.2.3/downloads/ +[install index]: {{}}riak/kv/2.2.3/setup/installing/ +[plan index]: {{}}riak/kv/2.2.3/setup/planning +[perf open files]: {{}}riak/kv/2.2.3/using/performance/open-files-limit +[install debian & ubuntu]: {{}}riak/kv/2.2.3/setup/installing/debian-ubuntu +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search +[getting started]: {{}}riak/kv/2.2.3/developing/getting-started +[dev client libraries]: {{}}riak/kv/2.2.3/developing/client-libraries @@ -56,7 +56,7 @@ Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and the 3. [Configure Riak KV for your needs][config index] {{% note title="Developing with Riak KV" %}} -If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV](/riak/kv/2.2.3/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. +If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{}}riak/kv/2.2.3/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. 
{{% /note %}} ## Popular Docs diff --git a/content/riak/kv/2.2.3/learn/concepts.md b/content/riak/kv/2.2.3/learn/concepts.md index adb7f5db9a..40560d6d86 100644 --- a/content/riak/kv/2.2.3/learn/concepts.md +++ b/content/riak/kv/2.2.3/learn/concepts.md @@ -12,20 +12,20 @@ menu: toc: true --- -[concept aae]: /riak/kv/2.2.3/learn/concepts/active-anti-entropy -[concept buckets]: /riak/kv/2.2.3/learn/concepts/buckets -[concept cap neg]: /riak/kv/2.2.3/learn/concepts/capability-negotiation -[concept causal context]: /riak/kv/2.2.3/learn/concepts/causal-context -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters -[concept crdts]: /riak/kv/2.2.3/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.3/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.3/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[concept vnodes]: /riak/kv/2.2.3/learn/concepts/vnodes -[config index]: /riak/kv/2.2.3/configuring -[plan index]: /riak/kv/2.2.3/setup/planning -[use index]: /riak/kv/2.2.3/using/ +[concept aae]: {{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy +[concept buckets]: {{}}riak/kv/2.2.3/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.2.3/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.2.3/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.2.3/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.3/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.2.3/learn/concepts/vnodes +[config index]: {{}}riak/kv/2.2.3/configuring +[plan index]: {{}}riak/kv/2.2.3/setup/planning +[use index]: {{}}riak/kv/2.2.3/using/ Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
diff --git a/content/riak/kv/2.2.3/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.2.3/learn/concepts/active-anti-entropy.md index c61797e116..0070224373 100644 --- a/content/riak/kv/2.2.3/learn/concepts/active-anti-entropy.md +++ b/content/riak/kv/2.2.3/learn/concepts/active-anti-entropy.md @@ -15,15 +15,15 @@ aliases: - /riak/kv/2.2.3/theory/concepts/aae --- -[cluster ops v3 mdc]: /riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter -[cluster ops aae]: /riak/kv/2.2.3/using/cluster-operations/active-anti-entropy -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[config aae]: /riak/kv/2.2.3/configuring/reference/#active-anti-entropy -[glossary read rep]: /riak/kv/2.2.3/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode +[cluster ops v3 mdc]: {{}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{}}riak/kv/2.2.3/using/cluster-operations/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[config aae]: {{}}riak/kv/2.2.3/configuring/reference/#active-anti-entropy +[glossary read rep]: {{}}riak/kv/2.2.3/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode [Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree -[usage search]: /riak/kv/2.2.3/developing/usage/search +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored diff --git a/content/riak/kv/2.2.3/learn/concepts/buckets.md b/content/riak/kv/2.2.3/learn/concepts/buckets.md index d3f5a0e532..8016717d54 100644 --- a/content/riak/kv/2.2.3/learn/concepts/buckets.md +++ b/content/riak/kv/2.2.3/learn/concepts/buckets.md @@ -17,25 +17,25 @@ aliases: - /riak/kv/2.2.3/theory/concepts/buckets --- -[apps cluster metadata]: /riak/kv/2.2.3/developing/app-guide/cluster-metadata -[cluster ops bucket types]: /riak/kv/2.2.3/using/cluster-operations/bucket-types -[cluster ops strong consistency]: /riak/kv/2.2.3/using/cluster-operations/strong-consistency -[concept causal context]: /riak/kv/2.2.3/learn/concepts/causal-context -[concept causal context sib]: /riak/kv/2.2.3/learn/concepts/causal-context/#siblings -[concept replication]: /riak/kv/2.2.3/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[config basic]: /riak/kv/2.2.3/configuring/basic -[dev api http]: /riak/kv/2.2.3/developing/api/http -[dev data types]: /riak/kv/2.2.3/developing/data-types -[glossary ring]: /riak/kv/2.2.3/learn/glossary/#ring -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[plan backend memory]: /riak/kv/2.2.3/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.3/setup/planning/backend/multi -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[usage commit hooks]: /riak/kv/2.2.3/developing/usage/commit-hooks -[usage conflict resolution]: /riak/kv/2.2.3/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.2.3/developing/usage/replication +[apps cluster metadata]: {{}}riak/kv/2.2.3/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{}}riak/kv/2.2.3/using/cluster-operations/bucket-types 
+[cluster ops strong consistency]: {{}}riak/kv/2.2.3/using/cluster-operations/strong-consistency +[concept causal context]: {{}}riak/kv/2.2.3/learn/concepts/causal-context +[concept causal context sib]: {{}}riak/kv/2.2.3/learn/concepts/causal-context/#siblings +[concept replication]: {{}}riak/kv/2.2.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[config basic]: {{}}riak/kv/2.2.3/configuring/basic +[dev api http]: {{}}riak/kv/2.2.3/developing/api/http +[dev data types]: {{}}riak/kv/2.2.3/developing/data-types +[glossary ring]: {{}}riak/kv/2.2.3/learn/glossary/#ring +[plan backend leveldb]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.3/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.3/setup/planning/backend/multi +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.2.3/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.2.3/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.2.3/developing/usage/replication Buckets are used to define a virtual keyspace for storing Riak objects. diff --git a/content/riak/kv/2.2.3/learn/concepts/capability-negotiation.md b/content/riak/kv/2.2.3/learn/concepts/capability-negotiation.md index a45a320604..2d32bf5365 100644 --- a/content/riak/kv/2.2.3/learn/concepts/capability-negotiation.md +++ b/content/riak/kv/2.2.3/learn/concepts/capability-negotiation.md @@ -16,9 +16,9 @@ aliases: --- -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[upgrade cluster]: /riak/kv/2.2.3/setup/upgrading/cluster -[usage mapreduce]: /riak/kv/2.2.3/developing/usage/mapreduce +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[upgrade cluster]: {{}}riak/kv/2.2.3/setup/upgrading/cluster +[usage mapreduce]: {{}}riak/kv/2.2.3/developing/usage/mapreduce In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
diff --git a/content/riak/kv/2.2.3/learn/concepts/causal-context.md b/content/riak/kv/2.2.3/learn/concepts/causal-context.md index b833276e44..1f668efdc3 100644 --- a/content/riak/kv/2.2.3/learn/concepts/causal-context.md +++ b/content/riak/kv/2.2.3/learn/concepts/causal-context.md @@ -16,18 +16,18 @@ aliases: --- -[concept aae]: /riak/kv/2.2.3/learn/concepts/active-anti-entropy -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency +[concept aae]: {{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency [CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management -[dev api http]: /riak/kv/2.2.3/developing/api/http -[dev key value]: /riak/kv/2.2.3/developing/key-value-modeling -[glossary read rep]: /riak/kv/2.2.3/learn/glossary/#read-repair -[perf latency reduc]: /riak/kv/2.2.3/using/performance/latency-reduction -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.3/developing/usage/conflict-resolution -[usage protocol buffers]: /riak/kv/2.2.3/developing/api/protocol-buffers -[usage updating objects]: /riak/kv/2.2.3/developing/usage/updating-objects +[dev api http]: {{}}riak/kv/2.2.3/developing/api/http +[dev key value]: {{}}riak/kv/2.2.3/developing/key-value-modeling +[glossary read rep]: {{}}riak/kv/2.2.3/learn/glossary/#read-repair +[perf latency reduc]: {{}}riak/kv/2.2.3/using/performance/latency-reduction +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.3/developing/usage/conflict-resolution +[usage protocol buffers]: {{}}riak/kv/2.2.3/developing/api/protocol-buffers +[usage updating objects]: {{}}riak/kv/2.2.3/developing/usage/updating-objects [Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock [Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ [Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ @@ -73,7 +73,7 @@ Causal context comes in two forms in Riak: **vector clocks** and **dotted version vectors**. More information in both can be found in the sections below. -In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type](/riak/kv/2.2.3/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other +In the case of outcome 2, the choice between **a**, **b** and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{}}riak/kv/2.2.3/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions). 
If, however, `allow_mult` is set to `false`, then Riak will not generate diff --git a/content/riak/kv/2.2.3/learn/concepts/clusters.md b/content/riak/kv/2.2.3/learn/concepts/clusters.md index 89eed6effe..4b5651afcd 100644 --- a/content/riak/kv/2.2.3/learn/concepts/clusters.md +++ b/content/riak/kv/2.2.3/learn/concepts/clusters.md @@ -18,15 +18,15 @@ aliases: --- -[concept buckets]: /riak/kv/2.2.3/learn/concepts/buckets -[concept keys objects]: /riak/kv/2.2.3/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.3/learn/concepts/replication -[glossary node]: /riak/kv/2.2.3/learn/glossary/#node -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[learn dynamo]: /riak/kv/2.2.3/learn/dynamo -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.3/developing/usage/conflict-resolution -[usage replication]: /riak/kv/2.2.3/developing/usage/replication +[concept buckets]: {{}}riak/kv/2.2.3/learn/concepts/buckets +[concept keys objects]: {{}}riak/kv/2.2.3/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.3/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.2.3/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[learn dynamo]: {{}}riak/kv/2.2.3/learn/dynamo +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.3/developing/usage/conflict-resolution +[usage replication]: {{}}riak/kv/2.2.3/developing/usage/replication Riak's default mode of operation is to work as a cluster consisting of @@ -63,7 +63,7 @@ _claims_ that partition). Below is a visual representation of a Riak ring: -![A Riak Ring](/images/riak-ring.png) +![A Riak Ring]({{}}images/riak-ring.png) The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each @@ -95,7 +95,7 @@ number of vnodes that must reply before a response is returned. Here is an illustration of this process: -![A Riak Ring](/images/riak-data-distribution.png) +![A Riak Ring]({{}}images/riak-data-distribution.png) When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. 
When a diff --git a/content/riak/kv/2.2.3/learn/concepts/crdts.md b/content/riak/kv/2.2.3/learn/concepts/crdts.md index a078fd2936..045098ac11 100644 --- a/content/riak/kv/2.2.3/learn/concepts/crdts.md +++ b/content/riak/kv/2.2.3/learn/concepts/crdts.md @@ -17,20 +17,20 @@ aliases: --- [crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf -[data types converg]: /riak/kv/2.2.3/learn/concepts/crdts/#convergence +[data types converg]: {{}}riak/kv/2.2.3/learn/concepts/crdts/#convergence [crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html -[data types impl]: /riak/kv/2.2.3/learn/concepts/crdts/#implementation -[concept causal context dvv]: /riak/kv/2.2.3/learn/concepts/causal-context/#dotted-version-vectors -[concept causal context sib]: /riak/kv/2.2.3/learn/concepts/causal-context/#siblings -[concept causal context vc]: /riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[concept strong consistency]: /riak/kv/2.2.3/learn/concepts/strong-consistency -[dev data types]: /riak/kv/2.2.3/developing/data-types +[data types impl]: {{}}riak/kv/2.2.3/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{}}riak/kv/2.2.3/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{}}riak/kv/2.2.3/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{}}riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[concept strong consistency]: {{}}riak/kv/2.2.3/learn/concepts/strong-consistency +[dev data types]: {{}}riak/kv/2.2.3/developing/data-types [riak_dt]: https://github.com/basho/riak_dt -[dev data types context]: /riak/kv/2.1.4/developing/data-types/#data-types-and-context -[glossary node]: /riak/kv/2.2.3/learn/glossary/#node -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[usage conflict resolution]: /riak/kv/2.2.3/developing/usage/conflict-resolution +[dev data types context]: {{}}riak/kv/2.2.3/developing/data-types/#data-types-and-context +[glossary node]: {{}}riak/kv/2.2.3/learn/glossary/#node +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[usage conflict resolution]: {{}}riak/kv/2.2.3/developing/usage/conflict-resolution Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. 
Riak KV supports the following eventually-convergent data types, described in later sections: diff --git a/content/riak/kv/2.2.3/learn/concepts/eventual-consistency.md b/content/riak/kv/2.2.3/learn/concepts/eventual-consistency.md index 338cc975a3..8f394e9265 100644 --- a/content/riak/kv/2.2.3/learn/concepts/eventual-consistency.md +++ b/content/riak/kv/2.2.3/learn/concepts/eventual-consistency.md @@ -18,14 +18,14 @@ aliases: --- -[concept buckets]: /riak/kv/2.2.3/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters -[concept replication]: /riak/kv/2.2.3/learn/concepts/replication -[glossary node]: /riak/kv/2.2.3/learn/glossary/#node -[glossary read rep]: /riak/kv/2.2.3/learn/glossary/#read-repair -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[usage conflict resolution]: /riak/kv/2.2.3/developing/usage/conflict-resolution +[concept buckets]: {{}}riak/kv/2.2.3/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters +[concept replication]: {{}}riak/kv/2.2.3/learn/concepts/replication +[glossary node]: {{}}riak/kv/2.2.3/learn/glossary/#node +[glossary read rep]: {{}}riak/kv/2.2.3/learn/glossary/#read-repair +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[usage conflict resolution]: {{}}riak/kv/2.2.3/developing/usage/conflict-resolution In a distributed and fault-tolerant system like Riak, server and network @@ -54,14 +54,14 @@ defaults if you want to employ a different strategy. Among those strategies, you can enable Riak to resolve object conflicts automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or -special eventually consistent [Data Types](/riak/kv/2.2.3/developing/data-types/), or you can resolve those +special eventually consistent [Data Types]({{}}riak/kv/2.2.3/developing/data-types/), or you can resolve those conflicts on the application side by employing a use case-specific logic of your choosing. More information on this can be found in our guide to [conflict resolution][usage conflict resolution]. This variety of options enables you to manage Riak's eventually consistent behavior in accordance with your application's [data model -or models](/riak/kv/2.2.3/developing/data-modeling/). +or models]({{}}riak/kv/2.2.3/developing/data-modeling/). ## Replication Properties and Request Tuning @@ -78,7 +78,7 @@ implemented on the application side can be found in our guides to In addition to our official documentation, we also recommend checking out the [Understanding Riak's Configurable Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) -series from [the Basho blog](http://basho.com/blog/). +series from [the Basho blog](https://riak.com/blog/). 
## A Simple Example of Eventual Consistency diff --git a/content/riak/kv/2.2.3/learn/concepts/keys-and-objects.md b/content/riak/kv/2.2.3/learn/concepts/keys-and-objects.md index 7b38813eb6..9a2fb3a5eb 100644 --- a/content/riak/kv/2.2.3/learn/concepts/keys-and-objects.md +++ b/content/riak/kv/2.2.3/learn/concepts/keys-and-objects.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.3/theory/concepts/keys-and-values --- -[concept buckets]: /riak/kv/2.2.3/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks +[concept buckets]: {{}}riak/kv/2.2.3/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of a data diff --git a/content/riak/kv/2.2.3/learn/concepts/replication.md b/content/riak/kv/2.2.3/learn/concepts/replication.md index 5910440ba5..f2b7b1374b 100644 --- a/content/riak/kv/2.2.3/learn/concepts/replication.md +++ b/content/riak/kv/2.2.3/learn/concepts/replication.md @@ -18,14 +18,14 @@ aliases: --- -[cluster ops v3 mdc]: /riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter -[concept aae]: /riak/kv/2.2.3/learn/concepts/active-anti-entropy -[concept causal context vc]: /riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters -[concept vnodes]: /riak/kv/2.2.3/learn/concepts/vnodes -[glossary node]: /riak/kv/2.2.3/learn/glossary/#node -[glossary ring]: /riak/kv/2.2.3/learn/glossary/#ring -[usage replication]: /riak/kv/2.2.3/developing/usage/replication +[cluster ops v3 mdc]: {{}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter +[concept aae]: {{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy +[concept causal context vc]: {{}}riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters +[concept vnodes]: {{}}riak/kv/2.2.3/learn/concepts/vnodes +[glossary node]: {{}}riak/kv/2.2.3/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.2.3/learn/glossary/#ring +[usage replication]: {{}}riak/kv/2.2.3/developing/usage/replication Data replication is a core feature of Riak's basic architecture. Riak @@ -37,13 +37,13 @@ Replication is fundamental and automatic in Riak, providing security that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a -bucket's [bucket type](/riak/kv/2.2.3/developing/usage/bucket-types). +bucket's [bucket type]({{}}riak/kv/2.2.3/developing/usage/bucket-types). >**Note: Replication across clusters** > >If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's -[Multi-Datacenter Replications](/riak/kv/2.2.3/setup/planning/backend/multi) capabilities. +[Multi-Datacenter Replications]({{}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter) capabilities. ## Selecting an N value (`n_val`) @@ -70,7 +70,7 @@ nodes with the data will cause the read to fail.
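One note before the next hunk: the "Setting the N value" passage that follows ends at "First, we'll create the bucket type", and the concrete commands fall between hunks (`@@ -70` jumps to `@@ -101`), so they do not appear in this diff. For context, applying an N value is a bucket-type operation; a minimal sketch with the stock `riak-admin` tooling, using a hypothetical type name:

```bash
# Hypothetical bucket type with N=2, matching the elided example:
# create it with n_val in its props, activate it, then confirm.
riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
riak-admin bucket-type activate n_val_of_2
riak-admin bucket-type status n_val_of_2
```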
## Setting the N value (`n_val`) To change the N value for a bucket, you need to create a [bucket -type](/riak/kv/2.2.3/developing/usage/bucket-types) with `n_val` set to your desired value and +type]({{}}riak/kv/2.2.3/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type. In this example, we'll set N to 2. First, we'll create the bucket type @@ -101,8 +101,8 @@ objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the o can end up Unreachable data is a problem because it can negatively impact coverage -queries, e.g. [secondary index](/riak/kv/2.2.3/developing/usage/secondary-indexes/) and -[MapReduce](/riak/kv/2.2.3/developing/usage/mapreduce/) queries. Lowering an object or bucket's +queries, e.g. [secondary index]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes/) and +[MapReduce]({{}}riak/kv/2.2.3/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned. diff --git a/content/riak/kv/2.2.3/learn/concepts/strong-consistency.md b/content/riak/kv/2.2.3/learn/concepts/strong-consistency.md index 6833490661..6a39d0932f 100644 --- a/content/riak/kv/2.2.3/learn/concepts/strong-consistency.md +++ b/content/riak/kv/2.2.3/learn/concepts/strong-consistency.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.3/theory/concepts/strong-consistency --- -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency {{% note title="Please Note:" %}} Riak KV's strong consistency is an experimental feature and may be removed @@ -27,7 +27,7 @@ Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. {{% /note %}} -Riak was originally designed as an [eventually consistent](/riak/kv/2.2.3/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.2.3/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -38,7 +38,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.2.3/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.2.3/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.2.3/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.2.3/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. 
Eventual Consistency diff --git a/content/riak/kv/2.2.3/learn/concepts/vnodes.md b/content/riak/kv/2.2.3/learn/concepts/vnodes.md index ad67ce8fc2..89320b08c7 100644 --- a/content/riak/kv/2.2.3/learn/concepts/vnodes.md +++ b/content/riak/kv/2.2.3/learn/concepts/vnodes.md @@ -16,15 +16,15 @@ aliases: --- -[concept causal context]: /riak/kv/2.2.3/learn/concepts/causal-context -[concept clusters ring]: /riak/kv/2.2.3/learn/concepts/clusters/#the-ring -[concept replication]: /riak/kv/2.2.3/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.3/learn/concepts/strong-consistency -[glossary node]: /riak/kv/2.2.3/learn/glossary/#node -[glossary ring]: /riak/kv/2.2.3/learn/glossary/#ring -[plan backend]: /riak/kv/2.2.3/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.2.3/setup/planning/cluster-capacity -[use admin riak cli]: /riak/kv/2.2.3/using/admin/riak-cli +[concept causal context]: {{}}riak/kv/2.2.3/learn/concepts/causal-context +[concept clusters ring]: {{}}riak/kv/2.2.3/learn/concepts/clusters/#the-ring +[concept replication]: {{}}riak/kv/2.2.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.3/learn/concepts/strong-consistency +[glossary node]: {{}}riak/kv/2.2.3/learn/glossary/#node +[glossary ring]: {{}}riak/kv/2.2.3/learn/glossary/#ring +[plan backend]: {{}}riak/kv/2.2.3/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.2.3/setup/planning/cluster-capacity +[use admin riak cli]: {{}}riak/kv/2.2.3/using/admin/riak-cli Virtual nodes, more commonly referred to as **vnodes**, are processes @@ -80,7 +80,7 @@ of all such values. The illustration below provides a visual representation of the Riak ring: ![The Riak -Ring](http://docs.basho.com/shared/2.0.2/images/riak-ring.png?1416296175) +Ring]({{}}images/shared/riak-ring.png) You can think of vnodes as managers, responsible for handling incoming requests from other nodes/vnodes, storing objects in the appropriate @@ -102,7 +102,7 @@ asynchronous tasks on behalf of the vnode. If you're navigating through the file system of a Riak node, you'll notice that each node's `/data` directory holds a variety of -subdirectories. If you're using, say, [Bitcask](/riak/kv/2.2.3/setup/planning/backend/bitcask) as a backend, navigate +subdirectories. If you're using, say, [Bitcask]({{}}riak/kv/2.2.3/setup/planning/backend/bitcask) as a backend, navigate into the `/bitcask` directory (you'll also see a `/ring` directory and several others). If you open up the `/bitcask` directory, you'll see a wide assortment of directories with numbers as names, e.g. `0` or diff --git a/content/riak/kv/2.2.3/learn/dynamo.md b/content/riak/kv/2.2.3/learn/dynamo.md index 75cd2e23f2..7e92f10e88 100644 --- a/content/riak/kv/2.2.3/learn/dynamo.md +++ b/content/riak/kv/2.2.3/learn/dynamo.md @@ -146,8 +146,8 @@ or redistribution. > > And, nodes can be added and removed from your Riak cluster as needed. -[Consistent Hashing]: /riak/kv/2.2.3/learn/glossary/#consistent-hashing -[Gossip Protocol]: /riak/kv/2.2.3/learn/glossary/#gossiping +[Consistent Hashing]: {{}}riak/kv/2.2.3/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{}}riak/kv/2.2.3/learn/glossary/#gossiping In the past year, Dynamo has been the underlying storage technology for a number of the core services in Amazon’s e-commerce platform. It was able to scale to @@ -667,8 +667,8 @@ verify the validity of the context object supplied in the put request. > > Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. 
-[HTTP API]: /riak/kv/2.2.3/developing/api/http/ -[Protocol Buffers API]: /riak/kv/2.2.3/developing/api/protocol-buffers/ +[HTTP API]: {{}}riak/kv/2.2.3/developing/api/http/ +[Protocol Buffers API]: {{}}riak/kv/2.2.3/developing/api/protocol-buffers/ Dynamo treats both the key and the object supplied by the caller as an opaque array of bytes. It applies a MD5 hash on the key to generate a 128-bit @@ -744,8 +744,8 @@ capacity, accounting for heterogeneity in the physical infrastructure. > [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. -[Further Reading on Partitioning in Riak KV]: /riak/kv/2.2.3/learn/concepts/clusters/ -[All about the Riak KV Ring]: /riak/kv/2.2.3/learn/concepts/clusters/#the-ring +[Further Reading on Partitioning in Riak KV]: {{}}riak/kv/2.2.3/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{}}riak/kv/2.2.3/learn/concepts/clusters/#the-ring ### 4.3 Replication @@ -868,7 +868,7 @@ require reconciliation. > As you may have already figured out, Riak KV uses vector clocks for object > versioning, too. Here are a whole host of resources to keep you busy for a while: > -> [Vector Clock on Riak KV Glossary](/riak/kv/2.2.3/learn/glossary/#vector-clock) +> [Vector Clock on Riak KV Glossary]({{}}riak/kv/2.2.3/learn/glossary/#vector-clock) > > [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) > | @@ -993,11 +993,11 @@ R and W are usually configured to be less than N, to provide better latency. > >Some more resources on R and W: > ->[REST API](/riak/kv/2.2.3/developing/api/http/) +>[REST API]({{}}riak/kv/2.2.3/developing/api/http/) > ->[Writing Data](/riak/kv/2.2.3/developing/usage/creating-objects/) +>[Writing Data]({{}}riak/kv/2.2.3/developing/usage/creating-objects/) > ->[Reading Data](/riak/kv/2.2.3/developing/usage/reading-objects/) +>[Reading Data]({{}}riak/kv/2.2.3/developing/usage/reading-objects/) Upon receiving a put() request for a key, the coordinator generates the vector clock for the new version and writes the new version locally. The coordinator @@ -1018,7 +1018,7 @@ versions are then reconciled and the reconciled version superseding the current versions is written back. > Same for Riak KV. Reconciling divergent versions in Riak KV is called -> [Read Repair](/riak/kv/2.2.3/learn/concepts/replication/#read-repair). +> [Read Repair]({{}}riak/kv/2.2.3/learn/concepts/replication/#read-repair). ### 4.6 Handling Failures: Hinted Handoff @@ -1036,8 +1036,8 @@ consistent hashing ring. > You can glimpse at Riak KV's preference list (or *preflist*) calculation in > the [Replication] walkthrough. -[Hinted handoff]: /riak/kv/2.2.3/learn/glossary/#hinted-handoff -[Replication]: /riak/kv/2.2.3/developing/usage/replication/ +[Hinted handoff]: {{}}riak/kv/2.2.3/learn/glossary/#hinted-handoff +[Replication]: {{}}riak/kv/2.2.3/developing/usage/replication/ Consider the example of Dynamo configuration given in Figure 2 with N=3. In this example, if node A is temporarily down or unreachable @@ -1077,7 +1077,7 @@ outage. > [Multi Datacenter Replication] is implemented in the commercial extension to > Riak KV, called [Riak KV Enterprise Edition]. -[Multi Datacenter Replication]: /riak/kv/2.2.3/using/reference/v3-multi-datacenter/architecture/ +[Multi Datacenter Replication]: {{}}riak/kv/2.2.3/using/reference/v3-multi-datacenter/architecture/ [Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/ @@ -1128,15 +1128,15 @@ addressed, however, by the refined partitioning scheme described in Section 6.2. 
> This section is well expressed in [Adding and Removing Nodes] and > [Failure Scenarios]. -[Adding and Removing Nodes]: /riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes/ -[Failure Scenarios]: /riak/kv/2.2.3/learn/concepts/eventual-consistency/ +[Adding and Removing Nodes]: {{}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes/ +[Failure Scenarios]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency/ #### 4.8.1 Ring Membership > Riak KV operators can trigger node management via the > [riak-admin command-line tool]. -[riak-admin command-line tool]: /riak/kv/2.2.3/using/admin/riak-admin/ +[riak-admin command-line tool]: {{}}riak/kv/2.2.3/using/admin/riak-admin/ In Amazon’s environment node outages (due to failures and maintenance tasks) are often transient but may last for extended intervals. A node outage rarely @@ -1164,7 +1164,7 @@ membership change histories. > Riak KV's ring state holds membership information, and is propgated via > [gossiping], including random reconciliation, defaulting to once a minute. -[gossiping]: /riak/kv/2.2.3/learn/glossary/#gossiping +[gossiping]: {{}}riak/kv/2.2.3/learn/glossary/#gossiping When a node starts for the first time, it chooses its set of tokens (virtual nodes in the consistent hash space) and maps nodes to their respective token @@ -1199,7 +1199,7 @@ service. Typically seeds are fully functional nodes in the Dynamo ring. > > See _[The Node Join Process]_ for more. -[The Node Join Process]: /riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[The Node Join Process]: {{}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster #### 4.8.3 Failure Detection @@ -1293,11 +1293,11 @@ majority of Dynamo’s production instances use BDB Transactional Data Store. > supports [secondary indexes]). The Memory backend is an excellent choice when > speed is important and durability is not. It also has TTL support. -[backend options]: /riak/kv/2.2.3/setup/planning/backend/ -[Bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask/ -[LevelDB]: /riak/kv/2.2.3/setup/planning/backend/leveldb/ -[Memory]: /riak/kv/2.2.3/setup/planning/backend/memory/ -[secondary indexes]: /riak/kv/2.2.3/developing/usage/secondary-indexes/ +[backend options]: {{}}riak/kv/2.2.3/setup/planning/backend/ +[Bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask/ +[LevelDB]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb/ +[Memory]: {{}}riak/kv/2.2.3/setup/planning/backend/memory/ +[secondary indexes]: {{}}riak/kv/2.2.3/developing/usage/secondary-indexes/ The request coordination component is built on top of an event-driven messaging substrate where the message processing pipeline is split into multiple stages @@ -1331,7 +1331,7 @@ relieves the anti-entropy protocol from having to do it. > Riak KV implements [Read Repair]. -[Read Repair]: /riak/kv/2.2.3/learn/concepts/replication/#read-repair +[Read Repair]: {{}}riak/kv/2.2.3/learn/concepts/replication/#read-repair As noted earlier, write requests are coordinated by one of the top N nodes in the preference list. Although it is desirable always to have the first node @@ -1355,7 +1355,7 @@ the performance at the 99.9 percentile. > [Basho Bench] against your own Riak cluster to discover your own > optimal values. -[Basho Bench]: /riak/kv/2.2.3/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.2.3/using/performance/benchmarking/ Dynamo is used by several services with different configurations. 
These instances differ by their version reconciliation logic, and read/write quorum @@ -1372,12 +1372,12 @@ shopping cart. > remanding more complex reconciliation to the client. There are several tools > to help simplify this task, such as [Statebox]. > -> Riak KV supports a simple reconciliation strategy, called [CRDTs] (Commutative -> Replicated Data Types), for reconciling common data types like sets and +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and > counters. [Statebox]: https://github.com/mochi/statebox_riak -[CRDTs]: /riak/kv/2.2.3/developing/data-types/ +[CRDTs (Commutative Replicated Data Types)]: {{}}riak/kv/2.2.3/developing/data-types/ * Timestamp based reconciliation: This case differs from the previous one only @@ -1526,7 +1526,7 @@ strategies on load distribution. > Riak follows a SHA1 based consistent hashing for [partitioning]. -[partitioning]: /riak/kv/2.2.3/learn/concepts/replication/#understanding-replication-by-example +[partitioning]: {{}}riak/kv/2.2.3/learn/concepts/replication/#understanding-replication-by-example To study the load imbalance and its correlation with request load, the total number of requests received by each node was measured for a period of 24 hours - @@ -1640,8 +1640,8 @@ system in a way that preserves these properties. > > See [The Node Join Process] and [Replacing a Node]. -[The Node Join Process]: /riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster -[Replacing a Node]: /riak/kv/2.2.3/using/cluster-operations/replacing-node/ +[The Node Join Process]: {{}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{}}riak/kv/2.2.3/using/cluster-operations/replacing-node/ The efficiency of these three strategies is evaluated for a system with S=30 and N=3. However, comparing these different strategies in a fair manner is hard as @@ -1750,7 +1750,7 @@ on physical timestamps, any node can coordinate a write request. > > See [Load Balancing] for more information. -[Load Balancing]: /riak/kv/2.2.3/configuring/load-balancing-proxy/ +[Load Balancing]: {{}}riak/kv/2.2.3/configuring/load-balancing-proxy/ An alternative approach to request coordination is to move the state machine to the client nodes. In this scheme client applications use a library to perform @@ -1776,7 +1776,7 @@ using timestamps based versioning. > Note that the Riak clients do not coordinate with Riak's preference list, but > simply round-robin requests, letting the Riak cluster handle routing. -[client libraries]: /riak/kv/2.2.3/developing/client-libraries/ +[client libraries]: {{}}riak/kv/2.2.3/developing/client-libraries/ An important advantage of the client-driven coordination approach is that a load balancer is no longer required to uniformly distribute client load. Fair load @@ -1898,14 +1898,14 @@ is actively addressed by O(1) DHT systems(e.g., [14]). > [Basho Bench] to help discover your optimal setup. Nothing will give you > better numbers than real experimentation. -[Basho Bench]: /riak/kv/2.2.3/using/performance/benchmarking/ +[Basho Bench]: {{}}riak/kv/2.2.3/using/performance/benchmarking/ ## 7. Conclusions > This paper was an overview of Riak from a Dynamo point-of-view. To get a > better sense of the Riak ecosystem, read our ever-expanding [documentation]. 
-[documentation]: https://docs.basho.com +[documentation]: {{}} This paper described Dynamo, a highly available and scalable data store, used for storing state of a number of core services of Amazon.com’s e-commerce diff --git a/content/riak/kv/2.2.3/learn/glossary.md b/content/riak/kv/2.2.3/learn/glossary.md index e90207db56..696fe78a62 100644 --- a/content/riak/kv/2.2.3/learn/glossary.md +++ b/content/riak/kv/2.2.3/learn/glossary.md @@ -13,39 +13,39 @@ toc: true --- -[apps replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties +[apps replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties [basho bench GH]: http://github.com/basho/basho_bench/ -[cluster ops add remove node]: /riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes -[cluster ops strong consistency]: /riak/kv/2.2.3/using/cluster-operations/strong-consistency -[concept buckets]: /riak/kv/2.2.3/learn/concepts/buckets -[concept causal context vc]: /riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters -[concept crdts]: /riak/kv/2.2.3/learn/concepts/crdts -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[concept keys objects]: /riak/kv/2.2.3/learn/concepts/keys-and-objects -[concept replication]: /riak/kv/2.2.3/learn/concepts/replication -[concept strong consistency]: /riak/kv/2.2.3/learn/concepts/strong-consistency -[dev kv model]: /riak/kv/2.2.3/developing/key-value-modeling -[concept replication aae]: /riak/kv/2.2.3/learn/concepts/replication/#active-anti-entropy -[dev api http]: /riak/kv/2.2.3/developing/api/http -[dev data model]: /riak/kv/2.2.3/developing/data-modeling -[dev data types]: /riak/kv/2.2.3/developing/data-types -[glossary read rep]: /riak/kv/2.2.3/learn/glossary/#read-repair -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode +[cluster ops add remove node]: {{}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{}}riak/kv/2.2.3/using/cluster-operations/strong-consistency +[concept buckets]: {{}}riak/kv/2.2.3/learn/concepts/buckets +[concept causal context vc]: {{}}riak/kv/2.2.3/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters +[concept crdts]: {{}}riak/kv/2.2.3/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.3/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.3/learn/concepts/strong-consistency +[dev kv model]: {{}}riak/kv/2.2.3/developing/key-value-modeling +[concept replication aae]: {{}}riak/kv/2.2.3/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{}}riak/kv/2.2.3/developing/api/http +[dev data model]: {{}}riak/kv/2.2.3/developing/data-modeling +[dev data types]: {{}}riak/kv/2.2.3/developing/data-types +[glossary read rep]: {{}}riak/kv/2.2.3/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode [Lager]: https://github.com/basho/lager -[learn dynamo]: /riak/kv/2.2.3/learn/dynamo -[plan cluster capacity]: /riak/kv/2.2.3/setup/planning/cluster-capacity -[repair recover failure recovery]: /riak/kv/2.2.3/using/repair-recovery/failure-recovery -[repair recover repairs]: /riak/kv/2.2.3/using/repair-recovery/repairs +[learn dynamo]: {{}}riak/kv/2.2.3/learn/dynamo +[plan cluster capacity]: 
{{}}riak/kv/2.2.3/setup/planning/cluster-capacity +[repair recover failure recovery]: {{}}riak/kv/2.2.3/using/repair-recovery/failure-recovery +[repair recover repairs]: {{}}riak/kv/2.2.3/using/repair-recovery/repairs [Riak Core]: https://github.com/basho/riak_core [Riak KV]: https://github.com/basho/riak_kv [Riak Pipe]: https://github.com/basho/riak_pipe [Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ [Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 [Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ -[usage mapreduce]: /riak/kv/2.2.3/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.3/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.2.3/developing/usage/secondary-indexes +[usage mapreduce]: {{}}riak/kv/2.2.3/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.2.3/developing/usage/secondary-indexes [Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ [Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing @@ -73,7 +73,7 @@ Basho Bench is a benchmarking tool created to conduct accurate and repeatable performance tests and stress tests and to produce performance graphs. -* [Basho Bench](/riak/kv/2.2.3/using/performance/benchmarking) +* [Basho Bench]({{}}riak/kv/2.2.3/using/performance/benchmarking) * [GitHub repository][basho bench GH] ## Bucket @@ -92,7 +92,7 @@ Bucket types enable you to create and manage sets of bucket properties that, when applied to buckets, dictate those buckets' behavior. They also act as a third namespace in Riak in addition to buckets and keys. -* [Bucket Types](/riak/kv/2.2.3/developing/usage/bucket-types) +* [Bucket Types]({{}}riak/kv/2.2.3/developing/usage/bucket-types) ## Cluster diff --git a/content/riak/kv/2.2.3/learn/use-cases.md b/content/riak/kv/2.2.3/learn/use-cases.md index 1d96cfbd37..434e9abeeb 100644 --- a/content/riak/kv/2.2.3/learn/use-cases.md +++ b/content/riak/kv/2.2.3/learn/use-cases.md @@ -16,20 +16,20 @@ aliases: --- -[dev data model articles etc]: /riak/kv/2.2.3/developing/data-modeling/#articles-blog-posts-and-other-content -[dev data model log data]: /riak/kv/2.2.3/developing/data-modeling/#log-data -[dev data model sensor data]: /riak/kv/2.2.3/developing/data-modeling/#sensor-data -[dev data model serve advertisements]: /riak/kv/2.2.3/developing/data-modeling/#serving-advertisements -[dev data model sess storage]: /riak/kv/2.2.3/developing/data-modeling/#session-storage -[dev data model user acct]: /riak/kv/2.2.3/developing/data-modeling/#user-accounts -[dev data model user events]: /riak/kv/2.2.3/developing/data-modeling/#user-events-and-timelines -[dev data model user settings]: /riak/kv/2.2.3/developing/data-modeling/#user-settings-and-preferences -[dev data types]: /riak/kv/2.2.3/developing/data-types -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties -[usage mapreduce]: /riak/kv/2.2.3/developing/usage/mapreduce -[usage search]: /riak/kv/2.2.3/developing/usage/search -[usage secondary-indexes]: /riak/kv/2.2.3/developing/usage/secondary-indexes +[dev data model articles etc]: {{}}riak/kv/2.2.3/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{}}riak/kv/2.2.3/developing/data-modeling/#log-data +[dev data model sensor data]: 
{{}}riak/kv/2.2.3/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{}}riak/kv/2.2.3/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{}}riak/kv/2.2.3/developing/data-modeling/#session-storage +[dev data model user acct]: {{}}riak/kv/2.2.3/developing/data-modeling/#user-accounts +[dev data model user events]: {{}}riak/kv/2.2.3/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{}}riak/kv/2.2.3/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{}}riak/kv/2.2.3/developing/data-types +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties +[usage mapreduce]: {{}}riak/kv/2.2.3/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search +[usage secondary-indexes]: {{}}riak/kv/2.2.3/developing/usage/secondary-indexes Riak is a flexible data storage technology capable of addressing a wide variety of problems in a scalable way. In this guide, we'll list a number of use cases diff --git a/content/riak/kv/2.2.3/learn/why-riak-kv.md b/content/riak/kv/2.2.3/learn/why-riak-kv.md index d71e33169b..69a234aa84 100644 --- a/content/riak/kv/2.2.3/learn/why-riak-kv.md +++ b/content/riak/kv/2.2.3/learn/why-riak-kv.md @@ -16,14 +16,14 @@ aliases: --- -[apps replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties -[Basho Bench]: /riak/kv/2.2.3/using/performance/benchmarking -[cluster ops strong consistency]: /riak/kv/2.2.3/using/cluster-operations/strong-consistency -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties +[Basho Bench]: {{}}riak/kv/2.2.3/using/performance/benchmarking +[cluster ops strong consistency]: {{}}riak/kv/2.2.3/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency [convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf [Datomic]: http://www.datomic.com/overview.html -[dev data types]: /riak/kv/2.2.3/developing/data-types -[glossary read rep]: /riak/kv/2.2.3/learn/glossary/#read-repair +[dev data types]: {{}}riak/kv/2.2.3/developing/data-types +[glossary read rep]: {{}}riak/kv/2.2.3/learn/glossary/#read-repair ## What is Riak? diff --git a/content/riak/kv/2.2.3/release-notes.md b/content/riak/kv/2.2.3/release-notes.md index 256f04d603..9a6d013090 100644 --- a/content/riak/kv/2.2.3/release-notes.md +++ b/content/riak/kv/2.2.3/release-notes.md @@ -27,4 +27,4 @@ In fixing a `riak` and `riak-admin` [issue](https://github.com/basho/node_packag ## Previous Release Notes -Please see the KV 2.2.2 release notes [here](/riak/kv/2.2.2/release-notes/), and the KV 2.2.1 release notes [here](/riak/kv/2.2.1/release-notes/). +Please see the KV 2.2.2 release notes [here]({{}}riak/kv/2.2.2/release-notes/), and the KV 2.2.1 release notes [here]({{}}riak/kv/2.2.1/release-notes/).
diff --git a/content/riak/kv/2.2.3/setup/downgrade.md b/content/riak/kv/2.2.3/setup/downgrade.md index 664d0e6c74..3068548c8d 100644 --- a/content/riak/kv/2.2.3/setup/downgrade.md +++ b/content/riak/kv/2.2.3/setup/downgrade.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.3/ops/upgrading/rolling-downgrades/ --- -[rolling upgrade]: /riak/kv/2.2.3/setup/upgrading/cluster -[config ref]: /riak/kv/2.2.3/configuring/reference -[concept aae]: /riak/kv/2.2.3/learn/concepts/active-anti-entropy/ -[aae status]: /riak/kv/2.2.3/using/admin/riak-admin/#aae-status +[rolling upgrade]: {{}}riak/kv/2.2.3/setup/upgrading/cluster +[config ref]: {{}}riak/kv/2.2.3/configuring/reference +[concept aae]: {{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/ +[aae status]: {{}}riak/kv/2.2.3/using/admin/riak-admin/#aae-status Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade]. diff --git a/content/riak/kv/2.2.3/setup/installing.md b/content/riak/kv/2.2.3/setup/installing.md index a16edf96e1..d5a5585b3d 100644 --- a/content/riak/kv/2.2.3/setup/installing.md +++ b/content/riak/kv/2.2.3/setup/installing.md @@ -18,18 +18,18 @@ aliases: - /riak/kv/2.2.3/installing/ --- -[install aws]: /riak/kv/2.2.3/setup/installing/amazon-web-services -[install debian & ubuntu]: /riak/kv/2.2.3/setup/installing/debian-ubuntu -[install freebsd]: /riak/kv/2.2.3/setup/installing/freebsd -[install mac osx]: /riak/kv/2.2.3/setup/installing/mac-osx -[install rhel & centos]: /riak/kv/2.2.3/setup/installing/rhel-centos -[install smartos]: /riak/kv/2.2.3/setup/installing/smartos -[install solaris]: /riak/kv/2.2.3/setup/installing/solaris -[install suse]: /riak/kv/2.2.3/setup/installing/suse -[install windows azure]: /riak/kv/2.2.3/setup/installing/windows-azure -[install source index]: /riak/kv/2.2.3/setup/installing/source -[community projects]: /community/projects -[upgrade index]: /riak/kv/2.2.3/setup/upgrading +[install aws]: {{}}riak/kv/2.2.3/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.2.3/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.2.3/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.2.3/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.2.3/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.2.3/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.2.3/setup/installing/solaris +[install suse]: {{}}riak/kv/2.2.3/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.2.3/setup/installing/windows-azure +[install source index]: {{}}riak/kv/2.2.3/setup/installing/source +[community projects]: {{}}community/projects +[upgrade index]: {{}}riak/kv/2.2.3/setup/upgrading ## Supported Platforms diff --git a/content/riak/kv/2.2.3/setup/installing/amazon-web-services.md b/content/riak/kv/2.2.3/setup/installing/amazon-web-services.md index c6727b000b..55b10839a3 100644 --- a/content/riak/kv/2.2.3/setup/installing/amazon-web-services.md +++ b/content/riak/kv/2.2.3/setup/installing/amazon-web-services.md @@ -28,7 +28,7 @@ In order to launch a Riak virtual machine via the AWS Marketplace, you will firs 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{}}images/aws-marketplace-settings.png) 4. Click the **Accept Terms and Launch with 1-Click** button. 
@@ -52,9 +52,9 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{}}images/aws-marketplace-security-group.png) -We also recommend that you read more about Riak's [Security and Firewalls](/riak/kv/2.2.3/using/security/). +We also recommend that you read more about Riak's [Security and Firewalls]({{}}riak/kv/2.2.3/using/security/). ## Clustering Riak on AWS diff --git a/content/riak/kv/2.2.3/setup/installing/debian-ubuntu.md b/content/riak/kv/2.2.3/setup/installing/debian-ubuntu.md index 15d2890672..c5bd984fff 100644 --- a/content/riak/kv/2.2.3/setup/installing/debian-ubuntu.md +++ b/content/riak/kv/2.2.3/setup/installing/debian-ubuntu.md @@ -18,10 +18,10 @@ aliases: - /riak/kv/2.2.3/installing/debian-ubuntu/ --- -[install source index]: /riak/kv/2.2.3/setup/installing/source/ -[security index]: /riak/kv/2.2.3/using/security/ -[install source erlang]: /riak/kv/2.2.3/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.3/setup/installing/verify +[install source index]: {{}}riak/kv/2.2.3/setup/installing/source/ +[security index]: {{}}riak/kv/2.2.3/using/security/ +[install source erlang]: {{}}riak/kv/2.2.3/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.3/setup/installing/verify Riak KV can be installed on Debian or Ubuntu-based systems using a binary package or by compiling from source code. diff --git a/content/riak/kv/2.2.3/setup/installing/freebsd.md b/content/riak/kv/2.2.3/setup/installing/freebsd.md index 085c465e3e..373fba96fb 100644 --- a/content/riak/kv/2.2.3/setup/installing/freebsd.md +++ b/content/riak/kv/2.2.3/setup/installing/freebsd.md @@ -20,9 +20,9 @@ aliases: -[install source erlang]: /riak/kv/2.2.3/setup/installing/source/erlang -[downloads]: /riak/kv/2.2.3/downloads/ -[install verify]: /riak/kv/2.2.3/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.2.3/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.2.3/downloads/ +[install verify]: {{}}riak/kv/2.2.3/setup/installing/verify You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. diff --git a/content/riak/kv/2.2.3/setup/installing/mac-osx.md b/content/riak/kv/2.2.3/setup/installing/mac-osx.md index 2ee3c8cb2c..2cb86d79b9 100644 --- a/content/riak/kv/2.2.3/setup/installing/mac-osx.md +++ b/content/riak/kv/2.2.3/setup/installing/mac-osx.md @@ -20,9 +20,9 @@ aliases: -[perf open files]: /riak/kv/2.2.3/using/performance/open-files-limit -[install source erlang]: /riak/kv/2.2.3/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.3/setup/installing/verify +[perf open files]: {{}}riak/kv/2.2.3/using/performance/open-files-limit +[install source erlang]: {{}}riak/kv/2.2.3/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.3/setup/installing/verify The following steps are known to work with Mac OS X 10.8, 10.9 (Mavericks), and Yosemite. 
You can install from source or download a diff --git a/content/riak/kv/2.2.3/setup/installing/rhel-centos.md b/content/riak/kv/2.2.3/setup/installing/rhel-centos.md index 47d3b800e9..ec30073ea8 100644 --- a/content/riak/kv/2.2.3/setup/installing/rhel-centos.md +++ b/content/riak/kv/2.2.3/setup/installing/rhel-centos.md @@ -20,9 +20,9 @@ aliases: -[install source index]: /riak/kv/2.2.3/setup/installing/source -[install source erlang]: /riak/kv/2.2.3/setup/installing/source/erlang -[install verify]: /riak/kv/2.2.3/setup/installing/verify +[install source index]: {{}}riak/kv/2.2.3/setup/installing/source +[install source erlang]: {{}}riak/kv/2.2.3/setup/installing/source/erlang +[install verify]: {{}}riak/kv/2.2.3/setup/installing/verify Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on diff --git a/content/riak/kv/2.2.3/setup/installing/smartos.md b/content/riak/kv/2.2.3/setup/installing/smartos.md index 8178bb1986..9396c540d9 100644 --- a/content/riak/kv/2.2.3/setup/installing/smartos.md +++ b/content/riak/kv/2.2.3/setup/installing/smartos.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.2.3/installing/smartos/ --- -[install verify]: /riak/kv/2.2.3/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.3/setup/installing/verify {{% note title="SmartOS End of Life (EOL) for Riak KV 2.2.3" %}} SmartOS is no longer supported in Riak KV 2.2.3+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). @@ -74,13 +74,13 @@ cat /opt/local/etc/pkgin/repositories.conf Download your version of the Riak binary package for SmartOS: ```bash -curl -o /tmp/riak-2.2.3-SmartOS-x86_64.tgz http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/smartos/1.8/riak-2.2.3-SmartOS-x86_64.tgz +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz ``` Next, install the package: ``` -pkg_add /tmp/riak-2.2.3-SmartOS-x86_64.tgz +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz ``` After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: diff --git a/content/riak/kv/2.2.3/setup/installing/solaris.md b/content/riak/kv/2.2.3/setup/installing/solaris.md index d0973171e8..2f87aaf3d3 100644 --- a/content/riak/kv/2.2.3/setup/installing/solaris.md +++ b/content/riak/kv/2.2.3/setup/installing/solaris.md @@ -20,7 +20,7 @@ aliases: -[install verify]: /riak/kv/2.2.3/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.3/setup/installing/verify The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. 
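The platform guides above stop at package installation and defer to the [install verify] page for liveness checks. As a quick sketch of that check, common to every platform covered in this diff, with the stock `riak` CLI:

```bash
# Start the node, then confirm the Erlang VM is up and responding;
# a healthy node answers `riak ping` with "pong".
riak start
riak ping
```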
diff --git a/content/riak/kv/2.2.3/setup/installing/source.md b/content/riak/kv/2.2.3/setup/installing/source.md index b427c04222..db3a49a30a 100644 --- a/content/riak/kv/2.2.3/setup/installing/source.md +++ b/content/riak/kv/2.2.3/setup/installing/source.md @@ -20,13 +20,13 @@ aliases: -[install source erlang]: /riak/kv/2.2.3/setup/installing/source/erlang -[downloads]: /riak/kv/2.2.3/downloads/ -[install debian & ubuntu#source]: /riak/kv/2.2.3/setup/installing/debian-ubuntu/#installing-from-source -[install freebsd#source]: /riak/kv/2.2.3/setup/installing/freebsd/#installing-from-source -[install mac osx#source]: /riak/kv/2.2.3/setup/installing/mac-osx/#installing-from-source -[install rhel & centos#source]: /riak/kv/2.2.3/setup/installing/rhel-centos/#installing-from-source -[install verify]: /riak/kv/2.2.3/setup/installing/verify +[install source erlang]: {{}}riak/kv/2.2.3/setup/installing/source/erlang +[downloads]: {{}}riak/kv/2.2.3/downloads/ +[install debian & ubuntu#source]: {{}}riak/kv/2.2.3/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{}}riak/kv/2.2.3/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{}}riak/kv/2.2.3/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{}}riak/kv/2.2.3/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{}}riak/kv/2.2.3/setup/installing/verify Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in diff --git a/content/riak/kv/2.2.3/setup/installing/source/erlang.md b/content/riak/kv/2.2.3/setup/installing/source/erlang.md index 32dcaeed80..9beae0c5d3 100644 --- a/content/riak/kv/2.2.3/setup/installing/source/erlang.md +++ b/content/riak/kv/2.2.3/setup/installing/source/erlang.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.3/installing/source/erlang/ --- -[install index]: /riak/kv/2.2.3/setup/installing -[security basics]: /riak/kv/2.2.3/using/security/basics +[install index]: {{}}riak/kv/2.2.3/setup/installing +[security basics]: {{}}riak/kv/2.2.3/using/security/basics Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](http://s3.amazonaws.com/downloads.basho.com/erlang/otp_src_R16B02-basho10.tar.gz). 
**If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** diff --git a/content/riak/kv/2.2.3/setup/installing/source/jvm.md b/content/riak/kv/2.2.3/setup/installing/source/jvm.md index ee90d41333..2717cd17ac 100644 --- a/content/riak/kv/2.2.3/setup/installing/source/jvm.md +++ b/content/riak/kv/2.2.3/setup/installing/source/jvm.md @@ -19,7 +19,7 @@ aliases: - /riak/kv/2.2.3/installing/source/jvm/ --- -[usage search]: /riak/kv/2.2.3/developing/usage/search +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search If you are using [Riak Search 2.0][usage search], codename Yokozuna, you will need to install **Java 1.6 or later** to run [Apache diff --git a/content/riak/kv/2.2.3/setup/installing/suse.md b/content/riak/kv/2.2.3/setup/installing/suse.md index c74a567077..bf163e9605 100644 --- a/content/riak/kv/2.2.3/setup/installing/suse.md +++ b/content/riak/kv/2.2.3/setup/installing/suse.md @@ -18,7 +18,7 @@ aliases: - /riak/kv/2.2.3/installing/suse/ --- -[install verify]: /riak/kv/2.2.3/setup/installing/verify +[install verify]: {{}}riak/kv/2.2.3/setup/installing/verify Riak KV can be installed on OpenSuse and SLES systems using a binary package. The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE: diff --git a/content/riak/kv/2.2.3/setup/installing/verify.md b/content/riak/kv/2.2.3/setup/installing/verify.md index c8ed531f89..09a2a4fa4f 100644 --- a/content/riak/kv/2.2.3/setup/installing/verify.md +++ b/content/riak/kv/2.2.3/setup/installing/verify.md @@ -17,10 +17,10 @@ aliases: - /riak/kv/2.2.3/installing/verify-install/ --- -[client libraries]: /riak/kv/2.2.3/developing/client-libraries -[perf open files]: /riak/kv/2.2.3/using/performance/open-files-limit -[cluster ops bucket types]: /riak/kv/2.2.3/using/cluster-operations/bucket-types -[cluster ops inspect node]: /riak/kv/2.2.3/using/cluster-operations/inspecting-node +[client libraries]: {{}}riak/kv/2.2.3/developing/client-libraries +[perf open files]: {{}}riak/kv/2.2.3/using/performance/open-files-limit +[cluster ops bucket types]: {{}}riak/kv/2.2.3/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{}}riak/kv/2.2.3/using/cluster-operations/inspecting-node After you've installed Riak KV, we recommend checking the liveness of each node to ensure that requests are being properly served. diff --git a/content/riak/kv/2.2.3/setup/installing/windows-azure.md b/content/riak/kv/2.2.3/setup/installing/windows-azure.md index ef4db15fce..5c61d774b0 100644 --- a/content/riak/kv/2.2.3/setup/installing/windows-azure.md +++ b/content/riak/kv/2.2.3/setup/installing/windows-azure.md @@ -26,15 +26,15 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. Click "preview features" to view the available previews. - ![](/images/antares-iaas-preview-01.png) + ![]({{}}images/antares-iaas-preview-01.png) 3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". - ![](/images/antares-iaas-preview-02.png) + ![]({{}}images/antares-iaas-preview-02.png) 4. Select your subscription and click the check. - ![](/images/antares-iaas-preview-04.png) + ![]({{}}images/antares-iaas-preview-04.png) ### Create a virtual machine running CentOS Linux @@ -42,11 +42,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature 2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". 
- ![](/images/createvm_small.png) + ![]({{}}images/createvm_small.png) 3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. - ![](/images/vmconfiguration0.png) + ![]({{}}images/vmconfiguration0.png) 4. On the VM Configuration page, provide the following information: - Provide a "Virtual Machine Name", such as "testlinuxvm". @@ -56,7 +56,7 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - Select the appropriate "Size" from the drop down list. - Click the next arrow to continue. - ![](/images/vmconfiguration1.png) + ![]({{}}images/vmconfiguration1.png) 5. On the VM Mode page, provide the following information: - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. @@ -65,11 +65,11 @@ You will need to sign up for the Windows Azure Virtual Machines preview feature - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. - Click the next arrow to continue. - ![](/images/vmconfiguration2.png) + ![]({{}}images/vmconfiguration2.png) 6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. - ![](/images/vmconfiguration3.png) + ![]({{}}images/vmconfiguration3.png) 7. Wait while Windows Azure prepares your virtual machine. @@ -106,7 +106,7 @@ If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be 2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. - ![](/images/putty.png) + ![]({{}}images/putty.png) ## Install Riak and configure using a shell script diff --git a/content/riak/kv/2.2.3/setup/planning/backend.md b/content/riak/kv/2.2.3/setup/planning/backend.md index c61d8dd615..19f16b79c8 100644 --- a/content/riak/kv/2.2.3/setup/planning/backend.md +++ b/content/riak/kv/2.2.3/setup/planning/backend.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.2.3/ops/building/planning/backends/ --- -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.3/setup/planning/backend/memory -[plan backend multi]: /riak/kv/2.2.3/setup/planning/backend/multi -[dev api backend]: /riak/kv/2.2.3/developing/api/backend +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.3/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.3/setup/planning/backend/multi +[dev api backend]: {{}}riak/kv/2.2.3/developing/api/backend Pluggable storage backends are a key feature of Riak KV. They enable you to choose a low-level storage engine that suits specific operational needs. 
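The backend-planning hunk above is pure prose about choosing a storage engine. As a hedged illustration of what that choice amounts to in practice, here is a minimal sketch (it assumes a package install with the config file at `/etc/riak/riak.conf`; `storage_backend` is the setting these pages document, and `leveldb` is just an example value):

```bash
# A minimal sketch: switch the storage backend in riak.conf and restart.
# Assumes a package install with the config file at /etc/riak/riak.conf.
grep '^storage_backend' /etc/riak/riak.conf    # show the current backend
sudo sed -i 's/^storage_backend = .*/storage_backend = leveldb/' /etc/riak/riak.conf
sudo riak stop && sudo riak start              # restart so the change takes effect
```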
diff --git a/content/riak/kv/2.2.3/setup/planning/backend/bitcask.md b/content/riak/kv/2.2.3/setup/planning/backend/bitcask.md index 24f29ef819..a2f5f64501 100644 --- a/content/riak/kv/2.2.3/setup/planning/backend/bitcask.md +++ b/content/riak/kv/2.2.3/setup/planning/backend/bitcask.md @@ -17,18 +17,18 @@ aliases: [github bitcask]: https://github.com/basho/bitcask [bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf -[use admin riak cli]: /riak/kv/2.2.3/using/admin/riak-cli -[config reference]: /riak/kv/2.2.3/configuring/reference -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[learn clusters]: /riak/kv/2.2.3/learn/concepts/clusters -[plan backend multi]: /riak/kv/2.2.3/setup/planning/backend/multi -[usage search]: /riak/kv/2.2.3/developing/usage/search - -[glossary aae]: /riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae -[perf open files]: /riak/kv/2.2.3/using/performance/open-files-limit - -[plan bitcask capacity]: /riak/kv/2.2.3/setup/planning/bitcask-capacity-calc -[usage delete objects]: /riak/kv/2.2.3/developing/usage/deleting-objects +[use admin riak cli]: {{}}riak/kv/2.2.3/using/admin/riak-cli +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[learn clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters +[plan backend multi]: {{}}riak/kv/2.2.3/setup/planning/backend/multi +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search + +[glossary aae]: {{}}riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{}}riak/kv/2.2.3/using/performance/open-files-limit + +[plan bitcask capacity]: {{}}riak/kv/2.2.3/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{}}riak/kv/2.2.3/developing/usage/deleting-objects [Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. 
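Because Bitcask keeps one open file handle per data file, its preamble links to the open-files-limit page. A hedged sketch of the two things an operator usually checks first (the path shown is the conventional default, not a requirement):

```bash
# Hedged sketch: first checks for a Bitcask node. Paths are examples only.
grep '^bitcask.data_root' /etc/riak/riak.conf \
  || echo 'bitcask.data_root not set; the packaged default is /var/lib/riak/bitcask'
ulimit -n   # Bitcask holds one file handle per data file, so keep this generous
```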
diff --git a/content/riak/kv/2.2.3/setup/planning/backend/leveldb.md b/content/riak/kv/2.2.3/setup/planning/backend/leveldb.md index 3b516e925c..bca7b2ae1e 100644 --- a/content/riak/kv/2.2.3/setup/planning/backend/leveldb.md +++ b/content/riak/kv/2.2.3/setup/planning/backend/leveldb.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.2.3/ops/advanced/backends/leveldb/ --- -[upgrade 2.0#upgrading-leveldB]: / -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[config reference]: /riak/kv/2.2.3/configuring/reference -[perf index]: /riak/kv/2.2.3/using/performance -[config reference#aae]: /riak/kv/2.2.3/configuring/reference/#active-anti-entropy +[upgrade 2.0#upgrading-leveldB]: {{}} +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[perf index]: {{}}riak/kv/2.2.3/using/performance +[config reference#aae]: {{}}riak/kv/2.2.3/configuring/reference/#active-anti-entropy > **Note on upgrading to 2.0** > diff --git a/content/riak/kv/2.2.3/setup/planning/backend/memory.md b/content/riak/kv/2.2.3/setup/planning/backend/memory.md index 51f3f34e3f..2600a158f7 100644 --- a/content/riak/kv/2.2.3/setup/planning/backend/memory.md +++ b/content/riak/kv/2.2.3/setup/planning/backend/memory.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.3/ops/advanced/backends/memory/ --- -[config reference]: /riak/kv/2.2.3/configuring/reference -[plan backend multi]: /riak/kv/2.2.3/setup/planning/backend/multi -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[plan backend multi]: {{}}riak/kv/2.2.3/setup/planning/backend/multi +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[plan backend leveldb]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb The Memory storage backend uses in-memory tables to store all data. This data is never persisted to disk or to any other storage mechanism. diff --git a/content/riak/kv/2.2.3/setup/planning/backend/multi.md b/content/riak/kv/2.2.3/setup/planning/backend/multi.md index ad804d75c7..5e04c6e7a4 100644 --- a/content/riak/kv/2.2.3/setup/planning/backend/multi.md +++ b/content/riak/kv/2.2.3/setup/planning/backend/multi.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.3/ops/advanced/backends/multi/ --- -[concept buckets]: /riak/kv/2.2.3/learn/concepts/buckets -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb -[plan backend memory]: /riak/kv/2.2.3/setup/planning/backend/memory -[config reference]: /riak/kv/2.2.3/configuring/reference -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[use admin riak-admin cli]: /riak/kv/2.2.3/using/admin/riak-admin +[concept buckets]: {{}}riak/kv/2.2.3/learn/concepts/buckets +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.3/setup/planning/backend/memory +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[use admin riak-admin cli]: {{}}riak/kv/2.2.3/using/admin/riak-admin Riak allows you to run multiple backends within a single Riak cluster. 
Selecting the Multi backend enables you to use different storage diff --git a/content/riak/kv/2.2.3/setup/planning/best-practices.md b/content/riak/kv/2.2.3/setup/planning/best-practices.md index 03ad7dc3d5..db9b22b7b3 100644 --- a/content/riak/kv/2.2.3/setup/planning/best-practices.md +++ b/content/riak/kv/2.2.3/setup/planning/best-practices.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.3/ops/building/planning/best-practices --- -[use ref handoff]: /riak/kv/2.2.3/using/reference/handoff -[config mapreduce]: /riak/kv/2.2.3/configuring/mapreduce -[glossary aae]: /riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae -[cluster ops add remove node]: /riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes +[use ref handoff]: {{}}riak/kv/2.2.3/using/reference/handoff +[config mapreduce]: {{}}riak/kv/2.2.3/configuring/mapreduce +[glossary aae]: {{}}riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. diff --git a/content/riak/kv/2.2.3/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.2.3/setup/planning/bitcask-capacity-calc.md index 1d8761e546..d4340d6fdc 100644 --- a/content/riak/kv/2.2.3/setup/planning/bitcask-capacity-calc.md +++ b/content/riak/kv/2.2.3/setup/planning/bitcask-capacity-calc.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/ops/building/planning/bitcask --- -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask These calculators will assist you in sizing your cluster if you plan to use the default [Bitcask][plan backend bitcask] storage back end. 
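The Multi backend hunk above only rewrites link targets; as a hedged sketch of what a Multi configuration looks like in `riak.conf` (the backend names `bitcask_mult` and `leveldb_mult` are arbitrary examples; the option keys follow the multi-backend page this diff touches):

```bash
# Hedged sketch: appending a Multi backend configuration to riak.conf.
# Two named sub-backends, with the Bitcask one as the default.
cat <<'EOF' | sudo tee -a /etc/riak/riak.conf
storage_backend = multi
multi_backend.bitcask_mult.storage_backend = bitcask
multi_backend.leveldb_mult.storage_backend = leveldb
multi_backend.default = bitcask_mult
EOF
```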
diff --git a/content/riak/kv/2.2.3/setup/planning/cluster-capacity.md b/content/riak/kv/2.2.3/setup/planning/cluster-capacity.md index 867b44af89..22eac092bd 100644 --- a/content/riak/kv/2.2.3/setup/planning/cluster-capacity.md +++ b/content/riak/kv/2.2.3/setup/planning/cluster-capacity.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.3/ops/building/planning/cluster --- -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb -[plan bitcask capacity]: /riak/kv/2.2.3/setup/planning/bitcask-capacity-calc -[plan index]: /riak/kv/2.2.3/setup/planning -[concept replication]: /riak/kv/2.2.3/learn/concepts/replication -[use admin riak-admin#cluster]: /riak/kv/2.2.3/using/admin/riak-admin/#cluster -[config reference]: /riak/kv/2.2.3/configuring/reference -[perf benchmark]: /riak/kv/2.2.3/using/performance/benchmarking +[plan backend leveldb]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb +[plan bitcask capacity]: {{}}riak/kv/2.2.3/setup/planning/bitcask-capacity-calc +[plan index]: {{}}riak/kv/2.2.3/setup/planning +[concept replication]: {{}}riak/kv/2.2.3/learn/concepts/replication +[use admin riak-admin#cluster]: {{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[perf benchmark]: {{}}riak/kv/2.2.3/using/performance/benchmarking [LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) diff --git a/content/riak/kv/2.2.3/setup/planning/operating-system.md b/content/riak/kv/2.2.3/setup/planning/operating-system.md index a0dac5f22e..f280335239 100644 --- a/content/riak/kv/2.2.3/setup/planning/operating-system.md +++ b/content/riak/kv/2.2.3/setup/planning/operating-system.md @@ -12,7 +12,7 @@ menu: toc: true --- -[downloads]: /riak/kv/2.2.3/downloads/ +[downloads]: {{}}riak/kv/2.2.3/downloads/ We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making diff --git a/content/riak/kv/2.2.3/setup/planning/start.md b/content/riak/kv/2.2.3/setup/planning/start.md index e81dcddae8..b20708bdce 100644 --- a/content/riak/kv/2.2.3/setup/planning/start.md +++ b/content/riak/kv/2.2.3/setup/planning/start.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.3/ops/building/planning/system-planning --- -[plan backend]: /riak/kv/2.2.3/setup/planning/backend -[plan cluster capacity]: /riak/kv/2.2.3/setup/planning/cluster-capacity -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[plan bitcask capacity]: /riak/kv/2.2.3/setup/planning/bitcask-capacity-calc +[plan backend]: {{}}riak/kv/2.2.3/setup/planning/backend +[plan cluster capacity]: {{}}riak/kv/2.2.3/setup/planning/cluster-capacity +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[plan bitcask capacity]: {{}}riak/kv/2.2.3/setup/planning/bitcask-capacity-calc Here are some steps and recommendations for designing and configuring your Riak cluster.
diff --git a/content/riak/kv/2.2.3/setup/upgrading/checklist.md b/content/riak/kv/2.2.3/setup/upgrading/checklist.md index 5ebef79098..be768d4126 100644 --- a/content/riak/kv/2.2.3/setup/upgrading/checklist.md +++ b/content/riak/kv/2.2.3/setup/upgrading/checklist.md @@ -15,24 +15,24 @@ aliases: - /riak/kv/2.2.3/ops/upgrading/production-checklist/ --- -[perf open files]: /riak/kv/2.2.3/using/performance/open-files-limit -[perf index]: /riak/kv/2.2.3/using/performance +[perf open files]: {{}}riak/kv/2.2.3/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.2.3/using/performance [ntp]: http://www.ntp.org/ -[security basics]: /riak/kv/2.2.3/using/security/basics -[cluster ops load balance]: /riak/kv/2.2.3/configuring/load-balancing-proxy -[config reference]: /riak/kv/2.2.3/configuring/reference -[config backend]: /riak/kv/2.2.3/configuring/backend -[usage search]: /riak/kv/2.2.3/developing/usage/search -[usage conflict resolution]: /riak/kv/2.2.3/developing/usage/conflict-resolution -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency -[apps replication properties]: /riak/kv/2.2.3/developing/app-guide/replication-properties -[concept strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[cluster ops bucket types]: /riak/kv/2.2.3/using/cluster-operations/bucket-types -[use admin commands]: /riak/kv/2.2.3/using/admin/commands -[use admin riak control]: /riak/kv/2.2.3/using/admin/riak-control -[cluster ops inspect node]: /riak/kv/2.2.3/using/cluster-operations/inspecting-node -[troubleshoot http]: /riak/kv/2.2.3/using/troubleshooting/http-204 -[use admin riak-admin]: /riak/kv/2.2.3/using/admin/riak-admin +[security basics]: {{}}riak/kv/2.2.3/using/security/basics +[cluster ops load balance]: {{}}riak/kv/2.2.3/configuring/load-balancing-proxy +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[config backend]: {{}}riak/kv/2.2.3/configuring/backend +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search +[usage conflict resolution]: {{}}riak/kv/2.2.3/developing/usage/conflict-resolution +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency +[apps replication properties]: {{}}riak/kv/2.2.3/developing/app-guide/replication-properties +[concept strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[cluster ops bucket types]: {{}}riak/kv/2.2.3/using/cluster-operations/bucket-types +[use admin commands]: {{}}riak/kv/2.2.3/using/admin/commands +[use admin riak control]: {{}}riak/kv/2.2.3/using/admin/riak-control +[cluster ops inspect node]: {{}}riak/kv/2.2.3/using/cluster-operations/inspecting-node +[troubleshoot http]: {{}}riak/kv/2.2.3/using/troubleshooting/http-204 +[use admin riak-admin]: {{}}riak/kv/2.2.3/using/admin/riak-admin [SANs]: http://en.wikipedia.org/wiki/Storage_area_network Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition. 
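Two of the checklist items linked above, the open-files limit and NTP time synchronization, can be spot-checked from a shell before deploying. A minimal, hedged example (assuming classic `ntpd` tooling is installed):

```bash
# Hedged pre-deployment spot check for two production-checklist items:
# the open-files limit and NTP clock synchronization.
ulimit -n   # compare against the open-files-limit page's recommendation
ntpq -p     # confirm the node is syncing against reachable NTP peers
```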
diff --git a/content/riak/kv/2.2.3/setup/upgrading/cluster.md b/content/riak/kv/2.2.3/setup/upgrading/cluster.md new file mode 100644 index 0000000000..1efbb168a9 --- /dev/null +++ b/content/riak/kv/2.2.3/setup/upgrading/cluster.md @@ -0,0 +1,298 @@ +--- +title: "Upgrading a Cluster" +description: "" +project: "riak_kv" +project_version: "2.2.3" +menu: + riak_kv-2.2.3: + name: "Upgrading a Cluster" + identifier: "upgrading_cluster" + weight: 102 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/2.2.3/ops/upgrading/rolling-upgrades/ + - /riak/kv/2.2.3/ops/upgrading/rolling-upgrades/ +--- + +[production checklist]: {{}}riak/kv/2.2.3/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.2.3/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.2.3/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.2.3/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.2.3/developing/usage/secondary-indexes +[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.2.3/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.2.3/using/reference/jmx +[snmp]: {{}}riak/kv/2.2.3/using/reference/snmp + +{{% note title="Note on upgrading Riak KV from older versions" %}} Riak KV upgrades are tested and supported for two feature release versions. For example, upgrades from 1.1.x to 1.3.x are tested and supported, while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new version of Riak KV that is more than two feature releases ahead, we recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x. + +If you run [Riak Control]({{}}riak/kv/2.2.3/using/admin/riak-control), you should disable it during the rolling upgrade process. +{{% /note %}} + +Riak KV nodes negotiate with each other to determine supported +operating modes. This allows clusters containing mixed versions of Riak KV +to properly interoperate without special configuration, and simplifies +rolling upgrades. + +Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading. + +## Debian/Ubuntu + +The following example demonstrates upgrading a Riak KV node that has been +installed with the Debian/Ubuntu packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +2\. Back up the Riak KV node's `/etc` and `/data` directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Upgrade Riak KV: + +```bash +sudo dpkg -i »riak_package_name«.deb +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +5\. Verify Riak KV is running the new version: + +```bash +riak version +``` + +6\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +* `»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +* While the node was offline, other nodes may have accepted writes on its +behalf. This data is transferred to the node when it becomes available. + +8\. Repeat the process for the remaining nodes in the cluster.
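For reference, the eight Debian/Ubuntu steps above condense into a small per-node script. This is only a sketch; the package file name and node name are placeholders you would substitute, and every command in it comes straight from the steps above:

```bash
#!/usr/bin/env bash
# Hedged sketch: one-node rolling upgrade on Debian/Ubuntu (steps 1-7 above).
# PACKAGE and NODE are placeholders; substitute your own values.
set -e
PACKAGE="riak_package_name.deb"   # placeholder file name
NODE="riak@192.168.1.11"          # the node being upgraded

riak stop
sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
sudo dpkg -i "$PACKAGE"
riak start
riak version
riak-admin wait-for-service riak_kv "$NODE"
riak-admin transfers   # re-run until no hinted handoff transfers remain
```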
+ +## RHEL/CentOS + +The following example demonstrates upgrading a Riak KV node that has been +installed with the RHEL/CentOS packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +2\. Back up Riak KV's `/etc` and `/data` directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Upgrade Riak KV: + +```bash +sudo rpm -Uvh »riak_package_name«.rpm +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +5\. Verify that Riak KV is running the new version: + +```bash +riak version +``` + + +6\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +* `»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +* While the node was offline, other nodes may have accepted writes on its +behalf. This data is transferred to the node when it becomes available. + +8\. Repeat the process for the remaining nodes in the cluster. + + +## Solaris/OpenSolaris + +The following example demonstrates upgrading a Riak KV node that has been +installed with the Solaris/OpenSolaris packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +{{% note %}} If you are using the service management facility (SMF) to manage Riak KV, you will have to stop Riak KV via `svcadm` instead of using `riak stop`: + +```bash +sudo svcadm disable riak +``` +{{% /note %}} + + +2\. Back up Riak KV's `/etc` and `/data` directories: + +```bash +sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc +``` + +3\. Uninstall Riak KV: + +```bash +sudo pkgrm BASHOriak +``` + +4\. Install the new version of Riak KV: + +```bash +sudo pkgadd -d »riak_package_name«.pkg +``` + +5\. Restart Riak KV: + +```bash +riak start +``` + +{{% note %}} If you are using the service management facility (SMF) to manage Riak KV, you will have to start Riak KV via `svcadm` instead of using `riak start`: + +```bash +sudo svcadm enable riak +``` +{{% /note %}} + +6\. Verify that Riak KV is running the new version: + +```bash +riak version +``` + +7\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +`»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +8\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +While the node was offline, other nodes may have accepted writes on its +behalf. This data is transferred to the node when it becomes available. + +9\. Repeat the process for the remaining nodes in the cluster. + + +## Rolling Upgrade to Enterprise + +If you would like to upgrade an existing Riak KV cluster to a commercially +supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps: + +1. Shut down the node you are going to upgrade. +2. Back up your `etc` (app.config and vm.args) and `data` +directories. +3. Uninstall your Riak KV package. +4. Install the `riak_ee` package. +5. A standard package uninstall should not have removed your data + directories. If it did, move your backup to where the data directory + should be. +6. Copy any customizations from your backed-up vm.args to the + `riak_ee` installed vm.args file; these files may be identical. +7. The app.config file from `riak_ee` will be significantly different from your backed-up file.
While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config: + * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information. + * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information. + * `riak_jmx` --- See [JMX Monitoring][jmx monitor] for more information. + * `snmp` --- See [SNMP][snmp] for more information. +8. Start Riak KV on the upgraded node. + +## Basho Patches + +After upgrading, you should ensure that any custom patches contained in +the `basho-patches` directory are examined to determine their +application to the upgraded version. If you find that patches no longer +apply to the upgraded version, you should remove them from the +`basho-patches` directory prior to operating the node in production. + +The following lists locations of the `basho-patches` directory for +each supported operating system: + +- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches` +- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches` +- FreeBSD: `/usr/local/lib/riak/lib/basho-patches` +- SmartOS: `/opt/local/lib/riak/lib/basho-patches` +- Solaris 10: `/opt/riak/lib/basho-patches` + +## Riaknostic + +It is a good idea to also verify some basic configuration and general +health of the Riak KV node after upgrading by using Riak KV's built-in +diagnostic utility Riaknostic. + +Ensure that Riak KV is running on the node, and issue the following +command: + +```bash +riak-admin diag +``` + +Make the recommended changes from the command output to ensure optimal +node operation. diff --git a/content/riak/kv/2.2.3/setup/upgrading/search.md b/content/riak/kv/2.2.3/setup/upgrading/search.md new file mode 100644 index 0000000000..d7969c020b --- /dev/null +++ b/content/riak/kv/2.2.3/setup/upgrading/search.md @@ -0,0 +1,276 @@ +--- +title: "Upgrading Search from 1.x to 2.x" +description: "" +project: "riak_kv" +project_version: "2.2.3" +menu: + riak_kv-2.2.3: + name: "Upgrading Search 1.x to 2.x" + identifier: "upgrading_search" + weight: 104 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/2.2.3/ops/advanced/upgrading-search-2 + - /riak/kv/2.2.3/ops/advanced/upgrading-search-2 +--- + +If you're using Search in a version of Riak prior to 2.0 (1.3.0 to +1.4.x), you should follow these steps to migrate your search indexes +from the legacy `merge_index` to the new Solr-backed [Yokozuna](../../../using/reference/search) indexes. The legacy version of Riak Search is now deprecated +and does not support most new 2.0 features (i.e. no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate. + +Please note that the legacy `merge_index`-based search (aka legacy +Search) will be removed in a future release of Riak. + +## Overview of an Upgrade + +The migration steps explained here are as automated as they can +reasonably be, but they do include some manual steps for safety. They +are meant to be run on a live cluster, so there's no need to take all of +your nodes down.
Like all migration activities, you should undertake +these steps at a time when your cluster is relatively light on traffic, +i.e. _not_ the week before Christmas. + +The main goal of a live migration is to stand up indexes in the new Riak +Search that parallel the existing ones in legacy. New writes add entries +to both indexes while AAE adds entries in the new indexes for existing +data. + +Parallel indexes mean more disk usage. How much more will depend on the +schema, but tests have shown Solr to generally use less disk space. A +prudent plan will expect new Search to use as much disk as legacy. You +can also expect more CPU usage as analysis will temporarily be performed +by both systems. Finally, Solr runs on a JVM process requiring its own +RAM. A good start is 2 GB, but more will be required for heavier +workloads. Conversely, do not make the heap too large, as that could +cause lengthy garbage collection pauses. + +As the new search indexes catch up with the old, incoming queries will +still be serviced by legacy Search. Once you have determined that the +new indexes are consistent with KV, you can perform a live switch to the +new system and turn off legacy Search. Finally, you can remove the old +merge index directories to reclaim disk space. + +> **Downgrading and Merge Index** +> +> It may be tempting to keep the merge index files in case of a downgrade. +We don't recommend doing that if writes are being made to these buckets +during upgrade. Once `search: false` is set on a bucket, all new KV +data written will have missing indexes in the merge index and +overwritten data will have inconsistent indexes. At this point, a +downgrade requires a full re-index of the data as legacy Search has no +mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search). + +> **Active Anti-Entropy (AAE) Required** +> +>Migration requires that Riak's AAE subsystem be enabled. It's +responsible for finding all the missing index entries for existing data +and adding them. Technically speaking, the migration can be performed +without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space from merge index as its GC +algorithm is bad at getting rid of large index files. + +## Steps to Upgrading + +1. First, you'll perform a normal [rolling upgrade](../cluster). + As you upgrade, enable `yokozuna` (the new Riak Search library) on + each node. If you're still using `app.config`, it's called `yokozuna`. + If you've chosen to upgrade to the new `riak.conf` config option, it's + called `search`. + + ```riakconf + search = on + ``` + ```appconfig + {yokozuna, [ + %% Other configs + {enabled, true}, + %% Other configs + ]} + ``` + +
+    {{% note title="Upgrade First" %}}
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps, which require them.
+    {{% /note %}}
+ +2. For every schema in legacy Search, you must create a comparable +schema in new Search. If you want to use the default schema named +[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema. + + To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas) + instructions to learn how to define your XML file. Once you've created + the file, you can upload it to the cluster. + + ```curl + curl -XPUT http://localhost:8098/search/schema/my_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @my_schema.xml + ``` + +3. For every index in legacy Search, you must create a comparable index +in new Search, setting the appropriate schema that you created in the +previous step. This index can have the same name as your legacy Search +index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup). + + ```curl + curl -XPUT http://localhost:8098/search/index/my_index \ + -H 'Content-Type: application/json' \ + -d '{"schema":"my_schema"}' + ``` + +4. For each bucket which is indexed by legacy Search, you must add the +`search_index` bucket property to point to the new Search index. This +new index is what we are attempting to migrate all of our index data to. +You can find more details about this step under [Using Search](../../../developing/usage/search/#setup). + + ```curl + curl -XPUT http://localhost:8098/buckets/my_bucket/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"my_index"}}' + ``` + + Once a bucket is associated with the new Search, all objects that are + written or modified in Riak will be indexed by **both** legacy and new + Search. However, the HTTP and client query interfaces will still + continue to use the legacy Search. + +5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash +trees must be manually cleared so that AAE will notice the missing +indexes. + + Attach to one of the Riak nodes by calling `riak attach-direct`. Paste + the following code into the shell. It clears the Search hash trees for + each node in the cluster. + + ```erlang + riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity). + ``` + + Press `Ctrl-D` to exit from the attached shell. + + In the background AAE will rebuild the hash trees and exchange them + with KV. These exchanges will notice objects are missing and index + them in new Search. + + + +6. Monitor the AAE status of every node until a full round of exchanges +has occurred on every node. + + ```bash + riak-admin search aae-status + ``` + + First, you must wait until all trees are rebuilt. This may take a + while as each node is configured, by default, to build a maximum of + one tree per hour. You can determine when a tree is built by looking + at the `Entropy Trees` section. When a tree is not built it will show + `--` under the `Built (ago)` column. Otherwise, it will list how long + ago the tree was built in a human-friendly format. Here is an example + of trees that are not built: + + ``` + ================================ Entropy Trees ================================ + Index Built (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 -- + 319703483166135013357056057156686910549735243776 -- + ... 
+ ``` + + Here is an example of built trees: + + ``` + ================================ Entropy Trees ================================ + Index Built (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 12.3 hr + 319703483166135013357056057156686910549735243776 5.3 hr + ... + ``` + + After all the trees are built you then have to wait for a full + exchange round to occur for every partition on every node. That is, + the full exchange round must be **NEWER** than the time the tree was + built. That way you know the exchange was based on the latest tree. + The exchange information is found under the `Exchanges` section. + Under that section there are two columns: `Last (ago)` and `All + (ago)`. In this case, you want to wait until the `All (ago)` column is + newer than the value of `Built (ago)` in the `Entropy Trees` section. + For example, given the entropy tree output above, this output would + indicate both partitions have had a full exchange round since the + latest tree was built: + + ``` + ================================== Exchanges ================================== + Index Last (ago) All (ago) + ------------------------------------------------------------------------------- + ... + 296867520082839655260123481645494988367611297792 12.1 hr 12.1 hr + 319703483166135013357056057156686910549735243776 5.1 hr 5.2 hr + ... + ``` + + Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than + `5.3 hr`. Once the exchange is newer for every partition on every + node you know that AAE has brought all new indexes up to date. + +7. Next, call the following command that will give HTTP and PB query +control to the new Riak Search. + + ```bash + riak-admin search switch-to-new-search + ``` + +
+    {{% note title="Check Results Before Switching (Optional)" %}}
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After `switch-to-new-search` is run, all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching, you can use its dedicated
+    HTTP resource at `/search/query/?q=...`.
+    {{% /note %}}
+ +8. Set the `search` bucket property to `false` for all legacy indexed +buckets. This deactivates legacy Search. + + ```curl + curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search": false}}' + ``` + +9. Disable the Riak Search process on each node by setting `riak_search` +`enabled` to `false`. + + ```appconfig + {riak_search, [ + %% Other configs + {enabled, false}, + %% Other configs + ]}, + ``` + +10. Perform a rolling restart. This is needed both to stop legacy +Search and to properly remove the legacy Search commit hooks. A bug +in the 1.4.x series allowed bucket properties to leak into what Riak +developers call the "raw ring". In the case of legacy Search it causes +the commit hooks to persist even when legacy Search is disabled and the +search property is set to false. + + New Search has code to expunge the legacy hooks from the raw ring but + it only occurs during start-up and requires that legacy Search be + disabled in the configuration. Thus, the easiest way to fix things is + to disable legacy Search (in step 9) and then perform a rolling + restart of the cluster. + +11. Finally, delete the merge index directories to reclaim disk space. + +For any questions, reach out to the [Riak community]({{}}community). Preferably, ask your questions up front rather than during the middle of a migration. diff --git a/content/riak/kv/2.2.3/setup/upgrading/version.md b/content/riak/kv/2.2.3/setup/upgrading/version.md index efc2af328d..7852f38b04 100644 --- a/content/riak/kv/2.2.3/setup/upgrading/version.md +++ b/content/riak/kv/2.2.3/setup/upgrading/version.md @@ -19,18 +19,18 @@ aliases: --- -[production checklist]: /riak/kv/2.2.3/setup/upgrading/checklist -[use admin riak control]: /riak/kv/2.2.3/using/admin/riak-control -[use admin commands]: /riak/kv/2.2.3/using/admin/commands -[use admin riak-admin]: /riak/kv/2.2.3/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.2.3/developing/usage/secondary-indexes -[release notes]: /riak/kv/2.2.3/release-notes +[production checklist]: {{}}riak/kv/2.2.3/setup/upgrading/checklist +[use admin riak control]: {{}}riak/kv/2.2.3/using/admin/riak-control +[use admin commands]: {{}}riak/kv/2.2.3/using/admin/commands +[use admin riak-admin]: {{}}riak/kv/2.2.3/using/admin/riak-admin +[usage secondary-indexes]: {{}}riak/kv/2.2.3/developing/usage/secondary-indexes +[release notes]: {{}}riak/kv/2.2.3/release-notes [riak enterprise]: http://basho.com/products/riak-kv/ -[cluster ops mdc]: /riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.2.3/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.2.3/using/reference/jmx -[snmp]: /riak/kv/2.2.3/using/reference/snmp -[Release Notes]: /riak/kv/2.2.3/release-notes +[cluster ops mdc]: {{}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{}}riak/kv/2.2.3/configuring/v3-multi-datacenter +[jmx monitor]: {{}}riak/kv/2.2.3/using/reference/jmx +[snmp]: {{}}riak/kv/2.2.3/using/reference/snmp +[Release Notes]: {{}}riak/kv/2.2.3/release-notes ## Overview diff --git a/content/riak/kv/2.2.3/using/admin/commands.md b/content/riak/kv/2.2.3/using/admin/commands.md index 5fe8312433..1977865934 100644 --- a/content/riak/kv/2.2.3/using/admin/commands.md +++ b/content/riak/kv/2.2.3/using/admin/commands.md @@ -15,11 +15,11 @@ aliases: - /riak/kv/2.2.3/ops/running/cluster-admin --- -[use admin riak-admin#cluster]: /riak/kv/2.2.3/using/admin/riak-admin/#cluster -[concept clusters]: 
/riak/kv/2.2.3/learn/concepts/clusters -[cluster ops add remove node]: /riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes -[use admin riak-admin#cluster-plan]: /riak/kv/2.2.3/using/admin/riak-admin/#cluster-plan -[use admin riak-admin#cluster-commit]: /riak/kv/2.2.3/using/admin/riak-admin/#cluster-commit +[use admin riak-admin#cluster]: {{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters +[cluster ops add remove node]: {{}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes +[use admin riak-admin#cluster-plan]: {{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster-plan +[use admin riak-admin#cluster-commit]: {{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster-commit This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide diff --git a/content/riak/kv/2.2.3/using/admin/riak-admin.md b/content/riak/kv/2.2.3/using/admin/riak-admin.md index 2eb7b2bf90..c9d66aa0a1 100644 --- a/content/riak/kv/2.2.3/using/admin/riak-admin.md +++ b/content/riak/kv/2.2.3/using/admin/riak-admin.md @@ -15,27 +15,27 @@ aliases: - /riak/kv/2.2.3/ops/running/tools/riak-admin --- -[config reference]: /riak/kv/2.2.3/configuring/reference -[use admin commands]: /riak/kv/2.2.3/using/admin/commands -[use admin commands#join]: /riak/kv/2.2.3/using/admin/commands/#join -[use admin commands#leave]: /riak/kv/2.2.3/using/admin/commands/#leave -[cluster ops backup]: /riak/kv/2.2.3/using/cluster-operations/backing-up -[config reference#node-metadata]: /riak/kv/2.2.3/configuring/reference/#node-metadata -[cluster ops change info]: /riak/kv/2.2.3/using/cluster-operations/changing-cluster-info -[usage mapreduce]: /riak/kv/2.2.3/developing/usage/mapreduce -[usage commit hooks]: /riak/kv/2.2.3/developing/usage/commit-hooks -[config reference#ring]: /riak/kv/2.2.3/configuring/reference/#ring -[cluster ops inspect node]: /riak/kv/2.2.3/using/cluster-operations/inspecting-node -[use ref monitoring]: /riak/kv/2.2.3/using/reference/statistics-monitoring -[downgrade]: /riak/kv/2.2.3/setup/downgrade -[security index]: /riak/kv/2.2.3/using/security/ -[security managing]: /riak/kv/2.2.3/using/security/managing-sources -[cluster ops bucket types]: /riak/kv/2.2.3/using/cluster-operations/bucket-types -[cluster ops 2i]: /riak/kv/2.2.3/using/reference/secondary-indexes -[repair recover index]: /riak/kv/2.2.3/using/repair-recovery -[cluster ops strong consistency]: /riak/kv/2.2.3/using/cluster-operations/strong-consistency -[cluster ops handoff]: /riak/kv/2.2.3/using/cluster-operations/handoff -[use admin riak-admin#stats]: /riak/kv/2.2.3/using/admin/riak-admin/#stats +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[use admin commands]: {{}}riak/kv/2.2.3/using/admin/commands +[use admin commands#join]: {{}}riak/kv/2.2.3/using/admin/commands/#join +[use admin commands#leave]: {{}}riak/kv/2.2.3/using/admin/commands/#leave +[cluster ops backup]: {{}}riak/kv/2.2.3/using/cluster-operations/backing-up +[config reference#node-metadata]: {{}}riak/kv/2.2.3/configuring/reference/#node-metadata +[cluster ops change info]: {{}}riak/kv/2.2.3/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{}}riak/kv/2.2.3/developing/usage/mapreduce +[usage commit hooks]: {{}}riak/kv/2.2.3/developing/usage/commit-hooks +[config reference#ring]: {{}}riak/kv/2.2.3/configuring/reference/#ring +[cluster ops inspect node]: {{}}riak/kv/2.2.3/using/cluster-operations/inspecting-node +[use ref 
monitoring]: {{}}riak/kv/2.2.3/using/reference/statistics-monitoring +[downgrade]: {{}}riak/kv/2.2.3/setup/downgrade +[security index]: {{}}riak/kv/2.2.3/using/security/ +[security managing]: {{}}riak/kv/2.2.3/using/security/managing-sources +[cluster ops bucket types]: {{}}riak/kv/2.2.3/using/cluster-operations/bucket-types +[cluster ops 2i]: {{}}riak/kv/2.2.3/using/reference/secondary-indexes +[repair recover index]: {{}}riak/kv/2.2.3/using/repair-recovery +[cluster ops strong consistency]: {{}}riak/kv/2.2.3/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{}}riak/kv/2.2.3/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{}}riak/kv/2.2.3/using/admin/riak-admin/#stats ## `riak-admin` diff --git a/content/riak/kv/2.2.3/using/admin/riak-cli.md b/content/riak/kv/2.2.3/using/admin/riak-cli.md index c081adc087..f6253320ad 100644 --- a/content/riak/kv/2.2.3/using/admin/riak-cli.md +++ b/content/riak/kv/2.2.3/using/admin/riak-cli.md @@ -15,10 +15,10 @@ aliases: - /riak/kv/2.2.3/ops/running/tools/riak --- -[configuration file]: /riak/kv/2.2.3/configuring/reference/ +[configuration file]: {{}}riak/kv/2.2.3/configuring/reference/ [escript]: http://www.erlang.org/doc/man/escript.html -[`riak-admin`]: /riak/kv/2.2.3/using/admin/riak-admin/#top -[configuration]: /riak/kv/2.2.3/configuring/reference/ +[`riak-admin`]: {{}}riak/kv/2.2.3/using/admin/riak-admin/#top +[configuration]: {{}}riak/kv/2.2.3/configuring/reference/ ## riak diff --git a/content/riak/kv/2.2.3/using/admin/riak-control.md b/content/riak/kv/2.2.3/using/admin/riak-control.md index 52c94407ff..27151c12b1 100644 --- a/content/riak/kv/2.2.3/using/admin/riak-control.md +++ b/content/riak/kv/2.2.3/using/admin/riak-control.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/ops/advanced/riak-control --- -[config reference]: /riak/kv/2.2.3/configuring/reference +[config reference]: {{}}riak/kv/2.2.3/configuring/reference Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. @@ -109,7 +109,7 @@ and configured SSL and HTTPS. ## Enabling SSL and HTTPS In order to use SSL in conjunction with Riak Control, SSL must be -enabled on each Riak node. For more information, see our [security documentation](/riak/kv/2.2.3/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. +enabled on each Riak node. For more information, see our [security documentation]({{}}riak/kv/2.2.3/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. Please note that Riak Control will not work if you have enabled authentication but SSL is not set up properly. @@ -182,7 +182,7 @@ enable it, follow browser-specific instructions. When you first navigate to Riak Control, you will land on the Snapshot view: -[ ![Snapshot View](/images/control_current_snapshot.png) ] (/images/control_current_snapshot.png) +[ ![Snapshot View]({{}}images/control_current_snapshot.png) ] ({{}}images/control_current_snapshot.png) In this interface, the health of your cluster is made immediately obvious. In the event that something isn't quite right (or has the @@ -204,29 +204,29 @@ as adding, removing, and marking nodes as down. 
Staged changes to the cluster: -[ ![Cluster Management Staged](/images/control_cluster_management_staged.png) ] (/images/control_cluster_management_staged.png) +[ ![Cluster Management Staged]({{}}images/control_cluster_management_staged.png) ] ({{}}images/control_cluster_management_staged.png) Changes committed; transfers active: -[ ![Cluster Management Transfers](/images/control_cluster_management_transfers.png) ] (/images/control_cluster_management_transfers.png) +[ ![Cluster Management Transfers]({{}}images/control_cluster_management_transfers.png) ] ({{}}images/control_cluster_management_transfers.png) Cluster stabilizes after changes: -[ ![Cluster Management Stable](/images/control_cluster_management_stable.png) ] (/images/control_cluster_management_stable.png) +[ ![Cluster Management Stable]({{}}images/control_cluster_management_stable.png) ] ({{}}images/control_cluster_management_stable.png) ### Node Management View The node management view allows you to operate against the individual nodes in the cluster. -[ ![Node Management](/images/control_node_management.png) ] (/images/control_node_management.png) +[ ![Node Management]({{}}images/control_node_management.png) ] ({{}}images/control_node_management.png) ### Ring View One level deeper than the cluster view is the ring view. This is where you can -see the health of each [vnode](/riak/kv/2.2.3/learn/glossary/#vnode). +see the health of each [vnode]({{}}riak/kv/2.2.3/learn/glossary/#vnode). -[ ![Ring View](/images/control_current_ring.png) ] (/images/control_current_ring.png) +[ ![Ring View]({{}}images/control_current_ring.png) ] ({{}}images/control_current_ring.png) Most of the time, your ring will be too large to effectively manage from the ring view. That said, with filters you can easily identify partition diff --git a/content/riak/kv/2.2.3/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.2.3/using/cluster-operations/active-anti-entropy.md index dd19a865a9..892c00e7a1 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/active-anti-entropy.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/active-anti-entropy.md @@ -15,8 +15,8 @@ aliases: - /riak/2.2.3/ops/advanced/aae/ --- -[config search#throttledelay]: /riak/kv/2.2.3/configuring/search/#search-anti-entropy-throttle-$tier-delay -[config search#throttle]: riak/kv/2.2.3/configuring/search/#search-anti-entropy-throttle +[config search#throttledelay]: {{}}riak/kv/2.2.3/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{}}riak/kv/2.2.3/configuring/search/#search-anti-entropy-throttle Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. @@ -57,12 +57,12 @@ anti_entropy = active-debug ]} ``` -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. 
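Once the node is back up with AAE active, a quick way to confirm the subsystem is actually building trees and exchanging them is the standard `riak-admin` AAE report (shown here only as a hedged verification step):

```bash
# After restarting with anti_entropy = active, check that AAE is doing work.
# The report summarizes exchanges, entropy trees, and keys repaired per partition.
riak-admin aae-status
```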
## Disabling Active Anti-Entropy Alternatively, AAE can be switched off if you would like to repair -object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair) alone: +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: ```riakconf anti_entropy = passive @@ -90,7 +90,7 @@ The default directory for AAE data is `./data/anti_entropy`, as in the example above, but this can be changed. See the section below titled **Data Directory**. -Remember that you will need to [restart the node](../../admin/riak-admin/#restart) for any configuration-related changes to take effect. +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. The directory deletion method above can also be used to force a rebuilding of hash trees. diff --git a/content/riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes.md index 63f245d583..b3d63ec45b 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/ops/running/nodes/adding-removing --- -[use running cluster]: /riak/kv/2.2.3/using/running-a-cluster +[use running cluster]: {{}}riak/kv/2.2.3/using/running-a-cluster This page describes the process of adding and removing nodes to and from a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. @@ -26,7 +26,7 @@ Just like the initial configuration steps, this step has to be repeated for every node in your cluster. Before a node can join an existing cluster it needs to be started. 
Depending on your mode of installation, use either the init scripts installed by the Riak binary packages or -simply the script [`riak`](/riak/kv/2.2.3/using/admin/riak-cli/): +simply the script [`riak`]({{}}riak/kv/2.2.3/using/admin/riak-cli/): ```bash /etc/init.d/riak start diff --git a/content/riak/kv/2.2.3/using/cluster-operations/backing-up.md b/content/riak/kv/2.2.3/using/cluster-operations/backing-up.md index ba16c8c568..77540c55cf 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/backing-up.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/backing-up.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.3/ops/running/backups --- -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters -[config reference]: /riak/kv/2.2.3/configuring/reference -[plan backend leveldb]: /riak/kv/2.2.3/setup/planning/backend/leveldb -[plan backend bitcask]: /riak/kv/2.2.3/setup/planning/backend/bitcask -[use ref strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency -[concept aae]: /riak/kv/2.2.3/learn/concepts/active-anti-entropy/ -[aae read repair]: /riak/kv/2.2.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[plan backend leveldb]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.3/setup/planning/backend/bitcask +[use ref strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency +[concept aae]: {{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/ +[aae read repair]: {{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. @@ -46,7 +46,7 @@ Downtime of a node can be significantly reduced by using an OS feature or filesy {{% note title="Backups and eventual consistency" %}} Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. -Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy](/riak/kv/2.2.3/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair](/riak/kv/2.2.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). {{% /note %}} ## OS-Specific Directory Locations @@ -131,7 +131,7 @@ Strong consistency | `/opt/riak/data/ensembles` ## Performing Backups {{% note title="Deprecation notice" %}} -In previous versions of Riak KV, there was a [`riak-admin backup`](/riak/kv/2.2.3/using/admin/riak-admin/#backup) command commonly used for +In previous versions of Riak KV, there was a [`riak-admin backup`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#backup) command commonly used for backups. This functionality is now deprecated. 
We strongly recommend using the backup procedure documented below instead. {{% /note %}} @@ -202,16 +202,16 @@ node that the restored backup was taken from, you will need to additionally: 1. Mark the original instance down in the cluster using - [`riak-admin down `](/riak/kv/2.2.3/using/admin/riak-admin/#down) + [`riak-admin down `]({{}}riak/kv/2.2.3/using/admin/riak-admin/#down) 2. Join the restored node to the cluster using - [`riak-admin cluster join `](/riak/kv/2.2.3/using/admin/riak-admin/#cluster-join) + [`riak-admin cluster join `]({{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster-join) 3. Replace the original instance with the renamed instance with - [`riak-admin cluster force-replace `](/riak/kv/2.2.3/using/admin/riak-admin/#cluster-force-replace) + [`riak-admin cluster force-replace `]({{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster-force-replace) 4. Plan the changes to the cluster with `riak-admin cluster plan` 5. Finally, commit the cluster changes with `riak-admin cluster commit` {{% note %}} -For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration](/riak/kv/2.2.3/using/admin/). +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{}}riak/kv/2.2.3/using/admin/). {{% /note %}} For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. @@ -264,4 +264,4 @@ and for any other nodes whose names have changed: ## Restoring a Cluster -Restoring a cluster from backups is documented [on its own page](/riak/kv/2.2.3/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). +Restoring a cluster from backups is documented [on its own page]({{}}riak/kv/2.2.3/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups). diff --git a/content/riak/kv/2.2.3/using/cluster-operations/bucket-types.md b/content/riak/kv/2.2.3/using/cluster-operations/bucket-types.md index e5f0e285e4..cb4e1efab7 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/bucket-types.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/bucket-types.md @@ -24,7 +24,7 @@ words, buckets using the `default` bucket type are free. More on that in the next section. {{% /note %}} -In Riak versions 2.0 and later, Basho suggests that you [use bucket types](/riak/kv/2.2.3/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{}}riak/kv/2.2.3/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the default bucket namespace but require an additional setup step on the command line. 
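That additional command-line setup step is the usual create/activate pair; a minimal sketch (the type name and `n_val` property below are examples only):

```bash
# Minimal sketch of the bucket-type setup step mentioned above.
# The type name and n_val property are examples only.
riak-admin bucket-type create my_type '{"props":{"n_val":3}}'
riak-admin bucket-type activate my_type
riak-admin bucket-type status my_type   # confirm the type is created and active
```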
diff --git a/content/riak/kv/2.2.3/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.2.3/using/cluster-operations/changing-cluster-info.md index f01c417905..67774f141e 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/changing-cluster-info.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/changing-cluster-info.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/ops/running/nodes/renaming --- -[config reference]: /riak/kv/2.2.3/configuring/reference +[config reference]: {{}}riak/kv/2.2.3/configuring/reference ## Change the Node Name @@ -117,7 +117,7 @@ To rename a single-node development cluster: For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. -Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`](/riak/kv/2.2.3/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`](/riak/kv/2.2.3/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. diff --git a/content/riak/kv/2.2.3/using/cluster-operations/handoff.md b/content/riak/kv/2.2.3/using/cluster-operations/handoff.md index 3568d3a892..6a1eea9aa9 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/handoff.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/handoff.md @@ -68,7 +68,7 @@ riak-admin handoff disable both --all ## Other Command-line Tools In addition to enabling and disabling handoff, the -[`riak-admin`](/riak/kv/2.2.3/using/admin/riak-admin/) interface enables you to +[`riak-admin`]({{}}riak/kv/2.2.3/using/admin/riak-admin/) interface enables you to retrieve a summary of handoff-related activity and other information. ### summary @@ -105,7 +105,7 @@ ongoing transfers`. Otherwise, you will see something like this: ### config -This command displays the values for handoff-specific [configurable parameters](/riak/kv/2.2.3/configuring/reference/#intra-cluster-handoff) on each node in +This command displays the values for handoff-specific [configurable parameters]({{}}riak/kv/2.2.3/configuring/reference/#intra-cluster-handoff) on each node in the cluster, including: * `transfer_limit` diff --git a/content/riak/kv/2.2.3/using/cluster-operations/logging.md b/content/riak/kv/2.2.3/using/cluster-operations/logging.md index 60f15da078..f69ef4a0e4 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/logging.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/logging.md @@ -14,7 +14,7 @@ toc: true If you'd like to enable debug logging on the current node, i.e.
set the console log level to `debug`, you can do so without restarting the node -by accessing the Erlang console directly using the [`riak attach`](/riak/kv/2.2.3/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: +by accessing the Erlang console directly using the [`riak attach`]({{}}riak/kv/2.2.3/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: ```erlang lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). diff --git a/content/riak/kv/2.2.3/using/cluster-operations/replacing-node.md b/content/riak/kv/2.2.3/using/cluster-operations/replacing-node.md index 3a55e706de..298b0052be 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/replacing-node.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/replacing-node.md @@ -13,7 +13,7 @@ toc: true --- At some point, for various reasons, you might need to replace a node in -your Riak cluster (which is different from [recovering a failed node](/riak/kv/2.2.3/using/repair-recovery)). Here is the recommended way to go +your Riak cluster (which is different from [recovering a failed node]({{}}riak/kv/2.2.3/using/repair-recovery)). Here is the recommended way to go about replacing a node. 1. Back up your data directory on the node in question. In this example @@ -31,21 +31,21 @@ scenario, we'll call the node `riak4`: cluster and have it replace the `riak4` node. We'll call the new node `riak7` for the purpose of this example. -3. Start the new `riak7` node with [`riak start`](/riak/kv/2.2.3/using/admin/riak-cli/#start): +3. Start the new `riak7` node with [`riak start`]({{}}riak/kv/2.2.3/using/admin/riak-cli/#start): ```bash riak start ``` 4. Plan the join of the new `riak7` node to an existing node already -participating in the cluster; for example `riak0` with the [`riak-admin cluster join`](/riak/kv/2.2.3/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash riak-admin cluster join riak0 ``` 5. Plan the replacement of the existing `riak4` node with the new -`riak7` node using the [`riak-admin cluster replace`](/riak/kv/2.2.3/using/admin/riak-admin/#cluster) command: +`riak7` node using the [`riak-admin cluster replace`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster replace riak4 riak7 @@ -60,7 +60,7 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster the node has not been joined to a cluster. -6. Examine the proposed cluster changes with the [`riak-admin cluster plan`](/riak/kv/2.2.3/using/admin/riak-admin/#cluster) command executed on the new +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: ```bash @@ -68,13 +68,13 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster ``` 7. 
If the changes are correct, you can commit them with the -[`riak-admin cluster commit`](/riak/kv/2.2.3/using/admin/riak-admin/#cluster) command: +[`riak-admin cluster commit`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster) command: ```bash riak-admin cluster commit ``` - If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`](/riak/kv/2.2.3/using/admin/riak-admin/#cluster): + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster): ```bash riak-admin cluster clear @@ -82,8 +82,8 @@ participating in the cluster; for example `riak0` with the [`riak-admin cluster Once you have successfully replaced the node, it should begin leaving the cluster. You can check on ring readiness after replacing the node -with the [`riak-admin ringready`](/riak/kv/2.2.3/using/admin/riak-admin/#ringready) -and [`riak-admin member-status`](/riak/kv/2.2.3/using/admin/riak-admin/#member-status) +with the [`riak-admin ringready`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#member-status) commands. {{% note title="Ring Settling" %}} diff --git a/content/riak/kv/2.2.3/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.2.3/using/cluster-operations/strong-consistency.md index b68be8afe3..4e38bb4daa 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/strong-consistency.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/strong-consistency.md @@ -24,10 +24,10 @@ usage in any production environment. ## Monitoring Strong Consistency Riak provides a wide variety of data related to the current operating -status of a node. This data is available by running the [`riak-admin status`](/riak/kv/2.2.3/using/admin/riak-admin/#riak-admin-status) command. That data now +status of a node. This data is available by running the [`riak-admin status`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#status) command. That data now includes statistics specific to strongly consistent operations. -A full listing of these stats is available in [Inspecting a Node](/riak/kv/2.2.3/using/cluster-operations/inspecting-node). +A full listing of these stats is available in [Inspecting a Node]({{}}riak/kv/2.2.3/using/cluster-operations/inspecting-node). All strong consistency-related stats are prefixed with `consistent_`, e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are so-called "one-minute stats," meaning that they reflect node activity in diff --git a/content/riak/kv/2.2.3/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.2.3/using/cluster-operations/v2-multi-datacenter.md index b4e3de76ab..a34d0fddfb 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/v2-multi-datacenter.md @@ -18,7 +18,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter/) instead. {{% /note %}} Riak Enterprise's Multi-Datacenter Replication system is largely @@ -163,7 +163,7 @@ restarting Riak Enterprise. 
Field | Description :-----|:----------- -`client_stats` | See Client Statistics +`client_stats` | See Client Statistics `client_bytes_recv` | The total number of bytes the client has received since the server has been started `client_bytes_sent` | The total number of bytes sent to all connected sites `client_connect_errors` | The number of TCP/IP connection errors @@ -183,7 +183,7 @@ Field | Description `server_fullsyncs` | The number of fullsync operations that have occurred since the server was started `server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. `server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. -`server_stats` | See Server Statistics +`server_stats` | See Server Statistics ### Elections and Objects @@ -221,7 +221,7 @@ Field | Description ## Bounded Queue The bounded queue is responsible for holding objects that are waiting to -participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration](/riak/kv/2.2.3/configuring/v2-multi-datacenter/) guide for more information. +participate in realtime replication. Please see the [Riak Enterprise MDC Replication Configuration]({{}}riak/kv/2.2.3/configuring/v2-multi-datacenter/) guide for more information. Field | Description ------|------------ @@ -242,7 +242,7 @@ Field | Description `site` | The connected site name configured with `riak-repl add-site` `strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. `fullsync_worker` | The Erlang process ID of the fullsync worker -`bounded_queue` | See the Bounded Queue section above +`bounded_queue` | See the Bounded Queue section above `state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.
  • `wait_for_partition`
  • `build_keylist`
  • `wait_keylist`
  • `diff_bloom`
  • `diff_keylist`s
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server diff --git a/content/riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter.md index febe0b9047..5b4429084f 100644 --- a/content/riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter.md +++ b/content/riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter.md @@ -17,11 +17,11 @@ aliases: - /riak/kv/2.2.3/ops/mdc/v3/operations --- -[config v3 mdc]: /riak/kv/2.2.3/configuring/v3-multi-datacenter -[config v3 nat]: /riak/kv/2.2.3/configuring/v3-multi-datacenter/nat -[config v3 quickstart]: /riak/kv/2.2.3/configuring/v3-multi-datacenter/quick-start -[config v3 ssl]: /riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl -[ref v3 stats]: /riak/kv/2.2.3/using/reference/multi-datacenter/statistics +[config v3 mdc]: {{}}riak/kv/2.2.3/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.2.3/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.2.3/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{}}riak/kv/2.2.3/using/reference/multi-datacenter/statistics This document explains how to manage replication with the `riak-repl` command. Some of these commands can be set or behavior altered by diff --git a/content/riak/kv/2.2.3/using/performance.md b/content/riak/kv/2.2.3/using/performance.md index 1bfe111319..7f51815afa 100644 --- a/content/riak/kv/2.2.3/using/performance.md +++ b/content/riak/kv/2.2.3/using/performance.md @@ -24,7 +24,7 @@ changes are made and when in order to measure the impact of those changes. For performance and tuning recommendations specific to running Riak -clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning](/riak/kv/2.2.3/using/performance/amazon-web-services). +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{}}riak/kv/2.2.3/using/performance/amazon-web-services). {{% note title="Note on other operating systems" %}} Unless otherwise specified, the tunings recommended below are for Linux @@ -253,12 +253,12 @@ These settings have been tested and benchmarked by Basho in nodes with Riak and supporting tools can consume a large number of open file handles during normal operation. For stability, increasing the number of -open files limit is necessary. See [Open Files Limit](/riak/kv/2.2.3/using/performance/open-files-limit/) for more +open files limit is necessary. See [Open Files Limit]({{}}riak/kv/2.2.3/using/performance/open-files-limit/) for more details.
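As a minimal sketch of that adjustment on Linux, assuming Riak runs as a `riak` user and that `65536` is a suitable limit for your deployment (see the Open Files Limit page for real guidance):

```bash
# Check the limit in the shell that launches Riak
ulimit -n

# Raise it persistently for the riak user (requires root);
# the value below is illustrative only
cat >> /etc/security/limits.conf <<EOF
riak soft nofile 65536
riak hard nofile 65536
EOF
```
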
## Other Tuning Docs -* [AWS Performance Tuning](/riak/kv/2.2.3/using/performance/amazon-web-services) -* [Erlang VM Tuning](/riak/kv/2.2.3/using/performance/erlang) -* [Latency Reduction](/riak/kv/2.2.3/using/performance/latency-reduction) -* [Open Files Limit](/riak/kv/2.2.3/using/performance/open-files-limit/) +* [AWS Performance Tuning]({{}}riak/kv/2.2.3/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{}}riak/kv/2.2.3/using/performance/erlang) +* [Latency Reduction]({{}}riak/kv/2.2.3/using/performance/latency-reduction) +* [Open Files Limit]({{}}riak/kv/2.2.3/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.2.3/using/performance/benchmarking.md b/content/riak/kv/2.2.3/using/performance/benchmarking.md index 4c00419aee..6a4ea746fe 100644 --- a/content/riak/kv/2.2.3/using/performance/benchmarking.md +++ b/content/riak/kv/2.2.3/using/performance/benchmarking.md @@ -51,7 +51,7 @@ You can download the pre-built packages below, or build it from source. #### Prerequisites -* Erlang must be installed. See [Installing Erlang](/riak/kv/2.2.3/setup/installing/source/erlang) for instructions +* Erlang must be installed. See [Installing Erlang]({{}}riak/kv/2.2.3/setup/installing/source/erlang) for instructions and versioning requirements. Note: Unless you're an experienced Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and not CentOS), when building ```basho_bench``` from source. Later diff --git a/content/riak/kv/2.2.3/using/performance/latency-reduction.md b/content/riak/kv/2.2.3/using/performance/latency-reduction.md index b8243a0154..3979d54788 100644 --- a/content/riak/kv/2.2.3/using/performance/latency-reduction.md +++ b/content/riak/kv/2.2.3/using/performance/latency-reduction.md @@ -33,7 +33,7 @@ objects can impact latency in a cluster, even for requests that are unrelated to those objects. If your use case requires large objects, we recommend checking out -[Riak CS](/riak/cs/latest/), which is intended as a storage system for large objects. +[Riak CS]({{}}riak/cs/latest/), which is intended as a storage system for large objects. ### Mitigation diff --git a/content/riak/kv/2.2.3/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.2.3/using/performance/multi-datacenter-tuning.md index b054b5ce46..6d246fa9a2 100644 --- a/content/riak/kv/2.2.3/using/performance/multi-datacenter-tuning.md +++ b/content/riak/kv/2.2.3/using/performance/multi-datacenter-tuning.md @@ -14,7 +14,7 @@ toc: true commercial_offering: true --- -[perf index]: /riak/kv/2.2.3/using/performance +[perf index]: {{}}riak/kv/2.2.3/using/performance Depending on the size of your objects and your replication latency needs, you may need to configure your kernel settings to optimize diff --git a/content/riak/kv/2.2.3/using/performance/open-files-limit.md b/content/riak/kv/2.2.3/using/performance/open-files-limit.md index 957ebcb153..43de687e22 100644 --- a/content/riak/kv/2.2.3/using/performance/open-files-limit.md +++ b/content/riak/kv/2.2.3/using/performance/open-files-limit.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/ops/tuning/open-files-limit/ --- -[plan backend]: /riak/kv/2.2.3/setup/planning/backend/ +[plan backend]: {{}}riak/kv/2.2.3/setup/planning/backend/ [blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files Riak KV can accumulate a large number of open file handles during operation. 
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles. diff --git a/content/riak/kv/2.2.3/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.2.3/using/performance/v2-scheduling-fullsync.md index 56a0da8124..5fd77b0cdb 100644 --- a/content/riak/kv/2.2.3/using/performance/v2-scheduling-fullsync.md +++ b/content/riak/kv/2.2.3/using/performance/v2-scheduling-fullsync.md @@ -14,7 +14,7 @@ commercial_offering: true --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.3/using/reference/bucket-types.md b/content/riak/kv/2.2.3/using/reference/bucket-types.md index a451382404..271efb2c08 100644 --- a/content/riak/kv/2.2.3/using/reference/bucket-types.md +++ b/content/riak/kv/2.2.3/using/reference/bucket-types.md @@ -14,7 +14,7 @@ toc: true Bucket types allow groups of buckets to share configuration details and for Riak users to manage bucket properties more efficiently than in the -older configuration system based on [bucket properties](/riak/kv/2.2.3/developing/usage/bucket-types/#bucket-properties-and-operations). +older configuration system based on [bucket properties]({{}}riak/kv/2.2.3/developing/usage/bucket-types/#bucket-properties-and-operations). {{% note title="Important note on cluster downgrades" %}} If you upgrade a Riak to version 2.0 or later, you can still downgrade the @@ -27,7 +27,7 @@ activated, you can no longer downgrade the cluster to a pre-2.0 version. The older configuration system, based on bucket properties, involves setting bucket properties for specific buckets either through -[HTTP](/riak/kv/2.2.3/developing/api/http/set-bucket-props) or [Protocol Buffers](/riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +[HTTP]({{}}riak/kv/2.2.3/developing/api/http/set-bucket-props) or [Protocol Buffers]({{}}riak/kv/2.2.3/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and modify a wide range of properties, from `n_val` to `allow_mult` and far beyond. 
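For instance, the older per-bucket style looks roughly like this over HTTP, assuming a node listening on the default port `8098` and a hypothetical bucket named `my_bucket`:

```bash
# Read the bucket's current properties
curl http://localhost:8098/buckets/my_bucket/props

# Modify a property on this one bucket (the pre-2.0 approach)
curl -XPUT http://localhost:8098/buckets/my_bucket/props \
  -H "Content-Type: application/json" \
  -d '{"props":{"allow_mult":true}}'
```
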
@@ -39,13 +39,13 @@ with a few crucial differences: previous system required configuration to be set on a per-bucket basis * Nearly all bucket properties can be updated using bucket types, except the `datatype` and `consistent` properties, related to - [Riak data types](/riak/kv/2.2.3/developing/data-types), and [strong consistency](/riak/kv/2.2.3/developing/app-guide/strong-consistency) respectively + [Riak data types]({{}}riak/kv/2.2.3/developing/data-types), and [strong consistency]({{}}riak/kv/2.2.3/developing/app-guide/strong-consistency) respectively * Bucket types are more performant than bucket properties because divergence from Riak's defaults doesn't have to be gossiped around the cluster for every bucket, which means less computational overhead It is important to note that buckets are not assigned types in the same -way that they are configured when using [bucket properties](/riak/kv/2.2.3/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +way that they are configured when using [bucket properties]({{}}riak/kv/2.2.3/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a bucket `my_bucket` and assign it a type the way that you would, say, set `allow_mult` to `false` or `n_val` to `5`, because there is no `type` parameter contained within the bucket's properties (i.e. @@ -119,7 +119,7 @@ object of the following form: > **Getting started with Riak clients** > -> If you are connecting to Riak using one of Basho's official [client libraries](/riak/kv/2.2.3/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started](/riak/kv/2.2.3/developing/getting-started) section. +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.3/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.3/developing/getting-started) section. If creation is successful, you should see the following output: @@ -521,7 +521,7 @@ associated with the `default` bucket type: ## Bucket Types and the `allow_mult` Setting -Prior to Riak 2.0, Riak created [siblings](/riak/kv/2.2.3/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. +Prior to Riak 2.0, Riak created [siblings]({{}}riak/kv/2.2.3/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. In version 2.0, this is changing in a subtle way. Now, there are two different default settings for `allow_mult` in play: @@ -534,7 +534,7 @@ different default settings for `allow_mult` in play: The consequence is that applications that have previously ignored conflict resolutions in certain buckets (or all buckets) can continue to -do so. New applications, however, are encouraged to retain and [resolve siblings](/riak/kv/2.2.3/developing/usage/conflict-resolution) with the appropriate application-side business logic. +do so. New applications, however, are encouraged to retain and [resolve siblings]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution) with the appropriate application-side business logic.
To give an example, let's have a look at the properties associated with the `default` bucket type: @@ -585,8 +585,8 @@ riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}' ## Bucket Type Example Let's say that you'd like to create a bucket type called -`user_account_bucket` with a [pre-commit hook](/riak/kv/2.2.3/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit -hooks](/riak/kv/2.2.3/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: +`user_account_bucket` with a [pre-commit hook]({{}}riak/kv/2.2.3/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit +hooks]({{}}riak/kv/2.2.3/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps: 1. Creating a JavaScript object containing the appropriate `props` settings: @@ -714,7 +714,7 @@ curl -XPUT \ In this example, the bucket `sensitive_user_data` bears the configuration established by the `no_siblings` bucket type, and it bears that configuration _on the basis of the query's structure_. This is -because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets](/riak/kv/2.2.3/learn/concepts/buckets) and [keys](/riak/kv/2.2.3/learn/concepts/keys-and-objects). +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{}}riak/kv/2.2.3/learn/concepts/buckets) and [keys]({{}}riak/kv/2.2.3/learn/concepts/keys-and-objects). Let's say that we're using Riak to store internet memes. We've been using a bucket called `current_memes` using the bucket type diff --git a/content/riak/kv/2.2.3/using/reference/custom-code.md b/content/riak/kv/2.2.3/using/reference/custom-code.md index 9b2a695af5..1589d27317 100644 --- a/content/riak/kv/2.2.3/using/reference/custom-code.md +++ b/content/riak/kv/2.2.3/using/reference/custom-code.md @@ -16,10 +16,10 @@ aliases: --- Riak supports the use of Erlang named functions in compiled modules for -[pre/post-commit hooks](/riak/kv/2.2.3/developing/usage/commit-hooks), and MapReduce operations. This +[pre/post-commit hooks]({{}}riak/kv/2.2.3/developing/usage/commit-hooks), and MapReduce operations. This doc contains installation steps with simple examples for each use case. -Your developers can compile [custom erlang code](/riak/kv/2.2.3/developing/usage/commit-hooks), which +Your developers can compile [custom erlang code]({{}}riak/kv/2.2.3/developing/usage/commit-hooks), which they can send to you as a *beam* file. You should note that in Erlang, a file name must have the same name as the module. So if you are given a file named `validate_json.beam`, do not rename it. @@ -123,7 +123,7 @@ that you do so in a rolling fashion, taking time to ensure that the Riak key value store has fully initialized and become available for use. This is done with the `riak-admin wait-for-service` command as detailed -in the [Commands documentation](/riak/kv/2.2.3/using/admin/riak-admin/#wait-for-service). +in the [Commands documentation]({{}}riak/kv/2.2.3/using/admin/riak-admin/#wait-for-service).
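A sketch of that rolling pattern, using a hypothetical node name, might look like the following on each node in turn:

```bash
# Restart the node so it picks up the newly installed module
riak restart

# Block until the key/value service reports itself available
# before moving on to the next node (node name is hypothetical)
riak-admin wait-for-service riak_kv riak@192.168.1.11
```
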
{{% note %}} It is important that you ensure riak_kv is active before restarting the next diff --git a/content/riak/kv/2.2.3/using/reference/handoff.md b/content/riak/kv/2.2.3/using/reference/handoff.md index 25fce9dfbb..5ff4b83200 100644 --- a/content/riak/kv/2.2.3/using/reference/handoff.md +++ b/content/riak/kv/2.2.3/using/reference/handoff.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/ops/running/handoff/ --- -[cluster ops handoff]: /riak/kv/2.2.3/using/cluster-operations/handoff +[cluster ops handoff]: {{}}riak/kv/2.2.3/using/cluster-operations/handoff Riak is a distributed system built with two essential goals in mind: @@ -35,13 +35,13 @@ re-assigning is referred to as **intra-cluster handoff** (or simply Intra-cluster handoff typically takes one of two forms: **hinted handoff** and **ownership transfer**. -Hinted handoff occurs when a [vnode](/riak/kv/2.2.3/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +Hinted handoff occurs when a [vnode]({{}}riak/kv/2.2.3/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick up the slack, so to speak, assuming responsibility for node C's operations. When node C comes back online, responsibility will be handed back to the original vnodes. Ownership transfer is different because it is meant to be permanent. -It occurs when a [vnode](/riak/kv/2.2.3/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +It occurs when a [vnode]({{}}riak/kv/2.2.3/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very makeup of a cluster changes, e.g. when nodes are added or removed from the cluster. In this case, responsibility for portions of the keyspace needs to be fundamentally re-assigned. @@ -54,7 +54,7 @@ handoff behavior. More information can be found below. ## Configuring Handoff A full listing of configurable parameters can be found in our -[configuration files](/riak/kv/2.2.3/configuring/reference/#intra-cluster-handoff) +[configuration files]({{}}riak/kv/2.2.3/configuring/reference/#intra-cluster-handoff) document. The sections below provide a more narrative description of handoff configuration. @@ -121,14 +121,14 @@ handoff.use_background_manager = on ### Maximum Rejects -If you're using Riak features such as [Riak Search](/riak/kv/2.2.3/developing/usage/search/), +If you're using Riak features such as [Riak Search]({{}}riak/kv/2.2.3/developing/usage/search/), those subsystems can block handoff of primary key/value data, i.e. data that you interact with via normal reads and writes. The `handoff.max_rejects` setting enables you to set the maximum -duration that a [vnode](/riak/kv/2.2.3/learn/glossary/#vnode) can be blocked by multiplying the +duration that a [vnode]({{}}riak/kv/2.2.3/learn/glossary/#vnode) can be blocked by multiplying the `handoff.max_rejects` setting by the value of -[`vnode_management_timer`](/riak/kv/2.2.3/configuring/reference/#vnode_management_timer). +[`vnode_management_timer`]({{}}riak/kv/2.2.3/configuring/reference/#vnode_management_timer). 
Thus, if you set `handoff.max_rejects` to 10 and `vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems can block K/V handoff for a maximum of 50 seconds. The default for diff --git a/content/riak/kv/2.2.3/using/reference/jmx.md b/content/riak/kv/2.2.3/using/reference/jmx.md index 0a7b2d27e3..033673a09c 100644 --- a/content/riak/kv/2.2.3/using/reference/jmx.md +++ b/content/riak/kv/2.2.3/using/reference/jmx.md @@ -16,7 +16,7 @@ aliases: - /riak/kv/2.2.3/ops/running/monitoring/jmx --- -Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`](/riak/kv/2.2.3/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{}}riak/kv/2.2.3/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). ```erlang {riak_jmx, [ diff --git a/content/riak/kv/2.2.3/using/reference/logging.md b/content/riak/kv/2.2.3/using/reference/logging.md index e1ff2d286d..2b093bdb03 100644 --- a/content/riak/kv/2.2.3/using/reference/logging.md +++ b/content/riak/kv/2.2.3/using/reference/logging.md @@ -15,13 +15,13 @@ aliases: - /riak/kv/2.2.3/ops/running/logging --- -[cluster ops log]: /riak/kv/2.2.3/using/cluster-operations/logging +[cluster ops log]: {{}}riak/kv/2.2.3/using/cluster-operations/logging Logging in Riak KV is handled by a Basho-produced logging framework for [Erlang](http://www.erlang.org) called [lager](https://github.com/basho/lager). -lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files](/riak/kv/2.2.3/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{}}riak/kv/2.2.3/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. ## Log Directory @@ -274,11 +274,11 @@ or to neither. This is determined by the value that you give to the * `file` --- Console logs will be emitted to a file. This is Riak's default behavior. The location of that file is determined by the `log.console.file` parameter. The default location is - `./log/console.log` on an installation from [source](/riak/kv/2.2.3/setup/installing/source), but will differ on platform-specific installation, + `./log/console.log` on an installation from [source]({{}}riak/kv/2.2.3/setup/installing/source), but will differ on platform-specific installation, e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or `/opt/riak/log` on Solaris-based platforms. 
* `console` --- Console logs will be emitted to standard output, which - can be viewed by running the [`riak attach-direct`](/riak/kv/2.2.3/using/admin/riak-cli/#attach-direct) command + can be viewed by running the [`riak attach-direct`]({{}}riak/kv/2.2.3/using/admin/riak-cli/#attach-direct) command * `both` --- Console logs will be emitted both to a file and to standard output * `off` --- Console log messages will be disabled diff --git a/content/riak/kv/2.2.3/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.2.3/using/reference/multi-datacenter/comparison.md index 4c4b4ba75f..03c7f6156f 100644 --- a/content/riak/kv/2.2.3/using/reference/multi-datacenter/comparison.md +++ b/content/riak/kv/2.2.3/using/reference/multi-datacenter/comparison.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/ops/mdc/comparison --- -This document is a systematic comparison of [Version 2](/riak/kv/2.2.3/using/reference/v2-multi-datacenter) and [Version 3](/riak/kv/2.2.3/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter +This document is a systematic comparison of [Version 2]({{}}riak/kv/2.2.3/using/reference/v2-multi-datacenter) and [Version 3]({{}}riak/kv/2.2.3/using/reference/v3-multi-datacenter) of Riak Enterprise's Multi-Datacenter Replication capabilities. {{% note title="Important note on mixing versions" %}} @@ -89,7 +89,7 @@ version 3. of workers that will run on a source node, a sink node, and across the entire source cluster. This allows for limiting impact on the cluster and dialing in fullsync performance. -* Version 3 is able to take advantage of [Active Anti-Entropy](/riak/kv/2.2.3/learn/concepts/active-anti-entropy/) \(AAE) +* Version 3 is able to take advantage of [Active Anti-Entropy]({{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/) \(AAE) technology, which can greatly improve fullsync performance. * Riak CS MDC `proxy_get` connections will be distributed across the source cluster (as CS blocks are requested from the sink cluster in diff --git a/content/riak/kv/2.2.3/using/reference/runtime-interaction.md b/content/riak/kv/2.2.3/using/reference/runtime-interaction.md index 8412d803e8..86c1ff15ca 100644 --- a/content/riak/kv/2.2.3/using/reference/runtime-interaction.md +++ b/content/riak/kv/2.2.3/using/reference/runtime-interaction.md @@ -15,8 +15,8 @@ aliases: - /riak/kv/2.2.3/ops/advanced/runtime --- -[config reference]: /riak/kv/2.2.3/configuring/reference -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters +[config reference]: {{}}riak/kv/2.2.3/configuring/reference +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters Riak's [configuration files][config reference] provide a variety of parameters that enable you to fine-tune how Riak interacts with two important elements diff --git a/content/riak/kv/2.2.3/using/reference/search.md b/content/riak/kv/2.2.3/using/reference/search.md index d2c19b4174..e325c80e8f 100644 --- a/content/riak/kv/2.2.3/using/reference/search.md +++ b/content/riak/kv/2.2.3/using/reference/search.md @@ -15,21 +15,21 @@ aliases: - /riak/kv/2.2.3/dev/advanced/search --- -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters -[configuring search]: /riak/kv/2.2.3/configuring/search +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters +[configuring search]: {{}}riak/kv/2.2.3/configuring/search > **Note on search 2.0 vs. legacy search** > > This document refers to Riak search 2.0 with [Solr](http://lucene.apache.org/solr/) integration (codenamed -Yokozuna). 
For information about the deprecated Riak search, visit [the old Using Riak search docs](http://docs.basho.com/riak/1.4.10/dev/using/search/). +Yokozuna). The project that implements Riak search is codenamed Yokozuna. This is a more detailed overview of the concepts and reasons behind the design of Yokozuna, for those interested. If you're simply looking to use Riak -search, you should check out the [Using Search](/riak/kv/2.2.3/developing/usage/search) document. +search, you should check out the [Using Search]({{}}riak/kv/2.2.3/developing/usage/search) document. -![Yokozuna](/images/yokozuna.png) +![Yokozuna]({{}}images/yokozuna.png) ## Riak Search is Erlang @@ -127,7 +127,7 @@ but logically partition them in KV by using a date as the bucket name. A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list. -See the [main Search documentation](/riak/kv/2.2.3/developing/usage/search/#simple-setup) for details on creating an index. +See the [main Search documentation]({{}}riak/kv/2.2.3/developing/usage/search/#simple-setup) for details on creating an index. ## Extractors @@ -289,7 +289,7 @@ The corresponding date type is declared under `` like so. ``` -You can also find more information on to how customize your own [search schema](/riak/kv/2.2.3/developing/usage/search-schemas). +You can also find more information on to how customize your own [search schema]({{}}riak/kv/2.2.3/developing/usage/search-schemas). Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes @@ -299,7 +299,7 @@ indexed. ## Active Anti-Entropy (AAE) -[Active Anti-Entropy](/riak/kv/2.2.3/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +[Active Anti-Entropy]({{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped @@ -354,7 +354,7 @@ _analysis_. Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more -information on analysis, see [Search Schema](/riak/kv/2.2.3/developing/usage/search-schemas). +information on analysis, see [Search Schema]({{}}riak/kv/2.2.3/developing/usage/search-schemas). ## Tagging diff --git a/content/riak/kv/2.2.3/using/reference/secondary-indexes.md b/content/riak/kv/2.2.3/using/reference/secondary-indexes.md index 3b7c188886..3ae804fe99 100644 --- a/content/riak/kv/2.2.3/using/reference/secondary-indexes.md +++ b/content/riak/kv/2.2.3/using/reference/secondary-indexes.md @@ -15,28 +15,28 @@ aliases: - /riak/kv/2.2.3/dev/advanced/2i --- -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[use ref strong consistency]: /riak/kv/2.2.3/using/reference/strong-consistency +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[use ref strong consistency]: {{}}riak/kv/2.2.3/using/reference/strong-consistency > **Note: Riak Search preferred for querying** > > If you're interested in non-primary-key-based querying in Riak, i.e. 
if you're looking to go beyond straightforward K/V operations, we now -recommend [Riak Search](/riak/kv/2.2.3/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. +recommend [Riak Search]({{}}riak/kv/2.2.3/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. This document provides implementation and other details for Riak's -[secondary indexes](/riak/kv/2.2.3/developing/usage/secondary-indexes/) \(2i) feature. +[secondary indexes]({{}}riak/kv/2.2.3/developing/usage/secondary-indexes/) \(2i) feature. ## How It Works Secondary indexes use **document-based partitioning**, a system where -indexes reside with each document, local to the [vnode](/riak/kv/2.2.3/learn/glossary/#vnode). This +indexes reside with each document, local to the [vnode]({{}}riak/kv/2.2.3/learn/glossary/#vnode). This system is also a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys. -![Secondary Index](/images/Secondary-index-example.png) +![Secondary Index]({{}}images/Secondary-index-example.png) Indexes reside on multiple machines. Since indexes for an object are stored on the same partition as the object itself, query-time @@ -58,7 +58,7 @@ that an object will be present in future index queries as soon as the write operation completes. Riak stores 3 replicas of all objects by default, although this can be -changed [using bucket types][usage bucket types], which manage buckets' [replication properties](/riak/kv/2.2.3/developing/app-guide/replication-properties). The system is capable of generating a full set of results +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{}}riak/kv/2.2.3/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the @@ -66,7 +66,7 @@ requesting node. > **Note on 2i and strong consistency** > -> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets](/riak/kv/2.2.3/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{}}riak/kv/2.2.3/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored. 
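To make the write-time tagging and querying described above concrete, here is a rough HTTP sketch; the node address, bucket, key, and index name are all hypothetical, and 2i requires a backend that supports it (LevelDB or Memory):

```bash
# Write an object tagged with a binary secondary index entry
curl -XPUT http://localhost:8098/buckets/users/keys/john_smith \
  -H "Content-Type: application/json" \
  -H "x-riak-index-email_bin: jsmith@example.com" \
  -d '{"name":"John Smith"}'

# Query the index to retrieve the matching keys
curl http://localhost:8098/buckets/users/index/email_bin/jsmith@example.com
```
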
diff --git a/content/riak/kv/2.2.3/using/reference/statistics-monitoring.md b/content/riak/kv/2.2.3/using/reference/statistics-monitoring.md index b0e072742a..dea55a3f58 100644 --- a/content/riak/kv/2.2.3/using/reference/statistics-monitoring.md +++ b/content/riak/kv/2.2.3/using/reference/statistics-monitoring.md @@ -17,13 +17,13 @@ aliases: Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics -are made available through the HTTP API via the [`/stats`](/riak/kv/2.2.3/developing/api/http/status) endpoint, or through the [`riak-admin`](/riak/kv/2.2.3/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. +are made available through the HTTP API via the [`/stats`]({{}}riak/kv/2.2.3/developing/api/http/status) endpoint, or through the [`riak-admin`]({{}}riak/kv/2.2.3/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific -Riak statistics provided in the [Inspecting a Node](/riak/kv/2.2.3/using/cluster-operations/inspecting-node) and [HTTP Status](/riak/kv/2.2.3/developing/api/http/status) documentation. +Riak statistics provided in the [Inspecting a Node]({{}}riak/kv/2.2.3/using/cluster-operations/inspecting-node) and [HTTP Status]({{}}riak/kv/2.2.3/developing/api/http/status) documentation. ## System Metrics To Graph @@ -47,7 +47,7 @@ We also recommend tracking your system's virtual and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our -documentation on [system tuning](/riak/kv/2.2.3/using/performance/#storage-and-file-system-tuning). +documentation on [system tuning]({{}}riak/kv/2.2.3/using/performance/#storage-and-file-system-tuning). ## Riak Metrics to Graph Riak metrics fall into several general categories: @@ -57,7 +57,7 @@ Riak metrics fall into several general categories: 3. Erlang resource usage metrics 4. General Riak load/health metrics -If graphing all of the [available Riak metrics](/riak/kv/2.2.3/using/cluster-operations/inspecting-node) is +If graphing all of the [available Riak metrics]({{}}riak/kv/2.2.3/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below. @@ -156,7 +156,7 @@ Metric | Description ## Command-line Interface -The [`riak-admin`](/riak/kv/2.2.3/using/admin/riak-admin/) tool provides two +The [`riak-admin`]({{}}riak/kv/2.2.3/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`. @@ -181,14 +181,14 @@ consistent_get_objsize_195 : 0 ``` A comprehensive list of available stats can be found in the -[Inspecting a Node](/riak/kv/2.2.3/using/cluster-operations/inspecting-node/#riak-admin-status) document. +[Inspecting a Node]({{}}riak/kv/2.2.3/using/cluster-operations/inspecting-node/#riak-admin-status) document. ### stat The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. 
Full documentation of this command can be found -in the [Inspecting a Node](/riak/kv/2.2.3/using/cluster-operations/inspecting-node/#riak-admin-stat) document. +in the [Inspecting a Node]({{}}riak/kv/2.2.3/using/admin/riak-admin/#stat) document. ## Statistics and Monitoring Tools @@ -235,7 +235,7 @@ troubleshooting issues on Riak nodes. #### Riak Control -[Riak Control](/riak/kv/2.2.3/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +[Riak Control]({{}}riak/kv/2.2.3/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes. @@ -259,7 +259,7 @@ clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters. A [Riak Ganglia module][riak_ganglia] for collecting statistics from -the Riak HTTP [`/stats`](/riak/kv/2.2.3/developing/api/http/status) endpoint is also available. +the Riak HTTP [`/stats`]({{}}riak/kv/2.2.3/developing/api/http/status) endpoint is also available. #### Nagios @@ -333,14 +333,14 @@ capacity planning in a Riak cluster environment. that can provide information on the current and past states of Riak nodes and visualizations of machine generated data such as log files. -A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`](/riak/kv/2.2.3/developing/api/http/status) endpoint is also available. +A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{}}riak/kv/2.2.3/developing/api/http/status) endpoint is also available. #### Splunk [Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine generated data such as log files. It can be connected to Riak's HTTP statistics -[`/stats`](/riak/kv/2.2.3/developing/api/http/status) endpoint. +[`/stats`]({{}}riak/kv/2.2.3/developing/api/http/status) endpoint. Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak @@ -362,9 +362,9 @@ Docs](https://github.com/basho/basho_docs). ## References -* [Inspecting a Node](/riak/kv/2.2.3/using/cluster-operations/inspecting-node) +* [Inspecting a Node]({{}}riak/kv/2.2.3/using/cluster-operations/inspecting-node) * [Riaknostic](http://riaknostic.basho.com) -* [Riak Control](/riak/kv/2.2.3/using/admin/riak-control/) +* [Riak Control]({{}}riak/kv/2.2.3/using/admin/riak-control/) * [collectd](http://collectd.org) * [Ganglia](http://ganglia.info) * [Nagios](http://www.nagios.org) @@ -380,9 +380,9 @@ Docs](https://github.com/basho/basho_docs). 
[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt -[data_types_counters]: http://docs.basho.com/riak/latest/dev/using/data-types/#Counters -[data_types_sets]: http://docs.basho.com/riak/latest/dev/using/data-types/#Sets -[data_types_maps]: http://docs.basho.com/riak/latest/dev/using/data-types/#Maps +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ [riak_nagios]: https://github.com/basho/riak_nagios [tcollector]: https://github.com/stumbleupon/tcollector [tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py diff --git a/content/riak/kv/2.2.3/using/reference/strong-consistency.md b/content/riak/kv/2.2.3/using/reference/strong-consistency.md index 8847f7f12a..031d8c95c0 100644 --- a/content/riak/kv/2.2.3/using/reference/strong-consistency.md +++ b/content/riak/kv/2.2.3/using/reference/strong-consistency.md @@ -12,10 +12,10 @@ menu: toc: true --- -[usage bucket types]: /riak/kv/2.2.3/developing/usage/bucket-types -[concept eventual consistency]: /riak/kv/2.2.3/learn/concepts/eventual-consistency +[usage bucket types]: {{}}riak/kv/2.2.3/developing/usage/bucket-types +[concept eventual consistency]: {{}}riak/kv/2.2.3/learn/concepts/eventual-consistency -Riak was originally designed as an [eventually consistent](/riak/kv/2.2.3/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +Riak was originally designed as an [eventually consistent]({{}}riak/kv/2.2.3/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability. While this focus on high availability is a great fit for many data @@ -26,7 +26,7 @@ In Riak, strong consistency is applied [using bucket types][usage bucket types], enables developers to apply strong consistency guarantees on a per-key basis. -Elsewhere in the documentation there are instructions for [enabling and using](/riak/kv/2.2.3/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators](/riak/kv/2.2.3/configuring/strong-consistency) looking to manage, +Elsewhere in the documentation there are instructions for [enabling and using]({{}}riak/kv/2.2.3/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{}}riak/kv/2.2.3/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency. ## Strong vs. Eventual Consistency @@ -134,12 +134,12 @@ tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble can be offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the -[operations](/riak/kv/2.2.3/configuring/strong-consistency/#fault-tolerance) +[operations]({{}}riak/kv/2.2.3/configuring/strong-consistency/#fault-tolerance) documentation. A second trade-off regards performance. 
Riak's implementation of strong -consistency involves a complex [consensus subsystem](/riak/kv/2.2.3/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, +consistency involves a complex [consensus subsystem]({{}}riak/kv/2.2.3/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors. -Ways to address this issue can be found in [strong consistency and performance](/riak/kv/2.2.3/configuring/strong-consistency/#performance). +Ways to address this issue can be found in [strong consistency and performance]({{}}riak/kv/2.2.3/configuring/strong-consistency/#performance). diff --git a/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter.md index 07c45b7055..fd0d06bf22 100644 --- a/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter.md +++ b/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter.md @@ -16,7 +16,7 @@ toc: true [v2 mdc fullsync]: ./scheduling-fullsync {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.3/using/reference/v3-multi-datacenter/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.3/using/reference/v3-multi-datacenter/) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter/architecture.md index 41f25785d4..c6c3f86d87 100644 --- a/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter/architecture.md +++ b/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter/architecture.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.3/using/reference/v3-multi-datacenter/architecture/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.3/using/reference/v3-multi-datacenter/architecture/) instead. {{% /note %}} @@ -83,7 +83,7 @@ replication, as illustrated in the Figure below. 2. The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster -3. The site and listener nodes iterate through each [vnode](/riak/kv/2.2.3/learn/glossary/#vnode) in their respective clusters and compute a hash for +3. The site and listener nodes iterate through each [vnode]({{}}riak/kv/2.2.3/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares its @@ -95,7 +95,7 @@ replication, as illustrated in the Figure below. achieve the new object values, completing the fullsync cycle
-![MDC Fullsync](/images/MDC_Full-sync-small.png) +![MDC Fullsync]({{< baseurl >}}images/MDC_Full-sync-small.png)
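The fullsync exchange pictured above is driven from the command line with `riak-repl`. A minimal operational sketch of the v2 commands, run on the site (secondary) cluster; whether and when to pause is a judgment call for your workload:

```sh
# Begin a fullsync exchange with the configured listener
riak-repl start-fullsync

# Fullsync can be paused, resumed, or cancelled while in flight
riak-repl pause-fullsync
riak-repl resume-fullsync
riak-repl cancel-fullsync
```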
## Realtime Replication @@ -113,7 +113,7 @@ replication, as illustrated in the Figure below. the update
-![MDC Realtime](/images/MDC-real-time-sync-small.png) +![MDC Realtime]({{< baseurl >}}images/MDC-real-time-sync-small.png)
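Realtime replication as described above is wired up by pairing a listener on the primary cluster with a site on the secondary. A sketch using hypothetical node names, addresses, and ports:

```sh
# On the primary cluster: expose a listener for replication
riak-repl add-listener riak@10.0.1.10 10.0.1.10 9010

# On the secondary cluster: register that listener as a site
riak-repl add-site 10.0.1.10 9010 primary_site

# Confirm listener/site state and connection status
riak-repl status
```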
## Restrictions @@ -121,6 +121,6 @@ replication, as illustrated in the Figure below. It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the -same [ring size](/riak/kv/2.2.3/learn/concepts/clusters/#the-ring); if you are using fullsync -replication, every bucket's [`n_val`](/riak/kv/2.2.3/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +same [ring size]({{}}riak/kv/2.2.3/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{}}riak/kv/2.2.3/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster. diff --git a/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter/scheduling-fullsync.md index 270fe87f75..e3c95aaf32 100644 --- a/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.2.3/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -16,7 +16,7 @@ aliases: --- {{% note title="Deprecation Warning" %}} -v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3](/riak/kv/2.2.3/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.3/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. {{% /note %}} diff --git a/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/aae.md index 07633d492e..fe641eb330 100644 --- a/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/aae.md +++ b/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/aae.md @@ -17,9 +17,9 @@ aliases: - /riak/kv/2.2.3/ops/mdc/v3/aae --- -[glossary aae]: /riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae -[config reference#advanced]: /riak/kv/2.2.3/configuring/reference/#advanced-configuration -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters +[glossary aae]: {{}}riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{}}riak/kv/2.2.3/configuring/reference/#advanced-configuration +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters > **Note: Technical preview** > diff --git a/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/architecture.md index cbc388104a..2a1f829b03 100644 --- a/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/architecture.md +++ b/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/architecture.md @@ -17,8 +17,8 @@ aliases: - /riak/kv/2.2.3/ops/mdc/v3/architecture --- -[glossary vnode]: /riak/kv/2.2.3/learn/glossary/#vnode -[concept clusters]: /riak/kv/2.2.3/learn/concepts/clusters +[glossary vnode]: {{}}riak/kv/2.2.3/learn/glossary/#vnode +[concept clusters]: {{}}riak/kv/2.2.3/learn/concepts/clusters ## How Version 3 Replication Works @@ -111,7 +111,7 @@ the following commands must be issued: cluster.
- ![MDC fullsync](/images/MDC-v3-realtime1.png) + ![MDC realtime]({{< baseurl >}}images/MDC-v3-realtime1.png)
At this point realtime replication commences. @@ -122,7 +122,7 @@ and replication begins.
-![MDC fullsync](/images/MDC-v3-realtime2.png) +![MDC realtime]({{< baseurl >}}images/MDC-v3-realtime2.png)
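In v3, the connection shown in the figures above is established by naming each cluster and connecting via the cluster manager, after which realtime replication is enabled per sink. A sketch with hypothetical cluster names and an assumed default cluster manager port of 9080:

```sh
# Name the source and sink clusters (run on a node in each)
riak-repl clustername east
riak-repl clustername west

# From the source cluster, connect to the sink's cluster manager
riak-repl connect 10.0.2.10:9080

# Enable and start realtime replication to the named sink
riak-repl realtime enable west
riak-repl realtime start west
```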
### Realtime queueing and synchronization @@ -136,7 +136,7 @@ store data updates for synchronization.
-![MDC fullsync](/images/MDC-v3-realtime3.png) +![MDC realtime]({{< baseurl >}}images/MDC-v3-realtime3.png)
    @@ -145,7 +145,7 @@ store data updates for synchronization.

-![MDC fullsync](/images/MDC-v3-realtime4.png) +![MDC realtime]({{< baseurl >}}images/MDC-v3-realtime4.png)
    @@ -154,7 +154,7 @@ nodes.

-![MDC fullsync](/images/MDC-v3-realtime5.png) +![MDC realtime]({{< baseurl >}}images/MDC-v3-realtime5.png)
    @@ -163,7 +163,7 @@ acknowledged and the object removed from the realtime queue.

-![MDC fullsync](/images/MDC-v3-realtime6.png) +![MDC realtime]({{< baseurl >}}images/MDC-v3-realtime6.png)
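The queueing, delivery, and acknowledgement cycle illustrated above can be observed on a running source cluster; `riak-repl status` includes realtime queue statistics, though exact stat names vary by release:

```sh
# Inspect realtime queue depth and sink progress on a source node
riak-repl status | grep -A 5 realtime_queue_stats
```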
## Restrictions diff --git a/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/cascading-writes.md index c02eaa25f4..85113ac659 100644 --- a/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/cascading-writes.md +++ b/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/cascading-writes.md @@ -83,7 +83,7 @@ cascade. ## Usage Riak Enterprise Cascading Writes can be enabled and disabled using the -`riak-repl` command. Please see the [Version 3 Operations guide](/riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter) for more information. +`riak-repl` command. Please see the [Version 3 Operations guide]({{< baseurl >}}riak/kv/2.2.3/using/cluster-operations/v3-multi-datacenter) for more information. To show the current settings: diff --git a/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md index 2dfa7ce50e..5f13b7f1c9 100644 --- a/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md +++ b/content/riak/kv/2.2.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -17,7 +17,7 @@ aliases: - /riak/kv/2.2.3/ops/mdc/v3/scheduling-fullsync --- -[config reference#advanced]: /riak/kv/2.2.3/configuring/reference/#advanced-configuration +[config reference#advanced]: {{< baseurl >}}riak/kv/2.2.3/configuring/reference/#advanced-configuration The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either: diff --git a/content/riak/kv/2.2.3/using/repair-recovery/errors.md b/content/riak/kv/2.2.3/using/repair-recovery/errors.md index 2d2ef5c7d3..12d66bdeec 100644 --- a/content/riak/kv/2.2.3/using/repair-recovery/errors.md +++ b/content/riak/kv/2.2.3/using/repair-recovery/errors.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/ops/running/recovery/errors --- -[config reference]: /riak/kv/2.2.3/configuring/reference +[config reference]: {{< baseurl >}}riak/kv/2.2.3/configuring/reference This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect @@ -120,8 +120,8 @@ generally due to network, permission, or configuration problems. Error | Description | Resolution :-----|:------------|:---------- `{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd -`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1. +`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See Step 1.
`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others `{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems @@ -186,7 +186,7 @@ Error | Message | Description | Resolution | `status_update for non-existing handoff ` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. | `SSL handoff config error: property : .` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper | `Failure processing SSL handoff config ::` | | Ensure your SSL settings and certificates are proper - | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. + | ` transfer of from to failed because of ` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See Step 1. | `Failed to start application: ` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues | `Failed to read ring file: ` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read | `Failed to load ring file: ` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found @@ -206,8 +206,8 @@ Error | Message | Description | Resolution :-----|:--------|:------------|:---------- `all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status `{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query -`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. -`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. +`{coord_handoff_failed, Reason}` | `Unable to forward put for to - ` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See Step 1. 
`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone `{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value `{field_parsing_failed, {Field, Value}}` | `Could not parse field @@ -237,7 +237,7 @@ Error | Message | Description | Resolution `receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called `{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value `{siblings_not_allowed, Object}` | `Siblings not allowed: ` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` -`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See Step 1. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. `{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format `{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format `too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value @@ -321,12 +321,12 @@ gen_server riak_core_capability terminated with reason: no function clause match gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). <`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` -(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See Step 2. enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. 
exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. -** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP](/riak/kv/2.2.3/using/reference/snmp) configuration. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{}}riak/kv/2.2.3/using/reference/snmp) configuration. RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. diff --git a/content/riak/kv/2.2.3/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.2.3/using/repair-recovery/failure-recovery.md index 015c1e9ee2..5399339146 100644 --- a/content/riak/kv/2.2.3/using/repair-recovery/failure-recovery.md +++ b/content/riak/kv/2.2.3/using/repair-recovery/failure-recovery.md @@ -38,7 +38,7 @@ does not necessarily cause data loss, as other replicas of every key are available elsewhere in the cluster. Once the node is detected as down, other nodes in the cluster will take over its responsibilities temporarily and transmit the updated data to it when it eventually -returns to service (also called [hinted handoff](/riak/kv/2.2.3/learn/glossary/#hinted-handoff)). +returns to service (also called [hinted handoff]({{}}riak/kv/2.2.3/learn/glossary/#hinted-handoff)). More severe data loss scenarios usually relate to hardware failure. If data is lost, several options are available for restoring it. @@ -92,7 +92,7 @@ too much data and growing the cluster may be necessary. Additional RAM may also improve latency because more of the active dataset will be cached by the operating system. -Sometimes extreme latency spikes can be caused by [sibling explosion](/riak/kv/2.2.3/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +Sometimes extreme latency spikes can be caused by [sibling explosion]({{}}riak/kv/2.2.3/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to the number of siblings, causing longer disk service times and slower network responses. 
@@ -116,7 +116,7 @@ spreading load and increasing available CPU and IOPS. ## Cluster Recovery From Backups -See [Changing Cluster Information](/riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. +See [Changing Cluster Information]({{}}riak/kv/2.2.3/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. {{% note title="Tip" %}} If you are a licensed Riak Enterprise or CS customer and require assistance or diff --git a/content/riak/kv/2.2.3/using/repair-recovery/repairs.md b/content/riak/kv/2.2.3/using/repair-recovery/repairs.md index 0fb22c9281..0970894c86 100644 --- a/content/riak/kv/2.2.3/using/repair-recovery/repairs.md +++ b/content/riak/kv/2.2.3/using/repair-recovery/repairs.md @@ -21,13 +21,13 @@ aliases: - /riak/kv/2.2.3/ops/running/recovery/repairing-partitions --- -[cluster ops aae]: /riak/kv/2.2.3/using/cluster-operations/active-anti-entropy/ -[config ref]: /riak/kv/2.2.3/configuring/reference/ +[cluster ops aae]: {{}}riak/kv/2.2.3/using/cluster-operations/active-anti-entropy/ +[config ref]: {{}}riak/kv/2.2.3/configuring/reference/ [Erlang shell]: http://learnyousomeerlang.com/starting-out -[glossary AAE]: /riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae -[glossary readrep]: /riak/kv/2.2.3/learn/glossary/#read-repair -[search config]: /riak/kv/2.2.3/configuring/search/#search-config-settings -[tiered storage]: /riak/kv/2.2.3/setup/planning/backend/leveldb/#tiered-storage +[glossary AAE]: {{}}riak/kv/2.2.3/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{}}riak/kv/2.2.3/learn/glossary/#read-repair +[search config]: {{}}riak/kv/2.2.3/configuring/search/#search-config-settings +[tiered storage]: {{}}riak/kv/2.2.3/setup/planning/backend/leveldb/#tiered-storage @@ -237,23 +237,23 @@ riak start If you have experienced a loss of object replicas in your cluster, you may need to perform a repair operation on one or more of your data -[partitions](/riak/kv/2.2.3/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +[partitions]({{}}riak/kv/2.2.3/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically run in situations where partitions or whole nodes are lost due to corruption or hardware failure. In these cases, nodes or partitions are brought back online without any data, which means that the need to -repair data will depend mainly on your use case and on whether [active anti-entropy](/riak/kv/2.2.3/learn/concepts/active-anti-entropy/) is enabled. +repair data will depend mainly on your use case and on whether [active anti-entropy]({{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/) is enabled. 
You will need to run a repair if the following are both true: -* Active anti-entropy is [disabled](/riak/kv/2.2.3/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* Active anti-entropy is [disabled]({{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) * You have both non-expiring data and keys that are not accessed frequently (which means that they are not likely to be subject to - [read repair](/riak/kv/2.2.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + [read repair]({{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) You will most likely not need to run a repair operation if _any_ of the following is true: -* Active anti-entropy is [enabled](/riak/kv/2.2.3/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Active anti-entropy is [enabled]({{}}riak/kv/2.2.3/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) * Your entire key set is accessed frequently, allowing passive read repair to repair the partitions * Your data expires frequently diff --git a/content/riak/kv/2.2.3/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.2.3/using/repair-recovery/rolling-replaces.md index 86114c6ba1..10ccea85fe 100644 --- a/content/riak/kv/2.2.3/using/repair-recovery/rolling-replaces.md +++ b/content/riak/kv/2.2.3/using/repair-recovery/rolling-replaces.md @@ -12,9 +12,9 @@ menu: toc: true --- -[upgrade]: /riak/kv/2.2.3/setup/upgrading/cluster/ -[rolling restarts]: /riak/kv/2.2.3/using/repair-recovery/rolling-restart/ -[add node]: /riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes +[upgrade]: {{}}riak/kv/2.2.3/setup/upgrading/cluster/ +[rolling restarts]: {{}}riak/kv/2.2.3/using/repair-recovery/rolling-restart/ +[add node]: {{}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. diff --git a/content/riak/kv/2.2.3/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.2.3/using/repair-recovery/rolling-restart.md index 36ffe3cb8a..898da562a9 100644 --- a/content/riak/kv/2.2.3/using/repair-recovery/rolling-restart.md +++ b/content/riak/kv/2.2.3/using/repair-recovery/rolling-restart.md @@ -15,7 +15,7 @@ aliases: - /riak/kv/2.2.3/ops/running/recovery/rolling-restart --- -Because Riak functions as a multi-node system, cluster-level [Riak version upgrades](/riak/kv/2.2.3/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{}}riak/kv/2.2.3/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. The following steps should be undertaken on each Riak node that you wish to restart: diff --git a/content/riak/kv/2.2.3/using/running-a-cluster.md b/content/riak/kv/2.2.3/using/running-a-cluster.md index febefbd7d8..c85d9a434c 100644 --- a/content/riak/kv/2.2.3/using/running-a-cluster.md +++ b/content/riak/kv/2.2.3/using/running-a-cluster.md @@ -19,7 +19,7 @@ Configuring a Riak cluster involves instructing each node to listen on a non-local interface, i.e. not `127.0.0.1`, and then joining all of the nodes together to participate in the cluster. 
-Most configuration changes will be applied to the [configuration file](/riak/kv/2.2.3/configuring/reference/) located in your `rel/riak/etc` directory (if +Most configuration changes will be applied to the [configuration file]({{}}riak/kv/2.2.3/configuring/reference/) located in your `rel/riak/etc` directory (if you compiled from source) or `/etc` (if you used a binary install of Riak). @@ -46,7 +46,7 @@ options: `ring` directory. This will require rejoining all nodes into a cluster again. > -> *Rename the node using the [`riak-admin cluster replace`](/riak/kv/2.2.3/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. +> *Rename the node using the [`riak-admin cluster replace`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. ## Configure the First Node @@ -59,7 +59,7 @@ riak stop #### Select an IP address and port Let's say that the IP address for your cluster is 192.168.1.10 and that -you'll be using the default port (8087). If you're using the [Protocol Buffers interface](/riak/kv/2.2.3/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{}}riak/kv/2.2.3/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP interface due to performance gains), you should change your configuration file: @@ -153,7 +153,7 @@ preferred. > > Once a node has been started, in order to change the name you must either remove ring files from the `/data/ring` directory or -[`riak-admin cluster force-replace`](/riak/kv/2.2.3/using/admin/riak-admin/#cluster-force-replace) the node. +[`riak-admin cluster force-replace`]({{}}riak/kv/2.2.3/using/admin/riak-admin/#cluster-force-replace) the node. #### Start the node @@ -249,7 +249,7 @@ into which you can type the following command: ``` To join additional nodes to your cluster, repeat the above steps. You -can also find more detailed instructions about [adding and removing nodes](/riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes) from a cluster. +can also find more detailed instructions about [adding and removing nodes]({{}}riak/kv/2.2.3/using/cluster-operations/adding-removing-nodes) from a cluster. 
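As a hedged recap of that join workflow, with a hypothetical node name and address:

```sh
# On each additional node: stage a join to the first node
riak-admin cluster join riak@192.168.1.10

# Review the staged changes, then commit them
riak-admin cluster plan
riak-admin cluster commit
```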
> **Ring Creation Size** > diff --git a/content/riak/kv/2.2.3/using/security.md b/content/riak/kv/2.2.3/using/security.md index cf0321b680..39b2d1cd77 100644 --- a/content/riak/kv/2.2.3/using/security.md +++ b/content/riak/kv/2.2.3/using/security.md @@ -15,14 +15,14 @@ aliases: - /riak/kv/2.2.3/ops/advanced/security --- -[config reference search]: /riak/kv/2.1.4/configuring/reference/#search -[config search enabling]: /riak/kv/2.1.4/configuring/search/#enabling-riak-search -[config v3 ssl]: /riak/kv/2.1.4/configuring/v3-multi-datacenter/ssl +[config reference search]: {{}}riak/kv/2.2.3/configuring/reference/#search +[config search enabling]: {{}}riak/kv/2.2.3/configuring/search/#enabling-riak-search +[config v3 ssl]: {{}}riak/kv/2.2.3/configuring/v3-multi-datacenter/ssl [JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html -[security basics]: /riak/kv/2.1.4/using/security/basics -[security managing]: /riak/kv/2.1.4/using/security/managing-sources/ +[security basics]: {{}}riak/kv/2.2.3/using/security/basics +[security managing]: {{}}riak/kv/2.2.3/using/security/managing-sources/ [Solr]: http://lucene.apache.org/solr/ -[usage search]: /riak/kv/2.1.4/developing/usage/search +[usage search]: {{}}riak/kv/2.2.3/developing/usage/search > **Internal security** > diff --git a/content/riak/kv/2.2.3/using/security/basics.md b/content/riak/kv/2.2.3/using/security/basics.md index bdd899f55b..0926a7a0a2 100644 --- a/content/riak/kv/2.2.3/using/security/basics.md +++ b/content/riak/kv/2.2.3/using/security/basics.md @@ -18,7 +18,7 @@ aliases: > **Note on Network security** > > This document covers only the 2.0 authentication and authorization -features. For a look at network security in Riak, see [Security and Firewalls](/riak/kv/2.2.3/using/security/managing-sources/). +features. For a look at network security in Riak, see [Security and Firewalls]({{}}riak/kv/2.2.3/using/security/managing-sources/). As of version 2.0, Riak administrators can selectively apportion access to a wide variety of Riak's functionality, including accessing, @@ -47,7 +47,7 @@ of the following **before** enabling security: 1. Make certain that the original Riak Search (version 1) and link walking are not required. Enabling security will break this functionality. If you wish to use security and Search together, you - will need to use the [new Search feature](/riak/kv/2.2.3/developing/usage/search/). + will need to use the [new Search feature]({{}}riak/kv/2.2.3/developing/usage/search/). 1. Because Riak security requires a secure SSL connection, you will need to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you enable security without having established a functioning SSL @@ -59,10 +59,10 @@ of the following **before** enabling security: 1. Check any Erlang MapReduce code for invocations of Riak modules other than `riak_kv_mapreduce`. Enabling security will prevent those from succeeding unless those modules are available via the `add_path` - mechanism documented in [Installing Custom Code](/riak/kv/2.2.3/using/reference/custom-code). + mechanism documented in [Installing Custom Code]({{}}riak/kv/2.2.3/using/reference/custom-code). 1. 
Make sure that your client software will work properly: * It must pass authentication information with each request - * It must support HTTPS or encrypted [Protocol Buffers](/riak/kv/2.2.3/developing/api/protocol-buffers/) + * It must support HTTPS or encrypted [Protocol Buffers]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/) traffic * If using HTTPS, the proper port (presumably 443) is open from client to server @@ -75,7 +75,7 @@ of the following **before** enabling security: Security should be enabled only after all of the above steps have been performed and your security setup has been properly vetted. -Clients that use [Protocol Buffers](/riak/kv/2.2.3/developing/api/protocol-buffers/) will typically have to be +Clients that use [Protocol Buffers]({{< baseurl >}}riak/kv/2.2.3/developing/api/protocol-buffers/) will typically have to be reconfigured/restarted with the proper credentials once security is enabled. @@ -405,7 +405,7 @@ riak-admin security revoke <permissions> on <bucket-type> from all|<user> … [bucket types]({{< baseurl >}}riak/kv/2.2.3/developing/usage/bucket-types). If you specify a bucket type only, then the permission is granted/revoked for all buckets of that type. If you specify a bucket type _and_ a bucket, the permission is granted/revoked only for that bucket type/bucket combination. @@ -460,7 +460,7 @@ riak-admin security grant riak_kv.get,riak_kv.put on any to client ### MapReduce Permissions -Permission to perform [MapReduce](/riak/kv/2.2.3/developing/usage/mapreduce/) jobs can be assigned +Permission to perform [MapReduce]({{< baseurl >}}riak/kv/2.2.3/developing/usage/mapreduce/) jobs can be assigned using `riak_kv.mapreduce`. The following example grants MapReduce permissions to the user `mapreduce-power-user` for all buckets and bucket types: @@ -471,7 +471,7 @@ riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user ### Bucket Type Permissions -In versions 2.0 and later, Riak users can manage [bucket types](/riak/kv/2.2.3/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +In versions 2.0 and later, Riak users can manage [bucket types]({{< baseurl >}}riak/kv/2.2.3/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin security` allows you to manage the following bucket type-related permissions: @@ -507,7 +507,7 @@ disabled, you will get the following error: > `{error,{unknown_permission,"search.query"}}` > > More information on Riak Search and how to enable it can be found in the -[Riak Search Settings](/riak/kv/2.2.3/configuring/search/) document. +[Riak Search Settings]({{< baseurl >}}riak/kv/2.2.3/configuring/search/) document. #### Usage Examples @@ -558,7 +558,7 @@ access to some or all of Riak's functionality as described in the [User Manageme you will then need to define security sources required for authentication. -An more in-depth tutorial can be found in [Managing Security Sources](/riak/kv/2.2.3/using/security/managing-sources/). +A more in-depth tutorial can be found in [Managing Security Sources]({{< baseurl >}}riak/kv/2.2.3/using/security/managing-sources/). ### Add Source @@ -648,7 +648,7 @@ riak-admin security del-source riakuser 127.0.0.1/32 password This section provides only a very brief overview of the syntax for working with sources. For more information on using the `trust`, -`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources](/riak/kv/2.2.3/using/security/managing-sources/) document.
+`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{}}riak/kv/2.2.3/using/security/managing-sources/) document. ## Security Ciphers @@ -717,7 +717,7 @@ documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). ### Client vs. Server Cipher Order By default, Riak prefers the cipher order that you set on the server, -i.e. the [`honor_cipher_order`](/riak/kv/2.2.3/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +i.e. the [`honor_cipher_order`]({{}}riak/kv/2.2.3/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > **Note on Erlang versions** @@ -725,17 +725,17 @@ order dictate which cipher is chosen, set `honor_cipher_order` to `off`. > Riak's default cipher order behavior has the potential to crash Erlang VMs that do not support it. Erlang VMs that are known to support it include Basho's patched version of Erlang R16. Instructions on -installing it can be found in [Installing Erlang](/riak/kv/2.2.3/setup/installing/source/erlang). This issue should +installing it can be found in [Installing Erlang]({{}}riak/kv/2.2.3/setup/installing/source/erlang). This issue should not affect Erlang 17.0 and later. ## Enabling SSL In order to use any authentication or authorization features, you must enable SSL for Riak. **SSL is disabled by default**, but you will need -to enable it prior to enabling security. If you are using [Protocol Buffers](/riak/kv/2.2.3/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port](/riak/kv/2.2.3/configuring/reference/#client-interfaces) for the node +to enable it prior to enabling security. If you are using [Protocol Buffers]({{}}riak/kv/2.2.3/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{}}riak/kv/2.2.3/configuring/reference/#client-interfaces) for the node as well as a [certification configuration](#certificate-configuration). -If, however, you are using the [HTTP API](/riak/kv/2.2.3/developing/api/http) for Riak and would like to +If, however, you are using the [HTTP API]({{}}riak/kv/2.2.3/developing/api/http) for Riak and would like to configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host and port. The following configuration would establish port 8088 on `localhost` as the HTTPS port: @@ -758,7 +758,7 @@ listener.https.$name = 127.0.0.1:8088 When using Riak security, you can choose which versions of SSL/TLS are allowed. 
By default, only TLS 1.2 is allowed, but this version can be -disabled and others enabled by setting the following [configurable parameters](/riak/kv/2.2.3/configuring/reference/#security) to `on` or `off`: +disabled and others enabled by setting the following [configurable parameters]({{}}riak/kv/2.2.3/configuring/reference/#security) to `on` or `off`: * `tls_protocols.tlsv1` * `tls_protocols.tlsv1.1` @@ -775,16 +775,16 @@ Three things to note: ## Certificate Configuration -If you are using any of the available [security sources](/riak/kv/2.2.3/using/security/managing-sources/), including [trust-based authentication](/riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication), you will need to do so +If you are using any of the available [security sources]({{}}riak/kv/2.2.3/using/security/managing-sources/), including [trust-based authentication]({{}}riak/kv/2.2.3/using/security/managing-sources/#trust-based-authentication), you will need to do so over a secure SSL connection. In order to establish a secure connection, -you will need to ensure that each Riak node's [configuration files](/riak/kv/2.2.3/configuring/reference/#security) point to the proper paths for your +you will need to ensure that each Riak node's [configuration files]({{}}riak/kv/2.2.3/configuring/reference/#security) point to the proper paths for your generated certs. By default, Riak assumes that all certs are stored in each node's `/etc` directory. If you are using the newer, `riak.conf`-based configuration system, you can change the location of the `/etc` directory by modifying the `platform_etc_dir`. More information can be found in our documentation -on [configuring directories](/riak/kv/2.2.3/configuring/reference/#directories). +on [configuring directories]({{}}riak/kv/2.2.3/configuring/reference/#directories).
platform_data_dir The directory in which Riak stores its storage backend data, as well -as active anti-entropy data, and cluster metadata. ./data
alive_tokens Determines the number of ticks the leader will wait to hear from its -associated vnode before assuming that the vnode +associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before ensemble_tick * alive_tokens milliseconds have elapsed, the leader will @@ -1833,8 +1833,8 @@ package) and in R14B04 via a custom repository and branch.
vnode_management_timer Sets the frequency with which vnodes attempt to trigger handoff between +href="{{< baseurl >}}riak/kv/2.2.3/learn/glossary/#vnode">vnodes attempt to trigger handoff between this node and other nodes in the cluster. 10s (10 seconds)
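In `riak.conf` terms, the row above corresponds to a setting along these lines (a sketch, using the default value shown in the table):

```riakconf
vnode_management_timer = 10s
```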
@@ -842,6 +842,6 @@ checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those checks make it impossible to serve data directly from Riak. To disable those checks, set the `secure_referer_check` parameter to `off`. -If you are using [certificate-based authentication](/riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +If you are using [certificate-based authentication]({{}}riak/kv/2.2.3/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by default. To disable this behavior, set the `check_crl` parameter to `off`. diff --git a/content/riak/kv/2.2.3/using/security/managing-sources.md b/content/riak/kv/2.2.3/using/security/managing-sources.md index 37e7e8482e..57bb6d56f0 100644 --- a/content/riak/kv/2.2.3/using/security/managing-sources.md +++ b/content/riak/kv/2.2.3/using/security/managing-sources.md @@ -16,7 +16,7 @@ aliases: --- If you're looking for more general information on Riak Security, it may -be best to start with our general guide to [authentication and authorization](/riak/kv/2.2.3/using/security/basics). +be best to start with our general guide to [authentication and authorization]({{}}riak/kv/2.2.3/using/security/basics). This document provides more granular information on the four available authentication sources in Riak Security: trusted networks, password, @@ -26,8 +26,8 @@ respectively, in the `riak-admin security` interface. The examples below will assume that the network in question is `127.0.0.1/32` and that a Riak user named `riakuser` has been -[created](/riak/kv/2.2.3/using/security/basics/#user-management) and that -security has been [enabled](/riak/kv/2.2.3/using/security/basics/#the-basics). +[created]({{}}riak/kv/2.2.3/using/security/basics/#user-management) and that +security has been [enabled]({{}}riak/kv/2.2.3/using/security/basics/#the-basics). {{% note title="Note on SSL connections" %}} If you use _any_ of the aforementioned security sources, even `trust`, you @@ -122,7 +122,7 @@ Authority](http://en.wikipedia.org/wiki/Root_certificate). > **Note** > > At this time, client certificates are not supported in Riak's HTTP -interface, and can be used only through the [protocol buffers interface](/riak/kv/2.2.3/developing/api/protocol-buffers/). +interface, and can be used only through the [protocol buffers interface]({{}}riak/kv/2.2.3/developing/api/protocol-buffers/). Let's specify that our user `riakuser` is going to be authenticated using a certificate on `localhost`: @@ -145,7 +145,7 @@ their `CN` and Riak username match. On the server side, you need to configure Riak by specifying a path to your certificates. First, copy all relevant files to your Riak cluster. The default directory for certificates is `/etc`, though you can specify -a different directory in your [`riak.conf`](/riak/kv/2.2.3/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: +a different directory in your [`riak.conf`]({{}}riak/kv/2.2.3/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: ```riakconf ssl.certfile = /path/to/cert.pem @@ -156,7 +156,7 @@ ssl.cacertfile = /path/to/cacert.pem In the client-side example above, the client's `CN` and Riak username needed to match. On the server (i.e. 
Riak) side, the `CN` specified _on each node_ must match the node's name as registered by Riak. You can -find the node's name in [`riak.conf`](/riak/kv/2.2.3/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +find the node's name in [`riak.conf`]({{}}riak/kv/2.2.3/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is `riak-node-1`, you would need to generate your certificate with that in mind, as in this OpenSSL example: @@ -165,7 +165,7 @@ openssl req -new ... '/CN=riak-node-1' ``` Once certificates have been properly generated and configured on all of -the nodes in your Riak cluster, you need to perform a [rolling restart](/riak/kv/2.2.3/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +the nodes in your Riak cluster, you need to perform a [rolling restart]({{}}riak/kv/2.2.3/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client certificate that you generated for the user `riakuser`. How to use Riak clients in conjunction with OpenSSL and other diff --git a/content/riak/kv/2.2.6/_reference-links.md b/content/riak/kv/2.2.6/_reference-links.md new file mode 100644 index 0000000000..28b5698758 --- /dev/null +++ b/content/riak/kv/2.2.6/_reference-links.md @@ -0,0 +1,248 @@ + +# Riak KV 2.2.6 Reference Links List + + +## Common + +[downloads]: {{}}riak/kv/2.2.6/downloads/ +[install index]: {{}}riak/kv/2.2.6/setup/installing +[upgrade index]: {{}}riak/kv/2.2.6/upgrading +[plan index]: {{}}riak/kv/2.2.6/planning +[config index]: {{}}riak/kv/2.1.3/using/configuring/ +[config reference]: {{}}riak/kv/2.2.6/configuring/reference/ +[manage index]: {{}}riak/kv/2.2.6/using/managing +[performance index]: {{}}riak/kv/2.2.6/using/performance +[glossary vnode]: {{}}riak/kv/2.2.6/learn/glossary/#vnode +[contact basho]: https://www.tiot.jp/en/about-us/contact-us/ + + +## Planning + +[plan index]: {{}}riak/kv/2.2.6/setup/planning +[plan start]: {{}}riak/kv/2.2.6/setup/planning/start +[plan backend]: {{}}riak/kv/2.2.6/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.2.6/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.2.6/setup/planning/backend/leveldb +[plan backend memory]: {{}}riak/kv/2.2.6/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.6/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/2.2.6/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.2.6/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.2.6/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.2.6/setup/planning/future + + +## Installing + +[install index]: {{}}riak/kv/2.2.6/setup/installing +[install aws]: {{}}riak/kv/2.2.6/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.2.6/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.2.6/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.2.6/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.2.6/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.2.6/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.2.6/setup/installing/solaris +[install suse]: {{}}riak/kv/2.2.6/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.2.6/setup/installing/windows-azure + +[install source index]: {{}}riak/kv/2.2.6/setup/installing/source +[install source erlang]: {{}}riak/kv/2.2.6/setup/installing/source/erlang +[install source jvm]: 
{{}}riak/kv/2.2.6/setup/installing/source/jvm + +[install verify]: {{}}riak/kv/2.2.6/setup/installing/verify + + +## Upgrading + +[upgrade index]: {{}}riak/kv/2.2.6/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.2.6/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.2.6/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.2.6/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.2.6/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.2.6/setup/downgrade + + +## Configuring + +[config index]: {{}}riak/kv/2.2.6/configuring +[config basic]: {{}}riak/kv/2.2.6/configuring/basic +[config backend]: {{}}riak/kv/2.2.6/configuring/backend +[config manage]: {{}}riak/kv/2.2.6/configuring/managing +[config reference]: {{}}riak/kv/2.2.6/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.2.6/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.2.6/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.2.6/configuring/mapreduce +[config search]: {{}}riak/kv/2.2.6/configuring/search/ + +[config v3 mdc]: {{}}riak/kv/2.2.6/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.2.6/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.2.6/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.2.6/configuring/v3-multi-datacenter/ssl + +[config v2 mdc]: {{}}riak/kv/2.2.6/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.2.6/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.2.6/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.2.6/configuring/v2-multi-datacenter/ssl + + + +## Using + +[use index]: {{}}riak/kv/2.2.6/using/ +[use admin commands]: {{}}riak/kv/2.2.6/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.2.6/using/running-a-cluster + +### Reference + +[use ref custom code]: {{}}riak/kv/2.2.6/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.2.6/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.2.6/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.2.6/using/reference/search +[use ref 2i]: {{}}riak/kv/2.2.6/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.2.6/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.2.6/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.2.6/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.2.6/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.2.6/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.2.6/using/reference/v2-multi-datacenter + +### Cluster Admin + +[use admin index]: {{}}riak/kv/2.2.6/using/admin/ +[use admin commands]: {{}}riak/kv/2.2.6/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.2.6/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.2.6/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.2.6/using/admin/riak-control/ + +### Cluster Operations + +[cluster ops add remove node]: {{}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.2.6/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.2.6/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.2.6/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.2.6/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.2.6/using/cluster-operations/handoff +[cluster ops log]: 
{{}}riak/kv/2.2.6/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.2.6/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.2.6/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.2.6/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.2.6/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.2.6/using/cluster-operations/v2-multi-datacenter + +### Repair/Recover + +[repair recover index]: {{}}riak/kv/2.2.6/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.2.6/using/repair-recovery/failure-recovery/ + +### Security + +[security index]: {{}}riak/kv/2.2.6/using/security/ +[security basics]: {{}}riak/kv/2.2.6/using/security/basics +[security managing]: {{}}riak/kv/2.2.6/using/security/managing-sources/ + +### Performance + +[perf index]: {{}}riak/kv/2.2.6/using/performance/ +[perf benchmark]: {{}}riak/kv/2.2.6/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.2.6/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.2.6/using/performance/erlang +[perf aws]: {{}}riak/kv/2.2.6/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.2.6/using/performance/latency-reduction + +### Troubleshooting + +[troubleshoot http]: {{}}riak/kv/2.2.6/using/troubleshooting/http-204 + + +## Developing + +[dev index]: {{}}riak/kv/2.2.6/developing +[dev client libraries]: {{}}riak/kv/2.2.6/developing/client-libraries +[dev data model]: {{}}riak/kv/2.2.6/developing/data-modeling +[dev data types]: {{}}riak/kv/2.2.6/developing/data-types +[dev kv model]: {{}}riak/kv/2.2.6/developing/key-value-modeling + +### Getting Started + +[getting started]: {{}}riak/kv/2.2.6/developing/getting-started +[getting started java]: {{}}riak/kv/2.2.6/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.2.6/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.2.6/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.2.6/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.2.6/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.2.6/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.2.6/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.2.6/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.2.6/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.2.6/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.2.6/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.2.6/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.2.6/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.2.6/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.2.6/developing/getting-started/golang/object-modeling + +### Usage + +[usage index]: {{}}riak/kv/2.2.6/developing/usage +[usage bucket types]: {{}}riak/kv/2.2.6/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.2.6/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.2.6/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.2.6/developing/usage/content-types +[usage create 
objects]: {{}}riak/kv/2.2.6/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.2.6/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.2.6/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.2.6/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.2.6/developing/usage/search +[usage search schema]: {{}}riak/kv/2.2.6/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.2.6/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.2.6/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.2.6/developing/usage/updating-objects + +### App Guide + +[apps mapreduce]: {{}}riak/kv/2.2.6/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.2.6/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.2.6/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/2.2.6/developing/api/backend +[dev api http]: {{}}riak/kv/2.2.6/developing/api/http +[dev api http status]: {{}}riak/kv/2.2.6/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.2.6/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/2.2.6/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.2.6/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.2.6/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.2.6/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.2.6/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.2.6/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.2.6/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.2.6/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.2.6/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.2.6/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.2.6/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.2.6/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.2.6/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.2.6/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + diff --git a/content/riak/kv/2.2.6/add-ons.md b/content/riak/kv/2.2.6/add-ons.md new file mode 100644 index 0000000000..f622186cc3 --- /dev/null +++ b/content/riak/kv/2.2.6/add-ons.md @@ -0,0 +1,19 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
+ +* [Riak Redis Add-on]({{}}riak/kv/2.2.6/add-ons/redis/) diff --git a/content/riak/kv/2.2.6/add-ons/redis.md b/content/riak/kv/2.2.6/add-ons/redis.md new file mode 100644 index 0000000000..000b9e2dad --- /dev/null +++ b/content/riak/kv/2.2.6/add-ons/redis.md @@ -0,0 +1,58 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +--- + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. +{{% /note %}} + +Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV. + +RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate. + +Whether you are looking to build out a session, shopping cart, advertisement or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV). + +## Compatibility + +RRA is supported on the following platforms: + +* RHEL/CentOS 6 +* RHEL/CentOS 7 +* Ubuntu 12.04 LTS "Precise Pangolin" +* Ubuntu 14.04 LTS "Trusty Tahr" +* Debian 7 "Wheezy" +* Debian 8 "Jessie" + +RRA is compatible with the following services: + +* Riak KV Enterprise (2.1.4+) +* Riak TS Enterprise (1.4.0+) +* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster) + * Redis Cluster and RRA's consistent hash are at odds, which surface as errors + such as MOVED, ASK, and CROSSSLOT messages from Redis, see (WIP): + https://github.com/antirez/redis-rb-cluster + +## Get Started + +* [Set up RRA.][addon redis setup] +* [Use RRA with various clients.][addon redis use] +* [Develop with RRA.][addon redis develop] +* [Learn about RRA's features.][addon redis features] diff --git a/content/riak/kv/2.2.6/add-ons/redis/developing-rra.md b/content/riak/kv/2.2.6/add-ons/redis/developing-rra.md new file mode 100644 index 0000000000..008320a2c1 --- /dev/null +++ b/content/riak/kv/2.2.6/add-ons/redis/developing-rra.md @@ -0,0 +1,325 @@ +--- +title: "Developing with Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Develop with Redis Add-on" + identifier: "add-ons_redis_develop" + weight: 403 + parent: "add-ons_redis" +toc: true +commercial_offering: true +--- + +[redis-clients]: http://redis.io/clients +[usage bucket types]: {{}}riak/kv/2.2.6/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.2.6/developing/api/http +[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ +[apps replication properties]: {{}}riak/kv/2.2.6/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.2.6/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.2.6/learn/concepts/causal-context +[ee]: 
https://www.tiot.jp/en/about-us/contact-us/ + +This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. + +## Overview + +Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides access to Riak KV as a persistent data store, with Redis as a cache, through the various Redis client libraries and the command-line interface tool `redis-cli`. + +As with Riak KV, the cache proxy service almost always performs best and most +predictably when you use the basic CRUD operations -- Create, Read, Update, +Delete -- that you'd find in any key/value store. Learning these operations +is a great place to start when beginning to develop applications that use +RRA. + +The set of clients (including recommendations) for Redis is listed at +[Redis clients][redis-clients]. For brevity's sake, examples provided here are +in: + +* Erlang (Eredis) +* JavaScript (node_redis) +* Python (redis-py) +* Ruby (redis-rb) +* Scala (lettuce) +* Java: see the Scala examples. The code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client. + +## Riak KV Setup + +While you can use Riak Redis Add-on with Riak KV configured so either `last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'. + +The cache proxy service is tested under both configurations. However, due to lack of support via the Redis protocol for returning multiple values for a single `GET`, effectively `last_write_wins` semantics apply. + +For a deeper explanation of Riak KV's configurable behaviors, see John Daily's +blog series [part 4][config-behaviors]. + +### Bucket Type Setup + +#### Create a Bucket Type + +If your application organizes data in a way that does not include bucket types +and instead only uses buckets to organize its keyspace, the `default` bucket-type +can be used by omitting the bucket-type portion of the colon-delimited +hierarchical namespaced key. In other words, `test:food` is equivalent to +`default:test:food` where the bucket-type is `default`, the bucket is `test`, +and the key is `food`. For examples here, we will use `rra:test:food` to clearly +use a bucket-type. + +If your application organizes data including a bucket-type, ensure that the +bucket-type is created in Riak without specifying the data type, so effectively +an opaque value, i.e. a `string`. The following command provides an example of +creating the bucket-type `rra`: + +```sh +if ! riak-admin bucket-type status rra >/dev/null 2>&1; then + riak-admin bucket-type create rra '{"props":{}}' + riak-admin bucket-type activate rra +fi +``` + +#### Set Bucket Props + +The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false': + +```sh +curl -XPUT -H 'Content-Type: application/json' \ + -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \ + 'http://127.0.0.1:8098/types/rra/buckets/test/props' +``` + +For additional configuration options see [bucket properties][dev api http]. + +## Object/Key Operations + +Riak KV organizes data into buckets, keys, and values, with +[bucket types][usage bucket types] acting as an additional namespace in Riak KV +versions 2.0 and greater.
Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket. + +Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or other plaintext representations that can be parsed in the client application (e.g. YAML). + +While buckets are a flat namespace in Riak KV and you can name them +whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy +service, Redis bucket_type:bucket:key is mapped to Riak KV +bucket_type/bucket/key, so bucket type and bucket names should not contain +colon (`:`). When not specified, bucket type defaults to "default". + +Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets. + +The same goes for naming keys: many objects can have the same key as long as they're in different buckets. There is no restriction on keys containing colons (`:`), and this practice of representing a nested namespace is common in applications using Redis. + +Riak KV [bucket types][usage bucket types] enable you to provide common +configurations for buckets (as many buckets as you wish). This means you can +easily enable buckets to share common configurations, i.e. identical +[replication properties][apps replication properties] or +[commit hooks][usage commit hooks]. + + +## Reading Objects + +Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading through to Riak KV, which results in greater resilience through node outages and network partitions. + +To request a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.get("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.get("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.get("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +var value = connection.get("rra:test:food") +``` + +### Get Configuration Parameters + +>**Note:** The cache proxy service read option (related to replication factor and +consistency concern) may optionally be set within nutcracker.conf, overriding +the bucket-level setting in Riak KV. + +The following configuration parameters apply to `GET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster. **NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` | +|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` | +|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | +|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) | +|`timeout` | The number of milliseconds to await a response. | `0` (server specified) | + + +### Sibling Resolution + +As the Redis protocol does not provide a means to return multiple siblings, +the cache proxy service must provide server-side sibling resolution. At present, only last-write-wins sibling resolution is available. The result is an effective +last-write-wins configuration for access through the cache proxy service. + + +## Writing Objects + +Writes via the cache proxy service are analogous to a Redis `SET`, with the added +benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating +cache. As with HTTP PUT, `SET` semantically covers both create and update +operations. + +To set a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.set("rra:test:food", "apple", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.set("rra:test:food", "apple") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.set("rra:test:food", "apple") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +connection.set("rra:test:food", "apple") +``` + +### Set Configuration Parameters + +>**Note:** The cache proxy service write option (related to replication factor and +consistency concern) may optionally be set within nutcracker.conf, overriding +the bucket-level setting in Riak KV. + +The following configuration parameters apply to `SET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster. **NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | + + +### Sibling Explosion + +As noted in the section "Sibling Resolution" above, Riak KV provides for a line of +descent (known as the [causal context][concept causal context]) for a value stored at a key. Clients +performing write operations provide this causal context by setting the vector +clock (VClock) that they last read. + +If a client does not provide the causal context, Riak KV makes no assumptions and treats the write as a new causal context, semantically equivalent to a +create. In the case that a value is already stored at the key, this would lead +to a sibling. + +Since the Redis protocol does not provide a means to pass a VClock, the cache +proxy service needs to perform a read-before-write to obtain the current VClock so the write can continue the causal context previously established and avoid +"sibling explosion". + +Despite these efforts, in the event of a network partition, siblings will still +be created as clients writing to nodes on either side of the network partition +can create divergent lines of descent. Sibling resolution remains the means +to merge these lines of descent into a coherent causal context. + +## Deleting Objects + +Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added +benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating +cache. + +To delete a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.del("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +# redis-py exposes the Redis DEL command as delete(), since del is a +# reserved word in Python. +r.delete("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.del("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +connection.del("rra:test:food") +``` + +### Delete Configuration Parameters + +The following configuration parameters apply to `DEL` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster. **NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | diff --git a/content/riak/kv/2.2.6/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.2.6/add-ons/redis/redis-add-on-features.md new file mode 100644 index 0000000000..70c7efe157 --- /dev/null +++ b/content/riak/kv/2.2.6/add-ons/redis/redis-add-on-features.md @@ -0,0 +1,131 @@ +--- +title: "Riak Redis Add-on Features" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Redis Add-on Features" + identifier: "add-ons_redis_features" + weight: 504 + parent: "add-ons_redis" +toc: true +commercial_offering: true +--- + +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png +[redis docs]: http://redis.io/commands +[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md + +## Overview + +The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware. + +On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available: + +* [Pre-sharding](#pre-sharding) +* [Connection Aggregation](#connection-aggregation) +* [Command Pipelining](#command-pipelining) +* [Read-through Cache](#read-through-cache) +* [Write-around Cache](#write-around-cache) +* [Commands](#commands) +* [Object Lifetime](#object-lifetime) + +## Pre-sharding + +Pre-sharding with consistent hashing dispatches object reads and writes based +on a configurable hash function, spreading load across multiple cache servers. +The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then +requests are routed to the Redis server that handles that portion of the key +range. + +Redis, run with no persistence, serves as the cache and holds all data in +memory, so the addressable memory of any single cache server is limited. By +employing pre-sharding, the total addressable cache memory space is extended +by the number of Redis servers. + +## Connection Aggregation + +Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers. This reduces the total required connections to the Redis server for the same key. + +Redis clients in various languages support specifying multiple servers, as well +as implementing multiple methods of spreading load across those servers (e.g. +round-robin load balancing or consistent hashing). Since the cache proxy service is providing consistent hashing, any Redis client method of supporting multiple +servers will suffice.
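+ +To make connection aggregation concrete, the following is a minimal sketch in Python using the `redis-py` client shown elsewhere on these pages. The two RRA endpoints are hypothetical, and client-side round-robin is just one of the load-spreading methods mentioned above: + +```python +import itertools + +import redis + +# Hypothetical RRA endpoints; adjust hosts and ports to your deployment. +RRA_NODES = [("10.0.0.10", 22122), ("10.0.0.11", 22122)] + +# One connection per RRA instance; RRA aggregates these into far fewer +# connections to the Redis and Riak KV servers behind it. +clients = [redis.StrictRedis(host=host, port=port) for host, port in RRA_NODES] + +# Simple round-robin suffices: RRA, not the client, decides which Redis +# server owns a given key via consistent hashing. +rotation = itertools.cycle(clients) + +value = next(rotation).get("rra:test:food") +```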
+ +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideally positioned to do so due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server during a write (DELETE or PUT) to the +backend server. + +A short `CACHE_TTL`, for example "15s", removes a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three write cache strategies (write-around, write-through, and write-back), the write-around cache strategy is the least +prone to race conditions, but the least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV, utilizing the read-through + caching strategy with a TTL set at service configuration time. + +* SET - set the value of a key in Riak KV and invalidate the cache by issuing a + PEXPIRE to Redis. + +* DEL - delete the value of a key in Riak KV and invalidate the cache by issuing a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs].
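+ +As a rough sketch of these three augmented commands from the client's perspective, in Python with the `redis-py` client (assuming an RRA instance listening locally on the default port 22122; note that `redis-py` exposes the Redis DEL command as `delete()`): + +```python +import redis + +# assumes RRA is listening locally on its default port +rra = redis.StrictRedis(host="127.0.0.1", port=22122) + +rra.set("rra:test:food", "apple")  # write to Riak KV, then PEXPIRE invalidates cache +value = rra.get("rra:test:food")   # read-through: Redis first, Riak KV on a miss +rra.delete("rra:test:food")        # delete in Riak KV, then PEXPIRE invalidates cache +```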
+ +>**Important:** While the cache proxy service does support issuing DEL commands, issuing a PEXPIRE with a small TTL is suggested instead when the semantic intent is to remove an item from the cache. With write-around, a DEL command will also issue a delete to the Riak backend. + +## Object Lifetime + +With the combination of read-through and write-around cache strategies, the +full object lifetime for a key-value is represented by the following +sequence diagram: + +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.2.6/add-ons/redis/set-up-rra.md b/content/riak/kv/2.2.6/add-ons/redis/set-up-rra.md new file mode 100644 index 0000000000..c246f76417 --- /dev/null +++ b/content/riak/kv/2.2.6/add-ons/redis/set-up-rra.md @@ -0,0 +1,280 @@ +--- +title: "Setting Up Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Set Up Redis Add-on" + identifier: "add-ons_redis_setup" + weight: 201 + parent: "add-ons_redis" +toc: true +commercial_offering: true +--- + +[addon redis develop]: ../developing-rra/ +[addon redis use]: ../using-rra +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[install index]: {{}}riak/kv/2.2.6/setup/installing +[perf open files]: {{}}riak/kv/2.2.6/using/performance/open-files-limit/#changing-the-limit +[lab ansible]: https://github.com/paegun/ansible-cache-proxy + +This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. + +## Prerequisites + +Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index]. + +While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go. + +This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used +to configure the cache proxy. + +## In the Lab + +An Ansible setup for the Riak Redis Add-on (RRA) was developed to provide a +runnable example of an installation; see [ansible cache proxy][lab ansible]. +The remainder of this setup guide lists the commands required to install and +configure RRA manually. + +## Installing + +1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files]. +2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed. +3. Install Riak Redis Add-on. + +### Change the open-files limit + +As with Riak KV, both the total open-files limit and the per-user open-files limit +must be high enough to allow Redis and Riak Redis Add-on (RRA) to function. + +For a complete guide on changing the limit in Riak KV, see +[Changing the limit][perf open files]. + +#### Linux + +On most Linux distributions, the total limit for open files is controlled by `sysctl`. + +```bash +# persist the new total open-files limit in /etc/sysctl.conf, then reload +echo "fs.file-max=65536" | sudo tee -a /etc/sysctl.conf +sudo sysctl -p +``` + +To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
+ +#### CentOS + +On CentOS systems, set a proper limit for the user you're usually logging in with +to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the +executing user. + +#### Ubuntu + +On Ubuntu systems, the following settings are recommended: + +```config +»USERNAME« hard nofile 65536 +»USERNAME« soft nofile 65536 +root hard nofile 65536 +root soft nofile 65536 +``` + +>**Note:** You may need to log out of your shell and then log back in for these changes to take effect. + + +### Install Redis + +>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on". + +#### Install on Ubuntu + +If you are on Ubuntu, run the following to install Redis: + +```bash +# add the dotdeb repositories to your APT sources. +sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <<EOF +deb http://packages.dotdeb.org squeeze all +deb-src http://packages.dotdeb.org squeeze all +EOF + +# authenticate the dotdeb repositories using their public key. +wget -q -O - https://www.dotdeb.org/dotdeb.gpg | sudo apt-key add - + +# update the APT package cache, then install Redis. +sudo apt-get update +sudo apt-get install -y redis-server +``` + +To verify that Redis is running and listening on the expected port, run the following: + +```bash +ss -nlp | grep [r]edis +``` + +>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well. + +### Install Riak Redis Add-on (RRA) + +>**Note:** +>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums. + +If you are on CentOS, run the following to install RRA: + +```bash +sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm +``` + +If you are on Ubuntu, run the following to install RRA: + +```bash +sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb +``` + +## Configuring Riak Redis Add-on + +To configure Riak Redis Add-on (RRA), edit the configuration file: /etc/cache_proxy/cache_proxy_22122.yml. + +The RRA configuration file is in YAML format. An example configuration +file is provided in the install, and it contains all relevant configuration elements: + +```config +»pool name« : + listen: 0.0.0.0:22122 + hash: fnv1a_64 + distribution: ketama + auto_eject_hosts: true + redis: true + server_retry_timeout: 2000 + server_failure_limit: 1 + server_ttl: 1h + servers: + - 127.0.0.1:6379:1 + backend_type: riak + backend_max_resend: 2 + backends: + - 127.0.0.1:8087 +``` + +Set the `listen` configuration value to set the RRA listen port. + +To set the time-to-live (TTL) for values stored in cache, set the `server_ttl` +configuration value. Human-readable time values can be specified, +with the most likely units being `s` for seconds or `ms` for milliseconds. + +Set the list of Redis servers by listing the servers, one per line prefixed with `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional). + +Set the list of Riak KV servers by listing the servers, one per line prefixed with `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«` +(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here. + +### Verify your configuration + +If you are on Ubuntu, run the following to start RRA: + +```bash +sudo service cache_proxy start +``` + +If you are on CentOS, run the following to start RRA: + +```bash +systemctl start cache_proxy +``` + +To verify RRA is running and listening on the expected port, run the +following (using the loopback interface and the default RRA port 22122 +as an example): + +```bash +redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS +redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on +``` + +Redis should respond with `SUCCESS`.
+ +If RRA is responding with the expected output, run the following to +clean up and remove the test value: + +```bash +redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on +``` + +If you did not get the expected output, run the following +to verify that RRA is running on the expected port: + +```bash +ss -nlp |grep [n]utcracker +``` + +>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well. + +## Next Steps + +Get started with some [basic usage][addon redis use] or check out more info on [setting up for development (with examples)][addon redis develop]. diff --git a/content/riak/kv/2.2.6/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.2.6/add-ons/redis/set-up-rra/deployment-models.md new file mode 100644 index 0000000000..f1761f2efb --- /dev/null +++ b/content/riak/kv/2.2.6/add-ons/redis/set-up-rra/deployment-models.md @@ -0,0 +1,138 @@ +--- +title: "Riak Redis Add-on Deployment Models" +description: "Explore the various models for deploying Riak Redis Add-on" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Redis Add-on Deployment Models" + identifier: "add-ons_redis_deployment" + weight: 201 + parent: "add-ons_redis_setup" +toc: true +commercial_offering: true +--- + +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png + +## Deployment Models + +### Local Cache Deployment + +In a local cache deployment, the RRA and Redis are deployed to the application +server. + +![Local-deployment]({{}}images/redis/rra_deployment_local.png) + +Connections: + +* RRA: The connections between Application Service instances and the RRA Service + instance are local. +* Redis: The connection between the RRA Service instance and Redis Service + instance is local. +* Riak: The connections between Application Servers and Riak Nodes are distributed + and bounded to equal the number of Riak nodes _multiplied_ by the number of + Application Servers, since they are aggregated at the RRA Service instance. + +Advantages: + +* Cache hits are extremely fast. + +Disadvantages: + +* Cache writes on one application server are *not* observed on other application + servers, so cache hit rates are likely lower unless some form of consistent + routing to the application server exists within the solution. +* Redis competing for RAM with the application service may be problematic. + +### Colocated Cache Deployment + +In a colocated cache deployment, the RRA may be deployed either to the +application server (suggested) or to the Riak servers, and Redis is deployed to +the Riak servers. + +In the case of deploying the RRA to the application servers, the RRA features +of reducing connections from the relatively high number of application service +instances to the fewer Redis (cache) and Riak (persistent) data service +instances allow for the greatest scale at the expense of the deployment cost +of pushing a service and its configuration. + +In the case of deploying the RRA to the colocated Redis and Riak data servers, +the maximum scale for the solution is constrained by the number of network +connections from the application services, while deployment costs remain a matter +of pushing a service and its configuration. In either case, deployment should +be automated, so deployment costs are not multiplied by the number of servers.
+ +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) + +Connections: + +* RRA: The connections between Application Service instances and the RRA Service + instance are distributed and bounded to equal the number of Riak nodes + _multiplied_ by the number of Application Service instances. +* Redis: The connection between the RRA Service instance and Redis Service + instance is local. +* Riak: The connections between RRA and Riak Nodes are distributed and bounded to + equal the number of Riak nodes _squared_. + +Advantages: + +* Increases the cache hit rate, as a cache write from one application server + will lead to a cache hit by all other application servers. + +Disadvantages: + +* Typically increased distance between the application service and the Redis and + Riak services, so slightly increased latency compared to local. +* Redis competing for RAM with Riak will likely be problematic. Redis should + be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis + to ensure Riak is allotted sufficient RAM to serve the more important + persistent data storage and retrieval services. See http://redis.io/topics/config +* This model may seem to provide data locality, but in the case of faults in + either Redis or Riak services, the fault tolerance mechanisms of RRA and + Riak will not match exactly, as communicating the necessary information to + support such lock-step fault tolerance would lead to greater mean latencies, + and Riak provides superior 99th percentile latency performance in the face + of faults. + + +### Distributed Cache Deployment + +In a distributed cache deployment, the RRA is deployed to the application server +and Redis is deployed to standalone servers, separate from Riak cluster nodes. + +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) + +Connections: + +* RRA: The connections between Application Service instances and the RRA Service + instance are local. +* Redis: The connections between the RRA Service instance and Redis Service + instances are distributed and bounded to equal the number of Application + Servers _multiplied_ by the number of Redis Servers. +* Riak: The connections between RRA and Riak Nodes are distributed and bounded to + equal the number of Riak nodes _multiplied_ by the number of Application + Servers, since they are aggregated at the RRA Service instance. + +Advantages: + +* Increases the cache hit rate, as a cache write from one application server + will lead to a cache hit by all other application servers. +* Keeps RRA near the application, reducing network connections. +* Moves Redis to distinct servers, allowing the cache more RAM and not + constraining the RAM of either application or persistent data services. + +Disadvantages: + +* Typically increased distance between the application service and the Redis and + Riak services, so increased latency compared to local. + +### Recommendation + +The relative advantages and disadvantages of the Distributed Cache Deployment, +most notably the increased cache hit rate and reduced connection overhead, +should make it the standout choice for applications requiring the scale and +operational simplicity of Riak. For this reason, we recommend the Distributed +Cache Deployment.
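+ +As a back-of-the-envelope illustration of the connection bounds described above, the following sketch computes them for a hypothetical fleet of 10 application servers, 5 Riak nodes, and 3 Redis servers (the counts are assumptions for illustration, not recommendations): + +```python +app_servers, riak_nodes, redis_servers = 10, 5, 3 + +# (app-to-RRA, RRA-to-Redis, connections-to-Riak) per deployment model, +# following the bounds described in the sections above. +models = { +    "local":       ("local", "local", riak_nodes * app_servers), +    "colocated":   (riak_nodes * app_servers, "local", riak_nodes ** 2), +    "distributed": ("local", app_servers * redis_servers, riak_nodes * app_servers), +} + +for name, (rra, redis_conns, riak) in models.items(): +    print(name, "RRA:", rra, "Redis:", redis_conns, "Riak:", riak) +```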
diff --git a/content/riak/kv/2.2.6/add-ons/redis/using-rra.md b/content/riak/kv/2.2.6/add-ons/redis/using-rra.md new file mode 100644 index 0000000000..3c7eebe049 --- /dev/null +++ b/content/riak/kv/2.2.6/add-ons/redis/using-rra.md @@ -0,0 +1,242 @@ +--- +title: "Using Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Using Redis Add-on" + identifier: "add-ons_redis_getstarted" + weight: 302 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: + - /riak/kv/2.2.6/add-ons/redis/get-started-with-rra +--- + +[addon redis develop]: ../developing-rra/ +[addon redis setup]: ../set-up-rra/ +[dev api http]: {{}}riak/kv/2.2.6/developing/api/http/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + + +Now that you've [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client which supports `GET`, `SET`, and `DEL` operations. + +This page will walk you through using RRA. + +## Prerequisites + +We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine. + +You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http]. + +## Run the Read-Through Test + +Throughout this test example, the bucket "test" and key "foo" are used to +demonstrate how to address the hierarchical namespace support in Riak KV +through the flat Redis key. The bucket type is not specified in this example, +so it is effectively the default bucket type, named "default". For additional +information regarding the key namespace, see [develop Riak Redis Add-on (RRA)][addon redis develop]. + +The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are: + +* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings. +* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`. +* GET the Riak object at the `test` bucket with the key `foo`. +* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.) +* Assert that the value obtained from the previous cache proxy GET is 'bar'.
+ +First, create a file named `read_through_test.sh` with the following content: + +```bash +#!/usr/bin/env bash + +# set test environment +RIAK_HTTP_IP="127.0.0.1" +RIAK_HTTP_PORT="8098" +CACHE_PROXY_IP="127.0.0.1" +CACHE_PROXY_PORT="22122" +CACHE_PROXY_STATISTICS_PORT="22123" +RIAK_TEST_BUCKET="test" +KEY="foo" +VALUE="bar" + +# DELETE Riak object, ensure no siblings +curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# PUT Riak object +curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# GET Riak object +RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY") + +# GET Cache Proxy value +CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" get "$RIAK_TEST_BUCKET:$KEY") + +# DELETE Riak object, cleanup +curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# Assert +if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then + RESULT="Success" +else + RESULT="FAIL" +fi +echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy." +``` + +Then, once you've created the file, make it executable and run it: + +```bash +chmod +x read_through_test.sh +./read_through_test.sh +``` + +### Exceptions + +If the test does not pass, verify that both Redis and RRA are running. You can do this by running: + +```bash +ps aux |grep [r]edis +ps aux |grep [n]utcracker +``` + +The result should list `redis` and `nutcracker` respectively. + +Also, verify that Riak KV is started and listening on the protocol buffer port specified: + +```bash +sudo riak config effective |grep proto +``` + +If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following: + +```bash +sudo service cache_proxy restart +``` + +If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows: + +1. Stop RRA. +2. Stop Redis. +3. *Optional:* Restart Riak KV (this should only be necessary if Riak KV is not responding to protocol buffer requests). +4. Start Redis. +5. Start RRA. + +```bash +sudo service cache_proxy stop +sudo service redis stop + +# optional +sudo riak restart + +sudo service redis start +sudo service cache_proxy start +``` + +## Using Riak Redis Add-on + +Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA. + +For objects that should not be cached, interact with Riak KV as usual: issuing GET, PUT, and DELETE commands through the Riak client. + +For objects that should be cached, read from RRA: issuing GET, SET, and DEL commands through the Redis client. + +### Monitoring + +#### RRA + +Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service's process was killed or terminated by other means. + +The log file for RRA is stored by default in /var/log/cache_proxy.log. RRA is logrotate friendly, responding to the signal to reopen the log file following a rotate. + +For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+ +For example, run the following command (using the loopback interface and the default statistics port): + +```bash +telnet 127.0.0.1 22123 +``` + +It returns statistics like the following: + +```json +{ + "bdp_cache_proxy": { + "192.168.50.2:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 216, + "requests": 9, + "response_bytes": 39, + "responses": 4, + "server_connections": 1, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.3:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 0, + "requests": 0, + "response_bytes": 0, + "responses": 0, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.4:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 90, + "requests": 5, + "response_bytes": 258, + "responses": 2, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "client_connections": 0, + "client_eof": 6, + "client_err": 0, + "forward_error": 0, + "fragments": 0, + "server_ejects": 0 + }, + "curr_connections": 4, + "service": "nutcracker", + "source": "vagrant", + "timestamp": 1438301846, + "total_connections": 10, + "uptime": 7227, + "version": "0.4.0" +} +``` + +Using the above results, you should be able to determine metrics changes that would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution. + +While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions: + +* Custom - https://github.com/gfranxman/NutcrackerMonitor +* NewRelic - http://newrelic.com/plugins/schoology/245 +* Nagios - https://github.com/schoology/twemproxy_nagios + +#### Redis + +Various Redis monitoring solutions exist in the market. Like RRA monitoring solutions, they make underlying calls to obtain Redis statistics, typically via the `info` command alone.
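+ +For a sense of what such a call looks like, here is a minimal sketch using the `redis-py` client to pull a couple of fields from `INFO` directly (assuming a Redis server on its default port 6379; note this is the Redis client port, not the RRA statistics port): + +```python +import redis + +# Talk to Redis itself, not RRA: INFO is a server-side Redis command. +r = redis.StrictRedis(host="127.0.0.1", port=6379) + +info = r.info()  # issues INFO and parses the reply into a dict +print(info["connected_clients"], info["used_memory_human"]) +```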
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details diff --git a/content/riak/kv/2.2.6/configuring.md b/content/riak/kv/2.2.6/configuring.md new file mode 100644 index 0000000000..91f5724427 --- /dev/null +++ b/content/riak/kv/2.2.6/configuring.md @@ -0,0 +1,82 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+ +[Learn More >>][config search] + +#### [V3 Multi-Datacenter][config v3 mdc] + +A guide on configuring Riak's V3 Multi-Datacenter Replication. + +[Learn More >>][config v3 mdc] + diff --git a/content/riak/kv/2.2.6/configuring/backend.md b/content/riak/kv/2.2.6/configuring/backend.md new file mode 100644 index 0000000000..da82fab674 --- /dev/null +++ b/content/riak/kv/2.2.6/configuring/backend.md @@ -0,0 +1,559 @@ +--- +title: "Backend Configuration" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Backend Configuration" + identifier: "configuring_backend" + weight: 110 + parent: "configuring" +toc: true +--- + +[plan backend leveldb]: {{}}riak/kv/2.2.6/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.6/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.6/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.2.6/setup/planning/backend/multi + +## LevelDB + +Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend. + +> **Note on upgrading to 2.0** +> +> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and +wish to use your old configuration files, i.e. `app.config` and +`vm.args`, please note that you must set the `total_leveldb_mem_percent` +setting in the `eleveldb` section of `app.config`. We recommend setting +it to `70`. If you do not set this parameter, it will default to 15, +which can lead to problems in some clusters. + +
ConfigDescriptionDefault
leveldb.block_cache_thresholdThis setting defines the limit past which block cache memory can no +longer be released in favor of the page cache. This setting has no +impact in favor of file cache. The value is set on a per-vnode basis. +32MB
leveldb.compaction.trigger.tombstone_countControls when a background compaction initiates solely due to the +number of delete tombstones within an individual .sst table +file. A value of off disables the feature.1000
leveldb.compressionEnabling this setting (on), which is the default, +saves disk space. Disabling it may reduce read latency but increase +overall disk activity. This option can be changed at any time, but it +will not impact data on disk until the next time a file requires +compaction.on
leveldb.compression.algorithmThis setting is used to select which compression algorithm + is selected when leveldb.compression is on. + In new riak.conf files, this is explicitly set to + lz4; however when this setting is not provided, + snappy will be used for backward-compatibility. +

+ When you determine that you will no longer need backward-compatibility, + setting this to lz4 will cause future compactions + to use the LZ4 algorithm for compression.
lz4 in new riak.conf files

+ snappy when not provided +
leveldb.data_rootThe directory in which LevelDB will store its data../data/leveldb
leveldb.fadvise_willneedOption to override LevelDB's use of fadvise(DONTNEED) +with fadvise(WILLNEED) instead. WILLNEED can +reduce disk activity on systems where physical memory exceeds the +database size.false
leveldb.maximum_memoryThis parameter defines the server memory (in bytes) to assign to +LevelDB. Also see leveldb.maximum_memory.percent to set +LevelDB memory as a percentage of system total.80
leveldb.maximum_memory.percentThis parameter defines the percentage of total server memory to +assign to LevelDB. LevelDB will dynamically adjust its internal cache +sizes to stay within this size. The memory size can alternately be +assigned as a byte count via leveldb.maximum_memory +instead.70
leveldb.threadsThe number of worker threads performing LevelDB operations.71
leveldb.verify_checksumsEnables or disables the verification of the data fetched from +LevelDB against internal checksums.on
leveldb.verify_compactionEnables or disables the verification of LevelDB data during +compaction.on
leveldb.block.size_stepsDefines the number of incremental adjustments to attempt between the +block.size value and the maximum block.size +for an .sst table file. A value of zero disables the +underlying dynamic block_size feature.16
leveldb.block.restart_intervalDefines the key count threshold for a new key entry in the key +index for a block. Most deployments should leave this parameter alone. +16
leveldb.block.sizeDefines the size threshold for a block/chunk of data within one +.sst table file. Each new block gets an index entry in the +.sst table file's master index.4KB
leveldb.bloomfilterEach database .sst table file can include an optional +"bloom filter" that is highly effective in shortcutting data queries +that are destined to not find the requested key. The Bloom filter +typically increases the size of an .sst table file by about +2%.on
leveldb.write_buffer_size_minEach vnode first stores new key/value data in a memory-based write +buffer. This write buffer is in parallel to the recovery log mentioned +in the sync parameter. Riak creates each vnode with a +randomly sized write buffer for performance reasons. The random size is +somewhere between write_buffer_size_min and +write_buffer_size_max.30MB
leveldb.write_buffer_size_maxSee leveldb.write_buffer_size_min directly above.60MB
leveldb.limited_developer_memThis is a Riak-specific option that is used when a developer is +testing a high number of vnodes and/or several VMs on a machine with +limited physical memory. Do not use this option if making +performance measurements. This option overwrites values given to +write_buffer_size_min and +write_buffer_size_max.off
leveldb.sync_on_writeWhether LevelDB will flush after every write.

+Note: If you are familiar with fsync, this is analogous +to calling fsync after every write.
off
leveldb.tieredThe level number at which LevelDB data switches from the faster to +the slower array. The default of off disables the +feature.off
leveldb.tiered.path.fastThe path prefix for .sst files below the level set by +leveldb.tiered.
leveldb.tiered.path.slowThe path prefix for .sst files below the level set by +leveldb.tiered.
+ +## Bitcask + +Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
bitcask.data_rootThe directory under which Bitcask will store its data../data/bitcask
bitcask.io_modeConfigure how Bitcask writes data to disk. If set to +erlang, writes are made via Erlang's built-in file API; if +set to nif, writes are made via direct calls to the POSIX C +API. The nif mode provides higher throughput for certain +workloads, but has the potential to negatively impact the Erlang VM, +leading to higher worst-case latencies and possible throughput collapse +erlang
bitcask.expiryBy default, Bitcask keeps all of your data around. If your data has +limited time value, or if you need to purge data for space reasons, you +can set the expiry option. For example, if you need to +purge data automatically after 1 day, set the value to 1d. +off disables automatic expirationoff
bitcask.expiry.grace_timeBy default, Bitcask will trigger a merge whenever a data file +contains an expired key. This may result in excessive merging under some +usage patterns. To prevent this you can set the +bitcask.expiry.grace_time option. Bitcask will defer +triggering a merge solely for key expiry by the configured number of +seconds. Setting this to 1h effectively limits each cask to +merging for expiry once per hour.0
bitcask.hintfile_checksumsWhether to allow the CRC to be present at the end of hintfiles. +Setting this to allow_missing runs Bitcask in a +backwards-compatible mode in which old hint files will still be accepted +without CRC signatures.strict
bitcask.fold.max_putsSee the description for the bitcask.fold.max_age +config directly below.0
bitcask.fold.max_ageFold keys thresholds will reuse the keydir if another fold was +started less than fold.max_age ago and there were fewer +than fold.max_puts updates. Otherwise, it will wait until +all current fold keys complete and then start. Set either option to +unlimited to disable.unlimited
bitcask.merge.thresholds.fragmentationDescribes which ratio of dead keys to total keys in a file will +cause it to be included in the merge. The value of this setting is a +percentage from 0 to 100. For example, if a data file contains 4 dead +keys and 6 live keys, it will be included in the merge at the default +ratio (which is 40). Increasing the value will cause fewer files to be +merged, decreasing the value will cause more files to be merged.40
bitcask.merge.thresholds.dead_bytesDescribes the minimum amount of data occupied by dead keys in a file +to cause it to be included in the merge. Increasing the value will cause +fewer files to be merged, whereas decreasing the value will cause more +files to be merged.128MB
bitcask.merge.thresholds.small_fileDescribes the minimum size a file must have to be excluded from the +merge. Files smaller than the threshold will be included. Increasing +the value will cause more files to be merged, whereas decreasing the +value will cause fewer files to be merged.10MB
bitcask.merge.triggers.dead_bytesDescribes how much data stored for dead keys in a single file will +trigger merging. If a file meets or exceeds the trigger value for dead +bytes, merge will be triggered. Increasing the value will cause merging +to occur less often, whereas decreasing the value will cause merging to +happen more often. When either of these constraints are met by any file +in the directory, Bitcask will attempt to merge files.512MB
bitcask.merge.triggers.fragmentationDescribes which ratio of dead keys to total keys in a file will +trigger merging. The value of this setting is a percentage from 0 to +100. For example, if a data file contains 6 dead keys and 4 live keys, +then merge will be triggered at the default setting. Increasing this +value will cause merging to occur less often, whereas decreasing the +value will cause merging to happen more often.60
bitcask.merge.window.endSee the description of the bitcask.merge.policy config +below.23
bitcask.merge.window.startSee the description of the bitcask.merge.policy config +below.0
bitcask.merge.policyLets you specify when during the day merge operations are allowed to +be triggered. Valid options are: always, meaning no +restrictions; never, meaning that merging will never be +attempted; and window, specifying the hours during which +merging is permitted, where bitcask.merge.window.start and +bitcask.merge.window.end are integers between 0 and 23. If +merging has a significant impact on performance of your cluster, or your +cluster has quiet periods in which little storage activity occurs, you +may want to change this setting from the default.always
bitcask.merge_check_intervalBitcask periodically runs checks to determine whether merges are +necessary. This parameter determines how often those checks take place. +Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, +etc.3m
bitcask.merge_check_jitterIn order to prevent merge operations from taking place on different +nodes at the same time, Riak can apply random variance to merge times, +expressed as a percentage of bitcask.merge_check_interval. +30%
bitcask.max_merge_sizeMaximum amount of data to merge in one go in the Bitcask backend. +100GB
bitcask.max_file_sizeDescribes the maximum permitted size for any single data file in the +Bitcask directory. If a write causes the current file to exceed this +size threshold then that file is closed, and a new file is opened for +writes.2GB
bitcask.sync.intervalSee the description of the bitcask.sync.strategy +directly below.
bitcask.sync.strategyChanges the durability of writes by specifying when to synchronize +data to disk. The default setting protects against data loss in the +event of application failure (process death) but leaves open a small +window in which data could be lost in the event of complete system +failure (e.g. hardware, OS, or power). The default mode, +none, writes data into operating system buffers which will +be written to the disks when those buffers are flushed by the operating +system. If the system fails, e.g. due power loss or crash, that data is +lost before those buffers are flushed to stable storage. This is +prevented by the setting o_sync, which forces the operating +system to flush to stable storage at every write. The effect of flushing +each write is better durability, however write throughput will suffer as +each write will have to wait for the write to complete. Available sync +strategies: none, which will let the operating system +manage syncing writes; o_sync, which will uses the +O_SYNC flag to force syncs on every write; and +interval, by which will force Bitcask to sync every +bitcask.sync.interval seconds.none
bitcask.open_timeoutSpecifies the maximum time Bitcask will block on startup while +attempting to create or open the data directory. You generally need not +change this value. If for some reason the timeout is exceeded on open +you'll see a log message of the form Failed to start bitcask +backend: .... . Only then should you consider a longer timeout. +4s
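To tie these Bitcask settings together, here is a hedged sketch of a `riak.conf` fragment that confines merging to a nightly window and raises the dead-bytes trigger; the values are illustrative only, not recommendations:

```riakconf
# Allow merges only between 11 PM and 5 AM (hours are integers from 0 to 23)
bitcask.merge.policy = window
bitcask.merge.window.start = 23
bitcask.merge.window.end = 5
# Trigger a merge once a single file holds 1GB of data for dead keys
bitcask.merge.triggers.dead_bytes = 1GB
```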
+ +## Memory Backend + +Configurable parameters for Riak's [Memory][plan backend memory] backend. + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
memory_backend.ttlEach value written will be written with this "time to live." Once +that object's time is up, it will be deleted on the next read of its +key. Minimum: 1s.
memory_backend.max_memory_per_vnodeThe maximum amount of memory consumed per vnode by the memory +storage backend. Minimum: 1MB.
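As a brief sketch, a node that should expire memory-backend objects after one day and cap each vnode at 1GB might use the following; both values are illustrative:

```riakconf
memory_backend.ttl = 1d
memory_backend.max_memory_per_vnode = 1GB
```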
+ +## Multi Backend + +Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster. + +If you are using multiple backends, you can configure the backends +individually by prepending the setting with `multi_backend.$name`, where +`$name` is the name of the backend. `$name` can be any valid +configuration word, like `customer_data`, `my_data`, `foo_bar_backend`, +etc. + +Below is the general form for setting multi-backend parameters: + +```riakconf +multi_backend.$name.(existing_setting) = <value> +# or +multi_backend.$name.$backend_type.(backend_specific_setting) = <value> +``` + +Below is a listing of the available parameters: + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
multi_backend.$name.storage_backendThis parameter specifies the Erlang module defining the storage +mechanism that will be used on this node.bitcask
multi_backend.defaultThe default name of a backend when one is not specified.
+ +To give an example, if you have a LevelDB backend named +`customer_backend` and wish to set the `data_root` parameter to +`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would +do so as follows: + +```riakconf +multi_backend.customer_backend.storage_backend = leveldb +multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend +multi_backend.customer_backend.leveldb.maximum_memory.percent = 50 +``` diff --git a/content/riak/kv/2.2.6/configuring/basic.md b/content/riak/kv/2.2.6/configuring/basic.md new file mode 100644 index 0000000000..867d0c3cff --- /dev/null +++ b/content/riak/kv/2.2.6/configuring/basic.md @@ -0,0 +1,235 @@ +--- +title: "Basic Riak KV Configuration" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Basic Configuration" + identifier: "configuring_basic" + weight: 100 + parent: "configuring" +toc: true +aliases: + - /riak/2.2.6/ops/building/configuration/ + - /riak/kv/2.2.6/ops/building/configuration/ +--- + +[config reference]: {{}}riak/kv/2.2.6/configuring/reference +[use running cluster]: {{}}riak/kv/2.2.6/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.2.6/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.2.6/using/performance/erlang +[plan start]: {{}}riak/kv/2.2.6/setup/planning/start +[plan best practices]: {{}}riak/kv/2.2.6/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.2.6/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.2.6/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.2.6/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.2.6/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.2.6/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.2.6/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.2.6/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.2.6/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.2.6/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.2.6/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.2.6/using/performance +[perf aws]: {{}}riak/kv/2.2.6/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.2.6/setup/planning/cluster-capacity/#ring-size-number-of-partitions + +This document covers the parameters that are commonly adjusted when +setting up a new cluster. We recommend that you also review the detailed +[Configuration Files][config reference] document before moving a cluster into +production. + +All configuration values discussed here are managed via the +configuration file on each node, and a node must be restarted for any +changes to take effect. + +> **Note** +> +> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0 +release, you can use either your old `app.config` configuration file or +the newer `riak.conf` if you wish. +> +> If you have installed Riak KV 2.0 directly, you should use only +`riak.conf`. +> +> More on configuring Riak KV can be found in the [configuration files][config reference] +doc. + +We advise that you make as many of the changes below as practical +_before_ joining the nodes together into a cluster.
Once your +configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process. + +Use [`riak-admin member-status`][use admin riak-admin#member-status] +to determine whether any given node is a member of a cluster. + +## Erlang VM Tunings + +Prior to building and starting a cluster, there are some +Erlang-VM-related changes that you should make to your configuration +files. If you are using the older, `vm.args`-based Erlang VM tunings, +you should set the following: + +```vmargs ++sfwi 500 ++scl false +``` + +If you are using the newer, `riak.conf`-based configuration system, we +recommend the following settings: + +```riakconf +erlang.schedulers.force_wakeup_interval = 500 +erlang.schedulers.compaction_of_load = false +``` + +More information can be found in [Erlang VM Tuning][perf erlang]. + +## Ring Size + +The ring size, in Riak parlance, is the number of data partitions that +comprise the cluster. This quantity impacts the scalability and +performance of a cluster and, importantly, **it should be established +before the cluster starts receiving data**. + +If the ring size is too large for the number of servers, disk I/O will +be negatively impacted by the excessive number of concurrent databases +running on each server; if the ring size is too small, the servers' other +resources (primarily CPU and RAM) will go underutilized. + +See [Cluster Capacity Planning] for more details on choosing a ring size. + +The steps involved in changing the ring size depend on whether the +servers (nodes) in the cluster have already been joined together. + +### Cluster joined, but no data needs to be preserved + +1. Change the ring creation size parameter by uncommenting it and then +setting it to the desired value, for example 64: + + ```riakconf + ring_size = 64 + ``` + + ```appconfig + %% In the riak_core section: + {ring_creation_size, 64} + ``` + +2. Stop all nodes +3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file) +4. Start all nodes +5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster] + +### New servers, have not yet joined a cluster + +1. Change the ring creation size parameter by uncommenting it and then +setting it to the desired value, for example 64: + + ```riakconf + ring_size = 64 + ``` + + ```appconfig + %% In the riak_core section: + {ring_creation_size, 64} + ``` + +2. Stop all nodes +3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for +the location of this file) +4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster] + +### Verifying ring size + +You can use the `riak-admin` command to verify the ring size: + +```bash +riak-admin status | grep ring +``` + +Console output: + +``` +ring_members : ['riak@10.160.13.252'] +ring_num_partitions : 8 +ring_ownership : <<"[{'riak@10.160.13.252',8}]">> +ring_creation_size : 8 +``` + +If `ring_num_partitions` and `ring_creation_size` do not agree, that +means that the `ring_creation_size` value was changed too late and that +the proper steps were not taken to start over with a new ring. + +**Note**: Riak will not allow two nodes with different ring sizes to be +joined into a cluster. + +## Backend + +Another critical decision to be made is the backend to use.
The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.2.6/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. diff --git a/content/riak/kv/2.2.6/configuring/global-object-expiration.md b/content/riak/kv/2.2.6/configuring/global-object-expiration.md new file mode 100644 index 0000000000..03a75134e1 --- /dev/null +++ b/content/riak/kv/2.2.6/configuring/global-object-expiration.md @@ -0,0 +1,85 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
+menu: + riak_kv-2.2.6: + name: "Global Object Expiration" + identifier: "config_expiry" + weight: 180 + parent: "configuring" +project: "riak_kv" +project_version: "2.2.6" +toc: true +--- + +[ttl]: https://en.wikipedia.org/wiki/Time_to_live + +By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data. + +Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value. + +## Enabling Expiry + +To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file: + +```riak.conf +leveldb.expiration = on +``` + +{{% note %}} +Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration. +{{% /note %}} + +## Setting Retention Time + +The `retention_time` setting is used to specify the time until objects expire. +Durations are set using a combination of an integer and a shortcut for the supported units: + +- Milliseconds - `ms` +- Seconds - `s` +- Minutes - `m` +- Hours - `h` +- Days - `d` +- Weeks - `w` +- Fortnight - `f` + +The following example configures objects to expire after 5 hours: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 5h +``` + +You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 8d9h +``` + +## Expiry Modes + +Global expiration supports two modes: + +- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired. +- `normal` - individual objects are removed as part of the usual compaction process. + +We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient. + +The following example configures objects to expire after 1 day: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 1d +leveldb.expiration.mode = whole_file +``` + +## Disable Expiry + +To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other 2 settings are ignored. For example: + +```riak.conf +leveldb.expiration = off +leveldb.expiration.retention_time = 1d +leveldb.expiration.mode = whole_file +``` diff --git a/content/riak/kv/2.2.6/configuring/load-balancing-proxy.md b/content/riak/kv/2.2.6/configuring/load-balancing-proxy.md new file mode 100644 index 0000000000..b98993aef3 --- /dev/null +++ b/content/riak/kv/2.2.6/configuring/load-balancing-proxy.md @@ -0,0 +1,271 @@ +--- +title: "Load Balancing and Proxy Configuration" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Load Balancing & Proxy" + identifier: "configuring_load_balance" + weight: 150 + parent: "configuring" +toc: true +aliases: + - /riak/2.2.6/ops/advanced/configs/load-balanacing-proxy/ + - /riak/kv/2.2.6/ops/advanced/configs/load-balanacing-proxy/ +--- + +[perf open files]: {{}}riak/kv/2.2.6/using/performance/open-files-limit + +The recommended best practice for operating Riak in production is to +place Riak behind a load-balancing or proxy solution, either hardware- +or software-based, while never directly exposing Riak to public network +interfaces.
+ +Riak users have reported success in using Riak with a variety of load- +balancing and proxy solutions. Common solutions include proprietary +hardware-based load balancers, cloud-based load balancing options, such +as Amazon's Elastic Load Balancer, and open-source software-based +projects like HAProxy and Nginx. + +This guide briefly explores the commonly used open-source software-based +solutions HAProxy and Nginx, and provides some configuration and +operational tips gathered from community users and operations-oriented +engineers at Basho. + +While it is by no means an exhaustive overview of the topic, this guide +should provide a starting point for choosing and implementing your own +solution. + +## HAProxy + +[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source +solution for load balancing and proxying of HTTP- and TCP-based +application traffic. + +Users have reported success in using HAProxy in combination with Riak in +a number of configurations and scenarios. Much of the information and +example configuration for this section is drawn from experiences of +users in the Riak community in addition to suggestions from Basho +engineering. + +### Example Configuration + +The following is an example starting-point configuration for HAProxy to +act as a load balancer. The example cluster has 4 nodes and will be +accessed by Riak clients using both the Protocol Buffers and HTTP +interfaces. + +> **Note on open files limits** +> +> The operating system's open files limits need to be greater than 256000 +for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems. + +```config +global + log 127.0.0.1 local0 + log 127.0.0.1 local1 notice + maxconn 256000 + chroot /var/lib/haproxy + user haproxy + group haproxy + spread-checks 5 + daemon + quiet + +defaults + log global + option dontlognull + option redispatch + option allbackups + maxconn 256000 + timeout connect 5000 + +backend riak_rest_backend + mode http + balance roundrobin + option httpchk GET /ping + option httplog + server riak1 riak1.<fqdn>:8098 weight 1 maxconn 1024 check + server riak2 riak2.<fqdn>:8098 weight 1 maxconn 1024 check + server riak3 riak3.<fqdn>:8098 weight 1 maxconn 1024 check + server riak4 riak4.<fqdn>:8098 weight 1 maxconn 1024 check + +frontend riak_rest + bind 127.0.0.1:8098 + # Example bind for SSL termination + # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem + mode http + option contstats + default_backend riak_rest_backend + + +backend riak_protocol_buffer_backend + balance leastconn + mode tcp + option tcpka + option srvtcpka + server riak1 riak1.<fqdn>:8087 weight 1 maxconn 1024 check + server riak2 riak2.<fqdn>:8087 weight 1 maxconn 1024 check + server riak3 riak3.<fqdn>:8087 weight 1 maxconn 1024 check + server riak4 riak4.<fqdn>:8087 weight 1 maxconn 1024 check + + +frontend riak_protocol_buffer + bind 127.0.0.1:8087 + mode tcp + option tcplog + option contstats + option tcpka + option srvtcpka + default_backend riak_protocol_buffer_backend +``` + +A specific configuration detail worth noting from the example is the +commented option for SSL termination. HAProxy supports SSL directly as +of version 1.5. Provided that your HAProxy instance was built with +OpenSSL support, you can enable it by uncommenting the example line and +modifying it to suit your environment. More information is available in +the [HAProxy +documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl).
+ +Also note that the above example is considered a starting point and is a +work in progress based upon [this +example](https://gist.github.com/1507077). You should carefully examine +the configuration and change it according to your specific environment. + +### Maintaining Nodes Behind HAProxy + +When using HAProxy with Riak, you can instruct HAProxy to ping each node +in the cluster and automatically remove nodes that do not respond. + +You can also specify a round-robin configuration in HAProxy and have +your application handle connection failures by retrying after a timeout, +thereby reaching a functioning node upon retrying the connection +attempt. + +HAProxy also has a standby system you can use to remove a node from +rotation while allowing existing requests to finish. You can remove +nodes from HAProxy directly from the command line by interacting with +the HAProxy stats socket with a utility such as +[socat](http://www.dest-unreach.org/socat/): + +```bash +echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock +``` + +At this point, you can perform maintenance on the node, down the node, +and so on. When you've finished working with the node and it is again +available for requests, you can re-enable it: + +```bash +echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock +``` + +Consult the following HAProxy documentation resources for more +information on configuring HAProxy in your environment: + +* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy) +* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt) + +## Nginx + +Some users have reported success in using the [Nginx](http://nginx.org/) +HTTP server to proxy requests for Riak clusters. An example that +provides access to a Riak cluster *through GET requests only* is +provided here for reference. + +### Example Configuration + +The following is an example starting point configuration for Nginx to +act as a front-end proxy to a 5-node Riak cluster. + +This example forwards all GET requests to Riak nodes while rejecting all +other HTTP operations. + +{{% note title="Nginx version notes" %}} +This example configuration was verified on **Nginx version 1.2.3**. Please be +aware that earlier versions of Nginx did not support any HTTP 1.1 semantics +for upstream communication to backends. You should carefully examine this +configuration and make changes appropriate to your specific environment before +attempting to use it. +{{% /note %}} + +Here is an example `nginx.conf` file: + +```config +upstream riak_hosts { + # server 10.0.1.10:8098; + # server 10.0.1.11:8098; + # server 10.0.1.12:8098; + # server 10.0.1.13:8098; + # server 10.0.1.14:8098; +} + +server { + listen 80; + server_name _; + access_log /var/log/nginx/riak.access.log; + + # your standard Nginx config for your site here...
+ location / { + root /var/www/nginx-default; + } + + # Expose the /riak endpoint and allow queries for keys only + location /riak/ { + proxy_set_header Host $host; + proxy_redirect off; + + client_max_body_size 10m; + client_body_buffer_size 128k; + + proxy_connect_timeout 90; + proxy_send_timeout 90; + proxy_read_timeout 90; + + proxy_buffer_size 64k; # If set to a smaller value, + # nginx can complain with a + # "too large headers" error + proxy_buffers 4 64k; + proxy_busy_buffers_size 64k; + proxy_temp_file_write_size 64k; + + if ($request_method != GET) { + return 405; + } + + # Disallow any link with the MapReduce query format "bucket,tag,_" + if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) { + return 405; + } + + if ($request_method = GET) { + proxy_pass http://riak_hosts; + } + } +} +``` + +{{% note title="Note on access controls" %}} +Even when filtering and limiting requests to GETs only as done in the example, +you should strongly consider additional access controls beyond what Nginx can +provide directly, such as specific firewall rules to limit inbound connections +to trusted sources. +{{% /note %}} + +### Querying Secondary Indexes Over HTTP + +When accessing Riak over HTTP and issuing Secondary Index queries, you +can encounter an issue due to the default Nginx handling of HTTP header +names containing underscore (`_`) characters. + +By default, Nginx will issue errors for such queries, but you can +instruct Nginx to handle such header names when doing Secondary Index +queries over HTTP by adding the following directive to the appropriate +`server` section of `nginx.conf`: + +``` +underscores_in_headers on; +``` diff --git a/content/riak/kv/2.2.6/configuring/managing.md b/content/riak/kv/2.2.6/configuring/managing.md new file mode 100644 index 0000000000..1a17178be0 --- /dev/null +++ b/content/riak/kv/2.2.6/configuring/managing.md @@ -0,0 +1,116 @@ +--- +title: "Managing Your Configuration" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Managing Configuration" + identifier: "configuring_managing" + weight: 130 + parent: "configuring" +toc: true +--- + +[use admin riak cli]: {{}}riak/kv/2.2.6/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.2.6/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.2.6/configuring/reference/#search + +## Retrieving a Configuration Listing + +At any time, you can get a snapshot of currently applied configurations +through the command line. For a listing of *all* of the configs +currently applied in the node: + +```bash +riak config effective +``` + +This will output a long list of the following form: + +``` +anti_entropy = active +anti_entropy.bloomfilter = on +anti_entropy.concurrency_limit = 2 +# and so on +``` + +For detailed information about a particular configuration variable, use +the `config describe <variable>` command. This command will output a +description of what the parameter configures, which datatype you should +use to set the parameter (integer, string, enum, etc.), the default +value of the parameter, the currently set value in the node, and the +name of the parameter in `app.config` in older versions of Riak (if +applicable). + +For in-depth information about the `ring_size` variable, for example: + +```bash +riak config describe ring_size +``` + +This will output the following: + +``` +Documentation for ring_size +Number of partitions in the cluster (only valid when first +creating the cluster). Must be a power of 2, minimum 8 and maximum +1024.
+ + Datatype : [integer] + Default Value: 64 + Set Value : undefined + app.config : riak_core.ring_creation_size +``` + +## Checking Your Configuration + +The [`riak`][use admin riak cli] command line tool has a +[`chkconfig`][use admin riak cli#chkconfig] command that enables you to +determine whether the syntax in your configuration files is correct. + +```bash +riak chkconfig +``` + +If your configuration files are syntactically sound, you should see the +output `config is OK` followed by a listing of files that were checked. +You can safely ignore this listing. If, however, something is +syntactically awry, you'll see an error output that provides details +about what is wrong. To give an example, the `search.solr.jmx_port` +setting (in the [Search][config reference#search] section below) +must be set as an integer. Imagine that we set it to something else: + +```riakconf +search.solr.jmx_port = banana +``` + +If we run `riak chkconfig` now, we'll get an error: + +``` +[error] Error generating configuration in phase transform_datatypes +[error] Error transforming datatype for: search.solr.jmx_port +[error] "banana" can't be converted to an integer +``` + +The error message will specify which configurable parameters are +syntactically unsound and attempt to provide an explanation of why. + +Please note that the `chkconfig` command only checks for syntax. It will +_not_ be able to discern if your configuration is otherwise unsound, +e.g. if your configuration will cause problems on your operating system +or doesn't activate subsystems that you would like to use. + +## Debugging Your Configuration + +If there is a problem with your configuration but you're having trouble +identifying the problem, there is a command that you can use to debug +your configuration: + +```bash +riak config generate -l debug +``` + +If there are issues with your configuration, you will see detailed +output that might provide a better sense of what has gone wrong in the +config generation process. diff --git a/content/riak/kv/2.2.6/configuring/mapreduce.md b/content/riak/kv/2.2.6/configuring/mapreduce.md new file mode 100644 index 0000000000..caa8b44145 --- /dev/null +++ b/content/riak/kv/2.2.6/configuring/mapreduce.md @@ -0,0 +1,196 @@ +--- +title: "MapReduce Settings" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "MapReduce Settings" + identifier: "configuring_mapreduce" + weight: 170 + parent: "configuring" +toc: true +aliases: + - /riak/2.2.6/ops/advanced/configs/mapreduce/ + - /riak/kv/2.2.6/ops/advanced/configs/mapreduce/ +--- + +[usage mapreduce]: {{}}riak/kv/2.2.6/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.2.6/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.2.6/developing/usage/secondary-indexes + +## Configuring MapReduce + +[MapReduce (M/R)][usage mapreduce] is always enabled, but configurable +through the [app.config][config reference#appconfig] file as +follows under `riak_kv`: + +```erlang +{riak_kv, [ +``` + +`mapred_name` is the URL directory used to submit M/R requests to Riak. +By default this is `mapred`, making the command path, for example, +`http://localhost:8098/mapred`. + +```erlang + {mapred_name, "mapred"}, +``` + +`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes] +MapReduce inputs are queued in parallel in their own pipe (`true`), or +serially through a helper process (`false` or undefined). + +> **Note**: Set to `false` or leave undefined during an upgrade from 1.0.
+ +```erlang + {mapred_2i_pipe, true}, +``` + +Each of these entries controls how many Javascript virtual machines are +available for executing map, reduce, pre- and post-commit hook +functions. + +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {map_js_vm_count, 8 }, + {reduce_js_vm_count, 6 }, + {hook_js_vm_count, 2 }, +``` + +`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated +to the Javascript VMs. If unset, the default is 8MB. + +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {js_max_vm_mem, 8}, +``` + +`js_thread_stack` is the maximum amount of thread stack, in megabytes, +allocated to the Javascript VMs. If unset, the default is 16MB. + +> **Note**: This is not the same as the C thread stack. + +```erlang + {js_thread_stack, 16}, +``` + +`js_source_dir` should point to a directory containing Javascript source +files which will be loaded when Riak initializes Javascript VMs. + +```erlang + %{js_source_dir, "/tmp/js_source"}, +``` + + + +## Configuration Tuning for Javascript + +If you load larger JSON objects into your buckets, you might encounter an error like the following: + +```json + {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"} +``` + + +You can increase the amount of memory allocated to the Javascript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB: + +```erlang +{js_thread_stack, 8} +``` + +becomes + +```erlang +{js_thread_stack, 32}, +``` + +In addition to increasing the amount of memory allocated to the stack, you can also increase the heap size by raising `js_max_vm_mem` from the default of 8MB. If you are collecting a large number of results in a reduce phase, you may need to increase this setting. + +## Configuration for Riak 1.0 + +Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config: + +```erlang +%% Use Riak Pipe to power MapReduce queries +{mapred_system, pipe}, +``` + +> **Warning:** +> +> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0. + +Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this: + +```erlang +%% Use the legacy MapReduce system +{mapred_system, legacy}, +``` + +## Configuration Tuning for Reduce Phases + +If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases. + +### Batch Size + +By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs.
If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config: + +```erlang +%% Run reduce functions after 100 new inputs are received +{mapred_reduce_phase_batch_size, 100}, +``` + +You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_batch_size":150}}} +``` + +In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form: + +```erlang +{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep} +``` + +Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_only_1":true}}} +``` + +Similarly, in Erlang: + +```erlang +{reduce, FunSpec, [reduce_phase_only_1], Keep} +``` + +> **Warning:** +> +> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1. + +### Pre-Reduce + +If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced local to the partition that produced them, before being sent, as usual, to the final aggregate reduce. + +Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config: + +```erlang +%% Always pre-reduce between map and reduce phases +{mapred_always_prereduce, true} +``` + +Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag: + +```erlang +{map, FunSpec, [do_prereduce], Keep} +``` + +> **Warning:** +> +>A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1. 
diff --git a/content/riak/kv/2.2.6/configuring/reference.md b/content/riak/kv/2.2.6/configuring/reference.md new file mode 100644 index 0000000000..a90887517c --- /dev/null +++ b/content/riak/kv/2.2.6/configuring/reference.md @@ -0,0 +1,1984 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/2.2.6/ops/advanced/configs/configuration-files/ + - /riak/kv/2.2.6/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. + +## Node Metadata + +Every Riak node has a name and a cookie used to facilitate inter-node +communication. The following parameters enable you to customize the name +and cookie. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
distributed_cookieCookie for distributed node communication within a Riak cluster. +All nodes in the same cluster should use the same cookie or they will +not be able to communicate.riak
nodenameThe name of the Riak node.riak@127.0.0.1
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
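For example, a node's metadata settings in `riak.conf` might look like the following; the node name, cookie, and ring size shown are placeholders, not recommendations:

```riakconf
nodename = riak@192.168.1.10
distributed_cookie = my_cluster_cookie
ring_size = 128
```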
+ +## Ring + +Configurable parameters for your cluster's [ring][concept clusters]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ring.state_dirDefault location of ringstate../data/ring
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
transfer_limitNumber of concurrent node-to-node transfers allowed.2
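To illustrate, the following sketch relocates the ring state directory using the `$(...)` substitution described under Directories below and doubles the transfer limit; both values are examples only:

```riakconf
ring.state_dir = $(platform_data_dir)/ring
transfer_limit = 4
```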
+ +## Storage Backend + +Riak enables you to choose from the following storage backends: + +* [Bitcask][plan backend bitcask] --- [configuration][config backend bitcask] +* [LevelDB][plan backend leveldb] --- [configuration][config backend leveldb] +* [Memory][plan backend memory] --- [configuration][config backend memory] +* [Multi][plan backend multi] --- [configuration][config backend multi] + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
storage_backendSpecifies the storage engine used for Riak's key-value data and +secondary indexes (if supported).

The available options are +bitcask (the default), leveldb, +memory, and multi.
bitcask
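Selecting a backend is a one-line change. For example, to run a new node on LevelDB rather than the default Bitcask:

```riakconf
storage_backend = leveldb
```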
+ +## Directories + +The directories in which Riak stores data, logs, dependencies, +executables, and configuration files can be configured using the +parameters below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
platform_bin_dirThe directory in which the riak-admin, +riak-debug, and now-deprecated search-cmd +executables are stored../bin
platform_data_dirThe directory in which Riak stores its storage backend data, as well +as active anti-entropy data, and cluster metadata../data
platform_etc_dirThe directory in which Riak's configuration files are stored../etc
platform_lib_dirThe directory in which Riak's dependencies are housed../lib
platform_log_dirThe directory in which Riak's log files are stored, e.g. +console.log, erlang.log, and +crash.log files../log
+ +Each of these directory parameters can be used to construct values for +other parameters by placing them within a `$(...)`. Thus, +`platform_log_dir` becomes `$(platform_log_dir)` and so on. + +To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the +`anti_entropy.data_dir` parameter. When setting that parameter, you can +specify an absolute directory, as below: + +```riakconf +anti_entropy.data_dir = /path/to/anti_entropy +``` + +Or you can use the value of `platform_data_dir`: + +```riakconf +anti_entropy.data_dir = $(platform_data_dir)/anti_entropy +``` + +## Search + +Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings]. + +Field | Default | Valid values | +:-----|:--------|:-------------| +`search` | `off` | `on` or `off` +`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory +`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer +`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer +`search.dist_query` | `on` | `on` or `off` +`search.index.error_threshold.failure_count` | `3` | Integer +`search.index.error_threshold.failure_interval` | `5000` | Milliseconds +`search.index.error_threshold.reset_interval` | `30000` | Milliseconds +`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h` +`search.queue.batch.maximum`| `100` | Integer +`search.queue.batch.minimum` | `1` | Integer +`search.queue.high_watermark` | `10000` | Integer +`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off` +`search.root_dir` | `./data/yz` | Directory +`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments +`search.solr.jmx_port` | `8985` | Integer +`search.solr.port` | `8093` | Integer +`search.solr.start_timeout` | `30s` | Integer with time units (e.g. 2m) +`yokozuna.aae_throttle_enabled` | `on` | `on` or `off` + + +## Riak Control + +Riak Control is a web-based administrative console for inspecting and +manipulating Riak clusters. The configurable parameters below enable you +to turn the Riak Control subsystem on and off and to configure console +authorization. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
riak_controlSet to off to disable the admin panel.off
riak_control.auth.modeAuthentication mode used for access to the admin panel. Options are +off (which is the default) or userlist.off
riak_control.auth.user.$username.passwordIf Riak Control's authentication mode +(riak_control.auth.mode) is set to userlist, +this is the list of usernames and passwords for access to the admin +panel.
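As an illustrative sketch, enabling Riak Control with userlist authentication might look like this; the username and password are placeholders and should be replaced with your own:

```riakconf
riak_control = on
riak_control.auth.mode = userlist
# Defines the password for a user named "admin"
riak_control.auth.user.admin.password = example-password
```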
+ +## Runtime Health + +Configurable parameters for interaction between Riak and the underlying +operating system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
runtime_health.triggers.distribution_portWhether distribution ports with full input buffers will be counted +as busy. Distribution ports connect Riak nodes within a single cluster. +on
runtime_health.triggers.portWhether ports with full input buffers will be counted as busy. +Ports can represent open files or network sockets.on
runtime_health.triggers.process.heap_sizeA process will become busy when its heap exceeds this size +(in bytes).160444000
runtime_health.triggers.process.garbage_collectionA process will become busy when it exceeds this amount of time doing +garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 +milliseconds, `5s` for 5 seconds, etc. Note: Enabling +this setting can cause performance problems on multi-core systems.off
runtime_health.triggers.process.long_scheduleA process will become busy when it exceeds this amount of time +during a single process scheduling and execution cycle. Set as an integer +plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, +etc.off
runtime_health.thresholds.busy_portsThe threshold at which a warning will be triggered about the number +of ports that are overly busy. Ports with full input buffers count +toward this threshold.2
runtime_health.thresholds.busy_processesThe threshold at which a warning will be triggered about the +number of processes that are overly busy. Processes with large heaps or +that take a long time to garbage collect will count toward this +threshold.30
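For example, to enable the garbage-collection trigger and raise the busy-ports threshold, you might set the following; the values are illustrative:

```riakconf
runtime_health.triggers.process.garbage_collection = 50ms
runtime_health.thresholds.busy_ports = 10
```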
+ +## Default Bucket Properties + +When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
buckets.default.allow_multWhether or not siblings are allowed +

+Note: See +Conflict Resolution for a discussion of siblings.
false
buckets.default.basic_quorumWhether not-founds will invoke the "basic quorum" optimization. +This setting will short-circuit fetches where the majority of replicas +report that the key is not found. Only used when +notfound_ok is set to false.false
buckets.default.dwThe number of replicas which must reply to a write request +indicating that the write was committed to durable storage for the write +to be deemed successful.quorum
buckets.default.last_write_winsWhether conflicting writes resolve via timestamp.false
buckets.default.merge_strategyThe strategy used when merging objects that potentially have +conflicts. The default is 2 in Riak 2.0 for typed buckets +and 1 for non-typed buckets. Setting this to 2 reduces sibling +creation through additional metadata on each sibling (also known as Dotted +Version Vectors). Setting this to 1, the default for +Riak 1.4 and earlier, may duplicate siblings that originated in the +same write.1
buckets.default.n_valThe number of replicas stored in **non-typed** buckets. For typed buckets, the default is 3 unless changed explicitly for that bucket type. +

+Note: See +Replication Properties +for further discussion.
3
buckets.default.notfound_okWhether not-founds will count toward a quorum of reads.true
buckets.default.postcommitA space-delimited list of functions that will be run after a value +is stored. Only Erlang functions are allowed, using the +module:function format.
buckets.default.precommitA space-delimited list of functions that will be run before a value +is stored, and that can abort the write. Only Erlang functions are +allowed, using the module:function format.
buckets.default.prThe number of primary, non-fallback replicas that must reply to a +read request.0
buckets.default.pwThe number of primary, non-fallback replicas which must reply to a +write request.0
buckets.default.rThe number of replicas which must reply to a read request.quorum
buckets.default.wThe number of replicas which must reply to a write request, +indicating that the write was received.quorum
buckets.default.rwThe number of replicas which must reply to a delete request.quorum
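As a sketch, a cluster that prefers timestamp-based conflict resolution and more replicas by default might set the following; these values are examples, not recommendations:

```riakconf
buckets.default.last_write_wins = true
buckets.default.n_val = 5
buckets.default.w = 3
```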
+ +## Object Settings + +Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
object.formatControls which binary representation of a riak value is stored on +disk. Options are 0, which will use the original +erlang:term_to_binary format but has a higher space +overhead, or 1, which will tell Riak to utilize a new +format for more compact storage of small values.1
object.siblings.maximumWriting an object with more than this number of siblings will send +a failure to the client.100
object.siblings.warning_thresholdWriting an object with more than this number of siblings will +generate a warning in the logs.25
object.size.maximumWriting an object larger than this will send a failure to the +client.50MB
object.size.warning_thresholdReading or writing objects larger than this size will write a +warning in the logs.5MB
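For example, to warn about oversized objects earlier and reject them at a lower ceiling than the defaults, a node might use settings like these (illustrative values):

```riakconf
object.size.warning_threshold = 1MB
object.size.maximum = 10MB
object.siblings.warning_threshold = 10
```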
+ +## Erlang VM + +In the older configuration system, the Erlang VM in which Riak runs was +configured using a `vm.args` file. In the new, `riak.conf`-based +system, the Erlang VM can be configured using the parameters in the +table below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
erlang.async_threadsThe number of threads in the Erlang VM's asynchronous thread pool. +The valid range is 0-1024. If thread support is not available, this +parameter will have no impact; if thread support is available, the +default value is 64. This is the equivalent of the +A flag. +More information can be found here. +64 (if thread support is available)
erlang.async_threads.stack_sizeIf thread support is available in your Erlang VM, this parameter +sets the amount of memory allocated to each asynchronous thread, which +you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, +which translates to 64-32768 KB on 32-bit architectures. Although there +is no default, we suggest a stack size of 16 kilowords, which translates +to 64 KB. This small default size has been chosen because the number of +asynchronous threads, set using the erlang.async_threads +parameter explained above, might be quite large. The 64 KB default is +enough for drivers delivered with Erlang/OTP but might not be large +enough to accommodate drivers that use the driver_async() +functionality, documented here.
erlang.distribution.net_ticktimeThe net kernel is an Erlang system process that provides various +forms of network monitoring. In a Riak cluster, one of the functions of +the net kernel is to periodically check node liveness. Tick +time is the frequency with which those checks happen. This +parameter determines that frequency. If you set +this parameter to 10, for example, the tick will occur once +every 10 seconds.
erlang.distribution.port_range.minimumFor ease of firewall configuration, the Erlang distribution can be +bound to a limited range of TCP ports. If this parameter is set, and +erlang.distribution.port_range.maximum is not set, only +this port will be used. If the minimum is unset, no restriction will be +made on the port range. Instead, Erlang will listen on a random +high-numbered port. More information here and here.
erlang.distribution.port_range.maximumSee the description for +erlang.distribution.port_range.minimum directly above. +
erlang.schedulers.force_wakeup_intervalSet the scheduler forced wakeup interval. All run queues will be +scanned each time period specified (in milliseconds). While there are +sleeping schedulers in the system, one scheduler will be woken for each +non-empty run queue found. An interval of zero disables this feature, +which is the default. This feature is a workaround for lengthy executing +native code, and native code that does not properly bump reductions. +More information here.
erlang.schedulers.compaction_of_loadEnables or disables the Erlang scheduler's compaction of load. When +enabled (which is the default), load balancing will strive to establish +a load distribution that causes as many scheduler threads as possible to +be fully loaded, i.e. not to run out of scheduled work. This is +accomplished by migrating load, such as running processes, into a +smaller set of schedulers when schedulers frequently run out of work. +When disabled, the frequency at which schedulers run out of work will +not be taken into account by the load balancing logic.true (enabled)
erlang.schedulers.utilization_balancingEnables or disables the Erlang scheduler's balancing of load. By +default, scheduler utilization balancing is disabled while scheduler +compaction of load is enabled, i.e. +erlang.schedulers.compaction_of_load is set to +true. In this state, the Erlang VM will strive for a load +distribution which causes as many scheduler threads as possible to be +fully loaded, i.e. to not run out of work. When load balancing is +enabled using this setting, the system will attempt to balance scheduler +utilization equally between schedulers.false (disabled)
erlang.distribution_buffer_sizeFor nodes with many busy_dist_port events, Basho +recommends raising the sender-side network distribution buffer size. +32MB may not be sufficient for some workloads and is a suggested +starting point. Erlangers may know this as +zdbbl. See more +here +.32MB
erlang.process_limitRaises the default Erlang process limit.256000
erlang.max_ets_tablesRaises the ETS table limit.256000
erlang.crash_dumpSets the location of crash dumps./log/erl_crash.dump
erlang.fullsweep_afterA non-negative integer which indicates how many times generational +garbage collections can be done without forcing a fullsweep collection. +In low-memory systems (especially without virtual memory), setting the +value to 0 can help to conserve memory. More information here. +0
erlang.max_portsThe number of concurrent ports/sockets. The valid range is 1024 to +134217727.65536
erlang.KEnables or disables the kernel poll functionality if the emulator +supports it. If the emulator does not support kernel poll, and the +K flag is passed to the emulator, a warning is issued at +startup. Similar information here.on
erlang.schedulers.totalSets the number of scheduler threads to create and scheduler +threads to set online when erlang.smp support has been +enabled. The maximum for both values is 1024. If the Erlang runtime +system is able to determine the amount of logical processors configured +and logical processors available, schedulers.total will +default to logical processors configured, and +schedulers.online will default to the number of logical +processors available. Otherwise, the default values will be 1. +Schedulers may be omitted if schedulers.online is not and +vice versa. If schedulers.total or +schedulers.online is specified as a negative number, the +value is subtracted from the default number of logical processors +configured or logical processors available, respectively. Specifying +the value 0 for Schedulers or +SchedulersOnline resets the number of scheduler threads or +scheduler threads online respective to its default value. This option +is ignored if the emulator doesn't have SMP support enabled (see the +erlang.smp flag). More information +here. +
erlang.schedulers.onlineSee the description for erlang.schedulers.total +directly above.
erlang.WSets the mapping of warning messages for error_logger. +Messages sent to the error logger using one of the warning routines can +be mapped either to errors, warnings (w, +which is the default), or info reports (i).w
erlang.smpStarts the Erlang runtime system with SMP support enabled. This may +fail if no runtime system with SMP support is available. The +auto setting starts the Erlang runtime system with SMP +support enabled if it is available and more than one logical processor +is detected. A value of disable starts a runtime system +without SMP support. Note: The runtime system with SMP +support will not be available on all supported platforms. See also the +erlang.schedulers settings. Some native extensions (NIFs) +require use of the SMP emulator. More information here.enable
erlang.shutdown_timeLimits how long the Erlang VM spends shutting down. After the +specified duration elapses, all existing processes are killed.10s
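As an example, a node logging frequent busy_dist_port events might raise the distribution buffer and pin the distribution port range for easier firewalling; the values below are illustrative:

```riakconf
erlang.distribution_buffer_size = 64MB
erlang.distribution.port_range.minimum = 6000
erlang.distribution.port_range.maximum = 7999
```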
+
+## JavaScript MapReduce
+
+Configurable parameters for Riak's now-deprecated JavaScript
+[MapReduce][usage mapreduce] system.
+
+Config | Description | Default
+:------|:------------|:-------
+`javascript.source_dir` | A directory containing the JavaScript source files which will be loaded by Riak when it initializes JavaScript VMs. |
+`javascript.maximum_stack_size` | The maximum amount of thread stack memory to allocate to each JavaScript virtual machine. | `16MB`
+`javascript.maximum_heap_size` | The maximum amount of memory allocated to each JavaScript virtual machine. | `8MB`
+`javascript.hook_pool_size` | The number of JavaScript virtual machines available for executing pre-commit hook functions. | `2`
+`javascript.reduce_pool_size` | The number of JavaScript virtual machines available for executing reduce functions. | `6`
+`javascript.map_pool_size` | The number of JavaScript virtual machines available for executing map functions. | `8`
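+
+If you still depend on the deprecated JavaScript MapReduce system, the pools can be
+tuned together. A minimal sketch, with example values only:
+
+```riakconf
+javascript.map_pool_size = 16
+javascript.reduce_pool_size = 8
+javascript.maximum_heap_size = 16MB
+```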
+
+## Security
+
+Configurable parameters for [Riak KV Security][security index].
+
+Config | Description | Default
+:------|:------------|:-------
+`ssl.cacertfile` | The default signing authority location for HTTPS. | `#(platform_etc_dir)/cacertfile.pem`
+`ssl.keyfile` | Default key location for HTTPS. | `#(platform_etc_dir)/key.pem`
+`ssl.certfile` | Default cert location for HTTPS. | `#(platform_etc_dir)/cert.pem`
+`secure_referer_check` | Measures were added to Riak 1.2 to counteract cross-site scripting and request-forgery attacks. Some reverse proxies cannot remove the `Referer` header, which makes serving data directly from Riak impossible. Turning this setting to `off` disables this security check. | `on`
+`check_crl` | Whether to check the certificate revocation list (CRL) of a client certificate. This defaults to `on`, but some CAs may not maintain or define a CRL, so this can be disabled if no CRL is available. | `on`
+`tls_protocols.sslv3` | Determines which SSL/TLS versions are allowed. By default, only TLS 1.2 is allowed, but other versions can be enabled if clients don't support the latest TLS standard. It is strongly recommended that SSLv3 not be enabled unless absolutely necessary. More than one protocol can be enabled at once; the `tls_protocols` parameters below can be used to turn different versions on and off. | `off`
+`tls_protocols.tlsv1.2` | | `on`
+`tls_protocols.tlsv1.1` | | `off`
+`tls_protocols.tlsv1` | | `off`
+`honor_cipher_order` | Whether to prefer the order in which the server lists its ciphers. When set to `off`, the client's preferred cipher order dictates which cipher is chosen. | `on`
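+
+As a sketch, a node serving HTTPS with TLS 1.2 only might combine these settings as
+follows (the certificate paths are examples and assume the certificates already
+exist on the node):
+
+```riakconf
+ssl.certfile = /etc/riak/cert.pem
+ssl.keyfile = /etc/riak/key.pem
+ssl.cacertfile = /etc/riak/cacertfile.pem
+tls_protocols.tlsv1.2 = on
+tls_protocols.sslv3 = off
+```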
+
+## Client Interfaces
+
+Configurable parameters for clients connecting to Riak either through
+Riak's Protocol Buffers or HTTP API.
+
+Config | Description | Default
+:------|:------------|:-------
+`protobuf.nagle` | Turns off Nagle's algorithm for Protocol Buffers connections. This is equivalent to setting the `TCP_NODELAY` option on the socket. | `off`
+`protobuf.backlog` | The maximum length to which the queue of pending connections may grow. If set, it must be an integer greater than zero. If you anticipate a huge number of connections being initialized simultaneously, set this number higher. | `128`
+`listener.protobuf.$name` | This is the IP address and TCP port to which the Riak Protocol Buffers interface will bind. | `{"127.0.0.1",8087}`
+`listener.http.$name` | This is the IP address and TCP port to which the Riak HTTP interface will bind. | `{"127.0.0.1",8098}`
+`listener.https.$name` | This is the IP address and TCP port to which the Riak HTTPS interface will bind. |
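+
+For example, to bind the Protocol Buffers and HTTP listeners to a non-loopback
+address (the address and the `internal` listener name here are examples):
+
+```riakconf
+listener.protobuf.internal = 10.0.0.1:8087
+listener.http.internal = 10.0.0.1:8098
+```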
+
+## Logging
+
+Configurable parameters for [lager](https://github.com/basho/lager),
+Riak's logging system.
+
+Config | Description | Default
+:------|:------------|:-------
+`log.console` | Where to emit the default log messages (typically at `info` severity). Possible values: `off`, which disables console log messages; `file`, which specifies that log messages will be output to the file specified by `log.console.file`; `console`, which outputs messages to standard output (seen when using `riak attach-direct`); or `both`, which outputs messages both to the file specified in `log.console.file` and to standard out. | `file`
+`log.console.file` | When `log.console` is set to `file` or `both`, this parameter determines the path of the file to which console messages will be logged. | `./log/console.log`
+`log.console.level` | The severity level of the console log. Possible values: `debug`, `info`, `warning`, `error`. | `info`
+`log.crash` | Whether to enable the crash log. | `on`
+`log.crash.file` | If the crash log is enabled, the file where its messages will be written. | `./log/crash.log`
+`log.crash.maximum_message_size` | Maximum size of individual messages in the crash log. | `64KB`
+`log.crash.rotation` | The schedule on which to rotate the crash log. | `$D0`
+`log.crash.rotation.keep` | The number of rotated crash logs to keep. When set to `current`, only the current open log file is kept. Otherwise, an integer can be specified. | `5`
+`log.crash.size` | Maximum size of the crash log before it is rotated. | `10MB`
+`log.error.file` | The file where error messages will be logged. | `./log/error.log`
+`log.error.messages_per_second` | Maximum number of `error_logger` messages to handle per second. | `100`
+`log.error.redirect` | Whether to redirect `error_logger` messages into lager. | `on`
+`log.syslog` | When set to `on`, enables log output to syslog. | `off`
+`log.syslog.facility` | Sets the facility level of syslog output if `log.syslog` is set to `on`. Possible values: `auth`, `authpriv`, `clock`, `cron`, `daemon`, `ftp`, `kern`, `lpr`, `mail`, `news`, `syslog`, `user`, `uucp`. In addition to these settings, you may also select `local0` through `local7`. | `daemon`
+`log.syslog.ident` | If `log.syslog` is set to `on`, this setting determines the prefix appended to each syslog message. | `riak`
+`log.syslog.level` | If `log.syslog` is set to `on`, this setting determines the log level of syslog output. Possible values: `alert`, `critical`, `debug`, `emergency`, `error`, `info`, `none`, `notice`, `warning`. | `info`
+`sasl` | Whether to enable `sasl`, Erlang's built-in error logger. | `off`
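+
+As an illustration, a node that logs to both the console file and standard output
+while also emitting to syslog might use the following sketch (values are examples):
+
+```riakconf
+log.console = both
+log.console.level = info
+log.syslog = on
+log.syslog.facility = daemon
+```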
+
+## Active Anti-Entropy
+
+Configurable parameters for Riak's active anti-entropy subsystem.
+
+Config | Description | Default
+:------|:------------|:-------
+`anti_entropy` | How Riak will repair out-of-sync keys. If set to `active`, out-of-sync keys will be repaired in the background; if set to `passive`, out-of-sync keys are only repaired on read; and if set to `active-debug`, verbose debugging information will be output. | `active`
+`search.anti_entropy.throttle` | Whether the distributed throttle for Active Anti-Entropy is enabled. | `on`
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | Sets the throttling tiers for Active Anti-Entropy. Each tier is a minimum Solrq queue length and a time-delay that the throttle should observe at that length and above. For example, `search.anti_entropy.throttle.tier1.solrq_queue_length = 0`, `search.anti_entropy.throttle.tier1.delay = 0ms`, `search.anti_entropy.throttle.tier2.solrq_queue_length = 40`, `search.anti_entropy.throttle.tier2.delay = 5ms`, etc. If configured, there must be a tier which includes a queue length of 0. Both `.solrq_queue_length` and `.delay` must be set for each tier. |
+`search.anti_entropy.throttle.$tier.delay` | See the description for `search.anti_entropy.throttle.$tier.solrq_queue_length` directly above. |
+`anti_entropy.bloomfilter` | Bloom filters are highly effective in shortcutting data queries that are destined to not find the requested key, though they tend to entail a small performance cost. | `on`
+`anti_entropy.max_open_files` | See the description for `anti_entropy.write_buffer_size` directly below. | `20`
+`anti_entropy.write_buffer_size` | The LevelDB options used by Active Anti-Entropy to generate the LevelDB-backed on-disk hashtrees. | `4MB`
+`anti_entropy.data_dir` | The directory where AAE hash trees are stored. | `./data/anti_entropy`
+`anti_entropy.trigger_interval` | The tick determines how often the Active Anti-Entropy manager looks for work to do (building/expiring trees, triggering exchanges, etc). Lowering this value will speed up the rate at which all replicas are synced across the cluster. Increasing the value is not recommended. | `15s`
+`anti_entropy.concurrency_limit` | Limit how many Active Anti-Entropy exchanges or builds can happen concurrently. | `2`
+`anti_entropy.tree.expiry` | Determines how often hash trees are expired after being built. Periodically expiring a hash tree ensures that the on-disk hash tree data stays consistent with the actual K/V backend data. It also helps Riak identify silent disk failures and bit rot. However, expiration is not needed for normal active anti-entropy operations and should be infrequent for performance reasons. The time can be specified with duration units (e.g. `1w`). | `1w`
+`anti_entropy.tree.build_limit.per_timespan` | See the description for `anti_entropy.tree.build_limit.number` directly below. | `1h`
+`anti_entropy.tree.build_limit.number` | Restrict how fast AAE can build hash trees. Building the tree for a given partition requires a full scan over that partition's data. Once built, trees stay built until they are expired. `.number` is the number of builds; `.per_timespan` is the amount of time in which that number of builds occurs. | `1`
+`anti_entropy.use_background_manager` | Whether AAE is to use a background process to limit AAE tree rebuilds. If set to `on`, this will help to prevent system response degradation under times of heavy load from multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
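+
+For example, a `riak.conf` excerpt that keeps AAE active while leaving its
+conservative build and expiry defaults in place might look like this (values are
+illustrative only):
+
+```riakconf
+anti_entropy = active
+anti_entropy.concurrency_limit = 2
+anti_entropy.tree.expiry = 1w
+```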
+
+## Intra-Cluster Handoff
+
+Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].
+
+Config | Description | Default
+:------|:------------|:-------
+`handoff.max_rejects` | The maximum number of times that a secondary system within Riak, such as Riak Search, can block handoff of primary key/value data. The approximate maximum duration that a vnode can be blocked can be determined by multiplying this setting by `vnode_management_timer`. If you want to prevent handoff from ever being blocked by a secondary system, set this parameter to `0`. | `6`
+`handoff.inbound` | Whether inbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.outbound` | Whether outbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.port` | Specifies the TCP port that Riak uses for intra-cluster data handoff. | `8099`
+`handoff.ssl.certfile` | To encrypt `riak_core` intra-cluster data handoff traffic, uncomment this line and edit its path to an appropriate certfile and keyfile. |
+`handoff.ssl.keyfile` | The keyfile paired with the certfile specified in `.certfile`. |
+`handoff.use_background_manager` | Whether Riak will use a background manager to limit K/V handoff. This can help to prevent system response degradation during times of heavy load caused by multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
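+
+A minimal sketch of these settings in `riak.conf` (values are examples):
+
+```riakconf
+handoff.port = 8099
+handoff.use_background_manager = on
+```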
+
+## Riak Data Types
+
+Config | Description | Default
+:------|:------------|:-------
+`datatypes.compression_level` | Whether serialized Data Types will use compression, and at what level. When set to an integer, the parameter refers to the aggressiveness of compression, on a scale from 0 to 9. `on` is equivalent to 6, whereas `off` is equivalent to 0. Higher values for compression tend to be more CPU intensive. | `1`
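+
+For example, to trade some CPU for smaller serialized Data Types you might raise
+the level (the value here is an example):
+
+```riakconf
+datatypes.compression_level = 6
+```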
+
+## SNMP
+
+Owing to lack of usage, SNMP support has been removed from Riak KV 2.2.6 and higher.
+
+## JMX
+
+Owing to lack of usage, JMX support has also been removed from Riak KV 2.2.6 and higher.
+
+## Strong Consistency
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment.
+
+Riak's strong consistency feature has a variety of tunable parameters
+that allow you to enable and disable strong consistency, modify the
+behavior of leaders and followers, set various timeouts, and more. More
+detailed information from an operations perspective can be found in our
+documentation on [managing strong consistency][cluster ops strong consistency].
+
+Strong consistency is disabled by default. The `strong_consistency`
+parameter enables you to turn it on. This setting is available in each
+node's `riak.conf` file.
+
+Config | Description | Default
+:------|:------------|:-------
+`strong_consistency` | Enables the consensus subsystem used for strongly consistent Riak operations if set to `on`. | `off`
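+
+For example, in `riak.conf`:
+
+```riakconf
+strong_consistency = on
+```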
+
+Unlike the `strong_consistency` setting, the settings listed below are
+available only in `advanced.config`, in the `riak_ensemble` section of
+that file. That section looks like this:
+
+```advancedconfig
+{riak_ensemble, [
+    {parameter1, value},
+    {parameter2, value},
+    %% Other settings
+  ]}
+```
+
+Further instructions on setting parameters in `advanced.config` can be
+found in the [advanced configuration](#advanced-configuration) section below.
+
+Using these settings properly demands a firm understanding of the basic
+architecture of Riak's implementation of strong consistency. We highly
+recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind
+strong consistency before changing the defaults on these parameters.
+
+Config | Description | Default
+:------|:------------|:-------
+`ensemble_tick` | The rate at which leaders perform their periodic duties, including refreshing the leader lease, in milliseconds. This setting must be lower than both the `lease_duration` and `follower_timeout` settings (both listed below). Lower values mean that leaders perform their duties more frequently, which can allow for faster convergence if a leader goes offline and then returns to the ensemble; higher values mean that leaders perform their duties less frequently, which can reduce network overhead. | `500`
+`lease_duration` | Determines how long a leader lease remains valid without being refreshed (in milliseconds). This should be set higher than the `ensemble_tick` setting (listed above) so that leaders have time to refresh their leases before they time out, and it must be set lower than the `follower_timeout` setting (listed below). | `ensemble_tick * 3/2`
+`follower_timeout` | Determines how long a follower waits to hear from a leader before it abandons the leader (in milliseconds). This must be set greater than the `lease_duration` setting. | `lease_duration * 4`
+`alive_tokens` | Determines the number of ticks the leader will wait to hear from its associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before `ensemble_tick * alive_tokens` milliseconds have elapsed, the leader will give up leadership. It may be necessary to raise this setting if your Riak vnodes are frequently stalling out on slow backend reads/writes. If this setting is too low, it may cause slow requests to time out earlier than the request timeout. | `2`
+`storage_delay` | Determines how long the consensus subsystem delays syncing to disk when performing certain metadata operations (in milliseconds). This delay allows multiple operations to be coalesced into a single disk write. We do not recommend that you change this setting. | `50`
+`storage_tick` | Determines how often the consensus subsystem writes data to disk that was requested to be written asynchronously (in milliseconds). We do not recommend that you change this setting. | `5000`
+`trust_lease` | Determines whether leader leases are used to optimize reads. When set to `true`, a leader with a valid lease will handle the read directly without contacting any followers; when set to `false`, the leader will always contact followers. For more information, see our internal documentation on leader leases. | `true`
+`peer_get_timeout` | Determines the timeout used internally for reading consistent data, in milliseconds. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_put_timeout` | Determines the timeout, in milliseconds, used internally for writing consistent data. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_workers` | The number of concurrent workers used by the leader to service requests. Increasing this setting may boost performance depending on the workload. | `1`
+`tree_validation` | Determines whether Riak considers peer Merkle trees to be trusted after a node restart. When validation is enabled (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted majority. This is the safest option, as it protects Riak against undetected corruption of the Merkle tree. However, this mode reduces Riak availability, since it can sometimes require more than a simple majority of nodes to be online and reachable. | `true`
+`synchronous_tree_updates` | Determines whether the metadata updates to follower Merkle trees are handled synchronously or not. When set to `true`, Riak requires two quorum round trips to occur before replying back to the client: the first quorum request to write the actual object, and the second to write the Merkle tree data. When set to `false`, Riak will respond back to the client after the first round trip, letting the metadata update happen asynchronously. Note that the leader always updates its local Merkle tree before responding to the client; this setting only affects the metadata writes sent to followers. In principle, asynchronous updates are unsafe: if the leader crashes before sending the metadata updates, and all followers that had acknowledged the object write somehow revert to the object value immediately prior to a write request, a future read could return the immediately preceding value without realizing that it was incorrect. Given that this scenario is unlikely, this setting defaults to `false` in the name of improved performance. | `false`
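+
+Putting this together, an `advanced.config` sketch that states the defaults
+explicitly while preserving their required proportions (`ensemble_tick` less than
+`lease_duration` less than `follower_timeout`) might look like this:
+
+```advancedconfig
+{riak_ensemble, [
+    {ensemble_tick, 500},
+    {lease_duration, 750},
+    {follower_timeout, 3000}
+  ]}
+```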
+
+## Miscellaneous
+
+Config | Description | Default
+:------|:------------|:-------
+`metadata_cache_size` | This setting controls the size of the metadata cache for each vnode. The cache can be disabled by setting it to `off` (this is the default). Enabling the cache should not be necessary in disk-based backends (i.e. LevelDB and Bitcask), but it can help performance in the Memory backend. Note that this setting adjusts the size of the ETS table rather than the actual data, so more space may be used than the simple `size * number-of-vnodes` calculation would imply. Caution: this setting should not be changed without extensive benchmarking. | `off`
+`max_concurrent_requests` | The maximum number of concurrent requests of each type (GET or PUT) that is allowed. Setting this value to `infinite` disables overload protection. The `erlang.process_limit` should be at least 3 times this setting. | `50000`
+`dtrace` | Whether DTrace is enabled. Do not enable unless your Erlang/OTP runtime is compiled to support DTrace, which is available in R15B01 (supported by the official source package) and in R14B04 via a custom repository and branch. | `off`
+`vnode_management_timer` | Sets the frequency with which vnodes attempt to trigger handoff between this node and other nodes in the cluster. | `10s` (10 seconds)
+`retry_put_coordinator_failure` | When a PUT (i.e. write) request fails, Riak will retry the operation if this setting is set to `on`, which is the default. Setting it to `off` will speed response times on PUT requests in general, but at the risk of potentially increasing the likelihood of write failure. | `on`
+`background_manager` | Riak's background manager is a subsystem that coordinates access to shared resources from other Riak subsystems. The background manager can help to prevent system response degradation under times of heavy load caused by multiple background tasks. | `on`
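+
+A `riak.conf` sketch touching a few of these settings (the values shown simply
+restate the defaults above and are not recommendations):
+
+```riakconf
+max_concurrent_requests = 50000
+retry_put_coordinator_failure = on
+background_manager = on
+```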
+
+## Advanced Configuration
+
+The `advanced.config` file takes the same format as the `app.config`
+file familiar to users of versions of Riak prior to 2.0. Here is an
+example:
+
+```advancedconfig
+[
+  {riak_core,
+    [
+      {cluster_mgr, {"127.0.0.1", 8098 } },
+      %% more riak_core configs
+    ]},
+
+  {riak_repl,
+    [
+      {data_root, "/var/db/riak/riak_repl/"},
+      %% more riak_repl configs
+    ]
+  }
+].
+```
+
+The following settings are available in the `advanced.config` file:
+
+#### `riak_repl` settings
+
+Most settings that are configurable through `advanced.config` are
+related to Riak's `riak_repl` subsystem.
+
+Config | Description | Default
+:------|:------------|:-------
+`data_root` | Path (relative or absolute) to the working directory for the replication process. | `/var/db/riak/riak_repl/`
+`max_fssource_cluster` | The hard limit of fullsync workers that will be running on the source side of a cluster across all nodes on that cluster for a fullsync to a sink cluster. This means that if you have configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `5`
+`max_fssource_node` | This setting limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `1`
+`max_fssink_node` | This setting limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the sink cluster on which this parameter is defined, either via the configuration file or command line. | `1`
+`fullsync_on_connect` | Whether to initiate a fullsync on initial connection from the sink cluster. | `true`
+`fullsync_interval` | A single-integer value representing the duration to wait, in minutes, between fullsyncs, or a list of `{clustername, time_in_minutes}` pairs for each sink participating in fullsync replication. | `30`
+`rtq_max_bytes` | The maximum size, in bytes, to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync. | `104857600`
+`proxy_get` | Whether to enable Riak CS `proxy_get` and block filter. | `disabled`
+`rt_heartbeat_interval` | A heartbeat message is sent from the source to the sink every `rt_heartbeat_interval` seconds. Setting `rt_heartbeat_interval` to `undefined` disables the realtime heartbeat. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
+`rt_heartbeat_timeout` | If a heartbeat response is not received within the time period specified by this setting (in seconds), the source connection exits and will be re-established. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
+`realtime_connection_rebalance_max_delay_secs` | Should a server on the source cluster be restarted, this is the amount of time (in seconds) before the realtime connections are rebalanced by a change in the number of source nodes. | `300`
+`fullsync_use_background_manager` | By default, fullsync replication will attempt to coordinate with other Riak subsystems that may be contending for the same resources. This will help to prevent system response degradation during times of heavy load from multiple background tasks. To disable background coordination, set this parameter to `false`. This feature is available only in Riak KV Enterprise Edition 2.0 and later, as well as Riak KV 2.2.6 onwards. | `true`
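+
+As an illustration, a `riak_repl` section combining several of the settings above
+might look like this (the values are examples only):
+
+```advancedconfig
+{riak_repl, [
+    {data_root, "/var/db/riak/riak_repl/"},
+    {max_fssource_cluster, 5},
+    {max_fssource_node, 1},
+    {fullsync_interval, 30}
+  ]}
+```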
+
+#### Upgrading Riak Search with `advanced.config`
+
+If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][use ref search] (codename Yokozuna), you will need to enable
+legacy Search while the upgrade is underway. You can add the following
+snippet to your `advanced.config` configuration to do so:
+
+```advancedconfig
+[
+  %% Other configs
+
+  {riak_search, [ {enabled, true} ]},
+  {merge_index, [
+    {data_root, "/var/lib/riak/merge_index"},
+    {buffer_rollover_size, 1048576},
+    {max_compact_segments, 20}
+  ]},
+
+  %% Other configs
+].
+```
+
+#### Other settings
+
+There are four non-`riak_repl` settings available in
+`advanced.config`.
+
+Config | Section | Description | Default
+:------|:--------|:------------|:-------
+`add_paths` | `riak_kv` | If you are installing custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies the paths to any compiled `.beam` files that you wish to use. This is expressed as a list of absolute paths on the node's filesystem, e.g. `[ "/tmp", "/other" ]`. |
+`cluster_mgr` | `riak_core` | The cluster manager listens for connections from remote clusters on the specified IP and port. Every node runs one cluster manager, but only the cluster manager running on the cluster leader will service requests. This can change as nodes enter and leave the cluster. | `{"127.0.0.1", 9080}`
+`delete_mode` | `riak_kv` | Specifies how Riak behaves after objects are marked for deletion with a tombstone. There are three possible settings: `keep` disables tombstone removal altogether; `immediate` removes objects' tombstones as soon as the delete request is received; and setting `delete_mode` to an integer value specifies the number of milliseconds to wait before removing tombstones. More information can be found in Object Deletion. | `3000` (3 seconds)
+`target_n_val` | `riak_core` | The highest `n_val` that you generally intend to use. This setting affects how partitions are distributed within the cluster, helping to ensure that "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. You will need to change this setting only in rare circumstances. Assuming that `ring_size` is a power of 2, the ideal value for this setting is both (a) greater than or equal to the largest `n_val` for any bucket type and (b) an even divisor of the number of partitions in the ring, i.e. `ring_size`. The default is `4`, and the number of physical nodes in your cluster must be greater than `target_n_val` for this setting to be effective at preventing hot spots. | `4`
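+
+For example, an `advanced.config` sketch that raises `target_n_val` for a strongly
+consistent cluster and keeps tombstones (both values are illustrative):
+
+```advancedconfig
+[
+  {riak_core, [
+    {target_n_val, 5}
+  ]},
+  {riak_kv, [
+    {delete_mode, keep}
+  ]}
+].
+```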
+
+## Cluster Job Controls
+
+{{% note title="Warning" %}}
+Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
+{{% /note %}}
+
+The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
+`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`
diff --git a/content/riak/kv/2.2.6/configuring/search.md b/content/riak/kv/2.2.6/configuring/search.md
new file mode 100644
index 0000000000..1f2e40f750
--- /dev/null
+++ b/content/riak/kv/2.2.6/configuring/search.md
@@ -0,0 +1,274 @@
+---
+title: "Riak Search Settings"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Riak Search Settings"
+    identifier: "configuring_search"
+    weight: 160
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/advanced/configs/search/
+  - /riak/kv/2.2.6/ops/advanced/configs/search/
+---
+
+[usage search]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search
+[usage search schema]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search-schemas
+[usage search data types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/searching-data-types
+[usage custom extractors]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/custom-extractors
+[cluster-ops aae throttle]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/active-anti-entropy/#throttling
+[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference
+[config reference#search]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/#search
+[glossary aae]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#active-anti-entropy-aae
+[security index]: {{<baseurl>}}riak/kv/2.2.6/using/security/
+
+[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
+[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation
+
+This page covers how to use Riak Search (with
+[Solr](http://lucene.apache.org/solr/) integration).
+
+For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].
+
+If you are looking to develop on or with Riak Search, take a look at:
+
+* [Using Search][usage search]
+* [Search Schema][usage search schema]
+* [Custom Search Extractors][usage custom extractors]
+* [Riak KV Data Types and Search][usage search data types]
+
+## Overview
+
+We'll be walking through:
+
+1. [Prerequisites](#prerequisites)
+2. [Enable Riak Search](#enabling-riak-search)
+3. [Search Configuration Settings](#search-config-settings)
+4. [Additional Solr Information](#more-on-solr)
+
+## Prerequisites
+
+Because Solr is a Java application, you will need to install **Java 7
+or later** on every node. Installation packages can be found on the [Java SE Downloads
+page][java se downloads] and instructions in the [Java SE documentation site][java se docs].
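+
+To confirm which Java version a node will pick up, a quick sanity check (not a
+required step) is:
+
+```bash
+java -version
+```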
+
+
+## Enabling Riak Search
+
+Riak Search is not enabled by default, so you must enable it in every
+node's [configuration file][config reference] as follows:
+
+```riakconf
+search = on
+```
+
+
+## Search Config Settings
+
+You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation.
+
+### `search`
+
+Enable or disable search; defaults to `off`.
+
+Valid values: `on` or `off`
+
+### `search.anti_entropy.data_dir`
+
+The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`.
+
+Valid values: a directory
+
+### `search.anti_entropy.throttle`
+
+Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`.
+
+Valid values: `on` or `off`
+
+You can read more about throttling [here][cluster-ops aae throttle].
+
+### `search.anti_entropy.throttle.$tier.delay`
+
+Set the throttling tiers delay for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above.
+
+For example:
+
+```
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a solrq queue length of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.anti_entropy.throttle.$tier.solrq_queue_length`
+
+Set the throttling tiers for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle
+should observe at that size and above.
+
+For example:
+
+```
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a solrq queue length of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.dist_query`
+
+Enable this node in distributed query plans; defaults to `on`.
+
+If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long-running administrative operation (e.g. reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.
+
+This setting can also be changed via `riak-admin` by issuing one of the following commands:
+
+```
+riak-admin set search.dist_query=off
+```
+
+or
+
+```
+riak-admin set search.dist_query=on
+```
+
+Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` feature. Setting `search.dist_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up.
+
+Valid values: `on` or `off`
+
+### `search.index.error_threshold.failure_count`
+
+The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.
+
+Valid values: Integer
+
+### `search.index.error_threshold.failure_interval`
+
+The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.
+
+If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.
+
+Valid values: Milliseconds
+
+### `search.index.error_threshold.reset_interval`
+
+The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.
+
+Valid values: Milliseconds
+
+### `search.queue.batch.flush_interval`
+
+The maximum delay between notification to flush batches to Solr; defaults to `1000` (milliseconds).
+
+This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.
+
+Valid values: `ms`, `s`, `m`, or `h`
+
+### `search.queue.batch.maximum`
+
+The maximum batch size, in number of Riak objects; defaults to `500`.
+
+Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.
+
+Valid values: Integer
+
+### `search.queue.batch.minimum`
+
+The minimum batch size, in number of Riak objects; defaults to `10`.
+
+Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.
+
+Valid values: Integer
+
+### `search.queue.high_watermark`
+
+The queue high water mark; defaults to `1000`.
+
+If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem if writes into Solr start to fall behind.
+
+Valid values: Integer
+
+### `search.queue.high_watermark.purge_strategy`
+
+The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.
+
+Valid values: `purge_one`, `purge_index`, or `off`
+
+* `purge_one` removes the oldest item on the queue from an erroring index (a reference to fuses blown in the code) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `purge_index` removes all items associated with one random erroring index (a reference to fuses blown in the code) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `off` disables purging
+
+### `search.root_dir`
+
+The root directory in which index data and configuration is stored; defaults to `./data/yz`.
+
+Valid values: a directory
+
+### `search.solr.jvm_options`
+
+The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.
+
+Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.
+
+Valid values: Java command-line arguments
+
+### `search.solr.jmx_port`
+
+The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.
+
+Valid values: Integer
+
+Note: JMX ceased being a Riak feature in Riak KV 2.2.6. This setting is left here for reference but no longer affects anything.
+
+### `search.solr.port`
+
+The port number to which Solr binds (note: binds on every interface); defaults to `8093`.
+
+Valid values: Integer
+
+### `search.solr.start_timeout`
+
+How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.
+
+Values lower than 1s will be rounded up to 1s.
+
+Valid values: Integer with time units (e.g. 2m)
+
+
+## More on Solr
+
+### Solr JVM and Ports
+
+Riak Search runs one Solr process per node to manage its indexing and
+search functionality. While the underlying project manages
+index distribution, node coverage for queries, active anti-entropy
+(AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.
+
+Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports
+to the outside world.
+
+### Solr for Operators
+
+For further information on Solr monitoring, tuning, and performance, we
+recommend the following documents for getting started:
+
+* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
+* [Solr Performance Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
+* [Solr Performance Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
+* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)
+
+A wide variety of other documentation is available from the Solr OSS
+community.
diff --git a/content/riak/kv/2.2.6/configuring/strong-consistency.md b/content/riak/kv/2.2.6/configuring/strong-consistency.md
new file mode 100644
index 0000000000..6690c9d4bd
--- /dev/null
+++ b/content/riak/kv/2.2.6/configuring/strong-consistency.md
@@ -0,0 +1,666 @@
+---
+title: "Implementing Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Implementing Strong Consistency"
+    identifier: "configuring_strong_consistency"
+    weight: 190
+    parent: "configuring"
+toc: true
+---
+
+[apps strong consistency]: {{<baseurl>}}riak/kv/2.2.6/developing/app-guide/strong-consistency
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/reference/strong-consistency
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes
+[config reference#strong-cons]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/#strong-consistency
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-cli
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency
+[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask
+[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
+[concept buckets]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/bucket-types
+[use admin riak-admin#ensemble]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#ensemble-status
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/#advanced-configuration
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/cluster-capacity
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/strong-consistency
+[apps replication properties]: {{<baseurl>}}riak/kv/2.2.6/developing/app-guide/replication-properties
+[concept causal context]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context
+[dev data types]: {{<baseurl>}}riak/kv/2.2.6/developing/data-types
+[glossary aae]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#active-anti-entropy-aae
+[cluster ops 2i]: {{<baseurl>}}riak/kv/2.2.6/using/reference/secondary-indexes
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution
+[cluster ops obj del]: {{<baseurl>}}riak/kv/2.2.6/using/reference/object-deletion
+[dev client libraries]: {{<baseurl>}}riak/kv/2.2.6/developing/client-libraries
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+This document provides information on configuring and monitoring a Riak
+cluster's optional strong consistency subsystem. Documentation for
+developers building applications using Riak's strong consistency feature
+can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical
+treatment can be found in [Strong Consistency][concept strong consistency].
+
+## Minimum Cluster Size
+
+In order to use strong consistency in Riak, **your cluster must consist
+of at least three nodes**. If it does not, all strongly consistent
+operations will fail.
+If your cluster is smaller than three nodes, you
+will need to [add more nodes][cluster ops add remove node] and make sure
+that strong consistency is [enabled](#enabling-strong-consistency) on all of them.
+
+Strongly consistent operations on a given key may also fail if a
+majority of object replicas in a given ensemble are unavailable, whether
+due to slowness, crashes, or network partitions. This means that you may
+see strongly consistent operations fail even if the minimum cluster size
+requirement has been met. More information on ensembles can be found in
+[Implementation Details](#implementation-details).
+
+While strong consistency requires at least three nodes, we have a
+variety of recommendations regarding cluster size, which can be found in
+[Fault Tolerance](#fault-tolerance).
+
+## Enabling Strong Consistency
+
+Strong consistency in Riak is disabled by default. You can enable it in
+each node's [configuration files][config reference#strong-cons].
+
+```riakconf
+strong_consistency = on
+```
+
+```appconfig
+%% In the older, app.config-based system, the strong consistency
+%% parameter is enable_consensus:
+
+{riak_core, [
+    % ...
+    {enable_consensus, true},
+    % ...
+  ]}
+```
+
+Remember that you must [restart your node][use admin riak cli] for
+configuration changes to take effect.
+
+For strong consistency requirements to be applied to specific keys,
+those keys must be in [buckets][concept buckets] bearing a bucket type with the
+`consistent` property set to `true`. More information can be found in
+[Using Bucket Types][cluster ops bucket types].
+
+If you enable strong consistency on all nodes in a cluster with fewer
+than three nodes, strong consistency will be **enabled** but not yet
+**active**. Strongly consistent operations are not possible in this
+state. Once at least three nodes with strong consistency enabled are
+detected in the cluster, the system will be activated and ready for use.
+You can check on the status of the strong consistency subsystem using
+the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command.
+
+## Fault Tolerance
+
+Strongly consistent operations in Riak are necessarily less highly
+available than [eventually consistent][concept eventual consistency] operations
+because strongly consistent operations can only succeed if a **quorum**
+of object replicas are currently reachable. A quorum can be expressed as
+N / 2 + 1 (or `n_val` / 2 + 1), meaning that 3 replicas constitute a
+quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are
+unavailable, for example, no strongly consistent operations on that
+object can succeed.
+
+While Riak uses N=3 by default, bear in mind that **higher values of N
+will allow for more fault tolerance**. The table below shows the number
+of allowable missing replicas for assorted values of N:
+
+Replicas | Allowable missing replicas
+:--------|:--------------------------
+3 | 1
+5 | 2
+7 | 3
+9 | 4
+15 | 7
+
+Thus, we recommend setting `n_val` higher than the default of 3 for
+strongly consistent operations. More on `n_val` in the section below.
+
+### n_val Recommendations
+
+Due to the quorum requirements explained above, we recommend that you
+use _at least_ N=5 for strongly consistent data. You can set the value
+of N, i.e. `n_val`, for buckets
+[using bucket types][cluster ops bucket types].
+For example, you
+can create and activate a bucket type with N set to 5 and strong
+consistency enabled---we'll call the bucket type
+`consistent_and_fault_tolerant`---using the following series of
+[commands][use admin riak-admin]:
+
+```bash
+riak-admin bucket-type create consistent_and_fault_tolerant \
+  '{"props": {"consistent":true,"n_val":5}}'
+riak-admin bucket-type activate consistent_and_fault_tolerant
+```
+
+If the `activate` command outputs `consistent_and_fault_tolerant has
+been activated`, the bucket type is now ready to provide strong
+consistency guarantees.
+
+#### Setting the target_n_val parameter
+
+The `target_n_val` parameter sets the highest `n_val` that you intend to
+use in an entire cluster. The purpose of this parameter is to ensure
+that so-called "hot spots" don't occur, i.e. that data is never stored
+more than once on the same physical node. This can happen when:
+
+* `target_n_val` is greater than the number of physical nodes, or
+* the `n_val` for a bucket is greater than `target_n_val`.
+
+A problem to be aware of if you're using strong consistency is that the
+default for `target_n_val` is 4, while our suggested minimum `n_val` for
+strongly consistent bucket types is 5. This means that you will need to
+raise `target_n_val` if you intend to use an `n_val` over 4 for _any_
+bucket type in your cluster. If you anticipate using an `n_val` of 7 as
+the largest `n_val` within your cluster, for example, you will need to
+set `target_n_val` to 7.
+
+This setting is not contained in `riak.conf`, and must instead be set in
+the `advanced.config` file. For more information, see our documentation
+on [advanced configuration][config reference#advanced].
+
+If you are using strong consistency in a cluster that has already been
+created with a `target_n_val` that is too low (remember that the default
+is too low), you will need to raise it to the desired higher value and
+restart each node.
+
+#### Note on Bucket Properties
+
+The `consistent` bucket property is one of two bucket properties,
+alongside [`datatype`][cluster ops bucket types], that cannot be changed once a
+bucket type has been created.
+
+Furthermore, if `consistent` is set to `true` for a bucket type, you
+cannot change the `n_val` for the bucket type once it's been created. If
+you attempt to do so, you'll see the following error:
+
+```
+Error updating bucket <bucket_type_name>:
+n_val cannot be modified for existing consistent type
+```
+
+If you've created a bucket type with a specific `n_val` and wish to
+change it, you will need to create a new bucket type with the
+appropriate `n_val` and use the new bucket type instead.
+
+### Fault Tolerance and Cluster Size
+
+From the standpoint of strongly consistent operations, larger clusters
+tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down.
+
+Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If
+two nodes go down, _all_ ensembles will lose quorum and will be unable
+to function. Strongly consistent operations on the entire keyspace will
+fail until at least one node is brought back online. And even when that
+one node is brought back online, a significant portion of the keyspace
+will continue to be unavailable for strongly consistent operations.
+
+For the sake of contrast, imagine a 50-node cluster in which all
+ensembles are N=5 (i.e. all objects are replicated to five nodes).
+In
+this cluster, each node is involved in only 10% of the total ensembles;
+if a single node fails, that failure will thus impact only 10% of
+ensembles. In addition, because N is set to 5, that will not impact
+quorum for _any_ ensemble in the cluster; two additional node failures
+would need to occur for quorum to be lost for _any_ ensemble. And even
+in the case of three nodes failing, it is highly unlikely that that
+failure would impact the same ensembles; if it did, only those ensembles
+would become unavailable, affecting only 10% of the key space, as
+opposed to 100% in the example of a 3-node cluster consisting of N=3
+ensembles.
+
+These examples illustrate why we recommend higher values for N---again,
+at least N=5---as well as clusters with many nodes. The 50-node cluster
+example above is used only to illustrate why larger clusters are more
+fault tolerant. The definition of "many" nodes will vary according to your needs.
+For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].
+
+### Offline Node Recommendations
+
+In general, strongly consistent Riak is more sensitive to the number of
+nodes in the cluster than eventually consistent Riak, due to the quorum
+requirements described above. While Riak is designed to withstand a
+variety of failure scenarios that make nodes in the cluster unreachable,
+such as hardware or network failure, **we nonetheless recommend that you
+limit the number of nodes that you intentionally down or reboot**.
+Having multiple nodes leave the cluster at once can threaten quorum and
+thus affect the viability of some or all strongly consistent operations,
+depending on the size of the cluster.
+
+If you're using strong consistency and you do need to reboot multiple
+nodes, we recommend rebooting them very carefully. Rebooting nodes too
+quickly in succession can force the cluster to lose quorum and thus be
+unable to service strongly consistent operations. The best strategy is
+to reboot nodes one at a time and wait for each node to rejoin existing
+[ensembles][cluster ops strong consistency] before
+continuing to the next node. At any point in time, the state of
+currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].
+
+## Performance
+
+If you run into performance issues, bear in mind that the key space in a
+Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of
+that key space. Larger [ring sizes][concept clusters] allow more
+independent consensus groups to exist in a cluster, which can provide
+for more concurrency and higher throughput, and thus better performance.
+The ideal ring size, however, will also depend on the number of nodes in
+the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].
+
+Adding nodes to your cluster is another means of enhancing the
+performance of strongly consistent operations. Instructions on doing so
+can be found in [Adding and Removing Nodes][cluster ops add remove node].
+
+Your cluster's configuration can also affect strong consistency
+performance. See the section on [configuration][config reference#strong-cons] below.
+
+## riak-admin ensemble-status
+
+The [`riak-admin`][use admin riak-admin] interface
+used for general node/cluster management has an `ensemble-status`
+command that provides insight into the current status of the consensus
+subsystem undergirding strong consistency.
+
+## riak-admin ensemble-status
+
+The [`riak-admin`][use admin riak-admin] interface
+used for general node/cluster management has an `ensemble-status`
+command that provides insight into the current status of the consensus
+subsystem undergirding strong consistency.
+
+Running the command by itself will provide the current state of the
+subsystem:
+
+```bash
+riak-admin ensemble-status
+```
+
+If strong consistency is not currently enabled, you will see `Note: The
+consensus subsystem is not enabled.` in the output of the command; if
+strong consistency is enabled, you will see output like this:
+
+```
+============================== Consensus System ===============================
+Enabled:     true
+Active:      true
+Ring Ready:  true
+Validation:  strong (trusted majority required)
+Metadata:    best-effort replication (asynchronous)
+
+================================== Ensembles ==================================
+ Ensemble     Quorum        Nodes      Leader
+-------------------------------------------------------------------------------
+   root       4 / 4         4 / 4      riak@riak1
+    2         3 / 3         3 / 3      riak@riak2
+    3         3 / 3         3 / 3      riak@riak4
+    4         3 / 3         3 / 3      riak@riak1
+    5         3 / 3         3 / 3      riak@riak2
+    6         3 / 3         3 / 3      riak@riak2
+    7         3 / 3         3 / 3      riak@riak4
+    8         3 / 3         3 / 3      riak@riak4
+```
+
+### Interpreting ensemble-status Output
+
+The following table provides a guide to `ensemble-status` output:
+
+Item | Meaning
+:----|:-------
+`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `false` and you wish to enable strong consistency, see our documentation on enabling strong consistency.
+`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes.
+`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added nodes to or removed nodes from the cluster, it may take some time for `Ring Ready` to change.
+`Validation` | This will display `strong` if the `tree_validation` setting in `advanced.config` has been set to `true` and `weak` if set to `false`.
+`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in `advanced.config`, which determines whether strong-consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)`, then `synchronous_tree_updates` is set to `true`.
+`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.<br>• **Ensemble** --- The ID of the ensemble<br>• **Quorum** --- The number of ensemble peers that are either leading or following<br>• **Nodes** --- The number of nodes currently online<br>• **Leader** --- The current leader node for the ensemble
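+
+As a quick operational check, the output above can be scanned for
+ensembles that are below full quorum. This one-liner is only a rough
+sketch---it assumes the column layout shown above, which may vary by
+version:
+
+```bash
+# Print any ensemble row whose quorum count (before the first "/")
+# differs from its total peer count (after it)
+riak-admin ensemble-status | awk '/\// && $2 != $4 {print}'
+```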
+
+**Note**: The **root ensemble**, designated by `root` in the sample
+output above, is a special ensemble that stores a list of nodes and
+ensembles in the cluster.
+
+More in-depth information on ensembles can be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+
+### Inspecting Specific Ensembles
+
+The `ensemble-status` command also enables you to directly inspect the
+status of specific ensembles in a cluster. The IDs for all current
+ensembles are displayed in the `Ensembles` section of the
+`ensemble-status` output described above.
+
+To inspect a specific ensemble, specify the ID:
+
+```bash
+riak-admin ensemble-status <id>
+```
+
+The following would inspect ensemble 2:
+
+```bash
+riak-admin ensemble-status 2
+```
+
+Below is sample output for a single ensemble:
+
+```
+================================= Ensemble #2 =================================
+Id:           {kv,0,3}
+Leader:       riak@riak2 (2)
+Leader ready: true
+
+==================================== Peers ====================================
+ Peer  Status     Trusted  Epoch  Node
+-------------------------------------------------------------------------------
+  1    following  yes      1      riak@riak1
+  2    leading    yes      1      riak@riak2
+  3    following  yes      1      riak@riak3
+```
+
+The table below provides a guide to the output:
+
+Item | Meaning
+:----|:-------
+`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. The first element is always `kv`; the second names the ring partition for which the ensemble is responsible; and the third is the `n_val` for the keys for which the ensemble is responsible.
+`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
+`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
+`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.<br>• **Peer** --- The ID of the peer<br>• **Status** --- Whether the peer is a leader or a follower<br>• **Trusted** --- Whether the peer's Merkle tree is currently considered trusted or not<br>• **Epoch** --- The current consensus epoch for the peer. The epoch is incremented each time the leader changes.<br>• **Node** --- The node on which the peer resides.
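+
+A similar rough check can flag peers whose Merkle trees are not
+currently trusted; again, this sketch assumes the column layout in the
+sample output above:
+
+```bash
+# Print any peer row in ensemble 2 whose Trusted column reads "no"
+riak-admin ensemble-status 2 | awk '$3 == "no" {print}'
+```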
+
+More information on leaders, peers, Merkle trees, and other details can
+be found in [Implementation Details](#implementation-details) below.
+
+## Implementation Details
+
+Strong consistency in Riak is handled by a subsystem called
+[`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+This system functions differently from other systems in Riak in a number
+of ways, and many of these differences are important to bear in mind for
+operators configuring their cluster's usage of strong consistency.
+
+### Basic Operations
+
+The first major difference is that strongly consistent Riak involves a
+different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types
+of atomic operations on objects:
+
+* **Get** operations work just as they do against
+  non-strongly-consistent keys, but with two crucial differences:
+  1. Connecting clients are guaranteed to receive the most recently
+     written value (which makes those operations CP, i.e. consistent and
+     partition tolerant)
+  2. Reads on strongly consistent keys *never* return siblings, hence
+     there is no need to develop any sort of [conflict resolution][usage conflict resolution]
+     strategy for those keys
+* **Conditional put** operations write an object only if no object
+  currently exists under that key. The operation will fail if the key
+  already exists; if the key was never written or has been deleted, the
+  operation succeeds.
+* **Conditional modify** operations are compare-and-swap (CAS)
+  operations that succeed only if the value of a key has not changed
+  since it was previously read.
+* **Delete** operations work mostly like they do against
+  non-strongly-consistent keys, with the exception that
+  [tombstones][cluster ops obj deletion] are not harvested, which is
+  the equivalent of having `delete_mode` set to `keep`.
+
+**From the standpoint of clients connecting to Riak, there is little
+difference between strongly and non-strongly consistent data**. The
+operations performed on objects---reads, writes, deletes, etc.---are the
+same, which means that the client API for strong consistency is
+essentially the same as it is for eventually consistent operations, with
+the important exception of error handling. A sketch of these operations
+over the HTTP API appears at the end of this section.
+
+### Ensembles
+
+The main actors in Riak's implementation of strong consistency are
+**ensembles**, which are independent groups that watch over a portion of
+a Riak cluster's key space and coordinate strongly consistent operations
+across nodes. When watching over a given key space, ensembles must act
+upon multiple replicas of a given object, the number of which is
+specified by `n_val` (more on this in [Replication Properties][apps replication properties]).
+
+Eventually consistent Riak can service requests even when only a single
+object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to
+ensure eventual consistency between replicas. Strongly consistent Riak
+is different because it requires that a **quorum** of object replicas be
+online and reachable, where a quorum is defined as `n_val` / 2 + 1. **If
+a quorum is not available for a key, all strongly consistent operations
+against that key will fail**.
+
+More information can be found in the section on Fault Tolerance above.
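+
+As a concrete illustration of the operations above, below is a minimal
+sketch against the HTTP API. It is illustrative only: the host, port,
+bucket type name (`strict`), bucket, and key are hypothetical, and it
+assumes a bucket type that was created and activated with
+`{"props":{"consistent":true}}`:
+
+```bash
+# Hypothetical one-time setup:
+#   riak-admin bucket-type create strict '{"props":{"consistent":true}}'
+#   riak-admin bucket-type activate strict
+
+# Conditional put: succeeds only if the key does not already exist
+curl -s -X PUT -H 'Content-Type: text/plain' -d 'first value' \
+  http://localhost:8098/types/strict/buckets/accounts/keys/alice
+
+# Fetch the object's causal context from the response headers
+VCLOCK=$(curl -sI http://localhost:8098/types/strict/buckets/accounts/keys/alice \
+  | awk -F': ' 'tolower($1) == "x-riak-vclock" {print $2}' | tr -d '\r')
+
+# Conditional modify (CAS): succeeds only if the object has not changed
+# since the context above was read
+curl -s -X PUT -H 'Content-Type: text/plain' \
+  -H "X-Riak-Vclock: ${VCLOCK}" -d 'second value' \
+  http://localhost:8098/types/strict/buckets/accounts/keys/alice
+```
+
+Broadly speaking, whether a write is treated as a conditional put or a
+conditional modify depends on whether a causal context accompanies the
+request; the semantics themselves are enforced server-side.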
+
+### Peers, Leaders, Followers, and Workers
+
+All ensembles in strongly consistent Riak consist of agents called
+**peers**. The number of peers in an ensemble is defined by the `n_val`
+of that ensemble, i.e. the number of object replicas that the
+ensemble watches over. Amongst the peers in the ensemble, there are two
+basic actors: **leaders** and **followers**.
+
+Leaders and followers coordinate with one another on most requests.
+While leaders and followers coordinate on all writes, i.e. all puts and
+deletes, you can enable leaders to respond to gets without the need to
+coordinate with followers. This is known as granting a **leader lease**.
+Leader leases are enabled by default, and are disabled (or re-enabled)
+at the cluster level. A more in-depth account of ensemble behavior can
+be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/tree/develop/doc).
+
+In addition to leaders and followers, ensemble peers use lightweight
+Erlang processes called **workers** to perform long-running K/V
+operations, allowing peers to remain responsive to requests. The number
+of workers assigned to each peer depends on your configuration.
+
+These terms should be borne in mind in the sections on configuration
+below.
+
+### Integrity Checking
+
+An essential part of implementing a strong consistency subsystem in a
+distributed system is **integrity checking**, which is a process that
+guards against data corruption and inconsistency even in the face of
+network partitions and other adverse events that Riak was built to
+handle gracefully.
+
+Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency
+integrity checking utilizes [Merkle
+trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on
+disk. All peers in an ensemble, i.e. all leaders and followers, maintain
+their own Merkle trees and update those trees in the event of most
+strongly consistent operations. Those updates can occur synchronously or
+asynchronously from the standpoint of client operations, depending on
+the configuration that you specify.
+
+While integrity checking takes place automatically in Riak, there are
+important aspects of its behavior that you can configure. See the
+[Merkle Tree Settings](#merkle-tree-settings) section below for more
+information on configurable parameters.
+
+## Configuring Strong Consistency
+
+The `riak_ensemble` subsystem provides a wide variety of tunable
+parameters that you can adjust to fit the needs of your Riak cluster.
+All `riak_ensemble`-specific parameters, with the exception of the
+`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency),
+must be set in each node's `advanced.config` file, _not_ in `riak.conf`
+or `app.config`.
+
+Information on the syntax and usage of `advanced.config` can be found in
+our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full
+listing of [strong-consistency-related configuration parameters][config reference#strong-cons].
+
+Please note that the sections below require a basic understanding of the
+following terms:
+
+* ensemble
+* peer
+* leader
+* follower
+* worker
+* integrity checking
+* Merkle tree
+
+For an explanation of these terms, see the [Implementation Details](#implementation-details) section
+above.
+
+### Leader Behavior
+
+The `trust_lease` setting determines whether leader leases are used to
+optimize reads.
+When set to `true`, a leader with a valid lease can handle reads
+directly without needing to contact any followers. When `false`, the
+leader will always contact followers, which can lead to degraded read
+performance. The default is `true`. We recommend leaving leader leases
+enabled for performance reasons.
+
+All leaders have periodic duties that they perform, including refreshing
+the leader lease. You can determine how frequently this occurs, in
+milliseconds, using the `ensemble_tick` setting. The default is 500
+milliseconds. Please note that this setting must be lower than both the
+`lease_duration` setting explained below and the `follower_timeout`
+setting (see the [configuration reference][config reference#strong-cons]).
+
+If you set `trust_lease` to `true`, you can also specify how long a
+leader lease remains valid without being refreshed using the
+`lease_duration` setting, which is specified in milliseconds. This
+setting should be higher than `ensemble_tick` to ensure that leaders
+have time to refresh their leases before they time out, and it _must_
+be lower than `follower_timeout`. The default is `ensemble_tick` * 3/2,
+i.e. if `ensemble_tick` is 400, `lease_duration` will default to 600.
+
+### Worker Settings
+
+You can choose how many workers are assigned to each peer using the
+`peer_workers` setting. Workers are lightweight processes spawned by
+leaders and followers. While increasing the number of workers will make
+the strong consistency subsystem slightly more computationally
+expensive, more workers can mean improved performance in some cases,
+depending on the workload. The default is 1.
+
+### Timeouts
+
+You can establish timeouts for both reads and writes (puts and deletes)
+using the `peer_get_timeout` and `peer_put_timeout` settings,
+respectively. Both are expressed in milliseconds and default to 60000
+(1 minute).
+
+Longer timeouts will decrease the likelihood that read or write
+operations will fail due to long computation times; shorter timeouts
+entail shorter wait times for connecting clients, but at a higher risk
+of failed operations under heavy load.
+
+### Merkle Tree Settings
+
+Each peer in Riak's strong consistency subsystem, whether leader or
+follower, maintains a persistent [Merkle
+tree](http://en.wikipedia.org/wiki/Merkle_tree) for all data stored by
+that peer. More information can be found in the **Integrity Checking**
+section above. The two sections directly below describe
+Merkle-tree-related parameters.
+
+#### Tree Validation
+
+The `tree_validation` parameter determines whether Riak considers Merkle
+trees to be trusted after peers are restarted (for whatever reason).
+When enabled, i.e. when `tree_validation` is set to `true` (the
+default), Riak does not trust peer trees after a restart, instead
+requiring the peer to sync with a trusted quorum. While this is the
+safest mode because it protects Riak against silent corruption in Merkle
+trees, it carries the drawback that it can reduce Riak availability by
+requiring more than a simple majority of nodes to be online and
+reachable when peers restart.
+
+If you are using ensembles with N=3, we strongly recommend setting
+`tree_validation` to `false`.
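+
+Pulling these settings together, below is a hedged `advanced.config`
+sketch. The values shown are the defaults described above, and the name
+of the enclosing section (shown here as `riak_ensemble`) is an
+assumption that should be checked against the
+[configuration reference][config reference#strong-cons]:
+
+```advancedconfig
+%% A sketch only --- verify the section name and defaults for your
+%% version before use
+{riak_ensemble, [
+    %% Leader behavior (milliseconds)
+    {trust_lease, true},
+    {ensemble_tick, 500},
+    {lease_duration, 750},     %% defaults to ensemble_tick * 3/2
+
+    %% Workers and timeouts
+    {peer_workers, 1},
+    {peer_get_timeout, 60000},
+    {peer_put_timeout, 60000},
+
+    %% Merkle tree validation
+    {tree_validation, true}
+  ]}
+```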
+
+#### Synchronous vs. Asynchronous Tree Updates
+
+Merkle tree updates can happen synchronously or asynchronously. This is
+determined by the `synchronous_tree_updates` parameter. When set to
+`false`, which is the default, Riak responds to the client after the
+first roundtrip that updates the followers' data but before the second
+roundtrip required to update the followers' Merkle trees, allowing the
+Merkle tree update to happen asynchronously in the background; when set
+to `true`, Riak requires both quorum roundtrips to complete before
+replying to the client, which can increase per-request latency.
+
+Please note that this setting applies only to Merkle tree updates sent
+to followers. Leaders _always_ update their local Merkle trees before
+responding to the client. Asynchronous updates can be unsafe in certain
+scenarios. For example, if a leader crashes before sending metadata
+updates to followers _and_ all followers that had acknowledged the write
+somehow revert the object value immediately prior to the write request,
+a future read could hypothetically return the immediately preceding
+value without realizing that the value was incorrect. Setting
+`synchronous_tree_updates` to `false` does leave this possibility open,
+but it is highly unlikely.
+
+## Strong Consistency and Active Anti-Entropy
+
+Riak's [active anti-entropy][glossary aae] \(AAE) feature _can_ repair strongly
+consistent data. Although it is not necessary to use active anti-entropy
+if you are using strong consistency, we nonetheless recommend doing so.
+
+Without AAE, all object conflicts are repaired via read repair.
+Read repair, however, cannot repair conflicts in so-called "cold data,"
+i.e. data that may not be read for long periods of time. While using AAE
+does entail small performance losses, not using AAE can lead to problems
+with silent on-disk corruption.
+
+## Strong Consistency and Bitcask
+
+One feature that is offered by Riak's optional [Bitcask][plan backend bitcask]
+backend is object expiry. If you are using strong consistency and
+Bitcask together, you should be aware that object metadata is often
+updated by the strong consistency subsystem during leader changes, which
+typically take place when nodes go down or during network partitions.
+When these metadata updates take place, the time to live (TTL) of the
+object is refreshed, which can lead to general unpredictability in
+objects' TTLs. Although leader changes will be rare in many clusters, we
+nonetheless recommend that you use object expiry in strongly consistent
+buckets only in situations when these occasional irregularities are
+acceptable.
+
+## Important Caveats
+
+The following Riak features are not currently available in strongly
+consistent buckets:
+
+* [Secondary indexes][cluster ops 2i] --- If you do attach
+  secondary index metadata to objects in strongly consistent buckets,
+  strongly consistent operations can still proceed, but that metadata
+  will be silently ignored.
+* [Riak Data Types][dev data types] --- Data Types can currently be
+  used only in an eventually consistent fashion.
+* [Using commit hooks][usage commit hooks] --- Neither pre- nor
+  post-commit hooks are supported in strongly consistent buckets. If you
+  do associate a strongly consistent bucket with one or more commit
+  hooks, strongly consistent operations can proceed as normal in that
+  bucket, but all commit hooks will be silently ignored.
+
+Furthermore, you should also be aware that strong consistency guarantees
+are applied only at the level of single keys.
+There is currently no support within Riak for strongly consistent
+operations against multiple keys, although it is always possible to
+incorporate client-side write and read locks in applications that use
+strong consistency.
+
+## Known Issues
+
+There are a few known issues that you should be aware of when using the
+latest version of strong consistency.
+
+* **Consistent reads of never-written keys create tombstones** --- A
+  [tombstone][cluster ops obj del] will be written if you perform a read
+  against a key that a majority of peers claim does not exist. This is
+  necessary for certain corner cases in which offline or unreachable
+  replicas containing partially written data need to be rolled back in
+  the future.
+* **Consistent keys and key listing** --- In Riak, key listing
+  operations, such as listing all the keys in a bucket, do not filter
+  out tombstones. While this is rarely a problem for
+  non-strongly-consistent keys, it does present an issue for strong
+  consistency due to the tombstone issues mentioned above.
+* **Secondary indexes not supported** --- Strongly consistent
+  operations do not support [secondary indexes][cluster ops 2i] \(2i)
+  at this time. Furthermore, any other metadata attached to objects,
+  even if not related to 2i, will be silently ignored by Riak in
+  strongly consistent buckets.
+* **Multi-Datacenter Replication not supported** --- At this time,
+  consistent keys are *not* replicated across clusters using
+  Multi-Datacenter Replication \(MDC). This is because MDC Replication
+  currently supports only eventually consistent replication across
+  clusters. Mixing strongly consistent data within a cluster with
+  eventually consistent data between clusters is difficult to reason
+  about from the perspective of applications. In a future version of
+  Riak, we will add support for strongly consistent replication across
+  multiple datacenters/clusters.
+* **Client library exceptions** --- Basho's official [client
+  libraries][dev client libraries] convert errors returned by Riak into
+  generic exceptions, with a message derived from the returned
+  server-side error message.
diff --git a/content/riak/kv/2.2.6/configuring/v2-multi-datacenter.md b/content/riak/kv/2.2.6/configuring/v2-multi-datacenter.md
new file mode 100644
index 0000000000..d9fa14dc31
--- /dev/null
+++ b/content/riak/kv/2.2.6/configuring/v2-multi-datacenter.md
@@ -0,0 +1,156 @@
+---
+title_supertext: "Configuring:"
+title: "V2 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "V2 Multi-Datacenter"
+    identifier: "configuring_v2"
+    weight: 210
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v2/configuration
+  - /riak/kv/2.2.6/ops/mdc/v2/configuration
+---
+
+[config v2 ssl]: {{}}riak/kv/2.2.6/configuring/v2-multi-datacenter/ssl
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.6/configuring/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication capabilities offer a
+variety of configurable parameters.
+
+## File
+
+The configuration for replication is kept in the `riak_repl` section of
+each node's `advanced.config`.
+That section looks like this:
+
+```advancedconfig
+{riak_repl, [
+    {fullsync_on_connect, true},
+    {fullsync_interval, 360},
+    % Debian/Centos/RHEL:
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    % Solaris:
+    % {data_root, "/opt/riak/data/riak_repl"},
+    % FreeBSD/SmartOS:
+    % {data_root, "/var/db/riak/riak_repl"},
+    {queue_size, 104857600},
+    {server_max_pending, 5},
+    {client_ack_frequency, 5}
+  ]}
+```
+
+## Usage
+
+These settings are configured using the standard Erlang config file
+syntax, i.e. `{Setting, Value}`. For example, if you wished to set
+`ssl_enabled` to `true`, you would insert the following line into the
+`riak_repl` section (appending a comma if you have more settings to
+follow):
+
+```advancedconfig
+{riak_repl, [
+    % Other configs
+    {ssl_enabled, true},
+    % Other configs
+  ]}
+```
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by
+running the following command:
+
+```bash
+riak chkconfig
+```
+
+The output from this command will point you to syntactical and other
+errors in your configuration files.
+
+A full list of configurable parameters can be found in the sections
+below.
+
+## Fullsync Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster
+`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.<br>**Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
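+
+For example, a minimal `advanced.config` sketch that disables fullsync
+entirely, per the two settings described above:
+
+```advancedconfig
+{riak_repl, [
+    %% No periodic fullsync, and no fullsync on initial connection
+    {fullsync_interval, disabled},
+    {fullsync_on_connect, false}
+  ]}
+```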
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects the leader will wait to get an acknowledgment from, from the remote location, before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site
+
+## Client Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster
+`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs
+`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred
+
+## Buffer Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes
+`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes
+
+## Worker Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to retrieve the object to send. See [2](#f2).
+`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `PUT` will be performed to store the transferred object. See [3](#f3).
+`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to retrieve the object to send. See [2](#f2).
+`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `PUT` will be performed to store the transferred object. See [3](#f3).
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA; and so on.
+
+2. Each get worker spawns 2 processes: one for the work and
+   one for the get FSM (an Erlang finite state machine implementation for `GET`
+   requests). Be sure that you don't run over the maximum number of allowed
+   processes in an Erlang VM (check `vm.args` for a `+P` property).
+
+3. Each put worker spawns 2 processes: one for the work and
+   one for the put FSM (an Erlang finite state machine implementation for `PUT`
+   requests). Be sure that you don't run over the maximum number of allowed
+   processes in an Erlang VM (check `vm.args` for a `+P` property).
+
+4. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v2 ssl].
diff --git a/content/riak/kv/2.2.6/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.2.6/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..92d6c848bf --- /dev/null +++ b/content/riak/kv/2.2.6/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,78 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.2.6/ops/mdc/v2/nat + - /riak/kv/2.2.6/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/2.2.6/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.6/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` diff --git a/content/riak/kv/2.2.6/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.2.6/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..fb5847b291 --- /dev/null +++ b/content/riak/kv/2.2.6/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,367 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.2.6/ops/mdc/v2/quick-start + - /riak/kv/2.2.6/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.6/configuring/v3-multi-datacenter/quick-start/) instead. 
+{{% /note %}}
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through
+the process of configuring Riak's version 2 Replication to perform
+replication between two sample Riak clusters in separate networks. This
+guide will also cover bidirectional replication, which is accomplished
+by setting up unidirectional replication in both directions between the
+clusters.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [Performing system tuning][perf index]
+* [Reviewing configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there are `listener_<nodename>`
+entries for each listening node, and that `leader` and `server_stats`
+are populated. They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
+They should look similar to the following:
+
+```
+Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010"
+leader: 'riak@192.168.1.21'
+client_stats: [{<8051.3902.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster1"},
+                         {strategy,riak_repl_keylist_client},
+                         {fullsync_worker,<8051.3909.0>},
+                         {put_pool_size,5},
+                         {connected,"172.16.1.11",9010},
+                         {state,wait_for_fullsync}]}}]
+```
+
+### Testing Realtime Replication
+
+That's all there is to it! When `PUT` requests are coordinated by
+Cluster1, these operations will be replicated to Cluster2.
+
+You can use the following example script to verify that `PUT` operations
+sent to Cluster1 are being replicated to Cluster2:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=172.16.1.11
+CLUSTER_2_IP=192.168.1.21
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
+  echo "C1 PUT Successful"
+else
+  echo "C1 PUT Failed"
+  exit 1
+fi
+
+CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent
+  C1:${CHECKPUT_C1}
+  C2:${CHECKREPL_C1_TO_C2}"
+  exit 1
+fi
+
+exit 0
+```
+
+You will have to change some of the above variables for your own
+environment, such as IP addresses or ports.
+
+If you run this script and things are working as expected, you will get
+the following output:
+
+```
+C1 PUT Successful
+C1 to C2 consistent
+```
+
+## Set Up Cluster2 → Cluster1 Replication
+
+### About Bidirectional Replication
+
+Multi-Datacenter support can also be configured to replicate in both
+directions, ensuring eventual consistency between your two datacenters.
+Setting up bidirectional replication is as simple as repeating the steps
+above in the other direction, i.e. from Cluster2 to Cluster1.
+
+### Set Up the Listeners on Cluster2 (Source cluster)
+
+On a node in Cluster2, `node4` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010
+riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010
+riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010
+```
+
+### Set Up the Site on Cluster1 (Site cluster)
+
+On a node in Cluster1, `node1` for example, inform the replication
+clients where the Source Listeners are with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster2`.
+
+```bash
+riak-repl add-site 192.168.1.21 9010 Cluster2
+```
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+
+During cancellation, `riak-repl status` will show `cancelled` in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
diff --git a/content/riak/kv/2.2.6/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.2.6/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..233249bae5
--- /dev/null
+++ b/content/riak/kv/2.2.6/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,160 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v2/ssl
+  - /riak/kv/2.2.6/ops/mdc/v2/ssl
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.2.6/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+  ]}
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+  ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_depth, ...}
+    % ...
+  ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self-signed.
+
+For example:
+
+  * A depth of 0 indicates that the certificate must be signed directly
+    by a root certificate authority (CA)
+  * A depth of 1 indicates that the certificate may be signed by at most
+    1 intermediate CA, followed by a root CA
+  * A depth of 2 indicates that the certificate may be signed by at most
+    2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
diff --git a/content/riak/kv/2.2.6/configuring/v3-multi-datacenter.md b/content/riak/kv/2.2.6/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..ee58538ded
--- /dev/null
+++ b/content/riak/kv/2.2.6/configuring/v3-multi-datacenter.md
@@ -0,0 +1,157 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v3/configuration
+  - /riak/kv/2.2.6/ops/mdc/v3/configuration
+---
+
+[config reference#advanced]: {{}}riak/kv/2.2.6/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{}}riak/kv/2.2.6/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+ +Here is a sample of the syntax: + +```advancedconfig +{riak_core, [ + %% Every *node* runs one cluster_mgr + {cluster_mgr, {"0.0.0.0", 9080 }}, + % ... +]}, +{riak_repl, [ + %% Pick the correct data_root for your platform + %% Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + %% Solaris: + %% {data_root, "/opt/riak/data/riak_repl"}, + %% FreeBSD/SmartOS: + %% {data_root, "/var/db/riak/riak_repl"}, + {max_fssource_cluster, 5}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {fullsync_on_connect, false}, + % ... +]} +``` + +## Settings + +Riak MDC configuration is set using the standard Erlang config file +syntax `{Setting, Value}`. For example, if you wished to set +`fullsync_on_connect` to `false`, you would insert this line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{fullsync_on_connect, false} +``` + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak_repl Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number. +`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. +`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster +`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process +`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication. 
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
+`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
+`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100 MiB. Dropped objects will need to be replicated with a fullsync.
+`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
+`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+
+## riak_core Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+## Heartbeat Settings
+
+Two settings in the `riak_repl` section of `advanced.config` govern the
+periodic "heartbeat" that is sent from the source to the sink cluster to
+verify the sink cluster's liveness. The `rt_heartbeat_interval` setting
+determines how often the heartbeat is sent (in seconds). If a heartbeat
+is sent and a response is not received, Riak will wait
+`rt_heartbeat_timeout` seconds before attempting to re-connect to the
+sink; if any data is received from the sink, even if it is not heartbeat
+data, the timer will be reset. Setting `rt_heartbeat_interval` to
+`undefined` will disable the heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to. On the other hand,
+lengthening the timeout will make Riak less sensitive to cases in which
+the connection really has been compromised.
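+
+A minimal sketch of the heartbeat settings in `advanced.config`, using
+the default values from the table above for illustration:
+
+```advancedconfig
+{riak_repl, [
+    %% Send a heartbeat every 15 seconds and wait up to 15 seconds
+    %% for a response before re-connecting to the sink
+    {rt_heartbeat_interval, 15},
+    {rt_heartbeat_timeout, 15}
+  ]}
+```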
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA; and so on.
+
+2. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer].
+
+## Default Bucket Properties
+
+Riak KV version 2.2.0 changed the values of the default bucket
+properties hash. This will cause an issue replicating between Riak KV
+clusters with versions 2.2.0 or greater and Riak KV clusters with
+versions less than 2.2.0.
+
+To replicate between Riak KV versions 2.2.0 or greater and Riak KV
+clusters less than version 2.2.0, add the necessary override in the
+advanced.config file:
+
+```advancedconfig
+{riak_repl, [
+    {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}]}
+    ]}
+]}
+```
+
+If all of the Replication clusters are running Riak KV 2.2.0 or greater,
+this override is no longer necessary and should be removed.
diff --git a/content/riak/kv/2.2.6/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.2.6/configuring/v3-multi-datacenter/nat.md
new file mode 100644
index 0000000000..977cdca9b9
--- /dev/null
+++ b/content/riak/kv/2.2.6/configuring/v3-multi-datacenter/nat.md
@@ -0,0 +1,167 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "With NAT"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "With NAT"
+    identifier: "configuring_v3_replication_nat"
+    weight: 101
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v3/nat
+  - /riak/kv/2.2.6/ops/mdc/v3/nat
+---
+
+[config v3 ssl]: {{}}riak/kv/2.2.6/configuring/v3-multi-datacenter/ssl
+
+Riak's Version 3 Replication supports replication of data on
+networks that use static NAT.
+
+This can be used for replicating data over the internet where servers
+have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network).
+
+## Requirements
+
+In order for Replication to work on a server configured with NAT, the
+NAT addresses must be configured *statically*.
+
+## Configuration
+
+NAT rules can be configured at runtime, from the command line.
+
+* `riak-repl nat-map show`
+
+    Shows the current NAT mapping table
+
+* `riak-repl nat-map add <externalip>[:port] <internalip>`
+
+    Adds a NAT map from the external IP, with an optional port, to an
+    internal IP. The port number refers to a port that is automatically
+    mapped to the internal `cluster_mgr` port number.
+
+* `riak-repl nat-map del <externalip>[:port] <internalip>`
+
+    Deletes a specific NAT map entry.
+
+### Applying Changes at Runtime
+
+* Realtime NAT replication changes will be applied once realtime is
+  stopped and started using the following commands:
+
+    * `riak-repl realtime stop <clustername>`
+    * `riak-repl realtime start <clustername>`
+
+* Fullsync NAT replication changes will be applied on the next run of a
+  fullsync, or you can stop and start the current fullsync:
+
+    * `riak-repl fullsync stop <clustername>`
+    * `riak-repl fullsync start <clustername>`
+
+## Example
+
+* Cluster_A is the **source** of replicated data.
+* Cluster_B and Cluster_C are the **sinks** of the replicated data.
+
+### Cluster_A Setup
+
+Cluster_A is set up with nodes using the following **internal** IP
+addresses:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.1.20` | -
+`192.168.1.21` | -
+`192.168.1.22` | -
+`192.168.1.23` | -
+`192.168.1.24` | -
+
+### Cluster_B Setup
+
+The nodes of Cluster_B are configured with the following IP address
+mappings:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.2.40` | `50.16.238.120:5555`
+`192.168.2.41` | `50.16.238.121:5555`
+`192.168.2.42` | `50.16.238.122:5555`
+`192.168.2.43` | `50.16.238.123:5555`
+`192.168.2.44` | `50.16.238.124:5555`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT port listens on `5555`.
+
+### Cluster_C Setup
+
+The nodes of Cluster_C are set up with **static NAT** and configured
+with the following IP addresses:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.3.60` | `50.16.238.200:5550`
+`192.168.3.61` | `50.16.238.200:5551`
+`192.168.3.62` | `50.16.238.200:5552`
+`192.168.3.63` | `50.16.238.200:5553`
+`192.168.3.64` | `50.16.238.200:5554`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT ports listen on `5550` through `5554`.
+
+```bash
+# on any node of Cluster_A
+riak-repl clustername Cluster_A
+
+# on any node of Cluster_B
+riak-repl clustername Cluster_B
+
+# on any node of Cluster_C
+riak-repl clustername Cluster_C
+
+# on 50.16.238.120 of Cluster_B
+riak-repl nat-map add 50.16.238.120:5555 192.168.2.40
+# on 50.16.238.121 of Cluster_B
+riak-repl nat-map add 50.16.238.121:5555 192.168.2.41
+# on 50.16.238.122 of Cluster_B
+riak-repl nat-map add 50.16.238.122:5555 192.168.2.42
+# on 50.16.238.123 of Cluster_B
+riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
+# on 50.16.238.124 of Cluster_B
+riak-repl nat-map add 50.16.238.124:5555 192.168.2.44
+
+# on 192.168.3.60 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5550 192.168.3.60
+# on 192.168.3.61 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5551 192.168.3.61
+# on 192.168.3.62 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5552 192.168.3.62
+# on 192.168.3.63 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5553 192.168.3.63
+# on 192.168.3.64 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5554 192.168.3.64
+
+
+# Connect replication from Cluster_A to Cluster_B:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.120:5555
+# You can connect to any node in Cluster_B with NAT-mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
+
+# Connect replication from Cluster_A to Cluster_C:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.200:5550
+# You can connect to any node in Cluster_C with NAT-mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
+
+
+# on any node of Cluster_A
+riak-repl realtime enable Cluster_B
+riak-repl realtime enable Cluster_C
+
+riak-repl realtime start Cluster_B
+riak-repl realtime start Cluster_C
+```
diff --git a/content/riak/kv/2.2.6/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.2.6/configuring/v3-multi-datacenter/quick-start.md
new file mode 100644
index 0000000000..5cfe2e52c5
--- /dev/null
+++ b/content/riak/kv/2.2.6/configuring/v3-multi-datacenter/quick-start.md
@@ -0,0 +1,168 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "Quickstart"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Quickstart"
+    identifier: "configuring_v3_quickstart"
+    weight: 100
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v3/quick-start
+  - /riak/kv/2.2.6/ops/mdc/v3/quick-start
+---
+
+[install index]: {{}}riak/kv/2.2.6/setup/installing
+[perf index]: {{}}riak/kv/2.2.6/using/performance
+[config v3 mdc]: {{}}riak/kv/2.2.6/configuring/v3-multi-datacenter
+[cluster ops v3 mdc]: {{}}riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter
+
+This guide will walk you through the process of configuring Riak's v3
+Replication to perform replication between two sample Riak clusters on
+separate networks. It also covers bidirectional replication,
+which is accomplished by setting up unidirectional replication in both
+directions between the clusters. Note that both clusters must have the
+same ring size, but can have a different number of nodes.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* Install [Riak][install index]
+* Perform [System Tuning][perf index]
+* Review [Configuration][config v3 mdc]
+
+## About v3 Replication in 1.3 and higher
+
+Beginning with Riak KV version 1.3, the nomenclature for Source and Site
+clusters in v3 Replication changed. To more accurately reflect the behavior of each of
+the clusters, "listeners" and "sites" are now known as "sources" and
+"sinks." Data transfer now originates at the "source" and replicates to
+the "sink"; initiation is always from the primary (source) to the backup
+(sink) data center.
+
+Additionally, knowledge of the state of each cluster is now managed by a
+**cluster manager** process, which greatly simplifies the setup and
+maintenance of Multi-Datacenter replication.
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|-----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of `advanced.config`
+> under `cluster_mgr`.
+
+#### View Your Active Connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink       Cluster Name    <Ctrl-Pid>    [Members]
+----       ------------    ----------    ---------
+Cluster2   Cluster2        <0.7985.0>    ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink       Cluster Name    <Ctrl-Pid>    [Members]
+----       ------------    ----------    ---------
+Cluster1   Cluster1        <0.4456.0>    ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, repeat the `enable` and `start`
+steps in the opposite direction, from Cluster2. Once this is done,
+bidirectional replication should be operating.
+
+## More information
+
+For a full list of commands, enter `riak-repl` to see usage
+instructions, or check the [Operations][cluster ops v3 mdc] documentation.
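+
+For convenience, the complete command sequence used in this guide for
+bidirectional replication, using the example IPs above, is repeated
+below:
+
+```bash
+# Name each cluster (run once, on any node of the respective cluster)
+riak-repl clustername Cluster1         # on Cluster1
+riak-repl clustername Cluster2         # on Cluster2
+
+# Connect the clusters in both directions
+riak-repl connect 10.60.77.10:9080     # on Cluster1, to Cluster2
+riak-repl connect 10.60.67.149:9080    # on Cluster2, to Cluster1
+
+# Enable and start realtime replication in both directions
+riak-repl realtime enable Cluster2     # on Cluster1
+riak-repl realtime start Cluster2      # on Cluster1
+riak-repl realtime enable Cluster1     # on Cluster2
+riak-repl realtime start Cluster1      # on Cluster2
+```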
diff --git a/content/riak/kv/2.2.6/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.2.6/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..7db8f822e8
--- /dev/null
+++ b/content/riak/kv/2.2.6/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,170 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v3/ssl
+  - /riak/kv/2.2.6/ops/mdc/v3/ssl
+---
+
+[config reference#advanced.config]: {{}}riak/kv/2.2.6/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+ * Encryption of replication data
+ * SSL certificate chain validation
+ * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+]}
+```
+
+The `cacertdir` setting names a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_core` section of `advanced.config`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_depth, 3} % Sets the depth to 3
+    % ...
+]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. The
+intermediate certificates must not be self-signed.
+
+The following example depths illustrate this:
+
+ * a depth of `0` indicates that the certificate must be signed
+   directly by a root certificate authority (CA)
+ * a depth of `1` indicates that the certificate may be signed by at
+   most 1 intermediate CA, followed by a root CA
+ * a depth of `2` indicates that the certificate may be signed by at
+   most 2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL for *Version 3* is available in *Riak 1.4+*.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+Read how to [generate your own CA and
+keys](http://www.debian-administration.org/articles/618). Ensure that
+you remove the password protection from the keys you generate.
diff --git a/content/riak/kv/2.2.6/developing.md b/content/riak/kv/2.2.6/developing.md
new file mode 100644
index 0000000000..f537129267
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing.md
@@ -0,0 +1,73 @@
+---
+title: "Developing with Riak KV"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Developing"
+    identifier: "developing"
+    weight: 300
+    pre: lambda
+toc: true
+---
+
+[getting started]: ../developing/getting-started
+[usage index]: ../developing/usage
+[client libraries]: ../developing/client-libraries
+[dev data types]: ../developing/data-types
+[dev data modeling]: ../developing/data-modeling
+[apps index]: ../developing/app-guide
+[dev api index]: ../developing/api
+[dev faq]: ../developing/faq
+
+## In This Section
+
+#### [Getting Started][getting started]
+
+Step-by-step guide for getting started developing with Riak KV.
+
+[Learn More >>][getting started]
+
+#### [Usage][usage index]
+
+A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types.
+
+[Learn More >>][usage index]
+
+#### [Client Libraries][client libraries]
+
+Overview of client libraries for a variety of programming languages and environments.
+
+[Learn More >>][client libraries]
+
+#### [Data Types][dev data types]
+
+Overview and guide to working with data types in Riak KV.
+
+[Learn More >>][dev data types]
+
+#### [Data Modeling][dev data modeling]
+
+Information on use cases and data models that are a good fit for Riak KV.
+
+[Learn More >>][dev data modeling]
+
+#### [Application Guide][apps index]
+
+A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV.
+
+[Learn More >>][apps index]
+
+#### [APIs Reference][dev api index]
+
+Information and reference material on Riak KV APIs.
+
+[Learn More >>][dev api index]
+
+#### [FAQ][dev faq]
+
+Frequently asked questions when developing applications with Riak KV.
+
+[Learn More >>][dev faq]
+
diff --git a/content/riak/kv/2.2.6/developing/api.md b/content/riak/kv/2.2.6/developing/api.md
new file mode 100644
index 0000000000..1746d53eaa
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api.md
@@ -0,0 +1,37 @@
+---
+title: "APIs"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "APIs"
+    identifier: "developing_apis"
+    weight: 107
+    parent: "developing"
+toc: true
+---
+
+[dev api http]: ./http
+[dev api backend]: ./backend
+[dev api pbc]: ./protocol-buffers/
+
+## In This Section
+
+#### [HTTP APIs][dev api http]
+
+Documentation on Riak KV's HTTP API.
+
+[Learn More >>][dev api http]
+
+#### [Protocol Buffers][dev api pbc]
+
+Information on Riak KV's Protocol Buffer Client API.
+
+[Learn More >>][dev api pbc]
+
+#### [Backend API][dev api backend]
+
+Overview of Riak KV's storage backend API.
+
+[Learn More >>][dev api backend]
diff --git a/content/riak/kv/2.2.6/developing/api/backend.md b/content/riak/kv/2.2.6/developing/api/backend.md
new file mode 100644
index 0000000000..8f93d8234b
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/backend.md
@@ -0,0 +1,114 @@
+---
+title: "Backend API"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Backend API"
+    identifier: "apis_backend"
+    weight: 101
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/backend-api
+  - /riak/kv/2.2.6/dev/references/backend-api
+---
+
+[plan backend]: {{}}riak/kv/2.2.6/setup/planning/backend
+
+Riak's storage API uniformly applies to all of the
+[supported backends][plan backend]. This page presents the details of
+the storage backend API in the form of
+[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html)
+(specs).
+
+Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html),
+an Erlang static analysis tool. We recommend copying these specs into any
+custom backend modules and using them as a guide for development to
+avoid errors and ensure full compatibility with Riak.
+
+Also included below is the function export list that can be pasted directly
+into a custom storage backend module.
+
+```erlang
+%% Riak Storage Backend API
+-export([api_version/0,
+         start/2,
+         stop/1,
+         get/3,
+         put/5,
+         delete/4,
+         drop/1,
+         fold_buckets/4,
+         fold_keys/4,
+         fold_objects/4,
+         is_empty/1,
+         status/1,
+         callback/3]).
+
+%% ===================================================================
+%% Public API
+%% ===================================================================
+
+%% @doc Return the major version of the
+%% current API and a capabilities list.
+%% The current valid capabilities are async_fold
+%% and indexes.
+-spec api_version() -> {integer(), [atom()]}.
+
+%% @doc Start the backend
+-spec start(integer(), config()) -> {ok, state()} | {error, term()}.
+
+%% @doc Stop the backend
+-spec stop(state()) -> ok.
+ +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` diff --git a/content/riak/kv/2.2.6/developing/api/http.md b/content/riak/kv/2.2.6/developing/api/http.md new file mode 100644 index 0000000000..871e058f04 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/http.md @@ -0,0 +1,89 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.2.6/dev/references/http + - /riak/kv/2.2.6/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
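+
+For example, to fetch an object stored under a hypothetical key
+`docs/readme` in the bucket `test`, escape the slash as `%2F`:
+
+```curl
+curl http://localhost:8098/buckets/test/keys/docs%2Freadme
+```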
+
+## Bucket-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.2.6/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.2.6/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.2.6/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.2.6/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.2.6/developing/api/http/list-keys)
+
+## Object-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.2.6/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{}}riak/kv/2.2.6/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.2.6/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.2.6/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.2.6/developing/api/http/delete-object)
+
+## Riak-Data-Type-related Operations
+
+Method | URL
+:------|:----
+`GET` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.2.6/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/2.2.6/developing/data-types/#usage-examples)
+and subpages, e.g. [sets]({{}}riak/kv/2.2.6/developing/data-types/sets).
+
+Advanced users may consult the technical documentation inside the Riak
+KV internal module `riak_kv_wm_crdt`.
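+
+As a quick sketch, a counter stored under a hypothetical bucket type
+named `counters` (assumed to have been created and activated with
+`datatype = counter`) could be updated and read as follows:
+
+```curl
+# Increment the counter at my_bucket/my_key by 5
+curl -XPOST http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_key \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+
+# Fetch the counter's current value
+curl http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_key
+```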
+
+## Query-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.2.6/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<index_value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.2.6/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.2.6/developing/api/http/secondary-indexes)
+
+## Server-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.2.6/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.2.6/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.2.6/developing/api/http/list-resources)
+
+## Search-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{}}riak/kv/2.2.6/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.2.6/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{}}riak/kv/2.2.6/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{}}riak/kv/2.2.6/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{}}riak/kv/2.2.6/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.2.6/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{}}riak/kv/2.2.6/developing/api/http/store-search-schema)
diff --git a/content/riak/kv/2.2.6/developing/api/http/counters.md b/content/riak/kv/2.2.6/developing/api/http/counters.md
new file mode 100644
index 0000000000..5506f94ec6
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/counters.md
@@ -0,0 +1,78 @@
+---
+title: "HTTP Counters"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Counters"
+    identifier: "http_counters"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/counters
+  - /riak/kv/2.2.6/dev/references/http/counters
+---
+
+Riak counters are CRDTs (convergent replicated data types) that (eventually)
+converge to the correct total. You merely increment the counter with some
+integer, and any potential conflicts will be automatically resolved by Riak.
+
+## Setup
+
+Riak counters can only be used if the bucket has the `allow_mult` property
+set to `true`.
+
+```
+curl -XPUT localhost:8098/buckets/BUCKET/props \
+  -H "Content-Type: application/json" \
+  -d "{\"props\" : {\"allow_mult\": true}}"
+```
+
+If you attempt to use counters without setting the above, you'll get this
+message:
+
+```
+Counters require bucket property 'allow_mult=true'
+```
+
+## Request
+
+To increment a counter, POST an integer value using the `/counters` resource.
+This increments the keyed value by the given amount.
+
+```
+POST /buckets/BUCKET/counters/KEY
+```
+
+To retrieve the current value, issue a GET using `/counters`:
+
+```
+GET /buckets/BUCKET/counters/KEY
+```
+
+## Response
+
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.2.6/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.2.6/developing/api/http/fetch-object)) responses apply here.
+
+Caveats: Counters have no support for Secondary Indexes (2i), Links, or Custom HTTP Metadata.
+
+## Example
+
+The body must be an integer (positive or negative).
+
+```
+curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1"
+
+curl http://localhost:8098/buckets/my_bucket/counters/my_key
+1
+
+curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100"
+
+curl http://localhost:8098/buckets/my_bucket/counters/my_key
+101
+
+curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1"
+
+curl http://localhost:8098/buckets/my_bucket/counters/my_key
+100
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/delete-object.md b/content/riak/kv/2.2.6/developing/api/http/delete-object.md
new file mode 100644
index 0000000000..d8036baf8b
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/delete-object.md
@@ -0,0 +1,75 @@
+---
+title: "HTTP Delete Object"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Delete Object"
+    identifier: "http_delete_object"
+    weight: 107
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/delete-object
+  - /riak/kv/2.2.6/dev/references/http/delete-object
+---
+
+Deletes an object from the specified bucket/key.
+
+## Request
+
+```
+DELETE /types/type/buckets/bucket/keys/key
+DELETE /buckets/bucket/keys/key
+```
+
+Optional query parameters:
+
+* `rw` - quorum for both operations (get and put) involved in deleting an
+object (default is set at the bucket level)
+* `r` - (read quorum) how many replicas need to agree when retrieving the object
+* `pr` - (primary read quorum) works like `r` but requires that the nodes
+read from are not fallback nodes
+* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response
+* `dw` - (durable write quorum) how many replicas to commit to durable storage
+before returning a successful response
+* `pw` - (primary write quorum) how many replicas to commit to primary nodes
+before returning a successful response
+
+## Response
+
+Normal response codes:
+
+* `204 No Content`
+* `404 Not Found`
+
+Typical error codes:
+
+* `400 Bad Request` - e.g. when the `rw` parameter is invalid (> N)
+
+`404` responses are "normal" in the sense that DELETE operations are idempotent
+and not finding the resource has the same effect as deleting it.
+
+## Example
+
+```curl
+$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/test/keys/test2 HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/delete-search-index.md b/content/riak/kv/2.2.6/developing/api/http/delete-search-index.md
new file mode 100644
index 0000000000..8fcae15d6e
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/delete-search-index.md
@@ -0,0 +1,33 @@
+---
+title: "HTTP Delete Search Index"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Delete Search Index"
+    identifier: "http_delete_search_index"
+    weight: 116
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/delete-search-index
+  - /riak/kv/2.2.6/dev/references/http/delete-search-index
+---
+
+Deletes a Riak Search index.
+
+## Request
+
+```
+DELETE /search/index/<index_name>
+```
+
+## Normal Response Codes
+
+* `204 No Content` --- The index was successfully deleted (also returned
+  if the index did not exist to begin with)
+
+## Typical Error Codes
+
+* `503 Service Unavailable` --- The request timed out internally
diff --git a/content/riak/kv/2.2.6/developing/api/http/fetch-object.md b/content/riak/kv/2.2.6/developing/api/http/fetch-object.md
new file mode 100644
index 0000000000..cf0beb9047
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/fetch-object.md
@@ -0,0 +1,242 @@
+---
+title: "HTTP Fetch Object"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Fetch Object"
+    identifier: "http_fetch_object"
+    weight: 105
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/fetch-object
+  - /riak/kv/2.2.6/dev/references/http/fetch-object
+---
+
+Reads an object from the specified bucket/key.
+
+## Request
+
+```bash
+GET /types/type/buckets/bucket/keys/key
+GET /buckets/bucket/keys/key
+```
+
+Important headers:
+
+* `Accept` - When `multipart/mixed` is the preferred content-type, objects with
+siblings will return all siblings in a single request. See [Siblings examples](#siblings-examples). See
+also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1).
+
+Optional headers:
+
+* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics,
+matching on the `ETag` and `Last-Modified` of the object, respectively. If the
+object fails one of the tests (that is, if the ETag is equal or the object is
+unmodified since the supplied timestamp), Riak will return a `304 Not Modified`
+response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5).
+
+Optional query parameters:
+
+* `r` - (read quorum) how many replicas need to agree when retrieving the
+object ([default is defined by the bucket]({{}}riak/kv/2.2.6/developing/api/http/set-bucket-props))
+* `pr` - how many primary replicas need to be online when doing the read
+([default is defined by the bucket]({{}}riak/kv/2.2.6/developing/api/http/set-bucket-props))
+* `basic_quorum` - whether to return early in some failure cases (e.g.
when `r=1`
+and you get 2 errors and a success, `basic_quorum=true` would return an error)
+([default is defined by the bucket]({{}}riak/kv/2.2.6/developing/api/http/set-bucket-props))
+* `notfound_ok` - whether to treat notfounds as successful reads for the
+purposes of R ([default is defined by the bucket]({{}}riak/kv/2.2.6/developing/api/http/set-bucket-props))
+* `vtag` - when accessing an object with siblings, which sibling to retrieve.
+Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information.
+
+## Response
+
+Normal response codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified` (when using conditional request semantics)
+
+Typical error codes:
+
+* `400 Bad Request` - e.g. when the `r` parameter is invalid (> N)
+* `404 Not Found` - the object could not be found on enough partitions
+* `503 Service Unavailable` - the request timed out internally
+
+Important headers:
+
+* `Content-Type` - the media type/format
+* `X-Riak-Vclock` - the opaque vector clock for the object
+* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object
+* `ETag` - the entity tag for the object, useful for conditional GET operations
+and validation-based caching
+* `Last-Modified` - a timestamp for when the object was last written, in HTTP
+datetime format
+* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.2.6/learn/glossary/#links)
+
+The body of the response will be the contents of the object except when siblings
+are present.
+
+{{% note title="Siblings" %}}
+When `allow_mult` is set to true in the bucket properties, concurrent updates
+are allowed to create "sibling" objects, meaning that the object has any
+number of different values that are related to one another by the vector
+clock. This allows your application to use its own conflict resolution
+technique.
+
+An object with multiple sibling values will result in a `300 Multiple Choices`
+response. If the `Accept` header prefers `multipart/mixed`, all siblings will
+be returned in a single request as sections of the `multipart/mixed` response
+body. Otherwise, a list of "vtags" will be given in a simple text format. You
+can request individual siblings by adding the `vtag` query parameter. Scroll
+down to the 'manually requesting siblings' example below for more information.
+
+To resolve the conflict, store the resolved version with the `X-Riak-Vclock`
+given in the response.
+{{% /note %}}
+
+## Simple Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /buckets/test/keys/doc2 HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA==
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Link: </buckets/test>; rel="up"
+< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT
+< ETag: 6dQBm9oYA1mxRSH0e96l5W
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 13
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"foo":"bar"}
+```
+
+## Siblings examples
+
+### Manually requesting siblings
+
+Simple call to fetch an object that has siblings:
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /buckets/test/keys/doc HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 300 Multiple Choices
+< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA==
+< Vary: Accept, Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/plain
+< Content-Length: 102
+<
+Siblings:
+16vic4eU9ny46o4KPiDz1f
+4v5xOg4bVwUYZdMkqf0d6I
+6nr5tDTmhxnwuAFJDd2s6G
+6zRSZFUJlHXZ15o9CG0BYl
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+Now request one of the siblings directly:
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA==
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Link: </buckets/test>; rel="up"
+< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT
+< ETag: 16vic4eU9ny46o4KPiDz1f
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/x-www-form-urlencoded
+< Content-Length: 13
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"bar":"baz"}
+```
+
+### Get all siblings in one request
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed"
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /buckets/test/keys/doc HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: multipart/mixed
+>
+< HTTP/1.1 300 Multiple Choices
+< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA==
+< Vary: Accept, Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh
+< Content-Length: 766
+<
+
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/x-www-form-urlencoded
+Link: </buckets/test>; rel="up"
+Etag: 16vic4eU9ny46o4KPiDz1f
+Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT
+
+{"bar":"baz"}
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/json
+Link: </buckets/test>; rel="up"
+Etag: 4v5xOg4bVwUYZdMkqf0d6I
+Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT
+
+{"bar":"baz"}
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/json
+Link: </buckets/test>; rel="up"
+Etag: 6nr5tDTmhxnwuAFJDd2s6G
+Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT
+
+{"bar":"baz"}
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/json
+Link: </buckets/test>; rel="up"
+Etag: 6zRSZFUJlHXZ15o9CG0BYl
+Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT
+
+{"foo":"bar"}
+--YinLMzyUR9feB17okMytgKsylvh--
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/fetch-search-index.md b/content/riak/kv/2.2.6/developing/api/http/fetch-search-index.md
new file mode 100644
index 0000000000..73856c2b87
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/fetch-search-index.md
@@ -0,0 +1,47 @@
+---
+title: "HTTP Fetch Search Index"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Fetch Search Index"
+    identifier: "http_fetch_search_index"
+    weight: 115
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/fetch-search-index
+  - /riak/kv/2.2.6/dev/references/http/fetch-search-index
+---
+
+Retrieves information about a Riak Search [index]({{}}riak/kv/2.2.6/developing/usage/search/#simple-setup).
+
+## Request
+
+```
+GET /search/index/<index_name>
+```
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `404 Object Not Found` --- No Search index with that name is currently
+  available
+* `503 Service Unavailable` --- The request timed out internally
+
+## Response
+
+If the index is found, Riak will output a JSON object describing the
+index, including its name, the [`n_val`]({{}}riak/kv/2.2.6/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.2.6/developing/usage/search-schemas) used by the index.
Here is an example:
+
+```json
+{
+  "name": "my_index",
+  "n_val": 3,
+  "schema": "_yz_default"
+}
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.2.6/developing/api/http/fetch-search-schema.md
new file mode 100644
index 0000000000..5c94655085
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/fetch-search-schema.md
@@ -0,0 +1,38 @@
+---
+title: "HTTP Fetch Search Schema"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Fetch Search Schema"
+    identifier: "http_fetch_search_schema"
+    weight: 116
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/fetch-search-schema
+  - /riak/kv/2.2.6/dev/references/http/fetch-search-schema
+---
+
+Retrieves a Riak KV [search schema]({{}}riak/kv/2.2.6/developing/usage/search-schemas).
+
+## Request
+
+```
+GET /search/schema/<schema_name>
+```
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `404 Object Not Found`
+* `503 Service Unavailable` --- The request timed out internally
+
+## Response
+
+If the schema is found, Riak will return the contents of the schema as
+XML (all Riak Search schemas are XML).
diff --git a/content/riak/kv/2.2.6/developing/api/http/get-bucket-props.md b/content/riak/kv/2.2.6/developing/api/http/get-bucket-props.md
new file mode 100644
index 0000000000..9e90ae64b1
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/get-bucket-props.md
@@ -0,0 +1,82 @@
+---
+title: "HTTP Get Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Get Bucket Properties"
+    identifier: "http_get_bucket_props"
+    weight: 100
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/get-bucket-props
+  - /riak/kv/2.2.6/dev/references/http/get-bucket-props
+---
+
+Reads the bucket or bucket type properties.
+
+## Request
+
+```bash
+GET /buckets/bucket/props
+```
+
+Or, to read bucket properties from a bucket in a bucket type:
+
+```bash
+GET /types/type/buckets/bucket/props
+```
+
+Optional query parameters (only valid for the old format):
+
+* `props` - whether to return the bucket properties (`true` is the default)
+* `keys` - whether to return the keys stored in the bucket (`false` is the
+default). See also [HTTP List Keys]({{}}riak/kv/2.2.6/developing/api/http/list-keys).
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+Important headers:
+
+* `Content-Type` - `application/json`
+
+The JSON object in the response will contain up to two entries, `"props"` and
+`"keys"`, which are present or missing, according to the optional query
+parameters. The default is for only `"props"` to be present.
+
+See [HTTP Set Bucket Properties]({{}}riak/kv/2.2.6/developing/api/http/set-bucket-props) for more information about the available
+bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.2.6/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface.
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/props
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` diff --git a/content/riak/kv/2.2.6/developing/api/http/link-walking.md b/content/riak/kv/2.2.6/developing/api/http/link-walking.md new file mode 100644 index 0000000000..26c154dc36 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/http/link-walking.md @@ -0,0 +1,125 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/2.2.6/dev/references/http/link-walking + - /riak/kv/2.2.6/dev/references/http/link-walking +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/2.2.6/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.2.6/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete
+response from [fetching the object]({{}}riak/kv/2.2.6/developing/api/http/fetch-object), without the status
+code.
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Expires: Wed, 10 Mar 2010 20:24:49 GMT
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN
+< Content-Length: 970
+<
+
+--JZi8W8pB0Z3nO3odw11GUB4LQCN
+Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP
+
+--OjZ8Km9J5vbsmxtcn1p48J91cJP
+X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA=
+Location: /riak/test/doc
+Content-Type: application/json
+Link: </riak/test>; rel="up", </riak/test/doc2>; riaktag="next"
+Etag: 3pvmY35coyWPxh8mh4uBQC
+Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT
+
+{"riak":"CAP"}
+--OjZ8Km9J5vbsmxtcn1p48J91cJP--
+
+--JZi8W8pB0Z3nO3odw11GUB4LQCN
+Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C
+
+--RJKFlAs9PrdBNfd74HANycvbA8C
+X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA==
+Location: /riak/test/doc2
+Content-Type: application/json
+Link: </riak/test>; rel="up"
+Etag: 6dQBm9oYA1mxRSH0e96l5W
+Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT
+
+{"foo":"bar"}
+--RJKFlAs9PrdBNfd74HANycvbA8C--
+
+--JZi8W8pB0Z3nO3odw11GUB4LQCN--
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/list-buckets.md b/content/riak/kv/2.2.6/developing/api/http/list-buckets.md
new file mode 100644
index 0000000000..9a0d8aeacd
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/list-buckets.md
@@ -0,0 +1,64 @@
+---
+title: "HTTP List Buckets"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "List Buckets"
+    identifier: "http_list_buckets"
+    weight: 103
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/list-buckets
+  - /riak/kv/2.2.6/dev/references/http/list-buckets
+---
+
+Lists all known buckets (ones that have keys stored in them).
+
+{{% note title="Not for production use" %}}
+Similar to the list keys operation, this requires traversing all keys stored
+in the cluster and should not be used in production.
+{{% /note %}}
+
+## Request
+
+```bash
+# Using the default bucket type
+GET /buckets?buckets=true
+
+# Using a non-default bucket type
+GET /types/<type>/buckets?buckets=true
+```
+
+Required query parameter:
+
+* **buckets=true** - required to invoke the list-buckets functionality
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+Important headers:
+
+* `Content-Type - application/json`
+
+The JSON object in the response will contain a single entry, "buckets", which
+will be an array of bucket names.
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` diff --git a/content/riak/kv/2.2.6/developing/api/http/list-keys.md b/content/riak/kv/2.2.6/developing/api/http/list-keys.md new file mode 100644 index 0000000000..e59a175dad --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/http/list-keys.md @@ -0,0 +1,76 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/2.2.6/dev/references/http/list-keys + - /riak/kv/2.2.6/dev/references/http/list-keys +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` diff --git a/content/riak/kv/2.2.6/developing/api/http/list-resources.md b/content/riak/kv/2.2.6/developing/api/http/list-resources.md new file mode 100644 index 0000000000..70e65a3066 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/http/list-resources.md @@ -0,0 +1,80 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/2.2.6/dev/references/http/list-resources + - /riak/kv/2.2.6/dev/references/http/list-resources +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
+
+The standard resources are:
+
+* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.2.6/developing/api/http/#bucket-operations)
+* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.2.6/developing/api/http/secondary-indexes)
+* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.2.6/developing/api/http/link-walking)
+* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.2.6/developing/api/http/mapreduce)
+* `riak_kv_wm_object` - [Object/Key Operations]({{}}riak/kv/2.2.6/developing/api/http/#object-key-operations)
+* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.2.6/developing/api/http/ping)
+* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.2.6/developing/api/http/set-bucket-props)
+* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.2.6/developing/api/http/status)
+
+## Request
+
+```bash
+GET /
+```
+
+Headers:
+
+* `Accept` - `application/json` or `text/html`
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+Important headers:
+
+* `Link` - all resources that are described in the response body, but in Link
+form
+
+## Example
+
+Request JSON response
+
+```curl
+$ curl -i http://localhost:8098 -H "Accept: application/json"
+HTTP/1.1 200 OK
+Vary: Accept
+Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact)
+Link: </buckets>; rel="riak_kv_wm_buckets",</riak>; rel="riak_kv_wm_buckets",</buckets>; rel="riak_kv_wm_counter",</buckets>; rel="riak_kv_wm_index",</buckets>; rel="riak_kv_wm_keylist",</buckets>; rel="riak_kv_wm_link_walker",</riak>; rel="riak_kv_wm_link_walker",</mapred>; rel="riak_kv_wm_mapred",</buckets>; rel="riak_kv_wm_object",</riak>; rel="riak_kv_wm_object",</ping>; rel="riak_kv_wm_ping",</buckets>; rel="riak_kv_wm_props",</stats>; rel="riak_kv_wm_stats"
+Date: Wed, 27 Nov 2013 20:18:31 GMT
+Content-Type: application/json
+Content-Length: 398
+
+{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"}
+
+# Request HTML response
+curl -i http://localhost:8098 -H "Accept: text/html"
+HTTP/1.1 200 OK
+Vary: Accept
+Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact)
+Link: </buckets>; rel="riak_kv_wm_buckets",</riak>; rel="riak_kv_wm_buckets",</buckets>; rel="riak_kv_wm_counter",</buckets>; rel="riak_kv_wm_index",</buckets>; rel="riak_kv_wm_keylist",</buckets>; rel="riak_kv_wm_link_walker",</riak>; rel="riak_kv_wm_link_walker",</mapred>; rel="riak_kv_wm_mapred",</buckets>; rel="riak_kv_wm_object",</riak>; rel="riak_kv_wm_object",</ping>; rel="riak_kv_wm_ping",</buckets>; rel="riak_kv_wm_props",</stats>; rel="riak_kv_wm_stats"
+Date: Wed, 27 Nov 2013 20:20:05 GMT
+Content-Type: text/html
+Content-Length: 666
+
+
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/mapreduce.md b/content/riak/kv/2.2.6/developing/api/http/mapreduce.md
new file mode 100644
index 0000000000..1195d1b9a2
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/mapreduce.md
@@ -0,0 +1,70 @@
+---
+title: "HTTP MapReduce"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "MapReduce"
+    identifier: "http_mapreduce"
+    weight: 108
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/mapreduce
+  - /riak/kv/2.2.6/dev/references/http/mapreduce
+---
+
+[MapReduce]({{}}riak/kv/2.2.6/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow.
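+
+A MapReduce job is submitted as a JSON document with `inputs` and
+`query` fields. Formatted for readability, the query used in the
+example at the bottom of this page looks like this:
+
+```json
+{
+  "inputs": "test",
+  "query": [
+    {"link": {"bucket": "test"}},
+    {"map": {"language": "javascript", "name": "Riak.mapValuesJson"}}
+  ]
+}
+```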
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.2.6/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+*This request must include an entity (body), which is the JSON form of the MapReduce query.*
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/ping.md b/content/riak/kv/2.2.6/developing/api/http/ping.md
new file mode 100644
index 0000000000..6bfbedc962
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/ping.md
@@ -0,0 +1,53 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/ping
+  - /riak/kv/2.2.6/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load-balancers, and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/reset-bucket-props.md b/content/riak/kv/2.2.6/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..e737124b73
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/reset-bucket-props
+  - /riak/kv/2.2.6/dev/references/http/reset-bucket-props
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/bucket/props HTTP/1.1
+> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue)
+< Date: Tue, 06 Nov 2012 21:56:17 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/search-index-info.md b/content/riak/kv/2.2.6/developing/api/http/search-index-info.md
new file mode 100644
index 0000000000..6c010ce2c7
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/search-index-info.md
@@ -0,0 +1,52 @@
+---
+title: "HTTP Search Index Info"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Search Index Info"
+    identifier: "http_search_index_info"
+    weight: 114
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/search-index-info
+  - /riak/kv/2.2.6/dev/references/http/search-index-info
+---
+
+Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.2.6/developing/usage/search) in JSON format.
+
+## Request
+
+```
+GET /search/index
+```
+
+## Response
+
+If there are no currently available Search indexes, a `200 OK` will be
+returned but with an empty list as the response value.
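+
+As a sketch of that empty case (assuming Search is enabled and no indexes
+have been created yet):
+
+```curl
+$ curl http://localhost:8098/search/index
+[]
+```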
+ +Below is the example output if there is one Search index, called +`test_index`, currently available: + +```json +[ + { + "n_val": 3, + "name": "test_index", + "schema": "_yz_default" + } +] +``` + +#### Normal Response Codes + +* `200 OK` + +#### Typical Error Codes + +* `404 Object Not Found` --- Typically returned if Riak Search is not + currently enabled on the node +* `503 Service Unavailable` --- The request timed out internally diff --git a/content/riak/kv/2.2.6/developing/api/http/search-query.md b/content/riak/kv/2.2.6/developing/api/http/search-query.md new file mode 100644 index 0000000000..63a9dff6f5 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/http/search-query.md @@ -0,0 +1,69 @@ +--- +title: "HTTP Search Query" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Search Query" + identifier: "http_search_query" + weight: 113 + parent: "apis_http" +toc: true +aliases: + - /riak/2.2.6/dev/references/http/search-query + - /riak/kv/2.2.6/dev/references/http/search-query +--- + +Performs a [Riak KV Search]({{}}riak/kv/2.2.6/developing/usage/search) query. + +## Request + +``` +GET /search/query/ +``` + +## Optional Query Parameters + +* `wt` --- The [response + writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers) + to be used when returning the Search payload. The currently + available options are `json` and `xml`. The default is `xml`. +* `q` --- The actual Search query itself. Examples can be found in + [Using Search]({{}}riak/kv/2.2.6/developing/usage/search). If a query is not specified, Riak will return + information about the index itself, e.g. the number of documents + indexed. + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `400 Bad Request` --- Returned when, for example, a malformed query is + supplied +* `404 Object Not Found` --- Returned if the Search index you are + attempting to query does not exist +* `503 Service Unavailable` --- The request timed out internally + +## Response + +If a `200 OK` is returned, then the Search query has been successful. +Below is an example JSON response from querying an index that currently +has no documents associated with it: + +```json +{ + "response": { + "docs": [], + "maxScore": 0.0, + "numFound": 0, + "start": 0 + }, + "responseHeader": { + "status": 0, + "QTime": 10, + "params": { /* internal info from the query */ } + } +} +``` diff --git a/content/riak/kv/2.2.6/developing/api/http/secondary-indexes.md b/content/riak/kv/2.2.6/developing/api/http/secondary-indexes.md new file mode 100644 index 0000000000..48e2e4b75a --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/http/secondary-indexes.md @@ -0,0 +1,91 @@ +--- +title: "HTTP Secondary Indexes" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Secondary Indexes" + identifier: "http_2i" + weight: 109 + parent: "apis_http" +toc: true +aliases: + - /riak/2.2.6/dev/references/http/secondary-indexes + - /riak/kv/2.2.6/dev/references/http/secondary-indexes +--- + +[Secondary Indexes]({{}}riak/kv/2.2.6/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
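+
+The tagging itself happens at write time, via `X-Riak-Index-*` headers on an
+[HTTP Store Object]({{}}riak/kv/2.2.6/developing/api/http/store-object)
+request. As a sketch (the object body is hypothetical), the following write
+adds the index entry that the example query at the bottom of this page
+matches:
+
+```curl
+$ curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "Content-Type: text/plain" \
+  -H "x-riak-index-field1_bin: val1" \
+  -d 'some data'
+```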
+
+## Request
+
+### Exact Match
+
+```bash
+GET /buckets/mybucket/index/myindex_bin/value
+```
+
+### Range Query
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end
+```
+
+#### Range query with terms
+
+To see the index values matched by the range, use `return_terms=true`.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true
+```
+
+### Pagination
+
+Add the parameter `max_results` for pagination. This limits the number of
+results returned and provides a `continuation` value for the next request.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=
+```
+
+### Streaming
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?stream=true
+```
+
+## Response
+
+Normal status codes:
+
++ `200 OK`
+
+Typical error codes:
+
++ `400 Bad Request` - if the index name or index value is invalid.
++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system.
++ `503 Service Unavailable` - if the job timed out before it could complete
+
+## Example
+
+```curl
+$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 19
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+{"keys":["mykey1"]}
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/set-bucket-props.md b/content/riak/kv/2.2.6/developing/api/http/set-bucket-props.md
new file mode 100644
index 0000000000..0bb3912d4f
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/set-bucket-props.md
@@ -0,0 +1,101 @@
+---
+title: "HTTP Set Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Set Bucket Properties"
+    identifier: "http_set_bucket_props"
+    weight: 101
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/set-bucket-props
+  - /riak/kv/2.2.6/dev/references/http/set-bucket-props
+---
+
+Sets bucket properties like "n_val" and "allow_mult".
+
+## Request
+
+```bash
+PUT /buckets/bucket/props
+```
+
+Important headers:
+
+* `Content-Type` - `application/json`
+
+The body of the request should be a JSON object with a single entry "props".
+Unmodified bucket properties may be omitted.
+
+Available properties:
+
+* `n_val` (integer > 0) - the number of replicas for objects in this bucket
+* `allow_mult` (true or false) - whether to allow sibling objects to be created
+(concurrent updates)
+* `last_write_wins` (true or false) - whether to ignore object history (vector
+clock) when writing
+* `precommit` - [precommit hooks]({{}}riak/kv/2.2.6/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{}}riak/kv/2.2.6/developing/usage/commit-hooks)
+* `r, w, dw, rw` - default quorum values for operations on keys in the bucket.
+Valid values are:
+  * `"all"` - all nodes must respond
+  * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.*
+  * `"one"` - equivalent to 1
+  * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of diverse physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+  -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/props HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4
+OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 21
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/status.md b/content/riak/kv/2.2.6/developing/api/http/status.md
new file mode 100644
index 0000000000..309b74d2cd
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/status.md
@@ -0,0 +1,169 @@
+---
+title: "HTTP Status"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Status"
+    identifier: "http_status"
+    weight: 111
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/status
+  - /riak/kv/2.2.6/dev/references/http/status
+---
+
+Reports on the performance and configuration of the Riak node that receives the request. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active.
+
+## Performance
+
+Repeated requests to the `/stats` endpoint do not have a negative
+performance impact as the statistics are cached internally in Riak.
+
+## Request
+
+```bash
+GET /stats
+```
+
+Important headers:
+
+* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`.
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `404 Not Found` - if `riak_kv_stat` is not enabled
+
+Important headers:
+* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks)
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /stats HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: text/plain +> +< HTTP/1.1 200 OK +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 2102 +< +{ + "vnode_gets": 0, + "vnode_puts": 0, + "read_repairs": 0, + "vnode_gets_total": 0, + "vnode_puts_total": 0, + "node_gets": 0, + "node_gets_total": 0, + "node_get_fsm_time_mean": "undefined", + "node_get_fsm_time_median": "undefined", + "node_get_fsm_time_95": "undefined", + "node_get_fsm_time_99": "undefined", + "node_get_fsm_time_100": "undefined", + "node_puts": 0, + "node_puts_total": 0, + "node_put_fsm_time_mean": "undefined", + "node_put_fsm_time_median": "undefined", + "node_put_fsm_time_95": "undefined", + "node_put_fsm_time_99": "undefined", + "node_put_fsm_time_100": "undefined", + "read_repairs_total": 0, + "cpu_nprocs": 84, + "cpu_avg1": 251, + "cpu_avg5": 174, + "cpu_avg15": 110, + "mem_total": 7946684000.0, + "mem_allocated": 4340880000.0, + "nodename": "riak@127.0.0.1", + "connected_nodes": [ + + ], + "sys_driver_version": "1.5", + "sys_global_heaps_size": 0, + "sys_heap_type": "private", + "sys_logical_processors": 2, + "sys_otp_release": "R13B04", + "sys_process_count": 189, + "sys_smp_support": true, + "sys_system_version": "Erlang R13B04 (erts-5.7.5) [[source]] [[64-bit]] [[smp:2:2]] [[rq:2]] [[async-threads:5]] [[hipe]] [[kernel-poll:true]]", + "sys_system_architecture": "i386-apple-darwin10.3.0", + "sys_threads_enabled": true, + "sys_thread_pool_size": 5, + "sys_wordsize": 8, + "ring_members": [ + "riak@127.0.0.1" + ], + "ring_num_partitions": 64, + "ring_ownership": "[{'riak@127.0.0.1',64}]", + "ring_creation_size": 64, + "storage_backend": "riak_kv_bitcask_backend", + "pbc_connects_total": 0, + "pbc_connects": 0, + "pbc_active": 0, + "riak_kv_version": "0.11.0", + "riak_core_version": "0.11.0", + "bitcask_version": "1.0.1", + "luke_version": "0.1", + "webmachine_version": "1.7.1", + "mochiweb_version": "1.7.1", + "erlang_js_version": "0.4", + "runtime_tools_version": "1.8.3", + "crypto_version": "1.6.4", + "os_mon_version": "2.2.6", + "sasl_version": "2.1.9", + "stdlib_version": "1.16.5", + "kernel_version": "2.13.5" +} +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Output Explanation + +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.2.6/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
+
+Stat | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total | Total number of ignored gossip messages since node was started
+rings_reconciled_total | Total number of ring reconciliation operations since node was started
+rings_reconciled | Number of ring reconciliation operations in the last minute
+gossip_received | Number of gossip messages received in the last minute
+rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute
diff --git a/content/riak/kv/2.2.6/developing/api/http/store-object.md b/content/riak/kv/2.2.6/developing/api/http/store-object.md
new file mode 100644
index 0000000000..9bd777adce
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/store-object.md
@@ -0,0 +1,146 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/store-object
+  - /riak/kv/2.2.6/dev/references/http/store-object
+---
+
+Stores an
object under the specified bucket / key. Storing an object comes in +two forms, depending on whether you want to use a key of your choosing, or let +Riak assign a key to a new object. + +## Request + +```bash +POST /types/type/buckets/bucket/keys # Riak-defined key +PUT /types/type/buckets/bucket/keys/key # User-defined key +POST /buckets/bucket/keys # Riak-defined key +PUT /buckets/bucket/keys/key # User-defined key +``` + +For the sake of compatibility with older clients, `POST` is also acceptable in +the form where the key is specified. + +Important headers: + +* `Content-Type` must be set for the stored object. Set what you expect to +receive back when next requesting it. +* `X-Riak-Vclock` if the object already exists, the vector clock attached to the +object when read. +* `X-Riak-Meta-*` - any additional metadata headers that should be stored with +the object. +* `X-Riak-Index-*` - index entries under which this object should be indexed. +[Read more about Secondary Indexing]({{}}riak/kv/2.2.6/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.2.6/developing/api/http/link-walking) + +Optional headers (only valid on `PUT`): + +* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since` +invoke conditional request semantics, matching on the `ETag` and `Last-Modified` +of the existing object. These can be used to prevent overwriting a modified +object. If the test fails, you will receive a `412 Precondition Failed` +response. This does not prevent concurrent writes; it is possible for the +condition to evaluate to true for multiple requests if the requests occur at the +same time. + +Optional query parameters: + +* `w` (write quorum) how many replicas to write to before returning a successful +response (default is defined by the bucket level) +* `dw` (durable write quorum) how many replicas to commit to durable storage +before returning a successful response (default is defined at the bucket level) +* `pw` how many primary replicas must be online to attempt a write (default is +defined at the bucket level) +* `returnbody=[true|false]` whether to return the contents of the stored object. + +*This request must include a body (entity).* + +## Response + +Normal status codes: + +* `201 Created` (when submitting without a key) +* `200 OK` +* `204 No Content` +* `300 Multiple Choices` + +Typical error codes: + +* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N) +* `412 Precondition Failed` if one of the conditional request headers failed to +match (see above) + +Important headers: + +* `Location` a relative URL to the newly-created object (when submitting without +a key) + +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.2.6/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +may be returned if siblings existed or were created as part of the operation, +and the response can be dealt with similarly. + +## Example: Storing Without Key + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys \ + -H "Content-Type: text/plain" -d 'this is a test' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /buckets/test/keys HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: text/plain
+> Content-Length: 14
+>
+< HTTP/1.1 201 Created
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+## Example: Storing With Key
+
+```curl
+$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==
+> Content-Length: 13
+>
+< HTTP/1.1 200 OK
+< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Link: </buckets/test>; rel="up"
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 13
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"bar":"baz"}
+```
diff --git a/content/riak/kv/2.2.6/developing/api/http/store-search-index.md b/content/riak/kv/2.2.6/developing/api/http/store-search-index.md
new file mode 100644
index 0000000000..760c6ae149
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/store-search-index.md
@@ -0,0 +1,52 @@
+---
+title: "HTTP Store Search Index"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Store Search Index"
+    identifier: "http_store_search_index"
+    weight: 115
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/store-search-index
+  - /riak/kv/2.2.6/dev/references/http/store-search-index
+---
+
+Creates a new Riak Search [index]({{}}riak/kv/2.2.6/developing/usage/search/#simple-setup).
+
+## Request
+
+```
+PUT /search/index/<index_name>
+```
+
+## Optional Request Body
+
+If you run a `PUT` request to this endpoint without a request body, Riak
+will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.2.6/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.
+
+To specify a different schema, however, you must pass Riak a JSON object
+as the request body in which the `schema` field specifies the name of
+the schema to use. If you've [stored a schema]({{}}riak/kv/2.2.6/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
+request would create an index called `my_index` that uses that schema:
+
+```curl
+curl -XPUT http://localhost:8098/search/index/my_index \
+  -H "Content-Type: application/json" \
+  -d '{"schema": "my_custom_schema"}'
+```
+
+More information can be found in [Using Search]({{}}riak/kv/2.2.6/developing/usage/search).
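+
+For comparison, a sketch of the no-body form described above, which would
+create a hypothetical index named `my_index` with the default `_yz_default`
+schema:
+
+```curl
+curl -XPUT http://localhost:8098/search/index/my_index
+```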
+
+## Normal Response Codes
+
+* `204 No Content` --- The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` --- The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` --- The request timed out internally
diff --git a/content/riak/kv/2.2.6/developing/api/http/store-search-schema.md b/content/riak/kv/2.2.6/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..d0dcb29395
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/http/store-search-schema.md
@@ -0,0 +1,50 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/http/store-search-schema
+  - /riak/kv/2.2.6/dev/references/http/store-search-schema
+---
+
+Creates a new Riak [Search schema]({{}}riak/kv/2.2.6/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/<schema_name>
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.2.6/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response
+
+* `204 No Content` --- The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` --- The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict` --- The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable` --- The request timed out internally
diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..ee6d9dd50c
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers.md
@@ -0,0 +1,185 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/protocol-buffers
+  - /riak/kv/2.2.6/dev/references/protocol-buffers
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages.
Messages are all encoded the same way, consisting of:
+
+* 32-bit length of message code + Protocol Buffers message in network
+  order
+* 8-bit message code to identify the Protocol Buffers message
+* N bytes of Protocol Buffers-encoded message
+
+### Example
+
+```
+00 00 00 07 09 0A 01 62 12 01 6B
+|----Len---|MC|----Message-----|
+
+Len = 0x07
+Message Code (MC) = 0x09 = RpbGetReq
+RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B
+
+Decoded Message:
+bucket: "b"
+key: "k"
+```
+
+## Message Codes
+
+Code | Message |
+:----|:--------|
+0 | `RpbErrorResp` |
+1 | `RpbPingReq` |
+2 | `RpbPingResp` |
+3 | `RpbGetClientIdReq` |
+4 | `RpbGetClientIdResp` |
+5 | `RpbSetClientIdReq` |
+6 | `RpbSetClientIdResp` |
+7 | `RpbGetServerInfoReq` |
+8 | `RpbGetServerInfoResp` |
+9 | `RpbGetReq` |
+10 | `RpbGetResp` |
+11 | `RpbPutReq` |
+12 | `RpbPutResp` |
+13 | `RpbDelReq` |
+14 | `RpbDelResp` |
+15 | `RpbListBucketsReq` |
+16 | `RpbListBucketsResp` |
+17 | `RpbListKeysReq` |
+18 | `RpbListKeysResp` |
+19 | `RpbGetBucketReq` |
+20 | `RpbGetBucketResp` |
+21 | `RpbSetBucketReq` |
+22 | `RpbSetBucketResp` |
+23 | `RpbMapRedReq` |
+24 | `RpbMapRedResp` |
+25 | `RpbIndexReq` |
+26 | `RpbIndexResp` |
+27 | `RpbSearchQueryReq` |
+28 | `RpbSearchQueryResp` |
+29 | `RpbResetBucketReq` |
+30 | `RpbResetBucketResp` |
+31 | `RpbGetBucketTypeReq` |
+32 | `RpbSetBucketTypeResp` |
+40 | `RpbCSBucketReq` |
+41 | `RpbCSUpdateReq` |
+50 | `RpbCounterUpdateReq` |
+51 | `RpbCounterUpdateResp` |
+52 | `RpbCounterGetReq` |
+53 | `RpbCounterGetResp` |
+54 | `RpbYokozunaIndexGetReq` |
+55 | `RpbYokozunaIndexGetResp` |
+56 | `RpbYokozunaIndexPutReq` |
+57 | `RpbYokozunaIndexPutResp` |
+58 | `RpbYokozunaSchemaGetReq` |
+59 | `RpbYokozunaSchemaGetResp` |
+60 | `RpbYokozunaSchemaPutReq` |
+80 | `DtFetchReq` |
+81 | `DtFetchResp` |
+82 | `DtUpdateReq` |
+83 | `DtUpdateResp` |
+253 | `RpbAuthReq` |
+254 | `RpbAuthResp` |
+255 | `RpbStartTls` |
+
+{{% note title="Message Definitions" %}}
+All Protocol Buffers messages are defined in the `riak.proto` and other
+`.proto` files in the `/src` directory of the
+RiakPB project.
+{{% /note %}}
+
+### Error Response
+
+If the request does not result in an error, Riak will return one of a
+variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`,
+depending on which request message is sent.
+
+If the server experiences an error processing a request, however, it
+will return an `RpbErrorResp` message instead of the response expected
+for the given request (e.g. `RpbGetResp` is the expected response to
+`RpbGetReq`). Error messages contain an error string and an error code,
+like this:
+
+```protobuf
+message RpbErrorResp {
+    required bytes errmsg = 1;
+    required uint32 errcode = 2;
+}
+```
+
+### Values
+
+* `errmsg` --- A string representation of what went wrong
+* `errcode` --- A numeric code. Currently, only `RIAKC_ERR_GENERAL=1`
+  is defined.
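+
+As a worked sketch of the framing described above (the error string
+`not found` is hypothetical; message code `0` and `RIAKC_ERR_GENERAL=1`
+come from the tables on this page), an `RpbErrorResp` would appear on
+the wire as:
+
+```
+00 00 00 0E 00 0A 09 6E 6F 74 20 66 6F 75 6E 64 10 01
+|----Len---|MC|-------------Message--------------|
+
+Len = 0x0E (1-byte message code + 13 bytes of message)
+Message Code (MC) = 0x00 = RpbErrorResp
+
+Decoded Message:
+errmsg: "not found"
+errcode: 1
+```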
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..5250cccf27 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,30 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/auth-req + - /riak/kv/2.2.6/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.2.6/using/security/basics). diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..72d4d2e2a6 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,78 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/coverage-queries + - /riak/kv/2.2.6/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
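+
+As a sketch in the decoded style used elsewhere in these pages (the
+`users` bucket name and the slice count are hypothetical), a minimal
+plan request would decode as:
+
+```
+RpbCoverageReq protoc decode:
+bucket: "users"
+min_partitions: 16
+```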
diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/delete-object.md
new file mode 100644
index 0000000000..23300e3409
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/delete-object.md
@@ -0,0 +1,100 @@
+---
+title: "PBC Delete Object"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Delete Object"
+    identifier: "pbc_delete_object"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/protocol-buffers/delete-object
+  - /riak/kv/2.2.6/dev/references/protocol-buffers/delete-object
+---
+
+Delete an object in the specified [bucket type]({{}}riak/kv/2.2.6/using/cluster-operations/bucket-types)/bucket/key location.
+
+## Request
+
+```protobuf
+message RpbDelReq {
+    required bytes bucket = 1;
+    required bytes key = 2;
+    optional uint32 rw = 3;
+    optional bytes vclock = 4;
+    optional uint32 r = 5;
+    optional uint32 w = 6;
+    optional uint32 pr = 7;
+    optional uint32 pw = 8;
+    optional uint32 dw = 9;
+    optional uint32 timeout = 10;
+    optional bool sloppy_quorum = 11;
+    optional uint32 n_val = 12;
+    optional bytes type = 13;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description |
+:---------|:------------|
+`bucket` | The name of the bucket in which the object is stored
+`key` | The key under which the object is stored
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw`, provided that that integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description |
+:---------|:------------|
+`rw` | How many replicas to delete before returning a successful response
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Used to prevent deletion of objects that have been modified since the last GET request (sent as a byte array)
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the delete request will be sent
+`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter.
+
+## Response
+
+Only the message code is returned.
+ +## Example + +#### Request + +``` +Hex 00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65 + 74 12 01 6B 18 01 +Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>> + +RpbDelReq protoc decode: +bucket: "notabucket" +key: "k" +rw: 1 + +``` + +#### Response + +``` +Hex 00 00 00 01 0E +Erlang <<0,0,0,1,14>> + +RpbDelResp - only message code defined +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-counter-store.md new file mode 100644 index 0000000000..01e88e2ec2 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-counter-store.md @@ -0,0 +1,31 @@ +--- +title: "PBC Data Type Counter Store" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Data Type Counter Store" + identifier: "pbc_dt_counter_store" + weight: 117 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/dt-counter-store + - /riak/kv/2.2.6/dev/references/protocol-buffers/dt-counter-store +--- + +An operation to update a [counter]({{}}riak/kv/2.2.6/developing/data-types). + +## Request + +```protobuf +message CounterOp { + optional sint64 increment = 1; +} +``` + +The `increment` value specifies how much the counter will be incremented +or decremented, depending on whether the `increment` value is positive +or negative. This operation can be used to update counters that are +stored on their own in a key or [within a map]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..9a33a62786 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,127 @@ +--- +title: "PBC Data Type Fetch" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Data Type Fetch" + identifier: "pbc_dt_fetch" + weight: 114 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/dt-fetch + - /riak/kv/2.2.6/dev/references/protocol-buffers/dt-fetch +--- + +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.2.6/developing/data-types). This request results in a `DtFetchResp` +message (explained in the **Response** section below). + +## Request + +```protobuf +message DtFetchReq { + required bytes bucket = 1; + required bytes key = 2; + required bytes type = 3; + optional uint32 r = 4; + optional uint32 pr = 5; + optional bool basic_quorum = 6; + optional bool notfound_ok = 7; + optional uint32 timeout = 8; + optional bool sloppy_quorum = 9; + optional uint32 n_val = 10; + optional bool include_context = 11 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`key` | The key where the Data Type is stored +`type` | The [Using Bucket Types]({{}}riak/kv/2.2.6/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) + +#### Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. 
Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `r` and +`pr`, provided that that integer value is less than or equal +to N, _or_ a special value denoting `one` +(`4294967295-1`), `quorum` +(`4294967295-2`), `all` +(`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description +:---------|:----------- +`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object +`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object +`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success basic_quorum=true would return an error +`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes to which the delete request will be sent +`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client + +## Response + +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. + +```protobuf +message DtFetchResp { + enum DataType { + COUNTER = 1; + SET = 2; + MAP = 3; + } + + optional bytes context = 1; + required DataType type = 2; + optional DtValue value = 3; +} +``` + +If the `include_context` option is specified, an opaque "context" value +will be returned along with the user-readable data. When sending an +update request, the client should send this context as well, just as one +would send a [vclock]({{}}riak/kv/2.2.6/learn/glossary/#vector-clock) for standard KV updates. + +The type of the Data Type is specified in the `type` field, and must be +one of the three possible values of the `DataType` enum (`COUNTER`, +`SET`, or `MAP`). + +The current value of the Data Type is contained in the `value` field, +which itself contains a `DtValue` message. This message will have the +following structure: + +```protobuf +message DtValue { + optional sint64 counter_value = 1; + repeated bytes set_value = 2; + repeated MapEntry map_value = 3; +} +``` + +If the Data Type queried is a counter, it will return an integer value +for the counter; it a set, it will return the sets current value, in +bytes, if a map it will return a `MapEntry` message. 
`MapEntry` messages +are structured as follows: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-map-store.md new file mode 100644 index 0000000000..1fa399eecd --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-map-store.md @@ -0,0 +1,73 @@ +--- +title: "PBC Data Type Map Store" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Data Type Map Store" + identifier: "pbc_dt_map_store" + weight: 119 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/dt-map-store + - /riak/kv/2.2.6/dev/references/protocol-buffers/dt-map-store +--- + +An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc. + +## Request + +Operations on maps are requested using a `MapOp` message, which has the following structure: + +```protobuf +message MapOp { + repeated MapField adds = 1; + repeated MapField removes = 2; + repeated MapUpdate updates = 3; +} +``` + +In a `MapOp` message, you can either add or remove fields (sets, counters, or maps) to or from the map or update a field or multiple fields. You can include as many field additions or removals and/or field updates as you wish. + +Adding or removing a field involves including a `MapField` message in your `MapOp` operation: + +```protobuf +message MapField { + enum MapFieldType { + COUNTER = 1; + SET = 2; + REGISTER = 3; + FLAG = 4; + MAP = 5; + } + required bytes name = 1; + required MapFieldType type = 2; +} +``` + +The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated. + +If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure: + +```protobuf +message MapUpdate { + enum FlagOp { + ENABLE = 1; + DISABLE = 2; + } + required MapField field = 1; + optional CounterOp counter_op = 2; + optional SetOp set_op = 3; + optional bytes register_op = 4; + optional FlagOp flag_op = 5; + optional MapOp map_op = 6; +} +``` + +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-set-store). + +If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). + +Updating a register does not involve sending a special message type. 
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..446bb6c6a5 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,32 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/dt-set-store + - /riak/kv/2.2.6/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..98097f4165 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,128 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/dt-store + - /riak/kv/2.2.6/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.2.6/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.2.6/using/cluster-operations/bucket-types). 
+
+Also required is a `DtOp` message that specifies which operation is to
+be performed, depending on whether the Data Type being updated is a
+[counter]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-map-store).
+
+```protobuf
+message DtOp {
+  optional CounterOp counter_op = 1;
+  optional SetOp set_op = 2;
+  optional MapOp map_op = 3;
+}
+```
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, and `pw`
+parameters, provided that the value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
+`context` | The opaque binary "context" that informs Riak which version of a Data Type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.2.6/learn/glossary/#vector-clock)
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_body` | Whether to return the contents of the stored object. Defaults to `false`.
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be returned to the client when the `DtUpdateResp` is sent to the client.
+
+## Response
+
+The response to a Data Type update request is analogous to
+[`RpbPutResp`]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/store-object) for KV operations. If
+`return_body` is set in the update request message (as explained above),
+the message will include the opaque context of the Data Type (`context`)
+and the new value of the Data Type _after_ the update has completed
+(depending on whether the Data Type is a counter, set, or map). If no
+key was specified in the update request, it will include the
+Riak-assigned key (`key`).
+
+```protobuf
+message DtUpdateResp {
+  optional bytes key = 1;
+  optional bytes context = 2;
+  optional sint64 counter_value = 3;
+  repeated bytes set_value = 4;
+  repeated MapEntry map_value = 5;
+}
+```
+
+Assuming `return_body` is set to `true`: if a counter is updated, the
+response will include an integer as the `counter_value`; if a set is
+updated, a list of binaries will be returned as the `set_value`; and if a
+map is updated, the returned `map_value` will be a `MapEntry` message.
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..a791eb5658 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,31 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/dt-union + - /riak/kv/2.2.6/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..c799ca0434 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,181 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/fetch-object + - /riak/kv/2.2.6/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and two error responses plus one success have already arrived, in which case `basic_quorum=true` returns an error immediately instead of waiting for the remaining replies
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match
+`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it
+`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+
+## Response
+
+```protobuf
+message RpbGetResp {
+  repeated RpbContent content = 1;
+  optional bytes vclock = 2;
+  optional bool unchanged = 3;
+}
+```
+
+#### Values
+
+Value | Description
+:-----|:-----------
+`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty.
+`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings
+`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true`
+
+The content entries hold the object value and any metadata.
+Below is the structure of an `RpbContent` message, which is
+included in GET/PUT responses (`RpbGetResp` (above) and
+[`RpbPutResp`]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/store-object), respectively):
+
+```protobuf
+message RpbContent {
+  required bytes value = 1;
+  optional bytes content_type = 2;
+  optional bytes charset = 3;
+  optional bytes content_encoding = 4;
+  optional bytes vtag = 5;
+  repeated RpbLink links = 6;
+  optional uint32 last_mod = 7;
+  optional uint32 last_mod_usecs = 8;
+  repeated RpbPair usermeta = 9;
+  repeated RpbPair indexes = 10;
+  optional bool deleted = 11;
+}
+```
+
+From the above, we can see that an `RpbContent` message will always
+contain the binary `value` of the object. But it could also contain any
+of the following optional parameters:
+
+* `content_type` --- The content type of the object, e.g. `text/plain`
+  or `application/json`
+* `charset` --- The character encoding of the object, e.g. `utf-8`
+* `content_encoding` --- The content encoding of the object, e.g.
+  `gzip`
+* `vtag` --- The object's [vtag]({{}}riak/kv/2.2.6/learn/glossary/#vector-clock)
+* `links` --- This parameter is associated with the now-deprecated link
+  walking feature and should not be used by Riak clients
+* `last_mod` --- A timestamp for when the object was last modified, in
+  [Unix time](http://en.wikipedia.org/wiki/Unix_time) seconds
+* `last_mod_usecs` --- The microseconds portion of the last-modified
+  timestamp
+* `usermeta` --- This field stores user-specified key/value metadata
+  pairs to be associated with the object. `RpbPair` messages used to
+  send metadata of this sort are structured like this:
+
+  ```protobuf
+  message RpbPair {
+    required bytes key = 1;
+    optional bytes value = 2;
+  }
+  ```
+  Note that a pair may hold either a key alone or both a key and a value.
+  `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.2.6/developing/usage/secondary-indexes) to objects (in the optional
+  `indexes` field).
+* `deleted` --- Whether the object has been deleted (i.e. whether a
+  tombstone for the object has been found under the specified key)
+
+{{% note title="Note on missing keys" %}}
+Remember: if a key is not stored in Riak, an `RpbGetResp` response without the
+`content` and `vclock` fields will be returned. This should be mapped to
+whatever convention the client language uses to return not found. The Erlang
+client, for example, returns the atom `{error, notfound}`.
+{{% /note %}}
+
+## Example
+
+#### Request
+
+```
+Hex 00 00 00 07 09 0A 01 62 12 01 6B
+Erlang <<0,0,0,7,9,10,1,98,18,1,107>>
+
+RpbGetReq protoc decode:
+bucket: "b"
+key: "k"
+```
+
+#### Response
+
+```
+Hex 00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44
+ 6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B
+ 49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B
+ CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65
+ 30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00
+Erlang <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122,
+ 56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64,
+ 224,185,6,18,31,107,206,97,96,96,96,204,96,202,5,82,44,172,194,91,63,
+ 101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>>
+
+RpbGetResp protoc decode:
+content {
+  value: "v2"
+  vtag: "3SDlf4INKz8hNdhyImKIru"
+  last_mod: 1271442363
+  last_mod_usecs: 105696
+}
+vclock: "k\316a```\314`\312\005R,\254\302[?e0%2\345\2612\354V\267=\312\227\005\000"
+```
diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-props.md
new file mode 100644
index 0000000000..edee7147e5
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-props.md
@@ -0,0 +1,110 @@
+---
+title: "PBC Get Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Get Bucket Properties"
+    identifier: "pbc_get_bucket_props"
+    weight: 102
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/protocol-buffers/get-bucket-props
+  - /riak/kv/2.2.6/dev/references/protocol-buffers/get-bucket-props
+---
+
+Fetch a bucket's properties.
+
+## Request
+
+```protobuf
+message RpbGetBucketReq {
+  required bytes bucket = 1;
+  optional bytes type = 2;
+}
+```
+
+The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.2.6/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
+the `default` bucket type will be used.
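+
+As a rough illustration of what goes over the wire, here is a minimal
+Python sketch that hand-encodes an `RpbGetBucketReq`. It assumes the
+standard PBC framing (a 4-byte big-endian length followed by a one-byte
+message code and the protobuf payload) and message code 19 for
+`RpbGetBucketReq`; the bucket and type names are made up for the
+example, and the single-byte length encoding only holds for values
+shorter than 128 bytes.
+
+```python
+import struct
+
+def str_field(field_no, value):
+    # Protobuf length-delimited field: tag byte (field_no << 3 | 2),
+    # then a one-byte length (values < 128 only), then the raw bytes.
+    return bytes([(field_no << 3) | 2, len(value)]) + value
+
+payload = str_field(1, b"mybucket") + str_field(2, b"mytype")
+frame = struct.pack(">IB", len(payload) + 1, 19) + payload  # code 19 assumed
+```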
+ +## Response + +When an `RpbGetBucketReq` message is sent to Riak, it will respond with +an `RpbGetBucketResp` message, which returns the bucket's properties: + +```protobuf +message RpbGetBucketResp { + required RpbBucketProps props = 1; +} +``` + +The `RpbBucketProps` value itself is structured as follows: + +```protobuf +message RpbBucketProps { + optional uint32 n_val = 1; + optional bool allow_mult = 2; + optional bool last_write_wins = 3; + repeated RpbCommitHook precommit = 4; + optional bool has_precommit = 5 [default = false]; + repeated RpbCommitHook postcommit = 6; + optional bool has_postcommit = 7 [default = false]; + optional RpbModFun chash_keyfun = 8; + optional RpbModFun linkfun = 9; + optional uint32 old_vclock = 10; + optional uint32 young_vclock = 11; + optional uint32 big_vclock = 12; + optional uint32 small_vclock = 13; + optional uint32 pr = 14; + optional uint32 r = 15; + optional uint32 w = 16; + optional uint32 pw = 17; + optional uint32 dw = 18; + optional uint32 rw = 19; + optional bool basic_quorum = 20; + optional bool notfound_ok = 21; + optional bytes backend = 22; + optional bool search = 23; + enum RpbReplMode { + FALSE = 0; + REALTIME = 1; + FULLSYNC = 2; + TRUE = 3; + } + optional RpbReplMode repl = 24; + optional bytes search_index = 25; + optional bytes datatype = 26; + optional bool consistent = 27; +} +``` + +#### Optional Response Values + +Each `RpbBucketProps` message returns all of the properties associated +with a particular bucket. Default values for bucket properties, as well +as descriptions of all of the above properties, can be found in the +[configuration file]({{}}riak/kv/2.2.6/configuring/reference/#default-bucket-properties) documentation. + +It should be noted that the value of an `RpbBucketProps` message may +include other message types, such as `RpbModFun` (specifying +module-function pairs for bucket properties that require them) and +`RpbCommitHook` (specifying the module-function pair and name of a +commit hook). Those message types are structured like this: + +```protobuf +message RpbModFun { + required bytes module = 1; + required bytes function = 2; +} + +message RpbCommitHook { + optional RpbModFun modfun = 1; + optional bytes name = 2; +} +``` + +{{% note title="Note on `RpbReplMode`" %}} +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) +{{% /note %}} diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-type.md new file mode 100644 index 0000000000..58a36559de --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-type.md @@ -0,0 +1,33 @@ +--- +title: "PBC Get Bucket Type" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Get Bucket Type" + identifier: "pbc_get_bucket_type" + weight: 112 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/get-bucket-type + - /riak/kv/2.2.6/dev/references/protocol-buffers/get-bucket-type +--- + +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.2.6/using/cluster-operations/bucket-types). + +## Request + +```protobuf +message RpbGetBucketTypeReq { + required bytes type = 1; +} +``` + +Only the name of the bucket type needs to be specified (under `name`). 
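+
+In practice you would rarely build this message by hand; client
+libraries wrap it. As a hedged sketch, assuming the official Python
+client is installed and a node is listening on the default PB port
+8087, fetching the properties of a hypothetical bucket type `mytype`
+might look like this:
+
+```python
+from riak import RiakClient
+
+# Connect over protocol buffers; host/port are assumptions for a local node.
+client = RiakClient(host="127.0.0.1", pb_port=8087)
+
+# Sends an RpbGetBucketTypeReq under the hood and returns a dict of props.
+props = client.bucket_type("mytype").get_properties()
+print(props.get("n_val"))
+```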
+ +## Response + +A bucket type's properties will be sent to the client as part of an +[`RpbBucketProps`]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/get-client-id.md new file mode 100644 index 0000000000..963da8fb5b --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/get-client-id.md @@ -0,0 +1,61 @@ +--- +title: "PBC Get Client ID" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Get Client ID" + identifier: "pbc_get_client_id" + weight: 127 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/get-client-id + - /riak/kv/2.2.6/dev/references/protocol-buffers/get-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Get the client id used for this connection. Client ids are used for +conflict resolution and each unique actor in the system should be +assigned one. A client id is assigned randomly when the socket is +connected and can be changed using [Set Client ID]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/set-client-id). + +## Request + +Just the `RpbGetClientIdReq` message code. No request message defined. + +## Response + +```protobuf +// Get ClientId Request - no message defined, just send RpbGetClientIdReq +message code +message RpbGetClientIdResp { + required bytes client_id = 1; // Client id in use for this connection +} +``` + +## Example + +Request + +``` +Hex 00 00 00 01 03 +Erlang <<0,0,0,1,3>> +``` + + +Response + +``` +Hex 00 00 00 07 04 0A 04 01 65 01 B5 +Erlang <<0,0,0,7,4,10,4,1,101,1,181>> + +RpbGetClientIdResp protoc decode: +client_id: "001e001265" +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..dcf8cc5232 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,76 @@ +--- +title: "PBC List Buckets" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "List Buckets" + identifier: "pbc_list_buckets" + weight: 100 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/list-buckets + - /riak/kv/2.2.6/dev/references/protocol-buffers/list-buckets +--- + +List all of the bucket names available. + +{{% note title="Caution" %}} +This call can be expensive for the server. Do not use in performance-sensitive +code. +{{% /note %}} + + +## Request + +Only the message code is required. 
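+
+Because the request carries no protobuf payload, the frame is just the
+4-byte length and the message code. Here is a small Python sketch using
+the code `15` for `RpbListBucketsReq` shown in the example below; the
+host and port are assumptions for a local node:
+
+```python
+import socket
+import struct
+
+MSG_LIST_BUCKETS_REQ = 15
+
+# The length prefix counts the code byte plus the (empty) protobuf payload.
+frame = struct.pack(">IB", 1, MSG_LIST_BUCKETS_REQ)
+assert frame == b"\x00\x00\x00\x01\x0f"  # matches the hex dump below
+
+sock = socket.create_connection(("127.0.0.1", 8087))
+sock.sendall(frame)
+```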
+ +## Response + + +```protobuf +message RpbListBucketsResp { + repeated bytes buckets = 1; +} +``` + + +Values + +* `buckets` --- Buckets on the server + +## Example + +#### Request + +```bash +Hex 00 00 00 01 0F +Erlang <<0,0,0,1,15>> + +RpbListBucketsReq - only message code defined +``` + + +#### Response + +```bash +Hex 00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62 + 34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02 + 62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37 +Erlang <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10, + 3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>> + +RpbListBucketsResp protoc decode: +buckets: "b1" +buckets: "b5" +buckets: "b4" +buckets: "b8" +buckets: "b3" +buckets: "b10" +buckets: "b9" +buckets: "b2" +buckets: "b6" +buckets: "b7" +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/list-keys.md new file mode 100644 index 0000000000..77026bb250 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/list-keys.md @@ -0,0 +1,97 @@ +--- +title: "PBC List Keys" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "List Keys" + identifier: "pbc_list_keys" + weight: 101 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/list-keys + - /riak/kv/2.2.6/dev/references/protocol-buffers/list-keys +--- + +List all of the keys in a bucket. This is a streaming call, with +multiple response messages sent for each request. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```protobuf +message RpbListKeysReq { + required bytes bucket = 1; +} +``` + +Optional Parameters + +* `bucket` --- bucket to get keys from + +## Response + +```protobuf +message RpbListKeysResp { + repeated bytes keys = 1; + optional bool done = 2; +} +``` + +#### Values + +* **keys** - batch of keys in the bucket. +* **done** - set true on the last response packet + +## Example + +#### Request + +```bash +Hex 00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73 +Erlang <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>> + +RpbListKeysReq protoc decode: +bucket: "listkeys" + +``` + +#### Response Packet 1 + +```bash +Hex 00 00 00 04 12 0A 01 34 +Erlang <<0,0,0,4,18,10,1,52>> + +RpbListKeysResp protoc decode: +keys: "4" + +``` + +#### Response Packet 2 + +```bash +Hex 00 00 00 08 12 0A 02 31 30 0A 01 33 +Erlang <<0,0,0,8,18,10,2,49,48,10,1,51>> + +RpbListKeysResp protoc decode: +keys: "10" +keys: "3" +``` + + +#### Response Packet 3 + +```bash +Hex 00 00 00 03 12 10 01 +Erlang <<0,0,0,3,18,16,1>> + +RpbListKeysResp protoc decode: +done: true + +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/mapreduce.md new file mode 100644 index 0000000000..587e1d682c --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/mapreduce.md @@ -0,0 +1,149 @@ +--- +title: "PBC MapReduce" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "MapReduce" + identifier: "pbc_mapreduce" + weight: 107 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/mapreduce + - /riak/kv/2.2.6/dev/references/protocol-buffers/mapreduce +--- + +Execute a MapReduce job. 
+
+## Request
+
+
+```protobuf
+message RpbMapRedReq {
+  required bytes request = 1;
+  required bytes content_type = 2;
+}
+```
+
+
+Required Parameters
+
+* `request` --- The MapReduce job itself
+* `content_type` --- The encoding of the MapReduce job
+
+MapReduce jobs can be encoded in two different ways:
+
+* `application/json` --- JSON-encoded MapReduce job
+* `application/x-erlang-binary` --- Erlang external term format
+
+The JSON encoding is the same as the [REST API]({{}}riak/kv/2.2.6/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{}}riak/kv/2.2.6/developing/app-guide/advanced-mapreduce/#erlang).
+
+## Response
+
+The results of the MapReduce job are returned for each phase that
+generates a result, encoded in the same format the job was submitted in.
+Multiple response messages will be returned, followed by a final message
+at the end of the job.
+
+```protobuf
+message RpbMapRedResp {
+  optional uint32 phase = 1;
+  optional bytes response = 2;
+  optional bool done = 3;
+}
+```
+
+
+Values
+
+* `phase` --- Phase number of the MapReduce job
+* `response` --- Response encoded with the `content_type` submitted
+* `done` --- Set `true` on the last response packet
+
+## Example
+
+Here is an example of submitting a JSON-encoded job that sums up a
+bucket full of JSON-encoded values:
+
+```
+{"inputs": "bucket_501653",
+ "query":
+    [{"map": {"arg": null,
+              "name": "Riak.mapValuesJson",
+              "language": "javascript",
+              "keep": false}},
+     {"reduce": {"arg": null,
+                 "name": "Riak.reduceSum",
+                 "language": "javascript",
+                 "keep": true}}]}
+```
+
+Request
+
+```bash
+Hex 00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
+ 22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
+ 33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
+ 6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
+ 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
+ 6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
+ 2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
+ 61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
+ 70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
+ 65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
+ 6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
+ 69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
+ 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
+ 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
+ 3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
+ 69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
+Erlang <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
+ 117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
+ 121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
+ 110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
+ 109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
+ 110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
+ 116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
+ 44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
+ 32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
+ 46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
+ 97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
+ 34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
+ 112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>
+
+RpbMapRedReq protoc decode:
+request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null,
+"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}},
+ {"reduce": {"arg": null, "name": "Riak.reduceSum",
"language": +"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/ping.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..8c51397f5e --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/ping.md @@ -0,0 +1,42 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/ping + - /riak/kv/2.2.6/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..87114fb164 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,59 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/2.2.6/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/2.2.6/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
+
+## Example
+
+Request to reset the properties for the bucket `friends`:
+
+#### Request
+
+```bash
+Hex 00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
+Erlang <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>
+
+RpbResetBucketReq protoc decode:
+bucket: "friends"
+
+```
+
+#### Response
+
+```bash
+Hex 00 00 00 01 1E
+Erlang <<0,0,0,1,30>>
+
+RpbResetBucketResp - only message code defined
+```
diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/search.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..0baecffb7b
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/search.md
@@ -0,0 +1,148 @@
+---
+title: "PBC Search"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Search"
+    identifier: "pbc_search"
+    weight: 109
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/protocol-buffers/search
+  - /riak/kv/2.2.6/dev/references/protocol-buffers/search
+---
+
+Send a Search request to retrieve a list of documents, along with a few
+stats.
+
+## Request
+
+
+```protobuf
+message RpbSearchQueryReq {
+  required bytes q = 1;
+  required bytes index = 2;
+  optional uint32 rows = 3;
+  optional uint32 start = 4;
+  optional bytes sort = 5;
+  optional bytes filter = 6;
+  optional bytes df = 7;
+  optional bytes op = 8;
+  repeated bytes fl = 9;
+  optional bytes presort = 10;
+}
+```
+
+Required Parameters
+
+* `q` --- The contents of the query
+* `index` --- The name of the index to search
+
+Optional Parameters
+
+* `rows` --- The maximum number of rows to return
+* `start` --- A start offset, i.e. the number of keys to skip before
+  returning values
+* `sort` --- How the search results are to be sorted
+* `filter` --- Filters search with additional query scoped to inline
+  fields
+* `df` --- Override the `default_field` setting in the schema file
+* `op` --- `and` or `or`, to override the `default_op` operation setting
+  in the schema file
+* `fl` --- Limits the results to the given list of fields
+* `presort` --- Presort. The options are `key` or `score`
+
+
+## Response
+
+The results of a search query are returned as a repeating list of 0 or
+more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
+more key/value pairs (`RpbPair`) that match the given request
+parameters. The response also returns the maximum search score and the
+number of results.
+
+
+```protobuf
+// RpbPair is a generic key/value pair datatype used for
+// other message types
+message RpbPair {
+  required bytes key = 1;
+  optional bytes value = 2;
+}
+
+message RpbSearchDoc {
+  repeated RpbPair fields = 1;
+}
+
+message RpbSearchQueryResp {
+  repeated RpbSearchDoc docs = 1;
+  optional float max_score = 2;
+  optional uint32 num_found = 3;
+}
+```
+
+Values
+
+* `docs` --- A list of docs that match the search request
+* `max_score` --- The top score returned
+* `num_found` --- The total number of values matched by this
+  search
+
+
+## Example
+
+Request
+
+Here we search for any animals that begin with the string `pig`. We only
+want the first 100, and we sort the values by a `name` field.
+ +```bash +RpbSearchQueryReq protoc decode: +q: "pig*" +index: "animals" +rows: 100 +start: 0 +sort: "name" + +Hex 00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E + 69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65 +Erlang <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110, + 105,109,97,108,115,24,100,32,0,42,4,110,97, + 109,101>> +``` + +Response + +```bash +Hex 00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D + 61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65 + 12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69 + 6D 61 6C 12 06 70 69 67 65 6F 6E 18 02 +Erlang <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109, + 97,108,18,3,112,105,103,10,12,10,4,110,97, + 109,101,18,4,102,114,101,100,10,18,10,16,10, + 6,97,110,105,109,97,108,18,6,112,105,103, + 101,111,110,24,2>> + +RpbSearchQueryResp protoc decode: +docs { + fields { + key: "animal" + value: "pig" + } + fields { + key: "name" + value: "fred" + } +} +docs { + fields { + key: "animal" + value: "pigeon" + } +} +num_found: 2 +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/secondary-indexes.md new file mode 100644 index 0000000000..3cef814e3b --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/secondary-indexes.md @@ -0,0 +1,121 @@ +--- +title: "PBC Secondary Indexes" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Secondary Indexes" + identifier: "pbc_secondary_indexes" + weight: 108 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/secondary-indexes + - /riak/kv/2.2.6/dev/references/protocol-buffers/secondary-indexes +--- + +Request a set of keys that match a secondary index query. + +## Request + +```protobuf +message RpbIndexReq { + enum IndexQueryType { + eq = 0; + range = 1; + } + required bytes bucket = 1; + required bytes index = 2; + required IndexQueryType qtype = 3; + optional bytes key = 4; + optional bytes range_min = 5; + optional bytes range_max = 6; + optional bool return_terms = 7; + optional bool stream = 8; + optional uint32 max_results = 9; + optional bytes continuation = 10; + optional uint32 timeout = 11; + optional bytes type = 12; + optional bytes term_regex = 13; + optional bool pagination_sort = 14; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`index` | The name of the index to be queried +`qtype` | The type of index query to be performed. 
This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key`, or `range` for a range query
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`key` | The exact index value to match if `qtype` is set to `eq`
+`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
+`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
+`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
+`max_results` | If pagination is turned on, the number of results to be returned to the client
+`continuation` | The opaque continuation value returned in a previous paginated response, used to retrieve the next page of results
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.2.6/developing/usage/bucket-types).
+`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
+`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
+
+## Response
+
+The results of a Secondary Index query are returned as a repeating list
+of 0 or more keys that match the given request parameters.
+
+```protobuf
+message RpbIndexResp {
+  repeated bytes keys = 1;
+  repeated RpbPair results = 2;
+  optional bytes continuation = 3;
+  optional bool done = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`keys` | A list of keys that match the index request
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/fetch-object).
+`continuation` | Used for paginated responses
+`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
+
+## Example
+
+#### Request
+
+Here we look for any exact matches of `chicken` on an `animal_bin` index
+for a bucket named `farm`.
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/server-info.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..277ee835b9 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,58 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/server-info + - /riak/kv/2.2.6/dev/references/protocol-buffers/server-info +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..dc70496b57 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,68 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/2.2.6/dev/references/protocol-buffers/set-bucket-props +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/2.2.6/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
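+
+The nesting of `RpbBucketProps` inside `RpbSetBucketReq` is visible in
+the wire encoding. This Python sketch rebuilds the request frame from
+the example below, where the props submessage contains only
+`allow_mult = true` (field 2 of `RpbBucketProps`, varint `1`) and the
+message code 21 (`0x15`) comes from that hex dump:
+
+```python
+import struct
+
+bucket = b"friends"
+props = b"\x10\x01"  # field 2 (allow_mult), wire type 0 (varint), value 1
+payload = (
+    b"\x0a" + bytes([len(bucket)]) + bucket  # field 1: bucket name
+    + b"\x12" + bytes([len(props)]) + props  # field 2: nested RpbBucketProps
+)
+frame = struct.pack(">IB", len(payload) + 1, 21) + payload
+
+assert frame == bytes.fromhex("00 00 00 0e 15 0a 07 66 72 69 65 6e 64 73 12 02 10 01")
+```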
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..21cd1fa5b2 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,31 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/2.2.6/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.2.6/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..d9a28dc185 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,62 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/set-client-id + - /riak/kv/2.2.6/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
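+
+Reading such a code-only response follows the same framing in reverse.
+Here is a hedged Python sketch (no short-read handling, so a sketch
+rather than production code); the code `6` for `RpbSetClientIdResp`
+comes from the hex dump in the example below:
+
+```python
+import struct
+
+MSG_SET_CLIENT_ID_RESP = 6
+
+def read_frame(sock):
+    # 4-byte big-endian length, then one code byte plus the payload.
+    (length,) = struct.unpack(">I", sock.recv(4))
+    body = sock.recv(length)
+    return body[0], body[1:]  # (message code, protobuf payload)
+
+# For RpbSetClientIdResp the payload is empty and only the code 6 arrives.
+```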
+
+## Example
+
+Request
+
+```
+Hex 00 00 00 07 05 0A 04 01 65 01 B6
+Erlang <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "\001e\001\266"
+
+```
+
+
+Response
+
+```
+Hex 00 00 00 01 06
+Erlang <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..7bd7baf309
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,150 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/references/protocol-buffers/store-object
+  - /riak/kv/2.2.6/dev/references/protocol-buffers/store-object
+---
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/2.2.6/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.2.6/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.2.6/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/2.2.6/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/2.2.6/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+  required bytes bucket = 1;
+  optional bytes key = 2;
+  optional bytes vclock = 3;
+  required RpbContent content = 4;
+  optional uint32 w = 5;
+  optional uint32 dw = 6;
+  optional bool return_body = 7;
+  optional uint32 pw = 8;
+  optional bool if_not_modified = 9;
+  optional bool if_none_match = 10;
+  optional bool return_head = 11;
+  optional uint32 timeout = 12;
+  optional bool asis = 13;
+  optional bool sloppy_quorum = 14;
+  optional uint32 n_val = 15;
+  optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, and
+`pw` parameters, provided that the value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Omit if this is a new key or if you deliberately want to create a sibling.
`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`.
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_head` | Return the metadata for the now-stored object without returning the value of the object
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+
+The `if_not_modified`, `if_none_match`, and `asis` parameters are set
+only for messages sent between nodes in a Riak cluster and should not be
+set by Riak clients.
+
+#### Response
+
+```protobuf
+message RpbPutResp {
+  repeated RpbContent contents = 1;
+  optional bytes vclock = 2;
+  optional bytes key = 3;
+}
+```
+
+If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
+will contain the current object after the PUT completes, in `contents`,
+as well as the object's [causal context]({{}}riak/kv/2.2.6/learn/concepts/causal-context), in the `vclock`
+field. The `key` will be sent only if the server generated a random key
+for the object.
+
+If `return_body` is not set and no key is generated, the PUT response
+will be empty.
+
+## Example
+
+#### Request
+
+```
+Hex 00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B
+ 22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01
+Erlang <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34,
+ 98,97,114,34,125,40,2,56,1>>
+
+RpbPutReq protoc decode:
+bucket: "b"
+key: "k"
+content {
+  value: "{"foo":"bar"}"
+}
+w: 2
+return_body: true
+
+```
+
+#### Response
+
+```
+Hex 00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A
+ 22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39
+ 36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF
+ B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60
+ CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC
+ 0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0
+ 12 FA 20 89 2C 00
+Erlang <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125,
+ 42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86,
+ 106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96,
+ 96,96,202,96,202,5,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33,
+ 182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>>
+
+RpbPutResp protoc decode:
+contents {
+  value: "{"foo":"bar"}"
+  vtag: "1caykOD96iNAhomyeVjOYC"
+  last_mod: 1271453743
+  last_mod_usecs: 406416
+}
+vclock: "k\316a```\312`\312\005R,,\351\014\206\031L\211\214y\254\014Z!\266G\371 \302l\315I\254\rw|\240\022\372 \211,\000"
+
+```
diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-delete.md
new file mode 100644
index 0000000000..1be5b5c1b7
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-delete.md
@@ -0,0 +1,33 @@
+---
+title: "PBC Yokozuna Index Delete"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Yokozuna Index Delete"
+    identifier: "pbc_yz_index_delete"
+    weight: 122
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - 
/riak/2.2.6/dev/references/protocol-buffers/yz-index-delete + - /riak/kv/2.2.6/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/#message-codes) code with no data on success. + diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..b041652413 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,59 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/yz-index-get + - /riak/kv/2.2.6/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.2.6/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..b997f25607 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,45 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/yz-index-put + - /riak/kv/2.2.6/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. 
+ +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.2.6/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/#message-codes) code with no data on success. + diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-schema-get.md new file mode 100644 index 0000000000..9d94636e88 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-schema-get.md @@ -0,0 +1,48 @@ +--- +title: "PBC Yokozuna Schema Get" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Yokozuna Schema Get" + identifier: "pbc_yz_schema_get" + weight: 123 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/yz-schema-get + - /riak/kv/2.2.6/dev/references/protocol-buffers/yz-schema-get +--- + +Fetch a [search schema]({{}}riak/kv/2.2.6/developing/usage/search-schemas) from Riak Search. + +## Request + +In a request message, you only need to specify the name of the schema as +a binary (under `name`); + +```protobuf +message RpbYokozunaSchemaGetReq { + required bytes name = 1; // Schema name +} +``` + +## Response + +```protobuf +message RpbYokozunaSchemaGetResp { + required RpbYokozunaSchema schema = 1; +} +``` + +The response message will include a `RpbYokozunaSchema` structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message includes the schema `name` and its xml `content`. diff --git a/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-schema-put.md new file mode 100644 index 0000000000..0944f25de9 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/api/protocol-buffers/yz-schema-put.md @@ -0,0 +1,41 @@ +--- +title: "PBC Yokozuna Schema Put" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Yokozuna Schema Put" + identifier: "pbc_yz_schema_put" + weight: 124 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.2.6/dev/references/protocol-buffers/yz-schema-put + - /riak/kv/2.2.6/dev/references/protocol-buffers/yz-schema-put +--- + +Create a new Solr [search schema]({{}}riak/kv/2.2.6/developing/usage/search-schemas). + +## Request + +```protobuf +message RpbYokozunaSchemaPutReq { + required RpbYokozunaSchema schema = 1; +} +``` + +Each message must contain a `RpbYokozunaSchema` object structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.2.6/developing/usage/search-schemas) `content` as XML. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/#message-codes) code with no data on success. 
diff --git a/content/riak/kv/2.2.6/developing/api/repl-hooks.md b/content/riak/kv/2.2.6/developing/api/repl-hooks.md
new file mode 100644
index 0000000000..7eff7e2894
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/api/repl-hooks.md
@@ -0,0 +1,192 @@
+---
+title_supertext: "Riak Multi-Datacenter Replication:"
+title: "Hooks API"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Multi-Datacenter REPL Hooks API"
+    identifier: "apis_repl_hooks"
+    weight: 100
+    parent: "developing_apis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v2/hooks
+  - /riak/kv/2.2.6/ops/mdc/v2/hooks
+---
+[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl
+
+This document is a guide to developing extensions for Riak's
+Multi-Datacenter Replication feature.
+
+## Replication Hooks
+
+Riak allows applications to register replication hooks to control
+either of the following:
+
+* when extra objects need to be replicated along with the current object
+* when an object should _not_ be replicated
+
+To register a hook, you must call the following function in an
+application-specific Erlang module, where `MyMod` is to be replaced
+with the name of your custom module:
+
+```erlang
+riak_core:register([{repl_helper, MyMod}]).
+```
+
+## Replication Hook API
+
+A replication hook must implement the following functions:
+
+### send_realtime/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook controls whether an [object][object]
+replicated in realtime should be sent. To send this object, return `ok`;
+to prevent the object from being sent, return `cancel`. You can also
+return a list of Riak objects to be replicated immediately *before* the
+current object. This is useful when you have an object that refers to
+other objects, e.g. a chunked file, and want to ensure that all of the
+dependency objects are replicated before the dependent object.
+
+### send/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook is used in fullsync replication. To send this
+[object][object],
+return `ok`; to prevent the object from being sent, return `cancel`. You
+can also return a list of Riak objects to be replicated immediately
+*before* the current object. This is useful when you have an object
+that refers to other objects, e.g. a chunked file, and want to ensure
+that all the dependency objects are replicated before the dependent
+object.
+
+### recv/1
+
+```erlang
+(riak_object) -> ok | cancel
+```
+
+When an [object][object]
+is received by the client site, this hook is run. You can use it to
+update metadata or to deny the object.
+
+## Implementing a Sample Replication Hook
+
+The following is a simple replication hook that will log when an object
+is received via replication. For more information about the functions in
+the sample, see the [Replication Hook API](#replication-hook-api) section above.
+
+Here is the relevant Erlang code:
+
+```erlang
+%% Riak Enterprise MDC replication hook sample
+
+-module(riak_replication_hook_sample).
+-export([register/0]).
+-export([recv/1, send/2, send_realtime/2]).
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + diff --git a/content/riak/kv/2.2.6/developing/app-guide.md b/content/riak/kv/2.2.6/developing/app-guide.md new file mode 100644 index 0000000000..af7476bddc --- /dev/null +++ b/content/riak/kv/2.2.6/developing/app-guide.md @@ -0,0 +1,415 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/2.2.6/dev/using/application-guide/ + - /riak/kv/2.2.6/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/2.2.6/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.2.6/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.2.6/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/2.2.6/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.2.6/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.2.6/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.2.6/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.2.6/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.2.6/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.2.6/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.2.6/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.2.6/developing/usage/search +[use ref search]: {{}}riak/kv/2.2.6/using/reference/search +[usage 2i]: {{}}riak/kv/2.2.6/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.2.6/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.2.6/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.2.6/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.2.6/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.2.6/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.2.6/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.2.6/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.2.6/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.2.6/setup/planning/backend/memory +[obj model java]: {{}}riak/kv/2.2.6/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.2.6/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.2.6/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.2.6/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.2.6/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.2.6/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.2.6/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.2.6/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.2.6/using/reference/strong-consistency +[cluster ops strong consistency]: 
{{}}riak/kv/2.2.6/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/2.2.6/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/2.2.6/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/2.2.6/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/2.2.6/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/2.2.6/setup/installing
+[getting started]: {{}}riak/kv/2.2.6/developing/getting-started
+[usage index]: {{}}riak/kv/2.2.6/developing/usage
+[usage search schema]: {{}}riak/kv/2.2.6/developing/usage/search-schemas
+[glossary]: {{}}riak/kv/2.2.6/learn/glossary
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answers to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data** --- While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects** --- Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](http://basho.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects** --- Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys** --- It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects. Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]** --- If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so.
But if your data can be modeled as a
+  [counter][dev data types#counters], [set][dev data types#sets], or
+  [map][dev data types#maps], you should seriously consider using
+  [Riak Data Types][dev data types], which can speed application
+  development and transfer a great deal of complexity away from the
+  application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you use it to store:
+
+* **Objects that exceed 1-2MB in size** --- If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{< baseurl >}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies** --- If your data cannot be
+  easily denormalized or if it requires that objects can be easily
+  assembled into and accessible as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommended for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search] --- Getting started with Riak Search
+* [Search Details][use ref search] --- A detailed overview of the concepts and design
+  considerations behind Riak Search
+* [Search Schema][usage search schema] --- How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API** --- Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon).
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you.
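+
+To give a feel for the API, here is a minimal sketch of a Search query
+issued from the official Erlang client; the index name and query string
+are assumptions for illustration, and the index is presumed to already
+exist and be associated with a bucket:
+
+```erlang
+%% a sketch, assuming a local node listening for protocol buffers on 8087
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>).
+```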
+ +### When Not to Use Search + +* **When deep pagination is needed** --- At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** --- In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] --- A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] --- A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] --- An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** --- If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** --- If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** --- If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** --- While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
If you have data that requires
+  a modeling solution that can't be covered, you should stick to
+  standard K/V operations.
+* **When object size is of significant concern** --- Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth usage.
+Riak comes equipped with a set of default MapReduce jobs that you can
+employ, or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce] --- A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce] --- A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only** --- You should use MapReduce only when
+  truly necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do** --- Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i] --- A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i] --- Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination** --- At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead.
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes** --- If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes.
+* **If you're using Bitcask** --- 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy in accordance with the available Data Types. To
+see if this feature might be a good fit for your application, we
+recommend checking out the following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What Are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers who want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable?
+
+Although Riak always performs best when storing and retrieving immutable
+data, Riak also handles mutable objects very ably using a variety of
+eventual consistency principles.
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] --- Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] --- A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] --- How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] --- A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] --- A listing of frequently used terms in Riak's + documentation + diff --git a/content/riak/kv/2.2.6/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.2.6/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..14c08bfb5d --- /dev/null +++ b/content/riak/kv/2.2.6/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,798 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/2.2.6/dev/advanced/mapreduce/ + - /riak/kv/2.2.6/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/2.2.6/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.2.6/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.2.6/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.2.6/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.2.6/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.2.6/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to +run MapReduce jobs using Erlang or JavaScript. + +{{% note title="Deprecation Warning" %}} +Javascript MapReduce is deprecated and will be removed in a future version. +{{% /note %}} + + +### Why Do We Use MapReduce for Querying Riak KV? + +Key/value stores like Riak KV generally do not offer the kinds of complex +querying capabilities found in other data storage systems, such as +relational databases. MapReduce enables you to perform powerful queries +over the data stored in Riak KV but should be used with caution. + +The main goal of MapReduce is to spread the processing of a query across +many systems to take advantage of parallel processing power. This is +generally done by dividing the query into several steps, i.e. dividing +the dataset into several chunks and then running those step/chunk pairs +on separate physical hosts. Riak KV's MapReduce has an additional goal: +increasing data locality. When processing a large dataset, it's often +much more efficient to take the computation to the data than it is to +bring the data to the computation. + +"Map" and "Reduce" are phases in the query process. Map functions take +one piece of data as input and produce zero or more results as output. +If you're familiar with [mapping over a list][mapping list] +in functional programming languages, you're already familiar with the +"Map" steps in a MapReduce query. + +## MapReduce caveats + +MapReduce should generally be treated as a fallback rather than a +standard part of an application. There are often ways to model data +such that dynamic queries become single key retrievals, which are +dramatically faster and more reliable in Riak KV, and tools such as Riak +search and 2i are simpler to use and may place less strain on a +cluster. + +### R=1 + +One consequence of Riak KV's processing model is that MapReduce queries +have an effective `R` value of 1. The queries are distributed +to a representative sample of the cluster where the data is expected to +be found, and if one server lacks a copy of data it's supposed to have, +a MapReduce job will not attempt to look for it elsewhere. + +For more on the value of `R`, see our documentation on [replication properties][apps replication properties]. + +### Key lists + +Asking Riak KV to generate a list of all keys in a production environment +is generally a bad idea. It's an expensive operation. + +Attempting to constrain that operation to a bucket (e.g., +`mapred_bucket` as used below) does not help because Riak KV must still +pull all keys from storage to determine which ones are in the +specified bucket. + +If at all possible, run MapReduce against a list of known keys. + +### Code distribution + +As we'll discuss in this document, the functions invoked from Erlang +MapReduce must be available on all servers in the cluster unless +using the client library from an Erlang shell. + +### Security restrictions + +If Riak's security functionality is enabled, there are two +restrictions on MapReduce that come into play: + +* The `riak_kv.mapreduce` permission must be granted to the user (or + via the user's groups) +* Other than the module `riak_kv_mapreduce`, any Erlang modules + distributed with Riak KV will **not** be accessible to custom MapReduce + code unless made available via the `add_path` mechanism documented + in [Installing Custom Code][use ref custom code]. + +## How Riak KV's MapReduce Queries Are Specified + +MapReduce queries in Riak KV have two components: (1) a list of inputs and +(2) a list of "steps," or "phases." 
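+
+In Erlang client terms (covered in detail later in this document), a
+query might be shaped like this minimal sketch, where the bucket, keys,
+and phase choices are assumptions for illustration:
+
+```erlang
+%% (1) inputs: object locations
+Inputs = [{<<"bucket">>, <<"key1">>},
+          {<<"bucket">>, <<"key2">>}],
+%% (2) phases: a map phase followed by a reduce phase, both using
+%% prebuilt functions shipped in the riak_kv_mapreduce module
+Phases = [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+          {reduce, {modfun, riak_kv_mapreduce, reduce_sort}, none, true}].
+```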
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any was included with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it.
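+
+As a brief illustration, here is a minimal sketch of a set-union reduce
+function that stays correct under re-reduce, using only the Erlang
+standard library:
+
+```erlang
+%% lists:usort/1 sorts and deduplicates, so repeated applications of
+%% this phase over partial results converge on the same final union
+fun(Values, _Arg) ->
+    lists:usort(Values)
+end.
+```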
+ +### How a Link Phase Works in Riak KV + +Link phases find links matching patterns specified in the query +definition. The patterns specify which buckets and tags links must have. + +"Following a link" means adding it to the output list of this phase. The +output of this phase is often most useful as input to a map phase or to +another reduce phase. + +## Invoking MapReduce + +To illustrate some key ideas, we'll define a simple module that +implements a map function to return the key value pairs contained in a +bucket and use it in a MapReduce query via Riak KV's HTTP API. + +Here is our example MapReduce function: + +```erlang +-module(mr_example). + +-export([get_keys/3]). + +% Returns bucket and key pairs from a map phase +get_keys(Value,_Keydata,_Arg) -> + [{riak_object:bucket(Value),riak_object:key(Value)}]. +``` + +Save this file as `mr_example.erl` and proceed to compiling the module. + +{{% note title="Note on the Erlang Compiler" %}} +You must use the Erlang compiler (`erlc`) associated with the +Riak KV installation or the version of Erlang used when compiling Riak KV from +source. +{{% /note %}} + +Compiling the module is a straightforward process: + +```bash +erlc mr_example.erl +``` + +Successful compilation will result in a new `.beam` file, `mr_example.beam`. + +Send this file to your operator, or read about [installing custom code][use ref custom code] +on your Riak KV nodes. Once your file has been installed, all that +remains is to try the custom function in a MapReduce query. For +example, let's return keys contained within a bucket named `messages` +(please pick a bucket which contains keys in your environment). + +```curl +curl -XPOST localhost:8098/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' +``` + +The result should be a JSON map of bucket and key names expressed as key/value pairs. + +{{% note %}} +Be sure to install the MapReduce function as described above on all of +the nodes in your cluster to ensure proper operation. +{{% /note %}} + + +## Phase functions + +MapReduce phase functions have the same properties, arguments, and +return values whether you write them in Javascript or Erlang. + +### Map phase functions + +Map functions take three arguments (in Erlang, arity-3 is required). +Those arguments are: + + 1. `Value`: the value found at a key. This will be a Riak object, which + in Erlang is defined and manipulated by the `riak_object` module. + In Javascript, a Riak object looks like this: + + ```javascript + { + "bucket_type" : BucketTypeAsString, + "bucket" : BucketAsString, + "key" : KeyAsString, + "vclock" : VclockAsString, + "values" : [ + { + "metadata" : { + "X-Riak-VTag":VtagAsString, + "X-Riak-Last-Modified":LastModAsString, + "Links":[...List of link objects], + // ...other metadata... + }, + "data" : ObjectData + }, + // ...other metadata/data values (siblings)... + ] + } + ``` + 2. *KeyData* : key data that was submitted with the inputs to the query or phase. + 3. *Arg* : a static argument for the entire phase that was submitted with the query. + +A map phase should produce a list of results. You will see errors if +the output of your map function is not a list. Return the empty list if +your map function chooses not to produce output. If your map phase is +followed by another map phase, the output of the function must be +compatible with the input to a map phase - a list of bucket-key pairs or +`bucket-key-keydata` triples. 
+
+#### Map function examples
+
+This map function returns the value (data) of the object being mapped:
+
+```erlang
+fun(Value, _KeyData, _Arg) ->
+    [riak_object:get_value(Value)]
+end.
+```
+
+This map function filters its inputs based on the arg and returns
+bucket-key pairs for a subsequent map phase:
+
+```erlang
+fun(Value, _KeyData, Arg) ->
+    Key = riak_object:key(Value),
+    Bucket = riak_object:bucket(Value),
+    case erlang:byte_size(Key) of
+        L when L > Arg ->
+            [{Bucket,Key}];
+        _ -> []
+    end
+end.
+```
+
+### Reduce phase functions
+
+Reduce functions take two arguments. Those arguments are:
+
+1. *ValueList*: the list of values produced by the preceding phase in the MapReduce query.
+2. *Arg*: a static argument for the entire phase that was submitted with the query.
+
+A reduce function should produce a list of values, but it must also be
+true that the function is commutative, associative, and idempotent. That
+is, if the input list `[a,b,c,d]` is valid for a given F, then all of
+the following must produce the same result:
+
+```erlang
+ F([a,b,c,d])
+ F([a,d] ++ F([c,b]))
+ F([F([a]),F([c]),F([b]),F([d])])
+```
+
+#### Reduce function examples
+
+This reduce function assumes the values in the input are numbers and
+sums them:
+
+```erlang
+fun(Values, _Arg) ->
+  [lists:foldl(fun erlang:'+'/2, 0, Values)]
+end.
+```
+
+This reduce function sorts its inputs:
+
+```erlang
+fun(Values, _Arg) ->
+  lists:sort(Values)
+end.
+```
+
+## MapReduce Examples
+
+Riak KV supports describing MapReduce queries in Erlang syntax through the
+Protocol Buffers API. This section demonstrates how to do so using the
+Erlang client.
+
+{{% note title="Distributing Erlang MapReduce Code" %}}
+Any modules and functions you use in your Erlang MapReduce calls must be
+available on all nodes in the cluster. Please read about
+[installing custom code]({{}}riak/kv/2.2.6/using/reference/custom-code).
+{{% /note %}}
+
+### Erlang Example
+
+Before running some MapReduce queries, let's create some objects to
+run them on. Unlike the first example, in which we compiled
+`mr_example.erl` and distributed it across the cluster, this time
+we'll use the [Erlang client library][erlang client] and shell.
+
+```erlang
+1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087).
+2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>,
+                        term_to_binary(["eggs", "bacon"])).
+3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>,
+                         term_to_binary(["bread", "bacon"])).
+4> riakc_pb_socket:put(Client, Yours, [{w, 1}]).
+5> riakc_pb_socket:put(Client, Mine, [{w, 1}]).
+```
+
+Now that we have a client and some data, let's run a query that counts
+the occurrences of each grocery item.
+
+```erlang
+6> Count = fun(G, undefined, none) ->
+             [dict:from_list([{I, 1}
+              || I <- binary_to_term(riak_object:get_value(G))])]
+           end.
+7> Merge = fun(Gcounts, none) ->
+             [lists:foldl(fun(G, Acc) ->
+                            dict:merge(fun(_, X, Y) -> X+Y end,
+                                       G, Acc)
+                          end,
+                          dict:new(),
+                          Gcounts)]
+           end.
+8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred(
+                        Client,
+                        [{<<"groceries">>, <<"mine">>},
+                         {<<"groceries">>, <<"yours">>}],
+                        [{map, {qfun, Count}, none, false},
+                         {reduce, {qfun, Merge}, none, true}]).
+9> L = dict:to_list(R).
+```
+
+{{% note title="Riak Object Representations" %}}
+Note how the `riak_object` module is used in the MapReduce
+function but the `riakc_obj` module is used on the client.
+Riak objects are represented differently internally to the cluster than
+they are externally.
+{{% /note %}}
+
+Given the lists of groceries we created, the sequence of commands above
+would result in L being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`.
+
+### Erlang Query Syntax
+
+`riakc_pb_socket:mapred/3` takes a client and two lists as arguments.
+The first list contains bucket-key pairs. The second list contains
+the phases of the query.
+
+`riakc_pb_socket:mapred_bucket/3` replaces the first list of
+bucket-key pairs with the name of a bucket; see the warnings above
+about using this in a production environment.
+
+#### Inputs
+
+The `mapred/3` input objects are given as a list of tuples in the
+format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and
+`Key` should be binaries, and `KeyData` can be any Erlang term. The
+former form is equivalent to `{{Bucket,Key},undefined}`.
+
+#### Query
+
+The query is given as a list of map, reduce and link phases. Map and
+reduce phases are each expressed as tuples in the following form:
+
+```erlang
+{Type, FunTerm, Arg, Keep}
+```
+
+`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument
+(any Erlang term) to pass to each execution of the phase. `Keep` is
+either `true` or `false` and determines whether results from the phase
+will be included in the final value of the query. Riak KV assumes that the
+final phase will return results.
+
+`FunTerm` is a reference to the function that the phase will execute and
+takes any of the following forms:
+
+* `{modfun, Module, Function}` where `Module` and `Function` are atoms
+  that name an Erlang function in a specific module
+* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous
+  function)
+* `{jsfun,Name}` where `Name` is a binary that, when evaluated in
+  Javascript, points to a built-in Javascript function
+* `{jsanon, Source}` where `Source` is a binary that, when evaluated in
+  Javascript, is an anonymous function
+* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains
+  the source for an anonymous Javascript function
+
+{{% note title="qfun Note" %}}
+Using `qfun` in compiled applications can be a fragile
+operation. Please keep the following points in mind:
+
+1. The module in which the function is defined must be present and
+exactly the same version on both the client and Riak KV nodes.
+
+2. Any modules and functions used by this function (or any function in
+the resulting call stack) must also be present on the Riak KV nodes.
+
+Errors about failures to ensure both 1 and 2 are often surprising,
+usually seen as opaque missing-function or function-clause
+errors. Especially in the case of differing module versions, this can be
+difficult to diagnose without expecting the issue and knowing of
+`Module:info/0`.
+
+When using the Erlang shell, anonymous MapReduce functions can be
+defined and sent to Riak KV instead of deploying them to all servers in
+advance, but condition #2 above still holds.
+{{% /note %}}
+
+Link phases are expressed in the following form:
+
+```erlang
+{link, Bucket, Tag, Keep}
+```
+
+`Bucket` is either a binary name of a bucket to match, or the atom `_`,
+which matches any bucket. `Tag` is either a binary tag to match, or the
+atom `_`, which matches any tag. `Keep` has the same meaning as in map
+and reduce phases.
+
+> There is a small group of prebuilt Erlang MapReduce functions available
+with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl).
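+
+As a minimal sketch of how those prebuilt functions can be used (this
+reuses the `Client` and `groceries` objects from the example above):
+
+```erlang
+%% map_object_value/3 extracts each object's value; reduce_count_inputs/2
+%% counts how many inputs reached the reduce phase
+riakc_pb_socket:mapred(Client,
+    [{<<"groceries">>, <<"mine">>}, {<<"groceries">>, <<"yours">>}],
+    [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+     {reduce, {modfun, riak_kv_mapreduce, reduce_count_inputs}, none, true}]).
+```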
+ +## Bigger Data Examples + +### Loading Data + +This Erlang script will load historical stock-price data for Google +(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it. +Paste the code below into a file called `load_data.erl` inside the `dev` +directory (or download it below). + +```erlang +#!/usr/bin/env escript +%% -*- erlang -*- +main([]) -> + io:format("Requires one argument: filename with the CSV data~n"); +main([Filename]) -> + {ok, Data} = file:read_file(Filename), + Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])), + lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines). + +format_and_insert(Line) -> + JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line), + Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]), + io:format("Inserting: ~s~n", [hd(Line)]), + os:cmd(Command). +``` + +Make the script executable: + +```bash +chmod +x load_data.erl +``` + +Download the CSV file of stock data linked below and place it in the +`dev` directory where we've been working. + +* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) --- Google historical stock data +* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) --- Alternative script in Ruby to load the data +* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) --- Erlang script to load data (as shown in snippet) + +Now load the data into Riak KV. + +```bash +./load_data.erl goog.csv +``` + + +### Map only: find the days on which the high was over $600.00 + +From the Erlang shell with the client library loaded, let's define a +function which will check each value in our `goog` bucket to see if +the stock's high for the day was above $600. + +```erlang +> HighFun = fun(O, _, LowVal) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> High = proplists:get_value(<<"High">>, Map, -1.0), +> case High > LowVal of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun +``` + +Now we'll use `mapred_bucket/3` to send that function to the cluster. + +```erlang +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]). + {ok,[{0, + [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>, + <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>, + <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>, + <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>, + <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>, + <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>, + <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>, + <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]} +``` + +#### Map only: find the days on which the close is lower than open + +This example is slightly more complicated: instead of comparing a +single field against a fixed value, we're looking for days when the +stock declined. + +```erlang +> CloseLowerFun = fun(O, _, _) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> Close = proplists:get_value(<<"Close">>, Map, -1.0), +> Open = proplists:get_value(<<"Open">>, Map, -2.0), +> case Close < Open of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun + +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]). 
+{ok,[{0, + [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>, + <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>, + <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>, + <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>, + <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>, + <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>, + <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>, + <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]} +``` + +#### Map and Reduce: find the maximum daily variance in price by month + +Here things start to get tricky. We'll use map to determine each day's +rise or fall, and our reduce phase will identify each month's largest +variance. + +```erlang +DailyMap = fun(O, _, _) -> + {struct, Map} = mochijson2:decode(riak_object:get_value(O)), + Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")), + High = proplists:get_value(<<"High">>, Map, 0.0), + Low = proplists:get_value(<<"Low">>, Map, 0.0), + Month = string:substr(Date, 1, 7), + [{Month, abs(High - Low)}] +end. + +MonthReduce = fun(List, _) -> + {Highs, _} = lists:foldl( + fun({Month, _Value}=Item, {Accum, PrevMonth}) -> + case Month of + PrevMonth -> + %% Highest value is always first in the list, so + %% skip over this one + {Accum, PrevMonth}; + _ -> + {[Item] ++ Accum, Month} + end + end, + {[], ""}, + List), + Highs + end. +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]). +{ok,[{1, + [{"2010-02",10.099999999999909}, + {"2006-02",11.420000000000016}, + {"2004-08",8.100000000000009}, + {"2008-08",14.490000000000009}, + {"2006-05",11.829999999999984}, + {"2005-10",4.539999999999964}, + {"2006-06",7.300000000000011}, + {"2008-06",9.690000000000055}, + {"2006-03",11.770000000000039}, + {"2006-12",4.880000000000052}, + {"2005-09",9.050000000000011}, + {"2008-03",15.829999999999984}, + {"2008-09",14.889999999999986}, + {"2010-04",9.149999999999977}, + {"2008-06",14.909999999999968}, + {"2008-05",13.960000000000036}, + {"2005-05",2.780000000000001}, + {"2005-07",6.680000000000007}, + {"2008-10",21.390000000000043}, + {"2009-09",4.180000000000007}, + {"2006-08",8.319999999999993}, + {"2007-08",5.990000000000009}, + {[...],...}, + {...}|...]}]} +``` + +#### A MapReduce Challenge + +Here is a scenario involving the data you already have loaded. + +MapReduce Challenge: Find the largest day for each month in terms of +dollars traded, and subsequently the largest overall day. + +*Hint*: You will need at least one each of map and reduce phases. + +## Streaming MapReduce + +Because Riak KV distributes the map phases across the cluster to increase +data locality, you can gain access to the results of those individual +computations as they finish via streaming. Streaming can be very +helpful when getting access to results from a high latency MapReduce job +that only contains map phases. Streaming of results from reduce phases +isn't as useful, but if your map phases return data (keep: true), they +will be returned to the client even if the reduce phases haven't +executed. This will let you use streaming with a reduce phase to collect +the results of the map phases while the jobs are run and then get the +result to the reduce phase at the end. + +### Streaming via the HTTP API + +You can enable streaming with MapReduce jobs submitted to the `/mapred` +resource by adding `?chunked=true` to the url. The response will be sent +using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`. 
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error
+
+Internal Server Error
+
+The server encountered an error while processing this request:
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, the information you need is typically buried deeper in the stack trace.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
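+
+A quick shell check illustrates the difference (a hypothetical session;
+the exact error text varies by Erlang/OTP release):
+
+```erlang
+> string:substr("2009-06-10", 1, 7).
+"2009-06"
+> string:substr("2009-06-10", 0, 7).
+** exception error: no function clause matching
+                    string:substr("2009-06-10",0,7) (string.erl, line 207)
+```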
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to keep an exception raised in the shell from killing your connection
+to Riak KV. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
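+
+With the flag set to `true`, a subsequent exception is caught by the
+shell itself instead of crashing the shell's evaluator process, so
+linked processes, including the `riakc_pb_socket` connection, survive.
+A hypothetical session (assuming `Riak` is a connected client, as in
+the examples above):
+
+```erlang
+> string:substr("2009-06-10", 0, 7).
+* exception error: no function clause matching string:substr("2009-06-10",0,7)
+> riakc_pb_socket:ping(Riak).
+pong
+```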
diff --git a/content/riak/kv/2.2.6/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.2.6/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..ca5e528a1c
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,67 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+---
+
+Cluster metadata is a subsystem inside Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/2.2.6/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.2.6/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.2.6/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
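+
+As a rough sketch of the shape of that interface, here is a hypothetical
+session from a Riak node's Erlang console (the prefix, key, and value are
+invented for illustration):
+
+```erlang
+%% Store an opaque Erlang term under a {Prefix, SubPrefix} plus a key
+riak_core_metadata:put({<<"myapp">>, <<"config">>}, max_retries, 5).
+
+%% Read it back locally on any node once the update has propagated
+riak_core_metadata:get({<<"myapp">>, <<"config">>}, max_retries).
+%% => 5
+```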
diff --git a/content/riak/kv/2.2.6/developing/app-guide/reference.md b/content/riak/kv/2.2.6/developing/app-guide/reference.md
new file mode 100644
index 0000000000..3c040fd9ac
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/app-guide/reference.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+#menu:
+#  riak_kv-2.2.6:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+---
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.2.6/developing/app-guide/replication-properties.md b/content/riak/kv/2.2.6/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..2842bca5f3
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/app-guide/replication-properties.md
@@ -0,0 +1,580 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/advanced/replication-properties
+  - /riak/kv/2.2.6/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/2.2.6/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/2.2.6/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/2.2.6/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/2.2.6/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.2.6/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.2.6/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/2.2.6/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on each individual request, as shown
+in the [section below]({{}}riak/kv/2.2.6/developing/app-guide/replication-properties#client-level-replication-settings)
+on client-level replication settings.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.2.6/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
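+
+For example, here's a write that picks up those properties, using the
+Erlang client as elsewhere in these docs (a sketch assuming a connected
+`Pid`; the bucket and key are arbitrary):
+
+```erlang
+Obj = riakc_obj:new({<<"custom_props">>, <<"test_bucket">>},
+                    <<"test_key">>,
+                    <<"this write uses n_val 5, r 3, and w 3">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```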
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/2.2.6/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.2.6/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/2.2.6/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location giraffeKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(giraffeKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.2.6/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
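+
+Like R and W, these parameters can also be supplied on individual
+requests. A brief sketch with the Erlang client (assuming a connected
+`Pid` and the `animal_facts` bucket from the examples above):
+
+```erlang
+%% Require two primary vnodes to respond before this read succeeds
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"animal_facts">>, <<"chimpanzee">>,
+                                [{pr, 2}]).
+```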
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/2.2.6/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.2.6/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.2.6/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify 
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
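+
+To illustrate, here is a read that opts into both behaviors at request
+time (a sketch using the Erlang client, assuming a connected `Pid`):
+
+```erlang
+%% Wait for a quorum of vnodes, rather than the first response or all N,
+%% before concluding that the object is missing
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"animal_facts">>, <<"chimpanzee">>,
+                                [{notfound_ok, false}, {basic_quorum, true}]).
+```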
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` --- All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` --- This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` --- A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` --- Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
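+
+These symbolic names can be passed anywhere an integer value is
+accepted. For instance, with the Erlang client (assuming a connected
+`Pid`), here is a read that waits for responses from every replica:
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"animal_facts">>, <<"chimpanzee">>,
+                                [{r, all}]).
+```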
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which means we don't need to specify a bucket type on the write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/2.2.6/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/2.2.6/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.2.6/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+_Tuning CAP Controls in Riak_ from Basho Technologies on Vimeo.
diff --git a/content/riak/kv/2.2.6/developing/app-guide/strong-consistency.md b/content/riak/kv/2.2.6/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..8135d8dcdb
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/app-guide/strong-consistency.md
@@ -0,0 +1,257 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/advanced/strong-consistency
+  - /riak/kv/2.2.6/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/2.2.6/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/2.2.6/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/2.2.6/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/2.2.6/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/2.2.6/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/2.2.6/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/2.2.6/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/2.2.6/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/2.2.6/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/2.2.6/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/2.2.6/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/2.2.6/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/2.2.6/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/2.2.6/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/2.2.6/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/2.2.6/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/2.2.6/developing/client-libraries
+[getting started]: {{}}riak/kv/2.2.6/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/2.2.6/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent and
+partition-tolerant) for all of the data in that bucket. You can store just some of
+your data in strongly consistent buckets or all of your data, depending
+on your use case. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability because a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
+
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function quite differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations let you set values for a
+variety of [replication properties][apps replication properties] either on each request or at the
+bucket level [using bucket types][usage bucket types], strongly consistent
+operations quietly ignore those settings, including `r`, `pr`, `w`, `rw`,
+and others. Two replication properties that _can_ be set, however, are
+`n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients, but with important advantages over vector
+clocks.
+
+While we strongly recommend attaching context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---they are purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to make writes to non-existing keys without attaching context,
+we recommend doing this only if you are certain that the key does not
+yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
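+
+With the official clients, the simplest way to satisfy this requirement
+is to fetch the object first and modify the copy you fetched, so that
+the stored causal context rides along with the update. A sketch with the
+Erlang client (assuming a connected `Pid` and the `strongly_consistent`
+type from above; the bucket and key are invented for illustration):
+
+```erlang
+{ok, Obj0} = riakc_pb_socket:get(Pid,
+                                 {<<"strongly_consistent">>, <<"accounts">>},
+                                 <<"alice">>),
+Obj1 = riakc_obj:update_value(Obj0, <<"new balance">>),
+ok = riakc_pb_socket:put(Pid, Obj1).
+```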
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure, then you should default to supplying a context object.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates.
+
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+non-strongly-consistent buckets. One important exception to this is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, and so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a non-empty key without including causal
+context, you will receive the following error:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+Precondition Failed
+
+Precondition Failed
+
+
+mochiweb+webmachine web server
+```
+
+> **Getting Started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section.
+
+## Known Issue with Client Libraries
+
+All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no
+problems, since many error conditions are normal when using Riak.
+
+When working with strong consistency, however, operations like
+[conditional puts][config strong consistency#details] commonly
+produce errors that are difficult for clients to interpret. For example,
+it is expected behavior for conditional puts to fail in the case of
+concurrent updates to an object. At present, the official Riak clients
+will convert this failure into an exception that is no different from
+other error conditions, i.e. they will not indicate any
+strong-consistency-specific errors.
+
+The best solution to this problem at the moment is to catch these
+exceptions on the application side and parse server-side error messages
+to see if the error involved a conditional failure. If so, you should
+set up your application to retry any updates, perhaps a specified number
+of times or perhaps indefinitely, depending on the use case.
+
+If you do set up retry logic of this sort, however, it is necessary
+to retry the entire read/modify/put cycle, meaning that you will need
+to fetch the object, modify it, and then write. If you perform a simple
+put over and over again, without reading the object, the update will
+continue to fail.
+
+A future version of Riak will address these issues by modifying the
+server API to more accurately report errors specific to strongly
+consistent operations.
diff --git a/content/riak/kv/2.2.6/developing/app-guide/write-once.md b/content/riak/kv/2.2.6/developing/app-guide/write-once.md
new file mode 100644
index 0000000000..857a83d138
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/app-guide/write-once.md
@@ -0,0 +1,155 @@
+---
+title: "Write Once"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Write Once"
+    identifier: "app_guide_write_once"
+    weight: 102
+    parent: "developing_app_guide"
+toc: true
+version_history:
+  in: "2.1.0+"
+aliases:
+  - /riak/2.2.6/dev/advanced/write-once
+  - /riak/kv/2.2.6/dev/advanced/write-once
+---
+
+[glossary vnode]: {{}}riak/kv/2.2.6/learn/glossary/#vnode
+[bucket type]: {{}}riak/kv/2.2.6/developing/usage/bucket-types
+[Riak data types]: {{}}riak/kv/2.2.6/developing/data-types
+[strong consistency]: {{}}riak/kv/2.2.6/developing/app-guide/strong-consistency
+
+Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution.
+
+{{% note %}}
+Write-once buckets do not support Riak commit hooks. Because Riak objects are
+inserted into the realtime queue using a postcommit hook, realtime replication
+is unavailable for write-once buckets. Fullsync replication will, however,
+replicate the data.
+{{% /note %}}
+
+## Configuration
+
+When the new `write_once` [bucket type][bucket type] parameter is set to
+`true`, buckets of this type will treat all key/value entries as semantically
+"write once"; once written, entries should not be modified or overwritten by the user.
+
+The `write_once` property is a boolean property applied to a bucket type and
+may only be set at bucket creation time. Once a bucket type has been set with
+this property and activated, the `write_once` property may not be modified.
+
+The `write_once` property is incompatible with [Riak data types][Riak data types]
+and [strong consistency][strong consistency]. This means that if you attempt
+to create a bucket type with the `write_once` property set to `true`, any
+attempt to set the `datatype` parameter or to set the `consistent` parameter
+to `true` will fail.
+
+The `write_once` property may not be set on the default bucket type, and may
+not be set on individual buckets. If you set the `lww` or `allow_mult`
+parameters on a write-once bucket type, those settings will be ignored, as
+sibling values are disallowed by default.
+
+The following example shows how to configure a bucket type with the
+`write_once` property:
+
+```bash
+riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}'
+# my-bucket-type created
+
+riak-admin bucket-type activate my-bucket-type
+# my-bucket-type has been activated
+
+riak-admin bucket-type status my-bucket-type
+# my-bucket-type is active
+...
+write_once: true
+...
+```
+
+## Quorum
+
+The write path used by write-once buckets supports the `w`, `pw`, and `dw`
+configuration values. However, if `dw` is specified, then the value of `w` is
+taken to be the maximum of the `w` and `dw` values. For example, for an `n_val`
+of 3, if `dw` is set to `all`, then `w` will be `3`.
+
+This write path additionally supports the `sloppy_quorum` property. If set to
+`false`, only primary nodes will be selected for calculation of write quorum
+nodes.
+
+## Runtime
+
+The write-once path circumvents the normal coordinated PUT code path, and
+instead sends write requests directly to all [vnodes][glossary vnode] (or
+vnode proxies) in the effective preference list for the write operation.
+
+In place of the `put_fsm` used in the normal path, we introduce a collection of
+new intermediate worker processes (implementing `gen_server` behavior). The
+role of these intermediate processes is to dispatch put requests to vnode or
+vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`,
+the write-once workers are long-lived for the lifecycle of the `riak_kv`
+application. They are therefore stateful and store request state in a state-
+local dictionary.
+
+The relationship between the `riak_client`, write-once workers, and vnode
+proxies is illustrated in the following diagram:
+
+![Write Once]({{}}images/write_once.png)
+
+
+## Client Impacts
+
+Since the write-once code path is optimized for writes of data that will not
+be updated and therefore may potentially issue asynchronous writes, some
+client features might not work as expected. For example, against write-once
+buckets, PUT requests that ask for the object to be returned will behave
+like requests that do not ask for the object to be returned.
+
+
+## Siblings
+
+As mentioned, entries in write-once buckets are intended to be written only
+once---users who are not abusing the semantics of the bucket type should not be
+updating or over-writing entries in buckets of this type. However, it is
+possible for users to misuse the API, accidentally or otherwise, which might
+result in incomparable entries for the same key.
+
+In the case of siblings, write-once buckets will resolve the conflict by
+choosing the "least" entry, where sibling ordering is based on a deterministic
+SHA-1 hash of the objects. While this algorithm is repeatable and deterministic
+at the database level, it will have the appearance to the user of "random write
+wins."
+
+{{% note %}}
+As mentioned in [Configuration](#configuration), write-once buckets and Riak
+Data Types are incompatible because of this.
+{{% /note %}}
+
+
+## Handoff
+
+The write-once path supports handoff scenarios, such that if a handoff occurs
+during PUTs in a write-once bucket, the values that have been written will be
+handed off to the newly added Riak node.
+
+## Asynchronous Writes
+
+For backends that support asynchronous writes, the write-once path will
+dispatch a write request to the backend and handle the response
+asynchronously. This behavior allows the vnode to free itself for other work
+instead of waiting on the write response from the backend.
+
+At the time of writing, the only backend that supports asynchronous writes is
+LevelDB. Riak will automatically fall back to synchronous writes with all other
+backends.
+
+{{% note title="Note on the `multi` backend" %}}
+The [Multi]({{}}riak/kv/2.2.6/setup/planning/backend/multi) backend does not
+support asynchronous writes. Therefore, if LevelDB is used with the Multi
+backend, it will be used in synchronous mode.
+{{% /note %}}
diff --git a/content/riak/kv/2.2.6/developing/client-libraries.md b/content/riak/kv/2.2.6/developing/client-libraries.md
new file mode 100644
index 0000000000..1520850b4b
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/client-libraries.md
@@ -0,0 +1,304 @@
+---
+title: "Client Libraries"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Client Libraries"
+    identifier: "developing_client_libraries"
+    weight: 106
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/libraries
+  - /riak/kv/2.2.6/dev/using/libraries
+---
+
+## Basho-Supported Libraries
+
+Basho officially supports a number of open-source client libraries for a
+variety of programming languages and environments.
+
+Language | Source | Documentation | Download
+:--------|:-------|:--------------|:--------
+Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22)
+Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client)
+Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads)
+C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases)
+Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases)
+PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client)
+Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client)
+Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client)
+
+**Note**: All official clients use the integrated issue tracker on
+GitHub for bug reporting.
+
+In addition to the official clients, Basho provides some unofficial
+client libraries, listed below. There are also many client libraries and
+related [community projects]({{}}community/projects/).
+
+
+## Community Libraries
+
+The Riak Community is developing at a break-neck pace, and the number of
+community-contributed libraries and drivers is growing right alongside
+it. Here is a list of projects that may suit your programming needs or
+curiosities. If you know of something that needs to be added or are
+developing something that you wish to see added to this list, please
+fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs)
+and send us a pull request.
+
+{{% note title="Note on community-produced libraries" %}}
+All of these projects and libraries are at various stages of completeness and
+may not suit your application's needs based on their level of maturity and
+activity.
+{{% /note %}} + +### Client Libraries and Frameworks + +#### C/C++ + +* [riak-cpp](https://github.com/ajtack/riak-cpp) --- A C++ Riak client + library for use with C++11 compilers +* [Riak C Driver](https://github.com/fenek/riak-c-driver) --- A library + to communicate with Riak using cURL and Protocol Buffers +* [Riack](https://github.com/trifork/riack) --- A simple C client + library +* [Riack++](https://github.com/TriKaspar/riack_cpp) --- A C++ wrapper + around riack + +#### Clojure + +* [knockbox](https://github.com/reiddraper/knockbox) --- An eventual + consistency toolbox for Clojure +* [Welle](http://clojureriak.info) --- An expressive Clojure client with + batteries included +* [clj-riak](http://github.com/mmcgrana/clj-riak) --- Clojure bindings + to the Riak Protocol Buffers API +* [sumo](https://github.com/reiddraper/sumo) --- A Protocol + Buffer-specific client for Riak with KV, 2i, and MapReduce support +* [kria](https://github.com/bluemont/kria) --- Riak 2.0 Asynchronous + (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer + API, Java 7. + +#### ColdFusion + +* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) + --- A Riak-backed cache extension for Railo/ColdFusion + +#### Common Lisp + +* [cl-riak (1)](https://github.com/whee/cl-riak) +* [cl-riak (2)](https://github.com/eriknomitch/cl-riak) + +#### Dart + +* [riak-dart](https://github.com/agilord/riak_dart_client) --- HTTP + client for Riak written in Dart + +#### Django (Python) + +* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) + --- Riak-based Session Backend for Django +* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) + --- A Riak backend for Django + +#### Erlang + +* [Uriak Pool](https://github.com/unisontech/uriak_pool) --- Erlang + connection pool library from the team at + [Unison](http://www.unison.com) +* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) --- Riak + Protocol Buffer Client pool application +* [Pooly](https://github.com/aberman/pooly) --- Riak Process Pool +* [riakpool](https://github.com/dweldon/riakpool) --- Application for + maintaining a dynamic pool of Protocol Buffer client connections to a + Riak database +* [pooler](https://github.com/seth/pooler) --- An OTP Process Pool + Application +* [krc](https://github.com/klarna/krc) --- A simple wrapper around the + official Riak client for Erlang +* [riakc_pool](https://github.com/brb/riakc_pool) --- A really simple + Riak client process pool based on poolboy + +#### Go + +* [riaken](https://github.com/riaken) --- A fast and extendable Riak + Protocol Buffer Client +* [goriakpbc](https://github.com/tpjg/goriakpbc) --- A Golang Riak + client inspired by the Ruby riak-client from Basho and riakpbc from mrb +* [riakpbc](https://github.com/mrb/riakpbc) --- A Riak Protocol Buffer + client in Go +* [goriak](https://github.com/zegl/goriak) --- Go language driver for Riak KV + +#### Grails + +* [Grails ORM for Riak](http://www.grails.org/plugin/riak) + +#### Griffon + +* [Riak Plugin for + Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin) + +#### Groovy + +* [spring-riak](https://github.com/jbrisbin/spring-riak) --- Riak + support from Groovy and/or Java + +#### Haskell + +* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) + --- A fast Haskell client library from the team at MailRank. 
+ +#### Java + +* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) + --- Java Client Library for Riak based on the Protocol Buffers API +* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) + --- Asynchronous, NIO-based Protocol Buffers client for Riak +* [Riak Module for the Play + Framework](http://www.playframework.org/modules/riak-head/home) + +#### Lisp-flavored Erlang + +* [Gutenberg](https://github.com/dysinger/gutenberg/) --- Riak MapReduce + examples written in LFE + +#### Node.js + +* [zukai](https://github.com/natural/zukai) --- Riak ODM for Node.js + from Troy Melhase +* [riak-pb](https://github.com/CrowdProcess/riak-pb) --- Riak Protocol + Buffers client for Node.js from the team at + [CrowdProcess](http://crowdprocess.com) +* [node_riak](https://github.com/mranney/node_riak) --- Voxer's + production Node.js client for Riak. +* [riakpbc](https://github.com/nlf/riakpbc) --- A simple Riak Protocol + Buffer client library for Node.js +* [nodiak](https://npmjs.org/package/nodiak) --- Supports bulk + get/save/delete, sibling auto-resolution, MapReduce chaining, Search, + and 2i's +* [resourceful-riak](https://github.com/admazely/resourceful-riak) --- A + Riak engine to the + [resourceful](https://github.com/flatiron/resourceful/) model + framework from [flatiron](https://github.com/flatiron/) +* [Connect-Riak](https://github.com/frank06/connect-riak) --- Riak + session store for Connect backed by [Riak-js](http://riakjs.org/) +* [Riak-js](http://riakjs.com) --- Node.js client for Riak with support + for HTTP and Protocol Buffers +* [Riakjs-model](https://github.com/dandean/riakjs-model) --- a model + abstraction around riak-js +* [Node-Riak](http://github.com/orlandov/node-riak) --- A wrapper around + Node's HTTP facilities for communicating with Riak +* [riak-dc](https://github.com/janearc/riak-dc) --- A very thin, very small + http-based interface to Riak using promises intended to be used for small + tools like command-line applications; aims to have the "most-synchronous- + like" interface. 
+* [Nori](https://github.com/sgonyea/nori) --- Experimental Riak HTTP + library for Node.js modeled after Ripple +* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) --- + Node-based server and database-frontend for Sproutcore +* [Chinood](https://npmjs.org/package/chinood) --- Object data mapper + for Riak built on Nodiak +* [SimpleRiak](https://npmjs.org/package/simpleriak) --- A very simple + Riak HTTP client + +#### OCaml + +* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) --- + Riak OCaml client +* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) --- A Protocol + Buffers client for Riak + +#### Perl + +* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) --- A Perl + interface to Riak +* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) + --- Non-blocking Riak adapter using anyevent +* [riak-tiny](https://github.com/tempire/riak-tiny) --- Perl interface + to Riak without Moose +* [Riak::Light](https://metacpan.org/module/Riak::Light) --- Fast and + lightweight Perl client for Riak (PBC only) + +#### PHP + +* [riak-client](https://github.com/php-riak/riak-client) --- A Riak + 2.0-compliant PHP client with support for Protocol Buffers by [Fabio + Silva](https://github.com/FabioBatSilva) +* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) --- A port of + Ripple to PHP +* [riiak](https://bitbucket.org/intel352/riiak) --- A Riak PHP client + library for the [Yii Framework](http://www.yiiframework.com/) +* [riak-php](https://github.com/marksteele/riak-php) --- A Riak PHP + client with support for Protocol Buffers +* [RiakBundle](https://github.com/remialvado/RiakBundle) --- + [Symfony](http://symfony.com) Bundle designed to ease interaction + with Riak +* [php_riak](https://github.com/TriKaspar/php_riak) --- A PHP extension + written in C, Both Riak client and PHP session module + +#### Python + +* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) + --- Asyncio PBC Riak 2.0+ client library. 
(Based on official Basho + python client) +* [Riakasaurus](https://github.com/calston/riakasaurus) --- A Riak + client library for Twisted (based on txriak) +* [RiakKit](http://shuhaowu.com/riakkit) --- A small Python ORM that + sits on top of riak-python-client, similar to mongokit and couchdbkit +* [riakalchemy](https://github.com/Linux2Go/riakalchemy) --- Object + mapper for Riak written in Python +* [riak_crdt](https://github.com/ericmoritz/riak_crdt) --- A CRDT + (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT + API](https://github.com/ericmoritz/crdt) +* [txriak](https://launchpad.net/txriak) --- A Twisted module for + communicating with Riak via the HTTP interface +* [txriakidx](https://github.com/williamsjj/txriakidx) --- Riak client + for Twisted Python that implements transparent indexes + +#### Racket + +* [riak.rkt](https://github.com/shofetim/riak.rkt) --- Racket API to + Riak +* [Racket Riak](https://github.com/dkvasnicka/racket-riak) --- Racket + 1.3.x API to Riak + +#### Ruby + +* [Risky](https://github.com/aphyr/risky) --- A lightweight Ruby ORM for + Riak +* [riak_sessions](http://github.com/igorgue/riak_sessions) --- + Riak-backed session storage for Rack +* [Riaktor](http://github.com/benmyles/riaktor) --- Ruby client and + object mapper for Riak +* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) --- + DataMapper adapter for Riak +* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) --- Riak + Protocol Buffer Client in Ruby +* [Devise-Ripple](http://github.com/frank06/devise-ripple) --- An ORM + strategy to use Devise with Riak +* [ripple-anaf](http://github.com/bkaney/ripple-anaf) --- Accepts nested + attributes support for Ripple +* [Pabst](https://github.com/sgonyea/pabst) --- Cross-platform Ruby + extension for Protocol Buffers written in both Objective-C and + Objective-C++ + +#### Scala + +* [Riakka](http://github.com/timperrett/riakka) --- Scala library for + talking to Riak +* [Ryu](http://github.com/softprops/ryu) --- A Tornado Whirlwind Kick + Scala client for the Riak raw HTTP interface +* [Raiku](https://github.com/gideondk/Raiku) --- An Akka IO- and + Sentinel-driven Riak Scala client + +#### Smalltalk + +* [Phriak](http://www.squeaksource.com/Phriak/) --- A Riak client for + Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface +* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) + --- A Pharo Smalltalk interface to Riak. There is also a blog post + with some additional info about the client + [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html). diff --git a/content/riak/kv/2.2.6/developing/data-modeling.md b/content/riak/kv/2.2.6/developing/data-modeling.md new file mode 100644 index 0000000000..3fc390cec2 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/data-modeling.md @@ -0,0 +1,10 @@ +--- +layout: redirect +target: "riak/kv/2.2.6/learn/use-cases/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/, but -- to maintain +the git history of this (possibly malformed?) file -- we're going to start off +by using this generated redirect. 
diff --git a/content/riak/kv/2.2.6/developing/data-types.md b/content/riak/kv/2.2.6/developing/data-types.md
new file mode 100644
index 0000000000..db392286ac
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/data-types.md
@@ -0,0 +1,275 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Data Types"
+    identifier: "developing_data_types"
+    weight: 102
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/data-types
+  - /riak/kv/2.2.6/dev/using/data-types
+  - /riak/2.2.6/dev/data-modeling/data-types
+  - /riak/kv/2.2.6/dev/data-modeling/data-types
+---
+
+[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others
+[concept crdt]: ../../learn/concepts/crdts
+[ops bucket type]: ../../using/cluster-operations/bucket-types
+
+Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types:
+
+- [Flags](./maps#flags)
+- [Registers](./maps#registers)
+- [Counters](./counters)
+- [Sets](./sets)
+- [GSets](./gsets)
+- [Maps](./maps)
+
+Riak KV also has one context-free data type that offers similar usage but does not require contexts:
+
+- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places)
+
+Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps).
+
+For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt].
+
+## Getting Started with Riak Data Types
+
+The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types:
+
+1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type).
+2. [Confirm the bucket was properly configured](#confirm-bucket-configuration).
+3. [Activate the bucket type](#activate-bucket-type).
+
+### Creating a Bucket with a Riak Data Type
+
+First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to either `counter`, `map`, `set`, `hll`, or `gset`.
+
+The following would create a separate bucket type for each of the five
+bucket-level data types:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The names `maps`, `sets`, `counters`, `hlls`, and `gsets` are not reserved
terms. You are free to name bucket types whatever you like, with
the exception of `default`.
+
+### Confirm Bucket Configuration
+
+Once you've created a bucket with a Riak data type, you can check
+to make sure that the bucket property configuration associated with that
+type is correct. This can be done through the `riak-admin` interface:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This will return a list of bucket properties and their associated values
+in the form of `property: value`.
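+
+For illustration, the two pairs from that listing most relevant to data
+types are shown below. This is a hedged sample rather than complete
+output captured from a live node; the full listing contains many more
+properties:
+
+```
+allow_mult: true
+datatype: map
+```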
+In particular, if our `maps` bucket type has been set properly, we
+should see the following pair in our console output:
+
+```
+datatype: map
+```
+
+### Activate Bucket Type
+
+If a bucket type has been properly constructed, it needs to be activated
+to be usable in Riak. This can also be done using the `bucket-type`
+command interface:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+To check whether activation has been successful, simply use the same
+`bucket-type status` command shown above.
+
+See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application.
+
+## Required Bucket Properties
+
+In order for Riak data types to work, the bucket should have the following bucket properties:
+
+- `allow_mult = true`
+- `last_write_wins = false`
+
+These settings are set by default and should not be changed.
+
+## Data Types and Context
+
+Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence.
+
+If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client).
+
+> **Note**
+>
+> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter.
+
+In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map):
+
+```java
+// Using the "ahmedMap" Location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+System.out.println(ctx.getValue().toString());
+
+// An indecipherable string of Unicode characters should then appear
+```
+
+```ruby
+bucket = client.bucket('users')
+ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps')
+ahmed_map.instance_variable_get(:@context)
+
+# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j"
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```python
+bucket = client.bucket_type('maps').bucket('users')
+ahmed_map = Map(bucket, 'ahmed_info')
+ahmed_map.context
+
+# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Note: using a previous UpdateMap or FetchMap result
+Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context));
+
+// Output:
+// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("context: '%s'", rslt.context.toString('base64'));
+});
+
+// Output:
+// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo='
+```
+
+```erlang
+%% You cannot fetch a data type's context directly using the Erlang
+%% client. This is actually quite all right, as the client automatically
+%% manages contexts when making updates.
+```
+
+> **Context with the Ruby, Python, and Erlang clients**
+>
+> In the Ruby, Python, and Erlang clients, you will not need to manually
handle context when making data type updates. The clients will do it all
for you. The exceptions amongst the official clients are the Java and
PHP clients. We'll explain how to use data type contexts with those two
clients directly below.
+
+### Context with the Java and PHP Clients
+
+With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations:
+
+* Disabling a flag within a map
+* Removing an item from a set (whether the set is on its own or within a
+  map)
+* Removing a field from a map
+
+Without context, these operations simply will not succeed due to the
+convergence logic driving Riak data types. The examples below show you
+how to fetch a data type's context and then pass it back to Riak. More
+specifically, the Java example removes the `paid_account` flag from the
+map, while the PHP example removes an element from a set within the map:
+
+```java
+// This example uses our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate removePaidAccountField = new MapUpdate()
+    .removeFlag("paid_account");
+UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('opera');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+## Usage Examples
+
+- [Flags](./maps#flags)
+- [Registers](./maps#registers)
+- [Counters](./counters)
+- [Sets](./sets)
+- [Maps](./maps)
+- [GSets](./gsets)
+- [HyperLogLogs](./hyperloglogs)
+
+The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section.
+
+All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish.
+
+## Data Types and Search
+
+Riak data types can be searched like any other object, but with the
+added benefit that your data type is indexed as a different type by Solr,
+the search platform behind Riak Search.
+
+In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code
+samples from each of our official client libraries.
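+
+As a rough illustration of what such a query looks like over HTTP (a
+hedged sketch: the index name `scores` and the `counter` field syntax
+are assumptions for this example, not something set up on this page),
+you might issue:
+
+```curl
+curl "http://localhost:8098/search/query/scores?wt=json&q=counter:[20 TO *]"
+```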
diff --git a/content/riak/kv/2.2.6/developing/data-types/counters.md b/content/riak/kv/2.2.6/developing/data-types/counters.md new file mode 100644 index 0000000000..99a93465d1 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/data-types/counters.md @@ -0,0 +1,631 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Counters" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Counters" + identifier: "data_types_counters" + weight: 100 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.2.6/dev/using/data-types/counters + - /riak/kv/2.2.6/dev/using/data-types/counters + - /riak/2.2.6/dev/data-modeling/data-types/counters + - /riak/kv/2.2.6/dev/data-modeling/data-types/counters +--- + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero. + +The examples in this section will show you how to use counters on their own. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter set to `counter`: + +```bash +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +``` + +> **Note** +> +> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status counters +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `counters` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: counter +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate counters +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status counters +``` + +After creating and activating our new `counters` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key +location that contains our counter. + +For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. 
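+// (As an aside: the Namespace constructor takes the bucket type first
+// and the bucket name second, which is why "counters" appears twice
+// below: once as the bucket type and once as the bucket name.)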
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can
+create this counter and ensure that the `counters` bucket will use our
+`counters` bucket type like this:
+
+```java
+// Using the countersBucket Namespace object from above:
+
+Location trafficTickets = new Location(countersBucket, "traffic_tickets");
+```
+
+```ruby
+bucket = client.bucket_type('counters').bucket('counters')
+counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all counter buckets to use the counters bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters'
+
+# This would enable us to create our counter without specifying a bucket type
+bucket = client.bucket('counters')
+counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')
+```
+
+```php
+# using the $bucket var created earlier
+$location = new \Basho\Riak\Location('traffic_tickets', $bucket);
+```
+
+```python
+bucket = client.bucket_type('counters').bucket('counters')
+counter = bucket.new('traffic_tickets')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
+FetchCounter cmd = new FetchCounter(fetchCounterOptions);
+RiakResult rslt = client.Execute(cmd);
+CounterResponse response = cmd.Response;
+```
+
+```javascript
+// Using the options from above:
+
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets'
+};
+```
+
+```erlang
+Counter = riakc_counter:new().
+
+%% Counters in the Erlang client are opaque data structures that collect
+%% operations as you mutate them. We will associate the data structure
+%% with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 0}'
+```
+
+## Increment a Counter
+
+Now that our client knows which bucket/key pairing to use for our
+counter, `traffic_tickets` will start out at 0 by default. If we happen
+to get a ticket that afternoon, we can increment the counter:
+
+```java
+// Using the "trafficTickets" Location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.increment
+
+# This will increment the counter both on the application side and in
+# Riak
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(1)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+counter.increment()
+
+# Updates are staged locally and have to be explicitly sent to Riak
+# using the store() method.
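+# (Illustrative note: until store() is called, only dirty_value reflects
+# the pending increment, while value still shows what was last fetched
+# from Riak; both are covered under "Retrieve Counter Value" below.)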
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
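+
+%% As with other mutations in the Erlang client, this increment is only
+%% staged locally in Counter2; nothing is sent to Riak until the staged
+%% operations are extracted with riakc_counter:to_op/1 and passed to
+%% riakc_pb_socket:update_type/4, as shown under "Decrement a Counter"
+%% below.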
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+```
+
+## Retrieve Counter Value
+
+We can retrieve the value of the counter and view how many tickets have accumulated:
+
+```java
+// Using the "trafficTickets" Location from above:
+FetchCounter fetch = new FetchCounter.Builder(trafficTickets)
+    .build();
+FetchCounter.Response response = client.execute(fetch);
+RiakCounter counter = response.getDatatype();
+Long ticketsCount = counter.view();
+```
+
+```ruby
+counter.value
+# Output will always be an integer
+```
+
+```php
+$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getCounter();
+
+$trafficTickets->getData(); # returns an integer
+```
+
+```python
+counter.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, whereas the call above would return
+# 6, the call below will return 0 since we started with an empty
+# counter:
+
+counter.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any changes to the counter that have not yet been
+# sent to Riak
+counter.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
+FetchCounter cmd = new FetchCounter(fetchCounterOptions);
+RiakResult rslt = client.Execute(cmd);
+CounterResponse response = cmd.Response;
+// response.Value has the counter value
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets'
+};
+client.fetchCounter(options,
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        if (rslt.notFound) {
+            logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND",
+                options.bucketType, options.bucket, options.key);
+        } else {
+            logger.info("bt: %s, b: %s, k: %s, counter: %d",
+                options.bucketType, options.bucket, options.key,
+                rslt.counterValue);
+        }
+    }
+);
+```
+
+```erlang
+riakc_counter:dirty_value(Counter2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, whereas the call above would return
+%% '6', the call below will return '0' since we started with an empty
+%% counter:
+
+riakc_counter:value(Counter2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, CounterX} = riakc_pb_socket:fetch_type(Pid,
+    {<<"counters">>, <<"counters">>},
+    <<"traffic_tickets">>).
+```
+
+```curl
+curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets
+
+# Response:
+{"type":"counter", "value": <value>}
+```
+
+## Decrement a Counter
+
+Counters enable you to decrement values in addition to incrementing them as seen above.
+
+For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record:
+
+```java
+// Using the "trafficTickets" Location from above:
+CounterUpdate cu = new CounterUpdate(-1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.decrement
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(-3)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+counter.decrement()
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var updateCmd = new UpdateCounter.Builder(-3)
+    .WithBucketType("counters")
+    .WithBucket("counters")
+    .WithKey("traffic_tickets")
+    .Build();
+
+rslt = client.Execute(updateCmd);
+response = updateCmd.Response;
+// response.Value is three less than before
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -1
+};
+
+// As with incrementing, you can also decrement by more than one, e.g.:
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -3
+};
+```
+
+```erlang
+Counter3 = riakc_counter:decrement(Counter2).
+
+%% As with incrementing, you can also decrement by more than one:
+
+Counter4 = riakc_counter:decrement(3, Counter3).
+
+%% At some point, we'll want to send our local updates to the server
+%% so they get recorded and are visible to others. Extract the update
+%% using the to_op/1 function, then pass it to
+%% riakc_pb_socket:update_type/4,5.
+
+riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>},
+    <<"traffic_tickets">>,
+    riakc_counter:to_op(Counter4)).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"decrement": 3}'
+```
diff --git a/content/riak/kv/2.2.6/developing/data-types/gsets.md b/content/riak/kv/2.2.6/developing/data-types/gsets.md
new file mode 100644
index 0000000000..a244305bd7
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/data-types/gsets.md
@@ -0,0 +1,627 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: GSets"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "GSets"
+    identifier: "data_types_gsets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/data-types/gsets
+  - /riak/kv/2.2.6/dev/using/data-types/gsets
+  - /riak/2.2.6/dev/data-modeling/data-types/gsets
+  - /riak/kv/2.2.6/dev/data-modeling/data-types/gsets
+---
+
+GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps).
+
+GSets are collections of unique binary values (such as strings). All of the values in a gset are unique and are automatically sorted alphabetically irrespective of the order they were added.
+
+For example, if you attempt to add the element `shovel` to a gset that already contains `shovel`, the operation will be ignored by Riak KV.
+
+Unlike sets, elements can only be added; no element modification or deletion is possible.
+
+> **Known Issue**
+>
+> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `gset`:
+
+```bash
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `gsets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: gset
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate gsets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+After creating and activating our new `gsets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using gsets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a gset:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("", ""), "");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::GrowOnlySet.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+gset = bucket.new('2019-11-17')
+
+# or
+
+from riak.datatypes import GSet
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with gsets
+// by building an Options object or using a Builder
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, gsets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types//buckets//datatypes/
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store a list of transactions that occur for an account number on a specific date. In the Java and Ruby examples below, the gset lives in the key `cities` in the bucket `travel`; the other clients use the key `2019-11-17` in the bucket `account-12345678`. Both use the `gsets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('gsets').bucket('travel')
+cities_set = Riak::Crdt::GrowOnlySet.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::GrowOnlySet.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak set.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a set explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create an options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+Gset = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty.
We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+count($gset->getData());
+```
+
+```python
+len(gset) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+FetchGSet fetchGSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchGSetCommand);
+GSetResponse response = fetchGSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("gset '2019-11-17' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_gset:size(Gset) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% gset that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a GSet
+
+But let's say that a pair of transactions occurred today.
Let's add them to our `2019-11-17` gset:
+
+```java
+// Using our "cities" Location from above:
+
+GSetUpdate su = new GSetUpdate()
+    .add("Toronto")
+    .add("Montreal");
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+    .build();
+client.execute(update);
+```
+
+```ruby
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('transaction a')
+    ->add('transaction b')
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+gset.add('transaction a')
+gset.add('transaction b')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "transaction a", "transaction b" };
+
+var builder = new UpdateGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17")
+    .WithAdditions(adds);
+
+UpdateGSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+GSetResponse response = cmd.Response;
+Assert.Contains("transaction a", response.AsStrings.ToArray());
+Assert.Contains("transaction b", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['transaction a', 'transaction b'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+Gset1 = riakc_gset:add_element(<<"transaction a">>, Gset),
+Gset2 = riakc_gset:add_element(<<"transaction b">>, Gset1).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["transaction a", "transaction b"]}'
+```
+
+## Remove from a GSet
+
+Removal from a GSet is not possible.
+
+## Retrieve a GSet
+
+Now, we can check on which transactions are currently in our gset:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+#
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+var_dump($gset->getData());
+```
+
+```python
+gset.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['transaction a', 'transaction b']), the call below would
+# return frozenset([]).
+
+gset.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions.
+gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(Gset2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(Gset2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+    {<<"gsets">>,<<"account-12345678">>},
+    <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var gset_2019_11_17 = result.values;
+gset_2019_11_17.indexOf('transaction z'); // if present, index is >= 0
+gset_2019_11_17.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, Gset2 is the most "recent" gset from the standpoint
+%% of our application.
+
+riakc_gset:is_element(<<"transaction z">>, Gset2).
+riakc_gset:is_element(<<"transaction a">>, Gset2).
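+
+%% (Recall from the size/1 example above that query functions such as
+%% is_element/2 operate over the value fetched from the server, not over
+%% locally staged additions, so both calls return false until the adds
+%% have been written to Riak and the gset has been fetched again.)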
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "citiesSet" from above:
+
+int numberOfCities = citiesSet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+response.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var gset_2019_11_17_size = result.values.length;
+```
+
+```erlang
+riakc_gset:size(Gset2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
diff --git a/content/riak/kv/2.2.6/developing/data-types/hyperloglogs.md b/content/riak/kv/2.2.6/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..64ec0bbefb
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/data-types/hyperloglogs.md
@@ -0,0 +1,639 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/data-types/hyperloglogs
+  - /riak/kv/2.2.6/dev/using/data-types/hyperloglogs
+  - /riak/2.2.6/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/2.2.6/dev/data-modeling/data-types/hyperloglogs
+---
+
+The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog.
+
+For this example we'll use the `hlls` bucket type created and activated above and a bucket called `my_hlls`:
+
+```erlang
+%% Buckets are simply named binaries in the Erlang client. See the
+%% examples below for more information.
+```
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location hllLocation =
+  new Location(new Namespace("", ""), "");
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// Buckets and bucket types are simply strings in the Go client.
+
+// See the examples below for more information, or the full example at
+// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go
+
+// We will need the following imports to run the examples:
+import (
+    "fmt"
+    "os"
+    "time"
+
+    riak "github.com/basho/riak-go-client"
+    "errors"
+)
+```
+
+```csharp
+// In the C# client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```javascript
+// In the Node.js client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('', '', 'hlls')
+    ->build();
+```
+
+```ruby
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+```
+
+```curl
+curl http://localhost:8098/types//buckets//datatypes/
+
+# Note that this differs from the URL structure for non-Data-Type
+# requests, which end in /keys/
+```
+
+
+## Create a HyperLogLog data type
+
+To create a hyperloglog data structure, you need to specify a bucket/key pair to
+hold that hyperloglog. Here is the general syntax for doing so:
+
+```erlang
+HLL = riakc_hll:new().
+
+%% Hyperloglogs in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location hllLocation =
+  new Location(new Namespace("hlls", "hello"), "darkness");
+
+// In the Java client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// In the Go client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```csharp
+// In the C# client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```javascript
+// In the Node.js client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below. 
+``` + +```php +// Note that "hlls" is just an example HLL bucket type name used +// in these examples + +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('gosabres poked you.') + ->add('phprocks viewed your profile.') + ->add('phprocks started following you.') + ->buildBucket('', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +key = "darkness" +hll = Riak::Crdt::HyperLogLog.new(bucket, key) +``` + +```curl +# You cannot create an empty hyperloglog data structure through the HTTP +# interface. +# Hyperloglogs can only be created when an element is added to them, as in the +# examples below. +``` + +Upon creation, our hyperloglog data structure is empty: + +```erlang +HLL. + +%% which will return: +%% {hll,0,[]} +``` + +```java +FetchHll fetch = new FetchHll.Builder(hllLocation) + .build(); +RiakHll hll = client.execute(fetch); +boolean isEmpty = hll.getCardinality() == 0; +``` + +```python +is_empty = hll.value == 0 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } +}); +// Prints "Not Found" to logger.info. +``` + +```csharp + var fetch = new FetchHll.Builder() + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .Build(); + +RiakResult rslt = client.Execute(fetch); +HllResponse response = fetch.Response; +if (response.NotFound) +{ + Console.WriteLine("Not Found"); +} +// Prints "Not Found" to the console. +``` + +```php +$command = (new Command\Builder\FetchHll($riak_client)) + ->buildLocation('darkness', 'hello', 'hlls') + ->build(); + +$response = $command->execute(); + +$response->getCode() == '404'; +``` + +```ruby +puts hll.cardinality +# Prints "0" +``` + +```curl +curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness + +# Response +{"type":"hll","error":"notfound"} +``` + +## Add elements to a HyperLogLog data type + +```erlang +HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1), + +HLL2. 
+
+%% which will return:
+%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]}
+```
+
+```java
+HllUpdate hllUpdate = new HllUpdate()
+    .add("Jokes")
+    .add("Are")
+    .addAll(Arrays.asList("Better", "Explained", "Jokes"));
+
+hllUpdate.getElementAdds();
+// Returns the set of ["Jokes", "Are", "Better", "Explained"]
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+// We will add values in the next example
+```
+
+```csharp
+// We will add values in the next example
+```
+
+```javascript
+// We will add values in the next example
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('gosabres poked you.')
+    ->add('phprocks viewed your profile.')
+    ->add('phprocks started following you.')
+    ->buildBucket('', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+# We will add values in the next example
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["my", "old", "friend"]}'
+```
+
+However, when using a non-HTTP client, the approximate cardinality/value of our
+data structure will be 0, locally, until it's pushed to the server and then
+[fetched](#retrieve-a-hyperloglog-data-type) from the server.
+
+```erlang
+riakc_hll:value(HLL2) == 0.
+
+%% which will return:
+%% true
+
+Port = 8087,
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port),
+Key = <<"Holy Diver">>,
+BucketType = <<"hlls">>,
+Bucket = {BucketType, <<"rainbow in the dark">>},
+
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)).
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)).
+```
+
+```java
+// Using hllUpdate and hllLocation from above examples
+
+UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate)
+    .build();
+client.execute(update);
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+adds := [][]byte{
+    []byte("Jokes"),
+    []byte("Are"),
+    []byte("Better"),
+    []byte("Explained"),
+    []byte("Jokes"),
+}
+
+builder := riak.NewUpdateHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    WithAdditions(adds...).
+    Build()
+if err != nil {
+    return err
+}
+
+return cluster.Execute(cmd)
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness',
+    additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'],
+};
+
+client.updateHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```csharp
+var adds = new HashSet<string> { "Jokes", "Are", "Better", "Explained", "Jokes" };
+
+var update = new UpdateHll.Builder(adds)
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .WithReturnBody(true)
+    .Build();
+
+RiakResult rslt = client.Execute(update);
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('Jokes')
+    ->add('Are')
+    ->add('Better')
+    ->add('Explained')
+    ->add('Jokes')
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+hll.add('Jokes')
+hll.batch do |s|
+  s.add 'Are'
+  s.add 'Better'
+  s.add 'Explained'
+  s.add 'Jokes'
+end
+```
+
+## Retrieve a HyperLogLog data type
+
+Now we can check the approximate count (i.e., the cardinality) of the elements
+added to our hyperloglog data structure:
+
+```erlang
+{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key),
+riakc_hll:value(HLL3) == 4.
+
+%% which would return:
+%% true
+
+%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the
+%% unique elements we've added to the data structure.
+```
+
+```java
+FetchHll hllFetchCmd = new FetchHll.Builder(hllLocation).build();
+RiakHll hll = client.execute(hllFetchCmd);
+hll.getCardinality();
+// Which returns 4
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = bucket.get('hll_one')
+# myhll.value == 4
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+    logger.info("Hyperloglog cardinality is: " + rslt.cardinality);
+});
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure. 
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($response->getHll()->getData()));
+$this->assertEquals(4, $response->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
diff --git a/content/riak/kv/2.2.6/developing/data-types/maps.md b/content/riak/kv/2.2.6/developing/data-types/maps.md
new file mode 100644
index 0000000000..ed7eeb766a
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/data-types/maps.md
@@ -0,0 +1,1881 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/data-types/maps
+  - /riak/kv/2.2.6/dev/using/data-types/maps
+  - /riak/2.2.6/dev/data-modeling/data-types/maps
+  - /riak/kv/2.2.6/dev/data-modeling/data-types/maps
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`. 
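+
+For instance, the first few lines of the status output look something
+like this (abridged and illustrative; the full list covers a couple
+dozen properties and varies by configuration):
+
+```bash
+maps has been created and may be activated
+
+active: false
+allow_mult: true
+datatype: map
+```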
+
+If our `maps` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: map
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+After creating and activating our new `maps` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key location that contains our map.
+
+The syntax for creating a map is analogous to the
+syntax for creating other data types:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location map =
+  new Location(new Namespace("", ""), "");
+```
+
+```ruby
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+map = Riak::Crdt::Map.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type');
+```
+
+```python
+# The client detects the bucket type's datatype and automatically
+# returns the right datatype for you, in this case a Map.
+map = bucket.new(key)
+
+# This way is also acceptable:
+from riak.datatypes import Map
+map = Map(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("")
+    .WithBucket("")
+    .WithKey("");
+```
+
+```javascript
+// Options to pass to the various map methods
+var options = {
+    bucketType: '',
+    bucket: '',
+    key: ''
+};
+```
+
+```erlang
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl http://localhost:8098/types//buckets//datatypes/
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/
+```
+
+## Create a Map
+
+For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket.
+
+We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type:
+
+```java
+// In the Java client, you specify the location of data types
+// before you perform operations on them:
+
+Location ahmedMap =
+  new Location(new Namespace("maps", "customers"), "ahmed_info");
+```
+
+```ruby
+customers = client.bucket_type('maps').bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+
+# Alternatively, the Ruby client enables you to set a bucket type as being
+# globally associated with a Riak data type. 
The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+    .update("first_name", ru1)
+    .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Ahmed')
+    ->updateRegister('phone_number', '5551234567')
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved. 
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+    .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_customer', false)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"enterprise_customer">>, flag},
+                        fun(F) -> riakc_flag:disable(F) end,
+                        Map3).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "enterprise_customer_flag": "disable"
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+  }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"]
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map4). 
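+
+%% For example, for Map4 this returns the field list with local values,
+%% along the lines of [{{<<"enterprise_customer">>, flag}, false}].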
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+    .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateCounter('page_visits', $updateCounter)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "page_visits_counter": 1
+    }
+  }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles. 
We'll store
+that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+    .add("robots")
+    .add("opera")
+    .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+    .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('robots')
+    ->add('opera')
+    ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{"interests":["motorcycles","opera","robots"]},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "add_all": [
+          "robots",
+          "opera",
+          "motorcycles"
+        ]
+      }
+    }
+  }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? 
interest + end +end + +# This will return three Boolean values +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$sets = $map->getSet('interests'); +var_dump($sets->getData()); +``` + +```python +reloaded_map = map.reload() +for interest in ['robots', 'opera', 'motorcycles']: + interest in reloaded_map.sets['interests'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +Map ahmedMap = response.Value; + +// All of the following return true: +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("opera"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + assert(rslt.map.sets['interests'].indexOf('robots') !== -1); +}); +``` + +```erlang +riakc_map:dirty_value(Map6). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false +``` + +We learn from a recent purchasing decision that Ahmed actually doesn't +seem to like opera. He's much more keen on indie pop. Let's change the +`interests` set to reflect that: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate() + .remove("opera") + .add("indie pop"); +MapUpdate mu = new MapUpdate() + .update("interests", su); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.batch do |m| + m.sets['interests'].remove('opera') + m.sets['interests'].add('indie pop') +end +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('indie pop') + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +```python +map.sets['interests'].discard('opera') +map.sets['interests'].add('indie pop') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.AddToSet("interests", "indie pop"); +mapOperation.RemoveFromSet("interests", "opera"); + +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); + +MapResponse response = cmd.Response; +Map ahmedMap = response.Value; + +// This is false +ahmedMap.Sets.GetValue("interests").Contains("opera"); + +// These are true +ahmedMap.Sets.GetValue("interests").Contains("indie pop"); +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + mapOp.removeFromSet('interests', 'opera'); + mapOp.addToSet('interests', 'indie pop'); + + options.context = rslt.context; + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map7 = 
riakc_map:update({<<"interests">>, set},
+                 fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+                        Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of a wide variety of
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+    .update("first_name", ru1)
+    .update("last_name", ru2)
+    .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+    .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Annika')
+    ->updateRegister('last_name', 'Weiss')
+    ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.map('annika_info')
+    .setRegister('first_name', 'Annika')
+    .setRegister('last_name', 'Weiss')
+    .setRegister('phone_number', '5559876543');
+
+options.op = mapOp;
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map12 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"first_name">>, register},
+        fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end,
+    Map11),
+Map13 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"last_name">>, register},
+        fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end,
+    Map12),
+Map14 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"phone_number">>, register},
+        fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end,
+    Map13).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "first_name_register": "Annika",
+          "last_name_register": "Weiss",
+          "phone_number_register": "5559876543"
+        }
+      }
+    }
+  }
+  '
+```
+
+The value of a register in a map can be obtained without a special
+method:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+String annikaFirstName = response.getDatatype()
+    .getMap("annika_info")
+    .getRegister("first_name")
+    .view()
+    .toString();
+```
+
+```ruby
+map.maps['annika_info'].registers['first_name']
+
+# "Annika"
+```
+
+```php
+# with param 'returnbody' = 'true', we can fetch the map from our last response
+$map = $response->getMap();
+
+echo $map->getMap('annika_info')->getRegister('first_name'); // Annika
+```
+
+```python
+map.reload().maps['annika_info'].registers['first_name'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+ahmedMap = response.Value;
+ahmedMap.Maps["annika_info"].Registers.GetValue("first_name");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var annikaFirstName =
+        rslt.map.maps['annika_info'].registers['first_name'].toString('utf8');
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map14).
+```
+
+```curl
+# Specific values for fields inside of maps (or maps within maps, for that
+# matter), cannot be obtained directly through the HTTP interface.
+```
+
+Registers can also be removed:
+
+```java
+// This example uses our "ahmedMap" location from above. 
Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+    .removeRegister("phone_number");
+MapUpdate ahmedUpdate = new MapUpdate()
+    .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('phone_number')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->removeRegister('phone_number');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['phone_number']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("phone_number");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('phone_number');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+                         fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+                         Map14). 
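+
+%% Note that field removals require a context to succeed on the server,
+%% so Map15 should be derived from a map fetched with
+%% riakc_pb_socket:fetch_type/3 rather than built from riakc_map:new/0.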
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "remove": ["phone_number_register"]
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+  }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+    .update("enterprise_plan", new FlagUpdate(false))
+    .update("family_plan", new FlagUpdate(false))
+    .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+    .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_plan', false)
+    ->updateFlag('family_plan', false)
+    ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"enterprise_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map15),
+Map17 = riakc_map:update(
+    
{<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"family_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map16),
+Map18 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"free_plan">>, flag},
+        fun(F) -> riakc_flag:enable(F) end,
+        M) end,
+    Map17).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "enterprise_plan_flag": "disable",
+          "family_plan_flag": "disable",
+          "free_plan_flag": "enable"
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="
+  }
+  '
+```
+
+The value of a flag can be retrieved at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+boolean enterprisePlan = response.getDatatype()
+    .getMap("annika_info")
+    .getFlag("enterprise_plan")
+    .view();
+```
+
+```ruby
+map.maps['annika_info'].flags['enterprise_plan']
+
+# false
+```
+
+```php
+# with param 'returnbody' = 'true', we can fetch the map from our last response
+$map = $response->getMap();
+
+echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false
+```
+
+```python
+map.reload().maps['annika_info'].flags['enterprise_plan'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+ahmedMap = response.Value;
+ahmedMap.Maps["annika_info"].Flags["enterprise_plan"];
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var enterprisePlan =
+        rslt.map.maps.annika_info.flags.enterprise_plan;
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map18).
+```
+
+```curl
+# Specific values for fields inside of maps (or maps within maps, for that
+# matter), cannot be obtained directly through the HTTP interface.
+```
+
+It's also important to track the number of purchases that Annika has
+made with our company. 
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
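+
+%% As with the top-level page_visits counter, the embedded counter is
+%% created on its first increment if it does not already exist.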
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "add": "tango dancing"
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+We can remove that interest in just the way that we would expect:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate().remove("tango dancing");
+MapUpdate annikaUpdate = new MapUpdate()
+    .update("interests", su);
+MapUpdate ahmedUpdate = new MapUpdate()
+    .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].sets['interests'].remove('tango dancing')
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('tango dancing');
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($response->getMap()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].sets['interests'].discard('tango dancing')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing");
+
+// Note: using Context from previous response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+client.Execute(builder.Build());
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.removeFromSet('interests', 'tango dancing');
+
+    options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map21 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"interests">>, set},
+        fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end,
+        M) end,
+    Map20). 
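+
+%% As with register removals, removing a set element is only accepted
+%% by the server when the update is sent with a context from a
+%% previously fetched map.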
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "remove": "tango dancing"
+          }
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+  }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('first_purchase', true)
+    ->updateRegister('amount', '1271')
+    ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map22 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"purchase">>, map},
+        fun(M2) -> riakc_map:update(
+            {<<"first_purchase">>, flag},
+            fun(F) -> riakc_flag:enable(F) end,
+            M2) end,
+        M) end,
+    Map21
+).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "purchase_map": {
+            "update": {
+              "first_purchase_flag": "enable",
+              "amount_register": "1271",
+              "items_set": {
+                "add": "large widget"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  '
+```
diff --git a/content/riak/kv/2.2.6/developing/data-types/sets.md b/content/riak/kv/2.2.6/developing/data-types/sets.md
new file mode 100644
index 0000000000..ad4ba01969
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/data-types/sets.md
@@ -0,0 +1,769 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Sets"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Sets"
+    identifier: "data_types_sets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/data-types/sets
+  - /riak/kv/2.2.6/dev/using/data-types/sets
+  - /riak/2.2.6/dev/data-modeling/data-types/sets
+  - /riak/kv/2.2.6/dev/data-modeling/data-types/sets
+---
+
+Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps).
+
+Sets are collections of unique binary values (such as strings).
+
+Because every value in a set is unique, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `set`:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+```
+
+> **Note**
+>
+> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `sets` bucket type has been set properly, we should see the following pair in our console output:
+
+```
+datatype: set
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate sets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+After creating and activating our new `sets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair.
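+
+Before the per-language syntax below, here is a minimal end-to-end
+sketch using the Python client (assuming a local node listening on the
+default Protocol Buffers port, 8087, and the bucket/key used in the
+examples that follow):
+
+```python
+# A minimal sketch: create, update, and read back a set.
+import riak
+
+client = riak.RiakClient(host='127.0.0.1', pb_port=8087)
+
+# Bucket 'travel' under the 'sets' bucket type activated above
+travel = client.bucket_type('sets').bucket('travel')
+
+cities_set = travel.new('cities')   # a riak.datatypes.Set
+cities_set.add('Toronto')
+cities_set.store()                  # send the buffered update to Riak
+
+cities_set.reload()                 # fetch the stored value from the server
+print(cities_set.value)             # frozenset({'Toronto'})
+```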
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type.
The following would +# set all set buckets to use the sets bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets' + +# This would enable us to create our set without specifying a bucket +# type: +travel = client.bucket('travel') +cities_set = Riak::Crdt::Set.new(travel, 'cities') +``` + +```php +$location = new \Basho\Riak\Location('cities', 'travel', 'sets'); +``` + +```python +travel = client.bucket_type('sets').bucket('travel') + +# The client detects the bucket type's data type and automatically +# returns the right data type for you, in this case a Riak set. +cities_set = travel.new('cities') + +# You can also create a reference to a set explicitly: +from riak.datatypes import Set + +cities_set = Set(travel, 'cities') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Now we'll create a Builder object for the set with which we want to +// interact: +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); +``` + +```javascript +// Now we'll create a options object for the set with which we want to +// interact: +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +``` + +```erlang +CitiesSet = riakc_set:new(). + +%% Sets in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +# You cannot create an empty set through the HTTP interface. Sets can +# only be created when an element is added to them, as in the examples +# below. +``` + +Upon creation, our set is empty. We can verify that it is empty at any +time: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +RiakSet set = response.getDatatype(); +boolean isEmpty = set.viewAsSet().isEmpty(); +``` + +```ruby +cities_set.empty? +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($set->getData()); +``` + +```python +len(cities_set) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); + +FetchSet fetchSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchSetCommand); +SetResponse response = fetchSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("set 'cities' is not found!"); + } +}); +``` + +```erlang +riakc_set:size(CitiesSet) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% set that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a Set + +But let's say that we read a travel brochure saying that Toronto and +Montreal are nice places to go. 
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
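+
+To make the fetch-before-remove cycle concrete, here is a small sketch
+using the Python client (which fetches the causal context for you when
+the set is reloaded and sends it back on the next store):
+
+```python
+# Sketch: fetch (reload), then buffer removals/additions, then store.
+cities_set.reload()             # pulls down current members + causal context
+cities_set.discard('Montreal')  # buffered locally until store()
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()              # update is submitted along with the context
+```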
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+    .remove("Montreal")
+    .add("Hamilton")
+    .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Hamilton')
+    ->add('Ottawa')
+    ->remove('Montreal')
+    ->atLocation($location)
+    ->withContext($response->getSet()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse setResponse = cmd.Response;
+
+// using System.Linq
+var responseStrings = setResponse.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4).
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}
+
+curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \
+  -H "Content-Type: application/json" \
+  -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}'
+```
+
+## Retrieve a Set
+
+Now, we can check on which cities are currently in our set:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+# #<Set: {"Hamilton", "Ottawa", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+var_dump($set->getData());
+```
+
+```python
+cities_set.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would
+# return frozenset([]).
+
+cities_set.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions or deletions.
+cities_set.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in setResponse.AsStrings)
+{
+    Console.WriteLine("Cities Set Value: {0}", value);
+}
+
+// Output:
+// Cities Set Value: Hamilton
+// Cities Set Value: Ottawa
+// Cities Set Value: Toronto
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("cities set values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: cities set values: 'Hamilton, Ottawa, Toronto'
+```
+
+```erlang
+riakc_set:dirty_value(CitiesSet5).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_set:value(CitiesSet5).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"sets">>,<<"travel">>},
+                                        <<"cities">>).
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="}
+
+# You can also fetch the value of the set without the context included:
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false
+
+# Response
+{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]}
+```
+
+## Find Set Member
+
+Or we can see whether our set includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('Vancouver', $set->getData()); # false
+
+in_array('Ottawa', $set->getData()); # true
+```
+
+```python
+'Vancouver' in cities_set
+# False
+
+'Ottawa' in cities_set
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesVancouver = setResponse.AsStrings.Any(v => v == "Vancouver");
+bool includesOttawa = setResponse.AsStrings.Any(v => v == "Ottawa");
+```
+
+```javascript
+// Use the standard JavaScript array method indexOf()
+
+var cities_set = rslt.values;
+cities_set.indexOf('Vancouver'); // if present, index is >= 0
+cities_set.indexOf('Ottawa'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, CitiesSet5 is the most "recent" set from the standpoint
+%% of our application.
+
+riakc_set:is_element(<<"Vancouver">>, CitiesSet5).
+riakc_set:is_element(<<"Ottawa">>, CitiesSet5).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of Set
+
+We can also determine the size of the set:
+
+```java
+// Using our "binarySet" from above:
+
+int numberOfCities = binarySet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($set->getData());
+```
+
+```python
+len(cities_set)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+setResponse.Values.Count();
+```
+
+```javascript
+// Use the standard JavaScript array property length
+
+var cities_set_size = rslt.values.length;
+```
+
+```erlang
+riakc_set:size(CitiesSet5).
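+
+%% Note: per the comment under "Retrieve a Set", size/1 consults the
+%% last value fetched from the server, not unsent local additions, so
+%% for a set built purely with local operations this returns 0.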
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
diff --git a/content/riak/kv/2.2.6/developing/faq.md b/content/riak/kv/2.2.6/developing/faq.md
new file mode 100644
index 0000000000..581f6fcebe
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/faq.md
@@ -0,0 +1,654 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Frequently Asked Questions"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Developing FAQ"
+    identifier: "developing_faq"
+    weight: 108
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.2.6/community/faqs/developing
+  - /riak/kv/2.2.6/community/faqs/developing
+---
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.2.6/using/performance/benchmarking
+[Bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask
+[Bucket Properties]: {{<baseurl>}}riak/kv/2.2.6/developing/usage
+[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/commit-hooks
+[Configuration Files]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference
+[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{<baseurl>}}riak/kv/2.2.6/developing/client-libraries
+[MapReduce]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce
+[Memory]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/memory
+[Riak CS]: {{<baseurl>}}riak/cs/2.1.1
+[System Planning]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context#vector-clocks
+
+
+## General
+
+
+**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?**
+
+**A:**
+  If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example:
+
+  ```erlang
+  {bitcask, [
+      {data_root, "data/bitcask"},
+      {expiry_secs, 86400} %% Expire after a day
+  ]},
+  ```
+
+  There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0.
+
+  You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM.
+
+
+---
+
+**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?**
+
+
+**A:**
+  Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free.
+
+  If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact.
+
+
+---
+
+**Q: Can I list buckets or keys in production?**
+
+
+**A:**
+  It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size.
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+
+---
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3.
+
+  For this example, I am using small integers instead of the actual 160-bit partition index values for the sake of simplicity. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key, and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+```
+0-1-2     1-2-3     2-3-4     3-4-5
+4-5-6     5-6-7     6-7-8     7-8-9
+8-9-10    9-10-11   10-11-12  11-12-13
+12-13-14  13-14-15  14-15-0   15-0-1
+```
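+
+  For the curious, the toy example's preflists can be generated
+  mechanically; the following is an illustrative Python sketch (plain
+  arithmetic, not a Riak client call):
+
+```python
+# Illustrative only: enumerate the example's preflists.
+ring_size, n_val = 16, 3
+
+preflists = [[(start + i) % ring_size for i in range(n_val)]
+             for start in range(ring_size)]
+
+for pl in preflists:
+    print('-'.join(map(str, pl)))   # 0-1-2, 1-2-3, ..., 15-0-1
+```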
+
+  Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes:
+
+```
+0-1-4-7-10-13   0-2-4-7-10-13   0-2-5-7-10-13   0-2-5-8-10-13
+0-2-5-8-11-13   0-2-5-8-11-14   0-3-4-7-10-13   0-3-5-7-10-13
+0-3-5-8-10-13   0-3-5-8-11-13   0-3-5-8-11-14   0-3-6-7-10-13
+0-3-6-8-10-13   0-3-6-8-11-13   0-3-6-8-11-14   0-3-6-9-10-13
+0-3-6-9-11-13   0-3-6-9-11-14   0-3-6-9-12-13   0-3-6-9-12-14
+0-3-6-9-12-15   1-2-5-8-11-14   1-3-5-8-11-14   1-3-6-8-11-14
+1-3-6-9-11-14   1-3-6-9-12-14   1-3-6-9-12-15   1-4-5-8-11-14
+1-4-6-8-11-14   1-4-6-9-11-14   1-4-6-9-12-14   1-4-6-9-12-15
+1-4-7-8-11-14   1-4-7-9-11-14   1-4-7-9-12-14   1-4-7-9-12-15
+1-4-7-10-11-14  1-4-7-10-12-14  1-4-7-10-12-15  1-4-7-10-13-14
+1-4-7-10-13-15  2-3-6-9-12-15   2-4-6-9-12-15   2-4-7-9-12-15
+2-4-7-10-12-15  2-4-7-10-13-15  2-5-6-9-12-15   2-5-7-9-12-15
+2-5-7-10-12-15  2-5-7-10-13-15  2-5-8-9-12-15   2-5-8-10-12-15
+2-5-8-10-13-15  2-5-8-11-12-15  2-5-8-11-13-15  2-5-8-11-14-15
+```
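+
+  The count of 56 can be verified by brute force over all 6-vnode
+  subsets (again an illustrative Python sketch, not client code):
+
+```python
+# Illustrative only: count 6-vnode subsets covering every preflist.
+from itertools import combinations
+
+ring_size, n_val = 16, 3
+preflists = [{(s + i) % ring_size for i in range(n_val)}
+             for s in range(ring_size)]
+
+covering_sets = [set(c) for c in combinations(range(ring_size), 6)
+                 if all(pl & set(c) for pl in preflists)]
+print(len(covering_sets))   # 56
+```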
+
+  When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency.
+  A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for over half of them to respond. This results in consistent responses to GET even when one of the vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may include only 1 member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+  Continuing with the above example, consider what happens if node C is force removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+  In this new 4-node configuration, any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+  So, making a few assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+      ```
+      a - 0-1-2
+      b - 6-7-8
+      c - 10-11-12
+      ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys will vary depending on the nodes chosen for the coverage set. Of the 56 possible covering sets ...
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+```
+0-2-5-8-10-13  0-2-5-8-11-13  0-2-5-8-11-14  0-3-5-8-10-13
+0-3-5-8-11-13  0-3-5-8-11-14  0-3-6-8-10-13  0-3-6-8-11-13
+0-3-6-8-11-14  0-3-6-9-10-13  0-3-6-9-11-13  0-3-6-9-11-14
+1-2-5-8-11-14  1-3-5-8-11-14  1-3-6-8-11-14  1-3-6-9-11-14
+1-4-5-8-11-14  1-4-6-8-11-14  1-4-6-9-11-14  1-4-7-8-11-14
+```
+
+  * 24 sets (42.9%) will return 2 of the 3 keys:
+
+  `{a,b}` (7 sets):
+
+```
+0-3-6-9-12-13  0-3-6-9-12-14  0-3-6-9-12-15  1-3-6-9-12-14
+1-3-6-9-12-15  1-4-6-9-12-14  1-4-6-9-12-15
+```
+
+  `{a,c}` (12 sets):
+
+```
+0-1-4-7-10-13   0-2-4-7-10-13   0-2-5-7-10-13   0-3-4-7-10-13
+0-3-5-7-10-13   0-3-6-7-10-13   1-4-7-10-11-14  1-4-7-10-12-14
+1-4-7-10-12-15  1-4-7-10-13-14  1-4-7-10-13-15  1-4-7-9-11-14
+```
+
+  `{b,c}` (5 sets):
+
+```
+2-5-8-10-12-15  2-5-8-10-13-15  2-5-8-11-12-15  2-5-8-11-14-15
+2-5-8-11-13-15
+```
+
+  * 10 sets (17.9%) will return only one of the 3 keys:
+
+  `{a}` (2 sets):
+
+```
+1-4-7-9-12-14  1-4-7-9-12-15
+```
+
+  `{b}` (4 sets):
+
+```
+2-3-6-9-12-15  2-4-6-9-12-15  2-5-6-9-12-15  2-5-8-9-12-15
+```
+
+  `{c}` (4 sets):
+
+```
+2-4-7-10-12-15  2-4-7-10-13-15  2-5-7-10-12-15  2-5-7-10-13-15
+```
+
+  * 2 sets (3.6%) will not return any of the 3 keys:
+
+```
+2-4-7-9-12-15  2-5-7-9-12-15
+```
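+
+  The 20/24/10/2 breakdown above can be reproduced with one more
+  illustrative sketch: a covering set returns a key only if it contains
+  at least one vnode from that key's preflist that still holds data
+  (i.e. is not one of the freshly created, empty primaries 2, 7, and 12):
+
+```python
+# Illustrative only: classify covering sets by the keys they can return.
+from collections import Counter
+from itertools import combinations
+
+ring_size, n_val = 16, 3
+preflists = [{(s + i) % ring_size for i in range(n_val)}
+             for s in range(ring_size)]
+covering_sets = [set(c) for c in combinations(range(ring_size), 6)
+                 if all(pl & set(c) for pl in preflists)]
+
+empty = {2, 7, 12}   # unpopulated primaries after the force-remove
+key_preflists = {'a': {0, 1, 2}, 'b': {6, 7, 8}, 'c': {10, 11, 12}}
+
+tally = Counter(
+    frozenset(k for k, pl in key_preflists.items() if (pl - empty) & cs)
+    for cs in covering_sets)
+
+for keys, count in sorted(tally.items(), key=lambda kv: -len(kv[0])):
+    print(sorted(keys), count)   # ['a','b','c'] 20 ... [] 2
+```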
+ +--- + +**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. + +--- + +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + + +--- + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + + +--- + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories.
+
+  ```erlang
+  ClusterNode = 'riak@127.0.0.1'.
+
+  application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+  {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+  Ring2 = setelement(2, Ring, node()).
+  riak_core_ring_manager:set_my_ring(Ring2).
+  riak_core_ring_manager:write_ringfile().
+  [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()].
+  ```
+
+
+---
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+  There isn't a limit on object size, but we suggest you keep it to no more than 1-2 MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+
+---
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+  The storage per key is 40 bytes plus the key size and bucket name size.
+
+  Example:
+
+  Key size: 15 bytes.
+  Bucket Name size: 10 bytes.
+
+  Total size = 40 + 15 + 10 = **65 bytes**.
+
+
+
+---
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+  It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+
+---
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+  The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+  Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster.
+
+  The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+
+---
+
+**Q: Are Riak keys / buckets case sensitive?**
+
+
+**A:**
+  Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+
+---
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+  We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+  1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+  2. You will be able to upgrade Riak and your application independently of one another.
+  3. When your application or Riak needs more capacity, you can scale them separately to meet your production needs.
+
+
+---
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+  Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+
+
+---
+
+**Q: How do I spread requests across---i.e.
load balance---a Riak cluster?** + + +**A:** + There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**. + + For further information see [System Planning]. + + +--- + + +**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?** + There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node a series of merges are kicked off and the total size of the data directory shrinks. Why does this happen? + + +**A:** + Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows: + + 1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`) + 2. Remove the currently active file from the list; the active file is the one being actively written + 3. Lookup file stats for each data file; this includes percent fragmentation and number of dead bytes + 4. If any of the stats exceed the defined triggers, the Bitcask directory is merged + + The default triggers for a Bitcask directory: + + * `{frag_merge_trigger, 60}, % >= 60% fragmentation` + * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB` + + In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory. + + If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default). + + +--- + +**Q: When retrieving a list of siblings I am getting the same vtag multiple times.** + When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling? + + +**A:** + The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata. + + It is possible to get siblings with the same vtag during vector clock pruning and read/repair. + + See [vector clocks] for more information. + + + +--- + +**Q: How should I structure larger data objects?** + I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs? + + +**A:** + The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include: + + 1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time? + 2. How likely are the objects to be updated at the same time by multiple processes? + + If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. 
Generally, you will want to add links to connect the objects for easy fetching and traversal.
+
+
+---
+
+**Q: Is there any way in Riak to limit access to a user or a group of users?**
+
+
+**A:**
+  Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication.
+
+  If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it.
+
+
+---
+
+**Q: Is there a way to enforce a schema on data in a given bucket?**
+  Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error.
+
+
+**A:**
+  Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. This document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in JavaScript that restricts non-JSON content.
+
+
+---
+
+**Q: How does the Erlang Riak Client manage node failures?**
+  Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down?
+
+
+**A:**
+  The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function.
+
+  * `queue_if_disconnected` (default: `false`) --- requests will be queued when the connection to the server is lost.
+  * `auto_reconnect` (default: `false`) --- if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`.
+
+  If both of these options are `false`, connection errors will be returned to the process making requests as `{error, Reason}` tuples.
+
+
+---
+
+**Q: Is there a limiting factor for the number of buckets in a cluster?**
+
+
+**A:**
+  As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node.
+
+  More on [Bucket Properties].
+
+
+---
+
+**Q: Is it possible to configure a single bucket's properties in `app.config`?**
+
+
+**A:**
+  Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state.
+
+  You can read more on `app.config` in [Configuration Files].
+
+
+---
+
+**Q: Is there a simple command to delete a bucket?**
+
+
+**A:**
+  There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work:
+
+  ```curl
+  curl -X DELETE http://your-host:8098/riak/your-bucket
+  ```
+
+
+---
+
+**Q: Can Riak be configured to fail an update instead of generating a conflict?**
+
+
+**A:**
+  No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do.
+
+
+---
+
+**Q: How can I limit the number of keys retrieved?**
+
+
+**A:**
+  You'll need to use a [MapReduce] job for this.
+
+  You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster. It will only reduce load on your client.
+
+
+---
+
+**Q: How is the real hash value for replicas calculated based on the preflist?**
+
+
+**A:**
+  The hash is calculated first and then the next subsequent *N* partitions are chosen for the preflist.
+
+
+---
+
+**Q: Do client libraries support load balancing/round robin?**
+
+
+**A:**
+
+  * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred.
+  * The Java client is strictly round robin, but with retries built in.
+  * The Python client also follows round robin without retries.
+  * The Erlang client does not support any load balancing.
+
+## MapReduce
+
+
+**Q: Does the number of keys in a bucket affect the performance of MapReduce?**
+
+
+**A:**
+  Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run.
+
+
+---
+
+**Q: How do I filter out `not_found` from MapReduce results?**
+  If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys.
+
+
+**A:**
+  There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list].
+
+
+---
+
+**Q: Is it possible to call a reduce function at specific intervals during a map function?**
+  When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often.
+
+
+**A:**
+  Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce.
+
+
+---
+
+**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?**
+
+
+**A:**
+  Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase.
+
+
+---
+
+**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?**
+
+
+**A:**
+  This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com].
+
+
+---
+
+**Q: Why does MapReduce return a JSON object on occasion instead of an array?**
+
+
+**A:**
+  `mochijson2` treats anything that looks like a proplist---a list of 2-tuples---as a hash:
+
+  ```erlang
+  list_to_binary(mochijson2:encode([{a, b}, {foo, bar}])).
+  <<"{\"a\":\"b\",\"foo\":\"bar\"}">>
+  ```
+
+  JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists.
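+
+  To see the difference from the consumer side, here is a small
+  illustrative sketch (plain Python, no Riak involved) of the two JSON
+  shapes a MapReduce client might receive:
+
+```python
+# Illustrative only: a proplist encoded as a hash vs. as pairs.
+import json
+
+as_hash  = '{"a":"b","foo":"bar"}'        # what mochijson2 produces
+as_pairs = '[["a","b"],["foo","bar"]]'    # the length-2-lists workaround
+
+print(json.loads(as_hash))    # {'a': 'b', 'foo': 'bar'}
+print(json.loads(as_pairs))   # [['a', 'b'], ['foo', 'bar']]
+```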
diff --git a/content/riak/kv/2.2.6/developing/getting-started.md b/content/riak/kv/2.2.6/developing/getting-started.md
new file mode 100644
index 0000000000..7f99bcb67a
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started.md
@@ -0,0 +1,46 @@
+---
+title: "Getting Started Overview"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Getting Started"
+    identifier: "developing_getting_started"
+    weight: 100
+    parent: "developing"
+toc: true
+---
+
+[install index]: {{<baseurl>}}riak/kv/2.2.6/setup/installing
+[dev client libraries]: {{<baseurl>}}riak/kv/2.2.6/developing/client-libraries
+
+Welcome, new Riak developer! This guide will get you started developing
+against Riak KV with minimal fuss.
+
+## Installing Riak KV
+
+The easiest way to get started with Riak KV is to complete the
+[installation][install index] process.
+
+## Choose Your Programming Language
+
+Basho officially supports a number of open-source [client libraries][dev client libraries]
+for various programming languages and environments. Please select the
+language with which you'd like to proceed:
+
+* [Java]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started/ruby)
+* [Python]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started/python)
+* [C Sharp]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started/csharp)
+* [Node.js]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started/nodejs)
+* [Erlang]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started/erlang)
+* [PHP]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started/php)
+* [Go]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started/golang)
+
+### Community-supported Client Libraries
+
+Please see our [client libraries][dev client libraries] page for a listing of
+community-supported clients.
diff --git a/content/riak/kv/2.2.6/developing/getting-started/csharp.md b/content/riak/kv/2.2.6/developing/getting-started/csharp.md
new file mode 100644
index 0000000000..42df49f735
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started/csharp.md
@@ -0,0 +1,82 @@
+---
+title: "Getting Started with C Sharp"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "C Sharp"
+    identifier: "getting_started_csharp"
+    weight: 103
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/taste-of-riak/csharp
+  - /riak/kv/2.2.6/dev/taste-of-riak/csharp
+---
+
+If you haven't set up a Riak node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.2.6/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of the .NET Framework or Mono is required.
+
+### Client Setup
+
+Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager.
+
+{{% note title="Configuring for a remote cluster" %}}
+By default, the Riak .NET Client will add a section to your `app.config` file
+for a four node local cluster. If you are using a remote cluster, open up
+`app.config` and change the `hostAddress` values to point to nodes in your
+remote cluster.
+{{% /note %}}
+
+### Connecting to Riak
+
+Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object.
+
+```csharp
+using System;
+using RiakClient;
+
+namespace TasteOfRiak
+{
+    class Program
+    {
+        static void Main(string[] args)
+        {
+            // don't worry, we'll use this string later
+            const string contributors = "contributors";
+            IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig");
+            IRiakClient client = cluster.CreateClient();
+        }
+    }
+}
+```
+
+This creates a new `RiakCluster`, which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndpoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak.
+
+Let's make sure the cluster is online. Add this to your `Main` method:
+
+```csharp
+var pingResult = client.Ping();
+
+if (pingResult.IsSuccess)
+{
+    Console.WriteLine("pong");
+}
+else
+{
+    Console.WriteLine("Are you sure Riak is running?");
+    Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage);
+}
+```
+
+This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes.
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started/csharp/crud-operations)
diff --git a/content/riak/kv/2.2.6/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/2.2.6/developing/getting-started/csharp/crud-operations.md
new file mode 100644
index 0000000000..6ee7a34b05
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started/csharp/crud-operations.md
@@ -0,0 +1,143 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with C Sharp"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "CRUD Operations"
+    identifier: "getting_started_csharp_crud"
+    weight: 100
+    parent: "getting_started_csharp"
+toc: true
+---
+
+### Creating Objects In Riak
+
+Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak.
+
+The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At its most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak.
+
+Add the `RiakClient.Models` namespace to your using directives. Your usings should look like this:
+
+```csharp
+using System;
+using System.Collections.Generic;
+using RiakClient;
+using RiakClient.Models;
+```
+
+Add the `Person` class to the `TasteOfRiak` namespace:
+
+```csharp
+public class Person
+{
+    public string EmailAddress { get; set; }
+    public string FirstName { get; set; }
+    public string LastName { get; set; }
+}
+```
+
+Now let's create some people!
+
+```csharp
+var people = new[]
+{
+    new Person {
+        EmailAddress = "bashoman@basho.com",
+        FirstName = "Basho",
+        LastName = "Man"
+    },
+    new Person {
+        EmailAddress = "johndoe@gmail.com",
+        FirstName = "John",
+        LastName = "Doe"
+    }
+};
+
+foreach (var person in people)
+{
+    var o = new RiakObject(contributors, person.EmailAddress, person);
+    var putResult = client.Put(o);
+
+    if (putResult.IsSuccess)
+    {
+        Console.WriteLine("Successfully saved {0} to bucket {1}", o.Key, o.Bucket);
+    }
+    else
+    {
+        Console.WriteLine("Are you *really* sure Riak is running?");
+        Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage);
+    }
+}
+```
+
+In this sample, we create a collection of `Person` objects and then save each `Person` to Riak.
+
+Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`.
+
+Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be an error message displaying the result code and a helpful error message.
+
+### Reading from Riak
+
+Let's find a person!
+
+```csharp
+Person bashoman = null;
+
+var result = client.Get(contributors, "bashoman@basho.com");
+if (result.IsSuccess)
+{
+    bashoman = result.Value.GetObject<Person>();
+    Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors);
+}
+else
+{
+    Console.WriteLine("Something went wrong!");
+    Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage);
+}
+```
+
+We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult` which, like other RiakResults, helpfully encapsulates the communication with Riak.
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. diff --git a/content/riak/kv/2.2.6/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/2.2.6/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..ad5684b875 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,107 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/2.2.6/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+
+| Bucket | Key Pattern | Example Key |
+|:-------|:------------|:------------|
+| `Users` | `<username>` | `joeuser` |
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13` |
+| `Timelines` | `<username>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting UTC datetime in an [ISO 8601][iso_8601]
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, please refer to
+[this source code][2] for the repositories that we'll be using.
+
+[This console application][3] exercises the code that we've written.
+
+The repository pattern and `TimelineManager` help with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
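+
+To make the pattern concrete, here is a deliberately simplified sketch of the
+shape such a repository can take. The `Repository<T>` class below is
+hypothetical and trimmed for illustration; the real versions, including
+`TimelineManager`, live in the source code linked above.
+
+```csharp
+using RiakClient;
+using RiakClient.Models;
+
+// Hypothetical, minimal repository base class for this chapter's models.
+public abstract class Repository<T> where T : class
+{
+    protected readonly IRiakClient client;
+
+    protected Repository(IRiakClient client)
+    {
+        this.client = client;
+    }
+
+    // Each concrete repository pins down the bucket it works against.
+    protected abstract string BucketName { get; }
+
+    public virtual T Get(string key)
+    {
+        var result = client.Get(BucketName, key);
+        return result.IsSuccess ? result.Value.GetObject<T>() : null;
+    }
+
+    public virtual string Save(string key, T model)
+    {
+        var o = new RiakObject(BucketName, key, model);
+        var result = client.Put(o);
+        return result.IsSuccess ? key : null;
+    }
+}
+```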
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + diff --git a/content/riak/kv/2.2.6/developing/getting-started/csharp/querying.md b/content/riak/kv/2.2.6/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..492f846770 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/csharp/querying.md @@ -0,0 +1,210 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/querying-csharp + - /riak/kv/2.2.6/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+ +```csharp +Console.WriteLine("Creating Data"); +Customer customer = CreateCustomer(); +IEnumerable orders = CreateOrders(customer); +OrderSummary orderSummary = CreateOrderSummary(customer, orders); + +Console.WriteLine("Starting Client"); +using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = endpoint.CreateClient(); + + Console.WriteLine("Storing Data"); + + client.Put(ToRiakObject(customer)); + + foreach (Order order in orders) + { + // NB: this adds secondary index data as well + client.Put(ToRiakObject(order)); + } + + client.Put(ToRiakObject(orderSummary)); + + ... + ... + ... +} +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```csharp +Console.WriteLine("Fetching related data by shared key"); +string key = "1"; + +var result = client.Get(customersBucketName, key); +CheckResult(result); +Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result)); + +result = client.Get(orderSummariesBucketName, key); +CheckResult(result); +Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result)); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.6/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will make a note of where +secondary index data is added to our model objects. 
+ +```csharp +private static RiakObject ToRiakObject(Order order) +{ + var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString()); + var riakObject = new RiakObject(orderRiakObjectId, order); + + IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName); + salesPersonIdIndex.Add(order.SalesPersonId.ToString()); + + BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName); + orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd")); + + return riakObject; +} +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +```csharp +// Query for order keys where the SalesPersonId index is set to 9000 +var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName); +RiakResult indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here. +CheckResult(indexRiakResult); +RiakIndexResult indexResult = indexRiakResult.Value; +Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +Jane's orders (key values): 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```csharp +// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 +riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName); +indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here. +CheckResult(indexRiakResult); +indexResult = indexRiakResult.Value; +Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +October orders (key values): 1, 2 +``` + +We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip diff --git a/content/riak/kv/2.2.6/developing/getting-started/erlang.md b/content/riak/kv/2.2.6/developing/getting-started/erlang.md new file mode 100644 index 0000000000..b13efe6c35 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/erlang.md @@ -0,0 +1,55 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/erlang + - /riak/kv/2.2.6/dev/taste-of-riak/erlang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.6/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let’s create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.2.6/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.2.6/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/2.2.6/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..6ee9556b79 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,167 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +--- + +## Creating Objects In Riak + +First, let’s create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let’s store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}. 
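+%% Values that aren't already binaries (like this tuple, or the integer
+%% above) are serialized with term_to_binary/1 on the way in, so we'll
+%% decode them with binary_to_term/1 when we read them back.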
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
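+%% rp/1 can render this as a #book{} record because the record definition
+%% was loaded into the shell earlier with rd/1.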
+``` + +Next let’s clean up our mess: + +```erlang +riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>). +riakc_pb_socket:stop(Pid). +``` diff --git a/content/riak/kv/2.2.6/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.2.6/developing/getting-started/erlang/object-modeling.md new file mode 100644 index 0000000000..029bfba825 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/erlang/object-modeling.md @@ -0,0 +1,338 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Erlang" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Object Modeling" + identifier: "getting_started_erlang_object" + weight: 102 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/object-modeling-erlang + - /riak/kv/2.2.6/dev/taste-of-riak/object-modeling-erlang +--- + +To get started, let's create the records that we'll be using. + +{{% note title="Code Download" %}} +You can also download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema). + +The Github version includes Erlang type specifications which have been omitted +here for brevity. +{{% /note %}} + + +```erlang +%% msgy.hrl + +-define(USER_BUCKET, <<"Users">>). +-define(MSG_BUCKET, <<"Msgs">>). +-define(TIMELINE_BUCKET, <<"Timelines">>). +-define(INBOX, "Inbox"). +-define(SENT, "Sent"). + +-record(user, {user_name, full_name, email}). + +-record(msg, {sender, recipient, created, text}). + +-record(timeline, {owner, msg_type, msgs}). +``` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.2.6/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. + + +Bucket | Key Pattern | Example Key +:------|:------------|:----------- +`Users` | `` | `joeuser` +`Msgs` | `_` | `joeuser_2014-03-06T02:05:13.223556Z` +`Timelines` | `__` | `joeuser_Sent_2014-03-06Z`
`marketing_group_Inbox_2014-03-06Z` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. For the +`Msgs` bucket, let's use a combination of the username and the posting +datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601) +format. This combination gives us the pattern `_`, +which produces keys like `joeuser_2014-03-05T23:20:28Z`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `_Inbox_` for groups, which will look like +`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2 MB. Objects larger than that can +hurt performance, especially if many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, let's write some modules to +act as repositories that will help us create and work with these records +in Riak: + +```erlang +%% user_repository.erl + +-module(user_repository). +-export([save_user/2, + get_user/2]). +-include("msgy.hrl"). + +save_user(ClientPid, User) -> + RUser = riakc_obj:new(?USER_BUCKET, + list_to_binary(User#user.user_name), + User), + riakc_pb_socket:put(ClientPid, RUser). + +get_user(ClientPid, UserName) -> + {ok, RUser} = riakc_pb_socket:get(ClientPid, + ?USER_BUCKET, + list_to_binary(UserName)), + binary_to_term(riakc_obj:get_value(RUser)). +``` + +
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
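+
+%% Wire everything together: create two users, have one send the other a
+%% message, then read that message back from the recipient's inbox timeline.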
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + diff --git a/content/riak/kv/2.2.6/developing/getting-started/erlang/querying.md b/content/riak/kv/2.2.6/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..7bc1427dab --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/erlang/querying.md @@ -0,0 +1,303 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/querying-erlang + - /riak/kv/2.2.6/dev/taste-of-riak/querying-erlang +--- + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.2.6/developing/key-value-modeling). 
+ +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle= Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +## Remember to replace the ip and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). + +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj). 
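+
+%% Store each order under its order_id, then store the rolled-up summary
+%% object under the customer id.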
+ +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.6/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
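+
+%% For each order: fetch it, add a binary "order_date" index entry and an
+%% integer "salesperson_id" index entry to its metadata, then write it back.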
+ +AddIndicesToOrder = fun(OrderKey) -> + {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket, + list_to_binary(integer_to_list(OrderKey))), + + OrderData = binary_to_term(riakc_obj:get_value(Order)), + OrderMetadata = riakc_obj:get_update_metadata(Order), + + MD1 = riakc_obj:set_secondary_index(OrderMetadata, + [{{binary_index, "order_date"}, + [FormatDate(OrderData#order.order_date)]}]), + + MD2 = riakc_obj:set_secondary_index(MD1, + [{{integer_index, "salesperson_id"}, + [OrderData#order.salesperson_id]}]), + + Order2 = riakc_obj:update_metadata(Order,MD2), + riakc_pb_socket:put(Pid,Order2) +end. + +lists:foreach(AddIndicesToOrder, [1,2,3]). + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we +have to add entries to the indices at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll lookup the orders +by searching the `saleperson_id_int` index for Jane's id of `9000`. + +```erlang +riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000). +``` + +Which returns: + +```erlang +{ok,{index_results_v1,[<<"1">>,<<"3">>], + undefined,undefined}} +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's id, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`order_date_bin` index for entries between `20131001` and `20131031`. + +```erlang +riakc_pb_socket:get_index_range(Pid, OrderBucket, + {binary_index, "order_date"}, + <<"20131001">>, <<"20131031">>). +``` + +Which returns: + +```erlang +{ok,{index_results_v1,[<<"1">>,<<"2">>], + undefined,undefined}} +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So, to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a + secondary id other than the object's key. +* Indices can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.2.6/developing/getting-started/golang.md b/content/riak/kv/2.2.6/developing/getting-started/golang.md new file mode 100644 index 0000000000..fa1bb11b71 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/golang.md @@ -0,0 +1,78 @@ +--- +title: "Getting Started with Go" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Go" + identifier: "getting_started_go" + weight: 107 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/golang + - /riak/kv/2.2.6/dev/taste-of-riak/golang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.6/using/running-a-cluster) first and ensure you have +[a working installation of Go](http://golang.org/doc/install). 
+
+## Client Setup
+
+First install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"sync"
+
+	riak "github.com/basho/riak-go-client"
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+	var err error
+
+	// uncomment to enable debug logging
+	// riak.EnableDebugLogging = true
+
+	o := &riak.NewClientOptions{
+		RemoteAddresses: []string{util.GetRiakAddress()},
+	}
+
+	var c *riak.Client
+	c, err = riak.NewClient(o)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	defer func() {
+		if err := c.Stop(); err != nil {
+			util.ErrExit(err)
+		}
+	}()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.2.6/developing/getting-started/golang/crud-operations)
diff --git a/content/riak/kv/2.2.6/developing/getting-started/golang/crud-operations.md b/content/riak/kv/2.2.6/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..afe9ee80ea
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,370 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+---
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+	val1 := uint32(1)
+	val1buf := make([]byte, 4)
+	binary.LittleEndian.PutUint32(val1buf, val1)
+
+	val2 := "two"
+
+	val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+	var val3json []byte
+	val3json, err = json.Marshal(val3)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	bucket := "test"
+
+	util.Log.Println("Creating Objects In Riak...")
+
+	objs := []*riak.Object{
+		{
+			Bucket:      bucket,
+			Key:         "one",
+			ContentType: "application/octet-stream",
+			Value:       val1buf,
+		},
+		{
+			Bucket:      bucket,
+			Key:         "two",
+			ContentType: "text/plain",
+			Value:       []byte(val2),
+		},
+		{
+			Bucket:      bucket,
+			Key:         "three",
+			ContentType: "application/json",
+			Value:       val3json,
+		},
+	}
+
+	var cmd riak.Command
+	wg := &sync.WaitGroup{}
+
+	for _, o := range objs {
+		cmd, err = riak.NewStoreValueCommandBuilder().
+			WithContent(o).
+ Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } + } + + wg.Wait() +``` + +In our first object, we have stored the integer 1 with the lookup key +of `one`: + +```golang +{ + Bucket: bucket, + Key: "one", + ContentType: "application/octet-stream", + Value: val1buf, +} +``` + +For our second object, we stored a simple string value of `two` with a +matching key: + +```golang +{ + Bucket: bucket, + Key: "two", + ContentType: "text/plain", + Value: []byte(val2), +} +``` + +Finally, the third object we stored was a bit of JSON: + +```golang +{ + Bucket: bucket, + Key: "three", + ContentType: "application/json", + Value: val3json, +} +``` + +## Reading Objects + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +Requesting the objects by key: + +```golang +var cmd riak.Command +wg := &sync.WaitGroup{} + +for _, o := range objs { + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(o). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() + +util.Log.Println("Reading Objects From Riak...") + +d := make(chan riak.Command, len(objs)) + +for _, o := range objs { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + Done: d, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +close(d) +``` + +Converting to JSON to compare a string key to a symbol +key: + +```golang +for done := range d { + f := done.(*riak.FetchValueCommand) + /* un-comment to dump fetched object as JSON + if json, jerr := json.MarshalIndent(f.Response, "", " "); err != nil { + util.ErrLog.Println(jerr) + } else { + util.Log.Println("fetched value: ", string(json)) + } + */ + obj := f.Response.Values[0] + switch obj.Key { + case "one": + if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "two": + if actual, expected := string(obj.Value), val2; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "three": + obj3 = obj + val3.MyValue = 0 + if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) + } else { + if actual, expected := val3.MyValue, int(3); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + } + default: + util.ErrLog.Printf("unrecognized key: %s", obj.Key) + } +} +``` + +## Updating Objects + +While some data may be static, other forms of data need to be +updated. + +Let’s update some values: + +```golang +util.Log.Println("Updating Object Three In Riak...") + +val3.MyValue = 42 +obj3.Value, err = json.Marshal(val3) +if err != nil { + util.ErrExit(err) +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj3). + WithReturnBody(true). 
+ Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} + +svcmd := cmd.(*riak.StoreValueCommand) +svrsp := svcmd.Response +obj3 = svrsp.Values[0] +val3.MyValue = 0 +if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) +} else { + if actual, expected := val3.MyValue, int(42); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected) + } +} +util.Log.Println("updated object key: ", obj3.Key) +util.Log.Println("updated object value: ", val3.MyValue) +``` + +## Deleting Objects + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called against either the bucket or the +object. + +```golang +for _, o := range objs { + cmd, err = riak.NewDeleteValueCommandBuilder(). + WithBucket(o.Bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. + +For example, this `struct` that represents some information about +a book: + +```golang +type Book struct { + ISBN string + Title string + Author string + Body string + CopiesOwned uint16 +} + +book := &Book{ + ISBN: "1111979723", + Title: "Moby Dick", + Author: "Herman Melville", + Body: "Call me Ishmael. Some years ago...", + CopiesOwned: 3, +} +``` + +We now have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```golang +var jbook []byte +jbook, err = json.Marshal(book) +if err != nil { + util.ErrExit(err) +} + +bookObj := &riak.Object{ + Bucket: "books", + Key: book.ISBN, + ContentType: "application/json", + Value: jbook, +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(bookObj). + WithReturnBody(false). + Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} +``` + +If we fetch our book back and print the data: + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket("books"). + WithKey(book.ISBN). + Build() +if err != nil { + util.ErrExit(err) +} +if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) +} + +fcmd := cmd.(*riak.FetchValueCommand) +bookObj = fcmd.Response.Values[0] +util.Log.Println(string(bookObj.Value)) +``` + +The result is: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +Now, let’s delete the book: + +```golang +... 
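+
+// The original example is elided above ("..."); as a minimal sketch, the
+// delete uses the same DeleteValue command shown earlier in this chapter:
+cmd, err = riak.NewDeleteValueCommandBuilder().
+	WithBucket("books").
+	WithKey(book.ISBN).
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+if err := c.Execute(cmd); err != nil {
+	util.ErrLog.Println(err)
+}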
+``` diff --git a/content/riak/kv/2.2.6/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.2.6/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..755fd88cd4 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,548 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/object-modeling-golang + - /riak/kv/2.2.6/dev/taste-of-riak/object-modeling-golang +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.2.6/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<username>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<username>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>
For the `Users` bucket, we can be certain that we will want each
username to be unique, so let's use the `username` as the key. For the
`Msgs` bucket, let's use a combination of the username and the posting
datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
format. This combination gives us the pattern `<username>_<datetime>`,
which produces keys like `joeuser_2014-03-05T23:20:28Z`.

Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
timelines, so we can simply add that type into the key name. We will
also want to partition each collection object into some time period,
that way the object doesn't grow too large (see note below).

For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
users and `<groupname>_Inbox_<date>` for groups, which will look like
`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
respectively.

{{% note title="Note" %}}
Riak performs best with objects under 1-2 MB. Objects larger than that can
hurt performance, especially if many siblings are being created. We will cover
siblings, sibling resolution, and sibling explosions in the next chapter.
{{% /note %}}

#### Keeping our story straight with repositories

Now that we've figured out our object model, let's write some modules to
act as repositories that will help us create and work with these records
in Riak:

```repository.go
package repositories

import (
    "encoding/json"
    "errors"

    riak "github.com/basho/riak-go-client"
    models "github.com/basho/taste-of-riak/go/ch03/models"
)

var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")

type Repository interface {
    Get(key string, notFoundOk bool) (models.Model, error)
    Save(models.Model) (models.Model, error)
    getBucketName() string
    getModel() models.Model
    getClient() *riak.Client
}

type repositoryImpl struct {
    client *riak.Client
}

func (ri *repositoryImpl) getClient() *riak.Client {
    return ri.client
}

func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
    client := r.getClient()
    bucket := r.getBucketName()
    cmd, err := riak.NewFetchValueCommandBuilder().
        WithBucket(bucket).
        WithKey(key).
        WithNotFoundOk(notFoundOk).
        Build()
    if err != nil {
        return nil, err
    }
    if err = client.Execute(cmd); err != nil {
        return nil, err
    }

    fcmd := cmd.(*riak.FetchValueCommand)

    if notFoundOk && len(fcmd.Response.Values) == 0 {
        return nil, nil
    }

    if len(fcmd.Response.Values) > 1 {
        // Siblings present that need resolution
        // Here we'll just return an unexpected error
        return nil, ErrUnexpectedSiblings
    } else {
        return buildModel(r.getModel(), fcmd.Response.Values[0])
    }
}

func save(r Repository, m models.Model) (models.Model, error) {
    client := r.getClient()
    bucket := r.getBucketName()
    key := m.GetId()

    cmd, err := riak.NewFetchValueCommandBuilder().
        WithBucket(bucket).
        WithKey(key).
        WithNotFoundOk(true).
        Build()
    if err != nil {
        return nil, err
    }
    if err = client.Execute(cmd); err != nil {
        return nil, err
    }

    modelJson, err := json.Marshal(m)
    if err != nil {
        return nil, err
    }

    var objToInsertOrUpdate *riak.Object
    fcmd := cmd.(*riak.FetchValueCommand)
    if len(fcmd.Response.Values) > 1 {
        // Siblings present that need resolution. Here we'll just assume
        // the first sibling is the "correct" one to update with the new
        // Model data. A conflict resolver can also be part of the options
        // to the fetch command above.
        objToInsertOrUpdate = fcmd.Response.Values[0]
        objToInsertOrUpdate.Value = modelJson
    } else {
        objToInsertOrUpdate = &riak.Object{
            Bucket:      bucket,
            Key:         key,
            ContentType: "application/json",
            Charset:     "utf8",
            Value:       modelJson,
        }
    }

    cmd, err = riak.NewStoreValueCommandBuilder().
        WithContent(objToInsertOrUpdate).
        WithReturnBody(true).
        Build()
    if err != nil {
        return nil, err
    }
    if err = client.Execute(cmd); err != nil {
        return nil, err
    }

    scmd := cmd.(*riak.StoreValueCommand)
    if len(scmd.Response.Values) > 1 {
        return nil, ErrUnexpectedSiblings
    }
    obj := scmd.Response.Values[0]
    return buildModel(r.getModel(), obj)
}

func buildModel(m models.Model, obj *riak.Object) (models.Model, error) {
    err := json.Unmarshal(obj.Value, m)
    m.SetId(obj.Key)
    return m, err
}
```
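A note on the `notFoundOk` flag in `get`: when it is `true` and the key is
absent, the helper returns a `nil` model rather than an error, so callers
must check for `nil`. A minimal, hypothetical sketch (assuming a started
`client` and the `UserRepository` defined in the next listing):

```golang
// Hypothetical caller-side sketch; not part of the chapter's source files.
userRepo := NewUserRepository(client)

m, err := userRepo.Get("no_such_user", true)
if err != nil {
    // a transport or server error occurred
    util.ErrExit(err)
}
if m == nil {
    // notFoundOk=true turns a missing key into a nil model, not an error
    util.Log.Println("user does not exist yet")
}
```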
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
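Because `User.GetId()` returns the username, the username itself is the
natural key; no extra lookup table is needed. Here is a short, hypothetical
round trip (again assuming a started `client`):

```golang
// Hypothetical round trip: the username doubles as the Riak key.
userRepo := NewUserRepository(client)

u := models.NewUser("joeuser", "Joe User", "joe.user@basho.com")
if _, err := userRepo.Save(u); err != nil {
    util.ErrExit(err)
}

m, err := userRepo.Get("joeuser", false)
if err != nil {
    util.ErrExit(err)
}
fetched := m.(*models.User)
util.Log.Println(fetched.FullName) // prints "Joe User"
```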
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
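The test program at the end of this page also uses a `TimelineManager` from
the `managers` package of the taste-of-riak repository, which is not
reproduced on this page. The sketch below is a rough, hypothetical
reconstruction of its shape, composing the `MsgRepository` above with the
`TimelineRepository` shown next; see the repository for the real
implementation:

```golang
// Hypothetical sketch of the managers package used by the test program below.
package managers

import (
    "fmt"
    "time"

    models "github.com/basho/taste-of-riak/go/ch03/models"
    repos "github.com/basho/taste-of-riak/go/ch03/repositories"
)

type TimelineManager struct {
    timelineRepo *repos.TimelineRepository
    msgRepo      *repos.MsgRepository
}

func NewTimelineManager(t *repos.TimelineRepository, m *repos.MsgRepository) *TimelineManager {
    return &TimelineManager{timelineRepo: t, msgRepo: m}
}

// timelineKey builds the <owner>_<type>_<date> natural key.
func timelineKey(owner string, tt models.TimelineType, at time.Time) string {
    typeName := "Inbox"
    if tt == models.TimelineType_SENT {
        typeName = "Sent"
    }
    return fmt.Sprintf("%s_%s_%sZ", owner, typeName, at.Format("2006-01-02"))
}

// PostMsg saves the message, then appends its key to the recipient's
// Inbox timeline and the sender's Sent timeline.
func (tm *TimelineManager) PostMsg(m *models.Msg) error {
    if _, err := tm.msgRepo.Save(m); err != nil {
        return err
    }
    msgKey := m.GetId()
    if err := tm.addToTimeline(m.Recipient, models.TimelineType_INBOX, msgKey, m.Created); err != nil {
        return err
    }
    return tm.addToTimeline(m.Sender, models.TimelineType_SENT, msgKey, m.Created)
}

func (tm *TimelineManager) addToTimeline(owner string, tt models.TimelineType, msgKey string, at time.Time) error {
    key := timelineKey(owner, tt, at)
    t, err := tm.timelineRepo.Get(key, true) // notFoundOk: the timeline may not exist yet
    if err != nil {
        return err
    }
    var timeline *models.Timeline
    if t == nil {
        timeline = models.NewTimeline(key)
    } else {
        timeline = t.(*models.Timeline)
    }
    timeline.AddMsg(msgKey)
    _, err = tm.timelineRepo.Save(timeline)
    return err
}

// GetTimeline fetches the timeline for a given owner, type, and day.
func (tm *TimelineManager) GetTimeline(owner string, tt models.TimelineType, at time.Time) (*models.Timeline, error) {
    t, err := tm.timelineRepo.Get(timelineKey(owner, tt, at), true)
    if err != nil || t == nil {
        return nil, err
    }
    return t.(*models.Timeline), nil
}
```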
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names. +* How to choose natural keys based on how we want to partition our data. + + diff --git a/content/riak/kv/2.2.6/developing/getting-started/golang/querying.md b/content/riak/kv/2.2.6/developing/getting-started/golang/querying.md new file mode 100644 index 0000000000..57b5435a1c --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/golang/querying.md @@ -0,0 +1,576 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Go" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Querying" + identifier: "getting_started_go_query" + weight: 101 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/querying-golang + - /riak/kv/2.2.6/dev/taste-of-riak/querying-golang +--- + +## Go Version Setup + +For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix* OS. + +>A Quick Note on Querying and Schemas: +> +>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it. 
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
+ Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +doneChan := make(chan riak.Command) +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Bucket { + case customersBucket: + util.Log.Printf("Customer 1: %v", string(obj.Value)) + case orderSummariesBucket: + util.Log.Printf("OrderSummary 1: %v", string(obj.Value)) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} +``` + +Which returns our amalgamated objects: + +```sh +2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]} +2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z" +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.6/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: + +```golang +util.Log.Println("Adding Index Data") + +// fetch orders to add index data +cmds = cmds[:0] + +for _, order := range orders { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(ordersBucket). + WithKey(order.Id). 
        Build()
    if err != nil {
        util.ErrExit(err)
    }
    cmds = append(cmds, cmd)
}

errored = false
for _, cmd := range cmds {
    a := &riak.Async{
        Command: cmd,
        Done:    doneChan,
    }
    if eerr := c.ExecuteAsync(a); eerr != nil {
        errored = true
        util.ErrLog.Println(eerr)
    }
}
if errored {
    util.ErrExit(errors.New("error, exiting!"))
}

errored = false
for i := 0; i < len(cmds); i++ {
    select {
    case d := <-doneChan:
        if fv, ok := d.(*riak.FetchValueCommand); ok {
            obj := fv.Response.Values[0]
            switch obj.Key {
            case "1":
                obj.AddToIntIndex("SalespersonId_int", 9000)
                obj.AddToIndex("OrderDate_bin", "2013-10-01")
            case "2":
                obj.AddToIntIndex("SalespersonId_int", 9001)
                obj.AddToIndex("OrderDate_bin", "2013-10-15")
            case "3":
                obj.AddToIntIndex("SalespersonId_int", 9000)
                obj.AddToIndex("OrderDate_bin", "2013-11-03")
            }
            scmd, serr := riak.NewStoreValueCommandBuilder().
                WithContent(obj).
                Build()
            if serr != nil {
                util.ErrExit(serr)
            }
            a := &riak.Async{
                Command: scmd,
                Wait:    wg,
            }
            if eerr := c.ExecuteAsync(a); eerr != nil {
                errored = true
                util.ErrLog.Println(eerr)
            }
        } else {
            util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
        }
    case <-time.After(5 * time.Second):
        util.ErrExit(errors.New("fetch operations took too long"))
    }
}

if errored {
    util.ErrExit(errors.New("error, exiting!"))
}

wg.Wait()
close(doneChan)
```

As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level.

Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`:

```golang
util.Log.Println("Index Queries")

cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucket(ordersBucket).
    WithIndexName("SalespersonId_int").
    WithIndexKey("9000").
    Build()
if err != nil {
    util.ErrExit(err)
}

if eerr := c.Execute(cmd); eerr != nil {
    util.ErrExit(eerr)
}

qcmd := cmd.(*riak.SecondaryIndexQueryCommand)
for _, rslt := range qcmd.Response.Results {
    util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey))
}
```

Which returns:

```sh
2015/12/29 09:44:10 Jane's Orders, key: 3
2015/12/29 09:44:10 Jane's Orders, key: 1
```

Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id; next, let's use a *binary* index.

Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`:

```golang
cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucket(ordersBucket).
    WithIndexName("OrderDate_bin").
    WithRange("2013-10-01", "2013-10-31").
    Build()
if err != nil {
    util.ErrExit(err)
}

if eerr := c.Execute(cmd); eerr != nil {
    util.ErrExit(eerr)
}

qcmd = cmd.(*riak.SecondaryIndexQueryCommand)
for _, rslt := range qcmd.Response.Results {
    util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey))
}
```

Which returns:

```sh
2015/12/29 09:44:10 October's Orders, key: 1
2015/12/29 09:44:10 October's Orders, key: 2
```

Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes.

So to recap:

* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
* Indexes can have either Integer or Binary(String) keys.
* You can search for specific values or a range of values.
* Riak will return a list of keys that match the index query.
diff --git a/content/riak/kv/2.2.6/developing/getting-started/java.md b/content/riak/kv/2.2.6/developing/getting-started/java.md
new file mode 100644
index 0000000000..5ba192092f
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started/java.md
@@ -0,0 +1,89 @@
---
title: "Getting Started with Java"
description: ""
project: "riak_kv"
project_version: "2.2.6"
menu:
  riak_kv-2.2.6:
    name: "Java"
    identifier: "getting_started_java"
    weight: 100
    parent: "developing_getting_started"
toc: true
aliases:
  - /riak/2.2.6/dev/taste-of-riak/java
  - /riak/kv/2.2.6/dev/taste-of-riak/java
---

If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.6/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of Java is required.

## Client Setup

To include the Riak Java client in your project, add it to your
project's dependencies. Here is a Maven example:

```xml
<dependency>
  <groupId>com.basho.riak</groupId>
  <artifactId>riak-client</artifactId>
  <version>2.1.1</version>
</dependency>
```

Next, download the
[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java)
source code for this tutorial, and save it to your working directory.

{{% note title="Configuring for a local cluster" %}}
The `TasteOfRiak.java` file that you downloaded is set up to communicate with
a 1-node Riak cluster listening on `localhost` port 10017. We recommend
modifying the connection info directly within the `setUpCluster()` method.
{{% /note %}}

If you execute the `TasteOfRiak.java` file within your IDE, you should
see the following:

```
Basic object created
Location object created for quote object
StoreValue operation created
Client object successfully created
Object storage operation successfully completed
Success! The object we created and the object we fetched have the same value
Quote object successfully deleted
Book object created
Moby Dick information now stored in Riak
Book object successfully fetched
Success! All of our tests check out
```

Since Java doesn't have a REPL environment, let's walk through the code
to see what it actually did at each step.

## Setting Up the Cluster

The first step in using the Riak Java client is to create a cluster
object to facilitate all interactions with Riak. You'll see this on line
72:

```java
RiakCluster cluster = setUpCluster();
```

This calls the private `setUpCluster` method which begins on line 25.
Using that `cluster` object, we can instantiate a client object which
will execute all Riak interactions:

```java
RiakClient client = new RiakClient(cluster);
```

## Next Steps

[CRUD Operations]({{}}riak/kv/2.2.6/developing/getting-started/java/crud-operations)
diff --git a/content/riak/kv/2.2.6/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.2.6/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..1a9b00b6f8
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,201 @@
---
title_supertext: "Getting Started:"
title: "CRUD Operations with Java"
description: ""
project: "riak_kv"
project_version: "2.2.6"
menu:
  riak_kv-2.2.6:
    name: "CRUD Operations"
    identifier: "getting_started_java_crud"
    weight: 100
    parent: "getting_started_java"
toc: true
---

## Creating Objects in Riak

The first object that we create is a very basic object with a content
type of `text/plain`. Once that object is created, we create a
`StoreValue` operation that will store the object later on down the line:

```java
RiakObject quoteObject = new RiakObject()
    .setContentType("text/plain")
    .setValue(BinaryValue.create("You're dangerous, Maverick"));
Namespace quotesBucket = new Namespace("quotes");
Location quoteObjectLocation = new Location(quotesBucket, "Iceman");
StoreValue storeOp = new StoreValue.Builder(quoteObject)
    .withLocation(quoteObjectLocation)
    .build();
```

We then use our `client` object to execute the storage operation:

```java
StoreValue.Response response = client.execute(storeOp);
```

## Reading Objects from Riak

After that, we check to make sure that the stored object has the same
value as the object that we created. This requires us to fetch the
object by way of a `FetchValue` operation:

```java
FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
    .build();
RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
assert(fetchedObject.getValue().equals(quoteObject.getValue()));
```

If the values are equal, as they should be, the Java client will say
`Success! The object we created and the object we fetched have the same
value`. If not, then the client will throw an exception.

## Updating Objects

Once we've read the object back in from Riak, we can update the object
and store it back as we did before with the `StoreValue` object:

```java
fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
StoreValue updateOp = new StoreValue.Builder(fetchedObject)
    .withLocation(quoteObjectLocation)
    .build();
StoreValue.Response updateOpResp = client.execute(updateOp);
```

For more in-depth information on updating objects and sibling resolution in
Riak, see the [Updating Objects]({{}}riak/kv/2.2.6/developing/usage/updating-objects/)
and [Conflict Resolution]({{}}riak/kv/2.2.6/developing/usage/conflict-resolution/)
documentation.
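Before deleting, one fetch-related detail worth knowing: if nothing is
stored at a `Location`, executing a `FetchValue` does not throw; the
response simply reports that the key was not found. A small, hypothetical
guard reusing the `fetchOp` built above:

```java
// Hypothetical sketch: guard against a missing key before using the value
FetchValue.Response fetchResp = client.execute(fetchOp);
if (fetchResp.isNotFound()) {
    // No object is stored at quoteObjectLocation
    System.out.println("Quote object not found");
}
```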
## Deleting Objects

Now that we've stored and then fetched the object, we can delete it by
creating and executing a `DeleteValue` operation:

```java
DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
    .build();
client.execute(deleteOp);
```

## Working With Complex Objects

Since the world is a little more complicated than simple integers and
bits of strings, let's see how we can work with more complex objects.
Take, for example, this plain old Java object (POJO) that encapsulates
some knowledge about a book:

```java
public class Book {
    public String title;
    public String author;
    public String body;
    public String isbn;
    public Integer copiesOwned;
}
```

By default, the Java Riak client serializes POJOs as JSON. Let's create
a new `Book` object to store:

```java
Book mobyDick = new Book();
mobyDick.title = "Moby Dick";
mobyDick.author = "Herman Melville";
mobyDick.body = "Call me Ishmael. Some years ago...";
mobyDick.isbn = "1111979723";
mobyDick.copiesOwned = 3;
```

Now we can store that POJO object just like we stored the simpler
object earlier:

```java
Namespace booksBucket = new Namespace("books");
Location mobyDickLocation = new Location(booksBucket, "moby_dick");
StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
    .withLocation(mobyDickLocation)
    .build();
client.execute(storeBookOp);
```

If we fetch the object (using the same method we showed above and in
`TasteOfRiak.java`), we should get the following:

```json
{
  "title": "Moby Dick",
  "author": "Herman Melville",
  "body": "Call me Ishmael. Some years ago...",
  "isbn": "1111979723",
  "copiesOwned": 3
}
```

Since we really like Moby Dick, let's buy a couple more copies
and update the POJO.
+ +To update the POJO, we would use `UpdateValue` by +extending a new `BookUpdate` class as follows: + +```java +public static class BookUpdate extends UpdateValue.Update { + private final Book update; + public BookUpdate(Book update){ + this.update = update; + } + + @Override + public Book apply(Book t) { + if(t == null) { + t = new Book(); + } + + t.author = update.author; + t.body = update.body; + t.copiesOwned = update.copiesOwned; + t.isbn = update.isbn; + t.title = update.title; + + return t; + } +} +``` + +Then using the `BookUpdate` class with our `mobyDick` object: + +```java +mobyDick.copiesOwned = 5; +BookUpdate updatedBook = new BookUpdate(mobyDick); + +UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation) + .withUpdate(updatedBook).build(); +UpdateValue.Response response = client.execute(updateValue); +``` + +For more in depth information on updating objects and sibling resolution in +Riak, see [Updating Objects]({{}}riak/kv/2.2.6/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.2.6/developing/usage/conflict-resolution/) +documention. diff --git a/content/riak/kv/2.2.6/developing/getting-started/java/object-modeling.md b/content/riak/kv/2.2.6/developing/getting-started/java/object-modeling.md new file mode 100644 index 0000000000..3eb5fc609c --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/java/object-modeling.md @@ -0,0 +1,428 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Java" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Object Modeling" + identifier: "getting_started_java_object" + weight: 102 + parent: "getting_started_java" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/object-modeling-java + - /riak/kv/2.2.6/dev/taste-of-riak/object-modeling-java +--- + +To get started, let's create the models that we'll be using. + +```java +package com.basho.msgy.Models; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.TimeZone; + +public class Msg { + public String Sender; + public String Recipient; + public String Created; + public String Text; + + public static Msg createNew(String sender, String recipient, String text) { + Msg msg = new Msg(); + msg.Sender = sender; + msg.Recipient = recipient; + msg.Text = text; + msg.Created = GetCurrentISO8601Timestamp(); + return msg; + } + + private static String GetCurrentISO8601Timestamp() { + TimeZone tz = TimeZone.getTimeZone("UTC"); + // Java Dates don't have microsecond resolution :( + // Pad out to microseconds to match other examples. 
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
        df.setTimeZone(tz);
        return df.format(new Date());
    }
}

// ----------------------------------------------------------------------------

package com.basho.msgy.Models;

import java.util.ArrayList;

public class Timeline {

    public enum TimelineType
    {
        Inbox,
        Sent;

        @Override
        public String toString() {
            if(this == Inbox)
                return "Inbox";
            else
                return "Sent";
        }
    }

    public Timeline() {
        Msgs = new ArrayList<String>();
    }

    public String Owner;
    public String Type;
    public ArrayList<String> Msgs;
}

// ----------------------------------------------------------------------------

package com.basho.msgy.Models;

import com.basho.riak.client.convert.RiakKey;

public class User {
    @RiakKey
    public String UserName;

    @RiakBucketName
    final String bucketName = "Users";

    public String FullName;
    public String Email;

    public User() {}

    public User(String userName, String fullName, String email) {
        this.UserName = userName;
        this.FullName = fullName;
        this.Email = email;
    }
}
```

To use these classes to store data, we will first have to create a user.
Then, when a user creates a message, we will append that message to one
or more timelines. If it's a private message, we'll append it to the
Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
a group message, we'll append it to the Group's timeline, as well as to
the User's `Sent` timeline.

#### Buckets and Keys Revisited

Now that we've worked out how we will differentiate data in the system,
let's figure out our bucket and key names.

The bucket names are straightforward. We can use `Users`, `Msgs`, and
`Timelines`. The key names, however, are a little trickier. In past
examples we've used sequential integers, but this presents a problem: we
would need a secondary service to hand out these IDs. This service could
easily become a future bottleneck in the system, so let's use a natural key.
Natural keys are a great fit for key/value systems because both humans
and computers can easily construct them when needed, and most of the
time they can be made unique enough for a KV store.

| Bucket | Key Pattern | Example Key
|:-------|:------------|:-----------
| `Users` | `<user_name>` | `joeuser`
| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
| `Timelines` | `<username>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
For the `Users` bucket, we can be certain that we will want each
username to be unique, so let's use the `username` as the key. With the
Java client, we can use the `@RiakKey` annotation to tell the client
that we want to use the `UserName` member as the key. It will
automatically use that value in the future, instead of having to pass the
key in as another parameter when storing a value.

For the `Msgs` bucket, let's use a combination of the username and the
posting datetime in an [ISO 8601
Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
gives us the pattern `<username>_<datetime>`, which produces keys like
`joeuser_2014-03-05T23:20:28Z`.

Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
timelines, so we can simply add that type into the key name. We will
also want to partition each collection object into some time period,
that way the object doesn't grow too large (see note below).

For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
users, and `<groupname>_Inbox_<date>` for groups, which will look like
`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
respectively.

{{% note title="Note" %}}
Riak performs best with objects under 1-2MB. Objects larger than that can hurt
performance, especially if many siblings are being created. We will cover
siblings, sibling resolution, and sibling explosions in the next chapter.
{{% /note %}}

#### Keeping our story straight with repositories

Now that we've figured out our object model, let's write some
repositories to help create and work with these objects in Riak:

```java
package com.basho.msgy.Repositories;

import com.basho.msgy.Models.Msg;
import com.basho.riak.client.api.RiakClient;
import com.basho.riak.client.api.commands.kv.FetchValue;
import com.basho.riak.client.api.commands.kv.StoreValue;
import com.basho.riak.client.core.query.Location;
import com.basho.riak.client.core.query.Namespace;

public class MsgRepository {

    static final String BUCKET_NAME = "Msgs";
    protected RiakClient client;

    public MsgRepository(RiakClient client) {
        this.client = client;
    }

    public Msg get(String msgKey) throws Exception {
        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
        FetchValue fetch = new FetchValue.Builder(key).build();
        FetchValue.Response response = client.execute(fetch);
        return response.getValue(Msg.class);
    }

    public String save(Msg msg) throws Exception {
        StoreValue store = new StoreValue.Builder(msg).build();
        client.execute(store);
        return generateKey(msg);
    }

    private String generateKey(Msg msg) {
        return msg.Sender + "_" + msg.Created;
    }
}

// ----------------------------------------------------------------------------

package com.basho.msgy.Repositories;

import com.basho.msgy.Models.Msg;
import com.basho.msgy.Models.Timeline;
import com.basho.riak.client.api.RiakClient;
import com.basho.riak.client.api.commands.kv.FetchValue;
import com.basho.riak.client.api.commands.kv.StoreValue;
import com.basho.riak.client.core.query.Location;
import com.basho.riak.client.core.query.Namespace;
// The legacy imports below are still needed by getTimeline, which uses
// the older bucket API.
import com.basho.riak.client.RiakRetryFailedException;
import com.basho.riak.client.bucket.Bucket;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

public class TimelineRepository {

    static final String BUCKET_NAME = "Timelines";
    protected RiakClient client;
    protected MsgRepository msgRepo;

    public TimelineRepository(RiakClient client) {
        this.client = client;
        this.msgRepo = new MsgRepository(this.client);
    }

    public void postMsg(Msg msg) throws Exception {
        String msgKey = msgRepo.save(msg);

        // Post to recipient's Inbox timeline
        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);

        // Post to sender's Sent timeline
addToTimeline(msg, Timeline.TimelineType.Sent, msgKey); + } + + + private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception { + String timelineKey = generateKeyFromMsg(msg, type); + + Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey); + FetchValue fetch = new FetchValue.Builder(loc).build(); + Timeline timeline = client.execute(fetch).getValue(Timeline.class); + + if (timeline != null) { + timeline = addToExistingTimeline(timeline,msgKey); + } else { + timeline = createNewTimeline(msg, type, msgKey); + } + + StoreValue store = new StoreValue.Builder(timeline).build(); + client.execute(store); + } + + public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) { + String owner = getOwner(msg, type); + + Timeline newTimeline = new Timeline(); + newTimeline.Owner = owner; + newTimeline.Type = type.toString(); + newTimeline.Msgs.add(msgKey); + + return newTimeline; + } + + public Timeline addToExistingTimeline(Timeline timeline, String msgKey) { + timeline.Msgs.add(msgKey); + return timeline; + } + + public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws RiakRetryFailedException { + String timelineKey = generateKey(ownerUsername, type, date); + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(timelineKey, Timeline.class).execute(); + } + + private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) { + String owner = getOwner(msg, type); + String dateString = msg.Created.substring(0, 10); + return generateKey(owner, type, dateString); + } + + private String getOwner(Msg msg, Timeline.TimelineType type) { + if(type == Timeline.TimelineType.Inbox) + return msg.Recipient; + else + return msg.Sender; + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) { + String dateString = getIso8601DateStringFromDate(date); + return generateKey(ownerUsername, type, dateString); + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) { + return ownerUsername + "_" + type.toString() + "_" + dateString; + } + + private String getIso8601DateStringFromDate(Date date) { + TimeZone tz = TimeZone.getTimeZone("UTC"); + DateFormat df = new SimpleDateFormat("yyyy-MM-dd"); + df.setTimeZone(tz); + return df.format(date); + } + + +} + +// ---------------------------------------------------------------------------- + +package com.basho.msgy.Repositories; + +import com.basho.msgy.Models.User; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakRetryFailedException; +import com.basho.riak.client.bucket.Bucket; + +public class UserRepository { + static final String BUCKET_NAME = "Users"; + protected IRiakClient client; + + public UserRepository(IRiakClient client) { + this.client = client; + } + + public void save(User user) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + bucket.store(user).execute(); + } + + public User get(String UserName) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(UserName, User.class).execute(); + } +} + +``` + +Finally, let's test them: + +```java +package com.basho.msgy; + +import com.basho.msgy.Models.Msg; +import com.basho.msgy.Models.Timeline; +import com.basho.msgy.Models.User; +import com.basho.msgy.Repositories.MsgRepository; +import com.basho.msgy.Repositories.TimelineRepository; +import 
com.basho.msgy.Repositories.UserRepository; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakException; +import com.basho.riak.client.RiakFactory; + +import java.util.Date; + +public class MsgyMain { + + public static void main(String[] args) throws RiakException { + // Setup our repositories + IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017); + + UserRepository userRepo = new UserRepository(client); + MsgRepository msgRepo = new MsgRepository(client); + TimelineRepository timelineRepo = new TimelineRepository(client); + + // Create and save users + User marleen = new User("marleenmgr", + "Marleen Manager", + "marleen.manager@basho.com"); + + User joe = new User("joeuser", + "Joe User", + "joe.user@basho.com"); + + userRepo.save(marleen); + userRepo.save(joe); + + // Create new Msg, post to timelines + Msg msg = Msg.createNew(marleen.UserName, + joe.UserName, + "Welcome to the company!"); + + timelineRepo.postMsg(msg); + + + // Get Joe's inbox for today, get first message + Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName, + Timeline.TimelineType.Inbox, + new Date()); + + Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0)); + + System.out.println("From: " + joesFirstMsg.Sender); + System.out.println("Msg : " + joesFirstMsg.Text); + System.out.println(""); + + client.shutdown(); + } +} +``` + +As you can see, the repository pattern helps us with a few things: + + - It helps us to see if an object exists before creating a new one + - It keeps our buckets and key names consistent + - It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data diff --git a/content/riak/kv/2.2.6/developing/getting-started/java/querying.md b/content/riak/kv/2.2.6/developing/getting-started/java/querying.md new file mode 100644 index 0000000000..366d40b000 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/java/querying.md @@ -0,0 +1,276 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Java" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Querying" + identifier: "getting_started_java_query" + weight: 101 + parent: "getting_started_java" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/querying-java + - /riak/kv/2.2.6/dev/taste-of-riak/querying-java +--- + +## Java Version Setup + +For the Java version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`. 
You +may import this code into your favorite editor, or just run it from the +command line using the commands in `BuildAndRun.sh` if you are running +on a *nix* OS. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POJO's in `Customer.java`, `Order.java` and +`OrderSummaries.java`. Let's put some data into Riak so we can play +with it. 
+ +```java +// From SipOfRiak.java + +private static Customer createCustomer() { + Customer customer = new Customer(); + customer.CustomerId = 1; + customer.Name = "John Smith"; + customer.Address = "123 Main Street"; + customer.City = "Columbus"; + customer.State = "Ohio"; + customer.Zip = "43210"; + customer.Phone = "+1-614-555-5555"; + customer.CreatedDate = "2013-10-01 14:30:26"; + return customer; +} + +private static ArrayList createOrders() { + ArrayList orders = new ArrayList(); + + Order order1 = new Order(); + order1.OrderId = 1; + order1.CustomerId = 1; + order1.SalespersonId = 9000; + order1.Items.add( + new Item("TCV37GIT4NJ", + "USB 3.0 Coffee Warmer", + 15.99)); + order1.Items.add( + new Item("PEG10BBF2PP", + "eTablet Pro; 24GB; Grey", + 399.99)); + order1.Total = 415.98; + order1.OrderDate = "2013-10-01 14:42:26"; + orders.add(order1); + + Order order2 = new Order(); + order2.OrderId = 2; + order2.CustomerId = 1; + order2.SalespersonId = 9001; + order2.Items.add( + new Item("OAX19XWN0QP", + "GoSlo Digital Camera", + 359.99)); + order2.Total = 359.99; + order2.OrderDate = "2013-10-15 16:43:16"; + orders.add(order2); + + Order order3 = new Order(); + order3.OrderId = 3; + order3.CustomerId = 1; + order3.SalespersonId = 9000; + order3.Items.add( + new Item("WYK12EPU5EZ", + "Call of Battle = Goats - Gamesphere 4", + 69.99)); + order3.Items.add( + new Item("TJB84HAA8OA", + "Bricko Building Blocks", + 4.99)); + order3.Total = 74.98; + order3.OrderDate = "2013-11-03 17:45:28"; + orders.add(order3); + return orders; +} + +private static OrderSummary createOrderSummary(ArrayList orders) { + OrderSummary orderSummary = new OrderSummary(); + orderSummary.CustomerId = 1; + for(Order order: orders) + { + orderSummary.Summaries.add(new OrderSummaryItem(order)); + } + return orderSummary; +} + +public static void main(String[] args) throws RiakException { + + System.out.println("Creating Data"); + Customer customer = createCustomer(); + ArrayList orders = createOrders(); + OrderSummary orderSummary = createOrderSummary(orders); + + System.out.println("Starting Client"); + IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017); + + + System.out.println("Creating Buckets"); + Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute(); + Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute(); + Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute(); + + System.out.println("Storing Data"); + customersBucket.store(String.valueOf(customer.CustomerId), customer).execute(); + for (Order order : orders) { + ordersBucket.store(String.valueOf(order.OrderId), order).execute(); + } + orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute(); +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. 
+ +```java + System.out.println("Fetching related data by shared key"); + String key = "1"; + String fetchedCust = customersBucket.fetch(key).execute().getValueAsString(); + String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString(); + System.out.format("Customer 1: %s\n", fetchedCust); + System.out.format("OrderSummary 1: %s\n", fetchedOrdSum); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.6/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```java + System.out.println("Adding Index Data"); + IRiakObject riakObj = ordersBucket.fetch("1").execute(); + riakObj.addIndex("SalespersonId", 9000); + riakObj.addIndex("OrderDate", "2013-10-01"); + ordersBucket.store(riakObj).execute(); + + IRiakObject riakObj2 = ordersBucket.fetch("2").execute(); + riakObj2.addIndex("SalespersonId", 9001); + riakObj2.addIndex("OrderDate", "2013-10-15"); + ordersBucket.store(riakObj2).execute(); + + IRiakObject riakObj3 = ordersBucket.fetch("3").execute(); + riakObj3.addIndex("SalespersonId", 9000); + riakObj3.addIndex("OrderDate", "2013-11-03"); + ordersBucket.store(riakObj3).execute(); +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +```java + // Query for orders where the SalespersonId index is set to 9000 + List janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId")) + .withValue(9000).execute(); + + System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders)); +``` + +Which returns: + +```text +Jane's Orders: 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. 
Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```java + // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 + List octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate")) + .from("2013-10-01").to("2013-10-31").execute(); + + System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders)); +``` + +Which returns: + +```text +October's Orders: 1, 2 +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.2.6/developing/getting-started/nodejs.md b/content/riak/kv/2.2.6/developing/getting-started/nodejs.md new file mode 100644 index 0000000000..92fec94917 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/nodejs.md @@ -0,0 +1,100 @@ +--- +title: "Getting Started with NodeJS" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "NodeJS" + identifier: "getting_started_nodejs" + weight: 104 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/nodejs + - /riak/kv/2.2.6/dev/taste-of-riak/nodejs +--- + +[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js +[npm]: https://www.npmjs.com/package/basho-riak-client +[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.6/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Node.js 0.12 or later is +required. + +Code for these examples is available [here][introduction.js]. To run, follow +these directions: + +```bash +git clone git://github.com/basho/riak-nodejs-client-examples +cd riak-nodejs-client-examples +npm install +node ./app.js +``` + +### Client Setup + +Install [the Riak Node.js Client][node_js_installation] through [NPM][npm]. + +### Connecting to Riak + +Connecting to Riak with the Riak Node.js Client requires creating a new client +object and using the callback argument to know when the client is fully +initialized: + +```javascript +var Riak = require('basho-riak-client'); +var nodes = [ + 'riak-test:10017', + 'riak-test:10027', + 'riak-test:10037', + 'riak-test:10047' +]; +var client = new Riak.Client(nodes, function (err, c) { + // NB: at this point the client is fully initialized, and + // 'client' and 'c' are the same object +}); +``` + +This creates a new `Riak.Client` object which handles all the details of +tracking active nodes and also provides load balancing. The `Riak.Client` object +is used to send commands to Riak. 
When your application is completely done with +Riak communications, the following method can be used to gracefully shut the +client down and exit Node.js: + +```javascript +client.stop(function (err, rslt) { + // NB: you may wish to check err + process.exit(); +}); +``` + +Let's make sure the cluster is online with a `Ping` request: + +```javascript +var assert = require('assert'); + +client.ping(function (err, rslt) { + if (err) { + throw new Error(err); + } else { + // On success, ping returns true + assert(rslt === true); + } +}); +``` + +This is some simple code to test that a node in a Riak cluster is online - we +send a simple ping message. Even if the cluster isn't present, the Riak Node.js +Client will return a response message. In the callback it is important to check +that your activity was successful by checking the `err` variable. + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.2.6/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.2.6/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/2.2.6/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..9f22f9475a --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,133 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with NodeJS" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "CRUD Operations" + identifier: "getting_started_nodejs_crud" + weight: 100 + parent: "getting_started_nodejs" +toc: true +--- + +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + +### Creating Objects In Riak KV + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going +to want us to do productive work. Let's create some data to save in Riak. + +The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak +key/value objects. At the most basic, a `RiakObject` is responsible for +identifying your object and for translating it into a format that can be easily +saved to Riak. + +```javascript +var async = require('async'); + +var people = [ + { + emailAddress: "bashoman@basho.com", + firstName: "Basho", + lastName: "Man" + }, + { + emailAddress: "johndoe@gmail.com", + firstName: "John", + lastName: "Doe" + } +]; + +var storeFuncs = []; +people.forEach(function (person) { + // Create functions to execute in parallel to store people + storeFuncs.push(function (async_cb) { + client.storeValue({ + bucket: 'contributors', + key: person.emailAddress, + value: person + }, + function(err, rslt) { + async_cb(err, rslt); + } + ); + }); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +In this sample, we create a collection of `Person` objects and then save each +`Person` to Riak. Once again, we check the response from Riak. + +### Reading from Riak + +Let's find a person! + +```javascript +var logger = require('winston'); + +client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true }, + function (err, rslt) { + if (err) { + throw new Error(err); + } else { + var riakObj = rslt.values.shift(); + var bashoman = riakObj.value; + logger.info("I found %s in 'contributors'", bashoman.emailAddress); + } + } +); +``` + +We use `client.fetchValue` to retrieve an object from Riak. 
This returns an
array of `RiakObject` objects, which helpfully encapsulate the
communication with Riak.

After verifying that we've been able to communicate with Riak *and* that we have
a successful result, we use the `value` property to get the object, which has
already been converted to a JavaScript object due to the use of `convertToJs:
true` in the options.

### Modifying Existing Data

Let's say that Basho Man has decided to be known as Riak Man:

```javascript
bashoman.firstName = "Riak";
riakObj.setValue(bashoman);

client.storeValue({ value: riakObj }, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

Updating an object involves modifying a `RiakObject` and then using
`client.storeValue` to save the existing object.

### Deleting Data

```javascript
client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

Just like other operations, we check the results that have come back from Riak
to make sure the object was successfully deleted.

The Riak Node.js Client has a lot of additional functionality that makes it easy
to build rich, complex applications with Riak. Check out the
[documentation][nodejs_wiki] to learn more about working with the Riak Node.js
Client and Riak.

diff --git a/content/riak/kv/2.2.6/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/2.2.6/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..02222dca98
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,119 @@
---
title_supertext: "Getting Started:"
title: "Object Modeling with NodeJS"
description: ""
project: "riak_kv"
project_version: "2.2.6"
menu:
  riak_kv-2.2.6:
    name: "Object Modeling"
    identifier: "getting_started_nodejs_object"
    weight: 102
    parent: "getting_started_nodejs"
toc: true
aliases:
  - /riak/2.2.6/dev/taste-of-riak/object-modeling-nodejs
  - /riak/kv/2.2.6/dev/taste-of-riak/object-modeling-nodejs
---

To get started, let's create the models that we'll be using.

* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)

To use these classes to store data, we will first have to create a user.
Then, when a user creates a message, we will append that message to one
or more timelines. If it's a private message, we'll append it to the
Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
a group message, we'll append it to the Group's timeline, as well as to
the User's `Sent` timeline.

#### Buckets and Keys Revisited

Now that we've worked out how we will differentiate data in the system,
let's figure out our bucket and key names.

The bucket names are straightforward. We can use `Users`, `Msgs`, and
`Timelines`. The key names, however, are a little more tricky. In past
examples we've used sequential integers, but this presents a problem: we
would need a secondary service to hand out these IDs. This service could
easily be a future bottleneck in the system, so let's use a natural key.

Natural keys are a great fit for key/value systems because both humans
and computers can easily construct them when needed, and most of the
time they can be made unique enough for a KV store.

| Bucket | Key Pattern | Example Key |
|:-------|:------------|:------------|
| `Users` | `<user_name>` | `joeuser` |
| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z` |
| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br />`marketing_group_INBOX_2014-03-06` |

For the `Users` bucket, we can be certain that we will want each
username to be unique, so let's use the `userName` as the key.

[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)

For the `Msgs` bucket, let's use a combination of the username and the
posting datetime in an [ISO 8601
Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
gives us the pattern `<user_name>_<datetime>`, which produces keys like
`joeuser_2014-03-05T23:20:28Z`.

[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)

Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
timelines, so we can simply add that type into the key name. We will
also want to partition each collection object into some time period,
that way the object doesn't grow too large (see note below).

For `Timelines`, let's use the pattern `<user_name>_<type>_<date>` for
users, and `<group_name>_Inbox_<date>` for groups, which will look like
`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
respectively.

{{% note title="Note" %}}
Riak performs best with objects under 1-2MB. Objects larger than that can hurt
performance, especially if many siblings are being created. We will cover
siblings, sibling resolution, and sibling explosions in the next chapter.
{{% /note %}}

#### Keeping our story straight with repositories

Now that we've figured out our object model, let's write some
repositories to help create and work with these objects in Riak:

* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)

Finally, let's test them:

[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)

As you can see, the repository pattern helps us with a few things:

 - It helps us to see if an object exists before creating a new one
 - It keeps our buckets and key names consistent
 - It provides us with a consistent interface to work with

While this set of repositories solves many of our problems, it is very
minimal and doesn't cover all the edge cases. For instance, what happens
if two different people try to create a user with the same username?

We can also easily "compute" key names now, but how do we quickly look
up the last 10 messages a user sent? Many of these answers will be
application dependent. If your application shows the last 10 messages in
reverse order, for example, you may want to store that set of data in
another collection object to make lookup faster. There are drawbacks to
every solution, but we recommend seeking out the key/value-based
solution first, as it will likely be the quickest.
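
Before we recap, here is a minimal sketch of how the natural keys from
the table above could be assembled. It is not part of the example
repositories, just an illustration; the `sender`, `owner`, and `type`
arguments are hypothetical stand-ins for the fields the models carry:

```javascript
// Build a Msgs key: <user_name>_<datetime>
function msgKey(sender, createdIso) {
    // e.g. joeuser_2014-03-06T02:05:13.556Z
    return sender + '_' + createdIso;
}

// Build a Timelines key: <owner>_<type>_<date> (date portion only)
function timelineKey(owner, type, createdIso) {
    // e.g. joeuser_SENT_2014-03-06
    return owner + '_' + type.toUpperCase() + '_' + createdIso.split('T')[0];
}

var created = new Date().toISOString();
console.log(msgKey('joeuser', created));
console.log(timelineKey('joeuser', 'Sent', created));
```

Because these keys are derived entirely from data the application
already has, any client can compute them without coordinating with a
central ID service.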
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + diff --git a/content/riak/kv/2.2.6/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.2.6/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..bfc76c68f6 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/nodejs/querying.md @@ -0,0 +1,142 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/querying-nodejs + - /riak/kv/2.2.6/dev/taste-of-riak/querying-nodejs +--- + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. + +* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33) +* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262) + +While individual Customer and Order objects don't change much (or +shouldn't change), the "Order Summary" object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. 
If we showed this information in our application often, it's only +one extra request to get all the info. + +[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96) + +Which returns our amalgamated objects: + +```bash +info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"} +info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.6/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141) + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159) + +Which returns: + +```text +Jane's Orders: 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and +2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175) + +Which returns: + +```text +October's Orders: 1, 2 +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.2.6/developing/getting-started/php.md b/content/riak/kv/2.2.6/developing/getting-started/php.md new file mode 100644 index 0000000000..208bd93666 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/php.md @@ -0,0 +1,76 @@ +--- +title: "Getting Started with PHP" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "PHP" + identifier: "getting_started_php" + weight: 107 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/php + - /riak/kv/2.2.6/dev/taste-of-riak/php +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.6/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. + +## Client Setup +Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)). + +From the `taste-of-riak` directory, use composer to install the Riak PHP 2.0 Client`. + +```bash +php path/to/your/composer.phar install + +# If you did a global install of composer, run this instead: +composer install +``` + +If you set up a local Riak cluster using the [[five minute install]] method, change line 11 from `->onPort(8098)` to `->onPort(10018)`. + +Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output: + +```json +Reading Objects From Riak... +Updating Objects In Riak... +Deleting Objects From Riak... +Working With Complex Objects... +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +Yay, success! + +Since we didn't use PHP's REPL environment, let's walk through the code +to see what it actually did at each step. + +## Setting up the PHP Client and connections + +```php +include_once 'vendor/autoload.php'; + +use Basho\Riak; +use Basho\Riak\Node; +use Basho\Riak\Command; + +$node = (new Node\Builder) + ->atHost('127.0.0.1') + ->onPort(8098) + ->build(); + +$riak = new Riak([$node]); +``` + +This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html). +Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands. + +We are now ready to start interacting with Riak. 
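
If you'd like to sanity-check the connection before moving on, here is a
minimal sketch that stores and re-reads a throwaway value using the same
command builders covered in the next chapter; the `test` bucket and
`connection-check` key are arbitrary names chosen for this illustration:

```php
$checkLocation = new Riak\Location('connection-check', new Riak\Bucket('test'));

// Store a throwaway value...
(new Command\Builder\StoreObject($riak))
    ->buildObject('hello')
    ->atLocation($checkLocation)
    ->build()
    ->execute();

// ...and read it back to confirm the node is reachable.
$response = (new Command\Builder\FetchObject($riak))
    ->atLocation($checkLocation)
    ->build()
    ->execute();

echo $response->getObject()->getData() . PHP_EOL; // hello
```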
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.2.6/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.2.6/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.2.6/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..2a4c011e84 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/php/crud-operations.md @@ -0,0 +1,182 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with PHP" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "CRUD Operations" + identifier: "getting_started_php_crud" + weight: 100 + parent: "getting_started_php" +toc: true +--- + +## Creating Objects In Riak +First, let’s create a few objects and a bucket to keep them in. + +```php +$bucket = new Riak\Bucket('testBucket'); + +$val1 = 1; +$location1 = new Riak\Location('one', $bucket); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val1) + ->atLocation($location1) + ->build(); +$storeCommand1->execute(); +``` + +In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key. + +```php +$val2 = 'two'; +$location2 = new Riak\Location('two', $bucket); + +$storeCommand2 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val2) + ->atLocation($location2) + ->build(); +$storeCommand2->execute(); +``` + +That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now. + +```php +$val3 = ['myValue' => 3]; +$location3 = new Riak\Location('three', $bucket); + +$storeCommand3 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($val3) + ->atLocation($location3) + ->build(); +$storeCommand3->execute(); +``` + +## Reading Objects From Riak +Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect. + +```php +$response1 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location1) + ->build() + ->execute(); + +$response2 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location2) + ->build() + ->execute(); + +$response3 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location3) + ->withDecodeAsAssociative() + ->build() + ->execute(); + +print_r($response1->getObject()->getData()); +print_r($response2->getObject()->getData()); +print_r($response3->getObject()->getData()); +``` + +That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html). +For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object. + +In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data. + +## Updating Objects In Riak +While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42. 
+ +```php +$object3 = $response3->getObject(); +$data3 = $object3->getData(); + +$data3['myValue'] = 42; +$object3 = $object3->setData(json_encode($data3)); + +$updateCommand = (new Command\Builder\StoreObject($riak)) + ->withObject($object3) + ->atLocation($location3) + ->build(); + +$updateCommand->execute(); +``` + +First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object. +To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object. + +## Deleting Objects From Riak +As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it. + +```php +(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute(); +``` + +### Working With Complex Objects +Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take for example, this plain old PHP object(POPO) that encapsulates some knowledge about a book. + +```php +class Book +{ + var $title; + var $author; + var $body; + var $isbn; + var $copiesOwned; +} + +$book = new Book(); +$book->isbn = '1111979723'; +$book->title = 'Moby Dick'; +$book->author = 'Herman Melville'; +$book->body = 'Call me Ishmael. Some years ago...'; +$book->copiesOwned = 3; +``` + +Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now: + +```php +$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books')); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($book) + ->atLocation($bookLocation) + ->build(); + +$storeCommand1->execute(); +``` + +Some of you may be thinking “But how does the Riak client encode/decode my object”? If we fetch the binary version of our book back and print it as a string, we shall know: + +```php +$fetchBookResponse = (new Command\Builder\FetchObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); + +print('Serialized Object:' . PHP_EOL); +print($fetchBookResponse->getBody() . PHP_EOL); +``` + +```json +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder. 
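
For comparison, here is a short sketch of the difference between the two
build methods, reusing the `$riak` client from the setup chapter; the
`encoding-demo` location is hypothetical:

```php
$demoLocation = new Riak\Location('encoding-demo', new Riak\Bucket('test'));

// buildObject() stores exactly the value you hand it, so here we
// serialize to JSON by hand...
(new Command\Builder\StoreObject($riak))
    ->buildObject(json_encode(['hand' => 'rolled']))
    ->atLocation($demoLocation)
    ->build()
    ->execute();

// ...whereas buildJsonObject() performs the JSON encoding for us.
(new Command\Builder\StoreObject($riak))
    ->buildJsonObject(['auto' => 'encoded'])
    ->atLocation($demoLocation)
    ->build()
    ->execute();
```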
+ +Now that we’ve ruined the magic of object encoding, let’s clean up our mess: + +```php +(new Command\Builder\DeleteObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); +``` + +## Next Steps + +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.2.6/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.2.6/developing/getting-started/php/querying.md b/content/riak/kv/2.2.6/developing/getting-started/php/querying.md new file mode 100644 index 0000000000..e5fb0485e4 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/php/querying.md @@ -0,0 +1,404 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with PHP" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Querying" + identifier: "getting_started_php_query" + weight: 101 + parent: "getting_started_php" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/querying-php + - /riak/kv/2.2.6/dev/taste-of-riak/querying-php +--- + +## A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +## Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. 

```php
<?php

include_once 'vendor/autoload.php';

use Basho\Riak;
use Basho\Riak\Command;
use Basho\Riak\Location;
use Basho\Riak\Node;

$node = (new Node\Builder)
    ->atHost('127.0.0.1')
    ->onPort(8098)
    ->build();

$riak = new Riak([$node]);

// Class definitions for our models

class Customer
{
    var $customerId;
    var $name;
    var $address;
    var $city;
    var $state;
    var $zip;
    var $phone;
    var $createdDate;
}

class Order
{
    public function __construct()
    {
        $this->items = array();
    }
    var $orderId;
    var $customerId;
    var $salespersonId;
    var $items;
    var $total;
    var $orderDate;
}

class Item
{
    public function __construct($itemId, $title, $price)
    {
        $this->itemId = $itemId;
        $this->title = $title;
        $this->price = $price;
    }
    var $itemId;
    var $title;
    var $price;
}

class OrderSummary
{
    public function __construct()
    {
        $this->summaries = array();
    }
    var $customerId;
    var $summaries;
}

class OrderSummaryItem
{
    public function __construct(Order $order)
    {
        $this->orderId = $order->orderId;
        $this->total = $order->total;
        $this->orderDate = $order->orderDate;
    }
    var $orderId;
    var $total;
    var $orderDate;
}


// Creating Data
$customer = new Customer();
$customer->customerId = 1;
$customer->name = 'John Smith';
$customer->address = '123 Main Street';
$customer->city = 'Columbus';
$customer->state = 'Ohio';
$customer->zip = '43210';
$customer->phone = '+1-614-555-5555';
$customer->createdDate = '2013-10-01 14:30:26';


$orders = [];

$order1 = new Order();
$order1->orderId = 1;
$order1->customerId = 1;
$order1->salespersonId = 9000;
$order1->items = [
    new Item(
        'TCV37GIT4NJ',
        'USB 3.0 Coffee Warmer',
        15.99
    ),
    new Item(
        'PEG10BBF2PP',
        'eTablet Pro; 24GB; Grey',
        399.99
    )
];
$order1->total = 415.98;
$order1->orderDate = '2013-10-01 14:42:26';
$orders[] = $order1;

$order2 = new Order();
$order2->orderId = 2;
$order2->customerId = 1;
$order2->salespersonId = 9001;
$order2->items = [
    new Item(
        'OAX19XWN0QP',
        'GoSlo Digital Camera',
        359.99
    )
];
$order2->total = 359.99;
$order2->orderDate = '2013-10-15 16:43:16';
$orders[] = $order2;

$order3 = new Order();
$order3->orderId = 3;
$order3->customerId = 1;
$order3->salespersonId = 9000;
$order3->items = [
    new Item(
        'WYK12EPU5EZ',
        'Call of Battle = Goats - Gamesphere 4',
        69.99
    ),
    new Item(
        'TJB84HAA8OA',
        'Bricko Building Blocks',
        4.99
    )
];
$order3->total = 74.98;
$order3->orderDate = '2013-11-03 17:45:28';
$orders[] = $order3;


$orderSummary = new OrderSummary();
$orderSummary->customerId = 1;
foreach ($orders as $order) {
    $orderSummary->summaries[] = new OrderSummaryItem($order);
}
unset($order);



// Starting Client
$node = (new Node\Builder)
    ->atHost('127.0.0.1')
    ->onPort(8098)
    ->build();

$riak = new Riak([$node]);

// Creating Buckets
$customersBucket = new Riak\Bucket('Customers');
$ordersBucket = new Riak\Bucket('Orders');
$orderSummariesBucket = new Riak\Bucket('OrderSummaries');

// Storing Data
$storeCustomer = (new Command\Builder\StoreObject($riak))
    ->buildJsonObject($customer)
    ->atLocation(new Location($customer->customerId, $customersBucket))
    ->build();
$storeCustomer->execute();

foreach ($orders as $order) {
    $storeOrder = (new Command\Builder\StoreObject($riak))
        ->buildJsonObject($order)
        ->atLocation(new Location($order->orderId, $ordersBucket))
        ->build();
    $storeOrder->execute();
}
unset($order);

$storeSummary = (new Command\Builder\StoreObject($riak))
    ->buildJsonObject($orderSummary)
    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
->build(); +$storeSummary->execute(); +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```php +// Fetching related data by shared key +$fetched_customer = (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $customersBucket)) + ->build()->execute()->getObject()->getData(); + +$fetched_customer->orderSummary = + (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $orderSummariesBucket)) + ->build()->execute()->getObject()->getData(); + +print("Customer with OrderSummary data: \n"); +print_r($fetched_customer); +``` + +Which returns our amalgamated objects: + +```text +Customer with OrderSummary data: +stdClass Object +( + [customerId] => 1 + [name] => John Smith + [address] => 123 Main Street + [city] => Columbus + [state] => Ohio + [zip] => 43210 + [phone] => +1-614-555-5555 + [createdDate] => 2013-10-01 14:30:26 + [orderSummary] => stdClass Object + ( + [customerId] => 1 + [summaries] => Array + ( + [0] => stdClass Object + ( + [orderId] => 1 + [total] => 415.98 + [orderDate] => 2013-10-01 14:42:26 + ) + + [1] => stdClass Object + ( + [orderId] => 2 + [total] => 359.99 + [orderDate] => 2013-10-15 16:43:16 + ) + + [2] => stdClass Object + ( + [orderId] => 3 + [total] => 74.98 + [orderDate] => 2013-11-03 17:45:28 + ) + ) + ) +) +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.2.6/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.2.6/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.2.6/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.2.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```php +// Adding Index Data +$keys = array(1,2,3); +foreach ($keys as $key) { + $orderLocation = new Location($key, $ordersBucket); + $orderObject = (new Command\Builder\FetchObject($riak)) + ->atLocation($orderLocation) + ->build()->execute()->getObject(); + + $order = $orderObject->getData(); + + $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId); + $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate); + + $storeOrder = (new Command\Builder\StoreObject($riak)) + ->withObject($orderObject) + ->atLocation($orderLocation) + ->build(); + $storeOrder->execute(); +} +unset($key); + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. 
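
One detail worth calling out: in Riak 2i, the index's type is carried as
a suffix on its name, `_int` for integer indexes and `_bin` for binary
(string) indexes. That is why the code above writes to
`SalespersonId_int` and `OrderDate_bin`. A minimal sketch of the
convention, assuming `$orderObject` is a fetched order object as in the
loop above:

```php
// _int: values are indexed as integers, so ranges compare numerically.
$orderObject->addValueToIndex('SalespersonId_int', 9000);

// _bin: values are indexed as strings, so ranges compare lexicographically,
// which is why ISO-style dates such as 2013-10-01 range cleanly.
$orderObject->addValueToIndex('OrderDate_bin', '2013-10-01');
```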
+Now let's find all of Jane Appleseed's processed orders, we'll lookup the orders by searching the `saleperson_id_int` index for Jane's id of `9000`. + +```php +// Query for orders where the SalespersonId int index is set to 9000 +$fetchIndex = (new Command\Builder\QueryIndex($riak)) + ->inBucket($ordersBucket) + ->withIndexName('SalespersonId_int') + ->withScalarValue(9000)->build(); +$janes_orders = $fetchIndex->execute()->getResults(); + +print("\n\nJane's Orders: \n"); +print_r($janes_orders); +``` + +Which returns: + +```text +Jane's Orders: +Array +( + [0] => 3 + [1] => 1 +) + +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id, next let's use a "binary" index. +Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`. + +```php +// Query for orders where the OrderDate bin index is +// between 2013-10-01 and 2013-10-31 +$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak)) + ->inBucket($ordersBucket) + ->withIndexName('OrderDate_bin') + ->withRangeValue('2013-10-01','2013-10-31') + ->withReturnTerms(true) + ->build(); + +$octobers_orders = $fetchOctoberOrders->execute()->getResults(); + +print("\n\nOctober's Orders: \n"); +print_r($octobers_orders); +?> +``` + +Which returns: + +```text +October's Orders: +Array +( + [0] => Array + ( + [2013-10-01 14:42:26] => 1 + ) + + [1] => Array + ( + [2013-10-15 16:43:16] => 2 + ) +) +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which as you can see will return the values of the matching 2i terms. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys (and terms if needed) that match the index query diff --git a/content/riak/kv/2.2.6/developing/getting-started/python.md b/content/riak/kv/2.2.6/developing/getting-started/python.md new file mode 100644 index 0000000000..99230268b5 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/python.md @@ -0,0 +1,99 @@ +--- +title: "Getting Started with Python" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Python" + identifier: "getting_started_python" + weight: 102 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/python + - /riak/kv/2.2.6/dev/taste-of-riak/python +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.2.6/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Python is +required, with Python 2.7 preferred. One of the Python package managers, +e.g. `setuptools` or `pip`, is also required to install the client +package. + +You may install `setuptools` on OS X through MacPorts by running `sudo +port install py-distribute`. `setuptools` and `pip` are included in the +Homebrew formula for Python on OS X as well. Just run `brew install +python`. 
+ +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` --- Header files and a static library for Python +* `libffi-dev` --- Foreign function interface library +* `libssl-dev` --- libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.2.6/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.2.6/developing/getting-started/python/crud-operations.md b/content/riak/kv/2.2.6/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..fbf6cf2e4a --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/python/crud-operations.md @@ -0,0 +1,145 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` diff --git a/content/riak/kv/2.2.6/developing/getting-started/python/object-modeling.md b/content/riak/kv/2.2.6/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..e235613005 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/python/object-modeling.md @@ -0,0 +1,260 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/object-modeling-python + - /riak/kv/2.2.6/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
a group message, we'll append it to the Group's timeline, as well as to
the User's `Sent` timeline.

#### Buckets and Keys Revisited

Now that we've worked out how we will differentiate data in the system,
let's figure out our bucket and key names.

The bucket names are straightforward. We can use `Users`, `Msgs`, and
`Timelines`. The key names, however, are a little more tricky. In past
examples we've used sequential integers, but this presents a problem: we
would need a secondary service to hand out these IDs. This service could
easily be a future bottleneck in the system, so let's use a natural key.
Natural keys are a great fit for key/value systems because both humans
and computers can easily construct them when needed, and most of the
time they can be made unique enough for a KV store.


Bucket | Key Pattern | Example Key
:------|:------------|:-----------
`Users` | `<user_name>` | `joeuser`
`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br />`marketing_group_Inbox_2014-03-06`

For the `Users` bucket, we can be certain that we will want each
username to be unique, so let's use the `username` as the key. For the
`Msgs` bucket, let's use a combination of the username and the posting
datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
format. This combination gives us the pattern `<user_name>_<datetime>`,
which produces keys like `joeuser_2014-03-05T23:20:28`.

Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
timelines, so we can simply add that type into the key name. We will
also want to partition each collection object into some time period,
that way the object doesn't grow too large (see note below).

For `Timelines`, let's use the pattern `<user_name>_<type>_<date>` for
users, and `<group_name>_<type>_<date>` for groups, which will look like
`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`,
respectively.

{{% note title="Note" %}}
Riak performs best with objects under 1-2MB. Objects larger than that can hurt
performance, especially if many siblings are being created. We will cover
siblings, sibling resolution, and sibling explosions in the next chapter.
{{% /note %}}

#### Keeping our story straight with repositories

Now that we've figured out our object model, let's write some
repositories to help create and work with these objects in Riak:

```python
class UserRepository:
    BUCKET = 'Users'

    def __init__(self, client):
        self.client = client

    def save(self, user):
        riak_obj = self.client.bucket(self.BUCKET).get(user['user_name'])
        riak_obj.data = user
        return riak_obj.store()

    def get(self, user_name):
        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
        return riak_obj.data


class MsgRepository:
    BUCKET = 'Msgs'

    def __init__(self, client):
        self.client = client

    def save(self, msg):
        msgs = self.client.bucket(self.BUCKET)
        key = self._generate_key(msg)

        riak_obj = msgs.get(key)

        if not riak_obj.exists:
            riak_obj.data = msg
            riak_obj.store(if_none_match=True)

        return riak_obj

    def get(self, key):
        riak_obj = self.client.bucket(self.BUCKET).get(key)
        return riak_obj.data

    def _generate_key(self, msg):
        return msg['sender'] + '_' + msg['created']


class TimelineRepository:
    BUCKET = 'Timelines'
    SENT = 'Sent'
    INBOX = 'Inbox'

    def __init__(self, client):
        self.client = client
        self.msg_repo = MsgRepository(client)

    def post_message(self, msg):
        # Save the canonical copy
        saved_message = self.msg_repo.save(msg)
        msg_key = saved_message.key

        # Post to sender's Sent timeline
        self._add_to_timeline(msg, self.SENT, msg_key)

        # Post to recipient's Inbox timeline
        self._add_to_timeline(msg, self.INBOX, msg_key)

    def get_timeline(self, owner, msg_type, date):
        key = self._generate_key(owner, msg_type, date)
        riak_obj = self.client.bucket(self.BUCKET).get(key)
        return riak_obj.data

    def _add_to_timeline(self, msg, msg_type, msg_key):
        timeline_key = self._generate_key_from_msg(msg, msg_type)
        riak_obj = self.client.bucket(self.BUCKET).get(timeline_key)

        if riak_obj.exists:
            riak_obj = self._add_to_existing_timeline(riak_obj,
                                                      msg_key)
        else:
            riak_obj = self._create_new_timeline(riak_obj,
                                                 msg, msg_type,
                                                 msg_key)

        return riak_obj.store()

    def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key):
        owner = self._get_owner(msg, msg_type)
        new_timeline = {'owner': owner,
                        'msg_type': msg_type,
                        'msgs': [msg_key]}

        riak_obj.data = new_timeline
        return riak_obj

    def
_add_to_existing_timeline(self, riak_obj, msg_key): + riak_obj.data['msgs'].append(msg_key) + return riak_obj + + def _get_owner(self, msg, msg_type): + if msg_type == self.INBOX: + return msg['recipient'] + else: + return msg['sender'] + + def _generate_key_from_msg(self, msg, msg_type): + owner = self._get_owner(msg, msg_type) + return self._generate_key(owner, msg_type, msg['created']) + + def _generate_key(self, owner, msg_type, datetimestr): + dateString = string.split(datetimestr, 'T', 1)[0] + return owner + '_' + msg_type + '_' + dateString + +``` + +Finally, let's test them: + +```python +# Setup our repositories +client = riak.RiakClient(pb_port=10017, protocol='pbc') +userRepo = UserRepository(client) +msgsRepo = MsgRepository(client) +timelineRepo = TimelineRepository(client) + +# Save users +userRepo.save(marleen) +userRepo.save(joe) + +# Post msg to timelines +timelineRepo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timelineRepo.get_timeline( + joe['user_name'], + TimelineRepository.INBOX, + datetime.utcnow().isoformat()) + +joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0]) + +print 'From: {0}\nMsg : {1}\n\n'.format( + joes_first_message['sender'], + joes_first_message['text']) + +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + diff --git a/content/riak/kv/2.2.6/developing/getting-started/python/querying.md b/content/riak/kv/2.2.6/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..16622976de --- /dev/null +++ b/content/riak/kv/2.2.6/developing/getting-started/python/querying.md @@ -0,0 +1,236 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Python" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Querying" + identifier: "getting_started_python_query" + weight: 101 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.2.6/dev/taste-of-riak/querying-python + - /riak/kv/2.2.6/dev/taste-of-riak/querying-python +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index of all of a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```python
+customer = customer_bucket.get('1').data
+customer['order_summary'] = order_summary_bucket.get('1').data
+customer
+```
+
+Which returns our amalgamated objects:
+
+```python
+{
+  u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210',
+  u'created_date': u'2013-10-01 14:30:26',
+  'order_summary': {
+    u'customer_id': 1, u'summaries': [
+      {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98},
+      {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99},
+      {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98}
+    ]},
+  u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street',
+  u'customer_id': 1
+}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.2.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
diff --git a/content/riak/kv/2.2.6/developing/getting-started/ruby.md b/content/riak/kv/2.2.6/developing/getting-started/ruby.md
new file mode 100644
index 0000000000..e0d183981a
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started/ruby.md
@@ -0,0 +1,64 @@
+---
+title: "Getting Started with Ruby"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Ruby"
+    identifier: "getting_started_ruby"
+    weight: 101
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/taste-of-riak/ruby
+  - /riak/kv/2.2.6/dev/taste-of-riak/ruby
+---
+
+If you haven't set up a Riak node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.2.6/using/running-a-cluster) first. To try this flavor
+of Riak, a working installation of Ruby is required.
+
+## Client Setup
+
+First, install the Riak Ruby client via RubyGems.
+
+```bash
+gem install riak-client
+```
+
+Start IRB, the Ruby REPL, and let’s get set up. Enter the following into
+IRB:
+
+```ruby
+require 'riak'
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance, assuming that the node is running on `localhost`
+port 8087:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087)
+
+# Since the Ruby Riak client uses the Protocol Buffers API by default,
+# you can also just enter this:
+client = Riak::Client.new(:pb_port => 8087)
+```
+
+If you set up a local Riak cluster using the [Running A Cluster]({{<baseurl>}}riak/kv/2.2.6/using/running-a-cluster)
+guide, use this code snippet instead:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017)
+
+# For the reasons explained in the snippet above, this will also work:
+client = Riak::Client.new(:pb_port => 10017)
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started/ruby/crud-operations)
diff --git a/content/riak/kv/2.2.6/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/2.2.6/developing/getting-started/ruby/crud-operations.md
new file mode 100644
index 0000000000..ba6e8792ed
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started/ruby/crud-operations.md
@@ -0,0 +1,146 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Ruby"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "CRUD Operations"
+    identifier: "getting_started_ruby_crud"
+    weight: 100
+    parent: "getting_started_ruby"
+toc: true
+---
+
+## Creating Objects In Riak
+
+First, let’s create a few objects and a bucket to keep them in.
+
+```ruby
+my_bucket = client.bucket("test")
+
+val1 = 1
+obj1 = my_bucket.new('one')
+obj1.data = val1
+obj1.store()
+```
+
+In this first example we have stored the integer 1 with the lookup key
+of `one`. Next, let’s store a simple string value of `two` with a
+matching key.
+
+```ruby
+val2 = "two"
+obj2 = my_bucket.new('two')
+obj2.data = val2
+obj2.store()
+```
+
+That was easy. Finally, let’s store a bit of JSON. You will probably
+recognize the pattern by now.
+
+```ruby
+val3 = { myValue: 3 }
+obj3 = my_bucket.new('three')
+obj3.data = val3
+obj3.store()
+```
+
+## Reading Objects From Riak
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+```ruby
+fetched1 = my_bucket.get('one')
+fetched2 = my_bucket.get('two')
+fetched3 = my_bucket.get('three')
+
+fetched1.data == val1
+fetched2.data == val2
+fetched3.data.to_json == val3.to_json
+```
+
+That was easy. We simply request the objects by key. In the last
+example, we converted to JSON so we can compare a string key to a symbol
+key.
+
+## Updating Objects In Riak
+
+While some data may be static, other forms of data may need to be
+updated. This is also easy to accomplish. Let’s update the value of
+`myValue` in the third example to 42.
+
+```ruby
+fetched3.data["myValue"] = 42
+fetched3.store()
+```
+
+## Deleting Objects From Riak
+
+As a last step, we’ll demonstrate how to delete data. You’ll see that
+the delete message can be called either against the bucket or the
+object.
+
+```ruby
+my_bucket.delete('one')
+obj2.delete()
+obj3.delete()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this Ruby hash that encapsulates some knowledge about
+a book.
+
+```ruby
+book = {
+  :isbn => '1111979723',
+  :title => 'Moby Dick',
+  :author => 'Herman Melville',
+  :body => 'Call me Ishmael. Some years ago...',
+  :copies_owned => 3
+}
+```
+
+All right, so we have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now.
+
+```ruby
+books_bucket = client.bucket('books')
+new_book = books_bucket.new(book[:isbn])
+new_book.data = book
+new_book.store()
+```
+
+Some of you may be thinking, "But how does the Ruby Riak client
+encode/decode my object?" If we fetch our book back and print the raw
+data, we shall know:
+
+```ruby
+fetched_book = books_bucket.get(book[:isbn])
+puts fetched_book.raw_data
+```
+
+Raw Data:
+
+```json
+{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville",
+"body":"Call me Ishmael. Some years ago...","copies_owned":3}
+```
+
+JSON! The Ruby Riak client will serialize objects to JSON when it comes
+across structured data like hashes. For more advanced control over
+serialization you can use a library called
+[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling
+layer over the basic Riak client. Ripple falls outside the scope of
+this document but we shall visit it later.
+
+Now, let’s clean up our mess:
+
+```ruby
+new_book.delete()
+```
diff --git a/content/riak/kv/2.2.6/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/2.2.6/developing/getting-started/ruby/object-modeling.md
new file mode 100644
index 0000000000..e8d25983a5
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started/ruby/object-modeling.md
@@ -0,0 +1,291 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Ruby"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Object Modeling"
+    identifier: "getting_started_ruby_object"
+    weight: 102
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/taste-of-riak/object-modeling-ruby
+  - /riak/kv/2.2.6/dev/taste-of-riak/object-modeling-ruby
+---
+
+To get started, let's create the models that we'll be using. Since the
+[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses
+hashes when converting to and from JSON, we'll use the library
+[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically
+coerce class properties to and from hashes.
You can install this library
+with `gem install hashie`.
+
+```ruby
+# Encoding: utf-8
+
+require 'riak'
+require 'hashie'
+require 'time'
+
+class User < Hashie::Dash
+  property :user_name
+  property :full_name
+  property :email
+end
+
+class Msg < Hashie::Dash
+  property :from
+  property :to
+  property :created
+  property :text
+end
+
+class Timeline < Hashie::Dash
+  property :owner
+  property :type
+  property :msgs
+end
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br> `marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def add_to_existing_timeline(key, msg_key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    timeline = Timeline.new(riak_obj.data)
+    timeline.msgs << msg_key
+    riak_obj.data = timeline
+    riak_obj
+  end
+
+  def get_owner(msg, type)
+    type == INBOX ? msg.to : msg.from
+  end
+
+  def generate_key_from_msg(msg, type)
+    owner = get_owner(msg, type)
+    generate_key(owner, type, msg.created)
+  end
+
+  def generate_key(owner, type, date)
+    owner + '_' + type + '_' + date.utc.strftime('%F')
+  end
+end
+```
+
+Finally, let's test them:
+
+```ruby
+# Setup our repositories
+client = Riak::Client.new(protocol: 'pbc', pb_port: 10017)
+user_repo = UserRepository.new(client)
+msgs_repo = MsgRepository.new(client)
+timeline_repo = TimelineRepository.new(client)
+
+# Create and save users
+marleen = User.new(user_name: 'marleenmgr',
+                   full_name: 'Marleen Manager',
+                   email: 'marleen.manager@basho.com')
+
+joe = User.new(user_name: 'joeuser',
+               full_name: 'Joe User',
+               email: 'joe.user@basho.com')
+
+user_repo.save(marleen)
+user_repo.save(joe)
+
+# Create new Msg, post to timelines
+msg = Msg.new(from: marleen.user_name,
+              to: joe.user_name,
+              created: Time.now,
+              text: 'Welcome to the company!')
+
+timeline_repo.post_message(msg)
+
+# Get Joe's inbox for today, get first message
+joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now)
+joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first)
+
+puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}"
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+* It helps us to see if an object exists before creating a new one
+* It keeps our buckets and key names consistent
+* It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data.
+
diff --git a/content/riak/kv/2.2.6/developing/getting-started/ruby/querying.md b/content/riak/kv/2.2.6/developing/getting-started/ruby/querying.md
new file mode 100644
index 0000000000..00d3d86469
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/getting-started/ruby/querying.md
@@ -0,0 +1,252 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Ruby"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Querying"
+    identifier: "getting_started_ruby_query"
+    weight: 101
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/taste-of-riak/querying-ruby
+  - /riak/kv/2.2.6/dev/taste-of-riak/querying-ruby
+---
+
+#### A Quick Note on Querying and Schemas
+_Schemas_? Yes, we said that correctly, S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another.
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index of all of a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```ruby
+shared_key = '1'
+customer = customer_bucket.get(shared_key).data
+customer[:order_summary] = order_summary_bucket.get(shared_key).data
+puts "Combined Customer and Order Summary: "
+pp customer
+```
+
+Which returns our amalgamated objects:
+
+```ruby
+# Combined Customer and Order Summary:
+{"customer_id"=>1,
+ "name"=>"John Smith",
+ "address"=>"123 Main Street",
+ "city"=>"Columbus",
+ "state"=>"Ohio",
+ "zip"=>"43210",
+ "phone"=>"+1-614-555-5555",
+ "created_date"=>"2013-10-01 14:30:26 -0400",
+ :order_summary=>
+  {"customer_id"=>1,
+   "summaries"=>
+    [{"order_id"=>1,
+      "total"=>415.98,
+      "order_date"=>"2013-10-01 14:42:26 -0400"},
+     {"order_id"=>2,
+      "total"=>359.99,
+      "order_date"=>"2013-10-15 16:43:16 -0400"},
+     {"order_id"=>3,
+      "total"=>74.98,
+      "order_date"=>"2013-11-03 17:45:28 -0500"}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.2.6/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+                                         .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "#Jane's Orders: "
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`.
+ +```ruby +puts "#October's Orders: " +pp order_bucket.get_index('order_date_bin', '20131001'..'20131031') +``` + +Which returns: + +```ruby +# October's Orders: +["1", "2"] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.2.6/developing/key-value-modeling.md b/content/riak/kv/2.2.6/developing/key-value-modeling.md new file mode 100644 index 0000000000..01b40c1ad8 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/key-value-modeling.md @@ -0,0 +1,531 @@ +--- +title: "Riak KV Key/Value Modeling" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Key/Value Modeling" + identifier: "developing_kv_model" + weight: 104 + parent: "developing" +toc: true +aliases: + - /riak/2.2.6/dev/data-modeling/key-value/ + - /riak/kv/2.2.6/dev/data-modeling/key-value/ +--- + +While Riak enables you to take advantage of a wide variety of features +that can be useful in application development, such as [Search]({{}}riak/kv/2.2.6/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.2.6/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.2.6/developing/data-types/), Riak almost always performs best when you +build your application around basic CRUD operations (create, read, +update, and delete) on objects, i.e. when you use Riak as a "pure" +key/value store. + +In this tutorial, we'll suggest some strategies for naming and modeling +for key/value object interactions with Riak. If you'd like to use some +of Riak's other features, we recommend checking out the documentation +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.2.6/developing/app-guide/) for a better sense of which features you might need. + +## Advantages of Key/Value Operations + +Riak's key/value architecture enables it to be more performant than +relational databases in many scenarios because Riak doesn't need to +perform lock, join, union, or other operations when working with +objects. Instead, it interacts with objects on a one-by-one basis, using +**primary key lookups**. + +Primary key lookups store and fetch objects in Riak on the basis of +three basic locators: + +* The object's [key]({{}}riak/kv/2.2.6/learn/concepts/keys-and-objects#keys), which can be anything you + want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/2.2.6/learn/concepts/buckets) which houses the object and its key (bucket + names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/2.2.6/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.2.6/developing/app-guide/replication-properties) and other properties + +It may be useful to think of this system as analogous to a nested +key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you +would find in most programming languages. Below is an example from +[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{<baseurl>}}riak/kv/2.2.6/developing/api/http)):
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+The most important benefit of storing Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist?
+ +One way to determine this is to [list all keys]({{}}riak/kv/2.2.6/developing/api/protocol-buffers/list-keys) in the +bucket `users`. This approach, however, is _not_ recommended, because +listing all keys in a bucket is a very expensive operation that should +not be used in production. And so another strategy must be employed. + +A better possibility is to use [Riak sets]({{}}riak/kv/2.2.6/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.2.6/developing/data-types) that enable you to store lists of binaries or strings in Riak. +Unlike normal Riak objects, you can interact with Riak sets much like +you interact with sets in most programming languages, i.e. you can add +and remove elements at will. + +Going back to our user data example, instead of simply storing user +records in our `users` bucket, we could set up our application to store +each key in a set when a new record is created. We'll store this set in +the bucket `user_info_sets` (we'll keep it simple) and in the key +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.2.6/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`sets`. + +We can interact with that set on the basis of its location: + +```java +Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames"); + +// With this Location, we can construct fetch operations like this: +FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build(); +``` + +```ruby +require 'riak' + +set_bucket = client.bucket('user_info_sets') + +# We'll make this set global because we'll use it +# inside of a function later on + +$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets') +``` + +```php +$command = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->buildLocation('usernames', 'user_info_sets', 'sets') + ->build(); +``` + +```python +from riak.datatypes import Set + +bucket = client.bucket_type('sets').bucket('user_info_sets') +user_id_set = Set(bucket, 'usernames') +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.2.6/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.2.6/developing/getting-started). 
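+
+To make the set interaction concrete, here is a minimal Python sketch; it reuses the `user_id_set` object defined above, and the username is just a stand-in value:
+
+```python
+# A sketch only: add a username to the set and persist the change,
+# then reload the set to see the server-side membership.
+user_id_set.add('joeuser')
+user_id_set.store()
+
+sorted(user_id_set.reload().value)
+```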
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+    public String username;
+    public String info;
+
+    public User(String username, String info) {
+        this.username = username;
+        this.info = info;
+    }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+    // User records themselves will be stored in the bucket "users"
+    Location userObjectLocation =
+        new Location(new Namespace("users"), user.username);
+    RiakObject userObject = new RiakObject()
+        // We'll keep it simple and store User object data as plain text
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create(user.info));
+    StoreValue store = new StoreValue.Builder(userObjectLocation, userObject)
+        .build();
+    client.execute(store);
+
+    Location userIdSet =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    SetUpdate su = new SetUpdate()
+        .add(BinaryValue.create(user.username));
+    UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+        .build();
+    client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new($client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the set
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+  public $user_name;
+  public $info;
+
+  public function __construct($user_name, $info)
+  {
+    $this->user_name = $user_name;
+    $this->info = $info;
+  }
+}
+
+function store_user(User $user)
+{
+  (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation($user->user_name, 'users')
+    ->buildJsonObject($user)
+    ->build()
+    ->execute();
+
+  (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->buildLocation('usernames', 'user_info_sets', 'sets')
+    ->add($user->user_name)
+    ->build()
+    ->execute();
+}
+```
+
+```python
+from riak import RiakObject
+
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    obj = RiakObject(client, client.bucket('users'), user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
+
+Now, let's say that we want to be able to pull up all user records in
+the bucket at once.
We could do so by iterating through the usernames
+stored in our set and then fetching the object corresponding to each
+username:
+
+```java
+public Set<User> fetchAllUserRecords() {
+    // Empty builder sets for usernames and User objects
+    Set<String> usernames = new HashSet<String>();
+    Set<User> userSet = new HashSet<User>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSet =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    set.viewAsSet().forEach((BinaryValue username) -> {
+        usernames.add(username.toString());
+    });
+
+    // Fetch User objects for each of the usernames stored in the set
+    usernames.forEach((String username) -> {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        User user = client.execute(fetch).getValue(User.class);
+        userSet.add(user);
+    });
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+    $users = [];
+
+    $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->build()
+        ->execute();
+
+    $user_names = $response->getSet()->getData();
+    foreach($user_names as $user_name) {
+        $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+            ->buildLocation($user_name, 'users')
+            ->build()
+            ->execute();
+
+        $users[$user_name] = $response->getObject()->getData();
+    }
+
+    return $users;
+}
+```
+
+```python
+# We'll create a generator object that will yield a list of Riak objects
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can retrieve that list of Riak objects later on
+list(fetch_all_user_records())
+```
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`.
If an
+object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) {
+    String usernameKey = String.format("user_%s", username);
+    Location loc = new Location(new Namespace("users"), usernameKey);
+    FetchValue fetchUser = new FetchValue.Builder(loc).build();
+    FetchValue.Response res = client.execute(fetchUser);
+    User userObject = res.getValue(User.class);
+    return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->buildLocation($user_name, 'users')
+        ->build()
+        ->execute();
+
+    return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good' => {
+    'season X' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  },
+  'bad' => {
+    'season Y' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  }
+}
+```
+
+We can fetch the title of season 8, episode 6:
+
+```ruby
+# For the sake of example, we'll classify season 8 as good:
+
+simpsons['good']['season 8']['episode 6']
+
+# => "A Milhouse Divided"
+```
+
+If your data is best modeled as a three-layered hash, you may want to
+consider using bucket types in the way shown above.
+
+## Resources
+
+More on key/value modeling in Riak can be found in [this
+presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo)
+by Basho evangelist [Hector Castro](https://github.com/hectcastro), with
+the presentation slides available [on Speaker
+Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example).
diff --git a/content/riak/kv/2.2.6/developing/usage.md b/content/riak/kv/2.2.6/developing/usage.md
new file mode 100644
index 0000000000..7d83c88a70
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage.md
@@ -0,0 +1,133 @@
+---
+title: "Usage Overview"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Usage"
+    identifier: "developing_usage"
+    weight: 101
+    parent: "developing"
+toc: true
+---
+
+## In This Section
+
+#### [Creating Objects](./creating-objects)
+
+Creating and storing objects in Riak KV.
+
+[Learn More >>](./creating-objects)
+
+
+#### [Reading Objects](./reading-objects)
+
+Reading and fetching objects in Riak KV.
+
+[Learn More >>](./reading-objects)
+
+
+#### [Updating Objects](./updating-objects)
+
+Updating objects in Riak KV.
+
+[Learn More >>](./updating-objects)
+
+
+#### [Deleting Objects](./deleting-objects)
+
+Deleting objects in Riak KV.
+
+[Learn More >>](./deleting-objects)
+
+
+#### [Content Types](./content-types)
+
+Overview of content types and their usage.
+
+[Learn More >>](./content-types)
+
+
+#### [Using Search](./search)
+
+Tutorial on using search.
+
+[Learn More >>](./search)
+
+
+#### [Using MapReduce](./mapreduce)
+
+Guide to using MapReduce in applications.
+
+[Learn More >>](./mapreduce)
+
+
+#### [Using Secondary Indexes](./secondary-indexes)
+
+Overview and usage details of Secondary Indexes (2i).
+
+[Learn More >>](./secondary-indexes)
+
+
+#### [Bucket Types](./bucket-types)
+
+Describes how to use bucket properties.
+
+[Learn More >>](./bucket-types)
+
+
+#### [Using Commit Hooks](./commit-hooks)
+
+Tutorial on pre-commit and post-commit hook functions.
+
+[Learn More >>](./commit-hooks)
+
+
+#### [Creating Search Schemas](./search-schemas)
+
+Step-by-step guide on creating and using custom search schemas.
+
+[Learn More >>](./search-schemas)
+
+
+#### [Searching with Data Types](./searching-data-types)
+
+Guide on using search with Data Types.
+
+[Learn More >>](./searching-data-types)
+
+
+#### [Implementing a Document Store](./document-store)
+
+Tutorial on using Riak KV as a document store.
+
+[Learn More >>](./document-store)
+
+
+#### [Custom Extractors](./custom-extractors)
+
+Details on creating and registering custom extractors with Riak Search.
+
+[Learn More >>](./custom-extractors)
+
+
+#### [Client-side Security](./security)
+
+Overview of client-side security.
+
+[Learn More >>](./security)
+
+
+#### [Replication](./replication)
+
+Documentation on replication properties and their underlying implementation.
+
+[Learn More >>](./replication)
+
+
+#### [Conflict Resolution](./conflict-resolution)
+
+Guide to conflict resolution during object updates.
+ +[Learn More >>](./conflict-resolution) diff --git a/content/riak/kv/2.2.6/developing/usage/bucket-types.md b/content/riak/kv/2.2.6/developing/usage/bucket-types.md new file mode 100644 index 0000000000..cdfcc85d4d --- /dev/null +++ b/content/riak/kv/2.2.6/developing/usage/bucket-types.md @@ -0,0 +1,98 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Bucket Types" + identifier: "usage_bucket_types" + weight: 108 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.2.6/dev/advanced/bucket-types + - /riak/kv/2.2.6/dev/advanced/bucket-types +--- + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +## Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +`default` bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` diff --git a/content/riak/kv/2.2.6/developing/usage/commit-hooks.md b/content/riak/kv/2.2.6/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..911e087f8a --- /dev/null +++ b/content/riak/kv/2.2.6/developing/usage/commit-hooks.md @@ -0,0 +1,239 @@ +--- +title: "Using Commit Hooks" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Using Commit Hooks" + identifier: "usage_commit_hooks" + weight: 109 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.2.6/dev/using/commit-hooks + - /riak/kv/2.2.6/dev/using/commit-hooks +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types + +Pre- and post-commit hooks are functions that are invoked before or +after an object has been written to Riak. 
To provide a few examples,
+commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and then the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/2.2.6/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions may return one of three possible values:
+
+- A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written.
+- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/2.2.6/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
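+
+To make those three outcomes concrete, here is a minimal sketch of a pre-commit hook; the module-less function name and the match values are hypothetical:
+
+```erlang
+%% A sketch only: one branch for each of the three possible returns.
+precommit_sketch(Object) ->
+    case riak_object:get_value(Object) of
+        <<>>         -> {fail, "Empty values are not allowed"};
+        <<"secret">> -> fail;
+        _            -> Object  %% unmodified object: the write proceeds
+    end.
+```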
+
+Errors that occur when processing Erlang pre-commit hooks will be
+reported in the `sasl-error.log` file with lines that start with
+`problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object
+(`Object`) as its input and runs a pattern-matching operation on the
+object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
+function determines that the object's value (extracted by the `riak_object:get_value`
+function) is larger than 5,242,880 bytes (5 MB), then the commit
+will fail and return the message `Object is larger than 5MB.`, which
+stops the write. If the object is not larger than 5 MB, the hook
+returns the object and allows the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty
+list, meaning that no pre-commit hooks are specified by default. Adding
+one or more pre-commit hook functions to this list, as documented above,
+will cause Riak to start evaluating those hook functions when bucket
+entries are created, updated, or deleted. Riak stops evaluating
+pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way to use
+pre-commit hooks is to validate data before it is written to Riak.
+Below is an Erlang example that uses the `mochijson2` parser to
+validate a JSON object before it is written to Riak.
+
+Here is a sample JSON object that will be evaluated by the hook:
+
+```json
+{
+  "user_info": {
+    "name": "Mark Phillips",
+    "age": "25"
+  },
+  "session_info": {
+    "id": 3254425,
+    "items": [29, 37, 34]
+  }
+}
+```
+
+The following hook will validate the JSON object:
+
+```erlang
+validate(Object) ->
+  try
+    mochijson2:decode(riak_object:get_value(Object)),
+    Object
+  catch
+    throw:invalid_utf8 ->
+      {fail, "Invalid JSON: Illegal UTF-8 character"};
+    error:Error ->
+      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
+  end.
+```
+
+**Note**: All pre-commit hook functions are executed for each create and update operation.
+
+## Post-Commit Hooks
+
+Post-commit hooks are run after a write has completed successfully. More
+specifically, the hook function is called immediately before the calling
+process is notified of the successful write.
+
+Hook functions must accept a single argument: the object instance just
+written. The return value of the function is ignored. As with pre-commit
+hooks, deletes are considered writes, so post-commit hook functions will
+need to inspect the object's metadata for the presence of `X-Riak-Deleted`
+to determine whether a delete has occurred. As with pre-commit hooks,
+errors that occur when processing post-commit hooks will be reported in
+the `sasl-error.log` file with lines that start with `problem invoking hook`.
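+
+Because the return value is ignored, a useful post-commit hook works
+entirely through side effects. As a minimal sketch (the function name
+and log message are ours, not from the example below):
+
+```erlang
+%% Sketch: log the bucket and key of every successful write. Whatever
+%% this function returns is discarded by Riak.
+postcommit_log_write(Object) ->
+    error_logger:info_msg("Stored object ~p/~p~n",
+                          [riak_object:bucket(Object), riak_object:key(Object)]).
+```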
+
+#### Example
+
+The following post-commit hook creates a secondary index on the `email`
+field of a JSON object:
+
+```erlang
+postcommit_index_on_email(Object) ->
+    %% Determine the target bucket name
+    Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),
+
+    %% Decode the JSON body of the object
+    {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),
+
+    %% Extract the email field
+    {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),
+
+    %% Create a new object for the target bucket
+    %% NOTE: This doesn't handle the case where the
+    %%       index object already exists!
+    IndexObj = riak_object:new(
+        Bucket, Key, <<>>, %% no object contents
+        dict:from_list(
+            [
+                {<<"content-type">>, "text/plain"},
+                {<<"Links">>,
+                    [
+                        {
+                            {riak_object:bucket(Object), riak_object:key(Object)},
+                            <<"indexed">>
+                        }]}
+            ]
+        )
+    ),
+
+    %% Get a riak client
+    {ok, C} = riak:local_client(),
+
+    %% Store the object
+    C:put(IndexObj).
+```
+
+
+### Chaining
+
+The default value of the bucket `postcommit` property is an empty list,
+meaning that no post-commit hooks are specified by default. Adding one
+or more post-commit hook functions to the list, as documented above,
+will cause Riak to start evaluating those hook functions immediately
+after data has been created, updated, or deleted. Each post-commit hook
+function runs in a separate process, so it's possible for several hook
+functions, triggered by the same update, to execute in parallel.
+
+**Note**: All post-commit hook functions are executed for each create,
+update, or delete.
diff --git a/content/riak/kv/2.2.6/developing/usage/conflict-resolution.md b/content/riak/kv/2.2.6/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..d2c15da516
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/conflict-resolution.md
@@ -0,0 +1,677 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/conflict-resolution
+  - /riak/kv/2.2.6/dev/using/conflict-resolution
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#node) is capable of receiving requests without requiring that
+every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
+unavoidable. Often, Riak can resolve these conflicts on its own
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
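+
+As a quick sketch of that read-modify-write cycle with the Erlang
+client (`riakc`, used elsewhere in these docs; the `Pid`, bucket, and
+key here are placeholders): because the update reuses the fetched
+object, its causal context travels back to Riak with the write.
+
+```erlang
+{ok, Fetched} = riakc_pb_socket:get(Pid, <<"bucket">>, <<"key">>),
+Updated = riakc_obj:update_value(Fetched, <<"new value">>),
+ok = riakc_pb_socket:put(Pid, Updated).
+```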
+
+{{% note title="Important note on terminology" %}}
+In versions of Riak prior to 2.0, vector clocks were the only causal context
+mechanism available in Riak, which changed with the introduction of dotted
+version vectors in 2.0. Please note that you may frequently find terminology in
+client library APIs, internal Basho documentation, and more that uses the term
+"vector clock" interchangeably with causal context in general. Riak's HTTP API
+still uses an `X-Riak-Vclock` header, for example, even if you are using dotted
+version vectors.
+{{% /note %}}
+
+But even when you use causal context, Riak cannot always decide which
+value is most causally recent, especially in cases involving concurrent
+updates to an object. So how does Riak behave when it can't decide on a
+single most-up-to-date value? **That is your choice**. A full listing of
+available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now,
+though, please bear in mind that we strongly recommend one of the
+following two options:
+
+1. If your data can be modeled as one of the currently available [Riak
+   Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types), we recommend using one of these types,
+   because all of them have conflict resolution _built in_, completely
+   relieving applications of the need to engage in conflict resolution.
+2. If your data cannot be modeled as one of the available Data Types,
+   we recommend allowing Riak to generate [siblings](#siblings) and to design your application to resolve
+   conflicts in a way that fits your use case. Developing your own
+   **conflict resolution strategy** can be tricky, but it has clear
+   advantages over other approaches.
+
+Because Riak allows for a mixed approach when storing and managing data,
+you can apply multiple conflict resolution strategies within a cluster.
+
+> **Note on strong consistency**
+>
+> In versions of Riak 2.0 and later, you have the option of using Riak in
+a strongly consistent fashion. This document pertains to usage of Riak
+as an _eventually_ consistent system. If you'd like to use Riak's
+strong consistency feature, please refer to the following documents:
+>
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/strong-consistency) --- A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/2.2.6/configuring/strong-consistency) --- A guide for operators
+> * [Strong consistency][use ref strong consistency] --- A more theoretical explication of strong
+  consistency
+
+## Client- and Server-side Conflict Resolution
+
+Riak's eventual consistency model is powerful because Riak is
+fundamentally non-opinionated about how data resolution takes place.
+While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+approaches to conflict resolution that are available. In Riak, you can
+mix and match conflict resolution strategies at the bucket level,
+[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets)
+to consider when reasoning about conflict resolution are the
+`allow_mult` and `last_write_wins` properties.
+
+These properties provide you with the following basic options:
+
+### Timestamp-based Resolution
+
+If the [`allow_mult`](#siblings) parameter is set to
+`false`, Riak resolves all object replica conflicts internally and does
+not return siblings to the client. 
How Riak resolves those conflicts +depends on the value that you set for a different bucket property, +[`last_write_wins`]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets). If `last_write_wins` is set to `false`, +Riak will resolve all conflicts on the basis of +[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are +attached to all Riak objects as metadata. + +The problem with timestamps is that they are not a reliable resolution +mechanism in distributed systems, and they always bear the risk of data +loss. A better yet still-problematic option is to adopt a +last-write-wins strategy, described directly below. + +### Last-write-wins + +Another way to manage conflicts is to set `allow_mult` to `false`, as +with timestamp-based resolution, while also setting the +`last_write_wins` parameter to +`true`. This produces a so-called last-write-wins (LWW) strategy whereby +Riak foregoes the use of all internal conflict resolution strategies +when making writes, effectively disregarding all previous writes. + +The problem with LWW is that it will necessarily drop some writes in the +case of concurrent updates in the name of preventing sibling creation. +If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. For examples of client-side sibling resolution, see the following +client-library-specific docs: + +* [Java]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/nodejs) + +In Riak versions 2.0 and later, `allow_mult` is set to `true` by default +for any [bucket types]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) that you create. 
This means +that if you wish to avoid client-side sibling resolution, you have a few +options: + +* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) + that set `allow_mult` to `false` +* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/2.2.6/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/2.2.6/configuring/reference#default-bucket-properties) for your + cluster. If you set the `buckets.default.allow_mult` parameter to + `false`, all bucket types that you create will have `allow_mult` set + to `false` by default. + +## Causal Context + +When a value is stored in Riak, it is tagged with a piece of metadata +called a **causal context** which establishes the object's initial +version. Causal context comes in one of two possible forms, depending +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context#vector-clocks) will be used. + +Causal context essentially enables Riak to compare the different values +of objects stored in Riak and to determine a number of important things +about those values: + + * Whether one value is a direct descendant of the other + * Whether the values are direct descendants of a common parent + * Whether the values are unrelated in recent heritage + +Using the information provided by causal context, Riak is frequently, +though not always, able to resolve conflicts between values without +producing siblings. + +Both vector clocks and dotted version vectors are non human readable and +look something like this: + +``` +a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA= +``` + +If `allow_mult` is set to `true`, you should _always_ use causal context +when updating objects, _unless you are certain that no object exists +under that key_. Failing to use causal context with mutable data, +especially for objects that are frequently updated, can lead to +[sibling explosion]({{<baseurl>}}riak/kv/2.2.6/using/performance/latency-reduction#siblings), which can +produce a variety of problems in your cluster. Fortunately, much of the +work involved with using causal context is handled automatically by +Basho's official [client libraries]({{<baseurl>}}riak/kv/2.2.6/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{<baseurl>}}riak/kv/2.2.6/developing/usage/updating-objects) document. + +## Siblings + +A **sibling** is created when Riak is unable to resolve the canonical +version of an object being stored, i.e. when Riak is presented with +multiple possible values for an object and can't figure out which one is +most causally recent. The following scenarios can create sibling values +inside of a single object: + +1. **Concurrent writes** --- If two writes occur simultaneously from +clients, Riak may not be able to choose a single value to store, in +which case the object will be given a sibling. These writes could happen +on the same node or on different nodes. +2. **Stale causal context** --- Writes from any client using a stale +[causal context]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context). 
This is a less likely scenario if a client updates +the object by reading the object first, fetching the causal context +currently attached to the object, and then returning that causal context +to Riak when performing the update (fortunately, our client libraries +handle much of this automatically). However, even if a client follows +this protocol when performing updates, a situation may occur in which an +update happens from a different client while the read/write cycle is +taking place. This may cause the first client to issue the write with an +old causal context value and for a sibling to be created. A client is +"misbehaved" if it habitually updates objects with a stale or no context +object. +3. **Missing causal context** --- If an object is updated with no causal +context attached, siblings are very likely to be created. This is an +unlikely scenario if you're using a Basho client library, but it _can_ +happen if you are manipulating objects using a client like `curl` and +forgetting to set the `X-Riak-Vclock` header. + +## Siblings in Action + +Let's have a more concrete look at how siblings work in Riak. First, +we'll create a bucket type called `siblings_allowed` with `allow_mult` +set to `true`: + +```bash +riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}' +riak-admin bucket-type activate siblings_allowed +riak-admin bucket-type status siblings_allowed +``` + +If the type has been activated, running the `status` command should +return `siblings_allowed is active`. Now, we'll create two objects and +write both of them to the same key without first fetching the object +(which obtains the causal context): + +```java +Location bestCharacterKey = + new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character"); + +RiakObject obj1 = new RiakObject() + .withContentType("text/plain") + .withValue(BinaryValue.create("Ren")); +RiakObject obj2 = new RiakObject() + .withContentType("text/plain") + .withValue(BinaryValue.create("Stimpy")); +StoreValue store1 = new StoreValue.Builder(obj1) + .withLocation(bestCharacterKey) + .build(); +StoreValue store2 = new StoreValue.Builder(obj2) + .withLocation(bestCharacterKey) + .build(); +client.execute(store1); +client.execute(store2); +``` + +```ruby +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj1 = Riak::RObject.new(bucket, 'best_character') +obj1.content_type = 'text/plain' +obj1.raw_data = 'Ren' +obj1.store + +obj2 = Riak::RObject.new(bucket, 'best_character') +obj2.content_type = 'text/plain' +obj2.raw_data = 'Stimpy' +obj2.store +``` + +```python +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj1 = RiakObject(client, bucket, 'best_character') +obj1.content_type = 'text/plain' +obj1.data = 'Ren' +obj1.store() + +obj2 = RiakObject(client, bucket, 'best_character') +obj2.content_type = 'text/plain' +obj2.data = 'Stimpy' +obj2.store() +``` + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); +``` + +```javascript +var obj1 = new Riak.Commands.KV.RiakObject(); +obj1.setContentType('text/plain'); +obj1.setBucketType('siblings_allowed'); +obj1.setBucket('nickolodeon'); +obj1.setKey('best_character'); +obj1.setValue('Ren'); + +var obj2 = new 
Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries]({{<baseurl>}}riak/kv/2.2.6/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started) section.
+
+At this point, multiple objects have been stored under the same key without
+passing any causal context to Riak. Let's see what happens if we try to
+read the contents of the object:
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed', bucket:
+        'nickolodeon', key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found. 
We should get this response: + +```java +com.basho.riak.client.cap.UnresolvedConflictException: Siblings found +``` + +```ruby +<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]> +``` + +```python +[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>] +``` + +```csharp +Sibling count: 2 + VTag: 1DSVo7VED8AC6llS8IcDE6 + VTag: 7EiwrlFAJI5VMLK87vU4tE +``` + +```javascript +info: nickolodeon/best_character has '2' siblings +``` + +```curl +Siblings: +175xDv0I3UFCfGRC7K7U9z +6zY2mUCFPEoL834vYCDmPe +``` + +As you can see, reading an object with sibling values will result in +some form of "multiple choices" response (e.g. `300 Multiple Choices` in +HTTP). If you're using the HTTP interface and want to view all sibling +values, you can attach an `Accept: multipart/mixed` header to your +request: + +```curl +curl -H "Accept: multipart/mixed" \ + http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character +``` + +Response (without headers): + +``` +ren +--WUnzXITIPJFwucNwfdaofMkEG7H + +stimpy +--WUnzXITIPJFwucNwfdaofMkEG7H-- +``` + +If you select the first of the two siblings and retrieve its value, you +should see `Ren` and not `Stimpy`. + +### Using Causal Context + +Once you are presented with multiple options for a single value, you +must determine the correct value. In an application, this can be done +either in an automatic fashion, using a use case-specific resolver, or +by presenting the conflicting objects to the end user. For more +information on application-side conflict resolution, see our +client-library-specific documentation for the following languages: + +* [Java]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/nodejs) + +We won't deal with conflict resolution in this section. Instead, we'll +focus on how to use causal context. + +After having written several objects to Riak in the section above, we +have values in our object: `Ren` and `Stimpy`. But let's say that we +decide that `Stimpy` is the correct value based on our application's use +case. In order to resolve the conflict, we need to do three things: + +1. Fetch the current object (which will return both siblings) +2. Modify the value of the object, i.e. make the value `Stimpy` +3. Write the object back to the `best_character` key + +What happens when we fetch the object first, prior to the update, is +that the object handled by the client has a causal context attached. At +that point, we can modify the object's value, and when we write the +object back to Riak, _the causal context will automatically be attached +to it_. 
Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+        bucketType: 'siblings_allowed',
+        bucket: 'nickolodeon',
+        key: 'best_character'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue('Stimpy');
+        client.storeValue({ value: riakObj, returnBody: true },
+            function (err, rslt) {
+                if (err) {
+                    throw new Error(err);
+                }
+
+                assert(rslt.values.length === 1);
+            }
+        );
+    }
+);
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible to have two clients that are
+simultaneously engaging in conflict resolution. To avoid a pathological
+divergence, you should be sure to limit the number of reconciliations and fail
+once that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/2.2.6/using/performance/latency-reduction) as the object is replicated, as well as out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant volume of updates is performed on a single
+object in a small period of time. While updating a single object
+_extremely_ frequently is not recommended, you can tune Riak's vector
+clock pruning to prevent vector clocks from growing too large too
+quickly. More on pruning can be found in the [section below](#vector-clock-pruning).
+
+### How does `last_write_wins` affect resolution?
+
+On the surface, it seems like setting `allow_mult` to `false`
+(the default) and `last_write_wins` to `true` would result in the same
+behavior, but there is a subtle distinction.
+
+Even though both settings return only one value to the client, setting
+`allow_mult` to `false` still uses vector clocks for resolution, whereas
+if `last_write_wins` is `true`, Riak reads the timestamp to determine
+the latest version. Deeper in the system, if `allow_mult` is `false`,
+Riak will still allow siblings to exist when they are created (via
+concurrent writes or network partitions), whereas setting
+`last_write_wins` to `true` means that Riak will overwrite the value
+with the one that has the later timestamp.
+
+When you don't care about sibling creation, setting `allow_mult` to
+`false` has the least surprising behavior: you get the latest value,
+but network partitions are handled gracefully. However, for cases in
+which keys are rewritten often (and quickly) and the new value isn't
+necessarily dependent on the old value, `last_write_wins` will provide
+better performance. Some use cases where you might want to use
+`last_write_wins` include caching, session storage, and insert-only
+(no updates).
+
+{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}}
+The combination of setting both the `allow_mult` and `last_write_wins`
+properties to `true` leads to undefined behavior and should not be used.
+{{% /note %}}
+
+## Vector Clock Pruning
+
+Riak regularly prunes vector clocks to prevent overgrowth, based on four
+parameters that can be set for any bucket type that you create:
+
+Parameter | Default value | Description
+:---------|:--------------|:-----------
+`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned
+`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned
+`young_vclock` | `20` | If a vector clock entry is younger than this value (in seconds), it will not be pruned
+`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in seconds), it will be pruned
+
+This diagram shows how the values of these parameters dictate the vector
+clock pruning process:
+
+![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)
+
+## More Information
+
+Additional background information on vector clocks:
+
+* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock)
+* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/)
+* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/)
+* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563)
diff --git a/content/riak/kv/2.2.6/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/csharp.md
new file mode 100644
index 0000000000..7e02ac6482
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/csharp.md
@@ -0,0 +1,119 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "C Sharp"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "C Sharp"
+ identifier: "usage_conflict_resolution_csharp" + weight: 103 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.2.6/dev/using/conflict-resolution/csharp + - /riak/kv/2.2.6/dev/using/conflict-resolution/csharp +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak .NET client][riak_dotnet_client]. + +## How the .NET Client Handles Conflict Resolution + +In the Riak .NET client, every Riak object has a `siblings` property that +provides access to a list of that object's sibling values. If there are no +siblings, that property will return an empty list. + +Here's an example of an object with siblings: + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +So what happens if the count of `obj.Siblings` is greater than 0, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `Siblings` list and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `Siblings` list and will +fetch, update and store the definitive value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Now, modify the object's value +obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain); + +// Then, store the object which has vector clock attached +var putRslt = client.Put(obj); +CheckResult(putRslt); + +obj = putRslt.Value; +// Voila, no more siblings! +Debug.Assert(obj.Siblings.Count == 0); +``` + +### Choosing a value from `Siblings` + +This example shows a basic sibling resolution strategy in which the first +sibling is chosen as the canonical value. 
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
diff --git a/content/riak/kv/2.2.6/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..5cb6290665
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,58 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/conflict-resolution/golang
+  - /riak/kv/2.2.6/dev/using/conflict-resolution/golang
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, the result of a fetch may contain multiple sibling
+objects in its `Values` slice. If there are no siblings, the slice will
+contain a single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update, and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type. 
+ +[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210) diff --git a/content/riak/kv/2.2.6/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/java.md new file mode 100644 index 0000000000..17720cadb6 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/java.md @@ -0,0 +1,272 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Java" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Java" + identifier: "usage_conflict_resolution_java" + weight: 100 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.2.6/dev/using/conflict-resolution/java + - /riak/kv/2.2.6/dev/using/conflict-resolution/java +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Java +client](https://github.com/basho/riak-java-client). + +## How the Java Client Handles Conflict Resolution + +The official Riak Java client provides a `ConflictResolver` interface +for handling sibling resolution. This interface requires that you +implement a `resolve` method that takes a Java `List` of objects of a +specific type that are stored in Riak and produces a single object of +that type, i.e. converts a `List<T>` to a single `T`. Once that +interface has been implemented, it can be registered as a singleton and +thereby applied to all read operations on a specific data type. Below is +an example resolver for the class `Foo`: + +```java +import com.basho.riak.client.api.cap.ConflictResolver; + +public class FooResolver implements ConflictResolver<Foo> { + @Override + public Foo resolve(List<Foo> siblings) { + // Insert your sibling resolution logic here + } +} +``` + +What happens within the `resolve` method is up to you and will always +depend on the use case at hand. You can implement a resolver that +selects a random `Foo` from the list, chooses the `Foo` with the most +recent timestamp (if you've set up the class `Foo` to have timestamps), +etc. In this tutorial we'll provide a simple example to get you started. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends" in the network. +Each user will bear the class `User`, which we'll create below. All of +the data for our application will be stored in buckets that bear the +[bucket type]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) `siblings`, and for this bucket type +`allow_mult` is set to `true`, which means that Riak will generate +siblings in certain cases---siblings that our application will need to +be equipped to resolve when they arise. + +The question that we need to ask ourselves now is this: if a given user +has sibling values, i.e. if there are multiple `friends` lists and Riak +can't decide which one is most causally recent, which list should be +deemed "correct" from the standpoint of the application? What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? 
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where one
+possible value has `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+public class User {
+  public String username;
+  public Set<String> friends;
+
+  public User(String username, Set<String> friends) {
+    this.username = username;
+    this.friends = friends;
+  }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
+need to implement that interface in a way that is specific to the need
+at hand, i.e. taking a list of `User` objects and returning the `User`
+object that has the longest `friends` list:
+
+```java
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied on all reads that involve that object type.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.2.6/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // key is empty and returning the one sibling if there is only
+        // one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. 
While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets). diff --git a/content/riak/kv/2.2.6/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/nodejs.md new file mode 100644 index 0000000000..3617196cdb --- /dev/null +++ b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/nodejs.md @@ -0,0 +1,58 @@ +--- +title_supertext: "Conflict Resolution:" +title: "NodeJS" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "NodeJS" + identifier: "usage_conflict_resolution_nodejs" + weight: 104 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.2.6/dev/using/conflict-resolution/nodejs + - /riak/kv/2.2.6/dev/using/conflict-resolution/nodejs +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). + +## How the Node.js Client Handles Conflict Resolution + +In the Riak Node.js client, the result of a fetch can possibly return an array +of sibling objects. If there are no siblings, that property will return an +array with one value in it. + +[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68) + +So what happens if the length of `rslt.values` is greater than 1, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `values` array and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `values` array and will +fetch, update and store the definitive value. 
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
diff --git a/content/riak/kv/2.2.6/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..cbbc8c5cd7
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,240 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/conflict-resolution/php
+  - /riak/kv/2.2.6/dev/using/conflict-resolution/php
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides what is needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+  ->build()
+  ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? What criteria
+should be applied in making that decision? 
Should the lists be merged?
+Should we pick a `User` object at random?
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict in which the
+competing values have `friends` lists of 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, later in this document.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll store the
+`friends` property as a plain PHP array, treating it as a set by taking
+care to keep its entries unique.
+
+```php
+class User {
+    public $username;
+    public $friends;
+
+    public function __construct($username, array $friends = [])
+    {
+        $this->username = $username;
+        $this->friends = $friends;
+    }
+
+    public function __toString()
+    {
+        return json_encode([
+            'username' => $this->username,
+            'friends' => $this->friends,
+            'friends_count' => count($this->friends)
+        ]);
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```php
+$bashobunny = new User('bashobunny', ['fred', 'barney']);
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('bashobunny', 'users', 'siblings')
+    ->build()
+    ->execute();
+
+echo $response->hasSiblings(); // 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `$response->getSiblings()` array down to one member.
+In our case, we need a function that takes a Riak response object as its argument,
+applies some logic to the list of values contained in the `siblings` property
+of the object, and returns a single value. 
For our example use case here, we'll
+return the sibling with the longest `friends` list:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    // If there are no siblings, simply return the fetched object
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $max_key = 0;
+    foreach ($siblings as $key => $sibling) {
+        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+            $max_key = $key;
+        }
+    }
+
+    return $siblings[$max_key];
+}
+```
+
+We can then embed this function into a more general function for fetching
+objects from the `users` bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.2.6/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object. 
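+
+Here is one way that merged-list resolver might look. It follows the
+response API used in the examples above, but the function itself is our
+own illustration (the name, the no-siblings fallback, and the merge step
+are a sketch, not part of the official client), so adapt it to your
+application:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+// Illustrative sketch: merge all siblings' "friends" lists into one User
+function merged_friends_list_resolver(Command\Object\Response $response)
+{
+    // With no siblings present, simply return the fetched object
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+
+    // Build the union of every sibling's "friends" list
+    $friends = [];
+    foreach ($siblings as $sibling) {
+        $friends = array_merge($friends, $sibling->getData()['friends']);
+    }
+    $friends = array_values(array_unique($friends));
+
+    // Every sibling stores the same username, so take it from the first
+    $username = $siblings[0]->getData()['username'];
+
+    return new User($username, $friends);
+}
+```
+
+Writing the merged `User` back to Riak would then follow the usual store
+pattern, with the fetched object's vector clock passed along so that the
+write supersedes the siblings rather than creating more of them.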
+
+The drawback to this approach is that it's more or less inevitable that a user
+will remove a friend from their friends list, and then that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets).
diff --git a/content/riak/kv/2.2.6/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/python.md
new file mode 100644
index 0000000000..8da314832e
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/python.md
@@ -0,0 +1,254 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Python"
+    identifier: "usage_conflict_resolution_python"
+    weight: 102
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/conflict-resolution/python
+  - /riak/kv/2.2.6/dev/using/conflict-resolution/python
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Python
+client](https://github.com/basho/riak-python-client).
+
+## How the Python Client Handles Conflict Resolution
+
+In the official Python client, every object of the `RiakObject` class
+has a `siblings` property that provides access to a list of an object's
+sibling values. If there are no siblings, that property will return a
+list with only one item. Here's an example of an object with siblings:
+
+```python
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x1081da62c1>]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? 
The easiest way to resolve siblings automatically with
+the Python client is to create a conflict-resolving function that takes
+a list of sibling values and returns a single value. Such resolution
+functions can be registered either at the object level or the bucket
+level. A more complete explanation can be found in the section directly
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will
+be of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `friends` property that lists the usernames, as
+strings, of the user's friends. We will also create a `to_json` method,
+as we'll be storing each `User` object as JSON:
+
+```python
+class User(object):
+    def __init__(self, username, friends):
+        self.username = username
+        self.friends = friends
+
+    def to_json(self):
+        return vars(self)
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```python
+new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json()
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = bucket.get('bashobunny')
+
+print len(obj.siblings) > 1
+```
+
+If we get `True`, then there are siblings. So what do we do in that
+case? The Python client allows us to write a conflict resolution hook
+function that will be triggered any time siblings are found, i.e. any
+time `len(obj.siblings) > 1`. A hook function like this needs to take a
+single `RiakObject` object as its argument, apply some sort of logic to
+the list of values contained in the `siblings` property, and ultimately
+return a list with a single "correct" value. 
For our example case, we'll
+return the value with the longest `friends` list:
+
+```python
+def longest_friends_list_resolver(riak_object):
+    # We'll specify a lambda function that operates on the length of
+    # each sibling's "friends" list:
+    lm = lambda sibling: len(sibling.data['friends'])
+    # Then we'll return a list that contains only the object with the
+    # maximum value for the length of the "friends" list:
+    riak_object.siblings = [max(riak_object.siblings, key=lm), ]
+```
+
+### Registering a Conflict Resolver Function
+
+In the Python client, resolver functions can be registered at the object
+level, as in this example:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.resolver = longest_friends_list_resolver
+
+# Now, when the object is loaded from Riak, it will resolve to a single
+# value instead of multiple values when both commands are executed:
+obj.reload()
+obj.store()
+```
+
+Alternatively, resolvers can be registered at the bucket level, so that
+the resolution is applied to all objects in the bucket:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+bucket.resolver = longest_friends_list_resolver
+
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.reload()
+obj.store()
+
+# The resolver will also be applied if you perform operations using the
+# bucket object:
+
+bucket.get('bashobunny')
+bucket.get('some_other_user')
+```
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` object values and returns a single value. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including code examples
+from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.2.6/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friend
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. 
We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets). 
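+
+For comparison, here is a brief sketch of how the `friends` data could
+be modeled with the Python client's built-in `Set` support instead. The
+bucket type name `sets` is our own example here and assumes a type
+created and activated with the `datatype` property set to `set`:
+
+```python
+# Assumes a bucket type created with:
+#   riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+#   riak-admin bucket-type activate sets
+bucket = client.bucket_type('sets').bucket('friends')
+
+friends_set = bucket.new('bashobunny')
+friends_set.add('fred')
+friends_set.add('barney')
+friends_set.store()
+
+# Concurrent additions from different clients converge automatically,
+# so there are no siblings for the application to resolve
+print bucket.get('bashobunny').value  # frozenset(['fred', 'barney'])
+```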
diff --git a/content/riak/kv/2.2.6/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/ruby.md
new file mode 100644
index 0000000000..39fda51a0b
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/conflict-resolution/ruby.md
@@ -0,0 +1,250 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Ruby"
+    identifier: "usage_conflict_resolution_ruby"
+    weight: 101
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/conflict-resolution/ruby
+  - /riak/kv/2.2.6/dev/using/conflict-resolution/ruby
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Ruby
+client](https://github.com/basho/riak-ruby-client).
+
+## How the Ruby Client Handles Conflict Resolution
+
+In the official Ruby client, every Riak object has a `siblings` property
+that provides access to a list of that object's sibling values. If there
+are no siblings, that property will return an array with only one item.
+Here's an example of an object with siblings:
+
+```ruby
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? In order to resolve siblings, you need to create a
+resolution function that takes a Riak object and reduces the `siblings`
+array down to a single value. An example is provided in the section
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will be
+of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` and a `friends` property that lists
+the usernames, as strings, of the user's friends. 
We will also create a
+`to_json` method, as we'll be storing each `User` object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
+```
+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username, type: 'siblings')
+  longest_friends_list_resolver(user_object)
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. 
**Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.2.6/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friend
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be merged into one
+    # array
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
+```
+
+The drawback to this approach is that it's more or less inevitable that
+a user will remove a friend from their friends list, and that that
+friend will end up back on the list during a conflict resolution
+operation. While that's certainly not desirable, that is likely better
+than the alternative proposed in the first example, which entails
+usernames being simply dropped from friends lists. Sibling resolution
+strategies almost always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. 
The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets).
diff --git a/content/riak/kv/2.2.6/developing/usage/content-types.md b/content/riak/kv/2.2.6/developing/usage/content-types.md
new file mode 100644
index 0000000000..0b6ececb70
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/content-types.md
@@ -0,0 +1,187 @@
+---
+title: "Content Types"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Content Types"
+    identifier: "usage_content_types"
+    weight: 104
+    parent: "developing_usage"
+toc: true
+---
+
+Riak KV is a fundamentally content-agnostic database. You can use it to
+store anything you want, from JSON to XML to HTML to binaries to images
+and beyond. It's important to note that _all_ objects stored in
+Riak need a specified content type. If you don't specify a
+content type, the reaction will vary based on your client library:
+
+```java
+// In the Java client, the response when storing an object without
+// specifying a content type will depend on what is being stored. If you
+// store a Java Map, for example, the client will automatically specify
+// that the object is "application/json"; if you store a String, the
+// client will specify "application/x-www-form-urlencoded"; POJOs are
+// stored as JSON by default, and so on.
+```
+
+```ruby
+# In the Ruby client, you must always specify a content type. If you
+# don't, you'll see the following error:
+ArgumentError: content_type is not defined!
+```
+
+```php
+# PHP will default to cURL's default content type for POST & PUT requests:
+# application/x-www-form-urlencoded
+
+# If you use the StoreObject::buildJsonObject() method when building your command,
+# it will store the item with application/json as the content type
+```
+
+```python
+# In the Python client, the default content type is "application/json".
+# Because of this, you should always make sure to specify the content
+# type when storing other types of data.
+```
+
+```csharp
+// Using the Riak .NET Client, the response when storing an object without
+// specifying a content type will depend on what is being stored.
+// If you store a Dictionary, for example, the client will
+// automatically specify that the object is "application/json";
+// POCOs are stored as JSON by default, and so on.
+```
+
+```javascript
+// In the Node.js client, the default content type is "application/json".
+// Because of this, you should always make sure to specify the content
+// type when storing other types of data.
+```
+
+```erlang
+%% In the Erlang client, the response when storing an object without
+%% specifying a content type will depend on what is being stored. If
+%% you store a simple binary, for example, the client will automatically
+%% specify that the object is "application/octet-stream"; if you store a
+%% string, the client will specify "application/x-erlang-binary"; and so
+%% on.
+```
+
+```golang
+// In the Go client, you must always specify a content type.
+```
+
+Because content type negotiation varies so widely from client to client,
+we recommend consulting the documentation for your preferred client for
+more information. 
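+
+Whichever client you use, the content type you supply is stored along
+with the object and returned on every subsequent read. Here's a quick
+sketch of that round trip using the HTTP interface directly (the
+`default` bucket type, `demo` bucket, and `rufus` key are placeholder
+names for this illustration):
+
+```curl
+# Store a JSON value with an explicit content type...
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"name":"Rufus"}' \
+  http://localhost:8098/types/default/buckets/demo/keys/rufus
+
+# ...and the same Content-Type header comes back when the object is read
+curl -i http://localhost:8098/types/default/buckets/demo/keys/rufus
+# HTTP/1.1 200 OK
+# Content-Type: application/json
+# ...
+# {"name":"Rufus"}
+```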
+
+## Specifying Content Type
+
+For all writes to Riak, you will need to specify a content type, for
+example `text/plain` or `application/json`.
+
+```java
+Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(wildeGeniusQuote)
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = Riak::RObject.new(bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.raw_data = 'I have nothing to declare but my genius'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->buildObject('I have nothing to declare but my genius!', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = RiakObject(client, bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.data = 'I have nothing to declare but my genius'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
+var obj = new RiakObject(id, "I have nothing to declare but my genius",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('I have nothing to declare but my genius');
+client.storeValue({
+    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
+                       <<"genius">>,
+                       <<"I have nothing to declare but my genius">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("I have nothing to declare but my genius"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("quotes").
+    WithBucket("oscar_wilde").
+    WithKey("genius").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "I have nothing to declare but my genius" \
+  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Please note that POST is also a valid method for writes, for the sake
+# of compatibility
+```
diff --git a/content/riak/kv/2.2.6/developing/usage/creating-objects.md b/content/riak/kv/2.2.6/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..9aa55b1f67
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/creating-objects.md
@@ -0,0 +1,550 @@
+---
+title: "Creating Objects in Riak KV"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Creating Objects"
+    identifier: "usage_creating_objects"
+    weight: 100
+    parent: "developing_usage"
+toc: true
+---
+
+[usage content types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/content-types
+
+Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
+requests. 
Here is the basic form of writes:
+
+```
+PUT /types/<type>/buckets/<bucket>/keys/<key>
+
+# If you're using HTTP to interact with Riak, you can also use POST
+```
+
+As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/bucket-types).
+
+The object we're storing will be very simple, just a basic text snippet
+of something that Rufus might say. Let's build the object and then store
+it.
+
+```java
+String quote = "WOOF!";
+Namespace bucket = new Namespace("animals", "dogs");
+Location rufusLocation = new Location(bucket, "rufus");
+RiakObject rufusObject = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(BinaryValue.create(quote));
+StoreValue storeOp = new StoreValue.Builder(rufusObject)
+  .withLocation(rufusLocation)
+  .build();
+client.execute(storeOp);
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = Riak::RObject.new(bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->buildObject('WOOF!', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var obj = new RiakObject(id, "WOOF!", "text/plain");
+var result = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('WOOF!');
+client.storeValue({
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("WOOF!"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+Notice that we specified both a value for the object, i.e. `WOOF!`, and
+a content type, `text/plain`. See [content types][usage content types] for more information.
+
+Now, you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/2.2.6/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/bucket-types).
+
+### Store an Object
+
+Your application will often have its own method of generating the keys
+for its data, e.g. on the basis of timestamps. If so, storing that data
+is easy. The basic request looks like this:
+
+```
+PUT /types/TYPE/buckets/BUCKET/keys/KEY
+
+# If you're using HTTP, POST can be used instead of PUT. The only
+# difference between POST and PUT is that you should POST in cases where
+# you want Riak to auto-generate a key. 
More on this can be found in the
+# examples below.
+```
+
+There is no need to intentionally create buckets in Riak. They pop into
+existence when keys are added to them, and disappear when all keys have
+been removed from them. If you don't specify a bucket's type, the type
+[`default`]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) will be applied.
+
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(viperKey)
+  .withOption(StoreOption.W, new Quorum(3))
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>,
+                       [{w, 3}]).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. 
Like a `GET` request, `300 Multiple
+Choices` may be returned if siblings existed or were created as part of
+the operation, and the response can be dealt with similarly.
+
+Normal HTTP status codes (responses will vary for client libraries):
+
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+For example, using the same object from above:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(viperKey)
+  .withOption(StoreOption.W, new Quorum(3))
+  .withOption(StoreOption.RETURN_BODY, true)
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3, returnbody: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+options.SetReturnBody(true);
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, returnBody: true, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var viper = riakObj.value;
+    logger.info("dodge viper: %s", viper.toString('utf8'));
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
+```
+
+### Store a New Object and Assign a Random Key
+
+If your application would rather leave key generation up to Riak, issue
+a `POST` request to the bucket URL instead of a PUT to a bucket/key
+pair:
+
+```
+POST /types/TYPE/buckets/BUCKET/keys
+```
+
+If you don't pass Riak a `key` name after the bucket, it will know to
+create one for you.
+
+Supported headers are the same as for bucket/key write requests, though
+`X-Riak-Vclock` will never be relevant for these POST requests.
+Supported query parameters are also the same as for bucket/key PUT
+requests. 
+ +Normal status codes: + +* `201 Created` + +This command will store an object in the bucket `random_user_keys`, +which bears the bucket type `users`. + +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)
+
+// Output:
+// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
+```
+
+```curl
+curl -i -XPOST \
+  -H "Content-Type: text/plain" \
+  -d "this is a test" \
+  http://localhost:8098/types/users/buckets/random_user_keys/keys
+
+# In the output, you should see a Location header that will give you the
+# location of the object in Riak, with the key at the end:
+
+Location: /types/users/buckets/random_user_keys/keys/G7FYUXtTsEdru4NP32eijMIRK3o
+```
diff --git a/content/riak/kv/2.2.6/developing/usage/custom-extractors.md b/content/riak/kv/2.2.6/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..92a281bdf0
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/custom-extractors.md
@@ -0,0 +1,420 @@
+---
+title: "Custom Extractors"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Custom Extractors"
+    identifier: "usage_custom_extractors"
+    weight: 113
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/search/custom-extractors
+  - /riak/kv/2.2.6/dev/search/custom-extractors
+---
+
+Solr, and by extension Riak Search, has default extractors for a wide
+variety of data types, including JSON, XML, and plaintext. Riak Search
+ships with the following extractors:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+No specified type | `yz_noop_extractor`
+
+There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/usage/searching-data-types).
+
+If you're working with a data format that does not have a default Solr
+extractor, you can create your own and register it with Riak Search.
+We'll show you how to do so by way of example.
+
+## The Extractor Interface
+
+Creating a custom extractor involves writing an Erlang module that
+implements two functions:
+
+* `extract/1` --- Takes the contents of the object and calls `extract/2`
+  with the same contents and an empty list
+* `extract/2` --- Takes the contents of the object and returns an Erlang
+  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
+  single field name and a single value associated with that name
+
+The following extractor shows how a pure text extractor implements those
+two functions:
+
+```erlang
+-module(search_test_extractor).
+-include("yokozuna.hrl").
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+extract(Value, Opts) ->
+    FieldName = field_name(Opts),
+    [{FieldName, Value}].
+
+-spec field_name(proplist()) -> any().
+field_name(Opts) ->
+    proplists:get_value(field_name, Opts, text).
+```
+
+This extractor takes the contents of a `Value` and returns a proplist
+with a single field name (in this case `text`) and the single value.
+This function can be run in the Erlang shell. Let's run it providing the
+text `hello`:
+
+```erlang
+> c(search_test_extractor).
+%% {ok, search_test_extractor}
+
+> search_test_extractor:extract("hello").
+
+%% Console output:
+[{text, "hello"}]
+```
+
+Upon running this command, the value `hello` would be indexed in Solr
+under the fieldname `text`. 
If you wanted to find all objects with a
+`text` field that begins with `Fourscore`, you could use the
+Solr query `text:Fourscore*`, to give just one example.
+
+## An Example Custom Extractor
+
+Let's say that we're storing HTTP header packet data in Riak. Here's an
+example of such a packet:
+
+```
+GET http://www.google.com HTTP/1.1
+```
+
+We want to register the following information in Solr:
+
+Field name | Value | Extracted value in this example
+:----------|:------|:-------------------------------
+`method` | The HTTP method | `GET`
+`host` | The URL's host | `www.google.com`
+`uri` | The URI, i.e. what comes after the host | `/`
+
+The example extractor below would provide the three desired
+fields/values. It relies on the
+[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3)
+function from Erlang's standard library.
+
+```erlang
+-module(yz_httpheader_extractor).
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+%% In this example, we can ignore the Opts variable from the example
+%% above, hence the underscore:
+extract(Value, _Opts) ->
+    {ok,
+        {http_request,
+         Method,
+         {absoluteURI, http, Host, undefined, Uri},
+         _Version},
+        _Rest} = erlang:decode_packet(http, Value, []),
+    [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}].
+```
+
+This file will be stored in a `yz_httpheader_extractor.erl` file (as
+Erlang filenames must match the module name). Now that our extractor has
+been written, it must be compiled and registered in Riak before it can
+be used.
+
+## Registering Custom Extractors
+
+In order to use a custom extractor, you must create a compiled `.beam`
+file out of your `.erl` extractor file and then tell Riak where that
+file is located. Let's say that we have placed our
+`yz_httpheader_extractor.erl` file in the directory `/opt/beams`. First,
+we need to compile that file:
+
+```bash
+erlc yz_httpheader_extractor.erl
+```
+
+To instruct Riak where to find the resulting
+`yz_httpheader_extractor.beam` file, we'll need to add a line to an
+`advanced.config` file in the node's `/etc` directory (more information
+can be found in our documentation on [advanced configuration]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#advanced-configuration)). Here's an
+example:
+
+```advancedconfig
+[
+  %% Other configs
+  {vm_args, [
+    {"-pa /opt/beams", ""}
+  ]},
+  %% Other configs
+]
+```
+
+This will instruct the Erlang VM on which Riak runs to look for compiled
+`.beam` files in the proper directory. You should re-start the node at
+this point. Once the node has been re-started, you can use the node's
+Erlang shell to register the `yz_httpheader_extractor`. First, attach to
+the shell:
+
+```bash
+riak attach
+```
+
+At this point, we need to choose a MIME type for our extractor. Let's
+call it `application/httpheader`. Once you're in the shell:
+
+```erlang
+> yz_extractor:register("application/httpheader", yz_httpheader_extractor).
+```
+
+If successful, this command will return a list of currently registered
+extractors. 
It should look like this:
+
+```erlang
+[{default,yz_noop_extractor},
+ {"application/httpheader",yz_httpheader_extractor},
+ {"application/json",yz_json_extractor},
+ {"application/riak_counter",yz_dt_extractor},
+ {"application/riak_map",yz_dt_extractor},
+ {"application/riak_set",yz_dt_extractor},
+ {"application/xml",yz_xml_extractor},
+ {"text/plain",yz_text_extractor},
+ {"text/xml",yz_xml_extractor}]
+```
+
+If the `application/httpheader` extractor is part of that list, then the
+extractor has been successfully registered.
+
+## Verifying Our Custom Extractor
+
+Now that Riak Search knows how to decode and extract HTTP header packet
+data, let's store some in Riak and then query it. We'll put the example
+packet data from above in a `google_packet.bin` file. Then, we'll `PUT`
+that binary to Riak's `/search/extract` endpoint:
+
+```curl
+# Note that we use our custom MIME type:
+curl -XPUT $RIAK_HOST/search/extract \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+That should return the following JSON:
+
+```json
+{
+  "method": "GET",
+  "host": "www.google.com",
+  "uri": "/"
+}
+```
+
+We can also verify this in the Erlang shell (whether in a Riak node's
+Erlang shell or otherwise):
+
+```erlang
+yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor).
+
+%% Console output:
+[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}]
+```
+
+## Indexing and Searching HTTP Header Packet Data
+
+Now that Solr knows how to extract HTTP header packet data, we need to
+create a schema that extends the [default schema]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
+to `<fields>` in the schema, which we'll name `http_header_schema` and
+store in a `http_header_schema.xml` file:
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="http_header_schema" version="1.5">
+<fields>
+  <!-- other required fields here -->
+
+  <field name="method" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="host" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/>
+</fields>
+</schema>
+```
+
+Now, we can store the schema:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("http_header_schema.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_xml = File.read('http_header_schema.xml')
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```php
+$schema_string = file_get_contents('http_header_schema.xml');
+(new \Basho\Riak\Command\Builder\StoreSchema($riak))
+  ->withName('http_header_schema')
+  ->withSchemaString($schema_string)
+  ->build()
+  ->execute();
+```
+
+```python
+schema_xml = open('http_header_schema.xml').read()
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/http_header_schema \
+  -H 'Content-Type: application/xml' \
+  --data-binary @http_header_schema.xml
+```
+
+Riak now has our schema stored and ready for use. 
+Let's create a search index called `header_data` that's associated with
+our new schema:
+
+```java
+YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
+StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreIndex($riak))
+  ->withName('header_data')
+  ->usingSchema('http_header_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/header_data \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"http_header_schema"}'
+```
+
+Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types)
+for all of the HTTP header data that we plan to store. Any bucket that
+bears this type will be associated with our `header_data` search index.
+We'll call our bucket type `http_data_store`.
+
+```bash
+riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
+riak-admin bucket-type activate http_data_store
+```
+
+Let's use the same `google_packet.bin` file that we used previously and
+store it in a bucket with the `http_data_store` bucket type, making sure
+to use our custom `application/httpheader` MIME type:
+
+```java
+Location key = new Location(new Namespace("http_data_store", "packets"), "google");
+File packetData = new File("google_packet.bin");
+byte[] packetBinary = FileUtils.readFileToByteArray(packetData);
+
+RiakObject packetObject = new RiakObject()
+        .setContentType("application/httpheader")
+        .setValue(BinaryValue.create(packetBinary));
+
+StoreValue storeOp = new StoreValue.Builder(packetObject)
+        .withLocation(key)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+packet_data = File.read('google_packet.bin')
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = Riak::RObject.new(bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.raw_data = packet_data
+obj.store
+```
+
+```php
+$object = new Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('google', 'packets', 'http_data_store')
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+packet_data = open('google_packet.bin').read()
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = RiakObject(client, bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.data = packet_data
+obj.store()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+Now that we have some header packet data stored, we can query our
+`header_data` index on whatever basis we'd like.
First, let's verify
+that we'll get one result if we query for objects that have the HTTP
+method `GET`:
+
+```java
+// A minimal sketch using the same SearchOperation pattern shown in the
+// other Search examples in these docs:
+String index = "header_data";
+String query = "method:GET";
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withQuery('method:GET')
+  ->withIndexName('header_data')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"
+
+# This should return a fairly large JSON object with a "num_found" field
+# The value of that field should be 1
+```
diff --git a/content/riak/kv/2.2.6/developing/usage/deleting-objects.md b/content/riak/kv/2.2.6/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..c0fc5a64e2
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/deleting-objects.md
@@ -0,0 +1,152 @@
+---
+title: "Deleting Objects"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Deleting Objects"
+    identifier: "usage_deleting_objects"
+    weight: 103
+    parent: "developing_usage"
+toc: true
+---
+
+The delete command follows a predictable pattern and looks like this:
+
+```
+DELETE /types/TYPE/buckets/BUCKET/keys/KEY
+```
+
+The normal HTTP response codes for `DELETE` operations are `204 No
+Content` and `404 Not Found`. 404 responses are *normal*, in the sense
+that `DELETE` operations are idempotent and not finding the resource has
+the same effect as deleting it.
+
+Let's try to delete the `genius` key from the `oscar_wilde` bucket
+(which bears the type `quotes`):
+
+```java
+Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
+client.execute(delete);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\DeleteObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+string key = rslt.Value.Key;
+id = new RiakObjectId("users", "random_user_keys", key);
+var del_rslt = client.Delete(id);
+```
+
+```javascript
+// continuing from above example
+options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    key: generatedKey
+};
+client.deleteValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>)
+```
+
+```golang
+// Continuing from above example
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithKey(rsp.GeneratedKey).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+```
+
+## Client Library Examples
+
+If you are updating an object that has been deleted---or if an update
+might target a deleted object---we recommend that
+you first fetch the [causal context]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context) of the object prior to updating.
+This can be done by setting the `deletedvclock` parameter to `true` as
+part of the [fetch operation]({{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers/fetch-object). This can also be done
+with the official Riak clients for Ruby, Java, Erlang, and PHP, as in
+the examples below:
+
+```ruby
+object.delete
+deleted_object = bucket.get('key', deletedvclock: true)
+deleted_object.vclock
+```
+
+```python
+# It is not currently possible to fetch the causal context for a deleted
+# key in the Python client.
+```
+
+```java
+Location loc = new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+FetchValue fetch = new FetchValue.Builder(loc)
+        .withOption(Option.DELETED_VCLOCK, true)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+System.out.println(response.getVclock().asString());
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"bucket_type">>, <<"bucket">>},
+                                <<"key">>,
+                                [{deleted_vclock}]).
+
+%% In the Erlang client, the vector clock is accessible using the Obj
+%% object obtained above.
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type')
+  ->build()
+  ->execute();
+
+echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA
+```
diff --git a/content/riak/kv/2.2.6/developing/usage/document-store.md b/content/riak/kv/2.2.6/developing/usage/document-store.md
new file mode 100644
index 0000000000..631804159d
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/document-store.md
@@ -0,0 +1,613 @@
+---
+title: "Implementing a Document Store"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Implementing a Document Store"
+    identifier: "usage_document_store"
+    weight: 112
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/search/document-store
+  - /riak/kv/2.2.6/dev/search/document-store
+---
+
+Although Riak wasn't explicitly created as a document store, two
+features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/)---make it possible to use Riak as a
+highly scalable document store with rich querying capabilities. In this
+tutorial, we'll build a basic implementation of a document store using
+[Riak maps]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#maps).
+
+## Basic Approach
+
+Riak Search enables you to implement a document store in Riak in a
+variety of ways. You could, for example, store and query JSON objects or
+XML and then retrieve them later via Solr queries. In this tutorial,
+however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#maps),
+index that data using Riak Search, and then run Solr queries against
+those stored objects.
+
+You can think of these Search indexes as **collections**.
Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/2.2.6/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. + +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). 
+Let's store that schema in a file called `blog_post_schema.xml` and
+upload that schema to Riak:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("blog_post_schema.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read('blog_post_schema.xml')
+client.create_search_schema('blog_post_schema', schema_data)
+```
+
+```php
+$schema_string = file_get_contents('blog_post_schema.xml');
+(new \Basho\Riak\Command\Builder\StoreSchema($riak))
+  ->withName('blog_post_schema')
+  ->withSchemaString($schema_string)
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('blog_post_schema.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('blog_post_schema', schema_data)
+xml_file.close()
+```
+
+```csharp
+var schemaXml = File.ReadAllText("blog_post_schema.xml");
+var schema = new SearchSchema("blog_post_schema", schemaXml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+/*
+ * Full example here:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js
+ *
+ */
+var options = {
+    schemaName: 'blog_post_schema',
+    schema: schemaXml
+};
+client.storeSchema(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("blog_post_schema.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \
+  -H 'Content-Type: application/xml' \
+  --data-binary @blog_post_schema.xml
+```
+
+With our schema uploaded, we can create an index called `blog_posts` and
+associate that index with our schema:
+
+```java
+YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema");
+StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('blog_posts', 'blog_post_schema')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('blog_posts')
+  ->usingSchema('blog_post_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('blog_posts', 'blog_post_schema')
+```
+
+```csharp
+var idx = new SearchIndex("blog_posts", "blog_post_schema");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: 'blog_post_schema',
+    indexName: 'blog_posts'
+};
+client.storeIndex(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/blog_posts \
+  -H 'Content-Type: application/json' \
+  -d '{"schema": "blog_post_schema"}'
+```
+
+## How Collections Will Work
+
+Collections are not a concept that is native to Riak, but we can easily
+mimic them by thinking of a bucket type as a collection. When we
+associate a bucket type with a Riak Search index, all of the objects
+stored in any bucket of that bucket type will be queryable on the basis
+of that one index. For this tutorial, we'll create a bucket type called
+`cms` and think of that as a collection.
We could also restrict our
+`blog_posts` index to a single bucket just as easily and think of that
+as a queryable collection, but we will not do that in this tutorial.
+
+The advantage of the bucket-type-based approach is that we could store
+blog posts from different blogs in different buckets and query them
+all at once as part of the same index. It depends on the use case at
+hand. In this tutorial, we'll only be storing posts from one blog, which
+is called "Cat Pics Quarterly" and provides in-depth theoretical
+discussions of cat pics with a certain number of Reddit upvotes. All of
+the posts in this blog will be stored in the bucket
+`cat_pics_quarterly`.
+
+First, let's create our `cms` bucket type and associate it with the
+`blog_posts` index:
+
+```bash
+riak-admin bucket-type create cms \
+  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
+riak-admin bucket-type activate cms
+```
+
+Now, any object stored in any bucket of the type `cms` will be indexed
+as part of our "collection."
+
+## Storing Blog Posts as Maps
+
+Now that we know how each element of a blog post can be translated into
+one of the Riak Data Types, we can create an interface in our
+application to serve as that translation layer. Using the method
+described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-modeling), we can construct a
+class that looks like this:
+
+```java
+import java.util.Set;
+import org.joda.time.DateTime; // assumed: Joda-Time, used here for timestamps
+
+public class BlogPost {
+    private String title;
+    private String author;
+    private String content;
+    private Set<String> keywords;
+    private DateTime datePosted;
+    private Boolean published;
+    private static final String bucketType = "cms";
+
+    private Location location;
+
+    private RiakClient client;
+
+    public BlogPost(RiakClient client,
+                    String bucketName,
+                    String title,
+                    String author,
+                    String content,
+                    Set<String> keywords,
+                    DateTime datePosted,
+                    Boolean published) {
+        this.client = client;
+        this.location = new Location(new Namespace(bucketType, bucketName), null);
+        this.title = title;
+        this.author = author;
+        this.content = content;
+        this.keywords = keywords;
+        this.datePosted = datePosted;
+        this.published = published;
+    }
+
+    public void store() throws Exception {
+        RegisterUpdate titleUpdate = new RegisterUpdate(title);
+        RegisterUpdate authorUpdate = new RegisterUpdate(author);
+        RegisterUpdate contentUpdate = new RegisterUpdate(content);
+        SetUpdate keywordsUpdate = new SetUpdate();
+        for (String keyword : keywords) {
+            keywordsUpdate.add(keyword);
+        }
+        RegisterUpdate dateUpdate =
+            new RegisterUpdate(datePosted.toString("YYYY-MM-DD HH:MM"));
+        FlagUpdate publishedUpdate = new FlagUpdate(published);
+        MapUpdate mapUpdate = new MapUpdate()
+            .update("title", titleUpdate)
+            .update("author", authorUpdate)
+            .update("content", contentUpdate)
+            .update("keywords", keywordsUpdate)
+            .update("date", dateUpdate)
+            .update("published", publishedUpdate);
+        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
+            .build();
+        client.execute(storeBlogPost);
+    }
+}
+```
+
+```ruby
+class BlogPost
+  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
+    bucket = client.bucket_type('cms').bucket(bucket_name)
+    map = Riak::Crdt::Map.new(bucket, nil)
+    map.batch do |m|
+      m.registers['title'] = title
+      m.registers['author'] = author
+      m.registers['content'] = content
+      keywords.each do |k|
+        m.sets['keywords'].add(k)
+      end
+      m.registers['date'] = date_posted
+      if published
+        m.flags['published'] = true
+      end
+    end
+  end
+end
+```
+
+```php
+class BlogPost {
+    private $title = '';
+    private $author = '';
+    private $content = '';
+    private $keywords = [];
+    private $datePosted = '';
+    private $published = false;
+    private $bucketType = "cms";
+
+    private $bucket = null;
+
+    private $riak = null;
+
+    public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
+    {
+        $this->riak = $riak;
+        $this->bucket = new Bucket($bucket, $this->bucketType);
+        $this->title = $title;
+        $this->author = $author;
+        $this->content = $content;
+        $this->keywords = $keywords;
+        $this->datePosted = $date;
+        $this->published = $published;
+    }
+
+    public function store()
+    {
+        $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));
+
+        foreach($this->keywords as $keyword) {
+            $setBuilder->add($keyword);
+        }
+
+        (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
+            ->updateRegister('title', $this->title)
+            ->updateRegister('author', $this->author)
+            ->updateRegister('content', $this->content)
+            ->updateRegister('date', $this->datePosted)
+            ->updateFlag('published', $this->published)
+            ->updateSet('keywords', $setBuilder)
+            ->withBucket($this->bucket)
+            ->build()
+            ->execute();
+    }
+}
+```
+
+```python
+from riak.datatypes import Map
+
+class BlogPost:
+    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
+        bucket = client.bucket_type('cms').bucket(bucket_name)
+        self.map = Map(bucket, None)
+        self.map.registers['title'].assign(title)
+        self.map.registers['author'].assign(author)
+        self.map.registers['content'].assign(content)
+        for k in keywords:
+            self.map.sets['keywords'].add(k)
+        self.map.registers['date'].assign(date_posted)
+        if published:
+            self.map.flags['published'].enable()
+        self.map.store()
+```
+
+```csharp
+/*
+ * Please see the code in the RiakClientExamples project:
+ * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
+ */
+```
+
+```javascript
+/*
+ * Please see the code in the examples repository:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
+ */
+```
+
+Now, we can store some blog posts.
We'll start with just one:
+
+```java
+Set<String> keywords = new HashSet<String>();
+keywords.add("adorbs");
+keywords.add("cheshire");
+
+BlogPost post1 = new BlogPost(client, // client object
+                              "cat_pics_quarterly", // bucket
+                              "This one is so lulz!", // title
+                              "Cat Stevens", // author
+                              "Please check out these cat pics!", // content
+                              keywords, // keywords
+                              new DateTime(), // date posted
+                              true); // published
+try {
+    post1.store();
+} catch (Exception e) {
+    System.out.println(e);
+}
+```
+
+```ruby
+keywords = ['adorbs', 'cheshire']
+date = Time.now.strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost.new('cat_pics_quarterly',
+                          'This one is so lulz!',
+                          'Cat Stevens',
+                          'Please check out these cat pics!',
+                          keywords,
+                          date,
+                          true)
+```
+
+```php
+$keywords = ['adorbs', 'cheshire'];
+$date = new \DateTime('now');
+
+$post1 = new BlogPost(
+    $riak, // client object
+    'cat_pics_quarterly', // bucket
+    'This one is so lulz!', // title
+    'Cat Stevens', // author
+    'Please check out these cat pics!', // content
+    $keywords, // keywords
+    $date, // date posted
+    true // published
+);
+```
+
+```python
+import datetime
+
+keywords = ['adorbs', 'cheshire']
+date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost('cat_pics_quarterly',
+                      'This one is so lulz!',
+                      'Cat Stevens',
+                      'Please check out these cat pics!',
+                      keywords,
+                      date,
+                      True)
+```
+
+```csharp
+var keywords = new HashSet<string> { "adorbs", "cheshire" };
+
+var post = new BlogPost(
+    "This one is so lulz!",
+    "Cat Stevens",
+    "Please check out these cat pics!",
+    keywords,
+    DateTime.Now,
+    true);
+
+var repo = new BlogPostRepository(client, "cat_pics_quarterly");
+string id = repo.Save(post);
+```
+
+```javascript
+var post = new BlogPost(
+    'This one is so lulz!',
+    'Cat Stevens',
+    'Please check out these cat pics!',
+    [ 'adorbs', 'cheshire' ],
+    new Date(),
+    true
+);
+
+var repo = new BlogPostRepository(client, 'cat_pics_quarterly');
+
+repo.save(post, function (err, rslt) {
+    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
+});
+```
+
+## Querying
+
+Now that we have some blog posts stored in our "collection," we can
+start querying for whatever we'd like. Let's say that we want to find
+all blog posts with the keyword `funny` (after all, some cat pics are
+quite serious, and we may not want those).
+
+```java
+String index = "blog_posts";
+String query = "keywords_set:funny";
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'keywords_set:funny')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('keywords_set:funny')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'keywords_set:funny')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('keywords_set:funny')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny"
+```
+
+Or we can find posts that contain the word `furry`:
+
+```java
+String index = "blog_posts";
+String query = "content_register:furry";
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'content_register:furry')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('content_register:furry')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'content_register:furry')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('content_register:furry')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry"
+```
+
+Here are some more possible queries:
+
+Info | Query
+:----|:-----
+Unpublished posts | `published_flag:false`
+Titles that begin with `Loving*` | `title_register:Loving*`
+Post bodies containing the words `furry` and `jumping` | `content_register:(furry AND jumping)`
diff --git a/content/riak/kv/2.2.6/developing/usage/mapreduce.md b/content/riak/kv/2.2.6/developing/usage/mapreduce.md
new file mode 100644
index 0000000000..ccade5315f
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/mapreduce.md
@@ -0,0 +1,242 @@
+---
+title: "Using MapReduce"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Using MapReduce"
+    identifier: "usage_mapreduce"
+    weight: 106
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/mapreduce
+  - /riak/kv/2.2.6/dev/using/mapreduce
+---
+
+[usage 2i]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/secondary-indexes
+[usage search]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search
+[usage types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types
+[api http]: {{<baseurl>}}riak/kv/2.2.6/developing/api/http
+[api pb]: {{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers
+[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
+[guide mapreduce]: {{<baseurl>}}riak/kv/2.2.6/developing/app-guide/advanced-mapreduce
+
+{{% note title="Use MapReduce sparingly" %}}
+In Riak KV, MapReduce is the primary method for non-primary-key-based
+querying. Although useful for tasks such as batch
+processing jobs, MapReduce operations can be very computationally
+expensive, to the extent that they can degrade performance in
+production clusters operating under load. Thus, we recommend running
+MapReduce operations in a controlled, rate-limited fashion and never for
+realtime querying purposes.
+{{% /note %}}
+
+MapReduce (M/R) is a technique for dividing data processing work across
+a distributed system. It takes advantage of the parallel processing
+power of distributed systems and also reduces network bandwidth, as the
+algorithm is passed around to where the data lives rather than
+transferring a potentially huge dataset to a client algorithm.
+
+You can use MapReduce for things like filtering documents by
+tags, counting words in documents, and extracting links to related data.
+In Riak KV, MapReduce is one method for querying that is not strictly based
+on key querying, alongside [secondary indexes][usage 2i]
+and [search][usage search]. MapReduce jobs can be submitted through the
+[HTTP API][api http] or the [Protocol Buffers API][api pb], although we
+strongly recommend using the Protocol Buffers API for performance
+reasons.
+
+## Features
+
+* Map phases execute in parallel with data locality.
+* Reduce phases execute in parallel on the node where the job was
+  submitted.
+* MapReduce queries written in Erlang.
+
+## When to Use MapReduce
+
+* When you know the set of objects over which you want to MapReduce
+  (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key)
+* When you want to return actual objects or pieces of objects and not
+  just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on
+  non-key-based queries, but they only return lists of keys and not
+  whole objects.
+* When you need the utmost flexibility in querying your data. MapReduce
+  gives you full access to your object and lets you pick it apart any
+  way you want.
+
+## When Not to Use MapReduce
+
+* When you want to query data over an entire bucket. MapReduce uses a
+  list of keys, which can place a lot of demand on the cluster.
+* When you want latency to be as predictable as possible.
+
+## How it Works
+
+The MapReduce framework helps developers divide a query into steps,
+divide the dataset into chunks, and then run those step/chunk pairs in
+separate physical hosts.
+
+There are two steps in a MapReduce query:
+
+* **Map** --- The data collection phase, which breaks up large chunks of
+  work into smaller ones and then takes action on each chunk. Map
+  phases consist of a function and a list of objects on which the map
+  operation will operate.
+* **Reduce** --- The data collation or processing phase, which combines
+  the results from the map step into a single output. The reduce phase
+  is optional.
+
+Riak KV MapReduce queries have two components:
+
+* A list of inputs
+* A list of phases
+
+The elements of the input list are object locations as specified by
+[bucket type][usage types], bucket, and key. The elements of the
+phases list are chunks of information related to a map, a reduce, or a
+link function.
+
+A MapReduce query begins when a client makes the request to Riak KV.
+The node that the client contacts to make the request becomes the
+*coordinating node* responsible for the MapReduce job. As described
+above, each job consists of a list of phases, where each phase is either
+a map or a reduce phase. The coordinating node uses the list of phases
+to route the object keys and the function that will operate over the
+objects stored in those keys and instruct the proper [vnode][glossary vnode] to
+run that function over the right objects.
+
+After running the map function, the results are sent back to the
+coordinating node. This node then concatenates the list and passes that
+information over to a reduce phase on the same coordinating node,
+assuming that the next phase in the list is a reduce phase.
+
+The diagram below provides an illustration of how a coordinating vnode
+orchestrates a MapReduce job.
+
+![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png)
+
+## Example
+
+In this example, we'll create four objects with the text "caremad"
+repeated a varying number of times and store those objects in the bucket
+`training` (which does not bear a [bucket type][usage types]).
+An Erlang MapReduce function will be used to count the occurrences of
+the word "caremad."
+
+### Data object input commands
+
+For the sake of simplicity, we'll use [curl](http://curl.haxx.se/)
+in conjunction with Riak KV's [HTTP API][api http] to store the objects:
+
+```curl
+curl -XPUT http://localhost:8098/buckets/training/keys/foo \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad data goes here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bar \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad caremad'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/baz \
+  -H 'Content-Type: text/plain' \
+  -d 'nothing to see here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bam \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad'
+```
+
+### MapReduce invocation
+
+Invoking a MapReduce function from a compiled Erlang program requires
+that the function be compiled and distributed to all nodes.
+
+For interactive use, however, it's not necessary to do so; instead, we
+can invoke the client library from the
+[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define
+functions to send to Riak KV on the fly.
+
+First we define the map function, which specifies that we want to get
+the key for each object in the bucket `training` that contains the text
+`caremad`.
+
+We're going to generalize and optimize it a bit by supplying a
+compiled regular expression when we invoke MapReduce; our function
+will expect that as the third argument.
+
+```erlang
+ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of
+    {match, Matches} -> [{riak_object:key(O), length(Matches)}];
+    nomatch -> [{riak_object:key(O), 0}]
+end end.
+```
+
+Next, to call `ReFun` on all keys in the `training` bucket, we can do
+the following in the Erlang shell.
+
+{{% note title="Warning" %}}
+Do not use this in a production
+environment; listing all keys to identify those in the `training` bucket
+is a very expensive process.
+{{% /note %}}
+
+```erlang
+{ok, Re} = re:compile("caremad").
+```
+
+That will return output along the following lines, verifying that
+compilation has completed:
+
+```
+{ok,{re_pattern,0,0,
+                <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100,
+                ...>>}}
+```
+
+Then, we can create a socket link to our cluster:
+
+```erlang
+{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+    [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
+
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
diff --git a/content/riak/kv/2.2.6/developing/usage/reading-objects.md b/content/riak/kv/2.2.6/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..35334bd0a9
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/reading-objects.md
@@ -0,0 +1,247 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/bucket-types) page.
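+For example, a minimal sketch of the required commands (the empty props
+here are just for illustration; see the linked page for the details):
+
+```bash
+riak-admin bucket-type create animals '{"props":{}}'
+riak-admin bucket-type activate animals
+```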
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object
+`pr` | `0` | How many [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
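+
+The other parameters in the table can be passed in the same way. As a
+quick sketch (illustrative values only), a read requiring one primary
+vnode with `notfound_ok` disabled might look like:
+
+```curl
+curl "http://localhost:8098/types/animals/buckets/dogs/keys/rufus?pr=1&notfound_ok=false"
+```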
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+        .withOption(FetchOption.R, new Quorum(3))
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` diff --git a/content/riak/kv/2.2.6/developing/usage/replication.md b/content/riak/kv/2.2.6/developing/usage/replication.md new file mode 100644 index 0000000000..83daf3f886 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/usage/replication.md @@ -0,0 +1,588 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.2.6/dev/advanced/replication-properties + - /riak/kv/2.2.6/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/2.2.6/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/2.2.6/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties when making individual read or write
+requests; examples can be found in the [section below](#client-level-replication-settings).
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props` those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.2.6/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.2.6/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level.
You can use [bucket types]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+    new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location giraffeKey =
+    new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(giraffeKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_object:new({<<"w_equals_3">>, <<"animal_facts">>},
+                       <<"giraffe">>,
+                       <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak.
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+coordinating node will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects, but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum`
+to `true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
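+
+To make the arithmetic concrete, here is a small illustrative Python
+sketch (not part of any Riak client API, just the formula above):
+
+```python
+# Illustrative only: the quorum used by basic_quorum is floor(N/2) + 1.
+def basic_quorum_size(n):
+    """Number of vnodes Riak queries when basic_quorum is true."""
+    return n // 2 + 1
+
+for n in range(3, 9):
+    print(n, basic_quorum_size(n))
+# 3 -> 2, 4 -> 3, 5 -> 3, 6 -> 4, 7 -> 4, 8 -> 5
+```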
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` --- All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` --- This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` --- A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` --- Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{\"stats\":{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{\"stats\":{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/2.2.6/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/2.2.6/learn/concepts/causal-context#siblings">siblings</a>
+2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/2.2.6/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
diff --git a/content/riak/kv/2.2.6/developing/usage/search-schemas.md b/content/riak/kv/2.2.6/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..a75e39de67
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/search-schemas.md
@@ -0,0 +1,507 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/advanced/search-schema
+  - /riak/kv/2.2.6/dev/advanced/search-schema
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/), and [more]({{<baseurl>}}riak/kv/2.2.6/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value. Sufficiently large objects can take up tremendous amounts of
+disk space, so pay special attention to those indexes.
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('cartoons.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('cartoons.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+xml_file.close()
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon Thundercats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above object into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+
+```
+name=Lion-o
+age=30
+leader=true
+aliases.name=León-O
+aliases.desc_es=Señor de los ThunderCats
+aliases.name=Starlion
+aliases.desc_fr=Le jeune seigneur des Cosmocats
+```
+
+This means that our schema should handle `name`, `age`, `leader`,
+`aliases.name` (a dot is a valid field character), and
+`aliases.desc_*`, where the suffix indicates the language of the
+description (Spanish and French).
+
+### Required Schema Fields
+
+Solr schemas can be very complex, containing many types and analyzers.
+Refer to the [Solr 4.7 reference
+guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf)
+for a complete list. You should be aware, however, that there are a few
+fields that are required by Riak Search in order to properly distribute
+an object across a [cluster][concept clusters]. These fields are all prefixed
+with `_yz`, which stands for
+[Yokozuna](https://github.com/basho/yokozuna), the original code name
+for Riak Search.
+
+Below is a bare-minimum skeleton Solr schema. It won't do much for you
+other than allow Riak Search to properly manage your stored objects.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+ </types>
+</schema>
+```
+
+If you're missing any of the above fields, Riak Search will reject your
+custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`.
+
+In the table below, you'll find a description of the various required
+fields. You'll rarely need to use any fields other than `_yz_rt` (bucket
+type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err`
+can be helpful if you suspect that your extractors are failing.
+Malformed JSON or XML will cause Riak Search to index a key and set
+`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-------|:-----|:-----------
+`_yz_id` | ID | Unique identifier of this Solr document
+`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy)
+`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them
+`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Flag indicating whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you need to map the name and type of each field (a string,
+integer, boolean, etc.).
+
+When creating fields you can either create specific fields via the
+`field` element or an asterisk (`*`) wildcard field via `dynamicField`.
+A value that matches a specific field name will use that field;
+otherwise, Riak Search will attempt to match it against a dynamic field
+pattern.
+
+Besides a field `type`, you also must decide if a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored`, you
+can get the value back as a result of a query, but doing so also doubles
+the storage of the field (once in Riak, again in Solr). If a single Riak
+object can have more than one copy of the same matching field, you also
+must set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.0">
+ <fields>
+   <field name="name" type="string" indexed="true" stored="true" />
+   <field name="age" type="int" indexed="true" stored="false" />
+   <field name="leader" type="boolean" indexed="true" stored="false" />
+   <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" />
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+```
+
+Next, take note of the types you used in the fields and ensure that each
+of the field types is defined as a `fieldType` under the `types`
+element. Basic types such as `string`, `boolean`, and `int` have
+matching Solr classes. There are dozens more types, including many kinds
+of numbers (`float`, `tdouble`, `random`), `date` fields, and even
+geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we mapped any field that ends with
+`*_es` to Spanish, and `*_de` to German.
+
+```xml
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- Spanish -->
+    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+        <filter class="solr.SpanishLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+      </analyzer>
+    </fieldType>
+
+    <!-- German -->
+    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
+        <filter class="solr.GermanNormalizationFilterFactory"/>
+        <filter class="solr.GermanLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+      </analyzer>
+    </fieldType>
+ </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+provided to index without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to this will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+        at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+        at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+        at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+        at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+        at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+        ...
+        ...
+        ...
+```
+
+### Uploading
+
+Once you have finalized your custom schema in an .xml file, you can
+upload it to Riak KV as follows:
+
+```curl
+curl -v -XPUT $RIAK_HOST/search/schema/thundercats \
+  -H 'Content-Type:application/xml' \
+  --data-binary @thundercats_schema.xml
+```
+
+
+
+## Field Properties By Use Case
+
+Sometimes it can be tricky to decide whether a value should be `stored`,
+or whether `multiValued` is allowed. This handy table from the [Solr
+documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case)
+may help you pick field properties.
+
+An entry of `true` or `false` in the table indicates that the option
+must be set to the given value for the use case to function correctly.
+If no entry is provided, the setting of that attribute has no impact on
+the case.
+
+<table class="schemausecase">
+<thead>
+<tr>
+<th>Use Case</th>
+<th><code>indexed</code></th>
+<th><code>stored</code></th>
+<th><code>multiValued</code></th>
+<th><code>omitNorms</code></th>
+<th><code>termVectors</code></th>
+<th><code>termPositions</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>search within field</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>retrieve contents</td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use as unique key</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>sort on field</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td><code>true</code>[1](#notes)</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use field boosts[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>document boosts affect searches within field</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>highlighting</td>
+<td><code>true</code>[4](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td>[2](#notes)</td>
+<td><code>true</code>[3](#notes)</td>
+</tr>
+<tr>
+<td>faceting[5](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>add multiple values, maintaining order</td>
+<td></td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>field length affects doc score</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>MoreLikeThis[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>true</code>[6](#notes)</td>
+<td></td>
+</tr>
+</tbody></table>
diff --git a/content/riak/kv/2.2.6/developing/usage/search.md b/content/riak/kv/2.2.6/developing/usage/search.md
new file mode 100644
index 0000000000..fb97eb2d2b
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/search.md
@@ -0,0 +1,1451 @@
+---
+title: "Using Search"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Searching"
+    identifier: "usage_searching"
+    weight: 105
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/search
+  - /riak/kv/2.2.6/dev/using/search
+---
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak search 2.0 is an integration of Solr (for indexing and querying)
+and Riak (for storage and distribution). There are a few points of
+interest that a user of Riak search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak search uses AAE is in the
+[Riak search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you connect with to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Here we'll create an index
+named `famous` that uses the default schema.
+
+Both schema and index creation will be covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/2.2.6/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+  new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.2.6/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+  .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type's
+`search_index` property to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create one without any
+properties and then set individual buckets to be indexed.
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +### Associating an Index via Custom Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +default bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + +Once you have created the index association, any new data will be indexed on +ingest according to your schema. + +## Riak Search Security Setup + +[Security]({{<baseurl>}}riak/kv/2.2.6/using/security/) is a new feature as of +Riak 2.0 that lets an administrator limit access to certain resources. +In the case of search, your options are to limit administration of +schemas or indexes (the `search.admin` permission) to certain users, and +to limit querying (the `search.query` permission) to any index or to a +specific index. The example below shows the various options. 
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Liono, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python the content type is automatically set for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+  ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket)) + ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]); + +$storeObjectBuilder->build()->execute(); + +$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket)) + ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]); + +$storeObjectBuilder->build()->execute(); + +$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket)) + ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]); + +$storeObjectBuilder->build()->execute(); +``` + +```python +bucket = client.bucket_type('animals').bucket('cats') + +cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True}) +cat.store() + +cat = bucket.new('cheetara', {'name_s':'Cheetara', 'age_i':28, 'leader_b': True}) +cat.store() + +cat = bucket.new('snarf', {'name_s':'Snarf', 'age_i':43}) +cat.store() + +cat = bucket.new('panthro', {'name_s':'Panthro', 'age_i':36}) +cat.store() +``` + +```csharp +var lionoId = new RiakObjectId("animals", "cats", "liono"); +var lionoObj = new { name_s = "Lion-o", age_i = 30, leader = true }; +var lionoRiakObj = new RiakObject(lionoId, lionoObj); + +var cheetaraId = new RiakObjectId("animals", "cats", "cheetara"); +var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader = false }; +var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj); + +var snarfId = new RiakObjectId("animals", "cats", "snarf"); +var snarfObj = new { name_s = "Snarf", age_i = 43, leader = false }; +var snarfRiakObj = new RiakObject(snarfId, snarfObj); + +var panthroId = new RiakObjectId("animals", "cats", "panthro"); +var panthroObj = new { name_s = "Panthro", age_i = 36, leader = false }; +var panthroRiakObj = new RiakObject(panthroId, panthroObj); + +var rslts = client.Put(new[] { + lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj +}); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var objs = [ + [ 'liono', { name_s: 'Lion-o', age_i: 30, leader: true } ], + [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader: false } ], + [ 'snarf', { name_s: 'Snarf', age_i: 43, leader: false } ], + [ 'panthro', { name_s: 'Panthro', age_i: 36, leader: false } ], +]; + +var storeFuncs = []; +objs.forEach(function (o) { + var storeFunc = function (async_cb) { + var key = o[0]; + var value = o[1]; + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('application/json'); + riakObj.setBucketType('animals'); + riakObj.setBucket('cats'); + riakObj.setKey(key); + riakObj.setValue(value); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }; + storeFuncs.push(storeFunc); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } + // NB: all objects stored and indexed... 
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/2.2.6/developing/usage/custom-extractors). 
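+
+Purely as an illustration of what an extractor produces (the real
+extractors are Erlang modules that run inside Riak itself), here is a
+rough Python sketch of the flattening behavior described below:
+
+```python
+import json
+
+def extract(raw):
+    """Return (field, value) pairs roughly the way the JSON extractor would."""
+    pairs = []
+    def walk(prefix, node):
+        if isinstance(node, dict):
+            for key, val in node.items():
+                walk(prefix + "." + key if prefix else key, val)
+        elif isinstance(node, list):
+            for item in node:  # arrays simply repeat the same field name
+                walk(prefix, item)
+        else:
+            pairs.append((prefix, node))
+    walk("", json.loads(raw))
+    return pairs
+
+print(extract('{"name_s": "Lion-o", "aliases": [{"name": "Leon-O"}]}'))
+# [('name_s', 'Lion-o'), ('aliases.name', 'Leon-O')]
+```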
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/2.2.6/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated into a
+field insert in the Solr index document. Solr will index any field that
+it recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` is an integer, `_b` is a boolean, and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/2.2.6/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match.
+
+```java
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "name_s:Lion*")
+        .build();
+cluster.execute(searchOp);
+// This will collect the actual results as a List of Maps:
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+// This will display the number of results:
+System.out.println(results.size());
+```
+
+```ruby
+results = client.search("famous", "name_s:Lion*")
+p results
+p results['docs']
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('name_s:Lion*')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+
+var_dump($response->getDocs());
+```
+
+```python
+results = client.fulltext_search('famous', 'name_s:Lion*')
+print results
+print results['docs']
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "name_s")
+        .Search("Lion*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+foreach (RiakSearchResultDocument doc in searchResult.Documents)
+{
+    var args = new[] {
+        doc.BucketType,
+        doc.Bucket,
+        doc.Key,
+        string.Join(", ", doc.Fields.Select(f => f.Value).ToArray())
+    };
+    Debug.WriteLine(
+        format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}",
+        args: args);
+}
+```
+
+```javascript
+function search_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("docs:", JSON.stringify(rslt.docs));
+}
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('name_s:Lion*')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:fwrite("~p~n", [Results]),
+Docs = Results#search_results.docs,
+io:fwrite("~p~n", [Docs]).
+
+%% Please note that this example relies on an Erlang record definition
+%% for the search_result record found here:
+%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("name_s:Lion*").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+sc := cmd.(*riak.SearchCommand)
+if json, jerr := json.MarshalIndent(sc.Response.Docs, "", "  "); jerr != nil {
+    return jerr
+} else {
+    fmt.Println(string(json))
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp
+```
+
+The response to a query will be an object containing details about the
+response, such as a query's max score and a list of documents which
+match the given query. It's worth noting two things:
+
+* The documents returned are Search documents (a set of Solr
+  field/values), not a Riak value
+* The HTTP response is a direct Solr response, while the drivers use
+  Protocol Buffers and are encoded with different field names
+
+This is a common HTTP `response` value:
+
+```json
+{
+  "numFound": 1,
+  "start": 0,
+  "maxScore": 1.0,
+  "docs": [
+    {
+      "leader_b": true,
+      "age_i": 30,
+      "name_s": "Lion-o",
+      "_yz_id": "default_cats_liono_37",
+      "_yz_rk": "liono",
+      "_yz_rt": "default",
+      "_yz_rb": "cats"
+    }
+  ]
+}
+```
+
+The most important field returned is `docs`, which is the list of
+objects that each contain fields about matching index documents. The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+        .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation($key, $bucket, $btype)
+  ->build()
+  ->execute()
+  ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find the ages of all famous cats who are 30 or younger: `age_i:[0 TO
+30]`. If you wanted to find all cats 30 or older, you could include a
+glob as the top end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+  .Builder(BinaryValue.create(index), query)
+  .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`. 
+ 
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+  .Builder(BinaryValue.create(index), query)
+  .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('leader_b:true AND age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+        .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new Command\Builder\Search\DeleteIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []),
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that index's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_`.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents is returned in non-overlapping
+sequential subsets (in other words, *pages*). 
This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, where `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+  .Builder(BinaryValue.create("famous"), "*:*")
+  .withStart(start)
+  .withNumRows(rowsPerPage)
+  .build();
+cluster.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('*:*')
+    ->withMaxRows($maxRows)
+    ->withStartRow($start)
+    ->build()
+    ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results.
+
+If you want to sort by score without repeating results then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query. 
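+ 
+A key-ordered page query of that kind can be sketched as follows
+(assuming, as with the `start` and `rows` parameters above, that the
+Python client passes extra Solr parameters such as `sort` through to
+the query):
+
+```python
+# A sketch of key-stable pagination: sorting on bucket type, bucket, and
+# key instead of score keeps page boundaries consistent across queries.
+# The sort parameter is assumed to be forwarded to Solr unchanged.
+ROWS_PER_PAGE = 2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+results = client.fulltext_search(
+    'famous', '*:*',
+    start=start,
+    rows=ROWS_PER_PAGE,
+    sort='_yz_rt asc, _yz_rb asc, _yz_rk asc')
+print results['docs']
+```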
+ 
+[This issue](https://github.com/basho/yokozuna/issues/355) is caused by
+the way Search must minimally distribute a query across multiple Solr
+nodes (called a *coverage plan*) and then filter duplicate results to
+retrieve a full result set. Since this plan is frequently recalculated,
+successive page queries may use a different plan, and thus calculate
+alternate `score`s or filter different `_yz_id` values. We have plans to
+fix this shortcoming in a future version of Riak.
+
+### MapReduce
+
+Riak Search allows for piping search results as inputs for
+[MapReduce]({{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce/) jobs. This is useful for
+performing post-calculations on results or aggregations over ad-hoc
+queries. The Riak Search MapReduce integration works similarly to
+regular MapReduce, with the notable exception that your input is not a
+bucket, but rather index and query arguments to the `yokozuna` module
+and `mapred_search` function (an Erlang `module:function` pair that adds
+the Riak Search hook to MapReduce).
+
+```json
+{
+  "inputs": {
+    "module": "yokozuna",
+    "function": "mapred_search",
+    "arg": ["famous","NOT leader_b:true"]
+  },
+  "query": [
+    {
+      "map": {
+        "language": "javascript",
+        "keep": false,
+        "source": "function(v) { return [1]; }"
+      }
+    },
+    {
+      "reduce": {
+        "language": "javascript",
+        "keep": true,
+        "name": "Riak.reduceSum"
+      }
+    }
+  ]
+}
+```
+
+In this example we're searching for all famous cats that are not
+leaders and counting up the results using JavaScript for both map and
+reduce. It should return the reduced sum of `[3]`.
+
+```curl
+curl -XPOST $RIAK_HOST/mapred \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}'
+```
diff --git a/content/riak/kv/2.2.6/developing/usage/searching-data-types.md b/content/riak/kv/2.2.6/developing/usage/searching-data-types.md
new file mode 100644
index 0000000000..acf162a5d3
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/searching-data-types.md
@@ -0,0 +1,1683 @@
+---
+title: "Searching with Data Types"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Searching with Data Types"
+    identifier: "usage_search_data_types"
+    weight: 111
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/search/search-data-types
+  - /riak/kv/2.2.6/dev/search/search-data-types
+---
+
+Although [Riak Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types) function differently from other
+Riak objects in some respects, when you're using Search you can think of
+them as normal Riak objects with special metadata attached (metadata
+that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#maps)
+can be indexed and have their contents searched just like other Riak
+objects.
+
+## Data Type MIME Types
+
+Like all objects stored in Riak, Riak Data Types are assigned content
+types. Unlike other Riak objects, this happens automatically. When you
+store, say, a counter in Riak, it will automatically be assigned the
+type `application/riak_counter`. 
The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types has its own default schema, with the exception of
+maps, which means that the `_yz_default` schema will automatically index
+Data Types on the basis of their assigned content type. This means that
+there is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#data-types-and-search-examples) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#maps), there are four schemas
+for embedded, aka dynamic, fields. 
Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued.
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings.
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('scores')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index. 
So let's
+start playing with some counters. All counters will be stored in the
+bucket `people`, while the key for each counter will be the username of
+each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu1 = new CounterUpdate(10);
+UpdateCounter update1 = new UpdateCounter.Builder(christopherHitchensCounter, cu1)
+        .build();
+client.execute(update1);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+CounterUpdate cu2 = new CounterUpdate(25);
+UpdateCounter update2 = new UpdateCounter.Builder(joanRiversCounter, cu2)
+        .build();
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10)
+    ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+    ->buildLocation('joan_rivers', 'people', 'counters')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"chris_hitchens">>,
+                            riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"joan_rivers">>,
+                            riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25. 
Let's query to see how many counters have a value greater
+than 20, just to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[20 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:%5B20%20TO%20*%5D" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over 20. 
+ 
+To find out which counter that is, we can dig into our results:
+
+```java
+// Using the "results" object from above:
+int numberFound = results.numResults();
+Map<String, List<String>> foundObject = results.getAllResults().get(0);
+String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
+String bucket = foundObject.get("_yz_rb").get(0); // "people"
+String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
+```
+
+```ruby
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```php
+$doc = $response->getDocs()[0];
+
+// The key
+$doc->_yz_rk; // 'joan_rivers'
+
+// The bucket
+$doc->_yz_rb; // 'people'
+
+// The bucket type
+$doc->_yz_rt; // 'counters'
+```
+
+```python
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+var doc = rslt.docs[0];
+
+var key = doc['_yz_rk'];
+var bucket = doc['_yz_rb'];
+var bucketType = doc['_yz_rt'];
+```
+
+```erlang
+Doc = lists:nth(1, Docs),
+Key = proplists:get_value(<<"_yz_rk">>, Doc),
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
+BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
+```
+
+```curl
+# Use the JSON object from above to locate bucket, key, and bucket type
+# information
+```
+
+Alternatively, we can see how many counters have values below 15:
+
+```java
+String index = "scores";
+String query = "counter:[* TO 15]";
+SearchOperation searchOp = new SearchOperation
+  .Builder(BinaryValue.create(index), query)
+  .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[* TO 15]')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[* TO 15]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[* TO 15]')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[* TO 15]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>). 
+ 
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:%5B*%20TO%2015%5D" | json_pp
+```
+
+Or we can see how many counters have a value of 17 exactly:
+
+```java
+// Using the same method as above, just changing the query:
+String query = "counter:17";
+```
+
+```ruby
+results = client.search('scores', 'counter:17')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:17')
+    ->build()
+    ->execute();
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:17')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:17");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:17')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp
+```
+
+## Sets Example
+
+Let's say that we're storing information about the hobbies of a group of
+people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#sets) simply called `sets`,
+like so:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak-admin bucket-type activate sets
+```
+
+Now, we'll create a Search index called `hobbies` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies");
+StoreIndex storeIndex =
+        new StoreIndex.Builder(hobbiesIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('hobbies', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('hobbies')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('hobbies', '_yz_default')
+```
+
+```csharp
+var searchIndex = new SearchIndex("hobbies", "_yz_default");
+var rslt = client.PutSearchIndex(searchIndex);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'hobbies'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/hobbies \
+  -H 'Content-Type: application/json' \
+  -d '{"schema": "_yz_default"}'
+```
+
+Now, we can modify our `sets` bucket type to associate that bucket type
+with our `hobbies` index:
+
+```bash
+riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}'
+```
+
+Now, all of the sets that we store in any bucket with the bucket type
+`sets` will be automatically indexed as a set. 
So let's say that we
+store two sets for two different people describing their respective
+hobbies, in the bucket `people`:
+
+```java
+Namespace peopleBucket = new Namespace("sets", "people");
+
+Location mikeDitkaSet = new Location(peopleBucket, "ditka");
+SetUpdate su1 = new SetUpdate()
+        .add("football")
+        .add("winning");
+UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();
+
+Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
+SetUpdate su2 = new SetUpdate()
+        .add("wailing")
+        .add("rocking")
+        .add("winning");
+UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();
+
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+
+ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('football')
+    ->add('winning')
+    ->buildLocation('ditka', 'people', 'sets');
+
+$builder->build()->execute();
+
+$builder->add('wailing')
+    ->add('rocking')
+    ->add('winning')
+    ->buildLocation('dio', 'people', 'sets')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('people')
+
+mike_ditka_set = Set(bucket, 'ditka')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+mike_ditka_set.store()
+
+ronnie_james_dio_set = Set(bucket, 'dio')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+ronnie_james_dio_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("ditka")
+    .WithAdditions(new[] { "football", "winning" })
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("dio")
+    .WithAdditions(new[] { "wailing", "rocking", "winning" })
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'ditka',
+            additions: ['football', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'dio',
+            additions: ['wailing', 'rocking', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+MikeDitkaSet0 = riakc_set:new(),
+MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet0),
+MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
+RonnieJamesDioSet0 = riakc_set:new(),
+RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet0),
+RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
+RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),
+
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"ditka">>,
+                            riakc_set:to_op(MikeDitkaSet2)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"dio">>,
+                            riakc_set:to_op(RonnieJamesDioSet3)). 
+ 
+```
+
+Now, we can query our `hobbies` index to see if anyone has the hobby
+`football`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:football";
+```
+
+```ruby
+results = client.search('hobbies', 'set:football')
+# This should return a Hash with fields like 'num_found' and 'docs'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('hobbies')
+    ->withQuery('set:football')
+    ->build()
+    ->execute();
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:football')
+# This should return a dict with fields like 'num_found' and 'docs'
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:football");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("sets numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:football')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp
+```
+
+Let's see how many sets contain the element `football`:
+
+```java
+// Using the same method explained above for getting search results:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results['num_found']
+# 1
+```
+
+```php
+$response->getNumFound(); // 1
+```
+
+```python
+results['num_found']
+# 1
+```
+
+```csharp
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+rslt.numFound;
+// 1
+```
+
+```erlang
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+# Use the JSON response from above to get the num_found value
+```
+
+Success! We stored two sets, only one of which contains the element
+`football`. Now, let's see how many sets contain the element `winning`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:winning";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('hobbies')
+    ->withQuery('set:winning')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:winning");
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:winning')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>).
+NumberFound = Results#search_results.num_found.
+%% 2
+```
+
+Just as expected, both sets we stored contain the element `winning`. 
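+ 
+Set queries also compose with the boolean operators shown earlier. As a
+quick sketch reusing the Python client calls from above, we can ask
+which stored set contains both `rocking` and `wailing`:
+
+```python
+# Only Ronnie James Dio's set contains both elements.
+results = client.fulltext_search('hobbies', 'set:rocking AND set:wailing')
+print results['num_found']          # 1
+print results['docs'][0]['_yz_rk']  # 'dio'
+```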
+ +## Maps Example + +This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types) +tutorial. That tutorial walks you through storing CMS-style user data in +Riak [maps]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/#maps), and we'd suggest that you +familiarize yourself with that tutorial first. More specifically, user +data is stored in the following fields in each user's map: + +* first name in a `first_name` register +* last name in a `last_name` register +* whether the user is an enterprise customer in an `enterprise_customer` + flag +* the number of times the user has visited the company page in a + `page_visits` counter +* a list of the user's interests in an `interests` set + +First, let's create and activate a bucket type simply called `maps` that +is set up to store Riak maps: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type activate maps +``` + +Now, let's create a search index called `customers` using the default +schema: + +```java +YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default"); +StoreIndex storeIndex = + new StoreIndex.Builder(customersIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('customers', '_yz_default') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('customers') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('customers', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("customers", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'customers' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>). 
+ 
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Idris"))
+        .update("last_name", new RegisterUpdate("Elba"))
+        .update("enterprise_customer", new FlagUpdate(false))
+        .update("page_visits", new CounterUpdate(10))
+        .update("interests", new SetUpdate().add("acting", "being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Joan"))
+        .update("last_name", new RegisterUpdate("Jett"))
+        // Joan Jett is not an enterprise customer, so we don't need to
+        // explicitly disable the "enterprise_customer" flag, as all
+        // flags are disabled by default
+        .update("page_visits", new CounterUpdate(25))
+        .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Idris')
+    ->updateRegister('last_name', 'Elba')
+    ->updateFlag('enterprise_customer', true)
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Joan')
+    ->updateRegister('last_name', 'Jett')
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+    ->buildLocation('joan_jett', 
'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, false)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters above 15. 
Unlike the
+counters example above, we have to specify _which_ counter we're
+querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('page_visits_counter:[15 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter
+above 15. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+        results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register;
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we stored thus far had an `interests` set. First, let's
+see how many of our maps even _have_ sets called `interests` using a
+wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set. 
Now let's see how
+many maps have items in `interests` sets that begin with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+        results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:loving*')
+    ->build()
+    ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has one item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will have a first name only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+        .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+        .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate)
+        .build();
+client.execute(addSubMap);
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var 
mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'John Luther');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'Robert Plant');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+Querying maps within maps involves constructing queries that separate the
+different levels of depth with a single dot. Here's an example query for
+finding maps that have a `name` register embedded within an `alter_ego`
+map:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Once we know how to query embedded fields like this, we can query those
+just like any other. 
Let's find out which maps have an `alter_ego`
+sub-map that contains a `name` register that ends with `Plant`, and
+display that customer's first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+        results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*Plant')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
diff --git a/content/riak/kv/2.2.6/developing/usage/secondary-indexes.md b/content/riak/kv/2.2.6/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..8479f939ff
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/secondary-indexes.md
@@ -0,0 +1,2026 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/2i
+  - /riak/kv/2.2.6/dev/using/2i
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary indexes can be either binaries or strings, such as
+`sensor_1_data`, `admin_user`, or `click_event`, or integers, such as
+`99` or `141121`. 
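+ 
+The clients expose these tags as index fields whose names end in `_bin`
+(for binary/string indexes) or `_int` (for integer indexes). As a
+minimal sketch in Python — `obj` here stands in for an already-created
+Riak object, and the field names are hypothetical; full insertion
+examples follow below:
+
+```python
+# Tag a hypothetical object with one string index and one integer index.
+# Index field names end in _bin for strings and _int for integers.
+obj.add_index('tag_bin', 'click_event')
+obj.add_index('date_int', 141121)
+obj.store()
+```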
+ +[Riak Search]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/2.2.6/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
Let's say that an
+application would like to add a Twitter handle and an email address to this
+object as secondary indexes.
+
+```java
+Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith");
+
+// In the Java client (and all clients), if you do not specify a bucket type,
+// the client will use the default type. And so the following store command
+// would be equivalent to the one above:
+Location johnSmithKey = new Location(new Namespace("users"), "john_smith");
+
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'user_data':{ ... }}"));
+
+obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123");
+obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com");
+
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(johnSmithKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('default').bucket('users')
+obj = Riak::RObject.new(bucket, 'john_smith')
+obj.content_type = 'application/json'
+obj.raw_data = '{"user_data":{ ... }}'
+
+# String/binary indexes must be set as an array of strings
+obj.indexes['twitter_bin'] = %w{ jsmith123 }
+obj.indexes['email_bin'] = %w{ jsmith@basho.com }
+obj.store
+
+# In the Ruby client (and all clients), if you do not specify a bucket
+# type, the client will use the default type. And so the following set
+# of commands would be equivalent to the one above:
+
+bucket = client.bucket('users')
+# repeat the same commands for building the object
+obj.store
+```
+
+```php
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter_bin', 'jsmith123')
+    ->addValueToIndex('email_bin', 'jsmith@basho.com');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->buildLocation('john_smith', 'users', 'default')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('default').bucket('users')
+# In the Python client (and all clients), if you do not specify a bucket type,
+# the client will use the default type. And so the following store command
+# would be equivalent to the one above:
+bucket = client.bucket('users')
+
+obj = RiakObject(client, bucket, 'john_smith')
+obj.content_type = 'text/plain'
+obj.data = '...user data...'
+obj.add_index('twitter_bin', 'jsmith123')
+obj.add_index('email_bin', 'jsmith@basho.com')
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("default", "users", "john_smith");
+var obj = new RiakObject(id, "...user data...",
+    RiakConstants.ContentTypes.TextPlain);
+obj.BinIndex("twitter").Set("jsmith123");
+obj.BinIndex("email").Set("jsmith@basho.com");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucket('users');
+riakObj.setKey('john_smith');
+riakObj.setValue('...user data...');
+riakObj.addToIndex('twitter_bin', 'jsmith123');
+riakObj.addToIndex('email_bin', 'jsmith@basho.com');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"default">>, <<"users">>},
+                    <<"john_smith">>,
+                    <<"...user data...">>,
+                    <<"text/plain">>),
+%% In the Erlang client (and all clients), if you do not specify a bucket type,
+%% the client will use the default type.
And so the following object would be +%% equivalent to the one above: + +Obj = riakc_obj:new(<<"users">>, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index( + MD1, + [{{binary_index, "twitter"}, [<<"jsmith123">>]}, + {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]), +Obj2 = riakc_obj:update_metadata(Obj, MD2), +riakc_pb_socket:put(Pid, Obj2). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + BucketType: "indexes", + Bucket: "users", + Key: "john_smith", + Value: []byte("…user data…"), +} + +obj.AddToIndex("twitter_bin", "jsmith123") +obj.AddToIndex("email_bin", "jsmith@basho.com") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ + -H 'x-riak-index-twitter_bin: jsmith123' \ + -H 'x-riak-index-email_bin: jsmith@basho.com' \ + -H 'Content-Type: application/json' \ + -d '{"userData":"data"}' +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.2.6/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started) section. + +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if 
(rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucket('users')
+    .withIndexName('twitter_bin')
+    .withIndexKey('jsmith123')
+    .withCallback(query_cb)
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+{ok, Results} =
+    riakc_pb_socket:get_index(Pid,
+                              <<"users">>, %% bucket
+                              {binary_index, "twitter"}, %% index name
+                              <<"jsmith123">>). %% index
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("users").
+    WithIndexName("twitter_bin").
+    WithIndexKey("jsmith123").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```
+
+The response:
+
+```java
+john_smith
+```
+
+```ruby
+["john_smith"]
+```
+
+```php
+['john_smith']
+```
+
+```python
+['john_smith']
+```
+
+```csharp
+john_smith
+```
+
+```javascript
+john_smith
+```
+
+```erlang
+{ok,{index_results_v1,[<<"john_smith">>],
+                      undefined,undefined}}.
+```
+
+```golang
+john_smith
+```
+
+```curl
+{
+  "keys": [
+    "john_smith"
+  ]
+}
+```
+
+## Examples
+
+To run the following examples, make sure that Riak is configured to use
+an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].
+
+## Indexing Objects
+
+The following example indexes four different objects. Notice that we're
+storing both integer and string (aka binary) fields. Field names are
+automatically lowercased, some fields have multiple values, and
+duplicate fields are automatically de-duplicated:
+
+```java
+Namespace peopleBucket = new Namespace("indexes", "people");
+
+RiakObject larry = new RiakObject()
+        .setValue(BinaryValue.create("My name is Larry"));
+larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
+larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
+StoreValue storeLarry = new StoreValue.Builder(larry)
+        .withLocation(peopleBucket.setKey("larry"))
+        .build();
+client.execute(storeLarry);
+
+RiakObject moe = new RiakObject()
+        .setValue(BinaryValue.create("My name is Moe"));
+moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
+moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
+StoreValue storeMoe = new StoreValue.Builder(moe)
+        .withLocation(peopleBucket.setKey("moe"))
+        .build();
+client.execute(storeMoe);
+
+RiakObject curly = new RiakObject()
+        .setValue(BinaryValue.create("My name is Curly"));
+curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
+curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
+StoreValue storeCurly = new StoreValue.Builder(curly)
+        .withLocation(peopleBucket.setKey("curly"))
+        .build();
+client.execute(storeCurly);
+
+RiakObject veronica = new RiakObject()
+        .setValue(BinaryValue.create("My name is Veronica"));
+veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
+        .add("val4").add("val4");
+veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
+        .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
+StoreValue storeVeronica = new StoreValue.Builder(veronica)
+        .withLocation(peopleBucket.setKey("veronica"))
+        .build();
+client.execute(storeVeronica);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 =
Riak::RObject.new(bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'My name is Larry'
+obj1.indexes['field1_bin'] = %w{ val1 }
+# Like binary/string indexes, integer indexes must be set as an array,
+# even if you wish to add only a single index
+obj1.indexes['field2_int'] = [1001]
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'My name is Moe'
+obj2.indexes['Field1_bin'] = %w{ val2 }
+obj2.indexes['Field2_int'] = [1002]
+obj2.store
+
+obj3 = Riak::RObject.new(bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.raw_data = 'My name is Curly'
+obj3.indexes['FIELD1_BIN'] = %w{ val3 }
+obj3.indexes['FIELD2_INT'] = [1003]
+obj3.store
+
+obj4 = Riak::RObject.new(bucket, 'veronica')
+obj4.content_type = 'text/plain'
+obj4.raw_data = 'My name is Veronica'
+obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
+# Assigning to an index key replaces its values, so all of the integer
+# values are set in a single array
+obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1004, 1007]
+obj4.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('people', 'indexes');
+
+$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val1')
+    ->addValueToIndex('field2_int', 1001);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('larry', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('Field1_bin', 'val2')
+    ->addValueToIndex('Field2_int', 1002);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('moe', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('FIELD1_BIN', 'val3')
+    ->addValueToIndex('FIELD2_int', 1003);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('curly', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4a')
+    ->addValueToIndex('field1_bin', 'val4b')
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1005)
+    ->addValueToIndex('field2_int', 1006)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1007);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = RiakObject(client, bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.data = 'My name is Larry'
+obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.data = 'Moe'
+obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
+obj2.store()
+
+obj3 = RiakObject(client, bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.data = 'Curly'
+obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
+obj3.store()
+
+obj4 = RiakObject(client, bucket,
'veronica') +obj4.content_type = 'text/plain' +obj4.data = 'Veronica' +obj4.add_index('field1_bin', 'val4').add_index('field1_bin', 'val4a').add_index('field1_bin', 'val4b').add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1005).add_index('field2_int', 1006).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1007) +obj4.store() +``` + +```csharp +var larryId = new RiakObjectId("indexes", "people", "larry"); +var larry = new RiakObject(larryId, "My name is Larry", + RiakConstants.ContentTypes.TextPlain); + +larry.BinIndex("field1").Set("val1"); +larry.IntIndex("field2").Set(1001); + +client.Put(larry); + +var moeId = new RiakObjectId("indexes", "people", "moe"); +var moe = new RiakObject(moeId, "My name is Moe", + RiakConstants.ContentTypes.TextPlain); + +moe.BinIndex("Field1").Set("val2"); +moe.IntIndex("Field2").Set(1002); + +client.Put(moe); + +var curlyId = new RiakObjectId("indexes", "people", "curly"); +var curly = new RiakObject(curlyId, "My name is Curly", + RiakConstants.ContentTypes.TextPlain); + +curly.BinIndex("FIELD1").Set("val3"); +curly.IntIndex("FIELD2").Set(1003); + +client.Put(curly); + +var veronicaId = new RiakObjectId("indexes", "people", "veronica"); +var veronica = new RiakObject(veronicaId, "My name is Veronica", + RiakConstants.ContentTypes.TextPlain); + +veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" }); +veronica.IntIndex("FIELD2").Set(new BigInteger[] { + 1004, 1005, 1006, 1004, 1004, 1007 +}); + +client.Put(veronica); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var storeFuncs = [ + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('larry'); + riakObj.setValue('My name is Larry'); + riakObj.addToIndex('field1_bin', 'val1'); + riakObj.addToIndex('field2_int', 1001); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('moe'); + riakObj.setValue('My name is Moe'); + riakObj.addToIndex('Field1_bin', 'val2'); + riakObj.addToIndex('Field2_int', 1002); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('curly'); + riakObj.setValue('My name is Curly'); + riakObj.addToIndex('FIELD1_BIN', 'val3'); + riakObj.addToIndex('FIELD2_INT', 1003); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('veronica'); + riakObj.setValue('My name is Veronica'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4a'); + riakObj.addToIndex('FIELD1_bin', 'val4b'); + riakObj.addToIndex('FIELD2_int', 1004); + 
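// Riak de-duplicates index values: the 1004 added above and the
+        // repeats just below are stored only once.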
riakObj.addToIndex('FIELD2_int', 1005);
+        riakObj.addToIndex('FIELD2_int', 1006);
+        riakObj.addToIndex('FIELD2_int', 1004);
+        riakObj.addToIndex('FIELD2_int', 1004);
+        riakObj.addToIndex('FIELD2_int', 1007);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    }
+];
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Larry = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"larry">>,
+    <<"My name is Larry">>,
+    <<"text/plain">>),
+LarryMetadata = riakc_obj:get_update_metadata(Larry),
+LarryIndexes = riakc_obj:set_secondary_index(
+    LarryMetadata,
+    [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}]
+),
+LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes).
+
+Moe = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"moe">>,
+    <<"My name is Moe">>,
+    <<"text/plain">>),
+MoeMetadata = riakc_obj:get_update_metadata(Moe),
+MoeIndexes = riakc_obj:set_secondary_index(
+    MoeMetadata,
+    [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}]
+),
+MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes).
+
+Curly = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"curly">>,
+    <<"My name is Curly">>,
+    <<"text/plain">>),
+CurlyMetadata = riakc_obj:get_update_metadata(Curly),
+CurlyIndexes = riakc_obj:set_secondary_index(
+    CurlyMetadata,
+    [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}]
+),
+CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes).
+
+Veronica = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"veronica">>,
+    <<"My name is Veronica">>,
+    <<"text/plain">>),
+VeronicaMetadata = riakc_obj:get_update_metadata(Veronica),
+VeronicaIndexes = riakc_obj:set_secondary_index(
+    VeronicaMetadata,
+    [{{binary_index, "field1"}, [<<"val4">>]}, {{binary_index, "field1"}, [<<"val4">>, <<"val4a">>, <<"val4b">>]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1005]}, {{integer_index, "field2"}, [1006]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1007]}]
+),
+VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes).
+```
+
+```golang
+o1 := &riak.Object{
+    Key:   "larry",
+    Value: []byte("My name is Larry"),
+}
+o1.AddToIndex("field1_bin", "val1")
+o1.AddToIntIndex("field2_int", 1001)
+
+o2 := &riak.Object{
+    Key:   "moe",
+    Value: []byte("My name is Moe"),
+}
+o2.AddToIndex("Field1_bin", "val2")
+o2.AddToIntIndex("Field2_int", 1002)
+
+o3 := &riak.Object{
+    Key:   "curly",
+    Value: []byte("My name is Curly"),
+}
+o3.AddToIndex("FIELD1_BIN", "val3")
+o3.AddToIntIndex("FIELD2_INT", 1003)
+
+o4 := &riak.Object{
+    Key:   "veronica",
+    Value: []byte("My name is Veronica"),
+}
+o4.AddToIndex("FIELD1_bin", "val4")
+o4.AddToIndex("FIELD1_bin", "val4")
+o4.AddToIndex("FIELD1_bin", "val4a")
+o4.AddToIndex("FIELD1_bin", "val4b")
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1005)
+o4.AddToIntIndex("FIELD2_int", 1006)
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1007)
+
+objs := [...]*riak.Object{o1, o2, o3, o4}
+
+wg := &sync.WaitGroup{}
+for _, obj := range objs {
+    obj.ContentType = "text/plain"
+    obj.Charset = "utf-8"
+    obj.ContentEncoding = "utf-8"
+
+    cmd, err := riak.NewStoreValueCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("people").
+        WithContent(obj).
+ Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field1_bin: val1" \ + -H "x-riak-index-field2_int: 1001" \ + -d 'My name is Larry' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \ + -H "x-riak-index-Field1_bin: val2" \ + -H "x-riak-index-Field2_int: 1002" \ + -d 'My name is Moe' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \ + -H "X-RIAK-INDEX-FIELD1_BIN: val3" \ + -H "X-RIAK-INDEX-FIELD2_INT: 1003" \ + -d 'My name is Curly' + +curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \ + -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \ + -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1007" \ + -d 'My name is Veronica' +``` + +The above objects will end up having the following secondary indexes, +respectively: + +* `Larry` --- Binary index `field1_bin` and integer index `field2_int` +* `Moe` --- Binary index `field1_bin` and integer index `field2_int` + (note that the index names are set to lowercase by Riak) +* `Curly` --- Binary index `field1_bin` and integer index `field2_int` + (note again that the index names are set to lowercase) +* `Veronica` --- Binary index `field1_bin` with the values `val4`, + `val4a`, and `val4b` and integer index `field2_int` with the values + `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed) + +As these examples show, there are safeguards in Riak that both normalize +the names of indexes and prevent the accumulation of redundant indexes. + +## Invalid Field Names and Types + +The following examples demonstrate what happens when an index field is +specified with an invalid field name or type. The system responds with +`400 Bad Request` and a description of the error. + +Invalid field name: + +```java +// The Java client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_foo'] = [1001] + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter', 'jsmith123'); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_foo', 1001) + +# Result: +riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'." 
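+
+# (Comment, not output: '_bin' for binary/string indexes and '_int' for
+# integer indexes are the only valid 2i field suffixes.)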
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
+>
+> To avoid issues such as the above, a new option has been added to the
+`riak.conf` file that lets you enable or disable a node's participation
+in 2i queries. Setting `participate_in_coverage=disabled` prevents the
+node in question from participating. This is recommended for nodes newly
+added to the cluster that have not yet received all of their data, so
+that they do not take part in 2i queries and return inconsistent
+results. Changing the `participate_in_coverage` setting requires Riak to
+be restarted on that node for the change to take effect. The default
+setting is `enabled`.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withScalarValue('val1')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
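+    // Binary (_bin) index queries take a string key; integer (_int)
+    // queries use WithIntIndexKey instead (see below).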
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withScalarValue(1001)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field1_bin",
+    "key": "val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4")
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2'..'val4')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withRangeValue('val2', 'val4')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2', 'val4')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withRange('val2', 'val4')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"}, %% index name
+    <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4"
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithRange("val2", "val4").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4
+```
+
+Or query an integer index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002..1004)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withRangeValue(1002, 1004)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002, 1004)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withRange(1002, 1004)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"}, %% index name
+    1002, 1004 %% range query for keys between 1002 and 1004
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntRange(1002, 1004).
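+    // WithIntRange is the integer-index counterpart of WithRange above.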
+    Build()
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field2_int",
+    "start": 1002,
+    "end": 1004
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('rock', 'rocl')
+    ->withReturnTerms()
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">> %% range query for keys between "rock" and "rocl"
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withMaxResults(5)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5)
+```
+
+```csharp
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+function do_query(continuation) {
+    var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+        .withBucketType('indexes')
+        .withBucket('tweets')
+        .withIndexName('hashtags_bin')
+        .withRange('ri', 'ru')
+        .withMaxResults(5)
+        .withCallback(pagination_cb);
+
+    if (continuation) {
+        binIdxCmdBuilder.withContinuation(continuation);
+    }
+
+    client.execute(binIdxCmdBuilder.build());
+}
+
+var query_keys = [];
+function pagination_cb(err, rslt) {
+    if (err) {
+        logger.error("query_cb err: '%s'", err);
+        return;
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+        query_keys = [];
+
+        if (rslt.continuation) {
+            do_query(rslt.continuation);
+        }
+    }
+
+    if (rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+do_query();
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [{max_results, 5}]
+).
+```
+
+```golang
+func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error {
+    builder := riak.NewSecondaryIndexQueryCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("tweets").
+        WithIndexName("hashtags_bin").
+        WithRange("ri", "ru").
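+        // Request pages of at most five results; each response carries
+        // a continuation token that links to the next page.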
+        WithMaxResults(5)
+
+    if continuation != nil && len(continuation) > 0 {
+        builder.WithContinuation(continuation)
+    }
+
+    cmd, err := builder.Build()
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    printIndexQueryResults(cmd)
+
+    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
+    if sciq.Response == nil {
+        return errors.New("[DevUsing2i] expected response but did not get one")
+    }
+
+    rc := sciq.Response.Continuation
+    if rc != nil && len(rc) > 0 {
+        return doPaginatedQuery(cluster, sciq.Response.Continuation)
+    }
+
+    return nil
+}
+
+func queryingPagination(cluster *riak.Cluster) error {
+    return doPaginatedQuery(cluster, nil)
+}
+```
+
+```curl
+curl 'localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true'
+```
+
+Here is an example JSON response (your client-specific response may differ):
+
+```json
+{
+  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
+  "results": [
+    { "rice": "349222574510710785" },
+    { "rickross": "349222868095217664" },
+    { "ridelife": "349221819552763905" },
+    { "ripjake": "349220649341952001" },
+    { "ripjake": "349220687057129473" }
+  ]
+}
+```
+
+Take the continuation value from the previous result set and feed it
+back into the query.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
+        .withMaxResults(5)
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+  'hashtags_bin',
+  'ri'..'ru',
+  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+  max_results: 5,
+  return_terms: true
+)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+    'hashtags_bin',
+    'ri', 'ru',
+    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+    max_results=5,
+    return_terms=True
+)
+```
+
+```csharp
+// rslt is the previous 2i fetch result
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+// See above example
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [
+        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
+        {max_results, 5},
+        {return_terms, true}
+    ]
+).
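+%% Each page's response includes a fresh continuation value until the
+%% result set is exhausted.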
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl 'localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true'
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with `pagination` and `return_terms`.
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys.
See the pagination example above: hash
+tags (2i keys) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages which contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index like in the previous example and pipes this into a MapReduce job
+that counts the number of records in the `people` bucket. In order to
+improve efficiency, the batch size has been increased from the default
+size of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key": "people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
diff --git a/content/riak/kv/2.2.6/developing/usage/security.md b/content/riak/kv/2.2.6/developing/usage/security.md
new file mode 100644
index 0000000000..4b819103ee
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/security.md
@@ -0,0 +1,99 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/advanced/client-security
+  - /riak/kv/2.2.6/dev/advanced/client-security
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/2.2.6/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, aka [security sources]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients present a certificate signed by a shared
+  Certificate Authority (CA)
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified using the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis.
This means that +you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{<baseurl>}}riak/kv/2.2.6/developing/usage) have to use username and password. The approach +that you adopt will depend on your security needs. + +This document provides a general overview of how that works. For +managing security in Riak itself, see the following documents: + +* [Authentication and Authorization]({{<baseurl>}}riak/kv/2.2.6/using/security/basics) +* [Managing Security Sources]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/) + +We also provide client-library-specific guides for the following +officially supported clients: + +* [Java]({{<baseurl>}}riak/kv/2.2.6/developing/usage/security/java) +* [Ruby]({{<baseurl>}}riak/kv/2.2.6/developing/usage/security/ruby) +* [PHP]({{<baseurl>}}riak/kv/2.2.6/developing/usage/security/php) +* [Python]({{<baseurl>}}riak/kv/2.2.6/developing/usage/security/python) +* [Erlang]({{<baseurl>}}riak/kv/2.2.6/developing/usage/security/erlang) + +## Certificates, Keys, and Authorities + +If Riak security is enabled, all client operations, regardless of the +security source you choose for those clients, must be over a secure SSL +connection. If you are using a self-generated Certificate Authority +(CA), Riak and connecting clients will need to share that CA. + +To use certificate-based auth, you will need to create a Public Key +Infrastructure (PKI) based on +[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central +foundation of your PKI should be a Certificate Authority (CA), created +inside of a secure environment, that can be used to sign certificates. +In addition to a CA, your client will need to have access to a private +key shared only by the client and Riak as well as a CA-generated +certificate. + +To prevent so-called [Man-in-the-Middle +attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private +keys should never be shared beyond Riak and connecting clients. + +> **HTTP not supported** +> +> Certificate-based authentication is available only through Riak's +[Protocol Buffers]({{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{<baseurl>}}riak/kv/2.2.6/developing/api/http). + +### Default Names + +In Riak's [configuration files]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#security), the +default certificate file names are as follows: + +Cert | Filename +:----|:------- +Certificate authority (CA) | `cacertfile.pem` +Private key | `key.pem` +CA-generated cert | `cert.pem` + +These filenames will be used in the client-library-specific tutorials. diff --git a/content/riak/kv/2.2.6/developing/usage/security/erlang.md b/content/riak/kv/2.2.6/developing/usage/security/erlang.md new file mode 100644 index 0000000000..a600a269b9 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/usage/security/erlang.md @@ -0,0 +1,114 @@ +--- +title_supertext: "Client Security:" +title: "Erlang" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Erlang" + identifier: "usage_security_erlang" + weight: 103 + parent: "usage_security" +toc: true +aliases: + - /riak/2.2.6/dev/advanced/client-security/erlang + - /riak/kv/2.2.6/dev/advanced/client-security/erlang +--- + +This tutorial shows you how to set up a Riak Erlang client to +authenticate itself when connecting to Riak. 
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", ""},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+  ],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+```
+
+Please note that you do not need to specify a password if you are not
+using password-based authentication. If you are using a different
+security source, Riak will ignore the password, so you can enter an
+empty string (as in the example above) or anything you'd like.
+
+This client is not currently set up to use any of the available security
+sources, with the exception of trust-based authentication, provided that
+the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which the client is connecting has been specified as trusted. More
+on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `SecurityOptions` list from
+above. We'll use the password `rosebud` here and in the rest of the
+examples.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", "rosebud"},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+  ],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
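+
+%% As a quick sanity check, the authenticated connection can be
+%% verified by pinging the node with riakc_pb_socket:ping/1, which
+%% returns pong on success:
+%%
+%% pong = riakc_pb_socket:ping(Pid).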
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.2.6/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` diff --git a/content/riak/kv/2.2.6/developing/usage/security/java.md b/content/riak/kv/2.2.6/developing/usage/security/java.md new file mode 100644 index 0000000000..806d167e24 --- /dev/null +++ b/content/riak/kv/2.2.6/developing/usage/security/java.md @@ -0,0 +1,117 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/2.2.6/dev/advanced/client-security/java + - /riak/kv/2.2.6/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to
+create a cluster object (we'll call it `cluster`), which will in turn be
+used to create a `client` object. The setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.RiakCluster;
+import com.basho.riak.client.api.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        // This will specify a username but no password or keystore:
+        .withAuth("riakuser", null, null)
+        .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+        .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        .withAuth("riakuser", "rosebud", ks)
+        .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
diff --git a/content/riak/kv/2.2.6/developing/usage/security/php.md b/content/riak/kv/2.2.6/developing/usage/security/php.md
new file mode 100644
index 0000000000..8f955a7abd
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/security/php.md
@@ -0,0 +1,118 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/advanced/client-security/php
+  - /riak/kv/2.2.6/dev/advanced/client-security/php
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics).
[Certificate]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
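+
+Once a node is built with one of the authentication helpers above,
+commands execute exactly as they do against an unsecured cluster. As a
+quick sanity check, here is a minimal sketch (the key `test_key` and
+bucket `test_bucket` are placeholders, not part of the examples above)
+that fetches an object over the authenticated connection:
+
+```php
+use \Basho\Riak\Command\Builder\FetchObject;
+
+$location = new \Basho\Riak\Location('test_key', new \Basho\Riak\Bucket('test_bucket'));
+
+// $riak is the authenticated client object built in the examples above
+$response = (new FetchObject($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute();
+
+// isSuccess() indicates whether the authenticated request succeeded
+var_dump($response->isSuccess());
+```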
diff --git a/content/riak/kv/2.2.6/developing/usage/security/python.md b/content/riak/kv/2.2.6/developing/usage/security/python.md
new file mode 100644
index 0000000000..6977873c3e
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/security/python.md
@@ -0,0 +1,172 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/advanced/client-security/python
+  - /riak/kv/2.2.6/dev/advanced/client-security/python
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources, with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `creds` object. We'll use the
+password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.2.6/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a general CA (as with all security sources), a username, a
+CA-generated cert, and a private key. We'll assume that all certs are
+stored in `/ssl_dir`, as in the previous examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      cert_file='/ssl_dir/cert.pem',
+                      pkey_file='/ssl_dir/key.pem')
+```
+
+## Specifying a Certificate Revocation List
+
+If you are using a CA-generated Certificate Revocation List (CRL), you
+can specify its filepath using the `crl_file` parameter.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      crl_file='/ssl_dir/revocation.crl')
+```
+
+## Specifying Ciphers
+
+To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/2.2.6/using/security/basics/#security-ciphers), you can pass in a colon-delimited
+string to the `ciphers` parameter:
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC')
+```
+
+## Using OpenSSL Objects
+
+Whenever you specify certs, you have the option of either passing in
+file paths as strings (as in the examples above) or properly created
+OpenSSL objects, e.g. objects created using the
+[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If
+you generate OpenSSL objects this way, you should note that they must
+be specified differently when creating a `SecurityCreds` object. The
+table below lists the appropriate parameter names for each method, as
+well as the pyOpenSSL class to which each cert must belong if you create
+OpenSSL objects.
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `pkey_file` | `pkey` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify filepaths, the certs will be loaded and converted into
+the corresponding OpenSSL objects. The functions used for this are
+`OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
diff --git a/content/riak/kv/2.2.6/developing/usage/security/ruby.md b/content/riak/kv/2.2.6/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..6abc02c854
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/security/ruby.md
@@ -0,0 +1,158 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/advanced/client-security/ruby
+  - /riak/kv/2.2.6/dev/advanced/client-security/ruby
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we will
+specify a password for the client in the `authentication` hash. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.2.6/using/security/basics#user-management).
+
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/ssl_dir/client_cert.pem',
+    cert: '/ssl_dir/cert.pem',
+    key: '/ssl_dir/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL objects.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object.
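+
+For instance, a minimal sketch (reusing the connection parameters from
+above) that skips CRL checking entirely might look like this:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Disable CRL checking for this client
+    crl: false
+  }
+)
+```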
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)),
+the OCSP endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
diff --git a/content/riak/kv/2.2.6/developing/usage/updating-objects.md b/content/riak/kv/2.2.6/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..3e86c9f27e
--- /dev/null
+++ b/content/riak/kv/2.2.6/developing/usage/updating-objects.md
@@ -0,0 +1,823 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.2.6/dev/using/updates
+  - /riak/kv/2.2.6/dev/using/updates
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context), which tracks the causal history of objects.
+Causal context is attached to _all_ Riak objects as metadata, and it is
+not readable by humans. It may sound complex---and it is fairly complex
+behind the scenes---but using it in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->withLocation($location)
+    ->build()
+    ->execute()
+    ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation($location)
+    ->withObject($object)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but
+%% it will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+obj = rsp.Values[0]
+obj.Value = []byte("Harlem Globetrotters")
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
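+    // WithReturnBody(true) asks Riak to send back the stored object,
+    // including its updated causal context (vclock), with the response.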
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc = cmd.(*riak.StoreValueCommand)
+rsp = svc.Response
+obj = rsp.Values[0]
+fmt.Printf("champion: %v", string(obj.Value))
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+
+curl -XPUT \
+  -H "X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=" \
+  -d "Harlem Globetrotters" \
+  http://localhost:8098/types/sports/buckets/nba/keys/champion
+```
+
+In the samples above, with the exception of curl, we didn't need to
+actually interact with the context object, as retaining and passing along
+the context object was accomplished automatically by the client. If,
+however, you do need access to an object's context, the clients enable
+you to fetch it from the object:
+
+```java
+// Using the RiakObject obj from above:
+
+Vclock vClock = obj.getVclock();
+System.out.println(vClock.asString());
+
+// The context object will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```ruby
+# Using the RObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```php
+# Using the $object from above:
+
+echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```python
+# Using the RiakObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```csharp
+// Using the RiakResult rslt from above:
+var vclock = rslt.Value.VectorClock;
+Console.WriteLine(Convert.ToBase64String(vclock));
+
+// The output will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```javascript
+// Using the RiakObject fetchedObj from above:
+var fetchedObj = rslt.values.shift();
+logger.info("vclock: %s", fetchedObj.getVClock().toString('base64'));
+
+// The output will look something like this:
+// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA=
+```
+
+```erlang
+%% Using the Obj object from above:
+
+riakc_obj:vclock(Obj).
+
+%% The context object will look something like this in the Erlang shell:
+%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126,
+%% 6,175,157,255,57,131,41,145,49,143,149,225,240,...>>
+```
+
+```golang
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Println(rsp.VClock)
+
+// Output:
+// X3hNXFq3ythUqvvrG9eJEGbUyLS
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+
+## The Object Update Cycle
+
+If you decide that your application requires mutable data in Riak, we
+recommend that you:
+
+* avoid high-frequency object updates to the same key (i.e.
multiple
+  updates per second for long periods of time), as this will degrade
+  Riak performance; and that you
+* follow a read-modify-write cycle when performing updates.
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated). This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+  * [Java]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/java)
+  * [Ruby]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/ruby)
+  * [Python]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/python)
+  * [C#]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/csharp)
+  * [Go]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read-modify-write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will fail. More information can be found in the
+[strong consistency documentation]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/2.2.6/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically.
If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official Ruby,
+PHP, Python, .NET, Node.js, Erlang, and Go clients. Because updates with the
+official Java client function somewhat differently, those examples can be
+found in the [section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which bears the bucket type `siblings`,
+a type that sets `allow_mult` to `true`. The key for each object
+is the name of the team, e.g. `giants`, `broncos`, etc. Each object will
+consist of the name of the coach in plain text. Here's an example of
+creating and storing such an object:
+
+```ruby
+bucket = client.bucket('coaches')
+obj = bucket.get_or_new('seahawks', type: 'siblings')
+obj.content_type = 'text/plain'
+obj.raw_data = 'Pete Carroll'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings'));
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute();
+
+if ($response->isSuccess()) {
+    $object = $response->getObject();
+    $object->setData('Pete Carroll');
+} else {
+    $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain');
+}
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('siblings').bucket('coaches')
+obj = RiakObject(client, bucket, 'seahawks')
+obj.content_type = 'text/plain'
+obj.data = 'Pete Carroll'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings", "coaches", "seahawks");
+var obj = new RiakObject(id, "Pete Carroll",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucketType('siblings');
+riakObj.setBucket('coaches');
+riakObj.setKey('seahawks');
+riakObj.setValue('Pete Carroll');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    } else {
+        logger.info('Stored Pete Carroll');
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>},
+                    <<"seahawks">>,
+                    <<"Pete Carroll">>,
+                    <<"text/plain">>).
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Pete Carroll"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("siblings").
+    WithBucket("coaches").
+    WithKey("seahawks").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fmt.Println("Stored Pete Carroll")
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "Pete Carroll" \
+  http://localhost:8098/types/siblings/buckets/coaches/keys/seahawks
+```
+
+Every once in a while, though, head coaches change in the NFL, which
+means that our data would need to be updated.
Below is an example
+function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.raw_data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($riak, $team, $coach) {
+    $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+    // The read phase
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    // The modify phase
+    if ($response->isSuccess()) {
+        $object = $response->getObject();
+        $object->setData($coach);
+    } else {
+        $object = new \Basho\Riak\Object($coach, 'text/plain');
+    }
+
+    // The write phase
+    $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->withObject($object)
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    return $response->isSuccess();
+}
+
+echo update_coach($riak, 'packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Pid, Team, NewCoach) ->
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage
+update_coach(Pid, <<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
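+        // obj still carries the causal context (vclock) fetched above,
+        // so this store completes a proper read-modify-write cycle.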
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/siblings/buckets/coaches/keys/packers
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -H "X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=" \
+  -d "Vince Lombardi" \
+  http://localhost:8098/types/siblings/buckets/coaches/keys/packers
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak.
+
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If
+you're not certain, however, then we recommend always reading the object
+first. Attempting to overwrite a pre-existing key/value pair without a
+context included will usually result in siblings, which you will then
+have to resolve separately.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+    public String username;
+    public List<String> hobbies;
+
+    public User(String username, List<String> hobbies) {
+        this.username = username;
+        this.hobbies = hobbies;
+    }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` field:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect.
In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that replaces the existing object outright
+rather than changing one or more of its properties. Imagine that there
+is a `User` object stored in the bucket `users` in the key
+`cliffhuxtable1986`, as in the example above, and we simply want to
+replace the object with a brand new object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above would update the object without returning its value
+to the application.
You could, however, use a no-operation update to _read_ an object as well if you set `return_body` to `true` in your request:

```java
// Using the Location object "loc" from above:
UpdateValue updateOp = new UpdateValue.Builder(loc)
        .withFetchOption(Option.RETURN_BODY, true)
        .withUpdate(Update.noopUpdate())
        .build();
UpdateValue.Response response = client.execute(updateOp);
RiakObject object = response.getValue(RiakObject.class);

// Or to continue the User example from above:
User user = response.getValue(User.class);
```

In general, you should use no-operation updates only on keys that you suspect may have accumulated siblings or on keys that are frequently updated (and thus bear the possibility of accumulating siblings). Otherwise, you're better off performing normal reads.
diff --git a/content/riak/kv/2.2.6/downloads.md b/content/riak/kv/2.2.6/downloads.md
new file mode 100644
index 0000000000..92d592cce0
--- /dev/null
+++ b/content/riak/kv/2.2.6/downloads.md
@@ -0,0 +1,22 @@
---
title: "Download for Riak KV 2.2.6"
description: "Download Riak KV 2.2.6"
menu:
  riak_kv-2.2.6:
    name: "Download Riak KV"
    identifier: "download_riak_kv"
    weight: 101
    pre: download-alt
project: "riak_kv"
project_version: "2.2.6"
toc: false
layout: downloads
listed_projects:
  - project: "riak_kv"
    version: "2.2.6"
    title: "Riak KV"
    install_instructions_set: "setup/installing"
aliases:
  - /riak/2.2.6/downloads
  - /riak/kv/2.2.6/downloads
---
diff --git a/content/riak/kv/2.2.6/index.md b/content/riak/kv/2.2.6/index.md
new file mode 100644
index 0000000000..fc219fc683
--- /dev/null
+++ b/content/riak/kv/2.2.6/index.md
@@ -0,0 +1,75 @@
---
title: "Riak KV 2.2.6"
description: ""
project: "riak_kv"
project_version: "2.2.6"
menu:
  riak_kv-2.2.6:
    name: "Riak KV"
    identifier: "index"
    weight: 100
    pre: riak
toc: false
aliases:
  - /riak/2.2.6/
---

[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/
[config index]: {{<baseurl>}}riak/kv/2.2.6/configuring
[downloads]: {{<baseurl>}}riak/kv/2.2.6/downloads/
[install index]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/
[plan index]: {{<baseurl>}}riak/kv/2.2.6/setup/planning
[perf open files]: {{<baseurl>}}riak/kv/2.2.6/using/performance/open-files-limit
[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/debian-ubuntu
[usage search]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search
[getting started]: {{<baseurl>}}riak/kv/2.2.6/developing/getting-started
[dev client libraries]: {{<baseurl>}}riak/kv/2.2.6/developing/client-libraries

Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data.

Riak KV 2.2.6 is the first open source-only release to include the features of the former [Riak KV Enterprise][aboutenterprise] product, such as multi-datacenter cluster replication, which ensures low latency and robust business continuity.
## Supported Operating Systems

- Amazon Linux 2016.09 (AWS)
- Amazon Linux 2 (AWS)
- CentOS 6
- CentOS 7
- CentOS 8
- Debian 7.0 ("Wheezy")
- Debian 8.0 ("Jessie")
- Debian 9.0 ("Stretch")
- Red Hat Enterprise Linux 6
- Red Hat Enterprise Linux 7
- Red Hat Enterprise Linux 8
- Raspbian Buster
- Ubuntu 12.04 ("Precise Pangolin")
- Ubuntu 14.04 ("Trusty Tahr")
- Ubuntu 16.04 ("Xenial Xerus")
- Ubuntu 17.10 ("Artful Aardvark")
- Ubuntu 18.04 ("Bionic Beaver")
- FreeBSD 10.4
- FreeBSD 11.1
- Mac OS X 10.11+ (development only)

## Getting Started

Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started:

1. [Install Riak KV][install index]
2. [Plan your Riak KV setup][plan index]
3. [Configure Riak KV for your needs][config index]

{{% note title="Developing with Riak KV" %}}
If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/2.2.6/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more.
{{% /note %}}

## Popular Docs

1. [Open Files Limit][perf open files]
2. [Installing on Debian-Ubuntu][install debian & ubuntu]
3. [Developing with Riak KV: Searching][usage search]
4. [Developing with Riak KV: Getting Started][getting started]
5. [Developing with Riak KV: Client Libraries][dev client libraries]
diff --git a/content/riak/kv/2.2.6/learn.md b/content/riak/kv/2.2.6/learn.md
new file mode 100644
index 0000000000..861f9dd460
--- /dev/null
+++ b/content/riak/kv/2.2.6/learn.md
@@ -0,0 +1,47 @@
---
title: "Learn About Riak KV"
description: ""
project: "riak_kv"
project_version: "2.2.6"
menu:
  riak_kv-2.2.6:
    name: "Learning"
    identifier: "learn"
    weight: 400
    pre: beaker
toc: true
---

[learn why riak]: ./why-riak-kv/
[learn use cases]: ./use-cases/
[learn new nosql]: ./new-to-nosql/
[glossary]: ./glossary/
[concepts]: ./concepts/

## In This Section

#### [Why Riak KV?][learn why riak]

An overview of Riak KV and when to use it.

[Learn More >>][learn why riak]

#### [Use Cases][learn use cases]

Details use cases and applications in which Riak KV excels.

[Learn More >>][learn use cases]

#### [Glossary][glossary]

A list of terms relating to Riak used throughout the documentation.
[Learn More >>][glossary]

#### [Concepts][concepts]

Provides definitions for, insight into, and high-level information about the various parts of Riak KV.

[Learn More >>][concepts]
diff --git a/content/riak/kv/2.2.6/learn/concepts.md b/content/riak/kv/2.2.6/learn/concepts.md
new file mode 100644
index 0000000000..797edee7a5
--- /dev/null
+++ b/content/riak/kv/2.2.6/learn/concepts.md
@@ -0,0 +1,44 @@
---
title: "Concepts"
description: ""
project: "riak_kv"
project_version: "2.2.6"
menu:
  riak_kv-2.2.6:
    name: "Concepts"
    identifier: "learn_concepts"
    weight: 104
    parent: "learn"
toc: true
---

[concept aae]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy
[concept buckets]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets
[concept cap neg]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/capability-negotiation
[concept causal context]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context
[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters
[concept crdts]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/crdts
[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency
[concept keys objects]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/keys-and-objects
[concept replication]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication
[concept strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/reference/strong-consistency
[concept vnodes]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/vnodes
[config index]: {{<baseurl>}}riak/kv/2.2.6/configuring
[plan index]: {{<baseurl>}}riak/kv/2.2.6/setup/planning
[use index]: {{<baseurl>}}riak/kv/2.2.6/using/

Riak KV has many features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high-level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak.
Learn more about:

* [Active Anti-Entropy (AAE)][concept aae]
* [Buckets][concept buckets]
* [Capability Negotiation][concept cap neg]
* [Causal Context][concept causal context]
* [Clusters][concept clusters]
* [Convergent Replicated Data Types (CRDTs)][concept crdts]
* [Eventual Consistency][concept eventual consistency]
* [Keys and Objects][concept keys objects]
* [Replication][concept replication]
* [Virtual Nodes (vnodes)][concept vnodes]
diff --git a/content/riak/kv/2.2.6/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.2.6/learn/concepts/active-anti-entropy.md
new file mode 100644
index 0000000000..06e5837958
--- /dev/null
+++ b/content/riak/kv/2.2.6/learn/concepts/active-anti-entropy.md
@@ -0,0 +1,107 @@
---
title: "Active Anti-Entropy"
description: ""
project: "riak_kv"
project_version: "2.2.6"
menu:
  riak_kv-2.2.6:
    name: "Active Anti-Entropy"
    identifier: "learn_concepts_aae"
    weight: 100
    parent: "learn_concepts"
toc: true
aliases:
  - /riak/2.2.6/theory/concepts/aae
  - /riak/kv/2.2.6/theory/concepts/aae
---

[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter
[cluster ops aae]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/active-anti-entropy
[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters
[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency
[config aae]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/#active-anti-entropy
[glossary read rep]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#read-repair
[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree
[usage search]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search

In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored on different nodes are an expected byproduct of node failure, concurrent client updates, physical data loss and corruption, and other events that distributed systems are built to handle. These conflicts occur when objects are either

* **missing**, as when one node holds a replica of the object and another node does not, or
* **divergent**, as when the values of an existing object differ across nodes.

Riak KV offers two means of resolving object conflicts: read repair and active anti-entropy (AAE). Both of these conflict resolution mechanisms apply to normal key/value data in Riak as well as to [search indexes][usage search].

## Read Repair vs. Active Anti-Entropy

In versions of Riak prior to 1.3, replica conflicts were healed via [read repair][glossary read rep], which is a _passive_ anti-entropy mechanism that heals object conflicts only when a read request reaches Riak from a client. Under read repair, if the [vnode][glossary vnode] coordinating the read request determines that different nodes hold divergent values for the object, the repair process will be set in motion.

One advantage of using read repair alone is that it doesn't require any kind of background process to take effect, which can cut down on CPU resource usage. The drawback of the read repair-only approach, however, is that the healing process can only ever reach those objects that are read by clients. Any conflicts in objects that are not read by clients will go undetected.
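To make the passive flow concrete, here is a minimal, illustrative Java sketch of a coordinator healing divergent replicas at read time. The `Replica` type and its `version` counter are assumptions for this sketch; real Riak compares causal contexts, not a single counter:

```java
import java.util.*;

// Illustrative sketch of passive read repair: on each read, compare the
// replicas returned by the participating vnodes and push the winning value
// back to any vnode holding a stale copy. This is a simplification, not
// Riak's implementation.
class ReadRepairSketch {
    static class Replica {
        String value;
        long version; // stand-in for a causal context

        Replica(String value, long version) {
            this.value = value;
            this.version = version;
        }
    }

    // Returns the winning value and repairs stale replicas in place.
    static String readWithRepair(List<Replica> replicas) {
        Replica winner = Collections.max(replicas,
                Comparator.comparingLong((Replica r) -> r.version));
        for (Replica r : replicas) {
            if (r.version < winner.version) { // stale copy detected
                r.value = winner.value;       // "read repair" the replica
                r.version = winner.version;
            }
        }
        return winner.value;
    }
}
```

Note that repair only happens here because a read arrived; a replica that is never read is never examined, which is exactly the gap that active anti-entropy fills.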
The _active_ anti-entropy (AAE) subsystem was added to Riak in version 1.3 to enable conflict resolution to run as a continuous background process, in contrast with read repair, which does not run continuously. AAE is most useful in clusters containing so-called "cold data" that may not be read for long periods of time, even months or years, and is thus not reachable by read repair.

Although AAE is enabled by default, it can be turned off if necessary. See our documentation on [managing active anti-entropy][cluster ops aae] for information on how to enable and disable AAE, as well as on configuring and monitoring AAE.

## Active Anti-Entropy and Hash Tree Exchange

In order to compare object values between replicas without using more resources than necessary, Riak relies on [Merkle tree] hash exchanges between nodes.

Using this type of exchange enables Riak to compare a balanced tree of Riak object hashes. Any difference at a higher level in the hierarchy means that at least one value has changed at a lower level. AAE recursively compares the tree, level by level, until it pinpoints exact values with a difference between nodes. The result is that AAE is able to run repair operations efficiently regardless of how many objects are stored in a cluster, since it need only repair specific objects instead of all objects.

In contrast with related systems, Riak uses persistent, on-disk hash trees instead of in-memory hash trees. The advantages of this approach are twofold:

* Riak can run AAE operations with a minimal impact on memory usage
* Riak nodes can be restarted without needing to rebuild hash trees

In addition, hash trees are updated in real time as new writes come in, which reduces the time that it takes to detect and repair missing or divergent replicas.

As an additional fallback measure, Riak periodically clears and regenerates all hash trees from on-disk key/value data, which enables Riak to detect silent data corruption to on-disk data arising from disk failure, faulty hardware, and other sources. The default time period for this regeneration is one week, but this can be adjusted in each node's [configuration file][config aae].
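As a rough illustration of the exchange described above, the following Java sketch builds a two-level hash tree (bucket hashes plus a root) over a keyspace and compares two trees top-down, descending only into buckets whose hashes differ. It is a toy under assumed structure, not Riak's persistent, incrementally updated on-disk trees:

```java
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.*;

// Toy two-level hash tree: hash every key's value, fold key/value hashes into
// a fixed number of buckets in sorted-key order, then fold bucket hashes into
// a single root. Comparing roots first means identical replicas exchange
// almost no data.
class HashTreeSketch {
    final int numBuckets;
    final byte[][] buckets;
    final byte[] root;

    HashTreeSketch(Map<String, byte[]> data, int numBuckets) throws Exception {
        this.numBuckets = numBuckets;
        this.buckets = new byte[numBuckets][];
        MessageDigest[] acc = new MessageDigest[numBuckets];
        for (int i = 0; i < numBuckets; i++) acc[i] = MessageDigest.getInstance("SHA-1");
        // Sorted iteration so both replicas fold entries in the same order.
        for (Map.Entry<String, byte[]> e : new TreeMap<>(data).entrySet()) {
            int b = Math.floorMod(e.getKey().hashCode(), numBuckets);
            acc[b].update(e.getKey().getBytes(StandardCharsets.UTF_8));
            acc[b].update(MessageDigest.getInstance("SHA-1").digest(e.getValue()));
        }
        MessageDigest rootAcc = MessageDigest.getInstance("SHA-1");
        for (int i = 0; i < numBuckets; i++) {
            buckets[i] = acc[i].digest();
            rootAcc.update(buckets[i]);
        }
        root = rootAcc.digest();
    }

    // Exchange: compare roots, then descend only into divergent buckets.
    static List<Integer> divergentBuckets(HashTreeSketch a, HashTreeSketch b) {
        if (Arrays.equals(a.root, b.root)) return Collections.emptyList();
        List<Integer> out = new ArrayList<>();
        for (int i = 0; i < a.numBuckets; i++)
            if (!Arrays.equals(a.buckets[i], b.buckets[i])) out.add(i);
        return out; // keys in these buckets are then compared one by one
    }
}
```

In a real exchange, the keys in the divergent buckets would then be compared individually, and only the objects that actually differ handed off for repair.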
diff --git a/content/riak/kv/2.2.6/learn/concepts/buckets.md b/content/riak/kv/2.2.6/learn/concepts/buckets.md
new file mode 100644
index 0000000000..dee3fbf350
--- /dev/null
+++ b/content/riak/kv/2.2.6/learn/concepts/buckets.md
@@ -0,0 +1,213 @@
---
title: "Buckets"
description: ""
project: "riak_kv"
project_version: "2.2.6"
menu:
  riak_kv-2.2.6:
    name: "Buckets"
    identifier: "learn_concepts_buckets"
    weight: 101
    parent: "learn_concepts"
toc: true
aliases:
  - /riak/2.2.6/theory/concepts/Buckets
  - /riak/kv/2.2.6/theory/concepts/Buckets
  - /riak/2.2.6/theory/concepts/buckets
  - /riak/kv/2.2.6/theory/concepts/buckets
---

[apps cluster metadata]: {{<baseurl>}}riak/kv/2.2.6/developing/app-guide/cluster-metadata
[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/bucket-types
[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/strong-consistency
[concept causal context]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context
[concept causal context sib]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context/#siblings
[concept replication]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication
[concept strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/reference/strong-consistency
[config basic]: {{<baseurl>}}riak/kv/2.2.6/configuring/basic
[dev api http]: {{<baseurl>}}riak/kv/2.2.6/developing/api/http
[dev data types]: {{<baseurl>}}riak/kv/2.2.6/developing/data-types
[glossary ring]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#ring
[plan backend leveldb]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb
[plan backend bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask
[plan backend memory]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/memory
[plan backend multi]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/multi
[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types
[usage commit hooks]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/commit-hooks
[usage conflict resolution]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution
[usage replication]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/replication

Buckets are used to define a virtual keyspace for storing Riak objects. They enable you to define non-default configurations over that keyspace concerning [replication properties][concept replication] and [other parameters][config basic].

In certain respects, buckets can be compared to tables in relational databases or to folders in filesystems. From the standpoint of performance, buckets with default configurations are essentially "free," while non-default configurations, defined [using bucket types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.

## Configuration

Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables you to create and modify sets of configurations and apply them to as many buckets as you wish. With bucket types, you can configure the following bucket-level parameters, overriding the default values if you wish.

#### allow_mult

Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on the context. See the documentation on [`allow_mult`][usage bucket types] for more information.
#### n_val

Specifies the number of copies of each object to be stored in the cluster. See the documentation on [replication properties][usage replication]. Default: `3`.

#### last_write_wins

Indicates if an object's timestamp will be used to decide the canonical write in the case of a conflict. See the documentation on [vector clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default: `false`.

#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum

See the documentation on [replication properties][usage replication] for more information on all of these properties.

#### precommit

A list of Erlang functions to be executed before writing an object. See our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit hooks, i.e. an empty list.

#### postcommit

A list of Erlang functions to be executed after writing an object. See our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit hooks, i.e. an empty list.

#### old_vclock, young_vclock, small_vclock, big_vclock

These settings enable you to manage [vector clock pruning][concept causal context].

#### backend

If you are using the [Multi][plan backend multi] backend, this property enables you to determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_ buckets of all types will use the assigned backend.

#### consistent

If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets bearing a type, this setting must be set to `true`. The default is `false`. More information can be found in our documentation on [using strong consistency][cluster ops strong consistency].

#### datatype

If you are using [Riak data types][dev data types], this setting determines which data type will be used in buckets of this bucket type. Possible values: `counter`, `set`, or `map`.

#### dvv_enabled

Whether [dotted version vectors][concept causal context] will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.

#### chash_keyfun, linkfun

These settings involve features that have been deprecated. You will not need to adjust these values.

## Fetching Bucket Properties

If you'd like to see how a particular bucket has been configured, you can do so using our official client libraries or through Riak's [HTTP API][dev api http]. The following would fetch the properties for the bucket `animals` if that bucket had a default configuration, i.e.
the `default` bucket type:

```java
Namespace animalsBucket = new Namespace("animals");
FetchBucketProperties fetchProps =
    new FetchBucketProperties.Builder(animalsBucket).build();
FetchBucketProperties.Response response = client.execute(fetchProps);
BucketProperties props = response.getProperties();
```

```ruby
bucket = client.bucket('animals')
bucket.properties
```

```php
$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
    ->buildBucket('animals')
    ->build()
    ->execute()
    ->getBucket()
    ->getProperties();
```

```python
bucket = client.bucket('animals')
bucket.get_properties()
```

```erlang
{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
```

```curl
# Assuming that Riak's HTTP API is listening on "localhost" and port 8098:

curl http://localhost:8098/types/default/buckets/animals/props
```

If the bucket `animals` had a different type that you had created and activated, e.g. `my_custom_type`, you could fetch the bucket properties like so:

```java
Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
FetchBucketProperties fetchProps =
    new FetchBucketProperties.Builder(customTypedBucket).build();
FetchBucketProperties.Response response = client.execute(fetchProps);
BucketProperties props = response.getProperties();
```

```ruby
bucket = client.bucket_type('my_custom_type').bucket('animals')
bucket.properties
```

```php
$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
    ->buildBucket('animals', 'my_custom_type')
    ->build()
    ->execute()
    ->getBucket()
    ->getProperties();
```

```python
bucket = client.bucket_type('my_custom_type').bucket('animals')
bucket.get_properties()
```

```erlang
{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
```

```curl
curl http://localhost:8098/types/my_custom_type/buckets/animals/props
```
diff --git a/content/riak/kv/2.2.6/learn/concepts/capability-negotiation.md b/content/riak/kv/2.2.6/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..ca1a00709e
--- /dev/null
+++ b/content/riak/kv/2.2.6/learn/concepts/capability-negotiation.md
@@ -0,0 +1,32 @@
---
title: "Capability Negotiation"
description: ""
project: "riak_kv"
project_version: "2.2.6"
menu:
  riak_kv-2.2.6:
    name: "Capability Negotiation"
    identifier: "learn_concepts_cap_negot"
    weight: 102
    parent: "learn_concepts"
toc: true
aliases:
  - /riak/2.2.6/theory/concepts/capability-negotiation
  - /riak/kv/2.2.6/theory/concepts/capability-negotiation
---

[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
[upgrade cluster]: {{<baseurl>}}riak/kv/2.2.6/setup/upgrading/cluster
[usage mapreduce]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce

In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.

Rolling upgrades no longer require you to disable and then re-enable features, thanks to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + diff --git a/content/riak/kv/2.2.6/learn/concepts/causal-context.md b/content/riak/kv/2.2.6/learn/concepts/causal-context.md new file mode 100644 index 0000000000..b169011e1e --- /dev/null +++ b/content/riak/kv/2.2.6/learn/concepts/causal-context.md @@ -0,0 +1,285 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.2.6/theory/concepts/context + - /riak/kv/2.2.6/theory/concepts/context +--- + + +[concept aae]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/2.2.6/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/2.2.6/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/2.2.6/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
2. The two operations hit the database at roughly the same time, i.e. two **concurrent updates** have been completed, and Riak is unable to determine which value "wins." In this scenario, one of three things can happen:

    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules

    b. Riak creates sibling values, aka **siblings**, for the object

    c. Riak resolves the values on the basis of timestamps

In the case of outcome 1 above, Riak uses **causal context** metadata to make that decision. This metadata is attached to every object in Riak. Causal context comes in two forms in Riak: **vector clocks** and **dotted version vectors**. More information on both can be found in the sections below.

In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions).

If, however, `allow_mult` is set to `false`, then Riak will not generate siblings, instead relying on simple timestamp resolution to decide which value "wins." In general, we recommend _always_ setting `allow_mult` to `true`. A more complete discussion can be found in our documentation on [conflict resolution][usage conflict resolution].

## Vector Clocks

In versions of Riak prior to 1.4, Riak used vector clocks as the sole means of tracking the history of object updates. In Riak versions 2.0 and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained in that section.

Like dotted version vectors, vector clocks are a means of tracking events in distributed systems. Unlike normal clocks, vector clocks have no sense of chronological time, i.e. they don't care if something happened at 6 pm today or back in 1972. They care only about sequences of events. More specifically, they keep track of who---i.e. which actor in the system---has modified an object and how many times they've done so.

In a distributed system like Riak, multiple replicas of each object are active in the cluster all the time. Because it's inevitable that objects will have conflicting values due to events like concurrent updates and healed network partitions, Riak needs a mechanism to keep track of which replica of an object is more current than another. In versions of Riak prior to 2.0, vector clocks were the means employed by Riak to do precisely that.

A number of important aspects of the relationship between object replicas can be determined using vector clocks:

* Whether one object is a direct descendant of the other
* Whether the objects are direct descendants of a common parent
* Whether the objects are unrelated in recent heritage

Behind the scenes, Riak uses vector clocks as an essential element of its [active anti-entropy][concept aae] subsystem and of its automatic read repair capabilities.
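The descendant-or-concurrent relationships in the list above are exactly what a vector clock comparison computes. Below is a minimal, illustrative Java sketch of that comparison, using a plain actor-to-counter map rather than Riak's encoding:

```java
import java.util.*;

// Toy vector clock: one logical counter per actor. Clock A descends from
// clock B when A has seen at least everything B has seen; two clocks with no
// descent relationship in either direction are concurrent (sibling-producing).
class VectorClockSketch {
    final Map<String, Integer> counters = new HashMap<>();

    // Record that an actor modified the object one more time.
    void increment(String actor) {
        counters.merge(actor, 1, Integer::sum);
    }

    // True if "this" has seen every event that "other" has seen.
    boolean descends(VectorClockSketch other) {
        for (Map.Entry<String, Integer> e : other.counters.entrySet()) {
            if (counters.getOrDefault(e.getKey(), 0) < e.getValue()) return false;
        }
        return true;
    }

    // Neither clock descends from the other: concurrent updates.
    boolean concurrentWith(VectorClockSketch other) {
        return !this.descends(other) && !other.descends(this);
    }
}
```

When two replicas are concurrent in this sense, no causal winner exists, and that is precisely the case in which Riak either creates siblings or falls back to timestamps, as described above.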
Vector clocks are non-human-readable metadata attached to all Riak objects. They look something like this:

```
a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
```

While vector clocks quite often resolve object conflicts without trouble, there are times when they can't, i.e. when it's unclear which value of an object is most current. When that happens, Riak, if configured to do so, will create **siblings**.

## More Information on Vector Clocks

Additional information on vector clocks:

* [Conflict Resolution][usage conflict resolution] in Riak KV
* [Vector Clocks on Wikipedia]
* [Why Vector Clocks are Easy]
* [Why Vector Clocks are Hard]
* The vector clocks used in Riak are based on the [work of Leslie Lamport].

## Siblings

It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a specific location. This will ensure that _at most_ one value is returned when a read is performed on a bucket type/bucket/key location (and no value if Riak returns `not found`).

It's also possible, however, to configure Riak to store multiple objects in a single key if necessary, i.e. for an object to have different values on different nodes. Objects stored this way have what are called sibling values. You can instruct Riak to allow for sibling creation by setting the `allow_mult` bucket property to `true` for a specific bucket, preferably [using bucket types][usage bucket types].

From the standpoint of application development, the difficulty with siblings is that they _by definition_ conflict with one another. When an application attempts to read an object that has siblings, multiple replicas will be stored in the location where the application is looking. This means that the application will need to develop a strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to decide which value is more correct depending on the use case.

## Dotted Version Vectors

In versions of Riak prior to 2.0, all causality-based conflict resolution, whether on the client side or in Riak, was achieved using [vector clocks][concept causal context]. In version 2.0, Riak added the option of using **dotted version vectors** (DVVs) instead.

Like vector clocks, dotted version vectors are a mechanism for tracking object update causality in terms of **logical time** rather than chronological time (as with timestamps), enabling Riak to make decisions about which objects are more current than others in cases of conflict.

>**Note: DVVs Recommended Over Vector Clocks**
>
>If you are using Riak version 2.0 or later, we strongly recommend using dotted version vectors instead of vector clocks, as DVVs are far better at limiting the number of siblings produced in a cluster, which can prevent a wide variety of potential issues.

## DVVs Versus Vector Clocks

The role that DVVs play in Riak is directly analogous to that of vector clocks, as both are used to resolve object conflicts, whether during background operations like [active anti-entropy][concept aae] or [read repair][glossary read rep], or when applications engage in client-side [conflict resolution][usage conflict resolution]. The crucial difference between them, however, lies in the way that they handle concurrent updates.

Vector clocks can detect concurrent updates to the same object, but they can't identify which value was associated with each update.
If an object stored in the bucket `frequent_updates` with the key `update_me` is updated by five different clients concurrently and tagged with the same vector clock, then five values should be created as siblings. However, depending on the order of delivery of those updates to the different replicas, sibling values may be duplicated, which can in turn lead to [sibling explosion](#sibling-explosion) and thus undue [latency][perf latency reduc].

DVVs, on the other hand, identify each value with the update that created it. If five clients concurrently update the object above (in the bucket `frequent_updates`, with the key `update_me`), each of these updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This means that duplicate values can always be identified and removed, reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the number of sibling values will be proportional to the number of concurrent updates.

In terms of performance, the difference between vector clocks and DVVs should be minimal in most cases. Because DVVs de-duplicate updates, they should generally be smaller than objects that use vector clocks.

## Usage

From an application's perspective, vector clocks and DVVs function in exactly the same fashion. Object updates using DVVs involve the same sequence of interactions with Riak:

* fetch an object from Riak,
* fetch the object's metadata, which will contain an opaque context object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for the vector clock or DVV attached to that version of the object, and finally
* pass that opaque context object back to Riak when you update the object.

You will not need to modify your application code when switching from vector clocks to DVVs, even if you choose to switch all Riak objects in your cluster to DVVs. You should make sure, however, that the right bucket types and buckets are being targeted by your application after the `dvv_enabled` parameter has been changed.

For compatibility's sake, DVVs contained in Riak objects' metadata are still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and `vclock` if using the [Protocol Buffers interface][usage protocol buffers].

More on using vector clocks and DVVs on the application side can be found in our documentation on [conflict resolution][usage conflict resolution].

>**Note on DVVs and bucket types**
>
>The choice between vector clocks and DVVs can be made at the bucket level, [using bucket types][usage bucket types]. This enables you to employ a mixed conflict resolution strategy in your Riak cluster, using DVVs in some buckets and vector clocks in others if you wish. DVVs can be enabled by setting the `dvv_enabled` bucket property to `true` for one or more bucket types.
>
>Vector clocks remain the default if you are not using bucket types. However, any bucket type that you create and activate will have `dvv_enabled` set to `true`, so if you wish to create a bucket type that uses traditional vector clocks, you will need to explicitly set `dvv_enabled` to `false` for that bucket type.

## Sibling Explosion

Sibling explosion occurs when an object rapidly collects siblings that are not reconciled. This can lead to a variety of problems, including degraded performance, especially if many objects in a cluster suffer from sibling explosion.
At the extreme, having an enormous object in a node can cause reads of that object to crash the entire node. Other issues include [undue latency][perf latency reduc] and out-of-memory errors.

To prevent sibling explosion, we recommend the following:

1. Use [dotted version vectors](#dotted-version-vectors) instead of vector clocks for causal context.
2. Always update mutable objects within a read/modify/write cycle. More information can be found in the [Object Updates][usage updating objects] doc.

## Resources

* [Evaluating Dotted Version Vectors in Riak]
* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]
* [Dotted Version Vector Sets]
* [A History of Time in Riak]
diff --git a/content/riak/kv/2.2.6/learn/concepts/clusters.md b/content/riak/kv/2.2.6/learn/concepts/clusters.md
new file mode 100644
index 0000000000..38d604f60e
--- /dev/null
+++ b/content/riak/kv/2.2.6/learn/concepts/clusters.md
@@ -0,0 +1,113 @@
---
title: "Clusters"
description: ""
project: "riak_kv"
project_version: "2.2.6"
menu:
  riak_kv-2.2.6:
    name: "Clusters"
    identifier: "learn_concepts_clusters"
    weight: 103
    parent: "learn_concepts"
toc: true
aliases:
  - /riak/2.2.6/theory/concepts/Clusters
  - /riak/kv/2.2.6/theory/concepts/Clusters
  - /riak/2.2.6/theory/concepts/clusters
  - /riak/kv/2.2.6/theory/concepts/clusters
---

[concept buckets]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets
[concept keys objects]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/keys-and-objects
[concept replication]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication
[glossary node]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#node
[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
[learn dynamo]: {{<baseurl>}}riak/kv/2.2.6/learn/dynamo
[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types
[usage conflict resolution]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution
[usage replication]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/replication

Riak's default mode of operation is to work as a cluster consisting of multiple [nodes][glossary node], i.e. multiple well-connected data hosts.

Each host in the cluster runs a single instance of Riak, referred to as a Riak node. Each Riak node manages a set of virtual nodes, or [vnodes][glossary vnode], that are responsible for storing a separate portion of the keys stored in the cluster.

In contrast to some high-availability systems, Riak nodes are _not_ clones of one another, and they do not all participate in fulfilling every request. Instead, you can configure, at runtime or at request time, the number of nodes on which data is to be replicated, as well as when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed.

## The Ring

Though much of this material is covered in our annotated discussion of the Amazon [Dynamo paper][learn dynamo], this section nonetheless provides a summary of how Riak implements the distribution of data throughout a cluster.

Any client interface to Riak interacts with objects in terms of the [bucket][concept buckets] and [key][concept keys objects] in which a value is stored, as well as the [bucket type][usage bucket types] that is used to set the bucket's properties.

Internally, Riak computes a 160-bit binary hash of each bucket/key pair and maps this value to a position on an ordered **ring** of all such values.
This ring is divided into partitions, with each Riak vnode responsible for one of these partitions (we say that each vnode _claims_ that partition).

Below is a visual representation of a Riak ring:

![A Riak Ring]({{<baseurl>}}images/riak-ring.png)

The nodes of a Riak cluster each attempt to run a roughly equal number of vnodes at any given time. In the general case, this means that each node in the cluster is responsible for 1/(number of nodes) of the ring, or (number of partitions)/(number of nodes) vnodes.

If two nodes define a 16-partition cluster, for example, then each node will run 8 vnodes. Nodes attempt to claim their partitions at intervals around the ring such that there is an even distribution amongst the member nodes and no node is responsible for more than one replica of a key.

## Intelligent Replication

When an object is being stored in the cluster, any node may participate as the **coordinating node** for the request. The coordinating node consults the ring state to determine which vnode owns the partition in which the value's key belongs, then sends the write request to that vnode as well as to the vnodes responsible for the next N-1 partitions in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The write request may also specify that at least W (=< N) of those vnodes reply with success, and that DW (=< W) reply with success only after durably storing the value.

A read, or GET, request operates similarly, sending requests to the vnode that "claims" the partition in which the key resides, as well as to the next N-1 partitions. The request also specifies R (=< N), the number of vnodes that must reply before a response is returned.

Here is an illustration of this process:

![Riak Data Distribution]({{<baseurl>}}images/riak-data-distribution.png)

When N is set to 3, the value `REM` is stored in the key `artist`. That key is assigned to 3 partitions out of 32 available partitions. When a read request is made to Riak, the ring state will be used to determine which partitions are responsible. From there, a variety of [configurable parameters][usage replication] determine how Riak will behave in case the value is not immediately found.

## Gossiping

The ring state is shared around the cluster by means of a "gossip protocol." Whenever a node changes its claim on the ring, it announces, i.e. "gossips," this change to other nodes so that the other nodes can respond appropriately. Nodes also periodically re-announce what they know about the ring in case any nodes happened to miss previous updates.
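To make the hashing and replica placement above concrete, here is a small, illustrative Java sketch that maps a bucket/key pair onto a 160-bit ring of Q partitions and lists the N partitions that would hold replicas. The byte encoding of the bucket/key pair here is a simplifying assumption; Riak's actual hashing of the pair differs in detail:

```java
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

// Illustrative ring math: SHA-1 yields a 160-bit value; dividing the ring's
// 2^160 keyspace into Q equal partitions tells us which partition a key's
// hash falls into, and the next N-1 partitions (wrapping) hold the replicas.
public class RingSketch {
    static final int Q = 32;  // partitions in the ring
    static final int N = 3;   // replicas per key
    static final BigInteger RING_SIZE = BigInteger.ONE.shiftLeft(160);

    static int partitionFor(String bucket, String key) throws Exception {
        byte[] digest = MessageDigest.getInstance("SHA-1")
                .digest((bucket + "/" + key).getBytes(StandardCharsets.UTF_8));
        BigInteger point = new BigInteger(1, digest);  // 0 .. 2^160 - 1
        return point.multiply(BigInteger.valueOf(Q)).divide(RING_SIZE).intValue();
    }

    public static void main(String[] args) throws Exception {
        int first = partitionFor("artist", "REM");
        System.out.print("preference list:");
        for (int i = 0; i < N; i++) {  // first partition plus the next N-1
            System.out.print(" " + ((first + i) % Q));
        }
        System.out.println();
    }
}
```

The printed "preference list" corresponds to the three partitions in the `REM`/`artist` illustration above: the partition the hash lands in plus the next two around the ring.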
diff --git a/content/riak/kv/2.2.6/learn/concepts/crdts.md b/content/riak/kv/2.2.6/learn/concepts/crdts.md new file mode 100644 index 0000000000..e61102caf3 --- /dev/null +++ b/content/riak/kv/2.2.6/learn/concepts/crdts.md @@ -0,0 +1,248 @@ +--- +title_supertext: "Concept" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Data Types" + identifier: "learn_concepts_data_types" + weight: 104 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.2.6/theory/concepts/crdts + - /riak/kv/2.2.6/theory/concepts/crdts +--- + +[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[data types converg]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/crdts/#convergence +[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html +[data types impl]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency +[concept strong consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/strong-consistency +[dev data types]: {{<baseurl>}}riak/kv/2.2.6/developing/data-types +[riak_dt]: https://github.com/basho/riak_dt +[dev data types context]: {{<baseurl>}}riak/kv/2.2.6/developing/data-types/#data-types-and-context +[glossary node]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution + +Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections: + +- Counters +- Flags +- HyperLogLogs +- Maps +- Registers +- Sets + +The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients. + +Instead of the usual create, read, update, and delete (CRUD) operations +performed on key/value pairs, data types enable you to perform +operations such as removing a register from a map, telling a counter to +increment itself by 5, or enabling a flag that was previously disabled. + +It's important to note that Riak Data Types are operations-based from the standpoint of connecting clients. Like CRDTs, the [convergence logic][data types converg] is state-based behind the scenes. + +Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below. + +For more articles on CRDTs, check out this [reading list][crdts reading list]. + + +## Counters + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used within a map. A counter’s value can only be a positive integer, negative integer, or zero. 
Counters are useful when a count is needed, for example:

- Counting the number of people following someone on Twitter
- Counting the number of likes on a Facebook post
- Counting the points scored by a player in a game

If you require unique, ordered IDs, do not use counters, because uniqueness cannot be guaranteed.

### Operations

Counters are subject to two operations: increment and decrement.

## Flags

Flags are similar to Boolean values, but instead of `true` or `false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.

Some examples of using flags:

- Showing if a tweet has been retweeted
- Showing if a user has signed up for a specific pricing plan

### Operations

Flags support only two operations: `enable` and `disable`. Flags can be added to or removed from a map, but those operations are performed on the map and not on the flag directly.

## HyperLogLogs

HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.

For example, HyperLogLogs can be used for:

- Counting the number of unique visitors to your website
- Counting the number of unique searches users performed

### Operations

HyperLogLogs support two operations: adding elements and retrieving the count.

## Maps

Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.

Maps are best suited for complex, multi-faceted data. The following JSON-inspired pseudocode shows how a tweet might be structured as a map:

```
Map tweet {
    Counter: numberOfRetweets,
    Register: username,
    Register: tweetContent,
    Flag: favorited?,
    Map: userInfo
}
```

### Operations

You can perform two types of operations on maps:

1. Operations performed directly on the map itself, which includes adding fields to and removing fields from the map (e.g. adding a flag or removing a counter).
2. Operations performed on the Data Types nested in the map, e.g. incrementing a counter in the map or setting a flag to `enable`. Those operations behave just like the operations specific to that Data Type.

## Registers

Registers are essentially named binaries (like strings). Any binary value can act as the value of a register. Like flags, registers cannot be used on their own and must be embedded in maps.

Some examples of using registers:

- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
- Storing the title of a blog post in a map called `2010-03-01_blog_post`

### Operations

Registers can only have the binaries stored within them changed. They can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.

## Sets

Sets are collections of unique binary values, such as strings. For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV. Sets can be used either on their own or embedded in a map.
+ +Some examples of using sets: + +- Storing the UUIDs of a user's friends in a social network application +- Storing items in an e-commerce shopping cart + +### Operations + +Sets are subject to four basic operations: add an element, remove an +element, add multiple elements, or remove multiple elements. + + +## Advantages and Disadvantages of Data Types + +[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider. + +One of the core purposes behind data types is to relieve developers +using Riak KV of the burden of producing data convergence at the +application level by absorbing a great deal of that complexity into Riak KV +itself. Riak KV manages this complexity by building eventual consistency +into the data types themselves instead of requiring clients to do so. + +You can still build applications with Riak KV that treat it as a highly +available key/value store, and you will always have this choice. What +Riak Data Types provide is additional flexibility and a broader choice +palette. + +The trade-off that data types necessarily present is that they don't +allow you to produce your own convergence logic. If your use case +demands that you be able to create your own deterministic merge +functions, then Riak Data Types might not be a good fit. + + +## Implementation + +Conflicts between replicas are inevitable in a distributed system like +Riak KV. + +For example, if a map is stored in the key `my_map`, it is always +possible that the value of `my_map` will be different in nodes A and B. + +Without using data types, that conflict must be resolved using +timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt]. + + +## Convergence + +The benefit of data types is that Riak KV knows how to resolve value +conflicts by applying data type-specific rules. + +Riak KV does this by remembering the history of a value and broadcasting that +history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct. + +### Example + +Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`. While on another node the set has only one element, `apple`. + +What happens when the two nodes communicate and note the divergence? + +In this case Riak KV would declare the set with two elements the winner. +At that point, the node with the incorrect set would be told: "The set +`fruits` should have elements `apple` and `orange`." + +In general, convergence involves the following stages: + +1. Check for divergence. If the data types have the same value, Riak KV + does nothing. But if divergence is noted... +2. Riak KV applies data type-specific merge rules, like in the `fruits` + set example above, which will result in a "correct" value. +3. 
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. diff --git a/content/riak/kv/2.2.6/learn/concepts/eventual-consistency.md b/content/riak/kv/2.2.6/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..eb9f24c726 --- /dev/null +++ b/content/riak/kv/2.2.6/learn/concepts/eventual-consistency.md @@ -0,0 +1,198 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.2.6/theory/concepts/Eventual-Consistency + - /riak/kv/2.2.6/theory/concepts/Eventual-Consistency + - /riak/2.2.6/theory/concepts/eventual-consistency + - /riak/kv/2.2.6/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these +systems need to rely on some form of conflict-resolution mechanism. + +One of the things that makes Riak's eventual consistency model powerful +is that Riak does not dictate how data resolution takes place. While +Riak does ship with a set of defaults regarding how data is +[replicated](#replication-properties-and-request-tuning) and how +[conflicts are resolved][usage conflict resolution], you can override these +defaults if you want to employ a different strategy. + +Among those strategies, you can enable Riak to resolve object conflicts +automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or +special eventually consistent [Data Types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types/), or you can resolve those +conflicts on the application side by employing a use case-specific logic +of your choosing. More information on this can be found in our guide to +[conflict resolution][usage conflict resolution]. + +This variety of options enables you to manage Riak's eventually +consistent behavior in accordance with your application's [data model +or models]({{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/). + +## Replication Properties and Request Tuning + +In addition to providing you different means of resolving conflicts, +Riak also enables you to fine-tune **replication properties**, which +determine things like the number of nodes on which data should be stored +and the number of nodes that are required to respond to read, write, and +other requests. + +An in-depth discussion of these behaviors and how they can be +implemented on the application side can be found in our guides to +[replication properties][concept replication] and [conflict resolution][usage conflict resolution]. + +In addition to our official documentation, we also recommend checking +out the [Understanding Riak's Configurable +Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) +series from [the Basho blog](https://riak.com/blog/). + +## A Simple Example of Eventual Consistency + +Let's assume for the moment that a sports news application is storing +all of its data in Riak. One thing that the application always needs to +be able to report to users is the identity of the current manager of +Manchester United, which is stored in the key `manchester-manager` in +the bucket `premier-league-managers`. This bucket has `allow_mult` set +to `false`, which means that Riak will resolve all conflicts by itself. + +Now let's say that a node in this cluster has recently recovered from +failure and has an old copy of the key `manchester-manager` stored in +it, with the value `Alex Ferguson`. The problem is that Sir Ferguson +stepped down in 2013 and is no longer the manager. Fortunately, the +other nodes in the cluster hold the value `David Moyes`, which is +correct. + +Shortly after the recovered node comes back online, other cluster +members recognize that it is available. Then, a read request for +`manchester-manager` arrives from the application. Regardless of which +order the responses arrive to the node that is coordinating this +request, `David Moyes` will be returned as the value to the client, +because `Alex Ferguson` is recognized as an older value. + +Why is this? How does Riak make this decision? Behind the scenes, after +`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the +older value on the node that just came back online. 
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem,
+because one of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we've considered the
+possibility of a node for the `manchester-manager` key having failed. To
+be more precise, we've been talking about a *primary* node, one that,
+when the cluster is perfectly healthy, would bear responsibility for
+that key.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure.
+>
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3`, the client indicated that 3 primary
+nodes must respond for the operation to be considered successful. By
+that standard the operation failed, but there's no way to tell without
+performing another read whether the write truly failed to take effect.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels, et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
diff --git a/content/riak/kv/2.2.6/learn/concepts/keys-and-objects.md b/content/riak/kv/2.2.6/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..3f42f96c73
--- /dev/null
+++ b/content/riak/kv/2.2.6/learn/concepts/keys-and-objects.md
@@ -0,0 +1,49 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.2.6/theory/concepts/keys-and-values
+  - /riak/kv/2.2.6/theory/concepts/keys-and-values
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there is more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and arise when more than one actor updates an
+object, when a network partition occurs, or when a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
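+
+To make this concrete, here is a minimal sketch using the official
+Erlang client, `riakc`; the connection details and the `users`/`user1`
+bucket/key pair below are assumptions for illustration only:
+
+```erlang
+%% Minimal sketch: fetch an object and check for siblings with riakc.
+%% Assumes a local node listening for Protocol Buffers on port 8087.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+
+%% Riak locates the object by the bucket-key pair as a single entity.
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"users">>, <<"user1">>),
+
+%% One metadata-value pair is the normal case; more than one means
+%% the object has siblings for the application to resolve.
+case riakc_obj:value_count(Obj) of
+    1 -> riakc_obj:get_value(Obj);
+    _ -> riakc_obj:get_values(Obj)
+end.
+```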
diff --git a/content/riak/kv/2.2.6/learn/concepts/replication.md b/content/riak/kv/2.2.6/learn/concepts/replication.md
new file mode 100644
index 0000000000..85ab455cd0
--- /dev/null
+++ b/content/riak/kv/2.2.6/learn/concepts/replication.md
@@ -0,0 +1,319 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.2.6/theory/concepts/Replication
+  - /riak/kv/2.2.6/theory/concepts/Replication
+  - /riak/2.2.6/theory/concepts/replication
+  - /riak/kv/2.2.6/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing assurance
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication]({{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/multi) capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak uses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
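+
+As a hedged illustration of how R is bounded by N from the client's
+side, here is a sketch using the Erlang client, `riakc`; the bucket/key
+names and the local connection details are assumptions:
+
+```erlang
+%% Sketch: reads with explicit R values via riakc. With N=3, R can
+%% be at most 3, and R=3 demands an answer from every replica.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+
+%% This read fails if any one of the three replicas is unreachable.
+{ok, Obj3} = riakc_pb_socket:get(Pid, <<"my_bucket">>, <<"my_key">>,
+                                 [{r, 3}]),
+
+%% R=1 returns as soon as any single replica responds, trading
+%% freshness guarantees for availability and latency.
+{ok, Obj1} = riakc_pb_socket:get(Pid, <<"my_bucket">>, <<"my_key">>,
+                                 [{r, 1}]).
+```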
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable, data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up changing.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/2.2.6/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy][cluster ops v3 mdc]
+
+
+## Read Repair
+
+Read repair occurs when a read succeeds---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Alternatively, if you have [active
+anti-entropy][usage replication] enabled, your values will
+eventually replicate as a background task.
+
+For each object whose reads fail (or the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean?
+
+N=3 simply means that three copies of each piece of data will be stored
+in the cluster. That is, three different partitions/vnodes will receive
+copies of the data. **There are no guarantees that the three replicas
+will go to three separate physical nodes**; however, the built-in
+functions for determining where replicas go attempt to distribute the
+data evenly.
+
+As nodes are added and removed from the cluster, the ownership of
+partitions changes and may result in an uneven distribution of the data.
+On some rare occasions, Riak will also aggressively reshuffle ownership
+of the partitions to achieve a more even balance.
+
+For cases where the number of nodes is less than the N value, data will
+likely be duplicated on some nodes. For example, with N=3 and 2 nodes in
+the cluster, one node will likely have one replica, and the other node
+will have two replicas.
+
+## Understanding replication by example
+
+To better understand how data is replicated in Riak, let's take a look at
+a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically,
+we'll focus on two parts of the request: routing an object to a set of
+partitions and storing an object on a partition.
+
+### Routing an object to a set of partitions
+
+ * Assume we have 3 nodes
+ * Assume we store 3 replicas per object (N=3)
+ * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8)
+
+**Note**: It is not recommended that you use such a small ring size.
+This is for demonstration purposes only.
+
+With only 8 partitions our ring will look approximately as follows
+(response from `riak_core_ring_manager:get_my_ring/0` truncated for
+clarity):
+
+```erlang
+(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
+[{0,'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
+```
+
+The node handling this request hashes the bucket/key combination:
+
+```erlang
+(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+```
+
+The DocIdx hash is a 160-bit integer:
+
+```erlang
+(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx.
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+(dev1@127.0.0.1)6> I.
+1045375627425331784151332358177649483819648417632
+```
+
+The node looks up the hashed key in the ring, which returns a list of
+_preferred_ partitions for the given key.
+
+```erlang
+(node1@127.0.0.1)> Preflist = riak_core_ring:preflist(DocIdx, Ring).
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0, 'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]
```

+The node chooses the first N partitions from the list. The remaining
+partitions of the "preferred" list are retained as fallbacks to use if
+any of the target partitions are unavailable.
+
+```erlang
+(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist).
+{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0,'dev1@127.0.0.1'}],
+[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]}
+```
+
+The partition information returned from the ring contains a partition
+identifier and the parent node of that partition:
+
+```erlang
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}
+```
+
+The requesting node sends a message to each parent node with the object
+and partition identifier (pseudocode for clarity):
+
+```erlang
+'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232}
+'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+'dev1@127.0.0.1' ! {put, Object, 0}
+```
+
+If any of the target partitions fail, the node sends the object to one
+of the fallbacks. When the message is sent to the fallback node, the
+message references the object and original partition identifier. For
+example, if `dev2@127.0.0.1` were unavailable, the requesting node would
+then try each of the fallbacks. The fallbacks in this example are:
+
+```erlang
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}
+```
+
+The next available fallback node would be `dev3@127.0.0.1`. The
+requesting node would send a message to the fallback node with the
+object and original partition identifier:
+
+```erlang
+'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+```
+
+Note that the partition identifier in the message is the same one that
+was originally sent to `dev2@127.0.0.1`, only this time it is being sent to
+`dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node of
+that partition, it is smart enough to hold on to the object until
+`dev2@127.0.0.1` returns to the cluster.
+
+## Processing partition requests
+
+Processing requests per partition is fairly simple. Each node runs a
+single process (`riak_kv_vnode_master`) that distributes requests to
+individual partition processes (`riak_kv_vnode`). The
+`riak_kv_vnode_master` process maintains a list of partition identifiers
+and corresponding partition processes. If a process does not exist for a
+given partition identifier, a new process is spawned to manage that
+partition.
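+
+The get-or-spawn idea can be sketched in a few lines of plain Erlang.
+This is a toy model rather than Riak's actual implementation; the
+module, function, and variable names are invented for illustration:
+
+```erlang
+%% Toy sketch of a vnode-master-style dispatcher: route a request to
+%% the process managing a partition, spawning one if none exists yet.
+-module(toy_vnode_master).
+-export([dispatch/3, vnode_loop/1]).
+
+dispatch(Partition, Request, Procs) ->
+    case maps:find(Partition, Procs) of
+        {ok, Pid} ->
+            Pid ! Request,
+            Procs;
+        error ->
+            Pid = spawn(?MODULE, vnode_loop, [Partition]),
+            Pid ! Request,
+            maps:put(Partition, Pid, Procs)
+    end.
+
+%% Each partition process handles requests tagged with its identifier.
+vnode_loop(Partition) ->
+    receive
+        {put, _Object, Partition} ->
+            %% store the object for this partition, then keep serving
+            vnode_loop(Partition)
+    end.
+```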
+ +The `riak_kv_vnode_master` process treats all requests the same and +spawns partition processes as needed even when nodes receive requests +for partitions they do not own. When a partition's parent node is +unavailable, requests are sent to fallback nodes (handoff). The +`riak_kv_vnode_master` process on the fallback node spawns a process to +manage the partition even though the partition does not belong to the +fallback node. + +The individual partition processes perform hometests throughout the life +of the process. The hometest checks if the current node (`node/0`) +matches the parent node of the partition as defined in the ring. If the +process determines that the partition it is managing belongs on another +node (the parent node), it will attempt to contact that node. If that +parent node responds, the process will hand off any objects it has +processed for that partition and shut down. If that parent node does not +respond, the process will continue to manage that partition and check +the parent node again after a delay. The hometest is also run by +partition processes to account for changes in the ring, such as the +addition or removal of nodes to the cluster. + diff --git a/content/riak/kv/2.2.6/learn/concepts/strong-consistency.md b/content/riak/kv/2.2.6/learn/concepts/strong-consistency.md new file mode 100644 index 0000000000..1a9dfa4f30 --- /dev/null +++ b/content/riak/kv/2.2.6/learn/concepts/strong-consistency.md @@ -0,0 +1,101 @@ +--- +title: "Strong Consistency" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Strong Consistency" + identifier: "learn_concepts_strong_consistency" + weight: 109 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.2.6/theory/concepts/strong-consistency + - /riak/kv/2.2.6/theory/concepts/strong-consistency +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.2.6/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. 
Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. 
Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes diff --git a/content/riak/kv/2.2.6/learn/concepts/vnodes.md b/content/riak/kv/2.2.6/learn/concepts/vnodes.md new file mode 100644 index 0000000000..5aa3f5e317 --- /dev/null +++ b/content/riak/kv/2.2.6/learn/concepts/vnodes.md @@ -0,0 +1,156 @@ +--- +title: "Vnodes" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Vnodes" + identifier: "learn_concepts_vnodes" + weight: 109 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.2.6/theory/concepts/vnodes + - /riak/kv/2.2.6/theory/concepts/vnodes +--- + + +[concept causal context]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context +[concept clusters ring]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters/#the-ring +[concept replication]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/strong-consistency +[glossary node]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#node +[glossary ring]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#ring +[plan backend]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/cluster-capacity +[use admin riak cli]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-cli + + +Virtual nodes, more commonly referred to as **vnodes**, are processes +that manage partitions in the Riak [ring][glossary ring]. Each data +partition in a Riak cluster has a vnode that **claims** that partition. +Vnodes perform a wide variety of operations, from K/V storage operations +to guaranteeing [strong consistency][concept strong consistency] if you choose to use that +feature. + +## The Number of Vnodes in a Cluster + +The term [node][glossary node] refers to a full instance of Riak, +be it on its own physical machine or alongside others on a single +machine, as in a development cluster on your laptop. Each Riak node +contains multiple vnodes. The number per node is the [ring +size][concept clusters ring] divided by the number of nodes in the cluster. + +This means that in some clusters different nodes will have different +numbers of data partitions (and hence a different number of vnodes), +because (ring size / number of nodes) will not produce an even integer. +If the ring size of your cluster is 64 and you are running three nodes, +two of your nodes will have 21 vnodes, while the third node holds 22 +vnodes. + +The output of the [`riak-admin member-status`][use admin riak cli] +command shows this: + +``` +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +valid 34.4% -- 'dev1@127.0.0.1' +valid 32.8% -- 'dev2@127.0.0.1' +valid 32.8% -- 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 +``` + +In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of +64 partitions, while the other two nodes account for 32.8%, i.e. 21 out +of 64 partitions. This is normal and expected behavior in Riak. + +We strongly recommend setting the appropriate ring size, and by +extension the number of vnodes, prior to building a cluster. A full +guide can be found in our [cluster planning][plan cluster capacity] documentation. 
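+
+The arithmetic behind those percentages can be sketched in a few lines
+of Erlang; the function name here is invented for illustration:
+
+```erlang
+%% Sketch: how many vnodes each node claims for a given ring size.
+%% vnodes_per_node(64, 3) returns [22,21,21], matching the
+%% 34.4% / 32.8% / 32.8% split shown above (22/64 and 21/64).
+vnodes_per_node(RingSize, NumNodes) ->
+    Base = RingSize div NumNodes,
+    Extra = RingSize rem NumNodes,
+    [Base + 1 || _ <- lists:seq(1, Extra)] ++
+        [Base || _ <- lists:seq(1, NumNodes - Extra)].
+```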
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, acting as [strong consistency
+ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1004782375664995756265033322492444576013453623296`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node.
The report for a
+specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+ 0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
diff --git a/content/riak/kv/2.2.6/learn/dynamo.md b/content/riak/kv/2.2.6/learn/dynamo.md
new file mode 100644
index 0000000000..0485e06d7c
--- /dev/null
+++ b/content/riak/kv/2.2.6/learn/dynamo.md
@@ -0,0 +1,1924 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/2.2.6/theory/dynamo
+  - /riak/kv/2.2.6/theory/dynamo
+---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.
+
+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV,
+> Cassandra and Voldemort come to mind. You may also remember Dynomite (which
+> predates all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use.
+ +Categories and Subject Descriptors + +* D.4.2 [Operating Systems]: Storage Management; +* D.4.5 [Operating Systems]: Reliability; +* D.4.2 [Operating Systems]: Performance; + +General Terms + +Algorithms, Management, Measurement, Performance, Design, Reliability. + +## 1. Introduction + +Amazon runs a world-wide e-commerce platform that serves tens of millions +customers at peak times using tens of thousands of servers located in many data +centers around the world. There are strict operational requirements on Amazon’s +platform in terms of performance, reliability and efficiency, and to support +continuous growth the platform needs to be highly scalable. Reliability is one +of the most important requirements because even the slightest outage has +significant financial consequences and impacts customer trust. In addition, to +support continuous growth, the platform needs to be highly scalable. + +One of the lessons our organization has learned from operating Amazon’s platform +is that the reliability and scalability of a system is dependent on how its +application state is managed. Amazon uses a highly decentralized, loosely +coupled, service oriented architecture consisting of hundreds of services. In +this environment there is a particular need for storage technologies that are +always available. For example, customers should be able to view and add items to +their shopping cart even if disks are failing, network routes are flapping, or +data centers are being destroyed by tornados. Therefore, the service responsible +for managing shopping carts requires that it can always write to and read from +its data store, and that its data needs to be available across multiple data +centers. + +Dealing with failures in an infrastructure comprised of millions of components +is our standard mode of operation; there are always a small but significant +number of server and network components that are failing at any given time. As +such Amazon’s software systems need to be constructed in a manner that treats +failure handling as the normal case without impacting availability or +performance. + +To meet the reliability and scaling needs, Amazon has developed a number of +storage technologies, of which the Amazon Simple Storage Service (also available +outside of Amazon and known as Amazon S3), is probably the best known. This +paper presents the design and implementation of Dynamo, another highly available +and scalable distributed data store built for Amazon’s platform. Dynamo is used +to manage the state of services that have very high reliability requirements and +need tight control over the tradeoffs between availability, consistency, cost- +effectiveness and performance. Amazon’s platform has a very diverse set of +applications with different storage requirements. A select set of applications +requires a storage technology that is flexible enough to let application +designers configure their data store appropriately based on these tradeoffs to +achieve high availability and guaranteed performance in the most cost effective +manner. + +There are many services on Amazon’s platform that only need primary-key access +to a data store. For many services, such as those that provide best seller +lists, shopping carts, customer preferences, session management, sales rank, and +product catalog, the common pattern of using a relational database would lead to +inefficiencies and limit scale and availability. Dynamo provides a simple +primary-key only interface to meet the requirements of these applications. 
+ +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. It also provides insight into the +tuning of these techniques to meet the requirements of production systems with +very strict performance demands. + +The paper is structured as follows. Section 2 presents the background and +Section 3 presents the related work. Section 4 presents the system design and +Section 5 describes the implementation. Section 6 details the experiences and +insights gained by running Dynamo in production and Section 7 concludes the +paper. There are a number of places in this paper where additional information +may have been appropriate but where protecting Amazon’s business interests +require us to reduce some level of detail. For this reason, the intra- and +inter-datacenter latencies in section 6, the absolute request rates in section +6.2 and outage lengths and workloads in section 6.3 are provided through +aggregate measures instead of absolute details. + + +## 2. Background + +Amazon’s e-commerce platform is composed of hundreds of services that work in +concert to deliver functionality ranging from recommendations to order +fulfillment to fraud detection. Each service is exposed through a well defined +interface and is accessible over the network. 
These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated and diverged from
+> Dynamo's (as described in this paper).
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly-available, is efficient in its resource usage,
+> and has a simple scale out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
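+>
+> Here is a brief sketch of the first of these using the Erlang client
+> (`riakc`); the bucket, key, and value below are invented for the
+> example:
+>
+> ```erlang
+> %% Standard key/value access: PUT, GET, DELETE.
+> {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+> Obj = riakc_obj:new(<<"carts">>, <<"cart1">>, <<"3 items">>),
+> ok = riakc_pb_socket:put(Pid, Obj),
+> {ok, Fetched} = riakc_pb_socket:get(Pid, <<"carts">>, <<"cart1">>),
+> ok = riakc_pb_socket:delete(Pid, <<"carts">>, <<"cart1">>).
+> ```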
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract.
+ +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. 
+ + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. On the other hand, since the application is aware of the +data schema it can decide on the conflict resolution method that is best suited +for its client’s experience. 
+For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbor. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on, e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops.
+
+To reduce the additional latency introduced by multi-hop routing, some P2P
+systems (e.g., [14]) employ O(1) routing where each peer maintains enough
+routing information locally so that it can route requests (to access a data
+item) to the appropriate peer within a constant number of hops.
+
+> Riak KV's gossip protocol communicates between nodes with O(1) routing, and
+> maintains local routing information.
+
+Various storage systems, such as Oceanstore [9] and PAST [17] were built on top
+of these routing overlays. Oceanstore provides a global, transactional,
+persistent storage service that supports serialized updates on widely replicated
+data. To allow for concurrent updates while avoiding many of the problems
+inherent with wide-area locking, it uses an update model based on conflict
+resolution. Conflict resolution was introduced in [21] to reduce the number of
+transaction aborts. Oceanstore resolves conflicts by processing a series of
+updates, choosing a total order among them, and then applying them atomically in
+that order. It is built for an environment where the data is replicated on an
+untrusted infrastructure. By comparison, PAST provides a simple abstraction
+layer on top of Pastry for persistent and immutable objects. It assumes that the
+application can build the necessary storage semantics (such as mutable files) on
+top of it.
+
+### 3.2 Distributed File Systems and Databases
+
+Distributing data for performance, availability and durability has been widely
+studied in the file system and database systems community. Compared to P2P
+storage systems that only support flat namespaces, distributed file systems
+typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19]
+replicate files for high availability at the expense of consistency. Update
+conflicts are typically managed using specialized conflict resolution
+procedures. The Farsite system [1] is a distributed file system that does not
+use any centralized server like NFS. Farsite achieves high availability and
+scalability using replication. The Google File System [6] is another distributed
+file system built for hosting the state of Google’s internal applications. GFS
+uses a simple design with a single master server for hosting the entire metadata
+and where the data is split into chunks and stored in chunkservers. Bayou is a
+distributed relational database system that allows disconnected operations and
+provides eventual data consistency [21].
+
+Among these systems, Bayou, Coda and Ficus allow disconnected operations and are
+resilient to issues such as network partitions and outages. These systems differ
+on their conflict resolution procedures. For instance, Coda and Ficus perform
+system level conflict resolution and Bayou allows application level resolution.
+All of them, however, guarantee eventual consistency.
+
+Similar to these systems, Dynamo allows read and write operations to continue
+even during network partitions and resolves update conflicts using different
+conflict resolution mechanisms. Distributed block storage systems like FAB [18]
+split large size objects into smaller blocks and store each block in a highly
+available manner. In comparison to these systems, a key-value store is more
+suitable in this case because: (a) it is intended to store relatively small
+objects (size < 1M) and (b) key-value stores are easier to configure on a
+per-application basis. Antiquity is a wide-area distributed storage system
+designed to handle multiple server failures [23].
+It uses a secure log to preserve data
+integrity, replicates each log on multiple servers for durability, and uses
+Byzantine fault tolerance protocols to ensure data consistency. In contrast to
+Antiquity, Dynamo does not focus on the problem of data integrity and security
+and is built for a trusted environment. Bigtable is a distributed storage system
+for managing structured data. It maintains a sparse, multi-dimensional sorted
+map and allows applications to access their data using multiple attributes [2].
+Compared to Bigtable, Dynamo targets applications that require only key/value
+access with primary focus on high availability where updates are not rejected
+even in the wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages.
+
+<table id="table-1">
+  <caption>
+    Table 1: Summary of techniques used in Dynamo and their advantages.
+  </caption>
+  <tr>
+    <th>Problem</th>
+    <th>Technique</th>
+    <th>Advantage</th>
+  </tr>
+  <tr>
+    <td>Partitioning</td>
+    <td>Consistent Hashing</td>
+    <td>Incremental Scalability</td>
+  </tr>
+  <tr>
+    <td>High Availability for writes</td>
+    <td>Vector clocks with reconciliation during reads</td>
+    <td>Version size is decoupled from update rates.</td>
+  </tr>
+  <tr>
+    <td>Handling temporary failures</td>
+    <td>Sloppy Quorum and hinted handoff</td>
+    <td>Provides high availability and durability guarantee when some of the
+    replicas are not available.</td>
+  </tr>
+  <tr>
+    <td>Recovering from permanent failures</td>
+    <td>Anti-entropy using Merkle trees</td>
+    <td>Synchronizes divergent replicas in the background.</td>
+  </tr>
+  <tr>
+    <td>Membership and failure detection</td>
+    <td>Gossip-based membership protocol and failure detection.</td>
+    <td>Preserves symmetry and avoids having a centralized registry for storing
+    membership and node liveness information.</td>
+  </tr>
+</table>
+
+### 4.1 System Interface
+
+Dynamo stores objects associated with a key through a simple interface; it
+exposes two operations: get() and put(). The get(key) operation locates the
+object replicas associated with the key in the storage system and returns a
+single object or a list of objects with conflicting versions along with a
+context. The put(key, context, object) operation determines where the replicas
+of the object should be placed based on the associated key, and writes the
+replicas to disk. The context encodes system metadata about the object that is
+opaque to the caller and includes information such as the version of the object.
+The context information is stored along with the object so that the system can
+verify the validity of the context object supplied in the put request.
+
+> Whereas Dynamo only has the concept of keys, we added a higher level of
+> organization called a "bucket." Keys are stored in buckets and buckets are the
+> level at which several Riak KV properties can be configured (primarily the "N"
+> value, or the replication value). In addition to the bucket+key identifier and
+> value, Riak KV will also return the associated metadata for a given object
+> with each get or put.
+>
+> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API].
+
+[HTTP API]: {{<baseurl>}}riak/kv/2.2.6/developing/api/http/
+[Protocol Buffers API]: {{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers/
+
+Dynamo treats both the key and the object supplied by the caller as an opaque
+array of bytes. It applies an MD5 hash on the key to generate a 128-bit
+identifier, which is used to determine the storage nodes that are responsible
+for serving the key.
+
+> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash
+> to generate a 160-bit identifier which is then used to determine where in the
+> database each datum is stored. Riak KV treats data as an opaque binary, thus
+> enabling users to store virtually anything.
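+
+> To make the hashing above concrete, here is a minimal sketch (illustrative
+> only; the byte layout and separator are our own convention, not Riak KV's
+> internal encoding) of mapping a bucket/key pair onto a 2^160 ring:
+>
+> ```python
+> import hashlib
+>
+> def ring_position(bucket, key):
+>     """Hash a bucket/key pair to an integer position on a 2^160 ring."""
+>     digest = hashlib.sha1(bucket + b"/" + key).digest()  # 160-bit digest
+>     return int.from_bytes(digest, "big")
+>
+> print(ring_position(b"carts", b"customer-42"))
+> ```
+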
+
+### 4.2 Partitioning Algorithm
+
+One of the key design requirements for Dynamo is that it must scale
+incrementally. This requires a mechanism to dynamically partition the data over
+the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning
+scheme relies on consistent hashing to distribute the load across multiple
+storage hosts. In consistent hashing [10], the output range of a hash function
+is treated as a fixed circular space or “ring” (i.e. the largest hash value
+wraps around to the smallest hash value). Each node in the system is assigned a
+random value within this space which represents its “position” on the ring. Each
+data item identified by a key is assigned to a node by hashing the data item’s
+key to yield its position on the ring, and then walking the ring clockwise to
+find the first node with a position larger than the item’s position. Thus, each
+node becomes responsible for the region in the ring between it and its
+predecessor node on the ring. The principal advantage of consistent hashing is
+that departure or arrival of a node only affects its immediate neighbors and
+other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data around
+> the ring to partitions responsible for storing data. The ring has a maximum
+> key space of 2^160. Each bucket+key (and its associated value) is hashed to a
+> location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Each storage node will
+> optimistically handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo uses
+a variant of consistent hashing (similar to the one used in [10, 20]): instead
+of mapping a node to a single point in the circle, each node gets assigned to
+multiple points in the ring. To this end, Dynamo uses the concept of “virtual
+nodes”. A virtual node looks like a single node in the system, but each node can
+be responsible for more than one virtual node. Effectively, when a new node is
+added to the system, it is assigned multiple positions (henceforth, “tokens”) in
+the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed
+in Section 6.
+
+> Riak KV also has the concept of virtual nodes and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+If a node becomes unavailable (due to failures or routine maintenance), the load
+handled by this node is evenly dispersed across the remaining available nodes.
+
+When a node becomes available again, or a new node is added to the system, the
+newly available node accepts a roughly equivalent amount of load from each of
+the other available nodes.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+The number of virtual nodes that a node is responsible for can be decided based
+on its capacity, accounting for heterogeneity in the physical infrastructure.
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters/#the-ring
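+
+> The clockwise walk, the tokens, and the "distinct physical nodes" rule
+> described in the next section all fall out naturally from a sorted token
+> table. A toy sketch (not Riak KV's or Dynamo's actual code; node names and
+> the vnode count are invented):
+>
+> ```python
+> import hashlib
+> from bisect import bisect_right
+>
+> def hash_pos(data):
+>     return int.from_bytes(hashlib.sha1(data).digest(), "big")
+>
+> class Ring:
+>     def __init__(self, nodes, vnodes_per_node=8):
+>         # Each physical node claims several "tokens" (vnode positions).
+>         self.tokens = sorted(
+>             (hash_pos(f"{node}:{i}".encode()), node)
+>             for node in nodes for i in range(vnodes_per_node))
+>
+>     def preference_list(self, key, n=3):
+>         """First n distinct physical nodes clockwise from the key."""
+>         start = bisect_right(self.tokens, (hash_pos(key),))
+>         picked = []
+>         for i in range(len(self.tokens)):
+>             node = self.tokens[(start + i) % len(self.tokens)][1]
+>             if node not in picked:
+>                 picked.append(node)
+>             if len(picked) == n:
+>                 break
+>         return picked
+>
+> ring = Ring(["A", "B", "C", "D"])
+> print(ring.preference_list(b"carts/customer-42"))
+> ```
+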
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts. Each data item is replicated at N hosts, where N is a parameter
+configured “per-instance”. Each key, k, is assigned to a coordinator node
+(described in the previous section). The coordinator is in charge of the
+replication of the data items that fall within its range. In addition to locally
+storing each key within its range, the coordinator replicates these keys at the
+N-1 clockwise successor nodes in the ring. This results in a system where each
+node is responsible for the region of the ring between it and its Nth
+predecessor. In <a href="#figure-2">Figure 2</a>, node B replicates the key k at
+nodes C and D in addition to storing it locally. Node D will store the keys that
+fall in the ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
+> the concept of a bucket we covered above? In Riak KV, the replication
+> parameter, "N" (also called "n_val"), is configurable at the bucket level.
+> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
+> store three replicas of your data on three different partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called the
+preference list. The system is designed, as will be explained in Section 4.8, so
+that every node in the system can determine which nodes should be in this list
+for any particular key. To account for node failures, the preference list
+contains more than N nodes. Note that with the use of virtual nodes, it is
+possible that the first N successor positions for a particular key may be owned
+by less than N distinct physical nodes (i.e. a node may hold more than one of
+the first N positions). To address this, the preference list for a key is
+constructed by skipping positions in the ring to ensure that the list contains
+only distinct physical nodes.
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be propagated
+to all replicas asynchronously. A put() call may return to its caller before the
+update has been applied at all the replicas, which can result in scenarios where
+a subsequent get() operation may return an object that does not have the latest
+updates. If there are no failures then there is a bound on the update
+propagation times. However, under certain failure scenarios (e.g., server
+outages or network partitions), updates may not arrive at all replicas for an
+extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously, which, as you would expect, could result in a datum being
+> returned to the client that is out of date. But don't worry. We built in some
+> mechanisms to address this.
+
+There is a category of applications in Amazon’s platform that can tolerate such
+inconsistencies and can be constructed to operate under these conditions. For
+example, the shopping cart application requires that an “Add to Cart” operation
+can never be forgotten or rejected.
+If the most recent state of the cart is
+unavailable, and a user makes changes to an older version of the cart, that
+change is still meaningful and should be preserved. But at the same time it
+shouldn’t supersede the currently unavailable state of the cart, which itself
+may contain changes that should be preserved. Note that both “add to cart” and
+“delete item from cart” operations are translated into put requests to Dynamo.
+When a customer wants to add an item to (or remove from) a shopping cart and the
+latest version is not available, the item is added to (or removed from) the
+older version and the divergent versions are reconciled later.
+
+> Much like Dynamo was suited to the design of the shopping cart, Riak KV, and
+> its tradeoffs, are appropriate for a certain set of use cases. We happen to
+> feel that _most_ use cases can tolerate some level of eventual consistency.
+
+In order to provide this kind of guarantee, Dynamo treats the result of each
+modification as a new and immutable version of the data. It allows for multiple
+versions of an object to be present in the system at the same time. Most of the
+time, new versions subsume the previous version(s), and the system itself can
+determine the authoritative version (syntactic reconciliation). However, version
+branching may happen, in the presence of failures combined with concurrent
+updates, resulting in conflicting versions of an object. In these cases, the
+system cannot reconcile the multiple versions of the same object and the client
+must perform the reconciliation in order to collapse multiple branches of data
+evolution back into one (semantic reconciliation). A typical example of a
+collapse operation is “merging” different versions of a customer’s shopping
+cart. Using this reconciliation mechanism, an “add to cart” operation is never
+lost. However, deleted items can resurface.
+
+> The same holds true for Riak KV. If, by way of some failure and concurrent
+> update (rare but quite possible), there come to exist multiple versions of the
+> same object, Riak KV will push this decision down to the client (who are we to
+> tell you which is the authoritative object?). All that said, if your
+> application doesn't need this level of version control, we enable you to turn
+> the usage of vector clocks on and off at the bucket level.
+
+It is important to understand that certain failure modes can potentially result
+in the system having not just two but several versions of the same data. Updates
+in the presence of network partitions and node failures can potentially result
+in an object having distinct version sub-histories, which the system will need
+to reconcile in the future. This requires us to design applications that
+explicitly acknowledge the possibility of multiple versions of the same data (in
+order to never lose any updates).
+
+> Ditto.
+
+Dynamo uses vector clocks [12] in order to capture causality between different
+versions of the same object. A vector clock is effectively a list of (node,
+counter) pairs. One vector clock is associated with every version of every
+object. One can determine whether two versions of an object are on parallel
+branches or have a causal ordering by examining their vector clocks. If the
+counters on the first object’s clock are less-than-or-equal to all of the nodes
+in the second clock, then the first is an ancestor of the second and can be
+forgotten. Otherwise, the two changes are considered to be in conflict and
+require reconciliation.
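+
+> The ancestry test just described is a few lines of code once clocks are
+> modeled as maps from node name to counter. A sketch (ours, not Riak KV's
+> implementation):
+>
+> ```python
+> def descends(a, b):
+>     """True when clock a is a descendant of (or equal to) clock b."""
+>     return all(a.get(node, 0) >= counter for node, counter in b.items())
+>
+> def concurrent(a, b):
+>     """Siblings: neither clock descends from the other."""
+>     return not descends(a, b) and not descends(b, a)
+>
+> print(descends({"Sx": 2}, {"Sx": 1}))                      # True
+> print(concurrent({"Sx": 2, "Sy": 1}, {"Sx": 2, "Sz": 1}))  # True
+> ```
+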
+
+> As you may have already figured out, Riak KV uses vector clocks for object
+> versioning, too. Here are a whole host of resources to keep you busy for a while:
+>
+> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vector-clock)
+>
+> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/)
+> |
+> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
+>
+> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
+>
+> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)
+
+In Dynamo, when a client wishes to update an object, it must specify which
+version it is updating. This is done by passing the context it obtained from an
+earlier read operation, which contains the vector clock information. Upon
+processing a read request, if Dynamo has access to multiple branches that cannot
+be syntactically reconciled, it will return all the objects at the leaves, with
+the corresponding version information in the context. An update using this
+context is considered to have reconciled the divergent versions and the branches
+are collapsed into a single new version.
+
+**<figure id="figure-3" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure3.png">
+  <figcaption>
+    Figure 3: Version evolution of an object over time.
+  </figcaption>
+</figure>**
+
+To illustrate the use of vector clocks, let us consider the example shown in
+<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say
+Sx) that handles the write for this key increases its sequence number and uses
+it to create the data's vector clock. The system now has the object D1 and its
+associated clock [(Sx, 1)]. The client updates the object. Assume the same node
+handles this request as well. The system now also has object D2 and its
+associated clock [(Sx, 2)]. D2 descends from D1 and therefore over-writes D1,
+however there may be replicas of D1 lingering at nodes that have not yet seen
+D2. Let us assume that the same client updates the object again and a different
+server (say Sy) handles the request. The system now has data D3 and its
+associated clock [(Sx, 2), (Sy, 1)].
+
+Next assume a different client reads D2 and then tries to update it, and another
+node (say Sz) does the write. The system now has D4 (descendant of D2) whose
+version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2 could
+determine, upon receiving D4 and its clock, that D1 and D2 are overwritten by
+the new data and can be garbage collected. A node that is aware of D3 and
+receives D4 will find that there is no causal relation between them. In other
+words, there are changes in D3 and D4 that are not reflected in each other. Both
+versions of the data must be kept and presented to a client (upon a read) for
+semantic reconciliation.
+
+Now assume some client reads both D3 and D4 (the context will reflect that both
+values were found by the read). The read's context is a summary of the clocks of
+D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client performs the
+reconciliation and node Sx coordinates the write, Sx will update its sequence
+number in the clock. The new data D5 will have the following clock: [(Sx, 3),
+(Sy, 1), (Sz, 1)].
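+
+> Replaying Figure 3 with the same map-based clocks makes the bookkeeping
+> explicit (a self-contained sketch, not production code):
+>
+> ```python
+> def event(clock, node):
+>     """Return a copy of clock with node's counter incremented."""
+>     clock = dict(clock)
+>     clock[node] = clock.get(node, 0) + 1
+>     return clock
+>
+> def merge(a, b):
+>     """Pairwise maximum of two clocks (the read context summary)."""
+>     return {n: max(a.get(n, 0), b.get(n, 0)) for n in a.keys() | b.keys()}
+>
+> d1 = event({}, "Sx")             # [(Sx, 1)]
+> d2 = event(d1, "Sx")             # [(Sx, 2)]
+> d3 = event(d2, "Sy")             # [(Sx, 2), (Sy, 1)]
+> d4 = event(d2, "Sz")             # [(Sx, 2), (Sz, 1)], conflicts with D3
+> d5 = event(merge(d3, d4), "Sx")  # reconciliation coordinated by Sx
+> print(d5)                        # Sx -> 3, Sy -> 1, Sz -> 1
+> ```
+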
+
+A possible issue with vector clocks is that the size of vector clocks may grow
+if many servers coordinate the writes to an object. In practice, this is not
+likely because the writes are usually handled by one of the top N nodes in the
+preference list. In case of network partitions or multiple server failures,
+write requests may be handled by nodes that are not in the top N nodes in the
+preference list causing the size of vector clock to grow. In these scenarios, it
+is desirable to limit the size of vector clock. To this end, Dynamo employs the
+following clock truncation scheme: Along with each (node, counter) pair, Dynamo
+stores a timestamp that indicates the last time the node updated the data item.
+When the number of (node, counter) pairs in the vector clock reaches a threshold
+(say 10), the oldest pair is removed from the clock. Clearly, this truncation
+scheme can lead to inefficiencies in reconciliation as the descendant
+relationships cannot be derived accurately. However, this problem has not
+surfaced in production and therefore this issue has not been thoroughly
+investigated.
+
+> Riak KV does a certain amount of vector clock pruning to keep their growth
+> under control.
+
+
+### 4.5 Execution of get () and put () operations
+
+Any storage node in Dynamo is eligible to receive client get and put operations
+for any key. In this section, for sake of simplicity, we describe how these
+operations are performed in a failure-free environment and in the subsequent
+section we describe how read and write operations are executed during failures.
+
+> Any node in the Riak KV ring can coordinate a request. The Riak KV information
+> in this section applies to a failure-free environment.
+
+Both get and put operations are invoked using Amazon’s infrastructure-specific
+request processing framework over HTTP. There are two strategies that a client
+can use to select a node: (1) route its request through a generic load balancer
+that will select a node based on load information, or (2) use a partition-aware
+client library that routes requests directly to the appropriate coordinator
+nodes. The advantage of the first approach is that the client does not have to
+link any code specific to Dynamo in its application, whereas the second strategy
+can achieve lower latency because it skips a potential forwarding step.
+
+A node handling a read or write operation is known as the coordinator.
+Typically, this is the first among the top N nodes in the preference list. If
+the requests are received through a load balancer, requests to access a key may
+be routed to any random node in the ring. In this scenario, the node that
+receives the request will not coordinate it if the node is not in the top N of
+the requested key’s preference list. Instead, that node will forward the request
+to the first among the top N nodes in the preference list.
+
+Read and write operations involve the first N healthy nodes in the preference
+list, skipping over those that are down or inaccessible. When all nodes are
+healthy, the top N nodes in a key’s preference list are accessed. When there are
+node failures or network partitions, nodes that are lower ranked in the
+preference list are accessed.
+
+To maintain consistency among its replicas, Dynamo uses a consistency protocol
+similar to those used in quorum systems. This protocol has two key configurable
+values: R and W. R is the minimum number of nodes that must participate in a
+successful read operation. W is the minimum number of nodes that must
+participate in a successful write operation. Setting R and W such that R + W > N
+yields a quorum-like system. In this model, the latency of a get (or put)
+operation is dictated by the slowest of the R (or W) replicas. For this reason,
+R and W are usually configured to be less than N, to provide better latency.
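+
+> The R + W > N rule is simple arithmetic: a read set of size R and a write set
+> of size W drawn from the same N replicas must share at least one node. A
+> quick sketch of checking candidate configurations:
+>
+> ```python
+> def overlap_guaranteed(n, r, w):
+>     """Every read quorum intersects every write quorum iff R + W > N."""
+>     return r + w > n
+>
+> for n, r, w in [(3, 2, 2), (3, 1, 1), (3, 1, 3)]:
+>     print(f"N={n} R={r} W={w} -> overlap: {overlap_guaranteed(n, r, w)}")
+> ```
+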
+
+> Riak KV makes use of the same values. But, thanks to our concept of buckets,
+> we made it a bit more customizable. The default R and W values are set at the
+> bucket level but can be configured at the request level if the developer deems
+> it necessary for certain data. "Quorum" as described in Dynamo is the default
+> setting in Riak KV.
+>
+> Some more resources on R and W:
+>
+> [REST API]({{<baseurl>}}riak/kv/2.2.6/developing/api/http/)
+>
+> [Writing Data]({{<baseurl>}}riak/kv/2.2.6/developing/usage/creating-objects/)
+>
+> [Reading Data]({{<baseurl>}}riak/kv/2.2.6/developing/usage/reading-objects/)
+
+Upon receiving a put() request for a key, the coordinator generates the vector
+clock for the new version and writes the new version locally. The coordinator
+then sends the new version (along with the new vector clock) to the N
+highest-ranked reachable nodes. If at least W-1 nodes respond then the write is
+considered successful.
+
+> In Riak KV a write is considered successful when the total number of
+> responding writes equals W. This need not be a durable write, which is a
+> separate value in Riak KV labeled DW.
+
+Similarly, for a get() request, the coordinator requests all existing versions
+of data for that key from the N highest-ranked reachable nodes in the preference
+list for that key, and then waits for R responses before returning the result to
+the client. If the coordinator ends up gathering multiple versions of the data,
+it returns all the versions it deems to be causally unrelated. The divergent
+versions are then reconciled and the reconciled version superseding the current
+versions is written back.
+
+> Same for Riak KV. Reconciling divergent versions in Riak KV is called
+> [Read Repair]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication/#read-repair).
+
+
+### 4.6 Handling Failures: Hinted Handoff
+
+If Dynamo used a traditional quorum approach it would be unavailable during
+server failures and network partitions, and would have reduced durability even
+under the simplest of failure conditions. To remedy this it does not enforce
+strict quorum membership and instead it uses a “sloppy quorum”; all read and
+write operations are performed on the first N healthy nodes from the preference
+list, which may not always be the first N nodes encountered while walking the
+consistent hashing ring.
+
+> [Hinted handoff] is built into Riak KV's core.
+>
+> You can catch a glimpse of Riak KV's preference list (or *preflist*)
+> calculation in the [Replication] walkthrough.
+
+[Hinted handoff]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#hinted-handoff
+[Replication]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/replication/
+
+Consider the example of Dynamo configuration given in <a href="#figure-2">Figure
+2</a> with N=3. In this example, if node A is temporarily down or unreachable
+during a write operation then a replica that would normally have lived on A will
+now be sent to node D. This is done to maintain the desired availability and
+durability guarantees. The replica sent to D will have a hint in its metadata
+that suggests which node was the intended recipient of the replica (in this case
+A). Nodes that receive hinted replicas will keep them in a separate local
+database that is scanned periodically. Upon detecting that A has recovered, D
+will attempt to deliver the replica to A. Once the transfer succeeds, D may
+delete the object from its local store without decreasing the total number of
+replicas in the system.
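+
+> The mechanics are easy to see in miniature. In this sketch (node names and
+> data structures invented for illustration), a write meant for a down node
+> lands on a fallback with a hint, and handoff returns it later:
+>
+> ```python
+> class Node:
+>     def __init__(self, name):
+>         self.name, self.store, self.hinted = name, {}, []
+>
+> def sloppy_write(preflist, down, key, value, n=3):
+>     """Healthy owners take the write directly; a stand-in beyond the
+>     first n keeps any replica meant for an unreachable owner, with a hint."""
+>     spares = iter(nd for nd in preflist[n:] if nd.name not in down)
+>     for owner in preflist[:n]:
+>         if owner.name not in down:
+>             owner.store[key] = value
+>         else:
+>             next(spares).hinted.append((owner.name, key, value))
+>
+> def handoff(node, recovered):
+>     """Deliver hinted replicas once their intended owner is back."""
+>     for hint in [h for h in node.hinted if h[0] == recovered.name]:
+>         recovered.store[hint[1]] = hint[2]
+>         node.hinted.remove(hint)
+>
+> a, b, c, d = (Node(x) for x in "ABCD")
+> sloppy_write([a, b, c, d], down={"A"}, key="k", value="v")
+> handoff(d, a)    # A recovers; D hands the replica back
+> print(a.store)   # {'k': 'v'}
+> ```
+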
+
+Using hinted handoff, Dynamo ensures that the read and write operations are not
+failed due to temporary node or network failures. Applications that need the
+highest level of availability can set W to 1, which ensures that a write is
+accepted as long as a single node in the system has durably written the key to
+its local store. Thus, the write request is only rejected if all nodes in the
+system are unavailable. However, in practice, most Amazon services in production
+set a higher W to meet the desired level of durability. A more detailed
+discussion of configuring N, R and W follows in section 6.
+
+> As mentioned previously, Riak KV does not require that a write be durable,
+> only that a vnode responds in the affirmative. If you require a durable write
+> in the way mentioned here, use DW.
+
+It is imperative that a highly available storage system be capable of handling
+the failure of an entire data center(s). Data center failures happen due to
+power outages, cooling failures, network failures, and natural disasters. Dynamo
+is configured such that each object is replicated across multiple data centers.
+In essence, the preference list of a key is constructed such that the storage
+nodes are spread across multiple data centers. These datacenters are connected
+through high speed network links. This scheme of replicating across multiple
+datacenters allows us to handle entire data center failures without a data
+outage.
+
+> [Multi Datacenter Replication] was previously only implemented in the
+> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. Now it
+> is available in all versions from Riak KV 2.2.6 onwards.
+
+[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/2.2.6/using/reference/v3-multi-datacenter/architecture/
+[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
+
+
+### 4.7 Handling permanent failures: Replica synchronization
+
+Hinted handoff works best if the system membership churn is low and node
+failures are transient. There are scenarios under which hinted replicas become
+unavailable before they can be returned to the original replica node. To handle
+this and other threats to durability, Dynamo implements an anti-entropy (replica
+synchronization) protocol to keep the replicas synchronized.
+
+> Read repair, mentioned above, is the simplest form of anti-entropy. But it is
+> passive, not active as this section describes.
+
+To detect the inconsistencies between replicas faster and to minimize the amount
+of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a hash tree
+where leaves are hashes of the values of individual keys. Parent nodes higher in
+the tree are hashes of their respective children. The principal advantage of a
+Merkle tree is that each branch of the tree can be checked independently without
+requiring nodes to download the entire tree or the entire data set. Moreover,
+Merkle trees help in reducing the amount of data that needs to be transferred
+while checking for inconsistencies among replicas. For instance, if the hash
+values of the root of two trees are equal, then the values of the leaf nodes in
+the tree are equal and the nodes require no synchronization. If not, it implies
+that the values of some replicas are different. In such cases, the nodes may
+exchange the hash values of children and the process continues until it reaches
+the leaves of the trees, at which point the hosts can identify the keys that are
+“out of sync”. Merkle trees minimize the amount of data that needs to be
+transferred for synchronization and reduce the number of disk reads performed
+during the anti-entropy process.
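+
+> A toy version of the comparison shows the idea: compare roots first and only
+> walk leaf hashes when they differ. (Real trees have interior levels so whole
+> subtrees can be skipped; this flat sketch is ours, not Dynamo's code.)
+>
+> ```python
+> import hashlib
+>
+> def h(*parts):
+>     return hashlib.sha1("|".join(parts).encode()).hexdigest()
+>
+> def merkle(replica):
+>     """Leaf hash per key, plus a root hash over all leaves."""
+>     leaves = {k: h(v) for k, v in sorted(replica.items())}
+>     return h(*leaves.values()), leaves
+>
+> def out_of_sync(replica_a, replica_b):
+>     root_a, leaves_a = merkle(replica_a)
+>     root_b, leaves_b = merkle(replica_b)
+>     if root_a == root_b:
+>         return []        # equal roots: no synchronization needed
+>     return [k for k in sorted(set(leaves_a) | set(leaves_b))
+>             if leaves_a.get(k) != leaves_b.get(k)]
+>
+> print(out_of_sync({"k1": "v1", "k2": "v2"},
+>                   {"k1": "v1", "k2": "stale"}))   # ['k2']
+> ```
+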
+
+> Riak KV implements a Merkle-tree-based Active Anti-Entropy (*AAE*).
+
+Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
+separate Merkle tree for each key range (the set of keys covered by a virtual
+node) it hosts. This allows nodes to compare whether the keys within a key range
+are up-to-date. In this scheme, two nodes exchange the root of the Merkle tree
+corresponding to the key ranges that they host in common. Subsequently, using
+the tree traversal scheme described above the nodes determine if they have any
+differences and perform the appropriate synchronization action. The disadvantage
+with this scheme is that many key ranges change when a node joins or leaves the
+system thereby requiring the tree(s) to be recalculated. This issue is
+addressed, however, by the refined partitioning scheme described in Section 6.2.
+
+
+### 4.8 Membership and Failure Detection
+
+> This section is well expressed in [Adding and Removing Nodes] and
+> [Failure Scenarios].
+
+[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency/
+
+#### 4.8.1 Ring Membership
+
+> Riak KV operators can trigger node management via the
+> [riak-admin command-line tool].
+
+[riak-admin command-line tool]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/
+
+In Amazon’s environment node outages (due to failures and maintenance tasks) are
+often transient but may last for extended intervals. A node outage rarely
+signifies a permanent departure and therefore should not result in rebalancing
+of the partition assignment or repair of the unreachable replicas. Similarly,
+manual error could result in the unintentional startup of new Dynamo nodes. For
+these reasons, it was deemed appropriate to use an explicit mechanism to
+initiate the addition and removal of nodes from a Dynamo ring. An administrator
+uses a command line tool or a browser to connect to a Dynamo node and issue a
+membership change to join a node to a ring or remove a node from a ring. The
+node that serves the request writes the membership change and its time of issue
+to persistent store. The membership changes form a history because nodes can be
+removed and added back multiple times.
+
+> Nodes are manually added using the `riak-admin cluster join` command.
+>
+> When a node permanently departs, rebalancing is triggered using the
+> `riak-admin cluster leave` command.
+
+A gossip-based protocol propagates membership changes and maintains an
+eventually consistent view of membership. Each node contacts a peer chosen at
+random every second and the two nodes efficiently reconcile their persisted
+membership change histories.
+
+> Riak KV's ring state holds membership information, and is propagated via
+> [gossiping], including random reconciliation, defaulting to once a minute.
+
+[gossiping]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#gossiping
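+
+> A gossip round is just "pick a random peer, merge histories." In this sketch
+> (invented structure; this is not Riak KV's actual gossip format), the newest
+> change recorded for each node wins:
+>
+> ```python
+> import random
+>
+> class Member:
+>     def __init__(self, name, view):
+>         self.name = name
+>         self.view = view   # node name -> (status, logical time of change)
+>
+> def reconcile(a, b):
+>     """Merge two membership histories; the newer entry per node wins."""
+>     for name in set(a.view) | set(b.view):
+>         newest = max(a.view.get(name, ("unknown", 0)),
+>                      b.view.get(name, ("unknown", 0)),
+>                      key=lambda entry: entry[1])
+>         a.view[name] = b.view[name] = newest
+>
+> def gossip_round(cluster):
+>     for node in cluster:
+>         reconcile(node, random.choice([p for p in cluster if p is not node]))
+>
+> n1 = Member("n1", {"n1": ("joined", 1), "n2": ("joined", 2)})
+> n2 = Member("n2", {"n2": ("joined", 2), "n3": ("left", 5)})
+> gossip_round([n1, n2])
+> print(n1.view["n3"])   # ('left', 5) has propagated to n1
+> ```
+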
+
+When a node starts for the first time, it chooses its set of tokens (virtual
+nodes in the consistent hash space) and maps nodes to their respective token
+sets. The mapping is persisted on disk and initially contains only the local
+node and token set. The mappings stored at different Dynamo nodes are reconciled
+during the same communication exchange that reconciles the membership change
+histories. Therefore, partitioning and placement information also propagates via
+the gossip-based protocol and each storage node is aware of the token ranges
+handled by its peers. This allows each node to forward a key’s read/write
+operations to the right set of nodes directly.
+
+> These tokens are vnodes (virtual nodes) in Riak KV.
+
+
+#### 4.8.2 External Discovery
+
+The mechanism described above could temporarily result in a logically
+partitioned Dynamo ring. For example, the administrator could contact node A to
+join A to the ring, then contact node B to join B to the ring. In this scenario,
+nodes A and B would each consider itself a member of the ring, yet neither would
+be immediately aware of the other. To prevent logical partitions, some Dynamo
+nodes play the role of seeds. Seeds are nodes that are discovered via an
+external mechanism and are known to all nodes. Because all nodes eventually
+reconcile their membership with a seed, logical partitions are highly unlikely.
+Seeds can be obtained either from static configuration or from a configuration
+service. Typically seeds are fully functional nodes in the Dynamo ring.
+
+> To rectify these sorts of logical partitions, multiple Riak cluster changes
+> are configured as one batch. Any changes must first be viewed with `riak-admin
+> cluster plan`, then the changes are committed with `riak-admin cluster
+> commit`. The new ring state is gossiped.
+>
+> See _[The Node Join Process]_ for more.
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+
+
+#### 4.8.3 Failure Detection
+
+Failure detection in Dynamo is used to avoid attempts to communicate with
+unreachable peers during get() and put() operations and when transferring
+partitions and hinted replicas. For the purpose of avoiding failed attempts at
+communication, a purely local notion of failure detection is entirely
+sufficient: node A may consider node B failed if node B does not respond to node
+A’s messages (even if B is responsive to node C's messages). In the presence of
+a steady rate of client requests generating inter-node communication in the
+Dynamo ring, a node A quickly discovers that a node B is unresponsive when B
+fails to respond to a message; Node A then uses alternate nodes to service
+requests that map to B's partitions; A periodically retries B to check for the
+latter's recovery. In the absence of client requests to drive traffic between
+two nodes, neither node really needs to know whether the other is reachable and
+responsive.
+
+Decentralized failure detection protocols use a simple gossip-style protocol
+that enables each node in the system to learn about the arrival (or departure)
+of other nodes. For detailed information on decentralized failure detectors and
+the parameters affecting their accuracy, the interested reader is referred to
+[8]. Early designs of Dynamo used a decentralized failure detector to maintain a
+globally consistent view of failure state. Later it was determined that the
+explicit node join and leave methods obviate the need for a global view of
+failure state. This is because nodes are notified of permanent node additions
+and removals by the explicit node join and leave methods and temporary node
+failures are detected by the individual nodes when they fail to communicate with
+others (while forwarding requests).
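+
+> The "purely local" detector described above needs nothing more than a
+> timestamp per peer. A sketch (the timeout value is invented):
+>
+> ```python
+> import time
+>
+> class LocalFailureDetector:
+>     """A's private view of B: 'failed' means B stopped answering A."""
+>     def __init__(self, timeout=1.0):
+>         self.timeout = timeout
+>         self.last_reply = {}
+>
+>     def heard_from(self, peer):
+>         self.last_reply[peer] = time.monotonic()
+>
+>     def suspects(self, peer):
+>         last = self.last_reply.get(peer)
+>         return last is None or time.monotonic() - last > self.timeout
+>
+> fd = LocalFailureDetector()
+> fd.heard_from("B")
+> print(fd.suspects("B"))   # False until B goes quiet past the timeout
+> ```
+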
+
+> Riak KV follows the same mechanism, by manually triggering permanent ring
+> state changes, and gossiping the new state.
+
+
+### 4.9 Adding/Removing Storage Nodes
+
+When a new node (say X) is added into the system, it gets assigned a number of
+tokens that are randomly scattered on the ring. For every key range that is
+assigned to node X, there may be a number of nodes (less than or equal to N)
+that are currently in charge of handling keys that fall within its token range.
+Due to the allocation of key ranges to X, some existing nodes no longer have to
+store some of their keys and these nodes transfer those keys to X. Let us
+consider a simple bootstrapping scenario where node X is added to the ring shown
+in <a href="#figure-2">Figure 2</a> between A and B. When X is added to the
+system, it is in charge of storing keys in the ranges (F, G], (G, A] and (A, X].
+As a consequence, nodes B, C and D no longer have to store the keys in these
+respective ranges. Therefore, nodes B, C, and D will offer to and upon
+confirmation from X transfer the appropriate set of keys. When a node is removed
+from the system, the reallocation of keys happens in a reverse process.
+
+> Riak KV does not randomly assign vnodes, but rather, iterates through the list
+> of partitions, assigning them to nodes in a round-robin style.
+
+Operational experience has shown that this approach distributes the load of key
+distribution uniformly across the storage nodes, which is important to meet the
+latency requirements and to ensure fast bootstrapping. Finally, by adding a
+confirmation round between the source and the destination, it is made sure that
+the destination node does not receive any duplicate transfers for a given key
+range.
+
+
+## 5. Implementation
+
+In Dynamo, each storage node has three main software components: request
+coordination, membership and failure detection, and a local persistence engine.
+All these components are implemented in Java.
+
+> Riak KV is implemented in Erlang. Request coordination and membership behavior
+> is defined by [riak_core] and implemented by [Riak KV].
+
+[riak_core]: http://github.com/basho/riak_core
+[Riak KV]: http://github.com/basho/riak_kv
+
+Dynamo’s local persistence component allows for different storage engines to be
+plugged in. Engines that are in use are Berkeley Database (BDB) Transactional
+Data Store, BDB Java Edition, MySQL, and an in-memory buffer with persistent
+backing store. The main reason for designing a pluggable persistence component
+is to choose the storage engine best suited for an application’s access
+patterns. For instance, BDB can handle objects typically in the order of tens of
+kilobytes whereas MySQL can handle objects of larger sizes. Applications choose
+Dynamo’s local persistence engine based on their object size distribution. The
+majority of Dynamo’s production instances use BDB Transactional Data Store.
+
+> Riak KV ships with various [backend options]. [Bitcask] is the default, but
+> [LevelDB] and Main [Memory] are also used heavily in production (in that
+> order). You can also use more than one backend in production via the [Multi]
+> backend configuration.
+>
+> Bitcask is a fast and reliable choice, but does have some limitations at very
+> large scales.
+> For larger clusters, you may want to choose LevelDB (which also
+> supports [secondary indexes]). The Memory backend is an excellent choice when
+> speed is important and durability is not. It also has TTL support.
+
+[backend options]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/
+[Bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask/
+[LevelDB]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb/
+[Memory]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/memory/
+[Multi]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/multi/
+[secondary indexes]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/secondary-indexes/
+
+The request coordination component is built on top of an event-driven messaging
+substrate where the message processing pipeline is split into multiple stages
+similar to the SEDA architecture [24]. All communications are implemented using
+Java NIO channels. The coordinator executes the read and write requests on
+behalf of clients by collecting data from one or more nodes (in the case of
+reads) or storing data at one or more nodes (for writes). Each client request
+results in the creation of a state machine on the node that received the client
+request. The state machine contains all the logic for identifying the nodes
+responsible for a key, sending the requests, waiting for responses, potentially
+doing retries, processing the replies and packaging the response to the client.
+Each state machine instance handles exactly one client request. For instance, a
+read operation implements the following state machine: (i) send read requests to
+the nodes, (ii) wait for minimum number of required responses, (iii) if too few
+replies were received within a given time bound, fail the request, (iv)
+otherwise gather all the data versions and determine the ones to be returned and
+(v) if versioning is enabled, perform syntactic reconciliation and generate an
+opaque write context that contains the vector clock that subsumes all the
+remaining versions. For the sake of brevity the failure handling and retry
+states are left out.
+
+> Request coordination in Riak KV uses Erlang message passing, but follows a
+> similar state machine.
+
+After the read response has been returned to the caller the state machine waits
+for a small period of time to receive any outstanding responses. If stale
+versions were returned in any of the responses, the coordinator updates those
+nodes with the latest version. This process is called read repair because it
+repairs replicas that have missed a recent update at an opportunistic time and
+relieves the anti-entropy protocol from having to do it.
+
+> Riak KV implements [Read Repair].
+
+[Read Repair]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication/#read-repair
+
+As noted earlier, write requests are coordinated by one of the top N nodes in
+the preference list. Although it is desirable always to have the first node
+among the top N to coordinate the writes thereby serializing all writes at a
+single location, this approach has led to uneven load distribution resulting in
+SLA violations. This is because the request load is not uniformly distributed
+across objects. To counter this, any of the top N nodes in the preference list
+is allowed to coordinate the writes. In particular, since each write usually
+follows a read operation, the coordinator for a write is chosen to be the node
+that replied fastest to the previous read operation which is stored in the
+context information of the request. This optimization enables us to pick the
+node that has the data that was read by the preceding read operation thereby
+increasing the chances of getting “read-your-writes” consistency. It also
+reduces variability in the performance of the request handling which improves
+the performance at the 99.9 percentile.
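+
+> The read path above condenses to a few steps once nodes are stubbed out as
+> dicts: fan out, wait for R, pick the winners, then read-repair stale copies.
+> A sketch (the "largest value wins" rule stands in for real vector clock
+> comparison):
+>
+> ```python
+> def coordinate_read(replicas, key, r=2):
+>     """replicas: list of dicts standing in for storage nodes."""
+>     responses = [(rep, rep[key]) for rep in replicas if key in rep]
+>     if len(responses) < r:                    # too few replies: fail
+>         raise TimeoutError("read quorum not met")
+>     latest = max(v for _, v in responses)     # toy version pick
+>     for rep, v in responses:                  # read repair stale replicas
+>         if v != latest:
+>             rep[key] = latest
+>     return latest
+>
+> nodes = [{"k": 2}, {"k": 1}, {"k": 2}]
+> print(coordinate_read(nodes, "k"), nodes)     # repairs the middle replica
+> ```
+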
+
+
+## 6. Experiences & Lessons Learned
+
+> Much of this section relates to benchmarks run against Dynamo. You can run
+> [Basho Bench] against your own Riak cluster to discover your own
+> optimal values.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.2.6/using/performance/benchmarking/
+
+Dynamo is used by several services with different configurations. These
+instances differ by their version reconciliation logic, and read/write quorum
+characteristics. The following are the main patterns in which Dynamo is used:
+
+* Business logic specific reconciliation: This is a popular use case for Dynamo.
+Each data object is replicated across multiple nodes. In case of divergent
+versions, the client application performs its own reconciliation logic. The
+shopping cart service discussed earlier is a prime example of this category. Its
+business logic reconciles objects by merging different versions of a customer’s
+shopping cart.
+
+> Riak KV currently supports simple conflict resolution by way of read repair,
+> leaving more complex reconciliation to the client. There are several tools
+> to help simplify this task, such as [Statebox].
+>
+> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative
+> Replicated Data Types)], for reconciling common data types like sets and
+> counters.
+
+[Statebox]: https://github.com/mochi/statebox_riak
+[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/2.2.6/developing/data-types/
+
+
+* Timestamp based reconciliation: This case differs from the previous one only
+in the reconciliation mechanism. In case of divergent versions, Dynamo performs
+simple timestamp based reconciliation logic of “last write wins”; i.e., the
+object with the largest physical timestamp value is chosen as the correct
+version. The service that maintains customer’s session information is a good
+example of a service that uses this mode.
+
+> Riak also supports this for high-performance cases where accuracy is less
+> important than speed.
+
+* High performance read engine: While Dynamo is built to be an “always
+writeable” data store, a few services are tuning its quorum characteristics and
+using it as a high performance read engine. Typically, these services have a
+high read request rate and only a small number of updates. In this
+configuration, typically R is set to be 1 and W to be N. For these services,
+Dynamo provides the ability to partition and replicate their data across
+multiple nodes thereby offering incremental scalability. Some of these instances
+function as the authoritative persistence cache for data stored in more heavy
+weight backing stores. Services that maintain product catalog and promotional
+items fit in this category.
+
+> Riak can be used in this manner.
+
+The main advantage of Dynamo is that its client applications can tune the values
+of N, R and W to achieve their desired levels of performance, availability and
+durability. For instance, the value of N determines the durability of each
+object. A typical value of N used by Dynamo’s users is 3.
+
+The values of W and R impact object availability, durability and consistency.
+
+For instance, if W is set to 1, then the system will never reject a write
+request as long as there is at least one node in the system that can
+successfully process a write request. However, low values of W and R can
+increase the risk of inconsistency as write requests are deemed successful and
+returned to the clients even if they are not processed by a majority of the
+replicas. This also introduces a vulnerability window for durability when a
+write request is successfully returned to the client even though it has been
+persisted at only a small number of nodes.
+
+Traditional wisdom holds that durability and availability go hand-in-hand.
+However, this is not necessarily true here. For instance, the vulnerability
+window for durability can be decreased by increasing W. This may increase the
+probability of rejecting requests (thereby decreasing availability) because more
+storage hosts need to be alive to process a write request.
+
+The common (N,R,W) configuration used by several instances of Dynamo is (3,2,2).
+These values are chosen to meet the necessary levels of performance, durability,
+consistency, and availability SLAs.
+
+All the measurements presented in this section were taken on a live system
+operating with a configuration of (3,2,2) and running a couple hundred nodes
+with homogenous hardware configurations. As mentioned earlier, each instance of
+Dynamo contains nodes that are located in multiple datacenters. These
+datacenters are typically connected through high speed network links. Recall
+that to generate a successful get (or put) response R (or W) nodes need to
+respond to the coordinator. Clearly, the network latencies between datacenters
+affect the response time and the nodes (and their datacenter locations) are
+chosen such that the applications’ target SLAs are met.
+
+> Ditto for Riak.
+
+### 6.1 Balancing Performance and Durability
+
+While Dynamo’s principal design goal is to build a highly available data store,
+performance is an equally important criterion in Amazon’s platform. As noted
+earlier, to provide a consistent customer experience, Amazon’s services set
+their performance targets at higher percentiles (such as the 99.9th or 99.99th
+percentiles). A typical SLA required of services that use Dynamo is that 99.9%
+of the read and write requests execute within 300ms.
+
+Since Dynamo is run on standard commodity hardware components that have far less
+I/O throughput than high-end enterprise servers, providing consistently high
+performance for read and write operations is a non-trivial task. The involvement
+of multiple storage nodes in read and write operations makes it even more
+challenging, since the performance of these operations is limited by the slowest
+of the R or W replicas. <a href="#figure-4">Figure 4</a> shows the average and
+99.9th percentile latencies of Dynamo’s read and write operations during a
+period of 30 days. As seen in the figure, the latencies exhibit a clear diurnal
+pattern which is a result of the diurnal pattern in the incoming request rate
+(i.e., there is a significant difference in request rate between the daytime and
+night). Moreover, the write latencies are higher than read latencies obviously
+because write operations always result in disk access. Also, the 99.9th
+percentile latencies are around 200 ms and are an order of magnitude higher than
+the averages.
This is because the 99.9th percentile latencies are affected by +several factors such as variability in request load, object sizes, and locality +patterns. + +**<figure id="figure-4" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure4.png"> + <figcaption> + Figure 4: Average and 99.9 percentiles of latencies for read and write + requests during our peak request season of December 2006. The intervals + between consecutive ticks in the x-axis correspond to 12 hours. Latencies + follow a diurnal pattern similar to the request rate and 99.9 percentile + latencies are an order of magnitude higher than averages. + </figcaption> +</figure>** + +While this level of performance is acceptable for a number of services, a few +customer-facing services required higher levels of performance. For these +services, Dynamo provides the ability to trade-off durability guarantees for +performance. In the optimization each storage node maintains an object buffer in +its main memory. Each write operation is stored in the buffer and gets +periodically written to storage by a writer thread. In this scheme, read +operations first check if the requested key is present in the buffer. If so, the +object is read from the buffer instead of the storage engine. + +> This is more similar to Riak's W value, since only DW requires a durable write +> to respond as a success. + +This optimization has resulted in lowering the 99.9th percentile latency by a +factor of 5 during peak traffic even for a very small buffer of a thousand +objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure, +write buffering smoothes out higher percentile latencies. Obviously, this scheme +trades durability for performance. In this scheme, a server crash can result in +missing writes that were queued up in the buffer. To reduce the durability risk, +the write operation is refined to have the coordinator choose one out of the N +replicas to perform a “durable write”. Since the coordinator waits only for W +responses, the performance of the write operation is not affected by the +performance of the durable write operation performed by a single replica. + +**<figure id="figure-5" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure5.png"> + <figcaption> + Figure 5: Comparison of performance of 99.9th percentile latencies for + buffered vs. non-buffered writes over a period of 24 hours. The intervals + between consecutive ticks in the x-axis correspond to one hour. + </figcaption> +</figure>** + +> Setting DW=1 will replicate this behavior. + + +### 6.2 Ensuring Uniform Load distribution + +Dynamo uses consistent hashing to partition its key space across its replicas +and to ensure uniform load distribution. A uniform key distribution can help us +achieve uniform load distribution assuming the access distribution of keys is +not highly skewed. In particular, Dynamo’s design assumes that even where there +is a significant skew in the access distribution there are enough keys in the +popular end of the distribution so that the load of handling popular keys can be +spread across the nodes uniformly through partitioning. This section discusses +the load imbalance seen in Dynamo and the impact of different partitioning +strategies on load distribution. + +> Riak follows a SHA1 based consistent hashing for [partitioning]. 
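+>
+> As a rough illustration (a sketch only; Riak's actual hashing of the
+> bucket/key pair happens inside riak_core, not in client code), mapping a
+> key onto a ring of equal-sized partitions with SHA-1 looks something like:
+>
+>     require 'digest/sha1'
+>
+>     RING_SIZE = 64  # number of equal-sized partitions; a power of 2
+>
+>     # Hash the bucket/key pair onto the 160-bit ring, then return the
+>     # index of the partition that claims that point.
+>     def partition_for(bucket, key)
+>       point = Digest::SHA1.hexdigest("#{bucket}/#{key}").to_i(16)
+>       point / (2**160 / RING_SIZE)
+>     end
+>
+>     partition_for('users', 'alice')  #=> an integer in 0...64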
+
+[partitioning]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication/#understanding-replication-by-example
+
+To study the load imbalance and its correlation with request load, the total
+number of requests received by each node was measured for a period of 24 hours,
+broken down into intervals of 30 minutes. In a given time window, a node is
+considered to be “in-balance” if the node’s request load deviates from the
+average load by a value less than a certain threshold (here 15%). Otherwise
+the node was deemed “out-of-balance”. <a href="#figure-6">Figure 6</a> presents
+the fraction of nodes that are “out-of-balance” (henceforth, “imbalance ratio”)
+during this time period. For reference, the corresponding request load received
+by the entire system during this time period is also plotted. As seen in the
+figure, the imbalance ratio decreases with increasing load. For instance, during
+low loads the imbalance ratio is as high as 20% and during high loads it is
+close to 10%. Intuitively, this can be explained by the fact that under high
+loads, a large number of popular keys are accessed and due to uniform
+distribution of keys the load is evenly distributed. However, during low loads
+(where load is 1/8th of the measured peak load), fewer popular keys are
+accessed, resulting in a higher load imbalance.
+
+**<figure id="figure-6" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure6.png">
+  <figcaption>
+    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
+    request load is above a certain threshold from the average system load) and
+    their corresponding request load. The interval between ticks in x-axis
+    corresponds to a time period of 30 minutes.
+  </figcaption>
+</figure>**
+
+<i>This section discusses how Dynamo’s partitioning scheme has evolved over time
+and its implications on load distribution.</i>
+
+<strong>Strategy 1:</strong> T random tokens per node and partition by token
+value: This was the initial strategy deployed in production (and described in
+Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
+at random from the hash space). The tokens of all nodes are ordered according to
+their values in the hash space. Every two consecutive tokens define a range. The
+last token and the first token form a range that "wraps" around from the highest
+value to the lowest value in the hash space. Because the tokens are chosen
+randomly, the ranges vary in size. As nodes join and leave the system, the token
+set changes and consequently the ranges change. Note that the space needed to
+maintain the membership at each node increases linearly with the number of nodes
+in the system.
+
+> Riak uses equal-sized partitions with a round-robin distribution--not
+> variably-sized partitions that are randomly distributed.
+
+While using this strategy, the following problems were encountered. First, when
+a new node joins the system, it needs to “steal” its key ranges from other
+nodes. However, the nodes handing the key ranges off to the new node have to
+scan their local persistence store to retrieve the appropriate set of data
+items. Note that performing such a scan operation on a production node is tricky
+as scans are highly resource intensive operations and they need to be executed
+in the background without affecting the customer performance. This requires us
+to run the bootstrapping task at the lowest priority. However, this
+significantly slows the bootstrapping process and, during the busy shopping
+season, when the nodes are handling millions of requests a day, the
+bootstrapping has taken almost a day to complete. Second, when a node
+joins/leaves the system, the key ranges handled by many nodes change and the
+Merkle trees for the new ranges need to be recalculated, which is a non-trivial
+operation to perform on a production system. Finally, there was no easy way to
+take a snapshot of the entire key space due to the randomness in key ranges, and
+this made the process of archival complicated. In this scheme, archiving the
+entire key space requires us to retrieve the keys from each node separately,
+which is highly inefficient.
+
+The fundamental issue with this strategy is that the schemes for data
+partitioning and data placement are intertwined. For instance, in some cases, it
+is preferred to add more nodes to the system in order to handle an increase in
+request load. However, in this scenario, it is not possible to add nodes without
+affecting data partitioning. Ideally, it is desirable to use independent schemes
+for partitioning and placement. To this end, the following strategies were
+evaluated:
+
+<strong>Strategy 2:</strong> T random tokens per node and equal sized
+partitions: In this strategy, the hash space is divided into Q equally sized
+partitions/ranges and each node is assigned T random tokens. Q is usually set
+such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In
+this strategy, the tokens are only used to build the function that maps values
+in the hash space to the ordered lists of nodes and not to decide the
+partitioning. A partition is placed on the first N unique nodes that are
+encountered while walking the consistent hashing ring clockwise from the end of
+the partition. <a href="#figure-7">Figure 7</a> illustrates this strategy for
+N=3. In this example, nodes A, B, C are encountered while walking the ring from
+the end of the partition that contains key k1. The primary advantages of this
+strategy are: (i) decoupling of partitioning and partition placement, and (ii)
+enabling the possibility of changing the placement scheme at runtime.
+
+> As mentioned before, Riak uses equal-sized partitions, but not
+> random distribution.
+
+**<figure id="figure-7" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure7-small.png">
+  <figcaption>
+    Figure 7: Partitioning and placement of keys in the three strategies. A, B,
+    and C depict the three unique nodes that form the preference list for the
+    key k1 on the consistent hashing ring (N=3). The shaded area indicates the
+    key range for which nodes A, B, and C form the preference list. Dark arrows
+    indicate the token locations for various nodes.
+  </figcaption>
+</figure>**
+
+<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
+Similar to strategy 2, this strategy divides the hash space into Q equally sized
+partitions and the placement of partitions is decoupled from the partitioning
+scheme. Moreover, each node is assigned Q/S tokens where S is the number of
+nodes in the system. When a node leaves the system, its tokens are randomly
+distributed to the remaining nodes such that these properties are preserved.
+Similarly, when a node joins the system it "steals" tokens from nodes in the
+system in a way that preserves these properties.
+
+> Riak most closely follows strategy 3.
+>
+> See [The Node Join Process] and [Replacing a Node].
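+>
+> With Q equal-sized partitions and S nodes, a round-robin claim gives each
+> node roughly Q/S partitions. A toy sketch of that arithmetic (an
+> illustration only, not Riak's actual claim algorithm):
+>
+>     nodes = %w[node1 node2 node3]  # S = 3
+>     ring_size = 64                 # Q = 64 partitions
+>
+>     # Assign partitions to nodes round-robin, then count each node's share.
+>     ring = (0...ring_size).map { |p| [p, nodes[p % nodes.size]] }.to_h
+>     ring.values.tally              # Ruby 2.7+
+>     #=> {"node1"=>22, "node2"=>21, "node3"=>21}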
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+[Replacing a Node]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/replacing-node/
+
+The efficiency of these three strategies is evaluated for a system with S=30 and
+N=3. However, comparing these different strategies in a fair manner is hard as
+different strategies have different configurations to tune their efficiency. For
+instance, the load distribution property of strategy 1 depends on the number of
+tokens (i.e., T) while strategy 3 depends on the number of partitions (i.e., Q).
+One fair way to compare these strategies is to evaluate the skew in their load
+distribution while all strategies use the same amount of space to maintain their
+membership information. For instance, in strategy 1 each node needs to maintain
+the token positions of all the nodes in the ring and in strategy 3 each node
+needs to maintain the information regarding the partitions assigned to each
+node.
+
+In our next experiment, these strategies were evaluated by varying the relevant
+parameters (T and Q). The load balancing efficiency of each strategy was
+measured for different sizes of membership information that needs to be
+maintained at each node, where load balancing efficiency is defined as the ratio
+of the average number of requests served by each node to the maximum number of
+requests served by the hottest node.
+
+The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
+figure, strategy 3 achieves the best load balancing efficiency and strategy 2
+has the worst load balancing efficiency. For a brief time, Strategy 2 served as
+an interim setup during the process of migrating Dynamo instances from using
+Strategy 1 to Strategy 3. Compared to Strategy 1, Strategy 3 achieves better
+efficiency and reduces the size of membership information maintained at each
+node by three orders of magnitude. While storage is not a major issue, the nodes
+gossip the membership information periodically and as such it is desirable to
+keep this information as compact as possible. In addition to this, strategy 3 is
+advantageous and simpler to deploy for the following reasons: (i) Faster
+bootstrapping/recovery: Since partition ranges are fixed, they can be stored in
+separate files, meaning a partition can be relocated as a unit by simply
+transferring the file (avoiding random accesses needed to locate specific
+items). This simplifies the process of bootstrapping and recovery. (ii) Ease of
+archival: Periodical archiving of the dataset is a mandatory requirement for
+most of Amazon storage services. Archiving the entire dataset stored by Dynamo
+is simpler in strategy 3 because the partition files can be archived separately.
+By contrast, in Strategy 1, the tokens are chosen randomly, and archiving the
+data stored in Dynamo requires retrieving the keys from individual nodes
+separately and is usually inefficient and slow. The disadvantage of strategy 3
+is that changing the node membership requires coordination in order to preserve
+the properties required of the assignment.
+
+**<figure id="figure-8" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure8.png">
+  <figcaption>
+    Figure 8: Comparison of the load distribution efficiency of different
+    strategies for a system with 30 nodes and N=3 with an equal amount of
+    metadata maintained at each node. The values of the system size and
+    number of replicas are based on the typical configuration deployed for
+    the majority of our services.
+  </figcaption>
+</figure>**
+
+### 6.3 Divergent Versions: When and How Many?
+
+As noted earlier, Dynamo is designed to trade off consistency for availability.
+To understand the precise impact of different failures on consistency, detailed
+data is required on multiple factors: outage length, type of failure, component
+reliability, workload etc. Presenting these numbers in detail is outside of the
+scope of this paper. However, this section discusses a good summary metric: the
+number of divergent versions seen by the application in a live production
+environment.
+
+> This first statement should be read carefully. It's probably more correct to
+> say that Dynamo (and Riak) provides no consistency guarantees, and allows
+> users to trade availability for durability/latency.
+
+Divergent versions of a data item arise in two scenarios. The first is when the
+system is facing failure scenarios such as node failures, data center failures,
+and network partitions. The second is when the system is handling a large number
+of concurrent writers to a single data item and multiple nodes end up
+coordinating the updates concurrently. From both a usability and efficiency
+perspective, it is preferred to keep the number of divergent versions at any
+given time as low as possible. If the versions cannot be syntactically
+reconciled based on vector clocks alone, they have to be passed to the business
+logic for semantic reconciliation. Semantic reconciliation introduces additional
+load on services, so it is desirable to minimize the need for it.
+
+In our next experiment, the number of versions returned to the shopping cart
+service was profiled for a period of 24 hours. During this period, 99.94% of
+requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
+of requests saw 3 versions; and 0.00009% of requests saw 4 versions. This shows
+that divergent versions are created rarely.
+
+Experience shows that the increase in the number of divergent versions is
+driven not by failures but by an increase in the number of concurrent writers.
+The increase in the number of concurrent writes is usually triggered by busy
+robots (automated client programs) and rarely by humans. This issue is not
+discussed in detail due to the sensitive nature of the story.
+
+### 6.4 Client-driven or Server-driven Coordination
+
+As mentioned in Section 5, Dynamo has a request coordination component that uses
+a state machine to handle incoming requests. Client requests are uniformly
+assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
+coordinator for a read request. Write requests, on the other hand, will be
+coordinated by a node in the key’s current preference list. This restriction is
+due to the fact that these preferred nodes have the added responsibility of
+creating a new version stamp that causally subsumes the version that has been
+updated by the write request. Note that if Dynamo’s versioning scheme is based
+on physical timestamps, any node can coordinate a write request.
+
+> In Riak, a server-side load balancer is an optional configuration. You
+> generally use either virtual IPs or reverse proxies.
+>
+> See [Load Balancing] for more information.
+
+[Load Balancing]: {{<baseurl>}}riak/kv/2.2.6/configuring/load-balancing-proxy/
+
+An alternative approach to request coordination is to move the state machine to
+the client nodes. In this scheme client applications use a library to perform
+request coordination locally. A client periodically picks a random Dynamo node
+and downloads its current view of Dynamo membership state. Using this
+information the client can determine which set of nodes form the preference list
+for any given key. Read requests can be coordinated at the client node thereby
+avoiding the extra network hop that is incurred if the request were assigned to
+a random Dynamo node by the load balancer. Writes will either be forwarded to a
+node in the key’s preference list or can be coordinated locally if Dynamo is
+using timestamp-based versioning.
+
+> Many [client libraries] provide built-in node request coordination.
+>
+> For example, using the Ruby driver, you could specify three nodes like this:
+>
+>     client = Riak::Client.new(nodes: [
+>       {host: '10.0.0.1'},
+>       {host: '10.0.0.2'},
+>       {host: '10.0.0.3'}
+>     ])
+>
+> Note that the Riak clients do not coordinate with Riak's preference list, but
+> simply round-robin requests, letting the Riak cluster handle routing.
+
+[client libraries]: {{<baseurl>}}riak/kv/2.2.6/developing/client-libraries/
+
+An important advantage of the client-driven coordination approach is that a load
+balancer is no longer required to uniformly distribute client load. Fair load
+distribution is implicitly guaranteed by the near uniform assignment of keys to
+the storage nodes. Obviously, the efficiency of this scheme is dependent on how
+fresh the membership information is at the client. Currently clients poll a
+random Dynamo node every 10 seconds for membership updates. A pull-based
+approach was chosen over a push-based one as the former scales better with a
+large number of clients and requires very little state to be maintained at
+servers regarding clients. However, in the worst case the client can be exposed
+to stale membership for a duration of 10 seconds. If the client detects that its
+membership table is stale (for instance, when some members are unreachable), it
+will immediately refresh its membership information.
+
+<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
+percentile and averages that were observed for a period of 24 hours using
+client-driven coordination compared to the server-driven approach. As seen in
+the table, the client-driven coordination approach reduces the latencies by at
+least 30 milliseconds for 99.9th percentile latencies and decreases the average
+by 3 to 4 milliseconds. The latency improvement is because the client-driven
+approach eliminates the overhead of the load balancer and the extra network hop
+that may be incurred when a request is assigned to a random node. As seen in the
+table, average latencies tend to be significantly lower than latencies at the
+99.9th percentile. This is because Dynamo’s storage engine caches and write
+buffer have good hit ratios. Moreover, since the load balancers and network
+introduce additional variability to the response time, the gain in response time
+is higher for the 99.9th percentile than the average.
+
+<table id="table-2">
+  <caption>
+    Table 2: Performance of client-driven and server-driven
+    coordination approaches.
+  </caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations were not affected significantly. To this end, the
+background tasks were integrated with an admission control mechanism. Each of
+the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock-contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementation and maintenance of Dynamo. Many Amazon internal services have
+used Dynamo for the past two years and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of their requests and no
+data loss event has occurred to date.
+
+Moreover, the primary advantage of Dynamo is that it provides the necessary
+knobs, in the form of the three parameters (N,R,W), for applications to tune
+their instance based on their needs. Unlike popular commercial data stores,
+Dynamo exposes data consistency and reconciliation logic issues to the
+developers. At the outset, one may expect the application logic to become more
+complex. However, historically, Amazon’s platform is built for high availability
+and many applications are designed to handle different failure modes and
+inconsistencies that may arise. Hence, porting such applications to use Dynamo
+was a relatively simple task.
+For new applications that want to use Dynamo, some analysis is required during
+the initial stages of the development to pick the right conflict resolution
+mechanisms that meet the business case appropriately. Finally, Dynamo adopts a
+full membership model where each node is aware of the data hosted by its peers.
+To do this, each node actively gossips the full routing table with other nodes
+in the system. This model works well for a system that contains a couple
+hundred nodes. However, scaling such a design to run with tens of thousands of
+nodes is not trivial because the overhead in maintaining the routing table
+increases with the system size. This limitation might be overcome by
+introducing hierarchical extensions to Dynamo. Also, note that this problem is
+actively addressed by O(1) DHT systems (e.g., [14]).
+
+> This is equally true for Riak. As mentioned above, consider running
+> [Basho Bench] to help discover your optimal setup. Nothing will give you
+> better numbers than real experimentation.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.2.6/using/performance/benchmarking/
+
+## 7. Conclusions
+
+> This paper was an overview of Riak from a Dynamo point-of-view. To get a
+> better sense of the Riak ecosystem, read our ever-expanding [documentation].
+
+[documentation]: {{<baseurl>}}
+
+This paper described Dynamo, a highly available and scalable data store, used
+for storing state of a number of core services of Amazon.com’s e-commerce
+platform. Dynamo has provided the desired levels of availability and performance
+and has been successful in handling server failures, data center failures and
+network partitions. Dynamo is incrementally scalable and allows service owners
+to scale up and down based on their current request load. Dynamo allows service
+owners to customize their storage system to meet their desired performance,
+durability and consistency SLAs by allowing them to tune the parameters N, R,
+and W.
+
+The production use of Dynamo for the past year demonstrates that decentralized
+techniques can be combined to provide a single highly-available system. Its
+success in one of the most challenging application environments shows that an
+eventually consistent storage system can be a building block for
+highly-available applications.
diff --git a/content/riak/kv/2.2.6/learn/glossary.md b/content/riak/kv/2.2.6/learn/glossary.md new file mode 100644 index 0000000000..223de4d965 --- /dev/null +++ b/content/riak/kv/2.2.6/learn/glossary.md @@ -0,0 +1,353 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.2.6/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/2.2.6/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/2.2.6/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/2.2.6/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/2.2.6/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/2.2.6/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/2.2.6/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/2.2.6/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/2.2.6/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
+ +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Hinted Handoff + +Hinted handoff is a technique for dealing with node failure in the Riak +cluster in which neighboring nodes temporarily take over storage +operations for the failed node. When the failed node returns to the +cluster, the updates received by the neighboring nodes are handed off to +it. + +Hinted handoff allows Riak to ensure database availability. When a node +fails, Riak can continue to handle requests as if the node were still +there. + +* [Recovering a Failed Node][repair recover failure recovery] + +## Key + +Keys are unique object identifiers in Riak and are scoped within buckets +and bucket types. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Lager + +[Lager] is an Erlang/OTP framework that +ships as Riak's default logger. + +## MapReduce + +Riak's MapReduce gives developers the capability to perform more +powerful queries over the data stored in their key/value data. + +* [Using MapReduce][usage mapreduce] + +## Node + +A node is analogous to a physical server. Nodes run a certain number of +vnodes, each of which claims a partition in the Riak Ring key space. + +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Object + +An object is another name for a value. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Partition + +Partitions are the spaces into which a Riak cluster is divided. Each +vnode in Riak is responsible for a partition. Data is stored on a set +number of partitions determined by the `n_val` setting, with the target +partitions chosen statically by applying consistent hashing to an +object's key. + +* [Clusters][concept clusters] +* [Eventual Consistency][concept eventual consistency] +* [Cluster Capacity Planning][plan cluster capacity] + +## Quorum + +Quorum in Riak has two meanings: + +* The quantity of replicas that must respond to a read or write request + before it is considered successful. This is defined as a bucket + property or as one of the relevant parameters to a single request + (R,W,DW,RW). +* A symbolic quantity for the above, `quorum`, which is equivalent to + `n_val` / 2 + 1. The default setting is `2`. + +* [Eventual Consistency][concept eventual consistency] +* [Replication properties][apps replication properties] +* [Understanding Riak's Configurable Behaviors] + +## Sloppy Quorum + +During failure scenarios, in which available nodes < total nodes, sloppy +quorum is used to ensure that Riak is still available to take writes. +When a primary node is unavailable, another node will accept its write +requests. When the node returns, data is transferred to the primary node +via the [Hinted Handoff](#hinted-handoff) process. + +## Read Repair + +Read repair is an anti-entropy mechanism that Riak uses to +optimistically update stale replicas when they reply to a read request +with stale data. + +* [More about Read Repair][concept replication] + +## Replica + +Replicas are copies of data stored in Riak. The number of replicas +required for both successful reads and writes is configurable in Riak +and should be set based on your application's consistency and +availability requirements. 
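+
+For example, with the official Ruby client (a sketch; `r` and `w` here are
+the per-request options exposed by the `riak-client` gem), a read can be
+answered by a single replica while a write waits on two:
+
+```ruby
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+bucket = client.bucket('users')
+
+obj = bucket.get('alice', r: 1)  # succeeds once 1 replica has answered
+obj.store(w: 2)                  # succeeds once 2 replicas have acknowledged
+```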
+ +* [Eventual Consistency][concept eventual consistency] +* [Understanding Riak's Configurable Behaviors] + +## Riak Core + +Riak Core is the modular distributed systems framework that serves as +the foundation for Riak's scalable architecture. + +* [Riak Core] +* [Where To Start With Riak Core] + +## Riak KV + +Riak KV is the key/value datastore for Riak. + +* [Riak KV] + +## Riak Pipe + +Riak Pipe is the processing layer that powers Riak's MapReduce. It's +best described as "UNIX pipes for Riak." + +* [Riak Pipe] +* [Riak Pipe - the New MapReduce Power] +* [Riak Pipe - Riak's Distributed Processing Framework] + +## Riak Search + +Riak Search is a distributed, scalable, failure-tolerant, realtime, +full-text search engine integrating [Apache +Solr](https://lucene.apache.org/solr/) with Riak KV. + +* [Using Search][usage search] + +## Ring + +The Riak Ring is a 160-bit integer space. This space is equally divided +into partitions, each of which is claimed by a vnode, which themselves +reside on actual physical server nodes. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Cluster Capacity Planning][plan cluster capacity] + +## Secondary Indexing (2i) + +Secondary Indexing in Riak gives developers the ability to tag an object +stored in Riak with one or more values which can then be queried. + +* [Using Secondary Indexes][usage secondary-indexes] +* [Repairing Indexes][repair recover repairs] + +## Strong Consistency + +While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater +enable you to apply strong consistency guarantees to some or all of your +data, thus using Riak as a CP (consistent plus partition-tolerant) +rather than AP (highly available plus partition-tolerant) system. + +* [Strong Consistency Concept][concept strong consistency] +* [Using Strong Consistency][cluster ops strong consistency] + +## Value + +Riak is best described as a key/value store. In versions of Riak prior +to 2.0, all "values" are opaque BLOBs (binary large objects) identified +with a unique key. Values can be any type of data, including a string, a +JSON object, a text document, etc. Modifying values involves fetching +the value that exists in Riak and substituting it for a new value; +operations on values are thus basic CRUD operations. + +[Riak Data Types][dev data types], added in version 2.0, are an important +exception to this. While still considered values---because they are +stored in bucket type/bucket/key locations, like anything in Riak---Riak +Data Types are not BLOBs and are modified by Data Type-specific +operations. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] +* [Data Types][dev data types] + + +## Vector Clock + +Riak utilizes vector clocks (or _vclocks_) to handle version control. +Since any node in a Riak cluster is able to handle a request, and not +all nodes need to participate, data versioning is required to keep track +of a current value. When a value is stored in Riak, it is tagged with a +vector clock and establishes the initial version. When it is updated, +the client provides the vector clock of the object being modified so +that this vector clock can be extended to reflect the update. Riak can +then compare vector clocks on different versions of the object and +determine certain attributes of the data. 
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] diff --git a/content/riak/kv/2.2.6/learn/new-to-nosql.md b/content/riak/kv/2.2.6/learn/new-to-nosql.md new file mode 100644 index 0000000000..dbcf217557 --- /dev/null +++ b/content/riak/kv/2.2.6/learn/new-to-nosql.md @@ -0,0 +1,16 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: "2.2.6" +#menu: +# riak_kv-2.2.6: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +--- + +**TODO: Add content (not sure where this lives in existing docs)** diff --git a/content/riak/kv/2.2.6/learn/use-cases.md b/content/riak/kv/2.2.6/learn/use-cases.md new file mode 100644 index 0000000000..836851b2cb --- /dev/null +++ b/content/riak/kv/2.2.6/learn/use-cases.md @@ -0,0 +1,401 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/2.2.6/dev/data-modeling/ + - /riak/kv/2.2.6/dev/data-modeling/ +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/2.2.6/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/2.2.6/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/2.2.6/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. 
This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships.
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.2.6_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+
+Riak's tunable [apps replication properties][replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used in
+conjunction with a secondary analytics cluster that performs more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+patterns of reading and writing data to Riak are very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache Logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value.
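+
+For example, using the Ruby client (the device bucket and key format here are
+hypothetical), one reading per interval might be stored like this:
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+
+# One bucket per sensor device; one key per collection interval.
+bucket = client.bucket('sensor_1234')
+reading = bucket.new('2017-01-15T12:30:00Z')
+reading.content_type = 'application/json'
+reading.data = { 'temperature_c' => 21.4, 'humidity' => 0.53 }
+reading.store
+```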
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye out for the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
+      Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type of object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account or have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of modeling with
+user data. A common example would be storing data for assembling a social
+network timeline. To create a user timeline, you could use a `timeline` bucket
+in Riak and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs that could then be
+used to retrieve the full information from another bucket, or perhaps the full
+status updates themselves. If you want to store additional data, such as a
+timestamp, category or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+in the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client. Read more
Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the post itself, though in a different bucket, so
+the full bucket/key combination is still unique. Another possibility would be
+to store each comment with its own ID. Loading the full view with comments
+would require your application to read from both the posts and comments buckets
+to assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
diff --git a/content/riak/kv/2.2.6/learn/why-riak-kv.md b/content/riak/kv/2.2.6/learn/why-riak-kv.md
new file mode 100644
index 0000000000..f76d983e88
--- /dev/null
+++ b/content/riak/kv/2.2.6/learn/why-riak-kv.md
@@ -0,0 +1,221 @@
+---
+title: "Why Riak KV?"
+description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Why Riak KV?" + identifier: "learn_why_riak_kv" + weight: 100 + parent: "learn" +toc: true +aliases: + - /riak/2.2.6/theory/why-riak/ + - /riak/kv/2.2.6/theory/why-riak/ +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.2.6/developing/app-guide/replication-properties +[Basho Bench]: {{<baseurl>}}riak/kv/2.2.6/using/performance/benchmarking +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency +[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[Datomic]: http://www.datomic.com/overview.html +[dev data types]: {{<baseurl>}}riak/kv/2.2.6/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#read-repair + + +## What is Riak? + +Riak is a distributed database designed to deliver maximum data +availability by distributing data across multiple servers. As long as +your Riak client can reach *one* Riak server, it should be able to write +data. + +Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data. + + +### Basho's goals for Riak + +Goal | Description +-------|------- +**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself are experiencing failure conditions +**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden +**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity +**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available + +### When Riak makes sense + +If your data does not fit on a single server and demands a distributed +database architecture, you should take a close look at Riak as a +potential solution to your data availability issues. Getting distributed +databases right is **very** difficult, and Riak was built to address the +problem of data availability with as few trade-offs and downsides as +possible. + +Riak's focus on availability makes it a good fit whenever downtime is +unacceptable. No one can promise 100% uptime, but Riak is designed to +survive network partitions and hardware failures that would +significantly disrupt most databases. + +A less-heralded feature of Riak is its predictable latency. Because its +fundamental operations---read, write, and delete---do not involve +complex data joins or locks, it services those requests promptly. Thanks +to this capability, Riak is often selected as a data storage backend for +data management software from a variety of paradigms, such as +[Datomic]. + +From the standpoint of the actual content of your data, Riak might also +be a good choice if your data can be modeled as one of Riak's currently +available [Data Types][dev data types]: flags, registers, counters, +sets, or maps. These Data Types enable you to take advantage of Riak's +high availability approach while simplifying application development. + +### When Riak is Less of a Good Fit + +We recommend running no fewer than 5 data servers in a cluster. +This means that Riak can be overkill for small databases. 
If you're not +already sure that you will need a distributed database, there's a good +chance that you won't need Riak. + +If explosive growth is a possibility, however, you are always highly +advised to prepare for that in advance. Scaling at Internet speeds is +sometimes compared to overhauling an airplane mid-flight. If you feel +that such a transition might be necessary in the future, then you might +want to consider Riak. + +Riak's simple data model, consisting of keys and values as its atomic +elements, means that your data must be denormalized if your system is to +be reasonably performant. For most applications this is not a serious +hurdle. But if your data simply cannot be effectively managed as keys +and values, Riak will most likely not be the best fit for you. + +Correspondingly, if your application demands a high query load by any +means other than key/value lookup---e.g. SQL-style `SELECT * FROM table` +operations---Riak will not be as efficient as other databases. If you +wish to compare Riak with other data technologies, Basho offers a tool +called [Basho Bench] to help measure its performance, so that you can +decide whether the availability and operational benefits of Riak +outweigh its disadvantages. + +## How Does a Riak Cluster Work? + +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. 
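+
+As a concrete illustration of `n_val`, a bucket's replication factor can be
+inspected and adjusted over Riak's HTTP interface. This is only a sketch; it
+assumes a node listening on `localhost:8098` and a hypothetical bucket named
+`my_bucket`:
+
+```bash
+# Inspect the current bucket properties, including n_val (default: 3)
+curl http://localhost:8098/buckets/my_bucket/props
+
+# Raise the replication factor for this bucket to 5
+curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"n_val":5}}'
+```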
+ +## When Things Go Wrong + +Riak retains fault tolerance, data integrity, and availability even in +failure conditions such as hardware failure and network partitions. Riak +has a number of means of addressing these scenarios and other bumps in +the road, like version conflicts in data. + +### Hinted Handoff + +Hinted handoff enables Riak to handle node failure. If a node goes down, +a neighboring node will take over its storage operations. When the +failed node returns, the updates received by the neighboring node are +handed back to it. This ensures that availability for writes and updates +is maintained automatically, minimizing the operational burden of +failure conditions. + +### Version Conflicts + +In any system that replicates data, conflicts can arise, for example +when two clients update the same object at the exact same time or when +not all updates have yet reached hardware that is experiencing lag. + +In Riak, replicas are [eventually consistent][concept eventual consistency], +meaning that while data is always available, not all replicas may have +the most recent update at the exact same time, causing brief +periods---generally on the order of milliseconds---of inconsistency +while all state changes are synchronized. + +Riak addresses data conflicts as follows: When you make a read request, +Riak looks up all replicas for that object. By default, Riak will return +the most recently updated version, determined by looking at the object's +vector clock. Vector clocks are metadata attached to each replica when +it is created. They are extended each time a replica is updated to keep +track of versions. You can also allow clients to resolve conflicts +themselves if that is a better fit for your use case. + +### Riak Data Types + +If you are not interested in dealing with version conflicts on the +application side, [Riak Data Types][dev data types] offer a powerful +yet easy-to-use means of storing certain types of data while allowing +Riak to handle merge conflicts. These conflicts are resolved +automatically by Riak using Data Type-specific algorithms inspired by +research into [convergent replicated data types]. + +### Read Repair + +When an outdated replica is returned as part of a read request, Riak +will automatically update the out-of-sync replica to make it consistent. +[Read repair][glossary read rep], a self-healing property of +the database, will even update a replica that returns a `not_found` in +the event that a node loses the data due to physical failure. + +### Reading and Writing Data in Failure Conditions + +In Riak, you can set an R value for reads and a W value for writes. +These values give you control over how many replicas must respond to a +request for it to succeed. + +Let's say that you have an N value of 3 (aka `n_val=3`) for a particular +key/value pair, but one of the physical nodes responsible for a replica +is down. With an `r=2` setting, only 2 replicas must return results for +read to be deemed successful. This allows Riak to provide read +availability even when nodes are down or laggy. The same applies for the +W in writes. If this value is not specified, Riak defaults to `quorum`, +according to which the majority of nodes must respond. + +There is more on [replication properties][apps replication properties] elsewhere in the +documentation. 
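+
+As a brief sketch of these tunable request values (again assuming a node on
+`localhost:8098` and a hypothetical `users` bucket), the HTTP interface accepts
+`r` and `w` as query parameters on individual requests:
+
+```bash
+# Require only 2 of the 3 replicas to respond to this read
+curl http://localhost:8098/buckets/users/keys/john?r=2
+
+# Require a majority of replicas (quorum) to acknowledge this write
+curl -XPUT http://localhost:8098/buckets/users/keys/john?w=quorum \
+  -H "Content-Type: application/json" \
+  -d '{"name":"John"}'
+```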
diff --git a/content/riak/kv/2.2.6/release-notes.md b/content/riak/kv/2.2.6/release-notes.md new file mode 100644 index 0000000000..ed6ee339cd --- /dev/null +++ b/content/riak/kv/2.2.6/release-notes.md @@ -0,0 +1,192 @@ +--- +title: "Riak KV 2.2.6 Release Notes" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Release Notes" + identifier: "index_release_notes" + weight: 101 + parent: index +toc: false +aliases: + - /riak/2.2.6/community/release-notes + - /riak/kv/2.2.6/intro-v20 + - /riak/2.2.6/intro-v20 + - /riak/kv/2.2.6/introduction +--- + +Released April 25, 2018. + +> This release is dedicated to the memory of Andy Gross. Thank you and RIP. + +## Overview + +This is the first full community release of Riak, post-Basho's +collapse into bankruptcy. A lot has happened, in particular [bet365](https://twitter.com/bet365Tech) bought Basho's +assets and donated the code to the community. They kept the Basho +website running, the mailing list, the documents site (after [TI Tokyo](https://www.tiot.jp/) +had helpfully mirrored the docs in the interim) and have done a huge amount to +provide continuity to the community. + +The development work on this release of Riak has received significant +funding from [NHS Digital](https://twitter.com/NHSDigital), who depend on Riak for Spine II, and other +critical services. Thanks also to [ESL](https://twitter.com/ErlangSolutions), [TI Tokyo](https://www.tiot.jp/), and all the other +individuals and organisations involved. + +This release of Riak is based on the last known-good release of Riak, +riak-2.2.3. There is good work in the `develop` branches of many Basho +repos, but since much of it was unfinished, unreleased, untested, or +just status-unknown, we decided as a community to go forward based on +riak-2.2.3. + +This is the first release with open source multi-data-centre +replication. The rest of the changes are fixes ([riak-core claim](#core-claim-fixes), +repl), new features ([gsets](#gsets), [participate in coverage](#participate-in-2i), [node-confirms](#node-confirms)), +and [fixes to tests](#developer-improvements) and the build/development process. + +[Improvements](#improvements) + +[Known Issues](#known-issues) - please read **before upgrading** from a previous Riak release + +[Log of Changes](#change-log-for-this-release) + +[Previous Release Notes](#previous-release-notes) + +## Improvements + +#### Multi Datacentre Replication + +Previously a paid for enterprise addition to Riak as part of the Riak +EE product, this release includes Multi-Datacentre Replication +(MDC). There is no longer a Riak EE product. All is now Open +Source. Please consult the existing documentation for +[MDC]({{<baseurl>}}riak/kv/2.2.6/configuring/v3-multi-datacenter/). Again, +many thanks to bet365 Technology for this addition. See also +[Known Issues](#known-issues) below. + +#### Core Claim Fixes + +Prior to this release, in some scenarios, multiple partitions from the +same preflist could be stored on the same node, potentially leading to +data loss. [This write up](https://github.com/basho/riak_core/blob/c9c924ef006af1121b7eec04c7e1eefe54f4cf26/docs/claim-fixes.md) +explains the fixes in detail, and links to +[another post](https://github.com/infinityworks/riak_core/blob/ada7030a2b2c3463d6584f1d8b20e2c4bc5ac3d8/docs/ring_claim.md) +that gives a deep examination of riak-core-ring and the issues fixed +in this release. 
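+
+If you want to check how ring ownership is spread across your own cluster
+after upgrading, the standard admin commands are a reasonable starting point
+(a sketch; the exact output format varies between versions):
+
+```bash
+# Show ring ownership percentages and pending transfers per node
+riak-admin member_status
+
+# Show ring readiness and flag any unreachable nodes
+riak-admin ring-status
+```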
+
+#### Node Confirms
+
+This feature adds a new bucket property and write option,
+`node_confirms`. Unlike `w` and `pw`, which are tunables for
+consistency, `node_confirms` is a tunable for durability. When
+operating in a failure state, Riak will store replicas in fallback
+vnodes, and in some cases multiple fallbacks may be on the same
+physical node. `node_confirms` is an option that specifies how many
+distinct physical nodes must acknowledge a write for it to be
+considered successful. There is a
+[detailed write-up here](https://github.com/ramensen/riak_kv/blob/30b0e50374196d9a8cfef37871955a5f5b2bb472/docs/Node-Diversity.md),
+and more in the documentation.
+
+#### Participate In 2i
+
+This feature was added to bring greater consistency to 2i query
+results. When a node has just been joined to a Riak cluster, it may
+not have any data yet, or at least not up-to-date data. However, the
+joined node is immediately in the ring and able to take part in
+coverage queries, which can lead to incomplete results. This change
+adds an operator flag to a node's configuration that will exclude it
+from coverage plans. When all transfers are complete, the operator can
+remove the flag. See documentation for more details.
+
+#### GSets
+
+This release adds another Riak Data Type, the GSet CRDT. The GSet is a
+grow-only set, and has simpler semantics and better merge performance
+than the existing Riak Set. See documentation for details.
+
+#### Developer Improvements
+
+The tests didn't pass. Now they do. More details
+[here](https://github.com/russelldb/russelldb.github.io/blob/b228eacd4fd3246b4eb7f8d0b98c6bed747e2514/make_test.md).
+
+## Known Issues
+
+#### Advanced.config changes
+
+With the inclusion of Multi-Datacentre Replication in riak-2.2.6 there
+are additional `advanced.config` parameters. If you have an existing
+`advanced.config`, you must merge it with the new one from the install
+of riak-2.2.6. Some package installs will simply replace the old file
+with the new one (e.g. `.deb` packages); others may leave the old file
+unchanged. YOU MUST make sure that the `advanced.config` contains
+valid `riak_repl` entries.
+
+Example default entries to add to your existing `advanced.config`:
+
+```
+{riak_core,
+ [
+  {cluster_mgr, {"0.0.0.0", 9080 } }
+ ]},
+ {riak_repl,
+ [
+  {data_root, "/var/lib/riak/riak_repl/"},
+  {max_fssource_cluster, 5},
+  {max_fssource_node, 1},
+  {max_fssink_node, 1},
+  {fullsync_on_connect, true},
+  {fullsync_interval, 30},
+  {rtq_max_bytes, 104857600},
+  {proxy_get, disabled},
+  {rt_heartbeat_interval, 15},
+  {rt_heartbeat_timeout, 15},
+  {fullsync_use_background_manager, true}
+ ]},
+```
+
+Read more about configuring
+[MDC]({{<baseurl>}}riak/kv/2.2.6/configuring/v3-multi-datacenter/)
+replication.
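+
+One way to sanity-check the merged file before restarting is Riak's built-in
+configuration check (a sketch; exact file locations depend on your package):
+
+```bash
+# Validate riak.conf and advanced.config as Riak will parse them
+riak chkconfig
+```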
+ +More details about the issue can be found in riak\_repl/782: [2.2.6 - \[enoent\] - riak_repl couldn't create log dir +"data/riak_repl/logs"](https://github.com/basho/riak/issues/940) + +## Change Log for This Release + +* riak_pipe/113: [Some flappy test failure fixes](https://github.com/basho/riak_pipe/pull/113) +* riak_kv/1657: [Intermittent test failure fixes](https://github.com/basho/riak_kv/pull/1657) +* riak_kv/1658: [ Move schedule_timeout to execute in put_fsm](https://github.com/basho/riak_kv/pull/1658) +* riak_kv/1663: [Add bucket property `node_confirms` for physical diversity](https://github.com/basho/riak_kv/pull/1663) +* riak_kv/1664: [Add option 'participate_in_2i_coverage' with default 'enabled'](https://github.com/basho/riak_kv/pull/1664) +* riak_kv/1665: [enable gset support](https://github.com/basho/riak_kv/pull/1665) +* riak_kv/1666: [Fix schema paths for make test](https://github.com/basho/riak_kv/pull/1666) +* eleveldb/243: [Add a 10% fuzz factor to the resident memory calc (intermittent test failure "fixes")](https://github.com/basho/eleveldb/pull/243) +* riak_core/911: [Fix brops intermittent test failures](https://github.com/basho/riak_core/pull/911) +* riak_core/913: [ Fix claim tail violations and unbalanced rings](https://github.com/basho/riak_core/pull/913) +* riak_core/915: [Add `node_confirms` default bucket props](https://github.com/basho/riak_core/pull/915) +* riak_core/917: [Add participate_in_2i_coverage riak option](https://github.com/basho/riak_core/pull/917) +* sidejob/18: [Address some intermittent test failures](https://github.com/basho/sidejob/pull/18) +* riak_pb/228: [Add `node_confirms` option to write messages](https://github.com/basho/riak_pb/pull/228) +* riak_pb/229: [add gsets support](https://github.com/basho/riak_pb/pull/229) +* basho_stats/13: [Non-deterministic test needs a little ?SOMETIMES](https://github.com/basho/basho_stats/pull/13) +* basho_stats/4: [Add Makefile](https://github.com/basho/basho_stats/pull/4) +* exometer_core/17: [Fix failing test with correct tuple entry](https://github.com/basho/exometer_core/pull/17) +* yokozuna/741: [Fix broken eqc test](https://github.com/basho/yokozuna/pull/741) +* yokozuna/746: [remove -XX:+UseStringCache](https://github.com/basho/yokozuna/pull/746) +* yokozuna/747: [Remove jvm directive from test too](https://github.com/basho/yokozuna/pull/747) +* clique/81: [Fix failing test on some environments](https://github.com/basho/clique/pull/81) +* riak_dt/121: [doc related fix & explanation](https://github.com/basho/riak_dt/pull/121) +* riak_dt/127: [bring develop-2.2 up-to-date with develop](https://github.com/basho/riak_dt/pull/127) +* riak_dt/129: [Add gset support](https://github.com/basho/riak_dt/pull/129) +* riak_dt/135: [Fix `equal/2` bug around unordered dict usage](https://github.com/basho/riak_dt/pull/135) +* riak_repl/776: [Fix bug when passing utc timestamps into httpd_util:rfc1123/1.](https://github.com/basho/riak_repl/pull/776) +* riak_repl/777: [Fix badarg in binary construction for args to ebloom](https://github.com/basho/riak_repl/pull/777) +* riak_repl/779: [Sticking plaster fix for basho/riak_repl#772](https://github.com/basho/riak_repl/pull/779) +* riak_repl/780: [Fix sometime failing test](https://github.com/basho/riak_repl/pull/780) +* riak_repl/782: [Change ETS queue table permissions to protected](https://github.com/basho/riak_repl/pull/782) + +## Previous Release Notes + +Please see the KV 2.2.3 release notes [here]({{<baseurl>}}riak/kv/2.2.6/release-notes/), and the KV 
2.2.2 release notes [here]({{<baseurl>}}riak/kv/2.2.6/release-notes/).
diff --git a/content/riak/kv/2.2.6/setup.md b/content/riak/kv/2.2.6/setup.md
new file mode 100644
index 0000000000..3cd6490225
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup.md
@@ -0,0 +1,45 @@
+---
+title: "Setup Riak KV"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Setup"
+    identifier: "setup_index"
+    weight: 110
+    pre: install
+toc: false
+---
+
+[plan index]: ../setup/planning
+[install index]: ../setup/installing
+[upgrade index]: ../setup/upgrading
+[downgrade]: ../setup/downgrade
+
+## In This Section
+
+#### [Planning][plan index]
+
+Information on planning your Riak KV cluster, including software & hardware recommendations.
+
+[Learn More >>][plan index]
+
+#### [Installing][install index]
+
+Step-by-step tutorials on installing Riak KV.
+
+[Learn More >>][install index]
+
+#### [Upgrading][upgrade index]
+
+Guides on upgrading your Riak KV cluster.
+
+[Learn More >>][upgrade index]
+
+#### [Downgrading][downgrade]
+
+A guide on downgrading your Riak KV cluster.
+
+[Learn More >>][downgrade]
+
diff --git a/content/riak/kv/2.2.6/setup/downgrade.md b/content/riak/kv/2.2.6/setup/downgrade.md
new file mode 100644
index 0000000000..f519474639
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/downgrade.md
@@ -0,0 +1,174 @@
+---
+title: "Downgrading"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Downgrading"
+    identifier: "downgrading"
+    weight: 103
+    parent: "setup_index"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/upgrading/rolling-downgrades/
+  - /riak/kv/2.2.6/ops/upgrading/rolling-downgrades/
+---
+
+[rolling upgrade]: {{<baseurl>}}riak/kv/2.2.6/setup/upgrading/cluster
+[config ref]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference
+[concept aae]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/
+[aae status]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#aae-status
+
+Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade].
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove the Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak search.
+7. Monitor the reindex of the data.
+8. Finalize the process and restart Riak KV & Riak search.
+
+### Guidelines
+
+* Riak Control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:---|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ | | Can be opted out of using a [capability](#aae_tree_capability). |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression in LevelDB and/or enabled global expiration in LevelDB when you installed KV 2.2.6, you cannot downgrade.
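+
+A quick way to check whether either setting is active before attempting a
+downgrade (a sketch; the setting names are assumed from the 2.2.6
+configuration schema, and the path assumes a default package install):
+
+```bash
+# Look for LZ4 compression or global expiry settings in riak.conf
+grep -E 'leveldb\.(compression|expiration)' /etc/riak/riak.conf
+```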
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed-version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).
+
+This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}

+### Stop Riak KV and remove Riak search index & temporary data
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+2\. Back up your Riak KV `etc` and `data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Remove the Riak search index data and AAE data:
+
+    1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+    ```bash
+    rm -rf /var/lib/riak/yz_temp/solr-webapp
+    ```
+    2. Delete the Solr cores located in the yz directory. If you have custom `solrconfig.xml` files, you will need to restore the cores from backup instead.
+
+    For example:
+
+    ```bash
+    rm -rf /var/lib/riak/yz/example_core1
+    rm -rf /var/lib/riak/yz/example_core2
+    ```
+
+### Prepare to Re-index Solr Cores
+
+5\. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency-sensitive application, you should adjust these settings with care.
+
+```riak.conf
+anti_entropy.concurrency_limit = 8
+anti_entropy.tree.build_limit.number = 4
+anti_entropy.tree.build_limit.per_timespan = 5m
+```
+
+### Start the node and disable Yokozuna
+
+6\. Start Riak KV:
+{{% note %}}
+Search results will be inconsistent until **Step 8.1** is complete.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7\. Wait for Riak search to start by running the following command:
+
+```bash
+riak-admin wait-for-service yokozuna
+```
+
+8\. Run `riak attach`.
+
+    1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:
+
+    ```
+    riak_core_node_watcher:service_down(yokozuna).
+    ```
+
+    2. Expire the Yokozuna AAE trees:
+
+    ```
+    yz_entropy_mgr:expire_trees().
+    ```
+
+    3. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+### Monitor the reindex of the data
+
+9\. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands.
+
+The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output.
+
+Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.
+
+### Finalize process and restart Yokozuna
+
+
+10\. If you raised the AAE concurrency settings in `riak.conf` during **Step 5**, stop the node and remove the increased AAE thresholds.
+
+11\.
If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet: + +```erlang +riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)). +``` + +12\. Exit the attach session by pressing **Ctrl-G** then **q**. + +13\. Verify that transfers have completed: + +```bash +riak-admin transfers +``` diff --git a/content/riak/kv/2.2.6/setup/installing.md b/content/riak/kv/2.2.6/setup/installing.md new file mode 100644 index 0000000000..8d19a51e55 --- /dev/null +++ b/content/riak/kv/2.2.6/setup/installing.md @@ -0,0 +1,60 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/2.2.6/ops/building/installing + - /riak/kv/2.2.6/ops/building/installing + - /riak/2.2.6/installing/ + - /riak/kv/2.2.6/installing/ +--- + +[install aws]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/rhel-centos +[install smartos]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/smartos +[install solaris]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/solaris +[install suse]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/2.2.6/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SmartOS][install smartos] + * [Solaris][install solaris] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. 
diff --git a/content/riak/kv/2.2.6/setup/installing/amazon-web-services.md b/content/riak/kv/2.2.6/setup/installing/amazon-web-services.md
new file mode 100644
index 0000000000..3c93aaeaa4
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/installing/amazon-web-services.md
@@ -0,0 +1,148 @@
+---
+title_supertext: "Installing on"
+title: "Amazon Web Services"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Amazon Web Services"
+    identifier: "installing_amazon_web_services"
+    weight: 301
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/installing/Installing-on-AWS-Marketplace
+  - /riak/kv/2.2.6/ops/building/installing/Installing-on-AWS-Marketplace
+  - /riak/2.2.6/installing/amazon-web-services/
+  - /riak/kv/2.2.6/installing/amazon-web-services/
+---
+
+
+## Launching Riak VMs via the AWS Marketplace
+
+{{% note title="Note" %}}
+The AWS Marketplace does not always have the most recent versions of Riak available. To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below.
+{{% /note %}}
+
+In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account.
+
+1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account.
+
+2. Locate Riak in the **Databases & Caching** category or search for Riak from any page.
+
+3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair.
+
+    ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png)
+
+4. Click the **Accept Terms and Launch with 1-Click** button.
+
+### Security Group Settings
+
+Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak.
+
+1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM.
+
+2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports:
+
+    * 22 (SSH)
+    * 8087 (Riak Protocol Buffers Interface)
+    * 8098 (Riak HTTP Interface)
+
+3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab).
+
+    * Port range: 4369
+    * Port range: 6000-7999
+    * Port range: 8099
+
+4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.
+
+    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)
+
+We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/2.2.6/using/security/).
+
+## Clustering Riak on AWS
+
+You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the `ec2-user`.
+
+You can find more information on connecting to an instance in the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).
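+
+A minimal connection sketch, assuming a hypothetical key pair file
+`mykey.pem` and your instance's public DNS name:
+
+```bash
+# Connect to an instance as the ec2-user (host name is a placeholder)
+ssh -i mykey.pem ec2-user@ec2-203-0-113-25.compute-1.amazonaws.com
+```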
+
+{{% note title="Note" %}}
+The following clustering setup will _not_ be resilient to instance restarts
+unless deployed in Amazon VPC.
+{{% /note %}}
+
+{{% note title="Note on Package Based Installation" %}}
+ If installing to AWS by package, further configuration of _riak.conf_ to set the node name and listening IP addresses is necessary for the below steps to function.
+{{% /note %}}
+
+1. On the first node, obtain the internal IP address:
+
+    ```bash
+    curl http://169.254.169.254/latest/meta-data/local-ipv4
+    ```
+
+2. For all other nodes, use the internal IP address of the first node:
+
+    ```bash
+    sudo riak-admin cluster join riak@<ip.of.first.node>
+    ```
+
+3. After all of the nodes are joined, execute the following:
+
+    ```bash
+    sudo riak-admin cluster plan
+    ```
+
+    If this looks good:
+
+    ```bash
+    sudo riak-admin cluster commit
+    ```
+
+    To check the status of clustering use:
+
+    ```bash
+    sudo riak-admin member_status
+    ```
+
+You now have a Riak cluster running on AWS.
+
+
+## Installing From Package
+
+#### AWS (2)
+
+You can install Riak on Amazon Linux 2 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2/riak-2.2.6-1.amzn2x86_64.rpm
+sudo yum localinstall -y riak-2.2.6-1.amzn2x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2/riak-2.2.6-1.amzn2x86_64.rpm
+sudo rpm -i riak-2.2.6-1.amzn2x86_64.rpm
+```
+
+
+#### AWS (2016.09)
+
+You can install Riak on Amazon Linux 2016.09 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2016.09/riak-2.2.6-1.amzn1x86_64.rpm
+sudo yum localinstall -y riak-2.2.6-1.amzn1x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2016.09/riak-2.2.6-1.amzn1x86_64.rpm
+sudo rpm -i riak-2.2.6-1.amzn1x86_64.rpm
+```
+
+[install verify]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/verify
+
+## Next Steps
+
+Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.2.6/setup/installing/debian-ubuntu.md b/content/riak/kv/2.2.6/setup/installing/debian-ubuntu.md
new file mode 100644
index 0000000000..6b9966c17d
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/installing/debian-ubuntu.md
@@ -0,0 +1,185 @@
+---
+title_supertext: "Installing on"
+title: "Debian and Ubuntu"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Debian & Ubuntu"
+    identifier: "installing_debian_ubuntu"
+    weight: 302
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/kv/2.2.6/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/2.2.6/installing/debian-ubuntu/
+  - /riak/kv/2.2.6/installing/debian-ubuntu/
+---
+
+[install source index]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/source/
+[security index]: {{<baseurl>}}riak/kv/2.2.6/using/security/
+[install source erlang]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/verify
+
+Riak KV can be installed on Debian- or Ubuntu-based systems using a binary
+package or by compiling from source code.
+
+The following steps have been tested to work with Riak KV on:
+
+- Ubuntu 18.04
+- Ubuntu 16.04
+- Ubuntu 14.04
+- Ubuntu 12.04
+- Debian 9.2
+- Debian 8.6
+- Debian 7.6
+- Raspbian Buster
+
+> **Note on Debian 7**
+>
+> If you wish to install Riak on Debian 7, you may need to install
+[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
+later, which in turn requires upgrading your system to
+[sid](https://www.debian.org/releases/sid/). Installation instructions
+can be found
+[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
+>
+> Once sid has been installed, you can install libc6 with the following
+command:
+>
+>```bash
+apt-get -t sid install libc6 libc6-dev libc6-dbg
+```
+
+## Installing From Package
+
+If you wish to install the `.deb` packages by hand, follow these
+instructions.
+
+### Installing on Non-LTS Ubuntu Releases
+
+Typically we only package Riak for LTS releases to keep our build and
+testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
+there are changes that affect how Riak is packaged, so we will release a
+separate package for that non-LTS release. In most other cases, however,
+if you are running a non-LTS release, it is safe to follow the instructions
+below for the most recent LTS release prior to yours. On Ubuntu 12.10, for
+example, follow the installation instructions for Ubuntu 12.04.
+
+### PAM Library Requirement for Ubuntu
+
+One dependency that may be missing on your machine is the `libpam0g-dev`
+package used for Pluggable Authentication Module (PAM) authentication,
+associated with [Riak security][security index].
+
+To install:
+
+```bash
+sudo apt-get install libpam0g-dev
+```
+
+### SSL Library Requirement for Ubuntu and Debian
+
+Riak currently requires libssl version 0.9.8 on some versions of Ubuntu.
+Starting with Ubuntu 12.04, this is no longer an issue. Before installing
+Riak via package on Ubuntu, install the `libssl0.9.8` package. Note that
+this version of libssl can be safely installed alongside
+current/existing libssl installations.
+
+To install the libssl version 0.9.8 package, execute the following
+command:
+
+```bash
+sudo apt-get install libssl0.9.8
+```
+
+After the libssl package installation, proceed to installing Riak from
+the pre-built package by executing the following commands as appropriate
+for the target platform:
+
+### Riak 64-bit Installation
+
+#### Ubuntu Bionic Beaver (18.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/bionic64/riak_2.2.6-1_amd64.deb
+sudo dpkg -i riak_2.2.6-1_amd64.deb
+```
+
+#### Ubuntu Xenial Xerus (16.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/xenial64/riak_2.2.6-1_amd64.deb
+sudo dpkg -i riak_2.2.6-1_amd64.deb
+```
+
+#### Ubuntu Trusty Tahr (14.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/trusty64/riak_2.2.6-1_amd64.deb
+sudo dpkg -i riak_2.2.6-1_amd64.deb
+```
+
+#### Ubuntu Precise Pangolin (12.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/precise64/riak_2.2.6-1_amd64.deb
+sudo dpkg -i riak_2.2.6-1_amd64.deb
+```
+
+#### Debian Stretch (9.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/9/riak_2.2.6-1_amd64.deb
+sudo dpkg -i riak_2.2.6-1_amd64.deb
+```
+
+#### Debian Jessie (8.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/8/riak_2.2.6-1_amd64.deb
+sudo dpkg -i riak_2.2.6-1_amd64.deb
+```
+
+#### Debian Wheezy (7.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/7/riak_2.2.6-1_amd64.deb
+sudo dpkg -i riak_2.2.6-1_amd64.deb
+```
+
+#### Raspbian Buster
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/raspbian/buster/riak_2.2.6-1_armhf.deb
+sudo dpkg -i riak_2.2.6-1_armhf.deb
+```
+
+
+## Installing From Source
+
+First, install Riak dependencies using apt:
+
+```bash
+sudo apt-get install build-essential libc6-dev-i386 git
+```
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/riak-2.2.6.tar.gz
+tar zxvf riak-2.2.6.tar.gz
+cd riak-2.2.6
+make rel
+```
+
+If the build was successful, a fresh build of Riak will exist in the
+`rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.2.6/setup/installing/freebsd.md b/content/riak/kv/2.2.6/setup/installing/freebsd.md
new file mode 100644
index 0000000000..9c5dce0dea
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/installing/freebsd.md
@@ -0,0 +1,127 @@
+---
+title_supertext: "Installing on"
+title: "FreeBSD"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "FreeBSD"
+    identifier: "installing_freebsd"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/installing/Installing-on-FreeBSD
+  - /riak/kv/2.2.6/ops/building/installing/Installing-on-FreeBSD
+  - /riak/2.2.6/installing/freebsd/
+  - /riak/kv/2.2.6/installing/freebsd/
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/2.2.6/downloads/
+[install verify]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/verify
+
+You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code.
+
+## Installing From Binary Package
+
+Installing Riak from a binary package is the simplest method: it has the fewest dependencies and takes less time to complete than building from source.
+ +### Prerequisites and Dependencies + +Riak depends on `sudo` to be installed if the Riak command line tools are to be executed by users other than the *riak* user. Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package. + +### Installation + +You can install the Riak binary package on FreeBSD remotely using the +`pkg_add` remote option. For this example, we're installing `riak-2.2.6-FreeBSD-amd64.tbz`. + +### For FreeBSD 11.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/2.2/2.2.6/freebsd/11.1/riak-2.2.6.txz +``` + + +### For FreeBSD 10.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/2.2/2.2.6/freebsd/10.4/riak-2.2.6.txz +``` + +When Riak is installed, a message is displayed with information about the installation and available documentation. + +``` +Thank you for installing Riak. + +Riak has been installed in /usr/local owned by user:group riak:riak + +The primary directories are: + + {platform_bin_dir, "/usr/local/sbin"} + {platform_data_dir, "/var/db/riak"} + {platform_etc_dir, "/usr/local/etc/riak"} + {platform_lib_dir, "/usr/local/lib/riak"} + {platform_log_dir, "/var/log/riak"} + +These can be configured and changed in the platform_etc_dir/app.config. + +Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly. + +Man pages are available for riak(1) and riak-admin(1) +``` + +## Installing From Source + +Installing Riak from source on FreeBSD is a straightforward process which requires installation of more dependencies (such as Erlang) prior to building, and requires more time than a binary package installation. + +That said, installing from source provides for greater flexibility with respect to configuration, data root locations, and more fine grained control over specific dependency versions. + +### Prerequisites and Dependencies + +When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build. + +If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. + +* Erlang ([Installing Erlang][install source erlang]) +* Curl +* Git +* OpenSSL (version 1.0.0_7) +* Python +* sudo + +### Installation +First download the version you wish to install from the [downloads][downloads]. + +Next, unpack and build a release from source: + +```bash +tar zxf <riak-x.x.x> +cd riak-x.x.x +gmake rel +``` + +Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories: + +```bash +bin # Riak binaries +data # Riak data and metadata +erts-5.9.2 # Erlang Run-Time System +etc # Riak Configuration +lib # Third party libraries +log # Operational logs +releases # Release information +``` + +If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this: + +```bash +gmake devrel +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
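+
+As a quick smoke test after either installation method (a sketch; it assumes
+the `riak` script is on your `PATH`):
+
+```bash
+# Start the node and confirm it responds with "pong"
+riak start
+riak ping
+```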
diff --git a/content/riak/kv/2.2.6/setup/installing/mac-osx.md b/content/riak/kv/2.2.6/setup/installing/mac-osx.md new file mode 100644 index 0000000000..24d71d59ab --- /dev/null +++ b/content/riak/kv/2.2.6/setup/installing/mac-osx.md @@ -0,0 +1,116 @@ +--- +title_supertext: "Installing on" +title: "Mac OS X" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Mac OS X" + identifier: "installing_macosx" + weight: 303 + parent: "installing" +toc: true +aliases: + - /riak/2.2.6/ops/building/installing/Installing-on-Mac-OS-X + - /riak/kv/2.2.6/ops/building/installing/Installing-on-Mac-OS-X + - /riak/2.2.6/installing/mac-osx/ + - /riak/kv/2.2.6/installing/mac-osx/ +--- + + + +[perf open files]: {{<baseurl>}}riak/kv/2.2.6/using/performance/open-files-limit +[install source erlang]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/verify + +The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball. + +> **`ulimit` on OS X** +> +> OS X gives you a very small limit on open file handles, so even with a +backend that uses very few file handles, it's possible to run out. See +[Open Files Limit][perf open files] for more information about changing the limit. + + +## From Precompiled Tarballs + +To run Riak from our precompiled tarball, run these commands for the +appropriate platform: + +### 64-bit + +```bash +curl -O https://files.tiot.jp/riak/kv/2.2/2.2.6/osx/10.11/riak-2.2.6-OSX-x86_64.tar.gz +tar xzvf riak-2.2.6-osx-x86_64.tar.gz +``` + +After the release is untarred, you will be able to `cd` into the `riak` +directory and execute `bin/riak start` to start the Riak node. + +## Homebrew + +{{% note title="Warning: Homebrew not always up to date" %}} +Homebrew's Riak recipe is community supported, and thus is not always up to +date with the latest Riak package. Please ensure that the current recipe is +using the latest supported code (and don't be afraid to update it if it's +not). +{{% /note %}} + +Installing Riak 2.0 with [Homebrew](http://brew.sh/) is easy: + +```bash +brew install --devrel riak +``` + +By default, this will place a `2.2.6` folder in +`/usr/local/Cellar/riak`. + +Be aware that you will most likely see the following message after +running `brew install`: + +``` +Error: The `brew link` step did not complete successfully +The formula built, but is not symlinked into /usr/local + +You can try again using: + brew link riak +``` + +We do not recommend using `brew link` with Riak. Instead, we recommend +either copying that directory to a desired location on your machine, +aliasing the executables in the `/bin` directory, or interacting with +the Riak installation directory via environment variables. + +**Note**: Homebrew will install Erlang if you don't have it already. + +## Installing From Source + +You must have Xcode tools installed from [Apple's Developer +website](http://developer.apple.com/). + +{{% note title="Note on Clang" %}} +Riak has had problems compiling with Clang in the past. As of Riak KV +2.2.6 and Clang 902.0.39.1, Clang can build Riak. +{{% /note %}} + +Riak requires [Erlang](http://www.erlang.org/) R16B02+. + +If you do not have Erlang already installed, see [Installing Erlang][install source erlang]. + +Next, download and unpack the source distribution. 
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.2/2.2.6/riak-2.2.6.tar.gz
+tar zxvf riak-2.2.6.tar.gz
+cd riak-2.2.6
+make rel
+```
+
+If you receive errors when building about "incompatible architecture,"
+please verify that you built Erlang with the same architecture as your
+system (Snow Leopard and higher: 64-bit).
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.2.6/setup/installing/rhel-centos.md b/content/riak/kv/2.2.6/setup/installing/rhel-centos.md
new file mode 100644
index 0000000000..fc9d8bb408
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/installing/rhel-centos.md
@@ -0,0 +1,129 @@
+---
+title_supertext: "Installing on"
+title: "RHEL and CentOS"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "RHEL & CentOS"
+    identifier: "installing_rhel_centos"
+    weight: 304
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/kv/2.2.6/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/2.2.6/installing/rhel-centos/
+  - /riak/kv/2.2.6/installing/rhel-centos/
+---
+
+
+
+[install source index]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/source
+[install source erlang]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/verify
+
+Riak KV can be installed on CentOS- or Red Hat-based systems using a binary
+package or by [compiling Riak from source code][install source index]. The
+following steps have been tested to work with Riak on CentOS/RHEL 6.9,
+7.5.1804, and 8.1.1911.
+
+> **Note on SELinux**
+>
+> CentOS enables SELinux by default, so you may need to disable SELinux if
+you encounter errors.
+
+## Installing From Package
+
+If you wish to install the RHEL/CentOS packages by hand, follow these
+instructions.
+
+### For CentOS 8 / RHEL 8
+
+Before installing Riak on CentOS 8/RHEL 8, we first need to satisfy some
+Erlang dependencies by installing the EPEL repository:
+
+```bash
+sudo yum install -y epel-release
+```
+
+Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/8/riak-2.2.6-1.el8.x86_64.rpm
+sudo yum localinstall -y riak-2.2.6-1.el8.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/8/riak-2.2.6-1.el8.x86_64.rpm
+sudo rpm -Uvh riak-2.2.6-1.el8.x86_64.rpm
+```
+
+### For CentOS 7 / RHEL 7
+
+You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/7/riak-2.2.6-1.el7.x86_64.rpm
+sudo yum localinstall -y riak-2.2.6-1.el7.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/7/riak-2.2.6-1.el7.x86_64.rpm
+sudo rpm -Uvh riak-2.2.6-1.el7.x86_64.rpm
+```
+
+### For CentOS 6 / RHEL 6
+
+You can install using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/6/riak-2.2.6-1.el6.x86_64.rpm
+sudo yum localinstall -y riak-2.2.6-1.el6.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/6/riak-2.2.6-1.el6.x86_64.rpm
+sudo rpm -Uvh riak-2.2.6-1.el6.x86_64.rpm
+```
+
+## Installing From Source
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang]. + +Building from source will require the following packages: + +* `gcc` +* `gcc-c++` +* `glibc-devel` +* `make` +* `pam-devel` + +You can install these with yum: + +```bash +sudo yum install gcc gcc-c++ glibc-devel make git pam-devel +``` + +Now we can download and install Riak: + +```bash +wget https://files.tiot.jp/riak/kv/2.2/2.2.6/riak-2.2.6.tar.gz +tar zxvf riak-2.2.6.tar.gz +cd riak-2.2.6 +make rel +``` + +You will now have a fresh build of Riak in the `rel/riak` directory. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.2.6/setup/installing/smartos.md b/content/riak/kv/2.2.6/setup/installing/smartos.md new file mode 100644 index 0000000000..5f06e074ab --- /dev/null +++ b/content/riak/kv/2.2.6/setup/installing/smartos.md @@ -0,0 +1,117 @@ +--- +title_supertext: "Installing on" +title: "SmartOS" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "SmartOS" + identifier: "installing_smartos" + weight: 305 + parent: "installing" +toc: true +aliases: + - /riak/2.2.6/ops/building/installing/Installing-on-SmartOS + - /riak/kv/2.2.6/ops/building/installing/Installing-on-SmartOS + - /riak/2.2.6/installing/smartos/ + - /riak/kv/2.2.6/installing/smartos/ +--- + +[install verify]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/verify + +{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}} +SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open +files limit is at least 65536. Check the current limits to verify this: + +```bash +ulimit -a +``` + +To temporarily increase this limit *for the life of your session*, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`: + +```bash +set rlim_fd_max=65536 +``` + +## Choosing a Version + +SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is the way to determine which Riak package to use. + +The thing that really matters for Riak is what dataset was used to make the SmartOS VM. These datasets come from joyent and appear like this with the `dsadm` command: + +``` +fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4 +f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1 +``` + +This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use. 
+
+For Joyent Cloud users who don't know what dataset was used, in the guest zone, type:
+
+```
+cat /opt/local/etc/pkgin/repositories.conf
+```
+
+* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2*, you need to use the `1.8` download.
+* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011*, you need to use the `1.6` download.
+
+## Download and Install
+
+Download your version of the Riak binary package for SmartOS:
+
+```bash
+curl -o /tmp/riak-1.2.0-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/1.2/1.2.0/smartos/1.8/riak-1.2.0-SmartOS-x86_64.tgz
+```
+
+Next, install the package:
+
+```
+pkg_add /tmp/riak-1.2.0-SmartOS-x86_64.tgz
+```
+
+After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services:
+
+```bash
+svcadm -v enable -r riak
+```
+
+After enabling the services, check to see that they are online:
+
+```
+svcs -a | grep -E 'epmd|riak'
+```
+
+Output from the above command should resemble the following:
+
+```
+online 17:17:16 svc:/network/epmd:default
+online 17:17:16 svc:/application/riak:default
+```
+
+Finally, provided that the services are shown to be in an **online** state, go ahead and ping Riak:
+
+```bash
+riak ping
+```
+
+Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead.
+
+If all responses indicate that Riak is up and running, then you have successfully installed and configured Riak as a service on SmartOS.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.2.6/setup/installing/solaris.md b/content/riak/kv/2.2.6/setup/installing/solaris.md
new file mode 100644
index 0000000000..99407794bb
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/installing/solaris.md
@@ -0,0 +1,90 @@
+---
+title_supertext: "Installing on"
+title: "Solaris"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Solaris"
+    identifier: "installing_solaris"
+    weight: 306
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/installing/Installing-on-Solaris
+  - /riak/kv/2.2.6/ops/building/installing/Installing-on-Solaris
+  - /riak/2.2.6/installing/solaris/
+  - /riak/kv/2.2.6/installing/solaris/
+---
+
+
+[install verify]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/verify
+
+{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}}
+Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user.
+
+> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation.
+
+## Open Files Limit
+
+Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. 
Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.2.6/setup/installing/source.md b/content/riak/kv/2.2.6/setup/installing/source.md new file mode 100644 index 0000000000..b1725341ff --- /dev/null +++ b/content/riak/kv/2.2.6/setup/installing/source.md @@ -0,0 +1,105 @@ +--- +title_supertext: "Installing" +title: "Riak KV From Source" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Installing From Source" + identifier: "installing_source" + weight: 310 + parent: "installing" +toc: true +aliases: + - /riak/2.2.6/ops/building/Installing-Riak-from-Source + - /riak/kv/2.2.6/ops/building/Installing-Riak-from-Source + - /riak/2.2.6/installing/source/ + - /riak/kv/2.2.6/installing/source/ +--- + + + +[install source erlang]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/source/erlang +[downloads]: {{<baseurl>}}riak/kv/2.2.6/downloads/ +[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/verify + +Riak should be installed from source if you are building on a platform +for which a package does not exist or if you are interested in +contributing to Riak. + +## Dependencies + +### Erlang + +To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release. + +See [Installing Erlang][install source erlang] for instructions. + +### Git + +Riak depends on source code located in multiple Git repositories. 
Install [Git](https://git-scm.com/) on the target system before attempting the build.
+
+### GCC
+
+Riak will not compile with Clang. Please make sure your default C/C++
+compiler is [GCC](https://gcc.gnu.org/).
+
+## Installation
+
+The following instructions generate a complete, self-contained build of
+Riak in `$RIAK/rel/riak` where `$RIAK` is the location of the unpacked
+or cloned source.
+
+### Installing from source package
+
+Download the Riak source package from the [Download Center][downloads] and build:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.2/2.2.6/riak-2.2.6.tar.gz
+tar zxvf riak-2.2.6.tar.gz
+cd riak-2.2.6
+make locked-deps
+make rel
+```
+
+### Installing from GitHub
+
+The [Riak GitHub repository](http://github.com/basho/riak) has much
+more information on building and installing Riak from source. To clone
+and build Riak from source, follow the steps below.
+
+Clone the repository using [Git](http://git-scm.com) and build:
+
+```bash
+git clone git://github.com/basho/riak.git
+cd riak
+make locked-deps
+make rel
+```
+
+## Platform-Specific Instructions
+
+For instructions about specific platforms, see:
+
+ * [Debian & Ubuntu][install debian & ubuntu#source]
+ * [FreeBSD][install freebsd#source]
+ * [Mac OS X][install mac osx#source]
+ * [RHEL & CentOS][install rhel & centos#source]
+
+If you are running Riak on a platform not in the list above and need
+some help getting it up and running, join the Riak Mailing List and
+inquire about it there. We are happy to help you get up and running with
+Riak.
+
+### Windows
+
+Riak is not currently supported on Microsoft Windows.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.2.6/setup/installing/source/erlang.md b/content/riak/kv/2.2.6/setup/installing/source/erlang.md
new file mode 100644
index 0000000000..96c0eaa2eb
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/installing/source/erlang.md
@@ -0,0 +1,566 @@
+---
+title: "Installing Erlang"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Installing Erlang"
+    identifier: "installing_source_erlang"
+    weight: 301
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/installing/erlang
+  - /riak/kv/2.2.6/ops/building/installing/erlang
+  - /riak/2.2.6/installing/source/erlang/
+  - /riak/kv/2.2.6/installing/source/erlang/
+---
+
+[install index]: {{<baseurl>}}riak/kv/2.2.6/setup/installing
+[security basics]: {{<baseurl>}}riak/kv/2.2.6/using/security/basics
+
+Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].**
+
+> **Note on Official Support**
+>
+> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package. 
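+
+If an Erlang runtime is already on your `PATH` and you want to confirm which
+OTP release it provides before building Riak, one quick check (a generic
+sketch, not specific to Basho's patches) is to ask the VM directly:
+
+```bash
+# Prints the OTP release name, e.g. R16B02
+erl -noshell -eval 'io:format("~s~n", [erlang:system_info(otp_release)]), halt().'
+```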
+
+## Prerequisites
+
+#### Contents
+
+* [kerl](#kerl-prerequisites)
+* [Debian/Ubuntu](#debian-ubuntu-prerequisites)
+* [FreeBSD/Solaris](#freebsd-solaris-prerequisites)
+* [Mac OS X](#mac-os-x-prerequisites)
+* [RHEL/CentOS](#rhel-centos-prerequisites)
+
+To build and install Erlang you must have a GNU-compatible build system and these tools:
+
+**Unpacking**
+
+* [GNU unzip](http://www.gzip.org/) or a modern uncompressing utility.
+* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives.
+
+**Building**
+
+* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts.
+* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program.
+* [gcc](https://gcc.gnu.org/): for compiling C.
+* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces.
+* [OpenSSL](https://www.openssl.org/): toolkit that implements SSL and TLS protocols.
+* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java.
+
+
+## kerl Prerequisites
+
+[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems.
+
+Install kerl by running the following commands:
+
+```bash
+curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl
+chmod a+x kerl
+```
+
+If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl.
+
+Otherwise, continue with [Installing with kerl](#installing-with-kerl).
+
+### Configuring kerl on FreeBSD/Solaris
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+  --enable-kernel-poll --without-odbc"
+```
+
+Then check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf by running:
+
+```shell
+sudo pkg update
+sudo pkg install autoconf
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+### Configuring kerl on Mac OS X
+
+To compile Erlang as 64-bit on Mac OS X you need to instruct kerl to pass the correct flags to the `configure` command.
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+  --enable-kernel-poll --without-odbc --enable-darwin-64bit"
+```
+
+On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).
+
+Check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf as follows.
+
+With Homebrew:
+
+```shell
+brew install autoconf
+```
+
+Or with curl:
+
+```shell
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl). 
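+
+Before building anything, you can also sanity-check the kerl script itself. A
+minimal sketch, assuming kerl sits in your current directory as in the
+installation step above:
+
+```bash
+# Refresh and list the Erlang releases kerl knows about; any output here
+# confirms that kerl runs and can reach the release index
+./kerl update releases
+./kerl list releases
+```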
+
+
+
+## Debian/Ubuntu Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `apt-get` commands:
+
+```bash
+sudo apt-get update
+sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+> **Note on build output**
+>
+> These packages are not required for operation of a Riak node.
+Notes in the build output about missing support for wxWidgets can be
+safely ignored when installing Riak in a typical non-graphical server
+environment.
+
+To install packages for graphics support use the following `apt-get` command:
+
+```bash
+sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).
+
+
+
+## FreeBSD/Solaris Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `pkg` commands:
+
+```bash
+sudo pkg update
+sudo pkg install gcc autoconf gmake flex
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support use the following `pkg` command:
+
+```bash
+sudo pkg install wx28-gtk2-2.8.12_4
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).
+
+
+
+## Mac OS X Prerequisites
+
+* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
+* [Homebrew](http://brew.sh/) (*optional*) - Package Manager.
+
+First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X.
+
+We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is not required to install Erlang and is optional.
+
+Next, if you are running OS X 10.9 (Mavericks) or later, you may need to
+install [autoconf](https://www.gnu.org/software/autoconf/). To check for
+the presence of autoconf run:
+
+```bash
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf as follows.
+
+With Homebrew:
+
+```bash
+brew install autoconf
+```
+
+Or with curl:
+
+```bash
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x).
+
+## RHEL/CentOS Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `yum` command:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support use the following `yum` command:
+
+```bash
+sudo yum install wxBase.x86_64
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos). 
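+
+If you want to double-check that the build dependencies are actually present
+before compiling, one option on RHEL/CentOS (a sketch, not a required step)
+is to query the package database directly:
+
+```bash
+# Each installed package prints its version; missing packages are reported
+rpm -q gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf git
+```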
+ + + +## Installation + +* [Installing with kerl](#installing-with-kerl) +* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu) +* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris) +* [Installing on Mac OS X](#installing-on-mac-os-x) +* [Installing on RHEL/CentOS](#installing-on-rhel-centos) + +## Installing with kerl + +First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites). + +With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of +Erlang [from Github](https://github.com/basho/otp) using the following +command: + +```bash +./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10 +``` + +This builds the Erlang distribution and performs all of the steps +required to manually install Erlang for you. + +After Erlang is successfully built, you can install the build as follows: + +```bash +./kerl install R16B02-basho10 ~/erlang/R16B02-basho10 +. ~/erlang/R16B02-basho10/activate +``` + +The last line activates the Erlang build that was just installed into +`~/erlang/R16B02-basho10`. + +> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands. + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Debian/Ubuntu + +First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). + +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). 
+
+You can install Erlang in several ways on OS X:
+
+* [From Source](#installing-on-mac-os-x-from-source)
+* [Homebrew](#installing-on-mac-os-x-with-homebrew)
+* [MacPorts](#installing-on-mac-os-x-with-macports)
+
+## Installing on Mac OS X from Source
+
+First, download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz):
+
+```bash
+curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Follow the steps below to configure Erlang for your operating system.
+
+#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7)
+
+If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion
+(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang.
+
+Using LLVM:
+
+```bash
+CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+Or if you prefer GCC:
+
+```bash
+CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on Snow Leopard (OS X 10.6)
+
+If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an
+Intel processor:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on older versions of OS X
+
+If you're on a non-Intel processor or older version of OS X:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll
+```
+
+After you've configured your system, `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with Homebrew
+
+To install Erlang with Homebrew, use this command:
+
+```bash
+brew install erlang
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with MacPorts
+
+Installing with MacPorts:
+
+```bash
+port install erlang +ssl
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on RHEL/CentOS
+
+First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites).
+
+Next, download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz) using `wget`:
+
+```bash
+wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next `cd` into the unpacked directory, build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+> **Note for RHEL6/CentOS6**
+>
+> In certain versions of RHEL 6 and CentOS 6 the `openssl-devel` package
+ships with Elliptic Curve Cryptography partially disabled. To
+communicate this to Erlang and prevent compile- and run-time errors, the
+environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to
+Erlang's `./configure` call. 
+> +> The full `make` invocation then becomes +> +> ```bash +CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` diff --git a/content/riak/kv/2.2.6/setup/installing/source/jvm.md b/content/riak/kv/2.2.6/setup/installing/source/jvm.md new file mode 100644 index 0000000000..b6ff685a05 --- /dev/null +++ b/content/riak/kv/2.2.6/setup/installing/source/jvm.md @@ -0,0 +1,51 @@ +--- +title: "Installing the JVM" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Installing the JVM" + identifier: "installing_source_jvm" + weight: 302 + parent: "installing_source" +toc: true +aliases: + - /riak/2.2.6/ops/building/installing/jvm + - /riak/kv/2.2.6/ops/building/installing/jvm + - /riak/2.2.6/ops/building/installing/Installing-the-JVM + - /riak/kv/2.2.6/ops/building/installing/Installing-the-JVM + - /riak/2.2.6/installing/source/jvm/ + - /riak/kv/2.2.6/installing/source/jvm/ +--- + +[usage search]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search + +If you are using [Riak Search 2.0][usage search], codename Yokozuna, +you will need to install **Java 1.6 or later** to run [Apache +Solr](https://lucene.apache.org/solr/), the search platform that powers +Riak Search. + +We recommend using Oracle's [JDK +7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html). +Installation packages can be found on the [Java SE 7 Downloads +page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR) +and instructions on the [documentation +page](http://www.oracle.com/technetwork/java/javase/documentation/index.html). + +## Installing Solr on OS X + +If you're using Riak Search on Mac OS X, you may see the following +error: + +```java +java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME> +``` + +If you encounter this error, we recommend manually setting the hostname +for `localhost` using +[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html). + +```bash +scutil --set HostName "localhost" +``` diff --git a/content/riak/kv/2.2.6/setup/installing/suse.md b/content/riak/kv/2.2.6/setup/installing/suse.md new file mode 100644 index 0000000000..4020566373 --- /dev/null +++ b/content/riak/kv/2.2.6/setup/installing/suse.md @@ -0,0 +1,47 @@ +--- +title_supertext: "Installing on" +title: "SUSE" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "SUSE" + identifier: "installing_suse" + weight: 307 + parent: "installing" +toc: false +aliases: + - /riak/2.2.6/ops/building/installing/Installing-on-SUSE + - /riak/kv/2.2.6/ops/building/installing/Installing-on-SUSE + - /riak/2.2.6/installing/suse/ + - /riak/kv/2.2.6/installing/suse/ +--- + +[install verify]: {{<baseurl>}}riak/kv/2.2.6/setup/installing/verify + +{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}} +SUSE is no longer supported in Riak KV 2.2.6+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +Riak KV can be installed on OpenSuse and SLES systems using a binary package. 
The following steps have been tested to work with Riak on +the following x86/x86_64 flavors of SuSE: + +* SLES11-SP1 +* SLES11-SP2 +* SLES11-SP3 +* SLES11-SP4 +* OpenSUSE 11.2 +* OpenSUSE 11.3 +* OpenSUSE 11.4 + +## Installing with rpm + +```bash +wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm +sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.2.6/setup/installing/verify.md b/content/riak/kv/2.2.6/setup/installing/verify.md new file mode 100644 index 0000000000..ec8ca35485 --- /dev/null +++ b/content/riak/kv/2.2.6/setup/installing/verify.md @@ -0,0 +1,164 @@ +--- +title: "Verifying a Riak KV Installation" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Verifying an Installation" + identifier: "installing_verify" + weight: 311 + parent: "installing" +toc: true +aliases: + - /riak/2.2.6/ops/installing/Post-Installation + - /riak/kv/2.2.6/ops/installing/Post-Installation + - /riak/2.2.6/installing/verify-install/ + - /riak/kv/2.2.6/installing/verify-install/ +--- + +[client libraries]: {{<baseurl>}}riak/kv/2.2.6/developing/client-libraries +[perf open files]: {{<baseurl>}}riak/kv/2.2.6/using/performance/open-files-limit +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/inspecting-node + +After you've installed Riak KV, we recommend checking the liveness of +each node to ensure that requests are being properly served. + +In this document, we cover ways of verifying that your Riak nodes are operating +correctly. After you've determined that your nodes are functioning and you're +ready to put Riak KV to work, be sure to check out the resources in the +**Now What?** section below. + +## Starting a Riak Node + +> **Note about source installations** +> +> To start a Riak KV node that was installed by compiling the source code, you +can add the Riak KV binary directory from the installation directory you've +chosen to your `PATH`. +> +> For example, if you compiled Riak KV from source in +the `/home/riak` directory, then you can add the binary directory +(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation. + +To start a Riak node, use the `riak start` command: + +```bash +riak start +``` + +A successful start will return no output. If there is a problem starting the +node, an error message is printed to standard error. + +To run Riak with an attached interactive Erlang console: + +```bash +riak console +``` + +A Riak node is typically started in console mode as part of debugging or +troubleshooting to gather more detailed information from the Riak startup +sequence. Note that if you start a Riak node in this manner, it is running as +a foreground process that will be exited when the console is closed. + +You can close the console by issuing this command at the Erlang prompt: + +```erlang +q(). +``` + +Once your node has started, you can initially check that it is running with +the `riak ping` command: + +```bash +riak ping +``` + +The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not. 
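+
+If you are scripting node startup, you can loop on `riak ping` until the node
+answers. The retry window below (30 seconds) is an arbitrary example:
+
+```bash
+# Hypothetical startup check: retry until the node responds or we time out
+for _ in $(seq 1 30); do
+    riak ping >/dev/null 2>&1 && { echo "node is up"; exit 0; }
+    sleep 1
+done
+echo "node did not respond to pings" >&2
+exit 1
+```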
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the `default` [bucket type][cluster ops bucket types]:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /types/default/props HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node! 
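+
+As one last end-to-end check, you can write an object and read it back over
+the HTTP API. The bucket and key below are arbitrary sample names:
+
+```bash
+# Store a plain-text value under bucket "test", key "hello"
+curl -XPUT http://127.0.0.1:8098/buckets/test/keys/hello \
+  -H "Content-Type: text/plain" \
+  -d "world"
+
+# Read it back; this should print "world"
+curl http://127.0.0.1:8098/buckets/test/keys/hello
+```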
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
diff --git a/content/riak/kv/2.2.6/setup/installing/windows-azure.md b/content/riak/kv/2.2.6/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..51725eb939
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/installing/windows-azure.md
@@ -0,0 +1,192 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/2.2.6/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/2.2.6/installing/windows-azure/
+  - /riak/kv/2.2.6/installing/windows-azure/
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported, and thus are not always up to
+date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Log in to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+   - Provide a "Virtual Machine Name", such as "testlinuxvm".
+   - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+   - In the "New Password" box, type a strong password.
+   - In the "Confirm Password" box, retype the password.
+   - Select the appropriate "Size" from the drop-down list.
+   - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+   - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop-down list.
+   - In the "DNS Name" box, type a valid DNS address, e.g., "testlinuxvm". 
+ - In the "Storage Account" box, select "Use Automatically Generated Storage Account". + - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. + - Click the next arrow to continue. + + ![]({{<baseurl>}}images/vmconfiguration2.png) + +6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. + + ![]({{<baseurl>}}images/vmconfiguration3.png) + +7. Wait while Windows Azure prepares your virtual machine. + +### Configure Endpoints + +Once the virtual machine is created you must configure endpoints in order to remotely connect. + +1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints". + +2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows: + - Name: https + - Protocol: leave set to 'TCP' + - Public Port: 443 + - private Port: 8069 + +## Connect to CentOS VMs using PuTTY or SSH + +When the virtual machine has been provisioned and the endpoints configured you can connect to it using SSH or PuTTY. + +### Connecting Using SSH + +**For Linux & Mac Users:** + +```bash +ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180 +``` +Enter the user's password. + +**For Windows Users, use PuTTY:** + +If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html). + +1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe. + +2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. + + ![]({{<baseurl>}}images/putty.png) + +## Install Riak and configure using a shell script + +1. **On each node**, once you've connected using the steps above, execute: + +```bash +sudo su - +curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh +``` + +## Configure Riak using Riak Control + +You can either use Riak Control or the command line to add nodes to your Riak Cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line" + +1. Find the dns name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For Example: + - **dns:** basho-example.cloudapp.net + - **Deployment ID:** 7ea145743aeb4402a088da1234567890 + +2. Visit https://dns-name.cloudapp.net/admin in your browser + +3. Enter 'admin' as the username, and the "Deployment ID" as the password. + +4. Select 'Cluster' on the left. + +5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each vm, not the DNS name. For Example: + - riak@basho-centos1 + +You now have a Riak cluster on Azure + +## Configure Riak using Command Line + +If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section. + +First, SSH into the second (and subsequent nodes) and execute: + +```bash +riak-admin cluster join riak@yourhostnamehere +``` + +(Where 'yourhostnamehere' is the short name of the **first node** in your cluster) + +(NOTE: The host you choose can actually be any host that has already joined the cluster. 
The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak-admin cluster plan
+```
+
+Verify all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak-admin cluster commit
+```
+
+To check the status of clustering, use:
+
+```bash
+riak-admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
diff --git a/content/riak/kv/2.2.6/setup/planning.md b/content/riak/kv/2.2.6/setup/planning.md
new file mode 100644
index 0000000000..7f38ff8c4d
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning.md
@@ -0,0 +1,55 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster. 
+
+[Learn More >>][plan best practices]
+
+
diff --git a/content/riak/kv/2.2.6/setup/planning/backend.md b/content/riak/kv/2.2.6/setup/planning/backend.md
new file mode 100644
index 0000000000..8745a261e6
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning/backend.md
@@ -0,0 +1,54 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/planning/backends/
+  - /riak/kv/2.2.6/ops/building/planning/backends/
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/multi
+[dev api backend]: {{<baseurl>}}riak/kv/2.2.6/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic |Bitcask|LevelDB|Memory|
+:----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend |✓ | | |
+Persistent |✓ |✓ | |
+Keyspace in RAM |✓ | |✓ |
+Keyspace can be greater than available RAM | |✓ | |
+Keyspace loaded into RAM on startup<sup>1</sup>|✓ | | |
+Objects in RAM | | |✓ |
+Object expiration |✓ | |✓ |
+Secondary indexes | |✓ |✓ |
+Tiered storage | |✓ | |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces. 
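+
+Whichever backend you select, it is configured per node. As a sketch, to
+switch a node from the default backend to LevelDB you would set the
+following in `riak.conf` (see the backend-specific pages above for the
+full set of options):
+
+```riakconf
+storage_backend = leveldb
+```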
diff --git a/content/riak/kv/2.2.6/setup/planning/backend/bitcask.md b/content/riak/kv/2.2.6/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..925aee7227
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning/backend/bitcask.md
@@ -0,0 +1,990 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/advanced/backends/bitcask/
+  - /riak/kv/2.2.6/ops/advanced/backends/bitcask/
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/2.2.6/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: because (1) data that is
+  written to Bitcask doesn't need to be ordered on disk, and (2) the
+  log-structured design of Bitcask allows for minimal disk head
+  movement during writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append only and write once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes. 
Recovery operations need to review only the last
+  record or two written and verify CRC data to ensure that the data is
+  consistent.
+
+* **Easy Backup**
+
+  In most systems, backup can be very complicated. Bitcask simplifies
+  this process due to its append-only, write-once disk format. Any
+  utility that archives or copies files in disk-block order will
+  properly back up or copy a Bitcask database.
+
+## Weaknesses
+
+* Keys must fit in memory
+
+  Bitcask keeps all keys in memory at all times, which means that your
+  system must have enough memory to contain your entire keyspace, plus
+  additional space for other operational components and
+  operating-system-resident filesystem buffer space.
+
+## Installing Bitcask
+
+Bitcask is the default storage engine for Riak. You can verify that
+Bitcask is currently being used as the storage backend with the
+[`riak`][use admin riak cli] command interface:
+
+```bash
+riak config effective | grep backend
+```
+
+If this operation returns anything other than `bitcask`, read
+the following section for instructions on switching the backend to Bitcask.
+
+## Enabling Bitcask
+
+You can set Bitcask as the storage engine using each node's
+[configuration files][config reference]:
+
+```riakconf
+storage_backend = bitcask
+```
+
+```appconfig
+{riak_kv, [
+    {storage_backend, riak_kv_bitcask_backend},
+    %% Other riak_kv settings...
+
+    ]},
+```
+
+## Configuring Bitcask
+
+Bitcask enables you to configure a wide variety of its behaviors, from
+filesystem sync strategy to merge settings and more.
+
+> **Note on configuration systems**
+>
+> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file.
+> Instructions for both systems will be included below. Narrative
+descriptions of the various settings will be tailored to the newer
+configuration system, whereas instructions for the older system will
+largely be contained in the code tabs.
+
+The default configuration values for Bitcask are as follows:
+
+```riakconf
+bitcask.data_root = ./data/bitcask
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    {data_root, "/var/lib/riak/bitcask"},
+    {io_mode, erlang},
+
+    %% Other Bitcask-specific settings
+    ]}
+```
+
+All of the other available settings listed below can be added to your
+configuration files.
+
+### Open Timeout
+
+The open timeout setting specifies the maximum time Bitcask will block
+on startup while attempting to create or open the Bitcask data
+directory. The default is 4 seconds.
+
+In general, you will not need to adjust this setting. If, however, you
+begin to receive log messages of the form `Failed to start bitcask
+backend: ...`, you may want to consider using a longer timeout.
+
+Open timeout is specified using the `bitcask.sync.open_timeout`
+parameter, and can be set in terms of seconds, minutes, hours, etc.
+The following example sets the parameter to 10 seconds:
+
+```riakconf
+bitcask.sync.open_timeout = 10s
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {open_timeout, 10} %% This value must be expressed in seconds
+    ...
+    ]}
+```
+
+### Sync Strategy
+
+Bitcask enables you to configure the durability of writes by specifying
+when to synchronize data to disk, i.e. by choosing a sync strategy. The
+default setting (`none`) writes data into operating system buffers that
+will be written to disk when those buffers are flushed by the operating
+system. 
If the system fails before those buffers are flushed, e.g. due +to power loss, that data is lost. This possibility holds for any +database in which values are asynchronously flushed to disk. + +Thus, using the default setting of `none` protects against data loss in +the event of application failure, i.e. process death, but leaves open a +small window in which data could be lost in the event of a complete +system failure, e.g. hardware or OS failure. + +This possibility can be prevented by choosing the `o_sync` sync +strategy, which forces the operating system to flush to stable storage +at write time for every write. The effect of flushing each write is +better durability, although it should be noted that write throughput +will suffer because each write will have to wait for the write to +complete. + +The following sync strategies are available: + + * `none` --- lets the operating system manage syncing writes + (default) + * `o_sync` --- uses the `O_SYNC` flag, which forces syncs on every + write + * Time interval --- Riak will force Bitcask to sync at specified + intervals + +The following are possible configurations: + + +```riakconf +bitcask.sync.strategy = none +bitcask.sync.strategy = o_sync + +bitcask.sync.strategy = interval +bitcask.sync.interval = 65s +``` + +```appconfig +{bitcask, [ + ..., + {sync_strategy, none}, + {sync_strategy, o_sync}, + {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds + ... + ]} +``` + +> **Sync strategy interval limitations** +> +> Setting the sync interval to a value lower or equal to + `riak_core.vnode_inactivity_timeout` (default: 60 seconds), will + prevent Riak from performing handoffs. +> +> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive. + +### Max File Size + +The `max_file_size` setting describes the maximum permitted size for any +single data file in the Bitcask directory. If a write causes the current +file to exceed this size threshold then that file is closed, and a new +file is opened for writes. The default is 2 GB. + +Increasing `max_file_size` will cause Bitcask to create fewer, larger +files that are merged less frequently, while decreasing it will cause +Bitcask to create more numerous, smaller files that are merged more +frequently. + +To give an example, if your ring size is 16, your servers could see as +much as 32 GB of data in the bitcask directories before the first merge +is triggered, irrespective of your working set size. You should plan +storage accordingly and be aware that it is possible to see disk data +sizes that are larger than the working set. + +The `max_file_size` setting can be specified using kilobytes, megabytes, +etc. The following example sets the max file size to 1 GB: + +```riakconf +bitcask.max_file_size = 1GB +``` + +```appconfig +%% The max_file_size setting must be expressed in bytes, as in the +%% example below + +{bitcask, [ + ..., + {max_file_size, 16#40000000}, %% 1 GB expressed in bytes + ... + ]} +``` + +### Hint File CRC Check + +During startup, Bitcask will read from `.hint` files in order to build +its in-memory representation of the key space, falling back to `.data` +files if necessary. This reduces the amount of data that must be read +from the disk during startup, thereby also reducing the time required to +start up. 
You can configure Bitcask to either disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) --- Writes are made via Erlang's built-in file API
+* `nif` --- Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif` and is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory. 
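+
+To make that layout concrete, here is a hypothetical look inside a single
+vnode's directory under the Bitcask data root (the partition index, file
+names, and timestamps are illustrative only):
+
+```bash
+ls /var/lib/riak/bitcask/548063113999088594326381812268606132370974703616/
+# 1518124839.bitcask.data  1518124839.bitcask.hint
+# 1518125477.bitcask.data  1518125477.bitcask.hint
+# bitcask.write.lock
+```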
+
+The merge process is affected by all of the settings described in the
+sections below. In those sections, "dead" refers to keys that no longer
+contain the most up-to-date values, while "live" refers to keys that do
+contain the most up-to-date value and have not been deleted.
+
+### Merge Policy
+
+Bitcask enables you to select a merge policy, i.e. the times of day
+during which merge operations are allowed to be triggered. The valid
+options are:
+
+* `always` --- No restrictions on when merge operations can occur
+  (default)
+* `never` --- Merge will never be attempted
+* `window` --- Merge operations occur during specified hours
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can select a merge policy using the `merge.policy` setting. The
+following example sets the merge policy to `never`:
+
+```riakconf
+bitcask.merge.policy = never
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {merge_window, never},
+    ...
+    ]}
+```
+
+If you opt to specify start and end hours for merge operations, you can
+do so with the `merge.window.start` and `merge.window.end`
+settings in addition to setting the merge policy to `window`.
+Each setting is an integer between 0 and 23 for hours on a 24h clock,
+with 0 meaning midnight and 23 standing for 11 pm.
+The merge window runs from the first minute of the `merge.window.start` hour
+to the last minute of the `merge.window.end` hour.
+The following example enables merging between 3:00 am and 5:59 pm:
+
+```riakconf
+bitcask.merge.policy = window
+bitcask.merge.window.start = 3
+bitcask.merge.window.end = 17
+```
+
+```appconfig
+%% In the app.config-based system, you specify the merge window using
+%% a tuple, as in the following example:
+
+{bitcask, [
+    ...,
+    {merge_window, {3, 17}},
+    ...
+    ]}
+```
+
+> **`merge_window` and the Multi backend**
+>
+>If you are using the older configuration system and using Bitcask with
+the [Multi][plan backend multi] backend, please note that if you
+wish to use a merge window, you _must_ set it in the global `bitcask`
+section of your configuration file. `merge_window` settings
+in per-backend sections are ignored.
+
+If merging has a significant impact on performance of your cluster, or
+if your cluster has quiet periods in which little storage activity
+occurs, you may want to change this setting from the default.
+
+A common way to limit the impact of merging is to create separate merge
+windows for each node in the cluster and ensure that these windows do
+not overlap. This ensures that at most one node at a time can be
+affected by merging, leaving the remaining nodes to handle requests.
+The main drawback of this approach is that merges will occur less
+frequently, leading to increased disk space usage.
+
+### Merge Triggers
+
+Merge triggers determine the conditions under which merging will be
+invoked. These conditions fall into two basic categories:
+
+* **Fragmentation** --- This describes the ratio of dead keys to total
+  keys in a file that will trigger merging. The value of this setting is
+  an integer percentage (0-100). For example, if a data file contains 6
+  dead keys and 4 live keys, a merge will be triggered by the default
+  setting (60%). Increasing this value will cause merging to occur less
+  often, whereas decreasing the value will cause merging to happen more
+  often.
+
+* **Dead Bytes** --- This setting describes how much data stored for
+  dead keys in a single file will trigger merging. If a file meets or
+  exceeds the trigger value for dead bytes, a merge will be triggered.
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+When either of these conditions is met by any file in the directory,
+Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** --- This setting describes which ratio of dead keys
+  to total keys in a file will cause it to be included in the merge. The
+  value of this setting is a percentage (0-100). For example, if a data
+  file contains 4 dead keys and 6 live keys, it will be included in the
+  merge at the default ratio (40%). Increasing the value will cause
+  fewer files to be merged, while decreasing the value will cause more
+  files to be merged.
+
+* **Dead Bytes** --- This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause the file to be
+  included in the merge. Increasing this value will cause fewer files to
+  be merged, while decreasing this value will cause more files to be
+  merged. The default is 128 MB.
+
+* **Small File** --- This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_. If they are
+set higher, Bitcask will trigger merges in cases where no files meet the
+threshold, which means that Bitcask will never resolve the conditions
+that triggered merging in the first place.
+
+### Merge Interval
+
+Bitcask periodically runs checks to determine whether merges are
+necessary. You can determine how often those checks take place using
+the `bitcask.merge_check_interval` parameter. The default is 3 minutes.
+
+```riakconf
+bitcask.merge_check_interval = 3m
+```
+
+```appconfig
+%% In the app.config-based system, this setting is expressed in
+%% milliseconds and found in the riak_kv section rather than the bitcask
+%% section:
+
+{riak_kv, [
+    %% Other configs
+
+    {bitcask_merge_check_interval, 180000},
+
+    %% Other configs
+    ]}
+```
+
+If merge check operations happen at the same time on different
+[vnodes][glossary vnode] on the same node, this can produce spikes
+in I/O usage and undue latency. Bitcask makes it less likely that merge
+check operations will occur at the same time on different vnodes by
+applying a **jitter** to those operations. A jitter is a random
+variation applied to merge times that you can alter using the
+`bitcask.merge_check_jitter` parameter. This parameter is expressed as a
+percentage of `bitcask.merge_check_interval`. The default is 30%.
+
+```riakconf
+bitcask.merge_check_jitter = 30%
+```
+
+```appconfig
+%% In the app.config-based system, this setting is expressed as a float
+%% and found in the riak_kv section rather than the bitcask section:
+
+{riak_kv, [
+    %% Other configs
+
+    {bitcask_merge_check_jitter, 0.3},
+
+    %% Other configs
+    ]}
+```
+
+For example, if you set the merge check interval to 4 minutes and the
+jitter to 25%, merge checks will occur at intervals between 3 and 5
+minutes. With the default of 3 minutes and 30%, checks will occur at
+intervals between roughly 2 and 4 minutes.
+
+### Log Needs Merge
+
+If you are using the older, `app.config`-based configuration system, you
+can use the `log_needs_merge` setting to tune and troubleshoot Bitcask
+merge settings. When set to `true` (as in the example below), each time
+a merge trigger is met, the partition/vnode ID and mergeable files will
+be logged.
+
+```appconfig
+{bitcask, [
+    ...,
+    {log_needs_merge, true},
+    ...
+    ]}
+```
+
+> **Note on `log_needs_merge` and the Multi backend**
+>
+>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please
+note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored.
+
+### Fold Keys Threshold
+
+The fold keys thresholds determine when Bitcask can reuse the keydir for
+a fold operation: the keydir is reused (a) if another fold was started
+less than a specified time interval ago and (b) if fewer than a
+specified number of updates have occurred in the meantime. Otherwise,
+Bitcask will wait until all current folds have completed and then start
+the new fold. The default time interval is 0, while the default number
+of updates is unlimited. Both thresholds can be disabled.
+
+The conditions described above can be set using the `fold.max_age` and
+`fold.max_puts` parameters, respectively. The former can be expressed in
+terms of minutes, hours, days, etc., while the latter is expressed as an
+integer. Each threshold can be disabled by setting the value to
+`unlimited`. The following example sets the `max_age` to half a second
+and the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value or if you need to purge data for space reasons, you can
+configure object expiration, aka expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting
+and either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time.
+Bitcask will defer a merge triggered solely by key expiry for the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting,
+expressed in terms of minutes, hours, days, etc. The following example
+sets the grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If Search queries returning
+expired keys are a problem for your use case, we recommend not using
+automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can assist you in making Bitcask as stable and reliable as
+possible while minimizing latency and maximizing throughput.
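+
+Pulling several of the settings above together, here is a hypothetical
+`riak.conf` fragment for a cluster with quiet overnight hours and
+frequently updated keys. Every value is illustrative only and should be
+tuned against your own workload:
+
+```riakconf
+## Merge only during a quiet window (1:00 am to 5:59 am):
+bitcask.merge.policy = window
+bitcask.merge.window.start = 1
+bitcask.merge.window.end = 5
+
+## Merge somewhat more eagerly than the defaults (trigger 60,
+## threshold 40) to keep fragmentation from frequent updates in check:
+bitcask.merge.triggers.fragmentation = 50
+bitcask.merge.thresholds.fragmentation = 30
+```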
+
+### Tips & Tricks
+
+ * **Bitcask depends on filesystem caches**
+
+   Some data storage layers implement their own page/block buffer cache
+   in-memory, but Bitcask does not. Instead, it depends on the
+   filesystem's cache. Adjusting the caching characteristics of your
+   filesystem can impact performance.
+
+ * **Be aware of file handle limits**
+
+   Review the documentation on [open files limit][perf open files].
+
+ * **Avoid the overhead of updating file metadata (such as last access
+   time) on every read or write operation**
+
+   You can achieve a substantial speed boost by adding the `noatime`
+   mounting option to Linux's `/etc/fstab`. This will disable the
+   recording of the last accessed time for all files, which results
+   in fewer disk head seeks. If you need last access times but you'd
+   like some of the benefits of this optimization, you can try
+   `relatime`.
+
+   ```
+   /dev/sda5    /data           ext3    noatime  1 1
+   /dev/sdb1    /data/inno-log  ext3    noatime  1 2
+   ```
+
+ * **Small number of frequently changed keys**
+
+   When keys are changed frequently, fragmentation rapidly increases.
+   To counteract this, you should lower the fragmentation trigger and
+   threshold.
+
+ * **Limited disk space**
+
+   When disk space is limited, limiting the space occupied by dead keys
+   is of paramount importance. Lower the dead bytes threshold and
+   trigger to counteract wasted space.
+
+ * **Purging stale entries after a fixed period**
+
+   To automatically purge stale values, set the object expiry value to
+   the desired cutoff time. Keys that are not modified for a period
+   equal to or greater than this time interval will become
+   inaccessible.
+
+ * **High number of partitions per node**
+
+   Because each node runs many partitions, Bitcask will have
+   many [open files][perf open files]. To reduce the number of open
+   files, we suggest increasing the max file size so that larger files
+   will be written. You could also decrease the fragmentation and
+   dead-bytes settings and increase the small file threshold so that
+   merging will keep the number of open files small.
+
+ * **High daytime traffic, low nighttime traffic**
+
+   In order to cope with a high volume of writes without performance
+   degradation during the day, you might want to limit merging to
+   non-peak periods. Setting the merge window to hours of the day
+   when traffic is low will help.
+
+ * **Multi-cluster replication**
+
+   If you are using Riak with the replication feature enabled, your
+   clusters might generate more fragmentation and dead bytes. Additionally,
+   because the fullsync feature operates across entire partitions, it
+   will be made more efficient by accessing data as sequentially as
+   possible (across fewer files). Lowering both the fragmentation and
+   dead-bytes settings will improve performance.
+
+## FAQ
+
+ * [[Why does it seem that Bitcask merging is only triggered when a
+   Riak node is restarted?|Developing on Riak
+   FAQs#why-does-it-seem-that-bitc]]
+ * [[If the size of key index exceeds the amount of memory, how does
+   Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+ * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time. The file being
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, while
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. This data management strategy
+may use up a lot of space over time, since Bitcask writes new values
+without touching the old ones.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033322492444576013453623296
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+9819464125817003981681007469812263831329677312
+```
+
+Note that when Riak starts up, a directory is created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 11646341172480632629435613510707880312883226184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered.
+
+```
+bitcask/
+|-- 0-1317147619996589
+|   |-- 1317147974.bitcask.data
+|   |-- 1317147974.bitcask.hint
+|   |-- 1317221578.bitcask.data
+|   |-- 1317221578.bitcask.hint
+|   |-- 1317221869.bitcask.data
+|   |-- 1317221869.bitcask.hint
+|   |-- 1317222847.bitcask.data
+|   |-- 1317222847.bitcask.hint
+|   |-- 1317222868.bitcask.data
+|   |-- 1317222868.bitcask.hint
+|   |-- 1317223014.bitcask.data
+|   `-- 1317223014.bitcask.hint
+|-- 1004782375664995756265033322492444576013453623296-1317147628760580
+|   |-- 1317147693.bitcask.data
+|   |-- 1317147693.bitcask.hint
+|   |-- 131722265.bitcask.data
+|   |-- 131722265.bitcask.hint
+|   |-- 1317222514.bitcask.data
+|   |-- 1317222514.bitcask.hint
+|   |-- 1317223035.bitcask.data
+|   |-- 1317223035.bitcask.hint
+|   |-- 1317223411.bitcask.data
+|   `-- 1317223411.bitcask.hint
+|-- 1027618338748291114361965898003636498195577569280-1317223690337865
+|-- 1050454301831586472458898473514828420377701515264-1317223690151365
+
+... etc ...
+
+```
+
+This is normal operational behavior for Bitcask.
diff --git a/content/riak/kv/2.2.6/setup/planning/backend/leveldb.md b/content/riak/kv/2.2.6/setup/planning/backend/leveldb.md
new file mode 100644
index 0000000000..867409c758
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning/backend/leveldb.md
@@ -0,0 +1,502 @@
+---
+title: "LevelDB"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "LevelDB"
+    identifier: "planning_backend_leveldb"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/advanced/backends/leveldb/
+  - /riak/kv/2.2.6/ops/advanced/backends/leveldb/
+---
+
+[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}}
+[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/2.2.6/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/#active-anti-entropy
+
+> **Note on upgrading to 2.0**
+>
+> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0,
+and wish to keep using your old `app.config` file for configuration,
+make sure to follow the steps for setting the
+`total_leveldb_mem_percent` parameter in the
+[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB].
+
+[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application
+that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an
+open-source, on-disk key/value store created by Google Fellows Jeffrey
+Dean and Sanjay Ghemawat.
+
+LevelDB is a relatively new entrant into the growing list of key/value
+database libraries, but it has some very interesting qualities that we
+believe make it an ideal candidate for use in Riak. LevelDB's storage
+architecture is more like
+[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable
+model than it is like Bitcask. This design and implementation provide
+the possibility of a storage engine without Bitcask's RAM limitation.
+
+> **Note:** Riak uses a fork of LevelDB. The code can be found
+[on Github](https://github.com/basho/leveldb).
+
+A number of changes have been introduced in the LevelDB backend in Riak
+2.0:
+
+* There is now only _one_ performance-related setting that Riak users
+  need to define---`leveldb.total_mem_percent`---as LevelDB now
+  dynamically sizes the file cache and block sizes based upon active
+  [vnodes][glossary vnode] assigned to the node.
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model
+  for background compaction work on `.sst` table files. The new model
+  has increased throughput by at least 10% in all test scenarios.
+* Delete operations now receive priority handling in compaction
+  selection, which means more aggressive reclaiming of disk space than
+  in previous versions of Riak's LevelDB backend.
+* Nodes storing massive key datasets (e.g. in the billions of keys) now
+  receive increased throughput due to automatic management of LevelDB's
+  block size parameter. This parameter is slowly raised to increase the
+  number of files that can open simultaneously, improving random read
+  performance.
+
+## Strengths
+
+1. **License** --- The LevelDB and eLevelDB licenses are the [New BSD
+   License](http://www.opensource.org/licenses/bsd-license.php) and the
+   [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html),
+   respectively. We'd like to thank Google and the authors of LevelDB at
+   Google for choosing a completely FLOSS license so that everyone can
+   benefit from this innovative storage engine.
+2. **Data compression** --- LevelDB provides two compression algorithms
+   to reduce storage size and increase efficient use of storage bandwidth:
+    * Google's [Snappy](https://code.google.com/p/snappy/) data compression
+    * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data
+      compression
+
+   Enabling compression means more CPU usage but less disk space. Compression
+   is especially good for text data, including raw text, Base64, JSON, etc.
+
+## Weaknesses
+
+1. Read access can be slow when there are many levels to search.
+2. LevelDB may have to do a few disk seeks to satisfy a read, one disk
+   seek per level. If 10% of the database fits in memory, only one seek
+   is needed, for the last level (since all of the earlier levels should
+   end up cached in the OS buffer cache for most filesystems), whereas
+   if 1% fits in memory, LevelDB will need two seeks.
+
+## Installing eLevelDB
+
+Riak ships with eLevelDB included within the distribution, so there is
+no separate installation required. However, Riak is configured to use
+the Bitcask storage engine by default. To switch to eLevelDB, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveldb`:
+
+```riakconf
+storage_backend = leveldb
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_eleveldb_backend},
+    %% ...
+    ]}
+```
+
+## Configuring eLevelDB
+
+eLevelDB's default behavior can be modified by adding/changing
+parameters in the `eleveldb` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for eLevelDB are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.data_root` | LevelDB data root | `./data/leveldb`
+`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size. | `70`
+
+If you are using the older, `app.config`-based system, the equivalent of
+`leveldb.data_root` is the `data_root` setting, as in the following
+example:
+
+```appconfig
+{eleveldb, [
+    {data_root, "/path/to/leveldb"},
+
+    %% Other eleveldb-specific settings
+]}
+```
+
+The `leveldb.maximum_memory.percent` setting is only available in the
+newer configuration system.
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### ext4 Options
+
+The ext4 filesystem defaults include two options that increase integrity
+but slow performance. Because Riak's integrity is based on multiple
+nodes holding the same data, these two options can be changed to boost
+LevelDB's performance. We recommend setting: `barrier`=0 and
+`data`=writeback.
+
+#### CPU Throttling
+
+If CPU throttling is enabled, disabling it can boost LevelDB performance
+in some cases.
+
+#### No Entropy
+
+The 2.6 kernel is widely known for stalling programs that are waiting
+for SSL entropy bits. If you are using the HTTPS protocol, we recommend
+installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line. The TSC clocksource has been identified to cause issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a
+Google-sponsored open source project that has been incorporated into an
+Erlang application and integrated into Riak for storage of key/value
+information on disk. The implementation of LevelDB is similar in spirit
+to the representation of a single Bigtable tablet (section 5.3 of the
+Bigtable paper).
+
+### How Levels Are Managed
+
+LevelDB is a memtable/sstable design. The set of sorted tables is
+organized into a sequence of levels. Each level stores approximately ten
+times as much data as the level before it. The sorted table generated
+from a flush is placed in a special young level (also called level-0).
+When the number of young files exceeds a certain threshold (currently
+four), all of the young files are merged together with all of the
+overlapping level-1 files to produce a sequence of new level-1 files (a
+new level-1 file is created for every 2MB of data).
+
+Files in the young level may contain overlapping keys. However, files in
+other levels have distinct non-overlapping key ranges. Consider level
+number L where L >= 1. When the combined size of files in level-L
+exceeds (10^L) MB (i.e. 10MB for level-1, 100MB for level-2, ...), one
+file in level-L, and all of the overlapping files in level-(L+1), are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-L+1). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 second.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0, the only special case to consider is that, after
+picking the primary L0 file to compact, LevelDB will check other L0
+files to determine the degree to which they overlap. This is an attempt
+to avoid some I/O; we can expect L0 compactions to usually, if not
+always, be "all L0 files".
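+
+As a toy illustration of the scoring rule just described (our sketch
+only, not the actual implementation, which is written in C++), the
+selection logic could look like the following in Erlang; the module
+name and input shape are invented for this example:
+
+```erlang
+%% Toy sketch of LevelDB's compaction scoring: level 0 is scored by
+%% file count, higher levels by byte size relative to (10^L) MB.
+-module(compaction_pick).
+-export([pick/1]).
+
+%% Input: a list of {Level, NumFiles, Bytes} tuples.
+score({0, NumFiles, _Bytes}) ->
+    NumFiles / 4;                      %% desired level-0 file count
+score({Level, _NumFiles, Bytes}) ->
+    Bytes / (math:pow(10, Level) * 1024 * 1024).
+
+%% Return the level with the highest score, i.e. the next candidate
+%% for compaction.
+pick(Levels) ->
+    {_Score, Best} = lists:max([{score(L), element(1, L)} || L <- Levels]),
+    Best.
+```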
+
+See the PickCompaction routine in
+[1](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition ring
+which results in 64 separate directories, each with their own LevelDB
+database:
+
+```bash
+leveldb/
+|-- 0
+|   |-- 000003.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   `-- MANIFEST-000002
+|-- 1004782375664995756265033322492444576013453623296
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+
+... etc ...
+
+`-- 9819464125817003981681007469812263831329677312
+    |-- 000005.log
+    |-- CURRENT
+    |-- LOCK
+    |-- LOG
+    |-- LOG.old
+    `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033322492444576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files
+in a single database directory. In Riak 1.3, the original LevelDB code
+was modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in the name of speeding up
+database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels.
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows approximate sizes (in
+megabytes) for each of the following: the amount of raw data
+stored in the level, the cumulative size of all levels up to the
+specified level, and the cumulative size including active anti-entropy
+data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above. Select the third column if you are not
+  using active anti-entropy or the fourth column if you are (i.e. if the
+  `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative column in
+  each row in the table. The first result that exceeds your fast storage
+  array capacity will provide the level number that should be used for
+  your `leveldb.tiered` setting.
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
diff --git a/content/riak/kv/2.2.6/setup/planning/backend/memory.md b/content/riak/kv/2.2.6/setup/planning/backend/memory.md
new file mode 100644
index 0000000000..be3986c9e9
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning/backend/memory.md
@@ -0,0 +1,143 @@
+---
+title: "Memory"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Memory"
+    identifier: "planning_backend_memory"
+    weight: 102
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/advanced/backends/memory/
+  - /riak/kv/2.2.6/ops/advanced/backends/memory/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference
+[plan backend multi]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/multi
+[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb
+
+The Memory storage backend uses in-memory tables to store all data.
+This data is never persisted to disk or to any other storage mechanism.
+The Memory storage engine is best used for testing Riak clusters or for
+storing small amounts of transient state in production systems.
+
+Internally, the Memory backend uses Erlang ETS tables to manage data.
+More information can be found in the
+[official Erlang documentation](http://www.erlang.org/doc/man/ets.html).
+
+## Enabling the Memory Backend
+
+To enable the memory backend, edit your [configuration files][config reference]
+for each Riak node and specify the Memory backend as shown in the following
+example:
+
+```riakconf
+storage_backend = memory
+```
+
+```appconfig
+{riak_kv, [
+    ...,
+    {storage_backend, riak_kv_memory_backend},
+    ...
+    ]}
+```
+
+**Note**: If you *replace* a previously specified backend by removing
+it or commenting out its setting, data belonging to that backend will
+still be preserved on the filesystem but will no longer be accessible
+through Riak unless the backend is enabled again.
+
+If you require multiple backends in your configuration, please consult
+the [Multi backend documentation][plan backend multi].
+
+## Configuring the Memory Backend
+
+The Memory backend enables you to configure two fundamental aspects of
+object storage: maximum memory usage per [vnode][glossary vnode]
+and object expiry.
+
+### Max Memory
+
+This setting specifies the maximum amount of memory consumed by the
+Memory backend. It's important to note that this setting acts on a
+*per-vnode basis*, not on a per-node or per-cluster basis. This should
+be taken into account when planning for memory usage with the Memory
+backend, as the total memory used will be max memory times the number
+of vnodes in the cluster.
+
+When the threshold value that you set has been met in a particular
+vnode, Riak will begin discarding objects, beginning with the oldest
+object and proceeding until memory usage returns below the allowable
+threshold.
+
+You can configure maximum memory using the
+`memory_backend.max_memory_per_vnode` setting. You can specify
+`max_memory_per_vnode` however you'd like, using kilobytes, megabytes,
+or even gigabytes.
+
+The following are all possible settings:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 500KB
+memory_backend.max_memory_per_vnode = 10MB
+memory_backend.max_memory_per_vnode = 2GB
+```
+
+```appconfig
+%% In the app.config-based system, the equivalent setting is max_memory,
+%% which must be expressed in megabytes:
+
+{riak_kv, [
+    %% storage_backend specifies the Erlang module defining the storage
+    %% mechanism that will be used on this node.
+
+    {storage_backend, riak_kv_memory_backend},
+    {memory_backend, [
+        ...,
+        {max_memory, 4096}, %% 4GB in megabytes
+        ...
+    ]}
+]}
+```
+
+To determine an optimal max memory setting, we recommend consulting the
+documentation on [LevelDB cache size][plan backend leveldb].
+
+### TTL
+
+The time-to-live (TTL) parameter specifies the amount of time an object
+remains in memory before it expires. The minimum time is one second.
+
+In the newer, `riak.conf`-based configuration system, you can specify
+`ttl` in seconds, minutes, hours, days, etc. The following are all
+possible settings:
+
+```riakconf
+memory_backend.ttl = 1s
+memory_backend.ttl = 10m
+memory_backend.ttl = 3h
+```
+
+```appconfig
+%% In the app.config-based system, the ttl setting must be expressed in
+%% seconds:
+
+{memory_backend, [
+    %% other settings
+    {ttl, 86400}, %% Set to 1 day
+    %% other settings
+    ]}
+```
+
+> **Dynamically Changing `ttl`**
+>
+> There is currently no way to dynamically change the `ttl` setting for a
+bucket or bucket type. The current workaround would be to define
+multiple Memory backends using the Multi backend, each with different
+`ttl` values. For more information, consult the documentation on the
+[Multi][plan backend multi] backend.
diff --git a/content/riak/kv/2.2.6/setup/planning/backend/multi.md b/content/riak/kv/2.2.6/setup/planning/backend/multi.md
new file mode 100644
index 0000000000..8626989e31
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning/backend/multi.md
@@ -0,0 +1,226 @@
+---
+title: "Multi-backend"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Multi-backend"
+    identifier: "planning_backend_multi"
+    weight: 103
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/advanced/backends/multi/
+  - /riak/kv/2.2.6/ops/advanced/backends/multi/
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/memory
+[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference
+[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types
+[use admin riak-admin cli]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin
+
+Riak allows you to run multiple backends within a single Riak cluster.
+Selecting the Multi backend enables you to use different storage
+backends for different [buckets][concept buckets]. Any combination of the three
+available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used.
+
+## Configuring Multiple Backends
+
+You can set up your cluster to use the Multi backend using Riak's
+[configuration files][config reference].
+
+```riakconf
+storage_backend = multi
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_multi_backend},
+    %% ...
+]},
+```
+
+Remember that you must stop and then restart each node when you change
+storage backends or modify any other configuration.
+
+## Using Multiple Backends
+
+In Riak 2.0 and later, we recommend using multiple backends by applying
+them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process
+involves three steps:
+
+1. Creating a bucket type that enables buckets of that type to use the
+   desired backends
+2. Activating that bucket type
+3. Setting up your application to use that type
+
+Let's say that we've set up our cluster to use the Multi backend and we
+want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface.
+
+We'll call our bucket types `leveldb_backend` and `memory_backend`, but
+you can use whichever names you wish.
+
+```bash
+riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}'
+riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}'
+```
+
+Then, we must activate those bucket types so that they can be used in
+our cluster:
+
+```bash
+riak-admin bucket-type activate leveldb_backend
+riak-admin bucket-type activate memory_backend
+```
+
+Once those types have been activated, any objects stored in buckets
+bearing the type `leveldb_backend` will be stored in LevelDB, whereas
+all objects stored in buckets of the type `memory_backend` will be
+stored in the Memory backend.
+
+More information can be found in our documentation on [using bucket types][usage bucket types].
+
+## Configuring Individual Backends
+
+Once you've set up your cluster to use multiple backends, you can
+configure each backend on its own. All configuration options available
+for LevelDB, Bitcask, and Memory remain available to you when using the
+Multi backend.
+
+#### Using the Newer Configuration System
+
+If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by
+prefacing each configuration with `multi_backend`.
+
+Here is an example of the general form for configuring multiple
+backends:
+
+```riakconf
+multi_backend.$name.$setting_name = setting
+```
+
+If you are using, for example, the LevelDB and Bitcask backends and wish
+to set LevelDB's `bloomfilter` setting to `off` and the Bitcask
+backend's `io_mode` setting to `nif`, you would do that as follows:
+
+```riakconf
+multi_backend.leveldb.bloomfilter = off
+multi_backend.bitcask.io_mode = nif
+```
+
+#### Using the Older Configuration System
+
+If you are using the older, `app.config`-based configuration system,
+configuring multiple backends involves adding one or more
+backend-specific sections to your `riak_kv` settings (in addition to
+setting the `storage_backend` setting to `riak_kv_multi_backend`, as
+shown above).
+
+> **Note**: If you are defining multiple file-based backends of the same
+type, each of these must have a separate `data_root` directory defined.
+
+While all configuration parameters can be placed anywhere within the
+`riak_kv` section of `app.config`, in general we recommend that you
+place them in the section containing other backend-related settings to
+keep the settings organized.
+
+Below is the general form for your `app.config` file:
+
+```appconfig
+{riak_kv, [
+    %% ...
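+
+    %% multi_backend_default (below) names the backend that will be
+    %% used by any bucket that does not set a "backend" property; it
+    %% must match one of the names defined in the multi_backend list.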
+ {multi_backend_default, <<"bitcask_mult">>}, + {multi_backend, [ + {<<"bitcask_mult", riak_kv_bitcask_backend, [ + {data_root, "/var/lib/riak/bitcask"} + ]}, + {<<"leveldb_mult", riak_kv_eleveldb_backend, [ + {data_root, "/var/lib/riak/leveldb"} + ]} + ]} + %% ... +]} +``` + +## Multi Backend Memory Use + +Each Riak storage backend has settings for configuring how much memory +the backend can use, e.g. caching for LevelDB or for the entire set of +data for the Memory backend. Each of these backends suggests allocating +up to 50% of available memory for this purpose. When using the Multi +backend, make sure that the sum of all backend memory use is at 50% +or less. For example, using three backends with each set to 50% memory +usage will inevitably lead to memory problems. diff --git a/content/riak/kv/2.2.6/setup/planning/best-practices.md b/content/riak/kv/2.2.6/setup/planning/best-practices.md new file mode 100644 index 0000000000..0902324eb7 --- /dev/null +++ b/content/riak/kv/2.2.6/setup/planning/best-practices.md @@ -0,0 +1,141 @@ +--- +title: "Scaling and Operating Riak Best Practices" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Best Practices" + identifier: "planning_best_practices" + weight: 105 + parent: "planning" +toc: true +aliases: + - /riak/2.2.6/ops/building/planning/best-practices + - /riak/kv/2.2.6/ops/building/planning/best-practices +--- + +[use ref handoff]: {{<baseurl>}}riak/kv/2.2.6/using/reference/handoff +[config mapreduce]: {{<baseurl>}}riak/kv/2.2.6/configuring/mapreduce +[glossary aae]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes + +Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. + +## Disk Capacity + +Filling up disks is a serious problem in Riak. In general, you should +add capacity under the following conditions: + +* a disk becomes more than 80% full +* you have fewer than 10 days of capacity remaining at current rates of + growth + +## RAID Levels + +Riak provides resilience through its built-in redundancy. + +* RAID0 can be used to increase the performance at the expense of + single-node reliability +* RAID5/6 can be used to increase the reliability over RAID0 but still + offers higher performance than single disks +* You should choose a RAID level (or no RAID) that you’re comfortable + with + +## Disk Leeway + +* Adding new nodes instantly increases the total capacity of the + cluster, but you should allow enough internal network capacity that + [handing off][use ref handoff] existing data outpaces the arrival of new + data. +* Once you’ve reached a scale at which the amount of new data arriving + is a small fraction of the cluster's total capacity, you can add new + nodes when you need them. You should be aware, however, that adding + new nodes can actually _increase_ disk usage on existing nodes in the + short term as data is rebalanced within the cluster. 
+* If you expect to run out of capacity, we recommend allowing a week or
+  two of leeway so that you have plenty of time to add nodes and for
+  [handoff][use ref handoff] to occur before the disks reach capacity
+* For large volumes of storage it's usually prudent to add more capacity
+  once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+  processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you’ll have spare
+  capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be “bursty,” i.e. it tends to vary both quite
+  a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times as much intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+  fill up
+* a node's IO/CPU activity is higher than average for an extended
+  period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node has 1 TB of storage
+  but the rest have 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of incoming data matters more than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node].
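+
+To make the disk-capacity guidelines above easy to check, here is a
+minimal sketch. It assumes GNU `df`, a data directory at
+`/var/lib/riak`, and a hypothetical `GROWTH_GB_PER_DAY` value that you
+would estimate from your own monitoring:
+
+```bash
+#!/usr/bin/env bash
+# Warn when the data disk is more than 80% full or when fewer than
+# 10 days of headroom remain at the current rate of growth.
+GROWTH_GB_PER_DAY=${GROWTH_GB_PER_DAY:-5}   # hypothetical daily growth, in GB
+DATA_DIR=/var/lib/riak
+
+used_pct=$(df --output=pcent "$DATA_DIR" | tail -1 | tr -dc '0-9')
+avail_gb=$(df -BG --output=avail "$DATA_DIR" | tail -1 | tr -dc '0-9')
+days_left=$(( avail_gb / GROWTH_GB_PER_DAY ))
+
+[ "$used_pct" -gt 80 ] && echo "WARN: disk ${used_pct}% full; add capacity"
+[ "$days_left" -lt 10 ] && echo "WARN: ~${days_left} days of headroom; add nodes now"
+```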
+
+## Scaling
+
+* All large-scale systems are bound by the availability of some
+  resources
+* From a stability point of view, the best state for a busy Riak cluster
+  to maintain is the following:
+  * New network connections are limited to ensure that existing network
+    connections consume most network bandwidth
+  * CPU at < 30%
+  * Disk IO at < 90%
+* You should use HAProxy or your application servers to limit new
+  network connections to keep network and IO below 90% and CPU below
+  30%.
diff --git a/content/riak/kv/2.2.6/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.2.6/setup/planning/bitcask-capacity-calc.md
new file mode 100644
index 0000000000..e0c6e5d57d
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning/bitcask-capacity-calc.md
@@ -0,0 +1,100 @@
+---
+title: "Bitcask Capacity Calculator"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Bitcask Capacity Calculator"
+    identifier: "planning_cluster_bitcask_capacity"
+    weight: 104
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/planning/bitcask
+  - /riak/kv/2.2.6/ops/building/planning/bitcask
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask
+
+This calculator will assist you in sizing your cluster if you plan to
+use the default [Bitcask][plan backend bitcask] storage backend.
+
+This page is designed to give you a rough estimate when sizing your
+cluster. The calculations are a _best guess_, and they tend to be a bit
+on the conservative side. It's important to include a bit of headroom
+as well as room for unexpected growth so that if demand exceeds
+expectations, you'll be able to add more nodes to the cluster and stay
+ahead of your requirements.
+
+<div id="node_info" class="calc_info"></div>
+<div class="calculator">
+  <ul>
+    <li>
+      <label for="n_total_keys">Total Number of Keys:</label>
+      <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input">
+      <span class="error_span" id="n_total_keys_error"></span>
+    </li>
+    <li>
+      <label for="n_bucket_size">Average Bucket Size (Bytes):</label>
+      <input id="n_bucket_size" type="text" size="7" name="n_bucket_size" value="" class="calc_input">
+      <span class="error_span" id="n_bucket_size_error"></span>
+    </li>
+    <li>
+      <label for="n_key_size">Average Key Size (Bytes):</label>
+      <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input">
+      <span class="error_span" id="n_key_size_error"></span>
+    </li>
+    <li>
+      <label for="n_record_size">Average Value Size (Bytes):</label>
+      <input id="n_record_size" type="text" size="7" name="n_record_size" value="" class="calc_input">
+      <span class="error_span" id="n_record_size_error"></span>
+    </li>
+    <li>
+      <label for="n_ram">RAM Per Node (in GB):</label>
+      <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input">
+      <span class="error_span" id="n_ram_error"></span>
+    </li>
+    <li>
+      <label for="n_nval"><i>N</i> (Number of Write Copies):</label>
+      <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input">
+      <span class="error_span" id="n_nval_error"></span>
+    </li>
+</ul>
+</div>
+
+## Recommendations
+
+<span id="recommend"></span>
+
+## Details on Bitcask RAM Calculation
+
+With the above information in mind, the following variables will factor
+into your RAM calculation:
+
+Variable | Description
+:--------|:-----------
+Static Bitcask per-key overhead | 44.5 bytes per key
+Estimated average bucket-plus-key
length | The combined number of characters your bucket + keynames will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The Actual Equation
+
+Approximate RAM Needed for Bitcask = (static Bitcask per-key overhead +
+estimated average bucket+key length in bytes) * estimated total number
+of keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+The amount of RAM you would need for Bitcask is about **10.4 GB across
+your entire cluster** ((44.5 + 30) bytes * 50,000,000 keys * 3 replicas).
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high-performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
diff --git a/content/riak/kv/2.2.6/setup/planning/cluster-capacity.md b/content/riak/kv/2.2.6/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..44063cd6a5
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning/cluster-capacity.md
@@ -0,0 +1,234 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/planning/cluster
+  - /riak/kv/2.2.6/ops/building/planning/cluster
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/2.2.6/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/2.2.6/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users. Memory is essential for running complex MapReduce queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM without degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory.
The keydir is a hash table that maps each
+concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for
+each file contained within each Bitcask backend) to a fixed-size
+structure giving the file, offset, and size of the most recently written
+entry for that bucket + key on disk.
+
+To learn more about Bitcask, see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.
+
+If your calculated RAM needs will exceed your hardware resources---in other words, if you can't afford the RAM to use Bitcask---we recommend that you use LevelDB.
+
+Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.
+
+### LevelDB
+
+If RAM requirements for Bitcask are prohibitive, we recommend use of
+the LevelDB backend. While LevelDB doesn't require a large amount of RAM
+to operate, supplying it with the maximum amount of memory available leads to higher performance.
+
+For more information see [LevelDB][plan backend leveldb].
+
+## Disk
+
+Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs:
+
+#### Estimated Total Objects * Average Object Size * n_val
+
+For example:
+
+* 50,000,000 objects
+* an average object size of two kilobytes (2,048 bytes)
+* the default `n_val` of 3
+
+With those numbers, you would need approximately **286 GB** of disk space in the entire cluster to accommodate your data.
+
+We believe that databases should be durable out of the box. When we
+built Riak, we did so in a way that you could write to disk while
+keeping response times below your users' expectations. So this
+calculation assumes that you'll be keeping the entire data set on disk.
+
+Many of the considerations taken when configuring a machine to serve a
+database apply to configuring a node for Riak as well. Mounting
+disks with `noatime` and having separate disks for your OS and Riak data
+lead to much better performance. See [Planning for a
+Riak System](../start) for more information.
+
+### Disk Space Planning and Ownership Handoff
+
+When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is that the other nodes need more disk space; in rare cases, handoff can fill the disks of one or more of those nodes.
+
+When making disk space planning decisions, we recommend that you:
+
+* assume that one or more nodes may be down at any time
+* monitor your disk space usage and add additional space when usage
+  exceeds 50-60% of available space.
+
+Another possibility worth considering is using Riak with a filesystem
+that allows for growth, for example
+[LVM],
+[RAID](http://en.wikipedia.org/wiki/RAID), or
+[ZFS](http://en.wikipedia.org/wiki/ZFS).
+
+## Read/Write Profile
+
+Read/write ratios, as well as the distribution of key access, should
+influence the configuration and design of your cluster. If your use case
+is write-heavy, you will need less RAM for caching, and if only a
+certain portion of keys is accessed regularly, such as in a [Pareto
+distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you
+won't need as much RAM available to cache those keys' values.
+
+## Number of Nodes
+
+The number of nodes (i.e.
physical servers) in your Riak cluster depends
+on the number of times data is [replicated][concept replication] across the
+cluster. To ensure that the cluster is always available to respond to
+read and write requests, we recommend a "sane default" of N=3
+replicas. This requirement can be met with a 3- or 4-node
+cluster.
+
+For production deployments, however, we recommend using no fewer than 5
+nodes, as node failures in smaller clusters can compromise the
+fault-tolerance of the system. Additionally, in clusters smaller than 5
+nodes, a high percentage of the nodes (75-100% of them) will need to
+respond to each request, putting undue load on the cluster that may
+degrade performance. For more details on this recommendation, see our
+blog post on [Why Your Riak Cluster Should Have at Least Five
+Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/).
+
+## Scaling
+
+Riak can be scaled in two ways: vertically, via improved hardware, and
+horizontally, by adding more nodes. Both ways can provide performance
+and capacity benefits, but should be used in different circumstances.
+The [riak-admin cluster command][use admin riak-admin#cluster] can
+assist scaling in both directions.
+
+#### Vertical Scaling
+
+Vertical scaling, or improving the capabilities of a node/server,
+provides greater capacity to the node but does not decrease the overall
+load on existing members of the cluster. That is, the ability of the
+improved node to handle existing load is increased but the load itself
+is unchanged. Reasons to scale vertically include increasing IOPS (I/O
+Operations Per Second), increasing CPU/RAM capacity, and increasing disk
+capacity.
+
+#### Horizontal Scaling
+
+Horizontal scaling, or increasing the number of nodes in the cluster,
+reduces the responsibilities of each member node by reducing the number
+of partitions each node manages and by providing additional endpoints
+for client connections. That is, the capacity of each individual node
+does not change, but its load is decreased. Reasons to scale
+horizontally include increasing I/O concurrency, reducing the load on
+existing nodes, and increasing disk capacity.
+
+> **Note on horizontal scaling**
+>
+> When scaling horizontally, it's best to add all planned nodes at once
+with multiple `riak-admin cluster join` commands followed by
+a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster.
+
+#### Reducing Horizontal Scale
+
+If a Riak cluster is over-provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command.
+
+## Ring Size/Number of Partitions
+
+Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference].
+
+The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes we recommend a larger ring size.
+
+The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes.
+
+There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses.
We recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine. A 10-node cluster, on the other hand, should work well with a ring size of 128 or 256 (64 is too small, while 512 is likely too large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency IO access can drastically decrease performance. Before
+putting your Riak cluster in production, it is recommended that you gain
+a full understanding of your environment's behavior so that you know how
+your cluster performs under load for an extended period of time. Doing
+so will help you size your cluster for future growth and lead to optimal
+performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend that you keep the latency
+between nodes as low as possible, as high latency leads to
+sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database, and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the lowest
+latency possible using SATA drives or SSDs, for example.
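+
+As a quick sanity check of the disk-space equation in the Disk section
+above, here is a minimal sketch; plug in your own object count, average
+object size, and `n_val`:
+
+```bash
+# Disk needed = estimated total objects * average object size * n_val.
+# The numbers below reproduce the example from the Disk section.
+awk 'BEGIN {
+  objects  = 50000000   # estimated total objects
+  obj_size = 2048       # average object size in bytes
+  n_val    = 3          # replicas per object
+  printf "cluster-wide disk needed: ~%.0f GB\n", (objects * obj_size * n_val) / (1024 ^ 3)
+}'
+# => cluster-wide disk needed: ~286 GB
+```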
diff --git a/content/riak/kv/2.2.6/setup/planning/future.md b/content/riak/kv/2.2.6/setup/planning/future.md
new file mode 100644
index 0000000000..13540869fe
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning/future.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+#menu:
+#  riak_kv-2.2.6:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+---
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.2.6/setup/planning/operating-system.md b/content/riak/kv/2.2.6/setup/planning/operating-system.md
new file mode 100644
index 0000000000..13ad4afc48
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning/operating-system.md
@@ -0,0 +1,25 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+---
+
+[downloads]: {{<baseurl>}}riak/kv/2.2.6/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
diff --git a/content/riak/kv/2.2.6/setup/planning/start.md b/content/riak/kv/2.2.6/setup/planning/start.md
new file mode 100644
index 0000000000..2c9cee3396
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/planning/start.md
@@ -0,0 +1,57 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/planning/system-planning
+  - /riak/kv/2.2.6/ops/building/planning/system-planning
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring your
+Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have
+strengths and weaknesses, so if you are unsure of which backend you
+need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests
+across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP
+implementations. We don't recommend VRRP behavior for the VIP because
+you'll lose the benefit of spreading client query load to all nodes in a
+ring.
+ +For **reverse-proxy** configurations (HTTP interface), any one of the +following should work adequately: + +* haproxy +* squid +* varnish +* nginx +* lighttpd +* Apache + diff --git a/content/riak/kv/2.2.6/setup/search.md b/content/riak/kv/2.2.6/setup/search.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/content/riak/kv/2.2.6/setup/upgrading.md b/content/riak/kv/2.2.6/setup/upgrading.md new file mode 100644 index 0000000000..639d175fcc --- /dev/null +++ b/content/riak/kv/2.2.6/setup/upgrading.md @@ -0,0 +1,33 @@ +--- +title: "Upgrading Riak KV" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Upgrading" + identifier: "upgrading" + weight: 102 + parent: "setup_index" +toc: true +--- + +[upgrade checklist]: ./checklist +[upgrade version]: ./version +[upgrade cluster]: ./cluster +[upgrade mdc]: ./multi-datacenter +[upgrade search]: ./search + +## In This Section + +### [Production Checklist][upgrade checklist] + +An overview of what to consider before upgrading Riak KV in a production environment. + +[Learn More >>][upgrade checklist] + +### [Upgrading to Riak KV 2.2.6][upgrade version] + +A tutorial on updating to Riak KV 2.2.6 + +[Learn More >>][upgrade version] \ No newline at end of file diff --git a/content/riak/kv/2.2.6/setup/upgrading/checklist.md b/content/riak/kv/2.2.6/setup/upgrading/checklist.md new file mode 100644 index 0000000000..b20749f61e --- /dev/null +++ b/content/riak/kv/2.2.6/setup/upgrading/checklist.md @@ -0,0 +1,220 @@ +--- +title: "Production Checklist" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Production Checklist" + identifier: "upgrading_checklist" + weight: 100 + parent: "upgrading" +toc: true +aliases: + - /riak/2.2.6/ops/upgrading/production-checklist/ + - /riak/kv/2.2.6/ops/upgrading/production-checklist/ +--- + +[perf open files]: {{<baseurl>}}riak/kv/2.2.6/using/performance/open-files-limit +[perf index]: {{<baseurl>}}riak/kv/2.2.6/using/performance +[ntp]: http://www.ntp.org/ +[security basics]: {{<baseurl>}}riak/kv/2.2.6/using/security/basics +[cluster ops load balance]: {{<baseurl>}}riak/kv/2.2.6/configuring/load-balancing-proxy +[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference +[config backend]: {{<baseurl>}}riak/kv/2.2.6/configuring/backend +[usage search]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency +[apps replication properties]: {{<baseurl>}}riak/kv/2.2.6/developing/app-guide/replication-properties +[concept strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/reference/strong-consistency +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/bucket-types +[use admin commands]: {{<baseurl>}}riak/kv/2.2.6/using/admin/commands +[use admin riak control]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-control +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/inspecting-node +[troubleshoot http]: {{<baseurl>}}riak/kv/2.2.6/using/troubleshooting/http-204 +[use admin riak-admin]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin +[SANs]: http://en.wikipedia.org/wiki/Storage_area_network + +Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. 
While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using `iperf` to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
+    * If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][concept strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state?
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak-admin`][use admin riak-admin]; a minimal polling sketch
+    appears at the end of this checklist)
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median and 95th and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs?
+* Are the client libraries in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying?
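+
+As a starting point for the metrics collection described in the
+Operations section above, here is a minimal polling sketch. It assumes
+`curl` and `jq` are available on the monitoring host and that a node is
+serving HTTP on the default port 8098; the stat names shown are a small
+subset of what `/stats` returns:
+
+```bash
+# Poll a node's /stats endpoint once a minute and print a few key metrics.
+STATS_URL="http://localhost:8098/stats"
+
+while true; do
+  curl -s "$STATS_URL" | jq '{
+    node_gets,
+    node_puts,
+    pbc_active,
+    pbc_connects_total
+  }'
+  sleep 60
+done
+```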
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak-admin diag <check>`, where `check` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak-admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production
+    systems
+  - High is for problems that impact production or soon-to-be-production
+    systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those
+    issues that are likely to turn into production outages very soon.
+    On-call engineers respond to urgent requests within 30 minutes,
+    24/7.
+* Does your team know how to gather `riak-debug` results from the whole
+  cluster when opening tickets? If not, that process goes something like
+  this (see the scripted sketch at the end of this page):
+  - SSH into each machine, run `riak-debug`, and grab the resultant
+    `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open
+    a new High- or Urgent-priority ticket
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at
+the metrics gathered above. Based on the numbers you're seeing so far,
+configure alerting thresholds on your latencies, disk consumption, and
+memory. These are the places most likely to give you advance warning of
+trouble.
+
+When you go to increase capacity down the line, having historic metrics
+will give you very clear indicators of having resolved scaling problems,
+as well as metrics for understanding what to upgrade and when.
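+
+To make the `riak-debug` collection described in the Troubleshooting
+and Support section repeatable, a sketch like the following can help.
+The hostnames are placeholders, and the exact name of the archive that
+`riak-debug` produces can vary by version, hence the glob:
+
+```bash
+# Collect riak-debug archives from every node before opening a ticket.
+NODES="riak1.example.com riak2.example.com riak3.example.com"  # your hosts
+
+for host in $NODES; do
+  ssh "$host" 'cd /tmp && riak-debug' && \
+    scp "$host:/tmp/*riak-debug*.tar.gz" .
+done
+```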
diff --git a/content/riak/kv/2.2.6/setup/upgrading/cluster.md b/content/riak/kv/2.2.6/setup/upgrading/cluster.md new file mode 100644 index 0000000000..5dbf6580c5 --- /dev/null +++ b/content/riak/kv/2.2.6/setup/upgrading/cluster.md @@ -0,0 +1,298 @@ +--- +title: "Upgrading a Cluster" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Upgrading a Cluster" + identifier: "upgrading_cluster" + weight: 102 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/2.2.6/ops/upgrading/rolling-upgrades/ + - /riak/kv/2.2.6/ops/upgrading/rolling-upgrades/ +--- + +[production checklist]: {{<baseurl>}}riak/kv/2.2.6/setup/upgrading/checklist +[use admin riak control]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-control +[use admin commands]: {{<baseurl>}}riak/kv/2.2.6/using/admin/commands +[use admin riak-admin]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/secondary-indexes +[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{<baseurl>}}riak/kv/2.2.6/configuring/v3-multi-datacenter +[jmx monitor]: {{<baseurl>}}riak/kv/2.2.6/using/reference/jmx +[snmp]: {{<baseurl>}}riak/kv/2.2.6/using/reference/snmp + +{{% note title="Note on upgrading Riak KV from older versions" %}} +Riak KV upgrades are tested and supported for two feature release versions. +For example, upgrades from 1.1.x to 1.3.x are tested and supported, +while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new +version of Riak KV that is more than two feature releases ahead, we +recommend first upgrading to an intermediate version. For example, in an +upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x +before upgrading to 1.4.x. + +If you run [Riak Control]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-control), you should disable it during the rolling upgrade process. +{{% /note %}} + +Riak KV nodes negotiate with each other to determine supported +operating modes. This allows clusters containing mixed-versions of Riak KV +to properly interoperate without special configuration, and simplifies +rolling upgrades. + +Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading. + +## Debian/Ubuntu + +The following example demonstrates upgrading a Riak KV node that has been +installed with the Debian/Ubuntu packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +2\. Back up the Riak KV node's `/etc` and `/data` directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Upgrade Riak KV: + +```bash +sudo dpkg -i <riak_package_name>.deb +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +5\. Verify Riak KV is running the new version: + +```bash +riak version +``` + +6\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +* `»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +* While the node was offline, other nodes may have accepted writes on its +behalf. 
This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7.
The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+    * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_jmx` --- See [JMX Monitoring][jmx monitor] for more information.
+    * `snmp` --- See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should examine any custom patches contained in the
+`basho-patches` directory to determine whether they still apply to the
+upgraded version. If you find that patches no longer apply, you should
+remove them from the `basho-patches` directory prior to operating the
+node in production.
+
+The following list shows the location of the `basho-patches` directory
+for each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and the general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
diff --git a/content/riak/kv/2.2.6/setup/upgrading/multi-datacenter.md b/content/riak/kv/2.2.6/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..617df4d78d
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,18 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+#menu:
+#  riak_kv-2.2.6:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
diff --git a/content/riak/kv/2.2.6/setup/upgrading/search.md b/content/riak/kv/2.2.6/setup/upgrading/search.md
new file mode 100644
index 0000000000..1302623a8f
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/upgrading/search.md
@@ -0,0 +1,276 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.2.6/ops/advanced/upgrading-search-2
+  - /riak/kv/2.2.6/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed [Yokozuna](../../../using/reference/search) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features, i.e.
no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage, as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM. A good start is 2 GB, but more will be required for heavier
+workloads. However, do not make the heap too large, as that could
+cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data, as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+>Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space from merge index, as its GC
+algorithm is bad at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the setting is called
+   `yokozuna`. If you've moved to the new `riak.conf` configuration
+   system, it's called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+      %% Other configs
+      {enabled, true},
+      %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps, which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+   To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+   instructions to learn how to define your XML file. Once you've created
+   the file, you can upload it to the cluster.
+
+   ```curl
+   curl -XPUT http://localhost:8098/search/schema/my_schema \
+     -H 'Content-Type: application/xml' \
+     --data-binary @my_schema.xml
+   ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+   ```curl
+   curl -XPUT http://localhost:8098/search/index/my_index \
+     -H 'Content-Type: application/json' \
+     -d '{"schema":"my_schema"}'
+   ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+   ```curl
+   curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+     -H 'Content-Type: application/json' \
+     -d '{"props":{"search_index":"my_index"}}'
+   ```
+
+   Once a bucket is associated with the new Search, all objects that are
+   written or modified in Riak will be indexed by **both** legacy and new
+   Search. However, the HTTP and client query interfaces will still
+   continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+   Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+   the following code into the shell. It clears the Search hash trees for
+   each node in the cluster.
+
+   ```erlang
+   riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+   ```
+
+   Press `Ctrl-D` to exit from the attached shell.
+
+   In the background, AAE will rebuild the hash trees and exchange them
+   with KV. These exchanges will notice missing objects and index
+   them in new Search.
+
+   <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+has occurred on every node.
+
+   ```bash
+   riak-admin search aae-status
+   ```
+
+   First, you must wait until all trees are rebuilt. This may take a
+   while, as each node is configured, by default, to build a maximum of
+   one tree per hour. You can determine when a tree is built by looking
+   at the `Entropy Trees` section. When a tree is not built, it will show
+   `--` under the `Built (ago)` column. Otherwise, it will list how long
+   ago the tree was built in a human-friendly format.
Here is an example
+   of trees that are not built:
+
+   ```
+   ================================ Entropy Trees ================================
+   Index                                              Built (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   --
+   319703483166135013357056057156686910549735243776   --
+   ...
+   ```
+
+   Here is an example of built trees:
+
+   ```
+   ================================ Entropy Trees ================================
+   Index                                              Built (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   12.3 hr
+   319703483166135013357056057156686910549735243776   5.3 hr
+   ...
+   ```
+
+   After all the trees are built, you then have to wait for a full
+   exchange round to occur for every partition on every node. That is,
+   the full exchange round must be **NEWER** than the time the tree was
+   built. That way you know the exchange was based on the latest tree.
+   The exchange information is found under the `Exchanges` section.
+   Under that section there are two columns: `Last (ago)` and `All
+   (ago)`. In other words, you want to wait until the `All (ago)` value is
+   newer than the value of `Built (ago)` in the `Entropy Trees` section.
+   For example, given the entropy tree output above, the following output
+   would indicate that both partitions have had a full exchange round
+   since the latest tree was built:
+
+   ```
+   ================================== Exchanges ==================================
+   Index                                              Last (ago)    All (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+   319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+   ...
+   ```
+
+   Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` is newer than
+   `5.3 hr`. Once the exchange is newer for every partition on every
+   node, you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command, which will give HTTP and PB query
+control to the new Riak Search.
+
+   ```bash
+   riak-admin search switch-to-new-search
+   ```
+
+   <div class="note">
+   <div class="title">Check Results Before Switching (Optional)</div>
+   Up until this point, all incoming queries are serviced by the legacy
+   Search system. After `switch-to-new-search` is run, all queries
+   will be handled by new Search. If you first want to verify the
+   results of new Search before switching, you can use its dedicated
+   HTTP resource at `/search/query/<index>?q=...`.
+   </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+   ```curl
+   curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+     -H 'Content-Type: application/json' \
+     -d '{"props":{"search": false}}'
+   ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+   ```appconfig
+   {riak_search, [
+     %% Other configs
+     {enabled, false},
+     %% Other configs
+   ]},
+   ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search, this causes
+the commit hooks to persist even when legacy Search is disabled and the
+search property is set to false.
+
+    New Search has code to expunge the legacy hooks from the raw ring,
+    but this only occurs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than during the middle of a migration.
diff --git a/content/riak/kv/2.2.6/setup/upgrading/version.md b/content/riak/kv/2.2.6/setup/upgrading/version.md
new file mode 100644
index 0000000000..62a7b3ff03
--- /dev/null
+++ b/content/riak/kv/2.2.6/setup/upgrading/version.md
@@ -0,0 +1,248 @@
+---
+title: "Upgrading to Riak KV 2.2.6"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Upgrading to 2.2.6"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.2.6/upgrade-v20/
+  - /riak/kv/2.2.6/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.2.6/setup/upgrading/cluster/
+
+---
+
+
+[production checklist]: {{<baseurl>}}riak/kv/2.2.6/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.2.6/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/2.2.6/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.2.6/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.2.6/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.2.6/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/2.2.6/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 2.2.6 by following the instructions below.
+
+{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+    * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to potentially downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config.
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to override the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete.
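+
+On Debian/Ubuntu, for example, the process above condenses to roughly
+the following per-node sketch; each command is covered in detail in the
+numbered steps later on this page:
+
+```bash
+riak stop
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+sudo dpkg -i »riak_package_name«.deb
+riak start
+riak version                                        # confirm the new version
+riak-admin wait-for-service riak_kv »target_node«   # wait for riak_kv to come up
+riak-admin transfers                                # wait until no transfers are active
+```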
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Data File Format Changes
+
+[Riak KV 2.2][release notes] introduces on-disk data file format changes that can impact the upgrade/downgrade process:
+
+* Changes to active anti-entropy related to inconsistent hashing.
+* Upgrading to Solr 4.10.4 for Riak search.
+
+{{% note %}}
+If you plan to use Riak search, you must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.2.6.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features may be introduced that either we believe are so important that we automatically opt users in on upgrade, or for which there is no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:--- |
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade data written in HLL format is unreadable. |
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade to version 2.2, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you perform one of the following actions in your cluster:
+
+* Enable LZ4 Compression in LevelDB
+* Enable Global Expiration in LevelDB
+
+If you use other new features, such as the HyperLogLog data type, you can still downgrade your cluster, but you will no longer be able to use those features or access data in new formats after the downgrade.
+
+
+## Upgrading process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories.
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+
+4\. Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package right now, before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. 
(**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+ ```advanced.config
+ {riak_kv, [
+   {override_capability, [
+     {object_hash_version, [{use, legacy}] }
+   ]}
+ ]}
+ ```
+
+5.b\. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+ ```riak.conf
+ leveldb.compression.algorithm=snappy
+ ```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS <= 2.2.3, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+ ```advanced.config
+ {riak_repl, [
+   {override_capability, [
+     {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+   ]}
+ ]}
+ ```
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.2.6 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+riak@192.168.1.11)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf. This data is transferred to the node when it becomes available.
+
+10\. Repeat the process for the remaining nodes in the cluster.
+
+
+### Basho Patches
+
+After upgrading, you should examine any custom patches contained in the `basho-patches` directory to determine whether they still apply to the upgraded version. You can find this information in the [Release Notes].
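+
+For example, to see which custom patches are currently installed on a node, you can simply list the `basho-patches` directory (the Ubuntu path is shown; see the per-platform locations listed below):
+
+```bash
+ls -l /usr/lib/riak/lib/basho-patches
+```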
+ +If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production. + +The following lists locations of the `basho-patches` directory for +each supported operating system: + +- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches` +- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches` +- FreeBSD: `/usr/local/lib/riak/lib/basho-patches` +- SmartOS: `/opt/local/lib/riak/lib/basho-patches` +- Solaris 10: `/opt/riak/lib/basho-patches` + +### Riaknostic + +It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility Riaknostic. + +Ensure that Riak KV is running on the node, and issue the following command: + +```bash +riak-admin diag +``` + +Make the recommended changes from the command output to ensure optimal node operation. diff --git a/content/riak/kv/2.2.6/using.md b/content/riak/kv/2.2.6/using.md new file mode 100644 index 0000000000..cf713fdc9f --- /dev/null +++ b/content/riak/kv/2.2.6/using.md @@ -0,0 +1,72 @@ +--- +title: "Using Riak KV" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Using" + identifier: "managing" + weight: 201 + pre: database +toc: true +--- + +[use running cluster]: ../using/running-a-cluster +[use admin index]: ../using/admin/ +[cluster ops index]: ../using/cluster-operations +[repair recover index]: ../using/repair-recovery +[security index]: ../using/security +[perf index]: ../using/performance +[troubleshoot index]: ../using/troubleshooting +[use ref]: ../using/reference + +## In This Section + +#### [Running a Cluster][use running cluster] + +A guide on basic cluster setup. + +[Learn More >>][use running cluster] + +#### [Cluster Administration][use admin index] + +Tutorials and reference documentation on cluster administration commands as well as command-line tools. + +[Learn More >>][use admin index] + +#### [Cluster Operations][cluster ops index] + +Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups. + +[Learn More >>][cluster ops index] + +#### [Repair & Recovery][repair recover index] + +Contains documentation on repairing a cluster, recovering from failure, and common errors. + +[Learn More >>][repair recover index] + +#### [Security][security index] + +Information on securing your Riak KV cluster. + +[Learn More >>][security index] + +#### [Performance][perf index] + +Articles on benchmarking your Riak KV cluster and improving performance. + +[Learn More >>][perf index] + +#### [Troubleshooting][troubleshoot index] + +Guides on troubleshooting issues and current product advisories. + +[Learn More >>][troubleshoot index] + +#### [Reference][use ref] + +Articles providing background information and implementation details on topics such as logging, bucket types, and search. 
+
+[Learn More >>][use ref]
diff --git a/content/riak/kv/2.2.6/using/admin.md b/content/riak/kv/2.2.6/using/admin.md
new file mode 100644
index 0000000000..d5644bafde
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/admin.md
@@ -0,0 +1,47 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/cluster-admin
+  - /riak/kv/2.2.6/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
diff --git a/content/riak/kv/2.2.6/using/admin/commands.md b/content/riak/kv/2.2.6/using/admin/commands.md
new file mode 100644
index 0000000000..a856c91ed1
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/admin/commands.md
@@ -0,0 +1,374 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/cluster-admin
+  - /riak/kv/2.2.6/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command.
This will execute the changes that
+have been staged and reviewed.
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
+interface are also available as self-standing commands. The `riak-admin
+member-status` command is now the `riak-admin cluster status` command,
+`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+| node               |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid | up    | 34.4| --    |
+| dev2@127.0.0.1     |valid | up    | 32.8| --    |
+| dev3@127.0.0.1     |valid | up    | 32.8| --    |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` --- There are five possible values for status:
+  * `valid` --- The node has begun participating in cluster operations
+  * `leaving` --- The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` --- The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` --- The node is in the process of joining the cluster but
+    has not yet completed the join process
+  * `down` --- The node is not currently responding
+* `avail` --- There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` --- What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` --- The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant node][cluster ops add remove node] will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
+[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
+
+If there is a staged change (or changes), however, you
+will see a detailed listing of what will take place upon commit, what
+the cluster will look like afterward, etc.
+
+For example, if a `cluster leave` operation is staged in a 3-node cluster, the output will look something like this:
+
+```
+=============================== Staged Changes ================================
+Action         Details(s)
+-------------------------------------------------------------------------------
+leave          'dev2@127.0.0.1'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 2 cluster transitions
+
+###############################################################################
+                         After cluster transition 1/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+leaving    32.8%      0.0%    'dev2@127.0.0.1'
+valid      34.4%     50.0%    'dev1@127.0.0.1'
+valid      32.8%     50.0%    'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+
+Transfers resulting from cluster changes: 38
+  6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1'
+  11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1'
+  5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1'
+  16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1'
+
+###############################################################################
+                         After cluster transition 2/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      50.0%       --     'dev1@127.0.0.1'
+valid      50.0%       --     'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+```
+
+Notice that there are distinct sections of the output for each of the
+transitions that the cluster will undergo, including warnings, planned
+data transfers, etc.
+
+## commit
+
+Commits the currently staged cluster changes. Staged cluster changes
+must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed.
+
+```bash
+riak-admin cluster commit
+```
+
+## clear
+
+Clears the currently staged cluster changes.
+
+```bash
+riak-admin cluster clear
+```
+
+The effect of `riak-admin cluster clear` depends on the staged changes:
+
+* If a `leave` operation has been staged, `riak-admin cluster clear` will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* If an operation is staged on a node that remains in the cluster, running `riak-admin cluster clear` will leave the node unaffected.
+
+## partitions
+
+Prints primary, secondary, and stopped partition indices and IDs either
+for the current node or for another specified node.
The following +prints that information for the current node: + +```bash +riak-admin cluster partitions +``` + +This would print the partition information for a different node in the +cluster: + +```bash +riak-admin cluster partitions --node=<node> +``` + +Partition information is contained in a table like this: + +``` +Partitions owned by 'dev1@127.0.0.1': ++---------+-------------------------------------------------+--+ +| type | index |id| ++---------+-------------------------------------------------+--+ +| primary | 0 |0 | +| primary | 91343852333181432387730302044767688728495783936 |4 | +| primary |182687704666362864775460604089535377456991567872 |8 | +| ... | .... |..| +| primary |1438665674247607560106752257205091097473808596992|63| +|secondary| -- |--| +| stopped | -- |--| ++---------+-------------------------------------------------+--+ +``` + +## partition-count + +Displays the current partition count either for the whole cluster or for +a particular node. This would display the partition count for the +cluster: + +```bash +riak-admin cluster partition-count +``` + +This would display the count for a node: + +```bash +riak-admin cluster partition-count --node=<node> +``` + +When retrieving the partition count for a node, you'll see a table like +this: + +``` ++--------------+----------+-----+ +| node |partitions| pct | ++--------------+----------+-----+ +|dev1@127.0.0.1| 22 | 34.4| ++--------------+----------+-----+ +``` + +The `partitions` column displays the number of partitions claimed by the +node, while the `pct` column displays the percentage of the ring claimed. + +## partition + +The `cluster partition` command enables you to convert partition IDs to +indexes and vice versa using the `partition id` and `partition index` +commands, respectively. Let's say that you run the `riak-admin cluster +partitions` command and see that you have a variety of partitions, one +of which has an index of +`1004782375664995756265033322492444576013453623296`. 
You can convert +that index to an ID like this: + +```bash +riak-admin cluster partition index=1004782375664995756265033322492444576013453623296 +``` + +Conversely, if you have a partition with an ID of 20, you can retrieve +the corresponding index: + +```bash +riak-admin cluster partition id=20 +``` diff --git a/content/riak/kv/2.2.6/using/admin/riak-admin.md b/content/riak/kv/2.2.6/using/admin/riak-admin.md new file mode 100644 index 0000000000..1ac3a21d4a --- /dev/null +++ b/content/riak/kv/2.2.6/using/admin/riak-admin.md @@ -0,0 +1,717 @@ +--- +title: "riak-admin Command Line Interface" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "riak-admin CLI" + identifier: "cluster_admin_cli" + weight: 101 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/2.2.6/ops/running/tools/riak-admin + - /riak/kv/2.2.6/ops/running/tools/riak-admin +--- + +[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference +[use admin commands]: {{<baseurl>}}riak/kv/2.2.6/using/admin/commands +[use admin commands#join]: {{<baseurl>}}riak/kv/2.2.6/using/admin/commands/#join +[use admin commands#leave]: {{<baseurl>}}riak/kv/2.2.6/using/admin/commands/#leave +[cluster ops backup]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/backing-up +[config reference#node-metadata]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/#node-metadata +[cluster ops change info]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce +[usage commit hooks]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/commit-hooks +[config reference#ring]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/#ring +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/inspecting-node +[use ref monitoring]: {{<baseurl>}}riak/kv/2.2.6/using/reference/statistics-monitoring +[downgrade]: {{<baseurl>}}riak/kv/2.2.6/setup/downgrade +[security index]: {{<baseurl>}}riak/kv/2.2.6/using/security/ +[security managing]: {{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/bucket-types +[cluster ops 2i]: {{<baseurl>}}riak/kv/2.2.6/using/reference/secondary-indexes +[repair recover index]: {{<baseurl>}}riak/kv/2.2.6/using/repair-recovery +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#stats + +## `riak-admin` + +The riak-admin command performs operations unrelated to node liveness, including: +node membership, backup, and basic status reporting. The node must be +running for most of these commands to work. 
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. `reip` should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs have been fixed related to reip in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded Javascript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+**Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Outputs system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10
+riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
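+
+For illustration, truncated `aae-status` output might look something like the following (the index values and timings here are examples only, not output from a real cluster):
+
+```
+================================ Exchanges =================================
+Index                                              Last (ago)    All (ago)
+----------------------------------------------------------------------------
+0                                                  3.8 min       4.1 min
+91343852333181432387730302044767688728495783936    3.3 min       7.8 min
+...
+
+============================== Entropy Trees ===============================
+Index                                              Built (ago)
+----------------------------------------------------------------------------
+0                                                  25.2 min
+91343852333181432387730302044767688728495783936    22.9 min
+...
+
+============================== Keys Repaired ===============================
+Index                                              Last    Mean    Max
+----------------------------------------------------------------------------
+0                                                  0       0       0
+91343852333181432387730302044767688728495783936    87      21      87
+...
+```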
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```bash
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
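+
+For example, a typical first-time setup might look like the following sketch (the username, password, source, and permissions are illustrative; see the guides above for the full set of subcommands):
+
+```bash
+riak-admin security enable
+riak-admin security add-user riakuser password=Test1234
+riak-admin security add-source riakuser 127.0.0.1/32 password
+riak-admin security grant riak_kv.get,riak_kv.put on any to riakuser
+```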
+
+## bucket-type
+
+Bucket types, introduced in Riak 2.0, are a means of managing bucket
+properties, and they provide an additional namespace alongside buckets
+and keys. This command enables you to create and modify bucket
+types, provide the status of currently available bucket types, and
+activate created bucket types.
+
+```bash
+riak-admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak-admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak-admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak-admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+        Total partitions: 64
+        Finished partitions: 44
+        Speed: 100
+        Total 2i items scanned: 0
+        Total tree objects: 0
+        Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak-admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak-admin repair-2i kill
+```
+
+## search
+
+The `search` command provides subcommands for various administrative
+tasks related to the new Riak Search.
+
+```bash
+riak-admin search <command>
+```
+
+### aae-status
+
+```bash
+riak-admin search aae-status
+```
+
+Outputs active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtrees don't match for a particular key. The `Last` column is the
+number of keys repaired during the last exchange.
The `Mean` column is
+the average number of keys repaired for all exchange rounds since the
+node has started. The `Max` column is the maximum number of keys
+repaired for a given exchange round since the node has started.
+
+### switch-to-new-search
+
+{{% note title="Only For Legacy Migration" %}}
+This is only needed when migrating from legacy riak search to the new Search
+(Yokozuna).
+{{% /note %}}
+
+```bash
+riak-admin search switch-to-new-search
+```
+
+Switches handling of the HTTP `/solr/<index>/select` resource and
+protocol buffer query messages from legacy Riak Search to new Search
+(Yokozuna).
+
+## services
+
+Lists available services on the node (e.g. `riak_kv`).
+
+```bash
+riak-admin services
+```
+
+## ensemble-status
+
+This command is used to provide insight into the current status of the
+consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature.
+
+```bash
+riak-admin ensemble-status
+```
+
+This command can also be used to check on the status of a specific
+consensus group in your cluster:
+
+```bash
+riak-admin ensemble-status <group id>
+```
+
+Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency].
+
+## handoff
+
+Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff].
+
+## set
+
+Enables you to change the value of one of Riak's configuration
+parameters on the fly, without needing to stop and restart the node.
+
+```bash
+riak-admin set <variable>=<value>
+```
+
+The `set` command can only be used for the following
+parameters:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `search.dist_query=off` will disable distributed query for the node
+* `search.dist_query=on` will enable distributed query for the node
+* `search.dist_query` will get the status of distributed query for the node
+
+The `search.dist_query` settings above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted.
+
+
+## show
+
+Whereas the [`riak-admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak
+cluster, the `show` command enables you to view only some of those
+statistics.
+
+```bash
+riak-admin show <variable>
+```
+
+## describe
+
+Provides a brief description of one of Riak's [configurable parameters][config reference].
+
+```bash
+riak-admin describe <variable>
+```
+
+If you want to know the meaning of the `nodename` parameter:
+
+```bash
+riak-admin describe nodename
+```
+
+That will produce the following output:
+
+```
+nodename:
+  Name of the Erlang node
+```
diff --git a/content/riak/kv/2.2.6/using/admin/riak-cli.md b/content/riak/kv/2.2.6/using/admin/riak-cli.md
new file mode 100644
index 0000000000..c01f5b1ee4
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/admin/riak-cli.md
@@ -0,0 +1,200 @@
+---
+title: "riak Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "riak CLI"
+    identifier: "cluster_admin_riak_cli"
+    weight: 102
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/tools/riak
+  - /riak/kv/2.2.6/ops/running/tools/riak
+---
+
+[configuration file]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/
+[escript]: http://www.erlang.org/doc/man/escript.html
+[`riak-admin`]: {{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#top
+[configuration]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/
+
+## riak
+
+This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands:
+
+```bash
+Usage: riak «command»
+where «command» is one of the following:
+    { help | start | stop | restart | ping | console | attach
+      attach-direct | ertspath | chkconfig | escript | version | getpid
+      top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } |
+      config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+## help
+
+Provides a brief description of all available commands.
+
+## start
+
+Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given.
+
+```bash
+riak start
+```
+
+## stop
+
+Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak stop
+```
+
+## restart
+
+Stops and then starts the running Riak node without exiting the Erlang VM.
+Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak restart
+```
+
+## ping
+
+Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding.
+
+```bash
+riak ping
+```
+
+## console
+
+Starts the Riak node in the foreground, giving access to the Erlang shell and
+runtime messages. Prints `Node is already running - use 'riak attach' instead`
+when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice.
+
+```bash
+riak console
+```
+
+## attach
+
+Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached.
+
+```bash
+riak attach
+```
+
+## attach-direct
+
+Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**.
+
+```bash
+riak attach-direct
+```
+
+## ertspath
+
+Outputs the path of the Riak Erlang runtime environment:
+
+```bash
+riak ertspath
+```
+
+## chkconfig
+
+Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output.
+
+```bash
+riak chkconfig
+```
+
+## escript
+
+Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment:
+
+```bash
+riak escript <filename>
+```
+
+## version
+
+Outputs the Riak version identifier:
+
+```bash
+riak version
+```
+
+## getpid
+
+Outputs the process identifier for the currently-running instance of Riak:
+
+```bash
+riak getpid
+```
+
+## top
+
+The `riak top` command is the direct equivalent of `riak-admin top`:
+
+```bash
+riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N]
+```
+
+More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation.
+
+## config
+
+Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration.
+
+```bash
+riak config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however, `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files.
+  The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example:
+
+  ```
+  -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args
+  ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message:
+
+  ```
+  -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args
+  ```
+
+* `effective` prints the effective configuration in the following syntax:
+
+  ```
+  parameter1 = value1
+  parameter2 = value2
+  ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error:
+
+  ```
+  Disabling cuttlefish, legacy configuration files found:
+    /etc/riak/app.config
+    /etc/riak/vm.args
+  Effective config is only visible for cuttlefish conf files.
+  ```
+
+* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following:
+
+  ```
+  Documentation for storage_backend
+  Specifies the storage engine used for Riak's key-value data
+  and secondary indexes (if supported).
+
+  Valid Values:
+    - one of: bitcask, leveldb, memory, multi, prefix_multi
+  Default Value : bitcask
+  Set Value     : bitcask
+  Internal key  : riak_kv.storage_backend
+  ```
+
+Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem.
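+
+For example, a quick troubleshooting pass over a node's configuration might look like this sketch (output will vary by node):
+
+```bash
+riak chkconfig                          # validate riak.conf
+riak config effective | grep storage    # inspect one effective setting
+riak config generate -l debug           # regenerate with debug output
+```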
diff --git a/content/riak/kv/2.2.6/using/admin/riak-control.md b/content/riak/kv/2.2.6/using/admin/riak-control.md
new file mode 100644
index 0000000000..8f890b7aaa
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/admin/riak-control.md
@@ -0,0 +1,233 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/advanced/riak-control
+  - /riak/kv/2.2.6/ops/advanced/riak-control
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+or
+
+listener.https.<name> = 127.0.0.1:8069
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+
+```
+
+```appconfig
+ {riak_api,
+     [
+      %% Other configs
+      ... if HTTP is configured ...
+      {http,[{"127.0.0.1",8098}]},
+      ... if HTTPS is configured ...
+      {https,[{"127.0.0.1",8069}]},
+      %% Other configs
+     ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found in the document below.
+{{% /note %}}
+
+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, false},
+    %% Other configs
+    ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, true},
+    %% Other configs
+    ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>.
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/2.2.6/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
+ +Please note that Riak Control will not work if you have enabled +authentication but SSL is not set up properly. + +## Authentication + +Riak Control provides you with the option of requiring authentication (via +HTTP basic auth) for users of the web interface. It is disabled by +default. To enable authentication: + +```riakconf +riak_control.auth.mode = userlist +``` + +```appconfig +{riak_control, [ + %% Other configs + {auth, userlist}, %% The only other available option is "none" + %% Other configs + ]} +``` + +When authentication is enabled, you can specify as many +username/password pairs as you wish. The default pair is the username +`user` and the password `pass`. We strongly recommend selecting +different credentials. The example below would set up three user-defined +pairs: + +```riakconf +riak_control.auth.user.bob.password = bob_is_the_coolest +riak_control.auth.user.polly.password = h4x0r123 +riak_control.auth.user.riakrocks.password = cap_theorem_4_life +``` + +```appconfig +{riak_control, [ + %% Other configs + {userlist, [ + {"bob", "bob_is_the_coolest"}, + {"polly", "h4x0r123"}, + {"riakrocks", "cap_theorem_4_life"} + ]} + %% Other configs +]} +``` + +## User Interface + +To begin using Riak Control, navigate to `https://ip_address_of_https_listener:https_port/admin`. +For a default configuration, this will be <https://localhost:8069/admin>. + +If your browser warns you that it cannot authenticate the page, this may +be because you are using self-signed certificates. If you have +authentication enabled in your configuration, you will next be asked to +authenticate. Enter an appropriate username and password now. + +{{% note title="Note on browser TLS" %}} +Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A +list of browsers that support TLS v1.2 can be found +[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers). +TLS v1.2 may be disabled by default on your browser, for example if you are +using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome +versions earlier than 30, or Internet Explorer versions earlier than 11. To +enable it, follow browser-specific instructions. +{{% /note %}} + +### Snapshot View + +When you first navigate to Riak Control, you will land on the Snapshot +view: + +[![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png)]({{<baseurl>}}images/control_current_snapshot.png) + +In this interface, the health of your cluster is made immediately +obvious. In the event that something isn't quite right (or has the +potential to cause problems in the near future), the green check mark +will turn into a red `X`. The red `X` is accompanied by a list of +reasons for concern. Each item in the list links to a page where you can +get more information about the issue. + +### Cluster Management View + +On the top right side of the admin panel are navigation tabs. If you +click the **Cluster** tab, you will be taken to the cluster management +page. + +On this page, you can see all of the nodes in your cluster, along with +their status, the percentage of the ring owned by that node, and memory +consumption. You can also stage and commit changes to the cluster, such +as adding, removing, and marking nodes as down.
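+ +The changes staged in this view use the same staged-change workflow that is available from the command line. For example (with placeholder node names): + +```bash +# Stage a join and a leave, as the web UI would +riak-admin cluster join riak@node2.example.com +riak-admin cluster leave riak@node3.example.com + +# Review and then apply the staged changes +riak-admin cluster plan +riak-admin cluster commit +```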
+ +Staged changes to the cluster: + +[![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png)]({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png)]({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png)]({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[![Node Management]({{<baseurl>}}images/control_node_management.png)]({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode). + +[![Ring View]({{<baseurl>}}images/control_current_ring.png)]({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. diff --git a/content/riak/kv/2.2.6/using/cluster-operations.md b/content/riak/kv/2.2.6/using/cluster-operations.md new file mode 100644 index 0000000000..7baf2a48a0 --- /dev/null +++ b/content/riak/kv/2.2.6/using/cluster-operations.md @@ -0,0 +1,104 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak-admin handoff` interface to enable and disable handoff.
+ +[Learn More >>][ops handoff] + + +#### [Monitoring Strong Consistency][ops strong consistency] + +Overview of the various statistics used in monitoring strong consistency. + +[Learn More >>][ops strong consistency] + + +#### [V3 Multi-Datacenter][ops v3 mdc] + +Explains how to manage V3 replication with the `riak-repl` command. + +[Learn More >>][ops v3 mdc] + + +#### [V2 Multi-Datacenter][ops v2 mdc] + +Explains how to manage V2 replication with the `riak-repl` command. + +[Learn More >>][ops v2 mdc] diff --git a/content/riak/kv/2.2.6/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.2.6/using/cluster-operations/active-anti-entropy.md new file mode 100644 index 0000000000..df3fed4cb5 --- /dev/null +++ b/content/riak/kv/2.2.6/using/cluster-operations/active-anti-entropy.md @@ -0,0 +1,282 @@ +--- +title: "Managing Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Managing Active Anti-Entropy" + identifier: "cluster_operations_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/kv/2.2.6/ops/advanced/aae/ + - /riak/2.2.6/ops/advanced/aae/ +--- + +[config search#throttledelay]: {{<baseurl>}}riak/kv/2.2.6/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{<baseurl>}}riak/kv/2.2.6/configuring/search/#search-anti-entropy-throttle + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +## Enabling Active Anti-Entropy + +Whether AAE is currently enabled in a node is determined by the value of +the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/). + +In Riak versions 2.0 and later, AAE is turned on by default. + +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. + +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. 
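+ +Taken together, a minimal sketch of reclaiming AAE disk space on a single node (assuming the default data directory and that AAE has already been switched off as shown above) looks like this: + +```bash +riak stop + +# Remove the on-disk AAE data; the path placeholder refers to your node's directory +rm -Rf <path_to_riak_node>/data/anti_entropy/* + +riak start +```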
+ +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak-admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... + +================================ Entropy Trees ================================ +Index Built (ago) +------------------------------------------------------------------------------- +0 5.7 d +22835963083295358096932575511191922182123945984 5.6 d +45671926166590716193865151022383844364247891968 5.5 d +68507889249886074290797726533575766546371837952 4.3 d +91343852333181432387730302044767688728495783936 4.8 d + +================================ Keys Repaired ================================ +Index Last Mean Max +------------------------------------------------------------------------------- +0 0 0 0 +22835963083295358096932575511191922182123945984 0 0 0 +45671926166590716193865151022383844364247891968 0 0 0 +68507889249886074290797726533575766546371837952 0 0 0 +91343852333181432387730302044767688728495783936 0 0 0 + +``` + +Each of these three tables contains information for each +[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories: + +Category | Measures | Description +:--------|:---------|:----------- +**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed + | `All` | How long it has been since a partition exchanged with all of its replicas +**Entropy Trees** | `Built` | When the hash trees for a given partition were created +**Keys Repaired** | `Last` | The number of keys repaired during all key exchanges since the last node restart + | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart + | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart + +All AAE status information obtainable using the `riak-admin aae-status` +command is stored in-memory and is reset when a node is restarted with +the exception of hash tree build information, which is persisted on disk +(because hash trees themselves are persisted on disk). + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and +off but also to fine-tune your cluster's use of AAE, e.g. how +much memory AAE processes should consume, how frequently specific +processes should be run, etc. + +### Data Directory + +By default, data related to AAE operations is stored in the +`./data/anti_entropy` directory in each Riak node. This can be changed +by setting the `anti_entropy.data_dir` parameter to a different value. 
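+ +For example, to keep AAE data on a separate volume, you could set the following in riak.conf (the path shown is purely illustrative): + +```riakconf +anti_entropy.data_dir = /mnt/aae_data/anti_entropy +```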
+ +### Throttling + +AAE has a built-in throttling mechanism that can insert delays between +AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach the length +specified by the [`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on +that in the section below). Throttling can be switched on and off using +the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`. + +#### Throttling Tiers + +If you activate AAE throttling, you can use *tiered throttling* to +establish a series of vnode mailbox-size thresholds past which a +user-specified time delay should be observed. This enables you to +establish, for example, that a delay of 10 milliseconds should be +observed if the mailbox of any vnode reaches 50 messages. + +The general form for setting tiered throttling is as follows: + +```riakconf +search.anti_entropy.throttle.$tier.delay +search.anti_entropy.throttle.$tier.solrq_queue_length +``` + +In the above example, `$tier` should be replaced with the desired +name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc). If you +choose to set throttling tiers, you will need to set the mailbox size +for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay` +parameters must be set for each tier. + +Below is an example configuration for three tiers, with mailbox sizes of +0, 50, and 100 and time delays of 5, 10, and 15 milliseconds, +respectively: + +```riakconf +search.anti_entropy.throttle.tier1.solrq_queue_length = 0 +search.anti_entropy.throttle.tier1.delay = 5ms +search.anti_entropy.throttle.tier2.solrq_queue_length = 50 +search.anti_entropy.throttle.tier2.delay = 10ms +search.anti_entropy.throttle.tier3.solrq_queue_length = 100 +search.anti_entropy.throttle.tier3.delay = 15ms +``` + +### Bloom Filters + +Bloom filters are mechanisms used to prevent reads that are destined to +fail because no object exists in the location that they're querying. +Using bloom filters can improve reaction time for some queries, but +entail a small general performance cost. You can switch bloom filters +on and off using the `anti_entropy.bloomfilter` parameter. + +### Trigger Interval + +The `anti_entropy.trigger_interval` setting determines how often Riak's +AAE subsystem looks for work to do, e.g. building or expiring hash +trees, triggering information exchanges between nodes, etc. The default +is every 15 seconds (`15s`). Raising this value may save resources, but +at a slightly higher risk of data corruption. + +### Hash Trees + +As a fallback measure in addition to the normal operation of AAE on-disk +hash trees, Riak periodically clears and regenerates all hash trees +stored on disk to ensure that hash trees correspond to the key/value +data stored in Riak. This enables Riak to detect silent data corruption +resulting from disk failure or faulty hardware. The +`anti_entropy.tree.expiry` setting enables you to determine how often +that takes place. The default is once a week (`1w`). You can set up this +process to run once a day (`1d`), twice a day (`12h`), once a month +(`4w`), and so on. + +In addition to specifying how often Riak expires hash trees after they +are built, you can also specify how quickly and how many hash trees are +built. You can set the frequency using the +`anti_entropy.tree.build_limit.per_timespan` parameter, for which the +default is every hour (`1h`); the number of hash tree builds is +specified by `anti_entropy.tree.build_limit.number`, for which the +default is 1. 
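+ +As an illustration, the following riak.conf settings would expire hash trees every two weeks and allow at most two tree builds per hour. The values are examples only, not recommendations: + +```riakconf +anti_entropy.tree.expiry = 2w +anti_entropy.tree.build_limit.number = 2 +anti_entropy.tree.build_limit.per_timespan = 1h +```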
+ +### Write Buffer Size + +While you are free to choose the backend for data storage in Riak, +background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the +write buffer used by LevelDB for hash tree generation using the +`anti_entropy.write_buffer_size` parameter. The default is `4MB`. + +### Open Files and Concurrency Limits + +The `anti_entropy.concurrency_limit` parameter determines how many AAE +cross-node information exchanges or hash tree builds can happen +concurrently. The default is `2`. + +The `anti_entropy.max_open_files` parameter sets an open-files limit for +AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`. + +## AAE and Riak Search + +Riak's AAE subsystem works to repair object inconsistencies both for +normal key/value objects and for data related to [Riak Search](../../../developing/usage/search). In particular, AAE acts on indexes stored in +[Solr](http://lucene.apache.org/solr/), the search platform that drives +Riak Search. Implementation details for AAE and Search can be found in +the [Search Details](../../reference/search/#active-anti-entropy-aae) +documentation. + +You can check on the status of Search-related AAE using the following +command: + +```bash +riak-admin search aae-status +``` + +The output from that command can be interpreted just like the output +discussed in the section on [monitoring](#monitoring-aae) above. diff --git a/content/riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes.md new file mode 100644 index 0000000000..69ce9df441 --- /dev/null +++ b/content/riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes.md @@ -0,0 +1,194 @@ +--- +title: "Adding / Removing Nodes" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Adding/Removing Nodes" + identifier: "cluster_operations_add_remove_nodes" + weight: 100 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.2.6/ops/running/nodes/adding-removing + - /riak/kv/2.2.6/ops/running/nodes/adding-removing +--- + +[use running cluster]: {{<baseurl>}}riak/kv/2.2.6/using/running-a-cluster + +This page describes the process of adding and removing nodes to and from +a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. + +## Start the Node + +Just like the initial configuration steps, this step has to be repeated +for every node in your cluster. Before a node can join an existing +cluster it needs to be started. Depending on your mode of installation, +use either the init scripts installed by the Riak binary packages or +simply the script [`riak`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-cli/): + +```bash +/etc/init.d/riak start +``` + +or + +```bash +bin/riak start +``` + +When the node starts, it will look for a cluster description, known as +the **ring file**, in its data directory. If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster.
Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. This command +would stage a join to that cluster: + +```bash +bin/riak-admin cluster join riak@192.168.2.2 +``` + +If the join request is successful, you should see the following: + +``` +Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2' +``` + +If you have multiple nodes that you would like to join to an existing +cluster, repeat this process for each of them. + +## Joining Nodes to Form a Cluster + +The process of joining a cluster involves several steps, including +staging the proposed cluster nodes, reviewing the cluster plan, and +committing the changes. + +After staging each of the cluster nodes with `riak-admin cluster join` +commands, as in the section above, the next step in forming a cluster is +to review the proposed plan of changes. This can be done with the +`riak-admin cluster plan` command, which is shown in the example below. + +``` +=============================== Staged Changes ================================ +Action Nodes(s) +------------------------------------------------------------------------------- +join 'riak@192.168.2.3' +join 'riak@192.168.2.4' +join 'riak@192.168.2.5' +join 'riak@192.168.2.6' +------------------------------------------------------------------------------- + + +NOTE: Applying these changes will result in 1 cluster transition + +############################################################################### + After cluster transition 1/1 +############################################################################### + +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +valid 100.0% 20.3% 'riak@192.168.2.2' +valid 0.0% 20.3% 'riak@192.168.2.3' +valid 0.0% 20.3% 'riak@192.168.2.4' +valid 0.0% 20.3% 'riak@192.168.2.5' +valid 0.0% 18.8% 'riak@192.168.2.6' +------------------------------------------------------------------------------- +Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + +Transfers resulting from cluster changes: 51 + 12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3' + 13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4' + 13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5' + 13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6' +``` + +If the plan is to your liking, submit the changes by running `riak-admin +cluster commit`. + +{{% note title="Note on ring changes" %}} +The algorithm that distributes partitions across the cluster during membership +changes is non-deterministic. As a result, there is no optimal ring. In the +event that a plan results in a slightly uneven distribution of partitions, the +plan can be cleared.
Clearing a cluster plan with `riak-admin cluster clear` +and running `riak-admin cluster plan` again will produce a slightly different +ring. +{{% /note %}} + +## Removing a Node From a Cluster + +A node can be removed from the cluster in two ways. One assumes that a +node is decommissioned, for example, because its added capacity is not +needed anymore or because it's explicitly replaced with a new one. The +second is relevant for failure scenarios in which a node has crashed and +is irrecoverable and thus must be removed from the cluster by issuing +the command from another node. + +The command to remove a running node is `riak-admin cluster leave`. This +command must be executed on the node that you intend to remove from the +cluster. + +Similarly to joining a node, after executing `riak-admin cluster leave` +the cluster plan must be reviewed with `riak-admin cluster plan` and +the changes committed with `riak-admin cluster commit`. + +The other command is `riak-admin cluster leave <node>`, where `<node>` +is the node name as specified in the node's configuration files: + +```bash +riak-admin cluster leave riak@192.168.2.1 +``` + +This command can be run from any other node in the cluster. + +Under the hood, both commands do basically the same thing. Running +`riak-admin cluster leave` without an argument simply selects the +current node for you automatically. + +As with `riak-admin cluster leave`, the plan to have a node leave the +cluster must first be reviewed with `riak-admin cluster plan` and +committed with `riak-admin cluster commit` before any changes will +actually take place. + + +## Pausing a `join` or `leave` + +{{% note title="Warning" %}} +Pausing may impact cluster health and is not recommended for more than a short period of time. +{{% /note %}} + +To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0: + +```bash +riak-admin transfer-limit <node> 0 +``` diff --git a/content/riak/kv/2.2.6/using/cluster-operations/backend.md b/content/riak/kv/2.2.6/using/cluster-operations/backend.md new file mode 100644 index 0000000000..3adae4ee50 --- /dev/null +++ b/content/riak/kv/2.2.6/using/cluster-operations/backend.md @@ -0,0 +1,16 @@ +--- +draft: true +title: "Backend" +description: "" +project: "riak_kv" +project_version: "2.2.6" +#menu: +# riak_kv-2.2.6: +# name: "Backend" +# identifier: "cluster_operations_backend" +# weight: 112 +# parent: "managing_cluster_operations" +toc: true +--- + +**TODO: Add content** diff --git a/content/riak/kv/2.2.6/using/cluster-operations/backing-up.md b/content/riak/kv/2.2.6/using/cluster-operations/backing-up.md new file mode 100644 index 0000000000..bb0a58c57b --- /dev/null +++ b/content/riak/kv/2.2.6/using/cluster-operations/backing-up.md @@ -0,0 +1,267 @@ +--- +title: "Backing Up" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Backing Up" + identifier: "cluster_operations_backing_up" + weight: 106 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.2.6/ops/running/backups + - /riak/kv/2.2.6/ops/running/backups +--- + +[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters +[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/reference/strong-consistency +[concept aae]: 
{{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/ +[aae read repair]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy + +Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. + +Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios. + +This page covers how to perform backups of Riak KV data. + +## Overview + +Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar. + +Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes. + +The basic process for getting a backup of Riak KV from a node is as follows: + +1. Stop Riak KV with `riak stop`. +2. Back up the appropriate data, ring, and configuration directories. +3. Start Riak KV. + +Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting. + +{{% note title="Backups and eventual consistency" %}} +Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. + +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +{{% /note %}} + +## OS-Specific Directory Locations + +The default Riak KV data, ring, and configuration directories for each of the supported operating systems are as follows: + +#### Debian and Ubuntu + +Data | Directory +:----|:--------- +Bitcask | `/var/lib/riak/bitcask` +LevelDB | `/var/lib/riak/leveldb` +Ring | `/var/lib/riak/ring` +Configuration | `/etc/riak` +Cluster Metadata | `/var/lib/riak/cluster_meta` +Search | `/var/lib/riak/yz` +Strong consistency | `/var/lib/riak/ensembles` + +#### Fedora and RHEL + +Data | Directory +:----|:--------- +Bitcask | `/var/lib/riak/bitcask` +LevelDB | `/var/lib/riak/leveldb` +Ring | `/var/lib/riak/ring` +Configuration | `/etc/riak` +Cluster Metadata | `/var/lib/riak/cluster_meta` +Search | `/var/lib/riak/yz` +Strong consistency | `/var/lib/riak/ensembles` + +#### FreeBSD + +Data | Directory +:----|:--------- +Bitcask | `/var/db/riak/bitcask` +LevelDB | `/var/db/riak/leveldb` +Ring | `/var/db/riak/ring` +Configuration | `/usr/local/etc/riak` +Cluster Metadata | `/var/db/riak/cluster_meta` +Search | `/var/db/riak/yz` +Strong consistency | `/var/db/riak/ensembles` + +#### OS X + +Data | Directory +:----|:--------- +Bitcask | `./data/bitcask` +LevelDB | `./data/leveldb` +Ring | `./data/riak/ring` +Configuration | `./etc` +Cluster Metadata | `./data/riak/cluster_meta` +Search | `./data/riak/yz` +Strong consistency | `./data/ensembles` + +**Note**: OS X paths are relative to the directory in which the package +was extracted.
+ +#### SmartOS + +Data | Directory +:----|:--------- +Bitcask | `/var/db/riak/bitcask` +LevelDB | `/var/db/riak/leveldb` +Ring | `/var/db/riak/ring` +Configuration | `/opt/local/etc/riak` +Cluster Metadata | `/var/db/riak/cluster_meta` +Search | `/var/db/riak/yz` +Strong consistency | `/var/db/riak/ensembles` + +#### Solaris + +Data | Directory +:----|:--------- +Bitcask | `/opt/riak/data/bitcask` +LevelDB | `/opt/riak/data/leveldb` +Ring | `/opt/riak/ring` +Configuration | `/opt/riak/etc` +Cluster Metadata | `/opt/riak/cluster_meta` +Search | `/opt/riak/yz` +Strong consistency | `/opt/riak/data/ensembles` + +## Performing Backups + +{{% note title="Deprecation notice" %}} +In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#backup) command commonly used for +backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead. +{{% /note %}} + +Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment. + +Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux +installation. + +The following examples use `tar`: + +{{% note %}} +Backups must be performed while Riak KV is stopped to prevent data loss. +{{% /note %}} + +### Bitcask + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak +``` + +### LevelDB + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak +``` + +### Cluster Metadata + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/cluster_meta +``` + +### Search / Solr Data + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/yz +``` + +### Strong Consistency Data + +Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature +can be stored in an analogous fashion: + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/ensembles +``` + +## Restoring a Node + +The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment. + +If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process: + +1. Install Riak on the new node. +2. Restore your old node's configuration files, data directory, and ring + directory. +3. Start the node and verify proper operation with `riak ping`, + `riak-admin status`, and other methods you use to check node health. + +If the node name of a restored node (`-name` argument in `vm.args` or +`nodename` parameter in `riak.conf`) is different from the name of the +node that the restored backup was taken from, you will need to +additionally: + +1. Mark the original instance down in the cluster using + [`riak-admin down <node>`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#down) +2. Join the restored node to the cluster using + [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster-join) +3. 
Replace the original instance with the renamed instance using + [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster-force-replace) +4. Plan the changes to the cluster with `riak-admin cluster plan` +5. Finally, commit the cluster changes with `riak-admin cluster commit` + +{{% note %}} +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/2.2.6/using/admin/). +{{% /note %}} + +For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. + +1. Join to any existing cluster node. + + ```bash + riak-admin cluster join riak@riak2.example.com + ``` + +2. Mark the old instance down. + + ```bash + riak-admin down riak@riak1.example.com + ``` + +3. Force-replace the original instance with the new one. + + ```bash + riak-admin cluster force-replace \ + riak@riak1.example.com riak@riak6.example.com + ``` + +4. Display and review the cluster change plan. + + ```bash + riak-admin cluster plan + ``` + +5. Commit the changes to the cluster. + + ```bash + riak-admin cluster commit + ``` + +Your [configuration files][config reference] should also be changed to match the new name in addition to running the commands (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system). + +If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses. + +A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change, but the hostnames are used for the node names and the hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files. + +When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself +and for any other nodes whose names have changed: + +1. Verify that the correct name is present in your configuration file. +2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name. + - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter) +3. Disconnect from the attached session with **Ctrl-G + q**. +4. Finally, run `riak-admin member_status` to list all of the nodes and verify that all nodes listed have the correct names. + +## Restoring a Cluster + +Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/2.2.6/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
diff --git a/content/riak/kv/2.2.6/using/cluster-operations/bucket-types.md b/content/riak/kv/2.2.6/using/cluster-operations/bucket-types.md new file mode 100644 index 0000000000..62f7544a35 --- /dev/null +++ b/content/riak/kv/2.2.6/using/cluster-operations/bucket-types.md @@ -0,0 +1,58 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Bucket Types" + identifier: "cluster_operations_bucket_types" + weight: 104 + parent: "managing_cluster_operations" +toc: true +--- + +Buckets are essentially a flat namespace in Riak. They allow the same +key name to exist in multiple buckets and enable you to apply +configurations across keys. + +{{% note title="How Many Buckets Can I Have?" %}} +Buckets come with virtually no cost _except for when you modify the default +bucket properties_. Modified bucket properties are gossiped around the cluster +and therefore add to the amount of data sent around the network. In other +words, buckets using the `default` bucket type are free. More on that in the +next section. +{{% /note %}} + +In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the +default bucket namespace but require an additional setup step on the +command line. + +## Creating a Bucket Type + +When creating a new bucket type, you can create a bucket type without +any properties and set individual buckets to be indexed. The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` diff --git a/content/riak/kv/2.2.6/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.2.6/using/cluster-operations/changing-cluster-info.md new file mode 100644 index 0000000000..5b42e17b77 --- /dev/null +++ b/content/riak/kv/2.2.6/using/cluster-operations/changing-cluster-info.md @@ -0,0 +1,454 @@ +--- +title: "Changing Cluster Information" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Changing Cluster Info" + identifier: "cluster_operations_change_info" + weight: 101 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.2.6/ops/running/nodes/renaming + - /riak/kv/2.2.6/ops/running/nodes/renaming +--- + +[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference + +## Change the Node Name + +The node name is an important setting for the Erlang VM, especially when +you want to build a cluster of nodes, as the node name identifies both +the Erlang application and the host name on the network. All nodes in +the Riak cluster need these node names to communicate and coordinate +with each other. + +In your configuration files, the node name defaults to `riak@127.0.0.1`. 
+To change the node name, change the following line: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +Change it to something that corresponds to either the IP address or a +resolvable host name for this particular node, like so: + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +## Change the HTTP and Protocol Buffers binding address + +By default, Riak's HTTP and Protocol Buffers services are bound to the +local interface, i.e. 127.0.0.1, and are therefore unable to serve +requests from the outside network. The relevant setting is in your +[configuration files][config reference]: + +```riakconf +# For HTTP +listener.http.internal = 127.0.0.1:8098 + +# For Protocol Buffers +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +% In the riak_api section + +% For HTTP +{http, [ {"127.0.0.1", 8098 } ]}, + +% For Protocol Buffers +{pb, [ {"127.0.0.1", 8087} ] }, +``` + +Either change it to use an IP address that corresponds to one of the +server's network interfaces, or 0.0.0.0 to allow access from all +interfaces and networks, e.g.: + +```riakconf +listener.http.internal = 0.0.0.0:8098 +``` + +```appconfig +% In the riak_core section +{http, [ {"0.0.0.0", 8098 } ]}, +``` + +The same configuration should be changed for the Protocol Buffers +interface if you intend on using it (which we recommend). Change the +following line: + +```riakconf +listener.protobuf.internal = 0.0.0.0:8087 +``` + +```appconfig +% In the riak_core section +{pb, [ {"0.0.0.0", 8087} ] }, +``` + +## Rename Single Node Clusters + +To rename a single-node development cluster: + +1. Stop the node with `riak stop`. + +2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args` to the new name. + +3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`. + +4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`. + +5. Start Riak on the node with `riak start`. + + +## Rename Multi-Node Clusters + +For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. + +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. + +There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. + +The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method. 
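+ +In outline, the rename procedure for each node looks like the following. The node names here are placeholders, and each step is described in detail in the example scenario below: + +```bash +# On the node being renamed: +riak stop + +# From any other running node: +riak-admin down riak@old-name + +# On the node being renamed: update nodename (or -name), update any +# listener addresses, move the ring directory aside, and then: +riak start +riak-admin cluster join riak@running-node +riak-admin cluster force-replace riak@old-name riak@new-name +riak-admin cluster plan +riak-admin cluster commit +```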
+ +### Example Scenario + +For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration: + +* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11 +* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12 +* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13 +* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14 +* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15 + +The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use. + +The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation. + +### Process + +This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section. + +1. [Down the node to be reconfigured](#down) +2. [Reconfigure node to use new address](#reconfigure) +3. [Repeat previous steps on each node](#repeat) + + +<a id="down"></a> +#### Down the Node + +1. Stop Riak on `node1.localdomain`: + + ```bash + riak stop + ``` + + The output should look like this: + + ``` + Attempting to restart script through sudo -H -u riak + ok + ``` + +2. From the `node2.localdomain` node, mark `riak@10.1.42.11` down: + + ```bash + riak-admin down riak@10.1.42.11 + ``` + + Successfully marking the node down should produce output like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: "riak@10.1.42.11" marked as down + ``` + + This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node. + +<a id="reconfigure"></a> +#### Reconfigure Node to Use New Address + +Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps: + +1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example: + + `riak.conf`: `nodename = riak@192.168.17.11` + `vm.args` : `-name riak@192.168.17.11` + +2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Rename Single Node Clusters](#rename-single-node-clusters). + +3. Rename the node's `ring` directory, the location of which is described in step 4 of [Rename Single Node Clusters](#rename-single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process. + +4. Start Riak on `node1.localdomain`. + + ```bash + riak start + ``` + +5. Join the node back into the cluster. + + ```bash + riak-admin cluster join riak@10.1.42.12 + ``` + + Successful staging of the join request should have output like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12' + ``` + +6. 
Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`: + + ```bash + riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11 + ``` + + Successful force replacement staging output looks like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11' + ``` + +7. Review the new changes with `riak-admin cluster plan`: + + ```bash + riak-admin cluster plan + ``` + + Example output: + + ```bash + Attempting to restart script through sudo -H -u riak + =========================== Staged Changes ============================ + Action Nodes(s) + ----------------------------------------------------------------------- + join 'riak@192.168.17.11' + force-replace 'riak@10.1.42.11' with 'riak@192.168.17.11' + ----------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.11' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ####################################################################### + After cluster transition 1/1 + ####################################################################### + + ============================= Membership ============================== + Status Ring Pending Node + ----------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@10.1.42.12' + valid 20.3% -- 'riak@10.1.42.13' + valid 20.3% -- 'riak@10.1.42.14' + valid 18.8% -- 'riak@10.1.42.15' + ----------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 13 + 13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11' + ``` + +8. Commit the new changes to the cluster with `riak-admin cluster commit`: + + ```bash + riak-admin cluster commit + ``` + + Output from the command should resemble this example: + + ```bash + Attempting to restart script through sudo -H -u riak + Cluster changes committed + ``` + +9. Check that the node is participating in the cluster and functioning as expected: + + ```bash + riak-admin member-status + ``` + + Output should resemble this example: + + ```bash + Attempting to restart script through sudo -H -u riak + ============================= Membership ============================== + Status Ring Pending Node + ----------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@10.1.42.12' + valid 20.3% -- 'riak@10.1.42.13' + valid 20.3% -- 'riak@10.1.42.14' + valid 18.8% -- 'riak@10.1.42.15' + ----------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +10. Monitor hinted handoff transfers to ensure they have finished with the `riak-admin transfers` command. + +11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed. + +{{% note title="Note" %}} +When using the `riak-admin cluster force-replace` command, you will always get a +warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be +lost`. Since we didn't delete any data files and we are replacing the node +with itself under a new name, we will not lose any replicas.
+{{% /note %}} + +<a id="repeat"></a> +#### Repeat previous steps on each node + +Repeat the steps above for each of the remaining nodes in the cluster. + +Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster. + +```bash +riak-admin cluster join riak@192.168.17.11 +``` + +A successful join request staging produces output similar to this example: + +```bash +Attempting to restart script through sudo -H -u riak +Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11' +``` + +## Clusters from Backups + +The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other. + +Expanding on the Example Scenario above, the below steps can be used to rename nodes in a cluster that is being restored from backups. The below steps assume every node is offline, and they will indicate when to bring each node online. + +#### Bringing Up the First Node + +In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node. + +1. In `riak.conf` change `nodename`, `-name` in `vm.args`, from `riak@10.1.42.11` to your new nodename, `riak@192.168.17.11`. + +2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring. + +3. Start Riak on `node1.localdomain`. + +4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`. + +5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`: + + ```bash + ================================= Membership ================================== + Status Ring Pending Node + ------------------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + down 20.3% -- 'riak@10.1.42.12' + down 20.3% -- 'riak@10.1.42.13' + down 20.3% -- 'riak@10.1.42.14' + down 18.8% -- 'riak@10.1.42.15' + ------------------------------------------------------------------------------- + Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4 + + ``` + +6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`: + + ```bash + ================================== Claimant =================================== + Claimant: 'riak@192.168.17.11' + Status: up + Ring Ready: true + + ============================== Ownership Handoff ============================== + No pending changes. + + ============================== Unreachable Nodes ============================== + All nodes are up and reachable + ``` + +Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes. + +#### Bringing Up the Remaining Nodes + +1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args` as described above. + +2. Move aside the ring directory. 
As in [Rename Multi-Node Clusters](#rename-multi-node-clusters), we will save this ring directory as a backup until we're finished. + +3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data. + +4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`. + +5. Force replace each node with its old node name. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`. + +6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to the following: + + ```bash + =============================== Staged Changes ================================ + Action Details(s) + ------------------------------------------------------------------------------- + force-replace 'riak@10.1.42.12' with 'riak@192.168.17.12' + force-replace 'riak@10.1.42.13' with 'riak@192.168.17.13' + force-replace 'riak@10.1.42.14' with 'riak@192.168.17.14' + force-replace 'riak@10.1.42.15' with 'riak@192.168.17.15' + join 'riak@192.168.17.12' + join 'riak@192.168.17.13' + join 'riak@192.168.17.14' + join 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.12' replicas will be lost + WARNING: All of 'riak@10.1.42.13' replicas will be lost + WARNING: All of 'riak@10.1.42.14' replicas will be lost + WARNING: All of 'riak@10.1.42.15' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ############################################################################### + After cluster transition 1/1 + ############################################################################### + + ================================= Membership ================================== + Status Ring Pending Node + ------------------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@192.168.17.12' + valid 20.3% -- 'riak@192.168.17.13' + valid 20.3% -- 'riak@192.168.17.14' + valid 18.8% -- 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 51 + 13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12' + 13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13' + 13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14' + 12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15' + ``` + +7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`. + +8. 
+8. Once the cluster transition has completed, all node names should be changed and marked as valid in `riak-admin member-status`, as shown below:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@192.168.17.12'
+    valid      20.3%      --      'riak@192.168.17.13'
+    valid      20.3%      --      'riak@192.168.17.14'
+    valid      18.8%      --      'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
diff --git a/content/riak/kv/2.2.6/using/cluster-operations/handoff.md b/content/riak/kv/2.2.6/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..5058d3e9bf
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/cluster-operations/handoff.md
@@ -0,0 +1,116 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/handoff
+  - /riak/kv/2.2.6/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to change your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command
+requires that you specify both a node or nodes to be targeted by the
+command and whether you'd like to disable inbound handoff, outbound
+handoff, or both. The `disable` command works just like `enable`. To
+give just one example, this command would disable all forms of handoff
+on all nodes:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster.
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table providing the following information about
+each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, it will display detailed information
+about each active transfer.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the configuration
+reference linked above.
diff --git a/content/riak/kv/2.2.6/using/cluster-operations/inspecting-node.md b/content/riak/kv/2.2.6/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..2a21832486
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,492 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/nodes/inspecting
+  - /riak/kv/2.2.6/ops/running/nodes/inspecting
+---
+
+When you need to inspect a Riak node to gather metrics on performance or
+to diagnose potential issues, a number of tools are available to help.
+They are either included with Riak itself or made available through the
+Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note that for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required for statistics to be generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact, as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node.
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET FSM Siblings are inextricably linked. These are one-minute stats.
+
+Stat | Description
+------------------------------|---------------------------------------------------
+`node_get_fsm_objsize_mean`   | Mean object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_95`     | 95th percentile object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_99`     | 99th percentile object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_100`    | 100th percentile object size (bytes) encountered by this node within the last minute
+
+### Total Stats
+
+Total Stats represent the total number of times a particular activity
+has occurred since this node was started.
+
+Stat | Description
+---------------------------------------|---------------------------------------------------
+`node_gets_total`                      | Total number of GETs coordinated by this node, including GETs to non-local vnodes
+`node_puts_total`                      | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes
+`vnode_gets_total`                     | Total number of GETs coordinated by local vnodes
+`vnode_puts_total`                     | Total number of PUTs coordinated by local vnodes
+`read_repairs_total`                   | Total number of Read Repairs this node has coordinated
+`coord_redirs_total`                   | Total number of requests this node has redirected to other nodes for coordination
+`vnode_index_refreshes_total`          | Total number of indexes refreshed during secondary index anti-entropy
+`vnode_index_reads_total`              | Total number of local replicas participating in secondary index reads
+`vnode_index_writes_total`             | Total number of local replicas participating in secondary index writes
+`vnode_index_writes_postings_total`    | Total number of individual secondary index values written
+`vnode_index_deletes_total`            | Total number of local replicas participating in secondary index deletes
+`vnode_index_deletes_postings_total`   | Total number of individual secondary index values deleted
+`pbc_connects_total`                   | Total number of Protocol Buffers connections made
+`precommit_fail`                       | Total number of pre-commit hook failures
+`postcommit_fail`                      | Total number of post-commit hook failures
+`node_get_fsm_rejected_total`          | Total number of GET FSMs rejected by Sidejob's overload protection
+`node_put_fsm_rejected_total`          | Total number of PUT FSMs rejected by Sidejob's overload protection
+`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas
+`read_repairs_primary_notfound_count`  | Total number of read repair operations performed on primary vnodes due to missing replicas
+`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas
+`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas
+
+### Timestamps
+
+Some of the Erlang applications that make up Riak contribute
+statistics to `riak-admin status`. The timestamps below record, in
+Epoch time, the last time statistics for that application were
+generated.
+
+Stat | Description
+--------------------|---------------------------------------------------
+`riak_kv_stat_ts`   | The last time Riak KV stats were generated.
+`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated.
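+
+Any of the statistics described in this document can be checked
+individually by filtering the output of `riak-admin status` with
+standard shell tools. A quick sketch (the stat names are those listed
+in the tables above):
+
+```bash
+# Watch GET/PUT latency percentiles on this node
+riak-admin status | grep -E 'node_(get|put)_fsm_time'
+```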
+
+### Ring
+
+General ring information is reported in `riak-admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members`       | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership`     | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang's `cpu_sup` module,
+documentation for which can be found at [ErlDocs:
+cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1`   | The average number of active processes for the last 1 minute (equivalent to the load average reported by the `top(1)` command when divided by 256)
+`cpu_avg5`   | The average number of active processes for the last 5 minutes (equivalent to the load average reported by the `top(1)` command when divided by 256)
+`cpu_avg15`  | The average number of active processes for the last 15 minutes (equivalent to the load average reported by the `top(1)` command when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine,
+documentation for which can be found at [ErlDocs:
+erlang:memory/0](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total`          | Total allocated memory (sum of processes and system)
+`memory_processes`      | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system`         | Total allocated memory that is not directly related to an Erlang process
+`memory_atom`           | Total amount of memory currently allocated for atom storage
+`memory_atom_used`      | Total amount of memory currently used for atom storage
+`memory_binary`         | Total amount of memory used for binaries
+`memory_code`           | Total amount of memory allocated for Erlang code
+`memory_ets`            | Total memory allocated for Erlang Term Storage
+`mem_total`             | Total available system memory
+`mem_allocated`         | Total memory allocated for this node
+
+### Erlang VM
+
+The statistics below describe properties of the Erlang VM.
+
+Stat | Description
+--------------------------|---------------------------------------------------
+`nodename`                | The name this node uses to identify itself
+`connected_nodes`         | A list of the nodes that this node is aware of at this time
+`sys_driver_version`      | String representing the Erlang driver version in use by the runtime system
+`sys_global_heaps_size`   | Current size of the shared global heap
+`sys_heap_type`           | String representing the heap type in use (one of private, shared, hybrid)
+`sys_logical_processors`  | Number of logical processors available on the system
+`sys_otp_release`         | Erlang OTP release version in use on the node
+`sys_process_count`       | Number of processes currently running in the Erlang VM
+`sys_smp_support`         | Boolean value representing whether symmetric multi-processing (SMP) is available
+`sys_system_version`      | Detailed Erlang version information
+`sys_system_architecture` | The node operating system and hardware architecture
+`sys_threads_enabled`     | Boolean value representing whether threads are enabled
+`sys_thread_pool_size`    | Number of threads in the asynchronous thread pool
+`sys_wordsize`            | Size of Erlang term words in bytes as an integer; for example, 4 is returned on 32-bit architectures and 8 is returned on 64-bit architectures
+
+### Miscellaneous Information
+
+Miscellaneous Information provides additional details particular to this
+node.
+
+Stat | Description
+---------------------------|---------------------------------------------------
+`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as undefined if LevelDB is not being used.
+`disk`                     | Information about the disk, taken from Erlang's disksup module. Reported as `[{"ID",KBytes_Used,Percent_Util}]`.
+`storage_backend`          | The storage backend currently in use.
+
+### Pipeline Metrics
+
+The following metrics from `riak_pipe` are generated during MapReduce
+operations.
+
+Stat | Description
+--------------------------------|---------------------------------------------------
+`pipeline_active`               | The number of pipelines active in the last 60 seconds
+`pipeline_create_count`         | The total number of pipelines created since the node was started
+`pipeline_create_error_count`   | The total number of pipeline creation errors since the node was started
+`pipeline_create_error_one`     | The number of pipeline creation errors in the last 60 seconds
+`pipeline_create_one`           | The number of pipelines created in the last 60 seconds
+
+### Application and Subsystem Versions
+
+The specific version of each Erlang application and subsystem which
+makes up a Riak node is present in the `riak-admin status` output. Each
+application is linked below next to its version identifier.
+ +Stat | Description +------------------------|--------------------------------------------------- +`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl) +`riak_control_version` | [Riak Control](http://github.com/basho/riak_control) +`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info) +`riak_search_version` | [Riak Search](http://github.com/basho/riak_search) +`merge_index_version` | [Merge Index](http://github.com/basho/merge_index) +`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv) +`sidejob_version` | [Sidejob](http://github.com/basho/sidejob) +`riak_api_version` | [Riak API](http://github.com/basho/riak_api) +`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe) +`riak_core_version` | [Riak Core](http://github.com/basho/riak_core) +`bitcask_version` | [Bitcask](http://github.com/basho/bitcask) +`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats) + `webmachine_version` | [Webmachine](http://github.com/basho/webmachine) +`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb) +`inets_version` | [inets](http://erlang.org/doc/apps/inets/) +`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js) +`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/) +`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/) +`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon) +`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/) +`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/) +`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/) +`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/) +`lager_version` | [Lager](http://github.com/DeadZen/lager) +`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush) +`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/) +`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/) +`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/) +`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/) + +### Riak Search Statistics + +The following statistics related to Riak Search message queues are +available. 
+
+Stat | Description
+-----------------------------|---------------------------------------------------
+`riak_search_vnodeq_max`     | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_mean`    | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_median`  | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_min`     | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_total`   | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started
+`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem
+
+Note that, under ideal operation and with the exception of
+`riak_search_vnodes_running`, these statistics should contain low values
+(e.g., 0-10). Higher values could be indicative of an issue.
+
+## `riak-debug`
+
+The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes.
+
+`riak-debug` also runs `riak-admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well.
+
+{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}}
+The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion.
+{{% /note %}}
+
+## Strong Consistency Stats
+
+Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The tables below list those stats.
+
+### GET-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
+`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
+`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+
+### PUT-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
+`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
+`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+
+## riak-admin diag
+
+Running `riak-admin diag` by itself will perform a check of all of the
+data partitions in your cluster.
It will return a listing of partitions
+that have been checked, each of which looks something like this:
+
+```
+{1392993748081016843912887106182707253109560705024, % the partition checked
+ 'dev-rel@127.0.0.1'},                              % that partition's nodename
+```
+
+At the end of that (potentially very long) listing of checked
+partitions, it will print notices, warnings, and other pieces of
+information about issues that it has found, including date/time, message
+type, and a detailed description. Here's an example:
+
+```
+15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
+15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
+```
+
+Messages bear the following types (derived from
+[syslog](http://en.wikipedia.org/wiki/Syslog) severity levels):
+
+* `debug`
+* `info`
+* `notice`
+* `warning`
+* `error`
+* `critical`
+* `alert`
+* `emergency`
+
+#### Command flags
+
+Attaching the `--help` flag will return a list of flags and commands
+that can be used with Riaknostic:
+
+```
+Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]
+
+-h, --help            Display help/usage dialogue
+-d, --level           Minimum message severity level (default: notice)
+-l, --list            Describe available diagnostic tasks
+--export              Package system info in '/export.zip'
+check_name            A specific check to run
+```
+
+Running `riak-admin diag` with the `--list` flag will return a list of
+available diagnostic checks. The following checks are available:
+
+Check | Description
+:-----|:-----------
+`disk` | Data directory permissions and atime
+`dumps` | Find crash dumps
+`memory_use` | Measure memory usage
+`nodes_connected` | Cluster node liveness
+`ring_membership` | Cluster membership validity
+`ring_preflists` | Check if the ring satisfies `n_val`
+`ring_size` | Check if the ring size is valid
+`search` | Check whether Riak Search is enabled on all nodes
+
+The `--level` flag enables you to specify the log level and thus to
+filter messages based on type. You can pass in any of the message types
+listed above (`debug`, `info`, etc.).
+
+The `--level` flag can be used when running `riak-admin diag` with or
+without specifying a diagnostic check.
+
+#### Contributing
+
+Do you have an idea that would help us improve Riaknostic? If so, fork
+the [GitHub repository](https://github.com/basho/riaknostic) and send us
+a pull request with your changes. The code is documented with
+[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
+Docs a read before you contribute.
+
+If you want to run the Riaknostic script while developing and you don't
+have it hooked up to your local Riak installation, you can invoke it
+directly like so:
+
+```bash
+./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
+```
+
+Those extra options are usually assigned by the `riak-admin` script for
+you, but here's how to set them:
+
+* `--etc` --- The location of your Riak configuration directory (usually
+  `/etc`). In the example above, configuration is in the generated
+  directory of a source checkout of Riak.
+* `--base` --- The "base" directory of Riak, usually the root of the
+  generated directory or `/usr/lib/riak` on Linux. Scan the
+  `riak-admin` script for how the `RUNNER_BASE_DIR` variable is
+  assigned on your platform.
+* `--user` --- The user/UID as which the Riak node runs.
+  In a source checkout, it's the current user; on most systems, it's `riak`.
+
+## Related Resources
+
+* [The riak-admin configuration management tool](../../admin/riak-admin/)
+* [Riaknostic](http://riaknostic.basho.com/)
+* [HTTP API Status](../../../developing/api/http/status/)
diff --git a/content/riak/kv/2.2.6/using/cluster-operations/load-balancing.md b/content/riak/kv/2.2.6/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..94e906e1db
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/cluster-operations/load-balancing.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Load Balancing"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+#menu:
+#  riak_kv-2.2.6:
+#    name: "Load Balancing"
+#    identifier: "cluster_operations_load_balancing"
+#    weight: 111
+#    parent: "managing_cluster_operations"
+toc: true
+---
+
+**TODO: Add content (not sure where this exists in docs)**
diff --git a/content/riak/kv/2.2.6/using/cluster-operations/logging.md b/content/riak/kv/2.2.6/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..4abde19556
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/cluster-operations/logging.md
@@ -0,0 +1,42 @@
+---
+title: "Enabling and Disabling Debug Logging"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Logging"
+    identifier: "cluster_operations_logging"
+    weight: 105
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+If you'd like to enable debug logging on the current node, i.e. set the
+console log level to `debug`, you can do so without restarting the node
+by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:
+
+```erlang
+lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
+```
+
+You should replace the file location above (`/var/log/riak/console.log`)
+with your platform-specific location, e.g. `./log/console.log` for a
+source installation. This location is specified by the
+`log.console.file` configuration parameter.
+
+If you'd like to enable debug logging on _all_ nodes instead of just one
+node, you can enter the Erlang console of any running node via `riak
+attach` and enter the following:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
+```
+
+As before, use the appropriate log file location for your cluster.
+
+At any time, you can set the log level back to `info`:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
+```
diff --git a/content/riak/kv/2.2.6/using/cluster-operations/replacing-node.md b/content/riak/kv/2.2.6/using/cluster-operations/replacing-node.md
new file mode 100644
index 0000000000..72c6bfc9cf
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/cluster-operations/replacing-node.md
@@ -0,0 +1,95 @@
+---
+title: "Replacing a Node"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Replacing a Node"
+    identifier: "cluster_operations_replace_node"
+    weight: 102
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+At some point, for various reasons, you might need to replace a node in
+your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/2.2.6/using/repair-recovery)). Here is the recommended way to go
+about replacing a node.
+
+1. 
Back up your data directory on the node in question. In this example
+scenario, we'll call the node `riak4`:
+
+    ```bash
+    sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+    ```
+
+    If you have any unforeseen issues at any point in the node
+    replacement process, you can restore the node's data from this
+    backup.
+
+2. Download and install Riak on the new node you wish to bring into the
+cluster and have it replace the `riak4` node. We'll call the new node
+`riak7` for the purpose of this example.
+
+3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-cli/#start):
+
+    ```bash
+    riak start
+    ```
+
+4. Plan the join of the new `riak7` node to an existing node already
+participating in the cluster (for example, `riak0`) by running the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster) command on the new `riak7` node:
+
+    ```bash
+    riak-admin cluster join riak0
+    ```
+
+5. Plan the replacement of the existing `riak4` node with the new
+`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster replace riak4 riak7
+    ```
+
+    <div class=info>
+    <div class=title>Single Nodes</div>
+    If a node is started singly using default settings (as, for example,
+    you might do when you are building your first test environment), you
+    will need to remove the ring files from the data directory after you
+    edit `/etc/vm.args`. `riak-admin cluster replace` will not work as
+    the node has not been joined to a cluster.
+    </div>
+
+6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster) command executed on the new
+`riak7` node:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+7. If the changes are correct, you can commit them with the
+[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster):
+
+    ```bash
+    riak-admin cluster clear
+    ```
+
+Once you have successfully replaced the node, the old node (`riak4`)
+should begin leaving the cluster. You can check on ring readiness after
+replacing the node with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#ringready)
+and [`riak-admin member-status`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#member-status)
+commands.
+
+{{% note title="Ring Settling" %}}
+You'll need to make sure that no other ring changes occur between the time
+you start the new node and the time the ring settles with the new IP info.
+
+The ring is considered settled when the new node reports `true` when you run
+the `riak-admin ringready` command.
+{{% /note %}}
diff --git a/content/riak/kv/2.2.6/using/cluster-operations/secondary-indexes.md b/content/riak/kv/2.2.6/using/cluster-operations/secondary-indexes.md
new file mode 100644
index 0000000000..7b136a1cd9
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/cluster-operations/secondary-indexes.md
@@ -0,0 +1,80 @@
+---
+draft: true
+title: "Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+#menu:
+#  riak_kv-2.2.6:
+#    name: "Secondary Indexes"
+#    identifier: "cluster_operations_2i"
+#    weight: 109
+#    parent: "managing_cluster_operations"
+toc: true
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida.
Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` diff --git a/content/riak/kv/2.2.6/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.2.6/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..0ecca850ad --- /dev/null +++ b/content/riak/kv/2.2.6/using/cluster-operations/strong-consistency.md @@ -0,0 +1,71 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +--- + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. 
Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats. + +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly 
consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
diff --git a/content/riak/kv/2.2.6/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.2.6/using/cluster-operations/v2-multi-datacenter.md
new file mode 100644
index 0000000000..82003766bf
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/cluster-operations/v2-multi-datacenter.md
@@ -0,0 +1,259 @@
+---
+title_supertext: "V2 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "V2 Multi-Datacenter"
+    identifier: "cluster_operations_v2"
+    weight: 115
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v2/operations
+  - /riak/kv/2.2.6/ops/mdc/v2/operations
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication system is largely
+controlled by the `riak-repl` command. The sections below detail the
+available subcommands.
+
+## add-listener
+
+Adds a listener (primary) to the given node, IP address, and port.
+
+```bash
+riak-repl add-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-nat-listener
+
+Adds a NAT-aware listener (primary) to the given node, IP address, port,
+NAT IP, and NAT port. If a non-NAT listener already exists with the same
+internal IP and port, it is "upgraded" to a NAT listener.
+
+```bash
+riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010
+```
+
+## del-listener
+
+Removes and shuts down a listener (primary) on the given node, IP
+address, and port.
+
+```bash
+riak-repl del-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-site
+
+Adds a site (secondary) to the local node, connecting to the specified
+listener.
+
+```bash
+riak-repl add-site <ipaddr> <portnum> <sitename>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-site 10.0.1.156 9010 newyork
+```
+
+## del-site
+
+Removes a site (secondary) from the local node by name.
+
+```bash
+riak-repl del-site <sitename>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-site newyork
+```
+
+## status
+
+Obtains status information about replication. Reports counts on how much
+data has been transmitted, transfer rates, message queue lengths of
+clients and servers, number of fullsync operations, and connection
+status. This command only displays useful information on the leader
+node.
+ +```bash +riak-repl status +``` + +## start-fullsync + +Manually initiates a fullsync operation with connected sites. + +```bash +riak-repl start-fullsync +``` + +## cancel-fullsync + +Cancels any fullsync operations in progress. If a partition is in +progress, synchronization will stop after that partition completes. +During cancellation, `riak-repl status` will show `cancelled` in the +status. + +```bash +riak-repl cancel-fullsync +``` + +## pause-fullsync + +Pauses any fullsync operations in progress. If a partition is in +progress, synchronization will pause after that partition completes. +While paused, `riak-repl status` will show `paused` in the status +information. Fullsync may be cancelled while paused. + +```bash +riak-repl pause-fullsync +``` + +## resume-fullsync + +Resumes any fullsync operations that were paused. If a fullsync +operation was running at the time of the pause, the next partition will +be synchronized. If not, it will wait until the next `start-fullsync` +command or `fullsync_interval`. + +```bash +riak-repl resume-fullsync +``` + +## riak-repl Status Output + +The following definitions describe the output of the `riak-repl status` +command. Please note that many of these statistics will only appear on +the current leader node, and that all counts will be reset to 0 upon +restarting Riak. + +### Client + +Field | Description +:-----|:----------- +`client_stats` | See <a href="{{< baseurl >}}riak/kv/2.2.6/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a> +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected sites +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of site connections made to this node +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. + +### Server + +Field | Description +:-----|:----------- +`server_bytes_recv` | The total number of bytes the server (listener) has received +`server_bytes_sent` | The total number of bytes the server (listener) has sent +`server_connect_errors` | The number of listener to site connection errors +`server_connects` | The number of times the listener connects to the client site +`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started +`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. 
+`server_stats` | See <a href="{{< baseurl >}}riak/kv/2.2.6/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a>
+
+### Elections and Objects
+
+Field | Description
+:-----|:-----------
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+
+### Other
+
+Field | Description
+:-----|:-----------
+`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>`
+`[sitename]_ips` | Defines a replication site
+`leader` | Which node is the current leader of the cluster
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the client (site) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The listeners currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>`connected` --- The IP address and port of a connected client (site)</li><li>`cluster_name` --- The name of the connected client (site)</li><li>`connecting` --- The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | State shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul>
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/2.2.6/configuring/v2-multi-datacenter/) guide for more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the server (listener) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/2.2.6/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+## Keylist Strategy
+
+The following fields appear under both the `keylist_server` and
+`keylist_client` fields. Any differences are described in the table.
+
+Field | Description
+------|------------
+`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
+`partition_start` | The number of elapsed seconds since replication has started on a given partition
+`stage_start` | The number of elapsed seconds since replication has started on a given stage
+`get_pool_size` | The number of Riak get finite state workers available to process requests
diff --git a/content/riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter.md
new file mode 100644
index 0000000000..342eeb2eee
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter.md
@@ -0,0 +1,421 @@
+---
+title_supertext: "V3 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "V3 Multi-Datacenter"
+    identifier: "cluster_operations_v3"
+    weight: 114
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v3/operations
+  - /riak/kv/2.2.6/ops/mdc/v3/operations
+---
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.2.6/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/2.2.6/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/2.2.6/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.2.6/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{<baseurl>}}riak/kv/2.2.6/using/reference/multi-datacenter/statistics
+
+This document explains how to manage replication with the `riak-repl`
+command. The behavior of some of these commands can be altered by
+setting the appropriate [configuration][config v3 mdc] values.
+
+All commands need to be run only once on a single node of a cluster for
+the changes to propagate to all other nodes.
+All changes will persist across node restarts and will automatically
+take effect when nodes are added to the cluster.
+
+## Cluster Connectivity
+
+#### clustername
+
+Set the `clustername` for all nodes in a Riak cluster.
+
+* Without a parameter, returns the current name of the cluster
+* With a parameter, names the current cluster
+
+To **set** the `clustername`:
+
+* Syntax: `riak-repl clustername <clustername>`
+* Example: `riak-repl clustername Boston`
+
+To **get** the `clustername`:
+
+* Syntax: `riak-repl clustername`
+* Example: `riak-repl clustername`
+
+#### connect
+
+The `connect` command establishes communications from a source cluster
+to a sink cluster of the same ring size. The `host:port` of the sink
+cluster is used for this. The IP and port to connect to can be found in
+the `advanced.config` of the remote cluster, under `riak_core` and
+`cluster_mgr`.
+
+The `host` can be either an IP address...
+
+* Syntax: `riak-repl connect <ip>:<port>`
+* Example: `riak-repl connect 192.168.2.1:9080`
+
+...or a hostname that will resolve to an IP address.
+
+* Syntax: `riak-repl connect <host>:<port>`
+* Example: `riak-repl connect Austin:9080`
+
+#### disconnect
+
+Disconnects a source cluster from a sink cluster.
+
+You may define a `host:port` combination...
+
+* Syntax: `riak-repl disconnect <host>:<port>`
+* Example: `riak-repl disconnect 192.168.2.1:9080`
+
+...or use the *name* of the cluster.
+
+* Syntax: `riak-repl disconnect <sink_clustername>`
+* Example: `riak-repl disconnect Austin`
+
+#### connections
+
+Displays a list of connections between source and sink clusters.
+
+* Syntax: `riak-repl connections`
+* Example: `riak-repl connections`
+
+#### clusterstats
+
+Displays current cluster stats using an optional `ip:port` as well as an
+optional `protocol-id`.
+
+`protocol-id` can be one of the following:
+
+* `cluster_mgr`
+* `rt_repl`
+* `fs_repl`
+
+The `clusterstats` command in use:
+
+* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>`
+* Example: `riak-repl clusterstats 192.168.2.1:9080`
+* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl`
+
+
+## Realtime Replication Commands
+
+#### realtime enable
+
+Enables realtime replication from a source cluster to sink clusters.
+
+This will start queuing updates for replication. The cluster will still
+require an invocation of `realtime start` for replication to occur.
+
+* Syntax: `riak-repl realtime enable <sink_clustername>`
+* Example: `riak-repl realtime enable Austin`
+
+#### realtime disable
+
+Disables realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime disable <sink_clustername>`
+* Example: `riak-repl realtime disable Austin`
+
+
+#### realtime start
+
+Starts realtime replication connections from a source cluster to sink
+clusters. See also `realtime enable` (above).
+
+* Syntax: `riak-repl realtime start <sink_clustername>`
+* Example: `riak-repl realtime start Austin`
+
+#### realtime stop
+
+Stops realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime stop <sink_clustername>`
+* Example: `riak-repl realtime stop Austin`
+
+
+## Fullsync Replication Commands
+
+These behaviors can be altered by using the `advanced.config`
+`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information.
+
+#### fullsync enable
+
+Enables fullsync replication from a source cluster to sink clusters. By
+default, a fullsync will begin as soon as a connection to the remote
+cluster is established.
+
+* Syntax: `riak-repl fullsync enable <sink_clustername>`
+* Example: `riak-repl fullsync enable Austin`
+
+#### fullsync disable
+
+Disables fullsync for a cluster.
+
+* Syntax: `riak-repl fullsync disable <sink_clustername>`
+* Example: `riak-repl fullsync disable Austin`
+
+#### fullsync start
+
+Starts a fullsync. If the application configuration
+`fullsync_on_connect` is set to `false`, a fullsync needs to be started
+manually. This is also used to trigger a periodic fullsync using a cron
+job. While a fullsync is in progress, a `start` command is ignored and a
+message is logged.
+
+* Syntax: `riak-repl fullsync start <sink_clustername>`
+* Example: `riak-repl fullsync start Austin`
+
+#### fullsync stop
+
+Stops a fullsync.
+
+* Syntax: `riak-repl fullsync stop <sink_clustername>`
+* Example: `riak-repl fullsync stop Austin`
+
+## Cascading Realtime Writes
+
+#### realtime cascades
+
+Shows the current cascading realtime setting.
+
+* Syntax: `realtime cascades`
+* Example: `riak-repl realtime cascades`
+
+#### realtime cascades always
+
+Enables realtime cascading writes.
+
+* Syntax: `realtime cascades always`
+* Example: `riak-repl realtime cascades always`
+
+#### realtime cascades never
+
+Disables realtime cascading writes.
+
+* Syntax: `realtime cascades never`
+* Example: `riak-repl realtime cascades never`
+
+
+## NAT
+
+**Note**: See [V3 Multi Data Center Replication With NAT][config v3 nat] for more information.
+
+#### nat-map show
+
+Shows the current NAT mapping table.
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### `proxy-get enable`
+
+Enables Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disables Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provides a redirect to the `<to-cluster-id>` for `proxy_get` if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+
+Shows the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+
+Deletes an existing redirect so that `proxy_get` requests are routed to
+the original provider cluster ID.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Displays this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+    ```bash
+    riak-repl show-local-cluster-id
+    ```
+
+    Possible output:
+
+    ```
+    local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+    ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster. This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster across all nodes on that cluster for a fullsync
+to a sink cluster. This means that if fullsync is configured for two
+different sink clusters, both with a `max_fssource_cluster` of 5, up to
+10 fullsync workers can be in progress. This only affects nodes on the
+source cluster on which this parameter is defined via the configuration
+file or the command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node's number of maximum connections. This only affects nodes on
+the sink cluster on which this parameter is defined via the
+configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
+(Version 3), separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+    ```bash
+    riak-repl modes mode_repl12 mode_repl13
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+    ```bash
+    riak-repl modes
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and realtime replication replicate data from source clusters to
+sink clusters, but some configurations and metadata (such as search
+indices and bucket properties) will not be replicated.
+
+Non-replication of certain configurations and metadata supports
+heterogeneous cluster configurations in replication, but there are
+operational steps you can take when you want homogeneous cluster
+configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you would change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Bucket and bucket type properties on the source cluster
+will _not_ be replicated from source clusters to sink clusters.
+
+If you want the properties for buckets or bucket types
+present on the source cluster to be propagated to sink clusters,
+you should update this data for each cluster at the same
+time you would change the source cluster.
diff --git a/content/riak/kv/2.2.6/using/performance.md b/content/riak/kv/2.2.6/using/performance.md
new file mode 100644
index 0000000000..928874be64
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/performance.md
@@ -0,0 +1,264 @@
+---
+title: "Improving Performance"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Performance"
+    identifier: "managing_performance"
+    weight: 206
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/kv/2.2.6/ops/tuning/linux/
+  - /riak/2.2.6/ops/tuning/linux/
+---
+
+Many Unix-like operating systems and distributions are tuned for desktop
+or light use out of the box and not for a production database. This
+guide describes recommended system performance tunings for operators of
+new and existing Riak clusters. The tunings presented in this guide
+should be considered a starting point. It is important to note what
+changes are made, and when, so that the impact of those changes can be
+measured.
+
+For performance and tuning recommendations specific to running Riak
+clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.2.6/using/performance/amazon-web-services).
+
+{{% note title="Note on other operating systems" %}}
+Unless otherwise specified, the tunings recommended below are for Linux
+distributions. Users implementing Riak on BSD and Solaris distributions can
+use these tuning recommendations to make analogous changes in those operating
+systems.
+{{% /note %}}
+
+## Storage and File System Tuning
+
+### Virtual Memory
+
+Due to the heavily I/O-focused profile of Riak, swap usage can result in
+the entire server becoming unresponsive. We recommend setting
+`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much
+as possible:
+
+```config
+vm.swappiness = 0
+```
+
+Ideally, you should disable swap to ensure that Riak's process pages are
+not swapped.
+Disabling swap will allow Riak to crash in situations where
+it runs out of memory. This will leave a crash dump file, named
+`erl_crash.dump`, in the `/var/log/riak` directory, which can be used to
+determine the cause of the memory usage.
+
+### Transparent Huge Pages (THP)
+
+Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately, this operation is rather OS-specific. As many of our customers are running Red Hat 6, we have included instructions for that platform below. If you are using a different operating system, please refer to the documentation for your OS.
+
+In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line:
+
+```
+transparent_hugepage=never
+```
+
+For the change to become effective, a server reboot is required.
+
+{{% note title="Note on Kernel Tuning Tools" %}}
+Some kernel tuning tools such as ktune specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the kernel tuning tool you are using as to how to disable THP.
+{{% /note %}}
+
+### Mounts
+
+Riak makes heavy use of disk I/O for its storage operations. It is
+important that you mount volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
+touched when read. This flag can be set temporarily using the following
+command:
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` option can be set in `/etc/fstab` to make the
+mount setting permanent.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is completely fair queuing
+or `cfq`, which is designed for desktop use. While `cfq` is a good
+general-purpose scheduler, it is not designed to provide the kind of
+throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* The `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* The `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+ +To check the scheduler depth for block device `sda`, use the following +command: + +```bash +cat /sys/block/sda/queue/nr_requests +``` + +To increase the scheduler depth to 1024, use the following command: + +```bash +echo 1024 > /sys/block/sda/queue/nr_requests +``` + +### Filesystem + +Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and +[XFS](http://xfs.org/index.php/Main_Page) are recommended on some +operating systems for greater reliability and recoverability. + +At this time, Basho can recommend using ZFS on Solaris, SmartOS, and +OmniOS. ZFS may work well with Riak on direct Solaris clones like +IllumOS, but we cannot yet recommend this. [ZFS on +Linux](http://zfsonlinux.org) is still too early in its project lifetime +to be recommendable for production use due to concerns that have been +raised about excessive memory use. ZFS on FreeBSD is more mature than +ZFS on Linux, but Basho has not yet performed sufficient performance and +reliability testing to recommend using ZFS and Riak on FreeBSD. + +In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and +[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on +operating systems on which ZFS or XFS are not available or recommended. + +The ext4 file system defaults include two options that increase +integrity but slow performance. Because Riak's integrity is based on +multiple nodes holding the same data, these two options can be changed +to boost I/O performance. We recommend setting `barrier=0` and +`data=writeback` when using the ext4 filesystem. + +Similarly, the XFS file system defaults can be optimized to improve +performance. We recommend setting `nobarrier`, `logbufs=8`, +`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem. + +As with the `noatime` setting, these settings should be added to +`/etc/fstab` so that they are persisted across server restarts. + +## Kernel and Network Tuning + +The following settings are minimally sufficient to improve many aspects +of Riak usage on Linux, and should be added or updated in +`/etc/sysctl.conf`: + +```config +net.ipv4.tcp_max_syn_backlog = 40000 +net.core.somaxconn = 40000 +net.core.wmem_default = 8388608 +net.core.rmem_default = 8388608 +net.ipv4.tcp_sack = 1 +net.ipv4.tcp_window_scaling = 1 +net.ipv4.tcp_fin_timeout = 15 +net.ipv4.tcp_keepalive_intvl = 30 +net.ipv4.tcp_tw_reuse = 1 +net.ipv4.tcp_moderate_rcvbuf = 1 +``` + +{{% note title="Note on system default" %}} +In general, these recommended values should be compared with the system +defaults and only changed if benchmarks or other performance metrics indicate +that networking is the bottleneck. +{{% /note %}} + +The following settings are optional, but may improve performance on a +10Gb network: + +```config +net.core.rmem_max = 134217728 +net.core.wmem_max = 134217728 +net.ipv4.tcp_mem = 134217728 134217728 134217728 +net.ipv4.tcp_rmem = 4096 277750 134217728 +net.ipv4.tcp_wmem = 4096 277750 134217728 +net.core.netdev_max_backlog = 300000 +``` + +Certain network interfaces ship with on-board features that have been +shown to hinder Riak network performance. These features can be disabled +via `ethtool`. 
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Re-tuning and re-testing will be required if these values are changed, as
+they affect all network operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity, at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho in nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, increasing the open
+files limit is necessary. See [Open Files Limit]({{<baseurl>}}riak/kv/2.2.6/using/performance/open-files-limit/) for more
+details.
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.2.6/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/2.2.6/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/2.2.6/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/2.2.6/using/performance/open-files-limit/)
diff --git a/content/riak/kv/2.2.6/using/performance/amazon-web-services.md b/content/riak/kv/2.2.6/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..573ad68344
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/performance/amazon-web-services.md
@@ -0,0 +1,243 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/tuning/aws
+  - /riak/kv/2.2.6/ops/tuning/aws
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types which encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are disk I/O, RAM, and network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used.
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-Optimized EC2 instances, which provide between 500 megabits per
+second and 1,000 megabits per second of throughput with [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available and recommended for use with Provisioned
+IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they would
+the same number of physical nodes to compensate for the performance
+variability caused by shared, virtualized resources. Plan to have more
+EC2-instance-based nodes than physical-server nodes when estimating cluster
+size with respect to node count.
+{{% /note %}}
+
+## Operating System
+
+### Clocks
+
+NTP is configured by default on Amazon EC2 Linux instances. Please
+refer to the [Set the Time for an
+Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html)
+section of the EC2 documentation for steps on verifying if NTP is
+working properly. If NTP is not working properly, significant clock
+drift can occur.
+
+### Mounts and Scheduler
+
+On EBS volumes, the **deadline** scheduler should be used. To check the
+scheduler in use for block device `xvdf`, for example, use the following
+command:
+
+```bash
+cat /sys/block/xvdf/queue/scheduler
+```
+
+To set the scheduler to deadline, use the following command:
+
+```bash
+echo deadline > /sys/block/xvdf/queue/scheduler
+```
+
+More information on the disk scheduler is available in [Improving Performance](../).
+
+### Virtual Memory Subsystem
+
+EBS volumes have considerably less bandwidth than hardware disks. To
+avoid saturating EBS bandwidth and inducing IO latency spikes, it is
+recommended to tune the Linux virtual memory subsystem to flush smaller
+amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings).
+
+### Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally
+and are not affected by a wider problem like an AWS service outage. Try
+to determine the cause of the problem from the data you have collected.
+If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or re-sold under Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk.
+
+Have your collected data ready when contacting TI Tokyo Client Services. A
+Client Services Engineer (CSE) might request log files, configuration
+files, or other information.
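+
+As a rough illustration, the kind of information described above can be
+gathered with a short shell script like the one below. This is a minimal
+sketch only; the log and configuration paths (`/var/log/riak`,
+`/etc/riak`) are assumptions that will vary with your installation
+method, so adjust them as needed.
+
+```bash
+#!/usr/bin/env bash
+# Collect basic forensic data into a timestamped archive.
+ts=$(date +%Y%m%d-%H%M%S)
+dest="/tmp/riak-forensics-$ts"
+mkdir -p "$dest"
+
+# System logs mentioned above (file names vary by distribution)
+dmesg > "$dest/dmesg.out"
+cp /var/log/syslog "$dest/" 2>/dev/null || cp /var/log/messages "$dest/" 2>/dev/null
+
+# Riak logs and configuration (assumed default package locations)
+cp -r /var/log/riak "$dest/riak-logs"
+cp -r /etc/riak "$dest/riak-config"
+
+tar czf "$dest.tar.gz" -C /tmp "riak-forensics-$ts"
+echo "Wrote $dest.tar.gz"
+```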
+
+## Data Loss
+
+Many failures either do not entail data loss or have minimal loss that
+can be repaired automatically, without intervention. Outage of a single
+node does not necessarily cause data loss, as other replicas of every
+key are available elsewhere in the cluster. Once the node is detected as
+down, other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called hinted handoff).
+
+The more severe data loss scenarios usually relate to hardware failure
+(in the case of AWS, service failure or instance termination). In the
+cases where data is lost, several options are available for restoring
+the data:
+
+1. Restore from backup. A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but can be used to partially restore data from
+   lost EBS volumes. If running in a RAID configuration, rebuilding the
+   array may also be possible.
+2. Restore from Multi-Datacenter Replication. If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the
+   `riak-repl` command.
+3. Restore using intra-cluster repair. Riak versions 1.2 and greater
+   include a "repair" feature which will restore lost partitions with
+   data from other replicas. This currently has to be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho CSE.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho are strongly recommended.
+
+## Benchmarking
+
+Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that
+simulates application operations by constructing and communicating
+approximately-compatible data payloads with the Riak cluster directly.
+
+Benchmarking is critical to determining the appropriate EC2 instance
+types, and strongly recommended. More information is available on
+benchmarking Riak clusters with [Basho Bench](../benchmarking).
+
+Besides running Basho Bench, we also advise that you load test Riak with
+your own tests to ensure that the load imparted by MapReduce queries,
+full-text queries, and index queries is within the expected range.
+
+## Simulating Upgrades, Scaling, and Failure States
+
+In addition to simply measuring performance, it is also important to
+measure how performance degrades when the cluster is not in a
+steady state. While under a simulation of live load, the following
+states might be simulated:
+
+1. Stop one or more nodes normally and restart them after a few moments
+   (simulates a [rolling upgrade](../../../setup/upgrading/cluster)).
+2. Join two or more nodes to the cluster.
+3. Have nodes leave the cluster (after step #2).
+4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then
+   restart it.
+5. Hard-reboot a node's instance using the AWS console and then
+   restart it.
+6. Hard-stop and destroy a node's instance and build a new one from
+   backup.
+7. Via networking, e.g. firewall, partition one or more nodes from
+   the rest of the cluster and then restore the original
+   configuration.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM.
+While this does not necessarily cause data loss, it may indicate that
+the cluster needs to be scaled out. While the Riak node is out, other
+nodes may also be at risk if free capacity is low on the rest of the
+cluster, so monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to update
+the node name.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart. In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance, generally performs better, but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. Along with EBS, you can optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
diff --git a/content/riak/kv/2.2.6/using/performance/benchmarking.md b/content/riak/kv/2.2.6/using/performance/benchmarking.md
new file mode 100644
index 0000000000..7d0f4df596
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/performance/benchmarking.md
@@ -0,0 +1,598 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/benchmarking
+  - /riak/kv/2.2.6/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   ```basho_bench```. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster.
+   Do not run the ```basho_bench``` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download ```basho_bench```
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/2.2.6/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building ```basho_bench``` from source. Later
+  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the ```erlang-crypto``` package, which
+  is required by ```basho_bench```.
+* Install ```git``` (to check out the ```basho_bench``` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory in which to generate the results:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed ```basho_bench``` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common ```~/``` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+```/home/username/bench_results/current/```.
+
+If you built ```basho_bench``` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput --- Operations per second over the duration of the test.
+* Latency at 99th percentile, 99.9th percentile, and max latency for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running basho_bench, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
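+
+For example, copying the results over to a workstation for graphing
+might look like the following. This is purely illustrative; the
+hostname and paths are hypothetical.
+
+```bash
+# Pull benchmark results from the load-generating machine (hypothetical
+# hostname and paths) into a local directory for graphing.
+rsync -avz user@loadtest-host:/home/username/bench_results/current/ \
+    ./bench_results/current/
+```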
+ +#### Installing R on Ubuntu + +``` +sudo apt-get install r-base +``` + +#### Installing R on Other Platforms + +- [More information](http://www.r-project.org/) +- [Download R](http://cran.r-project.org/mirrors.html) + +Follow the instructions for your platform to install R. + +### Generating Graphs + +If you have installed ```basho_bench``` from a pre-built package, and +you also have R installed on the same machine, you can generate the +current result graph with the following: + +```bash +Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/ +``` + +This will create a results file in +```/home/username/bench_results/summary.png```. + +If you have built ```basho_bench``` from source, you can just use +```make```. To generate a benchmark graph against the current +results, run: + +```bash +make results +``` + +This will create a results file in `tests/current/summary.png`. + +You can also run this manually: + +```bash +priv/summary.r -i tests/current +``` + +### Troubleshooting Graph Generation + +For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation) +section of the ```basho_bench/README```. + +## How does it work? + +When Basho Bench starts (`basho_bench.erl`), it reads the +configuration (`basho_bench_config.erl`), creates a new results +directory, and then sets up the test (`basho_bench_app.erl` and +`basho_bench_sup.erl`). + +During test setup, Basho Bench creates the following: + +* One **stats process** (`basho_bench_stats.erl`). This process + receives notifications when an operation completes, plus the + elapsed time of the operation, and stores it in a histogram. At + regular intervals, the histograms are dumped to `summary.csv` as + well as operation-specific latency CSVs (e.g. `put_latencies.csv` + for the PUT operation). +* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting + (`basho_bench_worker.erl`). The worker process wraps a driver + module, specified by the [driver](#driver) + configuration setting. The driver is randomly invoked using the + distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the + driver invokes operations is governed by the [mode](#mode) setting. + +Once these processes have been created and initialized, Basho Bench +sends a run command to all worker processes, causing them to begin the +test. Each worker is initialized with a common seed value for random +number generation to ensure that the generated workload is reproducible +at a later date. + +During the test, the workers repeatedly call `driver:run/4`, passing in +the next operation to run, a keygen function, a valuegen function, and +the last state of the driver. The worker process times the operation, +and reports this to the stats process when the operation has completed. + +Finally, once the test has been run for the duration specified in the +config file, all workers and stats processes are terminated and the +benchmark ends. The measured latency and throughput of the test can be +found in `./tests/current/`. Previous results are in timestamped +directories of the form `./tests/YYYYMMDD-HHMMSS/`. + +## Configuration + +Basho Bench ships with a number of sample configuration files, available +in the `/examples` directory. 
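+
+To give a sense of how the settings described below fit together, here
+is a minimal sketch of a complete configuration file for the
+```basho_bench_driver_riakc_pb``` driver. The values are illustrative
+defaults rather than recommendations; see the reference below for what
+each setting does.
+
+```erlang
+%% Illustrative basho_bench config for the riakc_pb driver.
+{mode, max}.
+{duration, 10}.
+{concurrent, 5}.
+{driver, basho_bench_driver_riakc_pb}.
+{operations, [{get, 4}, {put, 4}, {delete, 1}]}.
+%% int_to_bin wraps the keygen for drivers that require binary keys
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+{value_generator, {fixed_bin, 100}}.
+{riakc_pb_ips, [{127,0,0,1}]}.
+{riakc_pb_port, 8087}.
+{riakc_pb_bucket, <<"test">>}.
+```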
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`{driver:run/4}` function with a new operation. There are two possible
+values:
+
+* `{max}` --- generate as many ops per second as possible
+* `{rate, N}` --- generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e.: as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE, respectively.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                              [{{{basho_bench_driver_null,run,
+                                                 [asdfput,
+                                                  #Fun<basho_bench_keygen.4.4674>,
+                                                  #Fun<basho_bench_valgen.0.1334>,
+                                                  undefined]}}},
+                                               {{{basho_bench_worker,
+                                                  worker_next_op,1}}},
+                                               {{{basho_bench_worker,
+                                                  max_worker_run_loop,1}}}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` --- Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` --- Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` --- Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` --- Directly invokes the Bitcask API
+* `basho_bench_driver_dets` --- Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` --- operation completed successfully
+* `{error, Reason, NewState}` --- operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` --- operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` --- operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run.
+Specify the paths to this code using the `code_paths` configuration
+setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` --- generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` --- the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` --- the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` --- selects an integer from a uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` --- selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` --- the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` --- specifies an external
+  function that should return a key generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` --- takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` --- takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` --- generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` --- generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` --- generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` --- specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed
+% starting at 1000 bytes and a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+Default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+```
+
+#### riakclient_replies
+
+This value is used for R-values during a get operation, and W-values
+during a put operation.
+
+```erlang
+% Expect 1 reply.
+{riakclient_replies, 1}.
+```
+
+#### riakclient_bucket
+
+The Riak bucket to use for reading and writing values. The default is
+`<<"test">>`.
+
+```erlang
+% Use the "bench" bucket.
+{riakclient_bucket, <<"bench">>}.
+```
+
+### basho_bench_driver_riakc_pb Settings
+
+#### riakc_pb_ips
+
+A list of IP addresses to connect the workers to. A random IP will be
+chosen for each worker.
+
+The default is `{riakc_pb_ips, [{127,0,0,1}]}`
+
+```erlang
+% Connect to a cluster of 3 machines
+{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]}
+```
+
+#### riakc_pb_port
+
+The port on which to connect to the PBC interface.
+
+The default is `{riakc_pb_port, 8087}`
+
+#### riakc_pb_bucket
+
+The bucket to use for testing.
+
+The default is `{riakc_pb_bucket, <<"test">>}`
+
+### basho_bench_driver_http_raw Settings
+
+#### http_raw_ips
+
+A list of IP addresses to connect the workers to. Each worker makes
+requests to each IP in a round-robin fashion.
+
+The default is `{http_raw_ips, ["127.0.0.1"]}`
+
+```erlang
+% Connect to a cluster of machines in the 10.x network
+{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}.
+```
+
+#### http_raw_port
+
+Select the default port to connect to for the HTTP server.
+
+The default is `{http_raw_port, 8098}`.
+
+```erlang
+% Connect on port 8090
+{http_raw_port, 8090}.
+```
+
+#### http_raw_path
+
+The base path to use for accessing Riak, usually `"/riak/<bucket>"`.
+
+The default is `{http_raw_path, "/riak/test"}`.
+
+```erlang
+% Place test data in another_bucket
+{http_raw_path, "/riak/another_bucket"}.
+```
+
+#### http_raw_params
+
+Additional parameters to add to the end of the URL. This can be used
+to set the `r`/`w`/`dw`/`rw` parameters as desired.
+
+The default is `{http_raw_params, ""}`.
+ +```erlang +% Set R=1, W=1 for testing a system with n_val set to 1 +{http_raw_params, "?r=1&w=1"}. +``` + +#### http_raw_disconnect_frequency + +How often, in seconds or number of operations, the HTTP clients +(workers) should forcibly disconnect from the server. + +The default is `{http_raw_disconnect_frequency, infinity}` (which +means that Basho Bench should never forcibly disconnect). + +```erlang +% Disconnect after 60 seconds +{http_raw_disconnect_frequency, 60}. + +% Disconnect after 200 operations +{http_raw_disconnect_frequency, {ops, 200}}. +``` + +## Custom Driver + +A custom driver must expose the following callbacks. + +```erlang +% Create the worker +% ID is an integer +new(ID) -> {ok, State} or {error, Reason}. + +% Run an operation +run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}. +``` + +See the [existing +drivers](https://github.com/basho/basho_bench/tree/master/src) for +more details. diff --git a/content/riak/kv/2.2.6/using/performance/erlang.md b/content/riak/kv/2.2.6/using/performance/erlang.md new file mode 100644 index 0000000000..16386ce0bb --- /dev/null +++ b/content/riak/kv/2.2.6/using/performance/erlang.md @@ -0,0 +1,367 @@ +--- +title: "Erlang VM Tuning" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Erlang VM" + identifier: "performance_erlang" + weight: 105 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.2.6/ops/tuning/erlang + - /riak/kv/2.2.6/ops/tuning/erlang +--- + +Riak was written almost exclusively in [Erlang](http://www.erlang.org) +and runs on an Erlang virtual machine (VM), which makes proper Erlang VM +tuning an important part of optimizing Riak performance. The Erlang VM +itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm). + +The table below lists some of the parameters that are available, showing +both their names as used in Erlang and their names as Riak parameters. 
+
+Erlang parameter | Riak parameter
+:----------------|:--------------
+[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads`
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K`
+[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit`
+[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports`
+[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online`
+[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W`
+[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size`
+[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables`
+[`+scl`](http://www.erlang.org/doc/main/erl.html#+scl) | `erlang.schedulers.compaction_of_load`
+[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval`
+[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp`
+[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing`
+[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size`
+[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime`
+[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after`
+[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump`
+[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables`
+`-name` | `nodename`
+
+{{% note title="Note on upgrading to 2.0" %}}
+In versions of Riak prior to 2.0, Erlang VM-related parameters were specified
+in a `vm.args` configuration file; in versions 2.0 and later, all
+Erlang-VM-specific parameters are set in the `riak.conf` file. If you're
+upgrading to 2.0 from an earlier version, you can still use your old `vm.args`
+if you wish. Please note, however, that if you set one or more parameters in
+both `vm.args` and in `riak.conf`, the settings in `vm.args` will override
+those in `riak.conf`.
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system.
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` parameter to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. epmd uses an unpredictable port for inter-node communication
+by default, binding to port 0, which means that it uses the first
+available port. This can make it difficult to configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively. The
+following would set the range to ports between 3000 and 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 3000
+erlang.distribution.port_range.maximum = 5000
+```
+
+```appconfig
+%% The older, app.config-based system uses different parameter names
+%% for specifying the minimum and maximum port
+
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 3000},
+          {inet_dist_listen_max, 5000}
+          % ...
+         ]}
+```
+
+You can set the Erlang VM to use a single port by setting the minimum to
+the desired port while setting no maximum. The following would set the
+port to 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 5000
+```
+
+```appconfig
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 5000},
+          % ...
+         ]}
+```
+
+If the minimum port is unset, the Erlang VM will listen on a random
+high-numbered port.
+
+### Maximum Ports
+
+You can set the maximum number of concurrent ports/sockets used by the
+Erlang VM using the `erlang.max_ports` setting. Possible values range
+from 1024 to 134217727. The default is 65536. In `vm.args` you can use
+either `+Q` or `-env ERL_MAX_PORTS`.
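+
+For illustration, here is a minimal sketch that raises the cap to
+131072; the value is arbitrary, and any value in the valid range is set
+the same way:
+
+```riakconf
+erlang.max_ports = 131072
+```
+
+```vmargs
++Q 131072
+```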
+
+## Asynchronous Thread Pool
+
+If thread support is available in your Erlang VM, you can set the number
+of asynchronous threads in the Erlang VM's asynchronous thread pool
+using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0
+to 1024. If thread support is available on your OS, the default is 64.
+Below is an example setting the number of async threads to 600:
+
+```riakconf
+erlang.async_threads = 600
+```
+
+```vmargs
++A 600
+```
+
+### Stack Size
+
+In addition to the number of asynchronous threads, you can determine the
+memory allocated to each thread using the
+`erlang.async_threads.stack_size` parameter, which corresponds to the
+`+a` Erlang flag. You can specify that size in Riak using KB, MB, GB,
+etc. The valid range is 16-8192 kilowords, which translates to 64-32768
+KB on 32-bit architectures. While there is no default, we suggest a
+stack size of 16 kilowords, which translates to 64 KB. We suggest such a
+small size because the number of asynchronous threads, as determined by
+`erlang.async_threads`, might be quite large in your Erlang VM. The
+suggested 64 KB is enough for drivers delivered with Erlang/OTP but
+might not be large enough to accommodate drivers that use the
+`driver_async()` functionality, documented
+[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend
+setting higher values with caution, always keeping the number of
+available threads in mind.
+
+## Kernel Polling
+
+You can utilize kernel polling in your Erlang distribution if your OS
+supports it. Kernel polling can improve performance if many file
+descriptors are in use; the more file descriptors, the larger an effect
+kernel polling may have on performance. Kernel polling is enabled by
+default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`.
+This corresponds to the
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the
+Erlang VM. You can disable it by setting `erlang.K` to `off`.
+
+## Warning Messages
+
+Erlang's
+[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an
+event manager that registers error, warning, and info events from the
+Erlang runtime. By default, events from the `error_logger` are mapped as
+warnings, but you can also set messages to be mapped as errors or info
+reports using the `erlang.W` parameter (or `+W` in `vm.args`). The
+possible values are `w` (warnings), `e` (errors), or `i` (info reports).
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) \(Erlang Term Storage)
+for some processes that require fast in-memory access in constant
+time (rather than logarithmic time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default value for `erlang.max_ets_tables` is the same as
+the default for `erlang.process_limit` (explained in the section [above](#process-limit)).
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can determine that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every N seconds,
+where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can determine how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
diff --git a/content/riak/kv/2.2.6/using/performance/latency-reduction.md b/content/riak/kv/2.2.6/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..b80ecf1c5d
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/performance/latency-reduction.md
@@ -0,0 +1,263 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/tuning/latency-reduction
+  - /riak/kv/2.2.6/ops/tuning/latency-reduction
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about them.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB. Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric | Explanation
+:-----------------------------|:-----------
+`node_get_fsm_objsize_mean` | The mean object size encountered by this node in the last minute
+`node_get_fsm_objsize_median` | The median object size encountered by this node in the last minute
+`node_get_fsm_objsize_95` | The 95th-percentile object size encountered by this node in the last minute
+`node_get_fsm_objsize_99` | The 99th-percentile object size encountered by this node in the last minute
+`node_get_fsm_objsize_100` | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and re-start your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes). This increases the size of the
+distributed Erlang buffer from its default of 1024 KB. Re-start your
+node when configuration changes have been made.
+
+Large objects can also impact latency even if they're only present on
+some nodes. If increased latency occurs only on N nodes, where N is your
+[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes.
+
+If large objects are suspected, you should also audit the behavior of
+siblings in your cluster, as explained in the [next section](#siblings).
+
+## Siblings
+
+In Riak, object conflicts are handled by keeping multiple versions of
+the object in the cluster either until a client takes action to resolve
+the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects).
+
+### Mitigation
+
+The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor
+object size (or via an HTTP `GET` request to `/stats`). In the output of
+`riak-admin status` on each node, you'll see the following
+sibling-related statistics:
+
+Metric | Explanation
+:------------------------------|:-----------
+`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+
+Is there an upward trend in these statistics over time? Are there any
+large outliers? Do these trends correspond to your observed latency
+spikes?
+
+If you believe that sibling creation problems could be responsible for
+latency issues in your cluster, you can start by checking the following:
+
+* If `allow_mult` is set to `true` for some or all of your buckets, be
+  sure that your application is correctly resolving siblings. Be sure to
+  read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate.
+  If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly.
+* Application errors are a common source of problems with
+  siblings. Updating the same key over and over without passing a
+  [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) \(DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W is set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. The maximum file size setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn), while setting it higher will induce less
+frequent merges with more I/O churn. To find settings that are ideal for
+your use case, we recommend checking out our guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments.
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak is a heavily I/O- and network-intensive system.
+Bottlenecks on either front can lead to undue latency in your cluster.
+We recommend an active monitoring strategy to detect problems
+immediately when they arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+at your disposal, including
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat).
+
+To diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+To set an object size threshold past which a write will succeed but will
+register a warning in the logs, adjust the
+`object.size.warning_threshold` parameter. The default is `5MB`.
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs an error (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
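+
+As a rough sketch, the object settings discussed in this section can be
+collected in `riak.conf` as follows; the values shown are simply the
+defaults named above, so adjust them to your own workload:
+
+```riakconf
+object.size.warning_threshold = 5MB
+object.size.maximum = 50MB
+object.siblings.warning_threshold = 25
+object.siblings.maximum = 100
+object.format = 1
+```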
diff --git a/content/riak/kv/2.2.6/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.2.6/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..f7ec83749f
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,42 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+---
+
+[perf index]: {{<baseurl>}}riak/kv/2.2.6/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
diff --git a/content/riak/kv/2.2.6/using/performance/open-files-limit.md b/content/riak/kv/2.2.6/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..106472873a
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/performance/open-files-limit.md
@@ -0,0 +1,347 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/tuning/open-files-limit/
+  - /riak/kv/2.2.6/ops/tuning/open-files-limit/
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation. The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+On most operating systems, you can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session required pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session required pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+        "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
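+
+If you'd like to confirm the kernel-level values directly, `sysctl` can
+read back the keys set above (an optional sanity check, not one of the
+required steps):
+
+```bash
+sysctl kern.maxfiles kern.maxfilesperproc
+```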
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file.
+
+4\. Restart the system, then verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```bash
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
diff --git a/content/riak/kv/2.2.6/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.2.6/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..50eaf22ae9
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,45 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
diff --git a/content/riak/kv/2.2.6/using/reference.md b/content/riak/kv/2.2.6/using/reference.md
new file mode 100644
index 0000000000..b592a18277
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference.md
@@ -0,0 +1,130 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
diff --git a/content/riak/kv/2.2.6/using/reference/architecture.md b/content/riak/kv/2.2.6/using/reference/architecture.md
new file mode 100644
index 0000000000..ecfae7c2dd
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/architecture.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+#menu:
+#  riak_kv-2.2.6:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+---
+
+<!-- TODO: Content -->
diff --git a/content/riak/kv/2.2.6/using/reference/bucket-types.md b/content/riak/kv/2.2.6/using/reference/bucket-types.md
new file mode 100644
index 0000000000..35ea693cc6
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/bucket-types.md
@@ -0,0 +1,818 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+---
+
+Bucket types allow groups of buckets to share configuration details and
+allow Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
+cluster to a pre-2.0 version _as long as you have not created and activated a
+bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/2.2.6/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, related to
+  [Riak data types]({{<baseurl>}}riak/kv/2.2.6/developing/data-types), and [strong consistency]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/strong-consistency) respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a
+bucket `my_bucket` and assign it a type the way that you would, say,
+set `allow_mult` to `false` or `n_val` to `5`, because there is no
+`type` parameter contained within the bucket's properties (i.e.
+`props`).
+
+Instead, bucket types are applied to buckets _on the basis of how those
+buckets are queried_. Queries involving bucket types take the following
+form:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+In the older system, only bucket and key are specified in queries:
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+## When to Use Bucket Types
+
+In many respects, bucket types are a major improvement over the older
+system of bucket configuration, including the following:
+
+* Bucket types are more flexible because they enable you to define a
+  bucket configuration and then change it if you need to.
+* Bucket types are more reliable because the buckets that bear a given
+  type only have their properties changed when the type is changed.
+  Previously, it was possible to change the properties of a bucket only
+  through client requests.
+* Whereas bucket properties can only be altered by clients interacting
+  with Riak, bucket types are more of an operational concept. The
+  `riak-admin bucket-type` interface (discussed in depth below) enables
+  you to manage bucket configurations on the operations side, without
+  recourse to Riak clients.
+
+For these reasons, we recommend _always_ using bucket types in versions
+of Riak 2.0 and later.
+
+## Managing Bucket Types Through the Command Line
+
+Bucket types are created, updated, activated, and more through the
+`riak-admin bucket-type` interface.
+
+Below is a full list of available sub-commands:
+
+Command | Action | Form |
+:-------|:-------|:-----|
+`create` | Create or modify a bucket type before activation | `create <type> <json>` |
+`activate` | Activate a bucket type | `activate <type>` |
+`list` | List all currently available bucket types and their activation status | `list` |
+`status` | Display the status and properties of a specific bucket type | `status <type>` |
+`update` | Update a bucket type after activation | `update <type> <json>` |
+
+### Creating a Bucket Type
+
+Creating new bucket types involves using the `create <type> <json>`
+command, where `<type>` is the name of the type and `<json>` is a JSON
+object of the following form:
+
+```json
+{
+  "props": {
+    "prop1": "val1",
+    "prop2": "val2",
+    ...
+  }
+}
+```
+
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.2.6/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.2.6/developing/getting-started) section.
+
+If creation is successful (for example, after running `riak-admin bucket-type create type_using_defaults '{"props":{}}'`), you should see the following output:
+
+```
+type_using_defaults created
+```
+
+{{% note %}}
+The `create` command can be run multiple times prior to a bucket type being
+activated. Riak will persist only those properties contained in the final call
+of the command.
+{{% /note %}}
+
+Creating bucket types that assign properties _always_ involves passing
+stringified JSON to the `create` command. One way to do that is to pass
+a JSON string directly. The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak-admin bucket-type create from_json_file "`cat props.json`"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
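+
+As a further sketch, several properties can be combined in a single
+`create` call; the type name and values below are purely illustrative:
+
+```bash
+riak-admin bucket-type create n3_type '{"props":{"n_val":3,"allow_mult":true}}'
+```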
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). Here is an example console output:
+
+```bash
+riak-admin bucket-type list
+```
+
+An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```bash
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock: 20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after a bucket
+type is created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then these
+properties cannot be changed once the type has been created.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+{{% /note %}}
+
+## Buckets as Namespaces
+
+In versions of Riak prior to 2.0, all queries are made to a bucket/key
+pair, as in the following example read request:
+
+```java
+Location myKey = new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch = new FetchValue.Builder(myKey).build();
+client.execute(fetch);
+```
+
+```ruby
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```php
+$location = new Location('my_key', new Bucket('my_bucket'));
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```csharp
+var id = new RiakObjectId("my_bucket", "my_key");
+client.Get(id);
+```
+
+```javascript
+client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Object} = riakc_pb_socket:get(Pid,
+                                   <<"my_bucket">>,
+                                   <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+```
+
+With the addition of bucket types in Riak 2.0, bucket types can be used
+as _an additional namespace_ on top of buckets and keys. The same bucket
+name can be associated with completely different data if it is used in
+accordance with a different type. Thus, the following two requests will
+be made to _completely different objects_, even though the bucket and key
+names are the same:
+
+```java
+Location key1 =
+  new Location(new Namespace("type1", "my_bucket"), "my_key");
+Location key2 =
+  new Location(new Namespace("type2", "my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(key1).build();
+FetchValue fetch2 = new FetchValue.Builder(key2).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type2'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("type1", "my_bucket", "my_key");
+var id2 = new RiakObjectId("type2", "my_bucket", "my_key");
+var rslt1 = client.Get(id1);
+var rslt2 = client.Get(id2);
+```
+
+```javascript
+client.fetchValue({
+  bucketType: 'type1', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+
+client.fetchValue({
+  bucketType: 'type2', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"type1">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 {<<"type2">>, <<"my_bucket">>},
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key
+```
+
+{{% note title="Note on object location" %}}
+In Riak 2.x, _all requests_ must be made to a location specified by a bucket
+type, bucket, and key rather than to a bucket/key pair, as in previous
+versions.
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
+```
+
+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`.
+
+The consequence is that applications that have previously ignored
+conflict resolution in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+Although the `allow_mult` parameter was not touched when specifying this
+bucket type's properties, viewing the type's properties shows that
+`allow_mult` is set to `true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind in Riak 2.0 and later whenever you
+create, activate, and use your own bucket types. It is still possible to
+set `allow_mult` to `false` in any given bucket type, but it must be
+done explicitly. If we wanted to set `allow_mult` to `false` in our
+`n_val_of_2` bucket type from above, we would update the existing type
+as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/2.2.6/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/2.2.6/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again.
If it still does not
+    work, then there may be a network partition or other issues that
+    need to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key =
+  new Location(new Namespace("no_siblings", "sensitive_user_data"), "user19735");
+RiakObject obj = new RiakObject()
+  .setContentType("application/json")
+  .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(key)
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{ ... user data ... }")
+  ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` using the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket.
If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(allYourBaseKey)
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This request would create the bucket `old_memes` and apply the
+configuration contained in the `no_siblings` bucket type to it, all at
+once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type
+from above if we didn't want to deal with siblings, vclocks, and the
+like, and we could use a `siblings_allowed` bucket type (with all of the
+default properties, including `allow_mult` set to `true`). This would
+give us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
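+
+To illustrate that isolation, here is a minimal sketch using the Python
+client. It assumes that the `no_siblings` and `siblings_allowed` types
+described above have both been created and activated:
+
+```python
+# Store a distinct value under the same bucket/key in each of the four
+# bucket type/bucket pairs, then read them all back to show that the
+# four keyspaces are fully isolated.
+pairs = [('no_siblings', 'old_memes'), ('no_siblings', 'new_memes'),
+         ('siblings_allowed', 'old_memes'), ('siblings_allowed', 'new_memes')]
+
+for btype, bname in pairs:
+    bucket = client.bucket_type(btype).bucket(bname)
+    obj = bucket.new('favorite_meme',
+                     data='stored via %s/%s' % (btype, bname),
+                     content_type='text/plain')
+    obj.store()
+
+for btype, bname in pairs:
+    obj = client.bucket_type(btype).bucket(bname).get('favorite_meme')
+    print(btype, bname, obj.data)  # four different values
+```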
diff --git a/content/riak/kv/2.2.6/using/reference/custom-code.md b/content/riak/kv/2.2.6/using/reference/custom-code.md
new file mode 100644
index 0000000000..f55d5fe5fa
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/custom-code.md
@@ -0,0 +1,131 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/advanced/install-custom-code/
+  - /riak/kv/2.2.6/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/2.2.6/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/2.2.6/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. Note that in Erlang, the file
+name must match the module name, so if you are given a file named
+`validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+         on supported platforms
+
+Compiling the module is a straightforward process:
+
+```text
+erlc validate_json.erl
+```
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+Next, you'll need to choose a directory where compiled modules can be
+stored and from which they can be loaded. For our example, we'll use a
+temporary directory, `/tmp/beams`, but for production functions you
+should choose a directory based on your own requirements, such that the
+modules will be available where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+### Configure
+
+Copy the compiled `validate_json.beam` file to the `/tmp/beams` directory:
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again, in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+  %% ...
+  {add_paths, ["/tmp/beams/"]},
+  %% ...
+```
+
+After updating `app.config`, Riak must be restarted. When rolling
+configuration changes out to multiple nodes in production, restart the
+nodes one at a time, taking time to ensure that the Riak key/value store
+has fully initialized and become available for use before moving on.
+
+You can confirm availability with the `riak-admin wait-for-service`
+command, as detailed in the [Commands documentation]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure `riak_kv` is active before restarting the
+next node.
+{{% /note %}} diff --git a/content/riak/kv/2.2.6/using/reference/failure-recovery.md b/content/riak/kv/2.2.6/using/reference/failure-recovery.md new file mode 100644 index 0000000000..293aa47499 --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/failure-recovery.md @@ -0,0 +1,80 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+```
diff --git a/content/riak/kv/2.2.6/using/reference/handoff.md b/content/riak/kv/2.2.6/using/reference/handoff.md
new file mode 100644
index 0000000000..1912f86b76
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/handoff.md
@@ -0,0 +1,197 @@
+---
+title: "Handoff Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Handoff"
+    identifier: "managing_ref_handoff"
+    weight: 101
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/handoff/
+  - /riak/kv/2.2.6/ops/running/handoff/
+---
+
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/handoff
+
+Riak is a distributed system built with two essential goals in mind:
+
+* **fault tolerance**, whereby a Riak cluster can withstand node
+  failure, network partitions, and other events in a way that does not
+  disrupt normal functioning, and
+* **scalability**, whereby operators can gracefully add nodes to and
+  remove nodes from a Riak cluster
+
+Both of these goals demand that Riak be able to either temporarily or
+permanently re-assign responsibility for portions of the keyspace. That
+re-assignment is referred to as **intra-cluster handoff** (or simply
+**handoff** in our documentation).
+
+## Types of Handoff
+
+Intra-cluster handoff typically takes one of two forms: **hinted
+handoff** and **ownership transfer**.
+
+Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick
+up the slack, so to speak, assuming responsibility for node C's
+operations. When node C comes back online, responsibility will be handed
+back to the original vnodes.
+
+Ownership transfer is different because it is meant to be permanent.
+It occurs when a [vnode]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very
+makeup of a cluster changes, e.g. when nodes are added or removed from
+the cluster. In this case, responsibility for portions of the keyspace
+needs to be fundamentally re-assigned.
+
+Both types of handoff are handled automatically by Riak. Operators do
+have the option, however, of enabling and disabling handoff on
+particular nodes or all nodes and of configuring key aspects of Riak's
+handoff behavior. More information can be found below.
+
+## Configuring Handoff
+
+A full listing of configurable parameters can be found in our
+[configuration files]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#intra-cluster-handoff)
+document. The sections below provide a more narrative description of
+handoff configuration.
+
+### SSL
+
+If you want to encrypt handoff behavior within a Riak cluster, you need
+to provide each node with appropriate paths for an SSL certfile (and
+potentially a keyfile). The configuration below would designate a
+certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`:
+
+```riakconf
+handoff.ssl.certfile = /ssl_dir/cert.pem
+handoff.ssl.keyfile = /ssl_dir/key.pem
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_ssl_options, [
+        {certfile, "/ssl_dir/cert.pem"},
+        {keyfile, "/ssl_dir/key.pem"}
+    ]},
+    %% Other configs
+]}
+```
+
+### Port
+
+You can set the port used by Riak for handoff-related interactions using
+the `handoff.port` parameter.
The default is 8099. This would change the
+port to 9000:
+
+```riakconf
+handoff.port = 9000
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_port, 9000},
+    %% Other configs
+]}
+```
+
+### Background Manager
+
+Riak has an optional background manager that limits handoff activity in
+the name of saving resources. The manager can help prevent system
+response degradation during times of heavy load, when multiple
+background tasks may contend for the same system resources. The
+background manager is disabled by default. The following will enable it:
+
+```riakconf
+handoff.use_background_manager = on
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_use_background_manager, on},
+    %% Other configs
+]}
+```
+
+### Maximum Rejects
+
+If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search/),
+those subsystems can block handoff of primary key/value data, i.e. data
+that you interact with via normal reads and writes.
+
+The `handoff.max_rejects` setting caps the duration for which a
+[vnode]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode) can be blocked: the maximum is the value of
+`handoff.max_rejects` multiplied by the value of
+[`vnode_management_timer`]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#vnode_management_timer).
+Thus, if you set `handoff.max_rejects` to 10 and
+`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems
+can block K/V handoff for a maximum of 50 seconds. The default for
+`handoff.max_rejects` is 6, while the default for
+`vnode_management_timer` is `10s`. This would set `max_rejects` to 10:
+
+```riakconf
+handoff.max_rejects = 10
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_rejected_max, 10},
+    %% Other configs
+]}
+```
+
+### Transfer Limit
+
+You can adjust the number of node-to-node transfers (which includes
+handoff) using the `transfer_limit` parameter. The default is 2. Setting
+this higher allows more concurrent transfers, at the expense of greater
+resource consumption. This would set `transfer_limit` to 5:
+
+```riakconf
+transfer_limit = 5
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_concurrency, 5},
+    %% Other configs
+]}
+```
+
+## Enabling and Disabling Handoff
+
+Handoff can be enabled and disabled in two ways: via configuration or
+on the command line.
+
+### Enabling and Disabling via Configuration
+
+You can enable and disable both outbound and inbound handoff on a node
+using the `handoff.outbound` and `handoff.inbound` settings,
+respectively. Both are enabled by default. The following would disable
+both:
+
+```riakconf
+handoff.outbound = off
+handoff.inbound = off
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {disable_outbound_handoff, true},
+    {disable_inbound_handoff, true},
+    %% Other configs
+]}
+```
+
+### Enabling and Disabling Through the Command Line
+
+Check out [Cluster Operations: Handoff][cluster ops handoff] for steps on enabling and disabling handoff via the command line.
diff --git a/content/riak/kv/2.2.6/using/reference/jmx.md b/content/riak/kv/2.2.6/using/reference/jmx.md new file mode 100644 index 0000000000..132791b833 --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/jmx.md @@ -0,0 +1,186 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/2.2.6/ops/running/monitoring/jmx + - /riak/kv/2.2.6/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td>
+    <td>float</td>
+    <td>Mean PUT time (microseconds)</td>
+  </tr>
+  <tr>
+    <td><tt>NodePutFsmTimeMedian</tt></td>
+    <td>float</td>
+    <td>Median PUT time (microseconds)</td>
+  </tr>
+  <tr>
+    <td><tt>NodePuts</tt></td>
+    <td>int</td>
+    <td>Number of PUTs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>NodePutsTotal</tt></td>
+    <td>int</td>
+    <td>Number of PUTs since node start</td>
+  </tr>
+  <tr>
+    <td><tt>PBCActive</tt></td>
+    <td>int</td>
+    <td>Number of active Protocol Buffers connections</td>
+  </tr>
+  <tr>
+    <td><tt>PBCConnects</tt></td>
+    <td>int</td>
+    <td>Number of Protocol Buffers connections in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>PBCConnectsTotal</tt></td>
+    <td>int</td>
+    <td>Number of Protocol Buffers connections since node start</td>
+  </tr>
+  <tr>
+    <td><tt>RingCreationSize</tt></td>
+    <td>int</td>
+    <td>Number of partitions in Riak ring</td>
+  </tr>
+  <tr>
+    <td><tt>VnodeGets</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level GETs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>VnodeGetsTotal</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level GETs since node start</td>
+  </tr>
+  <tr>
+    <td><tt>VnodePuts</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level PUTs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>VnodePutsTotal</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level PUTs since node start</td>
+  </tr>
+</table>
diff --git a/content/riak/kv/2.2.6/using/reference/logging.md b/content/riak/kv/2.2.6/using/reference/logging.md
new file mode 100644
index 0000000000..0ccbf01a0f
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/logging.md
@@ -0,0 +1,297 @@
+---
+title: "Logging Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Logging"
+    identifier: "managing_ref_logging"
+    weight: 100
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/logging
+  - /riak/kv/2.2.6/ops/running/logging
+---
+
+[cluster ops log]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/logging
+
+Logging in Riak KV is handled by a Basho-produced logging framework for
+[Erlang](http://www.erlang.org) called
+[lager](https://github.com/basho/lager).
+
+lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document.
+
+## Log Directory
+
+Riak's log files are stored in a `/log` directory on each node. The
+location of that directory differs from platform to platform. The table
+below shows you where log files are stored on all supported operating
+systems.
+
+OS | Directory
+:--|:---------
+Ubuntu, Debian, CentOS, RHEL | `/var/log/riak`
+Solaris, OpenSolaris | `/opt/riak/log`
+Source install and Mac OS X | `./log` (where the `.` represents the root installation directory)
+
+## Log Files
+
+Below is a list of files that can be found in each node's `/log`
+directory:
+
+File | Significance
+:----|:------------
+`console.log` | General messages from all Riak subsystems
+`crash.log` | Catastrophic events, such as node failures, running out of disk space, etc.
+`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs
+`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak
+`run_erl.log` | The log file for an Erlang process called `run_erl`, including the command-line arguments used when starting Riak. This file can typically be ignored.
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
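+
+As a minimal illustration, the sketch below parses lines in this format
+with a regular expression. The sample line is made up but
+format-conforming, and the pattern is an assumption to adapt to the
+levels and prefixes you actually see; note the warning below about
+messages that contain embedded newlines:
+
+```python
+import re
+
+# Pattern for the documented format:
+#   <date> <time> [<level>] <PID> <prefix>: <message>
+# The prefix is not always present, so it is optional here. Crash logs
+# use a different format and will not match.
+LOG_LINE = re.compile(
+    r'^(?P<date>\d{4}-\d{2}-\d{2}) '
+    r'(?P<time>\d{2}:\d{2}:\d{2}\.\d{3}) '
+    r'\[(?P<level>\w+)\] '
+    r'(?P<pid><\d+\.\d+\.\d+>) '
+    r'(?:(?P<prefix>\S+): )?'
+    r'(?P<message>.*)$'
+)
+
+def parse_line(line):
+    """Return a dict of log fields, or None for non-matching lines."""
+    match = LOG_LINE.match(line)
+    return match.groupdict() if match else None
+
+sample = "2016-01-01 12:00:00.123 [info] <0.95.0> alarm_handler: {set,...}"
+print(parse_line(sample))
+```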
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.2.6, a few of the log messages may contain newline
+characters, preventing external tools from reliably identifying the end
+of each log message during ingestion.
+
+A known workaround is to ingest not the logs enabled by the
+`log.console` configurable parameter but rather the logs enabled by the
+`log.syslog` configurable parameter, after processing by syslog, e.g.
+using the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option (see [this StackExchange topic
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474))
+or the equivalent in other syslog implementations.
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable and
+disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+Riak stores error messages in `./log/error.log` by default. You can
+change this using the `log.error.file` parameter.
Here is an
+example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated either when a certain size threshold is reached and/or at
+designated times.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` --- Every night at midnight
+* `$D23` --- Every day at 23:00 (11 pm)
+* `$W0D20` --- Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` --- On the first day of every month at midnight
+* `$M5D6` --- On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set with the
+`log.crash.maximum_message_size` parameter, using any size denomination
+you wish, e.g. `KB` or `MB`. The default is 64 KB. The following would
+set the maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose an identifier to be
+prepended to each syslog message. The identifier is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from among the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages from among the available facilities, which are listed below.
+The default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither.
This is determined by the value that you give to the
+`log.console` parameter, which gives you one of four options:
+
+* `file` --- Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/2.2.6/setup/installing/source), but will differ on platform-specific installations,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` --- Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-cli/#attach-direct) command
+* `both` --- Console logs will be emitted both to a file and to standard
+  output
+* `off` --- Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log]
diff --git a/content/riak/kv/2.2.6/using/reference/multi-datacenter.md b/content/riak/kv/2.2.6/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..adc1dd1828
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/multi-datacenter.md
@@ -0,0 +1,48 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling and disabling of per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][ref mdc comparison]
diff --git a/content/riak/kv/2.2.6/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.2.6/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..25fb6f172a
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,96 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/mdc/comparison
+  - /riak/kv/2.2.6/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/2.2.6/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/2.2.6/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destinations of replication data. Sites and listeners are
+  manually configured on each node in a cluster. This can be a burden to
+  the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity arrived in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters is exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes and the sink has N nodes, there will be M
+  realtime connections.
Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
diff --git a/content/riak/kv/2.2.6/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/2.2.6/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..42400f1a43
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,170 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/mdc/monitoring
+  - /riak/kv/2.2.6/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identifying and trending issues or delays in realtime replication is
+important for finding a root cause, while alerting is important for
+addressing any SLA-impacting issues or delays. We recommend combining
+the two approaches below when monitoring Riak's realtime replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute.
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+---
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The value of `rt_dirty` quantifies the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue backups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+you can increase the maximum size of the queue (displayed in the
+`realtime_queue_stats` section of the replication status output as
+`max_bytes`) to accommodate the expected load.
+
+---
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
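+
+Before turning to canary objects, here is a minimal sketch of the
+statistics-based approach: it polls the HTTP `/riak-repl/stats` endpoint
+once per minute (per the note above) and flags the conditions discussed
+in this section. The host, port, threshold, and alert handling are all
+assumptions to adapt to your environment:
+
+```python
+import json
+import time
+import urllib.request
+
+STATS_URL = "http://localhost:8098/riak-repl/stats"  # assumed host/port
+PENDING_THRESHOLD = 10000  # illustrative, not a recommendation
+
+def check_replication(url=STATS_URL):
+    """Fetch replication stats and return a list of alert strings."""
+    with urllib.request.urlopen(url) as resp:
+        stats = json.load(resp)
+
+    alerts = []
+    # Any non-zero rt_dirty indicates a realtime error; a fullsync
+    # is suggested whenever this is non-zero.
+    if stats.get("rt_dirty", 0) > 0:
+        alerts.append("rt_dirty is non-zero: schedule a fullsync")
+
+    rtq = stats.get("realtime_queue_stats", {})
+    if rtq.get("pending", 0) > PENDING_THRESHOLD:
+        alerts.append("realtime queue is backing up")
+    if rtq.get("drops", 0) > 0:
+        alerts.append("objects have been dropped from the realtime queue")
+    return alerts
+
+while True:
+    for alert in check_replication():
+        print(alert)  # replace with your alerting mechanism
+    time.sleep(60)  # stats are computed over a 60-second sliding window
+```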
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+1. Perform a GET for your canary object on both your source and sink
+   clusters, noting their states. The state of the object in each
+   cluster can be referred to as state `S0`, or the object's initial
+   state.
+2. PUT an update for your canary object to the source cluster, updating
+   the state of the object to the next state, `S1`.
+3. Perform a GET for your canary on the sink cluster, comparing the
+   state of the object on the source cluster to the state of the object
+   on the sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+A rough estimate of how long it takes an object PUT to a source cluster
+to be replicated to a sink cluster can be obtained by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
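+
+Below is a minimal sketch of the general process and the first
+estimation method, using plain HTTP. The hosts, bucket, key, and SLA
+threshold are all assumptions to adapt to your environment:
+
+```python
+import time
+import urllib.request
+
+SOURCE = "http://source-node:8098"  # assumed source-cluster node
+SINK = "http://sink-node:8098"      # assumed sink-cluster node
+KEY_PATH = "/buckets/canary/keys/canary_object"  # hypothetical canary
+SLA_SECONDS = 30                    # assumed SLA threshold
+
+def put_canary(state):
+    """Write the next canary state to the source cluster."""
+    req = urllib.request.Request(
+        SOURCE + KEY_PATH,
+        data=state.encode(),
+        headers={"Content-Type": "text/plain"},
+        method="PUT",
+    )
+    urllib.request.urlopen(req)
+
+def get_canary(base):
+    with urllib.request.urlopen(base + KEY_PATH) as resp:
+        return resp.read().decode()
+
+state = "S%d" % int(time.time())  # a unique next state for this run
+started = time.time()
+put_canary(state)
+
+# Repeat the sink GET (step 3) until the new state appears or the SLA
+# threshold is exceeded.
+while get_canary(SINK) != state:
+    if time.time() - started > SLA_SECONDS:
+        raise SystemExit("replication SLA exceeded for canary object")
+    time.sleep(1)
+
+print("replicated in roughly %.1f seconds" % (time.time() - started))
+```
+
+Remember that this only exercises the realtime queue of whichever source
+node receives the PUT; run it against every node in the source cluster
+for a complete picture.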
diff --git a/content/riak/kv/2.2.6/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/2.2.6/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..28aa8f9214
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,62 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/mdc/per-bucket
+  - /riak/kv/2.2.6/ops/mdc/per-bucket
+---
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+The available `repl` values changed between Riak Enterprise 1.1 and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+ * `true` --- Enable replication (realtime + fullsync)
+ * `false` --- Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+ * `realtime` --- Replication only occurs in realtime for this bucket
+ * `fullsync` --- Replication only occurs during a fullsync operation
+ * `both` --- Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this also applies to properties such as
+`backend`. If the bucket doesn't exist in the destination cluster, Riak
+will create it with the default backend and _not_ with the backend used
+in the source cluster.
diff --git a/content/riak/kv/2.2.6/using/reference/multi-datacenter/statistics.md b/content/riak/kv/2.2.6/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..544779d9fe
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,240 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/mdc/statistics
+  - /riak/kv/2.2.6/ops/mdc/statistics
+---
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
The counts for all statistics will be reset to 0 upon restarting + Riak unless otherwise noted + +Field | Description +:-----|:---------- +`cluster_leader` | Which node is the current leader of the cluster +`connected_clusters` | A list of all sink clusters to which this source is connected + +## Performance + +The `riak-repl status` command should not be executed more than once a +minute, as statistics are recalculated every time the command is +executed, and some statistics require network communication between +nodes. This performance note also applies to the HTTP `/riak-repl/stats` +endpoint. + +## Realtime Replication Statistics + +Statistics for both the source and sink sides of realtime replication. +These values can be found under either `sources.source_stats` or +`sinks.sink_stats`. + +Field | Description +------|------------ +`realtime_enabled` | A list of all realtime sinks that are enabled +`realtime_started` | A list of all realtime sinks that are started +`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.* +`rt_sink_errors` | The number of sink errors detected on the source node. This value will be reset to 0 after a node restarts. +`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster +`rt_source_errors` | The number of source errors detected on the source node. This value will be reset to 0 after a node restarts. + +Field | Description +------|------------ +`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected +`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected +`connected` | If `true`, then the source is connected to a sink (or vice versa) +`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster +`sent_seq` | The last realtime queue sequence number that has been transmitted +`acked_seq` | The last realtime queue sequence number that has been acknowledged +`expect_seq` | The next realtime queue sequence number that is expected +`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source +`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink + + +These values are under `realtime_queue_stats`.
+ +Field | Description +------|------------ +`bytes` | The size in bytes of all objects currently in the realtime queue +`consumers` | A list of source consumers of the realtime queue +`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error) +`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors +`errs` | The number of errors while pushing/popping from the realtime queue +`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers +`pending` | The number of objects waiting to be sent to the sink cluster +`sinkclustername` | A consumer of the realtime queue +`unacked` | The number of objects waiting to be acknowledged by a queue consumer + + +## Fullsync Replication Statistics + +Field | Description +------|------------ +`fullsync_enabled` | A list of all sinks that are enabled +`fullsync_running` | A list of all sinks that are running +`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started +`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster. +`fullsync_start_time` | The time the current fullsync to the specified cluster began. +`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync. + +If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`. + +Those fields are described in the following tables. + +Field | Description +------|------------ +`cluster` | The name of the sink cluster +`queued` | The number of partitions that are waiting for an available process +`in_progress` | The number of partitions that are being synced +`starting` | The number of partitions connecting to the remote cluster +`successful_exits` | The number of partitions successfully synced. When completed, this will be the same as the total number of partitions in the ring. +`error_exits` | The number of partitions that failed to sync or were aborted; these partitions will be queued to try again later +`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below +`socket` | See [Socket Statistics](#socket-statistics) +`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested +`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync is already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to `fullsync_suggested` when the current fullsync completes. +`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}` + +The `running_stats` field contains the following fields. + +Field | Description +------|------------ +`node` | The local cluster source node currently participating in fullsync replication +`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*. +`strategy` | The strategy that fulfills fullsync replication.
In previous versions of replication, different values could be configured, depending on your replication needs. +`fullsync_worker` | The Erlang process id of the fullsync worker. +`socket` | See [Socket Statistics](#socket-statistics) +`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul> +`fullsync` | The partition that is currently being synchronized with the sink cluster +`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink +`stage_start` | Elapsed time in seconds since the `state` started running on the source +`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync + +## Socket Statistics + +Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored. + +Field | Description +------|------------ +`peername` | `<ip:port>` The address and port for the other end of a connection +`recv_avg` | The average size of packets in bytes received by the socket +`recv_cnt` | The number of packets received by the socket +`recv_dvi` | The average packet size deviation in bytes received by the socket +`recv_kbps` | Socket kilobits/second received +`recv_max` | Size of the largest packet in bytes received by the socket +`send_cnt` | Number of packets sent from the socket +`send_kbps` | Socket kilobits/second sent +`send_pend` | The number of bytes in the Erlang VM to be sent over the socket +`sockname` | `<host:port>` The address and port for "this end" of the connection + +## Version 2 Replication Statistics + +The following definitions describe the output of `riak-repl status`. +Please note that many of these statistics will only appear on the +current leader node. + +**Note**: All counts will be reset to 0 upon restarting Riak. + +Field | Description +------|------------ +`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]` +`[sitename]_ips` | Defines a replication sink +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected secondaries +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of sink connections made to this node. +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected +`elections_leader_changed` | The number of times a Riak node has surrendered leadership +`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication +`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*. +`objects_sent` | The number of objects sent via realtime replication +`server_bytes_recv` | The total number of bytes the primary has received +`server_bytes_sent` | The total number of bytes the primary has sent +`server_connect_errors` | The number of primary to sink connection errors +`server_connects` | The number of times the primary connects to the client sink +`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list +`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`leader` | Which node is the current leader of the cluster for Version 2 Replication +`local_leader_message_queue_len` | The length of the object queue on the leader +`local_leader_heap_size` | The amount of memory the leader is using +`client_stats` | See [Client Statistics](#client-statistics) +`server_stats` | See [Server Statistics](#server-statistics) + +## Client Statistics + +Field | Description +------|------------ +`node` | A unique ID for the Riak node that the sink is running on +`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak. +`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`. +`fullsync_worker` | The Erlang process ID of the fullsync worker +`waiting_to_retry` | The primaries currently waiting to retry replication after a failure +`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul> +`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul> + + +## Server Statistics + +Field | Description +------|------------ +`node` | A unique ID for the Riak node that the source is running on +`site` | The name of the sink this node is configured to connect to. *Warning: This will be renamed in a future version of Riak*. +`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`. +`fullsync_worker` | The Erlang process ID of the fullsync worker +`bounded_queue` | See [Bounded Queue](#bounded-queue) +`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if keylist strategy is being used.
They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul> +`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server + + +## Bounded Queue + +The bounded queue is responsible for holding objects that are waiting to +participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for +more information. + +Field | Description +------|------------ +`queue_pid` | The Erlang process ID of the bounded queue +`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*. +`queue_length` | The number of Riak objects currently in the bounded queue +`queue_byte_size` | The size in bytes of all objects currently in the queue +`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*. +`queue_percentage` | The percentage of the queue that is full +`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged +`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more. + + +## Accessing Replication Web-Based Statistics + +These stats can be accessed via the command line with the following +command: + +```curl +curl -q http://127.0.0.1:8098/riak-repl/stats +``` + +A simple way to view formatted statistics is to use a command such as: + +```curl +curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp +``` diff --git a/content/riak/kv/2.2.6/using/reference/object-deletion.md b/content/riak/kv/2.2.6/using/reference/object-deletion.md new file mode 100644 index 0000000000..c86b4959de --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/object-deletion.md @@ -0,0 +1,117 @@ +--- +title: "Object Deletion Reference" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Object Deletion" + identifier: "managing_ref_object_deletion" + weight: 103 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.2.6/ops/advanced/deletion +--- + +[concept eventual consistency]: ../../../learn/concepts/eventual-consistency +[concept clusters]: ../../../learn/concepts/clusters +[glossary vnode]: ../../../learn/glossary/#vnode +[usage delete objects]: ../../../developing/usage/deleting-objects +[developing keylist]: ../../../developing/api/http/list-keys +[developing mapreduce]: ../../../developing/usage/mapreduce +[cluster mdc]: ../../cluster-operations/v3-multi-datacenter +[config advanced]: ../../../configuring/reference/#advanced-configuration +[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum +[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings +[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction + +In single-server, non-clustered data storage systems, object deletion +is a trivial process.
In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however, +object deletion is far less trivial because objects live on multiple +[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend. + +## Object Deletion Example + +The problem of object deletion in distributed systems can be illustrated more concretely using the following example: + +* An object is stored on nodes A, B, and C +* Node C suddenly goes offline due to a network failure +* A client sends a delete request to node A, which forwards that + request to node B, but it cannot reach node C +* On nodes A and B, the object is deleted +* Node C comes back online +* A client attempts to read the object, and the request hits node C +* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object. + +The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred: + +1. the object was deleted on A & B but not on C +2. the object was created on C but not on A & B + +To get around this problem, Riak uses *Tombstones*. + +## Tombstones + +Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it. + +This allows Riak to understand the difference between an object that has been deleted and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*. + +The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**. + +After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to). + +## Configuring Object Deletion + +The `delete_mode` setting in a cluster's [configuration files][config advanced] will determine how long a tombstone will remain before being reaped. + +There are three possible settings: + +* `keep` --- Disables tombstone removal +* `immediate` --- The tombstone is removed as soon as the request is + received +* Custom time interval --- How long to wait until the tombstone is + removed, expressed in milliseconds. The default is `3000`, i.e. to + wait 3 seconds + +In general, we recommend setting the `delete_mode` parameter to `keep` +if you plan to delete and recreate objects under the same key. This protects against failure scenarios in which a deleted object may be resurrected. + +Setting `delete_mode` to `immediate` can be useful in situations in +which an aggressive space reclamation process is necessary, such as +when running [MapReduce jobs][developing mapreduce], but we do not recommend +this in general. + +Setting `delete_mode` to a longer time duration than the default can be +useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when +network connectivity is an issue.
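+ +As a sketch, `delete_mode` lives in the `riak_kv` section of `advanced.config`. Assuming a default install layout, setting it might look like this: + +```erlang +%% advanced.config -- a minimal sketch; only the riak_kv section is shown +[ + {riak_kv, [ + %% keep tombstones indefinitely; alternatives are immediate, or an + %% integer wait time in milliseconds (the default is 3000) + {delete_mode, keep} + ]} +]. +```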
+## Deletion from Backends + +When attempting to reclaim disk space, deleting data may seem like the obvious first step. However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion when a Riak tombstone is created, and later when that tombstone is reaped. + +In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value. + +In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone. + +Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging], for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects. + +Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping, and prior to garbage collection delete operations will actually cause disk space usage to rise slightly. + +## Tombstones & Reporting + +When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following: + +* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries. +* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain upon the node until a further request finds it. At this time, a new reap timer will be initiated. +* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there. +* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too. +* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default. + +## Client Library Examples + +Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
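+ +For reference, a straightforward object delete over the HTTP API --- assuming a local node listening on port 8098 and a hypothetical bucket and key --- looks like: + +```curl +curl -XDELETE http://127.0.0.1:8098/buckets/my_bucket/keys/my_key +```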
+ +## Resources + +* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html) diff --git a/content/riak/kv/2.2.6/using/reference/runtime-interaction.md b/content/riak/kv/2.2.6/using/reference/runtime-interaction.md new file mode 100644 index 0000000000..d4d10dddb6 --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/runtime-interaction.md @@ -0,0 +1,66 @@ +--- +title: "Runtime Interaction Reference" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Runtime Interaction" + identifier: "managing_ref_runtime_interaction" + weight: 104 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.2.6/ops/advanced/runtime + - /riak/kv/2.2.6/ops/advanced/runtime +--- + +[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference +[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters + +Riak's [configuration files][config reference] provide a variety of parameters that +enable you to fine-tune how Riak interacts with two important elements +of the underlying operating system: distribution ports and OS +processes/garbage collection. + +## Ports + +Distribution ports connect Riak nodes within a [cluster][concept clusters]. The +following port-related parameters are available: + +* `runtime_health.triggers.distribution_port` --- Whether distribution + ports with full input buffers will be counted as busy. + * Default: `on` +* `runtime_health.triggers.port` --- Whether ports with full input + buffers will be counted as busy. Ports can represent open files or network sockets. + * Default: `on` +* `runtime_health.thresholds.busy_ports` --- The threshold at which a + warning will be triggered about the number of ports that are overly + busy. Ports with full input buffers count toward this threshold. + * Default: `2` + +## Processes + +Riak will log warnings related to busy operating system processes and +garbage collection. You can specify the conditions in which warnings are +triggered using the following parameters (a combined `riak.conf` sketch +follows the list): + +* `runtime_health.thresholds.busy_processes` --- The threshold at which + a warning will be triggered about the number of processes that are + overly busy. Processes with large heaps or that take a long time to + garbage collect will count toward this threshold. + * Default: `30` +* `runtime_health.triggers.process.heap_size` --- A process will be + marked as busy when its heap exceeds this size (in bytes). + * Default: `160444000` +* `runtime_health.triggers.process.garbage_collection` --- A process + will be marked as busy when it exceeds this amount of time doing + garbage collection. Enabling this setting can cause performance + problems on multi-core systems. + * Default: `off` + * Example when enabled: `50ms` +* `runtime_health.triggers.process.long_schedule` --- A process will + be marked as busy when it exceeds this length of time during a single + process scheduling and execution cycle. + * Default: `off` + * Example when enabled: `20ms`
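+ +Taken together, these settings might appear in `riak.conf` as follows. This is a sketch only --- the values shown are the documented defaults, except `garbage_collection` and `long_schedule`, which are `off` unless explicitly set: + +```riakconf +## Sketch of the runtime health settings discussed above. +runtime_health.triggers.distribution_port = on +runtime_health.triggers.port = on +runtime_health.thresholds.busy_ports = 2 +runtime_health.thresholds.busy_processes = 30 +runtime_health.triggers.process.heap_size = 160444000 +## Off by default; shown here with the example values from the list above. +runtime_health.triggers.process.garbage_collection = 50ms +runtime_health.triggers.process.long_schedule = 20ms +```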
diff --git a/content/riak/kv/2.2.6/using/reference/search.md b/content/riak/kv/2.2.6/using/reference/search.md new file mode 100644 index 0000000000..dcc41768c9 --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/search.md @@ -0,0 +1,454 @@ +--- +title: "Search Reference" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Search" + identifier: "managing_ref_search" + weight: 109 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.2.6/dev/advanced/search + - /riak/kv/2.2.6/dev/advanced/search +--- + +[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters +[configuring search]: {{<baseurl>}}riak/kv/2.2.6/configuring/search + +> **Note on search 2.0 vs. legacy search** +> +> This document refers to Riak search 2.0 with +[Solr](http://lucene.apache.org/solr/) integration (codenamed +Yokozuna). + +The project that implements Riak search is codenamed Yokozuna. This is a +more detailed overview of the concepts and reasons behind the design of +Yokozuna, for those interested. If you're simply looking to use Riak +search, you should check out the [Using Search]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search) document. + +![Yokozuna]({{<baseurl>}}images/yokozuna.png) + +## Riak Search is Erlang + +In Erlang OTP, an "application" is a group of modules and Erlang +processes which together perform a specific task. The word application +is confusing because most people think of an application as an entire +program such as Emacs or Photoshop. But Riak Search is just a sub-system +in Riak itself. Erlang applications are often stand-alone, but Riak +Search is more like an appendage of Riak. It requires other subsystems +like Riak Core and KV, but also extends their functionality by providing +search capabilities for KV data. + +The purpose of Riak Search is to bring more sophisticated and robust +query and search support to Riak. Many people consider Lucene and +programs built on top of it, such as Solr, as the standard for +open-source search. There are many successful applications built on +Lucene/Solr, and it sets the standard for the feature set that +developers and users expect. Meanwhile, Riak has a great story as a +highly-available, distributed key/value store. Riak Search takes +advantage of the fact that Riak already knows how to do the distributed +bits, combining its feature set with that of Solr, taking advantage of +the strengths of each. + +Riak Search is a mediator between Riak and Solr. There is nothing +stopping a user from deploying these two programs separately, but this +would leave the user responsible for the glue between them. That glue +can be tricky to write. It requires dealing with monitoring, querying, +indexing, and dissemination of information. + +Unlike Solr by itself, Riak Search knows how to do all of the following: + +* Listen for changes in key/value (KV) data and make the appropriate + changes to indexes that live in Solr. It also knows how to take a user + query on any node and convert it to a Solr distributed search, which + will correctly cover the entire index without overlap in replicas. +* Take index creation commands and disseminate that information across + the cluster. +* Communicate with and monitor the Solr OS process. + +## Solr/JVM OS Process + +Every node in a Riak [cluster][concept clusters] has a corresponding operating +system (OS) process running a JVM which hosts Solr on the Jetty +application server.
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
+ +Because of this mismatch between KV and Solr, Riak Search must act as a +mediator between the two, meaning it must have a way to inspect a KV +object and create a structure which Solr can ingest for indexing. In +Solr this structure is called a **document**. This task of creating a +Solr document from a Riak object is the job of the **extractor**. To +perform this task two things must be considered. + +1. Does an extractor exist to map the content-type of the object to a + Solr document? +2. If so, how is the object's value mapped from one to the other? + For example, the value may be `application/json` which contains + nested objects. This must somehow be transformed into a flat + structure. + +**Note**: This isn't quite right; the fields created by the extractor +are only a subset of the fields created. Special fields needed for +Yokozuna to properly query data and tagging fields are also created. +This call happens inside `yz_doc:make_doc`. + +The first question is answered by the _extractor mapping_. By default +Yokozuna ships with extractors for several common data types. Below is a +table of this default mapping: + +Content Type | Erlang Module +:------------|:------------- +`application/json` | `yz_json_extractor` +`application/xml` | `yz_xml_extractor` +`text/plain` | `yz_text_extractor` +`text/xml` | `yz_xml_extractor` +N/A | `yz_noop_extractor` + +The answer to the second question is a function of the implementation +of the extractor module. Every extractor must conform to the +following Erlang specification: + +```erlang +-type field_name() :: atom() | binary(). +-type field_value() :: binary(). +-type fields() :: [{field_name(), field_value()}]. + +-spec extract(ObjectValue :: binary(), Options :: proplists:proplist()) -> + fields() | {error, term()}. +``` + +The value of the object is passed along with options specific to each +extractor. Assuming the extractor correctly parses the value it will +return a list of fields, which are name-value pairs. + +The text extractor is the simplest one. By default it will use the +object's value verbatim and associate it with the field name `text`. +For example, an object with the value "How much wood could a woodchuck +chuck if a woodchuck could chuck wood?" would result in the following +fields list. + +```erlang +[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}] +``` + +An object with the content type `application/json` is a little trickier. +JSON can be nested arbitrarily. That is, the key of a top-level object +can have an object as a value, and this object can have another object +nested inside, and so on. Yokozuna's JSON extractor must have some method +of converting this arbitrary nesting into a flat list. It does this by +concatenating nested object fields with a separator. The default +separator is `.`. An example should make this more clear. + +Below is JSON that represents a person, what city they are from and what +cities they have traveled to. + +```json +{ + "name": "ryan", + "info": { + "city": "Baltimore", + "visited": ["Boston", "New York", "San Francisco"] + } +} +``` + +Below is the field list that would be created by the JSON extractor. + +```erlang +[{<<"info.visited">>,<<"San Francisco">>}, + {<<"info.visited">>,<<"New York">>}, + {<<"info.visited">>,<<"Boston">>}, + {<<"info.city">>,<<"Baltimore">>}, + {<<"name">>,<<"ryan">>}] +``` + +Some key points to notice: + +* Nested objects have their field names concatenated to form a field + name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your + schema defines this field as multi-valued. + +The XML extractor works in a very similar fashion to the JSON extractor +except it also has element attributes to worry about. To see the +document created for an object, without actually writing the object, you +can use the extract HTTP endpoint. This will do a dry-run extraction and +return the document structure as `application/json`. + +```curl +curl -XPUT http://localhost:8098/search/extract \ + -H 'Content-Type: application/json' \ + --data-binary @object.json +``` + +## Schemas + +Every index must have a schema, which is a collection of field names and +types. For each document stored, every field must have a matching name +in the schema, used to determine the field's type, which in turn +determines how a field's value will be indexed. + +Currently, Yokozuna makes no attempts to hide any details of the Solr +schema: a user creates a schema for Yokozuna just as she would for Solr. +Here is the general structure of a schema. + + +```xml +<?xml version="1.0" encoding="UTF-8" ?> +<schema name="my-schema" version="1.5"> + <fields> + <!-- field definitions go here --> + </fields> + + <!-- DO NOT CHANGE THIS --> + <uniqueKey>_yz_id</uniqueKey> + + <types> + <!-- field type definitions go here --> + </types> +</schema> +``` + +The `<fields>` element is where the field name, type, and overriding +options are declared. Here is an example of a field for indexing dates. + +```xml +<field name="created" type="date" indexed="true" stored="true"/> +``` + +The corresponding date type is declared under `<types>` like so. + +```xml +<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/> +``` + +You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search-schemas). + +Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) +called `_yz_default`. This is an extremely general schema which makes +heavy use of dynamic fields---it is intended for development and +testing. In production, a schema should be tailored to the data being +indexed. + +## Active Anti-Entropy (AAE) + +[Active Anti-Entropy]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and +correcting entropy (divergence) between the data stored in Riak's +key-value backend and the indexes stored in Solr. The impetus for AAE is +that failures come in all shapes and sizes---disk failure, dropped +messages, network partitions, timeouts, overflowing queues, segmentation +faults, power outages, etc. Failures range from obvious to invisible. +Failure prevention is fraught with failure, as well. How do you prevent +your prevention system from failing? You don't. Code for detection, not +prevention. That is the purpose of AAE. + +Constantly reading and re-indexing every object in Riak could be quite +expensive. To minimize the overall cost of detection, AAE makes use of +hashtrees. Every partition has a pair of hashtrees; one for KV and +another for Yokozuna. As data is written the hashtrees are updated in +real-time. + +Each tree stores the hash of the object. Periodically a partition is +selected and the pair of hashtrees is _exchanged_. First the root hashes +are compared. If equal then there is no more work to do.
You could have +millions of keys in one partition and verifying they **all** agree takes +the same time as comparing two hashes. If they don't match then the +root's children are checked and this process continues until the +individual discrepancies are found. If either side is missing a key or +the hashes for a key do not match then _repair_ is invoked on that key. +Repair converges the KV data and its indexes, removing the entropy. + +Since failure is inevitable, and absolute prevention impossible, the +hashtrees themselves may contain some entropy. For example, what if the +root hashes agree but a divergence exists in the actual data? Simple: +you assume you can never fully trust the hashtrees, so periodically you +_expire_ them. When expired, a tree is completely destroyed and then +re-built from scratch. This requires folding all data for a partition, +which can be expensive and take some time. For this reason, by default, +expiration occurs after one week. + +For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's +[screencast](http://coffee.jtuple.com/video/AAE.html). + + +## Analysis & Analyzers + +Analysis is the process of breaking apart (analyzing) text into a +stream of tokens. Solr allows many different methods of analysis, +an important fact because different field values may represent +different types of data. For data like unique identifiers, dates, and +categories you want to index the value verbatim---it shouldn't be +analyzed at all. For text like product summaries, or a blog post, +you want to split the value into individual words so that they may be +queried individually. You may also want to remove common words, +lowercase words, or perform stemming. This is the process of +_analysis_. + +Solr provides many different field types which analyze data in different +ways, and custom analyzer chains may be built by stringing together XML +in the schema file, allowing custom analysis for each field. For more +information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search-schemas). + +## Tagging + +Tagging is the process of adding field-value pairs to be indexed via +Riak object metadata. It is useful in two scenarios. + +1. The object being stored is opaque but your application has metadata + about it that should be indexed, for example storing an image with + location or category metadata. + +2. The object being stored is not opaque, but additional indexes must + be added _without_ modifying the object's value. + +See +[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md) +for more information. + +## Coverage + +Yokozuna uses _doc-based partitioning_. This means that all index +entries for a given Riak Object are co-located on the same physical +machine. To query the entire index all partitions must be contacted. +Adjacent partitions keep replicas of the same object. Replication allows +the entire index to be considered by only contacting a subset of the +partitions. The process of finding a covering set of partitions is known +as _coverage_. + +Each partition in the coverage plan has an owning node. Thus a plan can +be thought of as a unique set of nodes along with a covering set of +partitions. Yokozuna treats the node list as physical hostnames and +passes them to Solr's distributed search via the `shards` parameter. +Partitions, on the other hand, are treated logically in Yokozuna. All +partitions for a given node are stored in the same index; unlike KV +which uses _partition_ as a physical separation.
To properly filter out +overlapping replicas the partition data from the cover plan is passed to +Solr via the filter query (`fq`) parameter. + +Calculating a coverage plan is handled by Riak Core. It can be a very +expensive operation as much computation is done symbolically, and the +process amounts to a knapsack problem. The larger the ring, the more +expensive the calculation. Yokozuna takes advantage of the fact that it has no physical +partitions by computing a coverage plan asynchronously every few +seconds, caching the plan for query use. In the case of node failure or +ownership change this could mean a delay between cluster state and the +cached plan. This is, however, a good trade-off given the performance +benefits, especially since even without caching there is a race, albeit +one with a smaller window. + +## Statistics + +The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools. + + +* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window. + +* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr, divided by the batch size. + +* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr. + +* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window. + +* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes. + +* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window. + +* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark. + +* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window. + +* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window. + +* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window. + +* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time it takes to initiate a drain to the time the drain is completed. + +* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees.
Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr. + +* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting. + +* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window. + +* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window. + +* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of querying latency, as measured from the time it takes to send a request to Solr to the time the response is received from Solr. + +* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window. + +* `search_index_bad_entry_count` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. + +* `search_index_bad_entry_one` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute. + +* `search_index_extract_fail_count` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak. + +* `search_index_extract_fail_one` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute. + +While most of the default values are sufficient, you may have to +increase [`search.solr.start_timeout`][configuring search] as more data is indexed, since Solr may require more time to start. \ No newline at end of file diff --git a/content/riak/kv/2.2.6/using/reference/secondary-indexes.md b/content/riak/kv/2.2.6/using/reference/secondary-indexes.md new file mode 100644 index 0000000000..24d4c901fc --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/secondary-indexes.md @@ -0,0 +1,72 @@ +--- +title: "Secondary Indexes Reference" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Secondary Indexes" + identifier: "managing_ref_2i" + weight: 110 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.2.6/dev/advanced/2i + - /riak/kv/2.2.6/dev/advanced/2i +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.2.6/using/reference/strong-consistency + +> **Note: Riak Search preferred for querying** +> +> If you're interested in non-primary-key-based querying in Riak, i.e. if +you're looking to go beyond straightforward K/V operations, we now +recommend [Riak Search]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends.
+ +This document provides implementation and other details for Riak's +[secondary indexes]({{<baseurl>}}riak/kv/2.2.6/developing/usage/secondary-indexes/) \(2i) feature. + +## How It Works + +Secondary indexes use **document-based partitioning**, a system where +indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode). This +system is also known as a local index. Secondary indexes are a list of key/value +pairs that are similar to HTTP headers. At write time, objects are +tagged with index entries consisting of key/value metadata. This +metadata can be queried to retrieve the matching keys. + +![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png) + +Indexes reside on multiple machines. Since indexes for an object are +stored on the same partition as the object itself, query-time +performance issues might arise. When issuing a query, the system must +read from a "covering" set of partitions and then merge the results. +The system looks at how many replicas of data are stored---the N value +or `n_val`---and determines the minimum number of partitions that it +must examine (1 / `n_val`) to retrieve a full set of results, also +taking into account any offline nodes. + +An application can modify the indexes for an object by reading an +object, adding or removing index entries, and then writing the object. +Finally, an object is automatically removed from all indexes when it is +deleted. The object's value and its indexes should be thought of as a +single unit. There is no way to alter the indexes of an object +independently from the value of an object, and vice versa. Indexing is +atomic, and is updated in real time when writing an object. This means +that an object will be present in future index queries as soon as the +write operation completes. + +Riak stores 3 replicas of all objects by default, although this can be +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/replication-properties). The system is capable of generating a full set of results +from one third of the system’s partitions as long as it chooses the +right set of partitions. The query is sent to each partition, the index +data is read, and a list of keys is generated and then sent back to the +requesting node. + +> **Note on 2i and strong consistency** +> +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored.
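+ +As an illustration of the query model described above, here is a 2i query over the HTTP API --- assuming a hypothetical bucket `users` whose objects were written with a binary index named `twitter_bin`: + +```curl +curl http://127.0.0.1:8098/buckets/users/index/twitter_bin/jsmith123 +``` + +This returns the list of keys in `users` whose `twitter_bin` index value matches `jsmith123`.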
diff --git a/content/riak/kv/2.2.6/using/reference/snmp.md b/content/riak/kv/2.2.6/using/reference/snmp.md new file mode 100644 index 0000000000..61840000b5 --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/snmp.md @@ -0,0 +1,162 @@ +--- +title: "Simple Network Management Protocol" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "SNMP" + identifier: "managing_ref_snmp" + weight: 107 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/2.2.6/ops/running/monitoring/snmp + - /riak/kv/2.2.6/ops/running/monitoring/snmp +--- + +Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document covers only SNMP v2c, which was the last supported version. After the release of Riak KV 2.2.3 Enterprise Edition, support for SNMP was dropped. The configuration examples below are left for those analysing legacy settings and only work with the Enterprise Edition of Riak KV 2.2.3 or lower. + +## Configuration + +The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory. + +First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (Ex: `192.168.1.20`): + +```erlang +{intAgentIpAddress, [192,168,1,20]}. +{intAgentUDPPort, 4000}. +{snmpEngineID, "agent's engine"}. +{snmpEngineMaxMessageSize, 484}. + +%% Note: The commas in the IP are in the correct format +``` + +Next, edit the `community.conf` file if you would like to change your community from public to a different string. + +Finally, edit the `standard.conf` file and update it with the proper information: + +```erlang +{sysName, "Riak Node 1"}. +{sysDescr, "Riak Agent"}. +{sysContact, "syadmin@company.com"}. +{sysLocation, "System and Rack Location"}. +{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change +{sysServices, 72}. %% don't change +``` + +Riak needs to be restarted for configuration changes to take effect. + +**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart. + +To force Riak to reload SNMP configuration files on startup: + + 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`). + + 2. Locate the SNMP term: + + ```erlang + {snmp, + [{agent, + [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]}, + {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]} + ``` + + 3. Add `{force_load, true}` to the `config` term: + + ```erlang + {snmp, + [{agent, + [{config, [{dir, "/etc/riak/snmp/agent/conf/"}, + {force_load, true}]}, + {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]} + ``` + + 4. Save `app.config` + + 5. Restart Riak + +Once you have configured the SNMP settings you can start your Riak node and will be able to snmpwalk the node to verify that the setup is working: + +```bash +$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 . +``` + +If you would like to query the OIDs associated with Riak, you will need to reference the MIB shipped with Riak.
For example, the x86_64 packages have the MIB in the following folder: + +```bash +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs +``` + +This folder can be referenced in the snmpwalk command as follows: + +```bash +$ snmpwalk -OS -c public -v 2c -m ALL \ + -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \ + 192.168.52.129:4000 RIAK +``` + + +## SNMP Counters + +**vnodeGets** +*Type:* Counter +Number of vnode-level GETs in past minute + +**vnodePuts** +*Type:* Counter +Number of vnode-level PUTs in past minute + +**nodeGets** +*Type:* Counter +Number of GETs in past minute + +**nodePuts** +*Type:* Counter +Number of PUTs in past minute + +**nodeGetTimeMean** +*Type:* Gauge +Mean GET time (microseconds) + +**nodeGetTimeMedian** +*Type:* Gauge +Median GET time (microseconds) + +**nodeGetTime95** +*Type:* Gauge +95th percentile GET time (microseconds) + +**nodeGetTime99** +*Type:* Gauge +99th percentile GET time (microseconds) + +**nodeGetTime100** +*Type:* Gauge +Maximum GET time (microseconds) + +**nodePutTime95** +*Type:* Gauge +95th percentile PUT time (microseconds) + +**nodePutTime99** +*Type:* Gauge +99th percentile PUT time (microseconds) + +**nodePutTime100** +*Type:* Gauge +Maximum PUT time (microseconds) + +**nodePutTimeMean** +*Type:* Gauge +Mean PUT time (microseconds) + +**nodePutTimeMedian** +*Type:* Gauge +Median PUT time (microseconds) diff --git a/content/riak/kv/2.2.6/using/reference/statistics-monitoring.md b/content/riak/kv/2.2.6/using/reference/statistics-monitoring.md new file mode 100644 index 0000000000..de11d29b6b --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/statistics-monitoring.md @@ -0,0 +1,391 @@ +--- +title: "Statistics & Monitoring Reference" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Monitoring" + identifier: "managing_ref_monitoring" + weight: 106 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.2.6/ops/running/stats-and-monitoring + - /riak/kv/2.2.6/ops/running/stats-and-monitoring +--- + +Riak provides data related to current operating status, which includes +statistics in the form of counters and histograms. These statistics +are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/2.2.6/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands. + +This page presents the most commonly monitored and gathered +statistics, as well as numerous solutions for monitoring and gathering +statistics that our customers and community report using successfully +in Riak cluster environments. You can learn more about the specific +Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/2.2.6/developing/api/http/status) documentation. + +## System Metrics To Graph + +Graphing general system metrics of Riak nodes will help with +diagnostics and early warnings of potential problems, as well as help +guide provisioning and scaling decisions. + +* CPU (user/system/wait/idle) +* Processor Load +* Available Memory +* Available disk space +* Used file descriptors +* Swap Usage +* IOWait +* Read operations +* Write operations +* Network throughput +* Network errors + +We also recommend tracking your system's virtual memory activity and +writebacks. Things like massive flushes of dirty pages or steadily +climbing writeback volumes can indicate poor virtual memory tuning.
+
+More information can be found [here][sysctl_vm_txt] and in our
+documentation on [system tuning]({{<baseurl>}}riak/kv/2.2.6/using/performance/#storage-and-file-system-tuning).
+
+## Riak Metrics to Graph
+
+Riak metrics fall into several general categories:
+
+1. Throughput metrics
+2. Latency metrics
+3. Erlang resource usage metrics
+4. General Riak load/health metrics
+
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/inspecting-node) is
+not practical, you should pick a minimum relevant subset from these
+categories. Some of the most helpful metrics are discussed below.
+
+### Throughput Metrics
+
+Graphing the throughput stats relevant to your use case is often
+helpful for capacity planning and usage trend analysis. In addition,
+it helps you establish an expected baseline -- that way, you can
+investigate unexpected spikes or dips in throughput. The
+following stats are recorded for operations that happened *during the
+last minute*.
+
+Metric | Relevance | Operations (for the last minute)
+:--------|:--------|:--------------------------------
+```node_gets``` | K/V | Reads coordinated by this node
+```node_puts``` | K/V | Writes coordinated by this node
+```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes
+```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes
+```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes
+```search_query_throughput_one``` | Search | Search queries on the node
+```search_index_throughtput_one``` | Search | Documents indexed by Search
+```consistent_gets``` | Strong Consistency | Consistent reads on this node
+```consistent_puts``` | Strong Consistency | Consistent writes on this node
+```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads
+
+Note that there are no separate stats for updates to Flags or
+Registers, as these are included in ```vnode_map_update```.
+
+### Latency Metrics
+
+As with the throughput metrics, keeping an eye on average (and max)
+latency times will help you detect usage patterns and provide advance
+warning of potential problems.
+
+{{% note title="Note on FSM Time Stats" %}}
+FSM Time Stats represent the amount of time in microseconds required to
+traverse the GET or PUT Finite State Machine code, offering a picture of
+general node health. From your application's perspective, FSM Time effectively
+represents experienced latency. Mean, Median, and 95th-, 99th-, and
+100th-percentile (Max) counters are displayed. These are one-minute stats.
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute
+```pbc_active``` | | Number of currently active protocol buffer connections
+```pbc_connects``` | | Number of new protocol buffer connections established during the last minute
+```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes)
+```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0)
+```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection
+```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection
+
+### General Riak Search Load/Health Metrics
+
+These stats give a picture of the general level of activity or load on
+the Riak Search subsystem at any given moment.
+
+Metric | Description
+:------|:------------
+`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+`search_index_bad_entry_one` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute.
+`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last restart of Riak.
+`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute.
+
+
+## Command-line Interface
+
+The [`riak-admin`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/) tool provides two
+interfaces for retrieving statistics and other information: `status`
+and `stat`.
+
+### status
+
+Running the `riak-admin status` command will return all of the
+currently available information from a running node.
+
+```bash
+riak-admin status
+```
+
+This will return a list of over 300 key/value pairs, like this:
+
+```
+1-minute stats for 'dev1@127.0.0.1'
+-------------------------------------------
+connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1']
+consistent_get_objsize_100 : 0
+consistent_get_objsize_195 : 0
+... etc ...
+```
+
+A comprehensive list of available stats can be found in the
+[Inspecting a Node]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+
+### stat
+
+The `riak-admin stat` command is related to the `riak-admin status`
+command but provides a more fine-grained interface for interacting with
+stats and information. Full documentation of this command can be found
+in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#stat) document.
+
+## Statistics and Monitoring Tools
+
+There are many open source, self-hosted, and service-based solutions for
+aggregating and analyzing statistics and log data for the purposes of
+monitoring, alerting, and trend analysis on a Riak cluster. Some
+solutions provide Riak-specific modules or plugins as noted.
+
+The following are solutions with which customers and community members
+have reported success when monitoring the operational status of their
+Riak clusters. Community and open source projects are presented
+along with commercial and hosted services.
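+
+Whichever tool you choose, most of them ultimately consume the same data
+exposed by the HTTP [`/stats`]({{<baseurl>}}riak/kv/2.2.6/developing/api/http/status) endpoint, so a plain
+`curl` call is a useful sanity check of what a collector will see (a
+minimal sketch; the host and port are placeholders for your node's HTTP
+listener, which defaults to 8098):
+
+```bash
+# Fetch the stats JSON from a local node and pretty-print it
+curl -s http://127.0.0.1:8098/stats | python -m json.tool
+```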
+
+{{% note title="Note on Riak 2.x Statistics Support" %}}
+Many of the tools below were created either by third parties or by Basho
+engineers for general usage, and have been passed to the community for further
+updates. As such, many of them only aggregate the statistics and messages
+that were output by Riak 1.4.x.
+
+Like all code under [Basho Labs](https://github.com/basho-labs/), these
+tools are "best effort" and have no dedicated Basho support. We both
+appreciate and need your contribution to keep these tools stable and up to
+date. Please open up a GitHub issue on the repository if you'd like to be a
+maintainer.
+
+Look for banners calling out the tools we've verified to support the latest
+Riak 2.x statistics!
+{{% /note %}}
+
+### Self-Hosted Monitoring Tools
+
+#### Riaknostic
+
+[Riaknostic](http://riaknostic.basho.com) is a growing suite of
+diagnostic checks that can be run against your Riak node to discover
+common problems and recommend how to resolve them. These checks are
+derived from the experience of the Basho Client Services Team as well as
+numerous public discussions on the mailing list, IRC room, and other
+online media.
+
+Riaknostic integrates into the `riak-admin` command via a `diag`
+subcommand, and is a great first step in the process of diagnosing and
+troubleshooting issues on Riak nodes.
+
+#### Riak Control
+
+[Riak Control]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-control/) is Basho's REST-driven user interface for managing Riak
+clusters. It is designed to give you quick insight into the health of
+your cluster and allow for easy management of nodes.
+
+While Riak Control does not currently offer specific monitoring and
+statistics aggregation or analysis functionality, it does offer features
+which provide immediate insight into overall cluster health, node
+status, and handoff operations.
+
+#### collectd
+
+[collectd](http://collectd.org) gathers statistics about the system it
+is running on and stores them. The statistics are then typically graphed
+to find current performance bottlenecks, predict system load, and
+analyze trends.
+
+#### Ganglia
+
+[Ganglia](http://ganglia.info) is a monitoring system specifically
+designed for large, high-performance groups of computers, such as
+clusters and grids. Customers and community members using Riak have
+reported success in using Ganglia to monitor Riak clusters.
+
+A [Riak Ganglia module][riak_ganglia] for collecting statistics from
+the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.2.6/developing/api/http/status) endpoint is also available.
+
+#### Nagios
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x.**
+{{% /note %}}
+
+[Nagios](http://www.nagios.org) is a monitoring and alerting solution
+that can provide information on the status of Riak cluster nodes, in
+addition to various types of alerting when particular events occur.
+Nagios also offers logging and reporting of events and can be used for
+identifying trends and capacity planning.
+
+A collection of [reusable Riak-specific scripts][riak_nagios] is
+available to the community for use with Nagios.
+
+#### OpenTSDB
+
+[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database
+(TSDB) used to store, index, and serve metrics from various sources. It can
+collect data at a large scale and graph these metrics on the fly.
+
+A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of
+the [tcollector framework][tcollector].
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+The [Riemann Tools](https://github.com/aphyr/riemann.git) project, a
+collection of small programs for sending data to Riemann, provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have
+reported successfully using for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.2.6/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine-generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/2.2.6/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. These data are then available for real-time graphing,
+search, and other visualizations ideal for troubleshooting complex issues
+and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistic information which can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ diff --git a/content/riak/kv/2.2.6/using/reference/strong-consistency.md b/content/riak/kv/2.2.6/using/reference/strong-consistency.md new file mode 100644 index 0000000000..9a82287098 --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/strong-consistency.md @@ -0,0 +1,145 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.2.6/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + +## Making the Strong vs. Eventual Decision + +The first system described above may sound like the undisputed champion, +and the second system undesirable. However: + +1. Reads and writes on the first system will often be slower---if only + by a few milliseconds---because the system needs to manage reads and + writes more carefully. If performance is of primary concern, the + first system might not be worth the sacrifice. +2. Reads and writes on the first system may fail entirely if enough + servers are unavailable. If high availability is the top priority, + then the second system has a significant advantage. + +So when deciding whether to use strong consistency in Riak, the +following question needs to be asked: + +#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value? + +If the answer is yes, then you should seriously consider using Riak in a +strongly consistent way for the data that demands it, while bearing in +mind that other data can still be stored in Riak in an eventually +consistent way. 
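+
+For concreteness, strong consistency in Riak is opted into per bucket
+type rather than globally. A minimal sketch of creating and activating
+such a type (the type name `strongly_consistent` is an arbitrary
+example) looks like this:
+
+```bash
+riak-admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'
+riak-admin bucket-type activate strongly_consistent
+```
+
+Keys stored under buckets of that type then get the strongly consistent
+behavior described above, while data under other bucket types remains
+eventually consistent.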
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable
+trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available
+than eventually consistent operations because they require a **quorum**
+of available object replicas to succeed. Quorum is defined as N / 2 + 1,
+or `n_val` / 2 + 1. If N is set to 7, at least 4 object replicas must be
+available; if N is 3, at least 2 must be available; and so on.
+
+If there is a network partition that leaves less than a quorum of object
+replicas available within an ensemble, strongly consistent operations
+against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault
+tolerance. Consistent operations can still succeed when a minority of
+replicas in each ensemble are offline, faulty, or unreachable. In
+other words, **strongly consistent operations will succeed as long as
+quorum is maintained**. A fuller discussion can be found in the
+[operations]({{<baseurl>}}riak/kv/2.2.6/configuring/strong-consistency/#fault-tolerance)
+documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/2.2.6/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+which can entail a performance hit of varying proportions, depending on
+a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/2.2.6/configuring/strong-consistency/#performance).
diff --git a/content/riak/kv/2.2.6/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.2.6/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..4e3407141a
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,35 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.2.6/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+ +[Learn More >>][v2 mdc fullsync] diff --git a/content/riak/kv/2.2.6/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.2.6/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..af18c00057 --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,126 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/2.2.6/ops/mdc/v2/architecture + - /riak/kv/2.2.6/ops/mdc/v2/architecture +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.2.6/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e. +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime mode are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the Figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. 
The site node in the secondary cluster initiates fullsync replication + with the primary node by sending a message to the listener node in + the primary cluster +3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode) in their respective clusters and compute a hash for + each key's object value. The site node on the secondary cluster sends + its complete list of key/hash pairs to the listener node in the + primary cluster. The listener node then sequentially compares its + key/hash pairs with the primary cluster's pairs, identifying any + missing objects or updates needed in the secondary cluster. +4. The listener node streams the missing objects/updates to the + secondary cluster. +5. The secondary cluster replicates the updates within the cluster to + achieve the new object values, completing the fullsync cycle + +<br> +![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png) +<br> + +## Realtime Replication + +Riak performs the following steps during realtime +replication, as illustrated in the Figure below. + +1. The secondary cluster establishes a TCP connection to the primary +2. Realtime replication of a key/object is initiated when an update is + sent from a client to the primary cluster +3. The primary cluster replicates the object locally +4. The listener node on the primary cluster streams an update to the + secondary cluster +5. The site node within the secondary cluster receives and replicates + the update + +<br> +![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png) +<br> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/2.2.6/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +source and sink cluster. diff --git a/content/riak/kv/2.2.6/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.2.6/using/reference/v2-multi-datacenter/scheduling-fullsync.md new file mode 100644 index 0000000000..c49c9ade16 --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -0,0 +1,49 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Scheduling Fullsync" + identifier: "managing_ref_v2_fullsync" + weight: 101 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/2.2.6/ops/mdc/v2/scheduling-fullsync + - /riak/kv/2.2.6/ops/mdc/v2/scheduling-fullsync +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.2.6/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. +{{% /note %}} + + +## Scheduling Fullsync Operation + +With the `pause` and `resume` commands it is possible to limit the +fullsync operation to off-peak times. First, disable `fullsync_interval` +and set `fullsync_on_connect` to `false`. Then, using cron or something +similar, execute the commands below at the start of the sync window. 
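+
+Before the scripts themselves, here is a sketch of what those two
+settings might look like in the `riak_repl` section of
+`app.config`/`advanced.config` (treat this as an assumption to verify
+against your installation, as exact placement can vary by version):
+
+```erlang
+{riak_repl, [
+    %% don't kick off a fullsync automatically on connection
+    {fullsync_on_connect, false},
+    %% disable the periodic fullsync timer; cron will drive fullsync instead
+    {fullsync_interval, disabled}
+]}
+```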
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` diff --git a/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter.md b/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..78ba75308a --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter.md @@ -0,0 +1,47 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] diff --git a/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..4e5141b205 --- /dev/null +++ b/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,125 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.2.6/ops/mdc/v3/aae + - /riak/kv/2.2.6/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak
+Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a
+technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in
+LevelDB, so if AAE is already active, there is no additional startup
+delay for enabling the `aae` fullsync strategy. AAE can also be enabled
+for the first time on a cluster, although some custom settings can
+enhance performance in this case to help AAE trees be built more
+quickly. See [Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later installed on source and sink
+  clusters
+* Riak MDC Replication Version 3 enabled on source and sink
+  clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the
+  `advanced.config` configuration file must be set to `aae` on both
+  source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In
+  the event that an AAE tree is not built on both the source and sink,
+  fullsync will default to the `keylist` fullsync strategy for that
+  partition.
+
+## Configuration
+
+If you are using Riak version 2.0, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To use [active anti-entropy][glossary aae] \(AAE) fullsync, you must enable AAE on both source and sink clusters. If it is not
+enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all
+of the necessary hash trees because the default **build rate** of trees
+is to build 1 partition per hour, per node. With a
+[ring size][concept clusters] of 256 and 5 nodes, that is 2 days.
+
+Changing the rate of tree building can speed up this process, with the
+caveat that rebuilding a tree takes processing time from the cluster,
+and this should not be done without assessing the possible impact on
+get/put latencies for normal cluster operations. For a production
+cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a
+partition has not had its AAE tree built yet, it will default to using
+the `keylist` replication strategy. Instructions on these settings can
+be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting
+the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit`
+settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on
+both source and sink clusters. If not, the `keylist` replication
+strategy will be used.
+
+To enable AAE with Version 3 MDC Replication:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {fullsync_strategy, aae},
+    % ...
+    ]}
+```
diff --git a/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..17b39f92ee
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/architecture.md
@@ -0,0 +1,182 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Architecture"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Architecture"
+    identifier: "managing_ref_v3_architecture"
+    weight: 100
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v3/architecture
+  - /riak/kv/2.2.6/ops/mdc/v3/architecture
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters
+
+## How Version 3 Replication Works
+
+In Multi-Datacenter (MDC) Replication, a cluster can act as either the
+
+* **source cluster**, which sends replication data to one or more
+* **sink clusters**, which are generally located in datacenters in other
+  regions or countries.
+
+Bidirectional replication can easily be established by making a cluster
+both a source and sink to other clusters. Riak
+Multi-Datacenter Replication is considered "masterless" in that all
+clusters participating will resolve replicated writes via the normal
+resolution methods available in Riak.
+
+In Multi-Datacenter Replication, there are two primary modes of
+operation:
+
+* **Fullsync** replication is a complete synchronization that occurs
+  between source and sink cluster(s), which can be performed upon
+  initial connection of a sink cluster if you wish
+* **Realtime** replication is a continual, incremental synchronization
+  triggered by successful writing of new updates on the source cluster
+
+Fullsync and realtime replication modes are described in detail below.
+
+## Concepts
+
+### Sources
+
+A source refers to a cluster that is the primary producer of replication
+data. A source can also refer to any node that is part of the source
+cluster. Source clusters push data to sink clusters.
+
+### Sinks
+
+A sink refers to a cluster that is the primary consumer of replication
+data. A sink can also refer to any node that is part of the sink
+cluster. Sink clusters receive data from source clusters.
+
+### Cluster Manager
+
+The cluster manager is a Riak service that provides
+information regarding nodes and protocols supported by the sink and
+source clusters. This information is primarily consumed by the
+`riak-repl connect` command.
+
+### Fullsync Coordinator
+
+In fullsync replication, a node on the source cluster is elected to be
+the *fullsync coordinator*. This node is responsible for starting and
+stopping replication to the sink cluster. It also communicates with the
+sink cluster to exchange key lists and ultimately transfer data across a
+TCP connection. If a fullsync coordinator is terminated as the result of
+an error, it will automatically restart on the current node. If the node
+becomes unresponsive, a leader election will take place within 5 seconds
+to select a new node from the cluster to become the coordinator. In the
+event of a coordinator restart, a fullsync will have to restart.
+ +## Fullsync Replication + +Fullsync replication scans through the list of partitions in a Riak +cluster and determines which objects in the sink cluster need to be +updated. A source partition is synchronized to a node on the sink +cluster containing the current partition. + +## Realtime Replication + +In realtime replication, a node in the source cluster will forward data +to the sink cluster. A node in the source cluster does not necessarily +connect to a node containing the same [vnode][glossary vnode] on +the sink cluster. This allows Riak to spread out realtime replication +across the entire cluster, thus improving throughput and making +replication more fault tolerant. + +### Initialization + +Before a source cluster can begin pushing realtime updates to a sink, +the following commands must be issued: + +1. `riak-repl realtime enable <sink_cluster>` + + After this command, the realtime queues (one for each Riak node) are + populated with updates to the source cluster, ready to be pushed to + the sink. + +2. `riak-repl realtime start <sink_cluster>` + + This instructs the Riak connection manager to contact the sink + cluster. + + <br /> + ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png) + <br /> + + At this point realtime replication commences. + +<ol start="3"> +<li>Nodes with queued updates establish connections to the sink cluster +and replication begins.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png) +<br /> + +### Realtime queueing and synchronization + +Once initialized, realtime replication continues to use the queues to +store data updates for synchronization. + +<ol start="4"> +<li>The client sends an object to store on the source cluster.</li> +<li>Riak writes N replicas on the source cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png) +<br /> + +<ol start="6"> +<li>The new object is stored in the realtime queue.</li> +<li>The object is copied to the sink cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png) +<br /> + +<ol start="8"> +<li>The destination node on the sink cluster writes the object to N +nodes.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png) +<br /> + +<ol start="9"> +<li>The successful write of the object to the sink cluster is +acknowledged and the object removed from the realtime queue.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png) +<br /> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size][concept clusters]; if you are using fullsync +replication, every bucket's `n_val` must be the same in both the +source and sink cluster. 
diff --git a/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/cascading-writes.md
new file mode 100644
index 0000000000..b453a4a2dc
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -0,0 +1,98 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Cascading Realtime Writes"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Cascading Writes"
+    identifier: "managing_ref_v3_cascading_writes"
+    weight: 102
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v3/cascading-writes
+  - /riak/kv/2.2.6/ops/mdc/v3/cascading-writes
+---
+
+## Introduction
+
+Riak includes a feature that cascades realtime writes across
+multiple clusters.
+
+Cascading Realtime Writes is enabled by default on new clusters running
+Riak. It will need to be manually enabled on existing clusters.
+
+Cascading realtime requires the `{riak_repl, rtq_meta}` capability to
+function.
+
+{{% note title="Note on cascading tracking" %}}
+Cascading tracking is a simple list of where an object has been written. This
+works well for most common configurations. Larger installations, however, may
+have writes cascade to clusters to which other clusters have already written.
+{{% /note %}}
+
+
+```
++---+     +---+     +---+
+| A | <-> | B | <-> | C |
++---+     +---+     +---+
+  ^                   ^
+  |                   |
+  V                   V
++---+     +---+     +---+
+| F | <-> | E | <-> | D |
++---+     +---+     +---+
+```
+
+In the diagram above, a write at cluster A will begin two cascades. One
+goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and
+finally B. Each cascade will loop around to A again, sending a
+replication request even if the same request has already occurred from
+the opposite direction, creating 3 extra write requests.
+
+This can be mitigated by disabling cascading in a cluster. If cascading
+were disabled on cluster D, a write at A would begin two cascades. One
+would go through B, C, and D, the other through F, E, and D. This
+reduces the number of extraneous write requests to 1.
+
+A different topology can also prevent extra write requests:
+
+```
++---+                 +---+
+| A |                 | E |
++---+                 +---+
+ ^ ^                   ^ ^
+ |  \  +---+     +---+  /  |
+ |   > | C | <-> | D | <   |
+ |  /  +---+     +---+  \  |
+ V V                   V V
++---+                 +---+
+| B |                 | F |
++---+                 +---+
+```
+
+A write at A will cascade to C and B. B will not cascade to C because
+A will have already added C to the list of clusters where the write has
+occurred. C will then cascade to D. D then cascades to E and F. E and F
+see that the other was sent a write request (by D), and so they do not
+cascade.
+
+## Usage
+
+Riak Cascading Writes can be enabled and disabled using the
+`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/v3-multi-datacenter) for more information.
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
diff --git a/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..313f503bbe
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,68 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.2.6/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/2.2.6/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak-repl`
+section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes,
+  between fullsyncs, _or_
+* a list of `{"clustername", time_in_minutes}` pairs, one for each sink
+  participating in fullsync replication. Note the commas separating each
+  pair, and `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    {fullsync_interval, 90} %% fullsync runs every 90 minutes
+    % ...
+    ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    % clusters sink_boston + sink_newyork have different intervals (in minutes)
+    {fullsync_interval, [
+        {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+        {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+    ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been
+  completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the
+  specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last
+  completed fullsync.
diff --git a/content/riak/kv/2.2.6/using/repair-recovery.md b/content/riak/kv/2.2.6/using/repair-recovery.md
new file mode 100644
index 0000000000..1482b98fd6
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/repair-recovery.md
@@ -0,0 +1,48 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
diff --git a/content/riak/kv/2.2.6/using/repair-recovery/errors.md b/content/riak/kv/2.2.6/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..46fcc38196
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/repair-recovery/errors.md
@@ -0,0 +1,362 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/recovery/errors
+  - /riak/kv/2.2.6/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may
+encounter -- screws fall out all of the time, the world is an imperfect
+place. This is an attempt at capturing the most common recent errors
+that users encounter, as well as to give some description of the
+non-critical error atoms which you may find in the logs.
+
+Discovering the source of an error can take some detective work, since
+one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error
+messages may appear in. Depending upon your log configuration some may
+appear more often (e.g., if you set the log level to debug), while others
+may output to your console (e.g., if you tee'd your output or started
+Riak with `riak console`).
+
+You can optionally customize your log message format via the
+`lager_default_formatter` field under `lager` in `app.config`. If you
+do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so that you can look up portions of a
+log message, since printing every variation would be a bit unwieldy. For
+example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+starts with a date (`12:34:27.999`), followed by the log severity
+(`[error]`), with a message formatted by lager (found in the Lager table
+below as *gen_server `Mod` terminated with reason: `Reason`*)
+
+### Lager Formats
+
+Riak's main logging mechanism is the Lager project, so it's good to note
+some of the more common message formats. In almost every case the
+reasons for the error are described as variables, such as `Reason` or
+`Mod` (meaning the Erlang module which is generally the source of the
+error).
+
+Riak does not format all error messages that it receives into
+human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in
+this table, where the Erlang `Mod` value is `riak_core_capability` and
+the reason was an Erlang error: `no function clause matching
+orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang programming favors a "happy path/fail fast" style, one
+of the more common error log strings you might encounter contains
+`{error,{badmatch,{...`. This is Erlang's way of telling you that an
+unexpected value was assigned, so these errors can prefix the more
+descriptive parts. In this case, `{error,{badmatch,{...` prefixes the
+more interesting `insufficient_vnodes_available` error, which can be
+found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; if Riak is already running, check for a stray `beam.smp` process; if epmd thinks Riak is running, check for and kill epmd
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.2.6/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.2.6/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | An attempt was made to write a file/directory to a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large an object or that you simply don't have enough RAM. Base minimum suggested RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang atoms (and
+quite often wrapped within an `{error,{badmatch,{...` tuple, as
+described in the [Erlang Errors](#erlang-errors) section above). This
+table lays out those terse error codes and related log messages, if
+they exist.
+
+### Riak Core
+
+Riak Core is the underlying framework beneath KV. These are errors
+originating from that framework, and they can appear whether you use KV,
+Search, or any other Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g. backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check the `riak-admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections, and ensure that the Erlang cookie (`vm.args` `-setcookie`) is the same on all nodes
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use, e.g. `{riak_kv_stat, true}`
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possible
corrupted ring
+`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN` and sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.2.6/using/repair-recovery/errors/#more"> Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive an `xfer_complete` status
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be
+Riak proper.
This is the source of most of the code, and consequently, most of the
+error messages.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status
+`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
+`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.2.6/using/repair-recovery/errors/#more"> Step 1</a>.
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/2.2.6/using/repair-recovery/errors/#more"> Step 1</a>.
+`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone
+`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value
+`{field_parsing_failed, {Field, Value}}` | `Could not parse field <Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer
+`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug
+`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend
+`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`.
+`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings
+`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe`
+`invalid_message` | | Unknown event sent to module | Ensure you're running similar versions of Riak (and specifically poolboy) across all nodes
+`{invalid_range, Args}` | | Index range query has Start > End | Fix your query
+`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result
+`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files
+`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key
+`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk.
If this happens repetitively, stop/start the riak node, forcing a memory realloc
+`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N
+`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized
+`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe`
+`notfound` | | No value found | Value was deleted, or was not yet stored or replicated
+`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high
+`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value
+`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code
+`{pw_val_unsatisfied, PR, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high
+`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value
+`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high
+`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value
+`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called
+`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value
+`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false`
+`timeout` | | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.2.6/using/repair-recovery/errors/#more"> Step 1</a>. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production.
+`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format
+`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format
+`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value
+`too_many_results` | | Too many results are attempted to be returned | This is a protective error.
Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000)
+`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin`
+`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high
+`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value
+ | `Invalid equality query <SKey>` | Equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query
+ | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query
+ | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution
+
+### Backend Errors
+
+These errors tend to stem from server-based problems. Backends are
+sensitive to low or corrupt disk or memory resources, native code, and
+configuration differences between nodes. Conversely, a network issue is
+unlikely to affect a backend.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config
+`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where to store bitcask data, under the `bitcask` section
+`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list
+`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config`
+`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure
+`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config`
+`not_loaded` | | Native driver not loading | Ensure your native drivers exist (.dll or .so files under lib/`project`/priv, where `project` is most likely eleveldb)
+`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket
+`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production
+
+### JavaScript
+
+These are some errors related to JavaScript pre-commit functions,
+MapReduce functions, or simply the management of the pool of JavaScript
+VMs. If you do not use JavaScript, these should not be encountered. If
+they are, check your configuration for high `*js_vm*` values, or treat
+them as a symptom of a real issue, such as low resources.
+
+Error | Message | Description | Resolution
+---------|---------|-------------|-------
+`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`)
+`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A UTF-8 character was given in a bad format | Only use correct UTF-8 characters for JavaScript code and arguments
+`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments
+ | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties
+`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | In OS X you may have compiled with `llvm-gcc` rather than `gcc`.
+
+### MapReduce
+
+These are possible errors logged by Riak's MapReduce implementation,
+both legacy as well as Pipe. If you never use or call MapReduce, you
+should not run across these.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it
+`bad_mapred_inputs` | | A bad value sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary
+`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce query code placed as a Riak value must first be stored before execution
+`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct
+`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (Both issues may require a node restart)
+`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has invalid input field | Fix MapReduce fields
+`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks.
Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite as Erlang functions
+`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post MapReduce request with at least one
+`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value
+`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long running job hitting MapReduce timeout, upgrade to Pipe
+`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has invalid query field | Fix MapReduce query
+`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set a reduce phase at all
+`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted
+ | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation, most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects
+ | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen
+
+## Specific messages
+
+Although you can put together many error causes with the tables above,
+here are some common yet esoteric messages with known causes and
+solutions.
+
+ Message | Resolution
+:--------|:----------
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The Node has been changed, either through change of IP or `vm.args` `-name`, without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files `rm -rf /var/lib/riak/ring/*` and rejoin to the cluster
+gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without)
+monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1 msec).
+<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args`
+(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/2.2.6/using/repair-recovery/errors/#more">Step 2</a>.
+enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core.
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause of this kind of error; check for the existence of stale `beam.smp` processes.
+exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports.
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/2.2.6/using/reference/snmp) configuration.
+RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
+
+
+### More
+
+1. <a name="f1"></a>Ensure node inter-communication
+    - Check `riak-admin member-status` and ensure the cluster is valid.
+    - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected.
+    - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node.
+    - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster.
+    - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`).
+
+2. <a name="f2"></a>Run LevelDB compaction
+    1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting, more than one might be a strong indication of a hardware or OS bug; see also the script sketch after this list)*
+    2. Stop Riak on the node: `riak stop`
+    3. Start an Erlang session (do not start riak, we just want Erlang)
+    4. From the Erlang console perform the following command to open the LevelDB database
+
+        ```erlang
+        [application:set_env(eleveldb, Var, Val) || {Var, Val} <-
+            [{max_open_files, 2000},
+             {block_size, 1048576},
+             {cache_size, 20*1024*1024*1024},
+             {sync, false},
+             {data_root, "/var/db/riak/leveldb"}]].
+        ```
+    5. For each of the corrupted LevelDB databases (found by `find . -name "LOG" -exec grep -l 'Compaction error' {} \;`) run this command, substituting in the proper vnode number.
+
+        ```erlang
+        eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []).
+        ```
+    6. When all have finished successfully you may restart the node: `riak start`
+    7. Check for proper operation by looking at log files in /var/log/riak and in the LOG files in the affected LevelDB vnodes.
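+
+For convenience, the scan in step 2.1 can be wrapped in a small shell
+script. This is only a sketch: it assumes the default data root used in
+the examples above (`/var/db/riak/leveldb`), so adjust the path to match
+your node's actual LevelDB data directory.
+
+```bash
+#!/usr/bin/env bash
+# Sketch: list the LevelDB vnode directories that have logged a
+# compaction error, so they can be fed to eleveldb:repair.
+DATA_ROOT="/var/db/riak/leveldb"
+for log in "$DATA_ROOT"/*/LOG; do
+  if grep -q 'Compaction error' "$log"; then
+    echo "corrupted vnode: $(dirname "$log")"
+  fi
+done
+```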
diff --git a/content/riak/kv/2.2.6/using/repair-recovery/failed-node.md b/content/riak/kv/2.2.6/using/repair-recovery/failed-node.md
new file mode 100644
index 0000000000..9c67bb902d
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/repair-recovery/failed-node.md
@@ -0,0 +1,110 @@
+---
+title: "Recovering a Failed Node"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Recover a Failed Node"
+    identifier: "repair_recover_failed_node"
+    weight: 104
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/recovery/failed-node
+  - /riak/kv/2.2.6/ops/running/recovery/failed-node
+---
+
+## General Recovery Notes
+
+A Riak node can fail for many reasons, but a handful of checks will
+uncover the most common problems that lead to node failure: checking
+RAID and filesystem consistency, checking for faulty memory, and
+ensuring that your network connections are fully functioning.
+
+When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster.
+
+During the recovery process, hinted handoff will kick in and update the data on
+the recovered node with updates accepted from other nodes in the cluster. Your
+cluster may temporarily return `not found` for objects that are currently
+being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on
+these scenarios, in particular how the system behaves while the failed node is
+not part of the cluster).
+
+## Node Name Changed
+
+If you are recovering from a scenario in which node name changes are out of
+your control, you'll want to notify the cluster of its *new* name using the
+following steps:
+
+1. Stop the node you wish to rename:
+
+    ```bash
+    riak stop
+    ```
+
+2. Mark the node down from another node in the cluster:
+
+    ```bash
+    riak-admin down <previous_node_name>
+    ```
+
+3. Update the node name in Riak's configuration files:
+
+    ```riakconf
+    nodename = <updated_node_name>
+    ```
+
+    ```vmargs
+    -name <updated_node_name>
+    ```
+
+4. Delete the ring state directory (usually `/var/lib/riak/ring`).
+
+5. Start the node again:
+
+    ```bash
+    riak start
+    ```
+
+6. Ensure that the node comes up as a single instance:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    The output should look something like this:
+
+    ```
+    ========================= Membership ==========================
+    Status     Ring    Pending    Node
+    ---------------------------------------------------------------
+    valid     100.0%      --      'dev-rel@127.0.0.1'
+    ---------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+7. Join the node to the cluster:
+
+    ```bash
+    riak-admin cluster join <node_name_of_a_member_of_the_cluster>
+    ```
+
+8. Replace the old instance of the node with the new:
+
+    ```bash
+    riak-admin cluster force-replace <previous_node_name> <new_node_name>
+    ```
+
+9.
Review the changes:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Finally, commit those changes:
+
+    ```bash
+    riak-admin cluster commit
+    ```
diff --git a/content/riak/kv/2.2.6/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.2.6/using/repair-recovery/failure-recovery.md
new file mode 100644
index 0000000000..60ff91a0a7
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/repair-recovery/failure-recovery.md
@@ -0,0 +1,125 @@
+---
+title: "Failure & Recovery"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Failure & Recovery"
+    identifier: "repair_recover_failure"
+    weight: 100
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/recovery/failure-recovery
+  - /riak/kv/2.2.6/ops/running/recovery/failure-recovery
+---
+
+Riak was built to withstand---or at the very least reduce the severity
+of---many types of system failure. Nonetheless, bugs are a reality,
+hardware does break, and occasionally Riak itself will fail. Here, we'll
+list some steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+## Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, and back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally and
+are not affected by a wider problem like a virtualization or network outage.
+Try to determine the cause of the problem from the data you have collected.
+
+## Data Loss
+
+Many failures incur no data loss, or only minimal loss that can be
+repaired automatically, without intervention. Outage of a single node
+does not necessarily cause data loss, as other replicas of every key are
+available elsewhere in the cluster. Once the node is detected as down,
+other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/2.2.6/learn/glossary/#hinted-handoff)).
+
+More severe data loss scenarios usually relate to hardware failure.
+If data is lost, several options are available for restoring it.
+
+1. **Restore from backup** --- A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but it can be used to partially restore data from
+   lost storage volumes. If running in a RAID configuration, rebuilding
+   the array may also be possible.
+2. **Restore from multi-cluster replication** --- If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the `riak-repl`
+   command.
+3. **Restore using intra-cluster repair** --- Riak versions 1.2 and greater
+   include a repair feature which will restore lost partitions with
+   data from other replicas. Currently, this must be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho Client Services Engineer.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho are strongly recommended.
+
+## Data Corruption
+
+Data at rest on disk can become corrupted by hardware failure or other
Generally, the Riak storage backends are designed to handle +cases of corruption in individual files or entries within files, and can +repair them automatically or simply ignore the corrupted parts. +Otherwise, clusters can recover from data corruption in roughly the same +way that they recover from data loss. + +## Out-of-Memory + +Sometimes, Riak will exit when it runs out of available RAM. While this +does not necessarily cause data loss, it may indicate that the cluster +needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully. + +Replacing the node with one that has greater RAM capacity may temporarily +alleviate the problem, but out-of-memory (OOM) issues tend to be an indication +that the cluster is under-provisioned. + +## High Latency / Request Timeout + +High latencies and timeouts can be caused by slow disks or networks or an +overloaded node. Check `iostat` and `vmstat` or your monitoring system to +determine the state of resource usage. If I/O utilization is high but +throughput is low, this may indicate that the node is responsible for +too much data and growing the cluster may be necessary. Additional RAM +may also improve latency because more of the active dataset will be +cached by the operating system. + +Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/2.2.6/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +the number of siblings, causing longer disk service times and slower +network responses. + +Sibling explosion can be detected by examining the `node_get_fsm_siblings` +and `node_get_fsm_objsize` statistics from the `riak-admin status` command. +To recover from sibling explosion, the application should be throttled and +the resolution policy might need to be invoked manually on offending keys. + +A Basho CSE can assist in manually finding large values, i.e. those that +potentially have a sibling explosion problem, in the storage backend. + +MapReduce requests typically involve multiple I/O operations and are +thus the most likely to time out. From the perspective of the client +application, the success of MapReduce requests can be improved by reducing the +number of inputs, supplying a longer request timeout, and reducing the usage +of secondary indexes. Heavily loaded clusters may experience more MapReduce +timeouts simply because many other requests are being serviced as well. Adding +nodes to the cluster can reduce MapReduce failure in the long term by +spreading load and increasing available CPU and IOPS. + + +## Cluster Recovery From Backups + +See [Changing Cluster Information]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. + +{{% note title="Tip" %}} +If you are a TI Tokyo Riak supprt customer and require assistance or +further advice with a cluster recovery, please file a ticket with the +<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>. 
+{{% /note %}} diff --git a/content/riak/kv/2.2.6/using/repair-recovery/repairs.md b/content/riak/kv/2.2.6/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..2c33c80e7a --- /dev/null +++ b/content/riak/kv/2.2.6/using/repair-recovery/repairs.md @@ -0,0 +1,387 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.2.6/ops/running/recovery/repairing-indexes + - /riak/kv/2.2.6/ops/running/recovery/repairing-indexes + - /riak/2.2.6/ops/running/recovery/failed-node + - /riak/kv/2.2.6/ops/running/recovery/failed-node + - /riak/2.2.6/ops/running/recovery/repairing-leveldb + - /riak/kv/2.2.6/ops/running/recovery/repairing-leveldb + - /riak/2.2.6/ops/running/recovery/repairing-partitions + - /riak/kv/2.2.6/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/2.2.6/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/2.2.6/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/2.2.6/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`.
+
+Compaction error messages take the following form:
+
+```
+<timestamp> Compaction Error: Corruption: corrupted compressed block contents
+```
+
+To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction Error` in each `LOG` file. Here is an example script:
+
+```bash
+find . -name "LOG" -exec grep -l 'Compaction error' {} \;
+```
+
+If there are compaction errors in any of your vnodes, those will be listed in the console. If any vnode has experienced such errors, you would see output like this:
+
+```
+./442446784738847563128068650529343492278651453440/LOG
+```
+
+
+{{% note %}}
+While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level.
+{{% /note %}}
+
+
+## Healing Corrupted LevelDBs
+
+When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not.
+
+Choose your setup below:
+
+1. [Just LevelDB](#leveldb)
+2. [LevelDB with tiered storage](#leveldb-with-tiered-storage)
+
+
+### LevelDB
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+3\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+4\. Then set `Options` equal to an empty list:
+
+```erlang
+Options = [].
+```
+
+5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB that you found using the [`find` command above](#checking-for-compaction-errors).
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+### LevelDB with Tiered Storage
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. Check your riak.conf file and make note of the following values:
+
+* leveldb.tiered (integer)
+* leveldb.tiered.path.fast
+* leveldb.tiered.path.slow
+
+3\.
To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+4\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+5\. Then supply the information you noted in Step 2:
+
+```erlang
+Options = [
+  {tiered_slow_level, »leveldb.tiered value«},
+  {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"},
+  {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"}
+].
+```
+
+6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB partition that you found using the [`find` command above](#checking-for-compaction-errors), with each ID provided as a double-quoted string.
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+
+## Repairing Partitions
+
+If you have experienced a loss of object replicas in your cluster, you
+may need to perform a repair operation on one or more of your data
+[partitions]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+run in situations where partitions or whole nodes are lost due to
+corruption or hardware failure. In these cases, nodes or partitions are
+brought back online without any data, which means that the need to
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/) is enabled.
+
+You will need to run a repair if the following are both true:
+
+* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* You have both non-expiring data and keys that are not accessed
+  frequently (which means that they are not likely to be subject to
+  [read repair]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+
+You will most likely not need to run a repair operation if _any_ of the
+following is true:
+
+* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/2.2.6/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Your entire key set is accessed frequently, allowing passive read
+  repair to repair the partitions
+* Your data expires frequently
+
+In most cases, we recommend either using active anti-entropy or, if
+necessary and only when necessary, running a repair operation using the
+instructions below.
+
+### Running a Repair
+
+The Riak KV repair operation will repair objects from a node's adjacent
+partitions on the ring, consequently fixing the index. This is done as
+efficiently as possible by generating a hash range for all the buckets
+and thus avoiding a preflist calculation for each key. Only a hash of
+each key is done, its range determined from a bucket->range map, and
+then the hash is checked against the range.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data it is safest to make them
+mutually exclusive events. If you join or remove a node all repairs
+across the entire cluster will be killed.
+
+### Repairing a Single Partition
+
+In the case of data loss in a single partition, only that partition can
+be repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit **Enter** again to get a console prompt.
+
+2. Execute the repair for a single partition using the below command:
+
+    ```erlang
+    riak_kv_vnode:repair(»Partition ID«).
+    ```
+
+    where `»Partition ID«` is replaced by the ID of the partition to
+    repair. For example:
+
+    ```erlang
+    riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
+    ```
+
+3. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Repairing All Partitions on a Node
+
+If a node is lost, all partitions currently owned by that node can be
+repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+2. Get a copy of the current Ring:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with ring record information.
+    You can safely ignore it.
+
+3. Get a list of partitions owned by the node that needs to be repaired.
+Replace `dev1@127.0.0.1` with the name of the node to be repaired. The
+name can be found in each node's `vm.args` file, specified as the
+`-name` parameter, if you are using the older configuration system; if
+you are using the newer, `riak.conf`-based system, the name is given by
+the `nodename` parameter.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    **Note**: The above is an [Erlang list
+    comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html)
+    that loops over each `{Partition, Node}` tuple in the ring and
+    extracts only the partitions that match the given node name, as a
+    list.
+
+
+4.
Execute the repair on all the partitions. Executing the repairs all
+at once will cause a lot of `{shutdown, max_concurrency}` messages in
+the logs. These can be safely ignored, as it is just the transfers
+mechanism enforcing an upper limit on the number of concurrent
+transfers.
+
+    ```erlang
+    [riak_kv_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Monitoring Repairs
+
+The above repair commands can be monitored via the `riak-admin
+transfers` command.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This command can be executed from a `riak attach`
+session like below:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, and will
+look similar to:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Repairs on a node can also be killed remotely from another node in the
+cluster. From a `riak attach` session the below command can be used:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
diff --git a/content/riak/kv/2.2.6/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.2.6/using/repair-recovery/rolling-replaces.md
new file mode 100644
index 0000000000..5ea115df7c
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/repair-recovery/rolling-replaces.md
@@ -0,0 +1,71 @@
+---
+title: "Rolling Replaces"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Rolling Replaces"
+    identifier: "repair_recover_replace"
+    weight: 106
+    parent: "managing_repair_recover"
+toc: true
+---
+
+[upgrade]: {{<baseurl>}}riak/kv/2.2.6/setup/upgrading/cluster/
+[rolling restarts]: {{<baseurl>}}riak/kv/2.2.6/using/repair-recovery/rolling-restart/
+[add node]: {{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes
+
+Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis.
+
+The following steps should be undertaken on each Riak KV node that you wish to replace:
+
+1\. Create a free node:
+
+   a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster.
+
+   b\. Or free up a node that is already in the cluster by having it leave:
+
+   ```bash
+   riak-admin cluster leave »nodename«
+   ```
+
+   After creating a node or having one leave, wait for all transfers to complete:
+
+   ```bash
+   riak-admin transfers
+   ```
+
+2\. From the free node, join it to your cluster:
+
+```bash
+riak-admin cluster join »node_in_cluster«
+```
+
+3\. Next, replace an existing node with the free node:
+
+```bash
+riak-admin cluster replace »nodename« »free_node«
+```
+
+4\. Then review the cluster transition plan:
+
+```bash
+riak-admin cluster plan
+```
+
+5\. And commit the changes:
+
+```bash
+riak-admin cluster commit
+```
+
+6\. Wait for all transfers to complete (see the polling sketch after these steps):
+
+```bash
+riak-admin transfers
+```
+
+7\. Repeat steps 2-6 above until each node has been replaced.
+
+8\. Join the replaced node back into the cluster or decommission the additional node that was created.
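+
+Waiting on transfers in steps 1 and 6 can be automated with a small
+polling loop, mirroring the one used for [rolling restarts][rolling restarts].
+A minimal sketch, assuming `riak-admin` is on the `PATH`:
+
+```bash
+# Sketch: block until riak-admin reports that no transfers are active.
+while ! riak-admin transfers | grep -iqF 'No transfers active'
+do
+  echo 'Transfers in progress'
+  sleep 5
+done
+```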
diff --git a/content/riak/kv/2.2.6/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.2.6/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..4af7201c75 --- /dev/null +++ b/content/riak/kv/2.2.6/using/repair-recovery/rolling-restart.md @@ -0,0 +1,60 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.2.6/ops/running/recovery/rolling-restart + - /riak/kv/2.2.6/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/2.2.6/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. diff --git a/content/riak/kv/2.2.6/using/repair-recovery/secondary-indexes.md b/content/riak/kv/2.2.6/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..df851b7250 --- /dev/null +++ b/content/riak/kv/2.2.6/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,138 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.2.6/ops/running/recovery/repairing-indexes + - /riak/kv/2.2.6/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
+
+### Running a Repair
+
+The secondary indexes of a single partition can be repaired by executing:
+
+```bash
+riak-admin repair-2i <Partition_ID>
+```
+
+The secondary indexes of every partition can be repaired by executing the same command, without a partition ID:
+
+```bash
+riak-admin repair-2i
+```
+
+### Monitoring a Repair
+
+Repairs can be monitored using the below command:
+
+```bash
+riak-admin repair-2i status
+```
+
+### Killing a Repair
+
+In the event the secondary index repair operation needs to be halted, all repairs can be killed with:
+
+```bash
+riak-admin repair-2i kill
+```
+
+----
+
+## Repairing Search Indexes
+
+Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned.
+
+### Running a Repair
+
+If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index.
+
+This is done as efficiently as possible by generating a hash range for all the buckets and thus avoiding a preflist calculation for each key. Only a hash of each key is computed; its range is determined from a bucket→range map, and the hash is then checked against the range.
+
+This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly.
+
+1. From a cluster node with Riak installed, attach to the Riak console:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit enter again to get a console prompt.
+
+2. Get a list of partitions owned by the node that needs repair:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with Ring record information. You can safely ignore it.
+
+3. Then run the following code to get a list of partitions. Replace `'dev1@127.0.0.1'` with the name of the node you need to repair.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the ring and extracts, as a list, only the partitions owned by the given node._
+
+4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` messages in the logs, but these are nothing to worry about: they are just the transfers mechanism enforcing an upper limit on the number of concurrent transfers.
+
+    ```erlang
+    [riak_search_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. When you're done, press `Ctrl-D` to disconnect the console. **Do not** run `q()`, which would cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service; it does not stop the code from running.
+
+
+### Monitoring a Repair
+
+The above repair command can be slow, so if you reattach to the console, you can run the `repair_status` function. You can use the `Partitions` variable defined above to get the status of every partition.
+
+```erlang
+[{P, riak_search_vnode:repair_status(P)} || P <- Partitions].
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node.
This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This means you'll either have to be attached to
+that node's console, or you can use the `rpc` module to make a remote
+call. Here is an example of killing all repairs targeting partitions
+on the local node:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, something akin to this:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Here is an example of executing the call remotely:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails moving partition data, it is safest to make
+them mutually exclusive events. If you join or remove a node, all
+repairs across the entire cluster will be killed.
diff --git a/content/riak/kv/2.2.6/using/running-a-cluster.md b/content/riak/kv/2.2.6/using/running-a-cluster.md
new file mode 100644
index 0000000000..161cce3d3f
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/running-a-cluster.md
@@ -0,0 +1,335 @@
+---
+title: "Running a Cluster"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Running a Cluster"
+    identifier: "managing_running_a_cluster"
+    weight: 200
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/building/basic-cluster-setup
+  - /riak/kv/2.2.6/ops/building/basic-cluster-setup
+---
+
+Configuring a Riak cluster involves instructing each node to listen on a
+non-local interface, i.e. not `127.0.0.1`, and then joining all of the
+nodes together to participate in the cluster.
+
+Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/) located in your `rel/riak/etc` directory (if
+you compiled from source) or `/etc` (if you used a binary install of
+Riak).
+
+The commands below presume that you are running from a source install,
+but if you have installed Riak with a binary install, you can substitute
+the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin`
+with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts
+are located in the `/bin` directory of your installation.
+
+> **Note on changing the `name` value**
+>
+> If possible, you should avoid starting Riak prior to editing the name of
+a node. This setting corresponds to the `nodename` parameter in the
+`riak.conf` file if you are using the newer configuration system, and to
+the `-name` parameter in `vm.args` (as described below) if you are using
+the older configuration system. If you have already started Riak with
+the default settings, you cannot change the `-name` setting and then
+successfully restart the node.
+>
+> If you cannot restart after changing the `-name` value, you have two
+options:
+>
+> * Discard the existing ring metadata by removing the contents of the
+`ring` directory. This will require rejoining all nodes into a
+cluster again.
+>
+> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
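+
+For the first option, discarding the ring metadata amounts to something like the following (a sketch only; the ring directory location depends on your install, e.g. `rel/riak/data/ring` for source builds or `/var/lib/riak/ring` for packaged installs):
+
+```bash
+riak stop
+# remove the ring metadata so the node can be renamed (path is an example)
+rm -rf /var/lib/riak/ring/*
+riak start
+```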
+
+## Configure the First Node
+
+First, stop your Riak node if it is currently running:
+
+```bash
+riak stop
+```
+
+#### Select an IP address and port
+
+Let's say that the IP address for your cluster is 192.168.1.10 and that
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+interface due to performance gains), you should change your
+configuration file:
+
+```riakconf
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"127.0.0.1", 8087 },
+```
+
+becomes
+
+```riakconf
+listener.protobuf.internal = 192.168.1.10:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"192.168.1.10", 8087 },
+```
+
+{{% note title="Note on upgrading to 2.0" %}}
+If you are upgrading to Riak version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config`/`vm.args`
+configuration files or the newer `riak.conf` if you wish. If you have
+installed Riak 2.0 directly, you should use only `riak.conf`.
+
+Below, examples will be provided for both the old and new configuration
+systems. Bear in mind that you need to use either the older or the newer
+system, but never both simultaneously.
+
+More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference).
+{{% /note %}}
+
+If you're using the HTTP interface, you will need to alter your
+configuration in an analogous way:
+
+```riakconf
+listener.http.internal = 127.0.0.1:8098
+```
+
+```appconfig
+%% In the riak_core section:
+
+{http, [ {"127.0.0.1", 8098 } ]},
+```
+
+becomes
+
+```riakconf
+listener.http.internal = 192.168.1.10:8098
+```
+
+```appconfig
+{http, [ {"192.168.1.10", 8098 } ]},
+```
+
+#### Name your node
+
+Every node in Riak has a name associated with it. The default name is
+`riak@127.0.0.1`. Let's say that you want to change the name to
+`riak@192.168.1.10`:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+becomes
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+> **Node Names**
+>
+> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10`
+are both acceptable node naming schemes, but using the FQDN style is
+preferred.
+>
+> Once a node has been started, in order to change the name you must
+either remove ring files from the `/data/ring` directory or
+[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.2.6/using/admin/riak-admin/#cluster-force-replace) the node.
+
+#### Start the node
+
+Now that your node is properly configured, you can start it:
+
+```bash
+riak start
+```
+
+If the Riak node has been previously started, you must use the
+`riak-admin cluster replace` command to change the node name and update
+the node's ring file.
+
+```bash
+riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10
+```
+
+{{% note title="Note on single nodes" %}}
+If a node is started singly using default settings, as you might do when you
+are building your first test environment, you will need to remove the ring
+files from the data directory after you edit your configuration files.
+`riak-admin cluster replace` will not work since the node has not been joined
+to a cluster.
+{{% /note %}}
+
+As with all cluster changes, you need to view the planned changes by
+running `riak-admin cluster plan` and then running `riak-admin cluster
+commit` to finalize those changes.
+
+The node is now properly set up to join other nodes for cluster
+participation. You can proceed to adding a second node to the cluster.
+
+## Add a Second Node to Your Cluster
+
+Repeat the above steps for a second host on the same network, providing
+the second node with a host/port and node name. Once the second node has
+started, use `riak-admin cluster join` to join the second node to the
+first node, thereby creating an initial Riak cluster. Let's say that
+we've named our second node `riak@192.168.1.11`. From the new node's
+`/bin` directory:
+
+```bash
+riak-admin cluster join riak@192.168.1.10
+```
+
+Output from the above should resemble:
+
+```
+Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10`
+```
+
+Next, plan and commit the changes:
+
+```bash
+riak-admin cluster plan
+riak-admin cluster commit
+```
+
+After the last command, you should see:
+
+```
+Cluster changes committed
+```
+
+If your output was similar, then the second Riak node is now part of the
+cluster and has begun syncing with the first node. Riak provides several
+ways to determine the cluster's ring status. Here are two ways to
+examine your Riak cluster's ring:
+
+1. Using the `riak-admin` command:
+
+    ```bash
+    bin/riak-admin status | grep ring_members
+    ```
+
+    With output resembling the following:
+
+    ```bash
+    ring_members : ['riak@192.168.1.10','riak@192.168.1.11']
+    ```
+
+2. Running the `riak attach` command. This will open up an Erlang shell,
+into which you can type the following command:
+
+    ```erlang
+    1> {ok, R} = riak_core_ring_manager:get_my_ring().
+
+    %% Response:
+
+    {ok,{chstate,'riak@192.168.1.10',.........
+    (riak@192.168.1.10)2> riak_core_ring:all_members(R).
+    ['riak@192.168.1.10','riak@192.168.1.11']
+    ```
+
+To join additional nodes to your cluster, repeat the above steps. You
+can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/2.2.6/using/cluster-operations/adding-removing-nodes) from a cluster.
+
+> **Ring Creation Size**
+>
+> All nodes in the cluster
+must have the same initial ring size setting in order to join and
+participate in cluster activity. This setting can be adjusted in your
+configuration file using the `ring_creation_size` parameter if you're
+using the older configuration system or `ring_size` in the new system.
+>
+> Check the value of all nodes if you receive a message like this:
+> `Failed: riak@10.0.1.156 has a different ring_creation_size`
+
+## Running Multiple Nodes on One Host
+
+If you built Riak from source code, or if you are using the Mac OS X
+pre-built package, then you can easily run multiple Riak nodes on the
+same machine. The most common scenario for doing this is to experiment
+with running a Riak cluster.
+
+**Note**: If you have installed the `.deb` or `.rpm` package, then you
+will need to download and build Riak from source to follow the
+directions below.
+
+To run multiple nodes, make copies of the `riak` directory.
+
+- If you ran `make all rel`, then this can be found in `./rel/riak`
+  under the Riak source root directory.
+- If you are running Mac OS X, then this is the directory where you
+  unzipped the `.tar.gz` file.
+
+Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`,
+`./rel/riak3`, and so on, you need to make two changes:
+
+1. 
Set your handoff port and your Protocol Buffers or HTTP port
+(depending on which interface you are using) to different values on each
+node. For example:
+
+    ```riakconf
+    # For Protocol Buffers:
+    listener.protobuf.internal = 127.0.0.1:8187
+
+    # For HTTP:
+    listener.http.internal = 127.0.0.1:8198
+
+    # For either interface:
+    handoff.port = 8199
+    ```
+
+    ```appconfig
+    %% In the pb section of riak_core:
+    {"127.0.0.1", 8187 }
+
+    %% In the http section of riak_core:
+    {"127.0.0.1", 8198}
+    ```
+
+2. Change the name of each node to a unique name. Now, start the nodes,
+changing path names and nodes as appropriate:
+
+```bash
+./rel/riak1/bin/riak start
+./rel/riak2/bin/riak start
+./rel/riak3/bin/riak start
+
+# etc
+```
+
+Next, join the nodes into a cluster:
+
+```bash
+./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1
+./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1
+./rel/riak2/bin/riak-admin cluster plan
+./rel/riak2/bin/riak-admin cluster commit
+```
+
+## Multiple Clusters on One Host
+
+Using the above technique, it is possible to run multiple clusters on
+one computer. If a node hasn't joined an existing cluster, it will
+behave just as a single-node cluster would. Running multiple clusters on one
+computer is simply a matter of having two or more distinct nodes or
+groups of clustered nodes.
diff --git a/content/riak/kv/2.2.6/using/security.md b/content/riak/kv/2.2.6/using/security.md
new file mode 100644
index 0000000000..cb6e247414
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/security.md
@@ -0,0 +1,195 @@
+---
+title: "Security & Firewalls"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Security"
+    identifier: "managing_security"
+    weight: 205
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/advanced/security
+  - /riak/kv/2.2.6/ops/advanced/security
+---
+
+[config reference search]: {{<baseurl>}}riak/kv/2.2.6/configuring/reference/#search
+[config search enabling]: {{<baseurl>}}riak/kv/2.2.6/configuring/search/#enabling-riak-search
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.2.6/configuring/v3-multi-datacenter/ssl
+[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html
+[security basics]: {{<baseurl>}}riak/kv/2.2.6/using/security/basics
+[security managing]: {{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/
+[Solr]: http://lucene.apache.org/solr/
+[usage search]: {{<baseurl>}}riak/kv/2.2.6/developing/usage/search
+
+> **Internal security**
+>
+> This document covers network-level security. For documentation on the
+authentication and authorization features introduced in Riak 2.0, see
+[Authentication and Authorization][security basics] and [Managing Security Sources][security managing].
+
+This article discusses standard configurations and port settings to use
+when providing network security for a Riak cluster. There are two
+classes of access control for Riak:
+
+* Other Riak nodes participating in the cluster
+* Clients making use of the Riak cluster
+
+The settings for both access groups are located in your cluster's
+configuration settings. If you are using the newer configuration system,
+you can set a host and port for each node in that node's `riak.conf`
+file, setting `listener.protobuf` if you are using Riak's Protocol
+Buffers interface or `listener.http` if you are using HTTP (or
+`listener.https` if you are using SSL).
If you are using the older
+configuration system, adjust the settings of `pb`, `http`, or `https`,
+depending on which client interface you are using.
+
+Make note of these configurations and set up your firewall to allow
+incoming TCP access to those ports or IP address/port combinations.
+Exceptions to this are the `handoff_ip` and `handoff_port` directives.
+Those are for communication between Riak nodes only.
+
+## Inter-node Communication
+
+Riak uses the Erlang distribution mechanism for most inter-node
+communication. Riak identifies other machines in the ring using Erlang
+identifiers (`riak@<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves
+these node identifiers to a TCP port on a given machine via the Erlang
+Port Mapper daemon (epmd) running on each cluster node.
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. For inter-node communication, Erlang uses an unpredictable
+port by default; it binds to port 0, which means the first available
+port.
+
+For ease of firewall configuration, Riak can be configured
+to instruct the Erlang interpreter to use a limited range
+of ports. For example, to restrict the range of ports that Erlang will
+use for inter-Erlang node communication to 6000-7999, add the following
+lines to the configuration file on each Riak node:
+
+```riakconf
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+```
+
+```appconfig
+{ kernel, [
+  {inet_dist_listen_min, 6000},
+  {inet_dist_listen_max, 7999}
+]},
+```
+
+The above lines should be added into the top level list in app.config,
+at the same level as all the other applications (e.g. `riak_core`).
+Then configure your firewall to allow incoming access to TCP ports 6000
+through 7999 from whichever network(s) contain your Riak nodes.
+
+### Riak Node Ports
+
+Riak nodes in a cluster need to be able to communicate freely with one
+another on the following ports:
+
+* epmd listener: TCP:4369
+* handoff_port listener: TCP:8099
+* range of ports specified in `app.config` or `riak.conf`
+
+### Riak Client Ports
+
+Riak clients must be able to contact at least one machine in a Riak
+cluster on the following TCP ports:
+
+Protocol | Port
+:--------|:----
+<a href="../../developing/api/http">HTTP</a> | TCP port 8098
+<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087
+
+### Riak Search Ports
+
+Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs
+on each Riak node if search has been [enabled][config search enabling]. When
+Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well
+as some ephemeral ports. The well-known port is determined by the value of the
+`search.solr.jmx_port` in each node's [Search configuration][config reference search].
+The default is 8985.
+
+In addition to JMX ports, Solr also binds to a well-known port of its
+own, as determined by each node's `search.solr.port` setting, which is
+also located in each node's Search configuration. The default is 8093.
+
+# Riak Security Community
+
+## Riak
+
+Riak is a powerful open-source distributed database focused on scaling
+predictably and easily, while remaining highly available in the face of
+server crashes, network partitions or other (inevitable) disasters.
+
+## Commitment
+
+Data security is an important and sensitive issue to many of our users.
+
+A real-world approach to security allows us to balance appropriate
+levels of security and related overhead while creating a fast, scalable,
+and operationally straightforward database.
+
+### Continuous Improvement
+
+Though we make every effort to thwart security vulnerabilities whenever
+possible (including through independent reviews), no system is
+completely secure. We will never claim that Riak is 100% secure (and you
+should seriously doubt anyone who claims their solution is). What we can
+promise is that we openly accept all vulnerabilities from the community.
+When appropriate, we'll publish and make every attempt to quickly
+address these concerns.
+
+### Balance
+
+More layers of security increase operational and administrative costs.
+Sometimes those costs are warranted, sometimes they are not. Our
+approach is to strike an appropriate balance between effort, cost, and
+security.
+
+For example, Riak does not have fine-grained role-based security. Though
+it can be an attractive bullet-point in a database comparison chart,
+you're usually better off finely controlling data access through your
+application or a service layer.
+
+### Notifying Basho
+
+If you discover a potential security issue, please email us at
+**security@basho.com**, and allow us 48 hours to reply.
+
+We prefer to be contacted first rather than learning of a possible
+exploit from blog posts on the Internet. This allows us to open a
+dialogue with the security community on how best to handle it without
+putting any users at risk.
+
+## Security Best Practices
+
+### Authentication and Authorization
+
+For instructions on how to apply permissions and to require client
+authentication, please see our documentation on [Riak Security][security basics].
+
+### Network Configurations
+
+Being a distributed database means that much of Riak's security springs
+from how you configure your network. We have a few recommendations for
+[Security and Firewalls]({{<baseurl>}}riak/kv/2.2.6/using/security/).
+
+### Client Auth
+
+All of the Riak client libraries support encrypted TCP communication
+as well as authentication and authorization. For instructions on how
+to apply permissions and to require client authentication, please see
+our documentation on [Riak Security][security basics].
+
+### Multi-Datacenter Replication
+
+For those versions of Riak that support Multi Data Center (MDC)
+Replication, you can configure Riak 1.2+ to communicate over SSL,
+seamlessly encrypting the message traffic.
+
+See also: [Multi Data Center Replication: SSL][config v3 ssl]
diff --git a/content/riak/kv/2.2.6/using/security/basics.md b/content/riak/kv/2.2.6/using/security/basics.md
new file mode 100644
index 0000000000..00a5b087e1
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/security/basics.md
@@ -0,0 +1,847 @@
+---
+title: "Security Basics"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "Security Basics"
+    identifier: "security_basics"
+    weight: 100
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.2.6/ops/running/authz
+  - /riak/kv/2.2.6/ops/running/authz
+---
+
+> **Note on Network security**
+>
+> This document covers only the 2.0 authentication and authorization
+features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/2.2.6/using/security/).
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/2.2.6/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/2.2.6/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
+ +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak-admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak-admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak-admin security disable +``` + +While security is disabled, clients will need to be reconfigured to no +longer require TLS and send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak-admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics and granting users +selective access to Riak functionality (and also to revoke access). +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created. 
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak-admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak-admin security print-groups
+```
+
+Example output, assuming a user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups |       password       |           options            |
++----------+--------+----------------------+------------------------------+
+| riakuser |        |983e8ae1421574b8733824|              []              |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in encrypted form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and given a `name` of
+`lucius`, the output would look like this:
+
+```
++----------+----------------+----------------------+---------------------+
+| username |     groups     |       password       |       options       |
++----------+----------------+----------------------+---------------------+
+| riakuser |      dev       |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` or `security print-group` (singular) commands
+can be used with a name as an argument to see the same information as
+above, except for only that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak-admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group  |   type   |  bucket  |                 grants                 |
++--------+----------+----------+----------------------------------------+
+| admin  |    *     |    *     | riak_kv.get, riak_kv.delete,           |
+|        |          |          | riak_kv.put                            |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|   ANY    |shopping_list| riak_kv.get                            |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|    *     |      *      | riak_kv.get, riak_kv.delete,           |
+|          |             | riak_kv.put                            |
+|   ANY    |shopping_list| riak_kv.get                            |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately.
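+
+For example, if both a user and a group named `admin` exist, their grants can be inspected separately using the prefixes described above (a usage sketch):
+
+```bash
+riak-admin security print-grants user/admin
+riak-admin security print-grants group/admin
+```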
+
+### Add Group
+
+For easier management of permissions across several users, it is
+possible to create groups to be assigned to those users.
+
+```bash
+riak-admin security add-group admin
+```
+
+### Add User
+
+To create a user with the username `riakuser`, we use the `add-user`
+command:
+
+```bash
+riak-admin security add-user riakuser
+```
+
+Using the command this way will create the user `riakuser` without _any_
+characteristics beyond a username, which is the only attribute that you
+must assign upon user creation.
+
+Alternatively, a password---or other attributes---can be assigned to the
+user upon creation. Here, we'll assign a password:
+
+```bash
+riak-admin security add-user riakuser password=Test1234
+```
+
+### Assigning a Password and Altering Existing User Characteristics
+
+While passwords and other characteristics can be set upon user creation,
+it often makes sense to change user characteristics after the user has
+already been created. Let's say that the user `riakuser` was created
+without a password (or created _with_ a password that we'd like to
+change). The `alter-user` command can be used to modify our `riakuser`
+user:
+
+```bash
+riak-admin security alter-user riakuser password=opensesame
+```
+
+When creating or altering a user, any number of `<option>=<value>`
+pairs can be appended to the end of the command. Any non-standard
+options will be stored and displayed via the `riak-admin security
+print-users` command.
+
+```bash
+riak-admin security alter-user riakuser name=bill age=47 fav_color=red
+```
+
+Now, the `print-users` command should return this:
+
+```
++----------+--------+----------+--------------------------------------------------+
+| username | groups | password |                      options                     |
++----------+--------+----------+--------------------------------------------------+
+| riakuser |        |          |[{"fav_color","red"},{"age","47"},{"name","bill"}]|
++----------+--------+----------+--------------------------------------------------+
+```
+
+**Note**: Usernames _cannot_ be changed using the `alter-user` command.
+For example, running `riak-admin security alter-user riakuser
+username=other-name` will instead add the
+`{"username","other-name"}` tuple to `riakuser`'s options.
+
+### Managing Groups for a User
+
+If we have a user `riakuser` and we'd like to assign her to the
+`admin` group, we assign the value `admin` to the option `groups`:
+
+```bash
+riak-admin security alter-user riakuser groups=admin
+```
+
+If we'd like to make the user `riakuser` both an `admin` and an
+`archoverlord`:
+
+```bash
+riak-admin security alter-user riakuser groups=admin,archoverlord
+```
+
+There is no way to incrementally add groups; even if `riakuser` was
+already an `admin`, it is necessary to list it again when adding the
+`archoverlord` group. Thus, to remove a group from a user, use
+`alter-user` and list all *other* groups.
+
+If the user should be removed from all groups, use `groups=` with no
+list:
+
+```bash
+riak-admin security alter-user riakuser groups=
+```
+
+### Managing Groups for Groups
+
+Groups can be added to other groups for cascading permissions.
+
+```bash
+riak-admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```bash
+riak-admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```bash
+riak-admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak-admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation
+:----------|:---------
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully.
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/2.2.6/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/2.2.6/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/2.2.6/configuring/search/) document. 
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak-admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak-admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Check the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticate against the given pluggable authentication module (PAM) service
+`certificate` | Authenticate using a client certificate
+
+### Example: Adding a Trusted Source
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak-admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+|       users        |    cidr    |  source  | options  |
++--------------------+------------+----------+----------+
+|        all         |127.0.0.1/32|  trust   |    []    |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR.
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/2.2.6/setup/installing/source/erlang). 
This issue should +not affect Erlang 17.0 and later. + +## Enabling SSL + +In order to use any authentication or authorization features, you must +enable SSL for Riak. **SSL is disabled by default**, but you will need +to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#client-interfaces) for the node +as well as a [certification configuration](#certificate-configuration). + +If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/2.2.6/developing/api/http) for Riak and would like to +configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host +and port. The following configuration would establish port 8088 on +`localhost` as the HTTPS port: + +```riakconf +listener.https.$name = 127.0.0.1:8088 + +# By default, "internal" is used as the "name" setting +``` + +```appconfig +{riak_core, [ + %% Other configs + {https, [{"127.0.0.1", 8088}]}, + %% Other configs + ]} +``` + +## TLS Settings + +When using Riak security, you can choose which versions of SSL/TLS are +allowed. By default, only TLS 1.2 is allowed, but this version can be +disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#security) to `on` or `off`: + +* `tls_protocols.tlsv1` +* `tls_protocols.tlsv1.1` +* `tls_protocols.tlsv1.2` +* `tls_protocols.sslv3` + +Three things to note: + +* Among the four available options, only TLS version 1.2 is enabled by + default +* You can enable more than one protocol at a time +* We strongly recommend that you do _not_ use SSL version 3 unless + absolutely necessary + +## Certificate Configuration + +If you are using any of the available [security sources]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#trust-based-authentication), you will need to do so +over a secure SSL connection. In order to establish a secure connection, +you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#security) point to the proper paths for your +generated certs. By default, Riak assumes that all certs are stored in +each node's `/etc` directory. + +If you are using the newer, `riak.conf`-based configuration system, you +can change the location of the `/etc` directory by modifying the +`platform_etc_dir`. More information can be found in our documentation +on [configuring directories]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/#directories). 
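+
+The certificate-related parameters and their defaults are listed in the table below. In the newer system, an explicit `riak.conf` configuration might look like the following (a sketch only; the paths are placeholders for wherever your generated certs actually live):
+
+```riakconf
+ssl.certfile = /etc/riak/cert.pem
+ssl.keyfile = /etc/riak/key.pem
+ssl.cacertfile = /etc/riak/cacertfile.pem
+```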
+ +<table class="riak-conf"> + <thead> + <tr> + <th>Type</th> + <th>Parameter</th> + <th>Default</th> + </tr> + </thead> + <tbody> + <tr> + <td><strong>Signing authority</strong></td> + <td><code>ssl.cacertfile</code></td> + <td><code>#(platform_etc_dir)/cacertfile.pem</code></td> + </tr> + <tr> + <td><strong>Cert</strong></td> + <td><code>ssl.certfile</code></td> + <td><code>#(platform_etc_dir)/cert.pem</code></td> + </tr> + <tr> + <td><strong>Key file</strong></td> + <td><code>ssl.keyfile</code></td> + <td><code>#(platform_etc_dir)/key.pem</code></td> + </tr> + </tbody> +</table> + +If you are using the older, `app.config`-based configuration system, +these paths can be set in the `ssl` subsection of the `riak_core` +section. The corresponding parameters are shown in the example below: + +```appconfig +{riak_core, [ + %% Other configs + + {ssl, [ + {certfile, "./etc/cert.pem"}, + {keyfile, "./etc/key.pem"}, + {cacertfile, "./etc/cacertfile.pem"} + ]}, + + %% Other configs +]} +``` + +## Referer Checks and Certificate Revocation Lists + +In order to provide safeguards against +[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting) +(XSS) and +[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +attacks, Riak performs [secure referer +checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those +checks make it impossible to serve data directly from Riak. To disable +those checks, set the `secure_referer_check` parameter to `off`. + +If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/2.2.6/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +default. To disable this behavior, set the `check_crl` parameter to +`off`. diff --git a/content/riak/kv/2.2.6/using/security/best-practices.md b/content/riak/kv/2.2.6/using/security/best-practices.md new file mode 100644 index 0000000000..68662c5030 --- /dev/null +++ b/content/riak/kv/2.2.6/using/security/best-practices.md @@ -0,0 +1,80 @@ +--- +draft: true +title: "Security Best Practices" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Best Practices" + identifier: "security_best_practices" + weight: 102 + parent: "managing_security" +toc: true +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` diff --git a/content/riak/kv/2.2.6/using/security/managing-sources.md b/content/riak/kv/2.2.6/using/security/managing-sources.md new file mode 100644 index 0000000000..590567b8c6 --- /dev/null +++ b/content/riak/kv/2.2.6/using/security/managing-sources.md @@ -0,0 +1,269 @@ +--- +title: "Managing Security Sources" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Managing Security Sources" + identifier: "security_manage_sources" + weight: 101 + parent: "managing_security" +toc: true +aliases: + - /riak/2.2.6/ops/running/security-sources + - /riak/kv/2.2.6/ops/running/security-sources +--- + +If you're looking for more general information on Riak Security, it may +be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/2.2.6/using/security/basics). + +This document provides more granular information on the four available +authentication sources in Riak Security: trusted networks, password, +pluggable authentication modules (PAM), and certificates. These sources +correspond to `trust`, `password`, `pam`, and `certificate`, +respectively, in the `riak-admin security` interface. + +The examples below will assume that the network in question is +`127.0.0.1/32` and that a Riak user named `riakuser` has been +[created]({{<baseurl>}}riak/kv/2.2.6/using/security/basics/#user-management) and that +security has been [enabled]({{<baseurl>}}riak/kv/2.2.6/using/security/basics/#the-basics). + +{{% note title="Note on SSL connections" %}} +If you use _any_ of the aforementioned security sources, even `trust`, you +will need to do so via a secure SSL connection. 
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak-admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak-admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 password +riak-admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/2.2.6/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
+The default directory for certificates is `/etc`, though you can specify +a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: + +```riakconf +ssl.certfile = /path/to/cert.pem +ssl.keyfile = /path/to/key.pem +ssl.cacertfile = /path/to/cacert.pem +``` + +In the client-side example above, the client's `CN` and Riak username +needed to match. On the server (i.e. Riak) side, the `CN` specified _on +each node_ must match the node's name as registered by Riak. You can +find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/2.2.6/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +`riak-node-1`, you would need to generate your certificate with that in +mind, as in this OpenSSL example: + +```bash +openssl req -new ... '/CN=riak-node-1' +``` + +Once certificates have been properly generated and configured on all of +the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/2.2.6/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +certificate that you generated for the user `riakuser`. + +How to use Riak clients in conjunction with OpenSSL and other +certificates varies from client library to client library. We strongly +recommend checking the documentation of your client library for further +information. + +## PAM-based Authentication + +This section assumes that you have set up a PAM service bearing the name +`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition +specifying `auth` and/or other PAM services set up to authenticate a +user named `riakuser`. As in the certificate-based authentication +example above, the user's name must be the same in both your +authentication module and in Riak Security. + +If we want the user `riakuser` to use this PAM service on `localhost`, +we need to add a `pam` security source in Riak and specify the name of +the service: + +```bash +riak-admin security add-source all 127.0.0.1/32 pam service=riak_pam +``` + +**Note**: If you do not specify a name for your PAM service, Riak will +use the default, which is `riak`. + +To verify that the source has been properly specified: + +```bash +riak-admin security print-sources +``` + +That command should output the following: + +``` ++--------------------+------------+----------+------------------------+ +| users | cidr | source | options | ++--------------------+------------+----------+------------------------+ +| riakuser |127.0.0.1/32| pam |[{"service","riak_pam"}]| ++--------------------+------------+----------+------------------------+ +``` + +You can test that setup most easily by using `curl`. A normal request to +Riak without specifying a user will return an `Unauthorized` message: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +Response: + +``` +<html><head><title>401 Unauthorized
</title></head>
<body><h1>Unauthorized</h1>Unauthorized<p>
<hr><address>mochiweb+webmachine web server</address>
</body></html>
+```
+
+If you identify yourself as `riakuser` and are successfully
+authenticated by your PAM service, you should get either `not found` or
+a Riak object if one is stored in the specified bucket type/bucket/key
+path:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## How Sources Are Applied
+
+When managing security sources---any of the sources explained
+above---you always have the option of applying a source to either a
+single user, multiple users, or all users (`all`). If specific users and
+`all` have no sources in common, this presents no difficulty. But what
+happens if one source is applied to `all` and a different source is
+applied to a specific user?
+
+The short answer is that the more specifically assigned source---i.e. the
+one assigned to the user---will be considered that user's security source.
+We'll illustrate that with the following example, in which the `certificate`
+source is assigned to `all`, but the `password` source is assigned to
+`riakuser`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 certificate
+riak-admin security add-source riakuser 127.0.0.1/32 password
+```
+
+If we run `riak-admin security print-sources`, we'll get the following
+output:
+
+```
++--------------------+------------+-----------+----------+
+|       users        |    cidr    |  source   | options  |
++--------------------+------------+-----------+----------+
+|      riakuser      |127.0.0.1/32| password  |    []    |
+|                    |127.0.0.1/32|certificate|    []    |
+|        all         |127.0.0.1/32|certificate|    []    |
++--------------------+------------+-----------+----------+
+```
+
+As we can see, `password` is set as the security source for `riakuser`,
+whereas everyone else will authenticate using `certificate`.
diff --git a/content/riak/kv/2.2.6/using/security/v2-v3-ssl-ca.md b/content/riak/kv/2.2.6/using/security/v2-v3-ssl-ca.md
new file mode 100644
index 0000000000..051820dd02
--- /dev/null
+++ b/content/riak/kv/2.2.6/using/security/v2-v3-ssl-ca.md
@@ -0,0 +1,80 @@
+---
+draft: true
+title: "V2 / V3 SSL & CA Validation"
+description: ""
+project: "riak_kv"
+project_version: "2.2.6"
+menu:
+  riak_kv-2.2.6:
+    name: "V2/V3 SSL & CA Validation"
+    identifier: "security_validation"
+    weight: 103
+    parent: "managing_security"
+toc: true
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**.
Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` diff --git a/content/riak/kv/2.2.6/using/troubleshooting.md b/content/riak/kv/2.2.6/using/troubleshooting.md new file mode 100644 index 0000000000..735510a4ce --- /dev/null +++ b/content/riak/kv/2.2.6/using/troubleshooting.md @@ -0,0 +1,23 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +--- + +[http 204]: ./http-204 + +## In This Section + +#### [HTTP 204][http 204] + +About the HTTP 204 response. + +[Learn More >>][http 204] diff --git a/content/riak/kv/2.2.6/using/troubleshooting/http-204.md b/content/riak/kv/2.2.6/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..3b187d919e --- /dev/null +++ b/content/riak/kv/2.2.6/using/troubleshooting/http-204.md @@ -0,0 +1,17 @@ +--- +title: "HTTP 204" +description: "" +project: "riak_kv" +project_version: "2.2.6" +menu: + riak_kv-2.2.6: + name: "HTTP 204" + identifier: "troubleshooting_http_204" + weight: 101 + parent: "managing_troubleshooting" +toc: true +--- + +In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers. + +If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored, otherwise you will receive a `204 No Content`. 
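+
+As a quick sketch (assuming a local node on the default HTTP port 8098; the
+bucket `test` and key `demo` are illustrative only):
+
+```curl
+# With returnbody=true, expect "HTTP/1.1 200 OK" plus the stored value
+curl -i -XPUT -H 'Content-Type: text/plain' -d 'hello' \
+  'http://localhost:8098/buckets/test/keys/demo?returnbody=true'
+
+# Without it, expect "HTTP/1.1 204 No Content" and an empty body
+curl -i -XPUT -H 'Content-Type: text/plain' -d 'hello' \
+  'http://localhost:8098/buckets/test/keys/demo'
+```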
diff --git a/content/riak/kv/2.9.0p5/_reference-links.md b/content/riak/kv/2.9.0p5/_reference-links.md new file mode 100644 index 0000000000..c0c84fe755 --- /dev/null +++ b/content/riak/kv/2.9.0p5/_reference-links.md @@ -0,0 +1,249 @@ + +# Riak KV 2.9.0 Reference Links List + + +## Common + +[downloads]: {{}}riak/kv/2.9.0p5/downloads/ +[install index]: {{}}riak/kv/2.9.0p5/setup/installing +[upgrade index]: {{}}riak/kv/2.9.0p5/upgrading +[plan index]: {{}}riak/kv/2.9.0p5/planning +[config index]: {{}}riak/2.9.0p5/using/configuring/ +[config reference]: {{}}riak/kv/2.9.0p5/configuring/reference/ +[manage index]: {{}}riak/kv/2.9.0p5/using/managing +[performance index]: {{}}riak/kv/2.9.0p5/using/performance +[glossary vnode]: {{}}riak/kv/2.9.0p5/learn/glossary/#vnode +[contact basho]: https://www.tiot.jp/en/about-us/contact-us/ + + +## Planning + +[plan index]: {{}}riak/kv/2.9.0p5/setup/planning +[plan start]: {{}}riak/kv/2.9.0p5/setup/planning/start +[plan backend]: {{}}riak/kv/2.9.0p5/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/2.9.0p5/setup/planning/backend/leveled +[plan backend memory]: {{}}riak/kv/2.9.0p5/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.9.0p5/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/2.9.0p5/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.9.0p5/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.9.0p5/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.9.0p5/setup/planning/future + + +## Installing + +[install index]: {{}}riak/kv/2.9.0p5/setup/installing +[install aws]: {{}}riak/kv/2.9.0p5/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.9.0p5/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.9.0p5/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.9.0p5/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.9.0p5/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.9.0p5/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.9.0p5/setup/installing/solaris +[install suse]: {{}}riak/kv/2.9.0p5/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.9.0p5/setup/installing/windows-azure + +[install source index]: {{}}riak/kv/2.9.0p5/setup/installing/source +[install source erlang]: {{}}riak/kv/2.9.0p5/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.9.0p5/setup/installing/source/jvm + +[install verify]: {{}}riak/kv/2.9.0p5/setup/installing/verify + + +## Upgrading + +[upgrade index]: {{}}riak/kv/2.9.0p5/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.9.0p5/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.9.0p5/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.9.0p5/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.9.0p5/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.9.0p5/setup/downgrade + + +## Configuring + +[config index]: {{}}riak/kv/2.9.0p5/configuring +[config basic]: {{}}riak/kv/2.9.0p5/configuring/basic +[config backend]: {{}}riak/kv/2.9.0p5/configuring/backend +[config manage]: {{}}riak/kv/2.9.0p5/configuring/managing +[config reference]: {{}}riak/kv/2.9.0p5/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.9.0p5/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.9.0p5/configuring/load-balancing-proxy +[config mapreduce]: 
{{}}riak/kv/2.9.0p5/configuring/mapreduce +[config search]: {{}}riak/kv/2.9.0p5/configuring/search/ + +[config v3 mdc]: {{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/ssl + +[config v2 mdc]: {{}}riak/kv/2.9.0p5/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.9.0p5/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.9.0p5/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.9.0p5/configuring/v2-multi-datacenter/ssl + + + +## Using + +[use index]: {{}}riak/kv/2.9.0p5/using/ +[use admin commands]: {{}}riak/kv/2.9.0p5/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.9.0p5/using/running-a-cluster + +### Reference + +[use ref custom code]: {{}}riak/kv/2.9.0p5/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.9.0p5/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.9.0p5/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.9.0p5/using/reference/search +[use ref 2i]: {{}}riak/kv/2.9.0p5/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.9.0p5/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.9.0p5/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.9.0p5/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.9.0p5/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.9.0p5/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.9.0p5/using/reference/v2-multi-datacenter + +### Cluster Admin + +[use admin index]: {{}}riak/kv/2.9.0p5/using/admin/ +[use admin commands]: {{}}riak/kv/2.9.0p5/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.9.0p5/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.9.0p5/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.9.0p5/using/admin/riak-control/ + +### Cluster Operations + +[cluster ops add remove node]: {{}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.9.0p5/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.9.0p5/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.9.0p5/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.9.0p5/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.9.0p5/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.9.0p5/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.9.0p5/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.9.0p5/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.9.0p5/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.9.0p5/using/cluster-operations/v2-multi-datacenter + +### Repair/Recover + +[repair recover index]: {{}}riak/kv/2.9.0p5/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.9.0p5/using/repair-recovery/failure-recovery/ + +### Security + +[security index]: {{}}riak/kv/2.9.0p5/using/security/ +[security basics]: 
{{}}riak/kv/2.9.0p5/using/security/basics +[security managing]: {{}}riak/kv/2.9.0p5/using/security/managing-sources/ + +### Performance + +[perf index]: {{}}riak/kv/2.9.0p5/using/performance/ +[perf benchmark]: {{}}riak/kv/2.9.0p5/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.9.0p5/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.9.0p5/using/performance/erlang +[perf aws]: {{}}riak/kv/2.9.0p5/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.9.0p5/using/performance/latency-reduction + +### Troubleshooting + +[troubleshoot http]: {{}}riak/kv/2.9.0p5/using/troubleshooting/http-204 + + +## Developing + +[dev index]: {{}}riak/kv/2.9.0p5/developing +[dev client libraries]: {{}}riak/kv/2.9.0p5/developing/client-libraries +[dev data model]: {{}}riak/kv/2.9.0p5/developing/data-modeling +[dev data types]: {{}}riak/kv/2.9.0p5/developing/data-types +[dev kv model]: {{}}riak/kv/2.9.0p5/developing/key-value-modeling + +### Getting Started + +[getting started]: {{}}riak/kv/2.9.0p5/developing/getting-started +[getting started java]: {{}}riak/kv/2.9.0p5/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.9.0p5/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.9.0p5/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.9.0p5/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.9.0p5/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.9.0p5/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.9.0p5/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.9.0p5/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.9.0p5/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.0p5/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.0p5/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.0p5/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.0p5/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.0p5/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.0p5/developing/getting-started/golang/object-modeling + +### Usage + +[usage index]: {{}}riak/kv/2.9.0p5/developing/usage +[usage bucket types]: {{}}riak/kv/2.9.0p5/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.9.0p5/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.9.0p5/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.9.0p5/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.9.0p5/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.9.0p5/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.9.0p5/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.9.0p5/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.9.0p5/developing/usage/search +[usage search schema]: {{}}riak/kv/2.9.0p5/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.9.0p5/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.9.0p5/developing/usage/updating-objects + +### App Guide + +[apps mapreduce]: {{}}riak/kv/2.9.0p5/developing/app-guide/advanced-mapreduce +[apps replication properties]: 
{{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/2.9.0p5/developing/api/backend +[dev api http]: {{}}riak/kv/2.9.0p5/developing/api/http +[dev api http status]: {{}}riak/kv/2.9.0p5/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/2.9.0p5/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.9.0p5/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.9.0p5/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.9.0p5/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.9.0p5/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.9.0p5/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.9.0p5/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.9.0p5/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.9.0p5/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.9.0p5/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.9.0p5/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.9.0p5/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.9.0p5/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + diff --git a/content/riak/kv/2.9.0p5/add-ons.md b/content/riak/kv/2.9.0p5/add-ons.md new file mode 100644 index 0000000000..1ec83cc10d --- /dev/null +++ b/content/riak/kv/2.9.0p5/add-ons.md @@ -0,0 +1,25 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +aliases: + - /riak/2.9.0p5/add-ons/ + - /riak/2.9.0/add-ons/ + - /riak/kv/2.9.0/add-ons/ + - /riak/kv/2.9.0p1/add-ons/ + - /riak/kv/2.9.0p2/add-ons/ + - /riak/kv/2.9.0p3/add-ons/ + - /riak/kv/2.9.0p4/add-ons/ +--- + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
+ +* [Riak Redis Add-on]({{}}riak/kv/2.9.0p5/add-ons/redis/) diff --git a/content/riak/kv/2.9.0p5/add-ons/redis.md b/content/riak/kv/2.9.0p5/add-ons/redis.md new file mode 100644 index 0000000000..247d2e90e8 --- /dev/null +++ b/content/riak/kv/2.9.0p5/add-ons/redis.md @@ -0,0 +1,67 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/add-ons/redis/ + - /riak/2.9.0/add-ons/redis/ + - /riak/kv/2.9.0/add-ons/redis/ + - /riak/kv/2.9.0p1/add-ons/redis/ + - /riak/kv/2.9.0p2/add-ons/redis/ + - /riak/kv/2.9.0p3/add-ons/redis/ + - /riak/kv/2.9.0p4/add-ons/redis/ +--- + + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. +{{% /note %}} + +Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV. + +RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate. + +Whether you are looking to build out a session, shopping cart, advertisement or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV). 
+
+## Compatibility
+
+RRA is supported on the following platforms:
+
+* RHEL/CentOS 6
+* RHEL/CentOS 7
+* Ubuntu 12.04 LTS "Precise Pangolin"
+* Ubuntu 14.04 LTS "Trusty Tahr"
+* Debian 7 "Wheezy"
+* Debian 8 "Jessie"
+
+RRA is compatible with the following services:
+
+* Riak KV Enterprise (2.1.4+)
+* Riak TS Enterprise (1.4.0+)
+* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster)
+  * Redis Cluster and RRA's consistent hashing are at odds; the conflict
+    surfaces as errors such as MOVED, ASK, and CROSSSLOT messages from
+    Redis, see (WIP):
+    https://github.com/antirez/redis-rb-cluster
+
+## Get Started
+
+* [Set up RRA.][addon redis setup]
+* [Use RRA with various clients.][addon redis use]
+* [Develop with RRA.][addon redis develop]
+* [Learn about RRA's features.][addon redis features]
diff --git a/content/riak/kv/2.9.0p5/add-ons/redis/developing-rra.md b/content/riak/kv/2.9.0p5/add-ons/redis/developing-rra.md
new file mode 100644
index 0000000000..102d99ae96
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/add-ons/redis/developing-rra.md
@@ -0,0 +1,334 @@
+---
+title: "Developing with Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Develop with Redis Add-on"
+    identifier: "add-ons_redis_develop"
+    weight: 403
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/add-ons/redis/developing-rra/
+  - /riak/2.9.0/add-ons/redis/developing-rra/
+  - /riak/kv/2.9.0/add-ons/redis/developing-rra/
+  - /riak/kv/2.9.0p1/add-ons/redis/developing-rra/
+  - /riak/kv/2.9.0p2/add-ons/redis/developing-rra/
+  - /riak/kv/2.9.0p3/add-ons/redis/developing-rra/
+  - /riak/kv/2.9.0p4/add-ons/redis/developing-rra/
+---
+
+
+[redis-clients]: http://redis.io/clients
+[usage bucket types]: {{}}riak/kv/2.9.0p5/developing/usage/bucket-types/
+[dev api http]: {{}}riak/kv/2.9.0p5/developing/api/http
+[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/
+[apps replication properties]: {{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties
+[usage commit hooks]: {{}}riak/kv/2.9.0p5/developing/usage/commit-hooks/
+[concept causal context]: {{}}riak/kv/2.9.0p5/learn/concepts/causal-context
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations.
+
+## Overview
+
+Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides accessibility to Riak KV, as a persistent data store, with Redis, as a cache, through the various Redis client libraries and the command-line interface tool `redis-cli`.
+
+As with Riak KV, the cache proxy service almost always performs best and most
+predictably when you use the basic CRUD operations -- Create, Read, Update,
+Delete -- that you'd find in any key/value store. Learning these operations
+is a great place to start when beginning to develop applications that use
+RRA.
+
+The set of clients (including recommendations) for Redis is listed at
+[Redis clients][redis-clients]. For brevity's sake, the examples provided here are
+in:
+
+* Erlang (Eredis)
+* Javascript (node_redis)
+* Python (redis-py)
+* Ruby (redis-rb)
+* Scala (lettuce)
+* Java, see the Scala examples. The code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client.
+
+## Riak KV Setup
+
+While you can use Riak Redis Add-on with Riak KV configured so either `last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'.
+
+The cache proxy service is tested under both configurations. However, due to the lack of support in the Redis protocol for returning multiple values for a single `GET`, effectively `last_write_wins` semantics apply.
+
+For a deeper explanation of Riak KV's configurable behaviors, see John Daily's
+blog series [part 4][config-behaviors].
+
+### Bucket Type Setup
+
+#### Create a Bucket Type
+
+If your application organizes data in a way that does not include bucket-type
+and instead only uses bucket to organize its keyspace, the `default` bucket-type
+can be used by omitting the bucket-type portion of the colon-delimited
+hierarchical namespaced key. In other words, `test:food` is equivalent to
+`default:test:food`, where the bucket-type is `default`, the bucket is `test`,
+and the key is `food`. For the examples here, we will use `rra:test:food` to clearly
+use a bucket-type.
+
+If your application organizes data including a bucket-type, ensure that the
+bucket-type is created in Riak without specifying the data type, so effectively
+an opaque value, i.e. a `string`. The following command provides an example of
+creating the bucket-type `rra`:
+
+```sh
+if ! riak-admin bucket-type status rra >/dev/null 2>&1; then
+    riak-admin bucket-type create rra '{"props":{}}'
+    riak-admin bucket-type activate rra
+fi
+```
+
+#### Set Bucket Props
+
+The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false':
+
+```sh
+curl -XPUT -H 'Content-Type: application/json' \
+     -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \
+     'http://127.0.0.1:8098/types/rra/buckets/test/props'
+```
+
+For additional configuration options see [bucket properties][dev api http].
+
+## Object/Key Operations
+
+Riak KV organizes data into buckets, keys, and values, with
+[bucket types][usage bucket types] acting as an additional namespace in Riak KV
+versions 2.0 and greater. Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket.
+
+Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or other plaintext representations that can be parsed in the client application (e.g. YAML).
+
+While buckets are a flat namespace in Riak KV and you can name them
+whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy
+service, the Redis bucket_type:bucket:key is mapped to the Riak KV
+bucket_type/bucket/key, so bucket type and bucket names should not contain
+a colon (`:`). When not specified, the bucket type defaults to "default".
+
+Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets.
+
+The same goes for naming keys: many objects can have the same key as long as they're in different buckets. There is no restriction on a key containing a colon (`:`), and this practice of representing a nested namespace is common in applications using Redis.
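+
+As a brief sketch of that mapping (assuming an RRA listener on the default
+port 22122 and the `rra` bucket type created above; the keys themselves are
+illustrative only):
+
+```bash
+# "rra:test:food" maps to bucket type "rra", bucket "test", key "food"
+redis-cli -h 127.0.0.1 -p 22122 set rra:test:food apple
+
+# extra colons remain part of the key, so this presumably maps to bucket
+# type "rra", bucket "test", key "food:color"
+redis-cli -h 127.0.0.1 -p 22122 set rra:test:food:color red
+```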
+
+Riak KV [bucket types][usage bucket types] enable you to provide common
+configurations for buckets (as many buckets as you wish). This means you can
+easily enable buckets to share common configurations, i.e. identical
+[replication properties][apps replication properties] or
+[commit hooks][usage commit hooks].
+
+
+## Reading Objects
+
+Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading-through to Riak KV which results in greater resilience through node outages and network partitions.
+
+To request a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.get("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.get("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.get("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+var value = connection.get("rra:test:food")
+```
+
+### Get Configuration Parameters
+
+>**Note:** The cache proxy service read option (related to replication factor and
+consistency concern) may optionally be set within the nutcracker.conf. This will result in an override of the setting value at the bucket-level in Riak KV.
+
+The following configuration parameters apply to `GET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` |
+|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` |
+|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) |
+|`timeout` | The number of milliseconds to await a response. | `0` (server specified) |
+
+
+### Sibling Resolution
+
+As the Redis protocol does not provide a means to return multiple siblings,
+the cache proxy service must provide server-side sibling resolution. At present, only last-write-wins sibling resolution is available. The result is an effective
+last-write-wins configuration for access through the cache proxy service.
+
+
+## Writing Objects
+
+Writes via the cache proxy service are analogous to a Redis `SET`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache. As with HTTP PUT, `SET` semantically covers both create and update
+operations.
+
+To set a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.set("rra:test:food", "apple", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.set("rra:test:food", "apple")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.set("rra:test:food", "apple")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.set("rra:test:food", "apple")
+```
+
+### Set Configuration Parameters
+
+>**Note:** The cache proxy service write option (related to replication factor and
+consistency concern) may optionally be set within the nutcracker.conf, resulting
+in an override of the setting value at the bucket-level in Riak KV.
+
+The following configuration parameters apply to `SET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+### Sibling Explosion
+
+As noted in the section "Sibling Resolution" above, Riak KV provides for a line of
+descent (known as the [causal context][concept causal context]) for a value stored at a key. Clients
+performing write operations provide this causal context by setting the vector
+clock (VClock) that they last read.
+
+If a client does not provide the causal context, Riak KV makes no assumptions and treats the write as a new causal context, semantically equivalent to a
+create. In the case that a value is already stored at the key, this would lead
+to a sibling.
+
+Since the Redis protocol does not provide a means to pass a VClock, the cache
+proxy service needs to perform a read-before-write to obtain the current VClock so the write can continue the causal context previously established and avoid
+"sibling explosion".
+
+Despite these efforts, in the event of a network partition, siblings will still
+be created as clients writing to nodes on either side of the network partition
+can create divergent lines of descent. Sibling resolution remains the means
+to merge these lines of descent into a coherent causal context.
+
+## Deleting Objects
+
+Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache.
+
+To delete a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.del("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.del("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.del("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.del("rra:test:food")
+```
+
+### Delete Configuration Parameters
+
+The following configuration parameters apply to `DEL` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
diff --git a/content/riak/kv/2.9.0p5/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.9.0p5/add-ons/redis/redis-add-on-features.md
new file mode 100644
index 0000000000..2de2d4dcd7
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/add-ons/redis/redis-add-on-features.md
@@ -0,0 +1,140 @@
+---
+title: "Riak Redis Add-on Features"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Redis Add-on Features"
+    identifier: "add-ons_redis_features"
+    weight: 504
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/add-ons/redis/redis-add-on-features/
+  - /riak/2.9.0/add-ons/redis/redis-add-on-features/
+  - /riak/kv/2.9.0/add-ons/redis/redis-add-on-features/
+  - /riak/kv/2.9.0p1/add-ons/redis/redis-add-on-features/
+  - /riak/kv/2.9.0p2/add-ons/redis/redis-add-on-features/
+  - /riak/kv/2.9.0p3/add-ons/redis/redis-add-on-features/
+  - /riak/kv/2.9.0p4/add-ons/redis/redis-add-on-features/
+---
+
+
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[GET-sequence]: {{}}images/redis/GET_seq.msc.png
+[SET-sequence]: {{}}images/redis/SET_seq.msc.png
+[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png
+[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png
+[redis docs]: http://redis.io/commands
+[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis/
+
+## Overview
+
+The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware.
+
+On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available:
+
+* [Pre-sharding](#pre-sharding)
+* [Connection Aggregation](#connection-aggregation)
+* [Command Pipelining](#command-pipelining)
+* [Read-through Cache](#read-through-cache)
+* [Write-around Cache](#write-around-cache)
+* [Commands](#commands)
+* [Object Lifetime](#object-lifetime)
+
+## Pre-sharding
+
+Pre-sharding with consistent hashing dispatches object reads and writes based
+on a configurable hash function, spreading load across multiple cache servers.
+The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then
+requests are routed to the Redis server that handles that portion of the key
+range.
+
+Redis with no persistence is used as the frontend cache proxy service, and
+Redis as a data server holds all data in memory. The addressable memory of
+cache proxy is limited. By employing pre-sharding, the total addressable cache
+memory space is extended by the number of Redis servers.
+
+## Connection Aggregation
+
+Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers.
This reduces the total required connections to the Redis server for the same key. + +Redis clients in various languages support specifying multiple servers, as well +as implementing multiple methods of spreading load across those servers (i.e. +round-robin load balancing or consistent hashing). Since the cache proxy service is providing consistent hashing, any Redis client method of supporting multiple +servers will suffice. + +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideal due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server during a write (DELETE or PUT) to the +backend server. + +A short `CACHE_TTL`, for example "15s", reduces a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three write cache strategies, the write-around cache strategy is the least +prone to race condition, but least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV utilizing the read-through + caching strategy with a TTL set at service configuration time. 
+ +* SET - set the value of a key to Riak KV and invalidate cache, issue a PEXPIRE + to Redis. + +* DEL - delete the value of a key to Riak KV and invalidate cache, issue a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs]. + +>**Important:** While the cache proxy service does support issuing DEL commands, PEXPIRE, with a small TTL, is suggested instead when the semantic intent is to remove an item from cache. With write-around, the DEL command will issue a delete to the Riak backend. + +## Object Lifetime + +With the combination of read-through and write-around cache strategies, the +full object lifetime for a key-value is represented by the following +sequence diagram: + +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.9.0p5/add-ons/redis/set-up-rra.md b/content/riak/kv/2.9.0p5/add-ons/redis/set-up-rra.md new file mode 100644 index 0000000000..9e569893fa --- /dev/null +++ b/content/riak/kv/2.9.0p5/add-ons/redis/set-up-rra.md @@ -0,0 +1,289 @@ +--- +title: "Setting Up Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Set Up Redis Add-on" + identifier: "add-ons_redis_setup" + weight: 201 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/add-ons/redis/set-up-rra/ + - /riak/2.9.0/add-ons/redis/set-up-rra/ + - /riak/kv/2.9.0/add-ons/redis/set-up-rra/ + - /riak/kv/2.9.0p1/add-ons/redis/set-up-rra/ + - /riak/kv/2.9.0p2/add-ons/redis/set-up-rra/ + - /riak/kv/2.9.0p3/add-ons/redis/set-up-rra/ + - /riak/kv/2.9.0p4/add-ons/redis/set-up-rra/ +--- + + +[addon redis develop]: ../developing-rra/ +[addon redis use]: ../using-rra +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[install index]: {{}}riak/kv/2.9.0p5/setup/installing +[perf open files]: {{}}riak/kv/2.9.0p5/using/performance/open-files-limit/#changing-the-limit +[lab ansible]: https://github.com/paegun/ansible-cache-proxy + +This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. + +## Prerequisites + +Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index]. + +While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go. + +This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used +to configure the cache proxy. + +## In the Lab + +An ansible setup for the Riak Redis Add-on (RRA) was developed to provide a +runnable example of an installation, see [ansible cache proxy][lab ansible]. +The remainder of this setup guide lists the commands required to install and +configure RRA manually. + +## Installing + +1. 
On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files]. +2. On all Redis hosts, install Redis. **Skip ahead* if you already have Redis installed. +3. Install Riak Redis Add-on. + +### Change the open-files limit + +As with Riak KV, both the total open-files limit and the per-user open-files limit +must be high enough to allow Redis and Riak Redis Add-on (RRA) to function. + +For a complete guide on changing limit in Riak KV, see +[Changing the limit][perf open files]. + +#### Linux + +On most Linux distributions, the total limit for open files is controlled by `sysctl`. + +```bash +sudo sysctl fs.file-max fs.file-max=65536 +sudo sysctl -p +``` + +To change the per-user file limit, you need to edit `/etc/security/limits.conf`. + +#### CentOS + +On CentOS systems, set a proper limit for the user you're usually logging in with +to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the +executing user. + +#### Ubuntu + +On Ubuntu systems, the following settings are recommended: + +```config +»USERNAME« hard nofile 65536 +»USERNAME« soft nofile 65536 +root hard nofile 65536 +root soft nofile 65536 +``` + +>**Note:** You may need to log out of your shell and then log back in for these changes to take effect. + + +### Install Redis + +>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on". + +#### Install on Ubuntu + +If you are on Ubuntu, run the following to install Redis: + +```bash +# add the dotdeb repositories to your APT sources. +sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <**Notes:** ss is used here to support a minimal installed system, but netstat may be used as well. + +### Install Riak Redis Add-on (RRA) + +>**Note:** +>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums. + +If you are on CentOS, run the following to install RRA: + +```bash +sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm +``` + +If you are on Ubuntu, run the following to install RRA: + +```bash +sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb +``` + +## Configuring Riak Redis Add-on + +To configure Riak Redis Add-on (RRA), edit the configuration file: /etc/cache_proxy/cache_proxy_22122.yml. + +The RRA configuration file is in YAML format. An example configuration +file is provided in the install, and it contains all relevant configuration elements: + +```config +» XML node name« : + listen: 0.0.0.0:22122 + hash: fnv1a_64 + distribution: ketama + auto_eject_hosts: true + redis: true + server_retry_timeout: 2000 + server_failure_limit: 1 + server_ttl: 1h + servers: + - 127.0.0.1:6379:1 + backend_type: riak + backend_max_resend: 2 + backends: + - 127.0.0.1:8087 +``` + +Set the `listen` configuration value to set the RRA listen port. + +To set the time-to-live (TTL) for values stored in cache, set the `server_ttl` +configuration value. Human-readable time values can be specified, +with the most likely units being `s` for seconds or `ms` for milliseconds. + +Set the list of Redis servers by listing the servers, separated by `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional). + +Set the list of Riak KV servers by listing the servers, separated by `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«` +(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here. 
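+
+For example, a hypothetical pool (placeholder name and addresses, trimmed to
+the fields discussed above) with two equally weighted Redis servers and a
+three-node Riak KV cluster, where the backends omit the optional weight:
+
+```config
+»pool name«:
+  listen: 0.0.0.0:22122
+  server_ttl: 15s
+  servers:
+    - 10.0.0.1:6379:1
+    - 10.0.0.2:6379:1
+  backend_type: riak
+  backends:
+    - 10.0.1.1:8087
+    - 10.0.1.2:8087
+    - 10.0.1.3:8087
+```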
+
+### Verify your configuration
+
+If you are on Ubuntu, run the following to start RRA:
+
+```bash
+sudo service cache_proxy start
+```
+
+If you are on CentOS, run the following to start RRA:
+
+```bash
+sudo systemctl start cache_proxy
+```
+
+To verify RRA is running and listening on the expected port, run the
+following (using the loopback interface and the default RRA port 22122
+as an example):
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS
+redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on
+```
+
+Redis should respond with `SUCCESS`.
+
+If RRA is responding with the expected output, run the following to
+clean up and remove the test value:
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on
+```
+
+If you did not get the expected output, run the following
+to verify that RRA is running on the expected port:
+
+```bash
+ss -nlp | grep [n]utcracker
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.
+
+## Next Steps
+
+Get started with some [basic usage][addon redis use] or check out more info on [setting up for development (with examples)][addon redis develop].
diff --git a/content/riak/kv/2.9.0p5/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.9.0p5/add-ons/redis/set-up-rra/deployment-models.md
new file mode 100644
index 0000000000..799fe6b1b8
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/add-ons/redis/set-up-rra/deployment-models.md
@@ -0,0 +1,147 @@
+---
+title: "Riak Redis Add-on Deployment Models"
+description: "Explore the various models for deploying Riak Redis Add-on"
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Redis Add-on Deployment Models"
+    identifier: "add-ons_redis_deployment"
+    weight: 201
+    parent: "add-ons_redis_setup"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/add-ons/redis/set-up-rra/deployment-models/
+  - /riak/2.9.0/add-ons/redis/set-up-rra/deployment-models/
+  - /riak/kv/2.9.0/add-ons/redis/set-up-rra/deployment-models/
+  - /riak/kv/2.9.0p1/add-ons/redis/set-up-rra/deployment-models/
+  - /riak/kv/2.9.0p2/add-ons/redis/set-up-rra/deployment-models/
+  - /riak/kv/2.9.0p3/add-ons/redis/set-up-rra/deployment-models/
+  - /riak/kv/2.9.0p4/add-ons/redis/set-up-rra/deployment-models/
+---
+
+
+[Local-deployment]: {{}}images/redis/rra_deployment_local.png
+[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png
+[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png
+
+## Deployment Models
+
+### Local Cache Deployment
+
+In a local cache deployment, the RRA and Redis are deployed to the application
+server.
+
+![Local-deployment]({{}}images/redis/rra_deployment_local.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are local.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections from Application Servers to Riak nodes are distributed
+  and bounded to equal the number of Riak nodes _multiplied_ by the number of
+  Application Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Cache hits are extremely fast
+
+Disadvantages:
+
+* Cache writes on one application server are *not* observed on other application
+  servers, so cache hit rates are likely lower unless some form of consistent
+  routing to the application server exists within the solution.
+* Redis competing for RAM with the application service may be problematic
+
+### Colocated Cache Deployment
+
+In a colocated cache deployment, the RRA may be deployed either to the
+application server (suggested) or to the Riak servers, and Redis is deployed to
+the Riak servers.
+
+In the case of deploying the RRA to the application servers, the RRA features
+of reducing connections from the relatively high number of application service
+instances to the fewer Redis (cache) and Riak (persistent) data service
+instances allow for the greatest scale at the expense of the deployment cost
+of pushing a service and its configuration.
+
+In the case of deploying the RRA to the colocated Redis and Riak data servers,
+the maximum scale for the solution is constrained by the number of network
+connections from the application services, while deployment costs remain a matter
+of pushing a service and its configuration. In either case, deployment should
+be automated, so deployment costs are not multiplied by the number of servers.
+
+![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are distributed and bounded to equal the number of Riak nodes
+  _multiplied_ by the number of Application Service instances.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections from RRA to Riak nodes are distributed and bounded to
+  equal the number of Riak nodes _squared_.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so slightly increased latency compared to local.
+* Redis competing for RAM with Riak will likely be problematic. Redis should
+  be configured with `maxmemory` and `maxmemory-policy` set so that Riak is
+  allotted sufficient RAM to serve the more important persistent data storage
+  and retrieval services. See http://redis.io/topics/config
+* This model may seem to provide data locality, but in the case of faults in
+  either the Redis or Riak services, the fault tolerance mechanisms of RRA and
+  Riak will not match exactly: communicating the information needed to support
+  such lock-step fault tolerance would lead to greater mean latencies, and Riak
+  provides superior 99th-percentile latency performance in the face of faults.
+
+
+### Distributed Cache Deployment
+
+In a distributed cache deployment, the RRA is deployed to the application server
+and Redis is deployed to standalone servers, separate from Riak cluster nodes.
+
+![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are local.
+* Redis: The connections from the RRA Service instances to Redis Service
+  instances are distributed and bounded to equal the number of Application
+  Servers _multiplied_ by the number of Redis Servers.
+* Riak: The connections from RRA to Riak nodes are distributed and bounded to
+  equal the number of Riak nodes _multiplied_ by the number of Application
+  Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+* Keeps RRA near the application, reducing network connections.
+* Moves Redis to distinct servers, allowing the cache more RAM and not
+  constraining the RAM of either application or persistent data services.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so increased latency compared to local.
+
+### Recommendation
+
+The relative advantages and disadvantages of the Distributed Cache Deployment,
+most notably the increased cache hit rate and reduced connection overhead,
+should make it the standout choice for applications requiring the scale and
+operational simplicity of Riak. For this reason, we recommend the Distributed
+Cache Deployment.
diff --git a/content/riak/kv/2.9.0p5/add-ons/redis/using-rra.md b/content/riak/kv/2.9.0p5/add-ons/redis/using-rra.md
new file mode 100644
index 0000000000..2075a2d161
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/add-ons/redis/using-rra.md
@@ -0,0 +1,250 @@
+---
+title: "Using Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Using Redis Addon"
+    identifier: "add-ons_redis_getstarted"
+    weight: 302
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/kv/2.9.0p5/add-ons/redis/get-started-with-rra
+  - /riak/2.9.0p5/add-ons/redis/using-rra/
+  - /riak/2.9.0/add-ons/redis/using-rra/
+  - /riak/kv/2.9.0/add-ons/redis/using-rra/
+  - /riak/kv/2.9.0p1/add-ons/redis/using-rra/
+  - /riak/kv/2.9.0p2/add-ons/redis/using-rra/
+  - /riak/kv/2.9.0p3/add-ons/redis/using-rra/
+  - /riak/kv/2.9.0p4/add-ons/redis/using-rra/
+---
+
+
+[addon redis develop]: ../developing-rra/
+[addon redis setup]: ../set-up-rra/
+[dev api http]: {{}}riak/kv/2.9.0p5/developing/api/http/
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+Now that you've [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client which supports `GET`, `SET`, and `DEL` operations.
+
+This page will walk you through using RRA.
+
+## Prerequisites
+
+We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine.
+
+You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http].
+
+## Run the Read-Through Test
+
+Throughout this test example, the bucket "test" and key "foo" are used to
+demonstrate how to address the hierarchical namespace support in Riak KV
+through the flat Redis key. The bucket type is not specified in this example,
+so the default bucket type, named "default", is effectively used. For additional
+information regarding key namespaces, see [develop Riak Redis Add-on (RRA)][addon redis develop].
+
+The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are:
+
+* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings.
+* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`.
+* GET the Riak object at the `test` bucket with the key `foo`.
+* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.)
+* Assert that the value obtained from the previous cache proxy GET is 'bar'.
+
+First, create a file named `read_through_test.sh` with the following content:
+
+```bash
+#!/bin/bash
+
+# set test environment; the RRA port, Riak HTTP port, and test bucket
+# may be overridden by the first three command-line arguments
+RIAK_HTTP_IP="127.0.0.1"
+RIAK_HTTP_PORT="${2:-8098}"
+CACHE_PROXY_IP="127.0.0.1"
+CACHE_PROXY_PORT="${1:-22122}"
+CACHE_PROXY_STATISTICS_PORT="22123"
+RIAK_TEST_BUCKET="${3:-test}"
+KEY="foo"
+VALUE="bar"
+
+# DELETE Riak object, ensure no siblings
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# PUT Riak object
+curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# GET Riak object
+RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY")
+
+# GET Cache Proxy value
+CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" get "$RIAK_TEST_BUCKET:$KEY")
+
+# DELETE Riak object, cleanup
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# Assert
+if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then
+    RESULT="Success"
+else
+    RESULT="FAIL"
+fi
+echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy."
+```
+
+Then, once you've created the file, make it executable and run it with the RRA port, the Riak KV HTTP port, and the test bucket as arguments:
+
+```bash
+chmod +x read_through_test.sh
+./read_through_test.sh 22122 8098 test
+```
+
+### Exceptions
+
+If the test does not pass, verify that both Redis and RRA are running. You can do this by running:
+
+```bash
+ps aux | grep [r]edis
+ps aux | grep [n]utcracker
+```
+
+The result should list `redis` and `nutcracker` respectively.
+
+Also, verify that Riak KV is started and listening on the protocol buffer port specified:
+
+```bash
+sudo riak config effective | grep proto
+```
+
+If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following:
+
+```bash
+sudo service cache_proxy restart
+```
+
+If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows:
+
+1. Stop RRA.
+2. Stop Redis.
+3. *Optional* Restart Riak KV (This should only be necessary if Riak KV is not responding to protocol buffer requests.)
+4. Start Redis.
+5. Start RRA.
+
+```bash
+sudo service cache_proxy stop
+sudo service redis stop
+
+# optional
+sudo riak restart
+
+sudo service redis start
+sudo service cache_proxy start
+```
+
+## Using Riak Redis Add-on
+
+Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA.
+
+For objects that should not be cached, interact with Riak KV as usual: issuing GET, PUT, and DELETE commands through the Riak client.
+
+For objects that should be cached, read from RRA: issuing GET, SET, and DEL commands through the Redis client.
+
+### Monitoring
+
+#### RRA
+
+Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service's process was killed or terminated by other means.
+
+The log file for RRA is stored by default in `/var/log/cache_proxy.log`.
+RRA is logrotate friendly, responding to the signal to reopen the log file following a rotate.
+
+For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+
+For example, run the following command (using the loopback interface and the default statistics port as an example):
+
+```bash
+telnet 127.0.0.1 22123
+```
+
+This returns statistics results such as:
+
+```json
+{
+  "bdp_cache_proxy": {
+    "192.168.50.2:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 216,
+      "requests": 9,
+      "response_bytes": 39,
+      "responses": 4,
+      "server_connections": 1,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "192.168.50.3:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 0,
+      "requests": 0,
+      "response_bytes": 0,
+      "responses": 0,
+      "server_connections": 0,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "192.168.50.4:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 90,
+      "requests": 5,
+      "response_bytes": 258,
+      "responses": 2,
+      "server_connections": 0,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "client_connections": 0,
+    "client_eof": 6,
+    "client_err": 0,
+    "forward_error": 0,
+    "fragments": 0,
+    "server_ejects": 0
+  },
+  "curr_connections": 4,
+  "service": "nutcracker",
+  "source": "vagrant",
+  "timestamp": 1438301846,
+  "total_connections": 10,
+  "uptime": 7227,
+  "version": "0.4.0"
+}
+```
+
+Using the above results, you should be able to determine which metric changes would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution.
+
+While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions:
+
+* Custom - https://github.com/gfranxman/NutcrackerMonitor
+* NewRelic - http://newrelic.com/plugins/schoology/245
+* Nagios - https://github.com/schoology/twemproxy_nagios
+
+#### Redis
+
+Various Redis monitoring solutions exist in the market and, like monitoring RRA, these monitoring solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone.
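+
+For example, a minimal spot check of a Redis instance (assuming the
+default Redis port of 6379) can be made with the Redis client itself:
+
+```bash
+redis-cli -h 127.0.0.1 -p 6379 info stats
+```
+
+The `info` output includes counters such as total connections and
+commands processed, which monitoring tools typically sample on an
+interval.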
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details diff --git a/content/riak/kv/2.9.0p5/configuring.md b/content/riak/kv/2.9.0p5/configuring.md new file mode 100644 index 0000000000..5fd17a7967 --- /dev/null +++ b/content/riak/kv/2.9.0p5/configuring.md @@ -0,0 +1,90 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +aliases: + - /riak/2.9.0p5/configuring/ + - /riak/2.9.0/configuring/ + - /riak/kv/2.9.0/configuring/ + - /riak/kv/2.9.0p1/configuring/ + - /riak/kv/2.9.0p2/configuring/ + - /riak/kv/2.9.0p3/configuring/ + - /riak/kv/2.9.0p4/configuring/ +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+
+[Learn More >>][config search]
+
+#### [V3 Multi-Datacenter][config v3 mdc]
+
+A guide on configuring Riak's V3 Multi-Datacenter Replication.
+
+[Learn More >>][config v3 mdc]
+
+#### [V2 Multi-Datacenter][config v2 mdc]
+
+A guide on configuring Riak's V2 Multi-Datacenter Replication.
+
+[Learn More >>][config v2 mdc]
+
diff --git a/content/riak/kv/2.9.0p5/configuring/backend.md b/content/riak/kv/2.9.0p5/configuring/backend.md
new file mode 100644
index 0000000000..8072289d90
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/backend.md
@@ -0,0 +1,645 @@
+---
+title: "Backend Configuration"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Backend Configuration"
+    identifier: "configuring_backend"
+    weight: 110
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.0p5/configuring/backend/
+  - /riak/2.9.0/configuring/backend/
+  - /riak/kv/2.9.0/configuring/backend/
+  - /riak/kv/2.9.0p1/configuring/backend/
+  - /riak/kv/2.9.0p2/configuring/backend/
+  - /riak/kv/2.9.0p3/configuring/backend/
+  - /riak/kv/2.9.0p4/configuring/backend/
+---
+
+
+[plan backend leveldb]: {{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb
+[plan backend leveled]: {{}}riak/kv/2.9.0p5/setup/planning/backend/leveled
+[plan backend bitcask]: {{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask
+[plan backend memory]: {{}}riak/kv/2.9.0p5/setup/planning/backend/memory
+[plan backend multi]: {{}}riak/kv/2.9.0p5/setup/planning/backend/multi
+
+## LevelDB
+
+Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend.
+
+> **Note on upgrading to 2.0**
+>
+> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and
+wish to use your old configuration files, i.e. `app.config` and
+`vm.args`, please note that you must set the `total_leveldb_mem_percent`
+setting in the `eleveldb` section of `app.config`. We recommend setting
+it to `70`. If you do not set this parameter, it will default to 15,
+which can lead to problems in some clusters.
+
+| Config | Description | Default |
+|:-------|:------------|:--------|
+| `leveldb.block_cache_threshold` | This setting defines the limit past which block cache memory can no longer be released in favor of the page cache. This setting has no impact in favor of file cache. The value is set on a per-vnode basis. | `32MB` |
+| `leveldb.compaction.trigger.tombstone_count` | Controls when a background compaction initiates solely due to the number of delete tombstones within an individual `.sst` table file. A value of `off` disables the feature. | `1000` |
+| `leveldb.compression` | Enabling this setting (`on`), which is the default, saves disk space. Disabling it may reduce read latency but increase overall disk activity. This option can be changed at any time, but it will not impact data on disk until the next time a file requires compaction. | `on` |
+| `leveldb.compression.algorithm` | This setting selects which compression algorithm is used when `leveldb.compression` is `on`. In new riak.conf files, this is explicitly set to `lz4`; however, when this setting is not provided, `snappy` will be used for backward compatibility. When you determine that you will no longer need backward compatibility, setting this to `lz4` will cause future compactions to use the LZ4 algorithm for compression. | `lz4` in new riak.conf files; `snappy` when not provided |
+| `leveldb.data_root` | The directory in which LevelDB will store its data. | `./data/leveldb` |
+| `leveldb.fadvise_willneed` | Option to override LevelDB's use of `fadvise(DONTNEED)` with `fadvise(WILLNEED)` instead. WILLNEED can reduce disk activity on systems where physical memory exceeds the database size. | `false` |
+| `leveldb.maximum_memory` | This parameter defines the server memory (in bytes) to assign to LevelDB. Also see `leveldb.maximum_memory.percent` to set LevelDB memory as a percentage of system total. | `80` |
+| `leveldb.maximum_memory.percent` | This parameter defines the percentage of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes to stay within this size. The memory size can alternately be assigned as a byte count via `leveldb.maximum_memory` instead. | `70` |
+| `leveldb.threads` | The number of worker threads performing LevelDB operations. | `71` |
+| `leveldb.verify_checksums` | Enables or disables the verification of the data fetched from LevelDB against internal checksums. | `on` |
+| `leveldb.verify_compaction` | Enables or disables the verification of LevelDB data during compaction. | `on` |
+| `leveldb.block.size_steps` | Defines the number of incremental adjustments to attempt between the `block.size` value and the maximum `block.size` for an `.sst` table file. A value of zero disables the underlying dynamic `block_size` feature. | `16` |
+| `leveldb.block.restart_interval` | Defines the key count threshold for a new key entry in the key index for a block. Most deployments should leave this parameter alone. | `16` |
+| `leveldb.block.size` | Defines the size threshold for a block/chunk of data within one `.sst` table file. Each new block gets an index entry in the `.sst` table file's master index. | `4KB` |
+| `leveldb.bloomfilter` | Each database `.sst` table file can include an optional "bloom filter" that is highly effective in shortcutting data queries that are destined to not find the requested key. The Bloom filter typically increases the size of an `.sst` table file by about 2%. | `on` |
+| `leveldb.write_buffer_size_min` | Each vnode first stores new key/value data in a memory-based write buffer. This write buffer is in parallel to the recovery log mentioned in the `sync` parameter. Riak creates each vnode with a randomly sized write buffer for performance reasons. The random size is somewhere between `write_buffer_size_min` and `write_buffer_size_max`. | `30MB` |
+| `leveldb.write_buffer_size_max` | See `leveldb.write_buffer_size_min` directly above. | `60MB` |
+| `leveldb.limited_developer_mem` | This is a Riak-specific option that is used when a developer is testing a high number of vnodes and/or several VMs on a machine with limited physical memory. Do not use this option if making performance measurements. This option overwrites values given to `write_buffer_size_min` and `write_buffer_size_max`. | `off` |
+| `leveldb.sync_on_write` | Whether LevelDB will flush after every write. Note: If you are familiar with fsync, this is analogous to calling fsync after every write. | `off` |
+| `leveldb.tiered` | The level number at which LevelDB data switches from the faster to the slower array. The default of `off` disables the feature. | `off` |
+| `leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`. | |
+| `leveldb.tiered.path.slow` | The path prefix for `.sst` files at or above the level set by `leveldb.tiered`. | |
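+
+As a quick illustration, a riak.conf fragment overriding a few of the
+LevelDB defaults above might look like the following (the values shown
+are examples, not recommendations):
+
+```riakconf
+leveldb.maximum_memory.percent = 70
+leveldb.compression = on
+leveldb.compression.algorithm = lz4
+```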
+
+## Leveled
+
+Configurable parameters for Riak's [leveled][plan backend leveled] storage backend.
+
+| Config | Description | Default |
+|:-------|:------------|:--------|
+| `leveled.data_root` | A path under which leveled data files will be stored. | `$(platform_data_dir)/leveled` |
+| `leveled.sync_strategy` | Strategy for flushing data to disk. Can be set to `riak_sync`, `sync` (if OTP > 16) or `none`. Use `none`, and the OS will flush when most efficient. Use `riak_sync` or `sync` to flush after every PUT (not recommended without some hardware support, e.g. flash drives and/or flash-backed write caches). | `none` |
+| `leveled.compression_method` | Can be `lz4` or `native` (which will use the Erlang native zlib compression) within `term_to_binary`. | `native` |
+| `leveled.compression_point` | The point at which compression is applied to the Journal (the Ledger is always compressed). Use `on_receipt` or `on_compact`. `on_compact` is suitable when values are unlikely to yield much benefit from compression (compression is only attempted when compacting). | `on_receipt` |
+| `leveled.log_level` | Can be `debug`, `info`, `warn`, `error` or `critical`. Sets the minimum log level to be used within leveled. Leveled will log many lines to allow for stats to be extracted by those using log indexers such as Splunk. | `info` |
+| `leveled.journal_size` | The approximate size (in bytes) when a Journal file should be rolled. Normally keep this as around the size of o(100K) objects. | `1000000000` |
+| `leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. The higher the value, the more compaction runs, and the sooner space is recovered. But each run has a cost. | `24` |
+| `leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. Use a low hour of 0 and a top hour of 23 to have no compaction window (i.e. always compact, regardless of time of day). | `0` |
+| `leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. If low hour > top hour, compaction will work overnight between low hour and top hour (inclusive). Timings rely on the server's view of local time. | `23` |
+| `leveled.max_run_length` | In a single compaction run, the maximum number of consecutive files which may be compacted. | `4` |
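+
+For example, a riak.conf fragment restricting journal compaction to an
+overnight window might look like this (the hours are illustrative):
+
+```riakconf
+leveled.compaction_low_hour = 1
+leveled.compaction_top_hour = 5
+```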
+
+## Bitcask
+
+Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend.
+
+| Config | Description | Default |
+|:-------|:------------|:--------|
+| `bitcask.data_root` | The directory under which Bitcask will store its data. | `./data/bitcask` |
+| `bitcask.io_mode` | Configure how Bitcask writes data to disk. If set to `erlang`, writes are made via Erlang's built-in file API; if set to `nif`, writes are made via direct calls to the POSIX C API. The `nif` mode provides higher throughput for certain workloads, but has the potential to negatively impact the Erlang VM, leading to higher worst-case latencies and possible throughput collapse. | `erlang` |
+| `bitcask.expiry` | By default, Bitcask keeps all of your data around. If your data has limited time value, or if you need to purge data for space reasons, you can set the `expiry` option. For example, if you need to purge data automatically after 1 day, set the value to `1d`. `off` disables automatic expiration. | `off` |
+| `bitcask.expiry.grace_time` | By default, Bitcask will trigger a merge whenever a data file contains an expired key. This may result in excessive merging under some usage patterns. To prevent this you can set the `bitcask.expiry.grace_time` option. Bitcask will defer triggering a merge solely for key expiry by the configured number of seconds. Setting this to `1h` effectively limits each cask to merging for expiry once per hour. | `0` |
+| `bitcask.hintfile_checksums` | Whether to allow the CRC to be present at the end of hintfiles. Setting this to `allow_missing` runs Bitcask in a backwards-compatible mode in which old hint files will still be accepted without CRC signatures. | `strict` |
+| `bitcask.fold.max_puts` | See the description for the `bitcask.fold.max_age` config directly below. | `0` |
+| `bitcask.fold.max_age` | Fold keys thresholds will reuse the keydir if another fold was started less than `fold.max_age` ago and there were fewer than `fold.max_puts` updates. Otherwise, it will wait until all current fold keys complete and then start. Set either option to `unlimited` to disable. | `unlimited` |
+| `bitcask.merge.thresholds.fragmentation` | Describes which ratio of dead keys to total keys in a file will cause it to be included in the merge. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 4 dead keys and 6 live keys, it will be included in the merge at the default ratio (which is 40). Increasing the value will cause fewer files to be merged, decreasing the value will cause more files to be merged. | `40` |
+| `bitcask.merge.thresholds.dead_bytes` | Describes the minimum amount of data occupied by dead keys in a file to cause it to be included in the merge. Increasing the value will cause fewer files to be merged, whereas decreasing the value will cause more files to be merged. | `128MB` |
+| `bitcask.merge.thresholds.small_file` | Describes the minimum size a file must have to be excluded from the merge. Files smaller than the threshold will be included. Increasing the value will cause more files to be merged, whereas decreasing the value will cause fewer files to be merged. | `10MB` |
+| `bitcask.merge.triggers.dead_bytes` | Describes how much data stored for dead keys in a single file will trigger merging. If a file meets or exceeds the trigger value for dead bytes, merge will be triggered. Increasing the value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. When either of these constraints are met by any file in the directory, Bitcask will attempt to merge files. | `512MB` |
+| `bitcask.merge.triggers.fragmentation` | Describes which ratio of dead keys to total keys in a file will trigger merging. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 6 dead keys and 4 live keys, then merge will be triggered at the default setting. Increasing this value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. | `60` |
+| `bitcask.merge.window.end` | See the description of the `bitcask.merge.policy` config below. | `23` |
+| `bitcask.merge.window.start` | See the description of the `bitcask.merge.policy` config below. | `0` |
+| `bitcask.merge.policy` | Lets you specify when during the day merge operations are allowed to be triggered. Valid options are: `always`, meaning no restrictions; `never`, meaning that merging will never be attempted; and `window`, specifying the hours during which merging is permitted, where `bitcask.merge.window.start` and `bitcask.merge.window.end` are integers between 0 and 23. If merging has a significant impact on performance of your cluster, or your cluster has quiet periods in which little storage activity occurs, you may want to change this setting from the default. | `always` |
+| `bitcask.merge_check_interval` | Bitcask periodically runs checks to determine whether merges are necessary. This parameter determines how often those checks take place. Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, etc. | `3m` |
+| `bitcask.merge_check_jitter` | In order to prevent merge operations from taking place on different nodes at the same time, Riak can apply random variance to merge times, expressed as a percentage of `bitcask.merge_check_interval`. | `30%` |
+| `bitcask.max_merge_size` | Maximum amount of data to merge in one go in the Bitcask backend. | `100GB` |
+| `bitcask.max_file_size` | Describes the maximum permitted size for any single data file in the Bitcask directory. If a write causes the current file to exceed this size threshold then that file is closed, and a new file is opened for writes. | `2GB` |
+| `bitcask.sync.interval` | See the description of the `bitcask.sync.strategy` directly below. | |
+| `bitcask.sync.strategy` | Changes the durability of writes by specifying when to synchronize data to disk. The default setting protects against data loss in the event of application failure (process death) but leaves open a small window in which data could be lost in the event of complete system failure (e.g. hardware, OS, or power). The default mode, `none`, writes data into operating system buffers which will be written to the disks when those buffers are flushed by the operating system. If the system fails, e.g. due to power loss or crash, that data is lost before those buffers are flushed to stable storage. This is prevented by the setting `o_sync`, which forces the operating system to flush to stable storage at every write. The effect of flushing each write is better durability, however write throughput will suffer as each write will have to wait for the write to complete. Available sync strategies: `none`, which will let the operating system manage syncing writes; `o_sync`, which uses the `O_SYNC` flag to force syncs on every write; and `interval`, which will force Bitcask to sync every `bitcask.sync.interval` seconds. | `none` |
+| `bitcask.open_timeout` | Specifies the maximum time Bitcask will block on startup while attempting to create or open the data directory. You generally need not change this value. If for some reason the timeout is exceeded on open you'll see a log message of the form `Failed to start bitcask backend: ...`. Only then should you consider a longer timeout. | `4s` |
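+
+As an illustration, a riak.conf fragment that enables automatic expiry
+and confines merges to a nightly window might look like the following
+(example values only):
+
+```riakconf
+bitcask.expiry = 1d
+bitcask.merge.policy = window
+bitcask.merge.window.start = 0
+bitcask.merge.window.end = 5
+```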
+
+## Memory Backend
+
+Configurable parameters for Riak's [Memory][plan backend memory] backend.
+
+| Config | Description | Default |
+|:-------|:------------|:--------|
+| `memory_backend.ttl` | Each value written will be written with this "time to live." Once that object's time is up, it will be deleted on the next read of its key. Minimum: `1s`. | |
+| `memory_backend.max_memory_per_vnode` | The maximum amount of memory consumed per vnode by the memory storage backend. Minimum: `1MB`. | |
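+
+For example, to cap each vnode's memory use and expire cached values
+after a day (example values only):
+
+```riakconf
+memory_backend.max_memory_per_vnode = 4MB
+memory_backend.ttl = 1d
+```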
+
+## Multi Backend
+
+Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster.
+
+If you are using multiple backends, you can configure the backends
+individually by prepending the setting with `multi_backend.$name`, where
+`$name` is the name of the backend. `$name` can be any valid
+configuration word, like `customer_data`, `my_data`, `foo_bar_backend`,
+etc.
+
+Below is the general form for setting multi-backend parameters:
+
+```riakconf
+multi_backend.$name.(existing_setting) = »setting value«
+# or
+multi_backend.$name.$backend_type.(backend_specific_setting) = »setting value«
+```
+
+Below is a listing of the available parameters:
+
+| Config | Description | Default |
+|:-------|:------------|:--------|
+| `multi_backend.$name.storage_backend` | This parameter specifies the Erlang module defining the storage mechanism that will be used on this node. | `bitcask` |
+| `multi_backend.default` | The default name of a backend when one is not specified. | |
+
+To give an example, if you have a LevelDB backend named
+`customer_backend` and wish to set the `data_root` parameter to
+`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would
+do so as follows:
+
+```riakconf
+multi_backend.customer_backend.storage_backend = leveldb
+multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend
+multi_backend.customer_backend.leveldb.maximum_memory.percent = 50
+```
diff --git a/content/riak/kv/2.9.0p5/configuring/basic.md b/content/riak/kv/2.9.0p5/configuring/basic.md
new file mode 100644
index 0000000000..0c49ccbe07
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/basic.md
@@ -0,0 +1,243 @@
+---
+title: "Basic Riak KV Configuration"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Basic Configuration"
+    identifier: "configuring_basic"
+    weight: 100
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/configuration/
+  - /riak/kv/2.9.0p5/ops/building/configuration/
+  - /riak/2.9.0p5/configuring/basic/
+  - /riak/2.9.0/configuring/basic/
+  - /riak/kv/2.9.0/configuring/basic/
+  - /riak/kv/2.9.0p1/configuring/basic/
+  - /riak/kv/2.9.0p2/configuring/basic/
+  - /riak/kv/2.9.0p3/configuring/basic/
+  - /riak/kv/2.9.0p4/configuring/basic/
+---
+
+
+[config reference]: {{}}riak/kv/2.9.0p5/configuring/reference
+[use running cluster]: {{}}riak/kv/2.9.0p5/using/running-a-cluster
+[use admin riak-admin#member-status]: {{}}riak/kv/2.9.0p5/using/admin/riak-admin/#member-status
+[perf erlang]: {{}}riak/kv/2.9.0p5/using/performance/erlang
+[plan start]: {{}}riak/kv/2.9.0p5/setup/planning/start
+[plan best practices]: {{}}riak/kv/2.9.0p5/setup/planning/best-practices
+[cluster ops backup]: {{}}riak/kv/2.9.0p5/using/cluster-operations/backing-up
+[cluster ops add remove node]: {{}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes
+[plan backend]: {{}}riak/kv/2.9.0p5/setup/planning/backend
+[plan backend multi]: {{}}riak/kv/2.9.0p5/setup/planning/backend/multi
+[plan backend bitcask]: {{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask
+[usage bucket types]: {{}}riak/kv/2.9.0p5/developing/usage/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties
+[concept buckets]: {{}}riak/kv/2.9.0p5/learn/concepts/buckets
+[concept eventual consistency]: {{}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency
+[perf benchmark]: {{}}riak/kv/2.9.0p5/using/performance/benchmarking
+[perf open files]: {{}}riak/kv/2.9.0p5/using/performance/open-files-limit
+[perf index]: {{}}riak/kv/2.9.0p5/using/performance
+[perf aws]: {{}}riak/kv/2.9.0p5/using/performance/amazon-web-services
+[Cluster Capacity Planning]: {{}}riak/kv/2.9.0p5/setup/planning/cluster-capacity/#ring-size-number-of-partitions
+
+This document covers the parameters that are commonly adjusted when
+setting up a new cluster. We recommend that you also review the detailed
+[Configuration Files][config reference] document before moving a cluster into
+production.
+
+All configuration values discussed here are managed via the
+configuration file on each node, and a node must be restarted for any
+changes to take effect.
+
+> **Note**
+>
+> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config` configuration file or
+the newer `riak.conf` if you wish.
+>
+> If you have installed Riak KV 2.0 directly, you should use only
+`riak.conf`.
+>
+> More on configuring Riak KV can be found in the [configuration files][config reference]
+doc.
+
+We advise that you make as many of the changes below as practical
+_before_ joining the nodes together into a cluster. Once your
+configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process.
+
+Use [`riak-admin member-status`][use admin riak-admin#member-status]
+to determine whether any given node is a member of a cluster.
+
+## Erlang VM Tunings
+
+Prior to building and starting a cluster, there are some
+Erlang-VM-related changes that you should make to your configuration
+files. If you are using the older, `vm.args`-based Erlang VM tunings,
+you should set the following:
+
+```vmargs
++sfwi 500
++scl false
+```
+
+If you are using the newer, `riak.conf`-based configuration system, we
+recommend the following settings:
+
+```riakconf
+erlang.schedulers.force_wakeup_interval = 500
+erlang.schedulers.compaction_of_load = false
+```
+
+More information can be found in [Erlang VM Tuning][perf erlang].
+
+## Ring Size
+
+The ring size, in Riak parlance, is the number of data partitions that
+comprise the cluster. This quantity impacts the scalability and
+performance of a cluster and, importantly, **it should be established
+before the cluster starts receiving data**.
+
+If the ring size is too large for the number of servers, disk I/O will
+be negatively impacted by the excessive number of concurrent databases
+running on each server; if the ring size is too small, the servers' other
+resources (primarily CPU and RAM) will go underutilized.
+
+See [Cluster Capacity Planning] for more details on choosing a ring size.
+
+The steps involved in changing the ring size depend on whether the
+servers (nodes) in the cluster have already been joined together.
+
+### Cluster joined, but no data needs to be preserved
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
+4. Start all nodes
+5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### New servers, have not yet joined a cluster
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for
+the location of this file)
+4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### Verifying ring size
+
+You can use the `riak-admin` command to verify the ring size:
+
+```bash
+riak-admin status | grep ring
+```
+
+Console output:
+
+```
+ring_members : ['riak@10.160.13.252']
+ring_num_partitions : 8
+ring_ownership : <<"[{'riak@10.160.13.252',8}]">>
+ring_creation_size : 8
+```
+
+If `ring_num_partitions` and `ring_creation_size` do not agree, that
+means that the `ring_creation_size` value was changed too late and that
+the proper steps were not taken to start over with a new ring.
+ +**Note**: Riak will not allow two nodes with different ring sizes to be +joined into a cluster. + +## Backend + +Another critical decision to be made is the backend to use. The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.0p5/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. diff --git a/content/riak/kv/2.9.0p5/configuring/global-object-expiration.md b/content/riak/kv/2.9.0p5/configuring/global-object-expiration.md new file mode 100644 index 0000000000..1b85a64c03 --- /dev/null +++ b/content/riak/kv/2.9.0p5/configuring/global-object-expiration.md @@ -0,0 +1,94 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
+menu:
+  riak_kv-2.9.0p5:
+    name: "Global Object Expiration"
+    identifier: "config_expiry"
+    weight: 180
+    parent: "configuring"
+project: "riak_kv"
+project_version: "2.9.0p5"
+toc: true
+aliases:
+  - /riak/2.9.0p5/configuring/global-object-expiration/
+  - /riak/2.9.0/configuring/global-object-expiration/
+  - /riak/kv/2.9.0/configuring/global-object-expiration/
+  - /riak/kv/2.9.0p1/configuring/global-object-expiration/
+  - /riak/kv/2.9.0p2/configuring/global-object-expiration/
+  - /riak/kv/2.9.0p3/configuring/global-object-expiration/
+  - /riak/kv/2.9.0p4/configuring/global-object-expiration/
+---
+
+
+[ttl]: https://en.wikipedia.org/wiki/Time_to_live
+
+By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data.
+
+Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value.
+
+## Enabling Expiry
+
+To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file:
+
+```riak.conf
+leveldb.expiration = on
+```
+
+{{% note %}}
+Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration.
+{{% /note %}}
+
+## Setting Retention Time
+
+The `retention_time` setting is used to specify the time until objects expire.
+Durations are set using a combination of an integer and a shortcut for the supported units:
+
+- Milliseconds - `ms`
+- Seconds - `s`
+- Minutes - `m`
+- Hours - `h`
+- Days - `d`
+- Weeks - `w`
+- Fortnight - `f`
+
+The following example configures objects to expire after 5 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 5h
+```
+
+You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 8d9h
+```
+
+## Expiry Modes
+
+Global expiration supports two modes:
+
+- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired.
+- `normal` - individual objects are removed as part of the usual compaction process.
+
+We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient.
+
+The following example configures objects to expire after 1 day:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+## Disable Expiry
+
+To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other two settings are ignored.
+For example:
+
+```riak.conf
+leveldb.expiration = off
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
diff --git a/content/riak/kv/2.9.0p5/configuring/load-balancing-proxy.md b/content/riak/kv/2.9.0p5/configuring/load-balancing-proxy.md
new file mode 100644
index 0000000000..aef0e56db0
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/load-balancing-proxy.md
@@ -0,0 +1,279 @@
+---
+title: "Load Balancing and Proxy Configuration"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Load Balancing & Proxy"
+    identifier: "configuring_load_balance"
+    weight: 150
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/advanced/configs/load-balanacing-proxy/
+  - /riak/kv/2.9.0p5/ops/advanced/configs/load-balanacing-proxy/
+  - /riak/2.9.0p5/configuring/load-balancing-proxy/
+  - /riak/2.9.0/configuring/load-balancing-proxy/
+  - /riak/kv/2.9.0/configuring/load-balancing-proxy/
+  - /riak/kv/2.9.0p1/configuring/load-balancing-proxy/
+  - /riak/kv/2.9.0p2/configuring/load-balancing-proxy/
+  - /riak/kv/2.9.0p3/configuring/load-balancing-proxy/
+  - /riak/kv/2.9.0p4/configuring/load-balancing-proxy/
+---
+
+
+[perf open files]: {{}}riak/kv/2.9.0p5/using/performance/open-files-limit
+
+The recommended best practice for operating Riak in production is to
+place Riak behind a load-balancing or proxy solution, either hardware-
+or software-based, while never directly exposing Riak to public network
+interfaces.
+
+Riak users have reported success in using Riak with a variety of
+load-balancing and proxy solutions. Common solutions include proprietary
+hardware-based load balancers, cloud-based load balancing options, such
+as Amazon's Elastic Load Balancer, and open-source software-based
+projects like HAProxy and Nginx.
+
+This guide briefly explores the commonly used open-source software-based
+solutions HAProxy and Nginx, and provides some configuration and
+operational tips gathered from community users and operations-oriented
+engineers at Basho.
+
+While it is by no means an exhaustive overview of the topic, this guide
+should provide a starting point for choosing and implementing your own
+solution.
+
+## HAProxy
+
+[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source
+solution for load balancing and proxying of HTTP- and TCP-based
+application traffic.
+
+Users have reported success in using HAProxy in combination with Riak in
+a number of configurations and scenarios. Much of the information and
+example configuration for this section is drawn from experiences of
+users in the Riak community in addition to suggestions from Basho
+engineering.
+
+### Example Configuration
+
+The following is an example starting-point configuration for HAProxy to
+act as a load balancer. The example cluster has 4 nodes and will be
+accessed by Riak clients using both the Protocol Buffers and HTTP
+interfaces.
+
+> **Note on open files limits**
+>
+> The operating system's open files limits need to be greater than 256000
+for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems.
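+
+A quick way to inspect the limits currently in effect on a Linux host
+(a sketch; the exact output will vary by system):
+
+```bash
+ulimit -n                   # per-process open-files limit for this shell
+cat /proc/sys/fs/file-max   # system-wide open-files limit
+```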
+
+```config
+global
+    log 127.0.0.1 local0
+    log 127.0.0.1 local1 notice
+    maxconn 256000
+    chroot /var/lib/haproxy
+    user haproxy
+    group haproxy
+    spread-checks 5
+    daemon
+    quiet
+
+defaults
+    log global
+    option dontlognull
+    option redispatch
+    option allbackups
+    maxconn 256000
+    timeout connect 5000
+
+backend riak_rest_backend
+    mode http
+    balance roundrobin
+    option httpchk GET /ping
+    option httplog
+    server riak1 riak1.:8098 weight 1 maxconn 1024 check
+    server riak2 riak2.:8098 weight 1 maxconn 1024 check
+    server riak3 riak3.:8098 weight 1 maxconn 1024 check
+    server riak4 riak4.:8098 weight 1 maxconn 1024 check
+
+frontend riak_rest
+    bind 127.0.0.1:8098
+    # Example bind for SSL termination
+    # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem
+    mode http
+    option contstats
+    default_backend riak_rest_backend
+
+
+backend riak_protocol_buffer_backend
+    balance leastconn
+    mode tcp
+    option tcpka
+    option srvtcpka
+    server riak1 riak1.:8087 weight 1 maxconn 1024 check
+    server riak2 riak2.:8087 weight 1 maxconn 1024 check
+    server riak3 riak3.:8087 weight 1 maxconn 1024 check
+    server riak4 riak4.:8087 weight 1 maxconn 1024 check
+
+
+frontend riak_protocol_buffer
+    bind 127.0.0.1:8087
+    mode tcp
+    option tcplog
+    option contstats
+    option tcpka
+    option srvtcpka
+    default_backend riak_protocol_buffer_backend
+```
+
+A specific configuration detail worth noting from the example is the
+commented option for SSL termination. HAProxy supports SSL directly as
+of version 1.5. Provided that your HAProxy instance was built with
+OpenSSL support, you can enable it by uncommenting the example line and
+modifying it to suit your environment. More information is available in
+the [HAProxy
+documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl).
+
+Also note that the above example is considered a starting point and is a
+work in progress based upon [this
+example](https://gist.github.com/1507077). You should carefully examine
+the configuration and change it according to your specific environment.
+
+### Maintaining Nodes Behind HAProxy
+
+When using HAProxy with Riak, you can instruct HAProxy to ping each node
+in the cluster and automatically remove nodes that do not respond.
+
+You can also specify a round-robin configuration in HAProxy and have
+your application handle connection failures by retrying after a timeout,
+thereby reaching a functioning node upon retrying the connection
+attempt.
+
+HAProxy also has a standby system you can use to remove a node from
+rotation while allowing existing requests to finish. You can remove
+nodes from HAProxy directly from the command line by interacting with
+the HAProxy stats socket with a utility such as
+[socat](http://www.dest-unreach.org/socat/):
+
+```bash
+echo "disable server »backend«/»server«" | socat stdio /etc/haproxy/haproxysock
+```
+
+At this point, you can perform maintenance on the node, down the node,
+and so on.
+When you've finished working with the node and it is again
+available for requests, you can re-enable it:
+
+```bash
+echo "enable server »backend«/»server«" | socat stdio /etc/haproxy/haproxysock
+```
+
+Consult the following HAProxy documentation resources for more
+information on configuring HAProxy in your environment:
+
+* [HAProxy Documentation](http://haproxy.1wt.eu/#docs)
+* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)
+
+## Nginx
+
+Some users have reported success in using the [Nginx](http://nginx.org/)
+HTTP server to proxy requests for Riak clusters. An example that
+provides access to a Riak cluster *through GET requests only* is
+provided here for reference.
+
+### Example Configuration
+
+The following is an example starting-point configuration for Nginx to
+act as a front-end proxy to a 5-node Riak cluster.
+
+This example forwards all GET requests to Riak nodes while rejecting all
+other HTTP operations.
+
+{{% note title="Nginx version notes" %}}
+This example configuration was verified on **Nginx version 1.2.3**. Please be
+aware that earlier versions of Nginx did not support any HTTP 1.1 semantics
+for upstream communication to backends. You should carefully examine this
+configuration and make changes appropriate to your specific environment before
+attempting to use it.
+{{% /note %}}
+
+Here is an example `nginx.conf` file:
+
+```config
+upstream riak_hosts {
+  server 10.0.1.10:8098;
+  server 10.0.1.11:8098;
+  server 10.0.1.12:8098;
+  server 10.0.1.13:8098;
+  server 10.0.1.14:8098;
+}
+
+server {
+  listen   80;
+  server_name  _;
+  access_log  /var/log/nginx/riak.access.log;
+
+  # your standard Nginx config for your site here...
+  location / {
+    root /var/www/nginx-default;
+  }
+
+  # Expose the /riak endpoint and allow queries for keys only
+  location /riak/ {
+      proxy_set_header Host $host;
+      proxy_redirect off;
+
+      client_max_body_size       10m;
+      client_body_buffer_size    128k;
+
+      proxy_connect_timeout      90;
+      proxy_send_timeout         90;
+      proxy_read_timeout         90;
+
+      proxy_buffer_size          64k;  # If set to a smaller value,
+                                       # nginx can complain with a
+                                       # "too large headers" error
+      proxy_buffers              4 64k;
+      proxy_busy_buffers_size    64k;
+      proxy_temp_file_write_size 64k;
+
+      if ($request_method != GET) {
+          return 405;
+      }
+
+      # Disallow any link with the MapReduce query format "bucket,tag,_"
+      if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) {
+          return 405;
+      }
+
+      if ($request_method = GET) {
+          proxy_pass http://riak_hosts;
+      }
+  }
+}
+```
+
+{{% note title="Note on access controls" %}}
+Even when filtering and limiting requests to GETs only as done in the example,
+you should strongly consider additional access controls beyond what Nginx can
+provide directly, such as specific firewall rules to limit inbound connections
+to trusted sources.
+{{% /note %}}
+
+### Querying Secondary Indexes Over HTTP
+
+When accessing Riak over HTTP and issuing Secondary Index queries, you
+can encounter an issue due to the default Nginx handling of HTTP header
+names containing underscore (`_`) characters.
By default, Nginx will issue errors for such queries, but you can
instruct Nginx to handle such header names when doing Secondary Index
queries over HTTP by adding the following directive to the appropriate
`server` section of `nginx.conf`:

```
underscores_in_headers on;
```
diff --git a/content/riak/kv/2.9.0p5/configuring/managing.md b/content/riak/kv/2.9.0p5/configuring/managing.md
new file mode 100644
index 0000000000..e77bf811ae
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/managing.md
@@ -0,0 +1,125 @@
---
title: "Managing Your Configuration"
description: ""
project: "riak_kv"
project_version: "2.9.0p5"
menu:
  riak_kv-2.9.0p5:
    name: "Managing Configuration"
    identifier: "configuring_managing"
    weight: 130
    parent: "configuring"
toc: true
aliases:
  - /riak/2.9.0p5/configuring/managing/
  - /riak/2.9.0/configuring/managing/
  - /riak/kv/2.9.0/configuring/managing/
  - /riak/kv/2.9.0p1/configuring/managing/
  - /riak/kv/2.9.0p2/configuring/managing/
  - /riak/kv/2.9.0p3/configuring/managing/
  - /riak/kv/2.9.0p4/configuring/managing/
---

[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-cli
[use admin riak cli#chkconfig]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-cli/#chkconfig
[config reference#search]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#search

## Retrieving a Configuration Listing

At any time, you can get a snapshot of currently applied configurations
through the command line. For a listing of *all* of the configs
currently applied in the node:

```bash
riak config effective
```

This will output a long list of the following form:

```
anti_entropy = active
anti_entropy.bloomfilter = on
anti_entropy.concurrency_limit = 2
# and so on
```

For detailed information about a particular configuration variable, use
the `riak config describe <variable>` command. This command will output a
description of what the parameter configures, which datatype you should
use to set the parameter (integer, string, enum, etc.), the default
value of the parameter, the currently set value in the node, and the
name of the parameter in `app.config` in older versions of Riak (if
applicable).

For in-depth information about the `ring_size` variable, for example:

```bash
riak config describe ring_size
```

This will output the following:

```
Documentation for ring_size
Number of partitions in the cluster (only valid when first
creating the cluster). Must be a power of 2, minimum 8 and maximum
1024.

   Datatype     : [integer]
   Default Value: 64
   Set Value    : undefined
   app.config   : riak_core.ring_creation_size
```

## Checking Your Configuration

The [`riak`][use admin riak cli] command line tool has a
[`chkconfig`][use admin riak cli#chkconfig] command that enables you to
determine whether the syntax in your configuration files is correct.

```bash
riak chkconfig
```

If your configuration files are syntactically sound, you should see the
output `config is OK` followed by a listing of files that were checked.
You can safely ignore this listing. If, however, something is
syntactically awry, you'll see an error output that provides details
about what is wrong. To give an example, the `search.solr.jmx_port`
setting (in the [Search][config reference#search] section below)
must be set as an integer.
Imagine that we set it to something else:

```riakconf
search.solr.jmx_port = banana
```

If we run `riak chkconfig` now, we'll get an error:

```
[error] Error generating configuration in phase transform_datatypes
[error] Error transforming datatype for: search.solr.jmx_port
[error] "banana" can't be converted to an integer
```

The error message will specify which configurable parameters are
syntactically unsound and attempt to provide an explanation why.

Please note that the `chkconfig` command only checks for syntax. It will
_not_ be able to discern if your configuration is otherwise unsound,
e.g. if your configuration will cause problems on your operating system
or doesn't activate subsystems that you would like to use.

## Debugging Your Configuration

If there is a problem with your configuration but you're having trouble
identifying the problem, there is a command that you can use to debug
your configuration:

```bash
riak config generate -l debug
```

If there are issues with your configuration, you will see detailed
output that might provide a better sense of what has gone wrong in the
config generation process.
diff --git a/content/riak/kv/2.9.0p5/configuring/mapreduce.md b/content/riak/kv/2.9.0p5/configuring/mapreduce.md
new file mode 100644
index 0000000000..70a0e9b5d5
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/mapreduce.md
@@ -0,0 +1,204 @@
---
title: "MapReduce Settings"
description: ""
project: "riak_kv"
project_version: "2.9.0p5"
menu:
  riak_kv-2.9.0p5:
    name: "MapReduce Settings"
    identifier: "configuring_mapreduce"
    weight: 170
    parent: "configuring"
toc: true
aliases:
  - /riak/2.9.0p5/ops/advanced/configs/mapreduce/
  - /riak/kv/2.9.0p5/ops/advanced/configs/mapreduce/
  - /riak/2.9.0p5/configuring/mapreduce/
  - /riak/2.9.0/configuring/mapreduce/
  - /riak/kv/2.9.0/configuring/mapreduce/
  - /riak/kv/2.9.0p1/configuring/mapreduce/
  - /riak/kv/2.9.0p2/configuring/mapreduce/
  - /riak/kv/2.9.0p3/configuring/mapreduce/
  - /riak/kv/2.9.0p4/configuring/mapreduce/
---

[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce
[config reference#appconfig]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#app-config
[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/secondary-indexes

## Configuring MapReduce

[MapReduce (M/R)][usage mapreduce] is always enabled, but is configurable
through the [app.config][config reference#appconfig] file as
follows under `riak_kv`:

```erlang
{riak_kv, [
```

`mapred_name` is the URL directory used to submit M/R requests to Riak.
The default is `mapred`, making the URL, for example,
`http://localhost:8098/mapred`.

```erlang
    {mapred_name, "mapred"},
```

`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes]
MapReduce inputs are queued in parallel in their own pipe (`true`), or
serially through a helper process (`false` or undefined).

> **Note**: Set to `false` or leave undefined during an upgrade from 1.0.

```erlang
    {mapred_2i_pipe, true},
```

Each of the following entries controls how many JavaScript virtual machines are
available for executing map, reduce, pre- and post-commit hook
functions.

This is largely relevant only if you are writing JavaScript M/R jobs.

```erlang
    {map_js_vm_count, 8 },
    {reduce_js_vm_count, 6 },
    {hook_js_vm_count, 2 },
```

`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated
to the Javascript VMs. If unset, the default is 8MB.
+ +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {js_max_vm_mem, 8}, +``` + +`js_thread_stack` is the maximum amount of thread stack, in megabytes, +allocated to the Javascript VMs. If unset, the default is 16MB. + +> **Note**: This is not the same as the C thread stack. + +```erlang + {js_thread_stack, 16}, +``` + +`js_source_dir` should point to a directory containing Javascript source +files which will be loaded when Riak initializes Javascript VMs. + +```erlang + %{js_source_dir, "/tmp/js_source"}, +``` + + + +## Configuration Tuning for Javascript + +If you load larger JSON objects in your buckets there is a possibility you might encounter an error like the following: + +```json + {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"} +``` + + +You can increase the amount of memory allocated to the Javascript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB: + +```erlang +{js_thread_stack, 8} +``` + +becomes + +```erlang +{js_thread_stack, 32}, +``` + +In addition to increasing the amount of memory allocated to the stack you can increase the heap size as well by increasing the `js_max_vm_mem` from the default of 8MB. If you are collecting a large amount of results in a reduce phase you may need to increase this setting. + +## Configuration for Riak 1.0 + +Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config: + +```erlang +%% Use Riak Pipe to power MapReduce queries +{mapred_system, pipe}, +``` + +> **Warning:** +> +> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0. + +Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this: + +```erlang +%% Use the legacy MapReduce system +{mapred_system, legacy}, +``` + +## Configuration Tuning for Reduce Phases + +If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases. + +### Batch Size + +By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs. If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config: + +```erlang +%% Run reduce functions after 100 new inputs are received +{mapred_reduce_phase_batch_size, 100}, +``` + +You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this: + +```json +{"reduce": + {...language, etc. as usual... 
+ "arg":{"reduce_phase_batch_size":150}}} +``` + +In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form: + +```erlang +{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep} +``` + +Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_only_1":true}}} +``` + +Similarly, in Erlang: + +```erlang +{reduce, FunSpec, [reduce_phase_only_1], Keep} +``` + +> **Warning:** +> +> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1. + +### Pre-Reduce + +If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced local to the partition that produced them, before being sent, as usual, to the final aggregate reduce. + +Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config: + +```erlang +%% Always pre-reduce between map and reduce phases +{mapred_always_prereduce, true} +``` + +Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag: + +```erlang +{map, FunSpec, [do_prereduce], Keep} +``` + +> **Warning:** +> +>A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1. 
diff --git a/content/riak/kv/2.9.0p5/configuring/reference.md b/content/riak/kv/2.9.0p5/configuring/reference.md new file mode 100644 index 0000000000..44dedf5571 --- /dev/null +++ b/content/riak/kv/2.9.0p5/configuring/reference.md @@ -0,0 +1,2038 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.0p5/ops/advanced/configs/configuration-files/ + - /riak/kv/2.9.0p5/ops/advanced/configs/configuration-files/ + - /riak/2.9.0p5/configuring/reference/ + - /riak/2.9.0/configuring/reference/ + - /riak/kv/2.9.0/configuring/reference/ + - /riak/kv/2.9.0p1/configuring/reference/ + - /riak/kv/2.9.0p2/configuring/reference/ + - /riak/kv/2.9.0p3/configuring/reference/ + - /riak/kv/2.9.0p4/configuring/reference/ +--- + + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. 
But some installations, particularly those
upgrading from an earlier version of Riak to version 2.0 or later, may
need to make use of an `advanced.config` file to control some settings
available only in versions prior to 2.0. If this applies to your
installation, please see the [Advanced Configuration](#advanced-configuration) section below.

## Node Metadata

Every Riak node has a name and a cookie used to facilitate inter-node
communication. The following parameters enable you to customize the name
and cookie.
Config | Description | Default
:------|:------------|:-------
`distributed_cookie` | Cookie for distributed node communication within a Riak cluster. All nodes in the same cluster should use the same cookie or they will not be able to communicate. | `riak`
`nodename` | The name of the Riak node. | `riak@127.0.0.1`
`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
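These parameters are set in `riak.conf` as simple key/value pairs. For example (the values shown here are just the documented defaults, which you would replace with your own node name and cookie):

```riakconf
nodename = riak@127.0.0.1
distributed_cookie = riak
ring_size = 64
```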
## Ring

Configurable parameters for your cluster's [ring][concept clusters].
Config | Description | Default
:------|:------------|:-------
`ring.state_dir` | Default location of ringstate. | `./data/ring`
`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
`transfer_limit` | Number of concurrent node-to-node transfers allowed. | `2`
## Storage Backend

Riak enables you to choose from the following storage backends:

* [Bitcask][plan backend bitcask] --- [configuration][config backend bitcask]
* [LevelDB][plan backend leveldb] --- [configuration][config backend leveldb]
* [Leveled][plan backend leveled] --- [configuration][config backend leveled]
* [Memory][plan backend memory] --- [configuration][config backend memory]
* [Multi][plan backend multi] --- [configuration][config backend multi]
Config | Description | Default
:------|:------------|:-------
`storage_backend` | Specifies the storage engine used for Riak's key-value data and secondary indexes (if supported). The available options are `bitcask` (the default), `leveldb`, `memory`, `leveled` and `multi`. | `bitcask`
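For example, switching a node from the default backend to LevelDB is a one-line change in `riak.conf` (followed by a node restart):

```riakconf
storage_backend = leveldb
```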
## Directories

The directories in which Riak stores data, logs, dependencies,
executables, and configuration files can be configured using the
parameters below.
Config | Description | Default
:------|:------------|:-------
`platform_bin_dir` | The directory in which the `riak-admin`, `riak-debug`, and now-deprecated `search-cmd` executables are stored. | `./bin`
`platform_data_dir` | The directory in which Riak stores its storage backend data, as well as active anti-entropy data and cluster metadata. | `./data`
`platform_etc_dir` | The directory in which Riak's configuration files are stored. | `./etc`
`platform_lib_dir` | The directory in which Riak's dependencies are housed. | `./lib`
`platform_log_dir` | The directory in which Riak's log files are stored, e.g. the `console.log`, `erlang.log`, and `crash.log` files. | `./log`
Each of these directory parameters can be used to construct values for
other parameters by placing it within a `$(...)`. Thus,
`platform_log_dir` becomes `$(platform_log_dir)` and so on.

To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the
`anti_entropy.data_dir` parameter. When setting that parameter, you can
specify an absolute directory, as below:

```riakconf
anti_entropy.data_dir = /path/to/anti_entropy
```

Or you can use the value of `platform_data_dir`:

```riakconf
anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
```

## Search

Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].

Field | Default | Valid values |
:-----|:--------|:-------------|
`search` | `off` | `on` or `off`
`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
`search.dist_query` | `on` | `on` or `off`
`search.index.error_threshold.failure_count` | `3` | Integer
`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
`search.queue.batch.maximum`| `100` | Integer
`search.queue.batch.minimum` | `1` | Integer
`search.queue.high_watermark` | `10000` | Integer
`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
`search.root_dir` | `./data/yz` | Directory
`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
`search.solr.jmx_port` | `8985` | Integer
`search.solr.port` | `8093` | Integer
`search.solr.start_timeout` | `30s` | Integer with time units (e.g. `2m`)
`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`


## Riak Control

Riak Control is a web-based administrative console for inspecting and
manipulating Riak clusters. The configurable parameters below enable you
to turn the Riak Control subsystem on and off and to configure console
authorization.
Config | Description | Default
:------|:------------|:-------
`riak_control` | Set to `off` to disable the admin panel. | `off`
`riak_control.auth.mode` | Authentication mode used for access to the admin panel. Options are `off` (which is the default) or `userlist`. | `off`
`riak_control.auth.user.$username.password` | If Riak Control's authentication mode (`riak_control.auth.mode`) is set to `userlist`, this is the list of usernames and passwords for access to the admin panel. |
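As an example, enabling the admin panel with `userlist` authentication might look like the following in `riak.conf`; the username `admin` and password `sekrit` are placeholders:

```riakconf
riak_control = on
riak_control.auth.mode = userlist
riak_control.auth.user.admin.password = sekrit
```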
## Runtime Health

Configurable parameters for interaction between Riak and the underlying
operating system.
Config | Description | Default
:------|:------------|:-------
`runtime_health.triggers.distribution_port` | Whether distribution ports with full input buffers will be counted as busy. Distribution ports connect Riak nodes within a single cluster. | `on`
`runtime_health.triggers.port` | Whether ports with full input buffers will be counted as busy. Ports can represent open files or network sockets. | `on`
`runtime_health.triggers.process.heap_size` | A process will become busy when its heap exceeds this size (in bytes). | `160444000`
`runtime_health.triggers.process.garbage_collection` | A process will become busy when it exceeds this amount of time doing garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. **Note**: Enabling this setting can cause performance problems on multi-core systems. | `off`
`runtime_health.triggers.process.long_schedule` | A process will become busy when it exceeds this amount of time during a single process scheduling and execution cycle. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. | `off`
`runtime_health.thresholds.busy_ports` | The threshold at which a warning will be triggered about the number of ports that are overly busy. Ports with full input buffers count toward this threshold. | `2`
`runtime_health.thresholds.busy_processes` | The threshold at which a warning will be triggered about the number of processes that are overly busy. Processes with large heaps or that take a long time to garbage collect will count toward this threshold. | `30`
## Default Bucket Properties

When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified.
Config | Description | Default
:------|:------------|:-------
`buckets.default.allow_mult` | Whether or not siblings are allowed. **Note**: See [Conflict Resolution][usage conflict resolution] for a discussion of siblings. | `false`
`buckets.default.basic_quorum` | Whether not-founds will invoke the "basic quorum" optimization. This setting will short-circuit fetches where the majority of replicas report that the key is not found. Only used when `notfound_ok` is set to `false`. | `false`
`buckets.default.dw` | The number of replicas which must reply to a write request indicating that the write was committed to durable storage for the write to be deemed successful. | `quorum`
`buckets.default.last_write_wins` | Whether conflicting writes resolve via timestamp. | `false`
`buckets.default.merge_strategy` | The strategy used when merging objects that potentially have conflicts. The default is `2` in Riak 2.0 for typed buckets and `1` for non-typed buckets. This setting reduces sibling creation through additional metadata on each sibling (also known as Dotted Version Vectors). Setting this to `1` is the default for Riak 1.4 and earlier, and may duplicate siblings that originated in the same write. | `1`
`buckets.default.n_val` | The number of replicas stored in **non-typed** buckets. For typed buckets, the default is `3` unless changed explicitly for that bucket type. **Note**: See Replication Properties for further discussion. | `3`
`buckets.default.notfound_ok` | Whether not-founds will count toward a quorum of reads. | `true`
`buckets.default.postcommit` | A space-delimited list of functions that will be run after a value is stored. Only Erlang functions are allowed, using the `module:function` format. |
`buckets.default.precommit` | A space-delimited list of functions that will be run before a value is stored, and that can abort the write. Only Erlang functions are allowed, using the `module:function` format. |
`buckets.default.pr` | The number of primary, non-fallback replicas that must reply to a read request. | `0`
`buckets.default.pw` | The number of primary, non-fallback replicas which must reply to a write request. | `0`
`buckets.default.r` | The number of replicas which must reply to a read request. | `quorum`
`buckets.default.w` | The number of replicas which must reply to a write request, indicating that the write was received. | `quorum`
`buckets.default.rw` | The number of replicas which must reply to a delete request. | `quorum`
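For example, changing a node's untyped-bucket defaults in `riak.conf` might look like this; the values shown are the documented defaults, except `allow_mult`, which is flipped on for illustration:

```riakconf
buckets.default.allow_mult = true
buckets.default.n_val = 3
buckets.default.r = quorum
buckets.default.w = quorum
```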
## Object Settings

Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context].
Config | Description | Default
:------|:------------|:-------
`object.format` | Controls which binary representation of a riak value is stored on disk. Options are `0`, which will use the original `erlang:term_to_binary` format but has a higher space overhead, or `1`, which will tell Riak to utilize a new format for more compact storage of small values. | `1`
`object.siblings.maximum` | Writing an object with more than this number of siblings will send a failure to the client. | `100`
`object.siblings.warning_threshold` | Writing an object with more than this number of siblings will generate a warning in the logs. | `25`
`object.size.maximum` | Writing an object larger than this will send a failure to the client. | `50MB`
`object.size.warning_threshold` | Reading or writing objects larger than this size will write a warning in the logs. | `5MB`
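As a sketch, tightening the object-size safety valves in `riak.conf` might read as follows; the size values are illustrative assumptions, not recommendations:

```riakconf
object.size.warning_threshold = 1MB
object.size.maximum = 10MB
object.siblings.warning_threshold = 25
object.siblings.maximum = 100
```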
## Erlang VM

In the older configuration system, the Erlang VM in which Riak runs was
configured using a `vm.args` file. In the new, `riak.conf`-based
system, the Erlang VM can be configured using the parameters in the
table below.
Config | Description | Default
:------|:------------|:-------
`erlang.async_threads` | The number of threads in the Erlang VM's asynchronous thread pool. The valid range is 0-1024. If thread support is not available, this parameter will have no impact; if thread support is available, the default value is 64. This is the equivalent of the `+A` flag. | `64` (if thread support is available)
`erlang.async_threads.stack_size` | If thread support is available in your Erlang VM, this parameter sets the amount of memory allocated to each asynchronous thread, which you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, which translates to 64-32768 KB on 32-bit architectures. Although there is no default, we suggest a stack size of 16 kilowords, which translates to 64 KB. This small default size has been chosen because the number of asynchronous threads, set using the `erlang.async_threads` parameter explained above, might be quite large. The 64 KB default is enough for drivers delivered with Erlang/OTP but might not be large enough to accommodate drivers that use the `driver_async()` functionality. |
`erlang.distribution.net_ticktime` | The net kernel is an Erlang system process that provides various forms of network monitoring. In a Riak cluster, one of the functions of the net kernel is to periodically check node liveness. Tick time is the frequency with which those checks happen. If you set this parameter to `10`, for example, the tick will occur once every 10 seconds. |
`erlang.distribution.port_range.minimum` | For ease of firewall configuration, the Erlang distribution can be bound to a limited range of TCP ports. If this parameter is set, and `erlang.distribution.port_range.maximum` is not set, only this port will be used. If the minimum is unset, no restriction will be made on the port range. Instead, Erlang will listen on a random high-numbered port. |
`erlang.distribution.port_range.maximum` | See the description for `erlang.distribution.port_range.minimum` directly above. |
`erlang.schedulers.force_wakeup_interval` | Sets the scheduler forced wakeup interval. All run queues will be scanned each time period specified (in milliseconds). While there are sleeping schedulers in the system, one scheduler will be woken for each non-empty run queue found. An interval of zero disables this feature, which is the default. This feature is a workaround for lengthy executing native code, and native code that does not properly bump reductions. |
`erlang.schedulers.compaction_of_load` | Enables or disables the Erlang scheduler's compaction of load. When enabled (which is the default), load balancing will strive to establish a load distribution that causes as many scheduler threads as possible to be fully loaded, i.e. not to run out of scheduled work. This is accomplished by migrating load, such as running processes, onto a smaller set of schedulers when schedulers frequently run out of work. When disabled, the frequency at which schedulers run out of work will not be taken into account by the load balancing logic. | `true` (enabled)
`erlang.schedulers.utilization_balancing` | Enables or disables the Erlang scheduler's balancing of load. By default, scheduler utilization balancing is disabled while scheduler compaction of load is enabled, i.e. `erlang.schedulers.compaction_of_load` is set to `true`. In this state, the Erlang VM will strive for a load distribution which causes as many scheduler threads as possible to be fully loaded, i.e. to not run out of work. When load balancing is enabled using this setting, the system will instead attempt to balance scheduler utilization between schedulers. | `false` (disabled)
`erlang.distribution_buffer_size` | For nodes with many `busy_dist_port` events, Basho recommends raising the sender-side network distribution buffer size. 32MB may not be sufficient for some workloads and is a suggested starting point. Erlangers may know this as `+zdbbl`. | `32MB`
`erlang.process_limit` | Raises the default Erlang process limit. | `256000`
`erlang.max_ets_tables` | Raises the ETS table limit. | `256000`
`erlang.crash_dump` | Sets the location of crash dumps. | `./log/erl_crash.dump`
`erlang.fullsweep_after` | A non-negative integer which indicates how many times generational garbage collections can be done without forcing a fullsweep collection. In low-memory systems (especially without virtual memory), setting the value to `0` can help to conserve memory. | `0`
`erlang.max_ports` | The number of concurrent ports/sockets. The valid range is 1024 to 134217727. | `65536`
`erlang.K` | Enables or disables the kernel poll functionality if the emulator supports it. If the emulator does not support kernel poll, and the `K` flag is passed to the emulator, a warning is issued at startup. | `on`
`erlang.schedulers.total` | Sets the number of scheduler threads to create and scheduler threads to set online when `erlang.smp` support has been enabled. The maximum for both values is 1024. If the Erlang runtime system is able to determine the number of logical processors configured and logical processors available, `schedulers.total` will default to logical processors configured, and `schedulers.online` will default to the number of logical processors available. Otherwise, the default values will be 1. `schedulers.total` may be omitted if `schedulers.online` is not, and vice versa. If `schedulers.total` or `schedulers.online` is specified as a negative number, the value is subtracted from the default number of logical processors configured or logical processors available, respectively. Specifying the value `0` for `schedulers.total` or `schedulers.online` resets the number of scheduler threads or scheduler threads online, respectively, to its default value. This option is ignored if the emulator doesn't have SMP support enabled (see the `erlang.smp` flag). |
`erlang.schedulers.online` | See the description for `erlang.schedulers.total` directly above. |
`erlang.W` | Sets the mapping of warning messages for `error_logger`. Messages sent to the error logger using one of the warning routines can be mapped either to errors, warnings (`w`, which is the default), or info reports (`i`). | `w`
`erlang.smp` | Starts the Erlang runtime system with SMP support enabled. This may fail if no runtime system with SMP support is available. The `auto` setting starts the Erlang runtime system with SMP support enabled if it is available and more than one logical processor is detected. A value of `disable` starts a runtime system without SMP support. **Note**: The runtime system with SMP support will not be available on all supported platforms. See also the `erlang.schedulers` settings. Some native extensions (NIFs) require use of the SMP emulator. | `enable`
`erlang.shutdown_time` | Limits how long the Erlang VM spends shutting down. After the specified duration elapses, all existing processes are killed. | `10s`
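For instance, a node suffering from frequent `busy_dist_port` events and sitting behind a restrictive firewall might carry settings like these in `riak.conf`; the port range values are illustrative assumptions:

```riakconf
erlang.distribution_buffer_size = 32MB
erlang.distribution.port_range.minimum = 6000
erlang.distribution.port_range.maximum = 7999
```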
## JavaScript MapReduce

Configurable parameters for Riak's now-deprecated JavaScript
[MapReduce][usage mapreduce] system.
Config | Description | Default
:------|:------------|:-------
`javascript.source_dir` | A directory containing the JavaScript source files which will be loaded by Riak when it initializes JavaScript VMs. |
`javascript.maximum_stack_size` | The maximum amount of thread stack memory to allocate to each JavaScript virtual machine. | `16MB`
`javascript.maximum_heap_size` | The maximum amount of memory allocated to each JavaScript virtual machine. | `8MB`
`javascript.hook_pool_size` | The number of JavaScript virtual machines available for executing pre-commit hook functions. | `2`
`javascript.reduce_pool_size` | The number of JavaScript virtual machines available for executing reduce functions. | `6`
`javascript.map_pool_size` | The number of JavaScript virtual machines available for executing map functions. | `8`
## Security

Configurable parameters for [Riak KV Security][security index].
Config | Description | Default
:------|:------------|:-------
`ssl.cacertfile` | The default signing authority location for HTTPS. | `$(platform_etc_dir)/cacertfile.pem`
`ssl.keyfile` | Default key location for HTTPS. | `$(platform_etc_dir)/key.pem`
`ssl.certfile` | Default cert location for HTTPS. | `$(platform_etc_dir)/cert.pem`
`secure_referer_check` | Measures were added to Riak 1.2 to counteract cross-site scripting and request-forgery attacks. Some reverse proxies cannot remove the `Referer` header and make serving data directly from Riak impossible. Turning this setting to `off` disables this security check. | `on`
`check_crl` | Whether to check the certificate revocation list (CRL) of a client certificate. This defaults to `on` but some CAs may not maintain or define a CRL, so this can be disabled if no CRL is available. | `on`
`tls_protocols.sslv3` | Determines which SSL/TLS versions are allowed. By default, only TLS 1.2 is allowed, but other versions can be enabled if clients don't support the latest TLS standard. It is strongly recommended that SSLv3 not be enabled unless absolutely necessary. More than one protocol can be enabled at once. The `tls_protocols` parameters below can be used to turn different versions on and off. | `off`
`tls_protocols.tlsv1.2` | | `on`
`tls_protocols.tlsv1.1` | | `off`
`tls_protocols.tlsv1` | | `off`
`honor_cipher_order` | Whether to prefer the order in which the server lists its ciphers. When set to `off`, the client's preferred cipher order dictates which cipher is chosen. | `on`
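A typical HTTPS setup, expressed in `riak.conf`, might look like the following; the certificate paths simply reuse the documented defaults:

```riakconf
ssl.certfile = $(platform_etc_dir)/cert.pem
ssl.keyfile = $(platform_etc_dir)/key.pem
ssl.cacertfile = $(platform_etc_dir)/cacertfile.pem
tls_protocols.tlsv1.2 = on
```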
## Client Interfaces

Configurable parameters for clients connecting to Riak either through
Riak's Protocol Buffers or HTTP API.
Config | Description | Default
:------|:------------|:-------
`protobuf.nagle` | Turns off Nagle's algorithm for Protocol Buffers connections. This is equivalent to setting the `TCP_NODELAY` option on the socket. | `off`
`protobuf.backlog` | The maximum length to which the queue of pending connections may grow. If set, it must be an integer greater than zero. If you anticipate a huge number of connections being initialized simultaneously, set this number higher. | `128`
`listener.protobuf.$name` | This is the IP address and TCP port to which the Riak Protocol Buffers interface will bind. | `{"127.0.0.1",8087}`
`listener.http.$name` | This is the IP address and TCP port to which the Riak HTTP interface will bind. | `{"127.0.0.1",8098}`
`listener.https.$name` | This is the IP address and TCP port to which the Riak HTTPS interface will bind. |
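For example, binding the client interfaces to all network interfaces instead of loopback could be expressed like this; the `internal` listener name and the `0.0.0.0` address are illustrative:

```riakconf
listener.protobuf.internal = 0.0.0.0:8087
listener.http.internal = 0.0.0.0:8098
```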
## Logging

Configurable parameters for [lager](https://github.com/basho/lager),
Riak's logging system.
Config | Description | Default
:------|:------------|:-------
`log.console` | Where to emit the default log messages (typically at `info` severity). Possible values: `off`, which disables console log messages; `file`, which specifies that log messages will be output to the file specified by `log.console.file`; `console`, which outputs messages to standard output (seen when using `riak attach-direct`); or `both`, which outputs messages both to the file specified in `log.console.file` and to standard out. | `file`
`log.console.file` | When `log.console` is set to `file` or `both`, this parameter determines the path of the file to which console messages will be logged. | `./log/console.log`
`log.console.level` | The severity level of the console log. Possible values: `debug`, `info`, `warning`, `error`. | `info`
`log.crash` | Whether to enable the crash log. | `on`
`log.crash.file` | If the crash log is enabled, the file where its messages will be written. | `./log/crash.log`
`log.crash.maximum_message_size` | Maximum size of individual messages in the crash log. | `64KB`
`log.crash.rotation` | The schedule on which to rotate the crash log. | `$D0`
`log.crash.rotation.keep` | The number of rotated crash logs to keep. When set to `current`, only the current open log file is kept. Otherwise, an integer can be specified. | `5`
`log.crash.size` | Maximum size of the crash log before it is rotated. | `10MB`
`log.error.file` | The file where error messages will be logged. | `./log/error.log`
`log.error.messages_per_second` | Maximum number of `error_logger` messages to handle per second. | `100`
`log.error.redirect` | Whether to redirect `error_logger` messages into lager. | `on`
`log.syslog` | When set to `on`, enables log output to syslog. | `off`
`log.syslog.facility` | Sets the facility level of syslog output if `log.syslog` is set to `on`. Possible values: `auth`, `authpriv`, `clock`, `cron`, `daemon`, `ftp`, `kern`, `lpr`, `mail`, `news`, `syslog`, `user`, `uucp`. In addition to these settings, you may also select `local0` through `local7`. | `daemon`
`log.syslog.ident` | If `log.syslog` is set to `on`, this setting determines the prefix appended to each syslog message. | `riak`
`log.syslog.level` | If `log.syslog` is set to `on`, this setting determines the log level of syslog output. Possible values: `alert`, `critical`, `debug`, `emergency`, `error`, `info`, `none`, `notice`, `warning`. | `info`
`sasl` | Whether to enable `sasl`, Erlang's built-in error logger. | `off`
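For example, sending logs both to file and to syslog at `debug` severity would read as follows in `riak.conf`; the values shown are illustrative:

```riakconf
log.console = both
log.console.level = debug
log.syslog = on
log.syslog.facility = daemon
```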
## Active Anti-Entropy

Configurable parameters for Riak's active anti-entropy subsystem.
Config | Description | Default
:------|:------------|:-------
`anti_entropy` | How Riak will repair out-of-sync keys. If set to `active`, out-of-sync keys will be repaired in the background; if set to `passive`, out-of-sync keys are only repaired on read; and if set to `active-debug`, verbose debugging information will be output. | `active`
`search.anti_entropy.throttle` | Whether the distributed throttle for Active Anti-Entropy is enabled. | `on`
`search.anti_entropy.throttle.$tier.solrq_queue_length` | Sets the throttling tiers for Active Anti-Entropy. Each tier is a minimum vnode mailbox size and a time-delay that the throttle should observe at that size and above. For example, `anti_entropy.throttle.tier1.mailbox_size = 0`, `anti_entropy.throttle.tier1.delay = 0ms`, `anti_entropy.throttle.tier2.mailbox_size = 40`, `anti_entropy.throttle.tier2.delay = 5ms`, etc. If configured, there must be a tier which includes a mailbox size of 0. Both `.mailbox_size` and `.delay` must be set for each tier. |
`search.anti_entropy.throttle.$tier.delay` | See the description for `anti_entropy.throttle.$tier.mailbox_size` above. |
`anti_entropy.bloomfilter` | Bloom filters are highly effective in shortcutting data queries that are destined to not find the requested key, though they tend to entail a small performance cost. | `on`
`anti_entropy.max_open_files` | | `20`
`anti_entropy.write_buffer_size` | The LevelDB options used by Active Anti-Entropy to generate the LevelDB-backed on-disk hashtrees. | `4MB`
`anti_entropy.data_dir` | The directory where AAE hash trees are stored. | `./data/anti_entropy`
`anti_entropy.trigger_interval` | The tick determines how often the Active Anti-Entropy manager looks for work to do (building/expiring trees, triggering exchanges, etc). Lowering this value will speed up the rate at which all replicas are synced across the cluster. Increasing the value is not recommended. | `15s`
`anti_entropy.concurrency_limit` | Limits how many Active Anti-Entropy exchanges or builds can happen concurrently. | `2`
`anti_entropy.tree.expiry` | Determines how often hash trees are expired after being built. Periodically expiring a hash tree ensures that the on-disk hash tree data stays consistent with the actual K/V backend data. It also helps Riak identify silent disk failures and bit rot. However, expiration is not needed for normal active anti-entropy operations and should be infrequent for performance reasons. The time is specified in milliseconds. | `1w`
`anti_entropy.tree.build_limit.per_timespan` | | `1h`
`anti_entropy.tree.build_limit.number` | Restricts how fast AAE can build hash trees. Building the tree for a given partition requires a full scan over that partition's data. Once built, trees stay built until they are expired. `.number` is the number of builds; `.per_timespan` is the amount of time in which that number of builds occurs. | `1`
`anti_entropy.use_background_manager` | Whether AAE is to use a background process to limit AAE tree rebuilds. If set to `on`, this will help to prevent system response degradation under times of heavy load from multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
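These settings correspond directly to the entries shown earlier in the `riak config effective` output. For example, in `riak.conf`:

```riakconf
anti_entropy = active
anti_entropy.concurrency_limit = 2
anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
```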
## TicTac Active Anti-Entropy

Config | Description | Default
:------|:------------|:-------
`tictacaae_active` | Changes TicTac AAE from passive to active. If you want to run TicTac AAE alongside legacy AAE, set both to active. Acceptable values are `active` or `passive`. | `passive`
`tictacaae_dataroot` | Path under which AAE data files will be stored. | `$(platform_data_dir)/tictac_aae`
`tictacaae_parallelstore` | When running in parallel mode, which will be the default if the backend does not support native TicTac AAE (i.e. is not leveled), this determines what type of parallel key store should be kept: `leveled_ko` (leveled and key-ordered) or `leveled_so` (leveled and segment-ordered). When running in native mode, this setting is ignored. | `leveled_ko`
`tictacaae_rebuildwait` | The minimum number of hours to wait between rebuilds. | `336`
`tictacaae_rebuilddelay` | The number of seconds which represents the length of the period in which the next rebuild will be scheduled. If all vnodes are scheduled to rebuild at the same time, they will actually rebuild randomly between 0 and this value (in seconds) after the rebuild time. | `345600`
## Intra-Cluster Handoff

Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].

Config | Description | Default
:------|:------------|:-------
`handoff.max_rejects` | The maximum number of times that a secondary system within Riak, such as Riak Search, can block handoff of primary key/value data. The approximate maximum duration that a vnode can be blocked can be determined by multiplying this setting by `vnode_management_timer`. If you want to prevent handoff from ever being blocked by a secondary system, set this parameter to `0`. | `6`
`handoff.inbound` | Whether inbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
`handoff.outbound` | Whether outbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
`handoff.port` | Specifies the TCP port that Riak uses for intra-cluster data handoff. | `8099`
`handoff.ssl.certfile` | To encrypt `riak_core` intra-cluster data handoff traffic, uncomment this line and edit its path to an appropriate certfile and keyfile. |
`handoff.ssl.keyfile` | The keyfile paired with the certfile specified in `.certfile`. |
`handoff.use_background_manager` | Whether Riak will use a background manager to limit K/V handoff. This can help to prevent system response degradation during times of heavy load caused by multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
## Riak Data Types
Config | Description | Default
:------|:------------|:-------
`datatypes.compression_level` | Whether serialized Data Types will use compression, and at what level. When set to an integer, the parameter refers to the aggressiveness of compression, on a scale from 0 to 9. `on` is equivalent to 6, whereas `off` is equivalent to 0. Higher values for compression tend to be more CPU intensive. | `1`
## SNMP

Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher.

## JMX

Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher.

## Strong Consistency

> **Please Note:**
>
> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment.

Riak's strong consistency feature has a variety of tunable parameters
that allow you to enable and disable strong consistency, modify the
behavior of leaders and followers, set various timeouts, and more. More
detailed information from an operations perspective can be found in our
documentation on [managing strong consistency][cluster ops strong consistency].

Strong consistency is disabled by default. The `strong_consistency`
parameter enables you to turn it on. This setting is available in each
node's `riak.conf` file.
Config | Description | Default
:------|:------------|:-------
`strong_consistency` | Enables the consensus subsystem used for strongly consistent Riak operations if set to `on`. | `off`
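Enabling the subsystem is a single line in `riak.conf`:

```riakconf
strong_consistency = on
```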
Unlike the `strong_consistency` setting, the settings listed below are
available only in `advanced.config`, in the `riak_ensemble` section of
that file. That section looks like this:

```advancedconfig
{riak_ensemble, [
    {parameter1, value},
    {parameter2, value},
    %% Other settings
  ]}
```

Further instructions on setting parameters in `advanced.config` can be
found in the [advanced configuration](#advanced-configuration) section below.

Using these settings properly demands a firm understanding of the basic
architecture of Riak's implementation of strong consistency. We highly
recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind
strong consistency before changing the defaults on these parameters.
Config | Description | Default
:------|:------------|:-------
`ensemble_tick` | The rate at which leaders perform their periodic duties, including refreshing the leader lease, in milliseconds. This setting must be lower than both the `lease_duration` and `follower_timeout` settings (both listed below). Lower values mean that leaders perform their duties more frequently, which can allow for faster convergence if a leader goes offline and then returns to the ensemble; higher values mean that leaders perform their duties less frequently, which can reduce network overhead. | `500`
`lease_duration` | Determines how long a leader lease remains valid without being refreshed (in milliseconds). This should be set higher than the `ensemble_tick` setting (listed above) so that leaders have time to refresh their leases before they time out, and it must be set lower than the `follower_timeout` setting (listed below). | `ensemble_tick * 3/2`
`follower_timeout` | Determines how long a follower waits to hear from a leader before it abandons the leader (in milliseconds). This must be set greater than the `lease_duration` setting. | `lease_duration * 4`
`alive_tokens` | Determines the number of ticks the leader will wait to hear from its associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before `ensemble_tick * alive_tokens` milliseconds have elapsed, the leader will give up leadership. It may be necessary to raise this setting if your Riak vnodes are frequently stalling out on slow backend reads/writes. If this setting is too low, it may cause slow requests to time out earlier than the request timeout. | `2`
`storage_delay` | Determines how long the consensus subsystem delays syncing to disk when performing certain metadata operations (in milliseconds). This delay allows multiple operations to be coalesced into a single disk write. We do not recommend that you change this setting. | `50`
`storage_tick` | Determines how often the consensus subsystem writes data to disk that was requested to be written asynchronously (in milliseconds). We do not recommend that you change this setting. | `5000`
`trust_lease` | Determines whether leader leases are used to optimize reads. When set to `true`, a leader with a valid lease will handle the read directly without contacting any followers; when set to `false`, the leader will always contact followers. For more information, see our internal documentation on leader leases. | `true`
`peer_get_timeout` | Determines the timeout used internally for reading consistent data, in milliseconds. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
`peer_put_timeout` | Determines the timeout, in milliseconds, used internally for writing consistent data. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
`peer_workers` | The number of concurrent workers used by the leader to service requests. Increasing this setting may boost performance depending on the workload. | `1`
`tree_validation` | Determines whether Riak considers peer Merkle trees to be trusted after a node restart. When validation is enabled (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted majority. This is the safest option, as it protects Riak against undetected corruption of the Merkle tree. However, this mode reduces Riak availability since it can sometimes require more than a simple majority of nodes to be online and reachable. | `true`
`synchronous_tree_updates` | Determines whether the metadata updates to follower Merkle trees are handled synchronously or not. When set to `true`, Riak requires two quorum round trips to occur before replying back to the client, the first quorum request to write the actual object and the second to write the Merkle tree data. When set to `false`, Riak will respond back to the client after the first round trip, letting the metadata update happen asynchronously. It's important to note that the leader *always* updates its local Merkle tree before responding to the client; this setting only affects the metadata writes sent to followers. In principle, asynchronous updates are unsafe. If the leader crashes before sending the metadata updates, and all followers that had acknowledged the object write somehow revert to the object value immediately prior to a write request, a future read could return the immediately preceding value without realizing that it was incorrect. Given that this scenario is unlikely, this setting defaults to `false` in the name of improved performance. | `false`
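Putting a few of these together, a hypothetical `advanced.config` tuning that keeps the documented relationships intact (`lease_duration` greater than `ensemble_tick`, `follower_timeout` greater than `lease_duration`) might read:

```advancedconfig
{riak_ensemble, [
    {ensemble_tick, 500},
    {lease_duration, 750},
    {follower_timeout, 3000}
  ]}
```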
## Miscellaneous
Config | Description | Default
:------|:------------|:-------
`metadata_cache_size` | This setting controls the size of the metadata cache for each vnode. The cache can be disabled by setting it to `off` (this is the default). Enabling the cache should not be necessary in disk-based backends (i.e. LevelDB and Bitcask) but it can help performance in the Memory backend. Note that this setting adjusts the size of the ETS table rather than the actual data. Thus, more space may be used than the simple `size * number-of-vnodes` calculation would imply. **Caution**: This setting should not be changed without extensive benchmarking. | `off`
`max_concurrent_requests` | The maximum number of concurrent requests of each type (GET or PUT) that is allowed. Setting this value to `infinite` disables overload protection. The `erlang.process_limit` should be at least 3 times this setting. | `50000`
`dtrace` | Whether DTrace is enabled. Do not enable unless your Erlang/OTP runtime is compiled to support DTrace, which is available in R15B01 (supported by the official source package) and in R14B04 via a custom repository and branch. | `off`
`vnode_management_timer` | Sets the frequency with which vnodes attempt to trigger handoff between this node and other nodes in the cluster. | `10s` (10 seconds)
`retry_put_coordinator_failure` | When a PUT (i.e. write) request fails, Riak will retry the operation if this setting is set to `on`, which is the default. Setting it to `off` will speed response times on PUT requests in general, but at the risk of potentially increasing the likelihood of write failure. | `on`
`background_manager` | Riak's background manager is a subsystem that coordinates access to shared resources from other Riak subsystems. The background manager can help to prevent system response degradation under times of heavy load caused by multiple background tasks. | `on`
## Advanced Configuration

The `advanced.config` file takes the same format as the `app.config`
file familiar to users of versions of Riak prior to 2.0. Here is an
example:

```advancedconfig
[
  {riak_core,
    [
      {cluster_mgr, {"127.0.0.1", 8098 } },
      %% more riak_core configs
    ]},

  {riak_repl,
    [
      {data_root, "/var/db/riak/riak_repl/"},
      %% more riak_repl configs
    ]
  }
].
```

The following settings are available in the `advanced.config` file:

#### `riak_repl` settings

Most settings that are configurable through `advanced.config` are
related to Riak's `riak_repl` subsystem.
ConfigDescriptionDefault
`data_root` | Path (relative or absolute) to the working directory for the replication process. | `/var/db/riak/riak_repl/`
`max_fssource_cluster` | The hard limit of fullsync workers that will be running on the source side of a cluster, across all nodes on that cluster, for a fullsync to a sink cluster. This limit applies per sink: if you have configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, then 10 fullsync workers can be in progress. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or the command line. | `5`
`max_fssource_node` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit across all enabled fullsyncs; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or the command line. | `1`
`max_fssink_node` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit across all enabled fullsyncs; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the sink cluster on which this parameter is defined, either via the configuration file or the command line. | `1`
`fullsync_on_connect` | Whether to initiate a fullsync on initial connection from the sink cluster. | `true`
`fullsync_interval` | Either a single integer representing the duration to wait, in minutes, between fullsyncs, or a list of `{clustername, time_in_minutes}` pairs, one for each sink participating in fullsync replication. | `30`
`rtq_max_bytes` | The maximum size, in bytes, to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync. | `104857600`
`proxy_get` | Whether to enable Riak CS `proxy_get` and block filter. | `disabled`
`rt_heartbeat_interval` | A heartbeat message is sent from the source to the sink every `rt_heartbeat_interval` seconds. Setting `rt_heartbeat_interval` to `undefined` disables the realtime heartbeat. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and in Riak KV 2.2.6 onwards. | `15`
`rt_heartbeat_timeout` | If a heartbeat response is not received within the time period specified by this setting (in seconds), the source connection exits and will be re-established. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and in Riak KV 2.2.6 onwards. | `15`
`realtime_connection_rebalance_max_delay_secs` | Should a server on the source cluster be restarted, this is the amount of time (in seconds) before the realtime connections are rebalanced by a change in the number of source nodes. | `300`
`fullsync_use_background_manager` | By default, fullsync replication will attempt to coordinate with other Riak subsystems that may be contending for the same resources. This helps to prevent system response degradation during times of heavy load from multiple background tasks. To disable background coordination, set this parameter to `false`. This feature is available only in Riak KV Enterprise Edition 2.0 and later, and in Riak KV 2.2.6 onwards. | `true`
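For example, here is a sketch of how several of these settings might sit together in the `riak_repl` section of `advanced.config`; the values are illustrative assumptions, not recommendations:

```advancedconfig
{riak_repl,
  [
    %% working directory for the replication process
    {data_root, "/var/db/riak/riak_repl/"},
    %% fullsync every 6 hours rather than the default 30 minutes
    {fullsync_interval, 360},
    %% allow at most 2 fullsync workers per source node
    {max_fssource_node, 2},
    %% cap the realtime queue at the default 100 MiB
    {rtq_max_bytes, 104857600}
  ]}
```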
#### Upgrading Riak Search with `advanced.config`

If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][use ref search] (codename Yokozuna), you will need to enable legacy Search while the upgrade is underway. You can add the following snippet to your `advanced.config` configuration to do so:

```advancedconfig
[
    %% Other configs

    {riak_search, [ {enabled, true} ]},
    {merge_index, [
        {data_root, "/var/lib/riak/merge_index"},
        {buffer_rollover_size, 1048576},
        {max_compact_segments, 20}
    ]},

    %% Other configs
].
```

#### Other settings

There are three non-`riak_repl` settings available in `advanced.config`.

Config | Section | Description | Default
:------|:--------|:------------|:-------
`add_paths` | `riak_kv` | If you are installing custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies the paths to any compiled `.beam` files that you wish to use. This is expressed as a list of absolute paths on the node's filesystem, e.g. `[ "/tmp", "/other" ]`. |
`cluster_mgr` | `riak_core` | The cluster manager listens for connections from remote clusters on the specified IP and port. Every node runs one cluster manager, but only the cluster manager running on the cluster leader will service requests. This can change as nodes enter and leave the cluster. | `{"127.0.0.1", 9080}`
`delete_mode` | `riak_kv` | Specifies how Riak behaves after objects are marked for deletion with a tombstone. There are three possible settings: `keep` disables tombstone removal altogether; `immediate` removes objects' tombstones as soon as the delete request is received; and setting `delete_mode` to an integer value specifies the number of milliseconds to wait before removing tombstones. More information can be found in Object Deletion. | `3000` (3 seconds)
`target_n_val` | `riak_core` | The highest `n_val` that you generally intend to use. This setting affects how partitions are distributed within the cluster, helping to ensure that "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. You will need to change this setting only in rare circumstances. Assuming that `ring_size` is a power of 2, the ideal value for this setting is both (a) greater than or equal to the largest `n_val` for any bucket type and (b) an even divisor of the number of partitions in the ring, i.e. `ring_size`. The default is `4`, and the number of physical nodes in your cluster must be greater than `target_n_val` for this setting to be effective at preventing hot spots. | `4`
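As an illustration, these three settings might be combined in `advanced.config` as follows; the `add_paths` directory is a hypothetical example and the values are not recommendations:

```advancedconfig
[
    {riak_core, [
        %% raise this if any bucket type will use an n_val above 4
        {target_n_val, 5}
    ]},
    {riak_kv, [
        %% wait 10 seconds before reaping tombstones
        {delete_mode, 10000},
        %% hypothetical path to custom compiled .beam files
        {add_paths, ["/opt/riak/custom-beams"]}
    ]}
].
```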
## Cluster Job Controls

{{% note title="Warning" %}}
Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
{{% /note %}}

The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.

Field | Default | Valid values |
:-----|:--------|:-------------|
`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`

diff --git a/content/riak/kv/2.9.0p5/configuring/search.md b/content/riak/kv/2.9.0p5/configuring/search.md
new file mode 100644
index 0000000000..42a175997f
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/search.md
@@ -0,0 +1,282 @@
---
title: "Riak Search Settings"
description: ""
project: "riak_kv"
project_version: "2.9.0p5"
menu:
  riak_kv-2.9.0p5:
    name: "Riak Search Settings"
    identifier: "configuring_search"
    weight: 160
    parent: "configuring"
toc: true
aliases:
  - /riak/2.9.0p5/ops/advanced/configs/search/
  - /riak/kv/2.9.0p5/ops/advanced/configs/search/
  - /riak/2.9.0p5/configuring/search/
  - /riak/2.9.0/configuring/search/
  - /riak/kv/2.9.0/configuring/search/
  - /riak/kv/2.9.0p1/configuring/search/
  - /riak/kv/2.9.0p2/configuring/search/
  - /riak/kv/2.9.0p3/configuring/search/
  - /riak/kv/2.9.0p4/configuring/search/
---

[usage search]: {{}}riak/kv/2.9.0p5/developing/usage/search
[usage search schema]: {{}}riak/kv/2.9.0p5/developing/usage/search-schemas
[usage search data types]: {{}}riak/kv/2.9.0p5/developing/usage/searching-data-types
[usage custom extractors]: {{}}riak/kv/2.9.0p5/developing/usage/custom-extractors
[cluster-ops aae throttle]: {{}}riak/kv/2.9.0p5/using/cluster-operations/active-anti-entropy/#throttling
[config reference]: {{}}riak/kv/2.9.0p5/configuring/reference
[config reference#search]: {{}}riak/kv/2.9.0p5/configuring/reference/#search
[glossary aae]: {{}}riak/kv/2.9.0p5/learn/glossary/#active-anti-entropy-aae
[security index]: {{}}riak/kv/2.9.0p5/using/security/

[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation

This page covers how to use Riak Search (with [Solr](http://lucene.apache.org/solr/) integration).

For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].

If you are looking to develop on or with Riak Search, take a look at:

* [Using Search][usage search]
* [Search Schema][usage search schema]
* [Custom Search Extractors][usage custom extractors]
* [Riak KV Data Types and Search][usage search data types]

## Overview

We'll be walking through:

1. [Prerequisites](#prerequisites)
2. [Enabling Riak Search](#enabling-riak-search)
3. [Search Configuration Settings](#search-config-settings)
4. 
[Additional Solr Information](#more-on-solr) + +## Prerequisites + +Because Solr is a Java application, you will need to install **Java 7 +or later** on every node. Installation packages can be found on the [Java SE Downloads +page][java se downloads] and instructions in the [Java SE documentation site][java se docs]. + + +## Enabling Riak Search + +Riak Search is not enabled by default, so you must enable it in every +node's [configuration file][config reference] as follows: + +```riak.conf +search = on +``` + + +## Search Config Settings + +You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation. + +### `search` + +Enable or disable search; defaults to `off`. + +Valid values: `on` or `off` + +### `search.anti_entropy.data_dir` + +The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`. + +Valid values: a directory + +### `search.anti_entropy.throttle` + +Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`. + +Valid values: `on` or `off` + +You can read more about throttling [here][cluster-ops aae throttle]. + +### `search.anti_entropy.throttle.$tier.delay` + +Set the throttling tiers delay for [active anti-entropy][glossary aae]; no default. + +Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above. + +For example: + +``` +search.anti_entropy.throttle.tier1.solrq_queue_length = 0 +search.anti_entropy.throttle.tier1.delay = 0ms +search.anti_entropy.throttle.tier2.solrq_queue_length = 40 +search.anti_entropy.throttle.tier2.delay = 5ms +``` +will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a mailbox size of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle). + +Valid values: Non-negative integer + +### `search.anti_entropy.throttle.$tier.solrq_queue_length` + +Set the throttling tiers for [active anti-entropy][glossary aae]; no default. + +Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle +should observe at that size and above. + +For example: + +``` +search.anti_entropy.throttle.tier1.solrq_queue_length = 0 +search.anti_entropy.throttle.tier1.delay = 0ms +search.anti_entropy.throttle.tier2.solrq_queue_length = 40 +search.anti_entropy.throttle.tier2.delay = 5ms +``` +will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a mailbox size of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle). + +Valid values: Non-negative integer + +### `search.dist_query` + +Enable this node in distributed query plans; defaults to `on`. + +If enabled, this node will participate in distributed Solr queries. 
If disabled, the node will be excluded from Riak search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long-running administrative operation (e.g. reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.

This setting can also be changed via `riak-admin` by issuing one of the following commands:

```bash
riak-admin set search.dist_query=off
```

or

```bash
riak-admin set search.dist_query=on
```

Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` command above. Setting `search.dist_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up.

Valid values: `on` or `off`

### `search.index.error_threshold.failure_count`

The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.

Valid values: Integer

### `search.index.error_threshold.failure_interval`

The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.

If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.

Valid values: Milliseconds

### `search.index.error_threshold.reset_interval`

The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.

Valid values: Milliseconds

### `search.queue.batch.flush_interval`

The maximum delay between notification to flush batches to Solr; defaults to `1000` (milliseconds).

This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.

Valid values: `ms`, `s`, `m`, or `h`

### `search.queue.batch.maximum`

The maximum batch size, in number of Riak objects; defaults to `500`.

Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.

Valid values: Integer

### `search.queue.batch.minimum`

The minimum batch size, in number of Riak objects; defaults to `10`. 
Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.

Valid values: Integer

### `search.queue.high_watermark`

The queue high water mark; defaults to `1000`.

If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem if writes into Solr start to fall behind.

Valid values: Integer

### `search.queue.high_watermark.purge_strategy`

The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.

Valid values: `purge_one`, `purge_index`, or `off`

* `purge_one` removes the oldest item on the queue from an erroring index (one whose fuse is blown, in the code's terms) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
* `purge_index` removes all items associated with one random erroring index (one whose fuse is blown, in the code's terms) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
* `off` disables purging

### `search.root_dir`

The root directory in which index data and configuration is stored; defaults to `./data/yz`.

Valid values: a directory

### `search.solr.jvm_options`

The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.

Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.

Valid values: Java command-line arguments

### `search.solr.jmx_port`

The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.

Valid values: Integer

**Note**: JMX ceased being a Riak feature in Riak KV 2.9.0. This setting is left here for reference but no longer affects anything.

### `search.solr.port`

The port number to which Solr binds (note: binds on every interface); defaults to `8093`.

Valid values: Integer

### `search.solr.start_timeout`

How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.

Values lower than 1s will be rounded up to 1s.

Valid values: Integer with time units (e.g. 2m)

## More on Solr

### Solr JVM and Ports

Riak Search runs one Solr process per node to manage its indexing and search functionality. While the underlying Yokozuna project manages index distribution, node coverage for queries, active anti-entropy (AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.

Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports to the outside world.

### Solr for Operators

For further information on Solr monitoring, tuning, and performance, we recommend the following documents for getting started:

* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
* [Solr Performance Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
* [Solr Performance Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)

A wide variety of other documentation is available from the Solr OSS community.
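To tie the settings on this page together, here is a sketch of a `riak.conf` fragment combining several of them; all values are illustrative assumptions rather than tuning advice:

```riak.conf
search = on
search.root_dir = ./data/yz
search.solr.port = 8093
search.solr.start_timeout = 30s
search.queue.batch.minimum = 10
search.queue.batch.maximum = 500
search.queue.high_watermark = 1000
search.dist_query = on
```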
diff --git a/content/riak/kv/2.9.0p5/configuring/strong-consistency.md b/content/riak/kv/2.9.0p5/configuring/strong-consistency.md new file mode 100644 index 0000000000..1e347d1e6a --- /dev/null +++ b/content/riak/kv/2.9.0p5/configuring/strong-consistency.md @@ -0,0 +1,675 @@ +--- +title: "Implementing Strong Consistency" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Implementing Strong Consistency" + identifier: "configuring_strong_consistency" + weight: 190 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.0p5/configuring/strong-consistency/ + - /riak/2.9.0/configuring/strong-consistency/ + - /riak/kv/2.9.0/configuring/strong-consistency/ + - /riak/kv/2.9.0p1/configuring/strong-consistency/ + - /riak/kv/2.9.0p2/configuring/strong-consistency/ + - /riak/kv/2.9.0p3/configuring/strong-consistency/ + - /riak/kv/2.9.0p4/configuring/strong-consistency/ +--- + + +[apps strong consistency]: {{}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.9.0p5/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.9.0p5/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.9.0p5/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.9.0p5/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.9.0p5/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.9.0p5/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.9.0p5/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.9.0p5/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.9.0p5/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.9.0p5/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.9.0p5/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.9.0p5/developing/data-types +[glossary aae]: {{}}riak/kv/2.9.0p5/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.9.0p5/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.9.0p5/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.9.0p5/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.9.0p5/developing/client-libraries + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. + +This document provides information on configuring and monitoring a Riak +cluster's optional strong consistency subsystem. 
Documentation for +developers building applications using Riak's strong consistency feature +can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical +treatment can be found in [Strong Consistency][concept strong consistency]. + +## Minimum Cluster Size + +In order to use strong consistency in Riak, **your cluster must consist +of at least three nodes**. If it does not, all strongly consistent +operations will fail. If your cluster is smaller than three nodes, you +will need to [add more nodes][cluster ops add remove node] and make sure +that strong consistency is [enabled](#enabling-strong-consistency) on all of them. + +Strongly consistent operations on a given key may also fail if a +majority of object replicas in a given ensemble are unavailable, whether +due to slowness, crashes, or network partitions. This means that you may +see strongly consistent operations fail even if the minimum cluster size +requirement has been met. More information on ensembles can be found in +[Implementation Details](#implementation-details). + +While strong consistency requires at least three nodes, we have a +variety of recommendations regarding cluster size, which can be found in +[Fault Tolerance](#fault-tolerance). + +## Enabling Strong Consistency + +Strong consistency in Riak is disabled by default. You can enable it in +each node's [configuration files][config reference#strong-cons]. + +```riakconf +strong_consistency = on +``` + +```appconfig +%% In the older, app.config-based system, the strong consistency +%% parameter is enable_consensus: + +{riak_core, [ + % ... + {enable_consensus, true}, + % ... + ]} +``` + +Remember that you must [restart your node][use admin riak cli] for +configuration changes to take effect. + +For strong consistency requirements to be applied to specific keys, +those keys must be in [buckets][concept buckets] bearing a bucket type with the +`consistent` property set to `true`. More information can be found in +[Using Bucket Types][cluster ops bucket types]. + +If you enable strong consistency on all nodes in a cluster with fewer +than three nodes, strong consistency will be **enabled** but not yet +**active**. Strongly consistent operations are not possible in this +state. Once at least three nodes with strong consistency enabled are +detected in the cluster, the system will be activated and ready for use. +You can check on the status of the strong consistency subsystem using +the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command. + +## Fault Tolerance + +Strongly consistent operations in Riak are necessarily less highly +available than [eventually consistent][concept eventual consistency] operations +because strongly consistent operations can only succeed if a **quorum** +of object replicas are currently reachable. A quorum can be expressed as +N / 2 + 1 (or `n_val` / 2 + 1), meaning that 3 replicas constitutes a +quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are +unavailable, for example, no strongly consistent operations on that +object can succeed. + +While Riak uses N=3 by default, bear in mind that **higher values of N +will allow for more fault tolerance**. The table below shows the number +of allowable missing replicas for assorted values of N: + +Replicas | Allowable missing replicas +:--------|:-------------------------- +3 | 1 +5 | 2 +7 | 3 +9 | 4 +15 | 7 + +Thus, we recommend setting `n_val` higher than the default of 3 for +strongly consistent operations. 
More on `n_val` in the section below.

### n_val Recommendations

Due to the quorum requirements explained above, we recommend that you use _at least_ N=5 for strongly consistent data. You can set the value of N, i.e. `n_val`, for buckets [using bucket types][cluster ops bucket types]. For example, you can create and activate a bucket type with N set to 5 and strong consistency enabled---we'll call the bucket type `consistent_and_fault_tolerant`---using the following series of [commands][use admin riak-admin]:

```bash
riak-admin bucket-type create consistent_and_fault_tolerant \
  '{"props": {"consistent":true,"n_val":5}}'
riak-admin bucket-type activate consistent_and_fault_tolerant
```

If the `activate` command outputs `consistent_and_fault_tolerant has been activated`, the bucket type is now ready to provide strong consistency guarantees.

#### Setting the target_n_val parameter

The `target_n_val` parameter sets the highest `n_val` that you intend to use in an entire cluster. The purpose of this parameter is to ensure that so-called "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. This can happen when:

* `target_n_val` is greater than the number of physical nodes, or
* the `n_val` for a bucket is greater than `target_n_val`.

A problem to be aware of if you're using strong consistency is that the default for `target_n_val` is 4, while our suggested minimum `n_val` for strongly consistent bucket types is 5. This means that you will need to raise `target_n_val` if you intend to use an `n_val` over 4 for _any_ bucket type in your cluster. If you anticipate using an `n_val` of 7 as the largest `n_val` within your cluster, for example, you will need to set `target_n_val` to 7.

This setting is not contained in `riak.conf`, and must instead be set in the `advanced.config` file. For more information, see our documentation on [advanced configuration][config reference#advanced].

If you are using strong consistency in a cluster that has already been created with a `target_n_val` that is too low (remember that the default is too low), you will need to raise it to the desired higher value and restart each node.

#### Note on Bucket Properties

The `consistent` bucket property is one of two bucket properties, alongside [`datatype`][cluster ops bucket types], that cannot be changed once a bucket type has been created.

Furthermore, if `consistent` is set to `true` for a bucket type, you cannot change the `n_val` for the bucket type once it's been created. If you attempt to do so, you'll see the following error:

```
Error updating bucket <bucket_type>:
n_val cannot be modified for existing consistent type
```

If you've created a bucket type with a specific `n_val` and wish to change it, you will need to create a new bucket type with the appropriate `n_val` and use the new bucket type instead.

### Fault Tolerance and Cluster Size

From the standpoint of strongly consistent operations, larger clusters tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down.

Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If two nodes go down, _all_ ensembles will lose quorum and will be unable to function. Strongly consistent operations on the entire keyspace will fail until at least one node is brought back online. 
And even when that one node is brought back online, a significant portion of the keyspace will continue to be unavailable for strongly consistent operations.

For the sake of contrast, imagine a 50-node cluster in which all ensembles are N=5 (i.e. all objects are replicated to five nodes). In this cluster, each node is involved in only 10% of the total ensembles; if a single node fails, that failure will thus impact only 10% of ensembles. In addition, because N is set to 5, that will not impact quorum for _any_ ensemble in the cluster; two additional node failures would need to occur for quorum to be lost for _any_ ensemble. And even in the case of three nodes failing, it is highly unlikely that that failure would impact the same ensembles; if it did, only those ensembles would become unavailable, affecting only 10% of the key space, as opposed to 100% in the example of a 3-node cluster consisting of N=3 ensembles.

These examples illustrate why we recommend higher values for N---again, at least N=5---as well as clusters with many nodes. The 50-node cluster example above is used only to illustrate why larger clusters are more fault tolerant. The definition of "many" nodes will vary according to your needs. For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].

### Offline Node Recommendations

In general, strongly consistent Riak is more sensitive to the number of nodes in the cluster than eventually consistent Riak, due to the quorum requirements described above. While Riak is designed to withstand a variety of failure scenarios that make nodes in the cluster unreachable, such as hardware or network failure, **we nonetheless recommend that you limit the number of nodes that you intentionally down or reboot**. Having multiple nodes leave the cluster at once can threaten quorum and thus affect the viability of some or all strongly consistent operations, depending on the size of the cluster.

If you're using strong consistency and you do need to reboot multiple nodes, we recommend rebooting them very carefully. Rebooting nodes too quickly in succession can force the cluster to lose quorum and thus be unable to service strongly consistent operations. The best strategy is to reboot nodes one at a time and wait for each node to rejoin existing [ensembles][cluster ops strong consistency] before continuing to the next node. At any point in time, the state of currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].

## Performance

If you run into performance issues, bear in mind that the key space in a Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of that key space. Larger ring sizes allow more independent consensus groups to exist in a cluster, which can provide for more concurrency and higher throughput, and thus better performance. The ideal ring size, however, will also depend on the number of nodes in the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].

Adding nodes to your cluster is another means of enhancing the performance of strongly consistent operations. Instructions on doing so can be found in [Adding and Removing Nodes][cluster ops add remove node].

Your cluster's configuration can also affect strong consistency performance. 
See the section on [configuration][config reference#strong-cons] below.

## riak-admin ensemble-status

The [`riak-admin`][use admin riak-admin] interface used for general node/cluster management has an `ensemble-status` command that provides insight into the current status of the consensus subsystem undergirding strong consistency.

Running the command by itself will provide the current state of the subsystem:

```bash
riak-admin ensemble-status
```

If strong consistency is not currently enabled, you will see `Note: The consensus subsystem is not enabled.` in the output of the command; if strong consistency is enabled, you will see output like this:

```
============================== Consensus System ===============================
Enabled:     true
Active:      true
Ring Ready:  true
Validation:  strong (trusted majority required)
Metadata:    best-effort replication (asynchronous)

================================== Ensembles ==================================
 Ensemble     Quorum        Nodes      Leader
-------------------------------------------------------------------------------
   root       4 / 4         4 / 4      riak@riak1
    2         3 / 3         3 / 3      riak@riak2
    3         3 / 3         3 / 3      riak@riak4
    4         3 / 3         3 / 3      riak@riak1
    5         3 / 3         3 / 3      riak@riak2
    6         3 / 3         3 / 3      riak@riak2
    7         3 / 3         3 / 3      riak@riak4
    8         3 / 3         3 / 3      riak@riak4
```

### Interpreting ensemble-status Output

The following table provides a guide to `ensemble-status` output:

Item | Meaning
:----|:-------
`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency.
`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes.
`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change.
`Validation` | This will display `strong` if the `tree_validation` setting in `advanced.config` has been set to `true` and `weak` if set to `false`.
`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in `advanced.config`, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)`, then `synchronous_tree_updates` is set to `true`.
`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.<br>&nbsp;&nbsp;• Ensemble --- The ID of the ensemble<br>&nbsp;&nbsp;• Quorum --- The number of ensemble peers that are either leading or following<br>&nbsp;&nbsp;• Nodes --- The number of nodes currently online<br>&nbsp;&nbsp;• Leader --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample output above, is a special ensemble that stores a list of nodes and ensembles in the cluster.

More in-depth information on ensembles can be found in our [internal documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).

### Inspecting Specific Ensembles

The `ensemble-status` command also enables you to directly inspect the status of specific ensembles in a cluster. The IDs for all current ensembles are displayed in the `Ensembles` section of the `ensemble-status` output described above.

To inspect a specific ensemble, specify the ID:

```bash
riak-admin ensemble-status <id>
```

The following would inspect ensemble 2:

```bash
riak-admin ensemble-status 2
```

Below is sample output for a single ensemble:

```
================================= Ensemble #2 =================================
Id:           {kv,0,3}
Leader:       riak@riak2 (2)
Leader ready: true

==================================== Peers ====================================
 Peer  Status     Trusted          Epoch         Node
-------------------------------------------------------------------------------
  1    following    yes              1           riak@riak1
  2    leading      yes              1           riak@riak2
  3    following    yes              1           riak@riak3
```

The table below provides a guide to the output:

Item | Meaning
:----|:-------
`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. All ensembles are `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible.
`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.<br>&nbsp;&nbsp;• Peer --- The ID of the peer<br>&nbsp;&nbsp;• Status --- Whether the peer is a leader or a follower<br>&nbsp;&nbsp;• Trusted --- Whether the peer's Merkle tree is currently considered trusted or not<br>&nbsp;&nbsp;• Epoch --- The current consensus epoch for the peer. The epoch is incremented each time the leader changes.<br>&nbsp;&nbsp;• Node --- The node on which the peer resides.
More information on leaders, peers, Merkle trees, and other details can be found in [Implementation Details](#implementation-details) below.

## Implementation Details

Strong consistency in Riak is handled by a subsystem called [`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md). This subsystem functions differently from other systems in Riak in a number of ways, and many of these differences are important to bear in mind for operators configuring their cluster's usage of strong consistency.

### Basic Operations

The first major difference is that strongly consistent Riak involves a different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types of atomic operations on objects:

* **Get** operations work just as they do against non-strongly-consistent keys, but with two crucial differences:
  1. Connecting clients are guaranteed to receive the most recently written value (which makes those operations CP, i.e. consistent and partition tolerant)
  2. Reads on strongly consistent keys *never* return siblings, hence there is no need to develop any sort of conflict resolution strategy for those keys
* **Conditional put** operations write an object only if no object currently exists in that key. The operation will fail if the key already exists; if the key was never written or has been deleted, the operation succeeds.
* **Conditional modify** operations are compare-and-swap (CAS) operations that succeed only if the value of a key has not changed since it was previously read.
* **Delete** operations work mostly like they do against non-strongly-consistent keys, with the exception that [tombstones][cluster ops obj del] are not harvested, which is the equivalent of having `delete_mode` set to `keep`.

**From the standpoint of clients connecting to Riak, there is little difference between strongly and non-strongly consistent data**. The operations performed on objects---reads, writes, deletes, etc.---are the same, which means that the client API for strong consistency is essentially the same as it is for eventually consistent operations, with the important exception of error handling.

### Ensembles

The main actors in Riak's implementation of strong consistency are **ensembles**, which are independent groups that watch over a portion of a Riak cluster's key space and coordinate strongly consistent operations across nodes. When watching over a given key space, ensembles must act upon multiple replicas of a given object, the number of which is specified by `n_val` (more on this in [Replication Properties][apps replication properties]).

Eventually consistent Riak can service requests even when only a single object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it requires that a **quorum** of object replicas be online and reachable, where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not available for a key, all strongly consistent operations against that key will fail**.

More information can be found in the section on Fault Tolerance above.
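To make the quorum arithmetic concrete, here is a minimal sketch (plain Erlang shell code for illustration, not part of Riak's API) of the rule described above:

```erlang
%% Quorum for an ensemble is floor(n_val / 2) + 1: the minimum number of
%% reachable peers an ensemble needs before strongly consistent
%% operations against its keys can succeed.
Quorum = fun(NVal) -> NVal div 2 + 1 end.
%% Quorum(3) =:= 2, Quorum(5) =:= 3, Quorum(7) =:= 4.
```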
+ +### Peers, Leaders, Followers, and Workers + +All ensembles in strongly consistent Riak consist of agents called +**peers**. The number of peers in an ensemble is defined by the `n_val` +of that ensemble, i.e. the number of object replicas that the +ensemble watches over. Amongst the peers in the ensemble, there are two +basic actors: **leaders** and **followers**. + +Leaders and followers coordinate with one another on most requests. +While leaders and followers coordinate on all writes, i.e. all puts and +deletes, you can enable leaders to respond to gets without the need to +coordinate with followers. This is known as granting a **leader lease**. +Leader leases are enabled by default, and are disabled (or re-enabled) +at the cluster level. A more in-depth account of ensemble behavior can +be found in our [internal +documentation](https://github.com/basho/riak_ensemble/tree/develop/doc). + +In addition to leaders and followers, ensemble peers use lightweight +Erlang processes called **workers** to perform long-running K/V +operations, allowing peers to remain responsive to requests. The number +of workers assigned to each peer depends on your configuration. + +These terms should be borne in mind in the sections on configuration +below. + +### Integrity Checking + +An essential part of implementing a strong consistency subsystem in a +distributed system is **integrity checking**, which is a process that +guards against data corruption and inconsistency even in the face of +network partitions and other adverse events that Riak was built to +handle gracefully. + +Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency +integrity checking utilizes [Merkle +trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on +disk. All peers in an ensemble, i.e. all leaders and followers, maintain +their own Merkle trees and update those trees in the event of most +strongly consistent operations. Those updates can occur synchronously or +asynchronously from the standpoint of client operations, depending on +the configuration that you specify. + +While integrity checking takes place automatically in Riak, there are +important aspects of its behavior that you can configure. See the Merkle Tree settings section below for more +information on configurable parameters. + +## Configuring Strong Consistency + +The `riak_ensemble` subsystem provides a wide variety of tunable +parameters that you can adjust to fit the needs of your Riak cluster. +All `riak_ensemble`-specific parameters, with the exception of the +`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency), +must be set in each node's `advanced.config` file, _not_ in `riak.conf` +or `app.config`. + +Information on the syntax and usage of `advanced.config` can be found in +our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full +listing of [strong-consistency-related configuration parameters][config reference#strong-cons]. + +Please note that the sections below require a basic understanding of the +following terms: + +* ensemble +* peer +* leader +* follower +* worker +* integrity checking +* Merkle tree + +For an explanation of these terms, see the [Implementation Details](#implementation-details) section +above. + +#### Leader Behavior + +The `trust_lease` setting determines whether leader leases are used to +optimize reads. 
When set to `true`, a leader with a valid lease can handle reads directly without needing to contact any followers. When `false`, the leader will always contact followers, which can lead to degraded read performance. The default is `true`. We recommend leaving leader leases enabled for performance reasons.

All leaders have periodic duties that they perform, including refreshing the leader lease. You can determine how frequently this occurs, in milliseconds, using the `ensemble_tick` setting. The default is 500 milliseconds. Please note that this setting must be lower than both the `lease_duration` and `follower_timeout` settings (both explained below).

If you set `trust_lease` to `true`, you can also specify how long a leader lease remains valid without being refreshed using the `lease_duration` setting, which is specified in milliseconds. This setting should be higher than `ensemble_tick` to ensure that leaders have time to refresh their leases before they time out, and it _must_ be lower than `follower_timeout`, explained in the section below. The default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400, `lease_duration` will default to 600.

#### Worker Settings

You can choose how many workers are assigned to each peer using the `peer_workers` setting. Workers are lightweight processes spawned by leaders and followers. While increasing the number of workers will make the strong consistency subsystem slightly more computationally expensive, more workers can mean improved performance in some cases, depending on the workload. The default is 1.

### Timeouts

You can establish timeouts for both reads and writes (puts and deletes) using the `peer_get_timeout` and `peer_put_timeout` settings, respectively. Both are expressed in milliseconds and default to 60000 (1 minute).

Longer timeouts will decrease the likelihood that read or write operations will fail due to long computation times; shorter timeouts entail shorter wait times for connecting clients, but at a higher risk of failed operations under heavy load.

### Merkle Tree Settings

Leaders and followers in Riak's strong consistency system maintain persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for all data stored by that peer. More information can be found in the **Integrity Checking** section above. The two sections directly below describe Merkle-tree-related parameters.

#### Tree Validation

The `tree_validation` parameter determines whether Riak considers Merkle trees to be trusted after peers are restarted (for whatever reason). When enabled, i.e. when `tree_validation` is set to `true` (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted quorum. While this is the safest mode because it protects Riak against silent corruption in Merkle trees, it carries the drawback that it can reduce Riak availability by requiring more than a simple majority of nodes to be online and reachable when peers restart.

If you are using ensembles with N=3, we strongly recommend setting `tree_validation` to `false`.

#### Synchronous vs. Asynchronous Tree Updates

Merkle tree updates can happen synchronously or asynchronously. This is determined by the `synchronous_tree_updates` parameter. 
When set to `false`, which is the default, Riak responds to the client after the first roundtrip that updates the followers' data but before the second roundtrip required to update the followers' Merkle trees, allowing the Merkle tree update to happen asynchronously in the background; when set to `true`, Riak requires two quorum roundtrips to occur before replying back to the client, which can increase per-request latency.

Please note that this setting applies only to Merkle tree updates sent to followers. Leaders _always_ update their local Merkle trees before responding to the client. Asynchronous updates can be unsafe in certain scenarios. For example, if a leader crashes before sending metadata updates to followers _and_ all followers that had acknowledged the write somehow revert the object value immediately prior to the write request, a future read could hypothetically return the immediately preceding value without realizing that the value was incorrect. Setting `synchronous_tree_updates` to `false` does bear this possibility, but it is highly unlikely.

## Strong Consistency and Active Anti-Entropy

Riak's [active anti-entropy][glossary aae] (AAE) feature _can_ repair strongly consistent data. Although it is not necessary to use active anti-entropy if you are using strong consistency, we nonetheless recommend doing so.

Without AAE, all object conflicts are repaired via read repair. Read repair, however, cannot repair conflicts in so-called "cold data," i.e. data that may not be read for long periods of time. While using AAE does entail small performance losses, not using AAE can lead to problems with silent on-disk corruption.

## Strong Consistency and Bitcask

One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability of objects' TTL. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in strongly consistent buckets only in situations when these occasional irregularities are acceptable.

## Important Caveats

The following Riak features are not currently available in strongly consistent buckets:

* [Secondary indexes][cluster ops 2i] --- If you do attach secondary index metadata to objects in strongly consistent buckets, strongly consistent operations can still proceed, but that metadata will be silently ignored.
* [Riak Data Types][dev data types] --- Data Types can currently be used only in an eventually consistent fashion.
* [Using commit hooks][usage commit hooks] --- Neither pre- nor post-commit hooks are supported in strongly consistent buckets. If you do associate a strongly consistent bucket with one or more commit hooks, strongly consistent operations can proceed as normal in that bucket, but all commit hooks will be silently ignored.

Furthermore, you should also be aware that strong consistency guarantees are applied only at the level of single keys. 
There is currently no support within Riak for strongly consistent operations against multiple keys, although it is always possible to incorporate client-side write and read locks in applications that use strong consistency.

## Known Issues

There are a few known issues that you should be aware of when using the latest version of strong consistency.

* **Consistent reads of never-written keys create tombstones** --- A [tombstone][cluster ops obj del] will be written if you perform a read against a key that a majority of peers claim does not exist. This is necessary for certain corner cases in which offline or unreachable replicas containing partially written data need to be rolled back in the future.
* **Consistent keys and key listing** --- In Riak, key listing operations, such as listing all the keys in a bucket, do not filter out tombstones. While this is rarely a problem for non-strongly-consistent keys, it does present an issue for strong consistency due to the tombstone issues mentioned above.
* **Secondary indexes not supported** --- Strongly consistent operations do not support [secondary indexes][cluster ops 2i] (2i) at this time. Furthermore, any other metadata attached to objects, even if not related to 2i, will be silently ignored by Riak in strongly consistent buckets.
* **Multi-Datacenter Replication not supported** --- At this time, consistent keys are *not* replicated across clusters using Multi-Datacenter Replication (MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly consistent data within a cluster with eventually consistent data between clusters is difficult to reason about from the perspective of applications. In a future version of Riak, we will add support for strongly consistent replication across multiple datacenters/clusters.
* **Client library exceptions** --- Basho's official [client libraries][dev client libraries] convert errors returned by Riak into generic exceptions, with a message derived from the returned server-side error message.

diff --git a/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter.md b/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter.md
new file mode 100644
index 0000000000..a21c904885
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter.md
@@ -0,0 +1,164 @@
---
title_supertext: "Configuring:"
title: "V2 Multi-Datacenter Replication"
description: ""
project: "riak_kv"
project_version: "2.9.0p5"
menu:
  riak_kv-2.9.0p5:
    name: "V2 Multi-Datacenter"
    identifier: "configuring_v2"
    weight: 210
    parent: "configuring"
toc: true
commercial_offering: true
aliases:
  - /riak/2.9.0p5/ops/mdc/v2/configuration
  - /riak/kv/2.9.0p5/ops/mdc/v2/configuration
  - /riak/2.9.0p5/configuring/v2-multi-datacenter/
  - /riak/2.9.0/configuring/v2-multi-datacenter/
  - /riak/kv/2.9.0/configuring/v2-multi-datacenter/
  - /riak/kv/2.9.0p1/configuring/v2-multi-datacenter/
  - /riak/kv/2.9.0p2/configuring/v2-multi-datacenter/
  - /riak/kv/2.9.0p3/configuring/v2-multi-datacenter/
  - /riak/kv/2.9.0p4/configuring/v2-multi-datacenter/
---

[config v2 ssl]: {{}}riak/kv/2.9.0p5/configuring/v2-multi-datacenter/ssl

{{% note title="Deprecation Warning" %}}
v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/) instead.
+{{% /note %}} + +Riak's Multi-Datacenter Replication capabilities offer a +variety of configurable parameters. + +## File + +The configuration for replication is kept in the `riak_repl` section of +each node's `advanced.config`. That section looks like this: + +```advancedconfig +{riak_repl, [ + {fullsync_on_connect, true}, + {fullsync_interval, 360}, + % Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + % Solaris: + % {data_root, "/opt/riak/data/riak_repl"}, + % FreeBSD/SmartOS: + % {data_root, "/var/db/riak/riak_repl"}, + {queue_size, 104857600}, + {server_max_pending, 5}, + {client_ack_frequency, 5} + ]} +``` + +## Usage + +These settings are configured using the standard Erlang config file +syntax, i.e. `{Setting, Value}`. For example, if you wished to set +`ssl_enabled` to `true`, you would insert the following line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{riak_repl, [ + % Other configs + {ssl_enabled, true}, + % Other configs + ]} +``` + +## Settings + +Once your configuration is set, you can verify its correctness by +running the following command: + +```bash +riak chkconfig +``` + +The output from this command will point you to syntactical and other +errors in your configuration files. + +A full list of configurable parameters can be found in the sections +below. + +## Fullsync Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster +`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.
**Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects for which the leader will wait for an acknowledgment from the remote location before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site + +## Client Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster +`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs +`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred + +## Buffer Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes +`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes + +## Worker Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). +`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). + + +1. SSL depth is the maximum number of non-self-issued + intermediate certificates that may follow the peer certificate in a valid + certificate chain. If depth is `0`, the PEER must be signed by the trusted + ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2` + then PEER, CA, CA, ROOT-CA and so on. + +2. Each get worker spawns 2 processes, one for the work and + one for the get FSM (an Erlang finite state machine implementation for `GET` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +3. Each put worker spawns 2 processes, one for the work, and + one for the put FSM (an Erlang finite state machine implementation for `PUT` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +4. If the ACL is specified and not the special value `*`, + peers presenting certificates not matching any of the patterns will not be + allowed to connect. + If no ACLs are configured, no checks on the common name are done, except + as described for [Identical Local and Peer Common Names][config v2 ssl]. 
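+
+## Example Configuration
+
+To tie the tables above together, here is a sketch of a `riak_repl` section that combines several of the fullsync, queue, and worker settings documented on this page. The values shown are illustrative, not recommendations; tune them for your environment.
+
+```advancedconfig
+{riak_repl, [
+    %% Fullsync: run on connect, then every 6 hours
+    {fullsync_on_connect, true},
+    {fullsync_interval, 360},
+    %% Queue: 100 MiB cap, at most 5 unacknowledged objects
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    {queue_size, 104857600},
+    {server_max_pending, 5},
+    %% Fullsync worker pools
+    {min_get_workers, 5},
+    {max_get_workers, 100},
+    {min_put_workers, 5},
+    {max_put_workers, 100}
+  ]}
+```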
diff --git a/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..580b3bb337 --- /dev/null +++ b/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,86 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/ops/mdc/v2/nat + - /riak/kv/2.9.0p5/ops/mdc/v2/nat + - /riak/2.9.0p5/configuring/v2-multi-datacenter/nat/ + - /riak/2.9.0/configuring/v2-multi-datacenter/nat/ + - /riak/kv/2.9.0/configuring/v2-multi-datacenter/nat/ + - /riak/kv/2.9.0p1/configuring/v2-multi-datacenter/nat/ + - /riak/kv/2.9.0p2/configuring/v2-multi-datacenter/nat/ + - /riak/kv/2.9.0p3/configuring/v2-multi-datacenter/nat/ + - /riak/kv/2.9.0p4/configuring/v2-multi-datacenter/nat/ +--- + + +[config v2 ssl]: {{}}riak/kv/2.9.0p5/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. 
+ +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` diff --git a/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..c57c655218 --- /dev/null +++ b/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,375 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/ops/mdc/v2/quick-start + - /riak/kv/2.9.0p5/ops/mdc/v2/quick-start + - /riak/2.9.0p5/configuring/v2-multi-datacenter/quick-start/ + - /riak/2.9.0/configuring/v2-multi-datacenter/quick-start/ + - /riak/kv/2.9.0/configuring/v2-multi-datacenter/quick-start/ + - /riak/kv/2.9.0p1/configuring/v2-multi-datacenter/quick-start/ + - /riak/kv/2.9.0p2/configuring/v2-multi-datacenter/quick-start/ + - /riak/kv/2.9.0p3/configuring/v2-multi-datacenter/quick-start/ + - /riak/kv/2.9.0p4/configuring/v2-multi-datacenter/quick-start/ +--- + + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/quick-start/) instead. +{{% /note %}} + +The Riak Multi-Datacenter Replication Quick Start will walk you through +the process of configuring Riak's version 2 Replication to perform +replication between two sample Riak clusters in separate networks. This +guide will also cover bidirectional replication, which is accomplished +by setting up unidirectional replication in both directions between the +clusters. 
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [Performing system tuning][perf index]
+* [Reviewing configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|-------------|-----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there are `listener_<nodename>`s for
+each listening node, and that `leader` and `server_stats` are populated.
+They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following:
+
+```
+Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010"
+leader: 'riak@192.168.1.21'
+client_stats: [{<8051.3902.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster1"},
+                         {strategy,riak_repl_keylist_client},
+                         {fullsync_worker,<8051.3909.0>},
+                         {put_pool_size,5},
+                         {connected,"172.16.1.11",9010},
+                         {state,wait_for_fullsync}]}}]
+```
+
+### Testing Realtime Replication
+
+That's all there is to it! When `PUT` requests are coordinated by
+Cluster1, these operations will be replicated to Cluster2.
+
+You can use the following example script to verify that `PUT` operations
+sent to Cluster1 are being replicated to Cluster2:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=172.16.1.11
+CLUSTER_2_IP=192.168.1.21
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
+  echo "C1 PUT Successful"
+else
+  echo "C1 PUT Failed"
+  exit 1
+fi
+
+CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent
+  C1:${CHECKPUT_C1}
+  C2:${CHECKREPL_C1_TO_C2}"
+  exit 1
+fi
+
+exit 0
+```
+
+You will have to change some of the above variables for your own
+environment, such as IP addresses or ports.
+
+If you run this script and things are working as expected, you will get
+the following output:
+
+```
+C1 PUT Successful
+C1 to C2 consistent
+```
+
+## Set Up Cluster2 → Cluster1 Replication
+
+### About Bidirectional Replication
+
+Multi-Datacenter support can also be configured to replicate in both
+directions, ensuring eventual consistency between your two datacenters.
+Setting up bidirectional replication is as simple as repeating the steps
+above in the other direction, i.e. from Cluster2 to Cluster1.
+
+### Set Up the Listeners on Cluster2 (Source cluster)
+
+On a node in Cluster2, `node4` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010
+riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010
+riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010
+```
+
+### Set Up the Site on Cluster1 (Site cluster)
+
+On a node in Cluster1, `node1` for example, inform the replication
+clients where the Source Listeners are with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you configured in
+the earlier step. For `sitename` enter **Cluster2**.
+
+```bash
+riak-repl add-site 192.168.1.21 9010 Cluster2
+```
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show 'cancelled' in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
diff --git a/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..aadd971dc7
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,168 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/ops/mdc/v2/ssl
+  - /riak/kv/2.9.0p5/ops/mdc/v2/ssl
+  - /riak/2.9.0p5/configuring/v2-multi-datacenter/ssl/
+  - /riak/2.9.0/configuring/v2-multi-datacenter/ssl/
+  - /riak/kv/2.9.0/configuring/v2-multi-datacenter/ssl/
+  - /riak/kv/2.9.0p1/configuring/v2-multi-datacenter/ssl/
+  - /riak/kv/2.9.0p2/configuring/v2-multi-datacenter/ssl/
+  - /riak/kv/2.9.0p3/configuring/v2-multi-datacenter/ssl/
+  - /riak/kv/2.9.0p4/configuring/v2-multi-datacenter/ssl/
+---
+
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+  ]}
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+  ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_depth, ...}
+    % ...
+  ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self signed.
+
+For example:
+
+  * A depth of 0 indicates that the certificate must be signed directly
+    by a root certificate authority (CA)
+  * A depth of 1 indicates that the certificate may be signed by at most
+    1 intermediate CA, followed by a root CA
+  * A depth of 2 indicates that the certificate may be signed by at most
+    2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
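+
+As a condensed sketch of that process, the following standard `openssl` commands create a root CA, issue a site certificate, and strip the password protection from the site key. The file names here are illustrative only.
+
+```bash
+# Create a self-signed root CA (key and certificate)
+openssl genrsa -out rootCA.key 2048
+openssl req -x509 -new -key rootCA.key -days 3650 -out rootCA.pem
+
+# Create a password-protected key and a signing request for site1
+openssl genrsa -des3 -out site1-key-protected.pem 2048
+openssl req -new -key site1-key-protected.pem -out site1.csr
+
+# Sign the site certificate with the root CA
+openssl x509 -req -in site1.csr -CA rootCA.pem -CAkey rootCA.key \
+  -CAcreateserial -out site1-cert.pem -days 365
+
+# Remove the password protection from the site key
+openssl rsa -in site1-key-protected.pem -out site1-key.pem
+```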
diff --git a/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter.md b/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..8e8a298f99
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter.md
@@ -0,0 +1,165 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/ops/mdc/v3/configuration
+  - /riak/kv/2.9.0p5/ops/mdc/v3/configuration
+  - /riak/2.9.0p5/configuring/v3-multi-datacenter/
+  - /riak/2.9.0/configuring/v3-multi-datacenter/
+  - /riak/kv/2.9.0/configuring/v3-multi-datacenter/
+  - /riak/kv/2.9.0p1/configuring/v3-multi-datacenter/
+  - /riak/kv/2.9.0p2/configuring/v3-multi-datacenter/
+  - /riak/kv/2.9.0p3/configuring/v3-multi-datacenter/
+  - /riak/kv/2.9.0p4/configuring/v3-multi-datacenter/
+---
+
+
+[config reference#advanced]: {{}}riak/kv/2.9.0p5/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+Here is a sample of the syntax:
+
+```advancedconfig
+{riak_core, [
+    %% Every *node* runs one cluster_mgr
+    {cluster_mgr, {"0.0.0.0", 9080 }},
+    % ...
+]},
+{riak_repl, [
+    %% Pick the correct data_root for your platform
+    %% Debian/Centos/RHEL:
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    %% Solaris:
+    %% {data_root, "/opt/riak/data/riak_repl"},
+    %% FreeBSD/SmartOS:
+    %% {data_root, "/var/db/riak/riak_repl"},
+    {max_fssource_cluster, 5},
+    {max_fssource_node, 2},
+    {max_fssink_node, 2},
+    {fullsync_on_connect, false},
+    % ...
+]}
+```
+
+## Settings
+
+Riak MDC configuration is set using the standard Erlang config file
+syntax `{Setting, Value}`. For example, if you wished to set
+`fullsync_on_connect` to `false`, you would insert this line into the
+`riak_repl` section (appending a comma if you have more settings to
+follow):
+
+```advancedconfig
+{fullsync_on_connect, false}
+```
+
+Once your configuration is set, you can verify its correctness by
+running the `riak` command-line tool:
+
+```bash
+riak chkconfig
+```
+
+## riak_repl Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number.
+`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. +`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster +`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process +`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication. +`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync. +`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again. +`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100MB. Dropped objects will need to be replicated with a fullsync. +`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter. +`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings). +`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings). + + +## riak_core Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`keyfile` | `path` (string) | `undefined` | Fully qualified path to an ssl `.pem` key file +`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root +`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file +`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1). 
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+
+## Heartbeat Settings
+
+There are two realtime-replication-related settings in the `riak_repl`
+section of `advanced.config` related to the periodic "heartbeat" that is sent
+from the source to the sink cluster to verify the sink cluster's
+liveness. The `rt_heartbeat_interval` setting determines how often the
+heartbeat is sent (in seconds). If a heartbeat is sent and a response is
+not received, Riak will wait `rt_heartbeat_timeout` seconds before
+attempting to re-connect to the sink; if any data is received from the
+sink, even if it is not heartbeat data, the timer will be reset. Setting
+`rt_heartbeat_interval` to `undefined` will disable the heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to. On the other hand,
+lengthening the timeout will make Riak less sensitive to cases in which
+the connection really has been compromised.
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA and so on.
+
+2. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer].
+
+## Default Bucket Properties
+
+Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0.
+
+To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the advanced.config file:
+
+```advancedconfig
+{riak_repl, [
+    {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+    ]}
+]}
+```
+
+If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed.
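+
+As a worked example of the per-sink form of `fullsync_interval` described in the `riak_repl` settings table above, the following sketch (the sink names are hypothetical) runs fullsync against `Cluster2` every hour and against `Cluster3` every six hours:
+
+```advancedconfig
+{riak_repl, [
+    %% Per-sink fullsync schedule: {"sink_name", minutes}
+    {fullsync_interval, [{"Cluster2", 60},
+                         {"Cluster3", 360}]}
+  ]}
+```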
diff --git a/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter/nat.md
new file mode 100644
index 0000000000..a2786884e0
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter/nat.md
@@ -0,0 +1,175 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "With NAT"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "With NAT"
+    identifier: "configuring_v3_replication_nat"
+    weight: 101
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/ops/mdc/v3/nat
+  - /riak/kv/2.9.0p5/ops/mdc/v3/nat
+  - /riak/2.9.0p5/configuring/v3-multi-datacenter/nat/
+  - /riak/2.9.0/configuring/v3-multi-datacenter/nat/
+  - /riak/kv/2.9.0/configuring/v3-multi-datacenter/nat/
+  - /riak/kv/2.9.0p1/configuring/v3-multi-datacenter/nat/
+  - /riak/kv/2.9.0p2/configuring/v3-multi-datacenter/nat/
+  - /riak/kv/2.9.0p3/configuring/v3-multi-datacenter/nat/
+  - /riak/kv/2.9.0p4/configuring/v3-multi-datacenter/nat/
+---
+
+
+[config v3 ssl]: {{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/ssl
+
+Riak's Version 3 Replication supports replication of data on
+networks that use static NAT.
+
+This can be used for replicating data over the internet where servers
+have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network).
+
+### Requirements
+
+In order for Replication to work on a server configured with NAT, the
+NAT addresses must be configured *statically*.
+
+## Configuration
+
+NAT rules can be configured at runtime, from the command line.
+
+* `riak-repl nat-map show`
+
+    Shows the current NAT mapping table
+
+* `riak-repl nat-map add <external_ip>[:port] <internal_ip>`
+
+    Adds a NAT map from the external IP, with an optional port, to an
+    internal IP. The port number refers to a port that is automatically
+    mapped to the internal `cluster_mgr` port number.
+
+* `riak-repl nat-map del <external_ip>[:port] <internal_ip>`
+
+    Deletes a specific NAT map entry.
+
+### Applying Changes at Runtime
+
+* Realtime NAT replication changes will be applied once realtime is
+  stopped and started using the following commands:
+
+    * `riak-repl realtime stop <clustername>`
+    * `riak-repl realtime start <clustername>`
+
+* Fullsync NAT replication changes will be applied on the next run of a
+  fullsync, or you can stop and start the current fullsync.
+
+    * `riak-repl fullsync stop <clustername>`
+    * `riak-repl fullsync start <clustername>`
+
+
+## Example
+
+* Cluster_A is the **source** of replicated data.
+* Cluster_B and Cluster_C are the **sinks** of the replicated data.
+
+### Cluster_A Setup
+
+Cluster_A is set up with nodes using the following **internal** IP
+addresses:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.1.20` | -
+`192.168.1.21` | -
+`192.168.1.22` | -
+`192.168.1.23` | -
+`192.168.1.24` | -
+
+### Cluster_B Setup
+
+The nodes of Cluster_B will be configured as follows:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.2.40` | `50.16.238.120:5555`
+`192.168.2.41` | `50.16.238.121:5555`
+`192.168.2.42` | `50.16.238.122:5555`
+`192.168.2.43` | `50.16.238.123:5555`
+`192.168.2.44` | `50.16.238.124:5555`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT port listens on `5555`.
+
+### Cluster_C Setup
+
+The nodes of Cluster_C are set up with **static NAT**, configured with the
+following IP addresses:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.3.60` | `50.16.238.200:5550`
+`192.168.3.61` | `50.16.238.200:5551`
+`192.168.3.62` | `50.16.238.200:5552`
+`192.168.3.63` | `50.16.238.200:5553`
+`192.168.3.64` | `50.16.238.200:5554`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT ports listen on `5550` through `5554`.
+
+```bash
+# on any node of Cluster_A
+riak-repl clustername Cluster_A
+
+# on any node of Cluster_B
+riak-repl clustername Cluster_B
+
+# on any node of Cluster_C
+riak-repl clustername Cluster_C
+
+# on 50.16.238.120 of Cluster_B
+riak-repl nat-map add 50.16.238.120:5555 192.168.2.40
+# on 50.16.238.121 of Cluster_B
+riak-repl nat-map add 50.16.238.121:5555 192.168.2.41
+# on 50.16.238.122 of Cluster_B
+riak-repl nat-map add 50.16.238.122:5555 192.168.2.42
+# on 50.16.238.123 of Cluster_B
+riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
+# on 50.16.238.124 of Cluster_B
+riak-repl nat-map add 50.16.238.124:5555 192.168.2.44
+
+# on 192.168.3.60 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5550 192.168.3.60
+# on 192.168.3.61 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5551 192.168.3.61
+# on 192.168.3.62 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5552 192.168.3.62
+# on 192.168.3.63 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5553 192.168.3.63
+# on 192.168.3.64 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5554 192.168.3.64
+
+
+# Connect replication from Cluster_A to Cluster_B:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.120:5555
+# You can connect to any node in Cluster_B with NAT mapped IPs/ports
+# This command only needs to be run *once* for a cluster.
+
+# Connect replication from Cluster_A to Cluster_C:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.200:5550
+# You can connect to any node in Cluster_C with NAT mapped IPs/ports
+# This command only needs to be run *once* for a cluster.
+ + +# on any node from Cluster_A +riak-repl realtime enable Cluster_B +riak-repl realtime enable Cluster_C + +riak-repl realtime start Cluster_B +riak-repl realtime start Cluster_C +``` diff --git a/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..1784bc39dc --- /dev/null +++ b/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter/quick-start.md @@ -0,0 +1,176 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Quickstart" + identifier: "configuring_v3_quickstart" + weight: 100 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/ops/mdc/v3/quick-start + - /riak/kv/2.9.0p5/ops/mdc/v3/quick-start + - /riak/2.9.0p5/configuring/v3-multi-datacenter/quick-start/ + - /riak/2.9.0/configuring/v3-multi-datacenter/quick-start/ + - /riak/kv/2.9.0/configuring/v3-multi-datacenter/quick-start/ + - /riak/kv/2.9.0p1/configuring/v3-multi-datacenter/quick-start/ + - /riak/kv/2.9.0p2/configuring/v3-multi-datacenter/quick-start/ + - /riak/kv/2.9.0p3/configuring/v3-multi-datacenter/quick-start/ + - /riak/kv/2.9.0p4/configuring/v3-multi-datacenter/quick-start/ +--- + + +[perf index]: {{}}riak/kv/2.9.0p5/using/performance +[config v3 mdc]: {{}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter + +This guide will walk you through the process of configuring Riak's v3 +Replication to perform replication between two sample Riak clusters on +separate networks. This guide will also cover bidirectional replication, +which is accomplished by setting up unidirectional replication in both +directions between the clusters. It is important to note that both +clusters must have the same ring size, but can have a different number +of nodes. + +## Prerequisites + +This guide assumes that you have completed the following steps: + +* Install [Riak][install index] +* Perform [System Tuning][perf index] +* Review [Configuration][config v3 mdc] + +## About v3 Replication in 1.3 and higher + +In Riak's v3 Replication from Riak KV version 1.3 onwards, the nomenclature for Source and Site +clusters has changed. To more accurately reflect the behavior of each of +the clusters, "listeners" and "sites" are now known as "sources" and +"sinks." Data transfer now originates at the "source" and replicates to +the "sink;" initiation is always from the primary (source) to the backup +(sink) data center. + +Additionally, knowledge of the state of each cluster is now managed by a +**cluster manager** process, which greatly simplifies the setup and +maintenance of Multi-Datacenter replication. 
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak Clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|-----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View your active connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink               Cluster Name         <Ctrl-Pid>      [Members]
+----               ------------         ----------      ---------
+Cluster2           Cluster2             <0.7985.0>      ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink               Cluster Name         <Ctrl-Pid>      [Members]
+----               ------------         ----------      ---------
+Cluster1           Cluster1             <0.4456.0>      ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2.
+Once this is done, bidirectional replication should be operating.
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
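+
+To recap, the complete bidirectional setup described in this guide condenses to the following commands, using the sample addresses from the scenario above:
+
+```bash
+# On any node of Cluster1
+riak-repl clustername Cluster1
+riak-repl connect 10.60.77.10:9080
+riak-repl realtime enable Cluster2
+riak-repl realtime start Cluster2
+
+# On any node of Cluster2
+riak-repl clustername Cluster2
+riak-repl connect 10.60.67.149:9080
+riak-repl realtime enable Cluster1
+riak-repl realtime start Cluster1
+```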
diff --git a/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..30261514c3
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,178 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/ops/mdc/v3/ssl
+  - /riak/kv/2.9.0p5/ops/mdc/v3/ssl
+  - /riak/2.9.0p5/configuring/v3-multi-datacenter/ssl/
+  - /riak/2.9.0/configuring/v3-multi-datacenter/ssl/
+  - /riak/kv/2.9.0/configuring/v3-multi-datacenter/ssl/
+  - /riak/kv/2.9.0p1/configuring/v3-multi-datacenter/ssl/
+  - /riak/kv/2.9.0p2/configuring/v3-multi-datacenter/ssl/
+  - /riak/kv/2.9.0p3/configuring/v3-multi-datacenter/ssl/
+  - /riak/kv/2.9.0p4/configuring/v3-multi-datacenter/ssl/
+---
+
+
+[config reference#advanced.config]: {{}}riak/kv/2.9.0p5/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+  ]}
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+  ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_core` section of `advanced.config`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_depth, 3} % Sets the depth to 3
+    % ...
+  ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. The
+intermediate certificates must not be self signed.
+
+The following example depths illustrate this:
+
+  * a depth of `0` indicates that the certificate must be signed
+    directly by a root certificate authority (CA)
+  * a depth of `1` indicates that the certificate may be signed by at
+    most 1 intermediate CA, followed by a root CA
+  * a depth of `2` indicates that the certificate may be signed by at
+    most 2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL for *Version 3* is available in *Riak 1.4+*.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+Read how to [generate your own CA and
+keys](http://www.debian-administration.org/articles/618). Ensure that
+you remove the password protection from the keys you generate.
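+
+Putting the pieces together, a `riak_core` section that enables SSL, pins the verification depth, and restricts peers to a single domain might look like the following sketch. The paths and domain are illustrative placeholders.
+
+```advancedconfig
+{riak_core, [
+    %% Encrypt replication traffic
+    {ssl_enabled, true},
+    {certfile, "/etc/riak/ssl/site1-cert.pem"},
+    {keyfile, "/etc/riak/ssl/site1-key.pem"},
+    {cacertdir, "/etc/riak/ssl/cacerts"},
+    %% Allow at most one intermediate CA between peer and root
+    {ssl_depth, 1},
+    %% Only peers presenting *.bashosamplecorp.com certificates may connect
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+  ]}
+```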
diff --git a/content/riak/kv/2.9.0p5/developing.md b/content/riak/kv/2.9.0p5/developing.md new file mode 100644 index 0000000000..2721374984 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing.md @@ -0,0 +1,82 @@ +--- +title: "Developing with Riak KV" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Developing" + identifier: "developing" + weight: 300 + pre: lambda +toc: true +aliases: + - /riak/2.9.0p5/developing/ + - /riak/2.9.0/developing/ + - /riak/kv/2.9.0/developing/ + - /riak/kv/2.9.0p1/developing/ + - /riak/kv/2.9.0p2/developing/ + - /riak/kv/2.9.0p3/developing/ + - /riak/kv/2.9.0p4/developing/ +--- + + +[getting started]: ../developing/getting-started +[usage index]: ../developing/usage +[client libraries]: ../developing/client-libraries +[dev data types]: ../developing/data-types +[dev data modeling]: ../developing/data-modeling +[apps index]: ../developing/app-guide +[dev api index]: ../developing/api +[dev faq]: ../developing/faq + +## In This Section + +#### [Getting Started][getting started] + +Step-by-step guide for getting started developing with Riak KV. + +[Learn More >>][getting started] + +#### [Usage][usage index] + +A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types. + +[Learn More >>][usage index] + +#### [Client Libraries][client libraries] + +Overview of client libraries for a variety of programming languages and environments. + +[Learn More >>][client libraries] + +#### [Data Types][dev data types] + +Overview and guide to working with data types in Riak KV. + +[Learn More >>][dev data types] + +#### [Data Modeling][dev data modeling] + +Information on use cases and data models that are a good fit for Riak KV. + +[Learn More >>][dev data modeling] + +#### [Application Guide][apps index] + +A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV. + +[Learn More >>][apps index] + +#### [APIs Reference][dev api index] + +Information and reference material on Riak KV APIs. + +[Learn More >>][dev api index] + +#### [FAQ][dev faq] + +Frequently asked questions when developing applications with Riak KV. + +[Learn More >>][dev faq] + diff --git a/content/riak/kv/2.9.0p5/developing/api.md b/content/riak/kv/2.9.0p5/developing/api.md new file mode 100644 index 0000000000..d989f11ffc --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api.md @@ -0,0 +1,46 @@ +--- +title: "APIs" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "APIs" + identifier: "developing_apis" + weight: 107 + parent: "developing" +toc: true +aliases: + - /riak/2.9.0p5/developing/api/ + - /riak/2.9.0/developing/api/ + - /riak/kv/2.9.0/developing/api/ + - /riak/kv/2.9.0p1/developing/api/ + - /riak/kv/2.9.0p2/developing/api/ + - /riak/kv/2.9.0p3/developing/api/ + - /riak/kv/2.9.0p4/developing/api/ +--- + + +[dev api http]: ./http +[dev api backend]: ./backend +[dev api pbc]: ./protocol-buffers/ + +## In This Section + +#### [HTTP APIs][dev api http] + +Documentation on Riak KV's HTTP API. + +[Learn More >>][dev api http] + +#### [Protocol Buffers][dev api pbc] + +Information on Riak KV's Protocol Buffer Client API + +[Learn More >>][dev api pbc] + +#### [Backend API][dev api backend] + +Overview of Riak KV's storage backend API. 
+ +[Learn More >>][dev api backend] diff --git a/content/riak/kv/2.9.0p5/developing/api/backend.md b/content/riak/kv/2.9.0p5/developing/api/backend.md new file mode 100644 index 0000000000..5d1102127c --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/backend.md @@ -0,0 +1,122 @@ +--- +title: "Backend API" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Backend API" + identifier: "apis_backend" + weight: 101 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/backend-api + - /riak/kv/2.9.0p5/dev/references/backend-api + - /riak/2.9.0p5/developing/api/backend/ + - /riak/2.9.0/developing/api/backend/ + - /riak/kv/2.9.0/developing/api/backend/ + - /riak/kv/2.9.0p1/developing/api/backend/ + - /riak/kv/2.9.0p2/developing/api/backend/ + - /riak/kv/2.9.0p3/developing/api/backend/ + - /riak/kv/2.9.0p4/developing/api/backend/ +--- + + +[plan backend]: {{}}riak/kv/2.9.0p5/setup/planning/backend + +Riak's storage API uniformly applies to all of the +[supported backends][plan backend]. This page presents the details of +the storage backend API in the form of +[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html) +(specs). + +Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html), +an Erlang static analysis tool. We recommend copying these specs into any +custom backend modules and use them as a guide for development to +avoid errors and ensure full compatibility with Riak. + +Also included below is the function export list that can be pasted directly +into a custom storage backend module. + +```erlang +%% Riak Storage Backend API +-export([api_version/0, + start/2, + stop/1, + get/3, + put/5, + delete/4, + drop/1, + fold_buckets/4, + fold_keys/4, + fold_objects/4, + is_empty/1, + status/1, + callback/3]). + +%% =================================================================== +%% Public API +%% =================================================================== + +%% @doc Return the major version of the +%% current API and a capabilities list. +%% The current valid capabilities are async_fold +%% and indexes. +-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. + +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. 
+ +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http.md b/content/riak/kv/2.9.0p5/developing/api/http.md new file mode 100644 index 0000000000..1558aec189 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http.md @@ -0,0 +1,97 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http + - /riak/kv/2.9.0p5/dev/references/http + - /riak/2.9.0p5/developing/api/http/ + - /riak/2.9.0/developing/api/http/ + - /riak/kv/2.9.0/developing/api/http/ + - /riak/kv/2.9.0p1/developing/api/http/ + - /riak/kv/2.9.0p2/developing/api/http/ + - /riak/kv/2.9.0p3/developing/api/http/ + - /riak/kv/2.9.0p4/developing/api/http/ +--- + + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. + +## Bucket-related Operations + +Method | URL | Doc +:------|:----|:--- +`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.9.0p5/developing/api/http/get-bucket-props) +`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.9.0p5/developing/api/http/set-bucket-props) +`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.0p5/developing/api/http/reset-bucket-props) +`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.9.0p5/developing/api/http/list-buckets) +`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.9.0p5/developing/api/http/list-keys) + +## Object-related Operations + +Method | URL | Doc +:------|:----|:--- +`GET` | `/types//buckets//keys/` | [HTTP Fetch Object]({{}}riak/kv/2.9.0p5/developing/api/http/fetch-object) +`POST` | `/types//buckets//keys` | [HTTP Store Object]({{}}riak/kv/2.9.0p5/developing/api/http/store-object) +`PUT` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.9.0p5/developing/api/http/store-object) +`POST` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.9.0p5/developing/api/http/store-object) +`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object]({{}}riak/kv/2.9.0p5/developing/api/http/delete-object) + +## Riak-Data-Type-related Operations + +Method | URL +:------|:---- +`GET` | `/types//buckets//datatypes/` +`POST` | `/types//buckets//datatypes` +`POST` | `/types//buckets//datatypes/` + +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.9.0p5/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.9.0p5/developing/data-types/#usage-examples) +and subpages e.g. 
[sets]({{}}riak/kv/2.9.0p5/developing/data-types/sets).
+
+Advanced users may consult the technical documentation inside the Riak
+KV internal module `riak_kv_wm_crdt`.
+
+## Query-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.9.0p5/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.0p5/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.0p5/developing/api/http/secondary-indexes)
+
+## Server-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.9.0p5/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.9.0p5/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.9.0p5/developing/api/http/list-resources)
+
+## Search-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{}}riak/kv/2.9.0p5/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.9.0p5/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{}}riak/kv/2.9.0p5/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{}}riak/kv/2.9.0p5/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{}}riak/kv/2.9.0p5/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.9.0p5/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{}}riak/kv/2.9.0p5/developing/api/http/store-search-schema)
diff --git a/content/riak/kv/2.9.0p5/developing/api/http/counters.md b/content/riak/kv/2.9.0p5/developing/api/http/counters.md
new file mode 100644
index 0000000000..dfc1033767
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/http/counters.md
@@ -0,0 +1,85 @@
+---
+title: "HTTP Counters"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Counters"
+    identifier: "http_counters"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/http/counters
+  - /riak/kv/2.9.0p5/dev/references/http/counters
+  - /riak/2.9.0p5/developing/api/http/counters/
+  - /riak/2.9.0/developing/api/http/counters/
+  - /riak/kv/2.9.0/developing/api/http/counters/
+  - /riak/kv/2.9.0p1/developing/api/http/counters/
+  - /riak/kv/2.9.0p2/developing/api/http/counters/
+  - /riak/kv/2.9.0p3/developing/api/http/counters/
+  - /riak/kv/2.9.0p4/developing/api/http/counters/
+---
+
+Riak counters are a CRDT (convergent replicated data type) that (eventually)
+converges to the correct total. You merely increment the counter with some
+integer, and any potential conflicts will be automatically resolved by Riak.
+
+## Setup
+
+Riak counters can only be used if the bucket has the `allow_mult` property
+set to `true`.
+
+```
+curl -XPUT localhost:8098/buckets/BUCKET/props \
+  -H "Content-Type: application/json" \
+  -d "{\"props\" : {\"allow_mult\": true}}"
+```
+
+If you attempt to use counters without setting the above, you'll get this
+message:
+
+```
+Counters require bucket property 'allow_mult=true'
+```
+
+## Request
+
+To insert, just POST an integer value using the `/counters` resource. This will
+increment the keyed value by the given amount.
+
+```
+POST /buckets/BUCKET/counters/KEY
+```
+
+To retrieve the current value, issue a GET using the same `/counters` resource:
+
+```
+GET /buckets/BUCKET/counters/KEY
+```
+
+## Response
+
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.9.0p5/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.9.0p5/developing/api/http/fetch-object)) responses apply here.
+
+Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata.
+
+## Example
+
+The body must be an integer (positive or negative).
+
+```
+curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1"
+
+curl http://localhost:8098/buckets/my_bucket/counters/my_key
+1
+
+curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100"
+
+curl http://localhost:8098/buckets/my_bucket/counters/my_key
+101
+
+curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1"
+
+curl http://localhost:8098/buckets/my_bucket/counters/my_key
+100
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/http/delete-object.md b/content/riak/kv/2.9.0p5/developing/api/http/delete-object.md
new file mode 100644
index 0000000000..9aad910a7d
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/http/delete-object.md
@@ -0,0 +1,82 @@
+---
+title: "HTTP Delete Object"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Delete Object"
+    identifier: "http_delete_object"
+    weight: 107
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/http/delete-object
+  - /riak/kv/2.9.0p5/dev/references/http/delete-object
+  - /riak/2.9.0p5/developing/api/http/delete-object/
+  - /riak/2.9.0/developing/api/http/delete-object/
+  - /riak/kv/2.9.0/developing/api/http/delete-object/
+  - /riak/kv/2.9.0p1/developing/api/http/delete-object/
+  - /riak/kv/2.9.0p2/developing/api/http/delete-object/
+  - /riak/kv/2.9.0p3/developing/api/http/delete-object/
+  - /riak/kv/2.9.0p4/developing/api/http/delete-object/
+---
+
+Deletes an object from the specified bucket/key.
+
+## Request
+
+```
+DELETE /types/type/buckets/bucket/keys/key
+DELETE /buckets/bucket/keys/key
+```
+
+Optional query parameters:
+
+* `rw` - quorum for both operations (get and put) involved in deleting an
+object (default is set at the bucket level)
+* `r` - (read quorum) how many replicas need to agree when retrieving the object
+* `pr` - (primary read quorum) works like `r` but requires that the nodes
+read from are not fallback nodes
+* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response
+* `dw` - (durable write quorum) how many replicas to commit to durable storage
+before returning a successful response
+* `pw` - (primary write quorum) how many replicas to commit to primary nodes
+before returning a successful response
+
+## Response
+
+Normal response codes:
+
+* `204 No Content`
+* `404 Not Found`
+
+Typical error codes:
+
+* `400 Bad Request` - e.g., when the `rw` parameter is invalid (> N)
+
+`404` responses are "normal" in the sense that DELETE operations are idempotent
+and not finding the resource has the same effect as deleting it.
+
+## Example
+
+```curl
+$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/test/keys/test2 HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/http/delete-search-index.md b/content/riak/kv/2.9.0p5/developing/api/http/delete-search-index.md
new file mode 100644
index 0000000000..4c09f29845
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/http/delete-search-index.md
@@ -0,0 +1,40 @@
+---
+title: "HTTP Delete Search Index"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Delete Search Index"
+    identifier: "http_delete_search_index"
+    weight: 116
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/http/delete-search-index
+  - /riak/kv/2.9.0p5/dev/references/http/delete-search-index
+  - /riak/2.9.0p5/developing/api/http/delete-search-index/
+  - /riak/2.9.0/developing/api/http/delete-search-index/
+  - /riak/kv/2.9.0/developing/api/http/delete-search-index/
+  - /riak/kv/2.9.0p1/developing/api/http/delete-search-index/
+  - /riak/kv/2.9.0p2/developing/api/http/delete-search-index/
+  - /riak/kv/2.9.0p3/developing/api/http/delete-search-index/
+  - /riak/kv/2.9.0p4/developing/api/http/delete-search-index/
+---
+
+Deletes a Riak Search index.
+
+## Request
+
+```
+DELETE /search/index/<index_name>
+```
+
+## Normal Response Codes
+
+* `204 No Content` --- The index was successfully deleted (also returned
+  if the index did not exist to begin with)
+
+## Typical Error Codes
+
+* `503 Service Unavailable` --- The request timed out internally
diff --git a/content/riak/kv/2.9.0p5/developing/api/http/fetch-object.md b/content/riak/kv/2.9.0p5/developing/api/http/fetch-object.md
new file mode 100644
index 0000000000..57f76bc736
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/http/fetch-object.md
@@ -0,0 +1,249 @@
+---
+title: "HTTP Fetch Object"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Fetch Object"
+    identifier: "http_fetch_object"
+    weight: 105
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/http/fetch-object
+  - /riak/kv/2.9.0p5/dev/references/http/fetch-object
+  - /riak/2.9.0p5/developing/api/http/fetch-object/
+  - /riak/2.9.0/developing/api/http/fetch-object/
+  - /riak/kv/2.9.0/developing/api/http/fetch-object/
+  - /riak/kv/2.9.0p1/developing/api/http/fetch-object/
+  - /riak/kv/2.9.0p2/developing/api/http/fetch-object/
+  - /riak/kv/2.9.0p3/developing/api/http/fetch-object/
+  - /riak/kv/2.9.0p4/developing/api/http/fetch-object/
+---
+
+Reads an object from the specified bucket/key.
+
+## Request
+
+```bash
+GET /types/type/buckets/bucket/keys/key
+GET /buckets/bucket/keys/key
+```
+
+Important headers:
+
+* `Accept` - When `multipart/mixed` is the preferred content-type, objects with
+siblings will return all siblings in a single request. See [Siblings examples](#siblings-examples). See
+also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1).
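+
+For example, a client that wants all siblings inlined in one response would
+send the header like this (a minimal sketch; a full transcript appears in the
+[Siblings examples](#siblings-examples) below):
+
+```curl
+# Ask Riak to inline all siblings instead of returning a list of vtags
+curl -H "Accept: multipart/mixed" http://localhost:8098/buckets/test/keys/doc
+```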
+ +Optional headers: + +* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics, +matching on the `ETag` and `Last-Modified` of the object, respectively. If the +object fails one of the tests (that is, if the ETag is equal or the object is +unmodified since the supplied timestamp), Riak will return a `304 Not Modified` +response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5). + +Optional query parameters: + +* `r` - (read quorum) how many replicas need to agree when retrieving the +object ([default is defined by the bucket]({{}}riak/kv/2.9.0p5/developing/api/http/set-bucket-props)) +* `pr` - how many primary replicas need to be online when doing the read +([default is defined by the bucket]({{}}riak/kv/2.9.0p5/developing/api/http/set-bucket-props)) +* `basic_quorum` - whether to return early in some failure cases (eg. when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/2.9.0p5/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.9.0p5/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.0p5/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. 
+{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT +< ETag: 6dQBm9oYA1mxRSH0e96l5W +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"foo":"bar"} +``` + +## Siblings examples + +### Manually requesting siblings + +Simple call to fetch an object that has siblings: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 102 +< +Siblings: +16vic4eU9ny46o4KPiDz1f +4v5xOg4bVwUYZdMkqf0d6I +6nr5tDTmhxnwuAFJDd2s6G +6zRSZFUJlHXZ15o9CG0BYl +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +Now request one of the siblings directly: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT +< ETag: 16vic4eU9ny46o4KPiDz1f +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/x-www-form-urlencoded +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + +### Get all siblings in one request + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: multipart/mixed +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh +< Content-Length: 766 +< + +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/x-www-form-urlencoded +Link: ; rel="up" +Etag: 16vic4eU9ny46o4KPiDz1f +Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 4v5xOg4bVwUYZdMkqf0d6I +Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6nr5tDTmhxnwuAFJDd2s6G +Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6zRSZFUJlHXZ15o9CG0BYl +Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT + +{"foo":"bar"} +--YinLMzyUR9feB17okMytgKsylvh-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http/fetch-search-index.md b/content/riak/kv/2.9.0p5/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..c903c5c0b4 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/fetch-search-index.md @@ -0,0 +1,54 @@ +--- +title: "HTTP Fetch Search Index" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Fetch Search Index" + identifier: "http_fetch_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/fetch-search-index + - /riak/kv/2.9.0p5/dev/references/http/fetch-search-index + - /riak/2.9.0p5/developing/api/http/fetch-search-index/ + - /riak/2.9.0/developing/api/http/fetch-search-index/ + - /riak/kv/2.9.0/developing/api/http/fetch-search-index/ + - /riak/kv/2.9.0p1/developing/api/http/fetch-search-index/ + - /riak/kv/2.9.0p2/developing/api/http/fetch-search-index/ + - /riak/kv/2.9.0p3/developing/api/http/fetch-search-index/ + - /riak/kv/2.9.0p4/developing/api/http/fetch-search-index/ +--- + +Retrieves information about a Riak Search [index]({{}}riak/kv/2.9.0p5/developing/usage/search/#simple-setup). + +## Request + +``` +GET /search/index/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` --- No Search index with that name is currently + available +* `503 Service Unavailable` --- The request timed out internally + +## Response + +If the index is found, Riak will output a JSON object describing the +index, including its name, the [`n_val`]({{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas) used by the index. 
Here is an example: + +```json +{ + "name": "my_index", + "n_val": 3, + "schema": "_yz_default" +} +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.9.0p5/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..f373e4e317 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/fetch-search-schema.md @@ -0,0 +1,45 @@ +--- +title: "HTTP Fetch Search Schema" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Fetch Search Schema" + identifier: "http_fetch_search_schema" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/fetch-search-schema + - /riak/kv/2.9.0p5/dev/references/http/fetch-search-schema + - /riak/2.9.0p5/developing/api/http/fetch-search-schema/ + - /riak/2.9.0/developing/api/http/fetch-search-schema/ + - /riak/kv/2.9.0/developing/api/http/fetch-search-schema/ + - /riak/kv/2.9.0p1/developing/api/http/fetch-search-schema/ + - /riak/kv/2.9.0p2/developing/api/http/fetch-search-schema/ + - /riak/kv/2.9.0p3/developing/api/http/fetch-search-schema/ + - /riak/kv/2.9.0p4/developing/api/http/fetch-search-schema/ +--- + +Retrieves a Riak KV [search schema]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas). + +## Request + +``` +GET /search/schema/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +* `503 Service Unavailable` --- The request timed out internally + +## Response + +If the schema is found, Riak will return the contents of the schema as +XML (all Riak Search schemas are XML). diff --git a/content/riak/kv/2.9.0p5/developing/api/http/get-bucket-props.md b/content/riak/kv/2.9.0p5/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..a6fdc48ed7 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/get-bucket-props.md @@ -0,0 +1,90 @@ +--- +title: "HTTP Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Get Bucket Properties" + identifier: "http_get_bucket_props" + weight: 100 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/get-bucket-props + - /riak/kv/2.9.0p5/dev/references/http/get-bucket-props + - /riak/2.9.0p5/developing/api/http/get-bucket-props/ + - /riak/2.9.0/developing/api/http/get-bucket-props/ + - /riak/kv/2.9.0/developing/api/http/get-bucket-props/ + - /riak/kv/2.9.0p1/developing/api/http/get-bucket-props/ + - /riak/kv/2.9.0p2/developing/api/http/get-bucket-props/ + - /riak/kv/2.9.0p3/developing/api/http/get-bucket-props/ + - /riak/kv/2.9.0p4/developing/api/http/get-bucket-props/ +--- + + +Reads the bucket or bucket type properties. + +## Request + +```bash +GET /buckets/bucket/props +``` + +Or, to read bucket properties from a bucket in a bucket type: + +```bash +GET /types/type/buckets/bucket/props +``` + +Optional query parameters (only valid for the old format): + +* `props` - whether to return the bucket properties (`true` is the default) +* `keys` - whether to return the keys stored in the bucket. (`false` is the +default). See also [HTTP List Keys]({{}}riak/kv/2.9.0p5/developing/api/http/list-keys). + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` + +The JSON object in the response will contain up to two entries, `"props"` and +`"keys"`, which are present or missing, according to the optional query +parameters. 
The default is for only `"props"` to be present. + +See [HTTP Set Bucket Properties]({{}}riak/kv/2.9.0p5/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.9.0p5/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/props +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http/link-walking.md b/content/riak/kv/2.9.0p5/developing/api/http/link-walking.md new file mode 100644 index 0000000000..14ea89a920 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/link-walking.md @@ -0,0 +1,132 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/link-walking + - /riak/kv/2.9.0p5/dev/references/http/link-walking + - /riak/2.9.0p5/developing/api/http/link-walking/ + - /riak/2.9.0/developing/api/http/link-walking/ + - /riak/kv/2.9.0/developing/api/http/link-walking/ + - /riak/kv/2.9.0p1/developing/api/http/link-walking/ + - /riak/kv/2.9.0p2/developing/api/http/link-walking/ + - /riak/kv/2.9.0p3/developing/api/http/link-walking/ + - /riak/kv/2.9.0p4/developing/api/http/link-walking/ +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/2.9.0p5/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.9.0p5/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. 
Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/2.9.0p5/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http/list-buckets.md b/content/riak/kv/2.9.0p5/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..52285fa6f3 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/list-buckets.md @@ -0,0 +1,71 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/list-buckets + - 
/riak/kv/2.9.0p5/dev/references/http/list-buckets + - /riak/2.9.0p5/developing/api/http/list-buckets/ + - /riak/2.9.0/developing/api/http/list-buckets/ + - /riak/kv/2.9.0/developing/api/http/list-buckets/ + - /riak/kv/2.9.0p1/developing/api/http/list-buckets/ + - /riak/kv/2.9.0p2/developing/api/http/list-buckets/ + - /riak/kv/2.9.0p3/developing/api/http/list-buckets/ + - /riak/kv/2.9.0p4/developing/api/http/list-buckets/ +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http/list-keys.md b/content/riak/kv/2.9.0p5/developing/api/http/list-keys.md new file mode 100644 index 0000000000..6502682c6a --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/list-keys.md @@ -0,0 +1,83 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/list-keys + - /riak/kv/2.9.0p5/dev/references/http/list-keys + - /riak/2.9.0p5/developing/api/http/list-keys/ + - /riak/2.9.0/developing/api/http/list-keys/ + - /riak/kv/2.9.0/developing/api/http/list-keys/ + - /riak/kv/2.9.0p1/developing/api/http/list-keys/ + - /riak/kv/2.9.0p2/developing/api/http/list-keys/ + - /riak/kv/2.9.0p3/developing/api/http/list-keys/ + - /riak/kv/2.9.0p4/developing/api/http/list-keys/ +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. 
If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http/list-resources.md b/content/riak/kv/2.9.0p5/developing/api/http/list-resources.md new file mode 100644 index 0000000000..13802041d9 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/list-resources.md @@ -0,0 +1,88 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/list-resources + - /riak/kv/2.9.0p5/dev/references/http/list-resources + - /riak/2.9.0p5/developing/api/http/list-resources/ + - /riak/2.9.0/developing/api/http/list-resources/ + - /riak/kv/2.9.0/developing/api/http/list-resources/ + - /riak/kv/2.9.0p1/developing/api/http/list-resources/ + - /riak/kv/2.9.0p2/developing/api/http/list-resources/ + - /riak/kv/2.9.0p3/developing/api/http/list-resources/ + - /riak/kv/2.9.0p4/developing/api/http/list-resources/ +--- + + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
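+
+For example, a client can bootstrap itself by requesting the JSON form of the
+resource map and reading the entry it needs (a minimal sketch; the `jq` filter
+is illustrative and not part of Riak):
+
+```curl
+# Discover where the MapReduce resource is mounted on this node
+curl -s -H "Accept: application/json" http://localhost:8098/ | jq -r .riak_kv_wm_mapred
+```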
+ +The standard resources are: + +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.9.0p5/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.9.0p5/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.9.0p5/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.9.0p5/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.9.0p5/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.9.0p5/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.9.0p5/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.9.0p5/developing/api/http/status) + +## Request + +```bash +GET / +``` + +Headers: + +* `Accept` - `application/json` or `text/html` + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Link` - all resources that are described in the response body, but in Link +form + +## Example + +Request JSON response + +```curl +$ curl -i http://localhost:8098 -H "Accept: application/json" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:18:31 GMT +Content-Type: application/json +Content-Length: 398 + +{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"} + +# Request HTML response +curl -i http://localhost:8098 -H "Accept: text/html" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:20:05 GMT +Content-Type: text/html +Content-Length: 666 + + +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http/mapreduce.md b/content/riak/kv/2.9.0p5/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..ad3f8714d5 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/mapreduce.md @@ -0,0 +1,78 @@ +--- +title: "HTTP MapReduce" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "MapReduce" + identifier: "http_mapreduce" + weight: 108 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/mapreduce + - /riak/kv/2.9.0p5/dev/references/http/mapreduce + - /riak/2.9.0p5/developing/api/http/mapreduce/ + - /riak/2.9.0/developing/api/http/mapreduce/ + - /riak/kv/2.9.0/developing/api/http/mapreduce/ + - 
/riak/kv/2.9.0p1/developing/api/http/mapreduce/ + - /riak/kv/2.9.0p2/developing/api/http/mapreduce/ + - /riak/kv/2.9.0p3/developing/api/http/mapreduce/ + - /riak/kv/2.9.0p4/developing/api/http/mapreduce/ +--- + + +[MapReduce]({{}}riak/kv/2.9.0p5/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. + +## Request + +```bash +POST /mapred +``` + +Important headers: +* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.9.0p5/developing/usage/mapreduce) page. + +Optional query parameters: +* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding. + +_+This request must include an entity (body), which is the JSON form of the MapReduce query.+_ + +## Response + +Normal status codes: +* `200 OK` + +Typical error codes: +* `400 Bad Request` - if an invalid job is submitted. +* `500 Internal Server Error` - if there was an error in processing a map or reduce function +* `503 Service Unavailable` - if the job timed out before it could complete + +Important headers: +* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections. + +## Example + +```curl +$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> POST /mapred HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> Content-Length: 117 +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 30 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +[{"foo":"bar"},{"riak":"CAP"}] +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http/ping.md b/content/riak/kv/2.9.0p5/developing/api/http/ping.md new file mode 100644 index 0000000000..94e0c284f8 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/ping.md @@ -0,0 +1,61 @@ +--- +title: "HTTP Ping" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Ping" + identifier: "http_ping" + weight: 110 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/ping + - /riak/kv/2.9.0p5/dev/references/http/ping + - /riak/2.9.0p5/developing/api/http/ping/ + - /riak/2.9.0/developing/api/http/ping/ + - /riak/kv/2.9.0/developing/api/http/ping/ + - /riak/kv/2.9.0p1/developing/api/http/ping/ + - /riak/kv/2.9.0p2/developing/api/http/ping/ + - /riak/kv/2.9.0p3/developing/api/http/ping/ + - /riak/kv/2.9.0p4/developing/api/http/ping/ +--- + + +Checks if the server is alive. This is useful for monitoring tools, load-balancers and automated scripts. + +## Request + +```bash +GET /ping +``` + +## Response + +Normal status codes: + +* `200 OK` + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/ping +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/http/reset-bucket-props.md b/content/riak/kv/2.9.0p5/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..a244c36d40
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,65 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/http/reset-bucket-props
+  - /riak/kv/2.9.0p5/dev/references/http/reset-bucket-props
+  - /riak/2.9.0p5/developing/api/http/reset-bucket-props/
+  - /riak/2.9.0/developing/api/http/reset-bucket-props/
+  - /riak/kv/2.9.0/developing/api/http/reset-bucket-props/
+  - /riak/kv/2.9.0p1/developing/api/http/reset-bucket-props/
+  - /riak/kv/2.9.0p2/developing/api/http/reset-bucket-props/
+  - /riak/kv/2.9.0p3/developing/api/http/reset-bucket-props/
+  - /riak/kv/2.9.0p4/developing/api/http/reset-bucket-props/
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected +* Connected to localhost (127.0.0.1) port 8098 (#0) +> DELETE /buckets/bucket/props HTTP/1.1 +> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5 +> Host: localhost:8098 +> Accept: */* +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue) +< Date: Tue, 06 Nov 2012 21:56:17 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host localhost left intact +* Closing connection #0 +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http/search-index-info.md b/content/riak/kv/2.9.0p5/developing/api/http/search-index-info.md new file mode 100644 index 0000000000..9569a1a7e9 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/search-index-info.md @@ -0,0 +1,60 @@ +--- +title: "HTTP Search Index Info" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Search Index Info" + identifier: "http_search_index_info" + weight: 114 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/search-index-info + - /riak/kv/2.9.0p5/dev/references/http/search-index-info + - /riak/2.9.0p5/developing/api/http/search-index-info/ + - /riak/2.9.0/developing/api/http/search-index-info/ + - /riak/kv/2.9.0/developing/api/http/search-index-info/ + - /riak/kv/2.9.0p1/developing/api/http/search-index-info/ + - /riak/kv/2.9.0p2/developing/api/http/search-index-info/ + - /riak/kv/2.9.0p3/developing/api/http/search-index-info/ + - /riak/kv/2.9.0p4/developing/api/http/search-index-info/ +--- + + +Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.9.0p5/developing/usage/search) in JSON format. + +## Request + +``` +GET /search/index +``` + +## Response + +If there are no currently available Search indexes, a `200 OK` will be +returned but with an empty list as the response value. + +Below is the example output if there is one Search index, called +`test_index`, currently available: + +```json +[ + { + "n_val": 3, + "name": "test_index", + "schema": "_yz_default" + } +] +``` + +#### Normal Response Codes + +* `200 OK` + +#### Typical Error Codes + +* `404 Object Not Found` --- Typically returned if Riak Search is not + currently enabled on the node +* `503 Service Unavailable` --- The request timed out internally diff --git a/content/riak/kv/2.9.0p5/developing/api/http/search-query.md b/content/riak/kv/2.9.0p5/developing/api/http/search-query.md new file mode 100644 index 0000000000..5fe7bacc8a --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/search-query.md @@ -0,0 +1,77 @@ +--- +title: "HTTP Search Query" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Search Query" + identifier: "http_search_query" + weight: 113 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/search-query + - /riak/kv/2.9.0p5/dev/references/http/search-query + - /riak/2.9.0p5/developing/api/http/search-query/ + - /riak/2.9.0/developing/api/http/search-query/ + - /riak/kv/2.9.0/developing/api/http/search-query/ + - /riak/kv/2.9.0p1/developing/api/http/search-query/ + - /riak/kv/2.9.0p2/developing/api/http/search-query/ + - /riak/kv/2.9.0p3/developing/api/http/search-query/ + - /riak/kv/2.9.0p4/developing/api/http/search-query/ +--- + + +Performs a [Riak KV Search]({{}}riak/kv/2.9.0p5/developing/usage/search) query. 
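+
+For example, assuming a Search index named `famous` with a `name_s` field
+(both names are illustrative, taken from the Using Search examples), a query
+for names beginning with "Lion" looks like this:
+
+```curl
+# Query the hypothetical "famous" index, asking for the JSON response writer
+curl "http://localhost:8098/search/query/famous?wt=json&q=name_s:Lion*"
+```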
+
+## Request
+
+```
+GET /search/query/<index_name>
+```
+
+## Optional Query Parameters
+
+* `wt` --- The [response
+  writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers)
+  to be used when returning the Search payload. The currently
+  available options are `json` and `xml`. The default is `xml`.
+* `q` --- The actual Search query itself. Examples can be found in
+  [Using Search]({{}}riak/kv/2.9.0p5/developing/usage/search). If a query is not specified, Riak will return
+  information about the index itself, e.g. the number of documents
+  indexed.
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `400 Bad Request` --- Returned when, for example, a malformed query is
+  supplied
+* `404 Object Not Found` --- Returned if the Search index you are
+  attempting to query does not exist
+* `503 Service Unavailable` --- The request timed out internally
+
+## Response
+
+If a `200 OK` is returned, then the Search query has been successful.
+Below is an example JSON response from querying an index that currently
+has no documents associated with it:
+
+```json
+{
+  "response": {
+    "docs": [],
+    "maxScore": 0.0,
+    "numFound": 0,
+    "start": 0
+  },
+  "responseHeader": {
+    "status": 0,
+    "QTime": 10,
+    "params": { /* internal info from the query */ }
+  }
+}
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/http/secondary-indexes.md b/content/riak/kv/2.9.0p5/developing/api/http/secondary-indexes.md
new file mode 100644
index 0000000000..a726c10641
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/http/secondary-indexes.md
@@ -0,0 +1,99 @@
+---
+title: "HTTP Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Secondary Indexes"
+    identifier: "http_2i"
+    weight: 109
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/http/secondary-indexes
+  - /riak/kv/2.9.0p5/dev/references/http/secondary-indexes
+  - /riak/2.9.0p5/developing/api/http/secondary-indexes/
+  - /riak/2.9.0/developing/api/http/secondary-indexes/
+  - /riak/kv/2.9.0/developing/api/http/secondary-indexes/
+  - /riak/kv/2.9.0p1/developing/api/http/secondary-indexes/
+  - /riak/kv/2.9.0p2/developing/api/http/secondary-indexes/
+  - /riak/kv/2.9.0p3/developing/api/http/secondary-indexes/
+  - /riak/kv/2.9.0p4/developing/api/http/secondary-indexes/
+---
+
+[Secondary Indexes]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes) allow an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
+
+## Request
+
+### Exact Match
+
+```bash
+GET /buckets/mybucket/index/myindex_bin/value
+```
+
+### Range Query
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end
+```
+
+#### Range query with terms
+
+To see the index values matched by the range, use `return_terms=true`.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true
+```
+
+### Pagination
+
+Add the parameter `max_results` to paginate. This limits the number of results returned and provides a `continuation` value for the next request.
+ +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500 +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM= +``` + +### Streaming + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?stream=true +``` + +## Response + +Normal status codes: + ++ `200 OK` + +Typical error codes: + ++ `400 Bad Request` - if the index name or index value is invalid. ++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system. ++ `503 Service Unavailable` - if the job timed out before it could complete + +## Example + +```curl +$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1 +* About to connect() to localhost port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to localhost (127.0.0.1) port 8098 (#0) +> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3 +> Host: localhost:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 19 +< +* Connection #0 to host localhost left intact +* Closing connection #0 +{"keys":["mykey1"]}% +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/http/set-bucket-props.md b/content/riak/kv/2.9.0p5/developing/api/http/set-bucket-props.md new file mode 100644 index 0000000000..abfce56cf7 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/http/set-bucket-props.md @@ -0,0 +1,109 @@ +--- +title: "HTTP Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Set Bucket Properties" + identifier: "http_set_bucket_props" + weight: 101 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/http/set-bucket-props + - /riak/kv/2.9.0p5/dev/references/http/set-bucket-props + - /riak/2.9.0p5/developing/api/http/set-bucket-props/ + - /riak/2.9.0/developing/api/http/set-bucket-props/ + - /riak/kv/2.9.0/developing/api/http/set-bucket-props/ + - /riak/kv/2.9.0p1/developing/api/http/set-bucket-props/ + - /riak/kv/2.9.0p2/developing/api/http/set-bucket-props/ + - /riak/kv/2.9.0p3/developing/api/http/set-bucket-props/ + - /riak/kv/2.9.0p4/developing/api/http/set-bucket-props/ +--- + + +Sets bucket properties like "n_val" and "allow_mult". + +## Request + +```bash +PUT /buckets/bucket/props +``` + +Important headers: + +* `Content-Type` - `application/json` + +The body of the request should be a JSON object with a single entry "props". +Unmodified bucket properties may be omitted. + +Available properties: + +* `n_val` (integer > 0) - the number of replicas for objects in this bucket +* `allow_mult` (true or false) - whether to allow sibling objects to be created +(concurrent updates) +* `last_write_wins` (true or false) - whether to ignore object history (vector +clock) when writing +* `precommit` - [precommit hooks]({{}}riak/kv/2.9.0p5/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.9.0p5/developing/usage/commit-hooks) +* `r, w, dw, rw` - default quorum values for operations on keys in the bucket. +Valid values are: + * `"all"` - all nodes must respond + * `"quorum"` - (n_val/2) + 1 nodes must respond. 
*This is the default.*
+  * `"one"` - equivalent to 1
+  * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of diverse physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+    -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+*   Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/props HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4
+OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 21
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/http/status.md b/content/riak/kv/2.9.0p5/developing/api/http/status.md
new file mode 100644
index 0000000000..3fd51fc085
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/http/status.md
@@ -0,0 +1,177 @@
+---
+title: "HTTP Status"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Status"
+    identifier: "http_status"
+    weight: 111
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/http/status
+  - /riak/kv/2.9.0p5/dev/references/http/status
+  - /riak/2.9.0p5/developing/api/http/status/
+  - /riak/2.9.0/developing/api/http/status/
+  - /riak/kv/2.9.0/developing/api/http/status/
+  - /riak/kv/2.9.0p1/developing/api/http/status/
+  - /riak/kv/2.9.0p2/developing/api/http/status/
+  - /riak/kv/2.9.0p3/developing/api/http/status/
+  - /riak/kv/2.9.0p4/developing/api/http/status/
+---
+
+Reports on the performance and configuration of the Riak node to which the request was made. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active.
+
+## Performance
+
+Repeated requests to the `/stats` endpoint do not have a negative
+performance impact as the statistics are cached internally in Riak.
+
+## Request
+
+```bash
+GET /stats
+```
+
+Important headers:
+
+* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`.
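+
+Either representation is easy to consume from a script. The following is a minimal Python sketch using only the standard library; the node address is the usual local default and otherwise an assumption:
+
+```python
+import json
+from urllib.request import Request, urlopen
+
+# Ask for the JSON representation of /stats (assumes a local node
+# listening on the default HTTP port, 8098).
+req = Request(
+    "http://127.0.0.1:8098/stats",
+    headers={"Accept": "application/json"},
+)
+
+with urlopen(req) as resp:
+    stats = json.load(resp)
+
+# Pick out a couple of the documented stats.
+print(stats["ring_num_partitions"], stats["storage_backend"])
+```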
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `404 Not Found` - if `riak_kv_stat` is not enabled
+
+Important headers:
+* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks)
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"
+* About to connect() to 127.0.0.1 port 8098 (#0)
+*   Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /stats HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: text/plain
+>
+< HTTP/1.1 200 OK
+< Vary: Accept, Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/plain
+< Content-Length: 2102
+<
+{
+    "vnode_gets": 0,
+    "vnode_puts": 0,
+    "read_repairs": 0,
+    "vnode_gets_total": 0,
+    "vnode_puts_total": 0,
+    "node_gets": 0,
+    "node_gets_total": 0,
+    "node_get_fsm_time_mean": "undefined",
+    "node_get_fsm_time_median": "undefined",
+    "node_get_fsm_time_95": "undefined",
+    "node_get_fsm_time_99": "undefined",
+    "node_get_fsm_time_100": "undefined",
+    "node_puts": 0,
+    "node_puts_total": 0,
+    "node_put_fsm_time_mean": "undefined",
+    "node_put_fsm_time_median": "undefined",
+    "node_put_fsm_time_95": "undefined",
+    "node_put_fsm_time_99": "undefined",
+    "node_put_fsm_time_100": "undefined",
+    "read_repairs_total": 0,
+    "cpu_nprocs": 84,
+    "cpu_avg1": 251,
+    "cpu_avg5": 174,
+    "cpu_avg15": 110,
+    "mem_total": 7946684000.0,
+    "mem_allocated": 4340880000.0,
+    "nodename": "riak@127.0.0.1",
+    "connected_nodes": [
+
+    ],
+    "sys_driver_version": "1.5",
+    "sys_global_heaps_size": 0,
+    "sys_heap_type": "private",
+    "sys_logical_processors": 2,
+    "sys_otp_release": "R13B04",
+    "sys_process_count": 189,
+    "sys_smp_support": true,
+    "sys_system_version": "Erlang R13B04 (erts-5.7.5) [source] [64-bit] [smp:2:2] [rq:2] [async-threads:5] [hipe] [kernel-poll:true]",
+    "sys_system_architecture": "i386-apple-darwin10.3.0",
+    "sys_threads_enabled": true,
+    "sys_thread_pool_size": 5,
+    "sys_wordsize": 8,
+    "ring_members": [
+        "riak@127.0.0.1"
+    ],
+    "ring_num_partitions": 64,
+    "ring_ownership": "[{'riak@127.0.0.1',64}]",
+    "ring_creation_size": 64,
+    "storage_backend": "riak_kv_bitcask_backend",
+    "pbc_connects_total": 0,
+    "pbc_connects": 0,
+    "pbc_active": 0,
+    "riak_kv_version": "0.11.0",
+    "riak_core_version": "0.11.0",
+    "bitcask_version": "1.0.1",
+    "luke_version": "0.1",
+    "webmachine_version": "1.7.1",
+    "mochiweb_version": "1.7.1",
+    "erlang_js_version": "0.4",
+    "runtime_tools_version": "1.8.3",
+    "crypto_version": "1.6.4",
+    "os_mon_version": "2.9.0",
+    "sasl_version": "2.1.9",
+    "stdlib_version": "1.16.5",
+    "kernel_version": "2.13.5"
+}
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+## Output Explanation
+
+The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.9.0p5/using/cluster-operations/inspecting-node) doc, plus the stats below, which are generated by the Riak Core application.
Stat | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total | Total number of ignored gossip messages since node was started
+rings_reconciled_total | Total number of ring reconciliation operations since node was started
+rings_reconciled | Number of ring reconciliation operations in the last minute
+gossip_received | Number of gossip messages received in the last minute
+rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute
diff --git a/content/riak/kv/2.9.0p5/developing/api/http/store-object.md b/content/riak/kv/2.9.0p5/developing/api/http/store-object.md
new file mode 100644
index 0000000000..7533223be4
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/http/store-object.md
@@ -0,0 +1,154 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/http/store-object
+  - /riak/kv/2.9.0p5/dev/references/http/store-object
+  -
/riak/2.9.0p5/developing/api/http/store-object/ + - /riak/2.9.0/developing/api/http/store-object/ + - /riak/kv/2.9.0/developing/api/http/store-object/ + - /riak/kv/2.9.0p1/developing/api/http/store-object/ + - /riak/kv/2.9.0p2/developing/api/http/store-object/ + - /riak/kv/2.9.0p3/developing/api/http/store-object/ + - /riak/kv/2.9.0p4/developing/api/http/store-object/ +--- + + +Stores an object under the specified bucket / key. Storing an object comes in +two forms, depending on whether you want to use a key of your choosing, or let +Riak assign a key to a new object. + +## Request + +```bash +POST /types/type/buckets/bucket/keys # Riak-defined key +PUT /types/type/buckets/bucket/keys/key # User-defined key +POST /buckets/bucket/keys # Riak-defined key +PUT /buckets/bucket/keys/key # User-defined key +``` + +For the sake of compatibility with older clients, `POST` is also acceptable in +the form where the key is specified. + +Important headers: + +* `Content-Type` must be set for the stored object. Set what you expect to +receive back when next requesting it. +* `X-Riak-Vclock` if the object already exists, the vector clock attached to the +object when read. +* `X-Riak-Meta-*` - any additional metadata headers that should be stored with +the object. +* `X-Riak-Index-*` - index entries under which this object should be indexed. +[Read more about Secondary Indexing]({{}}riak/kv/2.9.0p5/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.0p5/developing/api/http/link-walking) + +Optional headers (only valid on `PUT`): + +* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since` +invoke conditional request semantics, matching on the `ETag` and `Last-Modified` +of the existing object. These can be used to prevent overwriting a modified +object. If the test fails, you will receive a `412 Precondition Failed` +response. This does not prevent concurrent writes; it is possible for the +condition to evaluate to true for multiple requests if the requests occur at the +same time. + +Optional query parameters: + +* `w` (write quorum) how many replicas to write to before returning a successful +response (default is defined by the bucket level) +* `dw` (durable write quorum) how many replicas to commit to durable storage +before returning a successful response (default is defined at the bucket level) +* `pw` how many primary replicas must be online to attempt a write (default is +defined at the bucket level) +* `returnbody=[true|false]` whether to return the contents of the stored object. + +*This request must include a body (entity).* + +## Response + +Normal status codes: + +* `201 Created` (when submitting without a key) +* `200 OK` +* `204 No Content` +* `300 Multiple Choices` + +Typical error codes: + +* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N) +* `412 Precondition Failed` if one of the conditional request headers failed to +match (see above) + +Important headers: + +* `Location` a relative URL to the newly-created object (when submitting without +a key) + +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.9.0p5/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +may be returned if siblings existed or were created as part of the operation, +and the response can be dealt with similarly. 
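+
+The conditional headers above can be exercised without a full client library. Below is a minimal Python sketch (standard library only) of a `PUT` guarded by `If-Match`; the bucket, key, and `ETag` value are illustrative assumptions:
+
+```python
+from urllib.error import HTTPError
+from urllib.request import Request, urlopen
+
+# The bucket, key, and ETag below are illustrative assumptions; the
+# ETag would come from a prior fetch of the object.
+etag = '"6dQBm9oYA1mxRSH0e96l5W"'
+
+req = Request(
+    "http://127.0.0.1:8098/buckets/test/keys/doc",
+    data=b'{"bar":"baz"}',
+    method="PUT",
+    headers={"Content-Type": "application/json", "If-Match": etag},
+)
+
+try:
+    urlopen(req)
+except HTTPError as e:
+    # 412 means the ETag no longer matches, i.e. the object changed
+    # since it was read. Remember: this does not rule out a concurrent
+    # write that passes the same check at the same time.
+    if e.code == 412:
+        print("Precondition failed; re-fetch before retrying")
+```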
+
+## Example: Storing Without Key
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys \
+    -H "Content-Type: text/plain" -d 'this is a test'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+*   Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /buckets/test/keys HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: text/plain
+> Content-Length: 14
+>
+< HTTP/1.1 201 Created
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+## Example: Storing With Key
+
+```curl
+$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true
+* About to connect() to 127.0.0.1 port 8098 (#0)
+*   Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==
+> Content-Length: 13
+>
+< HTTP/1.1 200 OK
+< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Link: </buckets/test>; rel="up"
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 13
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"bar":"baz"}
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/http/store-search-index.md b/content/riak/kv/2.9.0p5/developing/api/http/store-search-index.md
new file mode 100644
index 0000000000..0d12ef39b7
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/http/store-search-index.md
@@ -0,0 +1,60 @@
+---
+title: "HTTP Store Search Index"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Store Search Index"
+    identifier: "http_store_search_index"
+    weight: 115
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/http/store-search-index
+  - /riak/kv/2.9.0p5/dev/references/http/store-search-index
+  - /riak/2.9.0p5/developing/api/http/store-search-index/
+  - /riak/2.9.0/developing/api/http/store-search-index/
+  - /riak/kv/2.9.0/developing/api/http/store-search-index/
+  - /riak/kv/2.9.0p1/developing/api/http/store-search-index/
+  - /riak/kv/2.9.0p2/developing/api/http/store-search-index/
+  - /riak/kv/2.9.0p3/developing/api/http/store-search-index/
+  - /riak/kv/2.9.0p4/developing/api/http/store-search-index/
+---
+
+Creates a new Riak Search [index]({{}}riak/kv/2.9.0p5/developing/usage/search/#simple-setup).
+
+## Request
+
+```
+PUT /search/index/<index_name>
+```
+
+## Optional Request Body
+
+If you run a `PUT` request to this endpoint without a request body, Riak
+will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.
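+
+For example, a minimal Python sketch (standard library only; the host, port, and index name are assumptions) that creates an index using the default schema by sending a `PUT` with no body:
+
+```python
+from urllib.request import Request, urlopen
+
+# PUT with no request body: the new index will use _yz_default.
+req = Request("http://localhost:8098/search/index/famous", method="PUT")
+
+with urlopen(req) as resp:
+    print(resp.status)  # 204 on successful creation
+```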
To specify a different schema, however, you must pass Riak a JSON object
+as the request body in which the `schema` field specifies the name of
+the schema to use. If you've [stored a schema]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
+request would create an index called `my_index` that used that schema:
+
+```curl
+curl -XPUT http://localhost:8098/search/index/my_index \
+  -H "Content-Type: application/json" \
+  -d '{"schema": "my_custom_schema"}'
+```
+
+More information can be found in [Using Search]({{}}riak/kv/2.9.0p5/developing/usage/search).
+
+## Normal Response Codes
+
+* `204 No Content` --- The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` --- The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` --- The request timed out internally
diff --git a/content/riak/kv/2.9.0p5/developing/api/http/store-search-schema.md b/content/riak/kv/2.9.0p5/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..e1809a2dbe
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/http/store-search-schema.md
@@ -0,0 +1,58 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/http/store-search-schema
+  - /riak/kv/2.9.0p5/dev/references/http/store-search-schema
+  - /riak/2.9.0p5/developing/api/http/store-search-schema/
+  - /riak/2.9.0/developing/api/http/store-search-schema/
+  - /riak/kv/2.9.0/developing/api/http/store-search-schema/
+  - /riak/kv/2.9.0p1/developing/api/http/store-search-schema/
+  - /riak/kv/2.9.0p2/developing/api/http/store-search-schema/
+  - /riak/kv/2.9.0p3/developing/api/http/store-search-schema/
+  - /riak/kv/2.9.0p4/developing/api/http/store-search-schema/
+---
+
+Creates a new Riak [Search schema]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/<schema_name>
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response
+
+* `204 No Content` --- The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` --- The schema cannot be created because there is
+  something wrong with the schema itself, e.g.
an XML formatting error + that makes Riak Search unable to parse the schema +* `409 Conflict` --- The schema cannot be created because there is + already a schema with that name +* `503 Service Unavailable` --- The request timed out internally diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers.md new file mode 100644 index 0000000000..0a8a147672 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers.md @@ -0,0 +1,193 @@ +--- +title: "Protocol Buffers Client API" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Protocol Buffers API" + identifier: "apis_pbc" + weight: 103 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers + - /riak/kv/2.9.0p5/dev/references/protocol-buffers + - /riak/2.9.0p5/developing/api/protocol-buffers/ + - /riak/2.9.0/developing/api/protocol-buffers/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/ +--- + + +This is an overview of the operations you can perform using the +[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC) +interface to Riak, and can be used as a guide for developing a +PBC-compliant Riak client. + +## Protocol + +Riak listens on a TCP port (8087 by default) for incoming connections. +Once connected, the client can send a stream of requests on the same +connection. + +Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages. 
Messages are all encoded the same way, consisting of:
+
+* 32-bit length of message code + Protocol Buffers message in network
+  order
+* 8-bit message code to identify the Protocol Buffers message
+* N bytes of Protocol Buffers-encoded message
+
+### Example
+
+```
+00 00 00 07 09 0A 01 62 12 01 6B
+|----Len---|MC|----Message-----|
+
+Len = 0x07
+Message Code (MC) = 0x09 = RpbGetReq
+RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B
+
+Decoded Message:
+bucket: "b"
+key: "k"
+```
+
+## Message Codes
+
+Code | Message |
+:----|:--------|
+0 | `RpbErrorResp` |
+1 | `RpbPingReq` |
+2 | `RpbPingResp` |
+3 | `RpbGetClientIdReq` |
+4 | `RpbGetClientIdResp` |
+5 | `RpbSetClientIdReq` |
+6 | `RpbSetClientIdResp` |
+7 | `RpbGetServerInfoReq` |
+8 | `RpbGetServerInfoResp` |
+9 | `RpbGetReq` |
+10 | `RpbGetResp` |
+11 | `RpbPutReq` |
+12 | `RpbPutResp` |
+13 | `RpbDelReq` |
+14 | `RpbDelResp` |
+15 | `RpbListBucketsReq` |
+16 | `RpbListBucketsResp` |
+17 | `RpbListKeysReq` |
+18 | `RpbListKeysResp` |
+19 | `RpbGetBucketReq` |
+20 | `RpbGetBucketResp` |
+21 | `RpbSetBucketReq` |
+22 | `RpbSetBucketResp` |
+23 | `RpbMapRedReq` |
+24 | `RpbMapRedResp` |
+25 | `RpbIndexReq` |
+26 | `RpbIndexResp` |
+27 | `RpbSearchQueryReq` |
+28 | `RpbSearchQueryResp` |
+29 | `RpbResetBucketReq` |
+30 | `RpbResetBucketResp` |
+31 | `RpbGetBucketTypeReq` |
+32 | `RpbSetBucketTypeReq` |
+40 | `RpbCSBucketReq` |
+41 | `RpbCSUpdateReq` |
+50 | `RpbCounterUpdateReq` |
+51 | `RpbCounterUpdateResp` |
+52 | `RpbCounterGetReq` |
+53 | `RpbCounterGetResp` |
+54 | `RpbYokozunaIndexGetReq` |
+55 | `RpbYokozunaIndexGetResp` |
+56 | `RpbYokozunaIndexPutReq` |
+57 | `RpbYokozunaIndexPutResp` |
+58 | `RpbYokozunaSchemaGetReq` |
+59 | `RpbYokozunaSchemaGetResp` |
+60 | `RpbYokozunaSchemaPutReq` |
+80 | `DtFetchReq` |
+81 | `DtFetchResp` |
+82 | `DtUpdateReq` |
+83 | `DtUpdateResp` |
+253 | `RpbAuthReq` |
+254 | `RpbAuthResp` |
+255 | `RpbStartTls` |
+
+{{% note title="Message Definitions" %}}
+All Protocol Buffers messages are defined in the `riak.proto` and other
+`.proto` files in the `/src` directory of the
+RiakPB project.
+{{% /note %}}
+
+### Error Response
+
+If the request does not result in an error, Riak will return one of a
+variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`,
+depending on which request message is sent.
+
+If the server experiences an error processing a request, however, it
+will return an `RpbErrorResp` message instead of the response expected
+for the given request (e.g. `RpbGetResp` is the expected response to
+`RpbGetReq`). Error messages contain an error string and an error code,
+like this:
+
+```protobuf
+message RpbErrorResp {
+  required bytes errmsg = 1;
+  required uint32 errcode = 2;
+}
+```
+
+### Values
+
+* `errmsg` --- A string representation of what went wrong
+* `errcode` --- A numeric code. Currently, only `RIAKC_ERR_GENERAL=1`
+  is defined.
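+
+To make the framing concrete, here is a minimal Python sketch that pings a node over the PBC port. `RpbPingReq` (message code 1) has an empty message body, so no Protocol Buffers encoding is required; the address is the default mentioned above and otherwise an assumption:
+
+```python
+import socket
+import struct
+
+def frame(msg_code: int, body: bytes = b"") -> bytes:
+    # 4-byte big-endian length (message code + body), then the 1-byte
+    # message code, then the Protocol Buffers-encoded body.
+    return struct.pack("!IB", len(body) + 1, msg_code) + body
+
+with socket.create_connection(("127.0.0.1", 8087)) as sock:
+    sock.sendall(frame(1))  # RpbPingReq
+    # A robust client would loop until all bytes arrive; this is a sketch.
+    length = struct.unpack("!I", sock.recv(4))[0]
+    code = sock.recv(1)[0]
+    print(code == 2)  # RpbPingResp
+```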
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..b47da706bf --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,38 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/auth-req + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/auth-req + - /riak/2.9.0p5/developing/api/protocol-buffers/auth-req/ + - /riak/2.9.0/developing/api/protocol-buffers/auth-req/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/auth-req/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/auth-req/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/auth-req/ + - 
/riak/kv/2.9.0p3/developing/api/protocol-buffers/auth-req/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/auth-req/ +--- + + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. + +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.9.0p5/using/security/basics). diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..a0364df3ce --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,78 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/coverage-queries + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. 
Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/delete-object.md new file mode 100644 index 0000000000..dc551b54c7 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/delete-object.md @@ -0,0 +1,108 @@ +--- +title: "PBC Delete Object" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Delete Object" + identifier: "pbc_delete_object" + weight: 107 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/delete-object + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/delete-object + - /riak/2.9.0p5/developing/api/protocol-buffers/delete-object/ + - /riak/2.9.0/developing/api/protocol-buffers/delete-object/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/delete-object/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/delete-object/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/delete-object/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/delete-object/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/delete-object/ +--- + + +Delete an object in the specified [bucket type]({{}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types)/bucket/key location. + +## Request + +```protobuf +message RpbDelReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 rw = 3; + optional bytes vclock = 4; + optional uint32 r = 5; + optional uint32 w = 6; + optional uint32 pr = 7; + optional uint32 pw = 8; + optional uint32 dw = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + +#### Required Parameters + +Parameter | Description | +:---------|:------------| +`bucket` | The name of the bucket in which the object is stored +`key` | The key under which the object is stored + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description | +:---------|:------------| +`rw` | How many replicas to delete before returning a successful response +`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`dw` | Durable write quorum, i.e. 
how many replicas to commit to durable storage before returning a successful response
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message, sent as a byte array. Used to prevent the deletion of objects that have been modified since the last GET request
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the delete request will be sent
+`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter.
+
+## Response
+
+Only the message code is returned.
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65
+         74 12 01 6B 18 01
+Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>>
+
+RpbDelReq protoc decode:
+bucket: "notabucket"
+key: "k"
+rw: 1
+
+```
+
+#### Response
+
+```
+Hex      00 00 00 01 0E
+Erlang <<0,0,0,1,14>>
+
+RpbDelResp - only message code defined
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-counter-store.md
new file mode 100644
index 0000000000..11688dbd1a
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-counter-store.md
@@ -0,0 +1,39 @@
+---
+title: "PBC Data Type Counter Store"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Data Type Counter Store"
+    identifier: "pbc_dt_counter_store"
+    weight: 117
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/dt-counter-store
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/dt-counter-store
+  - /riak/2.9.0p5/developing/api/protocol-buffers/dt-counter-store/
+  - /riak/2.9.0/developing/api/protocol-buffers/dt-counter-store/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/dt-counter-store/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/dt-counter-store/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/dt-counter-store/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/dt-counter-store/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/dt-counter-store/
+---
+
+An operation to update a [counter]({{}}riak/kv/2.9.0p5/developing/data-types).
+
+## Request
+
+```protobuf
+message CounterOp {
+  optional sint64 increment = 1;
+}
+```
+
+The `increment` value specifies how much the counter will be incremented
+or decremented, depending on whether the `increment` value is positive
+or negative. This operation can be used to update counters that are
+stored on their own in a key or [within a map]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-map-store).
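+
+As a sketch of what composing this message can look like from a client: assuming Python classes generated from Riak's `riak_dt.proto` with `protoc` (the module name `riak_dt_pb2` follows protoc's convention and is otherwise an assumption), a `CounterOp` is a single-field message:
+
+```python
+import riak_dt_pb2  # assumed: generated from riak_dt.proto via protoc
+
+op = riak_dt_pb2.CounterOp()
+op.increment = 5  # a negative value would decrement the counter
+
+# These bytes would travel inside a DtOp within a DtUpdateReq.
+print(op.SerializeToString().hex())
+```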
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..6f7752294e --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,135 @@ +--- +title: "PBC Data Type Fetch" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Data Type Fetch" + identifier: "pbc_dt_fetch" + weight: 114 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/dt-fetch + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/dt-fetch + - /riak/2.9.0p5/developing/api/protocol-buffers/dt-fetch/ + - /riak/2.9.0/developing/api/protocol-buffers/dt-fetch/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/dt-fetch/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/dt-fetch/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/dt-fetch/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/dt-fetch/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/dt-fetch/ +--- + + +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.9.0p5/developing/data-types). This request results in a `DtFetchResp` +message (explained in the **Response** section below). + +## Request + +```protobuf +message DtFetchReq { + required bytes bucket = 1; + required bytes key = 2; + required bytes type = 3; + optional uint32 r = 4; + optional uint32 pr = 5; + optional bool basic_quorum = 6; + optional bool notfound_ok = 7; + optional uint32 timeout = 8; + optional bool sloppy_quorum = 9; + optional uint32 n_val = 10; + optional bool include_context = 11 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`key` | The key where the Data Type is stored +`type` | The [Using Bucket Types]({{}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) + +#### Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `r` and +`pr`, provided that that integer value is less than or equal +to N, _or_ a special value denoting `one` +(`4294967295-1`), `quorum` +(`4294967295-2`), `all` +(`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description +:---------|:----------- +`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object +`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the fetch request will be sent
+`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client
+
+## Response
+
+The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+
+```protobuf
+message DtFetchResp {
+    enum DataType {
+        COUNTER = 1;
+        SET     = 2;
+        MAP     = 3;
+    }
+
+    optional bytes    context = 1;
+    required DataType type    = 2;
+    optional DtValue  value   = 3;
+}
+```
+
+If the `include_context` option is specified, an opaque "context" value
+will be returned along with the user-readable data. When sending an
+update request, the client should send this context as well, just as one
+would send a [vclock]({{}}riak/kv/2.9.0p5/learn/glossary/#vector-clock) for standard KV updates.
+
+The type of the Data Type is specified in the `type` field, and must be
+one of the three possible values of the `DataType` enum (`COUNTER`,
+`SET`, or `MAP`).
+
+The current value of the Data Type is contained in the `value` field,
+which itself contains a `DtValue` message. This message will have the
+following structure:
+
+```protobuf
+message DtValue {
+    optional sint64   counter_value = 1;
+    repeated bytes    set_value     = 2;
+    repeated MapEntry map_value     = 3;
+}
+```
+
+If the Data Type queried is a counter, it will return an integer value
+for the counter; if a set, it will return the set's current value, in
+bytes; if a map, it will return a `MapEntry` message. `MapEntry` messages
+are structured as follows:
+
+```protobuf
+message MapEntry {
+    required MapField field = 1;
+    optional sint64   counter_value  = 2;
+    repeated bytes    set_value      = 3;
+    optional bytes    register_value = 4;
+    optional bool     flag_value     = 5;
+    repeated MapEntry map_value      = 6;
+}
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-map-store.md
new file mode 100644
index 0000000000..e67ef5236e
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-map-store.md
@@ -0,0 +1,81 @@
+---
+title: "PBC Data Type Map Store"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Data Type Map Store"
+    identifier: "pbc_dt_map_store"
+    weight: 119
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/dt-map-store
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/dt-map-store
+  - /riak/2.9.0p5/developing/api/protocol-buffers/dt-map-store/
+  - /riak/2.9.0/developing/api/protocol-buffers/dt-map-store/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/dt-map-store/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/dt-map-store/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/dt-map-store/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/dt-map-store/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/dt-map-store/
+---
+
+An operation to be applied to a value stored in a map (the contents of an update operation).
The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc. + +## Request + +Operations on maps are requested using a `MapOp` message, which has the following structure: + +```protobuf +message MapOp { + repeated MapField adds = 1; + repeated MapField removes = 2; + repeated MapUpdate updates = 3; +} +``` + +In a `MapOp` message, you can either add or remove fields (sets, counters, or maps) to or from the map or update a field or multiple fields. You can include as many field additions or removals and/or field updates as you wish. + +Adding or removing a field involves including a `MapField` message in your `MapOp` operation: + +```protobuf +message MapField { + enum MapFieldType { + COUNTER = 1; + SET = 2; + REGISTER = 3; + FLAG = 4; + MAP = 5; + } + required bytes name = 1; + required MapFieldType type = 2; +} +``` + +The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated. + +If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure: + +```protobuf +message MapUpdate { + enum FlagOp { + ENABLE = 1; + DISABLE = 2; + } + required MapField field = 1; + optional CounterOp counter_op = 2; + optional SetOp set_op = 3; + optional bytes register_op = 4; + optional FlagOp flag_op = 5; + optional MapOp map_op = 6; +} +``` + +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-set-store). + +If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). + +Updating a register does not involve sending a special message type. Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. 
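+
+A sketch of composing such an update, assuming Python classes generated from `riak_dt.proto` via `protoc` (the `riak_dt_pb2` module name is an assumption): set a register and enable a flag in a single `MapOp`:
+
+```python
+import riak_dt_pb2  # assumed: generated from riak_dt.proto via protoc
+
+map_op = riak_dt_pb2.MapOp()
+
+# Set a register field named "first_name" to a binary value.
+reg = map_op.updates.add()
+reg.field.name = b"first_name"
+reg.field.type = riak_dt_pb2.MapField.REGISTER
+reg.register_op = b"Ahmed"
+
+# Enable a flag field named "enterprise_customer".
+flag = map_op.updates.add()
+flag.field.name = b"enterprise_customer"
+flag.field.type = riak_dt_pb2.MapField.FLAG
+flag.flag_op = riak_dt_pb2.MapUpdate.ENABLE
+
+print(map_op.SerializeToString().hex())
+```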
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..ff993cd3bb --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,40 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/dt-set-store + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/dt-set-store + - /riak/2.9.0p5/developing/api/protocol-buffers/dt-set-store/ + - /riak/2.9.0/developing/api/protocol-buffers/dt-set-store/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/dt-set-store/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/dt-set-store/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/dt-set-store/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/dt-set-store/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/dt-set-store/ +--- + + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..fe41a28a4e --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,136 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/dt-store + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/dt-store + - /riak/2.9.0p5/developing/api/protocol-buffers/dt-store/ + - /riak/2.9.0/developing/api/protocol-buffers/dt-store/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/dt-store/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/dt-store/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/dt-store/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/dt-store/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/dt-store/ +--- + + +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.9.0p5/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-union) document. 
+ +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types). + +Also required is a `DtOp` message that specifies which operation is to +be performed, depending on whether the Data Type being updated is a +[counter]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-map-store). + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description +:---------|:----------- +`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.9.0p5/learn/glossary/#vector-clock) +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_body` | Whether to return the contents of the stored object. Defaults to `false`. +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored +`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be returned to the client when the `DtUpdateResp` is sent to the client. + +## Response + +The response to a Data Type update request is analogous to +[`RpbPutResp`]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/store-object) for KV operations. 
If `return_body` is set in the update request message (as explained above),
+the message will include the opaque context of the Data Type (`context`)
+and the new value of the Data Type _after_ the update has completed
+(depending on whether the Data Type is a counter, set, or map). If no
+key was specified in the update request, it will include the
+Riak-assigned key (`key`).
+
+```protobuf
+message DtUpdateResp {
+    optional bytes    key           = 1;
+    optional bytes    context       = 2;
+    optional sint64   counter_value = 3;
+    repeated bytes    set_value     = 4;
+    repeated MapEntry map_value     = 5;
+}
+```
+
+Assuming `return_body` is set to `true`: if a counter is updated, the
+response will include an integer as the `counter_value`; if a set is
+updated, a list of binaries will be returned as the `set_value`; and if a
+map is updated, the returned `map_value` will be a `MapEntry` message.
+That message takes the following form:
+
+```protobuf
+message MapEntry {
+    required MapField field = 1;
+    optional sint64   counter_value  = 2;
+    repeated bytes    set_value      = 3;
+    optional bytes    register_value = 4;
+    optional bool     flag_value     = 5;
+    repeated MapEntry map_value      = 6;
+}
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-union.md
new file mode 100644
index 0000000000..4ab89d7ba6
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-union.md
@@ -0,0 +1,39 @@
+---
+title: "PBC Data Type Union"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Data Type Union"
+    identifier: "pbc_dt_union"
+    weight: 115
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/dt-union
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/dt-union
+  - /riak/2.9.0p5/developing/api/protocol-buffers/dt-union/
+  - /riak/2.9.0/developing/api/protocol-buffers/dt-union/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/dt-union/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/dt-union/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/dt-union/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/dt-union/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/dt-union/
+---
+
+A "union" type for update operations.
+
+## Request
+
+```protobuf
+message DtOp {
+    optional CounterOp counter_op = 1;
+    optional SetOp     set_op     = 2;
+    optional MapOp     map_op     = 3;
+}
+```
+
+The included operation depends on the Data Type that is being updated.
+`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/dt-store) message.
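+
+A sketch of how the union fits into an update request, again assuming `riak_dt_pb2` classes generated from `riak_dt.proto` (the bucket, bucket type, and key are illustrative):
+
+```python
+import riak_dt_pb2  # assumed: generated from riak_dt.proto via protoc
+
+req = riak_dt_pb2.DtUpdateReq()
+req.bucket = b"travel"
+req.type = b"sets"  # the bucket type, not the Data Type itself
+req.key = b"cities"
+
+# Exactly one of counter_op, set_op, or map_op is filled in, depending
+# on the Data Type being updated.
+req.op.set_op.adds.append(b"Toronto")
+req.op.set_op.removes.append(b"Montreal")
+
+print(req.SerializeToString().hex())
+```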
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..7b4ed1cde3 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,189 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/fetch-object + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/fetch-object + - /riak/2.9.0p5/developing/api/protocol-buffers/fetch-object/ + - /riak/2.9.0/developing/api/protocol-buffers/fetch-object/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/fetch-object/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/fetch-object/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/fetch-object/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/fetch-object/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/fetch-object/ +--- + + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match
+`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it
+`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+
+## Response
+
+```protobuf
+message RpbGetResp {
+    repeated RpbContent content = 1;
+    optional bytes vclock = 2;
+    optional bool unchanged = 3;
+}
+```
+
+#### Values
+
+Value | Description
+:-----|:-----------
+`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty.
+`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings
+`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true`
+
+The content entries hold the object value and any metadata.
+Below is the structure of an `RpbContent` message, which is
+included in GET/PUT responses (`RpbGetResp` (above) and
+[`RpbPutResp`]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/store-object), respectively):
+
+```protobuf
+message RpbContent {
+    required bytes value = 1;
+    optional bytes content_type = 2;
+    optional bytes charset = 3;
+    optional bytes content_encoding = 4;
+    optional bytes vtag = 5;
+    repeated RpbLink links = 6;
+    optional uint32 last_mod = 7;
+    optional uint32 last_mod_usecs = 8;
+    repeated RpbPair usermeta = 9;
+    repeated RpbPair indexes = 10;
+    optional bool deleted = 11;
+}
+```
+
+From the above, we can see that an `RpbContent` message will always
+contain the binary `value` of the object. But it could also contain any
+of the following optional parameters:
+
+* `content_type` --- The content type of the object, e.g. `text/plain`
+  or `application/json`
+* `charset` --- The character encoding of the object, e.g. `utf-8`
+* `content_encoding` --- The content encoding of the object, e.g.
+  `gzip`
+* `vtag` --- The object's [vtag]({{}}riak/kv/2.9.0p5/learn/glossary/#vector-clock)
+* `links` --- This parameter is associated with the now-deprecated link
+  walking feature and should not be used by Riak clients
+* `last_mod` --- A timestamp for when the object was last modified, in
+  [Unix time](http://en.wikipedia.org/wiki/Unix_time) (seconds)
+* `last_mod_usecs` --- The microseconds portion of the last-modified
+  timestamp
+* `usermeta` --- This field stores user-specified key/value metadata
+  pairs to be associated with the object. `RpbPair` messages used to
+  send metadata of this sort are structured like this:
+
+    ```protobuf
+    message RpbPair {
+        required bytes key = 1;
+        optional bytes value = 2;
+    }
+    ```
+    Note that an `RpbPair` can hold both a key and a value, or just a key.
+    `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes) to objects (in the optional
+    `indexes` field).
+* `deleted` --- Whether the object has been deleted (i.e. whether a
+  tombstone for the object has been found under the specified key)
+
+{{% note title="Note on missing keys" %}}
+Remember: if a key is not stored in Riak, an `RpbGetResp` response without the
+`content` and `vclock` fields will be returned. This should be mapped to
+whatever convention the client language uses to return not found. The Erlang
+client, for example, returns the tuple `{error, notfound}`.
+{{% /note %}}
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 07 09 0A 01 62 12 01 6B
+Erlang   <<0,0,0,7,9,10,1,98,18,1,107>>
+
+RpbGetReq protoc decode:
+bucket: "b"
+key: "k"
+```
+
+#### Response
+
+```
+Hex      00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44
+         6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B
+         49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B
+         CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65
+         30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00
+Erlang   <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122,
+           56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64,
+           224,185,6,18,31,107,206,97,96,96,96,204,96,202,5,82,44,172,194,91,63,
+           101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>>
+
+RpbGetResp protoc decode:
+content {
+  value: "v2"
+  vtag: "3SDlf4INKz8hNdhyImKIru"
+  last_mod: 1271442363
+  last_mod_usecs: 105696
+}
+vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000"
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-props.md
new file mode 100644
index 0000000000..72363b5b49
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-props.md
@@ -0,0 +1,118 @@
+---
+title: "PBC Get Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Get Bucket Properties"
+    identifier: "pbc_get_bucket_props"
+    weight: 102
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/get-bucket-props
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/get-bucket-props
+  - /riak/2.9.0p5/developing/api/protocol-buffers/get-bucket-props/
+  - /riak/2.9.0/developing/api/protocol-buffers/get-bucket-props/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/get-bucket-props/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/get-bucket-props/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/get-bucket-props/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/get-bucket-props/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/get-bucket-props/
+---
+
+
+Fetch a bucket's properties.
+
+## Request
+
+```protobuf
+message RpbGetBucketReq {
+    required bytes bucket = 1;
+    optional bytes type = 2;
+}
+```
+
+The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
+the `default` bucket type will be used.
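+
+If you are not assembling `RpbGetBucketReq` messages by hand, this is
+typically a one-liner. A minimal sketch with the official Erlang client
+(`riakc`), assuming a local node on port 8087 and a placeholder bucket
+name:
+
+```erlang
+%% Sketch: riakc sends an RpbGetBucketReq and decodes the
+%% RpbGetBucketResp into an Erlang property list.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"my_bucket">>),
+%% e.g. Props = [{n_val,3},{allow_mult,false},...]
+NVal = proplists:get_value(n_val, Props).
+```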
+ +## Response + +When an `RpbGetBucketReq` message is sent to Riak, it will respond with +an `RpbGetBucketResp` message, which returns the bucket's properties: + +```protobuf +message RpbGetBucketResp { + required RpbBucketProps props = 1; +} +``` + +The `RpbBucketProps` value itself is structured as follows: + +```protobuf +message RpbBucketProps { + optional uint32 n_val = 1; + optional bool allow_mult = 2; + optional bool last_write_wins = 3; + repeated RpbCommitHook precommit = 4; + optional bool has_precommit = 5 [default = false]; + repeated RpbCommitHook postcommit = 6; + optional bool has_postcommit = 7 [default = false]; + optional RpbModFun chash_keyfun = 8; + optional RpbModFun linkfun = 9; + optional uint32 old_vclock = 10; + optional uint32 young_vclock = 11; + optional uint32 big_vclock = 12; + optional uint32 small_vclock = 13; + optional uint32 pr = 14; + optional uint32 r = 15; + optional uint32 w = 16; + optional uint32 pw = 17; + optional uint32 dw = 18; + optional uint32 rw = 19; + optional bool basic_quorum = 20; + optional bool notfound_ok = 21; + optional bytes backend = 22; + optional bool search = 23; + enum RpbReplMode { + FALSE = 0; + REALTIME = 1; + FULLSYNC = 2; + TRUE = 3; + } + optional RpbReplMode repl = 24; + optional bytes search_index = 25; + optional bytes datatype = 26; + optional bool consistent = 27; +} +``` + +#### Optional Response Values + +Each `RpbBucketProps` message returns all of the properties associated +with a particular bucket. Default values for bucket properties, as well +as descriptions of all of the above properties, can be found in the +[configuration file]({{}}riak/kv/2.9.0p5/configuring/reference/#default-bucket-properties) documentation. + +It should be noted that the value of an `RpbBucketProps` message may +include other message types, such as `RpbModFun` (specifying +module-function pairs for bucket properties that require them) and +`RpbCommitHook` (specifying the module-function pair and name of a +commit hook). 
Those message types are structured like this:
+
+```protobuf
+message RpbModFun {
+    required bytes module = 1;
+    required bytes function = 2;
+}
+
+message RpbCommitHook {
+    optional RpbModFun modfun = 1;
+    optional bytes name = 2;
+}
+```
+
+{{% note title="Note on `RpbReplMode`" %}}
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/).
+{{% /note %}}
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-type.md
new file mode 100644
index 0000000000..af0fa7ece4
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-type.md
@@ -0,0 +1,41 @@
+---
+title: "PBC Get Bucket Type"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Get Bucket Type"
+    identifier: "pbc_get_bucket_type"
+    weight: 112
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/get-bucket-type
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/get-bucket-type
+  - /riak/2.9.0p5/developing/api/protocol-buffers/get-bucket-type/
+  - /riak/2.9.0/developing/api/protocol-buffers/get-bucket-type/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/get-bucket-type/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/get-bucket-type/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/get-bucket-type/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/get-bucket-type/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/get-bucket-type/
+---
+
+
+Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types).
+
+## Request
+
+```protobuf
+message RpbGetBucketTypeReq {
+    required bytes type = 1;
+}
+```
+
+Only the name of the bucket type needs to be specified (in the `type` field).
+
+## Response
+
+A bucket type's properties will be sent to the client as part of an
+[`RpbBucketProps`]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-props) message.
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/get-client-id.md
new file mode 100644
index 0000000000..4d13c29e84
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/get-client-id.md
@@ -0,0 +1,69 @@
+---
+title: "PBC Get Client ID"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Get Client ID"
+    identifier: "pbc_get_client_id"
+    weight: 127
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/get-client-id
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/get-client-id
+  - /riak/2.9.0p5/developing/api/protocol-buffers/get-client-id/
+  - /riak/2.9.0/developing/api/protocol-buffers/get-client-id/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/get-client-id/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/get-client-id/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/get-client-id/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/get-client-id/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/get-client-id/
+---
+
+
+{{% note title="Deprecation notice" %}}
+The use of client IDs in conflict resolution is now deprecated in Riak.
If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Get the client id used for this connection. Client ids are used for +conflict resolution and each unique actor in the system should be +assigned one. A client id is assigned randomly when the socket is +connected and can be changed using [Set Client ID]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/set-client-id). + +## Request + +Just the `RpbGetClientIdReq` message code. No request message defined. + +## Response + +```protobuf +// Get ClientId Request - no message defined, just send RpbGetClientIdReq +message code +message RpbGetClientIdResp { + required bytes client_id = 1; // Client id in use for this connection +} +``` + +## Example + +Request + +``` +Hex 00 00 00 01 03 +Erlang <<0,0,0,1,3>> +``` + + +Response + +``` +Hex 00 00 00 07 04 0A 04 01 65 01 B5 +Erlang <<0,0,0,7,4,10,4,1,101,1,181>> + +RpbGetClientIdResp protoc decode: +client_id: "001e001265" +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..b04f0f3dd8 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,84 @@ +--- +title: "PBC List Buckets" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "List Buckets" + identifier: "pbc_list_buckets" + weight: 100 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/list-buckets + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/list-buckets + - /riak/2.9.0p5/developing/api/protocol-buffers/list-buckets/ + - /riak/2.9.0/developing/api/protocol-buffers/list-buckets/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/list-buckets/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/list-buckets/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/list-buckets/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/list-buckets/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/list-buckets/ +--- + + +List all of the bucket names available. + +{{% note title="Caution" %}} +This call can be expensive for the server. Do not use in performance-sensitive +code. +{{% /note %}} + + +## Request + +Only the message code is required. 
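+
+As a client-level illustration, here is a minimal sketch using the
+official Erlang client (`riakc`), assuming a local node on port 8087;
+heed the caution above before running this against a busy cluster:
+
+```erlang
+%% Sketch: riakc sends the RpbListBucketsReq message code and
+%% collects the bucket names from the response.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Buckets} = riakc_pb_socket:list_buckets(Pid),
+lists:foreach(fun(B) -> io:format("~s~n", [B]) end, Buckets).
+```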
+
+## Response
+
+
+```protobuf
+message RpbListBucketsResp {
+    repeated bytes buckets = 1;
+}
+```
+
+
+Values
+
+* `buckets` --- Buckets on the server
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 01 0F
+Erlang   <<0,0,0,1,15>>
+
+RpbListBucketsReq - only message code defined
+```
+
+
+#### Response
+
+```bash
+Hex      00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62
+         34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02
+         62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37
+Erlang   <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10,
+           3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>>
+
+RpbListBucketsResp protoc decode:
+buckets: "b1"
+buckets: "b5"
+buckets: "b4"
+buckets: "b8"
+buckets: "b3"
+buckets: "b10"
+buckets: "b9"
+buckets: "b2"
+buckets: "b6"
+buckets: "b7"
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/list-keys.md
new file mode 100644
index 0000000000..15c7e68326
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/list-keys.md
@@ -0,0 +1,105 @@
+---
+title: "PBC List Keys"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "List Keys"
+    identifier: "pbc_list_keys"
+    weight: 101
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/list-keys
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/list-keys
+  - /riak/2.9.0p5/developing/api/protocol-buffers/list-keys/
+  - /riak/2.9.0/developing/api/protocol-buffers/list-keys/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/list-keys/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/list-keys/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/list-keys/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/list-keys/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/list-keys/
+---
+
+
+List all of the keys in a bucket. This is a streaming call, with
+multiple response messages sent for each request.
+
+{{% note title="Not for production use" %}}
+This operation requires traversing all keys stored in the cluster and should
+not be used in production.
+{{% /note %}}
+
+## Request
+
+```protobuf
+message RpbListKeysReq {
+    required bytes bucket = 1;
+}
+```
+
+Required Parameters
+
+* `bucket` --- The bucket to get keys from
+
+## Response
+
+```protobuf
+message RpbListKeysResp {
+    repeated bytes keys = 1;
+    optional bool done = 2;
+}
+```
+
+#### Values
+
+* **keys** - batch of keys in the bucket
+* **done** - set true on the last response packet
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73
+Erlang   <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>>
+
+RpbListKeysReq protoc decode:
+bucket: "listkeys"
+
+```
+
+#### Response Packet 1
+
+```bash
+Hex      00 00 00 04 12 0A 01 34
+Erlang   <<0,0,0,4,18,10,1,52>>
+
+RpbListKeysResp protoc decode:
+keys: "4"
+
+```
+
+#### Response Packet 2
+
+```bash
+Hex      00 00 00 08 12 0A 02 31 30 0A 01 33
+Erlang   <<0,0,0,8,18,10,2,49,48,10,1,51>>
+
+RpbListKeysResp protoc decode:
+keys: "10"
+keys: "3"
+```
+
+
+#### Response Packet 3
+
+```bash
+Hex      00 00 00 03 12 10 01
+Erlang   <<0,0,0,3,18,16,1>>
+
+RpbListKeysResp protoc decode:
+done: true
+
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/mapreduce.md
new file mode 100644
index 0000000000..06231753fd
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/mapreduce.md
@@ -0,0 +1,157 @@
+---
+title: "PBC MapReduce"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "MapReduce"
+    identifier: "pbc_mapreduce"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/mapreduce
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/mapreduce
+  - /riak/2.9.0p5/developing/api/protocol-buffers/mapreduce/
+  - /riak/2.9.0/developing/api/protocol-buffers/mapreduce/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/mapreduce/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/mapreduce/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/mapreduce/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/mapreduce/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/mapreduce/
+---
+
+
+Execute a MapReduce job.
+
+## Request
+
+
+```protobuf
+message RpbMapRedReq {
+    required bytes request = 1;
+    required bytes content_type = 2;
+}
+```
+
+
+Required Parameters
+
+* `request` --- MapReduce job
+* `content_type` - Encoding for MapReduce job
+
+MapReduce jobs can be encoded in two different ways:
+
+* `application/json` --- JSON-encoded MapReduce job
+* `application/x-erlang-binary` --- Erlang external term format
+
+The JSON encoding is the same as [REST API]({{}}riak/kv/2.9.0p5/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{}}riak/kv/2.9.0p5/developing/app-guide/advanced-mapreduce/#erlang).
+
+## Response
+
+The results of the MapReduce job are returned for each phase that
+generates a result, encoded in the same format the job was submitted in.
+Multiple response messages will be returned, followed by a final message
+at the end of the job.
+
+```protobuf
+message RpbMapRedResp {
+    optional uint32 phase = 1;
+    optional bytes response = 2;
+    optional bool done = 3;
+}
+```
+
+
+Values
+
+* `phase` --- Phase number of the MapReduce job
+* `response` - Response encoded with the `content_type` submitted
+* `done` - Set `true` on the last response packet
+
+## Example
+
+Here is an example of submitting a JSON-encoded job to sum up a bucket
+full of JSON-encoded values:
+
+```
+{"inputs": "bucket_501653",
+ "query":
+    [{"map": {"arg": null,
+              "name": "Riak.mapValuesJson",
+              "language": "javascript",
+              "keep": false}},
+     {"reduce": {"arg": null,
+                 "name": "Riak.reduceSum",
+                 "language": "javascript",
+                 "keep": true}}]}
+```
+
+Request
+
+```bash
+Hex      00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
+         22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
+         33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
+         6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
+         6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
+         6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
+         2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
+         61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
+         70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
+         65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
+         6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
+         69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
+         22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
+         61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
+         3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
+         69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
+Erlang   <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
+           117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
+           121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
+           110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
+           109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
+           110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
+           116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
+           44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
+           32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
+           46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
+           97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
+           34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
+           112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>
+
+RpbMapRedReq protoc decode:
+request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null,
+"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}},
+ {"reduce": {"arg": null, "name": "Riak.reduceSum", "language":
+"javascript", "keep": true}}]}"
+content_type: "application/json"
+
+```
+
+
+Response 1 - result from phase 1
+
+```bash
+Hex      00 00 00 08 18 08 01 12 03 5B 39 5D
+Erlang   <<0,0,0,8,24,8,1,18,3,91,57,93>>
+
+RpbMapRedResp protoc decode:
+phase: 1
+response: "[9]"
+
+```
+
+
+Response 2 - end of MapReduce job
+
+```bash
+Hex      00 00 00 03 18 18 01
+Erlang   <<0,0,0,3,24,24,1>>
+
+RpbMapRedResp protoc decode:
+done: true
+
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/ping.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/ping.md
new file mode 100644
index 0000000000..5fcf706497
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/ping.md
@@ -0,0 +1,50 @@
+---
+title: "PBC Ping"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Ping"
+    identifier: "pbc_ping"
+    weight: 110
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/ping
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/ping
+  - /riak/2.9.0p5/developing/api/protocol-buffers/ping/
+  - /riak/2.9.0/developing/api/protocol-buffers/ping/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/ping/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/ping/
+  - 
/riak/kv/2.9.0p2/developing/api/protocol-buffers/ping/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/ping/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/ping/ +--- + + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..e0fb22ccf5 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,67 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/reset-bucket-props + - /riak/2.9.0p5/developing/api/protocol-buffers/reset-bucket-props/ + - /riak/2.9.0/developing/api/protocol-buffers/reset-bucket-props/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/reset-bucket-props/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/reset-bucket-props/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/reset-bucket-props/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/reset-bucket-props/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/reset-bucket-props/ +--- + + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/2.9.0p5/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
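+
+A minimal client-level sketch, assuming the official Erlang client
+(`riakc`) and that your client version exposes `reset_bucket/2`; the
+bucket name mirrors the example below:
+
+```erlang
+%% Sketch: reset the properties of bucket "friends" to their defaults.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+ok = riakc_pb_socket:reset_bucket(Pid, <<"friends">>).
+```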
+
+## Example
+
+Request to reset the properties for the bucket `friends`:
+
+#### Request
+
+```bash
+Hex      00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
+Erlang   <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>
+
+RpbResetBucketReq protoc decode:
+bucket: "friends"
+
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 01 1E
+Erlang   <<0,0,0,1,30>>
+
+RpbResetBucketResp - only message code defined
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/search.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..703fd384d6
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/search.md
@@ -0,0 +1,156 @@
+---
+title: "PBC Search"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Search"
+    identifier: "pbc_search"
+    weight: 109
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/search
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/search
+  - /riak/2.9.0p5/developing/api/protocol-buffers/search/
+  - /riak/2.9.0/developing/api/protocol-buffers/search/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/search/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/search/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/search/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/search/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/search/
+---
+
+
+Send a Search request to retrieve a list of documents, along with a few
+stats.
+
+## Request
+
+
+```protobuf
+message RpbSearchQueryReq {
+    required bytes q = 1;
+    required bytes index = 2;
+    optional uint32 rows = 3;
+    optional uint32 start = 4;
+    optional bytes sort = 5;
+    optional bytes filter = 6;
+    optional bytes df = 7;
+    optional bytes op = 8;
+    repeated bytes fl = 9;
+    optional bytes presort = 10;
+}
+```
+
+Required Parameters
+
+* `q` --- The contents of the query
+* `index` --- The name of the index to search
+
+Optional Parameters
+
+* `rows` --- The maximum number of rows to return
+* `start` --- A start offset, i.e. the number of keys to skip before
+  returning values
+* `sort` --- How the search results are to be sorted
+* `filter` --- Filters search with additional query scoped to inline
+  fields
+* `df` --- Override the `default_field` setting in the schema file
+* `op` --- `and` or `or`, to override the `default_op` operation setting
+  in the schema file
+* `fl` --- The list of fields to return
+* `presort` --- Presort. The options are `key` or `score`
+
+
+## Response
+
+The results of a search query are returned as a repeating list of 0 or
+more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
+more key/value pairs (`RpbPair`) that match the given request
+parameters. The response also includes the maximum search score and the
+number of results.
+
+
+```protobuf
+// RpbPair is a generic key/value pair datatype used for
+// other message types
+message RpbPair {
+    required bytes key = 1;
+    optional bytes value = 2;
+}
+
+message RpbSearchDoc {
+    repeated RpbPair fields = 1;
+}
+
+message RpbSearchQueryResp {
+    repeated RpbSearchDoc docs = 1;
+    optional float max_score = 2;
+    optional uint32 num_found = 3;
+}
+```
+
+Values
+
+* `docs` --- A list of docs that match the search request
+* `max_score` --- The top score returned
+* `num_found` --- Returns the total number of values matched by this
+  search
+
+
+## Example
+
+Request
+
+Here we search for any animals that begin with the string `pig`. We only
+want the first 100, and sort the values by a `name` field.
+
+```bash
+RpbSearchQueryReq protoc decode:
+q: "pig*"
+index: "animals"
+rows: 100
+start: 0
+sort: "name"
+
+Hex      00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E
+         69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65
+Erlang   <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110,
+           105,109,97,108,115,24,100,32,0,42,4,110,97,
+           109,101>>
+```
+
+Response
+
+```bash
+Hex      00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D
+         61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65
+         12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69
+         6D 61 6C 12 06 70 69 67 65 6F 6E 18 02
+Erlang   <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109,
+           97,108,18,3,112,105,103,10,12,10,4,110,97,
+           109,101,18,4,102,114,101,100,10,18,10,16,10,
+           6,97,110,105,109,97,108,18,6,112,105,103,
+           101,111,110,24,2>>
+
+RpbSearchQueryResp protoc decode:
+docs {
+  fields {
+    key: "animal"
+    value: "pig"
+  }
+  fields {
+    key: "name"
+    value: "fred"
+  }
+}
+docs {
+  fields {
+    key: "animal"
+    value: "pigeon"
+  }
+}
+num_found: 2
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/secondary-indexes.md
new file mode 100644
index 0000000000..bb7559a50c
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/secondary-indexes.md
@@ -0,0 +1,129 @@
+---
+title: "PBC Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Secondary Indexes"
+    identifier: "pbc_secondary_indexes"
+    weight: 108
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/secondary-indexes
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/secondary-indexes
+  - /riak/2.9.0p5/developing/api/protocol-buffers/secondary-indexes/
+  - /riak/2.9.0/developing/api/protocol-buffers/secondary-indexes/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/secondary-indexes/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/secondary-indexes/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/secondary-indexes/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/secondary-indexes/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/secondary-indexes/
+---
+
+
+Request a set of keys that match a secondary index query.
+
+## Request
+
+```protobuf
+message RpbIndexReq {
+    enum IndexQueryType {
+        eq = 0;
+        range = 1;
+    }
+    required bytes bucket = 1;
+    required bytes index = 2;
+    required IndexQueryType qtype = 3;
+    optional bytes key = 4;
+    optional bytes range_min = 5;
+    optional bytes range_max = 6;
+    optional bool return_terms = 7;
+    optional bool stream = 8;
+    optional uint32 max_results = 9;
+    optional bytes continuation = 10;
+    optional uint32 timeout = 11;
+    optional bytes type = 12;
+    optional bytes term_regex = 13;
+    optional bool pagination_sort = 14;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket in which the object is stored
+`index` | The name of the index to be queried
+`qtype` | The type of index query to be performed. This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`key` | The exact index value to match if `qtype` is set to `eq`
+`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
+`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
+`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
+`max_results` | If pagination is turned on, the number of results to be returned to the client
+`continuation` | An opaque value returned in a previous paginated response; pass it back to retrieve the next page of results
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.9.0p5/developing/usage/bucket-types).
+`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
+`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
+
+## Response
+
+The results of a Secondary Index query are returned as a repeating list
+of 0 or more keys that match the given request parameters.
+
+```protobuf
+message RpbIndexResp {
+    repeated bytes keys = 1;
+    repeated RpbPair results = 2;
+    optional bytes continuation = 3;
+    optional bool done = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`keys` | A list of keys that match the index request
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/fetch-object).
+`continuation` | Used for paginated responses
+`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
+
+## Example
+
+#### Request
+
+Here we look for any exact matches of `chicken` on an `animal_bin` index
+for a bucket named `farm`.
+
+```bash
+RpbIndexReq protoc decode:
+bucket: "farm"
+index: "animal_bin"
+qtype: 0
+key: "chicken"
+
+Hex      00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69
+         6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E
+Erlang   <<0,0,0,30,25,10,4,102,97,114,109,18,10,97,110,105,
+           109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107,
+           101,110>>
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72
+Erlang   <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>>
+
+RpbIndexResp protoc decode:
+keys: "hen"
+keys: "rooster"
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/server-info.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/server-info.md
new file mode 100644
index 0000000000..a0d98b4ef8
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/server-info.md
@@ -0,0 +1,66 @@
+---
+title: "PBC Server Info"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Server Info"
+    identifier: "pbc_server_info"
+    weight: 111
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/server-info
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/server-info
+  - /riak/2.9.0p5/developing/api/protocol-buffers/server-info/
+  - /riak/2.9.0/developing/api/protocol-buffers/server-info/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/server-info/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/server-info/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/server-info/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/server-info/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/server-info/
+---
+
+
+A message from Riak that contains two pieces of information about the
+server: the name of the node and the version of Riak in use on that
+node.
+
+## Request
+
+A request consists only of the `RpbGetServerInfoReq` message code. No
+request message is defined.
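+
+A minimal sketch with the official Erlang client (`riakc`), assuming a
+local node on port 8087:
+
+```erlang
+%% Sketch: riakc sends the RpbGetServerInfoReq message code and
+%% returns the decoded response as a property list.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Info} = riakc_pb_socket:get_server_info(Pid),
+%% e.g. Info = [{node,<<"riak@127.0.0.1">>},{server_version,<<"0.10">>}]
+Node = proplists:get_value(node, Info).
+```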
+ +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..2a0a561e2e --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,76 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/set-bucket-props + - /riak/2.9.0p5/developing/api/protocol-buffers/set-bucket-props/ + - /riak/2.9.0/developing/api/protocol-buffers/set-bucket-props/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/set-bucket-props/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/set-bucket-props/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/set-bucket-props/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/set-bucket-props/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/set-bucket-props/ +--- + + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/2.9.0p5/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
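+
+At the client level, the same operation is usually one call. A minimal
+sketch with the official Erlang client (`riakc`), mirroring the
+`allow_mult` example below:
+
+```erlang
+%% Sketch: riakc builds the RpbSetBucketReq (including the embedded
+%% RpbBucketProps) from a property list.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
+```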
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..774309376a --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,39 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/set-bucket-type + - /riak/2.9.0p5/developing/api/protocol-buffers/set-bucket-type/ + - /riak/2.9.0/developing/api/protocol-buffers/set-bucket-type/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/set-bucket-type/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/set-bucket-type/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/set-bucket-type/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/set-bucket-type/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/set-bucket-type/ +--- + + +Assigns a set of [bucket properties]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.9.0p5/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..8383ef68fb --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,70 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/set-client-id + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/set-client-id + - /riak/2.9.0p5/developing/api/protocol-buffers/set-client-id/ + - /riak/2.9.0/developing/api/protocol-buffers/set-client-id/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/set-client-id/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/set-client-id/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/set-client-id/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/set-client-id/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/set-client-id/ +--- + + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. 
If you
+are building or maintaining a Riak client that is intended to be compatible
+with Riak 1.4 or later, you can safely ignore client IDs.
+{{% /note %}}
+
+Set the client ID for this connection. A library may want to set the
+client ID if it has a good way to uniquely identify actors across
+reconnects. This will reduce vector clock bloat.
+
+## Request
+
+```protobuf
+message RpbSetClientIdReq {
+    required bytes client_id = 1; // Client id to use for this connection
+}
+```
+
+
+## Response
+
+Just the `RpbSetClientIdResp` message code.
+
+## Example
+
+Request
+
+```
+Hex      00 00 00 07 05 0A 04 01 65 01 B6
+Erlang   <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "001e001266"
+
+```
+
+
+Response
+
+```
+Hex      00 00 00 01 06
+Erlang   <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..ffc8ff9aba
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,158 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/store-object
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/store-object
+  - /riak/2.9.0p5/developing/api/protocol-buffers/store-object/
+  - /riak/2.9.0/developing/api/protocol-buffers/store-object/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/store-object/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/store-object/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/store-object/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/store-object/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/store-object/
+---
+
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/2.9.0p5/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.9.0p5/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.9.0p5/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/2.9.0p5/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/2.9.0p5/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+    required bytes bucket = 1;
+    optional bytes key = 2;
+    optional bytes vclock = 3;
+    required RpbContent content = 4;
+    optional uint32 w = 5;
+    optional uint32 dw = 6;
+    optional bool return_body = 7;
+    optional uint32 pw = 8;
+    optional bool if_not_modified = 9;
+    optional bool if_none_match = 10;
+    optional bool return_head = 11;
+    optional uint32 timeout = 12;
+    optional bool asis = 13;
+    optional bool sloppy_quorum = 14;
+    optional uint32 n_val = 15;
+    optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that that integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Omit if this is a new key or if you deliberately want to create a sibling.
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`.
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_head` | Return the metadata for the now-stored object without returning the value of the object
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+
+The `if_not_modified`, `if_none_match`, and `asis` parameters are set
+only for messages sent between nodes in a Riak cluster and should not be
+set by Riak clients.
+
+#### Response
+
+```protobuf
+message RpbPutResp {
+    repeated RpbContent contents = 1;
+    optional bytes vclock = 2;
+    optional bytes key = 3;
+}
+```
+
+If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
+will contain the current object after the PUT completes, in `contents`,
+as well as the object's [causal context]({{}}riak/kv/2.9.0p5/learn/concepts/causal-context), in the `vclock`
+field. The `key` will be sent only if the server generated a random key
+for the object.
+
+If `return_body` is not set and no key is generated, the PUT response
+will be empty.
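+
+For comparison, here is a minimal sketch of the same PUT using the
+official Erlang client (`riakc`), mirroring the wire-level example
+below; the host and port are assumptions:
+
+```erlang
+%% Sketch: store {"foo":"bar"} under b/k with w=2 and return_body=true.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+Obj = riakc_obj:new(<<"b">>, <<"k">>, <<"{\"foo\":\"bar\"}">>, <<"application/json">>),
+{ok, Stored} = riakc_pb_socket:put(Pid, Obj, [{w, 2}, return_body]),
+Value = riakc_obj:get_value(Stored).
+```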
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B
+         22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01
+Erlang   <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34,
+           98,97,114,34,125,40,2,56,1>>
+
+RpbPutReq protoc decode:
+bucket: "b"
+key: "k"
+content {
+  value: "{"foo":"bar"}"
+}
+w: 2
+return_body: true
+
+```
+
+#### Response
+
+```
+Hex      00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A
+         22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39
+         36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF
+         B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60
+         CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC
+         0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0
+         12 FA 20 89 2C 00
+Erlang   <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125,
+           42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86,
+           106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96,
+           96,96,202,96,202,5,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33,
+           182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>>
+
+RpbPutResp protoc decode:
+contents {
+  value: "{"foo":"bar"}"
+  vtag: "1caykOD96iNAhomyeVjOYC"
+  last_mod: 1271453743
+  last_mod_usecs: 406416
+}
+vclock: "k316a```312`312005R,,351014206031L211214y254014Z!266G371
+302l315I254rw|240022372 211,000"
+
+```
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-delete.md
new file mode 100644
index 0000000000..2ce4cfb237
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-delete.md
@@ -0,0 +1,41 @@
+---
+title: "PBC Yokozuna Index Delete"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Yokozuna Index Delete"
+    identifier: "pbc_yz_index_delete"
+    weight: 122
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/yz-index-delete
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/yz-index-delete
+  - /riak/2.9.0p5/developing/api/protocol-buffers/yz-index-delete/
+  - /riak/2.9.0/developing/api/protocol-buffers/yz-index-delete/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/yz-index-delete/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/yz-index-delete/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/yz-index-delete/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/yz-index-delete/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/yz-index-delete/
+---
+
+
+Delete a search index.
+
+## Request
+
+The `name` parameter is the name of the index to delete, as a binary.
+
+```protobuf
+message RpbYokozunaIndexDeleteReq {
+    required bytes name = 1;
+}
+```
+
+## Response
+
+Returns a [RpbDelResp]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/#message-codes) code with no data on success.
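+
+A minimal client-level sketch, assuming the official Erlang client
+(`riakc`) and a placeholder index name:
+
+```erlang
+%% Sketch: delete the search index "my_index".
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+ok = riakc_pb_socket:delete_search_index(Pid, <<"my_index">>).
+```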
+ diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..8e6835b8bf --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,67 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/yz-index-get + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/yz-index-get + - /riak/2.9.0p5/developing/api/protocol-buffers/yz-index-get/ + - /riak/2.9.0/developing/api/protocol-buffers/yz-index-get/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/yz-index-get/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/yz-index-get/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/yz-index-get/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/yz-index-get/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/yz-index-get/ +--- + + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. 
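+
+A minimal sketch of both forms of the request using the official Erlang
+client (`riakc`); the index name is a placeholder:
+
+```erlang
+%% Sketch: fetch one index by name, then list all indexes.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Index} = riakc_pb_socket:get_search_index(Pid, <<"my_index">>),
+{ok, AllIndexes} = riakc_pb_socket:list_search_indexes(Pid).
+```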
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..db056b074d --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,53 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/yz-index-put + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/yz-index-put + - /riak/2.9.0p5/developing/api/protocol-buffers/yz-index-put/ + - /riak/2.9.0/developing/api/protocol-buffers/yz-index-put/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/yz-index-put/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/yz-index-put/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/yz-index-put/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/yz-index-put/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/yz-index-put/ +--- + + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/#message-codes) code with no data on success. + diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-schema-get.md new file mode 100644 index 0000000000..979bf0b758 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-schema-get.md @@ -0,0 +1,56 @@ +--- +title: "PBC Yokozuna Schema Get" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Yokozuna Schema Get" + identifier: "pbc_yz_schema_get" + weight: 123 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.0p5/dev/references/protocol-buffers/yz-schema-get + - /riak/kv/2.9.0p5/dev/references/protocol-buffers/yz-schema-get + - /riak/2.9.0p5/developing/api/protocol-buffers/yz-schema-get/ + - /riak/2.9.0/developing/api/protocol-buffers/yz-schema-get/ + - /riak/kv/2.9.0/developing/api/protocol-buffers/yz-schema-get/ + - /riak/kv/2.9.0p1/developing/api/protocol-buffers/yz-schema-get/ + - /riak/kv/2.9.0p2/developing/api/protocol-buffers/yz-schema-get/ + - /riak/kv/2.9.0p3/developing/api/protocol-buffers/yz-schema-get/ + - /riak/kv/2.9.0p4/developing/api/protocol-buffers/yz-schema-get/ +--- + + +Fetch a [search schema]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas) from Riak Search. 
+
+## Request
+
+In a request message, you only need to specify the name of the schema as
+a binary (under `name`):
+
+```protobuf
+message RpbYokozunaSchemaGetReq {
+  required bytes name = 1; // Schema name
+}
+```
+
+## Response
+
+```protobuf
+message RpbYokozunaSchemaGetResp {
+  required RpbYokozunaSchema schema = 1;
+}
+```
+
+The response message will include a `RpbYokozunaSchema` structure.
+
+```protobuf
+message RpbYokozunaSchema {
+  required bytes name = 1;
+  optional bytes content = 2;
+}
+```
+
+This message includes the schema `name` and its XML `content`.
diff --git a/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-schema-put.md
new file mode 100644
index 0000000000..58f4eb4bfb
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/protocol-buffers/yz-schema-put.md
@@ -0,0 +1,49 @@
+---
+title: "PBC Yokozuna Schema Put"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Yokozuna Schema Put"
+    identifier: "pbc_yz_schema_put"
+    weight: 124
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/references/protocol-buffers/yz-schema-put
+  - /riak/kv/2.9.0p5/dev/references/protocol-buffers/yz-schema-put
+  - /riak/2.9.0p5/developing/api/protocol-buffers/yz-schema-put/
+  - /riak/2.9.0/developing/api/protocol-buffers/yz-schema-put/
+  - /riak/kv/2.9.0/developing/api/protocol-buffers/yz-schema-put/
+  - /riak/kv/2.9.0p1/developing/api/protocol-buffers/yz-schema-put/
+  - /riak/kv/2.9.0p2/developing/api/protocol-buffers/yz-schema-put/
+  - /riak/kv/2.9.0p3/developing/api/protocol-buffers/yz-schema-put/
+  - /riak/kv/2.9.0p4/developing/api/protocol-buffers/yz-schema-put/
+---
+
+
+Create a new Solr [search schema]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas).
+
+## Request
+
+```protobuf
+message RpbYokozunaSchemaPutReq {
+  required RpbYokozunaSchema schema = 1;
+}
+```
+
+Each message must contain a `RpbYokozunaSchema` object structure.
+
+```protobuf
+message RpbYokozunaSchema {
+  required bytes name = 1;
+  optional bytes content = 2;
+}
+```
+
+This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas) `content` as XML.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.0p5/developing/api/protocol-buffers/#message-codes) code with no data on success.
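+
+As a rough illustration of the PUT from application code, here is a minimal
+sketch using the official [Erlang client](https://github.com/basho/riak-erlang-client);
+the schema name and file path are placeholder assumptions:
+
+```erlang
+%% Sketch: upload a Solr schema, then fetch it back, via riakc_pb_socket.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, SchemaXML} = file:read_file("my_schema.xml"),
+ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXML),
+
+%% Reading the schema back returns its name and XML content.
+{ok, Schema} = riakc_pb_socket:get_search_schema(Pid, <<"my_schema">>).
+```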
diff --git a/content/riak/kv/2.9.0p5/developing/api/repl-hooks.md b/content/riak/kv/2.9.0p5/developing/api/repl-hooks.md
new file mode 100644
index 0000000000..ca72a17e25
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/api/repl-hooks.md
@@ -0,0 +1,200 @@
+---
+title_supertext: "Riak Multi-Datacenter Replication:"
+title: "Hooks API"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Multi-Datacenter REPL Hooks API"
+    identifier: "apis_repl_hooks"
+    weight: 100
+    parent: "developing_apis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/ops/mdc/v2/hooks
+  - /riak/kv/2.9.0p5/ops/mdc/v2/hooks
+  - /riak/2.9.0p5/developing/api/repl-hooks/
+  - /riak/2.9.0/developing/api/repl-hooks/
+  - /riak/kv/2.9.0/developing/api/repl-hooks/
+  - /riak/kv/2.9.0p1/developing/api/repl-hooks/
+  - /riak/kv/2.9.0p2/developing/api/repl-hooks/
+  - /riak/kv/2.9.0p3/developing/api/repl-hooks/
+  - /riak/kv/2.9.0p4/developing/api/repl-hooks/
+---
+
+[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl
+
+This document is a guide to developing extensions for Riak's
+Multi-Datacenter Replication feature.
+
+## Replication Hooks
+
+Riak allows applications to register replication hooks to control
+either of the following:
+
+* when extra objects need to be replicated along with the current object
+* when an object should _not_ be replicated.
+
+To register a hook, you must call the following function in an
+application-specific Erlang module, where `MyMod` is to be replaced
+with the name of your custom module:
+
+```erlang
+riak_core:register([{repl_helper, MyMod}]).
+```
+
+## Replication Hook API
+
+A replication hook must implement the following functions:
+
+### send_realtime/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook controls whether an [object][object]
+replicated in realtime should be sent. To send this object, return `ok`;
+to prevent the object from being sent, return `cancel`. You can also
+return a list of Riak objects to be replicated immediately *before* the
+current object. This is useful when you have an object that refers to
+other objects, e.g. a chunked file, and want to ensure that all of the
+dependency objects are replicated before the dependent object.
+
+### send/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook is used in fullsync replication. To send this
+[object][object],
+return `ok`; to prevent the object from being sent, return `cancel`. You
+can also return a list of Riak objects to be replicated immediately
+*before* the current object. This is useful when you have an object
+that refers to other objects, e.g. a chunked file, and want to ensure
+that all the dependency objects are replicated before the dependent
+object.
+
+### recv/1
+
+```erlang
+(riak_object) -> ok | cancel
+```
+
+When an [object][object]
+is received by the client site, this hook is run. You can use it to
+update metadata or to deny the object.
+
+## Implementing a Sample Replication Hook
+
+The following is a simple replication hook that will log when an object
+is received via replication. For more information about the functions in
+the sample, see the [Replication Hook API](#replication-hook-api) section above.
+
+Here is the relevant Erlang code:
+
+```erlang
+%% Riak Enterprise MDC replication hook sample
+
+-module(riak_replication_hook_sample).
+-export([register/0]).
+-export([recv/1, send/2, send_realtime/2]).
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + diff --git a/content/riak/kv/2.9.0p5/developing/app-guide.md b/content/riak/kv/2.9.0p5/developing/app-guide.md new file mode 100644 index 0000000000..e834359f63 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/app-guide.md @@ -0,0 +1,424 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/2.9.0p5/dev/using/application-guide/ + - /riak/kv/2.9.0p5/dev/using/application-guide/ + - /riak/2.9.0p5/developing/app-guide/ + - /riak/2.9.0/developing/app-guide/ + - /riak/kv/2.9.0/developing/app-guide/ + - /riak/kv/2.9.0p1/developing/app-guide/ + - /riak/kv/2.9.0p2/developing/app-guide/ + - /riak/kv/2.9.0p3/developing/app-guide/ + - /riak/kv/2.9.0p4/developing/app-guide/ +--- + + +[usage conflict resolution]: {{}}riak/kv/2.9.0p5/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.9.0p5/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.9.0p5/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.9.0p5/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.9.0p5/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.9.0p5/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.9.0p5/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.9.0p5/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.9.0p5/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.9.0p5/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.9.0p5/developing/usage/search +[use ref search]: {{}}riak/kv/2.9.0p5/using/reference/search +[usage 2i]: {{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.9.0p5/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.9.0p5/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.9.0p5/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.9.0p5/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.9.0p5/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.9.0p5/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.0p5/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/2.9.0p5/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/2.9.0p5/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.0p5/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.0p5/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.0p5/developing/getting-started/csharp/object-modeling +[obj model nodejs]: 
{{}}riak/kv/2.9.0p5/developing/getting-started/nodejs/object-modeling
+[obj model erlang]: {{}}riak/kv/2.9.0p5/developing/getting-started/erlang/object-modeling
+[obj model golang]: {{}}riak/kv/2.9.0p5/developing/getting-started/golang/object-modeling
+[concept strong consistency]: {{}}riak/kv/2.9.0p5/using/reference/strong-consistency
+[use ref strong consistency]: {{}}riak/kv/2.9.0p5/using/reference/strong-consistency
+[cluster ops strong consistency]: {{}}riak/kv/2.9.0p5/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/2.9.0p5/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/2.9.0p5/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/2.9.0p5/setup/installing
+[getting started]: {{}}riak/kv/2.9.0p5/developing/getting-started
+[usage index]: {{}}riak/kv/2.9.0p5/developing/usage
+[glossary]: {{}}riak/kv/2.9.0p5/learn/glossary
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answers to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data** --- While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects** --- Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects** --- Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys** --- It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects.
Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]** --- If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so. But if
+  your data can be modeled as a [counter][dev data types#counters],
+  [set][dev data types#sets], or [map][dev data types#maps], you
+  should seriously consider using [Riak Data Types][dev data types],
+  which can speed application development and transfer a great deal of
+  complexity away from the application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you use it to store:
+
+* **Objects that exceed 1-2MB in size** --- If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies** --- If your data cannot be
+  easily denormalized or if it requires that objects can be easily
+  assembled into and accessible as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommended for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search] --- Getting started with Riak Search
+* [Search Details][use ref search] --- A detailed overview of the concepts and design
+  considerations behind Riak Search
+* [Search Schema]({{}}riak/kv/2.9.0p5/developing/usage/search-schemas) --- How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API** --- Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] (documentation coming soon).
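+
+As a rough sketch of what this looks like in practice, here is a Solr-style
+query issued through the official Erlang client; the index name and query
+string are placeholder assumptions:
+
+```erlang
+%% Sketch: run a Solr query against a search index via riakc_pb_socket.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>).
+```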
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you.
+
+### When Not to Use Search
+
+* **When deep pagination is needed** --- At the moment, you should
+  consider [secondary indexes][usage 2i] instead of
+  Search if your use case requires deep pagination. This will be
+  changed, however, in a future release of Riak, at which point you
+  should consider Search the default choice for _all_ querying needs.
+* **In large clusters** --- In clusters larger than 8-10 nodes, you may
+  experience slower performance when using Search. In clusters of that
+  size, we would recommend using Search in a limited fashion, setting
+  up a separate, dedicated cluster for Search data, or finding another
+  solution.
+
+## Riak Data Types
+
+When performing basic K/V operations, Riak is agnostic toward the actual
+data stored within objects. Beginning with Riak 2.0, however, you now
+have access to operations-based objects based on academic research on
+[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak
+Data Types enable you to update and read [counters][dev data types#counters],
+[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps.
+
+The beauty of Riak Data Types is that all convergence logic is handled
+by Riak itself according to deterministic, Data Type-specific rules,
+which means that your application doesn't need to reason about
+[siblings][usage conflict resolution]. In many cases, this can
+unburden applications of the need to handle object convergence on their
+own.
+
+* [Using Data Types][dev data types] --- A guide to setting up Riak to use Data Types,
+  including a variety of code samples for all of Basho's official
+  [client libraries][dev client libraries]
+* [Data Types][concept crdts] --- A theoretical treatment of Riak Data Types, along
+  with implementation details
+* [Data Modeling with Riak Data Types][dev data model] --- An object modeling example that relies on Riak Data Types.
+
+> **Note**:
+>
+> Riak Data Types can be used in conjunction with Riak Search,
+meaning that the data stored in counters, sets, and maps can be indexed
+and searched just like any other data in Riak. Documentation on Data
+Types and Search is coming soon.
+
+### When to Use Riak Data Types
+
+* **When your data fits** --- If the data that you're storing can be
+  modeled as one of the five available types, Riak Data Types could be a
+  very good option. Please note that in many cases there may not be a
+  1:1 correspondence between the five available types and the data that
+  you'd like to store, but there may be workarounds to close the gap.
+  Most things that can be stored as JSON, for example, can be stored as
+  maps (though with modifications).
+* **When you don't need to reason about siblings** --- If your use case
+  doesn't require that your application have access to siblings and
+  allows for sibling convergence logic to take place at the Riak level
+  rather than at the application level, then Riak Data Types are well
+  worth exploring.
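+
+As a minimal sketch of the programming model, here is a counter increment via
+the official Erlang client; it assumes a bucket type named `<<"counters">>`
+has already been created with `datatype = counter`:
+
+```erlang
+%% Sketch: increment a counter; Riak itself converges concurrent updates.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+Counter = riakc_counter:increment(1, riakc_counter:new()),
+ok = riakc_pb_socket:update_type(Pid,
+                                 {<<"counters">>, <<"page_views">>},
+                                 <<"homepage">>,
+                                 riakc_counter:to_op(Counter)).
+```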
+
+### When Not to Use Riak Data Types
+
+* **When you need to provide your own convergence logic** --- If your
+  application needs to have access to all sibling values, then Riak Data
+  Types are not a good choice because they by definition do not produce
+  siblings.
+* **When your data just doesn't fit** --- While the five existing Data
+  Types allow for a great deal of flexibility and a wide range of use
+  cases, they don't cover all use cases. If you have data that requires
+  a modeling solution the available types can't cover, you should stick
+  to standard K/V operations.
+* **When object size is of significant concern** --- Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth. Riak
+comes equipped with a set of default MapReduce jobs that you can employ,
+or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce] --- A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce] --- A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only** --- You should use MapReduce only when
+  truly necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do** --- Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i] --- A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i] --- Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination** --- At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead.
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes** --- If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes.
+* **If you're using Bitcask** --- 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy in accordance with one of these Data Types. To
+see if this feature might be a good fit for your application, we
+recommend checking out the following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers who want to use Riak as a strict
+CP system.
One of the advantages of Riak's approach to strong +consistency is that you don't need to store all of your data in a +strongly consistent fashion if you use this feature. Instead, you can +mix and match a CP approach with an AP approach in a single cluster in +any way you wish. + +If you need some or all of your data to be subject to strong consistency +requirements, we recommend checking out the following documentation: + +* [Strong Consistency][use ref strong consistency] +* [Using Strong Consistency][apps strong consistency] +* [Managing Strong Consistency][cluster ops strong consistency] + +## Are Your Objects Mutable? + +Although Riak always performs best when storing and retrieving immutable +data, Riak also handles mutable objects very ably using a variety of +eventual consistency principles. Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] --- Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] --- A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] --- How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] --- A guide to basic key/value operations and other common tasks in Riak KV. 
+* [Riak KV Glossary][glossary] --- A listing of frequently used terms in Riak's + documentation + diff --git a/content/riak/kv/2.9.0p5/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.9.0p5/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..81a4f683fb --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,806 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/2.9.0p5/dev/advanced/mapreduce/ + - /riak/kv/2.9.0p5/dev/advanced/mapreduce/ + - /riak/2.9.0p5/developing/app-guide/advanced-mapreduce/ + - /riak/2.9.0/developing/app-guide/advanced-mapreduce/ + - /riak/kv/2.9.0/developing/app-guide/advanced-mapreduce/ + - /riak/kv/2.9.0p1/developing/app-guide/advanced-mapreduce/ + - /riak/kv/2.9.0p2/developing/app-guide/advanced-mapreduce/ + - /riak/kv/2.9.0p3/developing/app-guide/advanced-mapreduce/ + - /riak/kv/2.9.0p4/developing/app-guide/advanced-mapreduce/ +--- + + +[usage 2i]: {{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.9.0p5/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.9.0p5/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.9.0p5/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.9.0p5/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. Riak KV allows you to +run MapReduce jobs using Erlang or JavaScript. + +{{% note title="Deprecation Warning" %}} +Javascript MapReduce is deprecated and will be removed in a future version. +{{% /note %}} + + +### Why Do We Use MapReduce for Querying Riak KV? + +Key/value stores like Riak KV generally do not offer the kinds of complex +querying capabilities found in other data storage systems, such as +relational databases. MapReduce enables you to perform powerful queries +over the data stored in Riak KV but should be used with caution. + +The main goal of MapReduce is to spread the processing of a query across +many systems to take advantage of parallel processing power. This is +generally done by dividing the query into several steps, i.e. 
dividing
+the dataset into several chunks and then running those step/chunk pairs
+on separate physical hosts. Riak KV's MapReduce has an additional goal:
+increasing data locality. When processing a large dataset, it's often
+much more efficient to take the computation to the data than it is to
+bring the data to the computation.
+
+"Map" and "Reduce" are phases in the query process. Map functions take
+one piece of data as input and produce zero or more results as output.
+If you're familiar with [mapping over a list][mapping list]
+in functional programming languages, you're already familiar with the
+"Map" steps in a MapReduce query.
+
+## MapReduce caveats
+
+MapReduce should generally be treated as a fallback rather than a
+standard part of an application. There are often ways to model data
+such that dynamic queries become single key retrievals, which are
+dramatically faster and more reliable in Riak KV, and tools such as Riak
+Search and 2i are simpler to use and may place less strain on a
+cluster.
+
+### R=1
+
+One consequence of Riak KV's processing model is that MapReduce queries
+have an effective `R` value of 1. The queries are distributed
+to a representative sample of the cluster where the data is expected to
+be found, and if one server lacks a copy of data it's supposed to have,
+a MapReduce job will not attempt to look for it elsewhere.
+
+For more on the value of `R`, see our documentation on [replication properties][apps replication properties].
+
+### Key lists
+
+Asking Riak KV to generate a list of all keys in a production environment
+is generally a bad idea. It's an expensive operation.
+
+Attempting to constrain that operation to a bucket (e.g.,
+`mapred_bucket` as used below) does not help because Riak KV must still
+pull all keys from storage to determine which ones are in the
+specified bucket.
+
+If at all possible, run MapReduce against a list of known keys.
+
+### Code distribution
+
+As we'll discuss in this document, the functions invoked from Erlang
+MapReduce must be available on all servers in the cluster unless
+using the client library from an Erlang shell.
+
+### Security restrictions
+
+If Riak's security functionality is enabled, there are two
+restrictions on MapReduce that come into play:
+
+* The `riak_kv.mapreduce` permission must be granted to the user (or
+  via the user's groups)
+* Other than the module `riak_kv_mapreduce`, any Erlang modules
+  distributed with Riak KV will **not** be accessible to custom MapReduce
+  code unless made available via the `add_path` mechanism documented
+  in [Installing Custom Code][use ref custom code].
+
+## How Riak KV's MapReduce Queries Are Specified
+
+MapReduce queries in Riak KV have two components: (1) a list of inputs and
+(2) a list of "steps," or "phases."
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any is included, with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it.
+
+### How a Link Phase Works in Riak KV
+
+Link phases find links matching patterns specified in the query
+definition. The patterns specify which buckets and tags links must have.
+
+"Following a link" means adding it to the output list of this phase. The
+output of this phase is often most useful as input to a map phase or to
+another reduce phase.
+
+## Invoking MapReduce
+
+To illustrate some key ideas, we'll define a simple module that
+implements a map function to return the bucket/key pairs contained in a
+bucket and use it in a MapReduce query via Riak KV's HTTP API.
+
+Here is our example MapReduce function:
+
+```erlang
+-module(mr_example).
+
+-export([get_keys/3]).
+
+% Returns bucket and key pairs from a map phase
+get_keys(Value,_Keydata,_Arg) ->
+  [{riak_object:bucket(Value),riak_object:key(Value)}].
+```
+
+Save this file as `mr_example.erl` and proceed to compile the module.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the
+Riak KV installation or the version of Erlang used when compiling Riak KV from
+source.
+{{% /note %}}
+
+Compiling the module is a straightforward process:
+
+```bash
+erlc mr_example.erl
+```
+
+Successful compilation will result in a new `.beam` file, `mr_example.beam`.
+
+Send this file to your operator, or read about [installing custom code][use ref custom code]
+on your Riak KV nodes. Once your file has been installed, all that
+remains is to try the custom function in a MapReduce query. For
+example, let's return keys contained within a bucket named `messages`
+(please pick a bucket which contains keys in your environment).
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}'
+```
+
+The result should be a JSON map of bucket and key names expressed as key/value pairs.
+
+{{% note %}}
+Be sure to install the MapReduce function as described above on all of
+the nodes in your cluster to ensure proper operation.
+{{% /note %}}
+
+
+## Phase functions
+
+MapReduce phase functions have the same properties, arguments, and
+return values whether you write them in JavaScript or Erlang.
+
+### Map phase functions
+
+Map functions take three arguments (in Erlang, arity-3 is required).
+Those arguments are:
+
+  1. `Value`: the value found at a key. This will be a Riak object, which
+     in Erlang is defined and manipulated by the `riak_object` module.
+     In JavaScript, a Riak object looks like this:
+
+     ```javascript
+     {
+      "bucket_type" : BucketTypeAsString,
+      "bucket" : BucketAsString,
+      "key" : KeyAsString,
+      "vclock" : VclockAsString,
+      "values" : [
+        {
+          "metadata" : {
+            "X-Riak-VTag":VtagAsString,
+            "X-Riak-Last-Modified":LastModAsString,
+            "Links":[...List of link objects],
+            // ...other metadata...
+          },
+          "data" : ObjectData
+        },
+        // ...other metadata/data values (siblings)...
+      ]
+     }
+     ```
+  2. `KeyData`: key data that was submitted with the inputs to the query or phase.
+  3. `Arg`: a static argument for the entire phase that was submitted with the query.
+
+A map phase should produce a list of results. You will see errors if
+the output of your map function is not a list. Return the empty list if
+your map function chooses not to produce output. If your map phase is
+followed by another map phase, the output of the function must be
+compatible with the input to a map phase --- a list of bucket-key pairs or
+`bucket-key-keydata` triples.
+
+#### Map function examples
+
+This map function returns the value (data) of the object being mapped:
+
+```erlang
+fun(Value, _KeyData, _Arg) ->
+  [riak_object:get_value(Value)]
+end.
+```
+
+This map function filters its inputs based on the arg and returns bucket-key pairs for a subsequent map phase:
+
+```erlang
+fun(Value, _KeyData, Arg) ->
+  Key = riak_object:key(Value),
+  Bucket = riak_object:bucket(Value),
+  case erlang:byte_size(Key) of
+    L when L > Arg ->
+      [{Bucket,Key}];
+    _ -> []
+  end
+end.
+```
+
+### Reduce phase functions
+
+Reduce functions take two arguments. Those arguments are:
+
+1. `ValueList`: the list of values produced by the preceding phase in the MapReduce query.
+2. `Arg`: a static argument for the entire phase that was submitted with the query.
+
+A reduce function should produce a list of values, but it must also be
+true that the function is commutative, associative, and idempotent. That
+is, if the input list `[a,b,c,d]` is valid for a given F, then all of
+the following must produce the same result:
+
+
+```erlang
+  F([a,b,c,d])
+  F([a,d] ++ F([c,b]))
+  F([F([a]),F([c]),F([b]),F([d])])
+```
+
+#### Reduce function examples
+
+This reduce function assumes the values in the input are numbers and
+sums them:
+
+```erlang
+fun(Values, _Arg) ->
+  [lists:foldl(fun erlang:'+'/2, 0, Values)]
+end.
+```
+
+This reduce function sorts its inputs:
+
+```erlang
+fun(Values, _Arg) ->
+  lists:sort(Values)
+end.
+```
+
+## MapReduce Examples
+
+Riak KV supports describing MapReduce queries in Erlang syntax through the
+Protocol Buffers API. This section demonstrates how to do so using the
+Erlang client.
+
+{{% note title="Distributing Erlang MapReduce Code" %}}
+Any modules and functions you use in your Erlang MapReduce calls must be
+available on all nodes in the cluster. Please read about
+[installing custom code]({{}}riak/kv/2.9.0p5/using/reference/custom-code).
+{{% /note %}}
+
+### Erlang Example
+
+Before running some MapReduce queries, let's create some objects to
+run them on. Unlike the first example, when we compiled
+`mr_example.erl` and distributed it across the cluster, this time
+we'll use the [Erlang client library][erlang client] and shell.
+
+```erlang
+1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087).
+2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>,
+                        term_to_binary(["eggs", "bacon"])).
+3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>,
+                         term_to_binary(["bread", "bacon"])).
+4> riakc_pb_socket:put(Client, Yours, [{w, 1}]).
+5> riakc_pb_socket:put(Client, Mine, [{w, 1}]).
+```
+
+Now that we have a client and some data, let's run a query that counts
+the occurrences of each grocery item.
+
+```erlang
+6> Count = fun(G, undefined, none) ->
+             [dict:from_list([{I, 1}
+              || I <- binary_to_term(riak_object:get_value(G))])]
+           end.
+7> Merge = fun(Gcounts, none) ->
+             [lists:foldl(fun(G, Acc) ->
+                            dict:merge(fun(_, X, Y) -> X+Y end,
+                                       G, Acc)
+                          end,
+                          dict:new(),
+                          Gcounts)]
+           end.
+8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred(
+                        Client,
+                        [{<<"groceries">>, <<"mine">>},
+                         {<<"groceries">>, <<"yours">>}],
+                        [{map, {qfun, Count}, none, false},
+                         {reduce, {qfun, Merge}, none, true}]).
+9> L = dict:to_list(R).
+```
+
+{{% note title="Riak Object Representations" %}}
+Note how the `riak_object` module is used in the MapReduce
+function but the `riakc_obj` module is used on the client.
+Riak objects are represented differently internally to the cluster than
+they are externally.
+{{% /note %}}
+
+Given the lists of groceries we created, the sequence of commands above
+would result in `L` being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`.
+
+### Erlang Query Syntax
+
+`riakc_pb_socket:mapred/3` takes a client and two lists as arguments.
+The first list contains bucket-key pairs. The second list contains
+the phases of the query.
+
+`riakc_pb_socket:mapred_bucket/3` replaces the first list of
+bucket-key pairs with the name of a bucket; see the warnings above
+about using this in a production environment.
+
+#### Inputs
+
+The `mapred/3` input objects are given as a list of tuples in the
+format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and
+`Key` should be binaries, and `KeyData` can be any Erlang term. The
+former form is equivalent to `{{Bucket,Key},undefined}`.
+
+#### Query
+
+The query is given as a list of map, reduce, and link phases. Map and
+reduce phases are each expressed as tuples in the following form:
+
+
+```erlang
+{Type, FunTerm, Arg, Keep}
+```
+
+`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument
+(any Erlang term) to pass to each execution of the phase. `Keep` is
+either `true` or `false` and determines whether results from the phase
+will be included in the final value of the query. Riak KV assumes that the
+final phase will return results.
+
+`FunTerm` is a reference to the function that the phase will execute and
+takes any of the following forms:
+
+* `{modfun, Module, Function}` where `Module` and `Function` are atoms
+  that name an Erlang function in a specific module
+* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous
+  function)
+* `{jsfun,Name}` where `Name` is a binary that, when evaluated in
+  JavaScript, points to a built-in JavaScript function
+* `{jsanon, Source}` where `Source` is a binary that, when evaluated in
+  JavaScript, is an anonymous function
+* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains
+  the source for an anonymous JavaScript function
+
+{{% note title="qfun Note" %}}
+Using `qfun` in compiled applications can be a fragile
+operation. Please keep the following points in mind:
+
+1. The module in which the function is defined must be present and
+exactly the same version on both the client and Riak KV nodes.
+
+2. Any modules and functions used by this function (or any function in
+the resulting call stack) must also be present on the Riak KV nodes.
+
+Errors about failures to ensure both 1 and 2 are often surprising,
+usually seen as opaque missing-function or function-clause
+errors. Especially in the case of differing module versions, this can be
+difficult to diagnose without expecting the issue and knowing of
+`Module:info/0`.
+
+When using the Erlang shell, anonymous MapReduce functions can be
+defined and sent to Riak KV instead of deploying them to all servers in
+advance, but condition #2 above still holds.
+{{% /note %}}
+
+Link phases are expressed in the following form:
+
+
+```erlang
+{link, Bucket, Tag, Keep}
+```
+
+
+`Bucket` is either a binary name of a bucket to match, or the atom `_`,
+which matches any bucket. `Tag` is either a binary tag to match, or the
+atom `_`, which matches any tag. `Keep` has the same meaning as in map
+and reduce phases.
+
+
+> There is a small group of prebuilt Erlang MapReduce functions available
+with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl).
+
+## Bigger Data Examples
+
+### Loading Data
+
+This Erlang script will load historical stock-price data for Google
+(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it.
+Paste the code below into a file called `load_data.erl` inside the `dev`
+directory (or download it below).
+
+```erlang
+#!/usr/bin/env escript
+%% -*- erlang -*-
+main([]) ->
+  io:format("Requires one argument: filename with the CSV data~n");
+main([Filename]) ->
+  {ok, Data} = file:read_file(Filename),
+  Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])),
+  lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines).
+
+format_and_insert(Line) ->
+  JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line),
+  Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]),
+  io:format("Inserting: ~s~n", [hd(Line)]),
+  os:cmd(Command).
+```
+
+Make the script executable:
+
+```bash
+chmod +x load_data.erl
+```
+
+Download the CSV file of stock data linked below and place it in the
+`dev` directory where we've been working.
+
+* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) --- Google historical stock data
+* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) --- Alternative script in Ruby to load the data
+* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) --- Erlang script to load data (as shown in snippet)
+
+Now load the data into Riak KV.
+
+```bash
+./load_data.erl goog.csv
+```
+
+
+### Map only: find the days on which the high was over $600.00
+
+From the Erlang shell with the client library loaded, let's define a
+function which will check each value in our `goog` bucket to see if
+the stock's high for the day was above $600.
+
+```erlang
+> HighFun = fun(O, _, LowVal) ->
+>   {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+>   High = proplists:get_value(<<"High">>, Map, -1.0),
+>   case High > LowVal of
+>      true -> [riak_object:key(O)];
+>      false -> []
+>   end end.
+#Fun
+```
+
+Now we'll use `mapred_bucket/3` to send that function to the cluster.
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]).
+  {ok,[{0,
+        [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>,
+         <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>,
+         <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>,
+         <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>,
+         <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>,
+         <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>,
+         <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>,
+         <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]}
+```
+
+### Map only: find the days on which the close is lower than open
+
+This example is slightly more complicated: instead of comparing a
+single field against a fixed value, we're looking for days when the
+stock declined.
+
+```erlang
+> CloseLowerFun = fun(O, _, _) ->
+>   {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+>   Close = proplists:get_value(<<"Close">>, Map, -1.0),
+>   Open = proplists:get_value(<<"Open">>, Map, -2.0),
+>   case Close < Open of
+>      true -> [riak_object:key(O)];
+>      false -> []
+>   end end.
+#Fun
+
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]).
+{ok,[{0,
+      [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>,
+       <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>,
+       <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>,
+       <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>,
+       <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>,
+       <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>,
+       <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>,
+       <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]}
+```
+
+### Map and Reduce: find the maximum daily variance in price by month
+
+Here things start to get tricky. We'll use map to compute each day's
+variance between high and low, and our reduce phase will identify each
+month's largest variance.
+
+```erlang
+DailyMap = fun(O, _, _) ->
+  {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+  Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")),
+  High = proplists:get_value(<<"High">>, Map, 0.0),
+  Low = proplists:get_value(<<"Low">>, Map, 0.0),
+  Month = string:substr(Date, 1, 7),
+  [{Month, abs(High - Low)}]
+end.
+
+MonthReduce = fun(List, _) ->
+  {Highs, _} = lists:foldl(
+    fun({Month, _Value}=Item, {Accum, PrevMonth}) ->
+      case Month of
+        PrevMonth ->
+          %% Highest value is always first in the list, so
+          %% skip over this one
+          {Accum, PrevMonth};
+        _ ->
+          {[Item] ++ Accum, Month}
+      end
+    end,
+    {[], ""},
+    List),
+  Highs
+end.
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]).
+{ok,[{1,
+      [{"2010-02",10.099999999999909},
+       {"2006-02",11.420000000000016},
+       {"2004-08",8.100000000000009},
+       {"2008-08",14.490000000000009},
+       {"2006-05",11.829999999999984},
+       {"2005-10",4.539999999999964},
+       {"2006-06",7.300000000000011},
+       {"2008-06",9.690000000000055},
+       {"2006-03",11.770000000000039},
+       {"2006-12",4.880000000000052},
+       {"2005-09",9.050000000000011},
+       {"2008-03",15.829999999999984},
+       {"2008-09",14.889999999999986},
+       {"2010-04",9.149999999999977},
+       {"2008-06",14.909999999999968},
+       {"2008-05",13.960000000000036},
+       {"2005-05",2.780000000000001},
+       {"2005-07",6.680000000000007},
+       {"2008-10",21.390000000000043},
+       {"2009-09",4.180000000000007},
+       {"2006-08",8.319999999999993},
+       {"2007-08",5.990000000000009},
+       {[...],...},
+       {...}|...]}]}
+```
+
+### A MapReduce Challenge
+
+Here is a scenario involving the data you already have loaded.
+
+MapReduce Challenge: Find the largest day for each month in terms of
+dollars traded, and subsequently the largest overall day.
+
+*Hint*: You will need at least one each of map and reduce phases.
+
+## Streaming MapReduce
+
+Because Riak KV distributes the map phases across the cluster to increase
+data locality, you can gain access to the results of those individual
+computations as they finish via streaming. Streaming can be very
+helpful when getting access to results from a high latency MapReduce job
+that only contains map phases. Streaming of results from reduce phases
+isn't as useful, but if your map phases return data (keep: true), they
+will be returned to the client even if the reduce phases haven't
+executed. This lets you stream the map-phase results while the job runs
+and then receive the reduce-phase result once it completes at the end.
+
+### Streaming via the HTTP API
+
+You can enable streaming with MapReduce jobs submitted to the `/mapred`
+resource by adding `?chunked=true` to the URL. The response will be sent
+using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`.
+Be aware that if you are streaming a set of serialized objects (like
+JSON objects), the chunks are not guaranteed to be separated along the
+same boundaries that your serialized objects are. For example, a chunk
+may end in the middle of a string representing a JSON object, so you
+will need to decode and parse your responses appropriately in the
+client.
+
+### Streaming via the Erlang API
+
+You can use streaming with Erlang via the Riak KV local client or the
+Erlang Protocol Buffers API. In either case, you will provide the call
+to `mapred_stream` with a `Pid` that will receive the streaming results.
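+
+As a rough sketch (assuming the Erlang client and the message shapes used by
+`riakc_pb_socket:mapred_stream/4`), a receive loop for streamed results might
+look like this:
+
+```erlang
+%% Sketch: stream MapReduce results to the calling process.
+Collect = fun Loop(ReqId, Acc) ->
+    receive
+        {ReqId, {mapred, _Phase, Data}} -> Loop(ReqId, [Data | Acc]);
+        {ReqId, done}                   -> lists:reverse(Acc)
+    after 60000 -> {error, timeout}
+    end
+end,
+{ok, ReqId} = riakc_pb_socket:mapred_stream(Pid, Inputs, Query, self()),
+Collect(ReqId, []).
+```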
+
+For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl)
+
+
+## Troubleshooting MapReduce, illustrated
+
+The most important advice: when developing Erlang MapReduce against
+Riak KV, prototype against a development environment using the Erlang
+shell. The shell allows for rapid feedback and iteration; once code
+needs to be deployed to a server for production use, changing it is
+more time-consuming.
+
+### Module not in path
+
+```bash
+$ curl -XPOST localhost:8098/mapred \
+> -H 'Content-Type: application/json' \
+> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}'
+
+{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"}
+```
+
+### Node in process of starting
+
+```bash
+$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}'
+
+500 Internal Server Error
+
+Internal Server Error
+
+The server encountered an error while processing this request:
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{/"phase/":0,/"error/":/"function_clause/",/"input/":/"{ok,{r_object,<>,<>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; with MapReduce, the
+information you need is typically buried deeper in the stack trace.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{/"phase/":0,/"error/":/"function_clause/",/"input/":/"{ok,{r_object,<>,<>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{/"phase/":0,/"error/":/"function_clause/",/"input/":/"{ok,{r_object,<>,<>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<>]],[],[],[],[],[],[],[],[[<>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<>]],[],[[<>|{1405,709865,48668}]],[],[[<>]]}}},<>}],...},...}/",/"type/":/"error/",/"stack/":/"[{string,substr,[///"2009-06-10///",0,7],[{file,///"string.erl///"},{line,207}]},{erl_eval,do_apply,6,[{file,///"erl_eval.erl///"},{line,573}]},{erl_eval,expr,5,[{file,///"erl_eval.erl///"},{line,364}]},{erl_eval,exprs,5,[{file,///"erl_eval.erl///"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,///"src/riak_kv_mrc_map.erl///"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,///"src/riak_kv_mrc_map.erl///"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,///"src/riak_pipe_vnode_worker.erl///"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]/"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
diff --git a/content/riak/kv/2.9.0p5/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.9.0p5/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..25d3b76ad6
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,76 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/app-guide/cluster-metadata/
+  - /riak/2.9.0/developing/app-guide/cluster-metadata/
+  - /riak/kv/2.9.0/developing/app-guide/cluster-metadata/
+  - /riak/kv/2.9.0p1/developing/app-guide/cluster-metadata/
+  - /riak/kv/2.9.0p2/developing/app-guide/cluster-metadata/
+  - /riak/kv/2.9.0p3/developing/app-guide/cluster-metadata/
+  - /riak/kv/2.9.0p4/developing/app-guide/cluster-metadata/
+---
+
+
+Cluster metadata is a subsystem inside of Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/2.9.0p5/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.9.0p5/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.9.0p5/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
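+
+As a brief, hypothetical sketch (the prefix, key, and value below are
+invented for illustration, and the exact callback shapes may differ
+slightly; consult the module source), typical usage from within a
+`riak_core` application might look like:
+
+```erlang
+%% Sketch: basic riak_core_metadata usage (illustrative names only)
+FullPrefix = {<<"myapp">>, <<"settings">>},
+riak_core_metadata:put(FullPrefix, <<"max_users">>, 100),
+Max = riak_core_metadata:get(FullPrefix, <<"max_users">>),
+%% Fold over everything stored under the prefix to collect its keys
+Keys = riak_core_metadata:fold(fun({Key, _Val}, Acc) -> [Key | Acc] end,
+                               [], FullPrefix),
+riak_core_metadata:delete(FullPrefix, <<"max_users">>).
+```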
diff --git a/content/riak/kv/2.9.0p5/developing/app-guide/reference.md b/content/riak/kv/2.9.0p5/developing/app-guide/reference.md
new file mode 100644
index 0000000000..0d0edd6144
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/app-guide/reference.md
@@ -0,0 +1,25 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+#menu:
+#  riak_kv-2.9.0p5:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/app-guide/reference/
+  - /riak/2.9.0/developing/app-guide/reference/
+  - /riak/kv/2.9.0/developing/app-guide/reference/
+  - /riak/kv/2.9.0p1/developing/app-guide/reference/
+  - /riak/kv/2.9.0p2/developing/app-guide/reference/
+  - /riak/kv/2.9.0p3/developing/app-guide/reference/
+  - /riak/kv/2.9.0p4/developing/app-guide/reference/
+---
+
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.9.0p5/developing/app-guide/replication-properties.md b/content/riak/kv/2.9.0p5/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..a861e41d68
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/app-guide/replication-properties.md
@@ -0,0 +1,588 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/advanced/replication-properties
+  - /riak/kv/2.9.0p5/dev/advanced/replication-properties
+  - /riak/2.9.0p5/developing/app-guide/replication-properties/
+  - /riak/2.9.0/developing/app-guide/replication-properties/
+  - /riak/kv/2.9.0/developing/app-guide/replication-properties/
+  - /riak/kv/2.9.0p1/developing/app-guide/replication-properties/
+  - /riak/kv/2.9.0p2/developing/app-guide/replication-properties/
+  - /riak/kv/2.9.0p3/developing/app-guide/replication-properties/
+  - /riak/kv/2.9.0p4/developing/app-guide/replication-properties/
+---
+
+
+[usage bucket types]: {{}}riak/kv/2.9.0p5/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/2.9.0p5/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/2.9.0p5/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.9.0p5/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify them on a per-request basis, as in the sketch below
+and in the [section on client-level replication settings]({{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties#client-level-replication-settings).
+
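+Here is a minimal sketch using the Erlang client (the bucket, key, and
+`Pid` are placeholders); the options list on the read carries the
+replication properties for just that one request:
+
+```erlang
+%% Sketch: per-request replication properties on a single read
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"my_bucket">>, <<"my_key">>, [{r, 2}]).
+```
+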
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.9.0p5/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting it to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.9.0p5/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                       <<"giraffe">>,
+                       <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only
+if 3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.9.0p5/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
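+
+For illustration, here is a hedged Erlang-client sketch of a read that
+requires two primary vnodes to respond (the bucket, key, and `Pid` are
+placeholders):
+
+```erlang
+%% Sketch: fail the read unless 2 primary vnodes answer
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"my_bucket">>, <<"my_key">>, [{pr, 2}]).
+```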
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.9.0p5/setup/planning/backend/multi).
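+
+Like W, DW can also be supplied on individual requests. A minimal
+sketch with the Erlang client, assuming an object `Obj` built as in the
+earlier examples:
+
+```erlang
+%% Sketch: require a quorum of vnodes to confirm the write reached disk
+ok = riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, quorum}]).
+```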
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify 
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
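+
+As a quick sketch with the Erlang client (placeholder bucket and key),
+a read combining the two settings discussed above looks like this:
+
+```erlang
+%% Sketch: search thoroughly, but report not found once a quorum agrees
+Result = riakc_pb_socket:get(Pid, <<"my_bucket">>, <<"my_key">>,
+                             [{notfound_ok, false}, {basic_quorum, true}]).
+```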
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` --- All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` --- This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` --- A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` --- Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{\"stats\":{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/2.9.0p5/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/2.9.0p5/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.9.0p5/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+
+
+Tuning CAP Controls in Riak from
+Basho Technologies on Vimeo.
diff --git a/content/riak/kv/2.9.0p5/developing/app-guide/strong-consistency.md b/content/riak/kv/2.9.0p5/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..0fa1f6f77a
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/app-guide/strong-consistency.md
@@ -0,0 +1,265 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/advanced/strong-consistency
+  - /riak/kv/2.9.0p5/dev/advanced/strong-consistency
+  - /riak/2.9.0p5/developing/app-guide/strong-consistency/
+  - /riak/2.9.0/developing/app-guide/strong-consistency/
+  - /riak/kv/2.9.0/developing/app-guide/strong-consistency/
+  - /riak/kv/2.9.0p1/developing/app-guide/strong-consistency/
+  - /riak/kv/2.9.0p2/developing/app-guide/strong-consistency/
+  - /riak/kv/2.9.0p3/developing/app-guide/strong-consistency/
+  - /riak/kv/2.9.0p4/developing/app-guide/strong-consistency/
+---
+
+
+[use ref strong consistency]: {{}}riak/kv/2.9.0p5/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/2.9.0p5/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/2.9.0p5/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/2.9.0p5/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/2.9.0p5/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/2.9.0p5/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/2.9.0p5/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/2.9.0p5/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/2.9.0p5/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/2.9.0p5/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/2.9.0p5/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/2.9.0p5/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/2.9.0p5/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/2.9.0p5/developing/client-libraries
+[getting started]: {{}}riak/kv/2.9.0p5/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/2.9.0p5/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent plus partition
+tolerant) for all of the data in that bucket. You can store just some of
+your data in strongly consistent buckets or all of your data, depending
+on your use case. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability because a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
+
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function much differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations enable you to set values for a
+variety of [replication properties][apps replication properties] either on each request or at the
+bucket level, [using bucket types][usage bucket types], these settings are quietly ignored
+for strongly consistent operations. These settings include `r`, `pr`,
+`w`, `rw`, and others. Two replication properties that _can_ be set,
+however, are `n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients, but with important advantages over vector
+clocks.
+
+While we strongly recommend attaching context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---they are purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to make writes to non-existing keys without attaching context,
+we recommend doing this only if you are certain that the key does not
+yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
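+
+As a minimal sketch with the Erlang client (the bucket and key are
+hypothetical), the fetched object carries its causal context, which the
+client sends back automatically on the subsequent write:
+
+```erlang
+%% Sketch: read-modify-write that preserves the causal context
+{ok, Obj0} = riakc_pb_socket:get(Pid, {<<"strongly_consistent">>, <<"accounts">>}, <<"alice">>),
+Obj1 = riakc_obj:update_value(Obj0, <<"new value">>),
+ok = riakc_pb_socket:put(Pid, Obj1).
+```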
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure, then you should default to supplying a context object.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates (a
+   retry sketch follows below).
+
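+Because such failures are expected, applications often wrap strongly
+consistent updates in retry logic. The sketch below is hypothetical
+(the `update_sc/5` helper and its arguments are invented for
+illustration); note that the entire get/modify/put cycle is retried,
+not just the put:
+
+```erlang
+%% Sketch: retry the full get/modify/put cycle a bounded number of times
+update_sc(_Pid, _Bucket, _Key, _Fun, 0) ->
+    {error, too_many_retries};
+update_sc(Pid, Bucket, Key, Fun, Retries) ->
+    {ok, Obj0} = riakc_pb_socket:get(Pid, Bucket, Key),
+    Obj1 = riakc_obj:update_value(Obj0, Fun(riakc_obj:get_value(Obj0))),
+    case riakc_pb_socket:put(Pid, Obj1) of
+        ok               -> ok;
+        {error, _Reason} -> update_sc(Pid, Bucket, Key, Fun, Retries - 1)
+    end.
+```
+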
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+non-strongly-consistent buckets. One important exception to this is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, and so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a non-empty key without including causal
+context, you will receive the following error:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+Precondition Failed
+
+mochiweb+webmachine web server
+``` + +> **Getting Started with Riak KV clients** +> +> If you are connecting to Riak using one of Basho's official +[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section. + +## Known Issue with Client Libraries + +All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no +problems, since many error conditions are normal when using Riak. + +When working with strong consistency, however, operations like +[conditional puts][config strong consistency#details] commonly +produce errors that are difficult for clients to interpret. For example, +it is expected behavior for conditional puts to fail in the case of +concurrent updates to an object. At present, the official Riak clients +will convert this failure into an exception that is no different from +other error conditions, i.e. they will not indicate any +strong-consistency-specific errors. + +The best solution to this problem at the moment is to catch these +exceptions on the application side and parse server-side error messages +to see if the error involved a conditional failure. If so, you should +set up your application to retry any updates, perhaps a specified number +of times or perhaps indefinitely, depending on the use case. + +If you do set up a retry logic of this sort, however, it is necessary +to retry the entire read/modify/put cycle, meaning that you will need +to fetch the object, modify it, and then write. If you perform a simple +put over and over again, without reading the object, the update will +continue to fail. + +A future version of Riak will address these issues by modifying the +server API to more accurately report errors specific to strongly +consistent operations. diff --git a/content/riak/kv/2.9.0p5/developing/app-guide/write-once.md b/content/riak/kv/2.9.0p5/developing/app-guide/write-once.md new file mode 100644 index 0000000000..120b8912b3 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/app-guide/write-once.md @@ -0,0 +1,163 @@ +--- +title: "Write Once" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Write Once" + identifier: "app_guide_write_once" + weight: 102 + parent: "developing_app_guide" +toc: true +version_history: + in: "2.1.0+" +aliases: + - /riak/2.9.0p5/dev/advanced/write-once + - /riak/kv/2.9.0p5/dev/advanced/write-once + - /riak/2.9.0p5/developing/app-guide/write-once/ + - /riak/2.9.0/developing/app-guide/write-once/ + - /riak/kv/2.9.0/developing/app-guide/write-once/ + - /riak/kv/2.9.0p1/developing/app-guide/write-once/ + - /riak/kv/2.9.0p2/developing/app-guide/write-once/ + - /riak/kv/2.9.0p3/developing/app-guide/write-once/ + - /riak/kv/2.9.0p4/developing/app-guide/write-once/ +--- + + +[glossary vnode]: {{}}riak/kv/2.9.0p5/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.9.0p5/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.9.0p5/developing/data-types +[strong consistency]: {{}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency + +Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. 
+Avoiding coordinated PUTs results in higher throughput and lower PUT
+latency, though at the cost of different semantics in the degenerate
+case of sibling resolution.
+
+{{% note %}}
+Write-once buckets do not support Riak commit hooks. Because Riak objects are
+inserted into the realtime queue using a postcommit hook, realtime replication
+is unavailable for write-once buckets. Fullsync replication will, however,
+replicate the data.
+{{% /note %}}
+
+## Configuration
+
+When the new `write_once` [bucket type][bucket type] parameter is set to
+`true`, buckets of this type will treat all key/value entries as semantically
+"write once"; once written, entries should not be modified or overwritten by
+the user.
+
+The `write_once` property is a boolean property applied to a bucket type and
+may only be set at bucket creation time. Once a bucket type has been set with
+this property and activated, the `write_once` property may not be modified.
+
+The `write_once` property is incompatible with [Riak data types][Riak data types]
+and [strong consistency][strong consistency]. This means that if you attempt
+to create a bucket type with the `write_once` property set to `true`, any
+attempt to set the `datatype` parameter or to set the `consistent` parameter
+to `true` will fail.
+
+The `write_once` property may not be set on the default bucket type, and may
+not be set on individual buckets. If you set the `lww` or `allow_mult`
+parameters on a write-once bucket type, those settings will be ignored, as
+sibling values are disallowed by default.
+
+The following example shows how to configure a bucket type with the
+`write_once` property:
+
+```bash
+riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}'
+# my-bucket-type created
+
+riak-admin bucket-type activate my-bucket-type
+# my-bucket-type has been activated
+
+riak-admin bucket-type status my-bucket-type
+# my-bucket-type is active
+...
+write_once: true
+...
+```
+
+## Quorum
+
+The write path used by write-once buckets supports the `w`, `pw`, and `dw`
+configuration values. However, if `dw` is specified, then the value of `w` is
+taken to be the maximum of the `w` and `dw` values. For example, for an `n_val`
+of 3, if `dw` is set to `all`, then `w` will be `3`.
+
+The write-once path additionally supports the `sloppy_quorum` property. If set
+to `false`, only primary nodes will be selected when calculating the write
+quorum.
+
+## Runtime
+
+The write-once path circumvents the normal coordinated PUT code path, and
+instead sends write requests directly to all [vnodes][glossary vnode] (or
+vnode proxies) in the effective preference list for the write operation.
+
+In place of the `put_fsm` used in the normal path, we introduce a collection of
+new intermediate worker processes (implementing `gen_server` behavior). The
+role of these intermediate processes is to dispatch put requests to vnode or
+vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`,
+the write-once workers are long-lived for the lifecycle of the `riak_kv`
+application. They are therefore stateful and store request state in a
+state-local dictionary.
+
+The relationship between the `riak_client`, write-once workers, and vnode
+proxies is illustrated in the following diagram:
+
+![Write Once]({{}}images/write_once.png) +
+ +## Client Impacts + +Since the write-once code path is optimized for writes of data that will not +be updated and therefore may potentially issue asynchronous writes, some +client features might not work as expected. For example, PUT requests asking +for the object to be returned will behave like requests that do not +request the object to be returned when they are performed against write-once +buckets. + + +## Siblings + +As mentioned, entries in write-once buckets are intended to be written only +once---users who are not abusing the semantics of the bucket type should not be +updating or over-writing entries in buckets of this type. However, it is +possible for users to misuse the API, accidentally or otherwise, which might +result in incomparable entries for the same key. + +In the case of siblings, write-once buckets will resolve the conflict by +choosing the "least" entry, where sibling ordering is based on a deterministic +SHA-1 hash of the objects. While this algorithm is repeatable and deterministic +at the database level, it will have the appearance to the user of "random write +wins." + +{{% note %}} +As mentioned in [Configuration](#configuration), write-once buckets and Riak +Data Types are incompatible because of this. +{{% /note %}} + + +## Handoff + +The write-once path supports handoff scenarios, such that if a handoff occurs +during PUTs in a write-once bucket, the values that have been written will be +handed off to the newly added Riak node. + +## Asynchronous Writes + +For backends that support asynchronous writes, the write-once path will +dispatch a write request to the backend and handle the response +asynchronously. This behavior allows the vnode to free itself for other work +instead of waiting on the write response from the backend. + +At the time of writing, the only backend that supports asynchronous writes is +LevelDB. Riak will automatically fall back to synchronous writes with all other +backends. + +{{% note title="Note on the `multi` backend" %}} +The [Multi]({{}}riak/kv/2.9.0p5/setup/planning/backend/multi) backend does not +support asynchronous writes. Therefore, if LevelDB is used with the Multi +backend, it will be used in synchronous mode. +{{% /note %}} diff --git a/content/riak/kv/2.9.0p5/developing/client-libraries.md b/content/riak/kv/2.9.0p5/developing/client-libraries.md new file mode 100644 index 0000000000..3ee40e0f4b --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/client-libraries.md @@ -0,0 +1,312 @@ +--- +title: "Client Libraries" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Client Libraries" + identifier: "developing_client_libraries" + weight: 106 + parent: "developing" +toc: true +aliases: + - /riak/2.9.0p5/dev/using/libraries + - /riak/kv/2.9.0p5/dev/using/libraries + - /riak/2.9.0p5/developing/client-libraries/ + - /riak/2.9.0/developing/client-libraries/ + - /riak/kv/2.9.0/developing/client-libraries/ + - /riak/kv/2.9.0p1/developing/client-libraries/ + - /riak/kv/2.9.0p2/developing/client-libraries/ + - /riak/kv/2.9.0p3/developing/client-libraries/ + - /riak/kv/2.9.0p4/developing/client-libraries/ +--- + + +## Basho-Supported Libraries + +Basho officially supports a number of open-source client libraries for a +variety of programming languages and environments. 
+ +Language | Source | Documentation | Download +:--------|:-------|:--------------|:-------- +Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22) | +Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client) +Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads) +C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases) +Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases) +PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) +Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client) +Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client) + +**Note**: All official clients use the integrated issue tracker on +GitHub for bug reporting. + +In addition to the official clients, Basho provides some unofficial +client libraries, listed below. There are also many client libraries and +related [community projects]({{}}community/projects/). + + +## Community Libraries + +The Riak Community is developing at a break-neck pace, and the number of +community-contributed libraries and drivers is growing right along side +it. Here is a list of projects that may suit your programming needs or +curiosities. If you know of something that needs to be added or are +developing something that you wish to see added to this list, please +fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs) +and send us a pull request. + +{{% note title="Note on community-produced libraries" %}} +All of these projects and libraries are at various stages of completeness and +may not suit your application's needs based on their level of maturity and +activity. 
+{{% /note %}} + +### Client Libraries and Frameworks + +#### C/C++ + +* [riak-cpp](https://github.com/ajtack/riak-cpp) --- A C++ Riak client + library for use with C++11 compilers +* [Riak C Driver](https://github.com/fenek/riak-c-driver) --- A library + to communicate with Riak using cURL and Protocol Buffers +* [Riack](https://github.com/trifork/riack) --- A simple C client + library +* [Riack++](https://github.com/TriKaspar/riack_cpp) --- A C++ wrapper + around riack + +#### Clojure + +* [knockbox](https://github.com/reiddraper/knockbox) --- An eventual + consistency toolbox for Clojure +* [Welle](http://clojureriak.info) --- An expressive Clojure client with + batteries included +* [clj-riak](http://github.com/mmcgrana/clj-riak) --- Clojure bindings + to the Riak Protocol Buffers API +* [sumo](https://github.com/reiddraper/sumo) --- A Protocol + Buffer-specific client for Riak with KV, 2i, and MapReduce support +* [kria](https://github.com/bluemont/kria) --- Riak 2.0 Asynchronous + (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer + API, Java 7. + +#### ColdFusion + +* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) + --- A Riak-backed cache extension for Railo/ColdFusion + +#### Common Lisp + +* [cl-riak (1)](https://github.com/whee/cl-riak) +* [cl-riak (2)](https://github.com/eriknomitch/cl-riak) + +#### Dart + +* [riak-dart](https://github.com/agilord/riak_dart_client) --- HTTP + client for Riak written in Dart + +#### Django (Python) + +* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) + --- Riak-based Session Backend for Django +* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) + --- A Riak backend for Django + +#### Erlang + +* [Uriak Pool](https://github.com/unisontech/uriak_pool) --- Erlang + connection pool library from the team at + [Unison](http://www.unison.com) +* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) --- Riak + Protocol Buffer Client pool application +* [Pooly](https://github.com/aberman/pooly) --- Riak Process Pool +* [riakpool](https://github.com/dweldon/riakpool) --- Application for + maintaining a dynamic pool of Protocol Buffer client connections to a + Riak database +* [pooler](https://github.com/seth/pooler) --- An OTP Process Pool + Application +* [krc](https://github.com/klarna/krc) --- A simple wrapper around the + official Riak client for Erlang +* [riakc_pool](https://github.com/brb/riakc_pool) --- A really simple + Riak client process pool based on poolboy + +#### Go + +* [riaken](https://github.com/riaken) --- A fast and extendable Riak + Protocol Buffer Client +* [goriakpbc](https://github.com/tpjg/goriakpbc) --- A Golang Riak + client inspired by the Ruby riak-client from Basho and riakpbc from mrb +* [riakpbc](https://github.com/mrb/riakpbc) --- A Riak Protocol Buffer + client in Go +* [goriak](https://github.com/zegl/goriak) --- Go language driver for Riak KV + +#### Grails + +* [Grails ORM for Riak](http://www.grails.org/plugin/riak) + +#### Griffon + +* [Riak Plugin for + Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin) + +#### Groovy + +* [spring-riak](https://github.com/jbrisbin/spring-riak) --- Riak + support from Groovy and/or Java + +#### Haskell + +* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) + --- A fast Haskell client library from the team at MailRank. 
+ +#### Java + +* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) + --- Java Client Library for Riak based on the Protocol Buffers API +* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) + --- Asynchronous, NIO-based Protocol Buffers client for Riak +* [Riak Module for the Play + Framework](http://www.playframework.org/modules/riak-head/home) + +#### Lisp-flavored Erlang + +* [Gutenberg](https://github.com/dysinger/gutenberg/) --- Riak MapReduce + examples written in LFE + +#### Node.js + +* [zukai](https://github.com/natural/zukai) --- Riak ODM for Node.js + from Troy Melhase +* [riak-pb](https://github.com/CrowdProcess/riak-pb) --- Riak Protocol + Buffers client for Node.js from the team at + [CrowdProcess](http://crowdprocess.com) +* [node_riak](https://github.com/mranney/node_riak) --- Voxer's + production Node.js client for Riak. +* [riakpbc](https://github.com/nlf/riakpbc) --- A simple Riak Protocol + Buffer client library for Node.js +* [nodiak](https://npmjs.org/package/nodiak) --- Supports bulk + get/save/delete, sibling auto-resolution, MapReduce chaining, Search, + and 2i's +* [resourceful-riak](https://github.com/admazely/resourceful-riak) --- A + Riak engine to the + [resourceful](https://github.com/flatiron/resourceful/) model + framework from [flatiron](https://github.com/flatiron/) +* [Connect-Riak](https://github.com/frank06/connect-riak) --- Riak + session store for Connect backed by [Riak-js](http://riakjs.org/) +* [Riak-js](http://riakjs.com) --- Node.js client for Riak with support + for HTTP and Protocol Buffers +* [Riakjs-model](https://github.com/dandean/riakjs-model) --- a model + abstraction around riak-js +* [Node-Riak](http://github.com/orlandov/node-riak) --- A wrapper around + Node's HTTP facilities for communicating with Riak +* [riak-dc](https://github.com/janearc/riak-dc) --- A very thin, very small + http-based interface to Riak using promises intended to be used for small + tools like command-line applications; aims to have the "most-synchronous- + like" interface. 
+* [Nori](https://github.com/sgonyea/nori) --- Experimental Riak HTTP + library for Node.js modeled after Ripple +* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) --- + Node-based server and database-frontend for Sproutcore +* [Chinood](https://npmjs.org/package/chinood) --- Object data mapper + for Riak built on Nodiak +* [SimpleRiak](https://npmjs.org/package/simpleriak) --- A very simple + Riak HTTP client + +#### OCaml + +* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) --- + Riak OCaml client +* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) --- A Protocol + Buffers client for Riak + +#### Perl + +* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) --- A Perl + interface to Riak +* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) + --- Non-blocking Riak adapter using anyevent +* [riak-tiny](https://github.com/tempire/riak-tiny) --- Perl interface + to Riak without Moose +* [Riak::Light](https://metacpan.org/module/Riak::Light) --- Fast and + lightweight Perl client for Riak (PBC only) + +#### PHP + +* [riak-client](https://github.com/php-riak/riak-client) --- A Riak + 2.0-compliant PHP client with support for Protocol Buffers by [Fabio + Silva](https://github.com/FabioBatSilva) +* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) --- A port of + Ripple to PHP +* [riiak](https://bitbucket.org/intel352/riiak) --- A Riak PHP client + library for the [Yii Framework](http://www.yiiframework.com/) +* [riak-php](https://github.com/marksteele/riak-php) --- A Riak PHP + client with support for Protocol Buffers +* [RiakBundle](https://github.com/remialvado/RiakBundle) --- + [Symfony](http://symfony.com) Bundle designed to ease interaction + with Riak +* [php_riak](https://github.com/TriKaspar/php_riak) --- A PHP extension + written in C, Both Riak client and PHP session module + +#### Python + +* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) + --- Asyncio PBC Riak 2.0+ client library. 
(Based on official Basho + python client) +* [Riakasaurus](https://github.com/calston/riakasaurus) --- A Riak + client library for Twisted (based on txriak) +* [RiakKit](http://shuhaowu.com/riakkit) --- A small Python ORM that + sits on top of riak-python-client, similar to mongokit and couchdbkit +* [riakalchemy](https://github.com/Linux2Go/riakalchemy) --- Object + mapper for Riak written in Python +* [riak_crdt](https://github.com/ericmoritz/riak_crdt) --- A CRDT + (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT + API](https://github.com/ericmoritz/crdt) +* [txriak](https://launchpad.net/txriak) --- A Twisted module for + communicating with Riak via the HTTP interface +* [txriakidx](https://github.com/williamsjj/txriakidx) --- Riak client + for Twisted Python that implements transparent indexes + +#### Racket + +* [riak.rkt](https://github.com/shofetim/riak.rkt) --- Racket API to + Riak +* [Racket Riak](https://github.com/dkvasnicka/racket-riak) --- Racket + 1.3.x API to Riak + +#### Ruby + +* [Risky](https://github.com/aphyr/risky) --- A lightweight Ruby ORM for + Riak +* [riak_sessions](http://github.com/igorgue/riak_sessions) --- + Riak-backed session storage for Rack +* [Riaktor](http://github.com/benmyles/riaktor) --- Ruby client and + object mapper for Riak +* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) --- + DataMapper adapter for Riak +* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) --- Riak + Protocol Buffer Client in Ruby +* [Devise-Ripple](http://github.com/frank06/devise-ripple) --- An ORM + strategy to use Devise with Riak +* [ripple-anaf](http://github.com/bkaney/ripple-anaf) --- Accepts nested + attributes support for Ripple +* [Pabst](https://github.com/sgonyea/pabst) --- Cross-platform Ruby + extension for Protocol Buffers written in both Objective-C and + Objective-C++ + +#### Scala + +* [Riakka](http://github.com/timperrett/riakka) --- Scala library for + talking to Riak +* [Ryu](http://github.com/softprops/ryu) --- A Tornado Whirlwind Kick + Scala client for the Riak raw HTTP interface +* [Raiku](https://github.com/gideondk/Raiku) --- An Akka IO- and + Sentinel-driven Riak Scala client + +#### Smalltalk + +* [Phriak](http://www.squeaksource.com/Phriak/) --- A Riak client for + Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface +* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) + --- A Pharo Smalltalk interface to Riak. There is also a blog post + with some additional info about the client + [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html). diff --git a/content/riak/kv/2.9.0p5/developing/data-modeling.md b/content/riak/kv/2.9.0p5/developing/data-modeling.md new file mode 100644 index 0000000000..6af3562c9e --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/data-modeling.md @@ -0,0 +1,19 @@ +--- +layout: redirect +target: "riak/kv/2.9.0p5/learn/use-cases/" +aliases: + - /riak/2.9.0p5/developing/data-modeling/ + - /riak/2.9.0/developing/data-modeling/ + - /riak/kv/2.9.0/developing/data-modeling/ + - /riak/kv/2.9.0p1/developing/data-modeling/ + - /riak/kv/2.9.0p2/developing/data-modeling/ + - /riak/kv/2.9.0p3/developing/data-modeling/ + - /riak/kv/2.9.0p4/developing/data-modeling/ +--- + + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/, but -- to maintain +the git history of this (possibly malformed?) 
file -- we're going to start off
+by using this generated redirect.
diff --git a/content/riak/kv/2.9.0p5/developing/data-types.md b/content/riak/kv/2.9.0p5/developing/data-types.md
new file mode 100644
index 0000000000..f84e9d9885
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/data-types.md
@@ -0,0 +1,283 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Data Types"
+    identifier: "developing_data_types"
+    weight: 102
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/data-types
+  - /riak/kv/2.9.0p5/dev/using/data-types
+  - /riak/2.9.0p5/dev/data-modeling/data-types
+  - /riak/kv/2.9.0p5/dev/data-modeling/data-types
+  - /riak/2.9.0p5/developing/data-types/
+  - /riak/2.9.0/developing/data-types/
+  - /riak/kv/2.9.0/developing/data-types/
+  - /riak/kv/2.9.0p1/developing/data-types/
+  - /riak/kv/2.9.0p2/developing/data-types/
+  - /riak/kv/2.9.0p3/developing/data-types/
+  - /riak/kv/2.9.0p4/developing/data-types/
+---
+
+
+[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others
+[concept crdt]: ../../learn/concepts/crdts
+[ops bucket type]: ../../using/cluster-operations/bucket-types
+
+Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types:
+
+- [Flags](./maps#flags)
+- [Registers](./maps#registers)
+- [Counters](./counters)
+- [Sets](./sets)
+- [GSets](./gsets)
+- [Maps](./maps)
+
+Riak KV also has 1 context-free data type that has similar usage but does not require contexts:
+
+- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places)
+
+
+Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps).
+
+For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt].
+
+## Getting Started with Riak Data Types
+
+The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types:
+
+1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type).
+2. [Confirm the bucket was properly configured](#confirm-bucket-configuration).
+3. [Activate the bucket type](#activate-bucket-type).
+
+### Creating a Bucket with a Riak Data Type
+
+First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to `counter`, `map`, `set`, `hll`, or `gset`.
+
+The following would create a separate bucket type for each of the five
+bucket-level data types:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The names `maps`, `sets`, `counters`, `hlls`, and `gsets` are not reserved
terms. You are free to name bucket types whatever you like, with
+the exception of `default`.
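+
+Looking ahead: once one of these bucket types has been created and
+activated (steps 2 and 3 below), client code refers to it by pairing the
+type with a bucket name. A minimal sketch using the official Python
+client (the connection details and the `ledger`/`my_counter` names are
+placeholder assumptions):
+
+```python
+import riak
+
+# Placeholder connection details; adjust for your cluster.
+client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
+
+# Pair the activated bucket type with a (hypothetical) bucket name.
+bucket = client.bucket_type('counters').bucket('ledger')
+
+# The client detects the bucket type's datatype property, so new()
+# returns a counter here.
+counter = bucket.new('my_counter')
+counter.increment()
+counter.store()  # updates are staged locally until store() is called
+```
+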
+ +### Confirm Bucket configuration + +Once you've created a bucket with a Riak data type, you can check +to make sure that the bucket property configuration associated with that +type is correct. This can be done through the `riak-admin` interface: + +```bash +riak-admin bucket-type status maps +``` + +This will return a list of bucket properties and their associated values +in the form of `property: value`. If our `maps` bucket type has been set +properly, we should see the following pair in our console output: + +``` +datatype: map +``` + +### Activate Bucket type + +If a bucket type has been properly constructed, it needs to be activated +to be usable in Riak. This can also be done using the `bucket-type` +command interface: + +```bash +riak-admin bucket-type activate maps +``` + +To check whether activation has been successful, simply use the same +`bucket-type status` command shown above. + +See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application. + +## Required Bucket Properties + +In order for Riak data types to work the bucket should have the following bucket properties: + +- `allow_mult = true` +- `last_write_wins = false` + +These settings are set by default and should not be changed. + +## Data Types and Context + +Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence. + +If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client). + +> **Note** +> +> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter. 
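+
+As a hedged illustration of why context matters for removals, the sketch
+below uses the Python client (which fetches and resends context
+automatically) against the `ahmed_info` map that appears in the examples
+that follow; the `interests` set field is assumed to already exist in
+that map:
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('users')
+ahmed_map = Map(bucket, 'ahmed_info')
+
+# reload() fetches the current value *and* its context from Riak.
+ahmed_map.reload()
+
+# Remove-like operations need that context to converge safely; the
+# client attaches it automatically when the update is stored.
+ahmed_map.sets['interests'].discard('opera')
+ahmed_map.store()
+```
+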
+
+In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map):
+
+```java
+// Using the "ahmedMap" Location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+System.out.println(ctx.getValue().toString());
+
+// An indecipherable string of Unicode characters should then appear
+```
+
+```ruby
+bucket = client.bucket('users')
+ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps')
+ahmed_map.instance_variable_get(:@context)
+
+# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j"
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```python
+bucket = client.bucket_type('maps').bucket('users')
+ahmed_map = Map(bucket, 'ahmed_info')
+ahmed_map.context
+
+# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Note: using a previous UpdateMap or FetchMap result
+Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context));
+
+// Output:
+// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("context: '%s'", rslt.context.toString('base64'));
+});
+
+// Output:
+// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo='
+```
+
+```erlang
+%% You cannot fetch a data type's context directly using the Erlang
+%% client. This is actually quite all right, as the client automatically
+%% manages contexts when making updates.
+```
+
+> **Context with the Ruby, Python, and Erlang clients**
+>
+> In the Ruby, Python, and Erlang clients, you will not need to manually
handle context when making data type updates. The clients will do it all
for you. The exceptions amongst the official clients are the Java and PHP
clients. We'll explain how to use data type contexts with those clients
directly below.
+
+### Context with the Java and PHP Clients
+
+With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations:
+
+* Disabling a flag within a map
+* Removing an item from a set (whether the set is on its own or within a
+  map)
+* Removing a field from a map
+
+Without context, these operations simply will not succeed due to the
+convergence logic driving Riak data types. The example below shows you
+how to fetch a data type's context and then pass it back to Riak.
More +specifically, we'll remove the `paid_account` flag from the map: + +```java +// This example uses our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate removePaidAccountField = new MapUpdate() + .removeFlag("paid_account"); +UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField) + .withContext(ctx) + .build(); +client.execute(update); +``` + + +```php +$map = (new /Basho/Riak/Command/Builder/FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$updateSet = (new /Basho/Riak/Command/Builder/UpdateSet($riak)) + ->remove('opera'); + +(new /Basho/Riak/Command/Builder/UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +## Usage Examples + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [Maps](./maps) +- [GSets](./gsets) +- [Hyperloglogs](./hyperloglogs) + +The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section. + +All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish. + +## Data Types and Search + +Riak data types can be searched like any other object, but with the +added benefit that your data type is indexed as a different type by Solr, +the search platform behind Riak Search. + +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code +samples from each of our official client libraries. diff --git a/content/riak/kv/2.9.0p5/developing/data-types/counters.md b/content/riak/kv/2.9.0p5/developing/data-types/counters.md new file mode 100644 index 0000000000..f37d6d706d --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/data-types/counters.md @@ -0,0 +1,639 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Counters" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Counters" + identifier: "data_types_counters" + weight: 100 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.0p5/dev/using/data-types/counters + - /riak/kv/2.9.0p5/dev/using/data-types/counters + - /riak/2.9.0p5/dev/data-modeling/data-types/counters + - /riak/kv/2.9.0p5/dev/data-modeling/data-types/counters + - /riak/2.9.0p5/developing/data-types/counters/ + - /riak/2.9.0/developing/data-types/counters/ + - /riak/kv/2.9.0/developing/data-types/counters/ + - /riak/kv/2.9.0p1/developing/data-types/counters/ + - /riak/kv/2.9.0p2/developing/data-types/counters/ + - /riak/kv/2.9.0p3/developing/data-types/counters/ + - /riak/kv/2.9.0p4/developing/data-types/counters/ +--- + + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero. + +The examples in this section will show you how to use counters on their own. 
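+
+Before stepping through each operation language by language, here is a
+compact preview of the whole counter lifecycle using the official Python
+client (connection details are placeholders; every call used here is
+covered in detail in the sections below):
+
+```python
+import riak
+
+client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
+
+# Assumes a 'counters' bucket type created and activated as shown below.
+bucket = client.bucket_type('counters').bucket('counters')
+
+counter = bucket.new('traffic_tickets')
+counter.increment(5)   # stage an increment of 5
+counter.decrement()    # stage a decrement of 1
+counter.store()        # send staged updates to Riak
+
+counter.reload()       # fetch the stored value from the server
+print(counter.value)   # 4, assuming a fresh counter
+```
+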
+ +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter set to `counter`: + +```bash +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +``` + +> **Note** +> +> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status counters +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `counters` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: counter +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate counters +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status counters +``` + +After creating and activating our new `counters` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key +location that contains our counter. + +For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. +Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new /Basho/Riak/Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. 
Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new /Basho/Riak/Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ / + -H "Content-Type: application/json" / + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. We can +create this counter and ensure that the `counters` bucket will use our +`counters` bucket type like this: + +```java +// Using the countersBucket Namespace object from above: + +Location trafficTickets = new Location(countersBucket, "traffic_tickets"); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') + +# Alternatively, the Ruby client enables you to set a bucket type as +# being globally associated with a Riak data type. 
The following would
+# set all counter buckets to use the counters bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters'
+
+# This would enable us to create our counter without specifying a bucket type
+bucket = client.bucket('counters')
+counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')
+```
+
+```php
+# using the $bucket var created earlier
+$location = new \Basho\Riak\Location('traffic_tickets', $bucket);
+```
+
+```python
+bucket = client.bucket_type('counters').bucket('counters')
+counter = bucket.new('traffic_tickets')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
+FetchCounter cmd = new FetchCounter(fetchCounterOptions);
+RiakResult rslt = client.Execute(cmd);
+CounterResponse response = cmd.Response;
+```
+
+```javascript
+// Using the options from above:
+
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets'
+};
+```
+
+```erlang
+Counter = riakc_counter:new().
+
+%% Counters in the Erlang client are opaque data structures that collect
+%% operations as you mutate them. We will associate the data structure
+%% with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 0}'
+```
+
+## Increment a Counter
+
+Now that our client knows which bucket/key pairing to use for our
+counter, `traffic_tickets` will start out at 0 by default. If we happen
+to get a ticket that afternoon, we can increment the counter:
+
+```java
+// Using the "trafficTickets" Location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.increment
+
+# This will increment the counter both on the application side and in
+# Riak
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(1)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+counter.increment()
+
+# Updates are staged locally and have to be explicitly sent to Riak
+# using the store() method.
+counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1)
+    .WithBucketType("counters")
+    .WithBucket("counters")
+    .WithKey("traffic_tickets")
+    .WithReturnBody(true)
+    .Build();
+
+RiakResult rslt = client.Execute(updateCmd);
+CounterResponse response = updateCmd.Response;
+// response.Value will be 1
+```
+
+```javascript
+// Using the options from above:
+
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: 1
+};
+client.updateCounter(options,
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+```
+
+```erlang
+Counter1 = riakc_counter:increment(Counter).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 1}'
+```
+
+## Increment a Counter by More Than 1
+
+The default value of an increment operation is 1, but you can increment
+by more than 1 (always by an integer).
+ +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new /Basho/Riak/Command/Builder/IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets / + -H "Content-Type: application/json" / + -d '{"increment": 5}' +``` + +## Retrieve Counter Value + +We can retrieve the value of the counter and view how many tickets have accumulated: + +```java +// Using the "trafficTickets" Location from above: +FetchCounter fetch = new FetchCounter.Builder(trafficTickets) + .build(); +FetchCounter.Response response = client.execute(fetch); +RiakCounter counter = response.getDatatype(); +Long ticketsCount = counter.view(); +``` + +```ruby +counter.value +# Output will always be an integer +``` + +```php +$trafficTickets = (new /Basho/Riak/Command/Builder/FetchCounter($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getCounter(); + +$trafficTickets->getData(); # returns an integer +``` + +```python +counter.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, whereas the call above would return +# 6, the call below will return 0' since we started with an empty +# counter: + +counter.value + +# To fetch the value stored on the server, use the call below. 
Note +# that this will clear any changes to the counter that have not yet been +# sent to Riak +counter.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +// response.Value has the counter value +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +client.fetchCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND", + options.bucketType, options.bucket, options.key); + } else { + logger.info("bt: %s, b: %s, k: %s, counter: %d", + options.bucketType, options.bucket, options.key, + rslt.counterValue); + } + } +); +``` + +```erlang +riakc_counter:dirty_value(Counter2). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, whereas the call above would return +%% '6', the call below will return '0' since we started with an empty +%% counter: + +riakc_counter:value(Counter2). + +%% To fetch the value stored on the server, use the call below: + +{ok, CounterX} = riakc_pb_socket:fetch_type(Pid, + {<<"counters">>, <<"counters">>}, + <<"traffic_tickets">>). +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets + +# Response: +{"type":"counter", "value": } +``` + +## Decrement a Counter + +Counters enable you to decrement values in addition to incrementing them as seen above. + +For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(-1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.decrement + +# Just like incrementing, you can also decrement by more than one, e.g.: +counter.decrement(3) +``` + +```php +(new /Basho/Riak/Command/Builder/IncrementCounter($riak)) + ->withIncrement(-3) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.decrement() + +# Just like incrementing, you can also decrement by more than one, e.g.: +counter.decrement(3) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var updateCmd = new UpdateCounter.Builder(-3) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .Build(); + +rslt = client.Execute(updateCmd); +response = updateCmd.Response; +// response.Value is three less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counter', + key: 'traffic_tickets', + increment: -1 +}; + +// As with incrementing, you can also decrement by more than one, e.g.: +var options = { + bucketType: 'counters', + bucket: 'counter', + key: 'traffic_tickets', + increment: -3 +}; +``` + +```erlang +Counter3 = riakc_counter:decrement(Counter2). + +%% As with incrementing, you can also decrement by more than one: + +Counter4 = riakc_counter:decrement(3, Counter3). 
+
+%% At some point, we'll want to send our local updates to the server
+%% so they get recorded and are visible to others. Extract the update
+%% using the to_op/1 function, then pass it to
+%% riakc_pb_socket:update_type/4,5.
+
+riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>},
+                            <<"traffic_tickets">>,
+                            riakc_counter:to_op(Counter4)).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"decrement": 3}'
+```
diff --git a/content/riak/kv/2.9.0p5/developing/data-types/gsets.md b/content/riak/kv/2.9.0p5/developing/data-types/gsets.md
new file mode 100644
index 0000000000..c3e85a7551
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/data-types/gsets.md
@@ -0,0 +1,635 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: GSets"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "GSets"
+    identifier: "data_types_gsets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/data-types/gsets
+  - /riak/kv/2.9.0p5/dev/using/data-types/gsets
+  - /riak/2.9.0p5/dev/data-modeling/data-types/gsets
+  - /riak/kv/2.9.0p5/dev/data-modeling/data-types/gsets
+  - /riak/2.9.0p5/developing/data-types/gsets/
+  - /riak/2.9.0/developing/data-types/gsets/
+  - /riak/kv/2.9.0/developing/data-types/gsets/
+  - /riak/kv/2.9.0p1/developing/data-types/gsets/
+  - /riak/kv/2.9.0p2/developing/data-types/gsets/
+  - /riak/kv/2.9.0p3/developing/data-types/gsets/
+  - /riak/kv/2.9.0p4/developing/data-types/gsets/
+---
+
+
+GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps).
+
+GSets are collections of unique binary values (such as strings). All of the values in a gset are unique and are automatically sorted alphabetically, irrespective of the order in which they were added.
+
+For example, if you attempt to add the element `shovel` to a gset that already contains `shovel`, the operation will be ignored by Riak KV.
+
+Unlike sets, elements can only be added; no element modification or deletion is possible.
+
+> **Known Issue**
+>
+> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `gset`:
+
+```bash
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+ +If our `gsets` bucket type has been set properly we should see the following pair in our console output: + +``` +datatype: gset +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate gsets +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status gsets +``` + +After creating and activating our new `gsets` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +Using sets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair. + +Here is the general syntax for creating a bucket type/bucket/key +combination to handle a gset: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location set = + new Location(new Namespace("", ""), ""); +``` + +```ruby +# Note: both the Riak Ruby Client and Ruby the language have a class +# called Set. Make sure that you refer to the Ruby version as ::Set and +# the Riak client version as Riak::Crdt::Set + +bucket = client.bucket_type('bucket_type_name').bucket('bucket_name') +set = Riak::Crdt::Set.new(bucket, key) +``` + +```php +$location = new /Basho/Riak/Location('key', new /Basho/Riak/Bucket('bucket_name', 'bucket_type')); +``` + +```python +gset = bucket.new('2019-11-17') + +# or + +from riak.datatypes import GSet +gset = GSet('account-12345678', '2019-11-17') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// As with counters, with the Riak .NET Client you interact with gsets +// by building an Options object or using a Builder +var builder = new FetchGSet.Builder() + .WithBucketType("gsets") + .WithBucket("account-12345678") + .WithKey("2019-11-17"); + +// NB: builder.Options will only be set after Build() is called. +FetchGSet fetchGSetCommand = builder.Build(); + +FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17"); + +// These two options objects are equal +Assert.AreEqual(options, builder.Options); +``` + +```javascript +// As with counters, with the Riak Node.js Client you interact with gsets on the +// basis of the gset's location in Riak, as specified by an options object. +// Below is an example: +var options = { + bucketType: 'gsets', + bucket: 'account-12345678', + key: '2019-11-17' +}; +``` + +```erlang +%% Like counters, sets are not encapsulated in a +%% bucket/key in the Erlang client. See below for more +%% information. +``` + +```curl +curl http://localhost:8098/types//buckets//datatypes/ + +# Note that this differs from the URL structure for non-data type requests, +# which end in /keys/ +``` + +## Create a GSet + +For the following example, we will use a set to store a list of transactions that occur for an account number on a specific date. 
+Let's create a Riak gset to hold those transactions, stored in the key `2019-11-17` in the bucket `account-12345678`, using the `gsets` bucket type created previously. (Note that the Java and Ruby snippets below were carried over from the [sets](../sets) examples and operate on a `cities` set in a `travel` bucket; the other clients use the account/transaction example.)
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak gset.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a gset explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create an options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+20191117Gset = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty. We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+``` + +```php +# use $location from earlier +$gset = (new /Basho/Riak/Command/Builder/FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($gset->getData()); +``` + +```python +len(gset) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchGSet.Builder() + .WithBucketType("gsets") + .WithBucket("account-12345678") + .WithKey("2019-11-17"); + +FetchGSet fetchGSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchGSetCommand); +GSetResponse response = fetchGSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'gsets', + bucket: 'account-12345678', + key: '2019-11-17' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("gset '2019-11-17' is not found!"); + } +}); +``` + +```erlang +riakc_gset:size(20191117Gset) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% gset that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a GSet + +But let's say that a pair of transactions occurred today. Let's add them to our `2019-11-17` set: + +```java +// Using our "cities" Location from above: + +GSetUpdate su = new GSetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new /Basho/Riak/Command/Builder/UpdateSet($riak)) + ->add('transaction a') + ->add('transaction b') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +gset.add('transaction a') +gset.add('transaction b') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "transaction a", "transaction b" }; + +var builder = new UpdateGSet.Builder() + .WithBucketType("gsets") + .WithBucket("account-12345678") + .WithKey("2019-11-17") + .WithAdditions(adds); + +UpdateGSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +GSetResponse response = cmd.Response; +Assert.Contains("transaction a", response.AsStrings.ToArray()); +Assert.Contains("transaction b", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'gsets', + bucket: 'account-1234578', + key: '2019-11-17' +}; +var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['transaction a', 'transaction b']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +20191117Gset1 = riakc_gset:add_element(<<"transaction a">>, 20191117Gset), +20191117Gset2 = riakc_gset:add_element(<<"transaction b">>, 20191117Gset1). 
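+
+%% As with counters in the sections above, these additions are staged
+%% locally until they are pushed to the server. A hedged sketch of that
+%% push (assuming a connected Pid as in the counters examples, and that
+%% riakc_gset exposes to_op/1 like the other riakc data types):
+riakc_pb_socket:update_type(Pid, {<<"gsets">>, <<"account-12345678">>},
+                            <<"2019-11-17">>,
+                            riakc_gset:to_op(20191117Gset2)).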
+``` + +```curl +curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 / + -H "Content-Type: application/json" / + -d '{"add_all":["transaction a", "transaction b"]}' +``` + +## Remove from a GSet + +Removal from a GSet is not possible. + +## Retrieve a GSet + +Now, we can check on which transactions are currently in our gset: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); +for (BinaryValue city : binarySet) { + System.out.println(city.toStringUtf8()); +} +``` + +```ruby +cities_set.members + +# +``` + +```php +# use $location from earlier +$gset = (new /Basho/Riak/Command/Builder/FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +var_dump($gset->getData()); +``` + +```python +gset.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, where the call above would return +# frozenset(['Transaction a', 'Transaction b']), the call below would +# return frozenset([]). + +gset.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any unsent additions. +gset.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +foreach (var value in GSetResponse.AsStrings) +{ + Console.WriteLine("2019-11-17 Transactions: {0}", value); +} + +// Output: +// 2019-11-17 Transactions: transaction a +// 2019-11-17 Transactions: transaction b +``` + +```javascript +var options = { + bucketType: 'gsets', + bucket: 'account-12345678', + key: '2019-11-17' +}; +client.fetchSet(options, function(err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("2019-11-17 gset values: '%s'", + rslt.values.join(', ')); +}); + +// Output: +// info: 2019-11-17 gset values: 'transaction a, transaction b' +``` + +```erlang +riakc_gset:dirty_value(20191117Gset3). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, where the call above would return +%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would +%% return []. These are essentially ordsets: + +riakc_gset:value(20191117Gset3). + +%% To fetch the value stored on the server, use the call below: + +{ok, SetX} = riakc_pb_socket:fetch_type(Pid, + {<<"gsets">>,<<"account-12345678">>}, + <<"20191117">>). +``` + +```curl +curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 + +# Response +{"type":"set","value":["transaction a","transaction b"]} +``` + +## Find GSet Member + +Or we can see whether our gset includes a specific member: + +```java +// Using our "citiesSet" from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); + +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver"))); +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa"))); +``` + +```ruby +cities_set.include? 'Vancouver' +# false + +cities_set.include? 
'Ottawa'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use the standard JavaScript array method indexOf()
+
+var gset20191117 = rslt.values;
+gset20191117.indexOf('transaction z'); // if present, index is >= 0
+gset20191117.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, GSet3 is the most "recent" set from the standpoint
+%% of our application.
+
+riakc_gset:is_element(<<"transaction z">>, GSet3).
+riakc_gset:is_element(<<"transaction a">>, GSet3).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "citiesSet" from above:
+
+int numberOfCities = citiesSet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+response.Values.Count();
+```
+
+```javascript
+// Use the standard JavaScript array property length
+
+var gsetSize = rslt.values.length;
+```
+
+```erlang
+riakc_gset:size(GSet3).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
diff --git a/content/riak/kv/2.9.0p5/developing/data-types/hyperloglogs.md b/content/riak/kv/2.9.0p5/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..1e30fb6d1a
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/data-types/hyperloglogs.md
@@ -0,0 +1,647 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/data-types/hyperloglogs
+  - /riak/kv/2.9.0p5/dev/using/data-types/hyperloglogs
+  - /riak/2.9.0p5/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/2.9.0p5/dev/data-modeling/data-types/hyperloglogs
+  - /riak/2.9.0p5/developing/data-types/hyperloglogs/
+  - /riak/2.9.0/developing/data-types/hyperloglogs/
+  - /riak/kv/2.9.0/developing/data-types/hyperloglogs/
+  - /riak/kv/2.9.0p1/developing/data-types/hyperloglogs/
+  - /riak/kv/2.9.0p2/developing/data-types/hyperloglogs/
+  - /riak/kv/2.9.0p3/developing/data-types/hyperloglogs/
+  - /riak/kv/2.9.0p4/developing/data-types/hyperloglogs/
+---
+
+
+The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog.
+
+For this example we'll use the `hlls` bucket type created and activated above and a bucket called `hlls`:
+
+```erlang
+%% Buckets are simply named binaries in the Erlang client. See the
+%% examples below for more information
+```
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location hllLocation =
+    new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// Buckets and bucket types are simply strings in the Go client.
+
+// See the examples below for more information, or the full example at
+// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go
+
+// We will need the following imports to run the examples:
+import (
+    "fmt"
+    "os"
+    "time"
+
+    riak "github.com/basho/riak-go-client"
+    "errors"
+)
+```
+
+```csharp
+// In the C# client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```javascript
+// In the Node.js client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('<key>', '<bucket>', 'hlls')
+    ->build();
+```
+
+```ruby
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-Data-Type
+# requests, which end in /keys/<key>
+```
+
+
+## Create a HyperLogLog data type
+
+To create a hyperloglog data structure, you need to specify a bucket/key pair to
+hold that hyperloglog. Here is the general syntax for doing so:
+
+```erlang
+HLL = riakc_hll:new().
+
+%% Hyperloglogs in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. 
We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```java +// In the Java client, you specify the location of Data Types +// before you perform operations on them: + +Location hllLocation = + new Location(new Namespace("hlls", "hello"), "darkness"); + +// In the Java client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +hll = bucket.new(key) + +# or + +from riak.datatypes import Hll +hll = Hll(bucket, key) +``` + +```go +// In the Go client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```csharp +// In the C# client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```javascript +// In the Node.js client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```php +// Note that "hlls" is just an example HLL bucket type name used +// in these examples + +$command = (new Command/Builder/UpdateHll($riak_client)) + ->add('gosabres poked you.') + ->add('phprocks viewed your profile.') + ->add('phprocks started following you.') + ->buildBucket('', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +key = "darkness" +hll = Riak::Crdt::HyperLogLog.new(bucket, key) +``` + +```curl +# You cannot create an empty hyperloglog data structure through the HTTP +# interface. +# Hyperloglogs can only be created when an element is added to them, as in the +# examples below. +``` + +Upon creation, our hyperloglog data structure is empty: + +```erlang +HLL. + +%% which will return: +%% {hll,0,[]} +``` + +```java +FetchHll fetch = new FetchHll.Builder(hllLocation) + .build(); +RiakHll hll = client.execute(fetch); +boolean isEmpty = hll.getCardinality() == 0; +``` + +```python +is_empty = hll.value == 0 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } +}); +// Prints "Not Found" to logger.info. +``` + +```csharp + var fetch = new FetchHll.Builder() + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .Build(); + +RiakResult rslt = client.Execute(fetch); +HllResponse response = fetch.Response; +if (response.NotFound) +{ + Console.WriteLine("Not Found"); +} +// Prints "Not Found" to the console. 
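+
+// A hedged note (an addition to the original example): a brand-new HLL
+// has no value stored on the server yet, which is why the fetch reports
+// NotFound rather than a cardinality of 0.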
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+
+$response->getCode() == '404';
+```
+
+```ruby
+puts hll.cardinality
+# Prints "0"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","error":"notfound"}
+```
+
+## Add elements to a HyperLogLog data type
+
+```erlang
+HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
+RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
+HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1),
+
+HLL2.
+
+%% which will return:
+%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]}
+```
+
+```java
+HllUpdate hllUpdate = new HllUpdate()
+                        .add("Jokes")
+                        .add("Are")
+                        .addAll(Arrays.asList("Better", "Explained", "Jokes"));
+
+hllUpdate.getElementAdds();
+// Returns the set of ["Jokes", "Are", "Better", "Explained"]
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+// We will add values in the next example
+```
+
+```csharp
+// We will add values in the next example
+```
+
+```javascript
+// We will add values in the next example
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('Jokes')
+    ->add('Are')
+    ->add('Better')
+    ->add('Explained')
+    ->add('Jokes')
+    ->buildBucket('my_hlls', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["my", "old", "friend"]}'
+```
+
+However, when using a non-HTTP client, the approximate cardinality/value of our
+data structure will be 0, locally, until it's pushed to the server and then
+[fetched](#retrieve-a-hyperloglog-data-type) from the server.
+
+```erlang
+riakc_hll:value(HLL2) == 0.
+
+%% which will return:
+%% true
+
+Port = 8087,
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port),
+Key = <<"Holy Diver">>,
+BucketType = <<"hlls">>,
+Bucket = {BucketType, <<"rainbow in the dark">>},
+
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)).
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)).
+```
+
+```java
+// Using hllUpdate and hllLocation from above examples
+
+UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate)
+        .build();
+client.execute(update);
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+adds := [][]byte{
+    []byte("Jokes"),
+    []byte("Are"),
+    []byte("Better"),
+    []byte("Explained"),
+    []byte("Jokes"),
+}
+
+builder := riak.NewUpdateHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    WithAdditions(adds...).
+    Build()
+if err != nil {
+    return err
+}
+
+return cluster.Execute(cmd)
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness',
+    additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'],
+};
+
+client.updateHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```csharp
+var adds = new HashSet<string> { "Jokes", "Are", "Better", "Explained", "Jokes" };
+
+var update = new UpdateHll.Builder(adds)
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .WithReturnBody(true)
+    .Build();
+
+RiakResult rslt = client.Execute(update);
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('Jokes')
+    ->add('Are')
+    ->add('Better')
+    ->add('Explained')
+    ->add('Jokes')
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+hll.add('Jokes')
+hll.batch do |s|
+  s.add 'Are'
+  s.add 'Better'
+  s.add 'Explained'
+  s.add 'Jokes'
+end
+```
+
+## Retrieve a HyperLogLog data type
+
+Now we can check the approximate count (i.e. the cardinality) of the
+elements added to our hyperloglog data structure:
+
+```erlang
+{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key),
+riakc_hll:value(HLL3) == 4.
+
+%% which would return:
+%% true
+
+%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the
+%% unique elements we've added to the data structure.
+```
+
+```java
+FetchHll hllFetchCmd = new FetchHll.Builder(hllLocation).build();
+RiakHll hll = client.execute(hllFetchCmd);
+hll.getCardinality();
+// Which returns 4
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = bucket.get('hll_one')
+# myhll.value == 4
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+    logger.info("Hyperloglog cardinality is: " + rslt.cardinality);
+});
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
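+// A hedged note (an addition to the original example): the reported
+// cardinality is an estimate; hyperloglogs trade perfect accuracy for
+// constant, very small storage.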
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($response->getHll()->getData()));
+$this->assertEquals(4, $response->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
diff --git a/content/riak/kv/2.9.0p5/developing/data-types/maps.md b/content/riak/kv/2.9.0p5/developing/data-types/maps.md
new file mode 100644
index 0000000000..a691fde20c
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/data-types/maps.md
@@ -0,0 +1,1888 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/data-types/maps
+  - /riak/kv/2.9.0p5/dev/using/data-types/maps
+  - /riak/2.9.0p5/dev/data-modeling/data-types/maps
+  - /riak/kv/2.9.0p5/dev/data-modeling/data-types/maps
+  - /riak/2.9.0p5/developing/data-types/maps/
+  - /riak/2.9.0/developing/data-types/maps/
+  - /riak/kv/2.9.0/developing/data-types/maps/
+  - /riak/kv/2.9.0p1/developing/data-types/maps/
+  - /riak/kv/2.9.0p2/developing/data-types/maps/
+  - /riak/kv/2.9.0p3/developing/data-types/maps/
+  - /riak/kv/2.9.0p4/developing/data-types/maps/
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
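+
+For example, the output will include lines like the following (a
+representative sample; the exact property list varies by version):
+
+```bash
+datatype: map
+allow_mult: true
+```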
+ +If our `map` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: map +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate maps +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status maps +``` + +After creating and activating our new `maps` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key location that contains our map. + +The syntax for creating a map is analogous to the +syntax for creating other data types: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location map = + new Location(new Namespace("", ""), ""); +``` + +```ruby +bucket = client.bucket_type('bucket_type_name').bucket('bucket_name') +map = Riak::Crdt::Map.new(bucket, key) +``` + +```php +$location = new /Basho/Riak/Location('key', 'bucket', 'bucket_type'); +``` + +```python +# The client detects the bucket type's datatype and automatically +# returns the right datatype for you, in this case a Map. +map = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Map +map = Map(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("") + .WithBucket("") + .WithKey(""); +``` + +```javascript +// Options to pass to the various map methods +var options = { + bucketType: '', + bucket: '', + key: '' +}; +``` + +```erlang +%% Maps in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +curl http://localhost:8098/types//buckets//datatypes/ + +# Note that this differs from the URL structure for non-data type requests, +# which end in /keys/ +``` + +## Create a Map + +For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket. + +We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type: + +```java +// In the Java client, you specify the location of data types +// before you perform operations on them: + +Location ahmedMap = + new Location(new Namespace("maps", "customers"), "ahmed_info"); +``` + +```ruby +customers = client.bucket_type('maps').bucket('customers') +map = Riak::Crdt::Map.new(customers, 'ahmed_info') + +# Alternatively, the Ruby client enables you to set a bucket type as being +# globally associated with a Riak data type. 
The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+        .update("first_name", ru1)
+        .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Ahmed')
+    ->updateRegister('phone_number', '5551234567')
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved. 
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info / + -H "Content-Type: application/json" / + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+        .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_customer', false)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"enterprise_customer">>, flag},
+                        fun(F) -> riakc_flag:disable(F) end,
+                        Map3).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "enterprise_customer_flag": "disable"
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+  }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"]
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map4).
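+
+%% A hedged sketch (not part of the original example): to read the
+%% flag from a map fetched from the server instead, something like
+%% this could be used:
+%%
+%% {ok, M} = riakc_pb_socket:fetch_type(Pid,
+%%              {<<"maps">>, <<"customers">>}, <<"ahmed_info">>),
+%% riakc_map:fetch({<<"enterprise_customer">>, flag}, M).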
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+        .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateCounter('page_visits', $updateCounter)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "page_visits_counter": 1
+    }
+  }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles. 
We'll store
+that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+        .add("robots")
+        .add("opera")
+        .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+        .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('robots')
+    ->add('opera')
+    ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{"interests":["motorcycles","opera","robots"]},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "add_all": [
+          "robots",
+          "opera",
+          "motorcycles"
+        ]
+      }
+    }
+  }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? 
interest + end +end + +# This will return three Boolean values +``` + +```php +$map = (new /Basho/Riak/Command/Builder/FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$sets = $map->getSet('interests'); +var_dump($sets->getData()); +``` + +```python +reloaded_map = map.reload() +for interest in ['robots', 'opera', 'motorcycles']: + interest in reloaded_map.sets['interests'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +Map ahmedMap = response.Value; + +// All of the following return true: +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("opera"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + assert(rslt.map.sets['interests'].indexOf('robots') !== -1); +}); +``` + +```erlang +riakc_map:dirty_value(Map6). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false +``` + +We learn from a recent purchasing decision that Ahmed actually doesn't +seem to like opera. He's much more keen on indie pop. Let's change the +`interests` set to reflect that: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate() + .remove("opera") + .add("indie pop"); +MapUpdate mu = new MapUpdate() + .update("interests", su); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.batch do |m| + m.sets['interests'].remove('opera') + m.sets['interests'].add('indie pop') +end +``` + +```php +$updateSet = (new /Basho/Riak/Command/Builder/UpdateSet($riak)) + ->add('indie pop') + ->remove('opera'); + +(new /Basho/Riak/Command/Builder/UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +```python +map.sets['interests'].discard('opera') +map.sets['interests'].add('indie pop') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.AddToSet("interests", "indie pop"); +mapOperation.RemoveFromSet("interests", "opera"); + +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); + +MapResponse response = cmd.Response; +Map ahmedMap = response.Value; + +// This is false +ahmedMap.Sets.GetValue("interests").Contains("opera"); + +// These are true +ahmedMap.Sets.GetValue("interests").Contains("indie pop"); +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + mapOp.removeFromSet('interests', 'opera'); + mapOp.addToSet('interests', 'indie pop'); + + options.context = rslt.context; + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map7 = 
riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+                        Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of a wide variety of
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("first_name", ru1)
+        .update("last_name", ru2)
+        .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Annika')
+    ->updateRegister('last_name', 'Weiss')
+    ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
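+// map('annika_info') below scopes the chained operations to the
+// embedded map, so the registers are set inside annika_info rather
+// than at the top level of ahmed_info.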
+mapOp.map('annika_info') + .setRegister('first_name', 'Annika') + .setRegister('last_name', 'Weiss') + .setRegister('phone_number', '5559876543'); + +options.op = mapOp; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map12 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end, + Map11), +Map13 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"last_name">>, register}, + fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end, + Map12), +Map14 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end, + Map13). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info / + -H "Content-Type: application/json" / + -d ' + { + "update": { + "annika_info_map": { + "update": { + "first_name_register": "Annika", + "last_name_register": "Weiss", + "phone_number_register": "5559876543" + } + } + } + } + ' +``` + +The value of a register in a map can be obtained without a special +method: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +String annikaFirstName = response.getDatatype() + .getMap("annika_info") + .getRegister("first_name") + .view() + .toString(); +``` + +```ruby +map.maps['annika_info'].registers['first_name'] + +# "Annika" +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getRegister('first_name'); // Annika +``` + +```python +map.reload().maps['annika_info'].registers['first_name'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Registers.GetValue("first_name"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var annikaFirstName = + rslt.map.maps['annika_info'].registers['first_name'].toString('utf8'); +}); +``` + +```erlang +riakc_map:dirty_value(Map14). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +Registers can also be removed: + +```java +// This example uses our "ahmedMap" location from above. 
Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .removeRegister("first_name");
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('first_name')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->removeRegister('first_name');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['first_name']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("first_name");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('first_name');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+                         fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+                         Map14).
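+
+%% Note: field removals are the operations that require the opaque
+%% context when an update is sent to the server (see the Java and PHP
+%% examples above, which pass the fetched context explicitly).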
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "remove": ["phone_number_register"]
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+  }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("enterprise_plan", new FlagUpdate(false))
+        .update("family_plan", new FlagUpdate(false))
+        .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_plan', false)
+    ->updateFlag('family_plan', false)
+    ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"enterprise_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map15),
+Map17 = riakc_map:update( 
{<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"family_plan">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + M) end, + Map16), +Map18 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"free_plan">>, flag}, + fun(F) -> riakc_flag:enable(F) end, + M) end, + Map17). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info / + -H "Content-Type: application/json" / + -d ' + { + "update": { + "annika_info_map": { + "update": { + "enterprise_plan_flag": "disable", + "family_plan_flag": "disable", + "free_plan_flag": "enable" + } + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag==" + } + ' +``` + +The value of a flag can be retrieved at any time: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +boolean enterprisePlan = response.getDatatype() + .getMap("annika_info") + .getFlag("enterprise_plan") + .view(); +``` + +```ruby +map.maps['annika_info'].flags['enterprise_plan'] + +# false +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false +``` + +```python +map.reload().maps['annika_info'].flags['enterprise_plan'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Flags["enterprise_plan"]; +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var enterprisePlan = + rslt.map.maps.annika_info.flags.enterprise_plan; +}); +``` + +```erlang +riakc_map:dirty_value(Map18). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +It's also important to track the number of purchases that Annika has +made with our company. 
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new /Basho/Riak/Command/Builder/IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new /Basho/Riak/Command/Builder/UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new /Basho/Riak/Command/Builder/UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
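+
+%% As with the top-level page_visits counter, widget_purchases is
+%% created implicitly (starting at 0) if it does not exist yet.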
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "widget_purchases_counter": 1
+        }
+      }
+    }
+  }
+  '
+```
+
+Now let's store Annika's interests in a set:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate().add("tango dancing");
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("interests", su);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].sets['interests'].add('tango dancing')
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('tango dancing');
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].sets['interests'].add('tango dancing')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").AddToSet("interests", "tango dancing");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+annika_map.addToSet('interests', 'tango dancing');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map20 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"interests">>, set},
+        fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end,
+        M) end,
+    Map19).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "add": "tango dancing"
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+We can remove that interest in just the way that we would expect:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate().remove("tango dancing");
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("interests", su);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].sets['interests'].remove('tango dancing')
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('tango dancing');
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($response->getMap()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].sets['interests'].discard('tango dancing')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing");
+
+// Note: using Context from previous response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+client.Execute(builder.Build());
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.removeFromSet('interests', 'tango dancing');
+
+    options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map21 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"interests">>, set},
+        fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end,
+        M) end,
+    Map20).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "remove": "tango dancing"
+          }
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+  }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('first_purchase', true)
+    ->updateRegister('amount', '1271')
+    ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map22 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"purchase">>, map},
+        fun(P) -> riakc_map:update(
+            {<<"first_purchase">>, flag},
+            fun(R) -> riakc_flag:enable(R) end,
+            P) end,
+        M) end,
+    Map21
+).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "purchase_map": {
+            "update": {
+              "first_purchase_flag": "enable",
+              "amount_register": "1271",
+              "items_set": {
+                "add": "large widget"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  '
+```
diff --git a/content/riak/kv/2.9.0p5/developing/data-types/sets.md b/content/riak/kv/2.9.0p5/developing/data-types/sets.md
new file mode 100644
index 0000000000..d5eee90751
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/data-types/sets.md
@@ -0,0 +1,777 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Sets"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Sets"
+    identifier: "data_types_sets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/data-types/sets
+  - /riak/kv/2.9.0p5/dev/using/data-types/sets
+  - /riak/2.9.0p5/dev/data-modeling/data-types/sets
+  - /riak/kv/2.9.0p5/dev/data-modeling/data-types/sets
+  - /riak/2.9.0p5/developing/data-types/sets/
+  - /riak/2.9.0/developing/data-types/sets/
+  - /riak/kv/2.9.0/developing/data-types/sets/
+  - /riak/kv/2.9.0p1/developing/data-types/sets/
+  - /riak/kv/2.9.0p2/developing/data-types/sets/
+  - /riak/kv/2.9.0p3/developing/data-types/sets/
+  - /riak/kv/2.9.0p4/developing/data-types/sets/
+---
+
+
+Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps).
+
+Sets are collections of unique binary values (such as strings); each value can appear in a given set only once.
+
+For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `set`:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+```
+
+> **Note**
+>
+> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `sets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: set
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate sets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+After creating and activating our new `sets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('cities', 'travel', 'sets');
+```
+
+```python
+travel = client.bucket_type('sets').bucket('travel')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak set.
+cities_set = travel.new('cities')
+
+# You can also create a reference to a set explicitly:
+from riak.datatypes import Set
+
+cities_set = Set(travel, 'cities')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the set with which we want to
+// interact:
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+```
+
+```javascript
+// Now we'll create an options object for the set with which we want to
+// interact:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+CitiesSet = riakc_set:new().
+
+%% Sets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty set through the HTTP interface. Sets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our set is empty. We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
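+# => true, since no elements have been added to the set yet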
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+count($set->getData());
+```
+
+```python
+len(cities_set) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+FetchSet fetchSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchSetCommand);
+SetResponse response = fetchSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("set 'cities' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_set:size(CitiesSet) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% set that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a Set
+
+But let's say that we read a travel brochure saying that Toronto and
+Montreal are nice places to go. Let's add them to our `cities` set:
+
+```java
+// Using our "cities" Location from above:
+
+SetUpdate su = new SetUpdate()
+        .add("Toronto")
+        .add("Montreal");
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .build();
+client.execute(update);
+```
+
+```ruby
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Toronto')
+    ->add('Montreal')
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "Toronto", "Montreal" };
+
+var builder = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities")
+    .WithAdditions(adds);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse response = cmd.Response;
+
+Assert.Contains("Toronto", response.AsStrings.ToArray());
+Assert.Contains("Montreal", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+var cmd = new Riak.Commands.CRDT.UpdateSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['Toronto', 'Montreal'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet),
+CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1).
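+
+%% The add_element/2 calls above only accumulate operations locally.
+%% A sketch of sending them to Riak (assuming a connected
+%% riakc_pb_socket process Pid):
+%%
+%%   riakc_pb_socket:update_type(Pid,
+%%                               {<<"sets">>, <<"travel">>},
+%%                               <<"cities">>,
+%%                               riakc_set:to_op(CitiesSet2)).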
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["Toronto", "Montreal"]}'
+```
+
+## Remove from a Set
+
+Later on, we hear that Hamilton and Ottawa are nice cities to visit in
+Canada, but if we visit them, we won't have time to visit Montreal, so
+we need to remove it from the list.
+
+Note that removing an element from a set is trickier than adding elements. In
+order to remove an item (or multiple items), we need to first fetch the
+set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context).
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+        .remove("Montreal")
+        .add("Hamilton")
+        .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Hamilton')
+    ->add('Ottawa')
+    ->remove('Montreal')
+    ->atLocation($location)
+    ->withContext($response->getSet()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse response = cmd.Response;
+
+// using System.Linq
+var responseStrings = response.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4).
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}
+
+curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \
+  -H "Content-Type: application/json" \
+  -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}'
+```
+
+## Retrieve a Set
+
+Now, we can check on which cities are currently in our set:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+# => #<Set: {"Hamilton", "Ottawa", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+var_dump($set->getData());
+```
+
+```python
+cities_set.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would
+# return frozenset([]).
+
+cities_set.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions or deletions.
+cities_set.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in setResponse.AsStrings)
+{
+    Console.WriteLine("Cities Set Value: {0}", value);
+}
+
+// Output:
+// Cities Set Value: Hamilton
+// Cities Set Value: Ottawa
+// Cities Set Value: Toronto
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("cities set values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: cities set values: 'Hamilton, Ottawa, Toronto'
+```
+
+```erlang
+riakc_set:dirty_value(CitiesSet5).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_set:value(CitiesSet5).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"sets">>,<<"travel">>},
+                                        <<"cities">>).
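+
+%% riakc_set:value(SetX) then returns the set of members as stored on
+%% the server.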
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="}
+
+# You can also fetch the value of the set without the context included:
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false
+
+# Response
+{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]}
+```
+
+## Find Set Member
+
+Or we can see whether our set includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('Vancouver', $set->getData()); # false
+
+in_array('Ottawa', $set->getData()); # true
+```
+
+```python
+'Vancouver' in cities_set
+# False
+
+'Ottawa' in cities_set
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver");
+bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var cities_set = result.values;
+cities_set.indexOf('Vancouver'); // if present, index is >= 0
+cities_set.indexOf('Ottawa'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, CitiesSet5 is the most "recent" set from the
+%% standpoint of our application.
+
+riakc_set:is_element(<<"Vancouver">>, CitiesSet5).
+riakc_set:is_element(<<"Ottawa">>, CitiesSet5).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of Set
+
+We can also determine the size of the set:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+int numberOfCities = response.getDatatype().view().size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($set->getData());
+```
+
+```python
+len(cities_set)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+setResponse.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var cities_set_size = result.values.length;
+```
+
+```erlang
+riakc_set:size(CitiesSet5).
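+
+%% As noted earlier, size/1 counts only the value last fetched from the
+%% server; elements added locally are not included until the set is
+%% stored and fetched again.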
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
diff --git a/content/riak/kv/2.9.0p5/developing/faq.md b/content/riak/kv/2.9.0p5/developing/faq.md
new file mode 100644
index 0000000000..4785aca453
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/faq.md
@@ -0,0 +1,662 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Frequently Asked Questions"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Developing FAQ"
+    identifier: "developing_faq"
+    weight: 108
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/community/faqs/developing
+  - /riak/kv/2.9.0p5/community/faqs/developing
+  - /riak/2.9.0p5/developing/faq/
+  - /riak/2.9.0/developing/faq/
+  - /riak/kv/2.9.0/developing/faq/
+  - /riak/kv/2.9.0p1/developing/faq/
+  - /riak/kv/2.9.0p2/developing/faq/
+  - /riak/kv/2.9.0p3/developing/faq/
+  - /riak/kv/2.9.0p4/developing/faq/
+---
+
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/benchmarking
+[Bitcask]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask
+[Bucket Properties]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage
+[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/commit-hooks
+[Configuration Files]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference
+[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries
+[MapReduce]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce
+[Memory]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/memory
+[Riak CS]: {{<baseurl>}}riak/cs/2.1.1
+[System Planning]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context#vector-clocks
+
+
+## General
+
+
+**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?**
+
+**A:**
+  If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example:
+
+  ```erlang
+  {bitcask, [
+      {data_root, "data/bitcask"},
+      {expiry_secs, 86400} %% Expire after a day
+  ]},
+  ```
+
+  There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0.
+
+  You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM.
+
+
+---
+
+**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?**
+
+
+**A:**
+  Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free.
+
+  If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact.
+
+
+---
+
+**Q: Can I list buckets or keys in production?**
+
+
+**A:**
+  It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size.
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+
+---
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3.
+
+  For this example, I am using simple small integers instead of the actual 160-bit partition index values for the sake of simplicity. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key, and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+```
+0-1-2      1-2-3      2-3-4      3-4-5
+4-5-6      5-6-7      6-7-8      7-8-9
+8-9-10     9-10-11    10-11-12   11-12-13
+12-13-14   13-14-15   14-15-0    15-0-1
+```
+ + Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes: + + + + + + + + + + + + + + + + +
+
+```
+0-1-4-7-10-13    0-2-4-7-10-13    0-2-5-7-10-13    0-2-5-8-10-13
+0-2-5-8-11-13    0-2-5-8-11-14    0-3-4-7-10-13    0-3-5-7-10-13
+0-3-5-8-10-13    0-3-5-8-11-13    0-3-5-8-11-14    0-3-6-7-10-13
+0-3-6-8-10-13    0-3-6-8-11-13    0-3-6-8-11-14    0-3-6-9-10-13
+0-3-6-9-11-13    0-3-6-9-11-14    0-3-6-9-12-13    0-3-6-9-12-14
+0-3-6-9-12-15    1-2-5-8-11-14    1-3-5-8-11-14    1-3-6-8-11-14
+1-3-6-9-11-14    1-3-6-9-12-14    1-3-6-9-12-15    1-4-5-8-11-14
+1-4-6-8-11-14    1-4-6-9-11-14    1-4-6-9-12-14    1-4-6-9-12-15
+1-4-7-8-11-14    1-4-7-9-11-14    1-4-7-9-12-14    1-4-7-9-12-15
+1-4-7-10-11-14   1-4-7-10-12-14   1-4-7-10-12-15   1-4-7-10-13-14
+1-4-7-10-13-15   2-3-6-9-12-15    2-4-6-9-12-15    2-4-7-9-12-15
+2-4-7-10-12-15   2-4-7-10-13-15   2-5-6-9-12-15    2-5-7-9-12-15
+2-5-7-10-12-15   2-5-7-10-13-15   2-5-8-9-12-15    2-5-8-10-12-15
+2-5-8-10-13-15   2-5-8-11-12-15   2-5-8-11-13-15   2-5-8-11-14-15
+```
+ + When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error. + + When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes. + + Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency. + A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for over half of them to respond. This results in consistent responses to get even when one of the vnodes in the preflist has been compromised. + + Secondary index queries, however, consult a covering set which may include only 1 member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned. + + Continuing with the above example, consider if node C is force removed. + This is one possible configuration after rebalancing: + +``` +A: 0-5-10-15 +B: 1-6-11-2* +D: 3-8-12-7* +E: 4-9-14-12* +``` + + Vnodes 2,7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information. + + In this new 4-node configuration any coverage set that includes vnodes 2,7, or 12 will return incomplete results until consistency is restored via read-repair or AAE, because not all vnodes will contain the data that would otherwise be present. + + + So making a couple of assumptions for demonstration purposes: + + 1. The keys `a`, `b`, and `c` are stored in the following preflists: + + ``` + a - 0-1-2 + b - 6-7-8 + c - 10-11-12 + ``` + + 2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed + + 3. AAE is not enabled + + The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries. + + The results from secondary index queries that should return all 3 keys will vary depending on the nodes chosen for the coverage set. Of the 56 possible covering sets ... + + * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`: + + + + + + +
+
+```
+0-2-5-8-10-13   0-2-5-8-11-13   0-2-5-8-11-14   0-3-5-8-10-13
+0-3-5-8-11-13   0-3-5-8-11-14   0-3-6-8-10-13   0-3-6-8-11-13
+0-3-6-8-11-14   0-3-6-9-10-13   0-3-6-9-11-13   0-3-6-9-11-14
+1-2-5-8-11-14   1-3-5-8-11-14   1-3-6-8-11-14   1-3-6-9-11-14
+1-4-5-8-11-14   1-4-6-8-11-14   1-4-6-9-11-14   1-4-7-8-11-14
+```
+ + * 24 sets (42.9%) will return 2 of the 3 keys: + + + + + + + + + + + +
+
+```
+{a,b} (7 sets):
+0-3-6-9-12-13    0-3-6-9-12-14    0-3-6-9-12-15    1-3-6-9-12-14
+1-3-6-9-12-15    1-4-6-9-12-14    1-4-6-9-12-15
+
+{a,c} (12 sets):
+0-1-4-7-10-13    0-2-4-7-10-13    0-2-5-7-10-13    0-3-4-7-10-13
+0-3-5-7-10-13    0-3-6-7-10-13    1-4-7-10-11-14   1-4-7-10-12-14
+1-4-7-10-12-15   1-4-7-10-13-14   1-4-7-10-13-15   1-4-7-9-11-14
+
+{b,c} (5 sets):
+2-5-8-10-12-15   2-5-8-10-13-15   2-5-8-11-12-15   2-5-8-11-14-15
+2-5-8-11-13-15
+```
+ + * 10 sets (17.8%) will return only one of the 3 keys: + + + + + + + +
+
+```
+{a} (2 sets):
+1-4-7-9-12-14    1-4-7-9-12-15
+
+{b} (4 sets):
+2-3-6-9-12-15    2-4-6-9-12-15    2-5-6-9-12-15    2-5-8-9-12-15
+
+{c} (4 sets):
+2-4-7-10-12-15   2-4-7-10-13-15   2-5-7-10-12-15   2-5-7-10-13-15
+```
+ + * 2 sets (3.6%) will not return any of the 3 keys + + +
+
+```
+2-4-7-9-12-15    2-5-7-9-12-15
+```
+ +--- + +**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. + +--- + +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + + +--- + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + + +--- + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories.
+
+  ```erlang
+  ClusterNode = 'riak@127.0.0.1'.
+
+  application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+  {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+  Ring2 = setelement(2, Ring, node()).
+  riak_core_ring_manager:set_my_ring(Ring2).
+  riak_core_ring_manager:write_ringfile().
+  [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- riak_core_ring:all_members(Ring2)].
+  ```
+
+
+---
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+  There isn't a limit on object size, but we suggest you keep it to no more than 1-2MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+
+---
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+  The storage per key is 40 bytes plus the key size and bucket name size.
+
+  Example:
+
+  Key size: 15 bytes.
+  Bucket Name size: 10 bytes.
+
+  Total size = 40 + 15 + 10 = **65 bytes**.
+
+
+
+---
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+  It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+
+---
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+  The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+  Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster.
+
+  The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+
+---
+
+**Q: Are Riak keys / buckets case sensitive?**
+
+
+**A:**
+  Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+
+---
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+  We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+  1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+  2. You will be able to upgrade Riak and your application independently of one another.
+  3. When your application or Riak need more capacity, you can scale them separately to meet your production needs.
+
+
+---
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+  Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+
+
+---
+
+**Q: How do I spread requests across---i.e.
load balance---a Riak cluster?** + + +**A:** + There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**. + + For further information see [System Planning]. + + +--- + + +**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?** + There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node a series of merges are kicked off and the total size of the data directory shrinks. Why does this happen? + + +**A:** + Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows: + + 1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`) + 2. Remove the currently active file from the list; the active file is the one being actively written + 3. Lookup file stats for each data file; this includes percent fragmentation and number of dead bytes + 4. If any of the stats exceed the defined triggers, the Bitcask directory is merged + + The default triggers for a Bitcask directory: + + * `{frag_merge_trigger, 60}, % >= 60% fragmentation` + * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB` + + In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory. + + If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default). + + +--- + +**Q: When retrieving a list of siblings I am getting the same vtag multiple times.** + When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling? + + +**A:** + The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata. + + It is possible to get siblings with the same vtag during vector clock pruning and read/repair. + + See [vector clocks] for more information. + + + +--- + +**Q: How should I structure larger data objects?** + I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs? + + +**A:** + The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include: + + 1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time? + 2. How likely are the objects to be updated at the same time by multiple processes? + + If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. 
Generally, you will want to add links to connect the objects for easy fetching and traversal.
+
+
+---
+
+**Q: Is there any way in Riak to limit access to a user or a group of users?**
+
+
+**A:**
+  Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication.
+
+  If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it.
+
+
+---
+
+**Q: Is there a way to enforce a schema on data in a given bucket?**
+  Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error.
+
+
+**A:**
+  Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. This document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in Javascript that restricts non-JSON content.
+
+
+---
+
+**Q: How does the Erlang Riak Client manage node failures?**
+  Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down?
+
+
+**A:**
+  The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function.
+
+  * `queue_if_disconnected` (default: `false`) --- requests will be queued when the connection to the server is lost.
+  * `auto_reconnect` (default: `false`) --- if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`.
+
+  If these options are both false, connection errors will be returned to the process making requests as `{error, Reason}` tuples.
+
+
+---
+
+**Q: Is there a limiting factor for the number of buckets in a cluster?**
+
+
+**A:**
+  As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node.
+
+  More on [Bucket Properties].
+
+
+---
+
+**Q: Is it possible to configure a single bucket's properties in `app.config`?**
+
+
+**A:**
+  Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state.
+
+  You can read more on `app.config` in [Configuration Files].
+
+
+---
+
+**Q: Is there a simple command to delete a bucket?**
+
+
+**A:**
+  There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work:
+
+  ```curl
+  curl -X DELETE http://your-host:8098/riak/your-bucket
+  ```
+
+
+---
+
+**Q: Can Riak be configured to fail an update instead of generating a conflict?**
+
+
+**A:**
+  No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do.
+
+
+---
+
+**Q: How can I limit the number of keys retrieved?**
+
+
+**A:**
+  You'll need to use a [MapReduce] job for this.
+
+  You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster. It will only reduce load on your client.
+
+
+---
+
+**Q: How is the real hash value for replicas calculated based on the preflist?**
+
+
+**A:**
+  The hash is calculated first and then the subsequent *N* partitions are chosen for the preflist.
+
+
+---
+
+**Q: Do client libraries support load balancing/round robin?**
+
+
+**A:**
+
+  * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred.
+  * The Java client is strictly round robin, but with retries built in.
+  * The Python client also follows round robin without retries.
+  * The Erlang client does not support any load balancing.
+
+## MapReduce
+
+
+**Q: Does the number of keys in a bucket affect the performance of MapReduce?**
+
+
+**A:**
+  Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run.
+
+
+---
+
+**Q: How do I filter out `not_found` from MapReduce results?**
+  If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys.
+
+
+**A:**
+  There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list].
+
+
+---
+
+**Q: Is it possible to call a reduce function at specific intervals during a map function?**
+  When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often.
+
+
+**A:**
+  Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce.
+
+
+---
+
+**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?**
+
+
+**A:**
+  Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase.
+
+
+---
+
+**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?**
+
+
+**A:**
+  This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com].
+
+
+---
+
+**Q: Why does MapReduce return a JSON object on occasion instead of an array?**
+
+
+**A:**
+  `mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---is turned into a hash:
+
+  ```erlang
+  list_to_binary(mochijson2:encode([{a, b}, {foo, bar}])).
+  <<"{\"a\":\"b\",\"foo\":\"bar\"}">>
+  ```
+
+  JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists.
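+
+  For example, here is a sketch of that workaround (the exact escaping
+  shown is an assumption and may vary by mochijson2 version):
+
+  ```erlang
+  %% Length-2 lists are encoded as JSON arrays rather than an object:
+  list_to_binary(mochijson2:encode([[a, b], [foo, bar]])).
+  <<"[[\"a\",\"b\"],[\"foo\",\"bar\"]]">>
+  ```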
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started.md b/content/riak/kv/2.9.0p5/developing/getting-started.md new file mode 100644 index 0000000000..cddde766e1 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started.md @@ -0,0 +1,55 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +aliases: + - /riak/2.9.0p5/developing/getting-started/ + - /riak/2.9.0/developing/getting-started/ + - /riak/kv/2.9.0/developing/getting-started/ + - /riak/kv/2.9.0p1/developing/getting-started/ + - /riak/kv/2.9.0p2/developing/getting-started/ + - /riak/kv/2.9.0p3/developing/getting-started/ + - /riak/kv/2.9.0p4/developing/getting-started/ +--- + + +[install index]: {{}}riak/kv/2.9.0p5/setup/installing +[dev client libraries]: {{}}riak/kv/2.9.0p5/developing/client-libraries + +Welcome, new Riak developer! This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
+* [Java]({{}}riak/kv/2.9.0p5/developing/getting-started/java)
+* [Ruby]({{}}riak/kv/2.9.0p5/developing/getting-started/ruby)
+* [Python]({{}}riak/kv/2.9.0p5/developing/getting-started/python)
+* [C Sharp]({{}}riak/kv/2.9.0p5/developing/getting-started/csharp)
+* [Node.js]({{}}riak/kv/2.9.0p5/developing/getting-started/nodejs)
+* [Erlang]({{}}riak/kv/2.9.0p5/developing/getting-started/erlang)
+* [PHP]({{}}riak/kv/2.9.0p5/developing/getting-started/php)
+* [Go]({{}}riak/kv/2.9.0p5/developing/getting-started/golang)
+ +### Community-supported Client Libraries + +Please see our [client libraries][dev client libraries] page for a listing of +community-supported clients. diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/csharp.md b/content/riak/kv/2.9.0p5/developing/getting-started/csharp.md new file mode 100644 index 0000000000..566969ecc0 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/csharp.md @@ -0,0 +1,90 @@ +--- +title: "Getting Started with C Sharp" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "C Sharp" + identifier: "getting_started_csharp" + weight: 103 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/csharp + - /riak/kv/2.9.0p5/dev/taste-of-riak/csharp + - /riak/2.9.0p5/developing/getting-started/csharp/ + - /riak/2.9.0/developing/getting-started/csharp/ + - /riak/kv/2.9.0/developing/getting-started/csharp/ + - /riak/kv/2.9.0p1/developing/getting-started/csharp/ + - /riak/kv/2.9.0p2/developing/getting-started/csharp/ + - /riak/kv/2.9.0p3/developing/getting-started/csharp/ + - /riak/kv/2.9.0p4/developing/getting-started/csharp/ +--- + + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.0p5/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. + +### Client Setup + +Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager. + +{{% note title="Configuring for a remote cluster" %}} +By default, the Riak .NET Client will add a section to your `app.config` file +for a four node local cluster. If you are using a remote cluster, open up +`app.config` and change the `hostAddress` values to point to nodes in your +remote cluster. +{{% /note %}} + +### Connecting to Riak + +Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object. + +```csharp +using System; +using RiakClient; + +namespace TasteOfRiak +{ + class Program + { + static void Main(string[] args) + { + // don't worry, we'll use this string later + const string contributors = "contributors"; + IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig"); + IRiakClient client = cluster.CreateClient(); + } + } +} +``` + +This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndpoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak. + +Let's make sure the cluster is online. Add this to your `Main` method: + +```csharp +var pingResult = client.Ping(); + +if (pingResult.IsSuccess) +{ + Console.WriteLine("pong"); +} +else +{ + Console.WriteLine("Are you sure Riak is running?"); + Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage); +} +``` + +This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes. + +We are now ready to start interacting with Riak. 
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.0p5/developing/getting-started/csharp/crud-operations)
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/2.9.0p5/developing/getting-started/csharp/crud-operations.md
new file mode 100644
index 0000000000..a733f73a4c
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/csharp/crud-operations.md
@@ -0,0 +1,152 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with C Sharp"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "CRUD Operations"
+    identifier: "getting_started_csharp_crud"
+    weight: 100
+    parent: "getting_started_csharp"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/getting-started/csharp/crud-operations/
+  - /riak/2.9.0/developing/getting-started/csharp/crud-operations/
+  - /riak/kv/2.9.0/developing/getting-started/csharp/crud-operations/
+  - /riak/kv/2.9.0p1/developing/getting-started/csharp/crud-operations/
+  - /riak/kv/2.9.0p2/developing/getting-started/csharp/crud-operations/
+  - /riak/kv/2.9.0p3/developing/getting-started/csharp/crud-operations/
+  - /riak/kv/2.9.0p4/developing/getting-started/csharp/crud-operations/
+---
+
+
+### Creating Objects In Riak
+
+Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak.
+
+The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At its most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak.
+
+Add the `RiakClient.Models` namespace to your using directives. Your usings should look like this:
+
+```csharp
+using System;
+using System.Collections.Generic;
+using RiakClient;
+using RiakClient.Models;
+```
+
+Add the `Person` class to the `TasteOfRiak` namespace:
+
+```csharp
+public class Person
+{
+    public string EmailAddress { get; set; }
+    public string FirstName { get; set; }
+    public string LastName { get; set; }
+}
+```
+
+Now let's create some people!
+
+```csharp
+var people = new[]
+{
+    new Person {
+        EmailAddress = "bashoman@basho.com",
+        FirstName = "Basho",
+        LastName = "Man"
+    },
+    new Person {
+        EmailAddress = "johndoe@gmail.com",
+        FirstName = "John",
+        LastName = "Doe"
+    }
+};
+
+foreach (var person in people)
+{
+    var o = new RiakObject(contributors, person.EmailAddress, person);
+    var putResult = client.Put(o);
+
+    if (putResult.IsSuccess)
+    {
+        Console.WriteLine("Successfully saved {0} to bucket {1}", o.Key, o.Bucket);
+    }
+    else
+    {
+        Console.WriteLine("Are you *really* sure Riak is running?");
+        Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage);
+    }
+}
+```
+
+In this sample, we create a collection of `Person` objects and then save each `Person` to Riak.
+
+Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`.
+
+Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, you'll see an error message displaying the result code and error text.
+
+### Reading from Riak
+
+Let's find a person!
+ +```csharp +var result = client.Get(contributors, "bashoman@basho.com"); +if (result.IsSuccess) +{ + bashoman = result.Value.GetObject(); + Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage); +} +``` + +We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult` which, like other RiakResults, helpfully encapsulates the communication with Riak. + +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/2.9.0p5/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..f44b0b2ffa --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,115 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/2.9.0p5/dev/taste-of-riak/object-modeling-csharp + - /riak/2.9.0p5/developing/getting-started/csharp/object-modeling/ + - /riak/2.9.0/developing/getting-started/csharp/object-modeling/ + - /riak/kv/2.9.0/developing/getting-started/csharp/object-modeling/ + - /riak/kv/2.9.0p1/developing/getting-started/csharp/object-modeling/ + - /riak/kv/2.9.0p2/developing/getting-started/csharp/object-modeling/ + - /riak/kv/2.9.0p3/developing/getting-started/csharp/object-modeling/ + - /riak/kv/2.9.0p4/developing/getting-started/csharp/object-modeling/ +--- + + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. 
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting UTC datetime in an [ISO 8601][iso_8601]
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+so that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, please refer to
+[this source code][2] for the repositories that we'll be using.
+
+[This console application][3] exercises the code that we've written.
+
+The repository pattern and `TimelineManager` help with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
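+
+To make these key patterns concrete, here is a minimal sketch of a
+key-building helper. The `KeyBuilder` class and its method names are
+hypothetical (they are not part of the Taste of Riak code), but they
+show how cheaply an application can compute natural keys:
+
+```csharp
+using System;
+
+// Hypothetical helper that computes the natural keys described above.
+public static class KeyBuilder
+{
+    // e.g. "joeuser_2014-03-06T02:05:13"
+    public static string MsgKey(string userName, DateTime createdUtc)
+    {
+        return string.Format("{0}_{1:yyyy-MM-ddTHH:mm:ss}", userName, createdUtc);
+    }
+
+    // e.g. "joeuser_Sent_2014-03-06"
+    public static string TimelineKey(string owner, string timelineType, DateTime dayUtc)
+    {
+        return string.Format("{0}_{1}_{2:yyyy-MM-dd}", owner, timelineType, dayUtc);
+    }
+}
+```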
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/csharp/querying.md b/content/riak/kv/2.9.0p5/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..d4edd2d5b2 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/csharp/querying.md @@ -0,0 +1,218 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/querying-csharp + - /riak/kv/2.9.0p5/dev/taste-of-riak/querying-csharp + - /riak/2.9.0p5/developing/getting-started/csharp/querying/ + - /riak/2.9.0/developing/getting-started/csharp/querying/ + - /riak/kv/2.9.0/developing/getting-started/csharp/querying/ + - /riak/kv/2.9.0p1/developing/getting-started/csharp/querying/ + - /riak/kv/2.9.0p2/developing/getting-started/csharp/querying/ + - /riak/kv/2.9.0p3/developing/getting-started/csharp/querying/ + - /riak/kv/2.9.0p4/developing/getting-started/csharp/querying/ +--- + + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. 
A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. + +```csharp +Console.WriteLine("Creating Data"); +Customer customer = CreateCustomer(); +IEnumerable orders = CreateOrders(customer); +OrderSummary orderSummary = CreateOrderSummary(customer, orders); + +Console.WriteLine("Starting Client"); +using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = endpoint.CreateClient(); + + Console.WriteLine("Storing Data"); + + client.Put(ToRiakObject(customer)); + + foreach (Order order in orders) + { + // NB: this adds secondary index data as well + client.Put(ToRiakObject(order)); + } + + client.Put(ToRiakObject(orderSummary)); + + ... + ... + ... +} +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```csharp +Console.WriteLine("Fetching related data by shared key"); +string key = "1"; + +var result = client.Get(customersBucketName, key); +CheckResult(result); +Console.WriteLine("Customer 1: {0}/n", GetValueAsString(result)); + +result = client.Get(orderSummariesBucketName, key); +CheckResult(result); +Console.WriteLine("OrderSummary 1: {0}/n", GetValueAsString(result)); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.0p5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will make a note of where +secondary index data is added to our model objects. 
+ +```csharp +private static RiakObject ToRiakObject(Order order) +{ + var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString()); + var riakObject = new RiakObject(orderRiakObjectId, order); + + IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName); + salesPersonIdIndex.Add(order.SalesPersonId.ToString()); + + BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName); + orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd")); + + return riakObject; +} +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +```csharp +// Query for order keys where the SalesPersonId index is set to 9000 +var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName); +RiakResult indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here. +CheckResult(indexRiakResult); +RiakIndexResult indexResult = indexRiakResult.Value; +Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +Jane's orders (key values): 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```csharp +// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 +riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName); +indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here. +CheckResult(indexRiakResult); +indexResult = indexRiakResult.Value; +Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +October orders (key values): 1, 2 +``` + +We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/erlang.md b/content/riak/kv/2.9.0p5/developing/getting-started/erlang.md new file mode 100644 index 0000000000..b4ecfeaf23 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/erlang.md @@ -0,0 +1,63 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/erlang + - /riak/kv/2.9.0p5/dev/taste-of-riak/erlang + - /riak/2.9.0p5/developing/getting-started/erlang/ + - /riak/2.9.0/developing/getting-started/erlang/ + - /riak/kv/2.9.0/developing/getting-started/erlang/ + - /riak/kv/2.9.0p1/developing/getting-started/erlang/ + - /riak/kv/2.9.0p2/developing/getting-started/erlang/ + - /riak/kv/2.9.0p3/developing/getting-started/erlang/ + - /riak/kv/2.9.0p4/developing/getting-started/erlang/ +--- + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.0p5/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let’s create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. 
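+
+Before moving on, you can confirm that the link works by pinging the
+node; `riakc_pb_socket:ping/1` returns the atom `pong` when the node is
+reachable:
+
+```erlang
+pong = riakc_pb_socket:ping(Pid).
+```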
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.0p5/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/2.9.0p5/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..76d6a731a1 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,176 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.0p5/developing/getting-started/erlang/crud-operations/ + - /riak/2.9.0/developing/getting-started/erlang/crud-operations/ + - /riak/kv/2.9.0/developing/getting-started/erlang/crud-operations/ + - /riak/kv/2.9.0p1/developing/getting-started/erlang/crud-operations/ + - /riak/kv/2.9.0p2/developing/getting-started/erlang/crud-operations/ + - /riak/kv/2.9.0p3/developing/getting-started/erlang/crud-operations/ + - /riak/kv/2.9.0p4/developing/getting-started/erlang/crud-operations/ +--- + + +## Creating Objects In Riak + +First, let’s create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let’s store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}. +Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). 
+``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). +``` + +Next let’s clean up our mess: + +```erlang +riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>). +riakc_pb_socket:stop(Pid). +``` diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.9.0p5/developing/getting-started/erlang/object-modeling.md new file mode 100644 index 0000000000..acba7f7f59 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/erlang/object-modeling.md @@ -0,0 +1,346 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Erlang" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Object Modeling" + identifier: "getting_started_erlang_object" + weight: 102 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/object-modeling-erlang + - /riak/kv/2.9.0p5/dev/taste-of-riak/object-modeling-erlang + - /riak/2.9.0p5/developing/getting-started/erlang/object-modeling/ + - /riak/2.9.0/developing/getting-started/erlang/object-modeling/ + - /riak/kv/2.9.0/developing/getting-started/erlang/object-modeling/ + - /riak/kv/2.9.0p1/developing/getting-started/erlang/object-modeling/ + - /riak/kv/2.9.0p2/developing/getting-started/erlang/object-modeling/ + - /riak/kv/2.9.0p3/developing/getting-started/erlang/object-modeling/ + - /riak/kv/2.9.0p4/developing/getting-started/erlang/object-modeling/ +--- + + +To get started, let's create the records that we'll be using. 
+
+{{% note title="Code Download" %}}
+You can also download the code for this chapter at
+[GitHub](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema).
+
+The GitHub version includes Erlang type specifications which have been omitted
+here for brevity.
+{{% /note %}}
+
+
+```erlang
+%% msgy.hrl
+
+-define(USER_BUCKET, <<"Users">>).
+-define(MSG_BUCKET, <<"Msgs">>).
+-define(TIMELINE_BUCKET, <<"Timelines">>).
+-define(INBOX, "Inbox").
+-define(SENT, "Sent").
+
+-record(user, {user_name, full_name, email}).
+
+-record(msg, {sender, recipient, created, text}).
+
+-record(timeline, {owner, msg_type, msgs}).
+```
+
+We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.0p5/developing/usage/bucket-types) here, so we don't need to specify one.
+
+To use these records to store data, we will first have to create a user
+record. Then, when a user creates a message, we will append that message
+to one or more timelines. If it's a private message, we'll append it to
+the Recipient's `Inbox` timeline and to the User's own `Sent` timeline.
+If it's a group message, we'll append it to the Group's timeline, as
+well as to the User's `Sent` timeline.
+
+#### Buckets and keys revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+so that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```erlang
+%% user_repository.erl
+
+-module(user_repository).
+-export([save_user/2,
+         get_user/2]).
+-include("msgy.hrl").
+
+save_user(ClientPid, User) ->
+    RUser = riakc_obj:new(?USER_BUCKET,
+                          list_to_binary(User#user.user_name),
+                          User),
+    riakc_pb_socket:put(ClientPid, RUser).
+
+get_user(ClientPid, UserName) ->
+    {ok, RUser} = riakc_pb_socket:get(ClientPid,
+                                      ?USER_BUCKET,
+                                      list_to_binary(UserName)),
+    binary_to_term(riakc_obj:get_value(RUser)).
+```
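+
+A quick usage sketch (illustrative only; it assumes `Pid` is a
+`riakc_pb_socket` connection as in the previous chapters, and the email
+address is made up):
+
+```erlang
+%% Save a user record, then read it straight back.
+Joe = #user{user_name="joeuser", full_name="Joe User", email="joe@example.com"},
+user_repository:save_user(Pid, Joe),
+Joe = user_repository:get_user(Pid, "joeuser").
+```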
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
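+
+One caveat on the helper above: `erlang:now/0` is deprecated in recent
+Erlang/OTP releases. Because `os:timestamp/0` returns the same
+`{MegaSecs, Secs, MicroSecs}` triple, a drop-in alternative for the
+private timestamp function could look like this (a sketch, not part of
+the original code):
+
+```erlang
+%% Same output as get_current_iso_timestamp/0 above, but without the
+%% deprecated erlang:now/0 (os:timestamp/0 returns the same triple).
+get_current_iso_timestamp() ->
+    {_, _, MicroSec} = Now = os:timestamp(),
+    {{Year, Month, Day}, {Hour, Min, Sec}} = calendar:now_to_universal_time(Now),
+    lists:flatten(
+      io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B",
+                    [Year, Month, Day, Hour, Min, Sec, MicroSec])).
+```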
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
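+%% main/0 exercises the repositories end to end: connect to a local
+%% node, save two users, post a welcome message, and read it back from
+%% the recipient's Inbox timeline for today.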
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/erlang/querying.md b/content/riak/kv/2.9.0p5/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..25f6ff2bbb --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/erlang/querying.md @@ -0,0 +1,311 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/querying-erlang + - /riak/kv/2.9.0p5/dev/taste-of-riak/querying-erlang + - /riak/2.9.0p5/developing/getting-started/erlang/querying/ + - /riak/2.9.0/developing/getting-started/erlang/querying/ + - /riak/kv/2.9.0/developing/getting-started/erlang/querying/ + - /riak/kv/2.9.0p1/developing/getting-started/erlang/querying/ + - /riak/kv/2.9.0p2/developing/getting-started/erlang/querying/ + - /riak/kv/2.9.0p3/developing/getting-started/erlang/querying/ + - /riak/kv/2.9.0p4/developing/getting-started/erlang/querying/ +--- + + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. 
This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.9.0p5/developing/key-value-modeling). + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle= Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +## Remember to replace the ip and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). 
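+%% As in the client setup chapter: 8087 is the default protocol buffers
+%% port for a single local node, while 10017 is the first node of a
+%% five-minute-install devrel cluster.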
+ +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj). + +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.0p5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
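+
+%% Caveat: lists:concat/1 does not zero-pad, so {{2013,10,1},{14,42,26}}
+%% yields "2013101144226" rather than "20131001144226". To keep these
+%% index values lexically sortable for the range query below, a padded
+%% variant (a sketch, not from the original) could be used instead:
+%%
+%%   FormatDate = fun({{Y, M, D}, {H, Mi, S}}) ->
+%%       lists:flatten(io_lib:format("~4..0B~2..0B~2..0B~2..0B~2..0B~2..0B",
+%%                                   [Y, M, D, H, Mi, S]))
+%%   end.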
+ +AddIndicesToOrder = fun(OrderKey) -> + {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket, + list_to_binary(integer_to_list(OrderKey))), + + OrderData = binary_to_term(riakc_obj:get_value(Order)), + OrderMetadata = riakc_obj:get_update_metadata(Order), + + MD1 = riakc_obj:set_secondary_index(OrderMetadata, + [{{binary_index, "order_date"}, + [FormatDate(OrderData#order.order_date)]}]), + + MD2 = riakc_obj:set_secondary_index(MD1, + [{{integer_index, "salesperson_id"}, + [OrderData#order.salesperson_id]}]), + + Order2 = riakc_obj:update_metadata(Order,MD2), + riakc_pb_socket:put(Pid,Order2) +end. + +lists:foreach(AddIndicesToOrder, [1,2,3]). + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we +have to add entries to the indices at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll lookup the orders +by searching the `saleperson_id_int` index for Jane's id of `9000`. + +```erlang +riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000). +``` + +Which returns: + +```erlang +{ok,{index_results_v1,[<<"1">>,<<"3">>], + undefined,undefined}} +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's id, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`order_date_bin` index for entries between `20131001` and `20131031`. + +```erlang +riakc_pb_socket:get_index_range(Pid, OrderBucket, + {binary_index, "order_date"}, + <<"20131001">>, <<"20131031">>). +``` + +Which returns: + +```erlang +{ok,{index_results_v1,[<<"1">>,<<"2">>], + undefined,undefined}} +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So, to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a + secondary id other than the object's key. +* Indices can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/golang.md b/content/riak/kv/2.9.0p5/developing/getting-started/golang.md new file mode 100644 index 0000000000..4bc42bd226 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/golang.md @@ -0,0 +1,86 @@ +--- +title: "Getting Started with Go" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Go" + identifier: "getting_started_go" + weight: 107 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/golang + - /riak/kv/2.9.0p5/dev/taste-of-riak/golang + - /riak/2.9.0p5/developing/getting-started/golang/ + - /riak/2.9.0/developing/getting-started/golang/ + - /riak/kv/2.9.0/developing/getting-started/golang/ + - /riak/kv/2.9.0p1/developing/getting-started/golang/ + - /riak/kv/2.9.0p2/developing/getting-started/golang/ + - /riak/kv/2.9.0p3/developing/getting-started/golang/ + - /riak/kv/2.9.0p4/developing/getting-started/golang/ +--- + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.0p5/using/running-a-cluster) first and ensure you have +[a working installation of Go](http://golang.org/doc/install). 
+ +## Client Setup + +First install the [Riak Go client](https://github.com/basho/riak-go-client): + +```bash +go get github.com/basho/riak-go-client +``` + +Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities: + +```bash +go get github.com/basho/taste-of-riak/go/util +``` + +If you are using a single local Riak node, use the following to create a +new client instance: + +```golang +package main + +import ( + "encoding/binary" + "encoding/json" + "sync" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() +} +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.0p5/developing/getting-started/golang/crud-operations) diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/golang/crud-operations.md b/content/riak/kv/2.9.0p5/developing/getting-started/golang/crud-operations.md new file mode 100644 index 0000000000..f2a07871c8 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/golang/crud-operations.md @@ -0,0 +1,379 @@ +--- +title_supertext: "Getting Started:" +tiGetting Started: CRUD Operations with Go" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "CRUD Operations" + identifier: "getting_started_go_crud" + weight: 100 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.0p5/developing/getting-started/golang/crud-operations/ + - /riak/2.9.0/developing/getting-started/golang/crud-operations/ + - /riak/kv/2.9.0/developing/getting-started/golang/crud-operations/ + - /riak/kv/2.9.0p1/developing/getting-started/golang/crud-operations/ + - /riak/kv/2.9.0p2/developing/getting-started/golang/crud-operations/ + - /riak/kv/2.9.0p3/developing/getting-started/golang/crud-operations/ + - /riak/kv/2.9.0p4/developing/getting-started/golang/crud-operations/ +--- + + +## Creating Objects + +First let’s create a few objects and a bucket to keep them in: + +```golang + val1 := uint32(1) + val1buf := make([]byte, 4) + binary.LittleEndian.PutUint32(val1buf, val1) + + val2 := "two" + + val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized) + var val3json []byte + val3json, err = json.Marshal(val3) + if err != nil { + util.ErrExit(err) + } + + bucket := "test" + + util.Log.Println("Creating Objects In Riak...") + + objs := []*riak.Object{ + { + Bucket: bucket, + Key: "one", + ContentType: "application/octet-stream", + Value: val1buf, + }, + { + Bucket: bucket, + Key: "two", + ContentType: "text/plain", + Value: []byte(val2), + }, + { + Bucket: bucket, + Key: "three", + ContentType: "application/json", + Value: val3json, + }, + } + + var cmd riak.Command + wg := &sync.WaitGroup{} + + for _, o := range objs { + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(o). 
+ Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } + } + + wg.Wait() +``` + +In our first object, we have stored the integer 1 with the lookup key +of `one`: + +```golang +{ + Bucket: bucket, + Key: "one", + ContentType: "application/octet-stream", + Value: val1buf, +} +``` + +For our second object, we stored a simple string value of `two` with a +matching key: + +```golang +{ + Bucket: bucket, + Key: "two", + ContentType: "text/plain", + Value: []byte(val2), +} +``` + +Finally, the third object we stored was a bit of JSON: + +```golang +{ + Bucket: bucket, + Key: "three", + ContentType: "application/json", + Value: val3json, +} +``` + +## Reading Objects + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +Requesting the objects by key: + +```golang +var cmd riak.Command +wg := &sync.WaitGroup{} + +for _, o := range objs { + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(o). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() + +util.Log.Println("Reading Objects From Riak...") + +d := make(chan riak.Command, len(objs)) + +for _, o := range objs { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + Done: d, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +close(d) +``` + +Converting to JSON to compare a string key to a symbol +key: + +```golang +for done := range d { + f := done.(*riak.FetchValueCommand) + /* un-comment to dump fetched object as JSON + if json, jerr := json.MarshalIndent(f.Response, "", " "); err != nil { + util.ErrLog.Println(jerr) + } else { + util.Log.Println("fetched value: ", string(json)) + } + */ + obj := f.Response.Values[0] + switch obj.Key { + case "one": + if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "two": + if actual, expected := string(obj.Value), val2; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "three": + obj3 = obj + val3.MyValue = 0 + if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) + } else { + if actual, expected := val3.MyValue, int(3); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + } + default: + util.ErrLog.Printf("unrecognized key: %s", obj.Key) + } +} +``` + +## Updating Objects + +While some data may be static, other forms of data need to be +updated. + +Let’s update some values: + +```golang +util.Log.Println("Updating Object Three In Riak...") + +val3.MyValue = 42 +obj3.Value, err = json.Marshal(val3) +if err != nil { + util.ErrExit(err) +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj3). + WithReturnBody(true). 
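+	// WithReturnBody(true) asks Riak to send the stored object back in the
+	// response, so the updated value can be verified without a second fetch.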
+ Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} + +svcmd := cmd.(*riak.StoreValueCommand) +svrsp := svcmd.Response +obj3 = svrsp.Values[0] +val3.MyValue = 0 +if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) +} else { + if actual, expected := val3.MyValue, int(42); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected) + } +} +util.Log.Println("updated object key: ", obj3.Key) +util.Log.Println("updated object value: ", val3.MyValue) +``` + +## Deleting Objects + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called against either the bucket or the +object. + +```golang +for _, o := range objs { + cmd, err = riak.NewDeleteValueCommandBuilder(). + WithBucket(o.Bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. + +For example, this `struct` that represents some information about +a book: + +```golang +type Book struct { + ISBN string + Title string + Author string + Body string + CopiesOwned uint16 +} + +book := &Book{ + ISBN: "1111979723", + Title: "Moby Dick", + Author: "Herman Melville", + Body: "Call me Ishmael. Some years ago...", + CopiesOwned: 3, +} +``` + +We now have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```golang +var jbook []byte +jbook, err = json.Marshal(book) +if err != nil { + util.ErrExit(err) +} + +bookObj := &riak.Object{ + Bucket: "books", + Key: book.ISBN, + ContentType: "application/json", + Value: jbook, +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(bookObj). + WithReturnBody(false). + Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} +``` + +If we fetch our book back and print the data: + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket("books"). + WithKey(book.ISBN). + Build() +if err != nil { + util.ErrExit(err) +} +if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) +} + +fcmd := cmd.(*riak.FetchValueCommand) +bookObj = fcmd.Response.Values[0] +util.Log.Println(string(bookObj.Value)) +``` + +The result is: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +Now, let’s delete the book: + +```golang +... 
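+// As in the Deleting Objects section above, a riak.NewDeleteValueCommandBuilder()
+// with WithBucket("books") and WithKey(book.ISBN) would be built and executed here.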
+``` diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.9.0p5/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..b172c4fd32 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,555 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/object-modeling-golang + - /riak/kv/2.9.0p5/dev/taste-of-riak/object-modeling-golang + - /riak/2.9.0p5/developing/getting-started/golang/object-modeling/ + - /riak/2.9.0/developing/getting-started/golang/object-modeling/ + - /riak/kv/2.9.0/developing/getting-started/golang/object-modeling/ + - /riak/kv/2.9.0p1/developing/getting-started/golang/object-modeling/ + - /riak/kv/2.9.0p2/developing/getting-started/golang/object-modeling/ + - /riak/kv/2.9.0p3/developing/getting-started/golang/object-modeling/ + - /riak/kv/2.9.0p4/developing/getting-started/golang/object-modeling/ +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.0p5/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. 
+If it's a group message, we'll append it to the Group's timeline, as
+well as to the User's `Sent` timeline.
+
+#### Buckets and keys revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
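+
+For illustration only, here is a minimal, hypothetical Go sketch (not
+part of the taste-of-riak code) showing how natural keys like these can
+be assembled with the standard library:
+
+```golang
+package main
+
+import (
+	"fmt"
+	"time"
+)
+
+func main() {
+	created := time.Date(2014, 3, 6, 2, 5, 13, 223556000, time.UTC)
+	// Msgs key: <user_name>_<datetime>
+	msgKey := fmt.Sprintf("%s_%s", "joeuser", created.Format("2006-01-02T15:04:05.000000Z"))
+	// Timelines key: <owner>_<type>_<date>
+	timelineKey := fmt.Sprintf("%s_%s_%s", "joeuser", "Sent", created.Format("2006-01-02Z"))
+	fmt.Println(msgKey)      // joeuser_2014-03-06T02:05:13.223556Z
+	fmt.Println(timelineKey) // joeuser_Sent_2014-03-06Z
+}
+```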
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+	"encoding/json"
+	"errors"
+
+	riak "github.com/basho/riak-go-client"
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+	Get(key string, notFoundOk bool) (models.Model, error)
+	Save(models.Model) (models.Model, error)
+	getBucketName() string
+	getModel() models.Model
+	getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+	client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+	return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(notFoundOk).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	fcmd := cmd.(*riak.FetchValueCommand)
+
+	if notFoundOk && len(fcmd.Response.Values) == 0 {
+		return nil, nil
+	}
+
+	if len(fcmd.Response.Values) > 1 {
+		// Siblings present that need resolution
+		// Here we'll just return an unexpected error
+		return nil, ErrUnexpectedSiblings
+	} else {
+		return buildModel(r.getModel(), fcmd.Response.Values[0])
+	}
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	key := m.GetId()
+
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(true).
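+		// The save path fetches the current object first so an existing
+		// value (and its causal context) is updated rather than blindly
+		// overwritten; WithNotFoundOk(true) keeps a missing key from
+		// surfacing as an error on a first-time save.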
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
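+With the generic `get` and `save` helpers in place, each concrete
+repository below only has to supply its bucket name and model type. As a
+rough, hypothetical usage sketch (not part of the taste-of-riak code; it
+assumes a reachable Riak node and the `UserRepository` defined below):
+
+```golang
+package main
+
+import (
+	riak "github.com/basho/riak-go-client"
+	repos "github.com/basho/taste-of-riak/go/ch03/repositories"
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+	o := &riak.NewClientOptions{RemoteAddresses: util.GetRiakAddresses()}
+	client, err := riak.NewClient(o)
+	if err != nil {
+		util.ErrExit(err)
+	}
+	defer client.Stop()
+
+	userRepo := repos.NewUserRepository(client)
+	// notFoundOk=true: a nil model with a nil error means the key is free
+	if m, gerr := userRepo.Get("joeuser", true); gerr == nil && m == nil {
+		util.Log.Println("username is available")
+	}
+}
+```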
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names. +* How to choose natural keys based on how we want to partition our data. + + diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/golang/querying.md b/content/riak/kv/2.9.0p5/developing/getting-started/golang/querying.md new file mode 100644 index 0000000000..a8acbcd22a --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/golang/querying.md @@ -0,0 +1,584 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Go" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Querying" + identifier: "getting_started_go_query" + weight: 101 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/querying-golang + - /riak/kv/2.9.0p5/dev/taste-of-riak/querying-golang + - /riak/2.9.0p5/developing/getting-started/golang/querying/ + - /riak/2.9.0/developing/getting-started/golang/querying/ + - /riak/kv/2.9.0/developing/getting-started/golang/querying/ + - /riak/kv/2.9.0p1/developing/getting-started/golang/querying/ + - /riak/kv/2.9.0p2/developing/getting-started/golang/querying/ + - /riak/kv/2.9.0p3/developing/getting-started/golang/querying/ + - /riak/kv/2.9.0p4/developing/getting-started/golang/querying/ +--- + + +## Go Version Setup + +For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix* OS. + +>A Quick Note on Querying and Schemas: +> +>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. 
A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it. + +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
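+	// The summary is stored under the customer's generated key, so the
+	// customer id alone can later fetch both the Customer and its OrderSummary.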
+ Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
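+	// Same identity key as the Customer fetch above; this is the
+	// "same keys, different buckets" pattern at work.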
+ Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +doneChan := make(chan riak.Command) +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Bucket { + case customersBucket: + util.Log.Printf("Customer 1: %v", string(obj.Value)) + case orderSummariesBucket: + util.Log.Printf("OrderSummary 1: %v", string(obj.Value)) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} +``` + +Which returns our amalgamated objects: + +```sh +2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]} +2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z" +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.0p5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: + +```golang +util.Log.Println("Adding Index Data") + +// fetch orders to add index data +cmds = cmds[:0] + +for _, order := range orders { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(ordersBucket). + WithKey(order.Id). 
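+		// 2i entries live on the object itself, so each order must be
+		// fetched, given its index entries, and stored back.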
+ Build() + if err != nil { + util.ErrExit(err) + } + cmds = append(cmds, cmd) +} + +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +errored = false +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Key { + case "1": + obj.AddToIntIndex("SalespersonId_int", 9000) + obj.AddToIndex("OrderDate_bin", "2013-10-01") + case "2": + obj.AddToIntIndex("SalespersonId_int", 9001) + obj.AddToIndex("OrderDate_bin", "2013-10-15") + case "3": + obj.AddToIntIndex("SalespersonId_int", 9000) + obj.AddToIndex("OrderDate_bin", "2013-11-03") + } + scmd, serr := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if serr != nil { + util.ErrExit(serr) + } + a := &riak.Async{ + Command: scmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} + +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +wg.Wait() +close(doneChan) +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level. + +Now let's find all of Jane Appleseed's processed orders. We'll lookup the orders by searching the `saleperson_id_int` index for Jane's id of `9000`: + +```golang +util.Log.Println("Index Queries") + +cmd, err = riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucket(ordersBucket). + WithIndexName("SalespersonId_int"). + WithIndexKey("9000"). + Build() +if err != nil { + util.ErrExit(err) +} + +if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) +} + +qcmd := cmd.(*riak.SecondaryIndexQueryCommand) +for _, rslt := range qcmd.Response.Results { + util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey)) +} +``` + +Which returns: + +```sh +2015/12/29 09:44:10 Jane's Orders, key: 3 +2015/12/29 09:44:10 Jane's Orders, key: 1 +``` + +Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id, next let's use a *binary* index. + +Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`: + +```golang +cmd, err = riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucket(ordersBucket). + WithIndexName("OrderDate_bin"). + WithRange("2013-10-01", "2013-10-31"). + Build() +if err != nil { + util.ErrExit(err) +} + +if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) +} + +qcmd = cmd.(*riak.SecondaryIndexQueryCommand) +for _, rslt := range qcmd.Response.Results { + util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey)) +} +``` + +Which returns: + +```sh +2015/12/29 09:44:10 October's Orders, key: 1 +2015/12/29 09:44:10 October's Orders, key: 2 +``` + +Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. 
+* Indexes can have either Integer or Binary (String) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/java.md b/content/riak/kv/2.9.0p5/developing/getting-started/java.md
new file mode 100644
index 0000000000..2cbd307fb6
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/java.md
@@ -0,0 +1,97 @@
+---
+title: "Getting Started with Java"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Java"
+    identifier: "getting_started_java"
+    weight: 100
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/taste-of-riak/java
+  - /riak/kv/2.9.0p5/dev/taste-of-riak/java
+  - /riak/2.9.0p5/developing/getting-started/java/
+  - /riak/2.9.0/developing/getting-started/java/
+  - /riak/kv/2.9.0/developing/getting-started/java/
+  - /riak/kv/2.9.0p1/developing/getting-started/java/
+  - /riak/kv/2.9.0p2/developing/getting-started/java/
+  - /riak/kv/2.9.0p3/developing/getting-started/java/
+  - /riak/kv/2.9.0p4/developing/getting-started/java/
+---
+
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.0p5/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Java is required.
+
+## Client Setup
+
+To include the Riak Java client in your project, add it to your
+project's dependencies. Here is a Maven example:
+
+```xml
+<dependencies>
+  <dependency>
+    <groupId>com.basho.riak</groupId>
+    <artifactId>riak-client</artifactId>
+    <version>2.1.1</version>
+  </dependency>
+</dependencies>
+```
+
+Next, download
+[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java)
+source code for this tutorial, and save it to your working directory.
+
+{{% note title="Configuring for a local cluster" %}}
+The `TasteOfRiak.java` file that you downloaded is set up to communicate with
+a 1-node Riak cluster listening on `localhost` port 10017. We recommend
+modifying the connection info directly within the `setUpCluster()` method.
+{{% /note %}}
+
+If you execute the `TasteOfRiak.java` file within your IDE, you should
+see the following:
+
+```
+Basic object created
+Location object created for quote object
+StoreValue operation created
+Client object successfully created
+Object storage operation successfully completed
+Success! The object we created and the object we fetched have the same value
+Quote object successfully deleted
+Book object created
+Moby Dick information now stored in Riak
+Book object successfully fetched
+Success! All of our tests check out
+```
+
+Since Java doesn’t have a REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting Up the Cluster
+
+The first step in using the Riak Java client is to create a cluster
+object to facilitate all interactions with Riak. You'll see this on line
+72:
+
+```java
+RiakCluster cluster = setUpCluster();
+```
+
+This calls the private `setUpCluster` method which begins on line 25.
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.0p5/developing/getting-started/java/crud-operations)
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.9.0p5/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..a8040c453e
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,210 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/getting-started/java/crud-operations/
+  - /riak/2.9.0/developing/getting-started/java/crud-operations/
+  - /riak/kv/2.9.0/developing/getting-started/java/crud-operations/
+  - /riak/kv/2.9.0p1/developing/getting-started/java/crud-operations/
+  - /riak/kv/2.9.0p2/developing/getting-started/java/crud-operations/
+  - /riak/kv/2.9.0p3/developing/getting-started/java/crud-operations/
+  - /riak/kv/2.9.0p4/developing/getting-started/java/crud-operations/
+---
+
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on down the line.
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Icemand");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{}}riak/kv/2.9.0p5/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/)
+documentation.
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book.
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the more simple
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+ +To update the POJO, we would use `UpdateValue` by +extending a new `BookUpdate` class as follows: + +```java +public static class BookUpdate extends UpdateValue.Update { + private final Book update; + public BookUpdate(Book update){ + this.update = update; + } + + @Override + public Book apply(Book t) { + if(t == null) { + t = new Book(); + } + + t.author = update.author; + t.body = update.body; + t.copiesOwned = update.copiesOwned; + t.isbn = update.isbn; + t.title = update.title; + + return t; + } +} +``` + +Then using the `BookUpdate` class with our `mobyDick` object: + +```java +mobyDick.copiesOwned = 5; +BookUpdate updatedBook = new BookUpdate(mobyDick); + +UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation) + .withUpdate(updatedBook).build(); +UpdateValue.Response response = client.execute(updateValue); +``` + +For more in depth information on updating objects and sibling resolution in +Riak, see [Updating Objects]({{}}riak/kv/2.9.0p5/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/) +documention. diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/java/object-modeling.md b/content/riak/kv/2.9.0p5/developing/getting-started/java/object-modeling.md new file mode 100644 index 0000000000..91af5ac6e7 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/java/object-modeling.md @@ -0,0 +1,436 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Java" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Object Modeling" + identifier: "getting_started_java_object" + weight: 102 + parent: "getting_started_java" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/object-modeling-java + - /riak/kv/2.9.0p5/dev/taste-of-riak/object-modeling-java + - /riak/2.9.0p5/developing/getting-started/java/object-modeling/ + - /riak/2.9.0/developing/getting-started/java/object-modeling/ + - /riak/kv/2.9.0/developing/getting-started/java/object-modeling/ + - /riak/kv/2.9.0p1/developing/getting-started/java/object-modeling/ + - /riak/kv/2.9.0p2/developing/getting-started/java/object-modeling/ + - /riak/kv/2.9.0p3/developing/getting-started/java/object-modeling/ + - /riak/kv/2.9.0p4/developing/getting-started/java/object-modeling/ +--- + + +To get started, let's create the models that we'll be using. + +```java +package com.basho.msgy.Models; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.TimeZone; + +public class Msg { + public String Sender; + public String Recipient; + public String Created; + public String Text; + + public static Msg createNew(String sender, String recipient, String text) { + Msg msg = new Msg(); + msg.Sender = sender; + msg.Recipient = recipient; + msg.Text = text; + msg.Created = GetCurrentISO8601Timestamp(); + return msg; + } + + private static String GetCurrentISO8601Timestamp() { + TimeZone tz = TimeZone.getTimeZone("UTC"); + // Java Dates don't have microsecond resolution :( + // Pad out to microseconds to match other examples. 
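+        // 'SSS' emits milliseconds; the quoted literal '000' appends three
+        // zeros so the timestamp has microsecond width and sorts consistently.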
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.convert.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "msgs";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. With the
+Java client, we can use the `@RiakKey` annotation to tell the client
+that we want to use the `UserName` member as the key. It will
+automatically use that value in the future, instead of having to pass the
+key in as another parameter when storing a value.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```java
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.riak.client.IRiakClient;
+import com.basho.riak.client.RiakRetryFailedException;
+import com.basho.riak.client.bucket.Bucket;
+
+public class MsgRepository {
+
+    static final String BUCKET_NAME = "Msgs";
+    protected RiakClient client;
+
+    public MsgRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public Msg get(String msgKey) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        FetchValue.Response response = client.execute(fetch);
+        return response.getValue(Msg.class);
+    }
+
+    public String save(Msg msg) throws Exception {
+        StoreValue store = new StoreValue.Builder(msg).build();
+        client.execute(store);
+        return generateKey(msg);
+    }
+
+    private String generateKey(Msg msg) {
+        return msg.Sender + "_" + msg.Created;
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.riak.client.IRiakClient;
+import com.basho.riak.client.RiakRetryFailedException;
+import com.basho.riak.client.bucket.Bucket;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class TimelineRepository {
+
+    static final String BUCKET_NAME = "Timelines";
+    protected RiakClient client;
+    protected MsgRepository msgRepo;
+
+    public TimelineRepository(RiakClient client) {
+        this.client = client;
+        this.msgRepo = new MsgRepository(this.client);
+    }
+
+    public void postMsg(Msg msg) throws Exception {
+        String msgKey = msgRepo.save(msg);
+
+        // Post to recipient's Inbox timeline
+        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);
+
+        // Post to sender's Sent timeline
+        
addToTimeline(msg, Timeline.TimelineType.Sent, msgKey); + } + + + private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception { + String timelineKey = generateKeyFromMsg(msg, type); + + Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey); + FetchValue fetch = new FetchValue.Builder(loc).build(); + Timeline timeline = client.execute(fetch).getValue(Timeline.class); + + if (timeline != null) { + timeline = addToExistingTimeline(timeline,msgKey); + } else { + timeline = createNewTimeline(msg, type, msgKey); + } + + StoreValue store = new StoreValue.Builder(timeline).build(); + client.execute(store); + } + + public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) { + String owner = getOwner(msg, type); + + Timeline newTimeline = new Timeline(); + newTimeline.Owner = owner; + newTimeline.Type = type.toString(); + newTimeline.Msgs.add(msgKey); + + return newTimeline; + } + + public Timeline addToExistingTimeline(Timeline timeline, String msgKey) { + timeline.Msgs.add(msgKey); + return timeline; + } + + public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws RiakRetryFailedException { + String timelineKey = generateKey(ownerUsername, type, date); + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(timelineKey, Timeline.class).execute(); + } + + private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) { + String owner = getOwner(msg, type); + String dateString = msg.Created.substring(0, 10); + return generateKey(owner, type, dateString); + } + + private String getOwner(Msg msg, Timeline.TimelineType type) { + if(type == Timeline.TimelineType.Inbox) + return msg.Recipient; + else + return msg.Sender; + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) { + String dateString = getIso8601DateStringFromDate(date); + return generateKey(ownerUsername, type, dateString); + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) { + return ownerUsername + "_" + type.toString() + "_" + dateString; + } + + private String getIso8601DateStringFromDate(Date date) { + TimeZone tz = TimeZone.getTimeZone("UTC"); + DateFormat df = new SimpleDateFormat("yyyy-MM-dd"); + df.setTimeZone(tz); + return df.format(date); + } + + +} + +// ---------------------------------------------------------------------------- + +package com.basho.msgy.Repositories; + +import com.basho.msgy.Models.User; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakRetryFailedException; +import com.basho.riak.client.bucket.Bucket; + +public class UserRepository { + static final String BUCKET_NAME = "Users"; + protected IRiakClient client; + + public UserRepository(IRiakClient client) { + this.client = client; + } + + public void save(User user) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + bucket.store(user).execute(); + } + + public User get(String UserName) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(UserName, User.class).execute(); + } +} + +``` + +Finally, let's test them: + +```java +package com.basho.msgy; + +import com.basho.msgy.Models.Msg; +import com.basho.msgy.Models.Timeline; +import com.basho.msgy.Models.User; +import com.basho.msgy.Repositories.MsgRepository; +import com.basho.msgy.Repositories.TimelineRepository; +import 
+import com.basho.riak.client.api.RiakClient;
+
+import java.util.Date;
+
+public class MsgyMain {
+
+    public static void main(String[] args) throws Exception {
+        // Set up our repositories
+        RiakClient client = RiakClient.newClient(10017, "127.0.0.1");
+
+        UserRepository userRepo = new UserRepository(client);
+        MsgRepository msgRepo = new MsgRepository(client);
+        TimelineRepository timelineRepo = new TimelineRepository(client);
+
+        // Create and save users
+        User marleen = new User("marleenmgr",
+                "Marleen Manager",
+                "marleen.manager@basho.com");
+
+        User joe = new User("joeuser",
+                "Joe User",
+                "joe.user@basho.com");
+
+        userRepo.save(marleen);
+        userRepo.save(joe);
+
+        // Create a new Msg and post it to timelines
+        Msg msg = Msg.createNew(marleen.UserName,
+                joe.UserName,
+                "Welcome to the company!");
+
+        timelineRepo.postMsg(msg);
+
+        // Get Joe's inbox for today, then get the first message
+        Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName,
+                Timeline.TimelineType.Inbox,
+                new Date());
+
+        Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0));
+
+        System.out.println("From: " + joesFirstMsg.Sender);
+        System.out.println("Msg : " + joesFirstMsg.Text);
+        System.out.println("");
+
+        client.shutdown();
+    }
+}
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
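+
+As an illustration of the first of those benefits, here is a minimal
+sketch (not part of the sample project) of a guarded create on top of
+the `UserRepository` above. Note that the fetch-then-store check is not
+atomic, so two clients racing on the same username can still both
+succeed:
+
+```java
+// Sketch only: refuse to overwrite an existing user. The check is not
+// atomic, so it only catches the common case; sibling resolution (next
+// chapter) is still needed for truly concurrent writes.
+public User create(User user) throws Exception {
+    User existing = get(user.UserName);
+    if (existing != null) {
+        throw new IllegalStateException("Username already taken: " + user.UserName);
+    }
+    save(user);
+    return user;
+}
+```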
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/java/querying.md b/content/riak/kv/2.9.0p5/developing/getting-started/java/querying.md
new file mode 100644
index 0000000000..a90e452ee3
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/java/querying.md
@@ -0,0 +1,284 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Java"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Querying"
+    identifier: "getting_started_java_query"
+    weight: 101
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/taste-of-riak/querying-java
+  - /riak/kv/2.9.0p5/dev/taste-of-riak/querying-java
+  - /riak/2.9.0p5/developing/getting-started/java/querying/
+  - /riak/2.9.0/developing/getting-started/java/querying/
+  - /riak/kv/2.9.0/developing/getting-started/java/querying/
+  - /riak/kv/2.9.0p1/developing/getting-started/java/querying/
+  - /riak/kv/2.9.0p2/developing/getting-started/java/querying/
+  - /riak/kv/2.9.0p3/developing/getting-started/java/querying/
+  - /riak/kv/2.9.0p4/developing/getting-started/java/querying/
+---
+
+
+## Java Version Setup
+
+For the Java version, please download the source from GitHub by either
+[cloning](https://github.com/basho/taste-of-riak) the source code
+repository or downloading the [current zip of the master
+branch](https://github.com/basho/taste-of-riak/archive/master.zip).
+The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`. You
+may import this code into your favorite editor, or just run it from the
+command line using the commands in `BuildAndRun.sh` if you are running
+on a *nix OS.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can be as simple
+as using the same key across multiple buckets for different types of
+data, or as subtle as having fields in your data that are related by
+name. These querying methods will introduce you to some ways of laying
+out your data in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled up info about orders such as total, etc. You can find the source
+for these POJOs in `Customer.java`, `Order.java` and
+`OrderSummaries.java`. Let's put some data into Riak so we can play
+with it.
+
+```java
+// From SipOfRiak.java
+
+private static Customer createCustomer() {
+    Customer customer = new Customer();
+    customer.CustomerId = 1;
+    customer.Name = "John Smith";
+    customer.Address = "123 Main Street";
+    customer.City = "Columbus";
+    customer.State = "Ohio";
+    customer.Zip = "43210";
+    customer.Phone = "+1-614-555-5555";
+    customer.CreatedDate = "2013-10-01 14:30:26";
+    return customer;
+}
+
+private static ArrayList<Order> createOrders() {
+    ArrayList<Order> orders = new ArrayList<Order>();
+
+    Order order1 = new Order();
+    order1.OrderId = 1;
+    order1.CustomerId = 1;
+    order1.SalespersonId = 9000;
+    order1.Items.add(
+        new Item("TCV37GIT4NJ",
+                "USB 3.0 Coffee Warmer",
+                15.99));
+    order1.Items.add(
+        new Item("PEG10BBF2PP",
+                "eTablet Pro; 24GB; Grey",
+                399.99));
+    order1.Total = 415.98;
+    order1.OrderDate = "2013-10-01 14:42:26";
+    orders.add(order1);
+
+    Order order2 = new Order();
+    order2.OrderId = 2;
+    order2.CustomerId = 1;
+    order2.SalespersonId = 9001;
+    order2.Items.add(
+        new Item("OAX19XWN0QP",
+                "GoSlo Digital Camera",
+                359.99));
+    order2.Total = 359.99;
+    order2.OrderDate = "2013-10-15 16:43:16";
+    orders.add(order2);
+
+    Order order3 = new Order();
+    order3.OrderId = 3;
+    order3.CustomerId = 1;
+    order3.SalespersonId = 9000;
+    order3.Items.add(
+        new Item("WYK12EPU5EZ",
+                "Call of Battle = Goats - Gamesphere 4",
+                69.99));
+    order3.Items.add(
+        new Item("TJB84HAA8OA",
+                "Bricko Building Blocks",
+                4.99));
+    order3.Total = 74.98;
+    order3.OrderDate = "2013-11-03 17:45:28";
+    orders.add(order3);
+    return orders;
+}
+
+private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
+    OrderSummary orderSummary = new OrderSummary();
+    orderSummary.CustomerId = 1;
+    for (Order order : orders)
+    {
+        orderSummary.Summaries.add(new OrderSummaryItem(order));
+    }
+    return orderSummary;
+}
+
+public static void main(String[] args) throws RiakException {
+
+    System.out.println("Creating Data");
+    Customer customer = createCustomer();
+    ArrayList<Order> orders = createOrders();
+    OrderSummary orderSummary = createOrderSummary(orders);
+
+    System.out.println("Starting Client");
+    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);
+
+    System.out.println("Creating Buckets");
+    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
+    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
+    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();
+
+    System.out.println("Storing Data");
+    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
+    for (Order order : orders) {
+        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
+    }
+    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```java
+    System.out.println("Fetching related data by shared key");
+    String key = "1";
+    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
+    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
+    System.out.format("Customer 1: %s\n", fetchedCust);
+    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.0p5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```java
+    System.out.println("Adding Index Data");
+    IRiakObject riakObj = ordersBucket.fetch("1").execute();
+    riakObj.addIndex("SalespersonId", 9000);
+    riakObj.addIndex("OrderDate", "2013-10-01");
+    ordersBucket.store(riakObj).execute();
+
+    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
+    riakObj2.addIndex("SalespersonId", 9001);
+    riakObj2.addIndex("OrderDate", "2013-10-15");
+    ordersBucket.store(riakObj2).execute();
+
+    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
+    riakObj3.addIndex("SalespersonId", 9000);
+    riakObj3.addIndex("OrderDate", "2013-11-03");
+    ordersBucket.store(riakObj3).execute();
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's id of `9000`.
+
+```java
+    // Query for orders where the SalespersonId index is set to 9000
+    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
+                            .withValue(9000).execute();
+
+    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
+```
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. 
Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+```java
+    // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
+    List<String> octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate"))
+                            .from("2013-10-01").to("2013-10-31").execute();
+
+    System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders));
+```
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/nodejs.md b/content/riak/kv/2.9.0p5/developing/getting-started/nodejs.md
new file mode 100644
index 0000000000..927b98b985
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/nodejs.md
@@ -0,0 +1,108 @@
+---
+title: "Getting Started with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "NodeJS"
+    identifier: "getting_started_nodejs"
+    weight: 104
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/taste-of-riak/nodejs
+  - /riak/kv/2.9.0p5/dev/taste-of-riak/nodejs
+  - /riak/2.9.0p5/developing/getting-started/nodejs/
+  - /riak/2.9.0/developing/getting-started/nodejs/
+  - /riak/kv/2.9.0/developing/getting-started/nodejs/
+  - /riak/kv/2.9.0p1/developing/getting-started/nodejs/
+  - /riak/kv/2.9.0p2/developing/getting-started/nodejs/
+  - /riak/kv/2.9.0p3/developing/getting-started/nodejs/
+  - /riak/kv/2.9.0p4/developing/getting-started/nodejs/
+---
+
+
+[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js
+[npm]: https://www.npmjs.com/package/basho-riak-client
+[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation
+[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.0p5/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Node.js 0.12 or later is
+required.
+
+Code for these examples is available [here][introduction.js]. To run, follow
+these directions:
+
+```bash
+git clone git://github.com/basho/riak-nodejs-client-examples
+cd riak-nodejs-client-examples
+npm install
+node ./app.js
+```
+
+### Client Setup
+
+Install [the Riak Node.js Client][node_js_installation] through [NPM][npm].
+
+### Connecting to Riak
+
+Connecting to Riak with the Riak Node.js Client requires creating a new client
+object and using the callback argument to know when the client is fully
+initialized:
+
+```javascript
+var Riak = require('basho-riak-client');
+var nodes = [
+    'riak-test:10017',
+    'riak-test:10027',
+    'riak-test:10037',
+    'riak-test:10047'
+];
+var client = new Riak.Client(nodes, function (err, c) {
+    // NB: at this point the client is fully initialized, and
+    // 'client' and 'c' are the same object
+});
+```
+
+This creates a new `Riak.Client` object which handles all the details of
+tracking active nodes and also provides load balancing. The `Riak.Client` object
+is used to send commands to Riak. 
When your application is completely done with +Riak communications, the following method can be used to gracefully shut the +client down and exit Node.js: + +```javascript +client.stop(function (err, rslt) { + // NB: you may wish to check err + process.exit(); +}); +``` + +Let's make sure the cluster is online with a `Ping` request: + +```javascript +var assert = require('assert'); + +client.ping(function (err, rslt) { + if (err) { + throw new Error(err); + } else { + // On success, ping returns true + assert(rslt === true); + } +}); +``` + +This is some simple code to test that a node in a Riak cluster is online - we +send a simple ping message. Even if the cluster isn't present, the Riak Node.js +Client will return a response message. In the callback it is important to check +that your activity was successful by checking the `err` variable. + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.0p5/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/2.9.0p5/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..ba833a6246 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,142 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with NodeJS" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "CRUD Operations" + identifier: "getting_started_nodejs_crud" + weight: 100 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/2.9.0p5/developing/getting-started/nodejs/crud-operations/ + - /riak/2.9.0/developing/getting-started/nodejs/crud-operations/ + - /riak/kv/2.9.0/developing/getting-started/nodejs/crud-operations/ + - /riak/kv/2.9.0p1/developing/getting-started/nodejs/crud-operations/ + - /riak/kv/2.9.0p2/developing/getting-started/nodejs/crud-operations/ + - /riak/kv/2.9.0p3/developing/getting-started/nodejs/crud-operations/ + - /riak/kv/2.9.0p4/developing/getting-started/nodejs/crud-operations/ +--- + + +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + +### Creating Objects In Riak KV + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going +to want us to do productive work. Let's create some data to save in Riak. + +The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak +key/value objects. At the most basic, a `RiakObject` is responsible for +identifying your object and for translating it into a format that can be easily +saved to Riak. + +```javascript +var async = require('async'); + +var people = [ + { + emailAddress: "bashoman@basho.com", + firstName: "Basho", + lastName: "Man" + }, + { + emailAddress: "johndoe@gmail.com", + firstName: "John", + lastName: "Doe" + } +]; + +var storeFuncs = []; +people.forEach(function (person) { + // Create functions to execute in parallel to store people + storeFuncs.push(function (async_cb) { + client.storeValue({ + bucket: 'contributors', + key: person.emailAddress, + value: person + }, + function(err, rslt) { + async_cb(err, rslt); + } + ); + }); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +In this sample, we create a collection of `Person` objects and then save each +`Person` to Riak. Once again, we check the response from Riak. + +### Reading from Riak + +Let's find a person! 
+
+```javascript
+var logger = require('winston');
+
+client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true },
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        } else {
+            var riakObj = rslt.values.shift();
+            var bashoman = riakObj.value;
+            logger.info("I found %s in 'contributors'", bashoman.emailAddress);
+        }
+    }
+);
+```
+
+We use `client.fetchValue` to retrieve an object from Riak. This returns an
+array of `RiakObject` objects, which helpfully encapsulate the communication
+with Riak.
+
+After verifying that we've been able to communicate with Riak *and* that we have
+a successful result, we use the `value` property to get the object, which has
+already been converted to a JavaScript object due to the use of `convertToJs:
+true` in the options.
+
+### Modifying Existing Data
+
+Let's say that Basho Man has decided to be known as Riak Man:
+
+```javascript
+bashoman.firstName = "Riak";
+riakObj.setValue(bashoman);
+
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Updating an object involves modifying a `RiakObject` then using
+`client.storeValue` to save the existing object.
+
+### Deleting Data
+
+```javascript
+client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Just like other operations, we check the results that have come back from Riak
+to make sure the object was successfully deleted.
+
+The Riak Node.js Client has a lot of additional functionality that makes it easy
+to build rich, complex applications with Riak. Check out the
+[documentation][nodejs_wiki] to learn more about working with the Riak Node.js
+Client and Riak.
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/2.9.0p5/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..4fbd3ea319
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,127 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Object Modeling"
+    identifier: "getting_started_nodejs_object"
+    weight: 102
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/kv/2.9.0p5/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/2.9.0p5/developing/getting-started/nodejs/object-modeling/
+  - /riak/2.9.0/developing/getting-started/nodejs/object-modeling/
+  - /riak/kv/2.9.0/developing/getting-started/nodejs/object-modeling/
+  - /riak/kv/2.9.0p1/developing/getting-started/nodejs/object-modeling/
+  - /riak/kv/2.9.0p2/developing/getting-started/nodejs/object-modeling/
+  - /riak/kv/2.9.0p3/developing/getting-started/nodejs/object-modeling/
+  - /riak/kv/2.9.0p4/developing/getting-started/nodejs/object-modeling/
+---
+
+
+To get started, let's create the models that we'll be using.
+
+* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
+* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
+* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)
+
+To use these classes to store data, we will first have to create a user.
+
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br>
`marketing_group_INBOX_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_INBOX_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
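+
+Before we recap, here is a minimal sketch of the idea behind the base
+`Repository` class linked above, assuming the basho-riak-client `client`
+from the earlier chapters (the `getKey()` contract on models is an
+illustrative assumption, not the exact code in the repository):
+
+```javascript
+// Sketch only: the essence of the repository pattern in Node.js.
+function Repository(client, bucketName) {
+    this.client = client;
+    this.bucketName = bucketName;
+}
+
+Repository.prototype.get = function (key, callback) {
+    this.client.fetchValue(
+        { bucket: this.bucketName, key: key, convertToJs: true },
+        function (err, rslt) {
+            if (err) { return callback(err); }
+            // rslt.values is empty when the key does not exist
+            var model = rslt.values.length > 0 ? rslt.values[0].value : null;
+            callback(null, model);
+        });
+};
+
+Repository.prototype.save = function (model, callback) {
+    this.client.storeValue(
+        { bucket: this.bucketName, key: model.getKey(), value: model },
+        callback);
+};
+```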
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.9.0p5/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..cb43d2aca5 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/nodejs/querying.md @@ -0,0 +1,150 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/querying-nodejs + - /riak/kv/2.9.0p5/dev/taste-of-riak/querying-nodejs + - /riak/2.9.0p5/developing/getting-started/nodejs/querying/ + - /riak/2.9.0/developing/getting-started/nodejs/querying/ + - /riak/kv/2.9.0/developing/getting-started/nodejs/querying/ + - /riak/kv/2.9.0p1/developing/getting-started/nodejs/querying/ + - /riak/kv/2.9.0p2/developing/getting-started/nodejs/querying/ + - /riak/kv/2.9.0p3/developing/getting-started/nodejs/querying/ + - /riak/kv/2.9.0p4/developing/getting-started/nodejs/querying/ +--- + + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. 
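+
+In sketch form, storing two related objects under the same identity key
+looks like the following (this assumes the `client` object from the
+previous chapter and uses illustrative data; the linked examples below
+are the full, runnable versions):
+
+```javascript
+// Sketch only: same key ('1'), different buckets.
+var customer = { name: 'John Smith', city: 'Columbus' };
+var orderSummary = { customerId: '1', summaries: [] };
+
+client.storeValue({ bucket: 'Customers', key: '1', value: customer },
+    function (err, rslt) {
+        if (err) { throw new Error(err); }
+        client.storeValue({ bucket: 'OrderSummaries', key: '1', value: orderSummary },
+            function (err, rslt) {
+                if (err) { throw new Error(err); }
+            });
+    });
+```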
+
+* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33)
+* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262)
+
+While individual Customer and Order objects don't change much (or
+shouldn't change), the "Order Summary" object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)
+
+Which returns our amalgamated objects:
+
+```bash
+info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
+info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.0p5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's id of `9000`.
+
+[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`. 
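+
+In sketch form, such a range query looks like the following (this
+assumes the `client` object from the previous chapter; the index name is
+illustrative, and the linked example below is the full version):
+
+```javascript
+// Sketch only: a 2i range query with the Node.js client.
+client.secondaryIndexQuery({
+    bucket: 'Orders',
+    indexName: 'OrderDate_bin',
+    rangeStart: '2013-10-01',
+    rangeEnd: '2013-10-31',
+    stream: false
+}, function (err, rslt) {
+    if (err) { throw new Error(err); }
+    // With stream: false, rslt.values holds every matching entry
+    rslt.values.forEach(function (v) {
+        console.log(v.objectKey);
+    });
+});
+```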
+
+[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
+2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175)
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/php.md b/content/riak/kv/2.9.0p5/developing/getting-started/php.md
new file mode 100644
index 0000000000..76e547989c
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/php.md
@@ -0,0 +1,84 @@
+---
+title: "Getting Started with PHP"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "PHP"
+    identifier: "getting_started_php"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/taste-of-riak/php
+  - /riak/kv/2.9.0p5/dev/taste-of-riak/php
+  - /riak/2.9.0p5/developing/getting-started/php/
+  - /riak/2.9.0/developing/getting-started/php/
+  - /riak/kv/2.9.0/developing/getting-started/php/
+  - /riak/kv/2.9.0p1/developing/getting-started/php/
+  - /riak/kv/2.9.0p2/developing/getting-started/php/
+  - /riak/kv/2.9.0p3/developing/getting-started/php/
+  - /riak/kv/2.9.0p4/developing/getting-started/php/
+---
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.0p5/using/running-a-cluster) first.
+
+To try this flavor of Riak, you will need a working installation of PHP, plus [Composer](https://getcomposer.org/) to fetch the client library package.
+
+## Client Setup
+Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [GitHub repository](https://github.com/basho/taste-of-riak)).
+
+From the `taste-of-riak` directory, use Composer to install the Riak PHP 2.0 client.
+
+```bash
+php path/to/your/composer.phar install
+
+# If you did a global install of composer, run this instead:
+composer install
+```
+
+If you set up a local Riak cluster using the five-minute install method, change line 11 from `->onPort(8098)` to `->onPort(10018)`.
+
+Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:
+
+```text
+Reading Objects From Riak...
+Updating Objects In Riak...
+Deleting Objects From Riak...
+Working With Complex Objects...
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+Yay, success!
+
+Since we didn't use PHP's REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting up the PHP Client and connections
+
+```php
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+```
+
+This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
+Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.0p5/developing/getting-started/php/crud-operations)
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.9.0p5/developing/getting-started/php/crud-operations.md
new file mode 100644
index 0000000000..d8411fa249
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/php/crud-operations.md
@@ -0,0 +1,191 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with PHP"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "CRUD Operations"
+    identifier: "getting_started_php_crud"
+    weight: 100
+    parent: "getting_started_php"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/getting-started/php/crud-operations/
+  - /riak/2.9.0/developing/getting-started/php/crud-operations/
+  - /riak/kv/2.9.0/developing/getting-started/php/crud-operations/
+  - /riak/kv/2.9.0p1/developing/getting-started/php/crud-operations/
+  - /riak/kv/2.9.0p2/developing/getting-started/php/crud-operations/
+  - /riak/kv/2.9.0p3/developing/getting-started/php/crud-operations/
+  - /riak/kv/2.9.0p4/developing/getting-started/php/crud-operations/
+---
+
+
+## Creating Objects In Riak
+First, let’s create a few objects and a bucket to keep them in.
+
+```php
+$bucket = new Riak\Bucket('testBucket');
+
+$val1 = 1;
+$location1 = new Riak\Location('one', $bucket);
+
+$storeCommand1 = (new Command\Builder\StoreObject($riak))
+    ->buildObject($val1)
+    ->atLocation($location1)
+    ->build();
+$storeCommand1->execute();
+```
+
+In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key.
+
+```php
+$val2 = 'two';
+$location2 = new Riak\Location('two', $bucket);
+
+$storeCommand2 = (new Command\Builder\StoreObject($riak))
+    ->buildObject($val2)
+    ->atLocation($location2)
+    ->build();
+$storeCommand2->execute();
+```
+
+That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now.
+
+```php
+$val3 = ['myValue' => 3];
+$location3 = new Riak\Location('three', $bucket);
+
+$storeCommand3 = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($val3)
+    ->atLocation($location3)
+    ->build();
+$storeCommand3->execute();
+```
+
+## Reading Objects From Riak
+Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect.
+
+```php
+$response1 = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location1)
+    ->build()
+    ->execute();
+
+$response2 = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location2)
+    ->build()
+    ->execute();
+
+$response3 = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location3)
+    ->withDecodeAsAssociative()
+    ->build()
+    ->execute();
+
+print_r($response1->getObject()->getData());
+print_r($response2->getObject()->getData());
+print_r($response3->getObject()->getData());
+```
+
+That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html).
+For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object.
+
+In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data.
+
+## Updating Objects In Riak
+While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42.
+
+```php
+$object3 = $response3->getObject();
+$data3 = $object3->getData();
+
+$data3['myValue'] = 42;
+$object3 = $object3->setData(json_encode($data3));
+
+$updateCommand = (new Command\Builder\StoreObject($riak))
+    ->withObject($object3)
+    ->atLocation($location3)
+    ->build();
+
+$updateCommand->execute();
+```
+
+First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object.
+To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object.
+
+## Deleting Objects From Riak
+As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it.
+
+```php
+(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute();
+(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute();
+(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute();
+```
+
+### Working With Complex Objects
+Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take, for example, this plain old PHP object (POPO) that encapsulates some knowledge about a book.
+
+```php
+class Book
+{
+    var $title;
+    var $author;
+    var $body;
+    var $isbn;
+    var $copiesOwned;
+}
+
+$book = new Book();
+$book->isbn = '1111979723';
+$book->title = 'Moby Dick';
+$book->author = 'Herman Melville';
+$book->body = 'Call me Ishmael. Some years ago...';
+$book->copiesOwned = 3;
+```
+
+Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now:
+
+```php
+$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books'));
+
+$storeCommand1 = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($book)
+    ->atLocation($bookLocation)
+    ->build();
+
+$storeCommand1->execute();
+```
+
+Some of you may be thinking “But how does the Riak client encode/decode my object”? If we fetch the binary version of our book back and print it as a string, we shall know:
+
+```php
+$fetchBookResponse = (new Command\Builder\FetchObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute();
+
+print('Serialized Object:' . PHP_EOL);
+print($fetchBookResponse->getBody() . PHP_EOL);
+```
+
+```json
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder.
+
+Now that we’ve ruined the magic of object encoding, let’s clean up our mess:
+
+```php
+(new Command\Builder\DeleteObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute();
+```
+
+## Next Steps
+
+More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.9.0p5/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents.
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/php/querying.md b/content/riak/kv/2.9.0p5/developing/getting-started/php/querying.md
new file mode 100644
index 0000000000..29ad91406b
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/php/querying.md
@@ -0,0 +1,412 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with PHP"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Querying"
+    identifier: "getting_started_php_query"
+    weight: 101
+    parent: "getting_started_php"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/taste-of-riak/querying-php
+  - /riak/kv/2.9.0p5/dev/taste-of-riak/querying-php
+  - /riak/2.9.0p5/developing/getting-started/php/querying/
+  - /riak/2.9.0/developing/getting-started/php/querying/
+  - /riak/kv/2.9.0/developing/getting-started/php/querying/
+  - /riak/kv/2.9.0p1/developing/getting-started/php/querying/
+  - /riak/kv/2.9.0p2/developing/getting-started/php/querying/
+  - /riak/kv/2.9.0p3/developing/getting-started/php/querying/
+  - /riak/kv/2.9.0p4/developing/getting-started/php/querying/
+---
+
+
+## A Quick Note on Querying and Schemas
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, or as subtle as having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+## Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it.
+
+```php
+<?php
+
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Command;
+use Basho\Riak\Location;
+use Basho\Riak\Node;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Class definitions for our models
+
+class Customer
+{
+    var $customerId;
+    var $name;
+    var $address;
+    var $city;
+    var $state;
+    var $zip;
+    var $phone;
+    var $createdDate;
+}
+
+class Order
+{
+    public function __construct()
+    {
+        $this->items = array();
+    }
+    var $orderId;
+    var $customerId;
+    var $salespersonId;
+    var $items;
+    var $total;
+    var $orderDate;
+}
+
+class Item
+{
+    public function __construct($itemId, $title, $price)
+    {
+        $this->itemId = $itemId;
+        $this->title = $title;
+        $this->price = $price;
+    }
+    var $itemId;
+    var $title;
+    var $price;
+}
+
+class OrderSummary
+{
+    public function __construct()
+    {
+        $this->summaries = array();
+    }
+    var $customerId;
+    var $summaries;
+}
+
+class OrderSummaryItem
+{
+    public function __construct(Order $order)
+    {
+        $this->orderId = $order->orderId;
+        $this->total = $order->total;
+        $this->orderDate = $order->orderDate;
+    }
+    var $orderId;
+    var $total;
+    var $orderDate;
+}
+
+
+// Creating Data
+$customer = new Customer();
+$customer->customerId = 1;
+$customer->name = 'John Smith';
+$customer->address = '123 Main Street';
+$customer->city = 'Columbus';
+$customer->state = 'Ohio';
+$customer->zip = '43210';
+$customer->phone = '+1-614-555-5555';
+$customer->createdDate = '2013-10-01 14:30:26';
+
+
+$orders = [];
+
+$order1 = new Order();
+$order1->orderId = 1;
+$order1->customerId = 1;
+$order1->salespersonId = 9000;
+$order1->items = [
+    new Item(
+        'TCV37GIT4NJ',
+        'USB 3.0 Coffee Warmer',
+        15.99
+    ),
+    new Item(
+        'PEG10BBF2PP',
+        'eTablet Pro; 24GB; Grey',
+        399.99
+    )
+];
+$order1->total = 415.98;
+$order1->orderDate = '2013-10-01 14:42:26';
+$orders[] = $order1;
+
+$order2 = new Order();
+$order2->orderId = 2;
+$order2->customerId = 1;
+$order2->salespersonId = 9001;
+$order2->items = [
+    new Item(
+        'OAX19XWN0QP',
+        'GoSlo Digital Camera',
+        359.99
+    )
+];
+$order2->total = 359.99;
+$order2->orderDate = '2013-10-15 16:43:16';
+$orders[] = $order2;
+
+$order3 = new Order();
+$order3->orderId = 3;
+$order3->customerId = 1;
+$order3->salespersonId = 9000;
+$order3->items = [
+    new Item(
+        'WYK12EPU5EZ',
+        'Call of Battle = Goats - Gamesphere 4',
+        69.99
+    ),
+    new Item(
+        'TJB84HAA8OA',
+        'Bricko Building Blocks',
+        4.99
+    )
+];
+$order3->total = 74.98;
+$order3->orderDate = '2013-11-03 17:45:28';
+$orders[] = $order3;
+
+
+$orderSummary = new OrderSummary();
+$orderSummary->customerId = 1;
+foreach ($orders as $order) {
+    $orderSummary->summaries[] = new OrderSummaryItem($order);
+}
+unset($order);
+
+
+// Starting Client
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Creating Buckets
+$customersBucket = new Riak\Bucket('Customers');
+$ordersBucket = new Riak\Bucket('Orders');
+$orderSummariesBucket = new Riak\Bucket('OrderSummaries');
+
+// Storing Data
+$storeCustomer = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($customer)
+    ->atLocation(new Location($customer->customerId, $customersBucket))
+    ->build();
+$storeCustomer->execute();
+
+foreach ($orders as $order) {
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($order)
+        ->atLocation(new Location($order->orderId, $ordersBucket))
+        ->build();
+    $storeOrder->execute();
+}
+unset($order);
+
+$storeSummary = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($orderSummary)
+    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
+    ->build();
+$storeSummary->execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `OrderSummaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```php
+// Fetching related data by shared key
+$fetched_customer = (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $customersBucket))
+    ->build()->execute()->getObject()->getData();
+
+$fetched_customer->orderSummary =
+    (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $orderSummariesBucket))
+    ->build()->execute()->getObject()->getData();
+
+print("Customer with OrderSummary data: \n");
+print_r($fetched_customer);
+```
+
+Which returns our amalgamated objects:
+
+```text
+Customer with OrderSummary data:
+stdClass Object
+(
+    [customerId] => 1
+    [name] => John Smith
+    [address] => 123 Main Street
+    [city] => Columbus
+    [state] => Ohio
+    [zip] => 43210
+    [phone] => +1-614-555-5555
+    [createdDate] => 2013-10-01 14:30:26
+    [orderSummary] => stdClass Object
+        (
+            [customerId] => 1
+            [summaries] => Array
+                (
+                    [0] => stdClass Object
+                        (
+                            [orderId] => 1
+                            [total] => 415.98
+                            [orderDate] => 2013-10-01 14:42:26
+                        )
+
+                    [1] => stdClass Object
+                        (
+                            [orderId] => 2
+                            [total] => 359.99
+                            [orderDate] => 2013-10-15 16:43:16
+                        )
+
+                    [2] => stdClass Object
+                        (
+                            [orderId] => 3
+                            [total] => 74.98
+                            [orderDate] => 2013-11-03 17:45:28
+                        )
+                )
+        )
+)
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.0p5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```php
+// Adding Index Data
+$keys = array(1,2,3);
+foreach ($keys as $key) {
+    $orderLocation = new Location($key, $ordersBucket);
+    $orderObject = (new Command\Builder\FetchObject($riak))
+        ->atLocation($orderLocation)
+        ->build()->execute()->getObject();
+
+    $order = $orderObject->getData();
+
+    $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId);
+    $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate);
+
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->withObject($orderObject)
+        ->atLocation($orderLocation)
+        ->build();
+    $storeOrder->execute();
+}
+unset($key);
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+?>
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which as you can see will return the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys (and terms if needed) that match the index query diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/python.md b/content/riak/kv/2.9.0p5/developing/getting-started/python.md new file mode 100644 index 0000000000..d6cb3e29c7 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/python.md @@ -0,0 +1,107 @@ +--- +title: "Getting Started with Python" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Python" + identifier: "getting_started_python" + weight: 102 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/python + - /riak/kv/2.9.0p5/dev/taste-of-riak/python + - /riak/2.9.0p5/developing/getting-started/python/ + - /riak/2.9.0/developing/getting-started/python/ + - /riak/kv/2.9.0/developing/getting-started/python/ + - /riak/kv/2.9.0p1/developing/getting-started/python/ + - /riak/kv/2.9.0p2/developing/getting-started/python/ + - /riak/kv/2.9.0p3/developing/getting-started/python/ + - /riak/kv/2.9.0p4/developing/getting-started/python/ +--- + + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.0p5/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Python is +required, with Python 2.7 preferred. One of the Python package managers, +e.g. `setuptools` or `pip`, is also required to install the client +package. + +You may install `setuptools` on OS X through MacPorts by running `sudo +port install py-distribute`. `setuptools` and `pip` are included in the +Homebrew formula for Python on OS X as well. Just run `brew install +python`. + +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` --- Header files and a static library for Python +* `libffi-dev` --- Foreign function interface library +* `libssl-dev` --- libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. 
Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.0p5/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/python/crud-operations.md b/content/riak/kv/2.9.0p5/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..ae707acf65 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/python/crud-operations.md @@ -0,0 +1,154 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.0p5/developing/getting-started/python/crud-operations/ + - /riak/2.9.0/developing/getting-started/python/crud-operations/ + - /riak/kv/2.9.0/developing/getting-started/python/crud-operations/ + - /riak/kv/2.9.0p1/developing/getting-started/python/crud-operations/ + - /riak/kv/2.9.0p2/developing/getting-started/python/crud-operations/ + - /riak/kv/2.9.0p3/developing/getting-started/python/crud-operations/ + - /riak/kv/2.9.0p4/developing/getting-started/python/crud-operations/ +--- + + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. 
+ +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/python/object-modeling.md b/content/riak/kv/2.9.0p5/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..6697b12b60 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/python/object-modeling.md @@ -0,0 +1,268 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/object-modeling-python + - /riak/kv/2.9.0p5/dev/taste-of-riak/object-modeling-python + - /riak/2.9.0p5/developing/getting-started/python/object-modeling/ + - /riak/2.9.0/developing/getting-started/python/object-modeling/ + - /riak/kv/2.9.0/developing/getting-started/python/object-modeling/ + - /riak/kv/2.9.0p1/developing/getting-started/python/object-modeling/ + - /riak/kv/2.9.0p2/developing/getting-started/python/object-modeling/ + - /riak/kv/2.9.0p3/developing/getting-started/python/object-modeling/ + - /riak/kv/2.9.0p4/developing/getting-started/python/object-modeling/ +--- + + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br />`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_<type>_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```python
+class UserRepository:
+    BUCKET = 'Users'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, user):
+        riak_obj = self.client.bucket(self.BUCKET).get(user['user_name'])
+        riak_obj.data = user
+        return riak_obj.store()
+
+    def get(self, user_name):
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        return riak_obj.data
+
+
+class MsgRepository:
+    BUCKET = 'Msgs'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, msg):
+        msgs = self.client.bucket(self.BUCKET)
+        key = self._generate_key(msg)
+
+        riak_obj = msgs.get(key)
+
+        if not riak_obj.exists:
+            riak_obj.data = msg
+            riak_obj.store(if_none_match=True)
+
+        return riak_obj
+
+    def get(self, key):
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _generate_key(self, msg):
+        return msg['sender'] + '_' + msg['created']
+
+
+class TimelineRepository:
+    BUCKET = 'Timelines'
+    SENT = 'Sent'
+    INBOX = 'Inbox'
+
+    def __init__(self, client):
+        self.client = client
+        self.msg_repo = MsgRepository(client)
+
+    def post_message(self, msg):
+        # Save the canonical copy
+        saved_message = self.msg_repo.save(msg)
+        msg_key = saved_message.key
+
+        # Post to sender's Sent timeline
+        self._add_to_timeline(msg, self.SENT, msg_key)
+
+        # Post to recipient's Inbox timeline
+        self._add_to_timeline(msg, self.INBOX, msg_key)
+
+    def get_timeline(self, owner, msg_type, date):
+        key = self._generate_key(owner, msg_type, date)
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _add_to_timeline(self, msg, msg_type, msg_key):
+        timeline_key = self._generate_key_from_msg(msg, msg_type)
+        riak_obj = self.client.bucket(self.BUCKET).get(timeline_key)
+
+        if riak_obj.exists:
+            riak_obj = self._add_to_existing_timeline(riak_obj,
+                                                      msg_key)
+        else:
+            riak_obj = self._create_new_timeline(riak_obj,
+                                                 msg, msg_type,
+                                                 msg_key)
+
+        return riak_obj.store()
+
+    def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key):
+        owner = self._get_owner(msg, msg_type)
+        new_timeline = {'owner': owner,
+                        'msg_type': msg_type,
+                        'msgs': [msg_key]}
+
+        riak_obj.data = new_timeline
+        return riak_obj
+
+    def _add_to_existing_timeline(self, riak_obj, msg_key):
+        riak_obj.data['msgs'].append(msg_key)
+        return riak_obj
+
+    def _get_owner(self, msg, msg_type):
+        if msg_type == self.INBOX:
+            return msg['recipient']
+        else:
+            return msg['sender']
+
+    def _generate_key_from_msg(self, msg, msg_type):
+        owner = self._get_owner(msg, msg_type)
+        return self._generate_key(owner, msg_type, msg['created'])
+
+    def _generate_key(self, owner, msg_type, datetimestr):
+        dateString = string.split(datetimestr, 'T', 1)[0]
+        return owner + '_' + msg_type + '_' + dateString
+```
+
+Finally, let's test them:
+
+```python
+# Setup our repositories
+client = riak.RiakClient(pb_port=10017, protocol='pbc')
+userRepo = UserRepository(client)
+msgsRepo = MsgRepository(client)
+timelineRepo = TimelineRepository(client)
+
+# Save users
+userRepo.save(marleen)
+userRepo.save(joe)
+
+# Post msg to timelines
+timelineRepo.post_message(msg)
+
+# Get Joe's inbox for today, get first message
+joes_inbox_today = timelineRepo.get_timeline(
+    joe['user_name'],
+    TimelineRepository.INBOX,
+    datetime.utcnow().isoformat())
+
+joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0])
+
+print 'From: {0}\nMsg : {1}\n\n'.format(
+    joes_first_message['sender'],
+    joes_first_message['text'])
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+* It helps us to see if an object exists before creating a new one
+* It keeps our buckets and key names consistent
+* It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster (see the sketch after
+the recap). There are drawbacks to every solution, but we recommend
+seeking out the key/value-based solution first, as it will likely be the
+quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data.
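+
+For illustration, here is a minimal sketch of that idea: a hypothetical
+`RecentMsgsRepository` (the class and the `RecentMsgs` bucket are our
+own invention, not part of the data model above) that keeps a small
+rolling list of a user's most recent message keys, so "recent sent
+messages" becomes a single lookup.
+
+```python
+class RecentMsgsRepository:
+    BUCKET = 'RecentMsgs'  # hypothetical bucket for rolling message lists
+    MAX_MSGS = 10
+
+    def __init__(self, client):
+        self.client = client
+
+    def push(self, user_name, msg_key):
+        # Fetch (or implicitly create) the user's rolling list
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        msg_keys = riak_obj.data or []
+        msg_keys.insert(0, msg_key)               # newest first
+        riak_obj.data = msg_keys[:self.MAX_MSGS]  # keep only the last 10
+        riak_obj.store()
+
+    def get(self, user_name):
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        return riak_obj.data or []
+```
+
+The trade-off is the one discussed above: every post now writes one more
+object, but the common read becomes a single key/value fetch.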
+
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/python/querying.md b/content/riak/kv/2.9.0p5/developing/getting-started/python/querying.md
new file mode 100644
index 0000000000..3ed6aabdbd
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/python/querying.md
@@ -0,0 +1,244 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Python"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Querying"
+    identifier: "getting_started_python_query"
+    weight: 101
+    parent: "getting_started_python"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/taste-of-riak/querying-python
+  - /riak/kv/2.9.0p5/dev/taste-of-riak/querying-python
+  - /riak/2.9.0p5/developing/getting-started/python/querying/
+  - /riak/2.9.0/developing/getting-started/python/querying/
+  - /riak/kv/2.9.0/developing/getting-started/python/querying/
+  - /riak/kv/2.9.0p1/developing/getting-started/python/querying/
+  - /riak/kv/2.9.0p2/developing/getting-started/python/querying/
+  - /riak/kv/2.9.0p3/developing/getting-started/python/querying/
+  - /riak/kv/2.9.0p4/developing/getting-started/python/querying/
+---
+
+
+#### A Quick Note on Querying and Schemas
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema describing how all the data relates to one another. This can range from using the same key across multiple buckets for different types of data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+### Denormalization
+
+If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+### Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled-up info about orders such as Total, etc. Let's put some data into Riak so we can play with it.
+ +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all customer orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```python +customer = customer_bucket.get('1').data +customer['order_summary'] = order_summary_bucket.get('1').data +customer +``` + +Which returns our amalgamated objects: + +```python +{ + u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210', + u'created_date': u'2013-10-01 14:30:26', + 'order_summary': { + u'customer_id': 1, u'summaries': [ + {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98}, + {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99}, + {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98} + ]}, + u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street', + u'customer_id': 1 +} +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. 
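+
+One way to keep that relationship knowledge in a single place is a small
+accessor function. Here is a minimal sketch (the function name and the
+extra step of expanding full orders are our own illustration, not part
+of the dataset above):
+
+```python
+def get_customer_view(customer_id):
+    key = str(customer_id)
+    # The Customer and OrderSummaries objects share the same key
+    customer = customer_bucket.get(key).data
+    customer['order_summary'] = order_summary_bucket.get(key).data
+    # The summary doubles as an index, so full orders are one fetch away
+    customer['orders'] = [order_bucket.get(str(s['order_id'])).data
+                          for s in customer['order_summary']['summaries']]
+    return customer
+```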
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.0p5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/ruby.md b/content/riak/kv/2.9.0p5/developing/getting-started/ruby.md
new file mode 100644
index 0000000000..902c8feb3e
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/ruby.md
@@ -0,0 +1,72 @@
+---
+title: "Getting Started with Ruby"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Ruby"
+    identifier: "getting_started_ruby"
+    weight: 101
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/taste-of-riak/ruby
+  - /riak/kv/2.9.0p5/dev/taste-of-riak/ruby
+  - /riak/2.9.0p5/developing/getting-started/ruby/
+  - /riak/2.9.0/developing/getting-started/ruby/
+  - /riak/kv/2.9.0/developing/getting-started/ruby/
+  - /riak/kv/2.9.0p1/developing/getting-started/ruby/
+  - /riak/kv/2.9.0p2/developing/getting-started/ruby/
+  - /riak/kv/2.9.0p3/developing/getting-started/ruby/
+  - /riak/kv/2.9.0p4/developing/getting-started/ruby/
+---
+
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.0p5/using/running-a-cluster) first. To try this flavor
+of Riak, a working installation of Ruby is required.
+
+## Client Setup
+
+First, install the Riak Ruby client via RubyGems.
+
+```bash
+gem install riak-client
+```
+
+Start IRB, the Ruby REPL, and let’s get set up. Enter the following into
+IRB:
+
+```ruby
+require 'riak'
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance, assuming that the node is running on `localhost`
+port 8087:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087)
+
+# Since the Ruby Riak client uses the Protocol Buffers API by default,
+# you can also just enter this:
+client = Riak::Client.new(:pb_port => 8087)
+```
+
+If you set up a local Riak cluster using the [[five-minute install]]
+method, use this code snippet instead:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017)
+
+# For the reasons explained in the snippet above, this will also work:
+client = Riak::Client.new(:pb_port => 10017)
+```
+
+We are now ready to start interacting with Riak.
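+
+To confirm that the client can actually reach the node, you can issue a
+quick ping (a minimal sanity check; the `true` result assumes a running,
+reachable node):
+
+```ruby
+client.ping
+# => true
+```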
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.0p5/developing/getting-started/ruby/crud-operations)
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/2.9.0p5/developing/getting-started/ruby/crud-operations.md
new file mode 100644
index 0000000000..a1ace2ac05
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/ruby/crud-operations.md
@@ -0,0 +1,155 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Ruby"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "CRUD Operations"
+    identifier: "getting_started_ruby_crud"
+    weight: 100
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/getting-started/ruby/crud-operations/
+  - /riak/2.9.0/developing/getting-started/ruby/crud-operations/
+  - /riak/kv/2.9.0/developing/getting-started/ruby/crud-operations/
+  - /riak/kv/2.9.0p1/developing/getting-started/ruby/crud-operations/
+  - /riak/kv/2.9.0p2/developing/getting-started/ruby/crud-operations/
+  - /riak/kv/2.9.0p3/developing/getting-started/ruby/crud-operations/
+  - /riak/kv/2.9.0p4/developing/getting-started/ruby/crud-operations/
+---
+
+
+## Creating Objects In Riak
+
+First, let’s create a few objects and a bucket to keep them in.
+
+```ruby
+my_bucket = client.bucket("test")
+
+val1 = 1
+obj1 = my_bucket.new('one')
+obj1.data = val1
+obj1.store()
+```
+
+In this first example, we have stored the integer 1 with the lookup key
+of `one`. Next, let’s store a simple string value of `two` with a
+matching key.
+
+```ruby
+val2 = "two"
+obj2 = my_bucket.new('two')
+obj2.data = val2
+obj2.store()
+```
+
+That was easy. Finally, let’s store a bit of JSON. You will probably
+recognize the pattern by now.
+
+```ruby
+val3 = { myValue: 3 }
+obj3 = my_bucket.new('three')
+obj3.data = val3
+obj3.store()
+```
+
+## Reading Objects From Riak
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+```ruby
+fetched1 = my_bucket.get('one')
+fetched2 = my_bucket.get('two')
+fetched3 = my_bucket.get('three')
+
+fetched1.data == val1
+fetched2.data == val2
+fetched3.data.to_json == val3.to_json
+```
+
+That was easy. We simply request the objects by key. In the last
+example, we converted to JSON so we can compare a string key to a symbol
+key.
+
+## Updating Objects In Riak
+
+While some data may be static, other forms of data may need to be
+updated. This is also easy to accomplish. Let’s update the value of
+`myValue` in the 3rd example to `42`.
+
+```ruby
+fetched3.data["myValue"] = 42
+fetched3.store()
+```
+
+## Deleting Objects From Riak
+
+As a last step, we’ll demonstrate how to delete data. You’ll see that
+the delete message can be called either against the bucket or the
+object.
+
+```ruby
+my_bucket.delete('one')
+obj2.delete()
+obj3.delete()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this Ruby hash that encapsulates some knowledge about
+a book.
+
+```ruby
+book = {
+  :isbn => '1111979723',
+  :title => 'Moby Dick',
+  :author => 'Herman Melville',
+  :body => 'Call me Ishmael. Some years ago...',
+  :copies_owned => 3
+}
+```
+
+All right, so we have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now.
+ +```ruby +books_bucket = client.bucket('books') +new_book = books_bucket.new(book[:isbn]) +new_book.data = book +new_book.store() +``` + +Some of you may be thinking, "But how does the Ruby Riak client +encode/decode my object?" If we fetch our book back and print the raw +data, we shall know: + +```ruby +fetched_book = books_bucket.get(book[:isbn]) +puts fetched_book.raw_data +``` + +Raw Data: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +JSON! The Ruby Riak client will serialize objects to JSON when it comes +across structured data like hashes. For more advanced control over +serialization you can use a library called +[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling +layer over the basic riak client. Ripple falls outside the scope of +this document but we shall visit it later. + +Now, let’s clean up our mess: + +```ruby +new_book.delete() +``` diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/2.9.0p5/developing/getting-started/ruby/object-modeling.md new file mode 100644 index 0000000000..47af236b1b --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/getting-started/ruby/object-modeling.md @@ -0,0 +1,299 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Ruby" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Object Modeling" + identifier: "getting_started_ruby_object" + weight: 102 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/2.9.0p5/dev/taste-of-riak/object-modeling-ruby + - /riak/kv/2.9.0p5/dev/taste-of-riak/object-modeling-ruby + - /riak/2.9.0p5/developing/getting-started/ruby/object-modeling/ + - /riak/2.9.0/developing/getting-started/ruby/object-modeling/ + - /riak/kv/2.9.0/developing/getting-started/ruby/object-modeling/ + - /riak/kv/2.9.0p1/developing/getting-started/ruby/object-modeling/ + - /riak/kv/2.9.0p2/developing/getting-started/ruby/object-modeling/ + - /riak/kv/2.9.0p3/developing/getting-started/ruby/object-modeling/ + - /riak/kv/2.9.0p4/developing/getting-started/ruby/object-modeling/ +--- + + +To get started, let's create the models that we'll be using. Since the +[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses +hashes when converting to and from JSON, we'll use the library +[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically +coerce class properties to and from hashes. You can install this library +with `gem install hashie`. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'hashie' +require 'time' + +class User < Hashie::Dash + property :user_name + property :full_name + property :email +end + +class Msg < Hashie::Dash + property :from + property :to + property :created + property :text +end + +class Timeline < Hashie::Dash + property :owner + property :type + property :msgs +end +``` + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. 
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br />`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def add_to_existing_timeline(key, msg_key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    timeline = Timeline.new(riak_obj.data)
+    timeline.msgs << msg_key
+    riak_obj.data = timeline
+    riak_obj
+  end
+
+  def get_owner(msg, type)
+    type == INBOX ? msg.to : msg.from
+  end
+
+  def generate_key_from_msg(msg, type)
+    owner = get_owner(msg, type)
+    generate_key(owner, type, msg.created)
+  end
+
+  def generate_key(owner, type, date)
+    owner + '_' + type + '_' + date.utc.strftime('%F')
+  end
+end
+```
+
+Finally, let's test them:
+
+```ruby
+# Setup our repositories
+client = Riak::Client.new(protocol: 'pbc', pb_port: 10017)
+user_repo = UserRepository.new(client)
+msgs_repo = MsgRepository.new(client)
+timeline_repo = TimelineRepository.new(client)
+
+# Create and save users
+marleen = User.new(user_name: 'marleenmgr',
+                   full_name: 'Marleen Manager',
+                   email: 'marleen.manager@basho.com')
+
+joe = User.new(user_name: 'joeuser',
+               full_name: 'Joe User',
+               email: 'joe.user@basho.com')
+
+user_repo.save(marleen)
+user_repo.save(joe)
+
+# Create new Msg, post to timelines
+msg = Msg.new(from: marleen.user_name,
+              to: joe.user_name,
+              created: Time.now,
+              text: 'Welcome to the company!')
+
+timeline_repo.post_message(msg)
+
+# Get Joe's inbox for today, get first message
+joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now)
+joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first)
+
+puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}"
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+* It helps us to see if an object exists before creating a new one
+* It keeps our buckets and key names consistent
+* It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster (see the sketch after
+the recap). There are drawbacks to every solution, but we recommend
+seeking out the key/value-based solution first, as it will likely be the
+quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data.
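+
+For illustration, here is a minimal sketch of that idea: a hypothetical
+`RecentMsgsRepository` (the class and the `RecentMsgs` bucket are our
+own invention, not part of the data model above) that keeps a small
+rolling list of a user's most recent message keys, so "recent sent
+messages" becomes a single lookup.
+
+```ruby
+class RecentMsgsRepository
+  BUCKET = 'RecentMsgs' # hypothetical bucket name
+  MAX_MSGS = 10
+
+  def initialize(client)
+    @client = client
+  end
+
+  # Prepend the newest message key and trim the list to MAX_MSGS
+  def push(user_name, msg_key)
+    riak_obj = @client.bucket(BUCKET).get_or_new(user_name)
+    msg_keys = riak_obj.data || []
+    msg_keys.unshift(msg_key)
+    riak_obj.data = msg_keys.first(MAX_MSGS)
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    @client.bucket(BUCKET).get_or_new(user_name).data || []
+  end
+end
+```
+
+The trade-off is the one discussed above: every post now writes one more
+object, but the common read becomes a single key/value fetch.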
+
diff --git a/content/riak/kv/2.9.0p5/developing/getting-started/ruby/querying.md b/content/riak/kv/2.9.0p5/developing/getting-started/ruby/querying.md
new file mode 100644
index 0000000000..b268ce64a0
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/getting-started/ruby/querying.md
@@ -0,0 +1,260 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Ruby"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Querying"
+    identifier: "getting_started_ruby_query"
+    weight: 101
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/taste-of-riak/querying-ruby
+  - /riak/kv/2.9.0p5/dev/taste-of-riak/querying-ruby
+  - /riak/2.9.0p5/developing/getting-started/ruby/querying/
+  - /riak/2.9.0/developing/getting-started/ruby/querying/
+  - /riak/kv/2.9.0/developing/getting-started/ruby/querying/
+  - /riak/kv/2.9.0p1/developing/getting-started/ruby/querying/
+  - /riak/kv/2.9.0p2/developing/getting-started/ruby/querying/
+  - /riak/kv/2.9.0p3/developing/getting-started/ruby/querying/
+  - /riak/kv/2.9.0p4/developing/getting-started/ruby/querying/
+---
+
+
+#### A Quick Note on Querying and Schemas
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema describing how all the data relates to one another. This can range from using the same key across multiple buckets for different types of data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+### Denormalization
+
+If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+### Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled-up info about orders such as Total, etc. Let's put some data into Riak so we can play with it.
+ +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```ruby +shared_key = '1' +customer = customer_bucket.get(shared_key).data +customer[:order_summary] = order_summary_bucket.get(shared_key).data +puts "Combined Customer and Order Summary: " +pp customer +``` + +Which returns our amalgamated objects: + +```ruby +# Combined Customer and Order Summary: +{"customer_id"=>1, + "name"=>"John Smith", + "address"=>"123 Main Street", + "city"=>"Columbus", + "state"=>"Ohio", + "zip"=>"43210", + "phone"=>"+1-614-555-5555", + "created_date"=>"2013-10-01 14:30:26 -0400", + :order_summary=> + {"customer_id"=>1, + "summaries"=> + [{"order_id"=>1, + "total"=>415.98, + "order_date"=>"2013-10-01 14:42:26 -0400"}, + {"order_id"=>2, + "total"=>359.99, + "order_date"=>"2013-10-15 16:43:16 -0400"}, + {"order_id"=>3, + "total"=>74.98, + "order_date"=>"2013-11-03 17:45:28 -0500"}]}} +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. 
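+
+One way to keep that relationship knowledge in a single place is a small
+accessor method. Here is a minimal sketch (the method name and the extra
+step of expanding full orders are our own illustration, not part of the
+dataset above):
+
+```ruby
+def customer_view(customer_id)
+  key = customer_id.to_s
+  # The Customer and OrderSummaries objects share the same key
+  customer = customer_bucket.get(key).data
+  customer[:order_summary] = order_summary_bucket.get(key).data
+  # The summary doubles as an index, so full orders are one fetch away
+  customer[:orders] = customer[:order_summary]['summaries'].map do |s|
+    order_bucket.get(s['order_id'].to_s).data
+  end
+  customer
+end
+```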
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.0p5/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.0p5/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.0p5/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+                                         .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "#Jane's Orders: "
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`.
+
+```ruby
+puts "#October's Orders: "
+pp order_bucket.get_index('order_date_bin', '20131001'..'20131031')
+```
+
+Which returns:
+
+```ruby
+# October's Orders:
+["1", "2"]
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.9.0p5/developing/key-value-modeling.md b/content/riak/kv/2.9.0p5/developing/key-value-modeling.md new file mode 100644 index 0000000000..c5d64f4f0b --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/key-value-modeling.md @@ -0,0 +1,539 @@ +--- +title: "Riak KV Key/Value Modeling" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Key/Value Modeling" + identifier: "developing_kv_model" + weight: 104 + parent: "developing" +toc: true +aliases: + - /riak/2.9.0p5/dev/data-modeling/key-value/ + - /riak/kv/2.9.0p5/dev/data-modeling/key-value/ + - /riak/2.9.0p5/developing/key-value-modeling/ + - /riak/2.9.0/developing/key-value-modeling/ + - /riak/kv/2.9.0/developing/key-value-modeling/ + - /riak/kv/2.9.0p1/developing/key-value-modeling/ + - /riak/kv/2.9.0p2/developing/key-value-modeling/ + - /riak/kv/2.9.0p3/developing/key-value-modeling/ + - /riak/kv/2.9.0p4/developing/key-value-modeling/ +--- + + +While Riak enables you to take advantage of a wide variety of features +that can be useful in application development, such as [Search]({{}}riak/kv/2.9.0p5/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.9.0p5/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.9.0p5/developing/data-types/), Riak almost always performs best when you +build your application around basic CRUD operations (create, read, +update, and delete) on objects, i.e. when you use Riak as a "pure" +key/value store. + +In this tutorial, we'll suggest some strategies for naming and modeling +for key/value object interactions with Riak. If you'd like to use some +of Riak's other features, we recommend checking out the documentation +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.9.0p5/developing/app-guide/) for a better sense of which features you might need. + +## Advantages of Key/Value Operations + +Riak's key/value architecture enables it to be more performant than +relational databases in many scenarios because Riak doesn't need to +perform lock, join, union, or other operations when working with +objects. Instead, it interacts with objects on a one-by-one basis, using +**primary key lookups**. + +Primary key lookups store and fetch objects in Riak on the basis of +three basic locators: + +* The object's [key]({{}}riak/kv/2.9.0p5/learn/concepts/keys-and-objects#keys), which can be anything you + want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/2.9.0p5/learn/concepts/buckets) which houses the object and its key (bucket + names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/2.9.0p5/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.9.0p5/developing/app-guide/replication-properties) and other properties + +It may be useful to think of this system as analogous to a nested +key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you +would find in most programming languages. Below is an example from +[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{}}riak/kv/2.9.0p5/developing/api/http)):
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+The most important benefit of storing Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist?
+
+One way to determine this is to [list all keys]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers/list-keys) in the
+bucket `users`. This approach, however, is _not_ recommended, because
+listing all keys in a bucket is a very expensive operation that should
+not be used in production. And so another strategy must be employed.
+
+A better possibility is to use [Riak sets]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets) to
+store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types) that enable you to store lists of binaries or strings in Riak.
+Unlike normal Riak objects, you can interact with Riak sets much like
+you interact with sets in most programming languages, i.e. you can add
+and remove elements at will.
+
+Going back to our user data example, instead of simply storing user
+records in our `users` bucket, we could set up our application to store
+each key in a set when a new record is created. We'll store this set in
+the bucket `user_info_sets` (we'll keep it simple) and in the key
+`usernames`. The following will also assume that we've [set up a bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called
+`sets`.
+
+We can interact with that set on the basis of its location:
+
+```java
+Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames");
+
+// With this Location, we can construct fetch operations like this:
+FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build();
+```
+
+```ruby
+require 'riak'
+
+set_bucket = client.bucket('user_info_sets')
+
+# We'll make this set global because we'll use it
+# inside of a function later on
+
+$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets')
+```
+
+```php
+$command = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->buildLocation('usernames', 'user_info_sets', 'sets')
+  ->build();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('user_info_sets')
+user_id_set = Set(bucket, 'usernames')
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.0p5/developing/getting-started).
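+
+A minimal Erlang sketch of the same fetch, assuming a connected
+`riakc_pb_socket` process (`Pid`) and the active `sets` bucket type
+described above:
+
+```erlang
+%% Fetch the "usernames" set from the "user_info_sets" bucket
+%% under the "sets" bucket type
+{ok, UserIdSet} = riakc_pb_socket:fetch_type(Pid,
+                                             {<<"sets">>, <<"user_info_sets">>},
+                                             <<"usernames">>),
+
+%% riakc_set:value/1 returns the set's members as a list of binaries
+Usernames = riakc_set:value(UserIdSet).
+```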
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+  public String username;
+  public String info;
+
+  public User(String username, String info) {
+    this.username = username;
+    this.info = info;
+  }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+  // User records themselves will be stored in the bucket "users"
+  Location userObjectLocation =
+    new Location(new Namespace("users"), user.username);
+  RiakObject userObject = new RiakObject()
+      // We'll keep it simple and store User object data as plain text
+      .setContentType("text/plain")
+      .setValue(BinaryValue.create(user.info));
+  StoreValue store = new StoreValue.Builder(userObject)
+      .withLocation(userObjectLocation)
+      .build();
+  client.execute(store);
+
+  Location userIdSet =
+    new Location(new Namespace("sets", "user_info_sets"), "usernames");
+  SetUpdate su = new SetUpdate()
+      .add(BinaryValue.create(user.username));
+  UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+      .build();
+  client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new(client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the set
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+  public $user_name;
+  public $info;
+
+  public function __construct($user_name, $info)
+  {
+    $this->user_name = $user_name;
+    $this->info = $info;
+  }
+}
+
+function store_user(User $user)
+{
+  (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation($user->user_name, 'users')
+    ->buildJsonObject($user)
+    ->build()
+    ->execute();
+
+  (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->buildLocation('usernames', 'user_info_sets', 'sets')
+    ->add($user->user_name)
+    ->build()
+    ->execute();
+}
+```
+
+```python
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    obj = RiakObject(client, client.bucket('users'), user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
+
+Now, let's say that we want to be able to pull up all user records in
+the bucket at once. We could do so by iterating through the usernames
+stored in our set and then fetching the object corresponding to each
+username:
+
+```java
+public Set<User> fetchAllUserRecords() throws Exception {
+    // Empty builder sets for usernames and User objects
+    Set<String> userIds = new HashSet<>();
+    Set<User> userSet = new HashSet<>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSetLocation =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSetLocation).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    set.viewAsSet().forEach((BinaryValue username) -> {
+        userIds.add(username.toString());
+    });
+
+    // Fetch User objects for each of the usernames stored in the set
+    for (String username : userIds) {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        User user = client.execute(fetch).getValue(User.class);
+        userSet.add(user);
+    }
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).raw_data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+  $users = [];
+
+  $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->buildLocation('usernames', 'user_info_sets', 'sets')
+    ->build()
+    ->execute();
+
+  $user_names = $response->getSet()->getData();
+  foreach($user_names as $user_name) {
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+      ->buildLocation($user_name, 'users')
+      ->build()
+      ->execute();
+
+    $users[$user_name] = $response->getObject()->getData();
+  }
+
+  return $users;
+}
+```
+
+```python
+# We'll create a generator object that will yield a list of Riak objects
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can retrieve that list of Riak objects later on
+list(fetch_all_user_records())
+```
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`. If an
+object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) throws Exception {
+    String usernameKey = String.format("user_%s", username);
+    Location loc = new Location(new Namespace("users"), usernameKey);
+    FetchValue fetchUser = new FetchValue.Builder(loc).build();
+    FetchValue.Response res = client.execute(fetchUser);
+    User userObject = res.getValue(User.class);
+    return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+  $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('user_' . $user_name, 'users')
+    ->build()
+    ->execute();
+
+  return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good' => {
+    'season X' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  },
+  'bad' => {
+    'season Y' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  }
+}
+```
+
+We can fetch the title of season 8, episode 6:
+
+```ruby
+# For the sake of example, we'll classify season 8 as good:
+
+simpsons['good']['season 8']['episode 6']
+
+# => "A Milhouse Divided"
+```
+
+If your data is best modeled as a three-layered hash, you may want to
+consider using bucket types in the way shown above.
+
+## Resources
+
+More on key/value modeling in Riak can be found in [this
+presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo)
+by Basho evangelist [Hector Castro](https://github.com/hectcastro), with
+the presentation slides available [on Speaker
+Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example).
diff --git a/content/riak/kv/2.9.0p5/developing/usage.md b/content/riak/kv/2.9.0p5/developing/usage.md
new file mode 100644
index 0000000000..5c6a731143
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage.md
@@ -0,0 +1,142 @@
+---
+title: "Usage Overview"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Usage"
+    identifier: "developing_usage"
+    weight: 101
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/usage/
+  - /riak/2.9.0/developing/usage/
+  - /riak/kv/2.9.0/developing/usage/
+  - /riak/kv/2.9.0p1/developing/usage/
+  - /riak/kv/2.9.0p2/developing/usage/
+  - /riak/kv/2.9.0p3/developing/usage/
+  - /riak/kv/2.9.0p4/developing/usage/
+---
+
+
+## In This Section
+
+#### [Creating Objects](./creating-objects)
+
+Creating and storing objects in Riak KV.
+
+[Learn More >>](./creating-objects)
+
+
+#### [Reading Objects](./reading-objects)
+
+Reading and fetching objects in Riak KV.
+
+[Learn More >>](./reading-objects)
+
+
+#### [Updating Objects](./updating-objects)
+
+Updating objects in Riak KV.
+
+[Learn More >>](./updating-objects)
+
+
+#### [Deleting Objects](./deleting-objects)
+
+Deleting objects in Riak KV.
+
+[Learn More >>](./deleting-objects)
+
+
+#### [Content Types](./content-types)
+
+Overview of content types and their usage.
+
+[Learn More >>](./content-types)
+
+
+#### [Using Search](./search)
+
+Tutorial on using search.
+
+[Learn More >>](./search)
+
+
+#### [Using MapReduce](./mapreduce)
+
+Guide to using MapReduce in applications.
+
+[Learn More >>](./mapreduce)
+
+
+#### [Using Secondary Indexes](./secondary-indexes)
+
+Overview and usage details of Secondary Indexes (2i).
+
+[Learn More >>](./secondary-indexes)
+
+
+#### [Bucket Types](./bucket-types)
+
+Describes how to use bucket properties.
+
+[Learn More >>](./bucket-types)
+
+
+#### [Using Commit Hooks](./commit-hooks)
+
+Tutorial on pre-commit and post-commit hook functions.
+
+[Learn More >>](./commit-hooks)
+
+
+#### [Creating Search Schemas](./search-schemas)
+
+Step-by-step guide on creating and using custom search schemas.
+
+[Learn More >>](./search-schemas)
+
+
+#### [Searching with Data Types](./searching-data-types)
+
+Guide on using search with Data Types.
+
+[Learn More >>](./searching-data-types)
+
+
+#### [Implementing a Document Store](./document-store)
+
+Tutorial on using Riak KV as a document store.
+
+[Learn More >>](./document-store)
+
+
+#### [Custom Extractors](./custom-extractors)
+
+Details on creating and registering custom extractors with Riak Search.
+
+[Learn More >>](./custom-extractors)
+
+
+#### [Client-side Security](./security)
+
+Overview of client-side security.
+
+[Learn More >>](./security)
+
+
+#### [Replication](./replication)
+
+Documentation on replication properties and their underlying implementation.
+
+[Learn More >>](./replication)
+
+
+#### [Conflict Resolution](./conflict-resolution)
+
+Guide to conflict resolution during object updates.
+
+[Learn More >>](./conflict-resolution)
diff --git a/content/riak/kv/2.9.0p5/developing/usage/bucket-types.md b/content/riak/kv/2.9.0p5/developing/usage/bucket-types.md
new file mode 100644
index 0000000000..4a13996a00
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/bucket-types.md
@@ -0,0 +1,106 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Bucket Types"
+    identifier: "usage_bucket_types"
+    weight: 108
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/advanced/bucket-types
+  - /riak/kv/2.9.0p5/dev/advanced/bucket-types
+  - /riak/2.9.0p5/developing/usage/bucket-types/
+  - /riak/2.9.0/developing/usage/bucket-types/
+  - /riak/kv/2.9.0/developing/usage/bucket-types/
+  - /riak/kv/2.9.0p1/developing/usage/bucket-types/
+  - /riak/kv/2.9.0p2/developing/usage/bucket-types/
+  - /riak/kv/2.9.0p3/developing/usage/bucket-types/
+  - /riak/kv/2.9.0p4/developing/usage/bucket-types/
+---
+
+
+If you ever need to turn off indexing for a bucket, set the
+`search_index` property to the `_dont_index_` sentinel value.
+
+## Bucket Properties
+
+Although we recommend that you use all new buckets under a bucket type,
+if you have existing data with a type-free bucket (i.e. under the
+`default` bucket type) you can set the `search_index` property for a
+specific bucket.
+
+```java
+Namespace catsBucket = new Namespace("cats");
+StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket)
+        .withSearchIndex("famous")
+        .build();
+client.execute(storePropsOp);
+```
+
+```ruby
+bucket = client.bucket('cats')
+bucket.properties = {'search_index' => 'famous'}
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak))
+    ->withName('famous')
+    ->buildBucket('cats')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket('cats')
+bucket.set_properties({'search_index': 'famous'})
+```
+
+```csharp
+var properties = new RiakBucketProperties();
+properties.SetSearchIndex("famous");
+var rslt = client.SetBucketProperties("cats", properties);
+```
+
+```javascript
+var bucketProps_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    // success
+};
+
+var store = new Riak.Commands.KV.StoreBucketProps.Builder()
+    .withBucket("cats")
+    .withSearchIndex("famous")
+    .withCallback(bucketProps_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/buckets/cats/props \
+  -H'content-type:application/json' \
+  -d'{"props":{"search_index":"famous"}}'
+```
diff --git a/content/riak/kv/2.9.0p5/developing/usage/commit-hooks.md b/content/riak/kv/2.9.0p5/developing/usage/commit-hooks.md
new file mode 100644
index 0000000000..66ed09fcb1
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/commit-hooks.md
@@ -0,0 +1,247 @@
+---
+title: "Using Commit Hooks"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Using Commit Hooks"
+    identifier: "usage_commit_hooks"
+    weight: 109
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/commit-hooks
+  - /riak/kv/2.9.0p5/dev/using/commit-hooks
+  - /riak/2.9.0p5/developing/usage/commit-hooks/
+  - /riak/2.9.0/developing/usage/commit-hooks/
+  - /riak/kv/2.9.0/developing/usage/commit-hooks/
+  - /riak/kv/2.9.0p1/developing/usage/commit-hooks/
+  - /riak/kv/2.9.0p2/developing/usage/commit-hooks/
+  - /riak/kv/2.9.0p3/developing/usage/commit-hooks/
+  - /riak/kv/2.9.0p4/developing/usage/commit-hooks/
+---
+
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types
+
+Pre- and post-commit hooks are functions that are invoked before or
+after an object has been written to Riak. To provide a few examples,
+commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and then the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions are allowed three possible return values:
+
+- A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written.
+- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
+
+Errors that occur when processing Erlang pre-commit hooks will be
+reported in the `sasl-error.log` file with lines that start with
+`problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object
+(`Object`) as its input and runs a pattern-matching operation on the
+object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
+function determines that the size of the object's value (fetched with the
+`riak_object:get_value` function) is greater than 5,242,880 bytes (5 MB),
+then the commit will fail with the message `Object is larger than 5MB.`
+This will stop the write. If the object is not larger than 5 MB, Riak
+will return the object and allow the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty
+list, meaning that no pre-commit hooks are specified by default. Adding
+one or more pre-commit hook functions to this list, as documented above,
+will cause Riak to start evaluating those hook functions when bucket
+entries are created, updated, or deleted. Riak stops evaluating
+pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way to use
+pre-commit hooks is to validate data before it is written to Riak.
+Below is an example that uses Erlang to validate a JSON object
+before it is written to Riak.
+ +Below is a sample JSON object that will be evaluated by the hook: + +```json +{ + "user_info": { + "name": "Mark Phillips", + "age": "25" + }, + "session_info": { + "id": 3254425, + "items": [29, 37, 34] + } +} +``` + +The following hook will validate the JSON object: + +```erlang +validate(Object) -> + try + mochijson2:decode(riak_object:get_value(Object)), + Object + catch + throw:invalid_utf8 -> + {fail, "Invalid JSON: Illegal UTF-8 character"}; + error:Error -> + {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))} + end. +``` + +**Note**: All pre-commit hook functions are executed for each create and update operation. + +## Post-Commit Hooks + +Post-commit hooks are run after a write has completed successfully. More +specifically, the hook function is called immediately before the calling +process is notified of the successful write. + +Hook functions must accept a single argument: the object instance just +written. The return value of the function is ignored. As with pre-commit +hooks, deletes are considered writes, so post-commit hook functions will +need to inspect the object's metadata for the presence of `X-Riak-Deleted` +to determine whether a delete has occurred. As with pre-commit hooks, +errors that occur when processing post-commit hooks will be reported in +the `sasl-error.log` file with lines that start with `problem invoking hook`. + +#### Example + +The following post-commit hook creates a secondary index on the `email` +field of a JSON object: + +```erlang +postcommit_index_on_email(Object) -> + %% Determine the target bucket name + Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]), + + %% Decode the JSON body of the object + {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)), + + %% Extract the email field + {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties), + + %% Create a new object for the target bucket + %% NOTE: This doesn't handle the case where the + %% index object already exists! + IndexObj = riak_object:new( + Bucket, Key, <<>>, %% no object contents + dict:from_list( + [ + {<<"content-type">>, "text/plain"}, + {<<"Links">>, + [ + { + {riak_object:bucket(Object), riak_object:key(Object)}, + <<"indexed">> + }]} + ] + ) + ), + + %% Get a riak client + {ok, C} = riak:local_client(), + + %% Store the object + C:put(IndexObj). +``` + + +### Chaining + +The default value of the bucket `postcommit` property is an empty list, +meaning that no post-commit hooks are specified by default. Adding one +or more post-commit hook functions to the list, as documented above, +will cause Riak to start evaluating those hook functions immediately +after data has been created, updated, or deleted. Each post-commit hook +function runs in a separate process so it's possible for several hook +functions, triggered by the same update, to execute in parallel. + +**Note**: All post-commit hook functions are executed for each create, +update, or delete. 
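+
+## Detecting Deletes in Hooks
+
+As noted above, deletes also trigger commit hooks, so hook functions
+often need to check for the `X-Riak-Deleted` metadata entry. A minimal
+sketch of such a check, assuming a single-content object whose metadata
+is the usual `dict` returned by `riak_object:get_metadata/1`:
+
+```erlang
+%% Returns true if the incoming write is actually a delete (tombstone).
+is_tombstone(Object) ->
+    Metadata = riak_object:get_metadata(Object),
+    dict:is_key(<<"X-Riak-Deleted">>, Metadata).
+
+%% A pre-commit hook that passes tombstones through untouched and
+%% applies validation only to ordinary writes.
+precommit_skip_deletes(Object) ->
+    case is_tombstone(Object) of
+        true  -> Object;
+        false -> Object  %% insert your validation logic here
+    end.
+```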
diff --git a/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution.md b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..394e8ef98c
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution.md
@@ -0,0 +1,685 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/conflict-resolution
+  - /riak/kv/2.9.0p5/dev/using/conflict-resolution
+  - /riak/2.9.0p5/developing/usage/conflict-resolution/
+  - /riak/2.9.0/developing/usage/conflict-resolution/
+  - /riak/kv/2.9.0/developing/usage/conflict-resolution/
+  - /riak/kv/2.9.0p1/developing/usage/conflict-resolution/
+  - /riak/kv/2.9.0p2/developing/usage/conflict-resolution/
+  - /riak/kv/2.9.0p3/developing/usage/conflict-resolution/
+  - /riak/kv/2.9.0p4/developing/usage/conflict-resolution/
+---
+
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#node) is capable of receiving requests without requiring that
+every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
+unavoidable. Often, Riak can resolve these conflicts on its own
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+
+{{% note title="Important note on terminology" %}}
+In versions of Riak prior to 2.0, vector clocks were the only causal context
+mechanism available in Riak, which changed with the introduction of dotted
+version vectors in 2.0. Please note that you may frequently find terminology in
+client library APIs, internal Basho documentation, and more that uses the term
+"vector clock" interchangeably with causal context in general. Riak's HTTP API
+still uses an `X-Riak-Vclock` header, for example, even if you are using dotted
+version vectors.
+{{% /note %}}
+
+But even when you use causal context, Riak cannot always decide which
+value is most causally recent, especially in cases involving concurrent
+updates to an object. So how does Riak behave when it can't decide on a
+single most-up-to-date value? **That is your choice**. A full listing of
+available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now,
+though, please bear in mind that we strongly recommend one of the
+following two options:
+
+1. If your data can be modeled as one of the currently available [Riak
+   Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types), we recommend using one of these types,
+   because all of them have conflict resolution _built in_, completely
+   relieving applications of the need to engage in conflict resolution.
+2. If your data cannot be modeled as one of the available Data Types,
+   we recommend allowing Riak to generate [siblings](#siblings) and to design your application to resolve
+   conflicts in a way that fits your use case. Developing your own
+   **conflict resolution strategy** can be tricky, but it has clear
+   advantages over other approaches.
+
+Because Riak allows for a mixed approach when storing and managing data,
+you can apply multiple conflict resolution strategies within a cluster.
+
+> **Note on strong consistency**
+>
+> In versions of Riak 2.0 and later, you have the option of using Riak in
+a strongly consistent fashion. This document pertains to usage of Riak
+as an _eventually_ consistent system. If you'd like to use Riak's
+strong consistency feature, please refer to the following documents:
+>
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency) --- A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/2.9.0p5/configuring/strong-consistency) --- A guide for operators
+> * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong
+  consistency
+
+## Client- and Server-side Conflict Resolution
+
+Riak's eventual consistency model is powerful because Riak is
+fundamentally non-opinionated about how data resolution takes place.
+While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+approaches to conflict resolution that are available. In Riak, you can
+mix and match conflict resolution strategies at the bucket level,
+[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets)
+to consider when reasoning about conflict resolution are the
+`allow_mult` and `last_write_wins` properties.
+
+These properties provide you with the following basic options:
+
+### Timestamp-based Resolution
+
+If the [`allow_mult`](#siblings) parameter is set to
+`false`, Riak resolves all object replica conflicts internally and does
+not return siblings to the client. How Riak resolves those conflicts
+depends on the value that you set for a different bucket property,
+[`last_write_wins`]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets). If `last_write_wins` is set to `false`,
+Riak will resolve all conflicts on the basis of
+[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are
+attached to all Riak objects as metadata.
+
+The problem with timestamps is that they are not a reliable resolution
+mechanism in distributed systems, and they always bear the risk of data
+loss. A better yet still-problematic option is to adopt a
+last-write-wins strategy, described directly below.
+
+### Last-write-wins
+
+Another way to manage conflicts is to set `allow_mult` to `false`, as
+with timestamp-based resolution, while also setting the
+`last_write_wins` parameter to
+`true`. This produces a so-called last-write-wins (LWW) strategy whereby
+Riak foregoes the use of all internal conflict resolution strategies
+when making writes, effectively disregarding all previous writes.
+
+The problem with LWW is that it will necessarily drop some writes in the
+case of concurrent updates in the name of preventing sibling creation.
+If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. For examples of client-side sibling resolution, see the following +client-library-specific docs: + +* [Java]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/nodejs) + +In Riak versions 2.0 and later, `allow_mult` is set to `true` by default +for any [bucket types]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) that you create. This means +that if you wish to avoid client-side sibling resolution, you have a few +options: + +* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) + that set `allow_mult` to `false` +* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference#default-bucket-properties) for your + cluster. If you set the `buckets.default.allow_mult` parameter to + `false`, all bucket types that you create will have `allow_mult` set + to `false` by default. + +## Causal Context + +When a value is stored in Riak, it is tagged with a piece of metadata +called a **causal context** which establishes the object's initial +version. Causal context comes in one of two possible forms, depending +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context#vector-clocks) will be used. 
+
+Causal context essentially enables Riak to compare the different values
+of objects stored in Riak and to determine a number of important things
+about those values:
+
+ * Whether one value is a direct descendant of the other
+ * Whether the values are direct descendants of a common parent
+ * Whether the values are unrelated in recent heritage
+
+Using the information provided by causal context, Riak is frequently,
+though not always, able to resolve conflicts between values without
+producing siblings.
+
+Both vector clocks and dotted version vectors are not human readable and
+look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+If `allow_mult` is set to `true`, you should _always_ use causal context
+when updating objects, _unless you are certain that no object exists
+under that key_. Failing to use causal context with mutable data,
+especially for objects that are frequently updated, can lead to
+[sibling explosion]({{<baseurl>}}riak/kv/2.9.0p5/using/performance/latency-reduction#siblings), which can
+produce a variety of problems in your cluster. Fortunately, much of the
+work involved with using causal context is handled automatically by
+Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries). Examples can be found for each
+client library in the [Object Updates]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/updating-objects) document.
+
+## Siblings
+
+A **sibling** is created when Riak is unable to resolve the canonical
+version of an object being stored, i.e. when Riak is presented with
+multiple possible values for an object and can't figure out which one is
+most causally recent. The following scenarios can create sibling values
+inside of a single object:
+
+1. **Concurrent writes** --- If two writes occur simultaneously from
+clients, Riak may not be able to choose a single value to store, in
+which case the object will be given a sibling. These writes could happen
+on the same node or on different nodes.
+2. **Stale causal context** --- Writes from any client using a stale
+[causal context]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context). This is a less likely scenario if a client updates
+the object by reading the object first, fetching the causal context
+currently attached to the object, and then returning that causal context
+to Riak when performing the update (fortunately, our client libraries
+handle much of this automatically). However, even if a client follows
+this protocol when performing updates, a situation may occur in which an
+update happens from a different client while the read/write cycle is
+taking place. This may cause the first client to issue the write with an
+old causal context value and for a sibling to be created. A client is
+"misbehaved" if it habitually updates objects with a stale causal
+context or with no context at all.
+3. **Missing causal context** --- If an object is updated with no causal
+context attached, siblings are very likely to be created. This is an
+unlikely scenario if you're using a Basho client library, but it _can_
+happen if you are manipulating objects using a client like `curl` and
+forgetting to set the `X-Riak-Vclock` header.
+
+## Siblings in Action
+
+Let's have a more concrete look at how siblings work in Riak. First,
+we'll create a bucket type called `siblings_allowed` with `allow_mult`
+set to `true`:
+
+```bash
+riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
+riak-admin bucket-type activate siblings_allowed
+riak-admin bucket-type status siblings_allowed
+```
+
+If the type has been activated, running the `status` command should
+return `siblings_allowed is active`. Now, we'll create two objects and
+write both of them to the same key without first fetching the object
+(which obtains the causal context):
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+RiakObject obj1 = new RiakObject()
+        .withContentType("text/plain")
+        .withValue(BinaryValue.create("Ren"));
+RiakObject obj2 = new RiakObject()
+        .withContentType("text/plain")
+        .withValue(BinaryValue.create("Stimpy"));
+StoreValue store1 = new StoreValue.Builder(obj1)
+        .withLocation(bestCharacterKey)
+        .build();
+StoreValue store2 = new StoreValue.Builder(obj2)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store1);
+client.execute(store2);
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = Riak::RObject.new(bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'Ren'
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'Stimpy'
+obj2.store
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = RiakObject(client, bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.data = 'Ren'
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.data = 'Stimpy'
+obj2.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('siblings_allowed');
+obj1.setBucket('nickolodeon');
+obj1.setKey('best_character');
+obj1.setValue('Ren');
+
+var obj2 = new Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
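+%% Note: neither put supplies a causal context (there was no prior
+%% fetch), so Riak keeps both values as siblings of the same key.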
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries]({{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.0p5/developing/getting-started) section.
+
+At this point, multiple objects have been stored in the same key without
+passing any causal context to Riak. Let's see what happens if we try to
+read contents of the object:
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed', bucket:
+        'nickolodeon', key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found. We should get this response:
+
+```java
+com.basho.riak.client.cap.UnresolvedConflictException: Siblings found
+```
+
+```ruby
+<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]>
+```
+
+```python
+[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>]
+```
+
+```csharp
+Sibling count: 2
+    VTag: 1DSVo7VED8AC6llS8IcDE6
+    VTag: 7EiwrlFAJI5VMLK87vU4tE
+```
+
+```javascript
+info: nickolodeon/best_character has '2' siblings
+```
+
+```curl
+Siblings:
+175xDv0I3UFCfGRC7K7U9z
+6zY2mUCFPEoL834vYCDmPe
+```
+
+As you can see, reading an object with sibling values will result in
+some form of "multiple choices" response (e.g. `300 Multiple Choices` in
+HTTP). If you're using the HTTP interface and want to view all sibling
+values, you can attach an `Accept: multipart/mixed` header to your
+request:
+
+```curl
+curl -H "Accept: multipart/mixed" \
+  http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Response (without headers):
+
+```
+Ren
+--WUnzXITIPJFwucNwfdaofMkEG7H
+
+Stimpy
+--WUnzXITIPJFwucNwfdaofMkEG7H--
+```
+
+If you select the first of the two siblings and retrieve its value, you
+should see `Ren` and not `Stimpy`.
+
+### Using Causal Context
+
+Once you are presented with multiple options for a single value, you
+must determine the correct value. In an application, this can be done
+either in an automatic fashion, using a use case-specific resolver, or
+by presenting the conflicting objects to the end user. For more
+information on application-side conflict resolution, see our
+client-library-specific documentation for the following languages:
+
+* [Java]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/ruby)
+* [Python]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/python)
+* [C#]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/csharp)
+* [Node.js]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/nodejs)
+
+We won't deal with conflict resolution in this section. Instead, we'll
+focus on how to use causal context.
+
+After having written several objects to Riak in the section above, we
+have values in our object: `Ren` and `Stimpy`. But let's say that we
+decide that `Stimpy` is the correct value based on our application's use
+case. In order to resolve the conflict, we need to do three things:
+
+1. Fetch the current object (which will return both siblings)
+2. Modify the value of the object, i.e. make the value `Stimpy`
+3. Write the object back to the `best_character` key
+
+What happens when we fetch the object first, prior to the update, is
+that the object handled by the client has a causal context attached. At
+that point, we can modify the object's value, and when we write the
+object back to Riak, _the causal context will automatically be attached
+to it_. Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+        bucketType: 'siblings_allowed',
+        bucket: 'nickolodeon',
+        key: 'best_character'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue('Stimpy');
+        client.storeValue({ value: riakObj, returnBody: true },
+            function (err, rslt) {
+                if (err) {
+                    throw new Error(err);
+                }
+
+                assert(rslt.values.length === 1);
+            }
+        );
+    }
+);
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible to have two clients that are
+simultaneously engaging in conflict resolution. To avoid a pathological
+divergence, you should be sure to limit the number of reconciliations and fail
+once that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/2.9.0p5/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant volume of updates is performed on a single
+object in a small period of time. While updating a single object
+_extremely_ frequently is not recommended, you can tune Riak's vector
+clock pruning to prevent vector clocks from growing too large too
+quickly. More on pruning in the [section below](#vector-clock-pruning).
+
+### How does `last_write_wins` affect resolution?
+
+On the surface, it seems like setting `allow_mult` to `false`
+(the default) and `last_write_wins` to `true` would result in the same
+behavior, but there is a subtle distinction.
+
+Even though both settings return only one value to the client, setting
+`allow_mult` to `false` still uses vector clocks for resolution, whereas
+if `last_write_wins` is `true`, Riak reads the timestamp to determine
+the latest version. Deeper in the system, if `allow_mult` is `false`,
+Riak will still allow siblings to exist when they are created (via
+concurrent writes or network partitions), whereas setting
+`last_write_wins` to `true` means that Riak will overwrite the value
+with the one that has the later timestamp.
+
+When you don't care about sibling creation, setting `allow_mult` to
+`false` has the least surprising behavior: you get the latest value,
+but network partitions are handled gracefully. However, for cases in
+which keys are rewritten often (and quickly) and the new value isn't
+necessarily dependent on the old value, `last_write_wins` will provide
+better performance. Some use cases where you might want to use
+`last_write_wins` include caching, session storage, and insert-only
+(no updates).
+
+{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}}
+The combination of setting both the `allow_mult` and `last_write_wins`
+properties to `true` leads to undefined behavior and should not be used.
+{{% /note %}}
+
+## Vector Clock Pruning
+
+Riak regularly prunes vector clocks to prevent overgrowth based on four
+parameters which can be set for any bucket type that you create:
+
+Parameter | Default value | Description
+:---------|:--------------|:-----------
+`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned
+`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned
+`young_vclock` | `20` | If a vector clock entry is younger than this value (in seconds), it will not be pruned
+`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in seconds), it will be pruned
+
+This diagram shows how the values of these parameters dictate the vector
+clock pruning process:
+
+![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)
+
+## More Information
+
+Additional background information on vector clocks:
+
+* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock)
+* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/)
+* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/)
+* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563)
diff --git a/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/csharp.md
new file mode 100644
index 0000000000..c0a19c349f
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/csharp.md
@@ -0,0 +1,127 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "C Sharp"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "C Sharp"
+    identifier: "usage_conflict_resolution_csharp"
+    weight: 103
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/conflict-resolution/csharp
+  - /riak/kv/2.9.0p5/dev/using/conflict-resolution/csharp
+  - /riak/2.9.0p5/developing/usage/conflict-resolution/csharp/
+  - /riak/2.9.0/developing/usage/conflict-resolution/csharp/
+  - /riak/kv/2.9.0/developing/usage/conflict-resolution/csharp/
+  - /riak/kv/2.9.0p1/developing/usage/conflict-resolution/csharp/
+  - /riak/kv/2.9.0p2/developing/usage/conflict-resolution/csharp/
+  - /riak/kv/2.9.0p3/developing/usage/conflict-resolution/csharp/
+  - /riak/kv/2.9.0p4/developing/usage/conflict-resolution/csharp/
+---
+
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak .NET client][riak_dotnet_client].
+
+## How the .NET Client Handles Conflict Resolution
+
+In the Riak .NET client, every Riak object has a `Siblings` property that
+provides access to a list of that object's sibling values. If there are no
+siblings, that property will return an empty list.
+
+Here's an example of an object with siblings:
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+So what happens if the count of `obj.Siblings` is greater than 0, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update and store a
+canonical value, or choose a sibling from the `Siblings` list and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Siblings` list and will
+fetch, update and store the definitive value.
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Now, modify the object's value
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+### Choosing a value from `Siblings`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
diff --git a/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..140ab3c195
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,66 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/conflict-resolution/golang
+  - /riak/kv/2.9.0p5/dev/using/conflict-resolution/golang
+  - /riak/2.9.0p5/developing/usage/conflict-resolution/golang/
+  - /riak/2.9.0/developing/usage/conflict-resolution/golang/
+  - /riak/kv/2.9.0/developing/usage/conflict-resolution/golang/
+  - /riak/kv/2.9.0p1/developing/usage/conflict-resolution/golang/
+  - /riak/kv/2.9.0p2/developing/usage/conflict-resolution/golang/
+  - /riak/kv/2.9.0p3/developing/usage/conflict-resolution/golang/
+  - /riak/kv/2.9.0p4/developing/usage/conflict-resolution/golang/
+---
+
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, a fetch can return multiple sibling objects in the
+`Values` slice of its result. If there are no siblings, that slice will
+contain a single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update, and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type.
+
+[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210)
diff --git a/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/java.md
new file mode 100644
index 0000000000..9e74673637
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/java.md
@@ -0,0 +1,280 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Java"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Java"
+    identifier: "usage_conflict_resolution_java"
+    weight: 100
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/conflict-resolution/java
+  - /riak/kv/2.9.0p5/dev/using/conflict-resolution/java
+  - /riak/2.9.0p5/developing/usage/conflict-resolution/java/
+  - /riak/2.9.0/developing/usage/conflict-resolution/java/
+  - /riak/kv/2.9.0/developing/usage/conflict-resolution/java/
+  - /riak/kv/2.9.0p1/developing/usage/conflict-resolution/java/
+  - /riak/kv/2.9.0p2/developing/usage/conflict-resolution/java/
+  - /riak/kv/2.9.0p3/developing/usage/conflict-resolution/java/
+  - /riak/kv/2.9.0p4/developing/usage/conflict-resolution/java/
+---
+
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Java
+client](https://github.com/basho/riak-java-client).
+
+## How the Java Client Handles Conflict Resolution
+
+The official Riak Java client provides a `ConflictResolver` interface
+for handling sibling resolution. This interface requires that you
+implement a `resolve` method that takes a Java `List` of objects of a
+specific type that are stored in Riak and produces a single object of
+that type, i.e. converts a `List<T>` to a single `T`. Once that
+interface has been implemented, it can be registered as a singleton and
+thereby applied to all read operations on a specific data type. Below is
+an example resolver for the class `Foo`:
+
+```java
+import java.util.List;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class FooResolver implements ConflictResolver<Foo> {
+    @Override
+    public Foo resolve(List<Foo> siblings) {
+        // Insert your sibling resolution logic here
+    }
+}
+```
+
+What happens within the `resolve` method is up to you and will always
+depend on the use case at hand. You can implement a resolver that
+selects a random `Foo` from the list, chooses the `Foo` with the most
+recent timestamp (if you've set up the class `Foo` to have timestamps),
+etc. 
In this tutorial we'll provide a simple example to get you started.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will be an instance of the class `User`, which we'll create
+below. All of the data for our application will be stored in buckets
+that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? What criteria
+should be applied in making that decision? Should the lists be merged?
+Should we pick a `User` object at random?
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has three sibling values whose
+`friends` lists contain 100, 75, and 10 friends, respectively, the list
+with 100 friends will win out. While this might not make sense in
+real-world applications, it's a good jumping-off point. We'll explore
+the drawbacks of this approach, as well as a better alternative, later
+in this document.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+public class User {
+    public String username;
+    public Set<String> friends;
+
+    public User(String username, Set<String> friends) {
+        this.username = username;
+        this.friends = friends;
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
+need to implement that interface in a way that is specific to the need
+at hand, i.e. 
taking a list of `User` objects and returning the `User`
+object that has the longest `friends` list:
+
+```java
+import java.util.List;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied on all reads that involve that object type.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. 
In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // siblings list is empty and returning the one sibling if there
+        // is only one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets).
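+
+As a rough sketch of what that replacement could look like, the example
+below uses the Java client's set Data Type commands. It assumes a bucket
+type named `sets` that has been created with `datatype = set`; the class
+and method names here are illustrative, not part of the tutorial above.
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.datatypes.SetUpdate;
+import com.basho.riak.client.api.commands.datatypes.UpdateSet;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+import com.basho.riak.client.core.util.BinaryValue;
+
+public class FriendsSetSketch {
+    // Adds a friend to a user's friends set. Riak merges concurrent
+    // additions on the server, so no application-side sibling
+    // resolution is needed.
+    public static void addFriend(RiakClient client, String username, String friend)
+            throws Exception {
+        // Assumes a "sets" bucket type created with datatype = set
+        Location friendsSet = new Location(new Namespace("sets", "friends"), username);
+        SetUpdate su = new SetUpdate().add(BinaryValue.create(friend));
+        UpdateSet update = new UpdateSet.Builder(friendsSet, su).build();
+        client.execute(update);
+    }
+}
+```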
diff --git a/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/nodejs.md
new file mode 100644
index 0000000000..11a3d5f0f9
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/nodejs.md
@@ -0,0 +1,66 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "NodeJS"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "NodeJS"
+    identifier: "usage_conflict_resolution_nodejs"
+    weight: 104
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/conflict-resolution/nodejs
+  - /riak/kv/2.9.0p5/dev/using/conflict-resolution/nodejs
+  - /riak/2.9.0p5/developing/usage/conflict-resolution/nodejs/
+  - /riak/2.9.0/developing/usage/conflict-resolution/nodejs/
+  - /riak/kv/2.9.0/developing/usage/conflict-resolution/nodejs/
+  - /riak/kv/2.9.0p1/developing/usage/conflict-resolution/nodejs/
+  - /riak/kv/2.9.0p2/developing/usage/conflict-resolution/nodejs/
+  - /riak/kv/2.9.0p3/developing/usage/conflict-resolution/nodejs/
+  - /riak/kv/2.9.0p4/developing/usage/conflict-resolution/nodejs/
+---
+
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Node.js client](https://github.com/basho/riak-nodejs-client).
+
+## How the Node.js Client Handles Conflict Resolution
+
+In the Riak Node.js client, the `values` property of a fetch result can
+contain multiple sibling objects. If there are no siblings, that property
+will be an array with one value in it.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68)
+
+So what happens if the length of `rslt.values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `values` array and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `values` array and will
+fetch, update, and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
diff --git a/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..11f92382f4
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,248 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/conflict-resolution/php
+  - /riak/kv/2.9.0p5/dev/using/conflict-resolution/php
+  - /riak/2.9.0p5/developing/usage/conflict-resolution/php/
+  - /riak/2.9.0/developing/usage/conflict-resolution/php/
+  - /riak/kv/2.9.0/developing/usage/conflict-resolution/php/
+  - /riak/kv/2.9.0p1/developing/usage/conflict-resolution/php/
+  - /riak/kv/2.9.0p2/developing/usage/conflict-resolution/php/
+  - /riak/kv/2.9.0p3/developing/usage/conflict-resolution/php/
+  - /riak/kv/2.9.0p4/developing/usage/conflict-resolution/php/
+---
+
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides what is needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+    ->build()
+    ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will be an instance of the class `User`, which we'll create
+below. All of the data for our application will be stored in buckets
+that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? What criteria
+should be applied in making that decision? Should the lists be merged?
+Should we pick a `User` object at random?
+
+This decision will always be yours to make. 
Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has three sibling values whose
+`friends` lists contain 100, 75, and 10 friends, respectively, the list
+with 100 friends will win out. While this might not make sense in
+real-world applications, it's a good jumping-off point. We'll explore
+the drawbacks of this approach, as well as a better alternative, later
+in this document.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a PHP
+array for the `friends` property and deduplicate it when merging.
+
+```php
+class User {
+    public $username;
+    public $friends;
+
+    public function __construct($username, array $friends = [])
+    {
+        $this->username = $username;
+        $this->friends = $friends;
+    }
+
+    public function __toString()
+    {
+        return json_encode([
+            'username' => $this->username,
+            'friends' => $this->friends,
+            'friends_count' => count($this->friends)
+        ]);
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```php
+$bashobunny = new User('bashobunny', ['fred', 'barney']);
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('bashobunny', 'users', 'siblings')
+    ->build()
+    ->execute();
+
+echo $response->hasSiblings(); // 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `$response->getSiblings()` array down to one member.
+In our case, we need a function that takes a Riak response object as its argument,
+applies some logic to the list of values returned by `getSiblings()`,
+and returns a single value. 
For our example use case here, we'll
+return the sibling with the longest `friends` list:
+
+```php
+use Basho\Riak;
+use Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    if ($response->hasSiblings()) {
+        $siblings = $response->getSiblings();
+        $max_key = 0;
+        foreach ($siblings as $key => $sibling) {
+            if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+                $max_key = $key;
+            }
+        }
+        return $siblings[$max_key];
+    }
+
+    // No siblings, so the response contains a single object
+    return $response->getObject();
+}
```
+
+We can then embed this function into a more general function for fetching
+objects from the users bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friends list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and to store the resulting `User` object, as in the
+sketch below.
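+
+Since the merged resolver isn't shown above, here is a minimal sketch of
+what it could look like. It assumes the JSON layout used earlier
+(`username`, `friends`, `friends_count`) and reuses the first sibling as
+a container so that the store carries the causal context from the fetch;
+the function name is illustrative:
+
+```php
+use Basho\Riak;
+use Basho\Riak\Command;
+
+function merging_friends_list_resolver(Command\Object\Response $response, Riak $riak)
+{
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    // Union the friends lists from all siblings; array_unique plays the
+    // role that a Set plays in the Java version of this tutorial
+    $siblings = $response->getSiblings();
+    $friends = [];
+    foreach ($siblings as $sibling) {
+        $friends = array_merge($friends, $sibling->getData()['friends']);
+    }
+    $friends = array_values(array_unique($friends));
+
+    $username = $siblings[0]->getData()['username'];
+
+    // Reuse the first sibling as a container so that the store carries
+    // the vector clock from the fetch; writing without it would simply
+    // create more siblings
+    $resolved = $siblings[0];
+    $resolved->setData(json_encode([
+        'username' => $username,
+        'friends' => $friends,
+        'friends_count' => count($friends)
+    ]));
+
+    (new Command\Builder\StoreObject($riak))
+        ->withObject($resolved)
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return $resolved;
+}
+```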
+ +The drawback to this approach is that it's more or less inevitable that a user +will remove a friend from their friends list, and then that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets). diff --git a/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/python.md new file mode 100644 index 0000000000..97a36d4f64 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/python.md @@ -0,0 +1,262 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Python" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Python" + identifier: "usage_conflict_resolution_python" + weight: 102 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.0p5/dev/using/conflict-resolution/python + - /riak/kv/2.9.0p5/dev/using/conflict-resolution/python + - /riak/2.9.0p5/developing/usage/conflict-resolution/python/ + - /riak/2.9.0/developing/usage/conflict-resolution/python/ + - /riak/kv/2.9.0/developing/usage/conflict-resolution/python/ + - /riak/kv/2.9.0p1/developing/usage/conflict-resolution/python/ + - /riak/kv/2.9.0p2/developing/usage/conflict-resolution/python/ + - /riak/kv/2.9.0p3/developing/usage/conflict-resolution/python/ + - /riak/kv/2.9.0p4/developing/usage/conflict-resolution/python/ +--- + + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Python +client](https://github.com/basho/riak-python-client). + +## How the Python Client Handles Conflict Resolution + +In the official Python client, every object of the `RiakObject` class +has a `siblings` property that provides access to a list of an object's +sibling values. If there are no siblings, that property will return a +list with only one item. 
Here's an example of an object with siblings:
+
+```python
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x1081da62c1>]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? The easiest way to resolve siblings automatically with
+the Python client is to create a conflict-resolving function that takes
+a list of sibling values and returns a single value. Such resolution
+functions can be registered either at the object level or the bucket
+level. A more complete explanation can be found in the section directly
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will
+be of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `friends` property that lists the usernames, as
+strings, of the user's friends. We will also create a `to_json` method,
+as we'll be storing each `User` object as JSON:
+
+```python
+class User(object):
+    def __init__(self, username, friends):
+        self.username = username
+        self.friends = friends
+
+    def to_json(self):
+        return vars(self)
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```python
+new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json()
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = bucket.get('bashobunny')
+
+print len(obj.siblings) > 1
+```
+
+If we get `True`, then there are siblings. So what do we do in that
+case? The Python client allows us to write a conflict resolution hook
+function that will be triggered any time siblings are found, i.e. any
+time `len(obj.siblings) > 1`. 
A hook function like this needs to take a
+single `RiakObject` object as its argument, apply some sort of logic to
+the list of values contained in the `siblings` property, and ultimately
+reduce the `siblings` list to a single "correct" value. For our example
+case, we'll keep the value with the longest `friends` list:
+
+```python
+def longest_friends_list_resolver(riak_object):
+    # We'll specify a lambda function that operates on the length of
+    # each sibling's "friends" list:
+    lm = lambda sibling: len(sibling.data['friends'])
+    # Then we'll reduce the siblings list so that it contains only the
+    # object with the maximum value for the length of the "friends" list:
+    riak_object.siblings = [max(riak_object.siblings, key=lm), ]
+```
+
+### Registering a Conflict Resolver Function
+
+In the Python client, resolver functions can be registered at the object
+level, as in this example:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.resolver = longest_friends_list_resolver
+
+# Now, when the object is loaded from Riak, it will resolve to a single
+# value instead of multiple values when both commands are executed:
+obj.reload()
+obj.store()
+```
+
+Alternatively, resolvers can be registered at the bucket level, so that
+the resolution is applied to all objects in the bucket:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+bucket.resolver = longest_friends_list_resolver
+
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.reload()
+obj.store()
+
+# The resolver will also be applied if you perform operations using the
+# bucket object:
+
+bucket.get('bashobunny')
+bucket.get('some_other_user')
+```
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` object values and returns a single value. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including code examples
+from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. 
In the sections +below, we'll implement an alternative strategy as an example. + +### Merging the Lists + +To avoid losing data like this, a better strategy would be to merge the +lists. We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets). 
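+
+As a rough sketch of what that replacement could look like, the example
+below uses the Python client's `Set` data type wrapper. It assumes a
+bucket type named `sets` that has been created with `datatype = set`;
+the bucket and key names are illustrative:
+
+```python
+from riak.datatypes import Set
+
+# Assumes a "sets" bucket type created with datatype = set
+bucket = client.bucket_type('sets').bucket('friends')
+friends_set = Set(bucket, 'bashobunny')
+
+# Concurrent additions from different clients are merged by Riak
+# itself, so no application-side sibling resolution is needed
+friends_set.add('fred')
+friends_set.add('barney')
+friends_set.store()
+
+friends_set.reload()
+print(friends_set.value)  # a frozenset of friend usernames
+```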
diff --git a/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/ruby.md
new file mode 100644
index 0000000000..c337d7511e
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/conflict-resolution/ruby.md
@@ -0,0 +1,258 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Ruby"
+    identifier: "usage_conflict_resolution_ruby"
+    weight: 101
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/conflict-resolution/ruby
+  - /riak/kv/2.9.0p5/dev/using/conflict-resolution/ruby
+  - /riak/2.9.0p5/developing/usage/conflict-resolution/ruby/
+  - /riak/2.9.0/developing/usage/conflict-resolution/ruby/
+  - /riak/kv/2.9.0/developing/usage/conflict-resolution/ruby/
+  - /riak/kv/2.9.0p1/developing/usage/conflict-resolution/ruby/
+  - /riak/kv/2.9.0p2/developing/usage/conflict-resolution/ruby/
+  - /riak/kv/2.9.0p3/developing/usage/conflict-resolution/ruby/
+  - /riak/kv/2.9.0p4/developing/usage/conflict-resolution/ruby/
+---
+
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Ruby
+client](https://github.com/basho/riak-ruby-client).
+
+## How the Ruby Client Handles Conflict Resolution
+
+In the official Ruby client, every Riak object has a `siblings` property
+that provides access to a list of that object's sibling values. If there
+are no siblings, that property will return an array with only one item.
+Here's an example of an object with siblings:
+
+```ruby
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? In order to resolve siblings, you need to create a
+resolution function that takes a Riak object and reduces the `siblings`
+array down to a single value. An example is provided in the section
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will be
+of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. 
While this might not make sense in
+real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` and a `friends` property that lists
+the usernames, as strings, of the user's friends. We will also create a
+`to_json` method, as we'll be storing each `User` object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# => {:username=>"riakuser127", :friends=>["captheorem238", "siblingsrule572"]}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
+```
+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username)
+  longest_friends_list_resolver(user_object)
+  user_object
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. 
In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friends list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be merged into one
+    # array
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/) that have
+specific conflict resolution mechanics built in. 
If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets).
diff --git a/content/riak/kv/2.9.0p5/developing/usage/content-types.md b/content/riak/kv/2.9.0p5/developing/usage/content-types.md
new file mode 100644
index 0000000000..a63486220e
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/content-types.md
@@ -0,0 +1,196 @@
+---
+title: "Content Types"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Content Types"
+    identifier: "usage_content_types"
+    weight: 104
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/usage/content-types/
+  - /riak/2.9.0/developing/usage/content-types/
+  - /riak/kv/2.9.0/developing/usage/content-types/
+  - /riak/kv/2.9.0p1/developing/usage/content-types/
+  - /riak/kv/2.9.0p2/developing/usage/content-types/
+  - /riak/kv/2.9.0p3/developing/usage/content-types/
+  - /riak/kv/2.9.0p4/developing/usage/content-types/
+---
+
+
+Riak KV is a fundamentally content-agnostic database. You can use it to
+store anything you want, from JSON to XML to HTML to binaries to images
+and beyond. It's important to note that _all_ objects stored in
+Riak need a specified content type. If you don't specify a
+content type, the reaction will vary based on your client library:
+
+```java
+// In the Java client, the response when storing an object without
+// specifying a content type will depend on what is being stored. If you
+// store a Java Map, for example, the client will automatically specify
+// that the object is "application/json"; if you store a String, the
+// client will specify "application/x-www-form-urlencoded"; POJOs are
+// stored as JSON by default, and so on.
+```
+
+```ruby
+# In the Ruby client, you must always specify a content type. If you
+# don't, you'll see the following error:
+ArgumentError: content_type is not defined!
+```
+
+```php
+# PHP will default to cURL's default content type for POST and PUT requests:
+# application/x-www-form-urlencoded
+
+# If you use the StoreObject::buildJsonObject() method when building your command,
+# it will store the item with application/json as the content-type
+```
+
+```python
+# In the Python client, the default content type is "application/json".
+# Because of this, you should always make sure to specify the content
+# type when storing other types of data.
+```
+
+```csharp
+// Using the Riak .NET Client, the response when storing an object without
+// specifying a content type will depend on what is being stored.
+// If you store a Dictionary, for example, the client will
+// automatically specify that the object is "application/json";
+// POCOs are stored as JSON by default, and so on.
+```
+
+```javascript
+// In the Node.js client, the default content type is "application/json".
+// Because of this, you should always make sure to specify the content
+// type when storing other types of data.
+```
+
+```erlang
+%% In the Erlang client, the response when storing an object without
+%% specifying a content type will depend on what is being stored. If
+%% you store a simple binary, for example, the client will automatically
+%% specify that the object is "application/octet-stream"; if you store a
+%% string, the client will specify "application/x-erlang-binary"; and so
+%% on.
+```
+
+```golang
+// In the Go client, you must always specify a content type.
+```
+
+Because content type negotiation varies so widely from client to client,
+we recommend consulting the documentation for your preferred client for
+more information.
+
+## Specifying Content Type
+
+For all writes to Riak, you will need to specify a content type, for
+example `text/plain` or `application/json`.
+
+```java
+Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(wildeGeniusQuote)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = Riak::RObject.new(bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.raw_data = 'I have nothing to declare but my genius'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('genius', 'oscar_wilde', 'quotes')
+    ->buildObject('I have nothing to declare but my genius!', 'text/plain')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = RiakObject(client, bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.data = 'I have nothing to declare but my genius'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
+var obj = new RiakObject(id, "I have nothing to declare but my genius",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('I have nothing to declare but my genius');
+client.storeValue({
+    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
+                       <<"genius">>,
+                       <<"I have nothing to declare but my genius">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("I have nothing to declare but my genius"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("quotes").
+    WithBucket("oscar_wilde").
+    WithKey("genius").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+```curl
+curl -XPUT /
+  -H "Content-Type: text/plain" /
+  -d "I have nothing to declare but my genius" /
+  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Please note that POST is also a valid method for writes, for the sake
+# of compatibility
+```
diff --git a/content/riak/kv/2.9.0p5/developing/usage/creating-objects.md b/content/riak/kv/2.9.0p5/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..3470b26c54
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/creating-objects.md
@@ -0,0 +1,559 @@
+---
+title: "Creating Objects in Riak KV"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Creating Objects"
+    identifier: "usage_creating_objects"
+    weight: 100
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/usage/creating-objects/
+  - /riak/2.9.0/developing/usage/creating-objects/
+  - /riak/kv/2.9.0/developing/usage/creating-objects/
+  - /riak/kv/2.9.0p1/developing/usage/creating-objects/
+  - /riak/kv/2.9.0p2/developing/usage/creating-objects/
+  - /riak/kv/2.9.0p3/developing/usage/creating-objects/
+  - /riak/kv/2.9.0p4/developing/usage/creating-objects/
+---
+
+
+[usage content types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/content-types
+
+Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
+requests. Here is the basic form of writes:
+
+```
+PUT /types/<type>/buckets/<bucket>/keys/<key>
+
+# If you're using HTTP to interact with Riak, you can also use POST
+```
+
+As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types).
+
+The object we're storing will be very simple, just a basic text snippet
+of something that Rufus might say. Let's build the object and then store
+it.
+
+``` java
+String quote = "WOOF!";
+Namespace bucket = new Namespace("animals", "dogs");
+Location rufusLocation = new Location(bucket, "rufus");
+RiakObject rufusObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create(quote));
+StoreValue storeOp = new StoreValue.Builder(rufusObject)
+        .withLocation(rufusLocation)
+        .build();
+client.execute(storeOp);
+```
+
+``` ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = Riak::RObject.new(bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store
+```
+
+``` php
+$response = (new /Basho/Riak/Command/Builder/StoreObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->buildObject('WOOF!', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+``` python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```
+
+``` csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var obj = new RiakObject(id, "WOOF!", "text/plain");
+var result = client.Put(obj);
+```
+
+``` javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('WOOF!');
+client.storeValue({
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+``` golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("WOOF!"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+Notice that we specified both a value for the object, i.e. `WOOF!`, and
+a content type, `text/plain`. See [content types][usage content types] for more information.
+
+Now you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types).
+
+### Store an Object
+
+Your application will often have its own method of generating the keys
+for its data, e.g. on the basis of timestamps. If so, storing that data
+is easy. The basic request looks like this.
+
+```
+PUT /types/TYPE/buckets/BUCKET/keys/KEY
+
+# If you're using HTTP, POST can be used instead of PUT. The only
+# difference between POST and PUT is that you should POST in cases where
+# you want Riak to auto-generate a key. More on this can be found in the
+# examples below.
+```
+
+There is no need to intentionally create buckets in Riak. They pop into
+existence when keys are added to them, and disappear when all keys have
+been removed from them. If you don't specify a bucket's type, the type
+[`default`]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) will be applied.
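+
+To make that default-type behavior concrete, here is a minimal sketch
+using the official Python client (assuming a local node with Protocol
+Buffers on port 8087; the other clients shown on this page behave the
+same way):
+
+```python
+import riak
+
+# Assumes a local node; adjust host/port as needed
+client = riak.RiakClient(pb_port=8087)
+
+# No bucket type is named, so this write lands under the "default"
+# type; the bucket springs into existence on first write
+bucket = client.bucket('dogs')
+obj = riak.RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```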
+
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new /Basho/Riak/Command/Builder/StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>,
+                       [{w, 3}]).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT /
+  -H "Content-Type: text/plain" /
+  -d "vroom" /
+  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. Like a `GET` request, `300 Multiple
+Choices` may be returned if siblings existed or were created as part of
+the operation, and the response can be dealt with similarly.
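+
+As a minimal sketch of what "dealt with similarly" means in practice,
+here is how the object returned by a `returnbody` write could be
+checked for siblings with the Python client (a hypothetical
+continuation of the `viper` example above; `siblings` is the same
+attribute a normal fetch exposes):
+
+```python
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+if len(obj.siblings) > 1:
+    # Concurrent writes produced siblings; resolve them just as you
+    # would after a GET
+    for sibling in obj.siblings:
+        print(sibling.data)
+```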
+
+Normal HTTP status codes (responses will vary for client libraries):
+
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+For example, using the same object from above:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.RETURN_BODY, true)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3, returnbody: true)
+```
+
+```php
+(new /Basho/Riak/Command/Builder/StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+options.SetReturnBody(true);
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, returnBody: true, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var viper = riakObj.value;
+    logger.info("dodge viper: %s", viper.toString('utf8'));
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT /
+  -H "Content-Type: text/plain" /
+  -d "vroom" /
+  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true
+```
+
+### Store a New Object and Assign a Random Key
+
+If your application would rather leave key-generation up to Riak, issue
+a `POST` request to the bucket URL instead of a PUT to a bucket/key
+pair:
+
+```
+POST /types/TYPE/buckets/BUCKET/keys
+```
+
+If you don't pass Riak a `key` name after the bucket, it will know to
+create one for you.
+
+Supported headers are the same as for bucket/key write requests, though
+`X-Riak-Vclock` will never be relevant for these POST requests.
+Supported query parameters are also the same as for bucket/key PUT
+requests.
+
+Normal status codes:
+
+* `201 Created`
+
+This command will store an object in the bucket `random_user_keys`,
+which bears the bucket type `users`.
+ +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new /Basho/Riak/Command/Builder/StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)
+
+// Output:
+// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
+```
+
+```curl
+curl -i -XPOST /
+  -H "Content-Type: text/plain" /
+  -d "this is a test" /
+  http://localhost:8098/types/users/buckets/random_user_keys/keys
+
+# In the output, you should see a Location header that will give you the
+# location of the object in Riak, with the key at the end:
+
+Location: /buckets/test/keys/G7FYUXtTsEdru4NP32eijMIRK3o
+```
diff --git a/content/riak/kv/2.9.0p5/developing/usage/custom-extractors.md b/content/riak/kv/2.9.0p5/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..7fb222ba26
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/custom-extractors.md
@@ -0,0 +1,428 @@
+---
+title: "Custom Extractors"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Custom Extractors"
+    identifier: "usage_custom_extractors"
+    weight: 113
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/search/custom-extractors
+  - /riak/kv/2.9.0p5/dev/search/custom-extractors
+  - /riak/2.9.0p5/developing/usage/custom-extractors/
+  - /riak/2.9.0/developing/usage/custom-extractors/
+  - /riak/kv/2.9.0/developing/usage/custom-extractors/
+  - /riak/kv/2.9.0p1/developing/usage/custom-extractors/
+  - /riak/kv/2.9.0p2/developing/usage/custom-extractors/
+  - /riak/kv/2.9.0p3/developing/usage/custom-extractors/
+  - /riak/kv/2.9.0p4/developing/usage/custom-extractors/
+---
+
+
+Solr, and by extension Riak Search, has default extractors for a wide
+variety of data types, including JSON, XML, and plaintext. Riak Search
+ships with the following extractors:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+No specified type | `yz_noop_extractor`
+
+There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/searching-data-types).
+
+If you're working with a data format that does not have a default Solr
+extractor, you can create your own and register it with Riak Search.
+We'll show you how to do so by way of example.
+
+## The Extractor Interface
+
+Creating a custom extractor involves creating an Erlang module that
+implements two functions:
+
+* `extract/1` --- Takes the contents of the object and calls `extract/2`
+  with the same contents and an empty list
+* `extract/2` --- Takes the contents of the object and returns an Erlang
+  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
+  single field name and a single value associated with that name
+
+The following extractor shows how a pure text extractor implements those
+two functions:
+
+```erlang
+-module(search_test_extractor).
+-include("yokozuna.hrl").
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+extract(Value, Opts) ->
+    FieldName = field_name(Opts),
+    [{FieldName, Value}].
+
+-spec field_name(proplist()) -> any().
+field_name(Opts) ->
+    proplists:get_value(field_name, Opts, text).
+```
+
+This extractor takes the contents of a `Value` and returns a proplist
+with a single field name (in this case `text`) and the single value.
+This function can be run in the Erlang shell. Let's run it providing the +text `hello`: + +```erlang +> c(search_test_extractor). +%% {ok, search_test_extractor} + +> search_test_extractor:extract("hello"). + +%% Console output: +[{text, "hello"}] +``` + +Upon running this command, the value `hello` would be indexed in Solr +under the fieldname `text`. If you wanted to find all objects with a +`text` field that begins with `Fourscore`, you could use the +Solr query `text:Fourscore*`, to give just one example. + +## An Example Custom Extractor + +Let's say that we're storing HTTP header packet data in Riak. Here's an +example of such a packet: + +``` +GET http://www.google.com HTTP/1.1 +``` + +We want to register the following information in Solr: + +Field name | Value | Extracted value in this example +:----------|:------|:------------------------------- +`method` | The HTTP method | `GET` +`host` | The URL's host | `www.google.com` +`uri` | The URI, i.e. what comes after the host | `/` + +The example extractor below would provide the three desired +fields/values. It relies on the +[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3) +function from Erlang's standard library. + +```erlang +-module(yz_httpheader_extractor). +-compile(export_all). + +extract(Value) -> + extract(Value, []). + +%% In this example, we can ignore the Opts variable from the example +%% above, hence the underscore: +extract(Value, _Opts) -> + {ok, + {http_request, + Method, + {absoluteURI, http, Host, undefined, Uri}, + _Version}, + _Rest} = erlang:decode_packet(http, Value, []), + [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}]. +``` + +This file will be stored in a `yz_httpheader_extractor.erl` file (as +Erlang filenames must match the module name). Now that our extractor has +been written, it must be compiled and registered in Riak before it can +be used. + +## Registering Custom Extractors + +In order to use a custom extractor, you must create a compiled `.beam` +file out of your `.erl` extractor file and then tell Riak where that +file is located. Let's say that we have created a +`search_test_extractor.erl` file in the directory `/opt/beams`. First, +we need to compile that file: + +```bash +erlc search_test_extractor.erl +``` + +To instruct Riak where to find the resulting +`search_test_extractor.beam` file, we'll need to add a line to an +`advanced.config` file in the node's `/etc` directory (more information +can be found in our documentation on [advanced]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#advanced-configuration)). Here's an +example: + +```advancedconfig +[ + %% Other configs + {vm_args, [ + {"-pa /opt/beams", ""} + ]}, + %% Other configs +] +``` + +This will instruct the Erlang VM on which Riak runs to look for compiled +`.beam` files in the proper directory. You should re-start the node at +this point. Once the node has been re-started, you can use the node's +Erlang shell to register the `yz_httpheader_extractor`. First, attach to +the shell: + +```bash +riak attach +``` + +At this point, we need to choose a MIME type for our extractor. Let's +call it `application/httpheader`. Once you're in the shell: + +```erlang +> yz_extractor:register("application/httpheader", yz_httpheader_extractor). +``` + +If successful, this command will return a list of currently registered +extractors. 
It should look like this:
+
+```erlang
+[{default,yz_noop_extractor},
+ {"application/httpheader",yz_httpheader_extractor},
+ {"application/json",yz_json_extractor},
+ {"application/riak_counter",yz_dt_extractor},
+ {"application/riak_map",yz_dt_extractor},
+ {"application/riak_set",yz_dt_extractor},
+ {"application/xml",yz_xml_extractor},
+ {"text/plain",yz_text_extractor},
+ {"text/xml",yz_xml_extractor}]
+```
+
+If the `application/httpheader` extractor is part of that list, then the
+extractor has been successfully registered.
+
+## Verifying Our Custom Extractor
+
+Now that Riak Search knows how to decode and extract HTTP header packet
+data, let's store some in Riak and then query it. We'll put the example
+packet data from above in a `google_packet.bin` file. Then, we'll `PUT`
+that binary to Riak's `/search/extract` endpoint:
+
+```curl
+# Note that we use our custom MIME type here
+curl -XPUT $RIAK_HOST/search/extract /
+  -H 'Content-Type: application/httpheader' /
+  --data-binary @google_packet.bin
+```
+
+That should return the following JSON:
+
+```json
+{
+  "method": "GET",
+  "host": "www.google.com",
+  "uri": "/"
+}
+```
+
+We can also verify this in the Erlang shell (whether in a Riak node's
+Erlang shell or otherwise):
+
+```erlang
+yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor).
+
+%% Console output:
+[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}]
+```
+
+## Indexing and Searching HTTP Header Packet Data
+
+Now that Solr knows how to extract HTTP header packet data, we need to
+create a schema that extends the [default schema]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
+to `<fields>` in the schema, which we'll name `http_header_schema` and
+store in a `http_header_schema.xml` file:
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="http_header_schema" version="1.5">
+<fields>
+  <!-- other required fields here -->
+
+  <field name="method" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="host" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/>
+</fields>
+</schema>
+```
+
+Now, we can store the schema:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("http_header_schema.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_xml = File.read('http_header_schema.xml')
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```php
+$schema_string = file_get_contents('http_header_schema.xml');
+(new /Basho/Riak/Command/Builder/StoreSchema($riak))
+  ->withName('http_header_schema')
+  ->withSchemaString($schema_string)
+  ->build()
+  ->execute();
+```
+
+```python
+schema_xml = open('http_header_schema.xml').read()
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/http_header_schema /
+  -H 'Content-Type: application/xml' /
+  --data-binary @http_header_schema.xml
+```
+
+Riak now has our schema stored and ready for use. Let's create a search
+index called `header_data` that's associated with our new schema:
+
+```java
+YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
+StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```php
+(new /Basho/Riak/Command/Builder/StoreIndex($riak))
+  ->withName('header_data')
+  ->usingSchema('http_header_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/header_data /
+  -H 'Content-Type: application/json' /
+  -d '{"schema":"http_header_schema"}'
+```
+
+Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types)
+for all of the HTTP header data that we plan to store. Any bucket that
+bears this type will be associated with our `header_data` search index.
+We'll call our bucket type `http_data_store`.
+
+```bash
+riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
+riak-admin bucket-type activate http_data_store
+```
+
+Let's use the same `google_packet.bin` file that we used previously and
+store it in a bucket with the `http_data_store` bucket type, making sure
+to use our custom `application/httpheader` MIME type:
+
+```java
+Location key = new Location(new Namespace("http_data_store", "packets"), "google");
+File packetData = new File("google_packet.bin");
+byte[] packetBinary = FileUtils.readFileToByteArray(packetData);
+
+RiakObject packetObject = new RiakObject()
+        .setContentType("application/httpheader")
+        .setValue(BinaryValue.create(packetBinary));
+
+StoreValue storeOp = new StoreValue.Builder(packetObject)
+        .withLocation(key)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+packet_data = File.read('google_packet.bin')
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = Riak::RObject.new(bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.raw_data = packet_data
+obj.store
+```
+
+```php
+$object = new Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);
+
+(new /Basho/Riak/Command/Builder/StoreObject($riak))
+  ->buildLocation('google', 'packets', 'http_data_store')
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+packet_data = open('google_packet.bin').read()
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = RiakObject(client, bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.data = packet_data
+obj.store()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google /
+  -H 'Content-Type: application/httpheader' /
+  --data-binary @google_packet.bin
+```
+
+Now that we have some header packet data stored, we can query our
+`header_data` index on whatever basis we'd like. First, let's verify
+that we'll get one result if we query for objects that have the HTTP
+method `GET`:
+
+```java
+// Using the same method from above:
+String query = "method:GET";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```php
+$response = (new /Basho/Riak/Command/Builder/Search/FetchObjects($riak))
+  ->withQuery('method:GET')
+  ->withIndexName('header_data')
+  ->build()
+  ->execute();
+
+$response->getNumFound();
+```
+
+```python
+results = client.fulltext_search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"
+
+# This should return a fairly large JSON object with a "num_found" field
+# The value of that field should be 1
+```
diff --git a/content/riak/kv/2.9.0p5/developing/usage/deleting-objects.md b/content/riak/kv/2.9.0p5/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..b57616b0f3
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/deleting-objects.md
@@ -0,0 +1,161 @@
+---
+title: "Deleting Objects"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Deleting Objects"
+    identifier: "usage_deleting_objects"
+    weight: 103
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/usage/deleting-objects/
+  - /riak/2.9.0/developing/usage/deleting-objects/
+  - /riak/kv/2.9.0/developing/usage/deleting-objects/
+  - /riak/kv/2.9.0p1/developing/usage/deleting-objects/
+  - /riak/kv/2.9.0p2/developing/usage/deleting-objects/
+  - /riak/kv/2.9.0p3/developing/usage/deleting-objects/
+  - /riak/kv/2.9.0p4/developing/usage/deleting-objects/
+---
+
+
+The delete command follows a predictable pattern and looks like this:
+
+```
+DELETE /types/TYPE/buckets/BUCKET/keys/KEY
+```
+
+The normal HTTP response codes for `DELETE` operations are `204 No
+Content` and `404 Not Found`. 404 responses are *normal*, in the sense
+that `DELETE` operations are idempotent and not finding the resource has
+the same effect as deleting it.
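+
+As a quick sketch of that idempotence with the Python client (assuming
+the `quotes`/`oscar_wilde` example used just below):
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+# Deleting again is not an error: not finding the key has the same
+# effect as deleting it
+bucket.delete('genius')
+```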
+
+Let's try to delete the `genius` key from the `oscar_wilde` bucket
+(which bears the type `quotes`):
+
+```java
+Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
+client.execute(delete);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```php
+(new /Basho/Riak/Command/Builder/DeleteObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+string key = rslt.Value.Key;
+id = new RiakObjectId("users", "random_user_keys", key);
+var del_rslt = client.Delete(id);
+```
+
+```javascript
+// continuing from above example
+options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    key: generatedKey
+};
+client.deleteValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>)
+```
+
+```golang
+// Continuing from above example
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithKey(rsp.GeneratedKey).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+```
+
+## Client Library Examples
+
+If you are updating an object that has been deleted---or if an update
+might target a deleted object---we recommend that
+you first fetch the [causal context]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context) of the object prior to updating.
+This can be done by setting the `deletedvclock` parameter to `true` as
+part of the [fetch operation]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers/fetch-object). This can also be done
+with the official Riak clients for Ruby, Java, and Erlang, as in the
+example below:
+
+
+```ruby
+object.delete
+deleted_object = bucket.get('bucket', 'key', deletedvclock: true)
+deleted_object.vclock
+```
+
+```python
+# It is not currently possible to fetch the causal context for a deleted
+# key in the Python client.
+```
+
+```java
+Location loc = new Location("<bucket>")
+        .setBucketType("<bucket_type>")
+        .setKey("<key>");
+FetchValue fetch = new FetchValue.Builder(loc)
+        .withOption(Option.DELETED_VCLOCK, true)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+System.out.println(response.getVclock().asString());
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"bucket_type">>, <<"bucket">>},
+                                <<"key">>,
+                                [{deleted_vclock}]).
+
+%% In the Erlang client, the vector clock is accessible using the Obj
+%% object obtained above.
+``` + +```php +$response = (new /Basho/Riak/Command/Builder/FetchObject($riak)) + ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type') + ->build() + ->execute(); + +echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA +``` diff --git a/content/riak/kv/2.9.0p5/developing/usage/document-store.md b/content/riak/kv/2.9.0p5/developing/usage/document-store.md new file mode 100644 index 0000000000..b8c8e53378 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/usage/document-store.md @@ -0,0 +1,621 @@ +--- +title: "Implementing a Document Store" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Implementing a Document Store" + identifier: "usage_document_store" + weight: 112 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.0p5/dev/search/document-store + - /riak/kv/2.9.0p5/dev/search/document-store + - /riak/2.9.0p5/developing/usage/document-store/ + - /riak/2.9.0/developing/usage/document-store/ + - /riak/kv/2.9.0/developing/usage/document-store/ + - /riak/kv/2.9.0p1/developing/usage/document-store/ + - /riak/kv/2.9.0p2/developing/usage/document-store/ + - /riak/kv/2.9.0p3/developing/usage/document-store/ + - /riak/kv/2.9.0p4/developing/usage/document-store/ +--- + + +Although Riak wasn't explicitly created as a document store, two +features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/)---make it possible to use Riak as a +highly scalable document store with rich querying capabilities. In this +tutorial, we'll build a basic implementation of a document store using +[Riak maps]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#maps). + +## Basic Approach + +Riak Search enables you to implement a document store in Riak in a +variety of ways. You could, for example, store and query JSON objects or +XML and then retrieve them later via Solr queries. In this tutorial, +however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#maps), +index that data using Riak Search, and then run Solr queries against +those stored objects. + +You can think of these Search indexes as **collections**. Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/2.9.0p5/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. 
+ +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). +Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new /Basho/Riak/Command/Builder/StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). 
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/blog_post_schema /
+  -H 'Content-Type: application/xml' /
+  --data-binary @blog_post_schema.xml
+```
+
+With our schema uploaded, we can create an index called `blog_posts` and
+associate that index with our schema:
+
+```java
+YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema");
+StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('blog_posts', 'blog_post_schema')
+```
+
+```php
+(new /Basho/Riak/Command/Builder/Search/StoreIndex($riak))
+  ->withName('blog_posts')
+  ->usingSchema('blog_post_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('blog_posts', 'blog_post_schema')
+```
+
+```csharp
+var idx = new SearchIndex("blog_posts", "blog_post_schema");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: 'blog_post_schema',
+    indexName: 'blog_posts'
+};
+client.storeIndex(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/blog_posts /
+  -H 'Content-Type: application/json' /
+  -d '{"schema": "blog_post_schema"}'
+```
+
+## How Collections will Work
+
+Collections are not a concept that is native to Riak, but we can easily
+mimic collections by thinking of a bucket type as a collection. When we
+associate a bucket type with a Riak Search index, all of the objects
+stored in any bucket of that bucket type will be queryable on the basis
+of that one index. For this tutorial, we'll create a bucket type called
+`cms` and think of that as a collection. We could also restrict our
+`blog_posts` index to a single bucket just as easily and think of that
+as a queryable collection, but we will not do that in this tutorial.
+
+The advantage of the bucket-type-based approach is that we could store
+blog posts from different blogs in different buckets and query them all
+at once as part of the same index. It depends on the use case at
+hand. In this tutorial, we'll only be storing posts from one blog, which
+is called "Cat Pics Quarterly" and provides in-depth theoretical
+discussions of cat pics with a certain number of Reddit upvotes. All of
+the posts in this blog will be stored in the bucket
+`cat_pics_quarterly`.
+
+First, let's create our `cms` bucket type and associate it with the
+`blog_posts` index:
+
+```bash
+riak-admin bucket-type create cms /
+  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
+riak-admin bucket-type activate cms
+```
+
+Now, any object stored in any bucket of the type `cms` will be indexed
+as part of our "collection."
+
+## Storing Blog Posts as Maps
+
+Now that we know how each element of a blog post can be translated into
+one of the Riak Data Types, we can create an interface in our
+application to serve as that translation layer. Using the method
+described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling), we can construct a
+class that looks like this:
+
+```java
+import java.util.Set;
+
+public class BlogPost {
+    private String title;
+    private String author;
+    private String content;
+    private Set<String> keywords;
+    private DateTime datePosted;
+    private Boolean published;
+    private static final String bucketType = "cms";
+
+    private Location location;
+
+    private RiakClient client;
+
+    public BlogPost(RiakClient client,
+                    String bucketName,
+                    String title,
+                    String author,
+                    String content,
+                    Set<String> keywords,
+                    DateTime datePosted,
+                    Boolean published) {
+        this.client = client;
+        this.location = new Location(new Namespace(bucketType, bucketName), null);
+        this.title = title;
+        this.author = author;
+        this.content = content;
+        this.keywords = keywords;
+        this.datePosted = datePosted;
+        this.published = published;
+    }
+
+    public void store() throws Exception {
+        RegisterUpdate titleUpdate = new RegisterUpdate(title);
+        RegisterUpdate authorUpdate = new RegisterUpdate(author);
+        RegisterUpdate contentUpdate = new RegisterUpdate(content);
+        SetUpdate keywordsUpdate = new SetUpdate();
+        for (String keyword : keywords) {
+            keywordsUpdate.add(keyword);
+        }
+        RegisterUpdate dateUpdate =
+            new RegisterUpdate(datePosted.toString("YYYY-MM-DD HH:MM"));
+        FlagUpdate publishedUpdate = new FlagUpdate(published);
+        MapUpdate mapUpdate = new MapUpdate()
+            .update("title", titleUpdate)
+            .update("author", authorUpdate)
+            .update("content", contentUpdate)
+            .update("keywords", keywordsUpdate)
+            .update("date", dateUpdate)
+            .update("published", publishedUpdate);
+        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
+            .build();
+        client.execute(storeBlogPost);
+    }
+}
+```
+
+```ruby
+class BlogPost
+  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
+    bucket = client.bucket_type('cms').bucket(bucket_name)
+    map = Riak::Crdt::Map.new(bucket, nil)
+    map.batch do |m|
+      m.registers['title'] = title
+      m.registers['author'] = author
+      m.registers['content'] = content
+      keywords.each do |k|
+        m.sets['keywords'].add(k)
+      end
+      m.registers['date'] = date_posted
+      if published
+        m.flags['published'] = true
+      end
+    end
+  end
+end
+```
+
+```php
+class BlogPost {
+    private $title = '';
+    private $author = '';
+    private $content = '';
+    private $keywords = [];
+    private $datePosted = '';
+    private $published = false;
+    private $bucketType = "cms";
+
+    private $bucket = null;
+
+    private $riak = null;
+
+    public function __construct(/Basho/Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
+    {
+        $this->riak = $riak;
+        $this->bucket = new Bucket($bucket, $this->bucketType);
+        $this->title = $title;
+        $this->author = $author;
+        $this->content = $content;
+        $this->keywords = $keywords;
+        $this->datePosted = $date;
+        $this->published = $published;
+    }
+
+    public function store()
+    {
+        $setBuilder = (new /Basho/Riak/Command/Builder/UpdateSet($this->riak));
+
+        foreach($this->keywords as $keyword) {
+            $setBuilder->add($keyword);
+        }
+
+        (new /Basho/Riak/Command/Builder/UpdateMap($this->riak))
+            ->updateRegister('title', $this->title)
+            ->updateRegister('author', $this->author)
+            ->updateRegister('content', $this->content)
+            ->updateRegister('date', $this->datePosted)
+            ->updateFlag('published', $this->published)
+            ->updateSet('keywords', $setBuilder)
+            ->withBucket($this->bucket)
+            ->build()
+            ->execute();
+    }
+}
+```
+
+```python
+from riak.datatypes import Map
+
+class BlogPost:
+    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
+        bucket = client.bucket_type('cms').bucket(bucket_name)
+        self.map = Map(bucket, None)
+        self.map.registers['title'].assign(title)
+        self.map.registers['author'].assign(author)
+        self.map.registers['content'].assign(content)
+        for k in keywords:
+            self.map.sets['keywords'].add(k)
+        self.map.registers['date'].assign(date_posted)
+        if published:
+            self.map.flags['published'].enable()
+        self.map.store()
+```
+
+```csharp
+/*
+ * Please see the code in the RiakClientExamples project:
+ * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
+ */
+```
+
+```javascript
+/*
+ * Please see the code in the examples repository:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
+ */
+```
+
+Now, we can store some blog posts. We'll start with just one:
+
+```java
+Set<String> keywords = new HashSet<String>();
+keywords.add("adorbs");
+keywords.add("cheshire");
+
+BlogPost post1 = new BlogPost(client, // client object
+                              "cat_pics_quarterly", // bucket
+                              "This one is so lulz!", // title
+                              "Cat Stevens", // author
+                              "Please check out these cat pics!", // content
+                              keywords, // keywords
+                              new DateTime(), // date posted
+                              true); // published
+try {
+    post1.store();
+} catch (Exception e) {
+    System.out.println(e);
+}
+```
+
+```ruby
+keywords = ['adorbs', 'cheshire']
+date = Time.now.strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost.new('cat_pics_quarterly',
+                          'This one is so lulz!',
+                          'Cat Stevens',
+                          'Please check out these cat pics!',
+                          keywords,
+                          date,
+                          true)
+```
+
+```php
+$keywords = ['adorbs', 'cheshire'];
+$date = new /DateTime('now');
+
+$post1 = new BlogPost(
+    $riak, // client object
+    'cat_pics_quarterly', // bucket
+    'This one is so lulz!', // title
+    'Cat Stevens', // author
+    'Please check out these cat pics!', // content
+    $keywords, // keywords
+    $date, // date posted
+    true // published
+);
+```
+
+```python
+import datetime
+
+keywords = ['adorbs', 'cheshire']
+date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost('cat_pics_quarterly',
+                      'This one is so lulz!',
+                      'Cat Stevens',
+                      'Please check out these cat pics!',
+                      keywords,
+                      date,
+                      True)
+```
+
+```csharp
+var keywords = new HashSet<string> { "adorbs", "cheshire" };
+
+var post = new BlogPost(
+    "This one is so lulz!",
+    "Cat Stevens",
+    "Please check out these cat pics!",
+    keywords,
+    DateTime.Now,
+    true);
+
+var repo = new BlogPostRepository(client, "cat_pics_quarterly");
+string id = repo.Save(post);
+```
+
+```javascript
+var post = new BlogPost(
+    'This one is so lulz!',
+    'Cat Stevens',
+    'Please check out these cat pics!',
+    [ 'adorbs', 'cheshire' ],
+    new Date(),
+    true
+);
+
+var repo = new BlogPostRepository(client, 'cat_pics_quarterly');
+
+repo.save(post, function (err, rslt) {
+    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
+});
+```
+
+## Querying
+
+Now that we have some blog posts stored in our "collection," we can
+start querying for whatever we'd like. Let's say that we want to find
+all blog posts with the keyword `funny` (after all, some cat pics are
+quite serious, and we may not want those).
+ +```java +String index = "blog_posts"; +String query = "keywords_set:funny"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'keywords_set:funny') +``` + +```php +$response = (new /Basho/Riak/Command/Builder/Search/FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('keywords_set:funny') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'keywords_set:funny') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('keywords_set:funny') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny" +``` + +Or we can find posts that contain the word `furry`: + +```java +String index = "blog_posts"; +String query = "content_register:furry"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'content_register:furry') +``` + +```php +$response = (new /Basho/Riak/Command/Builder/Search/FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('content_register:furry') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'content_register:furry') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('content_register:furry') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry" +``` + +Here are some more possible queries: + +Info | Query +:----|:----- +Unpublished posts | `published_flag:false` +Titles that begin with `Loving*` | `title_register:Loving*` +Post bodies containing the words `furry` and `jumping` | `content_register:[furry AND jumping]` diff --git a/content/riak/kv/2.9.0p5/developing/usage/mapreduce.md b/content/riak/kv/2.9.0p5/developing/usage/mapreduce.md new file mode 100644 index 0000000000..92cc84c113 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/usage/mapreduce.md @@ -0,0 +1,250 @@ +--- +title: "Using MapReduce" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Using MapReduce" + identifier: "usage_mapreduce" + weight: 106 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.0p5/dev/using/mapreduce + - /riak/kv/2.9.0p5/dev/using/mapreduce + - /riak/2.9.0p5/developing/usage/mapreduce/ + - /riak/2.9.0/developing/usage/mapreduce/ + - /riak/kv/2.9.0/developing/usage/mapreduce/ + - /riak/kv/2.9.0p1/developing/usage/mapreduce/ + - /riak/kv/2.9.0p2/developing/usage/mapreduce/ + - /riak/kv/2.9.0p3/developing/usage/mapreduce/ + - /riak/kv/2.9.0p4/developing/usage/mapreduce/ +--- + + +[usage 2i]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/secondary-indexes +[usage search]: 
{{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search +[usage types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types +[api http]: {{<baseurl>}}riak/kv/2.9.0p5/developing/api/http +[api pb]: {{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode +[guide mapreduce]: {{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/advanced-mapreduce + +{{% note title="Use MapReduce sparingly" %}} +In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive to the extent that they can degrade performance in +production clusters operating under load. Thus, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. +{{% /note %}} + +MapReduce (M/R) is a technique for dividing data processing work across +a distributed system. It takes advantage of the parallel processing +power of distributed systems and also reduces network bandwidth, as the +algorithm is passed around to where the data lives rather than +transferring a potentially huge dataset to a client algorithm. + +You can use MapReduce for things like: filtering documents by +tags, counting words in documents, and extracting links to related data. +In Riak KV, MapReduce is one method for querying that is not strictly based +on key querying, alongside [secondary indexes][usage 2i] +and [search][usage search]. MapReduce jobs can be submitted through the +[HTTP API][api http] or the [Protocol Buffers API][api pb], although we +strongly recommend using the Protocol Buffers API for performance +reasons. + +## Features + +* Map phases execute in parallel with data locality. +* Reduce phases execute in parallel on the node where the job was + submitted. +* MapReduce queries written in Erlang. + +## When to Use MapReduce + +* When you know the set of objects over which you want to MapReduce + (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key) +* When you want to return actual objects or pieces of objects and not + just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on + non-key-based queries, but they only return lists of keys and not + whole objects. +* When you need the utmost flexibility in querying your data. MapReduce + gives you full access to your object and lets you pick it apart any + way you want. + +## When Not to Use MapReduce + +* When you want to query data over an entire bucket. MapReduce uses a + list of keys, which can place a lot of demand on the cluster. +* When you want latency to be as predictable as possible. + +## How it Works + +The MapReduce framework helps developers divide a query into steps, +divide the dataset into chunks, and then run those step/chunk pairs in +separate physical hosts. + +There are two steps in a MapReduce query: + +* **Map** --- The data collection phase, which breaks up large chunks of + work into smaller ones and then takes action on each chunk. Map + phases consist of a function and a list of objects on which the map + operation will operate. +* **Reduce** --- The data collation or processing phase, which combines + the results from the map step into a single output. The reduce phase + is optional. 
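+
+To make the two phases concrete before walking through the Erlang
+example below, here is a hedged sketch using the Python client's
+MapReduce builder to submit Riak's built-in Erlang phase functions
+(the bucket and keys are illustrative):
+
+```python
+mr = riak.RiakMapReduce(client)
+# Inputs: known bucket/key pairs
+mr.add('training', 'foo')
+mr.add('training', 'bar')
+# Map phase: runs next to the data, returning each object's value
+mr.map(['riak_kv_mapreduce', 'map_object_value'])
+# Optional reduce phase: collates the mapped values on the
+# coordinating node
+mr.reduce(['riak_kv_mapreduce', 'reduce_sort'])
+results = mr.run()
+```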
+
+Riak KV MapReduce queries have two components:
+
+* A list of inputs
+* A list of phases
+
+The elements of the input list are object locations as specified by
+[bucket type][usage types], bucket, and key. The elements of the
+phases list are chunks of information related to a map, a reduce, or a
+link function.
+
+A MapReduce query begins when a client makes the request to Riak KV. The
+node that the client contacts to make the request becomes the
+*coordinating node* responsible for the MapReduce job. As described
+above, each job consists of a list of phases, where each phase is either
+a map or a reduce phase. The coordinating node uses the list of phases
+to route the object keys, along with the function that will operate on
+the objects stored at those keys, and instructs the proper [vnode][glossary vnode] to
+run that function over the right objects.
+
+After running the map function, the results are sent back to the
+coordinating node. This node then concatenates the list and passes that
+information over to a reduce phase on the same coordinating node,
+assuming that the next phase in the list is a reduce phase.
+
+The diagram below provides an illustration of how a coordinating vnode
+orchestrates a MapReduce job.
+
+![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png)
+
+## Example
+
+In this example, we'll create four objects with the text "caremad"
+repeated a varying number of times and store those objects in the bucket
+`training` (which does not bear a [bucket type][usage types]).
+An Erlang MapReduce function will be used to count the occurrences of
+the word "caremad."
+
+### Data object input commands
+
+For the sake of simplicity, we'll use [curl](http://curl.haxx.se/)
+in conjunction with Riak KV's [HTTP API][api http] to store the objects:
+
+```curl
+curl -XPUT http://localhost:8098/buckets/training/keys/foo \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad data goes here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bar \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad caremad'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/baz \
+  -H 'Content-Type: text/plain' \
+  -d 'nothing to see here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bam \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad'
+```
+
+### MapReduce invocation
+
+Invoking a MapReduce function from a compiled Erlang program requires
+that the function be compiled and distributed to all nodes.
+
+For interactive use, however, it's not necessary to do so; instead, we
+can invoke the client library from the
+[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define
+functions to send to Riak KV on the fly.
+
+First we define the map function. For each object in the bucket
+`training`, it returns the object's key along with the number of
+occurrences of the text `caremad`.
+
+We're going to generalize and optimize it a bit by supplying a
+compiled regular expression when we invoke MapReduce; our function
+will expect that as the third argument.
+
+```erlang
+ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of
+    {match, Matches} -> [{riak_object:key(O), length(Matches)}];
+    nomatch -> [{riak_object:key(O), 0}]
+end end.
+```
+
+Next, to call `ReFun` on all keys in the `training` bucket, we can do
+the following in the Erlang shell.
+
+{{% note title="Warning" %}}
+Do not use this in a production
+environment; listing all keys to identify those in the `training` bucket
+is a very expensive process.
+{{% /note %}}
+
+```erlang
+{ok, Re} = re:compile("caremad").
+```
+
+That will return output along the following lines, verifying that
+compilation has completed:
+
+```
+{ok,{re_pattern,0,0,
+    <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100,
+    ...>>}}
+```
+
+Then, we can create a socket link to our cluster:
+
+```erlang
+{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+    [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
+
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
diff --git a/content/riak/kv/2.9.0p5/developing/usage/reading-objects.md b/content/riak/kv/2.9.0p5/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..d1202d51f4
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/reading-objects.md
@@ -0,0 +1,256 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/developing/usage/reading-objects/
+  - /riak/2.9.0/developing/usage/reading-objects/
+  - /riak/kv/2.9.0/developing/usage/reading-objects/
+  - /riak/kv/2.9.0p1/developing/usage/reading-objects/
+  - /riak/kv/2.9.0p2/developing/usage/reading-objects/
+  - /riak/kv/2.9.0p3/developing/usage/reading-objects/
+  - /riak/kv/2.9.0p4/developing/usage/reading-objects/
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/#siblings) (more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object in order for the read to be considered successful
+`pr` | `0` | How many [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+    .withOption(FetchOption.R, new Quorum(3))
+    .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+```
+
+```php
+$response->getStatusCode(); // 404
+$response->isSuccess(); // false
+```
+
+```python
+riak.RiakError: 'no_type'
+```
+
+```csharp
+result.IsSuccess == false
+result.ResultCode == ResultCode.NotFound
+```
+
+```javascript
+rslt.isNotFound === true;
+```
+
+```erlang
+{error,notfound}
+```
+
+```golang
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+rsp.IsNotFound // Will be true
+```
+
+```curl
+not found
+```
diff --git a/content/riak/kv/2.9.0p5/developing/usage/replication.md b/content/riak/kv/2.9.0p5/developing/usage/replication.md
new file mode 100644
index 0000000000..a11752c22b
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/replication.md
@@ -0,0 +1,588 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Replication"
+    identifier: "usage_replication"
+    weight: 115
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/advanced/replication-properties
+  - /riak/kv/2.9.0p5/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/strong-consistency
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters]. It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast](#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+{{% note title="Note on strong consistency" %}}
+An option introduced in Riak version 2.0 is to use Riak as a
+<a href="{{< baseurl >}}riak/kv/2.9.0p5/using/reference/strong-consistency/">strongly
+consistent</a> system for data in specified buckets. Using Riak in this way is
+fundamentally different from adjusting replication properties and fine-tuning
+the availability/consistency trade-off, as it sacrifices _all_ availability
+guarantees when necessary. Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, as shown in
+the [client-level settings](#client-level-replication-settings) section
+below.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.0p5/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.0p5/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent of setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below](#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes are failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+    new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+    new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+    .setContentType("text/plain")
+    .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(storyKey)
+    .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+This write will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum`
+to `true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
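+
+For example, a single read can opt into this behavior on a per-request
+basis. The following is a sketch only: it assumes the HTTP API, the
+bucket and key names are placeholders, and both parameters can also be
+set as bucket properties instead.
+
+```curl
+# notfound_ok=false makes Riak consult multiple vnodes before giving up;
+# basic_quorum=true caps that wait at a quorum rather than all N vnodes.
+curl "http://localhost:8098/buckets/my_bucket/keys/my_key?notfound_ok=false&basic_quorum=true"
+```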
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` --- All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` --- This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` --- A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2; an N value of 5 calculates to 3, and so on.
+* `default` --- Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+    new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+    .withOption(FetchOption.R, new Quorum(2))
+    .withOption(FetchOption.NOTFOUND_OK, true)
+    .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+    new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+    .setContentType("application/json")
+    .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(michaelJordanKey)
+    .withOption(StoreOption.W, new Quorum(3))
+    .withOption(StoreOption.DW, new Quorum(2))
+    .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/2.9.0p5/developing/getting-started)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/2.9.0p5/learn/concepts/causal-context#siblings">siblings</a>
+2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/2.9.0p5/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
diff --git a/content/riak/kv/2.9.0p5/developing/usage/search-schemas.md b/content/riak/kv/2.9.0p5/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..0861a4873c
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/search-schemas.md
@@ -0,0 +1,515 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/advanced/search-schema
+  - /riak/kv/2.9.0p5/dev/advanced/search-schema
+  - /riak/2.9.0p5/developing/usage/search-schemas/
+  - /riak/2.9.0/developing/usage/search-schemas/
+  - /riak/kv/2.9.0/developing/usage/search-schemas/
+  - /riak/kv/2.9.0p1/developing/usage/search-schemas/
+  - /riak/kv/2.9.0p2/developing/usage/search-schemas/
+  - /riak/kv/2.9.0p3/developing/usage/search-schemas/
+  - /riak/kv/2.9.0p4/developing/usage/search-schemas/
+---
+
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/), and [more]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value. Sufficiently sized objects can potentially take up tremendous
+amounts of disk space, so pay special attention to those indexes.
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('path/to/cartoons.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('cartoons.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+xml_file.close()
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type: application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon Thundercats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above objects into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+
+```
+name=Lion-o
+age=30
+leader=true
+aliases.name=León-O
+aliases.desc_es=Señor de los ThunderCats
+aliases.name=Starlion
+aliases.desc_fr=Le jeune seigneur des Cosmocats
+```
+
+This means that our schema should handle `name`, `age`, `leader`,
+`aliases.name` (a `dot` is a valid field character), and
+`aliases.desc_*`, where the suffix indicates the language of the
+description (here Spanish and French).
+
+### Required Schema Fields
+
+Solr schemas can be very complex, containing many types and analyzers.
+Refer to the [Solr 4.7 reference
+guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf)
+for a complete list. You should be aware, however, that there are a few
+fields that are required by Riak Search in order to properly distribute
+an object across a [cluster][concept clusters]. These fields are all prefixed
+with `_yz`, which stands for
+[Yokozuna](https://github.com/basho/yokozuna), the original code name
+for Riak Search.
+
+Below is a bare minimum skeleton Solr Schema. It won't do much for you
+other than allow Riak Search to properly manage your stored objects.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+
+ <types>
+   <!-- YZ String: Used for non-analyzed fields -->
+   <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+ </types>
+</schema>
+```
+
+If you're missing any of the above fields, Riak Search will reject your
+custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`.
+
+In the table below, you'll find a description of the various required
+fields. You'll rarely need to use any fields other than `_yz_rt` (bucket
+type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err`
+can be helpful if you suspect that your extractors are failing.
+Malformed JSON or XML will cause Riak Search to index a key and set
+`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-----|:-----|:-----------
+`_yz_id` | ID | Unique identifier of this Solr document
+`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy)
+`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them
+`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Flag indicating whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you need to map the name and type of each field (a string,
+integer, boolean, etc.).
+
+When creating fields you can either create specific fields via the
+`field` element or an asterisk (`*`) wildcard field via `dynamicField`.
+Any field that matches a specific field name will win; if none matches,
+Riak Search will attempt to match a dynamic field pattern instead.
+
+Besides a field `type`, you also must decide if a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored`, that
+means that you can get the value back as a result of a query, but it
+also doubles the storage of the field (once in Riak, again in Solr). If
+a single Riak object can have more than one copy of the same matching
+field, you also must set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.0">
+ <fields>
+   <field name="name" type="string" indexed="true" stored="true" />
+   <field name="age" type="int" indexed="true" stored="false" />
+   <field name="leader" type="boolean" indexed="true" stored="false" />
+   <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" />
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+```
+
+Next, take note of the types you used in the fields and ensure that each
+of the field types is defined as a `fieldType` under the `types`
+element. Basic types such as `string`, `boolean`, and `int` have matching
+Solr classes. There are dozens more types, including many kinds of
+number (`float`, `tdouble`, `random`), `date` fields, and even
+geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we mapped any field name ending in
+`_es` to Spanish and any ending in `_de` to German.
+
+```xml
+  <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- Spanish -->
+    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+        <filter class="solr.SpanishLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+      </analyzer>
+    </fieldType>
+
+    <!-- German -->
+    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
+        <filter class="solr.GermanNormalizationFilterFactory"/>
+        <filter class="solr.GermanLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+      </analyzer>
+    </fieldType>
+  </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+provided to index without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to this will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+    at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+    at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+    at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+    at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+    at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+    ...
+    ...
+    ...
+``` + +### Uploading + +Once you have decided on the format of your custom schema as an .xml file, it can be uploaded to Riak KV as follows: + +```curl +curl -v -XPUT $RIAK_HOST/search/schema/thundercats / + -H 'Content-Type:application/xml' / + --data-binary @thundercats_schema.xml +``` + + + +## Field Properties By Use Case + +Sometimes it can be tricky to decide whether a value should be `stored`, +or whether `multiValued` is allowed. This handy table from the [Solr +documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case) +may help you pick field properties. + +An entry of `true` or `false` in the table indicates that the option +must be set to the given value for the use case to function correctly. +If no entry is provided, the setting of that attribute has no impact on +the case. + +<table class="schemausecase"> +<thead> +<tr> +<th>Use Case</th> +<th><code>indexed</code></th> +<th><code>stored</code></th> +<th><code>multiValued</code></th> +<th><code>omitNorms</code></th> +<th><code>termVectors</code></th> +<th><code>termPositions</code></th> +</tr> +</thead> +<tbody> +<tr> +<td>search within field</td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>retrieve contents</td> +<td></td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>use as unique key</td> +<td><code>true</code></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>sort on field</td> +<td><code>true</code></td> +<td></td> +<td><code>false</code></td> +<td><code>true</code>[1](#notes)</td> +<td></td> +<td></td> +</tr> +<tr> +<td>use field boosts[5](#notes)</td> +<td></td> +<td></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +</tr> +<tr> +<td>document boosts affect searches within field</td> +<td></td> +<td></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +</tr> +<tr> +<td>highlighting</td> +<td><code>true</code>[4](#notes)</td> +<td><code>true</code></td> +<td></td> +<td></td> +<td>[2](#notes)</td> +<td><code>true</code>[3](#notes)</td> +</tr> +<tr> +<td>faceting[5](#notes)</td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>add multiple values, maintaining order</td> +<td></td> +<td></td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>field length affects doc score</td> +<td></td> +<td></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +</tr> +<tr> +<td>MoreLikeThis[5](#notes)</td> +<td></td> +<td></td> +<td></td> +<td></td> +<td><code>true</code>[6](#notes)</td> +<td></td> +</tr> +</tbody></table> + +```erlang +{analyzer_factory, {erlang, text_analyzers, noop_analyzer_factory}}} +``` diff --git a/content/riak/kv/2.9.0p5/developing/usage/search.md b/content/riak/kv/2.9.0p5/developing/usage/search.md new file mode 100644 index 0000000000..6ff410e162 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/usage/search.md @@ -0,0 +1,1459 @@ +--- +title: "Using Search" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Searching" + identifier: "usage_searching" + weight: 105 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.0p5/dev/using/search + - /riak/kv/2.9.0p5/dev/using/search + - /riak/2.9.0p5/developing/usage/search/ + - /riak/2.9.0/developing/usage/search/ + - /riak/kv/2.9.0/developing/usage/search/ + - /riak/kv/2.9.0p1/developing/usage/search/ 
+ - /riak/kv/2.9.0p2/developing/usage/search/
+ - /riak/kv/2.9.0p3/developing/usage/search/
+ - /riak/kv/2.9.0p4/developing/usage/search/
+---
+
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak Search 2.0 is an integration of Solr (for indexing and querying)
+and Riak (for storage and distribution). There are a few points of
+interest that a user of Riak Search must keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak Search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak Search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak Search uses AAE is in the
+[Riak Search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you connect with to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Here we'll use `curl` to
+create an index named `famous` with the default schema.
+
+Both schema and index creation are covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference#client-interfaces).
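+
+Similarly, the client examples below assume a connected client object
+named `client` (or, for Erlang, a `Pid`). As a minimal sketch with the
+official Python client, assuming the default Protocol Buffers port, that
+object might be created like this:
+
+```python
+import riak
+
+# Hypothetical connection settings; point these at your own cluster.
+client = riak.RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
+```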
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.0p5/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+    .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+    -H 'Content-Type: application/json' \
+    -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+need only do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type
+property `search_index` to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket.
 If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+You can create a bucket type without any properties and set individual
+buckets to be indexed. The step below creates and activates the bucket
+type:
+
+```bash
+riak-admin bucket-type create animals '{"props":{}}'
+riak-admin bucket-type activate animals
+```
+
+And this step applies the index to the `cats` bucket, which bears the
+`animals` bucket type we just created and activated:
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+    -H 'Content-Type: application/json' \
+    -d '{"props":{"search_index":"famous"}}'
+```
+
+Another possibility is to set the `search_index` as a default property
+of the bucket type. This means _any_ bucket under that type will
+inherit that setting and have its values indexed.
+
+```bash
+riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}'
+riak-admin bucket-type activate animals
+```
+
+If you ever need to turn off indexing for a bucket, set the
+`search_index` property to the `_dont_index_` sentinel value.
+
+### Associating an Index via Custom Bucket Properties
+
+Although we recommend that you use all new buckets under a bucket type,
+if you have existing data with a type-free bucket (i.e. under the
+default bucket type) you can set the `search_index` property for a
+specific bucket.
+
+```java
+Namespace catsBucket = new Namespace("cats");
+StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket)
+        .withSearchIndex("famous")
+        .build();
+client.execute(storePropsOp);
+```
+
+```ruby
+bucket = client.bucket('cats')
+bucket.properties = {'search_index' => 'famous'}
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak))
+    ->withName('famous')
+    ->buildBucket('cats')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket('cats')
+bucket.set_properties({'search_index': 'famous'})
+```
+
+```csharp
+var properties = new RiakBucketProperties();
+properties.SetSearchIndex("famous");
+var rslt = client.SetBucketProperties("cats", properties);
+```
+
+```javascript
+var bucketProps_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    // success
+};
+
+var store = new Riak.Commands.KV.StoreBucketProps.Builder()
+    .withBucket("cats")
+    .withSearchIndex("famous")
+    .withCallback(bucketProps_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/buckets/cats/props \
+    -H 'Content-Type: application/json' \
+    -d '{"props":{"search_index":"famous"}}'
+```
+
+Once you have created the index association, any new data will be indexed on
+ingest according to your schema.
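+
+If you want to double-check that the association took effect, you can
+read the bucket's properties back and look for `search_index`. A quick
+sketch with the Python client (verify the property-reading call against
+your client version):
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+props = bucket.get_properties()
+
+# Should print 'famous' once the association is in place.
+print(props['search_index'])
+```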
+
+## Riak Search Security Setup
+
+[Security]({{<baseurl>}}riak/kv/2.9.0p5/using/security/) is a new feature as of
+Riak 2.0 that lets an administrator limit access to certain resources.
+In the case of search, your options are to limit administration of
+schemas or indexes (the `search.admin` permission) to certain users, and
+to limit querying (the `search.query` permission) to any index or to a
+specific index. The example below shows the various options.
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Lion-o, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python the content type is automatically set for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":28,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+    ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket))
+    ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket))
+    ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket))
+    ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]);
+
+$storeObjectBuilder->build()->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+
+cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True})
+cat.store()
+
+cat = bucket.new('cheetara', {'name_s': 'Cheetara', 'age_i': 28, 'leader_b': False})
+cat.store()
+
+cat = bucket.new('snarf', {'name_s': 'Snarf', 'age_i': 43})
+cat.store()
+
+cat = bucket.new('panthro', {'name_s': 'Panthro', 'age_i': 36})
+cat.store()
+```
+
+```csharp
+var lionoId = new RiakObjectId("animals", "cats", "liono");
+var lionoObj = new { name_s = "Lion-o", age_i = 30, leader_b = true };
+var lionoRiakObj = new RiakObject(lionoId, lionoObj);
+
+var cheetaraId = new RiakObjectId("animals", "cats", "cheetara");
+var cheetaraObj = new { name_s = "Cheetara", age_i = 28, leader_b = false };
+var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj);
+
+var snarfId = new RiakObjectId("animals", "cats", "snarf");
+var snarfObj = new { name_s = "Snarf", age_i = 43, leader_b = false };
+var snarfRiakObj = new RiakObject(snarfId, snarfObj);
+
+var panthroId = new RiakObjectId("animals", "cats", "panthro");
+var panthroObj = new { name_s = "Panthro", age_i = 36, leader_b = false };
+var panthroRiakObj = new RiakObject(panthroId, panthroObj);
+
+var rslts = client.Put(new[] {
+    lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj
+});
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var objs = [
+    [ 'liono', { name_s: 'Lion-o', age_i: 30, leader_b: true } ],
+    [ 'cheetara', { name_s: 'Cheetara', age_i: 28, leader_b: false } ],
+    [ 'snarf', { name_s: 'Snarf', age_i: 43, leader_b: false } ],
+    [ 'panthro', { name_s: 'Panthro', age_i: 36, leader_b: false } ],
+];
+
+var storeFuncs = [];
+objs.forEach(function (o) {
+    var storeFunc = function (async_cb) {
+        var key = o[0];
+        var value = o[1];
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('application/json');
+        riakObj.setBucketType('animals');
+        riakObj.setBucket('cats');
+        riakObj.setKey(key);
+        riakObj.setValue(value);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    };
+    storeFuncs.push(storeFunc);
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+    // NB: all objects stored and indexed...
+});
+```
+
+```erlang
+CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>,
+    <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>,
+    "application/json"),
+riakc_pb_socket:put(Pid, CO),
+
+C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>,
+    <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>,
+    "application/json"),
+riakc_pb_socket:put(Pid, C1),
+
+C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>,
+    <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>,
+    "application/json"),
+riakc_pb_socket:put(Pid, C2),
+
+C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>,
+    <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>,
+    "application/json"),
+riakc_pb_socket:put(Pid, C3),
+```
+
+```golang
+o1 := &riak.Object{
+    Key:   "liono",
+    Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"),
+}
+o2 := &riak.Object{
+    Key:   "cheetara",
+    Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":28,\"leader_b\":false}"),
+}
+o3 := &riak.Object{
+    Key:   "snarf",
+    Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"),
+}
+o4 := &riak.Object{
+    Key:   "panthro",
+    Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"),
+}
+
+objs := [...]*riak.Object{o1, o2, o3, o4}
+
+wg := &sync.WaitGroup{}
+for _, obj := range objs {
+    obj.ContentType = "application/json"
+    obj.Charset = "utf-8"
+    obj.ContentEncoding = "utf-8"
+
+    cmd, err := riak.NewStoreValueCommandBuilder().
+        WithBucketType("animals").
+        WithBucket("cats").
+        WithContent(obj).
+        Build()
+    if err != nil {
+        return err
+    }
+
+    args := &riak.Async{
+        Command: cmd,
+        Wait:    wg,
+    }
+    if err := cluster.ExecuteAsync(args); err != nil {
+        return err
+    }
+}
+
+wg.Wait()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \
+    -H 'Content-Type: application/json' \
+    -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}'
+
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \
+    -H 'Content-Type: application/json' \
+    -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}'
+
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \
+    -H 'Content-Type: application/json' \
+    -d '{"name_s":"Snarf", "age_i":43}'
+
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \
+    -H 'Content-Type: application/json' \
+    -d '{"name_s":"Panthro", "age_i":36}'
+```
+
+If you've used Riak before, you may have noticed that this is no
+different from storing values without Riak Search. That's because we
+designed Riak Search with the following design goal in mind:
+
+#### Write it like Riak, query it like Solr
+
+But how does Riak Search know how to index values, given that you can
+store opaque values in Riak? For that, we employ extractors.
+
+## Extractors
+
+Extractors are modules in Riak that accept a Riak value with a certain
+content type and convert it into a list of fields that can be indexed by
+Solr. This is done transparently and automatically as part of the
+indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/custom-extractors).
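+
+To make the extraction step concrete, here is a toy Python sketch of
+what a JSON extractor does conceptually: flatten a decoded document into
+Solr field/value pairs, joining nested keys with dots. This is an
+illustration only, not Riak's actual extractor (which is implemented in
+Erlang inside Yokozuna):
+
+```python
+def extract_json(value, prefix=''):
+    """Flatten a decoded JSON value into (field, value) pairs for Solr."""
+    pairs = []
+    if isinstance(value, dict):
+        for key, val in value.items():
+            name = '%s.%s' % (prefix, key) if prefix else key
+            pairs.extend(extract_json(val, name))
+    elif isinstance(value, list):
+        # A list becomes a multi-valued field: one pair per element.
+        for item in value:
+            pairs.extend(extract_json(item, prefix))
+    else:
+        pairs.append((prefix, value))
+    return pairs
+
+# [('person.pets.pet.name_s', 'Spot')] -- the same flattening rule
+# that the XML example below illustrates.
+print(extract_json({'person': {'pets': {'pet': {'name_s': 'Spot'}}}}))
+```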
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated into a
+field insert on the Solr index document. Solr will index any field that it
+recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` an integer, `_b` a boolean, and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match.
+
+```java
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "name_s:Lion*")
+        .build();
+cluster.execute(searchOp);
+// This will display the actual results as a List of Maps:
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+// This will display the number of results:
+System.out.println(results.size());
+```
+
+```ruby
+results = client.search("famous", "name_s:Lion*")
+p results
+p results['docs']
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('name_s:Lion*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+
+var_dump($response->getDocs());
+```
+
+```python
+results = client.fulltext_search('famous', 'name_s:Lion*')
+print results
+print results['docs']
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "name_s")
+        .Search("Lion*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+foreach (RiakSearchResultDocument doc in searchResult.Documents)
+{
+    var args = new[] {
+        doc.BucketType,
+        doc.Bucket,
+        doc.Key,
+        string.Join(", ", doc.Fields.Select(f => f.Value).ToArray())
+    };
+    Debug.WriteLine(
+        format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}",
+        args: args);
+}
+```
+
+```javascript
+function search_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("docs:", JSON.stringify(rslt.docs));
+}
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('name_s:Lion*')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:fwrite("~p~n", [Results]),
+Docs = Results#search_results.docs,
+io:fwrite("~p~n", [Docs]).
+
+%% Please note that this example relies on an Erlang record definition
+%% for the search_result record found here:
+%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("name_s:Lion*").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+sc := cmd.(*riak.SearchCommand)
+if json, jerr := json.MarshalIndent(sc.Response.Docs, "", " "); jerr != nil {
+    return jerr
+} else {
+    fmt.Println(string(json))
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp
+```
+
+The response to a query will be an object containing details about the
+response, such as a query's max score and a list of documents which
+match the given query. It's worth noting two things:
+
+* The documents returned are Search documents (a set of Solr
+  field/values), not a Riak value
+* The HTTP response is a direct Solr response, while the drivers use
+  Protocol Buffers and are encoded with different field names
+
+This is a common HTTP `response` value:
+
+```json
+{
+  "numFound": 1,
+  "start": 0,
+  "maxScore": 1.0,
+  "docs": [
+    {
+      "leader_b": true,
+      "age_i": 30,
+      "name_s": "Lion-o",
+      "_yz_id": "default_cats_liono_37",
+      "_yz_rk": "liono",
+      "_yz_rt": "default",
+      "_yz_rb": "cats"
+    }
+  ]
+}
+```
+
+The most important field returned is `docs`, which is the list of
+objects that each contain fields about matching index documents.
 The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+        .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation($key, $bucket, $btype)
+    ->build()
+    ->execute()
+    ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO
+30]`. If you wanted to find all cats 30 or older, you could include a
+glob as the top end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
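+
+Date fields use the same bracket syntax together with Solr datemath. As
+a hedged Python illustration (the `created_dt` field here is
+hypothetical; the default schema maps the `_dt` suffix to a date type):
+
+```python
+# Assumes documents were stored with a 'created_dt' date value.
+results = client.fulltext_search(
+    'famous', 'created_dt:[NOW-1YEAR/DAY TO NOW/DAY+1DAY]')
+print(results['num_found'])
+```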
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+Search searchOp = new Search.Builder(index, query).build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('leader_b:true AND age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+        .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new Command\Builder\Search\DeleteIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []),
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that index's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_`.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+    -H 'Content-Type: application/json' \
+    -d '{"props":{"search_index":"_dont_index_"}}'
+```
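+
+In client terms, the same disassociate-then-delete flow might look like
+the following Python sketch (mirroring the two-step Go example above;
+check the property-setting call against your client version):
+
+```python
+# Point the bucket type away from the index first...
+client.bucket_type('animals').set_property('search_index', '_dont_index_')
+
+# ...then the index can be deleted.
+client.delete_search_index('famous')
+```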
+
+#### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents is returned in non-overlapping
+sequential subsets (in other words, *pages*). This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, where `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "*:*")
+        .withStart(start)
+        .withNumRows(rowsPerPage)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('*:*')
+    ->withMaxRows($maxRows)
+    ->withStartRow($start)
+    ->build()
+    ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results, as in the sketch
+below.
+
+If you want to sort by score without repeating results then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query.
+
+[This issue](https://github.com/basho/yokozuna/issues/355) is caused by
+the way Search must minimally distribute a query across multiple Solr
+nodes (called a *coverage plan*) and then filter duplicate results to
+retrieve a full result set. Since this plan is frequently recalculated,
+successive page queries may use a different plan, and thus calculate
+alternate `score`s or filter different `_yz_id` values. We have plans to
+fix this shortcoming in a future version of Riak.
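+
+Here is a minimal Python sketch of the stable-pagination workaround
+mentioned above, sorting on the type/bucket/key fields rather than on
+`score` (the `sort` parameter is passed through to Solr; verify the
+keyword name against your client version):
+
+```python
+ROWS_PER_PAGE = 2
+
+def fetch_page(page):
+    # Sorting on type/bucket/key gives a stable order across coverage plans.
+    return client.fulltext_search(
+        'famous', '*:*',
+        start=ROWS_PER_PAGE * (page - 1),
+        rows=ROWS_PER_PAGE,
+        sort='_yz_rt asc, _yz_rb asc, _yz_rk asc')
+
+page_two = fetch_page(2)
+print([doc['_yz_rk'] for doc in page_two['docs']])
+```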
+
+### MapReduce
+
+Riak Search allows for piping search results as inputs for
+[MapReduce]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce/) jobs. This is useful for
+performing post-calculations of results or aggregations of ad hoc
+queries. The Riak Search MapReduce integration works similarly to
+regular MapReduce, with the notable exception that your input is not a
+bucket, but rather index and query arguments to the `yokozuna` module
+and `mapred_search` function (an Erlang `module:function` pair that adds
+the Riak Search hook to MapReduce).
+
+```json
+{
+  "inputs": {
+    "module": "yokozuna",
+    "function": "mapred_search",
+    "arg": ["famous","NOT leader_b:true"]
+  },
+  "query": [
+    {
+      "map": {
+        "language": "javascript",
+        "keep": false,
+        "source": "function(v) { return [1]; }"
+      }
+    },
+    {
+      "reduce": {
+        "language": "javascript",
+        "keep": true,
+        "name": "Riak.reduceSum"
+      }
+    }
+  ]
+}
+```
+
+In this example we're searching for all famous cats that are not
+leaders and counting up the results using JavaScript for both map and
+reduce. It should return the reduced sum of `[3]`.
+
+```curl
+curl -XPOST $RIAK_HOST/mapred \
+    -H 'Content-Type: application/json' \
+    -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}'
+```
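+
+The same job can also be built from a client. Here is a hedged Python
+sketch using the client's MapReduce builder with a search input (the
+`search` input method targets the same `yokozuna:mapred_search` pair;
+confirm it exists in your client version):
+
+```python
+from riak.mapreduce import RiakMapReduce
+
+mr = RiakMapReduce(client)
+# Use the 'famous' index and a query, rather than a bucket, as input.
+mr.search('famous', 'NOT leader_b:true')
+mr.map('function(v) { return [1]; }')
+mr.reduce('Riak.reduceSum', {'keep': True})
+
+print(mr.run())  # expected: [3]
+```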
diff --git a/content/riak/kv/2.9.0p5/developing/usage/searching-data-types.md b/content/riak/kv/2.9.0p5/developing/usage/searching-data-types.md
new file mode 100644
index 0000000000..ab3d4f788f
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/searching-data-types.md
@@ -0,0 +1,1691 @@
+---
+title: "Searching with Data Types"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Searching with Data Types"
+    identifier: "usage_search_data_types"
+    weight: 111
+    parent: "developing_usage"
+toc: true
+aliases:
+ - /riak/2.9.0p5/dev/search/search-data-types
+ - /riak/kv/2.9.0p5/dev/search/search-data-types
+ - /riak/2.9.0p5/developing/usage/searching-data-types/
+ - /riak/2.9.0/developing/usage/searching-data-types/
+ - /riak/kv/2.9.0/developing/usage/searching-data-types/
+ - /riak/kv/2.9.0p1/developing/usage/searching-data-types/
+ - /riak/kv/2.9.0p2/developing/usage/searching-data-types/
+ - /riak/kv/2.9.0p3/developing/usage/searching-data-types/
+ - /riak/kv/2.9.0p4/developing/usage/searching-data-types/
+---
+
+
+Although [Riak Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types) function differently from other
+Riak objects in some respects, when you're using Search you can think of
+them as normal Riak objects with special metadata attached (metadata
+that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#maps)
+can be indexed and have their contents searched just like other Riak
+objects.
+
+## Data Type MIME Types
+
+Like all objects stored in Riak, Riak Data Types are assigned content
+types. Unlike other Riak objects, this happens automatically. When you
+store, say, a counter in Riak, it will automatically be assigned the
+type `application/riak_counter`. The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types has its own default schema, with the exception of
+maps. This means that the `_yz_default` schema will automatically index
+Data Types on the basis of their assigned content type, and that there
+is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#data-types-and-search-examples) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`.
 The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#maps), there are four schemas
+for embedded, aka dynamic, fields. Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued.
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings.
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a Search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('scores')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+    -H 'Content-Type: application/json' \
+    -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index. So let's
+start playing with some counters. All counters will be stored in the
+bucket `people`, while the key for each counter will be the username of
+each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu1 = new CounterUpdate(10);
+UpdateCounter update1 = new UpdateCounter.Builder(christopherHitchensCounter, cu1)
+        .build();
+client.execute(update1);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+CounterUpdate cu2 = new CounterUpdate(25);
+UpdateCounter update2 = new UpdateCounter.Builder(joanRiversCounter, cu2)
+        .build();
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10)
+    ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+    ->buildLocation('joan_rivers', 'people', 'counters')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"chris_hitchens">>,
+                            riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"joan_rivers">>,
+                            riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25. Let's query to see how many counters have a value greater
+than 20, just to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[20 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:%5B20%20TO%20*%5D" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over 20.
+To find out which counter that is, we can dig into our results:
+
+```java
+// Using the "results" object from above:
+int numberFound = results.numResults();
+Map<String, List<String>> foundObject = results.getAllResults().get(0);
+String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
+String bucket = foundObject.get("_yz_rb").get(0); // "people"
+String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
+```
+
+```ruby
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```php
+$doc = $response->getDocs()[0];
+
+// The key
+$doc->_yz_rk; // 'joan_rivers'
+
+// The bucket
+$doc->_yz_rb; // 'people'
+
+// The bucket type
+$doc->_yz_rt; // 'counters'
+```
+
+```python
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+var doc = rslt.docs[0];
+
+var key = doc['_yz_rk'];
+var bucket = doc['_yz_rb'];
+var bucketType = doc['_yz_rt'];
+```
+
+```erlang
+Doc = lists:nth(1, Docs),
+Key = proplists:get_value(<<"_yz_rk">>, Doc),
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
+BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
+```
+
+```curl
+# Use the JSON object from above to locate bucket, key, and bucket type
+# information
+```
+
+Alternatively, we can see how many counters have values below 15:
+
+```java
+String index = "scores";
+String query = "counter:[* TO 15]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[* TO 15]')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[* TO 15]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[* TO 15]')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[* TO 15]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp
+```
+
+Or we can see how many counters have a value of 17 exactly:
+
+```java
+// Using the same method as above, just changing the query:
+String query = "counter:17";
+```
+
+```ruby
+results = client.search('scores', 'counter:17')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:17')
+    ->build()
+    ->execute();
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:17')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:17");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:17')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp
+```
+
+## Sets Example
+
+Let's say that we're storing information about the hobbies of a group of
+people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#sets) simply called `sets`,
+like so:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak-admin bucket-type activate sets
+```
+
+Now, we'll create a Search index called `hobbies` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(hobbiesIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('hobbies', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('hobbies')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('hobbies', '_yz_default')
+```
+
+```csharp
+var searchIndex = new SearchIndex("hobbies", "_yz_default");
+var rslt = client.PutSearchIndex(searchIndex);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'hobbies'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/hobbies \
+  -H 'Content-Type: application/json' \
+  -d '{"schema": "_yz_default"}'
+```
+
+Now, we can modify our `sets` bucket type to associate that bucket type
+with our `hobbies` index:
+
+```bash
+riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}'
+```
+
+Now, all of the sets that we store in any bucket with the bucket type
+`sets` will be automatically indexed as a set.
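+
+If you want to confirm that the association took hold, the bucket
+type's properties should now report the index. A minimal sketch,
+assuming the Python client used elsewhere in these examples:
+
+```python
+# The bucket type's 'search_index' property should now be 'hobbies'
+props = client.bucket_type('sets').get_properties()
+props.get('search_index')
+# 'hobbies'
+```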
+So let's say that we
+store three sets for two different people describing their respective
+hobbies, in the bucket `people`:
+
+```java
+Namespace peopleBucket = new Namespace("sets", "people");
+
+Location mikeDitkaSet = new Location(peopleBucket, "ditka");
+SetUpdate su1 = new SetUpdate()
+    .add("football")
+    .add("winning");
+UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();
+
+Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
+SetUpdate su2 = new SetUpdate()
+    .add("wailing")
+    .add("rocking")
+    .add("winning");
+UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();
+
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+
+ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('football')
+    ->add('winning')
+    ->buildLocation('ditka', 'people', 'sets');
+
+$builder->build()->execute();
+
+$builder->add('wailing')
+    ->add('rocking')
+    ->add('winning')
+    ->buildLocation('dio', 'people', 'sets')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('people')
+
+mike_ditka_set = Set(bucket, 'ditka')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+mike_ditka_set.store()
+
+ronnie_james_dio_set = Set(bucket, 'dio')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+ronnie_james_dio_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("ditka")
+    .WithAdditions(new[] { "football", "winning" })
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("dio")
+    .WithAdditions(new[] { "wailing", "rocking", "winning" })
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'ditka',
+            additions: ['football', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'dio',
+            additions: ['wailing', 'rocking', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+%% riakc_set is immutable, so each add_element returns a new set that
+%% must be bound to a new variable
+MikeDitkaSet0 = riakc_set:new(),
+MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet0),
+MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
+RonnieJamesDioSet0 = riakc_set:new(),
+RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet0),
+RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
+RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),
+
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"ditka">>,
+                            riakc_set:to_op(MikeDitkaSet2)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"dio">>,
+                            riakc_set:to_op(RonnieJamesDioSet3)).
+```
+
+Now, we can query our `hobbies` index to see if anyone has the hobby
+`football`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:football";
+```
+
+```ruby
+results = client.search('hobbies', 'set:football')
+# This should return a Hash with fields like 'num_found' and 'docs'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('hobbies')
+    ->withQuery('set:football')
+    ->build()
+    ->execute();
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:football')
+# This should return a dict with fields like 'num_found' and 'docs'
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:football");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("sets numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:football')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp
+```
+
+Let's see how many sets contain the element `football`:
+
+```java
+// Using the same method explained above for getting search results:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results['num_found']
+# 1
+```
+
+```php
+$response->getNumFound(); // 1
+```
+
+```python
+results['num_found']
+# 1
+```
+
+```csharp
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+rslt.numFound;
+// 1
+```
+
+```erlang
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+# Use the "num_found" field in the JSON response shown above
+```
+
+Success! We stored two sets, only one of which contains the element
+`football`. Now, let's see how many sets contain the element `winning`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:winning";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('hobbies')
+    ->withQuery('set:winning')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:winning");
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:winning')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>),
+NumberFound = Results#search_results.num_found.
+%% 2
+```
+
+Just as expected, both sets we stored contain the element `winning`.
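+
+Because these are ordinary Solr queries, terms can also be combined
+with boolean operators. As a hedged sketch (same `hobbies` index and
+Python client as above), the following should match only the Ditka set,
+since it alone contains both elements:
+
+```python
+results = client.fulltext_search('hobbies', 'set:winning AND set:football')
+results['num_found']
+# 1
+```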
+
+## Maps Example
+
+This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types)
+tutorial. That tutorial walks you through storing CMS-style user data in
+Riak [maps]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#maps), and we'd suggest that you
+familiarize yourself with that tutorial first. More specifically, user
+data is stored in the following fields in each user's map:
+
+* first name in a `first_name` register
+* last name in a `last_name` register
+* whether the user is an enterprise customer in an `enterprise_customer`
+  flag
+* the number of times the user has visited the company page in a
+  `page_visits` counter
+* a list of the user's interests in an `interests` set
+
+First, let's create and activate a bucket type simply called `maps` that
+is set up to store Riak maps:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+riak-admin bucket-type activate maps
+```
+
+Now, let's create a search index called `customers` using the default
+schema:
+
+```java
+YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(customersIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('customers', '_yz_default')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('customers')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('customers', '_yz_default')
+```
+
+```csharp
+var searchIndex = new SearchIndex("customers", "_yz_default");
+var rslt = client.PutSearchIndex(searchIndex);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'customers'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+    .update("first_name", new RegisterUpdate("Idris"))
+    .update("last_name", new RegisterUpdate("Elba"))
+    .update("enterprise_customer", new FlagUpdate(false))
+    .update("page_visits", new CounterUpdate(10))
+    .update("interests", new SetUpdate().add("acting").add("being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+    .update("first_name", new RegisterUpdate("Joan"))
+    .update("last_name", new RegisterUpdate("Jett"))
+    // Joan Jett is not an enterprise customer, so we don't need to
+    // explicitly disable the "enterprise_customer" flag, as all
+    // flags are disabled by default
+    .update("page_visits", new CounterUpdate(25))
+    .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Idris')
+    ->updateRegister('last_name', 'Elba')
+    ->updateFlag('enterprise_customer', true)
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Joan')
+    ->updateRegister('last_name', 'Jett')
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, false)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters above 15.
+Unlike the counters example above, we have to specify _which_ counter
+we're querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('page_visits_counter:[15 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter
+above 15. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register; // 'Joan'
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we stored thus far had an `interests` set. First, let's
+see how many of our maps even _have_ sets called `interests` using a
+wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set.
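+
+A wildcard isn't required, though; an exact element works as a query
+term on its own. As a minimal sketch (same `customers` index and Python
+client as above), this should match only the Idris Elba map:
+
+```python
+results = client.fulltext_search('customers', 'interests_set:acting')
+results['num_found']
+# 1
+```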
+Now let's see how
+many maps have items in `interests` sets that begin with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:loving*')
+    ->build()
+    ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has one item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will have a first name only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+    .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+    .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate).build();
+client.execute(addSubMap);
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'John Luther');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'Robert Plant');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+Querying maps within maps involves constructing queries that separate the
+different levels of depth with a single dot. Here's an example query for
+finding maps that have a `name` register embedded within an `alter_ego`
+map:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Once we know how to query embedded fields like this, we can query those
+just like any other.
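+
+The same rule extends to deeper nesting: each embedded map contributes
+one dot-separated `_map` segment to the field name. As a purely
+hypothetical sketch (no such map is stored in this example), a register
+two maps deep would be queried along these lines:
+
+```python
+# Hypothetical 'alias' register inside a 'stage_persona' map inside the
+# 'alter_ego' map
+results = client.fulltext_search(
+    'customers', 'alter_ego_map.stage_persona_map.alias_register:*')
+```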
+Let's find out which maps have an `alter_ego`
+sub-map that contains a `name` register that ends with `Plant`, and
+display that customer's first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*Plant')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
diff --git a/content/riak/kv/2.9.0p5/developing/usage/secondary-indexes.md b/content/riak/kv/2.9.0p5/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..3f64cf53d2
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/secondary-indexes.md
@@ -0,0 +1,2034 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/2i
+  - /riak/kv/2.9.0p5/dev/using/2i
+  - /riak/2.9.0p5/developing/usage/secondary-indexes/
+  - /riak/2.9.0/developing/usage/secondary-indexes/
+  - /riak/kv/2.9.0/developing/usage/secondary-indexes/
+  - /riak/kv/2.9.0p1/developing/usage/secondary-indexes/
+  - /riak/kv/2.9.0p2/developing/usage/secondary-indexes/
+  - /riak/kv/2.9.0p3/developing/usage/secondary-indexes/
+  - /riak/kv/2.9.0p4/developing/usage/secondary-indexes/
+---
+
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak.
+If you're storing [user data]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary indexes can be either a binary or string, such as
+`sensor_1_data` or `admin_user` or `click_event`, or an integer, such as
+`99` or `141121`.
+
+[Riak Search]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search/) serves analogous purposes but is quite
+different because it parses key/value data itself and builds indexes on
+the basis of Solr schemas.
+
+Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory]
+backends.
+
+## Features
+
+* Allows two types of secondary attributes: integers and strings (aka
+  binaries)
+* Allows querying by exact match or range on one index
+* Allows pagination of results
+* Allows streaming of results
+* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce/)
+  query
+
+> **Note on 2i and strong consistency**
+>
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency]
+feature introduced in Riak version 2.0. If you store objects in
+[strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
+secondary index metadata to those objects, you can still perform
+strongly consistent operations on those objects but the secondary
+indexes will be ignored.
+
+## When to Use Secondary Indexes
+
+Secondary indexes are useful when you want to find data on the basis of
+something other than objects' bucket type, bucket, and key, i.e. when
+you want objects to be discoverable based on more than their location
+alone.
+
+2i works best for objects whose value is stored in an opaque blob, like
+a binary file, because those objects don't offer any clues that enable
+you to discover them later. Indexing enables you to tag those objects
+and find all objects with the same tag in a specified bucket later on.
+
+2i is thus recommended when your use case requires an easy-to-use search
+mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that
+enables an application to tell Riak things like "fetch all objects
+tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged
+with numbers between 1500 and 1509."
+
+2i is also recommended if your use case requires anti-entropy. Since
+secondary indexes are just metadata attached to key/value objects, 2i
+piggybacks off of read-repair.
+
+## When Not to Use Secondary Indexes
+
+* If your ring size exceeds 512 partitions, 2i can cause performance
+  issues in large clusters.
+* When you need more than the exact match and range searches that 2i
+  supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search/).
+* When you want to use composite queries. A query like
+  `last_name=zezeski AND state=MD` would have to be split into two
+  queries and the results merged, as sketched below (or it would need to
+  involve [MapReduce]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce/)).
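+
+As a hedged sketch of that last point (using hypothetical
+`last_name_bin` and `state_bin` indexes; the `get_index` call is shown
+in the Python examples below), a composite query can be emulated by
+running two single-index queries and intersecting the results
+client-side:
+
+```python
+bucket = client.bucket('users')
+# Run each single-index query separately...
+by_name = set(bucket.get_index('last_name_bin', 'zezeski').results)
+by_state = set(bucket.get_index('state_bin', 'MD').results)
+# ...then merge: keys matching last_name=zezeski AND state=MD
+matching_keys = by_name & by_state
+```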
+
+## Query Interfaces and Examples
+
+Typically, the result set from a 2i query is a list of object keys from
+the specified bucket that include the index values in question. As we'll
+see below, when executing range queries in Riak 1.4 or higher, it is
+possible to retrieve the index values along with the object keys.
+
+### Inserting Objects with Secondary Indexes
+
+In this example, the key `john_smith` is used to store user data in the
+bucket `users`, which bears the `default` bucket type. Let's say that an
+application would like to add a Twitter handle and an email address to this
+object as secondary indexes.
+
+```java
+Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith");
+
+// In the Java client (and all clients), if you do not specify a bucket type,
+// the client will use the default type. And so the following store command
+// would be equivalent to the one above:
+Location johnSmithKey = new Location(new Namespace("users"), "john_smith");
+
+RiakObject obj = new RiakObject()
+    .setContentType("application/json")
+    .setValue(BinaryValue.create("{'user_data':{ ... }}"));
+
+obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123");
+obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com");
+
+StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(johnSmithKey)
+    .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('default').bucket('users')
+obj = Riak::RObject.new(bucket, 'john_smith')
+obj.content_type = 'application/json'
+obj.raw_data = '{"user_data":{ ... }}'
+
+# String/binary indexes must be set as an array of strings
+obj.indexes['twitter_bin'] = %w{ jsmith123 }
+obj.indexes['email_bin'] = %w{ jsmith@basho.com }
+obj.store
+
+# In the Ruby client (and all clients), if you do not specify a bucket
+# type, the client will use the default type. And so the following set
+# of commands would be equivalent to the one above:
+
+bucket = client.bucket('users')
+# repeat the same commands for building the object
+obj.store
+```
+
+```php
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter_bin', 'jsmith123')
+    ->addValueToIndex('email_bin', 'jsmith@basho.com');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->buildLocation('john_smith', 'users', 'default')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('default').bucket('users')
+# In the Python client (and all clients), if you do not specify a bucket type,
+# the client will use the default type. And so the following store command
+# would be equivalent to the one above:
+bucket = client.bucket('users')
+
+obj = RiakObject(client, bucket, 'john_smith')
+obj.content_type = 'text/plain'
+obj.data = '...user data...'
+obj.add_index('twitter_bin', 'jsmith123')
+obj.add_index('email_bin', 'jsmith@basho.com')
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("default", "users", "john_smith");
+var obj = new RiakObject(id, "...user data...",
+    RiakConstants.ContentTypes.TextPlain);
+obj.BinIndex("twitter").Set("jsmith123");
+obj.BinIndex("email").Set("jsmith@basho.com");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucket('users');
+riakObj.setKey('john_smith');
+riakObj.setValue('...user data...');
+riakObj.addToIndex('twitter_bin', 'jsmith123');
+riakObj.addToIndex('email_bin', 'jsmith@basho.com');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"default">>, <<"users">>},
+                    <<"john_smith">>,
+                    <<"...user data...">>,
+                    <<"text/plain">>),
+%% In the Erlang client (and all clients), if you do not specify a bucket type,
+%% the client will use the default type. And so the following object would be
+%% equivalent to the one above:
+
+Obj = riakc_obj:new(<<"users">>,
+                    <<"john_smith">>,
+                    <<"...user data...">>,
+                    <<"text/plain">>),
+MD1 = riakc_obj:get_update_metadata(Obj),
+MD2 = riakc_obj:set_secondary_index(
+    MD1,
+    [{{binary_index, "twitter"}, [<<"jsmith123">>]},
+     {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]),
+Obj2 = riakc_obj:update_metadata(Obj, MD2),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    BucketType:      "indexes",
+    Bucket:          "users",
+    Key:             "john_smith",
+    Value:           []byte("…user data…"),
+}
+
+obj.AddToIndex("twitter_bin", "jsmith123")
+obj.AddToIndex("email_bin", "jsmith@basho.com")
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithContent(obj).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \
+  -H 'x-riak-index-twitter_bin: jsmith123' \
+  -H 'x-riak-index-email_bin: jsmith@basho.com' \
+  -H 'Content-Type: application/json' \
+  -d '{"userData":"data"}'
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries), you can find more information about getting started with
+your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.0p5/developing/getting-started) section.
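+
+Reading the object back should show both indexes attached to it. A
+minimal sketch, assuming the Python client from above (which exposes an
+object's indexes as a collection of field/value pairs):
+
+```python
+obj = client.bucket('users').get('john_smith')
+for field, value in obj.indexes:
+    print(field, value)
+# twitter_bin jsmith123
+# email_bin jsmith@basho.com
+```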
+ +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new /Basho/Riak/Command/Builder/QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if (rslt.values.length > 0) { + Array.prototype.push.apply(query_keys, + rslt.values.map(function (value) { + return value.objectKey; + })); + } +} + +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucket('users') + .withIndexName('twitter_bin') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); +``` + +```erlang +{ok, Results} = + riakc_pb_socket:get_index(Pid, + <<"users">>, %% bucket + {binary_index, "twitter"}, %% index name + <<"jsmith123">>). %% index +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("twitter_bin"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl localhost:8098/buckets/users/index/twitter_bin/jsmith123 +``` + +The response: + +```java +john_smith +``` + +```ruby +["john_smith"] +``` + +```php +['john_smith'] +``` + +```python +['john_smith'] +``` + +```csharp +john_smith +``` + +```javascript +john_smith +``` + +```erlang +{ok,{index_results_v1,[<<"john_smith">>], + undefined,undefined}}. +``` + +```golang +john_smith +``` + +```curl +{ + "keys": [ + "john_smith" + ] +} +``` + +## Examples + +To run the following examples, make sure that Riak is configured to use +an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory]. + +## Indexing Objects + +The following example indexes four different objects. 
Notice that we're
+storing both integer and string (aka binary) fields. Field names are
+automatically lowercased, some fields have multiple values, and
+duplicate fields are automatically de-duplicated, as in the following
+example:
+
+```java
+Namespace peopleBucket = new Namespace("indexes", "people");
+
+RiakObject larry = new RiakObject()
+    .setValue(BinaryValue.create("My name is Larry"));
+larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
+larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
+StoreValue storeLarry = new StoreValue.Builder(larry)
+    .withLocation(new Location(peopleBucket, "larry"))
+    .build();
+client.execute(storeLarry);
+
+RiakObject moe = new RiakObject()
+    .setValue(BinaryValue.create("My name is Moe"));
+moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
+moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
+StoreValue storeMoe = new StoreValue.Builder(moe)
+    .withLocation(new Location(peopleBucket, "moe"))
+    .build();
+client.execute(storeMoe);
+
+RiakObject curly = new RiakObject()
+    .setValue(BinaryValue.create("My name is Curly"));
+curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
+curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
+StoreValue storeCurly = new StoreValue.Builder(curly)
+    .withLocation(new Location(peopleBucket, "curly"))
+    .build();
+client.execute(storeCurly);
+
+RiakObject veronica = new RiakObject()
+    .setValue(BinaryValue.create("My name is Veronica"));
+veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
+    .add("val4").add("val4");
+veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
+    .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
+StoreValue storeVeronica = new StoreValue.Builder(veronica)
+    .withLocation(new Location(peopleBucket, "veronica"))
+    .build();
+client.execute(storeVeronica);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = Riak::RObject.new(bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'My name is Larry'
+obj1.indexes['field1_bin'] = %w{ val1 }
+# Like binary/string indexes, integer indexes must be set as an array,
+# even if you wish to add only a single index
+obj1.indexes['field2_int'] = [1001]
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'My name is Moe'
+obj2.indexes['Field1_bin'] = %w{ val2 }
+obj2.indexes['Field2_int'] = [1002]
+obj2.store
+
+obj3 = Riak::RObject.new(bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.raw_data = 'My name is Curly'
+obj3.indexes['FIELD1_BIN'] = %w{ val3 }
+obj3.indexes['FIELD2_INT'] = [1003]
+obj3.store
+
+obj4 = Riak::RObject.new(bucket, 'veronica')
+obj4.content_type = 'text/plain'
+obj4.raw_data = 'My name is Veronica'
+obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
+# Note: reassigning an index overwrites its values, so all of the
+# integer values (duplicates included) are set in a single array
+obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1007]
+obj4.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('people', 'indexes');
+
+$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val1')
+    ->addValueToIndex('field2_int', 1001);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('larry', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('Field1_bin', 'val2')
+    ->addValueToIndex('Field2_int', 1002);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('moe', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('FIELD1_BIN', 'val3')
+    ->addValueToIndex('FIELD2_int', 1003);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('curly', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4a')
+    ->addValueToIndex('field1_bin', 'val4b')
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1005)
+    ->addValueToIndex('field2_int', 1006)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1007);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = RiakObject(client, bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.data = 'My name is Larry'
+obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.data = 'My name is Moe'
+obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
+obj2.store()
+
+obj3 = RiakObject(client, bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.data = 'Curly'
+obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
+obj3.store()
+
+obj4 = RiakObject(client, bucket, 'veronica')
+obj4.content_type = 'text/plain'
+obj4.data = 'Veronica'
+obj4.add_index('field1_bin', 'val4') \
+    .add_index('field1_bin', 'val4a') \
+    .add_index('field1_bin', 'val4b') \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1005) \
+    .add_index('field2_int', 1006) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1007)
+obj4.store()
+```
+
+```csharp
+var larryId = new RiakObjectId("indexes", "people", "larry");
+var larry = new RiakObject(larryId, "My name is Larry",
+    RiakConstants.ContentTypes.TextPlain);
+
+larry.BinIndex("field1").Set("val1");
+larry.IntIndex("field2").Set(1001);
+
+client.Put(larry);
+
+var moeId = new RiakObjectId("indexes", "people", "moe");
+var moe = new RiakObject(moeId, "My name is Moe",
+    RiakConstants.ContentTypes.TextPlain);
+
+moe.BinIndex("Field1").Set("val2");
+moe.IntIndex("Field2").Set(1002);
+
+client.Put(moe);
+
+var curlyId = new RiakObjectId("indexes", "people", "curly");
+var curly = new RiakObject(curlyId, "My name is Curly",
+    RiakConstants.ContentTypes.TextPlain);
+
+curly.BinIndex("FIELD1").Set("val3");
+curly.IntIndex("FIELD2").Set(1003);
+
+client.Put(curly);
+
+var veronicaId = new RiakObjectId("indexes", "people", "veronica");
+var veronica = new RiakObject(veronicaId, "My name is Veronica",
+    RiakConstants.ContentTypes.TextPlain);
+
+veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" });
+veronica.IntIndex("FIELD2").Set(new BigInteger[] {
+    1004, 1005, 1006, 1004, 1004, 1007
+});
+
+client.Put(veronica); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var storeFuncs = [ + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('larry'); + riakObj.setValue('My name is Larry'); + riakObj.addToIndex('field1_bin', 'val1'); + riakObj.addToIndex('field2_int', 1001); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('moe'); + riakObj.setValue('My name is Moe'); + riakObj.addToIndex('Field1_bin', 'val2'); + riakObj.addToIndex('Field2_int', 1002); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('curly'); + riakObj.setValue('My name is Curly'); + riakObj.addToIndex('FIELD1_BIN', 'val3'); + riakObj.addToIndex('FIELD2_INT', 1003); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('veronica'); + riakObj.setValue('My name is Veronica'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4a'); + riakObj.addToIndex('FIELD1_bin', 'val4b'); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1005); + riakObj.addToIndex('FIELD2_int', 1006); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1007); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + } +]; +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Larry = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"My name is Larry">>, + <<"text/plain">>), +LarryMetadata = riakc_obj:get_update_metadata(Larry), +LarryIndexes = riakc_obj:set_secondary_index( + LarryMetadata, + [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}] +), +LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes). + +Moe = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"moe">>, + <<"My name is Moe">>, + <<"text/plain">>), +MoeMetadata = riakc_obj:get_update_metadata(Moe), +MoeIndexes = riakc_obj:set_secondary_index( + MoeMetadata, + [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}] +), +MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes). 
+ +Curly = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"curly">>, + <<"My name is Curly">>, + <<"text/plain">>), +CurlyMetadata = riakc_obj:get_update_metadata(Curly), +CurlyIndexes = riakc_obj:set_secondary_index( + CurlyMetadata, + [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}] +), +CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes). + +Veronica = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"veronica">>, + <<"My name is Veronica">>, + <<"text/plain">>), +VeronicaMetadata = riakc_obj:get_update_metadata(Veronica), +VeronicaIndexes = riakc_obj:set_secondary_index( + VeronicaMetadata, + [{{binary_index, "field1"}, [<<"val4">>]}, {{binary_index, "field1"}, [<<"val4">>]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1005]}, {{integer_index, "field2"}, [1006]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1007]}] +), +VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes). +``` + +```golang +o1 := &riak.Object{ + Key: "larry", + Value: []byte("My name is Larry"), +} +o1.AddToIndex("field1_bin", "val1") +o1.AddToIntIndex("field2_int", 1001) + +o2 := &riak.Object{ + Key: "moe", + Value: []byte("My name is Moe"), +} +o2.AddToIndex("Field1_bin", "val2") +o2.AddToIntIndex("Field2_int", 1002) + +o3 := &riak.Object{ + Key: "curly", + Value: []byte("My name is Curly"), +} +o3.AddToIndex("FIELD1_BIN", "val3") +o3.AddToIntIndex("FIELD2_INT", 1003) + +o4 := &riak.Object{ + Key: "veronica", + Value: []byte("My name is Veronica"), +} +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4a") +o4.AddToIndex("FIELD1_bin", "val4b") +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1005) +o4.AddToIntIndex("FIELD2_int", 1006) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1007) + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "text/plain" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithContent(obj). 
+        Build()
+    if err != nil {
+        return err
+    }
+
+    args := &riak.Async{
+        Command: cmd,
+        Wait:    wg,
+    }
+    if err := cluster.ExecuteAsync(args); err != nil {
+        return err
+    }
+}
+
+wg.Wait()
+```
+
+```curl
+curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \
+  -H "x-riak-index-field1_bin: val1" \
+  -H "x-riak-index-field2_int: 1001" \
+  -d 'My name is Larry'
+
+curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \
+  -H "x-riak-index-Field1_bin: val2" \
+  -H "x-riak-index-Field2_int: 1002" \
+  -d 'My name is Moe'
+
+curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \
+  -H "X-RIAK-INDEX-FIELD1_BIN: val3" \
+  -H "X-RIAK-INDEX-FIELD2_INT: 1003" \
+  -d 'My name is Curly'
+
+curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \
+  -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \
+  -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \
+  -H "x-riak-index-field2_int: 1004" \
+  -H "x-riak-index-field2_int: 1004" \
+  -H "x-riak-index-field2_int: 1004" \
+  -H "x-riak-index-field2_int: 1007" \
+  -d 'My name is Veronica'
+```
+
+The above objects will end up having the following secondary indexes,
+respectively:
+
+* `Larry` --- Binary index `field1_bin` and integer index `field2_int`
+* `Moe` --- Binary index `field1_bin` and integer index `field2_int`
+  (note that the index names are set to lowercase by Riak)
+* `Curly` --- Binary index `field1_bin` and integer index `field2_int`
+  (note again that the index names are set to lowercase)
+* `Veronica` --- Binary index `field1_bin` with the values `val4`,
+  `val4a`, and `val4b` and integer index `field2_int` with the values
+  `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed)
+
+As these examples show, there are safeguards in Riak that both normalize
+the names of indexes and prevent the accumulation of redundant indexes.
+
+## Invalid Field Names and Types
+
+The following examples demonstrate what happens when an index field is
+specified with an invalid field name or type. The system responds with
+`400 Bad Request` and a description of the error.
+
+Invalid field name:
+
+```java
+// The Java client will not allow you to provide invalid index names,
+// because you are not required to add "_bin" or "_int" to the end of
+// those names
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+obj = Riak::RObject.new(bucket, 'larry')
+obj.indexes['field2_foo'] = [1001]
+
+# The Ruby client will let you get away with this...at first. But when
+# you attempt to store the object, you will get an error response such
+# as this:
+
+NoMethodError: undefined method 'map' for 1001:Fixnum
+```
+
+```php
+// throws \InvalidArgumentException
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter', 'jsmith123');
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+obj = RiakObject(client, bucket, 'larry')
+obj.add_index('field2_foo', 1001)
+
+# Result:
+riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'."
+```
+
+```csharp
+// The Riak .NET Client will not allow you to provide invalid index names,
+// because you are not required to add "_bin" or "_int" to the end of
+// those names
+```
+
+```javascript
+var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_foo')
+    .withIndexKey('jsmith123')
+    .withCallback(query_cb)
+    .build();
+client.execute(cmd);
+
+// Produces the following stack trace (truncated):
+error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server,
+    send_error,
+    [{unknown_field_type,
+      <<"field2_foo">>},
+     {state,
+      {gen_tcp,inet},
+      #Port<0.68338>,
+      undefined,
+      ...
+      ...
+      ...
+```
+
+```erlang
+Obj = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"larry">>,
+    <<"some data">>,
+    <<"text/plain">>
+),
+MD1 = riakc_obj:get_update_metadata(Obj),
+MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]).
+
+%% The Erlang client will return an error message along these lines:
+** exception error: no function clause matching
+                    riakc_obj:set_secondary_index( ... ).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_foo").
+    WithIndexKey("jsmith123").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println("[DevUsing2i] field name error:", err)
+} else {
+    return errors.New("[DevUsing2i] expected an error!")
+}
+
+// Produces the following stack trace (truncated):
+error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server,
+    send_error,
+    [{unknown_field_type,
+      <<"field2_foo">>},
+     {state,
+      {gen_tcp,inet},
+      #Port<0.68338>,
+      undefined,
+      ...
+      ...
+      ...
+```
+
+```curl
+curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \
+  -H "x-riak-index-field2_foo: 1001" \
+  -d 'data1'
+
+# Response
+Unknown field type for field: 'field2_foo'.
+```
+
+Incorrect data type:
+
+```java
+Location key = new Location(new Namespace("people"), "larry");
+RiakObject obj = new RiakObject();
+obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar");
+
+// The Java client will return a response indicating a type mismatch.
+// The output may look something like this:
+
+Error:(46, 68) java: no suitable method found for add(java.lang.String)
+    method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable
+      (argument mismatch; java.lang.String cannot be converted to java.lang.Long)
+    method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable
+      (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>)
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+obj = Riak::RObject.new(bucket, 'larry')
+obj.indexes['field2_int'] = %w{ bar }
+
+# The Ruby client will let you get away with this...at first. But when
+# you attempt to store the object, you will get an error response such
+# as this:
+
+NoMethodError: undefined method 'map' for 1001:Fixnum
+```
+
+```php
+// throws \InvalidArgumentException
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter_int', 'not_an_int');
+
+// throws \InvalidArgumentException
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']);
+
+// does not throw an exception, it will just write ints as a string
+// only requirement is that value is scalar (int, float, string, bool)
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter_bin', 12);
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+obj = RiakObject(client, bucket, 'larry')
+obj.add_index('field2_int', 'bar')
+
+# The Python client will let you get away with this...at first. But when you
+# attempt to store the object, you will get an error response such as this:
+riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}'
+```
+
+```csharp
+var id = new RiakObjectId("indexes", "people", "larry");
+var obj = new RiakObject(id, "test value", "text/plain");
+var intIdx = obj.IntIndex("test-int-idx");
+intIdx.Add("invalid-value");
+
+// The .NET client will throw a FormatException at this point
+// The output may look something like this:
+
+The value could not be parsed.
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucketType('indexes');
+riakObj.setBucket('people');
+riakObj.setKey('larry');
+riakObj.addToIndex('field2_int', 'bar');
+try {
+    client.storeValue({ value: riakObj }, function (err, rslt) {
+        logger.error("incorrect_data_type err: '%s'", err);
+    });
+} catch (e) {
+    logger.error("incorrect_data_type err: '%s'", e);
+}
+
+// Output:
+buffer.js:67
+    throw new TypeError('must start with number, buffer, array or string');
+          ^
+TypeError: must start with number, buffer, array or string
+    at new Buffer (buffer.js:67:11)
+```
+
+```erlang
+Obj = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"larry">>,
+    <<"some data">>,
+    <<"text/plain">>
+),
+MD1 = riakc_obj:get_update_metadata(Obj),
+MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]).
+
+%% The Erlang client will return an error message along these lines:
+** exception error: bad argument
+     in function  integer_to_list/1
+        called as integer_to_list(<<"bar">>) ...
+```
+
+```golang
+obj := &riak.Object{
+    BucketType:      "indexes",
+    Bucket:          "people",
+    Key:             "larry",
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("My name is Larry"),
+}
+obj.AddToIndex("field2_int", "bar")
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithContent(obj).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println("[DevUsing2i] index data type error:", err)
+} else {
+    return errors.New("[DevUsing2i] expected an error!")
+}
+
+// The riak.Error object will contain:
+{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}
+```
+
+```curl
+curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \
+  -H "x-riak-index-field2_int: bar" \
+  -d 'data1'
+
+# Response
+HTTP/1.1 400 Bad Request
+
+Could not parse field 'field2_int', value 'bar'.
+```
+
+## Querying
+
+> **Note on 2i queries and the R parameter**
+>
+> For all 2i queries, the [R]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1,
+which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#hinted-handoff) and related operations are underway may not
+return all keys as expected.
+>
+> To avoid issues such as the above, a new option has been added to the `riak.conf` file to allow you to disable or enable node participation in 2i queries. Setting `participate_in_coverage = disabled` will prevent the node in question from participating. The recommended usage of this feature is to prevent newly added nodes that have not yet received all of their data from participating in 2i queries and generating inconsistent results. Changing the `participate_in_coverage` setting requires Riak to be restarted on that node for the change to take effect. The default setting is `enabled`.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withScalarValue('val1')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+    .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withScalarValue(1001)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field1_bin",
+    "key": "val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4")
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2'..'val4')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withRangeValue('val2', 'val4')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2', 'val4')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withRange('val2', 'val4')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"}, %% index name
+    <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4"
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithRange("val2", "val4").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4
+```
+
+Or query an integer index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L)
+    .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002..1004)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withRangeValue(1002, 1004)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002, 1004)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withRange(1002, 1004)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"}, %% index name
+    1002, 1004 %% range query for keys between 1002 and 1004
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntRange(1002, 1004).
+    Build()
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field2_int",
+    "start": 1002,
+    "end": 1004
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+    .withKeyAndIndex(true)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('rock', 'rocl')
+    ->withReturnTerms()
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">> %% range query for keys between "rock" and "rocl"
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+    .withMaxResults(5)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5)
+```
+
+```csharp
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+function do_query(continuation) {
+    var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+        .withBucketType('indexes')
+        .withBucket('tweets')
+        .withIndexName('hashtags_bin')
+        .withRange('ri', 'ru')
+        .withMaxResults(5)
+        .withCallback(pagination_cb);
+
+    if (continuation) {
+        binIdxCmdBuilder.withContinuation(continuation);
+    }
+
+    client.execute(binIdxCmdBuilder.build());
+}
+
+var query_keys = [];
+function pagination_cb(err, rslt) {
+    if (err) {
+        logger.error("query_cb err: '%s'", err);
+        return;
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+        query_keys = [];
+
+        if (rslt.continuation) {
+            do_query(rslt.continuation);
+        }
+    }
+
+    if (rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+do_query();
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [{max_results, 5}]
+).
+```
+
+```golang
+func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error {
+    builder := riak.NewSecondaryIndexQueryCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("tweets").
+        WithIndexName("hashtags_bin").
+        WithRange("ri", "ru").
+        WithMaxResults(5)
+
+    if continuation != nil && len(continuation) > 0 {
+        builder.WithContinuation(continuation)
+    }
+
+    cmd, err := builder.Build()
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    printIndexQueryResults(cmd)
+
+    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
+    if sciq.Response == nil {
+        return errors.New("[DevUsing2i] expected response but did not get one")
+    }
+
+    rc := sciq.Response.Continuation
+    if rc != nil && len(rc) > 0 {
+        return doPaginatedQuery(cluster, sciq.Response.Continuation)
+    }
+
+    return nil
+}
+
+func queryingPagination(cluster *riak.Cluster) error {
+    return doPaginatedQuery(cluster, nil)
+}
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true"
+```
+
+Here is an example JSON response (your client-specific response may differ):
+
+```json
+{
+  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
+  "results": [
+    { "rice": "349222574510710785" },
+    { "rickross": "349222868095217664" },
+    { "ridelife": "349221819552763905" },
+    { "ripjake": "349220649341952001" },
+    { "ripjake": "349220687057129473" }
+  ]
+}
+```
+
+Take the continuation value from the previous result set and feed it
+back into the query.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+    .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
+    .withMaxResults(5)
+    .withKeyAndIndex(true)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+  'hashtags_bin',
+  'ri'..'ru',
+  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+  max_results: 5,
+  return_terms: true
+)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+    'hashtags_bin',
+    'ri', 'ru',
+    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+    max_results=5,
+    return_terms=True
+)
+```
+
+```csharp
+// rslt is the previous 2i fetch result
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+// See above example
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [
+        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
+        {max_results, 5},
+        {return_terms, true}
+    ]
+).
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with `pagination` and `return_terms`.
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys. See the pagination example above: hash
+tags (2i keys) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages which contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index like in the previous example and pipes this into a MapReduce that
+counts the number of records in the `people` bucket. In order to
+improve efficiency, the batch size has been increased from the default
+size of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key": "people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
diff --git a/content/riak/kv/2.9.0p5/developing/usage/security.md b/content/riak/kv/2.9.0p5/developing/usage/security.md
new file mode 100644
index 0000000000..7a24225c3a
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/security.md
@@ -0,0 +1,107 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/advanced/client-security
+  - /riak/kv/2.9.0p5/dev/advanced/client-security
+  - /riak/2.9.0p5/developing/usage/security/
+  - /riak/2.9.0/developing/usage/security/
+  - /riak/kv/2.9.0/developing/usage/security/
+  - /riak/kv/2.9.0p1/developing/usage/security/
+  - /riak/kv/2.9.0p2/developing/usage/security/
+  - /riak/kv/2.9.0p3/developing/usage/security/
+  - /riak/kv/2.9.0p4/developing/usage/security/
+---
+
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/2.9.0p5/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, aka [security sources]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients present a client certificate signed by a shared
+  Certificate Authority (CA)
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified using the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis. This means that
+you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce/)
+operations to use certificate auth, while requiring clients performing [K/V Operations]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage) to use username and password. The approach
+that you adopt will depend on your security needs.
+
+This document provides a general overview of how that works. For
+managing security in Riak itself, see the following documents:
+
+* [Authentication and Authorization]({{<baseurl>}}riak/kv/2.9.0p5/using/security/basics)
+* [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/)
+
+We also provide client-library-specific guides for the following
+officially supported clients:
+
+* [Java]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/security/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/security/ruby)
+* [PHP]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/security/php)
+* [Python]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/security/python)
+* [Erlang]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/security/erlang)
+
+## Certificates, Keys, and Authorities
+
+If Riak security is enabled, all client operations, regardless of the
+security source you choose for those clients, must be over a secure SSL
+connection. If you are using a self-generated Certificate Authority
+(CA), Riak and connecting clients will need to share that CA.
+
+To use certificate-based auth, you will need to create a Public Key
+Infrastructure (PKI) based on
+[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central
+foundation of your PKI should be a Certificate Authority (CA), created
+inside of a secure environment, that can be used to sign certificates.
+In addition to a CA, your client will need to have access to a private
+key shared only by the client and Riak as well as a CA-generated
+certificate.
+
+To prevent so-called [Man-in-the-Middle
+attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private
+keys should never be shared beyond Riak and connecting clients.
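+
+To make those pieces concrete, the sketch below shows how the three
+artifacts are typically handed to a client, here using the Ruby client's
+options (covered in detail in the client-library-specific guides listed
+above) and the default file names described under Default Names below.
+This is an illustration only; the paths and username are example values:
+
+```ruby
+require 'riak'
+
+# A sketch, not a complete setup: the shared CA, the CA-generated
+# client certificate, and the client's private key are the three
+# PKI artifacts described above.
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    user: 'riakuser',
+    ca_file: '/ssl_dir/cacertfile.pem', # shared Certificate Authority
+    cert: '/ssl_dir/cert.pem',          # CA-generated client certificate
+    key: '/ssl_dir/key.pem'             # private key, shared only with Riak
+  }
+)
+```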
+
+> **HTTP not supported**
+>
+> Certificate-based authentication is available only through Riak's
+[Protocol Buffers]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers/) interface. It is not available through the
+[HTTP API]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/http).
+
+### Default Names
+
+In Riak's [configuration files]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#security), the
+default certificate file names are as follows:
+
+Cert | Filename
+:----|:-------
+Certificate authority (CA) | `cacertfile.pem`
+Private key | `key.pem`
+CA-generated cert | `cert.pem`
+
+These filenames will be used in the client-library-specific tutorials.
diff --git a/content/riak/kv/2.9.0p5/developing/usage/security/erlang.md b/content/riak/kv/2.9.0p5/developing/usage/security/erlang.md
new file mode 100644
index 0000000000..dda2795110
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/security/erlang.md
@@ -0,0 +1,122 @@
+---
+title_supertext: "Client Security:"
+title: "Erlang"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Erlang"
+    identifier: "usage_security_erlang"
+    weight: 103
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/advanced/client-security/erlang
+  - /riak/kv/2.9.0p5/dev/advanced/client-security/erlang
+  - /riak/2.9.0p5/developing/usage/security/erlang/
+  - /riak/2.9.0/developing/usage/security/erlang/
+  - /riak/kv/2.9.0/developing/usage/security/erlang/
+  - /riak/kv/2.9.0p1/developing/usage/security/erlang/
+  - /riak/kv/2.9.0p2/developing/usage/security/erlang/
+  - /riak/kv/2.9.0p3/developing/usage/security/erlang/
+  - /riak/kv/2.9.0p4/developing/usage/security/erlang/
+---
+
+
+This tutorial shows you how to set up a Riak Erlang client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/) or [PAM-]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#pam-based-authentication)based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username.
The example +above created a connection to Riak without specifying a username or CA. +That information is specified as a list of options passed to the +`start` function. We'll specify those options in a list called +`SecurityOptions`. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", ""}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + +Please note that you do not need to specify a password if you are not +using password-based authentication. If you are using a different +security source, Riak will ignore the password. You can enter an empty +string (as in the example above) or anything you'd like. + +This client is not currently set up to use any of the available security +sources, with the exception of trust-based authentication, provided that +the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which the client is connecting has been specified as trusted. More +on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#trust-based-authentication). + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +information from the example above, with the exception that we'll also +specify a password for the client in the `SecurityOptions` list from +above. We'll use the password `rosebud` here and in the rest of the +examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.0p5/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). 
+``` diff --git a/content/riak/kv/2.9.0p5/developing/usage/security/java.md b/content/riak/kv/2.9.0p5/developing/usage/security/java.md new file mode 100644 index 0000000000..7ac8227118 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/usage/security/java.md @@ -0,0 +1,125 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.0p5/dev/advanced/client-security/java + - /riak/kv/2.9.0p5/dev/advanced/client-security/java + - /riak/2.9.0p5/developing/usage/security/java/ + - /riak/2.9.0/developing/usage/security/java/ + - /riak/kv/2.9.0/developing/usage/security/java/ + - /riak/kv/2.9.0p1/developing/usage/security/java/ + - /riak/kv/2.9.0p2/developing/usage/security/java/ + - /riak/kv/2.9.0p3/developing/usage/security/java/ + - /riak/kv/2.9.0p4/developing/usage/security/java/ +--- + + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a cluster object (we'll call it `cluster`), which will in turn be +used to create a `client` object. The setup below does not specify a CA: + +```java +import com.basho.riak.client.api.RiakClient; +import com.basho.riak.client.api.RiakCluster; +import com.basho.riak.client.api.RiakNode; + +RiakNode node = new RiakNode.Builder() + .withRemoteAddress("127.0.0.1") + .withRemotePort(8087) + // This will specify a username but no password or keystore: + .withAuth("riakuser", null, null) + .build(); + +RiakCluster cluster = new RiakCluster.Builder(node) + .build(); + +RiakClient client = new RiakClient(cluster); +``` + +This client object is not currently set up to use any of the available +security sources. 
This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+    .withRemoteAddress("127.0.0.1")
+    .withRemotePort(8087)
+    .withAuth("riakuser", "rosebud", ks)
+    .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
diff --git a/content/riak/kv/2.9.0p5/developing/usage/security/php.md b/content/riak/kv/2.9.0p5/developing/usage/security/php.md
new file mode 100644
index 0000000000..157eee1dc1
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/security/php.md
@@ -0,0 +1,126 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/advanced/client-security/php
+  - /riak/kv/2.9.0p5/dev/advanced/client-security/php
+  - /riak/2.9.0p5/developing/usage/security/php/
+  - /riak/2.9.0/developing/usage/security/php/
+  - /riak/kv/2.9.0/developing/usage/security/php/
+  - /riak/kv/2.9.0p1/developing/usage/security/php/
+  - /riak/kv/2.9.0p2/developing/usage/security/php/
+  - /riak/kv/2.9.0p3/developing/usage/security/php/
+  - /riak/kv/2.9.0p4/developing/usage/security/php/
+---
+
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics). [Certificate]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#certificate-based-authentication)-based authentication is not
+yet supported in the PHP client due to limitations of the HTTP interface of Riak.
+
+## PHP Client Basics
+
+When connecting to Riak using a PHP-based client, you typically do so
+by instantiating separate `\Basho\Riak\Node` objects for each node in your
+cluster and passing those `\Basho\Riak\Node` objects as an array to a
+`\Basho\Riak` object as a dependency. In this document, we will be working with
+only one node.
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of
+security source. The example below sets up a single node object (we'll
+simply call it `node`) that connects to Riak on `localhost` and on port
+8087 and specifies `riakuser` as a username. That object will be used to
+create a Riak object. The setup below does not specify a CA and will throw
+a `\Basho\Riak\Node\Builder\Exception`:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Node;
+
+$node = (new Node\Builder())
+    ->atHost('127.0.0.1')
+    ->onPort('8087')
+    ->usingPasswordAuthentication('riakuser')
+    ->build();
+
+// since we are using a single node, it needs to be wrapped in array brackets
+$riak = new Riak([$node]);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `usingPasswordAuthentication` method in
+the `node` object's builder rather than omitting it. We will also
+pass the path of the CA file relative to the current working directory into
+the `withCertificateAuthorityFile` method.
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Node;
+
+$node = (new Node\Builder())
+    ->atHost('127.0.0.1')
+    ->onPort('8087')
+    ->usingPasswordAuthentication('riakuser', 'rosebud')
+    ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem')
+    ->build();
+
+// since we are using a single node, it needs to be wrapped in array brackets
+$riak = new Riak([$node]);
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password. There are helper methods that handle this for you,
+`usingPamAuthentication` and `usingTrustAuthentication`.
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Node;
+
+// PAM Example
+$node = (new Node\Builder())
+    ->atHost('127.0.0.1')
+    ->onPort('8087')
+    ->usingPamAuthentication('riakuser')
+    ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem')
+    ->build();
+
+// Trust Example
+$node = (new Node\Builder())
+    ->atHost('127.0.0.1')
+    ->onPort('8087')
+    ->usingTrustAuthentication('riakuser')
+    ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem')
+    ->build();
+
+// since we are using a single node, it needs to be wrapped in array brackets
+$riak = new Riak([$node]);
+```
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak PHP client due to limitations in the HTTP interface.
diff --git a/content/riak/kv/2.9.0p5/developing/usage/security/python.md b/content/riak/kv/2.9.0p5/developing/usage/security/python.md
new file mode 100644
index 0000000000..1e2f8442ad
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/security/python.md
@@ -0,0 +1,180 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/advanced/client-security/python
+  - /riak/kv/2.9.0p5/dev/advanced/client-security/python
+  - /riak/2.9.0p5/developing/usage/security/python/
+  - /riak/2.9.0/developing/usage/security/python/
+  - /riak/kv/2.9.0/developing/usage/security/python/
+  - /riak/kv/2.9.0p1/developing/usage/security/python/
+  - /riak/kv/2.9.0p2/developing/usage/security/python/
+  - /riak/kv/2.9.0p3/developing/usage/security/python/
+  - /riak/kv/2.9.0p4/developing/usage/security/python/
+---
+
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/) or [PAM-]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#pam-based-authentication)based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the above, with the exception that we'll also specify a
+password for the client in the `creds` object from above. We'll use the
+password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.0p5/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a general CA (as with all security sources), a username, a
+CA-generated cert, and a private key. We'll assume that all certs are
+stored in `/ssl_dir`, as in the previous examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      cert_file='/ssl_dir/cert.pem',
+                      pkey_file='/ssl_dir/key.pem')
+```
+
+## Specifying a Certificate Revocation List
+
+If you are using a CA-generated Certificate Revocation List (CRL), you
+can specify its filepath using the `crl_file` parameter.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      crl_file='/ssl_dir/revocation.crl')
+```
+
+## Specifying Ciphers
+
+To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/2.9.0p5/using/security/basics/#security-ciphers), you can pass in a colon-delimited
+string to the `ciphers` parameter:
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC')
+```
+
+## Using OpenSSL Objects
+
+Whenever you specify certs, you have the option of either passing in
+file paths as strings (as in the examples above) or properly created
+OpenSSL objects, e.g. objects created using the
+[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library.
If +you generate OpenSSL objects this way, you should note that they must +be specified differently when creating a `SecurityCreds` object. The +table below lists the appropriate parameter names for each method, as +well as the pyOpenSSL class to which each cert must belong if you create +OpenSSL objects. + +Cert | File path | OpenSSL object | Class +:----|:----------|:---------------|:----- +Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509` +Private key | `key_file` | `key` | `OpenSSL.crypto.PKey` +CA-generated cert | `cert` | `cert_file` | `OpenSSL.crypto.X509` +CRL | `crl` | `crl_file` | `OpenSSL.crypto.CRL` + +If you specify filepaths, the appropriate certs will be loaded and +converted into the appropriate OpenSSL object. The functions used for +this are `OpenSSL.crypto.load_privatekey()` for the private key and +`OpenSSL.crypto.load_certificate` for the cert and CA cert. diff --git a/content/riak/kv/2.9.0p5/developing/usage/security/ruby.md b/content/riak/kv/2.9.0p5/developing/usage/security/ruby.md new file mode 100644 index 0000000000..49fa3aa3c1 --- /dev/null +++ b/content/riak/kv/2.9.0p5/developing/usage/security/ruby.md @@ -0,0 +1,166 @@ +--- +title_supertext: "Client Security:" +title: "Ruby" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Ruby" + identifier: "usage_security_ruby" + weight: 101 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.0p5/dev/advanced/client-security/ruby + - /riak/kv/2.9.0p5/dev/advanced/client-security/ruby + - /riak/2.9.0p5/developing/usage/security/ruby/ + - /riak/2.9.0/developing/usage/security/ruby/ + - /riak/kv/2.9.0/developing/usage/security/ruby/ + - /riak/kv/2.9.0p1/developing/usage/security/ruby/ + - /riak/kv/2.9.0p2/developing/usage/security/ruby/ + - /riak/kv/2.9.0p3/developing/usage/security/ruby/ + - /riak/kv/2.9.0p4/developing/usage/security/ruby/ +--- + + +This tutorial shows you how to set up a Riak Ruby client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#pam-based-authentication)-based authentication, you +can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. +[Password]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#certificate-based-authentication)-based authentication +is covered [further down](#certificate-based-authentication). + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Ruby Client Basics + +When connecting to Riak using a Ruby-based client, you must instantiate +an object from the `Riak::Client` class that then handles interactions +with Riak (you may have more than one client object active in an +application if you wish). All authentication-related information that +needs to be used can be passed to the object upon instantiation in an +`authentication` hash. 
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we will
+specify a password for the client in the `authentication` hash. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.0p5/using/security/basics#user-management).
+
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/path/to/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/path/to/client_cert.pem',
+    cert: '/path/to/cert.pem',
+    key: '/path/to/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL object.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. 
You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object.
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)),
+the OCSP endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
diff --git a/content/riak/kv/2.9.0p5/developing/usage/updating-objects.md b/content/riak/kv/2.9.0p5/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..e7940a3db2
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/developing/usage/updating-objects.md
@@ -0,0 +1,782 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/using/updates
+  - /riak/kv/2.9.0p5/dev/using/updates
+  - /riak/2.9.0p5/developing/usage/updating-objects/
+  - /riak/2.9.0/developing/usage/updating-objects/
+  - /riak/kv/2.9.0/developing/usage/updating-objects/
+  - /riak/kv/2.9.0p1/developing/usage/updating-objects/
+  - /riak/kv/2.9.0p2/developing/usage/updating-objects/
+  - /riak/kv/2.9.0p3/developing/usage/updating-objects/
+  - /riak/kv/2.9.0p4/developing/usage/updating-objects/
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context). Context objects track the causal history of objects.
+They are attached to _all_ Riak objects as metadata, and they are not
+readable by humans. They may sound complex---and they are fairly complex
+behind the scenes---but using them in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution), we'll walk you
+through a basic example here. 
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->atLocation($location)
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but it
+%% will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true). 
+ Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +svc := cmd.(*riak.StoreValueCommand) +rsp := svc.Response +obj = rsp.Values[0] +obj.Value = []byte("Harlem Globetrotters") + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithBucketType("sports"). + WithBucket("nba"). + WithKey("champion"). + WithContent(obj). + WithReturnBody(true). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +svc = cmd.(*riak.StoreValueCommand) +rsp = svc.Response +obj = rsp.Values[0] +fmt.Printf("champion: %v", string(obj.Value)) +``` + +```curl +# When using curl, the context object is attached to the X-Riak-Vclock header + +curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion + +# In the resulting output, the header will look something like this: + +X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= + +# When performing a write to the same key, that same header needs to +# accompany the write for Riak to be able to use the context object +``` + +In the samples above, we didn't need to actually interact with the +context object, as retaining and passing along the context object was +accomplished automatically by the client. If, however, you do need +access to an object's context, the clients enable you to fetch it from +the object: + +```java +// Using the RiakObject obj from above: + +Vclock vClock = obj.getVclock(); +System.out.println(vClock.asString()); + +// The context object will look something like this: +// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```ruby +# Using the RObject obj from above: + +obj.vclock + +# The context object will look something like this: +# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```php +# Using the RObject obj from above: + +echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```python +# Using the RiakObject obj from above: + +obj.vclock + +# The context object will look something like this: +# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```csharp +// Using the RiakObject obj from above: +var vclock = result.Value.VectorClock; +Console.WriteLine(Convert.ToBase64String(vclock)); + +// The output will look something like this: +// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```javascript +// Using the RiakObject fetchedObj from above: +var fetchedObj = rslt.values.shift(); +logger.info("vclock: %s", fetchedObj.getVClock().toString('base64')); + +// The output will look something like this: +// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA= +``` + +```erlang +%% Using the Obj object from above: + +riakc_obj:vclock(Obj). + +%% The context object will look something like this in the Erlang shell: +%% <<107,206,97,96,96,96,204,96,2.9.0,82,28,202,156,255,126, +%% 6,175,157,255,57,131,41,145,49,143,149,225,240,...>> +``` + +```golang +svc := cmd.(*riak.StoreValueCommand) +rsp := svc.Response +fmt.Println(rsp.VClock) + +// Output: +// X3hNXFq3ythUqvvrG9eJEGbUyLS +``` + +## The Object Update Cycle + +If you decide that your application requires mutable data in Riak, we +recommend that you: + +* avoid high-frequency object updates to the same key (i.e. multiple + updates per second for long periods of time), as this will degrade + Riak performance; and that you +* follow a read-modify-write cycle when performing updates. 
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated). This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+    * [Java]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/java)
+    * [Ruby]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/ruby)
+    * [Python]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/python)
+    * [C#]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/csharp)
+    * [Go]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+[strong consistency documentation]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official Ruby,
+Python, .NET, Node.js, Erlang and Go clients. 
Because updates with the official
+Java client function somewhat differently, those examples can be found in the
+[section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which will bear the bucket type
+`siblings`, which sets `allow_mult` to `true`. The key for each object
+is the name of the team, e.g. `giants`, `broncos`, etc. Each object will
+consist of the name of the coach in plain text. Here's an example of
+creating and storing such an object:
+
+```ruby
+bucket = client.bucket('coaches')
+obj = bucket.get_or_new('seahawks', type: 'siblings')
+obj.content_type = 'text/plain'
+obj.raw_data = 'Pete Carroll'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings'));
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+
+if ($response->isSuccess()) {
+  $object = $response->getObject();
+  $object->setData('Pete Carroll');
+} else {
+  $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain');
+}
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withObject($object)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('siblings').bucket('coaches')
+obj = RiakObject(client, bucket, 'seahawks')
+obj.content_type = 'text/plain'
+obj.data = 'Pete Carroll'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings", "coaches", "seahawks");
+var obj = new RiakObject(id, "Pete Carroll",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucketType('siblings');
+riakObj.setBucket('coaches');
+riakObj.setKey('seahawks');
+riakObj.setValue('Pete Carroll');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    } else {
+        logger.info('Stored Pete Carroll');
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>},
+                    <<"seahawks">>,
+                    <<"Pete Carroll">>,
+                    <<"text/plain">>).
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Pete Carroll"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("siblings").
+    WithBucket("coaches").
+    WithKey("seahawks").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fmt.Println("Stored Pete Carroll")
+```
+
+Every once in a while, though, head coaches change in the NFL, which
+means that our data would need to be updated. 
Below is an example
+function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($riak, $team, $coach) {
+  $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+  $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute();
+
+  if ($response->isSuccess()) {
+    $object = $response->getObject();
+    $object->setData($coach);
+  } else {
+    $object = new \Basho\Riak\Object($coach, 'text/plain');
+  }
+
+  $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+
+  return $response->isSuccess();
+}
+
+echo update_coach($riak, 'packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Pid, Team, NewCoach) ->
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage
+update_coach(Pid, <<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak. 
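+
+The same cycle also works over the HTTP API. Below is a hedged sketch
+(not part of the original example set); the `X-Riak-Vclock` value shown
+is a placeholder, and in practice you would pass along whatever value
+the read returns:
+
+```curl
+# Read the current value and note the X-Riak-Vclock response header
+curl -i http://localhost:8098/types/siblings/buckets/coaches/keys/packers
+
+# Write the modified value back, passing that same context header
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -H "X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=" \
+  -d "Vince Lombardi" \
+  http://localhost:8098/types/siblings/buckets/coaches/keys/packers
+```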
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If
+you're not certain, however, then we recommend always reading the object
+first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+    public String username;
+    public List<String> hobbies;
+
+    public User(String username, List<String> hobbies) {
+        this.username = username;
+        this.hobbies = hobbies;
+    }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` parameter:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that you can use to replace the existing
+object with a new object of the same type rather than changing one or
+more properties of the object. 
Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above would update the object without fetching it. You
+could, however, use a no-operation update to _read_ an object as well if
+you set `return_body` to `true` in your request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withFetchOption(FetchValue.Option.RETURN_BODY, true)
+        .withUpdate(Update.noopUpdate())
+        .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
diff --git a/content/riak/kv/2.9.0p5/downloads.md b/content/riak/kv/2.9.0p5/downloads.md
new file mode 100644
index 0000000000..5d2b4c5573
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/downloads.md
@@ -0,0 +1,30 @@
+---
+title: "Download for Riak KV 2.9.0"
+description: "Download some stuff!" 
+menu:
+  riak_kv-2.9.0p5:
+    name: "Download Riak KV"
+    identifier: "download_riak_kv"
+    weight: 101
+    pre: download-alt
+project: "riak_kv"
+project_version: "2.9.0p5"
+toc: false
+layout: downloads
+listed_projects:
+  - project: "riak_kv"
+    version: "2.9.0p5"
+    title: "Riak KV"
+    install_instructions_set: "setup/installing"
+aliases:
+  - /riak/2.9.0p5/downloads
+  - /riak/kv/2.9.0p5/downloads
+  - /riak/2.9.0p5/downloads/
+  - /riak/2.9.0/downloads/
+  - /riak/kv/2.9.0/downloads/
+  - /riak/kv/2.9.0p1/downloads/
+  - /riak/kv/2.9.0p2/downloads/
+  - /riak/kv/2.9.0p3/downloads/
+  - /riak/kv/2.9.0p4/downloads/
+---
+
diff --git a/content/riak/kv/2.9.0p5/index.md b/content/riak/kv/2.9.0p5/index.md
new file mode 100644
index 0000000000..2d6e1cfeb6
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/index.md
@@ -0,0 +1,80 @@
+---
+title: "Riak KV 2.9.0p5"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Riak KV"
+    identifier: "index"
+    weight: 100
+    pre: riak
+toc: false
+aliases:
+  - /riak/2.9.0p5/
+  - /riak/2.9.0/
+  - /riak/kv/2.9.0/
+  - /riak/kv/2.9.0p1/
+  - /riak/kv/2.9.0p2/
+  - /riak/kv/2.9.0p3/
+  - /riak/kv/2.9.0p4/
+---
+
+[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/
+[config index]: {{<baseurl>}}riak/kv/2.9.0p5/configuring
+[downloads]: {{<baseurl>}}riak/kv/2.9.0p5/downloads/
+[install index]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/
+[plan index]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning
+[perf open files]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/open-files-limit
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/debian-ubuntu
+[usage search]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search
+[getting started]: {{<baseurl>}}riak/kv/2.9.0p5/developing/getting-started
+[dev client libraries]: {{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries
+
+
+
+Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data.
+
+Riak KV 2.9.0p5 is the first release in the Riak KV 2.9.x family, which is designed specifically as the upgrade path from Riak KV 2.2.x to Riak KV 3.0.x. As such, the feature set of Riak KV 2.9.x is the same as that of Riak KV 3.0.x; the main differences are the performance benefits that KV 3.0.x gains from more modern versions of Erlang, and minor changes to KV 3.0.x's command-line commands owing to the move to relx.
+
+## Supported Operating Systems
+
+- Amazon Linux 2016.09 (AWS)
+- Amazon Linux 2 (AWS)
+- CentOS 6
+- CentOS 7
+- CentOS 8
+- Debian 7.0 ("Wheezy")
+- Debian 8.0 ("Jessie")
+- Debian 9.0 ("Stretch")
+- Red Hat Enterprise Linux 6
+- Red Hat Enterprise Linux 7
+- Red Hat Enterprise Linux 8
+- Raspbian Buster
+- Ubuntu 12.04 ("Precise Pangolin")
+- Ubuntu 14.04 ("Trusty Tahr")
+- Ubuntu 16.04 ("Xenial Xerus")
+- Ubuntu 18.04 ("Bionic Beaver")
+- FreeBSD 10.4
+- FreeBSD 11.1
+- Mac OSX 10.11+ (development only)
+
+## Getting Started
+
+Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started:
+
+1. [Install Riak KV][install index]
+2. [Plan your Riak KV setup][plan index]
+3. [Configure Riak KV for your needs][config index]
+
+{{% note title="Developing with Riak KV" %}}
+If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/2.9.0p5/developing) docs. 
They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. +{{% /note %}} + +## Popular Docs + +1. [Open Files Limit][perf open files] +2. [Installing on Debian-Ubuntu][install debian & ubuntu] +3. [Developing with Riak KV: Searching][usage search] +4. [Developing with Riak KV: Getting Started][getting started] +5. [Developing with Riak KV: Client Libraries][dev client libraries] diff --git a/content/riak/kv/2.9.0p5/learn.md b/content/riak/kv/2.9.0p5/learn.md new file mode 100644 index 0000000000..82e5b65ed4 --- /dev/null +++ b/content/riak/kv/2.9.0p5/learn.md @@ -0,0 +1,55 @@ +--- +title: "Learn About Riak KV" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Learning" + identifier: "learn" + weight: 400 + pre: beaker +toc: true +aliases: + - /riak/2.9.0p5/learn/ + - /riak/2.9.0/learn/ + - /riak/kv/2.9.0/learn/ + - /riak/kv/2.9.0p1/learn/ + - /riak/kv/2.9.0p2/learn/ + - /riak/kv/2.9.0p3/learn/ + - /riak/kv/2.9.0p4/learn/ +--- + +[learn why riak]: ./why-riak-kv/ +[learn use cases]: ./use-cases/ +[learn new nosql]: ./new-to-nosql/ +[glossary]: ./glossary/ +[concepts]: ./concepts/ + +## In This Section + +#### [Why Riak KV?][learn why riak] + +An overview of Riak KV and when to use it. + +[Learn More >>][learn why riak] + +#### [Use Cases][learn use cases] + +Details use cases and applications in which Riak KV excels. + +[Learn More >>][learn use cases] + + + +#### [Glossary][glossary] + +A list of terms relating to Riak used throughout the documentation. + +[Learn More >>][glossary] + +#### [Concepts][concepts] + +Provides definitions for, insight into, and high level information about the various parts of Riak KV + +[Learn More >>][concepts] diff --git a/content/riak/kv/2.9.0p5/learn/concepts.md b/content/riak/kv/2.9.0p5/learn/concepts.md new file mode 100644 index 0000000000..bfa67cabe8 --- /dev/null +++ b/content/riak/kv/2.9.0p5/learn/concepts.md @@ -0,0 +1,53 @@ +--- +title: "Concepts" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Concepts" + identifier: "learn_concepts" + weight: 104 + parent: "learn" +toc: true +aliases: + - /riak/2.9.0p5/learn/concepts/ + - /riak/2.9.0/learn/concepts/ + - /riak/kv/2.9.0/learn/concepts/ + - /riak/kv/2.9.0p1/learn/concepts/ + - /riak/kv/2.9.0p2/learn/concepts/ + - /riak/kv/2.9.0p3/learn/concepts/ + - /riak/kv/2.9.0p4/learn/concepts/ +--- + + +[concept aae]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy +[concept buckets]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets +[concept cap neg]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/capability-negotiation +[concept causal context]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context +[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/strong-consistency +[concept vnodes]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/vnodes +[config index]: {{<baseurl>}}riak/kv/2.9.0p5/configuring +[plan index]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning +[use index]: {{<baseurl>}}riak/kv/2.9.0p5/using/ + + 
+Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. + +Learn more about: + +* [Active Anti-Entropy (AAE)][concept aae] +* [Buckets][concept buckets] +* [Capability Negotiation][concept cap neg] +* [Causal Context][concept causal context] +* [Clusters][concept clusters] +* [Convergent Replicated Data Types (CRDTs)][concept crdts] +* [Eventual Consistency][concept eventual consistency] +* [Keys and Objects][concept keys objects] +* [Replication][concept replication] +* [Virtual Nodes (vnodes)][concept vnodes] diff --git a/content/riak/kv/2.9.0p5/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.9.0p5/learn/concepts/active-anti-entropy.md new file mode 100644 index 0000000000..13994fb8fc --- /dev/null +++ b/content/riak/kv/2.9.0p5/learn/concepts/active-anti-entropy.md @@ -0,0 +1,115 @@ +--- +title: "Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Active Anti-Entropy" + identifier: "learn_concepts_aae" + weight: 100 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.0p5/theory/concepts/aae + - /riak/kv/2.9.0p5/theory/concepts/aae + - /riak/2.9.0p5/learn/concepts/active-anti-entropy/ + - /riak/2.9.0/learn/concepts/active-anti-entropy/ + - /riak/kv/2.9.0/learn/concepts/active-anti-entropy/ + - /riak/kv/2.9.0p1/learn/concepts/active-anti-entropy/ + - /riak/kv/2.9.0p2/learn/concepts/active-anti-entropy/ + - /riak/kv/2.9.0p3/learn/concepts/active-anti-entropy/ + - /riak/kv/2.9.0p4/learn/concepts/active-anti-entropy/ +--- + + +[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency +[config aae]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#active-anti-entropy +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode +[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree +[usage search]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search + + +In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored +on different nodes are an expected byproduct of node failure, concurrent +client updates, physical data loss and corruption, and other events that +distributed systems are built to handle. These conflicts occur when +objects are either + +* **missing**, as when one node holds a replica of the object and + another node does not, or +* **divergent**, as when the values of an existing object differ across + nodes. + +Riak KV offers two means of resolving object conflicts: read repair and +active anti-entropy (AAE). Both of these conflict resolution mechanisms +apply both to normal key/value data in Riak as well as to +[search indexes][usage search] + + +## Read Repair vs. 
Active Anti-Entropy
+
+In versions of Riak prior to 1.3, replica conflicts were healed via
+[read repair][glossary read rep], which is a _passive_
+anti-entropy mechanism that heals object conflicts only when a read
+request reaches Riak from a client. Under read repair, if the
+[vnode][glossary vnode] coordinating the read request determines
+that different nodes hold divergent values for the object, the repair
+process will be set in motion.
+
+One advantage of using read repair alone is that it doesn't require any
+kind of background process to take effect, which can cut down on CPU
+resource usage. The drawback of the read repair-only approach, however,
+is that the healing process can only ever reach those objects that
+are read by clients. Any conflicts in objects that are not read by
+clients will go undetected.
+
+The _active_ anti-entropy (AAE) subsystem was added to Riak in
+versions 1.3 and later to enable conflict resolution to run as a
+continuous background process, in contrast with read repair, which does
+not run continuously. AAE is most useful in clusters containing
+so-called "cold data" that may not be read for long periods of time, even
+months or years, and is thus not reachable by read repair.
+
+Although AAE is enabled by default, it can be turned off if necessary.
+See our documentation on [managing active anti-entropy][cluster ops aae]
+for information on how to enable and disable AAE, as well as on configuring
+and monitoring AAE.
+
+## Active Anti-Entropy and Hash Tree Exchange
+
+In order to compare object values between replicas without using more
+resources than necessary, Riak relies on [Merkle
+tree] hash exchanges between
+nodes.
+
+Using this type of exchange enables Riak to compare a balanced tree of
+Riak object hashes. Any difference at a higher level in the hierarchy
+means that at least one value has changed at a lower level. AAE
+recursively compares the tree, level by level, until it pinpoints exact
+values with a difference between nodes. The result is that AAE is able
+to run repair operations efficiently regardless of how many objects are
+stored in a cluster, since it need only repair specific objects instead
+of all objects.
+
+In contrast with related systems, Riak uses persistent, on-disk hash
+trees instead of in-memory hash trees. The advantages of this approach
+are twofold:
+
+* Riak can run AAE operations with a minimal impact on memory usage
+* Riak nodes can be restarted without needing to rebuild hash trees
+
+In addition, hash trees are updated in real time as new writes come in,
+which reduces the time that it takes to detect and repair missing or
+divergent replicas.
+
+As an additional fallback measure, Riak periodically clears and
+regenerates all hash trees from on-disk key/value data, which enables
+Riak to detect silent data corruption to on-disk data arising from disk
+failure, faulty hardware, and other sources. The default time period for
+this regeneration is one week, but this can be adjusted in each node's
+[configuration file][config aae]. 
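+
+As a quick sketch, the relevant settings live in each node's riak.conf.
+The two names below come from the Riak KV 2.x configuration schema;
+verify them against the riak.conf shipped with your version:
+
+```riakconf
+anti_entropy = active
+anti_entropy.tree.expiry = 1w
+```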
diff --git a/content/riak/kv/2.9.0p5/learn/concepts/buckets.md b/content/riak/kv/2.9.0p5/learn/concepts/buckets.md
new file mode 100644
index 0000000000..22c1142a5c
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/learn/concepts/buckets.md
@@ -0,0 +1,221 @@
+---
+title: "Buckets"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Buckets"
+    identifier: "learn_concepts_buckets"
+    weight: 101
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.0p5/theory/concepts/Buckets
+  - /riak/kv/2.9.0p5/theory/concepts/Buckets
+  - /riak/2.9.0p5/theory/concepts/buckets
+  - /riak/kv/2.9.0p5/theory/concepts/buckets
+  - /riak/2.9.0p5/learn/concepts/buckets/
+  - /riak/2.9.0/learn/concepts/buckets/
+  - /riak/kv/2.9.0/learn/concepts/buckets/
+  - /riak/kv/2.9.0p1/learn/concepts/buckets/
+  - /riak/kv/2.9.0p2/learn/concepts/buckets/
+  - /riak/kv/2.9.0p3/learn/concepts/buckets/
+  - /riak/kv/2.9.0p4/learn/concepts/buckets/
+---
+
+
+[apps cluster metadata]: {{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/cluster-metadata
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/strong-consistency
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context
+[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context/#siblings
+[concept replication]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/strong-consistency
+[config basic]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/basic
+[dev api http]: {{<baseurl>}}riak/kv/2.9.0p5/developing/api/http
+[dev data types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-types
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#ring
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/multi
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/replication
+
+
+Buckets are used to define a virtual keyspace for storing Riak objects.
+They enable you to define non-default configurations over that keyspace
+concerning [replication properties][concept replication] and [other
+parameters][config basic].
+
+In certain respects, buckets can be compared to tables in relational
+databases or folders in filesystems. From the standpoint
+of performance, buckets with default configurations are essentially
+"free," while non-default configurations, defined [using bucket
+types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.
+
+## Configuration
+
+Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables
+you to create and modify sets of configurations and apply them to as
+many buckets as you wish. 
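As a brief sketch, a type carrying a
+non-default property is typically created and activated from the
+command line (the type name and property below are illustrative only):
+
+```bash
+# Create a bucket type with a non-default n_val, then activate it
+riak-admin bucket-type create n_val_of_5 '{"props":{"n_val":5}}'
+riak-admin bucket-type activate n_val_of_5
+```
+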
With bucket types, you can configure the
+following bucket-level parameters, overriding the default values if you
+wish.
+
+#### allow_mult
+
+Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on
+the context. See the documentation on [`allow_mult`][usage bucket types] for more
+information.
+
+#### n_val
+
+Specifies the number of copies of each object to be stored in the
+cluster. See the documentation on [replication properties][usage replication]. Default:
+`3`.
+
+#### last_write_wins
+
+Indicates if an object's timestamp will be used to decide the canonical
+write in the case of a conflict. See the documentation on [vector
+clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default:
+`false`.
+
+#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum
+
+See the documentation on [replication properties][usage replication] for more information
+on all of these properties.
+
+#### precommit
+
+A list of Erlang functions to be executed before writing an object. See
+our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit
+hooks, i.e. an empty list.
+
+#### postcommit
+
+A list of Erlang functions to be executed after writing an object. See
+our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit
+hooks, i.e. an empty list.
+
+#### old_vclock, young_vclock, small_vclock, big_vclock
+
+These settings enable you to manage [vector clock pruning][concept causal context].
+
+#### backend
+
+If you are using the [Multi][plan backend multi] backend, this property enables you to
+determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using
+LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_
+buckets of all types will use the assigned backend.
+
+#### consistent
+
+If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets
+bearing a type, this setting must be set to `true`. The default is
+`false`. More information can be found in our documentation on [using
+strong consistency][cluster ops strong consistency].
+
+#### datatype
+
+If you are using [Riak data types][dev data types], this setting
+determines which data type will be used in
+buckets of this bucket type. Possible values: `counter`, `set`, or
+`map`.
+
+#### dvv_enabled
+
+Whether [dotted version vectors][concept causal context]
+will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.
+
+#### chash_keyfun, linkfun
+
+These settings involve features that have been deprecated. You will not
+need to adjust these values.
+
+## Fetching Bucket Properties
+
+If you'd like to see how a particular bucket has been configured, you
+can do so using our official client libraries or through Riak's [HTTP
+API][dev api http]. The following would fetch the properties for the bucket
+`animals` if that bucket had a default configuration, i.e. 
the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak's HTTP API is running on "localhost" and port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals', 'my_custom_type')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
diff --git a/content/riak/kv/2.9.0p5/learn/concepts/capability-negotiation.md b/content/riak/kv/2.9.0p5/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..96ecf7ad0f
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/learn/concepts/capability-negotiation.md
@@ -0,0 +1,40 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.0p5/theory/concepts/capability-negotiation
+  - /riak/kv/2.9.0p5/theory/concepts/capability-negotiation
+  - /riak/2.9.0p5/learn/concepts/capability-negotiation/
+  - /riak/2.9.0/learn/concepts/capability-negotiation/
+  - /riak/kv/2.9.0/learn/concepts/capability-negotiation/
+  - /riak/kv/2.9.0p1/learn/concepts/capability-negotiation/
+  - /riak/kv/2.9.0p2/learn/concepts/capability-negotiation/
+  - /riak/kv/2.9.0p3/learn/concepts/capability-negotiation/
+  - /riak/kv/2.9.0p4/learn/concepts/capability-negotiation/
+---
+
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.0p5/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce
+
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. 
+ +Rolling upgrades no longer require you to disable and then re-enable features due to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster. + +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + diff --git a/content/riak/kv/2.9.0p5/learn/concepts/causal-context.md b/content/riak/kv/2.9.0p5/learn/concepts/causal-context.md new file mode 100644 index 0000000000..395695f9ea --- /dev/null +++ b/content/riak/kv/2.9.0p5/learn/concepts/causal-context.md @@ -0,0 +1,293 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.0p5/theory/concepts/context + - /riak/kv/2.9.0p5/theory/concepts/context + - /riak/2.9.0p5/learn/concepts/causal-context/ + - /riak/2.9.0/learn/concepts/causal-context/ + - /riak/kv/2.9.0/learn/concepts/causal-context/ + - /riak/kv/2.9.0p1/learn/concepts/causal-context/ + - /riak/kv/2.9.0p2/learn/concepts/causal-context/ + - /riak/kv/2.9.0p3/learn/concepts/causal-context/ + - /riak/kv/2.9.0p4/learn/concepts/causal-context/ +--- + + + +[concept aae]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/2.9.0p5/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/2.9.0p5/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] 
between object replicas stored on different nodes are inevitable, particularly when multiple clients update an object simultaneously.

## The Problem of Conflicting Values

To illustrate this problem, imagine that you're building a [CRM] application and storing customer information in Riak. Now imagine that information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. What happens if Marie has two browser windows open and changes her phone number to 555-1337 in one window and saves it, and then also changes it to 555-1212 in another window and saves it?

This means that two different values are sent into Riak. So what happens at that point? There are several possible outcomes:

1. Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
2. The two operations hit the database at roughly the same time, i.e. two **concurrent updates** have been completed, and Riak is unable to determine which value "wins." In this scenario, one of three things can happen:

    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules

    b. Riak creates sibling values, aka **siblings**, for the object

    c. Riak resolves the values on the basis of timestamps

In the case of outcome 1 above, Riak uses **causal context** metadata to make that decision. This metadata is attached to every object in Riak. Causal context comes in two forms in Riak: **vector clocks** and **dotted version vectors**. More information on both can be found in the sections below.

In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other scenarios, e.g. healed network partitions).

If, however, `allow_mult` is set to `false`, then Riak will not generate siblings, instead relying on simple timestamp resolution to decide which value "wins." In general, we recommend _always_ setting `allow_mult` to `true`. A more complete discussion can be found in our documentation on [conflict resolution][usage conflict resolution].

## Vector Clocks

In versions of Riak prior to 1.4, Riak used vector clocks as the sole means of tracking the history of object updates. In Riak versions 2.0 and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained in that section.

Like dotted version vectors, vector clocks are a means of tracking events in distributed systems. Unlike normal clocks, vector clocks have no sense of chronological time, i.e. they don't care if something happened at 6 pm today or back in 1972. They care only about sequences of events. More specifically, they keep track of who---i.e. which actor in the system---has modified an object and how many times they've done so.

In a distributed system like Riak, multiple replicas of each object are active in the cluster all the time. Because it's inevitable that objects will have conflicting values due to events like concurrent updates and healed network partitions, Riak needs a mechanism to keep track of which replica of an object is more current than another. In versions of Riak prior to 2.0, vector clocks were the means employed by Riak to do precisely that.
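As a rough illustration of the bookkeeping involved, here is a toy sketch that represents a vector clock as a dict mapping actor IDs to update counts and checks whether one clock descends from another. This is not Riak's actual code or encoding (Riak's clocks are opaque binary metadata), and the actor names are purely illustrative:

```python
# Toy model of vector clock comparison; Riak's real clocks are opaque
# binary metadata, and the actor IDs here are purely illustrative.

def descends(a, b):
    """True if clock `a` has seen every update recorded in clock `b`."""
    return all(a.get(actor, 0) >= count for actor, count in b.items())

clock1 = {"actorX": 2, "actorY": 1}   # object updated twice by X, once by Y
clock2 = {"actorX": 1, "actorY": 1}   # an older replica of the same object

print(descends(clock1, clock2))  # True: clock1 is a direct descendant
print(descends(clock2, clock1))  # False
# If neither clock descends from the other, the updates were concurrent,
# and Riak must either pick a winner or create siblings.
```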
A number of important aspects of the relationship between object replicas can be determined using vector clocks:

* Whether one object is a direct descendant of the other
* Whether the objects are direct descendants of a common parent
* Whether the objects are unrelated in recent heritage

Behind the scenes, Riak uses vector clocks as an essential element of its [active anti-entropy][concept aae] subsystem and of its automatic read repair capabilities.

Vector clocks are non-human-readable metadata attached to all Riak objects. They look something like this:

```
a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
```

While vector clocks quite often resolve object conflicts without trouble, there are times when they can't, i.e. when it's unclear which value of an object is most current. When that happens, Riak, if configured to do so, will create **siblings**.

## More Information on Vector Clocks

Additional information on vector clocks:

* [Conflict Resolution][usage conflict resolution] in Riak KV
* [Vector Clocks on Wikipedia]
* [Why Vector Clocks are Easy]
* [Why Vector Clocks are Hard]
* The vector clocks used in Riak are based on the [work of Leslie Lamport].

## Siblings

It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a specific location. This will ensure that _at most_ one value is returned when a read is performed on a bucket type/bucket/key location (and no value if Riak returns `not found`).

It's also possible, however, to configure Riak to store multiple objects in a single key if necessary, i.e. for an object to have different values on different nodes. Objects stored this way have what are called sibling values. You can instruct Riak to allow for sibling creation by setting the `allow_mult` bucket property to `true` for a specific bucket, preferably [using bucket types][usage bucket types].

From the standpoint of application development, the difficulty with siblings is that they _by definition_ conflict with one another. When an application attempts to read an object that has siblings, multiple replicas will be stored in the location where the application is looking. This means that the application will need to develop a strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to decide which value is more correct depending on the use case.

## Dotted Version Vectors

In versions of Riak prior to 2.0, all causality-based conflict resolution, whether on the client side or in Riak, was achieved using [vector clocks][concept causal context]. In version 2.0, Riak added the option of using **dotted version vectors** (DVVs) instead.

Like vector clocks, dotted version vectors are a mechanism for tracking object update causality in terms of **logical time** rather than chronological time (as with timestamps), enabling Riak to make decisions about which objects are more current than others in cases of conflict.

>**Note: DVVs Recommended Over Vector Clocks**
>
>If you are using Riak version 2.0 or later, we strongly recommend using dotted version vectors instead of vector clocks, as DVVs are far better at limiting the number of siblings produced in a cluster, which can prevent a wide variety of potential issues.
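If you want to experiment with this from a client, here is a minimal sketch using the official Python client; it assumes a local node and that your client and server versions accept the `dvv_enabled` property (covered in more detail below) through `set_properties`. In practice, these properties are usually baked into a bucket type with `riak-admin`, so treat this as illustrative rather than canonical:

```python
import riak

client = riak.RiakClient(pb_port=8087)  # assumes a local node's PB port
bucket = client.bucket('frequent_updates')

# Enable sibling creation and dotted version vectors for this bucket.
# Whether dvv_enabled is settable this way depends on your client and
# server versions; creating a bucket type via riak-admin is the usual route.
bucket.set_properties({'allow_mult': True, 'dvv_enabled': True})

print(bucket.get_properties().get('dvv_enabled'))
```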
## DVVs Versus Vector Clocks

The role that DVVs play in Riak is directly analogous to that of vector clocks, as both are used to resolve object conflicts, whether during background operations like [active anti-entropy][concept aae] or [read repair][glossary read rep], or when applications engage in client-side [conflict resolution][usage conflict resolution]. The crucial difference between them, however, lies in the way that they handle concurrent updates.

Vector clocks can detect concurrent updates to the same object but they can't identify which value was associated with each update. If an object stored in the bucket `frequent_updates` with the key `update_me` is updated by five different clients concurrently and tagged with the same vector clock, then five values should be created as siblings. However, depending on the order of delivery of those updates to the different replicas, sibling values may be duplicated, which can in turn lead to [sibling explosion](#sibling-explosion) and thus undue [latency][perf latency reduc].

DVVs, on the other hand, identify each value with the update that created it. If five clients concurrently update the object above (in the bucket `frequent_updates`, with the key `update_me`), each of these updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This means that duplicate values can always be identified and removed, reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the number of sibling values will be proportional to the number of concurrent updates.

In terms of performance, the difference between vector clocks and DVVs should be minimal in most cases. Because DVVs de-duplicate updates, objects that use them should generally be smaller than objects that use vector clocks.

## Usage

From an application's perspective, vector clocks and DVVs function in exactly the same fashion. Object updates using DVVs involve the same sequence of interactions with Riak:

* fetch an object from Riak,
* fetch the object's metadata, which will contain an opaque context object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for the vector clock or DVV attached to that version of the object, and finally
* pass that opaque context object back to Riak when you update the object.

You will not need to modify your application code when switching from vector clocks to DVVs, even if you choose to switch all Riak objects in your cluster to DVVs. You should make sure, however, that the right bucket types and buckets are being targeted by your application after the `dvv_enabled` parameter has been changed.

For compatibility's sake, DVVs contained in Riak objects' metadata are still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and `vclock` if using the [Protocol Buffers interface][usage protocol buffers].

More on using vector clocks and DVVs on the application side can be found in our documentation on [conflict resolution][usage conflict resolution].

>**Note on DVVs and bucket types**
>
>The choice between vector clocks and DVVs can be made at the bucket level, [using bucket types][usage bucket types]. This enables you to employ a mixed conflict resolution strategy in your Riak cluster, using DVVs in some buckets and vector clocks in others if you wish. DVVs can be enabled by setting the `dvv_enabled` bucket property to `true` for one or more bucket types.
>
>Vector clocks remain the default if you are not using bucket types. However, any bucket type that you create and activate will have `dvv_enabled` set to `true`. If you wish to create a bucket type that uses traditional vector clocks, you will therefore need to explicitly set `dvv_enabled` to `false` for that bucket type.

## Sibling Explosion

Sibling explosion occurs when an object rapidly collects siblings that are not reconciled. This can lead to a variety of problems, including degraded performance, especially if many objects in a cluster suffer from sibling explosion. At the extreme, having an enormous object in a node can cause reads of that object to crash the entire node. Other issues include [undue latency][perf latency reduc] and out-of-memory errors.

To prevent sibling explosion, we recommend the following:

1. Use [dotted version vectors](#dotted-version-vectors) instead of vector clocks for causal context.
2. Always update mutable objects within a read/modify/write cycle. More information can be found in the [Object Updates][usage updating objects] doc.

## Resources

* [Evaluating Dotted Version Vectors in Riak]
* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]
* [Dotted Version Vector Sets]
* [A History of Time in Riak]
diff --git a/content/riak/kv/2.9.0p5/learn/concepts/clusters.md b/content/riak/kv/2.9.0p5/learn/concepts/clusters.md
new file mode 100644
index 0000000000..b5b801b7bc
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/learn/concepts/clusters.md
@@ -0,0 +1,121 @@
---
title: "Clusters"
description: ""
project: "riak_kv"
project_version: "2.9.0p5"
menu:
  riak_kv-2.9.0p5:
    name: "Clusters"
    identifier: "learn_concepts_clusters"
    weight: 103
    parent: "learn_concepts"
toc: true
aliases:
  - /riak/2.9.0p5/theory/concepts/Clusters
  - /riak/kv/2.9.0p5/theory/concepts/Clusters
  - /riak/2.9.0p5/theory/concepts/clusters
  - /riak/kv/2.9.0p5/theory/concepts/clusters
  - /riak/2.9.0p5/learn/concepts/clusters/
  - /riak/2.9.0/learn/concepts/clusters/
  - /riak/kv/2.9.0/learn/concepts/clusters/
  - /riak/kv/2.9.0p1/learn/concepts/clusters/
  - /riak/kv/2.9.0p2/learn/concepts/clusters/
  - /riak/kv/2.9.0p3/learn/concepts/clusters/
  - /riak/kv/2.9.0p4/learn/concepts/clusters/
---

[concept buckets]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets
[concept keys objects]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/keys-and-objects
[concept replication]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication
[glossary node]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#node
[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode
[learn dynamo]: {{<baseurl>}}riak/kv/2.9.0p5/learn/dynamo
[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types
[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution
[usage replication]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/replication

Riak's default mode of operation is to work as a cluster consisting of multiple [nodes][glossary node], i.e. multiple well-connected data hosts.

Each host in the cluster runs a single instance of Riak, referred to as a Riak node. Each Riak node manages a set of virtual nodes, or [vnodes][glossary vnode], that are responsible for storing a separate portion of the keys stored in the cluster.
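As a toy illustration of how a key ends up at a particular vnode (a simplification of the mechanics detailed under "The Ring" below, not Riak's actual code; Riak hashes an Erlang term built from the bucket/key pair, so the exact hash input differs):

```python
import hashlib

RING_SIZE = 2 ** 160      # Riak hashes bucket/key pairs onto a 160-bit ring
NUM_PARTITIONS = 8        # deliberately tiny; real clusters use 64 or more

def partition_for(bucket, key):
    """Map a bucket/key pair to one of NUM_PARTITIONS ring partitions."""
    # Toy hash input: Riak actually hashes an Erlang {Bucket, Key} term.
    h = int.from_bytes(hashlib.sha1(bucket + key).digest(), 'big')
    return h // (RING_SIZE // NUM_PARTITIONS)

print(partition_for(b'my_bucket', b'my_key'))  # a partition index, 0..7
```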
+ +In contrast to some high-availability systems, Riak nodes are _not_ +clones of one another, and they do not all participate in fulfilling +every request. Instead, you can configure, at runtime or at request +time, the number of nodes on which data is to be replicated, as well as +when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed. + +## The Ring + +Though much of this section is discussed in our annotated discussion of +the Amazon [Dynamo paper][learn dynamo], it nonetheless provides a summary of +how Riak implements the distribution of data throughout a cluster. + +Any client interface to Riak interacts with objects in terms of the +[bucket][concept buckets] and [key][concept keys objects] in which a value is +stored, as well as the [bucket type][usage bucket types] that is used +to set the bucket's properties. + +Internally, Riak computes a 160-bit binary hash of each bucket/key pair +and maps this value to a position on an ordered **ring** of all such +values. This ring is divided into partitions, with each Riak vnode +responsible for one of these partitions (we say that each vnode +_claims_ that partition). + +Below is a visual representation of a Riak ring: + +![A Riak Ring]({{<baseurl>}}images/riak-ring.png) + +The nodes of a Riak cluster each attempt to run a roughly equal number +of vnodes at any given time. In the general case, this means that each +node in the cluster is responsible for 1/(number of nodes) of the ring, +or (number of partitions)/(number of nodes) vnodes. + +If two nodes define a 16-partition cluster, for example, then each node +will run 8 vnodes. Nodes attempt to claim their partitions at intervals +around the ring such that there is an even distribution amongst the +member nodes and that no node is responsible for more than one replica +of a key. + +## Intelligent Replication + +When an object is being stored in the cluster, any node may participate +as the **coordinating node** for the request. The coordinating node +consults the ring state to determine which vnode owns the partition in +which the value's key belongs, then sends the write request to that +vnode as well as to the vnodes responsible for the next N-1 partitions +in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The +write request may also specify that at least W (=< N) of those vnodes +reply with success, and that DW (=< W) reply with success only after +durably storing the value. + +A read, or GET, request operates similarly, sending requests to the +vnode that "claims" the partition in which the key resides, as well as +to the next N-1 partitions. The request also specifies R (=< N), the +number of vnodes that must reply before a response is returned. + +Here is an illustration of this process: + +![A Riak Ring]({{<baseurl>}}images/riak-data-distribution.png) + +When N is set to 3, the value `REM` is stored in the key `artist`. That +key is assigned to 3 partitions out of 32 available partitions. When a +read request is made to Riak, the ring state will be used to determine +which partitions are responsible. From there, a variety of +[configurable parameters][usage replication] determine how Riak +will behave in case the value is not immediately found. + +## Gossiping + +The ring state is shared around the cluster by means of a "gossip +protocol." Whenever a node changes its claim on the ring, it announces, +i.e. 
"gossips," this change to other nodes so that the other nodes can +respond appropriately. Nodes also periodically re-announce what they +know about ring in case any nodes happened to miss previous updates. diff --git a/content/riak/kv/2.9.0p5/learn/concepts/crdts.md b/content/riak/kv/2.9.0p5/learn/concepts/crdts.md new file mode 100644 index 0000000000..f4d8e175d4 --- /dev/null +++ b/content/riak/kv/2.9.0p5/learn/concepts/crdts.md @@ -0,0 +1,256 @@ +--- +title_supertext: "Concept" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Data Types" + identifier: "learn_concepts_data_types" + weight: 104 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.0p5/theory/concepts/crdts + - /riak/kv/2.9.0p5/theory/concepts/crdts + - /riak/2.9.0p5/learn/concepts/crdts/ + - /riak/2.9.0/learn/concepts/crdts/ + - /riak/kv/2.9.0/learn/concepts/crdts/ + - /riak/kv/2.9.0p1/learn/concepts/crdts/ + - /riak/kv/2.9.0p2/learn/concepts/crdts/ + - /riak/kv/2.9.0p3/learn/concepts/crdts/ + - /riak/kv/2.9.0p4/learn/concepts/crdts/ +--- + + +[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[data types converg]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/crdts/#convergence +[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html +[data types impl]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/strong-consistency +[dev data types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-types +[riak_dt]: https://github.com/basho/riak_dt +[dev data types context]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/#data-types-and-context +[glossary node]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution + +Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections: + +- Counters +- Flags +- HyperLogLogs +- Maps +- Registers +- Sets + +The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients. + +Instead of the usual create, read, update, and delete (CRUD) operations +performed on key/value pairs, data types enable you to perform +operations such as removing a register from a map, telling a counter to +increment itself by 5, or enabling a flag that was previously disabled. + +It's important to note that Riak Data Types are operations-based from the standpoint of connecting clients. Like CRDTs, the [convergence logic][data types converg] is state-based behind the scenes. + +Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. 
## Counters

Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used within a map. A counter's value can be any integer: positive, negative, or zero.

Counters are useful when a count is needed, for example:

- Counting the number of people following someone on Twitter
- Counting the number of likes on a Facebook post
- Counting the points scored by a player in a game

If you require unique, ordered IDs, you should not use counters, because uniqueness cannot be guaranteed.

### Operations

Counters are subject to two operations: increment and decrement.

## Flags

Flags are similar to Boolean values, but instead of `true` or `false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.

Some examples of using flags:

- Showing if a tweet has been retweeted
- Showing if a user has signed up for a specific pricing plan

### Operations

Flags support only two operations: `enable` and `disable`. Flags can be added to or removed from a map, but those operations are performed on the map and not on the flag directly.

## HyperLogLogs

HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.

For example, HyperLogLogs can be used for:

- Counting the number of unique visitors to your website
- Counting the number of unique searches users performed

### Operations

HyperLogLogs support two operations: adding elements and retrieving the count.

## Maps

Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.

Maps are best suited for complex, multi-faceted data. The following JSON-inspired pseudocode shows how a tweet might be structured as a map:

```
Map tweet {
    Counter: numberOfRetweets,
    Register: username,
    Register: tweetContent,
    Flag: favorited?,
    Map: userInfo
}
```

### Operations

You can perform two types of operations on maps:

1. Operations performed directly on the map itself, which includes adding fields to and removing fields from the map (e.g. adding a flag or removing a counter).
2. Operations performed on the Data Types nested in the map, e.g. incrementing a counter in the map or setting a flag to `enable`. Those operations behave just like the operations specific to that Data Type.

## Registers

Registers are essentially named binaries (like strings). Any binary value can act as the value of a register. Like flags, registers cannot be used on their own and must be embedded in maps.

Some examples of using registers:

- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
- Storing the title of a blog post in a map called `2010-03-01_blog_post`

### Operations

The only operation that can be performed on a register itself is changing the binary stored within it. Registers can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.

## Sets

Sets are collections of unique binary values, such as strings.
For example, if you attempt to add the +element `shovel` to a set that already contains `shovel`, the operation +will be ignored by Riak KV. Sets can be used either on their own or +embedded in a map. + +Some examples of using sets: + +- Storing the UUIDs of a user's friends in a social network application +- Storing items in an e-commerce shopping cart + +### Operations + +Sets are subject to four basic operations: add an element, remove an +element, add multiple elements, or remove multiple elements. + + +## Advantages and Disadvantages of Data Types + +[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider. + +One of the core purposes behind data types is to relieve developers +using Riak KV of the burden of producing data convergence at the +application level by absorbing a great deal of that complexity into Riak KV +itself. Riak KV manages this complexity by building eventual consistency +into the data types themselves instead of requiring clients to do so. + +You can still build applications with Riak KV that treat it as a highly +available key/value store, and you will always have this choice. What +Riak Data Types provide is additional flexibility and a broader choice +palette. + +The trade-off that data types necessarily present is that they don't +allow you to produce your own convergence logic. If your use case +demands that you be able to create your own deterministic merge +functions, then Riak Data Types might not be a good fit. + + +## Implementation + +Conflicts between replicas are inevitable in a distributed system like +Riak KV. + +For example, if a map is stored in the key `my_map`, it is always +possible that the value of `my_map` will be different in nodes A and B. + +Without using data types, that conflict must be resolved using +timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt]. + + +## Convergence + +The benefit of data types is that Riak KV knows how to resolve value +conflicts by applying data type-specific rules. + +Riak KV does this by remembering the history of a value and broadcasting that +history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct. + +### Example + +Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`. While on another node the set has only one element, `apple`. + +What happens when the two nodes communicate and note the divergence? + +In this case Riak KV would declare the set with two elements the winner. +At that point, the node with the incorrect set would be told: "The set +`fruits` should have elements `apple` and `orange`." + +In general, convergence involves the following stages: + +1. Check for divergence. If the data types have the same value, Riak KV + does nothing. But if divergence is noted... +2. 
Riak KV applies data type-specific merge rules, like in the `fruits` + set example above, which will result in a "correct" value. +3. After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. diff --git a/content/riak/kv/2.9.0p5/learn/concepts/eventual-consistency.md b/content/riak/kv/2.9.0p5/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..6f705280f3 --- /dev/null +++ b/content/riak/kv/2.9.0p5/learn/concepts/eventual-consistency.md @@ -0,0 +1,206 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.0p5/theory/concepts/Eventual-Consistency + - /riak/kv/2.9.0p5/theory/concepts/Eventual-Consistency + - /riak/2.9.0p5/theory/concepts/eventual-consistency + - /riak/kv/2.9.0p5/theory/concepts/eventual-consistency + - /riak/2.9.0p5/learn/concepts/eventual-consistency/ + - /riak/2.9.0/learn/concepts/eventual-consistency/ + - /riak/kv/2.9.0/learn/concepts/eventual-consistency/ + - /riak/kv/2.9.0p1/learn/concepts/eventual-consistency/ + - /riak/kv/2.9.0p2/learn/concepts/eventual-consistency/ + - /riak/kv/2.9.0p3/learn/concepts/eventual-consistency/ + - /riak/kv/2.9.0p4/learn/concepts/eventual-consistency/ +--- + + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. 
+ +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). Because of this, these +systems need to rely on some form of conflict-resolution mechanism. + +One of the things that makes Riak's eventual consistency model powerful +is that Riak does not dictate how data resolution takes place. While +Riak does ship with a set of defaults regarding how data is +[replicated](#replication-properties-and-request-tuning) and how +[conflicts are resolved][usage conflict resolution], you can override these +defaults if you want to employ a different strategy. + +Among those strategies, you can enable Riak to resolve object conflicts +automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or +special eventually consistent [Data Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/), or you can resolve those +conflicts on the application side by employing a use case-specific logic +of your choosing. More information on this can be found in our guide to +[conflict resolution][usage conflict resolution]. + +This variety of options enables you to manage Riak's eventually +consistent behavior in accordance with your application's [data model +or models]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/). + +## Replication Properties and Request Tuning + +In addition to providing you different means of resolving conflicts, +Riak also enables you to fine-tune **replication properties**, which +determine things like the number of nodes on which data should be stored +and the number of nodes that are required to respond to read, write, and +other requests. + +An in-depth discussion of these behaviors and how they can be +implemented on the application side can be found in our guides to +[replication properties][concept replication] and [conflict resolution][usage conflict resolution]. + +In addition to our official documentation, we also recommend checking +out the [Understanding Riak's Configurable +Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) +series from [the Basho blog](https://riak.com/blog/). + +## A Simple Example of Eventual Consistency + +Let's assume for the moment that a sports news application is storing +all of its data in Riak. One thing that the application always needs to +be able to report to users is the identity of the current manager of +Manchester United, which is stored in the key `manchester-manager` in +the bucket `premier-league-managers`. This bucket has `allow_mult` set +to `false`, which means that Riak will resolve all conflicts by itself. + +Now let's say that a node in this cluster has recently recovered from +failure and has an old copy of the key `manchester-manager` stored in +it, with the value `Alex Ferguson`. The problem is that Sir Ferguson +stepped down in 2013 and is no longer the manager. Fortunately, the +other nodes in the cluster hold the value `David Moyes`, which is +correct. + +Shortly after the recovered node comes back online, other cluster +members recognize that it is available. 
Then, a read request for `manchester-manager` arrives from the application. Regardless of which order the responses arrive to the node that is coordinating this request, `David Moyes` will be returned as the value to the client, because `Alex Ferguson` is recognized as an older value.

Why is this? How does Riak make this decision? Behind the scenes, after `David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the older value on the node that just came back online. Because Riak tags all objects with versioning information, it can make these kinds of decisions on its own, if you wish.

### R=1

Let's say that you keep the above scenario the same, except you tweak the request and set R to 1, perhaps because you want faster responses to the client. In this case, it _is_ possible that the client will receive the outdated value `Alex Ferguson` because it is only waiting for a response from one node.

However, the read repair mechanism will kick in and fix the value, so the next time someone asks for the value of `manchester-manager`, `David Moyes` will indeed be the answer.

### R=1, sloppy quorum

Let's take the scenario back in time to the point at which our unlucky node originally failed. At that point, all 3 nodes had `Alex Ferguson` as the value for `manchester-manager`.

When a node fails, Riak's *sloppy quorum* feature kicks in and another node takes responsibility for serving its requests.

The first time we issue a read request after the failure, if `R` is set to 1, we run a significant risk of receiving a `not found` response from Riak. The node that has assumed responsibility for that data won't have a copy of `manchester-manager` yet, and it's much faster to verify a missing key than to pull a copy of the value from disk, so that node will likely respond fastest.

If `R` is left to its default value of 2, there wouldn't be a problem because 1 of the nodes that still had a copy of `Alex Ferguson` would also respond before the client got its result. In either case, read repair will step in after the request has been completed and make certain that the value is propagated to all the nodes that need it.

### PR, PW, sloppy quorum

Thus far, we've discussed settings that permit sloppy quorums in the interest of allowing Riak to maintain as high a level of availability as possible in the presence of node or network failure.

It is possible to configure requests to ignore sloppy quorums in order to limit the possibility of older data being returned to a client. The tradeoff, of course, is that there is an increased risk of request failures if failover nodes are not permitted to serve requests.

In the scenario we've been discussing, for example, we've considered the possibility of a node holding the `manchester-manager` key having failed. To be more precise, we've been talking about a *primary* node: one that would bear responsibility for that key when the cluster is perfectly healthy.

When that node failed, using `R=2` as we've discussed or even `R=3` for a read request would still work properly: a failover node (sloppy quorum again) would be tasked to take responsibility for that key, and when it receives a request for it, it would reply that it doesn't have any such key, but the two surviving primary nodes still know who the `manchester-manager` is.

However, if the PR (primary read) value is specified, only the two surviving primary nodes are considered valid sources for that data.

So, setting PR to 2 works fine, because there are still 2 such nodes, but a read request with PR=3 would fail because the 3rd primary node is offline, and no failover node can take its place *as a primary*.

The same is true of writes: W=2 or W=3 will work fine with the primary node offline, as will PW=2 (primary write), but PW=3 will result in an error.

>**Note: Errors and Failures**
>
>It is important to understand the difference between an error and a failure.
>
>The `PW=3` request in this scenario will result in an error, but the value will still be written to the two surviving primary nodes.
>
>By specifying `PW=3` the client indicated that 3 primary nodes must respond for the operation to be considered successful, which it wasn't, but there's no way to tell without performing another read whether the operation truly failed.
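On the client side, these per-request values are typically passed as options. A minimal sketch with the official Python client, assuming a local node and the `premier-league-managers` bucket from this example, might look like this:

```python
import riak

client = riak.RiakClient(pb_port=8087)  # assumes a local node
bucket = client.bucket('premier-league-managers')

# R=1: fastest response, but may return a stale value after a failure.
fast = bucket.get('manchester-manager', r=1)

# PR=2: only primary nodes count toward the quorum; the read fails if
# fewer than two primaries are reachable.
strict = bucket.get('manchester-manager', pr=2)

# W and PW work the same way on writes.
obj = bucket.new('manchester-manager', data='David Moyes')
obj.store(w=2, pw=2)
```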
## Further Reading

* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
* Werner Vogels, et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
diff --git a/content/riak/kv/2.9.0p5/learn/concepts/keys-and-objects.md b/content/riak/kv/2.9.0p5/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..23d02d2c08
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/learn/concepts/keys-and-objects.md
@@ -0,0 +1,57 @@
---
title: "Keys and Objects"
description: ""
project: "riak_kv"
project_version: "2.9.0p5"
menu:
  riak_kv-2.9.0p5:
    name: "Keys and Objects"
    identifier: "learn_concepts_keys_objects"
    weight: 106
    parent: "learn_concepts"
toc: true
aliases:
  - /riak/2.9.0p5/theory/concepts/keys-and-values
  - /riak/kv/2.9.0p5/theory/concepts/keys-and-values
  - /riak/2.9.0p5/learn/concepts/keys-and-objects/
  - /riak/2.9.0/learn/concepts/keys-and-objects/
  - /riak/kv/2.9.0/learn/concepts/keys-and-objects/
  - /riak/kv/2.9.0p1/learn/concepts/keys-and-objects/
  - /riak/kv/2.9.0p2/learn/concepts/keys-and-objects/
  - /riak/kv/2.9.0p3/learn/concepts/keys-and-objects/
  - /riak/kv/2.9.0p4/learn/concepts/keys-and-objects/
---

[concept buckets]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets
[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context/#vector-clocks

In an RDBMS, data is organized by tables that are individually identifiable entities. Within those tables exist rows of data organized into columns. It is possible to retrieve or update entire tables, individual rows, or a group of columns within a set of rows. In contrast, Riak has a simpler data model in which the Object (explained below) is both the largest and smallest data element. When performing any fetch or update operation in Riak, the entire Riak Object must be retrieved or modified; there are no partial fetches or updates.

## Keys

Keys in Riak are simply binary values (or strings) used to identify Objects. From the perspective of a client interacting with Riak, each bucket appears to represent a separate keyspace. It is important to understand that Riak treats the bucket-key pair as a single entity when performing fetch and store operations (see: [Buckets][concept buckets]).

## Objects

Objects are the only unit of data storage in Riak. Riak Objects are essentially structs identified by bucket and key and composed of the following parts: a bucket, key, vector clock, and a list of metadata-value pairs. Normally, objects have only one metadata-value pair, but when there is more than one, the object is said to have "siblings". These siblings may occur both within a single node and across multiple nodes, and arise when more than one actor updates an object, when a network partition occurs, or when a stale vector clock is submitted with an update (see: [Vector Clocks][concept causal context vc]).
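Because there are no partial fetches or updates, a client always round-trips the whole object. A short sketch with the official Python client (the bucket and key names are illustrative, and a local node is assumed):

```python
import riak

client = riak.RiakClient(pb_port=8087)  # assumes a local node
bucket = client.bucket('customers')

# Fetch the entire object; Riak has no column- or field-level reads.
obj = bucket.get('mariejohnston')

# Modify the value locally, then write the whole object back.
# The fetched causal context travels with `obj` automatically.
record = obj.data or {}
record['phone'] = '555-1212'
obj.data = record
obj.store()
```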
diff --git a/content/riak/kv/2.9.0p5/learn/concepts/replication.md b/content/riak/kv/2.9.0p5/learn/concepts/replication.md
new file mode 100644
index 0000000000..e50082ac45
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/learn/concepts/replication.md
@@ -0,0 +1,327 @@
---
title: "Replication"
description: ""
project: "riak_kv"
project_version: "2.9.0p5"
menu:
  riak_kv-2.9.0p5:
    name: "Replication"
    identifier: "learn_concepts_replication"
    weight: 108
    parent: "learn_concepts"
toc: true
aliases:
  - /riak/2.9.0p5/theory/concepts/Replication
  - /riak/kv/2.9.0p5/theory/concepts/Replication
  - /riak/2.9.0p5/theory/concepts/replication
  - /riak/kv/2.9.0p5/theory/concepts/replication
  - /riak/2.9.0p5/learn/concepts/replication/
  - /riak/2.9.0/learn/concepts/replication/
  - /riak/kv/2.9.0/learn/concepts/replication/
  - /riak/kv/2.9.0p1/learn/concepts/replication/
  - /riak/kv/2.9.0p2/learn/concepts/replication/
  - /riak/kv/2.9.0p3/learn/concepts/replication/
  - /riak/kv/2.9.0p4/learn/concepts/replication/
---

[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter
[concept aae]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy
[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context/#vector-clocks
[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters
[concept vnodes]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/vnodes
[glossary node]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#node
[glossary ring]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#ring
[usage replication]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/replication

Data replication is a core feature of Riak's basic architecture. Riak was designed to operate as a [clustered][concept clusters] system containing multiple Riak [nodes][glossary node], which allows data to live on multiple machines at once in case a node in the cluster goes down.

Replication is fundamental and automatic in Riak, providing assurance that your data will still be there if a node in your Riak cluster goes down. All data stored in Riak will be replicated to a number of nodes in the cluster according to the N value (`n_val`) property set in a bucket's [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types).

>**Note: Replication across clusters**
>
>If you're interested in replication not just within a cluster but across multiple clusters, we recommend checking out our documentation on Riak's [Multi-Datacenter Replication][cluster ops v3 mdc] capabilities.

## Selecting an N value (`n_val`)

By default, Riak uses an `n_val` of 3. This means that data stored in any bucket will be replicated to 3 different nodes. For this to be effective, you need at least 3 nodes in your cluster.
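You can confirm the effective value from a client. For example, with the official Python client (assuming a local node and the `animals` bucket used earlier in this document):

```python
import riak

client = riak.RiakClient(pb_port=8087)  # assumes a local node

# Buckets under the default bucket type inherit an n_val of 3.
props = client.bucket('animals').get_properties()
print(props['n_val'])  # => 3 unless overridden by a bucket type
```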
The ideal value for N depends largely on your application and the shape of your data. If your data is highly transient and can be reconstructed easily by the application, choosing a lower N value will provide greater performance. However, if you need high assurance that data is available even after node failure, increasing the N value will help protect against loss. How many nodes do you expect will fail at any one time? Choose an N value larger than that and your data will still be accessible when those nodes go down.

The N value also affects the behavior of read (GET) and write (PUT) requests. The tunable parameters you can submit with requests are bound by the N value. For example, if N=3, the maximum read quorum (known as "R") you can request is also 3. If some nodes containing the data you are requesting are down, an R value larger than the number of available nodes with the data will cause the read to fail.

## Setting the N value (`n_val`)

To change the N value for a bucket, you need to create a [bucket type]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) with `n_val` set to your desired value and then make sure that the bucket bears that type.

In this example, we'll set N to 2. First, we'll create the bucket type and call it `n_val_of_2` and then activate that type:

```bash
riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
riak-admin bucket-type activate n_val_of_2
```

Now, any bucket that bears the type `n_val_of_2` will propagate objects to 2 nodes.

>**Note on changing the value of N**
>
>Changing the N value after a bucket has data in it is *not recommended*. If you do change the value, especially if you increase it, you might need to force read repair (more on that below). Overwritten objects and newly stored objects will automatically be replicated to the correct number of nodes.

## Changing the N value (`n_val`)

While raising the value of N for a bucket or object shouldn't cause problems, it's important that you never lower N. If you do so, you can wind up with dead, i.e. unreachable, data. This can happen because objects' preflists, i.e. the lists of [vnodes][concept vnodes] responsible for the object, can end up changing, so that the vnodes holding existing replicas of an object are no longer part of that object's shortened preflist.

Unreachable data is a problem because it can negatively impact coverage queries, e.g. [secondary index]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/secondary-indexes/) and [MapReduce]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce/) queries. Lowering an object or bucket's `n_val` will likely mean that objects that you would expect to be returned from those queries will no longer be returned.

## Active Anti-Entropy

Riak's active anti-entropy (AAE) subsystem is a continuous background process that compares and repairs any divergent or missing object replicas. For more information on AAE, see the following documents:

* [Active Anti-Entropy][concept aae]
* [Managing Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/active-anti-entropy)

## Read Repair

Read repair takes place when a successful read occurs---i.e. when the target number of nodes have responded, as determined by R---but not all replicas of the object agree on the value. There are two possibilities here for the errant nodes:

1. The node responded with a `not found` for the object, meaning that it doesn't have a copy.
2. The node responded with a [vector clock][concept causal context vc] that is an ancestor of the vector clock of the successful read.

When this situation occurs, Riak will force the errant nodes to update the object's value based on the value of the successful read.
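The next section describes how to trigger this deliberately. As a preview, a client-side sketch of that procedure (Python client, local node assumed, and a purely illustrative key list) is simply a read per key at a suitable R value:

```python
import riak

client = riak.RiakClient(pb_port=8087)  # assumes a local node
bucket = client.bucket('animals')

# Reading each key invokes read repair on any replicas that are missing
# or stale; keys_to_repair is illustrative, not a real API.
keys_to_repair = ['lion', 'tiger', 'bear']
for key in keys_to_repair:
    bucket.get(key, r=3)  # use an R no larger than the original n_val
```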
### Forcing Read Repair

When you increase the `n_val` of a bucket, you may start to see failed read operations, especially if the R value you use is larger than the number of replicas that originally stored the object. Forcing read repair will solve this issue. Or if you have [active anti-entropy][concept aae] enabled, your values will eventually replicate as a background task.

For each object that fails read (or the whole bucket, if you like), read the object using an R value less than or equal to the original number of replicas. For example, if your original `n_val` was 3 and you increased it to 5, perform your read operations with R=3 or less. This will cause the nodes that do not have the object(s) yet to respond with `not found`, invoking read repair.

## So what does N=3 really mean?

N=3 simply means that three copies of each piece of data will be stored in the cluster. That is, three different partitions/vnodes will receive copies of the data. **There are no guarantees that the three replicas will go to three separate physical nodes**; however, the built-in functions for determining where replicas go attempt to distribute the data evenly.

As nodes are added and removed from the cluster, the ownership of partitions changes and may result in an uneven distribution of the data. On some rare occasions, Riak will also aggressively reshuffle ownership of the partitions to achieve a more even balance.

For cases where the number of nodes is less than the N value, data will likely be duplicated on some nodes. For example, with N=3 and 2 nodes in the cluster, one node will likely have one replica, and the other node will have two replicas.

## Understanding replication by example

To better understand how data is replicated in Riak, let's take a look at a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically, we'll focus on two parts of the request: routing an object to a set of partitions and storing an object on a partition.

### Routing an object to a set of partitions

* Assume we have 3 nodes
* Assume we store 3 replicas per object (N=3)
* Assume we have 8 partitions in our [ring][glossary ring] (`ring_creation_size=8`)

**Note**: It is not recommended that you use such a small ring size. This is for demonstration purposes only.

With only 8 partitions our ring will look approximately as follows (response from `riak_core_ring_manager:get_my_ring/0` truncated for clarity):

```erlang
(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
[{0,'dev1@127.0.0.1'},
{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
```

The node handling this request hashes the bucket/key combination:

```erlang
(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>> +``` + +The DocIdx hash is a 160-bit integer: + +```erlang +(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx. +<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>> +(dev1@127.0.0.1)6> I. +1045375627425331784151332358177649483819648417632 +``` + +The node looks up the hashed key in the ring, which returns a list of +_preferred_ partitions for the given key. + +```erlang +(node1@127.0.0.1)> Preflist = riak_core_ring:preflist(DocIdx, Ring). +[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}, +{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}, +{0, 'dev1@127.0.0.1'}, +{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}, +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}, +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}, +{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'}, +{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}] +``` + +The node chooses the first N partitions from the list. The remaining +partitions of the "preferred" list are retained as fallbacks to use if +any of the target partitions are unavailable. + +```erlang +(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist). +{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}, +{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}, +{0,'dev1@127.0.0.1'}], +[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}, +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}, +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}, +{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'}, +{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]} +``` + +The partition information returned from the ring contains a partition +identifier and the parent node of that partition: + +```erlang +{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'} +``` + +The requesting node sends a message to each parent node with the object +and partition identifier (pseudocode for clarity): + +```erlang +'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232} +'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104} +'dev1@127.0.0.1' ! {put, Object, 0} +``` + +If any of the target partitions fail, the node sends the object to one +of the fallbacks. When the message is sent to the fallback node, the +message references the object and original partition identifier. For +example, if `dev2@127.0.0.1` were unavailable, the requesting node would +then try each of the fallbacks. The fallbacks in this example are: + +```erlang +{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'} +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'} +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'} +``` + +The next available fallback node would be `dev3@127.0.0.1`. The +requesting node would send a message to the fallback node with the +object and original partition identifier: + +```erlang +'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104} +``` + +Note that the partition identifier in the message is the same that was +originally sent to `dev2@127.0.0.1` only this time it is being sent to +`dev3@127.0.0.1`. 
Even though `dev3@127.0.0.1` is not the parent node of
+that partition, it is smart enough to hold on to the object until
+`dev2@127.0.0.1` returns to the cluster.
+
+## Processing partition requests
+
+Processing requests per partition is fairly simple. Each node runs a
+single process (`riak_kv_vnode_master`) that distributes requests to
+individual partition processes (`riak_kv_vnode`). The
+`riak_kv_vnode_master` process maintains a list of partition identifiers
+and corresponding partition processes. If a process does not exist for a
+given partition identifier, a new process is spawned to manage that
+partition.
+
+The `riak_kv_vnode_master` process treats all requests the same and
+spawns partition processes as needed, even when nodes receive requests
+for partitions they do not own. When a partition's parent node is
+unavailable, requests are sent to fallback nodes (handoff). The
+`riak_kv_vnode_master` process on the fallback node spawns a process to
+manage the partition even though the partition does not belong to the
+fallback node.
+
+The individual partition processes perform hometests throughout the life
+of the process. The hometest checks whether the current node (`node/0`)
+matches the parent node of the partition as defined in the ring. If the
+process determines that the partition it is managing belongs on another
+node (the parent node), it will attempt to contact that node. If that
+parent node responds, the process will hand off any objects it has
+processed for that partition and shut down. If that parent node does not
+respond, the process will continue to manage that partition and check
+the parent node again after a delay. The hometest is also run by
+partition processes to account for changes in the ring, such as the
+addition or removal of nodes.
+
diff --git a/content/riak/kv/2.9.0p5/learn/concepts/strong-consistency.md b/content/riak/kv/2.9.0p5/learn/concepts/strong-consistency.md
new file mode 100644
index 0000000000..cc84ba114a
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/learn/concepts/strong-consistency.md
@@ -0,0 +1,109 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Strong Consistency"
+    identifier: "learn_concepts_strong_consistency"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.0p5/theory/concepts/strong-consistency
+  - /riak/kv/2.9.0p5/theory/concepts/strong-consistency
+  - /riak/2.9.0p5/learn/concepts/strong-consistency/
+  - /riak/2.9.0/learn/concepts/strong-consistency/
+  - /riak/kv/2.9.0/learn/concepts/strong-consistency/
+  - /riak/kv/2.9.0p1/learn/concepts/strong-consistency/
+  - /riak/kv/2.9.0p2/learn/concepts/strong-consistency/
+  - /riak/kv/2.9.0p3/learn/concepts/strong-consistency/
+  - /riak/kv/2.9.0p4/learn/concepts/strong-consistency/
+---
+
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency
+
+{{% note title="Please Note:" %}}
+Riak KV's strong consistency is an experimental feature and may be removed
+from the product in the future. It is not commercially supported or
+production-ready, and it is incompatible with Multi-Datacenter Replication,
+Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types
+and Commit Hooks. We do not recommend its
+usage in any production environment. 
+{{% /note %}}
+
+Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+(i.e. fault) tolerance and high read and write availability.
+
+While this focus on high availability is a great fit for many data
+storage needs, there are also many use cases for which strong data
+consistency is more important than availability. Basho introduced a new
+strong consistency option in version 2.0 to address these use cases.
+In Riak, strong consistency is enabled [using bucket types][usage bucket types], which
+allows developers to apply strong consistency guarantees on a per-key
+basis.
+
+Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.0p5/configuring/strong-consistency) looking to manage,
+configure, and monitor strong consistency.
+
+## Strong vs. Eventual Consistency
+
+If you successfully write a value to a key in a strongly consistent
+system, the next successful read of that key is guaranteed to show that
+write. A client will never see out-of-date values. The drawback is that
+some operations may fail if an insufficient number of object replicas
+are available. More on this in the section on [trade-offs](#trade-offs).
+
+In an eventually consistent system, on the other hand, a read may return
+an out-of-date value, particularly during system or network failures.
+The advantage of this approach is that reads and writes can succeed even
+when a cluster is experiencing significant service degradation.
+
+### Example
+
+Building on the example presented in the [eventual consistency][concept eventual consistency] doc,
+imagine that information about who manages Manchester United is stored
+in Riak, in the key `manchester-manager`. In the eventual consistency
+example, the value associated with this key was originally
+`David Moyes`, meaning that this was the first successful write to that
+key. But then `Louis van Gaal` became Man U's manager, and a write was
+executed to change the value of `manchester-manager`.
+
+Now imagine that this write failed on one node in a multi-node cluster.
+Thus, all nodes report that the value of `manchester-manager` is `Louis
+van Gaal` except for one. On the errant node, the value of the
+`manchester-manager` key is still `David Moyes`. An eventually
+consistent system is one in which a get request will most likely return
+`Louis van Gaal` but could return the outdated value `David Moyes`.
+
+In a strongly consistent system, conversely, any successful read on
+`manchester-manager` will return `Louis van Gaal` and never `David Moyes`.
+Reads will return `Louis van Gaal` every single time until Man U gets a new
+manager and someone performs a successful write to `manchester-manager`
+to change its value.
+
+It might also be useful to imagine it a bit more abstractly. The
+following causal sequence would characterize a strongly consistent
+system:
+
+1. The value of the key `k` is set to `v`
+2. All successful reads on `k` return `v`
+3. The value of `k` is changed to `v2`
+4. All successful reads on `k` return `v2`
+5. And so forth
+
+At no point in time does this system return an out-of-date value.
+
+The following sequence could characterize an eventually consistent
+system:
+
+1. A write is made that sets the value of the key `k` to `v`
+2. 
Nearly all reads to `k` return `v`, but a small percentage return
+   `not found`
+3. A write to `k` changes the value to `v2`
+4. Nearly all reads to `k` now return `v2`, but a small number return
+   the outdated `v` (or even `not found`) because the newer value hasn't
+   yet been replicated to all nodes
diff --git a/content/riak/kv/2.9.0p5/learn/concepts/vnodes.md b/content/riak/kv/2.9.0p5/learn/concepts/vnodes.md
new file mode 100644
index 0000000000..a884ea24f1
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/learn/concepts/vnodes.md
@@ -0,0 +1,164 @@
+---
+title: "Vnodes"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Vnodes"
+    identifier: "learn_concepts_vnodes"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.0p5/theory/concepts/vnodes
+  - /riak/kv/2.9.0p5/theory/concepts/vnodes
+  - /riak/2.9.0p5/learn/concepts/vnodes/
+  - /riak/2.9.0/learn/concepts/vnodes/
+  - /riak/kv/2.9.0/learn/concepts/vnodes/
+  - /riak/kv/2.9.0p1/learn/concepts/vnodes/
+  - /riak/kv/2.9.0p2/learn/concepts/vnodes/
+  - /riak/kv/2.9.0p3/learn/concepts/vnodes/
+  - /riak/kv/2.9.0p4/learn/concepts/vnodes/
+---
+
+
+
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context
+[concept clusters ring]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters/#the-ring
+[concept replication]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/strong-consistency
+[glossary node]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#ring
+[plan backend]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/cluster-capacity
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-cli
+
+
+Virtual nodes, more commonly referred to as **vnodes**, are processes
+that manage partitions in the Riak [ring][glossary ring]. Each data
+partition in a Riak cluster has a vnode that **claims** that partition.
+Vnodes perform a wide variety of operations, from K/V storage operations
+to guaranteeing [strong consistency][concept strong consistency] if you choose to use that
+feature.
+
+## The Number of Vnodes in a Cluster
+
+The term [node][glossary node] refers to a full instance of Riak,
+be it on its own physical machine or alongside others on a single
+machine, as in a development cluster on your laptop. Each Riak node
+contains multiple vnodes. The number per node is the [ring
+size][concept clusters ring] divided by the number of nodes in the cluster.
+
+This means that in some clusters different nodes will have different
+numbers of data partitions (and hence a different number of vnodes),
+because (ring size / number of nodes) will not always produce a whole
+number. If the ring size of your cluster is 64 and you are running three
+nodes, two of your nodes will have 21 vnodes, while the third node holds
+22 vnodes. 
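+
+You can sanity-check this arithmetic in any Erlang shell (a quick
+sketch, independent of Riak itself):
+
+```erlang
+%% 64 partitions spread across 3 nodes: every node claims at least
+%% `RingSize div NumNodes` vnodes, and `RingSize rem NumNodes` of the
+%% nodes claim one extra.
+RingSize = 64,
+NumNodes = 3,
+{RingSize div NumNodes, RingSize rem NumNodes}.
+%% => {21, 1}: two nodes claim 21 vnodes, one node claims 22.
+```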
+
+The output of the [`riak-admin member-status`][use admin riak cli]
+command shows this:
+
+```
+================================= Membership ==================================
+Status     Ring       Pending    Node
+-------------------------------------------------------------------------------
+valid      34.4%      --         'dev1@127.0.0.1'
+valid      32.8%      --         'dev2@127.0.0.1'
+valid      32.8%      --         'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+```
+
+In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of
+64 partitions, while the other two nodes account for 32.8%, i.e. 21 out
+of 64 partitions. This is normal and expected behavior in Riak.
+
+We strongly recommend setting the appropriate ring size, and by
+extension the number of vnodes, prior to building a cluster. A full
+guide can be found in our [cluster planning][plan cluster capacity] documentation.
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, participating in [strong consistency
+ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1004782375664995756265033322492444576013453623296`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. 
If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, no node will contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node. The report for a
+specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+           0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±/vi80/f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
diff --git a/content/riak/kv/2.9.0p5/learn/dynamo.md b/content/riak/kv/2.9.0p5/learn/dynamo.md
new file mode 100644
index 0000000000..7fc3609e80
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/learn/dynamo.md
@@ -0,0 +1,1932 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/2.9.0p5/theory/dynamo
+  - /riak/kv/2.9.0p5/theory/dynamo
+  - /riak/2.9.0p5/learn/dynamo/
+  - /riak/2.9.0/learn/dynamo/
+  - /riak/kv/2.9.0/learn/dynamo/
+  - /riak/kv/2.9.0p1/learn/dynamo/
+  - /riak/kv/2.9.0p2/learn/dynamo/
+  - /riak/kv/2.9.0p3/learn/dynamo/
+  - /riak/kv/2.9.0p4/learn/dynamo/
+---
+
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.
+
+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV,
+> Cassandra and Voldemort come to mind. You may also remember Dynomite (which
+> predates all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service. 
+> They have not published the inner workings of that implementation. + + +## Abstract + +Reliability at massive scale is one of the biggest challenges we face at +Amazon.com, one of the largest e-commerce operations in the world; even the +slightest outage has significant financial consequences and impacts customer +trust. The Amazon.com platform, which provides services for many web sites +worldwide, is implemented on top of an infrastructure of tens of thousands of +servers and network components located in many datacenters around the world. At +this scale, small and large components fail continuously and the way persistent +state is managed in the face of these failures drives the reliability and +scalability of the software systems. + +This paper presents the design and implementation of Dynamo, a highly available +key-value storage system that some of Amazon’s core services use to provide an +“always-on” experience. To achieve this level of availability, Dynamo sacrifices +consistency under certain failure scenarios. It makes extensive use of object +versioning and application-assisted conflict resolution in a manner that +provides a novel interface for developers to use. + +Categories and Subject Descriptors + +* D.4.2 [Operating Systems]: Storage Management; +* D.4.5 [Operating Systems]: Reliability; +* D.4.2 [Operating Systems]: Performance; + +General Terms + +Algorithms, Management, Measurement, Performance, Design, Reliability. + +## 1. Introduction + +Amazon runs a world-wide e-commerce platform that serves tens of millions +customers at peak times using tens of thousands of servers located in many data +centers around the world. There are strict operational requirements on Amazon’s +platform in terms of performance, reliability and efficiency, and to support +continuous growth the platform needs to be highly scalable. Reliability is one +of the most important requirements because even the slightest outage has +significant financial consequences and impacts customer trust. In addition, to +support continuous growth, the platform needs to be highly scalable. + +One of the lessons our organization has learned from operating Amazon’s platform +is that the reliability and scalability of a system is dependent on how its +application state is managed. Amazon uses a highly decentralized, loosely +coupled, service oriented architecture consisting of hundreds of services. In +this environment there is a particular need for storage technologies that are +always available. For example, customers should be able to view and add items to +their shopping cart even if disks are failing, network routes are flapping, or +data centers are being destroyed by tornados. Therefore, the service responsible +for managing shopping carts requires that it can always write to and read from +its data store, and that its data needs to be available across multiple data +centers. + +Dealing with failures in an infrastructure comprised of millions of components +is our standard mode of operation; there are always a small but significant +number of server and network components that are failing at any given time. As +such Amazon’s software systems need to be constructed in a manner that treats +failure handling as the normal case without impacting availability or +performance. + +To meet the reliability and scaling needs, Amazon has developed a number of +storage technologies, of which the Amazon Simple Storage Service (also available +outside of Amazon and known as Amazon S3), is probably the best known. 
This +paper presents the design and implementation of Dynamo, another highly available +and scalable distributed data store built for Amazon’s platform. Dynamo is used +to manage the state of services that have very high reliability requirements and +need tight control over the tradeoffs between availability, consistency, cost- +effectiveness and performance. Amazon’s platform has a very diverse set of +applications with different storage requirements. A select set of applications +requires a storage technology that is flexible enough to let application +designers configure their data store appropriately based on these tradeoffs to +achieve high availability and guaranteed performance in the most cost effective +manner. + +There are many services on Amazon’s platform that only need primary-key access +to a data store. For many services, such as those that provide best seller +lists, shopping carts, customer preferences, session management, sales rank, and +product catalog, the common pattern of using a relational database would lead to +inefficiencies and limit scale and availability. Dynamo provides a simple +primary-key only interface to meet the requirements of these applications. + +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. 
It also provides insight into the
+tuning of these techniques to meet the requirements of production systems with
+very strict performance demands.
+
+The paper is structured as follows. Section 2 presents the background and
+Section 3 presents the related work. Section 4 presents the system design and
+Section 5 describes the implementation. Section 6 details the experiences and
+insights gained by running Dynamo in production and Section 7 concludes the
+paper. There are a number of places in this paper where additional information
+may have been appropriate but where protecting Amazon’s business interests
+require us to reduce some level of detail. For this reason, the intra- and
+inter-datacenter latencies in section 6, the absolute request rates in section
+6.2 and outage lengths and workloads in section 6.3 are provided through
+aggregate measures instead of absolute details.
+
+
+## 2. Background
+
+Amazon’s e-commerce platform is composed of hundreds of services that work in
+concert to deliver functionality ranging from recommendations to order
+fulfillment to fraud detection. Each service is exposed through a well defined
+interface and is accessible over the network. These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale-out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulate and diverge from
+> Dynamo's (as described in this paper). 
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly available, is efficient in its resource usage,
+> and has a simple scale-out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections. 
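+
+> Before moving on, here is what 2.1's simple primary-key query model
+> looks like in practice against Riak KV, using the official Erlang
+> client, `riakc` (a minimal sketch; the host, bucket, and key are
+> illustrative, and 8087 is the client's default Protocol Buffers port):
+>
+> ```erlang
+> %% Connect to a local node's Protocol Buffers listener.
+> {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+>
+> %% put: store an opaque value under a bucket/key pair.
+> Cart = riakc_obj:new(<<"carts">>, <<"customer-123">>,
+>                      <<"{\"items\":[]}">>, "application/json"),
+> ok = riakc_pb_socket:put(Pid, Cart),
+>
+> %% get: fetch the object back by primary key.
+> {ok, Fetched} = riakc_pb_socket:get(Pid, <<"carts">>, <<"customer-123">>),
+> <<"{\"items\":[]}">> = riakc_obj:get_value(Fetched).
+> ```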
+ + +### 2.2 Service Level Agreements (SLA) + +To guarantee that the application can deliver its functionality in a bounded +time, each and every dependency in the platform needs to deliver its +functionality with even tighter bounds. Clients and services engage in a Service +Level Agreement (SLA), a formally negotiated contract where a client and a +service agree on several system-related characteristics, which most prominently +include the client’s expected request rate distribution for a particular API and +the expected service latency under those conditions. An example of a simple SLA +is a service guaranteeing that it will provide a response within 300ms for 99.9% +of its requests for a peak client load of 500 requests per second. + +In Amazon’s decentralized service oriented infrastructure, SLAs play an +important role. For example a page request to one of the e-commerce sites +typically requires the rendering engine to construct its response by sending +requests to over 150 services. These services often have multiple dependencies, +which frequently are other services, and as such it is not uncommon for the call +graph of an application to have more than one level. To ensure that the page +rendering engine can maintain a clear bound on page delivery each service within +the call chain must obey its performance contract. + +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. 
Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. + + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. 
In such systems, writes may be
+rejected if the data store cannot reach all (or a majority of) the replicas at a
+given time. On the other hand, Dynamo targets the design space of an “always
+writeable” data store (i.e., a data store that is highly available for writes).
+For a number of Amazon services, rejecting customer updates could result in a
+poor customer experience. For instance, the shopping cart service must allow
+customers to add and remove items from their shopping cart even amidst network
+and server failures. This requirement forces us to push the complexity of
+conflict resolution to the reads in order to ensure that writes are never
+rejected.
+
+> Ditto!
+
+The next design choice is who performs the process of conflict resolution. This
+can be done by the data store or the application. If conflict resolution is done
+by the data store, its choices are rather limited. In such cases, the data store
+can only use simple policies, such as “last write wins” [22], to resolve
+conflicting updates. On the other hand, since the application is aware of the
+data schema it can decide on the conflict resolution method that is best suited
+for its client’s experience. For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbors. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. 
Related Work + +> This section is not strictly necessary to read for an understanding of how a +> Dynamo distributed database functions, especially Riak KV. It's still an +> excellent study of other distributed systems, in some cases ones that helped +> inspire Dynamo. When you have time, we highly recommend you read this section. + + +### 3.1 Peer to Peer Systems + +There are several peer-to-peer (P2P) systems that have looked at the problem of +data storage and distribution. The first generation of P2P systems, such as +Freenet and Gnutella, were predominantly used as file sharing systems. These +were examples of unstructured P2P networks where the overlay links between peers +were established arbitrarily. In these networks, a search query is usually +flooded through the network to find as many peers as possible that share the +data. P2P systems evolved to the next generation into what is widely known as +structured P2P networks. These networks employ a globally consistent protocol to +ensure that any node can efficiently route a search query to some peer that has +the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms +to ensure that queries can be answered within a bounded number of hops. + +To reduce the additional latency introduced by multi-hop routing, some P2P +systems (e.g., [14]) employ O(1) routing where each peer maintains enough +routing information locally so that it can route requests (to access a data +item) to the appropriate peer within a constant number of hops. + +> Riak KV's gossip protocol communicates between nodes with O(1) routing, and +> maintains local routing information. + +Various storage systems, such as Oceanstore [9] and PAST [17] were built on top +of these routing overlays. Oceanstore provides a global, transactional, +persistent storage service that supports serialized updates on widely replicated +data. To allow for concurrent updates while avoiding many of the problems +inherent with wide-area locking, it uses an update model based on conflict +resolution. Conflict resolution was introduced in [21] to reduce the number of +transaction aborts. Oceanstore resolves conflicts by processing a series of +updates, choosing a total order among them, and then applying them atomically in +that order. It is built for an environment where the data is replicated on an +untrusted infrastructure. By comparison, PAST provides a simple abstraction +layer on top of Pastry for persistent and immutable objects. It assumes that the +application can build the necessary storage semantics (such as mutable files) on +top of it. + +### 3.2 Distributed File Systems and Databases + +Distributing data for performance, availability and durability has been widely +studied in the file system and database systems community. Compared to P2P +storage systems that only support flat namespaces, distributed file systems +typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19] +replicate files for high availability at the expense of consistency. Update +conflicts are typically managed using specialized conflict resolution +procedures. The Farsite system [1] is a distributed file system that does not +use any centralized server like NFS. Farsite achieves high availability and +scalability using replication. The Google File System [6] is another distributed +file system built for hosting the state of Google’s internal applications. 
GFS +uses a simple design with a single master server for hosting the entire metadata +and where the data is split into chunks and stored in chunkservers. Bayou is a +distributed relational database system that allows disconnected operations and +provides eventual data consistency [21]. + +Among these systems, Bayou, Coda and Ficus allow disconnected operations and are +resilient to issues such as network partitions and outages. These systems differ +on their conflict resolution procedures. For instance, Coda and Ficus perform +system level conflict resolution and Bayou allows application level resolution. +All of them, however, guarantee eventual consistency. + +Similar to these systems, Dynamo allows read and write operations to continue +even during network partitions and resolves updated conflicts using different +conflict resolution mechanisms. Distributed block storage systems like FAB [18] +split large size objects into smaller blocks and stores each block in a highly +available manner. In comparison to these systems, a key-value store is more +suitable in this case because: (a) it is intended to store relatively small +objects (size < 1M) and (b) key-value stores are easier to configure on a per- +application basis. Antiquity is a wide-area distributed storage system designed +to handle multiple server failures [23]. It uses a secure log to preserve data +integrity, replicates each log on multiple servers for durability, and uses +Byzantine fault tolerance protocols to ensure data consistency. In contrast to +Antiquity, Dynamo does not focus on the problem of data integrity and security +and is built for a trusted environment. Bigtable is a distributed storage system +for managing structured data. It maintains a sparse, multi-dimensional sorted +map and allows applications to access their data using multiple attributes [2]. +Compared to Bigtable, Dynamo targets applications that require only key/value +access with primary focus on high availability where updates are not rejected +even in the wake of network partitions or server failures. + +> This all applies to Riak KV, as well. + +Traditional replicated relational database systems focus on the problem of +guaranteeing strong consistency to replicated data. Although strong consistency +provides the application writer a convenient programming model, these systems +are limited in scalability and availability [7]. These systems are not capable +of handling network partitions because they typically provide strong consistency +guarantees. + +### 3.3 Discussion + +Dynamo differs from the aforementioned decentralized storage systems in terms of +its target requirements. First, Dynamo is targeted mainly at applications that +need an “always writeable” data store where no updates are rejected due to +failures or concurrent writes. This is a crucial requirement for many Amazon +applications. Second, as noted earlier, Dynamo is built for an infrastructure +within a single administrative domain where all nodes are assumed to be trusted. +Third, applications that use Dynamo do not require support for hierarchical +namespaces (a norm in many file systems) or complex relational schema (supported +by traditional databases). Fourth, Dynamo is built for latency sensitive +applications that require at least 99.9% of read and write operations to be +performed within a few hundred milliseconds. 
To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages.
+
+<table id="table-1">
+  <caption>
+    Table 1: Summary of techniques used in Dynamo and their advantages.
+  </caption>
+  <tr>
+    <th>Problem</th>
+    <th>Technique</th>
+    <th>Advantage</th>
+  </tr>
+  <tr>
+    <td>Partitioning</td>
+    <td>Consistent Hashing</td>
+    <td>Incremental Scalability</td>
+  </tr>
+  <tr>
+    <td>High Availability for writes</td>
+    <td>Vector clocks with reconciliation during reads</td>
+    <td>Version size is decoupled from update rates.</td>
+  </tr>
+  <tr>
+    <td>Handling temporary failures</td>
+    <td>Sloppy Quorum and hinted handoff</td>
+    <td>Provides high availability and durability guarantee when some of the
+    replicas are not available.</td>
+  </tr>
+  <tr>
+    <td>Recovering from permanent failures</td>
+    <td>Anti-entropy using Merkle trees</td>
+    <td>Synchronizes divergent replicas in the background.</td>
+  </tr>
+  <tr>
+    <td>Membership and failure detection</td>
+    <td>Gossip-based membership protocol and failure detection.</td>
+    <td>Preserves symmetry and avoids having a centralized registry for storing
+    membership and node liveness information.</td>
+  </tr>
+</table>
+
+### 4.1 System Interface
+
+Dynamo stores objects associated with a key through a simple interface; it
+exposes two operations: get() and put(). The get(key) operation locates the
+object replicas associated with the key in the storage system and returns a
+single object or a list of objects with conflicting versions along with a
+context. The put(key, context, object) operation determines where the replicas
+of the object should be placed based on the associated key, and writes the
+replicas to disk. The context encodes system metadata about the object that is
+opaque to the caller and includes information such as the version of the object.
+The context information is stored along with the object so that the system can
+verify the validity of the context object supplied in the put request.
+
+> Whereas Dynamo only has the concept of keys, we added a higher level of
+> organization called a "bucket." 
Keys are stored in buckets and buckets are the
+> level at which several Riak KV properties can be configured (primarily the "N"
+> value, or the replication value). In addition to the bucket+key identifier and
+> value, Riak KV will also return the associated metadata for a given object
+> with each get or put.
+>
+> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API].
+
+[HTTP API]: {{<baseurl>}}riak/kv/2.9.0p5/developing/api/http/
+[Protocol Buffers API]: {{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers/
+
+Dynamo treats both the key and the object supplied by the caller as an opaque
+array of bytes. It applies a MD5 hash on the key to generate a 128-bit
+identifier, which is used to determine the storage nodes that are responsible
+for serving the key.
+
+> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash
+> to generate a 160-bit identifier, which is then used to determine where in the
+> database each datum is stored. Riak KV treats data as an opaque binary, thus
+> enabling users to store virtually anything.
+
+
+### 4.2 Partitioning Algorithm
+
+One of the key design requirements for Dynamo is that it must scale
+incrementally. This requires a mechanism to dynamically partition the data over
+the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning
+scheme relies on consistent hashing to distribute the load across multiple
+storage hosts. In consistent hashing [10], the output range of a hash function
+is treated as a fixed circular space or “ring” (i.e. the largest hash value
+wraps around to the smallest hash value). Each node in the system is assigned a
+random value within this space which represents its “position” on the ring. Each
+data item identified by a key is assigned to a node by hashing the data item’s
+key to yield its position on the ring, and then walking the ring clockwise to
+find the first node with a position larger than the item’s position. Thus, each
+node becomes responsible for the region in the ring between it and its
+predecessor node on the ring. The principal advantage of consistent hashing is
+that departure or arrival of a node only affects its immediate neighbors and
+other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data around
+> the ring to partitions responsible for storing data. The ring has a maximum
+> key space of 2^160. Each bucket+key (and its associated value) is hashed to a
+> location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Ideally, each storage
+> node will handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo uses
+a variant of consistent hashing (similar to the one used in [10, 20]): instead
+of mapping a node to a single point in the circle, each node gets assigned to
+multiple points in the ring. To this end, Dynamo uses the concept of “virtual
+nodes”. A virtual node looks like a single node in the system, but each node can
+be responsible for more than one virtual node. 
Effectively, when a new node is
+added to the system, it is assigned multiple positions (henceforth, “tokens”) in
+the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed
+in Section 6.
+
+> Riak KV also has the concept of virtual nodes and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+If a node becomes unavailable (due to failures or routine maintenance), the load
+handled by this node is evenly dispersed across the remaining available nodes.
+
+When a node becomes available again, or a new node is added to the system, the
+newly available node accepts a roughly equivalent amount of load from each of
+the other available nodes.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+The number of virtual nodes that a node is responsible for can be decided based
+on its capacity, accounting for heterogeneity in the physical infrastructure.
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters/#the-ring
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts. Each data item is replicated at N hosts, where N is a parameter
+configured “per-instance”. Each key, k, is assigned to a coordinator node
+(described in the previous section). The coordinator is in charge of the
+replication of the data items that fall within its range. In addition to locally
+storing each key within its range, the coordinator replicates these keys at the
+N-1 clockwise successor nodes in the ring. This results in a system where each
+node is responsible for the region of the ring between it and its Nth
+predecessor. In <a href="#figure-2">Figure 2</a>, node B replicates the key k at
+nodes C and D in addition to storing it locally. Node D will store the keys that
+fall in the ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
+> the concept of a bucket we covered above? In Riak KV, the replication
+> parameter, "N" (also called "n_val"), is configurable at the bucket level.
+> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
+> store three replicas of your data on three different partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called the
+preference list. The system is designed, as will be explained in Section 4.8, so
+that every node in the system can determine which nodes should be in this list
+for any particular key. To account for node failures, the preference list contains
+more than N nodes. 
### 4.4 Data Versioning

Dynamo provides eventual consistency, which allows for updates to be propagated
to all replicas asynchronously. A put() call may return to its caller before
the update has been applied at all the replicas, which can result in scenarios
where a subsequent get() operation may return an object that does not have the
latest updates. If there are no failures, then there is a bound on the update
propagation times. However, under certain failure scenarios (e.g., server
outages or network partitions), updates may not arrive at all replicas for an
extended period of time.

> Riak KV is an "eventually consistent" database. All replication is done
> asynchronously and, as you would expect, this could result in a datum being
> returned to the client that is out of date. But don't worry. We built in
> some mechanisms to address this.

There is a category of applications in Amazon's platform that can tolerate such
inconsistencies and can be constructed to operate under these conditions. For
example, the shopping cart application requires that an "Add to Cart" operation
can never be forgotten or rejected. If the most recent state of the cart is
unavailable, and a user makes changes to an older version of the cart, that
change is still meaningful and should be preserved. But at the same time it
shouldn't supersede the currently unavailable state of the cart, which itself
may contain changes that should be preserved. Note that both "add to cart" and
"delete item from cart" operations are translated into put requests to Dynamo.
When a customer wants to add an item to (or remove from) a shopping cart and
the latest version is not available, the item is added to (or removed from) the
older version and the divergent versions are reconciled later.

> Much like Dynamo was suited to the design of the shopping cart, Riak KV and
> its tradeoffs are appropriate for a certain set of use cases. We happen to
> feel that _most_ use cases can tolerate some level of eventual consistency.

In order to provide this kind of guarantee, Dynamo treats the result of each
modification as a new and immutable version of the data. It allows for multiple
versions of an object to be present in the system at the same time. Most of the
time, new versions subsume the previous version(s), and the system itself can
determine the authoritative version (syntactic reconciliation). However,
version branching may happen, in the presence of failures combined with
concurrent updates, resulting in conflicting versions of an object. In these
cases, the system cannot reconcile the multiple versions of the same object and
the client must perform the reconciliation in order to collapse multiple
branches of data evolution back into one (semantic reconciliation). A typical
example of a collapse operation is "merging" different versions of a customer's
shopping cart. Using this reconciliation mechanism, an "add to cart" operation
is never lost. However, deleted items can resurface.

> The same holds true for Riak KV.
> If, by way of some failure and concurrent update (rare but quite possible),
> there come to exist multiple versions of the same object, Riak KV will push
> this decision down to the client (who are we to tell you which is the
> authoritative object?). All that said, if your application doesn't need this
> level of version control, we enable you to turn the usage of vector clocks
> on and off at the bucket level.

It is important to understand that certain failure modes can potentially result
in the system having not just two but several versions of the same data.
Updates in the presence of network partitions and node failures can potentially
result in an object having distinct version sub-histories, which the system
will need to reconcile in the future. This requires us to design applications
that explicitly acknowledge the possibility of multiple versions of the same
data (in order to never lose any updates).

> Ditto.

Dynamo uses vector clocks [12] in order to capture causality between different
versions of the same object. A vector clock is effectively a list of (node,
counter) pairs. One vector clock is associated with every version of every
object. One can determine whether two versions of an object are on parallel
branches or have a causal ordering by examining their vector clocks. If the
counters on the first object's clock are less-than-or-equal to all of the
counters in the second clock, then the first is an ancestor of the second and
can be forgotten. Otherwise, the two changes are considered to be in conflict
and require reconciliation.

> As you may have already figured out, Riak KV uses vector clocks for object
> versioning, too. Here are a whole host of resources to keep you busy for a
> while:
>
> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vector-clock)
>
> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) |
> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
>
> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
>
> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)
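> The ancestor-or-conflict test is simple enough to show in a few lines. Below
> is a sketch (clocks modeled as node-to-counter dicts; not Riak's
> implementation) of the comparison just described:

```python
Clock = dict[str, int]   # node id -> update counter

def descends(a: Clock, b: Clock) -> bool:
    """True if `a` dominates `b`, i.e. `b` is an ancestor of `a`."""
    return all(a.get(node, 0) >= counter for node, counter in b.items())

def relation(a: Clock, b: Clock) -> str:
    if descends(a, b) and descends(b, a):
        return "identical"
    if descends(a, b):
        return "a supersedes b"
    if descends(b, a):
        return "b supersedes a"
    return "conflict: reconciliation required"

old = {"Sx": 1}
new = {"Sx": 2}
sibling = {"Sx": 1, "Sy": 1}
print(relation(new, old))       # a supersedes b
print(relation(new, sibling))   # conflict: reconciliation required
```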
In Dynamo, when a client wishes to update an object, it must specify which
version it is updating. This is done by passing the context it obtained from an
earlier read operation, which contains the vector clock information. Upon
processing a read request, if Dynamo has access to multiple branches that
cannot be syntactically reconciled, it will return all the objects at the
leaves, with the corresponding version information in the context. An update
using this context is considered to have reconciled the divergent versions and
the branches are collapsed into a single new version.

**<figure id="figure-3" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure3.png">
  <figcaption>
    Figure 3: Version evolution of an object over time.
  </figcaption>
</figure>**

To illustrate the use of vector clocks, let us consider the example shown in
<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say
Sx) that handles the write for this key increases its sequence number and uses
it to create the data's vector clock. The system now has the object D1 and its
associated clock [(Sx, 1)]. The client updates the object. Assume the same node
handles this request as well. The system now also has object D2 and its
associated clock [(Sx, 2)]. D2 descends from D1 and therefore over-writes D1;
however, there may be replicas of D1 lingering at nodes that have not yet seen
D2. Let us assume that the same client updates the object again and a different
server (say Sy) handles the request. The system now has data D3 and its
associated clock [(Sx, 2), (Sy, 1)].

Next assume a different client reads D2 and then tries to update it, and
another node (say Sz) does the write. The system now has D4 (descendant of D2)
whose version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2
could determine, upon receiving D4 and its clock, that D1 and D2 are
overwritten by the new data and can be garbage collected. A node that is aware
of D3 and receives D4 will find that there is no causal relation between them.
In other words, there are changes in D3 and D4 that are not reflected in each
other. Both versions of the data must be kept and presented to a client (upon
a read) for semantic reconciliation.

Now assume some client reads both D3 and D4 (the context will reflect that both
values were found by the read). The read's context is a summary of the clocks
of D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client performs the
reconciliation and node Sx coordinates the write, Sx will update its sequence
number in the clock. The new data D5 will have the following clock: [(Sx, 3),
(Sy, 1), (Sz, 1)].

A possible issue with vector clocks is that the size of vector clocks may grow
if many servers coordinate the writes to an object. In practice, this is not
likely because the writes are usually handled by one of the top N nodes in the
preference list. In case of network partitions or multiple server failures,
write requests may be handled by nodes that are not in the top N nodes in the
preference list, causing the size of the vector clock to grow. In these
scenarios, it is desirable to limit the size of the vector clock. To this end,
Dynamo employs the following clock truncation scheme: Along with each (node,
counter) pair, Dynamo stores a timestamp that indicates the last time the node
updated the data item. When the number of (node, counter) pairs in the vector
clock reaches a threshold (say 10), the oldest pair is removed from the clock.
Clearly, this truncation scheme can lead to inefficiencies in reconciliation as
the descendant relationships cannot be derived accurately. However, this
problem has not surfaced in production and therefore this issue has not been
thoroughly investigated.

> Riak KV prunes vector clocks as they grow to keep their size under control.


### 4.5 Execution of get() and put() operations

Any storage node in Dynamo is eligible to receive client get and put operations
for any key. In this section, for the sake of simplicity, we describe how these
operations are performed in a failure-free environment, and in the subsequent
section we describe how read and write operations are executed during failures.

> Any node in the Riak KV ring can coordinate a request. The Riak KV
> information in this section applies to a failure-free environment.

Both get and put operations are invoked using Amazon's infrastructure-specific
request processing framework over HTTP. There are two strategies that a client
can use to select a node: (1) route its request through a generic load balancer
that will select a node based on load information, or (2) use a partition-aware
client library that routes requests directly to the appropriate coordinator
nodes.
The advantage of the first approach is that the client does not have to link
any code specific to Dynamo in its application, whereas the second strategy can
achieve lower latency because it skips a potential forwarding step.

A node handling a read or write operation is known as the coordinator.
Typically, this is the first among the top N nodes in the preference list. If
the requests are received through a load balancer, requests to access a key may
be routed to any random node in the ring. In this scenario, the node that
receives the request will not coordinate it if the node is not in the top N of
the requested key's preference list. Instead, that node will forward the
request to the first among the top N nodes in the preference list.

Read and write operations involve the first N healthy nodes in the preference
list, skipping over those that are down or inaccessible. When all nodes are
healthy, the top N nodes in a key's preference list are accessed. When there
are node failures or network partitions, nodes that are lower ranked in the
preference list are accessed.

To maintain consistency among its replicas, Dynamo uses a consistency protocol
similar to those used in quorum systems. This protocol has two key configurable
values: R and W. R is the minimum number of nodes that must participate in a
successful read operation. W is the minimum number of nodes that must
participate in a successful write operation. Setting R and W such that
R + W > N yields a quorum-like system. In this model, the latency of a get (or
put) operation is dictated by the slowest of the R (or W) replicas. For this
reason, R and W are usually configured to be less than N, to provide better
latency.

> Riak KV makes use of the same values. But, thanks to our concept of buckets,
> we made it a bit more customizable. The default R and W values are set at
> the bucket level but can be configured at the request level if the developer
> deems it necessary for certain data. "Quorum" as described in Dynamo is the
> default setting in Riak KV.
>
> Some more resources on R and W:
>
> [REST API]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/http/)
>
> [Writing Data]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/creating-objects/)
>
> [Reading Data]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/reading-objects/)

Upon receiving a put() request for a key, the coordinator generates the vector
clock for the new version and writes the new version locally. The coordinator
then sends the new version (along with the new vector clock) to the N
highest-ranked reachable nodes. If at least W-1 nodes respond, then the write
is considered successful.

> In Riak KV a write is considered successful when the total number of
> responding writes equals W. This need not be a durable write, which is a
> separate value in Riak KV labeled DW.

Similarly, for a get() request, the coordinator requests all existing versions
of data for that key from the N highest-ranked reachable nodes in the
preference list for that key, and then waits for R responses before returning
the result to the client. If the coordinator ends up gathering multiple
versions of the data, it returns all the versions it deems to be causally
unrelated. The divergent versions are then reconciled and the reconciled
version superseding the current versions is written back.

> Same for Riak KV. Reconciling divergent versions in Riak KV is called
> [Read Repair]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication/#read-repair).
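> In outline, the coordinator's read path looks something like the sketch
> below (a rough illustration only; `replicas` and `fetch` are stand-ins, not
> Riak or Dynamo APIs): fan the request out to all N replicas and return as
> soon as R of them have answered.

```python
import concurrent.futures as cf

def quorum_get(replicas, fetch, key, r, timeout=1.0):
    """Ask all N replicas for `key`; return once R responses arrive."""
    pool = cf.ThreadPoolExecutor(max_workers=len(replicas))
    futures = [pool.submit(fetch, replica, key) for replica in replicas]
    results = []
    try:
        for fut in cf.as_completed(futures, timeout=timeout):
            results.append(fut.result())
            if len(results) >= r:
                return results   # may contain causally unrelated siblings
    except TimeoutError:
        pass                     # fall through to the failure case
    finally:
        pool.shutdown(wait=False)
    raise RuntimeError(f"only {len(results)} of {r} required replies")
```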
### 4.6 Handling Failures: Hinted Handoff

If Dynamo used a traditional quorum approach it would be unavailable during
server failures and network partitions, and would have reduced durability even
under the simplest of failure conditions. To remedy this it does not enforce
strict quorum membership; instead, it uses a "sloppy quorum": all read and
write operations are performed on the first N healthy nodes from the preference
list, which may not always be the first N nodes encountered while walking the
consistent hashing ring.

> [Hinted handoff] is built into Riak KV's core.
>
> You can get a glimpse of Riak KV's preference list (or *preflist*)
> calculation in the [Replication] walkthrough.

[Hinted handoff]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#hinted-handoff
[Replication]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/replication/

Consider the example of the Dynamo configuration given in
<a href="#figure-2">Figure 2</a> with N=3. In this example, if node A is
temporarily down or unreachable during a write operation, then a replica that
would normally have lived on A will now be sent to node D. This is done to
maintain the desired availability and durability guarantees. The replica sent
to D will have a hint in its metadata that suggests which node was the intended
recipient of the replica (in this case A). Nodes that receive hinted replicas
will keep them in a separate local database that is scanned periodically. Upon
detecting that A has recovered, D will attempt to deliver the replica to A.
Once the transfer succeeds, D may delete the object from its local store
without decreasing the total number of replicas in the system.

Using hinted handoff, Dynamo ensures that read and write operations do not fail
due to temporary node or network failures. Applications that need the highest
level of availability can set W to 1, which ensures that a write is accepted as
long as a single node in the system has durably written the key to its local
store. Thus, the write request is only rejected if all nodes in the system are
unavailable. However, in practice, most Amazon services in production set a
higher W to meet the desired level of durability. A more detailed discussion of
configuring N, R and W follows in section 6.

> As mentioned previously, Riak KV does not require that a write be durable,
> only that a vnode responds in the affirmative. If you require a durable
> write in the way mentioned here, use DW.

It is imperative that a highly available storage system be capable of handling
the failure of entire data centers. Data center failures happen due to power
outages, cooling failures, network failures, and natural disasters. Dynamo is
configured such that each object is replicated across multiple data centers. In
essence, the preference list of a key is constructed such that the storage
nodes are spread across multiple data centers. These data centers are connected
through high-speed network links. This scheme of replicating across multiple
data centers allows us to handle entire data center failures without a data
outage.

> [Multi Datacenter Replication] was previously only implemented in the
> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. It is
> now available in all versions from Riak KV 2.9.0 onwards.

[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/architecture/
[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
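> The bookkeeping behind hinted handoff is small, as the toy model below shows
> (hypothetical names, not Riak's implementation): a fallback node stores the
> replica next to a hint naming the intended owner, and a periodic scan hands
> the replica back once the owner recovers.

```python
class FallbackStore:
    """Toy model of a node's hinted-replica side table."""

    def __init__(self):
        self.hinted = {}   # (bucket, key) -> (value, intended_owner)

    def write_with_hint(self, bucket, key, value, intended_owner):
        # Hinted replicas live apart from normal data, tagged with the
        # node that should eventually own them.
        self.hinted[(bucket, key)] = (value, intended_owner)

    def handoff(self, is_up, deliver):
        """Periodic scan; `is_up(node)` and `deliver(node, bk, v)` are
        stand-ins for failure detection and inter-node transfer."""
        for bk, (value, owner) in list(self.hinted.items()):
            if is_up(owner) and deliver(owner, bk, value):
                del self.hinted[bk]   # total replica count is unchanged
```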
### 4.7 Handling permanent failures: Replica synchronization

Hinted handoff works best if the system membership churn is low and node
failures are transient. There are scenarios under which hinted replicas become
unavailable before they can be returned to the original replica node. To handle
this and other threats to durability, Dynamo implements an anti-entropy
(replica synchronization) protocol to keep the replicas synchronized.

> Read repair, mentioned above, is the simplest form of anti-entropy. But it
> is passive, not active as this section describes.

To detect the inconsistencies between replicas faster and to minimize the
amount of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a
hash tree whose leaves are hashes of the values of individual keys. Parent
nodes higher in the tree are hashes of their respective children. The principal
advantage of a Merkle tree is that each branch of the tree can be checked
independently without requiring nodes to download the entire tree or the entire
data set. Moreover, Merkle trees help in reducing the amount of data that needs
to be transferred while checking for inconsistencies among replicas. For
instance, if the hash values of the roots of two trees are equal, then the
values of the leaf nodes in the trees are equal and the nodes require no
synchronization. If not, it implies that the values of some replicas are
different. In such cases, the nodes may exchange the hash values of children,
and the process continues until it reaches the leaves of the trees, at which
point the hosts can identify the keys that are "out of sync". Merkle trees
minimize the amount of data that needs to be transferred for synchronization
and reduce the number of disk reads performed during the anti-entropy process.

> Riak KV implements a Merkle-tree-based Active Anti-Entropy (*AAE*).

Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
separate Merkle tree for each key range (the set of keys covered by a virtual
node) it hosts. This allows nodes to compare whether the keys within a key
range are up-to-date. In this scheme, two nodes exchange the root of the Merkle
tree corresponding to the key ranges that they host in common. Subsequently,
using the tree traversal scheme described above, the nodes determine if they
have any differences and perform the appropriate synchronization action. The
disadvantage with this scheme is that many key ranges change when a node joins
or leaves the system, thereby requiring the tree(s) to be recalculated. This
issue is addressed, however, by the refined partitioning scheme described in
Section 6.2.
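> A toy version of the exchange makes the savings obvious: only branches whose
> hashes disagree are walked. (This is a simplified fixed binary tree over a
> power-of-two number of keys, not Riak's AAE implementation.)

```python
import hashlib

def h(data: bytes) -> bytes:
    return hashlib.sha1(data).digest()

def build_tree(leaves):
    """Merkle tree built bottom-up; len(leaves) is a power of two."""
    levels = [[h(leaf) for leaf in leaves]]
    while len(levels[-1]) > 1:
        prev = levels[-1]
        levels.append([h(prev[i] + prev[i + 1])
                       for i in range(0, len(prev), 2)])
    return levels   # levels[0] = leaf hashes, levels[-1][0] = root

def out_of_sync(a, b, level=None, idx=0):
    """Leaf indexes whose hashes differ, descending only into branches
    whose hashes disagree."""
    if level is None:
        level = len(a) - 1          # start at the root
    if a[level][idx] == b[level][idx]:
        return []                   # whole branch already in sync
    if level == 0:
        return [idx]                # a key that must be synchronized
    return (out_of_sync(a, b, level - 1, 2 * idx) +
            out_of_sync(a, b, level - 1, 2 * idx + 1))

t1 = build_tree([b"a", b"b", b"c", b"d"])
t2 = build_tree([b"a", b"B", b"c", b"d"])
print(out_of_sync(t1, t2))   # [1]: only one branch was traversed
```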
### 4.8 Membership and Failure Detection

> This section is well expressed in [Adding and Removing Nodes] and
> [Failure Scenarios].

[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes/
[Failure Scenarios]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency/

#### 4.8.1 Ring Membership

> Riak KV operators can trigger node management via the
> [riak-admin command-line tool].

[riak-admin command-line tool]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/

In Amazon's environment, node outages (due to failures and maintenance tasks)
are often transient but may last for extended intervals. A node outage rarely
signifies a permanent departure and therefore should not result in rebalancing
of the partition assignment or repair of the unreachable replicas. Similarly,
manual error could result in the unintentional startup of new Dynamo nodes. For
these reasons, it was deemed appropriate to use an explicit mechanism to
initiate the addition and removal of nodes from a Dynamo ring. An administrator
uses a command line tool or a browser to connect to a Dynamo node and issue a
membership change to join a node to a ring or remove a node from a ring. The
node that serves the request writes the membership change and its time of issue
to persistent store. The membership changes form a history because nodes can be
removed and added back multiple times.

> Nodes are manually added using the `riak-admin cluster join` command.
>
> When a node permanently departs, rebalancing is triggered using the
> `riak-admin cluster leave` command.

A gossip-based protocol propagates membership changes and maintains an
eventually consistent view of membership. Each node contacts a peer chosen at
random every second and the two nodes efficiently reconcile their persisted
membership change histories.

> Riak KV's ring state holds membership information and is propagated via
> [gossiping], including random reconciliation, defaulting to once a minute.

[gossiping]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#gossiping

When a node starts for the first time, it chooses its set of tokens (virtual
nodes in the consistent hash space) and maps nodes to their respective token
sets. The mapping is persisted on disk and initially contains only the local
node and token set. The mappings stored at different Dynamo nodes are
reconciled during the same communication exchange that reconciles the
membership change histories. Therefore, partitioning and placement information
also propagates via the gossip-based protocol and each storage node is aware of
the token ranges handled by its peers. This allows each node to forward a key's
read/write operations to the right set of nodes directly.

> These tokens are vnodes (virtual nodes) in Riak KV.


#### 4.8.2 External Discovery

The mechanism described above could temporarily result in a logically
partitioned Dynamo ring. For example, the administrator could contact node A to
join A to the ring, then contact node B to join B to the ring. In this
scenario, nodes A and B would each consider itself a member of the ring, yet
neither would be immediately aware of the other. To prevent logical partitions,
some Dynamo nodes play the role of seeds. Seeds are nodes that are discovered
via an external mechanism and are known to all nodes. Because all nodes
eventually reconcile their membership with a seed, logical partitions are
highly unlikely. Seeds can be obtained either from static configuration or from
a configuration service. Typically, seeds are fully functional nodes in the
Dynamo ring.

> To rectify these sorts of logical partitions, multiple Riak cluster changes
> are configured as one batch. Any changes must first be viewed with
> `riak-admin cluster plan`, then committed with `riak-admin cluster commit`.
> The new ring state is then gossiped.
>
> See _[The Node Join Process]_ for more.

[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
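> The gossip loop itself fits in a few lines. Here is a hedged sketch (our own
> structure, not riak_core's) of periodic reconciliation in which seeds are
> always candidates, so isolated islands eventually merge:

```python
import random, time

class Member:
    """Toy gossip of membership-change histories."""

    def __init__(self, name, seeds):
        self.name = name
        self.seeds = seeds     # externally known, well-connected nodes
        self.history = {}      # node name -> (change, time_of_issue)

    def record(self, node, change):
        self.history[node] = (change, time.time())

    def gossip_once(self, peers):
        # Seeds are always in the candidate set.
        candidates = [p for p in set(peers) | set(self.seeds) if p is not self]
        peer = random.choice(candidates)
        # Reconcile both sides: the newest change per node wins.
        for node in set(self.history) | set(peer.history):
            mine = self.history.get(node)
            theirs = peer.history.get(node)
            newest = max((e for e in (mine, theirs) if e), key=lambda e: e[1])
            self.history[node] = peer.history[node] = newest
```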
#### 4.8.3 Failure Detection

Failure detection in Dynamo is used to avoid attempts to communicate with
unreachable peers during get() and put() operations and when transferring
partitions and hinted replicas. For the purpose of avoiding failed attempts at
communication, a purely local notion of failure detection is entirely
sufficient: node A may consider node B failed if node B does not respond to
node A's messages (even if B is responsive to node C's messages). In the
presence of a steady rate of client requests generating inter-node
communication in the Dynamo ring, a node A quickly discovers that a node B is
unresponsive when B fails to respond to a message; node A then uses alternate
nodes to service requests that map to B's partitions, and periodically retries
B to check for the latter's recovery. In the absence of client requests to
drive traffic between two nodes, neither node really needs to know whether the
other is reachable and responsive.

Decentralized failure detection protocols use a simple gossip-style protocol
that enables each node in the system to learn about the arrival (or departure)
of other nodes. For detailed information on decentralized failure detectors and
the parameters affecting their accuracy, the interested reader is referred to
[8]. Early designs of Dynamo used a decentralized failure detector to maintain
a globally consistent view of failure state. Later it was determined that the
explicit node join and leave methods obviate the need for a global view of
failure state. This is because nodes are notified of permanent node additions
and removals by the explicit node join and leave methods, and temporary node
failures are detected by the individual nodes when they fail to communicate
with others (while forwarding requests).

> Riak KV follows the same mechanism, manually triggering permanent ring state
> changes and gossiping the new state.


### 4.9 Adding/Removing Storage Nodes

When a new node (say X) is added into the system, it gets assigned a number of
tokens that are randomly scattered on the ring. For every key range that is
assigned to node X, there may be a number of nodes (less than or equal to N)
that are currently in charge of handling keys that fall within its token range.
Due to the allocation of key ranges to X, some existing nodes no longer have to
store some of their keys, and these nodes transfer those keys to X. Let us
consider a simple bootstrapping scenario where node X is added to the ring
shown in <a href="#figure-2">Figure 2</a> between A and B. When X is added to
the system, it is in charge of storing keys in the ranges (F, G], (G, A] and
(A, X]. As a consequence, nodes B, C and D no longer have to store the keys in
these respective ranges. Therefore, nodes B, C, and D will offer to, and upon
confirmation from X, transfer the appropriate set of keys. When a node is
removed from the system, the reallocation of keys happens in a reverse process.

> Riak KV does not randomly assign vnodes, but rather iterates through the
> list of partitions, assigning them to nodes in a round-robin style.

Operational experience has shown that this approach distributes the load of key
distribution uniformly across the storage nodes, which is important to meet the
latency requirements and to ensure fast bootstrapping. Finally, by adding a
confirmation round between the source and the destination, it is made sure that
the destination node does not receive any duplicate transfers for a given key
range.
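> In its simplest form, the round-robin claim mentioned above looks like this
> (a sketch; Riak's real claim algorithm also enforces spacing so that
> adjacent partitions land on distinct nodes):

```python
def claim_round_robin(num_partitions, nodes):
    """Deal equal-sized partitions to nodes in round-robin order."""
    return [nodes[i % len(nodes)] for i in range(num_partitions)]

print(claim_round_robin(8, ["node1", "node2", "node3"]))
# ['node1', 'node2', 'node3', 'node1', 'node2', 'node3', 'node1', 'node2']
```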
## 5. Implementation

In Dynamo, each storage node has three main software components: request
coordination, membership and failure detection, and a local persistence engine.
All these components are implemented in Java.

> Riak KV is implemented in Erlang. Request coordination and membership
> behavior is defined by [riak_core] and implemented by [Riak KV].

[riak_core]: http://github.com/basho/riak_core
[Riak KV]: http://github.com/basho/riak_kv

Dynamo's local persistence component allows for different storage engines to be
plugged in. Engines that are in use are Berkeley Database (BDB) Transactional
Data Store, BDB Java Edition, MySQL, and an in-memory buffer with persistent
backing store. The main reason for designing a pluggable persistence component
is to choose the storage engine best suited for an application's access
patterns. For instance, BDB can handle objects typically in the order of tens
of kilobytes whereas MySQL can handle objects of larger sizes. Applications
choose Dynamo's local persistence engine based on their object size
distribution. The majority of Dynamo's production instances use BDB
Transactional Data Store.

> Riak KV ships with various [backend options]. [Bitcask] is the default, but
> [LevelDB] and Main [Memory] are also used heavily in production (in that
> order). You can also use more than one backend in production via the [Multi]
> backend configuration.
>
> Bitcask is a fast and reliable choice, but does have some limitations at
> very large scales. For larger clusters, you may want to choose LevelDB
> (which also supports [secondary indexes]). The Memory backend is an
> excellent choice when speed is important and durability is not. It also has
> TTL support.

[backend options]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/
[Bitcask]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask/
[LevelDB]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveldb/
[Memory]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/memory/
[Multi]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/multi/
[secondary indexes]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/secondary-indexes/

The request coordination component is built on top of an event-driven messaging
substrate where the message processing pipeline is split into multiple stages
similar to the SEDA architecture [24]. All communications are implemented using
Java NIO channels. The coordinator executes the read and write requests on
behalf of clients by collecting data from one or more nodes (in the case of
reads) or storing data at one or more nodes (for writes). Each client request
results in the creation of a state machine on the node that received the client
request. The state machine contains all the logic for identifying the nodes
responsible for a key, sending the requests, waiting for responses, potentially
doing retries, processing the replies and packaging the response to the client.
Each state machine instance handles exactly one client request.
For instance, a read operation implements the following state machine: (i) send
read requests to the nodes, (ii) wait for the minimum number of required
responses, (iii) if too few replies were received within a given time bound,
fail the request, (iv) otherwise gather all the data versions and determine the
ones to be returned, and (v) if versioning is enabled, perform syntactic
reconciliation and generate an opaque write context that contains the vector
clock that subsumes all the remaining versions. For the sake of brevity, the
failure handling and retry states are left out.

> Request coordination in Riak KV uses Erlang message passing, but follows a
> similar state machine.

After the read response has been returned to the caller, the state machine
waits for a small period of time to receive any outstanding responses. If stale
versions were returned in any of the responses, the coordinator updates those
nodes with the latest version. This process is called read repair because it
repairs replicas that have missed a recent update at an opportunistic time and
relieves the anti-entropy protocol from having to do it.

> Riak KV implements [Read Repair].

[Read Repair]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication/#read-repair

As noted earlier, write requests are coordinated by one of the top N nodes in
the preference list. Although it is desirable always to have the first node
among the top N to coordinate the writes, thereby serializing all writes at a
single location, this approach has led to uneven load distribution, resulting
in SLA violations. This is because the request load is not uniformly
distributed across objects. To counter this, any of the top N nodes in the
preference list is allowed to coordinate the writes. In particular, since each
write usually follows a read operation, the coordinator for a write is chosen
to be the node that replied fastest to the previous read operation, which is
stored in the context information of the request. This optimization enables us
to pick the node that has the data that was read by the preceding read
operation, thereby increasing the chances of getting "read-your-writes"
consistency. It also reduces variability in the performance of the request
handling, which improves the performance at the 99.9th percentile.


## 6. Experiences & Lessons Learned

> Much of this section relates to benchmarks run against Dynamo. You can run
> [Basho Bench] against your own Riak cluster to discover your own
> optimal values.

[Basho Bench]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/benchmarking/

Dynamo is used by several services with different configurations. These
instances differ by their version reconciliation logic and read/write quorum
characteristics. The following are the main patterns in which Dynamo is used:

* Business logic specific reconciliation: This is a popular use case for
Dynamo. Each data object is replicated across multiple nodes. In case of
divergent versions, the client application performs its own reconciliation
logic. The shopping cart service discussed earlier is a prime example of this
category. Its business logic reconciles objects by merging different versions
of a customer's shopping cart.

> Riak KV currently supports simple conflict resolution by way of read repair,
> leaving more complex reconciliation to the client. There are several tools
> to help simplify this task, such as [Statebox].
>
> Riak KV also supports a simple reconciliation strategy, called [CRDTs
> (Commutative Replicated Data Types)], for reconciling common data types like
> sets and counters.

[Statebox]: https://github.com/mochi/statebox_riak
[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-types/


* Timestamp based reconciliation: This case differs from the previous one only
in the reconciliation mechanism. In case of divergent versions, Dynamo performs
simple timestamp based reconciliation logic of "last write wins"; i.e., the
object with the largest physical timestamp value is chosen as the correct
version. The service that maintains customers' session information is a good
example of a service that uses this mode.

> Riak also supports this for high-performance cases where accuracy is less
> important than speed.

* High performance read engine: While Dynamo is built to be an "always
writeable" data store, a few services are tuning its quorum characteristics and
using it as a high performance read engine. Typically, these services have a
high read request rate and only a small number of updates. In this
configuration, typically R is set to be 1 and W to be N. For these services,
Dynamo provides the ability to partition and replicate their data across
multiple nodes, thereby offering incremental scalability. Some of these
instances function as the authoritative persistence cache for data stored in
more heavyweight backing stores. Services that maintain product catalog and
promotional items fit in this category.

> Riak can be used in this manner.

The main advantage of Dynamo is that its client applications can tune the
values of N, R and W to achieve their desired levels of performance,
availability and durability. For instance, the value of N determines the
durability of each object. A typical value of N used by Dynamo's users is 3.

The values of W and R impact object availability, durability and consistency.
For instance, if W is set to 1, then the system will never reject a write
request as long as there is at least one node in the system that can
successfully process a write request. However, low values of W and R can
increase the risk of inconsistency, as write requests are deemed successful and
returned to the clients even if they are not processed by a majority of the
replicas. This also introduces a vulnerability window for durability when a
write request is successfully returned to the client even though it has been
persisted at only a small number of nodes.

Traditional wisdom holds that durability and availability go hand-in-hand.
However, this is not necessarily true here. For instance, the vulnerability
window for durability can be decreased by increasing W. This may increase the
probability of rejecting requests (thereby decreasing availability) because
more storage hosts need to be alive to process a write request.

The common (N,R,W) configuration used by several instances of Dynamo is
(3,2,2). These values are chosen to meet the necessary levels of performance,
durability, consistency, and availability SLAs.
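> In Riak these knobs are exposed per bucket and per request. As a sketch
> (using Riak's HTTP interface; the node address and the `carts` bucket are
> assumptions for the example), a Dynamo-style (3,2,2) configuration looks
> like this:

```python
import requests

BASE = "http://localhost:8098"   # assumes a local Riak node's HTTP port

# Set the bucket-level defaults: N=3 replicas, R=2, W=2.
# (n_val is best chosen before the bucket holds data.)
resp = requests.put(f"{BASE}/buckets/carts/props",
                    json={"props": {"n_val": 3, "r": 2, "w": 2}})
resp.raise_for_status()

# Per-request override: a latency-sensitive read that waits for only
# one replica instead of the bucket default.
resp = requests.get(f"{BASE}/buckets/carts/keys/user-42", params={"r": 1})
```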
All the measurements presented in this section were taken on a live system
operating with a configuration of (3,2,2) and running a couple hundred nodes
with homogeneous hardware configurations. As mentioned earlier, each instance
of Dynamo contains nodes that are located in multiple datacenters. These
datacenters are typically connected through high-speed network links. Recall
that to generate a successful get (or put) response, R (or W) nodes need to
respond to the coordinator. Clearly, the network latencies between datacenters
affect the response time, and the nodes (and their datacenter locations) are
chosen such that the applications' target SLAs are met.

> Ditto for Riak.

### 6.1 Balancing Performance and Durability

While Dynamo's principal design goal is to build a highly available data store,
performance is an equally important criterion in Amazon's platform. As noted
earlier, to provide a consistent customer experience, Amazon's services set
their performance targets at higher percentiles (such as the 99.9th or 99.99th
percentiles). A typical SLA required of services that use Dynamo is that 99.9%
of the read and write requests execute within 300ms.

Since Dynamo is run on standard commodity hardware components that have far
less I/O throughput than high-end enterprise servers, providing consistently
high performance for read and write operations is a non-trivial task. The
involvement of multiple storage nodes in read and write operations makes it
even more challenging, since the performance of these operations is limited by
the slowest of the R or W replicas. <a href="#figure-4">Figure 4</a> shows the
average and 99.9th percentile latencies of Dynamo's read and write operations
during a period of 30 days. As seen in the figure, the latencies exhibit a
clear diurnal pattern, which is a result of the diurnal pattern in the incoming
request rate (i.e., there is a significant difference in request rate between
the daytime and night). Moreover, the write latencies are higher than read
latencies, obviously because write operations always result in disk access.
Also, the 99.9th percentile latencies are around 200 ms and are an order of
magnitude higher than the averages. This is because the 99.9th percentile
latencies are affected by several factors such as variability in request load,
object sizes, and locality patterns.

**<figure id="figure-4" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure4.png">
  <figcaption>
    Figure 4: Average and 99.9 percentiles of latencies for read and write
    requests during our peak request season of December 2006. The intervals
    between consecutive ticks in the x-axis correspond to 12 hours. Latencies
    follow a diurnal pattern similar to the request rate and 99.9 percentile
    latencies are an order of magnitude higher than averages.
  </figcaption>
</figure>**

While this level of performance is acceptable for a number of services, a few
customer-facing services required higher levels of performance. For these
services, Dynamo provides the ability to trade off durability guarantees for
performance. In this optimization, each storage node maintains an object buffer
in its main memory. Each write operation is stored in the buffer and gets
periodically written to storage by a writer thread. In this scheme, read
operations first check if the requested key is present in the buffer. If so,
the object is read from the buffer instead of the storage engine.

> This is more similar to Riak's W value, since only DW requires a durable
> write to respond as a success.

This optimization has resulted in lowering the 99.9th percentile latency by a
factor of 5 during peak traffic, even for a very small buffer of a thousand
objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure,
write buffering smoothes out higher percentile latencies. Obviously, this
scheme trades durability for performance. In this scheme, a server crash can
result in missing writes that were queued up in the buffer. To reduce the
durability risk, the write operation is refined to have the coordinator choose
one out of the N replicas to perform a "durable write". Since the coordinator
waits only for W responses, the performance of the write operation is not
affected by the performance of the durable write operation performed by a
single replica.

**<figure id="figure-5" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure5.png">
  <figcaption>
    Figure 5: Comparison of performance of 99.9th percentile latencies for
    buffered vs. non-buffered writes over a period of 24 hours. The intervals
    between consecutive ticks in the x-axis correspond to one hour.
  </figcaption>
</figure>**

> Setting DW=1 will replicate this behavior.
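> The write-back buffer is a classic pattern; a toy version (illustrative
> only, with `flush` and `read_disk` as stand-ins for the storage engine)
> makes the durability trade explicit:

```python
import threading, time

class BufferedStore:
    """Toy write-back buffer in front of a durable store."""

    def __init__(self, flush, interval=0.1):
        self.buffer = {}
        self.lock = threading.Lock()
        self.flush = flush   # stand-in for the storage engine's write
        threading.Thread(target=self._writer, args=(interval,),
                         daemon=True).start()

    def put(self, key, value):
        with self.lock:
            self.buffer[key] = value   # fast, but not yet durable

    def get(self, key, read_disk):
        with self.lock:
            if key in self.buffer:     # serve hot objects from memory
                return self.buffer[key]
        return read_disk(key)

    def _writer(self, interval):
        while True:
            time.sleep(interval)
            with self.lock:
                pending, self.buffer = self.buffer, {}
            for key, value in pending.items():
                self.flush(key, value)   # a crash here loses queued writes
```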
### 6.2 Ensuring Uniform Load distribution

Dynamo uses consistent hashing to partition its key space across its replicas
and to ensure uniform load distribution. A uniform key distribution can help us
achieve uniform load distribution assuming the access distribution of keys is
not highly skewed. In particular, Dynamo's design assumes that even where there
is a significant skew in the access distribution there are enough keys in the
popular end of the distribution so that the load of handling popular keys can
be spread across the nodes uniformly through partitioning. This section
discusses the load imbalance seen in Dynamo and the impact of different
partitioning strategies on load distribution.

> Riak uses SHA1-based consistent hashing for [partitioning].

[partitioning]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication/#understanding-replication-by-example

To study the load imbalance and its correlation with request load, the total
number of requests received by each node was measured for a period of 24 hours,
broken down into intervals of 30 minutes. In a given time window, a node is
considered to be "in-balance" if the node's request load deviates from the
average load by less than a certain threshold (here, 15%). Otherwise the node
was deemed "out-of-balance". <a href="#figure-6">Figure 6</a> presents the
fraction of nodes that are "out-of-balance" (henceforth, "imbalance ratio")
during this time period. For reference, the corresponding request load received
by the entire system during this time period is also plotted. As seen in the
figure, the imbalance ratio decreases with increasing load. For instance,
during low loads the imbalance ratio is as high as 20% and during high loads it
is close to 10%. Intuitively, this can be explained by the fact that under high
loads, a large number of popular keys are accessed and, due to uniform
distribution of keys, the load is evenly distributed. However, during low loads
(where load is 1/8th of the measured peak load), fewer popular keys are
accessed, resulting in a higher load imbalance.

**<figure id="figure-6" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure6.png">
  <figcaption>
    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
    request load is above a certain threshold from the average system load) and
    their corresponding request load. The interval between ticks in x-axis
    corresponds to a time period of 30 minutes.
  </figcaption>
</figure>**

<i>This section discusses how Dynamo's partitioning scheme has evolved over
time and its implications on load distribution.</i>

<strong>Strategy 1:</strong> T random tokens per node and partition by token
value: This was the initial strategy deployed in production (and described in
Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
at random from the hash space). The tokens of all nodes are ordered according
to their values in the hash space. Every two consecutive tokens define a range.
The last token and the first token form a range that "wraps" around from the
highest value to the lowest value in the hash space. Because the tokens are
chosen randomly, the ranges vary in size. As nodes join and leave the system,
the token set changes and consequently the ranges change. Note that the space
needed to maintain the membership at each node increases linearly with the
number of nodes in the system.

> Riak uses equal-sized partitions with a round-robin distribution, not
> variably-sized partitions that are randomly distributed.

While using this strategy, the following problems were encountered. First, when
a new node joins the system, it needs to "steal" its key ranges from other
nodes. However, the nodes handing the key ranges off to the new node have to
scan their local persistence store to retrieve the appropriate set of data
items. Note that performing such a scan operation on a production node is
tricky, as scans are highly resource intensive operations and they need to be
executed in the background without affecting the customer performance. This
requires us to run the bootstrapping task at the lowest priority. However, this
significantly slows the bootstrapping process, and during the busy shopping
season, when the nodes are handling millions of requests a day, the
bootstrapping has taken almost a day to complete. Second, when a node
joins/leaves the system, the key ranges handled by many nodes change and the
Merkle trees for the new ranges need to be recalculated, which is a non-trivial
operation to perform on a production system. Finally, there was no easy way to
take a snapshot of the entire key space due to the randomness in key ranges,
and this made the process of archival complicated. In this scheme, archiving
the entire key space requires us to retrieve the keys from each node
separately, which is highly inefficient.

The fundamental issue with this strategy is that the schemes for data
partitioning and data placement are intertwined. For instance, in some cases,
it is preferred to add more nodes to the system in order to handle an increase
in request load. However, in this scenario, it is not possible to add nodes
without affecting data partitioning. Ideally, it is desirable to use
independent schemes for partitioning and placement. To this end, the following
strategies were evaluated:

<strong>Strategy 2:</strong> T random tokens per node and equal sized
partitions: In this strategy, the hash space is divided into Q equally sized
partitions/ranges and each node is assigned T random tokens. Q is usually set
such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In
this strategy, the tokens are only used to build the function that maps values
in the hash space to the ordered lists of nodes and not to decide the
partitioning.
A partition is placed on the first N unique nodes that are encountered while
walking the consistent hashing ring clockwise from the end of the partition.
<a href="#figure-7">Figure 7</a> illustrates this strategy for N=3. In this
example, nodes A, B, C are encountered while walking the ring from the end of
the partition that contains key k1. The primary advantages of this strategy
are: (i) decoupling of partitioning and partition placement, and (ii) enabling
the possibility of changing the placement scheme at runtime.

> As mentioned before, Riak uses equal-sized partitions, but not random
> distribution.

**<figure id="figure-7" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure7-small.png">
  <figcaption>
    Figure 7: Partitioning and placement of keys in the three strategies. A, B,
    and C depict the three unique nodes that form the preference list for the
    key k1 on the consistent hashing ring (N=3). The shaded area indicates the
    key range for which nodes A, B, and C form the preference list. Dark arrows
    indicate the token locations for various nodes.
  </figcaption>
</figure>**

<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
Similar to strategy 2, this strategy divides the hash space into Q equally
sized partitions and the placement of partitions is decoupled from the
partitioning scheme. Moreover, each node is assigned Q/S tokens, where S is the
number of nodes in the system. When a node leaves the system, its tokens are
randomly distributed to the remaining nodes such that these properties are
preserved. Similarly, when a node joins the system, it "steals" tokens from
nodes in the system in a way that preserves these properties.

> Riak most closely follows strategy 3.
>
> See [The Node Join Process] and [Replacing a Node].

[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
[Replacing a Node]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/replacing-node/

The efficiency of these three strategies was evaluated for a system with S=30
and N=3. However, comparing these different strategies in a fair manner is
hard, as different strategies have different configurations to tune their
efficiency. For instance, the load distribution property of strategy 1 depends
on the number of tokens (i.e., T) while strategy 3 depends on the number of
partitions (i.e., Q). One fair way to compare these strategies is to evaluate
the skew in their load distribution while all strategies use the same amount of
space to maintain their membership information. For instance, in strategy 1
each node needs to maintain the token positions of all the nodes in the ring,
and in strategy 3 each node needs to maintain the information regarding the
partitions assigned to each node.

In our next experiment, these strategies were evaluated by varying the relevant
parameters (T and Q). The load balancing efficiency of each strategy was
measured for different sizes of membership information that need to be
maintained at each node, where load balancing efficiency is defined as the
ratio of the average number of requests served by each node to the maximum
number of requests served by the hottest node.

The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
figure, strategy 3 achieves the best load balancing efficiency and strategy 2
has the worst load balancing efficiency.
For a brief time, Strategy 2 served as an interim setup during the process of
migrating Dynamo instances from using Strategy 1 to Strategy 3. Compared to
Strategy 1, Strategy 3 achieves better efficiency and reduces the size of
membership information maintained at each node by three orders of magnitude.
While storage is not a major issue, the nodes gossip the membership information
periodically, and as such it is desirable to keep this information as compact
as possible. In addition to this, strategy 3 is advantageous and simpler to
deploy for the following reasons: (i) Faster bootstrapping/recovery: Since
partition ranges are fixed, they can be stored in separate files, meaning a
partition can be relocated as a unit by simply transferring the file (avoiding
random accesses needed to locate specific items). This simplifies the process
of bootstrapping and recovery. (ii) Ease of archival: Periodic archiving of the
dataset is a mandatory requirement for most of Amazon's storage services.
Archiving the entire dataset stored by Dynamo is simpler in strategy 3 because
the partition files can be archived separately. By contrast, in Strategy 1, the
tokens are chosen randomly, and archiving the data stored in Dynamo requires
retrieving the keys from individual nodes separately, which is usually
inefficient and slow. The disadvantage of strategy 3 is that changing the node
membership requires coordination in order to preserve the properties required
of the assignment.

**<figure id="figure-8" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure8.png">
  <figcaption>
    Figure 8: Comparison of the load distribution efficiency of different
    strategies for system with 30 nodes and N=3 with equal amount of metadata
    maintained at each node. The values of the system size and number of
    replicas are based on the typical configuration deployed for majority of
    our services.
  </figcaption>
</figure>**

### 6.3 Divergent Versions: When and How Many?

As noted earlier, Dynamo is designed to trade off consistency for availability.
To understand the precise impact of different failures on consistency, detailed
data is required on multiple factors: outage length, type of failure, component
reliability, workload etc. Presenting these numbers in detail is outside of the
scope of this paper. However, this section discusses a good summary metric: the
number of divergent versions seen by the application in a live production
environment.

> This first statement should be read carefully. It's probably more correct to
> say that Dynamo (and Riak) provides no consistency guarantees, and allows
> users to trade availability for durability/latency.

Divergent versions of a data item arise in two scenarios. The first is when the
system is facing failure scenarios such as node failures, data center failures,
and network partitions. The second is when the system is handling a large
number of concurrent writers to a single data item and multiple nodes end up
coordinating the updates concurrently. From both a usability and efficiency
perspective, it is preferred to keep the number of divergent versions at any
given time as low as possible. If the versions cannot be syntactically
reconciled based on vector clocks alone, they have to be passed to the business
logic for semantic reconciliation. Semantic reconciliation introduces
additional load on services, so it is desirable to minimize the need for it.
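> For the shopping cart discussed throughout, semantic reconciliation is just
> a union of the divergent versions. A sketch (our example, with an add-only
> set of item names):

```python
def merge_carts(siblings):
    """Union all divergent cart versions so no "add to cart" is lost;
    as the paper notes, deleted items can resurface."""
    merged = set()
    for cart in siblings:
        merged |= cart
    return merged

print(merge_carts([{"book", "pen"}, {"book", "mug"}]))
# {'book', 'mug', 'pen'} (set order may vary)
```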
In our next experiment, the number of versions returned to the shopping cart
service was profiled for a period of 24 hours. During this period, 99.94% of
requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
of requests saw 3 versions and 0.00009% of requests saw 4 versions. This shows
that divergent versions are created rarely.

Experience shows that the increase in the number of divergent versions is
driven not by failures but by the increase in the number of concurrent writers.
The increase in the number of concurrent writes is usually triggered by busy
robots (automated client programs) and rarely by humans. This issue is not
discussed in detail due to the sensitive nature of the story.

### 6.4 Client-driven or Server-driven Coordination

As mentioned in Section 5, Dynamo has a request coordination component that
uses a state machine to handle incoming requests. Client requests are uniformly
assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
coordinator for a read request. Write requests, on the other hand, will be
coordinated by a node in the key's current preference list. This restriction is
due to the fact that these preferred nodes have the added responsibility of
creating a new version stamp that causally subsumes the version that has been
updated by the write request. Note that if Dynamo's versioning scheme is based
on physical timestamps, any node can coordinate a write request.

> In Riak, a server-side load balancer is an optional configuration. You
> generally use either virtual IPs or reverse proxies.
>
> See [Load Balancing] for more information.

[Load Balancing]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/load-balancing-proxy/

An alternative approach to request coordination is to move the state machine to
the client nodes. In this scheme, client applications use a library to perform
request coordination locally. A client periodically picks a random Dynamo node
and downloads its current view of Dynamo membership state. Using this
information, the client can determine which set of nodes form the preference
list for any given key. Read requests can be coordinated at the client node,
thereby avoiding the extra network hop that is incurred if the request were
assigned to a random Dynamo node by the load balancer. Writes will either be
forwarded to a node in the key's preference list or can be coordinated locally
if Dynamo is using timestamp-based versioning.

> Many [client libraries] provide built-in node request coordination.
>
> For example, using the Ruby driver, you could specify three nodes like this:
>
>     client = Riak::Client.new(nodes: [
>       {host: '10.0.0.1'},
>       {host: '10.0.0.2'},
>       {host: '10.0.0.3'}
>     ])
>
> Note that the Riak clients do not coordinate with Riak's preference list,
> but simply round-robin requests, letting the Riak cluster handle routing.

[client libraries]: {{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries/

An important advantage of the client-driven coordination approach is that a
load balancer is no longer required to uniformly distribute client load. Fair
load distribution is implicitly guaranteed by the near-uniform assignment of
keys to the storage nodes. Obviously, the efficiency of this scheme is
dependent on how fresh the membership information is at the client. Currently,
clients poll a random Dynamo node every 10 seconds for membership updates.
A pull-based
+approach was chosen over a push-based one, as the former scales better with a
+large number of clients and requires very little state to be maintained at
+servers regarding clients. However, in the worst case the client can be exposed
+to stale membership for a duration of 10 seconds. If the client detects that
+its membership table is stale (for instance, when some members are
+unreachable), it will immediately refresh its membership information.
+
+<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
+percentile and averages that were observed for a period of 24 hours using
+client-driven coordination compared to the server-driven approach. As seen in
+the table, the client-driven coordination approach reduces the latencies by at
+least 30 milliseconds for 99.9th percentile latencies and decreases the average
+by 3 to 4 milliseconds. The latency improvement is because the client-driven
+approach eliminates the overhead of the load balancer and the extra network hop
+that may be incurred when a request is assigned to a random node. As seen in the
+table, average latencies tend to be significantly lower than latencies at the
+99.9th percentile. This is because Dynamo’s storage engine caches and write
+buffer have good hit ratios. Moreover, since the load balancers and network
+introduce additional variability to the response time, the gain in response time
+is higher for the 99.9th percentile than the average.
+
+<table id="table-2">
+  <caption>
+    Table 2: Performance of client-driven and server-driven
+    coordination approaches.
+  </caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations were not affected significantly. To this end, the
+background tasks were integrated with an admission control mechanism. Each of
+the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock-contention
+and transaction timeouts, and request queue wait times.
This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementation and maintenance of Dynamo. Many Amazon internal services have
+used Dynamo for the past two years and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of their requests, and no
+data loss event has occurred to date.
+
+Moreover, the primary advantage of Dynamo is that it provides the necessary
+knobs, in the form of the three parameters (N,R,W), for service owners to tune
+their instance based on their needs. Unlike popular commercial data stores,
+Dynamo exposes data consistency and reconciliation logic issues to the
+developers. At the outset, one may expect the application logic to become more
+complex. However, historically, Amazon’s platform is built for high
+availability and many applications are designed to handle different failure
+modes and inconsistencies that may arise. Hence, porting such applications to
+use Dynamo was a relatively simple task. For new applications that want to use
+Dynamo, some analysis is required during the initial stages of the development
+to pick the right conflict resolution mechanisms that meet the business case
+appropriately. Finally, Dynamo adopts a full membership model where each node
+is aware of the data hosted by its peers. To do this, each node actively
+gossips the full routing table with other nodes in the system. This model works
+well for a system that contains a couple of hundred nodes. However, scaling
+such a design to run with tens of thousands of nodes is not trivial because the
+overhead in maintaining the routing table increases with the system size. This
+limitation might be overcome by introducing hierarchical extensions to Dynamo.
+Also, note that this problem is actively addressed by O(1) DHT systems (e.g.,
+[14]).
+
+> This is equally true for Riak. As mentioned above, consider running
+> [Basho Bench] to help discover your optimal setup. Nothing will give you
+> better numbers than real experimentation.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/benchmarking/
+
+## 7. Conclusions
+
+> This paper was an overview of Riak from a Dynamo point-of-view. To get a
+> better sense of the Riak ecosystem, read our ever-expanding [documentation].
+
+[documentation]: {{<baseurl>}}
+
+This paper described Dynamo, a highly available and scalable data store, used
+for storing the state of a number of core services of Amazon.com’s e-commerce
+platform. Dynamo has provided the desired levels of availability and performance
+and has been successful in handling server failures, data center failures and
+network partitions. Dynamo is incrementally scalable and allows service owners
+to scale up and down based on their current request load.
Dynamo allows service +owners to customize their storage system to meet their desired performance, +durability and consistency SLAs by allowing them to tune the parameters N, R, +and W. + +The production use of Dynamo for the past year demonstrates that decentralized +techniques can be combined to provide a single highly-available system. Its +success in one of the most challenging application environments shows that an +eventual-consistent storage system can be a building block for highly-available +applications. diff --git a/content/riak/kv/2.9.0p5/learn/glossary.md b/content/riak/kv/2.9.0p5/learn/glossary.md new file mode 100644 index 0000000000..c79993d26a --- /dev/null +++ b/content/riak/kv/2.9.0p5/learn/glossary.md @@ -0,0 +1,362 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +aliases: + - /riak/2.9.0p5/learn/glossary/ + - /riak/2.9.0/learn/glossary/ + - /riak/kv/2.9.0/learn/glossary/ + - /riak/kv/2.9.0p1/learn/glossary/ + - /riak/kv/2.9.0p2/learn/glossary/ + - /riak/kv/2.9.0p3/learn/glossary/ + - /riak/kv/2.9.0p4/learn/glossary/ +--- + + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/2.9.0p5/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/2.9.0p5/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.0p5/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/2.9.0p5/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/2.9.0p5/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's 
Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/2.9.0p5/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. 
+
+* [Data Types Concept][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## Eventual Consistency
+
+A consistency model that informally guarantees that if no new updates
+are made to a given data item, all reads on that item will eventually
+return the last updated value. Details about what this means in Riak can
+be found in the document below.
+
+* [Eventual Consistency][concept eventual consistency]
+
+## Gossiping
+
+Riak uses a "gossip protocol" to share and communicate ring state and
+bucket properties around the cluster. Whenever a node changes its claim
+on the ring, it announces its change via this protocol. Each node also
+periodically sends its current view of the ring state to a randomly
+selected peer in case any nodes missed previous updates.
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Hinted Handoff
+
+Hinted handoff is a technique for dealing with node failure in the Riak
+cluster in which neighboring nodes temporarily take over storage
+operations for the failed node. When the failed node returns to the
+cluster, the updates received by the neighboring nodes are handed off to
+it.
+
+Hinted handoff allows Riak to ensure database availability. When a node
+fails, Riak can continue to handle requests as if the node were still
+there.
+
+* [Recovering a Failed Node][repair recover failure recovery]
+
+## Key
+
+Keys are unique object identifiers in Riak and are scoped within buckets
+and bucket types.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Lager
+
+[Lager] is an Erlang/OTP framework that
+ships as Riak's default logger.
+
+## MapReduce
+
+Riak's MapReduce gives developers the capability to perform more
+powerful queries over the data stored in their key/value store.
+
+* [Using MapReduce][usage mapreduce]
+
+## Node
+
+A node is analogous to a physical server. Nodes run a certain number of
+vnodes, each of which claims a partition in the Riak Ring key space.
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Object
+
+An object is another name for a value.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Partition
+
+Partitions are the spaces into which a Riak cluster is divided. Each
+vnode in Riak is responsible for a partition. Data is stored on a set
+number of partitions determined by the `n_val` setting, with the target
+partitions chosen statically by applying consistent hashing to an
+object's key.
+
+* [Clusters][concept clusters]
+* [Eventual Consistency][concept eventual consistency]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Quorum
+
+Quorum in Riak has two meanings:
+
+* The quantity of replicas that must respond to a read or write request
+  before it is considered successful. This is defined as a bucket
+  property or as one of the relevant parameters to a single request
+  (R,W,DW,RW).
+* A symbolic quantity for the above, `quorum`, which is equivalent to
+  `n_val` / 2 + 1. The default setting is `2`.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Replication properties][apps replication properties]
+* [Understanding Riak's Configurable Behaviors]
+
+## Sloppy Quorum
+
+During failure scenarios, in which available nodes < total nodes, sloppy
+quorum is used to ensure that Riak is still available to take writes.
+When a primary node is unavailable, another node will accept its write +requests. When the node returns, data is transferred to the primary node +via the [Hinted Handoff](#hinted-handoff) process. + +## Read Repair + +Read repair is an anti-entropy mechanism that Riak uses to +optimistically update stale replicas when they reply to a read request +with stale data. + +* [More about Read Repair][concept replication] + +## Replica + +Replicas are copies of data stored in Riak. The number of replicas +required for both successful reads and writes is configurable in Riak +and should be set based on your application's consistency and +availability requirements. + +* [Eventual Consistency][concept eventual consistency] +* [Understanding Riak's Configurable Behaviors] + +## Riak Core + +Riak Core is the modular distributed systems framework that serves as +the foundation for Riak's scalable architecture. + +* [Riak Core] +* [Where To Start With Riak Core] + +## Riak KV + +Riak KV is the key/value datastore for Riak. + +* [Riak KV] + +## Riak Pipe + +Riak Pipe is the processing layer that powers Riak's MapReduce. It's +best described as "UNIX pipes for Riak." + +* [Riak Pipe] +* [Riak Pipe - the New MapReduce Power] +* [Riak Pipe - Riak's Distributed Processing Framework] + +## Riak Search + +Riak Search is a distributed, scalable, failure-tolerant, realtime, +full-text search engine integrating [Apache +Solr](https://lucene.apache.org/solr/) with Riak KV. + +* [Using Search][usage search] + +## Ring + +The Riak Ring is a 160-bit integer space. This space is equally divided +into partitions, each of which is claimed by a vnode, which themselves +reside on actual physical server nodes. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Cluster Capacity Planning][plan cluster capacity] + +## Secondary Indexing (2i) + +Secondary Indexing in Riak gives developers the ability to tag an object +stored in Riak with one or more values which can then be queried. + +* [Using Secondary Indexes][usage secondary-indexes] +* [Repairing Indexes][repair recover repairs] + +## Strong Consistency + +While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater +enable you to apply strong consistency guarantees to some or all of your +data, thus using Riak as a CP (consistent plus partition-tolerant) +rather than AP (highly available plus partition-tolerant) system. + +* [Strong Consistency Concept][concept strong consistency] +* [Using Strong Consistency][cluster ops strong consistency] + +## Value + +Riak is best described as a key/value store. In versions of Riak prior +to 2.0, all "values" are opaque BLOBs (binary large objects) identified +with a unique key. Values can be any type of data, including a string, a +JSON object, a text document, etc. Modifying values involves fetching +the value that exists in Riak and substituting it for a new value; +operations on values are thus basic CRUD operations. + +[Riak Data Types][dev data types], added in version 2.0, are an important +exception to this. While still considered values---because they are +stored in bucket type/bucket/key locations, like anything in Riak---Riak +Data Types are not BLOBs and are modified by Data Type-specific +operations. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] +* [Data Types][dev data types] + + +## Vector Clock + +Riak utilizes vector clocks (or _vclocks_) to handle version control. 
+Since any node in a Riak cluster is able to handle a request, and not +all nodes need to participate, data versioning is required to keep track +of a current value. When a value is stored in Riak, it is tagged with a +vector clock and establishes the initial version. When it is updated, +the client provides the vector clock of the object being modified so +that this vector clock can be extended to reflect the update. Riak can +then compare vector clocks on different versions of the object and +determine certain attributes of the data. + +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] diff --git a/content/riak/kv/2.9.0p5/learn/new-to-nosql.md b/content/riak/kv/2.9.0p5/learn/new-to-nosql.md new file mode 100644 index 0000000000..bc1d5ab16b --- /dev/null +++ b/content/riak/kv/2.9.0p5/learn/new-to-nosql.md @@ -0,0 +1,25 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +#menu: +# riak_kv-2.9.0p5: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +aliases: + - /riak/2.9.0p5/learn/new-to-nosql/ + - /riak/2.9.0/learn/new-to-nosql/ + - /riak/kv/2.9.0/learn/new-to-nosql/ + - /riak/kv/2.9.0p1/learn/new-to-nosql/ + - /riak/kv/2.9.0p2/learn/new-to-nosql/ + - /riak/kv/2.9.0p3/learn/new-to-nosql/ + - /riak/kv/2.9.0p4/learn/new-to-nosql/ +--- + + +**TODO: Add content (not sure where this lives in existing docs)** diff --git a/content/riak/kv/2.9.0p5/learn/use-cases.md b/content/riak/kv/2.9.0p5/learn/use-cases.md new file mode 100644 index 0000000000..bbb5cc5b53 --- /dev/null +++ b/content/riak/kv/2.9.0p5/learn/use-cases.md @@ -0,0 +1,409 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/2.9.0p5/dev/data-modeling/ + - /riak/kv/2.9.0p5/dev/data-modeling/ + - /riak/2.9.0p5/learn/use-cases/ + - /riak/2.9.0/learn/use-cases/ + - /riak/kv/2.9.0/learn/use-cases/ + - /riak/kv/2.9.0p1/learn/use-cases/ + - /riak/kv/2.9.0p2/learn/use-cases/ + - /riak/kv/2.9.0p3/learn/use-cases/ + - /riak/kv/2.9.0p4/learn/use-cases/ +--- + + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask +[replication properties]: 
{{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. This guide is intended to be illustrative only. + +## High Read/Write, Simple Applications + +The following are examples of Riak use cases that require high read/write +performance without necessarily utilizing complex data structures: + +* [Session Storage][dev data model sess storage] +* [Serving Advertisements][dev data model serve advertisements] +* [Log Data][dev data model log data] +* [Sensor Data][dev data model sensor data] + +## Content Management, Social Applications + +The following application types require more subtle relationships between +objects, e.g. one-to-many and many-to-many relationships. + +* [User Accounts][dev data model user acct] +* [User Settings and Preferences][dev data model user settings] +* [User Events and Timelines][dev data model user events] +* [Articles, Blog Posts, and Other Content][dev data model articles etc] + +## Session Storage + +Riak was originally created to serve as a highly scalable session store. This is +an ideal use case for Riak, which is always most performant and predictable when +used as a key/value store. Since user and session IDs are usually stored in +cookies or otherwise known at lookup time, Riak is able to serve these requests +with predictably low latency. Riak's content-type agnosticism also imposes no +restrictions on the value, so session data can be encoded in many ways and can +evolve without administrative changes to schemas. + +### Complex Session Storage Case + +Riak has features that allow for more complex session storage use cases. The +[Bitcask][plan backend bitcask] storage backend, for example, supports automatic +expiry of keys, which frees application developers from implementing manual +session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to +perform batch processing analysis on large bodies of session data, for example +to compute the average number of active users. If sessions must be retrieved +using multiple keys (e.g. a UUID or email address), +[using secondary indexes][usage secondary-indexes] can provide an easy solution. 
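+
+As a minimal sketch of this pattern (the bucket, key, and index names here are
+illustrative, not prescribed), the Ruby client can store a session under its
+session ID and tag it with a secondary index for lookup by email:
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+sessions = client.bucket('sessions')
+
+# Store the session under its session ID...
+session = sessions.new('0f9b6b92-bd97-4e0a-a054-d26b9bc9e997')
+session.data = {'user_id' => 42, 'cart' => []}
+# ...and index it by email so it can also be found without the session ID
+session.indexes['email_bin'] << 'user@example.com'
+session.store
+
+# Later, look the session up by email via the secondary index
+session_keys = sessions.get_index('email_bin', 'user@example.com')
+```
+
+Note that secondary indexes require a backend that supports them, such as
+LevelDB.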
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort,
+e.g. images or text, can be stored in Riak using unique keys generated either
+by the application or by Riak. Keys can be created based on, for example, a
+campaign or company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+Riak's tunable [apps replication properties][replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster that performs more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts
+of records for a date, or use Riak Search for more robust, text-based queries.
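+
+A minimal sketch of the bucket-per-system pattern described above, using the
+Ruby client (the bucket name, key scheme, and log format are illustrative
+only):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+logs = client.bucket('system1_log_data')
+
+# Key each entry by a sortable UTC timestamp so related entries group together
+entry = logs.new(Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ'))
+entry.content_type = 'text/plain'
+entry.raw_data = '127.0.0.1 - - [27/Aug/2011:14:02:11 +0000] "GET / HTTP/1.1" 200'
+entry.store
+```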
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating that data to a secondary cluster for heavy analytics jobs, whether
+over another Riak cluster or another solution such as Hadoop. Because the
+access patterns of reading and writing data to Riak are very different from the
+access pattern of something like a MapReduce job, which iterates over many
+keys, separating the write workload from the analytics workload will let you
+maintain higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value.
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data, since retrieving it all
+would mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye out for the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach.
What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
+      Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type of object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account or have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of modeling of user
+data. A common example would be storing data for assembling a social network
+timeline. To create a user timeline, you could use a `timeline` bucket in Riak
+and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs that could then be
+used to retrieve the full information from another bucket, or perhaps the full
+status updates themselves. If you want to store additional data, such as a
+timestamp, category or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+to the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time.
Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client. Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the content post, though with a different
+bucket/key combination. Another possibility would be to store each comment with
+its own ID. Loading the full view with comments would require your application
+to call from the posts and comments buckets to assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] (2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
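+
+As an illustrative sketch (the bucket and index names are assumptions, not a
+fixed schema), tagging each post with an integer date index makes range
+queries straightforward with the Ruby client:
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+posts = client.bucket('posts')
+
+post = posts.new('riak-at-openx')
+post.data = {'title' => 'Riak at OpenX', 'body' => '...'}
+post.indexes['published_int'] << 20120501  # YYYYMMDD as an integer
+post.store
+
+# Range query: keys of every post published in May 2012
+may_posts = posts.get_index('published_int', 20120501..20120531)
+```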
+ +### Articles et al Community Examples + +<table class="use-cases__image-links"> + <tr> + <td> + <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" link target="_blank"> + <img src="/riak-docs/images/linkfluence-case-study.png" title="Milking Performance"> + </a> + </td> + <td> + Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>. + </td> + </tr> + <tr> + <td> + <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" link target="_blank"> + <img src="/riak-docs/images/ideeli-case-study.png" title="Milking Performance"> + </a> + </td> + <td> + ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>. + </td> + </tr> +</table> + diff --git a/content/riak/kv/2.9.0p5/learn/why-riak-kv.md b/content/riak/kv/2.9.0p5/learn/why-riak-kv.md new file mode 100644 index 0000000000..594a9d3a31 --- /dev/null +++ b/content/riak/kv/2.9.0p5/learn/why-riak-kv.md @@ -0,0 +1,229 @@ +--- +title: "Why Riak KV?" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Why Riak KV?" + identifier: "learn_why_riak_kv" + weight: 100 + parent: "learn" +toc: true +aliases: + - /riak/2.9.0p5/theory/why-riak/ + - /riak/kv/2.9.0p5/theory/why-riak/ + - /riak/2.9.0p5/learn/why-riak-kv/ + - /riak/2.9.0/learn/why-riak-kv/ + - /riak/kv/2.9.0/learn/why-riak-kv/ + - /riak/kv/2.9.0p1/learn/why-riak-kv/ + - /riak/kv/2.9.0p2/learn/why-riak-kv/ + - /riak/kv/2.9.0p3/learn/why-riak-kv/ + - /riak/kv/2.9.0p4/learn/why-riak-kv/ +--- + + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/replication-properties +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/benchmarking +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency +[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[Datomic]: http://www.datomic.com/overview.html +[dev data types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#read-repair + + +## What is Riak? + +Riak is a distributed database designed to deliver maximum data +availability by distributing data across multiple servers. As long as +your Riak client can reach *one* Riak server, it should be able to write +data. + +Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data. 
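+
+As a quick sketch (the host addresses and names below are placeholders, not a
+required layout), writing through the Ruby client requires only that one node
+be reachable:
+
+```ruby
+require 'riak'
+
+# Any reachable node can accept the request; there is no "master" to find
+client = Riak::Client.new(nodes: [{host: '10.0.0.1'}, {host: '10.0.0.2'}])
+
+obj = client.bucket('greetings').new('hello')
+obj.data = {'message' => 'Hello from Riak'}
+obj.store
+```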
+
+### Basho's goals for Riak
+
+Goal | Description
+-------|-------
+**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself is experiencing failure conditions
+**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden
+**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity
+**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available
+
+### When Riak makes sense
+
+If your data does not fit on a single server and demands a distributed
+database architecture, you should take a close look at Riak as a
+potential solution to your data availability issues. Getting distributed
+databases right is **very** difficult, and Riak was built to address the
+problem of data availability with as few trade-offs and downsides as
+possible.
+
+Riak's focus on availability makes it a good fit whenever downtime is
+unacceptable. No one can promise 100% uptime, but Riak is designed to
+survive network partitions and hardware failures that would
+significantly disrupt most databases.
+
+A less-heralded feature of Riak is its predictable latency. Because its
+fundamental operations---read, write, and delete---do not involve
+complex data joins or locks, it services those requests promptly. Thanks
+to this capability, Riak is often selected as a data storage backend for
+data management software from a variety of paradigms, such as
+[Datomic].
+
+From the standpoint of the actual content of your data, Riak might also
+be a good choice if your data can be modeled as one of Riak's currently
+available [Data Types][dev data types]: flags, registers, counters,
+sets, or maps. These Data Types enable you to take advantage of Riak's
+high availability approach while simplifying application development.
+
+### When Riak is Less of a Good Fit
+
+We recommend running no fewer than 5 data servers in a cluster.
+This means that Riak can be overkill for small databases. If you're not
+already sure that you will need a distributed database, there's a good
+chance that you won't need Riak.
+
+If explosive growth is a possibility, however, you are well advised to
+prepare for that in advance. Scaling at Internet speeds is
+sometimes compared to overhauling an airplane mid-flight. If you feel
+that such a transition might be necessary in the future, then you might
+want to consider Riak.
+
+Riak's simple data model, consisting of keys and values as its atomic
+elements, means that your data must be denormalized if your system is to
+be reasonably performant. For most applications this is not a serious
+hurdle. But if your data simply cannot be effectively managed as keys
+and values, Riak will most likely not be the best fit for you.
+
+Correspondingly, if your application demands a high query load by any
+means other than key/value lookup---e.g. SQL-style `SELECT * FROM table`
+operations---Riak will not be as efficient as other databases. If you
+wish to compare Riak with other data technologies, Basho offers a tool
+called [Basho Bench] to help measure its performance, so that you can
+decide whether the availability and operational benefits of Riak
+outweigh its disadvantages.
+
+## How Does a Riak Cluster Work?
+ +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. + +## When Things Go Wrong + +Riak retains fault tolerance, data integrity, and availability even in +failure conditions such as hardware failure and network partitions. Riak +has a number of means of addressing these scenarios and other bumps in +the road, like version conflicts in data. + +### Hinted Handoff + +Hinted handoff enables Riak to handle node failure. If a node goes down, +a neighboring node will take over its storage operations. When the +failed node returns, the updates received by the neighboring node are +handed back to it. This ensures that availability for writes and updates +is maintained automatically, minimizing the operational burden of +failure conditions. + +### Version Conflicts + +In any system that replicates data, conflicts can arise, for example +when two clients update the same object at the exact same time or when +not all updates have yet reached hardware that is experiencing lag. + +In Riak, replicas are [eventually consistent][concept eventual consistency], +meaning that while data is always available, not all replicas may have +the most recent update at the exact same time, causing brief +periods---generally on the order of milliseconds---of inconsistency +while all state changes are synchronized. 
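+
+As a rough sketch of how such a conflict can arise (the bucket and hosts here
+are hypothetical), two clients racing on the same key with `allow_mult`
+enabled will both succeed, leaving siblings for the application to resolve:
+
+```ruby
+require 'riak'
+
+client_a = Riak::Client.new(nodes: [{host: '10.0.0.1'}])
+client_b = Riak::Client.new(nodes: [{host: '10.0.0.2'}])
+
+prefs = client_a.bucket('prefs')
+prefs.allow_mult = true
+
+# Both clients fetch the same object version...
+obj_a = client_a.bucket('prefs').get_or_new('user-42')
+obj_b = client_b.bucket('prefs').get_or_new('user-42')
+
+# ...then write divergent updates without seeing each other's change
+obj_a.data = {'theme' => 'dark'}
+obj_a.store
+obj_b.data = {'theme' => 'light'}
+obj_b.store
+
+# A later read may now return both values as siblings
+client_a.bucket('prefs').get('user-42').conflict?  # => true
+```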
+
+Riak addresses data conflicts as follows: When you make a read request,
+Riak looks up all replicas for that object. By default, Riak will return
+the most recently updated version, determined by looking at the object's
+vector clock. Vector clocks are metadata attached to each replica when
+it is created. They are extended each time a replica is updated to keep
+track of versions. You can also allow clients to resolve conflicts
+themselves if that is a better fit for your use case.
+
+### Riak Data Types
+
+If you are not interested in dealing with version conflicts on the
+application side, [Riak Data Types][dev data types] offer a powerful
+yet easy-to-use means of storing certain types of data while allowing
+Riak to handle merge conflicts. These conflicts are resolved
+automatically by Riak using Data Type-specific algorithms inspired by
+research into [convergent replicated data types].
+
+### Read Repair
+
+When an outdated replica is returned as part of a read request, Riak
+will automatically update the out-of-sync replica to make it consistent.
+[Read repair][glossary read rep], a self-healing property of
+the database, will even update a replica that returns a `not_found` in
+the event that a node loses the data due to physical failure.
+
+### Reading and Writing Data in Failure Conditions
+
+In Riak, you can set an R value for reads and a W value for writes.
+These values give you control over how many replicas must respond to a
+request for it to succeed.
+
+Let's say that you have an N value of 3 (aka `n_val=3`) for a particular
+key/value pair, but one of the physical nodes responsible for a replica
+is down. With an `r=2` setting, only 2 replicas must return results for
+a read to be deemed successful. This allows Riak to provide read
+availability even when nodes are down or laggy. The same applies for the
+W in writes. If this value is not specified, Riak defaults to `quorum`,
+according to which the majority of nodes must respond.
+
+There is more on [replication properties][apps replication properties] elsewhere in the
+documentation.
diff --git a/content/riak/kv/2.9.0p5/release-notes.md b/content/riak/kv/2.9.0p5/release-notes.md
new file mode 100644
index 0000000000..ff1c4da8b4
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/release-notes.md
@@ -0,0 +1,178 @@
+---
+title: "Riak KV 2.9.0 Release Notes"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Release Notes"
+    identifier: "index_release_notes"
+    weight: 101
+    parent: index
+toc: false
+aliases:
+  - /riak/2.9.0p5/community/release-notes
+  - /riak/kv/2.9.0p5/intro-v20
+  - /riak/2.9.0p5/intro-v20
+  - /riak/kv/2.9.0p5/introduction
+  - /riak/2.9.0p5/release-notes/
+  - /riak/2.9.0/release-notes/
+  - /riak/kv/2.9.0/release-notes/
+  - /riak/kv/2.9.0p1/release-notes/
+  - /riak/kv/2.9.0p2/release-notes/
+  - /riak/kv/2.9.0p3/release-notes/
+  - /riak/kv/2.9.0p4/release-notes/
+---
+
+
+Released April 25, 2018.
+
+> This release is dedicated to the memory of Andy Gross. Thank you and RIP.
+
+## Overview
+
+## Release Schedule
+
+April 2018 saw the release of [Riak KV 2.2.5](https://github.com/basho/riak/blob/riak-2.2.5/RELEASE-NOTES.md), the first [post-Basho](https://www.theregister.co.uk/2017/07/31/end_of_the_road_for_basho_as_court_puts_biz_into_receivership/) release of the Riak KV store.
This release was deliberately light on features, focusing on stability fixes that could be added with minimal disruption to the existing codebase and minimal risk to existing production workloads. The release did, though, establish a smoother path to releasing future changes, by investing significant effort in improving the reliability and usability of the Riak test and release process.
+
+There is now a plan for significant further improvements to Riak. These improvements will be delivered in two release cycles - Release 2.9 and Release 3.0:
+
+- Release 2.9 is focused on delivering significant database throughput improvements for use-cases which depend on both ordered keys and mid-to-large size objects, an overhaul of the efficiency of managing anti-entropy both within and across database clusters, and reductions in the network overheads of running a Riak cluster.
+
+- Release 3.0 is focused on providing a future-proof Riak, migrating to an up-to-date OTP platform, and stripping away the accidental complexity of under-used features. Release 3.0 will also build on some of the foundation improvements in Riak 2.9, to provide for a more efficient and flexible replication solution, and allow for a richer set of query features that can be run at minimal risk to the predictability of performance for the core Key/Value workloads of Riak customers.
+
+Release 2.9 will have an initial private release candidate available in early December 2018, and the release is expected to be generally available by the end of January 2019. The target for Release 3.0 is to have an initial Release Candidate available in April 2019.
+
+Release 2.9 is intended to be a stepping stone towards migrating to Release 3.0, but for users of features that will be terminated in Release 3.0, it is possible that community-led updates may continue on the 2.9 release branch for some time beyond the availability of Release 3.0.
+
+[Improvements](#improvements)
+
+[New features](#additions)
+
+[Known Issues](#transition-configuration-guidance) - please read **before upgrading** from a previous Riak release
+
+[Log of Changes](#change-log-for-this-release)
+
+[Previous Release Notes](#previous-release-notes)
+
+## Improvements
+
+Release 2.9 also brings three building blocks to enable current and future improvements to the management of operational risk:
+
+#### Vnode Soft Limits
+
+- When Riak is in receipt of a PUT request, it must select a vnode to co-ordinate the PUT. However, when load is high, vnodes may have work queues of varying sizes - and picking a vnode with a large queue will slow the PUT to the pace of that slow vnode. Vnode soft limits are a resolution to this problem, providing a simple check of the state of a vnode queue before determining that a particular vnode is a good candidate to coordinate a given PUT.
+
+- The biggest advantage seen in testing vnode soft limits is with the leveldb backend, where under soak test conditions there is a 50% reduction in the trend-line of the 99th percentile PUT time, and an 80% reduction in the peak 99th percentile PUT time.
+
+#### Core node worker pool
+
+- Riak-backed applications tend to make heavy use of the standard GET/PUT KV operations. These are short-lived tasks, but sometimes longer-lived tasks are required to either provide information to the operator (e.g. what is the average object size in the store?), or detect otherwise hidden errors (e.g. AAE tree rebuilds).
Each such task has tended to evolve its own mechanism to ensure that the impact of the task can be controlled to avoid inhibiting higher priority database work. The core node worker pool is a simple mechanism for controlling concurrency of background tasks on a per-node basis. It allows for either a single node worker pool to manage concurrency, or a series of pools modelled on the Differentiated Services design pattern.
+
+- There are other more sophisticated candidate methods which have been proposed in this space (e.g. riak_kv_sweeper and riak_core jobs). A decision will be made for the Riak 3.0 release as to which mechanism should be the standard going forward, but the core node worker pool appears to be the simplest of all the proposals at this stage.
+
+#### Repl API
+
+- Multiple customers of Riak have ended up with some form of bespoke extensions to the Riak replication features, normally to avoid some inefficiency in a replication feature by leveraging knowledge of the application (e.g. keys are time-stamp based, some keys are write-once etc). The repl code itself has expanded complexity to deal with scheduling of jobs, marshalling the use of resources by jobs, managing environmental factors (e.g. NAT, encryption requirements), handling change within the cluster, and managing exceptional replication topologies. Going forward, the preferred approach for handling special customer scenarios is to expose core replication features for customers to manage from outside of the database, rather than extending the internal feature scope for each scenario.
+
+- There are two repl features to be exposed in Riak 2.9. The first feature is an API to re-replicate an object: given a key, re-replicate this key from the cluster in receipt of the request. The second feature is the availability of an aae_fold API, to give access to cluster-wide AAE trees available as part of the TictacAAE change - as well as the ability to fetch keys and version information from objects within specific segments of the AAE tree.
+
+
+## Additions
+
+### TicTac Active Anti-Entropy
+
+- [Tictac Active Anti-Entropy](https://github.com/martinsumner/kv_index_tictactree).
+
+  - This makes two fundamental changes to the way anti-entropy has historically worked in Riak. The feature changes the nature of the construction of the Merkle Trees used in Anti-Entropy so that they can be built incrementally. The feature also changes the nature of the underlying anti-entropy key store so that the store can now be key-ordered, whilst still allowing for acceleration of access to keys by either their Merkle tree location or by the last modified date of the object.
+
+  - Previously anti-entropy had required knowledge of all elements of the tree to build the tree, and for a key store to be kept ordered based on the layout of that tree. These changes allow for:
+
+    - Lower overhead internal anti-entropy based on *cached trees*.
+
+    - Cluster-wide anti-entropy based on *cached trees*, without the need for cluster-wide full synchronisation to be suspended for long periods while AAE trees and stores are rebuilt. Cached trees are kept updated in parallel to the rebuilding of trees and AAE stores.
+
+    - Cross-cluster Merkle trees to be *independent of the internal layout* of the data in the cluster.
+
+    - Folding anti-entropy. The rapid and efficient production of anti-entropy Merkle *trees of subsets of the store data*, with those subsets definable at run-time based on *bucket*, *key-range* and *modified date* restrictions.
Allowing for more flexible inter-cluster comparisons (other than comparing whole stores).
+
+    - Database statistics and operator helpers. The anti-entropy keystore stores keys and additional metadata to support potentially helpful queries, without the need to fold over the vnode object store. This keystore can then also efficiently support ordered folds for unordered backends (e.g. bitcask). By folding over ranges of keys and metadata, not slowed by loading in all the values off disk, [administrative database queries](https://github.com/martinsumner/riak_kv/blob/develop-2.9/src/riak_kv_clusteraae_fsm.erl#L165-L208) can now be efficiently supported (e.g. object counts, find keys with siblings, find keys with large object sizes, object size histograms, calculate the average object size of items modified in the last 24 hours etc).
+
+  - The Tictac AAE feature can be run in addition to or instead of traditional Riak Active Anti-Entropy mechanisms to ease migration from the existing service.
+
+  - Future work on Tictac AAE is planned to handle issues arising from time-based deletion of objects. Tictac AAE will not currently work efficiently if a significant portion of objects have automatic TTL-based expiry.
+
+### Leveled backend
+
+- [Leveled backend](https://github.com/martinsumner/leveled).
+
+  - A new database backend to Riak, written entirely in Erlang, and optimised specifically for Riak-style workloads. Leveled is based on the same [Log Structured Merge (LSM) Tree paper](https://www.cs.umb.edu/~poneil/lsmtree.pdf) as the existing [leveldb](https://github.com/basho/leveldb/wiki) and [hanoidb](https://github.com/krestenkrab/hanoidb) backends, but making specific trade-offs to improve on throughput in some common Riak use cases:
+
+    - LSM-trees supporting larger objects. Other LSM-tree based stores have the potential to be bottle-necked by write-amplification (100 fold write amplification has been seen on large, mature Riak stores using leveldb) when used for storing larger objects (e.g. objects over 4KB). Leveled splits objects from headers, which was suggested as an option in the original LSM-tree paper, and further explored in the [WiscKey paper](https://www.usenix.org/node/194425) (and implemented in other stores such as Dgraph's [BadgerDB](https://github.com/dgraph-io/badger)). The full object is stored in a sequence-ordered journal separate to the LSM-tree, which contains only keys and their metadata. This reduces write amplification for larger values, as the LSM tree merge events are proportionate to the size of the headers not the objects.
+
+    - Replacing GETs with HEADs. In all existing Riak backends, the cost of extracting the header of an object is broadly equivalent to the cost of extracting the whole object. However, when resolving a GET request, only the headers of the objects are required to determine if the version vectors of the objects match, and if they do match only one vnode is required to return the actual body. Likewise, the PUT path in Riak only needs to see the object's version header, not the object body, before updating an object.
+
+  - By providing a fast-path to accessing the head of the object, Riak with a leveled backend is able to stop the practice of pulling the desired object N times over the network for each GET, using HEAD requests where possible instead.
+
+  - The response time of the HEAD messages also provides early warning of vnode queues and network latency, so that GET requests can be pushed towards fast responding vnodes, better balancing load across the cluster when one or more nodes are slower than other nodes in the cluster.
+
+  - Anti-entropy without a secondary store. The Tictac AAE solution requires an ordered keystore (as does the current Riak AAE solution), but as the Leveled backend already has a dedicated keystore for holding keys and metadata, this can be reused for AAE purposes. This means that Tictac AAE can be run in `native` mode - where no secondary store is required, and queries can be directed back to the actual backend store.
+
+  - A cluster-wide hot-backup facility, which due to reduced write amplification provides for efficient rsync support in a key-ordered backend.
+
+  - Migrating to the leveled backend requires a Riak cluster `replace` operation - there is no in-place transition to leveled from an existing backend.
+
+  - It is expected that community interest and support in the [bitcask backend](https://github.com/basho/bitcask) within Riak will continue into Riak 3.0 and beyond, as bitcask still offers throughput advantages with some workloads, where there is no demand for secondary indexes.
+
+  - Some [performance testing results and guidance for choosing a backend have been made available to assist with this decision](https://github.com/martinsumner/riak_testing_notes/blob/master/Release%202.9%20-%20Choosing%20a%20Backend.md). The optimal decision though is driven by too many variables (e.g. object size, number of keys, distribution of requests to keys, mutability of objects, physical server configuration, feature requirements and levels of application concurrency) to make an optimal decision obvious in most use cases - realistic use-case specific testing is always recommended.
+
+## Transition Configuration Guidance
+
+This section contains some initial notes to assist with planning and configuration for transition from pre-2.9 releases to 2.9:
+
+- The leveled backend is not compatible with other backends in terms of the serialised disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+
+- First transition to 2.9 with the current backend in-place, minimising the time spent running mis-matched versions in parallel;
+
+- Then as a second phase run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+- Testing has shown that higher transfer-limits can be used safely when running transfers to leveled nodes, by comparison to transfers to eleveldb nodes.
+
+- If upgrading from a release prior to the introduction of version 1 hashing of AAE, and if you intend to eventually move to TictacAAE - then follow the guidance to not upgrade to version 1. This prevents CPU resource being invested in the upgrade when it is eventually unnecessary.
+
+- Tictac AAE and Legacy AAE may be run in parallel - set both to active in riak.conf (see the sketch below). The cost of running Tictac AAE in parallel can be reduced by adjusting the tictacaae_exchangetick to a higher value. By default this is set to 120000 ms (2 minutes).
+
+- When Tictac AAE has not been run from the initial loading of the node, then the AAE process will not be fully effective until all nodes have undergone an "AAE rebuild". An increased tictacaae_exchangetick is recommended in this period.
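+
+As a hedged illustration of the parallel-AAE guidance above, a minimal riak.conf sketch (the `tictacaae_active` setting name is assumed from the 2.9 configuration schema, and the tick value is an example only):
+
+```riak.conf
+# Keep legacy AAE active while Tictac AAE is introduced
+anti_entropy = active
+
+# Run Tictac AAE in parallel (assumed setting name)
+tictacaae_active = active
+
+# Raise the exchange tick above the 120000 ms default to reduce
+# the cost of running both mechanisms side by side
+tictacaae_exchangetick = 240000
+```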
+For observability of new features, the stats output from riak-admin status has been extended, but there is also a greater focus on the use of logs for standard events, on both entry to and exit from the event. Tictac AAE is best observed from indexing the Riak logs (both console.*.log and erlang.*.log), and riak-admin aae-status will no longer offer any information.
+
+- Flushing to disk on every write can be enabled in leveled using leveled.sync_strategy. For Riak 2.9.0, the riak_sync mechanism must be used to enable sync, as the sync mechanism is only valid on later versions of OTP.
+
+- Leveled, like leveldb, continuously compacts the keystore (the LSM-tree). However, it must separately compact the value store, and compaction of the value store may be scheduled - using leveled.compaction_runs_perday, leveled.compaction_low_hour, leveled.compaction_high_hour and leveled.max_run_length. The following log should help with tuning:
+"IC003", "Scoring of compaction runs complete with highest score=~w with run of run_length=~w",
+If the highest score is increasing over time (and positive), then there is a backlog of compaction activity - so increase either the length of the run or the runs per day.
+
+- The size of the journal files can be changed to align with the size of the objects. Set the configuration parameter leveled.journal_size to be approximately the combined size of 100 thousand objects.
+
+- Leveled compression can be either native or lz4. lz4 has improved performance in most volume tests, but unless the performance improvement is significant for a use case, sticking with native compression is recommended, as this does not create a dependency on an external library. For objects which are already compressed, and may gain little value from compression, it is recommended to switch the compression point to be on_compact rather than on_receipt.
+
+- The code contains a more complete view of startup options for leveled and tictac_aae.
+
+
+## Change log for this release
+
+- Receive buffer size is now configurable via `advanced.config` with `{webmachine, [{recbuf, 65536}]},`
+- As part of the change above, mochiweb has been brought up-to-date with the mainstream mochi repository. This brings through all changes since 2.9.0. Users of the HTTP API should consider these changes when testing the release.
+- Log level with the leveled backend can now be set through riak.conf, and the log format has been changed to make the logs easier to index.
+- An issue discovered in property-based testing (by Quviq) with object folds in sqn_order has been resolved.
+- The process of closing down leveled has been refactored to stop process leaks discovered in property-based testing (by Quviq).
+- A workaround has been added for an issue where running a leveled unit test in riak make test was leading to a make test failure.
+- There are corrections to the Leveled fixes made in RC2 to ensure that the full cache-index situation is handled safely, and a potential deadlock on shutdown between the penciller and an individual sst file is resolved.
+- The Riak KV default cache size for leveled is reduced to the leveled default, the maximum size the cache can grow to (via jitter/returned) is reduced, and the number of cache lines is reduced. This means that in a stalled penciller, the next L0 file is constrained to be an order of magnitude smaller than in RC2. This may prevent bad behaviour under heavy handoff load.
+- The riak_kv_leveled_backend will now pause the vnode in response to a stalling leveled backend.
+- The riak_kv_leveled_backend will support v1 objects only; the riak_kv_vnode will never try to write an object as v0 into leveled.
+- It was discovered that in handoff scenarios with a leveled backend, Riak consumed much more memory than expected. This was caused by "switched" Level 0 files in the Penciller. These files have a small memory footprint when garbage collected, but a large footprint uncollected - there is a legacy of all the data being on the LoopState in the starting state (but not the reader state). Each file process now does garbage_collect/1 on self at the point of the switch to free this memory immediately.
+- This release fixes some security issues within yokozuna, and completes a full run-through of the yokozuna tests. It resolves an issue with HTTP security features crashing Riak which was introduced as part of the RC2 mochiweb uplift to fix the 2i index changes. It also transitions the eleveldb branch used to point back to the `basho` repository, with a fix that allows eleveldb to be deployed on recent OSX versions. An OSX-specific issue with `make test` failing on `eper` and `riak_ensemble` unit tests is also resolved.
+
+## Previous Release Notes
+
+Please see the KV 2.2.6 release notes [here]({{<baseurl>}}riak/kv/2.2.6/release-notes/), and the KV 2.2.3 release notes [here]({{<baseurl>}}riak/kv/2.2.3/release-notes/).
diff --git a/content/riak/kv/2.9.0p5/setup.md b/content/riak/kv/2.9.0p5/setup.md
new file mode 100644
index 0000000000..ef0d6d4bb6
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup.md
@@ -0,0 +1,54 @@
+---
+title: "Setup Riak KV"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Setup"
+    identifier: "setup_index"
+    weight: 110
+    pre: install
+toc: false
+aliases:
+  - /riak/2.9.0p5/setup/
+  - /riak/2.9.0/setup/
+  - /riak/kv/2.9.0/setup/
+  - /riak/kv/2.9.0p1/setup/
+  - /riak/kv/2.9.0p2/setup/
+  - /riak/kv/2.9.0p3/setup/
+  - /riak/kv/2.9.0p4/setup/
+---
+
+
+[plan index]: ../setup/planning
+[install index]: ../setup/installing
+[upgrade index]: ../setup/upgrading
+[downgrade]: ../setup/downgrade
+
+## In This Section
+
+#### [Planning][plan index]
+
+Information on planning your Riak KV cluster including software & hardware recommendations.
+
+[Learn More >>][plan index]
+
+#### [Installing][install index]
+
+Step-by-step tutorials on installing Riak KV.
+
+[Learn More >>][install index]
+
+#### [Upgrading][upgrade index]
+
+Guides on upgrading your Riak KV cluster.
+
+[Learn More >>][upgrade index]
+
+#### [Downgrading][downgrade]
+
+A guide on downgrading your Riak KV cluster.
+
+[Learn More >>][downgrade]
+
diff --git a/content/riak/kv/2.9.0p5/setup/downgrade.md b/content/riak/kv/2.9.0p5/setup/downgrade.md
new file mode 100644
index 0000000000..b10957be13
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/downgrade.md
@@ -0,0 +1,182 @@
+---
+title: "Downgrading"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Downgrading"
+    identifier: "downgrading"
+    weight: 103
+    parent: "setup_index"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/upgrading/rolling-downgrades/
+  - /riak/kv/2.9.0p5/ops/upgrading/rolling-downgrades/
+  - /riak/2.9.0p5/setup/downgrade/
+  - /riak/2.9.0/setup/downgrade/
+  - /riak/kv/2.9.0/setup/downgrade/
+  - /riak/kv/2.9.0p1/setup/downgrade/
+  - /riak/kv/2.9.0p2/setup/downgrade/
+  - /riak/kv/2.9.0p3/setup/downgrade/
+  - /riak/kv/2.9.0p4/setup/downgrade/
+---
+
+
+[rolling upgrade]: {{<baseurl>}}riak/kv/2.9.0p5/setup/upgrading/cluster
+[config ref]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference
+[concept aae]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/
+[aae status]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#aae-status
+
+Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade].
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak search.
+7. Monitor the reindex of the data.
+8. Finalize process and restart Riak KV & Riak search.
+
+### Guidelines
+
+* Riak Control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:---|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ | | Can be opted out using a [capability](#aae_tree_capability) |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression in LevelDB and/or enabled global expiration in LevelDB when you installed KV 2.9.0, you cannot downgrade.
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).
+
+This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}
+
+### Stop Riak KV and remove Riak search index & temporary data
+
+1/. Stop Riak KV:
+
+```bash
+riak stop
+```
+2/. Back up your Riak KV /etc and /data directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3/. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4/.
Remove the Riak search index data and AAE data:
+
+  1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+  ```bash
+  rm -rf /var/lib/riak/yz_temp/solr-webapp
+  ```
+  2. Delete the Solr cores located in the yz directory. If you have custom solrconfig.xml files, you will need to restore the core from backup instead.
+
+  For example:
+
+  ```bash
+  rm -rf /var/lib/riak/yz/example_core1
+  rm -rf /var/lib/riak/yz/example_core2
+  ```
+
+### Prepare to Re-index Solr Cores
+
+5/. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency sensitive application, you should adjust these settings with care.
+
+```riak.conf
+anti_entropy.concurrency_limit = 8
+anti_entropy.tree.build_limit.number = 4
+anti_entropy.tree.build_limit.per_timespan = 5m
+```
+
+### Start the node and disable Yokozuna
+
+6/. Start Riak KV:
+{{% note %}}
+Search results will be inconsistent until **Step 8.1** is complete.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7/. Wait for Riak search to start by running the following command:
+
+```bash
+riak-admin wait-for-service yokozuna
+```
+
+8/. Run `riak attach`.
+
+  1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:
+
+  ```
+  riak_core_node_watcher:service_down(yokozuna).
+  ```
+
+  2. Expire the Yokozuna AAE Trees:
+
+  ```
+  yz_entropy_mgr:expire_trees().
+  ```
+
+  3. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+### Monitor the reindex of the data
+
+9/. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands.
+
+The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output.
+
+Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.
+
+### Finalize process and restart Yokozuna
+
+
+10/. If you raised the AAE concurrency settings in riak.conf during **Step 5**, stop the node and remove the increased AAE thresholds.
+
+11/. If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet:
+
+```erlang
+riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)).
+```
+
+12/. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+13/.
Verify that transfers have completed: + +```bash +riak-admin transfers +``` diff --git a/content/riak/kv/2.9.0p5/setup/installing.md b/content/riak/kv/2.9.0p5/setup/installing.md new file mode 100644 index 0000000000..1b6e1cb0bd --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/installing.md @@ -0,0 +1,64 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/2.9.0p5/ops/building/installing + - /riak/kv/2.9.0p5/ops/building/installing + - /riak/2.9.0p5/installing/ + - /riak/kv/2.9.0p5/installing/ + - /riak/2.9.0p5/setup/installing/ + - /riak/2.9.0/setup/installing/ + - /riak/kv/2.9.0/setup/installing/ + - /riak/kv/2.9.0p1/setup/installing/ + - /riak/kv/2.9.0p2/setup/installing/ + - /riak/kv/2.9.0p3/setup/installing/ + - /riak/kv/2.9.0p4/setup/installing/ +--- + + +[install aws]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/rhel-centos +[install suse]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/2.9.0p5/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. 
diff --git a/content/riak/kv/2.9.0p5/setup/installing/amazon-web-services.md b/content/riak/kv/2.9.0p5/setup/installing/amazon-web-services.md
new file mode 100644
index 0000000000..ccdd0b513b
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/installing/amazon-web-services.md
@@ -0,0 +1,156 @@
+---
+title_supertext: "Installing on"
+title: "Amazon Web Services"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Amazon Web Services"
+    identifier: "installing_amazon_web_services"
+    weight: 301
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/installing/Installing-on-AWS-Marketplace
+  - /riak/kv/2.9.0p5/ops/building/installing/Installing-on-AWS-Marketplace
+  - /riak/2.9.0p5/installing/amazon-web-services/
+  - /riak/kv/2.9.0p5/installing/amazon-web-services/
+  - /riak/2.9.0p5/setup/installing/amazon-web-services/
+  - /riak/2.9.0/setup/installing/amazon-web-services/
+  - /riak/kv/2.9.0/setup/installing/amazon-web-services/
+  - /riak/kv/2.9.0p1/setup/installing/amazon-web-services/
+  - /riak/kv/2.9.0p2/setup/installing/amazon-web-services/
+  - /riak/kv/2.9.0p3/setup/installing/amazon-web-services/
+  - /riak/kv/2.9.0p4/setup/installing/amazon-web-services/
+---
+
+
+
+## Launching Riak VMs via the AWS Marketplace
+
+{{% note title="Note" %}}
+The AWS Marketplace does not always have the most recent versions of Riak available. To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below.
+{{% /note %}}
+
+In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account.
+
+1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account.
+
+2. Locate Riak in the **Databases & Caching** category or search for Riak from any page.
+
+3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair.
+
+    ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png)
+
+4. Click the **Accept Terms and Launch with 1-Click** button.
+
+### Security Group Settings
+
+Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak.
+
+1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM.
+
+2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports:
+
+    * 22 (SSH)
+    * 8087 (Riak Protocol Buffers Interface)
+    * 8098 (Riak HTTP Interface)
+
+3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab).
+
+    * Port range: 4369
+    * Port range: 6000-7999
+    * Port range: 8099
+
+4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.
+
+    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)
+
+We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.0p5/using/security/).
+
+## Clustering Riak on AWS
+
+You will need to launch at least 3 instances to form a Riak cluster.
When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the `ec2-user`.
+
+You can find more information on connecting to an instance on the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).
+
+{{% note title="Note" %}}
+The following clustering setup will _not_ be resilient to instance restarts
+unless deployed in Amazon VPC.
+{{% /note %}}
+
+{{% note title="Note on Package Based Installation" %}}
+If installing to AWS by package, further configuration of _riak.conf_ to set the node name and listening IP addresses is necessary for the below steps to function (see the sketch at the end of this page).
+{{% /note %}}
+
+1. On the first node, obtain the internal IP address:
+
+    ```bash
+    curl http://169.254.169.254/latest/meta-data/local-ipv4
+    ```
+
+2. For all other nodes, use the internal IP address of the first node:
+
+    ```bash
+    sudo riak-admin cluster join riak@<ip.of.first.node>
+    ```
+
+3. After all of the nodes are joined, execute the following:
+
+    ```bash
+    sudo riak-admin cluster plan
+    ```
+
+    If this looks good:
+
+    ```bash
+    sudo riak-admin cluster commit
+    ```
+
+    To check the status of clustering use:
+
+    ```bash
+    sudo riak-admin member_status
+    ```
+
+You now have a Riak cluster running on AWS.
+
+
+## Installing From Package
+
+#### AWS (2)
+
+You can install on AWS 2 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2/riak-2.9.0-1.amzn2x86_64.rpm
+sudo yum localinstall -y riak-2.9.0-1.amzn2x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2/riak-2.9.0-1.amzn2x86_64.rpm
+sudo rpm -i riak-2.9.0-1.amzn2x86_64.rpm
+```
+
+
+#### AWS (2016.09)
+
+You can install on AWS 2016.09 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2016.09/riak-2.9.0-1.amzn1x86_64.rpm
+sudo yum localinstall -y riak-2.9.0-1.amzn1x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2016.09/riak-2.9.0-1.amzn1x86_64.rpm
+sudo rpm -i riak-2.9.0-1.amzn1x86_64.rpm
+```
+## Next Steps
+
+Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/verify
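+
+As noted in the package-based installation note above, clustering requires the node name and listening addresses to be set in riak.conf before the join steps will work. A minimal sketch, assuming the standard riak.conf setting names and using placeholder addresses (substitute each instance's internal IP):
+
+```riak.conf
+# Name the node after its internal EC2 address (placeholder shown)
+nodename = riak@10.0.0.1
+
+# Listen on all interfaces for HTTP and Protocol Buffers traffic;
+# restrict access in production via security groups
+listener.http.internal = 0.0.0.0:8098
+listener.protobuf.internal = 0.0.0.0:8087
+```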
diff --git a/content/riak/kv/2.9.0p5/setup/installing/debian-ubuntu.md b/content/riak/kv/2.9.0p5/setup/installing/debian-ubuntu.md
new file mode 100644
index 0000000000..115315cb4a
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/installing/debian-ubuntu.md
@@ -0,0 +1,174 @@
+---
+title_supertext: "Installing on"
+title: "Debian and Ubuntu"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Debian & Ubuntu"
+    identifier: "installing_debian_ubuntu"
+    weight: 302
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/kv/2.9.0p5/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/2.9.0p5/installing/debian-ubuntu/
+  - /riak/kv/2.9.0p5/installing/debian-ubuntu/
+  - /riak/2.9.0p5/setup/installing/debian-ubuntu/
+  - /riak/2.9.0/setup/installing/debian-ubuntu/
+  - /riak/kv/2.9.0/setup/installing/debian-ubuntu/
+  - /riak/kv/2.9.0p1/setup/installing/debian-ubuntu/
+  - /riak/kv/2.9.0p2/setup/installing/debian-ubuntu/
+  - /riak/kv/2.9.0p3/setup/installing/debian-ubuntu/
+  - /riak/kv/2.9.0p4/setup/installing/debian-ubuntu/
+---
+
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source/
+[security index]: {{<baseurl>}}riak/kv/2.9.0p5/using/security/
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/verify
+
+Riak KV can be installed on Debian- or Ubuntu-based systems using a binary
+package or by compiling from source code.
+
+The following steps have been tested to work with Riak KV on:
+
+- Ubuntu 18.04
+- Ubuntu 16.04
+- Ubuntu 14.04
+- Ubuntu 12.04
+- Debian 9.2
+- Debian 8.6
+- Debian 7.6
+- Raspbian Buster
+
+> **Note on Debian 7**
+>
+> If you wish to install Riak on Debian 7, you may need to install
+[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
+later, which in turn requires upgrading your system to
+[sid](https://www.debian.org/releases/sid/). Installation instructions
+can be found
+[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
+>
+> Once sid has been installed, you can install libc6 with the following
+command:
+>
+>```bash
+apt-get -t sid install libc6 libc6-dev libc6-dbg
+```
+
+## Installing From Package
+
+If you wish to install the deb packages by hand, follow these
+instructions.
+
+### Installing on Non-LTS Ubuntu Releases
+
+Typically we only package Riak for LTS releases to keep our build and
+testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
+there are changes that affect how Riak is packaged, so we will release a
+separate package for that non-LTS release. In most other cases, however,
+if you are running a non-LTS release it is safe to follow the
+instructions below for the LTS release prior to yours. For example, on
+Ubuntu 12.10 you would follow the installation instructions for Ubuntu 12.04.
+
+### PAM Library Requirement for Ubuntu
+
+One dependency that may be missing on your machine is the `libpam0g-dev`
+package used for Pluggable Authentication Module (PAM) authentication,
+associated with [Riak security][security index].
+
+To install:
+
+```bash
+sudo apt-get install libpam0g-dev
+```
+
+### Riak 64-bit Installation
+
+#### Ubuntu Bionic Beaver (18.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/bionic64/riak_2.9.0-1_amd64.deb
+sudo dpkg -i riak_2.9.0-1_amd64.deb
+```
+
+#### Ubuntu Xenial Xerus (16.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/xenial64/riak_2.9.0-1_amd64.deb
+sudo dpkg -i riak_2.9.0-1_amd64.deb
+```
+
+#### Ubuntu Trusty Tahr (14.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/trusty64/riak_2.9.0-1_amd64.deb
+sudo dpkg -i riak_2.9.0-1_amd64.deb
+```
+
+#### Ubuntu Precise Pangolin (12.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/precise64/riak_2.9.0-1_amd64.deb
+sudo dpkg -i riak_2.9.0-1_amd64.deb
+```
+
+#### Debian Stretch (9.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/9/riak_2.9.0-1_amd64.deb
+sudo dpkg -i riak_2.9.0-1_amd64.deb
+```
+
+#### Debian Jessie (8.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/8/riak_2.9.0-1_amd64.deb
+sudo dpkg -i riak_2.9.0-1_amd64.deb
+```
+
+#### Debian Wheezy (7.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/7/riak_2.9.0-1_amd64.deb
+sudo dpkg -i riak_2.9.0-1_amd64.deb
+```
+
+#### Raspbian Buster
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/raspbian/buster/riak_2.9.0-1_armhf.deb
+sudo dpkg -i riak_2.9.0-1_armhf.deb
+```
+
+
+## Installing From Source
+
+First, install Riak dependencies using apt:
+
+```bash
+sudo apt-get install build-essential libc6-dev-i386 git
+```
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+Next, download, unpack, and build Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/riak-2.9.0.tar.gz
+tar zxvf riak-2.9.0.tar.gz
+cd riak-2.9.0
+make rel
+```
+
+If the build was successful, a fresh build of Riak will exist in the
+`rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.0p5/setup/installing/freebsd.md b/content/riak/kv/2.9.0p5/setup/installing/freebsd.md
new file mode 100644
index 0000000000..2e8dddc0e4
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/installing/freebsd.md
@@ -0,0 +1,136 @@
+---
+title_supertext: "Installing on"
+title: "FreeBSD"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "FreeBSD"
+    identifier: "installing_freebsd"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/installing/Installing-on-FreeBSD
+  - /riak/kv/2.9.0p5/ops/building/installing/Installing-on-FreeBSD
+  - /riak/2.9.0p5/installing/freebsd/
+  - /riak/kv/2.9.0p5/installing/freebsd/
+  - /riak/2.9.0p5/setup/installing/freebsd/
+  - /riak/2.9.0/setup/installing/freebsd/
+  - /riak/kv/2.9.0/setup/installing/freebsd/
+  - /riak/kv/2.9.0p1/setup/installing/freebsd/
+  - /riak/kv/2.9.0p2/setup/installing/freebsd/
+  - /riak/kv/2.9.0p3/setup/installing/freebsd/
+  - /riak/kv/2.9.0p4/setup/installing/freebsd/
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/2.9.0p5/downloads/
+[install verify]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/verify
+
+You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code.
+
+## Installing From Binary Package
+
+Installing Riak from a binary package is the simplest method, with the fewest required dependencies, and requires less time to complete than building from source.
+
+### Prerequisites and Dependencies
+
+Riak depends on `sudo` being installed if the Riak command line tools are to be executed by users other than the *riak* user. Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package.
+
+### Installation
+
+You can install the Riak binary package on FreeBSD remotely using the
+`pkg_add` remote option. For this example, we're installing `riak-2.9.0.txz`.
+
+### For FreeBSD 11.x
+
+```bash
+sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.0p5/freebsd/11.1/riak-2.9.0.txz
+```
+
+
+### For FreeBSD 10.x
+
+```bash
+sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.0p5/freebsd/10.4/riak-2.9.0.txz
+```
+
+When Riak is installed, a message is displayed with information about the installation and available documentation.
+
+```
+Thank you for installing Riak.
+
+Riak has been installed in /usr/local owned by user:group riak:riak
+
+The primary directories are:
+
+    {platform_bin_dir, "/usr/local/sbin"}
+    {platform_data_dir, "/var/db/riak"}
+    {platform_etc_dir, "/usr/local/etc/riak"}
+    {platform_lib_dir, "/usr/local/lib/riak"}
+    {platform_log_dir, "/var/log/riak"}
+
+These can be configured and changed in the platform_etc_dir/app.config.
+
+Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly.
+
+Man pages are available for riak(1) and riak-admin(1)
+```
+
+## Installing From Source
+
+Installing Riak from source on FreeBSD is a straightforward process which requires installation of more dependencies (such as Erlang) prior to building, and requires more time than a binary package installation.
+
+That said, installing from source provides for greater flexibility with respect to configuration, data root locations, and finer-grained control over specific dependency versions.
+
+### Prerequisites and Dependencies
+
+When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build.
+
+If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding.
+
+* Erlang ([Installing Erlang][install source erlang])
+* Curl
+* Git
+* OpenSSL (version 1.0.0_7)
+* Python
+* sudo
+* flex
+
+### Installation
+First download the version you wish to install from the [downloads][downloads].
+
+Next, unpack and build a release from source:
+
+```bash
+tar zxf <riak-x.x.x>
+cd riak-x.x.x
+gmake rel
+```
+
+Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories:
+
+```bash
+bin        # Riak binaries
+data       # Riak data and metadata
+erts-5.9.2 # Erlang Run-Time System
+etc        # Riak Configuration
+lib        # Third party libraries
+log        # Operational logs
+releases   # Release information
+```
+
+If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this:
+
+```bash
+gmake devrel
+```
+
+A sketch of starting and joining the resulting development nodes is shown at the end of this page.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
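+
+As a usage sketch for the `devrel` target above, the four generated nodes can be started and joined into a cluster. This assumes the conventional devrel layout of dev/dev1 through dev/dev4 under the build directory and the default dev1@127.0.0.1 node naming; adjust paths and names to match your build:
+
+```bash
+# Start all four development nodes
+for d in dev/dev1 dev/dev2 dev/dev3 dev/dev4; do $d/bin/riak start; done
+
+# Join dev2, dev3 and dev4 to dev1, then review and commit the plan
+for d in dev/dev2 dev/dev3 dev/dev4; do $d/bin/riak-admin cluster join dev1@127.0.0.1; done
+dev/dev1/bin/riak-admin cluster plan
+dev/dev1/bin/riak-admin cluster commit
+```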
diff --git a/content/riak/kv/2.9.0p5/setup/installing/mac-osx.md b/content/riak/kv/2.9.0p5/setup/installing/mac-osx.md
new file mode 100644
index 0000000000..e317d90738
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/installing/mac-osx.md
@@ -0,0 +1,124 @@
+---
+title_supertext: "Installing on"
+title: "Mac OS X"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Mac OS X"
+    identifier: "installing_macosx"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/kv/2.9.0p5/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/2.9.0p5/installing/mac-osx/
+  - /riak/kv/2.9.0p5/installing/mac-osx/
+  - /riak/2.9.0p5/setup/installing/mac-osx/
+  - /riak/2.9.0/setup/installing/mac-osx/
+  - /riak/kv/2.9.0/setup/installing/mac-osx/
+  - /riak/kv/2.9.0p1/setup/installing/mac-osx/
+  - /riak/kv/2.9.0p2/setup/installing/mac-osx/
+  - /riak/kv/2.9.0p3/setup/installing/mac-osx/
+  - /riak/kv/2.9.0p4/setup/installing/mac-osx/
+---
+
+
+
+[perf open files]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/open-files-limit
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/verify
+
+The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball.
+
+> **`ulimit` on OS X**
+>
+> OS X gives you a very small limit on open file handles, so even with a
+backend that uses very few file handles, it's possible to run out. See
+[Open Files Limit][perf open files] for more information about changing the limit.
+
+
+## From Precompiled Tarballs
+
+To run Riak from our precompiled tarball, run these commands for the
+appropriate platform:
+
+### 64-bit
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.11/riak-2.9.0-OSX-x86_64.tar.gz
+tar xzvf riak-2.9.0-OSX-x86_64.tar.gz
+```
+
+After the release is untarred, you will be able to `cd` into the `riak`
+directory and execute `bin/riak start` to start the Riak node.
+
+## Homebrew
+
+{{% note title="Warning: Homebrew not always up to date" %}}
+Homebrew's Riak recipe is community supported, and thus is not always up to
+date with the latest Riak package. Please ensure that the current recipe is
+using the latest supported code (and don't be afraid to update it if it's
+not).
+{{% /note %}}
+
+Installing Riak 2.9.0 with [Homebrew](http://brew.sh/) is easy:
+
+```bash
+brew install --devel riak
+```
+
+By default, this will place a `2.9.0` folder in
+`/usr/local/Cellar/riak`.
+
+Be aware that you will most likely see the following message after
+running `brew install`:
+
+```
+Error: The `brew link` step did not complete successfully
+The formula built, but is not symlinked into /usr/local
+
+You can try again using:
+  brew link riak
+```
+
+We do not recommend using `brew link` with Riak. Instead, we recommend
+either copying that directory to a desired location on your machine,
+aliasing the executables in the `/bin` directory, or interacting with
+the Riak installation directory via environment variables.
+
+**Note**: Homebrew will install Erlang if you don't have it already.
+
+## Installing From Source
+
+You must have Xcode tools installed from [Apple's Developer
+website](http://developer.apple.com/).
+
+{{% note title="Note on Clang" %}}
+Riak has had problems compiling with Clang in the past. As of Riak KV
+2.9.0 and Clang 902.0.39.1, Clang can build Riak.
+{{% /note %}}
+
+Riak requires [Erlang](http://www.erlang.org/) R16B02+.
+
+If you do not have Erlang already installed, see [Installing Erlang][install source erlang].
+
+Next, download and unpack the source distribution.
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.0p5/riak-2.9.0.tar.gz
+tar zxvf riak-2.9.0.tar.gz
+cd riak-2.9.0
+make rel
+```
+
+If you receive errors when building about "incompatible architecture,"
+please verify that you built Erlang with the same architecture as your
+system (Snow Leopard and higher: 64-bit).
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.0p5/setup/installing/rhel-centos.md b/content/riak/kv/2.9.0p5/setup/installing/rhel-centos.md
new file mode 100644
index 0000000000..364eac0ec7
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/installing/rhel-centos.md
@@ -0,0 +1,137 @@
+---
+title_supertext: "Installing on"
+title: "RHEL and CentOS"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "RHEL & CentOS"
+    identifier: "installing_rhel_centos"
+    weight: 304
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/kv/2.9.0p5/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/2.9.0p5/installing/rhel-centos/
+  - /riak/kv/2.9.0p5/installing/rhel-centos/
+  - /riak/2.9.0p5/setup/installing/rhel-centos/
+  - /riak/2.9.0/setup/installing/rhel-centos/
+  - /riak/kv/2.9.0/setup/installing/rhel-centos/
+  - /riak/kv/2.9.0p1/setup/installing/rhel-centos/
+  - /riak/kv/2.9.0p2/setup/installing/rhel-centos/
+  - /riak/kv/2.9.0p3/setup/installing/rhel-centos/
+  - /riak/kv/2.9.0p4/setup/installing/rhel-centos/
+---
+
+
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/verify
+
+Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary
+package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on
+CentOS/RHEL 6.9, 7.5.1804, and 8.1.1911.
+
+> **Note on SELinux**
+>
+> CentOS enables SELinux by default, so you may need to disable SELinux if
+you encounter errors.
+
+## Installing From Package
+
+If you wish to install the RHEL/CentOS packages by hand, follow these
+instructions.
+
+### For CentOS 8 / RHEL 8
+
+Before installing Riak on CentOS 8/RHEL 8, we first need to satisfy some Erlang dependencies by installing the EPEL repository:
+
+```bash
+sudo yum install -y epel-release
+```
+
+Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/8/riak-2.9.0-1.el8.x86_64.rpm
+sudo yum localinstall -y riak-2.9.0-1.el8.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/8/riak-2.9.0-1.el8.x86_64.rpm
+sudo rpm -Uvh riak-2.9.0-1.el8.x86_64.rpm
+```
+
+### For CentOS 7 / RHEL 7
+
+You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/7/riak-2.9.0-1.el7.x86_64.rpm
+sudo yum localinstall -y riak-2.9.0-1.el7.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/7/riak-2.9.0-1.el7.x86_64.rpm
+sudo rpm -Uvh riak-2.9.0-1.el7.x86_64.rpm
+```
+
+### For CentOS 6 / RHEL 6
+
+You can install using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/6/riak-2.9.0-1.el6.x86_64.rpm
+sudo yum localinstall -y riak-2.9.0-1.el6.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/6/riak-2.9.0-1.el6.x86_64.rpm
+sudo rpm -Uvh riak-2.9.0-1.el6.x86_64.rpm
+```
+
+## Installing From Source
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+Building from source will require the following packages:
+
+* `gcc`
+* `gcc-c++`
+* `glibc-devel`
+* `make`
+* `pam-devel`
+
+You can install these with yum:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
+```
+
+Now we can download and install Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.0p5/riak-2.9.0.tar.gz
+tar zxvf riak-2.9.0.tar.gz
+cd riak-2.9.0
+make rel
+```
+
+You will now have a fresh build of Riak in the `rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.0p5/setup/installing/smartos.md b/content/riak/kv/2.9.0p5/setup/installing/smartos.md
new file mode 100644
index 0000000000..4f2ca4a545
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/installing/smartos.md
@@ -0,0 +1,118 @@
+---
+title_supertext: "Installing on"
+title: "SmartOS"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "SmartOS"
+    identifier: "installing_smartos"
+    weight: 305
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/installing/Installing-on-SmartOS
+  - /riak/kv/2.9.0p5/ops/building/installing/Installing-on-SmartOS
+  - /riak/2.9.0p5/installing/smartos/
+  - /riak/kv/2.9.0p5/installing/smartos/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/verify
+
+{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
+SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user.
+
+## Open Files Limit
+
+Before proceeding with installation, you should ensure that the system's open
+files limit is at least 65536. Check the current limits to verify this:
+
+```bash
+ulimit -a
+```
+
+To temporarily increase this limit *for the life of your session*, use the following command:
+
+```bash
+ulimit -n 65536
+```
+
+To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`:
+
+```bash
+set rlim_fd_max=65536
+```
+
+## Choosing a Version
+
+SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is how to determine which Riak package to use.
+
+What really matters for Riak is which dataset was used to make the SmartOS VM. These datasets come from Joyent and appear like this with the `dsadm` command:
+
+```
+fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4
+f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1
+```
+
+This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use.
+
+For Joyent Cloud users who don't know what dataset was used, in the guest zone type:
+
+```
+cat /opt/local/etc/pkgin/repositories.conf
+```
+
+* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* you need to use the `1.8` download.
+* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* you need to use the `1.6` download.
+
+## Download and Install
+
+Download your version of the Riak binary package for SmartOS:
+
+```bash
+curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz
+```
+
+Next, install the package:
+
+```
+pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz
+```
+
+After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services:
+
+```bash
+svcadm -v enable -r riak
+```
+
+Finally, after enabling the services, check to see that they are online:
+
+```
+svcs -a | grep -E 'epmd|riak'
+```
+
+Output from the above command should resemble the following:
+
+```
+online 17:17:16 svc:/network/epmd:default
+online 17:17:16 svc:/application/riak:default
+```
+
+Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak:
+
+```bash
+riak ping
+```
+
+Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead.
+
+If all responses indicate that Riak is up and running, then you have successfully installed and configured Riak as a service on SmartOS.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.0p5/setup/installing/solaris.md b/content/riak/kv/2.9.0p5/setup/installing/solaris.md
new file mode 100644
index 0000000000..d8cdcea96f
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/installing/solaris.md
@@ -0,0 +1,90 @@
+---
+title_supertext: "Installing on"
+title: "Solaris"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Solaris"
+    identifier: "installing_solaris"
+    weight: 306
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/installing/Installing-on-Solaris
+  - /riak/kv/2.9.0p5/ops/building/installing/Installing-on-Solaris
+  - /riak/2.9.0p5/installing/solaris/
+  - /riak/kv/2.9.0p5/installing/solaris/
+---
+
+
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/verify
+
+{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}}
+Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user.
+
+> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation.
+
+## Open Files Limit
+
+Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command:
+
+```bash
+ulimit -a
+```
+
+To temporarily increase this limit for the life of your session, use the following command:
+
+```bash
+ulimit -n 65536
+```
+
+To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file:
+
+```
+set rlim_fd_max=65536
+set rlim_fd_cur=65536
+```
+
+Note that you must restart to have the above settings take effect.
+
+## Download and Install
+
+Download your version of the Riak binary package for Solaris 10:
+
+```bash
+curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz
+```
+
+Next, install the package:
+
+```bash
+gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz
+pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg
+```
+
+After installing the package, be sure to include `/opt/riak/bin` in the
+appropriate user's `PATH`. After doing so, you can then start Riak:
+
+```bash
+riak start
+```
+
+Finally, go ahead and ping Riak to ensure it is running:
+
+```bash
+riak ping
+```
+
+Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead.
+
+If all responses indicate that Riak is up and running, then you have successfully installed Riak on Solaris 10.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.0p5/setup/installing/source.md b/content/riak/kv/2.9.0p5/setup/installing/source.md new file mode 100644 index 0000000000..e585772818 --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/installing/source.md @@ -0,0 +1,113 @@ +--- +title_supertext: "Installing" +title: "Riak KV From Source" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Installing From Source" + identifier: "installing_source" + weight: 310 + parent: "installing" +toc: true +aliases: + - /riak/2.9.0p5/ops/building/Installing-Riak-from-Source + - /riak/kv/2.9.0p5/ops/building/Installing-Riak-from-Source + - /riak/2.9.0p5/installing/source/ + - /riak/kv/2.9.0p5/installing/source/ + - /riak/2.9.0p5/setup/installing/source/ + - /riak/2.9.0/setup/installing/source/ + - /riak/kv/2.9.0/setup/installing/source/ + - /riak/kv/2.9.0p1/setup/installing/source/ + - /riak/kv/2.9.0p2/setup/installing/source/ + - /riak/kv/2.9.0p3/setup/installing/source/ + - /riak/kv/2.9.0p4/setup/installing/source/ +--- + + + + +[install source erlang]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source/erlang +[downloads]: {{<baseurl>}}riak/kv/2.9.0p5/downloads/ +[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/verify + +Riak should be installed from source if you are building on a platform +for which a package does not exist or if you are interested in +contributing to Riak. + +## Dependencies + +### Erlang + +To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release. + +See [Installing Erlang][install source erlang] for instructions. + +### Git + +Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build. + +### GCC + +Riak will not compile with Clang. Please make sure your default C/C++ +compiler is [GCC](https://gcc.gnu.org/). + +## Installation + +The following instructions generate a complete, self-contained build of +Riak in `$RIAK/rel/riak` where `$RIAK` is the location of the unpacked +or cloned source. + +### Installing from source package + +Download the Riak source package from the [Download Center][downloads] and build: + +```bash +curl -O https://files.tiot.jp/riak/kv/2.9/2.9.0p5/riak-2.9.0.tar.gz +tar zxvf riak-2.9.0.tar.gz +cd riak-2.9.0 +make locked-deps +make rel +``` + +### Installing from GitHub + +The [Riak GitHub repository](http://github.com/basho/riak) has much +more information on building and installing Riak from source. To clone +and build Riak from source, follow the steps below. 
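+ +Before cloning, you may want to confirm that the patched Erlang installed via [Installing Erlang][install source erlang] is the build your shell will pick up (a quick sketch, assuming an R16B02-based build, which reports `R16B02`): + +```bash +# Print the OTP release of the Erlang found first on the PATH, then exit +erl -noshell -eval 'io:format("~s~n", [erlang:system_info(otp_release)]), halt().' +``` 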
+ +Clone the repository using [Git](http://git-scm.com) and build: + +```bash +git clone git://github.com/basho/riak.git +cd riak +make locked-deps +make rel +``` + +## Platform-Specific Instructions + +For instructions about specific platforms, see: + + * [Debian & Ubuntu][install debian & ubuntu#source] + * [FreeBSD][install freebsd#source] + * [Mac OS X][install mac osx#source] + * [RHEL & CentOS][install rhel & centos#source] + +If you are running Riak on a platform not in the list above and need +some help getting it up and running, join The Riak Mailing List and +inquire about it there. We are happy to help you get up and running with +Riak. + +### Windows + +Riak is not currently supported on Microsoft Windows. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.9.0p5/setup/installing/source/erlang.md b/content/riak/kv/2.9.0p5/setup/installing/source/erlang.md new file mode 100644 index 0000000000..eaaa5c92e5 --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/installing/source/erlang.md @@ -0,0 +1,574 @@ +--- +title: "Installing Erlang" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Installing Erlang" + identifier: "installing_source_erlang" + weight: 301 + parent: "installing_source" +toc: true +aliases: + - /riak/2.9.0p5/ops/building/installing/erlang + - /riak/kv/2.9.0p5/ops/building/installing/erlang + - /riak/2.9.0p5/installing/source/erlang/ + - /riak/kv/2.9.0p5/installing/source/erlang/ + - /riak/2.9.0p5/setup/installing/source/erlang/ + - /riak/2.9.0/setup/installing/source/erlang/ + - /riak/kv/2.9.0/setup/installing/source/erlang/ + - /riak/kv/2.9.0p1/setup/installing/source/erlang/ + - /riak/kv/2.9.0p2/setup/installing/source/erlang/ + - /riak/kv/2.9.0p3/setup/installing/source/erlang/ + - /riak/kv/2.9.0p4/setup/installing/source/erlang/ +--- + + +[install index]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing +[security basics]: {{<baseurl>}}riak/kv/2.9.0p5/using/security/basics + +Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** + +> **Note on Official Support** +> +> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package. + +## Prerequisites + +#### Contents + +* [kerl](#kerl-prerequisites) +* [Debian/Ubuntu](#debian-ubuntu-prerequisites) +* [FreeBSD/Solaris](#freebsd-solaris-prerequisites) +* [Mac OS X](#mac-os-x-prerequisites) +* [RHEL/CentOS](#rhel-centos-prerequisites) + +To build and install Erlang you must have a GNU-compatible build system and these tools: + +**Unpacking** + +* [GNU unzip](http://www.gzip.org/) or a modern uncompressing utility. +* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives. + +**Building** + +* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts. +* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program. +* [gcc](https://gcc.gnu.org/): for compiling C. +* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces. 
+* [OpenSSL](https://www.openssl.org/): toolkit that implements SSL and TLS protocols. +* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java. + + +## kerl Prerequisites + +[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems. + +Install kerl by running the following commands: + +```bash +curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl +chmod a+x kerl +``` + +If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl. + +Otherwise, continue with [Installing with kerl](#installing-with-kerl). + +### Configuring kerl on FreeBSD/Solaris + +Start by creating a `~/.kerlrc` file: + +```bash +touch ~/.kerlrc +``` + +Next add the following contents to your `~/.kerlrc` file: + +```shell +KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads + --enable-kernel-poll --without-odbc" +``` + +Then check for the presence of autoconf by running: + +```shell +which autoconf +``` + +If this returns `autoconf not found`, install autoconf by running: + +```shell +sudo pkg update +sudo pkg install autoconf +``` + +Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl). + + +### Configuring kerl on Mac OS X + +To compile Erlang as 64-bit on Mac OS X, you need to instruct kerl to pass the correct flags to the `configure` command. + +Start by creating a `~/.kerlrc` file: + +```bash +touch ~/.kerlrc +``` + +Next add the following contents to your `~/.kerlrc` file: + +```shell +KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads + --enable-kernel-poll --without-odbc --enable-darwin-64bit" +``` + +On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/). + +Check for the presence of autoconf by running: + +```shell +which autoconf +``` + +If this returns `autoconf not found`, install autoconf. + +With Homebrew: + +```shell +brew install autoconf +``` + +Or with curl: + +```shell +curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz +tar zxvf autoconf-2.69.tar.gz +cd autoconf-2.69 +./configure && make && sudo make install +``` + +Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl). + + + +## Debian/Ubuntu Prerequisites + +### Dependencies + +To install the required dependencies, run the following `apt-get` commands: + +```bash +sudo apt-get update +sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git +``` + +### GUI Dependencies + +If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies. + +> **Note on build output** +> +>These packages are not required for operation of a Riak node. +Notes in the build output about missing support for wxWidgets can be +safely ignored when installing Riak in a typical non-graphical server +environment. + +To install packages for graphics support use the following `apt-get` command: + +```bash +sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev +``` + +### Next Steps + +Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu). 
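+ +If you want to double-check the toolchain first, each of the core build tools installed above should now be on your `PATH` (a quick sketch; the exact version output will vary by release): + +```bash +# Each command should print a version banner rather than "command not found" +gcc --version +make --version +autoconf --version +``` 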
+ + + +## FreeBSD/Solaris Prerequisites + +### Dependencies + +To install the required dependencies, run the following `pkg` command: + +```bash +sudo pkg update +sudo pkg install gcc autoconf gmake flex +``` + +### GUI Dependencies + +If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies. + +To install packages for graphics support use the following `pkg` command: + +```bash +sudo pkg install wx28-gtk2-2.8.12_4 +``` + +### Next Steps + +Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris). + + + +## Mac OS X Prerequisites + +* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools. +* [Homebrew](http://brew.sh/) (*optional*) - Package Manager. + +First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X. + +We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is not required to install Erlang and is optional. + +Next, if you are running OS X 10.9 (Mavericks) or later, you may need to +install [autoconf](https://www.gnu.org/software/autoconf/). To check for +the presence of autoconf, run: + +```bash +which autoconf +``` + +If this returns `autoconf not found`, install autoconf. + +With Homebrew: + +```bash +brew install autoconf +``` + +Or with curl: + +```bash +curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz +tar zxvf autoconf-2.69.tar.gz +cd autoconf-2.69 +./configure && make && sudo make install +``` + +Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x). + +## RHEL/CentOS Prerequisites + +### Dependencies + +To install the required dependencies, run the following `yum` command: + +```bash +sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git +``` + +### GUI Dependencies + +If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies. + +To install packages for graphics support use the following `yum` command: + +```bash +sudo yum install wxBase.x86_64 +``` + +### Next Steps + +Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos). + + + +## Installation + +* [Installing with kerl](#installing-with-kerl) +* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu) +* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris) +* [Installing on Mac OS X](#installing-on-mac-os-x) +* [Installing on RHEL/CentOS](#installing-on-rhel-centos) + +## Installing with kerl + +First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites). + +With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of +Erlang [from GitHub](https://github.com/basho/otp) using the following +command: + +```bash +./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10 +``` + +This builds the Erlang distribution and performs all of the steps +required to manually install Erlang for you. + +After Erlang is successfully built, you can install the build as follows: + +```bash +./kerl install R16B02-basho10 ~/erlang/R16B02-basho10 +. 
~/erlang/R16B02-basho10/activate +``` + +The last line activates the Erlang build that was just installed into +`~/erlang/R16B02-basho10`. + +> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands. + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Debian/Ubuntu + +First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). + +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you have installed the necessary dependencies found in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). + +You can install Erlang in several ways on OS X: + +* [From Source](#installing-on-mac-os-x-from-source) +* [Homebrew](#installing-on-mac-os-x-with-homebrew) +* [MacPorts](#installing-on-mac-os-x-with-macports) + +## Installing on Mac OS X from Source + +First, download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Follow the steps below to configure Erlang for your operating system. + +#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7) + +If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion +(OS X 10.7), you can use LLVM (the default) or GCC to compile Erlang. 
+ +Using LLVM: + +```bash +CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll --enable-darwin-64bit +``` + +Or if you prefer GCC: + +```bash +CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \ +./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll --enable-darwin-64bit +``` + +#### Configuring Erlang on Snow Leopard (OS X 10.6) + +If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an +Intel processor: + +```bash +./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll --enable-darwin-64bit +``` + +#### Configuring Erlang on older versions of OS X + +If you're on a non-Intel processor or older version of OS X: + +```bash +./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll +``` + +After you've configured your system, `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on Mac OS X with Homebrew + +To install Erlang with Homebrew, use this command: + +```bash +brew install erlang +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on Mac OS X with MacPorts + +Installing with MacPorts: + +```bash +port install erlang +ssl +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on RHEL/CentOS + +First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). + +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +> **Note for RHEL6/CentOS6** +> +> In certain versions of RHEL6 and CentOS6 the `openssl-devel` package +ships with Elliptic Curve Cryptography partially disabled. To +communicate this to Erlang and prevent compile- and run-time errors, the +environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to +Erlang's `./configure` call. 
+> +> The full `make` invocation then becomes +> +> ```bash +CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` diff --git a/content/riak/kv/2.9.0p5/setup/installing/source/jvm.md b/content/riak/kv/2.9.0p5/setup/installing/source/jvm.md new file mode 100644 index 0000000000..5bc4cbae79 --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/installing/source/jvm.md @@ -0,0 +1,59 @@ +--- +title: "Installing the JVM" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Installing the JVM" + identifier: "installing_source_jvm" + weight: 302 + parent: "installing_source" +toc: true +aliases: + - /riak/2.9.0p5/ops/building/installing/jvm + - /riak/kv/2.9.0p5/ops/building/installing/jvm + - /riak/2.9.0p5/ops/building/installing/Installing-the-JVM + - /riak/kv/2.9.0p5/ops/building/installing/Installing-the-JVM + - /riak/2.9.0p5/installing/source/jvm/ + - /riak/kv/2.9.0p5/installing/source/jvm/ + - /riak/2.9.0p5/setup/installing/source/jvm/ + - /riak/2.9.0/setup/installing/source/jvm/ + - /riak/kv/2.9.0/setup/installing/source/jvm/ + - /riak/kv/2.9.0p1/setup/installing/source/jvm/ + - /riak/kv/2.9.0p2/setup/installing/source/jvm/ + - /riak/kv/2.9.0p3/setup/installing/source/jvm/ + - /riak/kv/2.9.0p4/setup/installing/source/jvm/ +--- + + +[usage search]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search + +If you are using [Riak Search 2.0][usage search], codename Yokozuna, +you will need to install **Java 1.6 or later** to run [Apache +Solr](https://lucene.apache.org/solr/), the search platform that powers +Riak Search. + +We recommend using Oracle's [JDK +7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html). +Installation packages can be found on the [Java SE 7 Downloads +page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR) +and instructions on the [documentation +page](http://www.oracle.com/technetwork/java/javase/documentation/index.html). + +## Installing Solr on OS X + +If you're using Riak Search on Mac OS X, you may see the following +error: + +```log +java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME> +``` + +If you encounter this error, we recommend manually setting the hostname +for `localhost` using +[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html). 
+ +```bash +scutil --set HostName "localhost" +``` diff --git a/content/riak/kv/2.9.0p5/setup/installing/suse.md b/content/riak/kv/2.9.0p5/setup/installing/suse.md new file mode 100644 index 0000000000..33822300e3 --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/installing/suse.md @@ -0,0 +1,55 @@ +--- +title_supertext: "Installing on" +title: "SUSE" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "SUSE" + identifier: "installing_suse" + weight: 307 + parent: "installing" +toc: false +aliases: + - /riak/2.9.0p5/ops/building/installing/Installing-on-SUSE + - /riak/kv/2.9.0p5/ops/building/installing/Installing-on-SUSE + - /riak/2.9.0p5/installing/suse/ + - /riak/kv/2.9.0p5/installing/suse/ + - /riak/2.9.0p5/setup/installing/suse/ + - /riak/2.9.0/setup/installing/suse/ + - /riak/kv/2.9.0/setup/installing/suse/ + - /riak/kv/2.9.0p1/setup/installing/suse/ + - /riak/kv/2.9.0p2/setup/installing/suse/ + - /riak/kv/2.9.0p3/setup/installing/suse/ + - /riak/kv/2.9.0p4/setup/installing/suse/ +--- + + +[install verify]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/verify + +{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}} +SUSE is no longer supported in Riak KV 2.9.0+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +Riak KV can be installed on OpenSUSE and SLES systems using a binary package. The following steps have been tested to work with Riak on +the following x86/x86_64 flavors of SUSE: + +* SLES11-SP1 +* SLES11-SP2 +* SLES11-SP3 +* SLES11-SP4 +* OpenSUSE 11.2 +* OpenSUSE 11.3 +* OpenSUSE 11.4 + +## Installing with rpm + +```bash +wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm +sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.9.0p5/setup/installing/verify.md b/content/riak/kv/2.9.0p5/setup/installing/verify.md new file mode 100644 index 0000000000..f889853623 --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/installing/verify.md @@ -0,0 +1,172 @@ +--- +title: "Verifying a Riak KV Installation" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Verifying an Installation" + identifier: "installing_verify" + weight: 311 + parent: "installing" +toc: true +aliases: + - /riak/2.9.0p5/ops/installing/Post-Installation + - /riak/kv/2.9.0p5/ops/installing/Post-Installation + - /riak/2.9.0p5/installing/verify-install/ + - /riak/kv/2.9.0p5/installing/verify-install/ + - /riak/2.9.0p5/setup/installing/verify/ + - /riak/2.9.0/setup/installing/verify/ + - /riak/kv/2.9.0/setup/installing/verify/ + - /riak/kv/2.9.0p1/setup/installing/verify/ + - /riak/kv/2.9.0p2/setup/installing/verify/ + - /riak/kv/2.9.0p3/setup/installing/verify/ + - /riak/kv/2.9.0p4/setup/installing/verify/ +--- + + +[client libraries]: {{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries +[perf open files]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/open-files-limit +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/inspecting-node + +After you've installed Riak KV, we recommend checking the liveness of +each node to ensure that requests are being properly served. 
+ +In this document, we cover ways of verifying that your Riak nodes are operating +correctly. After you've determined that your nodes are functioning and you're +ready to put Riak KV to work, be sure to check out the resources in the +**Now What?** section below. + +## Starting a Riak Node + +> **Note about source installations** +> +> To start a Riak KV node that was installed by compiling the source code, you +can add the Riak KV binary directory from the installation directory you've +chosen to your `PATH`. +> +> For example, if you compiled Riak KV from source in +the `/home/riak` directory, then you can add the binary directory +(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation. + +To start a Riak node, use the `riak start` command: + +```bash +riak start +``` + +A successful start will return no output. If there is a problem starting the +node, an error message is printed to standard error. + +To run Riak with an attached interactive Erlang console: + +```bash +riak console +``` + +A Riak node is typically started in console mode as part of debugging or +troubleshooting to gather more detailed information from the Riak startup +sequence. Note that if you start a Riak node in this manner, it is running as +a foreground process that will exit when the console is closed. + +You can close the console by issuing this command at the Erlang prompt: + +```erlang +q(). +``` + +Once your node has started, you can initially check that it is running with +the `riak ping` command: + +```bash +riak ping +``` + +The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not. + +> **Open Files Limit** +> +> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised +to increase the operating system default open files limit when running Riak. +You can read more about why in the [Open Files Limit][perf open files] documentation. + +## Does it work? + +One convenient means of testing the readiness of an individual Riak node and +its ability to read and write data is with the `riak-admin test` command: + +```bash +riak-admin test +``` + +Successful output from `riak-admin test` looks like this: + +```text +Attempting to restart script through sudo -H -u riak +Successfully completed 1 read/write cycle to '<nodename>' +``` + +You can also test whether Riak is working by using the `curl` command-line +tool. When you have Riak running on a node, try this command to retrieve +the properties associated with the `default` [bucket type][cluster ops bucket types]: + +```bash +curl -v http://127.0.0.1:8098/types/default/props +``` + +Replace `127.0.0.1` in the example above with your Riak node's IP address or +fully qualified domain name, and you should get a response that looks like this: + +``` +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test HTTP/1.1 +> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu) +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue) +< Date: Wed, 26 Dec 2012 15:50:20 GMT +< Content-Type: application/json +< Content-Length: 422 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","allow_mult":false,"basic_quorum":false, + "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util", + "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false, + "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"}, + "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0, + "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50, + "w":"quorum","young_vclock":20}} +``` + +The output above shows a successful response (`HTTP 200 OK`) and additional +details from the verbose option. The response also contains the bucket +properties for the `default` bucket type. + +## Riaknostic + +It is a good idea to verify some basic configuration and general health +of the Riak node after installation by using Riak's built-in diagnostic +utility [Riaknostic](http://riaknostic.basho.com/). + +To start up Riaknostic, ensure that Riak is running on the node and issue the following command: + +```bash +riak-admin diag +``` + +More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide. + +## Now what? + +You have a working Riak node! + +From here you might want to check out the following resources: + +* [Client Libraries][client libraries] to use Riak with your favorite programming language diff --git a/content/riak/kv/2.9.0p5/setup/installing/windows-azure.md b/content/riak/kv/2.9.0p5/setup/installing/windows-azure.md new file mode 100644 index 0000000000..c793459322 --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/installing/windows-azure.md @@ -0,0 +1,200 @@ +--- +title_supertext: "Installing on" +title: "Windows Azure" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Windows Azure" + identifier: "installing_windows_azure" + weight: 308 + parent: "installing" +toc: true +aliases: + - /riak/2.9.0p5/ops/building/installing/Installing-on-Windows-Azure + - /riak/kv/2.9.0p5/ops/building/installing/Installing-on-Windows-Azure + - /riak/2.9.0p5/installing/windows-azure/ + - /riak/kv/2.9.0p5/installing/windows-azure/ + - /riak/2.9.0p5/setup/installing/windows-azure/ + - /riak/2.9.0/setup/installing/windows-azure/ + - /riak/kv/2.9.0/setup/installing/windows-azure/ + - /riak/kv/2.9.0p1/setup/installing/windows-azure/ + - /riak/kv/2.9.0p2/setup/installing/windows-azure/ + - /riak/kv/2.9.0p3/setup/installing/windows-azure/ + - /riak/kv/2.9.0p4/setup/installing/windows-azure/ +--- + + +## Creating CentOS VMs + +{{% note title="Warning: Azure not always up to date" %}} +Azure's available Riak VMs are community supported, and thus are not always up to +date with the latest Riak package. Please ensure that the current VM is +using the latest supported code (and don't be afraid to update it if it's +not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s). +{{% /note %}} + +You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. 
You can also sign up for a free trial account if you do not have a Windows Azure account. + +1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account. + +2. Click "preview features" to view the available previews. + + ![]({{<baseurl>}}images/antares-iaas-preview-01.png) + +3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". + + ![]({{<baseurl>}}images/antares-iaas-preview-02.png) + +4. Select your subscription and click the check. + + ![]({{<baseurl>}}images/antares-iaas-preview-04.png) + +### Create a virtual machine running CentOS Linux + +1. Login to the Windows Azure (Preview) Management Portal using your Windows Azure account. + +2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". + + ![]({{<baseurl>}}images/createvm_small.png) + +3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. + + ![]({{<baseurl>}}images/vmconfiguration0.png) + +4. On the VM Configuration page, provide the following information: + - Provide a "Virtual Machine Name", such as "testlinuxvm". + - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package. + - In the "New Password" box, type a strong password. + - In the "Confirm Password" box, retype the password. + - Select the appropriate "Size" from the drop down list. + - Click the next arrow to continue. + + ![]({{<baseurl>}}images/vmconfiguration1.png) + +5. On the VM Mode page, provide the following information: + - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. + - In the "DNS Name" box, type a valid DNS address, e.g., "testlinuxvm". + - In the "Storage Account" box, select "Use Automatically Generated Storage Account". + - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. + - Click the next arrow to continue. + + ![]({{<baseurl>}}images/vmconfiguration2.png) + +6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. + + ![]({{<baseurl>}}images/vmconfiguration3.png) + +7. Wait while Windows Azure prepares your virtual machine. + +### Configure Endpoints + +Once the virtual machine is created, you must configure endpoints in order to connect remotely. + +1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints". + +2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows: + - Name: https + - Protocol: leave set to 'TCP' + - Public Port: 443 + - Private Port: 8069 + +## Connect to CentOS VMs using PuTTY or SSH + +When the virtual machine has been provisioned and the endpoints configured, you can connect to it using SSH or PuTTY. + +### Connecting Using SSH + +**For Linux & Mac Users:** + +```bash +ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180 +``` +Enter the user's password. + +**For Windows Users, use PuTTY:** + +If you are using a Windows computer, connect to the VM using PuTTY. 
PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html). + +1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe. + +2. Enter the SSH details as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. + + ![]({{<baseurl>}}images/putty.png) + +## Install Riak and configure using a shell script + +1. **On each node**, once you've connected using the steps above, execute: + +```bash +sudo su - +curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh +``` + +## Configure Riak using Riak Control + +You can either use Riak Control or the command line to add nodes to your Riak Cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line" + +1. Find the dns name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For Example: + - **dns:** basho-example.cloudapp.net + - **Deployment ID:** 7ea145743aeb4402a088da1234567890 + +2. Visit https://dns-name.cloudapp.net/admin in your browser. + +3. Enter 'admin' as the username, and the "Deployment ID" as the password. + +4. Select 'Cluster' on the left. + +5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each VM, not the DNS name. For Example: + - riak@basho-centos1 + +You now have a Riak cluster on Azure. + +## Configure Riak using Command Line + +If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section. + +First, SSH into the second (and subsequent) nodes and execute: + +```bash +riak-admin cluster join riak@yourhostnamehere +``` + +(Where 'yourhostnamehere' is the short name of the **first node** in your cluster) + +(NOTE: The host you choose can actually be any host that has already joined the cluster. The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.) + +After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following: + +```bash +riak-admin cluster plan +``` + +Verify all the nodes are listed as expected. 
If the cluster plan looks good: + +```bash +riak-admin cluster commit +``` + +To check the status of clustering use: + +```bash +riak-admin member-status +``` + +You now have a Riak cluster on Azure. + +## Load Test Data + +Execute on any one of the nodes: + +```bash +curl -s http://rekon.basho.com | sh +``` + +Visit the DNS address listed on the dashboard, at the port we opened as an endpoint: + +``` +http://testlinuxvm.cloudapp.net:8098/riak/rekon/go +``` diff --git a/content/riak/kv/2.9.0p5/setup/planning.md b/content/riak/kv/2.9.0p5/setup/planning.md new file mode 100644 index 0000000000..c031cbabac --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/planning.md @@ -0,0 +1,64 @@ +--- +title: "Planning Overview" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Planning" + identifier: "planning" + weight: 100 + parent: "setup_index" +toc: true +aliases: + - /riak/2.9.0p5/setup/planning/ + - /riak/2.9.0/setup/planning/ + - /riak/kv/2.9.0/setup/planning/ + - /riak/kv/2.9.0p1/setup/planning/ + - /riak/kv/2.9.0p2/setup/planning/ + - /riak/kv/2.9.0p3/setup/planning/ + - /riak/kv/2.9.0p4/setup/planning/ +--- + + +[plan start]: ./start +[plan backend]: ./backend +[plan cluster capacity]: ./cluster-capacity +[plan bitcask capacity]: ./bitcask-capacity-calc +[plan backend bitcask]: ./backend/bitcask +[plan best practices]: ./best-practices +[plan future]: ./future + +## In This Section + +### [Start Planning][plan start] + +Steps and recommendations for designing and configuring a Riak KV cluster. + +[Learn More >>][plan start] + +### [Choosing a Backend][plan backend] + +Information on choosing the right storage backend for your Riak KV cluster. + +[Learn More >>][plan backend] + +### [Cluster Capacity Planning][plan cluster capacity] + +Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster. + +[Learn More >>][plan cluster capacity] + +### [Bitcask Capacity Calculator][plan bitcask capacity] + +A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend. + +[Learn More >>][plan bitcask capacity] + +### [Scaling & Operating Best Practices][plan best practices] + +A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster. 
+ +[Learn More >>][plan best practices] + + diff --git a/content/riak/kv/2.9.0p5/setup/planning/backend.md b/content/riak/kv/2.9.0p5/setup/planning/backend.md new file mode 100644 index 0000000000..d3a724341e --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/planning/backend.md @@ -0,0 +1,64 @@ +--- +title: "Choosing a Backend" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Choosing a Backend" + identifier: "planning_choose_backend" + weight: 102 + parent: "planning" +toc: true +aliases: + - /riak/2.9.0p5/ops/building/planning/backends/ + - /riak/kv/2.9.0p5/ops/building/planning/backends/ + - /riak/2.9.0p5/setup/planning/backend/ + - /riak/2.9.0/setup/planning/backend/ + - /riak/kv/2.9.0/setup/planning/backend/ + - /riak/kv/2.9.0p1/setup/planning/backend/ + - /riak/kv/2.9.0p2/setup/planning/backend/ + - /riak/kv/2.9.0p3/setup/planning/backend/ + - /riak/kv/2.9.0p4/setup/planning/backend/ +--- + + +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveldb +[plan backend memory]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/memory +[plan backend multi]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/multi +[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveled +[dev api backend]: {{<baseurl>}}riak/kv/2.9.0p5/developing/api/backend + +Pluggable storage backends are a key feature of Riak KV. They enable you to +choose a low-level storage engine that suits specific operational needs. +For example, if your use case requires maximum throughput, data +persistence, and a bounded keyspace, then Bitcask is a good choice. On +the other hand, if you need to store a large number of keys or to use +secondary indexes, LevelDB is likely a better choice. + +The following backends are supported: + +* [Bitcask][plan backend bitcask] +* [LevelDB][plan backend leveldb] +* [Memory][plan backend memory] +* [Multi][plan backend multi] +* [Leveled][plan backend leveled] + +Riak KV supports the use of custom storage backends as well. See the +storage [Backend API][dev api backend] for more details. + +Feature or Characteristic |Bitcask|LevelDB|Memory|Leveled| +:----------------------------------------------|:-----:|:-----:|:----:|:----:| +Default Riak KV backend |✓ | | | | +Persistent |✓ |✓ | |✓ | +Keyspace in RAM |✓ | |✓ | | +Keyspace can be greater than available RAM | |✓ | |✓ | +Keyspace loaded into RAM on startup<sup>1</sup>|✓ | | | | +Objects in RAM | | |✓ | | +Object expiration |✓ | |✓ | | +Secondary indexes | |✓ |✓ |✓ | +Tiered storage | |✓ | |✓ | + +<sup>1</sup> Noted here since this can affect Riak start times for large +keyspaces. 
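+ +For reference, whichever backend you choose is enabled with a single setting in each node's configuration file. A minimal `riak.conf` sketch selecting the default (Bitcask) backend looks like this; the pages linked above cover the per-backend settings that accompany it: + +```riakconf +storage_backend = bitcask +``` 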
diff --git a/content/riak/kv/2.9.0p5/setup/planning/backend/bitcask.md b/content/riak/kv/2.9.0p5/setup/planning/backend/bitcask.md new file mode 100644 index 0000000000..582232b0a5 --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/planning/backend/bitcask.md @@ -0,0 +1,998 @@ +--- +title: "Bitcask" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Bitcask" + identifier: "planning_backend_bitcask" + weight: 100 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.0p5/ops/advanced/backends/bitcask/ + - /riak/kv/2.9.0p5/ops/advanced/backends/bitcask/ + - /riak/2.9.0p5/setup/planning/backend/bitcask/ + - /riak/2.9.0/setup/planning/backend/bitcask/ + - /riak/kv/2.9.0/setup/planning/backend/bitcask/ + - /riak/kv/2.9.0p1/setup/planning/backend/bitcask/ + - /riak/kv/2.9.0p2/setup/planning/backend/bitcask/ + - /riak/kv/2.9.0p3/setup/planning/backend/bitcask/ + - /riak/kv/2.9.0p4/setup/planning/backend/bitcask/ +--- + + +[github bitcask]: https://github.com/basho/bitcask +[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf +[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-cli +[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode +[learn clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters +[plan backend multi]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/multi +[usage search]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search + +[glossary aae]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/open-files-limit + +[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/deleting-objects + +[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. + +## Bitcask's Strengths + +* **Low latency per item read or written** + + This is due to the write-once, append-only nature of Bitcask + database files. + +* **High throughput, especially when writing an incoming stream of + random items** + + Write operations to Bitcask generally saturate I/O and disk + bandwidth, which is a good thing from a performance perspective. + This saturation occurs for two reasons: (1) data that is + written to Bitcask doesn't need to be ordered on disk, and (2) the + log-structured design of Bitcask allows for minimal disk head + movement during writes. + +* **Ability to handle datasets larger than RAM without degradation** + + Access to data in Bitcask involves direct lookup from an in-memory + hash table. This makes finding data very efficient, even when + datasets are very large. + +* **Single seek to retrieve any value** + + Bitcask's in-memory hash table of keys points directly to locations + on disk where the data lives. Bitcask never uses more than one disk + seek to read a value and sometimes even that isn't necessary due to + filesystem caching done by the operating system. + +* **Predictable lookup _and_ insert performance** + + For the reasons listed above, read operations from Bitcask have + fixed, predictable behavior. 
This is also true of writes to Bitcask + because write operations require, at most, one seek to the end of + the current open file followed by an append to that file. + +* **Fast, bounded crash recovery** + + Crash recovery is easy and fast with Bitcask because Bitcask files + are append-only and write-once. The only items that may be lost are + partially written records at the tail of the last file that was + opened for writes. Recovery operations need to review only the last + record or two written and verify CRC data to ensure that the data is + consistent. + +* **Easy Backup** + + In most systems, backup can be very complicated. Bitcask simplifies + this process due to its append-only, write-once disk format. Any + utility that archives or copies files in disk-block order will + properly back up or copy a Bitcask database. + +## Weaknesses + +* **Keys must fit in memory** + + Bitcask keeps all keys in memory at all times, which means that your + system must have enough memory to contain your entire keyspace, plus + additional space for other operational components and + operating-system-resident filesystem buffer space. + +## Installing Bitcask + +Bitcask is the default storage engine for Riak. You can verify that +Bitcask is currently being used as the storage backend with the +[`riak`][use admin riak cli] command interface: + +```bash +riak config effective | grep backend +``` + +If this operation returns anything other than `bitcask`, read +the following section for instructions on switching the backend to Bitcask. + +## Enabling Bitcask + +You can set Bitcask as the storage engine using each node's +[configuration files][config reference]: + +```riakconf +storage_backend = bitcask +``` + +```appconfig +{riak_kv, [ + {storage_backend, riak_kv_bitcask_backend}, + %% Other riak_kv settings... + + ]}, +``` + +## Configuring Bitcask + +Bitcask enables you to configure a wide variety of its behaviors, from +filesystem sync strategy to merge settings and more. + +> **Note on configuration systems** +> +> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file. +> Instructions for both systems will be included below. Narrative +descriptions of the various settings will be tailored to the newer +configuration system, whereas instructions for the older system will +largely be contained in the code tabs. + +The default configuration values for Bitcask are as follows: + +```riakconf +bitcask.data_root = ./data/bitcask +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + {data_root, "/var/lib/riak/bitcask"}, + {io_mode, erlang}, + + %% Other Bitcask-specific settings + ]} +``` + +All of the other available settings listed below can be added to your +configuration files. + +### Open Timeout + +The open timeout setting specifies the maximum time Bitcask will block +on startup while attempting to create or open the Bitcask data +directory. The default is 4 seconds. + +In general, you will not need to adjust this setting. If, however, you +begin to receive log messages of the form `Failed to start bitcask +backend: ...`, you may want to consider using a longer timeout. + +Open timeout is specified using the `bitcask.sync.open_timeout` +parameter, and can be set in terms of seconds, minutes, hours, etc. 
+The following example sets the parameter to 10 seconds: + +```riakconf +bitcask.sync.open_timeout = 10s +``` + +```appconfig +{bitcask, [ + ..., + {open_timeout, 10} %% This value must be expressed in seconds + ... + ]} +``` + +### Sync Strategy + +Bitcask enables you to configure the durability of writes by specifying +when to synchronize data to disk, i.e. by choosing a sync strategy. The +default setting (`none`) writes data into operating system buffers that +will be written to disk when those buffers are flushed by the operating +system. If the system fails before those buffers are flushed, e.g. due +to power loss, that data is lost. This possibility holds for any +database in which values are asynchronously flushed to disk. + +Thus, using the default setting of `none` protects against data loss in +the event of application failure, i.e. process death, but leaves open a +small window in which data could be lost in the event of a complete +system failure, e.g. hardware or OS failure. + +This possibility can be prevented by choosing the `o_sync` sync +strategy, which forces the operating system to flush to stable storage +at write time for every write. The effect of flushing each write is +better durability, although it should be noted that write throughput +will suffer because each write will have to wait for the write to +complete. + +The following sync strategies are available: + + * `none` --- lets the operating system manage syncing writes + (default) + * `o_sync` --- uses the `O_SYNC` flag, which forces syncs on every + write + * Time interval --- Riak will force Bitcask to sync at specified + intervals + +The following are possible configurations: + + +```riakconf +bitcask.sync.strategy = none +bitcask.sync.strategy = o_sync + +bitcask.sync.strategy = interval +bitcask.sync.interval = 65s +``` + +```appconfig +{bitcask, [ + ..., + {sync_strategy, none}, + {sync_strategy, o_sync}, + {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds + ... + ]} +``` + +> **Sync strategy interval limitations** +> +> Setting the sync interval to a value lower than or equal to + `riak_core.vnode_inactivity_timeout` (default: 60 seconds) will + prevent Riak from performing handoffs. +> +> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive. + +### Max File Size + +The `max_file_size` setting describes the maximum permitted size for any +single data file in the Bitcask directory. If a write causes the current +file to exceed this size threshold, then that file is closed, and a new +file is opened for writes. The default is 2 GB. + +Increasing `max_file_size` will cause Bitcask to create fewer, larger +files that are merged less frequently, while decreasing it will cause +Bitcask to create more numerous, smaller files that are merged more +frequently. + +To give an example, if your ring size is 16, your servers could see as +much as 32 GB of data in the bitcask directories before the first merge +is triggered, irrespective of your working set size. You should plan +storage accordingly and be aware that it is possible to see disk data +sizes that are larger than the working set. + +The `max_file_size` setting can be specified using kilobytes, megabytes, +etc. 
The following example sets the max file size to 1 GB: + +```riakconf +bitcask.max_file_size = 1GB +``` + +```appconfig +%% The max_file_size setting must be expressed in bytes, as in the +%% example below + +{bitcask, [ + ..., + {max_file_size, 16#40000000}, %% 1 GB expressed in bytes + ... + ]} +``` + +### Hint File CRC Check + +During startup, Bitcask will read from `.hint` files in order to build +its in-memory representation of the key space, falling back to `.data` +files if necessary. This reduces the amount of data that must be read +from the disk during startup, thereby also reducing the time required to +start up. You can configure Bitcask to either disregard `.hint` files +that don't contain a CRC value or to use them anyway. + +If you are using the newer, `riak.conf`-based configuration system, you +can instruct Bitcask to disregard `.hint` files that do not contain a +CRC value by setting the `hintfile_checksums` setting to `strict` (the +default). To use Bitcask in a backward-compatible mode that allows for +`.hint` files without CRC signatures, change the setting to +`allow_missing`. + +The following example sets the parameter to `strict`: + +```riakconf +bitcask.hintfile_checksums = strict +``` + +```appconfig +%% In the app.config-based system, substitute "require_hint_crc" for +%% "hintfile_checksums", "true" for "strict", and "false" for +%% "allow_missing" + +{bitcask, [ + ..., + {require_hint_crc, true}, + ... + ]} +``` + +### I/O Mode + +The `io_mode` setting specifies which code module Bitcask should use for +file access. The available settings are: + +* `erlang` (default) --- Writes are made via Erlang's built-in file API +* `nif` --- Writes are made via direct calls to the POSIX C API + +The following example sets `io_mode` to `erlang`: + +```riakconf +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + ..., + {io_mode, erlang}, + ... + ]} +``` + +In general, the `nif` IO mode provides higher throughput for certain +workloads, but it has the potential to negatively impact the Erlang VM, +leading to higher worst-case latencies and possible throughput collapse. + +### `O_SYNC` on Linux + +Synchronous file I/O via +[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is +supported in Bitcask if `io_mode` is set to `nif` and is not supported +in the `erlang` mode. + +If you enable `o_sync` by setting `io_mode` to `nif`, however, you will +still get an incorrect warning along the following lines: + +```log +[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux +``` + +If you are using the older, `app.config`-based configuration system, you +can disable the check that generates this warning by adding the +following to the `riak_kv` section of your `app.config`: + +```appconfig +{riak_kv, [ + ..., + {o_sync_warning_logged, false}, + ... + ]} +``` + +### Disk Usage and Merging Settings + +Riak KV stores each [vnode][glossary vnode] of the +[ring][learn clusters] as a separate Bitcask directory within the +configured Bitcask data directory. + +Each of these directories will contain multiple files with key/value +data, one or more "hint" files that record where the various keys exist +within the data files, and a write lock file. The design of Bitcask +allows for recovery even when data isn't fully synchronized to disk +(partial writes). This is accomplished by maintaining data files that +are append-only (i.e. never modified in-place) and are never reopened +for modification (i.e. 
they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met, at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory.
+
+The merge process is affected by all of the settings described in the
+sections below. In those sections, "dead" refers to keys that no longer
+contain the most up-to-date values, while "live" refers to keys that do
+contain the most up-to-date value and have not been deleted.
+
+### Merge Policy
+
+Bitcask enables you to select a merge policy, i.e. when during the day
+merge operations are allowed to be triggered. The valid options are:
+
+* `always` --- No restrictions on when merge operations can occur
+  (default)
+* `never` --- Merge will never be attempted
+* `window` --- Merge operations occur during specified hours
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can select a merge policy using the `merge.policy` setting. The
+following example sets the merge policy to `never`:
+
+```riakconf
+bitcask.merge.policy = never
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {merge_window, never},
+    ...
+    ]}
+```
+
+If you opt to specify start and end hours for merge operations, you can
+do so with the `merge.window.start` and `merge.window.end`
+settings in addition to setting the merge policy to `window`.
+Each setting is an integer between 0 and 23 for hours on a 24h clock,
+with 0 meaning midnight and 23 standing for 11 pm.
+The merge window runs from the first minute of the `merge.window.start` hour
+to the last minute of the `merge.window.end` hour.
+The following example enables merging between 3:00 am and 5:59 pm:
+
+```riakconf
+bitcask.merge.policy = window
+bitcask.merge.window.start = 3
+bitcask.merge.window.end = 17
+```
+
+```appconfig
+%% In the app.config-based system, you specify the merge window using
+%% a tuple, as in the following example:
+
+{bitcask, [
+    ...,
+    {merge_window, {3, 17}},
+    ...
+    ]}
+```
+
+> **`merge_window` and the Multi backend**
+>
+>If you are using the older configuration system and using Bitcask with
+the [Multi][plan backend multi] backend, please note that if you
+wish to use a merge window, you _must_ set it in the global `bitcask`
+section of your configuration file. `merge_window` settings
+in per-backend sections are ignored.
+
+If merging has a significant impact on the performance of your cluster,
+or if your cluster has quiet periods in which little storage activity
+occurs, you may want to change this setting from the default.
+
+A common way to limit the impact of merging is to create separate merge
+windows for each node in the cluster and ensure that these windows do
+not overlap, as in the sketch below. This ensures that at most one node
+at a time can be affected by merging, leaving the remaining nodes to
+handle requests. The main drawback of this approach is that merges will
+occur less frequently, leading to increased disk space usage.
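+The following sketch staggers two-hour merge windows across three
+hypothetical nodes; the hours here are assumptions for illustration, so
+choose windows that match your own quiet periods:
+
+```riakconf
+# riak.conf on node 1: merge between 1:00 am and 2:59 am
+bitcask.merge.policy = window
+bitcask.merge.window.start = 1
+bitcask.merge.window.end = 2
+
+# riak.conf on node 2: merge between 3:00 am and 4:59 am
+bitcask.merge.policy = window
+bitcask.merge.window.start = 3
+bitcask.merge.window.end = 4
+
+# riak.conf on node 3: merge between 5:00 am and 6:59 am
+bitcask.merge.policy = window
+bitcask.merge.window.start = 5
+bitcask.merge.window.end = 6
+```
+
+### Merge Triggers
+
+Merge triggers determine the conditions under which merging will be
+invoked.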
These conditions fall into two basic categories:
+
+* **Fragmentation** --- This describes the ratio of dead keys to total
+  keys in a file that will trigger merging. The value of this setting is
+  an integer percentage (0-100). For example, if a data file contains 6
+  dead keys and 4 live keys, a merge will be triggered by the default
+  setting (60%). Increasing this value will cause merging to occur less
+  often, whereas decreasing the value will cause merging to happen more
+  often.
+
+* **Dead Bytes** --- This setting describes how much data stored for
+  dead keys in a single file will trigger merging. If a file meets or
+  exceeds the trigger value for dead bytes, a merge will be triggered.
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+  When either of these constraints is met by any file in the directory,
+  Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** --- This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in a merge.
+  The value of this setting is a percentage (0-100). For example, if a
+  data file contains 4 dead keys and 6 live keys, it will be included in
+  the merge at the default ratio (40%). Increasing the value will cause
+  fewer files to be merged, while decreasing the value will cause more
+  files to be merged.
+
+* **Dead Bytes** --- This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be included in
+  a merge. Increasing this value will cause fewer files to be merged,
+  while decreasing this value will cause more files to be merged. The
+  default is 128 MB.
+
+* **Small File** --- This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc.
The following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_. If they are
+set higher, Bitcask will trigger merges in cases where no files meet the
+threshold, which means that Bitcask will never resolve the conditions
+that triggered merging in the first place.
+
+### Merge Interval
+
+Bitcask periodically runs checks to determine whether merges are
+necessary. You can control how often those checks take place using
+the `bitcask.merge_check_interval` parameter. The default is 3 minutes.
+
+```riakconf
+bitcask.merge_check_interval = 3m
+```
+
+```appconfig
+%% In the app.config-based system, this setting is expressed in
+%% milliseconds and found in the riak_kv section rather than the bitcask
+%% section:
+
+{riak_kv, [
+    %% Other configs
+
+    {bitcask_merge_check_interval, 180000},
+
+    %% Other configs
+    ]}
+```
+
+If merge check operations happen at the same time on different
+[vnodes][glossary vnode] on the same node, this can produce spikes
+in I/O usage and undue latency. Bitcask makes it less likely that merge
+check operations will occur at the same time on different vnodes by
+applying a **jitter** to those operations. A jitter is a random
+variation applied to merge times that you can alter using the
+`bitcask.merge_check_jitter` parameter. This parameter is expressed as a
+percentage of `bitcask.merge_check_interval`. The default is 30%.
+
+```riakconf
+bitcask.merge_check_jitter = 30%
+```
+
+```appconfig
+%% In the app.config-based system, this setting is expressed as a float
+%% and found in the riak_kv section rather than the bitcask section:
+
+{riak_kv, [
+    %% Other configs
+
+    {bitcask_merge_check_jitter, 0.3},
+
+    %% Other configs
+    ]}
+```
+
+For example, if you set the merge check interval to 4 minutes and the
+jitter to 25%, merge checks will occur at intervals between 3 and 5
+minutes. With the default of 3 minutes and 30%, checks will occur at
+intervals between roughly 2 and 4 minutes; a short sketch of this
+calculation follows the next section.
+
+### Log Needs Merge
+
+If you are using the older, `app.config`-based configuration system, you
+can use the `log_needs_merge` setting to tune and troubleshoot Bitcask
+merge settings. When set to `true` (as in the example below), each time
+a merge trigger is met, the partition/vnode ID and mergeable files will
+be logged.
+
+```appconfig
+{bitcask, [
+    ...,
+    {log_needs_merge, true},
+    ...
+    ]}
+```
+
+> **Note on `log_needs_merge` and the Multi backend**
+>
+>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please
+note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored.
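+
+To make the merge check interval and jitter arithmetic concrete, here
+is a minimal Erlang sketch (an illustration, not Riak source code) of
+how a jittered check time could be derived from the two settings
+described under Merge Interval above:
+
+```erlang
+-module(merge_check).
+-export([next_check_ms/2]).
+
+%% IntervalMs mirrors bitcask_merge_check_interval (in milliseconds);
+%% Jitter mirrors bitcask_merge_check_jitter (a float such as 0.3).
+next_check_ms(IntervalMs, Jitter) ->
+    %% rand:uniform/0 returns a float in [0.0, 1.0); scale it to a
+    %% random offset in [-Jitter, +Jitter) of the base interval.
+    Offset = (2 * rand:uniform() - 1) * Jitter * IntervalMs,
+    round(IntervalMs + Offset).
+```
+
+With the defaults, `merge_check:next_check_ms(180000, 0.3)` returns a
+value between 126000 and 234000 milliseconds, i.e. roughly 2 to 4
+minutes.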
+
+### Fold Keys Threshold
+
+Fold keys thresholds will reuse the keydir (a) if another fold was
+started less than a specified time interval ago and (b) if there have
+been fewer than a specified number of updates. Otherwise, Bitcask will
+wait until all current folds have completed and then start. The default
+time interval is 0, while the default number of updates is unlimited.
+Both thresholds can be disabled.
+
+The conditions described above can be set using the `fold.max_age` and
+`fold.max_puts` parameters, respectively. The former can be expressed in
+terms of minutes, hours, days, etc., while the latter is expressed as an
+integer. Each threshold can be disabled by setting the value to
+`unlimited`. The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value or if you need to purge data for space reasons, you can
+configure object expiration, also known as expiry. This feature is
+disabled by default.
+
+You can enable and configure object expiry using the `expiry` setting
+and either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time.
+Bitcask will defer a merge triggered solely by key expiry for the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting and
+in terms of minutes, hours, days, etc. The following example sets the
+grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to
+[Search Indexes](../../../../developing/usage/search).
If objects are indexed using Search,
+those objects can be expired by Bitcask yet still registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If Search queries returning
+expired keys are a problem for your use case, we recommend not using
+automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can help you make Bitcask as stable and reliable as possible,
+minimize latency, and maximize throughput.
+
+### Tips & Tricks
+
+  * **Bitcask depends on filesystem caches**
+
+    Some data storage layers implement their own page/block buffer cache
+    in-memory, but Bitcask does not. Instead, it depends on the
+    filesystem's cache. Adjusting the caching characteristics of your
+    filesystem can impact performance.
+
+  * **Be aware of file handle limits**
+
+    Review the documentation on [open files limit][perf open files].
+
+  * **Avoid the overhead of updating file metadata (such as last access
+    time) on every read or write operation**
+
+    You can achieve a substantial speed boost by adding the `noatime`
+    mounting option to Linux's `/etc/fstab`. This will disable the
+    recording of the last accessed time for all files, which results
+    in fewer disk head seeks. If you need last access times but you'd
+    like some of the benefits of this optimization, you can try
+    `relatime`.
+
+    ```
+    /dev/sda5    /data           ext3    noatime  1 1
+    /dev/sdb1    /data/inno-log  ext3    noatime  1 2
+    ```
+
+  * **Small number of frequently changed keys**
+
+    When keys are changed frequently, fragmentation rapidly increases.
+    To counteract this, you should lower the fragmentation trigger and
+    threshold.
+
+  * **Limited disk space**
+
+    When disk space is limited, limiting the space occupied by dead keys
+    is of paramount importance. Lower the dead bytes threshold and
+    trigger to counteract wasted space.
+
+  * **Purging stale entries after a fixed period**
+
+    To automatically purge stale values, set the object expiry value to
+    the desired cutoff time. Keys that are not modified for a period
+    equal to or greater than this time interval will become
+    inaccessible.
+
+  * **High number of partitions per node**
+
+    Because each node in a cluster runs many partitions, Bitcask will
+    have many [open files][perf open files]. To reduce the number of
+    open files, we suggest increasing the max file size so that larger
+    files will be written. You could also decrease the fragmentation
+    and dead-bytes settings and increase the small file threshold so
+    that merging will keep the number of open files small in number.
+
+  * **High daytime traffic, low nighttime traffic**
+
+    In order to cope with a high volume of writes without performance
+    degradation during the day, you might want to limit merging to
+    non-peak periods. Setting the merge window to hours of the day
+    when traffic is low will help.
+
+  * **Multi-cluster replication**
+
+    If you are using Riak with the replication feature enabled, your
+    clusters might experience higher rates of fragmentation and dead
+    bytes. Additionally, because the fullsync feature operates across
+    entire partitions, it will be made more efficient by accessing data
+    as sequentially as possible (across fewer files).
Lowering both the fragmentation and
+    dead-bytes settings will improve performance.
+
+## FAQ
+
+  * [[Why does it seem that Bitcask merging is only triggered when a
+    Riak node is restarted?|Developing on Riak
+    FAQs#why-does-it-seem-that-bitc]]
+  * [[If the size of key index exceeds the amount of memory, how does
+    Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+  * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time. The file being
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, while
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. This data management strategy
+may use up a lot of space over time, since Bitcask writes new values
+without touching the old ones.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033322492444576013453623296
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+981946412581700398168100746981252653831329677312
+```
+
+Note that, on startup, a directory is created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1164634117248063262943561351070788031288321245184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered.
+
+```
+bitcask/
+|-- 0-1317147619996589
+|   |-- 1317147974.bitcask.data
+|   |-- 1317147974.bitcask.hint
+|   |-- 1317221578.bitcask.data
+|   |-- 1317221578.bitcask.hint
+|   |-- 1317221869.bitcask.data
+|   |-- 1317221869.bitcask.hint
+|   |-- 1317222847.bitcask.data
+|   |-- 1317222847.bitcask.hint
+|   |-- 1317222868.bitcask.data
+|   |-- 1317222868.bitcask.hint
+|   |-- 1317223014.bitcask.data
+|   `-- 1317223014.bitcask.hint
+|-- 1004782375664995756265033322492444576013453623296-1317147628760580
+|   |-- 1317147693.bitcask.data
+|   |-- 1317147693.bitcask.hint
+|   |-- 13172.9.05.bitcask.data
+|   |-- 13172.9.05.bitcask.hint
+|   |-- 1317222514.bitcask.data
+|   |-- 1317222514.bitcask.hint
+|   |-- 1317223035.bitcask.data
+|   |-- 1317223035.bitcask.hint
+|   |-- 1317223411.bitcask.data
+|   `-- 1317223411.bitcask.hint
+|-- 1027618338748291114361965898003636498195577569280-1317223690337865
+|-- 1050454301831586472458898473514828420377701515264-1317223690151365
+
+... etc ...
+
+```
+
+This is normal operational behavior for Bitcask.
diff --git a/content/riak/kv/2.9.0p5/setup/planning/backend/leveldb.md b/content/riak/kv/2.9.0p5/setup/planning/backend/leveldb.md
new file mode 100644
index 0000000000..03ea17a6db
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/planning/backend/leveldb.md
@@ -0,0 +1,510 @@
+---
+title: "LevelDB"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "LevelDB"
+    identifier: "planning_backend_leveldb"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/advanced/backends/leveldb/
+  - /riak/kv/2.9.0p5/ops/advanced/backends/leveldb/
+  - /riak/2.9.0p5/setup/planning/backend/leveldb/
+  - /riak/2.9.0/setup/planning/backend/leveldb/
+  - /riak/kv/2.9.0/setup/planning/backend/leveldb/
+  - /riak/kv/2.9.0p1/setup/planning/backend/leveldb/
+  - /riak/kv/2.9.0p2/setup/planning/backend/leveldb/
+  - /riak/kv/2.9.0p3/setup/planning/backend/leveldb/
+  - /riak/kv/2.9.0p4/setup/planning/backend/leveldb/
+---
+
+
+[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}}
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#active-anti-entropy
+
+> **Note on upgrading to 2.0**
+>
+> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0,
+and wish to keep using your old `app.config` file for configuration,
+make sure to follow the steps for setting the
+`total_leveldb_mem_percent` parameter in the
+[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB].
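+
+For reference, such an `app.config` entry lives in the `eleveldb`
+section; the value below is an assumption for illustration (it matches
+the 70% default of `leveldb.maximum_memory.percent` described later on
+this page), and the upgrade guide remains the authoritative source:
+
+```appconfig
+{eleveldb, [
+    %% Illustrative value only; see the 2.0 upgrade guide
+    {total_leveldb_mem_percent, 70}
+]}
+```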
+
+[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application
+that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an
+open-source, on-disk key/value store created by Google Fellows Jeffrey
+Dean and Sanjay Ghemawat.
+
+LevelDB is a relatively new entrant into the growing list of key/value
+database libraries, but it has some very interesting qualities that we
+believe make it an ideal candidate for use in Riak. LevelDB's storage
+architecture is more like
+[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable
+model than it is like Bitcask. This design and implementation provide
+the possibility of a storage engine without Bitcask's RAM limitation.
+
+> **Note:** Riak uses a fork of LevelDB. The code can be found
+[on Github](https://github.com/basho/leveldb).
+
+A number of changes have been introduced in the LevelDB backend in Riak
+2.0:
+
+* There is now only _one_ performance-related setting that Riak users
+  need to define---`leveldb.total_mem_percent`---as LevelDB now
+  dynamically sizes the file cache and block sizes based upon active
+  [vnodes][glossary vnode] assigned to the node.
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model
+  for background compaction work on `.sst` table files. The new model
+  has increased throughput by at least 10% in all test scenarios.
+* Delete operations now receive priority handling in compaction
+  selection, which means more aggressive reclaiming of disk space than
+  in previous versions of Riak's LevelDB backend.
+* Nodes storing massive key datasets (e.g. in the billions of keys) now
+  receive increased throughput due to automatic management of LevelDB's
+  block size parameter. This parameter is slowly raised to increase the
+  number of files that can open simultaneously, improving random read
+  performance.
+
+## Strengths
+
+1. **License** --- The LevelDB and eLevelDB licenses are the [New BSD
+   License](http://www.opensource.org/licenses/bsd-license.php) and the
+   [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html),
+   respectively. We'd like to thank Google and the authors of LevelDB at
+   Google for choosing a completely FLOSS license so that everyone can
+   benefit from this innovative storage engine.
+2. **Data compression** --- LevelDB provides two compression algorithms
+   to reduce storage size and increase efficient use of storage bandwidth:
+   * Google's [Snappy](https://code.google.com/p/snappy/) data compression
+   * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data
+     compression
+
+   Enabling compression means more CPU usage but less disk space. Compression
+   is especially good for text data, including raw text, Base64, JSON, etc.
+
+## Weaknesses
+
+1. Read access can be slow when there are many levels to search.
+2. LevelDB may have to do a few disk seeks to satisfy a read: one disk
+   seek per level. If 10% of the database fits in memory, only one seek
+   is needed, for the last level, since all of the earlier levels should
+   end up cached in the OS buffer cache for most filesystems; if only 1%
+   fits in memory, LevelDB will need two seeks.
+
+## Installing eLevelDB
+
+Riak ships with eLevelDB included within the distribution, so there is
+no separate installation required. However, Riak is configured to use
+the Bitcask storage engine by default. To switch to eLevelDB, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveldb`:
+
+```riakconf
+storage_backend = leveldb
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
{storage_backend, riak_kv_eleveldb_backend},
+    %% ...
+    ]}
+```
+
+## Configuring eLevelDB
+
+eLevelDB's default behavior can be modified by adding/changing
+parameters in your [`riak.conf`][config reference]. The section below
+details the parameters you'll use to modify eLevelDB.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for eLevelDB are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.data_root` | LevelDB data root | `./data/leveldb`
+`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size. | `70`
+
+If you are using the older, `app.config`-based system, the equivalent to
+the `leveldb.data_root` is the `data_root` setting, as in the following
+example:
+
+```appconfig
+{eleveldb, [
+    {data_root, "/path/to/leveldb"},
+
+    %% Other eleveldb-specific settings
+]}
+```
+
+The `leveldb.maximum_memory.percent` setting is only available in the
+newer configuration system.
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### ext4 Options
+
+The ext4 filesystem defaults include two options that increase integrity
+but slow performance. Because Riak's integrity is based on multiple
+nodes holding the same data, these two options can be changed to boost
+LevelDB's performance. We recommend setting `barrier=0` and
+`data=writeback`.
+
+#### CPU Throttling
+
+If CPU throttling is enabled, disabling it can boost LevelDB performance
+in some cases.
+
+#### No Entropy
+
+If you are using https protocol, the 2.6 kernel is widely known for
+stalling programs waiting for SSL entropy bits. If you are using https,
+we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line. The TSC clocksource has been identified to cause issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a
+Google-sponsored open source project that has been incorporated into an
+Erlang application and integrated into Riak for storage of key/value
+information on disk. The implementation of LevelDB is similar in spirit
+to the representation of a single Bigtable tablet (section 5.3).
+
+### How Levels Are Managed
+
+LevelDB is a memtable/sstable design.
The set of sorted tables is
+organized into a sequence of levels. Each level stores approximately ten
+times as much data as the level before it. The sorted table generated
+from a flush is placed in a special young level (also called level-0).
+When the number of young files exceeds a certain threshold (currently
+four), all of the young files are merged together with all of the
+overlapping level-1 files to produce a sequence of new level-1 files (a
+new level-1 file is created for every 2MB of data).
+
+Files in the young level may contain overlapping keys. However, files in
+other levels have distinct non-overlapping key ranges. Consider level
+number L, where L >= 1. When the combined size of files in level-L
+exceeds (10^L) MB (i.e. 10MB for level-1, 100MB for level-2, ...), one
+file in level-L and all of the overlapping files in level-(L+1) are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-L+1). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 seconds.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB).
This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0 the only special case to consider is that after
+picking the primary L0 file to compact, it will check other L0 files to
+determine the degree to which they overlap. This is an attempt to avoid
+some I/O; in practice, we can expect L0 compactions to usually, if not
+always, include all L0 files.
+
+See the PickCompaction routine in
+[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level, since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition ring,
+which results in 64 separate directories, each with its own LevelDB
+database:
+
+```bash
+leveldb/
+|-- 0
+|   |-- 000003.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   `-- MANIFEST-000002
+|-- 1004782375664995756265033322492444576013453623296
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+
+... etc ...
+
+`-- 981946412581700398168100746981252653831329677312
+    |-- 000005.log
+    |-- CURRENT
+    |-- LOCK
+    |-- LOG
+    |-- LOG.old
+    `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033322492444576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files
+in a single database directory.
In Riak 1.3, the original LevelDB code was
+modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, with the aim of speeding
+up database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write-intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels.
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows, for each level, three
+approximate sizes (in megabytes): the amount of raw data stored in the
+level, the cumulative size of all levels up to and including that level,
+and the cumulative size including active anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be 128 / 9, or roughly 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above. Select the third column if you are not
+  using active anti-entropy or the fourth column if you are (i.e. if the
+  `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative column in
+  each row in the table. The first result that exceeds your fast storage
+  array capacity will provide the level number that should be used for
+  your `leveldb.tiered` setting.
+
+For example, with the value 14 from above, a hypothetical 200 GB fast
+array, and the **Cumulative with AAE** column, level 2 yields
+14 x 10,920 MB (roughly 153 GB), which still fits, while level 3 yields
+14 x 23,208 MB (roughly 325 GB), which exceeds the array, so
+`leveldb.tiered` should be set to `3`.
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
diff --git a/content/riak/kv/2.9.0p5/setup/planning/backend/leveled.md b/content/riak/kv/2.9.0p5/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..dc4232af80
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/planning/backend/leveled.md
@@ -0,0 +1,143 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/advanced/backends/leveled/
+  - /riak/kv/2.9.0p5/ops/advanced/backends/leveled/
+  - /riak/2.9.0p5/setup/planning/backend/leveled/
+  - /riak/2.9.0/setup/planning/backend/leveled/
+  - /riak/kv/2.9.0/setup/planning/backend/leveled/
+  - /riak/kv/2.9.0p1/setup/planning/backend/leveled/
+  - /riak/kv/2.9.0p2/setup/planning/backend/leveled/
+  - /riak/kv/2.9.0p3/setup/planning/backend/leveled/
+  - /riak/kv/2.9.0p4/setup/planning/backend/leveled/
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#active-anti-entropy
+
+[Leveled](https://github.com/martinsumner/leveled) is a simple key-value store based on the concept of Log-Structured Merge Trees, with the following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+  - Splits the storage of an object between keys/metadata and body (assuming some definition of metadata is provided);
+  - Allows the application to define what constitutes object metadata and what constitutes the body (value part) of the object, and to assign tags to objects so that multiple object types with different extraction rules can be managed;
+  - Stores keys/metadata in a merge tree and the full object in a journal of CDB files, allowing for HEAD requests which have lower overheads than GET requests, and for queries which traverse keys/metadata to be supported with fewer side effects on the page cache than folds over keys/objects.
+- Support for tagging of object types and the implementation of alternative store behaviour based on type:
+  - Allows for changes to extract specific information as metadata to be returned from HEAD requests;
+  - Potentially usable for objects with special retention or merge properties.
+- Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes), with low cost specifically where there is a need to scan across keys and metadata (not values).
+- Written in Erlang as a message-passing system between actors.
+
+
+## Strengths
+
+1. Leveled was developed specifically as a potential backend for Riak, with features such as:
+   * Support for secondary indexes
+   * Multiple fold types
+   * Auto expiry of objects
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and more likely to suffer from edge case issues than Bitcask or LevelDB, simply because they've been around longer and have been more thoroughly tested via usage in customer environments.
+2. Leveled works better with medium to larger sized objects. It works perfectly well with small objects, but the additional disk space overhead may render LevelDB a better choice if disk space is at a premium and all of your data will be exclusively limited to a few KB or less. This may change as Leveled matures, though.
+
+## Installing leveled
+
+Leveled is included with Riak KV 2.9.0 and beyond, so there is no need to install anything further. To switch to the leveled backend, set the `storage_backend` variable in [`riak.conf`][config reference] to `leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_leveled_backend},
+    %% ...
+    ]}
+```
+
+## Configuring leveled
+
+Leveled's default behavior can be modified by adding/changing
+parameters in the `leveled` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | Compression Method. | `native`
+`leveled.compression_point` | Compression Point - The point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | Log Level - Set the minimum log level to be used within leveled.
| `info` +`leveled.journal_size` | The approximate size (in bytes) when a Journal file should be rolled. | `1000000000` +`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day | `24` +`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0` +`leveled.compaction_top_hour` | The hour of the day, after which journal compaction should stop. | `23` +`leveled.max_run_length` | Max Journal Files Per Compaction Run. | `4` + +### Recommended Settings + +Below are **general** configuration recommendations for Linux +distributions. Individual users may need to tailor these settings for +their application. + +#### sysctl + +For production environments, please see [System Performance Tuning][perf index] +for the recommended `/etc/sysctl.conf` settings. + +#### Block Device Scheduler + +Beginning with the 2.6 kernel, Linux gives you a choice of four I/O +[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We +recommend using the NOOP elevator. You can do this by changing the +scheduler on the Linux boot line: `elevator=noop`. + +#### No Entropy + +If you are using https protocol, the 2.6 kernel is widely known for +stalling programs waiting for SSL entropy bits. If you are using https, +we recommend installing the +[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for +pseudorandom number generation. + +#### clocksource + +We recommend setting `clocksource=hpet` on your Linux kernel's `boot` +line. The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[Leveled](https://github.com/martinsumner/leveled) is an open source project that has been developed specifically as a backend option for Riak, rather than a generic backend. diff --git a/content/riak/kv/2.9.0p5/setup/planning/backend/memory.md b/content/riak/kv/2.9.0p5/setup/planning/backend/memory.md new file mode 100644 index 0000000000..3ed1703d2f --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/planning/backend/memory.md @@ -0,0 +1,151 @@ +--- +title: "Memory" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Memory" + identifier: "planning_backend_memory" + weight: 102 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.0p5/ops/advanced/backends/memory/ + - /riak/kv/2.9.0p5/ops/advanced/backends/memory/ + - /riak/2.9.0p5/setup/planning/backend/memory/ + - /riak/2.9.0/setup/planning/backend/memory/ + - /riak/kv/2.9.0/setup/planning/backend/memory/ + - /riak/kv/2.9.0p1/setup/planning/backend/memory/ + - /riak/kv/2.9.0p2/setup/planning/backend/memory/ + - /riak/kv/2.9.0p3/setup/planning/backend/memory/ + - /riak/kv/2.9.0p4/setup/planning/backend/memory/ +--- + + +[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference +[plan backend multi]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/multi +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveldb + +The Memory storage backend uses in-memory tables to store all data. +This data is never persisted to disk or to any other storage mechanism. 
+The Memory storage engine is best used for testing Riak clusters or for
+storing small amounts of transient state in production systems.
+
+Internally, the Memory backend uses Erlang ETS tables to manage data.
+More information can be found in the
+[official Erlang documentation](http://www.erlang.org/doc/man/ets.html).
+
+## Enabling the Memory Backend
+
+To enable the memory backend, edit your [configuration files][config reference]
+for each Riak node and specify the Memory backend as shown in the following
+example:
+
+```riakconf
+storage_backend = memory
+```
+
+```appconfig
+{riak_kv, [
+    ...,
+    {storage_backend, riak_kv_memory_backend},
+    ...
+    ]}
+```
+
+**Note**: If you replace a previously specified backend by removing it
+or commenting it out, data belonging to that backend will still be
+preserved on the filesystem but will no longer be accessible through
+Riak unless the backend is enabled again.
+
+If you require multiple backends in your configuration, please consult
+the [Multi backend documentation][plan backend multi].
+
+## Configuring the Memory Backend
+
+The Memory backend enables you to configure two fundamental aspects of
+object storage: maximum memory usage per [vnode][glossary vnode]
+and object expiry.
+
+### Max Memory
+
+This setting specifies the maximum amount of memory consumed by the
+Memory backend. It's important to note that this setting acts on a
+*per-vnode basis*, not on a per-node or per-cluster basis. This should
+be taken into account when planning for memory usage with the Memory
+backend, as the total memory used will be max memory times the number
+of vnodes in the cluster.
+
+When the threshold value that you set has been met in a particular
+vnode, Riak will begin discarding objects, beginning with the oldest
+object and proceeding until memory usage returns below the allowable
+threshold.
+
+You can configure maximum memory using the
+`memory_backend.max_memory_per_vnode` setting. You can specify
+`max_memory_per_vnode` however you'd like, using kilobytes, megabytes,
+or even gigabytes.
+
+The following are all possible settings:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 500KB
+memory_backend.max_memory_per_vnode = 10MB
+memory_backend.max_memory_per_vnode = 2GB
+```
+
+```appconfig
+%% In the app.config-based system, the equivalent setting is max_memory,
+%% which must be expressed in megabytes:
+
+{riak_kv, [
+    %% storage_backend specifies the Erlang module defining the storage
+    %% mechanism that will be used on this node.
+
+    {storage_backend, riak_kv_memory_backend},
+    {memory_backend, [
+        ...,
+        {max_memory, 4096}, %% 4GB in megabytes
+        ...
+    ]}
+]}
+```
+
+To determine an optimal max memory setting, we recommend consulting the
+documentation on [LevelDB cache size][plan backend leveldb].
+
+### TTL
+
+The time-to-live (TTL) parameter specifies the amount of time an object
+remains in memory before it expires. The minimum time is one second.
+
+In the newer, `riak.conf`-based configuration system, you can specify
+`ttl` in seconds, minutes, hours, days, etc.
The following are all +possible settings: + +```riakconf +memory_backend.ttl = 1s +memory_backend.ttl = 10m +memory_backend.ttl = 3h +``` + +```appconfig +%% In the app.config-based system, the ttl setting must be expressed in +%% seconds: + +{memory_backend, [ + %% other settings + {ttl, 86400}, %% Set to 1 day + %% other settings + ]} +``` + +> **Dynamically Changing `ttl`** +> +> There is currently no way to dynamically change the `ttl` setting for a +bucket or bucket type. The current workaround would be to define +multiple Memory backends using the Multi backend, each with different +`ttl` values. For more information, consult the documentation on the +[Multi][plan backend multi] backend. diff --git a/content/riak/kv/2.9.0p5/setup/planning/backend/multi.md b/content/riak/kv/2.9.0p5/setup/planning/backend/multi.md new file mode 100644 index 0000000000..16a889b2e7 --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/planning/backend/multi.md @@ -0,0 +1,234 @@ +--- +title: "Multi-backend" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Multi-backend" + identifier: "planning_backend_multi" + weight: 103 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.0p5/ops/advanced/backends/multi/ + - /riak/kv/2.9.0p5/ops/advanced/backends/multi/ + - /riak/2.9.0p5/setup/planning/backend/multi/ + - /riak/2.9.0/setup/planning/backend/multi/ + - /riak/kv/2.9.0/setup/planning/backend/multi/ + - /riak/kv/2.9.0p1/setup/planning/backend/multi/ + - /riak/kv/2.9.0p2/setup/planning/backend/multi/ + - /riak/kv/2.9.0p3/setup/planning/backend/multi/ + - /riak/kv/2.9.0p4/setup/planning/backend/multi/ +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveldb +[plan backend memory]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/memory +[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types +[use admin riak-admin cli]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin + +Riak allows you to run multiple backends within a single Riak cluster. +Selecting the Multi backend enables you to use different storage +backends for different [buckets][concept buckets]. Any combination of the three +available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used. + +## Configuring Multi-backend + +You can set up your cluster to use the Multi backend using Riak's +[configuration files][config reference]. + +```riakconf +storage_backend = multi +``` + +```appconfig +{riak_kv, [ + %% ... + {storage_backend, riak_kv_multi_backend}, + %% ... +]}, +``` + +Remember that you must stop and then re-start each node when you change +storage backends or modify any other configuration. + +## Using Multiple Backends + +In Riak 2.0 and later, we recommend using multiple backends by applying +them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process +involves three steps: + +1. Creating a bucket type that enables buckets of that type to use the + desired backends +2. Activating that bucket type +3. 
Setting up your application to use that type
+
+Let's say that we've set up our cluster to use the Multi backend and we
+want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types: one that sets the `backend` bucket property to `leveldb` and another that sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface.
+
+We'll call our bucket types `leveldb_backend` and `memory_backend`, but
+you can use whichever names you wish.
+
+```bash
+riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}'
+riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}'
+```
+
+Then, we must activate those bucket types so that they can be used in
+our cluster:
+
+```bash
+riak-admin bucket-type activate leveldb_backend
+riak-admin bucket-type activate memory_backend
+```
+
+Once those types have been activated, any objects stored in buckets
+bearing the type `leveldb_backend` will be stored in LevelDB, whereas
+all objects stored in buckets of the type `memory_backend` will be
+stored in the Memory backend.
+
+More information can be found in our documentation on [using bucket types][usage bucket types].
+
+## Configuring Multiple Backends
+
+Once you've set up your cluster to use multiple backends, you can
+configure each backend on its own. All configuration options available
+for LevelDB, Bitcask, and Memory remain available to you when using the
+Multi backend.
+
+#### Using the Newer Configuration System
+
+If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by
+prefacing each configuration with `multi_backend`.
+
+Here is the general form for configuring multiple backends:
+
+```riakconf
+multi_backend.$name.$setting_name = setting
+```
+
+If you are using, for example, the LevelDB and Bitcask backends and wish
+to set LevelDB's `bloomfilter` setting to `off` and the Bitcask
+backend's `io_mode` setting to `nif`, you would do that as follows:
+
+```riakconf
+multi_backend.leveldb.bloomfilter = off
+multi_backend.bitcask.io_mode = nif
+```
+
+#### Using the Older Configuration System
+
+If you are using the older, `app.config`-based configuration system,
+configuring multiple backends involves adding one or more
+backend-specific sections to your `riak_kv` settings (in addition to
+setting the `storage_backend` setting to `riak_kv_multi_backend`, as
+shown above).
+
+> **Note**: If you are defining multiple file-based backends of the same
+type, each of these must have a separate `data_root` directory defined.
+
+While all configuration parameters can be placed anywhere within the
+`riak_kv` section of `app.config`, in general we recommend that you
+place them in the section containing other backend-related settings to
+keep the settings organized.
+
+Below is the general form for your `app.config` file:
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+      %% Here's where you set the individual multiplexed backends
+      {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+        %% bitcask configuration
+        {data_root, "/var/lib/riak/bitcask_mult/"},
+        {config1, ConfigValue1},
+        {config2, ConfigValue2}
+      ]},
+      {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [
+        %% bitcask configuration
+        {data_root, "/var/lib/riak/bitcask_expiry_mult/"},
+        {expiry_secs, 86400},
+        {config1, ConfigValue1},
+        {config2, ConfigValue2}
+      ]},
+      {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [
+        %% eleveldb configuration
+        {config1, ConfigValue1},
+        {config2, ConfigValue2}
+      ]},
+      {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [
+        %% eleveldb with a different configuration
+        {config1, ConfigValue1},
+        {config2, ConfigValue2}
+      ]},
+      {<<"memory_mult">>, riak_kv_memory_backend, [
+        %% memory configuration
+        {config1, ConfigValue1},
+        {config2, ConfigValue2}
+      ]}
+    ]},
+    %% ...
+]},
+```
+
+Note that in each of the subsections of the `multi_backend` setting, the
+name of each backend you wish to configure can be anything you would
+like. Directly after naming the backend, you must specify which of the
+backends corresponds to that name, i.e. `riak_kv_bitcask_backend`,
+`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have
+done that, the various configurations for each named backend can be set
+as objects in an Erlang list.
+
+## Example Configuration
+
+Imagine that you are using both Bitcask and LevelDB in your cluster, and
+you would like storage to default to Bitcask. The following
+configuration would create two backend configurations, named
+`bitcask_mult` and `leveldb_mult`, respectively, while also setting the
+data directory for each backend and specifying that `bitcask_mult` is
+the default.
+
+```riakconf
+storage_backend = multi
+
+multi_backend.bitcask_mult.storage_backend = bitcask
+multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult
+
+multi_backend.leveldb_mult.storage_backend = leveldb
+multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult
+
+multi_backend.default = bitcask_mult
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+      {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+        {data_root, "/var/lib/riak/bitcask"}
+      ]},
+      {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+        {data_root, "/var/lib/riak/leveldb"}
+      ]}
+    ]}
+    %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is no more
+than 50%. For example, setting each of three backends to use 50% of
+available memory will inevitably lead to memory problems.
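+
+As a rough illustration, here is a minimal sketch of such a memory
+budget. It assumes a hypothetical 5-node cluster with 16 GB of RAM per
+node and a ring size of 64 (roughly 13 vnodes per node); the prefixed
+setting names follow the `multi_backend.$name.$setting_name` pattern
+shown above but should be verified against the schema shipped with your
+Riak version:
+
+```riakconf
+storage_backend = multi
+multi_backend.default = leveldb_mult
+
+multi_backend.leveldb_mult.storage_backend = leveldb
+# Cap the LevelDB cache at 35% of the node's RAM (about 5.6 GB here)
+multi_backend.leveldb_mult.leveldb.maximum_memory.percent = 35
+
+multi_backend.memory_mult.storage_backend = memory
+# 128MB per vnode across ~13 vnodes is about 1.6 GB per node, keeping
+# the combined backend memory share at or below the 50% guideline
+multi_backend.memory_mult.memory_backend.max_memory_per_vnode = 128MB
+```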
diff --git a/content/riak/kv/2.9.0p5/setup/planning/best-practices.md b/content/riak/kv/2.9.0p5/setup/planning/best-practices.md new file mode 100644 index 0000000000..0cfb8923ce --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/planning/best-practices.md @@ -0,0 +1,149 @@ +--- +title: "Scaling and Operating Riak Best Practices" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Best Practices" + identifier: "planning_best_practices" + weight: 105 + parent: "planning" +toc: true +aliases: + - /riak/2.9.0p5/ops/building/planning/best-practices + - /riak/kv/2.9.0p5/ops/building/planning/best-practices + - /riak/2.9.0p5/setup/planning/best-practices/ + - /riak/2.9.0/setup/planning/best-practices/ + - /riak/kv/2.9.0/setup/planning/best-practices/ + - /riak/kv/2.9.0p1/setup/planning/best-practices/ + - /riak/kv/2.9.0p2/setup/planning/best-practices/ + - /riak/kv/2.9.0p3/setup/planning/best-practices/ + - /riak/kv/2.9.0p4/setup/planning/best-practices/ +--- + + +[use ref handoff]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/handoff +[config mapreduce]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/mapreduce +[glossary aae]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#active-anti-entropy-aae +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes + +Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster. + +## Disk Capacity + +Filling up disks is a serious problem in Riak. In general, you should +add capacity under the following conditions: + +* a disk becomes more than 80% full +* you have fewer than 10 days of capacity remaining at current rates of + growth + +## RAID Levels + +Riak provides resilience through its built-in redundancy. + +* RAID0 can be used to increase the performance at the expense of + single-node reliability +* RAID5/6 can be used to increase the reliability over RAID0 but still + offers higher performance than single disks +* You should choose a RAID level (or no RAID) that you’re comfortable + with + +## Disk Leeway + +* Adding new nodes instantly increases the total capacity of the + cluster, but you should allow enough internal network capacity that + [handing off][use ref handoff] existing data outpaces the arrival of new + data. +* Once you’ve reached a scale at which the amount of new data arriving + is a small fraction of the cluster's total capacity, you can add new + nodes when you need them. You should be aware, however, that adding + new nodes can actually _increase_ disk usage on existing nodes in the + short term as data is rebalanced within the cluster. +* If you are certain that you are likely to run out of capacity, we + recommend allowing a week or two of leeway so that you have plenty of + time to add nodes and for [handoff][use ref handoff] to occur before the disks reach + capacity +* For large volumes of storage it's usually prudent to add more capacity + once a disk is 80% full + +## CPU Capacity Leeway + +* In a steady state, your peak CPU utilization, ignoring other + processes, should be less than 30% +* If you provide sufficient CPU capacity leeway, you’ll have spare + capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae] + +## Network Capacity Leeway + +* Network traffic tends to be “bursty,” i.e. 
it tends to vary both quite
+  a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times the amount of intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+  fill up
+* a node's IO/CPU activity is higher than average for an extended
+  period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node uses 1 TB but the
+  rest use 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation (see the sketch at the end of this page)
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node].
+
+## Scaling
+
+* All large-scale systems are bound by the availability of some
+  resources
+* From a stability point of view, the best state for a busy Riak cluster
+  to maintain is the following:
+  * New network connections are limited to ensure that existing network
+    connections consume most network bandwidth
+  * CPU at < 30%
+  * Disk IO at < 90%
+* You should use HAProxy or your application servers to limit new
+  network connections to keep network and IO below 90% and CPU below
+  30%.
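+
+As referenced above, the following is a minimal sketch of adding three
+nodes in a single operation while throttling handoff; the node names and
+the transfer limit are hypothetical and should be adapted to your
+environment:
+
+```bash
+# Join all new nodes first so they can be committed in one plan
+riak-admin cluster join riak@10.0.1.4
+riak-admin cluster join riak@10.0.1.5
+riak-admin cluster join riak@10.0.1.6
+
+# Optionally cap concurrent handoff transfers so that live customer
+# traffic keeps priority during rebalancing
+riak-admin transfer-limit 2
+
+riak-admin cluster plan     # review the proposed ownership changes
+riak-admin cluster commit   # apply them in a single operation
+```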
diff --git a/content/riak/kv/2.9.0p5/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.9.0p5/setup/planning/bitcask-capacity-calc.md
new file mode 100644
index 0000000000..e2734e5fb8
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/planning/bitcask-capacity-calc.md
@@ -0,0 +1,108 @@
+---
+title: "Bitcask Capacity Calculator"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Bitcask Capacity Calculator"
+    identifier: "planning_cluster_bitcask_capacity"
+    weight: 104
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/planning/bitcask
+  - /riak/kv/2.9.0p5/ops/building/planning/bitcask
+  - /riak/2.9.0p5/setup/planning/bitcask-capacity-calc/
+  - /riak/2.9.0/setup/planning/bitcask-capacity-calc/
+  - /riak/kv/2.9.0/setup/planning/bitcask-capacity-calc/
+  - /riak/kv/2.9.0p1/setup/planning/bitcask-capacity-calc/
+  - /riak/kv/2.9.0p2/setup/planning/bitcask-capacity-calc/
+  - /riak/kv/2.9.0p3/setup/planning/bitcask-capacity-calc/
+  - /riak/kv/2.9.0p4/setup/planning/bitcask-capacity-calc/
+---
+
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask
+
+This calculator will assist you in sizing your cluster if you plan to
+use the default [Bitcask][plan backend bitcask] storage backend.
+
+This page is designed to give you a rough estimate when sizing your
+cluster. The calculations are a _best guess_, and they tend to be a bit
+on the conservative side. It's important to include a bit of headroom
+as well as room for unexpected growth so that if demand exceeds
+expectations you'll be able to add more nodes to the cluster and stay
+ahead of your requirements.
+
+<div id="node_info" class="calc_info"></div>
+<div class="calculator">
+  <ul>
+    <li>
+      <label for="n_total_keys">Total Number of Keys:</label>
+      <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input">
+      <span class="error_span" id="n_total_keys_error"></span>
+    </li>
+    <li>
+      <label for="n_bucket_size">Average Bucket Size (Bytes):</label>
+      <input id="n_bucket_size" type="text" size="7" name="n_bucket_size" value="" class="calc_input">
+      <span class="error_span" id="n_bucket_size_error"></span>
+    </li>
+    <li>
+      <label for="n_key_size">Average Key Size (Bytes):</label>
+      <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input">
+      <span class="error_span" id="n_key_size_error"></span>
+    </li>
+    <li>
+      <label for="n_record_size">Average Value Size (Bytes):</label>
+      <input id="n_record_size" type="text" size="7" name="n_record_size" value="" class="calc_input">
+      <span class="error_span" id="n_record_size_error"></span>
+    </li>
+    <li>
+      <label for="n_ram">RAM Per Node (in GB):</label>
+      <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input">
+      <span class="error_span" id="n_ram_error"></span>
+    </li>
+    <li>
+      <label for="n_nval"><i>N</i> (Number of Write Copies):</label>
+      <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input">
+      <span class="error_span" id="n_nval_error"></span>
+    </li>
+</ul>
+</div>
+
+## Recommendations
+
+<span id="recommend"></span>
+
+## Details on Bitcask RAM Calculation
+
+With the above information in mind, the following variables will factor
+into your RAM calculation:
+
+Variable | Description
+:--------|:-----------
+Static Bitcask per-key overhead | 44.5 bytes per key
+Estimated average bucket-plus-key length | The combined number of characters your bucket + keynames will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The Actual Equation
+
+Approximate RAM needed for Bitcask = (static Bitcask per-key overhead +
+estimated average bucket-plus-key length in bytes) * estimated total
+number of keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+That works out to (44.5 + 30) bytes * 50,000,000 keys * 3 replicas =
+11,175,000,000 bytes, so the amount of RAM you would need for Bitcask is
+roughly **10.4 GiB (about 11.2 GB) across your entire cluster.**
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
diff --git a/content/riak/kv/2.9.0p5/setup/planning/cluster-capacity.md b/content/riak/kv/2.9.0p5/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..705784261f
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/planning/cluster-capacity.md
@@ -0,0 +1,242 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/planning/cluster
+  - /riak/kv/2.9.0p5/ops/building/planning/cluster
+  - /riak/2.9.0p5/setup/planning/cluster-capacity/
+  - /riak/2.9.0/setup/planning/cluster-capacity/
+  - /riak/kv/2.9.0/setup/planning/cluster-capacity/
+  - /riak/kv/2.9.0p1/setup/planning/cluster-capacity/
+  - /riak/kv/2.9.0p2/setup/planning/cluster-capacity/
+  - /riak/kv/2.9.0p3/setup/planning/cluster-capacity/
+  - /riak/kv/2.9.0p4/setup/planning/cluster-capacity/
+---
+
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users and is essential for running complex MapReduce queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM without degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory. The keydir is a hash table that maps each
+concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for
+each file contained within each Bitcask backend) to a fixed-size
+structure giving the file, offset, and size of the most recently written
+entry for that bucket + key on disk.
+
+To learn about Bitcask, see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.
+
+If your calculated RAM needs will exceed your hardware resources---in other words, if you can't afford the RAM required to use Bitcask---we recommend that you use LevelDB.
+
+Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.
+
+### LevelDB
+
+If RAM requirements for Bitcask are prohibitive, we recommend use of
+the LevelDB backend. While LevelDB doesn't require a large amount of RAM
+to operate, supplying it with the maximum amount of memory available leads to higher performance.
+
+For more information see [LevelDB][plan backend leveldb].
+
+## Disk
+
+Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs:
+
+#### Estimated Total Objects * Average Object Size * n_val
+
+For example:
+
+* 50,000,000 objects
+* an average object size of two kilobytes (2,048 bytes)
+* the default `n_val` of 3
+
+Then you would need approximately **286 GiB** of disk space in the entire cluster to accommodate your data.
+
+We believe that databases should be durable out of the box. When we
+built Riak, we did so in a way that you could write to disk while
+keeping response times below your users' expectations. So this
+calculation assumes that you'll be keeping the entire data set on disk.
+
+Many of the considerations taken when configuring a machine to serve a
+database apply to configuring a node for Riak as well. Mounting
+disks with `noatime` and having separate disks for your OS and Riak data
+lead to much better performance. See [Planning for a
+Riak System](../start) for more information.
+
+### Disk Space Planning and Ownership Handoff
+
+When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is that the other nodes require more intensive disk space usage; in rare cases it can fill the disk of one or more of those nodes.
+
+When making disk space planning decisions, we recommend that you:
+
+* assume that one or more nodes may be down at any time
+* monitor your disk space usage and add additional space when usage
+  exceeds 50-60% of available space
+
+Another possibility worth considering is using Riak with a filesystem
+that allows for growth, for example
+[LVM],
+[RAID](http://en.wikipedia.org/wiki/RAID), or
+[ZFS](http://en.wikipedia.org/wiki/ZFS).
+
+## Read/Write Profile
+
+Read/write ratios, as well as the distribution of key access, should
+influence the configuration and design of your cluster.
If your use case +is write heavy, you will need less RAM for caching, and if only a +certain portion of keys is accessed regularly, such as a [Pareto +distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you +won't need as much RAM available to cache those keys' values. + +## Number of Nodes + +The number of nodes (i.e. physical servers) in your Riak Cluster depends +on the number of times data is [replicated][concept replication] across the +cluster. To ensure that the cluster is always available to respond to +read and write requests, we recommend a "sane default" of N=3 +replicas. This requirement can be met with a 3 or 4-node +cluster. + +For production deployments, however, we recommend using no fewer than 5 +nodes, as node failures in smaller clusters can compromise the +fault-tolerance of the system. Additionally, in clusters smaller than 5 +nodes, a high percentage of the nodes (75-100% of them) will need to +respond to each request, putting undue load on the cluster that may +degrade performance. For more details on this recommendation, see our +blog post on [Why Your Riak Cluster Should Have at Least Five +Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/). + +## Scaling + +Riak can be scaled in two ways: vertically, via improved hardware, and +horizontally, by adding more nodes. Both ways can provide performance +and capacity benefits, but should be used in different circumstances. +The [riak-admin cluster command][use admin riak-admin#cluster] can +assist scaling in both directions. + +#### Vertical Scaling + +Vertical scaling, or improving the capabilities of a node/server, +provides greater capacity to the node but does not decrease the overall +load on existing members of the cluster. That is, the ability of the +improved node to handle existing load is increased but the load itself +is unchanged. Reasons to scale vertically include increasing IOPS (I/O +Operations Per Second), increasing CPU/RAM capacity, and increasing disk +capacity. + +#### Horizontal Scaling + +Horizontal scaling, or increasing the number of nodes in the cluster, +reduces the responsibilities of each member node by reducing the number +of partitions and providing additional endpoints for client connections. +That is, the capacity of each individual node does not change but its +load is decreased. Reasons to scale horizontally include increasing I/O +concurrency, reducing the load on existing nodes, and increasing disk +capacity. + +> **Note on horizontal scaling** +> +> When scaling horizontally, it's best to add all planned nodes at once +with multiple `riak-admin cluster join` commands followed by +a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster. + +#### Reducing Horizontal Scale + +If a Riak cluster is over provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command. + +## Ring Size/Number of Partitions + +Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference]. + +The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes we recommend a larger ring size. 
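+
+For example, to start a new cluster with 128 partitions, you would set
+`ring_size` in `riak.conf` before the cluster is first started (a
+sketch; in the older `app.config` system the equivalent `riak_core`
+setting is `ring_creation_size`):
+
+```riakconf
+# Set before the cluster is started for the first time; ring size is
+# not a setting that can simply be edited on a running cluster.
+ring_size = 128
+```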
+
+The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes.
+
+There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses. We recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine, while a 10-node cluster should work well with a ring size of 128 or 256 (64 is too small, while 512 is likely too large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency IO access can drastically decrease performance. Before
+putting your Riak cluster into production, it is recommended that you
+gain a full understanding of your environment's behavior so that you
+know how your cluster performs under load for an extended period of
+time. Doing so will help you size your cluster for future growth and
+lead to optimal performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend that you keep latency
+between nodes as low as possible, as high latency leads to
+sub-optimal performance; a quick way to sanity-check inter-node latency
+and throughput is sketched at the end of this page.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the
+lowest latency possible using SATA drives or SSDs, for example.
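+
+As referenced above, here is a quick way to sanity-check inter-node
+latency and throughput before going to production; the hostnames below
+are hypothetical, and `iperf` is just one commonly used tool:
+
+```bash
+# Round-trip latency between two prospective Riak nodes
+ping -c 10 riak2.example.com
+
+# Throughput: start an iperf server on riak2 ...
+iperf -s
+
+# ... then run a 30-second test against it from riak1
+iperf -c riak2.example.com -t 30
+```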
diff --git a/content/riak/kv/2.9.0p5/setup/planning/future.md b/content/riak/kv/2.9.0p5/setup/planning/future.md
new file mode 100644
index 0000000000..7ff441c7ef
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/planning/future.md
@@ -0,0 +1,25 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+#menu:
+#  riak_kv-2.9.0p5:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.0p5/setup/planning/future/
+  - /riak/2.9.0/setup/planning/future/
+  - /riak/kv/2.9.0/setup/planning/future/
+  - /riak/kv/2.9.0p1/setup/planning/future/
+  - /riak/kv/2.9.0p2/setup/planning/future/
+  - /riak/kv/2.9.0p3/setup/planning/future/
+  - /riak/kv/2.9.0p4/setup/planning/future/
+---
+
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.9.0p5/setup/planning/operating-system.md b/content/riak/kv/2.9.0p5/setup/planning/operating-system.md
new file mode 100644
index 0000000000..9698926b2d
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/planning/operating-system.md
@@ -0,0 +1,34 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.0p5/setup/planning/operating-system/
+  - /riak/2.9.0/setup/planning/operating-system/
+  - /riak/kv/2.9.0/setup/planning/operating-system/
+  - /riak/kv/2.9.0p1/setup/planning/operating-system/
+  - /riak/kv/2.9.0p2/setup/planning/operating-system/
+  - /riak/kv/2.9.0p3/setup/planning/operating-system/
+  - /riak/kv/2.9.0p4/setup/planning/operating-system/
+---
+
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.0p5/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
diff --git a/content/riak/kv/2.9.0p5/setup/planning/start.md b/content/riak/kv/2.9.0p5/setup/planning/start.md
new file mode 100644
index 0000000000..95e02fbbd0
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/planning/start.md
@@ -0,0 +1,65 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/building/planning/system-planning
+  - /riak/kv/2.9.0p5/ops/building/planning/system-planning
+  - /riak/2.9.0p5/setup/planning/start/
+  - /riak/2.9.0/setup/planning/start/
+  - /riak/kv/2.9.0/setup/planning/start/
+  - /riak/kv/2.9.0p1/setup/planning/start/
+  - /riak/kv/2.9.0p2/setup/planning/start/
+  - /riak/kv/2.9.0p3/setup/planning/start/
+  - /riak/kv/2.9.0p4/setup/planning/start/
+---
+
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring your
+Riak cluster.
+ +## Backend + +Backends are what Riak KV uses to persist data. Different backends have +strengths and weaknesses, so if you are unsure of which backend you +need, read through the [Choosing a Backend][plan backend] tutorial. + +## Capacity + +[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster. + +If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity. + +## Network Configuration / Load Balancing + +There are at least two acceptable strategies for load-balancing requests +across your Riak cluster: **virtual IPs** and **reverse-proxy**. + +For **virtual IPs**, we recommend using any of the various VIP +implementations. We don't recommend VRRP behavior for the VIP because +you'll lose the benefit of spreading client query load to all nodes in a +ring. + +For **reverse-proxy** configurations (HTTP interface), any one of the +following should work adequately: + +* haproxy +* squid +* varnish +* nginx +* lighttpd +* Apache + diff --git a/content/riak/kv/2.9.0p5/setup/search.md b/content/riak/kv/2.9.0p5/setup/search.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/content/riak/kv/2.9.0p5/setup/upgrading.md b/content/riak/kv/2.9.0p5/setup/upgrading.md new file mode 100644 index 0000000000..e8e2a98428 --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/upgrading.md @@ -0,0 +1,42 @@ +--- +title: "Upgrading Riak KV" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Upgrading" + identifier: "upgrading" + weight: 102 + parent: "setup_index" +toc: true +aliases: + - /riak/2.9.0p5/setup/upgrading/ + - /riak/2.9.0/setup/upgrading/ + - /riak/kv/2.9.0/setup/upgrading/ + - /riak/kv/2.9.0p1/setup/upgrading/ + - /riak/kv/2.9.0p2/setup/upgrading/ + - /riak/kv/2.9.0p3/setup/upgrading/ + - /riak/kv/2.9.0p4/setup/upgrading/ +--- + + +[upgrade checklist]: ./checklist +[upgrade version]: ./version +[upgrade cluster]: ./cluster +[upgrade mdc]: ./multi-datacenter +[upgrade search]: ./search + +## In This Section + +### [Production Checklist][upgrade checklist] + +An overview of what to consider before upgrading Riak KV in a production environment. 
+ +[Learn More >>][upgrade checklist] + +### [Upgrading to Riak KV 2.9.0][upgrade version] + +A tutorial on updating to Riak KV 2.9.0 + +[Learn More >>][upgrade version] diff --git a/content/riak/kv/2.9.0p5/setup/upgrading/checklist.md b/content/riak/kv/2.9.0p5/setup/upgrading/checklist.md new file mode 100644 index 0000000000..77ad194268 --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/upgrading/checklist.md @@ -0,0 +1,228 @@ +--- +title: "Production Checklist" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Production Checklist" + identifier: "upgrading_checklist" + weight: 100 + parent: "upgrading" +toc: true +aliases: + - /riak/2.9.0p5/ops/upgrading/production-checklist/ + - /riak/kv/2.9.0p5/ops/upgrading/production-checklist/ + - /riak/2.9.0p5/setup/upgrading/checklist/ + - /riak/2.9.0/setup/upgrading/checklist/ + - /riak/kv/2.9.0/setup/upgrading/checklist/ + - /riak/kv/2.9.0p1/setup/upgrading/checklist/ + - /riak/kv/2.9.0p2/setup/upgrading/checklist/ + - /riak/kv/2.9.0p3/setup/upgrading/checklist/ + - /riak/kv/2.9.0p4/setup/upgrading/checklist/ +--- + + +[perf open files]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance/open-files-limit +[perf index]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance +[ntp]: http://www.ntp.org/ +[security basics]: {{<baseurl>}}riak/kv/2.9.0p5/using/security/basics +[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/load-balancing-proxy +[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference +[config backend]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/backend +[usage search]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/replication-properties +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/strong-consistency +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types +[use admin commands]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/commands +[use admin riak control]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-control +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/inspecting-node +[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.0p5/using/troubleshooting/http-204 +[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin +[SANs]: http://en.wikipedia.org/wiki/Storage_area_network + +Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition. + +We've compiled these considerations and questions into separate categories for you to look over. + +## System + +* Are all systems in your cluster as close to identical as possible in + terms of both hardware and software? +* Have you set appropriate [open files limits][perf open files] on all + of your systems? +* Have you applied the [Riak KV performance improvement recommendations][perf index]? + +## Network + +* Are all systems using the same [NTP servers][ntp] to + synchronize clocks? +* Are you sure that your NTP clients' configuration is monotonic (i.e. 
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using a tool such as `iperf` to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
+    * If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][concept strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state?
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak-admin`][use admin riak-admin])
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median and 95th and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs?
+* Are the client libraries in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying?
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak-admin diag <check>`, where `check` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak-admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+ - Normal and Low are for issues not immediately impacting production + systems + - High is for problems that impact production or soon-to-be-production + systems, but where stability is not currently compromised + - Urgent is for problems causing production outages or for those + issues that are likely to turn into production outages very soon. + On-call engineers respond to urgent requests within 30 minutes, + 24 / 7. +* Does your team know how to gather `riak-debug` results from the whole + cluster when opening tickets? If not, that process goes something like + this: + - SSH into each machine, run `riak-debug`, and grab the resultant + `.tar.gz` file + - Attach all debug tarballs from the whole cluster each time you open + a new High- or Urgent-priority ticket + +## The Final Step: Taking it to Production + +Once you've been running in production for a month or so, look back at +the metrics gathered above. Based on the numbers you're seeing so far, +configure alerting thresholds on your latencies, disk consumption, and +memory. These are the places most likely to give you advance warning of +trouble. + +When you go to increase capacity down the line, having historic metrics +will give you very clear indicators of having resolved scaling problems, +as well as metrics for understanding what to upgrade and when. diff --git a/content/riak/kv/2.9.0p5/setup/upgrading/cluster.md b/content/riak/kv/2.9.0p5/setup/upgrading/cluster.md new file mode 100644 index 0000000000..5848d58a1f --- /dev/null +++ b/content/riak/kv/2.9.0p5/setup/upgrading/cluster.md @@ -0,0 +1,300 @@ +--- +title: "Upgrading a Cluster" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Upgrading a Cluster" + identifier: "upgrading_cluster" + weight: 102 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/2.9.0p5/ops/upgrading/rolling-upgrades/ + - /riak/kv/2.9.0p5/ops/upgrading/rolling-upgrades/ + - /riak/2.9.0/ops/upgrading/rolling-upgrades/ + - /riak/kv/2.9.0/ops/upgrading/rolling-upgrades/ +--- + +[production checklist]: {{<baseurl>}}riak/kv/2.9.0p5/setup/upgrading/checklist +[use admin riak control]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-control +[use admin commands]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/commands +[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/secondary-indexes +[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter +[jmx monitor]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/jmx +[snmp]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/snmp + +{{% note title="Note on upgrading Riak KV from older versions" %}} +Riak KV upgrades are tested and supported for two feature release versions. +For example, upgrades from 1.1.x to 1.3.x are tested and supported, +while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new +version of Riak KV that is more than two feature releases ahead, we +recommend first upgrading to an intermediate version. For example, in an +upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x +before upgrading to 1.4.x. 
+
+If you run [Riak Control]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-control), you should disable it during the rolling upgrade process.
+{{% /note %}}
+
+Riak KV nodes negotiate with each other to determine supported
+operating modes. This allows clusters containing mixed versions of Riak KV
+to interoperate properly without special configuration, and simplifies
+rolling upgrades.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading.
+
+## Debian/Ubuntu
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Debian/Ubuntu packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up the Riak KV node's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo dpkg -i <riak_package_name>.deb
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+   directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7. The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+  * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_jmx` --- See [JMX Monitoring][jmx monitor] for more information.
+  * `snmp` --- See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should ensure that any custom patches contained in
+the `basho-patches` directory are examined to determine their
+application to the upgraded version. If you find that patches no longer
+apply to the upgraded version, you should remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
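+
+If you script parts of a rolling upgrade, a small wait loop can block
+until handoff has drained before you move on to the next node. This is
+a sketch only; it keys off the `No transfers active` output described
+in the production checklist:
+
+```bash
+# Poll riak-admin transfers until hinted handoff has completed
+while ! riak-admin transfers | grep -q 'No transfers active'; do
+  echo "transfers still running; checking again in 60s"
+  sleep 60
+done
+echo "handoff complete, safe to proceed to the next node"
+```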
diff --git a/content/riak/kv/2.9.0p5/setup/upgrading/multi-datacenter.md b/content/riak/kv/2.9.0p5/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..57430c8fb5
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,27 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+#menu:
+#  riak_kv-2.9.0p5:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.0p5/setup/upgrading/multi-datacenter/
+  - /riak/2.9.0/setup/upgrading/multi-datacenter/
+  - /riak/kv/2.9.0/setup/upgrading/multi-datacenter/
+  - /riak/kv/2.9.0p1/setup/upgrading/multi-datacenter/
+  - /riak/kv/2.9.0p2/setup/upgrading/multi-datacenter/
+  - /riak/kv/2.9.0p3/setup/upgrading/multi-datacenter/
+  - /riak/kv/2.9.0p4/setup/upgrading/multi-datacenter/
+---
+
+
+## TODO
+
+How to update to a new version with multi-datacenter.
diff --git a/content/riak/kv/2.9.0p5/setup/upgrading/search.md b/content/riak/kv/2.9.0p5/setup/upgrading/search.md
new file mode 100644
index 0000000000..ac0084a2f9
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/upgrading/search.md
@@ -0,0 +1,278 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.0.0/ops/advanced/upgrading-search-2
+  - /riak/kv/2.9.0/ops/advanced/upgrading-search-2
+  - /riak/2.0.0p5/ops/advanced/upgrading-search-2
+  - /riak/kv/2.9.0p5/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e. no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+And please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy Search. New writes add
+entries to both indexes while AAE adds entries in the new indexes for
+existing data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM.
A good start is 2 GB but more will be required for heavier
+workloads. On the other hand, do not make the heap too large, as that
+could cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+> Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU and network resources, and especially more disk space, since merge index's garbage collection
+algorithm is poor at removing large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the option is called
+   `yokozuna`. If you've switched to the new `riak.conf` configuration
+   format, it's called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+        %% Other configs
+        {enabled, true},
+        %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+    To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+    instructions to learn how to define your XML file. Once you've created
+    the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. 
For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+    Once a bucket is associated with the new Search, all objects that are
+    written or modified in Riak will be indexed by **both** legacy and new
+    Search. However, the HTTP and client query interfaces will still
+    continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+    Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+    the following code into the shell. It clears the Search hash trees for
+    each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+    ```
+
+    Press `Ctrl-D` to exit from the attached shell.
+
+    In the background AAE will rebuild the hash trees and exchange them
+    with KV. These exchanges will notice objects are missing and index
+    them in new Search.
+
+    <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+have occurred on every node.
+
+    ```bash
+    riak-admin search aae-status
+    ```
+
+    First, you must wait until all trees are rebuilt. This may take a
+    while as each node is configured, by default, to build a maximum of
+    one tree per hour. You can determine when a tree is built by looking
+    at the `Entropy Trees` section. When a tree is not built it will show
+    `--` under the `Built (ago)` column. Otherwise, it will list how long
+    ago the tree was built in a human-friendly format. Here is an example
+    of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                                Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792    --
+    319703483166135013357056057156686910549735243776    --
+    ...
+    ```
+
+    Here is an example of built trees:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                                Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792    12.3 hr
+    319703483166135013357056057156686910549735243776    5.3 hr
+    ...
+    ```
+
+    After all the trees are built you then have to wait for a full
+    exchange round to occur for every partition on every node. That is,
+    the full exchange round must be **NEWER** than the time the tree was
+    built. That way you know the exchange was based on the latest tree.
+    The exchange information is found under the `Exchanges` section.
+    Under that section there are two columns: `Last (ago)` and `All
+    (ago)`. In this case you want to wait until the `All (ago)` value is
+    newer than the value of `Built (ago)` in the `Entropy Trees` section.
+    For example, given the entropy tree output above this output would
+    indicate both partitions have had a full exchange round since the
+    latest tree was built:
+
+    ```
+    ================================== Exchanges ==================================
+    Index                                                Last (ago)    All (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792    12.1 hr       12.1 hr
+    319703483166135013357056057156686910549735243776    5.1 hr        5.2 hr
+    ...
+    ```
+
+    Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than
+    `5.3 hr`. Once the exchange is newer for every partition on every
+    node you know that AAE has brought all new indexes up to date.
+
+7. Next, run the following command, which gives HTTP and Protocol
+Buffers query control to the new Riak Search.
+
+    ```bash
+    riak-admin search switch-to-new-search
+    ```
+
+    <div class="note">
+    <div class="title">Check Results Before Switching (Optional)</div>
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After `switch-to-new-search` is run all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching then you can use its dedicated
+    HTTP resource at `/search/query/<index>?q=...`. A comparison sketch
+    appears at the end of this page.
+    </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+        %% Other configs
+        {enabled, false},
+        %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search it causes
+the commit hooks to persist even when legacy Search is disabled and the
+`search` property is set to `false`.
+
+    New Search has code to expunge the legacy hooks from the raw ring but
+    it only occurs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+If you have any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than during the middle of a migration.
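+
+As an optional sanity check around step 7, you can issue the same query
+through both interfaces and compare the responses. This is only a sketch:
+it assumes an index named `my_index` and a node listening on
+`localhost:8098`, and uses the legacy `/solr/<index>/select` resource
+alongside the new `/search/query/<index>` resource mentioned above.
+
+```bash
+# Compare legacy Search and new Search responses for the same query.
+# Differing result sets suggest AAE has not yet finished indexing (step 6).
+curl -s 'http://localhost:8098/solr/my_index/select?q=*:*'
+curl -s 'http://localhost:8098/search/query/my_index?q=*:*'
+```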
diff --git a/content/riak/kv/2.9.0p5/setup/upgrading/version.md b/content/riak/kv/2.9.0p5/setup/upgrading/version.md
new file mode 100644
index 0000000000..617db8867d
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/setup/upgrading/version.md
@@ -0,0 +1,252 @@
+---
+title: "Upgrading to Riak KV 2.9.0"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Upgrading to 2.9.0"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.0p5/upgrade-v20/
+  - /riak/kv/2.9.0p5/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.0p5/setup/upgrading/cluster/
+  - /riak/2.9.0p5/setup/upgrading/version/
+  - /riak/2.9.0/setup/upgrading/version/
+  - /riak/kv/2.9.0/setup/upgrading/version/
+  - /riak/kv/2.9.0p1/setup/upgrading/version/
+  - /riak/kv/2.9.0p2/setup/upgrading/version/
+  - /riak/kv/2.9.0p3/setup/upgrading/version/
+  - /riak/kv/2.9.0p4/setup/upgrading/version/
+---
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.0p5/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/2.9.0p5/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/2.9.0p5/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 2.9.0 by following the instructions below.
+
+{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+    * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to be able to downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config.
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to override the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
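+
+Condensed into shell form, one pass of that per-node loop looks roughly
+like the sketch below. The paths are the Debian/Ubuntu ones used on this
+page and the package name is a placeholder; the detailed,
+platform-specific commands appear in the [Upgrading process](#upgrading-process) section.
+
+```bash
+# Illustrative sketch of a single node's upgrade; adjust paths per OS.
+riak stop
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak \
+    /usr/lib/riak/lib/basho-patches
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+sudo dpkg -i »riak_package_name«.deb          # placeholder package name
+riak start
+riak version
+riak-admin wait-for-service riak_kv »target_node«
+riak-admin transfers
+```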
+
+
+## Transitioning to the Leveled Backend
+
+
+[Riak KV 2.9][release notes] introduced a new backend written specifically for Riak: leveled.
+
+The leveled backend is not compatible with other backends in terms of the serialised disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+* First transition to 2.9 with the current backend in place, minimising the time spent running mismatched versions in parallel;
+* Then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+{{% note %}}
+You only need [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) to upgrade to Riak KV 2.9.0 if you plan to use Riak search.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features may be introduced that we either believe are so important that we automatically opt users in on upgrade, or for which there is no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade, data written in HLL format is unreadable. |
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade to version 2.9, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+
+## Upgrading process
+
+1/. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2/. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories.
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3/. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+
+4/. Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package at this point, before installing the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a/. 
(**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+    ```advancedconfig
+    {riak_kv, [
+        {override_capability, [
+            {object_hash_version, [{use, legacy}]}
+        ]}
+    ]}
+    ```
+
+5.b/. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+    ```riakconf
+    leveldb.compression.algorithm=snappy
+    ```
+
+5.c/. (**OSS Only**) If you are upgrading from Riak KV OSS 2.2.3 or earlier, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d/. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+    ```advancedconfig
+    {riak_repl, [
+        {override_capability, [
+            {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}]}
+        ]}
+    ]}
+    ```
+
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e/. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6/. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.0 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7/. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8/. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`).
+
+9/. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf. This data is transferred to the node when it becomes available.
+
+10/. Repeat the process for the remaining nodes in the cluster.
+
+
+### Basho Patches
+
+After upgrading, you should examine any custom patches contained in the `basho-patches` directory to determine whether they still apply to the upgraded version. You can find this information in the [Release Notes].
+ +If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production. + +The following lists locations of the `basho-patches` directory for +each supported operating system: + +- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches` +- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches` +- FreeBSD: `/usr/local/lib/riak/lib/basho-patches` +- SmartOS: `/opt/local/lib/riak/lib/basho-patches` +- Solaris 10: `/opt/riak/lib/basho-patches` + +### Riaknostic + +It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility Riaknostic. + +Ensure that Riak KV is running on the node, and issue the following command: + +```bash +riak-admin diag +``` + +Make the recommended changes from the command output to ensure optimal node operation. diff --git a/content/riak/kv/2.9.0p5/using.md b/content/riak/kv/2.9.0p5/using.md new file mode 100644 index 0000000000..9c6622a47b --- /dev/null +++ b/content/riak/kv/2.9.0p5/using.md @@ -0,0 +1,81 @@ +--- +title: "Using Riak KV" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Using" + identifier: "managing" + weight: 201 + pre: database +toc: true +aliases: + - /riak/2.9.0p5/using/ + - /riak/2.9.0/using/ + - /riak/kv/2.9.0/using/ + - /riak/kv/2.9.0p1/using/ + - /riak/kv/2.9.0p2/using/ + - /riak/kv/2.9.0p3/using/ + - /riak/kv/2.9.0p4/using/ +--- + + +[use running cluster]: ../using/running-a-cluster +[use admin index]: ../using/admin/ +[cluster ops index]: ../using/cluster-operations +[repair recover index]: ../using/repair-recovery +[security index]: ../using/security +[perf index]: ../using/performance +[troubleshoot index]: ../using/troubleshooting +[use ref]: ../using/reference + +## In This Section + +#### [Running a Cluster][use running cluster] + +A guide on basic cluster setup. + +[Learn More >>][use running cluster] + +#### [Cluster Administration][use admin index] + +Tutorials and reference documentation on cluster administration commands as well as command-line tools. + +[Learn More >>][use admin index] + +#### [Cluster Operations][cluster ops index] + +Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups. + +[Learn More >>][cluster ops index] + +#### [Repair & Recovery][repair recover index] + +Contains documentation on repairing a cluster, recovering from failure, and common errors. + +[Learn More >>][repair recover index] + +#### [Security][security index] + +Information on securing your Riak KV cluster. + +[Learn More >>][security index] + +#### [Performance][perf index] + +Articles on benchmarking your Riak KV cluster and improving performance. + +[Learn More >>][perf index] + +#### [Troubleshooting][troubleshoot index] + +Guides on troubleshooting issues and current product advisories. + +[Learn More >>][troubleshoot index] + +#### [Reference][use ref] + +Articles providing background information and implementation details on topics such as logging, bucket types, and search. 
+
+[Learn More >>][use ref]
diff --git a/content/riak/kv/2.9.0p5/using/admin.md b/content/riak/kv/2.9.0p5/using/admin.md
new file mode 100644
index 0000000000..fe923e639b
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/admin.md
@@ -0,0 +1,55 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/running/cluster-admin
+  - /riak/kv/2.9.0p5/ops/running/cluster-admin
+  - /riak/2.9.0p5/using/admin/
+  - /riak/2.9.0/using/admin/
+  - /riak/kv/2.9.0/using/admin/
+  - /riak/kv/2.9.0p1/using/admin/
+  - /riak/kv/2.9.0p2/using/admin/
+  - /riak/kv/2.9.0p3/using/admin/
+  - /riak/kv/2.9.0p4/using/admin/
+---
+
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
diff --git a/content/riak/kv/2.9.0p5/using/admin/commands.md b/content/riak/kv/2.9.0p5/using/admin/commands.md
new file mode 100644
index 0000000000..a7ef5816e1
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/admin/commands.md
@@ -0,0 +1,382 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/running/cluster-admin
+  - /riak/kv/2.9.0p5/ops/running/cluster-admin
+  - /riak/2.9.0p5/using/admin/commands/
+  - /riak/2.9.0/using/admin/commands/
+  - /riak/kv/2.9.0/using/admin/commands/
+  - /riak/kv/2.9.0p1/using/admin/commands/
+  - /riak/kv/2.9.0p2/using/admin/commands/
+  - /riak/kv/2.9.0p3/using/admin/commands/
+  - /riak/kv/2.9.0p4/using/admin/commands/
+---
+
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. 
This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command. This will execute the changes that
+have been staged and reviewed.
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
+interface are also available as self-standing commands. The `riak-admin
+member-status` command is now the `riak-admin cluster status` command,
+`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+|        node        |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` --- There are five possible values for status:
+  * `valid` --- The node has begun participating in cluster operations
+  * `leaving` --- The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` --- The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` --- The node is in the process of joining the cluster
+    but has not yet completed the join process
+  * `down` --- The node is not currently responding
+* `avail` --- There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` --- What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` --- The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant][cluster ops add remove node] node will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
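+
+Taken together with `plan` and `commit` (described below), a staged join
+is typically a three-step sequence; the node name here is illustrative:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1   # stage the join
+riak-admin cluster plan                   # review the staged change
+riak-admin cluster commit                 # apply it
+```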
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
+[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+The output of `riak-admin cluster plan` varies with the staged changes. Note also that clearing staged changes (see [`clear`](#clear) below) behaves differently depending on what has been staged:
+
+* If a `leave` operation has been staged, clearing it will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* If staged changes are cleared on a node that remains in the cluster, the node itself is unaffected.
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
+
+If there is a staged change (or changes), however, you
+will see a detailed listing of what will take place upon commit, what
+the cluster will look like afterward, etc.
+
+For example, if a `cluster leave` operation is staged in a 3-node cluster the output will look something like this:
+
+```
+=============================== Staged Changes ================================
+Action         Details(s)
+-------------------------------------------------------------------------------
+leave          'dev2@127.0.0.1'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 2 cluster transitions
+
+###############################################################################
+                         After cluster transition 1/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+leaving    32.8%      0.0%    'dev2@127.0.0.1'
+valid      34.4%     50.0%    'dev1@127.0.0.1'
+valid      32.8%     50.0%    'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+
+Transfers resulting from cluster changes: 38
+  6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1'
+  11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1'
+  5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1'
+  16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1'
+
+###############################################################################
+                         After cluster transition 2/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      50.0%      --      'dev1@127.0.0.1'
+valid      50.0%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+```
+
+Notice that there are distinct sections of the output for each of the
+transitions that the cluster will undergo, including warnings, planned
+data transfers, etc.
+
+## commit
+
+Commits the currently staged cluster changes. Staged cluster changes
+must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed.
+
+```bash
+riak-admin cluster commit
+```
+
+## clear
+
+Clears the currently staged cluster changes.
+
+```bash
+riak-admin cluster clear
+```
+
+## partitions
+
+Prints primary, secondary, and stopped partition indices and IDs either
+for the current node or for another node that you specify. 
The following +prints that information for the current node: + +```bash +riak-admin cluster partitions +``` + +This would print the partition information for a different node in the +cluster: + +```bash +riak-admin cluster partitions --node=<node> +``` + +Partition information is contained in a table like this: + +``` +Partitions owned by 'dev1@127.0.0.1': ++---------+-------------------------------------------------+--+ +| type | index |id| ++---------+-------------------------------------------------+--+ +| primary | 0 |0 | +| primary | 91343852333181432387730302044767688728495783936 |4 | +| primary |182687704666362864775460604089535377456991567872 |8 | +| ... | .... |..| +| primary |1438665674247607560106752257205091097473808596992|63| +|secondary| -- |--| +| stopped | -- |--| ++---------+-------------------------------------------------+--+ +``` + +## partition-count + +Displays the current partition count either for the whole cluster or for +a particular node. This would display the partition count for the +cluster: + +```bash +riak-admin cluster partition-count +``` + +This would display the count for a node: + +```bash +riak-admin cluster partition-count --node=<node> +``` + +When retrieving the partition count for a node, you'll see a table like +this: + +``` ++--------------+----------+-----+ +| node |partitions| pct | ++--------------+----------+-----+ +|dev1@127.0.0.1| 22 | 34.4| ++--------------+----------+-----+ +``` + +The `partitions` column displays the number of partitions claimed by the +node, while the `pct` column displays the percentage of the ring claimed. + +## partition + +The `cluster partition` command enables you to convert partition IDs to +indexes and vice versa using the `partition id` and `partition index` +commands, respectively. Let's say that you run the `riak-admin cluster +partitions` command and see that you have a variety of partitions, one +of which has an index of +`1004782375664995756265033322492444576013453623296`. 
You can convert +that index to an ID like this: + +```bash +riak-admin cluster partition index=1004782375664995756265033322492444576013453623296 +``` + +Conversely, if you have a partition with an ID of 20, you can retrieve +the corresponding index: + +```bash +riak-admin cluster partition id=20 +``` diff --git a/content/riak/kv/2.9.0p5/using/admin/riak-admin.md b/content/riak/kv/2.9.0p5/using/admin/riak-admin.md new file mode 100644 index 0000000000..77594d4dac --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/admin/riak-admin.md @@ -0,0 +1,725 @@ +--- +title: "riak-admin Command Line Interface" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "riak-admin CLI" + identifier: "cluster_admin_cli" + weight: 101 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/tools/riak-admin + - /riak/kv/2.9.0p5/ops/running/tools/riak-admin + - /riak/2.9.0p5/using/admin/riak-admin/ + - /riak/2.9.0/using/admin/riak-admin/ + - /riak/kv/2.9.0/using/admin/riak-admin/ + - /riak/kv/2.9.0p1/using/admin/riak-admin/ + - /riak/kv/2.9.0p2/using/admin/riak-admin/ + - /riak/kv/2.9.0p3/using/admin/riak-admin/ + - /riak/kv/2.9.0p4/using/admin/riak-admin/ +--- + + +[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference +[use admin commands]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/commands +[use admin commands#join]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/commands/#join +[use admin commands#leave]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/commands/#leave +[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/backing-up +[config reference#node-metadata]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#node-metadata +[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce +[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/commit-hooks +[config reference#ring]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#ring +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/inspecting-node +[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/statistics-monitoring +[downgrade]: {{<baseurl>}}riak/kv/2.9.0p5/setup/downgrade +[security index]: {{<baseurl>}}riak/kv/2.9.0p5/using/security/ +[security managing]: {{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/bucket-types +[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/secondary-indexes +[repair recover index]: {{<baseurl>}}riak/kv/2.9.0p5/using/repair-recovery +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#stats + +## `riak-admin` + +The riak-admin command performs operations unrelated to node liveness, including: +node membership, backup, and basic status reporting. The node must be +running for most of these commands to work. 
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. Reip should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs have been fixed related to reip in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded JavaScript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+**Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Outputs system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10
+riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into the operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
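+
+As a brief illustration of the interface (a sketch only; see the guides
+above for the full command set and for proper security setup), a minimal
+session might look like this:
+
+```bash
+riak-admin security status                          # check whether security is enabled
+riak-admin security add-user alice password=secret  # create a user
+riak-admin security grant riak_kv.get on any to alice
+```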
+
+## bucket-type
+
+Bucket types, introduced in Riak 2.0, are a means of managing bucket
+properties, as well as an additional namespace on top of buckets and
+keys. This command enables you to create and modify bucket
+types, provide the status of currently available bucket types, and
+activate created bucket types.
+
+```bash
+riak-admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak-admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak-admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak-admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+        Total partitions: 64
+        Finished partitions: 44
+        Speed: 100
+        Total 2i items scanned: 0
+        Total tree objects: 0
+        Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak-admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak-admin repair-2i kill
+```
+
+## search
+
+The search command provides sub-commands for various administrative
+work related to the new Riak Search.
+
+```bash
+riak-admin search <command>
+```
+
+### aae-status
+
+```bash
+riak-admin search aae-status
+```
+
+Outputs active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtrees don't match for a particular key. The `Last` column is the
+number of keys repaired during the last exchange. 
The `Mean` column is +the average number of keys repaired for all exchange rounds since the +node has started. The `Max` column is the maximum number of keys +repaired for a given exchange round since the node has started. + +### switch-to-new-search + +{{% note title="Only For Legacy Migration" %}} +This is only needed when migrating from legacy riak search to the new Search +(Yokozuna). +{{% /note %}} + +```bash +riak-admin search switch-to-new-search +``` + +Switch handling of the HTTP `/solr/<index>/select` resource and +protocol buffer query messages from legacy Riak Search to new Search +(Yokozuna). + +## services + +Lists available services on the node (e.g. `riak_kv`). + +```bash +riak-admin services +``` + +## ensemble-status + +This command is used to provide insight into the current status of the +consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature. + +```bash +riak-admin ensemble-status +``` + +This command can also be used to check on the status of a specific +consensus group in your cluster: + +```bash +riak-admin ensemble-status <group id> +``` + +Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency]. + +## handoff + +Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff]. + +## set + +Enables you to change the value of one of Riak's configuration +parameters on the fly, without needing to stop and restart the node. + +```bash +riak-admin set <variable>=<value> +``` + +The set command can only be used for the following +parameters: + +* `transfer_limit` +* `handoff.outbound` +* `handoff.inbound` +* `search.dist_query=off` will disable distributed query for the node +* `search.dist_query=on` will enable distributed query for the node +* `search.dist_query` will get the status of distributed query for the node + +The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted. + + +## show + +Whereas the [`riak-admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak +cluster, the `show` command enables you to view only some of those +statistics. + +```bash +riak-admin show <variable> +``` + +## describe + +Provides a brief description of one of Riak's [configurable parameters][config reference]. 
+ +```bash +riak-admin describe <variable> +``` + +If you want to know the meaning of the `nodename` parameter: + +```bash +riak-admin describe nodename +``` + +That will produce the following output: + +``` +nodename: + Name of the Erlang node +``` diff --git a/content/riak/kv/2.9.0p5/using/admin/riak-cli.md b/content/riak/kv/2.9.0p5/using/admin/riak-cli.md new file mode 100644 index 0000000000..6794bc516e --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/admin/riak-cli.md @@ -0,0 +1,208 @@ +--- +title: "riak Command Line Interface" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "riak CLI" + identifier: "cluster_admin_riak_cli" + weight: 102 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/tools/riak + - /riak/kv/2.9.0p5/ops/running/tools/riak + - /riak/2.9.0p5/using/admin/riak-cli/ + - /riak/2.9.0/using/admin/riak-cli/ + - /riak/kv/2.9.0/using/admin/riak-cli/ + - /riak/kv/2.9.0p1/using/admin/riak-cli/ + - /riak/kv/2.9.0p2/using/admin/riak-cli/ + - /riak/kv/2.9.0p3/using/admin/riak-cli/ + - /riak/kv/2.9.0p4/using/admin/riak-cli/ +--- + + +[configuration file]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/ +[escript]: http://www.erlang.org/doc/man/escript.html +[`riak-admin`]: {{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#top +[configuration]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/ + +## riak + +This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands: + +```bash +Usage: riak «command» +where «command» is one of the following: + { help | start | stop | restart | ping | console | attach + attach-direct | ertspath | chkconfig | escript | version | getpid + top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } | + config { generate | effective | describe VARIABLE } [-l debug] +``` + +## help + +Provides a brief description of all available commands. + +## start + +Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given. + +```bash +riak start +``` + +## stop + +Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding. + +```bash +riak stop +``` + +## restart + +Stops and then starts the running Riak node without exiting the Erlang VM. +Prints `ok` when successful, `Node <nodename> not responding to pings.` when the node is already stopped or not responding. + +```bash +riak restart +``` + +## ping + +Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding. + +```bash +riak ping +``` + +## console + +Starts the Riak node in the foreground, giving access to the Erlang shell and +runtime messages. Prints `Node is already running - use 'riak attach' instead` +when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice. + +```bash +riak console +``` + +## attach + +Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. 
+
+```bash
+riak attach
+```
+
+## attach-direct
+
+Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**.
+
+```bash
+riak attach-direct
+```
+
+## ertspath
+
+Outputs the path of the Riak Erlang runtime environment:
+
+```bash
+riak ertspath
+```
+
+## chkconfig
+
+Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output.
+
+```bash
+riak chkconfig
+```
+
+## escript
+
+Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment:
+
+```bash
+riak escript <filename>
+```
+
+## version
+
+Outputs the Riak version identifier:
+
+```bash
+riak version
+```
+
+## getpid
+
+Outputs the process identifier for the currently-running instance of Riak:
+
+```bash
+riak getpid
+```
+
+## top
+
+The `riak top` command is the direct equivalent of `riak-admin top`:
+
+```bash
+riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N]
+```
+
+More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation.
+
+## config
+
+Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration.
+
+```bash
+riak config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files.
+  The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example:
+
+    ```
+    -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args
+    ```
+
+    If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message:
+
+    ```
+    -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args
+    ```
+
+* `effective` prints the effective configuration in the following syntax:
+
+    ```
+    parameter1 = value1
+    parameter2 = value2
+    ```
+
+    If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error:
+
+    ```
+    Disabling cuttlefish, legacy configuration files found:
+      /etc/riak/app.config
+      /etc/riak/vm.args
+    Effective config is only visible for cuttlefish conf files.
+    ```
+
+* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following:
+
+    ```
+    Documentation for storage_backend
+    Specifies the storage engine used for Riak's key-value data
+    and secondary indexes (if supported).
+
+    Valid Values:
+      - one of: bitcask, leveldb, memory, multi, prefix_multi
+    Default Value : bitcask
+    Set Value     : bitcask
+    Internal key  : riak_kv.storage_backend
+    ```
+
+Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem.
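+
+For example, after editing riak.conf it can be worth validating the change
+before restarting the node; combining two of the commands above (a sketch,
+not a procedure prescribed by the original page):
+
+```bash
+# Syntax-check the configuration file
+riak chkconfig
+
+# Re-run the cuttlefish generation step with extra debugging output
+riak config generate -l debug
+```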
diff --git a/content/riak/kv/2.9.0p5/using/admin/riak-control.md b/content/riak/kv/2.9.0p5/using/admin/riak-control.md
new file mode 100644
index 0000000000..1b52d44fc0
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/admin/riak-control.md
@@ -0,0 +1,241 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/advanced/riak-control
+  - /riak/kv/2.9.0p5/ops/advanced/riak-control
+  - /riak/2.9.0p5/using/admin/riak-control/
+  - /riak/2.9.0/using/admin/riak-control/
+  - /riak/kv/2.9.0/using/admin/riak-control/
+  - /riak/kv/2.9.0p1/using/admin/riak-control/
+  - /riak/kv/2.9.0p2/using/admin/riak-control/
+  - /riak/kv/2.9.0p3/using/admin/riak-control/
+  - /riak/kv/2.9.0p4/using/admin/riak-control/
+---
+
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+or
+
+listener.https.<name> = 127.0.0.1:8069
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+
+```
+
+```appconfig
+ {riak_api,
+  [
+    %% Other configs
+    ... if HTTP is configured ...
+    {http,[{"127.0.0.1",8098}]},
+    ... if HTTPS is configured ...
+    {https,[{"127.0.0.1",8069}]},
+    %% Other configs
+  ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found later in this document.
+{{% /note %}}
+
+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, false},
+    %% Other configs
+    ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, true},
+    %% Other configs
+    ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>.
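+
+A quick, generic way to confirm that the endpoint is being served after the
+restart (this check is illustrative and assumes the default internal HTTP
+listener shown above):
+
+```bash
+# Expect a 200 (or an authentication challenge) if Riak Control is up
+curl -s -o /dev/null -w "%{http_code}\n" http://127.0.0.1:8098/admin
+```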
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/2.9.0p5/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
+
+## Authentication
+
+Riak Control provides you with the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {auth, userlist}, %% The only other available option is "none"
+    %% Other configs
+    ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {userlist, [
+        {"bob", "bob_is_the_coolest"},
+        {"polly", "h4x0r123"},
+        {"riakrocks", "cap_theorem_4_life"}
+        ]}
+    %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to https://ip_address_of_https_listener:https_port/admin.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png)]({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+
+Staged changes to the cluster:
+
+[![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png)]({{<baseurl>}}images/control_cluster_management_staged.png)
+
+Changes committed; transfers active:
+
+[![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png)]({{<baseurl>}}images/control_cluster_management_transfers.png)
+
+Cluster stabilizes after changes:
+
+[![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png)]({{<baseurl>}}images/control_cluster_management_stable.png)
+
+### Node Management View
+
+The node management view allows you to operate against the individual
+nodes in the cluster.
+
+[![Node Management]({{<baseurl>}}images/control_node_management.png)]({{<baseurl>}}images/control_node_management.png)
+
+### Ring View
+
+One level deeper than the cluster view is the ring view. This is where you can
+see the health of each [vnode]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode).
+
+[![Ring View]({{<baseurl>}}images/control_current_ring.png)]({{<baseurl>}}images/control_current_ring.png)
+
+Most of the time, your ring will be too large to effectively manage from
+the ring view. That said, with filters you can easily identify partition
+ownership, unreachable primaries, and in-progress handoffs.
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations.md b/content/riak/kv/2.9.0p5/using/cluster-operations.md
new file mode 100644
index 0000000000..2542a8aa57
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations.md
@@ -0,0 +1,113 @@
+---
+title: "Cluster Operations"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Cluster Operations"
+    identifier: "managing_cluster_operations"
+    weight: 203
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/cluster-operations/
+  - /riak/2.9.0/using/cluster-operations/
+  - /riak/kv/2.9.0/using/cluster-operations/
+  - /riak/kv/2.9.0p1/using/cluster-operations/
+  - /riak/kv/2.9.0p2/using/cluster-operations/
+  - /riak/kv/2.9.0p3/using/cluster-operations/
+  - /riak/kv/2.9.0p4/using/cluster-operations/
+---
+
+
+[ops add remove node]: ./adding-removing-nodes
+[ops change info]: ./changing-cluster-info
+[ops replace node]: ./replacing-node
+[ops inspect node]: ./inspecting-node
+[ops bucket types]: ./bucket-types
+[ops log]: ./logging
+[ops backup]: ./backing-up
+[ops handoff]: ./handoff
+[ops strong consistency]: ./strong-consistency
+[ops v3 mdc]: ./v3-multi-datacenter
+[ops v2 mdc]: ./v2-multi-datacenter
+
+## In This Section
+
+
+#### [Adding / Removing Nodes][ops add remove node]
+
+Describes the process of adding and removing nodes to and from a Riak KV cluster.
+
+[Learn More >>][ops add remove node]
+
+
+#### [Changing Cluster Information][ops change info]
+
+Details how to rename nodes and change the HTTP & Protocol Buffers binding address.
+
+[Learn More >>][ops change info]
+
+
+#### [Replacing a Node][ops replace node]
+
+Tutorial on replacing a node in a Riak KV cluster.
+
+[Learn More >>][ops replace node]
+
+
+#### [Inspecting a Node][ops inspect node]
+
+Guide on some of the available tools for inspecting a Riak KV node.
+
+[Learn More >>][ops inspect node]
+
+
+#### [Bucket Types][ops bucket types]
+
+Brief tutorial on creating bucket types.
+
+[Learn More >>][ops bucket types]
+
+
+#### [Enabling and Disabling Debug Logging][ops log]
+
+Details toggling debug logging on a single node or all nodes in a cluster.
+
+[Learn More >>][ops log]
+
+
+#### [Backing Up][ops backup]
+
+Covers backing up Riak KV data.
+
+[Learn More >>][ops backup]
+
+
+#### [Enabling and Disabling Handoff][ops handoff]
+
+Information on using the `riak-admin handoff` interface to enable and disable handoff.
+
+[Learn More >>][ops handoff]
+
+
+#### [Monitoring Strong Consistency][ops strong consistency]
+
+Overview of the various statistics used in monitoring strong consistency.
+
+[Learn More >>][ops strong consistency]
+
+
+#### [V3 Multi-Datacenter][ops v3 mdc]
+
+Explains how to manage V3 replication with the `riak-repl` command.
+
+[Learn More >>][ops v3 mdc]
+
+
+#### [V2 Multi-Datacenter][ops v2 mdc]
+
+Explains how to manage V2 replication with the `riak-repl` command.
+
+[Learn More >>][ops v2 mdc]
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.9.0p5/using/cluster-operations/active-anti-entropy.md
new file mode 100644
index 0000000000..40d57ce0ea
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations/active-anti-entropy.md
@@ -0,0 +1,292 @@
+---
+title: "Managing Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Managing Active Anti-Entropy"
+    identifier: "cluster_operations_aae"
+    weight: 111
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/kv/2.9.0p5/ops/advanced/aae/
+  - /riak/2.9.0p5/ops/advanced/aae/
+  - /riak/2.9.0p5/using/cluster-operations/active-anti-entropy/
+  - /riak/2.9.0/using/cluster-operations/active-anti-entropy/
+  - /riak/kv/2.9.0/using/cluster-operations/active-anti-entropy/
+  - /riak/kv/2.9.0p1/using/cluster-operations/active-anti-entropy/
+  - /riak/kv/2.9.0p2/using/cluster-operations/active-anti-entropy/
+  - /riak/kv/2.9.0p3/using/cluster-operations/active-anti-entropy/
+  - /riak/kv/2.9.0p4/using/cluster-operations/active-anti-entropy/
+---
+
+
+[config search#throttledelay]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/search/#search-anti-entropy-throttle-tier-delay
+[config search#throttle]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/search/#search-anti-entropy-throttle
+
+Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) (AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning.
+
+In Riak versions 2.9.0 and later, [TicTac AAE]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/tictac-active-anti-entropy) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak.
+
+## Enabling Active Anti-Entropy
+
+Whether AAE is currently enabled in a node is determined by the value of
+the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/).
+
+In Riak versions 2.0 and later, AAE is turned on by default.
+ +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. + +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak-admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... 
+
+================================ Entropy Trees ================================
+Index                                              Built (ago)
+-------------------------------------------------------------------------------
+0                                                  5.7 d
+22835963083295358096932575511191922182123945984    5.6 d
+45671926166590716193865151022383844364247891968    5.5 d
+68507889249886074290797726533575766546371837952    4.3 d
+91343852333181432387730302044767688728495783936    4.8 d
+
+================================ Keys Repaired ================================
+Index                                                Last    Mean    Max
+-------------------------------------------------------------------------------
+0                                                       0       0      0
+22835963083295358096932575511191922182123945984         0       0      0
+45671926166590716193865151022383844364247891968         0       0      0
+68507889249886074290797726533575766546371837952         0       0      0
+91343852333181432387730302044767688728495783936         0       0      0
+
+```
+
+Each of these three tables contains information for each
+[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories:
+
+Category | Measures | Description
+:--------|:---------|:-----------
+**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed
+ | `All` | How long it has been since a partition exchanged with all of its replicas
+**Entropy Trees** | `Built` | When the hash trees for a given partition were created
+**Keys Repaired** | `Last` | The number of keys repaired during all key exchanges since the last node restart
+ | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart
+ | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart
+
+All AAE status information obtainable using the `riak-admin aae-status`
+command is stored in-memory and is reset when a node is restarted with
+the exception of hash tree build information, which is persisted on disk
+(because hash trees themselves are persisted on disk).
+
+## Configuring AAE
+
+Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and
+off but also to fine-tune your cluster's use of AAE, e.g. how
+much memory AAE processes should consume, how frequently specific
+processes should be run, etc.
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach the length
+specified by the `search.anti_entropy.throttle.$tier.solrq_queue_length` parameter, applying
+the delay given by the corresponding [`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on
+both in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
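+
+To confirm which throttle settings a node has actually picked up, one option
+is to filter its effective configuration (an illustrative spot-check; the
+`grep` pattern is not from the original page):
+
+```bash
+riak config effective | grep anti_entropy.throttle
+```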
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but
+entail a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data corruption.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built. You can set the frequency using the
+`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
+default is every hour (`1h`); the number of hash tree builds is
+specified by `anti_entropy.tree.build_limit.number`, for which the
+default is 1.
+
+### Write Buffer Size
+
+While you are free to choose the backend for data storage in Riak,
+background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
+write buffer used by LevelDB for hash tree generation using the
+`anti_entropy.write_buffer_size` parameter. The default is `4MB`.
+
+### Open Files and Concurrency Limits
+
+The `anti_entropy.concurrency_limit` parameter determines how many AAE
+cross-node information exchanges or hash tree builds can happen
+concurrently. The default is `2`.
+
+The `anti_entropy.max_open_files` parameter sets an open-files limit for
+AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.
+
+## AAE and Riak Search
+
+Riak's AAE subsystem works to repair object inconsistencies both
+for normal key/value objects and for data related to [Riak Search](../../../developing/usage/search). 
In particular, AAE acts on indexes stored in +[Solr](http://lucene.apache.org/solr/), the search platform that drives +Riak Search. Implementation details for AAE and Search can be found in +the [Search Details](../../reference/search/#active-anti-entropy-aae) +documentation. + +You can check on the status of Search-related AAE using the following +command: + +```bash +riak-admin search aae-status +``` + +The output from that command can be interpreted just like the output +discussed in the section on [monitoring](#monitoring-aae) above. diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes.md new file mode 100644 index 0000000000..618d84408b --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes.md @@ -0,0 +1,202 @@ +--- +title: "Adding / Removing Nodes" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Adding/Removing Nodes" + identifier: "cluster_operations_add_remove_nodes" + weight: 100 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/nodes/adding-removing + - /riak/kv/2.9.0p5/ops/running/nodes/adding-removing + - /riak/2.9.0p5/using/cluster-operations/adding-removing-nodes/ + - /riak/2.9.0/using/cluster-operations/adding-removing-nodes/ + - /riak/kv/2.9.0/using/cluster-operations/adding-removing-nodes/ + - /riak/kv/2.9.0p1/using/cluster-operations/adding-removing-nodes/ + - /riak/kv/2.9.0p2/using/cluster-operations/adding-removing-nodes/ + - /riak/kv/2.9.0p3/using/cluster-operations/adding-removing-nodes/ + - /riak/kv/2.9.0p4/using/cluster-operations/adding-removing-nodes/ +--- + + +[use running cluster]: {{<baseurl>}}riak/kv/2.9.0p5/using/running-a-cluster + +This page describes the process of adding and removing nodes to and from +a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. + +## Start the Node + +Just like the initial configuration steps, this step has to be repeated +for every node in your cluster. Before a node can join an existing +cluster it needs to be started. Depending on your mode of installation, +use either the init scripts installed by the Riak binary packages or +simply the script [`riak`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-cli/): + +```bash +/etc/init.d/riak start +``` + +or + +```bash +bin/riak start +``` + +When the node starts, it will look for a cluster description, known as +the **ring file**, in its data directory. If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. 
So if the existing cluster consists of
+nodes `A`, `B`, and `C`, any of the following commands would join the
+new node:
+
+```bash
+bin/riak-admin cluster join A
+bin/riak-admin cluster join B
+bin/riak-admin cluster join C
+```
+
+To give a more realistic example, let's say that you have an isolated
+node named `riak@192.168.2.5` and you want to join it to an existing
+cluster that contains a node named `riak@192.168.2.2`. This command
+would stage a join to that cluster:
+
+```bash
+bin/riak-admin cluster join riak@192.168.2.2
+```
+
+If the join request is successful, you should see the following:
+
+```
+Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2'
+```
+
+If you have multiple nodes that you would like to join to an existing
+cluster, repeat this process for each of them.
+
+## Joining Nodes to Form a Cluster
+
+The process of joining a cluster involves several steps, including
+staging the proposed cluster nodes, reviewing the cluster plan, and
+committing the changes.
+
+After staging each of the cluster nodes with `riak-admin cluster join`
+commands, as in the section above, the next step in forming a cluster is
+to review the proposed plan of changes. This can be done with the
+`riak-admin cluster plan` command, which is shown in the example below.
+
+```
+=============================== Staged Changes ================================
+Action         Nodes(s)
+-------------------------------------------------------------------------------
+join           'riak@192.168.2.3'
+join           'riak@192.168.2.4'
+join           'riak@192.168.2.5'
+join           'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+                         After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring       Pending    Node
+-------------------------------------------------------------------------------
+valid     100.0%      20.3%      'riak@192.168.2.2'
+valid       0.0%      20.3%      'riak@192.168.2.3'
+valid       0.0%      20.3%      'riak@192.168.2.4'
+valid       0.0%      20.3%      'riak@192.168.2.5'
+valid       0.0%      18.8%      'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+  12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from the cluster in two ways. One assumes that a
+node is decommissioned, for example, because its added capacity is not
+needed anymore or because it's explicitly replaced with a new one. The
+second is relevant for failure scenarios in which a node has crashed and
+is irrecoverable and thus must be removed from the cluster from another
+node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster.
+
+Similarly to joining a node, after executing `riak-admin cluster leave`
+the cluster plan must be reviewed with `riak-admin cluster plan` and
+the changes committed with `riak-admin cluster commit`.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing. Running
+`riak-admin cluster leave` without an argument simply selects the current
+node for you automatically.
+
+As with `riak-admin cluster leave`, the plan to have a node leave the
+cluster must be first reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before any changes will
+actually take place.
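+
+Gathered into one place, the full decommissioning flow for the current node
+therefore follows the same stage/plan/commit cycle used for joins (a sketch
+of the commands described above):
+
+```bash
+riak-admin cluster leave    # stage this node's departure
+riak-admin cluster plan     # review the proposed transition
+riak-admin cluster commit   # apply it once the plan looks right
+```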
+
+
+## Pausing a `join` or `leave`
+
+{{% note title="Warning" %}}
+Pausing may impact cluster health and is not recommended for more than a short period of time.
+{{% /note %}}
+
+To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0:
+
+```bash
+riak-admin transfer-limit <node> 0
+```
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/backend.md b/content/riak/kv/2.9.0p5/using/cluster-operations/backend.md
new file mode 100644
index 0000000000..0a01c65aa3
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations/backend.md
@@ -0,0 +1,25 @@
+---
+draft: true
+title: "Backend"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+#menu:
+#  riak_kv-2.9.0p5:
+#    name: "Backend"
+#    identifier: "cluster_operations_backend"
+#    weight: 112
+#    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/cluster-operations/backend/
+  - /riak/2.9.0/using/cluster-operations/backend/
+  - /riak/kv/2.9.0/using/cluster-operations/backend/
+  - /riak/kv/2.9.0p1/using/cluster-operations/backend/
+  - /riak/kv/2.9.0p2/using/cluster-operations/backend/
+  - /riak/kv/2.9.0p3/using/cluster-operations/backend/
+  - /riak/kv/2.9.0p4/using/cluster-operations/backend/
+---
+
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/backing-up.md b/content/riak/kv/2.9.0p5/using/cluster-operations/backing-up.md
new file mode 100644
index 0000000000..7b1fff9e11
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations/backing-up.md
@@ -0,0 +1,275 @@
+---
+title: "Backing Up"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Backing Up"
+    identifier: "cluster_operations_backing_up"
+    weight: 106
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/running/backups
+  - /riak/kv/2.9.0p5/ops/running/backups
+  - /riak/2.9.0p5/using/cluster-operations/backing-up/
+  - /riak/2.9.0/using/cluster-operations/backing-up/
+  - /riak/kv/2.9.0/using/cluster-operations/backing-up/
+  - /riak/kv/2.9.0p1/using/cluster-operations/backing-up/
+  - /riak/kv/2.9.0p2/using/cluster-operations/backing-up/
+  - /riak/kv/2.9.0p3/using/cluster-operations/backing-up/
+  - /riak/kv/2.9.0p4/using/cluster-operations/backing-up/
+---
+
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters
+[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/bitcask
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/strong-consistency
+[concept aae]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/
+[aae read repair]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy
+
+Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios.
+
+Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios.
+
+This page covers how to perform backups of Riak KV data.
+
+## Overview
+
+Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar.
+
+Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes.
+
+The basic process for getting a backup of Riak KV from a node is as follows:
+
+1. Stop Riak KV with `riak stop`.
+2. Back up the appropriate data, ring, and configuration directories.
+3. Start Riak KV.
+
+Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting.
+
+{{% note title="Backups and eventual consistency" %}}
+Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node.
+
+Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy).
+{{% /note %}}
+
+## OS-Specific Directory Locations
+
+The default Riak KV data, ring, and configuration directories for each of the supported operating systems is as follows:
+
+#### Debian and Ubuntu
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### Fedora and RHEL
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### FreeBSD
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/usr/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### OS X
+
+Data | Directory
+:----|:---------
+Bitcask | `./data/bitcask`
+LevelDB | `./data/leveldb`
+Ring | `./data/riak/ring`
+Configuration | `./etc`
+Cluster Metadata | `./data/riak/cluster_meta`
+Search | `./data/riak/yz`
+Strong consistency | `./data/ensembles`
+
+**Note**: OS X paths are relative to the directory in which the package
+was extracted.
+
+#### SmartOS
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/opt/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### Solaris
+
+Data | Directory
+:----|:---------
+Bitcask | `/opt/riak/data/bitcask`
+LevelDB | `/opt/riak/data/leveldb`
+Ring | `/opt/riak/ring`
+Configuration | `/opt/riak/etc`
+Cluster Metadata | `/opt/riak/cluster_meta`
+Search | `/opt/riak/yz`
+Strong consistency | `/opt/riak/data/ensembles`
+
+## Performing Backups
+
+{{% note title="Deprecation notice" %}}
+In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#backup) command commonly used for
+backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
+{{% /note %}}
+
+Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.
+
+Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
+installation.
+
+The following examples use `tar`:
+
+{{% note %}}
+Backups must be performed while Riak KV is stopped to prevent data loss.
+{{% /note %}}
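+
+Putting the three-step process together for a single Debian/Ubuntu node
+running Bitcask, a minimal end-to-end backup might look like the sketch
+below (paths are the defaults from the tables above; the destination
+directory is hypothetical):
+
+```bash
+riak stop
+tar -czf /mnt/riak_backups/riak_data_$(date +%Y%m%d_%H%M).tar.gz \
+    /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak
+riak start
+```
+
+The per-backend variants below show the same archive step for the other
+data directories.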
+
+### Bitcask
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak
+```
+
+### LevelDB
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak
+```
+
+### Cluster Metadata
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/cluster_meta
+```
+
+### Search / Solr Data
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/yz
+```
+
+### Strong Consistency Data
+
+Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature
+can be stored in an analogous fashion:
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/ensembles
+```
+
+## Restoring a Node
+
+The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment.
+
+If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process:
+
+1. Install Riak on the new node.
+2. Restore your old node's configuration files, data directory, and ring
+   directory.
+3. Start the node and verify proper operation with `riak ping`,
+   `riak-admin status`, and other methods you use to check node health.
+
+If the node name of a restored node (`-name` argument in `vm.args` or
+`nodename` parameter in `riak.conf`) is different than the name of the
+node that the restored backup was taken from, you will need to
+additionally:
+
+1. Mark the original instance down in the cluster using
+   [`riak-admin down <node>`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#down)
+2. Join the restored node to the cluster using
+   [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster-join)
+3. Replace the original instance with the renamed instance with
+   [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster-force-replace)
+4. Plan the changes to the cluster with `riak-admin cluster plan`
+5. Finally, commit the cluster changes with `riak-admin cluster commit`
+
+{{% note %}}
+For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/).
+{{% /note %}}
+
+For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`.
+
+1. Join to any existing cluster node.
+
+    ```bash
+    riak-admin cluster join riak@riak2.example.com
+    ```
+
+2. Mark the old instance down.
+
+    ```bash
+    riak-admin down riak@riak1.example.com
+    ```
+
+3. Force-replace the original instance with the new one.
+
+    ```bash
+    riak-admin cluster force-replace \
+        riak@riak1.example.com riak@riak6.example.com
+    ```
+
+4. Display and review the cluster change plan.
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+5. Commit the changes to the cluster.
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+Your [configuration files][config reference] should also be changed to match the new name in addition to running the commands (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change, but the hostnames are used for the node names and the hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member_status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/2.9.0p5/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/bucket-types.md b/content/riak/kv/2.9.0p5/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..7f86b30fa1
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations/bucket-types.md
@@ -0,0 +1,67 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/cluster-operations/bucket-types/
+  - /riak/2.9.0/using/cluster-operations/bucket-types/
+  - /riak/kv/2.9.0/using/cluster-operations/bucket-types/
+  - /riak/kv/2.9.0p1/using/cluster-operations/bucket-types/
+  - /riak/kv/2.9.0p2/using/cluster-operations/bucket-types/
+  - /riak/kv/2.9.0p3/using/cluster-operations/bucket-types/
+  - /riak/kv/2.9.0p4/using/cluster-operations/bucket-types/
+---
+
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional setup step on the
+command line.
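+
+Before creating anything new, it can be helpful to see which bucket types
+already exist on the cluster and whether they have been activated; the
+`bucket-type` interface provides `list` and `status` subcommands for this
+(shown here as an illustration):
+
+```bash
+riak-admin bucket-type list
+riak-admin bucket-type status animals   # properties of one type, e.g. the type created below
+```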
+
+## Creating a Bucket Type
+
+When creating a new bucket type, you can create one without
+any properties and then set individual buckets to be indexed. The step below
+creates and activates the bucket type:
+
+```bash
+riak-admin bucket-type create animals '{"props":{}}'
+riak-admin bucket-type activate animals
+```
+
+And this step applies the index to the `cats` bucket, which bears the
+`animals` bucket type we just created and activated:
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"famous"}}'
+```
+
+Another possibility is to set the `search_index` as a default property
+of the bucket type. This means _any_ bucket under that type will
+inherit that setting and have its values indexed.
+
+```bash
+riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}'
+riak-admin bucket-type activate animals
+```
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.9.0p5/using/cluster-operations/changing-cluster-info.md
new file mode 100644
index 0000000000..1925ccc1c9
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations/changing-cluster-info.md
@@ -0,0 +1,462 @@
+---
+title: "Changing Cluster Information"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Changing Cluster Info"
+    identifier: "cluster_operations_change_info"
+    weight: 101
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/running/nodes/renaming
+  - /riak/kv/2.9.0p5/ops/running/nodes/renaming
+  - /riak/2.9.0p5/using/cluster-operations/changing-cluster-info/
+  - /riak/2.9.0/using/cluster-operations/changing-cluster-info/
+  - /riak/kv/2.9.0/using/cluster-operations/changing-cluster-info/
+  - /riak/kv/2.9.0p1/using/cluster-operations/changing-cluster-info/
+  - /riak/kv/2.9.0p2/using/cluster-operations/changing-cluster-info/
+  - /riak/kv/2.9.0p3/using/cluster-operations/changing-cluster-info/
+  - /riak/kv/2.9.0p4/using/cluster-operations/changing-cluster-info/
+---
+
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference
+
+## Change the Node Name
+
+The node name is an important setting for the Erlang VM, especially when
+you want to build a cluster of nodes, as the node name identifies both
+the Erlang application and the host name on the network. All nodes in
+the Riak cluster need these node names to communicate and coordinate
+with each other.
+
+In your configuration files, the node name defaults to `riak@127.0.0.1`.
+To change the node name, change the following line:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+Change it to something that corresponds to either the IP address or a
+resolvable host name for this particular node, like so:
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+## Change the HTTP and Protocol Buffers binding address
+
+By default, Riak's HTTP and Protocol Buffers services are bound to the
+local interface, i.e. 127.0.0.1, and are therefore unable to serve
+requests from the outside network. The relevant setting is in your
+[configuration files][config reference]:
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.9.0p5/using/cluster-operations/changing-cluster-info.md
new file mode 100644
index 0000000000..1925ccc1c9
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations/changing-cluster-info.md
@@ -0,0 +1,462 @@
+---
+title: "Changing Cluster Information"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Changing Cluster Info"
+    identifier: "cluster_operations_change_info"
+    weight: 101
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/running/nodes/renaming
+  - /riak/kv/2.9.0p5/ops/running/nodes/renaming
+  - /riak/2.9.0p5/using/cluster-operations/changing-cluster-info/
+  - /riak/2.9.0/using/cluster-operations/changing-cluster-info/
+  - /riak/kv/2.9.0/using/cluster-operations/changing-cluster-info/
+  - /riak/kv/2.9.0p1/using/cluster-operations/changing-cluster-info/
+  - /riak/kv/2.9.0p2/using/cluster-operations/changing-cluster-info/
+  - /riak/kv/2.9.0p3/using/cluster-operations/changing-cluster-info/
+  - /riak/kv/2.9.0p4/using/cluster-operations/changing-cluster-info/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference
+
+## Change the Node Name
+
+The node name is an important setting for the Erlang VM, especially when
+you want to build a cluster of nodes, as the node name identifies both
+the Erlang application and the host name on the network. All nodes in
+the Riak cluster need these node names to communicate and coordinate
+with each other.
+
+In your configuration files, the node name defaults to `riak@127.0.0.1`.
+To change the node name, change the following line:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+Change it to something that corresponds to either the IP address or a
+resolvable host name for this particular node, like so:
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+## Change the HTTP and Protocol Buffers Binding Address
+
+By default, Riak's HTTP and Protocol Buffers services are bound to the
+local interface, i.e. 127.0.0.1, and are therefore unable to serve
+requests from the outside network. The relevant setting is in your
+[configuration files][config reference]:
+
+```riakconf
+# For HTTP
+listener.http.internal = 127.0.0.1:8098
+
+# For Protocol Buffers
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+% For HTTP, in the riak_core section
+{http, [ {"127.0.0.1", 8098 } ]},
+
+% For Protocol Buffers, in the riak_api section
+{pb, [ {"127.0.0.1", 8087} ] },
+```
+
+Either change it to use an IP address that corresponds to one of the
+server's network interfaces, or 0.0.0.0 to allow access from all
+interfaces and networks, e.g.:
+
+```riakconf
+listener.http.internal = 0.0.0.0:8098
+```
+
+```appconfig
+% In the riak_core section
+{http, [ {"0.0.0.0", 8098 } ]},
+```
+
+The same configuration should be changed for the Protocol Buffers
+interface if you intend to use it (which we recommend). Change the
+following line:
+
+```riakconf
+listener.protobuf.internal = 0.0.0.0:8087
+```
+
+```appconfig
+% In the riak_api section
+{pb, [ {"0.0.0.0", 8087} ] },
+```
+
+## Single Node Clusters
+
+To rename a single-node development cluster:
+
+1. Stop the node with `riak stop`.
+
+2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to the new name.
+
+3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`.
+
+4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`.
+
+5. Start Riak on the node with `riak start`.
+
+
+## Multi-Node Clusters
+
+For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node.
+
+Prior to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime.
+
+There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario.
+
+The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method.
+
+### Example Scenario
+
+For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration:
+
+* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11
+* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12
+* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13
+* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14
+* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15
+
+The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use.
+ +The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation. + +### Process + +This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section. + +1. [Down the node to be reconfigured](#down) +2. [Reconfigure node to use new address](#reconfigure) +3. [Repeat previous steps on each node](#repeat) + + +<a id="down"></a> +#### Down the Node + +1. Stop Riak on `node1.localdomain`: + + ```bash + riak stop + ``` + + The output should look like this: + + ``` + Attempting to restart script through sudo -H -u riak + ok + ``` + +2. From the `node2.localdomain` node, mark `riak@10.1.42.11` down: + + ```bash + riak-admin down riak@10.1.42.11 + ``` + + Successfully marking the node down should produce output like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: "riak@10.1.42.11" marked as down + ``` + + This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node. + +<a id="reconfigure"></a> +#### Reconfigure Node to Use New Address + +Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps: + +1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example: + + `riak.conf`: `nodename = riak@192.168.17.11` + `vm.args` : `-name riak@192.168.17.11` + +2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Single Node Clusters](#single-node-clusters). + +3. Rename the node's `ring` directory, the location of which is described in step 4 of [Single Node Clusters](#single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process. + +4. Start Riak on `node1.localdomain`. + + ```bash + riak start + ``` + +5. Join the node back into the cluster. + + ```bash + riak-admin cluster join riak@10.1.42.12 + ``` + + Successful staging of the join request should have output like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12' + ``` + +6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`: + + ```bash + riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11 + ``` + + Successful force replacement staging output looks like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11' + ``` + +7. 
Review the new changes with `riak-admin cluster plan`:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Example output:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    =========================== Staged Changes ============================
+    Action         Nodes(s)
+    -----------------------------------------------------------------------
+    join           'riak@192.168.17.11'
+    force-replace  'riak@10.1.42.11' with 'riak@192.168.17.11'
+    -----------------------------------------------------------------------
+
+    WARNING: All of 'riak@10.1.42.11' replicas will be lost
+
+    NOTE: Applying these changes will result in 1 cluster transition
+
+    #######################################################################
+                         After cluster transition 1/1
+    #######################################################################
+
+    ============================= Membership ==============================
+    Status     Ring    Pending    Node
+    -----------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@10.1.42.12'
+    valid      20.3%      --      'riak@10.1.42.13'
+    valid      20.3%      --      'riak@10.1.42.14'
+    valid      18.8%      --      'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    Partitions reassigned from cluster changes: 13
+      13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11'
+    ```
+
+8. Commit the new changes to the cluster with `riak-admin cluster commit`:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    Output from the command should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Cluster changes committed
+    ```
+
+9. Check that the node is participating in the cluster and functioning as expected:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    Output should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    ============================= Membership ==============================
+    Status     Ring    Pending    Node
+    -----------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@10.1.42.12'
+    valid      20.3%      --      'riak@10.1.42.13'
+    valid      20.3%      --      'riak@10.1.42.14'
+    valid      18.8%      --      'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+10. Monitor hinted handoff transfers to ensure they have finished with the `riak-admin transfers` command.
+
+11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed.
+
+{{% note title="Note" %}}
+When using the `riak-admin cluster force-replace` command, you will always get
+a warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be
+lost`. Since we didn't delete any data files and we are replacing the node
+with itself under a new name, we will not lose any replicas.
+{{% /note %}}
+
+<a id="repeat"></a>
+#### Repeat previous steps on each node
+
+Repeat the steps above for each of the remaining nodes in the cluster.
+
+Use *riak@192.168.17.11* as the target node for further `riak-admin
+cluster join` commands issued from subsequently reconfigured nodes to
+join those nodes to the cluster.
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
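+
+Compressed into a single pass, the per-node cycle looks roughly like the
+sketch below. The node names are the ones from this example scenario,
+the ring path is illustrative and varies by platform, and the
+`riak-admin down` step must be run from a different, still-running node:
+
+```bash
+# On the node being renamed: stop Riak
+riak stop
+
+# On a running peer: mark the stopped node as down
+riak-admin down riak@10.1.42.12
+
+# Back on the node being renamed, after updating riak.conf or vm.args:
+mv /var/lib/riak/ring /var/lib/riak/ring.bak   # keep the old ring as a backup
+riak start
+riak-admin cluster join riak@192.168.17.11
+riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12
+riak-admin cluster plan
+riak-admin cluster commit
+```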
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the steps below can be used to rename nodes in a cluster that is being restored from backups. These steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (or `-name` in `vm.args`) from `riak@10.1.42.11` to the new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    down       20.3%      --      'riak@10.1.42.12'
+    down       20.3%      --      'riak@10.1.42.13'
+    down       20.3%      --      'riak@10.1.42.14'
+    down       18.8%      --      'riak@10.1.42.15'
+    -------------------------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+    ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+    ```bash
+    ================================== Claimant ===================================
+    Claimant:  'riak@192.168.17.11'
+    Status:     up
+    Ring Ready: true
+
+    ============================== Ownership Handoff ==============================
+    No pending changes.
+
+    ============================== Unreachable Nodes ==============================
+    All nodes are up and reachable
+    ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args`, as described above.
+
+2. Move aside the ring directory. As in [Multi-Node Clusters](#multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. Force-replace each node's old name with its new one. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`.
+
+6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to the following:
+
+    ```bash
+    =============================== Staged Changes ================================
+    Action         Details(s)
+    -------------------------------------------------------------------------------
+    force-replace  'riak@10.1.42.12' with 'riak@192.168.17.12'
+    force-replace  'riak@10.1.42.13' with 'riak@192.168.17.13'
+    force-replace  'riak@10.1.42.14' with 'riak@192.168.17.14'
+    force-replace  'riak@10.1.42.15' with 'riak@192.168.17.15'
+    join           'riak@192.168.17.12'
+    join           'riak@192.168.17.13'
+    join           'riak@192.168.17.14'
+    join           'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+
+    WARNING: All of 'riak@10.1.42.12' replicas will be lost
+    WARNING: All of 'riak@10.1.42.13' replicas will be lost
+    WARNING: All of 'riak@10.1.42.14' replicas will be lost
+    WARNING: All of 'riak@10.1.42.15' replicas will be lost
+
+    NOTE: Applying these changes will result in 1 cluster transition
+
+    ###############################################################################
+                             After cluster transition 1/1
+    ###############################################################################
+
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@192.168.17.12'
+    valid      20.3%      --      'riak@192.168.17.13'
+    valid      20.3%      --      'riak@192.168.17.14'
+    valid      18.8%      --      'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    Partitions reassigned from cluster changes: 51
+      13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12'
+      13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13'
+      13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14'
+      12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15'
+    ```
+
+7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`.
+
+8. Once the cluster transition has completed, all node names should be changed and be marked as valid in `riak-admin member-status` like below:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@192.168.17.12'
+    valid      20.3%      --      'riak@192.168.17.13'
+    valid      20.3%      --      'riak@192.168.17.14'
+    valid      18.8%      --      'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/handoff.md b/content/riak/kv/2.9.0p5/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..de45423275
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations/handoff.md
@@ -0,0 +1,124 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/running/handoff
+  - /riak/kv/2.9.0p5/ops/running/handoff
+  - /riak/2.9.0p5/using/cluster-operations/handoff/
+  - /riak/2.9.0/using/cluster-operations/handoff/
+  - /riak/kv/2.9.0/using/cluster-operations/handoff/
+  - /riak/kv/2.9.0p1/using/cluster-operations/handoff/
+  - /riak/kv/2.9.0p2/using/cluster-operations/handoff/
+  - /riak/kv/2.9.0p3/using/cluster-operations/handoff/
+  - /riak/kv/2.9.0p4/using/cluster-operations/handoff/
+---
+
+Riak KV provides a command-line interface for enabling and disabling
+handoff on the fly, without needing to change your configuration and
+restart the node. To enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command requires
+that you specify both a node or nodes to be targeted by the command and
+whether you'd like to disable inbound handoff, outbound handoff, or
+both. The `disable` command works just like `enable`.
+This command
+would disable all forms of handoff on all nodes, to give just one
+example:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster.
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table that will provide the following information
+about each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, you will see a table describing each
+ongoing transfer.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the configuration
+reference linked above.
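+
+As a worked example, the sketch below strings these commands together
+for a node maintenance window, reusing the `riak3@100.0.0.1` node name
+from above:
+
+```bash
+# Stop new transfers to and from the node before maintenance begins
+riak-admin handoff disable both -n riak3@100.0.0.1
+
+# Confirm that no transfers involving the node are still in flight
+riak-admin handoff summary
+
+# ...perform the maintenance work...
+
+# Allow handoff again once the node is healthy
+riak-admin handoff enable both -n riak3@100.0.0.1
+```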
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/inspecting-node.md b/content/riak/kv/2.9.0p5/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..5b8697bd3a
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,500 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/running/nodes/inspecting
+  - /riak/kv/2.9.0p5/ops/running/nodes/inspecting
+  - /riak/2.9.0p5/using/cluster-operations/inspecting-node/
+  - /riak/2.9.0/using/cluster-operations/inspecting-node/
+  - /riak/kv/2.9.0/using/cluster-operations/inspecting-node/
+  - /riak/kv/2.9.0p1/using/cluster-operations/inspecting-node/
+  - /riak/kv/2.9.0p2/using/cluster-operations/inspecting-node/
+  - /riak/kv/2.9.0p3/using/cluster-operations/inspecting-node/
+  - /riak/kv/2.9.0p4/using/cluster-operations/inspecting-node/
+---
+
+When you need to inspect a Riak node to gather metrics on performance or
+diagnose potential issues, a number of tools are available to help;
+these are either included with Riak itself or made available through the
+Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note that for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required before statistics are generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node.
+
+Stat | Description
+------------------------|---------------------------------------------------
+`pbc_active` | Number of active Protocol Buffers connections
+`node_get_fsm_active` | Number of active GET FSMs
+`node_put_fsm_active` | Number of active PUT FSMs
+`index_fsm_active` | Number of active Secondary Index FSMs
+`list_fsm_active` | Number of active Keylisting FSMs
+`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection
+`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection
+
+### Average Stats
+
+Average Stats represent an average calculated as (total occurrences /
+number of samples) since this node was started. In the below stats the
+sample time is 1s, giving us a per-second average. Currently, the only
+Average Stats are reported by Sidejob, an Erlang library that
+implements a parallel, capacity-limited request pool.
+
+Stat | Description
+------------------------|---------------------------------------------------
+`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob
+`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob
+`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob
+`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob
+
+### One-Minute Stats
+
+One-Minute Stats represent the number of times a particular activity has
+occurred within the last minute on this node.
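+
+Because `riak-admin status` prints each statistic on its own line,
+individual counters, such as the `node_gets` and `node_puts` stats
+described below, can be pulled out with standard shell tools; a minimal
+sketch:
+
+```bash
+# Extract just the GET/PUT coordination counters from the full listing
+riak-admin status | grep -E '^(node_gets|node_puts) '
+```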
+ +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. 
+ +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. GET FSM Objsize and GET +FSM Siblings are inextricably linked. These are one-minute stats. + +Stat | Description +------------------------------|--------------------------------------------------- +`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute + +### Total Stats + +Total Stats represent the total number of times a particular activity +has occurred since this node was started. 
+
+Stat | Description
+---------------------------------------|---------------------------------------------------
+`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes
+`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes
+`vnode_gets_total` | Total number of GETs coordinated by local vnodes
+`vnode_puts_total` | Total number of PUTS coordinated by local vnodes
+`read_repairs_total` | Total number of Read Repairs this node has coordinated
+`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination
+`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy
+`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads
+`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes
+`vnode_index_writes_postings_total` | Total number of individual secondary index values written
+`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes
+`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted
+`pbc_connects_total` | Total number of Protocol Buffers connections made
+`precommit_fail` | Total number of pre-commit hook failures
+`postcommit_fail` | Total number of post-commit hook failures
+`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection
+`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection
+`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas
+`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas
+`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas
+`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas
+
+### Timestamps
+
+Some of the Erlang applications that make up Riak contribute
+statistics to `riak-admin status`. The below timestamps record, in
+Epoch time, the last time statistics for that application were
+generated.
+
+Stat | Description
+--------------------|---------------------------------------------------
+`riak_kv_stat_ts` | The last time Riak KV stats were generated.
+`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated.
+
+### Ring
+
+General ring information is reported in `riak-admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members` | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership` | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang's `cpu_sup` module,
+documentation for which can be found at [ErlDocs:
+cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1` | The average number of active processes for the last 1 minute (equivalent to top(1) command's load average when divided by 256)
+`cpu_avg5` | The average number of active processes for the last 5 minutes (equivalent to top(1) command's load average when divided by 256)
+`cpu_avg15` | The average number of active processes for the last 15 minutes (equivalent to top(1) command's load average when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine,
+documentation for which can be found at [ErlDocs:
+Memory](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total` | Total allocated memory (sum of processes and system)
+`memory_processes` | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system` | Total allocated memory that is not directly related to an Erlang process
+`memory_atom` | Total amount of memory currently allocated for atom storage
+`memory_atom_used` | Total amount of memory currently used for atom storage
+`memory_binary` | Total amount of memory used for binaries
+`memory_code` | Total amount of memory allocated for Erlang code
+`memory_ets` | Total memory allocated for Erlang Term Storage
+`mem_total` | Total available system memory
+`mem_allocated` | Total memory allocated for this node
+
+### Erlang VM
+
+The below statistics describe properties of the Erlang VM.
+
+Stat | Description
+--------------------------|---------------------------------------------------
+`nodename` | The name this node uses to identify itself
+`connected_nodes` | A list of the nodes that this node is aware of at this time
+`sys_driver_version` | String representing the Erlang driver version in use by the runtime system
+`sys_global_heaps_size` | Current size of the shared global heap
+`sys_heap_type` | String representing the heap type in use (one of private, shared, hybrid)
+`sys_logical_processors` | Number of logical processors available on the system
+`sys_otp_release` | Erlang OTP release version in use on the node
+`sys_process_count` | Number of processes currently running in the Erlang VM
+`sys_smp_support` | Boolean value representing whether symmetric multi-processing (SMP) is available
+`sys_system_version` | Detailed Erlang version information
+`sys_system_architecture` | The node operating system and hardware architecture
+`sys_threads_enabled` | Boolean value representing whether threads are enabled
+`sys_thread_pool_size` | Number of threads in the asynchronous thread pool
+`sys_wordsize` | Size of Erlang term words in bytes as an integer; for example, on 32-bit architectures 4 is returned and on 64-bit architectures 8 is returned
+
+### Miscellaneous Information
+
+Miscellaneous Information provides additional details particular to this
+node.
+
+Stat | Description
+---------------------------|---------------------------------------------------
+`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as undefined if LevelDB is not being used.
+`disk` | Information about the disk, taken from Erlang's disksup module. Reported as [{"ID",KBytes_Used,Percent_Util}].
+`storage_backend` | The storage backend currently in use.
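+
+Since these memory figures come straight from the Erlang VM, a quick way
+to compare overall allocation with process usage on a live node is to
+filter them out of the status listing; for example:
+
+```bash
+# Show only the VM memory statistics for this node
+riak-admin status | grep -E '^memory_'
+```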
+
+### Pipeline Metrics
+
+The following metrics from `riak_pipe` are generated during MapReduce
+operations.
+
+Stat | Description
+--------------------------------|---------------------------------------------------
+`pipeline_active` | The number of pipelines active in the last 60 seconds
+`pipeline_create_count` | The total number of pipelines created since the node was started
+`pipeline_create_error_count` | The total number of pipeline creation errors since the node was started
+`pipeline_create_error_one` | The number of pipeline creation errors in the last 60 seconds
+`pipeline_create_one` | The number of pipelines created in the last 60 seconds
+
+### Application and Subsystem Versions
+
+The specific version of each Erlang application and subsystem that
+makes up a Riak node is present in the `riak-admin status` output. Each
+application is linked below next to its version identifier.
+
+Stat | Description
+------------------------|---------------------------------------------------
+`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl)
+`riak_control_version` | [Riak Control](http://github.com/basho/riak_control)
+`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info)
+`riak_search_version` | [Riak Search](http://github.com/basho/riak_search)
+`merge_index_version` | [Merge Index](http://github.com/basho/merge_index)
+`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv)
+`sidejob_version` | [Sidejob](http://github.com/basho/sidejob)
+`riak_api_version` | [Riak API](http://github.com/basho/riak_api)
+`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe)
+`riak_core_version` | [Riak Core](http://github.com/basho/riak_core)
+`bitcask_version` | [Bitcask](http://github.com/basho/bitcask)
+`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats)
+`webmachine_version` | [Webmachine](http://github.com/basho/webmachine)
+`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb)
+`inets_version` | [inets](http://erlang.org/doc/apps/inets/)
+`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js)
+`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/)
+`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/)
+`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon)
+`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/)
+`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/)
+`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/)
+`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/)
+`lager_version` | [Lager](http://github.com/basho/lager)
+`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush)
+`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/)
+`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/)
+`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/)
+`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/)
+
+### Riak Search Statistics
+
+The following statistics related to Riak Search message queues are
+available.
+
+Stat | Description
+-----------------------------|---------------------------------------------------
+`riak_search_vnodeq_max` | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_mean` | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_median` | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_min` | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_total` | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started
+`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem
+
+Note that under ideal operation, and with the exception of
+`riak_search_vnodes_running`, these statistics should contain low values
+(e.g., 0-10). Presence of higher values could be indicative of an issue.
+
+## `riak-debug`
+
+The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes.
+
+`riak-debug` also runs `riak-admin diag`, which performs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well.
+
+{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}}
+The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion.
+{{% /note %}}
+
+## Strong Consistency Stats
+
+Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The tables below list those stats.
+
+### GET-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
+`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
+`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+
+### PUT-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
+`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
+`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+
+## riak-admin diag
+
+Running `riak-admin diag` by itself will perform a check of all of the
+data partitions in your cluster.
It will return a listing of partitions
+that have been checked, each of which looks something like this:
+
+```
+{1392993748081016843912887106182707253109560705024,  % the partition checked
+ 'dev-rel@127.0.0.1'},                               % that partition's nodename
+```
+
+At the end of that (potentially very long) listing of checked
+partitions, it will print notices, warnings, and other pieces of
+information about issues that it has found, including date/time, message
+type, and a detailed description. Here's an example:
+
+```
+15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
+15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
+```
+
+Messages bear the following types (derived from
+[syslog](http://en.wikipedia.org/wiki/Syslog) severity levels):
+
+* `debug`
+* `info`
+* `notice`
+* `warning`
+* `error`
+* `critical`
+* `alert`
+* `emergency`
+
+#### Command flags
+
+Attaching the `--help` flag will return a list of flags and commands
+that can be used with Riaknostic:
+
+```
+Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]
+
+-h, --help            Display help/usage dialogue
+-d, --level           Minimum message severity level (default: notice)
+-l, --list            Describe available diagnostic tasks
+--export              Package system info in '/export.zip'
+check_name            A specific check to run
+```
+
+Running `riak-admin diag` with the `--list` flag will return a list of
+available diagnostic checks. The following checks are available:
+
+Check | Description
+:-----|:-----------
+`disk` | Data directory permissions and atime
+`dumps` | Find crash dumps
+`memory_use` | Measure memory usage
+`nodes_connected` | Cluster node liveness
+`ring_membership` | Cluster membership validity
+`ring_preflists` | Check if the ring satisfies `n_val`
+`ring_size` | Check if the ring size is valid
+`search` | Check whether Riak Search is enabled on all nodes
+
+The `--level` flag enables you to specify the log level and thus to
+filter messages based on type. You can pass in any of the message types
+listed above (`debug`, `info`, etc.).
+
+The `--level` flag can be used when running `riak-admin diag` with or
+without specifying a diagnostic check.
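+
+For example, following the usage shown above, specific checks from the
+table can be run on their own, and the severity threshold can be
+lowered to surface informational messages:
+
+```bash
+# Run just the disk and ring_size checks
+riak-admin diag disk ring_size
+
+# Run all checks, reporting messages at 'info' level and above
+riak-admin diag --level info
+```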
+#### Contributing
+
+Do you have an idea that would help us improve Riaknostic? If so, fork
+the [GitHub repository](https://github.com/basho/riaknostic) and send us
+a pull request with your changes. The code is documented with
+[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
+Docs a read before you contribute.
+
+If you want to run the Riaknostic script while developing and you don't
+have it hooked up to your local Riak installation, you can invoke it
+directly like so:
+
+```bash
+./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
+```
+
+Those extra options are usually assigned by the `riak-admin` script for
+you, but here's how to set them:
+
+* `--etc` --- The location of your Riak configuration directory (usually
+  `/etc`). In the example above, configuration is in the generated
+  directory of a source checkout of Riak.
+* `--base` --- The "base" directory of Riak, usually the root of the
+  generated directory or `/usr/lib/riak` on Linux. Scan the
+  `riak-admin` script for how the `RUNNER_BASE_DIR` variable is
+  assigned on your platform.
+* `--user` --- The user/UID as which the Riak node runs. In a source
+  checkout, it's the current user; on most systems, it's `riak`.
+
+## Related Resources
+
+* [The riak-admin configuration management tool](../../admin/riak-admin/)
+* [Riaknostic](http://riaknostic.basho.com/)
+* [HTTP API Status](../../../developing/api/http/status/)
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/load-balancing.md b/content/riak/kv/2.9.0p5/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..8cf4b4e2ec
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations/load-balancing.md
@@ -0,0 +1,25 @@
+---
+draft: true
+title: "Load Balancing"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+#menu:
+#  riak_kv-2.9.0p5:
+#    name: "Load Balancing"
+#    identifier: "cluster_operations_load_balancing"
+#    weight: 111
+#    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/cluster-operations/load-balancing/
+  - /riak/2.9.0/using/cluster-operations/load-balancing/
+  - /riak/kv/2.9.0/using/cluster-operations/load-balancing/
+  - /riak/kv/2.9.0p1/using/cluster-operations/load-balancing/
+  - /riak/kv/2.9.0p2/using/cluster-operations/load-balancing/
+  - /riak/kv/2.9.0p3/using/cluster-operations/load-balancing/
+  - /riak/kv/2.9.0p4/using/cluster-operations/load-balancing/
+---
+
+**TODO: Add content (not sure where this exists in docs)**
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/logging.md b/content/riak/kv/2.9.0p5/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..0e365082d8
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/cluster-operations/logging.md
@@ -0,0 +1,51 @@
+---
+title: "Enabling and Disabling Debug Logging"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Logging"
+    identifier: "cluster_operations_logging"
+    weight: 105
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/cluster-operations/logging/
+  - /riak/2.9.0/using/cluster-operations/logging/
+  - /riak/kv/2.9.0/using/cluster-operations/logging/
+  - /riak/kv/2.9.0p1/using/cluster-operations/logging/
+  - /riak/kv/2.9.0p2/using/cluster-operations/logging/
+  - /riak/kv/2.9.0p3/using/cluster-operations/logging/
+  - /riak/kv/2.9.0p4/using/cluster-operations/logging/
+---
+
+If you'd like to enable debug logging on the current node, i.e. set the
+console log level to `debug`, you can do so without restarting the node
+by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:
+
+```erlang
+lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
+```
+
+You should replace the file location above (`/var/log/riak/console.log`)
+with your platform-specific location, e.g. `./log/console.log` for a
+source installation. This location is specified by the
+`log.console.file` configuration parameter.
+
+If you'd like to enable debug logging on _all_ nodes instead of just one
+node, you can enter the Erlang console of any running node by running `riak
+attach` and enter the following:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
+```
+
+As before, use the appropriate log file location for your cluster.
+
+At any time, you can set the log level back to `info`:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
+``` diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/replacing-node.md b/content/riak/kv/2.9.0p5/using/cluster-operations/replacing-node.md new file mode 100644 index 0000000000..97819ba283 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/cluster-operations/replacing-node.md @@ -0,0 +1,104 @@ +--- +title: "Replacing a Node" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Replacing a Node" + identifier: "cluster_operations_replace_node" + weight: 102 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.0p5/using/cluster-operations/replacing-node/ + - /riak/2.9.0/using/cluster-operations/replacing-node/ + - /riak/kv/2.9.0/using/cluster-operations/replacing-node/ + - /riak/kv/2.9.0p1/using/cluster-operations/replacing-node/ + - /riak/kv/2.9.0p2/using/cluster-operations/replacing-node/ + - /riak/kv/2.9.0p3/using/cluster-operations/replacing-node/ + - /riak/kv/2.9.0p4/using/cluster-operations/replacing-node/ +--- + + +At some point, for various reasons, you might need to replace a node in +your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/2.9.0p5/using/repair-recovery)). Here is the recommended way to go +about replacing a node. + +1. Back up your data directory on the node in question. In this example +scenario, we'll call the node `riak4`: + + ```bash + sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak + ``` + + If you have any unforeseen issues at any point in the node + replacement process, you can restore the node's data from this + backup. + +2. Download and install Riak on the new node you wish to bring into the +cluster and have it replace the `riak4` node. We'll call the new node +`riak7` for the purpose of this example. + +3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-cli/#start): + + ```bash + riak start + ``` + +4. Plan the join of the new `riak7` node to an existing node already +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: + + ```bash + riak-admin cluster join riak0 + ``` + +5. Plan the replacement of the existing `riak4` node with the new +`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster) command: + + ```bash + riak-admin cluster replace riak4 riak7 + ``` + + <div class=info> + <div class=title>Single Nodes</div> + If a node is started singly using default settings (as, for example, + you might do when you are building your first test environment), you + will need to remove the ring files from the data directory after you + edit `/etc/vm.args`. `riak-admin cluster replace` will not work as + the node has not been joined to a cluster. + </div> + +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster) command executed on the new +`riak7` node: + + ```bash + riak-admin cluster plan + ``` + +7. 
If the changes are correct, you can commit them with the +[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster) command: + + ```bash + riak-admin cluster commit + ``` + + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster): + + ```bash + riak-admin cluster clear + ``` + +Once you have successfully replaced the node, it should begin leaving +the cluster. You can check on ring readiness after replacing the node +with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#member-status) +commands. + +{{% note title="Ring Settling" %}} +You'll need to make sure that no other ring changes occur between the time +when you start the new node and the ring settles with the new IP info. + +The ring is considered settled when the new node reports `true` when you run +the `riak-admin ringready` command. +{{% /note %}} diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/secondary-indexes.md b/content/riak/kv/2.9.0p5/using/cluster-operations/secondary-indexes.md new file mode 100644 index 0000000000..66d6e2ff69 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/cluster-operations/secondary-indexes.md @@ -0,0 +1,89 @@ +--- +draft: true +title: "Secondary Indexes" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +#menu: +# riak_kv-2.9.0p5: +# name: "Secondary Indexes" +# identifier: "cluster_operations_2i" +# weight: 109 +# parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.0p5/using/cluster-operations/secondary-indexes/ + - /riak/2.9.0/using/cluster-operations/secondary-indexes/ + - /riak/kv/2.9.0/using/cluster-operations/secondary-indexes/ + - /riak/kv/2.9.0p1/using/cluster-operations/secondary-indexes/ + - /riak/kv/2.9.0p2/using/cluster-operations/secondary-indexes/ + - /riak/kv/2.9.0p3/using/cluster-operations/secondary-indexes/ + - /riak/kv/2.9.0p4/using/cluster-operations/secondary-indexes/ +--- + + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. 
+ +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.9.0p5/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..c5bb1e1b2d --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/cluster-operations/strong-consistency.md @@ -0,0 +1,80 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.0p5/using/cluster-operations/strong-consistency/ + - /riak/2.9.0/using/cluster-operations/strong-consistency/ + - /riak/kv/2.9.0/using/cluster-operations/strong-consistency/ + - /riak/kv/2.9.0p1/using/cluster-operations/strong-consistency/ + - /riak/kv/2.9.0p2/using/cluster-operations/strong-consistency/ + - /riak/kv/2.9.0p3/using/cluster-operations/strong-consistency/ + - /riak/kv/2.9.0p4/using/cluster-operations/strong-consistency/ +--- + + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. 
`consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats. + +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/2.9.0p5/using/cluster-operations/tictac-active-anti-entropy.md new file mode 100644 index 0000000000..f0109f555a --- /dev/null +++ 
b/content/riak/kv/2.9.0p5/using/cluster-operations/tictac-active-anti-entropy.md @@ -0,0 +1,41 @@ +--- +title: "TicTac Active Anti-Entropy" +description: "An Active Anti-Entropy library" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "TicTac Active Anti-Entropy" + identifier: "TicTac_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +version_history: + in: "2.9.0+" +aliases: + - /riak/kv/2.9.0p5/ops/advanced/tictacaae/ + - /riak/2.9.0p5/ops/advanced/ticktacaae/ + - /riak/2.9.0p5/using/cluster-operations/tictac-active-anti-entropy/ + - /riak/2.9.0/using/cluster-operations/tictac-active-anti-entropy/ + - /riak/kv/2.9.0/using/cluster-operations/tictac-active-anti-entropy/ + - /riak/kv/2.9.0p1/using/cluster-operations/tictac-active-anti-entropy/ + - /riak/kv/2.9.0p2/using/cluster-operations/tictac-active-anti-entropy/ + - /riak/kv/2.9.0p3/using/cluster-operations/tictac-active-anti-entropy/ + - /riak/kv/2.9.0p4/using/cluster-operations/tictac-active-anti-entropy/ +--- + + + + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) (AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +## TicTac AAE + +The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to fully integrate the library into the KV 3.0 release. + +TicTac Active Anti-Entropy makes two changes to the way anti-entropy has previously worked in Riak. The first change is to the way Merkle trees are constructed, so that they are built incrementally. The second change allows the underlying anti-entropy key store to be key-ordered while still allowing faster access to keys via their Merkle tree location or the last modified date of the object. + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and +off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements. 
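+As a rough sketch of what this looks like in `riak.conf` — the parameter names below follow the 2.9 configuration schema, but you should confirm them against the [configuration reference](../../../configuring/reference/) before relying on them: + +```riakconf +## Run TicTac AAE ("passive" builds the AAE key stores and trees +## without running exchanges; "active" also runs exchanges) +tictacaae_active = active + +## Hours between full rebuilds of each vnode's AAE store +## (assumed default shown; verify in the configuration reference) +tictacaae_rebuildwait = 336 +``` 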
diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.9.0p5/using/cluster-operations/v2-multi-datacenter.md new file mode 100644 index 0000000000..a1d4b792b9 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/cluster-operations/v2-multi-datacenter.md @@ -0,0 +1,267 @@ +--- +title_supertext: "V2 Multi-Datacenter" +title: "Replication Operations" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "V2 Multi-Datacenter" + identifier: "cluster_operations_v2" + weight: 115 + parent: "managing_cluster_operations" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/ops/mdc/v2/operations + - /riak/kv/2.9.0p5/ops/mdc/v2/operations + - /riak/2.9.0p5/using/cluster-operations/v2-multi-datacenter/ + - /riak/2.9.0/using/cluster-operations/v2-multi-datacenter/ + - /riak/kv/2.9.0/using/cluster-operations/v2-multi-datacenter/ + - /riak/kv/2.9.0p1/using/cluster-operations/v2-multi-datacenter/ + - /riak/kv/2.9.0p2/using/cluster-operations/v2-multi-datacenter/ + - /riak/kv/2.9.0p3/using/cluster-operations/v2-multi-datacenter/ + - /riak/kv/2.9.0p4/using/cluster-operations/v2-multi-datacenter/ +--- + + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter/) instead. +{{% /note %}} + +Riak's Multi-Datacenter Replication system is largely +controlled by the `riak-repl` command. The sections below detail the +available subcommands. + +## add-listener + +Adds a listener (primary) to the given node, IP address, and port. + +```bash +riak-repl add-listener <nodename> <listen_ip> <port> +``` + +Below is an example usage: + +```bash +riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010 +``` + +## add-nat-listener + +Adds a NAT-aware listener (primary) to the given node, IP address, port, +NAT IP, and NAT port. If a non-NAT listener already exists with the same +internal IP and port, it is "upgraded" to a NAT listener. + +```bash +riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port> +``` + +Below is an example usage: + +```bash +riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010 +``` + +## del-listener + +Removes and shuts down a listener (primary) on the given node, IP +address, and port. + +```bash +riak-repl del-listener <nodename> <listen_ip> <port> +``` + +Below is an example usage: + +```bash +riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010 +``` + +## add-site + +Adds a site (secondary) to the local node, connecting to the specified +listener. + +```bash +riak-repl add-site <ipaddr> <portnum> <sitename> +``` + +Below is an example usage: + +```bash +riak-repl add-site 10.0.1.156 9010 newyork +``` + +## del-site + +Removes a site (secondary) from the local node by name. + +```bash +riak-repl del-site <sitename> +``` + +Below is an example usage: + +```bash +riak-repl del-site newyork +``` + +## status + +Obtains status information about replication. Reports counts on how much +data has been transmitted, transfer rates, message queue lengths of +clients and servers, number of fullsync operations, and connection +status. This command only displays useful information on the leader +node. + +```bash +riak-repl status +``` + +## start-fullsync + +Manually initiates a fullsync operation with connected sites. + +```bash +riak-repl start-fullsync +``` + +## cancel-fullsync + +Cancels any fullsync operations in progress. If a partition is in +progress, synchronization will stop after that partition completes. +During cancellation, `riak-repl status` will show `cancelled` in the +status. + +```bash +riak-repl cancel-fullsync +``` + +## pause-fullsync + +Pauses any fullsync operations in progress. If a partition is in +progress, synchronization will pause after that partition completes. +While paused, `riak-repl status` will show `paused` in the status +information. Fullsync may be cancelled while paused. + +```bash +riak-repl pause-fullsync +``` + +## resume-fullsync + +Resumes any fullsync operations that were paused. If a fullsync +operation was running at the time of the pause, the next partition will +be synchronized. If not, it will wait until the next `start-fullsync` +command or `fullsync_interval`. + +```bash +riak-repl resume-fullsync +``` + +## riak-repl Status Output + +The following definitions describe the output of the `riak-repl status` +command. Please note that many of these statistics will only appear on +the current leader node, and that all counts will be reset to 0 upon +restarting Riak. 
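+For example — assuming a running node and standard shell tools — you can quickly confirm which node is the current leader and how many fullsync operations have completed by filtering the status output for two of the fields documented below: + +```bash +# Both fields are described in the tables that follow +riak-repl status | grep -E 'leader|server_fullsyncs' +``` 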
+ +### Client + +Field | Description +:-----|:----------- +`client_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.0p5/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a> +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected sites +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of site connections made to this node +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. + +### Server + +Field | Description +:-----|:----------- +`server_bytes_recv` | The total number of bytes the server (listener) has received +`server_bytes_sent` | The total number of bytes the server (listener) has sent +`server_connect_errors` | The number of listener to site connection errors +`server_connects` | The number of times the listener connects to the client site +`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started +`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`server_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.0p5/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a> + +### Elections and Objects + +Field | Description +:-----|:----------- +`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected +`elections_leader_changed` | The number of times a Riak node has surrendered leadership +`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation. +`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication +`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*. 
+`objects_sent` | The number of objects sent via realtime replication + +### Other + +Field | Description +:-----|:----------- +`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>` +`[sitename]_ips` | Defines a replication site +`leader` | Which node is the current leader of the cluster +`local_leader_message_queue_len` | The length of the object queue on the leader +`local_leader_heap_size` | The amount of memory the leader is using + +## Client Statistics + +Field | Description +------|------------ +`node` | A unique ID for the Riak node on which the client (site) is running +`site` | The connected site name configured with `riak-repl add-site` +`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1` +`fullsync_worker` | The Erlang process ID of the fullsync worker +`waiting_to_retry` | The listeners currently waiting to retry replication after a failure +`connected` | A list of connected clients<ul><li>`connected` --- The IP address and port of a connected client (site)</li><li>`cluster_name` --- The name of the connected client (site)</li><li>`connecting` --- The PID, IP address, and port of a client currently establishing a connection</li></ul> +`state` | State shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul> + +## Bounded Queue + +The bounded queue is responsible for holding objects that are waiting to +participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/2.9.0p5/configuring/v2-multi-datacenter/) guide for more information. + +Field | Description +------|------------ +`queue_pid` | The Erlang process ID of the bounded queue +`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*. +`queue_length` | The number of Riak objects currently in the bounded queue +`queue_byte_size` | The size of all objects currently in the queue +`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*. +`queue_percentage` | The percentage of the queue that is full +`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged +`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more + +## Server Statistics + +Field | Description +------|------------ +`node` | A unique ID for the Riak node on which the server (listener) is running +`site` | The connected site name configured with `riak-repl add-site` +`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. +`fullsync_worker` | The Erlang process ID of the fullsync worker +`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/2.9.0p5/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above +`state` | State shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. 
They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul> +`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server + +## Keylist Strategy + +The following fields appear under both the `keylist_server` and +`keylist_client` fields. Any differences are described in the table. + +Field | Description +------|------------ +`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication. +`partition_start` | The number of elapsed seconds since replication has started on a given partition +`stage_start` | The number of elapsed seconds since replication has started on a given stage +`get_pool_size` | The number of Riak get finite state workers available to process requests diff --git a/content/riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter.md new file mode 100644 index 0000000000..66dd2b0906 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter.md @@ -0,0 +1,429 @@ +--- +title_supertext: "V3 Multi-Datacenter" +title: "Replication Operations" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "V3 Multi-Datacenter" + identifier: "cluster_operations_v3" + weight: 114 + parent: "managing_cluster_operations" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/ops/mdc/v3/operations + - /riak/kv/2.9.0p5/ops/mdc/v3/operations + - /riak/2.9.0p5/using/cluster-operations/v3-multi-datacenter/ + - /riak/2.9.0/using/cluster-operations/v3-multi-datacenter/ + - /riak/kv/2.9.0/using/cluster-operations/v3-multi-datacenter/ + - /riak/kv/2.9.0p1/using/cluster-operations/v3-multi-datacenter/ + - /riak/kv/2.9.0p2/using/cluster-operations/v3-multi-datacenter/ + - /riak/kv/2.9.0p3/using/cluster-operations/v3-multi-datacenter/ + - /riak/kv/2.9.0p4/using/cluster-operations/v3-multi-datacenter/ +--- + + +[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter +[config v3 nat]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/multi-datacenter/statistics + +This document explains how to manage replication with the `riak-repl` +command. The behavior of some of these commands can be altered by +setting appropriate [configuration][config v3 mdc] values. + +All commands need to be run only once on a single node of a cluster for +the changes to propagate to all other nodes. All changes will persist +across node restarts and will automatically take effect when nodes are +added to the cluster. + +## Cluster Connectivity + +#### clustername + +Set the `clustername` for all nodes in a Riak cluster. 
+ +* Without a parameter, returns the current name of the cluster +* With a parameter, names the current cluster + +To **set** the `clustername`: + +* Syntax: `riak-repl clustername <clustername>` +* Example: `riak-repl clustername Boston` + +To **get** the `clustername`: + +* Syntax: `riak-repl clustername` +* Example: `riak-repl clustername` + +#### connect + +The `connect` command establishes communications from a source cluster +to a sink cluster of the same ring size. The `host:port` of the sink +cluster is used for this. The IP and port to connect to can be found in +the `advanced.config` of the remote cluster, under `riak_core` and +`cluster_mgr`. + +The `host` can be either an IP address + +* Syntax: `riak-repl connect <ip>:<port>` +* Example: `riak-repl connect 192.168.2.1:9080` + +...or a hostname that will resolve to an IP address. + +* Syntax: `riak-repl connect <host>:<port>` +* Example: `riak-repl connect Austin:9080` + +#### disconnect + +Disconnects a source cluster from a sink cluster. + +You may define a `host:port` combination + +* Syntax: `riak-repl disconnect <host>:<port>` +* Example: `riak-repl disconnect 192.168.2.1:9080` + +...or use the *name* of the cluster. + +* Syntax: `riak-repl disconnect <sink_clustername>` +* Example: `riak-repl disconnect Austin` + +#### connections + +Display a list of connections between source and sink clusters. + +* Syntax: `riak-repl connections` +* Example: `riak-repl connections` + +#### clusterstats + +Displays current cluster stats using an optional `ip:port` as well as an +optional `protocol-id`. + +`protocol-id` can be one of the following: + +* `cluster_mgr` +* `rt_repl` +* `fs_repl` + +The `clusterstats` command in use: + +* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>` +* Example: `riak-repl clusterstats 192.168.2.1:9080` +* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl` + + +## Realtime Replication Commands + +#### realtime enable + +Enable realtime replication from a source cluster to sink clusters. + +This will start queuing updates for replication. The cluster will still +require an invocation of `realtime start` for replication to occur. + +* Syntax: `riak-repl realtime enable <sink_clustername>` +* Example: `riak-repl realtime enable Austin` + +#### realtime disable + +Disable realtime replication from a source cluster to sink clusters. + +* Syntax: `riak-repl realtime disable <sink_clustername>` +* Example: `riak-repl realtime disable Austin` + + +#### realtime start + +Start realtime replication connections from a source cluster to sink +clusters. See also `realtime enable` (above). + +* Syntax: `riak-repl realtime start <sink_clustername>` +* Example: `riak-repl realtime start Austin` + +#### realtime stop + +Stop realtime replication from a source cluster to sink clusters. + +* Syntax: `riak-repl realtime stop <sink_clustername>` +* Example: `riak-repl realtime stop Austin` + + +## Fullsync Replication Commands + +These behaviors can be altered by using the `advanced.config` +`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information. + +#### fullsync enable + +Enable fullsync replication from a source cluster to sink clusters. By +default, a fullsync will begin as soon as a connection to the remote +cluster is established. + +* Syntax: `riak-repl fullsync enable <sink_clustername>` +* Example: `riak-repl fullsync enable Austin` + +#### fullsync disable + +Disables fullsync for a cluster. 
+ +* Syntax: `riak-repl fullsync disable <sink_clustername>` +* Example: `riak-repl fullsync disable Austin` + +#### fullsync start + +Starts a fullsync. If the application configuration +`fullsync_on_connect` is set to `false`, a fullsync needs to be started +manually. This is also used to trigger a periodic fullsync using a cron +job. While a fullsync is in progress, a `start` command is ignored and a +message is logged. + +* Syntax: `riak-repl fullsync start <sink_clustername>` +* Example: `riak-repl fullsync start Austin` + +#### fullsync stop + +Stops a fullsync. + +* Syntax: `riak-repl fullsync stop <sink_clustername>` +* Example: `riak-repl fullsync stop Austin` + +## Cascading Realtime Writes + +#### realtime cascades + +Shows the current cascading realtime setting. + +* Syntax: `realtime cascades` +* Example: `riak-repl realtime cascades` + +#### realtime cascades always + +Enable realtime cascading writes. + +* Syntax: `realtime cascades always` +* Example: `riak-repl realtime cascades always` + +#### realtime cascades never + +Disable realtime cascading writes. + +* Syntax: `realtime cascades never` +* Example: `riak-repl realtime cascades never` + + +## NAT + +**Note**: See the [V3 Multi Data Center Replication With NAT][config v3 nat] for more information. + +#### nat-map show + +Show the current NAT mapping table. + +* Syntax: `nat-map show` +* Example: `riak-repl nat-map show` + +#### nat-map add + +Adds a NAT map from the external IP, with an optional port, to an +internal IP. + +* Syntax: `nat-map add <externalip>[:port] <internalip>` +* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2` + +#### nat-map del + +Deletes a specific NAT map entry. + +* Syntax: `nat-map del <externalip>[:port] <internalip>` +* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2` + +NAT changes will be applied once fullsync and/or realtime replication +has been stopped and started. + + +## Riak CS MDC Gets + +#### proxy-get enable + +Enable Riak CS `proxy_get` requests from a **sink** cluster (if +`proxy_get` has been enabled in `advanced.config`). + +* Syntax: `proxy-get enable <sink_clustername>` +* Example: `riak-repl proxy-get enable newyorkbackup` + +#### `proxy-get disable` + +Disable Riak CS `proxy_get` requests from a **sink** cluster (if +`proxy_get` has been enabled in `advanced.config`). + +* Syntax: `proxy-get disable <sink_clustername>` +* Example: `riak-repl proxy-get disable newyorkbackup` + +#### `add-block-provider-redirect` + +Provide a redirection to the `<to-cluster-id>` for `proxy_get` if the +`<from-cluster>` is going to be decommissioned. + +* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>` +* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"` + +#### `show-block-provider-redirect` +Show the mapping for a given cluster-id redirect. + +* Syntax: `riak-repl show-block-provider-redirect <from-cluster>` +* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"` + +#### `delete-block-provider-redirect` +Deletes an existing redirect so that `proxy_get` requests go to the original +provider cluster id again. + +* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>` +* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"` + +#### `show-local-cluster-id` + +Display this cluster's cluster-id tuple, for use with the +`*-block-provider-redirect` commands. 
+ +**Note**: A cluster-id is surrounded by double quotes, which need to be +included when passed to `*-block-provider-redirect`. + +* Syntax: `riak-repl show-local-cluster-id` +* Example: + + ```bash + riak-repl show-local-cluster-id + ``` + + Possible output: + + ``` + local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}" + ``` + +## `riak-repl` Status Output + +Details about the `riak-repl status` command can be found under +[Statistics][ref v3 stats]. + + +## Tuning + +These tuning values may also be set via the node's `advanced.config` file. +See the [Configuration Guide][config v3 mdc] for more information. + +#### `fullsync max_fssource_node` + +This limits the number of fullsync workers that will be running on each +individual node in a source cluster. This is a hard limit for *all* +fullsyncs that are enabled. Additional fullsync configurations will +*not* increase the number of fullsync workers allowed to run on any +node. This only affects nodes on the source cluster on which this +parameter is defined via the configuration file or command line. + +* Syntax: `riak-repl fullsync max_fssource_node <value>` +* Default: `1` +* Example: `riak-repl fullsync max_fssource_node 2` + +#### `fullsync max_fssource_cluster` + +This is the hard limit of fullsync workers that will be running on the +source side of a cluster across all nodes on that cluster for a fullsync +to a sink cluster. This means if one has configured fullsync for two +different clusters, both with a max_fssource_cluster of 5, 10 fullsync +workers can be in progress. Only affects nodes on the source cluster on +which this parameter is defined via the configuration file or the +command line. + +* Syntax: `riak-repl fullsync max_fssource_cluster <value>` +* Default: `5` +* Example: `riak-repl fullsync max_fssource_cluster 5` + + +#### `fullsync max_fssink_node` + +This limits the number of fullsync workers allowed to run on each +individual node in a sink cluster. This is a hard limit for each +fullsync source node interacting with a sink node. Thus, multiple +simultaneous source connections to a sink node will have to share the +sink node’s number of maximum connections. Only affects nodes on the +sink cluster on which this parameter is defined via the configuration +file or command line. + +* Syntax: `riak-repl fullsync max_fssink_node <value>` +* Default: `1` +* Example: `riak-repl fullsync max_fssink_node 5` + + +## Mixing Version 2 Replication with Version 3 Replication + +Riak Version 2 Replication and Version 3 Replication can be safely used +at the same time. If you choose to move to Version 3 Replication +completely, we recommend disabling Version 2 realtime +replication bucket hooks with the `riak-repl modes` command. + +#### `riak-repl modes` + +`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13` +(Version 3) separated by spaces (without commas). 
+ +* Syntax: `riak-repl modes <modelist>` +* Example: + + ```bash + riak-repl modes mode_repl12 mode_repl13 + ``` + + Possible output: + + ``` + Current replication modes: [mode_repl12,mode_repl13] + ``` + +To check the current replication modes: + +* Syntax: `riak-repl modes` +* Example: + + ```bash + riak-repl modes + ``` + + Possible output: + + ``` + Current replication modes: [mode_repl12,mode_repl13] + ``` + +## Configurations and Metadata in Replication + +Fullsync and Realtime replication replicates data from source clusters to sink clusters, +but some configurations and metadata (such as search indices and bucket properties) will +not be replicated. + +Non-replication of certain configurations and metadata supports +heterogeneous cluster configurations in Replication, but there are operational things you can +do when you want homogeneous cluster configurations. + +### Search Indices in Replication + +Any search index that is created on a source cluster will _not_ be +created on sink clusters as part of replication. + +If you want search indices on a source cluster to be present on the +sink clusters, you should update this data for each +cluster at the same time you would change the source cluster. + +### Buckets and Bucket Types in Replication + +Buckets and Bucket Type properties on the source cluster +will _not_ be replicated from source clusters to sink clusters. + +If you want the properties for Buckets or Bucket Types +present on the source cluster to be propagated to sink clusters, +you should update this data for each cluster at the same +time you would change the source cluster. diff --git a/content/riak/kv/2.9.0p5/using/performance.md b/content/riak/kv/2.9.0p5/using/performance.md new file mode 100644 index 0000000000..3e92e3a7e5 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/performance.md @@ -0,0 +1,272 @@ +--- +title: "Improving Performance" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Performance" + identifier: "managing_performance" + weight: 206 + parent: "managing" +toc: true +aliases: + - /riak/kv/2.9.0p5/ops/tuning/linux/ + - /riak/2.9.0p5/ops/tuning/linux/ + - /riak/2.9.0p5/using/performance/ + - /riak/2.9.0/using/performance/ + - /riak/kv/2.9.0/using/performance/ + - /riak/kv/2.9.0p1/using/performance/ + - /riak/kv/2.9.0p2/using/performance/ + - /riak/kv/2.9.0p3/using/performance/ + - /riak/kv/2.9.0p4/using/performance/ +--- + + +Many Unix-like operating systems and distributions are tuned for desktop +or light use out of the box and not for a production database. This +guide describes recommended system performance tunings for operators of +new and existing Riak clusters. The tunings present in this guide should +be considered a starting point. It is important to make note of what +changes are made and when in order to measure the impact of those +changes. + +For performance and tuning recommendations specific to running Riak +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.0p5/using/performance/amazon-web-services). + +{{% note title="Note on other operating systems" %}} +Unless otherwise specified, the tunings recommended below are for Linux +distributions. Users implementing Riak on BSD and Solaris distributions can +use these tuning recommendations to make analogous changes in those operating +systems. 
{{% /note %}} + +## Storage and File System Tuning + +### Virtual Memory + +Due to the heavily I/O-focused profile of Riak, swap usage can result in +the entire server becoming unresponsive. We recommend setting +`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much +as possible: + +```config +vm.swappiness = 0 +``` + +Ideally, you should disable swap to ensure that Riak's process pages are +not swapped. Disabling swap will allow Riak to crash in situations where +it runs out of memory. This will leave a crash dump file, named +`erl_crash.dump`, in the `/var/log/riak` directory which can be used to +determine the cause of the memory usage. + +### Transparent Huge Pages (THP) + +Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately, this operation is rather OS-specific. As many of our customers are running Red Hat 6, we have included instructions for it below. If you are using a different operating system, please refer to the documentation for your OS. + +In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line: + +``` +transparent_hugepage=never +``` + +For the change to become effective, a server reboot is required. + +{{% note title="Note on Kernel Tuning Tools" %}} +Some kernel tuning tools, such as ktune, specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the kernel tuning tool you are using as to how to disable THP. +{{% /note %}} + +### Mounts + +Riak makes heavy use of disk I/O for its storage operations. It is +important that you mount volumes that Riak will be using for data +storage with the `noatime` flag, meaning that filesystem +[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be +touched when read. This flag can be set temporarily using the following +command: + + +```bash +mount -o remount,noatime <riak_data_volume> +``` + +Replace `<riak_data_volume>` in the above example with your actual Riak +data volume. The `noatime` option can be set in `/etc/fstab` so that it is +applied permanently at mount time, as in the example below. 
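+For illustration, an `/etc/fstab` entry with `noatime` might look like the following (the device name and mount point here are placeholders; substitute your actual Riak data volume): + +```config +# <device>  <mount point>  <fs>  <options>         <dump> <pass> +/dev/xvdf   /var/lib/riak  ext4  defaults,noatime  0      0 +``` 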
+ +### Schedulers + +I/O or disk scheduling is a blanket term used to describe the method by +which an operating system chooses how to order input and output +operations to and from storage. + +The default I/O scheduler (elevator) on Linux is completely fair queuing +or `cfq`, which is designed for desktop use. While `cfq` is a good +general-purpose scheduler, it is not designed to provide the kind of +throughput expected in production database deployments. + +Scheduler recommendations: + +* The `noop` scheduler when deploying on iSCSI over HBAs, or any + hardware-based RAID. +* The `deadline` scheduler when using SSD-based storage. + +To check the scheduler in use for block device `sda`, for example, use +the following command: + +```bash +cat /sys/block/sda/queue/scheduler +``` + +To set the scheduler to `deadline`, use the following command: + +```bash +echo deadline > /sys/block/sda/queue/scheduler +``` + +The default I/O scheduler queue size is 128. The scheduler queue sorts +writes in an attempt to optimize for sequential I/O and reduce seek +time. Changing the depth of the scheduler queue to 1024 can increase the +proportion of sequential I/O that disks perform and improve overall +throughput. + +To check the scheduler depth for block device `sda`, use the following +command: + +```bash +cat /sys/block/sda/queue/nr_requests +``` + +To increase the scheduler depth to 1024, use the following command: + +```bash +echo 1024 > /sys/block/sda/queue/nr_requests +``` + +### Filesystem + +Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and +[XFS](http://xfs.org/index.php/Main_Page) are recommended on some +operating systems for greater reliability and recoverability. + +At this time, Basho can recommend using ZFS on Solaris, SmartOS, and +OmniOS. ZFS may work well with Riak on direct Solaris clones like +IllumOS, but we cannot yet recommend this. [ZFS on +Linux](http://zfsonlinux.org) is still too early in its project lifetime +to be recommendable for production use due to concerns that have been +raised about excessive memory use. ZFS on FreeBSD is more mature than +ZFS on Linux, but Basho has not yet performed sufficient performance and +reliability testing to recommend using ZFS and Riak on FreeBSD. + +In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and +[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on +operating systems on which ZFS or XFS are not available or recommended. + +The ext4 file system defaults include two options that increase +integrity but slow performance. Because Riak's integrity is based on +multiple nodes holding the same data, these two options can be changed +to boost I/O performance. We recommend setting `barrier=0` and +`data=writeback` when using the ext4 filesystem. + +Similarly, the XFS file system defaults can be optimized to improve +performance. We recommend setting `nobarrier`, `logbufs=8`, +`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem. + +As with the `noatime` setting, these settings should be added to +`/etc/fstab` so that they are persisted across server restarts. + +## Kernel and Network Tuning + +The following settings are minimally sufficient to improve many aspects +of Riak usage on Linux, and should be added or updated in +`/etc/sysctl.conf`: + +```config +net.ipv4.tcp_max_syn_backlog = 40000 +net.core.somaxconn = 40000 +net.core.wmem_default = 8388608 +net.core.rmem_default = 8388608 +net.ipv4.tcp_sack = 1 +net.ipv4.tcp_window_scaling = 1 +net.ipv4.tcp_fin_timeout = 15 +net.ipv4.tcp_keepalive_intvl = 30 +net.ipv4.tcp_tw_reuse = 1 +net.ipv4.tcp_moderate_rcvbuf = 1 +``` + +{{% note title="Note on system default" %}} +In general, these recommended values should be compared with the system +defaults and only changed if benchmarks or other performance metrics indicate +that networking is the bottleneck. +{{% /note %}} + +The following settings are optional, but may improve performance on a +10Gb network: + +```config +net.core.rmem_max = 134217728 +net.core.wmem_max = 134217728 +net.ipv4.tcp_mem = 134217728 134217728 134217728 +net.ipv4.tcp_rmem = 4096 277750 134217728 +net.ipv4.tcp_wmem = 4096 277750 134217728 +net.core.netdev_max_backlog = 300000 +``` + +Certain network interfaces ship with on-board features that have been +shown to hinder Riak network performance. These features can be disabled +via `ethtool`. 
+ +For an Intel chipset NIC using the +[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm) +driver running as `eth0`, for example, run the following command: + +```bash +ethtool -K eth0 lro off +``` + +For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run: + +```bash +ethtool -K eth0 tso off +``` + +`ethtool` settings can be persisted across reboots by adding the above +command to the `/etc/rc.local` script. + +{{% note title="Pro tip" %}} +Re-tuning will be required if these settings are changed, as they affect all +network operations. +{{% /note %}} + +## Optional I/O Settings + +If your cluster is experiencing excessive I/O blocking, the following +settings may help prevent disks from being overwhelmed during periods of +high write activity at the expense of peak performance for spiky +workloads: + +```config +vm.dirty_background_ratio = 0 +vm.dirty_background_bytes = 209715200 +vm.dirty_ratio = 40 +vm.dirty_bytes = 0 +vm.dirty_writeback_centisecs = 100 +vm.dirty_expire_centisecs = 200 +``` + +These settings have been tested and benchmarked by Basho in nodes with +16 GB of RAM. + +## Open Files Limit + +Riak and supporting tools can consume a large number of open file +handles during normal operation. For stability, increasing the open +files limit is necessary. See [Open Files Limit]({{<baseurl>}}riak/kv/2.9.0p5/using/performance/open-files-limit/) for more +details. + +## Other Tuning Docs + +* [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.0p5/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{<baseurl>}}riak/kv/2.9.0p5/using/performance/erlang) +* [Latency Reduction]({{<baseurl>}}riak/kv/2.9.0p5/using/performance/latency-reduction) +* [Open Files Limit]({{<baseurl>}}riak/kv/2.9.0p5/using/performance/open-files-limit/) diff --git a/content/riak/kv/2.9.0p5/using/performance/amazon-web-services.md b/content/riak/kv/2.9.0p5/using/performance/amazon-web-services.md new file mode 100644 index 0000000000..21ee8d800c --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/performance/amazon-web-services.md @@ -0,0 +1,251 @@ +--- +title: "Amazon Web Services Performance Tuning" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Amazon Web Services" + identifier: "performance_aws" + weight: 106 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.9.0p5/ops/tuning/aws + - /riak/kv/2.9.0p5/ops/tuning/aws + - /riak/2.9.0p5/using/performance/amazon-web-services/ + - /riak/2.9.0/using/performance/amazon-web-services/ + - /riak/kv/2.9.0/using/performance/amazon-web-services/ + - /riak/kv/2.9.0p1/using/performance/amazon-web-services/ + - /riak/kv/2.9.0p2/using/performance/amazon-web-services/ + - /riak/kv/2.9.0p3/using/performance/amazon-web-services/ + - /riak/kv/2.9.0p4/using/performance/amazon-web-services/ +--- + + +This guide introduces best practices for tuning Riak cluster performance +in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment. + +> **Note:** +> +> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide. + +## EC2 Instances + +EC2 instances are available as predefined types which encapsulate a +fixed amount of computing resources. For Riak, the most important of +these resources are Disk I/O, RAM, and Network I/O, followed by CPU +cores. 
With this in mind, Riak users have reported success with large, +extra large, and cluster compute instance types for use as cluster nodes +in the AWS EC2 environment. + +The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used. + +Amazon also offers a High I/O Quadruple Extra Large instance +(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features +very high I/O performance. + +EBS-Optimized EC2 instances, which provide between 500 and 1,000 +megabits per second of dedicated throughput to EBS volumes, are also +available, and are recommended for use with [Provisioned +IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/) +EBS volumes. + +Riak's primary bottleneck will be disk and network I/O, meaning that in +most cases, standard EBS will incur too much latency and iowait. Riak's +I/O pattern tends to operate on small blobs from many places on the +disk, whereas EBS is best at bulk reads and writes. The negative effects +of this pattern can be mitigated by adding RAID over multiple volumes, +using Provisioned IOPS, and/or choosing the Bitcask backend if secondary +indexes are not needed for the application. + +In any case, proper benchmarking and tuning are needed to achieve the +desired performance. + +{{% note title="Tip" %}} +Most successful AWS cluster deployments use more EC2 instances than they would +physical servers to compensate for the performance +variability caused by shared, virtualized resources. Plan to have more EC2 +instance-based nodes than physical server nodes when estimating cluster size +with respect to node count. +{{% /note %}} + +## Operating System + +### Clocks + +NTP is configured by default on Amazon EC2 Linux instances. Please +refer to the [Set the Time for an +Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html) +section of the EC2 documentation for steps on verifying if NTP is +working properly. If NTP is not working properly, significant clock +drift can occur. + +### Mounts and Scheduler + +On EBS volumes, the **deadline** scheduler should be used. To check the +scheduler in use for block device xvdf, for example, use the following +command: + +```bash +cat /sys/block/xvdf/queue/scheduler +``` + +To set the scheduler to deadline, use the following command: + +```bash +echo deadline > /sys/block/xvdf/queue/scheduler +``` + +More information on the disk scheduler is available in [Improving Performance](../). + +### Virtual Memory Subsystem + +EBS volumes have considerably less bandwidth than hardware disks. To +avoid saturating EBS bandwidth and inducing IO latency spikes, it is +recommended to tune the Linux virtual memory subsystem to flush smaller +amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings). + +### Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, back up log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally +and are not affected by a wider problem like an AWS service outage. 
Try +to determine the cause of the problem from the data you have collected. +If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or re-sold under Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk. + +Have your collected data ready when contacting TI Tokyo Client Services. A +Client Services Engineer (CSE) might request log files, configuration +files, or other information. + +## Data Loss + +Many failures either do not entail data loss or have minimal loss that +can be repaired automatically, without intervention. Outage of a single +node does not necessarily cause data loss, as other replicas of every +key are available elsewhere in the cluster. Once the node is detected as +down, other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called hinted handoff). + +The more severe data loss scenarios usually relate to hardware failure +(in the case of AWS, service failure or instance termination). In the +cases where data is lost, several options are available for restoring +the data: + +1. Restore from backup. A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but can be used to partially restore data from + lost EBS volumes. If running in a RAID configuration, rebuilding the + array may also be possible. +2. Restore from Multi-Datacenter Replication. If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the + `riak-repl` command. +3. Restore using intra-cluster repair. Riak versions 1.2 and greater + include a "repair" feature which will restore lost partitions with + data from other replicas. This currently has to be invoked manually + using the Riak console and should be performed with guidance from a + Basho CSE. + +Once data has been restored, normal operations should continue. If +multiple nodes completely lose their data, consultation and assistance +from Basho is strongly recommended. + +## Benchmarking + +Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that +simulates application operations by constructing and communicating +approximately-compatible data payloads with the Riak cluster directly. + +Benchmarking is critical to determining the appropriate EC2 instance +types, and strongly recommended. More information is available on +benchmarking Riak clusters with [Basho Bench](../benchmarking). + +Besides running Basho Bench, we also advise that you load test Riak with +your own tests to ensure that load imparted by MapReduce queries, +full-text queries, and index queries are within the expected range. + +## Simulating Upgrades, Scaling, and Failure states + +In addition to simply measuring performance, it is also important to +measure how performance degrades when the cluster is not in +steady-state. While under a simulation of live load, the following +states might be simulated: + +1. Stop one or more nodes normally and restart them after a few moments + (simulates [rolling upgrade](../../../setup/upgrading/cluster)). +2. Join two or more nodes to the cluster. +3. Leave nodes from the cluster (after step #2). +4. 
Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then + restart it. +5. Hard-reboot a node's instance using the AWS console and then + restart it. +6. Hard-stop and destroy a node's instance and build a new one from + backup. +7. Via networking, e.g. firewall, partition one or more nodes from + the rest of the cluster and then restore the original + configuration. + +## Out-of-Memory + +Sometimes, Riak will exit when it runs out of available RAM. While this +does not necessarily cause data loss, it may indicate that the cluster +needs to be scaled out. While the Riak node is out, other nodes may also +be at risk if free capacity is low on the rest of the cluster, so +monitor carefully. + +Replacing the EC2 instance type with one that has greater RAM capacity +may temporarily alleviate the problem, but out of memory (OOM) tends to +be an indication that the cluster is underprovisioned. + +Software bugs (memory leaks) could also be a cause of OOM, so we +recommend that Riak users with paid support contact TI Tokyo Client Services +if this problem occurs. + +## Dealing with IP addresses + +EC2 instances that are not provisioned inside a VPC can change the +following attributes after a restart: + +* Private IP address +* Public IP address +* Private DNS +* Public DNS + +Because these parameters play a role in a Riak instance's node name, +ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to update +the node name. + +To avoid this inconvenience, you can deploy Riak inside a +[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not +change their private IP address on restart. In addition, you get the +following benefits: + +* Access control lists can be defined at multiple levels +* The instance is not automatically open to the internet +* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/) + +## Choice of Storage + +EC2 instances support ephemeral and EBS storage. Ephemeral is local to +the instance, generally performs better, but disappears when instances +go down. + +On the other hand, EBS is effectively network attached storage that +persists after instances go down. Along with EBS you can optionally +enable [Provisioned +IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html) +(PIOPS) to provide more stable performance. + +For more information on EC2 storage options, please see their +[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html). 
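+When weighing ephemeral storage against EBS, it can help to inspect what is actually attached to an instance. A minimal check, assuming standard Linux tools (device naming varies by instance type and virtualization): + +```bash +# List attached block devices with their size and mount point; +# on EC2, volumes typically appear as xvd* or nvme* devices +lsblk -o NAME,SIZE,TYPE,MOUNTPOINT +``` 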
+ +## References + +* [Improving Performance](../) +* [Failure and Recovery](../../repair-recovery) +* [Basho Client Services Help Desk](https://help.basho.com) diff --git a/content/riak/kv/2.9.0p5/using/performance/benchmarking.md b/content/riak/kv/2.9.0p5/using/performance/benchmarking.md new file mode 100644 index 0000000000..1835bdc98c --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/performance/benchmarking.md @@ -0,0 +1,606 @@ +--- +title: "Benchmarking" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Benchmarking" + identifier: "performance_benchmarking" + weight: 100 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.9.0p5/ops/building/benchmarking + - /riak/kv/2.9.0p5/ops/building/benchmarking + - /riak/2.9.0p5/using/performance/benchmarking/ + - /riak/2.9.0/using/performance/benchmarking/ + - /riak/kv/2.9.0/using/performance/benchmarking/ + - /riak/kv/2.9.0p1/using/performance/benchmarking/ + - /riak/kv/2.9.0p2/using/performance/benchmarking/ + - /riak/kv/2.9.0p3/using/performance/benchmarking/ + - /riak/kv/2.9.0p4/using/performance/benchmarking/ +--- + + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests, and to produce +performance graphs. + +Basho Bench exposes a pluggable driver interface and has been extended +to serve as a benchmarking tool against a variety of projects. New +drivers can be written in Erlang and are generally less than 200 lines +of code. + +## Installation + +You will need: + +1. One or more load-generating machines on which to install + ```basho_bench```. Especially when testing larger clusters, a + single machine cannot generate enough load to properly exercise + the cluster. Do not run the ```basho_bench``` instances on the + Riak nodes themselves, since the load generation will compete with + Riak for resources. +2. The [R statistics language](http://www.r-project.org/) must be + installed (somewhere available to you) if you wish to generate + graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below). + +### Download ```basho_bench``` + +You can download the pre-built packages below, or build from source. + +* **Ubuntu 14.04 LTS:** + [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb) +* **CentOS 7:** + [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm) + +### Building from Source + +#### Prerequisites + +* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source/erlang) for instructions + and versioning requirements. Note: Unless you're an experienced + Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and + not CentOS) when building ```basho_bench``` from source. Later + versions of CentOS (6 and 7) have difficulty with installing and + enabling certain parts of the ```erlang-crypto``` package, which + is required by ```basho_bench```. 
+* Install ```git``` (to check out the ```basho_bench``` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory in which to generate the results:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed ```basho_bench``` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common ```~/``` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+```/home/username/bench_results/current/```.
+
+If you built ```basho_bench``` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput --- Operations per second over the duration of the test.
+* Latency at the 99th percentile, the 99.9th percentile, and the
+  maximum for the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running basho_bench, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
+
+#### Installing R on Ubuntu
+
+```bash
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed ```basho_bench``` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+```/home/username/bench_results/summary.png```.
+
+If you have built ```basho_bench``` from source, you can just use
+```make```. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the ```basho_bench/README```.
+
+## How does it work?
+
+When Basho Bench starts (`basho_bench.erl`), it reads the
+configuration (`basho_bench_config.erl`), creates a new results
+directory, and then sets up the test (`basho_bench_app.erl` and
+`basho_bench_sup.erl`).
+
+During test setup, Basho Bench creates the following:
+
+* One **stats process** (`basho_bench_stats.erl`). This process
+  receives notifications when an operation completes, plus the
+  elapsed time of the operation, and stores it in a histogram. At
+  regular intervals, the histograms are dumped to `summary.csv` as
+  well as operation-specific latency CSVs (e.g. `put_latencies.csv`
+  for the PUT operation).
+* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting
+  (`basho_bench_worker.erl`). The worker process wraps a driver
+  module, specified by the [driver](#driver)
+  configuration setting. The driver is randomly invoked using the
+  distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the
+  driver invokes operations is governed by the [mode](#mode) setting.
+
+Once these processes have been created and initialized, Basho Bench
+sends a run command to all worker processes, causing them to begin the
+test. Each worker is initialized with a common seed value for random
+number generation to ensure that the generated workload is reproducible
+at a later date.
+
+During the test, the workers repeatedly call `driver:run/4`, passing in
+the next operation to run, a keygen function, a valuegen function, and
+the last state of the driver. The worker process times the operation
+and reports to the stats process when the operation has completed.
+
+Finally, once the test has been run for the duration specified in the
+config file, all workers and stats processes are terminated and the
+benchmark ends. The measured latency and throughput of the test can be
+found in `./tests/current/`. Previous results are in timestamped
+directories of the form `./tests/YYYYMMDD-HHMMSS/`.
+
+## Configuration
+
+Basho Bench ships with a number of sample configuration files, available
+in the `examples` directory.
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`driver:run/4` function with a new operation. There are two possible
+values:
+
+* `max` --- generate as many ops per second as possible
+* `{rate, N}` --- generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each worker independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e. as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above.
Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE, respectively.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                              [{basho_bench_driver_null,run,
+                                                [asdfput,
+                                                 #Fun<basho_bench_keygen.4.4674>,
+                                                 #Fun<basho_bench_valgen.0.1334>,
+                                                 undefined]},
+                                               {basho_bench_worker,
+                                                worker_next_op,1},
+                                               {basho_bench_worker,
+                                                max_worker_run_loop,1}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` --- Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` --- Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` --- Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` --- Directly invokes the Bitcask API
+* `basho_bench_driver_dets` --- Directly invokes the DETS API
+
+On invocation of the `driver:run/4` function, the driver may return one of
+the following results:
+
+* `{ok, NewState}` --- operation completed successfully
+* `{error, Reason, NewState}` --- operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` --- operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` --- operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` --- generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` --- the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` --- the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` --- selects an integer from a uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` --- selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` --- the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` --- specifies an external
+  function that should return a key generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` --- takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` --- takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` --- generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` --- generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` --- generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` --- specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed,
+% starting at 1000 bytes and with a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+Default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to the Riak nodes. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+```
+
+#### riakclient_replies
+
+This value is used as the R-value during get operations and as the
+W-value during put operations.
+
+```erlang
+% Expect 1 reply.
+{riakclient_replies, 1}.
+```
+
+#### riakclient_bucket
+
+The Riak bucket to use for reading and writing values. The default is
+`<<"test">>`.
+
+```erlang
+% Use the "bench" bucket.
+{riakclient_bucket, <<"bench">>}.
+```
+
+### basho_bench_driver_riakc_pb Settings
+
+#### riakc_pb_ips
+
+A list of IP addresses to connect the workers to. A random IP will be
+chosen for each worker.
+
+The default is `{riakc_pb_ips, [{127,0,0,1}]}`.
+
+```erlang
+% Connect to a cluster of 3 machines
+{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]}
+```
+
+#### riakc_pb_port
+
+The port on which to connect to the PBC interface.
+
+The default is `{riakc_pb_port, 8087}`.
+
+#### riakc_pb_bucket
+
+The bucket to use for testing.
+
+The default is `{riakc_pb_bucket, <<"test">>}`.
+
+### basho_bench_driver_http_raw Settings
+
+#### http_raw_ips
+
+A list of IP addresses to connect the workers to. Each worker makes
+requests to each IP in a round-robin fashion.
+
+The default is `{http_raw_ips, ["127.0.0.1"]}`.
+
+```erlang
+% Connect to a cluster of machines in the 10.x network
+{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}.
+```
+
+#### http_raw_port
+
+The port on which to connect to the HTTP server.
+
+The default is `{http_raw_port, 8098}`.
+
+```erlang
+% Connect on port 8090
+{http_raw_port, 8090}.
+```
+
+#### http_raw_path
+
+The base path to use for accessing Riak, usually `"/riak/<bucket>"`.
+
+The default is `{http_raw_path, "/riak/test"}`.
+
+```erlang
+% Place test data in another_bucket
+{http_raw_path, "/riak/another_bucket"}.
+```
+
+#### http_raw_params
+
+Additional parameters to add to the end of the URL. This can be used
+to set the `r`/`w`/`dw`/`rw` parameters as desired.
+
+The default is `{http_raw_params, ""}`.
+
+```erlang
+% Set R=1, W=1 for testing a system with n_val set to 1
+{http_raw_params, "?r=1&w=1"}.
+```
+
+#### http_raw_disconnect_frequency
+
+How often, in seconds or number of operations, the HTTP clients
+(workers) should forcibly disconnect from the server.
+
+The default is `{http_raw_disconnect_frequency, infinity}` (which
+means that Basho Bench should never forcibly disconnect).
+
+```erlang
+% Disconnect after 60 seconds
+{http_raw_disconnect_frequency, 60}.
+
+% Disconnect after 200 operations
+{http_raw_disconnect_frequency, {ops, 200}}.
+```
+
+## Custom Driver
+
+A custom driver must expose the following callbacks:
+
+```erlang
+% Create the worker
+% ID is an integer
+new(ID) -> {ok, State} | {error, Reason}.
+
+% Run an operation
+run(Op, KeyGen, ValueGen, State) -> {ok, NewState} | {error, Reason, NewState}.
+```
+
+See the [existing
+drivers](https://github.com/basho/basho_bench/tree/master/src) for
+more details.
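+
+As an illustration only, here is a minimal sketch of what such a driver
+might look like. The module name, the `noop` operation, and the absence
+of any real work are all hypothetical; a real driver would open
+connections in `new/1` and perform requests in `run/4`:
+
+```erlang
+-module(basho_bench_driver_noop).
+
+-export([new/1, run/4]).
+
+% Hypothetical driver state: we keep only the worker ID.
+new(Id) ->
+    {ok, Id}.
+
+% A "noop" operation that always succeeds. A config file would refer
+% to it via {operations, [{noop, 1}]}.
+run(noop, KeyGen, _ValueGen, State) ->
+    _Key = KeyGen(),  % draw a key so the keygen distribution is exercised
+    {ok, State}.
+```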
diff --git a/content/riak/kv/2.9.0p5/using/performance/erlang.md b/content/riak/kv/2.9.0p5/using/performance/erlang.md new file mode 100644 index 0000000000..96a6c69578 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/performance/erlang.md @@ -0,0 +1,374 @@ +--- +title: "Erlang VM Tuning" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Erlang VM" + identifier: "performance_erlang" + weight: 105 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.9.0p5/ops/tuning/erlang + - /riak/kv/2.9.0p5/ops/tuning/erlang + - /riak/2.9.0p5/using/performance/erlang/ + - /riak/2.9.0/using/performance/erlang/ + - /riak/kv/2.9.0/using/performance/erlang/ + - /riak/kv/2.9.0p1/using/performance/erlang/ + - /riak/kv/2.9.0p2/using/performance/erlang/ + - /riak/kv/2.9.0p3/using/performance/erlang/ + - /riak/kv/2.9.0p4/using/performance/erlang/ +--- + +Riak was written almost exclusively in [Erlang](http://www.erlang.org) +and runs on an Erlang virtual machine (VM), which makes proper Erlang VM +tuning an important part of optimizing Riak performance. The Erlang VM +itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm). + +The table below lists some of the parameters that are available, showing +both their names as used in Erlang and their names as Riak parameters. + +Erlang parameter | Riak parameter +:----------------|:-------------- +[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads` +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K` +[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit` +[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports` +[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online` +[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W` +[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size` +[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables` +[`+scl`](http://www.erlang.org/doc/main/erl.html#+scl) | `erlang.schedulers.compaction_of_load` +[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval` +[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp` +[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing` +[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size` +[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime` +[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after` +[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump` +[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables` +`-name` | `nodename` + +{{% note title="Note on upgrading to 2.0" %}} +In versions of Riak prior to 2.0, Erlang VM-related parameters were specified +in a `vm.args` configuration file; in versions 2.0 and later, all +Erlang-VM-specific parameters are set in the `riak.conf` file. If you're +upgrading to 2.0 from an earlier version, you can still use your old `vm.args` +if you wish. 
Please note, however, that if you set one or more parameters in
+both `vm.args` and in `riak.conf`, the settings in `vm.args` will override
+those in `riak.conf`.
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If either of these conditions is not met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e.
if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+scl` parameter to `false` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. The Erlang distribution itself uses an unpredictable port
+for inter-node communication by default, binding to port 0, which means
+that it uses the first available port. This can make it difficult to
+configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively. The
+following would set the range to ports between 3000 and 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 3000
+erlang.distribution.port_range.maximum = 5000
+```
+
+```appconfig
+%% The older, app.config-based system uses different parameter names
+%% for specifying the minimum and maximum port
+
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 3000},
+          {inet_dist_listen_max, 5000}
+          % ...
+         ]}
+```
+
+You can set the Erlang VM to use a single port by setting the minimum to
+the desired port while setting no maximum. The following would set the
+port to 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 5000
+```
+
+```appconfig
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 5000}
+          % ...
+         ]}
+```
+
+If the minimum port is unset, the Erlang VM will listen on a random
+high-numbered port.
+
+### Maximum Ports
+
+You can set the maximum number of concurrent ports/sockets used by the
+Erlang VM using the `erlang.max_ports` setting. Possible values range
+from 1024 to 134217727. The default is 65536. In `vm.args` you can use
+either `+Q` or `-env ERL_MAX_PORTS`.
+
+## Asynchronous Thread Pool
+
+If thread support is available in your Erlang VM, you can set the number
+of asynchronous threads in the Erlang VM's asynchronous thread pool
+using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0
+to 1024. If thread support is available on your OS, the default is 64.
+Below is an example setting the number of async threads to 600:
+
+```riakconf
+erlang.async_threads = 600
+```
+
+```vmargs
++A 600
+```
+
+### Stack Size
+
+In addition to the number of asynchronous threads, you can determine the
+memory allocated to each thread using the
+`erlang.async_threads.stack_size` parameter, which corresponds to the
+`+a` Erlang flag. You can specify that size in Riak using units such as
+KB, MB, or GB. The valid range is 16-8192 kilowords, which translates to
+64-32768 KB on 32-bit architectures. While there is no default, we
+suggest a stack size of 16 kilowords, which translates to 64 KB. We
+suggest such a small size because the number of asynchronous threads, as
+determined by `erlang.async_threads`, might be quite large in your
+Erlang VM. The suggested 64 KB is enough for drivers delivered with
+Erlang/OTP but might not be large enough to accommodate drivers that use
+the `driver_async()` functionality, documented
+[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend
+setting higher values with caution, always keeping the number of
+available threads in mind.
+
+## Kernel Polling
+
+You can utilize kernel polling in your Erlang distribution if your OS
+supports it. Kernel polling can improve performance if many file
+descriptors are in use; the more file descriptors, the larger an effect
+kernel polling may have on performance. Kernel polling is enabled by
+default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`.
+This corresponds to the
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the
+Erlang VM. You can disable it by setting `erlang.K` to `off`.
+
+## Warning Messages
+
+Erlang's
+[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an
+event manager that registers error, warning, and info events from the
+Erlang runtime.
By default, events from the `error_logger` are mapped as
+warnings, but you can also set messages to be mapped as errors or info
+reports using the `erlang.W` parameter (or `+W` in `vm.args`). The
+possible values are `w` (warnings), `e` (errors), or `i` (info reports).
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) (Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.process_limit` (explained in the section [above](#process-limit)) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can set that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every N seconds,
+where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can determine how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g.
in test clusters. In +`vm.args` you can set the `-shutdown_time` flag in milliseconds. diff --git a/content/riak/kv/2.9.0p5/using/performance/latency-reduction.md b/content/riak/kv/2.9.0p5/using/performance/latency-reduction.md new file mode 100644 index 0000000000..9eaf490c60 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/performance/latency-reduction.md @@ -0,0 +1,271 @@ +--- +title: "Latency Reduction Checklist" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Latency Reduction" + identifier: "performance_latency_reduction" + weight: 104 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.9.0p5/ops/tuning/latency-reduction + - /riak/kv/2.9.0p5/ops/tuning/latency-reduction + - /riak/2.9.0p5/using/performance/latency-reduction/ + - /riak/2.9.0/using/performance/latency-reduction/ + - /riak/kv/2.9.0/using/performance/latency-reduction/ + - /riak/kv/2.9.0p1/using/performance/latency-reduction/ + - /riak/kv/2.9.0p2/using/performance/latency-reduction/ + - /riak/kv/2.9.0p3/using/performance/latency-reduction/ + - /riak/kv/2.9.0p4/using/performance/latency-reduction/ +--- + + +Although latency is unavoidable in distributed systems like Riak, there +are a number of actions that can be undertaken to reduce latency +to the lowest levels possible within a cluster. In this guide, we'll +list potential sources of high latency and what you can do about it. + +## Large Objects + +Riak always performs best with smaller objects. Large objects, which can +be mistakenly inserted into Riak by your application or caused by +siblings (see below), can often increase latency. + +We recommend keeping all objects stored in Riak smaller than 1-2 MB, +preferably below 100 KB. Large objects lead to increased I/O activity +and can put strain on memory resources. In some cases, just a few large +objects can impact latency in a cluster, even for requests that are +unrelated to those objects. + +If your use case requires large objects, we recommend checking out +[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects. + +### Mitigation + +The best way to find out if large objects are impacting latency is to +monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request +to Riak's `/stats` endpoint, you will see the results for the following +metrics related to object size, all of which are calculated only for +`GET` operations (i.e. reads): + +Metric | Explanation +:-----------------------------|:----------- +`fsm_node_get_objsize_mean` | The mean object size encountered by this node in the last minute +`fsm_node_get_objsize_median` | The median object size encountered by this node in the last minute +`fsm_node_get_objsize_95` | The 95th-percentile object size encountered by this node in the last minute +`fsm_node_get_objsize_99` | The 99th-percentile object size encountered by this node in the last minute +`fsm_node_get_objsize_100` | The 100th-percentile object size encountered by this node in the last minute + +The `mean` and `median` measurements may not be good indicators, +especially if you're storing billions of keys. Instead, you should be on +the lookout for trends in the `95`, `99`, and `100` measures: + +* Is there an upward trend? +* Do the metrics indicate that there are outliers? +* Do these trends coincide with increased latency? 
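+
+For example, one quick way to spot-check these percentiles on a single
+node is to query the `/stats` endpoint directly. This sketch assumes
+`curl` and `jq` are installed and that the node's HTTP interface
+listens on the default port 8098:
+
+```bash
+# Pull the GET object-size percentiles from Riak's /stats endpoint
+curl -s http://localhost:8098/stats | \
+  jq '{p95: .fsm_node_get_objsize_95,
+       p99: .fsm_node_get_objsize_99,
+       max: .fsm_node_get_objsize_100}'
+```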
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and restart your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes). This increases the size of the
+distributed Erlang buffer from its default of 1024 KB. Restart your
+node when configuration changes have been made.
+
+Large objects can also impact latency even if they're only present on
+some nodes. If increased latency occurs only on N nodes, where N is your
+[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes.
+
+If large objects are suspected, you should also audit the behavior of
+siblings in your cluster, as explained in the [next section](#siblings).
+
+## Siblings
+
+In Riak, object conflicts are handled by keeping multiple versions of
+the object in the cluster either until a client takes action to resolve
+the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects).
+
+### Mitigation
+
+The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor
+object size (or via an HTTP `GET` request to `/stats`). In the output of
+`riak-admin status` in each node, you'll see the following
+sibling-related statistics:
+
+Metric | Explanation
+:------------------------------|:-----------
+`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+
+Is there an upward trend in these statistics over time? Are there any
+large outliers? Do these trends correspond to your observed latency
+spikes?
+
+If you believe that sibling creation problems could be responsible for
+latency issues in your cluster, you can start by checking the following:
+
+* If `allow_mult` is set to `true` for some or all of your buckets, be
+  sure that your application is correctly resolving siblings. Be sure to
+  read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done.
**Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate.
+  If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly.
+* Application errors are a common source of problems with
+  siblings. Updating the same key over and over without passing a
+  [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) (DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W is set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. The maximum file size setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn), while setting it higher will induce less
+frequent merges with more I/O churn. To find settings that are ideal for
+your use case, we recommend checking out our guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general.
To make sure that you are using your system resources +appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning). + +## OS Tuning + +While a number of latency-related problems can manifest themselves in +development and testing environments, some performance limits only +become clear in production environments. + +### Mitigation + +If you suspect that OS-level issues might be impacting latency, it might +be worthwhile to revisit your OS-specific configurations. The following +guides may be of help: + +* [Open files limit](../open-files-limit) +* General [System performance tuning](../) +* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/) + +## I/O and Network Bottlenecks + +Riak is a heavily I/O- and network resource-intensive system. +Bottlenecks on either front can lead to undue latency in your cluster. +We recommend an active monitoring strategy to detect problems +immediately when they arise. + +### Mitigation + +To diagnose potential I/O bottlenecks, there are a number of Linux tools +at your disposal, including +[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/) +and [netstat](http://en.wikipedia.org/wiki/Netstat). + +To diagnose potential overloads, Riak versions 1.3.2 and later come +equipped with an overload protection feature designed to prevent +cascading failures in overly busy nodes. This feature limits the number +of GET and PUT finite state machines (FSMs) that can exist +simultaneously on a single Riak node. Increased latency can result if a +node is frequently running up against these maximums. + +* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an + idea of how many operations your nodes are coordinating. If you see + non-zero values in `node_get_fsm_rejected` or + `node_get_fsm_rejected_60s`, that means that some of your requests are + being discarded due to overload protection. +* The FSM limits can be increased, but disabling overload protection + entirely is not recommended. More details on these settings are + available in the [release + notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for + Riak version 1.3. + +## Object Settings + +In versions 2.0 and later, Riak enables you to configure a variety of +settings regarding Riak objects, including allowable object sizes, how +many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings. + +A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail. + +> **Note on configuration files in 2.0** +> +> The object settings listed below are only available using the new system +for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to +these settings. + +### Object Size + +As stated above, we recommend _always_ keeping objects below 1-2 MB +and preferably below 100 KB if possible. If you want to ensure that +objects above a certain size do not get stored in Riak, you can do so by +setting the `object.size.maximum` parameter lower than the default of +`50MB`, which is far above the ideal object size. 
If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+You can also set an object size threshold past which a write will
+succeed but will register a warning in the logs by adjusting the
+`object.size.warning_threshold` parameter. The default is `5MB`.
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs a warning (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
diff --git a/content/riak/kv/2.9.0p5/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.9.0p5/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..73ddd2d590
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,51 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/using/performance/multi-datacenter-tuning/
+  - /riak/2.9.0/using/performance/multi-datacenter-tuning/
+  - /riak/kv/2.9.0/using/performance/multi-datacenter-tuning/
+  - /riak/kv/2.9.0p1/using/performance/multi-datacenter-tuning/
+  - /riak/kv/2.9.0p2/using/performance/multi-datacenter-tuning/
+  - /riak/kv/2.9.0p3/using/performance/multi-datacenter-tuning/
+  - /riak/kv/2.9.0p4/using/performance/multi-datacenter-tuning/
+---
+
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.0p5/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
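+
+As a rough illustration only, kernel-level TCP buffer ceilings on Linux
+are typically raised via `sysctl`, analogous to the Solaris `ndd`
+settings below; the values shown are placeholders, not recommendations:
+
+```bash
+# Raise the maximum TCP receive/send buffer sizes (example values)
+sysctl -w net.core.rmem_max=4000000
+sysctl -w net.core.wmem_max=4000000
+```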
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
diff --git a/content/riak/kv/2.9.0p5/using/performance/open-files-limit.md b/content/riak/kv/2.9.0p5/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..bb255893fa
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/performance/open-files-limit.md
@@ -0,0 +1,355 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/tuning/open-files-limit/
+  - /riak/kv/2.9.0p5/ops/tuning/open-files-limit/
+  - /riak/2.9.0p5/using/performance/open-files-limit/
+  - /riak/2.9.0/using/performance/open-files-limit/
+  - /riak/kv/2.9.0/using/performance/open-files-limit/
+  - /riak/kv/2.9.0p1/using/performance/open-files-limit/
+  - /riak/kv/2.9.0p2/using/performance/open-files-limit/
+  - /riak/kv/2.9.0p3/using/performance/open-files-limit/
+  - /riak/kv/2.9.0p4/using/performance/open-files-limit/
+---
+
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation. The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1/. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session    required   pam_limits.so
+```
+
+2/. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3/. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4/. Save and close the file.
+
+5/. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6/. Restart the machine so the limits take effect, and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1/. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session    required   pam_limits.so
+```
+
+2/. Save and close /etc/pam.d/login
+
+3/. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4/. Save and close the /etc/security/limits.conf file.
+
+5/. Restart the machine so that the limits take effect, and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file.
+
+4\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```bash
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
diff --git a/content/riak/kv/2.9.0p5/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.9.0p5/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..98f407b2c8
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,54 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/using/performance/v2-scheduling-fullsync/
+  - /riak/2.9.0/using/performance/v2-scheduling-fullsync/
+  - /riak/kv/2.9.0/using/performance/v2-scheduling-fullsync/
+  - /riak/kv/2.9.0p1/using/performance/v2-scheduling-fullsync/
+  - /riak/kv/2.9.0p2/using/performance/v2-scheduling-fullsync/
+  - /riak/kv/2.9.0p3/using/performance/v2-scheduling-fullsync/
+  - /riak/kv/2.9.0p4/using/performance/v2-scheduling-fullsync/
+---
+
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
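+
+For instance, assuming the two scripts shown below are saved as `/usr/local/bin/fullsync-start.sh` and `/usr/local/bin/fullsync-stop.sh` (hypothetical paths), crontab entries for a 1 a.m. to 5 a.m. sync window might look like this:
+
+```crontab
+## Hypothetical fullsync window: resume at 01:00, pause at 05:00
+0 1 * * * /usr/local/bin/fullsync-start.sh
+0 5 * * * /usr/local/bin/fullsync-stop.sh
+```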
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
diff --git a/content/riak/kv/2.9.0p5/using/reference.md b/content/riak/kv/2.9.0p5/using/reference.md
new file mode 100644
index 0000000000..388d58f3b0
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference.md
@@ -0,0 +1,139 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/reference/
+  - /riak/2.9.0/using/reference/
+  - /riak/kv/2.9.0/using/reference/
+  - /riak/kv/2.9.0p1/using/reference/
+  - /riak/kv/2.9.0p2/using/reference/
+  - /riak/kv/2.9.0p3/using/reference/
+  - /riak/kv/2.9.0p4/using/reference/
+---
+
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
diff --git a/content/riak/kv/2.9.0p5/using/reference/architecture.md b/content/riak/kv/2.9.0p5/using/reference/architecture.md
new file mode 100644
index 0000000000..eb6240b0da
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/architecture.md
@@ -0,0 +1,25 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+#menu:
+#  riak_kv-2.9.0p5:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/reference/architecture/
+  - /riak/2.9.0/using/reference/architecture/
+  - /riak/kv/2.9.0/using/reference/architecture/
+  - /riak/kv/2.9.0p1/using/reference/architecture/
+  - /riak/kv/2.9.0p2/using/reference/architecture/
+  - /riak/kv/2.9.0p3/using/reference/architecture/
+  - /riak/kv/2.9.0p4/using/reference/architecture/
+---
+
+
+<!-- TODO: Content -->
diff --git a/content/riak/kv/2.9.0p5/using/reference/bucket-types.md b/content/riak/kv/2.9.0p5/using/reference/bucket-types.md
new file mode 100644
index 0000000000..3de6888f27
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/bucket-types.md
@@ -0,0 +1,827 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/reference/bucket-types/
+  - /riak/2.9.0/using/reference/bucket-types/
+  - /riak/kv/2.9.0/using/reference/bucket-types/
+  - /riak/kv/2.9.0p1/using/reference/bucket-types/
+  - /riak/kv/2.9.0p2/using/reference/bucket-types/
+  - /riak/kv/2.9.0p3/using/reference/bucket-types/
+  - /riak/kv/2.9.0p4/using/reference/bucket-types/
+---
+
+
+Bucket types allow groups of buckets to share configuration details and
+for Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
+cluster to a pre-2.0 version _as long as you have not created and activated a
+bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
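+
+For reference, a property change in that older style is simply a request against the bucket's `props` resource. Here is a minimal sketch over HTTP (the bucket name and value are arbitrary):
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"n_val":5}}' \
+  http://localhost:8098/buckets/my_bucket/props
+```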
+ +Using bucket *types* also involves dealing with bucket properties, but +with a few crucial differences: + +* Bucket types enable you to create bucket configurations and assign + those configurations to as many buckets as you wish, whereas the + previous system required configuration to be set on a per-bucket basis +* Nearly all bucket properties can be updated using bucket types, except the + `datatype` and `consistent` properties, related to + [Riak data types]({{<baseurl>}}riak/kv/2.9.0p5/developing/data-types), and [strong consistency]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency) respectively +* Bucket types are more performant than bucket properties because + divergence from Riak's defaults doesn't have to be gossiped around the + cluster for every bucket, which means less computational overhead + +It is important to note that buckets are not assigned types in the same +way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a +bucket `my_bucket` and assign it a type the way that you would, say, +set `allow_mult` to `false` or `n_val` to `5`, because there is no +`type` parameter contained within the bucket's properties (i.e. +`props`). + +Instead, bucket types are applied to buckets _on the basis of how those +buckets are queried_. Queries involving bucket types take the following +form: + +``` +GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key> +``` + +In the older system, only bucket and key are specified in queries: + +``` +GET/PUT/DELETE /buckets/<bucket>/keys/<key> +``` + +## When to Use Bucket Types + +In many respects, bucket types are a major improvement over the older +system of bucket configuration, including the following: + +* Bucket types are more flexible because they enable you to define a + bucket configuration and then change it if you need to. +* Bucket types are more reliable because the buckets that bear a given + type only have their properties changed when the type is changed. + Previously, it was possible to change the properties of a bucket only + through client requests. +* Whereas bucket properties can only be altered by clients interacting + with Riak, bucket types are more of an operational concept. The + `riak-admin bucket-type` interface (discussed in depth below) enables + you to manage bucket configurations on the operations side, without + recourse to Riak clients. + +For these reasons, we recommend _always_ using bucket types in versions +of Riak 2.0 and later. + +## Managing Bucket Types Through the Command Line + +Bucket types are created, updated, activated, and more through the +`riak-admin bucket-type` interface. + +Below is a full list of available sub-commands: + +Command | Action | Form | +:-------|:-------|:-----| +`create` | Create or modify a bucket type before activation | `create <type> <json>` | +`activate` | Activate a bucket type | `activate <type>` | +`list` | List all currently available bucket types and their activation status | `list` | +`status` | Display the status and properties of a specific bucket type | `status <type>` | +`update` | Update a bucket type after activation | `update <type> <json>` | + +### Creating a Bucket Type + +Creating new bucket types involves using the `create <type> <json>` +command, where `<type>` is the name of the type and `<json>` is a JSON +object of the following form: + +```json +{ + "props": { + "prop1": "val1", + "prop2": "val2", + ... 
+  }
+}
+```
+
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.0p5/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.0p5/developing/getting-started) section.
+
+If creation is successful, you should see the following output:
+
+```
+type_using_defaults created
+```
+
+{{% note %}}
+The `create` command can be run multiple times prior to a bucket type being
+activated. Riak will persist only those properties contained in the final call
+of the command.
+{{% /note %}}
+
+Creating bucket types that assign properties _always_ involves passing
+stringified JSON to the `create` command. One way to do that is to pass
+a JSON string directly. The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak-admin bucket-type create from_json_file "`cat props.json`"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock: 20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after a bucket
+type is created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then this will
+remain true of the bucket type for its entire lifetime.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+{{% /note %}}
+
+## Buckets as Namespaces
+
+In versions of Riak prior to 2.0, all queries are made to a bucket/key
+pair, as in the following example read request:
+
+```java
+Location myKey = new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch = new FetchValue.Builder(myKey).build();
+client.execute(fetch);
+```
+
+```ruby
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```php
+$location = new Location('my_key', new Bucket('my_bucket'));
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```csharp
+var id = new RiakObjectId("my_bucket", "my_key");
+client.Get(id);
+```
+
+```javascript
+client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Object} = riakc_pb_socket:get(Pid,
+                                   <<"my_bucket">>,
+                                   <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+```
+
+With the addition of bucket types in Riak 2.0, bucket types can be used
+as _an additional namespace_ on top of buckets and keys. The same bucket
+name can be associated with completely different data if it is used in
+accordance with a different type.
Thus, the following two requests will +be made to _completely different objects_, even though the bucket and key +names are the same: + +```java +Location key1 = + new Location(new Namespace("type1", "my_bucket"), "my_key"); +Location key2 = + new Location(new Namespace("type2", "my_bucket"), "my_key"); +FetchValue fetch1 = new FetchValue.Builder(key1).build(); +FetchValue fetch2 = new FetchValue.Builder(key2).build(); +client.execute(fetch1); +client.execute(fetch2); +``` + +```ruby +bucket1 = client.bucket_type('type1').bucket('my_bucket') +bucket2 = client.bucket_type('type2').bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```php +$location1 = new /Basho/Riak/Location('my_key', new Bucket('my_bucket', 'type1')); +$location2 = new Location('my_key', new Bucket('my_bucket', 'type2')); +$builder = new /Basho/Riak/Command/Builder/FetchObject($riak); +$builder->atLocation($location1) + ->build() + ->execute(); +$builder->atLocation($location2) + ->build() + ->execute(); +``` + +```python +bucket1 = client.bucket_type('type1').bucket('my_bucket') +bucket2 = client.bucket_type('type2').bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```csharp +var id1 = new RiakObjectId("type1", "my_bucket", "my_key"); +var id2 = new RiakObjectId("type2", "my_bucket", "my_key"); +var rslt1 = client.Get(id1); +var rslt2 = client.Get(id2); +``` + +```javascript +client.fetchValue({ + bucketType: 'type1', bucket: 'my_bucket', key: 'my_key' +}, function (err, rslt) { +}); + +client.fetchValue({ + bucketType: 'type2', bucket: 'my_bucket', key: 'my_key' +}, function (err, rslt) { +}); +``` + +```erlang +{ok, Obj1} = riakc_pb_socket:get(Pid, + {<<"type1">>, <<"my_bucket">>}, + <<"my_key">>), +{ok, Obj2} = riakc_pb_socket:get(Pid, + {<<"type2">>, <<"my_bucket">>}, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key +curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key +``` + +{{% note title="Note on object location" %}} +In Riak 2.x, _all requests_ must be made to a location specified by a bucket +type, bucket, and key rather than to a bucket/key pair, as in previous +versions. +{{% /note %}} + +If requests are made to a bucket/key pair without a specified bucket +type, `default` will be used in place of a bucket type. 
The following +queries are thus identical: + +```java +Location withDefaultBucketType = + new Location(new Namespace("default", "my_bucket"), "my_key"); +Location noBucketType = + new Location(new Namespace("my_bucket"), "my_key"); +FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build(); +FetchValue fetch2 = new FetchValue.Builder(noBucketType).build(); +client.execute(fetch1); +client.execute(fetch2); +``` + +```ruby +bucket1 = client.bucket_type('default').bucket('my_bucket') +bucket2 = client.bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```php +$location1 = new /Basho/Riak/Location('my_key', new Bucket('my_bucket', 'default')); +$location2 = new /Basho/Riak/Location('my_key', new Bucket('my_bucket')); +$builder = new /Basho/Riak/Command/Builder/FetchObject($riak); +$builder->atLocation($location1) + ->build() + ->execute(); +$builder->atLocation($location2) + ->build() + ->execute(); +``` + +```python +bucket1 = client.bucket_type('default').bucket('my_bucket') +bucket2 = client.bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```csharp +var id1 = new RiakObjectId("default", "my_bucket", "my_key"); +var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain); +client.Put(obj1); + +var id2 = new RiakObjectId("my_bucket", "my_key"); +var getRslt = client.Get(id2); + +RiakObject obj2 = getRslt.Value; +// Note: obj1.Value and obj2.Value are equal +``` + +```javascript +var obj1 = new Riak.Commands.KV.RiakObject(); +obj1.setContentType('text/plain'); +obj1.setBucketType('default'); +obj1.setBucket('my_bucket'); +obj1.setKey('my_key'); +obj1.setValue('value'); +client.storeValue({ value: obj1 }, function (err, rslt) { + if (err) { + throw new Error(err); + } + + client.fetchValue({ + bucketType: 'default', bucket: 'my_bucket', key: 'my_key' + }, function (err, rslt) { + if (err) { + throw new Error(err); + } + var obj2 = rslt.values.shift(); + assert(obj1.value == obj2.value); + }); +}); +``` + +```erlang +{ok, Obj1} = riakc_pb_socket:get(Pid, + {<<"default">>, <<"my_bucket">>}, + <<"my_key">>), +{ok, Obj2} = riakc_pb_socket:get(Pid, + <<"my_bucket">>, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/buckets/my_bucket/keys/my_key +curl http://localhost:8098/types/default/my_bucket/keys/my_key +``` + +## Default Bucket Properties + +Below is a listing of the default bucket properties (i.e. `props`) +associated with the `default` bucket type: + +```json +{ + "props": { + "allow_mult": false, + "basic_quorum": false, + "big_vclock": 50, + "chash_keyfun": { + "fun": "chash_std_keyfun", + "mod": "riak_core_util" + }, + "dvv_enabled": false, + "dw": "quorum", + "last_write_wins": false, + "linkfun": { + "fun": "mapreduce_linkfun", + "mod": "riak_kv_wm_link_walker" + }, + "n_val": 3, + "notfound_ok": true, + "old_vclock": 86400, + "postcommit": [], + "pr": 0, + "precommit": [], + "pw": 0, + "r": "quorum", + "rw": "quorum", + "small_vclock": 50, + "w": "quorum", + "young_vclock": 20 + } +} +``` + +## Bucket Types and the `allow_mult` Setting + +Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is to `true`. The default `allow_mult` setting was `false`. + +In version 2.0, this is changing in a subtle way. 
+Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolution in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind when using versions of Riak 2.0 and
+later any time that you create, activate, and use your own bucket types.
+It is still possible to set `allow_mult` to `false` in any given bucket
+type, but it must be done explicitly. If we wanted to set
+`allow_mult` to `false` in our `n_val_of_2` bucket type from above, we
+would need to create or modify the already existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again. If it still does not
+    work, then there may be network partition or other issues that need
+    to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key = new Location("sensitive_user_data")
+        .setBucketType("no_siblings")
+        .setKey("user19735");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{ ... user data ... }")
+  ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` using the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket.
+If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type from
+above if we didn't want to deal with siblings, vclocks, and the like,
+and we could use a `siblings_allowed` bucket type (with all of the
+default properties except `allow_mult` set to `true`). This would give
+us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
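+
+To make that isolation concrete, here is a sketch using the HTTP interface (assuming a `siblings_allowed` type has also been created and activated as described above); each of the four writes below lands on a distinct object even though the key is `favorite_meme` every time:
+
+```curl
+curl -XPUT -H "Content-Type: text/plain" -d "doge" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl -XPUT -H "Content-Type: text/plain" -d "grumpy cat" \
+  http://localhost:8098/types/no_siblings/buckets/new_memes/keys/favorite_meme
+curl -XPUT -H "Content-Type: text/plain" -d "nyan cat" \
+  http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+curl -XPUT -H "Content-Type: text/plain" -d "rickroll" \
+  http://localhost:8098/types/siblings_allowed/buckets/new_memes/keys/favorite_meme
+```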
diff --git a/content/riak/kv/2.9.0p5/using/reference/custom-code.md b/content/riak/kv/2.9.0p5/using/reference/custom-code.md
new file mode 100644
index 0000000000..b12fea143c
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/custom-code.md
@@ -0,0 +1,139 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/advanced/install-custom-code/
+  - /riak/kv/2.9.0p5/ops/advanced/install-custom-code/
+  - /riak/2.9.0p5/using/reference/custom-code/
+  - /riak/2.9.0/using/reference/custom-code/
+  - /riak/kv/2.9.0/using/reference/custom-code/
+  - /riak/kv/2.9.0p1/using/reference/custom-code/
+  - /riak/kv/2.9.0p2/using/reference/custom-code/
+  - /riak/kv/2.9.0p3/using/reference/custom-code/
+  - /riak/kv/2.9.0p4/using/reference/custom-code/
+---
+
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom erlang code]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. You should note that in Erlang, a file
+name must have the same name as the module. So if you are given a file named
+`validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+         on supported platforms
+
+Compiling the module is a straightforward process:
+
+```text
+erlc validate_json.erl
+```
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+Next, you'll need to define a path where compiled modules will be stored
+and from which they can be loaded. For our example, we'll use a temporary
+directory `/tmp/beams`, but you should choose a directory for production
+functions based on your own requirements such that they will be available
+where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+### Configure
+
+Copy the `validate_json.beam` file to the `/tmp/beams` directory:
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+    %% ...
+    {add_paths, ["/tmp/beams/"]},
+    %% ...
+]}
+```
+
+After updating `app.config`, Riak must be restarted. In production cases, if
+you are adding configuration changes to multiple nodes, you should do so in a
+rolling fashion, taking time to ensure that the Riak key/value store has fully
+initialized and become available for use on each node.
+
+This is done with the `riak-admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure riak_kv is active before restarting the next
+node.
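+
+For example, after restarting a node you might block until the KV service is available with something along these lines (the node name here is hypothetical):
+
+```bash
+riak-admin wait-for-service riak_kv riak@192.168.1.10
+```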
+{{% /note %}} diff --git a/content/riak/kv/2.9.0p5/using/reference/failure-recovery.md b/content/riak/kv/2.9.0p5/using/reference/failure-recovery.md new file mode 100644 index 0000000000..7439a9e4ff --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/failure-recovery.md @@ -0,0 +1,89 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.0p5/using/reference/failure-recovery/ + - /riak/2.9.0/using/reference/failure-recovery/ + - /riak/kv/2.9.0/using/reference/failure-recovery/ + - /riak/kv/2.9.0p1/using/reference/failure-recovery/ + - /riak/kv/2.9.0p2/using/reference/failure-recovery/ + - /riak/kv/2.9.0p3/using/reference/failure-recovery/ + - /riak/kv/2.9.0p4/using/reference/failure-recovery/ +--- + + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. 
Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` diff --git a/content/riak/kv/2.9.0p5/using/reference/handoff.md b/content/riak/kv/2.9.0p5/using/reference/handoff.md new file mode 100644 index 0000000000..bd216afd6e --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/handoff.md @@ -0,0 +1,205 @@ +--- +title: "Handoff Reference" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Handoff" + identifier: "managing_ref_handoff" + weight: 101 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/handoff/ + - /riak/kv/2.9.0p5/ops/running/handoff/ + - /riak/2.9.0p5/using/reference/handoff/ + - /riak/2.9.0/using/reference/handoff/ + - /riak/kv/2.9.0/using/reference/handoff/ + - /riak/kv/2.9.0p1/using/reference/handoff/ + - /riak/kv/2.9.0p2/using/reference/handoff/ + - /riak/kv/2.9.0p3/using/reference/handoff/ + - /riak/kv/2.9.0p4/using/reference/handoff/ +--- + + +[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/handoff + +Riak is a distributed system built with two essential goals in mind: + +* **fault tolerance**, whereby a Riak cluster can withstand node + failure, network partitions, and other events in a way that does not + disrupt normal functioning, and +* **scalability**, whereby operators can gracefully add and remove nodes + to/from a Riak cluster + +Both of these goals demand that Riak is able to either temporarily or +permanently re-assign responsibility for portions of the keyspace. That +re-assigning is referred to as **intra-cluster handoff** (or simply +**handoff** in our documentation). + +## Types of Handoff + +Intra-cluster handoff typically takes one of two forms: **hinted +handoff** and **ownership transfer**. + +Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +up the slack, so to speak, assuming responsibility for node C's +operations. When node C comes back online, responsibility will be handed +back to the original vnodes. + +Ownership transfer is different because it is meant to be permanent. +It occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +makeup of a cluster changes, e.g. when nodes are added or removed from +the cluster. In this case, responsibility for portions of the keyspace +needs to be fundamentally re-assigned. + +Both types of handoff are handled automatically by Riak. Operators do +have the option, however, of enabling and disabling handoff on +particular nodes or all nodes and of configuring key aspects of Riak's +handoff behavior. More information can be found below. + +## Configuring Handoff + +A full listing of configurable parameters can be found in our +[configuration files]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#intra-cluster-handoff) +document. The sections below provide a more narrative description of +handoff configuration. 
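+
+Before touching any of these settings, it can be helpful to check whether transfers (including handoff) are currently in flight. One way to do that, as a sketch, is with:
+
+```bash
+## Show active inter-node transfers, including ongoing handoff
+riak-admin transfers
+```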
+ +### SSL + +If you want to encrypt handoff behavior within a Riak cluster, you need +to provide each node with appropriate paths for an SSL certfile (and +potentially a keyfile). The configuration below would designate a +certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`: + +```riakconf +handoff.ssl.certfile = /ssl_dir/cert.pem +handoff.ssl.keyfile = /ssl_dir/key.pem +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_ssl_options, [ + {certfile, "/ssl_dir/cert.pem"}, + {keyfile, "/ssl_dir/key.pem"} + ]}, + %% Other configs +]} +``` + +### Port + +You can set the port used by Riak for handoff-related interactions using +the `handoff.port` parameter. The default is 8099. This would change the +port to 9000: + +```riakconf +handoff.port = 9000 +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_port, 9000}, + %% Other configs +]} +``` + +### Background Manager + +Riak has an optional background manager that limits handoff activity in +the name of saving resources. The manager can help prevent system +response degradation during times of heavy load, when multiple +background tasks may contend for the same system resources. The +background manager is disabled by default. The following will enable it: + +```riakconf +handoff.use_background_manager = on +``` + +```appconfig +{riak_kv, [ + %% Other configs + {handoff_use_background_manager, on}, + %% Other configs +]} +``` + +### Maximum Rejects + +If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search/), +those subsystems can block handoff of primary key/value data, i.e. data +that you interact with via normal reads and writes. + +The `handoff.max_rejects` setting enables you to set the maximum +duration that a [vnode]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode) can be blocked by multiplying the +`handoff.max_rejects` setting by the value of +[`vnode_management_timer`]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#vnode_management_timer). +Thus, if you set `handoff.max_rejects` to 10 and +`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems +can block K/V handoff for a maximum of 50 seconds. The default for +`handoff.max_rejects` is 6, while the default for +`vnode_management_timer` is `10s`. This would set `max_rejects` to 10: + +```riakconf +handoff.max_rejects = 10 +``` + +```appconfig +{riak_kv, [ + %% Other configs + {handoff_rejected_max, 10}, + %% Other configs +]} +``` + +### Transfer Limit + +You can adjust the number of node-to-node transfers (which includes +handoff) using the `transfer_limit` parameter. The default is 2. Setting +this higher will increase node-to-node communication but at the expense +of higher resource intensity. This would set `transfer_limit` to 5: + +```riakconf +transfer_limit = 5 +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_concurrency, 5}, + %% Other configs +]} +``` + +## Enabling and Disabling Handoff + +Handoff can be enabled and disabled in two ways: via configuration or +on the command line. + +### Enabling and Disabling via Configuration + +You can enable and disable both outbound and inbound handoff on a node +using the `handoff.outbound` and `handoff.inbound` settings, +respectively. Both are enabled by default. 
The following would disable +both: + +```riakconf +handoff.outbound = off +handoff.inbound = off +``` + +```appconfig +{riak_core, [ + %% Other configs + {disable_outbound_handoff, true}, + {disable_inbound_handoff, true}, + %% Other configs +]} +``` + +### Enabling and Disabling Through the Command Line + +Check out the [Cluster Operations: Handoff][cluster ops handoff] for steps on enabling and disabling handoff via the command line. diff --git a/content/riak/kv/2.9.0p5/using/reference/jmx.md b/content/riak/kv/2.9.0p5/using/reference/jmx.md new file mode 100644 index 0000000000..62cf356a54 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/jmx.md @@ -0,0 +1,194 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/ops/running/monitoring/jmx + - /riak/kv/2.9.0p5/ops/running/monitoring/jmx + - /riak/2.9.0p5/using/reference/jmx/ + - /riak/2.9.0/using/reference/jmx/ + - /riak/kv/2.9.0/using/reference/jmx/ + - /riak/kv/2.9.0p1/using/reference/jmx/ + - /riak/kv/2.9.0p2/using/reference/jmx/ + - /riak/kv/2.9.0p3/using/reference/jmx/ + - /riak/kv/2.9.0p4/using/reference/jmx/ +--- + + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. 
+``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMean</tt></td> + <td>float</td> + <td>Mean PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMedian</tt></td> + <td>float</td> + <td>Median PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePuts</tt></td> + <td>int</td> + <td>Number of PUTs in past minute</td> + </tr> + <tr> + <td><tt>NodePutsTotal</tt></td> + <td>int</td> + <td>Number of PUTs since node start</td> + </tr> + <tr> + <td><tt>PBCActive</tt></td> + <td>int</td> + <td>Number of active Protocol Buffers connections</td> + </tr> + <tr> + <td><tt>PBCConnects</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections in past minute</td> + </tr> + <tr> + <td><tt>PBCConnectsTotal</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections since node start</td> + </tr> + <tr> + <td><tt>RingCreationSize</tt></td> + <td>int</td> + <td>Number of partitions in Riak ring</td> + </tr> + <tr> + <td><tt>VnodeGets</tt></td> + <td>int</td> + <td>Number of vnode-level GETs in past minute</td> + </tr> + <tr> + <td><tt>VnodeGetsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level GETs since node start</td> + </tr> + <tr> + <td><tt>VnodePuts</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs in past minute</td> + </tr> + <tr> + <td><tt>VnodePutsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs since node start</td> + </tr> +</table> diff --git a/content/riak/kv/2.9.0p5/using/reference/logging.md b/content/riak/kv/2.9.0p5/using/reference/logging.md new file mode 100644 index 0000000000..0f3a208527 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/logging.md @@ -0,0 +1,305 @@ +--- +title: "Logging Reference" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Logging" 
+    identifier: "managing_ref_logging"
+    weight: 100
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/running/logging
+  - /riak/kv/2.9.0p5/ops/running/logging
+  - /riak/2.9.0p5/using/reference/logging/
+  - /riak/2.9.0/using/reference/logging/
+  - /riak/kv/2.9.0/using/reference/logging/
+  - /riak/kv/2.9.0p1/using/reference/logging/
+  - /riak/kv/2.9.0p2/using/reference/logging/
+  - /riak/kv/2.9.0p3/using/reference/logging/
+  - /riak/kv/2.9.0p4/using/reference/logging/
+---
+
+
+[cluster ops log]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/logging
+
+Logging in Riak KV is handled by a Basho-produced logging framework for
+[Erlang](http://www.erlang.org) called
+[lager](https://github.com/basho/lager).
+
+lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document.
+
+## Log Directory
+
+Riak's log files are stored in a `/log` directory on each node. The
+location of that directory differs from platform to platform. The table
+below shows you where log files are stored on all supported operating
+systems.
+
+OS | Directory
+:--|:---------
+Ubuntu, Debian, CentOS, RHEL | `/var/log/riak`
+Solaris, OpenSolaris | `/opt/riak/log`
+Source install and Mac OS X | `./log` (where the `.` represents the root installation directory)
+
+## Log Files
+
+Below is a list of files that can be found in each node's `/log`
+directory:
+
+File | Significance
+:----|:------------
+`console.log` | Console log output
+`crash.log` | Crash logs
+`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs.
+`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak.
+`run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored.
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.9.0, some log messages may contain newline characters,
+which prevents external tools from reliably identifying the end of each
+log entry when ingesting log files.
+
+A known workaround is to ingest not the logs enabled by the
+`log.console` parameter but rather those enabled by the `log.syslog`
+parameter and processed by syslog, e.g. using the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option (see [this StackExchange
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474))
+or an equivalent option of other syslog implementations.
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log File Contents
+
+In each node's `/log` directory, you will see at least one of each of
+the following:
+
+File | Contents
+:----|:--------
+`console.log` | General messages from all Riak subsystems
+`crash.log` | Catastrophic events, such as node failures, running out of disk space, etc.
+`erlang.log` | Events from the Erlang VM on which Riak runs
+`run_erl.log` | The command-line arguments used when starting Riak
+
+### Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable or
+disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+Riak stores error messages in `./log/error.log` by default. You can
+change this using the `log.error.file` parameter. Here is an example,
+which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set crash logs to
+be rotated when a certain size threshold is reached, at designated
+times, or both.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` --- Every night at midnight
+* `$D23` --- Every day at 23:00 (11 pm)
+* `$W0D20` --- Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` --- On the first day of every month at midnight
+* `$M5D6` --- On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, with any size denomination
+you wish, e.g. `KB` or `MB`. The default is 64 KB. The following would
+set that maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off`. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix to be prepended to
+each syslog message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages from among the available facilities, which are listed below.
+The default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to a log file (you can choose the name
+and location of that file), to standard output, to both, or to neither.
+This is determined by the value that you give to the `log.console`
+parameter, which offers four options:
+
+* `file` --- Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source), but will differ on platform-specific installations,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` --- Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-cli/#attach-direct) command
+* `both` --- Console logs will be emitted both to a file and to standard
+  output
+* `off` --- Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log].
diff --git a/content/riak/kv/2.9.0p5/using/reference/multi-datacenter.md b/content/riak/kv/2.9.0p5/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..661817ad44
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/multi-datacenter.md
@@ -0,0 +1,57 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/using/reference/multi-datacenter/
+  - /riak/2.9.0/using/reference/multi-datacenter/
+  - /riak/kv/2.9.0/using/reference/multi-datacenter/
+  - /riak/kv/2.9.0p1/using/reference/multi-datacenter/
+  - /riak/kv/2.9.0p2/using/reference/multi-datacenter/
+  - /riak/kv/2.9.0p3/using/reference/multi-datacenter/
+  - /riak/kv/2.9.0p4/using/reference/multi-datacenter/
+---
+
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling and disabling per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][ref mdc comparison]
diff --git a/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..46c367d0b7
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,104 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/mdc/comparison
+  - /riak/kv/2.9.0p5/ops/mdc/comparison
+  - /riak/2.9.0p5/using/reference/multi-datacenter/comparison/
+  - /riak/2.9.0/using/reference/multi-datacenter/comparison/
+  - /riak/kv/2.9.0/using/reference/multi-datacenter/comparison/
+  - /riak/kv/2.9.0p1/using/reference/multi-datacenter/comparison/
+  - /riak/kv/2.9.0p2/using/reference/multi-datacenter/comparison/
+  - /riak/kv/2.9.0p3/using/reference/multi-datacenter/comparison/
+  - /riak/kv/2.9.0p4/using/reference/multi-datacenter/comparison/
+---
+
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destination of replication data. Sites and listeners are
+  manually configured on each node in a cluster. This can be a burden to
+  the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity was reached in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters are exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes, and the sink has N nodes, there will be M
+  realtime connections. Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/) (AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
diff --git a/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..ceff83f4ae
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,178 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/mdc/monitoring
+  - /riak/kv/2.9.0p5/ops/mdc/monitoring
+  - /riak/2.9.0p5/using/reference/multi-datacenter/monitoring/
+  - /riak/2.9.0/using/reference/multi-datacenter/monitoring/
+  - /riak/kv/2.9.0/using/reference/multi-datacenter/monitoring/
+  - /riak/kv/2.9.0p1/using/reference/multi-datacenter/monitoring/
+  - /riak/kv/2.9.0p2/using/reference/multi-datacenter/monitoring/
+  - /riak/kv/2.9.0p3/using/reference/multi-datacenter/monitoring/
+  - /riak/kv/2.9.0p4/using/reference/multi-datacenter/monitoring/
+---
+
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identification and trending of issues or delays in realtime replication
+are important for identifying a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute.
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+---
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The size of `rt_dirty` can quantify the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue backups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+the maximum size of the queue (displayed in the `realtime_queue_stats`
+section of the replication status output as `max_bytes`) can be
+increased to accommodate the expected load.
+
+---
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+1. Perform a GET for your canary object on both your source and sink
+   clusters, noting their states. The state of the object in each cluster
+   can be referred to as state `S0`, or the object's initial state.
+2. PUT an update for your canary object to the source cluster, updating
+   the state of the object to the next state, `S1`.
+3. Perform a GET for your canary object on the sink cluster, comparing
+   the state of the object on the source cluster to the state of the
+   object on the sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+A rough estimate of how long it takes an object PUT to a source cluster
+to be replicated to a sink cluster can be obtained by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
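+As a minimal illustration of the general process above, here is a
+hedged sketch using Riak's HTTP API. The bucket `canary`, key `check`,
+state value `S1`, and the hostnames `source-node` and `sink-node` are
+all hypothetical placeholders:
+
+```curl
+# Step 2: PUT the next canary state (S1) to the source cluster
+curl -XPUT http://source-node:8098/buckets/canary/keys/check \
+  -H "Content-Type: text/plain" \
+  -d "S1"
+
+# Step 3: GET the canary from the sink cluster and compare its state
+# to the source; repeat (with a delay) until the value matches S1 or
+# your SLA time threshold is exceeded
+curl http://sink-node:8098/buckets/canary/keys/check
+```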
diff --git a/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..6a0076d1ef
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,70 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/mdc/per-bucket
+  - /riak/kv/2.9.0p5/ops/mdc/per-bucket
+  - /riak/2.9.0p5/using/reference/multi-datacenter/per-bucket-replication/
+  - /riak/2.9.0/using/reference/multi-datacenter/per-bucket-replication/
+  - /riak/kv/2.9.0/using/reference/multi-datacenter/per-bucket-replication/
+  - /riak/kv/2.9.0p1/using/reference/multi-datacenter/per-bucket-replication/
+  - /riak/kv/2.9.0p2/using/reference/multi-datacenter/per-bucket-replication/
+  - /riak/kv/2.9.0p3/using/reference/multi-datacenter/per-bucket-replication/
+  - /riak/kv/2.9.0p4/using/reference/multi-datacenter/per-bucket-replication/
+---
+
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+The available values changed between Riak Enterprise 1.1 and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+ * `true` --- Enable replication (realtime + fullsync)
+ * `false` --- Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+ * `realtime` --- Replication only occurs in realtime for this bucket
+ * `fullsync` --- Replication only occurs during a fullsync operation
+ * `both` --- Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket/props \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket/props \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this goes for properties such as `backend`.
+If the bucket doesn't exist in the destination cluster, Riak will create
+it with the default backend and _not_ with the backend used in the
+source cluster.
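+Returning to the `repl` property: here is a hedged example of one of
+the Riak Enterprise 1.2+ values listed above, enabling realtime-only
+replication for a bucket via the same props endpoint:
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket/props \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":"realtime"}}'
+```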
diff --git a/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/statistics.md b/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..62b228a442
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,248 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/mdc/statistics
+  - /riak/kv/2.9.0p5/ops/mdc/statistics
+  - /riak/2.9.0p5/using/reference/multi-datacenter/statistics/
+  - /riak/2.9.0/using/reference/multi-datacenter/statistics/
+  - /riak/kv/2.9.0/using/reference/multi-datacenter/statistics/
+  - /riak/kv/2.9.0p1/using/reference/multi-datacenter/statistics/
+  - /riak/kv/2.9.0p2/using/reference/multi-datacenter/statistics/
+  - /riak/kv/2.9.0p3/using/reference/multi-datacenter/statistics/
+  - /riak/kv/2.9.0p4/using/reference/multi-datacenter/statistics/
+---
+
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
+
+1. Many of these statistics will appear only on the current
+   leader node
+2. The counts for all statistics will be reset to 0 upon restarting
+   Riak unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | A sink error has been detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | A source error has been detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+These values are under `realtime_queue_stats`.
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster.
+`fullsync_start_time` | The time the current fullsync to the specified cluster began.
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync.
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When completed, this will be the same as the total number of partitions in the ring.
+`error_exits` | If a sync failed or was aborted, the partition will be queued again and retried later
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
+`strategy` | The strategy that fulfills fullsync replication. In previous versions of replication, different values could be configured. This value could be changed depending on your replication needs.
+`fullsync_worker` | The Erlang process id of the fullsync worker.
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received to the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received to the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication sink
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node.
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The connected site (sink) name that was configured. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration]({{<baseurl>}}riak/kv/2.9.0p5/configuring/v2-multi-datacenter) or [Riak V3 MDC Replication Configuration]({{<baseurl>}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter) guides for
+more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+ + +## Accessing Replication Web-Based Statistics + +These stats can be accessed via the command line with the following +command: + +```curl +curl -q http://127.0.0.1:8098/riak-repl/stats +``` + +A simple way to view formatted statistics is to use a command such as: + +```curl +curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp +``` diff --git a/content/riak/kv/2.9.0p5/using/reference/object-deletion.md b/content/riak/kv/2.9.0p5/using/reference/object-deletion.md new file mode 100644 index 0000000000..5fe66e1baa --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/object-deletion.md @@ -0,0 +1,125 @@ +--- +title: "Object Deletion Reference" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Object Deletion" + identifier: "managing_ref_object_deletion" + weight: 103 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.0p5/ops/advanced/deletion + - /riak/2.9.0p5/using/reference/object-deletion/ + - /riak/2.9.0/using/reference/object-deletion/ + - /riak/kv/2.9.0/using/reference/object-deletion/ + - /riak/kv/2.9.0p1/using/reference/object-deletion/ + - /riak/kv/2.9.0p2/using/reference/object-deletion/ + - /riak/kv/2.9.0p3/using/reference/object-deletion/ + - /riak/kv/2.9.0p4/using/reference/object-deletion/ +--- + + +[concept eventual consistency]: ../../../learn/concepts/eventual-consistency +[concept clusters]: ../../../learn/concepts/clusters +[glossary vnode]: ../../../learn/glossary/#vnode +[usage delete objects]: ../../../developing/usage/deleting-objects +[developing keylist]: ../../../developing/api/http/list-keys +[developing mapreduce]: ../../../developing/usage/mapreduce +[cluster mdc]: ../../cluster-operations/v3-multi-datacenter +[config advanced]: ../../../configuring/reference/#advanced-configuration +[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum +[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings +[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction + +In single-server, non-clustered data storage systems, object deletion +is a trivial process. In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however, +object deletion is far less trivial because objects live on multiple +[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend. + +## Object Deletion Example + +The problem of object deletion in distributed systems can be illustrated more concretely using the following example: + +* An object is stored on nodes A, B, and C +* Node C suddenly goes offline due to a network failure +* A client sends a delete request to node A, which forwards that + request to node B, but it cannot reach node C +* On nodes A and B, the object is deleted +* Node C comes back online +* A client attempts to read the object, and the request hits node C +* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object. + +The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred: + +1. the object was deleted on A & B but not on C +2. the object was created on C but not on A & B + +To get around this problem, Riak uses *Tombstones*. 
+
+## Tombstones
+
+Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it.
+
+This allows Riak to understand the difference between an object that has been deleted and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*.
+
+The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**.
+
+After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to).
+
+## Configuring Object Deletion
+
+The `delete_mode` setting in a cluster's [configuration files][config advanced] will determine how long a tombstone will remain before being reaped.
+
+There are three possible settings:
+
+* `keep` --- Disables tombstone removal
+* `immediate` --- The tombstone is removed as soon as the request is
+  received
+* Custom time interval --- How long to wait until the tombstone is
+  removed, expressed in milliseconds. The default is `3000`, i.e. to
+  wait 3 seconds
+
+In general, we recommend setting the `delete_mode` parameter to `keep`
+if you plan to delete and recreate objects under the same key. This protects against failure scenarios in which a deleted object may be resurrected.
+
+Setting `delete_mode` to `immediate` can be useful in situations in
+which an aggressive space reclamation process is necessary, such as
+when running [MapReduce jobs][developing mapreduce], but we do not recommend
+this in general.
+
+Setting `delete_mode` to a longer time duration than the default can be
+useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when
+network connectivity is an issue.
+
+## Deletion from Backends
+
+When attempting to reclaim disk space, deleting data may seem like the obvious first step. However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion when a Riak tombstone is created, and later when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging], for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping, and, prior to garbage collection, delete operations will actually cause disk space usage to rise slightly.
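+To make the `delete_mode` discussion above concrete, here is a hedged
+sketch of the corresponding advanced.config entry. The parameter lives
+in the `riak_kv` application section, and the atoms `keep` or
+`immediate` may be used in place of a millisecond value:
+
+```advancedconfig
+{riak_kv, [
+    %% Wait 10 seconds (instead of the default 3) before reaping
+    %% tombstones; use keep to disable reaping entirely
+    {delete_mode, 10000}
+]}
+```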
+ +## Tombstones & Reporting + +When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following: + +* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries. +* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain upon the node until a further request finds it. At this time, a new reap timer will be initiated. +* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there. +* A GET request will never see backend tombstones, because the backends report these as `not_found`. To RiakKV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too. +* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default. + +## Client Library Examples + +Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side. + +## Resources + +* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html) diff --git a/content/riak/kv/2.9.0p5/using/reference/runtime-interaction.md b/content/riak/kv/2.9.0p5/using/reference/runtime-interaction.md new file mode 100644 index 0000000000..f3ead0150d --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/runtime-interaction.md @@ -0,0 +1,74 @@ +--- +title: "Runtime Interaction Reference" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Runtime Interaction" + identifier: "managing_ref_runtime_interaction" + weight: 104 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.0p5/ops/advanced/runtime + - /riak/kv/2.9.0p5/ops/advanced/runtime + - /riak/2.9.0p5/using/reference/runtime-interaction/ + - /riak/2.9.0/using/reference/runtime-interaction/ + - /riak/kv/2.9.0/using/reference/runtime-interaction/ + - /riak/kv/2.9.0p1/using/reference/runtime-interaction/ + - /riak/kv/2.9.0p2/using/reference/runtime-interaction/ + - /riak/kv/2.9.0p3/using/reference/runtime-interaction/ + - /riak/kv/2.9.0p4/using/reference/runtime-interaction/ +--- + + +[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference +[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters + +Riak's [configuration files][config reference] provide a variety of parameters that +enable you to fine-tune how Riak interacts with two important elements +of the underlying operating system: distribution ports and OS +processes/garbage collection. + +## Ports + +Distribution ports connect Riak nodes within a [cluster][concept clusters]. 
The +following port-related parameters are available: + +* `runtime_health.triggers.distribution_port` --- Whether distribution + ports with full input buffers will be counted as busy. + * Default: `on` +* `runtime_health.triggers.port` --- Whether ports with full input + buffers will be counted as busy. Ports can represent open files or network sockets. + * Default: `on` +* `runtime_health.thresholds.busy_ports` --- The threshold at which a + warning will be triggered about the number of ports that are overly + busy. Ports with full input buffers count toward this threshold. + * Default: `2` + +## Processes + +Riak will log warnings related to busy operating system processes and +garbage collection. You can specify the conditions in which warnings are +triggered using the following parameters: + +* `runtime_health.thresholds.busy_processes` --- The threshold at which + a warning will be triggered about the number of processes that are + overly busy. Processes with large heaps or that take a long time to + garbage collect will count toward this threshold. + * Default: `30` +* `runtime_health.triggers.process.heap_size` --- A process will be + marked as busy when its size exceeds this size (in bytes). + * Default: `160444000` +* `runtime_health.triggers.process.garbage_collection` --- A process + will be marked as busy when it exceeds this amount of time doing + garbage collection. Enabling this setting can cause performance + problems on multi-core systems. + * Default: `off` + * Example when enabled: `50ms` +* `runtime_health.triggers.process.long_schedule` --- A process will + become busy when it exceeds this length of time during a single + process scheduling and execution cycle. + * Default: `off` + * Example when enabled: `20ms` diff --git a/content/riak/kv/2.9.0p5/using/reference/search.md b/content/riak/kv/2.9.0p5/using/reference/search.md new file mode 100644 index 0000000000..5edf744f39 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/search.md @@ -0,0 +1,462 @@ +--- +title: "Search Reference" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Search" + identifier: "managing_ref_search" + weight: 109 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.0p5/dev/advanced/search + - /riak/kv/2.9.0p5/dev/advanced/search + - /riak/2.9.0p5/using/reference/search/ + - /riak/2.9.0/using/reference/search/ + - /riak/kv/2.9.0/using/reference/search/ + - /riak/kv/2.9.0p1/using/reference/search/ + - /riak/kv/2.9.0p2/using/reference/search/ + - /riak/kv/2.9.0p3/using/reference/search/ + - /riak/kv/2.9.0p4/using/reference/search/ +--- + + +[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters +[configuring search]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/search + +> **Note on search 2.0 vs. legacy search** +> +> This document refers to Riak search 2.0 with +[Solr](http://lucene.apache.org/solr/) integration (codenamed +Yokozuna). + +The project that implements Riak search is codenamed Yokozuna. This is a +more detailed overview of the concepts and reasons behind the design of +Yokozuna, for those interested. If you're simply looking to use Riak +search, you should check out the [Using Search]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search) document. + +![Yokozuna]({{<baseurl>}}images/yokozuna.png) + +## Riak Search is Erlang + +In Erlang OTP, an "application" is a group of modules and Erlang +processes which together perform a specific task. 
+The word application is confusing because most people think of an application as an entire program such as Emacs or Photoshop. But Riak Search is just a sub-system in Riak itself. Erlang applications are often stand-alone, but Riak Search is more like an appendage of Riak. It requires other subsystems like Riak Core and KV, but also extends their functionality by providing search capabilities for KV data.
+
+The purpose of Riak Search is to bring more sophisticated and robust query and search support to Riak. Many people consider Lucene and programs built on top of it, such as Solr, as the standard for open-source search. There are many successful applications built on Lucene/Solr, and it sets the standard for the feature set that developers and users expect. Meanwhile, Riak has a great story as a highly-available, distributed key/value store. Riak Search takes advantage of the fact that Riak already knows how to do the distributed bits, combining its feature set with that of Solr, taking advantage of the strengths of each.
+
+Riak Search is a mediator between Riak and Solr. There is nothing stopping a user from deploying these two programs separately, but this would leave the user responsible for the glue between them. That glue can be tricky to write. It requires dealing with monitoring, querying, indexing, and dissemination of information.
+
+Unlike Solr by itself, Riak Search knows how to do all of the following:
+
+* Listen for changes in key/value (KV) data and make the appropriate changes to indexes that live in Solr. It also knows how to take a user query on any node and convert it to a Solr distributed search, which will correctly cover the entire index without overlap in replicas.
+* Take index creation commands and disseminate that information across the cluster.
+* Communicate with and monitor the Solr OS process.
+
+## Solr/JVM OS Process
+
+Every node in a Riak [cluster][concept clusters] has a corresponding operating system (OS) process running a JVM which hosts Solr on the Jetty application server. This OS process is a child of the Erlang OS process running Riak.
+
+Riak Search has a `gen_server` process which monitors the JVM OS process. The code for this server is in `yz_solr_proc`. When the JVM process crashes, this server crashes, causing its supervisor to restart it.
+
+If there is more than 1 restart in 45 seconds, the entire Riak node will be shut down. If Riak Search is enabled and Solr cannot function for some reason, the Riak node needs to go down so that the user will notice and take corrective action.
+
+Conversely, the JVM process monitors the Riak process. If for any reason Riak goes down hard (e.g. a segfault) the JVM process will also exit. This double monitoring along with the crash semantics means that neither process may exist without the other. They are either both up or both down.
+
+All other communication between Riak Search and Solr is performed via HTTP, including querying, indexing, and administration commands. The ibrowse Erlang HTTP client is used to manage these communications as both it and the Jetty container hosting Solr pool HTTP connections, allowing for reuse. Moreover, since there is no `gen_server` involved in this communication, there is no serialization point to act as a bottleneck.
+
+## Indexes
+
+An index, stored as a set of files on disk, is a logical namespace that contains index entries for objects.
+Each such index maintains its own set of files on disk---a critical difference from Riak KV, in which a bucket is a purely logical entity and not physically disjoint at all. A Solr index may require significantly less disk space than the corresponding legacy Riak Search index, depending on the Solr schema used.
+
+Indexes may be associated with zero or more buckets. At creation time, however, each index has no associated buckets---unlike the legacy Riak Search, indexes in the new Riak Search do not implicitly create bucket associations, meaning that this must be done as a separate configuration step.
+
+To associate a bucket with an index, the bucket property `search_index` must be set to the name of the index you wish to associate. Conversely, in order to disassociate a bucket you use the sentinel value `_dont_index_`.
+
+Many buckets can be associated with the same index. This is useful for logically partitioning data into different KV buckets which are of the same type of data, for example if a user wanted to store event objects but logically partition them in KV by using a date as the bucket name.
+
+A bucket _cannot_ be associated with many indexes---the `search_index` property must be a single name, not a list.
+
+See the [main Search documentation]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search/#simple-setup) for details on creating an index.
+
+## Extractors
+
+There is a tension between Riak KV and Solr when it comes to data. Riak KV treats object values as mostly opaque, and while KV does maintain an associated content type, it is simply treated as metadata to be returned to the user to provide context for interpreting the returned object. Otherwise, the user wouldn't know what type of data it is!
+
+Solr, on the other hand, wants semi-structured data, more specifically a flat collection of field-value pairs. "Flat" here means that a field's value cannot be a nested structure of field-value pairs; the values are treated as-is (non-composite is another way to say it).
+
+Because of this mismatch between KV and Solr, Riak Search must act as a mediator between the two, meaning it must have a way to inspect a KV object and create a structure which Solr can ingest for indexing. In Solr this structure is called a **document**. This task of creating a Solr document from a Riak object is the job of the **extractor**. To perform this task, two things must be considered.
+
+**Note**: This isn't quite right: the fields created by the extractor are only a subset of the fields that end up in the document. Special fields needed for Yokozuna to properly query data, as well as tagging fields, are also created. This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a Solr document?
+2. If so, how is the object's value mapped from one to the other? For example, the value may be `application/json` which contains nested objects. This must somehow be transformed into a flat structure.
+
+The first question is answered by the _extractor mapping_. By default Yokozuna ships with extractors for several common data types. Below is a table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation of the extractor module.
+Every extractor must conform to the following Erlang specification:
+
+```erlang
+%% Each extractor module implements an extract function with this signature
+-spec extract(ObjectValue :: binary(), Options :: proplist()) -> fields() | {error, term()}.
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+```
+
+The value of the object is passed along with options specific to each extractor. Assuming the extractor correctly parses the value, it will return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the object's value verbatim and associate it with the field name `text`. For example, an object with the value "How much wood could a woodchuck chuck if a woodchuck could chuck wood?" would result in the following fields list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier. JSON can be nested arbitrarily. That is, the key of a top-level object can have an object as a value, and this object can have another object nested inside, and so on. Yokozuna's JSON extractor must have some method of converting this arbitrary nesting into a flat list. It does this by concatenating nested object fields with a separator. The default separator is `.`. An example should make this clearer.
+
+Below is JSON that represents a person, what city they are from, and what cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice.
+
+* Nested objects have their field names concatenated to form a field name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your schema defines this field as multi-valued.
+
+The XML extractor works in a very similar fashion to the JSON extractor except it also has element attributes to worry about. To see the document created for an object, without actually writing the object, you can use the extract HTTP endpoint. This will do a dry-run extraction and return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+  -H 'Content-Type: application/json' \
+  --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and types. For each document stored, every field must have a matching name in the schema, used to determine the field's type, which in turn determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempts to hide any details of the Solr schema: a user creates a schema for Yokozuna just as she would for Solr. Here is the general structure of a schema.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding options are declared.
+Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml) called `_yz_default`. This is an extremely general schema which makes heavy use of dynamic fields---it is intended for development and testing. In production, a schema should be tailored to the data being indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/) (AAE) is the process of discovering and correcting entropy (divergence) between the data stored in Riak's key-value backend and the indexes stored in Solr. The impetus for AAE is that failures come in all shapes and sizes---disk failure, dropped messages, network partitions, timeouts, overflowing queues, segmentation faults, power outages, etc. Failures range from obvious to invisible. Failure prevention is fraught with failure, as well. How do you prevent your prevention system from failing? You don't. Code for detection, not prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite expensive. To minimize the overall cost of detection, AAE makes use of hashtrees. Every partition has a pair of hashtrees; one for KV and another for Yokozuna. As data is written, the hashtrees are updated in real-time.
+
+Each tree stores the hashes of the objects. Periodically a partition is selected and the pair of hashtrees is _exchanged_. First the root hashes are compared. If they are equal, there is no more work to do. You could have millions of keys in one partition, and verifying they **all** agree takes the same time as comparing two hashes. If they don't match, the root's children are checked, and this process continues until the individual discrepancies are found. If either side is missing a key or the hashes for a key do not match, _repair_ is invoked on that key. Repair converges the KV data and its indexes, removing the entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the hashtrees themselves may contain some entropy. For example, what if the root hashes agree but a divergence exists in the actual data? Simple: you assume you can never fully trust the hashtrees, so periodically you _expire_ them. When expired, a tree is completely destroyed and then re-built from scratch. This requires folding all data for a partition, which can be expensive and take some time. For this reason, by default, expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's [screencast](http://coffee.jtuple.com/video/AAE.html).
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a stream of tokens. Solr allows many different methods of analysis, an important fact because different field values may represent different types of data. For data like unique identifiers, dates, and categories you want to index the value verbatim---it shouldn't be analyzed at all.
+For text like product summaries, or a blog post, you want to split the value into individual words so that they may be queried individually. You may also want to remove common words, lowercase words, or perform stemming. This is the process of _analysis_.
+
+Solr provides many different field types which analyze data in different ways, and custom analyzer chains may be built by stringing together XML in the schema file, allowing custom analysis for each field. For more information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata about it that should be indexed, for example storing an image with location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must be added _without_ modifying the object's value.
+
+See [Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md) for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index entries for a given Riak Object are co-located on the same physical machine. To query the entire index all partitions must be contacted. Adjacent partitions keep replicas of the same object. Replication allows the entire index to be considered by only contacting a subset of the partitions. The process of finding a covering set of partitions is known as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can be thought of as a unique set of nodes along with a covering set of partitions. Yokozuna treats the node list as physical hostnames and passes them to Solr's distributed search via the `shards` parameter. Partitions, on the other hand, are treated logically in Yokozuna. All partitions for a given node are stored in the same index, unlike KV, which uses partitions as physical separation. To properly filter out overlapping replicas, the partition data from the cover plan is passed to Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very expensive operation as much computation is done symbolically, and the process amounts to a knapsack problem. The larger the ring, the more expensive the calculation. Yokozuna takes advantage of the fact that it has no physical partitions by computing a coverage plan asynchronously every few seconds, caching the plan for query use. In the case of node failure or ownership change this could mean a delay between cluster state and the cached plan. This is, however, a good trade-off given the performance benefits, especially since even without caching there is a race, albeit one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time a batch is sent to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time a batch is sent to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time a drain is initiated to the time it completes.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees. Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr.
+
+* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting.
+
+* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window.
+
+* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window.
+
+* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of query latency, as measured from the time a request is sent to Solr to the time the response is received from Solr.
+
+* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window.
+
+* `search_index_bad_entry_count` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+
+* `search_index_bad_entry_one` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute.
+
+* `search_index_extract_fail_count` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak.
+
+* `search_index_extract_fail_one` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute.
+
+While most of the default values are sufficient, you may have to increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start.
diff --git a/content/riak/kv/2.9.0p5/using/reference/secondary-indexes.md b/content/riak/kv/2.9.0p5/using/reference/secondary-indexes.md
new file mode 100644
index 0000000000..de418f680f
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/secondary-indexes.md
@@ -0,0 +1,80 @@
+---
+title: "Secondary Indexes Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Secondary Indexes"
+    identifier: "managing_ref_2i"
+    weight: 110
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.0p5/dev/advanced/2i
+  - /riak/kv/2.9.0p5/dev/advanced/2i
+  - /riak/2.9.0p5/using/reference/secondary-indexes/
+  - /riak/2.9.0/using/reference/secondary-indexes/
+  - /riak/kv/2.9.0/using/reference/secondary-indexes/
+  - /riak/kv/2.9.0p1/using/reference/secondary-indexes/
+  - /riak/kv/2.9.0p2/using/reference/secondary-indexes/
+  - /riak/kv/2.9.0p3/using/reference/secondary-indexes/
+  - /riak/kv/2.9.0p4/using/reference/secondary-indexes/
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.0p5/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if you're looking to go beyond straightforward K/V operations, we now recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends.
+
+This document provides implementation and other details for Riak's [secondary indexes]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/secondary-indexes/) (2i) feature.
+
+## How It Works
+
+Secondary indexes use **document-based partitioning**, a system where indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode). This system is therefore often described as a local index. Secondary indexes are a list of key/value pairs that are similar to HTTP headers. At write time, objects are tagged with index entries consisting of key/value metadata. This metadata can be queried to retrieve the matching keys.
+
+![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png)
+
+Indexes reside on multiple machines.
+Since indexes for an object are stored on the same partition as the object itself, query-time performance issues might arise. When issuing a query, the system must read from a "covering" set of partitions and then merge the results. The system looks at how many replicas of data are stored---the N value or `n_val`---and determines the minimum number of partitions that it must examine (1 / `n_val`) to retrieve a full set of results, also taking into account any offline nodes.
+
+An application can modify the indexes for an object by reading an object, adding or removing index entries, and then writing the object. Finally, an object is automatically removed from all indexes when it is deleted. The object's value and its indexes should be thought of as a single unit. There is no way to alter the indexes of an object independently from the value of an object, and vice versa. Indexing is atomic, and indexes are updated in real time when an object is written. This means that an object will be present in future index queries as soon as the write operation completes.
+
+Riak stores 3 replicas of all objects by default, although this can be changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/replication-properties). The system is capable of generating a full set of results from one third of the system’s partitions as long as it chooses the right set of partitions. The query is sent to each partition, the index data is read, and a list of keys is generated and then sent back to the requesting node.
+
+> **Note on 2i and strong consistency**
+>
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach secondary index metadata to those objects, you can still perform strongly consistent operations on those objects but the secondary indexes will be ignored.
diff --git a/content/riak/kv/2.9.0p5/using/reference/snmp.md b/content/riak/kv/2.9.0p5/using/reference/snmp.md
new file mode 100644
index 0000000000..a853619df0
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/snmp.md
@@ -0,0 +1,170 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.0p5/ops/running/monitoring/snmp
+  - /riak/kv/2.9.0p5/ops/running/monitoring/snmp
+  - /riak/2.9.0p5/using/reference/snmp/
+  - /riak/2.9.0/using/reference/snmp/
+  - /riak/kv/2.9.0/using/reference/snmp/
+  - /riak/kv/2.9.0p1/using/reference/snmp/
+  - /riak/kv/2.9.0p2/using/reference/snmp/
+  - /riak/kv/2.9.0p3/using/reference/snmp/
+  - /riak/kv/2.9.0p4/using/reference/snmp/
+---
+
+Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document covers only SNMP v2c, the last supported version. After the release of Riak KV 2.2.3 Enterprise Edition, support for SNMP was dropped.
+The configuration examples below are left for anyone analysing legacy settings; they only work with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (Ex: `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: The commas in the IP are in the correct format
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                   {force_load, true}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings you can start your Riak node and will be able to snmpwalk the node to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak you will need to reference the MIB shipped with Riak.
+For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+  -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+  192.168.52.129:4000 RIAK
+```
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
diff --git a/content/riak/kv/2.9.0p5/using/reference/statistics-monitoring.md b/content/riak/kv/2.9.0p5/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..3013729327
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/statistics-monitoring.md
@@ -0,0 +1,399 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/running/stats-and-monitoring
+  - /riak/kv/2.9.0p5/ops/running/stats-and-monitoring
+  - /riak/2.9.0p5/using/reference/statistics-monitoring/
+  - /riak/2.9.0/using/reference/statistics-monitoring/
+  - /riak/kv/2.9.0/using/reference/statistics-monitoring/
+  - /riak/kv/2.9.0p1/using/reference/statistics-monitoring/
+  - /riak/kv/2.9.0p2/using/reference/statistics-monitoring/
+  - /riak/kv/2.9.0p3/using/reference/statistics-monitoring/
+  - /riak/kv/2.9.0p4/using/reference/statistics-monitoring/
+---
+
+Riak provides data related to current operating status, which includes statistics in the form of counters and histograms. These statistics are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered statistics, as well as numerous solutions for monitoring and gathering statistics that our customers and community report using successfully in Riak cluster environments. You can learn more about the specific Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/http/status) documentation.
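+
+For a quick first look at these statistics, you can pull the full JSON blob from any node over HTTP. The following is a minimal sketch; the host, port, and the use of `jq` for filtering are illustrative assumptions, so adjust them for your environment:
+
+```bash
+# Fetch all statistics from the local node as JSON
+curl -s -H "Accept: application/json" http://localhost:8098/stats | jq .
+
+# Extract a single statistic, e.g. reads coordinated by this node
+curl -s -H "Accept: application/json" http://localhost:8098/stats | jq .node_gets
+```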
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with diagnostics and early warnings of potential problems, as well as help guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor Load
+* Available Memory
+* Available disk space
+* Used file descriptors
+* Swap Usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory activity and writebacks. Things like massive flushes of dirty pages or steadily climbing writeback volumes can indicate poor virtual memory tuning. More information can be found [here][sysctl_vm_txt] and in our documentation on [system tuning]({{<baseurl>}}riak/kv/2.9.0p5/using/performance/#storage-and-file-system-tuning).
+
+## Riak Metrics to Graph
+
+Riak metrics fall into several general categories:
+
+1. Throughput metrics
+2. Latency metrics
+3. Erlang resource usage metrics
+4. General Riak load/health metrics
+
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/inspecting-node) is not practical, you should pick a minimum relevant subset from these categories. Some of the most helpful metrics are discussed below.
+
+### Throughput Metrics
+
+Graphing the throughput stats relevant to your use case is often helpful for capacity planning and usage trend analysis. In addition, it helps you establish an expected baseline -- that way, you can investigate unexpected spikes or dips in the throughput. The following stats are recorded for operations that happened *during the last minute*.
+
+Metric | Relevance | Operations (for the last minute)
+:--------|:--------|:--------------------------------
+```node_gets``` | K/V | Reads coordinated by this node
+```node_puts``` | K/V | Writes coordinated by this node
+```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes
+```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes
+```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes
+```search_query_throughput_one``` | Search | Search queries on the node
+```search_index_throughput_one``` | Search | Documents indexed by Search
+```consistent_gets``` | Strong Consistency | Consistent reads on this node
+```consistent_puts``` | Strong Consistency | Consistent writes on this node
+```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads
+
+Note that there are no separate stats for updates to Flags or Registers, as these are included in ```vnode_map_update```.
+
+### Latency Metrics
+
+As with the throughput metrics, keeping an eye on average (and max) latency times will help detect usage patterns, and provide advanced warnings for potential problems.
+
+{{% note title="Note on FSM Time Stats" %}}
+FSM Time Stats represent the amount of time in microseconds required to traverse the GET or PUT Finite State Machine code, offering a picture of general node health. From your application's perspective, FSM Time effectively represents experienced latency. Mean, Median, and 95th-, 99th-, and 100th-percentile (Max) counters are displayed. These are one-minute stats.
+{{% /note %}}
+
+Metric | Also | Relevance | Latency (in microseconds)
+:------|:-----|:----------|:-------------------------
+```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client
+```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client
+```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation
+```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation
+```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation
+```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency
+```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document
+```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency
+```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency
+
+### Erlang Resource Usage Metrics
+
+These are system metrics from the perspective of the Erlang VM, measuring resources allocated and used by Erlang.
+
+Metric | Notes
+:------|:-------------------------
+```sys_process_count``` | Number of processes currently running in the Erlang VM
+```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes)
+```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes)
+
+### General Riak Load/Health Metrics
+
+These various stats give a picture of the general level of activity or load on the Riak node at any given moment.
+
+Metric | Also | Notes
+:------|:-----|:------------------
+```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones.
+```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion.
+```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting.
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute
+```pbc_active``` | | Number of currently active protocol buffer connections
+```pbc_connects``` | | Number of new protocol buffer connections established during the last minute
+```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes)
+```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0)
+```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection
+```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection
+
+### General Riak Search Load/Health Metrics
+
+These various stats give a picture of the general level of activity or load on the Riak node at any given moment.
+
+Metric | Description
+:------|:------------
+`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+`search_index_bad_entry_one` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute.
+`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last start of Riak.
+`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute.
+
+## Command-line Interface
+
+The [`riak-admin`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/) tool provides two interfaces for retrieving statistics and other information: `status` and `stat`.
+
+### status
+
+Running the `riak-admin status` command will return all of the currently available information from a running node.
+
+```bash
+riak-admin status
+```
+
+This will return a list of over 300 key/value pairs, like this:
+
+```
+1-minute stats for 'dev1@127.0.0.1'
+-------------------------------------------
+connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1']
+consistent_get_objsize_100 : 0
+consistent_get_objsize_195 : 0
+... etc ...
+```
+
+A comprehensive list of available stats can be found in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+
+### stat
+
+The `riak-admin stat` command is related to the `riak-admin status` command but provides a more fine-grained interface for interacting with stats and information. Full documentation of this command can be found in the [riak-admin Command Line Interface]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#stat) document.
+
+## Statistics and Monitoring Tools
+
+There are many open source, self-hosted, and service-based solutions for aggregating and analyzing statistics and log data for the purposes of monitoring, alerting, and trend analysis on a Riak cluster. Some solutions provide Riak-specific modules or plugins as noted.
+
+The following are solutions which customers and community members have reported success with when used for monitoring the operational status of their Riak clusters. Community and open source projects are presented along with commercial and hosted services.
+
+{{% note title="Note on Riak 2.x Statistics Support" %}}
+Many of the below tools were either created by third parties or Basho engineers for general usage, and have been passed to the community for further updates. As such, many of the below only aggregate the statistics and messages that were output by Riak 1.4.x.
+
+Like all code under [Basho Labs](https://github.com/basho-labs/), the below tools are "best effort" and have no dedicated Basho support. We both appreciate and need your contribution to keep these tools stable and up to date. Please open up a GitHub issue on the repository if you'd like to be a maintainer.
+
+Look for banners calling out the tools we've verified that support the latest Riak 2.x statistics!
+{{% /note %}}
+
+### Self-Hosted Monitoring Tools
+
+#### Riaknostic
+
+[Riaknostic](http://riaknostic.basho.com) is a growing suite of diagnostic checks that can be run against your Riak node to discover common problems and recommend how to resolve them. These checks are derived from the experience of the Basho Client Services Team as well as numerous public discussions on the mailing list, IRC room, and other online media.
+
+Riaknostic integrates into the `riak-admin` command via a `diag` subcommand, and is a great first step in the process of diagnosing and troubleshooting issues on Riak nodes.
+
+#### Riak Control
+
+[Riak Control]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak clusters. It is designed to give you quick insight into the health of your cluster and allow for easy management of nodes.
+
+While Riak Control does not currently offer specific monitoring and statistics aggregation or analysis functionality, it does offer features which provide immediate insight into overall cluster health, node status, and handoff operations.
+
+#### collectd
+
+[collectd](http://collectd.org) gathers statistics about the system it is running on and stores them. The statistics are then typically graphed to find current performance bottlenecks, predict system load, and analyze trends.
+
+#### Ganglia
+
+[Ganglia](http://ganglia.info) is a monitoring system specifically designed for large, high-performance groups of computers, such as clusters and grids. Customers and community members using Riak have reported success in using Ganglia to monitor Riak clusters.
+
+A [Riak Ganglia module][riak_ganglia] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/http/status) endpoint is also available.
+
+#### Nagios
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x.**
+{{% /note %}}
+
+[Nagios](http://www.nagios.org) is a monitoring and alerting solution that can provide information on the status of Riak cluster nodes, in addition to various types of alerting when particular events occur. Nagios also offers logging and reporting of events and can be used for identifying trends and capacity planning.
+
+A collection of [reusable Riak-specific scripts][riak_nagios] is available to the community for use with Nagios.
+
+#### OpenTSDB
+
+[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database (TSDB) used to store, index, and serve metrics from various sources. It can collect data at a large scale and graph these metrics on the fly.
+
+A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of the [tcollector framework][tcollector].
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream processing language to aggregate events from client agents running on Riak nodes, and can help track trends or report on events as they occur. Statistics can be gathered from your nodes and forwarded to a solution such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project consisting of small programs for sending data to Riemann provides a module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring, alerting, and graphing solution that can provide information on the state of Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started monitoring Riak using Zabbix.
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have reported successfully using for statistics gathering and monitoring within their Riak clusters.
+
+#### Circonus
+
+[Circonus](http://circonus.com) provides organization-wide monitoring, trend analysis, alerting, notifications, and dashboards. It can be used to provide trend analysis and help with troubleshooting and capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform that can provide information on the current and past states of Riak nodes and visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine-generated data such as log files. It can be connected to Riak's HTTP statistics [`/stats`]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak statistics data. These data are then available for real-time graphing, search, and other visualization ideal for troubleshooting complex issues and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistic information which can be aggregated, monitored, analyzed, graphed, and reported on in a variety of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to include it (or would otherwise like to update the information on this page), feel free to fork the docs, add it in the appropriate section, and send a pull request to the [Riak Docs](https://github.com/basho/basho_docs).
+
+## References
+
+* [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/inspecting-node)
+* [Riaknostic](http://riaknostic.basho.com)
+* [Riak Control]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-control/)
+* [collectd](http://collectd.org)
+* [Ganglia](http://ganglia.info)
+* [Nagios](http://www.nagios.org)
+* [OpenTSDB](http://opentsdb.net)
+* [tcollector framework][tcollector]
+* [Riemann](http://github.com/riemann/riemann/)
+* [Riemann Github](https://github.com/aphyr/riemann)
+* [Zabbix](http://www.zabbix.com)
+* [Circonus](http://circonus.com)
+* [New Relic](http://newrelic.com)
+* [Splunk](http://www.splunk.com)
+* [Riak Docs on Github](https://github.com/basho/basho_docs)
+
+[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt
+[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/
+[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/
+[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/
+[riak_nagios]: https://github.com/basho/riak_nagios
+[tcollector]: https://github.com/stumbleupon/tcollector
+[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py
+[riak_zabbix]: https://github.com/basho/riak-zabbix
+[riak_new_relic]: https://github.com/basho/riak_newrelic
+[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/
diff --git a/content/riak/kv/2.9.0p5/using/reference/strong-consistency.md b/content/riak/kv/2.9.0p5/using/reference/strong-consistency.md
new file mode 100644
index 0000000000..bfdba31321
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/reference/strong-consistency.md
@@ -0,0 +1,154 @@
+---
+title: "Strong Consistency Reference"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Strong Consistency"
+    identifier: "managing_ref_strong_consistency"
+    weight: 112
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/reference/strong-consistency/
+  - /riak/2.9.0/using/reference/strong-consistency/
+  - /riak/kv/2.9.0/using/reference/strong-consistency/
+  - /riak/kv/2.9.0p1/using/reference/strong-consistency/
+  - /riak/kv/2.9.0p2/using/reference/strong-consistency/
+  - /riak/kv/2.9.0p3/using/reference/strong-consistency/
+  - /riak/kv/2.9.0p4/using/reference/strong-consistency/
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency
+
+Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition (i.e. fault) tolerance and high read and write availability.
+
+While this focus on high availability is a great fit for many data storage needs, there are also many use cases for which strong data consistency is more important than availability. Basho introduced a new strong consistency option in version 2.0 to address these use cases. In Riak, strong consistency is applied [using bucket types][usage bucket types], which enables developers to apply strong consistency guarantees on a per-key basis.
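+
+As a rough sketch of what that looks like in practice (the bucket type name here is illustrative), a strongly consistent bucket type is created by setting the `consistent` property and then activating the type:
+
+```bash
+# Create a bucket type whose buckets hold strongly consistent data
+riak-admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'
+
+# Activate the type so that clients can use it
+riak-admin bucket-type activate strongly_consistent
+```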
+
+Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.0p5/configuring/strong-consistency) looking to manage, configure, and monitor strong consistency.
+
+## Strong vs. Eventual Consistency
+
+If you successfully write a value to a key in a strongly consistent system, the next successful read of that key is guaranteed to show that write. A client will never see out-of-date values. The drawback is that some operations may fail if an insufficient number of object replicas are available. More on this in the section on [trade-offs](#trade-offs).
+
+In an eventually consistent system, on the other hand, a read may return an out-of-date value, particularly during system or network failures. The advantage of this approach is that reads and writes can succeed even when a cluster is experiencing significant service degradation.
+
+### Example
+
+Building on the example presented in the [eventual consistency][concept eventual consistency] doc, imagine that information about who manages Manchester United is stored in Riak, in the key `manchester-manager`. In the eventual consistency example, the value associated with this key was originally `David Moyes`, meaning that that was the first successful write to that key. But then `Louis van Gaal` became Man U's manager, and a write was executed to change the value of `manchester-manager`.
+
+Now imagine that this write failed on one node in a multi-node cluster. Thus, all nodes report that the value of `manchester-manager` is `Louis van Gaal` except for one. On the errant node, the value of the `manchester-manager` key is still `David Moyes`. An eventually consistent system is one in which a get request will most likely return `Louis van Gaal` but could return the outdated value `David Moyes`.
+
+In a strongly consistent system, conversely, any successful read on `manchester-manager` will return `Louis van Gaal` and never `David Moyes`. Reads will return `Louis van Gaal` every single time until Man U gets a new manager and someone performs a successful write to `manchester-manager` to change its value.
+
+It might also be useful to imagine it a bit more abstractly. The following causal sequence would characterize a strongly consistent system:
+
+1. The value of the key `k` is set to `v`
+2. All successful reads on `k` return `v`
+3. The value of `k` is changed to `v2`
+4. All successful reads on `k` return `v2`
+5. And so forth
+
+At no point in time does this system return an out-of-date value.
+
+The following sequence could characterize an eventually consistent system:
+
+1. A write is made that sets the value of the key `k` to `v`
+2. Nearly all reads to `k` return `v`, but a small percentage return `not found`
+3. A write to `k` changes the value to `v2`
+4. Nearly all reads to `k` now return `v2`, but a small number return the outdated `v` (or even `not found`) because the newer value hasn't yet been replicated to all nodes
+
+## Making the Strong vs. Eventual Decision
+
+The first system described above may sound like the undisputed champion, and the second system undesirable. However:
+
+1. Reads and writes on the first system will often be slower---if only by a few milliseconds---because the system needs to manage reads and writes more carefully.
+   If performance is of primary concern, the first system might not be worth the sacrifice.
+2. Reads and writes on the first system may fail entirely if enough servers are unavailable. If high availability is the top priority, then the second system has a significant advantage.
+
+So when deciding whether to use strong consistency in Riak, the following question needs to be asked:
+
+#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value?
+
+If the answer is yes, then you should seriously consider using Riak in a strongly consistent way for the data that demands it, while bearing in mind that other data can still be stored in Riak in an eventually consistent way.
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available than eventually consistent operations because they require a **quorum** of available object replicas to succeed. Quorum is defined as N / 2 + 1, or `n_val` / 2 + 1. If N is set to 7, at least 4 object replicas must be available; if N is 3, at least 2 must be available.
+
+If there is a network partition that leaves less than a quorum of object replicas available within an ensemble, strongly consistent operations against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble are offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the [operations]({{<baseurl>}}riak/kv/2.9.0p5/configuring/strong-consistency/#fault-tolerance) documentation.
+
+A second trade-off regards performance. Riak's implementation of strong consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/2.9.0p5/configuring/strong-consistency/#performance).
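+
+As a closing operational note (covered in detail in the configuration pages linked above), the consensus subsystem behind these guarantees is off by default and must be enabled in `riak.conf` on every node in the cluster; a minimal sketch:
+
+```riakconf
+## Enable the consensus subsystem used for strongly consistent operations
+strong_consistency = on
+```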
diff --git a/content/riak/kv/2.9.0p5/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.9.0p5/using/reference/v2-multi-datacenter.md new file mode 100644 index 0000000000..bfb014f77b --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/v2-multi-datacenter.md @@ -0,0 +1,44 @@ +--- +title: "V2 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "V2 Replication" + identifier: "managing_ref_v2" + weight: 115 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.0p5/using/reference/v2-multi-datacenter/ + - /riak/2.9.0/using/reference/v2-multi-datacenter/ + - /riak/kv/2.9.0/using/reference/v2-multi-datacenter/ + - /riak/kv/2.9.0p1/using/reference/v2-multi-datacenter/ + - /riak/kv/2.9.0p2/using/reference/v2-multi-datacenter/ + - /riak/kv/2.9.0p3/using/reference/v2-multi-datacenter/ + - /riak/kv/2.9.0p4/using/reference/v2-multi-datacenter/ +--- + + +[v2 mdc arch]: ./architecture +[v2 mdc fullsync]: ./scheduling-fullsync + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/) instead. +{{% /note %}} + + +## In This Section + +#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch] + +Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities. + +[Learn More >>][v2 mdc arch] + +#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v2 mdc fullsync] diff --git a/content/riak/kv/2.9.0p5/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.9.0p5/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..0065ddefa8 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,134 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/2.9.0p5/ops/mdc/v2/architecture + - /riak/kv/2.9.0p5/ops/mdc/v2/architecture + - /riak/2.9.0p5/using/reference/v2-multi-datacenter/architecture/ + - /riak/2.9.0/using/reference/v2-multi-datacenter/architecture/ + - /riak/kv/2.9.0/using/reference/v2-multi-datacenter/architecture/ + - /riak/kv/2.9.0p1/using/reference/v2-multi-datacenter/architecture/ + - /riak/kv/2.9.0p2/using/reference/v2-multi-datacenter/architecture/ + - /riak/kv/2.9.0p3/using/reference/v2-multi-datacenter/architecture/ + - /riak/kv/2.9.0p4/using/reference/v2-multi-datacenter/architecture/ +--- + + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). 
If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e., +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime modes are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. The site node in the secondary cluster initiates fullsync replication + with the primary node by sending a message to the listener node in + the primary cluster +3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode) in their respective clusters and compute a hash for + each key's object value. The site node on the secondary cluster sends + its complete list of key/hash pairs to the listener node in the + primary cluster. The listener node then sequentially compares those + pairs with its own, identifying any objects that are missing or in + need of an update in the secondary cluster. +4. The listener node streams the missing objects/updates to the + secondary cluster. +5. The secondary cluster replicates the updates within the cluster to + achieve the new object values, completing the fullsync cycle + +<br> +![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png) +<br> + +## Realtime Replication + +Riak performs the following steps during realtime +replication, as illustrated in the figure below. + +1. The secondary cluster establishes a TCP connection to the primary +2. Realtime replication of a key/object is initiated when an update is + sent from a client to the primary cluster +3. The primary cluster replicates the object locally +4.
The listener node on the primary cluster streams an update to the + secondary cluster +5. The site node within the secondary cluster receives and replicates + the update + +<br> +![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png) +<br> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/2.9.0p5/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +source and sink cluster. diff --git a/content/riak/kv/2.9.0p5/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.0p5/using/reference/v2-multi-datacenter/scheduling-fullsync.md new file mode 100644 index 0000000000..6c7c1eba63 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -0,0 +1,57 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Scheduling Fullsync" + identifier: "managing_ref_v2_fullsync" + weight: 101 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/2.9.0p5/ops/mdc/v2/scheduling-fullsync + - /riak/kv/2.9.0p5/ops/mdc/v2/scheduling-fullsync + - /riak/2.9.0p5/using/reference/v2-multi-datacenter/scheduling-fullsync/ + - /riak/2.9.0/using/reference/v2-multi-datacenter/scheduling-fullsync/ + - /riak/kv/2.9.0/using/reference/v2-multi-datacenter/scheduling-fullsync/ + - /riak/kv/2.9.0p1/using/reference/v2-multi-datacenter/scheduling-fullsync/ + - /riak/kv/2.9.0p2/using/reference/v2-multi-datacenter/scheduling-fullsync/ + - /riak/kv/2.9.0p3/using/reference/v2-multi-datacenter/scheduling-fullsync/ + - /riak/kv/2.9.0p4/using/reference/v2-multi-datacenter/scheduling-fullsync/ +--- + + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. +{{% /note %}} + + +## Scheduling Fullsync Operation + +With the `pause` and `resume` commands it is possible to limit the +fullsync operation to off-peak times. First, disable `fullsync_interval` +and set `fullsync_on_connect` to `false`. Then, using cron or something +similar, execute the commands below at the start of the sync window. 
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` diff --git a/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter.md b/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..e5664270cd --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter.md @@ -0,0 +1,56 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.0p5/using/reference/v3-multi-datacenter/ + - /riak/2.9.0/using/reference/v3-multi-datacenter/ + - /riak/kv/2.9.0/using/reference/v3-multi-datacenter/ + - /riak/kv/2.9.0p1/using/reference/v3-multi-datacenter/ + - /riak/kv/2.9.0p2/using/reference/v3-multi-datacenter/ + - /riak/kv/2.9.0p3/using/reference/v3-multi-datacenter/ + - /riak/kv/2.9.0p4/using/reference/v3-multi-datacenter/ +--- + + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. 
+ +[Learn More >>][v3 mdc fullsync] diff --git a/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..09b2a6aff2 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,133 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/ops/mdc/v3/aae + - /riak/kv/2.9.0p5/ops/mdc/v3/aae + - /riak/2.9.0p5/using/reference/v3-multi-datacenter/aae/ + - /riak/2.9.0/using/reference/v3-multi-datacenter/aae/ + - /riak/kv/2.9.0/using/reference/v3-multi-datacenter/aae/ + - /riak/kv/2.9.0p1/using/reference/v3-multi-datacenter/aae/ + - /riak/kv/2.9.0p2/using/reference/v3-multi-datacenter/aae/ + - /riak/kv/2.9.0p3/using/reference/v3-multi-datacenter/aae/ + - /riak/kv/2.9.0p4/using/reference/v3-multi-datacenter/aae/ +--- + + +[glossary aae]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. + +## Overview + +Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak +Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can now take advantage of Riak's [active anti-entropy][glossary aae] (AAE) subsystem, which was first introduced as a +technology preview in Riak 1.3.0. + +AAE plus Replication uses existing Riak AAE hash trees stored in +LevelDB, so if AAE is already active, there is no additional startup +delay for enabling the `aae` fullsync strategy. AAE can also be enabled +for the first time on a cluster, although some custom settings can +enhance performance in this case to help AAE trees be built more +quickly. See [Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization). + +## Requirements + +* Riak Enterprise version 1.4.0 or later installed on source and sink + clusters +* Riak MDC Replication Version 3 enabled on source and sink + clusters +* Both source and sink clusters must be of the same ring size +* AAE must be enabled on both source and sink clusters +* `fullsync_strategy` in the `riak_repl` section of the + `advanced.config` configuration file must be set to `aae` on both + source and sink clusters +* AAE trees must have been built on both source and sink clusters. In + the event that an AAE tree is not built on both the source and sink, + fullsync will default to the `keylist` fullsync strategy for that + partition. + +## Configuration + +If you are using Riak version 2.0, configuration is managed +using the `advanced.config` files on +each node. The semantics of the `advanced.config` file are similar to +the formerly used `app.config` file.
For more information and for a list +of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced]. + +## Enable Active Anti-Entropy + +To enable [active anti-entropy][glossary aae] (AAE) in Riak, you must enable it in both source and sink clusters. If it is not +enabled, the `keylist` strategy will be used. + +To enable AAE in Riak KV: + +```riakconf +anti_entropy = active +``` + +By default, it could take a couple of days for the cluster to build all +of the necessary hash trees because the default **build rate** of trees +is to build 1 partition per hour, per node. With a +[ring size][concept clusters] of 256 and 5 nodes, that is 2 days. + +Changing the rate of tree building can speed up this process, with the +caveat that rebuilding a tree takes processing time from the cluster, +and this should not be done without assessing the possible impact on +get/put latencies for normal cluster operations. For a production +cluster, we recommend leaving the default in place. + +For a test cluster, the build rate can be changed in `riak.conf`. If a +partition has not had its AAE tree built yet, it will default to using +the `keylist` replication strategy. Instructions on these settings can +be found in the section directly below. + +<div id="aae-tree-build-optimization"></div> + +### AAE Tree Build Optimization + +You can speed up the build rate for AAE-related hash trees by adjusting +the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit` +settings. + +```riakconf +anti_entropy.tree.build_limit.number = 10 +anti_entropy.tree.build_limit.per_timespan = 1h +anti_entropy.concurrency_limit = 10 +``` + +### Enable AAE Fullsync Replication Strategy + +Finally, the replication fullsync strategy must be set to use `aae` on +both source and sink clusters. If not, the `keylist` replication +strategy will be used. + +To enable AAE with Version 3 MDC Replication: + +```advancedconfig +{riak_repl, [ + % ... + {fullsync_strategy, aae}, + % ...
+ ]} ``` diff --git a/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/architecture.md new file mode 100644 index 0000000000..f0f6aa058d --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/architecture.md @@ -0,0 +1,190 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Architecture" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Architecture" + identifier: "managing_ref_v3_architecture" + weight: 100 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/ops/mdc/v3/architecture + - /riak/kv/2.9.0p5/ops/mdc/v3/architecture + - /riak/2.9.0p5/using/reference/v3-multi-datacenter/architecture/ + - /riak/2.9.0/using/reference/v3-multi-datacenter/architecture/ + - /riak/kv/2.9.0/using/reference/v3-multi-datacenter/architecture/ + - /riak/kv/2.9.0p1/using/reference/v3-multi-datacenter/architecture/ + - /riak/kv/2.9.0p2/using/reference/v3-multi-datacenter/architecture/ + - /riak/kv/2.9.0p3/using/reference/v3-multi-datacenter/architecture/ + - /riak/kv/2.9.0p4/using/reference/v3-multi-datacenter/architecture/ +--- + + +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#vnode +[concept clusters]: {{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters + +## How Version 3 Replication Works + +In Multi-Datacenter (MDC) Replication, a cluster can act as either the + +* **source cluster**, which sends replication data to one or more +* **sink clusters**, which are generally located in datacenters in other + regions or countries. + +Bidirectional replication can easily be established by making a cluster +both a source and sink to other clusters. Riak +Multi-Datacenter Replication is considered "masterless" in that all +clusters participating will resolve replicated writes via the normal +resolution methods available in Riak. + +In Multi-Datacenter Replication, there are two primary modes of +operation: + +* **Fullsync** replication is a complete synchronization that occurs + between source and sink cluster(s), which can be performed upon + initial connection of a sink cluster if you wish +* **Realtime** replication is a continual, incremental synchronization + triggered by successful writing of new updates on the source cluster + +Fullsync and realtime replication modes are described in detail below. + +## Concepts + +### Sources + +A source refers to a cluster that is the primary producer of replication +data. A source can also refer to any node that is part of the source +cluster. Source clusters push data to sink clusters. + +### Sinks + +A sink refers to a cluster that is the primary consumer of replication +data. A sink can also refer to any node that is part of the sink +cluster. Sink clusters receive data from source clusters. + +### Cluster Manager + +The cluster manager is a Riak service that provides +information regarding nodes and protocols supported by the sink and +source clusters. This information is primarily consumed by the +`riak-repl connect` command. + +### Fullsync Coordinator + +In fullsync replication, a node on the source cluster is elected to be +the *fullsync coordinator*. This node is responsible for starting and +stopping replication to the sink cluster. It also communicates with the +sink cluster to exchange key lists and ultimately transfer data across a +TCP connection.
If a fullsync coordinator is terminated as the result of +an error, it will automatically restart on the current node. If the node +becomes unresponsive, a leader election will take place within 5 seconds +to select a new node from the cluster to become the coordinator. In the +event of a coordinator restart, a fullsync will have to restart. + +## Fullsync Replication + +Fullsync replication scans through the list of partitions in a Riak +cluster and determines which objects in the sink cluster need to be +updated. A source partition is synchronized to a node on the sink +cluster containing the current partition. + +## Realtime Replication + +In realtime replication, a node in the source cluster will forward data +to the sink cluster. A node in the source cluster does not necessarily +connect to a node containing the same [vnode][glossary vnode] on +the sink cluster. This allows Riak to spread out realtime replication +across the entire cluster, thus improving throughput and making +replication more fault tolerant. + +### Initialization + +Before a source cluster can begin pushing realtime updates to a sink, +the following commands must be issued: + +1. `riak-repl realtime enable <sink_cluster>` + + After this command, the realtime queues (one for each Riak node) are + populated with updates to the source cluster, ready to be pushed to + the sink. + +2. `riak-repl realtime start <sink_cluster>` + + This instructs the Riak connection manager to contact the sink + cluster. + + <br /> + ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png) + <br /> + + At this point realtime replication commences. + +<ol start="3"> +<li>Nodes with queued updates establish connections to the sink cluster +and replication begins.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png) +<br /> + +### Realtime queueing and synchronization + +Once initialized, realtime replication continues to use the queues to +store data updates for synchronization. + +<ol start="4"> +<li>The client sends an object to store on the source cluster.</li> +<li>Riak writes N replicas on the source cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png) +<br /> + +<ol start="6"> +<li>The new object is stored in the realtime queue.</li> +<li>The object is copied to the sink cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png) +<br /> + +<ol start="8"> +<li>The destination node on the sink cluster writes the object to N +nodes.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png) +<br /> + +<ol start="9"> +<li>The successful write of the object to the sink cluster is +acknowledged and the object removed from the realtime queue.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png) +<br /> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size][concept clusters]; if you are using fullsync +replication, every bucket's `n_val` must be the same in both the +source and sink cluster. 
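The ring-size restriction above can be sanity-checked from the shell before wiring two clusters together. A minimal sketch, assuming the `ring_num_partitions` stat is exposed by `riak-admin status` as on recent Riak KV releases; run it on a node in each cluster and compare the output:

```bash
#!/bin/sh
# Print this cluster's ring size; the value must match on both clusters
# before enabling fullsync or realtime replication.
riak-admin status | grep ring_num_partitions
```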
diff --git a/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/cascading-writes.md new file mode 100644 index 0000000000..2926624fe1 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/cascading-writes.md @@ -0,0 +1,106 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Cascading Realtime Writes" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Cascading Writes" + identifier: "managing_ref_v3_cascading_writes" + weight: 102 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/ops/mdc/v3/cascading-writes + - /riak/kv/2.9.0p5/ops/mdc/v3/cascading-writes + - /riak/2.9.0p5/using/reference/v3-multi-datacenter/cascading-writes/ + - /riak/2.9.0/using/reference/v3-multi-datacenter/cascading-writes/ + - /riak/kv/2.9.0/using/reference/v3-multi-datacenter/cascading-writes/ + - /riak/kv/2.9.0p1/using/reference/v3-multi-datacenter/cascading-writes/ + - /riak/kv/2.9.0p2/using/reference/v3-multi-datacenter/cascading-writes/ + - /riak/kv/2.9.0p3/using/reference/v3-multi-datacenter/cascading-writes/ + - /riak/kv/2.9.0p4/using/reference/v3-multi-datacenter/cascading-writes/ +--- + + +## Introduction + +Riak includes a feature that cascades realtime writes across +multiple clusters. + +Cascading Realtime Writes is enabled by default on new clusters running +Riak. It will need to be manually enabled on existing clusters. + +Cascading realtime requires the `{riak_repl, rtq_meta}` capability to +function. + +{{% note title="Note on cascading tracking" %}} +Cascading tracking is a simple list of where an object has been written. This +works well for most common configurations. Larger installations, however, may +have writes cascade to clusters to which other clusters have already written. +{{% /note %}} + + +``` ++---+ +---+ +---+ +| A | <-> | B | <-> | C | ++---+ +---+ +---+ + ^ ^ + | | + V V ++---+ +---+ +---+ +| F | <-> | E | <-> | D | ++---+ +---+ +---+ +``` + +In the diagram above, a write at cluster A will begin two cascades. One +goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and +finally B. Each cascade will loop around to A again, sending a +replication request even if the same request has already occurred from +the opposite direction, creating 3 extra write requests. + +This can be mitigated by disabling cascading in a cluster. If cascading +were disabled on cluster D, a write at A would begin two cascades. One +would go through B, C, and D, the other through F, E, and D. This +reduces the number of extraneous write requests to 1. + +A different topology can also prevent extra write requests: + +``` ++---+ +---+ +| A | | E | ++---+ +---+ + ^ ^ ^ ^ + | / +---+ +---+ / | + | > | C | <-> | D | < | + | / +---+ +---+ / | + V V V V ++---+ +---+ +| B | | F | ++---+ +---+ +``` + +A write at A will cascade to C and B. B will not cascade to C because +A will have already added C to the list of clusters where the write has +occurred. C will then cascade to D. D then cascades to E and F. E and F +see that the other was sent a write request (by D), and so they do not +cascade.
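As a concrete illustration of the mitigation described above, cascading can be turned off on a single cluster with the `riak-repl` commands covered in the Usage section below. This minimal sketch assumes it is run against a node of cluster D in the first diagram:

```bash
#!/bin/sh
# Disable cascading on this cluster only: it will still accept and locally
# apply cascaded writes, but will not forward them onward.
riak-repl realtime cascades never

# Confirm the current setting.
riak-repl realtime cascades
```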
+ +## Usage + +Riak Cascading Writes can be enabled and disabled using the +`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/v3-multi-datacenter) for more information. + +To show the current settings: + +`riak-repl realtime cascades` + +To enable cascading: + +`riak-repl realtime cascades always` + +To disable cascading: + +`riak-repl realtime cascades never` diff --git a/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/scheduling-fullsync.md new file mode 100644 index 0000000000..30d57079a0 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -0,0 +1,76 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Scheduling Fullsync" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Scheduling Fullsync" + identifier: "managing_ref_v3_fullsync" + weight: 103 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.0p5/ops/mdc/v3/scheduling-fullsync + - /riak/kv/2.9.0p5/ops/mdc/v3/scheduling-fullsync + - /riak/2.9.0p5/using/reference/v3-multi-datacenter/scheduling-fullsync/ + - /riak/2.9.0/using/reference/v3-multi-datacenter/scheduling-fullsync/ + - /riak/kv/2.9.0/using/reference/v3-multi-datacenter/scheduling-fullsync/ + - /riak/kv/2.9.0p1/using/reference/v3-multi-datacenter/scheduling-fullsync/ + - /riak/kv/2.9.0p2/using/reference/v3-multi-datacenter/scheduling-fullsync/ + - /riak/kv/2.9.0p3/using/reference/v3-multi-datacenter/scheduling-fullsync/ + - /riak/kv/2.9.0p4/using/reference/v3-multi-datacenter/scheduling-fullsync/ +--- + + +[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#advanced-configuration + +The `fullsync_interval` parameter can be configured in the `riak_repl` +section of [`advanced.config`][config reference#advanced] with either: + +* a single integer value representing the duration to wait, in minutes, + between fullsyncs, _or_ +* a list of `{"clustername", time_in_minutes}` pairs, one for each sink + participating in fullsync replication. Note the commas separating each + pair, and `[ ]` surrounding the entire list. + +## Examples + +Sharing a fullsync time (in minutes) for all sinks: + +```advancedconfig +{riak_repl, [ + % ... + {data_root, "/configured/repl/data/root"}, + {fullsync_interval, 90} %% fullsync runs every 90 minutes + % ... + ]} +``` + +List of multiple sinks with separate times in minutes: + +```advancedconfig +{riak_repl, [ + % ... + {data_root, "/configured/repl/data/root"}, + % clusters sink_boston + sink_newyork have different intervals (in minutes) + {fullsync_interval, [ + {"sink_boston", 120}, %% fullsync to sink_boston will run every 120 minutes + {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes + + ]} +``` + +## Additional Fullsync Stats + +Additional fullsync stats per sink have been added in Riak. + +* `fullsyncs_completed` — The number of fullsyncs that have been + completed to the specified sink cluster. +* `fullsync_start_time` — The time the current fullsync to the + specified cluster began. +* `last_fullsync_duration` — The duration (in seconds) of the last + completed fullsync.
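These stats can be checked from the shell. A minimal sketch, assuming the per-sink stats above are reported through `riak-repl status` like other replication statistics (adjust the patterns to your output format):

```bash
#!/bin/sh
# Pull the per-sink fullsync statistics out of the local node's
# replication status output.
riak-repl status | grep -E 'fullsyncs_completed|fullsync_start_time|last_fullsync_duration'
```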
diff --git a/content/riak/kv/2.9.0p5/using/repair-recovery.md b/content/riak/kv/2.9.0p5/using/repair-recovery.md new file mode 100644 index 0000000000..20197101e6 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/repair-recovery.md @@ -0,0 +1,57 @@ +--- +title: "Repair & Recovery" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Repair & Recovery" + identifier: "managing_repair_recover" + weight: 204 + parent: "managing" +toc: true +aliases: + - /riak/2.9.0p5/using/repair-recovery/ + - /riak/2.9.0/using/repair-recovery/ + - /riak/kv/2.9.0/using/repair-recovery/ + - /riak/kv/2.9.0p1/using/repair-recovery/ + - /riak/kv/2.9.0p2/using/repair-recovery/ + - /riak/kv/2.9.0p3/using/repair-recovery/ + - /riak/kv/2.9.0p4/using/repair-recovery/ +--- + + +[repair recover fail]: ./failure-recovery/ +[repair recover errors]: ./errors/ +[repair recover repairs]: ./repairs/ +[repair recover restart]: ./rolling-restart/ + +## In This Section + +#### [Failure & Recovery][repair recover fail] + +Lists steps that can be taken to minimize the harm caused by a general +cluster failure. + +[Learn More >>][repair recover fail] + + +#### [Errors & Messages][repair recover errors] + +Details most common errors & messages. + +[Learn More >>][repair recover errors] + + +#### [Repairs][repair recover repairs] + +Tutorials on running various repair operations. + +[Learn More >>][repair recover repairs] + + +#### [Rolling Restarts][repair recover restart] + +Brief guide on performing node-by-node restarts. + +[Learn More >>][repair recover restart] diff --git a/content/riak/kv/2.9.0p5/using/repair-recovery/errors.md b/content/riak/kv/2.9.0p5/using/repair-recovery/errors.md new file mode 100644 index 0000000000..7b888a13de --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/repair-recovery/errors.md @@ -0,0 +1,370 @@ +--- +title: "Errors & Messages" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Errors" + identifier: "repair_recover_errors" + weight: 101 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/recovery/errors + - /riak/kv/2.9.0p5/ops/running/recovery/errors + - /riak/2.9.0p5/using/repair-recovery/errors/ + - /riak/2.9.0/using/repair-recovery/errors/ + - /riak/kv/2.9.0/using/repair-recovery/errors/ + - /riak/kv/2.9.0p1/using/repair-recovery/errors/ + - /riak/kv/2.9.0p2/using/repair-recovery/errors/ + - /riak/kv/2.9.0p3/using/repair-recovery/errors/ + - /riak/kv/2.9.0p4/using/repair-recovery/errors/ +--- + + +[config reference]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference + +This is not a comprehensive listing of every error that Riak may +encounter -- screws fall out all of the time, the world is an imperfect +place. This is an attempt at capturing the most common recent errors +that users do encounter, as well as to describe some non-critical error +atoms that you may find in the logs. + +Discovering the source of an error can take some detective work, since +one error can cause a cascade of errors. + +The tables in this document do not specify which logs these error +messages may appear in. Depending upon your log configuration some may +appear more often (e.g., if you set the log to debug), while others may +output to your console (e.g., if you tee'd your output or started with `riak +console`). + +You can optionally customize your log message format via the +`lager_default_formatter` field under `lager` in `app.config`.
If you +do, your messages will look different from those shown in this document. + +Finally, this document is organized so that you can look up portions of a +log message, since printing every variation would be a bit unwieldy. For +example, this message: + +``` +12:34:27.999 [error] gen_server riak_core_capability terminated with reason:/ +no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72 +``` + +starts with a timestamp (`12:34:27.999`), followed by the log severity +(`[error]`), and ends with a message formatted by lager (found in the Lager table +below as *gen_server `Mod` terminated with reason: `Reason`*). + +### Lager Formats + +Riak's main logging mechanism is the Lager project, so it's good to note +some of the more common message formats. In almost every case the +reasons for the error are described as variables, such as `Reason` or +`Mod` (meaning the Erlang module which is generally the source of the +error). + +Riak does not format all error messages that it receives into +human-readable sentences. However, it does output errors as objects. + +The above example error message corresponds with the first message in +this table, where the Erlang `Mod` value is `riak_core_capability` and +the reason was an Erlang error: `no function clause matching +orddict:fetch('riak@192.168.2.81', []) line 72`. + +Error | Message +------|-------- + | `gen_server <Mod> terminated with reason: <Reason>` + | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>` + | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>` +`badarg` | `bad argument in call to <Mod1> in <Mod2>` +`badarith` | `bad arithmetic expression in <Mod>` +`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>` +`badmatch` | `no match of right hand value <Val> in <Mod>` +`bad_return` | `bad return value <Value> from <Mod>` +`bad_return_value` | `bad return value: <Val> in <Mod>` +`badrecord` | `bad record <Record> in <Mod>` +`case_clause` | `no case clause matching <Val> in <Mod>` +`emfile` | `maximum number of file descriptors exhausted, check ulimit -n` +`function_clause` | `no function clause matching <Mod>` +`function not exported` | `call to undefined function <Func> from <Mod>` +`if_clause` | `no true branch found while evaluating if expression in <Mod>` +`noproc` | `no such process or port in call to <Mod>` +`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded` +`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded` +`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded` +`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded` +`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded` +`try_clause` | `no try clause matching <Val> in <Mod>` +`undef` | `call to undefined function <Mod>` + +### Error Atoms + +Since Erlang programming embraces a "happy path/fail fast" style, one +of the more common error log strings you might encounter contains +`{error,{badmatch,{...`. This is Erlang's way of telling you that an +unexpected value was assigned, so these errors can prefix the more +descriptive parts. In this case, `{error,{badmatch,{...` prefixes the +more interesting `insufficient_vnodes_available` error, which can be +found in the `riak_kv` table later on in this document.
+ +```log +2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"/ +{error,{error,{badmatch,{error,insufficient_vnodes_available}},/ +[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},/ +{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},/ +{webmachine_decision_core,handle_request,2},/ +{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}} +``` + +## Erlang Errors + +Although relatively rare once a Riak cluster is running in production, +users new to Riak or Erlang occasionally encounter errors on initial +installation. These spring from a setup Erlang does not expect, +generally due to network, permission, or configuration problems. + +Error | Description | Resolution +:-----|:------------|:---------- +`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or if epmd thinks Riak is running, check/kill epmd +`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.0p5/using/repair-recovery/errors/#more">Step 1</a>. +`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.0p5/using/repair-recovery/errors/#more">Step 1</a>. +`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others +`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others +`{error,erofs}` | An attempt was made to write to a file/directory on a read-only filesystem | Only set Riak directories to read/write filesystems +`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds) +`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large of an object or that you simply don't have enough RAM. Base minimum suggested RAM per node is 4GB. + +## Riak Errors and Messages + +Many KV errors have prescriptive messages. For such cases we leave it to +Riak to explain the correct course of action. For example, the +`map/reduce` `parse_input` phase will respond like this when it +encounters an invalid input: + +{{% note title="Note on inputs" %}} +Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of +target tuples, a search index, or modfun tuple: `INPUT`. +{{% /note %}} + +The remaining common error codes are often marked by Erlang +atoms (and quite often wrapped within an `{error,{badmatch,{...` tuple, +as described in the [Erlang Errors](#erlang-errors) section +above). This table lays out those terse error codes and related log +messages, if they exist. + +### Riak Core + +Riak Core is the underlying implementation for KV.
These are errors +originating from that framework, and can appear whether you use KV, +Search, or any Core implementation. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g. backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes +`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command +`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice +`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok +`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster +`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check `riak-admin ring-status` and ensure all of your nodes are healthy and connected +`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining +`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir +`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node +`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down +`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+ +`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting` +`{nodes_down, Down}` | | All nodes must be up to check | +`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member +`not_reachable` | | Cannot join unreachable node | Check your network connections, ensure Erlang cookie setting `vm.args` `-setcookie` +`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use (e.g., `{riak_kv_stat, true}`) +`not_single_node` | | There are no other members to join | Join with at least one other node +`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit +`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring +`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so +`self_join` | | Cannot join node with itself | Join another node to form a valid cluster +`timeout` | `<Type>
transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other +`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possible corrupted ring +`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`) +`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted +`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages + | `Bucket validation failed <Detail>` | | Only set valid bucket properties + | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard + | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN` and the sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes. + | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. + | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper + | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper + | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.0p5/using/repair-recovery/errors/#more"> Step 1</a>. + | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library.
Read other log messages for clues + | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read + | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found + | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node + | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors + | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors + | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors + | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive a `xfer_complete` status + +### Riak KV + +Riak KV is the key/value implementation, generally just considered to be +Riak proper. This is the source of most of the code, and consequently, +most of the error messages. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` status +`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query +`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.0p5/using/repair-recovery/errors/#more"> Step 1</a>. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/2.9.0p5/using/repair-recovery/errors/#more"> Step 1</a>. +`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone +`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value +`{field_parsing_failed, {Field, Value}}` | `Could not parse field +<Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed.
For example, a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer +`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug +`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend +`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`. +`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings +`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe` +`invalid_message` | | Unknown event sent to module | Ensure you're running similar versions of Riak (and specifically poolboy) across all nodes +`{invalid_range, Args}` | | Index range query has Start > End | Fix your query +`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result +`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files +`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key +`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk.
If this happens repetitively, stop/start the riak node, forcing a memory realloc +`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N +`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized +`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe` +`notfound` | | No value found | Value was deleted, or was not yet stored or replicated +`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high +`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value +`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code +`{pw_val_unsatisfied, PW, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high +`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value +`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high +`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value +`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called +`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value +`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false` +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.0p5/using/repair-recovery/errors/#more"> Step 1</a>. Or check that you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production. +`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format +`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format +`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value +`too_many_results` | | Too many results are attempted to be returned | This is a protective error.
Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000) +`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin` +`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high +`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value + | `Invalid equality query <SKey>` | Equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query + | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query + | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution + +### Backend Errors + +These errors tend to stem from server-based problems. Backends are +sensitive to low or corrupt disk or memory resources, native code, and +configuration differences between nodes. Conversely, a network issue is +unlikely to affect a backend. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config +`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where to store bitcask data, under the `bitcask` section +`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list +`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config` +`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure +`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config` +`not_loaded` | | Native driver not loading | Ensure your native drivers exist (`.dll` or `.so` files under `lib/<project>/priv`, where `<project>` is most likely `eleveldb`) +`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket +`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production + +### JavaScript + +These are some errors related to JavaScript pre-commit functions, +MapReduce functions, or simply the management of the pool of JavaScript +VMs. If you do not use JavaScript, these should not be encountered. If +they are, check your configuration for high `*js_vm*` values, or treat +them as an epiphenomenon of a real issue, such as low resources.
+ +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`) +`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A given UTF-8 character was badly formatted | Only use correct UTF-8 characters for JavaScript code and arguments +`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments + | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties +`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | On OS X you may have compiled with `llvm-gcc` rather than `gcc`. + +### MapReduce + +These are possible errors logged by Riak's MapReduce implementation, +both legacy and Pipe. If you never use or call MapReduce, you +should not run across these. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it +`bad_mapred_inputs` | | A bad value was sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary +`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce query code placed in a Riak value must first be stored before execution +`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct +`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (both issues may require a node restart) +`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has an invalid input field | Fix MapReduce fields +`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly +`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks.
Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite them as Erlang functions +`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post the MapReduce request with at least one +`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value +`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly +`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long-running job hitting the MapReduce timeout; upgrade to Pipe +`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has an invalid query field | Fix the MapReduce query +`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set a reduce phase at all +`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted + | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation; the most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects + | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen + +## Specific messages + +Although you can put together many error causes with the tables above, +here are some common yet esoteric messages with known causes and +solutions. + + Message | Resolution +:--------|:---------- +gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The `Node` has been changed, either through a change of IP or of the `vm.args` `-name`, without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files (`rm -rf /var/lib/riak/ring/*`) and rejoin the cluster +gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) +monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting `zdbbl` higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1 msec). +<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info; you can add `+swt very_low` to your `vm.args` +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/2.9.0p5/using/repair-recovery/errors/#more">Step 2</a>. +enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core.
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the address that the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause and resolve errors of this kind; check for the existence of stale `beam.smp` processes. +exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify the HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range, as the `riak` user will not have access to such ports. +gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node's original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` rather than properly through `riak-admin cluster replace`. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/snmp) configuration. +RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. + + +### More + +1. <a name="f1"></a>Ensure node inter-communication + - Check `riak-admin member-status` and ensure the cluster is valid. + - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected. + - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node. + - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster. + - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`). + +2. <a name="f2"></a>Run LevelDB compaction + 1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting; more than one might be a strong indication of a hardware or OS bug)* + 2. Stop Riak on the node: `riak stop` + 3. Start an Erlang session (do not start Riak; we just want Erlang) + 4. From the Erlang console, perform the following command to open the LevelDB database + + ```erlang + [application:set_env(eleveldb, Var, Val) || {Var, Val} <- + [{max_open_files, 2000}, + {block_size, 1048576}, + {cache_size, 20*1024*1024*1024}, + {sync, false}, + {data_root, "/var/db/riak/leveldb"}]]. + ``` + 5. For each of the corrupted LevelDB databases (found by `find . -name "LOG" -exec grep -l 'Compaction error' {} \;`) run this command, substituting in the proper vnode number. + + ```erlang + eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []). + ``` + 6. When all have finished successfully, you may restart the node: `riak start` + 7. Check for proper operation by looking at the log files in /var/log/riak and the LOG files in the affected LevelDB vnodes.
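For that final check, one quick way to confirm the repairs took is to re-run the scan from step 1 over the repaired vnodes; a minimal sketch, assuming the same `/var/db/riak/leveldb` data root used in the examples above:

```bash
# Re-scan the LevelDB LOG files after the repair;
# no output means no remaining compaction errors.
find /var/db/riak/leveldb -name "LOG" -exec grep -l 'Compaction error' {} \;
```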
diff --git a/content/riak/kv/2.9.0p5/using/repair-recovery/failed-node.md b/content/riak/kv/2.9.0p5/using/repair-recovery/failed-node.md new file mode 100644 index 0000000000..ace493307d --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/repair-recovery/failed-node.md @@ -0,0 +1,118 @@ +--- +title: "Recovering a Failed Node" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Recover a Failed Node" + identifier: "repair_recover_failed_node" + weight: 104 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/recovery/failed-node + - /riak/kv/2.9.0p5/ops/running/recovery/failed-node + - /riak/2.9.0p5/using/repair-recovery/failed-node/ + - /riak/2.9.0/using/repair-recovery/failed-node/ + - /riak/kv/2.9.0/using/repair-recovery/failed-node/ + - /riak/kv/2.9.0p1/using/repair-recovery/failed-node/ + - /riak/kv/2.9.0p2/using/repair-recovery/failed-node/ + - /riak/kv/2.9.0p3/using/repair-recovery/failed-node/ + - /riak/kv/2.9.0p4/using/repair-recovery/failed-node/ +--- + + +## General Recovery Notes + +A Riak node can fail for many reasons, but a handful of checks enable you to +uncover the most common problems that lead to node failure: checking RAID and +filesystem consistency, checking for faulty memory, and ensuring that your +network connections are fully functioning. + +When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster. + +During the recovery process, hinted handoff will kick in and update the data on +the recovered node with updates accepted from other nodes in the cluster. Your +cluster may temporarily return `not found` for objects that are currently +being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on +these scenarios, in particular how the system behaves while the failed node is +not part of the cluster). + +## Node Name Changed + +If you are recovering from a scenario in which node name changes are out of +your control, you'll want to notify the cluster of its *new* name using the +following steps: + +1. Stop the node you wish to rename: + + ```bash + riak stop + ``` + + +2. Mark the node down from another node in the cluster: + + ```bash + riak-admin down <previous_node_name> + ``` + +3. Update the node name in Riak's configuration files: + + ```riakconf + nodename = <updated_node_name> + ``` + + ```vmargs + -name <updated_node_name> + ``` + +4. Delete the ring state directory (usually `/var/lib/riak/ring`). + +5. Start the node again: + + ```bash + riak start + ``` + +6. Ensure that the node comes up as a single instance: + + ```bash + riak-admin member-status + ``` + + The output should look something like this: + + ``` + ========================= Membership ========================== + Status     Ring    Pending    Node + --------------------------------------------------------------- + valid     100.0%      --      'dev-rel@127.0.0.1' + --------------------------------------------------------------- + Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +7. Join the node to the cluster: + + ```bash + riak-admin cluster join <node_name_of_a_member_of_the_cluster> + ``` + +8. Replace the old instance of the node with the new: + + ```bash + riak-admin cluster force-replace <previous_node_name> <new_node_name> + ``` + +9.
Review the changes: + + ```bash + riak-admin cluster plan + ``` + + Finally, commit those changes: + + ```bash + riak-admin cluster commit + ``` diff --git a/content/riak/kv/2.9.0p5/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.9.0p5/using/repair-recovery/failure-recovery.md new file mode 100644 index 0000000000..ad2bc05e43 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/repair-recovery/failure-recovery.md @@ -0,0 +1,133 @@ +--- +title: "Failure & Recovery" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Failure & Recovery" + identifier: "repair_recover_failure" + weight: 100 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/recovery/failure-recovery + - /riak/kv/2.9.0p5/ops/running/recovery/failure-recovery + - /riak/2.9.0p5/using/repair-recovery/failure-recovery/ + - /riak/2.9.0/using/repair-recovery/failure-recovery/ + - /riak/kv/2.9.0/using/repair-recovery/failure-recovery/ + - /riak/kv/2.9.0p1/using/repair-recovery/failure-recovery/ + - /riak/kv/2.9.0p2/using/repair-recovery/failure-recovery/ + - /riak/kv/2.9.0p3/using/repair-recovery/failure-recovery/ + - /riak/kv/2.9.0p4/using/repair-recovery/failure-recovery/ +--- + + +Riak was built to withstand---or at the very least reduce the severity +of---many types of system failure. Nonetheless, bugs are a reality, +hardware does break, and occasionally Riak itself will fail. Here, we'll +list some steps that can be taken to minimize the harm caused by a general +cluster failure. + +## Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, backup log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally and +are not affected by a wider problem like a virtualization or network outage. +Try to determine the cause of the problem from the data you have collected. + +## Data Loss + +Many failures incur no data loss or minimal loss that can be +repaired automatically, without intervention. Outage of a single node +does not necessarily cause data loss, as other replicas of every key are +available elsewhere in the cluster. Once the node is detected as down, +other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#hinted-handoff)). + +More severe data loss scenarios usually relate to hardware failure. +If data is lost, several options are available for restoring it. + +1. **Restore from backup** --- A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but it can be used to partially restore data from + lost storage volumes. If running in a RAID configuration, rebuilding + the array may also be possible. +2. **Restore from multi-cluster replication** --- If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the `riak-repl` + command. +3. **Restore using intra-cluster repair** --- Riak versions 1.2 and greater + include a repair feature which will restore lost partitions with + data from other replicas. 
Currently, this must be invoked manually + using the Riak console and should be performed with guidance from a + Basho Client Services Engineer. + +Once data has been restored, normal operations should continue. If +multiple nodes completely lose their data, consultation and assistance +from Basho are strongly recommended. + +## Data Corruption + +Data at rest on disk can become corrupted by hardware failure or other +events. Generally, the Riak storage backends are designed to handle +cases of corruption in individual files or entries within files, and can +repair them automatically or simply ignore the corrupted parts. +Otherwise, clusters can recover from data corruption in roughly the same +way that they recover from data loss. + +## Out-of-Memory + +Sometimes, Riak will exit when it runs out of available RAM. While this +does not necessarily cause data loss, it may indicate that the cluster +needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully. + +Replacing the node with one that has greater RAM capacity may temporarily +alleviate the problem, but out-of-memory (OOM) issues tend to be an indication +that the cluster is under-provisioned. + +## High Latency / Request Timeout + +High latencies and timeouts can be caused by slow disks or networks or an +overloaded node. Check `iostat` and `vmstat` or your monitoring system to +determine the state of resource usage. If I/O utilization is high but +throughput is low, this may indicate that the node is responsible for +too much data and growing the cluster may be necessary. Additional RAM +may also improve latency because more of the active dataset will be +cached by the operating system. + +Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +the number of siblings, causing longer disk service times and slower +network responses. + +Sibling explosion can be detected by examining the `node_get_fsm_siblings` +and `node_get_fsm_objsize` statistics from the `riak-admin status` command. +To recover from sibling explosion, the application should be throttled and +the resolution policy might need to be invoked manually on offending keys. + +A Basho CSE can assist in manually finding large values, i.e. those that +potentially have a sibling explosion problem, in the storage backend. + +MapReduce requests typically involve multiple I/O operations and are +thus the most likely to time out. From the perspective of the client +application, the success of MapReduce requests can be improved by reducing the +number of inputs, supplying a longer request timeout, and reducing the usage +of secondary indexes. Heavily loaded clusters may experience more MapReduce +timeouts simply because many other requests are being serviced as well. Adding +nodes to the cluster can reduce MapReduce failure in the long term by +spreading load and increasing available CPU and IOPS. + + +## Cluster Recovery From Backups + +See [Changing Cluster Information]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. 
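While the linked page has the full procedure, it can help to recall what a basic node-level backup looks like. Below is a minimal sketch that archives a stopped node's data and configuration directories; the paths assume a default package install, and the backup destination is an assumption to adapt:

```bash
# Archive one node's data and config while Riak is stopped.
riak stop
tar -czf /backups/riak-$(hostname)-$(date +%F).tar.gz /var/lib/riak /etc/riak
riak start
```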
+ +{{% note title="Tip" %}} +If you are a TI Tokyo Riak support customer and require assistance or +further advice with a cluster recovery, please file a ticket with the +<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>. +{{% /note %}} diff --git a/content/riak/kv/2.9.0p5/using/repair-recovery/repairs.md b/content/riak/kv/2.9.0p5/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..0d235cfaeb --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/repair-recovery/repairs.md @@ -0,0 +1,395 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.0p5/ops/running/recovery/repairing-indexes + - /riak/2.9.0p5/ops/running/recovery/failed-node + - /riak/kv/2.9.0p5/ops/running/recovery/failed-node + - /riak/2.9.0p5/ops/running/recovery/repairing-leveldb + - /riak/kv/2.9.0p5/ops/running/recovery/repairing-leveldb + - /riak/2.9.0p5/ops/running/recovery/repairing-partitions + - /riak/kv/2.9.0p5/ops/running/recovery/repairing-partitions + - /riak/2.9.0p5/using/repair-recovery/repairs/ + - /riak/2.9.0/using/repair-recovery/repairs/ + - /riak/kv/2.9.0/using/repair-recovery/repairs/ + - /riak/kv/2.9.0p1/using/repair-recovery/repairs/ + - /riak/kv/2.9.0p2/using/repair-recovery/repairs/ + - /riak/kv/2.9.0p3/using/repair-recovery/repairs/ + - /riak/kv/2.9.0p4/using/repair-recovery/repairs/ +--- + + +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/2.9.0p5/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/2.9.0p5/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. [Active anti-entropy (AAE)][glossary AAE] is also provided for Riak search: by default, Riak KV's [configuration for AAE][cluster ops aae] is used for Riak search's AAE hashtrees, but Riak search can be given its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time.
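The repair commands in the next section take partition IDs from the ring. If you need to look up the partitions owned by a particular node first, a minimal sketch, assuming the `riak-admin cluster partitions` command available in Riak 2.0 and later (the node name is a placeholder):

```bash
# List the partitions owned by one node; the resulting IDs can be fed
# to `riak-admin repair-2i` one at a time.
riak-admin cluster partitions --node=riak@192.168.1.10
```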
+ +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. The default is `./data`. + +Compaction error messages take the following form: + +``` +<timestamp> Compaction Error: Corruption: corrupted compressed block contents +``` + +To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction Error` in each `LOG` file. Here is an example script: + +```bash +find . -name "LOG" -exec grep -l 'Compaction error' {} \; +``` + +If there are compaction errors in any of your vnodes, those will be listed in the console. If any vnode has experienced such errors, you would see output like this: + +``` +./442446784738847563128068650529343492278651453440/LOG +``` + + +{{% note %}} +While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level. +{{% /note %}} + + +## Healing Corrupted LevelDBs + +When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not. + +Choose your setup below: + +1. [Just LevelDB](#leveldb) +2. [LevelDB with tiered storage](#leveldb-with-tiered-storage) + + +### LevelDB + +Follow the steps below to heal your corrupted LevelDB. + +1/. Stop the node: + +```bash +riak stop +``` + +2/. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command: + +```bash +`riak ertspath`/erl +``` + +{{% note title="Erlang version" %}} +Note: you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command. +{{% /note %}} + +3/. Once in the shell, run the following command: + +```erlang +application:set_env(eleveldb, data_root, ""). +``` + +4/. Then set `Options` equal to an empty list: + +```erlang +Options = []. +``` + +5/. Set some supportive variables for the repair process. These will be custom to your environment and your specific repair needs. +`VNodeList` should be a list of each corrupted LevelDB vnode that you found using the [`find` command above](#checking-for-compaction-errors).
+ +```erlang +DataRoot = "»path to your data root«". +VNodeList = ["»vnode id you want to repair«", ...]. +``` + +6/. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`. + +```erlang +RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end. +[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList]. +``` + +7/. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual. + +```bash +riak start +``` + +### LevelDB with Tiered Storage + +Follow the steps below to heal your corrupted LevelDB. + +1/. Stop the node: + +```bash +riak stop +``` + +2/. Check your `riak.conf` file and make note of the following values: + +* leveldb.tiered (integer) +* leveldb.tiered.path.fast +* leveldb.tiered.path.slow + +3/. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command: + +```bash +`riak ertspath`/erl +``` + +{{% note title="Erlang version" %}} +Note: you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command. +{{% /note %}} + +4/. Once in the shell, run the following command: + +```erlang +application:set_env(eleveldb, data_root, ""). +``` + +5/. Then supply the information you noted in Step 2: + +```erlang +Options = [ + {tiered_slow_level, »leveldb.tiered value«}, + {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"}, + {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"} +]. +``` + +6/. Set some supportive variables for the repair process. These will be custom to your environment and your specific repair needs. +`VNodeList` should be a list of the corrupted LevelDB partitions that you found using the [`find` command above](#checking-for-compaction-errors), each provided in double quotes. + +```erlang +DataRoot = "»path to your data root«". +VNodeList = ["»vnode id you want to repair«", ...]. +``` + +7/. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`. + +```erlang +RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end. +[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList]. +``` +8/. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual. + +```bash +riak start +``` + + +## Repairing Partitions + +If you have experienced a loss of object replicas in your cluster, you +may need to perform a repair operation on one or more of your data +[partitions]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +run in situations where partitions or whole nodes are lost due to +corruption or hardware failure.
In these cases, nodes or partitions are +brought back online without any data, which means that the need to +repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/) is enabled. + +You will need to run a repair if the following are both true: + +* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* You have both non-expiring data and keys that are not accessed + frequently (which means that they are not likely to be subject to + [read repair]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + +You will most likely not need to run a repair operation if _any_ of the +following is true: + +* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/2.9.0p5/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Your entire key set is accessed frequently, allowing passive read + repair to repair the partitions +* Your data expires frequently + +In most cases, we recommend either using active anti-entropy or, if +necessary and only when necessary, running a repair operation using the +instructions below. + +### Running a Repair + +The Riak KV repair operation will repair objects from a node's adjacent +partitions on the ring, consequently fixing the index. This is done as +efficiently as possible by generating a hash range for all the buckets +and thus avoiding a preflist calculation for each key. Only a hash of +each key is done, its range determined from a bucket->range map, and +then the hash is checked against the range. + +Repairs are not allowed to occur during ownership changes. Since +ownership entails the moving of partition data, it is safest to make them +mutually exclusive events. If you join or remove a node, all repairs +across the entire cluster will be killed. + +### Repairing a Single Partition + +In the case of data loss in a single partition, only that partition can +be repaired. + +1. From any node in the cluster, attach to Riak's Erlang shell: + + ```bash + riak attach + ``` + + You may have to hit **Enter** again to get a console prompt. + +2. Execute the repair for a single partition using the below command: + + ```erlang + riak_kv_vnode:repair(»Partition ID«). + ``` + + where `»Partition ID«` is replaced by the ID of the partition to + repair. For example: + + ```erlang + riak_kv_vnode:repair(251195593916248939066258330623111144003363405824). + ``` + +3. Once the command has been executed, detach from Riak using +`Control-C`. + +### Repairing All Partitions on a Node + +If a node is lost, all partitions currently owned by that node can be +repaired. + +1. From any node in the cluster, attach to Riak's Erlang shell: + + ```bash + riak attach + ``` + +2. Get a copy of the current Ring: + + ```erlang + {ok, Ring} = riak_core_ring_manager:get_my_ring(). + ``` + + You will get a lot of output with ring record information. + You can safely ignore it. + +3. Get a list of partitions owned by the node that needs to be repaired. +Replace `dev1@127.0.0.1` with the name of the node to be repaired. The +name can be found in each node's `vm.args` file, specified as the +`-name` parameter, if you are using the older configuration system; if +you are using the newer, `riak.conf`-based system, the name is given by +the `nodename` parameter. + + ```erlang + Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+ ``` + + **Note**: The above is an [Erlang list + comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) + that loops over each `{Partition, Node}` tuple in the ring and + extracts only the partitions that match the given node name, as a + list. + + +4. Execute the repair on all the partitions. Executing the repairs all +at once will cause a lot of `{shutdown, max_concurrency}` messages in +the logs. These can be safely ignored, as it is just the transfers +mechanism enforcing an upper limit on the number of concurrent +transfers. + + ```erlang + [riak_kv_vnode:repair(P) || P <- Partitions]. + ``` +5. Once the command has been executed, detach from Riak using +`Control-C`. + +### Monitoring Repairs + +The above repair commands can be monitored via the `riak-admin +transfers` command. + +### Killing a Repair + +Currently there is no easy way to kill an individual repair. The only +option is to kill all repairs targeting a given node. This is done by +running `riak_core_vnode_manager:kill_repairs(Reason)` on the node +undergoing repair. This command can be executed from a `riak attach` +session like below: + +```erlang +riak_core_vnode_manager:kill_repairs(killed_by_user). +``` + +Log entries will reflect that repairs were killed manually, and will +look similar to: + +``` +2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user +``` + +Repairs on a node can also be killed remotely from another node in the +cluster. From a `riak attach` session the below command can be used: + +```erlang +rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]). +``` diff --git a/content/riak/kv/2.9.0p5/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.9.0p5/using/repair-recovery/rolling-replaces.md new file mode 100644 index 0000000000..109776ea31 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/repair-recovery/rolling-replaces.md @@ -0,0 +1,80 @@ +--- +title: "Rolling Replaces" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Rolling Replaces" + identifier: "repair_recover_replace" + weight: 106 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.0p5/using/repair-recovery/rolling-replaces/ + - /riak/2.9.0/using/repair-recovery/rolling-replaces/ + - /riak/kv/2.9.0/using/repair-recovery/rolling-replaces/ + - /riak/kv/2.9.0p1/using/repair-recovery/rolling-replaces/ + - /riak/kv/2.9.0p2/using/repair-recovery/rolling-replaces/ + - /riak/kv/2.9.0p3/using/repair-recovery/rolling-replaces/ + - /riak/kv/2.9.0p4/using/repair-recovery/rolling-replaces/ +--- + + +[upgrade]: {{<baseurl>}}riak/kv/2.9.0p5/setup/upgrading/cluster/ +[rolling restarts]: {{<baseurl>}}riak/kv/2.9.0p5/using/repair-recovery/rolling-restart/ +[add node]: {{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes + +Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. + +The following steps should be undertaken on each Riak KV node that you wish to replace: + +1/. Create a free node: + + a/. [Create an additional node][add node] with similar specifications to the other nodes in the cluster. + + b/.
Or leave a node that is currently in the cluster: + + ```bash + riak-admin cluster leave »nodename« + ``` + + After creating a node or leaving a node, wait for all transfers to complete: + + ```bash + riak-admin transfers + ``` + +2/. Join the free node to your cluster: + +```bash +riak-admin cluster join »free_node« +``` + +3/. Next, replace the free node with an existing node: + +```bash +riak-admin cluster replace »free_node« »nodename« +``` + +4/. Then review the cluster transition plan: + +```bash +riak-admin cluster plan +``` + +5/. And commit the changes: + +```bash +riak-admin cluster commit +``` + +6/. Wait for all transfers to complete: + +```bash +riak-admin transfers +``` + +7/. Repeat steps 2-6 above until each node has been replaced. + +8/. Join the replaced node back into the cluster or decommission the additional node that was created. diff --git a/content/riak/kv/2.9.0p5/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.9.0p5/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..d78c8ebb66 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/repair-recovery/rolling-restart.md @@ -0,0 +1,68 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/recovery/rolling-restart + - /riak/kv/2.9.0p5/ops/running/recovery/rolling-restart + - /riak/2.9.0p5/using/repair-recovery/rolling-restart/ + - /riak/2.9.0/using/repair-recovery/rolling-restart/ + - /riak/kv/2.9.0/using/repair-recovery/rolling-restart/ + - /riak/kv/2.9.0p1/using/repair-recovery/rolling-restart/ + - /riak/kv/2.9.0p2/using/repair-recovery/rolling-restart/ + - /riak/kv/2.9.0p3/using/repair-recovery/rolling-restart/ + - /riak/kv/2.9.0p4/using/repair-recovery/rolling-restart/ +--- + + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/2.9.0p5/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1/. Stop Riak + +```bash +riak stop +``` + +2/. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3/. Start Riak again + +```bash +riak start +``` + +4/. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5/. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6/. Repeat the above process for any other nodes that need to be restarted. 
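If you have several nodes to cycle through, the same sequence can be scripted. Below is a minimal sketch, assuming passwordless SSH to each host, `riak`/`riak-admin` on the remote PATH, and a hypothetical `NODES` list with node names of the form `riak@<host>`; adapt it to your environment before use:

```bash
#!/usr/bin/env bash
# Rolling restart, one node at a time; assumptions noted above.
NODES="host1 host2 host3"   # hypothetical host list

for host in $NODES; do
  ssh "$host" riak stop
  # ...perform maintenance or upgrade work on $host here...
  ssh "$host" riak start
  ssh "$host" riak-admin wait-for-service riak_kv "riak@$host"
  # Do not move on until all handoffs triggered by the restart finish.
  while ! ssh "$host" riak-admin transfers | grep -iqF 'No transfers active'; do
    echo "Transfers in progress on $host"
    sleep 5
  done
done
```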
diff --git a/content/riak/kv/2.9.0p5/using/repair-recovery/secondary-indexes.md b/content/riak/kv/2.9.0p5/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..93870189ad --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,146 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.0p5/ops/running/recovery/repairing-indexes + - /riak/2.9.0p5/using/repair-recovery/secondary-indexes/ + - /riak/2.9.0/using/repair-recovery/secondary-indexes/ + - /riak/kv/2.9.0/using/repair-recovery/secondary-indexes/ + - /riak/kv/2.9.0p1/using/repair-recovery/secondary-indexes/ + - /riak/kv/2.9.0p2/using/repair-recovery/secondary-indexes/ + - /riak/kv/2.9.0p3/using/repair-recovery/secondary-indexes/ + - /riak/kv/2.9.0p4/using/repair-recovery/secondary-indexes/ +--- + + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i <Partition_ID> +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +---- + +## Repairing Search Indexes + +Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned. + +### Running a Repair + +If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index. + +This is done as efficiently as possible by generating a hash range for all the buckets and thus avoiding a preflist calculation for each key. Only a hash of each key is done, its range determined from a bucket→range map, and then the hash is checked against the range. + +This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly. + +1. From a cluster node with Riak installed, attach to the Riak console: + + ```bash + riak attach + ``` + + You may have to hit enter again to get a console prompt. + +2. Get a list of partitions owned by the node that needs repair: + + ```erlang + {ok, Ring} = riak_core_ring_manager:get_my_ring(). + ``` + + You will get a lot of output with Ring record information. You can safely ignore it. + +3. Then run the following code to get a list of partitions. Replace 'dev1@127.0.0.1' with the name of the node you need to repair. 
+ + ```erlang + Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)]. + ``` + + _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the Ring and extracts only the partitions that match the given node name, as a list._ + +4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` spam, but it's not anything to worry about. That is just the transfers mechanism enforcing an upper limit on the number of concurrent transfers. + + ```erlang + [riak_search_vnode:repair(P) || P <- Partitions]. + ``` + +5. When you're done, press `Ctrl-D` to disconnect the console. DO NOT RUN `q()`, which will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service; it does not stop the code from running. + + +### Monitoring a Repair + +The above repair command can be slow, so if you reattach to the console, you can run the `repair_status` function. You can use the `Partitions` variable defined above to get the status of every partition. + +```erlang +[{P, riak_search_vnode:repair_status(P)} || P <- Partitions]. +``` + +When you're done, press `Ctrl-D` to disconnect the console. + +### Killing a Repair + +Currently there is no easy way to kill an individual repair. The only +option is to kill all repairs targeting a given node. This is done by +running `riak_core_vnode_manager:kill_repairs(Reason)` on the node +undergoing repair. This means you'll either have to be attached to +that node's console or you can use the `rpc` module to make a remote +call. Here is an example of killing all repairs targeting partitions +on the local node. + +```erlang +riak_core_vnode_manager:kill_repairs(killed_by_user). +``` + +Log entries will reflect that repairs were killed manually, something akin to this: + +``` +2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user +``` + +Here is an example of executing the call remotely. + +```erlang +rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]). +``` + +When you're done, press `Ctrl-D` to disconnect the console. + +Repairs are not allowed to occur during ownership changes. Since +ownership entails the moving of partition data, it is safest to make +them mutually exclusive events. If you join or remove a node, all +repairs across the entire cluster will be killed.
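Since ownership changes kill repairs, it is worth confirming that the cluster is quiescent before kicking off a long-running repair; a minimal sketch using the commands already covered above:

```bash
# Verify no pending ownership changes or active handoffs before repairing.
riak-admin ring-status
riak-admin transfers   # should report 'No transfers active'
```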
diff --git a/content/riak/kv/2.9.0p5/using/running-a-cluster.md b/content/riak/kv/2.9.0p5/using/running-a-cluster.md new file mode 100644 index 0000000000..daaba87b10 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/running-a-cluster.md @@ -0,0 +1,343 @@ +--- +title: "Running a Cluster" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Running a Cluster" + identifier: "managing_running_a_cluster" + weight: 200 + parent: "managing" +toc: true +aliases: + - /riak/2.9.0p5/ops/building/basic-cluster-setup + - /riak/kv/2.9.0p5/ops/building/basic-cluster-setup + - /riak/2.9.0p5/using/running-a-cluster/ + - /riak/2.9.0/using/running-a-cluster/ + - /riak/kv/2.9.0/using/running-a-cluster/ + - /riak/kv/2.9.0p1/using/running-a-cluster/ + - /riak/kv/2.9.0p2/using/running-a-cluster/ + - /riak/kv/2.9.0p3/using/running-a-cluster/ + - /riak/kv/2.9.0p4/using/running-a-cluster/ +--- + + +Configuring a Riak cluster involves instructing each node to listen on a +non-local interface, i.e. not `127.0.0.1`, and then joining all of the +nodes together to participate in the cluster. + +Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/) located in your `rel/riak/etc` directory (if +you compiled from source) or `/etc` (if you used a binary install of +Riak). + +The commands below presume that you are running from a source install, +but if you have installed Riak with a binary install, you can substitute +the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin` +with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts +are located in the `/bin` directory of your installation. + +> **Note on changing the `name` value** +> +> If possible, you should avoid starting Riak prior to editing the name of +a node. This setting corresponds to the `nodename` parameter in the +`riak.conf` file if you are using the newer configuration system, and to +the `-name` parameter in `vm.args` (as described below) if you are using +the older configuration system. If you have already started Riak with +the default settings, you cannot change the `-name` setting and then +successfully restart the node. +> +> If you cannot restart after changing the `-name` value, you have two +options: +> +> * Discard the existing ring metadata by removing the contents of the +`ring` directory. This will require rejoining all nodes into a +cluster again. +> +> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. + +## Configure the First Node + +First, stop your Riak node if it is currently running: + +```bash +riak stop +``` + +#### Select an IP address and port + +Let's say that the IP address for your cluster is 192.168.1.10 and that +you'll be using the default port (8087).
If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +interface due to performance gains), you should change your +configuration file: + +```riakconf +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +%% In the pb section of riak_core: + +{"127.0.0.1", 8087 }, +``` + +becomes + +```riakconf +listener.protobuf.internal = 192.168.1.10:8087 +``` + +```appconfig +%% In the pb section of riak_core: + +{"192.168.1.10", 8087 }, +``` + +{{% note title="Note on upgrading to 2.0" %}} +If you are upgrading to Riak version 2.0 or later from a pre-2.0 +release, you can use either your old `app.config`/`vm.args` +configuration files or the newer `riak.conf` if you wish. If you have +installed Riak 2.0 directly, you should use only `riak.conf`. + +Below, examples will be provided for both the old and new configuration +systems. Bear in mind that you need to use either the older or the newer +system, but never both simultaneously. + +More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference). +{{% /note %}} + +If you're using the HTTP interface, you will need to alter your +configuration in an analogous way: + +```riakconf +listener.http.internal = 127.0.0.1:8098 +``` + +```appconfig +%% In the riak_core section: + +{http, [ {"127.0.0.1", 8098 } ]}, +``` + +becomes + +```riakconf +listener.http.internal = 192.168.1.10:8098 +``` + +```appconfig +{http, [ {"192.168.1.10", 8098 } ]}, +``` + +#### Name your node + +Every node in Riak has a name associated with it. The default name is +`riak@127.0.0.1`. Let's say that you want to change the name to +`riak@192.168.1.10`: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +becomes + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +> **Node Names** +> +> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10` +are both acceptable node naming schemes, but using the FQDN style is +preferred. +> +> Once a node has been started, in order to change the name you must +either remove ring files from the `/data/ring` directory or +[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.0p5/using/admin/riak-admin/#cluster-force-replace) the node. + +#### Start the node + +Now that your node is properly configured, you can start it: + +```bash +riak start +``` + +If the Riak node has been previously started, you must use the +`riak-admin cluster replace` command to change the node name and update +the node's ring file. + +```bash +riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10 +``` + +{{% note title="Note on single nodes" %}} +If a node is started singly using default settings, as you might do when you +are building your first test environment, you will need to remove the ring +files from the data directory after you edit your configuration files. +`riak-admin cluster replace` will not work since the node has not been joined +to a cluster. +{{% /note %}} + +As with all cluster changes, you need to view the planned changes by +running `riak-admin cluster plan` and then running `riak-admin cluster +commit` to finalize those changes. + +The node is now properly set up to join other nodes for cluster +participation.
You can proceed to adding a second node to the cluster. + +## Add a Second Node to Your Cluster + +Repeat the above steps for a second host on the same network, providing +the second node with a host/port and node name. Once the second node has +started, use `riak-admin cluster join` to join the second node to the +first node, thereby creating an initial Riak cluster. Let's say that +we've named our second node `riak@192.168.1.11`. From the new node's +`/bin` directory: + +```bash +riak-admin cluster join riak@192.168.1.10 +``` + +Output from the above should resemble: + +``` +Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10` +``` + +Next, plan and commit the changes: + +```bash +riak-admin cluster plan +riak-admin cluster commit +``` + +After the last command, you should see: + +``` +Cluster changes committed +``` + +If your output was similar, then the second Riak node is now part of the +cluster and has begun syncing with the first node. Riak provides several +ways to determine the cluster's ring status. Here are two ways to +examine your Riak cluster's ring: + +1. Using the `riak-admin` command: + + ```bash + bin/riak-admin status | grep ring_members + ``` + + With output resembling the following: + + ```bash + ring_members : ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +2. Running the `riak attach` command. This will open up an Erlang shell, +into which you can type the following command: + + ```erlang + 1> {ok, R} = riak_core_ring_manager:get_my_ring(). + + %% Response: + + {ok,{chstate,'riak@192.168.1.10',......... + (riak@192.168.52.129)2> riak_core_ring:all_members(R). + ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +To join additional nodes to your cluster, repeat the above steps. You +can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/2.9.0p5/using/cluster-operations/adding-removing-nodes) from a cluster. + +> **Ring Creation Size** +> +> All nodes in the cluster +must have the same initial ring size setting in order to join and +participate in cluster activity. This setting can be adjusted in your +configuration file using the `ring_creation_size` parameter if you're +using the older configuration system or `ring_size` in the new system. +> +> Check the value of all nodes if you receive a message like this: +> `Failed: riak@10.0.1.156 has a different ring_creation_size` + +## Running Multiple Nodes on One Host + +If you built Riak from source code, or if you are using the Mac OS X +pre-built package, then you can easily run multiple Riak nodes on the +same machine. The most common scenario for doing this is to experiment +with running a Riak cluster. + +**Note**: If you have installed the `.deb` or `.rpm` package, then you +will need to download and build Riak from source to follow the +directions below. + +To run multiple nodes, make copies of the `riak` directory. + +- If you ran `make all rel`, then this can be found in `./rel/riak` + under the Riak source root directory. +- If you are running Mac OS X, then this is the directory where you + unzipped the `.tar.gz` file. + +Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`, +`./rel/riak3`, and so on, you need to make two changes: + +1. Set your handoff port and your Protocol Buffers or HTTP port +(depending on which interface you are using) to different values on each +node.
For example: + + ```riakconf + # For Protocol Buffers: + listener.protobuf.internal = 127.0.0.1:8187 + + # For HTTP: + listener.http.internal = 127.0.0.1:8198 + + # For either interface: + handoff.port = 8199 + ``` + + ```appconfig + %% In the pb section of riak_core: + {"127.0.0.1", 8187 } + + %% In the http section of riak_core: + {"127.0.0.1", 8198} + ``` + +2. Change the name of each node to a unique name. Now, start the nodes, +changing path names and nodes as appropriate: + +```bash +./rel/riak1/bin/riak start +./rel/riak2/bin/riak start +./rel/riak3/bin/riak start + +# etc +``` + +Next, join the nodes into a cluster: + +```bash +./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak2/bin/riak-admin cluster plan +./rel/riak2/bin/riak-admin cluster commit +``` + +## Multiple Clusters on One Host + +Using the above technique, it is possible to run multiple clusters on +one computer. If a node hasn’t joined an existing cluster, it will +behave just as a cluster would. Running multiple clusters on one +computer is simply a matter of having two or more distinct nodes or +groups of clustered nodes. diff --git a/content/riak/kv/2.9.0p5/using/security.md b/content/riak/kv/2.9.0p5/using/security.md new file mode 100644 index 0000000000..fd3c200978 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/security.md @@ -0,0 +1,203 @@ +--- +title: "Security & Firewalls" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Security" + identifier: "managing_security" + weight: 205 + parent: "managing" +toc: true +aliases: + - /riak/2.9.0p5/ops/advanced/security + - /riak/kv/2.9.0p5/ops/advanced/security + - /riak/2.9.0p5/using/security/ + - /riak/2.9.0/using/security/ + - /riak/kv/2.9.0/using/security/ + - /riak/kv/2.9.0p1/using/security/ + - /riak/kv/2.9.0p2/using/security/ + - /riak/kv/2.9.0p3/using/security/ + - /riak/kv/2.9.0p4/using/security/ +--- + + +[config reference search]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#search +[config search enabling]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/search/#enabling-riak-search +[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.0p5/configuring/v3-multi-datacenter/ssl +[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html +[security basics]: {{<baseurl>}}riak/kv/2.9.0p5/using/security/basics +[security managing]: {{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/ +[Solr]: http://lucene.apache.org/solr/ +[usage search]: {{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search + +> **Internal security** +> +> This document covers network-level security. For documentation on the +authentication and authorization features introduced in Riak 2.0, see +[Authentication and Authorization][security basics] and [Managing Security Sources][security managing] + +This article discusses standard configurations and port settings to use +when providing network security for a Riak Cluster. There are two +classes of access control for Riak: + +* Other Riak nodes participating in the cluster +* Clients making use of the Riak cluster + +The settings for both access groups are located in your cluster's +configuration settings. If you are using the newer configuration system, +you can set a host and port for each node in that node's `riak.conf` +file, setting `listener.protobuf` if you are using Riak's Protocol +Buffers interface or `listener.http` if you are using HTTP (or +`listener.https` if you are using SSL). 
If you are using the older
+configuration system, adjust the settings of `pb`, `http`, or `https`,
+depending on which client interface you are using.
+
+Make note of these configurations and set up your firewall to allow
+incoming TCP access to those ports or IP address/port combinations.
+Exceptions to this are the `handoff_ip` and `handoff_port` directives.
+Those are for communication between Riak nodes only.
+
+## Inter-node Communication
+
+Riak uses the Erlang distribution mechanism for most inter-node
+communication. Riak identifies other machines in the ring using Erlang
+identifiers (`riak@<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves
+these node identifiers to a TCP port on a given machine via the Erlang
+Port Mapper daemon (epmd) running on each cluster node.
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. For inter-node communication, Erlang uses an unpredictable
+port by default; it binds to port 0, which means the first available
+port.
+
+For ease of firewall configuration, Riak can be configured
+to instruct the Erlang interpreter to use a limited range
+of ports. For example, to restrict the range of ports that Erlang will
+use for inter-Erlang node communication to 6000-7999, add the following
+lines to the configuration file on each Riak node:
+
+```riakconf
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+```
+
+```appconfig
+{ kernel, [
+            {inet_dist_listen_min, 6000},
+            {inet_dist_listen_max, 7999}
+          ]},
+```
+
+The above lines should be added into the top level list in app.config,
+at the same level as all the other applications (e.g. `riak_core`).
+Then configure your firewall to allow incoming access to TCP ports 6000
+through 7999 from whichever network(s) contain your Riak nodes.
+
+### Riak Node Ports
+
+Riak nodes in a cluster need to be able to communicate freely with one
+another on the following ports:
+
+* epmd listener: TCP:4369
+* handoff_port listener: TCP:8099
+* range of ports specified in `app.config` or `riak.conf`
+
+### Riak Client Ports
+
+Riak clients must be able to contact at least one machine in a Riak
+cluster on the following TCP ports:
+
+Protocol | Port
+:--------|:----
+<a href="../../developing/api/http">HTTP</a> | TCP port 8098
+<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087
+
+### Riak Search Ports
+
+Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs
+on each Riak node if search has been [enabled][config search enabling]. When
+Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well
+as some ephemeral ports. The well-known port is determined by the value of the
+`search.solr.jmx_port` in each node's [Search configuration][config reference search].
+The default is 8985.
+
+In addition to JMX ports, Solr also binds to a well-known port of its
+own, as determined by each node's `search.solr.port` setting, which is
+also located in each node's Search configuration. The default is 8093.
+
+# Riak Security Community
+
+## Riak
+
+Riak is a powerful open-source distributed database focused on scaling
+predictably and easily, while remaining highly available in the face of
+server crashes, network partitions or other (inevitable) disasters.
+
+## Commitment
+
+Data security is an important and sensitive issue to many of our users.
+A real-world approach to security allows us to balance appropriate
+levels of security and related overhead while creating a fast, scalable,
+and operationally straightforward database.
+
+### Continuous Improvement
+
+Though we make every effort to thwart security vulnerabilities whenever
+possible (including through independent reviews), no system is
+completely secure. We will never claim that Riak is 100% secure (and you
+should seriously doubt anyone who claims their solution is). What we can
+promise is that we openly accept all vulnerabilities from the community.
+When appropriate, we'll publish and make every attempt to quickly
+address these concerns.
+
+### Balance
+
+More layers of security increase operational and administrative costs.
+Sometimes those costs are warranted, sometimes they are not. Our
+approach is to strike an appropriate balance between effort, cost, and
+security.
+
+For example, Riak does not have fine-grained role-based security. Though
+it can be an attractive bullet point in a database comparison chart,
+you're usually better off finely controlling data access through your
+application or a service layer.
+
+### Notifying Basho
+
+If you discover a potential security issue, please email us at
+**security@basho.com**, and allow us 48 hours to reply.
+
+We prefer to be contacted first, rather than searching for blog posts
+over the Internet. This allows us to open a dialogue with the security
+community on how best to handle a possible exploit without putting any
+users at risk.
+
+## Security Best Practices
+
+### Authentication and Authorization
+
+For instructions on how to apply permissions and to require client
+authentication, please see our documentation on [Riak Security][security basics].
+
+### Network Configurations
+
+Being a distributed database means that much of Riak's security springs
+from how you configure your network. We have a few recommendations for
+[Security and Firewalls][security basics].
+
+### Client Auth
+
+All of the Riak client libraries support encrypted TCP communication
+as well as authentication and authorization. For instructions on how
+to apply permissions and to require client authentication, please see
+our documentation on [Riak Security][security basics].
+
+### Multi-Datacenter Replication
+
+For those versions of Riak that support Multi Data Center (MDC)
+Replication, you can configure Riak 1.2+ to communicate over SSL to
+seamlessly encrypt the message traffic.
+
+See also: [Multi Data Center Replication: SSL][config v3 ssl]
diff --git a/content/riak/kv/2.9.0p5/using/security/basics.md b/content/riak/kv/2.9.0p5/using/security/basics.md
new file mode 100644
index 0000000000..1c65972aa7
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/security/basics.md
@@ -0,0 +1,855 @@
+---
+title: "Security Basics"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Security Basics"
+    identifier: "security_basics"
+    weight: 100
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.0p5/ops/running/authz
+  - /riak/kv/2.9.0p5/ops/running/authz
+  - /riak/2.9.0p5/using/security/basics/
+  - /riak/2.9.0/using/security/basics/
+  - /riak/kv/2.9.0/using/security/basics/
+  - /riak/kv/2.9.0p1/using/security/basics/
+  - /riak/kv/2.9.0p2/using/security/basics/
+  - /riak/kv/2.9.0p3/using/security/basics/
+  - /riak/kv/2.9.0p4/using/security/basics/
+---
+
+
+> **Note on Network security**
+>
+> This document covers only the 2.0 authentication and authorization
+features.
For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.0p5/using/security/).
+
+As of version 2.0, Riak administrators can selectively apportion
+access to a wide variety of Riak's functionality, including accessing,
+modifying, and deleting objects, changing bucket properties, and
+running MapReduce jobs.
+
+## Terminology
+
+* **Authentication** is the process of identifying a user.
+* **Authorization** is verifying whether a user has access to perform
+  the requested operation.
+* **Groups** can have permissions assigned to them, but cannot be
+  authenticated.
+* **Users** can be authenticated and authorized; permissions
+  (authorization) may be granted directly or via group membership.
+* **Sources** are used to define authentication mechanisms. A user
+  cannot be authenticated to Riak until a source is defined.
+
+## Security Checklist
+
+There are a few key steps that all applications will need to undertake
+when turning on Riak security. Missing one of these steps will almost
+certainly break your application, so make sure that you have done each
+of the following **before** enabling security:
+
+1. Make certain that the original Riak Search (version 1) and link
+   walking are not required. Enabling security will break both
+   features. If you wish to use security and Search together, you
+   will need to use the [new Search feature]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/search/).
+1. Because Riak security requires a secure SSL connection, you will need
+   to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you
+   enable security without having established a functioning SSL
+   connection, all requests to Riak will fail**.
+1. Define [users](#user-management)
+   and, optionally, [groups](#add-group)
+1. Define an [authentication source](#managing-sources) for each user
+1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group)
+1. Check any Erlang MapReduce code for invocations of Riak modules other
+   than `riak_kv_mapreduce`. Enabling security will prevent those from
+   succeeding unless those modules are available via the `add_path`
+   mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/2.9.0p5/using/reference/custom-code).
+1. Make sure that your client software will work properly:
+    * It must pass authentication information with each request
+    * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers/)
+      traffic
+    * If using HTTPS, the proper port (presumably 443) is open from
+      client to server
+    * Code that uses Riak's deprecated link walking feature **will
+      not work** with security enabled
+1. If you have applications that rely on an already existing Riak
+   cluster, make sure that those applications are prepared to gracefully
+   transition into using Riak security once security is enabled.
+
+Security should be enabled only after all of the above steps have been
+performed and your security setup has been properly vetted.
+
+Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers/) will typically have to be
+reconfigured/restarted with the proper credentials once security is
+enabled.
+
+## Security Basics
+
+Riak security may be checked, enabled, or disabled by an administrator
+through the command line.
This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. + +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak-admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak-admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak-admin security disable +``` + +While security is disabled, clients will need to be reconfigured to no +longer require TLS and send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak-admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics and granting users +selective access to Riak functionality (and also to revoke access). +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created. 
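+
+Since everything can be staged while security is still disabled, one
+sensible workflow is to define groups, users, sources, and grants first
+and only then flip the switch. A minimal sketch using the commands
+documented below (the group, user, and permission names are
+illustrative):
+
+```bash
+# Stage a group, a user, an authentication source, and permissions
+# while security is still disabled
+riak-admin security add-group admins
+riak-admin security add-user riakuser password=Test1234 groups=admins
+riak-admin security add-source riakuser 127.0.0.1/32 password
+riak-admin security grant riak_kv.get,riak_kv.put on any to admins
+
+# Only once the configuration above has been vetted:
+riak-admin security enable
+riak-admin security status
+```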
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak-admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak-admin security print-groups
+```
+
+Example output, assuming a user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups |       password       |           options            |
++----------+--------+----------------------+------------------------------+
+| riakuser |        |983e8ae1421574b8733824|              []              |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in encrypted form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and a `name` of
+`lucius`, the output would look like this:
+
+```
++----------+----------------+----------------------+---------------------+
+| username |     groups     |       password       |       options       |
++----------+----------------+----------------------+---------------------+
+| riakuser |      dev       |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` or `security print-group` (singular) commands
+can be used with a name as argument to see the same information as
+above, except for only that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak-admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group  |   type   |  bucket  |                 grants                 |
++--------+----------+----------+----------------------------------------+
+| admin  |    *     |    *     |      riak_kv.get, riak_kv.delete,      |
+|        |          |          |              riak_kv.put               |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|    *     |      *      |      riak_kv.get, riak_kv.delete,      |
+|          |             |              riak_kv.put               |
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately.
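+
+For a quick check on a single account, the singular commands can be
+combined with `print-grants`; a short sketch, assuming the `riakuser`
+account from the examples above:
+
+```bash
+# Show one user's characteristics rather than the full list
+riak-admin security print-user riakuser
+
+# Show inherited, dedicated, and cumulative permissions for that user;
+# prefix with user/ if the same name also exists as a group
+riak-admin security print-grants user/riakuser
+```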
+ +### Add Group + +For easier management of permissions across several users, it is +possible to create groups to be assigned to those users. + +```bash +riak-admin security add-group admin +``` + +### Add User + +To create a user with the username `riakuser`, we use the `add-user` +command: + +```bash +riak-admin security add-user riakuser +``` + +Using the command this way will create the user `riakuser` without _any_ +characteristics beyond a username, which is the only attribute that you +must assign upon user creation. + +Alternatively, a password---or other attributes---can be assigned to the +user upon creation. Here, we'll assign a password: + +```bash +riak-admin security add-user riakuser password=Test1234 +``` + +### Assigning a Password and Altering Existing User Characteristics + +While passwords and other characteristics can be set upon user creation, +it often makes sense to change user characteristics after the user has +already been created. Let's say that the user `riakuser` was created +without a password (or created _with_ a password that we'd like to +change). The `alter-user` command can be used to modify our `riakuser` +user: + +```bash +riak-admin security alter-user riakuser password=opensesame +``` + +When creating or altering a user, any number of `<option>=<value>` +pairs can be appended to the end of the command. Any non-standard +options will be stored and displayed via the `riak-admin security +print-users` command. + +```bash +riak-admin security alter-user riakuser name=bill age=47 fav_color=red +``` + +Now, the `print-users` command should return this: + +``` ++----------+--------+----------+--------------------------------------------------+ +| username | groups | password | options | ++----------+--------+----------+--------------------------------------------------+ +| riakuser | | |[{"fav_color","red"},{"age","47"},{"name","bill"}]| ++----------+--------+----------+--------------------------------------------------+ +``` + +**Note**: Usernames _cannot_ be changed using the `alter-user` command. +For example, running `riak-admin security alter-user riakuser +username=other-name`, will instead add the +`{"username","other-name"}` tuple to `riakuser`'s options. + +### Managing Groups for a User + +If we have a user `riakuser` and we'd like to assign her to the +`admin` group, we assign the value `admin` to the option `groups`: + +```bash +riak-admin security alter-user riakuser groups=admin +``` + +If we'd like to make the user `riakuser` both an `admin` and an +`archoverlord`: + +```bash +riak-admin security alter-user riakuser groups=admin,archoverlord +``` + +There is no way to incrementally add groups; even if `riakuser` was +already an `admin`, it is necessary to list it again when adding the +`archoverlord` group. Thus, to remove a group from a user, use +`alter-user` and list all *other* groups. + +If the user should be removed from all groups, use `groups=` with no +list: + +```bash +riak-admin security alter-user riakuser groups= +``` + +### Managing Groups for Groups + +Groups can be added to other groups for cascading permissions. 
+
+```bash
+riak-admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```
+riak-admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```
+riak-admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak-admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation |
+:----------|:----------|
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully.
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/2.9.0p5/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/2.9.0p5/configuring/search/) document. 
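+
+To round out the bucket type permissions table above, here is a sketch
+of granting property-management rights on a single bucket type (the
+`my_type` bucket type and the user name are illustrative):
+
+```bash
+# Allow 'riakuser' to inspect and modify the props of buckets
+# under the bucket type 'my_type'
+riak-admin security grant riak_core.get_bucket,riak_core.set_bucket on my_type to riakuser
+
+# To revoke:
+# riak-admin security revoke riak_core.get_bucket,riak_core.set_bucket on my_type from riakuser
+```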
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak-admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak-admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Check the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticate against the given pluggable authentication module (PAM) service
+`certificate` | Authenticate using a client certificate
+
+### Example: Adding a Trusted Source
+
+Security sources can be added either to a specific user, multiple users,
+or all users (`all`).
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak-admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+|       users        |    cidr    |  source  | options  |
++--------------------+------------+----------+----------+
+|        all         |127.0.0.1/32|  trust   |    []    |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR.
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/2.9.0p5/setup/installing/source/erlang). 
This issue should
+not affect Erlang 17.0 and later.
+
+## Enabling SSL
+
+In order to use any authentication or authorization features, you must
+enable SSL for Riak. **SSL is disabled by default**, but you will need
+to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#client-interfaces) for the node
+as well as a [certificate configuration](#certificate-configuration).
+
+If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/http) for Riak and would like to
+configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host
+and port. The following configuration would establish port 8088 on
+`localhost` as the HTTPS port:
+
+```riakconf
+listener.https.$name = 127.0.0.1:8088
+
+# By default, "internal" is used as the "name" setting
+```
+
+```appconfig
+{riak_core, [
+             %% Other configs
+             {https, [{"127.0.0.1", 8088}]},
+             %% Other configs
+            ]}
+```
+
+## TLS Settings
+
+When using Riak security, you can choose which versions of SSL/TLS are
+allowed. By default, only TLS 1.2 is allowed, but this version can be
+disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#security) to `on` or `off`:
+
+* `tls_protocols.tlsv1`
+* `tls_protocols.tlsv1.1`
+* `tls_protocols.tlsv1.2`
+* `tls_protocols.sslv3`
+
+Three things to note:
+
+* Among the four available options, only TLS version 1.2 is enabled by
+  default
+* You can enable more than one protocol at a time
+* We strongly recommend that you do _not_ use SSL version 3 unless
+  absolutely necessary
+
+## Certificate Configuration
+
+If you are using any of the available [security sources]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#trust-based-authentication), you will need to do so
+over a secure SSL connection. In order to establish a secure connection,
+you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#security) point to the proper paths for your
+generated certs. By default, Riak assumes that all certs are stored in
+each node's `/etc` directory.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can change the location of the `/etc` directory by modifying the
+`platform_etc_dir`. More information can be found in our documentation
+on [configuring directories]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/#directories).
+
+<table class="riak-conf">
+  <thead>
+    <tr>
+      <th>Type</th>
+      <th>Parameter</th>
+      <th>Default</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><strong>Signing authority</strong></td>
+      <td><code>ssl.cacertfile</code></td>
+      <td><code>#(platform_etc_dir)/cacertfile.pem</code></td>
+    </tr>
+    <tr>
+      <td><strong>Cert</strong></td>
+      <td><code>ssl.certfile</code></td>
+      <td><code>#(platform_etc_dir)/cert.pem</code></td>
+    </tr>
+    <tr>
+      <td><strong>Key file</strong></td>
+      <td><code>ssl.keyfile</code></td>
+      <td><code>#(platform_etc_dir)/key.pem</code></td>
+    </tr>
+  </tbody>
+</table>
+
+If you are using the older, `app.config`-based configuration system,
+these paths can be set in the `ssl` subsection of the `riak_core`
+section. The corresponding parameters are shown in the example below:
+
+```appconfig
+{riak_core, [
+    %% Other configs
+
+    {ssl, [
+           {certfile, "./etc/cert.pem"},
+           {keyfile, "./etc/key.pem"},
+           {cacertfile, "./etc/cacertfile.pem"}
+          ]},
+
+    %% Other configs
+]}
+```
+
+## Referer Checks and Certificate Revocation Lists
+
+In order to provide safeguards against
+[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting)
+(XSS) and
+[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery)
+attacks, Riak performs [secure referer
+checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those
+checks make it impossible to serve data directly from Riak. To disable
+those checks, set the `secure_referer_check` parameter to `off`.
+
+If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/2.9.0p5/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificates by
+default. To disable this behavior, set the `check_crl` parameter to
+`off`.
diff --git a/content/riak/kv/2.9.0p5/using/security/best-practices.md b/content/riak/kv/2.9.0p5/using/security/best-practices.md
new file mode 100644
index 0000000000..d750337502
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/security/best-practices.md
@@ -0,0 +1,89 @@
+---
+draft: true
+title: "Security Best Practices"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "Best Practices"
+    identifier: "security_best_practices"
+    weight: 102
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/security/best-practices/
+  - /riak/2.9.0/using/security/best-practices/
+  - /riak/kv/2.9.0/using/security/best-practices/
+  - /riak/kv/2.9.0p1/using/security/best-practices/
+  - /riak/kv/2.9.0p2/using/security/best-practices/
+  - /riak/kv/2.9.0p3/using/security/best-practices/
+  - /riak/kv/2.9.0p4/using/security/best-practices/
+---
+
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+ +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` diff --git a/content/riak/kv/2.9.0p5/using/security/managing-sources.md b/content/riak/kv/2.9.0p5/using/security/managing-sources.md new file mode 100644 index 0000000000..cb24760af6 --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/security/managing-sources.md @@ -0,0 +1,277 @@ +--- +title: "Managing Security Sources" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Managing Security Sources" + identifier: "security_manage_sources" + weight: 101 + parent: "managing_security" +toc: true +aliases: + - /riak/2.9.0p5/ops/running/security-sources + - /riak/kv/2.9.0p5/ops/running/security-sources + - /riak/2.9.0p5/using/security/managing-sources/ + - /riak/2.9.0/using/security/managing-sources/ + - /riak/kv/2.9.0/using/security/managing-sources/ + - /riak/kv/2.9.0p1/using/security/managing-sources/ + - /riak/kv/2.9.0p2/using/security/managing-sources/ + - /riak/kv/2.9.0p3/using/security/managing-sources/ + - /riak/kv/2.9.0p4/using/security/managing-sources/ +--- + + +If you're looking for more general information on Riak Security, it may +be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/2.9.0p5/using/security/basics). 
+
+This document provides more granular information on the four available
+authentication sources in Riak Security: trusted networks, password,
+pluggable authentication modules (PAM), and certificates. These sources
+correspond to `trust`, `password`, `pam`, and `certificate`,
+respectively, in the `riak-admin security` interface.
+
+The examples below will assume that the network in question is
+`127.0.0.1/32`, that a Riak user named `riakuser` has been
+[created]({{<baseurl>}}riak/kv/2.9.0p5/using/security/basics/#user-management), and that
+security has been [enabled]({{<baseurl>}}riak/kv/2.9.0p5/using/security/basics/#the-basics).
+
+{{% note title="Note on SSL connections" %}}
+If you use _any_ of the aforementioned security sources, even `trust`, you
+will need to do so via a secure SSL connection.
+{{% /note %}}
+
+## Trust-based Authentication
+
+This form of authentication enables you to specify trusted
+[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which all clients will be authenticated by default.
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+Here, we have specified that anyone connecting to Riak from the
+designated CIDR (in this case `localhost`) will be successfully
+authenticated:
+
+```curl
+curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+If this request returns `not found` or a Riak object, then things have
+been set up appropriately. You can specify any number of trusted
+networks in the same fashion.
+
+You can also specify users as trusted users, as in the following
+example:
+
+```bash
+riak-admin security add-source riakuser 127.0.0.1/32 trust
+```
+
+Now, `riakuser` can interact with Riak without providing credentials.
+Here's an example in which only the username is passed to Riak:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## Password-based Authentication
+
+Authenticating via the `password` source requires that our `riakuser` be
+given a password. `riakuser` can be assigned a password upon creation,
+as in this example:
+
+```bash
+riak-admin security add-user riakuser password=captheorem4life
+```
+
+Or a password can be assigned to an already existing user by modifying
+that user's characteristics:
+
+```bash
+riak-admin security alter-user riakuser password=captheorem4life
+```
+
+You can specify that _all_ users must authenticate themselves via
+password when connecting to Riak from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 password
+```
+
+Or you can specify that any number of specific users must do so:
+
+```bash
+riak-admin security add-source riakuser 127.0.0.1/32 password
+riak-admin security add-source otheruser 127.0.0.1/32 password
+
+# etc
+```
+
+Now, our `riakuser` must enter a username and password to have any
+access to Riak whatsoever:
+
+```curl
+curl -u riakuser:captheorem4life \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## Certificate-based Authentication
+
+This form of authentication (`certificate`) requires that Riak and a
+specified client---or clients---interacting with Riak bear certificates
+signed by the same [Root Certificate
+Authority](http://en.wikipedia.org/wiki/Root_certificate).
+
+> **Note**
+>
+> At this time, client certificates are not supported in Riak's HTTP
+interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/2.9.0p5/developing/api/protocol-buffers/).
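+
+If you need throwaway certificates to experiment with, a rough OpenSSL
+sketch along these lines can produce a root CA and a client certificate
+signed by it (all file names and subjects here are illustrative and not
+part of Riak itself):
+
+```bash
+# Create a self-signed root CA
+openssl genrsa -out rootCA.key 2048
+openssl req -x509 -new -key rootCA.key -days 365 -out rootCA.pem -subj '/CN=test-ca'
+
+# Create a key and CSR for the client, using the Riak username as CN
+openssl genrsa -out riakuser.key 2048
+openssl req -new -key riakuser.key -out riakuser.csr -subj '/CN=riakuser'
+
+# Sign the client certificate with the root CA
+openssl x509 -req -in riakuser.csr -CA rootCA.pem -CAkey rootCA.key \
+  -CAcreateserial -out riakuser.pem -days 365
+```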
+
+Let's specify that our user `riakuser` is going to be authenticated
+using a certificate on `localhost`:
+
+```bash
+riak-admin security add-source riakuser 127.0.0.1/32 certificate
+```
+
+When the `certificate` source is used, `riakuser` must also be entered
+as the common name, aka `CN`, that you specified when you generated your
+certificate, as in the following OpenSSL example:
+
+```bash
+openssl req -new ... '/CN=riakuser'
+```
+
+You can add a `certificate` source to any number of clients, as long as
+their `CN` and Riak username match.
+
+On the server side, you need to configure Riak by specifying a path to
+your certificates. First, copy all relevant files to your Riak cluster.
+The default directory for certificates is `/etc`, though you can specify
+a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/) either by uncommenting those lines if you choose to use the defaults or by setting the paths yourself:
+
+```riakconf
+ssl.certfile = /path/to/cert.pem
+ssl.keyfile = /path/to/key.pem
+ssl.cacertfile = /path/to/cacert.pem
+```
+
+In the client-side example above, the client's `CN` and Riak username
+needed to match. On the server (i.e. Riak) side, the `CN` specified _on
+each node_ must match the node's name as registered by Riak. You can
+find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/2.9.0p5/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a node is
+`riak-node-1`, you would need to generate your certificate with that in
+mind, as in this OpenSSL example:
+
+```bash
+openssl req -new ... '/CN=riak-node-1'
+```
+
+Once certificates have been properly generated and configured on all of
+the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/2.9.0p5/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client
+certificate that you generated for the user `riakuser`.
+
+How to use Riak clients in conjunction with OpenSSL and other
+certificates varies from client library to client library. We strongly
+recommend checking the documentation of your client library for further
+information.
+
+## PAM-based Authentication
+
+This section assumes that you have set up a PAM service bearing the name
+`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition
+specifying `auth` and/or other PAM services set up to authenticate a
+user named `riakuser`. As in the certificate-based authentication
+example above, the user's name must be the same in both your
+authentication module and in Riak Security.
+
+If we want the user `riakuser` to use this PAM service on `localhost`,
+we need to add a `pam` security source in Riak and specify the name of
+the service:
+
+```bash
+riak-admin security add-source riakuser 127.0.0.1/32 pam service=riak_pam
+```
+
+**Note**: If you do not specify a name for your PAM service, Riak will
+use the default, which is `riak`.
+
+To verify that the source has been properly specified:
+
+```bash
+riak-admin security print-sources
+```
+
+That command should output the following:
+
+```
++--------------------+------------+----------+------------------------+
+|       users        |    cidr    |  source  |        options         |
++--------------------+------------+----------+------------------------+
+|      riakuser      |127.0.0.1/32|   pam    |[{"service","riak_pam"}]|
++--------------------+------------+----------+------------------------+
+```
+
+You can test that setup most easily by using `curl`.
A normal request to
+Riak without specifying a user will return an `Unauthorized` message:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Response:
+
+```
+<html><head><title>401 Unauthorized</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></body></html>
+```
+
+If you identify yourself as `riakuser` and are successfully
+authenticated by your PAM service, you should get either `not found` or
+a Riak object if one is stored in the specified bucket type/bucket/key
+path:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
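+
+The PAM service definition itself lives outside of Riak. As a minimal
+sketch, a `/etc/pam.d/riak_pam` file delegating to the standard UNIX
+password database might contain nothing more than the following (module
+choices and options vary by system):
+
+```
+auth     required   pam_unix.so
+account  required   pam_unix.so
+```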
+
+## How Sources Are Applied
+
+When managing security sources---any of the sources explained
+above---you always have the option of applying a source to either a
+single user, multiple users, or all users (`all`). If specific users and
+`all` have no sources in common, this presents no difficulty. But what
+happens if one source is applied to `all` and a different source is
+applied to a specific user?
+
+The short answer is that the more specifically assigned source---i.e. to
+the user---will be considered a user's security source. We'll illustrate
+that with the following example, in which the `certificate` source is
+assigned to `all`, but the `password` source is assigned to `riakuser`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 certificate
+riak-admin security add-source riakuser 127.0.0.1/32 password
+```
+
+If we run `riak-admin security print-sources`, we'll get the following
+output:
+
+```
++--------------------+------------+-----------+----------+
+|       users        |    cidr    |  source   | options  |
++--------------------+------------+-----------+----------+
+|      riakuser      |127.0.0.1/32| password  |    []    |
+|                    |127.0.0.1/32|certificate|    []    |
+|        all         |127.0.0.1/32|certificate|    []    |
++--------------------+------------+-----------+----------+
+```
+
+As we can see, `password` is set as the security source for `riakuser`,
+whereas everyone else will authenticate using `certificate`.
diff --git a/content/riak/kv/2.9.0p5/using/security/v2-v3-ssl-ca.md b/content/riak/kv/2.9.0p5/using/security/v2-v3-ssl-ca.md
new file mode 100644
index 0000000000..34086890c6
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/security/v2-v3-ssl-ca.md
@@ -0,0 +1,89 @@
+---
+draft: true
+title: "V2 / V3 SSL & CA Validation"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "V2/V3 SSL & CA Validation"
+    identifier: "security_validation"
+    weight: 103
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/security/v2-v3-ssl-ca/
+  - /riak/2.9.0/using/security/v2-v3-ssl-ca/
+  - /riak/kv/2.9.0/using/security/v2-v3-ssl-ca/
+  - /riak/kv/2.9.0p1/using/security/v2-v3-ssl-ca/
+  - /riak/kv/2.9.0p2/using/security/v2-v3-ssl-ca/
+  - /riak/kv/2.9.0p3/using/security/v2-v3-ssl-ca/
+  - /riak/kv/2.9.0p4/using/security/v2-v3-ssl-ca/
+---
+
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+ +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` diff --git a/content/riak/kv/2.9.0p5/using/troubleshooting.md b/content/riak/kv/2.9.0p5/using/troubleshooting.md new file mode 100644 index 0000000000..918dbaf08a --- /dev/null +++ b/content/riak/kv/2.9.0p5/using/troubleshooting.md @@ -0,0 +1,32 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: "2.9.0p5" +menu: + riak_kv-2.9.0p5: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +aliases: + - /riak/2.9.0p5/using/troubleshooting/ + - /riak/2.9.0/using/troubleshooting/ + - /riak/kv/2.9.0/using/troubleshooting/ + - /riak/kv/2.9.0p1/using/troubleshooting/ + - /riak/kv/2.9.0p2/using/troubleshooting/ + - /riak/kv/2.9.0p3/using/troubleshooting/ + - /riak/kv/2.9.0p4/using/troubleshooting/ +--- + + +[http 204]: ./http-204 + +## In This Section + +#### [HTTP 204][http 204] + +About the HTTP 204 response. 
+
+[Learn More >>][http 204]
diff --git a/content/riak/kv/2.9.0p5/using/troubleshooting/http-204.md b/content/riak/kv/2.9.0p5/using/troubleshooting/http-204.md
new file mode 100644
index 0000000000..8b856e50bc
--- /dev/null
+++ b/content/riak/kv/2.9.0p5/using/troubleshooting/http-204.md
@@ -0,0 +1,26 @@
+---
+title: "HTTP 204"
+description: ""
+project: "riak_kv"
+project_version: "2.9.0p5"
+menu:
+  riak_kv-2.9.0p5:
+    name: "HTTP 204"
+    identifier: "troubleshooting_http_204"
+    weight: 101
+    parent: "managing_troubleshooting"
+toc: true
+aliases:
+  - /riak/2.9.0p5/using/troubleshooting/http-204/
+  - /riak/2.9.0/using/troubleshooting/http-204/
+  - /riak/kv/2.9.0/using/troubleshooting/http-204/
+  - /riak/kv/2.9.0p1/using/troubleshooting/http-204/
+  - /riak/kv/2.9.0p2/using/troubleshooting/http-204/
+  - /riak/kv/2.9.0p3/using/troubleshooting/http-204/
+  - /riak/kv/2.9.0p4/using/troubleshooting/http-204/
+---
+
+
+In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers.
+
+If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored; otherwise you will receive a `204 No Content`.
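+
+As a sketch, using curl against a local node (the bucket and key names
+are illustrative):
+
+```curl
+# Expect 204 No Content back
+curl -i -XPUT -H 'Content-Type: text/plain' -d 'hello' \
+  'http://localhost:8098/buckets/test/keys/demo'
+
+# Expect 200 OK plus the stored value back
+curl -i -XPUT -H 'Content-Type: text/plain' -d 'hello' \
+  'http://localhost:8098/buckets/test/keys/demo?returnbody=true'
+```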
+[install source index]: {{}}riak/kv/2.9.1/setup/installing/source +[install source erlang]: {{}}riak/kv/2.9.1/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.9.1/setup/installing/source/jvm + +[install verify]: {{}}riak/kv/2.9.1/setup/installing/verify + + +## Upgrading + +[upgrade index]: {{}}riak/kv/2.9.1/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.9.1/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.9.1/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.9.1/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.9.1/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.9.1/setup/downgrade + + +## Configuring + +[config index]: {{}}riak/kv/2.9.1/configuring +[config basic]: {{}}riak/kv/2.9.1/configuring/basic +[config backend]: {{}}riak/kv/2.9.1/configuring/backend +[config manage]: {{}}riak/kv/2.9.1/configuring/managing +[config reference]: {{}}riak/kv/2.9.1/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.9.1/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.9.1/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.9.1/configuring/mapreduce +[config search]: {{}}riak/kv/2.9.1/configuring/search/ + +[config v3 mdc]: {{}}riak/kv/2.9.1/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.9.1/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.9.1/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.9.1/configuring/v3-multi-datacenter/ssl + +[config v2 mdc]: {{}}riak/kv/2.9.1/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.9.1/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.9.1/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.9.1/configuring/v2-multi-datacenter/ssl + + + +## Using + +[use index]: {{}}riak/kv/2.9.1/using/ +[use admin commands]: {{}}riak/kv/2.9.1/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.9.1/using/running-a-cluster + +### Reference + +[use ref custom code]: {{}}riak/kv/2.9.1/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.9.1/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.9.1/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.9.1/using/reference/search +[use ref 2i]: {{}}riak/kv/2.9.1/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.9.1/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.9.1/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.9.1/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.9.1/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.9.1/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.9.1/using/reference/v2-multi-datacenter + +### Cluster Admin + +[use admin index]: {{}}riak/kv/2.9.1/using/admin/ +[use admin commands]: {{}}riak/kv/2.9.1/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.9.1/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.9.1/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.9.1/using/admin/riak-control/ + +### Cluster Operations + +[cluster ops add remove node]: {{}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.9.1/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.9.1/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.9.1/configuring/load-balancing-proxy +[cluster ops bucket types]: 
{{}}riak/kv/2.9.1/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.9.1/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.9.1/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.9.1/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.9.1/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.9.1/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.9.1/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.9.1/using/cluster-operations/v2-multi-datacenter + +### Repair/Recover + +[repair recover index]: {{}}riak/kv/2.9.1/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.9.1/using/repair-recovery/failure-recovery/ + +### Security + +[security index]: {{}}riak/kv/2.9.1/using/security/ +[security basics]: {{}}riak/kv/2.9.1/using/security/basics +[security managing]: {{}}riak/kv/2.9.1/using/security/managing-sources/ + +### Performance + +[perf index]: {{}}riak/kv/2.9.1/using/performance/ +[perf benchmark]: {{}}riak/kv/2.9.1/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.9.1/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.9.1/using/performance/erlang +[perf aws]: {{}}riak/kv/2.9.1/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.9.1/using/performance/latency-reduction + +### Troubleshooting + +[troubleshoot http]: {{}}riak/kv/2.9.1/using/troubleshooting/http-204 + + +## Developing + +[dev index]: {{}}riak/kv/2.9.1/developing +[dev client libraries]: {{}}riak/kv/2.9.1/developing/client-libraries +[dev data model]: {{}}riak/kv/2.9.1/developing/data-modeling +[dev data types]: {{}}riak/kv/2.9.1/developing/data-types +[dev kv model]: {{}}riak/kv/2.9.1/developing/key-value-modeling + +### Getting Started + +[getting started]: {{}}riak/kv/2.9.1/developing/getting-started +[getting started java]: {{}}riak/kv/2.9.1/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.9.1/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.9.1/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.9.1/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.9.1/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.9.1/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.9.1/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.9.1/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.9.1/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.1/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.1/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.1/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.1/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.1/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.1/developing/getting-started/golang/object-modeling + +### Usage + +[usage index]: {{}}riak/kv/2.9.1/developing/usage +[usage bucket types]: {{}}riak/kv/2.9.1/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.9.1/developing/usage/commit-hooks +[usage conflict 
resolution]: {{}}riak/kv/2.9.1/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.9.1/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.9.1/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.9.1/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.9.1/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.9.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.9.1/developing/usage/search +[usage search schema]: {{}}riak/kv/2.9.1/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.9.1/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.9.1/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.9.1/developing/usage/updating-objects + +### App Guide + +[apps mapreduce]: {{}}riak/kv/2.9.1/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.9.1/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.9.1/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/2.9.1/developing/api/backend +[dev api http]: {{}}riak/kv/2.9.1/developing/api/http +[dev api http status]: {{}}riak/kv/2.9.1/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.9.1/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/2.9.1/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.9.1/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.9.1/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.9.1/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.9.1/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.9.1/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.9.1/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.9.1/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.9.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.9.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.9.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.9.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.9.1/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.9.1/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + diff --git a/content/riak/kv/2.9.1/add-ons.md b/content/riak/kv/2.9.1/add-ons.md new file mode 100644 index 0000000000..cbc23bc5e7 --- /dev/null +++ b/content/riak/kv/2.9.1/add-ons.md @@ -0,0 +1,19 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
+
+* [Riak Redis Add-on]({{}}riak/kv/2.9.1/add-ons/redis/)
diff --git a/content/riak/kv/2.9.1/add-ons/redis.md b/content/riak/kv/2.9.1/add-ons/redis.md
new file mode 100644
index 0000000000..ae780e1add
--- /dev/null
+++ b/content/riak/kv/2.9.1/add-ons/redis.md
@@ -0,0 +1,58 @@
+---
+title: "Riak Redis Add-on"
+description: "Redis Add-on for Riak KV"
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Redis Add-on"
+    identifier: "add-ons_redis"
+    weight: 101
+    parent: "add-ons"
+toc: true
+commercial_offering: true
+---
+
+[addon redis develop]: ./developing-rra/
+[addon redis features]: ./redis-add-on-features/
+[addon redis setup]: ./set-up-rra/
+[addon redis use]: ./get-started-with-rra/
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+{{% note title="Warning: No longer actively maintained" %}}
+Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version.
+{{% /note %}}
+
+Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV.
+
+RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate.
+
+Whether you are looking to build out a session store, shopping cart, advertisement, or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV).
+
+## Compatibility
+
+RRA is supported on the following platforms:
+
+* RHEL/CentOS 6
+* RHEL/CentOS 7
+* Ubuntu 12.04 LTS "Precise Pangolin"
+* Ubuntu 14.04 LTS "Trusty Tahr"
+* Debian 7 "Wheezy"
+* Debian 8 "Jessie"
+
+RRA is compatible with the following services:
+
+* Riak KV Enterprise (2.1.4+)
+* Riak TS Enterprise (1.4.0+)
+* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster)
+  * Redis Cluster and RRA's consistent hashing are at odds, which surfaces as
+    errors such as MOVED, ASK, and CROSSSLOT messages from Redis; see (WIP):
+    https://github.com/antirez/redis-rb-cluster
+
+## Get Started
+
+* [Set up RRA.][addon redis setup]
+* [Use RRA with various clients.][addon redis use]
+* [Develop with RRA.][addon redis develop]
+* [Learn about RRA's features.][addon redis features]
diff --git a/content/riak/kv/2.9.1/add-ons/redis/developing-rra.md b/content/riak/kv/2.9.1/add-ons/redis/developing-rra.md
new file mode 100644
index 0000000000..dacfdbb21e
--- /dev/null
+++ b/content/riak/kv/2.9.1/add-ons/redis/developing-rra.md
@@ -0,0 +1,325 @@
+---
+title: "Developing with Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Develop with Redis Add-on"
+    identifier: "add-ons_redis_develop"
+    weight: 403
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+---
+
+[redis-clients]: http://redis.io/clients
+[usage bucket types]: {{}}riak/kv/2.9.1/developing/usage/bucket-types/
+[dev api http]: {{}}riak/kv/2.9.1/developing/api/http
+[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/
+[apps replication properties]: {{}}riak/kv/2.9.1/developing/app-guide/replication-properties
+[usage commit hooks]: {{}}riak/kv/2.9.1/developing/usage/commit-hooks/
+[concept causal context]: {{}}riak/kv/2.9.1/learn/concepts/causal-context
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations.
+
+## Overview
+
+Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides access to Riak KV, as a persistent data store, with Redis as a cache, through the various Redis client libraries and the command-line interface tool `redis-cli`.
+
+As with Riak KV, the cache proxy service almost always performs best and most
+predictably when you use the basic CRUD operations -- Create, Read, Update,
+Delete -- that you'd find in any key/value store. Learning these operations
+is a great place to start when beginning to develop applications that use
+RRA.
+
+The set of clients (including recommendations) for Redis is listed at
+[Redis clients][redis-clients]. For brevity's sake, the examples provided here
+are in:
+
+* Erlang (Eredis)
+* JavaScript (node_redis)
+* Python (redis-py)
+* Ruby (redis-rb)
+* Scala (lettuce)
+* Java, see the Scala examples. The code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client.
+
+## Riak KV Setup
+
+While you can use Riak Redis Add-on with Riak KV configured so either `last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'.
+
+The cache proxy service is tested under both configurations. However, due to the Redis protocol's lack of support for returning multiple values for a single `GET`, `last_write_wins` semantics effectively apply.
+
+For a deeper explanation of Riak KV's configurable behaviors, see John Daily's
+blog series [part 4][config-behaviors].
+
+### Bucket Type Setup
+
+#### Create a Bucket Type
+
+If your application organizes data without bucket types and instead uses only
+buckets to organize its keyspace, the `default` bucket-type can be used by
+omitting the bucket-type portion of the colon-delimited, hierarchically
+namespaced key. Put another way, `test:food` is equivalent to
+`default:test:food`, where the bucket-type is `default`, the bucket is `test`,
+and the key is `food`. For the examples here, we will use `rra:test:food` to
+clearly use a bucket-type.
+
+If your application organizes data including a bucket-type, ensure that the
+bucket-type is created in Riak without specifying the data type, so the value
+is effectively opaque, i.e. a `string`. The following command provides an
+example of creating the bucket-type `rra`:
+
+```sh
+if ! riak-admin bucket-type status rra >/dev/null 2>&1; then
+    riak-admin bucket-type create rra '{"props":{}}'
+    riak-admin bucket-type activate rra
+fi
+```
+
+#### Set Bucket Props
+
+The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false':
+
+```sh
+curl -XPUT -H 'Content-Type: application/json' \
+    -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \
+    'http://127.0.0.1:8098/types/rra/buckets/test/props'
+```
+
+For additional configuration options see [bucket properties][dev api http].
+
+## Object/Key Operations
+
+Riak KV organizes data into buckets, keys, and values, with
+[bucket types][usage bucket types] acting as an additional namespace in Riak KV
+versions 2.0 and greater.
+Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket.
+
+Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or another plaintext representation that can be parsed in the client application (e.g. YAML).
+
+While buckets are a flat namespace in Riak KV and you can name them
+whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy
+service, the Redis bucket_type:bucket:key is mapped to the Riak KV
+bucket_type/bucket/key, so bucket type and bucket names should not contain a
+colon (`:`). When not specified, the bucket type defaults to "default".
+
+Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets.
+
+The same goes for naming keys: many objects can have the same key as long as they're in different buckets. There is no restriction on keys containing colons (`:`), and this practice of representing a nested namespace is common in applications using Redis.
+
+Riak KV [bucket types][usage bucket types] enable you to provide common
+configurations for buckets (as many buckets as you wish). This means you can
+easily enable buckets to share common configurations, e.g. identical
+[replication properties][apps replication properties] or
+[commit hooks][usage commit hooks].
+
+
+## Reading Objects
+
+Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading through to Riak KV, which results in greater resilience through node outages and network partitions.
+
+To request a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.get("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.get("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.get("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+var value = connection.get("rra:test:food")
+```
+
+### Get Configuration Parameters
+
+>**Note:** The cache proxy service read options (related to replication factor and
+consistency concern) may optionally be set within the nutcracker.conf. This
+overrides the value set at the bucket level in Riak KV.
+
+The following configuration parameters apply to `GET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` |
+|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` |
+|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) |
+|`timeout` | The number of milliseconds to await a response. | `0` (server specified) |
+
+
+### Sibling Resolution
+
+As the Redis protocol does not provide a means to return multiple siblings,
+the cache proxy service must provide server-side sibling resolution. At present, only last-write-wins sibling resolution is available. The result is an effective
+last-write-wins configuration for access through the cache proxy service.
+
+
+## Writing Objects
+
+Writes via the cache proxy service are analogous to a Redis `SET`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache. As with HTTP PUT, `SET` semantically covers both create and update
+operations.
+
+To set a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.set("rra:test:food", "apple", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.set("rra:test:food", "apple")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.set("rra:test:food", "apple")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.set("rra:test:food", "apple")
+```
+
+### Set Configuration Parameters
+
+>**Note:** The cache proxy service write options (related to replication factor and
+consistency concern) may optionally be set within the nutcracker.conf, overriding
+the value set at the bucket level in Riak KV.
+
+The following configuration parameters apply to `SET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+### Sibling Explosion
+
+As noted in the section "Sibling Resolution" above, Riak KV provides for a line of
+descent (known as the [causal context][concept causal context]) for a value stored at a key. Clients
+performing write operations provide this causal context by setting the vector
+clock (VClock) that they last read.
+
+If a client does not provide the causal context, Riak KV makes no assumptions and treats the write as a new causal context, semantically equivalent to a
+create. In the case that a value is already stored at the key, this would lead
+to a sibling.
+
+Since the Redis protocol does not provide a means to pass a VClock, the cache
+proxy service needs to perform a read-before-write to obtain the current VClock, so the write can continue the causal context previously established and avoid
+"sibling explosion".
+
+Despite these efforts, in the event of a network partition, siblings will still
+be created, as clients writing to nodes on either side of the network partition
+can create divergent lines of descent. Sibling resolution remains the means
+to merge these lines of descent into a coherent causal context.
+
+## Deleting Objects
+
+Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added
+benefit of deleting from Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache.
+
+To delete a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.del("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+# redis-py exposes DEL as delete(), since `del` is a reserved word in Python
+r.delete("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.del("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.del("rra:test:food")
+```
+
+### Delete Configuration Parameters
+
+The following configuration parameters apply to `DEL` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
diff --git a/content/riak/kv/2.9.1/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.9.1/add-ons/redis/redis-add-on-features.md
new file mode 100644
index 0000000000..58a1ac3d25
--- /dev/null
+++ b/content/riak/kv/2.9.1/add-ons/redis/redis-add-on-features.md
@@ -0,0 +1,131 @@
+---
+title: "Riak Redis Add-on Features"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Redis Add-on Features"
+    identifier: "add-ons_redis_features"
+    weight: 504
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+---
+
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[GET-sequence]: {{}}images/redis/GET_seq.msc.png
+[SET-sequence]: {{}}images/redis/SET_seq.msc.png
+[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png
+[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png
+[redis docs]: http://redis.io/commands
+[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md
+
+## Overview
+
+The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware.
+
+On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available:
+
+* [Pre-sharding](#pre-sharding)
+* [Connection Aggregation](#connection-aggregation)
+* [Command Pipelining](#command-pipelining)
+* [Read-through Cache](#read-through-cache)
+* [Write-around Cache](#write-around-cache)
+* [Commands](#commands)
+* [Object Lifetime](#object-lifetime)
+
+## Pre-sharding
+
+Pre-sharding with consistent hashing dispatches object reads and writes based
+on a configurable hash function, spreading load across multiple cache servers.
+The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then
+requests are routed to the Redis server that handles that portion of the key
+range.
+
+Redis, used with no persistence as the cache behind the proxy service, holds
+all data in memory, so the addressable memory of a single cache server is
+limited. By employing pre-sharding, the total addressable cache memory space
+is extended by the number of Redis servers.
+
+## Connection Aggregation
+
+Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers. This reduces the total required connections to the Redis server for the same key.
+
+Redis clients in various languages support specifying multiple servers, as well
+as implementing multiple methods of spreading load across those servers (e.g.
+round-robin load balancing or consistent hashing). Since the cache proxy service provides consistent hashing, any Redis client method of supporting multiple
+servers will suffice.
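+
+As an illustration, the minimal redis-py sketch below (illustrative only; the host and port are assumptions matching the examples elsewhere in these docs) shows that an application needs to know only its RRA endpoint, while RRA consistent-hashes each key to the appropriate Redis server behind it:
+
+```python
+import redis
+
+# One connection to RRA stands in for connections to every Redis server:
+# RRA hashes "rra:test:food" and routes the request to the owning shard.
+rra = redis.StrictRedis(host="127.0.0.1", port=22122)
+rra.get("rra:test:food")
+```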
+
+## Command Pipelining
+
+The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideally placed to do so, since it already aggregates connections. Pipelining reduces network roundtrips to Redis and
+lowers CPU usage on Redis.
+
+## Read-Through Cache
+
+Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing.
+
+The read-through cache strategy of the GET command is represented by the
+following sequence diagram:
+
+![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png)
+
+
+The `CACHE_TTL` configuration option establishes how long the cache may take to
+become consistent with the backend server after a write (DELETE or PUT) to the
+backend server.
+
+A short `CACHE_TTL`, for example `15s`, still removes a significant amount of
+read pressure from Riak, increasing performance of the overall solution.
+
+## Write-Around Cache
+
+The read-through cache strategy requires a TTL to keep the cache as coherent as possible, given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL.
+
+The cache proxy service's write-around cache strategy was introduced to provide a means to keep the cache coherent, with zero time to consistency with the underlying Riak KV data store, for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache.
+
+Of the three write cache strategies, the write-around cache strategy is the least
+prone to race conditions, but the least optimal for the read which immediately follows
+the write. In the overwhelming majority of distributed application data access
+patterns, the added certainty of cache coherency afforded by write-around over
+write-through is well worth the single cache miss. By definition, a key that is
+cached is expected to be accessed frequently, hence the single cache miss is
+expected to be followed by several accurate cache hits.
+
+The write-around cache strategy of the SET command is represented by the
+following sequence diagram:
+
+![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png)
+
+The write-around cache strategy of the DEL command is represented by the
+following sequence diagram:
+
+![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png)
+
+## Commands
+
+For command details, refer to the Redis [documentation][redis docs].
+
+The cache proxy service supports the following augmented Redis commands fully:
+
+* GET - get the value of a key from Redis or Riak KV, utilizing the read-through
+  caching strategy with a TTL set at service configuration time.
+
+* SET - set the value of a key in Riak KV and invalidate cache, issuing a PEXPIRE
+  to Redis.
+
+* DEL - delete the value of a key in Riak KV and invalidate cache, issuing a
+  PEXPIRE to Redis.
+
+The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs].
+
+>**Important:** While the cache proxy service does support issuing DEL commands, PEXPIRE, with a small TTL, is suggested instead when the semantic intent is to remove an item from cache. With write-around, the DEL command will issue a delete to the Riak backend.
+
+## Object Lifetime
+
+With the combination of read-through and write-around cache strategies, the
+full object lifetime for a key-value is represented by the following
+sequence diagram:
+
+![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png)
diff --git a/content/riak/kv/2.9.1/add-ons/redis/set-up-rra.md b/content/riak/kv/2.9.1/add-ons/redis/set-up-rra.md
new file mode 100644
index 0000000000..0b1b210d29
--- /dev/null
+++ b/content/riak/kv/2.9.1/add-ons/redis/set-up-rra.md
@@ -0,0 +1,280 @@
+---
+title: "Setting Up Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Set Up Redis Add-on"
+    identifier: "add-ons_redis_setup"
+    weight: 201
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis use]: ../using-rra
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[install index]: {{}}riak/kv/2.9.1/setup/installing
+[perf open files]: {{}}riak/kv/2.9.1/using/performance/open-files-limit/#changing-the-limit
+[lab ansible]: https://github.com/paegun/ansible-cache-proxy
+
+This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA.
+
+## Prerequisites
+
+Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index].
+
+While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go.
+
+This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used
+to configure the cache proxy.
+
+## In the Lab
+
+An Ansible setup for the Riak Redis Add-on (RRA) was developed to provide a
+runnable example of an installation; see [ansible cache proxy][lab ansible].
+The remainder of this setup guide lists the commands required to install and
+configure RRA manually.
+
+## Installing
+
+1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files].
+2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed.
+3. Install Riak Redis Add-on.
+
+### Change the open-files limit
+
+As with Riak KV, both the total open-files limit and the per-user open-files limit
+must be high enough to allow Redis and Riak Redis Add-on (RRA) to function.
+
+For a complete guide on changing the limit in Riak KV, see
+[Changing the limit][perf open files].
+
+#### Linux
+
+On most Linux distributions, the total limit for open files is controlled by `sysctl`:
+
+```bash
+# check the current total open-files limit
+sudo sysctl fs.file-max
+
+# raise the limit, persisting the setting, then reload
+echo "fs.file-max=65536" | sudo tee -a /etc/sysctl.conf
+sudo sysctl -p
+```
+
+To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
+
+#### CentOS
+
+On CentOS systems, set a proper limit for the user you're usually logging in with
+to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the
+executing user.
+
+#### Ubuntu
+
+On Ubuntu systems, the following settings are recommended:
+
+```config
+»USERNAME« hard nofile 65536
+»USERNAME« soft nofile 65536
+root hard nofile 65536
+root soft nofile 65536
+```
+
+>**Note:** You may need to log out of your shell and then log back in for these changes to take effect.
+
+
+### Install Redis
+
+>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on".
+
+#### Install on Ubuntu
+
+If you are on Ubuntu, run the following to install Redis:
+
+```bash
+# add the dotdeb repositories to your APT sources.
+sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <<EOF
+deb http://packages.dotdeb.org squeeze all
+deb-src http://packages.dotdeb.org squeeze all
+EOF
+
+# authenticate the repositories
+wget -q -O - http://www.dotdeb.org/dotdeb.gpg | sudo apt-key add -
+
+# update the APT cache
+sudo apt-get update
+
+# install redis-server
+sudo apt-get install -y redis-server
+```
+
+#### Install on CentOS
+
+If you are on CentOS, run the following to install Redis:
+
+```bash
+sudo yum install -y redis
+```
+
+To verify that Redis is running and listening on the expected port, run:
+
+```bash
+ss -nlp |grep [r]edis
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.
+
+### Install Riak Redis Add-on (RRA)
+
+>**Note:**
+>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums.
+
+If you are on CentOS, run the following to install RRA:
+
+```bash
+sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm
+```
+
+If you are on Ubuntu, run the following to install RRA:
+
+```bash
+sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb
+```
+
+## Configuring Riak Redis Add-on
+
+To configure Riak Redis Add-on (RRA), edit the configuration file: /etc/cache_proxy/cache_proxy_22122.yml.
+
+The RRA configuration file is in YAML format. An example configuration
+file is provided in the install, and it contains all relevant configuration elements:
+
+```config
+»pool name« :
+  listen: 0.0.0.0:22122
+  hash: fnv1a_64
+  distribution: ketama
+  auto_eject_hosts: true
+  redis: true
+  server_retry_timeout: 2000
+  server_failure_limit: 1
+  server_ttl: 1h
+  servers:
+    - 127.0.0.1:6379:1
+  backend_type: riak
+  backend_max_resend: 2
+  backends:
+    - 127.0.0.1:8087
+```
+
+Set the `listen` configuration value to set the RRA listen port.
+
+To set the time-to-live (TTL) for values stored in cache, set the `server_ttl`
+configuration value. Human-readable time values can be specified,
+with the most likely units being `s` for seconds or `ms` for milliseconds.
+
+Set the list of Redis servers by listing each server, on its own line prefixed with `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional).
+
+Set the list of Riak KV servers by listing each server, on its own line prefixed with `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«`
+(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here.
+
+### Verify your configuration
+
+If you are on Ubuntu, run the following to start RRA:
+
+```bash
+sudo service cache_proxy start
+```
+
+If you are on CentOS, run the following to start RRA:
+
+```bash
+systemctl start cache_proxy
+```
+
+To verify RRA is running and listening on the expected port, run the
+following (using the loopback interface and the default RRA port 22122
+as an example):
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS
+redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on
+```
+
+Redis should respond with `SUCCESS`.
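+
+The same check can also be scripted. The following is a minimal redis-py sketch (illustrative only, not part of the RRA package; it assumes RRA is listening on 127.0.0.1:22122):
+
+```python
+import redis
+
+# Connect to RRA rather than to Redis or Riak KV directly.
+rra = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+rra.set("test:redis-add-on", "SUCCESS")   # write through RRA to Riak KV
+value = rra.get("test:redis-add-on")      # read back through the cache
+assert value == b"SUCCESS", value         # redis-py returns bytes
+```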
+
+If RRA is responding with the expected output, run the following to
+clean up and remove the test value:
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on
+```
+
+If you did not get the expected output, run the following
+to verify that RRA is running on the expected port:
+
+```bash
+ss -nlp |grep [n]utcracker
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.
+
+## Next Steps
+
+Get started with some [basic usage][addon redis use] or check out more info on [setting up for development (with examples)][addon redis develop].
diff --git a/content/riak/kv/2.9.1/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.9.1/add-ons/redis/set-up-rra/deployment-models.md
new file mode 100644
index 0000000000..b6919ecde1
--- /dev/null
+++ b/content/riak/kv/2.9.1/add-ons/redis/set-up-rra/deployment-models.md
@@ -0,0 +1,138 @@
+---
+title: "Riak Redis Add-on Deployment Models"
+description: "Explore the various models for deploying Riak Redis Add-on"
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Redis Add-on Deployment Models"
+    identifier: "add-ons_redis_deployment"
+    weight: 201
+    parent: "add-ons_redis_setup"
+toc: true
+commercial_offering: true
+---
+
+[Local-deployment]: {{}}images/redis/rra_deployment_local.png
+[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png
+[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png
+
+## Deployment Models
+
+### Local Cache Deployment
+
+In a local cache deployment, the RRA and Redis are deployed to the application
+server.
+
+![Local-deployment]({{}}images/redis/rra_deployment_local.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are local.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections from Application Servers to Riak Nodes are distributed
+  and bounded to equal the number of Riak nodes _multiplied_ by the number of
+  Application Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Cache hits are extremely fast.
+
+Disadvantages:
+
+* Cache writes on one application server are *not* observed on other application
+  servers, so cache hit rates are likely lower unless some form of consistent
+  routing to the application server exists within the solution.
+* Redis competing for RAM with the application service may be problematic.
+
+### Colocated Cache Deployment
+
+In a colocated cache deployment, the RRA may be deployed either to the
+application server (suggested) or to the Riak servers, and Redis is deployed to
+the Riak servers.
+
+In the case of deploying the RRA to the application servers, the RRA features
+of reducing connections from the relatively high number of application service
+instances to the fewer Redis (cache) and Riak (persistent) data service
+instances allow for the greatest scale at the expense of the deployment cost
+of pushing a service and its configuration.
+
+In the case of deploying the RRA to the colocated Redis and Riak data servers,
+the maximum scale for the solution is constrained by the number of network
+connections from the application services, while deployment costs remain a matter
+of pushing a service and its configuration. In either case, deployment should
+be automated, so deployment costs are not multiplied by the number of servers.
+
+![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are distributed and bounded to equal the number of Riak nodes
+  _multiplied_ by the number of Application Service instances.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections from RRA to Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _squared_.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so slightly increased latency compared to local.
+* Redis competing for RAM with Riak will likely be problematic. Redis should
+  be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis,
+  so that Riak is allotted sufficient RAM to serve the more important
+  persistent data storage and retrieval services. See http://redis.io/topics/config
+* This model may seem to provide data locality, but in the case of faults in
+  either the Redis or Riak services, the fault-tolerance mechanisms of RRA and
+  Riak will not match exactly, as communicating the information necessary to
+  support such lock-step fault tolerance would lead to greater mean latencies,
+  and Riak provides superior 99th-percentile latency performance in the face
+  of faults.
+
+
+### Distributed Cache Deployment
+
+In a distributed cache deployment, the RRA is deployed to the application server,
+and Redis is deployed to standalone servers, separate from Riak cluster nodes.
+
+![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are local.
+* Redis: The connections between the RRA Service instances and the Redis Service
+  instances are distributed and bounded to equal the number of Application
+  Servers _multiplied_ by the number of Redis Servers.
+* Riak: The connections from RRA to Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _multiplied_ by the number of Application
+  Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+* Keeps RRA near the application, reducing network connections.
+* Moves Redis to distinct servers, allowing the cache more RAM and not
+  constraining the RAM of either the application or persistent data services.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so increased latency compared to local.
+
+### Recommendation
+
+The relative advantages and disadvantages of the Distributed Cache Deployment,
+most notably the increased cache hit rate and reduced connection overhead,
+should make it the standout choice for applications requiring the scale and
+operational simplicity of Riak. For this reason, we recommend the Distributed
+Cache Deployment.
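+
+To make the connection arithmetic above concrete, the following sketch (illustrative only; the cluster sizes are hypothetical) computes the connection bounds described for each model:
+
+```python
+# Hypothetical deployment sizes.
+app_servers = 10
+riak_nodes = 5
+redis_servers = 5
+
+# Local and distributed models: app-to-Riak connections aggregate at each
+# local RRA, bounded by Riak nodes multiplied by application servers.
+app_to_riak = riak_nodes * app_servers                   # 50
+
+# Colocated model (RRA on the Riak nodes): RRA-to-Riak connections are
+# bounded by the number of Riak nodes squared.
+colocated_rra_to_riak = riak_nodes ** 2                  # 25
+
+# Distributed model: RRA-to-Redis connections are bounded by application
+# servers multiplied by Redis servers.
+distributed_rra_to_redis = app_servers * redis_servers   # 50
+
+print(app_to_riak, colocated_rra_to_riak, distributed_rra_to_redis)
+```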
diff --git a/content/riak/kv/2.9.1/add-ons/redis/using-rra.md b/content/riak/kv/2.9.1/add-ons/redis/using-rra.md
new file mode 100644
index 0000000000..9f3c14060d
--- /dev/null
+++ b/content/riak/kv/2.9.1/add-ons/redis/using-rra.md
@@ -0,0 +1,242 @@
+---
+title: "Using Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Using Redis Addon"
+    identifier: "add-ons_redis_getstarted"
+    weight: 302
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/kv/2.9.1/add-ons/redis/get-started-with-rra
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis setup]: ../set-up-rra/
+[dev api http]: {{}}riak/kv/2.9.1/developing/api/http/
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+Now that you've [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client which supports `GET`, `SET`, and `DEL` operations.
+
+This page will walk you through using RRA.
+
+## Prerequisites
+
+We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine.
+
+You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http].
+
+## Run the Read-Through Test
+
+Throughout this test example, the bucket "test" and key "foo" are used to
+demonstrate how to address the hierarchical namespace support in Riak KV
+through the flat Redis key. The bucket type is not specified in this example,
+so the default bucket type, named "default", is effectively used. For additional
+information regarding the key namespace, see [develop Riak Redis Add-on (RRA)][addon redis develop].
+
+The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are:
+
+* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings.
+* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`.
+* GET the Riak object at the `test` bucket with the key `foo`.
+* GET the string representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from; see the sketch after this list.)
+* Assert that the value obtained from the previous cache proxy GET is 'bar'.
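+
+To make the key mapping concrete, here is a small sketch (illustrative only; the helper is hypothetical, not part of RRA) of how a flat Redis key translates to Riak KV coordinates, per the namespace rules described above:
+
+```python
+def riak_coordinates(redis_key):
+    """Split a colon-delimited Redis key into (bucket-type, bucket, key).
+
+    'test:foo' -> ('default', 'test', 'foo'); keys themselves may contain
+    colons, so any remainder is kept as part of the key.
+    """
+    parts = redis_key.split(":")
+    if len(parts) == 2:
+        return ("default", parts[0], parts[1])
+    return (parts[0], parts[1], ":".join(parts[2:]))
+
+assert riak_coordinates("test:foo") == ("default", "test", "foo")
+assert riak_coordinates("rra:test:food") == ("rra", "test", "food")
+```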
+
+First, create a file named `read_through_test.sh` with the following content:
+
+```bash
+#!/usr/bin/env bash
+
+# set test environment
+RIAK_HTTP_IP="127.0.0.1"
+RIAK_HTTP_PORT="8098"
+CACHE_PROXY_IP="127.0.0.1"
+CACHE_PROXY_PORT="22122"
+CACHE_PROXY_STATISTICS_PORT="22123"
+RIAK_TEST_BUCKET="test"
+KEY="foo"
+VALUE="bar"
+
+# DELETE Riak object, ensure no siblings
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# PUT Riak object
+curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# GET Riak object
+RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY")
+
+# GET Cache Proxy value
+CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" GET "$RIAK_TEST_BUCKET:$KEY")
+
+# DELETE Riak object, cleanup
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# Assert
+if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then
+    RESULT="Success"
+else
+    RESULT="FAIL"
+fi
+echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy."
+```
+
+Then, once you've created the file, make it executable and run it:
+
+```bash
+chmod +x read_through_test.sh
+./read_through_test.sh
+```
+
+### Exceptions
+
+If the test does not pass, verify that both Redis and RRA are running. You can do this by running:
+
+```bash
+ps aux |grep [r]edis
+ps aux |grep [n]utcracker
+```
+
+The result should list `redis` and `nutcracker` respectively.
+
+Also, verify that Riak KV is started and listening on the protocol buffer port specified:
+
+```bash
+sudo riak config effective |grep proto
+```
+
+If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following:
+
+```bash
+sudo service cache_proxy restart
+```
+
+If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows:
+
+1. Stop RRA.
+2. Stop Redis.
+3. *Optional* Restart Riak KV. (This should only be necessary if Riak KV is not responding to protocol buffer requests.)
+4. Start Redis.
+5. Start RRA.
+
+```bash
+sudo service cache_proxy stop
+sudo service redis stop
+
+# optional
+sudo riak restart
+
+sudo service redis start
+sudo service cache_proxy start
+```
+
+## Using Riak Redis Add-on
+
+Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA.
+
+For objects that should not be cached, interact with Riak KV as usual: issuing GET, PUT, and DELETE commands through the Riak client.
+
+For objects that should be cached, read from RRA: issuing GET, SET, and DEL commands through the Redis client.
+
+### Monitoring
+
+#### RRA
+
+Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service's process was killed or terminated by other means.
+
+The log file for RRA is stored by default in `/var/log/cache_proxy.log`. RRA is logrotate friendly, responding to the signal to reopen the log file following a rotate.
+
+For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
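+
+For programmatic collection, a minimal Python sketch along these lines could work (illustrative only; it assumes the default statistics address 127.0.0.1:22123 and that the service writes a single JSON document per connection, as shown in the raw output below):
+
+```python
+import json
+import socket
+
+def read_rra_stats(host="127.0.0.1", port=22123):
+    # Read until the statistics service closes the connection, then parse JSON.
+    with socket.create_connection((host, port), timeout=5) as sock:
+        chunks = []
+        while True:
+            data = sock.recv(4096)
+            if not data:
+                break
+            chunks.append(data)
+    return json.loads(b"".join(chunks))
+
+stats = read_rra_stats()
+print(stats["curr_connections"], stats["uptime"])
+```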
+
+For example, running the following command (using the loopback interface and the default statistics port as an example):
+
+```bash
+telnet 127.0.0.1 22123
+```
+
+returns statistics results such as:
+
+```json
+{
+    "bdp_cache_proxy": {
+        "192.168.50.2:6379": {
+            "in_queue": 0,
+            "in_queue_bytes": 0,
+            "out_queue": 0,
+            "out_queue_bytes": 0,
+            "request_bytes": 216,
+            "requests": 9,
+            "response_bytes": 39,
+            "responses": 4,
+            "server_connections": 1,
+            "server_ejected_at": 0,
+            "server_eof": 0,
+            "server_err": 0,
+            "server_timedout": 0
+        },
+        "192.168.50.3:6379": {
+            "in_queue": 0,
+            "in_queue_bytes": 0,
+            "out_queue": 0,
+            "out_queue_bytes": 0,
+            "request_bytes": 0,
+            "requests": 0,
+            "response_bytes": 0,
+            "responses": 0,
+            "server_connections": 0,
+            "server_ejected_at": 0,
+            "server_eof": 0,
+            "server_err": 0,
+            "server_timedout": 0
+        },
+        "192.168.50.4:6379": {
+            "in_queue": 0,
+            "in_queue_bytes": 0,
+            "out_queue": 0,
+            "out_queue_bytes": 0,
+            "request_bytes": 90,
+            "requests": 5,
+            "response_bytes": 258,
+            "responses": 2,
+            "server_connections": 0,
+            "server_ejected_at": 0,
+            "server_eof": 0,
+            "server_err": 0,
+            "server_timedout": 0
+        },
+        "client_connections": 0,
+        "client_eof": 6,
+        "client_err": 0,
+        "forward_error": 0,
+        "fragments": 0,
+        "server_ejects": 0
+    },
+    "curr_connections": 4,
+    "service": "nutcracker",
+    "source": "vagrant",
+    "timestamp": 1438301846,
+    "total_connections": 10,
+    "uptime": 7227,
+    "version": "0.4.0"
+}
+```
+
+Using the above results, you should be able to determine which metric changes would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution.
+
+While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions:
+
+* Custom - https://github.com/gfranxman/NutcrackerMonitor
+* NewRelic - http://newrelic.com/plugins/schoology/245
+* Nagios - https://github.com/schoology/twemproxy_nagios
+
+#### Redis
+
+Various Redis monitoring solutions exist in the market and, like monitoring RRA, these solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone.
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details diff --git a/content/riak/kv/2.9.1/configuring.md b/content/riak/kv/2.9.1/configuring.md new file mode 100644 index 0000000000..c352a6069f --- /dev/null +++ b/content/riak/kv/2.9.1/configuring.md @@ -0,0 +1,82 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+
+[Learn More >>][config search]
+
+#### [V3 Multi-Datacenter][config v3 mdc]
+
+A guide on configuring Riak's V3 Multi-Datacenter Replication.
+
+[Learn More >>][config v3 mdc]
+
diff --git a/content/riak/kv/2.9.1/configuring/backend.md b/content/riak/kv/2.9.1/configuring/backend.md
new file mode 100644
index 0000000000..f662ae1657
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/backend.md
@@ -0,0 +1,636 @@
+---
+title: "Backend Configuration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Backend Configuration"
+    identifier: "configuring_backend"
+    weight: 110
+    parent: "configuring"
+toc: true
+---
+
+[plan backend leveldb]: {{}}riak/kv/2.9.1/setup/planning/backend/leveldb
+[plan backend leveled]: {{}}riak/kv/2.9.1/setup/planning/backend/leveled
+[plan backend bitcask]: {{}}riak/kv/2.9.1/setup/planning/backend/bitcask
+[plan backend memory]: {{}}riak/kv/2.9.1/setup/planning/backend/memory
+[plan backend multi]: {{}}riak/kv/2.9.1/setup/planning/backend/multi
+
+## LevelDB
+
+Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend.
+
+> **Note on upgrading to 2.0**
+>
+> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and
+wish to use your old configuration files, i.e. `app.config` and
+`vm.args`, please note that you must set the `total_leveldb_mem_percent`
+setting in the `eleveldb` section of `app.config`. We recommend setting
+it to `70`. If you do not set this parameter, it will default to 15,
+which can lead to problems in some clusters.
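+
+As a sketch, the `app.config` fragment described in the note above might
+look like the following (the surrounding sections of your existing file
+are omitted):
+
+```appconfig
+{eleveldb, [
+    %% Recommended when reusing pre-2.0 config files; defaults to 15
+    {total_leveldb_mem_percent, 70}
+]},
+```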
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.block_cache_threshold` | Defines the limit past which block cache memory can no longer be released in favor of the page cache. This setting has no impact in favor of file cache. The value is set on a per-vnode basis. | `32MB`
+`leveldb.compaction.trigger.tombstone_count` | Controls when a background compaction initiates solely due to the number of delete tombstones within an individual `.sst` table file. A value of `off` disables the feature. | `1000`
+`leveldb.compression` | Enabling this setting (`on`), which is the default, saves disk space. Disabling it may reduce read latency but increase overall disk activity. This option can be changed at any time, but it will not impact data on disk until the next time a file requires compaction. | `on`
+`leveldb.compression.algorithm` | Selects the compression algorithm used when `leveldb.compression` is `on`. In new `riak.conf` files this is explicitly set to `lz4`; when the setting is not provided, `snappy` is used for backward compatibility. When you determine that you will no longer need backward compatibility, setting this to `lz4` will cause future compactions to use the LZ4 algorithm for compression. | `lz4` in new `riak.conf` files; `snappy` when not provided
+`leveldb.data_root` | The directory in which LevelDB will store its data. | `./data/leveldb`
+`leveldb.fadvise_willneed` | Option to override LevelDB's use of `fadvise(DONTNEED)` with `fadvise(WILLNEED)` instead. `WILLNEED` can reduce disk activity on systems where physical memory exceeds the database size. | `false`
+`leveldb.maximum_memory` | Defines the server memory (in bytes) to assign to LevelDB. Also see `leveldb.maximum_memory.percent` to set LevelDB memory as a percentage of system total. | `80`
+`leveldb.maximum_memory.percent` | Defines the percentage of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes to stay within this size. The memory size can alternatively be assigned as a byte count via `leveldb.maximum_memory` instead. | `70`
+`leveldb.threads` | The number of worker threads performing LevelDB operations. | `71`
+`leveldb.verify_checksums` | Enables or disables the verification of the data fetched from LevelDB against internal checksums. | `on`
+`leveldb.verify_compaction` | Enables or disables the verification of LevelDB data during compaction. | `on`
+`leveldb.block.size_steps` | Defines the number of incremental adjustments to attempt between the `block.size` value and the maximum `block.size` for an `.sst` table file. A value of zero disables the underlying dynamic `block_size` feature. | `16`
+`leveldb.block.restart_interval` | Defines the key count threshold for a new key entry in the key index for a block. Most deployments should leave this parameter alone. | `16`
+`leveldb.block.size` | Defines the size threshold for a block/chunk of data within one `.sst` table file. Each new block gets an index entry in the `.sst` table file's master index. | `4KB`
+`leveldb.bloomfilter` | Each database `.sst` table file can include an optional "bloom filter" that is highly effective in shortcutting data queries that are destined to not find the requested key. The Bloom filter typically increases the size of an `.sst` table file by about 2%. | `on`
+`leveldb.write_buffer_size_min` | Each vnode first stores new key/value data in a memory-based write buffer. This write buffer is in parallel to the recovery log mentioned in the `sync` parameter. Riak creates each vnode with a randomly sized write buffer for performance reasons. The random size is somewhere between `write_buffer_size_min` and `write_buffer_size_max`. | `30MB`
+`leveldb.write_buffer_size_max` | See `leveldb.write_buffer_size_min` directly above. | `60MB`
+`leveldb.limited_developer_mem` | A Riak-specific option used when a developer is testing a high number of vnodes and/or several VMs on a machine with limited physical memory. Do not use this option when making performance measurements. This option overwrites values given to `write_buffer_size_min` and `write_buffer_size_max`. | `off`
+`leveldb.sync_on_write` | Whether LevelDB will flush after every write. Note: if you are familiar with `fsync`, this is analogous to calling `fsync` after every write. | `off`
+`leveldb.tiered` | The level number at which LevelDB data switches from the faster to the slower array. The default of `off` disables the feature. | `off`
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`. | 
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at or above the level set by `leveldb.tiered`. | 
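+
+As a quick illustration, a `riak.conf` fragment that opts in to LZ4
+compression and keeps the default memory share might look like the
+following (a sketch; the values are examples drawn from the table above,
+not tuning advice):
+
+```riakconf
+leveldb.compression = on
+leveldb.compression.algorithm = lz4
+leveldb.maximum_memory.percent = 70
+leveldb.data_root = ./data/leveldb
+```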
+
+## Leveled
+
+Configurable parameters for Riak's [leveled][plan backend leveled] storage backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | A path under which leveled data files will be stored. | `$(platform_data_dir)/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. Can be set to `riak_sync`, `sync` (if OTP > 16) or `none`. Use `none` and the OS will flush when most efficient. Use `riak_sync` or `sync` to flush after every PUT (not recommended without some hardware support, e.g. flash drives and/or flash-backed write caches). | `none`
+`leveled.compression_method` | Can be `lz4` or `native` (which will use the Erlang native zlib compression) within `term_to_binary`. | `native`
+`leveled.compression_point` | The point at which compression is applied to the Journal (the Ledger is always compressed). Use `on_receipt` or `on_compact`. `on_compact` is suitable when values are unlikely to yield much benefit from compression (compression is only attempted when compacting). | `on_receipt`
+`leveled.log_level` | Can be `debug`, `info`, `warn`, `error` or `critical`. Sets the minimum log level to be used within leveled. Leveled will log many lines to allow stats to be extracted by those using log indexers such as Splunk. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. Normally keep this at around the size of O(100K) objects. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. The higher the value, the more compaction runs and the sooner space is recovered, but each run has a cost. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. Use a low hour of 0 and a top hour of 23 to have no compaction window (i.e. always compact regardless of time of day). | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. If low hour > top hour, compaction will work overnight between low hour and top hour (inclusive). Timings rely on the server's view of local time. | `23`
+`leveled.max_run_length` | The maximum number of consecutive files which may be compacted in a single compaction run. | `4`
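+
+For instance, to restrict journal compaction to a quiet overnight
+window, the relevant `riak.conf` lines might look like this (a sketch;
+the hours are illustrative):
+
+```riakconf
+leveled.compaction_low_hour = 1
+leveled.compaction_top_hour = 5
+leveled.compaction_runs_perday = 24
+```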
+
+## Bitcask
+
+Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`bitcask.data_root` | The directory under which Bitcask will store its data. | `./data/bitcask`
+`bitcask.io_mode` | Configures how Bitcask writes data to disk. If set to `erlang`, writes are made via Erlang's built-in file API; if set to `nif`, writes are made via direct calls to the POSIX C API. The `nif` mode provides higher throughput for certain workloads, but has the potential to negatively impact the Erlang VM, leading to higher worst-case latencies and possible throughput collapse. | `erlang`
+`bitcask.expiry` | By default, Bitcask keeps all of your data around. If your data has limited time value, or if you need to purge data for space reasons, you can set the `expiry` option. For example, if you need to purge data automatically after 1 day, set the value to `1d`. `off` disables automatic expiration. | `off`
+`bitcask.expiry.grace_time` | By default, Bitcask will trigger a merge whenever a data file contains an expired key. This may result in excessive merging under some usage patterns. To prevent this you can set the `bitcask.expiry.grace_time` option. Bitcask will defer triggering a merge solely for key expiry by the configured number of seconds. Setting this to `1h` effectively limits each cask to merging for expiry once per hour. | `0`
+`bitcask.hintfile_checksums` | Whether to allow the CRC to be present at the end of hintfiles. Setting this to `allow_missing` runs Bitcask in a backwards-compatible mode in which old hint files will still be accepted without CRC signatures. | `strict`
+`bitcask.fold.max_puts` | See the description of the `bitcask.fold.max_age` config directly below. | `0`
+`bitcask.fold.max_age` | Fold keys thresholds will reuse the keydir if another fold was started less than `fold.max_age` ago and there were fewer than `fold.max_puts` updates. Otherwise, it will wait until all current fold keys complete and then start. Set either option to `unlimited` to disable. | `unlimited`
+`bitcask.merge.thresholds.fragmentation` | Describes which ratio of dead keys to total keys in a file will cause it to be included in the merge. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 4 dead keys and 6 live keys, it will be included in the merge at the default ratio (which is 40). Increasing the value will cause fewer files to be merged; decreasing the value will cause more files to be merged. | `40`
+`bitcask.merge.thresholds.dead_bytes` | Describes the minimum amount of data occupied by dead keys in a file to cause it to be included in the merge. Increasing the value will cause fewer files to be merged, whereas decreasing the value will cause more files to be merged. | `128MB`
+`bitcask.merge.thresholds.small_file` | Describes the minimum size a file must have to be excluded from the merge. Files smaller than the threshold will be included. Increasing the value will cause more files to be merged, whereas decreasing the value will cause fewer files to be merged. | `10MB`
+`bitcask.merge.triggers.dead_bytes` | Describes how much data stored for dead keys in a single file will trigger merging. If a file meets or exceeds the trigger value for dead bytes, merge will be triggered. Increasing the value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. When either of these constraints is met by any file in the directory, Bitcask will attempt to merge files. | `512MB`
+`bitcask.merge.triggers.fragmentation` | Describes which ratio of dead keys to total keys in a file will trigger merging. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 6 dead keys and 4 live keys, then merge will be triggered at the default setting. Increasing this value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. | `60`
+`bitcask.merge.window.end` | See the description of the `bitcask.merge.policy` config below. | `23`
+`bitcask.merge.window.start` | See the description of the `bitcask.merge.policy` config below. | `0`
+`bitcask.merge.policy` | Lets you specify when during the day merge operations are allowed to be triggered. Valid options are: `always`, meaning no restrictions; `never`, meaning that merging will never be attempted; and `window`, specifying the hours during which merging is permitted, where `bitcask.merge.window.start` and `bitcask.merge.window.end` are integers between 0 and 23. If merging has a significant impact on performance of your cluster, or your cluster has quiet periods in which little storage activity occurs, you may want to change this setting from the default. | `always`
+`bitcask.merge_check_interval` | Bitcask periodically runs checks to determine whether merges are necessary. This parameter determines how often those checks take place. Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, etc. | `3m`
+`bitcask.merge_check_jitter` | In order to prevent merge operations from taking place on different nodes at the same time, Riak can apply random variance to merge times, expressed as a percentage of `bitcask.merge_check_interval`. | `30%`
+`bitcask.max_merge_size` | Maximum amount of data to merge in one go in the Bitcask backend. | `100GB`
+`bitcask.max_file_size` | Describes the maximum permitted size for any single data file in the Bitcask directory. If a write causes the current file to exceed this size threshold, that file is closed and a new file is opened for writes. | `2GB`
+`bitcask.sync.interval` | See the description of `bitcask.sync.strategy` directly below. | 
+`bitcask.sync.strategy` | Changes the durability of writes by specifying when to synchronize data to disk. The default setting protects against data loss in the event of application failure (process death) but leaves open a small window in which data could be lost in the event of complete system failure (e.g. hardware, OS, or power). The default mode, `none`, writes data into operating system buffers which will be written to the disks when those buffers are flushed by the operating system. If the system fails, e.g. due to power loss or crash, that data is lost before those buffers are flushed to stable storage. This is prevented by the setting `o_sync`, which forces the operating system to flush to stable storage at every write. The effect of flushing each write is better durability; however, write throughput will suffer as each write will have to wait for the write to complete. Available sync strategies: `none`, which will let the operating system manage syncing writes; `o_sync`, which uses the `O_SYNC` flag to force syncs on every write; and `interval`, which will force Bitcask to sync every `bitcask.sync.interval` seconds. | `none`
+`bitcask.open_timeout` | Specifies the maximum time Bitcask will block on startup while attempting to create or open the data directory. You generally need not change this value. If for some reason the timeout is exceeded on open you'll see a log message of the form `Failed to start bitcask backend: ...`. Only then should you consider a longer timeout. | `4s`
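+
+To tie several of these settings together, a `riak.conf` fragment that
+enables daily expiry and confines merges to an overnight window might
+look like the following (a sketch; the values are illustrative):
+
+```riakconf
+bitcask.expiry = 1d
+bitcask.expiry.grace_time = 1h
+bitcask.merge.policy = window
+bitcask.merge.window.start = 1
+bitcask.merge.window.end = 5
+```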
+
+## Memory Backend
+
+Configurable parameters for Riak's [Memory][plan backend memory] backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`memory_backend.ttl` | Each value written will be written with this "time to live." Once that object's time is up, it will be deleted on the next read of its key. Minimum: `1s`. | 
+`memory_backend.max_memory_per_vnode` | The maximum amount of memory consumed per vnode by the memory storage backend. Minimum: `1MB`. | 
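+
+For example, to cap each vnode's memory use and expire entries after an
+hour, the relevant `riak.conf` lines might look like this (a sketch; the
+values are illustrative):
+
+```riakconf
+memory_backend.ttl = 1h
+memory_backend.max_memory_per_vnode = 64MB
+```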
+
+## Multi Backend
+
+Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster.
+
+If you are using multiple backends, you can configure the backends
+individually by prepending the setting with `multi_backend.$name`, where
+`$name` is the name of the backend. `$name` can be any valid
+configuration word, like `customer_data`, `my_data`, `foo_bar_backend`,
+etc.
+
+Below is the general form for setting multi-backend parameters:
+
+```riakconf
+multi_backend.$name.(existing_setting) =
+# or
+multi_backend.$name.$backend_type.(backend_specific_setting) =
+```
+
+Below is a listing of the available parameters:
+
+Config | Description | Default
+:------|:------------|:-------
+`multi_backend.$name.storage_backend` | Specifies the Erlang module defining the storage mechanism that will be used on this node. | `bitcask`
+`multi_backend.default` | The default name of a backend when one is not specified. | 
+
+To give an example, if you have a LevelDB backend named
+`customer_backend` and wish to set the `data_root` parameter to
+`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would
+do so as follows:
+
+```riakconf
+multi_backend.customer_backend.storage_backend = leveldb
+multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend
+multi_backend.customer_backend.leveldb.maximum_memory.percent = 50
+```
diff --git a/content/riak/kv/2.9.1/configuring/basic.md b/content/riak/kv/2.9.1/configuring/basic.md
new file mode 100644
index 0000000000..280c55b8b8
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/basic.md
@@ -0,0 +1,235 @@
+---
+title: "Basic Riak KV Configuration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Basic Configuration"
+    identifier: "configuring_basic"
+    weight: 100
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/configuration/
+  - /riak/kv/2.9.1/ops/building/configuration/
+---
+
+[config reference]: {{}}riak/kv/2.9.1/configuring/reference
+[use running cluster]: {{}}riak/kv/2.9.1/using/running-a-cluster
+[use admin riak-admin#member-status]: {{}}riak/kv/2.9.1/using/admin/riak-admin/#member-status
+[perf erlang]: {{}}riak/kv/2.9.1/using/performance/erlang
+[plan start]: {{}}riak/kv/2.9.1/setup/planning/start
+[plan best practices]: {{}}riak/kv/2.9.1/setup/planning/best-practices
+[cluster ops backup]: {{}}riak/kv/2.9.1/using/cluster-operations/backing-up
+[cluster ops add remove node]: {{}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes
+[plan backend]: {{}}riak/kv/2.9.1/setup/planning/backend
+[plan backend multi]: {{}}riak/kv/2.9.1/setup/planning/backend/multi
+[plan backend bitcask]: {{}}riak/kv/2.9.1/setup/planning/backend/bitcask
+[usage bucket types]: {{}}riak/kv/2.9.1/developing/usage/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.1/developing/app-guide/replication-properties
+[concept buckets]: {{}}riak/kv/2.9.1/learn/concepts/buckets
+[concept eventual consistency]: {{}}riak/kv/2.9.1/learn/concepts/eventual-consistency
+[perf benchmark]: {{}}riak/kv/2.9.1/using/performance/benchmarking
+[perf open files]: {{}}riak/kv/2.9.1/using/performance/open-files-limit
+[perf index]: {{}}riak/kv/2.9.1/using/performance
+[perf aws]: {{}}riak/kv/2.9.1/using/performance/amazon-web-services
+[Cluster Capacity Planning]: {{}}riak/kv/2.9.1/setup/planning/cluster-capacity/#ring-size-number-of-partitions
+
+This document covers the parameters that are commonly adjusted when
+setting up a new cluster. We recommend that you also review the detailed
+[Configuration Files][config reference] document before moving a cluster into
+production.
+
+All configuration values discussed here are managed via the
+configuration file on each node, and a node must be restarted for any
+changes to take effect.
+
+> **Note**
+>
+> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config` configuration file or
+the newer `riak.conf` if you wish.
+>
+> If you have installed Riak KV 2.0 directly, you should use only
+`riak.conf`.
+>
+> More on configuring Riak KV can be found in the [configuration files][config reference]
+doc.
+
+We advise that you make as many of the changes below as practical
+_before_ joining the nodes together into a cluster.
+Once your
+configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process.
+
+Use [`riak-admin member-status`][use admin riak-admin#member-status]
+to determine whether any given node is a member of a cluster.
+
+## Erlang VM Tunings
+
+Prior to building and starting a cluster, there are some
+Erlang-VM-related changes that you should make to your configuration
+files. If you are using the older, `vm.args`-based Erlang VM tunings,
+you should set the following:
+
+```vmargs
++sfwi 500
++scl false
+```
+
+If you are using the newer, `riak.conf`-based configuration system, we
+recommend the following settings:
+
+```riakconf
+erlang.schedulers.force_wakeup_interval = 500
+erlang.schedulers.compaction_of_load = false
+```
+
+More information can be found in [Erlang VM Tuning][perf erlang].
+
+## Ring Size
+
+The ring size, in Riak parlance, is the number of data partitions that
+comprise the cluster. This quantity impacts the scalability and
+performance of a cluster and, importantly, **it should be established
+before the cluster starts receiving data**.
+
+If the ring size is too large for the number of servers, disk I/O will
+be negatively impacted by the excessive number of concurrent databases
+running on each server; if the ring size is too small, the servers' other
+resources (primarily CPU and RAM) will go underutilized.
+
+See [Cluster Capacity Planning] for more details on choosing a ring size.
+
+The steps involved in changing the ring size depend on whether the
+servers (nodes) in the cluster have already been joined together.
+
+### Cluster joined, but no data needs to be preserved
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
+4. Start all nodes
+5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### New servers, have not yet joined a cluster
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for
+the location of this file)
+4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### Verifying ring size
+
+You can use the `riak-admin` command to verify the ring size:
+
+```bash
+riak-admin status | grep ring
+```
+
+Console output:
+
+```
+ring_members : ['riak@10.160.13.252']
+ring_num_partitions : 8
+ring_ownership : <<"[{'riak@10.160.13.252',8}]">>
+ring_creation_size : 8
+```
+
+If `ring_num_partitions` and `ring_creation_size` do not agree, that
+means that the `ring_creation_size` value was changed too late and that
+the proper steps were not taken to start over with a new ring.
+
+**Note**: Riak will not allow two nodes with different ring sizes to be
+joined into a cluster.
+
+## Backend
+
+Another critical decision to be made is the backend to use.
+The choice
+of backend strongly influences the performance characteristics and
+feature set for a Riak environment.
+
+See [Choosing a Backend][plan backend] for a list of supported backends. Each
+referenced document includes the necessary configuration bits.
+
+As with ring size, changing the backend will result in all data being
+effectively lost, so spend the necessary time up front to evaluate and
+benchmark backends.
+
+If still in doubt, consider using the [Multi][plan backend multi] backend for future
+flexibility.
+
+If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the
+effectiveness of backend-specific features.
+
+## Default Bucket Properties
+
+Bucket properties are also very important factors in Riak's performance
+and general behavior. The properties for any individual bucket can be
+configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference].
+
+Below is an example of setting `last_write_wins` to `true` and `r` to 3.
+
+```riakconf
+buckets.default.last_write_wins = true
+buckets.default.r = 3
+```
+
+```appconfig
+{default_bucket_props, [
+    {last_write_wins,true},
+    {r,3},
+    ...
+    ]}
+```
+
+For more on bucket properties, we recommend reviewing our docs on
+[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors."
+
+* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/)
+* [Part 2](https://riak.com/riaks-config-behaviors-part-2/)
+* [Part 3](https://riak.com/riaks-config-behaviors-part-3/)
+* [Part 4](https://riak.com/riaks-config-behaviors-part-4/)
+* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/)
+
+If the default bucket properties are modified in your configuration
+files and the node is restarted, any existing buckets will **not** be
+directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.1/developing/api/http/reset-bucket-props) can be used to force them to pick up the new
+defaults.
+
+## System tuning
+
+Please review the following documents before conducting any
+[benchmarking][perf benchmark] and/or rolling out a live production
+cluster.
+
+* [Open Files Limit][perf open files]
+* [System Performance Tuning][perf index]
+* [AWS Performance Tuning][perf aws]
+* [Configuration Files][config reference]
+
+## Joining the nodes together
+
+Please see [Running A Cluster][use running cluster] for the cluster creation process.
diff --git a/content/riak/kv/2.9.1/configuring/global-object-expiration.md b/content/riak/kv/2.9.1/configuring/global-object-expiration.md
new file mode 100644
index 0000000000..0bba17257e
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/global-object-expiration.md
@@ -0,0 +1,85 @@
+---
+title: "Configure Global Object Expiration"
+description: "Enabling and configuring global object expiration for Riak KV."
+menu:
+  riak_kv-2.9.1:
+    name: "Global Object Expiration"
+    identifier: "config_expiry"
+    weight: 180
+    parent: "configuring"
+project: "riak_kv"
+project_version: 2.9.1
+toc: true
+---
+
+[ttl]: https://en.wikipedia.org/wiki/Time_to_live
+
+By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data.
+
+Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value.
+
+## Enabling Expiry
+
+To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file:
+
+```riak.conf
+leveldb.expiration = on
+```
+
+{{% note %}}
+Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration.
+{{% /note %}}
+
+## Setting Retention Time
+
+The `retention_time` setting is used to specify the time until objects expire.
+Durations are set using a combination of an integer and a shortcut for the supported units:
+
+- Milliseconds - `ms`
+- Seconds - `s`
+- Minutes - `m`
+- Hours - `h`
+- Days - `d`
+- Weeks - `w`
+- Fortnight - `f`
+
+The following example configures objects to expire after 5 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 5h
+```
+
+You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 8d9h
+```
+
+## Expiry Modes
+
+Global expiration supports two modes:
+
+- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired.
+- `normal` - individual objects are removed as part of the usual compaction process.
+
+We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient.
+
+The following example configures objects to expire after 1 day:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+## Disable Expiry
+
+To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other two settings are ignored. For example:
+
+```riak.conf
+leveldb.expiration = off
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
diff --git a/content/riak/kv/2.9.1/configuring/load-balancing-proxy.md b/content/riak/kv/2.9.1/configuring/load-balancing-proxy.md
new file mode 100644
index 0000000000..0f120b785e
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/load-balancing-proxy.md
@@ -0,0 +1,271 @@
+---
+title: "Load Balancing and Proxy Configuration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Load Balancing & Proxy"
+    identifier: "configuring_load_balance"
+    weight: 150
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/advanced/configs/load-balanacing-proxy/
+  - /riak/kv/2.9.1/ops/advanced/configs/load-balanacing-proxy/
+---
+
+[perf open files]: {{}}riak/kv/2.9.1/using/performance/open-files-limit
+
+The recommended best practice for operating Riak in production is to
+place Riak behind a load-balancing or proxy solution, either hardware-
+or software-based, while never directly exposing Riak to public network
+interfaces.
+
+Riak users have reported success in using Riak with a variety of
+load-balancing and proxy solutions. Common solutions include proprietary
+hardware-based load balancers, cloud-based load balancing options, such
+as Amazon's Elastic Load Balancer, and open-source software-based
+projects like HAProxy and Nginx.
+
+This guide briefly explores the commonly used open-source software-based
+solutions HAProxy and Nginx, and provides some configuration and
+operational tips gathered from community users and operations-oriented
+engineers at Basho.
+
+While it is by no means an exhaustive overview of the topic, this guide
+should provide a starting point for choosing and implementing your own
+solution.
+
+## HAProxy
+
+[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source
+solution for load balancing and proxying of HTTP- and TCP-based
+application traffic.
+
+Users have reported success in using HAProxy in combination with Riak in
+a number of configurations and scenarios. Much of the information and
+example configuration for this section is drawn from experiences of
+users in the Riak community in addition to suggestions from Basho
+engineering.
+
+### Example Configuration
+
+The following is an example starting-point configuration for HAProxy to
+act as a load balancer. The example cluster has 4 nodes and will be
+accessed by Riak clients using both the Protocol Buffers and HTTP
+interfaces.
+
+> **Note on open files limits**
+>
+> The operating system's open files limits need to be greater than 256000
+for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems.
+
+```config
+global
+    log 127.0.0.1 local0
+    log 127.0.0.1 local1 notice
+    maxconn 256000
+    chroot /var/lib/haproxy
+    user haproxy
+    group haproxy
+    spread-checks 5
+    daemon
+    quiet
+
+defaults
+    log global
+    option dontlognull
+    option redispatch
+    option allbackups
+    maxconn 256000
+    timeout connect 5000
+
+backend riak_rest_backend
+    mode http
+    balance roundrobin
+    option httpchk GET /ping
+    option httplog
+    server riak1 riak1.<FQDN>:8098 weight 1 maxconn 1024 check
+    server riak2 riak2.<FQDN>:8098 weight 1 maxconn 1024 check
+    server riak3 riak3.<FQDN>:8098 weight 1 maxconn 1024 check
+    server riak4 riak4.<FQDN>:8098 weight 1 maxconn 1024 check
+
+frontend riak_rest
+    bind 127.0.0.1:8098
+    # Example bind for SSL termination
+    # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem
+    mode http
+    option contstats
+    default_backend riak_rest_backend
+
+backend riak_protocol_buffer_backend
+    balance leastconn
+    mode tcp
+    option tcpka
+    option srvtcpka
+    server riak1 riak1.<FQDN>:8087 weight 1 maxconn 1024 check
+    server riak2 riak2.<FQDN>:8087 weight 1 maxconn 1024 check
+    server riak3 riak3.<FQDN>:8087 weight 1 maxconn 1024 check
+    server riak4 riak4.<FQDN>:8087 weight 1 maxconn 1024 check
+
+frontend riak_protocol_buffer
+    bind 127.0.0.1:8087
+    mode tcp
+    option tcplog
+    option contstats
+    option tcpka
+    option srvtcpka
+    default_backend riak_protocol_buffer_backend
+```
+
+A specific configuration detail worth noting from the example is the
+commented option for SSL termination. HAProxy supports SSL directly as
+of version 1.5. Provided that your HAProxy instance was built with
+OpenSSL support, you can enable it by uncommenting the example line and
+modifying it to suit your environment. More information is available in
+the [HAProxy
+documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl).
+
+Also note that the above example is considered a starting point and is a
+work in progress based upon [this
+example](https://gist.github.com/1507077). You should carefully examine
+the configuration and change it according to your specific environment.
+
+### Maintaining Nodes Behind HAProxy
+
+When using HAProxy with Riak, you can instruct HAProxy to ping each node
+in the cluster and automatically remove nodes that do not respond.
+
+You can also specify a round-robin configuration in HAProxy and have
+your application handle connection failures by retrying after a timeout,
+thereby reaching a functioning node upon retrying the connection
+attempt.
+
+HAProxy also has a standby system you can use to remove a node from
+rotation while allowing existing requests to finish. You can remove
+nodes from HAProxy directly from the command line by interacting with
+the HAProxy stats socket with a utility such as
+[socat](http://www.dest-unreach.org/socat/):
+
+```bash
+echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+At this point, you can perform maintenance on the node, down the node,
+and so on. When you've finished working with the node and it is again
+available for requests, you can re-enable it:
+
+```bash
+echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+Consult the following HAProxy documentation resources for more
+information on configuring HAProxy in your environment:
+
+* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy)
+* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)
+
+## Nginx
+
+Some users have reported success in using the [Nginx](http://nginx.org/)
+HTTP server to proxy requests for Riak clusters. An example that
+provides access to a Riak cluster *through GET requests only* is
+provided here for reference.
+
+### Example Configuration
+
+The following is an example starting-point configuration for Nginx to
+act as a front-end proxy to a 5-node Riak cluster.
+
+This example forwards all GET requests to Riak nodes while rejecting all
+other HTTP operations.
+
+{{% note title="Nginx version notes" %}}
+This example configuration was verified on **Nginx version 1.2.3**. Please be
+aware that earlier versions of Nginx did not support any HTTP 1.1 semantics
+for upstream communication to backends. You should carefully examine this
+configuration and make changes appropriate to your specific environment before
+attempting to use it.
+{{% /note %}}
+
+Here is an example `nginx.conf` file:
+
+```config
+upstream riak_hosts {
+  # server 10.0.1.10:8098;
+  # server 10.0.1.11:8098;
+  # server 10.0.1.12:8098;
+  # server 10.0.1.13:8098;
+  # server 10.0.1.14:8098;
+}
+
+server {
+  listen 80;
+  server_name _;
+  access_log /var/log/nginx/riak.access.log;
+
+  # your standard Nginx config for your site here...
+  location / {
+    root /var/www/nginx-default;
+  }
+
+  # Expose the /riak endpoint and allow queries for keys only
+  location /riak/ {
+      proxy_set_header Host $host;
+      proxy_redirect off;
+
+      client_max_body_size       10m;
+      client_body_buffer_size    128k;
+
+      proxy_connect_timeout      90;
+      proxy_send_timeout         90;
+      proxy_read_timeout         90;
+
+      proxy_buffer_size          64k;  # If set to a smaller value,
+                                       # nginx can complain with a
+                                       # "too large headers" error
+      proxy_buffers              4 64k;
+      proxy_busy_buffers_size    64k;
+      proxy_temp_file_write_size 64k;
+
+      if ($request_method != GET) {
+        return 405;
+      }
+
+      # Disallow any link with the MapReduce query format "bucket,tag,_"
+      if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) {
+        return 405;
+      }
+
+      if ($request_method = GET) {
+        proxy_pass http://riak_hosts;
+      }
+  }
+}
+```
+
+{{% note title="Note on access controls" %}}
+Even when filtering and limiting requests to GETs only as done in the example,
+you should strongly consider additional access controls beyond what Nginx can
+provide directly, such as specific firewall rules to limit inbound connections
+to trusted sources.
+{{% /note %}}
+
+### Querying Secondary Indexes Over HTTP
+
+When accessing Riak over HTTP and issuing Secondary Index queries, you
+can encounter an issue due to the default Nginx handling of HTTP header
+names containing underscore (`_`) characters.
+
+By default, Nginx will issue errors for such queries, but you can
+instruct Nginx to handle such header names when doing Secondary Index
+queries over HTTP by adding the following directive to the appropriate
+`server` section of `nginx.conf`:
+
+```
+underscores_in_headers on;
+```
diff --git a/content/riak/kv/2.9.1/configuring/managing.md b/content/riak/kv/2.9.1/configuring/managing.md
new file mode 100644
index 0000000000..fa72bfd1b5
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/managing.md
@@ -0,0 +1,116 @@
+---
+title: "Managing Your Configuration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Managing Configuration"
+    identifier: "configuring_managing"
+    weight: 130
+    parent: "configuring"
+toc: true
+---
+
+[use admin riak cli]: {{}}riak/kv/2.9.1/using/admin/riak-cli
+[use admin riak cli#chkconfig]: {{}}riak/kv/2.9.1/using/admin/riak-cli/#chkconfig
+[config reference#search]: {{}}riak/kv/2.9.1/configuring/reference/#search
+
+## Retrieving a Configuration Listing
+
+At any time, you can get a snapshot of currently applied configurations
+through the command line. For a listing of *all* of the configs
+currently applied in the node:
+
+```bash
+riak config effective
+```
+
+This will output a long list of the following form:
+
+```
+anti_entropy = active
+anti_entropy.bloomfilter = on
+anti_entropy.concurrency_limit = 2
+# and so on
+```
+
+For detailed information about a particular configuration variable, use
+the `config describe <variable>` command. This command will output a
+description of what the parameter configures, which datatype you should
+use to set the parameter (integer, string, enum, etc.), the default
+value of the parameter, the currently set value in the node, and the
+name of the parameter in `app.config` in older versions of Riak (if
+applicable).
+
+For in-depth information about the `ring_size` variable, for example:
+
+```bash
+riak config describe ring_size
+```
+
+This will output the following:
+
+```
+Documentation for ring_size
+Number of partitions in the cluster (only valid when first
+creating the cluster). Must be a power of 2, minimum 8 and maximum
+1024.
+
+   Datatype     : [integer]
+   Default Value: 64
+   Set Value    : undefined
+   app.config   : riak_core.ring_creation_size
+```
+
+## Checking Your Configuration
+
+The [`riak`][use admin riak cli] command line tool has a
+[`chkconfig`][use admin riak cli#chkconfig] command that enables you to
+determine whether the syntax in your configuration files is correct.
+
+```bash
+riak chkconfig
+```
+
+If your configuration files are syntactically sound, you should see the
+output `config is OK` followed by a listing of files that were checked.
+You can safely ignore this listing. If, however, something is
+syntactically awry, you'll see an error output that provides details
+about what is wrong. To give an example, the `search.solr.jmx_port`
+setting (in the [Search][config reference#search] section below) must be
+set as an integer. Imagine that we set it to something else:
+
+```riakconf
+search.solr.jmx_port = banana
+```
+
+If we run `riak chkconfig` now, we'll get an error:
+
+```
+[error] Error generating configuration in phase transform_datatypes
+[error] Error transforming datatype for: search.solr.jmx_port
+[error] "banana" can't be converted to an integer
+```
+
+The error message will specify which configurable parameters are
+syntactically unsound and attempt to explain why.
+
+Please note that the `chkconfig` command only checks for syntax. It will
+_not_ be able to discern if your configuration is otherwise unsound,
+e.g. if your configuration will cause problems on your operating system
+or doesn't activate subsystems that you would like to use.
+
+## Debugging Your Configuration
+
+If there is a problem with your configuration but you're having trouble
+identifying the problem, there is a command that you can use to debug
+your configuration:
+
+```bash
+riak config generate -l debug
+```
+
+If there are issues with your configuration, you will see detailed
+output that might provide a better sense of what has gone wrong in the
+config generation process.
diff --git a/content/riak/kv/2.9.1/configuring/mapreduce.md b/content/riak/kv/2.9.1/configuring/mapreduce.md
new file mode 100644
index 0000000000..7ceb37101c
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/mapreduce.md
@@ -0,0 +1,196 @@
+---
+title: "MapReduce Settings"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "MapReduce Settings"
+    identifier: "configuring_mapreduce"
+    weight: 170
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/advanced/configs/mapreduce/
+  - /riak/kv/2.9.1/ops/advanced/configs/mapreduce/
+---
+
+[usage mapreduce]: {{}}riak/kv/2.9.1/developing/usage/mapreduce
+[config reference#appconfig]: {{}}riak/kv/2.9.1/configuring/reference/#app-config
+[usage secondary-indexes]: {{}}riak/kv/2.9.1/developing/usage/secondary-indexes
+
+## Configuring MapReduce
+
+[MapReduce (M/R)][usage mapreduce] is always enabled, but it is
+configurable through the [app.config][config reference#appconfig] file
+as follows, under `riak_kv`:
+
+```erlang
+{riak_kv, [
+```
+
+`mapred_name` is the URL directory used to submit M/R requests to Riak.
+By default this is `mapred`, making the command path, for example,
+`http://localhost:8098/mapred`.
+
+```erlang
+    {mapred_name, "mapred"},
+```
+
+`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes]
+MapReduce inputs are queued in parallel in their own pipe (`true`), or
+serially through a helper process (`false` or undefined).
+
+> **Note**: Set to `false` or leave undefined during an upgrade from 1.0.
+
+```erlang
+    {mapred_2i_pipe, true},
+```
+
+Each of the following entries controls how many Javascript virtual
+machines are available for executing map, reduce, pre- and post-commit
+hook functions.
+
+This is largely relevant only if you are writing JavaScript M/R jobs.
+
+```erlang
+    {map_js_vm_count, 8 },
+    {reduce_js_vm_count, 6 },
+    {hook_js_vm_count, 2 },
+```
+
+`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated
+to the Javascript VMs. If unset, the default is 8MB.
+
+This is largely relevant only if you are writing JavaScript M/R jobs.
+
+```erlang
+    {js_max_vm_mem, 8},
+```
+
+`js_thread_stack` is the maximum amount of thread stack, in megabytes,
+allocated to the Javascript VMs. If unset, the default is 16MB.
+
+> **Note**: This is not the same as the C thread stack.
+
+```erlang
+    {js_thread_stack, 16},
+```
+
+`js_source_dir` should point to a directory containing Javascript source
+files which will be loaded when Riak initializes Javascript VMs.
+
+```erlang
+    %{js_source_dir, "/tmp/js_source"},
+```
+
+## Configuration Tuning for Javascript
+
+If you load larger JSON objects into your buckets, you might encounter
+an error like the following:
+
+```json
+ {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"}
+```
+
+You can increase the amount of memory allocated to the Javascript VM
+stack by editing your app.config. The following will increase the stack
+size from 8MB to 32MB:
+
+```erlang
+{js_thread_stack, 8}
+```
+
+becomes
+
+```erlang
+{js_thread_stack, 32},
+```
+
+In addition to increasing the amount of memory allocated to the stack,
+you can increase the heap size as well by increasing `js_max_vm_mem`
+from the default of 8MB. If you are collecting a large amount of results
+in a reduce phase, you may need to increase this setting.
+
+## Configuration for Riak 1.0
+
+Riak 1.0 is the first release to include the new MapReduce subsystem
+known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to
+power their MapReduce queries. Existing Riak clusters that are upgraded
+to Riak 1.0 will continue to use the legacy MapReduce system unless the
+following line is added to the riak_kv section of each node's
+app.config:
+
+```erlang
+%% Use Riak Pipe to power MapReduce queries
+{mapred_system, pipe},
+```
+
+> **Warning:**
+>
+> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0.
+
+Other than speed and stability of the cluster, the choice of MapReduce
+subsystem (Riak Pipe or legacy) should be invisible to your client. All
+queries should have the same syntax and return the same results on Riak
+1.0 with Riak Pipe as they did on earlier versions with the legacy
+subsystem. If you should find a case where this is not true, you may
+revert to using the legacy subsystem by either removing the
+aforementioned line in your app.config or by changing it to read like
+this:
+
+```erlang
+%% Use the legacy MapReduce system
+{mapred_system, legacy},
+```
+
+## Configuration Tuning for Reduce Phases
+
+If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce
+queries, you have additional options for tuning your reduce phases.
+
+### Batch Size
+
+By default, Riak will evaluate a reduce function every time its phase
+receives 20 new inputs.
+If your reduce phases would run more efficiently with more or fewer new
+inputs, you may change this default by adding the following to the
+riak_kv section of your app.config:
+
+```erlang
+%% Run reduce functions after 100 new inputs are received
+{mapred_reduce_phase_batch_size, 100},
+```
+
+You may also control this batching behavior on a per-query basis by
+using the static argument of the phase specification. When specifying
+phases over HTTP, the JSON configuration for evaluating the function
+after 150 new inputs looks like this:
+
+```json
+{"reduce":
+  {...language, etc. as usual...
+   "arg":{"reduce_phase_batch_size":150}}}
+```
+
+In Erlang, you may either specify a similar mochijson2 structure for the
+phase argument, or use the simpler proplist form:
+
+```erlang
+{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep}
+```
+
+Finally, if you want your reduce function to be evaluated only once,
+after all inputs are received, use this argument instead:
+
+```json
+{"reduce":
+  {...language, etc. as usual...
+   "arg":{"reduce_phase_only_1":true}}}
+```
+
+Similarly, in Erlang:
+
+```erlang
+{reduce, FunSpec, [reduce_phase_only_1], Keep}
+```
+
+> **Warning:**
+>
+> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1.
+
+### Pre-Reduce
+
+If your reduce functions can benefit from parallel execution, it is
+possible to request that the outputs of a preceding map phase be reduced
+local to the partition that produced them, before being sent, as usual,
+to the final aggregate reduce.
+
+Pre-reduce is disabled by default. To enable it for all reduce phases by
+default, add the following to the riak_kv section of your app.config:
+
+```erlang
+%% Always pre-reduce between map and reduce phases
+{mapred_always_prereduce, true}
+```
+
+Pre-reduce may also be enabled or disabled on a per-phase basis via the
+Erlang API for map phases implemented in Erlang. To enable pre-reduce,
+for any map phase followed by a reduce phase, pass a proplist as its
+static phase argument and include the following flag:
+
+```erlang
+{map, FunSpec, [do_prereduce], Keep}
+```
+
+> **Warning:**
+>
+> A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1.
diff --git a/content/riak/kv/2.9.1/configuring/next-gen-replication.md b/content/riak/kv/2.9.1/configuring/next-gen-replication.md
new file mode 100644
index 0000000000..97d47a57c1
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/next-gen-replication.md
@@ -0,0 +1,61 @@
+---
+title_supertext: "Configuring:"
+title: "Next Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.9.1"
+menu:
+  riak_kv-2.9.1:
+    name: "Next Gen Replication"
+    identifier: "nextgen_rep"
+    weight: 200
+    parent: "configuring"
+version_history:
+  in: "2.9.1+"
+toc: true
+commercial_offering: true
+---
+
+The configuration for Next Gen Replication is kept in
+the `riak.conf` configuration file.
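+
+As an illustration, a minimal full-sync configuration using the settings
+described below might look like this (a sketch only; the peer address
+and check counts are examples, and your deployment may require
+additional settings such as the replication queue definition):
+
+```riakconf
+ttaaefs_scope = all
+ttaaefs_localnval = 3
+ttaaefs_remotenval = 3
+ttaaefs_peerip = 10.0.0.1
+ttaaefs_peerport = 8898
+ttaaefs_peerprotocol = http
+ttaaefs_allcheck = 24
+```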
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by
+running the `riak` command-line tool:
+
+```bash
+riak chkconfig
+```
+
+## riak.conf Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync, does all data need to be sync'd, or should a specific bucket be sync'd (bucket), or a specific bucket type (type)? Note that in most cases sync of all data is lower overhead than sync of a subset of data, as cached AAE trees will be used.
+`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For Tictac full-sync, the registered queue name on this cluster to be used for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exclusive to full-sync (i.e. a real-time replication queue may be used as well).
+`ttaaefs_maxresults` | `any` (integer) | `64` | For Tictac full-sync, the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair.
+`ttaaefs_rangeboost` | `any` (integer) | `8` | For Tictac full-sync, a multiplier applied when running a range_check query: the maximum number of AAE segments compared per exchange will be `ttaaefs_maxresults` * `ttaaefs_rangeboost`.
+`ttaaefs_bucketfilter_name` | `any` (text) | `` | For Tictac bucket full-sync, the bucket to be sync'd by this node. Only ascii string bucket definitions are supported (which will be converted using list_to_binary).
+`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync, the bucket type of the bucket name. Only ascii string type bucket definitions are supported (these definitions will be converted to binary using list_to_binary).
+`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync, the NVAL to be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval.
+`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync, the NVAL to be sync'd in the remote cluster.
+`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node in the cluster with which this node will connect for full-sync purposes. If this peer node is unavailable, then this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes.
+`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster.
+`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when connecting to the peer in the remote cluster. Could be http or pb (but only http is currently being tested).
+`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24-hour period all the data should be checked to confirm it is fully sync'd. When running a full (i.e. nval) sync this will check all the data under that nval between the clusters, and when the trees are out of alignment, will check across all data where the nval matches the specified nval.
+`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24-hour period no data should be checked. Use no-check slots to align the number of checks done by each node - if each node has the same number of slots, they will naturally space their checks within the period of the slot.
+`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24-hour period the last hour's data should be checked to confirm it is fully sync'd.
+`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24-hour period the last 24 hours of data should be checked to confirm it is fully sync'd.
+`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24-hour period a range_check should be run.
+`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, whether each key that is repaired should be logged.
+`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable tictacaae. Note that disabling tictacaae will set the use of tictacaae_active only at startup - setting the environment variable at runtime will have no impact.
+`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
+`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Sets the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and not removed when that partition hands off (although the contents should be cleared).
+`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if tictacaae is enabled, the vnode will detect whether the vnode backend has the capability to be a "native" store. If not, then parallel mode will be entered, and a parallel AAE keystore will be started. There are two potential parallel store backends - leveled_ko and leveled_so.
+`tictacaae_rebuildwait` | `` | `336` | The number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
+`tictacaae_rebuilddelay` | `` | `345600` | Once the AAE system has expired (due to the rebuild wait), the rebuild will not be triggered until the rebuild delay, which will be a random number up to the size of this delay (in seconds).
+`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
+`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will skip when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans developing.
+`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
+`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data; hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired each pass is defined by tictacaae_maxresults.
\ No newline at end of file diff --git a/content/riak/kv/2.9.1/configuring/reference.md b/content/riak/kv/2.9.1/configuring/reference.md new file mode 100644 index 0000000000..aa4ec88e4d --- /dev/null +++ b/content/riak/kv/2.9.1/configuring/reference.md @@ -0,0 +1,2030 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.1/ops/advanced/configs/configuration-files/ + - /riak/kv/2.9.1/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. 
+
+## Node Metadata
+
+Every Riak node has a name and a cookie used to facilitate inter-node
+communication. The following parameters enable you to customize the name
+and cookie.
+
+Config | Description | Default
+:------|:------------|:-------
+`distributed_cookie` | Cookie for distributed node communication within a Riak cluster. All nodes in the same cluster should use the same cookie or they will not be able to communicate. | `riak`
+`nodename` | The name of the Riak node. | `riak@127.0.0.1`
+`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
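+For example, a node's identity might be set in `riak.conf` like this (the host address and cookie below are placeholders, not recommendations):
+
+```riakconf
+nodename = riak@192.168.1.10
+distributed_cookie = my_cluster_cookie
+```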
+
+## Ring
+
+Configurable parameters for your cluster's [ring][concept clusters].
+
+Config | Description | Default
+:------|:------------|:-------
+`ring.state_dir` | Default location of ringstate. | `./data/ring`
+`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
+`transfer_limit` | Number of concurrent node-to-node transfers allowed. | `2`
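+As a sketch, a cluster being created with more partitions and a higher transfer ceiling might use values like these (illustrative only; `ring_size` cannot be changed after the cluster has been created):
+
+```riakconf
+ring_size = 128
+transfer_limit = 4
+```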
+
+## Storage Backend
+
+Riak enables you to choose from the following storage backends:
+
+* [Bitcask][plan backend bitcask] --- [configuration][config backend bitcask]
+* [LevelDB][plan backend leveldb] --- [configuration][config backend leveldb]
+* [Leveled][plan backend leveled] --- [configuration][config backend leveled]
+* [Memory][plan backend memory] --- [configuration][config backend memory]
+* [Multi][plan backend multi] --- [configuration][config backend multi]
+
+Config | Description | Default
+:------|:------------|:-------
+`storage_backend` | Specifies the storage engine used for Riak's key-value data and secondary indexes (if supported). The available options are `bitcask` (the default), `leveldb`, `memory`, `leveled`, and `multi`. | `bitcask`
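+For example, to select LevelDB instead of the default Bitcask backend, you would set:
+
+```riakconf
+storage_backend = leveldb
+```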
+
+## Directories
+
+The directories in which Riak stores data, logs, dependencies,
+executables, and configuration files can be configured using the
+parameters below.
+
+Config | Description | Default
+:------|:------------|:-------
+`platform_bin_dir` | The directory in which the `riak-admin`, `riak-debug`, and now-deprecated `search-cmd` executables are stored. | `./bin`
+`platform_data_dir` | The directory in which Riak stores its storage backend data, as well as active anti-entropy data and cluster metadata. | `./data`
+`platform_etc_dir` | The directory in which Riak's configuration files are stored. | `./etc`
+`platform_lib_dir` | The directory in which Riak's dependencies are housed. | `./lib`
+`platform_log_dir` | The directory in which Riak's log files are stored, e.g. `console.log`, `erlang.log`, and `crash.log` files. | `./log`
+
+Each of these directory parameters can be used to construct values for
+other parameters by placing it within a `$(...)`. Thus,
+`platform_log_dir` becomes `$(platform_log_dir)` and so on.
+
+To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the
+`anti_entropy.data_dir` parameter. When setting that parameter, you can
+specify an absolute directory, as below:
+
+```riakconf
+anti_entropy.data_dir = /path/to/anti_entropy
+```
+
+Or you can use the value of `platform_data_dir`:
+
+```riakconf
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```
+
+## Search
+
+Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`search` | `off` | `on` or `off`
+`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
+`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
+`search.dist_query` | `on` | `on` or `off`
+`search.index.error_threshold.failure_count` | `3` | Integer
+`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
+`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
+`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
+`search.queue.batch.maximum` | `100` | Integer
+`search.queue.batch.minimum` | `1` | Integer
+`search.queue.high_watermark` | `10000` | Integer
+`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
+`search.root_dir` | `./data/yz` | Directory
+`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
+`search.solr.jmx_port` | `8985` | Integer
+`search.solr.port` | `8093` | Integer
+`search.solr.start_timeout` | `30s` | Integer with time units (e.g. 2m)
+`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`
+
+
+## Riak Control
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters. The configurable parameters below enable you
+to turn the Riak Control subsystem on and off and to configure console
+authorization.
+
+Config | Description | Default
+:------|:------------|:-------
+`riak_control` | Set to `off` to disable the admin panel. | `off`
+`riak_control.auth.mode` | Authentication mode used for access to the admin panel. Options are `off` (which is the default) or `userlist`. | `off`
+`riak_control.auth.user.$username.password` | If Riak Control's authentication mode (`riak_control.auth.mode`) is set to `userlist`, this is the list of usernames and passwords for access to the admin panel. | 
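+As an illustrative sketch, enabling Riak Control with userlist authentication might look like this (the username and password are placeholders):
+
+```riakconf
+riak_control = on
+riak_control.auth.mode = userlist
+riak_control.auth.user.admin.password = changeme
+```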
+
+## Runtime Health
+
+Configurable parameters for interaction between Riak and the underlying
+operating system.
+
+Config | Description | Default
+:------|:------------|:-------
+`runtime_health.triggers.distribution_port` | Whether distribution ports with full input buffers will be counted as busy. Distribution ports connect Riak nodes within a single cluster. | `on`
+`runtime_health.triggers.port` | Whether ports with full input buffers will be counted as busy. Ports can represent open files or network sockets. | `on`
+`runtime_health.triggers.process.heap_size` | A process will become busy when its heap exceeds this size (in bytes). | `160444000`
+`runtime_health.triggers.process.garbage_collection` | A process will become busy when it exceeds this amount of time doing garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. **Note**: Enabling this setting can cause performance problems on multi-core systems. | `off`
+`runtime_health.triggers.process.long_schedule` | A process will become busy when it exceeds this amount of time during a single process scheduling and execution cycle. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. | `off`
+`runtime_health.thresholds.busy_ports` | The threshold at which a warning will be triggered about the number of ports that are overly busy. Ports with full input buffers count toward this threshold. | `2`
+`runtime_health.thresholds.busy_processes` | The threshold at which a warning will be triggered about the number of processes that are overly busy. Processes with large heaps or that take a long time to garbage collect will count toward this threshold. | `30`
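+As a hedged example only, a busier node might loosen the busy-port warning threshold and enable the garbage-collection trigger like so (values are illustrative, and the garbage-collection trigger can cause performance problems on multi-core systems, as noted above):
+
+```riakconf
+runtime_health.thresholds.busy_ports = 4
+runtime_health.triggers.process.garbage_collection = 100ms
+```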
+
+## Default Bucket Properties
+
+When configuring buckets [using bucket types][cluster ops bucket types], the bucket properties listed below are used when no bucket type is specified.
+
+Config | Description | Default
+:------|:------------|:-------
+`buckets.default.allow_mult` | Whether or not siblings are allowed. **Note**: See [Conflict Resolution][usage conflict resolution] for a discussion of siblings. | `false`
+`buckets.default.basic_quorum` | Whether not-founds will invoke the "basic quorum" optimization. This setting will short-circuit fetches where the majority of replicas report that the key is not found. Only used when `notfound_ok` is set to `false`. | `false`
+`buckets.default.dw` | The number of replicas which must reply to a write request indicating that the write was committed to durable storage for the write to be deemed successful. | `quorum`
+`buckets.default.last_write_wins` | Whether conflicting writes resolve via timestamp. | `false`
+`buckets.default.merge_strategy` | The strategy used when merging objects that potentially have conflicts. The default is `2` in Riak 2.0 for typed buckets and `1` for non-typed buckets. This setting reduces sibling creation through additional metadata on each sibling (also known as Dotted Version Vectors). Setting this to `1` is the default for Riak 1.4 and earlier, and may duplicate siblings that originated in the same write. | `1`
+`buckets.default.n_val` | The number of replicas stored in **non-typed** buckets. For typed buckets, the default is `3` unless changed explicitly for that bucket type. **Note**: See Replication Properties for further discussion. | `3`
+`buckets.default.notfound_ok` | Whether not-founds will count toward a quorum of reads. | `true`
+`buckets.default.postcommit` | A space-delimited list of functions that will be run after a value is stored. Only Erlang functions are allowed, using the `module:function` format. | 
+`buckets.default.precommit` | A space-delimited list of functions that will be run before a value is stored, and that can abort the write. Only Erlang functions are allowed, using the `module:function` format. | 
+`buckets.default.pr` | The number of primary, non-fallback replicas that must reply to a read request. | `0`
+`buckets.default.pw` | The number of primary, non-fallback replicas which must reply to a write request. | `0`
+`buckets.default.r` | The number of replicas which must reply to a read request. | `quorum`
+`buckets.default.w` | The number of replicas which must reply to a write request, indicating that the write was received. | `quorum`
+`buckets.default.rw` | The number of replicas which must reply to a delete request. | `quorum`
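+For instance, the quorum-related defaults above correspond to the following `riak.conf` lines; restating them explicitly, as in this sketch, can make a cluster's intent clearer:
+
+```riakconf
+buckets.default.n_val = 3
+buckets.default.allow_mult = false
+buckets.default.r = quorum
+buckets.default.w = quorum
+```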
+
+## Object Settings
+
+Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context].
+
+Config | Description | Default
+:------|:------------|:-------
+`object.format` | Controls which binary representation of a riak value is stored on disk. Options are `0`, which will use the original `erlang:term_to_binary` format but has a higher space overhead, or `1`, which will tell Riak to utilize a new format for more compact storage of small values. | `1`
+`object.siblings.maximum` | Writing an object with more than this number of siblings will send a failure to the client. | `100`
+`object.siblings.warning_threshold` | Writing an object with more than this number of siblings will generate a warning in the logs. | `25`
+`object.size.maximum` | Writing an object larger than this will send a failure to the client. | `50MB`
+`object.size.warning_threshold` | Reading or writing objects larger than this size will write a warning in the logs. | `5MB`
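+As a sketch, an installation that wants earlier warnings about large or heavily-siblinged objects might lower the warning thresholds while keeping the hard limits (values illustrative):
+
+```riakconf
+object.size.warning_threshold = 1MB
+object.size.maximum = 50MB
+object.siblings.warning_threshold = 10
+object.siblings.maximum = 100
+```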
+
+## Erlang VM
+
+In the older configuration system, the Erlang VM in which Riak runs was
+configured using a `vm.args` file. In the new, `riak.conf`-based
+system, the Erlang VM can be configured using the parameters in the
+table below.
+
+Config | Description | Default
+:------|:------------|:-------
+`erlang.async_threads` | The number of threads in the Erlang VM's asynchronous thread pool. The valid range is 0-1024. If thread support is not available, this parameter will have no impact; if thread support is available, the default value is 64. This is the equivalent of the `+A` flag. | `64` (if thread support is available)
+`erlang.async_threads.stack_size` | If thread support is available in your Erlang VM, this parameter sets the amount of memory allocated to each asynchronous thread, which you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, which translates to 64-32768 KB on 32-bit architectures. Although there is no default, we suggest a stack size of 16 kilowords, which translates to 64 KB. This small suggested size has been chosen because the number of asynchronous threads, set using the `erlang.async_threads` parameter explained above, might be quite large. The 64 KB default is enough for drivers delivered with Erlang/OTP but might not be large enough to accommodate drivers that use the `driver_async()` functionality. | 
+`erlang.distribution.net_ticktime` | The net kernel is an Erlang system process that provides various forms of network monitoring. In a Riak cluster, one of the functions of the net kernel is to periodically check node liveness. **Tick time** is the frequency with which those checks happen. This parameter determines that frequency for every `N`. If you set this parameter to `10`, for example, the tick will occur once every 10 seconds. | 
+`erlang.distribution.port_range.minimum` | For ease of firewall configuration, the Erlang distribution can be bound to a limited range of TCP ports. If this parameter is set, and `erlang.distribution.port_range.maximum` is not set, only this port will be used. If the minimum is unset, no restriction will be made on the port range. Instead, Erlang will listen on a random high-numbered port. | 
+`erlang.distribution.port_range.maximum` | See the description for `erlang.distribution.port_range.minimum` directly above. | 
+`erlang.schedulers.force_wakeup_interval` | Set the scheduler forced wakeup interval. All run queues will be scanned each time period specified (in milliseconds). While there are sleeping schedulers in the system, one scheduler will be woken for each non-empty run queue found. An interval of zero disables this feature, which is the default. This feature is a workaround for lengthy-executing native code, and native code that does not properly bump reductions. | 
+`erlang.schedulers.compaction_of_load` | Enables or disables the Erlang scheduler's compaction of load. When enabled (which is the default), load balancing will strive to establish a load distribution that causes as many scheduler threads as possible to be fully loaded, i.e. not to run out of scheduled work. This is accomplished by migrating load, such as running processes, into a smaller set of schedulers when schedulers frequently run out of work. When disabled, the frequency at which schedulers run out of work will not be taken into account by the load balancing logic. | `true` (enabled)
+`erlang.schedulers.utilization_balancing` | Enables or disables the Erlang scheduler's balancing of load. By default, scheduler utilization balancing is disabled while scheduler compaction of load is enabled, i.e. `erlang.schedulers.compaction_of_load` is set to `true`. In this state, the Erlang VM will strive for a load distribution which causes as many scheduler threads as possible to be fully loaded, i.e. to not run out of work. When load balancing is enabled using this setting, the system will instead attempt to spread scheduler utilization equally between schedulers. | `false` (disabled)
+`erlang.distribution_buffer_size` | For nodes with many `busy_dist_port` events, Basho recommends raising the sender-side network distribution buffer size. 32MB may not be sufficient for some workloads and is a suggested starting point. Erlangers may know this as `zdbbl`. | `32MB`
+`erlang.process_limit` | Raises the default Erlang process limit. | `256000`
+`erlang.max_ets_tables` | Raises the ETS table limit. | `256000`
+`erlang.crash_dump` | Sets the location of crash dumps. | `./log/erl_crash.dump`
+`erlang.fullsweep_after` | A non-negative integer which indicates how many times generational garbage collections can be done without forcing a fullsweep collection. In low-memory systems (especially without virtual memory), setting the value to `0` can help to conserve memory. | `0`
+`erlang.max_ports` | The number of concurrent ports/sockets. The valid range is 1024 to 134217727. | `65536`
+`erlang.K` | Enables or disables the kernel poll functionality if the emulator supports it. If the emulator does not support kernel poll, and the `K` flag is passed to the emulator, a warning is issued at startup. | `on`
+`erlang.schedulers.total` | Sets the number of scheduler threads to create and scheduler threads to set online when `erlang.smp` support has been enabled. The maximum for both values is 1024. If the Erlang runtime system is able to determine the number of logical processors configured and logical processors available, `schedulers.total` will default to the number of logical processors configured, and `schedulers.online` will default to the number of logical processors available. Otherwise, the default values will be 1. `schedulers.total` may be omitted if `schedulers.online` is not, and vice versa. If `schedulers.total` or `schedulers.online` is specified as a negative number, the value is subtracted from the default number of logical processors configured or logical processors available, respectively. Specifying a value of `0` for either resets the number of scheduler threads or scheduler threads online, respectively, to its default value. This option is ignored if the emulator doesn't have SMP support enabled (see the `erlang.smp` flag). | 
+`erlang.schedulers.online` | See the description for `erlang.schedulers.total` directly above. | 
+`erlang.W` | Sets the mapping of warning messages for `error_logger`. Messages sent to the error logger using one of the warning routines can be mapped either to errors, warnings (`w`, which is the default), or info reports (`i`). | `w`
+`erlang.smp` | Starts the Erlang runtime system with SMP support enabled. This may fail if no runtime system with SMP support is available. The `auto` setting starts the Erlang runtime system with SMP support enabled if it is available and more than one logical processor is detected. A value of `disable` starts a runtime system without SMP support. **Note**: The runtime system with SMP support will not be available on all supported platforms. See also the `erlang.schedulers` settings. Some native extensions (NIFs) require use of the SMP emulator. | `enable`
+`erlang.shutdown_time` | Limits how long the Erlang VM spends shutting down. After the specified duration elapses, all existing processes are killed. | `10s`
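+To illustrate, a deployment that needs a firewall-friendly distribution port range and a larger distribution buffer might set (illustrative values only):
+
+```riakconf
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+erlang.distribution_buffer_size = 64MB
+```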
+
+## JavaScript MapReduce
+
+Configurable parameters for Riak's now-deprecated JavaScript
+[MapReduce][usage mapreduce] system.
+
+Config | Description | Default
+:------|:------------|:-------
+`javascript.source_dir` | A directory containing the JavaScript source files which will be loaded by Riak when it initializes JavaScript VMs. | 
+`javascript.maximum_stack_size` | The maximum amount of thread stack memory to allocate to each JavaScript virtual machine. | `16MB`
+`javascript.maximum_heap_size` | The maximum amount of memory allocated to each JavaScript virtual machine. | `8MB`
+`javascript.hook_pool_size` | The number of JavaScript virtual machines available for executing pre-commit hook functions. | `2`
+`javascript.reduce_pool_size` | The number of JavaScript virtual machines available for executing reduce functions. | `6`
+`javascript.map_pool_size` | The number of JavaScript virtual machines available for executing map functions. | `8`
+
+## Security
+
+Configurable parameters for [Riak KV Security][security index].
+
+Config | Description | Default
+:------|:------------|:-------
+`ssl.cacertfile` | The default signing authority location for HTTPS. | `#(platform_etc_dir)/cacertfile.pem`
+`ssl.keyfile` | Default key location for HTTPS. | `#(platform_etc_dir)/key.pem`
+`ssl.certfile` | Default cert location for HTTPS. | `#(platform_etc_dir)/cert.pem`
+`secure_referer_check` | Measures were added to Riak 1.2 to counteract cross-site scripting and request-forgery attacks. Some reverse proxies cannot remove the `Referer` header and make serving data directly from Riak impossible. Turning this setting to `off` disables this security check. | `on`
+`check_crl` | Whether to check the certificate revocation list (CRL) of a client certificate. This defaults to `on`, but some CAs may not maintain or define a CRL, so this can be disabled if no CRL is available. | `on`
+`tls_protocols.sslv3` | Determines which SSL/TLS versions are allowed. By default, only TLS 1.2 is allowed, but other versions can be enabled if clients don't support the latest TLS standard. It is strongly recommended that SSLv3 not be enabled unless absolutely necessary. More than one protocol can be enabled at once. The `tls_protocols` parameters below can be used to turn different versions on and off. | `off`
+`tls_protocols.tlsv1.2` |  | `on`
+`tls_protocols.tlsv1.1` |  | `off`
+`tls_protocols.tlsv1` |  | `off`
+`honor_cipher_order` | Whether to prefer the order in which the server lists its ciphers. When set to `off`, the client's preferred cipher order dictates which cipher is chosen. | `on`
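+For example, the default TLS posture described above corresponds to the following lines; a sketch like this can be used to make the policy explicit in `riak.conf`:
+
+```riakconf
+tls_protocols.sslv3 = off
+tls_protocols.tlsv1 = off
+tls_protocols.tlsv1.1 = off
+tls_protocols.tlsv1.2 = on
+honor_cipher_order = on
+```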
+
+## Client Interfaces
+
+Configurable parameters for clients connecting to Riak either through
+Riak's Protocol Buffers or HTTP API.
+
+Config | Description | Default
+:------|:------------|:-------
+`protobuf.nagle` | Turns off Nagle's algorithm for Protocol Buffers connections. This is equivalent to setting the `TCP_NODELAY` option on the socket. | `off`
+`protobuf.backlog` | The maximum length to which the queue of pending connections may grow. If set, it must be an integer greater than zero. If you anticipate a huge number of connections being initialized simultaneously, set this number higher. | `128`
+`listener.protobuf.$name` | This is the IP address and TCP port to which the Riak Protocol Buffers interface will bind. | `{"127.0.0.1",8087}`
+`listener.http.$name` | This is the IP address and TCP port to which the Riak HTTP interface will bind. | `{"127.0.0.1",8098}`
+`listener.https.$name` | This is the IP address and TCP port to which the Riak HTTPS interface will bind. | 
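+For example, to bind the client listeners to a private interface rather than localhost (the address below and the listener name `internal` are placeholders):
+
+```riakconf
+listener.protobuf.internal = 10.0.0.5:8087
+listener.http.internal = 10.0.0.5:8098
+```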
+
+## Logging
+
+Configurable parameters for [lager](https://github.com/basho/lager),
+Riak's logging system.
+
+Config | Description | Default
+:------|:------------|:-------
+`log.console` | Where to emit the default log messages (typically at `info` severity). Possible values: `off`, which disables console log messages; `file`, which specifies that log messages will be output to the file specified by `log.console.file`; `console`, which outputs messages to standard output (seen when using `riak attach-direct`); or `both`, which outputs messages both to the file specified in `log.console.file` and to standard out. | `file`
+`log.console.file` | When `log.console` is set to `file` or `both`, this parameter determines the path of the file to which console messages will be logged. | `./log/console.log`
+`log.console.level` | The severity level of the console log. Possible values: `debug`, `info`, `warning`, `error`. | `info`
+`log.crash` | Whether to enable the crash log. | `on`
+`log.crash.file` | If the crash log is enabled, the file where its messages will be written. | `./log/crash.log`
+`log.crash.maximum_message_size` | Maximum size of individual messages in the crash log. | `64KB`
+`log.crash.rotation` | The schedule on which to rotate the crash log. | `$D0`
+`log.crash.rotation.keep` | The number of rotated crash logs to keep. When set to `current`, only the current open log file is kept. Otherwise, an integer can be specified. | `5`
+`log.crash.size` | Maximum size of the crash log before it is rotated. | `10MB`
+`log.error.file` | The file where error messages will be logged. | `./log/error.log`
+`log.error.messages_per_second` | Maximum number of `error_logger` messages to handle per second. | `100`
+`log.error.redirect` | Whether to redirect `error_logger` messages into lager. | `on`
+`log.syslog` | When set to `on`, enables log output to syslog. | `off`
+`log.syslog.facility` | Sets the facility level of syslog output if `log.syslog` is set to `on`. Possible values: `auth`, `authpriv`, `clock`, `cron`, `daemon`, `ftp`, `kern`, `lpr`, `mail`, `news`, `syslog`, `user`, `uucp`. In addition to these settings, you may also select `local0` through `local7`. | `daemon`
+`log.syslog.ident` | If `log.syslog` is set to `on`, this setting determines the prefix appended to each syslog message. | `riak`
+`log.syslog.level` | If `log.syslog` is set to `on`, this setting determines the log level of syslog output. Possible values: `alert`, `critical`, `debug`, `emergency`, `error`, `info`, `none`, `notice`, `warning`. | `info`
+`sasl` | Whether to enable `sasl`, Erlang's built-in error logger. | `off`
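+As an illustrative sketch, a node that should log warnings and above to both the console file and syslog might combine these settings:
+
+```riakconf
+log.console = both
+log.console.level = warning
+log.syslog = on
+log.syslog.facility = daemon
+```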
+
+## Active Anti-Entropy
+
+Configurable parameters for Riak's active anti-entropy subsystem.
+
+Config | Description | Default
+:------|:------------|:-------
+`anti_entropy` | How Riak will repair out-of-sync keys. If set to `active`, out-of-sync keys will be repaired in the background; if set to `passive`, out-of-sync keys are only repaired on read; and if set to `active-debug`, verbose debugging information will be output. | `active`
+`search.anti_entropy.throttle` | Whether the distributed throttle for Active Anti-Entropy is enabled. | `on`
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | Sets the throttling tiers for Active Anti-Entropy. Each tier is a minimum vnode mailbox size and a time-delay that the throttle should observe at that size and above. For example, `anti_entropy.throttle.tier1.mailbox_size = 0`, `anti_entropy.throttle.tier1.delay = 0ms`, `anti_entropy.throttle.tier2.mailbox_size = 40`, `anti_entropy.throttle.tier2.delay = 5ms`, etc. If configured, there must be a tier which includes a mailbox size of 0. Both `.mailbox_size` and `.delay` must be set for each tier. | 
+`search.anti_entropy.throttle.$tier.delay` | See the description for `anti_entropy.throttle.$tier.mailbox_size` above. | 
+`anti_entropy.bloomfilter` | Bloom filters are highly effective in shortcutting data queries that are destined to not find the requested key, though they tend to entail a small performance cost. | `on`
+`anti_entropy.max_open_files` |  | `20`
+`anti_entropy.write_buffer_size` | The LevelDB options used by Active Anti-Entropy to generate the LevelDB-backed on-disk hashtrees. | `4MB`
+`anti_entropy.data_dir` | The directory where AAE hash trees are stored. | `./data/anti_entropy`
+`anti_entropy.trigger_interval` | The tick determines how often the Active Anti-Entropy manager looks for work to do (building/expiring trees, triggering exchanges, etc). Lowering this value will speed up the rate at which all replicas are synced across the cluster. Increasing the value is not recommended. | `15s`
+`anti_entropy.concurrency_limit` | Limit how many Active Anti-Entropy exchanges or builds can happen concurrently. | `2`
+`anti_entropy.tree.expiry` | Determines how often hash trees are expired after being built. Periodically expiring a hash tree ensures that the on-disk hash tree data stays consistent with the actual K/V backend data. It also helps Riak identify silent disk failures and bit rot. However, expiration is not needed for normal active anti-entropy operations and should be infrequent for performance reasons. The time is specified in milliseconds. | `1w`
+`anti_entropy.tree.build_limit.per_timespan` |  | `1h`
+`anti_entropy.tree.build_limit.number` | Restrict how fast AAE can build hash trees. Building the tree for a given partition requires a full scan over that partition's data. Once built, trees stay built until they are expired. `.number` is the number of builds; `.per_timespan` is the amount of time in which that number of builds occurs. | `1`
+`anti_entropy.use_background_manager` | Whether AAE is to use a background process to limit AAE tree rebuilds. If set to `on`, this will help to prevent system response degradation under times of heavy load from multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
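+For example, enabling legacy AAE with its data directory anchored under `platform_data_dir` might look like this (a sketch that mirrors the defaults described above):
+
+```riakconf
+anti_entropy = active
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+anti_entropy.concurrency_limit = 2
+```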
+
+## TicTac Active Anti-Entropy
+
+Config | Description | Default
+:------|:------------|:-------
+`tictacaae_active` | Changes TicTac AAE from passive to active. If you want to run TicTac AAE alongside legacy AAE, set both to active. Acceptable values are `active` or `passive`. | `passive`
+`tictacaae_dataroot` | Path under which AAE data files will be stored. | `(platform_data_dir)/tictac_aae`
+`tictacaae_parallelstore` | When running in parallel mode, which will be the default if the backend does not support native TicTac AAE (i.e. is not leveled), what type of parallel key store should be kept - `leveled_ko` (leveled and key-ordered) or `leveled_so` (leveled and segment-ordered). When running in native mode, this setting is ignored. Acceptable values are `leveled_ko` or `leveled_so`. | `leveled_ko`
+`tictacaae_rebuildwait` | The minimum number of hours to wait between rebuilds. | `336`
+`tictacaae_rebuilddelay` | The number of seconds which represents the length of the period in which the next rebuild will be scheduled. So if all vnodes are scheduled to rebuild at the same time, they will actually rebuild randomly between 0 and this value (in seconds) after the rebuild time. | `345600`
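+As a sketch, switching TicTac AAE on with a key-ordered parallel store might look like this (illustrative only; `tictacaae_parallelstore` is ignored when the backend supports native mode):
+
+```riakconf
+tictacaae_active = active
+tictacaae_parallelstore = leveled_ko
+```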
+
+## Intra-Cluster Handoff
+
+Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].
+
+Config | Description | Default
+:------|:------------|:-------
+`handoff.max_rejects` | The maximum number of times that a secondary system within Riak, such as Riak Search, can block handoff of primary key/value data. The approximate maximum duration that a vnode can be blocked can be determined by multiplying this setting by `vnode_management_timer`. If you want to prevent handoff from ever being blocked by a secondary system, set this parameter to `0`. | `6`
+`handoff.inbound` | Whether inbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.outbound` | Whether outbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.port` | Specifies the TCP port that Riak uses for intra-cluster data handoff. | `8099`
+`handoff.ssl.certfile` | To encrypt `riak_core` intra-cluster data handoff traffic, uncomment this line and edit its path to an appropriate certfile and keyfile. | 
+`handoff.ssl.keyfile` | The keyfile paired with the certfile specified in `.certfile`. | 
+`handoff.use_background_manager` | Whether Riak will use a background manager to limit K/V handoff. This can help to prevent system response degradation during times of heavy load caused by multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
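+For instance, handoff behaviour could be pinned down explicitly as follows (a sketch restating the defaults, with the background manager switched on):
+
+```riakconf
+handoff.port = 8099
+handoff.inbound = on
+handoff.outbound = on
+handoff.use_background_manager = on
+```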
+
+## Riak Data Types
+
+Config | Description | Default
+:------|:------------|:-------
+`datatypes.compression_level` | Whether serialized Data Types will use compression, and at what level. When set to an integer, the parameter refers to the aggressiveness of compression, on a scale from 0 to 9. `on` is equivalent to 6, whereas `off` is equivalent to 0. Higher values for compression tend to be more CPU intensive. | `1`
+
+## SNMP
+
+Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.1 and higher.
+
+## JMX
+
+Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.1 and higher.
+
+## Strong Consistency
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment.
+
+Riak's strong consistency feature has a variety of tunable parameters
+that allow you to enable and disable strong consistency, modify the
+behavior of leaders and followers, set various timeouts, and more. More
+detailed information from an operations perspective can be found in our
+documentation on [managing strong consistency][cluster ops strong consistency].
+
+Strong consistency is disabled by default. The `strong_consistency`
+parameter enables you to turn it on. This setting is available in each
+node's `riak.conf` file.
+
+Config | Description | Default
+:------|:------------|:-------
+`strong_consistency` | Enables the consensus subsystem used for strongly consistent Riak operations if set to `on`. | `off`
+
+Unlike the `strong_consistency` setting, the settings listed below are
+available only in `advanced.config`, in the `riak_ensemble` section of
+that file. That section looks like this:
+
+```advancedconfig
+{riak_ensemble, [
+    {parameter1, value},
+    {parameter2, value}
+    %% Other settings
+]}
+```
+
+Further instructions on setting parameters in `advanced.config` can be
+found in the [advanced configuration](#advanced-configuration) section below.
+
+Using these settings properly demands a firm understanding of the basic
+architecture of Riak's implementation of strong consistency. We highly
+recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind
+strong consistency before changing the defaults on these parameters.
+
+Config | Description | Default
+:------|:------------|:-------
+`ensemble_tick` | The rate at which leaders perform their periodic duties, including refreshing the leader lease, in milliseconds. This setting must be lower than both the `lease_duration` and `follower_timeout` settings (both listed below). Lower values mean that leaders perform their duties more frequently, which can allow for faster convergence if a leader goes offline and then returns to the ensemble; higher values mean that leaders perform their duties less frequently, which can reduce network overhead. | `500`
+`lease_duration` | Determines how long a leader lease remains valid without being refreshed (in milliseconds). This should be set higher than the `ensemble_tick` setting (listed above) so that leaders have time to refresh their leases before they time out, and it must be set lower than the `follower_timeout` setting (listed below). | `ensemble_tick * 3/2`
+`follower_timeout` | Determines how long a follower waits to hear from a leader before it abandons the leader (in milliseconds). This must be set greater than the `lease_duration` setting. | `lease_duration * 4`
+`alive_tokens` | Determines the number of ticks the leader will wait to hear from its associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before `ensemble_tick * alive_tokens` milliseconds have elapsed, the leader will give up leadership. It may be necessary to raise this setting if your Riak vnodes are frequently stalling out on slow backend reads/writes. If this setting is too low, it may cause slow requests to time out earlier than the request timeout. | `2`
+`storage_delay` | Determines how long the consensus subsystem delays syncing to disk when performing certain metadata operations (in milliseconds). This delay allows multiple operations to be coalesced into a single disk write. We do not recommend that you change this setting. | `50`
+`storage_tick` | Determines how often the consensus subsystem writes data to disk that was requested to be written asynchronously (in milliseconds). We do not recommend that you change this setting. | `5000`
+`trust_lease` | Determines whether leader leases are used to optimize reads. When set to `true`, a leader with a valid lease will handle the read directly without contacting any followers; when set to `false`, the leader will always contact followers. For more information, see our internal documentation on leader leases. | `true`
+`peer_get_timeout` | Determines the timeout used internally for reading consistent data, in milliseconds. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_put_timeout` | Determines the timeout, in milliseconds, used internally for writing consistent data. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_workers` | The number of concurrent workers used by the leader to service requests. Increasing this setting may boost performance depending on the workload. | `1`
+`tree_validation` | Determines whether Riak considers peer Merkle trees to be trusted after a node restart. When validation is enabled (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted majority. This is the safest option, as it protects Riak against undetected corruption of the Merkle tree. However, this mode reduces Riak availability, since it can sometimes require more than a simple majority of nodes to be online and reachable. | `true`
+`synchronous_tree_updates` | Determines whether the metadata updates to follower Merkle trees are handled synchronously or not. When set to `true`, Riak requires two quorum round trips to occur before replying back to the client: the first quorum request to write the actual object and the second to write the Merkle tree data. When set to `false`, Riak will respond back to the client after the first round trip, letting the metadata update happen asynchronously. It's important to note that the leader *always* updates its local Merkle tree before responding to the client; this setting only affects the metadata writes sent to followers. In principle, asynchronous updates are unsafe: if the leader crashes before sending the metadata updates, and all followers that had acknowledged the object write somehow revert to the object value immediately prior to a write request, a future read could return the immediately preceding value without realizing that it was incorrect. Given that this scenario is unlikely, this setting defaults to `false` in the name of improved performance. | `false`
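+Tying the timing rules above together, a hedged `advanced.config` sketch that simply restates the default relationships (tick of 500ms, lease of tick * 3/2, follower timeout of lease * 4) would be:
+
+```advancedconfig
+{riak_ensemble, [
+    {ensemble_tick, 500},
+    {lease_duration, 750},
+    {follower_timeout, 3000}
+]}
+```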
+
+## Miscellaneous
+
+Config | Description | Default
+:------|:------------|:-------
+`metadata_cache_size` | This setting controls the size of the metadata cache for each vnode. The cache can be disabled by setting it to `off` (this is the default). Enabling the cache should not be necessary in disk-based backends (i.e. LevelDB and Bitcask) but it can help performance in the Memory backend. Note that this setting adjusts the size of the ETS table rather than the actual data. Thus, more space may be used than the simple size * number-of-vnodes calculation would imply. **Caution**: This setting should not be changed without extensive benchmarking. | `off`
+`max_concurrent_requests` | The maximum number of concurrent requests of each type (GET or PUT) that is allowed. Setting this value to `infinite` disables overload protection. The `erlang.process_limit` should be at least 3 times this setting. | `50000`
+`dtrace` | Whether DTrace is enabled. Do not enable unless your Erlang/OTP runtime is compiled to support DTrace, which is available in R15B01 (supported by the official source package) and in R14B04 via a custom repository and branch. | `off`
+`vnode_management_timer` | Sets the frequency with which vnodes attempt to trigger handoff between this node and other nodes in the cluster. | `10s` (10 seconds)
+`retry_put_coordinator_failure` | When a PUT (i.e. write) request fails, Riak will retry the operation if this setting is set to `on`, which is the default. Setting it to `off` will speed response times on PUT requests in general, but at the risk of potentially increasing the likelihood of write failure. | `on`
+`background_manager` | Riak's background manager is a subsystem that coordinates access to shared resources from other Riak subsystems. The background manager can help to prevent system response degradation under times of heavy load caused by multiple background tasks. | `on`
+
+## Advanced Configuration
+
+The `advanced.config` file takes the same format as the `app.config`
+file familiar to users of versions of Riak prior to 2.0. Here is an
+example:
+
+```advancedconfig
+[
+  {riak_core, [
+      {cluster_mgr, {"127.0.0.1", 8098}}
+      %% more riak_core configs
+  ]},
+
+  {riak_repl, [
+      {data_root, "/var/db/riak/riak_repl/"}
+      %% more riak_repl configs
+  ]}
+].
+```
+
+The following settings are available in the `advanced.config` file:
+
+#### `riak_repl` settings
+
+Most settings that are configurable through `advanced.config` are
+related to Riak's `riak_repl` subsystem.
+
+Config | Description | Default
+:------|:------------|:-------
+`data_root` | Path (relative or absolute) to the working directory for the replication process. | `/var/db/riak/riak_repl/`
+`max_fssource_cluster` | The hard limit of fullsync workers that will be running on the source side of a cluster across all nodes on that cluster for a fullsync to a sink cluster. This means that if you have configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `5`
+`max_fssource_node` | This setting limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `1`
+`max_fssink_node` | This setting limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the sink cluster on which this parameter is defined, either via the configuration file or command line. | `1`
+`fullsync_on_connect` | Whether to initiate a fullsync on initial connection from the sink cluster. | `true`
+`fullsync_interval` | A single-integer value representing the duration to wait, in minutes, between fullsyncs, or a list of `{clustername, time_in_minutes}` pairs for each sink participating in fullsync replication. | `30`
+`rtq_max_bytes` | The maximum size, in bytes, to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync. | `104857600`
+`proxy_get` | Whether to enable Riak CS `proxy_get` and block filter. | `disabled`
+`rt_heartbeat_interval` | A heartbeat message is sent from the source to the sink every `rt_heartbeat_interval` seconds. Setting `rt_heartbeat_interval` to `undefined` disables the realtime heartbeat. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
+`rt_heartbeat_timeout` | If a heartbeat response is not received within the time period specified by this setting (in seconds), the source connection exits and will be re-established. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
+`realtime_connection_rebalance_max_delay_secs` | Should a server on the source cluster be restarted, this is the amount of time (in seconds) before the realtime connections are rebalanced by a change in the number of source nodes. | `300`
+`fullsync_use_background_manager` | By default, fullsync replication will attempt to coordinate with other Riak subsystems that may be contending for the same resources. This will help to prevent system response degradations during times of heavy load from multiple background tasks. To disable background coordination, set this parameter to `false`. This feature is available only in Riak KV Enterprise Edition 2.0 and later, as well as Riak KV 2.2.6 onwards. | `true`
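+Putting a few of these together, a `riak_repl` section in `advanced.config` might look like the following sketch (the values simply mirror the defaults listed above and are not recommendations):
+
+```advancedconfig
+{riak_repl, [
+    {data_root, "/var/db/riak/riak_repl/"},
+    {fullsync_on_connect, true},
+    {fullsync_interval, 30},
+    {max_fssource_cluster, 5},
+    {max_fssource_node, 1},
+    {max_fssink_node, 1}
+]}
+```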
+
+#### Upgrading Riak Search with `advanced.config`
+
+If you are upgrading to Riak 2.x and wish to upgrade to the new Riak Search (codename Yokozuna), you will need to enable
+legacy Search while the upgrade is underway. You can add the following
+snippet to your `advanced.config` configuration to do so:
+
+```advancedconfig
+[
+  %% Other configs
+
+  {riak_search, [ {enabled, true} ]},
+  {merge_index, [
+    {data_root, "/var/lib/riak/merge_index"},
+    {buffer_rollover_size, 1048576},
+    {max_compact_segments, 20}
+  ]}
+
+  %% Other configs
+].
+```
+
+#### Other settings
+
+There are four non-`riak_repl` settings available in
+`advanced.config`, listed below.
+
+Config | Section | Description | Default
+:------|:--------|:------------|:-------
+`add_paths` | `riak_kv` | If you are installing custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies the paths to any compiled `.beam` files that you wish to use. This is expressed as a list of absolute paths on the node's filesystem, e.g. `[ "/tmp", "/other" ]`. | 
+`cluster_mgr` | `riak_core` | The cluster manager listens for connections from remote clusters on the specified IP and port. Every node runs one cluster manager, but only the cluster manager running on the cluster leader will service requests. This can change as nodes enter and leave the cluster. | `{"127.0.0.1", 9080}`
+`delete_mode` | `riak_kv` | Specifies how Riak behaves after objects are marked for deletion with a tombstone. There are three possible settings: `keep` disables tombstone removal altogether; `immediate` removes objects' tombstones as soon as the delete request is received; and setting `delete_mode` to an integer value specifies the number of milliseconds to wait before removing tombstones. More information can be found in Object Deletion. | `3000` (3 seconds)
+`target_n_val` | `riak_core` | The highest `n_val` that you generally intend to use. This setting affects how partitions are distributed within the cluster, helping to ensure that "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. You will need to change this setting only in rare circumstances. Assuming that `ring_size` is a power of 2, the ideal value for this setting is both (a) greater than or equal to the largest `n_val` for any bucket type and (b) an even divisor of the number of partitions in the ring, i.e. `ring_size`. The default is `4`, and the number of physical nodes in your cluster must be greater than `target_n_val` for this setting to be effective at preventing hot spots. | `4`
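+As an illustration, keeping tombstones indefinitely and restating the default `target_n_val` would look like this sketch in `advanced.config`:
+
+```advancedconfig
+[
+  {riak_kv, [
+      {delete_mode, keep}
+  ]},
+  {riak_core, [
+      {target_n_val, 4}
+  ]}
+].
+```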
+
+## Cluster Job Controls
+
+{{% note title="Warning" %}}
+Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
+{{% /note %}}
+
+The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
+`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`
diff --git a/content/riak/kv/2.9.1/configuring/search.md b/content/riak/kv/2.9.1/configuring/search.md
new file mode 100644
index 0000000000..565734c75f
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/search.md
@@ -0,0 +1,274 @@
+---
+title: "Riak Search Settings"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Riak Search Settings"
+    identifier: "configuring_search"
+    weight: 160
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/advanced/configs/search/
+  - /riak/kv/2.9.1/ops/advanced/configs/search/
+---
+
+[usage search]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search
+[usage search schema]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search-schemas
+[usage search data types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/searching-data-types
+[usage custom extractors]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/custom-extractors
+[cluster-ops aae throttle]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/active-anti-entropy/#throttling
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+[config reference#search]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/#search
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#active-anti-entropy-aae
+[security index]: {{<baseurl>}}riak/kv/2.9.1/using/security/
+
+[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
+[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation
+
+This page covers how to use Riak Search (with
+[Solr](http://lucene.apache.org/solr/) integration).
+
+For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].
+
+If you are looking to develop on or with Riak Search, take a look at:
+
+* [Using Search][usage search]
+* [Search Schema][usage search schema]
+* [Custom Search Extractors][usage custom extractors]
+* [Riak KV Data Types and Search][usage search data types]
+
+## Overview
+
+We'll be walking through:
+
+1. [Prerequisites](#prerequisites)
+2. [Enabling Riak Search](#enabling-riak-search)
+3. [Search Configuration Settings](#search-config-settings)
+4. [Additional Solr Information](#more-on-solr)
+
+## Prerequisites
+
+Because Solr is a Java application, you will need to install **Java 7
+or later** on every node. Installation packages can be found on the [Java SE Downloads
+page][java se downloads] and instructions in the [Java SE documentation site][java se docs].
+
+
+## Enabling Riak Search
+
+Riak Search is not enabled by default, so you must enable it in every
+node's [configuration file][config reference] as follows:
+
+```riakconf
+search = on
+```
+
+
+## Search Config Settings
+
+You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation.
+
+### `search`
+
+Enable or disable search; defaults to `off`.
+
+Valid values: `on` or `off`
+
+### `search.anti_entropy.data_dir`
+
+The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`.
+
+Valid values: a directory
+
+### `search.anti_entropy.throttle`
+
+Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`.
+
+Valid values: `on` or `off`
+
+You can read more about throttling [here][cluster-ops aae throttle].
+
+### `search.anti_entropy.throttle.$tier.delay`
+
+Set the throttling tier delay for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above.
+
+For example:
+
+```
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a solrq queue length of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.anti_entropy.throttle.$tier.solrq_queue_length`
+
+Set the throttling tiers for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle
+should observe at that size and above.
+
+For example:
+
+```
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a solrq queue length of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.dist_query`
+
+Enable this node in distributed query plans; defaults to `on`.
+
+If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long-running administrative operation (e.g.
reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.
+
+This setting can also be changed via `riak-admin` by issuing one of the following commands:
+
+```
+riak-admin set search.dist_query=off
+```
+ or
+
+```
+riak-admin set search.dist_query=on
+```
+
+Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` command. Setting `search.dist_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up.
+
+Valid values: `on` or `off`
+
+### `search.index.error_threshold.failure_count`
+
+The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.
+
+Valid values: Integer
+
+### `search.index.error_threshold.failure_interval`
+
+The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.
+
+If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.
+
+Valid values: Milliseconds
+
+### `search.index.error_threshold.reset_interval`
+
+The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.
+
+Valid values: Milliseconds
+
+### `search.queue.batch.flush_interval`
+
+The maximum delay between notification to flush batches to Solr; defaults to `1000` (milliseconds).
+
+This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.
+
+Valid values: `ms`, `s`, `m`, or `h`
+
+### `search.queue.batch.maximum`
+
+The maximum batch size, in number of Riak objects; defaults to `500`.
+
+Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.
+
+Valid values: Integer
+
+### `search.queue.batch.minimum`
+
+The minimum batch size, in number of Riak objects; defaults to `10`.
+
+Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.
+
+Valid values: Integer
+
+### `search.queue.high_watermark`
+
+The queue high water mark; defaults to `1000`.
+
+If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem if writes into Solr start to fall behind.
+
+Valid values: Integer
+
+### `search.queue.high_watermark.purge_strategy`
+
+The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.
+
+Valid values: `purge_one`, `purge_index`, or `off`
+
+* `purge_one` removes the oldest item on the queue from an erroring index (one whose fuse is blown, in the language of the code) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `purge_index` removes all items associated with one random erroring index (again, one whose fuse is blown) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `off` disables purging
+
+### `search.root_dir`
+
+The root directory in which index data and configuration is stored; defaults to `./data/yz`.
+
+Valid values: a directory
+
+### `search.solr.jvm_options`
+
+The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.
+
+Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.
+
+Valid values: Java command-line arguments
+
+### `search.solr.jmx_port`
+
+The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.
+
+Valid values: Integer
+
+**Note:** JMX ceased to be a Riak feature in Riak KV 2.9.0p5. This setting remains here for reference but no longer has any effect.
+
+### `search.solr.port`
+
+The port number to which Solr binds (note: binds on every interface); defaults to `8093`.
+
+Valid values: Integer
+
+### `search.solr.start_timeout`
+
+How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.
+
+Values lower than 1s will be rounded up to 1s.
+
+Valid values: Integer with time units (e.g. 2m)
+
+
+## More on Solr
+### Solr JVM and Ports
+
+Riak Search runs one Solr process per node to manage its indexing and
+search functionality. While the underlying project manages
+index distribution, node coverage for queries, active anti-entropy
+(AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.
+
+Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports
+to the outside world.
+
+### Solr for Operators
+
+For further information on Solr monitoring, tuning, and performance, we
+recommend the following documents for getting started:
+
+* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
+* [Solr Performance
+  Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
+* [Solr Performance
+  Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
+* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)
+
+A wide variety of other documentation is available from the Solr OSS
+community.
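+
+As a convenience, here is a minimal `riak.conf` sketch that pulls together several of the settings described above. The values are illustrative assumptions only, not recommendations; tune them against your own workload:
+
+```riak.conf
+## Enable Riak Search on this node
+search = on
+
+## Batching: deliver between 10 and 500 objects per Solr request,
+## and flush small batches at least once per second
+search.queue.batch.minimum = 10
+search.queue.batch.maximum = 500
+search.queue.batch.flush_interval = 1s
+
+## Block vnodes once 1000 messages are queued for a Solrq worker
+search.queue.high_watermark = 1000
+
+## AAE throttle: no delay for short queues, 5ms at queue length 40 and above
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```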
diff --git a/content/riak/kv/2.9.1/configuring/strong-consistency.md b/content/riak/kv/2.9.1/configuring/strong-consistency.md new file mode 100644 index 0000000000..237a6386a7 --- /dev/null +++ b/content/riak/kv/2.9.1/configuring/strong-consistency.md @@ -0,0 +1,666 @@ +--- +title: "Implementing Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Implementing Strong Consistency" + identifier: "configuring_strong_consistency" + weight: 190 + parent: "configuring" +toc: true +--- + +[apps strong consistency]: {{}}riak/kv/2.9.1/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.9.1/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.9.1/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.9.1/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.9.1/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.9.1/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.9.1/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.9.1/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.9.1/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.9.1/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.9.1/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.9.1/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.9.1/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.9.1/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.9.1/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.9.1/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.9.1/developing/data-types +[glossary aae]: {{}}riak/kv/2.9.1/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.9.1/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.9.1/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.9.1/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.9.1/developing/client-libraries + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. + +This document provides information on configuring and monitoring a Riak +cluster's optional strong consistency subsystem. Documentation for +developers building applications using Riak's strong consistency feature +can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical +treatment can be found in [Strong Consistency][concept strong consistency]. + +## Minimum Cluster Size + +In order to use strong consistency in Riak, **your cluster must consist +of at least three nodes**. If it does not, all strongly consistent +operations will fail. 
If your cluster is smaller than three nodes, you +will need to [add more nodes][cluster ops add remove node] and make sure +that strong consistency is [enabled](#enabling-strong-consistency) on all of them. + +Strongly consistent operations on a given key may also fail if a +majority of object replicas in a given ensemble are unavailable, whether +due to slowness, crashes, or network partitions. This means that you may +see strongly consistent operations fail even if the minimum cluster size +requirement has been met. More information on ensembles can be found in +[Implementation Details](#implementation-details). + +While strong consistency requires at least three nodes, we have a +variety of recommendations regarding cluster size, which can be found in +[Fault Tolerance](#fault-tolerance). + +## Enabling Strong Consistency + +Strong consistency in Riak is disabled by default. You can enable it in +each node's [configuration files][config reference#strong-cons]. + +```riakconf +strong_consistency = on +``` + +```appconfig +%% In the older, app.config-based system, the strong consistency +%% parameter is enable_consensus: + +{riak_core, [ + % ... + {enable_consensus, true}, + % ... + ]} +``` + +Remember that you must [restart your node][use admin riak cli] for +configuration changes to take effect. + +For strong consistency requirements to be applied to specific keys, +those keys must be in [buckets][concept buckets] bearing a bucket type with the +`consistent` property set to `true`. More information can be found in +[Using Bucket Types][cluster ops bucket types]. + +If you enable strong consistency on all nodes in a cluster with fewer +than three nodes, strong consistency will be **enabled** but not yet +**active**. Strongly consistent operations are not possible in this +state. Once at least three nodes with strong consistency enabled are +detected in the cluster, the system will be activated and ready for use. +You can check on the status of the strong consistency subsystem using +the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command. + +## Fault Tolerance + +Strongly consistent operations in Riak are necessarily less highly +available than [eventually consistent][concept eventual consistency] operations +because strongly consistent operations can only succeed if a **quorum** +of object replicas are currently reachable. A quorum can be expressed as +N / 2 + 1 (or `n_val` / 2 + 1), meaning that 3 replicas constitutes a +quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are +unavailable, for example, no strongly consistent operations on that +object can succeed. + +While Riak uses N=3 by default, bear in mind that **higher values of N +will allow for more fault tolerance**. The table below shows the number +of allowable missing replicas for assorted values of N: + +Replicas | Allowable missing replicas +:--------|:-------------------------- +3 | 1 +5 | 2 +7 | 3 +9 | 4 +15 | 7 + +Thus, we recommend setting `n_val` higher than the default of 3 for +strongly consistent operations. More on `n_val` in the section below. + +### n_val Recommendations + +Due to the quorum requirements explained above, we recommend that you +use _at least_ N=5 for strongly consistent data. You can set the value +of N, i.e. `n_val`, for buckets +[using bucket types][cluster ops bucket types]. 
For example, you
+can create and activate a bucket type with N set to 5 and strong
+consistency enabled---we'll call the bucket type
+`consistent_and_fault_tolerant`---using the following series of
+[commands][use admin riak-admin]:
+
+```bash
+riak-admin bucket-type create consistent_and_fault_tolerant \
+  '{"props": {"consistent":true,"n_val":5}}'
+riak-admin bucket-type activate consistent_and_fault_tolerant
+```
+
+If the `activate` command outputs `consistent_and_fault_tolerant has
+been activated`, the bucket type is now ready to provide strong
+consistency guarantees.
+
+#### Setting the target_n_val parameter
+
+The `target_n_val` parameter sets the highest `n_val` that you intend to
+use in an entire cluster. The purpose of this parameter is to ensure
+that so-called "hot spots" don't occur, i.e. that data is never stored
+more than once on the same physical node. This can happen when:
+
+* `target_n_val` is greater than the number of physical nodes, or
+* the `n_val` for a bucket is greater than `target_n_val`.
+
+A problem to be aware of if you're using strong consistency is that the
+default for `target_n_val` is 4, while our suggested minimum `n_val` for
+strongly consistent bucket types is 5. This means that you will need to
+raise `target_n_val` if you intend to use an `n_val` over 4 for _any_
+bucket type in your cluster. If you anticipate using an `n_val` of 7 as
+the largest `n_val` within your cluster, for example, you will need to
+set `target_n_val` to 7.
+
+This setting is not contained in `riak.conf`, and must instead be set in
+the `advanced.config` file. For more information, see our documentation
+on [advanced configuration][config reference#advanced].
+
+If you are using strong consistency in a cluster that has already been
+created with a `target_n_val` that is too low (remember that the default
+is too low), you will need to raise it to the desired higher value and
+restart each node.
+
+#### Note on Bucket Properties
+
+The `consistent` bucket property is one of two bucket properties,
+alongside [`datatype`][cluster ops bucket types], that cannot be changed once a
+bucket type has been created.
+
+Furthermore, if `consistent` is set to `true` for a bucket type, you
+cannot change the `n_val` for the bucket type once it's been created. If
+you attempt to do so, you'll see the following error:
+
+```
+Error updating bucket <bucket_type_name>:
+n_val cannot be modified for existing consistent type
+```
+
+If you've created a bucket type with a specific `n_val` and wish to
+change it, you will need to create a new bucket type with the
+appropriate `n_val` and use the new bucket type instead.
+
+### Fault Tolerance and Cluster Size
+
+From the standpoint of strongly consistent operations, larger clusters
+tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down.
+
+Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If
+two nodes go down, _all_ ensembles will lose quorum and will be unable
+to function. Strongly consistent operations on the entire keyspace will
+fail until at least one node is brought back online. And even when that
+one node is brought back online, a significant portion of the keyspace
+will continue to be unavailable for strongly consistent operations.
+
+For the sake of contrast, imagine a 50-node cluster in which all
+ensembles are N=5 (i.e. all objects are replicated to five nodes).
In
+this cluster, each node is involved in only 10% of the total ensembles;
+if a single node fails, that failure will thus impact only 10% of
+ensembles. In addition, because N is set to 5, that will not impact
+quorum for _any_ ensemble in the cluster; two additional node failures
+would need to occur for quorum to be lost for _any_ ensemble. And even
+in the case of three nodes failing, it is highly unlikely that that
+failure would impact the same ensembles; if it did, only those ensembles
+would become unavailable, affecting only 10% of the key space, as
+opposed to 100% in the example of a 3-node cluster consisting of N=3
+ensembles.
+
+These examples illustrate why we recommend higher values for N---again,
+at least N=5---as well as clusters with many nodes. The 50-node cluster
+example above is used only to illustrate why larger clusters are more
+fault tolerant. The definition of "many" nodes will vary according to your needs.
+For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].
+
+### Offline Node Recommendations
+
+In general, strongly consistent Riak is more sensitive to the number of
+nodes in the cluster than eventually consistent Riak, due to the quorum
+requirements described above. While Riak is designed to withstand a
+variety of failure scenarios that make nodes in the cluster unreachable,
+such as hardware or network failure, **we nonetheless recommend that you
+limit the number of nodes that you intentionally down or reboot**.
+Having multiple nodes leave the cluster at once can threaten quorum and
+thus affect the viability of some or all strongly consistent operations,
+depending on the size of the cluster.
+
+If you're using strong consistency and you do need to reboot multiple
+nodes, we recommend rebooting them very carefully. Rebooting nodes too
+quickly in succession can force the cluster to lose quorum and thus be
+unable to service strongly consistent operations. The best strategy is
+to reboot nodes one at a time and wait for each node to rejoin existing
+[ensembles][cluster ops strong consistency] before
+continuing to the next node. At any point in time, the state of
+currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].
+
+## Performance
+
+If you run into performance issues, bear in mind that the key space in a
+Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of
+that key space. Larger [ring sizes]({{}}riak/kv/2.9.1/learn/concepts/clusters/) allow more
+independent consensus groups to exist in a cluster, which can provide
+for more concurrency and higher throughput, and thus better performance.
+The ideal ring size, however, will also depend on the number of nodes in
+the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].
+
+Adding nodes to your cluster is another means of enhancing the
+performance of strongly consistent operations. Instructions on doing so
+can be found in [Adding and Removing Nodes][cluster ops add remove node].
+
+Your cluster's configuration can also affect strong consistency
+performance. See the section on [configuration][config reference#strong-cons] below.
+
+## riak-admin ensemble-status
+
+The [`riak-admin`][use admin riak-admin] interface
+used for general node/cluster management has an `ensemble-status`
+command that provides insight into the current status of the consensus
+subsystem undergirding strong consistency.
+
+Running the command by itself will provide the current state of the
+subsystem:
+
+```bash
+riak-admin ensemble-status
+```
+
+If strong consistency is not currently enabled, you will see `Note: The
+consensus subsystem is not enabled.` in the output of the command; if
+strong consistency is enabled, you will see output like this:
+
+```
+============================== Consensus System ===============================
+Enabled:     true
+Active:      true
+Ring Ready:  true
+Validation:  strong (trusted majority required)
+Metadata:    best-effort replication (asynchronous)
+
+================================== Ensembles ==================================
+ Ensemble     Quorum        Nodes      Leader
+-------------------------------------------------------------------------------
+   root       4 / 4         4 / 4      riak@riak1
+    2         3 / 3         3 / 3      riak@riak2
+    3         3 / 3         3 / 3      riak@riak4
+    4         3 / 3         3 / 3      riak@riak1
+    5         3 / 3         3 / 3      riak@riak2
+    6         3 / 3         3 / 3      riak@riak2
+    7         3 / 3         3 / 3      riak@riak4
+    8         3 / 3         3 / 3      riak@riak4
+```
+
+### Interpreting ensemble-status Output
+
+The following table provides a guide to `ensemble-status` output:
+
+Item | Meaning
+:----|:-------
+`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `false` and you wish to enable strong consistency, see [Enabling Strong Consistency](#enabling-strong-consistency) above.
+`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes.
+`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change.
+`Validation` | This will display `strong` if the `tree_validation` setting in `advanced.config` has been set to `true` and `weak` if set to `false`.
+`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in `advanced.config`, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`.
+`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
+
+**Note**: The **root ensemble**, designated by `root` in the sample
+output above, is a special ensemble that stores a list of nodes and
+ensembles in the cluster.
+
+More in-depth information on ensembles can be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+
+### Inspecting Specific Ensembles
+
+The `ensemble-status` command also enables you to directly inspect the
+status of specific ensembles in a cluster. The IDs for all current
+ensembles are displayed in the `Ensembles` section of the
+`ensemble-status` output described above.
+
+To inspect a specific ensemble, specify the ID:
+
+```bash
+riak-admin ensemble-status <id>
+```
+
+The following would inspect ensemble 2:
+
+```bash
+riak-admin ensemble-status 2
+```
+
+Below is sample output for a single ensemble:
+
+```
+================================= Ensemble #2 =================================
+Id: {kv,0,3}
+Leader: riak@riak2 (2)
+Leader ready: true
+
+==================================== Peers ====================================
+ Peer  Status     Trusted     Epoch     Node
+-------------------------------------------------------------------------------
+  1    following    yes         1       riak@riak1
+  2    leading      yes         1       riak@riak2
+  3    following    yes         1       riak@riak3
+```
+
+The table below provides a guide to the output:
+
+Item | Meaning
+:----|:-------
+`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. The first element is always `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible.
+`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
+`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
+`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.
  • Peer --- The ID of the peer
  • Status --- Whether the peer is a leader or a follower
  • Trusted --- Whether the peer's Merkle tree is currently considered trusted or not
  • Epoch --- The current consensus epoch for the peer. The epoch is incremented each time the leader changes.
  • Node --- The node on which the peer resides.
+
+More information on leaders, peers, Merkle trees, and other details can
+be found in [Implementation Details](#implementation-details) below.
+
+## Implementation Details
+
+Strong consistency in Riak is handled by a subsystem called
+[`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+This system functions differently from other systems in Riak in a number
+of ways, and many of these differences are important to bear in mind for
+operators configuring their cluster's usage of strong consistency.
+
+### Basic Operations
+
+The first major difference is that strongly consistent Riak involves a
+different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types
+of atomic operations on objects:
+
+* **Get** operations work just as they do against
+  non-strongly-consistent keys, but with two crucial differences:
+  1. Connecting clients are guaranteed to return the most recently
+     written value (which makes those operations CP, i.e. consistent and
+     partition tolerant)
+  2. Reads on strongly consistent keys *never* return siblings, hence
+     there is no need to develop any sort of [conflict resolution]({{}}riak/kv/2.9.1/developing/usage/conflict-resolution/)
+     strategy for those keys
+* **Conditional put** operations write an object only if no object
+  currently exists in that key. The operation will fail if the key
+  already exists; if the key was never written or has been deleted, the
+  operation succeeds.
+* **Conditional modify** operations are compare-and-swap (CAS)
+  operations that succeed only if the value of a key has not changed
+  since it was previously read.
+* **Delete** operations work mostly like they do against
+  non-strongly-consistent keys, with the exception that
+  [tombstones][cluster ops obj del] are not harvested, which is
+  the equivalent of having `delete_mode` set to `keep`.
+
+**From the standpoint of clients connecting to Riak, there is little
+difference between strongly and non-strongly consistent data**. The
+operations performed on objects---reads, writes, deletes, etc.---are the
+same, which means that the client API for strong consistency is
+essentially the same as it is for eventually consistent operations, with
+the important exception of error handling.
+
+### Ensembles
+
+The main actors in Riak's implementation of strong consistency are
+**ensembles**, which are independent groups that watch over a portion of
+a Riak cluster's key space and coordinate strongly consistent operations
+across nodes. When watching over a given key space, ensembles must act
+upon multiple replicas of a given object, the number of which is
+specified by `n_val` (more on this in [Replication Properties][apps replication properties]).
+
+Eventually consistent Riak can service requests even when only a single
+object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it
+requires that a **quorum** of object replicas be online and reachable,
+where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not
+available for a key, all strongly consistent operations against that key
+will fail**.
+
+More information can be found in the [Fault Tolerance](#fault-tolerance) section above.
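+
+To make the get/conditional-put/conditional-modify semantics described under [Basic Operations](#basic-operations) concrete, here is a minimal sketch using Riak's HTTP interface. It assumes the `consistent_and_fault_tolerant` bucket type created earlier, a hypothetical bucket named `test`, and a node listening on `localhost:8098`:
+
+```bash
+# All requests target a key in a strongly consistent bucket type
+BASE=http://localhost:8098/types/consistent_and_fault_tolerant/buckets/test/keys
+
+# Conditional put: succeeds only if the key does not yet exist
+curl -X PUT -H "Content-Type: text/plain" -d "first value" $BASE/demo
+
+# A second context-free put to the same key is expected to fail,
+# because the key now exists
+curl -X PUT -H "Content-Type: text/plain" -d "second value" $BASE/demo
+
+# Conditional modify: read the object's causal context first...
+VCLOCK=$(curl -sI $BASE/demo | awk '/X-Riak-Vclock/ {print $2}' | tr -d '\r')
+
+# ...then write back with that context; this succeeds only if the value
+# has not changed since it was read
+curl -X PUT -H "Content-Type: text/plain" \
+  -H "X-Riak-Vclock: $VCLOCK" -d "second value" $BASE/demo
+```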
+ +### Peers, Leaders, Followers, and Workers + +All ensembles in strongly consistent Riak consist of agents called +**peers**. The number of peers in an ensemble is defined by the `n_val` +of that ensemble, i.e. the number of object replicas that the +ensemble watches over. Amongst the peers in the ensemble, there are two +basic actors: **leaders** and **followers**. + +Leaders and followers coordinate with one another on most requests. +While leaders and followers coordinate on all writes, i.e. all puts and +deletes, you can enable leaders to respond to gets without the need to +coordinate with followers. This is known as granting a **leader lease**. +Leader leases are enabled by default, and are disabled (or re-enabled) +at the cluster level. A more in-depth account of ensemble behavior can +be found in our [internal +documentation](https://github.com/basho/riak_ensemble/tree/develop/doc). + +In addition to leaders and followers, ensemble peers use lightweight +Erlang processes called **workers** to perform long-running K/V +operations, allowing peers to remain responsive to requests. The number +of workers assigned to each peer depends on your configuration. + +These terms should be borne in mind in the sections on configuration +below. + +### Integrity Checking + +An essential part of implementing a strong consistency subsystem in a +distributed system is **integrity checking**, which is a process that +guards against data corruption and inconsistency even in the face of +network partitions and other adverse events that Riak was built to +handle gracefully. + +Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency +integrity checking utilizes [Merkle +trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on +disk. All peers in an ensemble, i.e. all leaders and followers, maintain +their own Merkle trees and update those trees in the event of most +strongly consistent operations. Those updates can occur synchronously or +asynchronously from the standpoint of client operations, depending on +the configuration that you specify. + +While integrity checking takes place automatically in Riak, there are +important aspects of its behavior that you can configure. See the Merkle Tree settings section below for more +information on configurable parameters. + +## Configuring Strong Consistency + +The `riak_ensemble` subsystem provides a wide variety of tunable +parameters that you can adjust to fit the needs of your Riak cluster. +All `riak_ensemble`-specific parameters, with the exception of the +`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency), +must be set in each node's `advanced.config` file, _not_ in `riak.conf` +or `app.config`. + +Information on the syntax and usage of `advanced.config` can be found in +our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full +listing of [strong-consistency-related configuration parameters][config reference#strong-cons]. + +Please note that the sections below require a basic understanding of the +following terms: + +* ensemble +* peer +* leader +* follower +* worker +* integrity checking +* Merkle tree + +For an explanation of these terms, see the [Implementation Details](#implementation-details) section +above. + +#### Leader Behavior + +The `trust_lease` setting determines whether leader leases are used to +optimize reads. 
When set to `true`, a leader with a valid lease can
+handle reads directly without needing to contact any followers. When
+`false`, the leader will always contact followers, which can lead to
+degraded read performance. The default is `true`. We recommend leaving
+leader leases enabled for performance reasons.
+
+All leaders have periodic duties that they perform, including refreshing
+the leader lease. You can determine how frequently this occurs, in
+milliseconds, using the `ensemble_tick` setting. The default is 500
+milliseconds. Please note that this setting must be lower than both
+the `lease_duration` and `follower_timeout` settings (both explained
+below).
+
+If you set `trust_lease` to `true`, you can also specify how long a
+leader lease remains valid without being refreshed using the
+`lease_duration` setting, which is specified in milliseconds. This
+setting should be higher than `ensemble_tick` to ensure that leaders
+have time to refresh their leases before they time out, and it _must_
+be lower than `follower_timeout`, explained in the section below. The
+default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400,
+`lease_duration` will default to 600.
+
+#### Worker Settings
+
+You can choose how many workers are assigned to each peer using the
+`peer_workers` setting. Workers are lightweight processes spawned by
+leaders and followers. While increasing the number of workers will make
+the strong consistency subsystem slightly more computationally
+expensive, more workers can mean improved performance in some cases,
+depending on the workload. The default is 1.
+
+### Timeouts
+
+You can establish timeouts for both reads and writes (puts and deletes)
+using the `peer_get_timeout` and `peer_put_timeout` settings,
+respectively. Both are expressed in milliseconds and default to 60000
+(1 minute).
+
+Longer timeouts will decrease the likelihood that read or write
+operations will fail due to long computation times; shorter timeouts
+entail shorter wait times for connecting clients, but at a higher risk
+of failed operations under heavy load.
+
+### Merkle Tree Settings
+
+Leaders and followers in Riak's strong consistency system maintain
+persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for
+all data stored by that peer. More information can be found in the
+**Integrity Checking** section above. The two sections directly below
+describe Merkle-tree-related parameters.
+
+#### Tree Validation
+
+The `tree_validation` parameter determines whether Riak considers Merkle
+trees to be trusted after peers are restarted (for whatever reason).
+When enabled, i.e. when `tree_validation` is set to `true` (the
+default), Riak does not trust peer trees after a restart, instead
+requiring the peer to sync with a trusted quorum. While this is the
+safest mode because it protects Riak against silent corruption in Merkle
+trees, it carries the drawback that it can reduce Riak availability by
+requiring more than a simple majority of nodes to be online and
+reachable when peers restart.
+
+If you are using ensembles with N=3, we strongly recommend setting
+`tree_validation` to `false`.
+
+#### Synchronous vs. Asynchronous Tree Updates
+
+Merkle tree updates can happen synchronously or asynchronously. This is
+determined by the `synchronous_tree_updates` parameter.
When set to
+`false`, which is the default, Riak responds to the client after the
+first roundtrip that updates the followers' data but before the second
+roundtrip required to update the followers' Merkle trees, allowing the
+Merkle tree update to happen asynchronously in the background; when set
+to `true`, Riak requires two quorum roundtrips to occur before replying
+back to the client, which can increase per-request latency.
+
+Please note that this setting applies only to Merkle tree updates sent
+to followers. Leaders _always_ update their local Merkle trees before
+responding to the client. Asynchronous updates can be unsafe in certain
+scenarios. For example, if a leader crashes before sending metadata
+updates to followers _and_ all followers that had acknowledged the write
+somehow revert the object value immediately prior to the write request,
+a future read could hypothetically return the immediately preceding
+value without realizing that the value was incorrect. Setting
+`synchronous_tree_updates` to `false` does bear this possibility, but it
+is highly unlikely.
+
+## Strong Consistency and Active Anti-Entropy
+
+Riak's [active anti-entropy][glossary aae] \(AAE) feature _can_ repair strongly
+consistent data. Although it is not necessary to use active anti-entropy
+if you are using strong consistency, we nonetheless recommend doing so.
+
+Without AAE, all object conflicts are repaired via read repair.
+Read repair, however, cannot repair conflicts in so-called "cold data,"
+i.e. data that may not be read for long periods of time. While using AAE
+does entail small performance losses, not using AAE can lead to problems
+with silent on-disk corruption.
+
+## Strong Consistency and Bitcask
+
+One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability in objects' TTLs. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in
+strongly consistent buckets only in situations where these occasional
+irregularities are acceptable.
There is currently no +support within Riak for strongly consistent operations against multiple +keys, although it is always possible to incorporate client-side write +and read locks in applications that use strong consistency. + +## Known Issues + +There are a few known issues that you should be aware of when using the +latest version of strong consistency. + +* **Consistent reads of never-written keys create tombstones** --- A + [tombstone][cluster ops obj del] will be written if you perform a read + against a key that a majority of peers claims to not exist. This is + necessary for certain corner cases in which offline or unreachable + replicas containing partially written data need to be rolled back in + the future. +* **Consistent keys and key listing** --- In Riak, key listing + operations, such as listing all the keys in a bucket, do not filter + out tombstones. While this is rarely a problem for + non-strongly-consistent keys, it does present an issue for strong + consistency due to the tombstone issues mentioned above. +* **Secondary indexes not supported** --- Strongly consistent + operations do not support [secondary indexes][cluster ops 2i] \(2i) at this time. Furthermore, any other metadata + attached to objects, even if not related to 2i, will be silently + ignored by Riak in strongly consistent buckets. +* **Multi-Datacenter Replication not supported** --- At this time, + consistent keys are *not* replicated across clusters using Multi- + Datacenter Replication \(MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly + consistent data within a cluster with eventually consistent data + between clusters is difficult to reason about from the perspective of + applications. In a future version of Riak, we will add support for + strongly consistent replication across multiple datacenters/clusters. +* **Client library exceptions** --- Basho's official [client + libraries][dev client libraries] convert errors returned by Riak into generic exceptions, + with a message derived from the returned server-side error message. diff --git a/content/riak/kv/2.9.1/configuring/v2-multi-datacenter.md b/content/riak/kv/2.9.1/configuring/v2-multi-datacenter.md new file mode 100644 index 0000000000..0dd2920899 --- /dev/null +++ b/content/riak/kv/2.9.1/configuring/v2-multi-datacenter.md @@ -0,0 +1,156 @@ +--- +title_supertext: "Configuring:" +title: "V2 Multi-Datacenter Replication" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "V2 Multi-Datacenter" + identifier: "configuring_v2" + weight: 210 + parent: "configuring" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.1/ops/mdc/v2/configuration + - /riak/kv/2.9.1/ops/mdc/v2/configuration +--- + +[config v2 ssl]: {{}}riak/kv/2.9.1/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.1/configuring/v3-multi-datacenter/) instead. +{{% /note %}} + +Riak's Multi-Datacenter Replication capabilities offer a +variety of configurable parameters. + +## File + +The configuration for replication is kept in the `riak_repl` section of +each node's `advanced.config`. 
That section looks like this: + +```advancedconfig +{riak_repl, [ + {fullsync_on_connect, true}, + {fullsync_interval, 360}, + % Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + % Solaris: + % {data_root, "/opt/riak/data/riak_repl"}, + % FreeBSD/SmartOS: + % {data_root, "/var/db/riak/riak_repl"}, + {queue_size, 104857600}, + {server_max_pending, 5}, + {client_ack_frequency, 5} + ]} +``` + +## Usage + +These settings are configured using the standard Erlang config file +syntax, i.e. `{Setting, Value}`. For example, if you wished to set +`ssl_enabled` to `true`, you would insert the following line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{riak_repl, [ + % Other configs + {ssl_enabled, true}, + % Other configs + ]} +``` + +## Settings + +Once your configuration is set, you can verify its correctness by +running the following command: + +```bash +riak chkconfig +``` + +The output from this command will point you to syntactical and other +errors in your configuration files. + +A full list of configurable parameters can be found in the sections +below. + +## Fullsync Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster +`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.
**Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects the leader will wait to get an acknowledgment from, from the remote location, before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site + +## Client Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster +`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs +`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred + +## Buffer Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes +`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes + +## Worker Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). +`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). + + +1. SSL depth is the maximum number of non-self-issued + intermediate certificates that may follow the peer certificate in a valid + certificate chain. If depth is `0`, the PEER must be signed by the trusted + ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2` + then PEER, CA, CA, ROOT-CA and so on. + +2. Each get worker spawns 2 processes, one for the work and + one for the get FSM (an Erlang finite state machine implementation for `GET` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +3. Each put worker spawns 2 processes, one for the work, and + one for the put FSM (an Erlang finite state machine implementation for `PUT` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +4. If the ACL is specified and not the special value `*`, + peers presenting certificates not matching any of the patterns will not be + allowed to connect. + If no ACLs are configured, no checks on the common name are done, except + as described for [Identical Local and Peer Common Names][config v2 ssl]. 
diff --git a/content/riak/kv/2.9.1/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.9.1/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..3db6b03af3 --- /dev/null +++ b/content/riak/kv/2.9.1/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,78 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.1/ops/mdc/v2/nat + - /riak/kv/2.9.1/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/2.9.1/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.1/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` diff --git a/content/riak/kv/2.9.1/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.9.1/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..fecb3a564e --- /dev/null +++ b/content/riak/kv/2.9.1/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,367 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.1/ops/mdc/v2/quick-start + - /riak/kv/2.9.1/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.1/configuring/v3-multi-datacenter/quick-start/) instead. 
{{% /note %}}
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through
+the process of configuring Riak's version 2 Replication to perform
+replication between two sample Riak clusters in separate networks. This
+guide will also cover bidirectional replication, which is accomplished
+by setting up unidirectional replication in both directions between the
+clusters.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [Performing system tuning][perf index]
+* [Reviewing configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|-------------|-----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there are `listener_<nodename>`
+entries for each listening node, and that `leader` and `server_stats`
+are populated. They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following:
+
+```
+Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010"
+leader: 'riak@192.168.1.21'
+client_stats: [{<8051.3902.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster1"},
+                         {strategy,riak_repl_keylist_client},
+                         {fullsync_worker,<8051.3909.0>},
+                         {put_pool_size,5},
+                         {connected,"172.16.1.11",9010},
+                         {state,wait_for_fullsync}]}}]
+```
+
+### Testing Realtime Replication
+
+That's all there is to it! When `PUT` requests are coordinated by
+Cluster1, these operations will be replicated to Cluster2.
+
+You can use the following example script to verify that `PUT` operations
+sent to Cluster1 are being replicated to Cluster2:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=172.16.1.11
+CLUSTER_2_IP=192.168.1.21
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
+  echo "C1 PUT Successful"
+else
+  echo "C1 PUT Failed"
+  exit 1
+fi
+
+CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent
+  C1:${CHECKPUT_C1}
+  C2:${CHECKREPL_C1_TO_C2}"
+  exit 1
+fi
+
+exit 0
+```
+
+You will have to change some of the above variables for your own
+environment, such as IP addresses or ports.
+
+If you run this script and things are working as expected, you will get
+the following output:
+
+```
+C1 PUT Successful
+C1 to C2 consistent
+```
+
+## Set Up Cluster2 → Cluster1 Replication
+
+### About Bidirectional Replication
+
+Multi-Datacenter support can also be configured to replicate in both
+directions, ensuring eventual consistency between your two datacenters.
+Setting up bidirectional replication is as simple as repeating the steps
+above in the other direction, i.e. from Cluster2 to Cluster1.
+
+### Set Up the Listeners on Cluster2 (Source cluster)
+
+On a node in Cluster2, `node4` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010
+riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010
+riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010
+```
+
+### Set Up the Site on Cluster1 (Site cluster)
+
+On a node in Cluster1, `node1` for example, inform the replication
+clients where the Source Listeners are with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you configured in
+the earlier step. For `sitename` enter **Cluster2**.
+
+```bash
+riak-repl add-site 192.168.1.21 9010 Cluster2
+```
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show `cancelled` in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
diff --git a/content/riak/kv/2.9.1/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.9.1/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..ee786e869e
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,160 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.1/ops/mdc/v2/ssl
+  - /riak/kv/2.9.1/ops/mdc/v2/ssl
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.1/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+]}
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supercedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_depth, ...}
+    % ...
+]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self-signed.
+
+For example:
+
+  * A depth of 0 indicates that the certificate must be signed directly
+    by a root certificate authority (CA)
+  * A depth of 1 indicates that the certificate may be signed by at most
+    one intermediate CA, followed by a root CA
+  * A depth of 2 indicates that the certificate may be signed by at most
+    two intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
diff --git a/content/riak/kv/2.9.1/configuring/v3-multi-datacenter.md b/content/riak/kv/2.9.1/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..020b0c8e0d
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/v3-multi-datacenter.md
@@ -0,0 +1,157 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.1/ops/mdc/v3/configuration
+  - /riak/kv/2.9.1/ops/mdc/v3/configuration
+---
+
+[config reference#advanced]: {{}}riak/kv/2.9.1/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{}}riak/kv/2.9.1/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+ +Here is a sample of the syntax: + +```advancedconfig +{riak_core, [ + %% Every *node* runs one cluster_mgr + {cluster_mgr, {"0.0.0.0", 9080 }}, + % ... +]}, +{riak_repl, [ + %% Pick the correct data_root for your platform + %% Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + %% Solaris: + %% {data_root, "/opt/riak/data/riak_repl"}, + %% FreeBSD/SmartOS: + %% {data_root, "/var/db/riak/riak_repl"}, + {max_fssource_cluster, 5}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {fullsync_on_connect, false}, + % ... +]} +``` + +## Settings + +Riak MDC configuration is set using the standard Erlang config file +syntax `{Setting, Value}`. For example, if you wished to set +`fullsync_on_connect` to `false`, you would insert this line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{fullsync_on_connect, false} +``` + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak_repl Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number. +`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. +`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster +`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process +`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication. 
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
+`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
+`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100MB. Dropped objects will need to be replicated with a fullsync.
+`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
+`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+
+
+## riak_core Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an ssl `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+
+## Heartbeat Settings
+
+Two settings in the `riak_repl` section of `advanced.config` govern the
+periodic "heartbeat" that is sent from the source to the sink cluster to
+verify the sink cluster's liveness. The `rt_heartbeat_interval` setting
+determines how often the
+heartbeat is sent (in seconds). If a heartbeat is sent and a response is
+not received, Riak will wait `rt_heartbeat_timeout` seconds before
+attempting to re-connect to the sink; if any data is received from the
+sink, even if it is not heartbeat data, the timer will be reset. Setting
+`rt_heartbeat_interval` to `undefined` will disable the heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to. On the other hand,
+lengthening the timeout will make Riak less sensitive to cases in which
+the connection really has been compromised.
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA and so on.
+
+2. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer].
+
+## Default Bucket Properties
+
+Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0.
+
+To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the `advanced.config` file:
+
+```advancedconfig
+{riak_repl, [
+  {override_capability, [
+    {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+  ]}
+]}
+```
+
+If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed.
diff --git a/content/riak/kv/2.9.1/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.9.1/configuring/v3-multi-datacenter/nat.md
new file mode 100644
index 0000000000..31ac272d61
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/v3-multi-datacenter/nat.md
@@ -0,0 +1,167 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "With NAT"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "With NAT"
+    identifier: "configuring_v3_replication_nat"
+    weight: 101
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.1/ops/mdc/v3/nat
+  - /riak/kv/2.9.1/ops/mdc/v3/nat
+---
+
+[config v3 ssl]: {{}}riak/kv/2.9.1/configuring/v3-multi-datacenter/ssl
+
+Riak's Version 3 Replication supports replication of data on
+networks that use static NAT.
+
+This can be used for replicating data over the internet where servers
+have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network).
+
+### Requirements
+
+In order for Replication to work on a server configured with NAT, the
+NAT addresses must be configured *statically*.
+
+## Configuration
+
+NAT rules can be configured at runtime, from the command line.
+
+* `riak-repl nat-map show`
+
+    Shows the current NAT mapping table
+
+* `riak-repl nat-map add <external_ip>[:port] <internal_ip>`
+
+    Adds a NAT map from the external IP, with an optional port, to an
+    internal IP. The port number refers to a port that is automatically
+    mapped to the internal `cluster_mgr` port number.
+
+* `riak-repl nat-map del <external_ip>[:port] <internal_ip>`
+
+    Deletes a specific NAT map entry.
+
+### Applying Changes at Runtime
+
+* Realtime NAT replication changes will be applied once realtime is
+  stopped and started using the following commands:
+
+  * `riak-repl realtime stop <clustername>`
+  * `riak-repl realtime start <clustername>`
+
+* Fullsync NAT replication changes will be applied on the next run of a
+  fullsync, or you can stop and start the current fullsync.
+
+  * `riak-repl fullsync stop <clustername>`
+  * `riak-repl fullsync start <clustername>`
+
+
+## Example
+
+* Cluster_A is the **source** of replicated data.
+* Cluster_B and Cluster_C are the **sinks** of the replicated data.
+
+### Cluster_A Setup
+
+Cluster_A is set up with nodes using the following **internal** IP
+addresses:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.1.20` | -
+`192.168.1.21` | -
+`192.168.1.22` | -
+`192.168.1.23` | -
+`192.168.1.24` | -
+
+### Cluster_B Setup
+
+The nodes of Cluster_B are configured as follows:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.2.40` | `50.16.238.120:5555`
+`192.168.2.41` | `50.16.238.121:5555`
+`192.168.2.42` | `50.16.238.122:5555`
+`192.168.2.43` | `50.16.238.123:5555`
+`192.168.2.44` | `50.16.238.124:5555`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT port listens on `5555`.
+
+### Cluster_C Setup
+
+The nodes of Cluster_C are set up with **static NAT** and configured
+with the following IP addresses:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.3.60` | `50.16.238.200:5550`
+`192.168.3.61` | `50.16.238.200:5551`
+`192.168.3.62` | `50.16.238.200:5552`
+`192.168.3.63` | `50.16.238.200:5553`
+`192.168.3.64` | `50.16.238.200:5554`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT ports listen on `5550` through `5554`.
+
+```bash
+# on any node of Cluster_A
+riak-repl clustername Cluster_A
+
+# on any node of Cluster_B
+riak-repl clustername Cluster_B
+
+# on any node of Cluster_C
+riak-repl clustername Cluster_C
+
+# on 50.16.238.120 of Cluster_B
+riak-repl nat-map add 50.16.238.120:5555 192.168.2.40
+# on 50.16.238.121 of Cluster_B
+riak-repl nat-map add 50.16.238.121:5555 192.168.2.41
+# on 50.16.238.122 of Cluster_B
+riak-repl nat-map add 50.16.238.122:5555 192.168.2.42
+# on 50.16.238.123 of Cluster_B
+riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
+# on 50.16.238.124 of Cluster_B
+riak-repl nat-map add 50.16.238.124:5555 192.168.2.44
+
+# on 192.168.3.60 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5550 192.168.3.60
+# on 192.168.3.61 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5551 192.168.3.61
+# on 192.168.3.62 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5552 192.168.3.62
+# on 192.168.3.63 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5553 192.168.3.63
+# on 192.168.3.64 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5554 192.168.3.64
+
+
+# Connect replication from Cluster_A to Cluster_B:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.120:5555
+# You can connect to any node in Cluster_B with NAT-mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
+
+# Connect replication from Cluster_A to Cluster_C:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.200:5550
+# You can connect to any node in Cluster_C with NAT-mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
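+
+# Optional check: confirm that both sink connections are established
+# before enabling realtime replication (sample output is shown in the
+# v3 quick start under "View Your Active Connections").
+# on any node of Cluster_A
+riak-repl connections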
+ + +# on any node from Cluster_A +riak-repl realtime enable Cluster_B +riak-repl realtime enable Cluster_C + +riak-repl realtime start Cluster_B +riak-repl realtime start Cluster_C +``` diff --git a/content/riak/kv/2.9.1/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.9.1/configuring/v3-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..2568cefee6 --- /dev/null +++ b/content/riak/kv/2.9.1/configuring/v3-multi-datacenter/quick-start.md @@ -0,0 +1,168 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Quickstart" + identifier: "configuring_v3_quickstart" + weight: 100 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.1/ops/mdc/v3/quick-start + - /riak/kv/2.9.1/ops/mdc/v3/quick-start +--- + +[perf index]: {{}}riak/kv/2.9.1/using/performance +[config v3 mdc]: {{}}riak/kv/2.9.1/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter + +This guide will walk you through the process of configuring Riak's v3 +Replication to perform replication between two sample Riak clusters on +separate networks. This guide will also cover bidirectional replication, +which is accomplished by setting up unidirectional replication in both +directions between the clusters. It is important to note that both +clusters must have the same ring size, but can have a different number +of nodes. + +## Prerequisites + +This guide assumes that you have completed the following steps: + +* Install [Riak][install index] +* Perform [System Tuning][perf index] +* Review [Configuration][config v3 mdc] + +## About v3 Replication in 1.3 and higher + +In Riak's v3 Replication from Riak KV version 1.3 onwards, the nomenclature for Source and Site +clusters has changed. To more accurately reflect the behavior of each of +the clusters, "listeners" and "sites" are now known as "sources" and +"sinks." Data transfer now originates at the "source" and replicates to +the "sink;" initiation is always from the primary (source) to the backup +(sink) data center. + +Additionally, knowledge of the state of each cluster is now managed by a +**cluster manager** process, which greatly simplifies the setup and +maintenance of Multi-Datacenter replication. 
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak Clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|-----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View Your Active Connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name         <Ctrl-Pid>      [Members]
+----             ------------         ----------      ---------
+Cluster2         Cluster2             <0.7985.0>      ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name         <Ctrl-Pid>      [Members]
+----             ------------         ----------      ---------
+Cluster1         Cluster1             <0.4456.0>      ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2.
+Once this is done, bidirectional replication should be operating.
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
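+
+For an end-to-end check that realtime replication is working, you can
+adapt the test script from the v2 quick start. The minimal sketch below
+assumes the HTTP interface listens on port 8098 on `node1` and `node4`,
+and the bucket and key names are purely illustrative; adjust the IPs
+and ports for your environment:
+
+```bash
+#!/bin/bash
+
+# Write a value to Cluster1, then read it back from Cluster2.
+VALUE=`date`
+CLUSTER_1_IP=10.60.67.149   # node1 (source)
+CLUSTER_2_IP=10.60.77.10    # node4 (sink)
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+CHECKREPL=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL}" ]; then
+  echo "Cluster1 to Cluster2 consistent"
+else
+  echo "Cluster1 to Cluster2 inconsistent"
+  exit 1
+fi
+```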
diff --git a/content/riak/kv/2.9.1/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.9.1/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..cf38428892
--- /dev/null
+++ b/content/riak/kv/2.9.1/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,170 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.1/ops/mdc/v3/ssl
+  - /riak/kv/2.9.1/ops/mdc/v3/ssl
+---
+
+[config reference#advanced.config]: {{}}riak/kv/2.9.1/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+]}
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supercedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_core` section of `advanced.config`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_depth, 3} % Sets the depth to 3
+    % ...
+]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. The
+intermediate certificates must not be self-signed.
+
+The following example depths illustrate this:
+
+  * a depth of `0` indicates that the certificate must be signed
+    directly by a root certificate authority (CA)
+  * a depth of `1` indicates that the certificate may be signed by at
+    most one intermediate CA, followed by a root CA
+  * a depth of `2` indicates that the certificate may be signed by at
+    most two intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL for *Version 3* is available in *Riak 1.4+*.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+Read how to [generate your own CA and
+keys](http://www.debian-administration.org/articles/618). Ensure that
+you remove the password protection from the keys you generate.
diff --git a/content/riak/kv/2.9.1/developing.md b/content/riak/kv/2.9.1/developing.md
new file mode 100644
index 0000000000..0326f6ef62
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing.md
@@ -0,0 +1,73 @@
+---
+title: "Developing with Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Developing"
+    identifier: "developing"
+    weight: 300
+    pre: lambda
+toc: true
+---
+
+[getting started]: ../developing/getting-started
+[usage index]: ../developing/usage
+[client libraries]: ../developing/client-libraries
+[dev data types]: ../developing/data-types
+[dev data modeling]: ../developing/data-modeling
+[apps index]: ../developing/app-guide
+[dev api index]: ../developing/api
+[dev faq]: ../developing/faq
+
+## In This Section
+
+#### [Getting Started][getting started]
+
+Step-by-step guide for getting started developing with Riak KV.
+
+[Learn More >>][getting started]
+
+#### [Usage][usage index]
+
+A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types.
+
+[Learn More >>][usage index]
+
+#### [Client Libraries][client libraries]
+
+Overview of client libraries for a variety of programming languages and environments.
+
+[Learn More >>][client libraries]
+
+#### [Data Types][dev data types]
+
+Overview and guide to working with data types in Riak KV.
+
+[Learn More >>][dev data types]
+
+#### [Data Modeling][dev data modeling]
+
+Information on use cases and data models that are a good fit for Riak KV.
+ +[Learn More >>][dev data modeling] + +#### [Application Guide][apps index] + +A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV. + +[Learn More >>][apps index] + +#### [APIs Reference][dev api index] + +Information and reference material on Riak KV APIs. + +[Learn More >>][dev api index] + +#### [FAQ][dev faq] + +Frequently asked questions when developing applications with Riak KV. + +[Learn More >>][dev faq] + diff --git a/content/riak/kv/2.9.1/developing/api.md b/content/riak/kv/2.9.1/developing/api.md new file mode 100644 index 0000000000..cde53ead63 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api.md @@ -0,0 +1,37 @@ +--- +title: "APIs" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "APIs" + identifier: "developing_apis" + weight: 107 + parent: "developing" +toc: true +--- + +[dev api http]: ./http +[dev api backend]: ./backend +[dev api pbc]: ./protocol-buffers/ + +## In This Section + +#### [HTTP APIs][dev api http] + +Documentation on Riak KV's HTTP API. + +[Learn More >>][dev api http] + +#### [Protocol Buffers][dev api pbc] + +Information on Riak KV's Protocol Buffer Client API + +[Learn More >>][dev api pbc] + +#### [Backend API][dev api backend] + +Overview of Riak KV's storage backend API. + +[Learn More >>][dev api backend] diff --git a/content/riak/kv/2.9.1/developing/api/backend.md b/content/riak/kv/2.9.1/developing/api/backend.md new file mode 100644 index 0000000000..6b1b44ad84 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/backend.md @@ -0,0 +1,114 @@ +--- +title: "Backend API" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Backend API" + identifier: "apis_backend" + weight: 101 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.1/dev/references/backend-api + - /riak/kv/2.9.1/dev/references/backend-api +--- + +[plan backend]: {{}}riak/kv/2.9.1/setup/planning/backend + +Riak's storage API uniformly applies to all of the +[supported backends][plan backend]. This page presents the details of +the storage backend API in the form of +[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html) +(specs). + +Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html), +an Erlang static analysis tool. We recommend copying these specs into any +custom backend modules and use them as a guide for development to +avoid errors and ensure full compatibility with Riak. + +Also included below is the function export list that can be pasted directly +into a custom storage backend module. + +```erlang +%% Riak Storage Backend API +-export([api_version/0, + start/2, + stop/1, + get/3, + put/5, + delete/4, + drop/1, + fold_buckets/4, + fold_keys/4, + fold_objects/4, + is_empty/1, + status/1, + callback/3]). + +%% =================================================================== +%% Public API +%% =================================================================== + +%% @doc Return the major version of the +%% current API and a capabilities list. +%% The current valid capabilities are async_fold +%% and indexes. +-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. 
+ +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` diff --git a/content/riak/kv/2.9.1/developing/api/http.md b/content/riak/kv/2.9.1/developing/api/http.md new file mode 100644 index 0000000000..13ea66246a --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http.md @@ -0,0 +1,89 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.1/dev/references/http + - /riak/kv/2.9.1/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
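+
+For example, to fetch an object stored under a key that contains a
+slash (the key `docs/readme` here is purely illustrative), escape the
+slash as `%2F` in the request path:
+
+```curl
+# the key "docs/readme" becomes docs%2Freadme in the URL
+curl http://127.0.0.1:8098/buckets/test/keys/docs%2Freadme
+```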
+
+## Bucket-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.9.1/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.9.1/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.1/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.9.1/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.9.1/developing/api/http/list-keys)
+
+## Object-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.9.1/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{}}riak/kv/2.9.1/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.9.1/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.9.1/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.9.1/developing/api/http/delete-object)
+
+## Riak-Data-Type-related Operations
+
+Method | URL
+:------|:----
+`GET` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.9.1/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/2.9.1/developing/data-types/#usage-examples)
+and subpages e.g. [sets]({{}}riak/kv/2.9.1/developing/data-types/sets).
+
+Advanced users may consult the technical documentation inside the Riak
+KV internal module `riak_kv_wm_crdt`.
+
+## Query-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.9.1/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.1/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.1/developing/api/http/secondary-indexes)
+
+## Server-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.9.1/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.9.1/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.9.1/developing/api/http/list-resources)
+
+## Search-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{}}riak/kv/2.9.1/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.9.1/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{}}riak/kv/2.9.1/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{}}riak/kv/2.9.1/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{}}riak/kv/2.9.1/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.9.1/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{}}riak/kv/2.9.1/developing/api/http/store-search-schema)
diff --git a/content/riak/kv/2.9.1/developing/api/http/counters.md b/content/riak/kv/2.9.1/developing/api/http/counters.md
new file mode 100644
index 0000000000..a56702b7c3
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/http/counters.md
@@ -0,0 +1,78 @@
+---
+title: "HTTP Counters"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Counters"
+    identifier: "http_counters"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/http/counters
+  - /riak/kv/2.9.1/dev/references/http/counters
+---
+
+Riak counters are CRDTs (convergent replicated data types) that (eventually)
+converge to the correct total. You merely increment the counter with some
+integer, and any potential conflicts will be automatically resolved by Riak.
+
+## Setup
+
+Riak counters can only be used if the bucket has the `allow_mult` property
+set to `true`.
+
+```
+curl -XPUT localhost:8098/buckets/BUCKET/props \
+  -H "Content-Type: application/json" \
+  -d "{\"props\" : {\"allow_mult\": true}}"
+```
+
+If you attempt to use counters without setting the above, you'll get this
+message:
+
+```
+Counters require bucket property 'allow_mult=true'
+```
+
+## Request
+
+To increment a counter, POST an integer value using the `/counters`
+resource. This will increment the keyed value by the given amount.
+
+```
+POST /buckets/BUCKET/counters/KEY
+```
+
+To retrieve the current value, issue a GET using `/counters`:
+
+```
+GET /buckets/BUCKET/counters/KEY
+```
+
+## Response
+
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.9.1/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.9.1/developing/api/http/fetch-object)) responses apply here.
+
+Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata.
+
+## Example
+
+The body must be an integer (positive or negative).
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/delete-object.md b/content/riak/kv/2.9.1/developing/api/http/delete-object.md new file mode 100644 index 0000000000..8ac1bd8b2c --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/delete-object.md @@ -0,0 +1,75 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/delete-object + - /riak/kv/2.9.1/dev/references/http/delete-object +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> DELETE /buckets/test/keys/test2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/delete-search-index.md b/content/riak/kv/2.9.1/developing/api/http/delete-search-index.md new file mode 100644 index 0000000000..f6da8ffdd6 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/delete-search-index.md @@ -0,0 +1,33 @@ +--- +title: "HTTP Delete Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Delete Search Index" + identifier: "http_delete_search_index" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/delete-search-index + - /riak/kv/2.9.1/dev/references/http/delete-search-index +--- + +Deletes a Riak Search index. + +## Request + +``` +DELETE /search/index/ +``` + +## Normal Response Codes + +* `204 No Content` --- The index was successfully deleted (also returned + if the index did not exist to begin with) + +## Typical Error Codes + +* `503 Service Unavailable` --- The request timed out internally diff --git a/content/riak/kv/2.9.1/developing/api/http/fetch-object.md b/content/riak/kv/2.9.1/developing/api/http/fetch-object.md new file mode 100644 index 0000000000..56bd5316e9 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/fetch-object.md @@ -0,0 +1,242 @@ +--- +title: "HTTP Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Fetch Object" + identifier: "http_fetch_object" + weight: 105 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/fetch-object + - /riak/kv/2.9.1/dev/references/http/fetch-object +--- + +Reads an object from the specified bucket/key. + +## Request + +```bash +GET /types/type/buckets/bucket/keys/key +GET /buckets/bucket/keys/key +``` + +Important headers: + +* `Accept` - When `multipart/mixed` is the preferred content-type, objects with +siblings will return all siblings in single request. See [Siblings examples](#siblings-examples). See +also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1). + +Optional headers: + +* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics, +matching on the `ETag` and `Last-Modified` of the object, respectively. If the +object fails one of the tests (that is, if the ETag is equal or the object is +unmodified since the supplied timestamp), Riak will return a `304 Not Modified` +response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5). + +Optional query parameters: + +* `r` - (read quorum) how many replicas need to agree when retrieving the +object ([default is defined by the bucket]({{}}riak/kv/2.9.1/developing/api/http/set-bucket-props)) +* `pr` - how many primary replicas need to be online when doing the read +([default is defined by the bucket]({{}}riak/kv/2.9.1/developing/api/http/set-bucket-props)) +* `basic_quorum` - whether to return early in some failure cases (eg. 
when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/2.9.1/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.9.1/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.1/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. +{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT +< ETag: 6dQBm9oYA1mxRSH0e96l5W +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"foo":"bar"} +``` + +## Siblings examples + +### Manually requesting siblings + +Simple call to fetch an object that has siblings: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 102 +< +Siblings: +16vic4eU9ny46o4KPiDz1f +4v5xOg4bVwUYZdMkqf0d6I +6nr5tDTmhxnwuAFJDd2s6G +6zRSZFUJlHXZ15o9CG0BYl +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +Now request one of the siblings directly: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT +< ETag: 16vic4eU9ny46o4KPiDz1f +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/x-www-form-urlencoded +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + +### Get all siblings in one request + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: multipart/mixed +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh +< Content-Length: 766 +< + +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/x-www-form-urlencoded +Link: ; rel="up" +Etag: 16vic4eU9ny46o4KPiDz1f +Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 4v5xOg4bVwUYZdMkqf0d6I +Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6nr5tDTmhxnwuAFJDd2s6G +Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6zRSZFUJlHXZ15o9CG0BYl +Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT + +{"foo":"bar"} +--YinLMzyUR9feB17okMytgKsylvh-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/fetch-search-index.md b/content/riak/kv/2.9.1/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..37dcb0d72c --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/fetch-search-index.md @@ -0,0 +1,47 @@ +--- +title: "HTTP Fetch Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Fetch Search Index" + identifier: "http_fetch_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/fetch-search-index + - /riak/kv/2.9.1/dev/references/http/fetch-search-index +--- + +Retrieves information about a Riak Search [index]({{}}riak/kv/2.9.1/developing/usage/search/#simple-setup). + +## Request + +``` +GET /search/index/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` --- No Search index with that name is currently + available +* `503 Service Unavailable` --- The request timed out internally + +## Response + +If the index is found, Riak will output a JSON object describing the +index, including its name, the [`n_val`]({{}}riak/kv/2.9.1/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.9.1/developing/usage/search-schemas) used by the index. 
Here is an example: + +```json +{ + "name": "my_index", + "n_val": 3, + "schema": "_yz_default" +} +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.9.1/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..b406e4f27d --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/fetch-search-schema.md @@ -0,0 +1,38 @@ +--- +title: "HTTP Fetch Search Schema" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Fetch Search Schema" + identifier: "http_fetch_search_schema" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/fetch-search-schema + - /riak/kv/2.9.1/dev/references/http/fetch-search-schema +--- + +Retrieves a Riak KV [search schema]({{}}riak/kv/2.9.1/developing/usage/search-schemas). + +## Request + +``` +GET /search/schema/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +* `503 Service Unavailable` --- The request timed out internally + +## Response + +If the schema is found, Riak will return the contents of the schema as +XML (all Riak Search schemas are XML). diff --git a/content/riak/kv/2.9.1/developing/api/http/get-bucket-props.md b/content/riak/kv/2.9.1/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..c2ea51a695 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/get-bucket-props.md @@ -0,0 +1,82 @@ +--- +title: "HTTP Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Get Bucket Properties" + identifier: "http_get_bucket_props" + weight: 100 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/get-bucket-props + - /riak/kv/2.9.1/dev/references/http/get-bucket-props +--- + +Reads the bucket or bucket type properties. + +## Request + +```bash +GET /buckets/bucket/props +``` + +Or, to read bucket properties from a bucket in a bucket type: + +```bash +GET /types/type/buckets/bucket/props +``` + +Optional query parameters (only valid for the old format): + +* `props` - whether to return the bucket properties (`true` is the default) +* `keys` - whether to return the keys stored in the bucket. (`false` is the +default). See also [HTTP List Keys]({{}}riak/kv/2.9.1/developing/api/http/list-keys). + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` + +The JSON object in the response will contain up to two entries, `"props"` and +`"keys"`, which are present or missing, according to the optional query +parameters. The default is for only `"props"` to be present. + +See [HTTP Set Bucket Properties]({{}}riak/kv/2.9.1/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.9.1/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/props +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/link-walking.md b/content/riak/kv/2.9.1/developing/api/http/link-walking.md new file mode 100644 index 0000000000..b9cf88b392 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/link-walking.md @@ -0,0 +1,125 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/link-walking + - /riak/kv/2.9.1/dev/references/http/link-walking +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/2.9.1/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.9.1/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/2.9.1/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/list-buckets.md b/content/riak/kv/2.9.1/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..79d7b096e3 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/list-buckets.md @@ -0,0 +1,64 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/list-buckets + - /riak/kv/2.9.1/dev/references/http/list-buckets +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. 
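+
+The example below uses the default bucket type. For a non-default bucket
+type, the same query parameter applies to the typed URL shown in the Request
+section. A minimal sketch, assuming a bucket type named `mytype` has been
+created and activated:
+
+```curl
+$ curl -i http://localhost:8098/types/mytype/buckets?buckets=true
+```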
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/list-keys.md b/content/riak/kv/2.9.1/developing/api/http/list-keys.md new file mode 100644 index 0000000000..afb7900334 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/list-keys.md @@ -0,0 +1,76 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/list-keys + - /riak/kv/2.9.1/dev/references/http/list-keys +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/list-resources.md b/content/riak/kv/2.9.1/developing/api/http/list-resources.md new file mode 100644 index 0000000000..51e061c730 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/list-resources.md @@ -0,0 +1,80 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/list-resources + - /riak/kv/2.9.1/dev/references/http/list-resources +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
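+
+Because the same information is returned as JSON, a client can discover an
+endpoint at startup rather than hard-coding it. A quick sketch, assuming the
+`jq` utility is installed locally:
+
+```curl
+$ curl -s http://localhost:8098 -H "Accept: application/json" | jq -r '.riak_kv_wm_mapred'
+/mapred
+```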
+ +The standard resources are: + +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.9.1/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.9.1/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.9.1/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.9.1/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.9.1/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.9.1/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.9.1/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.9.1/developing/api/http/status) + +## Request + +```bash +GET / +``` + +Headers: + +* `Accept` - `application/json` or `text/html` + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Link` - all resources that are described in the response body, but in Link +form + +## Example + +Request JSON response + +```curl +$ curl -i http://localhost:8098 -H "Accept: application/json" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:18:31 GMT +Content-Type: application/json +Content-Length: 398 + +{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"} + +# Request HTML response +curl -i http://localhost:8098 -H "Accept: text/html" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:20:05 GMT +Content-Type: text/html +Content-Length: 666 + + +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/mapreduce.md b/content/riak/kv/2.9.1/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..0907b9dcf9 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/mapreduce.md @@ -0,0 +1,70 @@ +--- +title: "HTTP MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "MapReduce" + identifier: "http_mapreduce" + weight: 108 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/mapreduce + - /riak/kv/2.9.1/dev/references/http/mapreduce +--- + +[MapReduce]({{}}riak/kv/2.9.1/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. 
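+
+Before the request details below, a quick orientation: a job is posted as a
+single JSON document whose `inputs` name a bucket (or list explicit
+bucket/key pairs) and whose `query` lists the phases to run in order. A
+minimal sketch that maps one object through the built-in
+`Riak.mapValuesJson` function (the bucket and key are placeholders):
+
+```curl
+$ curl -XPOST http://127.0.0.1:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d '{"inputs":[["test","doc2"]],"query":[{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}'
+[{"foo":"bar"}]
+```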
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.9.1/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+*This request must include an entity (body), which is the JSON form of the MapReduce query.*
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted.
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
diff --git a/content/riak/kv/2.9.1/developing/api/http/ping.md b/content/riak/kv/2.9.1/developing/api/http/ping.md
new file mode 100644
index 0000000000..74c5c30fed
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/http/ping.md
@@ -0,0 +1,53 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/http/ping
+  - /riak/kv/2.9.1/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load balancers, and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
diff --git a/content/riak/kv/2.9.1/developing/api/http/reset-bucket-props.md b/content/riak/kv/2.9.1/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..18815a00af
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/http/reset-bucket-props
+  - /riak/kv/2.9.1/dev/references/http/reset-bucket-props
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/bucket/props HTTP/1.1
+> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue)
+< Date: Tue, 06 Nov 2012 21:56:17 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+```
diff --git a/content/riak/kv/2.9.1/developing/api/http/search-index-info.md b/content/riak/kv/2.9.1/developing/api/http/search-index-info.md
new file mode 100644
index 0000000000..9501ac0d82
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/http/search-index-info.md
@@ -0,0 +1,52 @@
+---
+title: "HTTP Search Index Info"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Search Index Info"
+    identifier: "http_search_index_info"
+    weight: 114
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/http/search-index-info
+  - /riak/kv/2.9.1/dev/references/http/search-index-info
+---
+
+Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.9.1/developing/usage/search) in JSON format.
+
+## Request
+
+```
+GET /search/index
+```
+
+## Response
+
+If there are no currently available Search indexes, a `200 OK` will be
+returned but with an empty list as the response value.
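+
+The request itself takes no parameters. A bare sketch; with no indexes
+defined, the response body is simply an empty JSON list:
+
+```curl
+$ curl http://localhost:8098/search/index
+[]
+```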
+ +Below is the example output if there is one Search index, called +`test_index`, currently available: + +```json +[ + { + "n_val": 3, + "name": "test_index", + "schema": "_yz_default" + } +] +``` + +#### Normal Response Codes + +* `200 OK` + +#### Typical Error Codes + +* `404 Object Not Found` --- Typically returned if Riak Search is not + currently enabled on the node +* `503 Service Unavailable` --- The request timed out internally diff --git a/content/riak/kv/2.9.1/developing/api/http/search-query.md b/content/riak/kv/2.9.1/developing/api/http/search-query.md new file mode 100644 index 0000000000..656bd1f156 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/search-query.md @@ -0,0 +1,69 @@ +--- +title: "HTTP Search Query" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Search Query" + identifier: "http_search_query" + weight: 113 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/search-query + - /riak/kv/2.9.1/dev/references/http/search-query +--- + +Performs a [Riak KV Search]({{}}riak/kv/2.9.1/developing/usage/search) query. + +## Request + +``` +GET /search/query/ +``` + +## Optional Query Parameters + +* `wt` --- The [response + writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers) + to be used when returning the Search payload. The currently + available options are `json` and `xml`. The default is `xml`. +* `q` --- The actual Search query itself. Examples can be found in + [Using Search]({{}}riak/kv/2.9.1/developing/usage/search). If a query is not specified, Riak will return + information about the index itself, e.g. the number of documents + indexed. + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `400 Bad Request` --- Returned when, for example, a malformed query is + supplied +* `404 Object Not Found` --- Returned if the Search index you are + attempting to query does not exist +* `503 Service Unavailable` --- The request timed out internally + +## Response + +If a `200 OK` is returned, then the Search query has been successful. +Below is an example JSON response from querying an index that currently +has no documents associated with it: + +```json +{ + "response": { + "docs": [], + "maxScore": 0.0, + "numFound": 0, + "start": 0 + }, + "responseHeader": { + "status": 0, + "QTime": 10, + "params": { /* internal info from the query */ } + } +} +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/secondary-indexes.md b/content/riak/kv/2.9.1/developing/api/http/secondary-indexes.md new file mode 100644 index 0000000000..9f60d343a4 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/secondary-indexes.md @@ -0,0 +1,91 @@ +--- +title: "HTTP Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Secondary Indexes" + identifier: "http_2i" + weight: 109 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/secondary-indexes + - /riak/kv/2.9.1/dev/references/http/secondary-indexes +--- + +[Secondary Indexes]({{}}riak/kv/2.9.1/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
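+
+Indexes are populated at write time: storing an object with one or more
+`X-Riak-Index-*` headers adds it to the corresponding indexes. A sketch
+that would satisfy the exact-match query in the example further below:
+
+```curl
+$ curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "Content-Type: text/plain" \
+  -H "x-riak-index-field1_bin: val1" \
+  -d 'some data'
+```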
+ +## Request + +### Exact Match + +```bash +GET /buckets/mybucket/index/myindex_bin/value +``` + +### Range Query + +``` +GET /buckets/mybucket/index/myindex_bin/start/end +``` + +#### Range query with terms + +To see the index values matched by the range, use `return_terms=true`. + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true +``` + +### Pagination + +Add the parameter `max_results` for pagination. This will limit the results and provide for the next request a `continuation` value. + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500 +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM= +``` + +### Streaming + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?stream=true +``` + +## Response + +Normal status codes: + ++ `200 OK` + +Typical error codes: + ++ `400 Bad Request` - if the index name or index value is invalid. ++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system. ++ `503 Service Unavailable` - if the job timed out before it could complete + +## Example + +```curl +$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1 +* About to connect() to localhost port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to localhost (127.0.0.1) port 8098 (#0) +> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3 +> Host: localhost:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 19 +< +* Connection #0 to host localhost left intact +* Closing connection #0 +{"keys":["mykey1"]}% +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/set-bucket-props.md b/content/riak/kv/2.9.1/developing/api/http/set-bucket-props.md new file mode 100644 index 0000000000..7bd226b5b0 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/set-bucket-props.md @@ -0,0 +1,101 @@ +--- +title: "HTTP Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Set Bucket Properties" + identifier: "http_set_bucket_props" + weight: 101 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/set-bucket-props + - /riak/kv/2.9.1/dev/references/http/set-bucket-props +--- + +Sets bucket properties like "n_val" and "allow_mult". + +## Request + +```bash +PUT /buckets/bucket/props +``` + +Important headers: + +* `Content-Type` - `application/json` + +The body of the request should be a JSON object with a single entry "props". +Unmodified bucket properties may be omitted. + +Available properties: + +* `n_val` (integer > 0) - the number of replicas for objects in this bucket +* `allow_mult` (true or false) - whether to allow sibling objects to be created +(concurrent updates) +* `last_write_wins` (true or false) - whether to ignore object history (vector +clock) when writing +* `precommit` - [precommit hooks]({{}}riak/kv/2.9.1/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.9.1/developing/usage/commit-hooks) +* `r, w, dw, rw` - default quorum values for operations on keys in the bucket. 
+Valid values are: + * `"all"` - all nodes must respond + * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.* + * `"one"` - equivalent to 1 + * *Any integer* - must be less than or equal to n_val +* `backend` - when using `riak_kv_multi_backend`, which named backend to use for +the bucket +* `node_confirms` - declares the number of diverse physical node acks required for a write +to be successful + +Other properties do exist but are not commonly modified. + +{{% note title="Property types" %}} +Make sure you use the proper types for attributes like **n_val** and +**allow_mult**. If you use strings instead of integers and booleans +respectively, you may see some odd errors in your logs, saying something like +`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`. +{{% /note %}} + +## Response + +Normal status codes: + +* `204 No Content` + +Typical error codes: + +* `400 Bad Request` - if the submitted JSON is invalid +* `415 Unsupported Media Type` - if the Content-Type was not set to +application/json in the request + +If successful, no content will be returned in the response body. + +## Example + +```curl +$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \ + -H "Content-Type: application/json" -d '{"props":{"n_val":5}}' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> Content-Length: 21 +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/status.md b/content/riak/kv/2.9.1/developing/api/http/status.md new file mode 100644 index 0000000000..71fb15feec --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/status.md @@ -0,0 +1,169 @@ +--- +title: "HTTP Status" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Status" + identifier: "http_status" + weight: 111 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/status + - /riak/kv/2.9.1/dev/references/http/status +--- + +Reports about the performance and configuration of the Riak node to which it was requested. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active. + +## Performance + +Repeated requests to the `/stats` endpoint do not have a negative +performance impact as the statistics are cached internally in Riak. + +## Request + +```bash +GET /stats +``` + +Important headers: + +* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`. + +## Response + +Normal status codes: +* `200 OK` + +Typical error codes: +* `404 Not Found` - if `riak_kv_stat` is not enabled + +Important headers: +* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks) + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /stats HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: text/plain +> +< HTTP/1.1 200 OK +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 2102 +< +{ + "vnode_gets": 0, + "vnode_puts": 0, + "read_repairs": 0, + "vnode_gets_total": 0, + "vnode_puts_total": 0, + "node_gets": 0, + "node_gets_total": 0, + "node_get_fsm_time_mean": "undefined", + "node_get_fsm_time_median": "undefined", + "node_get_fsm_time_95": "undefined", + "node_get_fsm_time_99": "undefined", + "node_get_fsm_time_100": "undefined", + "node_puts": 0, + "node_puts_total": 0, + "node_put_fsm_time_mean": "undefined", + "node_put_fsm_time_median": "undefined", + "node_put_fsm_time_95": "undefined", + "node_put_fsm_time_99": "undefined", + "node_put_fsm_time_100": "undefined", + "read_repairs_total": 0, + "cpu_nprocs": 84, + "cpu_avg1": 251, + "cpu_avg5": 174, + "cpu_avg15": 110, + "mem_total": 7946684000.0, + "mem_allocated": 4340880000.0, + "nodename": "riak@127.0.0.1", + "connected_nodes": [ + + ], + "sys_driver_version": "1.5", + "sys_global_heaps_size": 0, + "sys_heap_type": "private", + "sys_logical_processors": 2, + "sys_otp_release": "R13B04", + "sys_process_count": 189, + "sys_smp_support": true, + "sys_system_version": "Erlang R13B04 (erts-5.7.5) [[source]] [[64-bit]] [[smp:2:2]] [[rq:2]] [[async-threads:5]] [[hipe]] [[kernel-poll:true]]", + "sys_system_architecture": "i386-apple-darwin10.3.0", + "sys_threads_enabled": true, + "sys_thread_pool_size": 5, + "sys_wordsize": 8, + "ring_members": [ + "riak@127.0.0.1" + ], + "ring_num_partitions": 64, + "ring_ownership": "[{'riak@127.0.0.1',64}]", + "ring_creation_size": 64, + "storage_backend": "riak_kv_bitcask_backend", + "pbc_connects_total": 0, + "pbc_connects": 0, + "pbc_active": 0, + "riak_kv_version": "0.11.0", + "riak_core_version": "0.11.0", + "bitcask_version": "1.0.1", + "luke_version": "0.1", + "webmachine_version": "1.7.1", + "mochiweb_version": "1.7.1", + "erlang_js_version": "0.4", + "runtime_tools_version": "1.8.3", + "crypto_version": "1.6.4", + "os_mon_version": "2.9.1", + "sasl_version": "2.1.9", + "stdlib_version": "1.16.5", + "kernel_version": "2.13.5" +} +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Output Explanation + +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.9.1/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
+
+Stat | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total | Total number of ignored gossip messages since node was started
+rings_reconciled_total | Total number of ring reconciliation operations since node was started
+rings_reconciled | Number of ring reconciliation operations in the last minute
+gossip_received | Number of gossip messages received in the last minute
+rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute
diff --git a/content/riak/kv/2.9.1/developing/api/http/store-object.md b/content/riak/kv/2.9.1/developing/api/http/store-object.md
new file mode 100644
index 0000000000..f0d9900d7d
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/http/store-object.md
@@ -0,0 +1,146 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/http/store-object
+  - /riak/kv/2.9.1/dev/references/http/store-object
+---
+
+Stores an 
object under the specified bucket / key. Storing an object comes in +two forms, depending on whether you want to use a key of your choosing, or let +Riak assign a key to a new object. + +## Request + +```bash +POST /types/type/buckets/bucket/keys # Riak-defined key +PUT /types/type/buckets/bucket/keys/key # User-defined key +POST /buckets/bucket/keys # Riak-defined key +PUT /buckets/bucket/keys/key # User-defined key +``` + +For the sake of compatibility with older clients, `POST` is also acceptable in +the form where the key is specified. + +Important headers: + +* `Content-Type` must be set for the stored object. Set what you expect to +receive back when next requesting it. +* `X-Riak-Vclock` if the object already exists, the vector clock attached to the +object when read. +* `X-Riak-Meta-*` - any additional metadata headers that should be stored with +the object. +* `X-Riak-Index-*` - index entries under which this object should be indexed. +[Read more about Secondary Indexing]({{}}riak/kv/2.9.1/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.1/developing/api/http/link-walking) + +Optional headers (only valid on `PUT`): + +* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since` +invoke conditional request semantics, matching on the `ETag` and `Last-Modified` +of the existing object. These can be used to prevent overwriting a modified +object. If the test fails, you will receive a `412 Precondition Failed` +response. This does not prevent concurrent writes; it is possible for the +condition to evaluate to true for multiple requests if the requests occur at the +same time. + +Optional query parameters: + +* `w` (write quorum) how many replicas to write to before returning a successful +response (default is defined by the bucket level) +* `dw` (durable write quorum) how many replicas to commit to durable storage +before returning a successful response (default is defined at the bucket level) +* `pw` how many primary replicas must be online to attempt a write (default is +defined at the bucket level) +* `returnbody=[true|false]` whether to return the contents of the stored object. + +*This request must include a body (entity).* + +## Response + +Normal status codes: + +* `201 Created` (when submitting without a key) +* `200 OK` +* `204 No Content` +* `300 Multiple Choices` + +Typical error codes: + +* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N) +* `412 Precondition Failed` if one of the conditional request headers failed to +match (see above) + +Important headers: + +* `Location` a relative URL to the newly-created object (when submitting without +a key) + +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.9.1/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +may be returned if siblings existed or were created as part of the operation, +and the response can be dealt with similarly. + +## Example: Storing Without Key + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys \ + -H "Content-Type: text/plain" -d 'this is a test' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> POST /buckets/test/keys HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: text/plain +> Content-Length: 14 +> +< HTTP/1.1 201 Created +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49 +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Example: Storing With Key + +```curl +$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA== +> Content-Length: 13 +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` diff --git a/content/riak/kv/2.9.1/developing/api/http/store-search-index.md b/content/riak/kv/2.9.1/developing/api/http/store-search-index.md new file mode 100644 index 0000000000..06e91298f3 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/http/store-search-index.md @@ -0,0 +1,52 @@ +--- +title: "HTTP Store Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Store Search Index" + identifier: "http_store_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.1/dev/references/http/store-search-index + - /riak/kv/2.9.1/dev/references/http/store-search-index +--- + +Creates a new Riak Search [index]({{}}riak/kv/2.9.1/developing/usage/search/#simple-setup). + +## Request + +``` +PUT /search/index/ +``` + +## Optional Request Body + +If you run a `PUT` request to this endpoint without a request body, Riak +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.9.1/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. + +To specify a different schema, however, you must pass Riak a JSON object +as the request body in which the `schema` field specifies the name of +the schema to use. If you've [stored a schema]({{}}riak/kv/2.9.1/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +request would create an index called `my_index` that used that schema: + +```curl +curl -XPUT http://localhost:8098/search/index/my_index \ + -H "Content-Type: application/json" \ + -d '{"schema": "my_custom_schema"}' +``` + +More information can be found in [Using Search]({{}}riak/kv/2.9.1/developing/usage/search). 
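+
+Conversely, a `PUT` with no request body creates the index with the
+`_yz_default` schema. A minimal sketch:
+
+```curl
+curl -XPUT http://localhost:8098/search/index/my_index
+```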
+
+## Normal Response Codes
+
+* `204 No Content` --- The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` --- The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` --- The request timed out internally
diff --git a/content/riak/kv/2.9.1/developing/api/http/store-search-schema.md b/content/riak/kv/2.9.1/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..13e879ea02
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/http/store-search-schema.md
@@ -0,0 +1,50 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/http/store-search-schema
+  - /riak/kv/2.9.1/dev/references/http/store-search-schema
+---
+
+Creates a new Riak [Search schema]({{}}riak/kv/2.9.1/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.9.1/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response Codes
+
+* `204 No Content` --- The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` --- The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict` --- The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable` --- The request timed out internally
diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..1faef63b5e
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers.md
@@ -0,0 +1,185 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/protocol-buffers
+  - /riak/kv/2.9.1/dev/references/protocol-buffers
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages. 
Messages are all encoded the same way, consisting of: + +* 32-bit length of message code + Protocol Buffers message in network + order +* 8-bit message code to identify the Protocol Buffers message +* N bytes of Protocol Buffers-encoded message + +### Example + +``` +00 00 00 07 09 0A 01 62 12 01 6B +|----Len---|MC|----Message-----| + +Len = 0x07 +Message Code (MC) = 0x09 = RpbGetReq +RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B + +Decoded Message: +bucket: "b" +key: "k" +``` + +## Message Codes + +Code | Message | +:----|:--------| +0 | `RpbErrorResp` | +1 | `RpbPingReq` | +2 | `RpbPingResp` | +3 | `RpbGetClientIdReq` | +4 | `RpbGetClientIdResp` | +5 | `RpbSetClientIdReq` | +6 | `RpbSetClientIdResp` | +7 | `RpbGetServerInfoReq` | +8 | `RpbGetServerInfoResp` | +9 | `RpbGetReq` | +10 | `RpbGetResp` | +11 | `RpbPutReq` | +12 | `RpbPutResp` | +13 | `RpbDelReq` | +14 | `RpbDelResp` | +15 | `RpbListBucketsReq` | +16 | `RpbListBucketsResp` | +17 | `RpbListKeysReq` | +18 | `RpbListKeysResp` | +19 | `RpbGetBucketReq` | +20 | `RpbGetBucketResp` | +21 | `RpbSetBucketReq` | +22 | `RpbSetBucketResp` | +23 | `RpbMapRedReq` | +24 | `RpbMapRedResp` | +25 | `RpbIndexReq` | +26 | `RpbIndexResp` | +27 | `RpbSearchQueryReq` | +28 | `RbpSearchQueryResp` | +29 | `RpbResetBucketReq` | +30 | `RpbResetBucketResp` | +31 | `RpbGetBucketTypeReq` | +32 | `RpbSetBucketTypeResp` | +40 | `RpbCSBucketReq` | +41 | `RpbCSUpdateReq` | +50 | `RpbCounterUpdateReq` | +51 | `RpbCounterUpdateResp` | +52 | `RpbCounterGetReq` | +53 | `RpbCounterGetResp` | +54 | `RpbYokozunaIndexGetReq` | +55 | `RpbYokozunaIndexGetResp` | +56 | `RpbYokozunaIndexPutReq` | +57 | `RpbYokozunaIndexPutResp` | +58 | `RpbYokozunaSchemaGetReq` | +59 | `RpbYokozunaSchemaGetResp` | +60 | `RpbYokozunaSchemaPutReq` | +80 | `DtFetchReq` | +81 | `DtFetchResp` | +82 | `DtUpdateReq` | +83 | `DtUpdateResp` | +253 | `RpbAuthReq` | +254 | `RpbAuthResp` | +255 | `RpbStartTls` | + +{{% note title="Message Definitions" %}} +All Protocol Buffers messages are defined in the `riak.proto` and other +`.proto` files in the `/src` directory of the +RiakPB project. +{{% /note %}} + +### Error Response + +If the request does not result in an error, Riak will return one of a +variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`, +depending on which request message is sent. + +If the server experiences an error processing a request, however, it +will return an `RpbErrorResp` message instead of the response expected +for the given request (e.g. `RbpGetResp` is the expected response to +`RbpGetReq`). Error messages contain an error string and an error code, +like this: + +```protobuf +message RpbErrorResp { + required bytes errmsg = 1; + required uint32 errcode = 2; +} +``` + +### Values + +* `errmsg` --- A string representation of what went wrong +* `errcode` --- A numeric code. Currently, only `RIAKC_ERR_GENERAL=1` + is defined. 
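+
+The framing can be exercised without a client library. A sketch, assuming
+`nc` and `xxd` are available locally: `RpbPingReq` (message code 1) has an
+empty message body, so the frame is a 4-byte length of 1 followed by the
+code byte, and the expected reply is a bare `RpbPingResp` (code 2). Some
+`nc` variants need a flag such as `-q 1` to exit after the reply arrives.
+
+```curl
+$ printf '\x00\x00\x00\x01\x01' | nc 127.0.0.1 8087 | xxd
+00000000: 0000 0001 02                             .....
+```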
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..c7d392bb85 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,30 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/auth-req + - /riak/kv/2.9.1/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.9.1/using/security/basics). diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..1a82b20575 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,78 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.9.1" +menu: + riak_kv-2.9.1: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/coverage-queries + - /riak/kv/2.9.1/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
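+
+As a sketch of how a client might consume this response, the loop below
+walks the returned coverage entries and falls back to `replace_cover` and
+`unavailable_cover` when an endpoint cannot be reached. The
+`request_coverage` and `run_2i_query` helpers are hypothetical placeholders
+for real wire-protocol calls, not part of any actual client library.
+
+```python
+from dataclasses import dataclass
+
+@dataclass
+class CoverageEntry:
+    # Mirrors the RpbCoverageEntry message above
+    ip: bytes
+    port: int
+    cover_context: bytes
+    keyspace_desc: bytes = b""
+
+def request_coverage(bucket, replace_cover=None, unavailable_cover=()):
+    raise NotImplementedError  # hypothetical: send RpbCoverageReq, parse RpbCoverageResp
+
+def run_2i_query(entry):
+    raise NotImplementedError  # hypothetical: 2i query carrying entry.cover_context
+
+def query_all_slices(bucket):
+    results, unavailable = [], []
+    for entry in request_coverage(bucket):
+        try:
+            results.extend(run_2i_query(entry))
+        except ConnectionError:
+            # Ask Riak for a replacement slice, telling it which endpoints
+            # we already know to be unreachable.
+            unavailable.append(entry.cover_context)
+            replacements = request_coverage(bucket,
+                                            replace_cover=entry.cover_context,
+                                            unavailable_cover=unavailable)
+            for repl in replacements:
+                results.extend(run_2i_query(repl))
+    return results
+```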
diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/delete-object.md
new file mode 100644
index 0000000000..7a573021da
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/delete-object.md
@@ -0,0 +1,100 @@
+---
+title: "PBC Delete Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Delete Object"
+    identifier: "pbc_delete_object"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/protocol-buffers/delete-object
+  - /riak/kv/2.9.1/dev/references/protocol-buffers/delete-object
+---
+
+Delete an object in the specified [bucket type]({{}}riak/kv/2.9.1/using/cluster-operations/bucket-types)/bucket/key location.
+
+## Request
+
+```protobuf
+message RpbDelReq {
+  required bytes bucket = 1;
+  required bytes key = 2;
+  optional uint32 rw = 3;
+  optional bytes vclock = 4;
+  optional uint32 r = 5;
+  optional uint32 w = 6;
+  optional uint32 pr = 7;
+  optional uint32 pw = 8;
+  optional uint32 dw = 9;
+  optional uint32 timeout = 10;
+  optional bool sloppy_quorum = 11;
+  optional uint32 n_val = 12;
+  optional bytes type = 13;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description |
+:---------|:------------|
+`bucket` | The name of the bucket in which the object is stored
+`key` | The key under which the object is stored
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `rw`, `r`, `w`, `pr`,
+`pw`, and `dw` parameters, provided that that integer value is less than or
+equal to N, _or_ a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description |
+:---------|:------------|
+`rw` | How many replicas to delete before returning a successful response
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Used to prevent the deletion of objects that have been modified since the last GET request (sent as a byte array)
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the delete request will be sent
+`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter.
+
+## Response
+
+Only the message code is returned.
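+
+For illustration only, here is a short Python sketch that hand-assembles the
+exact `RpbDelReq` frame shown in the example below (message code 13, per the
+code table on the overview page). A real application would use a protobuf
+library or an existing Riak client instead of encoding fields by hand.
+
+```python
+import struct
+
+bucket, key, rw = b"notabucket", b"k", 1
+
+# Hand-encoded RpbDelReq: field 1 (bucket, tag 0x0A) and field 2 (key, tag
+# 0x12) are length-delimited; field 3 (rw, tag 0x18) is a varint, and rw=1
+# fits in a single varint byte.
+payload = (b"\x0a" + bytes([len(bucket)]) + bucket +
+           b"\x12" + bytes([len(key)]) + key +
+           b"\x18" + bytes([rw]))
+frame = struct.pack(">IB", len(payload) + 1, 13) + payload
+assert frame.hex() == "000000120d0a0a6e6f74616275636b657412016b1801"
+```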
+ +## Example + +#### Request + +``` +Hex 00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65 + 74 12 01 6B 18 01 +Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>> + +RpbDelReq protoc decode: +bucket: "notabucket" +key: "k" +rw: 1 + +``` + +#### Response + +``` +Hex 00 00 00 01 0E +Erlang <<0,0,0,1,14>> + +RpbDelResp - only message code defined +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-counter-store.md new file mode 100644 index 0000000000..6c6e294182 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-counter-store.md @@ -0,0 +1,31 @@ +--- +title: "PBC Data Type Counter Store" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Data Type Counter Store" + identifier: "pbc_dt_counter_store" + weight: 117 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/dt-counter-store + - /riak/kv/2.9.1/dev/references/protocol-buffers/dt-counter-store +--- + +An operation to update a [counter]({{}}riak/kv/2.9.1/developing/data-types). + +## Request + +```protobuf +message CounterOp { + optional sint64 increment = 1; +} +``` + +The `increment` value specifies how much the counter will be incremented +or decremented, depending on whether the `increment` value is positive +or negative. This operation can be used to update counters that are +stored on their own in a key or [within a map]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..2acf77b0f4 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,127 @@ +--- +title: "PBC Data Type Fetch" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Data Type Fetch" + identifier: "pbc_dt_fetch" + weight: 114 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/dt-fetch + - /riak/kv/2.9.1/dev/references/protocol-buffers/dt-fetch +--- + +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.9.1/developing/data-types). This request results in a `DtFetchResp` +message (explained in the **Response** section below). + +## Request + +```protobuf +message DtFetchReq { + required bytes bucket = 1; + required bytes key = 2; + required bytes type = 3; + optional uint32 r = 4; + optional uint32 pr = 5; + optional bool basic_quorum = 6; + optional bool notfound_ok = 7; + optional uint32 timeout = 8; + optional bool sloppy_quorum = 9; + optional uint32 n_val = 10; + optional bool include_context = 11 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`key` | The key where the Data Type is stored +`type` | The [Using Bucket Types]({{}}riak/kv/2.9.1/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) + +#### Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. 
Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-props) for more information.
+>
+> Furthermore, you can assign an integer value to the `r` and `pr`
+parameters, provided that that integer value is less than or equal to N,
+_or_ a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+
+Parameter | Description
+:---------|:-----------
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the fetch request will be sent
+`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client
+
+## Response
+
+The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+
+```protobuf
+message DtFetchResp {
+  enum DataType {
+    COUNTER = 1;
+    SET = 2;
+    MAP = 3;
+  }
+
+  optional bytes context = 1;
+  required DataType type = 2;
+  optional DtValue value = 3;
+}
+```
+
+If the `include_context` option is specified, an opaque "context" value
+will be returned along with the user-readable data. When sending an
+update request, the client should send this context as well, just as one
+would send a [vclock]({{}}riak/kv/2.9.1/learn/glossary/#vector-clock) for standard KV updates.
+
+The type of the Data Type is specified in the `type` field, and must be
+one of the three possible values of the `DataType` enum (`COUNTER`,
+`SET`, or `MAP`).
+
+The current value of the Data Type is contained in the `value` field,
+which itself contains a `DtValue` message. This message will have the
+following structure:
+
+```protobuf
+message DtValue {
+  optional sint64 counter_value = 1;
+  repeated bytes set_value = 2;
+  repeated MapEntry map_value = 3;
+}
+```
+
+If the Data Type queried is a counter, it will return an integer value
+for the counter; if a set, it will return the set's current value, in
+bytes; if a map, it will return a `MapEntry` message.
`MapEntry` messages +are structured as follows: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-map-store.md new file mode 100644 index 0000000000..36acc85866 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-map-store.md @@ -0,0 +1,73 @@ +--- +title: "PBC Data Type Map Store" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Data Type Map Store" + identifier: "pbc_dt_map_store" + weight: 119 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/dt-map-store + - /riak/kv/2.9.1/dev/references/protocol-buffers/dt-map-store +--- + +An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc. + +## Request + +Operations on maps are requested using a `MapOp` message, which has the following structure: + +```protobuf +message MapOp { + repeated MapField adds = 1; + repeated MapField removes = 2; + repeated MapUpdate updates = 3; +} +``` + +In a `MapOp` message, you can either add or remove fields (sets, counters, or maps) to or from the map or update a field or multiple fields. You can include as many field additions or removals and/or field updates as you wish. + +Adding or removing a field involves including a `MapField` message in your `MapOp` operation: + +```protobuf +message MapField { + enum MapFieldType { + COUNTER = 1; + SET = 2; + REGISTER = 3; + FLAG = 4; + MAP = 5; + } + required bytes name = 1; + required MapFieldType type = 2; +} +``` + +The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated. + +If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure: + +```protobuf +message MapUpdate { + enum FlagOp { + ENABLE = 1; + DISABLE = 2; + } + required MapField field = 1; + optional CounterOp counter_op = 2; + optional SetOp set_op = 3; + optional bytes register_op = 4; + optional FlagOp flag_op = 5; + optional MapOp map_op = 6; +} +``` + +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-set-store). + +If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). + +Updating a register does not involve sending a special message type. 
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..31d3846f41 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,32 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/dt-set-store + - /riak/kv/2.9.1/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..6621fb0a5e --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,128 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/dt-store + - /riak/kv/2.9.1/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.9.1/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.9.1/using/cluster-operations/bucket-types). 
+ +Also required is a `DtOp` message that specifies which operation is to +be performed, depending on whether the Data Type being updated is a +[counter]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-map-store). + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description +:---------|:----------- +`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.9.1/learn/glossary/#vector-clock) +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_body` | Whether to return the contents of the stored object. Defaults to `false`. +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored +`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be returned to the client when the `DtUpdateResp` is sent to the client. + +## Response + +The response to a Data Type update request is analogous to +[`RpbPutResp`]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/store-object) for KV operations. If the +`return_body` is set in the update request message (as explained above), +the message will include the opaque context of the Data Type (`context`) +and the new value of the Data Type _after_ the update has completed +(depending on whether the Data Type is a counter, set, or map). If no +key was specified in the update request, it will include the +Riak-assigned key (`key`). + +```protobuf +message DtUpdateResp { + optional bytes key = 1; + optional bytes context = 2; + optional sint64 counter_value = 3; + repeated bytes set_value = 4; + repeated MapEntry map_value = 5; +} +``` + +Assuming `return_body` is set to `true`: if a counter is updated, the +response will include an integer as the `counter_value`; if a set is +updated, a list of binaries will be return as the `set_value`; and if a +map is updated, the returned `map_value` will be a `MapEntry` message. 
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..cf413c96a3 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,31 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/dt-union + - /riak/kv/2.9.1/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..d229ae1382 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,181 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/fetch-object + - /riak/kv/2.9.1/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match
+`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it
+`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+
+## Response
+
+```protobuf
+message RpbGetResp {
+  repeated RpbContent content = 1;
+  optional bytes vclock = 2;
+  optional bool unchanged = 3;
+}
+```
+
+#### Values
+
+Value | Description
+:-----|:-----------
+`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty.
+`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings
+`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true`
+
+The content entries hold the object value and any metadata.
+Below is the structure of an `RpbContent` message, which is
+included in GET/PUT responses (`RpbGetResp` (above) and
+[`RpbPutResp`]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/store-object), respectively):
+
+```protobuf
+message RpbContent {
+  required bytes value = 1;
+  optional bytes content_type = 2;
+  optional bytes charset = 3;
+  optional bytes content_encoding = 4;
+  optional bytes vtag = 5;
+  repeated RpbLink links = 6;
+  optional uint32 last_mod = 7;
+  optional uint32 last_mod_usecs = 8;
+  repeated RpbPair usermeta = 9;
+  repeated RpbPair indexes = 10;
+  optional bool deleted = 11;
+}
+```
+
+From the above, we can see that an `RpbContent` message will always
+contain the binary `value` of the object. But it could also contain any
+of the following optional parameters:
+
+* `content_type` --- The content type of the object, e.g. `text/plain`
+  or `application/json`
+* `charset` --- The character encoding of the object, e.g. `utf-8`
+* `content_encoding` --- The content encoding of the object, e.g.
+  `gzip`
+* `vtag` --- The object's [vtag]({{}}riak/kv/2.9.1/learn/glossary/#vector-clock)
+* `links` --- This parameter is associated with the now-deprecated link
+  walking feature and should not be used by Riak clients
+* `last_mod` --- A timestamp for when the object was last modified, in
+  [Unix time](http://en.wikipedia.org/wiki/Unix_time) (seconds)
+* `last_mod_usecs` --- The microseconds portion of the timestamp for
+  when the object was last modified
+* `usermeta` --- This field stores user-specified key/value metadata
+  pairs to be associated with the object. `RpbPair` messages used to
+  send metadata of this sort are structured like this:
+
+    ```protobuf
+    message RpbPair {
+      required bytes key = 1;
+      optional bytes value = 2;
+    }
+    ```
+    Notice that a pair can hold both a key and a value, or just a key.
+    `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.9.1/developing/usage/secondary-indexes) to objects (in the optional
+    `indexes` field).
+* `deleted` --- Whether the object has been deleted (i.e. whether a + tombstone for the object has been found under the specified key) + +{{% note title="Note on missing keys" %}} +Remember: if a key is not stored in Riak, an `RpbGetResp` response without the +`content` and `vclock` fields will be returned. This should be mapped to +whatever convention the client language uses to return not found. The Erlang +client, for example, returns the atom `{error, notfound}`. +{{% /note %}} + +## Example + +#### Request + +``` +Hex 00 00 00 07 09 0A 01 62 12 01 6B +Erlang <<0,0,0,7,9,10,1,98,18,1,107>> + +RpbGetReq protoc decode: +bucket: "b" +key: "k" +``` + +#### Response + +``` +Hex 00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44 + 6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B + 49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B + CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65 + 30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00 +Erlang <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122, + 56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64, + 224,185,6,18,31,107,206,97,96,96,96,204,96,2.9.1,82,44,172,194,91,63, + 101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>> + +RpbGetResp protoc decode: +content { + value: "v2" + vtag: "3SDlf4INKz8hNdhyImKIru" + last_mod: 1271442363 + last_mod_usecs: 105696 +} +vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000" +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-props.md new file mode 100644 index 0000000000..525c9d5e32 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-props.md @@ -0,0 +1,110 @@ +--- +title: "PBC Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Get Bucket Properties" + identifier: "pbc_get_bucket_props" + weight: 102 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/get-bucket-props + - /riak/kv/2.9.1/dev/references/protocol-buffers/get-bucket-props +--- + +Fetch a bucket's properties. + +## Request + +```protobuf +message RpbGetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.9.1/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +the `default` bucket type will be used. 
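+
+As a quick usage sketch, fetching bucket properties over PBC with the
+official Python client looks roughly like the following. This assumes the
+legacy Basho Python client (`riak`); method names may differ between client
+versions, so treat this as a sketch rather than a definitive API reference.
+
+```python
+import riak  # assumes the Basho Python client package is installed
+
+client = riak.RiakClient(protocol="pbc", host="127.0.0.1", pb_port=8087)
+bucket = client.bucket("friends")
+
+# Sends an RpbGetBucketReq under the hood and returns the properties.
+props = bucket.get_properties()
+print(props.get("n_val"), props.get("allow_mult"))
+```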
+
+## Response
+
+When an `RpbGetBucketReq` message is sent to Riak, it will respond with
+an `RpbGetBucketResp` message, which returns the bucket's properties:
+
+```protobuf
+message RpbGetBucketResp {
+  required RpbBucketProps props = 1;
+}
+```
+
+The `RpbBucketProps` value itself is structured as follows:
+
+```protobuf
+message RpbBucketProps {
+  optional uint32 n_val = 1;
+  optional bool allow_mult = 2;
+  optional bool last_write_wins = 3;
+  repeated RpbCommitHook precommit = 4;
+  optional bool has_precommit = 5 [default = false];
+  repeated RpbCommitHook postcommit = 6;
+  optional bool has_postcommit = 7 [default = false];
+  optional RpbModFun chash_keyfun = 8;
+  optional RpbModFun linkfun = 9;
+  optional uint32 old_vclock = 10;
+  optional uint32 young_vclock = 11;
+  optional uint32 big_vclock = 12;
+  optional uint32 small_vclock = 13;
+  optional uint32 pr = 14;
+  optional uint32 r = 15;
+  optional uint32 w = 16;
+  optional uint32 pw = 17;
+  optional uint32 dw = 18;
+  optional uint32 rw = 19;
+  optional bool basic_quorum = 20;
+  optional bool notfound_ok = 21;
+  optional bytes backend = 22;
+  optional bool search = 23;
+  enum RpbReplMode {
+    FALSE = 0;
+    REALTIME = 1;
+    FULLSYNC = 2;
+    TRUE = 3;
+  }
+  optional RpbReplMode repl = 24;
+  optional bytes search_index = 25;
+  optional bytes datatype = 26;
+  optional bool consistent = 27;
+}
+```
+
+#### Optional Response Values
+
+Each `RpbBucketProps` message returns all of the properties associated
+with a particular bucket. Default values for bucket properties, as well
+as descriptions of all of the above properties, can be found in the
+[configuration file]({{}}riak/kv/2.9.1/configuring/reference/#default-bucket-properties) documentation.
+
+It should be noted that the value of an `RpbBucketProps` message may
+include other message types, such as `RpbModFun` (specifying
+module-function pairs for bucket properties that require them) and
+`RpbCommitHook` (specifying the module-function pair and name of a
+commit hook). Those message types are structured like this:
+
+```protobuf
+message RpbModFun {
+  required bytes module = 1;
+  required bytes function = 2;
+}
+
+message RpbCommitHook {
+  optional RpbModFun modfun = 1;
+  optional bytes name = 2;
+}
+```
+
+{{% note title="Note on `RpbReplMode`" %}}
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/).
+{{% /note %}}
diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-type.md
new file mode 100644
index 0000000000..1c5eb2870a
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-type.md
@@ -0,0 +1,33 @@
+---
+title: "PBC Get Bucket Type"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Get Bucket Type"
+    identifier: "pbc_get_bucket_type"
+    weight: 112
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/protocol-buffers/get-bucket-type
+  - /riak/kv/2.9.1/dev/references/protocol-buffers/get-bucket-type
+---
+
+Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.9.1/using/cluster-operations/bucket-types).
+
+## Request
+
+```protobuf
+message RpbGetBucketTypeReq {
+  required bytes type = 1;
+}
+```
+
+Only the name of the bucket type needs to be specified (under `type`).
+ +## Response + +A bucket type's properties will be sent to the client as part of an +[`RpbBucketProps`]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/get-client-id.md new file mode 100644 index 0000000000..aef5cd4789 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/get-client-id.md @@ -0,0 +1,61 @@ +--- +title: "PBC Get Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Get Client ID" + identifier: "pbc_get_client_id" + weight: 127 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/get-client-id + - /riak/kv/2.9.1/dev/references/protocol-buffers/get-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Get the client id used for this connection. Client ids are used for +conflict resolution and each unique actor in the system should be +assigned one. A client id is assigned randomly when the socket is +connected and can be changed using [Set Client ID]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/set-client-id). + +## Request + +Just the `RpbGetClientIdReq` message code. No request message defined. + +## Response + +```protobuf +// Get ClientId Request - no message defined, just send RpbGetClientIdReq +message code +message RpbGetClientIdResp { + required bytes client_id = 1; // Client id in use for this connection +} +``` + +## Example + +Request + +``` +Hex 00 00 00 01 03 +Erlang <<0,0,0,1,3>> +``` + + +Response + +``` +Hex 00 00 00 07 04 0A 04 01 65 01 B5 +Erlang <<0,0,0,7,4,10,4,1,101,1,181>> + +RpbGetClientIdResp protoc decode: +client_id: "001e001265" +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..567a82d032 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,76 @@ +--- +title: "PBC List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "List Buckets" + identifier: "pbc_list_buckets" + weight: 100 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/list-buckets + - /riak/kv/2.9.1/dev/references/protocol-buffers/list-buckets +--- + +List all of the bucket names available. + +{{% note title="Caution" %}} +This call can be expensive for the server. Do not use in performance-sensitive +code. +{{% /note %}} + + +## Request + +Only the message code is required. 
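+
+Concretely, the request frame is just a length of `1` followed by the
+`RpbListBucketsReq` message code (15), as the wire-level example below also
+shows. A minimal Python sketch:
+
+```python
+import struct
+
+# No protobuf payload: the frame is the 32-bit length (1) plus message code 15.
+frame = struct.pack(">IB", 1, 15)
+assert frame == b"\x00\x00\x00\x01\x0f"  # matches the example below
+```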
+ +## Response + + +```protobuf +message RpbListBucketsResp { + repeated bytes buckets = 1; +} +``` + + +Values + +* `buckets` --- Buckets on the server + +## Example + +#### Request + +```bash +Hex 00 00 00 01 0F +Erlang <<0,0,0,1,15>> + +RpbListBucketsReq - only message code defined +``` + + +#### Response + +```bash +Hex 00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62 + 34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02 + 62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37 +Erlang <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10, + 3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>> + +RpbListBucketsResp protoc decode: +buckets: "b1" +buckets: "b5" +buckets: "b4" +buckets: "b8" +buckets: "b3" +buckets: "b10" +buckets: "b9" +buckets: "b2" +buckets: "b6" +buckets: "b7" +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/list-keys.md new file mode 100644 index 0000000000..1484fb4867 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/list-keys.md @@ -0,0 +1,97 @@ +--- +title: "PBC List Keys" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "List Keys" + identifier: "pbc_list_keys" + weight: 101 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/list-keys + - /riak/kv/2.9.1/dev/references/protocol-buffers/list-keys +--- + +List all of the keys in a bucket. This is a streaming call, with +multiple response messages sent for each request. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```protobuf +message RpbListKeysReq { + required bytes bucket = 1; +} +``` + +Optional Parameters + +* `bucket` --- bucket to get keys from + +## Response + +```protobuf +message RpbListKeysResp { + repeated bytes keys = 1; + optional bool done = 2; +} +``` + +#### Values + +* **keys** - batch of keys in the bucket. +* **done** - set true on the last response packet + +## Example + +#### Request + +```bash +Hex 00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73 +Erlang <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>> + +RpbListKeysReq protoc decode: +bucket: "listkeys" + +``` + +#### Response Packet 1 + +```bash +Hex 00 00 00 04 12 0A 01 34 +Erlang <<0,0,0,4,18,10,1,52>> + +RpbListKeysResp protoc decode: +keys: "4" + +``` + +#### Response Packet 2 + +```bash +Hex 00 00 00 08 12 0A 02 31 30 0A 01 33 +Erlang <<0,0,0,8,18,10,2,49,48,10,1,51>> + +RpbListKeysResp protoc decode: +keys: "10" +keys: "3" +``` + + +#### Response Packet 3 + +```bash +Hex 00 00 00 03 12 10 01 +Erlang <<0,0,0,3,18,16,1>> + +RpbListKeysResp protoc decode: +done: true + +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/mapreduce.md new file mode 100644 index 0000000000..a8f634b0d6 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/mapreduce.md @@ -0,0 +1,149 @@ +--- +title: "PBC MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "MapReduce" + identifier: "pbc_mapreduce" + weight: 107 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/mapreduce + - /riak/kv/2.9.1/dev/references/protocol-buffers/mapreduce +--- + +Execute a MapReduce job. 
+
+## Request
+
+
+```protobuf
+message RpbMapRedReq {
+  required bytes request = 1;
+  required bytes content_type = 2;
+}
+```
+
+
+Required Parameters
+
+* `request` --- MapReduce job
+* `content_type` --- Encoding for MapReduce job
+
+MapReduce jobs can be encoded in two different ways:
+
+* `application/json` --- JSON-encoded MapReduce job
+* `application/x-erlang-binary` --- Erlang external term format
+
+The JSON encoding is the same as the [REST API]({{}}riak/kv/2.9.1/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{}}riak/kv/2.9.1/developing/app-guide/advanced-mapreduce/#erlang).
+
+## Response
+
+The results of the MapReduce job are returned for each phase that
+generates a result, encoded in the same format the job was submitted in.
+Multiple response messages will be returned, followed by a final message
+at the end of the job.
+
+```protobuf
+message RpbMapRedResp {
+  optional uint32 phase = 1;
+  optional bytes response = 2;
+  optional bool done = 3;
+}
+```
+
+
+Values
+
+* `phase` --- Phase number of the MapReduce job
+* `response` --- Response encoded with the `content_type` submitted
+* `done` --- Set `true` on the last response packet
+
+## Example
+
+Here is an example of submitting a JSON-encoded job that sums up a bucket
+full of JSON-encoded values.
+
+```
+{"inputs": "bucket_501653",
+ "query":
+    [{"map": {"arg": null,
+              "name": "Riak.mapValuesJson",
+              "language": "javascript",
+              "keep": false}},
+     {"reduce": {"arg": null,
+                 "name": "Riak.reduceSum",
+                 "language": "javascript",
+                 "keep": true}}]}
+```
+
+Request
+
+```bash
+Hex      00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
+         22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
+         33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
+         6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
+         6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
+         6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
+         2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
+         61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
+         70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
+         65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
+         6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
+         69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
+         22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
+         61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
+         3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
+         69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
+Erlang <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
+         117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
+         121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
+         110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
+         109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
+         110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
+         116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
+         44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
+         32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
+         46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
+         97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
+         34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
+         112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>
+
+RpbMapRedReq protoc decode:
+request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null,
+"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}},
+ {"reduce": {"arg": null, "name": "Riak.reduceSum",
"language": +"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/ping.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..ecd48834f0 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/ping.md @@ -0,0 +1,42 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/ping + - /riak/kv/2.9.1/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..e5d7fa2a84 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,59 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/2.9.1/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/2.9.1/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
+
+## Example
+
+Request to reset the properties for the bucket `friends`:
+
+#### Request
+
+```bash
+Hex      00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
+Erlang <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>
+
+RpbResetBucketReq protoc decode:
+bucket: "friends"
+
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 01 1E
+Erlang <<0,0,0,1,30>>
+
+RpbResetBucketResp - only message code defined
+```
diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/search.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..a5a47c43f5
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/search.md
@@ -0,0 +1,148 @@
+---
+title: "PBC Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Search"
+    identifier: "pbc_search"
+    weight: 109
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/protocol-buffers/search
+  - /riak/kv/2.9.1/dev/references/protocol-buffers/search
+---
+
+Send a Search request to retrieve a list of documents, along with a few
+stats.
+
+## Request
+
+
+```protobuf
+message RpbSearchQueryReq {
+  required bytes q = 1;
+  required bytes index = 2;
+  optional uint32 rows = 3;
+  optional uint32 start = 4;
+  optional bytes sort = 5;
+  optional bytes filter = 6;
+  optional bytes df = 7;
+  optional bytes op = 8;
+  repeated bytes fl = 9;
+  optional bytes presort = 10;
+}
+```
+
+Required Parameters
+
+* `q` --- The contents of the query
+* `index` --- The name of the index to search
+
+Optional Parameters
+
+* `rows` --- The maximum number of rows to return
+* `start` --- A start offset, i.e. the number of keys to skip before
+  returning values
+* `sort` --- How the search results are to be sorted
+* `filter` --- Filters the search with an additional query scoped to
+  inline fields
+* `df` --- Override the `default_field` setting in the schema file
+* `op` --- `and` or `or`, to override the `default_op` operation setting
+  in the schema file
+* `fl` --- Limits which fields are returned
+* `presort` --- Presort the results. The options are `key` or `score`
+
+
+## Response
+
+The results of a search query are returned as a repeating list of 0 or
+more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
+more key/value pairs (`RpbPair`) that match the given request
+parameters. The response also includes the maximum search score and the
+number of results.
+
+
+```protobuf
+// RpbPair is a generic key/value pair datatype used for
+// other message types
+message RpbPair {
+  required bytes key = 1;
+  optional bytes value = 2;
+}
+
+message RpbSearchDoc {
+  repeated RpbPair fields = 1;
+}
+
+message RpbSearchQueryResp {
+  repeated RpbSearchDoc docs = 1;
+  optional float max_score = 2;
+  optional uint32 num_found = 3;
+}
+```
+
+Values
+
+* `docs` --- A list of docs that match the search request
+* `max_score` --- The top score returned
+* `num_found` --- The total number of values matched by this search
+
+
+## Example
+
+Request
+
+Here we search for any animals that begin with the string `pig`. We only
+want the first 100, and sort the values by a `name` field.
+ +```bash +RpbSearchQueryReq protoc decode: +q: "pig*" +index: "animals" +rows: 100 +start: 0 +sort: "name" + +Hex 00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E + 69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65 +Erlang <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110, + 105,109,97,108,115,24,100,32,0,42,4,110,97, + 109,101>> +``` + +Response + +```bash +Hex 00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D + 61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65 + 12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69 + 6D 61 6C 12 06 70 69 67 65 6F 6E 18 02 +Erlang <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109, + 97,108,18,3,112,105,103,10,12,10,4,110,97, + 109,101,18,4,102,114,101,100,10,18,10,16,10, + 6,97,110,105,109,97,108,18,6,112,105,103, + 101,111,110,24,2>> + +RpbSearchQueryResp protoc decode: +docs { + fields { + key: "animal" + value: "pig" + } + fields { + key: "name" + value: "fred" + } +} +docs { + fields { + key: "animal" + value: "pigeon" + } +} +num_found: 2 +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/secondary-indexes.md new file mode 100644 index 0000000000..e8721bb47f --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/secondary-indexes.md @@ -0,0 +1,121 @@ +--- +title: "PBC Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Secondary Indexes" + identifier: "pbc_secondary_indexes" + weight: 108 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/secondary-indexes + - /riak/kv/2.9.1/dev/references/protocol-buffers/secondary-indexes +--- + +Request a set of keys that match a secondary index query. + +## Request + +```protobuf +message RpbIndexReq { + enum IndexQueryType { + eq = 0; + range = 1; + } + required bytes bucket = 1; + required bytes index = 2; + required IndexQueryType qtype = 3; + optional bytes key = 4; + optional bytes range_min = 5; + optional bytes range_max = 6; + optional bool return_terms = 7; + optional bool stream = 8; + optional uint32 max_results = 9; + optional bytes continuation = 10; + optional uint32 timeout = 11; + optional bytes type = 12; + optional bytes term_regex = 13; + optional bool pagination_sort = 14; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`index` | The name of the index to be queried +`qtype` | The type of index query to be performed. 
This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`key` | The name of the index to be queried if `qtype` is set to `eq` +`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range` +`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only) +`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated +`max_results` | If pagination is turned on, the number of results to be returned to the client +`continuation` | If set to `true`, values are returned in a paginated response +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.9.1/developing/usage/bucket-types). +`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query +`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key + +## Response + +The results of a Secondary Index query are returned as a repeating list +of 0 or more keys that match the given request parameters. + +```protobuf +message RpbIndexResp { + repeated bytes keys = 1; + repeated RpbPair results = 2; + optional bytes continuation = 3; + optional bool done = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`keys` | A list of keys that match the index request +`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/fetch-object). +`continuation` | Used for paginated responses +`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). + +## Example + +#### Request + +Here we look for any exact matches of `chicken` on an `animal_bin` index +for a bucket named `farm`. 
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/server-info.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..16cdf3029d --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,58 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/server-info + - /riak/kv/2.9.1/dev/references/protocol-buffers/server-info +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..f351be4cdc --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,68 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/2.9.1/dev/references/protocol-buffers/set-bucket-props +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/2.9.1/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
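+
+For comparison, here is a hedged sketch of the same operation from the
+official [Erlang client](https://github.com/basho/riak-erlang-client), which
+builds and sends the `RpbSetBucketReq` for you. It assumes a node listening
+on `127.0.0.1:8087`:
+
+```erlang
+%% Connect and enable allow_mult on the bucket <<"friends">>
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
+```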
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..1f4ecdbf81 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,31 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/2.9.1/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.9.1/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..74d5e7c988 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,62 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/set-client-id + - /riak/kv/2.9.1/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
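+
+If you are using the official [Erlang client](https://github.com/basho/riak-erlang-client),
+this message pair is wrapped by `riakc_pb_socket:set_client_id/2`. A minimal
+sketch, assuming a local node on port 8087 (and keeping the deprecation notice
+above in mind):
+
+```erlang
+%% Set an explicit client ID for this connection; the exact return shape
+%% may vary by client version, so we don't pattern-match on it here.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+_Result = riakc_pb_socket:set_client_id(Pid, <<"my-client-id">>).
+```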
+
+## Example
+
+Request
+
+```
+Hex 00 00 00 07 05 0A 04 01 65 01 B6
+Erlang <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "001e001266"
+
+```
+
+
+Response
+
+```
+Hex 00 00 00 01 06
+Erlang <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..254a057d82
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,150 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/protocol-buffers/store-object
+  - /riak/kv/2.9.1/dev/references/protocol-buffers/store-object
+---
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/2.9.1/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.9.1/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.9.1/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/2.9.1/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/2.9.1/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+    required bytes bucket = 1;
+    optional bytes key = 2;
+    optional bytes vclock = 3;
+    required RpbContent content = 4;
+    optional uint32 w = 5;
+    optional uint32 dw = 6;
+    optional bool return_body = 7;
+    optional uint32 pw = 8;
+    optional bool if_not_modified = 9;
+    optional bool if_none_match = 10;
+    optional bool return_head = 11;
+    optional uint32 timeout = 12;
+    optional bool asis = 13;
+    optional bool sloppy_quorum = 14;
+    optional uint32 n_val = 15;
+    optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, and
+`pw` parameters, provided that the integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier RpbGetResp message. Omit if this is a new key or if you deliberately want to create a sibling.
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`.
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_head` | Return the metadata for the now-stored object without returning the value of the object
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+
+The `if_not_modified`, `if_none_match`, and `asis` parameters are set
+only for messages sent between nodes in a Riak cluster and should not be
+set by Riak clients.
+
+#### Response
+
+```protobuf
+message RpbPutResp {
+    repeated RpbContent contents = 1;
+    optional bytes vclock = 2;
+    optional bytes key = 3;
+}
+```
+
+If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
+will contain the current object after the PUT completes, in `contents`,
+as well as the object's [causal context]({{}}riak/kv/2.9.1/learn/concepts/causal-context), in the `vclock`
+field. The `key` will be sent only if the server generated a random key
+for the object.
+
+If `return_body` is not set and no key is generated, the PUT response
+will be empty.
+
+## Example
+
+#### Request
+
+```
+Hex 00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B
+ 22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01
+Erlang <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34,
+ 98,97,114,34,125,40,2,56,1>>
+
+RpbPutReq protoc decode:
+bucket: "b"
+key: "k"
+content {
+  value: "{"foo":"bar"}"
+}
+w: 2
+return_body: true
+
+```
+
+#### Response
+
+```
+Hex 00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A
+ 22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39
+ 36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF
+ B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60
+ CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC
+ 0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0
+ 12 FA 20 89 2C 00
+Erlang <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125,
+ 42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86,
+ 106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96,
+ 96,96,202,96,202,5,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33,
+ 182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>>
+
+RpbPutResp protoc decode:
+contents {
+  value: "{"foo":"bar"}"
+  vtag: "1caykOD96iNAhomyeVjOYC"
+  last_mod: 1271453743
+  last_mod_usecs: 406416
+}
+vclock: "k316a```312`312005R,,351014206031L211214y254014Z!266G371 
+302l315I254rw|240022372 211,000"
+
+```
diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-delete.md
new file mode 100644
index 0000000000..29d0db57e9
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-delete.md
@@ -0,0 +1,33 @@
+---
+title: "PBC Yokozuna Index Delete"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Yokozuna Index Delete"
+    identifier: "pbc_yz_index_delete"
+    weight: 122
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/protocol-buffers/yz-index-delete
+ 
+ - /riak/kv/2.9.1/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/#message-codes) code with no data on success. + diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..bb82ac920a --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,59 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/yz-index-get + - /riak/kv/2.9.1/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..533dcbfbea --- /dev/null +++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,45 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.1/dev/references/protocol-buffers/yz-index-put + - /riak/kv/2.9.1/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). 
Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/#message-codes) code with no data on success.
+
diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-schema-get.md
new file mode 100644
index 0000000000..6fe55e7880
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-schema-get.md
@@ -0,0 +1,48 @@
+---
+title: "PBC Yokozuna Schema Get"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Yokozuna Schema Get"
+    identifier: "pbc_yz_schema_get"
+    weight: 123
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/protocol-buffers/yz-schema-get
+  - /riak/kv/2.9.1/dev/references/protocol-buffers/yz-schema-get
+---
+
+Fetch a [search schema]({{}}riak/kv/2.9.1/developing/usage/search-schemas) from Riak Search.
+
+## Request
+
+In a request message, you only need to specify the name of the schema as
+a binary (under `name`):
+
+```protobuf
+message RpbYokozunaSchemaGetReq {
+    required bytes name = 1; // Schema name
+}
+```
+
+## Response
+
+```protobuf
+message RpbYokozunaSchemaGetResp {
+  required RpbYokozunaSchema schema = 1;
+}
+```
+
+The response message will include an `RpbYokozunaSchema` structure.
+
+```protobuf
+message RpbYokozunaSchema {
+    required bytes name = 1;
+    optional bytes content = 2;
+}
+```
+
+This message includes the schema `name` and its XML `content`.
diff --git a/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-schema-put.md
new file mode 100644
index 0000000000..9e2b70cadb
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/protocol-buffers/yz-schema-put.md
@@ -0,0 +1,41 @@
+---
+title: "PBC Yokozuna Schema Put"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Yokozuna Schema Put"
+    identifier: "pbc_yz_schema_put"
+    weight: 124
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/references/protocol-buffers/yz-schema-put
+  - /riak/kv/2.9.1/dev/references/protocol-buffers/yz-schema-put
+---
+
+Create a new Solr [search schema]({{}}riak/kv/2.9.1/developing/usage/search-schemas).
+
+## Request
+
+```protobuf
+message RpbYokozunaSchemaPutReq {
+    required RpbYokozunaSchema schema = 1;
+}
+```
+
+Each message must contain an `RpbYokozunaSchema` object structure.
+
+```protobuf
+message RpbYokozunaSchema {
+    required bytes name = 1;
+    optional bytes content = 2;
+}
+```
+
+This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.9.1/developing/usage/search-schemas) `content` as XML.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/#message-codes) code with no data on success.
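+
+As a point of reference, the [Erlang client](https://github.com/basho/riak-erlang-client)
+wraps this message pair in `riakc_pb_socket:create_search_schema/3`. A minimal,
+hedged sketch (`my_schema.xml` is a hypothetical local file holding a complete
+Solr schema):
+
+```erlang
+%% Upload a schema named <<"my_schema">> whose content is a Solr schema XML binary
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, SchemaXML} = file:read_file("my_schema.xml"),
+ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXML).
+```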
diff --git a/content/riak/kv/2.9.1/developing/api/repl-hooks.md b/content/riak/kv/2.9.1/developing/api/repl-hooks.md
new file mode 100644
index 0000000000..f7ef1512c5
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/api/repl-hooks.md
@@ -0,0 +1,192 @@
+---
+title_supertext: "Riak Multi-Datacenter Replication:"
+title: "Hooks API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Multi-Datacenter REPL Hooks API"
+    identifier: "apis_repl_hooks"
+    weight: 100
+    parent: "developing_apis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.1/ops/mdc/v2/hooks
+  - /riak/kv/2.9.1/ops/mdc/v2/hooks
+---
+[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl
+
+This document is a guide to developing extensions for Riak's
+Multi-Datacenter Replication feature.
+
+## Replication Hooks
+
+Riak allows applications to register replication hooks to control
+either of the following:
+
+* when extra objects need to be replicated along with the current object
+* when an object should _not_ be replicated.
+
+To register a hook, you must call the following function in an
+application-specific Erlang module, where `MyMod` is to be replaced
+with the name of your custom module:
+
+```erlang
+riak_core:register([{repl_helper, MyMod}]).
+```
+
+## Replication Hook API
+
+A replication hook must implement the following functions:
+
+### send_realtime/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook controls whether an [object][object]
+replicated in realtime should be sent. To send this object, return `ok`;
+to prevent the object from being sent, return `cancel`. You can also
+return a list of Riak objects to be replicated immediately *before* the
+current object. This is useful when you have an object that refers to
+other objects, e.g. a chunked file, and want to ensure that all of the
+dependency objects are replicated before the dependent object.
+
+### send/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook is used in fullsync replication. To send this
+[object][object],
+return `ok`; to prevent the object from being sent, return `cancel`. You
+can also return a list of Riak objects to be replicated immediately
+*before* the current object. This is useful when you have an object
+that refers to other objects, e.g. a chunked file, and want to ensure
+that all of the dependency objects are replicated before the dependent object.
+
+### recv/1
+
+```erlang
+(riak_object) -> ok | cancel
+```
+
+When an [object][object]
+is received by the client site, this hook is run. You can use it to
+update metadata or to deny the object.
+
+## Implementing a Sample Replication Hook
+
+The following is a simple replication hook that will log when an object
+is received via replication. For more information about the functions in
+the sample, see the [Replication Hook API](#replication-hook-api) section above.
+
+Here is the relevant Erlang code:
+
+```erlang
+%% Riak Enterprise MDC replication hook sample
+
+-module(riak_replication_hook_sample).
+-export([register/0]).
+-export([recv/1, send/2, send_realtime/2]).
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + diff --git a/content/riak/kv/2.9.1/developing/app-guide.md b/content/riak/kv/2.9.1/developing/app-guide.md new file mode 100644 index 0000000000..eb5fda1b54 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/app-guide.md @@ -0,0 +1,416 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/2.9.1/dev/using/application-guide/ + - /riak/kv/2.9.1/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/2.9.1/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.9.1/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.9.1/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/2.9.1/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.9.1/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.9.1/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.9.1/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.9.1/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.9.1/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.9.1/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.9.1/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.9.1/developing/usage/search +[use ref search]: {{}}riak/kv/2.9.1/using/reference/search +[usage 2i]: {{}}riak/kv/2.9.1/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.9.1/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.9.1/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.9.1/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.9.1/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.9.1/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.9.1/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.9.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.9.1/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.1/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/2.9.1/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/2.9.1/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.1/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.1/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.1/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.1/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.1/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.1/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.9.1/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.9.1/using/reference/strong-consistency +[cluster 
ops strong consistency]: {{}}riak/kv/2.9.1/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/2.9.1/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/2.9.1/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/2.9.1/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/2.9.1/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/2.9.1/setup/installing
+[getting started]: {{}}riak/kv/2.9.1/developing/getting-started
+[usage index]: {{}}riak/kv/2.9.1/developing/usage
+[glossary]: {{}}riak/kv/2.9.1/learn/glossary
+[usage search schema]: {{}}riak/kv/2.9.1/developing/usage/search-schemas
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answers to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data** --- While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects** --- Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects** --- Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys** --- It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects. Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]** --- If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so. But if
+  your data can be modeled as a [counter][dev data types#counters],
+  [set][dev data types#sets], or [map][dev data types#maps], you
+  should seriously consider using [Riak Data Types][dev data types],
+  which can speed application development and transfer a great deal of
+  complexity away from the application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you use it to store:
+
+* **Objects that exceed 1-2MB in size** --- If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies** --- If your data cannot be
+  easily denormalized or if it requires that objects can be easily
+  assembled into and accessible as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommended for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search] --- Getting started with Riak Search
+* [Search Details][use ref search] --- A detailed overview of the concepts and design
+  considerations behind Riak Search
+* [Search Schema][usage search schema] --- How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API** --- Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon).
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you.
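+
+As a quick taste of the API, a basic query from the [Erlang client](https://github.com/basho/riak-erlang-client)
+might look like the sketch below; the index name `famous` and the field
+`name_s` are illustrative assumptions, not fixtures of your cluster:
+
+```erlang
+%% Query an existing search index for names beginning with "Lion"
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>).
+```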
+ +### When Not to Use Search + +* **When deep pagination is needed** --- At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** --- In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] --- A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] --- A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] --- An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** --- If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** --- If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** --- If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** --- While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
If you have data that requires
+  a modeling solution that can't be covered, you should stick to
+  standard K/V operations.
+* **When object size is of significant concern** --- Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth. Riak
+comes equipped with a set of default MapReduce jobs that you can employ,
+or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce] --- A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce] --- A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only** --- You should use MapReduce only when
+  truly necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do** --- Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i] --- A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i] --- Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination** --- At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead.
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes** --- If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes.
+* **If you're using Bitcask** --- 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy in accordance with one of these Data Types. To see
+if this feature might be a good fit for your application, we recommend
+checking out the following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers that want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable?
+
+Although Riak always performs best when storing and retrieving immutable
+data, Riak also handles mutable objects very ably using a variety of
+eventual consistency principles. 
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] --- Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] --- A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] --- How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] --- A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] --- A listing of frequently used terms in Riak's + documentation + diff --git a/content/riak/kv/2.9.1/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.9.1/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..b006b086ff --- /dev/null +++ b/content/riak/kv/2.9.1/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,798 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/2.9.1/dev/advanced/mapreduce/ + - /riak/kv/2.9.1/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/2.9.1/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.9.1/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.9.1/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.9.1/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.9.1/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.9.1/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to +run MapReduce jobs using Erlang or JavaScript. + +{{% note title="Deprecation Warning" %}} +Javascript MapReduce is deprecated and will be removed in a future version. +{{% /note %}} + + +### Why Do We Use MapReduce for Querying Riak KV? + +Key/value stores like Riak KV generally do not offer the kinds of complex +querying capabilities found in other data storage systems, such as +relational databases. MapReduce enables you to perform powerful queries +over the data stored in Riak KV but should be used with caution. + +The main goal of MapReduce is to spread the processing of a query across +many systems to take advantage of parallel processing power. This is +generally done by dividing the query into several steps, i.e. dividing +the dataset into several chunks and then running those step/chunk pairs +on separate physical hosts. Riak KV's MapReduce has an additional goal: +increasing data locality. When processing a large dataset, it's often +much more efficient to take the computation to the data than it is to +bring the data to the computation. + +"Map" and "Reduce" are phases in the query process. Map functions take +one piece of data as input and produce zero or more results as output. +If you're familiar with [mapping over a list][mapping list] +in functional programming languages, you're already familiar with the +"Map" steps in a MapReduce query. + +## MapReduce caveats + +MapReduce should generally be treated as a fallback rather than a +standard part of an application. There are often ways to model data +such that dynamic queries become single key retrievals, which are +dramatically faster and more reliable in Riak KV, and tools such as Riak +search and 2i are simpler to use and may place less strain on a +cluster. + +### R=1 + +One consequence of Riak KV's processing model is that MapReduce queries +have an effective `R` value of 1. The queries are distributed +to a representative sample of the cluster where the data is expected to +be found, and if one server lacks a copy of data it's supposed to have, +a MapReduce job will not attempt to look for it elsewhere. + +For more on the value of `R`, see our documentation on [replication properties][apps replication properties]. + +### Key lists + +Asking Riak KV to generate a list of all keys in a production environment +is generally a bad idea. It's an expensive operation. + +Attempting to constrain that operation to a bucket (e.g., +`mapred_bucket` as used below) does not help because Riak KV must still +pull all keys from storage to determine which ones are in the +specified bucket. + +If at all possible, run MapReduce against a list of known keys. + +### Code distribution + +As we'll discuss in this document, the functions invoked from Erlang +MapReduce must be available on all servers in the cluster unless +using the client library from an Erlang shell. + +### Security restrictions + +If Riak's security functionality is enabled, there are two +restrictions on MapReduce that come into play: + +* The `riak_kv.mapreduce` permission must be granted to the user (or + via the user's groups) +* Other than the module `riak_kv_mapreduce`, any Erlang modules + distributed with Riak KV will **not** be accessible to custom MapReduce + code unless made available via the `add_path` mechanism documented + in [Installing Custom Code][use ref custom code]. + +## How Riak KV's MapReduce Queries Are Specified + +MapReduce queries in Riak KV have two components: (1) a list of inputs and +(2) a list of "steps," or "phases." 
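+
+Expressed as Erlang terms (the shape the [Erlang client][erlang client]'s
+`riakc_pb_socket:mapred/3` expects), the two components of a query might look
+like this sketch, which reuses the `mr_example:get_keys/3` function defined
+later in this document; the keys shown are hypothetical:
+
+```erlang
+{ok, Client} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+%% (1) The inputs: a list of {Bucket, Key} object locations
+Inputs = [{<<"messages">>, <<"msg1">>}, {<<"messages">>, <<"msg2">>}],
+%% (2) The phases: a single map phase naming a {Module, Function} pair,
+%%     with no static argument, whose results are kept in the final output
+Query = [{map, {modfun, mr_example, get_keys}, none, true}],
+{ok, MapResults} = riakc_pb_socket:mapred(Client, Inputs, Query).
+```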
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any is included, with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it.
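+
+To make the set-union example concrete, here is one way such a reduce
+function could be written (a sketch, using the `qfun` phase form described
+later in this document; `lists:usort/1` both sorts and deduplicates, so
+re-reducing earlier output is harmless):
+
+```erlang
+%% Union is safe to apply to its own previous output:
+%% F([1,2,2,3]) yields [1,2,3], and F([1,2,3] ++ [3,4,5]) yields [1,2,3,4,5]
+Union = fun(Values, _Arg) -> lists:usort(Values) end,
+UnionPhase = {reduce, {qfun, Union}, none, true}.
+```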
+ +### How a Link Phase Works in Riak KV + +Link phases find links matching patterns specified in the query +definition. The patterns specify which buckets and tags links must have. + +"Following a link" means adding it to the output list of this phase. The +output of this phase is often most useful as input to a map phase or to +another reduce phase. + +## Invoking MapReduce + +To illustrate some key ideas, we'll define a simple module that +implements a map function to return the key value pairs contained in a +bucket and use it in a MapReduce query via Riak KV's HTTP API. + +Here is our example MapReduce function: + +```erlang +-module(mr_example). + +-export([get_keys/3]). + +% Returns bucket and key pairs from a map phase +get_keys(Value,_Keydata,_Arg) -> + [{riak_object:bucket(Value),riak_object:key(Value)}]. +``` + +Save this file as `mr_example.erl` and proceed to compiling the module. + +{{% note title="Note on the Erlang Compiler" %}} +You must use the Erlang compiler (`erlc`) associated with the +Riak KV installation or the version of Erlang used when compiling Riak KV from +source. +{{% /note %}} + +Compiling the module is a straightforward process: + +```bash +erlc mr_example.erl +``` + +Successful compilation will result in a new `.beam` file, `mr_example.beam`. + +Send this file to your operator, or read about [installing custom code][use ref custom code] +on your Riak KV nodes. Once your file has been installed, all that +remains is to try the custom function in a MapReduce query. For +example, let's return keys contained within a bucket named `messages` +(please pick a bucket which contains keys in your environment). + +```curl +curl -XPOST localhost:8098/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' +``` + +The result should be a JSON map of bucket and key names expressed as key/value pairs. + +{{% note %}} +Be sure to install the MapReduce function as described above on all of +the nodes in your cluster to ensure proper operation. +{{% /note %}} + + +## Phase functions + +MapReduce phase functions have the same properties, arguments, and +return values whether you write them in Javascript or Erlang. + +### Map phase functions + +Map functions take three arguments (in Erlang, arity-3 is required). +Those arguments are: + + 1. `Value`: the value found at a key. This will be a Riak object, which + in Erlang is defined and manipulated by the `riak_object` module. + In Javascript, a Riak object looks like this: + + ```javascript + { + "bucket_type" : BucketTypeAsString, + "bucket" : BucketAsString, + "key" : KeyAsString, + "vclock" : VclockAsString, + "values" : [ + { + "metadata" : { + "X-Riak-VTag":VtagAsString, + "X-Riak-Last-Modified":LastModAsString, + "Links":[...List of link objects], + // ...other metadata... + }, + "data" : ObjectData + }, + // ...other metadata/data values (siblings)... + ] + } + ``` + 2. *KeyData* : key data that was submitted with the inputs to the query or phase. + 3. *Arg* : a static argument for the entire phase that was submitted with the query. + +A map phase should produce a list of results. You will see errors if +the output of your map function is not a list. Return the empty list if +your map function chooses not to produce output. If your map phase is +followed by another map phase, the output of the function must be +compatible with the input to a map phase - a list of bucket-key pairs or +`bucket-key-keydata` triples. 
+ +#### Map function examples + +These map functions return the value (data) of the object being mapped: + +```erlang +fun(Value, _KeyData, _Arg) -> + [riak_object:get_value(Value)] +end. +``` + +These map functions filter their inputs based on the arg and return bucket-key pairs for a subsequent map phase: + +```erlang +fun(Value, _KeyData, Arg) -> + Key = riak_object:key(Value), + Bucket = riak_object:bucket(Value), + case erlang:byte_size(Key) of + L when L > Arg -> + [{Bucket,Key}]; + _ -> [] + end +end. +``` + +### Reduce phase functions + +Reduce functions take two arguments. Those arguments are: + +1. *ValueList*: the list of values produced by the preceding phase in the MapReduce query. +2. *Arg* : a static argument for the entire phase that was submitted with the query. + +A reduce function should produce a list of values, but it must also be +true that the function is commutative, associative, and idempotent. That +is, if the input list `[a,b,c,d]` is valid for a given F, then all of +the following must produce the same result: + + +```erlang + F([a,b,c,d]) + F([a,d] ++ F([c,b])) + F([F([a]),F([c]),F([b]),F([d])]) +``` + +#### Reduce function examples + +These reduce functions assume the values in the input are numbers and +sum them: + +```erlang +fun(Values, _Arg) -> + [lists:foldl(fun erlang:'+'/2, 0, Values)] +end. +``` + +These reduce functions sort their inputs: + +```erlang +fun(Values, _Arg) -> + lists:sort(Values) +end. +``` + +## MapReduce Examples + +Riak KV supports describing MapReduce queries in Erlang syntax through the +Protocol Buffers API. This section demonstrates how to do so using the +Erlang client. + +{{% note title="Distributing Erlang MapReduce Code" %}} +Any modules and functions you use in your Erlang MapReduce calls must be +available on all nodes in the cluster. Please read about +[installing custom code]({{}}riak/kv/2.9.1/using/reference/custom-code). +{{% /note %}} + +### Erlang Example + +Before running some MapReduce queries, let's create some objects to +run them on. Unlike the first example when we compiled +`mr_example.erl` and distributed it across the cluster, this time +we'll use the [Erlang client library][erlang client] and shell. + +```erlang +1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087). +2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>, + term_to_binary(["eggs", "bacon"])). +3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>, + term_to_binary(["bread", "bacon"])). +4> riakc_pb_socket:put(Client, Yours, [{w, 1}]). +5> riakc_pb_socket:put(Client, Mine, [{w, 1}]). +``` + +Now that we have a client and some data, let's run a query and count how +many occurrences of groceries. + +```erlang +6> Count = fun(G, undefined, none) -> + [dict:from_list([{I, 1} + || I <- binary_to_term(riak_object:get_value(G))])] + end. +7> Merge = fun(Gcounts, none) -> + [lists:foldl(fun(G, Acc) -> + dict:merge(fun(_, X, Y) -> X+Y end, + G, Acc) + end, + dict:new(), + Gcounts)] + end. +8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred( + Client, + [{<<"groceries">>, <<"mine">>}, + {<<"groceries">>, <<"yours">>}], + [{map, {qfun, Count}, none, false}, + {reduce, {qfun, Merge}, none, true}]). +9> L = dict:to_list(R). +``` + +{{% note title="Riak Object Representations" %}} +Note how the `riak_object` module is used in the MapReduce +function but the `riakc_obj` module is used on the client. +Riak objects are represented differently internally to the cluster than +they are externally. 
+{{% /note %}} + +Given the lists of groceries we created, the sequence of commands above +would result in L being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`. + +### Erlang Query Syntax + +`riakc_pb_socket:mapred/3` takes a client and two lists as arguments. +The first list contains bucket-key pairs. The second list contains +the phases of the query. + +`riakc_pb_socket:mapred_bucket/3` replaces the first list of +bucket-key pairs with the name of a bucket; see the warnings above +about using this in a production environment. + +#### Inputs + +The `mapred/3` input objects are given as a list of tuples in the +format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and +`Key` should be binaries, and `KeyData` can be any Erlang term. The +former form is equivalent to `{{Bucket,Key},undefined}`. + +#### Query + +The query is given as a list of map, reduce and link phases. Map and +reduce phases are each expressed as tuples in the following form: + + +```erlang +{Type, FunTerm, Arg, Keep} +``` + +`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument +(any Erlang term) to pass to each execution of the phase. `Keep` is +either `true` or `false` and determines whether results from the phase +will be included in the final value of the query. Riak KV assumes that the +final phase will return results. + +`FunTerm` is a reference to the function that the phase will execute and +takes any of the following forms: + +* `{modfun, Module, Function}` where `Module` and `Function` are atoms + that name an Erlang function in a specific module +* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous + function) +* `{jsfun,Name}` where `Name` is a binary that, when evaluated in + Javascript, points to a built-in Javascript function +* `{jsanon, Source}` where `Source` is a binary that, when evaluated in + Javascript is an anonymous function +* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains + the source for an anonymous Javascript function + +{{% note title="qfun Note" %}} +Using `qfun` in compiled applications can be a fragile +operation. Please keep the following points in mind: + +1. The module in which the function is defined must be present and +exactly the same version on both the client and Riak KV nodes. + +2. Any modules and functions used by this function (or any function in +the resulting call stack) must also be present on the Riak KV nodes. + +Errors about failures to ensure both 1 and 2 are often surprising, +usually seen as opaque missing-function or function-clause +errors. Especially in the case of differing module versions, this can be +difficult to diagnose without expecting the issue and knowing of +`Module:info/0`. + +When using the Erlang shell, anonymous MapReduce functions can be +defined and sent to Riak KV instead of deploying them to all servers in +advance, but condition #2 above still holds. +{{% /note %}} + +Link phases are expressed in the following form: + + +```erlang +{link, Bucket, Tag, Keep} +``` + + +`Bucket` is either a binary name of a bucket to match, or the atom `_`, +which matches any bucket. `Tag` is either a binary tag to match, or the +atom `_`, which matches any tag. `Keep` has the same meaning as in map +and reduce phases. + + +> There are a small group of prebuilt Erlang MapReduce functions available +with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl). 
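+
+To tie these pieces together, here is a sketch of a complete query from
+the Erlang shell. It assumes a `riakc_pb_socket` connection named
+`Client` and the `groceries` objects from the earlier example, and it
+uses two of those prebuilt `riak_kv_mapreduce` functions:
+
+```erlang
+%% {modfun, Module, Function} names code that must exist on every node;
+%% riak_kv_mapreduce ships with Riak KV, so these phases always will.
+{ok, [{1, Sorted}]} = riakc_pb_socket:mapred(
+    Client,
+    [{<<"groceries">>, <<"mine">>}, {<<"groceries">>, <<"yours">>}],
+    [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+     {reduce, {modfun, riak_kv_mapreduce, reduce_sort}, none, true}]).
+```
+
+Only the reduce phase sets `Keep` to `true`, so the query returns just
+that phase's output, tagged with its position (1) in the phase list.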
+ +## Bigger Data Examples + +### Loading Data + +This Erlang script will load historical stock-price data for Google +(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it. +Paste the code below into a file called `load_data.erl` inside the `dev` +directory (or download it below). + +```erlang +#!/usr/bin/env escript +%% -*- erlang -*- +main([]) -> + io:format("Requires one argument: filename with the CSV data~n"); +main([Filename]) -> + {ok, Data} = file:read_file(Filename), + Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])), + lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines). + +format_and_insert(Line) -> + JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line), + Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]), + io:format("Inserting: ~s~n", [hd(Line)]), + os:cmd(Command). +``` + +Make the script executable: + +```bash +chmod +x load_data.erl +``` + +Download the CSV file of stock data linked below and place it in the +`dev` directory where we've been working. + +* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) --- Google historical stock data +* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) --- Alternative script in Ruby to load the data +* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) --- Erlang script to load data (as shown in snippet) + +Now load the data into Riak KV. + +```bash +./load_data.erl goog.csv +``` + + +### Map only: find the days on which the high was over $600.00 + +From the Erlang shell with the client library loaded, let's define a +function which will check each value in our `goog` bucket to see if +the stock's high for the day was above $600. + +```erlang +> HighFun = fun(O, _, LowVal) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> High = proplists:get_value(<<"High">>, Map, -1.0), +> case High > LowVal of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun +``` + +Now we'll use `mapred_bucket/3` to send that function to the cluster. + +```erlang +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]). + {ok,[{0, + [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>, + <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>, + <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>, + <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>, + <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>, + <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>, + <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>, + <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]} +``` + +#### Map only: find the days on which the close is lower than open + +This example is slightly more complicated: instead of comparing a +single field against a fixed value, we're looking for days when the +stock declined. + +```erlang +> CloseLowerFun = fun(O, _, _) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> Close = proplists:get_value(<<"Close">>, Map, -1.0), +> Open = proplists:get_value(<<"Open">>, Map, -2.0), +> case Close < Open of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun + +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]). 
+{ok,[{0, + [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>, + <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>, + <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>, + <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>, + <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>, + <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>, + <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>, + <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]} +``` + +#### Map and Reduce: find the maximum daily variance in price by month + +Here things start to get tricky. We'll use map to determine each day's +rise or fall, and our reduce phase will identify each month's largest +variance. + +```erlang +DailyMap = fun(O, _, _) -> + {struct, Map} = mochijson2:decode(riak_object:get_value(O)), + Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")), + High = proplists:get_value(<<"High">>, Map, 0.0), + Low = proplists:get_value(<<"Low">>, Map, 0.0), + Month = string:substr(Date, 1, 7), + [{Month, abs(High - Low)}] +end. + +MonthReduce = fun(List, _) -> + {Highs, _} = lists:foldl( + fun({Month, _Value}=Item, {Accum, PrevMonth}) -> + case Month of + PrevMonth -> + %% Highest value is always first in the list, so + %% skip over this one + {Accum, PrevMonth}; + _ -> + {[Item] ++ Accum, Month} + end + end, + {[], ""}, + List), + Highs + end. +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]). +{ok,[{1, + [{"2010-02",10.099999999999909}, + {"2006-02",11.420000000000016}, + {"2004-08",8.100000000000009}, + {"2008-08",14.490000000000009}, + {"2006-05",11.829999999999984}, + {"2005-10",4.539999999999964}, + {"2006-06",7.300000000000011}, + {"2008-06",9.690000000000055}, + {"2006-03",11.770000000000039}, + {"2006-12",4.880000000000052}, + {"2005-09",9.050000000000011}, + {"2008-03",15.829999999999984}, + {"2008-09",14.889999999999986}, + {"2010-04",9.149999999999977}, + {"2008-06",14.909999999999968}, + {"2008-05",13.960000000000036}, + {"2005-05",2.780000000000001}, + {"2005-07",6.680000000000007}, + {"2008-10",21.390000000000043}, + {"2009-09",4.180000000000007}, + {"2006-08",8.319999999999993}, + {"2007-08",5.990000000000009}, + {[...],...}, + {...}|...]}]} +``` + +#### A MapReduce Challenge + +Here is a scenario involving the data you already have loaded. + +MapReduce Challenge: Find the largest day for each month in terms of +dollars traded, and subsequently the largest overall day. + +*Hint*: You will need at least one each of map and reduce phases. + +## Streaming MapReduce + +Because Riak KV distributes the map phases across the cluster to increase +data locality, you can gain access to the results of those individual +computations as they finish via streaming. Streaming can be very +helpful when getting access to results from a high latency MapReduce job +that only contains map phases. Streaming of results from reduce phases +isn't as useful, but if your map phases return data (keep: true), they +will be returned to the client even if the reduce phases haven't +executed. This will let you use streaming with a reduce phase to collect +the results of the map phases while the jobs are run and then get the +result to the reduce phase at the end. + +### Streaming via the HTTP API + +You can enable streaming with MapReduce jobs submitted to the `/mapred` +resource by adding `?chunked=true` to the url. The response will be sent +using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`. 
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error

+Internal Server Error
+
+The server encountered an error while processing this request:
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, typically the information you need is buried more deeply within the stack.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
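+
+The failing `DailyFun` is not shown here, but the trace pinpoints the
+bug well enough to sketch a hypothetical fix:
+
+```erlang
+%% Reconstruction of the offending map function: the trace shows
+%% string:substr(Date, 0, 7), which crashes because Erlang strings
+%% are 1-indexed. Characters 1 through 7 of "YYYY-MM-DD" give "YYYY-MM".
+DailyFun = fun(O, _, _) ->
+    {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+    Date = binary_to_list(proplists:get_value(<<"Date">>, Map)),
+    [{string:substr(Date, 1, 7), Date}]  % was string:substr(Date, 0, 7)
+end.
+```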
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
diff --git a/content/riak/kv/2.9.1/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.9.1/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..eb4996063a
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,67 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+---
+
+Cluster metadata is a subsystem inside of Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/2.9.1/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.9.1/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.9.1/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
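+
+As a brief, hypothetical sketch of that interface, run from a Riak
+node's console (e.g. via `riak attach`; the prefix and key names are
+invented for illustration):
+
+```erlang
+%% Cluster metadata is addressed by a {Prefix, SubPrefix} pair plus a key.
+FullPrefix = {<<"my_app">>, <<"settings">>}.
+
+%% Store a value; the Erlang term is opaque to the metadata subsystem.
+riak_core_metadata:put(FullPrefix, <<"max_widgets">>, 100).
+
+%% Read it back; reads are served from the local node's memory.
+100 = riak_core_metadata:get(FullPrefix, <<"max_widgets">>).
+
+%% Delete the entry; the change is broadcast to all cluster members.
+riak_core_metadata:delete(FullPrefix, <<"max_widgets">>).
+```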
diff --git a/content/riak/kv/2.9.1/developing/app-guide/reference.md b/content/riak/kv/2.9.1/developing/app-guide/reference.md
new file mode 100644
index 0000000000..c0dd6f1e89
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/app-guide/reference.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+#menu:
+#  riak_kv-2.9.1:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+---
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.9.1/developing/app-guide/replication-properties.md b/content/riak/kv/2.9.1/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..2a1399710d
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/app-guide/replication-properties.md
@@ -0,0 +1,580 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/advanced/replication-properties
+  - /riak/kv/2.9.1/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/2.9.1/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/2.9.1/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/2.9.1/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/2.9.1/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.9.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.9.1/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/2.9.1/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties when making a read or write request.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.9.1/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
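+
+For example, with the Erlang client, a bucket of this type is addressed
+as a `{Type, Bucket}` pair (the bucket and key names below are just for
+illustration):
+
+```erlang
+%% {<<"custom_props">>, <<"my_bucket">>} selects the typed bucket, so
+%% the n_val=5/r=3/w=3 properties set above govern this write.
+Obj = riakc_obj:new({<<"custom_props">>, <<"my_bucket">>},
+                    <<"my_key">>,
+                    <<"some value">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```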
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/2.9.1/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.9.1/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/2.9.1/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.9.1/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
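+
+As a sketch of what this looks like with the Erlang client (reusing the
+`animal_facts` bucket from the examples above):
+
+```erlang
+%% Require 2 primary vnodes to answer this read; the request fails if
+%% fewer than 2 are reachable, even when fallback vnodes could respond.
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"animal_facts">>,
+                                <<"chimpanzee">>,
+                                [{pr, 2}]).
+```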
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
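+
+Like W and PW, DW can also be passed per request; a minimal sketch with
+the Erlang client:
+
+```erlang
+%% Succeed only after 2 vnodes confirm the write has reached their
+%% storage backend, not merely their memory.
+Obj = riakc_obj:new(<<"animal_facts">>, <<"giraffe">>,
+                    <<"giraffes are tall">>, <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
+```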
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/2.9.1/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.9.1/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.9.1/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
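+
+On a single request, `notfound_ok` is passed as an option; for example,
+with the Erlang client:
+
+```erlang
+%% Do not trust the first notfound: wait to hear from the other
+%% replicas before reporting that the object is missing.
+Result = riakc_pb_socket:get(Pid,
+                             <<"nba_stats">>,
+                             <<"john_stockton">>,
+                             [{notfound_ok, false}]).
+```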
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
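+
+Combining the two options on a read looks like this (a sketch that
+assumes the key does not exist):
+
+```erlang
+%% Search thoroughly, but return notfound once a quorum of vnodes
+%% (2 of 3 with the default n_val) has reported the key missing.
+{error, notfound} = riakc_pb_socket:get(Pid,
+                                        <<"nba_stats">>,
+                                        <<"no_such_player">>,
+                                        [{notfound_ok, false},
+                                         {basic_quorum, true}]).
+```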
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` --- All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` --- This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` --- A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` --- Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
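+
+With the Erlang client, the symbolic names are simply atoms used in
+place of integers, as in this sketch:
+
+```erlang
+%% r = all: every replica of the key must respond before the read
+%% succeeds, trading availability for maximum read accuracy.
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"animal_facts">>,
+                                <<"chimpanzee">>,
+                                [{r, all}]).
+```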
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{'stats':{ ... large stats object ... }}")
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/2.9.1/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/2.9.1/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.9.1/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+*Tuning CAP Controls in Riak* from Basho Technologies on Vimeo.
diff --git a/content/riak/kv/2.9.1/developing/app-guide/strong-consistency.md b/content/riak/kv/2.9.1/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..4543d4a945
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/app-guide/strong-consistency.md
@@ -0,0 +1,257 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/advanced/strong-consistency
+  - /riak/kv/2.9.1/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/2.9.1/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/2.9.1/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/2.9.1/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/2.9.1/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/2.9.1/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/2.9.1/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/2.9.1/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.1/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/2.9.1/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/2.9.1/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/2.9.1/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/2.9.1/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/2.9.1/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/2.9.1/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/2.9.1/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/2.9.1/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/2.9.1/developing/client-libraries
+[getting started]: {{}}riak/kv/2.9.1/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/2.9.1/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP (consistent and
+partition-tolerant) system for the data in those buckets. Depending on
+your use case, you can store all of your data in strongly consistent
+buckets or only some of it. Strong consistency was added to complement
+Riak's standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability because a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable, or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
+
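+To make this concrete, here is a minimal sketch using the official
+Python client. The connection options, bucket name, and key are
+illustrative assumptions, not part of the original walkthrough:
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)  # assumes a local node on default ports
+
+# Any bucket under the strongly_consistent type gets SC guarantees
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+obj = bucket.new('alice', data={'balance': 100})
+obj.store()
+
+# A successful write is immediately readable by any client
+print(bucket.get('alice').data)  # {'balance': 100}
+```
+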
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function quite differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations enable you to set a
+variety of [replication properties][apps replication properties] on each request or at the
+bucket level [using bucket types][usage bucket types], strongly consistent
+operations quietly ignore these settings, which include `r`, `pr`,
+`w`, `rw`, and others. Two replication properties that _can_ be set,
+however, are `n_val` and `return_body`.
+
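+As a small sketch of the one request-level setting that does carry over,
+the Python client's `store()` accepts a `return_body` flag, while
+quorum-style parameters are ignored on strongly consistent buckets (the
+bucket and key below are assumptions carried over from the sketch above):
+
+```python
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+obj = bucket.new('bob', data={'balance': 50})
+
+# w is quietly ignored for this bucket; return_body still applies
+obj.store(w=1, return_body=True)
+```
+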
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent [bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later, you have the option of using
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients but offer important advantages over them.
+
+While we strongly recommend attaching a causal context to objects on all
+updates, whether a traditional vector clock or the newer dotted version
+vector, doing so is purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. While it is
+possible to write to nonexistent keys without attaching context,
+we recommend doing so only if you are certain that the key does not
+yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
+
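+As a brief sketch with the official Python client (which carries the
+causal context on fetched objects for you), the safe pattern is always
+fetch-modify-store; the bucket and key are assumptions carried over from
+the sketch above:
+
+```python
+# Fetching first gives the client the object's causal context
+obj = bucket.get('alice')
+
+# Modify the fetched object, then store; the context that came back
+# with the fetch is returned to Riak automatically
+obj.data['balance'] = 200
+obj.store()
+```
+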
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure, default to supplying a causal context.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates; plan to
+   retry failed updates, as in the sketch below.
+
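+Because such failures are expected, a retry around the whole
+fetch/modify/store cycle is the usual pattern. Here is a hedged sketch
+using the official Python client; the helper name and backoff policy are
+illustrative only, not part of the client API:
+
+```python
+import random
+import time
+
+import riak
+
+def update_with_retry(bucket, key, update_fn, retries=5):
+    """Retry the entire read/modify/write cycle, since strongly
+    consistent updates may legitimately fail under contention."""
+    for attempt in range(retries):
+        try:
+            obj = bucket.get(key)            # re-fetch for a fresh context
+            obj.data = update_fn(obj.data)   # apply the modification
+            obj.store()
+            return obj
+        except riak.RiakError:
+            time.sleep(random.uniform(0, 0.1 * (attempt + 1)))  # back off
+    raise RuntimeError('update failed after {} attempts'.format(retries))
+```
+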
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+non-strongly-consistent buckets. One important exception is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a non-empty key without including causal
+context, you will receive the following error:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+Precondition Failed
+
+mochiweb+webmachine web server
+``` + +> **Getting Started with Riak KV clients** +> +> If you are connecting to Riak using one of Basho's official +[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section. + +## Known Issue with Client Libraries + +All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no +problems, since many error conditions are normal when using Riak. + +When working with strong consistency, however, operations like +[conditional puts][config strong consistency#details] commonly +produce errors that are difficult for clients to interpret. For example, +it is expected behavior for conditional puts to fail in the case of +concurrent updates to an object. At present, the official Riak clients +will convert this failure into an exception that is no different from +other error conditions, i.e. they will not indicate any +strong-consistency-specific errors. + +The best solution to this problem at the moment is to catch these +exceptions on the application side and parse server-side error messages +to see if the error involved a conditional failure. If so, you should +set up your application to retry any updates, perhaps a specified number +of times or perhaps indefinitely, depending on the use case. + +If you do set up a retry logic of this sort, however, it is necessary +to retry the entire read/modify/put cycle, meaning that you will need +to fetch the object, modify it, and then write. If you perform a simple +put over and over again, without reading the object, the update will +continue to fail. + +A future version of Riak will address these issues by modifying the +server API to more accurately report errors specific to strongly +consistent operations. diff --git a/content/riak/kv/2.9.1/developing/app-guide/write-once.md b/content/riak/kv/2.9.1/developing/app-guide/write-once.md new file mode 100644 index 0000000000..17007490ca --- /dev/null +++ b/content/riak/kv/2.9.1/developing/app-guide/write-once.md @@ -0,0 +1,155 @@ +--- +title: "Write Once" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Write Once" + identifier: "app_guide_write_once" + weight: 102 + parent: "developing_app_guide" +toc: true +version_history: + in: "2.1.0+" +aliases: + - /riak/2.9.1/dev/advanced/write-once + - /riak/kv/2.9.1/dev/advanced/write-once +--- + +[glossary vnode]: {{}}riak/kv/2.9.1/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.9.1/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.9.1/developing/data-types +[strong consistency]: {{}}riak/kv/2.9.1/developing/app-guide/strong-consistency + +Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. + +{{% note %}} +Write-once buckets do not support Riak commit hooks. Because Riak objects are +inserted into the realtime queue using a postcommit hook, realtime replication +is unavailable for write-once buckets. 
Fullsync replication will, however,
+replicate the data.
+{{% /note %}}
+
+## Configuration
+
+When the new `write_once` [bucket type][bucket type] parameter is set to
+`true`, buckets of this type will treat all key/value entries as semantically
+"write once"; once written, entries should not be modified or overwritten by the user.
+
+The `write_once` property is a boolean property applied to a bucket type and
+may only be set at bucket creation time. Once a bucket type has been set with
+this property and activated, the `write_once` property may not be modified.
+
+The `write_once` property is incompatible with [Riak data types][Riak data types]
+and [strong consistency][strong consistency]. This means that if you attempt
+to create a bucket type with the `write_once` property set to `true`, any
+attempt to set the `datatype` parameter or to set the `consistent` parameter
+to `true` will fail.
+
+The `write_once` property may not be set on the default bucket type, and may
+not be set on individual buckets. If you set the `lww` or `allow_mult`
+parameters on a write-once bucket type, those settings will be ignored, as
+sibling values are disallowed by default.
+
+The following example shows how to configure a bucket type with the
+`write_once` property:
+
+```bash
+riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}'
+# my-bucket-type created
+
+riak-admin bucket-type activate my-bucket-type
+# my-bucket-type has been activated
+
+riak-admin bucket-type status my-bucket-type
+# my-bucket-type is active
+...
+write_once: true
+...
+```
+
+## Quorum
+
+The write path used by write-once buckets supports the `w`, `pw`, and `dw`
+configuration values. However, if `dw` is specified, then the value of `w` is
+taken to be the maximum of the `w` and `dw` values. For example, for an `n_val`
+of 3, if `dw` is set to `all`, then `w` will be `3`.
+
+The write path additionally supports the `sloppy_quorum` property. If set to
+`false`, only primary nodes will be selected for calculation of write quorum
+nodes.
+
+## Runtime
+
+The write-once path circumvents the normal coordinated PUT code path, and
+instead sends write requests directly to all [vnodes][glossary vnode] (or
+vnode proxies) in the effective preference list for the write operation.
+
+In place of the `put_fsm` used in the normal path, we introduce a collection of
+new intermediate worker processes (implementing `gen_server` behavior). The
+role of these intermediate processes is to dispatch put requests to vnodes or
+vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`,
+the write-once workers are long-lived for the lifecycle of the `riak_kv`
+application. They are therefore stateful and store request state in a
+state-local dictionary.
+
+The relationship between the `riak_client`, write-once workers, and vnode
+proxies is illustrated in the following diagram:
+
+![Write Once]({{}}images/write_once.png)
+ +## Client Impacts + +Since the write-once code path is optimized for writes of data that will not +be updated and therefore may potentially issue asynchronous writes, some +client features might not work as expected. For example, PUT requests asking +for the object to be returned will behave like requests that do not +request the object to be returned when they are performed against write-once +buckets. + + +## Siblings + +As mentioned, entries in write-once buckets are intended to be written only +once---users who are not abusing the semantics of the bucket type should not be +updating or over-writing entries in buckets of this type. However, it is +possible for users to misuse the API, accidentally or otherwise, which might +result in incomparable entries for the same key. + +In the case of siblings, write-once buckets will resolve the conflict by +choosing the "least" entry, where sibling ordering is based on a deterministic +SHA-1 hash of the objects. While this algorithm is repeatable and deterministic +at the database level, it will have the appearance to the user of "random write +wins." + +{{% note %}} +As mentioned in [Configuration](#configuration), write-once buckets and Riak +Data Types are incompatible because of this. +{{% /note %}} + + +## Handoff + +The write-once path supports handoff scenarios, such that if a handoff occurs +during PUTs in a write-once bucket, the values that have been written will be +handed off to the newly added Riak node. + +## Asynchronous Writes + +For backends that support asynchronous writes, the write-once path will +dispatch a write request to the backend and handle the response +asynchronously. This behavior allows the vnode to free itself for other work +instead of waiting on the write response from the backend. + +At the time of writing, the only backend that supports asynchronous writes is +LevelDB. Riak will automatically fall back to synchronous writes with all other +backends. + +{{% note title="Note on the `multi` backend" %}} +The [Multi]({{}}riak/kv/2.9.1/setup/planning/backend/multi) backend does not +support asynchronous writes. Therefore, if LevelDB is used with the Multi +backend, it will be used in synchronous mode. +{{% /note %}} diff --git a/content/riak/kv/2.9.1/developing/client-libraries.md b/content/riak/kv/2.9.1/developing/client-libraries.md new file mode 100644 index 0000000000..151614f8e6 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/client-libraries.md @@ -0,0 +1,304 @@ +--- +title: "Client Libraries" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Client Libraries" + identifier: "developing_client_libraries" + weight: 106 + parent: "developing" +toc: true +aliases: + - /riak/2.9.1/dev/using/libraries + - /riak/kv/2.9.1/dev/using/libraries +--- + +## Basho-Supported Libraries + +Basho officially supports a number of open-source client libraries for a +variety of programming languages and environments. 
+ +Language | Source | Documentation | Download +:--------|:-------|:--------------|:-------- +Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22) | +Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client) +Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads) +C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases) +Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases) +PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) +Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client) +Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client) + +**Note**: All official clients use the integrated issue tracker on +GitHub for bug reporting. + +In addition to the official clients, Basho provides some unofficial +client libraries, listed below. There are also many client libraries and +related [community projects]({{}}community/projects/). + + +## Community Libraries + +The Riak Community is developing at a break-neck pace, and the number of +community-contributed libraries and drivers is growing right along side +it. Here is a list of projects that may suit your programming needs or +curiosities. If you know of something that needs to be added or are +developing something that you wish to see added to this list, please +fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs) +and send us a pull request. + +{{% note title="Note on community-produced libraries" %}} +All of these projects and libraries are at various stages of completeness and +may not suit your application's needs based on their level of maturity and +activity. 
+{{% /note %}} + +### Client Libraries and Frameworks + +#### C/C++ + +* [riak-cpp](https://github.com/ajtack/riak-cpp) --- A C++ Riak client + library for use with C++11 compilers +* [Riak C Driver](https://github.com/fenek/riak-c-driver) --- A library + to communicate with Riak using cURL and Protocol Buffers +* [Riack](https://github.com/trifork/riack) --- A simple C client + library +* [Riack++](https://github.com/TriKaspar/riack_cpp) --- A C++ wrapper + around riack + +#### Clojure + +* [knockbox](https://github.com/reiddraper/knockbox) --- An eventual + consistency toolbox for Clojure +* [Welle](http://clojureriak.info) --- An expressive Clojure client with + batteries included +* [clj-riak](http://github.com/mmcgrana/clj-riak) --- Clojure bindings + to the Riak Protocol Buffers API +* [sumo](https://github.com/reiddraper/sumo) --- A Protocol + Buffer-specific client for Riak with KV, 2i, and MapReduce support +* [kria](https://github.com/bluemont/kria) --- Riak 2.0 Asynchronous + (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer + API, Java 7. + +#### ColdFusion + +* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) + --- A Riak-backed cache extension for Railo/ColdFusion + +#### Common Lisp + +* [cl-riak (1)](https://github.com/whee/cl-riak) +* [cl-riak (2)](https://github.com/eriknomitch/cl-riak) + +#### Dart + +* [riak-dart](https://github.com/agilord/riak_dart_client) --- HTTP + client for Riak written in Dart + +#### Django (Python) + +* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) + --- Riak-based Session Backend for Django +* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) + --- A Riak backend for Django + +#### Erlang + +* [Uriak Pool](https://github.com/unisontech/uriak_pool) --- Erlang + connection pool library from the team at + [Unison](http://www.unison.com) +* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) --- Riak + Protocol Buffer Client pool application +* [Pooly](https://github.com/aberman/pooly) --- Riak Process Pool +* [riakpool](https://github.com/dweldon/riakpool) --- Application for + maintaining a dynamic pool of Protocol Buffer client connections to a + Riak database +* [pooler](https://github.com/seth/pooler) --- An OTP Process Pool + Application +* [krc](https://github.com/klarna/krc) --- A simple wrapper around the + official Riak client for Erlang +* [riakc_pool](https://github.com/brb/riakc_pool) --- A really simple + Riak client process pool based on poolboy + +#### Go + +* [riaken](https://github.com/riaken) --- A fast and extendable Riak + Protocol Buffer Client +* [goriakpbc](https://github.com/tpjg/goriakpbc) --- A Golang Riak + client inspired by the Ruby riak-client from Basho and riakpbc from mrb +* [riakpbc](https://github.com/mrb/riakpbc) --- A Riak Protocol Buffer + client in Go +* [goriak](https://github.com/zegl/goriak) --- Go language driver for Riak KV + +#### Grails + +* [Grails ORM for Riak](http://www.grails.org/plugin/riak) + +#### Griffon + +* [Riak Plugin for + Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin) + +#### Groovy + +* [spring-riak](https://github.com/jbrisbin/spring-riak) --- Riak + support from Groovy and/or Java + +#### Haskell + +* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) + --- A fast Haskell client library from the team at MailRank. 
+ +#### Java + +* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) + --- Java Client Library for Riak based on the Protocol Buffers API +* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) + --- Asynchronous, NIO-based Protocol Buffers client for Riak +* [Riak Module for the Play + Framework](http://www.playframework.org/modules/riak-head/home) + +#### Lisp-flavored Erlang + +* [Gutenberg](https://github.com/dysinger/gutenberg/) --- Riak MapReduce + examples written in LFE + +#### Node.js + +* [zukai](https://github.com/natural/zukai) --- Riak ODM for Node.js + from Troy Melhase +* [riak-pb](https://github.com/CrowdProcess/riak-pb) --- Riak Protocol + Buffers client for Node.js from the team at + [CrowdProcess](http://crowdprocess.com) +* [node_riak](https://github.com/mranney/node_riak) --- Voxer's + production Node.js client for Riak. +* [riakpbc](https://github.com/nlf/riakpbc) --- A simple Riak Protocol + Buffer client library for Node.js +* [nodiak](https://npmjs.org/package/nodiak) --- Supports bulk + get/save/delete, sibling auto-resolution, MapReduce chaining, Search, + and 2i's +* [resourceful-riak](https://github.com/admazely/resourceful-riak) --- A + Riak engine to the + [resourceful](https://github.com/flatiron/resourceful/) model + framework from [flatiron](https://github.com/flatiron/) +* [Connect-Riak](https://github.com/frank06/connect-riak) --- Riak + session store for Connect backed by [Riak-js](http://riakjs.org/) +* [Riak-js](http://riakjs.com) --- Node.js client for Riak with support + for HTTP and Protocol Buffers +* [Riakjs-model](https://github.com/dandean/riakjs-model) --- a model + abstraction around riak-js +* [Node-Riak](http://github.com/orlandov/node-riak) --- A wrapper around + Node's HTTP facilities for communicating with Riak +* [riak-dc](https://github.com/janearc/riak-dc) --- A very thin, very small + http-based interface to Riak using promises intended to be used for small + tools like command-line applications; aims to have the "most-synchronous- + like" interface. 
+* [Nori](https://github.com/sgonyea/nori) --- Experimental Riak HTTP + library for Node.js modeled after Ripple +* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) --- + Node-based server and database-frontend for Sproutcore +* [Chinood](https://npmjs.org/package/chinood) --- Object data mapper + for Riak built on Nodiak +* [SimpleRiak](https://npmjs.org/package/simpleriak) --- A very simple + Riak HTTP client + +#### OCaml + +* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) --- + Riak OCaml client +* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) --- A Protocol + Buffers client for Riak + +#### Perl + +* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) --- A Perl + interface to Riak +* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) + --- Non-blocking Riak adapter using anyevent +* [riak-tiny](https://github.com/tempire/riak-tiny) --- Perl interface + to Riak without Moose +* [Riak::Light](https://metacpan.org/module/Riak::Light) --- Fast and + lightweight Perl client for Riak (PBC only) + +#### PHP + +* [riak-client](https://github.com/php-riak/riak-client) --- A Riak + 2.0-compliant PHP client with support for Protocol Buffers by [Fabio + Silva](https://github.com/FabioBatSilva) +* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) --- A port of + Ripple to PHP +* [riiak](https://bitbucket.org/intel352/riiak) --- A Riak PHP client + library for the [Yii Framework](http://www.yiiframework.com/) +* [riak-php](https://github.com/marksteele/riak-php) --- A Riak PHP + client with support for Protocol Buffers +* [RiakBundle](https://github.com/remialvado/RiakBundle) --- + [Symfony](http://symfony.com) Bundle designed to ease interaction + with Riak +* [php_riak](https://github.com/TriKaspar/php_riak) --- A PHP extension + written in C, Both Riak client and PHP session module + +#### Python + +* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) + --- Asyncio PBC Riak 2.0+ client library. 
(Based on official Basho + python client) +* [Riakasaurus](https://github.com/calston/riakasaurus) --- A Riak + client library for Twisted (based on txriak) +* [RiakKit](http://shuhaowu.com/riakkit) --- A small Python ORM that + sits on top of riak-python-client, similar to mongokit and couchdbkit +* [riakalchemy](https://github.com/Linux2Go/riakalchemy) --- Object + mapper for Riak written in Python +* [riak_crdt](https://github.com/ericmoritz/riak_crdt) --- A CRDT + (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT + API](https://github.com/ericmoritz/crdt) +* [txriak](https://launchpad.net/txriak) --- A Twisted module for + communicating with Riak via the HTTP interface +* [txriakidx](https://github.com/williamsjj/txriakidx) --- Riak client + for Twisted Python that implements transparent indexes + +#### Racket + +* [riak.rkt](https://github.com/shofetim/riak.rkt) --- Racket API to + Riak +* [Racket Riak](https://github.com/dkvasnicka/racket-riak) --- Racket + 1.3.x API to Riak + +#### Ruby + +* [Risky](https://github.com/aphyr/risky) --- A lightweight Ruby ORM for + Riak +* [riak_sessions](http://github.com/igorgue/riak_sessions) --- + Riak-backed session storage for Rack +* [Riaktor](http://github.com/benmyles/riaktor) --- Ruby client and + object mapper for Riak +* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) --- + DataMapper adapter for Riak +* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) --- Riak + Protocol Buffer Client in Ruby +* [Devise-Ripple](http://github.com/frank06/devise-ripple) --- An ORM + strategy to use Devise with Riak +* [ripple-anaf](http://github.com/bkaney/ripple-anaf) --- Accepts nested + attributes support for Ripple +* [Pabst](https://github.com/sgonyea/pabst) --- Cross-platform Ruby + extension for Protocol Buffers written in both Objective-C and + Objective-C++ + +#### Scala + +* [Riakka](http://github.com/timperrett/riakka) --- Scala library for + talking to Riak +* [Ryu](http://github.com/softprops/ryu) --- A Tornado Whirlwind Kick + Scala client for the Riak raw HTTP interface +* [Raiku](https://github.com/gideondk/Raiku) --- An Akka IO- and + Sentinel-driven Riak Scala client + +#### Smalltalk + +* [Phriak](http://www.squeaksource.com/Phriak/) --- A Riak client for + Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface +* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) + --- A Pharo Smalltalk interface to Riak. There is also a blog post + with some additional info about the client + [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html). diff --git a/content/riak/kv/2.9.1/developing/data-modeling.md b/content/riak/kv/2.9.1/developing/data-modeling.md new file mode 100644 index 0000000000..d0e25e5a17 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/data-modeling.md @@ -0,0 +1,10 @@ +--- +layout: redirect +target: "riak/kv/2.9.1/learn/use-cases/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/, but -- to maintain +the git history of this (possibly malformed?) file -- we're going to start off +by using this generated redirect. 
diff --git a/content/riak/kv/2.9.1/developing/data-types.md b/content/riak/kv/2.9.1/developing/data-types.md new file mode 100644 index 0000000000..5d93f04369 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/data-types.md @@ -0,0 +1,275 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Data Types" + identifier: "developing_data_types" + weight: 102 + parent: "developing" +toc: true +aliases: + - /riak/2.9.1/dev/using/data-types + - /riak/kv/2.9.1/dev/using/data-types + - /riak/2.9.1/dev/data-modeling/data-types + - /riak/kv/2.9.1/dev/data-modeling/data-types +--- + +[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others +[concept crdt]: ../../learn/concepts/crdts +[ops bucket type]: ../../using/cluster-operations/bucket-types + +Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types: + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [GSets](./gsets) +- [Maps](./maps) + +Riak KV also has 1 context-free data type, that has similar usage but does not require contexts. + +- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places) + + +Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps). + +For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt]. + +## Getting Started with Riak Data Types + +The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: + +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). +2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). +3. [Activate the bucket type](#activate-bucket-type). + +### Creating a Bucket with a Riak Data Type + +First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to either `counter`, `map`, `set`, or `hll`. + +The following would create a separate bucket type for each of the four +bucket-level data types: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}' +riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}' +``` + +> **Note** +> +> The names `maps`, `sets`, `counters`, `hlls` and `gsets` are not reserved +terms. You are free to name bucket types whatever you like, with +the exception of `default`. + +### Confirm Bucket configuration + +Once you've created a bucket with a Riak data type, you can check +to make sure that the bucket property configuration associated with that +type is correct. This can be done through the `riak-admin` interface: + +```bash +riak-admin bucket-type status maps +``` + +This will return a list of bucket properties and their associated values +in the form of `property: value`. 
If our `maps` bucket type has been set
+properly, we should see the following pair in our console output:
+
+```
+datatype: map
+```
+
+### Activate Bucket type
+
+If a bucket type has been properly constructed, it needs to be activated
+to be usable in Riak. This can also be done using the `bucket-type`
+command interface:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+To check whether activation has been successful, simply use the same
+`bucket-type status` command shown above.
+
+See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application.
+
+## Required Bucket Properties
+
+In order for Riak data types to work, the bucket should have the following bucket properties:
+
+- `allow_mult = true`
+- `last_write_wins = false`
+
+These settings are set by default and should not be changed.
+
+## Data Types and Context
+
+Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence.
+
+If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client).
+
+> **Note**
+>
+> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter.
+
+In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map):
+
+```java
+// Using the "ahmedMap" Location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+System.out.println(ctx.getValue().toString());
+
+// An indecipherable string of Unicode characters should then appear
+```
+
+```ruby
+bucket = client.bucket('users')
+ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps')
+ahmed_map.instance_variable_get(:@context)
+
+# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j"
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```python
+bucket = client.bucket_type('maps').bucket('users')
+ahmed_map = Map(bucket, 'ahmed_info')
+ahmed_map.context
+
+# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Note: using a previous UpdateMap or FetchMap result
+Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context));
+
+// Output:
+// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("context: '%s'", rslt.context.toString('base64'));
+});
+
+// Output:
+// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo='
+```
+
+```erlang
+%% You cannot fetch a data type's context directly using the Erlang
+%% client. This is actually quite all right, as the client automatically
+%% manages contexts when making updates.
+``` + +> **Context with the Ruby, Python, and Erlang clients** +> +> In the Ruby, Python, and Erlang clients, you will not need to manually +handle context when making data type updates. The clients will do it all +for you. The one exception amongst the official clients is the Java +client. We'll explain how to use data type contexts with the Java client +directly below. + +### Context with the Java and PHP Clients + +With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations: + +* Disabling a flag within a map +* Removing an item from a set (whether the set is on its own or within a + map) +* Removing a field from a map + +Without context, these operations simply will not succeed due to the +convergence logic driving Riak data types. The example below shows you +how to fetch a data type's context and then pass it back to Riak. More +specifically, we'll remove the `paid_account` flag from the map: + +```java +// This example uses our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate removePaidAccountField = new MapUpdate() + .removeFlag("paid_account"); +UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField) + .withContext(ctx) + .build(); +client.execute(update); +``` + + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +## Usage Examples + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [Maps](./maps) +- [GSets](./gsets) +- [Hyperloglogs](./hyperloglogs) + +The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section. + +All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish. + +## Data Types and Search + +Riak data types can be searched like any other object, but with the +added benefit that your data type is indexed as a different type by Solr, +the search platform behind Riak Search. + +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code +samples from each of our official client libraries. 
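+
+As a hedged illustration of querying indexed data types with the Python
+client, the sketch below assumes a search index named `maps_index` is
+already attached to the bucket type, and that map entries are indexed
+under suffixed field names such as `first_name_register` (both names are
+assumptions, not verified configuration):
+
+```python
+results = client.fulltext_search('maps_index', 'first_name_register:Ahmed')
+
+for doc in results['docs']:
+    print(doc['_yz_rk'])  # the Riak key of each matching object
+```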
diff --git a/content/riak/kv/2.9.1/developing/data-types/counters.md b/content/riak/kv/2.9.1/developing/data-types/counters.md new file mode 100644 index 0000000000..c400abe2b0 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/data-types/counters.md @@ -0,0 +1,631 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Counters" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Counters" + identifier: "data_types_counters" + weight: 100 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.1/dev/using/data-types/counters + - /riak/kv/2.9.1/dev/using/data-types/counters + - /riak/2.9.1/dev/data-modeling/data-types/counters + - /riak/kv/2.9.1/dev/data-modeling/data-types/counters +--- + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero. + +The examples in this section will show you how to use counters on their own. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter set to `counter`: + +```bash +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +``` + +> **Note** +> +> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status counters +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `counters` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: counter +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate counters +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status counters +``` + +After creating and activating our new `counters` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key +location that contains our counter. + +For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. 
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can
+create this counter and ensure that the `counters` bucket will use our
+`counters` bucket type like this:
+
+```java
+// Using the countersBucket Namespace object from above:
+
+Location trafficTickets = new Location(countersBucket, "traffic_tickets");
+```
+
+```ruby
+bucket = client.bucket_type('counters').bucket('counters')
+counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all counter buckets to use the counters bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters'
+
+# This would enable us to create our counter without specifying a bucket type
+bucket = client.bucket('counters')
+counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')
+```
+
+```php
+# using the $bucket var created earlier
+$location = new \Basho\Riak\Location('traffic_tickets', $bucket);
+```
+
+```python
+bucket = client.bucket_type('counters').bucket('counters')
+counter = bucket.new('traffic_tickets')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
+FetchCounter cmd = new FetchCounter(fetchCounterOptions);
+RiakResult rslt = client.Execute(cmd);
+CounterResponse response = cmd.Response;
+```
+
+```javascript
+// Using the options from above:
+
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets'
+};
+```
+
+```erlang
+Counter = riakc_counter:new().
+
+%% Counters in the Erlang client are opaque data structures that collect
+%% operations as you mutate them. We will associate the data structure
+%% with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 0}'
+```
+
+## Increment a Counter
+
+Now that our client knows which bucket/key pairing to use for our
+counter, `traffic_tickets` will start out at 0 by default. If we happen
+to get a ticket that afternoon, we can increment the counter:
+
+```java
+// Using the "trafficTickets" Location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.increment
+
+# This will increment the counter both on the application side and in Riak
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(1)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+counter.increment()
+
+# Updates are staged locally and have to be explicitly sent to Riak
+# using the store() method.
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+```
+
+## Retrieve Counter Value
+
+We can retrieve the value of the counter and view how many tickets have accumulated:
+
+```java
+// Using the "trafficTickets" Location from above:
+FetchCounter fetch = new FetchCounter.Builder(trafficTickets)
+    .build();
+FetchCounter.Response response = client.execute(fetch);
+RiakCounter counter = response.getDatatype();
+Long ticketsCount = counter.view();
+```
+
+```ruby
+counter.value
+# Output will always be an integer
+```
+
+```php
+$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getCounter();
+
+$trafficTickets->getData(); # returns an integer
+```
+
+```python
+counter.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, whereas the call above would return
+# 6, the call below will return 0 since we started with an empty
+# counter:
+
+counter.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any changes to the counter that have not yet been
+# sent to Riak
+counter.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
+FetchCounter cmd = new FetchCounter(fetchCounterOptions);
+RiakResult rslt = client.Execute(cmd);
+CounterResponse response = cmd.Response;
+// response.Value has the counter value
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets'
+};
+client.fetchCounter(options,
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        if (rslt.notFound) {
+            logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND",
+                options.bucketType, options.bucket, options.key);
+        } else {
+            logger.info("bt: %s, b: %s, k: %s, counter: %d",
+                options.bucketType, options.bucket, options.key,
+                rslt.counterValue);
+        }
+    }
+);
+```
+
+```erlang
+riakc_counter:dirty_value(Counter2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, whereas the call above would return
+%% '6', the call below will return '0' since we started with an empty
+%% counter:
+
+riakc_counter:value(Counter2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, CounterX} = riakc_pb_socket:fetch_type(Pid,
+    {<<"counters">>, <<"counters">>},
+    <<"traffic_tickets">>).
+```
+
+```curl
+curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets
+
+# Response:
+{"type":"counter", "value": <value>}
+```
+
+## Decrement a Counter
+
+Counters enable you to decrement values in addition to incrementing them as seen above.
+
+For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record:
+
+```java
+// Using the "trafficTickets" Location from above:
+CounterUpdate cu = new CounterUpdate(-1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.decrement
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(-3)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+counter.decrement()
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var updateCmd = new UpdateCounter.Builder(-3)
+    .WithBucketType("counters")
+    .WithBucket("counters")
+    .WithKey("traffic_tickets")
+    .Build();
+
+rslt = client.Execute(updateCmd);
+response = updateCmd.Response;
+// response.Value is three less than before
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -1
+};
+
+// As with incrementing, you can also decrement by more than one, e.g.:
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -3
+};
+```
+
+```erlang
+Counter3 = riakc_counter:decrement(Counter2).
+
+%% As with incrementing, you can also decrement by more than one:
+
+Counter4 = riakc_counter:decrement(3, Counter3).
+
+%% At some point, we'll want to send our local updates to the server
+%% so they get recorded and are visible to others. Extract the update
+%% using the to_op/1 function, then pass it to
+%% riakc_pb_socket:update_type/4,5.
+
+riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>},
+    <<"traffic_tickets">>,
+    riakc_counter:to_op(Counter4)).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"decrement": 3}'
+```
diff --git a/content/riak/kv/2.9.1/developing/data-types/gsets.md b/content/riak/kv/2.9.1/developing/data-types/gsets.md
new file mode 100644
index 0000000000..eff8fb1cc3
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/data-types/gsets.md
@@ -0,0 +1,627 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: GSets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "GSets"
+    identifier: "data_types_gsets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/using/data-types/gsets
+  - /riak/kv/2.9.1/dev/using/data-types/gsets
+  - /riak/2.9.1/dev/data-modeling/data-types/gsets
+  - /riak/kv/2.9.1/dev/data-modeling/data-types/gsets
+---
+
+GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps).
+
+GSets are collections of unique binary values (such as strings). The values in a gset are automatically sorted alphabetically, irrespective of the order they were added.
+
+For example, if you attempt to add the element `shovel` to a gset that already contains `shovel`, the operation will be ignored by Riak KV.
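+
+A small sketch of that behavior with the Python client, assuming a `gset`
+object like the ones constructed later in this page and that the client's
+`GSet` mirrors the `Set` API used elsewhere in these docs:
+
+```python
+gset.add('shovel')
+gset.store()
+
+gset.add('shovel')  # already present, so the server ignores this
+gset.store()
+
+# The gset still contains a single 'shovel'
+```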
+
+Unlike regular sets, elements can only be added to a gset: once written, elements cannot be modified or deleted.
+
+> **Known Issue**
+>
+> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retry creating your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `gset`:
+
+```bash
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `gsets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: gset
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate gsets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+After creating and activating our new `gsets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using gsets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a gset:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+gset = bucket.new('2019-11-17')
+
+# or
+
+from riak.datatypes import GSet
+gset = GSet(bucket, '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with gsets
+// by building an Options object or using a Builder
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, gsets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store a list of transactions that occur for an account number on a specific date.
+Let's create a Riak gset stored in the key `2019-11-17` in the bucket `account-12345678` using the `gsets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of data types
+// before you perform operations on them:
+
+Location transactionsGSet =
+  new Location(new Namespace("gsets", "account-12345678"), "2019-11-17");
+```
+
+```ruby
+accounts = client.bucket_type('gsets').bucket('account-12345678')
+transactions_gset = Riak::Crdt::Set.new(accounts, '2019-11-17')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# make all set buckets use the gsets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'gsets'
+
+# This would enable us to create our gset without specifying a bucket
+# type:
+accounts = client.bucket('account-12345678')
+transactions_gset = Riak::Crdt::Set.new(accounts, '2019-11-17')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak gset.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a gset explicitly:
+from riak.datatypes import GSet
+
+gset = GSet(bucket, '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create an options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+Gset = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty. We can verify that it is empty at any time:
+
+```java
+// Using our "transactionsGSet" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(transactionsGSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+transactions_gset.empty?
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+count($gset->getData());
+```
+
+```python
+len(gset) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+FetchGSet fetchGSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchGSetCommand);
+GSetResponse response = fetchGSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("gset '2019-11-17' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_gset:size(Gset) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% gset that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a GSet
+
+But let's say that a pair of transactions occurred today. Let's add them to our `2019-11-17` gset:
+
+```java
+// Using our "transactionsGSet" Location from above:
+
+GSetUpdate su = new GSetUpdate()
+    .add("transaction a")
+    .add("transaction b");
+UpdateSet update = new UpdateSet.Builder(transactionsGSet, su)
+    .build();
+client.execute(update);
+```
+
+```ruby
+transactions_gset.add('transaction a')
+transactions_gset.add('transaction b')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('transaction a')
+  ->add('transaction b')
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+gset.add('transaction a')
+gset.add('transaction b')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "transaction a", "transaction b" };
+
+var builder = new UpdateGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17")
+    .WithAdditions(adds);
+
+UpdateGSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+GSetResponse response = cmd.Response;
+Assert.Contains("transaction a", response.AsStrings.ToArray());
+Assert.Contains("transaction b", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['transaction a', 'transaction b'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+Gset1 = riakc_gset:add_element(<<"transaction a">>, Gset),
+Gset2 = riakc_gset:add_element(<<"transaction b">>, Gset1).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["transaction a", "transaction b"]}'
+```
+
+## Remove from a GSet
+
+Removal from a GSet is not possible: gsets are grow-only.
+
+## Retrieve a GSet
+
+Now we can check which transactions are currently in our gset:
+
+```java
+// Using our "transactionsGSet" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(transactionsGSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue transaction : binarySet) {
+  System.out.println(transaction.toStringUtf8());
+}
+```
+
+```ruby
+transactions_gset.members
+
+# => #<Set: {"transaction a", "transaction b"}>
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+var_dump($gset->getData());
+```
+
+```python
+gset.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['transaction a', 'transaction b']), the call below would
+# return frozenset([]).
+
+gset.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions.
+gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(Gset2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(Gset2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, GsetX} = riakc_pb_socket:fetch_type(Pid,
+                                         {<<"gsets">>,<<"account-12345678">>},
+                                         <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "transactionsGSet" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(transactionsGSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("transaction z")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("transaction a")));
+```
+
+```ruby
+transactions_gset.include? 'transaction z'
+# false
+
+transactions_gset.include? 'transaction a'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use the standard JavaScript array method indexOf()
+
+var gsetValues = rslt.values;
+gsetValues.indexOf('transaction z'); // if present, index is >= 0
+gsetValues.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, Gset2 is the most "recent" gset from the standpoint
+%% of our application.
+
+riakc_gset:is_element(<<"transaction z">>, Gset2).
+riakc_gset:is_element(<<"transaction a">>, Gset2).
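+
+%% As noted earlier, query functions like is_element/2 operate over the
+%% immutable value most recently fetched from the server, not over any
+%% locally buffered additions.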
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "transactionsGSet" Location from above:
+
+FetchSet.Response response = client.execute(
+    new FetchSet.Builder(transactionsGSet).build());
+int gsetSize = response.getDatatype().view().size();
+```
+
+```ruby
+transactions_gset.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+response.Values.Count();
+```
+
+```javascript
+// Use the standard JavaScript array property length
+
+var gsetSize = rslt.values.length;
+```
+
+```erlang
+riakc_gset:size(Gset2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
diff --git a/content/riak/kv/2.9.1/developing/data-types/hyperloglogs.md b/content/riak/kv/2.9.1/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..83fd46dc86
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/data-types/hyperloglogs.md
@@ -0,0 +1,639 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/using/data-types/hyperloglogs
+  - /riak/kv/2.9.1/dev/using/data-types/hyperloglogs
+  - /riak/2.9.1/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/2.9.1/dev/data-modeling/data-types/hyperloglogs
+---
+
+The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog.
+
+For this example we'll use the `hlls` bucket type created and activated above and a bucket called `hlls`:
+
+```erlang
+%% Buckets are simply named binaries in the Erlang client. See the
+%% examples below for more information
+```
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location hllLocation =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// Buckets and bucket types are simply strings in the Go client.
+
+// See the examples below for more information, or the full example at
+// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go
+
+// We will need the following imports to run the examples:
+import (
+    "errors"
+    "fmt"
+    "os"
+    "time"
+
+    riak "github.com/basho/riak-go-client"
+)
+```
+
+```csharp
+// In the C# client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```javascript
+// In the Node.js client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('<key>', '<bucket>', 'hlls')
+    ->build();
+```
+
+```ruby
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-Data-Type
+# requests, which end in /keys/<key>
+```
+
+
+## Create a HyperLogLog data type
+
+To create a hyperloglog data structure, you need to specify a bucket/key pair to
+hold that hyperloglog. Here is the general syntax for doing so:
+
+```erlang
+HLL = riakc_hll:new().
+
+%% Hyperloglogs in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location hllLocation =
+  new Location(new Namespace("hlls", "hello"), "darkness");
+
+// In the Java client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// In the Go client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```csharp
+// In the C# client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```javascript
+// In the Node.js client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```php
+// Note that "hlls" is just an example HLL bucket type name used
+// in these examples
+
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('gosabres poked you.')
+    ->add('phprocks viewed your profile.')
+    ->add('phprocks started following you.')
+    ->buildBucket('<bucket>', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+key = "darkness"
+hll = Riak::Crdt::HyperLogLog.new(bucket, key)
+```
+
+```curl
+# You cannot create an empty hyperloglog data structure through the HTTP
+# interface.
+# Hyperloglogs can only be created when an element is added to them, as in the
+# examples below.
+```
+
+Upon creation, our hyperloglog data structure is empty:
+
+```erlang
+HLL.
+
+%% which will return:
+%% {hll,0,[]}
+```
+
+```java
+FetchHll fetch = new FetchHll.Builder(hllLocation)
+    .build();
+RiakHll hll = client.execute(fetch);
+boolean isEmpty = hll.getCardinality() == 0;
+```
+
+```python
+is_empty = hll.value == 0
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+});
+// Prints "Not Found" to logger.info.
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+// Prints "Not Found" to the console.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+
+$response->getCode() == '404';
+```
+
+```ruby
+puts hll.cardinality
+# Prints "0"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","error":"notfound"}
+```
+
+## Add elements to a HyperLogLog data type
+
+```erlang
+HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
+RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
+HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1),
+
+HLL2.
+
+%% which will return:
+%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]}
+```
+
+```java
+HllUpdate hllUpdate = new HllUpdate()
+    .add("Jokes")
+    .add("Are")
+    .addAll(Arrays.asList("Better", "Explained", "Jokes"));
+
+hllUpdate.getElementAdds();
+// Returns the set of ["Jokes", "Are", "Better", "Explained"]
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+// We will add values in the next example
+```
+
+```csharp
+// We will add values in the next example
+```
+
+```javascript
+// We will add values in the next example
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('Jokes')
+    ->add('Are')
+    ->add('Better')
+    ->add('Explained')
+    ->add('Jokes')
+    ->buildBucket('my_hlls', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+# We will add values in the next example
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["my", "old", "friend"]}'
+```
+
+However, when using a non-HTTP client, the approximate cardinality/value of our
+data structure will be 0, locally, until it's pushed to the server and then
+[fetched](#retrieve-a-hyperloglog-datatype) from the server.
+
+```erlang
+riakc_hll:value(HLL2) == 0.
+
+%% which will return:
+%% true
+
+Port = 8087,
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port),
+Key = <<"Holy Diver">>,
+BucketType = <<"hlls">>,
+Bucket = {BucketType, <<"rainbow in the dark">>},
+
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)).
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)).
+```
+
+```java
+// Using hllUpdate and hllLocation from above examples
+
+UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate)
+    .build();
+client.execute(update);
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+adds := [][]byte{
+    []byte("Jokes"),
+    []byte("Are"),
+    []byte("Better"),
+    []byte("Explained"),
+    []byte("Jokes"),
+}
+
+builder := riak.NewUpdateHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    WithAdditions(adds...).
+    Build()
+if err != nil {
+    return err
+}
+
+return cluster.Execute(cmd)
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness',
+    additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'],
+};
+
+client.updateHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```csharp
+var adds = new HashSet<string> { "Jokes", "Are", "Better", "Explained", "Jokes" };
+
+var update = new UpdateHll.Builder(adds)
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .WithReturnBody(true)
+    .Build();
+
+RiakResult rslt = client.Execute(update);
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('Jokes')
+    ->add('Are')
+    ->add('Better')
+    ->add('Explained')
+    ->add('Jokes')
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+hll.add('Jokes')
+hll.batch do |s|
+  s.add 'Are'
+  s.add 'Better'
+  s.add 'Explained'
+  s.add 'Jokes'
+end
+```
+
+## Retrieve a HyperLogLog data type
+
+Now, we can check the approximate count (that is, the cardinality) of the
+elements added to our hyperloglog data structure:
+
+```erlang
+{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key),
+riakc_hll:value(HLL3) == 4.
+
+%% which would return:
+%% true
+
+%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the
+%% unique elements we've added to the data structure.
+```
+
+```java
+FetchHll hllFetchCmd = new FetchHll.Builder(hllLocation).build();
+RiakHll hll = client.execute(hllFetchCmd);
+hll.getCardinality();
+// Which returns 4
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = bucket.get('hll_one')
+# myhll.value == 4
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+    logger.info("Hyperloglog cardinality is: " + rslt.cardinality);
+});
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
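+//
+// Keep in mind that hyperloglog counts are approximate by design; with
+// only a handful of unique elements, the estimate matches the exact
+// count.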
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($response->getHll()->getData()));
+$this->assertEquals(4, $response->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
diff --git a/content/riak/kv/2.9.1/developing/data-types/maps.md b/content/riak/kv/2.9.1/developing/data-types/maps.md
new file mode 100644
index 0000000000..b3957a4820
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/data-types/maps.md
@@ -0,0 +1,1881 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/using/data-types/maps
+  - /riak/kv/2.9.1/dev/using/data-types/maps
+  - /riak/2.9.1/dev/data-modeling/data-types/maps
+  - /riak/kv/2.9.1/dev/data-modeling/data-types/maps
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `maps` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: map
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+After creating and activating our new `maps` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key location that contains our map.
+
+The syntax for creating a map is analogous to the
+syntax for creating other data types:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location map =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+map = Riak::Crdt::Map.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type');
+```
+
+```python
+# The client detects the bucket type's datatype and automatically
+# returns the right datatype for you, in this case a Map.
+map = bucket.new(key)
+
+# This way is also acceptable:
+from riak.datatypes import Map
+map = Map(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("<bucket_type>")
+    .WithBucket("<bucket>")
+    .WithKey("<key>");
+```
+
+```javascript
+// Options to pass to the various map methods
+var options = {
+    bucketType: '<bucket_type>',
+    bucket: '<bucket>',
+    key: '<key>'
+};
+```
+
+```erlang
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Map
+
+For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket.
+
+We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type:
+
+```java
+// In the Java client, you specify the location of data types
+// before you perform operations on them:
+
+Location ahmedMap =
+  new Location(new Namespace("maps", "customers"), "ahmed_info");
+```
+
+```ruby
+customers = client.bucket_type('maps').bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+
+# Alternatively, the Ruby client enables you to set a bucket type as being
+# globally associated with a Riak data type. The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+    .update("first_name", ru1)
+    .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Ahmed')
+  ->updateRegister('phone_number', '5551234567')
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved. The following would work as well:
+map.registers['phone_number'].assign(str(5551234567))
+
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+
+var mapOperation = new UpdateMap.MapOperation();
+
+// Ahmed's first name
+mapOperation.SetRegister("first_name", "Ahmed");
+
+// Ahmed's phone number
+mapOperation.SetRegister("phone_number", "5551234567");
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+PrintMap(response.Value);
+// Output as JSON:
+// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setRegister('first_name', new Buffer('Ahmed'));
+mapOp.setRegister('phone_number', new Buffer('5551234567'));
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map1 = riakc_map:update({<<"first_name">>, register},
+                        fun(R) -> riakc_register:set(<<"Ahmed">>, R) end,
+                        Map),
+Map2 = riakc_map:update({<<"phone_number">>, register},
+                        fun(R) -> riakc_register:set(<<"5551234567">>, R) end,
+                        Map1).
+```
+
+```curl
+# Updates can be performed all at once. The following will create two new
+# registers in the map and also set the value of those registers to the
+# desired values
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "first_name_register": "Ahmed",
+      "phone_number_register": "5551234567"
+    }
+  }'
+```
+
+If a register did not previously exist, Riak KV will create that register for you.
+
+## Flags
+
+Flags behave much like Boolean values, except that instead of `true` or
+`false` flags have the values `enable` or `disable`.
+
+Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps.
+
+To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context).
+
+### Flags Within Maps
+
+Now let's say that we add an Enterprise plan to our pricing model. We'll
+create an `enterprise_customer` flag to track whether Ahmed has signed
+up for the new plan. He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+    .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('enterprise_customer', false)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//      "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//      "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"enterprise_customer">>, flag},
+                        fun(F) -> riakc_flag:disable(F) end,
+                        Map2).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "enterprise_customer_flag": "disable"
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+  }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"];
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map3).
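+
+%% dirty_value/1 returns the map's local view, including the
+%% enterprise_customer flag we just disabled.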
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+    .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateCounter('page_visits', $updateCounter)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//      "Sets":{},
+//      "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//      "Flags":{"enterprise_customer":false},
+//      "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map3).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "page_visits_counter": 1
+    }
+  }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles. We'll store
+that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+    .add("robots")
+    .add("opera")
+    .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+    .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('robots')
+  ->add('opera')
+  ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//      "Sets":{"interests":["motorcycles","opera","robots"]},
+//      "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//      "Flags":{"enterprise_customer":false},
+//      "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map5),
+Map7 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map6).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "add_all": [
+          "robots",
+          "opera",
+          "motorcycles"
+        ]
+      }
+    }
+  }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? interest
+  end
+end
+
+# This will return three Boolean values
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+$sets = $map->getSet('interests');
+var_dump($sets->getData());
+```
+
+```python
+reloaded_map = map.reload()
+for interest in ['robots', 'opera', 'motorcycles']:
+    interest in reloaded_map.sets['interests'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+
+// All of the following return true:
+ahmedMap.Sets.GetValue("interests").Contains("robots");
+ahmedMap.Sets.GetValue("interests").Contains("opera");
+ahmedMap.Sets.GetValue("interests").Contains("motorcycles");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    assert(rslt.map.sets['interests'].indexOf('robots') !== -1);
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false
+```
+
+We learn from a recent purchasing decision that Ahmed actually doesn't
+seem to like opera. He's much more keen on indie pop. Let's change the
+`interests` set to reflect that:
+
+```java
+// Using our "ahmedMap" location from above. Because this operation
+// removes an element from a set, we first fetch the map's opaque
+// context and include it in the update:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+
+SetUpdate su = new SetUpdate()
+    .remove("opera")
+    .add("indie pop");
+MapUpdate mu = new MapUpdate()
+    .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  m.sets['interests'].remove('opera')
+  m.sets['interests'].add('indie pop')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('indie pop')
+  ->remove('opera');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->withContext($map->getContext())
+  ->build()
+  ->execute();
+```
+
+```python
+map.sets['interests'].discard('opera')
+map.sets['interests'].add('indie pop')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", "indie pop");
+mapOperation.RemoveFromSet("interests", "opera");
+
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+Map ahmedMap = response.Value;
+
+// This is false
+ahmedMap.Sets.GetValue("interests").Contains("opera");
+
+// These are true
+ahmedMap.Sets.GetValue("interests").Contains("indie pop");
+ahmedMap.Sets.GetValue("interests").Contains("robots");
+ahmedMap.Sets.GetValue("interests").Contains("motorcycles");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.removeFromSet('interests', 'opera');
+    mapOp.addToSet('interests', 'indie pop');
+
+    options.context = rslt.context;
+    options.op = mapOp;
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map8 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map7),
+Map9 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+                        Map8).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of many different types---within
+the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+    .update("first_name", ru1)
+    .update("last_name", ru2)
+    .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+    .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Annika')
+  ->updateRegister('last_name', 'Weiss')
+  ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.map('annika_info') + .setRegister('first_name', 'Annika') + .setRegister('last_name', 'Weiss') + .setRegister('phone_number', '5559876543'); + +options.op = mapOp; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map12 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end, + Map11), +Map13 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"last_name">>, register}, + fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end, + Map12), +Map14 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end, + Map13). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "first_name_register": "Annika", + "last_name_register": "Weiss", + "phone_number_register": "5559876543" + } + } + } + } + ' +``` + +The value of a register in a map can be obtained without a special +method: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +String annikaFirstName = response.getDatatype() + .getMap("annika_info") + .getRegister("first_name") + .view() + .toString(); +``` + +```ruby +map.maps['annika_info'].registers['first_name'] + +# "Annika" +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getRegister('first_name'); // Annika +``` + +```python +map.reload().maps['annika_info'].registers['first_name'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Registers.GetValue("first_name"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var annikaFirstName = + rslt.map.maps['annika_info'].registers['first_name'].toString('utf8'); +}); +``` + +```erlang +riakc_map:dirty_value(Map14). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +Registers can also be removed: + +```java +// This example uses our "ahmedMap" location from above. 
Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .removeRegister("first_name");
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('first_name')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->removeRegister('first_name');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['first_name']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("first_name");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('first_name');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+    fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+    Map14).
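+
+%% riakc_map:erase/2 stages the removal of a field; note that it takes
+%% the {Name, Type} pair identifying the field, just as update/3 does.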
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "annika_info_map": {
+          "remove": ["phone_number_register"]
+        }
+      },
+      "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+    }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("enterprise_plan", new FlagUpdate(false))
+        .update("family_plan", new FlagUpdate(false))
+        .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_plan', false)
+    ->updateFlag('family_plan', false)
+    ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"enterprise_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map15),
+Map17 = riakc_map:update(
{<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"family_plan">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + M) end, + Map16), +Map18 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"free_plan">>, flag}, + fun(F) -> riakc_flag:enable(F) end, + M) end, + Map17). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "enterprise_plan_flag": "disable", + "family_plan_flag": "disable", + "free_plan_flag": "enable" + } + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag==" + } + ' +``` + +The value of a flag can be retrieved at any time: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +boolean enterprisePlan = response.getDatatype() + .getMap("annika_info") + .getFlag("enterprise_plan") + .view(); +``` + +```ruby +map.maps['annika_info'].flags['enterprise_plan'] + +# false +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false +``` + +```python +map.reload().maps['annika_info'].flags['enterprise_plan'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Flags["enterprise_plan"]; +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var enterprisePlan = + rslt.map.maps.annika_info.flags.enterprise_plan; +}); +``` + +```erlang +riakc_map:dirty_value(Map18). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +It's also important to track the number of purchases that Annika has +made with our company. 
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
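+
+%% Nested counters use the ordinary riakc_counter API; the increment is
+%% staged inside Map19 and is not sent to Riak until the map is stored.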
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
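+
+%% Each riakc_map:update/3 call returns a new local map value
+%% (Map19 -> Map20), so the latest value must be threaded into the
+%% next update.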
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "annika_info_map": {
+          "update": {
+            "interests_set": {
+              "add": "tango dancing"
+            }
+          }
+        }
+      }
+    }
+  '
+```
+
+We can remove that interest in just the way that we would expect:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate().remove("tango dancing");
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("interests", su);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].sets['interests'].remove('tango dancing')
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('tango dancing');
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($response->getMap()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].sets['interests'].discard('tango dancing')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing");
+
+// Note: using Context from previous response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+client.Execute(builder.Build());
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.removeFromSet('interests', 'tango dancing');
+
+    options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map21 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"interests">>, set},
+        fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end,
+        M) end,
+    Map20).
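+
+%% A sketch of pushing the staged updates to Riak, assuming Pid is a
+%% riakc_pb_socket connection:
+%%
+%%   riakc_pb_socket:update_type(Pid, {<<"maps">>, <<"customers">>},
+%%                               <<"ahmed_info">>, riakc_map:to_op(Map21)).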
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "annika_info_map": {
+          "update": {
+            "interests_set": {
+              "remove": "tango dancing"
+            }
+          }
+        }
+      },
+      "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+    }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('first_purchase', true)
+    ->updateRegister('amount', '1271')
+    ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map22 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"purchase">>, map},
+        fun(P) -> riakc_map:update(
+            {<<"first_purchase">>, flag},
+            fun(F) -> riakc_flag:enable(F) end,
+            P) end,
+        M) end,
+    Map21
+).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "annika_info_map": {
+          "update": {
+            "purchase_map": {
+              "update": {
+                "first_purchase_flag": "enable",
+                "amount_register": "1271",
+                "items_set": {
+                  "add": "large widget"
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  '
+```
diff --git a/content/riak/kv/2.9.1/developing/data-types/sets.md b/content/riak/kv/2.9.1/developing/data-types/sets.md
new file mode 100644
index 0000000000..cc7cd224f3
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/data-types/sets.md
@@ -0,0 +1,769 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Sets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Sets"
+    identifier: "data_types_sets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/using/data-types/sets
+  - /riak/kv/2.9.1/dev/using/data-types/sets
+  - /riak/2.9.1/dev/data-modeling/data-types/sets
+  - /riak/kv/2.9.1/dev/data-modeling/data-types/sets
+---
+
+Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps).
+
+Sets are collections of unique binary values (such as strings). For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `set`:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+```
+
+> **Note**
+>
+> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `sets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: set
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate sets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+After creating and activating our new `sets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket
+`travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type.
The following would +# set all set buckets to use the sets bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets' + +# This would enable us to create our set without specifying a bucket +# type: +travel = client.bucket('travel') +cities_set = Riak::Crdt::Set.new(travel, 'cities') +``` + +```php +$location = new \Basho\Riak\Location('cities', 'travel', 'sets'); +``` + +```python +travel = client.bucket_type('sets').bucket('travel') + +# The client detects the bucket type's data type and automatically +# returns the right data type for you, in this case a Riak set. +cities_set = travel.new('cities') + +# You can also create a reference to a set explicitly: +from riak.datatypes import Set + +cities_set = Set(travel, 'cities') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Now we'll create a Builder object for the set with which we want to +// interact: +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); +``` + +```javascript +// Now we'll create a options object for the set with which we want to +// interact: +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +``` + +```erlang +CitiesSet = riakc_set:new(). + +%% Sets in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +# You cannot create an empty set through the HTTP interface. Sets can +# only be created when an element is added to them, as in the examples +# below. +``` + +Upon creation, our set is empty. We can verify that it is empty at any +time: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +RiakSet set = response.getDatatype(); +boolean isEmpty = set.viewAsSet().isEmpty(); +``` + +```ruby +cities_set.empty? +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($set->getData()); +``` + +```python +len(cities_set) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); + +FetchSet fetchSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchSetCommand); +SetResponse response = fetchSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("set 'cities' is not found!"); + } +}); +``` + +```erlang +riakc_set:size(CitiesSet) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% set that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a Set + +But let's say that we read a travel brochure saying that Toronto and +Montreal are nice places to go. 
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+        .remove("Montreal")
+        .add("Hamilton")
+        .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Hamilton')
+    ->add('Ottawa')
+    ->remove('Montreal')
+    ->atLocation($location)
+    ->withContext($response->getSet()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse response = cmd.Response;
+
+// using System.Linq
+var responseStrings = response.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4).
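+
+%% The three calls above only stage changes locally. A sketch of sending
+%% them to Riak, assuming Pid is a riakc_pb_socket connection:
+%%
+%%   riakc_pb_socket:update_type(Pid, {<<"sets">>, <<"travel">>},
+%%                               <<"cities">>, riakc_set:to_op(CitiesSet5)).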
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}
+
+curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \
+  -H "Content-Type: application/json" \
+  -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}'
+```
+
+## Retrieve a Set
+
+Now, we can check on which cities are currently in our set:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+        .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+#<Set: {"Hamilton", "Ottawa", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+var_dump($set->getData());
+```
+
+```python
+cities_set.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would
+# return frozenset([]).
+
+cities_set.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions or deletions.
+cities_set.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in setResponse.AsStrings)
+{
+    Console.WriteLine("Cities Set Value: {0}", value);
+}
+
+// Output:
+// Cities Set Value: Hamilton
+// Cities Set Value: Ottawa
+// Cities Set Value: Toronto
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("cities set values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: cities set values: 'Hamilton, Ottawa, Toronto'
+```
+
+```erlang
+riakc_set:dirty_value(CitiesSet5).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_set:value(CitiesSet5).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"sets">>,<<"travel">>},
+                                        <<"cities">>).
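+
+%% Pid above is a riakc_pb_socket connection, opened with something like:
+%%   {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).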
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="}
+
+# You can also fetch the value of the set without the context included:
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false
+
+# Response
+{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]}
+```
+
+## Find Set Member
+
+Or we can see whether our set includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+        .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('Vancouver', $set->getData()); # false
+
+in_array('Ottawa', $set->getData()); # true
+```
+
+```python
+'Vancouver' in cities_set
+# False
+
+'Ottawa' in cities_set
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver");
+bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var cities_set = rslt.values;
+cities_set.indexOf('Vancouver'); // if present, index is >= 0
+cities_set.indexOf('Ottawa'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, CitiesSet5 is the most "recent" set from the
+%% standpoint of our application.
+
+riakc_set:is_element(<<"Vancouver">>, CitiesSet5).
+riakc_set:is_element(<<"Ottawa">>, CitiesSet5).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of Set
+
+We can also determine the size of the set:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+int numberOfCities = response.getDatatype().view().size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($set->getData());
+```
+
+```python
+len(cities_set)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+setResponse.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var cities_set_size = rslt.values.length;
+```
+
+```erlang
+riakc_set:size(CitiesSet5).
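+
+%% As noted earlier, size/1 operates on the value fetched from the
+%% server, so locally staged additions and removals are not counted.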
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
diff --git a/content/riak/kv/2.9.1/developing/faq.md b/content/riak/kv/2.9.1/developing/faq.md
new file mode 100644
index 0000000000..fb8a4d6b31
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/faq.md
@@ -0,0 +1,654 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Frequently Asked Questions"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Developing FAQ"
+    identifier: "developing_faq"
+    weight: 108
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.9.1/community/faqs/developing
+  - /riak/kv/2.9.1/community/faqs/developing
+---
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.1/using/performance/benchmarking
+[Bitcask]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask
+[Bucket Properties]: {{<baseurl>}}riak/kv/2.9.1/developing/usage
+[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/commit-hooks
+[Configuration Files]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{<baseurl>}}riak/kv/2.9.1/developing/client-libraries
+[MapReduce]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce
+[Memory]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/memory
+[Riak CS]: {{<baseurl>}}riak/cs/2.1.1
+[System Planning]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context#vector-clocks
+
+
+## General
+
+
+**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?**
+
+**A:**
+  If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example:
+
+  ```erlang
+  {bitcask, [
+      {data_root, "data/bitcask"},
+      {expiry_secs, 86400} %% Expire after a day
+  ]},
+  ```
+
+  There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0.
+
+  You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM.
+
+
+---
+
+**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?**
+
+
+**A:**
+  Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free.
+
+  If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact.
+
+
+---
+
+**Q: Can I list buckets or keys in production?**
+
+
+**A:**
+  It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size.
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+
+---
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3.
+
+  For this example, I am using simple small integers instead of the actual 160-bit partition index values for the sake of simplicity. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key, and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+```
+0-1-2     1-2-3     2-3-4     3-4-5
+4-5-6     5-6-7     6-7-8     7-8-9
+8-9-10    9-10-11   10-11-12  11-12-13
+12-13-14  13-14-15  14-15-0   15-0-1
+```
+
+  Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes:
+
+```
+0-1-4-7-10-13   0-2-4-7-10-13   0-2-5-7-10-13   0-2-5-8-10-13
+0-2-5-8-11-13   0-2-5-8-11-14   0-3-4-7-10-13   0-3-5-7-10-13
+0-3-5-8-10-13   0-3-5-8-11-13   0-3-5-8-11-14   0-3-6-7-10-13
+0-3-6-8-10-13   0-3-6-8-11-13   0-3-6-8-11-14   0-3-6-9-10-13
+0-3-6-9-11-13   0-3-6-9-11-14   0-3-6-9-12-13   0-3-6-9-12-14
+0-3-6-9-12-15   1-2-5-8-11-14   1-3-5-8-11-14   1-3-6-8-11-14
+1-3-6-9-11-14   1-3-6-9-12-14   1-3-6-9-12-15   1-4-5-8-11-14
+1-4-6-8-11-14   1-4-6-9-11-14   1-4-6-9-12-14   1-4-6-9-12-15
+1-4-7-8-11-14   1-4-7-9-11-14   1-4-7-9-12-14   1-4-7-9-12-15
+1-4-7-10-11-14  1-4-7-10-12-14  1-4-7-10-12-15  1-4-7-10-13-14
+1-4-7-10-13-15  2-3-6-9-12-15   2-4-6-9-12-15   2-4-7-9-12-15
+2-4-7-10-12-15  2-4-7-10-13-15  2-5-6-9-12-15   2-5-7-9-12-15
+2-5-7-10-12-15  2-5-7-10-13-15  2-5-8-9-12-15   2-5-8-10-12-15
+2-5-8-10-13-15  2-5-8-11-12-15  2-5-8-11-13-15  2-5-8-11-14-15
+```
+
+  When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency.
+  A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for over half of them to respond. This results in consistent responses to GET even when one of the vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may include only 1 member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+  Continuing with the above example, consider if node C is force removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+  In this new 4-node configuration any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read-repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+  So making a couple of assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+      ```
+      a - 0-1-2
+      b - 6-7-8
+      c - 10-11-12
+      ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys will vary depending on the nodes chosen for the coverage set. Of the 56 possible covering sets ...
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+    ```
+    0-2-5-8-10-13  0-2-5-8-11-13  0-2-5-8-11-14  0-3-5-8-10-13
+    0-3-5-8-11-13  0-3-5-8-11-14  0-3-6-8-10-13  0-3-6-8-11-13
+    0-3-6-8-11-14  0-3-6-9-10-13  0-3-6-9-11-13  0-3-6-9-11-14
+    1-2-5-8-11-14  1-3-5-8-11-14  1-3-6-8-11-14  1-3-6-9-11-14
+    1-4-5-8-11-14  1-4-6-8-11-14  1-4-6-9-11-14  1-4-7-8-11-14
+    ```
+
+  * 24 sets (42.9%) will return 2 of the 3 keys:
+
+    `{a,b}` (7 sets)
+
+    ```
+    0-3-6-9-12-13  0-3-6-9-12-14  0-3-6-9-12-15  1-3-6-9-12-14
+    1-3-6-9-12-15  1-4-6-9-12-14  1-4-6-9-12-15
+    ```
+
+    `{a,c}` (12 sets)
+
+    ```
+    0-1-4-7-10-13  0-2-4-7-10-13  0-2-5-7-10-13  0-3-4-7-10-13
+    0-3-5-7-10-13  0-3-6-7-10-13  1-4-7-10-11-14 1-4-7-10-12-14
+    1-4-7-10-12-15 1-4-7-10-13-14 1-4-7-10-13-15 1-4-7-9-11-14
+    ```
+
+    `{b,c}` (5 sets)
+
+    ```
+    2-5-8-10-12-15 2-5-8-10-13-15 2-5-8-11-12-15 2-5-8-11-14-15
+    2-5-8-11-13-15
+    ```
+
+  * 10 sets (17.8%) will return only one of the 3 keys:
+
+    `{a}` (2 sets)
+
+    ```
+    1-4-7-9-12-14  1-4-7-9-12-15
+    ```
+
+    `{b}` (4 sets)
+
+    ```
+    2-3-6-9-12-15  2-4-6-9-12-15  2-5-6-9-12-15  2-5-8-9-12-15
+    ```
+
+    `{c}` (4 sets)
+
+    ```
+    2-4-7-10-12-15 2-4-7-10-13-15 2-5-7-10-12-15 2-5-7-10-13-15
+    ```
+
+  * 2 sets (3.6%) will not return any of the 3 keys:
+
+    ```
+    2-4-7-9-12-15  2-5-7-9-12-15
+    ```
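+
+  Since these covering-set counts are tedious to verify by hand, the following small, self-contained Python sketch recomputes the numbers in the lists above. It is not part of Riak; the ring size, `n_val`, key placements, and the empty vnodes 2, 7, and 12 are just the assumptions of this example:
+
+```python
+from collections import Counter
+from itertools import combinations
+
+RING_SIZE, N_VAL = 16, 3
+
+# Each preflist is a partition plus the next n_val - 1 partitions.
+preflists = [{(i + j) % RING_SIZE for j in range(N_VAL)}
+             for i in range(RING_SIZE)]
+
+# A covering set of 6 vnodes must intersect every preflist.
+covering = [set(c) for c in combinations(range(RING_SIZE), 6)
+            if all(p & set(c) for p in preflists)]
+print(len(covering))  # 56
+
+# Vnodes 2, 7, and 12 are the empty post-force-remove primaries, so a
+# key is returned only when the covering set touches a non-empty
+# replica in that key's preflist.
+empty = {2, 7, 12}
+keys = {'a': {0, 1, 2}, 'b': {6, 7, 8}, 'c': {10, 11, 12}}
+
+tally = Counter(
+    frozenset(k for k, pl in keys.items() if s & (pl - empty))
+    for s in covering)
+for found, count in sorted(tally.items(), key=lambda kv: -len(kv[0])):
+    print(sorted(found), count)
+```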
+ +--- + +**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. + +--- + +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + + +--- + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + + +--- + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
Handoff should then restart, but there may be no visual indicator. Simply leave the node running for awhile. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories.
+
+  ```erlang
+  ClusterNode = 'riak@127.0.0.1'.
+
+  application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+  {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+  Ring2 = setelement(2, Ring, node()).
+  riak_core_ring_manager:set_my_ring(Ring2).
+  riak_core_ring_manager:write_ringfile().
+  [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()].
+  ```
+
+
+---
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+  There isn't a limit on object size, but we suggest you keep it to no more than 1-2MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+
+---
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+  The storage per key is 40 bytes plus the key size and bucket name size.
+
+  Example:
+
+  Key size: 15 bytes.
+  Bucket Name size: 10 bytes.
+
+  Total size = 40 + 15 + 10 = **65 bytes**.
+
+
+---
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+  It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+
+---
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+  The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+
+  Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster.
+
+  The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+
+---
+
+**Q: Are Riak keys / buckets case sensitive?**
+
+
+**A:**
+  Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+
+---
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+  We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+  1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+  2. You will be able to upgrade Riak and your application independently of one another.
+  3. When your application or Riak need more capacity, you can scale them separately to meet your production needs.
+
+
+---
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+  Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+
+---
+
+**Q: How do I spread requests across---i.e.
load balance---a Riak cluster?** + + +**A:** + There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**. + + For further information see [System Planning]. + + +--- + + +**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?** + There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node a series of merges are kicked off and the total size of the data directory shrinks. Why does this happen? + + +**A:** + Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows: + + 1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`) + 2. Remove the currently active file from the list; the active file is the one being actively written + 3. Lookup file stats for each data file; this includes percent fragmentation and number of dead bytes + 4. If any of the stats exceed the defined triggers, the Bitcask directory is merged + + The default triggers for a Bitcask directory: + + * `{frag_merge_trigger, 60}, % >= 60% fragmentation` + * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB` + + In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory. + + If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default). + + +--- + +**Q: When retrieving a list of siblings I am getting the same vtag multiple times.** + When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling? + + +**A:** + The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata. + + It is possible to get siblings with the same vtag during vector clock pruning and read/repair. + + See [vector clocks] for more information. + + + +--- + +**Q: How should I structure larger data objects?** + I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs? + + +**A:** + The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include: + + 1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time? + 2. How likely are the objects to be updated at the same time by multiple processes? + + If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. 
Generally, you will want to add links to connect the objects for easy fetching and traversal. + + +--- + +**Q: Is there any way in Riak to limit access to a user or a group of users?** + + +**A:** + Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication. + + If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it. + + +--- + +**Q: Is there a way to enforce a schema on data in a given bucket?** + Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error. + + +**A:** + Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. This document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in Javascript that restricts non-JSON content. + + +--- + +**Q: How does the Erlang Riak Client manage node failures?** + Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down? + + +**A:** + The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function. + + * `queue_if_disconnected` (default: `false`) --- requests will be queued when the connection to the server is lost. + * `auto_reconnect` (default: `false`) --- if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`. + + If these options are both false, connection errors will be returned to the process-making requests as `{error, Reason}` tuples. + + +--- + +**Q: Is there a limiting factor for the number of buckets in a cluster?** + + +**A:** + As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node. + + More on [Bucket Properties]. + + +--- + +**Q: Is it possible to configure a single bucket's properties in `app.config`?** + + +**A:** + Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state. + + You can read more on `app.config` in [Configuration Files]. + + +--- + +**Q: Is there a simple command to delete a bucket?** + + +**A:** + There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work: + + ```curl + curl -X DELETE http://your-host:8098/riak/your-bucket + ``` + + +--- + +**Q: Can Riak be configured to fail an update instead of generating a conflict?** + + +**A:** + No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do. + + +--- + +**Q: How can I limit the number of keys retrieved?** + + +**A:** + You'll need to use a [MapReduce] job for this. 
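+
+ A rough sketch of that approach (assuming a connected Erlang client `Pid`): run an identity reduce over the bucket and truncate the result client-side. Like the `keys=stream` approach below, this limits what your application handles rather than the work the cluster does.
+
+ ```erlang
+ %% {reduce, {modfun, Mod, Fun}, Arg, Keep} is the standard phase
+ %% spec accepted by riakc_pb_socket:mapred/3.
+ {ok, [{0, BucketKeyPairs}]} =
+     riakc_pb_socket:mapred(Pid, <<"test">>,
+         [{reduce, {modfun, riak_kv_mapreduce, reduce_identity},
+           none, true}]),
+ First100 = lists:sublist(BucketKeyPairs, 100).
+ ```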
+ + You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster. It will only reduce load on your client. + + +--- + +**Q: How is the real hash value for replicas calculated based on the preflist?** + + +**A:** + The hash is calculated first and then the next subsequent *N* partitions are chosen for the preflist. + + +--- + +**Q: Do client libraries support load balancing/round robin?** + + +**A:** + + * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred. + * The Java client is strictly round robin, but with retries built in. + * The Python client also follows round robin without retries. + * The Erlang client does not support any load balancing. + +## MapReduce + + +**Q: Does the number of keys in a bucket affect the performance of MapReduce?** + + +**A:** + Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run. + + +--- + +**Q: How do I filter out `not_found` from MapReduce results?** + If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys. + + +**A:** + There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list]. + + +--- + +**Q: Is it possible to call a reduce function at specific intervals during a map function?** + When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often. + + +**A:** + Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce. + + +--- + +**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?** + + +**A:** + Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase. + + +--- + +**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?** + + +**A:** + This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com]. + + +--- + +**Q: Why does MapReduce return a JSON object on occasion instead of an array?** + + +**A:** + `mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---is turned into a hash: + + ```erlang + list_to_binary(mochijson2:encode([{a , b}, {foo, bar}])). + <<"{\"a\":\"b\",\"foo\":\"bar\"}">> + ``` + + JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists. 
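+
+ For example, a quick sketch in the Erlang shell (assuming the same `mochijson2` module):
+
+ ```erlang
+ %% Length-2 lists encode as nested JSON arrays instead of an object.
+ list_to_binary(mochijson2:encode([[a, b], [foo, bar]])).
+ <<"[[\"a\",\"b\"],[\"foo\",\"bar\"]]">>
+ ```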
diff --git a/content/riak/kv/2.9.1/developing/getting-started.md b/content/riak/kv/2.9.1/developing/getting-started.md new file mode 100644 index 0000000000..273a7ae89b --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started.md @@ -0,0 +1,46 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +--- + +[install index]: {{}}riak/kv/2.9.1/setup/installing +[dev client libraries]: {{}}riak/kv/2.9.1/developing/client-libraries + +Welcome, new Riak developer! This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
+* [Java]({{}}riak/kv/2.9.1/developing/getting-started/java)
+* [Ruby]({{}}riak/kv/2.9.1/developing/getting-started/ruby)
+* [Python]({{}}riak/kv/2.9.1/developing/getting-started/python)
+* [C Sharp]({{}}riak/kv/2.9.1/developing/getting-started/csharp)
+* [Node.js]({{}}riak/kv/2.9.1/developing/getting-started/nodejs)
+* [Erlang]({{}}riak/kv/2.9.1/developing/getting-started/erlang)
+* [PHP]({{}}riak/kv/2.9.1/developing/getting-started/php)
+* [Go]({{}}riak/kv/2.9.1/developing/getting-started/golang)
+ +### Community-supported Client Libraries + +Please see our [client libraries][dev client libraries] page for a listing of +community-supported clients. diff --git a/content/riak/kv/2.9.1/developing/getting-started/csharp.md b/content/riak/kv/2.9.1/developing/getting-started/csharp.md new file mode 100644 index 0000000000..b590a68100 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/csharp.md @@ -0,0 +1,82 @@ +--- +title: "Getting Started with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "C Sharp" + identifier: "getting_started_csharp" + weight: 103 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/csharp + - /riak/kv/2.9.1/dev/taste-of-riak/csharp +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.1/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. + +### Client Setup + +Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager. + +{{% note title="Configuring for a remote cluster" %}} +By default, the Riak .NET Client will add a section to your `app.config` file +for a four node local cluster. If you are using a remote cluster, open up +`app.config` and change the `hostAddress` values to point to nodes in your +remote cluster. +{{% /note %}} + +### Connecting to Riak + +Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object. + +```csharp +using System; +using RiakClient; + +namespace TasteOfRiak +{ + class Program + { + static void Main(string[] args) + { + // don't worry, we'll use this string later + const string contributors = "contributors"; + IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig"); + IRiakClient client = cluster.CreateClient(); + } + } +} +``` + +This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndpoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak. + +Let's make sure the cluster is online. Add this to your `Main` method: + +```csharp +var pingResult = client.Ping(); + +if (pingResult.IsSuccess) +{ + Console.WriteLine("pong"); +} +else +{ + Console.WriteLine("Are you sure Riak is running?"); + Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage); +} +``` + +This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes. + +We are now ready to start interacting with Riak. 
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.1/developing/getting-started/csharp/crud-operations) diff --git a/content/riak/kv/2.9.1/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/2.9.1/developing/getting-started/csharp/crud-operations.md new file mode 100644 index 0000000000..bddf30a9e7 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/csharp/crud-operations.md @@ -0,0 +1,143 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "CRUD Operations" + identifier: "getting_started_csharp_crud" + weight: 100 + parent: "getting_started_csharp" +toc: true +--- + +### Creating Objects In Riak + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak. + +The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At the most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak. + +Add the `RiakClient.Models` namespace to your using directive. Your usings should look like this: + +```csharp +using System; +using System.Collections.Generic; +using RiakClient; +using RiakClient.Models; +``` + +Add the `Person` class to the `TasteOfRiak` namespace: + +```csharp +public class Person +{ + public string EmailAddress { get; set; } + public string FirstName { get; set; } + public string LastName { get; set; } +} +``` + +Now let's create some people! + +```csharp +var people = new[] +{ + new Person { + EmailAddress = "bashoman@basho.com", + FirstName = "Basho", + LastName = "Man" + }, + new Person { + EmailAddress = "johndoe@gmail.com", + FirstName = "John", + LastName = "Doe" + } +}; + +foreach (var person in people) +{ + var o = new RiakObject(contributors, person.EmailAddress, person); + var putResult = client.Put(o); + + if (putResult.IsSuccess) + { + Console.WriteLine("Successfully saved {1} to bucket {0}", o.Key, o.Bucket); + } + else + { + Console.WriteLine("Are you *really* sure Riak is running?"); + Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage); + } +} +``` + +In this sample, we create a collection of `Person` objects and then save each `Person` to Riak. + +Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`. + +Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be an error message displaying the result code and a helpful error message. + +### Reading from Riak + +Let's find a person! + +```csharp +var result = client.Get(contributors, "bashoman@basho.com"); +if (result.IsSuccess) +{ + bashoman = result.Value.GetObject(); + Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage); +} +``` + +We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult` which, like other RiakResults, helpfully encapsulates the communication with Riak. 
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. diff --git a/content/riak/kv/2.9.1/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/2.9.1/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..6e6e0a87f1 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,107 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/2.9.1/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>
`marketing_group_Inbox_2014-03-06` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. + +For the `Msgs` bucket, let's use a combination of the username and the +posting UTC datetime in an [ISO 8601][iso_8601] +format. This combination gives us the pattern `_`, +which produces keys like `joeuser_2014-03-05T23:20:28`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `_Inbox_` for groups, which will look like +`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2MB. Objects larger than that can hurt +performance, especially when many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, please refer to +[this source code][2] for the repositories that we'll be using. + +[This console application][3] exercises the code that we've written. + +The repository pattern and `TimelineManager` help with a few things: + + - It helps us to see if an object exists before creating a new one + - It keeps our buckets and key names consistent + - It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. 
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + diff --git a/content/riak/kv/2.9.1/developing/getting-started/csharp/querying.md b/content/riak/kv/2.9.1/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..c0f8cd8163 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/csharp/querying.md @@ -0,0 +1,210 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/querying-csharp + - /riak/kv/2.9.1/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+ +```csharp +Console.WriteLine("Creating Data"); +Customer customer = CreateCustomer(); +IEnumerable orders = CreateOrders(customer); +OrderSummary orderSummary = CreateOrderSummary(customer, orders); + +Console.WriteLine("Starting Client"); +using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = endpoint.CreateClient(); + + Console.WriteLine("Storing Data"); + + client.Put(ToRiakObject(customer)); + + foreach (Order order in orders) + { + // NB: this adds secondary index data as well + client.Put(ToRiakObject(order)); + } + + client.Put(ToRiakObject(orderSummary)); + + ... + ... + ... +} +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```csharp +Console.WriteLine("Fetching related data by shared key"); +string key = "1"; + +var result = client.Get(customersBucketName, key); +CheckResult(result); +Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result)); + +result = client.Get(orderSummariesBucketName, key); +CheckResult(result); +Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result)); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.1/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will make a note of where +secondary index data is added to our model objects. 
+ +```csharp +private static RiakObject ToRiakObject(Order order) +{ + var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString()); + var riakObject = new RiakObject(orderRiakObjectId, order); + + IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName); + salesPersonIdIndex.Add(order.SalesPersonId.ToString()); + + BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName); + orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd")); + + return riakObject; +} +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +```csharp +// Query for order keys where the SalesPersonId index is set to 9000 +var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName); +RiakResult indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here. +CheckResult(indexRiakResult); +RiakIndexResult indexResult = indexRiakResult.Value; +Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +Jane's orders (key values): 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```csharp +// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 +riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName); +indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here. +CheckResult(indexRiakResult); +indexResult = indexRiakResult.Value; +Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +October orders (key values): 1, 2 +``` + +We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip diff --git a/content/riak/kv/2.9.1/developing/getting-started/erlang.md b/content/riak/kv/2.9.1/developing/getting-started/erlang.md new file mode 100644 index 0000000000..8b7f39b1ad --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/erlang.md @@ -0,0 +1,55 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/erlang + - /riak/kv/2.9.1/dev/taste-of-riak/erlang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.1/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let’s create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.1/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.9.1/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/2.9.1/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..ccd2c67e7f --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,167 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +--- + +## Creating Objects In Riak + +First, let’s create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let’s store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}. 
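+%% NB: a non-binary value like this tuple is serialized with
+%% term_to_binary/1 by the client, which is why the reads below
+%% decode it with binary_to_term/1.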
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
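+%% This prints the original #book{} record, round-tripped intact.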
+``` + +Next let’s clean up our mess: + +```erlang +riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>). +riakc_pb_socket:stop(Pid). +``` diff --git a/content/riak/kv/2.9.1/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.9.1/developing/getting-started/erlang/object-modeling.md new file mode 100644 index 0000000000..541ba96154 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/erlang/object-modeling.md @@ -0,0 +1,338 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Object Modeling" + identifier: "getting_started_erlang_object" + weight: 102 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/object-modeling-erlang + - /riak/kv/2.9.1/dev/taste-of-riak/object-modeling-erlang +--- + +To get started, let's create the records that we'll be using. + +{{% note title="Code Download" %}} +You can also download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema). + +The Github version includes Erlang type specifications which have been omitted +here for brevity. +{{% /note %}} + + +```erlang +%% msgy.hrl + +-define(USER_BUCKET, <<"Users">>). +-define(MSG_BUCKET, <<"Msgs">>). +-define(TIMELINE_BUCKET, <<"Timelines">>). +-define(INBOX, "Inbox"). +-define(SENT, "Sent"). + +-record(user, {user_name, full_name, email}). + +-record(msg, {sender, recipient, created, text}). + +-record(timeline, {owner, msg_type, msgs}). +``` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.1/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. + + +Bucket | Key Pattern | Example Key +:------|:------------|:----------- +`Users` | `` | `joeuser` +`Msgs` | `_` | `joeuser_2014-03-06T02:05:13.223556Z` +`Timelines` | `__` | `joeuser_Sent_2014-03-06Z`
`marketing_group_Inbox_2014-03-06Z` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. For the +`Msgs` bucket, let's use a combination of the username and the posting +datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601) +format. This combination gives us the pattern `_`, +which produces keys like `joeuser_2014-03-05T23:20:28Z`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `_Inbox_` for groups, which will look like +`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2 MB. Objects larger than that can +hurt performance, especially if many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, let's write some modules to +act as repositories that will help us create and work with these records +in Riak: + +```erlang +%% user_repository.erl + +-module(user_repository). +-export([save_user/2, + get_user/2]). +-include("msgy.hrl"). + +save_user(ClientPid, User) -> + RUser = riakc_obj:new(?USER_BUCKET, + list_to_binary(User#user.user_name), + User), + riakc_pb_socket:put(ClientPid, RUser). + +get_user(ClientPid, UserName) -> + {ok, RUser} = riakc_pb_socket:get(ClientPid, + ?USER_BUCKET, + list_to_binary(UserName)), + binary_to_term(riakc_obj:get_value(RUser)). +``` + +
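+
+A quick sketch of how this module might be exercised from the shell (hypothetical values, assuming a client connection as shown earlier):
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017).
+User = #user{user_name="joeuser", full_name="Joe User", email="joe@example.com"}.
+user_repository:save_user(Pid, User).
+user_repository:get_user(Pid, "joeuser"). %% returns the #user{} record
+```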
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
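+
+One caveat: `erlang:now/0`, used above, is deprecated in recent OTP releases. A drop-in sketch of the same helper built on `os:timestamp/0` instead:
+
+```erlang
+%% Same key format as get_current_iso_timestamp/0, without erlang:now/0.
+get_current_iso_timestamp() ->
+    {_, _, MicroSec} = Now = os:timestamp(),
+    {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(Now),
+    lists:flatten(
+      io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B",
+                    [Year, Month, Day, Hour, Min, Sec, MicroSec])).
+```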
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
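+%% Assumes the three repository modules above are compiled and on the code path.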
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + diff --git a/content/riak/kv/2.9.1/developing/getting-started/erlang/querying.md b/content/riak/kv/2.9.1/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..aa3063c870 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/erlang/querying.md @@ -0,0 +1,303 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/querying-erlang + - /riak/kv/2.9.1/dev/taste-of-riak/querying-erlang +--- + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.9.1/developing/key-value-modeling). 
+ +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle= Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +## Remember to replace the ip and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). + +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj). 
+ +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.1/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
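+%% NB: lists:concat/1 does not zero-pad, so {{2013,10,1},{14,42,26}}
+%% yields "2013101144226" rather than "20131001144226". A padded
+%% variant (a sketch, not part of the original example) keeps binary
+%% index terms reliably sortable for range queries:
+%%   FormatDatePadded = fun({{Y,Mo,D},{H,Mi,S}}) ->
+%%       lists:flatten(io_lib:format(
+%%           "~4..0B~2..0B~2..0B~2..0B~2..0B~2..0B", [Y,Mo,D,H,Mi,S]))
+%%   end.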
+AddIndicesToOrder = fun(OrderKey) ->
+    {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket,
+                                      list_to_binary(integer_to_list(OrderKey))),
+
+    OrderData = binary_to_term(riakc_obj:get_value(Order)),
+    OrderMetadata = riakc_obj:get_update_metadata(Order),
+
+    MD1 = riakc_obj:set_secondary_index(OrderMetadata,
+                                        [{{binary_index, "order_date"},
+                                          [FormatDate(OrderData#order.order_date)]}]),
+
+    MD2 = riakc_obj:set_secondary_index(MD1,
+                                        [{{integer_index, "salesperson_id"},
+                                          [OrderData#order.salesperson_id]}]),
+
+    Order2 = riakc_obj:update_metadata(Order, MD2),
+    riakc_pb_socket:put(Pid, Order2)
+end.
+
+lists:foreach(AddIndicesToOrder, [1,2,3]).
+
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we
+have to add entries to the indices at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```erlang
+riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"3">>],
+                      undefined,undefined}}
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id; next, let's use a "binary" index. Let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`order_date_bin` index for entries between `20131001` and `20131031`.
+
+```erlang
+riakc_pb_socket:get_index_range(Pid, OrderBucket,
+                                {binary_index, "order_date"},
+                                <<"20131001">>, <<"20131031">>).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"2">>],
+                      undefined,undefined}}
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So, to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indices can have either Integer or Binary (string) keys.
+* You can search for specific values, or a range of values.
+* Riak will return a list of keys that match the index query.
diff --git a/content/riak/kv/2.9.1/developing/getting-started/golang.md b/content/riak/kv/2.9.1/developing/getting-started/golang.md
new file mode 100644
index 0000000000..b25b7f14e7
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/golang.md
@@ -0,0 +1,78 @@
+---
+title: "Getting Started with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Go"
+    identifier: "getting_started_go"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/taste-of-riak/golang
+  - /riak/kv/2.9.1/dev/taste-of-riak/golang
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.1/using/running-a-cluster) first and ensure you have
+[a working installation of Go](http://golang.org/doc/install).
+
+## Client Setup
+
+First install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"sync"
+
+	riak "github.com/basho/riak-go-client"
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+	var err error
+
+	// un-comment-out to enable debug logging
+	// riak.EnableDebugLogging = true
+
+	o := &riak.NewClientOptions{
+		RemoteAddresses: []string{util.GetRiakAddress()},
+	}
+
+	var c *riak.Client
+	c, err = riak.NewClient(o)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	defer func() {
+		if err := c.Stop(); err != nil {
+			util.ErrExit(err)
+		}
+	}()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.1/developing/getting-started/golang/crud-operations)
diff --git a/content/riak/kv/2.9.1/developing/getting-started/golang/crud-operations.md b/content/riak/kv/2.9.1/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..7c3afaf72a
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,370 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+---
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+	val1 := uint32(1)
+	val1buf := make([]byte, 4)
+	binary.LittleEndian.PutUint32(val1buf, val1)
+
+	val2 := "two"
+
+	val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+	var val3json []byte
+	val3json, err = json.Marshal(val3)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	bucket := "test"
+
+	util.Log.Println("Creating Objects In Riak...")
+
+	objs := []*riak.Object{
+		{
+			Bucket:      bucket,
+			Key:         "one",
+			ContentType: "application/octet-stream",
+			Value:       val1buf,
+		},
+		{
+			Bucket:      bucket,
+			Key:         "two",
+			ContentType: "text/plain",
+			Value:       []byte(val2),
+		},
+		{
+			Bucket:      bucket,
+			Key:         "three",
+			ContentType: "application/json",
+			Value:       val3json,
+		},
+	}
+
+	var cmd riak.Command
+	wg := &sync.WaitGroup{}
+
+	for _, o := range objs {
+		cmd, err = riak.NewStoreValueCommandBuilder().
+			WithContent(o).
+			Build()
+		if err != nil {
+			util.ErrLog.Println(err)
+			continue
+		}
+		a := &riak.Async{
+			Command: cmd,
+			Wait:    wg,
+		}
+		if err := c.ExecuteAsync(a); err != nil {
+			util.ErrLog.Println(err)
+		}
+	}
+
+	wg.Wait()
+```
+
+In our first object, we have stored the integer 1 with the lookup key
+of `one`:
+
+```golang
+{
+	Bucket:      bucket,
+	Key:         "one",
+	ContentType: "application/octet-stream",
+	Value:       val1buf,
+}
+```
+
+For our second object, we stored a simple string value of `two` with a
+matching key:
+
+```golang
+{
+	Bucket:      bucket,
+	Key:         "two",
+	ContentType: "text/plain",
+	Value:       []byte(val2),
+}
+```
+
+Finally, the third object we stored was a bit of JSON:
+
+```golang
+{
+	Bucket:      bucket,
+	Key:         "three",
+	ContentType: "application/json",
+	Value:       val3json,
+}
+```
+
+## Reading Objects
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+Requesting the objects by key:
+
+```golang
+util.Log.Println("Reading Objects From Riak...")
+
+d := make(chan riak.Command, len(objs))
+
+for _, o := range objs {
+	cmd, err = riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(o.Key).
+		Build()
+	if err != nil {
+		util.ErrLog.Println(err)
+		continue
+	}
+	a := &riak.Async{
+		Command: cmd,
+		Wait:    wg,
+		Done:    d,
+	}
+	if err := c.ExecuteAsync(a); err != nil {
+		util.ErrLog.Println(err)
+	}
+}
+
+wg.Wait()
+close(d)
+```
+
+Now we can loop over the fetched results and verify that each value
+matches what we stored:
+
+```golang
+for done := range d {
+	f := done.(*riak.FetchValueCommand)
+	/* un-comment to dump fetched object as JSON
+	if data, jerr := json.MarshalIndent(f.Response, "", "  "); jerr != nil {
+		util.ErrLog.Println(jerr)
+	} else {
+		util.Log.Println("fetched value: ", string(data))
+	}
+	*/
+	obj := f.Response.Values[0]
+	switch obj.Key {
+	case "one":
+		if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected {
+			util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+		}
+	case "two":
+		if actual, expected := string(obj.Value), val2; actual != expected {
+			util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+		}
+	case "three":
+		obj3 = obj
+		val3.MyValue = 0
+		if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil {
+			util.ErrLog.Println(jerr)
+		} else {
+			if actual, expected := val3.MyValue, int(3); actual != expected {
+				util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+			}
+		}
+	default:
+		util.ErrLog.Printf("unrecognized key: %s", obj.Key)
+	}
+}
+```
+
+## Updating Objects
+
+While some data may be static, other forms of data need to be
+updated.
+
+Let’s update some values:
+
+```golang
+util.Log.Println("Updating Object Three In Riak...")
+
+val3.MyValue = 42
+obj3.Value, err = json.Marshal(val3)
+if err != nil {
+	util.ErrExit(err)
+}
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+	WithContent(obj3).
+	WithReturnBody(true).
+	Build()
+if err != nil {
+	util.ErrLog.Println(err)
+} else {
+	if err := c.Execute(cmd); err != nil {
+		util.ErrLog.Println(err)
+	}
+}
+
+svcmd := cmd.(*riak.StoreValueCommand)
+svrsp := svcmd.Response
+obj3 = svrsp.Values[0]
+val3.MyValue = 0
+if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil {
+	util.ErrLog.Println(jerr)
+} else {
+	if actual, expected := val3.MyValue, int(42); actual != expected {
+		util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected)
+	}
+}
+util.Log.Println("updated object key: ", obj3.Key)
+util.Log.Println("updated object value: ", val3.MyValue)
+```
+
+## Deleting Objects
+
+As a last step, we’ll demonstrate how to delete data. We build a
+`DeleteValue` command with the object's bucket and key, and execute it:
+
+```golang
+for _, o := range objs {
+	cmd, err = riak.NewDeleteValueCommandBuilder().
+		WithBucket(o.Bucket).
+		WithKey(o.Key).
+		Build()
+	if err != nil {
+		util.ErrLog.Println(err)
+		continue
+	}
+	a := &riak.Async{
+		Command: cmd,
+		Wait:    wg,
+	}
+	if err := c.ExecuteAsync(a); err != nil {
+		util.ErrLog.Println(err)
+	}
+}
+
+wg.Wait()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+
+For example, this `struct` that represents some information about
+a book:
+
+```golang
+type Book struct {
+	ISBN        string
+	Title       string
+	Author      string
+	Body        string
+	CopiesOwned uint16
+}
+
+book := &Book{
+	ISBN:        "1111979723",
+	Title:       "Moby Dick",
+	Author:      "Herman Melville",
+	Body:        "Call me Ishmael. Some years ago...",
+	CopiesOwned: 3,
+}
+```
+
+We now have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now:
+
+```golang
+var jbook []byte
+jbook, err = json.Marshal(book)
+if err != nil {
+	util.ErrExit(err)
+}
+
+bookObj := &riak.Object{
+	Bucket:      "books",
+	Key:         book.ISBN,
+	ContentType: "application/json",
+	Value:       jbook,
+}
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+	WithContent(bookObj).
+	WithReturnBody(false).
+	Build()
+if err != nil {
+	util.ErrLog.Println(err)
+} else {
+	if err := c.Execute(cmd); err != nil {
+		util.ErrLog.Println(err)
+	}
+}
+```
+
+If we fetch our book back and print the data:
+
+```golang
+cmd, err = riak.NewFetchValueCommandBuilder().
+	WithBucket("books").
+	WithKey(book.ISBN).
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+if err := c.Execute(cmd); err != nil {
+	util.ErrLog.Println(err)
+}
+
+fcmd := cmd.(*riak.FetchValueCommand)
+bookObj = fcmd.Response.Values[0]
+util.Log.Println(string(bookObj.Value))
+```
+
+The result is (without JSON tags on the struct, Go marshals the
+exported field names as-is):
+
+```json
+{"ISBN":"1111979723","Title":"Moby Dick","Author":"Herman Melville",
+"Body":"Call me Ishmael. Some years ago...","CopiesOwned":3}
+```
+
+Now, let’s delete the book:
+
+```golang
+...
+``` diff --git a/content/riak/kv/2.9.1/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.9.1/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..b34201afba --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,548 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/object-modeling-golang + - /riak/kv/2.9.1/dev/taste-of-riak/object-modeling-golang +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.1/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+so that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+	"encoding/json"
+	"errors"
+
+	riak "github.com/basho/riak-go-client"
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+	Get(key string, notFoundOk bool) (models.Model, error)
+	Save(models.Model) (models.Model, error)
+	getBucketName() string
+	getModel() models.Model
+	getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+	client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+	return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(notFoundOk).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	fcmd := cmd.(*riak.FetchValueCommand)
+
+	if notFoundOk && len(fcmd.Response.Values) == 0 {
+		return nil, nil
+	}
+
+	if len(fcmd.Response.Values) > 1 {
+		// Siblings present that need resolution
+		// Here we'll just return an unexpected error
+		return nil, ErrUnexpectedSiblings
+	} else {
+		return buildModel(r.getModel(), fcmd.Response.Values[0])
+	}
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	key := m.GetId()
+
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(true).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	modelJson, err := json.Marshal(m)
+	if err != nil {
+		return nil, err
+	}
+
+	var objToInsertOrUpdate *riak.Object
+	fcmd := cmd.(*riak.FetchValueCommand)
+	if len(fcmd.Response.Values) > 1 {
+		// Siblings present that need resolution
+		// Here we'll just assume the first sibling is the "correct" one
+		// to update with the new Model data
+		// A conflict resolver can also be part of the options to fetchValue above
+		objToInsertOrUpdate = fcmd.Response.Values[0]
+		objToInsertOrUpdate.Value = modelJson
+	} else {
+		objToInsertOrUpdate = &riak.Object{
+			Bucket:      bucket,
+			Key:         key,
+			ContentType: "application/json",
+			Charset:     "utf8",
+			Value:       modelJson,
+		}
+	}
+
+	cmd, err = riak.NewStoreValueCommandBuilder().
+		WithContent(objToInsertOrUpdate).
+		WithReturnBody(true).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	scmd := cmd.(*riak.StoreValueCommand)
+	if len(scmd.Response.Values) > 1 {
+		return nil, ErrUnexpectedSiblings
+	}
+	obj := scmd.Response.Values[0]
+	return buildModel(r.getModel(), obj)
+}
+
+func buildModel(m models.Model, obj *riak.Object) (models.Model, error) {
+	err := json.Unmarshal(obj.Value, m)
+	m.SetId(obj.Key)
+	return m, err
+}
+```
+
+
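+One subtlety worth noting: the `id` field on `modelImpl` is unexported,
+so `encoding/json` skips it entirely. That is why `buildModel` calls
+`SetId(obj.Key)` after decoding: a model's id always comes from the Riak
+key, never from the stored JSON. A minimal, standalone sketch of that
+behavior (a hypothetical `main` package, not part of the repository code):
+
+```golang
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+)
+
+type modelImpl struct {
+	id string // unexported: invisible to encoding/json
+}
+
+func (m *modelImpl) SetId(id string) {
+	m.id = id
+}
+
+type User struct {
+	modelImpl
+	UserName string
+}
+
+func main() {
+	u := &User{UserName: "joeuser"}
+	u.SetId("joeuser")
+
+	// The unexported id is dropped on marshal...
+	data, _ := json.Marshal(u)
+	fmt.Println(string(data)) // {"UserName":"joeuser"}
+
+	// ...so after unmarshal it must be restored from the Riak key,
+	// which is exactly what buildModel's SetId(obj.Key) does.
+	var round User
+	_ = json.Unmarshal(data, &round)
+	round.SetId("joeuser")
+	fmt.Println(round.id, round.UserName)
+}
+```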
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
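+Here is a minimal usage sketch for `UserRepository` (the full test
+program appears at the end of this chapter); it assumes a started
+`riak.Client` named `client` and the `repos`/`models`/`util` import
+aliases used in that program:
+
+```golang
+userRepo := repos.NewUserRepository(client)
+
+// Save marshals the user to JSON and stores it under its natural
+// key, which GetId() derives from the username ("joeuser").
+if _, err := userRepo.Save(models.NewUser("joeuser", "Joe User", "joe.user@basho.com")); err != nil {
+	util.ErrExit(err)
+}
+
+// Get fetches and decodes the stored user (here we assume the key exists).
+m, err := userRepo.Get("joeuser", false)
+if err != nil {
+	util.ErrExit(err)
+}
+util.Log.Println(m.(*models.User).FullName) // Joe User
+```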
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
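+`MsgRepository` relies on `Msg.GetId()` to build the `<username>_<datetime>`
+keys described earlier. A quick standard-library-only sketch of the same
+idea (`util.Iso8601` is assumed to produce an ISO 8601 UTC timestamp with
+microsecond precision, as in the example keys):
+
+```golang
+package main
+
+import (
+	"fmt"
+	"time"
+)
+
+func main() {
+	created := time.Date(2014, 3, 6, 2, 5, 13, 223556000, time.UTC)
+
+	// An ISO 8601 timestamp sorts lexicographically, so message keys
+	// are naturally ordered by creation time within a user.
+	key := fmt.Sprintf("%s_%s", "joeuser",
+		created.Format("2006-01-02T15:04:05.000000Z07:00"))
+	fmt.Println(key) // joeuser_2014-03-06T02:05:13.223556Z
+}
+```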
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names.
+* How to choose natural keys based on how we want to partition our data.
+
+
diff --git a/content/riak/kv/2.9.1/developing/getting-started/golang/querying.md b/content/riak/kv/2.9.1/developing/getting-started/golang/querying.md
new file mode 100644
index 0000000000..59c102bec0
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/golang/querying.md
@@ -0,0 +1,576 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Querying"
+    identifier: "getting_started_go_query"
+    weight: 101
+    parent: "getting_started_go"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/taste-of-riak/querying-golang
+  - /riak/kv/2.9.1/dev/taste-of-riak/querying-golang
+---
+
+## Go Version Setup
+
+For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix OS.
+
+>A Quick Note on Querying and Schemas:
+>
+>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+### Denormalization
+
+If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it in a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+### Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled-up info about orders such as the Total, etc. Let's put some data into Riak KV so we can play with it.
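+
+One Go-specific detail before the code: `time.Parse` takes a layout
+written in terms of Go's fixed reference time, `Mon Jan 2 15:04:05 MST
+2006`, rather than `YYYY-MM-DD`-style tokens. The `timeFmt` constant in
+the program below follows that convention. A quick illustration:
+
+```golang
+package main
+
+import (
+	"fmt"
+	"time"
+)
+
+func main() {
+	// The layout spells out how the reference time would appear
+	// in the format we want to parse.
+	const timeFmt = "2006-01-02 15:04:05"
+
+	t, err := time.Parse(timeFmt, "2013-10-01 14:30:26")
+	if err != nil {
+		panic(err)
+	}
+	fmt.Println(t.Year(), t.Month(), t.Day()) // 2013 October 1
+}
+```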
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+cmds = append(cmds, cmd)
+
+doneChan := make(chan riak.Command)
+errored = false
+for _, cmd := range cmds {
+	a := &riak.Async{
+		Command: cmd,
+		Done:    doneChan,
+	}
+	if eerr := c.ExecuteAsync(a); eerr != nil {
+		errored = true
+		util.ErrLog.Println(eerr)
+	}
+}
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+for i := 0; i < len(cmds); i++ {
+	select {
+	case d := <-doneChan:
+		if fv, ok := d.(*riak.FetchValueCommand); ok {
+			obj := fv.Response.Values[0]
+			switch obj.Bucket {
+			case customersBucket:
+				util.Log.Printf("Customer 1: %v", string(obj.Value))
+			case orderSummariesBucket:
+				util.Log.Printf("OrderSummary 1: %v", string(obj.Value))
+			}
+		} else {
+			util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+		}
+	case <-time.After(5 * time.Second):
+		util.ErrExit(errors.New("fetch operations took too long"))
+	}
+}
+```
+
+Which returns our amalgamated objects:
+
+```sh
+2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]}
+2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z"}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.1/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time:
+
+```golang
+util.Log.Println("Adding Index Data")
+
+// fetch orders to add index data
+cmds = cmds[:0]
+
+for _, order := range orders {
+	cmd, err = riak.NewFetchValueCommandBuilder().
+		WithBucket(ordersBucket).
+		WithKey(order.Id).
+		Build()
+	if err != nil {
+		util.ErrExit(err)
+	}
+	cmds = append(cmds, cmd)
+}
+
+errored = false
+for _, cmd := range cmds {
+	a := &riak.Async{
+		Command: cmd,
+		Done:    doneChan,
+	}
+	if eerr := c.ExecuteAsync(a); eerr != nil {
+		errored = true
+		util.ErrLog.Println(eerr)
+	}
+}
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+errored = false
+for i := 0; i < len(cmds); i++ {
+	select {
+	case d := <-doneChan:
+		if fv, ok := d.(*riak.FetchValueCommand); ok {
+			obj := fv.Response.Values[0]
+			switch obj.Key {
+			case "1":
+				obj.AddToIntIndex("SalespersonId_int", 9000)
+				obj.AddToIndex("OrderDate_bin", "2013-10-01")
+			case "2":
+				obj.AddToIntIndex("SalespersonId_int", 9001)
+				obj.AddToIndex("OrderDate_bin", "2013-10-15")
+			case "3":
+				obj.AddToIntIndex("SalespersonId_int", 9000)
+				obj.AddToIndex("OrderDate_bin", "2013-11-03")
+			}
+			scmd, serr := riak.NewStoreValueCommandBuilder().
+				WithContent(obj).
+				Build()
+			if serr != nil {
+				util.ErrExit(serr)
+			}
+			a := &riak.Async{
+				Command: scmd,
+				Wait:    wg,
+			}
+			if eerr := c.ExecuteAsync(a); eerr != nil {
+				errored = true
+				util.ErrLog.Println(eerr)
+			}
+		} else {
+			util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+		}
+	case <-time.After(5 * time.Second):
+		util.ErrExit(errors.New("fetch operations took too long"))
+	}
+}
+
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+wg.Wait()
+close(doneChan)
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`:
+
+```golang
+util.Log.Println("Index Queries")
+
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+	WithBucket(ordersBucket).
+	WithIndexName("SalespersonId_int").
+	WithIndexKey("9000").
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+	util.ErrExit(eerr)
+}
+
+qcmd := cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+	util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 Jane's Orders, key: 3
+2015/12/29 09:44:10 Jane's Orders, key: 1
+```
+
+Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id; next, let's use a *binary* index.
+
+Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`:
+
+```golang
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+	WithBucket(ordersBucket).
+	WithIndexName("OrderDate_bin").
+	WithRange("2013-10-01", "2013-10-31").
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+	util.ErrExit(eerr)
+}
+
+qcmd = cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+	util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 October's Orders, key: 1
+2015/12/29 09:44:10 October's Orders, key: 2
+```
+
+Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary (string) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
diff --git a/content/riak/kv/2.9.1/developing/getting-started/java.md b/content/riak/kv/2.9.1/developing/getting-started/java.md
new file mode 100644
index 0000000000..6fbd4fc014
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/java.md
@@ -0,0 +1,89 @@
+---
+title: "Getting Started with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Java"
+    identifier: "getting_started_java"
+    weight: 100
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/taste-of-riak/java
+  - /riak/kv/2.9.1/dev/taste-of-riak/java
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.1/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Java is required.
+
+## Client Setup
+
+To include the Riak Java client in your project, add it to your
+project's dependencies. Here is a Maven example:
+
+```xml
+<dependency>
+  <groupId>com.basho.riak</groupId>
+  <artifactId>riak-client</artifactId>
+  <version>2.1.1</version>
+</dependency>
+```
+
+Next, download
+[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java)
+source code for this tutorial, and save it to your working directory.
+
+{{% note title="Configuring for a local cluster" %}}
+The `TasteOfRiak.java` file that you downloaded is set up to communicate with
+a 1-node Riak cluster listening on `localhost` port 10017. We recommend
+modifying the connection info directly within the `setUpCluster()` method.
+{{% /note %}}
+
+If you execute the `TasteOfRiak.java` file within your IDE, you should
+see the following:
+
+```
+Basic object created
+Location object created for quote object
+StoreValue operation created
+Client object successfully created
+Object storage operation successfully completed
+Success! The object we created and the object we fetched have the same value
+Quote object successfully deleted
+Book object created
+Moby Dick information now stored in Riak
+Book object successfully fetched
+Success! All of our tests check out
+```
+
+Since Java doesn’t have a REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting Up the Cluster
+
+The first step in using the Riak Java client is to create a cluster
+object to facilitate all interactions with Riak. You'll see this on line
+72:
+
+```java
+RiakCluster cluster = setUpCluster();
+```
+
+This calls the private `setUpCluster` method which begins on line 25.
+
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.1/developing/getting-started/java/crud-operations)
diff --git a/content/riak/kv/2.9.1/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.9.1/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..abc1293281
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,201 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on:
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Iceman");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see the [Updating Objects]({{}}riak/kv/2.9.1/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.1/developing/usage/conflict-resolution/)
+documentation.
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book:
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the more simple
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed up above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+
+To update the POJO, we would use `UpdateValue` by
+extending a new `BookUpdate` class as follows:
+
+```java
+public static class BookUpdate extends UpdateValue.Update<Book> {
+    private final Book update;
+    public BookUpdate(Book update){
+        this.update = update;
+    }
+
+    @Override
+    public Book apply(Book t) {
+        if(t == null) {
+            t = new Book();
+        }
+
+        t.author = update.author;
+        t.body = update.body;
+        t.copiesOwned = update.copiesOwned;
+        t.isbn = update.isbn;
+        t.title = update.title;
+
+        return t;
+    }
+}
+```
+
+Then using the `BookUpdate` class with our `mobyDick` object:
+
+```java
+mobyDick.copiesOwned = 5;
+BookUpdate updatedBook = new BookUpdate(mobyDick);
+
+UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation)
+        .withUpdate(updatedBook).build();
+UpdateValue.Response response = client.execute(updateValue);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see the [Updating Objects]({{}}riak/kv/2.9.1/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.1/developing/usage/conflict-resolution/)
+documentation.
diff --git a/content/riak/kv/2.9.1/developing/getting-started/java/object-modeling.md b/content/riak/kv/2.9.1/developing/getting-started/java/object-modeling.md
new file mode 100644
index 0000000000..4367065133
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/java/object-modeling.md
@@ -0,0 +1,428 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Object Modeling"
+    identifier: "getting_started_java_object"
+    weight: 102
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/taste-of-riak/object-modeling-java
+  - /riak/kv/2.9.1/dev/taste-of-riak/object-modeling-java
+---
+
+To get started, let's create the models that we'll be using.
+
+```java
+package com.basho.msgy.Models;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class Msg {
+    public String Sender;
+    public String Recipient;
+    public String Created;
+    public String Text;
+
+    public static Msg createNew(String sender, String recipient, String text) {
+        Msg msg = new Msg();
+        msg.Sender = sender;
+        msg.Recipient = recipient;
+        msg.Text = text;
+        msg.Created = GetCurrentISO8601Timestamp();
+        return msg;
+    }
+
+    private static String GetCurrentISO8601Timestamp() {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        // Java Dates don't have microsecond resolution :(
+        // Pad out to microseconds to match other examples.
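+        // 'SSS' gives milliseconds; the literal '000' pads the value
+        // out to microsecond width.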
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.convert.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "msgs";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. With the
+Java client, we can use the `@RiakKey` annotation to tell the client
+that we want to use the `UserName` member as the key. It will
+automatically use that value in the future, instead of having to pass the
+key in as another parameter when storing a value.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<username>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+so that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```java
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class MsgRepository {
+
+    static final String BUCKET_NAME = "Msgs";
+    protected RiakClient client;
+
+    public MsgRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public Msg get(String msgKey) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        FetchValue.Response response = client.execute(fetch);
+        return response.getValue(Msg.class);
+    }
+
+    public String save(Msg msg) throws Exception {
+        StoreValue store = new StoreValue.Builder(msg).build();
+        client.execute(store);
+        return generateKey(msg);
+    }
+
+    private String generateKey(Msg msg) {
+        return msg.Sender + "_" + msg.Created;
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class TimelineRepository {
+
+    static final String BUCKET_NAME = "Timelines";
+    protected RiakClient client;
+    protected MsgRepository msgRepo;
+
+    public TimelineRepository(RiakClient client) {
+        this.client = client;
+        this.msgRepo = new MsgRepository(this.client);
+    }
+
+    public void postMsg(Msg msg) throws Exception {
+        String msgKey = msgRepo.save(msg);
+
+        // Post to recipient's Inbox timeline
+        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);
+
+        // Post to sender's Sent timeline
+        
+    private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception {
+        String timelineKey = generateKeyFromMsg(msg, type);
+
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        Timeline timeline = client.execute(fetch).getValue(Timeline.class);
+
+        if (timeline != null) {
+            timeline = addToExistingTimeline(timeline, msgKey);
+        } else {
+            timeline = createNewTimeline(msg, type, msgKey);
+        }
+
+        StoreValue store = new StoreValue.Builder(timeline)
+                .withLocation(loc)
+                .build();
+        client.execute(store);
+    }
+
+    public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) {
+        String owner = getOwner(msg, type);
+
+        Timeline newTimeline = new Timeline();
+        newTimeline.Owner = owner;
+        newTimeline.Type = type.toString();
+        newTimeline.Msgs.add(msgKey);
+
+        return newTimeline;
+    }
+
+    public Timeline addToExistingTimeline(Timeline timeline, String msgKey) {
+        timeline.Msgs.add(msgKey);
+        return timeline;
+    }
+
+    public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws Exception {
+        String timelineKey = generateKey(ownerUsername, type, date);
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        return client.execute(fetch).getValue(Timeline.class);
+    }
+
+    private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) {
+        String owner = getOwner(msg, type);
+        String dateString = msg.Created.substring(0, 10);
+        return generateKey(owner, type, dateString);
+    }
+
+    private String getOwner(Msg msg, Timeline.TimelineType type) {
+        if(type == Timeline.TimelineType.Inbox)
+            return msg.Recipient;
+        else
+            return msg.Sender;
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) {
+        String dateString = getIso8601DateStringFromDate(date);
+        return generateKey(ownerUsername, type, dateString);
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) {
+        return ownerUsername + "_" + type.toString() + "_" + dateString;
+    }
+
+    private String getIso8601DateStringFromDate(Date date) {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
+        df.setTimeZone(tz);
+        return df.format(date);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.User;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class UserRepository {
+    static final String BUCKET_NAME = "Users";
+    protected RiakClient client;
+
+    public UserRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public void save(User user) throws Exception {
+        // The @RiakKey and @RiakBucketName annotations on User supply the location
+        StoreValue store = new StoreValue.Builder(user).build();
+        client.execute(store);
+    }
+
+    public User get(String userName) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), userName);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        return client.execute(fetch).getValue(User.class);
+    }
+}
+```
+
+Finally, let's test them:
+
+```java
+package com.basho.msgy;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.msgy.Models.User;
+import com.basho.msgy.Repositories.MsgRepository;
+import com.basho.msgy.Repositories.TimelineRepository;
+import com.basho.msgy.Repositories.UserRepository;
+import com.basho.riak.client.api.RiakClient;
+
+import java.util.Date;
+
+public class MsgyMain {
+
+    public static void main(String[] args) throws Exception {
+        // Set up our repositories
+        RiakClient client = RiakClient.newClient(10017, "127.0.0.1");
+
+        UserRepository userRepo = new UserRepository(client);
+        MsgRepository msgRepo = new MsgRepository(client);
+        TimelineRepository timelineRepo = new TimelineRepository(client);
+
+        // Create and save users
+        User marleen = new User("marleenmgr",
+                "Marleen Manager",
+                "marleen.manager@basho.com");
+
+        User joe = new User("joeuser",
+                "Joe User",
+                "joe.user@basho.com");
+
+        userRepo.save(marleen);
+        userRepo.save(joe);
+
+        // Create new Msg, post to timelines
+        Msg msg = Msg.createNew(marleen.UserName,
+                joe.UserName,
+                "Welcome to the company!");
+
+        timelineRepo.postMsg(msg);
+
+        // Get Joe's inbox for today, get first message
+        Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName,
+                Timeline.TimelineType.Inbox,
+                new Date());
+
+        Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0));
+
+        System.out.println("From: " + joesFirstMsg.Sender);
+        System.out.println("Msg : " + joesFirstMsg.Text);
+        System.out.println("");
+
+        client.shutdown();
+    }
+}
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
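+
+That "last 10 messages" question deserves a moment. Because timeline keys are
+computable, one day's worth of sent messages is a single fetch away. Here is a
+rough sketch using the repositories above (illustrative only; no error
+handling, and it does not walk back across prior days):
+
+```java
+TimelineRepository timelines = new TimelineRepository(client);
+MsgRepository msgs = new MsgRepository(client);
+
+// Fetch today's Sent timeline for joeuser and print its last ten messages
+Timeline sentToday = timelines.getTimeline("joeuser", Timeline.TimelineType.Sent, new Date());
+if (sentToday != null) {
+    int from = Math.max(0, sentToday.Msgs.size() - 10);
+    for (String msgKey : sentToday.Msgs.subList(from, sentToday.Msgs.size())) {
+        System.out.println(msgs.get(msgKey).Text);
+    }
+}
+```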
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data
diff --git a/content/riak/kv/2.9.1/developing/getting-started/java/querying.md b/content/riak/kv/2.9.1/developing/getting-started/java/querying.md
new file mode 100644
index 0000000000..9cd0a4db9c
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/java/querying.md
@@ -0,0 +1,276 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Querying"
+    identifier: "getting_started_java_query"
+    weight: 101
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/taste-of-riak/querying-java
+  - /riak/kv/2.9.1/dev/taste-of-riak/querying-java
+---
+
+## Java Version Setup
+
+For the Java version, please download the source from GitHub by either
+[cloning](https://github.com/basho/taste-of-riak) the source code
+repository or downloading the [current zip of the master
+branch](https://github.com/basho/taste-of-riak/archive/master.zip).
+The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`. You
+may import this code into your favorite editor, or just run it from the
+command line using the commands in `BuildAndRun.sh` if you are running
+on a *nix OS.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can range from
+using the same key across multiple buckets for different types of data
+to having fields in your data that are related by name. These querying
+methods will introduce you to some ways of laying out your data in
+Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled-up info about orders such as the total, etc. You can find the source
+for these POJOs in `Customer.java`, `Order.java` and
+`OrderSummaries.java`. Let's put some data into Riak so we can play
+with it.
+
+```java
+// From SipOfRiak.java
+
+private static Customer createCustomer() {
+    Customer customer = new Customer();
+    customer.CustomerId = 1;
+    customer.Name = "John Smith";
+    customer.Address = "123 Main Street";
+    customer.City = "Columbus";
+    customer.State = "Ohio";
+    customer.Zip = "43210";
+    customer.Phone = "+1-614-555-5555";
+    customer.CreatedDate = "2013-10-01 14:30:26";
+    return customer;
+}
+
+private static ArrayList<Order> createOrders() {
+    ArrayList<Order> orders = new ArrayList<Order>();
+
+    Order order1 = new Order();
+    order1.OrderId = 1;
+    order1.CustomerId = 1;
+    order1.SalespersonId = 9000;
+    order1.Items.add(
+        new Item("TCV37GIT4NJ",
+                 "USB 3.0 Coffee Warmer",
+                 15.99));
+    order1.Items.add(
+        new Item("PEG10BBF2PP",
+                 "eTablet Pro; 24GB; Grey",
+                 399.99));
+    order1.Total = 415.98;
+    order1.OrderDate = "2013-10-01 14:42:26";
+    orders.add(order1);
+
+    Order order2 = new Order();
+    order2.OrderId = 2;
+    order2.CustomerId = 1;
+    order2.SalespersonId = 9001;
+    order2.Items.add(
+        new Item("OAX19XWN0QP",
+                 "GoSlo Digital Camera",
+                 359.99));
+    order2.Total = 359.99;
+    order2.OrderDate = "2013-10-15 16:43:16";
+    orders.add(order2);
+
+    Order order3 = new Order();
+    order3.OrderId = 3;
+    order3.CustomerId = 1;
+    order3.SalespersonId = 9000;
+    order3.Items.add(
+        new Item("WYK12EPU5EZ",
+                 "Call of Battle = Goats - Gamesphere 4",
+                 69.99));
+    order3.Items.add(
+        new Item("TJB84HAA8OA",
+                 "Bricko Building Blocks",
+                 4.99));
+    order3.Total = 74.98;
+    order3.OrderDate = "2013-11-03 17:45:28";
+    orders.add(order3);
+    return orders;
+}
+
+private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
+    OrderSummary orderSummary = new OrderSummary();
+    orderSummary.CustomerId = 1;
+    for(Order order: orders)
+    {
+        orderSummary.Summaries.add(new OrderSummaryItem(order));
+    }
+    return orderSummary;
+}
+
+public static void main(String[] args) throws RiakException {
+
+    System.out.println("Creating Data");
+    Customer customer = createCustomer();
+    ArrayList<Order> orders = createOrders();
+    OrderSummary orderSummary = createOrderSummary(orders);
+
+    System.out.println("Starting Client");
+    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);
+
+    System.out.println("Creating Buckets");
+    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
+    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
+    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();
+
+    System.out.println("Storing Data");
+    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
+    for (Order order : orders) {
+        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
+    }
+    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```java
+    System.out.println("Fetching related data by shared key");
+    String key = "1";
+    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
+    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
+    System.out.format("Customer 1: %s\n", fetchedCust);
+    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```java
+    System.out.println("Adding Index Data");
+    IRiakObject riakObj = ordersBucket.fetch("1").execute();
+    riakObj.addIndex("SalespersonId", 9000);
+    riakObj.addIndex("OrderDate", "2013-10-01");
+    ordersBucket.store(riakObj).execute();
+
+    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
+    riakObj2.addIndex("SalespersonId", 9001);
+    riakObj2.addIndex("OrderDate", "2013-10-15");
+    ordersBucket.store(riakObj2).execute();
+
+    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
+    riakObj3.addIndex("SalespersonId", 9000);
+    riakObj3.addIndex("OrderDate", "2013-11-03");
+    ordersBucket.store(riakObj3).execute();
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's id of `9000`.
+
+```java
+    // Query for orders where the SalespersonId index is set to 9000
+    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
+                                  .withValue(9000).execute();
+
+    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
+```
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
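+
+The `StringUtil.Join` call above comes from the chapter's sample project
+rather than from the Riak client or the JDK; a minimal stand-in could look
+like this:
+
+```java
+import java.util.List;
+
+// Illustrative stand-in for the sample project's StringUtil helper.
+public class StringUtil {
+    public static String Join(String separator, List<String> values) {
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < values.size(); i++) {
+            if (i > 0) {
+                sb.append(separator);
+            }
+            sb.append(values.get(i));
+        }
+        return sb.toString();
+    }
+}
+```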
Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```java + // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 + List octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate")) + .from("2013-10-01").to("2013-10-31").execute(); + + System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders)); +``` + +Which returns: + +```text +October's Orders: 1, 2 +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.9.1/developing/getting-started/nodejs.md b/content/riak/kv/2.9.1/developing/getting-started/nodejs.md new file mode 100644 index 0000000000..19709b248e --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/nodejs.md @@ -0,0 +1,100 @@ +--- +title: "Getting Started with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "NodeJS" + identifier: "getting_started_nodejs" + weight: 104 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/nodejs + - /riak/kv/2.9.1/dev/taste-of-riak/nodejs +--- + +[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js +[npm]: https://www.npmjs.com/package/basho-riak-client +[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.1/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Node.js 0.12 or later is +required. + +Code for these examples is available [here][introduction.js]. To run, follow +these directions: + +```bash +git clone git://github.com/basho/riak-nodejs-client-examples +cd riak-nodejs-client-examples +npm install +node ./app.js +``` + +### Client Setup + +Install [the Riak Node.js Client][node_js_installation] through [NPM][npm]. + +### Connecting to Riak + +Connecting to Riak with the Riak Node.js Client requires creating a new client +object and using the callback argument to know when the client is fully +initialized: + +```javascript +var Riak = require('basho-riak-client'); +var nodes = [ + 'riak-test:10017', + 'riak-test:10027', + 'riak-test:10037', + 'riak-test:10047' +]; +var client = new Riak.Client(nodes, function (err, c) { + // NB: at this point the client is fully initialized, and + // 'client' and 'c' are the same object +}); +``` + +This creates a new `Riak.Client` object which handles all the details of +tracking active nodes and also provides load balancing. The `Riak.Client` object +is used to send commands to Riak. 
When your application is completely done with +Riak communications, the following method can be used to gracefully shut the +client down and exit Node.js: + +```javascript +client.stop(function (err, rslt) { + // NB: you may wish to check err + process.exit(); +}); +``` + +Let's make sure the cluster is online with a `Ping` request: + +```javascript +var assert = require('assert'); + +client.ping(function (err, rslt) { + if (err) { + throw new Error(err); + } else { + // On success, ping returns true + assert(rslt === true); + } +}); +``` + +This is some simple code to test that a node in a Riak cluster is online - we +send a simple ping message. Even if the cluster isn't present, the Riak Node.js +Client will return a response message. In the callback it is important to check +that your activity was successful by checking the `err` variable. + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.1/developing/getting-started/nodejs/crud-operations) diff --git a/content/riak/kv/2.9.1/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/2.9.1/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..e8ab700440 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,133 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "CRUD Operations" + identifier: "getting_started_nodejs_crud" + weight: 100 + parent: "getting_started_nodejs" +toc: true +--- + +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + +### Creating Objects In Riak KV + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going +to want us to do productive work. Let's create some data to save in Riak. + +The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak +key/value objects. At the most basic, a `RiakObject` is responsible for +identifying your object and for translating it into a format that can be easily +saved to Riak. + +```javascript +var async = require('async'); + +var people = [ + { + emailAddress: "bashoman@basho.com", + firstName: "Basho", + lastName: "Man" + }, + { + emailAddress: "johndoe@gmail.com", + firstName: "John", + lastName: "Doe" + } +]; + +var storeFuncs = []; +people.forEach(function (person) { + // Create functions to execute in parallel to store people + storeFuncs.push(function (async_cb) { + client.storeValue({ + bucket: 'contributors', + key: person.emailAddress, + value: person + }, + function(err, rslt) { + async_cb(err, rslt); + } + ); + }); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +In this sample, we create a collection of `Person` objects and then save each +`Person` to Riak. Once again, we check the response from Riak. + +### Reading from Riak + +Let's find a person! + +```javascript +var logger = require('winston'); + +client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true }, + function (err, rslt) { + if (err) { + throw new Error(err); + } else { + var riakObj = rslt.values.shift(); + var bashoman = riakObj.value; + logger.info("I found %s in 'contributors'", bashoman.emailAddress); + } + } +); +``` + +We use `client.fetchValue` to retrieve an object from Riak. 
This returns an
+array of `RiakObject` objects, which helpfully encapsulate the
+communication with Riak.
+
+After verifying that we've been able to communicate with Riak *and* that we have
+a successful result, we use the `value` property to get the object, which has
+already been converted to a JavaScript object due to the use of `convertToJs:
+true` in the options.
+
+### Modifying Existing Data
+
+Let's say that Basho Man has decided to be known as Riak Man:
+
+```javascript
+bashoman.firstName = "Riak";
+riakObj.setValue(bashoman);
+
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Updating an object involves modifying a `RiakObject` then using
+`client.storeValue` to save the existing object.
+
+### Deleting Data
+
+```javascript
+client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Just like other operations, we check the results that have come back from Riak
+to make sure the object was successfully deleted.
+
+The Riak Node.js Client has a lot of additional functionality that makes it easy
+to build rich, complex applications with Riak. Check out the
+[documentation][nodejs_wiki] to learn more about working with the Riak Node.js
+Client and Riak.
diff --git a/content/riak/kv/2.9.1/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/2.9.1/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..766d4999b9
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,119 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Object Modeling"
+    identifier: "getting_started_nodejs_object"
+    weight: 102
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/kv/2.9.1/dev/taste-of-riak/object-modeling-nodejs
+---
+
+To get started, let's create the models that we'll be using.
+
+* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
+* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
+* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br>`marketing_group_INBOX_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
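+
+As in the Java chapter, these keys are plain string concatenation. A small
+sketch of the idea is shown below; these helper functions are illustrative
+only, and the linked models implement their own versions:
+
+```javascript
+// Illustrative helpers for the key patterns above.
+function userKey(userName) {
+    return userName; // <user_name>
+}
+
+function msgKey(sender, createdIsoString) {
+    return sender + '_' + createdIsoString; // <user_name>_<datetime>
+}
+
+function timelineKey(owner, type, createdIsoString) {
+    // <owner>_<type>_<date> -- keep only the date part to partition by day
+    return owner + '_' + type.toUpperCase() + '_' + createdIsoString.substring(0, 10);
+}
+
+console.log(timelineKey('joeuser', 'Sent', new Date().toISOString()));
+// e.g. joeuser_SENT_2014-03-06
+```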
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + diff --git a/content/riak/kv/2.9.1/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.9.1/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..e70832cde2 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/nodejs/querying.md @@ -0,0 +1,142 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/querying-nodejs + - /riak/kv/2.9.1/dev/taste-of-riak/querying-nodejs +--- + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. + +* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33) +* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262) + +While individual Customer and Order objects don't change much (or +shouldn't change), the "Order Summary" object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. 
If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)
+
+Which returns our amalgamated objects:
+
+```bash
+info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
+info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's id of `9000`.
+
+[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
+2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175)
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
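+
+Since this chapter only links out to the full example code, here is a rough
+sketch of what the October range query could look like inline. It is written
+against the `secondaryIndexQuery` command described in the Riak Node.js client
+wiki; treat the option names and the `OrderDate_bin` index name as assumptions
+to check against your own code:
+
+```javascript
+// Sketch: find October 2013 orders via a binary OrderDate index.
+// Assumes `client` is an initialized Riak.Client (see the previous chapter).
+var query = {
+    bucket: 'Orders',
+    indexName: 'OrderDate_bin',
+    rangeStart: '2013-10-01',
+    rangeEnd: '2013-10-31'
+};
+client.secondaryIndexQuery(query, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    // Matching keys arrive in rslt.values; rslt.done signals the last batch
+    rslt.values.forEach(function (v) {
+        console.log('October order key: %s', v.objectKey);
+    });
+});
+```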
diff --git a/content/riak/kv/2.9.1/developing/getting-started/php.md b/content/riak/kv/2.9.1/developing/getting-started/php.md
new file mode 100644
index 0000000000..95c1dc8a1c
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/php.md
@@ -0,0 +1,76 @@
+---
+title: "Getting Started with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "PHP"
+    identifier: "getting_started_php"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/taste-of-riak/php
+  - /riak/kv/2.9.1/dev/taste-of-riak/php
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.1/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of PHP is required, as well as [Composer](https://getcomposer.org/) to fetch the client library package.
+
+## Client Setup
+
+Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)).
+
+From the `taste-of-riak` directory, use Composer to install the Riak PHP 2.0 client.
+
+```bash
+php path/to/your/composer.phar install
+
+# If you did a global install of composer, run this instead:
+composer install
+```
+
+If you set up a local Riak cluster using the [[five minute install]] method, change line 11 from `->onPort(8098)` to `->onPort(10018)`.
+
+Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:
+
+```text
+Reading Objects From Riak...
+Updating Objects In Riak...
+Deleting Objects From Riak...
+Working With Complex Objects...
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+Yay, success!
+
+Since we didn't use PHP's REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting up the PHP Client and connections
+
+```php
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+```
+
+This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
+Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.
+
+We are now ready to start interacting with Riak.
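+
+If you want a quick smoke test before moving on, the same store and fetch
+command builders covered in the next chapter can round-trip a value; the
+bucket and key names here are arbitrary:
+
+```php
+// Arbitrary location for a throwaway test value
+$location = new Riak\Location('ping-check', new Riak\Bucket('testBucket'));
+
+// Store a value...
+(new Command\Builder\StoreObject($riak))
+    ->buildObject('hello')
+    ->atLocation($location)
+    ->build()
+    ->execute();
+
+// ...and read it back
+$response = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute();
+
+print($response->getObject()->getData() . PHP_EOL); // hello
+```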
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.1/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.9.1/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.9.1/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..b33d7991fd --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/php/crud-operations.md @@ -0,0 +1,182 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with PHP" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "CRUD Operations" + identifier: "getting_started_php_crud" + weight: 100 + parent: "getting_started_php" +toc: true +--- + +## Creating Objects In Riak +First, let’s create a few objects and a bucket to keep them in. + +```php +$bucket = new Riak\Bucket('testBucket'); + +$val1 = 1; +$location1 = new Riak\Location('one', $bucket); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val1) + ->atLocation($location1) + ->build(); +$storeCommand1->execute(); +``` + +In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key. + +```php +$val2 = 'two'; +$location2 = new Riak\Location('two', $bucket); + +$storeCommand2 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val2) + ->atLocation($location2) + ->build(); +$storeCommand2->execute(); +``` + +That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now. + +```php +$val3 = ['myValue' => 3]; +$location3 = new Riak\Location('three', $bucket); + +$storeCommand3 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($val3) + ->atLocation($location3) + ->build(); +$storeCommand3->execute(); +``` + +## Reading Objects From Riak +Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect. + +```php +$response1 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location1) + ->build() + ->execute(); + +$response2 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location2) + ->build() + ->execute(); + +$response3 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location3) + ->withDecodeAsAssociative() + ->build() + ->execute(); + +print_r($response1->getObject()->getData()); +print_r($response2->getObject()->getData()); +print_r($response3->getObject()->getData()); +``` + +That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html). +For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object. + +In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data. + +## Updating Objects In Riak +While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42. 
+ +```php +$object3 = $response3->getObject(); +$data3 = $object3->getData(); + +$data3['myValue'] = 42; +$object3 = $object3->setData(json_encode($data3)); + +$updateCommand = (new Command\Builder\StoreObject($riak)) + ->withObject($object3) + ->atLocation($location3) + ->build(); + +$updateCommand->execute(); +``` + +First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object. +To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object. + +## Deleting Objects From Riak +As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it. + +```php +(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute(); +``` + +### Working With Complex Objects +Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take for example, this plain old PHP object(POPO) that encapsulates some knowledge about a book. + +```php +class Book +{ + var $title; + var $author; + var $body; + var $isbn; + var $copiesOwned; +} + +$book = new Book(); +$book->isbn = '1111979723'; +$book->title = 'Moby Dick'; +$book->author = 'Herman Melville'; +$book->body = 'Call me Ishmael. Some years ago...'; +$book->copiesOwned = 3; +``` + +Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now: + +```php +$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books')); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($book) + ->atLocation($bookLocation) + ->build(); + +$storeCommand1->execute(); +``` + +Some of you may be thinking “But how does the Riak client encode/decode my object”? If we fetch the binary version of our book back and print it as a string, we shall know: + +```php +$fetchBookResponse = (new Command\Builder\FetchObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); + +print('Serialized Object:' . PHP_EOL); +print($fetchBookResponse->getBody() . PHP_EOL); +``` + +```json +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder. 
+ +Now that we’ve ruined the magic of object encoding, let’s clean up our mess: + +```php +(new Command\Builder\DeleteObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); +``` + +## Next Steps + +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.9.1/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. diff --git a/content/riak/kv/2.9.1/developing/getting-started/php/querying.md b/content/riak/kv/2.9.1/developing/getting-started/php/querying.md new file mode 100644 index 0000000000..fccccf8fbf --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/php/querying.md @@ -0,0 +1,404 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with PHP" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Querying" + identifier: "getting_started_php_query" + weight: 101 + parent: "getting_started_php" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/querying-php + - /riak/kv/2.9.1/dev/taste-of-riak/querying-php +--- + +## A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +## Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. 
+
+```php
+<?php
+
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Command;
+use Basho\Riak\Location;
+use Basho\Riak\Node;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Class definitions for our models
+
+class Customer
+{
+    var $customerId;
+    var $name;
+    var $address;
+    var $city;
+    var $state;
+    var $zip;
+    var $phone;
+    var $createdDate;
+}
+
+class Order
+{
+    public function __construct()
+    {
+        $this->items = array();
+    }
+    var $orderId;
+    var $customerId;
+    var $salespersonId;
+    var $items;
+    var $total;
+    var $orderDate;
+}
+
+class Item
+{
+    public function __construct($itemId, $title, $price)
+    {
+        $this->itemId = $itemId;
+        $this->title = $title;
+        $this->price = $price;
+    }
+    var $itemId;
+    var $title;
+    var $price;
+}
+
+class OrderSummary
+{
+    public function __construct()
+    {
+        $this->summaries = array();
+    }
+    var $customerId;
+    var $summaries;
+}
+
+class OrderSummaryItem
+{
+    public function __construct(Order $order)
+    {
+        $this->orderId = $order->orderId;
+        $this->total = $order->total;
+        $this->orderDate = $order->orderDate;
+    }
+    var $orderId;
+    var $total;
+    var $orderDate;
+}
+
+// Creating Data
+$customer = new Customer();
+$customer->customerId = 1;
+$customer->name = 'John Smith';
+$customer->address = '123 Main Street';
+$customer->city = 'Columbus';
+$customer->state = 'Ohio';
+$customer->zip = '43210';
+$customer->phone = '+1-614-555-5555';
+$customer->createdDate = '2013-10-01 14:30:26';
+
+$orders = [];
+
+$order1 = new Order();
+$order1->orderId = 1;
+$order1->customerId = 1;
+$order1->salespersonId = 9000;
+$order1->items = [
+    new Item(
+        'TCV37GIT4NJ',
+        'USB 3.0 Coffee Warmer',
+        15.99
+    ),
+    new Item(
+        'PEG10BBF2PP',
+        'eTablet Pro; 24GB; Grey',
+        399.99
+    )
+];
+$order1->total = 415.98;
+$order1->orderDate = '2013-10-01 14:42:26';
+$orders[] = $order1;
+
+$order2 = new Order();
+$order2->orderId = 2;
+$order2->customerId = 1;
+$order2->salespersonId = 9001;
+$order2->items = [
+    new Item(
+        'OAX19XWN0QP',
+        'GoSlo Digital Camera',
+        359.99
+    )
+];
+$order2->total = 359.99;
+$order2->orderDate = '2013-10-15 16:43:16';
+$orders[] = $order2;
+
+$order3 = new Order();
+$order3->orderId = 3;
+$order3->customerId = 1;
+$order3->salespersonId = 9000;
+$order3->items = [
+    new Item(
+        'WYK12EPU5EZ',
+        'Call of Battle = Goats - Gamesphere 4',
+        69.99
+    ),
+    new Item(
+        'TJB84HAA8OA',
+        'Bricko Building Blocks',
+        4.99
+    )
+];
+$order3->total = 74.98;
+$order3->orderDate = '2013-11-03 17:45:28';
+$orders[] = $order3;
+
+$orderSummary = new OrderSummary();
+$orderSummary->customerId = 1;
+foreach ($orders as $order) {
+    $orderSummary->summaries[] = new OrderSummaryItem($order);
+}
+unset($order);
+
+// Starting Client
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Creating Buckets
+$customersBucket = new Riak\Bucket('Customers');
+$ordersBucket = new Riak\Bucket('Orders');
+$orderSummariesBucket = new Riak\Bucket('OrderSummaries');
+
+// Storing Data
+$storeCustomer = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($customer)
+    ->atLocation(new Location($customer->customerId, $customersBucket))
+    ->build();
+$storeCustomer->execute();
+
+foreach ($orders as $order) {
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($order)
+        ->atLocation(new Location($order->orderId, $ordersBucket))
+        ->build();
+    $storeOrder->execute();
+}
+unset($order);
+
+$storeSummary = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($orderSummary)
+    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
+    
->build(); +$storeSummary->execute(); +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```php +// Fetching related data by shared key +$fetched_customer = (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $customersBucket)) + ->build()->execute()->getObject()->getData(); + +$fetched_customer->orderSummary = + (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $orderSummariesBucket)) + ->build()->execute()->getObject()->getData(); + +print("Customer with OrderSummary data: \n"); +print_r($fetched_customer); +``` + +Which returns our amalgamated objects: + +```text +Customer with OrderSummary data: +stdClass Object +( + [customerId] => 1 + [name] => John Smith + [address] => 123 Main Street + [city] => Columbus + [state] => Ohio + [zip] => 43210 + [phone] => +1-614-555-5555 + [createdDate] => 2013-10-01 14:30:26 + [orderSummary] => stdClass Object + ( + [customerId] => 1 + [summaries] => Array + ( + [0] => stdClass Object + ( + [orderId] => 1 + [total] => 415.98 + [orderDate] => 2013-10-01 14:42:26 + ) + + [1] => stdClass Object + ( + [orderId] => 2 + [total] => 359.99 + [orderDate] => 2013-10-15 16:43:16 + ) + + [2] => stdClass Object + ( + [orderId] => 3 + [total] => 74.98 + [orderDate] => 2013-11-03 17:45:28 + ) + ) + ) +) +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.1/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```php +// Adding Index Data +$keys = array(1,2,3); +foreach ($keys as $key) { + $orderLocation = new Location($key, $ordersBucket); + $orderObject = (new Command\Builder\FetchObject($riak)) + ->atLocation($orderLocation) + ->build()->execute()->getObject(); + + $order = $orderObject->getData(); + + $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId); + $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate); + + $storeOrder = (new Command\Builder\StoreObject($riak)) + ->withObject($orderObject) + ->atLocation($orderLocation) + ->build(); + $storeOrder->execute(); +} +unset($key); + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. 
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+?>
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which as you can see will return the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
diff --git a/content/riak/kv/2.9.1/developing/getting-started/python.md b/content/riak/kv/2.9.1/developing/getting-started/python.md
new file mode 100644
index 0000000000..5c37329165
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/python.md
@@ -0,0 +1,99 @@
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/taste-of-riak/python
+  - /riak/kv/2.9.1/dev/taste-of-riak/python
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.1/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`.
+ +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` --- Header files and a static library for Python +* `libffi-dev` --- Foreign function interface library +* `libssl-dev` --- libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.1/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.9.1/developing/getting-started/python/crud-operations.md b/content/riak/kv/2.9.1/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..d511bdae87 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/python/crud-operations.md @@ -0,0 +1,145 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` diff --git a/content/riak/kv/2.9.1/developing/getting-started/python/object-modeling.md b/content/riak/kv/2.9.1/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..934183b713 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/python/object-modeling.md @@ -0,0 +1,260 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/object-modeling-python + - /riak/kv/2.9.1/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. + + +Bucket | Key Pattern | Example Key +:------|:------------|:----------- +`Users` | `` | `joeuser` +`Msgs` | `_` | `joeuser_2014-03-06T02:05:13.223556Z` +`Timelines` | `__` | `joeuser_Sent_2014-03-06`
`marketing_group_Inbox_2014-03-06` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. For the +`Msgs` bucket, let's use a combination of the username and the posting +datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601) +format. This combination gives us the pattern `_`, +which produces keys like `joeuser_2014-03-05T23:20:28`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `__` for groups, which will look like +`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2MB. Objects larger than that can hurt +performance, especially if many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, let's write some +repositories to help create and work with these objects in Riak: + +```python +class UserRepository: + BUCKET = 'Users' + + def __init__(self, client): + self.client = client + + def save(self, user): + riak_obj = self.client.bucket(self.BUCKET).get(user['user_name']) + riak_obj.data = user + return riak_obj.store() + + def get(self, user_name): + riak_obj = self.client.bucket(self.BUCKET).get(user_name) + return riak_obj.data + + +class MsgRepository: + BUCKET = 'Msgs' + + def __init__(self, client): + self.client = client + + def save(self, msg): + msgs = self.client.bucket(self.BUCKET) + key = self._generate_key(msg) + + riak_obj = msgs.get(key) + + if not riak_obj.exists: + riak_obj.data = msg + riak_obj.store(if_none_match=True) + + return riak_obj + + def get(self, key): + riak_obj = self.client.bucket(self.BUCKET).get(key) + return riak_obj.data + + def _generate_key(self, msg): + return msg['sender'] + '_' + msg['created'] + + +class TimelineRepository: + BUCKET = 'Timelines' + SENT = 'Sent' + INBOX = 'Inbox' + + def __init__(self, client): + self.client = client + self.msg_repo = MsgRepository(client) + + def post_message(self, msg): + # Save the canonical copy + saved_message = self.msg_repo.save(msg) + msg_key = saved_message.key + + # Post to sender's Sent timeline + self._add_to_timeline(msg, self.SENT, msg_key) + + # Post to recipient's Inbox timeline + self._add_to_timeline(msg, self.INBOX, msg_key) + + def get_timeline(self, owner, msg_type, date): + key = self._generate_key(owner, msg_type, date) + riak_obj = self.client.bucket(self.BUCKET).get(key) + return riak_obj.data + + def _add_to_timeline(self, msg, msg_type, msg_key): + timeline_key = self._generate_key_from_msg(msg, msg_type) + riak_obj = self.client.bucket(self.BUCKET).get(timeline_key) + + if riak_obj.exists: + riak_obj = self._add_to_existing_timeline(riak_obj, + msg_key) + else: + riak_obj = self._create_new_timeline(riak_obj, + msg, msg_type, + msg_key) + + return riak_obj.store() + + def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key): + owner = self._get_owner(msg, msg_type) + new_timeline = {'owner': owner, + 'msg_type': msg_type, + 'msgs': [msg_key]} + + riak_obj.data = new_timeline + return riak_obj + + def 
_add_to_existing_timeline(self, riak_obj, msg_key): + riak_obj.data['msgs'].append(msg_key) + return riak_obj + + def _get_owner(self, msg, msg_type): + if msg_type == self.INBOX: + return msg['recipient'] + else: + return msg['sender'] + + def _generate_key_from_msg(self, msg, msg_type): + owner = self._get_owner(msg, msg_type) + return self._generate_key(owner, msg_type, msg['created']) + + def _generate_key(self, owner, msg_type, datetimestr): + dateString = string.split(datetimestr, 'T', 1)[0] + return owner + '_' + msg_type + '_' + dateString + +``` + +Finally, let's test them: + +```python +# Setup our repositories +client = riak.RiakClient(pb_port=10017, protocol='pbc') +userRepo = UserRepository(client) +msgsRepo = MsgRepository(client) +timelineRepo = TimelineRepository(client) + +# Save users +userRepo.save(marleen) +userRepo.save(joe) + +# Post msg to timelines +timelineRepo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timelineRepo.get_timeline( + joe['user_name'], + TimelineRepository.INBOX, + datetime.utcnow().isoformat()) + +joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0]) + +print 'From: {0}\nMsg : {1}\n\n'.format( + joes_first_message['sender'], + joes_first_message['text']) + +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + diff --git a/content/riak/kv/2.9.1/developing/getting-started/python/querying.md b/content/riak/kv/2.9.1/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..8392cbf0f6 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/python/querying.md @@ -0,0 +1,236 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Python" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Querying" + identifier: "getting_started_python_query" + weight: 101 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/querying-python + - /riak/kv/2.9.1/dev/taste-of-riak/querying-python +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index for all of a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```python
+customer = customer_bucket.get('1').data
+customer['order_summary'] = order_summary_bucket.get('1').data
+customer
+```
+
+Which returns our amalgamated objects:
+
+```python
+{
+  u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210',
+  u'created_date': u'2013-10-01 14:30:26',
+  'order_summary': {
+    u'customer_id': 1, u'summaries': [
+      {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98},
+      {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99},
+      {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98}
+    ]},
+  u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street',
+  u'customer_id': 1
+}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.1/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by querying the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.9.1/developing/getting-started/ruby.md b/content/riak/kv/2.9.1/developing/getting-started/ruby.md new file mode 100644 index 0000000000..c52fdc2367 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/ruby.md @@ -0,0 +1,64 @@ +--- +title: "Getting Started with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Ruby" + identifier: "getting_started_ruby" + weight: 101 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.1/dev/taste-of-riak/ruby + - /riak/kv/2.9.1/dev/taste-of-riak/ruby +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.1/using/running-a-cluster) first. To try this flavor +of Riak, a working installation of Ruby is required. + +## Client Setup + +First, install the Riak Ruby client via RubyGems. + +```bash +gem install riak-client +``` + +Start IRB, the Ruby REPL, and let’s get set up. Enter the following into +IRB: + +```ruby +require 'riak' +``` + +If you are using a single local Riak node, use the following to create a +new client instance, assuming that the node is running on `localhost` +port 8087: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087) + +# Since the Ruby Riak client uses the Protocol Buffers API by default, +# you can also just enter this: +client = Riak::Client.new(:pb_port => 8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017) + +# For the reasons explain in the snippet above, this will also work: +client = Riak::Client.new(:pb_port => 10017) +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.1/developing/getting-started/ruby/crud-operations) diff --git a/content/riak/kv/2.9.1/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/2.9.1/developing/getting-started/ruby/crud-operations.md new file mode 100644 index 0000000000..5a5b678314 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/getting-started/ruby/crud-operations.md @@ -0,0 +1,146 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "CRUD Operations" + identifier: "getting_started_ruby_crud" + weight: 100 + parent: "getting_started_ruby" +toc: true +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```ruby +my_bucket = client.bucket("test") + +val1 = 1 +obj1 = my_bucket.new('one') +obj1.data = val1 +obj1.store() +``` + +In this first example we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```ruby +val2 = "two" +obj2 = my_bucket.new('two') +obj2.data = val2 +obj2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```ruby +val3 = { myValue: 3 } +obj3 = my_bucket.new('three') +obj3.data = val3 +obj3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. 
+
+```ruby
+fetched1 = my_bucket.get('one')
+fetched2 = my_bucket.get('two')
+fetched3 = my_bucket.get('three')
+
+fetched1.data == val1
+fetched2.data == val2
+fetched3.data.to_json == val3.to_json
+```
+
+That was easy. We simply request the objects by key. In the last
+example, we converted to JSON so we can compare a string key to a symbol
+key.
+
+## Updating Objects In Riak
+
+While some data may be static, other forms of data may need to be
+updated. This is also easy to accomplish. Let’s update the value of
+myValue in the 3rd example to 42.
+
+```ruby
+fetched3.data["myValue"] = 42
+fetched3.store()
+```
+
+## Deleting Objects From Riak
+
+As a last step, we’ll demonstrate how to delete data. You’ll see that
+the delete message can be called either against the bucket or the
+object.
+
+```ruby
+my_bucket.delete('one')
+obj2.delete()
+obj3.delete()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this Ruby hash that encapsulates some knowledge about
+a book.
+
+```ruby
+book = {
+  :isbn => '1111979723',
+  :title => 'Moby Dick',
+  :author => 'Herman Melville',
+  :body => 'Call me Ishmael. Some years ago...',
+  :copies_owned => 3
+}
+```
+
+All right, so we have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now.
+
+```ruby
+books_bucket = client.bucket('books')
+new_book = books_bucket.new(book[:isbn])
+new_book.data = book
+new_book.store()
+```
+
+Some of you may be thinking, "But how does the Ruby Riak client
+encode/decode my object?" If we fetch our book back and print the raw
+data, we shall know:
+
+```ruby
+fetched_book = books_bucket.get(book[:isbn])
+puts fetched_book.raw_data
+```
+
+Raw Data:
+
+```json
+{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville",
+"body":"Call me Ishmael. Some years ago...","copies_owned":3}
+```
+
+JSON! The Ruby Riak client will serialize objects to JSON when it comes
+across structured data like hashes. For more advanced control over
+serialization you can use a library called
+[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling
+layer over the basic Riak client. Ripple falls outside the scope of
+this document, but we shall visit it later.
+
+Now, let’s clean up our mess:
+
+```ruby
+new_book.delete()
+```
diff --git a/content/riak/kv/2.9.1/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/2.9.1/developing/getting-started/ruby/object-modeling.md
new file mode 100644
index 0000000000..4b644c0906
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/ruby/object-modeling.md
@@ -0,0 +1,291 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Object Modeling"
+    identifier: "getting_started_ruby_object"
+    weight: 102
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/taste-of-riak/object-modeling-ruby
+  - /riak/kv/2.9.1/dev/taste-of-riak/object-modeling-ruby
+---
+
+To get started, let's create the models that we'll be using. Since the
+[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses
+hashes when converting to and from JSON, we'll use the library
+[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically
+coerce class properties to and from hashes. You can install this library
You can install this library +with `gem install hashie`. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'hashie' +require 'time' + +class User < Hashie::Dash + property :user_name + property :full_name + property :email +end + +class Msg < Hashie::Dash + property :from + property :to + property :created + property :text +end + +class Timeline < Hashie::Dash + property :owner + property :type + property :msgs +end +``` + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. + +Bucket | Key Pattern | Example Key +:------|:------------|:----------- +`Users` | `` | `joeuser` +`Msgs` | `_` | `joeuser_2014-03-06T02:05:13.223556Z` +`Timelines` | `__` | `joeuser_Sent_2014-03-06Z`
`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def add_to_existing_timeline(key, msg_key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    timeline = Timeline.new(riak_obj.data)
+    timeline.msgs << msg_key
+    riak_obj.data = timeline
+    riak_obj
+  end
+
+  def get_owner(msg, type)
+    type == INBOX ? msg.to : msg.from
+  end
+
+  def generate_key_from_msg(msg, type)
+    owner = get_owner(msg, type)
+    generate_key(owner, type, msg.created)
+  end
+
+  def generate_key(owner, type, date)
+    owner + '_' + type + '_' + date.utc.strftime('%F')
+  end
+end
+```
+
+Finally, let's test them:
+
+```ruby
+# Setup our repositories
+client = Riak::Client.new(protocol: 'pbc', pb_port: 10017)
+user_repo = UserRepository.new(client)
+msgs_repo = MsgRepository.new(client)
+timeline_repo = TimelineRepository.new(client)
+
+# Create and save users
+marleen = User.new(user_name: 'marleenmgr',
+                   full_name: 'Marleen Manager',
+                   email: 'marleen.manager@basho.com')
+
+joe = User.new(user_name: 'joeuser',
+               full_name: 'Joe User',
+               email: 'joe.user@basho.com')
+
+user_repo.save(marleen)
+user_repo.save(joe)
+
+# Create new Msg, post to timelines
+msg = Msg.new(from: marleen.user_name,
+              to: joe.user_name,
+              created: Time.now,
+              text: 'Welcome to the company!')
+
+timeline_repo.post_message(msg)
+
+# Get Joe's inbox for today, get first message
+joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now)
+joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first)
+
+puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}"
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+* It helps us to see if an object exists before creating a new one
+* It keeps our buckets and key names consistent
+* It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data.
+
diff --git a/content/riak/kv/2.9.1/developing/getting-started/ruby/querying.md b/content/riak/kv/2.9.1/developing/getting-started/ruby/querying.md
new file mode 100644
index 0000000000..c2cfda7331
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/getting-started/ruby/querying.md
@@ -0,0 +1,252 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Querying"
+    identifier: "getting_started_ruby_query"
+    weight: 101
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/taste-of-riak/querying-ruby
+  - /riak/kv/2.9.1/dev/taste-of-riak/querying-ruby
+---
+
+#### A Quick Note on Querying and Schemas
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another.
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```ruby
+shared_key = '1'
+customer = customer_bucket.get(shared_key).data
+customer[:order_summary] = order_summary_bucket.get(shared_key).data
+puts "Combined Customer and Order Summary: "
+pp customer
+```
+
+Which returns our amalgamated objects:
+
+```ruby
+# Combined Customer and Order Summary:
+{"customer_id"=>1,
+ "name"=>"John Smith",
+ "address"=>"123 Main Street",
+ "city"=>"Columbus",
+ "state"=>"Ohio",
+ "zip"=>"43210",
+ "phone"=>"+1-614-555-5555",
+ "created_date"=>"2013-10-01 14:30:26 -0400",
+ :order_summary=>
+  {"customer_id"=>1,
+   "summaries"=>
+    [{"order_id"=>1,
+      "total"=>415.98,
+      "order_date"=>"2013-10-01 14:42:26 -0400"},
+     {"order_id"=>2,
+      "total"=>359.99,
+      "order_date"=>"2013-10-15 16:43:16 -0400"},
+     {"order_id"=>3,
+      "total"=>74.98,
+      "order_date"=>"2013-11-03 17:45:28 -0500"}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.1/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+                                         .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by querying the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "#Jane's Orders: "
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`.
+ +```ruby +puts "#October's Orders: " +pp order_bucket.get_index('order_date_bin', '20131001'..'20131031') +``` + +Which returns: + +```ruby +# October's Orders: +["1", "2"] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.9.1/developing/key-value-modeling.md b/content/riak/kv/2.9.1/developing/key-value-modeling.md new file mode 100644 index 0000000000..4b59deb348 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/key-value-modeling.md @@ -0,0 +1,531 @@ +--- +title: "Riak KV Key/Value Modeling" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Key/Value Modeling" + identifier: "developing_kv_model" + weight: 104 + parent: "developing" +toc: true +aliases: + - /riak/2.9.1/dev/data-modeling/key-value/ + - /riak/kv/2.9.1/dev/data-modeling/key-value/ +--- + +While Riak enables you to take advantage of a wide variety of features +that can be useful in application development, such as [Search]({{}}riak/kv/2.9.1/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.9.1/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.9.1/developing/data-types/), Riak almost always performs best when you +build your application around basic CRUD operations (create, read, +update, and delete) on objects, i.e. when you use Riak as a "pure" +key/value store. + +In this tutorial, we'll suggest some strategies for naming and modeling +for key/value object interactions with Riak. If you'd like to use some +of Riak's other features, we recommend checking out the documentation +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.9.1/developing/app-guide/) for a better sense of which features you might need. + +## Advantages of Key/Value Operations + +Riak's key/value architecture enables it to be more performant than +relational databases in many scenarios because Riak doesn't need to +perform lock, join, union, or other operations when working with +objects. Instead, it interacts with objects on a one-by-one basis, using +**primary key lookups**. + +Primary key lookups store and fetch objects in Riak on the basis of +three basic locators: + +* The object's [key]({{}}riak/kv/2.9.1/learn/concepts/keys-and-objects#keys), which can be anything you + want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/2.9.1/learn/concepts/buckets) which houses the object and its key (bucket + names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/2.9.1/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.9.1/developing/app-guide/replication-properties) and other properties + +It may be useful to think of this system as analogous to a nested +key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you +would find in most programming languages. Below is an example from +[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{}}riak/kv/2.9.1/developing/api/http)):
+
+```
+GET/PUT/DELETE /bucket/<bucket>/keys/<key>
+```
+
+The most important benefit of storing Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist?
+ +One way to determine this is to [list all keys]({{}}riak/kv/2.9.1/developing/api/protocol-buffers/list-keys) in the +bucket `users`. This approach, however, is _not_ recommended, because +listing all keys in a bucket is a very expensive operation that should +not be used in production. And so another strategy must be employed. + +A better possibility is to use [Riak sets]({{}}riak/kv/2.9.1/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.9.1/developing/data-types) that enable you to store lists of binaries or strings in Riak. +Unlike normal Riak objects, you can interact with Riak sets much like +you interact with sets in most programming languages, i.e. you can add +and remove elements at will. + +Going back to our user data example, instead of simply storing user +records in our `users` bucket, we could set up our application to store +each key in a set when a new record is created. We'll store this set in +the bucket `user_info_sets` (we'll keep it simple) and in the key +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.9.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`sets`. + +We can interact with that set on the basis of its location: + +```java +Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames"); + +// With this Location, we can construct fetch operations like this: +FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build(); +``` + +```ruby +require 'riak' + +set_bucket = client.bucket('user_info_sets') + +# We'll make this set global because we'll use it +# inside of a function later on + +$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets') +``` + +```php +$command = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->buildLocation('usernames', 'user_info_sets', 'sets') + ->build(); +``` + +```python +from riak.datatypes import Set + +bucket = client.bucket_type('sets').bucket('user_info_sets') +user_id_set = Set(bucket, 'usernames') +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.9.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.9.1/developing/getting-started). 
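+
+Before we write any records, a quick membership check against this set gives us a cheap existence test. Below is a minimal sketch using the `user_id_set` object from the Python example above (`username_taken` is a hypothetical helper, not part of the client API):
+
+```python
+# Hypothetical helper: reload the set and test membership before
+# creating a record, so we don't silently overwrite an existing user
+def username_taken(username):
+    return username in user_id_set.reload().value
+```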
+ +Then, we can create a function that stores a user record's key in that +set every time a record is created: + +```java +// A User class for constructing user records +class User { + public String username; + public String info; + + public User(String username, String info) { + this.username = username; + this.info = info; + } +} + +// A function for storing a user record that has been created +public void storeUserRecord(User user) throws Exception { + // User records themselves will be stored in the bucket "users" + Location userObjectLocation = + new Location(new Namespace("users"), user.username); + RiakObject userObject = new RiakObject() + // We'll keep it simple and store User object data as plain text + .setContentType("text/plain") + .setValue(user.info); + StoreValue store = new StoreValue.Builder(userObjectLocation, userObject) + .build(); + client.execute(store); + + Location userIdSet = + new Location(new Namespace("sets", "user_info_sets"), "usernames"); + SetUpdate su = new SetUpdate() + .add(BinaryValue.create(user.username)); + UpdateSet update = new UpdateSet.Builder(su, update) + .build(); + client.execute(update); +} +``` + +```ruby +class User + attr_accessor :username, :info +end + +def store_record(user) + # First we create an empty object and specify its bucket and key + obj = Riak::RObject.new(client.bucket('users'), user.username) + + # We'll keep it simple by storing plain text for each user's info + obj.content_type = 'text/plain' + obj.raw_data = user.info + obj.store + + # Finally, we'll add the user's username to the set + user_id_set.add(user.username) +end +``` + +```php +class User +{ + public $user_name; + public $info; + + public function __construct($user_name, $info) + { + $this->user_name = $user_name; + $this->info = $info; + } +} + +function store_user(User $user) +{ + (new \Basho\Riak\Command\Builder\StoreObject) + ->buildLocation($user->user_name, 'users') + ->buildJsonObject($user) + ->build() + ->execute(); + + (new \Basho\Riak\Command\Builder\UpdateSet) + ->buildLocation('usernames', 'user_info_sets', 'sets') + ->add($user->user_name) + ->build() + ->execute(); +} +``` + +```python +class User: + def __init__(self, username, info): + this.username = username + this.info = info + +# Using the "user_id_set" object from above +def store_record(user): + # First we create an empty object and specify its bucket and key + obj = RiakObject(client, 'users', user.username) + + # We'll keep it simple by storing plain text for each user's info + obj.content_type = 'text/plain' + obj.data = user.info + obj.store() + + # Finally, we'll add the user's username to the set + user_id_set.add(username) + user_id_set.store() +``` + +Now, let's say that we want to be able to pull up all user records in +the bucket at once. 
We could do so by iterating through the usernames
stored in our set and then fetching the object corresponding to each
username:

```java
public Set<User> fetchAllUserRecords() throws Exception {
    // Empty sets to collect usernames and User objects
    Set<String> userIdStrings = new HashSet<>();
    Set<User> userSet = new HashSet<>();

    // Turn the Riak username set into a set of Strings
    Location userIdSet =
        new Location(new Namespace("sets", "user_info_sets"), "usernames");
    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build();
    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
    set.viewAsSet().forEach((BinaryValue username) -> {
        userIdStrings.add(username.toString());
    });

    // Fetch User objects for each of the usernames stored in the set
    for (String username : userIdStrings) {
        Location userLocation = new Location(new Namespace("users"), username);
        FetchValue fetch = new FetchValue.Builder(userLocation).build();
        User user = client.execute(fetch).getValue(User.class);
        userSet.add(user);
    }
    return userSet;
}
```

```ruby
# Using the "user_id_set" set from above

def fetch_all_user_records
  users_bucket = $client.bucket('users')
  user_records = Array.new
  $user_id_set.members.each do |user_id|
    user_record = users_bucket.get(user_id).data
    user_records.push(user_record)
  end
  user_records
end
```

```php
function fetch_users()
{
    $users = [];

    $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
        ->buildLocation('usernames', 'user_info_sets', 'sets')
        ->build()
        ->execute();

    $user_names = $response->getSet()->getData();
    foreach ($user_names as $user_name) {
        $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
            ->buildLocation($user_name, 'users')
            ->build()
            ->execute();

        $users[$user_name] = $response->getObject()->getData();
    }

    return $users;
}
```

```python
# We'll create a generator object that will yield a list of Riak objects
def fetch_all_user_records():
    users_bucket = client.bucket('users')
    user_id_list = list(user_id_set.reload().value)
    for user_id in user_id_list:
        yield users_bucket.get(user_id)

# We can retrieve that list of Riak objects later on
list(fetch_all_user_records())
```

## Naming and Object Verification

Another advantage of structured naming is that you can prevent queries
for objects that don't exist or that don't conform to how your
application has named them. For example, you could store all user data
in the bucket `users` with keys beginning with the fragment `user_`
followed by a username, e.g. `user_coderoshi` or `user_macintux`.
If an
object with an inappropriate key is stored in that bucket, it won't even
be seen by your application because it will only ever query keys that
begin with `user_`:

```java
// Assuming that we've created a class User:

public User getUserByUsername(String username) throws Exception {
    String usernameKey = String.format("user_%s", username);
    Location loc = new Location(new Namespace("users"), usernameKey);
    FetchValue fetchUser = new FetchValue.Builder(loc).build();
    FetchValue.Response res = client.execute(fetchUser);
    User userObject = res.getValue(User.class);
    return userObject;
}
```

```ruby
def get_user_by_username(username)
  bucket = client.bucket('users')
  obj = bucket.get("user_#{username}")
  return obj.raw_data
end
```

```php
function fetchUser($user_name)
{
    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
        ->buildLocation('user_' . $user_name, 'users')
        ->build()
        ->execute();

    return $response->getObject()->getData();
}
```

```python
def get_user_by_username(username):
    bucket = client.bucket('users')
    obj = bucket.get('user_{}'.format(username))
    return obj.data
```

## Bucket Types as Additional Namespaces

Riak [bucket types]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) have two essential functions:
they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets) in an
efficient and streamlined way and, more importantly for our purposes
here, they act as a third namespace in Riak in addition to buckets and
keys. Thus, in Riak versions 2.0 and later you have access to a third
layer of information for locating objects if you wish.

While bucket types are typically used to assign different bucket
properties to groups of buckets, you can also create named bucket types
that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
the same configuration but have different names.

Here's an example of creating four bucket types that only extend Riak's
defaults:

```bash
riak-admin bucket-type create john
riak-admin bucket-type create robert
riak-admin bucket-type create jimmy
riak-admin bucket-type create john-paul
```

Or you can create five different bucket types that all set `n_val` to 2
but have different names:

```bash
riak-admin bucket-type create earth '{"props":{"n_val":2}}'
riak-admin bucket-type create fire '{"props":{"n_val":2}}'
riak-admin bucket-type create wind '{"props":{"n_val":2}}'
riak-admin bucket-type create water '{"props":{"n_val":2}}'
riak-admin bucket-type create heart '{"props":{"n_val":2}}'
```

### Bucket Types Example

To extend our Simpsons example from above, imagine that we become
dissatisfied with our storage scheme because we want to separate the
seasons into good seasons and bad seasons (we'll leave it up to you to
make that determination).

One way to improve our scheme might be to change our bucket naming
system and preface each bucket name with `good` or `bad`, but a more
elegant way would be to use bucket types instead. So instead of this URL
structure...

```
GET/PUT/DELETE /buckets/<bucket>/keys/<key>
```

...we can use this structure:

```
GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
```

That adds an additional layer of namespacing and enables us to think
about our data in terms of a deeper hash than in the example above:

```ruby
simpsons = {
  'good': {
    'season X': {
      { 'episode 1': '<title>' },
      # ...
    }
  },
  'bad': {
    'season Y': {
      { 'episode 1': '<title>' },
      # ...
    }
  }
}
```

We can fetch the title of season 8, episode 6:

```ruby
# For the sake of example, we'll classify season 8 as good:

simpsons['good']['season 8']['episode 6']

# => "A Milhouse Divided"
```

If your data is best modeled as a three-layered hash, you may want to
consider using bucket types in the way shown above.

## Resources

More on key/value modeling in Riak can be found in [this
presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo)
by Basho evangelist [Hector Castro](https://github.com/hectcastro), with
the presentation slides available [on Speaker
Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example).
diff --git a/content/riak/kv/2.9.1/developing/usage.md b/content/riak/kv/2.9.1/developing/usage.md
new file mode 100644
index 0000000000..c44453502a
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage.md
@@ -0,0 +1,133 @@
---
title: "Usage Overview"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "Usage"
    identifier: "developing_usage"
    weight: 101
    parent: "developing"
toc: true
---

## In This Section

#### [Creating Objects](./creating-objects)

Creating and storing objects in Riak KV.

[Learn More >>](./creating-objects)

#### [Reading Objects](./reading-objects)

Reading and fetching objects in Riak KV.

[Learn More >>](./reading-objects)

#### [Updating Objects](./updating-objects)

Updating objects in Riak KV.

[Learn More >>](./updating-objects)

#### [Deleting Objects](./deleting-objects)

Deleting objects in Riak KV.

[Learn More >>](./deleting-objects)

#### [Content Types](./content-types)

Overview of content types and their usage.

[Learn More >>](./content-types)

#### [Using Search](./search)

Tutorial on using search.

[Learn More >>](./search)

#### [Using MapReduce](./mapreduce)

Guide to using MapReduce in applications.

[Learn More >>](./mapreduce)

#### [Using Secondary Indexes](./secondary-indexes)

Overview and usage details of Secondary Indexes (2i).

[Learn More >>](./secondary-indexes)

#### [Bucket Types](./bucket-types)

Describes how to use bucket properties.

[Learn More >>](./bucket-types)

#### [Using Commit Hooks](./commit-hooks)

Tutorial on pre-commit and post-commit hook functions.

[Learn More >>](./commit-hooks)

#### [Creating Search Schemas](./search-schemas)

Step-by-step guide on creating and using custom search schemas.

[Learn More >>](./search-schemas)

#### [Searching with Data Types](./searching-data-types)

Guide on using search with Data Types.

[Learn More >>](./searching-data-types)

#### [Implementing a Document Store](./document-store)

Tutorial on using Riak KV as a document store.

[Learn More >>](./document-store)

#### [Custom Extractors](./custom-extractors)

Details on creating and registering custom extractors with Riak Search.

[Learn More >>](./custom-extractors)

#### [Client-side Security](./security)

Overview of client-side security.

[Learn More >>](./security)

#### [Replication](./replication)

Documentation on replication properties and their underlying implementation.

[Learn More >>](./replication)

#### [Conflict Resolution](./conflict-resolution)

Guide to conflict resolution during object updates.
[Learn More >>](./conflict-resolution)
diff --git a/content/riak/kv/2.9.1/developing/usage/bucket-types.md b/content/riak/kv/2.9.1/developing/usage/bucket-types.md
new file mode 100644
index 0000000000..a7517883cd
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/bucket-types.md
@@ -0,0 +1,98 @@
---
title: "Bucket Types"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "Bucket Types"
    identifier: "usage_bucket_types"
    weight: 108
    parent: "developing_usage"
toc: true
aliases:
  - /riak/2.9.1/dev/advanced/bucket-types
  - /riak/kv/2.9.1/dev/advanced/bucket-types
---

If you ever need to turn off indexing for a bucket, set the
`search_index` property to the `_dont_index_` sentinel value.

## Bucket Properties

Although we recommend that you use all new buckets under a bucket type,
if you have existing data with a type-free bucket (i.e. under the
`default` bucket type) you can set the `search_index` property for a
specific bucket.

```java
Namespace catsBucket = new Namespace("cats");
StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket)
        .withSearchIndex("famous")
        .build();
client.execute(storePropsOp);
```

```ruby
bucket = client.bucket('cats')
bucket.properties = {'search_index' => 'famous'}
```

```php
(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak))
    ->withName('famous')
    ->buildBucket('cats')
    ->build()
    ->execute();
```

```python
bucket = client.bucket('cats')
bucket.set_properties({'search_index': 'famous'})
```

```csharp
var properties = new RiakBucketProperties();
properties.SetSearchIndex("famous");
var rslt = client.SetBucketProperties("cats", properties);
```

```javascript
var bucketProps_cb = function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
    // success
};

var store = new Riak.Commands.KV.StoreBucketProps.Builder()
    .withBucket("cats")
    .withSearchIndex("famous")
    .withCallback(bucketProps_cb)
    .build();

client.execute(store);
```

```erlang
riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>).
```

```golang
cmd, err := riak.NewStoreBucketPropsCommandBuilder().
    WithBucketType("animals").
    WithBucket("cats").
    WithSearchIndex("famous").
    Build()
if err != nil {
    return err
}

err = cluster.Execute(cmd)
```

```curl
curl -XPUT $RIAK_HOST/buckets/cats/props \
  -H'content-type:application/json' \
  -d'{"props":{"search_index":"famous"}}'
```
diff --git a/content/riak/kv/2.9.1/developing/usage/commit-hooks.md b/content/riak/kv/2.9.1/developing/usage/commit-hooks.md
new file mode 100644
index 0000000000..d6475cca1f
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/commit-hooks.md
@@ -0,0 +1,239 @@
---
title: "Using Commit Hooks"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "Using Commit Hooks"
    identifier: "usage_commit_hooks"
    weight: 109
    parent: "developing_usage"
toc: true
aliases:
  - /riak/2.9.1/dev/using/commit-hooks
  - /riak/kv/2.9.1/dev/using/commit-hooks
---

[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types

Pre- and post-commit hooks are functions that are invoked before or
after an object has been written to Riak.
To provide a few examples,
commit hooks can:

- allow a write to occur with an unmodified object
- modify an object
- fail an update and prevent any modifications to the object

Post-commit hooks are notified _after the fact_ and should not modify
the object directly. Updating Riak objects while post-commit hooks are
invoked can cause nasty feedback loops which will wedge the hook into an
infinite cycle unless the hook functions are carefully written to detect
and short-circuit such cycles.

Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets) level,
[using bucket types][usage bucket types]. They are run once per successful response to the
client.

Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
functions.

## Setting Commit Hooks Using Bucket Types

Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types)
that associate one or more hooks with any bucket that bears that type.
Let's create a bucket type called `with_post_commit` that adds a
post-commit hook to operations on any bucket that bears the
`with_post_commit` type.

The format for specifying commit hooks is to identify the module (`mod`)
and then the name of the function (`fun`) as a JSON object. The
following specifies a commit hook called `my_custom_hook` in the module
`commit_hooks_module`:

```json
{
  "mod": "commit_hooks_module",
  "fun": "my_custom_hook"
}
```

When we create our `with_post_commit` bucket type, we add that object
to either the `precommit` or `postcommit` list in the bucket type's
properties. Pre- and post-commit hooks are stored in lists named
`precommit` and `postcommit`, respectively. Let's add the hook we
specified above to the `postcommit` property when we create our bucket
type:

```bash
riak-admin bucket-type create with_post_commit \
  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
```

Once our bucket type has been created, we must activate it so that it
will be usable through our Riak cluster:

```bash
riak-admin bucket-type activate with_post_commit
```

If the response is `with_post_commit has been activated`, then the
bucket type is ready for use.

## Pre-Commit Hooks

Pre-commit hook Erlang functions should take a single argument, the
Riak object being modified. Remember that deletes are also considered
"writes," and so pre-commit hooks will be fired when a delete occurs in
the bucket as well. This means that hook functions will need to inspect
the object for the `X-Riak-Deleted` metadata entry (more on this in our
documentation on [object deletion]({{<baseurl>}}riak/kv/2.9.1/using/reference/object-deletion)) to determine whether a delete is
occurring.

Erlang pre-commit functions are allowed three possible return values (a
brief sketch follows this list):

- A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written.
- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/2.9.1/developing/api/http)) along with a generic error message about why the write was blocked.
- `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
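As a minimal sketch of these three return values in one place (the module and
function names below are hypothetical, not part of Riak's API), a single hook
might look like this:

```erlang
%% Hypothetical example: reject empty values outright, fail oversized
%% values with a reason, and pass everything else through unmodified.
precommit_sketch(Object) ->
    Value = riak_object:get_value(Object),
    if
        Value =:= <<>> ->
            fail;                         %% generic failure
        byte_size(Value) > 1048576 ->
            {fail, "Value exceeds 1 MB"}; %% failure with a custom reason
        true ->
            Object                        %% allow the write to proceed
    end.
```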
Errors that occur when processing Erlang pre-commit hooks will be
reported in the `sasl-error.log` file with lines that start with
`problem invoking hook`.

#### Object Size Example

This Erlang pre-commit hook will limit object values to 5 MB or smaller:

```erlang
precommit_limit_size(Object) ->
  case erlang:byte_size(riak_object:get_value(Object)) of
    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
    _ -> Object
  end.
```

The Erlang function `precommit_limit_size` takes the Riak object
(`Object`) as its input and runs a pattern-matching operation on the
object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
function determines that the object's size (determined by the `riak_object:get_value`
function) is greater than 5,242,880 (5 MB in bytes), then the commit
will fail with the message `Object is larger than 5MB.` This will stop
the write. If the object is not larger than 5 MB, Riak will return the
object and allow the write to proceed.

### Chaining

The default value of the bucket type's `precommit` property is an empty
list, meaning that no pre-commit hooks are specified by default. Adding
one or more pre-commit hook functions to this list, as documented above,
will cause Riak to start evaluating those hook functions when bucket
entries are created, updated, or deleted. Riak stops evaluating
pre-commit hooks when a hook function fails the commit.

#### JSON Validation Example

Pre-commit hooks can be used in many ways in Riak. One such way to use
pre-commit hooks is to validate data before it is written to Riak.
Below is an example that uses Erlang to validate a JSON object
before it is written to Riak.

Below is a sample JSON object that will be evaluated by the hook:

```json
{
  "user_info": {
    "name": "Mark Phillips",
    "age": "25"
  },
  "session_info": {
    "id": 3254425,
    "items": [29, 37, 34]
  }
}
```

The following hook will validate the JSON object:

```erlang
validate(Object) ->
  try
    mochijson2:decode(riak_object:get_value(Object)),
    Object
  catch
    throw:invalid_utf8 ->
      {fail, "Invalid JSON: Illegal UTF-8 character"};
    error:Error ->
      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
  end.
```

**Note**: All pre-commit hook functions are executed for each create and update operation.

## Post-Commit Hooks

Post-commit hooks are run after a write has completed successfully. More
specifically, the hook function is called immediately before the calling
process is notified of the successful write.

Hook functions must accept a single argument: the object instance just
written. The return value of the function is ignored. As with pre-commit
hooks, deletes are considered writes, so post-commit hook functions will
need to inspect the object's metadata for the presence of `X-Riak-Deleted`
to determine whether a delete has occurred. As with pre-commit hooks,
errors that occur when processing post-commit hooks will be reported in
the `sasl-error.log` file with lines that start with `problem invoking hook`.
#### Example

The following post-commit hook creates a secondary index on the `email`
field of a JSON object:

```erlang
postcommit_index_on_email(Object) ->
    %% Determine the target bucket name
    Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),

    %% Decode the JSON body of the object
    {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),

    %% Extract the email field
    {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),

    %% Create a new object for the target bucket
    %% NOTE: This doesn't handle the case where the
    %%       index object already exists!
    IndexObj = riak_object:new(
        Bucket, Key, <<>>, %% no object contents
        dict:from_list(
            [
                {<<"content-type">>, "text/plain"},
                {<<"Links">>,
                    [
                        {
                            {riak_object:bucket(Object), riak_object:key(Object)},
                            <<"indexed">>
                        }]}
            ]
        )
    ),

    %% Get a riak client
    {ok, C} = riak:local_client(),

    %% Store the object
    C:put(IndexObj).
```

### Chaining

The default value of the bucket `postcommit` property is an empty list,
meaning that no post-commit hooks are specified by default. Adding one
or more post-commit hook functions to the list, as documented above,
will cause Riak to start evaluating those hook functions immediately
after data has been created, updated, or deleted. Each post-commit hook
function runs in a separate process, so it's possible for several hook
functions, triggered by the same update, to execute in parallel.

**Note**: All post-commit hook functions are executed for each create,
update, or delete.
diff --git a/content/riak/kv/2.9.1/developing/usage/conflict-resolution.md b/content/riak/kv/2.9.1/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..27eba23a09
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/conflict-resolution.md
@@ -0,0 +1,677 @@
---
title: "Conflict Resolution"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "Conflict Resolution"
    identifier: "usage_conflict_resolution"
    weight: 116
    parent: "developing_usage"
toc: true
aliases:
  - /riak/2.9.1/dev/using/conflict-resolution
  - /riak/kv/2.9.1/dev/using/conflict-resolution
---

[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types
[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/reference/strong-consistency

One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#node) is capable of receiving requests without requiring that
every node participate in each request.

If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
unavoidable. Often, Riak can resolve these conflicts on its own
internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
{{% note title="Important note on terminology" %}}
In versions of Riak prior to 2.0, vector clocks were the only causal context
mechanism available in Riak, which changed with the introduction of dotted
version vectors in 2.0. Please note that you may frequently find terminology in
client library APIs, internal Basho documentation, and more that uses the term
"vector clock" interchangeably with causal context in general. Riak's HTTP API
still uses an `X-Riak-Vclock` header, for example, even if you are using dotted
version vectors.
{{% /note %}}

But even when you use causal context, Riak cannot always decide which
value is most causally recent, especially in cases involving concurrent
updates to an object. So how does Riak behave when it can't decide on a
single most-up-to-date value? **That is your choice**. A full listing of
available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now,
though, please bear in mind that we strongly recommend one of the
following two options:

1. If your data can be modeled as one of the currently available [Riak
   Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types), we recommend using one of these types,
   because all of them have conflict resolution _built in_, completely
   relieving applications of the need to engage in conflict resolution.
2. If your data cannot be modeled as one of the available Data Types,
   we recommend allowing Riak to generate [siblings](#siblings) and to design your application to resolve
   conflicts in a way that fits your use case. Developing your own
   **conflict resolution strategy** can be tricky, but it has clear
   advantages over other approaches.

Because Riak allows for a mixed approach when storing and managing data,
you can apply multiple conflict resolution strategies within a cluster.

> **Note on strong consistency**
>
> In versions of Riak 2.0 and later, you have the option of using Riak in
a strongly consistent fashion. This document pertains to usage of Riak
as an _eventually_ consistent system. If you'd like to use Riak's
strong consistency feature, please refer to the following documents:
>
> * [Using Strong Consistency]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/strong-consistency) --- A guide for developers
> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/2.9.1/configuring/strong-consistency) --- A guide for operators
> * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong
  consistency

## Client- and Server-side Conflict Resolution

Riak's eventual consistency model is powerful because Riak is
fundamentally non-opinionated about how data resolution takes place.
While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/replication-properties#available-parameters), there are a variety of general
approaches to conflict resolution that are available. In Riak, you can
mix and match conflict resolution strategies at the bucket level,
[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets)
to consider when reasoning about conflict resolution are the
`allow_mult` and `last_write_wins` properties.

These properties provide you with the following basic options:

### Timestamp-based Resolution

If the [`allow_mult`](#siblings) parameter is set to
`false`, Riak resolves all object replica conflicts internally and does
not return siblings to the client.
How Riak resolves those conflicts
depends on the value that you set for a different bucket property,
[`last_write_wins`]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets). If `last_write_wins` is set to `false`,
Riak will resolve all conflicts on the basis of
[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are
attached to all Riak objects as metadata.

The problem with timestamps is that they are not a reliable resolution
mechanism in distributed systems, and they always bear the risk of data
loss. A better yet still-problematic option is to adopt a
last-write-wins strategy, described directly below.

### Last-write-wins

Another way to manage conflicts is to set `allow_mult` to `false`, as
with timestamp-based resolution, while also setting the
`last_write_wins` parameter to
`true`. This produces a so-called last-write-wins (LWW) strategy whereby
Riak foregoes the use of all internal conflict resolution strategies
when making writes, effectively disregarding all previous writes.

The problem with LWW is that it will necessarily drop some writes in the
case of concurrent updates in the name of preventing sibling creation.
If your use case requires that your application be able to reason about
differing values produced in the case of concurrent updates, then we
advise against LWW as a general conflict resolution strategy.

However, LWW can be useful---and safe---if you are certain that there
will be no concurrent updates. If you are storing immutable data in
which each object is guaranteed to have its own key or engaging in
operations related to bulk loading, you should consider LWW.

{{% note title="Undefined behavior warning" %}}
Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to
unpredictable behavior and should always be avoided.
{{% /note %}}

### Resolve Conflicts on the Application Side

While setting `allow_mult` to `false` unburdens applications from having
to reason about siblings, delegating that responsibility to Riak itself,
it bears all of the drawbacks explained above. On the other hand,
setting `allow_mult` to `true` has the following benefits:

* Riak will retain writes even in the case of concurrent updates to a
  key, which enables you to capture the benefits of high availability
  with a far lower risk of data loss
* If your application encounters siblings, it can apply its own
  use-case-specific conflict resolution logic

Conflict resolution in Riak can be a complex business, but the presence
of this variety of options means that requests to Riak can always be
made in accordance with your data model(s), business needs, and use
cases. For examples of client-side sibling resolution, see the following
client-library-specific docs:

* [Java]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/java)
* [Ruby]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/ruby)
* [Python]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/python)
* [C#]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/csharp)
* [Node.js]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/nodejs)

In Riak versions 2.0 and later, `allow_mult` is set to `true` by default
for any [bucket types]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) that you create.
This means
that if you wish to avoid client-side sibling resolution, you have a few
options:

* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types)
  that set `allow_mult` to `false`
* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/2.9.1/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/2.9.1/configuring/reference#default-bucket-properties) for your
  cluster. If you set the `buckets.default.allow_mult` parameter to
  `false`, all bucket types that you create will have `allow_mult` set
  to `false` by default.

## Causal Context

When a value is stored in Riak, it is tagged with a piece of metadata
called a **causal context** which establishes the object's initial
version. Causal context comes in one of two possible forms, depending
on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context#vector-clocks) will be used.

Causal context essentially enables Riak to compare the different values
of objects stored in Riak and to determine a number of important things
about those values:

* Whether one value is a direct descendant of the other
* Whether the values are direct descendants of a common parent
* Whether the values are unrelated in recent heritage

Using the information provided by causal context, Riak is frequently,
though not always, able to resolve conflicts between values without
producing siblings.

Neither vector clocks nor dotted version vectors are human readable;
they look something like this:

```
a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
```

If `allow_mult` is set to `true`, you should _always_ use causal context
when updating objects, _unless you are certain that no object exists
under that key_. Failing to use causal context with mutable data,
especially for objects that are frequently updated, can lead to
[sibling explosion]({{<baseurl>}}riak/kv/2.9.1/using/performance/latency-reduction#siblings), which can
produce a variety of problems in your cluster. Fortunately, much of the
work involved with using causal context is handled automatically by
Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.1/developing/client-libraries). Examples can be found for each
client library in the [Object Updates]({{<baseurl>}}riak/kv/2.9.1/developing/usage/updating-objects) document.

## Siblings

A **sibling** is created when Riak is unable to resolve the canonical
version of an object being stored, i.e. when Riak is presented with
multiple possible values for an object and can't figure out which one is
most causally recent. The following scenarios can create sibling values
inside of a single object:

1. **Concurrent writes** --- If two writes occur simultaneously from
clients, Riak may not be able to choose a single value to store, in
which case the object will be given a sibling. These writes could happen
on the same node or on different nodes.
2. **Stale causal context** --- Writes from any client using a stale
[causal context]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context).
This is a less likely scenario if a client updates
the object by reading the object first, fetching the causal context
currently attached to the object, and then returning that causal context
to Riak when performing the update (fortunately, our client libraries
handle much of this automatically). However, even if a client follows
this protocol when performing updates, a situation may occur in which an
update happens from a different client while the read/write cycle is
taking place. This may cause the first client to issue the write with an
old causal context value and for a sibling to be created. A client is
"misbehaved" if it habitually updates objects with a stale context or
with no context at all.
3. **Missing causal context** --- If an object is updated with no causal
context attached, siblings are very likely to be created. This is an
unlikely scenario if you're using a Basho client library, but it _can_
happen if you are manipulating objects using a client like `curl` and
forgetting to set the `X-Riak-Vclock` header.

## Siblings in Action

Let's have a more concrete look at how siblings work in Riak. First,
we'll create a bucket type called `siblings_allowed` with `allow_mult`
set to `true`:

```bash
riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
riak-admin bucket-type activate siblings_allowed
riak-admin bucket-type status siblings_allowed
```

If the type has been activated, running the `status` command should
return `siblings_allowed is active`. Now, we'll create two objects and
write both of them to the same key without first fetching the object
(which obtains the causal context):

```java
Location bestCharacterKey =
  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");

RiakObject obj1 = new RiakObject()
        .withContentType("text/plain")
        .withValue(BinaryValue.create("Ren"));
RiakObject obj2 = new RiakObject()
        .withContentType("text/plain")
        .withValue(BinaryValue.create("Stimpy"));
StoreValue store1 = new StoreValue.Builder(obj1)
        .withLocation(bestCharacterKey)
        .build();
StoreValue store2 = new StoreValue.Builder(obj2)
        .withLocation(bestCharacterKey)
        .build();
client.execute(store1);
client.execute(store2);
```

```ruby
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
obj1 = Riak::RObject.new(bucket, 'best_character')
obj1.content_type = 'text/plain'
obj1.raw_data = 'Ren'
obj1.store

obj2 = Riak::RObject.new(bucket, 'best_character')
obj2.content_type = 'text/plain'
obj2.raw_data = 'Stimpy'
obj2.store
```

```python
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
obj1 = RiakObject(client, bucket, 'best_character')
obj1.content_type = 'text/plain'
obj1.data = 'Ren'
obj1.store()

obj2 = RiakObject(client, bucket, 'best_character')
obj2.content_type = 'text/plain'
obj2.data = 'Stimpy'
obj2.store()
```

```csharp
var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");

var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);

var renResult = client.Put(renObj);
var stimpyResult = client.Put(stimpyObj);
```

```javascript
var obj1 = new Riak.Commands.KV.RiakObject();
obj1.setContentType('text/plain');
obj1.setBucketType('siblings_allowed');
obj1.setBucket('nickolodeon');
obj1.setKey('best_character');
obj1.setValue('Ren');

var obj2 = new Riak.Commands.KV.RiakObject();
obj2.setContentType('text/plain');
obj2.setBucketType('siblings_allowed');
obj2.setBucket('nickolodeon');
obj2.setKey('best_character');
obj2.setValue('Stimpy');

var storeFuncs = [];
[obj1, obj2].forEach(function (obj) {
    storeFuncs.push(
        function (async_cb) {
            client.storeValue({ value: obj }, function (err, rslt) {
                async_cb(err, rslt);
            });
        }
    );
});

async.parallel(storeFuncs, function (err, rslts) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
                     <<"best_character">>,
                     <<"Ren">>,
                     <<"text/plain">>),
Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
                     <<"best_character">>,
                     <<"Stimpy">>,
                     <<"text/plain">>),
riakc_pb_socket:put(Pid, Obj1),
riakc_pb_socket:put(Pid, Obj2).
```

```curl
curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
  -H "Content-Type: text/plain" \
  -d "Ren"

curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
  -H "Content-Type: text/plain" \
  -d "Stimpy"
```

> **Getting started with Riak KV clients**
>
> If you are connecting to Riak using one of Basho's official
[client libraries]({{<baseurl>}}riak/kv/2.9.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.1/developing/getting-started) section.

At this point, multiple objects have been stored in the same key without
passing any causal context to Riak. Let's see what happens if we try to
read the contents of the object:

```java
Location bestCharacterKey =
  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");

FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
FetchValue.Response response = client.execute(fetch);
RiakObject obj = response.getValue(RiakObject.class);
System.out.println(obj.getValue().toString());
```

```ruby
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
obj = bucket.get('best_character')
obj
```

```python
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
obj = bucket.get('best_character')
obj.siblings
```

```csharp
var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
var getResult = client.Get(id);
RiakObject obj = getResult.Value;
Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
foreach (var sibling in obj.Siblings)
{
    Debug.WriteLine(
        format: "    VTag: {0}",
        args: sibling.VTag);
}
```

```javascript
client.fetchValue({
    bucketType: 'siblings_allowed', bucket:
        'nickolodeon', key: 'best_character'
}, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
    logger.info("nickolodeon/best_character has '%d' siblings",
        rslt.values.length);
});
```

```curl
curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
```

Uh-oh! Siblings have been found.
We should get this response:

```java
com.basho.riak.client.cap.UnresolvedConflictException: Siblings found
```

```ruby
<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]>
```

```python
[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>]
```

```csharp
Sibling count: 2
    VTag: 1DSVo7VED8AC6llS8IcDE6
    VTag: 7EiwrlFAJI5VMLK87vU4tE
```

```javascript
info: nickolodeon/best_character has '2' siblings
```

```curl
Siblings:
175xDv0I3UFCfGRC7K7U9z
6zY2mUCFPEoL834vYCDmPe
```

As you can see, reading an object with sibling values will result in
some form of "multiple choices" response (e.g. `300 Multiple Choices` in
HTTP). If you're using the HTTP interface and want to view all sibling
values, you can attach an `Accept: multipart/mixed` header to your
request:

```curl
curl -H "Accept: multipart/mixed" \
  http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
```

Response (without headers):

```
Ren
--WUnzXITIPJFwucNwfdaofMkEG7H

Stimpy
--WUnzXITIPJFwucNwfdaofMkEG7H--
```

If you select the first of the two siblings and retrieve its value, you
should see `Ren` and not `Stimpy`.

### Using Causal Context

Once you are presented with multiple options for a single value, you
must determine the correct value. In an application, this can be done
either in an automatic fashion, using a use-case-specific resolver, or
by presenting the conflicting objects to the end user. For more
information on application-side conflict resolution, see our
client-library-specific documentation for the following languages:

* [Java]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/java)
* [Ruby]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/ruby)
* [Python]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/python)
* [C#]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/csharp)
* [Node.js]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/nodejs)

We won't deal with conflict resolution in this section. Instead, we'll
focus on how to use causal context.

After having written several objects to Riak in the section above, we
have values in our object: `Ren` and `Stimpy`. But let's say that we
decide that `Stimpy` is the correct value based on our application's use
case. In order to resolve the conflict, we need to do three things:

1. Fetch the current object (which will return both siblings)
2. Modify the value of the object, i.e. make the value `Stimpy`
3. Write the object back to the `best_character` key

What happens when we fetch the object first, prior to the update, is
that the object handled by the client has a causal context attached. At
that point, we can modify the object's value, and when we write the
object back to Riak, _the causal context will automatically be attached
to it_.
Let's see what that looks like in practice:

```java
// First, we fetch the object
Location bestCharacterKey =
  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
FetchValue.Response res = client.execute(fetch);
RiakObject obj = res.getValue(RiakObject.class);

// Then we modify the object's value
obj.setValue(BinaryValue.create("Stimpy"));

// Then we store the object, which has the vector clock already attached
StoreValue store = new StoreValue.Builder(obj)
        .withLocation(bestCharacterKey)
        .build();
client.execute(store);
```

```ruby
# First, we fetch the object
bucket = client.bucket('nickolodeon')
obj = bucket.get('best_character', type: 'siblings_allowed')

# Then we modify the object's value
obj.raw_data = 'Stimpy'

# Then we store the object, which has the vector clock already attached
obj.store
```

```python
# First, we fetch the object
bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
obj = bucket.get('best_character')

# Then we modify the object's value
obj.data = 'Stimpy'

# Then we store the object, which has the vector clock already attached
obj.store()
```

```csharp
// First, fetch the object
var getResult = client.Get(id);

// Then, modify the object's value
RiakObject obj = getResult.Value;
obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);

// Then, store the object which has vector clock attached
var putRslt = client.Put(obj);
CheckResult(putRslt);

obj = putRslt.Value;
// Voila, no more siblings!
Debug.Assert(obj.Siblings.Count == 0);
```

```javascript
client.fetchValue({
        bucketType: 'siblings_allowed',
        bucket: 'nickolodeon',
        key: 'best_character'
    }, function (err, rslt) {
        if (err) {
            throw new Error(err);
        }

        var riakObj = rslt.values.shift();
        riakObj.setValue('Stimpy');
        client.storeValue({ value: riakObj, returnBody: true },
            function (err, rslt) {
                if (err) {
                    throw new Error(err);
                }

                assert(rslt.values.length === 1);
            }
        );
    }
);
```

```curl
curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character

# In the HTTP interface, the causal context can be found in the
# "X-Riak-Vclock" header. That will look something like this:

X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=

# When performing a write to the same key, that same header needs to
# accompany the write for Riak to be able to use the vector clock
```

{{% note title="Concurrent conflict resolution" %}}
It should be noted that it is possible to have two clients that are
simultaneously engaging in conflict resolution. To avoid a pathological
divergence, you should be sure to limit the number of reconciliations and fail
once that limit has been exceeded.
{{% /note %}}

### Sibling Explosion

Sibling explosion occurs when an object rapidly collects siblings
without being reconciled. This can lead to myriad issues. Having an
enormous object in your node can cause reads of that object to crash
the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/2.9.1/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.

### Vector Clock Explosion

Besides sibling explosion, the vector clock itself can grow extremely
large when a significant volume of updates is performed on a single
object in a small period of time.
While updating a single object
_extremely_ frequently is not recommended, you can tune Riak's vector
clock pruning to prevent vector clocks from growing too large too
quickly. More on pruning can be found in the [section below](#vector-clock-pruning).

### How does `last_write_wins` affect resolution?

On the surface, it seems like setting `allow_mult` to `false`
(the default) and `last_write_wins` to `true` would result in the same
behavior, but there is a subtle distinction.

Even though both settings return only one value to the client, setting
`allow_mult` to `false` still uses vector clocks for resolution, whereas
if `last_write_wins` is `true`, Riak reads the timestamp to determine
the latest version. Deeper in the system, if `allow_mult` is `false`,
Riak will still allow siblings to exist when they are created (via
concurrent writes or network partitions), whereas setting
`last_write_wins` to `true` means that Riak will overwrite the value
with the one that has the later timestamp.

When you don't care about sibling creation, setting `allow_mult` to
`false` has the least surprising behavior: you get the latest value,
but network partitions are handled gracefully. However, for cases in
which keys are rewritten often (and quickly) and the new value isn't
necessarily dependent on the old value, `last_write_wins` will provide
better performance. Some use cases where you might want to use
`last_write_wins` include caching, session storage, and insert-only
(no updates).

{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}}
The combination of setting both the `allow_mult` and `last_write_wins`
properties to `true` leads to undefined behavior and should not be used.
{{% /note %}}

## Vector Clock Pruning

Riak regularly prunes vector clocks to prevent overgrowth based on four
parameters which can be set for any bucket type that you create:

Parameter | Default value | Description
:---------|:--------------|:-----------
`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned
`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned
`young_vclock` | `20` | If a vector clock entry is younger than this value (in seconds), it will not be pruned
`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in seconds), it will be pruned

This diagram shows how the values of these parameters dictate the vector
clock pruning process:

![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)

## More Information

Additional background information on vector clocks:

* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock)
* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/)
* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/)
* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563)
diff --git a/content/riak/kv/2.9.1/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/csharp.md
new file mode 100644
index 0000000000..ddd15dad90
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/csharp.md
@@ -0,0 +1,119 @@
---
title_supertext: "Conflict Resolution:"
title: "C Sharp"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "C Sharp"
    identifier: "usage_conflict_resolution_csharp"
    weight: 103
    parent: "usage_conflict_resolution"
toc: true
aliases:
  - /riak/2.9.1/dev/using/conflict-resolution/csharp
  - /riak/kv/2.9.1/dev/using/conflict-resolution/csharp
---

For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific
criteria. Here, we'll provide a brief guide to conflict resolution using the
official [Riak .NET client][riak_dotnet_client].

## How the .NET Client Handles Conflict Resolution

In the Riak .NET client, every Riak object has a `Siblings` property that
provides access to a list of that object's sibling values. If there are no
siblings, that property will return an empty list.

Here's an example of an object with siblings:

```csharp
var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");

var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);

var renResult = client.Put(renObj);
var stimpyResult = client.Put(stimpyObj);

var getResult = client.Get(id);
RiakObject obj = getResult.Value;
Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
foreach (var sibling in obj.Siblings)
{
    Debug.WriteLine(
        format: "    VTag: {0}",
        args: sibling.VTag);
}
```

So what happens if the count of `obj.Siblings` is greater than 0, as in the case
above?

In order to resolve siblings, you need to either fetch, update and store a
canonical value, or choose a sibling from the `Siblings` list and store that as
the canonical value.

## Basic Conflict Resolution Example

In this example, you will ignore the contents of the `Siblings` list and will
fetch, update and store the definitive value.

```csharp
var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");

var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);

var renResult = client.Put(renObj);
var stimpyResult = client.Put(stimpyObj);

var getResult = client.Get(id);
RiakObject obj = getResult.Value;
Debug.Assert(obj.Siblings.Count == 2);

// Now, modify the object's value
obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);

// Then, store the object which has vector clock attached
var putRslt = client.Put(obj);
CheckResult(putRslt);

obj = putRslt.Value;
// Voila, no more siblings!
Debug.Assert(obj.Siblings.Count == 0);
```

### Choosing a value from `Siblings`

This example shows a basic sibling resolution strategy in which the first
sibling is chosen as the canonical value.
```csharp
var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");

var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);

var renResult = client.Put(renObj);
var stimpyResult = client.Put(stimpyObj);

var getResult = client.Get(id);
RiakObject obj = getResult.Value;
Debug.Assert(obj.Siblings.Count == 2);

// Pick the first sibling
RiakObject chosenSibling = getResult.Value.Siblings.First();

// Then, store the chosen object
var putRslt = client.Put(chosenSibling);
CheckResult(putRslt);

RiakObject updatedObject = putRslt.Value;
// Voila, no more siblings!
Debug.Assert(updatedObject.Siblings.Count == 0);
```

[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
diff --git a/content/riak/kv/2.9.1/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..4e9b96bf7f
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,58 @@
---
title_supertext: "Conflict Resolution:"
title: "Go"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "Go"
    identifier: "usage_conflict_resolution_golang"
    weight: 106
    parent: "usage_conflict_resolution"
toc: true
aliases:
  - /riak/2.9.1/dev/using/conflict-resolution/golang
  - /riak/kv/2.9.1/dev/using/conflict-resolution/golang
---

For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
requires applications to resolve siblings according to use-case-specific
criteria. Here, we'll provide a brief guide to conflict resolution using the
official [Riak Go client](https://github.com/basho/riak-go-client).

## How the Go Client Handles Conflict Resolution

In the Riak Go client, the result of a fetch can contain an array of sibling
objects in its `Values` slice. If there are no siblings, the slice will
contain a single value.

[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)

So what happens if the length of `Values` is greater than 1, as in the case
above?

In order to resolve siblings, you need to either fetch, update, and store a
canonical value, or choose a sibling from the `Values` slice and store that as
the canonical value.

## Basic Conflict Resolution Example

In this example, you will ignore the contents of the `Values` slice and will
fetch, update and store the definitive value.

[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L125-L146)

### Choosing a value from `Values`

This example shows a basic sibling resolution strategy in which the first
sibling is chosen as the canonical value.

[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)

### Using `ConflictResolver`

This example shows a basic sibling resolution strategy in which the first
sibling is chosen as the canonical value via a conflict resolution type.
[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210)
diff --git a/content/riak/kv/2.9.1/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/java.md
new file mode 100644
index 0000000000..7de3409751
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/java.md
@@ -0,0 +1,272 @@
---
title_supertext: "Conflict Resolution:"
title: "Java"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "Java"
    identifier: "usage_conflict_resolution_java"
    weight: 100
    parent: "usage_conflict_resolution"
toc: true
aliases:
  - /riak/2.9.1/dev/using/conflict-resolution/java
  - /riak/kv/2.9.1/dev/using/conflict-resolution/java
---

For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution), we strongly recommend adopting a
conflict resolution strategy that requires applications to resolve
siblings according to use-case-specific criteria. Here, we'll provide a
brief guide to conflict resolution using the official [Riak Java
client](https://github.com/basho/riak-java-client).

## How the Java Client Handles Conflict Resolution

The official Riak Java client provides a `ConflictResolver` interface
for handling sibling resolution. This interface requires that you
implement a `resolve` method that takes a Java `List` of objects of a
specific type that are stored in Riak and produces a single object of
that type, i.e. converts a `List<T>` to a single `T`. Once that
interface has been implemented, it can be registered as a singleton and
thereby applied to all read operations on a specific data type. Below is
an example resolver for the class `Foo`:

```java
import com.basho.riak.client.api.cap.ConflictResolver;

public class FooResolver implements ConflictResolver<Foo> {
    @Override
    public Foo resolve(List<Foo> siblings) {
        // Insert your sibling resolution logic here
    }
}
```

What happens within the `resolve` method is up to you and will always
depend on the use case at hand. You can implement a resolver that
selects a random `Foo` from the list, chooses the `Foo` with the most
recent timestamp (if you've set up the class `Foo` to have timestamps),
etc. In this tutorial we'll provide a simple example to get you started.

## Basic Conflict Resolution Example

Let's say that we're building a social network application and storing
lists of usernames representing each user's "friends" in the network.
Each user will bear the class `User`, which we'll create below. All of
the data for our application will be stored in buckets that bear the
[bucket type]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) `siblings`, and for this bucket type
`allow_mult` is set to `true`, which means that Riak will generate
siblings in certain cases---siblings that our application will need to
be equipped to resolve when they arise.

The question that we need to ask ourselves now is this: if a given user
has sibling values, i.e. if there are multiple `friends` lists and Riak
can't decide which one is most causally recent, which list should be
deemed "correct" from the standpoint of the application? What criteria
should be applied in making that decision? Should the lists be merged?
Should we pick a `User` object at random?

This decision will always be yours to make.
Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where one
+possible value has `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+import java.util.Set;
+
+public class User {
+    public String username;
+    public Set<String> friends;
+
+    public User(String username, Set<String> friends) {
+        this.username = username;
+        this.friends = friends;
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
+need to implement that interface in a way that is specific to the need
+at hand, i.e. taking a list of `User` objects and returning the `User`
+object that has the longest `friends` list:
+
+```java
+import java.util.List;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied on all reads that involve that object type.
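+
+For instance, once `UserResolver` is registered for `User.class`, an
+ordinary fetch returns an already-resolved `User`. Here's a brief,
+illustrative sketch; the bucket type and key are the ones used in this
+tutorial, while the variable names are our own:
+
+```java
+// Assumes UserResolver has been registered for User.class as shown above
+Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
+FetchValue fetch = new FetchValue.Builder(key).build();
+FetchValue.Response response = client.execute(fetch);
+
+// Any sibling values pass through UserResolver.resolve() at this point
+User bashobunny = response.getValue(User.class);
+```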
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.1/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // siblings list is empty and returning the one sibling if there
+        // is only one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. 
While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets). diff --git a/content/riak/kv/2.9.1/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/nodejs.md new file mode 100644 index 0000000000..206a1a8c04 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/nodejs.md @@ -0,0 +1,58 @@ +--- +title_supertext: "Conflict Resolution:" +title: "NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "NodeJS" + identifier: "usage_conflict_resolution_nodejs" + weight: 104 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.1/dev/using/conflict-resolution/nodejs + - /riak/kv/2.9.1/dev/using/conflict-resolution/nodejs +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). + +## How the Node.js Client Handles Conflict Resolution + +In the Riak Node.js client, the result of a fetch can possibly return an array +of sibling objects. If there are no siblings, that property will return an +array with one value in it. + +[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68) + +So what happens if the length of `rslt.values` is greater than 1, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `values` array and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `values` array and will +fetch, update and store the definitive value. 
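+
+Condensed, that fetch-update-store pattern looks something like the
+sketch below. The bucket, key, and new value here are illustrative
+only; the linked example shows the complete flow:
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings', bucket: 'users', key: 'bashobunny'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    // Reuse one fetched sibling so the causal context rides along
+    var riakObj = rslt.values.shift();
+    // Replace its contents with whatever your application deems correct
+    riakObj.setValue({ friends: ['fred', 'barney'] });
+    client.storeValue({
+        bucketType: 'siblings', bucket: 'users', key: 'bashobunny',
+        value: riakObj
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```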
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
diff --git a/content/riak/kv/2.9.1/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..5fb3fc3a1e
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,240 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/using/conflict-resolution/php
+  - /riak/kv/2.9.1/dev/using/conflict-resolution/php
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides what is needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+    ->build()
+    ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? What criteria
+should be applied in making that decision? 
Should the lists be merged? +Should we pick a `User` object at random? + +This decision will always be yours to make. Here, though, we'll keep it +simple and say that the following criterion will hold: if conflicting +lists exist, _the longer list will be the one that our application deems +correct_. So if the user `user1234` has a sibling conflict where one +possible value has `friends` lists with 100, 75, and 10 friends, +respectively, the list of 100 friends will win out. While this might +not make sense in real-world applications, it's a good jumping-off +point. We'll explore the drawbacks of this approach, as well as a better +alternative, in this document as well. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` as well as a `friends` property that +lists the usernames, as strings, of the user's friends. We'll use a +`Set` for the `friends` property to avoid duplicates. + +```php +class User { + public $username; + public $friends; + + public function __construct($username, array $friends = []) + { + $this->username = $username; + $this->friends = $friends; + } + + public function __toString() + { + return json_encode([ + 'username' => $this->username, + 'friends' => $this->friends, + 'friends_count' => count($this->friends) + ]); + } +} +``` + +Here's an example of instantiating a new `User` object: + +```php +$bashobunny = new User('bashobunny', ['fred', 'barney']); +``` + +### Implementing a Conflict Resolution Function + +Let's say that we've stored a bunch of `User` objects in Riak and that a +few concurrent writes have led to siblings. How is our application going +to deal with that? First, let's say that there's a `User` object stored +in the bucket `users` (which is of the bucket type `siblings`, as +explained above) under the key `bashobunny`. We can fetch the object +that is stored there and see if it has siblings: + +```php +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('bashobunny', 'users', 'siblings') + ->build() + ->execute(); + +echo $response->hasSiblings(); // 1 +``` + +If we get `true`, then there are siblings. So what do we do in that +case? At this point, we need to write a function that resolves the list +of siblings, i.e. reduces the `$response->getSiblings()` array down to one member. +In our case, we need a function that takes a Riak response object as its argument, +applies some logic to the list of values contained in the `siblings` property +of the object, and returns a single value. 
For our example use case here, we'll
+return the sibling with the longest `friends` list:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    // With no siblings present, simply return the single object
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $max_key = 0;
+    foreach ($siblings as $key => $sibling) {
+        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+            $max_key = $key;
+        }
+    }
+
+    return $siblings[$max_key];
+}
+```
+
+We can then embed this function into a more general function for fetching
+objects from the users bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.1/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that, as sketched below, and then store the resulting `User`
+object. 
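+
+Here is one way that merge could look. This is a minimal sketch rather
+than a canonical implementation: it assumes the JSON layout produced by
+`User::__toString()` above, the function name is our own, and writing
+the merged object back to Riak remains the separate step described in
+the writes section:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function merge_friends_list_resolver(Command\Object\Response $response)
+{
+    $siblings = $response->hasSiblings()
+        ? $response->getSiblings()
+        : [$response->getObject()];
+
+    // Merge every sibling's friends list into one de-duplicated array
+    $friends = [];
+    foreach ($siblings as $sibling) {
+        $friends = array_merge($friends, $sibling->getData()['friends']);
+    }
+
+    // All siblings share the same username, since the username is the key
+    $username = $siblings[0]->getData()['username'];
+
+    return new User($username, array_values(array_unique($friends)));
+}
+```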
+ +The drawback to this approach is that it's more or less inevitable that a user +will remove a friend from their friends list, and then that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets). diff --git a/content/riak/kv/2.9.1/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/python.md new file mode 100644 index 0000000000..7fc34c0de4 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/python.md @@ -0,0 +1,254 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Python" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Python" + identifier: "usage_conflict_resolution_python" + weight: 102 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.1/dev/using/conflict-resolution/python + - /riak/kv/2.9.1/dev/using/conflict-resolution/python +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Python +client](https://github.com/basho/riak-python-client). + +## How the Python Client Handles Conflict Resolution + +In the official Python client, every object of the `RiakObject` class +has a `siblings` property that provides access to a list of an object's +sibling values. If there are no siblings, that property will return a +list with only one item. Here's an example of an object with siblings: + +```python +bucket = client.bucket('seahawks') +obj = bucket.get('coach') +obj.siblings + +# The output: +[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x108x1da62c1>] +``` + +So what happens if the length of `obj.siblings` is greater than 1, as in +the case above? 
The easiest way to resolve siblings automatically with
+the Python client is to create a conflict-resolving function that takes
+a list of sibling values and returns a single value. Such resolution
+functions can be registered either at the object level or the bucket
+level. A more complete explanation can be found in the section directly
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will
+be of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct?" What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the one
+that our application deems correct_. While this might not make sense in
+real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `friends` property that lists the usernames, as
+strings, of the user's friends. We will also create a `to_json` method,
+as we'll be storing each `User` object as JSON:
+
+```python
+class User(object):
+    def __init__(self, username, friends):
+        self.username = username
+        self.friends = friends
+
+    def to_json(self):
+        return vars(self)
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```python
+new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json()
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing and Registering a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = bucket.get('bashobunny')
+
+print len(obj.siblings) > 1
+```
+
+If we get `True`, then there are siblings. So what do we do in that
+case? The Python client allows us to write a conflict resolution hook
+function that will be triggered any time siblings are found, i.e. any
+time `len(obj.siblings) > 1`. A hook function like this needs to take a
+single `RiakObject` object as its argument, apply some sort of logic to
+the list of values contained in the `siblings` property, and ultimately
+return a list with a single "correct" value. 
For our example case, we'll +return the value with the longest `friends` list: + +```python +def longest_friends_list_resolver(riak_object): + # We'll specify a lambda function that operates on the length of + # each sibling's "friends" list: + lm = lambda sibling: len(sibling.data['friends']) + # Then we'll return a list that contains only the object with the + # maximum value for the length of the "friends" list: + riak_object.siblings = [max(riak_object.siblings, key=lm), ] +``` + +### Registering a Conflict Resolver Function + +In the Python client, resolver functions can be registered at the object +level, as in this example: + +```python +bucket = client.bucket_type('siblings').bucket('users') +obj = RiakObject(client, bucket, 'bashobunny') +obj.resolver = longest_friends_list_resolver + +# Now, when the object is loaded from Riak, it will resolve to a single +# value instead of multiple values when both commands are executed: +obj.reload() +obj.store() +``` + +Alternatively, resolvers can be registered at the bucket level, so that +the resolution is applied to all objects in the bucket: + +```python +bucket = client.bucket_type('siblings').bucket('users') +bucket.resolver = longest_friends_list_resolver + +obj = RiakObject(client, bucket, 'bashobunny') +obj.reload() +obj.store() + +# The resolver will also be applied if you perform operations using the +# bucket object: + +bucket.get('bashobunny') +bucket.get('some_other_user') +``` + +## Conflict Resolution and Writes + +In the above example, we created a conflict resolver that resolves a +list of discrepant `User` object values and returns a single value. It's +important to note, however, that this resolver will only provide the +application with a single "correct" value; it will _not_ write that +value back to Riak. That requires a separate step. When this step should +be undertaken depends on your application. In general, though, we +recommend writing objects to Riak only when the application is ready to +commit them, i.e. when all of the changes that need to be made to the +object have been made and the application is ready to persist the state +of the object in Riak. + +Correspondingly, we recommend that updates to objects in Riak follow +these steps: + +1. **Read** the object from Riak +2. **Resolving sibling conflicts** if they exist, allowing the +application to reason about one "correct" value for the object (this +step is the subject of this tutorial) +3. **Modify** the object +4. **Write** the object to Riak once the necessary changes have been +made + +You can find more on writing objects to Riak, including code examples +from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.1/developing/usage) section. + +## More Advanced Example + +Resolving sibling `User` values on the basis of which user has the +longest `friends` list has the benefit of being simple but it's probably +not a good resolution strategy for our social networking application +because it means that unwanted data loss is inevitable. If one friend +list contains `A`, `B`, and `C` and the other contains `D` and `E`, the +list containing `A`, `B`, and `C` will be chosen. So what about friends +`D` and `E`? Those usernames are essentially lost. In the sections +below, we'll implement an alternative strategy as an example. + +### Merging the Lists + +To avoid losing data like this, a better strategy would be to merge the +lists. 
We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets). 
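+
+To give a flavor of that approach, here's a minimal sketch using the
+Python client's set support. It assumes a bucket type named `sets` has
+been created and activated with `datatype = set`, which is not shown
+above:
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('friends')
+friends = Set(bucket, 'bashobunny')
+
+# Additions are merged by Riak itself, so concurrent writers cannot
+# produce siblings that the application has to resolve
+friends.add('fred')
+friends.add('barney')
+friends.store()
+
+friends.reload()
+print friends.value
+```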
diff --git a/content/riak/kv/2.9.1/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/ruby.md new file mode 100644 index 0000000000..66018d7239 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/usage/conflict-resolution/ruby.md @@ -0,0 +1,250 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Ruby" + identifier: "usage_conflict_resolution_ruby" + weight: 101 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.1/dev/using/conflict-resolution/ruby + - /riak/kv/2.9.1/dev/using/conflict-resolution/ruby +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Ruby +client](https://github.com/basho/riak-ruby-client). + +## How the Ruby Client Handles Conflict Resolution + +In the official Ruby client, every Riak object has a `siblings` property +that provides access to a list of that object's sibling values. If there +are no siblings, that property will return an array with only one item. +Here's an example of an object with siblings: + +```ruby +bucket = client.bucket('seahawks') +obj = bucket.get('coach') +obj.siblings + +# The output: +[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">] +``` + +So what happens if the length of `obj.siblings` is greater than 1, as in +the case above? In order to resolve siblings, you need to create a +resolution function that takes a Riak object and reduces the `siblings` +array down to a single value. An example is provided in the section +below. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends." Each user will be +of the class `User`, which we'll create below. All of the data for our +application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +to `true`, which means that Riak will generate siblings in certain +cases---siblings that our application will need to be equipped to +resolve when necessary. + +The question that we need to ask ourselves at this point is the +following: if a given user has conflicting lists, which list should be +deemed more "correct?" What criteria should be applied? Should the lists +be merged? Should we pick a list at random and deem that list correct? +We'll keep it simple here and say that the following criterion will +hold: if multiple conflict lists exist, _the longer list will be the one +that our application deems correct_. While this might not make sense in +real-world applications, it's a good jumping-off point. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` and a `friends` property that lists +the usernames, as strings, of the user's friends. 
We will also create a
+`to_json` method, as we'll be storing each `User` object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
+```
+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username, type: 'siblings')
+  longest_friends_list_resolver(user_object)
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. 
**Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.1/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling User values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be concatenated into
+    # one array
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. 
The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets).
diff --git a/content/riak/kv/2.9.1/developing/usage/content-types.md b/content/riak/kv/2.9.1/developing/usage/content-types.md
new file mode 100644
index 0000000000..86fd953cd2
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/content-types.md
@@ -0,0 +1,187 @@
+---
+title: "Content Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Content Types"
+    identifier: "usage_content_types"
+    weight: 104
+    parent: "developing_usage"
+toc: true
+---
+
+Riak KV is a fundamentally content-agnostic database. You can use it to
+store anything you want, from JSON to XML to HTML to binaries to images
+and beyond. It's important to note that _all_ objects stored in
+Riak need a specified content type. If you don't specify a
+content type, the behavior will vary based on your client library:
+
+```java
+// In the Java client, the response when storing an object without
+// specifying a content type will depend on what is being stored. If you
+// store a Java Map, for example, the client will automatically specify
+// that the object is "application/json"; if you store a String, the
+// client will specify "application/x-www-form-urlencoded"; POJOs are
+// stored as JSON by default, and so on.
+```
+
+```ruby
+# In the Ruby client, you must always specify a content type. If you
+# don't, you'll see the following error:
+ArgumentError: content_type is not defined!
+```
+
+```php
+# PHP will default to cURL's default content-type for POST & PUT requests:
+# application/x-www-form-urlencoded
+
+# If you use the StoreObject::buildJsonObject() method when building your command,
+# it will store the item with application/json as the content-type
+```
+
+```python
+# In the Python client, the default content type is "application/json".
+# Because of this, you should always make sure to specify the content
+# type when storing other types of data.
+```
+
+```csharp
+// Using the Riak .NET Client, the response when storing an object without
+// specifying a content type will depend on what is being stored.
+// If you store a Dictionary, for example, the client will
+// automatically specify that the object is "application/json";
+// POCOs are stored as JSON by default, and so on.
+```
+
+```javascript
+// In the Node.js client, the default content type is "application/json".
+// Because of this, you should always make sure to specify the content
+// type when storing other types of data.
+```
+
+```erlang
+%% In the Erlang client, the response when storing an object without
+%% specifying a content type will depend on what is being stored. If
+%% you store a simple binary, for example, the client will automatically
+%% specify that the object is "application/octet-stream"; if you store a
+%% string, the client will specify "application/x-erlang-binary"; and so
+%% on.
+```
+
+```golang
+// In the Go client, you must always specify a content type.
+```
+
+Because content type negotiation varies so widely from client to client,
+we recommend consulting the documentation for your preferred client for
+more information.
+
+## Specifying Content Type
+
+For all writes to Riak, you will need to specify a content type, for
+example `text/plain` or `application/json`.
+
+```java
+Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(wildeGeniusQuote)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = Riak::RObject.new(bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.raw_data = 'I have nothing to declare but my genius'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('genius', 'oscar_wilde', 'quotes')
+    ->buildObject('I have nothing to declare but my genius!', 'text/plain')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = RiakObject(client, bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.data = 'I have nothing to declare but my genius'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
+var obj = new RiakObject(id, "I have nothing to declare but my genius",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('I have nothing to declare but my genius');
+client.storeValue({
+    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
+                       <<"genius">>,
+                       <<"I have nothing to declare but my genius">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("I have nothing to declare but my genius"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("quotes").
+    WithBucket("oscar_wilde").
+    WithKey("genius").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "I have nothing to declare but my genius" \
+  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Please note that POST is also a valid method for writes, for the sake
+# of compatibility
+```
diff --git a/content/riak/kv/2.9.1/developing/usage/creating-objects.md b/content/riak/kv/2.9.1/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..d990cdc182
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/creating-objects.md
@@ -0,0 +1,550 @@
+---
+title: "Creating Objects in Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Creating Objects"
+    identifier: "usage_creating_objects"
+    weight: 100
+    parent: "developing_usage"
+toc: true
+---
+
+[usage content types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/content-types
+
+Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
+requests. 
Here is the basic form of writes:
+
+```
+PUT /types/<type>/buckets/<bucket>/keys/<key>
+
+# If you're using HTTP to interact with Riak, you can also use POST
+```
+
+As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/bucket-types).
+
+The object we're storing will be very simple, just a basic text snippet
+of something that Rufus might say. Let's build the object and then store
+it.
+
+``` java
+String quote = "WOOF!";
+Namespace bucket = new Namespace("animals", "dogs");
+Location rufusLocation = new Location(bucket, "rufus");
+RiakObject rufusObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create(quote));
+StoreValue storeOp = new StoreValue.Builder(rufusObject)
+        .withLocation(rufusLocation)
+        .build();
+client.execute(storeOp);
+```
+
+``` ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = Riak::RObject.new(bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store
+```
+
+``` php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('rufus', 'dogs', 'animals')
+    ->buildObject('WOOF!', 'text/plain')
+    ->build()
+    ->execute();
+```
+
+``` python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```
+
+``` csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var obj = new RiakObject(id, "WOOF!", "text/plain");
+var result = client.Put(obj);
+```
+
+``` javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('WOOF!');
+client.storeValue({
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+``` golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("WOOF!"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+Notice that we specified both a value for the object, i.e. `WOOF!`, and
+a content type, `text/plain`. See [content types][usage content types] for more information.
+
+Now, you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/2.9.1/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/bucket-types).
+
+### Store an Object
+
+Your application will often have its own method of generating the keys
+for its data, e.g. on the basis of timestamps. If so, storing that data
+is easy. The basic request looks like this.
+
+```
+PUT /types/TYPE/buckets/BUCKET/keys/KEY
+
+# If you're using HTTP, POST can be used instead of PUT. The only
+# difference between POST and PUT is that you should POST in cases where
+# you want Riak to auto-generate a key. 
More on this can be found in the
+# examples below.
+```
+
+There is no need to intentionally create buckets in Riak. They pop into
+existence when keys are added to them, and disappear when all keys have
+been removed from them. If you don't specify a bucket's type, the type
+[`default`]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) will be applied.
+
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('viper', 'dodge', 'cars')
+    ->buildObject('vroom', 'text/plain')
+    ->withParameter('w', 3)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [{w, 3}]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. 
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. As with a `GET` request, `300 Multiple
+Choices` may be returned if siblings existed or were created as part of
+the operation, and the response can be dealt with similarly.
+
+Normal HTTP status codes (responses will vary for client libraries):
+
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+For example, using the same object from above:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.RETURN_BODY, true)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3, returnbody: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+options.SetReturnBody(true);
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, returnBody: true, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var viper = riakObj.value;
+    logger.info("dodge viper: %s", viper.toString('utf8'));
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
+```
+
+### Store a New Object and Assign a Random Key
+
+If your application would rather leave key-generation up to Riak, issue
+a `POST` request to the bucket URL instead of a PUT to a bucket/key
+pair:
+
+```
+POST /types/TYPE/buckets/BUCKET/keys
+```
+
+If you don't pass Riak a `key` name after the bucket, it will know to
+create one for you.
+
+Supported headers are the same as for bucket/key write requests, though
+`X-Riak-Vclock` will never be relevant for these POST requests.
+Supported query parameters are also the same as for bucket/key PUT
+requests. 
+ +Normal status codes: + +* `201 Created` + +This command will store an object in the bucket `random_user_keys`, +which bears the bucket type `users`. + +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)
+
+// Output:
+// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
+```
+
+```curl
+curl -i -XPOST \
+  -H "Content-Type: text/plain" \
+  -d "this is a test" \
+  http://localhost:8098/types/users/buckets/random_user_keys/keys
+
+# In the output, you should see a Location header that will give you the
+# location of the object in Riak, with the key at the end:
+
+Location: /buckets/random_user_keys/keys/G7FYUXtTsEdru4NP32eijMIRK3o
+```
diff --git a/content/riak/kv/2.9.1/developing/usage/custom-extractors.md b/content/riak/kv/2.9.1/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..e6c2a33a17
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/custom-extractors.md
@@ -0,0 +1,420 @@
+---
+title: "Custom Extractors"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Custom Extractors"
+    identifier: "usage_custom_extractors"
+    weight: 113
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/search/custom-extractors
+  - /riak/kv/2.9.1/dev/search/custom-extractors
+---
+
+Solr, and by extension Riak Search, has default extractors for a wide
+variety of data types, including JSON, XML, and plaintext. Riak Search
+ships with the following extractors:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+No specified type | `yz_noop_extractor`
+
+There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/usage/searching-data-types).
+
+If you're working with a data format that does not have a default Solr
+extractor, you can create your own and register it with Riak Search.
+We'll show you how to do so by way of example.
+
+## The Extractor Interface
+
+Creating a custom extractor involves creating an Erlang module that
+implements two functions:
+
+* `extract/1` --- Takes the contents of the object and calls `extract/2`
+  with the same contents and an empty list
+* `extract/2` --- Takes the contents of the object and returns an Erlang
+  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
+  single field name and a single value associated with that name
+
+The following extractor shows how a pure text extractor implements those
+two functions:
+
+```erlang
+-module(search_test_extractor).
+-include("yokozuna.hrl").
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+extract(Value, Opts) ->
+    FieldName = field_name(Opts),
+    [{FieldName, Value}].
+
+-spec field_name(proplist()) -> any().
+field_name(Opts) ->
+    proplists:get_value(field_name, Opts, text).
+```
+
+This extractor takes the contents of a `Value` and returns a proplist
+with a single field name (in this case `text`) and the single value.
+This function can be run in the Erlang shell. Let's run it providing the
+text `hello`:
+
+```erlang
+> c(search_test_extractor).
+%% {ok, search_test_extractor}
+
+> search_test_extractor:extract("hello").
+
+%% Console output:
+[{text, "hello"}]
+```
+
+Upon running this command, the value `hello` would be indexed in Solr
+under the field name `text`. If you wanted to find all objects with a
+`text` field that begins with `Fourscore`, you could use the
+Solr query `text:Fourscore*`, to give just one example.
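+
+Run against a real index, that query looks like the other search examples
+in these docs. Here is a sketch with the Python client; the index name
+`my_index` is hypothetical, and the query assumes that index exists and is
+associated with the bucket(s) holding the extracted objects:
+
+```python
+# Hypothetical index name; the field 'text' is the one produced by the
+# extractor above
+results = client.fulltext_search('my_index', 'text:Fourscore*')
+print(results['num_found'])
+```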
+
+## An Example Custom Extractor
+
+Let's say that we're storing HTTP header packet data in Riak. Here's an
+example of such a packet:
+
+```
+GET http://www.google.com HTTP/1.1
+```
+
+We want to register the following information in Solr:
+
+Field name | Value | Extracted value in this example
+:----------|:------|:-------------------------------
+`method` | The HTTP method | `GET`
+`host` | The URL's host | `www.google.com`
+`uri` | The URI, i.e. what comes after the host | `/`
+
+The example extractor below would provide the three desired
+fields/values. It relies on the
+[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3)
+function from Erlang's standard library.
+
+```erlang
+-module(yz_httpheader_extractor).
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+%% In this example, we can ignore the Opts variable from the example
+%% above, hence the underscore:
+extract(Value, _Opts) ->
+    {ok,
+     {http_request,
+      Method,
+      {absoluteURI, http, Host, undefined, Uri},
+      _Version},
+     _Rest} = erlang:decode_packet(http, Value, []),
+    [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}].
+```
+
+This code will live in a file called `yz_httpheader_extractor.erl` (as
+Erlang filenames must match the module name). Now that our extractor has
+been written, it must be compiled and registered in Riak before it can
+be used.
+
+## Registering Custom Extractors
+
+In order to use a custom extractor, you must create a compiled `.beam`
+file out of your `.erl` extractor file and then tell Riak where that
+file is located. Let's say that we have placed our
+`yz_httpheader_extractor.erl` file in the directory `/opt/beams`. First,
+we need to compile that file:
+
+```bash
+erlc yz_httpheader_extractor.erl
+```
+
+To instruct Riak where to find the resulting
+`yz_httpheader_extractor.beam` file, we'll need to add a line to an
+`advanced.config` file in the node's `/etc` directory (more information
+can be found in our documentation on [advanced configuration]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#advanced-configuration)). Here's an
+example:
+
+```advancedconfig
+[
+  %% Other configs
+  {vm_args, [
+    {"-pa /opt/beams", ""}
+  ]},
+  %% Other configs
+]
+```
+
+This will instruct the Erlang VM on which Riak runs to look for compiled
+`.beam` files in the proper directory. You should restart the node at
+this point. Once the node has been restarted, you can use the node's
+Erlang shell to register the `yz_httpheader_extractor`. First, attach to
+the shell:
+
+```bash
+riak attach
+```
+
+At this point, we need to choose a MIME type for our extractor. Let's
+call it `application/httpheader`. Once you're in the shell:
+
+```erlang
+> yz_extractor:register("application/httpheader", yz_httpheader_extractor).
+```
+
+If successful, this command will return a list of currently registered
+extractors.
It should look like this:
+
+```erlang
+[{default,yz_noop_extractor},
+ {"application/httpheader",yz_httpheader_extractor},
+ {"application/json",yz_json_extractor},
+ {"application/riak_counter",yz_dt_extractor},
+ {"application/riak_map",yz_dt_extractor},
+ {"application/riak_set",yz_dt_extractor},
+ {"application/xml",yz_xml_extractor},
+ {"text/plain",yz_text_extractor},
+ {"text/xml",yz_xml_extractor}]
+```
+
+If the `application/httpheader` extractor is part of that list, then the
+extractor has been successfully registered.
+
+## Verifying Our Custom Extractor
+
+Now that Riak Search knows how to decode and extract HTTP header packet
+data, let's store some in Riak and then query it. We'll put the example
+packet data from above in a `google_packet.bin` file. Then, we'll `PUT`
+that binary to Riak's `/search/extract` endpoint:
+
+```curl
+# Note that we use our custom MIME type here
+curl -XPUT $RIAK_HOST/search/extract \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+That should return the following JSON:
+
+```json
+{
+  "method": "GET",
+  "host": "www.google.com",
+  "uri": "/"
+}
+```
+
+We can also verify this in the Erlang shell (whether in a Riak node's
+Erlang shell or otherwise):
+
+```erlang
+yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor).
+
+%% Console output:
+[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}]
+```
+
+## Indexing and Searching HTTP Header Packet Data
+
+Now that Solr knows how to extract HTTP header packet data, we need to
+create a schema that extends the [default schema]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
+to `<fields>` in the schema, which we'll name `http_header_schema` and
+store in a `http_header_schema.xml` file:
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="http_header_schema" version="1.5">
+<fields>
+  <!-- other required fields here -->
+
+  <field name="method" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="host" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/>
+</fields>
+</schema>
+```
+
+Now, we can store the schema:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("http_header_schema.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_xml = File.read('http_header_schema.xml')
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```php
+$schema_string = file_get_contents('http_header_schema.xml');
+(new \Basho\Riak\Command\Builder\StoreSchema($riak))
+  ->withName('http_header_schema')
+  ->withSchemaString($schema_string)
+  ->build()
+  ->execute();
+```
+
+```python
+schema_xml = open('http_header_schema.xml').read()
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/http_header_schema \
+  -H 'Content-Type: application/xml' \
+  --data-binary @http_header_schema.xml
+```
+
+Riak now has our schema stored and ready for use.
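+
+If you want to double-check the upload before continuing, many of the
+clients can read a schema back. Here is a sketch with the Python client
+(the exact shape of the returned value may vary between client versions):
+
+```python
+# Fetch the schema we just stored and eyeball its contents
+schema = client.get_search_schema('http_header_schema')
+print(schema['name'])     # http_header_schema
+print(schema['content'])  # the raw XML uploaded above
+```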
Let's create a search
+index called `header_data` that's associated with our new schema:
+
+```java
+YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
+StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreIndex($riak))
+  ->withName('header_data')
+  ->usingSchema('http_header_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/header_data \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"http_header_schema"}'
+```
+
+Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types)
+for all of the HTTP header data that we plan to store. Any bucket that
+bears this type will be associated with our `header_data` search index.
+We'll call our bucket type `http_data_store`.
+
+```bash
+riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
+riak-admin bucket-type activate http_data_store
+```
+
+Let's use the same `google_packet.bin` file that we used previously and
+store it in a bucket with the `http_data_store` bucket type, making sure
+to use our custom `application/httpheader` MIME type:
+
+```java
+Location key = new Location(new Namespace("http_data_store", "packets"), "google");
+File packetData = new File("google_packet.bin");
+byte[] packetBinary = FileUtils.readFileToByteArray(packetData);
+
+RiakObject packetObject = new RiakObject()
+        .setContentType("application/httpheader")
+        .setValue(BinaryValue.create(packetBinary));
+
+StoreValue storeOp = new StoreValue.Builder(packetObject)
+        .withLocation(key)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+packet_data = File.read('google_packet.bin')
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = Riak::RObject.new(bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.raw_data = packet_data
+obj.store
+```
+
+```php
+$object = new Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('google', 'packets', 'http_data_store')
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+packet_data = open('google_packet.bin').read()
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = RiakObject(client, bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.data = packet_data
+obj.store()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+Now that we have some header packet data stored, we can query our
+`header_data` index on whatever basis we'd like.
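+
+As a quick sanity check before querying, here is a sketch with the Python
+client that reads the stored packet back and confirms its content type
+(illustrative only; it assumes the same `client` object as above):
+
+```python
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = bucket.get('google')
+print(obj.content_type)   # application/httpheader
+print(obj.encoded_data)   # the raw packet bytes we stored
+```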
First, let's verify
+that we'll get one result if we query for objects that have the HTTP
+method `GET`:
+
+```java
+// Using the same method from above:
+String query = "method:GET";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withQuery('method:GET')
+  ->withIndexName('header_data')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"
+
+# This should return a fairly large JSON object with a "num_found" field.
+# The value of that field should be 1.
+```
diff --git a/content/riak/kv/2.9.1/developing/usage/deleting-objects.md b/content/riak/kv/2.9.1/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..0c81190fe0
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/deleting-objects.md
@@ -0,0 +1,152 @@
+---
+title: "Deleting Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Deleting Objects"
+    identifier: "usage_deleting_objects"
+    weight: 103
+    parent: "developing_usage"
+toc: true
+---
+
+The delete command follows a predictable pattern and looks like this:
+
+```
+DELETE /types/TYPE/buckets/BUCKET/keys/KEY
+```
+
+The normal HTTP response codes for `DELETE` operations are `204 No
+Content` and `404 Not Found`. 404 responses are *normal*, in the sense
+that `DELETE` operations are idempotent and not finding the resource has
+the same effect as deleting it.
+
+Let's try to delete the `genius` key from the `oscar_wilde` bucket
+(which bears the type `quotes`):
+
+```java
+Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
+client.execute(delete);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\DeleteObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+string key = rslt.Value.Key;
+id = new RiakObjectId("users", "random_user_keys", key);
+var del_rslt = client.Delete(id);
+```
+
+```javascript
+// continuing from above example
+options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    key: generatedKey
+};
+client.deleteValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>)
+```
+
+```golang
+// Continuing from above example
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithKey(rsp.GeneratedKey).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+```
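+
+Because 404s are normal here, a follow-up fetch is an easy way to confirm
+that a delete took effect. A sketch with the Python client (it assumes the
+same `client` as above; tombstone handling means the result may take a
+moment to settle):
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = bucket.get('genius')
+print(obj.exists)  # False once the delete has propagated
+```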
+
+## Client Library Examples
+
+If you are updating an object that has been deleted---or if an update
+might target a deleted object---we recommend that
+you first fetch the [causal context]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context) of the object prior to updating.
+This can be done by setting the `deletedvclock` parameter to `true` as
+part of the [fetch operation]({{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers/fetch-object). This can also be done
+with the official Riak clients for Ruby, Java, Erlang, and PHP, as in the
+examples below:
+
+```ruby
+object.delete
+deleted_object = bucket.get('key', deletedvclock: true)
+deleted_object.vclock
+```
+
+```python
+# It is not currently possible to fetch the causal context for a deleted
+# key in the Python client.
+```
+
+```java
+Location loc = new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+FetchValue fetch = new FetchValue.Builder(loc)
+    .withOption(Option.DELETED_VCLOCK, true)
+    .build();
+FetchValue.Response response = client.execute(fetch);
+System.out.println(response.getVclock().asString());
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"bucket_type">>, <<"bucket">>},
+                                <<"key">>,
+                                [deletedvclock]).
+
+%% In the Erlang client, the vector clock is accessible using the Obj
+%% object obtained above.
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type')
+  ->build()
+  ->execute();
+
+echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA
+```
diff --git a/content/riak/kv/2.9.1/developing/usage/document-store.md b/content/riak/kv/2.9.1/developing/usage/document-store.md
new file mode 100644
index 0000000000..c4bdc614b8
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/document-store.md
@@ -0,0 +1,613 @@
+---
+title: "Implementing a Document Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Implementing a Document Store"
+    identifier: "usage_document_store"
+    weight: 112
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/search/document-store
+  - /riak/kv/2.9.1/dev/search/document-store
+---
+
+Although Riak wasn't explicitly created as a document store, two
+features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/)---make it possible to use Riak as a
+highly scalable document store with rich querying capabilities. In this
+tutorial, we'll build a basic implementation of a document store using
+[Riak maps]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#maps).
+
+## Basic Approach
+
+Riak Search enables you to implement a document store in Riak in a
+variety of ways. You could, for example, store and query JSON objects or
+XML and then retrieve them later via Solr queries. In this tutorial,
+however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#maps),
+index that data using Riak Search, and then run Solr queries against
+those stored objects.
+
+You can think of these Search indexes as **collections**. 
Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/2.9.1/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. + +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). 
+Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @blog_post_schema.xml +``` + +With our schema uploaded, we can create an index called `blog_posts` and +associate that index with our schema: + +```java +YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema"); +StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('blog_posts') + ->usingSchema('blog_post_schema') + ->build() + ->execute(); +``` + +```python +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```csharp +var idx = new SearchIndex("blog_posts", "blog_post_schema"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: 'blog_post_schema', + indexName: 'blog_posts' +}; +client.storeIndex(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/blog_posts \ + -H 'Content-Type: application/json' \ + -d '{"schema": "blog_post_schema"}' +``` + +## How Collections will Work + +Collections are not a concept that is native to Riak but we can easily +mimic collections by thinking of a bucket type as a collection. When we +associate a bucket type with a Riak Search index, all of the objects +stored in any bucket of that bucket type will be queryable on the basis +of that one index. For this tutorial, we'll create a bucket type called +`cms` and think of that as a collection. 
We could also restrict our
+`blog_posts` index to a single bucket just as easily and think of that
+as a queryable collection, but we will not do that in this tutorial.
+
+The advantage of the bucket-type-based approach is that we could store
+blog posts from different blogs in different buckets and query them
+all at once as part of the same index. It depends on the use case at
+hand. In this tutorial, we'll only be storing posts from one blog, which
+is called "Cat Pics Quarterly" and provides in-depth theoretical
+discussions of cat pics with a certain number of Reddit upvotes. All of
+the posts in this blog will be stored in the bucket
+`cat_pics_quarterly`.
+
+First, let's create our `cms` bucket type and associate it with the
+`blog_posts` index:
+
+```bash
+riak-admin bucket-type create cms \
+  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
+riak-admin bucket-type activate cms
+```
+
+Now, any object stored in any bucket of the type `cms` will be indexed
+as part of our "collection."
+
+## Storing Blog Posts as Maps
+
+Now that we know how each element of a blog post can be translated into
+one of the Riak Data Types, we can create an interface in our
+application to serve as that translation layer. Using the method
+described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-modeling), we can construct a
+class that looks like this:
+
+```java
+import java.util.Set;
+
+public class BlogPost {
+    private String title;
+    private String author;
+    private String content;
+    private Set<String> keywords;
+    private DateTime datePosted;
+    private Boolean published;
+    private static final String bucketType = "cms";
+
+    private Location location;
+
+    private RiakClient client;
+
+    public BlogPost(RiakClient client,
+                    String bucketName,
+                    String title,
+                    String author,
+                    String content,
+                    Set<String> keywords,
+                    DateTime datePosted,
+                    Boolean published) {
+        this.client = client;
+        this.location = new Location(new Namespace(bucketType, bucketName), null);
+        this.title = title;
+        this.author = author;
+        this.content = content;
+        this.keywords = keywords;
+        this.datePosted = datePosted;
+        this.published = published;
+    }
+
+    public void store() throws Exception {
+        RegisterUpdate titleUpdate = new RegisterUpdate(title);
+        RegisterUpdate authorUpdate = new RegisterUpdate(author);
+        RegisterUpdate contentUpdate = new RegisterUpdate(content);
+        SetUpdate keywordsUpdate = new SetUpdate();
+        for (String keyword : keywords) {
+            keywordsUpdate.add(keyword);
+        }
+        RegisterUpdate dateUpdate =
+            new RegisterUpdate(datePosted.toString("YYYY-MM-DD HH:MM"));
+        FlagUpdate publishedUpdate = new FlagUpdate(published);
+        MapUpdate mapUpdate = new MapUpdate()
+            .update("title", titleUpdate)
+            .update("author", authorUpdate)
+            .update("content", contentUpdate)
+            .update("keywords", keywordsUpdate)
+            .update("date", dateUpdate)
+            .update("published", publishedUpdate);
+        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
+            .build();
+        client.execute(storeBlogPost);
+    }
+}
+```
+
+```ruby
+class BlogPost
+  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
+    bucket = client.bucket_type('cms').bucket(bucket_name)
+    map = Riak::Crdt::Map.new(bucket, nil)
+    map.batch do |m|
+      m.registers['title'] = title
+      m.registers['author'] = author
+      m.registers['content'] = content
+      keywords.each do |k|
+        m.sets['keywords'].add(k)
+      end
+      m.registers['date'] = date_posted
+      if published
+        m.flags['published'] = true
+      end
+    end
+  end
+end
+```
+
+```php
+class BlogPost {
+    private $title = '';
+    private $author = '';
+    private $content = '';
+    private $keywords = [];
+    private $datePosted = '';
+    private $published = false;
+    private $bucketType = "cms";
+
+    private $bucket = null;
+
+    private $riak = null;
+
+    public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
+    {
+        $this->riak = $riak;
+        $this->bucket = new Bucket($bucket, $this->bucketType);
+        $this->title = $title;
+        $this->author = $author;
+        $this->content = $content;
+        $this->keywords = $keywords;
+        $this->datePosted = $date;
+        $this->published = $published;
+    }
+
+    public function store()
+    {
+        $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));
+
+        foreach($this->keywords as $keyword) {
+            $setBuilder->add($keyword);
+        }
+
+        (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
+            ->updateRegister('title', $this->title)
+            ->updateRegister('author', $this->author)
+            ->updateRegister('content', $this->content)
+            ->updateRegister('date', $this->datePosted)
+            ->updateFlag('published', $this->published)
+            ->updateSet('keywords', $setBuilder)
+            ->withBucket($this->bucket)
+            ->build()
+            ->execute();
+    }
+}
+```
+
+```python
+from riak.datatypes import Map
+
+class BlogPost:
+    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
+        bucket = client.bucket_type('cms').bucket(bucket_name)
+        self.map = Map(bucket, None)
+        self.map.registers['title'].assign(title)
+        self.map.registers['author'].assign(author)
+        self.map.registers['content'].assign(content)
+        for k in keywords:
+            self.map.sets['keywords'].add(k)
+        self.map.registers['date'].assign(date_posted)
+        if published:
+            self.map.flags['published'].enable()
+        self.map.store()
+```
+
+```csharp
+/*
+ * Please see the code in the RiakClientExamples project:
+ * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
+ */
+```
+
+```javascript
+/*
+ * Please see the code in the examples repository:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
+ */
+```
+
+Now, we can store some blog posts.
We'll start with just one:
+
+```java
+Set<String> keywords = new HashSet<String>();
+keywords.add("adorbs");
+keywords.add("cheshire");
+
+BlogPost post1 = new BlogPost(client, // client object
+                              "cat_pics_quarterly", // bucket
+                              "This one is so lulz!", // title
+                              "Cat Stevens", // author
+                              "Please check out these cat pics!", // content
+                              keywords, // keywords
+                              new DateTime(), // date posted
+                              true); // published
+try {
+    post1.store();
+} catch (Exception e) {
+    System.out.println(e);
+}
+```
+
+```ruby
+keywords = ['adorbs', 'cheshire']
+date = Time.now.strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost.new('cat_pics_quarterly',
+                          'This one is so lulz!',
+                          'Cat Stevens',
+                          'Please check out these cat pics!',
+                          keywords,
+                          date,
+                          true)
+```
+
+```php
+$keywords = ['adorbs', 'cheshire'];
+$date = new \DateTime('now');
+
+$post1 = new BlogPost(
+    $riak, // client object
+    'cat_pics_quarterly', // bucket
+    'This one is so lulz!', // title
+    'Cat Stevens', // author
+    'Please check out these cat pics!', // content
+    $keywords, // keywords
+    $date, // date posted
+    true // published
+);
+```
+
+```python
+import datetime
+
+keywords = ['adorbs', 'cheshire']
+date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost('cat_pics_quarterly',
+                      'This one is so lulz!',
+                      'Cat Stevens',
+                      'Please check out these cat pics!',
+                      keywords,
+                      date,
+                      True)
+```
+
+```csharp
+var keywords = new HashSet<string> { "adorbs", "cheshire" };
+
+var post = new BlogPost(
+    "This one is so lulz!",
+    "Cat Stevens",
+    "Please check out these cat pics!",
+    keywords,
+    DateTime.Now,
+    true);
+
+var repo = new BlogPostRepository(client, "cat_pics_quarterly");
+string id = repo.Save(post);
+```
+
+```javascript
+var post = new BlogPost(
+    'This one is so lulz!',
+    'Cat Stevens',
+    'Please check out these cat pics!',
+    [ 'adorbs', 'cheshire' ],
+    new Date(),
+    true
+);
+
+var repo = new BlogPostRepository(client, 'cat_pics_quarterly');
+
+repo.save(post, function (err, rslt) {
+    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
+});
+```
+
+## Querying
+
+Now that we have some blog posts stored in our "collection," we can
+start querying for whatever we'd like. Let's say that we want to find
+all blog posts with the keyword `funny` (after all, some cat pics are
+quite serious, and we may not want those). 
+
+```java
+String index = "blog_posts";
+String query = "keywords_set:funny";
+
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'keywords_set:funny')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('keywords_set:funny')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'keywords_set:funny')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('keywords_set:funny')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny"
+```
+
+Or we can find posts that contain the word `furry`:
+
+```java
+String index = "blog_posts";
+String query = "content_register:furry";
+
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'content_register:furry')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('content_register:furry')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'content_register:furry')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('content_register:furry')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry"
+```
+
+Here are some more possible queries:
+
+Info | Query
+:----|:-----
+Unpublished posts | `published_flag:false`
+Titles that begin with `Loving*` | `title_register:Loving*`
+Post bodies containing the words `furry` and `jumping` | `content_register:(furry AND jumping)`
diff --git a/content/riak/kv/2.9.1/developing/usage/mapreduce.md b/content/riak/kv/2.9.1/developing/usage/mapreduce.md
new file mode 100644
index 0000000000..2367e9167b
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/mapreduce.md
@@ -0,0 +1,242 @@
+---
+title: "Using MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Using MapReduce"
+    identifier: "usage_mapreduce"
+    weight: 106
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/using/mapreduce
+  - /riak/kv/2.9.1/dev/using/mapreduce
+---
+
+[usage 2i]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/secondary-indexes
+[usage search]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search
+[usage types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types
+[api http]: {{<baseurl>}}riak/kv/2.9.1/developing/api/http
+[api pb]: {{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode
+[guide mapreduce]: 
{{<baseurl>}}riak/kv/2.9.1/developing/app-guide/advanced-mapreduce + +{{% note title="Use MapReduce sparingly" %}} +In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive to the extent that they can degrade performance in +production clusters operating under load. Thus, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. +{{% /note %}} + +MapReduce (M/R) is a technique for dividing data processing work across +a distributed system. It takes advantage of the parallel processing +power of distributed systems and also reduces network bandwidth, as the +algorithm is passed around to where the data lives rather than +transferring a potentially huge dataset to a client algorithm. + +You can use MapReduce for things like: filtering documents by +tags, counting words in documents, and extracting links to related data. +In Riak KV, MapReduce is one method for querying that is not strictly based +on key querying, alongside [secondary indexes][usage 2i] +and [search][usage search]. MapReduce jobs can be submitted through the +[HTTP API][api http] or the [Protocol Buffers API][api pb], although we +strongly recommend using the Protocol Buffers API for performance +reasons. + +## Features + +* Map phases execute in parallel with data locality. +* Reduce phases execute in parallel on the node where the job was + submitted. +* MapReduce queries written in Erlang. + +## When to Use MapReduce + +* When you know the set of objects over which you want to MapReduce + (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key) +* When you want to return actual objects or pieces of objects and not + just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on + non-key-based queries, but they only return lists of keys and not + whole objects. +* When you need the utmost flexibility in querying your data. MapReduce + gives you full access to your object and lets you pick it apart any + way you want. + +## When Not to Use MapReduce + +* When you want to query data over an entire bucket. MapReduce uses a + list of keys, which can place a lot of demand on the cluster. +* When you want latency to be as predictable as possible. + +## How it Works + +The MapReduce framework helps developers divide a query into steps, +divide the dataset into chunks, and then run those step/chunk pairs in +separate physical hosts. + +There are two steps in a MapReduce query: + +* **Map** --- The data collection phase, which breaks up large chunks of + work into smaller ones and then takes action on each chunk. Map + phases consist of a function and a list of objects on which the map + operation will operate. +* **Reduce** --- The data collation or processing phase, which combines + the results from the map step into a single output. The reduce phase + is optional. + +Riak KV MapReduce queries have two components: + +* A list of inputs +* A list of phases + +The elements of the input list are object locations as specified by +[bucket type][usage types], bucket, and key. The elements of the +phases list are chunks of information related to a map, a reduce, or a +link function. + +A MapReduce query begins when a client makes the request to Riak KV. 
The +node that the client contacts to make the request becomes the +*coordinating node* responsible for the MapReduce job. As described +above, each job consists of a list of phases, where each phase is either +a map or a reduce phase. The coordinating node uses the list of phases +to route the object keys and the function that will operate over the +objects stored in those keys and instruct the proper [vnode][glossary vnode] to +run that function over the right objects. + +After running the map function, the results are sent back to the +coordinating node. This node then concatenates the list and passes that +information over to a reduce phase on the same coordinating node, +assuming that the next phase in the list is a reduce phase. + +The diagram below provides an illustration of how a coordinating vnode +orchestrates a MapReduce job. + +![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png) + +## Example + +In this example, we'll create four objects with the text "caremad" +repeated a varying number of times and store those objects in the bucket +`training` (which does not bear a [bucket type][usage types]). +An Erlang MapReduce function will be used to count the occurrences of +the word "caremad." + +### Data object input commands + +For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) +in conjunction with Riak KV's [HTTP API][api http] to store the objects: + +```curl +curl -XPUT http://localhost:8098/buckets/training/keys/foo \ + -H 'Content-Type: text/plain' \ + -d 'caremad data goes here' + +curl -XPUT http://localhost:8098/buckets/training/keys/bar \ + -H 'Content-Type: text/plain' \ + -d 'caremad caremad caremad caremad' + +curl -XPUT http://localhost:8098/buckets/training/keys/baz \ + -H 'Content-Type: text/plain' \ + -d 'nothing to see here' + +curl -XPUT http://localhost:8098/buckets/training/keys/bam \ + -H 'Content-Type: text/plain' \ + -d 'caremad caremad caremad' +``` + +### MapReduce invocation + +To invoke a MapReduce function from a compiled Erlang program requires +that the function be compiled and distributed to all nodes. + +For interactive use, however, it's not necessary to do so; instead, we +can invoke the client library from the +[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define +functions to send to Riak KV on the fly. + +First we defined the map function, which specifies that we want to get +the key for each object in the bucket `training` that contains the text +`caremad`. + +We're going to generalize and optimize it a bit by supplying a +compiled regular expression when we invoke MapReduce; our function +will expect that as the third argument. + +```erlang +ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of + {match, Matches} -> [{riak_object:key(O), length(Matches)}]; + nomatch -> [{riak_object:key(O), 0}] +end end. +``` + +Next, to call `ReFun` on all keys in the `training` bucket, we can do +the following in the Erlang shell. + +{{% note title="Warning" %}} +Do not use this in a production +environment; listing all keys to identify those in the `training` bucket +is a very expensive process. +{{% /note %}} + +```erlang +{ok, Re} = re:compile("caremad"). +``` + +That will return output along the following lines, verifying that +compilation has completed: + +``` +{ok,{re_pattern,0,0, + <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100, + ...>>}} +``` + +Then, we can create a socket link to our cluster: + +```erlang +{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087). 
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+                              [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
+
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
diff --git a/content/riak/kv/2.9.1/developing/usage/next-gen-replication.md b/content/riak/kv/2.9.1/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..c74b8020ce
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/next-gen-replication.md
@@ -0,0 +1,150 @@
+---
+title: "Next-Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.9.1"
+menu:
+  riak_kv-2.9.1:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+version_history:
+  in: "2.9.1+"
+toc: true
+aliases:
+  - /riak-docs/riak/2.9.1/dev/using/NextGenReplication
+---
+[concept TicTac aae]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/tictac-active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/replication
+
+## Next Generation Replication - How it Works
+
+### Replication Actors
+
+Each node in `riak_kv` starts three processes that manage the inter-cluster replication: a tictac AAE full-sync manager, a replication queue source manager, and a replication queue sink manager. All processes are started by default (whether or not replication is enabled), but will only play an active role should replication be configured. Further details on the processes involved:
+
+* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`
+
+  * There is a single actor on each node that manages the full-sync reconciliation workload configured for that node.
+
+  * Each node is configured with the details of a peer node at a remote cluster. Each manager is responsible for controlling cluster-wide hashtree exchanges between the local node and the peer node, and for prompting any repairs required across the cluster (not just on this node). The information is exchanged between the peers, but that information represents the data across the whole cluster. Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`. 
+
+  * Each node is configured with a schedule to determine how frequently this manager will run its reconcile and repair operations.
+
+  * It is the administrator's responsibility to ensure the cluster AAE workload is distributed across nodes with sufficient diversity to ensure correct operation under failure. Work is not re-distributed between nodes in response to failure on either the local or remote cluster, so there must be other nodes already configured to share that workload if operation is to continue under failure conditions.
+
+  * Each node can only full-sync with one other cluster (via the one peer node). If the cluster needs to full-sync with more than one cluster, then the administrator should ensure different nodes have the different configurations necessary to achieve this.
+
+  * Scheduling of work to minimise concurrency of reconciliation operations is managed by this actor using a simple, coordination-free mechanism.
+
+  * The administrator may at run-time suspend or resume the regular running of full-sync operations on any given node via the `riak_kv_ttaaefs_manager`.
+
+* __Replication Queue Source-Side Manager__
+
+  * There is a single actor on each node that manages the queueing of replication object references to be consumed from other clusters. This actor runs a configurable number of queues, which contain pointers to data which is required to be consumed by different remote clusters.
+
+  * The general pattern is that each delta within a cluster will be published once via the `riak_kv_replrtq_src` on a node local to the discovery of the change. Each queue which is a source of updates will have multiple consumers spread across multiple sink nodes on the receiving cluster - where each sink-side node's consumers are being managed by a `riak_kv_replrtq_snk` process on that node.
+
+  * Queues may have data filtering rules to restrict what changes are distributed via that queue. The filters can restrict replication to a specific bucket or bucket type, to a bucket name prefix, or can allow any change to be published to that queue.
+
+  * __Real-time replication__ changes (i.e. PUTs that have just been co-ordinated on this node within the cluster) are sent to the `riak_kv_replrtq_src` in one of the following formats:
+    * {Bucket, Key, Clock, {tombstone, Object}};
+    * {Bucket, Key, Clock, {object, Object}};
+    * {Bucket, Key, Clock, to_fetch}.
+
+  * Real-time replicated objects are the highest priority items to be queued, and are placed on __every queue whose data filtering rules are matched__ by the object. If the priority queue has grown beyond a limited number of items (the number being defined in `riak_kv.replrtq_srcobjectlimit`), then any {object, Object} references are stripped and replaced with `to_fetch`. This is to help limit the memory consumed by the queue during failure conditions, i.e. when a sink has stopped consuming from the source queue.
+
+  * Changes identified by __AAE full-sync replication__ processes run by the `riak_kv_ttaaefs` manager on the local node are sent to the `riak_kv_replrtq_src` as references, and queued as the second highest priority. These changes are queued only on __a single queue defined within the configuration__ of `riak_kv_ttaaefs_manager`. The changes queued are only references to the object (Bucket, Key and Clock), not the actual object.
+
+  * Changes identified by __AAE fold operations__ for administrator-initiated transition or repair operations (e.g. 
fold over a bucket or key-range, or for a given range of modified dates), are sent to the `riak_kv_replrtq_src` to be queued as the lowest priority onto __a single queue defined by the administrator when initiating the AAE fold operation__. The changes queued are only references to the object (Bucket, Key and Clock) not the actual object - and are only the changes discovered through the fold running on vnodes local to this node. + + * Should the local node fail, all undelivered object references will be dropped. + + * Queues are bounded, with limits set separately for each priority. Items are consumed from the queue in strict priority order. So a backlog of non-real-time replication events cannot cause a backlog or failure in real-time events. + + * The queues are provided using the existing `riak_core_priority_queue` module in Riak. + + * The administrator may at run-time suspend or resume the publishing of data to specific queues via the `riak_kv_replrtq_src` process. + +* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk` + + * There is a single actor on each node that manages the process of consuming from queues on the `riak_kv_replrtq_src` on remote clusters. + + * The `riak_kv_replrtq_snk` can be configured to consume from multiple queues, across an open-ended number of peers. For instance if each node on Cluster A maintains a queue named `cluster_c_full`, and each node on Cluster B maintains a queue named `cluster_c_partial` - then `riak_kv_replrtq_snk` can be configured to consume from the `cluster_c_full` from every node in Cluster A and from `cluster_c_partial` from every node in Cluster B. + + * The `riak_kv_replrtq_snk` manages a finite number of workers for consuming from remote peers. The `riak_kv_replrtq_snk` tracks the results of work in order to back-off slightly from peers regularly not returning results to consume requests (in favour of those peers indicating a backlog by regularly returning results). The `riak_kv_replrtq_snk` also tracks the results of work in order to back-off severely from those peers returning errors (so as not to lock too many workers consuming from unreachable nodes). + + * The administrator may at run-time suspend or resume the consuming of data from specific queues or peers via the `riak_kv_replrtq_snk`. + +### Real-time Replication - Step by Step + +Previous replication implementations initiate replication through a post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm` after "enough" responses have been received from other vnodes (based on n, w, dw and pw values for the PUT). Without enough responses, the replication hook is not fired, although the client should receive an error and retry. This process of retrying may eventually fire the hook - although it is possible for a PUT to fail, the hook not to be fired, but a GET be locally successful (due to read-repair and anti-entropy) and there be no clue that the object has not been replicated. + +In implementing the new replication solution, the point of firing off replication has been changed to the point that the co-ordinated PUT is completed. So the replication of the PUT to the clusters may occur in parallel to the replication of the PUT to other nodes in the source cluster. This is the first opportunity where sufficient information is known (e.g. the updated vector clock), and reduces the size of the time-window of inconsistency between the clusters, and also reduce the window of opportunity for a PUT to succeed but not have replication triggered. 
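+
+Before walking through the steps, it may help to see roughly how a
+source node could be configured. The sketch below is illustrative only:
+it uses the `riak_kv.replrtq_*` settings referenced in this section, but
+the `name:filter` syntax shown for `replrtq_srcqueue` is an assumption -
+check the configuration schema shipped with your release before copying
+it.
+
+```
+## Illustrative riak.conf sketch of source-side settings (not a
+## definitive reference; replrtq_srcqueue syntax is assumed)
+replrtq_enablesrc = enabled
+
+## Queue definitions (assumed "name:filter" syntax): one queue matching
+## any change, and one restricted to a single bucket
+replrtq_srcqueue = cluster_b:any|cluster_c:bucketname.training
+
+## Objects larger than this are queued as to_fetch references
+replrtq_srcobjectsize = 200KB
+
+## Above this many queued items, whole objects are stripped to references
+replrtq_srcobjectlimit = 1000
+
+## Absolute limit applied to each priority within a queue
+replrtq_srcqueuelimit = 300000
+```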
+
+Replication is fired within the `riak_kv_vnode` `actual_put/8`. On
+condition of the vnode being a co-ordinator of the PUT, and of
+`riak_kv.replrtq_enablesrc` being set to enabled (true), the following
+work is done:
+
+- The object reference to be replicated is determined; this is the type
+of reference to be placed on the replication queue.
+
+  - If the object is now a tombstone, the whole object is used as the
+  replication reference. The whole object is used due to the small size
+  of the object, and the need to avoid race conditions with reaping
+  activity if `delete_mode` is not `keep` - the cluster may not be able
+  to fetch the tombstone to replicate in the future. The whole object
+  must be kept on the queue and not be filtered by the
+  `riak_kv_replrtq_src` to be replaced with a `to_fetch` reference.
+
+  - If the object is below the `riak_kv.replrtq_srcobjectsize` (default
+  200KB) then the whole object will be sent to the
+  `riak_kv_replrtq_src`, and it will be queued as a whole object as
+  long as the current size of the priority real-time queue does not
+  exceed the `riak_kv.replrtq_srcobjectlimit` (default 1000). If an
+  object is over the size limit, a `to_fetch` reference will be sent
+  instead of the object, and if the queue is too large the
+  `riak_kv_replrtq_src` will substitute a `to_fetch` reference before
+  queueing.
+
+- The `{Bucket, Key, Clock, ObjectReference}` is cast to the
+`riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the
+priority queue.
+
+- The queue has a configurable absolute limit that is applied
+individually for each priority. The limit is configured via
+`riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5
+minutes of traffic at 1,000 PUTs per second). When this limit is
+reached, new replication references are discarded on receipt rather
+than queued - these discarded references will need to eventually be
+re-replicated via full-sync.
+
+The reference now needs to be handled by the `riak_kv_replrtq_src`. The
+task list for this process is:
+
+- Assign a priority to the replication event depending on what prompted
+the replication (e.g. highest priority to real-time events received
+from co-ordinator vnodes).
+
+- Add the reference to the tail of __every__ matching queue, based on
+priority. Each queue is configured to match either `any` replication
+event, no real-time events (using the configuration `block_rtq`), or a
+subset of events (using either a bucket `type` filter or a `bucket`
+filter).
+
+In order to replicate the object, it must now be fetched from the queue
+by a sink. A sink-side cluster should have multiple consumers, on
+multiple nodes, consuming from each node in the source-side cluster.
+Each worker is handed work items by the `riak_kv_replrtq_snk`, along
+with a Riak client configured to communicate with the remote node, and
+the worker will initiate a `fetch` from that node.
+
+On receipt of the `fetch` request the source node should:
+
+- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in
+place of `{Bucket, Key}`.
+
+- The GET FSM should go directly into the `queue_fetch` state, and try
+to fetch the next replication reference from the given queue name via
+the `riak_kv_replrtq_src`.
+
+  - If the fetch from the queue returns `queue_empty` this is relayed
+  back to the sink-side worker, and ultimately the
+  `riak_kv_replrtq_snk`, which may then slow down the pace at which
+  fetch requests are sent to this node/queue combination. To reduce the
+  volume of individual requests when queues are mainly empty, the queue
+  is only considered empty if it has reported empty 8 times from
+  requests 4ms apart.
+
+  - If the fetch returns an actual object, this is relayed back to the
+  sink worker.
+
+  - If the fetch returns a replication reference with the flag
+  `to_fetch`, the `riak_kv_get_fsm` will continue down the standard
+  path of states starting with `prepare`, and fetch the object, which
+  will then be returned to the sink worker.
+
+- If a successful fetch is relayed back to the sink worker, it will
+replicate the PUT using a local `riak_client:push/4`. The push will
+complete a PUT of the object on the sink cluster - using a
+`riak_kv_put_fsm` with appropriate options (e.g. `asis`,
+`disable-hooks`).
+
+  - The code within the `riak_client:push/4` follows the behaviour of
+  the existing `riak_repl` on receipt of a replicated object.
+
+- If the fetch or push request fails, the sink worker will report this
+back to the `riak_kv_replrtq_snk`, which should delay further requests
+to that node/queue so as to avoid rapidly locking sink workers up
+communicating with a failing node.
+
+
+### Full-Sync Reconciliation and Repair - Step by Step
+
+The `riak_kv_ttaaefs_manager` controls the full-sync replication
+activity of a node. Each node is configured with a single peer with
+which it is to run full-sync checks and repairs, assuming that across
+the cluster sufficient peers to sufficient clusters have been
+configured to complete the overall work necessary for that cluster.
+Ensuring there are sufficient peer relations is an administrator's
+responsibility; there is no re-balancing or re-scaling of this work
+during failure scenarios.
+
+The `riak_kv_ttaaefs_manager` is a source-side process. It will not
+attempt to repair any discovered discrepancies where the remote cluster
+is ahead of the local cluster - the job of the process is to ensure
+that a remote cluster is up-to-date with the changes which have
+occurred in the local cluster. For mutual full-sync replication, there
+will be a need for an equivalent configuration on the peer cluster.
+
+The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the
+configuration. The schedule has wants: the number of times per day that
+it is desired that this manager will:
+
+- Reconcile changes across the whole cluster over all time;
+
+- Skip work for a schedule slot and do nothing;
+
+- Reconcile changes that have occurred in the past hour;
+
+- Reconcile changes that have occurred in the past day.
+
+On startup, the manager looks at these wants and provides a random
+distribution of work across slots. The day is divided into evenly
+distributed slots, so that there is a slot for each want in the
+schedule. The manager will run the work for a slot at an offset from
+the start of the slot, based on the place this node has in the sorted
+list of currently active nodes. So if each node is configured with the
+same total number of wants, work will be synchronised to have limited
+overlapping work within the cluster.
+
+When, on a node, a scheduled piece of work comes due, the
+`riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work
+between the two clusters (using the peer configuration to reach the
+remote cluster). Once the work is finished, it will schedule the next
+piece of work - unless the start time for the next piece of work has
+already passed, in which case the next work is skipped. When all the
+work in the schedule is complete, a new schedule is calculated from the
+wants.
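+
+As a concrete illustration of a schedule, the following hypothetical
+`riak.conf` fragment shows what one node's wants might look like. The
+setting names here are assumptions based on the manager's behaviour
+described above, not a definitive reference - consult the configuration
+schema for your release.
+
+```
+## Hypothetical full-sync schedule for one node (setting names assumed).
+## The wants below total 24, giving one schedule slot per hour.
+ttaaefs_scope = all          ## reconcile across the whole cluster
+ttaaefs_allcheck = 2         ## whole cluster, over all time
+ttaaefs_daycheck = 4         ## changes from the past day
+ttaaefs_hourcheck = 12       ## changes from the past hour
+ttaaefs_nocheck = 6          ## slots in which to skip work
+
+## The single peer at the remote cluster used for exchanges
+ttaaefs_peerip = 192.0.2.10
+ttaaefs_peerport = 8087
+ttaaefs_peerprotocol = pb
+```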
+
+When starting an `aae_exchange` the `riak_kv_ttaaefs_manager` must pass
+in a repair function. This function will compare clocks from identified
+discrepancies, and where the source cluster is ahead of the sink, send
+the `{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name
+on `riak_kv_replrtq_src`. These queued entries will then be replicated
+through being fetched by the `riak_kv_replrtq_snk` workers, although
+this will only occur when there is no higher priority work to
+replicate, i.e. real-time replication events prompted by locally
+co-ordinated PUTs.
\ No newline at end of file
diff --git a/content/riak/kv/2.9.1/developing/usage/reading-objects.md b/content/riak/kv/2.9.1/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..21e853c43d
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/reading-objects.md
@@ -0,0 +1,247 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/#siblings) (more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Object Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object before returning a response
+`pr` | `0` | How many [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+        .withOption(FetchOption.R, new Quorum(3))
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->withParameter('r', 3)
+  ->build()
+  ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` diff --git a/content/riak/kv/2.9.1/developing/usage/replication.md b/content/riak/kv/2.9.1/developing/usage/replication.md new file mode 100644 index 0000000000..f3bb0c26c5 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/usage/replication.md @@ -0,0 +1,588 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.1/dev/advanced/replication-properties + - /riak/kv/2.9.1/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/2.9.1/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the
<a href="{{< baseurl >}}riak/kv/2.9.1/developing/app-guide/strong-consistency/">Using
Strong Consistency</a> documentation, as this option will not be covered in
this tutorial.
{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, at the time
+that a read or write is performed. Examples are given in the
+[Client-level Replication Settings](#client-level-replication-settings)
+section below.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props` those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.1/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.1/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent of setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level.
You can use [bucket types]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only
+if 3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the
+_primary read_ (PR) and _primary write_ (PW) parameters that specify how
+many primary nodes must respond to a request in order to report success
+to the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary
+vnodes are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting
+`basic_quorum` to `true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
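+
+For example, here is a read that opts into this more thorough but
+bounded behaviour (shown with `curl`; the same parameters can be set
+through the clients, and the bucket and key here are illustrative):
+
+```curl
+curl "http://localhost:8098/buckets/test_bucket/keys/test_key?notfound_ok=false&basic_quorum=true"
+```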
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` --- All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` --- This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` --- A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` --- Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{\"stats\":{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{\"stats\":{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/2.9.1/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/2.9.1/developing/getting-started)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/2.9.1/learn/concepts/causal-context#siblings">siblings</a>
+2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/2.9.1/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
diff --git a/content/riak/kv/2.9.1/developing/usage/search-schemas.md b/content/riak/kv/2.9.1/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..9e8331786a
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/search-schemas.md
@@ -0,0 +1,507 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/advanced/search-schema
+  - /riak/kv/2.9.1/dev/advanced/search-schema
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/), and [more]({{<baseurl>}}riak/kv/2.9.1/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value. Large objects indexed through such a catch-all can take up
+tremendous amounts of disk space, so pay special attention to those
+indexes.
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('path/to/file.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('cartoons.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+xml_file.close()
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon Thundercats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above objects into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+
+```
+name=Lion-o
+age=30
+leader=true
+aliases.name=León-O
+aliases.desc_es=Señor de los ThunderCats
+aliases.name=Starlion
+aliases.desc_fr=Le jeune seigneur des Cosmocats
+```
+
+This means that our schema should handle `name`, `age`, `leader`,
+`aliases.name` (a `dot` is a valid field character), and
+`aliases.desc_*`, where the suffix indicates the language of the
+description (Spanish and French).
+
+### Required Schema Fields
+
+Solr schemas can be very complex, containing many types and analyzers.
+Refer to the [Solr 4.7 reference
+guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf)
+for a complete list. You should be aware, however, that there are a few
+fields that are required by Riak Search in order to properly distribute
+an object across a [cluster][concept clusters]. These fields are all prefixed
+with `_yz`, which stands for
+[Yokozuna](https://github.com/basho/yokozuna), the original code name
+for Riak Search.
+
+Below is a bare-minimum skeleton Solr schema. It won't do much for you
+other than allow Riak Search to properly manage your stored objects.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+ </types>
+</schema>
+```
+
+If you're missing any of the above fields, Riak Search will reject your
+custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`.
+
+In the table below, you'll find a description of the various required
+fields. You'll rarely need to use any fields other than `_yz_rt` (bucket
+type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err`
+can be helpful if you suspect that your extractors are failing.
+Malformed JSON or XML will cause Riak Search to index a key and set
+`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-----|:-----|:-----------
+`_yz_id` | ID | Unique identifier of this Solr document
+`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy)
+`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them
+`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Indicates whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you need to map the name and type of each field (a string,
+integer, boolean, etc.).
+
+When creating fields you can either create specific fields via the
+`field` element or an asterisk (`*`) wildcard field via `dynamicField`.
+A value that matches a specific field name will use that field;
+otherwise, Riak Search will attempt to match it against a dynamic field
+pattern.
+
+Besides a field `type`, you also must decide if a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored` you
+can get the value back as a result of a query, but storing it also
+doubles the storage of the field (once in Riak, again in Solr). If
+a single Riak object can have more than one copy of the same matching
+field, you also must set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.0">
+ <fields>
+   <field name="name" type="string" indexed="true" stored="true" />
+   <field name="age" type="int" indexed="true" stored="false" />
+   <field name="leader" type="boolean" indexed="true" stored="false" />
+   <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" />
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+```
+
+Next, take note of the types you used in the fields and ensure that each
+of the field types is defined as a `fieldType` under the `types`
+element. Basic types such as `string`, `boolean`, `int` have matching
+Solr classes. There are dozens more types, including many kinds of
+numbers (`float`, `tdouble`, `random`), `date` fields, and even
+geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we mapped any field ending in
+`_es` to Spanish, and any field ending in `_de` to German.
+
+```xml
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- Spanish -->
+    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+        <filter class="solr.SpanishLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+      </analyzer>
+    </fieldType>
+
+    <!-- German -->
+    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
+        <filter class="solr.GermanNormalizationFilterFactory"/>
+        <filter class="solr.GermanLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+      </analyzer>
+    </fieldType>
+ </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+provided to index without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to this will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+        at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+        at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+        at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+        at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+        at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+        ...
+        ...
+        ...
+``` + +### Uploading + +Once you have decided on the format of your custom schema as an .xml file, it can be uploaded to Riak KV as follows: + +```curl +curl -v -XPUT $RIAK_HOST/search/schema/thundercats \ + -H 'Content-Type:application/xml' \ + --data-binary @thundercats_schema.xml +``` + + + +## Field Properties By Use Case + +Sometimes it can be tricky to decide whether a value should be `stored`, +or whether `multiValued` is allowed. This handy table from the [Solr +documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case) +may help you pick field properties. + +An entry of `true` or `false` in the table indicates that the option +must be set to the given value for the use case to function correctly. +If no entry is provided, the setting of that attribute has no impact on +the case. + +<table class="schemausecase"> +<thead> +<tr> +<th>Use Case</th> +<th><code>indexed</code></th> +<th><code>stored</code></th> +<th><code>multiValued</code></th> +<th><code>omitNorms</code></th> +<th><code>termVectors</code></th> +<th><code>termPositions</code></th> +</tr> +</thead> +<tbody> +<tr> +<td>search within field</td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>retrieve contents</td> +<td></td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>use as unique key</td> +<td><code>true</code></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>sort on field</td> +<td><code>true</code></td> +<td></td> +<td><code>false</code></td> +<td><code>true</code>[1](#notes)</td> +<td></td> +<td></td> +</tr> +<tr> +<td>use field boosts[5](#notes)</td> +<td></td> +<td></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +</tr> +<tr> +<td>document boosts affect searches within field</td> +<td></td> +<td></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +</tr> +<tr> +<td>highlighting</td> +<td><code>true</code>[4](#notes)</td> +<td><code>true</code></td> +<td></td> +<td></td> +<td>[2](#notes)</td> +<td><code>true</code>[3](#notes)</td> +</tr> +<tr> +<td>faceting[5](#notes)</td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>add multiple values, maintaining order</td> +<td></td> +<td></td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>field length affects doc score</td> +<td></td> +<td></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +</tr> +<tr> +<td>MoreLikeThis[5](#notes)</td> +<td></td> +<td></td> +<td></td> +<td></td> +<td><code>true</code>[6](#notes)</td> +<td></td> +</tr> +</tbody></table> + +```erlang +{analyzer_factory, {erlang, text_analyzers, noop_analyzer_factory}}} +``` diff --git a/content/riak/kv/2.9.1/developing/usage/search.md b/content/riak/kv/2.9.1/developing/usage/search.md new file mode 100644 index 0000000000..d1252fc505 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/usage/search.md @@ -0,0 +1,1451 @@ +--- +title: "Using Search" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Searching" + identifier: "usage_searching" + weight: 105 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.1/dev/using/search + - /riak/kv/2.9.1/dev/using/search +--- + +[usage search schema]: ../search-schemas/ +[bucket types]: ../bucket-types/ + +## Setup + +Riak search 2.0 is an integration of Solr (for indexing and querying) +and Riak (for 
storage and distribution). There are a few points of
+interest that a user of Riak search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak search uses AAE is in the
+[Riak search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you connect with to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Here we'll use `curl` to
+create an index named `famous` with the default schema.
+
+Both schema and index creation will be covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/2.9.1/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+  new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.1/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type
+property `search_index` to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create a bucket type without
+any properties and set individual buckets to be indexed. 
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +### Associating an Index via Custom Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +default bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + +Once you have created the index association, any new data will be indexed on +ingest according to your schema. + +## Riak Search Security Setup + +[Security]({{<baseurl>}}riak/kv/2.9.1/using/security/) is a new feature as of +Riak 2.0 that lets an administrator limit access to certain resources. +In the case of search, your options are to limit administration of +schemas or indexes (the `search.admin` permission) to certain users, and +to limit querying (the `search.query` permission) to any index or to a +specific index. The example below shows the various options. 
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Liono, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python the content type is automatically set for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+  ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket)) + ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]); + +$storeObjectBuilder->build()->execute(); + +$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket)) + ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]); + +$storeObjectBuilder->build()->execute(); + +$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket)) + ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]); + +$storeObjectBuilder->build()->execute(); +``` + +```python +bucket = client.bucket_type('animals').bucket('cats') + +cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True}) +cat.store() + +cat = bucket.new('cheetara', {'name_s':'Cheetara', 'age_i':28, 'leader_b': True}) +cat.store() + +cat = bucket.new('snarf', {'name_s':'Snarf', 'age_i':43}) +cat.store() + +cat = bucket.new('panthro', {'name_s':'Panthro', 'age_i':36}) +cat.store() +``` + +```csharp +var lionoId = new RiakObjectId("animals", "cats", "liono"); +var lionoObj = new { name_s = "Lion-o", age_i = 30, leader = true }; +var lionoRiakObj = new RiakObject(lionoId, lionoObj); + +var cheetaraId = new RiakObjectId("animals", "cats", "cheetara"); +var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader = false }; +var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj); + +var snarfId = new RiakObjectId("animals", "cats", "snarf"); +var snarfObj = new { name_s = "Snarf", age_i = 43, leader = false }; +var snarfRiakObj = new RiakObject(snarfId, snarfObj); + +var panthroId = new RiakObjectId("animals", "cats", "panthro"); +var panthroObj = new { name_s = "Panthro", age_i = 36, leader = false }; +var panthroRiakObj = new RiakObject(panthroId, panthroObj); + +var rslts = client.Put(new[] { + lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj +}); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var objs = [ + [ 'liono', { name_s: 'Lion-o', age_i: 30, leader: true } ], + [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader: false } ], + [ 'snarf', { name_s: 'Snarf', age_i: 43, leader: false } ], + [ 'panthro', { name_s: 'Panthro', age_i: 36, leader: false } ], +]; + +var storeFuncs = []; +objs.forEach(function (o) { + var storeFunc = function (async_cb) { + var key = o[0]; + var value = o[1]; + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('application/json'); + riakObj.setBucketType('animals'); + riakObj.setBucket('cats'); + riakObj.setKey(key); + riakObj.setValue(value); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }; + storeFuncs.push(storeFunc); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } + // NB: all objects stored and indexed... 
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/2.9.1/developing/usage/custom-extractors). 
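+
+If you want a quick look at what an extractor will hand to Solr for a
+given value before you store it, Yokozuna exposes an extraction test
+resource over HTTP. A minimal sketch, assuming a local JSON file named
+`object.json` containing the value you want to test:
+
+```curl
+curl -XPUT $RIAK_HOST/search/extract \
+  -H 'Content-Type: application/json' \
+  --data-binary @object.json
+```
+
+The response should list the fields that extraction would produce for
+that value.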
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/2.9.1/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated into a
+field insert on the Solr index document. Solr will index any field that
+it recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` is an integer, `_b` is a boolean and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/2.9.1/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match. 
+ +```java +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create("famous"), "name_s:Lion*") + .build(); +cluster.execute(searchOp); +// This will display the actual results as a List of Maps: +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +// This will display the number of results: +System.out.println(results); +``` + +```ruby +results = client.search("famous", "name_s:Lion*") +p results +p results['docs'] +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('famous') + ->withQuery('name_s:Lion*') + ->build() + ->execute(); + +$response->getNumFound(); // 1 + +var_dump($response->getDocs()); +``` + +```python +results = client.fulltext_search('famous', 'name_s:Lion*') +print results +print results['docs'] +``` + +```csharp +var search = new RiakSearchRequest +{ + Query = new RiakFluentSearch("famous", "name_s") + .Search("Lion*") + .Build() +}; + +var rslt = client.Search(search); +RiakSearchResult searchResult = rslt.Value; +foreach (RiakSearchResultDocument doc in searchResult.Documents) +{ + var args = new[] { + doc.BucketType, + doc.Bucket, + doc.Key, + string.Join(", ", doc.Fields.Select(f => f.Value).ToArray()) + }; + Debug.WriteLine( + format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}", + args: args); +} +``` + +```javascript +function search_cb(err, rslt) { + if (err) { + throw new Error(err); + } + logger.info("docs:", JSON.stringify(rslt.docs)); +} + +var search = new Riak.Commands.YZ.Search.Builder() + .withIndexName('famous') + .withQuery('name_s:Lion*') + .withCallback(search_cb) + .build(); +client.execute(search); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>), +io:fwrite("~p~n", [Results]), +Docs = Results#search_results.docs, +io:fwrite("~p~n", [Docs]). + +%% Please note that this example relies on an Erlang record definition +%% for the search_result record found here: +%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl +``` + +```golang +cmd, err := riak.NewSearchCommandBuilder(). + WithIndexName("famous"). + WithQuery("name_s:Lion*"). + Build(); +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} + +sc := cmd.(*riak.SearchCommand) +if json, jerr := json.MarshalIndent(sc.Response.Docs, "", " "); jerr != nil { + return jerr +} else { + fmt.Println(string(json)) +} +``` + +```curl +curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp +``` + +The response to a query will be an object containing details about the +response, such as a query's max score and a list of documents which +match the given query. It's worth noting two things: + +* The documents returned are Search documents (a set of Solr + field/values), not a Riak value +* The HTTP response is a direct Solr response, while the drivers use + Protocol Buffers and are encoded with different field names + +This is a common HTTP `response` value: + +```json +{ + "numFound": 1, + "start": 0, + "maxScore": 1.0, + "docs": [ + { + "leader_b": true, + "age_i": 30, + "name_s": "Lion-o", + "_yz_id": "default_cats_liono_37", + "_yz_rk": "liono", + "_yz_rt": "default", + "_yz_rb": "cats" + } + ] +} +``` + +The most important field returned is `docs`, which is the list of +objects that each contain fields about matching index documents. 
The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+        .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation($key, $bucket, $btype)
+  ->build()
+  ->execute()
+  ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO
+30]`. If you wanted to find all cats 30 or older, you could include a
+glob as a top end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('age_i:[30 TO *]')
+  ->build()
+  ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`. 
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('leader_b:true AND age_i:[30 TO *]')
+  ->build()
+  ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+        .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new Command\Builder\Search\DeleteIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []),
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that bucket's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_`.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+#### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents is returned in non-overlapping
+sequential subsets (in other words, *pages*). This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, where `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "*:*")
+        .withStart(start)
+        .withNumRows(rowsPerPage)
+        .build();
+client.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('*:*')
+  ->withMaxRows($maxRows)
+  ->withStartRow($start)
+  ->build()
+  ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results.
+
+If you want to sort by score without repeating results then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query.
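+
+For example, a key-ordered page fetch using the Solr `sort` parameter
+might look like the following sketch (the sort clause mirrors the
+type-bucket-key ordering described above):
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=0&rows=2&sort=_yz_rt+asc,_yz_rb+asc,_yz_rk+asc" | json_pp
+```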
+ +[This issue](https://github.com/basho/yokozuna/issues/355) is caused by +the way Search must minimally distribute a query across multiple Solr +nodes (called a *coverage plan*) and then filter duplicate results to +retrieve a full result set. Since this plan is frequently recalculated, +successive page queries may use a different plan, and thus calculate +alternate `score`s or filter different `_yz_id` values. We have plans to +fix this shortcoming in a future version of Riak. + +### MapReduce + +Riak Search allows for piping search results as inputs for +[MapReduce]({{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce/) jobs. This is a useful cross-section for +performing post-calculations of results or aggregations of ad-hoc +queries. The Riak Search MapReduce integration works similarly to +regular MapReduce, with the notable exception that your input is not a +bucket, but rather index and query arguments to the `yokozuna` module +and `mapred_search` function (an Erlang `module:function` pair that adds +the Riak Search hook to MapReduce). + +```json +{ + "inputs": { + "module": "yokozuna", + "function": "mapred_search", + "arg": ["famous","NOT leader_b:true"] + }, + "query": [ + { + "map": { + "language": "javascript", + "keep": false, + "source": "function(v) { return [1]; }" + } + }, + { + "reduce": { + "language": "javascript", + "keep": true, + "name": "Riak.reduceSum" + } + } + ] +} +``` + +In this example we're searching for all famous cats that are not +leaders and counting up the results using Javascript for both map and +reduce. It should return the reduced sum of `[3]`. + +```curl +curl -XPOST $RIAK_HOST/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}' +``` diff --git a/content/riak/kv/2.9.1/developing/usage/searching-data-types.md b/content/riak/kv/2.9.1/developing/usage/searching-data-types.md new file mode 100644 index 0000000000..3880aceb2e --- /dev/null +++ b/content/riak/kv/2.9.1/developing/usage/searching-data-types.md @@ -0,0 +1,1683 @@ +--- +title: "Searching with Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Searching with Data Types" + identifier: "usage_search_data_types" + weight: 111 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.1/dev/search/search-data-types + - /riak/kv/2.9.1/dev/search/search-data-types +--- + +Although [Riak Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types) function differently from other +Riak objects in some respects, when you're using Search you can think of +them as normal Riak objects with special metadata attached (metadata +that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#maps) +can be indexed and have their contents searched just like other Riak +objects. + +## Data Type MIME Types + +Like all objects stored in Riak, Riak Data Types are assigned content +types. Unlike other Riak objects, this happens automatically. When you +store, say, a counter in Riak, it will automatically be assigned the +type `application/riak_counter`. 
The table below provides the full list +of content types: + +Data Type | Content Type +:---------|:------------ +Counters | `application/riak_counter` +Sets | `application/riak_set` +Maps | `application/riak_map` + +When using Search, you won't need to worry about this, as Riak Data +Types are automatically indexed on the basis of these content types. + +## Data Type Schemas + +There are two types of schemas related to Riak Data Types: + +* **Top-level schemas** relate to Data Types that are stored at the key + level (counters and sets) +* **Embedded schemas** relate to Data Types nested inside of maps + (flags, counters, registers, and sets) + +As you can see from the [default Search +schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96), +each of the Data Types has its own default schema, with the exception of +maps, which means that the `_yz_default` schema will automatically index +Data Types on the basis of their assigned content type. This means that +there is no extra work involved in indexing Riak Data Types. You can +simply store them and begin querying, provided that they are properly +indexed, which is covered in the [examples](#riak-data-types-and-search) section below. + +As mentioned above, there are no default schemas available for maps. +This is because maps are essentially carriers for the other Data Types. +Even when maps are embedded within other maps, all of the data that you +might wish to index and search is contained in counters, sets, +registers, and flags. + +The sections immediately below provide the default schemas for each Riak +Data Type. Because you will not need to manipulate these default schemas +to search Data Types, they are provided only for reference. + +### Top-level Schemas + +The default schema for [counters]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#counters) indexes each +counter as an integer. + +```xml +<field name="counter" type="int" indexed="true" stored="true" multiValued="false" /> +``` + +Constructing queries for counters involves prefacing the query with +`counter`. Below are some examples: + +Query | Syntax +:-----|:------ +Counters with a value over 10 | `counter:[10 TO *]` +Counters with a value below 10 and above 50 | `counter:[* TO 10] AND counter:[50 TO *]` +Counters with a value of 15 | `counter:15` +All counters within the index | `counter:*` + +The schema for [sets]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets) indexes each element of a set as +a string and indexes the set itself as multi-valued. + +```xml +<field name="set" type="string" indexed="true" stored="false" multiValued="true" /> +``` + +To query sets, preface the query with `set`. The table below shows some +examples: + +Query | Syntax +:-----|:------ +Sets that contain the value `apple` | `set:apple` +Sets that contain an item beginning with `level` | `set:level*` +Sets that contain both `apple` and `orange` | `set:apple AND set:orange` +All sets within the index | `set:*` + +### Embedded Schemas + +For searching within [maps]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#maps), there are four schemas +for embedded, aka dynamic, fields. 
Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued.
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings.
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('scores')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index.
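+
+If you'd like to confirm the association before moving on, you can
+inspect the bucket type from the command line; the listed properties
+should include the `search_index` setting:
+
+```bash
+riak-admin bucket-type status counters
+```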
So let's +start playing with some counters. All counters will be stored in the +bucket `people`, while the key for each counter will be the username of +each player: + +```java +Namespace peopleBucket = new Namespace("counters", "people"); + +Location christopherHitchensCounter = new Location(peopleBucket, "christ_hitchens"); +CounterUpdate cu = new CounterUpdate(10); +UpdateCounter update = new UpdateCounter.Builder(christopherHitchensCounter, cu) + .build(); +client.execute(update); + +Location joanRiversCounter = new Location(peopleBucket, "joan_rivers"); +CounterUpdate cu = new CounterUpdate(25); +UpdateCounter update = new UpdateCounter.Builder(joanRiversCounter, cu) + .build(); +client.execute(update); +``` + +```ruby +bucket = client.bucket('people') + +christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters') +christopher_hitchens_counter.increment(10) + +joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters') +joan_rivers_counter.increment(25) +``` + +```php +$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(10) + ->buildLocation('chris_hitchens', 'people', 'counters'); + +$builder->build->execute(); + +$builder->withIncrement(25) + ->buildLocation('joan_rivers', 'people', 'counters') + ->build() + ->execute(); +``` + +```python +from riak.datatypes import Counter + +bucket = client.bucket_type('counters').bucket('people') + +christopher_hitchens_counter = Counter(bucket, 'chris_hitchens') +christopher_hitchens_counter.increment(10) +christopher_hitchens_counter.store() + +joan_rivers_counter = Counter(bucket, 'joan_rivers') +joan_rivers_counter.increment(25) +joan_rivers_counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs + +var cmd = new UpdateCounter.Builder() + .WithBucketType("counters") + .WithBucket("people") + .WithKey("christ_hitchens") + .WithIncrement(10) + .Build(); +RiakResult rslt = client.Execute(cmd); + +cmd = new UpdateCounter.Builder() + .WithBucketType("counters") + .WithBucket("people") + .WithKey("joan_rivers") + .WithIncrement(25) + .Build(); +rslt = client.Execute(cmd); +``` + +```javascript +var funcs = [ + function (async_cb) { + var options = { + bucketType: 'counters', + bucket: 'people', + key: 'christ_hitchens', + increment: 10 + }; + + client.updateCounter(options, function (err, rslt) { + throwIfErr(err); + async_cb(); + }); + }, + function (async_cb) { + var options = { + bucketType: 'counters', + bucket: 'people', + key: 'joan_rivers', + increment: 25 + }; + + client.updateCounter(options, function (err, rslt) { + throwIfErr(err); + async_cb(); + }); + } +]; + +async.parallel(funcs, function (err, rslts) { + throwIfErr(err); +}); +``` + +```erlang +ChristopherHitchensCounter = riakc_counter:new(), +HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter), +JoanRiversCounter = riakc_counter:new(), +RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter), +riakc_pb_socket:update_type(Pid, + {<<"counters">>, <<"people">>}, + <<"chris_hitchens">>, + riakc_counter:to_op(HitchensCounter1)), +riakc_pb_socket:update_type(Pid, + {<<"counters">>, <<"people">>}, + <<"joan_rivers">>, + riakc_counter:to_op(RiversCounter1)). +``` + +```curl +# We do not recommend working with Riak Data Types via curl. Try using +# one of our client libraries instead. +``` + +So now we have two counters, one with a value of 10 and the other with a +value of 25. 
Let's query to see how many counters have a value greater +than 20, just to be sure: + +```java +String index = "scores"; +String query = "counter:[20 TO *]"; +SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +SearchOperation.Response results = searchOp.get(); +``` + +```ruby +results = client.search('scores', 'counter:[20 TO *]') +# This should return a Hash with fields like 'num_found' and 'docs' + +results['num_found'] +# 1 +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('scores') + ->withQuery('counter:[20 TO *]') + ->build() + ->execute(); + +$response->getNumFound(); // 1 +``` + +```python +results = client.fulltext_search('scores', 'counter:[20 TO *]') +# This should return a dict with fields like 'num_found' and 'docs' + +results['num_found'] +# 1 +``` + +```csharp +var search = new RiakSearchRequest("scores", "counter:[20 TO *]"); +var rslt = client.Search(search); +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); +``` + +```javascript +function search_cb(err, rslt) { + logger.info("counter numFound: '%d', docs: '%s'", + rslt.numFound, JSON.stringify(rslt.docs)); + + var doc = rslt.docs[0]; + var key = doc['_yz_rk']; + var bucket = doc['_yz_rb']; + var bucketType = doc['_yz_rt']; +} + +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('scores') + .withQuery('counter:[20 TO *]') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>), +NumberFound = Results#search_results.num_found. +%% 1 +``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[20 TO *]" | json_pp +``` + +And there we are: only one of our two stored sets has a value over 20. 
To find out which counter that is, we can dig into our results:

```java
// Using the "results" object from above:
int numberFound = results.numResults();
Map<String, List<String>> foundObject = results.getAllResults().get(0);
String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
String bucket = foundObject.get("_yz_rb").get(0); // "people"
String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
```

```ruby
doc = results['docs'][0]

# The key
doc['_yz_rk'] # 'joan_rivers'

# The bucket
doc['_yz_rb'] # 'people'

# The bucket type
doc['_yz_rt'] # 'counters'
```

```php
$doc = $response->getDocs()[0];

# The key
$doc['_yz_rk']; # 'joan_rivers'

# The bucket
$doc['_yz_rb']; # 'people'

# The bucket type
$doc['_yz_rt']; # 'counters'
```

```python
doc = results['docs'][0]

# The key
doc['_yz_rk'] # 'joan_rivers'

# The bucket
doc['_yz_rb'] # 'people'

# The bucket type
doc['_yz_rt'] # 'counters'
```

```csharp
var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
var rslt = client.Search(search);

RiakSearchResult searchResult = rslt.Value;
Console.WriteLine("Num found: {0}", searchResult.NumFound);

var firstDoc = searchResult.Documents.First();
Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
```

```javascript
var doc = rslt.docs[0];

var key = doc['_yz_rk'];
var bucket = doc['_yz_rb'];
var bucketType = doc['_yz_rt'];
```

```erlang
Docs = Results#search_results.docs,
Doc = lists:nth(1, Docs),
Key = proplists:get_value(<<"_yz_rk">>, Doc),
Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
```

```curl
# Use the JSON object from above to locate bucket, key, and bucket type
# information
```

Alternatively, we can see how many counters have values below 15:

```java
String index = "scores";
String query = "counter:[* TO 15]";
SearchOperation searchOp = new SearchOperation
        .Builder(BinaryValue.create(index), query)
        .build();
cluster.execute(searchOp);
SearchOperation.Response results = searchOp.get();
```

```ruby
results = client.search('scores', 'counter:[* TO 15]')
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
    ->withIndexName('scores')
    ->withQuery('counter:[* TO 15]')
    ->build()
    ->execute();

$response->getNumFound(); // 1
```

```python
results = client.fulltext_search('scores', 'counter:[* TO 15]')
```

```csharp
var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('scores')
    .withQuery('counter:[* TO 15]')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
```

```curl
curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp
```

Or we can see how many counters have a value of 17 exactly:

```java
// Using the same method as above, just changing the query:
String query = "counter:17";
```

```ruby
results = client.search('scores', 'counter:17')
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
    ->withIndexName('scores')
    ->withQuery('counter:17')
    ->build()
    ->execute();
```

```python
results = client.fulltext_search('scores', 'counter:17')
```

```csharp
var search = new RiakSearchRequest("scores", "counter:17");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('scores')
    .withQuery('counter:17')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>).
```

```curl
curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp
```

## Sets Example

Let's say that we're storing information about the hobbies of a group of
people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#sets) simply called `sets`,
like so:

```bash
riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
riak-admin bucket-type activate sets
```

Now, we'll create a Search index called `hobbies` that uses the default
schema (as in some of the examples above):

```java
YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies");
StoreIndex storeIndex =
    new StoreIndex.Builder(hobbiesIndex).build();
client.execute(storeIndex);
```

```ruby
client.create_search_index('hobbies', '_yz_default')
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
    ->withName('hobbies')
    ->usingSchema('_yz_default')
    ->build()
    ->execute();
```

```python
client.create_search_index('hobbies', '_yz_default')
```

```csharp
var searchIndex = new SearchIndex("hobbies", "_yz_default");
var rslt = client.PutSearchIndex(searchIndex);
```

```javascript
var options = {
    schemaName: '_yz_default',
    indexName: 'hobbies'
};
client.storeIndex(options, function (err, rslt) {
});
```

```erlang
riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>).
```

```curl
curl -XPUT $RIAK_HOST/search/index/hobbies \
  -H 'Content-Type: application/json' \
  -d '{"schema": "_yz_default"}'
```

Now, we can modify our `sets` bucket type to associate that bucket type
with our `hobbies` index:

```bash
riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}'
```

Now, all of the sets that we store in any bucket with the bucket type
`sets` will be automatically indexed as a set.
So let's say that we
store three sets for two different people describing their respective
hobbies, in the bucket `people`:

```java
Namespace peopleBucket = new Namespace("sets", "people");

Location mikeDitkaSet = new Location(peopleBucket, "ditka");
SetUpdate su1 = new SetUpdate()
        .add("football")
        .add("winning");
UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();

Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
SetUpdate su2 = new SetUpdate()
        .add("wailing")
        .add("rocking")
        .add("winning");
UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();

client.execute(update1);
client.execute(update2);
```

```ruby
bucket = client.bucket('people')

mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
mike_ditka_set.add('football')
mike_ditka_set.add('winning')

ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
ronnie_james_dio_set.add('wailing')
ronnie_james_dio_set.add('rocking')
ronnie_james_dio_set.add('winning')
```

```php
$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
    ->add('football')
    ->add('winning')
    ->buildLocation('ditka', 'people', 'sets');

$builder->build()->execute();

$builder->add('wailing')
    ->add('rocking')
    ->add('winning')
    ->buildLocation('dio', 'people', 'sets')
    ->build()
    ->execute();
```

```python
from riak.datatypes import Set

bucket = client.bucket_type('sets').bucket('people')

mike_ditka_set = Set(bucket, 'ditka')
mike_ditka_set.add('football')
mike_ditka_set.add('winning')
mike_ditka_set.store()

ronnie_james_dio_set = Set(bucket, 'dio')
ronnie_james_dio_set.add('wailing')
ronnie_james_dio_set.add('rocking')
ronnie_james_dio_set.add('winning')
ronnie_james_dio_set.store()
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs

var cmd = new UpdateSet.Builder()
    .WithBucketType("sets")
    .WithBucket("people")
    .WithKey("ditka")
    .WithAdditions(new[] { "football", "winning" })
    .Build();
RiakResult rslt = client.Execute(cmd);

cmd = new UpdateSet.Builder()
    .WithBucketType("sets")
    .WithBucket("people")
    .WithKey("dio")
    .WithAdditions(new[] { "wailing", "rocking", "winning" })
    .Build();
rslt = client.Execute(cmd);
```

```javascript
var funcs = [
    function (async_cb) {
        var options = {
            bucketType: 'sets',
            bucket: 'people',
            key: 'ditka',
            additions: ['football', 'winning']
        };

        client.updateSet(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    },
    function (async_cb) {
        var options = {
            bucketType: 'sets',
            bucket: 'people',
            key: 'dio',
            additions: ['wailing', 'rocking', 'winning']
        };

        client.updateSet(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    }
];

async.parallel(funcs, function (err, rslts) {
    throwIfErr(err);
});
```

```erlang
%% riakc_set is immutable, so each add_element returns a new set that
%% must be rebound (as in the counters example above)
MikeDitkaSet = riakc_set:new(),
MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet),
MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
RonnieJamesDioSet = riakc_set:new(),
RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet),
RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),

riakc_pb_socket:update_type(Pid,
                            {<<"sets">>, <<"people">>},
                            <<"ditka">>,
                            riakc_set:to_op(MikeDitkaSet2)),
riakc_pb_socket:update_type(Pid,
                            {<<"sets">>, <<"people">>},
                            <<"dio">>,
                            riakc_set:to_op(RonnieJamesDioSet3)).
```

Now, we can query our `hobbies` index to see if anyone has the hobby
`football`:

```java
// Using the same method explained above, just changing the query:
String query = "set:football";
```

```ruby
results = client.search('hobbies', 'set:football')
# This should return a Hash with fields like 'num_found' and 'docs'
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
    ->withIndexName('hobbies')
    ->withQuery('set:football')
    ->build()
    ->execute();
```

```python
results = client.fulltext_search('hobbies', 'set:football')
# This should return a dict with fields like 'num_found' and 'docs'
```

```csharp
var search = new RiakSearchRequest("hobbies", "set:football");
var rslt = client.Search(search);

RiakSearchResult searchResult = rslt.Value;
Console.WriteLine("Num found: {0}", searchResult.NumFound);

var firstDoc = searchResult.Documents.First();
Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
```

```javascript
function search_cb(err, rslt) {
    logger.info("sets numFound: '%d', docs: '%s'",
        rslt.numFound, JSON.stringify(rslt.docs));

    var doc = rslt.docs[0];
    var key = doc['_yz_rk'];
    var bucket = doc['_yz_rb'];
    var bucketType = doc['_yz_rt'];
}

var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('hobbies')
    .withQuery('set:football')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>).
```

```curl
curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp
```

Let's see how many sets contain the element `football`:

```java
// Using the same method explained above for getting search results:
int numberFound = results.numResults(); // 1
```

```ruby
results['num_found']
# 1
```

```php
$response->getNumFound(); // 1
```

```python
results['num_found']
# 1
```

```csharp
RiakSearchResult searchResult = rslt.Value;
Console.WriteLine("Num found: {0}", searchResult.NumFound);
```

```javascript
rslt.numFound;
// 1
```

```erlang
NumberFound = Results#search_results.num_found.
%% 1
```

```curl
# Inspect the 'num_found' field in the JSON response to the query above
```

Success! We stored two sets, only one of which contains the element
`football`. Now, let's see how many sets contain the element `winning`:

```java
// Using the same method explained above, just changing the query:
String query = "set:winning";

// Again using the same method from above:
int numberFound = results.numResults(); // 2
```

```ruby
results = client.search('hobbies', 'set:winning')
results['num_found']
# 2
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
    ->withIndexName('hobbies')
    ->withQuery('set:winning')
    ->build()
    ->execute();

$response->getNumFound(); // 2
```

```python
results = client.fulltext_search('hobbies', 'set:winning')
results['num_found']
# 2
```

```csharp
var search = new RiakSearchRequest("hobbies", "set:winning");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('hobbies')
    .withQuery('set:winning')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>).
NumberFound = Results#search_results.num_found.
%% 2
```

Just as expected, both sets we stored contain the element `winning`.
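For completeness, the same `set:winning` query can also be issued over HTTP,
following the pattern of the earlier curl examples in this section:

```curl
curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:winning" | json_pp
```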
## Maps Example

This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types)
tutorial. That tutorial walks you through storing CMS-style user data in
Riak [maps]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/#maps), and we'd suggest that you
familiarize yourself with that tutorial first. More specifically, user
data is stored in the following fields in each user's map:

* first name in a `first_name` register
* last name in a `last_name` register
* whether the user is an enterprise customer in an `enterprise_customer`
  flag
* the number of times the user has visited the company page in a
  `page_visits` counter
* a list of the user's interests in an `interests` set

First, let's create and activate a bucket type simply called `maps` that
is set up to store Riak maps:

```bash
riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
riak-admin bucket-type activate maps
```

Now, let's create a search index called `customers` using the default
schema:

```java
YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default");
StoreIndex storeIndex =
    new StoreIndex.Builder(customersIndex).build();
client.execute(storeIndex);
```

```ruby
client.create_search_index('customers', '_yz_default')
```

```php
(new Command\Builder\Search\StoreIndex($riak))
    ->withName('customers')
    ->usingSchema('_yz_default')
    ->build()
    ->execute();
```

```python
client.create_search_index('customers', '_yz_default')
```

```csharp
var searchIndex = new SearchIndex("customers", "_yz_default");
var rslt = client.PutSearchIndex(searchIndex);
```

```javascript
var options = {
    schemaName: '_yz_default',
    indexName: 'customers'
};
client.storeIndex(options, function (err, rslt) {
});
```

```erlang
riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>).
```

```curl
curl -XPUT $RIAK_HOST/search/index/customers \
  -H 'Content-Type: application/json' \
  -d '{"schema":"_yz_default"}'
```

With our index created, we can associate our new `customers` index with
our `maps` bucket type:

```bash
riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
```

Now we can create some maps along the lines suggested above:

```java
Namespace customersBucket = new Namespace("maps", "customers");

Location idrisElbaMap = new Location(customersBucket, "idris_elba");
MapUpdate mu1 = new MapUpdate()
        .update("first_name", new RegisterUpdate("Idris"))
        .update("last_name", new RegisterUpdate("Elba"))
        .update("enterprise_customer", new FlagUpdate(false))
        .update("page_visits", new CounterUpdate(10))
        .update("interests", new SetUpdate().add("acting").add("being Stringer Bell"));

Location joanJettMap = new Location(customersBucket, "joan_jett");
MapUpdate mu2 = new MapUpdate()
        .update("first_name", new RegisterUpdate("Joan"))
        .update("last_name", new RegisterUpdate("Jett"))
        // Joan Jett is not an enterprise customer, so we don't need to
        // explicitly disable the "enterprise_customer" flag, as all
        // flags are disabled by default
        .update("page_visits", new CounterUpdate(25))
        .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));

UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
client.execute(update1);
client.execute(update2);
```

```ruby
bucket = client.bucket('customers')

idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')

idris_elba.batch do |ie|
  ie.registers['first_name'] = 'Idris'
  ie.registers['last_name'] = 'Elba'
  ie.flags['enterprise_customer'] = true
  ie.counters['page_visits'].increment(10)
  ['acting', 'being Stringer Bell'].each do |interest|
    ie.sets['interests'].add(interest)
  end
end

joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
joan_jett.batch do |jj|
  jj.registers['first_name'] = 'Joan'
  jj.registers['last_name'] = 'Jett'
  ## Joan Jett is not an enterprise customer, so we don't need to
  ## explicitly disable this flag, as all flags are disabled by default
  jj.counters['page_visits'].increment(25)
  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
    jj.sets['interests'].add(interest)
  end
end
```

```php
$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
    ->withIncrement(10);

$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));

foreach (['acting', 'being Stringer Bell'] as $interest) {
    $setBuilder->add($interest);
}

(new \Basho\Riak\Command\Builder\UpdateMap($riak))
    ->updateRegister('first_name', 'Idris')
    ->updateRegister('last_name', 'Elba')
    ->updateFlag('enterprise_customer', true)
    ->updateSet('interests', $setBuilder)
    ->updateCounter('page_visits', $counterBuilder)
    ->buildLocation('idris_elba', 'customers', 'maps')
    ->build()
    ->execute();

$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));

foreach (['loving rock and roll', 'being in the Blackhearts'] as $interest) {
    $setBuilder->add($interest);
}

(new \Basho\Riak\Command\Builder\UpdateMap($riak))
    ->updateRegister('first_name', 'Joan')
    ->updateRegister('last_name', 'Jett')
    ->updateSet('interests', $setBuilder)
    ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
    ->buildLocation('joan_jett',
'customers', 'maps')
    ->build()
    ->execute();
```

```python
from riak.datatypes import Map

bucket = client.bucket_type('maps').bucket('customers')

idris_elba = Map(bucket, 'idris_elba')
idris_elba.registers['first_name'].assign('Idris')
idris_elba.registers['last_name'].assign('Elba')
idris_elba.flags['enterprise_customer'].enable()
idris_elba.counters['page_visits'].increment(10)
for interest in ['acting', 'being Stringer Bell']:
    idris_elba.sets['interests'].add(interest)
idris_elba.store()

joan_jett = Map(bucket, 'joan_jett')
joan_jett.registers['first_name'].assign('Joan')
joan_jett.registers['last_name'].assign('Jett')
# Joan Jett is not an enterprise customer, so we don't need to
# explicitly disable this flag, as all flags are disabled by default
joan_jett.counters['page_visits'].increment(25)
for interest in ['loving rock and roll', 'being in the Blackhearts']:
    joan_jett.sets['interests'].add(interest)
joan_jett.store()
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs

// Note: similar code for Joan Jett

const string firstNameRegister = "first_name";
const string lastNameRegister = "last_name";
const string enterpriseCustomerFlag = "enterprise_customer";
const string pageVisitsCounter = "page_visits";
const string interestsSet = "interests";

var idrisAdds = new[] { "acting", "being Stringer Bell" };

var mapOp = new UpdateMap.MapOperation()
    .SetRegister(firstNameRegister, "Idris")
    .SetRegister(lastNameRegister, "Elba")
    .SetFlag(enterpriseCustomerFlag, false)
    .IncrementCounter(pageVisitsCounter, 10)
    .AddToSet(interestsSet, idrisAdds);

var cmd = new UpdateMap.Builder()
    .WithBucketType("maps")
    .WithBucket("customers")
    .WithKey("idris_elba")
    .WithMapOperation(mapOp)
    .Build();

RiakResult rslt = client.Execute(cmd);
```

```javascript
var funcs = [
    function (async_cb) {
        var options = {
            bucketType: 'maps',
            bucket: 'customers',
            key: 'idris_elba'
        };

        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
        mapOp.setRegister('first_name', 'Idris');
        mapOp.setRegister('last_name', 'Elba');
        mapOp.setFlag('enterprise_customer', false);
        mapOp.incrementCounter('page_visits', 10);
        mapOp.addToSet('interests', 'acting');
        mapOp.addToSet('interests', 'being Stringer Bell');

        options.op = mapOp;

        client.updateMap(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    },
    function (async_cb) {
        var options = {
            bucketType: 'maps',
            bucket: 'customers',
            key: 'joan_jett'
        };

        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
        mapOp.setRegister('first_name', 'Joan');
        mapOp.setRegister('last_name', 'Jett');
        mapOp.setFlag('enterprise_customer', false);
        mapOp.incrementCounter('page_visits', 25);
        mapOp.addToSet('interests', 'loving rock and roll');
        mapOp.addToSet('interests', 'being in the Blackhearts');

        options.op = mapOp;

        client.updateMap(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    }
];

async.parallel(funcs, function (err, rslts) {
    throwIfErr(err);
});
```

### Searching Counters Within Maps

We now have two maps stored in Riak that we can query. Let's query to
see how many users have page visit counters above 15.
Unlike the
counters example above, we have to specify _which_ counter we're
querying:

```java
// Using the same method explained above, just changing the query:
String query = "page_visits_counter:[15 TO *]";

// Again using the same method from above:
int numberFound = results.numResults(); // 1
```

```ruby
results = client.search('customers', 'page_visits_counter:[15 TO *]')
results['num_found']
# 1
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
    ->withIndexName('customers')
    ->withQuery('page_visits_counter:[15 TO *]')
    ->build()
    ->execute();

$response->getNumFound(); // 1
```

```python
results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
results['num_found']
# 1
```

```csharp
var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
var rslt = client.Search(search);
```

```javascript
function search_cb(err, rslt) {
    logger.info("numFound: '%d', docs: '%s'",
        rslt.numFound, JSON.stringify(rslt.docs));

    var doc = rslt.docs[0];
    var key = doc['_yz_rk'];
    var bucket = doc['_yz_rb'];
    var bucketType = doc['_yz_rt'];
}

var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('customers')
    .withQuery('page_visits_counter:[15 TO *]')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

As expected, one of our two stored maps has a `page_visits` counter
above 15. Let's make sure that we have the right result:

```java
// Using the same method from above:
String query = "page_visits_counter:[15 TO *]";

// Again using the same method from above:
String registerValue =
    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
```

```ruby
results['docs'][0]['first_name_register']
# 'Joan'
```

```php
$response->getDocs()[0]->first_name_register; // Joan
```

```python
results['docs'][0]['first_name_register']
# u'Joan'
```

```csharp
var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
var rslt = client.Search(search);
RiakSearchResult searchResult = rslt.Value;
var firstDoc = searchResult.Documents.First();
```

```javascript
var doc = rslt.docs[0];
doc.first_name_register; // 'Joan'
```

Success! Now we can test out searching sets.

### Searching Sets Within Maps

Each of the maps we stored thus far had an `interests` set. First, let's
see how many of our maps even _have_ sets called `interests` using a
wildcard query:

```java
// Using the same method from above:
String query = "interests_set:*";
```

```ruby
results = client.search('customers', 'interests_set:*')
results['num_found']
# 2
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
    ->withIndexName('customers')
    ->withQuery('interests_set:*')
    ->build()
    ->execute();

$response->getNumFound(); // 2
```

```python
results = client.fulltext_search('customers', 'interests_set:*')
results['num_found']
# 2
```

```csharp
var search = new RiakSearchRequest("customers", "interests_set:*");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('customers')
    .withQuery('interests_set:*')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

As expected, both stored maps have an `interests` set.
Now let's see how
many maps have items in `interests` sets that begin with `loving`:

```java
// Using the same method from above:
String query = "interests_set:loving*";

// Again using the same method from above:
int numberFound = results.numResults(); // 1
String registerValue =
    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
```

```ruby
results = client.search('customers', 'interests_set:loving*')
results['num_found'] # 1
results['docs'][0]['first_name_register'] # 'Joan'
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
    ->withIndexName('customers')
    ->withQuery('interests_set:loving*')
    ->build()
    ->execute();

$response->getDocs()[0]->first_name_register; // Joan
```

```python
results = client.fulltext_search('customers', 'interests_set:loving*')
results['num_found'] # 1
results['docs'][0]['first_name_register'] # u'Joan'
```

```csharp
var search = new RiakSearchRequest("customers", "interests_set:loving*");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('customers')
    .withQuery('interests_set:loving*')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

As expected, only our Joan Jett map has one item in its `interests` set
that starts with `loving`.

### Searching Maps Within Maps

Before we can try to search maps within maps, we need to actually store
some. Let's add an `alter_ego` map to both of the maps we've stored thus
far. Each person's alter ego will have a first name only.

```java
// Note: similar code is needed for the Joan Jett map
Location idrisElbaMap = new Location(customersBucket, "idris_elba");
MapUpdate alterEgoUpdateName = new MapUpdate()
        .update("name", new RegisterUpdate("John Luther"));
MapUpdate alterEgoUpdate = new MapUpdate()
        .update("alter_ego", alterEgoUpdateName);
UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate)
        .build();
client.execute(addSubMap);
```

```ruby
idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'

joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
```

```php
$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
    ->updateRegister('name', 'John Luther');

(new \Basho\Riak\Command\Builder\UpdateMap($riak))
    ->updateMap('alter_ego', $mapBuilder)
    ->buildLocation('idris_elba', 'customers', 'maps')
    ->build()
    ->execute();

$mapBuilder->updateRegister('name', 'Robert Plant');

(new \Basho\Riak\Command\Builder\UpdateMap($riak))
    ->updateMap('alter_ego', $mapBuilder)
    ->buildLocation('joan_jett', 'customers', 'maps')
    ->build()
    ->execute();
```

```python
idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
idris_elba.store()

joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
joan_jett.store()
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs

const string nameRegister = "name";
const string alterEgoMap = "alter_ego";

var mapOp = new UpdateMap.MapOperation();
mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");

var cmd = new UpdateMap.Builder()
    .WithBucketType("maps")
    .WithBucket("customers")
    .WithKey("idris_elba")
    .WithMapOperation(mapOp)
    .Build();

RiakResult rslt = client.Execute(cmd);
```

```javascript
var funcs = [
    function (async_cb) {
        var options = {
            bucketType: 'maps',
            bucket: 'customers',
            key: 'idris_elba'
        };
        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
        var alterEgoMap = mapOp.map('alter_ego');
        alterEgoMap.setRegister('name', 'John Luther');

        options.op = mapOp;

        client.updateMap(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    },
    function (async_cb) {
        var options = {
            bucketType: 'maps',
            bucket: 'customers',
            key: 'joan_jett'
        };

        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
        var alterEgoMap = mapOp.map('alter_ego');
        alterEgoMap.setRegister('name', 'Robert Plant');

        options.op = mapOp;

        client.updateMap(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    }
];

async.parallel(funcs, function (err, rslts) {
    throwIfErr(err);
});
```

Querying maps within maps involves constructing queries that separate the
different levels of depth with a single dot. Here's an example query for
finding maps that have a `name` register embedded within an `alter_ego`
map:

```java
// Using the same method from above:
String query = "alter_ego_map.name_register:*";

// Again using the same method from above:
int numberFound = results.numResults(); // 2
```

```ruby
results = client.search('customers', 'alter_ego_map.name_register:*')
results['num_found'] # 2
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
    ->withIndexName('customers')
    ->withQuery('alter_ego_map.name_register:*')
    ->build()
    ->execute();

$response->getNumFound(); // 2
```

```python
results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
results['num_found'] # 2
```

```csharp
var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('customers')
    .withQuery('alter_ego_map.name_register:*')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

Once we know how to query embedded fields like this, we can query those
just like any other.
Let's find out which maps have an `alter_ego` +sub-map that contains a `name` register that ends with `PLant`, and +display that customer's first name: + +```java +// Using the same method from above: +String query = "alter_ego_map.name_register:*Plant"; + +// Again using the same method from above: +int numberFound = results.numResults(); // 1 +String registerValue = + results.getAllResults().get(0).get("first_name_register").get(0); // Joan +``` + +```ruby +results = client.search('customers', 'alter_ego_map.name_register:*Plant') +results['num_found'] # 1 +results['docs'][0]['first_name_register'] # 'Joan' +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('customers') + ->withQuery('alter_ego_map.name_register:*Plant') + ->build() + ->execute(); + +$response->getNumFound(); // 1 +$response->getDocs()[0]->first_name_register']; // Joan +``` + +```python +results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant') +results['num_found'] # 1 +results['docs'][0]['first_name_register'] # u'Joan +``` + +```csharp +var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('customers') + .withQuery('alter_ego_map.name_register:*Plant') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +Success! We've now queried not just maps but also maps within maps. diff --git a/content/riak/kv/2.9.1/developing/usage/secondary-indexes.md b/content/riak/kv/2.9.1/developing/usage/secondary-indexes.md new file mode 100644 index 0000000000..91d661df22 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/usage/secondary-indexes.md @@ -0,0 +1,2026 @@ +--- +title: "Using Secondary Indexes (2i)" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Using Secondary Indexes" + identifier: "usage_2i" + weight: 107 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.1/dev/using/2i + - /riak/kv/2.9.1/dev/using/2i +--- + +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb +[plan backend memory]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/memory +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/reference/strong-consistency + +> **Note: Riak Search preferred for querying** +> +> If you're interested in non-primary-key-based querying in Riak, i.e. if +you're looking to go beyond straightforward K/V operations, we now +recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search/) rather than secondary indexes for +a variety of reasons. Most importantly, Riak Search has a far more +capacious querying API and can be used with all of Riak's storage +backends. + +Secondary indexes (2i) in Riak enable you to tag objects stored in Riak, +at write time, with one or more queryable values. Those values can then +be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/#user-accounts), for example, you could tag each object +associated with that user with a username or other unique marker. Once +tagged, you could find all objects in a Riak bucket sharing that tag. +Secondary indexes can be either a binary or string, such as +`sensor_1_data` or `admin_user` or `click_event`, or an integer, such as +`99` or `141121`. 
[Riak Search]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search/) serves analogous purposes but is quite
different because it parses key/value data itself and builds indexes on
the basis of Solr schemas.

Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory]
backends.

## Features

* Allows two types of secondary attributes: integers and strings (aka
  binaries)
* Allows querying by exact match or range on one index
* Allows pagination of results
* Allows streaming of results
* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce/)
  query

> **Note on 2i and strong consistency**
>
> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency]
feature introduced in Riak version 2.0. If you store objects in
[strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
secondary index metadata to those objects, you can still perform
strongly consistent operations on those objects but the secondary
indexes will be ignored.

## When to Use Secondary Indexes

Secondary indexes are useful when you want to find data on the basis of
something other than objects' bucket type, bucket, and key, i.e. when
you want objects to be discoverable based on more than their location
alone.

2i works best for objects whose value is stored in an opaque blob, like
a binary file, because those objects don't offer any clues that enable
you to discover them later. Indexing enables you to tag those objects
and find all objects with the same tag in a specified bucket later on.

2i is thus recommended when your use case requires an easy-to-use search
mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/2.9.1/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that
enables an application to tell Riak things like "fetch all objects
tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged
with numbers between 1500 and 1509."

2i is also recommended if your use case requires anti-entropy. Since
secondary indexes are just metadata attached to key/value objects, 2i
piggybacks off of read-repair.

## When Not to Use Secondary Indexes

* If your ring size exceeds 512 partitions, 2i can cause performance
  issues in large clusters.
* When you need more than the exact match and range searches that 2i
  supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search/).
* When you want to use composite queries. A query like
  `last_name=zezeski AND state=MD` would have to be split into two
  queries and the results merged (or it would need to involve
  [MapReduce]({{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce/)).

## Query Interfaces and Examples

Typically, the result set from a 2i query is a list of object keys from
the specified bucket that include the index values in question. As we'll
see below, when executing range queries in Riak 1.4 or higher, it is
possible to retrieve the index values along with the object keys.

### Inserting Objects with Secondary Indexes

In this example, the key `john_smith` is used to store user data in the
bucket `users`, which bears the `default` bucket type.
Let's say that an
application would like to add a Twitter handle and an email address to this
object as secondary indexes.

```java
Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith");

// In the Java client (and all clients), if you do not specify a bucket type,
// the client will use the default type. And so the following store command
// would be equivalent to the one above:
Location johnSmithKey = new Location(new Namespace("users"), "john_smith");

RiakObject obj = new RiakObject()
        .setContentType("application/json")
        .setValue(BinaryValue.create("{'user_data':{ ... }}"));

obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123");
obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com");

StoreValue store = new StoreValue.Builder(obj)
        .withLocation(johnSmithKey)
        .build();
client.execute(store);
```

```ruby
bucket = client.bucket_type('default').bucket('users')
obj = Riak::RObject.new(bucket, 'john_smith')
obj.content_type = 'application/json'
obj.raw_data = '{"user_data":{ ... }}'

# String/binary indexes must be set as an array of strings
obj.indexes['twitter_bin'] = %w{ jsmith123 }
obj.indexes['email_bin'] = %w{ jsmith@basho.com }
obj.store

# In the Ruby client (and all clients), if you do not specify a bucket
# type, the client will use the default type. And so the following set
# of commands would be equivalent to the one above:

bucket = client.bucket('users')
# repeat the same commands for building the object
obj.store
```

```php
$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
    ->addValueToIndex('twitter_bin', 'jsmith123')
    ->addValueToIndex('email_bin', 'jsmith@basho.com');

(new \Basho\Riak\Command\Builder\StoreObject($riak))
    ->withObject($object)
    ->buildLocation('john_smith', 'users', 'default')
    ->build()
    ->execute();
```

```python
bucket = client.bucket_type('default').bucket('users')
# In the Python client (and all clients), if you do not specify a bucket type,
# the client will use the default type. And so the following store command
# would be equivalent to the one above:
bucket = client.bucket('users')

obj = RiakObject(client, bucket, 'john_smith')
obj.content_type = 'text/plain'
obj.data = '...user data...'
obj.add_index('twitter_bin', 'jsmith123')
obj.add_index('email_bin', 'jsmith@basho.com')
obj.store()
```

```csharp
var id = new RiakObjectId("default", "users", "john_smith");
var obj = new RiakObject(id, "...user data...",
    RiakConstants.ContentTypes.TextPlain);
obj.BinIndex("twitter").Set("jsmith123");
obj.BinIndex("email").Set("jsmith@basho.com");
var rslt = client.Put(obj);
```

```javascript
var riakObj = new Riak.Commands.KV.RiakObject();
riakObj.setContentType('text/plain');
riakObj.setBucket('users');
riakObj.setKey('john_smith');
riakObj.setValue('...user data...');
riakObj.addToIndex('twitter_bin', 'jsmith123');
riakObj.addToIndex('email_bin', 'jsmith@basho.com');
client.storeValue({ value: riakObj }, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
Obj = riakc_obj:new({<<"default">>, <<"users">>},
                    <<"john_smith">>,
                    <<"...user data...">>,
                    <<"text/plain">>),
%% In the Erlang client (and all clients), if you do not specify a bucket type,
%% the client will use the default type. And so the following object would be
%% equivalent to the one above:

Obj = riakc_obj:new(<<"users">>,
                    <<"john_smith">>,
                    <<"...user data...">>,
                    <<"text/plain">>),
MD1 = riakc_obj:get_update_metadata(Obj),
MD2 = riakc_obj:set_secondary_index(
    MD1,
    [{{binary_index, "twitter"}, [<<"jsmith123">>]},
     {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]),
Obj2 = riakc_obj:update_metadata(Obj, MD2),
riakc_pb_socket:put(Pid, Obj2).
```

```golang
obj := &riak.Object{
    ContentType:     "text/plain",
    Charset:         "utf-8",
    ContentEncoding: "utf-8",
    BucketType:      "indexes",
    Bucket:          "users",
    Key:             "john_smith",
    Value:           []byte("…user data…"),
}

obj.AddToIndex("twitter_bin", "jsmith123")
obj.AddToIndex("email_bin", "jsmith@basho.com")

cmd, err := riak.NewStoreValueCommandBuilder().
    WithContent(obj).
    Build()
if err != nil {
    return err
}

if err := cluster.Execute(cmd); err != nil {
    return err
}
```

```curl
curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \
  -H 'x-riak-index-twitter_bin: jsmith123' \
  -H 'x-riak-index-email_bin: jsmith@basho.com' \
  -H 'Content-Type: application/json' \
  -d '{"userData":"data"}'
```

> **Getting started with Riak clients**
>
> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.1/developing/client-libraries), you can find more information about getting started with
your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.1/developing/getting-started) section.

This has accomplished the following:

* The object has been stored with a primary bucket/key of
  `users`/`john_smith`
* The object now has a secondary index called `twitter_bin` with a value
  of `jsmith123`
* The object now has a secondary index called `email_bin` with a value
  of `jsmith@basho.com`

### Querying Objects with Secondary Indexes

Let's query the `users` bucket on the basis of Twitter handle to make
sure that we can find our stored object:

```java
Namespace usersBucket = new Namespace("users");
BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123")
        .build();
BinIndexQuery.Response response = client.execute(biq);
List<BinIndexQuery.Response.Entry> entries = response.getEntries();
for (BinIndexQuery.Response.Entry entry : entries) {
    System.out.println(entry.getRiakObjectLocation().getKey());
}
```

```ruby
bucket = client.bucket('users')
bucket.get_index('twitter_bin', 'jsmith123')

# This is equivalent to the following:
bucket = client.bucket_type('default').bucket('users')
bucket.get_index('twitter_bin', 'jsmith123')
```

```php
$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak))
    ->buildBucket('users')
    ->withIndexName('twitter_bin')
    ->withScalarValue('jsmith123')
    ->build()
    ->execute()
    ->getResults();
```

```python
bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users')
bucket.get_index('twitter_bin', 'jsmith123').results
```

```csharp
var idxId = new RiakIndexId("default", "users", "twitter");
var rslt = client.GetSecondaryIndex(idxId, "jsmith123");
var idxRslt = rslt.Value;
foreach (var keyTerm in idxRslt.IndexKeyTerms)
{
    Debug.WriteLine(keyTerm.Key);
}
```

```javascript
var query_keys = [];
function query_cb(err, rslt) {
    if (err) {
        throw new Error(err);
    }

    if (rslt.done) {
        query_keys.forEach(function (key) {
            logger.info("2i query key: '%s'", key);
        });
    }
    if (rslt.values.length > 0) {
        Array.prototype.push.apply(query_keys,
            rslt.values.map(function (value) {
                return value.objectKey;
            }));
    }
}

var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
    .withBucket('users')
    .withIndexName('twitter_bin')
    .withIndexKey('jsmith123')
    .withCallback(query_cb)
    .build();
client.execute(cmd);
```

```erlang
{ok, Results} =
    riakc_pb_socket:get_index(Pid,
                              <<"users">>, %% bucket
                              {binary_index, "twitter"}, %% index name
                              <<"jsmith123">>). %% index
```

```golang
cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucketType("indexes").
    WithBucket("users").
    WithIndexName("twitter_bin").
    WithIndexKey("jsmith123").
    Build()
if err != nil {
    return err
}

if err := cluster.Execute(cmd); err != nil {
    return err
}
```

```curl
curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
```

The response:

```java
john_smith
```

```ruby
["john_smith"]
```

```php
['john_smith']
```

```python
['john_smith']
```

```csharp
john_smith
```

```javascript
john_smith
```

```erlang
{ok,{index_results_v1,[<<"john_smith">>],
                      undefined,undefined}}.
```

```golang
john_smith
```

```curl
{
  "keys": [
    "john_smith"
  ]
}
```

## Examples

To run the following examples, make sure that Riak is configured to use
an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].

## Indexing Objects

The following example indexes four different objects. Notice that we're
storing both integer and string (aka binary) fields. Field names are
automatically lowercased, some fields have multiple values, and
duplicate fields are automatically de-duplicated, as in the following
example:

```java
Namespace peopleBucket = new Namespace("indexes", "people");

RiakObject larry = new RiakObject()
        .setValue(BinaryValue.create("My name is Larry"));
larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
StoreValue storeLarry = new StoreValue.Builder(larry)
        .withLocation(peopleBucket.setKey("larry"))
        .build();
client.execute(storeLarry);

RiakObject moe = new RiakObject()
        .setValue(BinaryValue.create("My name is Moe"));
moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
StoreValue storeMoe = new StoreValue.Builder(moe)
        .withLocation(peopleBucket.setKey("moe"))
        .build();
client.execute(storeMoe);

RiakObject curly = new RiakObject()
        .setValue(BinaryValue.create("My name is Curly"));
curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
StoreValue storeCurly = new StoreValue.Builder(curly)
        .withLocation(peopleBucket.setKey("curly"))
        .build();
client.execute(storeCurly);

RiakObject veronica = new RiakObject()
        .setValue(BinaryValue.create("My name is Veronica"));
veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
        .add("val4").add("val4").add("val4a").add("val4b");
veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
        .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
StoreValue storeVeronica = new StoreValue.Builder(veronica)
        .withLocation(peopleBucket.setKey("veronica"))
        .build();
client.execute(storeVeronica);
```

```ruby
bucket = client.bucket_type('indexes').bucket('people')

obj1 =
Riak::RObject.new(bucket, 'larry')
obj1.content_type = 'text/plain'
obj1.raw_data = 'My name is Larry'
obj1.indexes['field1_bin'] = %w{ val1 }
# Like binary/string indexes, integer indexes must be set as an array,
# even if you wish to add only a single index
obj1.indexes['field2_int'] = [1001]
obj1.store

obj2 = Riak::RObject.new(bucket, 'moe')
obj2.content_type = 'text/plain'
obj2.raw_data = 'My name is Moe'
obj2.indexes['Field1_bin'] = %w{ val2 }
obj2.indexes['Field2_int'] = [1002]
obj2.store

obj3 = Riak::RObject.new(bucket, 'curly')
obj3.content_type = 'text/plain'
obj3.raw_data = 'My name is Curly'
obj3.indexes['FIELD1_BIN'] = %w{ val3 }
obj3.indexes['FIELD2_INT'] = [1003]
obj3.store

obj4 = Riak::RObject.new(bucket, 'veronica')
obj4.content_type = 'text/plain'
obj4.raw_data = 'My name is Veronica'
obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
# Assigning to the same index twice would overwrite it, so all of the
# integer values are set in a single array; Riak de-duplicates them
obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1004, 1007]
obj4.store
```

```php
$bucket = new \Basho\Riak\Bucket('people', 'indexes');

$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
    ->addValueToIndex('field1_bin', 'val1')
    ->addValueToIndex('field2_int', 1001);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
    ->withObject($object)
    ->withLocation(new \Basho\Riak\Location('larry', $bucket))
    ->build()
    ->execute();

$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
    ->addValueToIndex('Field1_bin', 'val2')
    ->addValueToIndex('Field2_int', 1002);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
    ->withObject($object)
    ->withLocation(new \Basho\Riak\Location('moe', $bucket))
    ->build()
    ->execute();

$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
    ->addValueToIndex('FIELD1_BIN', 'val3')
    ->addValueToIndex('FIELD2_INT', 1003);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
    ->withObject($object)
    ->withLocation(new \Basho\Riak\Location('curly', $bucket))
    ->build()
    ->execute();

$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
    ->addValueToIndex('field1_bin', 'val4')
    ->addValueToIndex('field1_bin', 'val4')
    ->addValueToIndex('field1_bin', 'val4a')
    ->addValueToIndex('field1_bin', 'val4b')
    ->addValueToIndex('field2_int', 1004)
    ->addValueToIndex('field2_int', 1005)
    ->addValueToIndex('field2_int', 1006)
    ->addValueToIndex('field2_int', 1004)
    ->addValueToIndex('field2_int', 1004)
    ->addValueToIndex('field2_int', 1007);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
    ->withObject($object)
    ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
    ->build()
    ->execute();
```

```python
bucket = client.bucket_type('indexes').bucket('people')

obj1 = RiakObject(client, bucket, 'larry')
obj1.content_type = 'text/plain'
obj1.data = 'My name is Larry'
obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
obj1.store()

obj2 = RiakObject(client, bucket, 'moe')
obj2.content_type = 'text/plain'
obj2.data = 'Moe'
obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
obj2.store()

obj3 = RiakObject(client, bucket, 'curly')
obj3.content_type = 'text/plain'
obj3.data = 'Curly'
obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
obj3.store()

obj4 = RiakObject(client, bucket,
'veronica')
obj4.content_type = 'text/plain'
obj4.data = 'Veronica'
obj4.add_index('field1_bin', 'val4') \
    .add_index('field1_bin', 'val4a') \
    .add_index('field1_bin', 'val4b') \
    .add_index('field2_int', 1004) \
    .add_index('field2_int', 1004) \
    .add_index('field2_int', 1005) \
    .add_index('field2_int', 1006) \
    .add_index('field2_int', 1004) \
    .add_index('field2_int', 1004) \
    .add_index('field2_int', 1004) \
    .add_index('field2_int', 1007)
obj4.store()
```

```csharp
var larryId = new RiakObjectId("indexes", "people", "larry");
var larry = new RiakObject(larryId, "My name is Larry",
    RiakConstants.ContentTypes.TextPlain);

larry.BinIndex("field1").Set("val1");
larry.IntIndex("field2").Set(1001);

client.Put(larry);

var moeId = new RiakObjectId("indexes", "people", "moe");
var moe = new RiakObject(moeId, "My name is Moe",
    RiakConstants.ContentTypes.TextPlain);

moe.BinIndex("Field1").Set("val2");
moe.IntIndex("Field2").Set(1002);

client.Put(moe);

var curlyId = new RiakObjectId("indexes", "people", "curly");
var curly = new RiakObject(curlyId, "My name is Curly",
    RiakConstants.ContentTypes.TextPlain);

curly.BinIndex("FIELD1").Set("val3");
curly.IntIndex("FIELD2").Set(1003);

client.Put(curly);

var veronicaId = new RiakObjectId("indexes", "people", "veronica");
var veronica = new RiakObject(veronicaId, "My name is Veronica",
    RiakConstants.ContentTypes.TextPlain);

veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" });
veronica.IntIndex("FIELD2").Set(new BigInteger[] {
    1004, 1005, 1006, 1004, 1004, 1007
});

client.Put(veronica);
```

```javascript
function store_cb(err, rslt, async_cb) {
    if (err) {
        throw new Error(err);
    }
    async_cb(null, rslt);
}

var storeFuncs = [
    function (async_cb) {
        var riakObj = new Riak.Commands.KV.RiakObject();
        riakObj.setContentType('text/plain');
        riakObj.setBucketType('indexes');
        riakObj.setBucket('people');
        riakObj.setKey('larry');
        riakObj.setValue('My name is Larry');
        riakObj.addToIndex('field1_bin', 'val1');
        riakObj.addToIndex('field2_int', 1001);
        client.storeValue({ value: riakObj }, function (err, rslt) {
            store_cb(err, rslt, async_cb);
        });
    },
    function (async_cb) {
        var riakObj = new Riak.Commands.KV.RiakObject();
        riakObj.setContentType('text/plain');
        riakObj.setBucketType('indexes');
        riakObj.setBucket('people');
        riakObj.setKey('moe');
        riakObj.setValue('My name is Moe');
        riakObj.addToIndex('Field1_bin', 'val2');
        riakObj.addToIndex('Field2_int', 1002);
        client.storeValue({ value: riakObj }, function (err, rslt) {
            store_cb(err, rslt, async_cb);
        });
    },
    function (async_cb) {
        var riakObj = new Riak.Commands.KV.RiakObject();
        riakObj.setContentType('text/plain');
        riakObj.setBucketType('indexes');
        riakObj.setBucket('people');
        riakObj.setKey('curly');
        riakObj.setValue('My name is Curly');
        riakObj.addToIndex('FIELD1_BIN', 'val3');
        riakObj.addToIndex('FIELD2_INT', 1003);
        client.storeValue({ value: riakObj }, function (err, rslt) {
            store_cb(err, rslt, async_cb);
        });
    },
    function (async_cb) {
        var riakObj = new Riak.Commands.KV.RiakObject();
        riakObj.setContentType('text/plain');
        riakObj.setBucketType('indexes');
        riakObj.setBucket('people');
        riakObj.setKey('veronica');
        riakObj.setValue('My name is Veronica');
        riakObj.addToIndex('FIELD1_bin', 'val4');
        riakObj.addToIndex('FIELD1_bin', 'val4');
        riakObj.addToIndex('FIELD1_bin', 'val4a');
        riakObj.addToIndex('FIELD1_bin', 'val4b');
        riakObj.addToIndex('FIELD2_int', 1004);
riakObj.addToIndex('FIELD2_int', 1005); + riakObj.addToIndex('FIELD2_int', 1006); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1007); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + } +]; +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Larry = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"My name is Larry">>, + <<"text/plain">>), +LarryMetadata = riakc_obj:get_update_metadata(Larry), +LarryIndexes = riakc_obj:set_secondary_index( + LarryMetadata, + [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}] +), +LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes). + +Moe = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"moe">>, + <<"My name is Moe">>, + <<"text/plain">>), +MoeMetadata = riakc_obj:get_update_metadata(Moe), +MoeIndexes = riakc_obj:set_secondary_index( + MoeMetadata, + [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}] +), +MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes). + +Curly = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"curly">>, + <<"My name is Curly">>, + <<"text/plain">>), +CurlyMetadata = riakc_obj:get_update_metadata(Curly), +CurlyIndexes = riakc_obj:set_secondary_index( + CurlyMetadata, + [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}] +), +CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes). + +Veronica = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"veronica">>, + <<"My name is Veronica">>, + <<"text/plain">>), +VeronicaMetadata = riakc_obj:get_update_metadata(Veronica), +VeronicaIndexes = riakc_obj:set_secondary_index( + VeronicaMetadata, + [{{binary_index, "field1"}, [<<"val4">>]}, {{binary_index, "field1"}, [<<"val4">>]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1005]}, {{integer_index, "field2"}, [1006]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1007]}] +), +VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes). +``` + +```golang +o1 := &riak.Object{ + Key: "larry", + Value: []byte("My name is Larry"), +} +o1.AddToIndex("field1_bin", "val1") +o1.AddToIntIndex("field2_int", 1001) + +o2 := &riak.Object{ + Key: "moe", + Value: []byte("My name is Moe"), +} +o2.AddToIndex("Field1_bin", "val2") +o2.AddToIntIndex("Field2_int", 1002) + +o3 := &riak.Object{ + Key: "curly", + Value: []byte("My name is Curly"), +} +o3.AddToIndex("FIELD1_BIN", "val3") +o3.AddToIntIndex("FIELD2_INT", 1003) + +o4 := &riak.Object{ + Key: "veronica", + Value: []byte("My name is Veronica"), +} +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4a") +o4.AddToIndex("FIELD1_bin", "val4b") +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1005) +o4.AddToIntIndex("FIELD2_int", 1006) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1007) + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "text/plain" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithContent(obj). 
+ Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field1_bin: val1" \ + -H "x-riak-index-field2_int: 1001" \ + -d 'My name is Larry' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \ + -H "x-riak-index-Field1_bin: val2" \ + -H "x-riak-index-Field2_int: 1002" \ + -d 'My name is Moe' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \ + -H "X-RIAK-INDEX-FIELD1_BIN: val3" \ + -H "X-RIAK-INDEX-FIELD2_INT: 1003" \ + -d 'My name is Curly' + +curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \ + -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \ + -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1007" \ + -d 'My name is Veronica' +``` + +The above objects will end up having the following secondary indexes, +respectively: + +* `Larry` --- Binary index `field1_bin` and integer index `field2_int` +* `Moe` --- Binary index `field1_bin` and integer index `field2_int` + (note that the index names are set to lowercase by Riak) +* `Curly` --- Binary index `field1_bin` and integer index `field2_int` + (note again that the index names are set to lowercase) +* `Veronica` --- Binary index `field1_bin` with the values `val4`, + `val4a`, and `val4b` and integer index `field2_int` with the values + `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed) + +As these examples show, there are safeguards in Riak that both normalize +the names of indexes and prevent the accumulation of redundant indexes. + +## Invalid Field Names and Types + +The following examples demonstrate what happens when an index field is +specified with an invalid field name or type. The system responds with +`400 Bad Request` and a description of the error. + +Invalid field name: + +```java +// The Java client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_foo'] = [1001] + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter', 'jsmith123'); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_foo', 1001) + +# Result: +riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'." 
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
+>
+> To avoid issues such as the above, a new option has been added to the `riak.conf` file to allow you to disable or enable node participation in 2i queries. Setting `participate_in_coverage=disabled` will prevent the node in question from participating. This feature is typically used to stop nodes newly added to the cluster, which have not yet received all of their data, from participating in 2i queries and returning inconsistent results. Changing the `participate_in_coverage` setting requires Riak to be restarted on that node for the change to take effect. The default setting is `enabled`.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field1_bin')
+  ->withScalarValue('val1')
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
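+    // Note: as in the later examples, the built command still has to be
+    // run with cluster.Execute(c1) before any results are available.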
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field2_int')
+  ->withScalarValue(1001)
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job (note that the sample data indexes `val3`
+under the binary index `field1_bin`):
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field1_bin",
+    "key": "val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4")
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2'..'val4')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field1_bin')
+  ->withRangeValue('val2', 'val4')
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2', 'val4')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withRange('val2', 'val4')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"}, %% index name
+    <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4"
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithRange("val2", "val4").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4
+```
+
+Or query an integer index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002..1004)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field2_int')
+  ->withRangeValue(1002, 1004)
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002, 1004)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withRange(1002, 1004)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"}, %% index name
+    1002, 1004 %% range query for keys between 1002 and 1004
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntRange(1002, 1004).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field2_int",
+    "start": "1002",
+    "end": "1004"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('tweets', 'indexes')
+  ->withIndexName('hashtags')
+  ->withRangeValue('rock', 'rocl')
+  ->withReturnTerms()
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">> %% range query for keys between "rock" and "rocl"
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
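+
+As a quick sketch of the full loop (illustrative only, assuming the
+`tweets` bucket from above; the Ruby client returns each page as a
+collection that carries its own continuation):
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+opts = { max_results: 5 }
+loop do
+  page = bucket.get_index('hashtags_bin', 'ri'..'ru', opts)
+  page.each { |key| puts key }
+  break if page.continuation.nil?
+  opts[:continuation] = page.continuation
+end
+```
+
+The per-client, single-page versions of the same query follow.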
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withMaxResults(5)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('tweets', 'indexes')
+  ->withIndexName('hashtags')
+  ->withRangeValue('ri', 'ru')
+  ->withMaxResults(5)
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5)
+```
+
+```csharp
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+function do_query(continuation) {
+    var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+        .withBucketType('indexes')
+        .withBucket('tweets')
+        .withIndexName('hashtags_bin')
+        .withRange('ri', 'ru')
+        .withMaxResults(5)
+        .withCallback(pagination_cb);
+
+    if (continuation) {
+        binIdxCmdBuilder.withContinuation(continuation);
+    }
+
+    client.execute(binIdxCmdBuilder.build());
+}
+
+var query_keys = [];
+function pagination_cb(err, rslt) {
+    if (err) {
+        logger.error("query_cb err: '%s'", err);
+        return;
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+        query_keys = [];
+
+        if (rslt.continuation) {
+            do_query(rslt.continuation);
+        }
+    }
+
+    if (rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+do_query();
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [{max_results, 5}]
+).
+```
+
+```golang
+func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error {
+    builder := riak.NewSecondaryIndexQueryCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("tweets").
+        WithIndexName("hashtags_bin").
+        WithRange("ri", "ru").
+        WithMaxResults(5)
+
+    if continuation != nil && len(continuation) > 0 {
+        builder.WithContinuation(continuation)
+    }
+
+    cmd, err := builder.Build()
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    printIndexQueryResults(cmd)
+
+    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
+    if sciq.Response == nil {
+        return errors.New("[DevUsing2i] expected response but did not get one")
+    }
+
+    rc := sciq.Response.Continuation
+    if rc != nil && len(rc) > 0 {
+        return doPaginatedQuery(cluster, sciq.Response.Continuation)
+    }
+
+    return nil
+}
+
+func queryingPagination(cluster *riak.Cluster) error {
+    return doPaginatedQuery(cluster, nil)
+}
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true"
+```
+
+Here is an example JSON response (your client-specific response may differ):
+
+```json
+{
+  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
+  "results": [
+    { "rice": "349222574510710785" },
+    { "rickross": "349222868095217664" },
+    { "ridelife": "349221819552763905" },
+    { "ripjake": "349220649341952001" },
+    { "ripjake": "349220687057129473" }
+  ]
+}
+```
+
+Take the continuation value from the previous result set and feed it
+back into the query.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
+        .withMaxResults(5)
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+  'hashtags_bin',
+  'ri'..'ru',
+  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+  max_results: 5,
+  return_terms: true
+)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('tweets', 'indexes')
+  ->withIndexName('hashtags')
+  ->withRangeValue('ri', 'ru')
+  ->withMaxResults(5)
+  ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+    'hashtags_bin',
+    'ri', 'ru',
+    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+    max_results=5,
+    return_terms=True
+)
+```
+
+```csharp
+// rslt is the previous 2i fetch result
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+// See above example
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [
+        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
+        {max_results, 5},
+        {return_terms, true}
+    ]
+).
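+%% Repeat the call, passing each newly returned continuation back in,
+%% until the response no longer includes one.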
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+        new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+        client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with pagination and `return_terms`.
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys. 
See the pagination example above: hash
+tags (2i keys) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages which contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index as in the previous example and pipes this into a MapReduce job that
+counts the number of records in the `people` bucket. In order to
+improve efficiency, the batch size has been increased from the default
+size of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key": "people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
diff --git a/content/riak/kv/2.9.1/developing/usage/security.md b/content/riak/kv/2.9.1/developing/usage/security.md
new file mode 100644
index 0000000000..25e5bd4aff
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/security.md
@@ -0,0 +1,99 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/advanced/client-security
+  - /riak/kv/2.9.1/dev/advanced/client-security
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/2.9.1/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, also known as [security sources]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients present a CA-generated certificate and matching
+  private key
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified using the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis. 
This means that +you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{<baseurl>}}riak/kv/2.9.1/developing/usage) have to use username and password. The approach +that you adopt will depend on your security needs. + +This document provides a general overview of how that works. For +managing security in Riak itself, see the following documents: + +* [Authentication and Authorization]({{<baseurl>}}riak/kv/2.9.1/using/security/basics) +* [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/) + +We also provide client-library-specific guides for the following +officially supported clients: + +* [Java]({{<baseurl>}}riak/kv/2.9.1/developing/usage/security/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.1/developing/usage/security/ruby) +* [PHP]({{<baseurl>}}riak/kv/2.9.1/developing/usage/security/php) +* [Python]({{<baseurl>}}riak/kv/2.9.1/developing/usage/security/python) +* [Erlang]({{<baseurl>}}riak/kv/2.9.1/developing/usage/security/erlang) + +## Certificates, Keys, and Authorities + +If Riak security is enabled, all client operations, regardless of the +security source you choose for those clients, must be over a secure SSL +connection. If you are using a self-generated Certificate Authority +(CA), Riak and connecting clients will need to share that CA. + +To use certificate-based auth, you will need to create a Public Key +Infrastructure (PKI) based on +[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central +foundation of your PKI should be a Certificate Authority (CA), created +inside of a secure environment, that can be used to sign certificates. +In addition to a CA, your client will need to have access to a private +key shared only by the client and Riak as well as a CA-generated +certificate. + +To prevent so-called [Man-in-the-Middle +attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private +keys should never be shared beyond Riak and connecting clients. + +> **HTTP not supported** +> +> Certificate-based authentication is available only through Riak's +[Protocol Buffers]({{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{<baseurl>}}riak/kv/2.9.1/developing/api/http). + +### Default Names + +In Riak's [configuration files]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#security), the +default certificate file names are as follows: + +Cert | Filename +:----|:------- +Certificate authority (CA) | `cacertfile.pem` +Private key | `key.pem` +CA-generated cert | `cert.pem` + +These filenames will be used in the client-library-specific tutorials. diff --git a/content/riak/kv/2.9.1/developing/usage/security/erlang.md b/content/riak/kv/2.9.1/developing/usage/security/erlang.md new file mode 100644 index 0000000000..7a7d961cd6 --- /dev/null +++ b/content/riak/kv/2.9.1/developing/usage/security/erlang.md @@ -0,0 +1,114 @@ +--- +title_supertext: "Client Security:" +title: "Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Erlang" + identifier: "usage_security_erlang" + weight: 103 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.1/dev/advanced/client-security/erlang + - /riak/kv/2.9.1/dev/advanced/client-security/erlang +--- + +This tutorial shows you how to set up a Riak Erlang client to +authenticate itself when connecting to Riak. 
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", ""},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+  ],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+```
+
+Please note that you do not need to specify a password if you are not
+using password-based authentication. If you are using a different
+security source, Riak will ignore the password. You can enter an empty
+string (as in the example above) or anything you'd like.
+
+This client is not currently set up to use any of the available security
+sources, with the exception of trust-based authentication, provided that
+the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which the client is connecting has been specified as trusted. More
+on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `SecurityOptions` list from
+above. We'll use the password `rosebud` here and in the rest of the
+examples.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", "rosebud"},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+  ],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
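+
+%% Optional sanity check, assuming the riakuser credentials above are
+%% valid on the node; a successfully authenticated connection responds:
+pong = riakc_pb_socket:ping(Pid).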
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.1/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` diff --git a/content/riak/kv/2.9.1/developing/usage/security/java.md b/content/riak/kv/2.9.1/developing/usage/security/java.md new file mode 100644 index 0000000000..c0611bdede --- /dev/null +++ b/content/riak/kv/2.9.1/developing/usage/security/java.md @@ -0,0 +1,117 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.1/dev/advanced/client-security/java + - /riak/kv/2.9.1/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to
+create a cluster object (we'll call it `cluster`), which will in turn be
+used to create a `client` object. The setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.RiakCluster;
+import com.basho.riak.client.api.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        // This will specify a username but no password or keystore:
+        .withAuth("riakuser", null, null)
+        .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+        .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        .withAuth("riakuser", "rosebud", ks)
+        .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
diff --git a/content/riak/kv/2.9.1/developing/usage/security/php.md b/content/riak/kv/2.9.1/developing/usage/security/php.md
new file mode 100644
index 0000000000..42cef60e3d
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/security/php.md
@@ -0,0 +1,118 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/advanced/client-security/php
+  - /riak/kv/2.9.1/dev/advanced/client-security/php
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics). 
[Certificate]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
diff --git a/content/riak/kv/2.9.1/developing/usage/security/python.md b/content/riak/kv/2.9.1/developing/usage/security/python.md
new file mode 100644
index 0000000000..f1390647b3
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/security/python.md
@@ -0,0 +1,172 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/advanced/client-security/python
+  - /riak/kv/2.9.1/dev/advanced/client-security/python
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object. 
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the above, with the exception that we'll also specify a
+password for the client in the `creds` object from above. We'll use the
+password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.1/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a general CA (as with all security sources), a username, a
+CA-generated cert, and a private key. We'll assume that all certs are
+stored in `/ssl_dir`, as in the previous examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      cert_file='/ssl_dir/cert.pem',
+                      pkey_file='/ssl_dir/key.pem')
+```
+
+## Specifying a Certificate Revocation List
+
+If you are using a CA-generated Certificate Revocation List (CRL), you
+can specify its filepath using the `crl_file` parameter.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      crl_file='/ssl_dir/revocation.crl')
+```
+
+## Specifying Ciphers
+
+To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/2.9.1/using/security/basics/#security-ciphers), you can pass in a colon-delimited
+string to the `ciphers` parameter:
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC')
+```
+
+## Using OpenSSL Objects
+
+Whenever you specify certs, you have the option of either passing in
+file paths as strings (as in the examples above) or properly created
+OpenSSL objects, e.g. objects created using the
+[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If
+you generate OpenSSL objects this way, you should note that they must
+be specified differently when creating a `SecurityCreds` object. The
+table below lists the appropriate parameter names for each method, as
+well as the pyOpenSSL class to which each cert must belong if you create
+OpenSSL objects. 
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `key_file` | `key` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify filepaths, the appropriate certs will be loaded and
+converted into the appropriate OpenSSL object. The functions used for
+this are `OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
diff --git a/content/riak/kv/2.9.1/developing/usage/security/ruby.md b/content/riak/kv/2.9.1/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..9fa11db124
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/security/ruby.md
@@ -0,0 +1,158 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/advanced/client-security/ruby
+  - /riak/kv/2.9.1/dev/advanced/client-security/ruby
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we will
+specify a password for the client in the `authentication` hash. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.1/using/security/basics#user-management).
+
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/ssl_dir/client_cert.pem',
+    cert: '/ssl_dir/cert.pem',
+    key: '/ssl_dir/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL object.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object.
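+
+For example, a client that skips CRL checking entirely might be
+instantiated like this (a sketch reusing the credentials from above):
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    crl: false # disable CRL checking
+  }
+)
+```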
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol))
+endpoint, that endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
diff --git a/content/riak/kv/2.9.1/developing/usage/updating-objects.md b/content/riak/kv/2.9.1/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..61cfdb2134
--- /dev/null
+++ b/content/riak/kv/2.9.1/developing/usage/updating-objects.md
@@ -0,0 +1,774 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/using/updates
+  - /riak/kv/2.9.1/dev/using/updates
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context). These context objects track the causal history of objects.
+They are attached to _all_ Riak objects as metadata, and they are not
+readable by humans. They may sound complex---and they are fairly complex
+behind the scenes---but using them in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->withLocation($location)
+    ->build()
+    ->execute()
+    ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation($location)
+    ->withObject($object)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but it
+%% will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+obj = rsp.Values[0]
+obj.Value = []byte("Harlem Globetrotters")
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc = cmd.(*riak.StoreValueCommand)
+rsp = svc.Response
+obj = rsp.Values[0]
+fmt.Printf("champion: %v", string(obj.Value))
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+```
+
+In the samples above, we didn't need to actually interact with the
+context object, as retaining and passing along the context object was
+accomplished automatically by the client. If, however, you do need
+access to an object's context, the clients enable you to fetch it from
+the object:
+
+```java
+// Using the RiakObject obj from above:
+
+Vclock vClock = obj.getVclock();
+System.out.println(vClock.asString());
+
+// The context object will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```ruby
+# Using the RObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```php
+# Using the $object from above:
+
+echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```python
+# Using the RiakObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```csharp
+// Using the fetch result rslt from above:
+var vclock = rslt.Value.VectorClock;
+Console.WriteLine(Convert.ToBase64String(vclock));
+
+// The output will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```javascript
+// Using the RiakObject fetchedObj from above:
+var fetchedObj = rslt.values.shift();
+logger.info("vclock: %s", fetchedObj.getVClock().toString('base64'));
+
+// The output will look something like this:
+// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA=
+```
+
+```erlang
+%% Using the Obj object from above:
+
+riakc_obj:vclock(Obj).
+
+%% The context object will look something like this in the Erlang shell:
+%% <<107,206,97,96,96,96,204,96,...>>
+```
+
+```golang
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Println(rsp.VClock)
+
+// Output:
+// X3hNXFq3ythUqvvrG9eJEGbUyLS
+```
+
+## The Object Update Cycle
+
+If you decide that your application requires mutable data in Riak, we
+recommend that you:
+
+* avoid high-frequency object updates to the same key (i.e. multiple
+  updates per second for long periods of time), as this will degrade
+  Riak performance; and that you
+* follow a read-modify-write cycle when performing updates.
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated).
This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+  * [Java]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/java)
+  * [Ruby]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/ruby)
+  * [Python]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/python)
+  * [C#]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/csharp)
+  * [Go]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+[strong consistency documentation]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/2.9.1/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official Ruby,
+Python, .NET, Node.js, Erlang, and Go clients. Because updates with the official
+Java client function somewhat differently, those examples can be found in the
+[section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which will bear the bucket type
+`siblings`, which sets `allow_mult` to `true`. The key for each object
+is the name of the team, e.g. `giants`, `broncos`, etc. Each object will
+consist of the name of the coach in plain text.
Here's an example of +creating and storing such an object: + +```ruby +bucket = client.bucket('coaches') +obj = bucket.get_or_new('seahawks', type: 'siblings') +obj.content_type = 'text/plain' +obj.raw_data = 'Pete Carroll' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + +if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); +} else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); +} + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('siblings').bucket('coaches') +obj = RiakObject(client, bucket, 'seahawks') +obj.content_type = 'text/plain' +obj.data = 'Pete Carroll' +obj.store() +``` + +```csharp +var id = new RiakObjectId("siblings", "coaches", "seahawks"); +var obj = new RiakObject(id, "Pete Carroll", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('siblings'); +riakObj.setBucket('coaches'); +riakObj.setKey('seahawks'); +riakObj.setValue('Pete Carroll'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } else { + logger.info('Stored Pete Carroll'); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>}, + <<"seahawks">>, + <<"Pete Carroll">>, + <<"text/plain">>). +riakc_pb_socket:put(Pid, Obj). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Pete Carroll"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey("seahawks"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +fmt.Println("Stored Pete Carroll") +``` + +Every once in a while, though, head coaches change in the NFL, which +means that our data would need to be updated. 
Below is an example
+function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.raw_data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($riak, $team, $coach) {
+    $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    if ($response->isSuccess()) {
+        $object = $response->getObject();
+        $object->setData($coach);
+    } else {
+        $object = new \Basho\Riak\Object($coach, 'text/plain');
+    }
+
+    $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->withObject($object)
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    return $response->isSuccess();
+}
+
+echo update_coach($riak, 'packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Pid, Team, NewCoach) ->
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage
+update_coach(Pid, <<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak.
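+
+To make the three phases easy to see at a glance, here is a minimal Ruby
+sketch of the same cycle, assuming the client and the `siblings` bucket
+type from above (the coach's name is just example data):
+
+```ruby
+bucket = client.bucket('coaches')
+# Read: fetch the object along with its causal context
+obj = bucket.get_or_new('seahawks', type: 'siblings')
+# Modify: change the value, leaving the fetched context untouched
+obj.raw_data = 'New Coach'
+# Write: the fetched context travels back to Riak with the new value
+obj.store
+```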
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If
+you're not certain, however, then we recommend always reading the object
+first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+    public String username;
+    public List<String> hobbies;
+
+    public User(String username, List<String> hobbies) {
+        this.username = username;
+        this.hobbies = hobbies;
+    }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` field:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that you can use to replace the existing
+object with a new object of the same type rather than changing one or
+more properties of the object.
Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above would update the object without fetching it. You
+could, however, use a no-operation update to _read_ an object as well if
+you set `return_body` to `true` in your request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withFetchOption(Option.RETURN_BODY, true)
+        .withUpdate(Update.noopUpdate())
+        .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
diff --git a/content/riak/kv/2.9.1/downloads.md b/content/riak/kv/2.9.1/downloads.md
new file mode 100644
index 0000000000..e1fc407599
--- /dev/null
+++ b/content/riak/kv/2.9.1/downloads.md
@@ -0,0 +1,22 @@
+---
+title: "Download for Riak KV 2.9.1"
+description: "Download some stuff!"
+menu:
+  riak_kv-2.9.1:
+    name: "Download Riak KV"
+    identifier: "download_riak_kv"
+    weight: 101
+    pre: download-alt
+project: "riak_kv"
+project_version: 2.9.1
+toc: false
+layout: downloads
+listed_projects:
+  - project: "riak_kv"
+    version: 2.9.1
+    title: "Riak KV"
+    install_instructions_set: "setup/installing"
+aliases:
+  - /riak/2.9.1/downloads
+  - /riak/kv/2.9.1/downloads
+---
diff --git a/content/riak/kv/2.9.1/index.md b/content/riak/kv/2.9.1/index.md
new file mode 100644
index 0000000000..135b146e0c
--- /dev/null
+++ b/content/riak/kv/2.9.1/index.md
@@ -0,0 +1,72 @@
+---
+title: "Riak KV 2.9.1"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Riak KV"
+    identifier: "index"
+    weight: 100
+    pre: riak
+toc: false
+aliases:
+  - /riak/2.9.1/
+---
+
+[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/
+[config index]: {{<baseurl>}}riak/kv/2.9.1/configuring
+[downloads]: {{<baseurl>}}riak/kv/2.9.1/downloads/
+[install index]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/
+[plan index]: {{<baseurl>}}riak/kv/2.9.1/setup/planning
+[perf open files]: {{<baseurl>}}riak/kv/2.9.1/using/performance/open-files-limit
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/debian-ubuntu
+[usage search]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search
+[getting started]: {{<baseurl>}}riak/kv/2.9.1/developing/getting-started
+[dev client libraries]: {{<baseurl>}}riak/kv/2.9.1/developing/client-libraries
+
+
+
+Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data.
+
+## Supported Operating Systems
+
+- Amazon Linux 2016.09 (AWS)
+- Amazon Linux 2 (AWS)
+- CentOS 6
+- CentOS 7
+- CentOS 8
+- Debian 7.0 ("Wheezy")
+- Debian 8.0 ("Jessie")
+- Debian 9.0 ("Stretch")
+- Red Hat Enterprise Linux 6
+- Red Hat Enterprise Linux 7
+- Red Hat Enterprise Linux 8
+- Raspbian Buster
+- Ubuntu 12.04 ("Precise Pangolin")
+- Ubuntu 14.04 ("Trusty Tahr")
+- Ubuntu 16.04 ("Xenial Xerus")
+- Ubuntu 18.04 ("Bionic Beaver")
+- FreeBSD 10.4
+- FreeBSD 11.1
+- Mac OSX 10.11+ (development only)
+
+## Getting Started
+
+Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started:
+
+1. [Install Riak KV][install index]
+2. [Plan your Riak KV setup][plan index]
+3. [Configure Riak KV for your needs][config index]
+
+{{% note title="Developing with Riak KV" %}}
+If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/2.9.1/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more.
+{{% /note %}}
+
+## Popular Docs
+
+1. [Open Files Limit][perf open files]
+2. [Installing on Debian-Ubuntu][install debian & ubuntu]
+3. [Developing with Riak KV: Searching][usage search]
+4. [Developing with Riak KV: Getting Started][getting started]
+5.
[Developing with Riak KV: Client Libraries][dev client libraries]
diff --git a/content/riak/kv/2.9.1/learn.md b/content/riak/kv/2.9.1/learn.md
new file mode 100644
index 0000000000..4f358c0901
--- /dev/null
+++ b/content/riak/kv/2.9.1/learn.md
@@ -0,0 +1,47 @@
+---
+title: "Learn About Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Learning"
+    identifier: "learn"
+    weight: 400
+    pre: beaker
+toc: true
+---
+
+[learn why riak]: ./why-riak-kv/
+[learn use cases]: ./use-cases/
+[learn new nosql]: ./new-to-nosql/
+[glossary]: ./glossary/
+[concepts]: ./concepts/
+
+## In This Section
+
+#### [Why Riak KV?][learn why riak]
+
+An overview of Riak KV and when to use it.
+
+[Learn More >>][learn why riak]
+
+#### [Use Cases][learn use cases]
+
+Details use cases and applications in which Riak KV excels.
+
+[Learn More >>][learn use cases]
+
+
+
+#### [Glossary][glossary]
+
+A list of terms relating to Riak used throughout the documentation.
+
+[Learn More >>][glossary]
+
+#### [Concepts][concepts]
+
+Provides definitions for, insight into, and high-level information about the various parts of Riak KV.
+
+[Learn More >>][concepts]
diff --git a/content/riak/kv/2.9.1/learn/concepts.md b/content/riak/kv/2.9.1/learn/concepts.md
new file mode 100644
index 0000000000..65959adb92
--- /dev/null
+++ b/content/riak/kv/2.9.1/learn/concepts.md
@@ -0,0 +1,44 @@
+---
+title: "Concepts"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Concepts"
+    identifier: "learn_concepts"
+    weight: 104
+    parent: "learn"
+toc: true
+---
+
+[concept aae]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets
+[concept cap neg]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/capability-negotiation
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters
+[concept crdts]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/crdts
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency
+[concept keys objects]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/reference/strong-consistency
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/vnodes
+[config index]: {{<baseurl>}}riak/kv/2.9.1/configuring
+[plan index]: {{<baseurl>}}riak/kv/2.9.1/setup/planning
+[use index]: {{<baseurl>}}riak/kv/2.9.1/using/
+
+
+Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high-level information about the various parts of Riak KV that you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak.
+
+Learn more about:
+
+* [Active Anti-Entropy (AAE)][concept aae]
+* [Buckets][concept buckets]
+* [Capability Negotiation][concept cap neg]
+* [Causal Context][concept causal context]
+* [Clusters][concept clusters]
+* [Convergent Replicated Data Types (CRDTs)][concept crdts]
+* [Eventual Consistency][concept eventual consistency]
+* [Keys and Objects][concept keys objects]
+* [Replication][concept replication]
+* [Virtual Nodes (vnodes)][concept vnodes]
diff --git a/content/riak/kv/2.9.1/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.9.1/learn/concepts/active-anti-entropy.md
new file mode 100644
index 0000000000..eeb5471660
--- /dev/null
+++ b/content/riak/kv/2.9.1/learn/concepts/active-anti-entropy.md
@@ -0,0 +1,107 @@
+---
+title: "Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Active Anti-Entropy"
+    identifier: "learn_concepts_aae"
+    weight: 100
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.1/theory/concepts/aae
+  - /riak/kv/2.9.1/theory/concepts/aae
+---
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter
+[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/active-anti-entropy
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency
+[config aae]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/#active-anti-entropy
+[glossary read rep]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#read-repair
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode
+[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree
+[usage search]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search
+
+
+In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored
+on different nodes are an expected byproduct of node failure, concurrent
+client updates, physical data loss and corruption, and other events that
+distributed systems are built to handle. These conflicts occur when
+objects are either
+
+* **missing**, as when one node holds a replica of the object and
+  another node does not, or
+* **divergent**, as when the values of an existing object differ across
+  nodes.
+
+Riak KV offers two means of resolving object conflicts: read repair and
+active anti-entropy (AAE). Both of these conflict resolution mechanisms
+apply to normal key/value data in Riak as well as to
+[search indexes][usage search].
+
+
+## Read Repair vs. Active Anti-Entropy
+
+In versions of Riak prior to 1.3, replica conflicts were healed via
+[read repair][glossary read rep], which is a _passive_
+anti-entropy mechanism that heals object conflicts only when a read
+request reaches Riak from a client. Under read repair, if the
+[vnode][glossary vnode] coordinating the read request determines
+that different nodes hold divergent values for the object, the repair
+process will be set in motion.
+
+One advantage of using read repair alone is that it doesn't require any
+kind of background process to take effect, which can cut down on CPU
+resource usage. The drawback of the read repair-only approach, however,
+is that the healing process can only ever reach those objects that
+are read by clients. Any conflicts in objects that are not read by
+clients will go undetected.
+
+The _active_ anti-entropy (AAE) subsystem was added to Riak in
+version 1.3 to enable conflict resolution to run as a
+continuous background process, in contrast with read repair, which does
+not run continuously. AAE is most useful in clusters containing
+so-called "cold data" that may not be read for long periods of time, even
+months or years, and is thus not reachable by read repair.
+
+Although AAE is enabled by default, it can be turned off if necessary.
+See our documentation on [managing active anti-entropy][cluster ops aae]
+for information on how to enable and disable AAE, as well as on configuring
+and monitoring AAE.
+
+## Active Anti-Entropy and Hash Tree Exchange
+
+In order to compare object values between replicas without using more
+resources than necessary, Riak relies on [Merkle
+tree] hash exchanges between
+nodes.
+
+Using this type of exchange enables Riak to compare a balanced tree of
+Riak object hashes. Any difference at a higher level in the hierarchy
+means that at least one value has changed at a lower level. AAE
+recursively compares the tree, level by level, until it pinpoints exact
+values with a difference between nodes. The result is that AAE is able
+to run repair operations efficiently regardless of how many objects are
+stored in a cluster, since it need only repair specific objects instead
+of all objects.
+
+In contrast with related systems, Riak uses persistent, on-disk hash
+trees instead of in-memory hash trees. The advantages of this approach
+are twofold:
+
+* Riak can run AAE operations with a minimal impact on memory usage
+* Riak nodes can be restarted without needing to rebuild hash trees
+
+In addition, hash trees are updated in real time as new writes come in,
+which reduces the time that it takes to detect and repair missing or
+divergent replicas.
+
+As an additional fallback measure, Riak periodically clears and
+regenerates all hash trees from on-disk key/value data, which enables
+Riak to detect silent data corruption to on-disk data arising from disk
+failure, faulty hardware, and other sources. The default time period for
+this regeneration is one week, but this can be adjusted in each node's
+[configuration file][config aae].
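+
+To make the exchange concrete, here is a toy Ruby sketch of the idea
+(not Riak's actual implementation, which uses persistent, multi-level,
+on-disk trees): each replica hashes its key/value pairs into leaves,
+combines the leaves into a root hash, and only descends to the leaves
+when the roots differ. All names and data below are illustrative only.
+
+```ruby
+require 'digest'
+
+# Hash each key/value pair into a leaf of the tree
+def leaf_hashes(kv)
+  kv.map { |k, v| [k, Digest::SHA256.digest(k + v)] }.to_h
+end
+
+# Combine the leaf hashes in a fixed key order into a root hash
+def root_hash(leaves)
+  Digest::SHA256.digest(leaves.sort.map { |_, h| h }.join)
+end
+
+# If the roots match, no objects need repair; otherwise descend to
+# the leaves to pinpoint exactly which keys diverge
+def divergent_keys(kv_a, kv_b)
+  la = leaf_hashes(kv_a)
+  lb = leaf_hashes(kv_b)
+  return [] if root_hash(la) == root_hash(lb)
+  (la.keys | lb.keys).select { |k| la[k] != lb[k] }.sort
+end
+
+replica_a = { 'champion' => 'Harlem Globetrotters' }
+replica_b = { 'champion' => 'Washington Generals' }
+p divergent_keys(replica_a, replica_b) # => ["champion"]
+```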
diff --git a/content/riak/kv/2.9.1/learn/concepts/buckets.md b/content/riak/kv/2.9.1/learn/concepts/buckets.md
new file mode 100644
index 0000000000..e707a53919
--- /dev/null
+++ b/content/riak/kv/2.9.1/learn/concepts/buckets.md
@@ -0,0 +1,213 @@
+---
+title: "Buckets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Buckets"
+    identifier: "learn_concepts_buckets"
+    weight: 101
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.1/theory/concepts/Buckets
+  - /riak/kv/2.9.1/theory/concepts/Buckets
+  - /riak/2.9.1/theory/concepts/buckets
+  - /riak/kv/2.9.1/theory/concepts/buckets
+---
+
+[apps cluster metadata]: {{<baseurl>}}riak/kv/2.9.1/developing/app-guide/cluster-metadata
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/bucket-types
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/strong-consistency
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context
+[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context/#siblings
+[concept replication]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/reference/strong-consistency
+[config basic]: {{<baseurl>}}riak/kv/2.9.1/configuring/basic
+[dev api http]: {{<baseurl>}}riak/kv/2.9.1/developing/api/http
+[dev data types]: {{<baseurl>}}riak/kv/2.9.1/developing/data-types
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#ring
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/multi
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/replication
+
+
+Buckets are used to define a virtual keyspace for storing Riak objects.
+They enable you to define non-default configurations over that keyspace
+concerning [replication properties][concept replication] and [other
+parameters][config basic].
+
+In certain respects, buckets can be compared to tables in relational
+databases or folders in filesystems. From the standpoint
+of performance, buckets with default configurations are essentially
+"free," while non-default configurations, defined [using bucket
+types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.
+
+## Configuration
+
+Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables
+you to create and modify sets of configurations and apply them to as
+many buckets as you wish. With bucket types, you can configure the
+following bucket-level parameters, overriding the default values if you
+wish.
+
+#### allow_mult
+
+Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on
+the context. See the documentation on [`allow_mult`][usage bucket types] for more
+information.
+
+#### n_val
+
+Specifies the number of copies of each object to be stored in the
+cluster. See the documentation on [replication properties][usage replication]. Default:
+`3`.
+
+#### last_write_wins
+
+Indicates if an object's timestamp will be used to decide the canonical
+write in the case of a conflict. See the documentation on [vector
+clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default:
+`false`.
+
+#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum
+
+See the documentation on [replication properties][usage replication] for more information
+on all of these properties.
+
+#### precommit
+
+A list of Erlang functions to be executed before writing an object. See
+our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit
+hooks, i.e. an empty list.
+
+#### postcommit
+
+A list of Erlang functions to be executed after writing an object. See
+our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit
+hooks, i.e. an empty list.
+
+#### old_vclock, young_vclock, small_vclock, big_vclock
+
+These settings enable you to manage [vector clock pruning][concept causal context].
+
+#### backend
+
+If you are using the [Multi][plan backend multi] backend, this property enables you to
+determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using
+LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_
+buckets of all types will use the assigned backend.
+
+#### consistent
+
+If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets
+bearing a type, this setting must be set to `true`. The default is
+`false`. More information can be found in our documentation on [using
+strong consistency][cluster ops strong consistency].
+
+#### datatype
+
+If you are using [Riak data types][dev data types], this setting
+determines which data type will be used in
+buckets of this bucket type. Possible values: `counter`, `set`, or
+`map`.
+
+#### dvv_enabled
+
+Whether [dotted version vectors][concept causal context]
+will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.
+
+#### chash_keyfun, linkfun
+
+These settings involve features that have been deprecated. You will not
+need to adjust these values.
+
+## Fetching Bucket Properties
+
+If you'd like to see how a particular bucket has been configured, you
+can do so using our official client libraries or through Riak's [HTTP
+API][dev api http]. The following would fetch the properties for the bucket
+`animals` if that bucket had a default configuration, i.e.
the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+    ->buildBucket('animals')
+    ->build()
+    ->execute()
+    ->getBucket()
+    ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak is running on "localhost" and the HTTP port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+    ->buildBucket('animals', 'my_custom_type')
+    ->build()
+    ->execute()
+    ->getBucket()
+    ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
diff --git a/content/riak/kv/2.9.1/learn/concepts/capability-negotiation.md b/content/riak/kv/2.9.1/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..e944eb4c69
--- /dev/null
+++ b/content/riak/kv/2.9.1/learn/concepts/capability-negotiation.md
@@ -0,0 +1,32 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.1/theory/concepts/capability-negotiation
+  - /riak/kv/2.9.1/theory/concepts/capability-negotiation
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.1/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce
+
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
+
+Rolling upgrades no longer require you to disable and then re-enable features due to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + diff --git a/content/riak/kv/2.9.1/learn/concepts/causal-context.md b/content/riak/kv/2.9.1/learn/concepts/causal-context.md new file mode 100644 index 0000000000..212377f76d --- /dev/null +++ b/content/riak/kv/2.9.1/learn/concepts/causal-context.md @@ -0,0 +1,285 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.1/theory/concepts/context + - /riak/kv/2.9.1/theory/concepts/context +--- + + +[concept aae]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/2.9.1/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/2.9.1/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/2.9.1/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+    b. Riak creates sibling values, aka **siblings**, for the object
+
+    c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by the bucket's settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 1.4, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+ * Whether one object is a direct descendant of the other
+ * Whether the objects are direct descendants of a common parent
+ * Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects.
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks](#vector-clocks). In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object, but they
+can't identify which value was associated with each update.
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#siblings) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates,
+objects that use DVVs should generally be smaller than objects that use
+vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence when interacting with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`. So if you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion.
At the extreme, having an enormous object in a +node can cause reads of that object to crash the entire node. Other +issues include [undue latency][perf latency reduc] and +out-of-memory errors. + +To prevent sibling explosion, we recommend the following: + +1. Use [dotted version vectors](#dotted-version-vectors) +instead of vector clocks for causal +context. +2. Always update mutable objects within a read/modify/write cycle. More +information can be found in the [Object Updates][usage updating objects] doc. + +## Resources + +* [Evaluating Dotted Version Vectors in Riak] +* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study] +* [Dotted Version Vector Sets] +* [A History of Time in Riak] diff --git a/content/riak/kv/2.9.1/learn/concepts/clusters.md b/content/riak/kv/2.9.1/learn/concepts/clusters.md new file mode 100644 index 0000000000..e6e274035a --- /dev/null +++ b/content/riak/kv/2.9.1/learn/concepts/clusters.md @@ -0,0 +1,113 @@ +--- +title: "Clusters" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Clusters" + identifier: "learn_concepts_clusters" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.1/theory/concepts/Clusters + - /riak/kv/2.9.1/theory/concepts/Clusters + - /riak/2.9.1/theory/concepts/clusters + - /riak/kv/2.9.1/theory/concepts/clusters +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.1/learn/dynamo +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution +[usage replication]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/replication + + +Riak's default mode of operation is to work as a cluster consisting of +multiple [nodes][glossary node], i.e. multiple well-connected data +hosts. + +Each host in the cluster runs a single instance of Riak, referred to as +a Riak node. Each Riak node manages a set of virtual nodes, or +[vnodes][glossary vnode], that are responsible for storing a +separate portion of the keys stored in the cluster. + +In contrast to some high-availability systems, Riak nodes are _not_ +clones of one another, and they do not all participate in fulfilling +every request. Instead, you can configure, at runtime or at request +time, the number of nodes on which data is to be replicated, as well as +when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed. + +## The Ring + +Though much of this section is discussed in our annotated discussion of +the Amazon [Dynamo paper][learn dynamo], it nonetheless provides a summary of +how Riak implements the distribution of data throughout a cluster. + +Any client interface to Riak interacts with objects in terms of the +[bucket][concept buckets] and [key][concept keys objects] in which a value is +stored, as well as the [bucket type][usage bucket types] that is used +to set the bucket's properties. + +Internally, Riak computes a 160-bit binary hash of each bucket/key pair +and maps this value to a position on an ordered **ring** of all such +values. 
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes define a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![Riak Data Distribution]({{<baseurl>}}images/riak-data-distribution.png)
+
+When N is set to 3, the value `REM` is stored in the key `artist`. That
+key is assigned to 3 partitions out of 32 available partitions. When a
+read request is made to Riak, the ring state will be used to determine
+which partitions are responsible. From there, a variety of
+[configurable parameters][usage replication] determine how Riak
+will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates.
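+
+To make the hash-to-partition mapping described above concrete, here is a
+small Python sketch. It is illustrative only: Riak's actual consistent hash
+is computed over the Erlang term `{Bucket, Key}` (via
+`riak_core_util:chash_key`), so the indices below will not match a real
+cluster, but the mechanics of dividing a 160-bit SHA-1 space into equal
+partitions are the same.
+
+```python
+import hashlib
+
+RING_SIZE = 8            # number of partitions (ring_creation_size)
+HASH_SPACE = 2 ** 160    # the SHA-1 output space used by Riak
+
+def partition_for(bucket: bytes, key: bytes) -> int:
+    """Map a bucket/key pair onto one of RING_SIZE partitions."""
+    # Simplification: Riak hashes the Erlang term {Bucket, Key},
+    # not a concatenated byte string.
+    digest = hashlib.sha1(bucket + b"/" + key).digest()
+    doc_idx = int.from_bytes(digest, "big")   # a 160-bit integer
+    return doc_idx // (HASH_SPACE // RING_SIZE)
+
+print(partition_for(b"my_bucket", b"my_key"))  # an index from 0 to 7
+```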
diff --git a/content/riak/kv/2.9.1/learn/concepts/crdts.md b/content/riak/kv/2.9.1/learn/concepts/crdts.md new file mode 100644 index 0000000000..b294963237 --- /dev/null +++ b/content/riak/kv/2.9.1/learn/concepts/crdts.md @@ -0,0 +1,248 @@ +--- +title_supertext: "Concept" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Data Types" + identifier: "learn_concepts_data_types" + weight: 104 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.1/theory/concepts/crdts + - /riak/kv/2.9.1/theory/concepts/crdts +--- + +[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[data types converg]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/crdts/#convergence +[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html +[data types impl]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/strong-consistency +[dev data types]: {{<baseurl>}}riak/kv/2.9.1/developing/data-types +[riak_dt]: https://github.com/basho/riak_dt +[dev data types context]: {{<baseurl>}}riak/kv/2.9.1/developing/data-types/#data-types-and-context +[glossary node]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution + +Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections: + +- Counters +- Flags +- HyperLogLogs +- Maps +- Registers +- Sets + +The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients. + +Instead of the usual create, read, update, and delete (CRUD) operations +performed on key/value pairs, data types enable you to perform +operations such as removing a register from a map, telling a counter to +increment itself by 5, or enabling a flag that was previously disabled. + +It's important to note that Riak Data Types are operations-based from the standpoint of connecting clients. Like CRDTs, the [convergence logic][data types converg] is state-based behind the scenes. + +Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below. + +For more articles on CRDTs, check out this [reading list][crdts reading list]. + + +## Counters + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used within a map. A counter’s value can only be a positive integer, negative integer, or zero. 
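+
+As a quick sketch of what working with a counter looks like from a client,
+here is a hedged example using the official Python client. It assumes a
+node reachable on the default Protocol Buffers port and a bucket type named
+`counters` that was created and activated with
+`{"props":{"datatype":"counter"}}`; the bucket and key names are invented
+for illustration.
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)
+bucket = client.bucket_type('counters').bucket('player_scores')
+
+counter = bucket.new('player_14325')  # a fresh counter at this key
+counter.increment(50)                 # queue an increment operation
+counter.store()                       # send the operation to Riak KV
+
+fetched = bucket.get('player_14325')
+print(fetched.value)                  # 50, assuming no other writers
+```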
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, counters should not be used, because uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags have the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, hyperloglogs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+  Counter: numberOfRetweets,
+  Register: username,
+  Register: tweetContent,
+  Flag: favorited?,
+  Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which includes
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+Registers can only have the binaries stored within them changed. They can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.
+
+
+## Sets
+
+Sets are collections of unique binary values, such as strings. All of
+the values in a set are unique. For example, if you attempt to add the
+element `shovel` to a set that already contains `shovel`, the operation
+will be ignored by Riak KV. Sets can be used either on their own or
+embedded in a map.
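+
+Here is a similar hedged sketch for sets with the official Python client,
+under the same assumptions as the counter example above (a reachable node,
+plus an activated bucket type named `sets` with
+`{"props":{"datatype":"set"}}`; bucket and key names are invented):
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)
+bucket = client.bucket_type('sets').bucket('travel')
+
+cities = bucket.new('cities')
+cities.add('Toronto')
+cities.add('Montreal')
+cities.add('Toronto')        # adding a duplicate element has no effect
+cities.store()
+
+fetched = bucket.get('cities')
+print(fetched.value)         # frozenset({'Montreal', 'Toronto'})
+
+fetched.discard('Montreal')  # removals use the context from the fetch
+fetched.store()
+```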
+ +Some examples of using sets: + +- Storing the UUIDs of a user's friends in a social network application +- Storing items in an e-commerce shopping cart + +### Operations + +Sets are subject to four basic operations: add an element, remove an +element, add multiple elements, or remove multiple elements. + + +## Advantages and Disadvantages of Data Types + +[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider. + +One of the core purposes behind data types is to relieve developers +using Riak KV of the burden of producing data convergence at the +application level by absorbing a great deal of that complexity into Riak KV +itself. Riak KV manages this complexity by building eventual consistency +into the data types themselves instead of requiring clients to do so. + +You can still build applications with Riak KV that treat it as a highly +available key/value store, and you will always have this choice. What +Riak Data Types provide is additional flexibility and a broader choice +palette. + +The trade-off that data types necessarily present is that they don't +allow you to produce your own convergence logic. If your use case +demands that you be able to create your own deterministic merge +functions, then Riak Data Types might not be a good fit. + + +## Implementation + +Conflicts between replicas are inevitable in a distributed system like +Riak KV. + +For example, if a map is stored in the key `my_map`, it is always +possible that the value of `my_map` will be different in nodes A and B. + +Without using data types, that conflict must be resolved using +timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt]. + + +## Convergence + +The benefit of data types is that Riak KV knows how to resolve value +conflicts by applying data type-specific rules. + +Riak KV does this by remembering the history of a value and broadcasting that +history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct. + +### Example + +Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`. While on another node the set has only one element, `apple`. + +What happens when the two nodes communicate and note the divergence? + +In this case Riak KV would declare the set with two elements the winner. +At that point, the node with the incorrect set would be told: "The set +`fruits` should have elements `apple` and `orange`." + +In general, convergence involves the following stages: + +1. Check for divergence. If the data types have the same value, Riak KV + does nothing. But if divergence is noted... +2. Riak KV applies data type-specific merge rules, like in the `fruits` + set example above, which will result in a "correct" value. +3. 
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. diff --git a/content/riak/kv/2.9.1/learn/concepts/eventual-consistency.md b/content/riak/kv/2.9.1/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..f84e689a1a --- /dev/null +++ b/content/riak/kv/2.9.1/learn/concepts/eventual-consistency.md @@ -0,0 +1,198 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.1/theory/concepts/Eventual-Consistency + - /riak/kv/2.9.1/theory/concepts/Eventual-Consistency + - /riak/2.9.1/theory/concepts/eventual-consistency + - /riak/kv/2.9.1/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these +systems need to rely on some form of conflict-resolution mechanism. + +One of the things that makes Riak's eventual consistency model powerful +is that Riak does not dictate how data resolution takes place. While +Riak does ship with a set of defaults regarding how data is +[replicated](#replication-properties-and-request-tuning) and how +[conflicts are resolved][usage conflict resolution], you can override these +defaults if you want to employ a different strategy. + +Among those strategies, you can enable Riak to resolve object conflicts +automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or +special eventually consistent [Data Types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types/), or you can resolve those +conflicts on the application side by employing a use case-specific logic +of your choosing. More information on this can be found in our guide to +[conflict resolution][usage conflict resolution]. + +This variety of options enables you to manage Riak's eventually +consistent behavior in accordance with your application's [data model +or models]({{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/). + +## Replication Properties and Request Tuning + +In addition to providing you different means of resolving conflicts, +Riak also enables you to fine-tune **replication properties**, which +determine things like the number of nodes on which data should be stored +and the number of nodes that are required to respond to read, write, and +other requests. + +An in-depth discussion of these behaviors and how they can be +implemented on the application side can be found in our guides to +[replication properties][concept replication] and [conflict resolution][usage conflict resolution]. + +In addition to our official documentation, we also recommend checking +out the [Understanding Riak's Configurable +Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) +series from [the Basho blog](https://riak.com/blog/). + +## A Simple Example of Eventual Consistency + +Let's assume for the moment that a sports news application is storing +all of its data in Riak. One thing that the application always needs to +be able to report to users is the identity of the current manager of +Manchester United, which is stored in the key `manchester-manager` in +the bucket `premier-league-managers`. This bucket has `allow_mult` set +to `false`, which means that Riak will resolve all conflicts by itself. + +Now let's say that a node in this cluster has recently recovered from +failure and has an old copy of the key `manchester-manager` stored in +it, with the value `Alex Ferguson`. The problem is that Sir Ferguson +stepped down in 2013 and is no longer the manager. Fortunately, the +other nodes in the cluster hold the value `David Moyes`, which is +correct. + +Shortly after the recovered node comes back online, other cluster +members recognize that it is available. Then, a read request for +`manchester-manager` arrives from the application. Regardless of which +order the responses arrive to the node that is coordinating this +request, `David Moyes` will be returned as the value to the client, +because `Alex Ferguson` is recognized as an older value. + +Why is this? How does Riak make this decision? Behind the scenes, after +`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the +older value on the node that just came back online. 
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because 1 of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we've been talking
+about the possibility that a node for the `manchester-manager` key has
+failed. But to be more precise, we've been talking about a *primary*
+node, one that would bear responsibility for that key when the cluster
+is perfectly healthy.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure.
+>
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3` the client indicated that 3 primary
+nodes must respond for the operation to be considered successful, which
+it wasn't, but there's no way to tell without performing another read
+whether the operation truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
diff --git a/content/riak/kv/2.9.1/learn/concepts/keys-and-objects.md b/content/riak/kv/2.9.1/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..00393a3455
--- /dev/null
+++ b/content/riak/kv/2.9.1/learn/concepts/keys-and-objects.md
@@ -0,0 +1,49 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.1/theory/concepts/keys-and-values
+  - /riak/kv/2.9.1/theory/concepts/keys-and-values
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there is more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and occur when more than one actor
+updates an object, a network partition occurs, or a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
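+
+As a concrete illustration of the whole-object model described above, here
+is a hedged sketch using the official Python client (bucket and key names
+are invented; it assumes a node on the default Protocol Buffers port):
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)
+bucket = client.bucket('customers')  # a bucket in the default bucket type
+
+# A store writes the entire Object: value plus metadata, no partial writes
+obj = bucket.new('mariejohnston', data={'phone': '555-1337'})
+obj.store()
+
+# An update is a whole-object read/modify/write cycle. The fetched object
+# carries its causal context (vector clock), so Riak can tell that this
+# write descends from the previous one.
+fetched = bucket.get('mariejohnston')
+fetched.data = {'phone': '555-1212'}
+fetched.store()
+
+# With allow_mult enabled, concurrent writes may leave multiple
+# metadata-value pairs on the object, i.e. siblings:
+print(len(fetched.siblings))
+```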
diff --git a/content/riak/kv/2.9.1/learn/concepts/replication.md b/content/riak/kv/2.9.1/learn/concepts/replication.md
new file mode 100644
index 0000000000..4b1af85124
--- /dev/null
+++ b/content/riak/kv/2.9.1/learn/concepts/replication.md
@@ -0,0 +1,319 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.1/theory/concepts/Replication
+  - /riak/kv/2.9.1/theory/concepts/Replication
+  - /riak/2.9.1/theory/concepts/replication
+  - /riak/kv/2.9.1/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing security
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication]({{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/multi) capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak chooses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
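+
+At request time, those parameters are passed alongside individual
+operations. Here is a hedged sketch with the official Python client
+(bucket and key names invented, default Protocol Buffers port assumed):
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)
+bucket = client.bucket('animals')
+
+# Write: ask for 2 of the N replicas to acknowledge, 1 of them durably
+obj = bucket.new('perdido', data={'species': 'dog'})
+obj.store(w=2, dw=1)
+
+# Read: with N=3, R can be at most 3; r=1 returns after the first reply
+fetched = bucket.get('perdido', r=1)
+print(fetched.data)
+```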
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up changing, leaving replicas stranded on vnodes that are no
+longer part of the object's preflist.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/2.9.1/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy][cluster ops v3 mdc]
+
+
+## Read Repair
+
+Read repair is triggered when a successful read occurs---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Or if you have [active
+anti-entropy][concept aae] enabled, your values will
+eventually replicate as a background task.
+
+For each object that fails read (or the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean?
+ +N=3 simply means that three copies of each piece of data will be stored +in the cluster. That is, three different partitions/vnodes will receive +copies of the data. **There are no guarantees that the three replicas +will go to three separate physical nodes**; however, the built-in +functions for determining where replicas go attempts to distribute the +data evenly. + +As nodes are added and removed from the cluster, the ownership of +partitions changes and may result in an uneven distribution of the data. +On some rare occasions, Riak will also aggressively reshuffle ownership +of the partitions to achieve a more even balance. + +For cases where the number of nodes is less than the N value, data will +likely be duplicated on some nodes. For example, with N=3 and 2 nodes in +the cluster, one node will likely have one replica, and the other node +will have two replicas. + +## Understanding replication by example + +To better understand how data is replicated in Riak let's take a look at +a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically +we'll focus on two parts of the request: routing an object to a set of +partitions and storing an object on a partition. + +### Routing an object to a set of partitions + + * Assume we have 3 nodes + * Assume we store 3 replicas per object (N=3) + * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8) + +**Note**: It is not recommended that you use such a small ring size. +This is for demonstration purposes only. + +With only 8 partitions our ring will look approximately as follows +(response from `riak_core_ring_manager:get_my_ring/0` truncated for +clarity): + +```erlang +(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring(). +[{0,'dev1@127.0.0.1'}, +{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}, +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}, +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}, +{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'}, +{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}, +{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}, +{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}] +``` + +The node handling this request hashes the bucket/key combination: + +```erlang +(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}). +<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>> +``` + +The DocIdx hash is a 160-bit integer: + +```erlang +(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx. +<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>> +(dev1@127.0.0.1)6> I. +1045375627425331784151332358177649483819648417632 +``` + +The node looks up the hashed key in the ring, which returns a list of +_preferred_ partitions for the given key. + +```erlang +(node1@127.0.0.1)> Preflist = riak_core_ring:preflist(DocIdx, Ring). 
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}, +{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}, +{0, 'dev1@127.0.0.1'}, +{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}, +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}, +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}, +{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'}, +{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}] +``` + +The node chooses the first N partitions from the list. The remaining +partitions of the "preferred" list are retained as fallbacks to use if +any of the target partitions are unavailable. + +```erlang +(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist). +{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}, +{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}, +{0,'dev1@127.0.0.1'}], +[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}, +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}, +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}, +{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'}, +{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]} +``` + +The partition information returned from the ring contains a partition +identifier and the parent node of that partition: + +```erlang +{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'} +``` + +The requesting node sends a message to each parent node with the object +and partition identifier (pseudocode for clarity): + +```erlang +'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232} +'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104} +'dev1@127.0.0.1' ! {put, Object, 0} +``` + +If any of the target partitions fail, the node sends the object to one +of the fallbacks. When the message is sent to the fallback node, the +message references the object and original partition identifier. For +example, if `dev2@127.0.0.1` were unavailable, the requesting node would +then try each of the fallbacks. The fallbacks in this example are: + +```erlang +{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'} +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'} +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'} +``` + +The next available fallback node would be `dev3@127.0.0.1`. The +requesting node would send a message to the fallback node with the +object and original partition identifier: + +```erlang +'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104} +``` + +Note that the partition identifier in the message is the same that was +originally sent to `dev2@127.0.0.1` only this time it is being sent to +`dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node of +that partition, it is smart enough to hold on to the object until +`dev2@127.0.0.1` returns to the cluster. + +## Processing partition requests + +Processing requests per partition is fairly simple. Each node runs a +single process (`riak_kv_vnode_master`) that distributes requests to +individual partition processes (`riak_kv_vnode`). The +`riak_kv_vnode_master` process maintains a list of partition identifiers +and corresponding partition processes. If a process does not exist for a +given partition identifier a new process is spawned to manage that +partition. 
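+
+The target/fallback behavior from the walkthrough above can be summarized
+in a few lines of illustrative Python. This is a toy sketch of the routing
+logic, not Riak's implementation (the `send` stub stands in for an
+Erlang-style message send):
+
+```python
+N = 3  # replicas per object
+
+def send(node, message):
+    # Stand-in for sending a message to a vnode on the given node
+    print(f"{node} ! {message}")
+
+def route_put(preflist, available, obj):
+    """preflist: ordered [(partition_id, node), ...] from the ring;
+    available: the set of nodes currently reachable."""
+    targets, fallbacks = preflist[:N], preflist[N:]
+    live_fallbacks = iter(n for _, n in fallbacks if n in available)
+    for partition_id, node in targets:
+        # The original partition id travels with the message even when
+        # rerouted, so a fallback node can hand the data back later.
+        send(node if node in available else next(live_fallbacks),
+             ('put', obj, partition_id))
+```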
+ +The `riak_kv_vnode_master` process treats all requests the same and +spawns partition processes as needed even when nodes receive requests +for partitions they do not own. When a partition's parent node is +unavailable, requests are sent to fallback nodes (handoff). The +`riak_kv_vnode_master` process on the fallback node spawns a process to +manage the partition even though the partition does not belong to the +fallback node. + +The individual partition processes perform hometests throughout the life +of the process. The hometest checks if the current node (`node/0`) +matches the parent node of the partition as defined in the ring. If the +process determines that the partition it is managing belongs on another +node (the parent node), it will attempt to contact that node. If that +parent node responds, the process will hand off any objects it has +processed for that partition and shut down. If that parent node does not +respond, the process will continue to manage that partition and check +the parent node again after a delay. The hometest is also run by +partition processes to account for changes in the ring, such as the +addition or removal of nodes to the cluster. + diff --git a/content/riak/kv/2.9.1/learn/concepts/strong-consistency.md b/content/riak/kv/2.9.1/learn/concepts/strong-consistency.md new file mode 100644 index 0000000000..1f90f92be1 --- /dev/null +++ b/content/riak/kv/2.9.1/learn/concepts/strong-consistency.md @@ -0,0 +1,101 @@ +--- +title: "Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Strong Consistency" + identifier: "learn_concepts_strong_consistency" + weight: 109 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.1/theory/concepts/strong-consistency + - /riak/kv/2.9.1/theory/concepts/strong-consistency +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.1/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. 
+
+If you successfully write a value to a key in a strongly consistent
+system, the next successful read of that key is guaranteed to show that
+write. A client will never see out-of-date values. The drawback is that
+some operations may fail if an insufficient number of object replicas
+are available. More on this in the section on [trade-offs](#trade-offs).
+
+In an eventually consistent system, on the other hand, a read may return
+an out-of-date value, particularly during system or network failures.
+The advantage of this approach is that reads and writes can succeed even
+when a cluster is experiencing significant service degradation.
+
+### Example
+
+Building on the example presented in the [eventual consistency][concept eventual consistency] doc,
+imagine that information about who manages Manchester United is stored
+in Riak, in the key `manchester-manager`. In the eventual consistency
+example, the value associated with this key was originally
+`David Moyes`, meaning that that was the first successful write to that
+key. But then `Louis van Gaal` became Man U's manager, and a write was
+executed to change the value of `manchester-manager`.
+
+Now imagine that this write failed on one node in a multi-node cluster.
+Thus, all nodes report that the value of `manchester-manager` is `Louis
+van Gaal` except for one. On the errant node, the value of the
+`manchester-manager` key is still `David Moyes`. An eventually
+consistent system is one in which a get request will most likely return
+`Louis van Gaal` but could return the outdated value `David Moyes`.
+
+In a strongly consistent system, conversely, any successful read on
+`manchester-manager` will return `Louis van Gaal` and never `David Moyes`.
+Reads will return `Louis van Gaal` every single time until Man U gets a new
+manager and someone performs a successful write to `manchester-manager`
+to change its value.
+
+It might also be useful to imagine it a bit more abstractly. The
+following causal sequence would characterize a strongly consistent
+system:
+
+1. The value of the key `k` is set to `v`
+2. All successful reads on `k` return `v`
+3. The value of `k` is changed to `v2`
+4. All successful reads on `k` return `v2`
+5. And so forth
+
+At no point in time does this system return an out-of-date value.
+
+The following sequence could characterize an eventually consistent
+system:
+
+1. A write is made that sets the value of the key `k` to `v`
+2. Nearly all reads to `k` return `v`, but a small percentage return
+   `not found`
+3. A write to `k` changes the value to `v2`
+4. Nearly all reads to `k` now return `v2`, but a small number return
+   the outdated `v` (or even `not found`) because the newer value hasn't
+   yet been replicated to all nodes
diff --git a/content/riak/kv/2.9.1/learn/concepts/vnodes.md b/content/riak/kv/2.9.1/learn/concepts/vnodes.md
new file mode 100644
index 0000000000..7d25cb807a
--- /dev/null
+++ b/content/riak/kv/2.9.1/learn/concepts/vnodes.md
@@ -0,0 +1,156 @@
+---
+title: "Vnodes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Vnodes"
+    identifier: "learn_concepts_vnodes"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.1/theory/concepts/vnodes
+  - /riak/kv/2.9.1/theory/concepts/vnodes
+---
+
+
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context
+[concept clusters ring]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters/#the-ring
+[concept replication]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/strong-consistency
+[glossary node]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#ring
+[plan backend]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/cluster-capacity
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-cli
+
+
+Virtual nodes, more commonly referred to as **vnodes**, are processes
+that manage partitions in the Riak [ring][glossary ring]. Each data
+partition in a Riak cluster has a vnode that **claims** that partition.
+Vnodes perform a wide variety of operations, from K/V storage operations
+to guaranteeing [strong consistency][concept strong consistency] if you choose to use that
+feature.
+
+## The Number of Vnodes in a Cluster
+
+The term [node][glossary node] refers to a full instance of Riak,
+be it on its own physical machine or alongside others on a single
+machine, as in a development cluster on your laptop. Each Riak node
+contains multiple vnodes. The number per node is the [ring
+size][concept clusters ring] divided by the number of nodes in the cluster.
+
+This means that in some clusters different nodes will have different
+numbers of data partitions (and hence a different number of vnodes),
+because (ring size / number of nodes) will not always produce an
+integer. If the ring size of your cluster is 64 and you are running
+three nodes, two of your nodes will have 21 vnodes, while the third
+node holds 22 vnodes.
+
+The output of the [`riak-admin member-status`][use admin riak cli]
+command shows this:
+
+```
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      34.4%      --      'dev1@127.0.0.1'
+valid      32.8%      --      'dev2@127.0.0.1'
+valid      32.8%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+```
+
+In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of
+64 partitions, while the other two nodes account for 32.8%, i.e. 21 out
+of 64 partitions. This is normal and expected behavior in Riak.
+
+We strongly recommend setting the appropriate ring size, and by
+extension the number of vnodes, prior to building a cluster. A full
+guide can be found in our [cluster planning][plan cluster capacity] documentation.
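+
+As a quick sanity check on that arithmetic (an Erlang shell session
+shown for illustration only; this is not a Riak command):
+
+```erlang
+1> RingSize = 64, NodeCount = 3.
+3
+2> RingSize div NodeCount.  %% vnodes held by most nodes
+21
+3> RingSize rem NodeCount.  %% nodes that must claim one extra vnode
+1
+```
+
+Two nodes hold 21 vnodes each and one node holds 22, which is exactly
+the 32.8% / 34.4% split shown in the `member-status` output above.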
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, acting as [strong consistency
+ensembles][concept strong consistency] and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1004782375664995756265033322492444576013453623296`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node.
+The report for a specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+ 0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
diff --git a/content/riak/kv/2.9.1/learn/dynamo.md b/content/riak/kv/2.9.1/learn/dynamo.md
new file mode 100644
index 0000000000..d10c4272f4
--- /dev/null
+++ b/content/riak/kv/2.9.1/learn/dynamo.md
@@ -0,0 +1,1924 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/2.9.1/theory/dynamo
+  - /riak/kv/2.9.1/theory/dynamo
+---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.
+
+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV, Cassandra
+> and Voldemort come to mind. You may also remember Dynomite (which
+> predates all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use.
+ +Categories and Subject Descriptors + +* D.4.2 [Operating Systems]: Storage Management; +* D.4.5 [Operating Systems]: Reliability; +* D.4.2 [Operating Systems]: Performance; + +General Terms + +Algorithms, Management, Measurement, Performance, Design, Reliability. + +## 1. Introduction + +Amazon runs a world-wide e-commerce platform that serves tens of millions +customers at peak times using tens of thousands of servers located in many data +centers around the world. There are strict operational requirements on Amazon’s +platform in terms of performance, reliability and efficiency, and to support +continuous growth the platform needs to be highly scalable. Reliability is one +of the most important requirements because even the slightest outage has +significant financial consequences and impacts customer trust. In addition, to +support continuous growth, the platform needs to be highly scalable. + +One of the lessons our organization has learned from operating Amazon’s platform +is that the reliability and scalability of a system is dependent on how its +application state is managed. Amazon uses a highly decentralized, loosely +coupled, service oriented architecture consisting of hundreds of services. In +this environment there is a particular need for storage technologies that are +always available. For example, customers should be able to view and add items to +their shopping cart even if disks are failing, network routes are flapping, or +data centers are being destroyed by tornados. Therefore, the service responsible +for managing shopping carts requires that it can always write to and read from +its data store, and that its data needs to be available across multiple data +centers. + +Dealing with failures in an infrastructure comprised of millions of components +is our standard mode of operation; there are always a small but significant +number of server and network components that are failing at any given time. As +such Amazon’s software systems need to be constructed in a manner that treats +failure handling as the normal case without impacting availability or +performance. + +To meet the reliability and scaling needs, Amazon has developed a number of +storage technologies, of which the Amazon Simple Storage Service (also available +outside of Amazon and known as Amazon S3), is probably the best known. This +paper presents the design and implementation of Dynamo, another highly available +and scalable distributed data store built for Amazon’s platform. Dynamo is used +to manage the state of services that have very high reliability requirements and +need tight control over the tradeoffs between availability, consistency, cost- +effectiveness and performance. Amazon’s platform has a very diverse set of +applications with different storage requirements. A select set of applications +requires a storage technology that is flexible enough to let application +designers configure their data store appropriately based on these tradeoffs to +achieve high availability and guaranteed performance in the most cost effective +manner. + +There are many services on Amazon’s platform that only need primary-key access +to a data store. For many services, such as those that provide best seller +lists, shopping carts, customer preferences, session management, sales rank, and +product catalog, the common pattern of using a relational database would lead to +inefficiencies and limit scale and availability. Dynamo provides a simple +primary-key only interface to meet the requirements of these applications. 
+ +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. It also provides insight into the +tuning of these techniques to meet the requirements of production systems with +very strict performance demands. + +The paper is structured as follows. Section 2 presents the background and +Section 3 presents the related work. Section 4 presents the system design and +Section 5 describes the implementation. Section 6 details the experiences and +insights gained by running Dynamo in production and Section 7 concludes the +paper. There are a number of places in this paper where additional information +may have been appropriate but where protecting Amazon’s business interests +require us to reduce some level of detail. For this reason, the intra- and +inter-datacenter latencies in section 6, the absolute request rates in section +6.2 and outage lengths and workloads in section 6.3 are provided through +aggregate measures instead of absolute details. + + +## 2. Background + +Amazon’s e-commerce platform is composed of hundreds of services that work in +concert to deliver functionality ranging from recommendations to order +fulfillment to fraud detection. Each service is exposed through a well defined +interface and is accessible over the network. 
These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated and diverged from
+> Dynamo's (as described in this paper).
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly-available, is efficient in its resource
+> usage, and has a simple scale out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
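+>
+> For illustration, basic key/value access from the official Erlang client
+> (`riakc`) looks like this (a sketch; it assumes a local node listening on
+> the default Protocol Buffers port, 8087):
+>
+> ```erlang
+> {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+> %% PUT: store an opaque value under bucket "carts", key "bob"
+> Obj = riakc_obj:new(<<"carts">>, <<"bob">>, <<"{\"items\":[\"book\"]}">>),
+> ok = riakc_pb_socket:put(Pid, Obj),
+> %% GET and DELETE work the same way
+> {ok, Fetched} = riakc_pb_socket:get(Pid, <<"carts">>, <<"bob">>),
+> ok = riakc_pb_socket:delete(Pid, <<"carts">>, <<"bob">>).
+> ```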
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract.
+ +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. 
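+
+> To make the 99.9th-percentile metric concrete: it is read off the sorted
+> distribution of observed latencies, not computed from their average. A
+> minimal sketch (ours, not Amazon's measurement code):
+>
+> ```erlang
+> -module(sla_sketch).
+> -export([percentile/2, meets_sla/1]).
+>
+> %% Latencies is a list of response times in ms; P is e.g. 99.9.
+> percentile(Latencies, P) ->
+>     Sorted = lists:sort(Latencies),
+>     N = length(Sorted),
+>     Index = max(1, round(N * P / 100)),
+>     lists:nth(Index, Sorted).
+>
+> %% The example SLA above: 99.9% of requests within 300ms.
+> meets_sla(Latencies) ->
+>     percentile(Latencies, 99.9) =< 300.
+> ```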
+ + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. On the other hand, since the application is aware of the +data schema it can decide on the conflict resolution method that is best suited +for its client’s experience. 
For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbor. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops.
+ +To reduce the additional latency introduced by multi-hop routing, some P2P +systems (e.g., [14]) employ O(1) routing where each peer maintains enough +routing information locally so that it can route requests (to access a data +item) to the appropriate peer within a constant number of hops. + +> Riak KV's gossip protocol communicates between nodes with O(1) routing, and +> maintains local routing information. + +Various storage systems, such as Oceanstore [9] and PAST [17] were built on top +of these routing overlays. Oceanstore provides a global, transactional, +persistent storage service that supports serialized updates on widely replicated +data. To allow for concurrent updates while avoiding many of the problems +inherent with wide-area locking, it uses an update model based on conflict +resolution. Conflict resolution was introduced in [21] to reduce the number of +transaction aborts. Oceanstore resolves conflicts by processing a series of +updates, choosing a total order among them, and then applying them atomically in +that order. It is built for an environment where the data is replicated on an +untrusted infrastructure. By comparison, PAST provides a simple abstraction +layer on top of Pastry for persistent and immutable objects. It assumes that the +application can build the necessary storage semantics (such as mutable files) on +top of it. + +### 3.2 Distributed File Systems and Databases + +Distributing data for performance, availability and durability has been widely +studied in the file system and database systems community. Compared to P2P +storage systems that only support flat namespaces, distributed file systems +typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19] +replicate files for high availability at the expense of consistency. Update +conflicts are typically managed using specialized conflict resolution +procedures. The Farsite system [1] is a distributed file system that does not +use any centralized server like NFS. Farsite achieves high availability and +scalability using replication. The Google File System [6] is another distributed +file system built for hosting the state of Google’s internal applications. GFS +uses a simple design with a single master server for hosting the entire metadata +and where the data is split into chunks and stored in chunkservers. Bayou is a +distributed relational database system that allows disconnected operations and +provides eventual data consistency [21]. + +Among these systems, Bayou, Coda and Ficus allow disconnected operations and are +resilient to issues such as network partitions and outages. These systems differ +on their conflict resolution procedures. For instance, Coda and Ficus perform +system level conflict resolution and Bayou allows application level resolution. +All of them, however, guarantee eventual consistency. + +Similar to these systems, Dynamo allows read and write operations to continue +even during network partitions and resolves updated conflicts using different +conflict resolution mechanisms. Distributed block storage systems like FAB [18] +split large size objects into smaller blocks and stores each block in a highly +available manner. In comparison to these systems, a key-value store is more +suitable in this case because: (a) it is intended to store relatively small +objects (size < 1M) and (b) key-value stores are easier to configure on a per- +application basis. Antiquity is a wide-area distributed storage system designed +to handle multiple server failures [23]. 
It uses a secure log to preserve data
+integrity, replicates each log on multiple servers for durability, and uses
+Byzantine fault tolerance protocols to ensure data consistency. In contrast to
+Antiquity, Dynamo does not focus on the problem of data integrity and security
+and is built for a trusted environment. Bigtable is a distributed storage system
+for managing structured data. It maintains a sparse, multi-dimensional sorted
+map and allows applications to access their data using multiple attributes [2].
+Compared to Bigtable, Dynamo targets applications that require only key/value
+access with primary focus on high availability where updates are not rejected
+even in the wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages.
+ +<table id="table-1"> + <caption> + Table 1: Summary of techniques used in Dynamo and their advantages. + </caption> + <tr> + <th>Problem</th> + <th>Technique</th> + <th>Advantage</th> + </tr> + <tr> + <td>Partitioning</td> + <td>Consistent Hashing</td> + <td>Incremental Scalability</td> + </tr> + <tr> + <td>High Availability for writes</td> + <td>Vector clocks with reconciliation during reads</td> + <td>Version size is decoupled from update rates.</td> + </tr> + <tr> + <td>Handling temporary failures</td> + <td>Sloppy Quorum and hinted handoff</td> + <td>Provides high availability and durability guarantee when some of the + replicas are not available.</td> + </tr> + <tr> + <td>Recovering from permanent failures</td> + <td>Anti-entropy using Merkle trees</td> + <td>Synchronizes divergent replicas in the background.</td> + </tr> + <tr> + <td>Membership and failure detection</td> + <td>Gossip-based membership protocol and failure detection.</td> + <td>Preserves symmetry and avoids having a centralized registry for storing + membership and node liveness information.</td> + </tr> +</table> + +### 4.1 System Interface + +Dynamo stores objects associated with a key through a simple interface; it +exposes two operations: get() and put(). The get(key) operation locates the +object replicas associated with the key in the storage system and returns a +single object or a list of objects with conflicting versions along with a +context. The put(key, context, object) operation determines where the replicas +of the object should be placed based on the associated key, and writes the +replicas to disk. The context encodes system metadata about the object that is +opaque to the caller and includes information such as the version of the object. +The context information is stored along with the object so that the system can +verify the validity of the context object supplied in the put request. + +> Whereas Dynamo only has the concept of keys, we added a higher level of +> organization called a "bucket." Keys are stored in buckets and buckets are the +> level at which several Riak KV properties can be configured (primarily the "N" +> value, or the replication value.) In addition to the bucket+key identifier and +> value, Riak KV will also return the associated metadata for a given object +> with each get or put. +> +> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. + +[HTTP API]: {{<baseurl>}}riak/kv/2.9.1/developing/api/http/ +[Protocol Buffers API]: {{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers/ + +Dynamo treats both the key and the object supplied by the caller as an opaque +array of bytes. It applies a MD5 hash on the key to generate a 128-bit +identifier, which is used to determine the storage nodes that are responsible +for serving the key. + +> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash +> to generate a 160 bit identifier which is then used to determine where in the +> database each datum is stored. Riak KV treats data as an opaque binary, thus +> enabling users to store virtually anything. + + +### 4.2 Partitioning Algorithm + +One of the key design requirements for Dynamo is that it must scale +incrementally. This requires a mechanism to dynamically partition the data over +the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning +scheme relies on consistent hashing to distribute the load across multiple +storage hosts. 
In consistent hashing [10], the output range of a hash function
+is treated as a fixed circular space or “ring” (i.e. the largest hash value
+wraps around to the smallest hash value). Each node in the system is assigned a
+random value within this space which represents its “position” on the ring. Each
+data item identified by a key is assigned to a node by hashing the data item’s
+key to yield its position on the ring, and then walking the ring clockwise to
+find the first node with a position larger than the item’s position. Thus, each
+node becomes responsible for the region in the ring between it and its
+predecessor node on the ring. The principal advantage of consistent hashing is
+that departure or arrival of a node only affects its immediate neighbors and
+other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data around
+> the ring to partitions responsible for storing data. The ring has a maximum
+> key space of 2^160. Each bucket+key (and its associated value) is hashed to a
+> location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Each storage node will
+> optimistically handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo uses
+a variant of consistent hashing (similar to the one used in [10, 20]): instead
+of mapping a node to a single point in the circle, each node gets assigned to
+multiple points in the ring. To this end, Dynamo uses the concept of “virtual
+nodes”. A virtual node looks like a single node in the system, but each node can
+be responsible for more than one virtual node. Effectively, when a new node is
+added to the system, it is assigned multiple positions (henceforth, “tokens”) in
+the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed
+in Section 6.
+
+> Riak KV also has the concept of virtual nodes and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+If a node becomes unavailable (due to failures or routine maintenance), the load
+handled by this node is evenly dispersed across the remaining available nodes.
+
+When a node becomes available again, or a new node is added to the system, the
+newly available node accepts a roughly equivalent amount of load from each of
+the other available nodes.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+The number of virtual nodes that a node is responsible can be decided based on
+its capacity, accounting for heterogeneity in the physical infrastructure.
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters/#the-ring
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts.
Each data item is replicated at N hosts, where N is a parameter
+configured “per-instance”. Each key, k, is assigned to a coordinator node
+(described in the previous section). The coordinator is in charge of the
+replication of the data items that fall within its range. In addition to locally
+storing each key within its range, the coordinator replicates these keys at the
+N-1 clockwise successor nodes in the ring. This results in a system where each
+node is responsible for the region of the ring between it and its Nth
+predecessor. In <a href="#figure-2">Figure 2</a>, node B replicates the key k at
+nodes C and D in addition to storing it locally. Node D will store the keys that
+fall in the ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
+> the concept of a bucket we covered above? In Riak KV, the replication
+> parameter, "N" (also called "n_val"), is configurable at the bucket level.
+> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
+> store three replicas of your data on three different partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called the
+preference list. The system is designed, as will be explained in Section 4.8, so
+that every node in the system can determine which nodes should be in this list
+for any particular key. To account for node failures, preference list contains
+more than N nodes. Note that with the use of virtual nodes, it is possible that
+the first N successor positions for a particular key may be owned by less than N
+distinct physical nodes (i.e. a node may hold more than one of the first N
+positions). To address this, the preference list for a key is constructed by
+skipping positions in the ring to ensure that the list contains only distinct
+physical nodes.
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be propagated
+to all replicas asynchronously. A put() call may return to its caller before the
+update has been applied at all the replicas, which can result in scenarios where
+a subsequent get() operation may return an object that does not have the latest
+updates. If there are no failures then there is a bound on the update
+propagation times. However, under certain failure scenarios (e.g., server
+outages or network partitions), updates may not arrive at all replicas for an
+extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously and, as you would expect, this could result in a datum that is
+> out of date being returned to the client. But don't worry. We built in some
+> mechanisms to address this.
+
+There is a category of applications in Amazon’s platform that can tolerate such
+inconsistencies and can be constructed to operate under these conditions. For
+example, the shopping cart application requires that an “Add to Cart” operation
+can never be forgotten or rejected.
If the most recent state of the cart is +unavailable, and a user makes changes to an older version of the cart, that +change is still meaningful and should be preserved. But at the same time it +shouldn’t supersede the currently unavailable state of the cart, which itself +may contain changes that should be preserved. Note that both “add to cart” and +“delete item from cart” operations are translated into put requests to Dynamo. +When a customer wants to add an item to (or remove from) a shopping cart and the +latest version is not available, the item is added to (or removed from) the +older version and the divergent versions are reconciled later. + +> Much like Dynamo was suited to the design of the shopping cart, Riak KV, and +> its tradeoffs, are appropriate for a certain set of use cases. We happen to +> feel that _most_ use cases can tolerate some level of eventual consistency. + +In order to provide this kind of guarantee, Dynamo treats the result of each +modification as a new and immutable version of the data. It allows for multiple +versions of an object to be present in the system at the same time. Most of the +time, new versions subsume the previous version(s), and the system itself can +determine the authoritative version (syntactic reconciliation). However, version +branching may happen, in the presence of failures combined with concurrent +updates, resulting in conflicting versions of an object. In these cases, the +system cannot reconcile the multiple versions of the same object and the client +must perform the reconciliation in order to collapse multiple branches of data +evolution back into one (semantic reconciliation). A typical example of a +collapse operation is “merging” different versions of a customer’s shopping +cart. Using this reconciliation mechanism, an “add to cart” operation is never +lost. However, deleted items can resurface. + +> The same holds true for Riak KV. If, by way of some failure and concurrent +> update (rare but quite possible), there come to exist multiple versions of the +> same object, Riak KV will push this decision down to the client (who are we to +> tell you which is the authoritative object?). All that said, if your +> application doesn't need this level of version control, we enable you to turn +> the usage of vector clocks on and off at the bucket level. + +It is important to understand that certain failure modes can potentially result +in the system having not just two but several versions of the same data. Updates +in the presence of network partitions and node failures can potentially result +in an object having distinct version sub-histories, which the system will need +to reconcile in the future. This requires us to design applications that +explicitly acknowledge the possibility of multiple versions of the same data (in +order to never lose any updates). + +> Ditto. + +Dynamo uses vector clocks [12] in order to capture causality between different +versions of the same object. A vector clock is effectively a list of (node, +counter) pairs. One vector clock is associated with every version of every +object. One can determine whether two versions of an object are on parallel +branches or have a causal ordering, by examine their vector clocks. If the +counters on the first object’s clock are less-than-or-equal to all of the nodes +in the second clock, then the first is an ancestor of the second and can be +forgotten. Otherwise, the two changes are considered to be in conflict and +require reconciliation. 
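+
+> The ancestry rule in the paragraph above is mechanical enough to sketch in
+> a few lines of Erlang (ours, not Riak's actual `vclock` module). Clocks are
+> represented as lists of `{Node, Counter}` pairs:
+>
+> ```erlang
+> %% true if clock A is an ancestor of (or equal to) clock B: every
+> %% counter in A is =< the corresponding counter in B.
+> ancestor_of(A, B) ->
+>     lists:all(fun({Node, CountA}) ->
+>                   CountA =< proplists:get_value(Node, B, 0)
+>               end, A).
+>
+> %% Two versions conflict when neither is an ancestor of the other and
+> %% must be handed back to the client for semantic reconciliation.
+> conflict(A, B) ->
+>     not ancestor_of(A, B) andalso not ancestor_of(B, A).
+> ```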
+
+> As you may have already figured out, Riak KV uses vector clocks for object
+> versioning, too. Here are a whole host of resources to keep you busy for a while:
+>
+> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vector-clock)
+>
+> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/)
+> |
+> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
+>
+> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
+>
+> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)
+
+In Dynamo, when a client wishes to update an object, it must specify which
+version it is updating. This is done by passing the context it obtained from an
+earlier read operation, which contains the vector clock information. Upon
+processing a read request, if Dynamo has access to multiple branches that cannot
+be syntactically reconciled, it will return all the objects at the leaves, with
+the corresponding version information in the context. An update using this
+context is considered to have reconciled the divergent versions and the branches
+are collapsed into a single new version.
+
+**<figure id="figure-3" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure3.png">
+  <figcaption>
+    Figure 3: Version evolution of an object over time.
+  </figcaption>
+</figure>**
+
+To illustrate the use of vector clocks, let us consider the example shown in
+<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say
+Sx) that handles the write for this key increases its sequence number and uses
+it to create the data's vector clock. The system now has the object D1 and its
+associated clock [(Sx, 1)]. The client updates the object. Assume the same node
+handles this request as well. The system now also has object D2 and its
+associated clock [(Sx, 2)]. D2 descends from D1 and therefore over-writes D1;
+however, there may be replicas of D1 lingering at nodes that have not yet seen
+D2. Let us assume that the same client updates the object again and a different
+server (say Sy) handles the request. The system now has data D3 and its
+associated clock [(Sx, 2), (Sy, 1)].
+
+Next assume a different client reads D2 and then tries to update it, and another
+node (say Sz) does the write. The system now has D4 (descendant of D2) whose
+version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2 could
+determine, upon receiving D4 and its clock, that D1 and D2 are overwritten by
+the new data and can be garbage collected. A node that is aware of D3 and
+receives D4 will find that there is no causal relation between them. In other
+words, there are changes in D3 and D4 that are not reflected in each other. Both
+versions of the data must be kept and presented to a client (upon a read) for
+semantic reconciliation.
+
+Now assume some client reads both D3 and D4 (the context will reflect that both
+values were found by the read). The read's context is a summary of the clocks of
+D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client performs the
+reconciliation and node Sx coordinates the write, Sx will update its sequence
+number in the clock. The new data D5 will have the following clock: [(Sx, 3),
+(Sy, 1), (Sz, 1)].
+
+A possible issue with vector clocks is that the size of vector clocks may grow
+if many servers coordinate the writes to an object.
+In practice, this is not likely because the writes are usually handled by one
+of the top N nodes in the preference list. In case of network partitions or
+multiple server failures, write requests may be handled by nodes that are not
+in the top N nodes in the preference list, causing the size of the vector clock
+to grow. In these scenarios, it is desirable to limit the size of the vector
+clock. To this end, Dynamo employs the following clock truncation scheme: Along
+with each (node, counter) pair, Dynamo stores a timestamp that indicates the
+last time the node updated the data item. When the number of (node, counter)
+pairs in the vector clock reaches a threshold (say 10), the oldest pair is
+removed from the clock. Clearly, this truncation scheme can lead to
+inefficiencies in reconciliation as the descendant relationships cannot be
+derived accurately. However, this problem has not surfaced in production and
+therefore this issue has not been thoroughly investigated.
+
+> Riak KV does a certain amount of vector clock pruning to keep vector clock
+> growth under control.
+
+
+### 4.5 Execution of get() and put() operations
+
+Any storage node in Dynamo is eligible to receive client get and put operations
+for any key. In this section, for the sake of simplicity, we describe how these
+operations are performed in a failure-free environment, and in the subsequent
+section we describe how read and write operations are executed during failures.
+
+> Any node in the Riak KV ring can coordinate a request. The Riak KV information
+> in this section applies to a failure-free environment.
+
+Both get and put operations are invoked using Amazon’s infrastructure-specific
+request processing framework over HTTP. There are two strategies that a client
+can use to select a node: (1) route its request through a generic load balancer
+that will select a node based on load information, or (2) use a partition-aware
+client library that routes requests directly to the appropriate coordinator
+nodes. The advantage of the first approach is that the client does not have to
+link any code specific to Dynamo in its application, whereas the second strategy
+can achieve lower latency because it skips a potential forwarding step.
+
+A node handling a read or write operation is known as the coordinator.
+Typically, this is the first among the top N nodes in the preference list. If
+the requests are received through a load balancer, requests to access a key may
+be routed to any random node in the ring. In this scenario, the node that
+receives the request will not coordinate it if the node is not in the top N of
+the requested key’s preference list. Instead, that node will forward the request
+to the first among the top N nodes in the preference list.
+
+Read and write operations involve the first N healthy nodes in the preference
+list, skipping over those that are down or inaccessible. When all nodes are
+healthy, the top N nodes in a key’s preference list are accessed. When there are
+node failures or network partitions, nodes that are lower ranked in the
+preference list are accessed.
+
+To maintain consistency among its replicas, Dynamo uses a consistency protocol
+similar to those used in quorum systems. This protocol has two key configurable
+values: R and W. R is the minimum number of nodes that must participate in a
+successful read operation. W is the minimum number of nodes that must
+participate in a successful write operation. Setting R and W such that R + W > N
+yields a quorum-like system. In this model, the latency of a get (or put)
+operation is dictated by the slowest of the R (or W) replicas. For this reason,
+R and W are usually configured to be less than N, to provide better latency.
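+
+The quorum arithmetic is worth pausing on. A sketch (ours, not from the
+paper): with N replicas, any read of R nodes is guaranteed to overlap any
+write of W nodes whenever R + W > N, so at least one node in the read set has
+seen the latest successful write.
+
+```ruby
+# With N replicas, an R-node read set and a W-node write set must share
+# at least one member whenever R + W > N (pigeonhole principle).
+def quorum?(n:, r:, w:)
+  r + w > n
+end
+
+quorum?(n: 3, r: 2, w: 2)  # => true  -- the common Dynamo (3,2,2) configuration
+quorum?(n: 3, r: 1, w: 1)  # => false -- fast, but a read may miss the latest write
+```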
+
+> Riak KV makes use of the same values. But, thanks to our concept of buckets,
+> we made it a bit more customizable. The default R and W values are set at the
+> bucket level but can be configured at the request level if the developer deems
+> it necessary for certain data. "Quorum" as described in Dynamo is the default
+> setting in Riak KV.
+>
+> Some more resources on R and W:
+>
+> [REST API]({{<baseurl>}}riak/kv/2.9.1/developing/api/http/)
+>
+> [Writing Data]({{<baseurl>}}riak/kv/2.9.1/developing/usage/creating-objects/)
+>
+> [Reading Data]({{<baseurl>}}riak/kv/2.9.1/developing/usage/reading-objects/)
+
+Upon receiving a put() request for a key, the coordinator generates the vector
+clock for the new version and writes the new version locally. The coordinator
+then sends the new version (along with the new vector clock) to the N
+highest-ranked reachable nodes. If at least W-1 nodes respond then the write is
+considered successful.
+
+> In Riak KV a write is considered successful when the total number of
+> responding writes equals W. This need not be a durable write, which is a
+> separate value in Riak KV labeled DW.
+
+Similarly, for a get() request, the coordinator requests all existing versions
+of data for that key from the N highest-ranked reachable nodes in the preference
+list for that key, and then waits for R responses before returning the result to
+the client. If the coordinator ends up gathering multiple versions of the data,
+it returns all the versions it deems to be causally unrelated. The divergent
+versions are then reconciled and the reconciled version superseding the current
+versions is written back.
+
+> Same for Riak KV. Reconciling divergent versions in Riak KV is called
+> [Read Repair]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication/#read-repair).
+
+
+### 4.6 Handling Failures: Hinted Handoff
+
+If Dynamo used a traditional quorum approach it would be unavailable during
+server failures and network partitions, and would have reduced durability even
+under the simplest of failure conditions. To remedy this it does not enforce
+strict quorum membership and instead it uses a “sloppy quorum”; all read and
+write operations are performed on the first N healthy nodes from the preference
+list, which may not always be the first N nodes encountered while walking the
+consistent hashing ring.
+
+> [Hinted handoff] is built into Riak KV's core.
+>
+> You can get a glimpse of Riak KV's preference list (or *preflist*) calculation
+> in the [Replication] walkthrough.
+
+[Hinted handoff]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#hinted-handoff
+[Replication]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/replication/
+
+Consider the example of Dynamo configuration given in <a href="#figure-2">Figure
+2</a> with N=3. In this example, if node A is temporarily down or unreachable
+during a write operation then a replica that would normally have lived on A will
+now be sent to node D. This is done to maintain the desired availability and
+durability guarantees. The replica sent to D will have a hint in its metadata
+that suggests which node was the intended recipient of the replica (in this case
+A). Nodes that receive hinted replicas will keep them in a separate local
+database that is scanned periodically.
+Upon detecting that A has recovered, D will attempt to deliver the replica to
+A. Once the transfer succeeds, D may delete the object from its local store
+without decreasing the total number of replicas in the system.
+
+Using hinted handoff, Dynamo ensures that read and write operations do not fail
+due to temporary node or network failures. Applications that need the highest
+level of availability can set W to 1, which ensures that a write is accepted as
+long as a single node in the system has durably written the key to its local
+store. Thus, the write request is only rejected if all nodes in the system are
+unavailable. However, in practice, most Amazon services in production set a
+higher W to meet the desired level of durability. A more detailed discussion of
+configuring N, R and W follows in section 6.
+
+> As mentioned previously, Riak KV does not require that a write be durable,
+> only that a vnode responds in the affirmative. If you require a durable write
+> in the way mentioned here, use DW.
+
+It is imperative that a highly available storage system be capable of handling
+the failure of an entire data center(s). Data center failures happen due to
+power outages, cooling failures, network failures, and natural disasters. Dynamo
+is configured such that each object is replicated across multiple data centers.
+In essence, the preference list of a key is constructed such that the storage
+nodes are spread across multiple data centers. These datacenters are connected
+through high speed network links. This scheme of replicating across multiple
+datacenters allows us to handle entire data center failures without a data
+outage.
+
+> [Multi Datacenter Replication] was previously only implemented in the
+> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. Now it
+> is available in all versions from Riak KV 2.9.1 onwards.
+
+[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/2.9.1/using/reference/v3-multi-datacenter/architecture/
+[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
+
+
+### 4.7 Handling permanent failures: Replica synchronization
+
+Hinted handoff works best if the system membership churn is low and node
+failures are transient. There are scenarios under which hinted replicas become
+unavailable before they can be returned to the original replica node. To handle
+this and other threats to durability, Dynamo implements an anti-entropy (replica
+synchronization) protocol to keep the replicas synchronized.
+
+> Read repair, mentioned above, is the simplest form of anti-entropy. But it is
+> passive, not active as this section describes.
+
+To detect the inconsistencies between replicas faster and to minimize the amount
+of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a hash tree
+where leaves are hashes of the values of individual keys. Parent nodes higher in
+the tree are hashes of their respective children. The principal advantage of a
+Merkle tree is that each branch of the tree can be checked independently without
+requiring nodes to download the entire tree or the entire data set. Moreover,
+Merkle trees help in reducing the amount of data that needs to be transferred
+while checking for inconsistencies among replicas. For instance, if the hash
+values of the root of two trees are equal, then the values of the leaf nodes in
+the tree are equal and the nodes require no synchronization. If not, it implies
+that the values of some replicas are different. In such cases, the nodes may
+exchange the hash values of children and the process continues until it reaches
+the leaves of the trees, at which point the hosts can identify the keys that are
+“out of sync”. Merkle trees minimize the amount of data that needs to be
+transferred for synchronization and reduce the number of disk reads performed
+during the anti-entropy process.
+
+> Riak KV implements a Merkle-tree based Active Anti-Entropy (*AAE*).
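+
+To make the traversal concrete, here is a toy Merkle comparison of our own
+(the structures and names are assumptions for illustration, not Riak KV's AAE
+code): identical hashes prune a whole branch, and only differing leaves yield
+keys to resynchronize.
+
+```ruby
+require 'digest'
+
+Leaf   = Struct.new(:hash, :keys)
+Parent = Struct.new(:hash, :children)
+
+def leaf(key, value)
+  Leaf.new(Digest::SHA1.hexdigest("#{key}=#{value}"), [key])
+end
+
+def node(left, right)
+  Parent.new(Digest::SHA1.hexdigest(left.hash + right.hash), [left, right])
+end
+
+# Walk two trees top-down, pruning identical subtrees, and return the
+# keys whose replicas are "out of sync".
+def out_of_sync(a, b)
+  return [] if a.hash == b.hash            # identical subtree: nothing to do
+  return a.keys if a.is_a?(Leaf)           # differing leaf: resync these keys
+  a.children.zip(b.children).flat_map { |ca, cb| out_of_sync(ca, cb) }
+end
+
+replica_1 = node(leaf('k1', 'v1'), leaf('k2', 'v2'))
+replica_2 = node(leaf('k1', 'v1'), leaf('k2', 'stale'))
+out_of_sync(replica_1, replica_2)  # => ["k2"]
+```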
+
+Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
+separate Merkle tree for each key range (the set of keys covered by a virtual
+node) it hosts. This allows nodes to compare whether the keys within a key range
+are up-to-date. In this scheme, two nodes exchange the root of the Merkle tree
+corresponding to the key ranges that they host in common. Subsequently, using
+the tree traversal scheme described above the nodes determine if they have any
+differences and perform the appropriate synchronization action. The disadvantage
+with this scheme is that many key ranges change when a node joins or leaves the
+system thereby requiring the tree(s) to be recalculated. This issue is
+addressed, however, by the refined partitioning scheme described in Section 6.2.
+
+
+### 4.8 Membership and Failure Detection
+
+> This section is well expressed in [Adding and Removing Nodes] and
+> [Failure Scenarios].
+
+[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency/
+
+#### 4.8.1 Ring Membership
+
+> Riak KV operators can trigger node management via the
+> [riak-admin command-line tool].
+
+[riak-admin command-line tool]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/
+
+In Amazon’s environment node outages (due to failures and maintenance tasks) are
+often transient but may last for extended intervals. A node outage rarely
+signifies a permanent departure and therefore should not result in rebalancing
+of the partition assignment or repair of the unreachable replicas. Similarly,
+manual error could result in the unintentional startup of new Dynamo nodes. For
+these reasons, it was deemed appropriate to use an explicit mechanism to
+initiate the addition and removal of nodes from a Dynamo ring. An administrator
+uses a command line tool or a browser to connect to a Dynamo node and issue a
+membership change to join a node to a ring or remove a node from a ring. The
+node that serves the request writes the membership change and its time of issue
+to persistent store. The membership changes form a history because nodes can be
+removed and added back multiple times.
+
+> Nodes are manually added using the `riak-admin cluster join` command.
+>
+> When a node permanently departs, rebalancing is triggered using the
+> `riak-admin cluster leave` command.
+
+A gossip-based protocol propagates membership changes and maintains an
+eventually consistent view of membership. Each node contacts a peer chosen at
+random every second and the two nodes efficiently reconcile their persisted
+membership change histories.
+
+> Riak KV's ring state holds membership information, and is propagated via
+> [gossiping], including random reconciliation, defaulting to once a minute.
+
+[gossiping]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#gossiping
+
+When a node starts for the first time, it chooses its set of tokens (virtual
+nodes in the consistent hash space) and maps nodes to their respective token
+sets.
+The mapping is persisted on disk and initially contains only the local
+node and token set. The mappings stored at different Dynamo nodes are reconciled
+during the same communication exchange that reconciles the membership change
+histories. Therefore, partitioning and placement information also propagates via
+the gossip-based protocol and each storage node is aware of the token ranges
+handled by its peers. This allows each node to forward a key’s read/write
+operations to the right set of nodes directly.
+
+> These tokens are vnodes (virtual nodes) in Riak KV.
+
+
+#### 4.8.2 External Discovery
+
+The mechanism described above could temporarily result in a logically
+partitioned Dynamo ring. For example, the administrator could contact node A to
+join A to the ring, then contact node B to join B to the ring. In this scenario,
+nodes A and B would each consider itself a member of the ring, yet neither would
+be immediately aware of the other. To prevent logical partitions, some Dynamo
+nodes play the role of seeds. Seeds are nodes that are discovered via an
+external mechanism and are known to all nodes. Because all nodes eventually
+reconcile their membership with a seed, logical partitions are highly unlikely.
+Seeds can be obtained either from static configuration or from a configuration
+service. Typically seeds are fully functional nodes in the Dynamo ring.
+
+> To rectify these sorts of logical partitions, multiple Riak cluster changes
+> are configured as one batch. Any changes must first be viewed with `riak-admin
+> cluster plan`, then the changes are committed with `riak-admin cluster
+> commit`. The new ring state is gossiped.
+>
+> See _[The Node Join Process]_ for more.
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+
+
+#### 4.8.3 Failure Detection
+
+Failure detection in Dynamo is used to avoid attempts to communicate with
+unreachable peers during get() and put() operations and when transferring
+partitions and hinted replicas. For the purpose of avoiding failed attempts at
+communication, a purely local notion of failure detection is entirely
+sufficient: node A may consider node B failed if node B does not respond to node
+A’s messages (even if B is responsive to node C's messages). In the presence of
+a steady rate of client requests generating inter-node communication in the
+Dynamo ring, a node A quickly discovers that a node B is unresponsive when B
+fails to respond to a message; Node A then uses alternate nodes to service
+requests that map to B's partitions; A periodically retries B to check for the
+latter's recovery. In the absence of client requests to drive traffic between
+two nodes, neither node really needs to know whether the other is reachable and
+responsive.
+
+Decentralized failure detection protocols use a simple gossip-style protocol
+that enables each node in the system to learn about the arrival (or departure)
+of other nodes. For detailed information on decentralized failure detectors and
+the parameters affecting their accuracy, the interested reader is referred to
+[8]. Early designs of Dynamo used a decentralized failure detector to maintain
+a globally consistent view of failure state. Later it was determined that the
+explicit node join and leave methods obviate the need for a global view of
+failure state.
+This is because nodes are notified of permanent node additions
+and removals by the explicit node join and leave methods, and temporary node
+failures are detected by the individual nodes when they fail to communicate with
+others (while forwarding requests).
+
+> Riak KV follows the same mechanism, by manually triggering permanent ring
+> state changes, and gossiping the new state.
+
+
+### 4.9 Adding/Removing Storage Nodes
+
+When a new node (say X) is added into the system, it gets assigned a number of
+tokens that are randomly scattered on the ring. For every key range that is
+assigned to node X, there may be a number of nodes (less than or equal to N)
+that are currently in charge of handling keys that fall within its token range.
+Due to the allocation of key ranges to X, some existing nodes no longer have to
+store some of their keys, and these nodes transfer those keys to X. Let us
+consider a simple bootstrapping scenario where node X is added to the ring shown
+in <a href="#figure-2">Figure 2</a> between A and B. When X is added to the
+system, it is in charge of storing keys in the ranges (F, G], (G, A] and (A, X].
+As a consequence, nodes B, C and D no longer have to store the keys in these
+respective ranges. Therefore, nodes B, C, and D will offer to, and upon
+confirmation from X, transfer the appropriate set of keys. When a node is
+removed from the system, the reallocation of keys happens in a reverse process.
+
+> Riak KV does not randomly assign vnodes, but rather, iterates through the list
+> of partitions, assigning them to nodes in a round-robin style.
+
+Operational experience has shown that this approach distributes the load of key
+distribution uniformly across the storage nodes, which is important to meet the
+latency requirements and to ensure fast bootstrapping. Finally, by adding a
+confirmation round between the source and the destination, it is made sure that
+the destination node does not receive any duplicate transfers for a given key
+range.
+
+
+## 5. Implementation
+
+In Dynamo, each storage node has three main software components: request
+coordination, membership and failure detection, and a local persistence engine.
+All these components are implemented in Java.
+
+> Riak KV is implemented in Erlang. Request coordination and membership behavior
+> is defined by [riak_core] and implemented by [Riak KV].
+
+[riak_core]: http://github.com/basho/riak_core
+[Riak KV]: http://github.com/basho/riak_kv
+
+Dynamo’s local persistence component allows for different storage engines to be
+plugged in. Engines that are in use are Berkeley Database (BDB) Transactional
+Data Store, BDB Java Edition, MySQL, and an in-memory buffer with persistent
+backing store. The main reason for designing a pluggable persistence component
+is to choose the storage engine best suited for an application’s access
+patterns. For instance, BDB can handle objects typically in the order of tens of
+kilobytes whereas MySQL can handle objects of larger sizes. Applications choose
+Dynamo’s local persistence engine based on their object size distribution. The
+majority of Dynamo’s production instances use BDB Transactional Data Store.
+
+> Riak KV ships with various [backend options]. [Bitcask] is the default, but
+> [LevelDB] and Main [Memory] are also used heavily in production (in that
+> order). You can also use more than one backend in production via the [Multi]
+> backend configuration.
+>
+> Bitcask is a fast and reliable choice, but does have some limitations at very
+> large scales.
+> For larger clusters, you may want to choose LevelDB (which also supports
+> [secondary indexes]). The Memory backend is an excellent choice when speed is
+> important and durability is not. It also has TTL support.
+
+[backend options]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/
+[Bitcask]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask/
+[LevelDB]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb/
+[Memory]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/memory/
+[Multi]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/multi/
+[secondary indexes]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/secondary-indexes/
+
+The request coordination component is built on top of an event-driven messaging
+substrate where the message processing pipeline is split into multiple stages
+similar to the SEDA architecture [24]. All communications are implemented using
+Java NIO channels. The coordinator executes the read and write requests on
+behalf of clients by collecting data from one or more nodes (in the case of
+reads) or storing data at one or more nodes (for writes). Each client request
+results in the creation of a state machine on the node that received the client
+request. The state machine contains all the logic for identifying the nodes
+responsible for a key, sending the requests, waiting for responses, potentially
+doing retries, processing the replies and packaging the response to the client.
+Each state machine instance handles exactly one client request. For instance, a
+read operation implements the following state machine: (i) send read requests to
+the nodes, (ii) wait for the minimum number of required responses, (iii) if too
+few replies were received within a given time bound, fail the request, (iv)
+otherwise gather all the data versions and determine the ones to be returned and
+(v) if versioning is enabled, perform syntactic reconciliation and generate an
+opaque write context that contains the vector clock that subsumes all the
+remaining versions. For the sake of brevity the failure handling and retry
+states are left out.
+
+> Request coordination in Riak KV uses Erlang message passing, but follows a
+> similar state machine.
+
+After the read response has been returned to the caller the state machine waits
+for a small period of time to receive any outstanding responses. If stale
+versions were returned in any of the responses, the coordinator updates those
+nodes with the latest version. This process is called read repair because it
+repairs replicas that have missed a recent update at an opportunistic time and
+relieves the anti-entropy protocol from having to do it.
+
+> Riak KV implements [Read Repair].
+
+[Read Repair]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication/#read-repair
+
+As noted earlier, write requests are coordinated by one of the top N nodes in
+the preference list. Although it is desirable always to have the first node
+among the top N to coordinate the writes, thereby serializing all writes at a
+single location, this approach has led to uneven load distribution resulting in
+SLA violations. This is because the request load is not uniformly distributed
+across objects. To counter this, any of the top N nodes in the preference list
+is allowed to coordinate the writes. In particular, since each write usually
+follows a read operation, the coordinator for a write is chosen to be the node
+that replied fastest to the previous read operation, which is stored in the
+context information of the request. This optimization enables us to pick the
+node that has the data that was read by the preceding read operation, thereby
+increasing the chances of getting “read-your-writes” consistency. It also
+reduces variability in the performance of the request handling, which improves
+performance at the 99.9 percentile.
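+
+As a rough sketch of step (v) of the read state machine above (ours, not the
+actual implementation), and assuming the Hash-based clocks and `descends?`
+helper sketched in Section 4.4: given the clock/value pairs gathered from
+replicas, syntactic reconciliation keeps only the versions not subsumed by any
+other reply.
+
+```ruby
+# Clock `a` subsumes clock `b` when every counter in `b` is <= its
+# counterpart in `a` (same helper sketched in Section 4.4).
+def descends?(a, b)
+  b.all? { |node, counter| a.fetch(node, 0) >= counter }
+end
+
+# Keep only causal "leaves": versions whose clock is not dominated by
+# the clock of any other gathered reply.
+def syntactic_reconcile(replies)
+  replies.reject do |clock, _value|
+    replies.any? { |other, _| other != clock && descends?(other, clock) }
+  end
+end
+
+replies = {
+  { Sx: 2 }        => 'D2',
+  { Sx: 2, Sy: 1 } => 'D3',
+  { Sx: 2, Sz: 1 } => 'D4'
+}
+syntactic_reconcile(replies)
+# => {{Sx: 2, Sy: 1}=>"D3", {Sx: 2, Sz: 1}=>"D4"} -- D2 is an ancestor, dropped
+```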
+
+
+## 6. Experiences & Lessons Learned
+
+> Much of this section relates to benchmarks run against Dynamo. You can run
+> [Basho Bench] against your own Riak cluster to discover your own
+> optimal values.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.1/using/performance/benchmarking/
+
+Dynamo is used by several services with different configurations. These
+instances differ by their version reconciliation logic, and read/write quorum
+characteristics. The following are the main patterns in which Dynamo is used:
+
+* Business logic specific reconciliation: This is a popular use case for Dynamo.
+Each data object is replicated across multiple nodes. In case of divergent
+versions, the client application performs its own reconciliation logic. The
+shopping cart service discussed earlier is a prime example of this category. Its
+business logic reconciles objects by merging different versions of a customer’s
+shopping cart.
+
+> Riak KV currently supports simple conflict resolution by way of read-repair,
+> leaving more complex reconciliation to the client. There are several tools
+> to help simplify this task, such as [Statebox].
+>
+> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative
+> Replicated Data Types)], for reconciling common data types like sets and
+> counters.
+
+[Statebox]: https://github.com/mochi/statebox_riak
+[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/2.9.1/developing/data-types/
+
+
+* Timestamp based reconciliation: This case differs from the previous one only
+in the reconciliation mechanism. In case of divergent versions, Dynamo performs
+simple timestamp based reconciliation logic of “last write wins”; i.e., the
+object with the largest physical timestamp value is chosen as the correct
+version. The service that maintains customer session information is a good
+example of a service that uses this mode.
+
+> Riak also supports this for high-performance cases where accuracy is less
+> important than speed.
+
+* High performance read engine: While Dynamo is built to be an “always
+writeable” data store, a few services are tuning its quorum characteristics and
+using it as a high performance read engine. Typically, these services have a
+high read request rate and only a small number of updates. In this
+configuration, typically R is set to be 1 and W to be N. For these services,
+Dynamo provides the ability to partition and replicate their data across
+multiple nodes, thereby offering incremental scalability. Some of these
+instances function as the authoritative persistence cache for data stored in
+more heavyweight backing stores. Services that maintain product catalog and
+promotional items fit in this category.
+
+> Riak can be used in this manner.
+
+The main advantage of Dynamo is that its client applications can tune the values
+of N, R and W to achieve their desired levels of performance, availability and
+durability. For instance, the value of N determines the durability of each
+object. A typical value of N used by Dynamo’s users is 3.
+
+The values of W and R impact object availability, durability and consistency.
+For instance, if W is set to 1, then the system will never reject a write
+request as long as there is at least one node in the system that can
+successfully process a write request. However, low values of W and R can
+increase the risk of inconsistency as write requests are deemed successful and
+returned to the clients even if they are not processed by a majority of the
+replicas. This also introduces a vulnerability window for durability when a
+write request is successfully returned to the client even though it has been
+persisted at only a small number of nodes.
+
+Traditional wisdom holds that durability and availability go hand-in-hand.
+However, this is not necessarily true here. For instance, the vulnerability
+window for durability can be decreased by increasing W. This may increase the
+probability of rejecting requests (thereby decreasing availability) because more
+storage hosts need to be alive to process a write request.
+
+The common (N,R,W) configuration used by several instances of Dynamo is (3,2,2).
+These values are chosen to meet the necessary levels of performance, durability,
+consistency, and availability SLAs.
+
+All the measurements presented in this section were taken on a live system
+operating with a configuration of (3,2,2) and running a couple hundred nodes
+with homogenous hardware configurations. As mentioned earlier, each instance of
+Dynamo contains nodes that are located in multiple datacenters. These
+datacenters are typically connected through high speed network links. Recall
+that to generate a successful get (or put) response R (or W) nodes need to
+respond to the coordinator. Clearly, the network latencies between datacenters
+affect the response time and the nodes (and their datacenter locations) are
+chosen such that the applications’ target SLAs are met.
+
+> Ditto for Riak.
+
+### 6.1 Balancing Performance and Durability
+
+While Dynamo’s principal design goal is to build a highly available data store,
+performance is an equally important criterion in Amazon’s platform. As noted
+earlier, to provide a consistent customer experience, Amazon’s services set
+their performance targets at higher percentiles (such as the 99.9th or 99.99th
+percentiles). A typical SLA required of services that use Dynamo is that 99.9%
+of the read and write requests execute within 300ms.
+
+Since Dynamo is run on standard commodity hardware components that have far less
+I/O throughput than high-end enterprise servers, providing consistently high
+performance for read and write operations is a non-trivial task. The involvement
+of multiple storage nodes in read and write operations makes it even more
+challenging, since the performance of these operations is limited by the slowest
+of the R or W replicas. <a href="#figure-4">Figure 4</a> shows the average and
+99.9th percentile latencies of Dynamo’s read and write operations during a
+period of 30 days. As seen in the figure, the latencies exhibit a clear diurnal
+pattern, which is a result of the diurnal pattern in the incoming request rate
+(i.e., there is a significant difference in request rate between the daytime and
+night). Moreover, the write latencies are higher than read latencies, obviously,
+because write operations always result in disk access. Also, the 99.9th
+percentile latencies are around 200 ms and are an order of magnitude higher than
+the averages.
+This is because the 99.9th percentile latencies are affected by
+several factors such as variability in request load, object sizes, and locality
+patterns.
+
+**<figure id="figure-4" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure4.png">
+  <figcaption>
+    Figure 4: Average and 99.9 percentiles of latencies for read and write
+    requests during our peak request season of December 2006. The intervals
+    between consecutive ticks in the x-axis correspond to 12 hours. Latencies
+    follow a diurnal pattern similar to the request rate and 99.9 percentile
+    latencies are an order of magnitude higher than averages.
+  </figcaption>
+</figure>**
+
+While this level of performance is acceptable for a number of services, a few
+customer-facing services required higher levels of performance. For these
+services, Dynamo provides the ability to trade off durability guarantees for
+performance. In this optimization, each storage node maintains an object buffer
+in its main memory. Each write operation is stored in the buffer and gets
+periodically written to storage by a writer thread. In this scheme, read
+operations first check if the requested key is present in the buffer. If so, the
+object is read from the buffer instead of the storage engine.
+
+> This is more similar to Riak's W value, since only DW requires a durable write
+> to respond as a success.
+
+This optimization has resulted in lowering the 99.9th percentile latency by a
+factor of 5 during peak traffic even for a very small buffer of a thousand
+objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure,
+write buffering smoothes out higher percentile latencies. Obviously, this scheme
+trades durability for performance. In this scheme, a server crash can result in
+missing writes that were queued up in the buffer. To reduce the durability risk,
+the write operation is refined to have the coordinator choose one out of the N
+replicas to perform a “durable write”. Since the coordinator waits only for W
+responses, the performance of the write operation is not affected by the
+performance of the durable write operation performed by a single replica.
+
+**<figure id="figure-5" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure5.png">
+  <figcaption>
+    Figure 5: Comparison of performance of 99.9th percentile latencies for
+    buffered vs. non-buffered writes over a period of 24 hours. The intervals
+    between consecutive ticks in the x-axis correspond to one hour.
+  </figcaption>
+</figure>**
+
+> Setting DW=1 will replicate this behavior.
+
+
+### 6.2 Ensuring Uniform Load distribution
+
+Dynamo uses consistent hashing to partition its key space across its replicas
+and to ensure uniform load distribution. A uniform key distribution can help us
+achieve uniform load distribution assuming the access distribution of keys is
+not highly skewed. In particular, Dynamo’s design assumes that even where there
+is a significant skew in the access distribution there are enough keys in the
+popular end of the distribution so that the load of handling popular keys can be
+spread across the nodes uniformly through partitioning. This section discusses
+the load imbalance seen in Dynamo and the impact of different partitioning
+strategies on load distribution.
+
+> Riak uses SHA-1-based consistent hashing for [partitioning].
+
+[partitioning]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication/#understanding-replication-by-example
+
+To study the load imbalance and its correlation with request load, the total
+number of requests received by each node was measured for a period of 24 hours,
+broken down into intervals of 30 minutes. In a given time window, a node is
+considered to be “in-balance” if the node’s request load deviates from the
+average load by a value less than a certain threshold (here 15%). Otherwise
+the node was deemed “out-of-balance”. <a href="#figure-6">Figure 6</a> presents
+the fraction of nodes that are “out-of-balance” (henceforth, “imbalance ratio”)
+during this time period. For reference, the corresponding request load received
+by the entire system during this time period is also plotted. As seen in the
+figure, the imbalance ratio decreases with increasing load. For instance, during
+low loads the imbalance ratio is as high as 20% and during high loads it is
+close to 10%. Intuitively, this can be explained by the fact that under high
+loads, a large number of popular keys are accessed and due to uniform
+distribution of keys the load is evenly distributed. However, during low loads
+(where load is 1/8th of the measured peak load), fewer popular keys are
+accessed, resulting in a higher load imbalance.
+
+**<figure id="figure-6" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure6.png">
+  <figcaption>
+    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
+    request load is above a certain threshold from the average system load) and
+    their corresponding request load. The interval between ticks in the x-axis
+    corresponds to a time period of 30 minutes.
+  </figcaption>
+</figure>**
+
+<i>This section discusses how Dynamo’s partitioning scheme has evolved over time
+and its implications on load distribution.</i>
+
+<strong>Strategy 1:</strong> T random tokens per node and partition by token
+value: This was the initial strategy deployed in production (and described in
+Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
+at random from the hash space). The tokens of all nodes are ordered according to
+their values in the hash space. Every two consecutive tokens define a range. The
+last token and the first token form a range that "wraps" around from the highest
+value to the lowest value in the hash space. Because the tokens are chosen
+randomly, the ranges vary in size. As nodes join and leave the system, the token
+set changes and consequently the ranges change. Note that the space needed to
+maintain the membership at each node increases linearly with the number of nodes
+in the system.
+
+> Riak uses equal-sized partitions with a round-robin distribution, not
+> variably-sized partitions that are randomly distributed.
+
+While using this strategy, the following problems were encountered. First, when
+a new node joins the system, it needs to “steal” its key ranges from other
+nodes. However, the nodes handing the key ranges off to the new node have to
+scan their local persistence store to retrieve the appropriate set of data
+items. Note that performing such a scan operation on a production node is tricky
+as scans are highly resource intensive operations and they need to be executed
+in the background without affecting the customer performance. This requires us
+to run the bootstrapping task at the lowest priority.
+However, this significantly slows the bootstrapping process, and during the
+busy shopping season, when the nodes are handling millions of requests a day,
+the bootstrapping has taken almost a day to complete. Second, when a node
+joins/leaves the system, the key ranges handled by many nodes change and the
+Merkle trees for the new ranges need to be recalculated, which is a non-trivial
+operation to perform on a production system. Finally, there was no easy way to
+take a snapshot of the entire key space due to the randomness in key ranges, and
+this made the process of archival complicated. In this scheme, archiving the
+entire key space requires us to retrieve the keys from each node separately,
+which is highly inefficient.
+
+The fundamental issue with this strategy is that the schemes for data
+partitioning and data placement are intertwined. For instance, in some cases, it
+is preferred to add more nodes to the system in order to handle an increase in
+request load. However, in this scenario, it is not possible to add nodes without
+affecting data partitioning. Ideally, it is desirable to use independent schemes
+for partitioning and placement. To this end, the following strategies were
+evaluated:
+
+<strong>Strategy 2:</strong> T random tokens per node and equal sized
+partitions: In this strategy, the hash space is divided into Q equally sized
+partitions/ranges and each node is assigned T random tokens. Q is usually set
+such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In
+this strategy, the tokens are only used to build the function that maps values
+in the hash space to the ordered lists of nodes and not to decide the
+partitioning. A partition is placed on the first N unique nodes that are
+encountered while walking the consistent hashing ring clockwise from the end of
+the partition. <a href="#figure-7">Figure 7</a> illustrates this strategy for
+N=3. In this example, nodes A, B, C are encountered while walking the ring from
+the end of the partition that contains key k1. The primary advantages of this
+strategy are: (i) decoupling of partitioning and partition placement, and (ii)
+enabling the possibility of changing the placement scheme at runtime.
+
+> As mentioned before, Riak uses equal-sized partitions, but not
+> random distribution.
+
+**<figure id="figure-7" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure7-small.png">
+  <figcaption>
+    Figure 7: Partitioning and placement of keys in the three strategies. A, B,
+    and C depict the three unique nodes that form the preference list for the
+    key k1 on the consistent hashing ring (N=3). The shaded area indicates the
+    key range for which nodes A, B, and C form the preference list. Dark arrows
+    indicate the token locations for various nodes.
+  </figcaption>
+</figure>**
+
+<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
+Similar to strategy 2, this strategy divides the hash space into Q equally sized
+partitions and the placement of partition is decoupled from the partitioning
+scheme. Moreover, each node is assigned Q/S tokens where S is the number of
+nodes in the system. When a node leaves the system, its tokens are randomly
+distributed to the remaining nodes such that these properties are preserved.
+Similarly, when a node joins the system it "steals" tokens from nodes in the
+system in a way that preserves these properties.
+
+> Riak most closely follows strategy 3.
+>
+> See [The Node Join Process] and [Replacing a Node].
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+[Replacing a Node]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/replacing-node/
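+
+As a toy illustration of the difference (our own sketch; the names are made up
+for the example), a fixed ring of Q equal partitions claimed round-robin by S
+nodes, Riak-style, looks like this:
+
+```ruby
+# Assign Q equal-sized partitions to nodes in round-robin order, rather
+# than scattering randomly chosen tokens on the ring.
+def round_robin_claim(q, nodes)
+  (0...q).map { |partition| [partition, nodes[partition % nodes.size]] }.to_h
+end
+
+round_robin_claim(8, %w[node1 node2 node3])
+# => {0=>"node1", 1=>"node2", 2=>"node3", 3=>"node1", 4=>"node2", ...}
+```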
+
+The efficiency of these three strategies is evaluated for a system with S=30 and
+N=3. However, comparing these different strategies in a fair manner is hard as
+different strategies have different configurations to tune their efficiency. For
+instance, the load distribution property of strategy 1 depends on the number of
+tokens (i.e., T) while strategy 3 depends on the number of partitions (i.e., Q).
+One fair way to compare these strategies is to evaluate the skew in their load
+distribution while all strategies use the same amount of space to maintain their
+membership information. For instance, in strategy 1 each node needs to maintain
+the token positions of all the nodes in the ring and in strategy 3 each node
+needs to maintain the information regarding the partitions assigned to each
+node.
+
+In our next experiment, these strategies were evaluated by varying the relevant
+parameters (T and Q). The load balancing efficiency of each strategy was
+measured for different sizes of membership information that needs to be
+maintained at each node, where load balancing efficiency is defined as the ratio
+of the average number of requests served by each node to the maximum number of
+requests served by the hottest node.
+
+The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
+figure, strategy 3 achieves the best load balancing efficiency and strategy 2
+has the worst load balancing efficiency. For a brief time, Strategy 2 served as
+an interim setup during the process of migrating Dynamo instances from using
+Strategy 1 to Strategy 3. Compared to Strategy 1, Strategy 3 achieves better
+efficiency and reduces the size of membership information maintained at each
+node by three orders of magnitude. While storage is not a major issue, the nodes
+gossip the membership information periodically and as such it is desirable to
+keep this information as compact as possible. In addition to this, strategy 3 is
+advantageous and simpler to deploy for the following reasons: (i) Faster
+bootstrapping/recovery: Since partition ranges are fixed, they can be stored in
+separate files, meaning a partition can be relocated as a unit by simply
+transferring the file (avoiding random accesses needed to locate specific
+items). This simplifies the process of bootstrapping and recovery. (ii) Ease of
+archival: Periodic archiving of the dataset is a mandatory requirement for most
+of Amazon's storage services. Archiving the entire dataset stored by Dynamo is
+simpler in strategy 3 because the partition files can be archived separately.
+By contrast, in Strategy 1, the tokens are chosen randomly, and archiving the
+data stored in Dynamo requires retrieving the keys from individual nodes
+separately, which is usually inefficient and slow. The disadvantage of strategy
+3 is that changing the node membership requires coordination in order to
+preserve the properties required of the assignment.
+
+**<figure id="figure-8" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure8.png">
+  <figcaption>
+    Figure 8: Comparison of the load distribution efficiency of different
+    strategies for a system with 30 nodes and N=3 with an equal amount of
+    metadata maintained at each node.
+    The values of the system size and number of replicas are based on the
+    typical configuration deployed for the majority of our services.
+  </figcaption>
+</figure>**
+
+### 6.3 Divergent Versions: When and How Many?
+
+As noted earlier, Dynamo is designed to trade off consistency for availability.
+To understand the precise impact of different failures on consistency, detailed
+data is required on multiple factors: outage length, type of failure, component
+reliability, workload etc. Presenting these numbers in detail is outside of the
+scope of this paper. However, this section discusses a good summary metric: the
+number of divergent versions seen by the application in a live production
+environment.
+
+> This first statement should be read carefully. It's probably more correct to
+> say that Dynamo (and Riak) provides no consistency guarantees, and allows
+> users to trade availability for durability/latency.
+
+Divergent versions of a data item arise in two scenarios. The first is when the
+system is facing failure scenarios such as node failures, data center failures,
+and network partitions. The second is when the system is handling a large number
+of concurrent writers to a single data item and multiple nodes end up
+coordinating the updates concurrently. From both a usability and efficiency
+perspective, it is preferred to keep the number of divergent versions at any
+given time as low as possible. If the versions cannot be syntactically
+reconciled based on vector clocks alone, they have to be passed to the business
+logic for semantic reconciliation. Semantic reconciliation introduces additional
+load on services, so it is desirable to minimize the need for it.
+
+In our next experiment, the number of versions returned to the shopping cart
+service was profiled for a period of 24 hours. During this period, 99.94% of
+requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
+of requests saw 3 versions and 0.00009% of requests saw 4 versions. This shows
+that divergent versions are created rarely.
+
+Experience shows that the increase in the number of divergent versions is
+caused not by failures but by an increase in the number of concurrent writers.
+The increase in the number of concurrent writes is usually triggered by
+busy robots (automated client programs) and rarely by humans. This issue is not
+discussed in detail due to the sensitive nature of the story.
+
+### 6.4 Client-driven or Server-driven Coordination
+
+As mentioned in Section 5, Dynamo has a request coordination component that uses
+a state machine to handle incoming requests. Client requests are uniformly
+assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
+coordinator for a read request. Write requests on the other hand will be
+coordinated by a node in the key’s current preference list. This restriction is
+due to the fact that these preferred nodes have the added responsibility of
+creating a new version stamp that causally subsumes the version that has been
+updated by the write request. Note that if Dynamo’s versioning scheme is based
+on physical timestamps, any node can coordinate a write request.
+
+> In Riak, a server-side load-balancer is an optional configuration. You
+> generally use either virtual IPs or reverse-proxies.
+>
+> See [Load Balancing] for more information.
+
+[Load Balancing]: {{<baseurl>}}riak/kv/2.9.1/configuring/load-balancing-proxy/
+
+An alternative approach to request coordination is to move the state machine to
+the client nodes. In this scheme client applications use a library to perform
+request coordination locally. A client periodically picks a random Dynamo node
+and downloads its current view of Dynamo membership state. Using this
+information the client can determine which set of nodes form the preference list
+for any given key. Read requests can be coordinated at the client node, thereby
+avoiding the extra network hop that is incurred if the request were assigned to
+a random Dynamo node by the load balancer. Writes will either be forwarded to a
+node in the key’s preference list or can be coordinated locally if Dynamo is
+using timestamp-based versioning.
+
+> Many [client libraries] provide built-in node request coordination.
+>
+> For example, using the Ruby driver, you could specify three nodes like this:
+>
+>     client = Riak::Client.new(nodes: [
+>       {host: '10.0.0.1'},
+>       {host: '10.0.0.2'},
+>       {host: '10.0.0.3'}
+>     ])
+>
+> Note that the Riak clients do not coordinate with Riak's preference list, but
+> simply round-robin requests, letting the Riak cluster handle routing.
+
+[client libraries]: {{<baseurl>}}riak/kv/2.9.1/developing/client-libraries/
+
+An important advantage of the client-driven coordination approach is that a load
+balancer is no longer required to uniformly distribute client load. Fair load
+distribution is implicitly guaranteed by the near uniform assignment of keys to
+the storage nodes. Obviously, the efficiency of this scheme is dependent on how
+fresh the membership information is at the client. Currently clients poll a
+random Dynamo node every 10 seconds for membership updates. A pull-based
+approach was chosen over a push-based one as the former scales better with a
+large number of clients and requires very little state to be maintained at
+servers regarding clients. However, in the worst case the client can be exposed
+to stale membership for a duration of 10 seconds. If the client detects that its
+membership table is stale (for instance, when some members are unreachable), it
+will immediately refresh its membership information.
+
+<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
+percentile and averages that were observed for a period of 24 hours using
+client-driven coordination compared to the server-driven approach. As seen in
+the table, the client-driven coordination approach reduces the latencies by at
+least 30 milliseconds for 99.9th percentile latencies and decreases the average
+by 3 to 4 milliseconds. The latency improvement is because the client-driven
+approach eliminates the overhead of the load balancer and the extra network hop
+that may be incurred when a request is assigned to a random node. As seen in the
+table, average latencies tend to be significantly lower than latencies at the
+99.9th percentile. This is because Dynamo’s storage engine caches and write
+buffer have good hit ratios. Moreover, since the load balancers and network
+introduce additional variability to the response time, the gain in response time
+is higher for the 99.9th percentile than the average.
+
+<table id="table-2">
+  <caption>
+    Table 2: Performance of client-driven and server-driven
+    coordination approaches.
+  </caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations were not affected significantly. To this end, the
+background tasks were integrated with an admission control mechanism. Each of
+the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock-contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementation and maintenance of Dynamo. Many Amazon internal services have
+used Dynamo for the past two years and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of their requests and no
+data loss event has occurred to date.
+
+Moreover, the primary advantage of Dynamo is that it provides the necessary
+knobs, in the form of the three parameters (N, R, W), with which applications
+can tune an instance based on their needs. Unlike popular commercial data
+stores, Dynamo exposes data consistency and reconciliation logic issues to the
+developers. At the outset, one may expect the application logic to become more
+complex. However, historically, Amazon’s platform is built for high availability
+and many applications are designed to handle different failure modes and
+inconsistencies that may arise.
Hence, porting such applications to use Dynamo was a relatively +simple task. For new applications that want to use Dynamo, some analysis is +required during the initial stages of the development to pick the right conflict +resolution mechanisms that meet the business case appropriately. Finally, Dynamo +adopts a full membership model where each node is aware of the data hosted by +its peers. To do this, each node actively gossips the full routing table with +other nodes in the system. This model works well for a system that contains +couple of hundreds of nodes. However, scaling such a design to run with tens of +thousands of nodes is not trivial because the overhead in maintaining the +routing table increases with the system size. This limitation might be overcome +by introducing hierarchical extensions to Dynamo. Also, note that this problem +is actively addressed by O(1) DHT systems(e.g., [14]). + +> This is equally true for Riak. As mentioned above, consider running +> [Basho Bench] to help discover your optimal setup. Nothing will give you +> better numbers than real experimentation. + +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.1/using/performance/benchmarking/ + +## 7. Conclusions + +> This paper was an overview of Riak from a Dynamo point-of-view. To get a +> better sense of the Riak ecosystem, read our ever-expanding [documentation]. + +[documentation]: {{<baseurl>}} + +This paper described Dynamo, a highly available and scalable data store, used +for storing state of a number of core services of Amazon.com’s e-commerce +platform. Dynamo has provided the desired levels of availability and performance +and has been successful in handling server failures, data center failures and +network partitions. Dynamo is incrementally scalable and allows service owners +to scale up and down based on their current request load. Dynamo allows service +owners to customize their storage system to meet their desired performance, +durability and consistency SLAs by allowing them to tune the parameters N, R, +and W. + +The production use of Dynamo for the past year demonstrates that decentralized +techniques can be combined to provide a single highly-available system. Its +success in one of the most challenging application environments shows that an +eventual-consistent storage system can be a building block for highly-available +applications. 
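+
+> Riak exposes these same knobs. A quick way to see this tuning in action is
+> via Riak's HTTP API. For example (a minimal sketch; the bucket, key, and
+> port are illustrative assumptions):
+>
+>     # set N (n_val) for a bucket, then write with W=3 and read with R=1
+>     curl -XPUT http://localhost:8098/buckets/test/props \
+>       -H 'Content-Type: application/json' -d '{"props":{"n_val":3}}'
+>
+>     curl -XPUT 'http://localhost:8098/buckets/test/keys/k1?w=3' \
+>       -H 'Content-Type: text/plain' -d 'value'
+>
+>     curl 'http://localhost:8098/buckets/test/keys/k1?r=1'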
diff --git a/content/riak/kv/2.9.1/learn/glossary.md b/content/riak/kv/2.9.1/learn/glossary.md new file mode 100644 index 0000000000..6db1e77167 --- /dev/null +++ b/content/riak/kv/2.9.1/learn/glossary.md @@ -0,0 +1,353 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.1/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/2.9.1/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/2.9.1/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/2.9.1/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/2.9.1/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.1/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/2.9.1/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/2.9.1/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/2.9.1/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Hinted Handoff
+
+Hinted handoff is a technique for dealing with node failure in the Riak
+cluster in which neighboring nodes temporarily take over storage
+operations for the failed node. When the failed node returns to the
+cluster, the updates received by the neighboring nodes are handed off to
+it.
+
+Hinted handoff allows Riak to ensure database availability. When a node
+fails, Riak can continue to handle requests as if the node were still
+there.
+
+* [Recovering a Failed Node][repair recover failure recovery]
+
+## Key
+
+Keys are unique object identifiers in Riak and are scoped within buckets
+and bucket types.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Lager
+
+[Lager] is an Erlang/OTP framework that
+ships as Riak's default logger.
+
+## MapReduce
+
+Riak's MapReduce gives developers the capability to perform more
+powerful queries over the data stored in their key/value store.
+
+* [Using MapReduce][usage mapreduce]
+
+## Node
+
+A node is analogous to a physical server. Nodes run a certain number of
+vnodes, each of which claims a partition in the Riak Ring key space.
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Object
+
+An object is another name for a value.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Partition
+
+Partitions are the spaces into which a Riak cluster is divided. Each
+vnode in Riak is responsible for a partition. Data is stored on a set
+number of partitions determined by the `n_val` setting, with the target
+partitions chosen statically by applying consistent hashing to an
+object's key.
+
+* [Clusters][concept clusters]
+* [Eventual Consistency][concept eventual consistency]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Quorum
+
+Quorum in Riak has two meanings:
+
+* The quantity of replicas that must respond to a read or write request
+  before it is considered successful. This is defined as a bucket
+  property or as one of the relevant parameters to a single request
+  (R,W,DW,RW).
+* A symbolic quantity for the above, `quorum`, which is equivalent to
+  `n_val` / 2 + 1. The default setting is `2`.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Replication properties][apps replication properties]
+* [Understanding Riak's Configurable Behaviors]
+
+## Sloppy Quorum
+
+During failure scenarios, in which available nodes < total nodes, sloppy
+quorum is used to ensure that Riak is still available to take writes.
+When a primary node is unavailable, another node will accept its write
+requests. When the node returns, data is transferred to the primary node
+via the [Hinted Handoff](#hinted-handoff) process.
+
+## Read Repair
+
+Read repair is an anti-entropy mechanism that Riak uses to
+optimistically update stale replicas when they reply to a read request
+with stale data.
+
+* [More about Read Repair][concept replication]
+
+## Replica
+
+Replicas are copies of data stored in Riak. The number of replicas
+required for both successful reads and writes is configurable in Riak
+and should be set based on your application's consistency and
+availability requirements.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Understanding Riak's Configurable Behaviors]
+
+## Riak Core
+
+Riak Core is the modular distributed systems framework that serves as
+the foundation for Riak's scalable architecture.
+
+* [Riak Core]
+* [Where To Start With Riak Core]
+
+## Riak KV
+
+Riak KV is the key/value datastore for Riak.
+
+* [Riak KV]
+
+## Riak Pipe
+
+Riak Pipe is the processing layer that powers Riak's MapReduce. It's
+best described as "UNIX pipes for Riak."
+
+* [Riak Pipe]
+* [Riak Pipe - the New MapReduce Power]
+* [Riak Pipe - Riak's Distributed Processing Framework]
+
+## Riak Search
+
+Riak Search is a distributed, scalable, failure-tolerant, realtime,
+full-text search engine integrating [Apache
+Solr](https://lucene.apache.org/solr/) with Riak KV.
+
+* [Using Search][usage search]
+
+## Ring
+
+The Riak Ring is a 160-bit integer space. This space is equally divided
+into partitions, each of which is claimed by a vnode, which themselves
+reside on actual physical server nodes.
+
+* [Clusters][concept clusters]
+* [Dynamo][learn dynamo]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Secondary Indexing (2i)
+
+Secondary Indexing in Riak gives developers the ability to tag an object
+stored in Riak with one or more values which can then be queried.
+
+* [Using Secondary Indexes][usage secondary-indexes]
+* [Repairing Indexes][repair recover repairs]
+
+## Strong Consistency
+
+While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater
+enable you to apply strong consistency guarantees to some or all of your
+data, thus using Riak as a CP (consistent plus partition-tolerant)
+rather than AP (highly available plus partition-tolerant) system.
+
+* [Strong Consistency Concept][concept strong consistency]
+* [Using Strong Consistency][cluster ops strong consistency]
+
+## Value
+
+Riak is best described as a key/value store. In versions of Riak prior
+to 2.0, all "values" are opaque BLOBs (binary large objects) identified
+with a unique key. Values can be any type of data, including a string, a
+JSON object, a text document, etc. Modifying values involves fetching
+the value that exists in Riak and replacing it with a new value;
+operations on values are thus basic CRUD operations.
+
+[Riak Data Types][dev data types], added in version 2.0, are an important
+exception to this. While still considered values---because they are
+stored in bucket type/bucket/key locations, like anything in Riak---Riak
+Data Types are not BLOBs and are modified by Data Type-specific
+operations.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+* [Data Types][dev data types]
+
+
+## Vector Clock
+
+Riak utilizes vector clocks (or _vclocks_) to handle version control.
+Since any node in a Riak cluster is able to handle a request, and not
+all nodes need to participate, data versioning is required to keep track
+of a current value. When a value is stored in Riak, it is tagged with a
+vector clock, establishing its initial version. When it is updated,
+the client provides the vector clock of the object being modified so
+that this vector clock can be extended to reflect the update. Riak can
+then compare vector clocks on different versions of the object and
+determine certain attributes of the data.
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] diff --git a/content/riak/kv/2.9.1/learn/new-to-nosql.md b/content/riak/kv/2.9.1/learn/new-to-nosql.md new file mode 100644 index 0000000000..26f47d1a84 --- /dev/null +++ b/content/riak/kv/2.9.1/learn/new-to-nosql.md @@ -0,0 +1,16 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: 2.9.1 +#menu: +# riak_kv-2.9.1: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +--- + +**TODO: Add content (not sure where this lives in existing docs)** diff --git a/content/riak/kv/2.9.1/learn/use-cases.md b/content/riak/kv/2.9.1/learn/use-cases.md new file mode 100644 index 0000000000..870dfc3d61 --- /dev/null +++ b/content/riak/kv/2.9.1/learn/use-cases.md @@ -0,0 +1,401 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/2.9.1/dev/data-modeling/ + - /riak/kv/2.9.1/dev/data-modeling/ +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/2.9.1/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/2.9.1/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/2.9.1/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. 
This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships.
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+Riak's tunable [apps replication properties][replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster used to perform more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+patterns of reading and writing data to Riak are very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value.
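+
+A minimal sketch of that keying scheme over Riak's HTTP interface (the device
+bucket, interval key, and JSON payload here are illustrative assumptions, not a
+prescribed layout):
+
+```bash
+# one bucket per device; the collection interval serves as the key
+curl -XPUT http://localhost:8098/buckets/sensor_1/keys/2020-02-15T10:05 \
+  -H 'Content-Type: application/json' \
+  -d '{"temperature": 21.4, "humidity": 40.2}'
+
+# fetch the reading recorded for that interval
+curl http://localhost:8098/buckets/sensor_1/keys/2020-02-15T10:05
+```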
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye out for the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
+      Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type of object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account or have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of modeling with
+user data. A common example would be storing data for assembling a social
+network timeline. To create a user timeline, you could use a `timeline` bucket
+in Riak and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs, which could then be
+used to retrieve the full information from another bucket, or perhaps the full
+status updates themselves. If you want to store additional data, such as a
+timestamp, category or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+in the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client.
Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the content post, though with a different
+bucket/key combination. Another possibility would be to store each comment with
+its own ID. Loading the full view with comments would require your application
+to call from the posts and comments buckets to assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Linkfluence case study">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="ideeli case study">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
diff --git a/content/riak/kv/2.9.1/learn/why-riak-kv.md b/content/riak/kv/2.9.1/learn/why-riak-kv.md
new file mode 100644
index 0000000000..32fc7cabd4
--- /dev/null
+++ b/content/riak/kv/2.9.1/learn/why-riak-kv.md
@@ -0,0 +1,221 @@
+---
+title: "Why Riak KV?"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Why Riak KV?"
+    identifier: "learn_why_riak_kv"
+    weight: 100
+    parent: "learn"
+toc: true
+aliases:
+  - /riak/2.9.1/theory/why-riak/
+  - /riak/kv/2.9.1/theory/why-riak/
+---
+
+
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.1/developing/app-guide/replication-properties
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.1/using/performance/benchmarking
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/strong-consistency
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency
+[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
+[Datomic]: http://www.datomic.com/overview.html
+[dev data types]: {{<baseurl>}}riak/kv/2.9.1/developing/data-types
+[glossary read rep]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#read-repair
+
+
+## What is Riak?
+
+Riak is a distributed database designed to deliver maximum data
+availability by distributing data across multiple servers. As long as
+your Riak client can reach *one* Riak server, it should be able to write
+data.
+
+Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data.
+
+
+### Basho's goals for Riak
+
+Goal | Description
+-------|-------
+**Availability** | Riak writes to and reads from multiple servers to offer data availability even when the hardware or the network itself is experiencing failure conditions
+**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden
+**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity
+**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available
+
+### When Riak makes sense
+
+If your data does not fit on a single server and demands a distributed
+database architecture, you should take a close look at Riak as a
+potential solution to your data availability issues. Getting distributed
+databases right is **very** difficult, and Riak was built to address the
+problem of data availability with as few trade-offs and downsides as
+possible.
+
+Riak's focus on availability makes it a good fit whenever downtime is
+unacceptable. No one can promise 100% uptime, but Riak is designed to
+survive network partitions and hardware failures that would
+significantly disrupt most databases.
+
+A less-heralded feature of Riak is its predictable latency. Because its
+fundamental operations---read, write, and delete---do not involve
+complex data joins or locks, it services those requests promptly. Thanks
+to this capability, Riak is often selected as a data storage backend for
+data management software from a variety of paradigms, such as
+[Datomic].
+
+From the standpoint of the actual content of your data, Riak might also
+be a good choice if your data can be modeled as one of Riak's currently
+available [Data Types][dev data types]: flags, registers, counters,
+sets, or maps. These Data Types enable you to take advantage of Riak's
+high availability approach while simplifying application development.
+
+### When Riak is Less of a Good Fit
+
+We recommend running no fewer than 5 data servers in a cluster.
+This means that Riak can be overkill for small databases.
If you're not
+already sure that you will need a distributed database, there's a good
+chance that you won't need Riak.
+
+If explosive growth is a possibility, however, you are always highly
+advised to prepare for that in advance. Scaling at Internet speeds is
+sometimes compared to overhauling an airplane mid-flight. If you feel
+that such a transition might be necessary in the future, then you might
+want to consider Riak.
+
+Riak's simple data model, consisting of keys and values as its atomic
+elements, means that your data must be denormalized if your system is to
+be reasonably performant. For most applications this is not a serious
+hurdle. But if your data simply cannot be effectively managed as keys
+and values, Riak will most likely not be the best fit for you.
+
+Correspondingly, if your application demands a high query load by any
+means other than key/value lookup---e.g. SQL-style `SELECT * FROM table`
+operations---Riak will not be as efficient as other databases. If you
+wish to compare Riak with other data technologies, Basho offers a tool
+called [Basho Bench] to help measure its performance, so that you can
+decide whether the availability and operational benefits of Riak
+outweigh its disadvantages.
+
+## How Does a Riak Cluster Work?
+
+A Riak cluster is a group of **nodes** that are in constant
+communication to ensure data availability and partition tolerance.
+
+### What is a Riak Node?
+
+A Riak node is not quite the same as a server, but in a production
+environment the two should be equivalent. A developer may run multiple
+nodes on a single laptop, but this would never be advisable in a real
+production cluster.
+
+Each node in a Riak cluster is equivalent, containing a complete,
+independent copy of the whole Riak package. There is no "master" node;
+no node has more responsibilities than others; and no node has special
+tasks not performed by other nodes. This uniformity provides the basis
+for Riak's fault tolerance and scalability.
+
+Each node is responsible for multiple data partitions, as discussed
+below:
+
+### Riak Automatically Re-Distributes Data When Capacity is Added
+
+When you add (or remove) machines, data is rebalanced automatically with
+no downtime. New machines claim data until ownership is equally spread
+around the cluster, with the resulting cluster status updates shared to
+every node via a gossip protocol and used to route requests. This is
+what makes it possible for any node in the cluster to receive requests.
+The end result is that developers don't need to deal with the underlying
+complexity of where data lives.
+
+### Consistent Hashing
+
+Data is distributed across nodes using consistent hashing. Consistent
+hashing ensures that data is evenly distributed around the cluster and
+makes possible the automatic redistribution of data as the cluster
+scales.
+
+### Intelligent Replication
+
+Riak's replication scheme ensures that you can still read, write, and
+update data if nodes go down. Riak allows you to set a replication
+variable, N (also known as the `n_val`), that specifies the number of
+nodes on which a value will be replicated.
+
+An `n_val` value of 3 (the default) means that each object is replicated
+3 times. When an object's key is mapped onto a given node, Riak will
+continue on and automatically replicate the data onto two more nodes.
+The parameter is tunable: you could, for example, replicate values to 7
+nodes in a 10-node cluster, or to 10 nodes in a 15-node cluster.
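+
+As a sketch of how this looks in practice over Riak's HTTP interface (the
+`test` bucket and port are illustrative assumptions; note that `n_val` is
+normally chosen up front, before a bucket holds data):
+
+```bash
+# raise the replication factor for this bucket from the default of 3 to 5
+curl -XPUT http://localhost:8098/buckets/test/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props": {"n_val": 5}}'
+
+# confirm the bucket's current properties
+curl http://localhost:8098/buckets/test/props
+```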
+
+## When Things Go Wrong
+
+Riak retains fault tolerance, data integrity, and availability even in
+failure conditions such as hardware failure and network partitions. Riak
+has a number of means of addressing these scenarios and other bumps in
+the road, like version conflicts in data.
+
+### Hinted Handoff
+
+Hinted handoff enables Riak to handle node failure. If a node goes down,
+a neighboring node will take over its storage operations. When the
+failed node returns, the updates received by the neighboring node are
+handed back to it. This ensures that availability for writes and updates
+is maintained automatically, minimizing the operational burden of
+failure conditions.
+
+### Version Conflicts
+
+In any system that replicates data, conflicts can arise, for example
+when two clients update the same object at the exact same time or when
+not all updates have yet reached hardware that is experiencing lag.
+
+In Riak, replicas are [eventually consistent][concept eventual consistency],
+meaning that while data is always available, not all replicas may have
+the most recent update at the exact same time, causing brief
+periods---generally on the order of milliseconds---of inconsistency
+while all state changes are synchronized.
+
+Riak addresses data conflicts as follows: When you make a read request,
+Riak looks up all replicas for that object. By default, Riak will return
+the most recently updated version, determined by looking at the object's
+vector clock. Vector clocks are metadata attached to each replica when
+it is created. They are extended each time a replica is updated to keep
+track of versions. You can also allow clients to resolve conflicts
+themselves if that is a better fit for your use case.
+
+### Riak Data Types
+
+If you are not interested in dealing with version conflicts on the
+application side, [Riak Data Types][dev data types] offer a powerful
+yet easy-to-use means of storing certain types of data while allowing
+Riak to handle merge conflicts. These conflicts are resolved
+automatically by Riak using Data Type-specific algorithms inspired by
+research into [convergent replicated data types].
+
+### Read Repair
+
+When an outdated replica is returned as part of a read request, Riak
+will automatically update the out-of-sync replica to make it consistent.
+[Read repair][glossary read rep], a self-healing property of
+the database, will even update a replica that returns a `not_found` in
+the event that a node loses the data due to physical failure.
+
+### Reading and Writing Data in Failure Conditions
+
+In Riak, you can set an R value for reads and a W value for writes.
+These values give you control over how many replicas must respond to a
+request for it to succeed.
+
+Let's say that you have an N value of 3 (aka `n_val=3`) for a particular
+key/value pair, but one of the physical nodes responsible for a replica
+is down. With an `r=2` setting, only 2 replicas must return results for
+a read to be deemed successful. This allows Riak to provide read
+availability even when nodes are down or laggy. The same applies for the
+W in writes. If this value is not specified, Riak defaults to `quorum`,
+according to which the majority of nodes must respond.
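+
+For example, over the HTTP interface (a minimal sketch; the bucket, key, and
+payload are illustrative):
+
+```bash
+# write, requiring acknowledgement from two replicas before success
+curl -XPUT 'http://localhost:8098/buckets/test/keys/doc1?w=2' \
+  -H 'Content-Type: application/json' \
+  -d '{"hello": "world"}'
+
+# read, succeeding as soon as two replicas have answered
+curl 'http://localhost:8098/buckets/test/keys/doc1?r=2'
+```
+
+There is more on [replication properties][apps replication properties] elsewhere in the
+documentation.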
diff --git a/content/riak/kv/2.9.1/release-notes.md b/content/riak/kv/2.9.1/release-notes.md
new file mode 100644
index 0000000000..c3b3a0287c
--- /dev/null
+++ b/content/riak/kv/2.9.1/release-notes.md
@@ -0,0 +1,40 @@
+---
+title: "Riak KV 2.9.1 Release Notes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Release Notes"
+    identifier: "index_release_notes"
+    weight: 101
+    parent: index
+toc: false
+aliases:
+  - /riak/2.9.1/community/release-notes
+  - /riak/kv/2.9.1/intro-v20
+  - /riak/2.9.1/intro-v20
+  - /riak/kv/2.9.1/introduction
+---
+
+Released Feb 15, 2020.
+
+
+## Overview
+
+This release adds a number of features built on top of the Tictac AAE feature made available in 2.9.0. The new features depend on Tictac AAE being enabled, but are backend independent. The primary features of the release are:
+
+* A new combined full-sync and real-time replication system, nextgenrepl, which is much faster and more efficient at reconciling the overall state of clusters (e.g. full-sync).
+
+* A mechanism for requesting mass deletion of objects on expiry, and mass reaping of tombstones after a time to live. This is not yet an automated, scheduled set of garbage collection processes; it must be triggered by an operational process.
+
+* A safe method of listing buckets regardless of the backend chosen. Listing buckets had previously not been production safe, but can still be required in production environments; it can now be managed safely via an `aae_fold`.
+
+* A version uplift of the internal ibrowse client, a minor riak_dt fix to resolve issues of unit test reliability, a fix to help build (the now deprecated) erlang_js in some environments, and the removal of hamcrest as a dependency.
+
+[Previous Release Notes](#previous-release-notes)
+
+
+## Previous Release Notes
+
+Please see the KV 2.9.0p5 release notes [here]({{<baseurl>}}riak/kv/2.9.0p5/release-notes/), and the KV 2.2.6 release notes [here]({{<baseurl>}}riak/kv/2.2.6/release-notes/).
diff --git a/content/riak/kv/2.9.1/setup.md b/content/riak/kv/2.9.1/setup.md
new file mode 100644
index 0000000000..7cef653dab
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup.md
@@ -0,0 +1,45 @@
+---
+title: "Setup Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Setup"
+    identifier: "setup_index"
+    weight: 110
+    pre: install
+toc: false
+---
+
+[plan index]: ../setup/planning
+[install index]: ../setup/installing
+[upgrade index]: ../setup/upgrading
+[downgrade]: ../setup/downgrade
+
+## In This Section
+
+#### [Planning][plan index]
+
+Information on planning your Riak KV cluster including software & hardware recommendations.
+
+[Learn More >>][plan index]
+
+#### [Installing][install index]
+
+Step-by-step tutorials on installing Riak KV.
+
+[Learn More >>][install index]
+
+#### [Upgrading][upgrade index]
+
+Guides on upgrading your Riak KV cluster.
+
+[Learn More >>][upgrade index]
+
+#### [Downgrading][downgrade]
+
+A guide on downgrading your Riak KV cluster.
+
+[Learn More >>][downgrade]
+
diff --git a/content/riak/kv/2.9.1/setup/downgrade.md b/content/riak/kv/2.9.1/setup/downgrade.md
new file mode 100644
index 0000000000..0e5e344225
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/downgrade.md
@@ -0,0 +1,174 @@
+---
+title: "Downgrading"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Downgrading"
+    identifier: "downgrading"
+    weight: 103
+    parent: "setup_index"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/upgrading/rolling-downgrades/
+  - /riak/kv/2.9.1/ops/upgrading/rolling-downgrades/
+---
+
+[rolling upgrade]: {{<baseurl>}}riak/kv/2.9.1/setup/upgrading/cluster
+[config ref]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+[concept aae]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/
+[aae status]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#aae-status
+
+Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade].
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove the Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak search.
+7. Monitor the reindex of the data.
+8. Finalize the process and restart Riak KV & Riak search.
+
+### Guidelines
+
+* Riak control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:--------|:---------:|:--------:|:------|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ |  | Can be opted out of using a [capability](#aae_tree_capability). |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression in LevelDB and/or enabled global expiration in LevelDB when you installed KV 2.9.1, you cannot downgrade.
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).
+
+This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}
+
+### Stop Riak KV and remove Riak search index & temporary data
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+2\. Back up your Riak KV /etc and /data directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Remove the Riak search index data and AAE data:
+
+    1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+        ```bash
+        rm -rf /var/lib/riak/yz_temp/solr-webapp
+        ```
+    2. Delete the Solr cores located in the yz directory. If you have custom solrconfig.xml files, you will need to restore the core from backup instead.
+
+       For example:
+
+        ```bash
+        rm -rf /var/lib/riak/yz/example_core1
+        rm -rf /var/lib/riak/yz/example_core2
+        ```
+
+### Prepare to Re-index Solr Cores
+
+5\. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency-sensitive application, you should adjust these settings with care.
+
+```riak.conf
+anti_entropy.concurrency_limit = 8
+anti_entropy.tree.build_limit.number = 4
+anti_entropy.tree.build_limit.per_timespan = 5m
+```
+
+### Start the node and disable Yokozuna
+
+6\. Start Riak KV:
+{{% note %}}
+Search results will be inconsistent until **Step 8.1** is complete.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7\. Wait for Riak search to start by running the following command:
+
+```bash
+riak-admin wait-for-service yokozuna
+```
+
+8\. Run `riak attach`.
+
+    1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:
+
+        ```
+        riak_core_node_watcher:service_down(yokozuna).
+        ```
+
+    2. Expire the Yokozuna AAE Trees:
+
+        ```
+        yz_entropy_mgr:expire_trees().
+        ```
+
+    3. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+### Monitor the reindex of the data
+
+9\. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands.
+
+The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output.
+
+Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.
+
+### Finalize process and restart Yokozuna
+
+
+10\. If you raised the AAE concurrency settings in riak.conf during **Step 5**, stop the node and remove the increased AAE thresholds.
+
+11\. If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet:
+
+```erlang
+riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)).
+```
+
+12\. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+13\.
Verify that transfers have completed: + +```bash +riak-admin transfers +``` diff --git a/content/riak/kv/2.9.1/setup/installing.md b/content/riak/kv/2.9.1/setup/installing.md new file mode 100644 index 0000000000..7363ed5f57 --- /dev/null +++ b/content/riak/kv/2.9.1/setup/installing.md @@ -0,0 +1,56 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/2.9.1/ops/building/installing + - /riak/kv/2.9.1/ops/building/installing + - /riak/2.9.1/installing/ + - /riak/kv/2.9.1/installing/ +--- + +[install aws]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/rhel-centos +[install suse]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/2.9.1/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. diff --git a/content/riak/kv/2.9.1/setup/installing/amazon-web-services.md b/content/riak/kv/2.9.1/setup/installing/amazon-web-services.md new file mode 100644 index 0000000000..bae7f1ba41 --- /dev/null +++ b/content/riak/kv/2.9.1/setup/installing/amazon-web-services.md @@ -0,0 +1,148 @@ +--- +title_supertext: "Installing on" +title: "Amazon Web Services" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Amazon Web Services" + identifier: "installing_amazon_web_services" + weight: 301 + parent: "installing" +toc: true +aliases: + - /riak/2.9.1/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/kv/2.9.1/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/2.9.1/installing/amazon-web-services/ + - /riak/kv/2.9.1/installing/amazon-web-services/ +--- + + +## Launching Riak VMs via the AWS Marketplace + +{{% note title="Note" %}} +The AWS Marketplace does not always have the most recent versions of Riak available. 
To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below.
+{{% /note %}}
+
+In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account.
+
+1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account.
+
+2. Locate Riak in the **Databases & Caching** category or search for Riak from any page.
+
+3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair.
+
+    ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png)
+
+4. Click the **Accept Terms and Launch with 1-Click** button.
+
+### Security Group Settings
+
+Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak.
+
+1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM.
+
+2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports:
+
+    * 22 (SSH)
+    * 8087 (Riak Protocol Buffers Interface)
+    * 8098 (Riak HTTP Interface)
+
+3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab).
+
+    * Port range: 4369
+    * Port range: 6000-7999
+    * Port range: 8099
+
+4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.
+
+    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)
+
+We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.1/using/security/).
+
+## Clustering Riak on AWS
+
+You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the ec2-user.
+
+You can find more information on connecting to an instance in the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).
+
+{{% note title="Note" %}}
+The following clustering setup will _not_ be resilient to instance restarts
+unless deployed in Amazon VPC.
+{{% /note %}}
+
+{{% note title="Note on Package Based Installation" %}}
+If installing to AWS by package, further configuration of _riak.conf_ to set the node name and listening IP addresses is necessary for the below steps to function. An example is sketched after this section.
+{{% /note %}}
+
+1. On the first node, obtain the internal IP address:
+
+    ```bash
+    curl http://169.254.169.254/latest/meta-data/local-ipv4
+    ```
+
+2. For all other nodes, use the internal IP address of the first node:
+
+    ```bash
+    sudo riak-admin cluster join riak@<ip.of.first.node>
+    ```
+
+3. After all of the nodes are joined, execute the following:
+
+    ```bash
+    sudo riak-admin cluster plan
+    ```
+
+    If this looks good:
+
+    ```bash
+    sudo riak-admin cluster commit
+    ```
+
+    To check the status of clustering, use:
+
+    ```bash
+    sudo riak-admin member_status
+    ```
+
+You now have a Riak cluster running on AWS. 
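+
+For package-based installs, the riak.conf changes called out in the note above might look like the following minimal sketch. The IP address here is only an example; substitute each node's internal address from the metadata query in step 1:
+
+```riak.conf
+## Sketch: example values only. Use this node's internal IP
+## (from the metadata query in step 1) in place of 172.31.0.10.
+nodename = riak@172.31.0.10
+
+## Listen on the internal interface for client traffic.
+listener.http.internal = 172.31.0.10:8098
+listener.protobuf.internal = 172.31.0.10:8087
+```
+
+Each node needs its own `nodename` set before it first starts, and the name you join with in step 2 must match the first node's `nodename`.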
+
+## Installing From Package
+
+#### AWS (2)
+
+You can install Riak on Amazon Linux 2 (the "AWS 2" image) using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2/riak-2.9.1-1.amzn2x86_64.rpm
+sudo yum localinstall -y riak-2.9.1-1.amzn2x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2/riak-2.9.1-1.amzn2x86_64.rpm
+sudo rpm -i riak-2.9.1-1.amzn2x86_64.rpm
+```
+
+#### AWS (2016.09)
+
+You can install Riak on Amazon Linux AMI 2016.09 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2016.09/riak-2.9.1-1.amzn1x86_64.rpm
+sudo yum localinstall -y riak-2.9.1-1.amzn1x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2016.09/riak-2.9.1-1.amzn1x86_64.rpm
+sudo rpm -i riak-2.9.1-1.amzn1x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/verify
diff --git a/content/riak/kv/2.9.1/setup/installing/debian-ubuntu.md b/content/riak/kv/2.9.1/setup/installing/debian-ubuntu.md
new file mode 100644
index 0000000000..59078717d7
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/installing/debian-ubuntu.md
@@ -0,0 +1,166 @@
+---
+title_supertext: "Installing on"
+title: "Debian and Ubuntu"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Debian & Ubuntu"
+    identifier: "installing_debian_ubuntu"
+    weight: 302
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/kv/2.9.1/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/2.9.1/installing/debian-ubuntu/
+  - /riak/kv/2.9.1/installing/debian-ubuntu/
+---
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/source/
+[security index]: {{<baseurl>}}riak/kv/2.9.1/using/security/
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/verify
+
+Riak KV can be installed on Debian or Ubuntu-based systems using a binary
+package or by compiling from source code.
+
+The following steps have been tested to work with Riak KV on:
+
+- Ubuntu 18.04
+- Ubuntu 16.04
+- Ubuntu 14.04
+- Ubuntu 12.04
+- Debian 9.2
+- Debian 8.6
+- Debian 7.6
+- Raspbian Buster
+
+> **Note on Debian 7**
+>
+> If you wish to install Riak on Debian 7, you may need to install
+[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
+later, which in turn requires upgrading your system to
+[sid](https://www.debian.org/releases/sid/). Installation instructions
+can be found
+[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
+>
+> Once sid has been installed, you can install libc6 with the following
+command:
+>
+>```bash
+apt-get -t sid install libc6 libc6-dev libc6-dbg
+```
+
+## Installing From Package
+
+If you wish to install the deb packages by hand, follow these
+instructions.
+
+### Installing on Non-LTS Ubuntu Releases
+
+Typically we only package Riak for LTS releases to keep our build and
+testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
+there are changes that affect how Riak is packaged, so we will release a
+separate package for that non-LTS release. 
In most other cases, however,
+if you are running a non-LTS release it is safe to follow the
+instructions below for the LTS release prior to yours. For example, on
+Ubuntu 12.10 you would follow the installation instructions for
+Ubuntu 12.04.
+
+### PAM Library Requirement for Ubuntu
+
+One dependency that may be missing on your machine is the `libpam0g-dev`
+package, used for Pluggable Authentication Module (PAM) authentication
+and associated with [Riak security][security index].
+
+To install:
+
+```bash
+sudo apt-get install libpam0g-dev
+```
+
+### Riak 64-bit Installation
+
+#### Ubuntu Bionic Beaver (18.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/bionic64/riak-2.9.1-1_amd64.deb
+sudo dpkg -i riak-2.9.1-1_amd64.deb
+```
+
+#### Ubuntu Xenial Xerus (16.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/xenial64/riak-2.9.1-1_amd64.deb
+sudo dpkg -i riak-2.9.1-1_amd64.deb
+```
+
+#### Ubuntu Trusty Tahr (14.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/trusty64/riak-2.9.1-1_amd64.deb
+sudo dpkg -i riak-2.9.1-1_amd64.deb
+```
+
+#### Ubuntu Precise Pangolin (12.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/precise64/riak-2.9.1-1_amd64.deb
+sudo dpkg -i riak-2.9.1-1_amd64.deb
+```
+
+#### Debian Stretch (9.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/9/riak-2.9.1-1_amd64.deb
+sudo dpkg -i riak-2.9.1-1_amd64.deb
+```
+
+#### Debian Jessie (8.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/8/riak-2.9.1-1_amd64.deb
+sudo dpkg -i riak-2.9.1-1_amd64.deb
+```
+
+#### Debian Wheezy (7.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/7/riak-2.9.1-1_amd64.deb
+sudo dpkg -i riak-2.9.1-1_amd64.deb
+```
+
+#### Raspbian Buster
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/raspbian/buster/riak-2.9.1-1_armhf.deb
+sudo dpkg -i riak-2.9.1-1_armhf.deb
+```
+
+## Installing From Source
+
+First, install Riak dependencies using apt:
+
+```bash
+sudo apt-get install build-essential libc6-dev-i386 git
+```
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/riak-2.9.1.tar.gz
+tar zxvf riak-2.9.1.tar.gz
+cd riak-2.9.1
+make rel
+```
+
+If the build was successful, a fresh build of Riak will exist in the
+`rel/riak` directory. A quick sanity check of the build is sketched below.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
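+
+As a quick check of a source build, you can start the freshly built node directly from the release directory before doing anything else. This is only a sketch; the paths follow the `make rel` output described above:
+
+```bash
+# Start the node from the self-contained release build...
+rel/riak/bin/riak start
+
+# ...then confirm it responds (prints "pong" when the node is up).
+rel/riak/bin/riak ping
+
+# Stop it again when you're done checking.
+rel/riak/bin/riak stop
+```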
diff --git a/content/riak/kv/2.9.1/setup/installing/freebsd.md b/content/riak/kv/2.9.1/setup/installing/freebsd.md
new file mode 100644
index 0000000000..f516fc4122
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/installing/freebsd.md
@@ -0,0 +1,128 @@
+---
+title_supertext: "Installing on"
+title: "FreeBSD"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "FreeBSD"
+    identifier: "installing_freebsd"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/installing/Installing-on-FreeBSD
+  - /riak/kv/2.9.1/ops/building/installing/Installing-on-FreeBSD
+  - /riak/2.9.1/installing/freebsd/
+  - /riak/kv/2.9.1/installing/freebsd/
+---
+
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/2.9.1/downloads/
+[install verify]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/verify
+
+You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code.
+
+## Installing From Binary Package
+
+Installing Riak from a binary package is the simplest method: it has the fewest dependencies and takes less time to complete than building from source.
+
+### Prerequisites and Dependencies
+
+Riak requires `sudo` to be installed if the Riak command line tools are to be executed by users other than the *riak* user. Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package.
+
+### Installation
+
+You can install the Riak binary package on FreeBSD remotely using the
+`pkg_add` remote option. For this example, we're installing `riak-2.9.1.txz`.
+
+### For FreeBSD 11.x
+
+```bash
+sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.1/freebsd/11.1/riak-2.9.1.txz
+```
+
+### For FreeBSD 10.x
+
+```bash
+sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.1/freebsd/10.4/riak-2.9.1.txz
+```
+
+When Riak is installed, a message is displayed with information about the installation and available documentation.
+
+```
+Thank you for installing Riak.
+
+Riak has been installed in /usr/local owned by user:group riak:riak
+
+The primary directories are:
+
+    {platform_bin_dir, "/usr/local/sbin"}
+    {platform_data_dir, "/var/db/riak"}
+    {platform_etc_dir, "/usr/local/etc/riak"}
+    {platform_lib_dir, "/usr/local/lib/riak"}
+    {platform_log_dir, "/var/log/riak"}
+
+These can be configured and changed in the platform_etc_dir/app.config.
+
+Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly.
+
+Man pages are available for riak(1) and riak-admin(1)
+```
+
+## Installing From Source
+
+Installing Riak from source on FreeBSD is a straightforward process, but it requires installing more dependencies (such as Erlang) prior to building and takes more time than a binary package installation.
+
+That said, installing from source provides greater flexibility with respect to configuration, data root locations, and more fine-grained control over specific dependency versions.
+
+### Prerequisites and Dependencies
+
+When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build.
+
+If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. 
+
+* Erlang ([Installing Erlang][install source erlang])
+* Curl
+* Git
+* OpenSSL (version 1.0.0_7)
+* Python
+* sudo
+* flex
+
+### Installation
+
+First, download the version you wish to install from the [downloads][downloads] page.
+
+Next, unpack and build a release from source:
+
+```bash
+tar zxf riak-x.x.x.tar.gz
+cd riak-x.x.x
+gmake rel
+```
+
+Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories:
+
+```bash
+bin            # Riak binaries
+data           # Riak data and metadata
+erts-5.9.2     # Erlang Run-Time System
+etc            # Riak Configuration
+lib            # Third party libraries
+log            # Operational logs
+releases       # Release information
+```
+
+If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this:
+
+```bash
+gmake devrel
+```
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.1/setup/installing/mac-osx.md b/content/riak/kv/2.9.1/setup/installing/mac-osx.md
new file mode 100644
index 0000000000..d382b9074d
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/installing/mac-osx.md
@@ -0,0 +1,116 @@
+---
+title_supertext: "Installing on"
+title: "Mac OS X"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Mac OS X"
+    identifier: "installing_macosx"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/kv/2.9.1/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/2.9.1/installing/mac-osx/
+  - /riak/kv/2.9.1/installing/mac-osx/
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/2.9.1/using/performance/open-files-limit
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/verify
+
+The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball.
+
+> **`ulimit` on OS X**
+>
+> OS X gives you a very small limit on open file handles, so even with a
+backend that uses very few file handles, it's possible to run out. See
+[Open Files Limit][perf open files] for more information about changing the limit.
+
+## From Precompiled Tarballs
+
+To run Riak from our precompiled tarball, run these commands for the
+appropriate platform:
+
+### 64-bit
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.11/riak-2.9.1-OSX-x86_64.tar.gz
+tar xzvf riak-2.9.1-OSX-x86_64.tar.gz
+```
+
+After the release is untarred, you will be able to `cd` into the `riak`
+directory and execute `bin/riak start` to start the Riak node.
+
+## Homebrew
+
+{{% note title="Warning: Homebrew not always up to date" %}}
+Homebrew's Riak recipe is community supported, and thus is not always up to
+date with the latest Riak package. Please ensure that the current recipe is
+using the latest supported code (and don't be afraid to update it if it's
+not).
+{{% /note %}}
+
+Installing Riak 2.9.1 with [Homebrew](http://brew.sh/) is easy:
+
+```bash
+brew install --devel riak
+```
+
+By default, this will place a `2.9.1` folder in
+`/usr/local/Cellar/riak`. 
+
+Be aware that you will most likely see the following message after
+running `brew install`:
+
+```
+Error: The `brew link` step did not complete successfully
+The formula built, but is not symlinked into /usr/local
+
+You can try again using:
+  brew link riak
+```
+
+We do not recommend using `brew link` with Riak. Instead, we recommend
+either copying that directory to a desired location on your machine,
+aliasing the executables in its `bin` directory, or interacting with
+the Riak installation directory via environment variables.
+
+**Note**: Homebrew will install Erlang if you don't have it already.
+
+## Installing From Source
+
+You must have Xcode tools installed from [Apple's Developer
+website](http://developer.apple.com/).
+
+{{% note title="Note on Clang" %}}
+Riak has had problems compiling with Clang in the past. As of Riak KV
+2.9.1 and Clang 902.0.39.1, Clang can build Riak.
+{{% /note %}}
+
+Riak requires [Erlang](http://www.erlang.org/) R16B02 or later.
+
+If you do not have Erlang already installed, see [Installing Erlang][install source erlang].
+
+Next, download and unpack the source distribution:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.1/riak-2.9.1.tar.gz
+tar zxvf riak-2.9.1.tar.gz
+cd riak-2.9.1
+make rel
+```
+
+If you receive errors when building about "incompatible architecture,"
+please verify that you built Erlang with the same architecture as your
+system (Snow Leopard and higher: 64-bit).
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.1/setup/installing/rhel-centos.md b/content/riak/kv/2.9.1/setup/installing/rhel-centos.md
new file mode 100644
index 0000000000..9c88771e31
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/installing/rhel-centos.md
@@ -0,0 +1,129 @@
+---
+title_supertext: "Installing on"
+title: "RHEL and CentOS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "RHEL & CentOS"
+    identifier: "installing_rhel_centos"
+    weight: 304
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/kv/2.9.1/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/2.9.1/installing/rhel-centos/
+  - /riak/kv/2.9.1/installing/rhel-centos/
+---
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/source
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/verify
+
+Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary
+package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on
+CentOS/RHEL 6.9, 7.5.1804, and 8.1.1911.
+
+> **Note on SELinux**
+>
+> CentOS enables SELinux by default, so you may need to disable SELinux if
+you encounter errors.
+
+## Installing From Package
+
+If you wish to install the RHEL/CentOS packages by hand, follow these
+instructions. 
+
+### For CentOS 8 / RHEL 8
+
+Before installing Riak on CentOS 8/RHEL 8, we need to satisfy some Erlang dependencies by first installing the EPEL repository:
+
+```bash
+sudo yum install -y epel-release
+```
+
+Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/8/riak-2.9.1-1.el8.x86_64.rpm
+sudo yum localinstall -y riak-2.9.1-1.el8.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/8/riak-2.9.1-1.el8.x86_64.rpm
+sudo rpm -Uvh riak-2.9.1-1.el8.x86_64.rpm
+```
+
+### For CentOS 7 / RHEL 7
+
+You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/7/riak-2.9.1-1.el7.x86_64.rpm
+sudo yum localinstall -y riak-2.9.1-1.el7.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/7/riak-2.9.1-1.el7.x86_64.rpm
+sudo rpm -Uvh riak-2.9.1-1.el7.x86_64.rpm
+```
+
+### For CentOS 6 / RHEL 6
+
+You can install Riak on CentOS 6/RHEL 6 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/6/riak-2.9.1-1.el6.x86_64.rpm
+sudo yum localinstall -y riak-2.9.1-1.el6.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/6/riak-2.9.1-1.el6.x86_64.rpm
+sudo rpm -Uvh riak-2.9.1-1.el6.x86_64.rpm
+```
+
+## Installing From Source
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+Building from source will require the following packages:
+
+* `gcc`
+* `gcc-c++`
+* `glibc-devel`
+* `make`
+* `pam-devel`
+
+You can install these with yum:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
+```
+
+Now we can download and install Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.1/riak-2.9.1.tar.gz
+tar zxvf riak-2.9.1.tar.gz
+cd riak-2.9.1
+make rel
+```
+
+You will now have a fresh build of Riak in the `rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.1/setup/installing/smartos.md b/content/riak/kv/2.9.1/setup/installing/smartos.md
new file mode 100644
index 0000000000..652c67a5e9
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/installing/smartos.md
@@ -0,0 +1,118 @@
+---
+title_supertext: "Installing on"
+title: "SmartOS"
+description: ""
+project: "riak_kv"
+project_version: "2.9.1"
+menu:
+  riak_kv-2.9.1:
+    name: "SmartOS"
+    identifier: "installing_smartos"
+    weight: 305
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/installing/Installing-on-SmartOS
+  - /riak/kv/2.9.1/ops/building/installing/Installing-on-SmartOS
+  - /riak/2.9.1/installing/smartos/
+  - /riak/kv/2.9.1/installing/smartos/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/verify
+
+{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
+SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. 
They demonstrate installation of a Riak node on SmartOS as the root user.
+
+## Open Files Limit
+
+Before proceeding with installation, you should ensure that the system's open
+files limit is at least 65536. Check the current limits to verify this:
+
+```bash
+ulimit -a
+```
+
+To temporarily increase this limit *for the life of your session*, use the following command:
+
+```bash
+ulimit -n 65536
+```
+
+To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`:
+
+```bash
+set rlim_fd_max=65536
+```
+
+## Choosing a Version
+
+SmartOS, albeit powerful, can make some easy tasks (like determining the "version" of SmartOS) difficult. The correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is how to determine which Riak package to use.
+
+What really matters for Riak is which dataset was used to make the SmartOS VM. These datasets come from Joyent and appear like this with the `dsadm` command:
+
+```
+fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4
+f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1
+```
+
+This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know which dataset you used to make your SmartOS VM, you will know which package to use.
+
+For Joyent Cloud users who don't know what dataset was used, in the guest zone type:
+
+```
+cat /opt/local/etc/pkgin/repositories.conf
+```
+
+* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2*, you need to use the `1.8` download.
+* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011*, you need to use the `1.6` download.
+
+## Download and Install
+
+Download your version of the Riak binary package for SmartOS:
+
+```bash
+curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz
+```
+
+Next, install the package:
+
+```
+pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz
+```
+
+After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services:
+
+```bash
+svcadm -v enable -r riak
+```
+
+Next, after enabling the services, check to see that they are online:
+
+```
+svcs -a | grep -E 'epmd|riak'
+```
+
+Output from the above command should resemble the following:
+
+```
+online 17:17:16 svc:/network/epmd:default
+online 17:17:16 svc:/application/riak:default
+```
+
+Finally, provided that the services are shown to be in an **online** state, go ahead and ping Riak:
+
+```bash
+riak ping
+```
+
+Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead.
+
+If all responses indicate that Riak is up and running, then you have successfully installed and configured Riak as a service on SmartOS.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
diff --git a/content/riak/kv/2.9.1/setup/installing/solaris.md b/content/riak/kv/2.9.1/setup/installing/solaris.md new file mode 100644 index 0000000000..628a85e323 --- /dev/null +++ b/content/riak/kv/2.9.1/setup/installing/solaris.md @@ -0,0 +1,90 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: "2.9.1" +menu: + riak_kv-2.9.1: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/2.9.1/ops/building/installing/Installing-on-Solaris + - /riak/kv/2.9.1/ops/building/installing/Installing-on-Solaris + - /riak/2.9.1/installing/solaris/ + - /riak/kv/2.9.1/installing/solaris/ +--- + + + +[install verify]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
diff --git a/content/riak/kv/2.9.1/setup/installing/source.md b/content/riak/kv/2.9.1/setup/installing/source.md
new file mode 100644
index 0000000000..95e85da5d9
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/installing/source.md
@@ -0,0 +1,105 @@
+---
+title_supertext: "Installing"
+title: "Riak KV From Source"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Installing From Source"
+    identifier: "installing_source"
+    weight: 310
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/Installing-Riak-from-Source
+  - /riak/kv/2.9.1/ops/building/Installing-Riak-from-Source
+  - /riak/2.9.1/installing/source/
+  - /riak/kv/2.9.1/installing/source/
+---
+
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/2.9.1/downloads/
+[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/debian-ubuntu/#installing-from-source
+[install freebsd#source]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/freebsd/#installing-from-source
+[install mac osx#source]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/mac-osx/#installing-from-source
+[install rhel & centos#source]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/rhel-centos/#installing-from-source
+[install verify]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/verify
+
+Riak should be installed from source if you are building on a platform
+for which a package does not exist or if you are interested in
+contributing to Riak.
+
+## Dependencies
+
+### Erlang
+
+To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release.
+
+See [Installing Erlang][install source erlang] for instructions.
+
+### Git
+
+Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build.
+
+### GCC
+
+Riak will not compile with Clang. Please make sure your default C/C++
+compiler is [GCC](https://gcc.gnu.org/).
+
+## Installation
+
+The following instructions generate a complete, self-contained build of
+Riak in `$RIAK/rel/riak` where `$RIAK` is the location of the unpacked
+or cloned source.
+
+### Installing from source package
+
+Download the Riak source package from the [Download Center][downloads] and build:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.1/riak-2.9.1.tar.gz
+tar zxvf riak-2.9.1.tar.gz
+cd riak-2.9.1
+make locked-deps
+make rel
+```
+
+### Installing from GitHub
+
+The [Riak GitHub repository](http://github.com/basho/riak) has much
+more information on building and installing Riak from source. To clone
+and build Riak from source, follow the steps below.
+
+Clone the repository using [Git](http://git-scm.com) and build:
+
+```bash
+git clone git://github.com/basho/riak.git
+cd riak
+make locked-deps
+make rel
+```
+
+## Platform-Specific Instructions
+
+For instructions about specific platforms, see:
+
+  * [Debian & Ubuntu][install debian & ubuntu#source]
+  * [FreeBSD][install freebsd#source]
+  * [Mac OS X][install mac osx#source]
+  * [RHEL & CentOS][install rhel & centos#source]
+
+If you are running Riak on a platform not in the list above and need
+some help getting it up and running, join The Riak Mailing List and
+inquire about it there. We are happy to help you get up and running with
+Riak. 
+
+### Windows
+
+Riak is not currently supported on Microsoft Windows.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.1/setup/installing/source/erlang.md b/content/riak/kv/2.9.1/setup/installing/source/erlang.md
new file mode 100644
index 0000000000..3165ef66bc
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/installing/source/erlang.md
@@ -0,0 +1,566 @@
+---
+title: "Installing Erlang"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Installing Erlang"
+    identifier: "installing_source_erlang"
+    weight: 301
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/installing/erlang
+  - /riak/kv/2.9.1/ops/building/installing/erlang
+  - /riak/2.9.1/installing/source/erlang/
+  - /riak/kv/2.9.1/installing/source/erlang/
+---
+
+[install index]: {{<baseurl>}}riak/kv/2.9.1/setup/installing
+[security basics]: {{<baseurl>}}riak/kv/2.9.1/using/security/basics
+
+Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].**
+
+> **Note on Official Support**
+>
+> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package.
+
+## Prerequisites
+
+#### Contents
+
+* [kerl](#kerl-prerequisites)
+* [Debian/Ubuntu](#debian-ubuntu-prerequisites)
+* [FreeBSD/Solaris](#freebsd-solaris-prerequisites)
+* [Mac OS X](#mac-os-x-prerequisites)
+* [RHEL/CentOS](#rhel-centos-prerequisites)
+
+To build and install Erlang you must have a GNU-compatible build system and these tools:
+
+**Unpacking**
+
+* [GNU unzip](http://www.gzip.org/) or a modern uncompressing utility.
+* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives.
+
+**Building**
+
+* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts.
+* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program.
+* [gcc](https://gcc.gnu.org/): for compiling C.
+* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces.
+* [OpenSSL](https://www.openssl.org/): toolkit that implements the SSL and TLS protocols.
+* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java.
+
+
+## kerl Prerequisites
+
+[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems.
+
+Install kerl by running the following commands:
+
+```bash
+curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl
+chmod a+x kerl
+```
+
+If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl.
+
+Otherwise, continue with [Installing with kerl](#installing-with-kerl). 
+
+### Configuring kerl on FreeBSD/Solaris
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next, add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+  --enable-kernel-poll --without-odbc"
+```
+
+Then check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf by running:
+
+```shell
+sudo pkg update
+sudo pkg install autoconf
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+### Configuring kerl on Mac OS X
+
+To compile Erlang as 64-bit on Mac OS X, you need to instruct kerl to pass the correct flags to the `configure` command.
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next, add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+  --enable-kernel-poll --without-odbc --enable-darwin-64bit"
+```
+
+On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).
+
+Check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf either with Homebrew:
+
+```shell
+brew install autoconf
+```
+
+Or with curl:
+
+```shell
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+
+## Debian/Ubuntu Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `apt-get` commands:
+
+```bash
+sudo apt-get update
+sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+> **Note on build output**
+>
+>These packages are not required for operation of a Riak node.
+Notes in the build output about missing support for wxWidgets can be
+safely ignored when installing Riak in a typical non-graphical server
+environment.
+
+To install packages for graphics support, use the following `apt-get` command:
+
+```bash
+sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).
+
+
+
+## FreeBSD/Solaris Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `pkg` commands:
+
+```bash
+sudo pkg update
+sudo pkg install gcc autoconf gmake flex
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support, use the following `pkg` command:
+
+```bash
+sudo pkg install wx28-gtk2-2.8.12_4
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).
+
+
+
+## Mac OS X Prerequisites
+
+* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
+* [Homebrew](http://brew.sh/) (*optional*) - Package Manager.
+
+First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X.
+
+We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is not required to install Erlang and is optional.
+
+Next, if you are running OS X 10.9 (Mavericks) or later, you may need to
+install [autoconf](https://www.gnu.org/software/autoconf/). To check for
+the presence of autoconf, run:
+
+```bash
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf either with Homebrew:
+
+```bash
+brew install autoconf
+```
+
+Or with curl:
+
+```bash
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x).
+
+## RHEL/CentOS Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `yum` command:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support, use the following `yum` command:
+
+```bash
+sudo yum install wxBase.x86_64
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos).
+
+
+
+## Installation
+
+* [Installing with kerl](#installing-with-kerl)
+* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu)
+* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris)
+* [Installing on Mac OS X](#installing-on-mac-os-x)
+* [Installing on RHEL/CentOS](#installing-on-rhel-centos)
+
+## Installing with kerl
+
+First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites).
+
+With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of
+Erlang [from Github](https://github.com/basho/otp) using the following
+command:
+
+```bash
+./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10
+```
+
+This builds the Erlang distribution and performs all of the steps
+required to manually install Erlang for you.
+
+After Erlang is successfully built, you can install the build as follows:
+
+```bash
+./kerl install R16B02-basho10 ~/erlang/R16B02-basho10
+. ~/erlang/R16B02-basho10/activate
+```
+
+The last line activates the Erlang build that was just installed into
+`~/erlang/R16B02-basho10`.
+
+> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands.
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
+## Installing on Debian/Ubuntu
+
+First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites).
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). 
+ +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). + +You can install Erlang in several ways on OS X: + +* [From Source](#installing-on-mac-os-x-from-source) +* [Homebrew](#installing-on-mac-os-x-with-homebrew) +* [MacPorts](#installing-on-mac-os-x-with-macports) + +## Installing on Mac OS X from Source + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Follow the steps below to configure Erlang for your operating system. + +#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7) + +If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion +(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang. 
+
+Using LLVM:
+
+```bash
+CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+Or if you prefer GCC:
+
+```bash
+CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on Snow Leopard (OS X 10.6) and Leopard (OS X 10.5)
+
+If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an
+Intel processor:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on older versions of OS X
+
+If you're on a non-Intel processor or an older version of OS X:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll
+```
+
+After you've configured your system, `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with Homebrew
+
+To install Erlang with Homebrew, use this command:
+
+```bash
+brew install erlang
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with MacPorts
+
+Installing with MacPorts:
+
+```bash
+port install erlang +ssl
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on RHEL/CentOS
+
+First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites).
+
+Using `wget`:
+
+```bash
+wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+> **Note for RHEL6/CentOS6**
+>
+> In certain versions of RHEL 6 and CentOS 6 the `openssl-devel` package
+ships with Elliptic Curve Cryptography partially disabled. To
+communicate this to Erlang and prevent compile- and run-time errors, the
+environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to
+Erlang's `./configure` call. 
+> +> The full `make` invocation then becomes +> +> ```bash +CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` diff --git a/content/riak/kv/2.9.1/setup/installing/source/jvm.md b/content/riak/kv/2.9.1/setup/installing/source/jvm.md new file mode 100644 index 0000000000..65848abc00 --- /dev/null +++ b/content/riak/kv/2.9.1/setup/installing/source/jvm.md @@ -0,0 +1,51 @@ +--- +title: "Installing the JVM" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Installing the JVM" + identifier: "installing_source_jvm" + weight: 302 + parent: "installing_source" +toc: true +aliases: + - /riak/2.9.1/ops/building/installing/jvm + - /riak/kv/2.9.1/ops/building/installing/jvm + - /riak/2.9.1/ops/building/installing/Installing-the-JVM + - /riak/kv/2.9.1/ops/building/installing/Installing-the-JVM + - /riak/2.9.1/installing/source/jvm/ + - /riak/kv/2.9.1/installing/source/jvm/ +--- + +[usage search]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search + +If you are using [Riak Search 2.0][usage search], codename Yokozuna, +you will need to install **Java 1.6 or later** to run [Apache +Solr](https://lucene.apache.org/solr/), the search platform that powers +Riak Search. + +We recommend using Oracle's [JDK +7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html). +Installation packages can be found on the [Java SE 7 Downloads +page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR) +and instructions on the [documentation +page](http://www.oracle.com/technetwork/java/javase/documentation/index.html). + +## Installing Solr on OS X + +If you're using Riak Search on Mac OS X, you may see the following +error: + +```java +java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME> +``` + +If you encounter this error, we recommend manually setting the hostname +for `localhost` using +[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html). + +```bash +scutil --set HostName "localhost" +``` diff --git a/content/riak/kv/2.9.1/setup/installing/suse.md b/content/riak/kv/2.9.1/setup/installing/suse.md new file mode 100644 index 0000000000..dba0c441ef --- /dev/null +++ b/content/riak/kv/2.9.1/setup/installing/suse.md @@ -0,0 +1,47 @@ +--- +title_supertext: "Installing on" +title: "SUSE" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "SUSE" + identifier: "installing_suse" + weight: 307 + parent: "installing" +toc: false +aliases: + - /riak/2.9.1/ops/building/installing/Installing-on-SUSE + - /riak/kv/2.9.1/ops/building/installing/Installing-on-SUSE + - /riak/2.9.1/installing/suse/ + - /riak/kv/2.9.1/installing/suse/ +--- + +[install verify]: {{<baseurl>}}riak/kv/2.9.1/setup/installing/verify + +{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}} +SUSE is no longer supported in Riak KV 2.9.1+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +Riak KV can be installed on OpenSuse and SLES systems using a binary package. 
The following steps have been tested to work with Riak on +the following x86/x86_64 flavors of SuSE: + +* SLES11-SP1 +* SLES11-SP2 +* SLES11-SP3 +* SLES11-SP4 +* OpenSUSE 11.2 +* OpenSUSE 11.3 +* OpenSUSE 11.4 + +## Installing with rpm + +```bash +wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm +sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.9.1/setup/installing/verify.md b/content/riak/kv/2.9.1/setup/installing/verify.md new file mode 100644 index 0000000000..ac7a805df8 --- /dev/null +++ b/content/riak/kv/2.9.1/setup/installing/verify.md @@ -0,0 +1,164 @@ +--- +title: "Verifying a Riak KV Installation" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Verifying an Installation" + identifier: "installing_verify" + weight: 311 + parent: "installing" +toc: true +aliases: + - /riak/2.9.1/ops/installing/Post-Installation + - /riak/kv/2.9.1/ops/installing/Post-Installation + - /riak/2.9.1/installing/verify-install/ + - /riak/kv/2.9.1/installing/verify-install/ +--- + +[client libraries]: {{<baseurl>}}riak/kv/2.9.1/developing/client-libraries +[perf open files]: {{<baseurl>}}riak/kv/2.9.1/using/performance/open-files-limit +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/inspecting-node + +After you've installed Riak KV, we recommend checking the liveness of +each node to ensure that requests are being properly served. + +In this document, we cover ways of verifying that your Riak nodes are operating +correctly. After you've determined that your nodes are functioning and you're +ready to put Riak KV to work, be sure to check out the resources in the +**Now What?** section below. + +## Starting a Riak Node + +> **Note about source installations** +> +> To start a Riak KV node that was installed by compiling the source code, you +can add the Riak KV binary directory from the installation directory you've +chosen to your `PATH`. +> +> For example, if you compiled Riak KV from source in +the `/home/riak` directory, then you can add the binary directory +(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation. + +To start a Riak node, use the `riak start` command: + +```bash +riak start +``` + +A successful start will return no output. If there is a problem starting the +node, an error message is printed to standard error. + +To run Riak with an attached interactive Erlang console: + +```bash +riak console +``` + +A Riak node is typically started in console mode as part of debugging or +troubleshooting to gather more detailed information from the Riak startup +sequence. Note that if you start a Riak node in this manner, it is running as +a foreground process that will be exited when the console is closed. + +You can close the console by issuing this command at the Erlang prompt: + +```erlang +q(). +``` + +Once your node has started, you can initially check that it is running with +the `riak ping` command: + +```bash +riak ping +``` + +The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not. 
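+
+Because `riak ping` also sets its exit status (zero when the node answers, non-zero otherwise), it is handy in scripts. The snippet below is only a sketch of that idea; the exit-code behavior is an assumption worth verifying on your build:
+
+```bash
+#!/usr/bin/env bash
+# Sketch: wait up to 60 seconds for the local node to answer pings.
+for attempt in $(seq 1 12); do
+    if riak ping >/dev/null 2>&1; then
+        echo "node is up"
+        exit 0
+    fi
+    sleep 5
+done
+echo "node did not respond to pings" >&2
+exit 1
+```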
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the [bucket type][cluster ops bucket types] `default`:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /riak/test HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node! 
+ +From here you might want to check out the following resources: + +* [Client Libraries][client libraries] to use Riak with your favorite programming language diff --git a/content/riak/kv/2.9.1/setup/installing/windows-azure.md b/content/riak/kv/2.9.1/setup/installing/windows-azure.md new file mode 100644 index 0000000000..e72339765e --- /dev/null +++ b/content/riak/kv/2.9.1/setup/installing/windows-azure.md @@ -0,0 +1,192 @@ +--- +title_supertext: "Installing on" +title: "Windows Azure" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Windows Azure" + identifier: "installing_windows_azure" + weight: 308 + parent: "installing" +toc: true +aliases: + - /riak/2.9.1/ops/building/installing/Installing-on-Windows-Azure + - /riak/kv/2.9.1/ops/building/installing/Installing-on-Windows-Azure + - /riak/2.9.1/installing/windows-azure/ + - /riak/kv/2.9.1/installing/windows-azure/ +--- + +## Creating CentOS VMs + +{{% note title="Warning: Azure not always up to date" %}} +Azure's available Riak VMs are community supported, and thus are not always up to +date with the latest Riak package. Please ensure that the current VM is +using the latest supported code (and don't be afraid to update it if it's +not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s). +{{% /note %}} + +You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account. + +1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account. + +2. Click "preview features" to view the available previews. + + ![]({{<baseurl>}}images/antares-iaas-preview-01.png) + +3. Scroll down to Virtual Machines & Virtual Networks and click "try it now". + + ![]({{<baseurl>}}images/antares-iaas-preview-02.png) + +4. Select your subscription and click the check. + + ![]({{<baseurl>}}images/antares-iaas-preview-04.png) + +### Create a virtual machine running CentOS Linux + +1. Log in to the Windows Azure (Preview) Management Portal using your Windows Azure account. + +2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery". + + ![]({{<baseurl>}}images/createvm_small.png) + +3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page. + + ![]({{<baseurl>}}images/vmconfiguration0.png) + +4. On the VM Configuration page, provide the following information: + - Provide a "Virtual Machine Name", such as "testlinuxvm". + - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package. + - In the "New Password" box, type a strong password. + - In the "Confirm Password" box, retype the password. + - Select the appropriate "Size" from the drop down list. + - Click the next arrow to continue. + + ![]({{<baseurl>}}images/vmconfiguration1.png) + +5. On the VM Mode page, provide the following information: + - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list. + - In the "DNS Name" box, type a valid DNS address, e.g., "testlinuxvm".
+ - In the "Storage Account" box, select "Use Automatically Generated Storage Account". + - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. + - Click the next arrow to continue. + + ![]({{<baseurl>}}images/vmconfiguration2.png) + +6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. + + ![]({{<baseurl>}}images/vmconfiguration3.png) + +7. Wait while Windows Azure prepares your virtual machine. + +### Configure Endpoints + +Once the virtual machine is created, you must configure endpoints in order to connect remotely. + +1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints". + +2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows: + - Name: https + - Protocol: leave set to 'TCP' + - Public Port: 443 + - Private Port: 8069 + +## Connect to CentOS VMs using PuTTY or SSH + +When the virtual machine has been provisioned and the endpoints configured, you can connect to it using SSH or PuTTY. + +### Connecting Using SSH + +**For Linux & Mac Users:** + +```bash +ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180 +``` +Enter the user's password. + +**For Windows Users, use PuTTY:** + +If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html). + +1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe. + +2. Enter the SSH details found on the node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. + + ![]({{<baseurl>}}images/putty.png) + +## Install Riak and configure using a shell script + +1. **On each node**, once you've connected using the steps above, execute: + +```bash +sudo su - +curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh +``` + +## Configure Riak using Riak Control + +You can either use Riak Control or the command line to add nodes to your Riak Cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line". + +1. Find the DNS name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For example: + - **dns:** basho-example.cloudapp.net + - **Deployment ID:** 7ea145743aeb4402a088da1234567890 + +2. Visit https://dns-name.cloudapp.net/admin in your browser. + +3. Enter 'admin' as the username, and the "Deployment ID" as the password. + +4. Select 'Cluster' on the left. + +5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each VM, not the DNS name. For example: + - riak@basho-centos1 + +You now have a Riak cluster on Azure. + +## Configure Riak using Command Line + +If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section. + +First, SSH into the second (and each subsequent) node and execute: + +```bash +riak-admin cluster join riak@yourhostnamehere +``` + +(Where 'yourhostnamehere' is the short name of the **first node** in your cluster) + +(NOTE: The host you choose can actually be any host that has already joined the cluster.
The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.) + +After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following: + +```bash +riak-admin cluster plan +``` + +Verify all the nodes are listed as expected. If the cluster plan looks good: + +```bash +riak-admin cluster commit +``` + +To check the status of clustering, use: + +```bash +riak-admin member-status +``` + +You now have a Riak cluster on Azure. + +## Load Test Data + +Execute on any one of the nodes: + +```bash +curl -s http://rekon.basho.com | sh +``` + +Visit the DNS address listed on the dashboard, at the port we opened as an endpoint: + +``` +http://testlinuxvm.cloudapp.net:8098/riak/rekon/go +``` diff --git a/content/riak/kv/2.9.1/setup/planning.md b/content/riak/kv/2.9.1/setup/planning.md new file mode 100644 index 0000000000..dcf0a3ddf7 --- /dev/null +++ b/content/riak/kv/2.9.1/setup/planning.md @@ -0,0 +1,55 @@ +--- +title: "Planning Overview" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Planning" + identifier: "planning" + weight: 100 + parent: "setup_index" +toc: true +--- + +[plan start]: ./start +[plan backend]: ./backend +[plan cluster capacity]: ./cluster-capacity +[plan bitcask capacity]: ./bitcask-capacity-calc +[plan backend bitcask]: ./backend/bitcask +[plan best practices]: ./best-practices +[plan future]: ./future + +## In This Section + +### [Start Planning][plan start] + +Steps and recommendations for designing and configuring a Riak KV cluster. + +[Learn More >>][plan start] + +### [Choosing a Backend][plan backend] + +Information on choosing the right storage backend for your Riak KV cluster. + +[Learn More >>][plan backend] + +### [Cluster Capacity Planning][plan cluster capacity] + +Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster. + +[Learn More >>][plan cluster capacity] + +### [Bitcask Capacity Calculator][plan bitcask capacity] + +A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend. + +[Learn More >>][plan bitcask capacity] + +### [Scaling & Operating Best Practices][plan best practices] + +A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster.
+ +[Learn More >>][plan best practices] + + diff --git a/content/riak/kv/2.9.1/setup/planning/backend.md b/content/riak/kv/2.9.1/setup/planning/backend.md new file mode 100644 index 0000000000..e0faf5591c --- /dev/null +++ b/content/riak/kv/2.9.1/setup/planning/backend.md @@ -0,0 +1,56 @@ +--- +title: "Choosing a Backend" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Choosing a Backend" + identifier: "planning_choose_backend" + weight: 102 + parent: "planning" +toc: true +aliases: + - /riak/2.9.1/ops/building/planning/backends/ + - /riak/kv/2.9.1/ops/building/planning/backends/ +--- + +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb +[plan backend memory]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/memory +[plan backend multi]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/multi +[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveled +[dev api backend]: {{<baseurl>}}riak/kv/2.9.1/developing/api/backend + +Pluggable storage backends are a key feature of Riak KV. They enable you to +choose a low-level storage engine that suits specific operational needs. +For example, if your use case requires maximum throughput, data +persistence, and a bounded keyspace, then Bitcask is a good choice. On +the other hand, if you need to store a large number of keys or to use +secondary indexes, LevelDB is likely a better choice. + +The following backends are supported: + +* [Bitcask][plan backend bitcask] +* [LevelDB][plan backend leveldb] +* [Memory][plan backend memory] +* [Multi][plan backend multi] +* [Leveled][plan backend leveled] + +Riak KV supports the use of custom storage backends as well. See the +storage [Backend API][dev api backend] for more details. + +Feature or Characteristic |Bitcask|LevelDB|Memory| +:----------------------------------------------|:-----:|:-----:|:----:| +Default Riak KV backend |✓ | | | +Persistent |✓ |✓ | | +Keyspace in RAM |✓ | |✓ | +Keyspace can be greater than available RAM | |✓ | | +Keyspace loaded into RAM on startup<sup>1</sup>|✓ | | | +Objects in RAM | | |✓ | +Object expiration |✓ | |✓ | +Secondary indexes | |✓ |✓ | +Tiered storage | |✓ | | + +<sup>1</sup> Noted here since this can affect Riak start times for large +keyspaces.
diff --git a/content/riak/kv/2.9.1/setup/planning/backend/bitcask.md b/content/riak/kv/2.9.1/setup/planning/backend/bitcask.md new file mode 100644 index 0000000000..2bfc9f22b3 --- /dev/null +++ b/content/riak/kv/2.9.1/setup/planning/backend/bitcask.md @@ -0,0 +1,990 @@ +--- +title: "Bitcask" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Bitcask" + identifier: "planning_backend_bitcask" + weight: 100 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.1/ops/advanced/backends/bitcask/ + - /riak/kv/2.9.1/ops/advanced/backends/bitcask/ +--- + +[github bitcask]: https://github.com/basho/bitcask +[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf +[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-cli +[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode +[learn clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters +[plan backend multi]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/multi +[usage search]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search + +[glossary aae]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#active-anti-entropy-aae +[perf open files]: {{<baseurl>}}riak/kv/2.9.1/using/performance/open-files-limit + +[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/bitcask-capacity-calc +[usage delete objects]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/deleting-objects + +[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging. + +## Bitcask's Strengths + +* **Low latency per item read or written** + + This is due to the write-once, append-only nature of Bitcask + database files. + +* **High throughput, especially when writing an incoming stream of + random items** + + Write operations to Bitcask generally saturate I/O and disk + bandwidth, which is a good thing from a performance perspective. + This saturation occurs for two reasons: (1) data that is + written to Bitcask doesn't need to be ordered on disk, and (2) the + log-structured design of Bitcask allows for minimal disk head + movement during writes. + +* **Ability to handle datasets larger than RAM without degradation** + + Access to data in Bitcask involves direct lookup from an in-memory + hash table. This makes finding data very efficient, even when + datasets are very large. + +* **Single seek to retrieve any value** + + Bitcask's in-memory hash table of keys points directly to locations + on disk where the data lives. Bitcask never uses more than one disk + seek to read a value and sometimes even that isn't necessary due to + filesystem caching done by the operating system. + +* **Predictable lookup _and_ insert performance** + + For the reasons listed above, read operations from Bitcask have + fixed, predictable behavior. This is also true of writes to Bitcask + because write operations require, at most, one seek to the end of + the current open file followed by an append to that file. + +* **Fast, bounded crash recovery** + + Crash recovery is easy and fast with Bitcask because Bitcask files + are append only and write once. The only items that may be lost are + partially written records at the tail of the last file that was + opened for writes.
Recovery operations need to review only the last + record or two written and verify CRC data to ensure that the data is + consistent. + +* **Easy Backup** + + In most systems, backup can be very complicated. Bitcask simplifies + this process due to its append-only, write-once disk format. Any + utility that archives or copies files in disk-block order will + properly back up or copy a Bitcask database. + +## Weaknesses + +* Keys must fit in memory + + Bitcask keeps all keys in memory at all times, which means that your + system must have enough memory to contain your entire keyspace, plus + additional space for other operational components and + operating-system-resident filesystem buffer space. + +## Installing Bitcask + +Bitcask is the default storage engine for Riak. You can verify that +Bitcask is currently being used as the storage backend with the +[`riak`][use admin riak cli] command interface: + +```bash +riak config effective | grep backend +``` + +If this operation returns anything other than `bitcask`, read +the following section for instructions on switching the backend to Bitcask. + +## Enabling Bitcask + +You can set Bitcask as the storage engine using each node's +[configuration files][config reference]: + +```riakconf +storage_backend = bitcask +``` + +```appconfig +{riak_kv, [ + {storage_backend, riak_kv_bitcask_backend}, + %% Other riak_kv settings... + + ]}, +``` + +## Configuring Bitcask + +Bitcask enables you to configure a wide variety of its behaviors, from +filesystem sync strategy to merge settings and more. + +> **Note on configuration systems** +> +> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file. +> Instructions for both systems will be included below. Narrative +descriptions of the various settings will be tailored to the newer +configuration system, whereas instructions for the older system will +largely be contained in the code tabs. + +The default configuration values for Bitcask are as follows: + +```riakconf +bitcask.data_root = ./data/bitcask +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + {data_root, "/var/lib/riak/bitcask"}, + {io_mode, erlang}, + + %% Other Bitcask-specific settings + ]} +``` + +All of the other available settings listed below can be added to your +configuration files. + +### Open Timeout + +The open timeout setting specifies the maximum time Bitcask will block +on startup while attempting to create or open the Bitcask data +directory. The default is 4 seconds. + +In general, you will not need to adjust this setting. If, however, you +begin to receive log messages of the form `Failed to start bitcask +backend: ...`, you may want to consider using a longer timeout. + +Open timeout is specified using the `bitcask.sync.open_timeout` +parameter, and can be set in terms of seconds, minutes, hours, etc. +The following example sets the parameter to 10 seconds: + +```riakconf +bitcask.sync.open_timeout = 10s +``` + +```appconfig +{bitcask, [ + ..., + {open_timeout, 10} %% This value must be expressed in seconds + ... + ]} +``` + +### Sync Strategy + +Bitcask enables you to configure the durability of writes by specifying +when to synchronize data to disk, i.e. by choosing a sync strategy. The +default setting (`none`) writes data into operating system buffers that +will be written to disk when those buffers are flushed by the operating +system.
If the system fails before those buffers are flushed, e.g. due +to power loss, that data is lost. This possibility holds for any +database in which values are asynchronously flushed to disk. + +Thus, using the default setting of `none` protects against data loss in +the event of application failure, i.e. process death, but leaves open a +small window in which data could be lost in the event of a complete +system failure, e.g. hardware or OS failure. + +This possibility can be prevented by choosing the `o_sync` sync +strategy, which forces the operating system to flush to stable storage +at write time for every write. The effect of flushing each write is +better durability, although it should be noted that write throughput +will suffer because each write will have to wait for the sync to +complete. + +The following sync strategies are available: + + * `none` --- lets the operating system manage syncing writes + (default) + * `o_sync` --- uses the `O_SYNC` flag, which forces syncs on every + write + * Time interval --- Riak will force Bitcask to sync at specified + intervals + +The following are possible configurations: + + +```riakconf +bitcask.sync.strategy = none +bitcask.sync.strategy = o_sync + +bitcask.sync.strategy = interval +bitcask.sync.interval = 65s +``` + +```appconfig +{bitcask, [ + ..., + {sync_strategy, none}, + {sync_strategy, o_sync}, + {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds + ... + ]} +``` + +> **Sync strategy interval limitations** +> +> Setting the sync interval to a value lower than or equal to + `riak_core.vnode_inactivity_timeout` (default: 60 seconds) will + prevent Riak from performing handoffs. +> +> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive. + +### Max File Size + +The `max_file_size` setting describes the maximum permitted size for any +single data file in the Bitcask directory. If a write causes the current +file to exceed this size threshold, then that file is closed and a new +file is opened for writes. The default is 2 GB. + +Increasing `max_file_size` will cause Bitcask to create fewer, larger +files that are merged less frequently, while decreasing it will cause +Bitcask to create more numerous, smaller files that are merged more +frequently. + +To give an example, if your ring size is 16, your servers could see as +much as 32 GB of data in the bitcask directories before the first merge +is triggered, irrespective of your working set size. You should plan +storage accordingly and be aware that it is possible to see disk data +sizes that are larger than the working set. + +The `max_file_size` setting can be specified using kilobytes, megabytes, +etc. The following example sets the max file size to 1 GB: + +```riakconf +bitcask.max_file_size = 1GB +``` + +```appconfig +%% The max_file_size setting must be expressed in bytes, as in the +%% example below + +{bitcask, [ + ..., + {max_file_size, 16#40000000}, %% 1 GB expressed in bytes + ... + ]} +``` + +### Hint File CRC Check + +During startup, Bitcask will read from `.hint` files in order to build +its in-memory representation of the key space, falling back to `.data` +files if necessary. This reduces the amount of data that must be read +from the disk during startup, thereby also reducing the time required to +start up.
You can configure Bitcask to either disregard `.hint` files +that don't contain a CRC value or to use them anyway. + +If you are using the newer, `riak.conf`-based configuration system, you +can instruct Bitcask to disregard `.hint` files that do not contain a +CRC value by setting the `hintfile_checksums` setting to `strict` (the +default). To use Bitcask in a backward-compatible mode that allows for +`.hint` files without CRC signatures, change the setting to +`allow_missing`. + +The following example sets the parameter to `strict`: + +```riakconf +bitcask.hintfile_checksums = strict +``` + +```appconfig +%% In the app.config-based system, substitute "require_hint_crc" for +%% "hintfile_checksums", "true" for "strict", and "false" for +%% "allow_missing" + +{bitcask, [ + ..., + {require_hint_crc, true}, + ... + ]} +``` + +### I/O Mode + +The `io_mode` setting specifies which code module Bitcask should use for +file access. The available settings are: + +* `erlang` (default) --- Writes are made via Erlang's built-in file API +* `nif` --- Writes are made via direct calls to the POSIX C API + +The following example sets `io_mode` to `erlang`: + +```riakconf +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + ..., + {io_mode, erlang}, + ... + ]} +``` + +In general, the `nif` IO mode provides higher throughput for certain +workloads, but it has the potential to negatively impact the Erlang VM, +leading to higher worst-case latencies and possible throughput collapse. + +### `O_SYNC` on Linux + +Synchronous file I/O via +[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is +supported in Bitcask if `io_mode` is set to `nif` and is not supported +in the `erlang` mode. + +If you enable `o_sync` by setting `io_mode` to `nif`, however, you will +still get an incorrect warning along the following lines: + +```log +[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux +``` + +If you are using the older, `app.config`-based configuration system, you +can disable the check that generates this warning by adding the +following to the `riak_kv` section of your `app.config`: + +```appconfig +{riak_kv, [ + ..., + {o_sync_warning_logged, false}, + ... + ]} +``` + +### Disk Usage and Merging Settings + +Riak KV stores each [vnode][glossary vnode] of the +[ring][learn clusters] as a separate Bitcask directory within the +configured Bitcask data directory. + +Each of these directories will contain multiple files with key/value +data, one or more "hint" files that record where the various keys exist +within the data files, and a write lock file. The design of Bitcask +allows for recovery even when data isn't fully synchronized to disk +(partial writes). This is accomplished by maintaining data files that +are append-only (i.e. never modified in-place) and are never reopened +for modification (i.e. they are only for reading). + +This data management strategy trades disk space for operational +efficiency. There can be a significant storage overhead that is +unrelated to your working data set but can be tuned in a way that best +fits your use case. In short, disk space is used until a threshold is +met at which point unused space is reclaimed through a process of +merging. The merge process traverses data files and reclaims space by +eliminating out-of-date or deleted key/value pairs, writing only the +current key/value pairs to a new set of files within the directory.
+ +The merge process is affected by all of the settings described in the +sections below. In those sections, "dead" refers to keys that no longer +contain the most up-to-date values, while "live" refers to keys that do +contain the most up-to-date value and have not been deleted. + +### Merge Policy + +Bitcask enables you to select a merge policy, i.e. when during the day +merge operations are allowed to be triggered. The valid options are: + +* `always` --- No restrictions on when merge operations can occur + (default) +* `never` --- Merge will never be attempted +* `window` --- Merge operations occur during specified hours + +If you are using the newer, `riak.conf`-based configuration system, you +can select a merge policy using the `merge.policy` setting. The +following example sets the merge policy to `never`: + +```riakconf +bitcask.merge.policy = never +``` + +```appconfig +{bitcask, [ + ..., + {merge_window, never}, + ... + ]} +``` + +If you opt to specify start and end hours for merge operations, you can +do so with the `merge.window.start` and `merge.window.end` +settings in addition to setting the merge policy to `window`. +Each setting is an integer between 0 and 23 for hours on a 24h clock, +with 0 meaning midnight and 23 standing for 11 pm. +The merge window runs from the first minute of the `merge.window.start` hour +to the last minute of the `merge.window.end` hour. +The following example enables merging between 3 am and 5:59 pm: + +```riakconf +bitcask.merge.policy = window +bitcask.merge.window.start = 3 +bitcask.merge.window.end = 17 +``` + +```appconfig +%% In the app.config-based system, you specify the merge window using +%% a tuple, as in the following example: + +{bitcask, [ + ..., + {merge_window, {3, 17}}, + ... + ]} +``` + +> **`merge_window` and the Multi backend** +> +>If you are using the older configuration system and using Bitcask with +the [Multi][plan backend multi] backend, please note that if you +wish to use a merge window, you _must_ set it in the global `bitcask` +section of your configuration file. `merge_window` settings +in per-backend sections are ignored. + +If merging has a significant impact on the performance of your cluster, or +if your cluster has quiet periods in which little storage activity +occurs, you may want to change this setting from the default. + +A common way to limit the impact of merging is to create separate merge +windows for each node in the cluster and ensure that these windows do +not overlap. This ensures that at most one node at a time can be +affected by merging, leaving the remaining nodes to handle requests. +The main drawback of this approach is that merges will occur less +frequently, leading to increased disk space usage. + +### Merge Triggers + +Merge triggers determine the conditions under which merging will be +invoked. These conditions fall into two basic categories: + +* **Fragmentation** --- This describes the ratio of dead keys to total + keys in a file that will trigger merging. The value of this setting is + an integer percentage (0-100). For example, if a data file contains 6 + dead keys and 4 live keys, a merge will be triggered by the default + setting (60%). Increasing this value will cause merging to occur less + often, whereas decreasing the value will cause merging to happen more + often. + +* **Dead Bytes** --- This setting specifies the amount of data stored for + dead keys in a single file that will trigger merging. If a file meets or + exceeds the trigger value for dead bytes, a merge will be triggered.
+ Increasing the value will cause merging to occur less often, whereas + decreasing the value will cause merging to happen more often. The + default is 512 MB. + + When either of these constraints is met by any file in the directory, + Bitcask will attempt to merge files. + +You can set the triggers described above using +`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`, +respectively. The former is expressed as an integer between 0 and 100, +whereas the latter can be expressed in terms of kilobytes, megabytes, +gigabytes, etc. The following example sets the fragmentation trigger to +55% and the dead bytes trigger to 1 GB: + +```riakconf +bitcask.merge.triggers.fragmentation = 55 +bitcask.merge.triggers.dead_bytes = 1GB +``` + +```appconfig +%% The equivalent settings in the app.config-based system are +%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The +%% latter must be expressed in bytes. + +{bitcask, [ + ..., + {frag_merge_trigger, 55}, + {dead_bytes_merge_trigger, 1073741824}, + ... + ]} +``` + +### Merge Thresholds + +Merge thresholds determine which files will be chosen for inclusion in +a merge operation. + +* **Fragmentation** --- This setting describes the ratio of dead keys + to total keys in a file that will cause it to be included in the merge. The + value of this setting is a percentage (0-100). For example, if a data + file contains 4 dead keys and 6 live keys, it will be included in the + merge at the default ratio (40%). Increasing the value will cause + fewer files to be merged, while decreasing the value will cause more + files to be merged. + +* **Dead Bytes** --- This setting describes the minimum + amount of data occupied by dead keys in a file that will cause it to be + included in the merge. Increasing this value will cause fewer files to + be merged, while decreasing this value will cause more files to be + merged. The default is 128 MB. + +* **Small File** --- This setting describes the minimum size a file must + be to be _excluded_ from the merge. Files smaller than the threshold + will be included. Increasing the value will cause more files to be + merged, while decreasing the value will cause fewer files to be merged. + The default is 10 MB. + +You can set the thresholds described above using the +`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and +`merge.thresholds.small_file` settings, respectively. + +The `fragmentation` setting is expressed as an integer +between 0 and 100, and the `dead_bytes` and `small_file` settings can be +expressed in terms of kilobytes, megabytes, gigabytes, etc. The +following example sets the fragmentation threshold to 45%, the +dead bytes threshold to 200 MB, and the small file threshold to 25 MB: + +```riakconf +bitcask.merge.thresholds.fragmentation = 45 +bitcask.merge.thresholds.dead_bytes = 200MB +bitcask.merge.thresholds.small_file = 25MB +``` + +```appconfig +%% In the app.config-based system, the settings corresponding to those +%% listed above are frag_threshold, dead_bytes_threshold, and +%% small_file_threshold, respectively. The latter two settings must be +%% expressed in bytes: + +{bitcask, [ + ..., + {frag_threshold, 45}, + {dead_bytes_threshold, 209715200}, + {small_file_threshold, 26214400}, + ... + ]} +``` +> **Note on choosing threshold values** +> +> The values for the fragmentation and dead bytes thresholds _must be +equal to or less than their corresponding trigger values_.
If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +The fold keys thresholds control when Bitcask will reuse the in-memory +keydir for a fold operation: the keydir is reused (a) if another fold was +started less than a specified time interval ago and (b) if there have been +fewer than a specified number of updates since then. Otherwise, Bitcask +will wait until all folds currently in progress complete and then start +the new fold. The default time interval is 0, while the default number of +updates is unlimited. Both thresholds can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. Each threshold can be disabled by setting the value to +`unlimited`.
The following example sets the `max_age` to 1/2 second and +the `max_puts` to 1000: + +```riakconf +bitcask.fold.max_age = 0.5s +bitcask.fold.max_puts = 1000 +``` + +```appconfig +%% In the app.config-based system, the corresponding parameters are +%% max_fold_age and max_fold_puts, respectively. The former must be +%% expressed in milliseconds, while the latter must be an integer: + +{bitcask, [ + ..., + {max_fold_age, 500}, + {max_fold_puts, 1000}, + ... + ]} + +%% Each of these thresholds can be disabled by setting the value to -1 +``` + +<a name="Automatic-Expiration"></a> +### Automatic Expiration + +By default, Bitcask keeps all of your data. But if your data has limited +time value or if you need to purge data for space reasons, you can +configure object expiration, aka expiry. This feature is disabled by +default. + +You can enable and configure object expiry using the `expiry` setting +and either specifying a time interval in seconds, minutes, hours, etc., +or turning expiry off (`off`). The following example configures objects +to expire after 1 day: + +```riakconf +bitcask.expiry = 1d +``` + +```appconfig +%% In the app.config-based system, expiry is expressed in terms of +%% seconds: + +{bitcask, [ + ..., + {expiry_secs, 86400}, %% Sets the duration to 1 day + ... + ]} + +%% Expiry can be turned off by setting this value to -1 +``` + +> **Note on stale data** +> +> Space occupied by stale data _may not be reclaimed immediately_, +but the data will become immediately inaccessible to client requests. +Writing to a key will set a new modification timestamp on the value +and prevent it from being expired. + +By default, Bitcask will trigger a merge whenever a data file contains +an expired key. This may result in excessive merging under some usage +patterns. You can prevent this by configuring an expiry grace time. +Bitcask will defer a merge triggered solely by key expiry for the +configured amount of time. The default is 0, signifying no grace time. + +If you are using the newer, `riak.conf`-based configuration system, you +can set an expiry grace time using the `expiry.grace_time` setting, +expressed in terms of minutes, hours, days, etc. The following example sets the +grace period to 1 hour: + +```riakconf +bitcask.expiry.grace_time = 1h +``` + +```appconfig +%% The equivalent setting in the app.config-based system is +%% expiry_grace_time. This must be expressed in seconds: + +{bitcask, [ + ..., + {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour + ... + ]} +``` + +#### Automatic expiration and Riak Search + +If you are using [Riak Search][usage search] in conjunction with +Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search, +those objects can be expired by Bitcask yet still registered in Search +indexes, which means that Search queries may return keys that no longer +exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually +catch this discrepancy, but this depends on AAE being enabled (which is +the default) and could take some time. If Search queries returning +expired keys are a problem for your use case, then we would recommend not +using automatic expiration. + +## Tuning Bitcask + +When tuning your environment, there are a number of things to bear in +mind that can assist you in making Bitcask as stable and reliable as +possible and in minimizing latency and maximizing throughput.
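+ +Before and after changing any of the settings above, it can be helpful to confirm the configuration a node is actually running with. As with the backend check shown earlier, a quick sketch using `riak config effective`: + +```bash +# Show the effective Bitcask settings on this node. +riak config effective | grep bitcask +```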
+ +### Tips & Tricks + + * **Bitcask depends on filesystem caches** + + Some data storage layers implement their own page/block buffer cache + in-memory, but Bitcask does not. Instead, it depends on the + filesystem's cache. Adjusting the caching characteristics of your + filesystem can impact performance. + + * **Be aware of file handle limits** + + Review the documentation on [open files limit][perf open files]. + + * **Avoid the overhead of updating file metadata (such as last access + time) on every read or write operation** + + You can achieve a substantial speed boost by adding the `noatime` + mounting option to Linux's `/etc/fstab`. This will disable the + recording of the last accessed time for all files, which results + in fewer disk head seeks. If you need last access times but you'd + like some of the benefits of this optimization, you can try + `relatime`. + + ``` + /dev/sda5 /data ext3 noatime 1 1 + /dev/sdb1 /data/inno-log ext3 noatime 1 2 + ``` + + * **Small number of frequently changed keys** + + When keys are changed frequently, fragmentation rapidly increases. + To counteract this, you should lower the fragmentation trigger and + threshold. + + * **Limited disk space** + + When disk space is limited, limiting the space occupied by dead keys + is of paramount importance. Lower the dead bytes threshold and + trigger to counteract wasted space. + + * **Purging stale entries after a fixed period** + + To automatically purge stale values, set the object expiry value to + the desired cutoff time. Keys that are not modified for a period + equal to or greater than this time interval will become + inaccessible. + + * **High number of partitions per node** + + Because each node has many partitions running, Bitcask will have + many [open files][perf open files]. To reduce the number of open + files, we suggest increasing the max file size so that larger files + will be written. You could also decrease the fragmentation and + dead-bytes settings and increase the small file threshold so that + merging will keep the number of open files small. + + * **High daytime traffic, low nighttime traffic** + + In order to cope with a high volume of writes without performance + degradation during the day, you might want to limit merging to + non-peak periods. Setting the merge window to hours of the day + when traffic is low will help. + + * **Multi-cluster replication** + + If you are using Riak with the replication feature enabled, your clusters might experience + higher rates of fragmentation and dead bytes. Additionally, + because the fullsync feature operates across entire partitions, it + will be made more efficient by accessing data as sequentially as + possible (across fewer files). Lowering both the fragmentation and + dead-bytes settings will improve performance. + +## FAQ + + * [[Why does it seem that Bitcask merging is only triggered when a + Riak node is restarted?|Developing on Riak + FAQs#why-does-it-seem-that-bitc]] + * [[If the size of key index exceeds the amount of memory, how does + Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]] + * [Bitcask Capacity Planning][plan bitcask capacity] + +## Bitcask Implementation Details + +Riak will create a Bitcask database directory for each [vnode][glossary vnode] +in a [cluster][learn clusters]. In each of those directories, at most one +database file will be open for writing at any given time.
The file being +written to will grow until it exceeds a specified size threshold, at +which time it is closed and a new file is created for additional writes. +Once a file is closed, whether purposely or due to server exit, it is +considered immutable and will never again be opened for writing. + +The file currently open for writes is only written by appending, which +means that sequential writes do not require disk seeking, which can +dramatically speed up disk I/O. Note that this effect can be hampered if +you have `atime` enabled on your filesystem, because the disk head will +have to move to update both the data blocks _and_ the file and directory +metadata blocks. The primary speed advantage of a log-based database +stems from its ability to minimize disk head seeks. + +Deleting a value from Bitcask is a two-step process: first, a +[tombstone][usage delete objects] is recorded in the open file for writes, +which indicates that a value was marked for deletion at that time, while +references to that key are removed from the in-memory "keydir" +information; later, during a merge operation, non-active data files are +scanned, and only those values without tombstones are merged into the +active data file. This effectively removes the obsolete data and +reclaims disk space associated with it. This data management strategy +may use up a lot of space over time, since Bitcask writes new values +without touching the old ones. + +The compaction process referred to as "merging" solves this +problem. The merge process iterates over all non-active (i.e. immutable) +files in a Bitcask database and produces as output a set of data files +containing only the "live" or latest versions of each present key. + +### Bitcask Database Files + +Below are two directory listings showing what you should expect to find +on disk when using Bitcask. In this example, we use a 64-partition +[ring][learn clusters], which results in 64 separate directories, +each holding its own Bitcask database. + +```bash +ls ./data/bitcask +``` + +The result: + +``` +0 +1004782375664995756265033322492444576013453623296 +1027618338748291114361965898003636498195577569280 + +... etc ... + +9819464125817003981681007469812.9.03831329677312 +``` + +Note that at startup, directories are created for each +[vnode][glossary vnode] partition's data. At this point, however, there are not +yet any Bitcask-specific files. + +After performing one PUT (write) into the Riak cluster running Bitcask: + +```bash +curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \ + -H "Content-Type: text/plain" \ + -d "hello" +``` + +The "N" value for this cluster is 3 (the default), so you'll see that +the three vnode partitions responsible for this data now have Bitcask +database files: + +``` +bitcask/ + +... etc ... + +|-- 1118962191081472546749696200048404186924073353216-1316787078245894 +| |-- 1316787252.bitcask.data +| |-- 1316787252.bitcask.hint +| `-- bitcask.write.lock + +... etc ... + + +|-- 1141798154164767904846628775559596109106197299200-1316787078249065 +| |-- 1316787252.bitcask.data +| |-- 1316787252.bitcask.hint +| `-- bitcask.write.lock + +... etc ... + + +|-- 116463411724806326294356135107078803128832.9.0184-1316787078254833 +| |-- 1316787252.bitcask.data +| |-- 1316787252.bitcask.hint +| `-- bitcask.write.lock + +... etc ... + +``` + +As more data is written to the cluster, more Bitcask files are created +until merges are triggered.
+ +``` +bitcask/ +|-- 0-1317147619996589 +| |-- 1317147974.bitcask.data +| |-- 1317147974.bitcask.hint +| |-- 1317221578.bitcask.data +| |-- 1317221578.bitcask.hint +| |-- 1317221869.bitcask.data +| |-- 1317221869.bitcask.hint +| |-- 1317222847.bitcask.data +| |-- 1317222847.bitcask.hint +| |-- 1317222868.bitcask.data +| |-- 1317222868.bitcask.hint +| |-- 1317223014.bitcask.data +| `-- 1317223014.bitcask.hint +|-- 1004782375664995756265033322492444576013453623296-1317147628760580 +| |-- 1317147693.bitcask.data +| |-- 1317147693.bitcask.hint +| |-- 13172.9.05.bitcask.data +| |-- 13172.9.05.bitcask.hint +| |-- 1317222514.bitcask.data +| |-- 1317222514.bitcask.hint +| |-- 1317223035.bitcask.data +| |-- 1317223035.bitcask.hint +| |-- 1317223411.bitcask.data +| `-- 1317223411.bitcask.hint +|-- 1027618338748291114361965898003636498195577569280-1317223690337865 +|-- 1050454301831586472458898473514828420377701515264-1317223690151365 + +... etc ... + +``` + +This is normal operational behavior for Bitcask. diff --git a/content/riak/kv/2.9.1/setup/planning/backend/leveldb.md b/content/riak/kv/2.9.1/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..10323c1c6f --- /dev/null +++ b/content/riak/kv/2.9.1/setup/planning/backend/leveldb.md @@ -0,0 +1,502 @@ +--- +title: "LevelDB" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "LevelDB" + identifier: "planning_backend_leveldb" + weight: 101 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.1/ops/advanced/backends/leveldb/ + - /riak/kv/2.9.1/ops/advanced/backends/leveldb/ +--- + +[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}} +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode +[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference +[perf index]: {{<baseurl>}}riak/kv/2.9.1/using/performance +[config reference#aae]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/#active-anti-entropy + +> **Note on upgrading to 2.0** +> +> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0, +and wish to keep using your old `app.config` file for configuration, +make sure to follow the steps for setting the +`total_leveldb_mem_percent` parameter in the +[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB]. + +[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application +that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an +open-source, on-disk key/value store created by Google Fellows Jeffrey +Dean and Sanjay Ghemawat. + +LevelDB is a relatively new entrant into the growing list of key/value +database libraries, but it has some very interesting qualities that we +believe make it an ideal candidate for use in Riak. LevelDB's storage +architecture is more like +[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable +model than it is like Bitcask. This design and implementation provide +the possibility of a storage engine without Bitcask's RAM limitation. + +> **Note:** Riak uses a fork of LevelDB. The code can be found +[on Github](https://github.com/basho/leveldb). + +A number of changes have been introduced in the LevelDB backend in Riak +2.0: + +* There is now only _one_ performance-related setting that Riak users + need to define---`leveldb.total_mem_percent`---as LevelDB now + dynamically sizes the file cache and block sizes based upon active + [vnodes][glossary vnode] assigned to the node. 
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model + for background compaction work on `.sst` table files. The new model + has increased throughput by at least 10% in all test scenarios. +* Delete operations now receive priority handling in compaction + selection, which means more aggressive reclaiming of disk space than + in previous versions of Riak's LevelDB backend. +* Nodes storing massive key datasets (e.g. in the billions of keys) now + receive increased throughput due to automatic management of LevelDB's + block size parameter. This parameter is slowly raised to increase the + number of files that can open simultaneously, improving random read + performance. + +## Strengths + +1. **License** --- The LevelDB and eLevelDB licenses are the [New BSD + License](http://www.opensource.org/licenses/bsd-license.php) and the + [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html), + respectively. We'd like to thank Google and the authors of LevelDB at + Google for choosing a completely FLOSS license so that everyone can + benefit from this innovative storage engine. +2. **Data compression** --- LevelDB provides two compression algorithms + to reduce storage size and increase efficient use of storage bandwidth: + * Google's [Snappy](https://code.google.com/p/snappy/) data compression + * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data + compression + + Enabling compression means more CPU usage but less disk space. Compression + is especially good for text data, including raw text, Base64, JSON, etc. + +## Weaknesses + +1. Read access can be slow when there are many levels to search +2. LevelDB may have to do a few disk seeks to satisfy a read: one disk + seek per level. If 10% of the database fits in memory, LevelDB will + need one seek for the last level (since all of the earlier levels + should end up cached in the OS buffer cache for most filesystems), + whereas if 1% fits in memory, it will need two seeks. + +## Installing eLevelDB + +Riak ships with eLevelDB included within the distribution, so there is +no separate installation required. However, Riak is configured to use +the Bitcask storage engine by default. To switch to eLevelDB, set the +`storage_backend` variable in [`riak.conf`][config reference] to +`leveldb`: + +```riakconf +storage_backend = leveldb +``` + +```appconfig +{riak_kv, [ + %% ... + {storage_backend, riak_kv_eleveldb_backend}, + %% ... + ]} +``` + +## Configuring eLevelDB + +eLevelDB's default behavior can be modified by adding or changing +`leveldb.*` parameters in [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB. + +The configuration values that can be set in your +[`riak.conf`][config reference] for eLevelDB are as follows: + +Config | Description | Default +:------|:------------|:------- +`leveldb.data_root` | LevelDB data root | `./data/leveldb` +`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size.
| `70` + +If you are using the older, `app.config`-based system, the equivalent to +the `leveldb.data_root` is the `data_root` setting, as in the following +example: + +```appconfig +{eleveldb, [ + {data_root, "/path/to/leveldb"}, + + %% Other eleveldb-specific settings +]} +``` + +The `leveldb.maximum_memory.percent` setting is only available in the +newer configuration system. + +### Recommended Settings + +Below are **general** configuration recommendations for Linux +distributions. Individual users may need to tailor these settings for +their application. + +#### sysctl + +For production environments, please see [System Performance Tuning][perf index] +for the recommended `/etc/sysctl.conf` settings. + +#### Block Device Scheduler + +Beginning with the 2.6 kernel, Linux gives you a choice of four I/O +[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We +recommend using the NOOP elevator. You can do this by changing the +scheduler on the Linux boot line: `elevator=noop`. + +#### ext4 Options + +The ext4 filesystem defaults include two options that increase integrity +but slow performance. Because Riak's integrity is based on multiple +nodes holding the same data, these two options can be changed to boost +LevelDB's performance. We recommend setting `barrier=0` and +`data=writeback`. + +#### CPU Throttling + +If CPU throttling is enabled, disabling it can boost LevelDB performance +in some cases. + +#### No Entropy + +If you are using the HTTPS protocol, note that the 2.6 kernel is widely known for +stalling programs waiting for SSL entropy bits. If you are using HTTPS, +we recommend installing the +[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for +pseudorandom number generation. + +#### clocksource + +We recommend setting `clocksource=hpet` on your Linux kernel's `boot` +line. The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a +Google-sponsored open source project that has been incorporated into an +Erlang application and integrated into Riak for storage of key/value +information on disk. The implementation of LevelDB is similar in spirit +to the representation of a single Bigtable tablet (section 5.3). + +### How Levels Are Managed + +LevelDB uses a memtable/sstable design. The set of sorted tables is +organized into a sequence of levels. Each level stores approximately ten +times as much data as the level before it. The sorted table generated +from a flush is placed in a special young level (also called level-0). +When the number of young files exceeds a certain threshold (currently +four), all of the young files are merged together with all of the +overlapping level-1 files to produce a sequence of new level-1 files (a +new level-1 file is created for every 2MB of data). + +Files in the young level may contain overlapping keys. However, files in +other levels have distinct non-overlapping key ranges. Consider level +number L where L >= 1. When the combined size of files in level-L +exceeds (10^L) MB (i.e.
+exceeds (10^L) MB (i.e. 10MB for level-1, 100MB for level-2, ...), one
+file in level-L, and all of the overlapping files in level-(L+1), are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-(L+1)). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 seconds.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0, the only special case to consider is that, after
+picking the primary L0 file to compact, LevelDB will check other L0
+files to determine the degree to which they overlap. This is an attempt
+to avoid some I/O; in practice, we can expect L0 compactions to usually,
+if not always, involve all L0 files.
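+
+As a rough illustration of that scoring rule, here is a Python sketch
+(not Riak's actual C++ implementation; the level-0 file target and the
+ten-fold size multiplier are taken from the description above):
+
+```python
+# Illustrative sketch of compaction-level selection: each level is
+# scored by its size relative to a target, and level 0 by file count.
+
+L0_FILE_TARGET = 4  # level-0 files allowed before compaction is due
+
+def target_bytes(level):
+    # Levels grow by ~10x: 10MB at level-1, 100MB at level-2, ...
+    return (10 ** level) * 1024 * 1024
+
+def level_to_compact(l0_file_count, bytes_per_level):
+    """Pick the level with the highest compaction score."""
+    scores = {0: l0_file_count / L0_FILE_TARGET}
+    for level, size in bytes_per_level.items():
+        scores[level] = size / target_bytes(level)
+    return max(scores, key=scores.get)
+
+# Level-1 holds 25MB (score 2.5), level-2 holds 40MB (score 0.4), and
+# there are two young files (score 0.5), so level-1 is compacted first.
+print(level_to_compact(2, {1: 25 * 1024**2, 2: 40 * 1024**2}))  # 1
+```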
+
+See the `PickCompaction` routine in
+[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level, since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition ring
+which results in 64 separate directories, each with their own LevelDB
+database:
+
+```bash
+leveldb/
+|-- 0
+|   |-- 000003.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   `-- MANIFEST-000002
+|-- 1004782375664995756265033322492444576013453623296
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+
+... etc ...
+
+`-- 9819464125817003981681007469812.9.03831329677312
+    |-- 000005.log
+    |-- CURRENT
+    |-- LOCK
+    |-- LOG
+    |-- LOG.old
+    `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033322492444576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files
+in a single database directory. In Riak 1.3, the original LevelDB code
+was modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in the interest of
+speeding up database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels.
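+
+Conceptually, the level-to-device mapping can be sketched as follows
+(an illustrative Python sketch, not Riak code; the real on-disk layout
+also includes a per-vnode directory, and the cutoff and path prefixes
+mirror the settings described in the next section):
+
+```python
+# Levels below the cutoff live on the fast array; the rest on the slow.
+
+def sst_prefix(level, tiered_slow_level=4,
+               fast="/mnt/fast_raid", slow="/mnt/slow_raid"):
+    array = fast if level < tiered_slow_level else slow
+    return f"{array}/sst_{level}"
+
+assert sst_prefix(3) == "/mnt/fast_raid/sst_3"
+assert sst_prefix(4) == "/mnt/slow_raid/sst_4"
+```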
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the lower-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows, for each level, the
+approximate size in megabytes of the raw data stored in that level, the
+cumulative size of all levels up to and including it, and the cumulative
+size including active anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be approximately 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above.
+  Select the third column if you are not using active anti-entropy or
+  the fourth column if you are (i.e. if the `anti_entropy`
+  [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative column in
+  each row in the table. The first result that exceeds your fast storage
+  array capacity will provide the level number that should be used for
+  your `leveldb.tiered` setting.
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
diff --git a/content/riak/kv/2.9.1/setup/planning/backend/leveled.md b/content/riak/kv/2.9.1/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..7946e60810
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/planning/backend/leveled.md
@@ -0,0 +1,137 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/advanced/backends/leveled/
+  - /riak/kv/2.9.1/ops/advanced/backends/leveled/
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/2.9.1/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/#active-anti-entropy
+
+[Leveled](https://github.com/martinsumner/leveled) is a simple
+Key-Value store based on the concept of Log-Structured Merge Trees,
+with the following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+  - Splits the storage of values between keys/metadata and body
+    (assuming some definition of metadata is provided);
+  - Allows the application to define what constitutes object metadata
+    and what constitutes the body (value-part) of the object, and to
+    assign tags to objects to manage multiple object types with
+    different extraction rules;
+  - Stores keys/metadata in a merge tree and the full object in a
+    journal of CDB files, allowing for HEAD requests which have lower
+    overheads than GET requests; and
+  - Allows queries which traverse keys/metadata to be supported with
+    fewer side effects on the page cache than folds over keys/objects.
+- Support for tagging of object types and the implementation of
+  alternative store behaviour based on type:
+  - Allows for changes to extract specific information as metadata to
+    be returned from HEAD requests;
+  - Potentially usable for objects with special retention or merge
+    properties.
+- Support for low-cost clones without locking to provide for scanning
+  queries (e.g. secondary indexes), with low cost specifically where
+  there is a need to scan across keys and metadata (not values).
+- Written in Erlang as a message-passing system between Actors.
+
+
+## Strengths
+
+1. Leveled was developed specifically as a potential backend for Riak,
+   with features such as:
+   * Support for secondary indexes
+   * Multiple fold types
+   * Auto expiry of objects
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning
+   queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and more likely to
+   suffer from edge case issues than Bitcask or LevelDB, simply because
+   they've been around longer and have been more thoroughly tested via
+   usage in customer environments.
+2. Leveled works better with medium to larger sized objects. It works
+   perfectly well with small objects, but the additional disk space
+   overhead may render LevelDB a better choice if disk space is at a
+   premium and all of your data will be exclusively limited to a few KB
+   or less. This may change as Leveled matures, though.
+
+## Installing leveled
+
+Leveled is included with Riak KV 2.9.1 and beyond, so there is no need
+to install anything further. To switch to leveled, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_leveled_backend},
+    %% ...
+    ]}
+```
+
+## Configuring leveled
+
+Leveled's default behavior can be modified by adding/changing
+parameters in the `leveled` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | The compression method. | `native`
+`leveled.compression_point` | The point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | The minimum log level to be used within leveled. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
+`leveled.max_run_length` | The maximum number of Journal files per compaction run. | `4`
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### No Entropy
+
+If you are using the HTTPS protocol, note that the 2.6 kernel is widely
+known for stalling programs as they wait for SSL entropy bits. In that
+case, we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line. The TSC clocksource has been identified to cause issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[Leveled](https://github.com/martinsumner/leveled) is an open source
+project that has been developed specifically as a backend option for
+Riak, rather than as a generic backend.
diff --git a/content/riak/kv/2.9.1/setup/planning/backend/memory.md b/content/riak/kv/2.9.1/setup/planning/backend/memory.md
new file mode 100644
index 0000000000..91359896d9
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/planning/backend/memory.md
@@ -0,0 +1,143 @@
+---
+title: "Memory"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Memory"
+    identifier: "planning_backend_memory"
+    weight: 102
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/advanced/backends/memory/
+  - /riak/kv/2.9.1/ops/advanced/backends/memory/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/multi
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb
+
+The Memory storage backend uses in-memory tables to store all data.
+This data is never persisted to disk or to any other storage mechanism.
+The Memory storage engine is best used for testing Riak clusters or for
+storing small amounts of transient state in production systems.
+
+Internally, the Memory backend uses Erlang ETS tables to manage data.
+More information can be found in the
+[official Erlang documentation](http://www.erlang.org/doc/man/ets.html).
+
+## Enabling the Memory Backend
+
+To enable the memory backend, edit your [configuration files][config reference]
+for each Riak node and specify the Memory backend as shown in the following
+example:
+
+```riakconf
+storage_backend = memory
+```
+
+```appconfig
+{riak_kv, [
+    ...,
+    {storage_backend, riak_kv_memory_backend},
+    ...
+    ]}
+```
+
+**Note**: If you *replace* your previously specified backend with the
+Memory backend, as shown in the above example, data belonging to that
+previous backend will still be preserved on the filesystem but will no
+longer be accessible through Riak unless the backend is enabled again.
+
+If you require multiple backends in your configuration, please consult
+the [Multi backend documentation][plan backend multi].
+
+## Configuring the Memory Backend
+
+The Memory backend enables you to configure two fundamental aspects of
+object storage: maximum memory usage per [vnode][glossary vnode]
+and object expiry.
+
+### Max Memory
+
+This setting specifies the maximum amount of memory consumed by the
+Memory backend. It's important to note that this setting acts on a
+*per-vnode basis*, not on a per-node or per-cluster basis. This should
+be taken into account when planning for memory usage with the Memory
+backend, as the total memory used will be max memory times the number
+of vnodes in the cluster.
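+
+As a back-of-the-envelope example (a Python sketch with hypothetical
+numbers, not a recommendation):
+
+```python
+# The per-vnode cap multiplies out across every vnode a node hosts.
+
+ring_size = 64                 # total vnodes in the cluster
+nodes = 4                      # physical nodes
+max_memory_per_vnode_mb = 512  # memory_backend.max_memory_per_vnode
+
+vnodes_per_node = ring_size // nodes        # 16 vnodes per node
+per_node_mb = vnodes_per_node * max_memory_per_vnode_mb
+print(f"Up to {per_node_mb // 1024} GB of object data per node")  # 8 GB
+```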
+
+When the threshold value that you set has been met in a particular
+vnode, Riak will begin discarding objects, beginning with the oldest
+object and proceeding until memory usage returns below the allowable
+threshold.
+
+You can configure maximum memory using the
+`memory_backend.max_memory_per_vnode` setting. You can specify
+`max_memory_per_vnode` however you'd like, using kilobytes, megabytes,
+or even gigabytes.
+
+The following are all possible settings:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 500KB
+memory_backend.max_memory_per_vnode = 10MB
+memory_backend.max_memory_per_vnode = 2GB
+```
+
+```appconfig
+%% In the app.config-based system, the equivalent setting is max_memory,
+%% which must be expressed in megabytes:
+
+{riak_kv, [
+    %% storage_backend specifies the Erlang module defining the storage
+    %% mechanism that will be used on this node.
+
+    {storage_backend, riak_kv_memory_backend},
+    {memory_backend, [
+        ...,
+        {max_memory, 4096}, %% 4GB in megabytes
+        ...
+    ]}
+]}
+```
+
+To determine an optimal max memory setting, we recommend consulting the
+documentation on [LevelDB cache size][plan backend leveldb].
+
+### TTL
+
+The time-to-live (TTL) parameter specifies the amount of time an object
+remains in memory before it expires. The minimum time is one second.
+
+In the newer, `riak.conf`-based configuration system, you can specify
+`ttl` in seconds, minutes, hours, days, etc. The following are all
+possible settings:
+
+```riakconf
+memory_backend.ttl = 1s
+memory_backend.ttl = 10m
+memory_backend.ttl = 3h
+```
+
+```appconfig
+%% In the app.config-based system, the ttl setting must be expressed in
+%% seconds:
+
+{memory_backend, [
+    %% other settings
+    {ttl, 86400}, %% Set to 1 day
+    %% other settings
+    ]}
+```
+
+> **Dynamically Changing `ttl`**
+>
+> There is currently no way to dynamically change the `ttl` setting for a
+bucket or bucket type. The current workaround would be to define
+multiple Memory backends using the Multi backend, each with different
+`ttl` values. For more information, consult the documentation on the
+[Multi][plan backend multi] backend.
diff --git a/content/riak/kv/2.9.1/setup/planning/backend/multi.md b/content/riak/kv/2.9.1/setup/planning/backend/multi.md
new file mode 100644
index 0000000000..a305c26198
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/planning/backend/multi.md
@@ -0,0 +1,226 @@
+---
+title: "Multi-backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Multi-backend"
+    identifier: "planning_backend_multi"
+    weight: 103
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/advanced/backends/multi/
+  - /riak/kv/2.9.1/ops/advanced/backends/multi/
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/memory
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types
+[use admin riak-admin cli]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin
+
+Riak allows you to run multiple backends within a single Riak cluster.
+Selecting the Multi backend enables you to use different storage
+backends for different [buckets][concept buckets].
+Any combination of the three available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used.
+
+## Configuring Multiple Backends
+
+You can set up your cluster to use the Multi backend using Riak's
+[configuration files][config reference].
+
+```riakconf
+storage_backend = multi
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_multi_backend},
+    %% ...
+]},
+```
+
+Remember that you must stop and then restart each node when you change
+storage backends or modify any other configuration.
+
+## Using Multiple Backends
+
+In Riak 2.0 and later, we recommend using multiple backends by applying
+them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process
+involves three steps:
+
+1. Creating a bucket type that enables buckets of that type to use the
+   desired backends
+2. Activating that bucket type
+3. Setting up your application to use that type
+
+Let's say that we've set up our cluster to use the Multi backend and we
+want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface.
+
+We'll call our bucket types `leveldb_backend` and `memory_backend`, but
+you can use whichever names you wish.
+
+```bash
+riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}'
+riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}'
+```
+
+Then, we must activate those bucket types so that they can be used in
+our cluster:
+
+```bash
+riak-admin bucket-type activate leveldb_backend
+riak-admin bucket-type activate memory_backend
+```
+
+Once those types have been activated, any objects stored in buckets
+bearing the type `leveldb_backend` will be stored in LevelDB, whereas
+all objects stored in buckets of the type `memory_backend` will be
+stored in the Memory backend.
+
+More information can be found in our documentation on [using bucket types][usage bucket types].
+
+## Configuring Each Backend Individually
+
+Once you've set up your cluster to use multiple backends, you can
+configure each backend on its own. All configuration options available
+for LevelDB, Bitcask, and Memory are available to you when using the
+Multi backend.
+
+#### Using the Newer Configuration System
+
+If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by
+prefacing each configuration with `multi_backend`.
+
+Here is an example of the general form for configuring multiple
+backends:
+
+```riakconf
+multi_backend.$name.$setting_name = setting
+```
+
+If you are using, for example, the LevelDB and Bitcask backends and wish
+to set LevelDB's `bloomfilter` setting to `off` and the Bitcask
+backend's `io_mode` setting to `nif`, you would do that as follows:
+
+```riakconf
+multi_backend.leveldb.bloomfilter = off
+multi_backend.bitcask.io_mode = nif
+```
+
+#### Using the Older Configuration System
+
+If you are using the older, `app.config`-based configuration system,
+configuring multiple backends involves adding one or more
+backend-specific sections to your `riak_kv` settings (in addition to
+setting the `storage_backend` setting to `riak_kv_multi_backend`, as
+shown above).
+
+> **Note**: If you are defining multiple file-based backends of the same
+type, each of these must have a separate `data_root` directory defined.
+
+While all configuration parameters can be placed anywhere within the
+`riak_kv` section of `app.config`, in general we recommend that you
+place them in the section containing other backend-related settings to
+keep the settings organized.
+
+Below is the general form for your `app.config` file:
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        %% Here's where you set the individual multiplexed backends
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_mult/"},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_expiry_mult/"},
+            {expiry_secs, 86400},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb with a different configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"memory_mult">>, riak_kv_memory_backend, [
+            %% memory configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]}
+    ]},
+    %% ...
+]},
+```
+
+Note that in each of the subsections of the `multi_backend` setting, the
+name of each backend you wish to configure can be anything you would
+like. Directly after naming the backend, you must specify which of the
+backends corresponds to that name, i.e. `riak_kv_bitcask_backend`,
+`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have
+done that, the various configurations for each named backend can be set
+as objects in an Erlang list.
+
+## Example Configuration
+
+Imagine that you are using both Bitcask and LevelDB in your cluster, and
+you would like storage to default to Bitcask. The following
+configuration would create two backend configurations, named
+`bitcask_mult` and `leveldb_mult`, respectively, while also setting the
+data directory for each backend and specifying that `bitcask_mult` is
+the default.
+
+```riakconf
+storage_backend = multi
+
+multi_backend.bitcask_mult.storage_backend = bitcask
+multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult
+
+multi_backend.leveldb_mult.storage_backend = leveldb
+multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult
+
+multi_backend.default = bitcask_mult
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            {data_root, "/var/lib/riak/bitcask"}
+        ]},
+        {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+            {data_root, "/var/lib/riak/leveldb"}
+        ]}
+    ]}
+    %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or for the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is at most
+50%. For example, using three backends with each set to 50% memory
+usage will inevitably lead to memory problems.
diff --git a/content/riak/kv/2.9.1/setup/planning/best-practices.md b/content/riak/kv/2.9.1/setup/planning/best-practices.md
new file mode 100644
index 0000000000..3001e45899
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/planning/best-practices.md
@@ -0,0 +1,141 @@
+---
+title: "Scaling and Operating Riak Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Best Practices"
+    identifier: "planning_best_practices"
+    weight: 105
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/planning/best-practices
+  - /riak/kv/2.9.1/ops/building/planning/best-practices
+---
+
+[use ref handoff]: {{<baseurl>}}riak/kv/2.9.1/using/reference/handoff
+[config mapreduce]: {{<baseurl>}}riak/kv/2.9.1/configuring/mapreduce
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#active-anti-entropy-aae
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes
+
+Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster.
+
+## Disk Capacity
+
+Filling up disks is a serious problem in Riak. In general, you should
+add capacity under the following conditions:
+
+* a disk becomes more than 80% full
+* you have fewer than 10 days of capacity remaining at current rates of
+  growth
+
+## RAID Levels
+
+Riak provides resilience through its built-in redundancy.
+
+* RAID0 can be used to increase performance at the expense of
+  single-node reliability
+* RAID5/6 can be used to increase reliability over RAID0 while still
+  offering higher performance than single disks
+* You should choose a RAID level (or no RAID) that you’re comfortable
+  with
+
+## Disk Leeway
+
+* Adding new nodes instantly increases the total capacity of the
+  cluster, but you should allow enough internal network capacity that
+  [handing off][use ref handoff] existing data outpaces the arrival of new
+  data.
+* Once you’ve reached a scale at which the amount of new data arriving
+  is a small fraction of the cluster's total capacity, you can add new
+  nodes when you need them. You should be aware, however, that adding
+  new nodes can actually _increase_ disk usage on existing nodes in the
+  short term as data is rebalanced within the cluster.
+* If you are certain that you are likely to run out of capacity, we
+  recommend allowing a week or two of leeway so that you have plenty of
+  time to add nodes and for [handoff][use ref handoff] to occur before the disks reach
+  capacity
+* For large volumes of storage it's usually prudent to add more capacity
+  once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+  processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you’ll have spare
+  capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be “bursty,” i.e. it tends to vary both quite
+  a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times the amount of intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+  fill up
+* a node's IO/CPU activity is higher than average for an extended
+  period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node has 1 TB of storage
+  but the rest have 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node].
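+
+For example, staging several new nodes and committing the change as a
+single operation might look like the following (the node names are
+hypothetical, and the transfer limit is an optional throttle):
+
+```bash
+# On each joining node, stage a join to an existing cluster member:
+riak-admin cluster join riak@node1.example.com
+
+# Then, from any one node, review and commit all staged joins at once:
+riak-admin cluster plan
+riak-admin cluster commit
+
+# Optionally cap concurrent handoffs so client traffic keeps priority:
+riak-admin transfer-limit 2
+```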
+
+## Scaling
+
+* All large-scale systems are bound by the availability of some
+  resources
+* From a stability point of view, the best state for a busy Riak cluster
+  to maintain is the following:
+  * New network connections are limited to ensure that existing network
+    connections consume most network bandwidth
+  * CPU at < 30%
+  * Disk IO at < 90%
+* You should use HAProxy or your application servers to limit new
+  network connections to keep network and IO below 90% and CPU below
+  30%.
diff --git a/content/riak/kv/2.9.1/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.9.1/setup/planning/bitcask-capacity-calc.md
new file mode 100644
index 0000000000..df0c10ef86
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/planning/bitcask-capacity-calc.md
@@ -0,0 +1,100 @@
+---
+title: "Bitcask Capacity Calculator"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Bitcask Capacity Calculator"
+    identifier: "planning_cluster_bitcask_capacity"
+    weight: 104
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/planning/bitcask
+  - /riak/kv/2.9.1/ops/building/planning/bitcask
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask
+
+These calculators will assist you in sizing your cluster if you plan to
+use the default [Bitcask][plan backend bitcask] storage backend.
+
+This page is designed to give you a rough estimate when sizing your
+cluster. The calculations are a _best guess_, and they tend to be a bit
+on the conservative side. It's important to include a bit of headroom
+as well as room for unexpected growth so that if demand exceeds
+expectations you'll be able to add more nodes to the cluster and stay
+ahead of your requirements.
+
+<div id="node_info" class="calc_info"></div>
+<div class="calculator">
+  <ul>
+    <li>
+      <label for="n_total_keys">Total Number of Keys:</label>
+      <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input">
+      <span class="error_span" id="n_total_keys_error"></span>
+    </li>
+    <li>
+      <label for="n_bucket_size">Average Bucket Size (Bytes):</label>
+      <input id="n_bucket_size" type="text" size="7" name="n_bucket_size" value="" class="calc_input">
+      <span class="error_span" id="n_bucket_size_error"></span>
+    </li>
+    <li>
+      <label for="n_key_size">Average Key Size (Bytes):</label>
+      <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input">
+      <span class="error_span" id="n_key_size_error"></span>
+    </li>
+    <li>
+      <label for="n_record_size">Average Value Size (Bytes):</label>
+      <input id="n_record_size" type="text" size="7" name="n_record_size" value="" class="calc_input">
+      <span class="error_span" id="n_record_size_error"></span>
+    </li>
+    <li>
+      <label for="n_ram">RAM Per Node (in GB):</label>
+      <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input">
+      <span class="error_span" id="n_ram_error"></span>
+    </li>
+    <li>
+      <label for="n_nval"><i>N</i> (Number of Write Copies):</label>
+      <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input">
+      <span class="error_span" id="n_nval_error"></span>
+    </li>
+</ul>
+</div>
+
+## Recommendations
+
+<span id="recommend"></span>
+
+## Details on Bitcask RAM Calculation
+
+With the above information in mind, the following variables will factor
+into your RAM calculation:
+
+Variable | Description
+:--------|:-----------
+Static Bitcask per-key overhead | 44.5 bytes per key
+Estimated average bucket-plus-key length | The combined number of characters your bucket + key names will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The Actual Equation
+
+Approximate RAM Needed for Bitcask = (static bitcask per-key overhead +
+estimated average bucket+key length in bytes) * estimated total number of
+keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+The amount of RAM you would need for Bitcask is about **10.4 GB across
+your entire cluster.**
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
diff --git a/content/riak/kv/2.9.1/setup/planning/cluster-capacity.md b/content/riak/kv/2.9.1/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..6fc2eb874c
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/planning/cluster-capacity.md
@@ -0,0 +1,234 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/planning/cluster
+  - /riak/kv/2.9.1/ops/building/planning/cluster
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/2.9.1/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.1/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users. Memory is essential for running complex MapReduce queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM without degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory. The keydir is a hash table that maps each
+concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for
+each file contained within each Bitcask backend) to a fixed-size
+structure giving the file, offset, and size of the most recently written
+entry for that bucket + key on disk.
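+
+Because the keydir must fit in RAM, you can estimate its footprint with
+the same arithmetic used by [Bitcask Capacity Planning][plan bitcask capacity].
+A rough Python sketch with illustrative numbers:
+
+```python
+# Keydir RAM estimate: (static per-key overhead + average bucket+key
+# length) * total keys * n_val. All constants here are illustrative.
+
+per_key_overhead = 44.5   # bytes of static Bitcask keydir overhead
+bucket_key_len = 30       # average bucket + key name length in bytes
+total_keys = 50_000_000
+n_val = 3
+
+ram = (per_key_overhead + bucket_key_len) * total_keys * n_val
+print(f"~{ram / 1024**3:.1f} GB of keydir RAM across the cluster")  # ~10.4
+```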
+
+To learn about Bitcask see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.
+
+If your calculated RAM needs will exceed your hardware resources---in other words, if you can't afford the RAM to use Bitcask---we recommend that you use LevelDB.
+
+Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.
+
+### LevelDB
+
+If RAM requirements for Bitcask are prohibitive, we recommend use of
+the LevelDB backend. While LevelDB doesn't require a large amount of RAM
+to operate, supplying it with the maximum amount of memory available leads to higher performance.
+
+For more information see [LevelDB][plan backend leveldb].
+
+## Disk
+
+Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs:
+
+#### Estimated Total Objects * Average Object Size * n_val
+
+For example:
+
+* 50,000,000 objects
+* an average object size of two kilobytes (2,048 bytes)
+* the default `n_val` of 3
+
+Then you would need approximately **286 GB** of disk space in the entire cluster to accommodate your data.
+
+We believe that databases should be durable out of the box. When we
+built Riak, we did so in a way that you could write to disk while
+keeping response times below your users' expectations. So this
+calculation assumes that you'll be keeping the entire data set on disk.
+
+Many of the considerations taken when configuring a machine to serve a
+database apply to configuring a node for Riak as well. Mounting
+disks with `noatime` and having separate disks for your OS and Riak data
+lead to much better performance. See [Planning for a
+Riak System](../start) for more information.
+
+### Disk Space Planning and Ownership Handoff
+
+When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is more intensive disk space usage on the remaining nodes, which can, in rare cases, fill the disk of one or more of those nodes.
+
+When making disk space planning decisions, we recommend that you:
+
+* assume that one or more nodes may be down at any time
+* monitor your disk space usage and add additional space when usage
+  exceeds 50-60% of available space.
+
+Another possibility worth considering is using Riak with a filesystem
+that allows for growth, for example
+[LVM],
+[RAID](http://en.wikipedia.org/wiki/RAID), or
+[ZFS](http://en.wikipedia.org/wiki/ZFS).
+
+## Read/Write Profile
+
+Read/write ratios, as well as the distribution of key access, should
+influence the configuration and design of your cluster. If your use case
+is write heavy, you will need less RAM for caching, and if only a
+certain portion of keys is accessed regularly, such as in a [Pareto
+distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you
+won't need as much RAM available to cache those keys' values.
+
+## Number of Nodes
+
+The number of nodes (i.e. physical servers) in your Riak Cluster depends
+on the number of times data is [replicated][concept replication] across the
+cluster. To ensure that the cluster is always available to respond to
+read and write requests, we recommend a "sane default" of N=3
+replicas.
+This requirement can be met with a 3- or 4-node cluster.
+
+For production deployments, however, we recommend using no fewer than 5
+nodes, as node failures in smaller clusters can compromise the
+fault-tolerance of the system. Additionally, in clusters smaller than 5
+nodes, a high percentage of the nodes (75-100% of them) will need to
+respond to each request, putting undue load on the cluster that may
+degrade performance. For more details on this recommendation, see our
+blog post on [Why Your Riak Cluster Should Have at Least Five
+Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/).
+
+## Scaling
+
+Riak can be scaled in two ways: vertically, via improved hardware, and
+horizontally, by adding more nodes. Both ways can provide performance
+and capacity benefits, but should be used in different circumstances.
+The [riak-admin cluster command][use admin riak-admin#cluster] can
+assist scaling in both directions.
+
+#### Vertical Scaling
+
+Vertical scaling, or improving the capabilities of a node/server,
+provides greater capacity to the node but does not decrease the overall
+load on existing members of the cluster. That is, the ability of the
+improved node to handle existing load is increased but the load itself
+is unchanged. Reasons to scale vertically include increasing IOPS (I/O
+Operations Per Second), increasing CPU/RAM capacity, and increasing disk
+capacity.
+
+#### Horizontal Scaling
+
+Horizontal scaling, or increasing the number of nodes in the cluster,
+reduces the responsibilities of each member node by reducing the number
+of partitions and providing additional endpoints for client connections.
+That is, the capacity of each individual node does not change but its
+load is decreased. Reasons to scale horizontally include increasing I/O
+concurrency, reducing the load on existing nodes, and increasing disk
+capacity.
+
+> **Note on horizontal scaling**
+>
+> When scaling horizontally, it's best to add all planned nodes at once
+with multiple `riak-admin cluster join` commands followed by
+a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster.
+
+#### Reducing Horizontal Scale
+
+If a Riak cluster is over-provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command.
+
+## Ring Size/Number of Partitions
+
+Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference].
+
+The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes, we recommend a larger ring size.
+
+The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes.
+
+There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses. We recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine, while a 10-node cluster should work well with a ring size of 128 or 256 (64 is too small, while 512 is likely too large).
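+
+One quick way to sanity-check a candidate ring size is to compute
+partitions per node against the 10-50 guideline above (a Python sketch;
+the 10-node cluster is hypothetical):
+
+```python
+# Partitions per node for a few candidate ring sizes on a 10-node cluster.
+
+nodes = 10
+for ring_size in (64, 128, 256, 512):
+    ppn = ring_size / nodes
+    verdict = "ok" if 10 <= ppn <= 50 else "outside the 10-50 guideline"
+    print(f"ring_size={ring_size}: {ppn:.1f} partitions/node ({verdict})")
+```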
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4 | 64, 128
+5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency IO access can drastically decrease performance. Before
+putting your Riak cluster in production, it is recommended that you gain
+a full understanding of your environment's behavior so that you know how
+your cluster performs under load for an extended period of time. Doing
+so will help you size your cluster for future growth and lead to optimal
+performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend that you keep latency
+between nodes as low as possible, as high latency leads to
+sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the
+lowest latency possible using SATA drives or SSDs, for example.
diff --git a/content/riak/kv/2.9.1/setup/planning/future.md b/content/riak/kv/2.9.1/setup/planning/future.md
new file mode 100644
index 0000000000..85e5582c3c
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/planning/future.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+#menu:
+#  riak_kv-2.9.1:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+---
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.9.1/setup/planning/operating-system.md b/content/riak/kv/2.9.1/setup/planning/operating-system.md
new file mode 100644
index 0000000000..51ac33e8c7
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/planning/operating-system.md
@@ -0,0 +1,25 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+---
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.1/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
diff --git a/content/riak/kv/2.9.1/setup/planning/start.md b/content/riak/kv/2.9.1/setup/planning/start.md
new file mode 100644
index 0000000000..98dded5aa3
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/planning/start.md
@@ -0,0 +1,57 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/planning/system-planning
+  - /riak/kv/2.9.1/ops/building/planning/system-planning
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring
+your Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have
+strengths and weaknesses, so if you are unsure of which backend you
+need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests
+across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP
+implementations. We don't recommend VRRP behavior for the VIP because
+you'll lose the benefit of spreading client query load to all nodes in a
+ring.
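+
+Whichever strategy you choose, your balancer will need a health check;
+Riak's HTTP `/ping` endpoint is a convenient target (the hostname below
+is a placeholder, and 8098 is the default HTTP port):
+
+```bash
+# A healthy node responds with HTTP 200 and a body of "OK":
+curl -i http://riak-node1.example.com:8098/ping
+```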
+
+For **reverse-proxy** configurations (HTTP interface), any one of the
+following should work adequately:
+
+* haproxy
+* squid
+* varnish
+* nginx
+* lighttpd
+* Apache
+
diff --git a/content/riak/kv/2.9.1/setup/search.md b/content/riak/kv/2.9.1/setup/search.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/content/riak/kv/2.9.1/setup/upgrading.md b/content/riak/kv/2.9.1/setup/upgrading.md
new file mode 100644
index 0000000000..5f68e24dbb
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/upgrading.md
@@ -0,0 +1,33 @@
+---
+title: "Upgrading Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Upgrading"
+    identifier: "upgrading"
+    weight: 102
+    parent: "setup_index"
+toc: true
+---
+
+[upgrade checklist]: ./checklist
+[upgrade version]: ./version
+[upgrade cluster]: ./cluster
+[upgrade mdc]: ./multi-datacenter
+[upgrade search]: ./search
+
+## In This Section
+
+### [Production Checklist][upgrade checklist]
+
+An overview of what to consider before upgrading Riak KV in a production environment.
+
+[Learn More >>][upgrade checklist]
+
+### [Upgrading to Riak KV 2.9.1][upgrade version]
+
+A tutorial on updating to Riak KV 2.9.1.
+
+[Learn More >>][upgrade version]
\ No newline at end of file
diff --git a/content/riak/kv/2.9.1/setup/upgrading/checklist.md b/content/riak/kv/2.9.1/setup/upgrading/checklist.md
new file mode 100644
index 0000000000..09ab898921
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/upgrading/checklist.md
@@ -0,0 +1,220 @@
+---
+title: "Production Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Production Checklist"
+    identifier: "upgrading_checklist"
+    weight: 100
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/upgrading/production-checklist/
+  - /riak/kv/2.9.1/ops/upgrading/production-checklist/
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/2.9.1/using/performance/open-files-limit
+[perf index]: {{<baseurl>}}riak/kv/2.9.1/using/performance
+[ntp]: http://www.ntp.org/
+[security basics]: {{<baseurl>}}riak/kv/2.9.1/using/security/basics
+[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.1/configuring/load-balancing-proxy
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+[config backend]: {{<baseurl>}}riak/kv/2.9.1/configuring/backend
+[usage search]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.1/developing/app-guide/replication-properties
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/reference/strong-consistency
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/bucket-types
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.1/using/admin/commands
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-control
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/inspecting-node
+[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.1/using/troubleshooting/http-204
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin
+[SANs]: http://en.wikipedia.org/wiki/Storage_area_network
+
+Deploying Riak KV to a live production environment from a development or testing environment can be a complex process.
+While the specifics of that process will always depend on your
+environment and practices, there are some basics for you to consider
+and a few questions that you will want to ask while making this
+transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using `iperf` to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
+    * If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][concept strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state?
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak-admin`][use admin riak-admin])
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median, 95th, and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs?
+* Are the client libraries in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying?
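+
+As a convenience, the node and ring checks above can be run in a single
+pass from any node; a minimal sketch (the node name is an example and
+should match your configuration):
+
+```bash
+# Example only: a quick cluster sanity pass over the checks above.
+riak ping                                              # expect: pong
+riak-admin wait-for-service riak_kv riak@192.168.1.11  # expect: riak_kv is up
+riak-admin ringready       # expect: TRUE ALL nodes agree on the ring
+riak-admin member-status   # expect: all nodes Valid
+riak-admin ring-status     # expect: Ring Ready: true, no pending changes
+riak-admin transfers       # expect: No transfers active
+```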
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak-admin diag <check>`, where `check` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak-admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production
+    systems
+  - High is for problems that impact production or soon-to-be-production
+    systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those
+    issues that are likely to turn into production outages very soon.
+    On-call engineers respond to urgent requests within 30 minutes,
+    24/7.
+* Does your team know how to gather `riak-debug` results from the whole
+  cluster when opening tickets? If not, that process goes something like
+  this (see the sketch at the end of this page):
+  - SSH into each machine, run `riak-debug`, and grab the resultant
+    `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open
+    a new High- or Urgent-priority ticket
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at
+the metrics gathered above. Based on the numbers you're seeing so far,
+configure alerting thresholds on your latencies, disk consumption, and
+memory. These are the places most likely to give you advance warning of
+trouble.
+
+When you go to increase capacity down the line, having historical metrics
+will give you very clear indicators of whether scaling problems have been
+resolved, as well as the data you need to decide what to upgrade and when.
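+
+A hypothetical `riak-debug` collection script for the process described
+above; the hostnames and archive path are examples and will vary with
+your environment and install method:
+
+```bash
+# Example only: run riak-debug on each node and copy the tarballs back.
+for host in riak1.example.com riak2.example.com riak3.example.com; do
+  ssh "$host" riak-debug
+  # riak-debug writes a <hostname>-riak-debug.tar.gz archive; adjust
+  # the path if your nodes write it somewhere else.
+  scp "$host:*riak-debug.tar.gz" .
+done
+```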
diff --git a/content/riak/kv/2.9.1/setup/upgrading/cluster.md b/content/riak/kv/2.9.1/setup/upgrading/cluster.md
new file mode 100644
index 0000000000..6a36d71aa2
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/upgrading/cluster.md
@@ -0,0 +1,298 @@
+---
+title: "Upgrading a Cluster"
+description: ""
+project: "riak_kv"
+project_version: "2.9.1"
+menu:
+  riak_kv-2.9.1:
+    name: "Upgrading a Cluster"
+    identifier: "upgrading_cluster"
+    weight: 102
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.9.1/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.1/ops/upgrading/rolling-upgrades/
+---
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.1/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.1/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/secondary-indexes
+[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.1/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.1/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.1/using/reference/snmp
+
+{{% note title="Note on upgrading Riak KV from older versions" %}}
+Riak KV upgrades are tested and supported for two feature release versions.
+For example, upgrades from 1.1.x to 1.3.x are tested and supported,
+while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new
+version of Riak KV that is more than two feature releases ahead, we
+recommend first upgrading to an intermediate version. For example, in an
+upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x
+before upgrading to 1.4.x.
+
+If you run [Riak Control]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-control), you should disable it during the rolling upgrade process.
+{{% /note %}}
+
+Riak KV nodes negotiate with each other to determine supported
+operating modes. This allows clusters containing mixed versions of Riak KV
+to properly interoperate without special configuration, and simplifies
+rolling upgrades.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading.
+
+## Debian/Ubuntu
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Debian/Ubuntu packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up the Riak KV node's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo dpkg -i <riak_package_name>.deb
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node that you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf.
+This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node that you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:

+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:

+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node that you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7. The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+  * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_jmx` --- See [JMX Monitoring][jmx monitor] for more information.
+  * `snmp` --- See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should examine any custom patches contained in
+the `basho-patches` directory to determine whether they still apply
+to the upgraded version. If you find that patches no longer
+apply to the upgraded version, you should remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following list shows the location of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
diff --git a/content/riak/kv/2.9.1/setup/upgrading/multi-datacenter.md b/content/riak/kv/2.9.1/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..088f81ac44
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,18 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+#menu:
+#  riak_kv-2.9.1:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
diff --git a/content/riak/kv/2.9.1/setup/upgrading/search.md b/content/riak/kv/2.9.1/setup/upgrading/search.md
new file mode 100644
index 0000000000..a15551d6cc
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/upgrading/search.md
@@ -0,0 +1,276 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.9.1"
+menu:
+  riak_kv-2.9.1:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.9.1/ops/advanced/upgrading-search-2
+  - /riak/kv/2.9.1/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e.
+no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage, as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM. A good start is 2 GB, but more will be required for heavier
+workloads. Conversely, do not make the heap too large, as that could
+cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data, as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+> Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space, since merge index's GC
+algorithm is poor at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable the new Riak Search (the Yokozuna library) on
+   each node. If you're still using `app.config`, the setting is called
+   `yokozuna`; if you've moved to the new `riak.conf`, it's called
+   `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+        %% Other configs
+        {enabled, true},
+        %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps, which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+    To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+    instructions to learn how to define your XML file. Once you've created
+    the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+    Once a bucket is associated with the new Search, all objects that are
+    written or modified in Riak will be indexed by **both** legacy and new
+    Search. However, the HTTP and client query interfaces will still
+    continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+    Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+    the following code into the shell. It clears the Search hash trees for
+    each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+    ```
+
+    Press `Ctrl-D` to exit from the attached shell.
+
+    In the background AAE will rebuild the hash trees and exchange them
+    with KV. These exchanges will notice that objects are missing and index
+    them in new Search.
+
+    <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+has occurred on every node.
+
+    ```bash
+    riak-admin search aae-status
+    ```
+
+    First, you must wait until all trees are rebuilt. This may take a
+    while, as each node is configured, by default, to build a maximum of
+    one tree per hour. You can determine when a tree is built by looking
+    at the `Entropy Trees` section. When a tree is not built, it will show
+    `--` under the `Built (ago)` column. Otherwise, it will list how long
+    ago the tree was built in a human-friendly format.
+    Here is an example of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   --
+    319703483166135013357056057156686910549735243776   --
+    ...
+    ```
+
+    Here is an example of built trees:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.3 hr
+    319703483166135013357056057156686910549735243776   5.3 hr
+    ...
+    ```
+
+    After all the trees are built you then have to wait for a full
+    exchange round to occur for every partition on every node. That is,
+    the full exchange round must be **NEWER** than the time the tree was
+    built. That way you know the exchange was based on the latest tree.
+    The exchange information is found under the `Exchanges` section.
+    Under that section there are two columns: `Last (ago)` and `All
+    (ago)`. In this case, you want to wait until the `All (ago)` value is
+    newer than the value of `Built (ago)` in the `Entropy Trees` section.
+    For example, given the entropy tree output above, this output would
+    indicate that both partitions have had a full exchange round since the
+    latest tree was built:
+
+    ```
+    ================================== Exchanges ==================================
+    Index                                              Last (ago)    All (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+    319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+    ...
+    ```
+
+    Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` is newer
+    than `5.3 hr`. Once the exchange is newer for every partition on every
+    node, you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command to give HTTP and PB query
+control to the new Riak Search.
+
+    ```bash
+    riak-admin search switch-to-new-search
+    ```
+
+    <div class="note">
+    <div class="title">Check Results Before Switching (Optional)</div>
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After `switch-to-new-search` is run, all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching, you can use its dedicated
+    HTTP resource at `/search/query/<index>?q=...`.
+    </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+        %% Other configs
+        {enabled, false},
+        %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search it causes
+the commit hooks to persist even when legacy Search is disabled and the
+`search` property is set to `false`.
+
+    New Search has code to expunge the legacy hooks from the raw ring,
+    but it only runs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than in the middle of a migration.
diff --git a/content/riak/kv/2.9.1/setup/upgrading/version.md b/content/riak/kv/2.9.1/setup/upgrading/version.md
new file mode 100644
index 0000000000..33d3da5e2b
--- /dev/null
+++ b/content/riak/kv/2.9.1/setup/upgrading/version.md
@@ -0,0 +1,247 @@
+---
+title: "Upgrading to Riak KV 2.9.1"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Upgrading to 2.9.1"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.1/upgrade-v20/
+  - /riak/kv/2.9.1/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.1/setup/upgrading/cluster/
+---
+
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.1/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.1/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/2.9.1/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.1/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.1/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.1/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/2.9.1/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 2.9.1 by following the instructions below.
+
+{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+    * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to potentially downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config.
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to override the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Transitioning to the Leveled Backend
+
+
+[Riak KV 2.9][release notes] introduced a new backend written specifically for Riak: Leveled.
+
+The leveled backend is not compatible with other backends in terms of the serialised disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+* First transition to 2.9 with the current backend in place, minimising the time spent running mismatched versions in parallel;
+* Then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+{{% note %}}
+If you plan to use Riak search, you must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.1.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, we occasionally introduce features that we believe are so important that users are automatically opted in on upgrade, or for which there is no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:--- |
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade, data written in HLL format is unreadable. |
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade to version 2.9, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new Leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+
+## Upgrading Process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories.
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+4\. Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+```advanced.config
+{riak_kv, [
+    {override_capability, [
+        {object_hash_version, [{use, legacy}] }
+    ]}
+]}
+```
+
+5.b\. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+```riak.conf
+leveldb.compression.algorithm=snappy
+```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS 2.2.3 or earlier, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+```advanced.config
+{riak_repl, [
+    {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+    ]}
+]}
+```
+
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.1 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node that you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf.
+This data is transferred to the node when it becomes available.
+
+10\. Repeat the process for the remaining nodes in the cluster.
+
+
+### Basho Patches
+
+After upgrading, you should examine any custom patches contained in the `basho-patches` directory to determine whether they apply to the upgraded version. You can find this information in the [Release Notes].
+
+If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production.
+
+The following list shows the location of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+### Riaknostic
+
+It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal node operation.
diff --git a/content/riak/kv/2.9.1/using.md b/content/riak/kv/2.9.1/using.md
new file mode 100644
index 0000000000..4957392433
--- /dev/null
+++ b/content/riak/kv/2.9.1/using.md
@@ -0,0 +1,72 @@
+---
+title: "Using Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Using"
+    identifier: "managing"
+    weight: 201
+    pre: database
+toc: true
+---
+
+[use running cluster]: ../using/running-a-cluster
+[use admin index]: ../using/admin/
+[cluster ops index]: ../using/cluster-operations
+[repair recover index]: ../using/repair-recovery
+[security index]: ../using/security
+[perf index]: ../using/performance
+[troubleshoot index]: ../using/troubleshooting
+[use ref]: ../using/reference
+
+## In This Section
+
+#### [Running a Cluster][use running cluster]
+
+A guide on basic cluster setup.
+
+[Learn More >>][use running cluster]
+
+#### [Cluster Administration][use admin index]
+
+Tutorials and reference documentation on cluster administration commands as well as command-line tools.
+
+[Learn More >>][use admin index]
+
+#### [Cluster Operations][cluster ops index]
+
+Step-by-step tutorials on a range of cluster operations, such as adding and removing nodes, renaming nodes, and backups.
+
+[Learn More >>][cluster ops index]
+
+#### [Repair & Recovery][repair recover index]
+
+Contains documentation on repairing a cluster, recovering from failure, and common errors.
+
+[Learn More >>][repair recover index]
+
+#### [Security][security index]
+
+Information on securing your Riak KV cluster.
+
+[Learn More >>][security index]
+
+#### [Performance][perf index]
+
+Articles on benchmarking your Riak KV cluster and improving performance.
+
+[Learn More >>][perf index]
+
+#### [Troubleshooting][troubleshoot index]
+
+Guides on troubleshooting issues and current product advisories.
+
+[Learn More >>][troubleshoot index]
+
+#### [Reference][use ref]
+
+Articles providing background information and implementation details on topics such as logging, bucket types, and search.
+
+[Learn More >>][use ref]
diff --git a/content/riak/kv/2.9.1/using/admin.md b/content/riak/kv/2.9.1/using/admin.md
new file mode 100644
index 0000000000..29598f374e
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/admin.md
@@ -0,0 +1,47 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/cluster-admin
+  - /riak/kv/2.9.1/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
diff --git a/content/riak/kv/2.9.1/using/admin/commands.md b/content/riak/kv/2.9.1/using/admin/commands.md
new file mode 100644
index 0000000000..c9d81a3f60
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/admin/commands.md
@@ -0,0 +1,374 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/cluster-admin
+  - /riak/kv/2.9.1/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command.
+This will execute the changes that have been staged and reviewed.
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
+interface are also available as self-standing commands. The `riak-admin
+member-status` command is now the `riak-admin cluster status` command,
+`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+|        node        |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` --- There are five possible values for status:
+  * `valid` --- The node has begun participating in cluster operations
+  * `leaving` --- The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` --- The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` --- The node is in the process of joining the cluster but
+    has not yet completed the join process
+  * `down` --- The node is not currently responding
+* `avail` --- There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` --- What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` --- The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant node][cluster ops add remove node] will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
+[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
+
+If there is a staged change (or changes), however, you
+will see a detailed listing of what will take place upon commit, what
+the cluster will look like afterward, etc.
+
+For example, if a `cluster leave` operation is staged in a 3-node cluster, the output will look something like this:
+
+```
+=============================== Staged Changes ================================
+Action         Details(s)
+-------------------------------------------------------------------------------
+leave          'dev2@127.0.0.1'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 2 cluster transitions
+
+###############################################################################
+                         After cluster transition 1/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+leaving    32.8%      0.0%    'dev2@127.0.0.1'
+valid      34.4%     50.0%    'dev1@127.0.0.1'
+valid      32.8%     50.0%    'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+
+Transfers resulting from cluster changes: 38
+  6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1'
+  11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1'
+  5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1'
+  16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1'
+
+###############################################################################
+                         After cluster transition 2/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      50.0%      --      'dev1@127.0.0.1'
+valid      50.0%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+```
+
+Notice that there are distinct sections of the output for each of the
+transitions that the cluster will undergo, including warnings, planned
+data transfers, etc.
+
+## commit
+
+Commits the currently staged cluster changes. Staged cluster changes
+must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed.
+
+```bash
+riak-admin cluster commit
+```
+
+## clear
+
+Clears the currently staged cluster changes.
+
+```bash
+riak-admin cluster clear
+```
+
+The behavior of `riak-admin cluster clear` depends on the changes that have been staged:
+
+* If a `leave` operation has been staged, `riak-admin cluster clear` will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* If a `cluster clear` operation is staged on a node that remains in the cluster, running `riak-admin cluster clear` will leave the node unaffected.
+
+## partitions
+
+Prints primary, secondary, and stopped partition indices and IDs either
+for the current node or for another specified node.
The following +prints that information for the current node: + +```bash +riak-admin cluster partitions +``` + +This would print the partition information for a different node in the +cluster: + +```bash +riak-admin cluster partitions --node=<node> +``` + +Partition information is contained in a table like this: + +``` +Partitions owned by 'dev1@127.0.0.1': ++---------+-------------------------------------------------+--+ +| type | index |id| ++---------+-------------------------------------------------+--+ +| primary | 0 |0 | +| primary | 91343852333181432387730302044767688728495783936 |4 | +| primary |182687704666362864775460604089535377456991567872 |8 | +| ... | .... |..| +| primary |1438665674247607560106752257205091097473808596992|63| +|secondary| -- |--| +| stopped | -- |--| ++---------+-------------------------------------------------+--+ +``` + +## partition-count + +Displays the current partition count either for the whole cluster or for +a particular node. This would display the partition count for the +cluster: + +```bash +riak-admin cluster partition-count +``` + +This would display the count for a node: + +```bash +riak-admin cluster partition-count --node=<node> +``` + +When retrieving the partition count for a node, you'll see a table like +this: + +``` ++--------------+----------+-----+ +| node |partitions| pct | ++--------------+----------+-----+ +|dev1@127.0.0.1| 22 | 34.4| ++--------------+----------+-----+ +``` + +The `partitions` column displays the number of partitions claimed by the +node, while the `pct` column displays the percentage of the ring claimed. + +## partition + +The `cluster partition` command enables you to convert partition IDs to +indexes and vice versa using the `partition id` and `partition index` +commands, respectively. Let's say that you run the `riak-admin cluster +partitions` command and see that you have a variety of partitions, one +of which has an index of +`1004782375664995756265033322492444576013453623296`. 
You can convert +that index to an ID like this: + +```bash +riak-admin cluster partition index=1004782375664995756265033322492444576013453623296 +``` + +Conversely, if you have a partition with an ID of 20, you can retrieve +the corresponding index: + +```bash +riak-admin cluster partition id=20 +``` diff --git a/content/riak/kv/2.9.1/using/admin/riak-admin.md b/content/riak/kv/2.9.1/using/admin/riak-admin.md new file mode 100644 index 0000000000..e33859154e --- /dev/null +++ b/content/riak/kv/2.9.1/using/admin/riak-admin.md @@ -0,0 +1,717 @@ +--- +title: "riak-admin Command Line Interface" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "riak-admin CLI" + identifier: "cluster_admin_cli" + weight: 101 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/2.9.1/ops/running/tools/riak-admin + - /riak/kv/2.9.1/ops/running/tools/riak-admin +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference +[use admin commands]: {{<baseurl>}}riak/kv/2.9.1/using/admin/commands +[use admin commands#join]: {{<baseurl>}}riak/kv/2.9.1/using/admin/commands/#join +[use admin commands#leave]: {{<baseurl>}}riak/kv/2.9.1/using/admin/commands/#leave +[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/backing-up +[config reference#node-metadata]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/#node-metadata +[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce +[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/commit-hooks +[config reference#ring]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/#ring +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/inspecting-node +[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.1/using/reference/statistics-monitoring +[downgrade]: {{<baseurl>}}riak/kv/2.9.1/setup/downgrade +[security index]: {{<baseurl>}}riak/kv/2.9.1/using/security/ +[security managing]: {{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/bucket-types +[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.1/using/reference/secondary-indexes +[repair recover index]: {{<baseurl>}}riak/kv/2.9.1/using/repair-recovery +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#stats + +## `riak-admin` + +The riak-admin command performs operations unrelated to node liveness, including: +node membership, backup, and basic status reporting. The node must be +running for most of these commands to work. 
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. Reip should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs have been fixed related to reip in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded JavaScript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+**Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Outputs system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10
+riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
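+
+Because these statistics are reset when the node restarts, it can be
+useful to sample them periodically while a cluster settles after
+membership changes. One simple approach, assuming the common `watch`
+utility is available on the node:
+
+```bash
+# Re-run the AAE status report every 60 seconds to watch
+# exchanges, tree builds, and repairs progress over time.
+watch -n 60 riak-admin aae-status
+```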
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```bash
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
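+
+For illustration, a typical first-time setup might look like the
+following sketch. The username, password, and network source here are
+placeholders, and security should only be enabled once SSL has been
+configured:
+
+```bash
+# Turn security on, create a user, allow password-based logins
+# from localhost, and grant basic key/value permissions.
+riak-admin security enable
+riak-admin security add-user alice password=example_password
+riak-admin security add-source alice 127.0.0.1/32 password
+riak-admin security grant riak_kv.get,riak_kv.put on any to alice
+```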
+
+## bucket-type
+
+Bucket types, introduced in Riak 2.0, are a means of managing bucket
+properties and provide an additional namespace alongside buckets and
+keys. This command enables you to create and modify bucket
+types, provide the status of currently available bucket types, and
+activate created bucket types.
+
+```bash
+riak-admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak-admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak-admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak-admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+        Total partitions: 64
+        Finished partitions: 44
+        Speed: 100
+        Total 2i items scanned: 0
+        Total tree objects: 0
+        Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak-admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak-admin repair-2i kill
+```
+
+## search
+
+The search command provides sub-commands for various administrative
+work related to the new Riak Search.
+
+```bash
+riak-admin search <command>
+```
+
+### aae-status
+
+```bash
+riak-admin search aae-status
+```
+
+Outputs active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtree don't match for a particular key. The `Last` column is the
+number of keys repaired during the last exchange.
The `Mean` column is +the average number of keys repaired for all exchange rounds since the +node has started. The `Max` column is the maximum number of keys +repaired for a given exchange round since the node has started. + +### switch-to-new-search + +{{% note title="Only For Legacy Migration" %}} +This is only needed when migrating from legacy riak search to the new Search +(Yokozuna). +{{% /note %}} + +```bash +riak-admin search switch-to-new-search +``` + +Switch handling of the HTTP `/solr/<index>/select` resource and +protocol buffer query messages from legacy Riak Search to new Search +(Yokozuna). + +## services + +Lists available services on the node (e.g. `riak_kv`). + +```bash +riak-admin services +``` + +## ensemble-status + +This command is used to provide insight into the current status of the +consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature. + +```bash +riak-admin ensemble-status +``` + +This command can also be used to check on the status of a specific +consensus group in your cluster: + +```bash +riak-admin ensemble-status <group id> +``` + +Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency]. + +## handoff + +Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff]. + +## set + +Enables you to change the value of one of Riak's configuration +parameters on the fly, without needing to stop and restart the node. + +```bash +riak-admin set <variable>=<value> +``` + +The set command can only be used for the following +parameters: + +* `transfer_limit` +* `handoff.outbound` +* `handoff.inbound` +* `search.dist_query=off` will disable distributed query for the node +* `search.dist_query=on` will enable distributed query for the node +* `search.dist_query` will get the status of distributed query for the node + +The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted. + + +## show + +Whereas the [`riak-admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak +cluster, the `show` command enables you to view only some of those +statistics. + +```bash +riak-admin show <variable> +``` + +## describe + +Provides a brief description of one of Riak's [configurable parameters][config reference]. 
+
+```bash
+riak-admin describe <variable>
+```
+
+If you want to know the meaning of the `nodename` parameter:
+
+```bash
+riak-admin describe nodename
+```
+
+That will produce the following output:
+
+```
+nodename:
+  Name of the Erlang node
+```
diff --git a/content/riak/kv/2.9.1/using/admin/riak-cli.md b/content/riak/kv/2.9.1/using/admin/riak-cli.md
new file mode 100644
index 0000000000..d7131f554c
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/admin/riak-cli.md
@@ -0,0 +1,200 @@
+---
+title: "riak Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "riak CLI"
+    identifier: "cluster_admin_riak_cli"
+    weight: 102
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/tools/riak
+  - /riak/kv/2.9.1/ops/running/tools/riak
+---
+
+[configuration file]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/
+[escript]: http://www.erlang.org/doc/man/escript.html
+[`riak-admin`]: {{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#top
+[configuration]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/
+
+## riak
+
+This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands:
+
+```bash
+Usage: riak «command»
+where «command» is one of the following:
+    { help | start | stop | restart | ping | console | attach
+      attach-direct | ertspath | chkconfig | escript | version | getpid
+      top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } |
+      config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+## help
+
+Provides a brief description of all available commands.
+
+## start
+
+Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given.
+
+```bash
+riak start
+```
+
+## stop
+
+Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak stop
+```
+
+## restart
+
+Stops and then starts the running Riak node without exiting the Erlang VM.
+Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak restart
+```
+
+## ping
+
+Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding.
+
+```bash
+riak ping
+```
+
+## console
+
+Starts the Riak node in the foreground, giving access to the Erlang shell and
+runtime messages. Prints `Node is already running - use 'riak attach' instead`
+when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice.
+
+```bash
+riak console
+```
+
+## attach
+
+Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached.
+
+```bash
+riak attach
+```
+
+## attach-direct
+
+Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**.
+
+```bash
+riak attach-direct
+```
+
+## ertspath
+
+Outputs the path of the Riak Erlang runtime environment:
+
+```bash
+riak ertspath
+```
+
+## chkconfig
+
+Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output.
+
+```bash
+riak chkconfig
+```
+
+## escript
+
+Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment:
+
+```bash
+riak escript <filename>
+```
+
+## version
+
+Outputs the Riak version identifier:
+
+```bash
+riak version
+```
+
+## getpid
+
+Outputs the process identifier for the currently-running instance of Riak:
+
+```bash
+riak getpid
+```
+
+## top
+
+The `riak top` command is the direct equivalent of `riak-admin top`:
+
+```bash
+riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N]
+```
+
+More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation.
+
+## config
+
+Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration.
+
+```bash
+riak config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however, `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files.
+  The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example:
+
+    ```
+    -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args
+    ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message:
+
+    ```
+    -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args
+    ```
+
+* `effective` prints the effective configuration in the following syntax:
+
+    ```
+    parameter1 = value1
+    parameter2 = value2
+    ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error:
+
+    ```
+    Disabling cuttlefish, legacy configuration files found:
+      /etc/riak/app.config
+      /etc/riak/vm.args
+    Effective config is only visible for cuttlefish conf files.
+    ```
+
+* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following:
+
+    ```
+    Documentation for storage_backend
+    Specifies the storage engine used for Riak's key-value data
+    and secondary indexes (if supported).
+
+    Valid Values:
+      - one of: bitcask, leveldb, memory, multi, prefix_multi
+    Default Value : bitcask
+    Set Value     : bitcask
+    Internal key  : riak_kv.storage_backend
+    ```
+
+Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem.
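+
+As a practical sketch, a configuration edit could be validated before
+restarting the node; `storage_backend` below is just the example
+parameter discussed above:
+
+```bash
+# Check that the edited riak.conf parses, then inspect how
+# one setting was actually resolved by cuttlefish.
+riak chkconfig
+riak config effective | grep storage_backend
+riak config describe storage_backend
+```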
diff --git a/content/riak/kv/2.9.1/using/admin/riak-control.md b/content/riak/kv/2.9.1/using/admin/riak-control.md
new file mode 100644
index 0000000000..f77817da60
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/admin/riak-control.md
@@ -0,0 +1,233 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/advanced/riak-control
+  - /riak/kv/2.9.1/ops/advanced/riak-control
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+or
+
+listener.https.<name> = 127.0.0.1:8069
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+
+```
+
+```appconfig
+ {riak_api,
+  [
+    %% Other configs
+    ... if HTTP is configured ...
+    {http,[{"127.0.0.1",8098}]},
+    ... if HTTPS is configured ...
+    {https,[{"127.0.0.1",8069}]},
+    %% Other configs
+  ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found in the document below.
+{{% /note %}}

+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, false},
+    %% Other configs
+  ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, true},
+    %% Other configs
+  ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>.
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/2.9.1/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
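+
+Before moving on, it can be worth confirming that the HTTPS listener
+actually answers. A quick check with `curl`, using the example listener
+address from above (`-k` permits self-signed certificates during
+testing):
+
+```bash
+# Expect an HTTP response over HTTPS from the Riak Control endpoint;
+# -k skips certificate verification for self-signed test setups.
+curl -k -I https://127.0.0.1:8069/admin
+```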
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
+
+## Authentication
+
+Riak Control provides you the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {auth, userlist}, %% The only other available option is "none"
+    %% Other configs
+  ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {userlist, [
+        {"bob", "bob_is_the_coolest"},
+        {"polly", "h4x0r123"},
+        {"riakrocks", "cap_theorem_4_life"}
+    ]}
+    %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to `https://ip_address_of_https_listener:https_port/admin`.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png)]({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+
+Staged changes to the cluster:
+
+[![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png)]({{<baseurl>}}images/control_cluster_management_staged.png)
+
+Changes committed; transfers active:
+
+[![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png)]({{<baseurl>}}images/control_cluster_management_transfers.png)
+
+Cluster stabilizes after changes:
+
+[![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png)]({{<baseurl>}}images/control_cluster_management_stable.png)
+
+### Node Management View
+
+The node management view allows you to operate against the individual
+nodes in the cluster.
+
+[![Node Management]({{<baseurl>}}images/control_node_management.png)]({{<baseurl>}}images/control_node_management.png)
+
+### Ring View
+
+One level deeper than the cluster view is the ring view. This is where you can
+see the health of each [vnode]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode).
+
+[![Ring View]({{<baseurl>}}images/control_current_ring.png)]({{<baseurl>}}images/control_current_ring.png)
+
+Most of the time, your ring will be too large to effectively manage from
+the ring view. That said, with filters you can easily identify partition
+ownership, unreachable primaries, and in-progress handoffs.
diff --git a/content/riak/kv/2.9.1/using/cluster-operations.md b/content/riak/kv/2.9.1/using/cluster-operations.md
new file mode 100644
index 0000000000..b6c2256f14
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations.md
@@ -0,0 +1,104 @@
+---
+title: "Cluster Operations"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Cluster Operations"
+    identifier: "managing_cluster_operations"
+    weight: 203
+    parent: "managing"
+toc: true
+---
+
+[ops add remove node]: ./adding-removing-nodes
+[ops change info]: ./changing-cluster-info
+[ops replace node]: ./replacing-node
+[ops inspect node]: ./inspecting-node
+[ops bucket types]: ./bucket-types
+[ops log]: ./logging
+[ops backup]: ./backing-up
+[ops handoff]: ./handoff
+[ops strong consistency]: ./strong-consistency
+[ops v3 mdc]: ./v3-multi-datacenter
+[ops v2 mdc]: ./v2-multi-datacenter
+
+## In This Section
+
+
+#### [Adding / Removing Nodes][ops add remove node]
+
+Describes the process of adding and removing nodes to and from a Riak KV cluster.
+
+[Learn More >>][ops add remove node]
+
+
+#### [Changing Cluster Information][ops change info]
+
+Details how to rename nodes and change the HTTP & Protocol Buffers binding address.
+
+[Learn More >>][ops change info]
+
+
+#### [Replacing a Node][ops replace node]
+
+Tutorial on replacing a node in a Riak KV cluster.
+
+[Learn More >>][ops replace node]
+
+
+#### [Inspecting a Node][ops inspect node]
+
+Guide on some of the available tools for inspecting a Riak KV node.
+
+[Learn More >>][ops inspect node]
+
+
+#### [Bucket Types][ops bucket types]
+
+Brief tutorial on creating bucket types.
+
+[Learn More >>][ops bucket types]
+
+
+#### [Enabling and Disabling Debug Logging][ops log]
+
+Details toggling debug logging on a single node or all nodes in a cluster.
+
+[Learn More >>][ops log]
+
+
+#### [Backing Up][ops backup]
+
+Covers backing up Riak KV data.
+
+[Learn More >>][ops backup]
+
+
+#### [Enabling and Disabling Handoff][ops handoff]
+
+Information on using the `riak-admin handoff` interface to enable and disable handoff.
+
+[Learn More >>][ops handoff]
+
+
+#### [Monitoring Strong Consistency][ops strong consistency]
+
+Overview of the various statistics used in monitoring strong consistency.
+
+[Learn More >>][ops strong consistency]
+
+
+#### [V3 Multi-Datacenter][ops v3 mdc]
+
+Explains how to manage V3 replication with the `riak-repl` command.
+
+[Learn More >>][ops v3 mdc]
+
+
+#### [V2 Multi-Datacenter][ops v2 mdc]
+
+Explains how to manage V2 replication with the `riak-repl` command.
+
+[Learn More >>][ops v2 mdc]
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/TicTac-Active-anti-entropy.md b/content/riak/kv/2.9.1/using/cluster-operations/TicTac-Active-anti-entropy.md
new file mode 100644
index 0000000000..ef7ff0b848
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/TicTac-Active-anti-entropy.md
@@ -0,0 +1,31 @@
+---
+title: "TicTac Active Anti-Entropy"
+description: "An Active Anti-Entropy library"
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "TicTac Active Anti-Entropy"
+    identifier: "TicTac_aae"
+    weight: 111
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/kv/2.9.1/ops/advanced/tictacaae/
+  - /riak/2.9.1/ops/advanced/ticktacaae/
+---
+
+
+
+Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning.
+
+## TicTac AAE
+
+The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to fully integrate the library into the KV 3.0 release.
+
+TicTac Active Anti-Entropy makes two changes to the way anti-entropy has previously worked in Riak. The first change is to the way Merkle trees are constructed, so that they are built incrementally. The second change allows the underlying anti-entropy key store to be key-ordered while still allowing faster access to keys via their Merkle tree location or the last modified date of the object.
+
+## Configuring AAE
+
+Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and
+off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements.
\ No newline at end of file
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.9.1/using/cluster-operations/active-anti-entropy.md
new file mode 100644
index 0000000000..0bb0e20b08
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/active-anti-entropy.md
@@ -0,0 +1,285 @@
+---
+title: "Managing Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Managing Active Anti-Entropy"
+    identifier: "cluster_operations_aae"
+    weight: 111
+    parent: "managing_cluster_operations"
+version_history:
+  in: "2.9.1+"
+toc: true
+aliases:
+  - /riak/kv/2.9.1/ops/advanced/aae/
+  - /riak/2.9.1/ops/advanced/aae/
+---
+[config search#throttledelay]: {{<baseurl>}}riak/kv/2.9.1/configuring/search/#search-anti-entropy-throttle-tier-delay
+[config search#throttle]: {{<baseurl>}}riak/kv/2.9.1/configuring/search/#search-anti-entropy-throttle
+
+Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes.
Riak operators can turn AAE on and off and configure and monitor its functioning.
+
+In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak.
+
+## Enabling Active Anti-Entropy
+
+Whether AAE is currently enabled in a node is determined by the value of
+the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/).
+
+In Riak versions 2.0 and later, AAE is turned on by default.
+
+```riakconf
+anti_entropy = active
+```
+
+```appconfig
+{riak_kv, [
+
+    {anti_entropy, {on, []}},
+
+    %% More riak_kv settings...
+]}
+```
+
+For monitoring purposes, you can also activate AAE debugging, which
+provides verbose debugging message output:
+
+```riakconf
+anti_entropy = active-debug
+```
+
+```appconfig
+{riak_kv, [
+
+    %% With debugging
+    {anti_entropy, {on, [debug]}},
+
+    %% More riak_kv settings...
+]}
+```
+
+Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect.
+
+## Disabling Active Anti-Entropy
+
+Alternatively, AAE can be switched off if you would like to repair
+object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone:
+
+```riakconf
+anti_entropy = passive
+```
+
+```appconfig
+{riak_kv, [
+
+    %% AAE turned off
+    {anti_entropy, {off, []}},
+
+    %% More riak_kv settings...
+]}
+```
+
+If you would like to reclaim the disk space used by AAE operations, you
+must manually delete the directory in which AAE-related data is stored
+on each node.
+
+```bash
+rm -Rf <path_to_riak_node>/data/anti_entropy/*
+```
+
+The default directory for AAE data is `./data/anti_entropy`, as in the
+example above, but this can be changed. See the section below titled
+**Data Directory**.
+
+Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect.
+
+The directory deletion method above can also be used to force a
+rebuilding of hash trees.
+
+## Monitoring AAE
+
+Riak's command-line interface includes a command that provides insight
+into AAE-related processes and performance:
+
+```bash
+riak-admin aae-status
+```
+
+When you run this command on a node, the output will look like this
+(shortened for the sake of brevity):
+
+```
+================================== Exchanges ==================================
+Index                                              Last (ago)    All (ago)
+-------------------------------------------------------------------------------
+0                                                  19.0 min      20.3 min
+22835963083295358096932575511191922182123945984   18.0 min      20.3 min
+45671926166590716193865151022383844364247891968   17.3 min      19.8 min
+68507889249886074290797726533575766546371837952   16.5 min      18.3 min
+91343852333181432387730302044767688728495783936   15.8 min      17.3 min
+...
+
+================================ Entropy Trees ================================
+Index                                              Built (ago)
+-------------------------------------------------------------------------------
+0                                                  5.7 d
+22835963083295358096932575511191922182123945984   5.6 d
+45671926166590716193865151022383844364247891968   5.5 d
+68507889249886074290797726533575766546371837952   4.3 d
+91343852333181432387730302044767688728495783936   4.8 d
+
+================================ Keys Repaired ================================
+Index                                              Last    Mean    Max
+-------------------------------------------------------------------------------
+0                                                   0       0       0
+22835963083295358096932575511191922182123945984    0       0       0
+45671926166590716193865151022383844364247891968    0       0       0
+68507889249886074290797726533575766546371837952    0       0       0
+91343852333181432387730302044767688728495783936    0       0       0
+
+```
+
+Each of these three tables contains information for each
+[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories:
+
+Category | Measures | Description
+:--------|:---------|:-----------
+**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed
+ | `All` | How long it has been since a partition exchanged with all of its replicas
+**Entropy Trees** | `Built` | When the hash trees for a given partition were created
+**Keys Repaired** | `Last` | The number of keys repaired during the most recent key exchange
+ | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart
+ | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart
+
+All AAE status information obtainable using the `riak-admin aae-status`
+command is stored in-memory and is reset when a node is restarted with
+the exception of hash tree build information, which is persisted on disk
+(because hash trees themselves are persisted on disk).
+
+## Configuring AAE
+
+Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and
+off but also to fine-tune your cluster's use of AAE, e.g. how
+much memory AAE processes should consume, how frequently specific
+processes should be run, etc.
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach a specified
+length, with the delay for each tier set by the [`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on
+that in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g.
`tier1`, `large_mailbox_tier`, etc). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but
+entails a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data corruption.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built. You can set the frequency using the
+`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
+default is every hour (`1h`); the number of hash tree builds is
+specified by `anti_entropy.tree.build_limit.number`, for which the
+default is 1.
+
+### Write Buffer Size
+
+While you are free to choose the backend for data storage in Riak,
+background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
+write buffer used by LevelDB for hash tree generation using the
+`anti_entropy.write_buffer_size` parameter. The default is `4MB`.
+
+### Open Files and Concurrency Limits
+
+The `anti_entropy.concurrency_limit` parameter determines how many AAE
+cross-node information exchanges or hash tree builds can happen
+concurrently. The default is `2`.
+
+The `anti_entropy.max_open_files` parameter sets an open-files limit for
+AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.
+
+## AAE and Riak Search
+
+Riak's AAE subsystem works to repair object inconsistencies both for
+normal key/value objects and for data related to [Riak Search](../../../developing/usage/search).
In particular, AAE acts on indexes stored in +[Solr](http://lucene.apache.org/solr/), the search platform that drives +Riak Search. Implementation details for AAE and Search can be found in +the [Search Details](../../reference/search/#active-anti-entropy-aae) +documentation. + +You can check on the status of Search-related AAE using the following +command: + +```bash +riak-admin search aae-status +``` + +The output from that command can be interpreted just like the output +discussed in the section on [monitoring](#monitoring-aae) above. diff --git a/content/riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes.md new file mode 100644 index 0000000000..04473bba78 --- /dev/null +++ b/content/riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes.md @@ -0,0 +1,194 @@ +--- +title: "Adding / Removing Nodes" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Adding/Removing Nodes" + identifier: "cluster_operations_add_remove_nodes" + weight: 100 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.1/ops/running/nodes/adding-removing + - /riak/kv/2.9.1/ops/running/nodes/adding-removing +--- + +[use running cluster]: {{<baseurl>}}riak/kv/2.9.1/using/running-a-cluster + +This page describes the process of adding and removing nodes to and from +a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. + +## Start the Node + +Just like the initial configuration steps, this step has to be repeated +for every node in your cluster. Before a node can join an existing +cluster it needs to be started. Depending on your mode of installation, +use either the init scripts installed by the Riak binary packages or +simply the script [`riak`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-cli/): + +```bash +/etc/init.d/riak start +``` + +or + +```bash +bin/riak start +``` + +When the node starts, it will look for a cluster description, known as +the **ring file**, in its data directory. If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. 
This command
+would stage a join to that cluster:
+
+```bash
+bin/riak-admin cluster join riak@192.168.2.2
+```
+
+If the join request is successful, you should see the following:
+
+```
+Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2'
+```
+
+If you have multiple nodes that you would like to join to an existing
+cluster, repeat this process for each of them.
+
+## Joining Nodes to Form a Cluster
+
+The process of joining a cluster involves several steps, including
+staging the proposed cluster nodes, reviewing the cluster plan, and
+committing the changes.
+
+After staging each of the cluster nodes with `riak-admin cluster join`
+commands, as in the section above, the next step in forming a cluster is
+to review the proposed plan of changes. This can be done with the
+`riak-admin cluster plan` command, which is shown in the example below.
+
+```
+=============================== Staged Changes ================================
+Action         Nodes(s)
+-------------------------------------------------------------------------------
+join           'riak@192.168.2.3'
+join           'riak@192.168.2.4'
+join           'riak@192.168.2.5'
+join           'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+                         After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid     100.0%     20.3%    'riak@192.168.2.2'
+valid       0.0%     20.3%    'riak@192.168.2.3'
+valid       0.0%     20.3%    'riak@192.168.2.4'
+valid       0.0%     20.3%    'riak@192.168.2.5'
+valid       0.0%     18.8%    'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+  12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from the cluster in two ways. One assumes that a
+node is decommissioned, for example, because its added capacity is not
+needed anymore or because it's explicitly replaced with a new one. The
+second is relevant for failure scenarios in which a node has crashed and
+is irrecoverable and thus must be removed from the cluster from another
+node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster. 
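+
+For example, here is a minimal sketch of the full sequence (the node
+involved is illustrative; run the first command on the node that is
+leaving):
+
+```bash
+# On the node that should leave the cluster:
+riak-admin cluster leave
+
+# Review the proposed ownership transfers, then commit them:
+riak-admin cluster plan
+riak-admin cluster commit
+```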
+
+Similarly to joining a node, after executing `riak-admin cluster leave`,
+the cluster plan must be reviewed with `riak-admin cluster plan` and
+the changes committed with `riak-admin cluster commit`.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing. Running
+`riak-admin cluster leave` simply selects the current node for you
+automatically.
+
+As with `riak-admin cluster leave`, the plan to have a node leave the
+cluster must first be reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before any changes will
+actually take place.
+
+
+## Pausing a `join` or `leave`
+
+{{% note title="Warning" %}}
+Pausing may impact cluster health and is not recommended for more than a short period of time.
+{{% /note %}}
+
+To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0:
+
+```bash
+riak-admin transfer-limit <node> 0
+```
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/backend.md b/content/riak/kv/2.9.1/using/cluster-operations/backend.md
new file mode 100644
index 0000000000..bb9bff33ca
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/backend.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+#menu:
+#  riak_kv-2.9.1:
+#    name: "Backend"
+#    identifier: "cluster_operations_backend"
+#    weight: 112
+#    parent: "managing_cluster_operations"
+toc: true
+---
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/backing-up.md b/content/riak/kv/2.9.1/using/cluster-operations/backing-up.md
new file mode 100644
index 0000000000..cdf3bd3053
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/backing-up.md
@@ -0,0 +1,267 @@
+---
+title: "Backing Up"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Backing Up"
+    identifier: "cluster_operations_backing_up"
+    weight: 106
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/backups
+  - /riak/kv/2.9.1/ops/running/backups
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/bitcask
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/reference/strong-consistency
+[concept aae]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/
+[aae read repair]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy
+
+Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios.
+
+Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios.
+
+This page covers how to perform backups of Riak KV data. 
+
+## Overview
+
+Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar.
+
+Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes.
+
+The basic process for getting a backup of Riak KV from a node is as follows:
+
+1. Stop Riak KV with `riak stop`.
+2. Back up the appropriate data, ring, and configuration directories.
+3. Start Riak KV.
+
+Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting.
+
+{{% note title="Backups and eventual consistency" %}}
+Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node.
+
+Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy).
+{{% /note %}}
+
+## OS-Specific Directory Locations
+
+The default Riak KV data, ring, and configuration directories for each of the supported operating systems are as follows:
+
+#### Debian and Ubuntu
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### Fedora and RHEL
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### FreeBSD
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/usr/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### OS X
+
+Data | Directory
+:----|:---------
+Bitcask | `./data/bitcask`
+LevelDB | `./data/leveldb`
+Ring | `./data/riak/ring`
+Configuration | `./etc`
+Cluster Metadata | `./data/riak/cluster_meta`
+Search | `./data/riak/yz`
+Strong consistency | `./data/ensembles`
+
+**Note**: OS X paths are relative to the directory in which the package
+was extracted.
+
+#### SmartOS
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/opt/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### Solaris
+
+Data | Directory
+:----|:---------
+Bitcask | `/opt/riak/data/bitcask`
+LevelDB | `/opt/riak/data/leveldb`
+Ring | `/opt/riak/ring`
+Configuration | `/opt/riak/etc`
+Cluster Metadata | `/opt/riak/cluster_meta`
+Search | `/opt/riak/yz`
+Strong consistency | `/opt/riak/data/ensembles`
+
+## Performing Backups
+
+{{% note title="Deprecation notice" %}}
+In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#backup) command commonly used for
+backups. 
This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
+{{% /note %}}
+
+Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.
+
+Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
+installation.
+
+The following examples use `tar`:
+
+{{% note %}}
+Backups must be performed while Riak KV is stopped to prevent data loss.
+{{% /note %}}
+
+### Bitcask
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak
+```
+
+### LevelDB
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak
+```
+
+### Cluster Metadata
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/cluster_meta
+```
+
+### Search / Solr Data
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/yz
+```
+
+### Strong Consistency Data
+
+Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature
+can be stored in an analogous fashion:
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/ensembles
+```
+
+## Restoring a Node
+
+The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment.
+
+If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process:
+
+1. Install Riak on the new node.
+2. Restore your old node's configuration files, data directory, and ring
+   directory.
+3. Start the node and verify proper operation with `riak ping`,
+   `riak-admin status`, and other methods you use to check node health.
+
+If the node name of a restored node (`-name` argument in `vm.args` or
+`nodename` parameter in `riak.conf`) is different from the name of the
+node that the restored backup was taken from, you will need to
+additionally:
+
+1. Mark the original instance down in the cluster using
+   [`riak-admin down <node>`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#down)
+2. Join the restored node to the cluster using
+   [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster-join)
+3. Replace the original instance with the renamed instance with
+   [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster-force-replace)
+4. Plan the changes to the cluster with `riak-admin cluster plan`
+5. Finally, commit the cluster changes with `riak-admin cluster commit`
+
+{{% note %}}
+For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/2.9.1/using/admin/).
+{{% /note %}}
+
+For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`.
+
+1. Join to any existing cluster node. 
+
+    ```bash
+    riak-admin cluster join riak@riak2.example.com
+    ```
+
+2. Mark the old instance down.
+
+    ```bash
+    riak-admin down riak@riak1.example.com
+    ```
+
+3. Force-replace the original instance with the new one.
+
+    ```bash
+    riak-admin cluster force-replace \
+      riak@riak1.example.com riak@riak6.example.com
+    ```
+
+4. Display and review the cluster change plan.
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+5. Commit the changes to the cluster.
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+In addition to running the commands above, your [configuration files][config reference] should also be changed to match the new name (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change, but the hostnames are used for the node names and the hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member_status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/2.9.1/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/bucket-types.md b/content/riak/kv/2.9.1/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..50f96eab84
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/bucket-types.md
@@ -0,0 +1,58 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) to namespace and configure all buckets you use. 
Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional setup step on the
+command line.
+
+## Creating a Bucket Type
+
+When creating a new bucket type, you can create it without
+any properties and set individual buckets to be indexed. The step below
+creates and activates the bucket type:
+
+```bash
+riak-admin bucket-type create animals '{"props":{}}'
+riak-admin bucket-type activate animals
+```
+
+And this step applies the index to the `cats` bucket, which bears the
+`animals` bucket type we just created and activated:
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"famous"}}'
+```
+
+Another possibility is to set the `search_index` as a default property
+of the bucket type. This means _any_ bucket under that type will
+inherit that setting and have its values indexed.
+
+```bash
+riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}'
+riak-admin bucket-type activate animals
+```
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.9.1/using/cluster-operations/changing-cluster-info.md
new file mode 100644
index 0000000000..4f34c14b5a
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/changing-cluster-info.md
@@ -0,0 +1,454 @@
+---
+title: "Changing Cluster Information"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Changing Cluster Info"
+    identifier: "cluster_operations_change_info"
+    weight: 101
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/nodes/renaming
+  - /riak/kv/2.9.1/ops/running/nodes/renaming
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+
+## Change the Node Name
+
+The node name is an important setting for the Erlang VM, especially when
+you want to build a cluster of nodes, as the node name identifies both
+the Erlang application and the host name on the network. All nodes in
+the Riak cluster need these node names to communicate and coordinate
+with each other.
+
+In your configuration files, the node name defaults to `riak@127.0.0.1`.
+To change the node name, change the following line:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+Change it to something that corresponds to either the IP address or a
+resolvable host name for this particular node, like so:
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+## Change the HTTP and Protocol Buffers binding address
+
+By default, Riak's HTTP and Protocol Buffers services are bound to the
+local interface, i.e. 127.0.0.1, and are therefore unable to serve
+requests from the outside network. 
The relevant setting is in your
+[configuration files][config reference]:
+
+```riakconf
+# For HTTP
+listener.http.internal = 127.0.0.1:8098
+
+# For Protocol Buffers
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+% For HTTP, in the riak_core section
+{http, [ {"127.0.0.1", 8098 } ]},
+
+% For Protocol Buffers, in the riak_api section
+{pb, [ {"127.0.0.1", 8087} ] },
+```
+
+Either change it to use an IP address that corresponds to one of the
+server's network interfaces, or 0.0.0.0 to allow access from all
+interfaces and networks, e.g.:
+
+```riakconf
+listener.http.internal = 0.0.0.0:8098
+```
+
+```appconfig
+% In the riak_core section
+{http, [ {"0.0.0.0", 8098 } ]},
+```
+
+The same configuration should be changed for the Protocol Buffers
+interface if you intend to use it (which we recommend). Change the
+following line:
+
+```riakconf
+listener.protobuf.internal = 0.0.0.0:8087
+```
+
+```appconfig
+% In the riak_api section
+{pb, [ {"0.0.0.0", 8087} ] },
+```
+
+## Rename Single Node Clusters
+
+To rename a single-node development cluster:
+
+1. Stop the node with `riak stop`.
+
+2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to the new name.
+
+3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`.
+
+4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`.
+
+5. Start Riak on the node with `riak start`.
+
+
+## Rename Multi-Node Clusters
+
+For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node.
+
+Prior to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime.
+
+There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario.
+
+The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method.
+
+### Example Scenario
+
+For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration:
+
+* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11
+* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12
+* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13
+* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14
+* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15
+
+The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use. 
The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation.
+
+### Process
+
+This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section.
+
+1. [Down the node to be reconfigured](#down)
+2. [Reconfigure node to use new address](#reconfigure)
+3. [Repeat previous steps on each node](#repeat)
+
+
+<a id="down"></a>
+#### Down the Node
+
+1. Stop Riak on `node1.localdomain`:
+
+    ```bash
+    riak stop
+    ```
+
+    The output should look like this:
+
+    ```
+    Attempting to restart script through sudo -H -u riak
+    ok
+    ```
+
+2. From the `node2.localdomain` node, mark `riak@10.1.42.11` down:
+
+    ```bash
+    riak-admin down riak@10.1.42.11
+    ```
+
+    Successfully marking the node down should produce output like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: "riak@10.1.42.11" marked as down
+    ```
+
+    This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node.
+
+<a id="reconfigure"></a>
+#### Reconfigure Node to Use New Address
+
+Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps:
+
+1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example:
+
+    `riak.conf`: `nodename = riak@192.168.17.11`
+    `vm.args` : `-name riak@192.168.17.11`
+
+2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Rename Single Node Clusters](#rename-single-node-clusters).
+
+3. Rename the node's `ring` directory, the location of which is described in step 4 of [Rename Single Node Clusters](#rename-single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process.
+
+4. Start Riak on `node1.localdomain`.
+
+    ```bash
+    riak start
+    ```
+
+5. Join the node back into the cluster.
+
+    ```bash
+    riak-admin cluster join riak@10.1.42.12
+    ```
+
+    Successful staging of the join request should have output like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12'
+    ```
+
+6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`:
+
+    ```bash
+    riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11
+    ```
+
+    Successful force replacement staging output looks like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11'
+    ```
+
+7. 
Review the new changes with `riak-admin cluster plan`:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Example output:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    =========================== Staged Changes ============================
+    Action         Nodes(s)
+    -----------------------------------------------------------------------
+    join           'riak@192.168.17.11'
+    force-replace  'riak@10.1.42.11' with 'riak@192.168.17.11'
+    -----------------------------------------------------------------------
+
+    WARNING: All of 'riak@10.1.42.11' replicas will be lost
+
+    NOTE: Applying these changes will result in 1 cluster transition
+
+    #######################################################################
+                          After cluster transition 1/1
+    #######################################################################
+
+    ============================= Membership ==============================
+    Status     Ring    Pending    Node
+    -----------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@10.1.42.12'
+    valid      20.3%      --      'riak@10.1.42.13'
+    valid      20.3%      --      'riak@10.1.42.14'
+    valid      18.8%      --      'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    Partitions reassigned from cluster changes: 13
+      13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11'
+    ```
+
+8. Commit the new changes to the cluster with `riak-admin cluster commit`:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    Output from the command should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Cluster changes committed
+    ```
+
+9. Check that the node is participating in the cluster and functioning as expected:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    Output should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    ============================= Membership ==============================
+    Status     Ring    Pending    Node
+    -----------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@10.1.42.12'
+    valid      20.3%      --      'riak@10.1.42.13'
+    valid      20.3%      --      'riak@10.1.42.14'
+    valid      18.8%      --      'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+10. Monitor hinted handoff transfers with the `riak-admin transfers` command to ensure they have finished.
+
+11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed.
+
+{{% note title="Note" %}}
+When using the `riak-admin cluster force-replace` command, you will always get a
+warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be
+lost`. Since we didn't delete any data files and we are replacing the node
+with itself under a new name, we will not lose any replicas.
+{{% /note %}}
+
+<a id="repeat"></a>
+#### Repeat previous steps on each node
+
+Repeat the steps above for each of the remaining nodes in the cluster.
+
+Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster. 
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the below steps can be used to rename nodes in a cluster that is being restored from backups. The below steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (`-name` in `vm.args`) from `riak@10.1.42.11` to your new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    down       20.3%      --      'riak@10.1.42.12'
+    down       20.3%      --      'riak@10.1.42.13'
+    down       20.3%      --      'riak@10.1.42.14'
+    down       18.8%      --      'riak@10.1.42.15'
+    -------------------------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+
+    ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+    ```bash
+    ================================== Claimant ===================================
+    Claimant:  'riak@192.168.17.11'
+    Status:     up
+    Ring Ready: true
+
+    ============================== Ownership Handoff ==============================
+    No pending changes.
+
+    ============================== Unreachable Nodes ==============================
+    All nodes are up and reachable
+    ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args`, as described above.
+
+2. Move aside the ring directory. As in [Rename Multi-Node Clusters](#rename-multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. 
Force-replace each node's original name with its new name. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`.
+
+6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to below:
+
+    ```bash
+    =============================== Staged Changes ================================
+    Action         Details(s)
+    -------------------------------------------------------------------------------
+    force-replace  'riak@10.1.42.12' with 'riak@192.168.17.12'
+    force-replace  'riak@10.1.42.13' with 'riak@192.168.17.13'
+    force-replace  'riak@10.1.42.14' with 'riak@192.168.17.14'
+    force-replace  'riak@10.1.42.15' with 'riak@192.168.17.15'
+    join           'riak@192.168.17.12'
+    join           'riak@192.168.17.13'
+    join           'riak@192.168.17.14'
+    join           'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+
+    WARNING: All of 'riak@10.1.42.12' replicas will be lost
+    WARNING: All of 'riak@10.1.42.13' replicas will be lost
+    WARNING: All of 'riak@10.1.42.14' replicas will be lost
+    WARNING: All of 'riak@10.1.42.15' replicas will be lost
+
+    NOTE: Applying these changes will result in 1 cluster transition
+
+    ###############################################################################
+                             After cluster transition 1/1
+    ###############################################################################
+
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@192.168.17.12'
+    valid      20.3%      --      'riak@192.168.17.13'
+    valid      20.3%      --      'riak@192.168.17.14'
+    valid      18.8%      --      'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    Partitions reassigned from cluster changes: 51
+      13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12'
+      13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13'
+      13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14'
+      12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15'
+    ```
+
+7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`.
+
+8. 
Once the cluster transition has completed, all node names should be changed and be marked as valid in `riak-admin member-status` like below:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@192.168.17.12'
+    valid      20.3%      --      'riak@192.168.17.13'
+    valid      20.3%      --      'riak@192.168.17.14'
+    valid      18.8%      --      'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    ```
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/handoff.md b/content/riak/kv/2.9.1/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..3782ecf8d0
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/handoff.md
@@ -0,0 +1,116 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/handoff
+  - /riak/kv/2.9.1/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to set your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command
+requires that you specify both a node or nodes to be targeted by the
+command and whether you'd like to disable inbound handoff, outbound
+handoff, or both. The `disable` command works just like `enable`. This
+command would disable all forms of handoff on all nodes, to give just
+one example:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster. 
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table that will provide the following information
+about each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, it will display details on each ongoing
+transfer.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the sections above.
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/inspecting-node.md b/content/riak/kv/2.9.1/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..a47cbeb841
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,492 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/nodes/inspecting
+  - /riak/kv/2.9.1/ops/running/nodes/inspecting
+---
+
+When you need to inspect a Riak node to gather metrics on performance
+or potential issues, a number of tools are available to help, either
+included with Riak itself or made available through the Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note that for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required for statistics to be generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node. 
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET +FSM Siblings are inextricably linked. These are one-minute stats. + +Stat | Description +------------------------------|--------------------------------------------------- +`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute + +### Total Stats + +Total Stats represent the total number of times a particular activity +has occurred since this node was started. + +Stat | Description +---------------------------------------|--------------------------------------------------- +`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes +`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes +`vnode_gets_total` | Total number of GETs coordinated by local vnodes +`vnode_puts_total` | Total number of PUTS coordinated by local vnodes +`read_repairs_total` | Total number of Read Repairs this node has coordinated +`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination +`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy +`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads +`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes +`vnode_index_writes_postings_total` | Total number of individual secondary index values written +`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes +`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted +`pbc_connects_total` | Total number of Protocol Buffers connections made +`precommit_fail` | Total number of pre-commit hook failures +`postcommit_fail` | Total number of post-commit hook failures +`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection +`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection +`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas +`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas +`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas +`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas + +### Timestamps + +Some of the Erlang applications that Riak is comprised of contribute +statistics to `riak-admin status`. The below timestamps record, in +Epoch time, the last time statistics for that application were +generated. + +Stat | Description +--------------------|--------------------------------------------------- +`riak_kv_stat_ts` | The last time Riak KV stats were generated. +`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated. 
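+
+For example, one quick way to check how fresh these statistics are is to
+pull the timestamps out of `riak-admin status` (a sketch; the `grep`
+pattern assumes the default output format, and the Epoch value passed to
+`date` is purely illustrative):
+
+```bash
+# Extract the stat-generation timestamps from riak-admin status
+riak-admin status | grep -E 'riak_kv_stat_ts|riak_pipe_stat_ts'
+
+# Convert an Epoch timestamp to a human-readable date (GNU coreutils date)
+date -d @1465932936
+```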
+
+### Ring
+
+General ring information is reported in `riak-admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members` | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership` | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang's cpu_sup module;
+documentation for it can be found at [ErlDocs:
+cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1` | The average number of active processes for the last 1 minute (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg5` | The average number of active processes for the last 5 minutes (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg15` | The average number of active processes for the last 15 minutes (equivalent to the top(1) command's load average when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine;
+documentation for them can be found at [ErlDocs:
+Memory](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total` | Total allocated memory (sum of processes and system)
+`memory_processes` | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system` | Total allocated memory that is not directly related to an Erlang process
+`memory_atom` | Total amount of memory currently allocated for atom storage
+`memory_atom_used` | Total amount of memory currently used for atom storage
+`memory_binary` | Total amount of memory used for binaries
+`memory_code` | Total amount of memory allocated for Erlang code
+`memory_ets` | Total memory allocated for Erlang Term Storage
+`mem_total` | Total available system memory
+`mem_allocated` | Total memory allocated for this node
+
+### Erlang VM
+
+The below statistics describe properties of the Erlang VM. 

Stat | Description
--------------------------|---------------------------------------------------
`nodename` | The name this node uses to identify itself
`connected_nodes` | A list of the nodes that this node is aware of at this time
`sys_driver_version` | String representing the Erlang driver version in use by the runtime system
`sys_global_heaps_size` | Current size of the shared global heap
`sys_heap_type` | String representing the heap type in use (one of `private`, `shared`, `hybrid`)
`sys_logical_processors` | Number of logical processors available on the system
`sys_otp_release` | Erlang OTP release version in use on the node
`sys_process_count` | Number of processes currently running in the Erlang VM
`sys_smp_support` | Boolean value representing whether symmetric multi-processing (SMP) is available
`sys_system_version` | Detailed Erlang version information
`sys_system_architecture` | The node operating system and hardware architecture
`sys_threads_enabled` | Boolean value representing whether threads are enabled
`sys_thread_pool_size` | Number of threads in the asynchronous thread pool
`sys_wordsize` | Size of Erlang term words in bytes as an integer; for example, 4 is returned on 32-bit architectures and 8 on 64-bit architectures

### Miscellaneous Information

Miscellaneous Information provides additional details particular to this
node.

Stat | Description
---------------------------|---------------------------------------------------
`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as `undefined` if LevelDB is not being used.
`disk` | Information about the disk, taken from Erlang's `disksup` module. Reported as `[{"ID",KBytes_Used,Percent_Util}]`.
`storage_backend` | The storage backend currently in use.

### Pipeline Metrics

The following metrics from `riak_pipe` are generated during MapReduce
operations.

Stat | Description
--------------------------------|---------------------------------------------------
`pipeline_active` | The number of pipelines active in the last 60 seconds
`pipeline_create_count` | The total number of pipelines created since the node was started
`pipeline_create_error_count` | The total number of pipeline creation errors since the node was started
`pipeline_create_error_one` | The number of pipeline creation errors in the last 60 seconds
`pipeline_create_one` | The number of pipelines created in the last 60 seconds

### Application and Subsystem Versions

The specific version of each Erlang application and subsystem that
makes up a Riak node is present in the `riak-admin status` output. Each
application is linked below next to its version identifier.

Stat | Description
------------------------|---------------------------------------------------
`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl)
`riak_control_version` | [Riak Control](http://github.com/basho/riak_control)
`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info)
`riak_search_version` | [Riak Search](http://github.com/basho/riak_search)
`merge_index_version` | [Merge Index](http://github.com/basho/merge_index)
`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv)
`sidejob_version` | [Sidejob](http://github.com/basho/sidejob)
`riak_api_version` | [Riak API](http://github.com/basho/riak_api)
`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe)
`riak_core_version` | [Riak Core](http://github.com/basho/riak_core)
`bitcask_version` | [Bitcask](http://github.com/basho/bitcask)
`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats)
`webmachine_version` | [Webmachine](http://github.com/basho/webmachine)
`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb)
`inets_version` | [inets](http://erlang.org/doc/apps/inets/)
`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js)
`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/)
`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/)
`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon)
`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/)
`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/)
`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/)
`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/)
`lager_version` | [Lager](http://github.com/DeadZen/lager)
`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush)
`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/)
`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/)
`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/)
`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/)

### Riak Search Statistics

The following statistics related to Riak Search message queues are
available.

Stat | Description
-----------------------------|---------------------------------------------------
`riak_search_vnodeq_max` | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute
`riak_search_vnodeq_mean` | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
`riak_search_vnodeq_median` | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
`riak_search_vnodeq_min` | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
`riak_search_vnodeq_total` | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started
`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem

Note that under ideal operation, and with the exception of
`riak_search_vnodes_running`, these statistics should show low values
(e.g., 0-10). Higher values may indicate an issue.

## `riak-debug`

The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes.

`riak-debug` also runs `riak-admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well.

{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}}
The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion.
{{% /note %}}

## Strong Consistency Stats

Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The tables below list those stats.
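
Because all of these stats share the `consistent_` prefix, you can pull
them out of the full status listing with a simple filter. A minimal
sketch, assuming it is run on the node being inspected:

```bash
# Show only the strong-consistency-related stats on this node
riak-admin status | grep '^consistent_'
```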

### GET-related stats

Stat | Description
:----|:-----------
`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute
`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute
`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response

### PUT-related stats

Stat | Description
:----|:-----------
`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute
`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute
`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response

## `riak-admin diag`

Running `riak-admin diag` by itself will perform a check of all of the
data partitions in your cluster.
It will return a listing of partitions
that have been checked, each of which looks something like this:

```
{1392993748081016843912887106182707253109560705024, % the partition checked
 'dev-rel@127.0.0.1'},                              % that partition's nodename
```

At the end of that (potentially very long) listing of checked
partitions, it will print notices, warnings, and other pieces of
information about issues that it has found, including date/time, message
type, and a detailed description. Here's an example:

```
15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
```

Messages bear the following types (derived from
[syslog](http://en.wikipedia.org/wiki/Syslog) severity levels):

* `debug`
* `info`
* `notice`
* `warning`
* `error`
* `critical`
* `alert`
* `emergency`

#### Command flags

Attaching the `--help` flag will return a list of flags and commands
that can be used with Riaknostic:

```
Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]

-h, --help    Display help/usage dialogue
-d, --level   Minimum message severity level (default: notice)
-l, --list    Describe available diagnostic tasks
--export      Package system info in '/export.zip'
check_name    A specific check to run
```

Running `riak-admin diag` with the `--list` flag will return a list of
available diagnostic checks. The following checks are available:

Check | Description
:-----|:-----------
`disk` | Data directory permissions and atime
`dumps` | Find crash dumps
`memory_use` | Measure memory usage
`nodes_connected` | Cluster node liveness
`ring_membership` | Cluster membership validity
`ring_preflists` | Check if the ring satisfies `n_val`
`ring_size` | Check if the ring size is valid
`search` | Check whether Riak Search is enabled on all nodes

The `--level` flag enables you to specify the log level and thus to
filter messages based on type. You can pass in any of the message types
listed above (`debug`, `info`, etc.).

The `--level` flag can be used when running `riak-admin diag` with or
without specifying a diagnostic check.

#### Contributing

Do you have an idea that would help us improve Riaknostic? If so, fork
the [GitHub repository](https://github.com/basho/riaknostic) and send us
a pull request with your changes. The code is documented with
[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
Docs a read before you contribute.

If you want to run the Riaknostic script while developing and you don't
have it hooked up to your local Riak installation, you can invoke it
directly like so:

```bash
./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
```

Those extra options are usually assigned by the `riak-admin` script for
you, but here's how to set them:

* `--etc` --- The location of your Riak configuration directory (usually
  `/etc`). In the example above, configuration is in the generated
  directory of a source checkout of Riak.
* `--base` --- The "base" directory of Riak, usually the root of the
  generated directory or `/usr/lib/riak` on Linux. Scan the
  `riak-admin` script for how the `RUNNER_BASE_DIR` variable is
  assigned on your platform.
* `--user` --- The user/UID as which the Riak node runs.
  In a source checkout, it's the current user; on most systems, it's `riak`.

## Related Resources

* [The riak-admin configuration management tool](../../admin/riak-admin/)
* [Riaknostic](http://riaknostic.basho.com/)
* [HTTP API Status](../../../developing/api/http/status/)
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/load-balancing.md b/content/riak/kv/2.9.1/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..79ca8a7975
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/load-balancing.md
@@ -0,0 +1,16 @@
---
draft: true
title: "Load Balancing"
description: ""
project: "riak_kv"
project_version: 2.9.1
#menu:
#  riak_kv-2.9.1:
#    name: "Load Balancing"
#    identifier: "cluster_operations_load_balancing"
#    weight: 111
#    parent: "managing_cluster_operations"
toc: true
---

**TODO: Add content (not sure where this exists in docs)**
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/logging.md b/content/riak/kv/2.9.1/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..fda4f04072
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/logging.md
@@ -0,0 +1,42 @@
---
title: "Enabling and Disabling Debug Logging"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "Logging"
    identifier: "cluster_operations_logging"
    weight: 105
    parent: "managing_cluster_operations"
toc: true
---

If you'd like to enable debug logging on the current node, i.e. set the
console log level to `debug`, you can do so without restarting the node
by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:

```erlang
lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
```

You should replace the file location above (`/var/log/riak/console.log`)
with your platform-specific location, e.g. `./log/console.log` for a
source installation. This location is specified by the
`log.console.file` configuration parameter.

If you'd like to enable debug logging on _all_ nodes instead of just one
node, you can enter the Erlang console of any running node by running
`riak attach` and enter the following:

```erlang
rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
```

As before, use the appropriate log file location for your cluster.

At any time, you can set the log level back to `info`:

```erlang
rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
```
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/replacing-node.md b/content/riak/kv/2.9.1/using/cluster-operations/replacing-node.md
new file mode 100644
index 0000000000..e6b4c83dcc
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/replacing-node.md
@@ -0,0 +1,95 @@
---
title: "Replacing a Node"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "Replacing a Node"
    identifier: "cluster_operations_replace_node"
    weight: 102
    parent: "managing_cluster_operations"
toc: true
---

At some point, for various reasons, you might need to replace a node in
your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/2.9.1/using/repair-recovery)). Here is the recommended way to go
about replacing a node.
1. Back up your data directory on the node in question. In this example
scenario, we'll call the node `riak4`:

    ```bash
    sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
    ```

    If you have any unforeseen issues at any point in the node
    replacement process, you can restore the node's data from this
    backup.

2. Download and install Riak on the new node you wish to bring into the
cluster and have it replace the `riak4` node. We'll call the new node
`riak7` for the purpose of this example.

3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-cli/#start):

    ```bash
    riak start
    ```

4. Plan the join of the new `riak7` node to an existing node already
participating in the cluster (for example, `riak0`) with the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster) command executed on the new `riak7` node:

    ```bash
    riak-admin cluster join riak0
    ```

5. Plan the replacement of the existing `riak4` node with the new
`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster) command:

    ```bash
    riak-admin cluster replace riak4 riak7
    ```

    <div class="info">
    <div class="title">Single Nodes</div>
    If a node is started singly using default settings (as, for example,
    you might do when you are building your first test environment), you
    will need to remove the ring files from the data directory after you
    edit `/etc/vm.args`. `riak-admin cluster replace` will not work, as
    the node has not been joined to a cluster.
    </div>

6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster) command executed on the new
`riak7` node:

    ```bash
    riak-admin cluster plan
    ```

7. If the changes are correct, you can commit them with the
[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster) command:

    ```bash
    riak-admin cluster commit
    ```

    If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster):

    ```bash
    riak-admin cluster clear
    ```

Once you have successfully replaced the node, the old node (`riak4`)
should begin leaving the cluster. You can check on ring readiness after
replacing the node with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#ringready)
and [`riak-admin member-status`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#member-status)
commands.

{{% note title="Ring Settling" %}}
You'll need to make sure that no other ring changes occur between the time
when you start the new node and the ring settles with the new IP info.

The ring is considered settled when the new node reports `true` when you run
the `riak-admin ringready` command.
{{% /note %}}
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/secondary-indexes.md b/content/riak/kv/2.9.1/using/cluster-operations/secondary-indexes.md
new file mode 100644
index 0000000000..8dd4fb7448
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/secondary-indexes.md
@@ -0,0 +1,80 @@
---
draft: true
title: "Secondary Indexes"
description: ""
project: "riak_kv"
project_version: 2.9.1
#menu:
#  riak_kv-2.9.1:
#    name: "Secondary Indexes"
#    identifier: "cluster_operations_2i"
#    weight: 109
#    parent: "managing_cluster_operations"
toc: true
---

## Hanc capellae

Lorem markdownum Byblida.
Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` diff --git a/content/riak/kv/2.9.1/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.9.1/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..77cd952bdf --- /dev/null +++ b/content/riak/kv/2.9.1/using/cluster-operations/strong-consistency.md @@ -0,0 +1,71 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +--- + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. 
Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats. + +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly 
consistent keys and subsequent response
`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.9.1/using/cluster-operations/v2-multi-datacenter.md
new file mode 100644
index 0000000000..bebc1acefb
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/v2-multi-datacenter.md
@@ -0,0 +1,259 @@
---
title_supertext: "V2 Multi-Datacenter"
title: "Replication Operations"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "V2 Multi-Datacenter"
    identifier: "cluster_operations_v2"
    weight: 115
    parent: "managing_cluster_operations"
toc: true
commercial_offering: true
aliases:
  - /riak/2.9.1/ops/mdc/v2/operations
  - /riak/kv/2.9.1/ops/mdc/v2/operations
---

{{% note title="Deprecation Warning" %}}
v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter/) instead.
{{% /note %}}

Riak's Multi-Datacenter Replication system is largely
controlled by the `riak-repl` command. The sections below detail the
available subcommands.

## add-listener

Adds a listener (primary) to the given node, IP address, and port.

```bash
riak-repl add-listener <nodename> <listen_ip> <port>
```

Below is an example usage:

```bash
riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010
```

## add-nat-listener

Adds a NAT-aware listener (primary) to the given node, IP address, port,
NAT IP, and NAT port. If a non-NAT listener already exists with the same
internal IP and port, it is "upgraded" to a NAT listener.

```bash
riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port>
```

Below is an example usage:

```bash
riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010
```

## del-listener

Removes and shuts down a listener (primary) on the given node, IP
address, and port.

```bash
riak-repl del-listener <nodename> <listen_ip> <port>
```

Below is an example usage:

```bash
riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010
```

## add-site

Adds a site (secondary) to the local node, connecting to the specified
listener.

```bash
riak-repl add-site <ipaddr> <portnum> <sitename>
```

Below is an example usage:

```bash
riak-repl add-site 10.0.1.156 9010 newyork
```

## del-site

Removes a site (secondary) from the local node by name.

```bash
riak-repl del-site <sitename>
```

Below is an example usage:

```bash
riak-repl del-site newyork
```

## status

Obtains status information about replication. Reports counts of how much
data has been transmitted, transfer rates, message queue lengths of
clients and servers, number of fullsync operations, and connection
status. This command only displays useful information on the leader
node.
+ +```bash +riak-repl status +``` + +## start-fullsync + +Manually initiates a fullsync operation with connected sites. + +```bash +riak-repl start-fullsync +``` + +## cancel-fullsync + +Cancels any fullsync operations in progress. If a partition is in +progress, synchronization will stop after that partition completes. +During cancellation, `riak-repl status` will show `cancelled` in the +status. + +```bash +riak-repl cancel-fullsync +``` + +## pause-fullsync + +Pauses any fullsync operations in progress. If a partition is in +progress, synchronization will pause after that partition completes. +While paused, `riak-repl status` will show `paused` in the status +information. Fullsync may be cancelled while paused. + +```bash +riak-repl pause-fullsync +``` + +## resume-fullsync + +Resumes any fullsync operations that were paused. If a fullsync +operation was running at the time of the pause, the next partition will +be synchronized. If not, it will wait until the next `start-fullsync` +command or `fullsync_interval`. + +```bash +riak-repl resume-fullsync +``` + +## riak-repl Status Output + +The following definitions describe the output of the `riak-repl status` +command. Please note that many of these statistics will only appear on +the current leader node, and that all counts will be reset to 0 upon +restarting Riak. + +### Client + +Field | Description +:-----|:----------- +`client_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.1/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a> +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected sites +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of site connections made to this node +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. + +### Server + +Field | Description +:-----|:----------- +`server_bytes_recv` | The total number of bytes the server (listener) has received +`server_bytes_sent` | The total number of bytes the server (listener) has sent +`server_connect_errors` | The number of listener to site connection errors +`server_connects` | The number of times the listener connects to the client site +`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started +`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. 
`server_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.1/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a>

### Elections and Objects

Field | Description
:-----|:-----------
`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
`elections_leader_changed` | The number of times a Riak node has surrendered leadership
`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication
`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
`objects_sent` | The number of objects sent via realtime replication

### Other

Field | Description
:-----|:-----------
`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>`
`[sitename]_ips` | Defines a replication site
`leader` | Which node is the current leader of the cluster
`local_leader_message_queue_len` | The length of the object queue on the leader
`local_leader_heap_size` | The amount of memory the leader is using

## Client Statistics

Field | Description
------|------------
`node` | A unique ID for the Riak node on which the client (site) is running
`site` | The connected site name configured with `riak-repl add-site`
`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`
`fullsync_worker` | The Erlang process ID of the fullsync worker
`waiting_to_retry` | The listeners currently waiting to retry replication after a failure
`connected` | A list of connected clients<ul><li>`connected` --- The IP address and port of a connected client (site)</li><li>`cluster_name` --- The name of the connected client (site)</li><li>`connecting` --- The PID, IP address, and port of a client currently establishing a connection</li></ul>
`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul>

## Bounded Queue

The bounded queue is responsible for holding objects that are waiting to
participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/2.9.1/configuring/v2-multi-datacenter/) guide for more information.

Field | Description
------|------------
`queue_pid` | The Erlang process ID of the bounded queue
`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
`queue_length` | The number of Riak objects currently in the bounded queue
`queue_byte_size` | The size of all objects currently in the queue
`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
`queue_percentage` | The percentage of the queue that is full
`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more

## Server Statistics

Field | Description
------|------------
`node` | A unique ID for the Riak node on which the server (listener) is running
`site` | The connected site name configured with `riak-repl add-site`
`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`.
`fullsync_worker` | The Erlang process ID of the fullsync worker
`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/2.9.1/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above
`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul>
`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server

## Keylist Strategy

The following fields appear under both the `keylist_server` and
`keylist_client` sections; any differences between the two are noted in
the descriptions.

Field | Description
------|------------
`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
`partition_start` | The number of elapsed seconds since replication has started on a given partition
`stage_start` | The number of elapsed seconds since replication has started on a given stage
`get_pool_size` | The number of Riak get finite state workers available to process requests
diff --git a/content/riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter.md
new file mode 100644
index 0000000000..31e392e409
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter.md
@@ -0,0 +1,421 @@
---
title_supertext: "V3 Multi-Datacenter"
title: "Replication Operations"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "V3 Multi-Datacenter"
    identifier: "cluster_operations_v3"
    weight: 114
    parent: "managing_cluster_operations"
toc: true
commercial_offering: true
aliases:
  - /riak/2.9.1/ops/mdc/v3/operations
  - /riak/kv/2.9.1/ops/mdc/v3/operations
---

[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.1/configuring/v3-multi-datacenter
[config v3 nat]: {{<baseurl>}}riak/kv/2.9.1/configuring/v3-multi-datacenter/nat
[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.1/configuring/v3-multi-datacenter/quick-start
[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.1/configuring/v3-multi-datacenter/ssl
[ref v3 stats]: {{<baseurl>}}riak/kv/2.9.1/using/reference/multi-datacenter/statistics

This document explains how to manage replication with the `riak-repl`
command. The behavior of some of these commands can be altered by
setting the appropriate [configuration][config v3 mdc] values.

All commands need to be run only once on a single node of a cluster for
All changes will persist +across node restarts and will automatically take effect when nodes are +added to the cluster. + +## Cluster Connectivity + +#### clustername + +Set the `clustername` for all nodes in a Riak cluster. + +* Without a parameter, returns the current name of the cluster +* With a parameter, names the current cluster + +To **set** the `clustername`: + +* Syntax: `riak-repl clustername <clustername>` +* Example: `riak-repl clustername Boston` + +To **get** the `clustername`: + +* Syntax: `riak-repl clustername` +* Example: `riak-repl clustername` + +#### connect + +The `connect` command establishes communications from a source cluster +to a sink cluster of the same ring size. The `host:port` of the sink +cluster is used for this. The IP and port to connect to can be found in +the `advanced.config` of the remote cluster, under `riak_core` and +`cluster_mgr`. + +The `host` can be either an IP address + +* Syntax: `riak-repl connect <ip>:<port>` +* Example: `riak-repl connect 192.168.2.1:9080` + +...or a hostname that will resolve to an IP address. + +* Syntax: `riak-repl connect <host>:<port>` +* Example: `riak-repl connect Austin:9080` + +#### disconnect + +Disconnecting a source cluster from a sink cluster. + +You may define a `host:port` combination + +* Syntax: `riak-repl disconnect <host>:<port>` +* Example: `riak-repl disconnect 192.168.2.1:9080` + +...or use the *name* of the cluster. + +* Syntax: `riak-repl disconnect <sink_clustername>` +* Example: `riak-repl disconnect Austin` + +#### connections + +Display a list of connections between source and sink clusters. + +* Syntax: `riak-repl connections` +* Example: `riak-repl connections` + +#### clusterstats + +Displays current cluster stats using an optional `ip:port` as well as an +optional `protocol-id`. + +`protocol-id` can be one of the following: + +* `cluster_mgr` +* `rt_repl` +* `fs_repl` + +The `clusterstats` command in use: + +* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>` +* Example: `riak-repl clusterstats 192.168.2.1:9080` +* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl` + + +## Realtime Replication Commands + +#### realtime enable + +Enable realtime replication from a source cluster to sink clusters. + +This will start queuing updates for replication. The cluster will still +require an invocation of `realtime start` for replication to occur. + +* Syntax: `riak-repl realtime enable <sink_clustername>` +* Example: `riak-repl realtime enable Austin` + +#### realtime disable + +Disable realtime replication from a source cluster to sink clusters. + +* Syntax: `riak-repl realtime disable <sink_clustername>` +* Example: `riak-repl realtime disable Austin` + + +#### realtime start + +Start realtime replication connections from a source cluster to sink +clusters. See also `realtime enable` (above). + +* Syntax: `riak-repl realtime start <sink_clustername>` +* Example: `riak-repl realtime start Austin` + +#### realtime stop + +Stop realtime replication from a source cluster to sink clusters. + +* Syntax `riak-repl realtime stop <sink_clustername>` +* Example `riak-repl realtime stop Austin` + + +## Fullsync Replication Commands + +These behaviors can be altered by using the `advanced.config` +`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information. + +#### fullsync enable + +Enable fullsync replication from a source cluster to sink clusters. By +default, a fullsync will begin as soon as a connection to the remote +cluster is established. 

* Syntax: `riak-repl fullsync enable <sink_clustername>`
* Example: `riak-repl fullsync enable Austin`

#### fullsync disable

Disables fullsync for a cluster.

* Syntax: `riak-repl fullsync disable <sink_clustername>`
* Example: `riak-repl fullsync disable Austin`

#### fullsync start

Starts a fullsync. If the application configuration
`fullsync_on_connect` is set to `false`, a fullsync needs to be started
manually. This is also used to trigger a periodic fullsync using a cron
job. While a fullsync is in progress, a `start` command is ignored and a
message is logged.

* Syntax: `riak-repl fullsync start <sink_clustername>`
* Example: `riak-repl fullsync start Austin`

#### fullsync stop

Stops a fullsync.

* Syntax: `riak-repl fullsync stop <sink_clustername>`
* Example: `riak-repl fullsync stop Austin`

## Cascading Realtime Writes

#### realtime cascades

Shows the current cascading realtime setting.

* Syntax: `realtime cascades`
* Example: `riak-repl realtime cascades`

#### realtime cascades always

Enables realtime cascading writes.

* Syntax: `realtime cascades always`
* Example: `riak-repl realtime cascades always`

#### realtime cascades never

Disables realtime cascading writes.

* Syntax: `realtime cascades never`
* Example: `riak-repl realtime cascades never`

## NAT

**Note**: See the [V3 Multi Data Center Replication With NAT][config v3 nat] guide for more information.

#### nat-map show

Shows the current NAT mapping table.

* Syntax: `nat-map show`
* Example: `riak-repl nat-map show`

#### nat-map add

Adds a NAT map from the external IP, with an optional port, to an
internal IP.

* Syntax: `nat-map add <externalip>[:port] <internalip>`
* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`

#### nat-map del

Deletes a specific NAT map entry.

* Syntax: `nat-map del <externalip>[:port] <internalip>`
* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`

NAT changes will be applied once fullsync and/or realtime replication
has been stopped and started.

## Riak CS MDC Gets

#### proxy-get enable

Enables Riak CS `proxy_get` requests from a **sink** cluster (if
`proxy_get` has been enabled in `advanced.config`).

* Syntax: `proxy-get enable <sink_clustername>`
* Example: `riak-repl proxy-get enable newyorkbackup`

#### proxy-get disable

Disables Riak CS `proxy_get` requests from a **sink** cluster (if
`proxy_get` has been enabled in `advanced.config`).

* Syntax: `proxy-get disable <sink_clustername>`
* Example: `riak-repl proxy-get disable newyorkbackup`

#### `add-block-provider-redirect`

Provides a redirect to the `<to-cluster>` for `proxy_get` requests if
the `<from-cluster>` is going to be decommissioned.

* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`

#### `show-block-provider-redirect`

Shows the mapping for a given cluster-id redirect.

* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`

#### `delete-block-provider-redirect`

Deletes an existing redirect so that proxy_get requests go to the
original provider cluster ID again.

* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`

#### `show-local-cluster-id`

Displays this cluster's cluster-id tuple, for use with the
`*-block-provider-redirect` commands.

**Note**: A cluster-id is surrounded by double quotes, which need to be
included when passed to `*-block-provider-redirect`.

* Syntax: `riak-repl show-local-cluster-id`
* Example:

    ```bash
    riak-repl show-local-cluster-id
    ```

    Possible output:

    ```
    local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
    ```

## `riak-repl` Status Output

Details about the `riak-repl status` command can be found under
[Statistics][ref v3 stats].

## Tuning

These tuning values may also be set via the node's `advanced.config` file.
See the [Configuration Guide][config v3 mdc] for more information.

#### `fullsync max_fssource_node`

This limits the number of fullsync workers that will be running on each
individual node in a source cluster. This is a hard limit for *all*
fullsyncs that are enabled. Additional fullsync configurations will
*not* increase the number of fullsync workers allowed to run on any
node. This only affects nodes on the source cluster on which this
parameter is defined via the configuration file or command line.

* Syntax: `riak-repl fullsync max_fssource_node <value>`
* Default: `1`
* Example: `riak-repl fullsync max_fssource_node 2`

#### `fullsync max_fssource_cluster`

This is the hard limit of fullsync workers that will be running on the
source side of a cluster across all nodes on that cluster for a fullsync
to a sink cluster. This means that if fullsync is configured for two
different clusters, both with a `max_fssource_cluster` of 5, then 10
fullsync workers can be in progress. This only affects nodes on the
source cluster on which this parameter is defined via the configuration
file or the command line.

* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
* Default: `5`
* Example: `riak-repl fullsync max_fssource_cluster 5`

#### `fullsync max_fssink_node`

This limits the number of fullsync workers allowed to run on each
individual node in a sink cluster. This is a hard limit for each
fullsync source node interacting with a sink node. Thus, multiple
simultaneous source connections to a sink node will have to share the
sink node's number of maximum connections. This only affects nodes on
the sink cluster on which this parameter is defined via the
configuration file or command line.

* Syntax: `riak-repl fullsync max_fssink_node <value>`
* Default: `1`
* Example: `riak-repl fullsync max_fssink_node 5`

## Mixing Version 2 Replication with Version 3 Replication

Riak Version 2 Replication and Version 3 Replication can be safely used
at the same time. If you choose to move to Version 3 Replication
completely, we recommend disabling Version 2 realtime
replication bucket hooks with the `riak-repl modes` command.

#### `riak-repl modes`

`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
(Version 3), separated by spaces (without commas).

* Syntax: `riak-repl modes <modelist>`
* Example:

    ```bash
    riak-repl modes mode_repl12 mode_repl13
    ```

    Possible output:

    ```
    Current replication modes: [mode_repl12,mode_repl13]
    ```

To check the current replication modes:

* Syntax: `riak-repl modes`
* Example:

    ```bash
    riak-repl modes
    ```

    Possible output:

    ```
    Current replication modes: [mode_repl12,mode_repl13]
    ```

## Configurations and Metadata in Replication

Fullsync and realtime replication replicate data from source clusters to
sink clusters, but some configurations and metadata (such as search
indices and bucket properties) will not be replicated.

Non-replication of certain configurations and metadata supports
heterogeneous cluster configurations in replication, but there are
operational steps you can take when you want homogeneous cluster
configurations.

### Search Indices in Replication

Any search index that is created on a source cluster will _not_ be
created on sink clusters as part of replication.

If you want search indices on a source cluster to be present on the
sink clusters, you should update this data for each
cluster at the same time you would change the source cluster.

### Buckets and Bucket Types in Replication

Bucket and bucket type properties on the source cluster
will _not_ be replicated from source clusters to sink clusters.

If you want the properties for buckets or bucket types
present on the source cluster to be propagated to sink clusters,
you should update this data for each cluster at the same
time you would change the source cluster.
diff --git a/content/riak/kv/2.9.1/using/performance.md b/content/riak/kv/2.9.1/using/performance.md
new file mode 100644
index 0000000000..2477145405
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/performance.md
@@ -0,0 +1,264 @@
---
title: "Improving Performance"
description: ""
project: "riak_kv"
project_version: 2.9.1
menu:
  riak_kv-2.9.1:
    name: "Performance"
    identifier: "managing_performance"
    weight: 206
    parent: "managing"
toc: true
aliases:
  - /riak/kv/2.9.1/ops/tuning/linux/
  - /riak/2.9.1/ops/tuning/linux/
---

Many Unix-like operating systems and distributions are tuned for desktop
or light use out of the box and not for a production database. This
guide describes recommended system performance tunings for operators of
new and existing Riak clusters. The tunings present in this guide should
be considered a starting point. It is important to note what changes are
made, and when, in order to measure the impact of those changes.

For performance and tuning recommendations specific to running Riak
clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.1/using/performance/amazon-web-services).

{{% note title="Note on other operating systems" %}}
Unless otherwise specified, the tunings recommended below are for Linux
distributions. Users implementing Riak on BSD and Solaris distributions can
use these tuning recommendations to make analogous changes in those operating
systems.
{{% /note %}}

## Storage and File System Tuning

### Virtual Memory

Due to the heavily I/O-focused profile of Riak, swap usage can result in
the entire server becoming unresponsive. We recommend setting
`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much
as possible:

```config
vm.swappiness = 0
```

Ideally, you should disable swap to ensure that Riak's process pages are
not swapped.
Disabling swap will allow Riak to crash in situations where
it runs out of memory. This will leave a crash dump file, named
`erl_crash.dump`, in the `/var/log/riak` directory, which can be used to
determine the cause of the memory usage.

### Transparent Huge Pages (THP)

Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately, this operation is rather OS-specific. As many of our customers are running Red Hat 6, we have included instructions for that platform below. If you are using a different operating system, please refer to the documentation for your OS.

In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line:

```
transparent_hugepage=never
```

For the change to become effective, a server reboot is required.

{{% note title="Note on Kernel Tuning Tools" %}}
Some kernel tuning tools such as ktune specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the kernel tuning tool you are using as to how to disable THP.
{{% /note %}}

### Mounts

Riak makes heavy use of disk I/O for its storage operations. It is
important that you mount volumes that Riak will be using for data
storage with the `noatime` flag, meaning that filesystem
[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
touched when read. This flag can be set temporarily using the following
command:

```bash
mount -o remount,noatime <riak_data_volume>
```

Replace `<riak_data_volume>` in the above example with your actual Riak
data volume. The `noatime` flag can be set in `/etc/fstab` to make the
mount permanent.

### Schedulers

I/O or disk scheduling is a blanket term used to describe the method by
which an operating system chooses how to order input and output
operations to and from storage.

The default I/O scheduler (elevator) on Linux is completely fair
queuing, or `cfq`, which is designed for desktop use. While it is a good
general-purpose scheduler, it is not designed to provide the kind of
throughput expected in production database deployments.

Scheduler recommendations:

* The `noop` scheduler when deploying on iSCSI over HBAs, or any
  hardware-based RAID.
* The `deadline` scheduler when using SSD-based storage.

To check the scheduler in use for block device `sda`, for example, use
the following command:

```bash
cat /sys/block/sda/queue/scheduler
```

To set the scheduler to `deadline`, use the following command:

```bash
echo deadline > /sys/block/sda/queue/scheduler
```

The default I/O scheduler queue size is 128. The scheduler queue sorts
writes in an attempt to optimize for sequential I/O and reduce seek
time. Changing the depth of the scheduler queue to 1024 can increase the
proportion of sequential I/O that disks perform and improve overall
throughput.
+ +To check the scheduler depth for block device `sda`, use the following +command: + +```bash +cat /sys/block/sda/queue/nr_requests +``` + +To increase the scheduler depth to 1024, use the following command: + +```bash +echo 1024 > /sys/block/sda/queue/nr_requests +``` + +### Filesystem + +Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and +[XFS](http://xfs.org/index.php/Main_Page) are recommended on some +operating systems for greater reliability and recoverability. + +At this time, Basho can recommend using ZFS on Solaris, SmartOS, and +OmniOS. ZFS may work well with Riak on direct Solaris clones like +IllumOS, but we cannot yet recommend this. [ZFS on +Linux](http://zfsonlinux.org) is still too early in its project lifetime +to be recommendable for production use due to concerns that have been +raised about excessive memory use. ZFS on FreeBSD is more mature than +ZFS on Linux, but Basho has not yet performed sufficient performance and +reliability testing to recommend using ZFS and Riak on FreeBSD. + +In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and +[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on +operating systems on which ZFS or XFS are not available or recommended. + +The ext4 file system defaults include two options that increase +integrity but slow performance. Because Riak's integrity is based on +multiple nodes holding the same data, these two options can be changed +to boost I/O performance. We recommend setting `barrier=0` and +`data=writeback` when using the ext4 filesystem. + +Similarly, the XFS file system defaults can be optimized to improve +performance. We recommend setting `nobarrier`, `logbufs=8`, +`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem. + +As with the `noatime` setting, these settings should be added to +`/etc/fstab` so that they are persisted across server restarts. + +## Kernel and Network Tuning + +The following settings are minimally sufficient to improve many aspects +of Riak usage on Linux, and should be added or updated in +`/etc/sysctl.conf`: + +```config +net.ipv4.tcp_max_syn_backlog = 40000 +net.core.somaxconn = 40000 +net.core.wmem_default = 8388608 +net.core.rmem_default = 8388608 +net.ipv4.tcp_sack = 1 +net.ipv4.tcp_window_scaling = 1 +net.ipv4.tcp_fin_timeout = 15 +net.ipv4.tcp_keepalive_intvl = 30 +net.ipv4.tcp_tw_reuse = 1 +net.ipv4.tcp_moderate_rcvbuf = 1 +``` + +{{% note title="Note on system default" %}} +In general, these recommended values should be compared with the system +defaults and only changed if benchmarks or other performance metrics indicate +that networking is the bottleneck. +{{% /note %}} + +The following settings are optional, but may improve performance on a +10Gb network: + +```config +net.core.rmem_max = 134217728 +net.core.wmem_max = 134217728 +net.ipv4.tcp_mem = 134217728 134217728 134217728 +net.ipv4.tcp_rmem = 4096 277750 134217728 +net.ipv4.tcp_wmem = 4096 277750 134217728 +net.core.netdev_max_backlog = 300000 +``` + +Certain network interfaces ship with on-board features that have been +shown to hinder Riak network performance. These features can be disabled +via `ethtool`. 
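+
+Before disabling anything, you may want to list the offload features
+currently enabled on an interface using `ethtool`'s lowercase `-k` query
+flag (the interface name `eth0` below is illustrative):
+
+```bash
+# Show the current offload settings for eth0
+ethtool -k eth0
+```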
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Take care when changing these values, as they affect all network
+operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho on nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, it is necessary to
+increase the open files limit. See [Open Files Limit]({{<baseurl>}}riak/kv/2.9.1/using/performance/open-files-limit/) for more
+details.
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.1/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/2.9.1/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/2.9.1/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/2.9.1/using/performance/open-files-limit/)
diff --git a/content/riak/kv/2.9.1/using/performance/amazon-web-services.md b/content/riak/kv/2.9.1/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..931744b435
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/performance/amazon-web-services.md
@@ -0,0 +1,243 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/tuning/aws
+  - /riak/kv/2.9.1/ops/tuning/aws
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types, which encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are Disk I/O, RAM, and Network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used. 
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-Optimized EC2 instances, which provide between 500 and 1,000
+megabits per second of throughput to [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available and are recommended for use with
+Provisioned IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they would
+physical nodes to compensate for the performance variability caused by
+shared, virtualized resources. Plan to have more EC2-instance-based nodes
+than physical-server nodes when estimating cluster size with respect to node
+count.
+{{% /note %}}
+
+## Operating System
+
+### Clocks
+
+NTP is configured by default on Amazon EC2 Linux instances. Please
+refer to the [Set the Time for an
+Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html)
+section of the EC2 documentation for steps on verifying if NTP is
+working properly. If NTP is not working properly, significant clock
+drift can occur.
+
+### Mounts and Scheduler
+
+On EBS volumes, the **deadline** scheduler should be used. To check the
+scheduler in use for block device `xvdf`, for example, use the following
+command:
+
+```bash
+cat /sys/block/xvdf/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/xvdf/queue/scheduler
+```
+
+More information on the disk scheduler is available in [Improving Performance](../).
+
+### Virtual Memory Subsystem
+
+EBS volumes have considerably less bandwidth than hardware disks. To
+avoid saturating EBS bandwidth and inducing I/O latency spikes, it is
+recommended to tune the Linux virtual memory subsystem to flush smaller
+amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings).
+
+### Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally
+and are not affected by a wider problem like an AWS service outage. Try
+to determine the cause of the problem from the data you have collected.
+If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or resold through Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk.
+
+Have your collected data ready when contacting TI Tokyo Client Services. A
+Client Services Engineer (CSE) might request log files, configuration
+files, or other information. 
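+
+As a minimal sketch, a collection script along the following lines can be
+used to gather the most commonly requested files. The paths assume a
+default package install; adjust them for your environment:
+
+```bash
+#!/usr/bin/env bash
+# Collect logs and configuration from a Riak node for a support ticket.
+ts=$(date +%Y%m%d-%H%M%S)
+out="riak-forensics-$ts"
+mkdir -p "$out"
+cp -r /var/log/riak "$out/riak-logs" 2>/dev/null
+cp -r /etc/riak "$out/riak-config" 2>/dev/null
+dmesg > "$out/dmesg.txt"
+cp /var/log/syslog "$out/" 2>/dev/null || cp /var/log/messages "$out/" 2>/dev/null
+tar czf "$out.tar.gz" "$out"
+```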
+ +## Data Loss + +Many failures either do not entail data loss or have minimal loss that +can be repaired automatically, without intervention. Outage of a single +node does not necessarily cause data loss, as other replicas of every +key are available elsewhere in the cluster. Once the node is detected as +down, other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called hinted handoff). + +The more severe data loss scenarios usually relate to hardware failure +(in the case of AWS, service failure or instance termination). In the +cases where data is lost, several options are available for restoring +the data: + +1. Restore from backup. A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but can be used to partially restore data from + lost EBS volumes. If running in a RAID configuration, rebuilding the + array may also be possible. +2. Restore from Multi-Datacenter Replication. If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the + `riak-repl` command. +3. Restore using intra-cluster repair. Riak versions 1.2 and greater + include a "repair" feature which will restore lost partitions with + data from other replicas. This currently has to be invoked manually + using the Riak console and should be performed with guidance from a + Basho CSE. + +Once data has been restored, normal operations should continue. If +multiple nodes completely lose their data, consultation and assistance +from Basho is strongly recommended. + +## Benchmarking + +Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that +simulates application operations by constructing and communicating +approximately-compatible data payloads with the Riak cluster directly. + +Benchmarking is critical to determining the appropriate EC2 instance +types, and strongly recommended. More information is available on +benchmarking Riak clusters with [Basho Bench](../benchmarking). + +Besides running Basho Bench, we also advise that you load test Riak with +your own tests to ensure that load imparted by MapReduce queries, +full-text queries, and index queries are within the expected range. + +## Simulating Upgrades, Scaling, and Failure states + +In addition to simply measuring performance, it is also important to +measure how performance degrades when the cluster is not in +steady-state. While under a simulation of live load, the following +states might be simulated: + +1. Stop one or more nodes normally and restart them after a few moments + (simulates [rolling upgrade](../../../setup/upgrading/cluster)). +2. Join two or more nodes to the cluster. +3. Leave nodes from the cluster (after step #2). +4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then + restart it. +5. Hard-reboot a node's instance using the AWS console and then + restart it. +6. Hard-stop and destroy a node's instance and build a new one from + backup. +7. Via networking, e.g. firewall, partition one or more nodes from + the rest of the cluster and then restore the original + configuration. + +## Out-of-Memory + +Sometimes, Riak will exit when it runs out of available RAM. 
While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. While the Riak node is out, other nodes may also
+be at risk if free capacity is low on the rest of the cluster, so
+monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to update
+the node name.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart. In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance, generally performs better, but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. Along with EBS, you can optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
diff --git a/content/riak/kv/2.9.1/using/performance/benchmarking.md b/content/riak/kv/2.9.1/using/performance/benchmarking.md
new file mode 100644
index 0000000000..fa1e7b5968
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/performance/benchmarking.md
@@ -0,0 +1,598 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/benchmarking
+  - /riak/kv/2.9.1/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   ```basho_bench```. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster. 
Do not run the ```basho_bench``` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download ```basho_bench```
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/2.9.1/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building ```basho_bench``` from source. Later
+  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the ```erlang-crypto``` package, which
+  is required by ```basho_bench```.
+* Install ```git``` (to check out the ```basho_bench``` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory to generate the results into:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed ```basho_bench``` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common ```~/``` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+```/home/username/bench_results/current/```.
+
+If you built ```basho_bench``` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput --- Operations per second over the duration of the test.
+* Latency at the 99th percentile, 99.9th percentile, and maximum for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running basho_bench, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop). 
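+
+For example, a hypothetical invocation that copies the latest results from a
+load-generating host named `loadgen` to the local machine might look like
+this (host, user, and paths are illustrative):
+
+```bash
+# Pull the most recent benchmark results down for graphing
+rsync -avz user@loadgen:/home/username/bench_results/current/ \
+      ./bench_results/current/
+```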
+ +#### Installing R on Ubuntu + +``` +sudo apt-get install r-base +``` + +#### Installing R on Other Platforms + +- [More information](http://www.r-project.org/) +- [Download R](http://cran.r-project.org/mirrors.html) + +Follow the instructions for your platform to install R. + +### Generating Graphs + +If you have installed ```basho_bench``` from a pre-built package, and +you also have R installed on the same machine, you can generate the +current result graph with the following: + +```bash +Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/ +``` + +This will create a results file in +```/home/username/bench_results/summary.png```. + +If you have built ```basho_bench``` from source, you can just use +```make```. To generate a benchmark graph against the current +results, run: + +```bash +make results +``` + +This will create a results file in `tests/current/summary.png`. + +You can also run this manually: + +```bash +priv/summary.r -i tests/current +``` + +### Troubleshooting Graph Generation + +For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation) +section of the ```basho_bench/README```. + +## How does it work? + +When Basho Bench starts (`basho_bench.erl`), it reads the +configuration (`basho_bench_config.erl`), creates a new results +directory, and then sets up the test (`basho_bench_app.erl` and +`basho_bench_sup.erl`). + +During test setup, Basho Bench creates the following: + +* One **stats process** (`basho_bench_stats.erl`). This process + receives notifications when an operation completes, plus the + elapsed time of the operation, and stores it in a histogram. At + regular intervals, the histograms are dumped to `summary.csv` as + well as operation-specific latency CSVs (e.g. `put_latencies.csv` + for the PUT operation). +* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting + (`basho_bench_worker.erl`). The worker process wraps a driver + module, specified by the [driver](#driver) + configuration setting. The driver is randomly invoked using the + distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the + driver invokes operations is governed by the [mode](#mode) setting. + +Once these processes have been created and initialized, Basho Bench +sends a run command to all worker processes, causing them to begin the +test. Each worker is initialized with a common seed value for random +number generation to ensure that the generated workload is reproducible +at a later date. + +During the test, the workers repeatedly call `driver:run/4`, passing in +the next operation to run, a keygen function, a valuegen function, and +the last state of the driver. The worker process times the operation, +and reports this to the stats process when the operation has completed. + +Finally, once the test has been run for the duration specified in the +config file, all workers and stats processes are terminated and the +benchmark ends. The measured latency and throughput of the test can be +found in `./tests/current/`. Previous results are in timestamped +directories of the form `./tests/YYYYMMDD-HHMMSS/`. + +## Configuration + +Basho Bench ships with a number of sample configuration files, available +in the `/examples` directory. 
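+
+For example, when working from a source checkout, you might copy one of the
+shipped example files and adjust it before running (```riakc_pb.config``` is
+one of the files in the ```basho_bench``` ```examples``` directory):
+
+```bash
+# Start from a shipped example config, then edit it to point at your cluster
+cp examples/riakc_pb.config myconfig.config
+./basho_bench myconfig.config
+```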
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`{driver:run/4}` function with a new operation. There are two possible
+values:
+
+* `{max}` --- generate as many ops per second as possible
+* `{rate, N}` --- generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e.: as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests against the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE, respectively.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                          [{{{basho_bench_driver_null,run,
+                                              [asdfput,
+                                               #Fun<basho_bench_keygen.4.4674>,
+                                               #Fun<basho_bench_valgen.0.1334>,
+                                               undefined]}}},
+                                           {{{basho_bench_worker,
+                                             worker_next_op,1}}},
+                                           {{{basho_bench_worker,
+                                             max_worker_run_loop,1}}}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` --- Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` --- Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` --- Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` --- Directly invokes the Bitcask API
+* `basho_bench_driver_dets` --- Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` --- operation completed successfully
+* `{error, Reason, NewState}` --- operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` --- operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` --- operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. 
Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` --- generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` --- the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` --- the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` --- selects an integer from a uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` --- selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` --- the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` --- specifies an external
+  function that should return a key generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` --- takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` --- takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` --- generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` --- generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` --- generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` --- specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}. 
+
+% Generate a random binary whose size is exponentially distributed
+% starting at 1000 bytes with a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+Default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak nodes. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+```
+
+#### riakclient_replies
+
+This value is used for R-values during a get operation, and W-values
+during a put operation.
+
+```erlang
+% Expect 1 reply.
+{riakclient_replies, 1}.
+```
+
+#### riakclient_bucket
+
+The Riak bucket to use for reading and writing values. The default is
+`<<"test">>`.
+
+```erlang
+% Use the "bench" bucket.
+{riakclient_bucket, <<"bench">>}.
+```
+
+### basho_bench_driver_riakc_pb Settings
+
+#### riakc_pb_ips
+
+A list of IP addresses to connect the workers to. A random IP will be
+chosen for each worker.
+
+The default is `{riakc_pb_ips, [{127,0,0,1}]}`
+
+```erlang
+% Connect to a cluster of 3 machines
+{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]}
+```
+
+#### riakc_pb_port
+
+The port on which to connect to the PBC interface.
+
+The default is `{riakc_pb_port, 8087}`
+
+#### riakc_pb_bucket
+
+The bucket to use for testing.
+
+The default is `{riakc_pb_bucket, <<"test">>}`
+
+### basho_bench_driver_http_raw Settings
+
+#### http_raw_ips
+
+A list of IP addresses to connect the workers to. Each worker makes
+requests to each IP in a round-robin fashion.
+
+The default is `{http_raw_ips, ["127.0.0.1"]}`
+
+```erlang
+% Connect to a cluster of machines in the 10.x network
+{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}.
+```
+
+#### http_raw_port
+
+Select the default port to connect to for the HTTP server.
+
+The default is `{http_raw_port, 8098}`.
+
+```erlang
+% Connect on port 8090
+{http_raw_port, 8090}.
+```
+
+#### http_raw_path
+
+The base path to use for accessing Riak, usually `"/riak/<bucket>"`.
+
+The default is `{http_raw_path, "/riak/test"}`.
+
+```erlang
+% Place test data in another_bucket
+{http_raw_path, "/riak/another_bucket"}.
+```
+
+#### http_raw_params
+
+Additional parameters to add to the end of the URL. This can be used
+to set the `r`/`w`/`dw`/`rw` parameters as desired.
+
+The default is `{http_raw_params, ""}`. 
+ +```erlang +% Set R=1, W=1 for testing a system with n_val set to 1 +{http_raw_params, "?r=1&w=1"}. +``` + +#### http_raw_disconnect_frequency + +How often, in seconds or number of operations, the HTTP clients +(workers) should forcibly disconnect from the server. + +The default is `{http_raw_disconnect_frequency, infinity}` (which +means that Basho Bench should never forcibly disconnect). + +```erlang +% Disconnect after 60 seconds +{http_raw_disconnect_frequency, 60}. + +% Disconnect after 200 operations +{http_raw_disconnect_frequency, {ops, 200}}. +``` + +## Custom Driver + +A custom driver must expose the following callbacks. + +```erlang +% Create the worker +% ID is an integer +new(ID) -> {ok, State} or {error, Reason}. + +% Run an operation +run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}. +``` + +See the [existing +drivers](https://github.com/basho/basho_bench/tree/master/src) for +more details. diff --git a/content/riak/kv/2.9.1/using/performance/erlang.md b/content/riak/kv/2.9.1/using/performance/erlang.md new file mode 100644 index 0000000000..49c64ec8a2 --- /dev/null +++ b/content/riak/kv/2.9.1/using/performance/erlang.md @@ -0,0 +1,367 @@ +--- +title: "Erlang VM Tuning" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Erlang VM" + identifier: "performance_erlang" + weight: 105 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.9.1/ops/tuning/erlang + - /riak/kv/2.9.1/ops/tuning/erlang +--- + +Riak was written almost exclusively in [Erlang](http://www.erlang.org) +and runs on an Erlang virtual machine (VM), which makes proper Erlang VM +tuning an important part of optimizing Riak performance. The Erlang VM +itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm). + +The table below lists some of the parameters that are available, showing +both their names as used in Erlang and their names as Riak parameters. 
+
+Erlang parameter | Riak parameter
+:----------------|:--------------
+[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads`
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K`
+[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit`
+[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports`
+[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online`
+[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W`
+[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size`
+[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables`
+[`+scl`](http://www.erlang.org/doc/man/erl.html#+scl) | `erlang.schedulers.compaction_of_load`
+[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval`
+[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp`
+[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing`
+[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size`
+[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime`
+[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after`
+[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump`
+[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables`
+`-name` | `nodename`
+
+{{% note title="Note on upgrading to 2.0" %}}
+In versions of Riak prior to 2.0, Erlang VM-related parameters were specified
+in a `vm.args` configuration file; in versions 2.0 and later, all
+Erlang-VM-specific parameters are set in the `riak.conf` file. If you're
+upgrading to 2.0 from an earlier version, you can still use your old `vm.args`
+if you wish. Please note, however, that if you set one or more parameters in
+both `vm.args` and in `riak.conf`, the settings in `vm.args` will override
+those in `riak.conf`.
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. 
If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. 
To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` parameter to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. epmd uses an unpredictable port for inter-node communication
+by default, binding to port 0, which means that it uses the first
+available port. This can make it difficult to configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively. The
+following would set the range to ports between 3000 and 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 3000
+erlang.distribution.port_range.maximum = 5000
+```
+
+```appconfig
+%% The older, app.config-based system uses different parameter names
+%% for specifying the minimum and maximum port
+
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 3000},
+          {inet_dist_listen_max, 5000}
+          % ...
+         ]}
+```
+
+You can set the Erlang VM to use a single port by setting the minimum to
+the desired port while setting no maximum. The following would set the
+port to 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 5000
+```
+
+```appconfig
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 5000},
+          % ...
+         ]}
+```
+
+If the minimum port is unset, the Erlang VM will listen on a random
+high-numbered port.
+
+### Maximum Ports
+
+You can set the maximum number of concurrent ports/sockets used by the
+Erlang VM using the `erlang.max_ports` setting. Possible values range
+from 1024 to 134217727. The default is 65536. In `vm.args` you can use
+either `+Q` or `-env ERL_MAX_PORTS`. 
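+
+For example, to raise the cap to four times the default, you could set the
+following (the value is illustrative, not a recommendation):
+
+```riakconf
+erlang.max_ports = 262144
+```
+
+```vmargs
++Q 262144
+```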
+
+## Asynchronous Thread Pool
+
+If thread support is available in your Erlang VM, you can set the number
+of asynchronous threads in the Erlang VM's asynchronous thread pool
+using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0
+to 1024. If thread support is available on your OS, the default is 64.
+Below is an example setting the number of async threads to 600:
+
+```riakconf
+erlang.async_threads = 600
+```
+
+```vmargs
++A 600
+```
+
+### Stack Size
+
+In addition to the number of asynchronous threads, you can determine the
+memory allocated to each thread using the
+`erlang.async_threads.stack_size` parameter, which corresponds to the
+`+a` Erlang flag. You can determine that size in Riak using KB, MB, GB,
+etc. The valid range is 16-8192 kilowords, which translates to 64-32768
+KB on 32-bit architectures. While there is no default, we suggest a
+stack size of 16 kilowords, which translates to 64 KB. We suggest such a
+small size because the number of asynchronous threads, as determined by
+`erlang.async_threads`, might be quite large in your Erlang VM. A 64 KB
+stack is enough for drivers delivered with Erlang/OTP but might not be
+large enough to accommodate drivers that use the `driver_async()`
+functionality, documented
+[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend
+setting higher values with caution, always keeping the number of
+available threads in mind.
+
+## Kernel Polling
+
+You can utilize kernel polling in your Erlang distribution if your OS
+supports it. Kernel polling can improve performance if many file
+descriptors are in use; the more file descriptors, the larger an effect
+kernel polling may have on performance. Kernel polling is enabled by
+default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`.
+This corresponds to the
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the
+Erlang VM. You can disable it by setting `erlang.K` to `off`.
+
+## Warning Messages
+
+Erlang's
+[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an
+event manager that registers error, warning, and info events from the
+Erlang runtime. By default, events from the `error_logger` are mapped as
+warnings, but you can also set messages to be mapped as errors or info
+reports using the `erlang.W` parameter (or `+W` in `vm.args`). The
+possible values are `w` (warnings), `errors`, or `i` (info reports).
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. 
You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) \(Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.process_limit` (explained in the section [above](#process-limit)) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can determine that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every N seconds,
+where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can determine how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
diff --git a/content/riak/kv/2.9.1/using/performance/latency-reduction.md b/content/riak/kv/2.9.1/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..8a9bd393fd
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/performance/latency-reduction.md
@@ -0,0 +1,263 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/tuning/latency-reduction
+  - /riak/kv/2.9.1/ops/tuning/latency-reduction
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about them.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB. 
Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric | Explanation
+:-----------------------------|:-----------
+`fsm_node_get_objsize_mean`   | The mean object size encountered by this node in the last minute
+`fsm_node_get_objsize_median` | The median object size encountered by this node in the last minute
+`fsm_node_get_objsize_95`     | The 95th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_99`     | The 99th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_100`    | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and re-start your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes). This increases the size of the
+distributed Erlang buffer from its default of 1024 KB. Re-start your
+node when configuration changes have been made.
+
+Large objects can also impact latency even if they're only present on
+some nodes. If increased latency occurs only on N nodes, where N is your
+[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes.
+
+If large objects are suspected, you should also audit the behavior of
+siblings in your cluster, as explained in the [next section](#siblings).
+
+## Siblings
+
+In Riak, object conflicts are handled by keeping multiple versions of
+the object in the cluster either until a client takes action to resolve
+the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects). 
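+
+As with object size, the sibling statistics discussed in the next section
+can be pulled directly from a node's HTTP `/stats` endpoint; a quick,
+illustrative check (default host and port assumed) might look like this:
+
+```bash
+# List the sibling-related statistics reported by this node
+curl -s http://127.0.0.1:8098/stats | tr ',' '\n' | grep siblings
+```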
+
+### Mitigation
+
+The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor
+object size (or via an HTTP `GET` request to `/stats`). In the output of
+`riak-admin status` on each node, you'll see the following
+sibling-related statistics:
+
+Metric | Explanation
+:------------------------------|:-----------
+`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+
+Is there an upward trend in these statistics over time? Are there any
+large outliers? Do these trends correspond to your observed latency
+spikes?
+
+If you believe that sibling creation problems could be responsible for
+latency issues in your cluster, you can start by checking the following:
+
+* If `allow_mult` is set to `true` for some or all of your buckets, be
+  sure that your application is correctly resolving siblings. Be sure to
+  read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate.
+  If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly.
+* Application errors are a common source of problems with
+  siblings. Updating the same key over and over without passing a
+  [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) \(DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W are set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. The maximum file size setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn per merge), while setting it higher will
+induce less frequent merges with more I/O churn per merge. To find
+settings that are ideal for your use case, we recommend checking out our
+guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments.
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak is a heavily I/O- and network resource-intensive system.
+Bottlenecks on either front can lead to undue latency in your cluster.
+We recommend an active monitoring strategy to detect problems
+immediately when they arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+at your disposal, including `iostat` for tracking
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat) for network activity.
+
+To diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+To set an object size threshold past which a write will succeed but will
+register a warning in the logs, adjust the
+`object.size.warning_threshold` parameter. The default is `5MB`.
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs an error (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
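+
+Taken together, the object settings above live in `riak.conf`. A brief
+sketch showing them side by side (the values shown are simply the
+defaults noted in this section, not tuning recommendations):
+
+```riakconf
+object.size.warning_threshold = 5MB
+object.size.maximum = 50MB
+object.siblings.warning_threshold = 25
+object.siblings.maximum = 100
+object.format = 1
+```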
diff --git a/content/riak/kv/2.9.1/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.9.1/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..4b03feeed7
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,42 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+---
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.1/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
diff --git a/content/riak/kv/2.9.1/using/performance/open-files-limit.md b/content/riak/kv/2.9.1/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..a05ab867e2
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/performance/open-files-limit.md
@@ -0,0 +1,347 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/tuning/open-files-limit/
+  - /riak/kv/2.9.1/ops/tuning/open-files-limit/
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation. The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing the Limit for the Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there's no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session required pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there's no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session required pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following lines to your .bash\_profile or analogous file:
+
+```bash
+ulimit -Sn 65536
+ulimit -Hn 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it, place the following XML:
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following lines to your .bash_profile or analogous file:
+
+```bash
+ulimit -Sn 65536
+ulimit -Hn 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
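+
+If you would like to confirm the kernel values without waiting for a
+full reboot, you may be able to apply them at runtime as well. A sketch,
+assuming `sudo` access (changes made this way last only until the next
+reboot):
+
+```bash
+# Apply the sysctl.conf values immediately
+sudo sysctl -w kern.maxfiles=200000
+sudo sysctl -w kern.maxfilesperproc=200000
+```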
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following lines to your .bash_profile or analogous file:
+
+```bash
+ulimit -Sn 65536
+ulimit -Hn 200000
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file.
+
+4\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```bash
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
diff --git a/content/riak/kv/2.9.1/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.9.1/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..d562a74835
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,45 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands, it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
diff --git a/content/riak/kv/2.9.1/using/reference.md b/content/riak/kv/2.9.1/using/reference.md
new file mode 100644
index 0000000000..294afbf2aa
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference.md
@@ -0,0 +1,130 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
diff --git a/content/riak/kv/2.9.1/using/reference/architecture.md b/content/riak/kv/2.9.1/using/reference/architecture.md
new file mode 100644
index 0000000000..ed6e93b6ab
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/architecture.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+#menu:
+#  riak_kv-2.9.1:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+---
+
+<!-- TODO: Content -->
diff --git a/content/riak/kv/2.9.1/using/reference/bucket-types.md b/content/riak/kv/2.9.1/using/reference/bucket-types.md
new file mode 100644
index 0000000000..b279b0486e
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/bucket-types.md
@@ -0,0 +1,818 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+---
+
+Bucket types allow groups of buckets to share configuration details and
+enable Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade
+the cluster to a pre-2.0 version _as long as you have not created and activated
+a bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/2.9.1/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, which relate to
+  [Riak data types]({{<baseurl>}}riak/kv/2.9.1/developing/data-types) and [strong consistency]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/strong-consistency) respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a
+bucket `my_bucket` and assign it a type the way that you would, say,
+set `allow_mult` to `false` or `n_val` to `5`, because there is no
+`type` parameter contained within the bucket's properties (i.e.
+`props`).
+
+Instead, bucket types are applied to buckets _on the basis of how those
+buckets are queried_. Queries involving bucket types take the following
+form:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+In the older system, only bucket and key are specified in queries:
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+## When to Use Bucket Types
+
+In many respects, bucket types are a major improvement over the older
+system of bucket configuration, including the following:
+
+* Bucket types are more flexible because they enable you to define a
+  bucket configuration and then change it if you need to.
+* Bucket types are more reliable because the buckets that bear a given
+  type only have their properties changed when the type is changed.
+  Previously, it was possible to change the properties of a bucket only
+  through client requests.
+* Whereas bucket properties can only be altered by clients interacting
+  with Riak, bucket types are more of an operational concept. The
+  `riak-admin bucket-type` interface (discussed in depth below) enables
+  you to manage bucket configurations on the operations side, without
+  recourse to Riak clients.
+
+For these reasons, we recommend _always_ using bucket types in versions
+of Riak 2.0 and later.
+
+## Managing Bucket Types Through the Command Line
+
+Bucket types are created, updated, activated, and more through the
+`riak-admin bucket-type` interface.
+
+Below is a full list of available sub-commands:
+
+Command | Action | Form |
+:-------|:-------|:-----|
+`create` | Create or modify a bucket type before activation | `create <type> <json>` |
+`activate` | Activate a bucket type | `activate <type>` |
+`list` | List all currently available bucket types and their activation status | `list` |
+`status` | Display the status and properties of a specific bucket type | `status <type>` |
+`update` | Update a bucket type after activation | `update <type> <json>` |
+
+### Creating a Bucket Type
+
+Creating new bucket types involves using the `create <type> <json>`
+command, where `<type>` is the name of the type and `<json>` is a JSON
+object of the following form:
+
+```json
+{
+  "props": {
+    "prop1": "val1",
+    "prop2": "val2",
+    ...
+  }
+}
+```
+
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.1/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.1/developing/getting-started) section.
+
+If creation is successful (for example, when creating a type named
+`type_using_defaults`), you should see the following output:
+
+```
+type_using_defaults created
+```
+
+{{% note %}}
+The `create` command can be run multiple times prior to a bucket type being
+activated. Riak will persist only those properties contained in the final call
+of the command.
+{{% /note %}}
+
+Creating bucket types that assign properties _always_ involves passing
+stringified JSON to the `create` command. One way to do that is to pass
+a JSON string directly. The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in the JSON from a file, such as a
+`.json` file, using command substitution (note the double quotes, which
+allow the substitution to be evaluated):
+
+```bash
+riak-admin bucket-type create from_json_file "$(cat props.json)"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
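+
+For reference, a hypothetical `props.json` for the command above might
+contain something like the following (the property values are purely
+illustrative):
+
+```json
+{
+  "props": {
+    "n_val": 2,
+    "allow_mult": true
+  }
+}
+```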
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status (i.e. the configuration details) of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock: 20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after the type is
+created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then these
+properties are fixed at creation time and cannot be changed later.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+{{% /note %}}
+
+## Buckets as Namespaces
+
+In versions of Riak prior to 2.0, all queries are made to a bucket/key
+pair, as in the following example read request:
+
+```java
+Location myKey = new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch = new FetchValue.Builder(myKey).build();
+client.execute(fetch);
+```
+
+```ruby
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```php
+$location = new Location('my_key', new Bucket('my_bucket'));
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```csharp
+var id = new RiakObjectId("my_bucket", "my_key");
+client.Get(id);
+```
+
+```javascript
+client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Object} = riakc_pb_socket:get(Pid,
+                                   <<"my_bucket">>,
+                                   <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+```
+
+With the addition of bucket types in Riak 2.0, bucket types can be used
+as _an additional namespace_ on top of buckets and keys. The same bucket
+name can be associated with completely different data if it is used in
+accordance with a different type. Thus, the following two requests will
+be made to _completely different objects_, even though the bucket and key
+names are the same:
+
+```java
+Location key1 =
+  new Location(new Namespace("type1", "my_bucket"), "my_key");
+Location key2 =
+  new Location(new Namespace("type2", "my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(key1).build();
+FetchValue fetch2 = new FetchValue.Builder(key2).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1'));
+$location2 = new Location('my_key', new Bucket('my_bucket', 'type2'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("type1", "my_bucket", "my_key");
+var id2 = new RiakObjectId("type2", "my_bucket", "my_key");
+var rslt1 = client.Get(id1);
+var rslt2 = client.Get(id2);
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'type1', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+
+client.fetchValue({
+    bucketType: 'type2', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"type1">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 {<<"type2">>, <<"my_bucket">>},
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key
+```
+
+{{% note title="Note on object location" %}}
+In Riak 2.x, _all requests_ must be made to a location specified by a bucket
+type, bucket, and key rather than to a bucket/key pair, as in previous
+versions.
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
+```
+
+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`.
+
+In version 2.0, this is changing in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolution in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind when using versions of Riak 2.0 and
+later whenever you create, activate, and use your own bucket types. It
+is still possible to set `allow_mult` to `false` in any given bucket
+type, but it must be done explicitly. If we wanted to set `allow_mult`
+to `false` in our `n_val_of_2` bucket type from above, we would need to
+update the existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/2.9.1/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/2.9.1/developing/usage/commit-hooks/#post-commit-hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again.
If it still does not
    work, then there may be a network partition or other issues that need
    to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key = new Location("sensitive_user_data")
+  .setBucketType("no_siblings")
+  .setKey("user19735");
+RiakObject obj = new RiakObject()
+  .setContentType("application/json")
+  .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{ ... user data ... }")
+  ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` using the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket.
If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type
+from above if we didn't want to deal with siblings, vclocks, and the
+like, and we could use a `siblings_allowed` bucket type (with all of the
+default properties except `allow_mult` set to `true`). This would give
+us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
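+
+You can see this isolation directly from the command line. A quick
+sketch using curl (assuming a local node and that both types have been
+created and activated; the stored values are just placeholders):
+
+```curl
+curl -XPUT -H "Content-Type: text/plain" -d "value one" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+
+curl -XPUT -H "Content-Type: text/plain" -d "value two" \
+  http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+
+# The same bucket/key now returns a different value under each type
+curl http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+```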
diff --git a/content/riak/kv/2.9.1/using/reference/custom-code.md b/content/riak/kv/2.9.1/using/reference/custom-code.md
new file mode 100644
index 0000000000..1a5a4ebee6
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/custom-code.md
@@ -0,0 +1,131 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/advanced/install-custom-code/
+  - /riak/kv/2.9.1/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/2.9.1/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/2.9.1/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. Note that in Erlang, a file must
+have the same name as the module it contains, so if you are given a file
+named `validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+    on supported platforms
+
+Compiling the module is a straightforward process:
+
+```text
+erlc validate_json.erl
+```
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+Next, you'll need to define a path where compiled modules can be stored
+and from which they can be loaded. For our example, we'll use a temporary
+directory `/tmp/beams`, but you should choose a directory for production
+functions based on your own requirements such that they will be available
+where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+### Configure
+
+Copy the `validate_json.beam` file to the `/tmp/beams` directory:
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+  %% ...
+  {add_paths, ["/tmp/beams/"]},
+  %% ...
+```
+
+After updating `app.config`, Riak must be restarted. In production cases, if
+you are applying configuration changes to multiple nodes, ensure that you do
+so in a rolling fashion, taking time to ensure that the Riak key value store
+has fully initialized and become available for use.
+
+This is done with the `riak-admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure riak_kv is active before restarting the next
+node.
+{{% /note %}} diff --git a/content/riak/kv/2.9.1/using/reference/failure-recovery.md b/content/riak/kv/2.9.1/using/reference/failure-recovery.md new file mode 100644 index 0000000000..ba0be64891 --- /dev/null +++ b/content/riak/kv/2.9.1/using/reference/failure-recovery.md @@ -0,0 +1,80 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` diff --git a/content/riak/kv/2.9.1/using/reference/handoff.md b/content/riak/kv/2.9.1/using/reference/handoff.md new file mode 100644 index 0000000000..c5a2bbbf67 --- /dev/null +++ b/content/riak/kv/2.9.1/using/reference/handoff.md @@ -0,0 +1,197 @@ +--- +title: "Handoff Reference" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Handoff" + identifier: "managing_ref_handoff" + weight: 101 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.1/ops/running/handoff/ + - /riak/kv/2.9.1/ops/running/handoff/ +--- + +[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/handoff + +Riak is a distributed system built with two essential goals in mind: + +* **fault tolerance**, whereby a Riak cluster can withstand node + failure, network partitions, and other events in a way that does not + disrupt normal functioning, and +* **scalability**, whereby operators can gracefully add and remove nodes + to/from a Riak cluster + +Both of these goals demand that Riak is able to either temporarily or +permanently re-assign responsibility for portions of the keyspace. That +re-assigning is referred to as **intra-cluster handoff** (or simply +**handoff** in our documentation). + +## Types of Handoff + +Intra-cluster handoff typically takes one of two forms: **hinted +handoff** and **ownership transfer**. + +Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +up the slack, so to speak, assuming responsibility for node C's +operations. When node C comes back online, responsibility will be handed +back to the original vnodes. + +Ownership transfer is different because it is meant to be permanent. +It occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +makeup of a cluster changes, e.g. when nodes are added or removed from +the cluster. In this case, responsibility for portions of the keyspace +needs to be fundamentally re-assigned. + +Both types of handoff are handled automatically by Riak. Operators do +have the option, however, of enabling and disabling handoff on +particular nodes or all nodes and of configuring key aspects of Riak's +handoff behavior. More information can be found below. + +## Configuring Handoff + +A full listing of configurable parameters can be found in our +[configuration files]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#intra-cluster-handoff) +document. The sections below provide a more narrative description of +handoff configuration. + +### SSL + +If you want to encrypt handoff behavior within a Riak cluster, you need +to provide each node with appropriate paths for an SSL certfile (and +potentially a keyfile). The configuration below would designate a +certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`: + +```riakconf +handoff.ssl.certfile = /ssl_dir/cert.pem +handoff.ssl.keyfile = /ssl_dir/key.pem +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_ssl_options, [ + {certfile, "/ssl_dir/cert.pem"}, + {keyfile, "/ssl_dir/key.pem"} + ]}, + %% Other configs +]} +``` + +### Port + +You can set the port used by Riak for handoff-related interactions using +the `handoff.port` parameter. The default is 8099. 
This would change the port to 9000:
+
+```riakconf
+handoff.port = 9000
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_port, 9000},
+    %% Other configs
+]}
+```
+
+### Background Manager
+
+Riak has an optional background manager that limits handoff activity in
+the name of saving resources. The manager can help prevent system
+response degradation during times of heavy load, when multiple
+background tasks may contend for the same system resources. The
+background manager is disabled by default. The following will enable it:
+
+```riakconf
+handoff.use_background_manager = on
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_use_background_manager, on},
+    %% Other configs
+]}
+```
+
+### Maximum Rejects
+
+If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search/),
+those subsystems can block handoff of primary key/value data, i.e. data
+that you interact with via normal reads and writes.
+
+The `handoff.max_rejects` setting controls the maximum duration that a
+[vnode]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode) can be blocked: that duration is the product of
+`handoff.max_rejects` and the value of
+[`vnode_management_timer`]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#vnode_management_timer).
+Thus, if you set `handoff.max_rejects` to 10 and
+`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems
+can block K/V handoff for a maximum of 50 seconds. The default for
+`handoff.max_rejects` is 6, while the default for
+`vnode_management_timer` is `10s`. This would set `max_rejects` to 10:
+
+```riakconf
+handoff.max_rejects = 10
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_rejected_max, 10},
+    %% Other configs
+]}
+```
+
+### Transfer Limit
+
+You can adjust the number of node-to-node transfers (which includes
+handoff) using the `transfer_limit` parameter. The default is 2. Setting
+this higher will increase node-to-node communication, at the expense of
+higher resource consumption. This would set `transfer_limit` to 5:
+
+```riakconf
+transfer_limit = 5
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_concurrency, 5},
+    %% Other configs
+]}
+```
+
+## Enabling and Disabling Handoff
+
+Handoff can be enabled and disabled in two ways: via configuration or
+on the command line.
+
+### Enabling and Disabling via Configuration
+
+You can enable and disable both outbound and inbound handoff on a node
+using the `handoff.outbound` and `handoff.inbound` settings,
+respectively. Both are enabled by default. The following would disable
+both:
+
+```riakconf
+handoff.outbound = off
+handoff.inbound = off
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {disable_outbound_handoff, true},
+    {disable_inbound_handoff, true},
+    %% Other configs
+]}
+```
+
+### Enabling and Disabling Through the Command Line
+
+Check out [Cluster Operations: Handoff][cluster ops handoff] for steps on enabling and disabling handoff via the command line.
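+
+As a quick reference, the relevant commands live under `riak-admin
+handoff` (a sketch assuming the Riak KV 2.x command set; see the page
+linked above for authoritative usage):
+
+```bash
+# Show current handoff activity and whether transfers are enabled
+riak-admin handoff summary
+
+# Disable both inbound and outbound handoff on the local node
+riak-admin handoff disable both
+
+# Re-enable handoff once maintenance is complete
+riak-admin handoff enable both
+```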
diff --git a/content/riak/kv/2.9.1/using/reference/jmx.md b/content/riak/kv/2.9.1/using/reference/jmx.md new file mode 100644 index 0000000000..781dd7eb55 --- /dev/null +++ b/content/riak/kv/2.9.1/using/reference/jmx.md @@ -0,0 +1,186 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.1/ops/running/monitoring/jmx + - /riak/kv/2.9.1/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td> + <td>float</td> + <td>Mean PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMedian</tt></td> + <td>float</td> + <td>Median PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePuts</tt></td> + <td>int</td> + <td>Number of PUTs in past minute</td> + </tr> + <tr> + <td><tt>NodePutsTotal</tt></td> + <td>int</td> + <td>Number of PUTs since node start</td> + </tr> + <tr> + <td><tt>PBCActive</tt></td> + <td>int</td> + <td>Number of active Protocol Buffers connections</td> + </tr> + <tr> + <td><tt>PBCConnects</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections in past minute</td> + </tr> + <tr> + <td><tt>PBCConnectsTotal</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections since node start</td> + </tr> + <tr> + <td><tt>RingCreationSize</tt></td> + <td>int</td> + <td>Number of partitions in Riak ring</td> + </tr> + <tr> + <td><tt>VnodeGets</tt></td> + <td>int</td> + <td>Number of vnode-level GETs in past minute</td> + </tr> + <tr> + <td><tt>VnodeGetsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level GETs since node start</td> + </tr> + <tr> + <td><tt>VnodePuts</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs in past minute</td> + </tr> + <tr> + <td><tt>VnodePutsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs since node start</td> + </tr> +</table> diff --git a/content/riak/kv/2.9.1/using/reference/logging.md b/content/riak/kv/2.9.1/using/reference/logging.md new file mode 100644 index 0000000000..4e957b24e4 --- /dev/null +++ b/content/riak/kv/2.9.1/using/reference/logging.md @@ -0,0 +1,297 @@ +--- +title: "Logging Reference" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Logging" + identifier: "managing_ref_logging" + weight: 100 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.1/ops/running/logging + - /riak/kv/2.9.1/ops/running/logging +--- + +[cluster ops log]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/logging + +Logging in Riak KV is handled by a Basho-produced logging framework for +[Erlang](http://www.erlang.org) called +[lager](https://github.com/basho/lager). + +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. + +## Log Directory + +Riak's log files are stored in a `/log` directory on each node. The +location of that directory differs from platform to platform. The table +below shows you where log files are stored on all supported operating +systems. + +OS | Directory +:--|:--------- +Ubuntu, Debian, CentOS, RHEL | `/var/log/riak` +Solaris, OpenSolaris | `/opt/riak/log` +Source install and Mac OS X | `./log` (where the `.` represents the root installation directory) + +## Log Files + +Below is a list of files that can be found in each node's `/log` +directory: + +File | Significance +:----|:------------ +`console.log` | Console log output +`crash.log` | Crash logs +`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. +`run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. 
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.9.1, a few of the log messages may contain newline
+characters, preventing reliable identification of the end of each log
+entry when log files are ingested by external tools.
+
+A known workaround is to ingest not the logs enabled by the
+`log.console` configurable parameter but rather the logs enabled by the
+`log.syslog` configurable parameter and processed by syslog, e.g. using
+the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option (see, e.g., [this StackExchange
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474))
+or the equivalent in other syslog implementations.
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log Files
+
+In each node's `/log` directory, you will see at least one of each of
+the following:
+
+File | Contents
+:----|:--------
+`console.log` | General messages from all Riak subsystems
+`crash.log` | Catastrophic events, such as node failures, running out of disk space, etc.
+`erlang.log` | Events from the Erlang VM on which Riak runs
+`run_erl.log` | The command-line arguments used when starting Riak
+
+### Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable it
+and disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+By default, Riak stores error messages in `./log/error.log`. You can
+change this using the `log.error.file` parameter. Here is an example,
+which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set crash logs to
+be rotated when a certain size threshold is reached, at designated
+times, or both.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` --- Every night at midnight
+* `$D23` --- Every day at 23:00 (11 pm)
+* `$W0D20` --- Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` --- On the first day of every month at midnight
+* `$M5D6` --- On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, with any size denomination
+you wish, e.g. `KB` or `MB`. The default is 64 KB. The following would
+set the maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose an identifier to be added to
+each syslog message. The identifier is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages amongst the available levels, which are listed below. The
+default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
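+
+Putting these together, a complete syslog configuration might look like
+the sketch below. The `log.syslog.level` and `log.syslog.facility`
+parameter names are assumed to follow the naming pattern above; verify
+them against your release's configuration reference:
+
+```riakconf
+log.syslog = on
+log.syslog.ident = riak
+log.syslog.level = info
+log.syslog.facility = daemon
+```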
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither. This is determined by the value that you give to the
+`log.console` parameter, which gives you one of four options:
+
+* `file` --- Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/2.9.1/setup/installing/source), but will differ on platform-specific installations,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` --- Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-cli/#attach-direct) command
+* `both` --- Console logs will be emitted both to a file and to standard
+  output
+* `off` --- Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log]
diff --git a/content/riak/kv/2.9.1/using/reference/multi-datacenter.md b/content/riak/kv/2.9.1/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..bb567d33d2
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/multi-datacenter.md
@@ -0,0 +1,48 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling & disabling of per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][ref mdc comparison]
diff --git a/content/riak/kv/2.9.1/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.9.1/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..72c281f766
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,96 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/mdc/comparison
+  - /riak/kv/2.9.1/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/2.9.1/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/2.9.1/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destinations of replication data. Sites and listeners
+  are manually configured on each node in a cluster. This can be a
+  burden to the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity arrived in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters are exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes and the sink has N nodes, there will be M
+  realtime connections. Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
diff --git a/content/riak/kv/2.9.1/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/2.9.1/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..21ebeb1bdb
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,170 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/mdc/monitoring
+  - /riak/kv/2.9.1/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identification and trending of issues or delays in realtime replication
+are important for identifying a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute.
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+---
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can reveal problems with
+realtime replication or times when replication becomes overloaded due
+to increases in traffic. The `pending` statistic, found under the
+`realtime_queue_stats` section of the replication status output, should
+be monitored and graphed. Graphing this statistic allows you to identify
+trends in the number of `pending` objects. Any repeating or predictable
+trend in this statistic can be used to help identify a need for tuning
+and capacity changes, while unexpected variation in this statistic may
+indicate either sudden changes in load or errors at the network, system,
+or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The size of `rt_dirty` can quantify the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue backups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+increases to the maximum size of the queue (displayed in the
+`realtime_queue_stats` section of the replication status output as
+`max_bytes`) can be made to accommodate expected periods of high load.
+
+---
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
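+
+To make the first two checks concrete, here is a minimal polling sketch
+against the HTTP stats endpoint. It assumes a node at `127.0.0.1:8098`,
+and the `grep`-based field extraction is deliberately crude, for
+illustration only:
+
+```bash
+#!/bin/sh
+# Fetch replication stats (no more than once per minute; see note above).
+stats=$(curl -s http://127.0.0.1:8098/riak-repl/stats)
+
+# Pull the first occurrence of each indicator out of the JSON payload.
+rt_dirty=$(printf '%s' "$stats" | grep -o '"rt_dirty":[0-9]*' | head -1 | cut -d: -f2)
+drops=$(printf '%s' "$stats" | grep -o '"drops":[0-9]*' | head -1 | cut -d: -f2)
+
+if [ "${rt_dirty:-0}" -ne 0 ] || [ "${drops:-0}" -ne 0 ]; then
+  echo "replication errors (rt_dirty=$rt_dirty, drops=$drops); consider a fullsync" >&2
+  exit 1
+fi
+```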
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+1. Perform a GET for your canary object on both your source and sink
+   clusters, noting their states. The state of the object in each cluster
+   can be referred to as state `S0`, or the object's initial state.
+2. PUT an update for your canary object to the source cluster, updating
+   the state of the object to the next state, `S1`.
+3. Perform a GET for your canary on the sink cluster, comparing the state
+   of the object on the source cluster to the state of the object on the
+   sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+A rough estimate of how long it takes an object PUT to a source cluster
+to be replicated to a sink cluster can be obtained by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
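+
+As a closing illustration, the sketch below exercises the general
+process over the HTTP API from a single shell. The cluster addresses,
+bucket, key, and the 30-second SLA are all placeholders:
+
+```bash
+#!/bin/sh
+SOURCE=http://source-node:8098        # a node in the source cluster
+SINK=http://sink-node:8098            # a node in the sink cluster
+KEY=/buckets/canary/keys/rt-canary    # object reserved for canary testing
+
+# PUT the next state (S1) to the source; a timestamp is conveniently unique.
+STATE=$(date +%s)
+curl -s -X PUT -H "Content-Type: text/plain" -d "$STATE" "$SOURCE$KEY"
+
+# Poll the sink until it reports S1 or the SLA window is exceeded.
+for i in $(seq 1 30); do
+  if [ "$(curl -s "$SINK$KEY")" = "$STATE" ]; then
+    echo "canary replicated in roughly ${i}s"
+    exit 0
+  fi
+  sleep 1
+done
+echo "canary not replicated within the 30s SLA" >&2
+exit 1
+```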
diff --git a/content/riak/kv/2.9.1/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/2.9.1/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..0880bd6899
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,62 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/mdc/per-bucket
+  - /riak/kv/2.9.1/ops/mdc/per-bucket
+---
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+The set of valid `repl` values changed between Riak Enterprise versions
+1.1 and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+ * `true` --- Enable replication (realtime + fullsync)
+ * `false` --- Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+ * `realtime` --- Replication only occurs in realtime for this bucket
+ * `fullsync` --- Replication only occurs during a fullsync operation
+ * `both` --- Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this goes for properties such as `backend`.
+If the bucket doesn't exist in the destination cluster, Riak will create
+it with the default backend and _not_ with the backend used in the
+source cluster.
diff --git a/content/riak/kv/2.9.1/using/reference/multi-datacenter/statistics.md b/content/riak/kv/2.9.1/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..5db36f9dfa
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,240 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/mdc/statistics
+  - /riak/kv/2.9.1/ops/mdc/statistics
+---
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
+
+1. Many of these statistics will appear only on the current
+   leader node
+2. The counts for all statistics will be reset to 0 upon restarting Riak,
+   unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | The number of sink errors detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | The number of source errors detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+These values are under `realtime_queue_stats`.
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster
+`fullsync_start_time` | The time the current fullsync to the specified cluster began
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When the fullsync completes, this will equal the total number of partitions in the ring.
+`error_exits` | The number of partitions whose sync failed or was aborted. These partitions will be queued again to try again later.
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
+`strategy` | The strategy that fulfills fullsync replication. In previous versions of replication, different values could be configured. This value could be changed depending on your replication needs.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received to the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received to the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication sink
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | Shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The connected site (sink) name this server was configured with. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | Shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak V2 MDC Replication
+Configuration]({{<baseurl>}}riak/kv/2.9.1/configuring/v2-multi-datacenter) or [Riak V3 MDC Replication
+Configuration]({{<baseurl>}}riak/kv/2.9.1/configuring/v3-multi-datacenter) guides for
+more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more
+
+
+## Accessing Replication Web-Based Statistics
+
+These stats can be accessed via the command line with the following
+command:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats
+```
+
+A simple way to view formatted statistics is to use a command such as:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp
+```
diff --git a/content/riak/kv/2.9.1/using/reference/object-deletion.md b/content/riak/kv/2.9.1/using/reference/object-deletion.md
new file mode 100644
index 0000000000..e8203e5bc6
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/object-deletion.md
@@ -0,0 +1,117 @@
+---
+title: "Object Deletion Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Object Deletion"
+    identifier: "managing_ref_object_deletion"
+    weight: 103
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/advanced/deletion
+---
+
+[concept eventual consistency]: ../../../learn/concepts/eventual-consistency
+[concept clusters]: ../../../learn/concepts/clusters
+[glossary vnode]: ../../../learn/glossary/#vnode
+[usage delete objects]: ../../../developing/usage/deleting-objects
+[developing keylist]: ../../../developing/api/http/list-keys
+[developing mapreduce]: ../../../developing/usage/mapreduce
+[cluster mdc]: ../../cluster-operations/v3-multi-datacenter
+[config advanced]: ../../../configuring/reference/#advanced-configuration
+[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum
+[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings
+[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction
+
+In single-server, non-clustered data storage systems, object deletion
+is a trivial process.
In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however,
+object deletion is far less trivial because objects live on multiple
+[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can safely be removed from the storage backend.
+
+## Object Deletion Example
+
+The problem of object deletion in distributed systems can be illustrated more concretely using the following example:
+
+* An object is stored on nodes A, B, and C
+* Node C suddenly goes offline due to a network failure
+* A client sends a delete request to node A, which forwards that
+  request to node B, but it cannot reach node C
+* On nodes A and B, the object is deleted
+* Node C comes back online
+* A client attempts to read the object, and the request hits node C
+* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object.
+
+The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred:
+
+1. the object was deleted on A & B but not on C
+2. the object was created on C but not on A & B
+
+To get around this problem, Riak uses *tombstones*.
+
+## Tombstones
+
+Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it.
+
+This allows Riak to understand the difference between an object that has been deleted and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*.
+
+The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**.
+
+After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written).
+
+## Configuring Object Deletion
+
+The `delete_mode` setting in a cluster's [configuration files][config advanced] determines how long a tombstone will remain before being reaped.
+
+There are three possible settings:
+
+* `keep` --- Disables tombstone removal
+* `immediate` --- The tombstone is removed as soon as the request is
+  received
+* Custom time interval --- How long to wait until the tombstone is
+  removed, expressed in milliseconds. The default is `3000`, i.e. to
+  wait 3 seconds
+
+In general, we recommend setting the `delete_mode` parameter to `keep`
+if you plan to delete and recreate objects under the same key. This protects against failure scenarios in which a deleted object may be resurrected.
+
+Setting `delete_mode` to `immediate` can be useful in situations in
+which an aggressive space reclamation process is necessary, such as
+when running [MapReduce jobs][developing mapreduce], but we do not recommend
+this in general.
+
+Setting `delete_mode` to a longer time duration than the default can be
+useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when
+network connectivity is an issue.
+
+## Deletion from Backends
+
+When attempting to reclaim disk space, deleting data may seem like the obvious first step.
However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion, when a Riak tombstone is created, and later, when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging]; for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping, and prior to garbage collection delete operations will actually cause disk space usage to rise slightly.
+
+## Tombstones & Reporting
+
+When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:
+
+* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
+* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain on the node until a further request finds it. At that time, a new reap timer will be initiated.
+* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
+* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too.
+* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.
+
+## Client Library Examples
+
+Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
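+
+For a quick sense of the shape of a delete request, here is a minimal example using the HTTP API. The bucket and key names below are placeholders, and the quorum parameters (`rw`, `pw`, etc.) are omitted for brevity:
+
+```curl
+curl -XDELETE http://localhost:8098/buckets/users/keys/john_doe
+```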
+
+## Resources
+
+* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html)
diff --git a/content/riak/kv/2.9.1/using/reference/runtime-interaction.md b/content/riak/kv/2.9.1/using/reference/runtime-interaction.md
new file mode 100644
index 0000000000..7546054918
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/runtime-interaction.md
@@ -0,0 +1,66 @@
+---
+title: "Runtime Interaction Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Runtime Interaction"
+    identifier: "managing_ref_runtime_interaction"
+    weight: 104
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/advanced/runtime
+  - /riak/kv/2.9.1/ops/advanced/runtime
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters
+
+Riak's [configuration files][config reference] provide a variety of parameters that
+enable you to fine-tune how Riak interacts with two important elements
+of the underlying operating system: distribution ports and OS
+processes/garbage collection.
+
+## Ports
+
+Distribution ports connect Riak nodes within a [cluster][concept clusters]. The
+following port-related parameters are available:
+
+* `runtime_health.triggers.distribution_port` --- Whether distribution
+  ports with full input buffers will be counted as busy.
+  * Default: `on`
+* `runtime_health.triggers.port` --- Whether ports with full input
+  buffers will be counted as busy. Ports can represent open files or network sockets.
+  * Default: `on`
+* `runtime_health.thresholds.busy_ports` --- The threshold at which a
+  warning will be triggered about the number of ports that are overly
+  busy. Ports with full input buffers count toward this threshold.
+  * Default: `2`
+
+## Processes
+
+Riak will log warnings related to busy operating system processes and
+garbage collection. You can specify the conditions in which warnings are
+triggered using the following parameters:
+
+* `runtime_health.thresholds.busy_processes` --- The threshold at which
+  a warning will be triggered about the number of processes that are
+  overly busy. Processes with large heaps or that take a long time to
+  garbage collect will count toward this threshold.
+  * Default: `30`
+* `runtime_health.triggers.process.heap_size` --- A process will be
+  marked as busy when its heap exceeds this size (in bytes).
+  * Default: `160444000`
+* `runtime_health.triggers.process.garbage_collection` --- A process
+  will be marked as busy when it exceeds this amount of time doing
+  garbage collection. Enabling this setting can cause performance
+  problems on multi-core systems.
+  * Default: `off`
+  * Example when enabled: `50ms`
+* `runtime_health.triggers.process.long_schedule` --- A process will be
+  marked as busy when it exceeds this length of time during a single
+  process scheduling and execution cycle.
+  * Default: `off`
+  * Example when enabled: `20ms`
diff --git a/content/riak/kv/2.9.1/using/reference/search.md b/content/riak/kv/2.9.1/using/reference/search.md
new file mode 100644
index 0000000000..fb62fbc921
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/search.md
@@ -0,0 +1,454 @@
+---
+title: "Search Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Search"
+    identifier: "managing_ref_search"
+    weight: 109
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/advanced/search
+  - /riak/kv/2.9.1/dev/advanced/search
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters
+[configuring search]: {{<baseurl>}}riak/kv/2.9.1/configuring/search
+
+> **Note on search 2.0 vs. legacy search**
+>
+> This document refers to Riak search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+The project that implements Riak search is codenamed Yokozuna. This is a
+more detailed overview of the concepts and reasons behind the design of
+Yokozuna, for those interested. If you're simply looking to use Riak
+search, you should check out the [Using Search]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search) document.
+
+![Yokozuna]({{<baseurl>}}images/yokozuna.png)
+
+## Riak Search is Erlang
+
+In Erlang OTP, an "application" is a group of modules and Erlang
+processes which together perform a specific task. The word application
+is confusing because most people think of an application as an entire
+program such as Emacs or Photoshop. But Riak Search is just a sub-system
+in Riak itself. Erlang applications are often stand-alone, but Riak
+Search is more like an appendage of Riak. It requires other subsystems
+like Riak Core and KV, but also extends their functionality by providing
+search capabilities for KV data.
+
+The purpose of Riak Search is to bring more sophisticated and robust
+query and search support to Riak. Many people consider Lucene and
+programs built on top of it, such as Solr, as the standard for
+open-source search. There are many successful applications built on
+Lucene/Solr, and it sets the standard for the feature set that
+developers and users expect. Meanwhile, Riak has a great story as a
+highly-available, distributed key/value store. Riak Search takes
+advantage of the fact that Riak already knows how to do the distributed
+bits, combining its feature set with that of Solr, taking advantage of
+the strengths of each.
+
+Riak Search is a mediator between Riak and Solr. There is nothing
+stopping a user from deploying these two programs separately, but this
+would leave the user responsible for the glue between them. That glue
+can be tricky to write. It requires dealing with monitoring, querying,
+indexing, and dissemination of information.
+
+Unlike Solr by itself, Riak Search knows how to do all of the following:
+
+* Listen for changes in key/value (KV) data and make the appropriate
+  changes to indexes that live in Solr. It also knows how to take a user
+  query on any node and convert it to a Solr distributed search, which
+  will correctly cover the entire index without overlap in replicas.
+* Take index creation commands and disseminate that information across
+  the cluster.
+* Communicate with and monitor the Solr OS process.
+
+## Solr/JVM OS Process
+
+Every node in a Riak [cluster][concept clusters] has a corresponding operating
+system (OS) process running a JVM which hosts Solr on the Jetty
+application server.
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
+
+Because of this mismatch between KV and Solr, Riak Search must act as a
+mediator between the two, meaning it must have a way to inspect a KV
+object and create a structure which Solr can ingest for indexing. In
+Solr this structure is called a **document**. This task of creating a
+Solr document from a Riak object is the job of the **extractor**. To
+perform this task two things must be considered.
+
+**Note**: This isn't quite right; the fields created by the extractor
+are only a subset of the fields in the final document. Special fields
+needed for Yokozuna to properly query data and tagging fields are also
+created. This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a
+   Solr document?
+2. If so, how is the object's value mapped from one to the other?
+   For example, the value may be `application/json` which contains
+   nested objects. This must somehow be transformed into a flat
+   structure.
+
+The first question is answered by the _extractor mapping_. By default
+Yokozuna ships with extractors for several common data types. Below is a
+table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation
+of the extractor module. Every extractor must export an `extract`
+function conforming to the following Erlang specification:
+
+```erlang
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+
+-spec extract(ObjectValue :: binary(), Options :: proplists:proplist()) ->
+    fields() | {error, term()}.
+```
+
+The value of the object is passed along with options specific to each
+extractor. Assuming the extractor correctly parses the value, it will
+return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the
+object's value verbatim and associate it with the field name `text`.
+For example, an object with the value "How much wood could a woodchuck
+chuck if a woodchuck could chuck wood?" would result in the following
+fields list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier.
+JSON can be nested arbitrarily. That is, the key of a top-level object
+can have an object as a value, and this object can have another object
+nested inside, and so on. Yokozuna's JSON extractor must have some method
+of converting this arbitrary nesting into a flat list. It does this by
+concatenating nested object fields with a separator. The default
+separator is `.`. An example should make this clearer.
+
+Below is JSON that represents a person, what city they are from, and what
+cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice:
+
+* Nested objects have their field names concatenated to form a field
+  name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your
+  schema defines this field as multi-valued.
+
+The XML extractor works in a very similar fashion to the JSON extractor
+except it also has element attributes to worry about. To see the
+document created for an object, without actually writing the object, you
+can use the extract HTTP endpoint. This will do a dry-run extraction and
+return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+     -H 'Content-Type: application/json' \
+     --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and
+types. For each document stored, every field must have a matching name
+in the schema, used to determine the field's type, which in turn
+determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempts to hide any details of the Solr
+schema: a user creates a schema for Yokozuna just as she would for Solr.
+Here is the general structure of a schema.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding
+options are declared. Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
+called `_yz_default`. This is an extremely general schema which makes
+heavy use of dynamic fields---it is intended for development and
+testing. In production, a schema should be tailored to the data being
+indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+correcting entropy (divergence) between the data stored in Riak's
+key-value backend and the indexes stored in Solr. The impetus for AAE is
+that failures come in all shapes and sizes---disk failure, dropped
+messages, network partitions, timeouts, overflowing queues, segmentation
+faults, power outages, etc. Failures range from obvious to invisible.
+Failure prevention is fraught with failure, as well. How do you prevent
+your prevention system from failing? You don't. Code for detection, not
+prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite
+expensive. To minimize the overall cost of detection, AAE makes use of
+hashtrees. Every partition has a pair of hashtrees; one for KV and
+another for Yokozuna. As data is written, the hashtrees are updated in
+real time.
+
+Each tree stores the hash of the object. Periodically a partition is
+selected and the pair of hashtrees is _exchanged_. First the root hashes
+are compared. If equal, then there is no more work to do.
You could have
+millions of keys in one partition and verifying they **all** agree takes
+the same time as comparing two hashes. If they don't match, then the
+root's children are checked and this process continues until the
+individual discrepancies are found. If either side is missing a key or
+the hashes for a key do not match, then _repair_ is invoked on that key.
+Repair converges the KV data and its indexes, removing the entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the
+hashtrees themselves may contain some entropy. For example, what if the
+root hashes agree but a divergence exists in the actual data? Simple:
+you assume you can never fully trust the hashtrees, so periodically you
+_expire_ them. When expired, a tree is completely destroyed and then
+re-built from scratch. This requires folding all data for a partition,
+which can be expensive and take some time. For this reason, by default,
+expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
+[screencast](http://coffee.jtuple.com/video/AAE.html).
+
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a
+stream of tokens. Solr allows many different methods of analysis,
+an important fact because different field values may represent
+different types of data. For data like unique identifiers, dates, and
+categories, you want to index the value verbatim---it shouldn't be
+analyzed at all. For text like product summaries, or a blog post,
+you want to split the value into individual words so that they may be
+queried individually. You may also want to remove common words,
+lowercase words, or perform stemming. This is the process of
+_analysis_.
+
+Solr provides many different field types which analyze data in different
+ways, and custom analyzer chains may be built by stringing together XML
+in the schema file, allowing custom analysis for each field. For more
+information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via
+Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata
+   about it that should be indexed, for example storing an image with
+   location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must
+   be added _without_ modifying the object's value.
+
+See
+[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
+for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index
+entries for a given Riak Object are co-located on the same physical
+machine. To query the entire index, all partitions must be contacted.
+Adjacent partitions keep replicas of the same object. Replication allows
+the entire index to be considered by only contacting a subset of the
+partitions. The process of finding a covering set of partitions is known
+as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can
+be thought of as a unique set of nodes along with a covering set of
+partitions. Yokozuna treats the node list as physical hostnames and
+passes them to Solr's distributed search via the `shards` parameter.
+Partitions, on the other hand, are treated logically in Yokozuna. All
+partitions for a given node are stored in the same index, unlike KV,
+which uses _partition_ as a physical separation.
To properly filter out
+overlapping replicas, the partition data from the cover plan is passed to
+Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very
+expensive operation as much computation is done symbolically, and the
+process amounts to a knapsack problem. The larger the ring, the more
+expensive. Yokozuna takes advantage of the fact that it has no physical
+partitions by computing a coverage plan asynchronously every few
+seconds, caching the plan for query use. In the case of node failure or
+ownership change this could mean a delay between cluster state and the
+cached plan. This is, however, a good trade-off given the performance
+benefits, especially since even without caching there is a race, albeit
+one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, measured from the time a batch is sent to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, measured from the time a batch is sent to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, measured from the time a drain is initiated to the time it is completed.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees.
Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr.
+
+* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting.
+
+* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window.
+
+* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window.
+
+* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of querying latency, measured from the time a request is sent to Solr to the time the response is received from Solr.
+
+* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window.
+
+* `search_index_bad_entry_count` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+
+* `search_index_bad_entry_one` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute.
+
+* `search_index_extract_fail_count` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last restart of Riak.
+
+* `search_index_extract_fail_one` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute.
+
+While most of the default values are sufficient, you may have to
+increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start.
\ No newline at end of file
diff --git a/content/riak/kv/2.9.1/using/reference/secondary-indexes.md b/content/riak/kv/2.9.1/using/reference/secondary-indexes.md
new file mode 100644
index 0000000000..b0b9acdf02
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/secondary-indexes.md
@@ -0,0 +1,72 @@
+---
+title: "Secondary Indexes Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Secondary Indexes"
+    identifier: "managing_ref_2i"
+    weight: 110
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.1/dev/advanced/2i
+  - /riak/kv/2.9.1/dev/advanced/2i
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.1/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends.
+
+This document provides implementation and other details for Riak's
+[secondary indexes]({{<baseurl>}}riak/kv/2.9.1/developing/usage/secondary-indexes/) \(2i) feature.
+
+## How It Works
+
+Secondary indexes use **document-based partitioning**, a system where
+indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode). This
+system is also known as a local index. Secondary indexes are a list of key/value
+pairs that are similar to HTTP headers. At write time, objects are
+tagged with index entries consisting of key/value metadata. This
+metadata can be queried to retrieve the matching keys.
+
+![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png)
+
+Indexes reside on multiple machines. Since indexes for an object are
+stored on the same partition as the object itself, query-time
+performance issues might arise. When issuing a query, the system must
+read from a "covering" set of partitions and then merge the results.
+The system looks at how many replicas of data are stored---the N value
+or `n_val`---and determines the minimum number of partitions that it
+must examine (1 / `n_val`) to retrieve a full set of results, also
+taking into account any offline nodes.
+
+An application can modify the indexes for an object by reading an
+object, adding or removing index entries, and then writing the object.
+Finally, an object is automatically removed from all indexes when it is
+deleted. The object's value and its indexes should be thought of as a
+single unit. There is no way to alter the indexes of an object
+independently from the value of an object, and vice versa. Indexing is
+atomic, and is updated in real time when writing an object. This means
+that an object will be present in future index queries as soon as the
+write operation completes.
+
+Riak stores 3 replicas of all objects by default, although this can be
+changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/replication-properties). The system is capable of generating a full set of results
+from one third of the system's partitions as long as it chooses the
+right set of partitions. The query is sent to each partition, the index
+data is read, and a list of keys is generated and then sent back to the
+requesting node.
+
+> **Note on 2i and strong consistency**
+>
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
+secondary index metadata to those objects, you can still perform
+strongly consistent operations on those objects but the secondary
+indexes will be ignored.
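+
+For a concrete sense of the query interface described above, a basic exact-match 2i lookup via the HTTP API has the following shape. The bucket name (`users`), index name (`twitter_bin`), and index value (`jsmith123`) are placeholders:
+
+```curl
+curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```
+
+This returns the list of keys in the `users` bucket whose `twitter_bin` index entry matches `jsmith123`.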
diff --git a/content/riak/kv/2.9.1/using/reference/snmp.md b/content/riak/kv/2.9.1/using/reference/snmp.md
new file mode 100644
index 0000000000..c5bcf572bd
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/snmp.md
@@ -0,0 +1,162 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.1/ops/running/monitoring/snmp
+  - /riak/kv/2.9.1/ops/running/monitoring/snmp
+---
+
+Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document covers only SNMP v2c, which was the last supported version. Support for SNMP was dropped after the release of Riak KV 2.2.3 Enterprise Edition. The configuration examples below are left for those analysing legacy settings and only work with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (e.g. `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: the IP address is written as a comma-separated list of octets
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                   {force_load, true}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings, you can start your Riak node and snmpwalk the node to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak, you will need to reference the MIB shipped with Riak.
For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+  -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+  192.168.52.129:4000 RIAK
+```
+
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in the past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in the past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in the past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in the past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
diff --git a/content/riak/kv/2.9.1/using/reference/statistics-monitoring.md b/content/riak/kv/2.9.1/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..96a4aadb4e
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/statistics-monitoring.md
@@ -0,0 +1,391 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/stats-and-monitoring
+  - /riak/kv/2.9.1/ops/running/stats-and-monitoring
+---
+
+Riak provides data related to current operating status, which includes
+statistics in the form of counters and histograms. These statistics
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/2.9.1/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered
+statistics, as well as numerous solutions for monitoring and gathering
+statistics that our customers and community report using successfully
+in Riak cluster environments. You can learn more about the specific
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/2.9.1/developing/api/http/status) documentation.
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with
+diagnostics and early warnings of potential problems, as well as help
+guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor load
+* Available memory
+* Available disk space
+* Used file descriptors
+* Swap usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory activity and
+writebacks. Things like massive flushes of dirty pages or steadily
+climbing writeback volumes can indicate poor virtual memory tuning.
+More information can be found [here][sysctl_vm_txt] and in our
+documentation on [system tuning]({{<baseurl>}}riak/kv/2.9.1/using/performance/#storage-and-file-system-tuning).
+
+## Riak Metrics to Graph
+Riak metrics fall into several general categories:
+
+1. Throughput metrics
+2. Latency metrics
+3. Erlang resource usage metrics
+4. General Riak load/health metrics
+
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/inspecting-node) is
+not practical, you should pick a minimum relevant subset from these
+categories. Some of the most helpful metrics are discussed below.
+
+### Throughput Metrics
+
+Graphing the throughput stats relevant to your use case is often
+helpful for capacity planning and usage trend analysis. In addition,
+it helps you establish an expected baseline -- that way, you can
+investigate unexpected spikes or dips in the throughput. The
+following stats are recorded for operations that happened *during the
+last minute*.
+
+Metric | Relevance | Operations (for the last minute)
+:--------|:--------|:--------------------------------
+```node_gets``` | K/V | Reads coordinated by this node
+```node_puts``` | K/V | Writes coordinated by this node
+```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes
+```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes
+```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes
+```search_query_throughput_one``` | Search | Search queries on the node
+```search_index_throughput_one``` | Search | Documents indexed by Search
+```consistent_gets``` | Strong Consistency | Consistent reads on this node
+```consistent_puts``` | Strong Consistency | Consistent writes on this node
+```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads
+
+Note that there are no separate stats for updates to Flags or
+Registers, as these are included in ```vnode_map_update```.
+
+### Latency Metrics
+
+As with the throughput metrics, keeping an eye on average (and max)
+latency times will help detect usage patterns, and provide advanced
+warnings for potential problems.
+
+{{% note title="Note on FSM Time Stats" %}}
+FSM Time Stats represent the amount of time in microseconds required to
+traverse the GET or PUT Finite State Machine code, offering a picture of
+general node health. From your application's perspective, FSM Time effectively
+represents experienced latency. Mean, Median, and 95th-, 99th-, and
+100th-percentile (Max) counters are displayed. These are one-minute stats.
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute
+```pbc_active``` | | Number of currently active protocol buffer connections
+```pbc_connects``` | | Number of new protocol buffer connections established during the last minute
+```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes)
+```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0)
+```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection
+```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection
+
+### General Riak Search Load/Health Metrics
+
+These stats give a picture of the general level of activity or load on
+the Riak Search subsystem at any given moment.
+
+Metric | Description
+:------|:------------
+`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+`search_index_bad_entry_one` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute.
+`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last restart of Riak.
+`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute.
+
+
+## Command-line Interface
+
+The [`riak-admin`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/) tool provides two
+interfaces for retrieving statistics and other information: `status`
+and `stat`.
+
+### status
+
+Running the `riak-admin status` command will return all of the
+currently available information from a running node.
+
+```bash
+riak-admin status
+```
+
+This will return a list of over 300 key/value pairs, like this:
+
+```
+1-minute stats for 'dev1@127.0.0.1'
+-------------------------------------------
+connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1']
+consistent_get_objsize_100 : 0
+consistent_get_objsize_195 : 0
+... etc ...
+```
+
+A comprehensive list of available stats can be found in the
+[Inspecting a Node]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+
+### stat
+
+The `riak-admin stat` command is related to the `riak-admin status`
+command but provides a more fine-grained interface for interacting with
+stats and information. Full documentation of this command can be found
+in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#stat) document.
+
+## Statistics and Monitoring Tools
+
+There are many open source, self-hosted, and service-based solutions for
+aggregating and analyzing statistics and log data for the purposes of
+monitoring, alerting, and trend analysis on a Riak cluster. Some
+solutions provide Riak-specific modules or plugins as noted.
+
+The following are solutions which customers and community members have
+reported success with when used for monitoring the operational status of
+their Riak clusters. Community and open source projects are presented
+along with commercial and hosted services.
+
+{{% note title="Note on Riak 2.x Statistics Support" %}}
+Many of the below tools were created either by third parties or by Basho
+engineers for general usage, and have been passed to the community for further
+updates. As such, many of the below only aggregate the statistics and messages
+that were output by Riak 1.4.x.
+
+Like all code under [Basho Labs](https://github.com/basho-labs/), the below
+tools are "best effort" and have no dedicated Basho support. We both
+appreciate and need your contribution to keep these tools stable and up to
+date. Please open up a GitHub issue on the repository if you'd like to be a
+maintainer.
+
+Look for banners calling out the tools we've verified to support the latest
+Riak 2.x statistics!
+{{% /note %}}
+
+### Self-Hosted Monitoring Tools
+
+#### Riaknostic
+
+[Riaknostic](http://riaknostic.basho.com) is a growing suite of
+diagnostic checks that can be run against your Riak node to discover
+common problems and recommend how to resolve them. These checks are
+derived from the experience of the Basho Client Services Team as well as
+numerous public discussions on the mailing list, IRC room, and other
+online media.
+
+Riaknostic integrates into the `riak-admin` command via a `diag`
+subcommand, and is a great first step in the process of diagnosing and
+troubleshooting issues on Riak nodes.
+
+#### Riak Control
+
+[Riak Control]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak
+clusters. It is designed to give you quick insight into the health of
+your cluster and allow for easy management of nodes.
+
+While Riak Control does not currently offer specific monitoring and
+statistics aggregation or analysis functionality, it does offer features
+which provide immediate insight into overall cluster health, node
+status, and handoff operations.
+
+#### collectd
+
+[collectd](http://collectd.org) gathers statistics about the system it
+is running on and stores them. The statistics are then typically graphed
+to find current performance bottlenecks, predict system load, and
+analyze trends.
+
+#### Ganglia
+
+[Ganglia](http://ganglia.info) is a monitoring system specifically
+designed for large, high-performance groups of computers, such as
+clusters and grids. Customers and community members using Riak have
+reported success in using Ganglia to monitor Riak clusters.
+
+A [Riak Ganglia module][riak_ganglia] for collecting statistics from
+the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.1/developing/api/http/status) endpoint is also available.
+
+#### Nagios
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x.**
+{{% /note %}}
+
+[Nagios](http://www.nagios.org) is a monitoring and alerting solution
+that can provide information on the status of Riak cluster nodes, in
+addition to various types of alerting when particular events occur.
+Nagios also offers logging and reporting of events and can be used for
+identifying trends and capacity planning.
+
+A collection of [reusable Riak-specific scripts][riak_nagios] is
+available to the community for use with Nagios.
+
+#### OpenTSDB
+
+[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database
+(TSDB) used to store, index, and serve metrics from various sources. It can
+collect data at a large scale and graph these metrics on the fly.
+
+A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of
+the [tcollector framework][tcollector].
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project
+consisting of small programs for sending data to Riemann provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have
+reported successfully using for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.1/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine-generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/2.9.1/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. These data are then available for real-time graphing,
+search, and other visualizations, ideal for troubleshooting complex
+issues and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistic information which can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ diff --git a/content/riak/kv/2.9.1/using/reference/strong-consistency.md b/content/riak/kv/2.9.1/using/reference/strong-consistency.md new file mode 100644 index 0000000000..90fc2f47aa --- /dev/null +++ b/content/riak/kv/2.9.1/using/reference/strong-consistency.md @@ -0,0 +1,145 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.1/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + +## Making the Strong vs. Eventual Decision + +The first system described above may sound like the undisputed champion, +and the second system undesirable. However: + +1. Reads and writes on the first system will often be slower---if only + by a few milliseconds---because the system needs to manage reads and + writes more carefully. If performance is of primary concern, the + first system might not be worth the sacrifice. +2. Reads and writes on the first system may fail entirely if enough + servers are unavailable. If high availability is the top priority, + then the second system has a significant advantage. + +So when deciding whether to use strong consistency in Riak, the +following question needs to be asked: + +#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value? + +If the answer is yes, then you should seriously consider using Riak in a +strongly consistent way for the data that demands it, while bearing in +mind that other data can still be stored in Riak in an eventually +consistent way. 
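+
+As a concrete sketch of the per-bucket-type approach described above, and
+assuming strong consistency has already been enabled on each node in
+`riak.conf` (`strong_consistency = on`), a strongly consistent bucket type
+could be created and activated as follows (the type name `strict` here is
+purely illustrative):
+
+```bash
+# Create a bucket type whose keys get strong consistency guarantees
+riak-admin bucket-type create strict '{"props":{"consistent":true}}'
+
+# Activate the type once it has propagated to all nodes
+riak-admin bucket-type activate strict
+```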
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable
+trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available
+than eventually consistent operations because they require a **quorum**
+of available object replicas to succeed. Quorum is defined as N / 2 + 1,
+or `n_val` / 2 + 1. If N is set to 7, at least 4 object replicas must be
+available; if N is 3, at least 2 must be available; and so on.
+
+If there is a network partition that leaves less than a quorum of object
+replicas available within an ensemble, strongly consistent operations
+against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault
+tolerance. Consistent operations can still succeed when a minority of
+replicas in each ensemble are offline, faulty, or unreachable. In
+other words, **strongly consistent operations will succeed as long as
+quorum is maintained**. A fuller discussion can be found in the
+[operations]({{<baseurl>}}riak/kv/2.9.1/configuring/strong-consistency/#fault-tolerance)
+documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/2.9.1/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+which can entail a performance hit of varying proportions, depending on
+a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/2.9.1/configuring/strong-consistency/#performance).
diff --git a/content/riak/kv/2.9.1/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.9.1/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..3c86bf96e6
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,35 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.1/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+ +[Learn More >>][v2 mdc fullsync] diff --git a/content/riak/kv/2.9.1/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.9.1/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..0b50af63e6 --- /dev/null +++ b/content/riak/kv/2.9.1/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,126 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/2.9.1/ops/mdc/v2/architecture + - /riak/kv/2.9.1/ops/mdc/v2/architecture +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.1/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e. +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime mode are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the Figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. 
The site node in the secondary cluster initiates fullsync replication
+   with the primary node by sending a message to the listener node in
+   the primary cluster
+3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode) in their respective clusters and compute a hash for
+   each key's object value. The site node on the secondary cluster sends
+   its complete list of key/hash pairs to the listener node in the
+   primary cluster. The listener node then sequentially compares those
+   key/hash pairs with its own, identifying any missing objects or
+   updates needed in the secondary cluster
+4. The listener node streams the missing objects/updates to the
+   secondary cluster
+5. The secondary cluster replicates the updates within the cluster to
+   achieve the new object values, completing the fullsync cycle
+
+<br>
+![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png)
+<br>
+
+## Realtime Replication
+
+Riak performs the following steps during realtime
+replication, as illustrated in the Figure below.
+
+1. The secondary cluster establishes a TCP connection to the primary
+2. Realtime replication of a key/object is initiated when an update is
+   sent from a client to the primary cluster
+3. The primary cluster replicates the object locally
+4. The listener node on the primary cluster streams an update to the
+   secondary cluster
+5. The site node within the secondary cluster receives and replicates
+   the update
+
+<br>
+![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png)
+<br>
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/2.9.1/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+source and sink cluster.
diff --git a/content/riak/kv/2.9.1/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.1/using/reference/v2-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..1b1515c285
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/v2-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,49 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v2_fullsync"
+    weight: 101
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/mdc/v2/scheduling-fullsync
+  - /riak/kv/2.9.1/ops/mdc/v2/scheduling-fullsync
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.1/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead.
+{{% /note %}}
+
+
+## Scheduling Fullsync Operation
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` diff --git a/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter.md b/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..4bef86b911 --- /dev/null +++ b/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter.md @@ -0,0 +1,47 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] diff --git a/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..6ef935936a --- /dev/null +++ b/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,125 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.1/ops/mdc/v3/aae + - /riak/kv/2.9.1/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak
+Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a
+technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in
+LevelDB, so if AAE is already active, there is no additional startup
+delay for enabling the `aae` fullsync strategy. AAE can also be enabled
+for the first time on a cluster, although some custom settings can
+enhance performance in this case to help AAE trees be built more
+quickly. See [Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later installed on source and sink
+  clusters
+* Riak MDC Replication Version 3 enabled on source and sink
+  clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the
+  `advanced.config` configuration file must be set to `aae` on both
+  source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In
+  the event that an AAE tree is not built on both the source and sink,
+  fullsync will default to the `keylist` fullsync strategy for that
+  partition.
+
+## Configuration
+
+If you are using Riak version 2.0, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To enable [active anti-entropy][glossary aae] \(AAE), you must enable it on both source and sink clusters. If it is not
+enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all
+of the necessary hash trees because the default **build rate** of trees
+is to build 1 partition per hour, per node. With a
+[ring size][concept clusters] of 256 and 5 nodes, that is roughly 2 days.
+
+Changing the rate of tree building can speed up this process, with the
+caveat that rebuilding a tree takes processing time from the cluster,
+and this should not be done without assessing the possible impact on
+get/put latencies for normal cluster operations. For a production
+cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a
+partition has not had its AAE tree built yet, it will default to using
+the `keylist` replication strategy. Instructions on these settings can
+be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting
+the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit`
+settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on
+both source and sink clusters. If not, the `keylist` replication
+strategy will be used.
+
+To enable AAE with Version 3 MDC Replication:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {fullsync_strategy, aae},
+    % ...
+    ]}
+```
diff --git a/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..98160f388a
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/architecture.md
@@ -0,0 +1,182 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Architecture"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Architecture"
+    identifier: "managing_ref_v3_architecture"
+    weight: 100
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.1/ops/mdc/v3/architecture
+  - /riak/kv/2.9.1/ops/mdc/v3/architecture
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters
+
+## How Version 3 Replication Works
+
+In Multi-Datacenter (MDC) Replication, a cluster can act as either the:
+
+* **source cluster**, which sends replication data to one or more
+* **sink clusters**, which are generally located in datacenters in other
+  regions or countries.
+
+Bidirectional replication can easily be established by making a cluster
+both a source and sink to other clusters. Riak
+Multi-Datacenter Replication is considered "masterless" in that all
+clusters participating will resolve replicated writes via the normal
+resolution methods available in Riak.
+
+In Multi-Datacenter Replication, there are two primary modes of
+operation:
+
+* **Fullsync** replication is a complete synchronization that occurs
+  between source and sink cluster(s), which can be performed upon
+  initial connection of a sink cluster if you wish
+* **Realtime** replication is a continual, incremental synchronization
+  triggered by successful writing of new updates on the source cluster
+
+Fullsync and realtime replication modes are described in detail below.
+
+## Concepts
+
+### Sources
+
+A source refers to a cluster that is the primary producer of replication
+data. A source can also refer to any node that is part of the source
+cluster. Source clusters push data to sink clusters.
+
+### Sinks
+
+A sink refers to a cluster that is the primary consumer of replication
+data. A sink can also refer to any node that is part of the sink
+cluster. Sink clusters receive data from source clusters.
+
+### Cluster Manager
+
+The cluster manager is a Riak service that provides
+information regarding nodes and protocols supported by the sink and
+source clusters. This information is primarily consumed by the
+`riak-repl connect` command.
+
+### Fullsync Coordinator
+
+In fullsync replication, a node on the source cluster is elected to be
+the *fullsync coordinator*. This node is responsible for starting and
+stopping replication to the sink cluster. It also communicates with the
+sink cluster to exchange key lists and ultimately transfer data across a
+TCP connection. If a fullsync coordinator is terminated as the result of
+an error, it will automatically restart on the current node. If the node
+becomes unresponsive, a leader election will take place within 5 seconds
+to select a new node from the cluster to become the coordinator. In the
+event of a coordinator restart, a fullsync will have to restart.
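+
+As a minimal sketch of how these pieces are driven from the command line
+(the cluster name and IP below are illustrative; 9080 is the default
+cluster manager port):
+
+```bash
+# On the source cluster: name it, then point it at the sink's cluster manager
+riak-repl clustername source_cluster
+riak-repl connect 192.168.1.10:9080
+
+# Verify that the cluster manager negotiated the connection
+riak-repl connections
+```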
+ +## Fullsync Replication + +Fullsync replication scans through the list of partitions in a Riak +cluster and determines which objects in the sink cluster need to be +updated. A source partition is synchronized to a node on the sink +cluster containing the current partition. + +## Realtime Replication + +In realtime replication, a node in the source cluster will forward data +to the sink cluster. A node in the source cluster does not necessarily +connect to a node containing the same [vnode][glossary vnode] on +the sink cluster. This allows Riak to spread out realtime replication +across the entire cluster, thus improving throughput and making +replication more fault tolerant. + +### Initialization + +Before a source cluster can begin pushing realtime updates to a sink, +the following commands must be issued: + +1. `riak-repl realtime enable <sink_cluster>` + + After this command, the realtime queues (one for each Riak node) are + populated with updates to the source cluster, ready to be pushed to + the sink. + +2. `riak-repl realtime start <sink_cluster>` + + This instructs the Riak connection manager to contact the sink + cluster. + + <br /> + ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png) + <br /> + + At this point realtime replication commences. + +<ol start="3"> +<li>Nodes with queued updates establish connections to the sink cluster +and replication begins.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png) +<br /> + +### Realtime queueing and synchronization + +Once initialized, realtime replication continues to use the queues to +store data updates for synchronization. + +<ol start="4"> +<li>The client sends an object to store on the source cluster.</li> +<li>Riak writes N replicas on the source cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png) +<br /> + +<ol start="6"> +<li>The new object is stored in the realtime queue.</li> +<li>The object is copied to the sink cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png) +<br /> + +<ol start="8"> +<li>The destination node on the sink cluster writes the object to N +nodes.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png) +<br /> + +<ol start="9"> +<li>The successful write of the object to the sink cluster is +acknowledged and the object removed from the realtime queue.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png) +<br /> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size][concept clusters]; if you are using fullsync +replication, every bucket's `n_val` must be the same in both the +source and sink cluster. 
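+
+A quick way to confirm that the ring sizes match is to compare the
+`ring_num_partitions` statistic on a node in each cluster (a minimal
+sketch; run it on one node per cluster):
+
+```bash
+# The reported value must be identical on the source and sink clusters
+riak-admin status | grep ring_num_partitions
+```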
diff --git a/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/cascading-writes.md
new file mode 100644
index 0000000000..06db97d111
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -0,0 +1,98 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Cascading Realtime Writes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Cascading Writes"
+    identifier: "managing_ref_v3_cascading_writes"
+    weight: 102
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.1/ops/mdc/v3/cascading-writes
+  - /riak/kv/2.9.1/ops/mdc/v3/cascading-writes
+---
+
+## Introduction
+
+Riak includes a feature that cascades realtime writes across
+multiple clusters.
+
+Cascading Realtime Writes is enabled by default on new clusters running
+Riak. It will need to be manually enabled on existing clusters.
+
+Cascading realtime requires the `{riak_repl, rtq_meta}` capability to
+function.
+
+{{% note title="Note on cascading tracking" %}}
+Cascading tracking is a simple list of where an object has been written. This
+works well for most common configurations. Larger installations, however, may
+have writes cascade to clusters to which other clusters have already written.
+{{% /note %}}
+
+
+```
++---+     +---+     +---+
+| A | <-> | B | <-> | C |
++---+     +---+     +---+
+  ^                   ^
+  |                   |
+  V                   V
++---+     +---+     +---+
+| F | <-> | E | <-> | D |
++---+     +---+     +---+
+```
+
+In the diagram above, a write at cluster A will begin two cascades. One
+goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and
+finally B. Each cascade will loop around to A again, sending a
+replication request even if the same request has already occurred from
+the opposite direction, creating 3 extra write requests.
+
+This can be mitigated by disabling cascading in a cluster. If cascading
+were disabled on cluster D, a write at A would begin two cascades. One
+would go through B, C, and D, the other through F, E, and D. This
+reduces the number of extraneous write requests to 1.
+
+A different topology can also prevent extra write requests:
+
+```
++---+                       +---+
+| A |                       | E |
++---+                       +---+
+  ^  ^                     ^  ^
+  |   \   +---+     +---+   /  |
+  |    >  | C | <-> | D |  <   |
+  |   /   +---+     +---+   \  |
+  V  V                     V  V
++---+                       +---+
+| B |                       | F |
++---+                       +---+
+```
+
+A write at A will cascade to C and B. B will not cascade to C because
+A will have already added C to the list of clusters where the write has
+occurred. C will then cascade to D. D then cascades to E and F. E and F
+see that the other was sent a write request (by D), and so they do not
+cascade.
+
+## Usage
+
+Riak Cascading Writes can be enabled and disabled using the
+`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/v3-multi-datacenter) for more information.
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
diff --git a/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..29226b3c96
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,68 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.1/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/2.9.1/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak-repl`
+section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes,
+  between fullsyncs, _or_
+* a list of `{"clustername", time_in_minutes}` pairs, one for each sink
+  participating in fullsync replication. Note the commas separating each
+  pair, and the `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    {fullsync_interval, 90} %% fullsync runs every 90 minutes
+    % ...
+    ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    % clusters sink_boston and sink_newyork have different intervals (in minutes)
+    {fullsync_interval, [
+        {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+        {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+    ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been
+  completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the
+  specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last
+  completed fullsync.
diff --git a/content/riak/kv/2.9.1/using/repair-recovery.md b/content/riak/kv/2.9.1/using/repair-recovery.md
new file mode 100644
index 0000000000..3c24732b20
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/repair-recovery.md
@@ -0,0 +1,48 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details the most common errors and messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
diff --git a/content/riak/kv/2.9.1/using/repair-recovery/errors.md b/content/riak/kv/2.9.1/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..b63f9c42d0
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/repair-recovery/errors.md
@@ -0,0 +1,362 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/recovery/errors
+  - /riak/kv/2.9.1/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may
+encounter -- screws fall out all of the time, the world is an imperfect
+place. This is an attempt at capturing the most common recent errors
+that users encounter, as well as giving some description of the
+noncritical error atoms which you may find in the logs.
+
+Discovering the source of an error can take some detective work, since
+one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error
+messages may appear in. Depending upon your log configuration, some may
+appear more often (e.g., if you set the log level to debug), while others may
+output to your console (e.g., if you tee'd your output or started Riak with `riak
+console`).
+
+You can optionally customize your log message format via the
+`lager_default_formatter` field under `lager` in `app.config`. If you
+do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so that you can look up portions of a
+log message, since printing every variation would be a bit unwieldy. For
+example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+It starts with a date (`12:34:27.999`), followed by the log severity
+(`[error]`), and ends with a message formatted by Lager (found in the Lager table
+below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the Lager project, so it's good to know
+some of the more common message formats. In almost every case the
+reasons for the error are described as variables, such as `Reason` or
+`Mod` (meaning the Erlang module which is generally the source of the
+error).
+
+Riak does not format all error messages that it receives into
+human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in
+this table, where the Erlang `Mod` value is `riak_core_capability` and
+the reason was an Erlang error: `no function clause matching
+orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang programming favors a "happy path/fail fast" style, one
+of the more common error log strings you might encounter contains
+`{error,{badmatch,{...`. This is Erlang's way of telling you that an
+unexpected value was assigned, so these errors can prefix the more
+descriptive parts. In this case, `{error,{badmatch,{...` prefixes the
+more interesting `insufficient_vnodes_available` error, which can be
+found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.1/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.1/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large of an object or that you simply don't have enough RAM. Base minimum suggested RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang
+atoms (and quite often wrapped within an `{error,{badmatch,{...` tuple,
+as described in the [Erlang Errors](#erlang-errors) section
+above). This table lays out those terse error codes and related log
+messages, if they exist.
+
+### Riak Core
+
+Riak Core is the underlying implementation for KV. These are errors
+originating from that framework, and can appear whether you use KV,
+Search, or any Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g. backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check the `riak-admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections, and ensure the Erlang cookie setting (`vm.args` `-setcookie`) matches on all nodes
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use (e.g., `{riak_kv_stat, true}`)
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possible corrupted ring
`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard handoff port is open between nodes
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN`, so the sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.1/using/repair-recovery/errors/#more"> Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive a `xfer_complete` status
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be
+Riak proper. This is the source of most of the code, and consequently,
+most of the error messages.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status
+`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
+`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that the coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.1/using/repair-recovery/errors/#more"> Step 1</a>.
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/2.9.1/using/repair-recovery/errors/#more"> Step 1</a>.
+`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone
+`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value
+`{field_parsing_failed, {Field, Value}}` | `Could not parse field <Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer
+`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug
+`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend
+`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`.
+`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings
+`{invalid_inputdef, InputDef}` | | Bad input definitions when running MapReduce | Fix input settings; set `mapred_system` from `legacy` to `pipe`
+`invalid_message` | | Unknown event sent to module | Ensure you're running the same versions of Riak (and specifically poolboy) across all nodes
+`{invalid_range, Args}` | | Index range query has Start > End | Fix your query
+`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result
+`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files
+`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key
+`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk. If this happens repetitively, stop/start the riak node, forcing a memory realloc
`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N
+`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized
+`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe`
+`notfound` | | No value found | Value was deleted, or was not yet stored or replicated
+`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high
+`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value
+`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code
+`{pw_val_unsatisfied, PR, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high
+`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value
+`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high
+`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value
+`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called
+`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value
+`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false`
+`timeout` | | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.1/using/repair-recovery/errors/#more"> Step 1</a>. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production.
+`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format
+`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format
+`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value
+`too_many_results` | | Too many results are attempted to be returned | This is a protective error. Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000)
Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000) +`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only value field types are `_int` and `_bin` +`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high +`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (on, all, quorum) | Set a valid W value + | `Invalid equality query <SKey>` | Equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query + | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary an index call | Pass in both range values when performing a 2i equality query + | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution + +### Backend Errors + +These errors tend to stem from server-based problems. Backends are +sensitive to low or corrupt disk or memory resources, native code, and +configuration differences between nodes. Conversely, a network issue is +unlikely to affect a backend. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config +`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where to store bitcask data, under the `bitcask` section +`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list +`{invalid_config_setting, multi_backend, list_is_empty`} | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config` +`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure +`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config` +`not_loaded` | | Native driver not loading | Ensure your native drivers exist (.dll or .so files {riak_kv_multi_backend, undefined_backend, BackendName} | | Backend defined for a bucket is invalid | Define a valid backed before using this bucket under lib/`project`/priv, where `project` is most likely eleveldb). +`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production + +### JavaScript + +These are some errors related to JavaScript pre-commit functions, +MapReduce functions, or simply the management of the pool of JavaScript +VMs. If you do not use JavaScript, these should not be encountered. If +they are, check your configuration for high `*js_vm*` values or as an +epiphenomenon to a real issue, such as low resources. 
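+
+If JavaScript is in heavy use, it can be worth checking how large those VM pools actually are. A minimal sketch of the relevant legacy `app.config` entries follows; the counts shown are illustrative only, not recommendations:
+
+```appconfig
+%% In the riak_kv section:
+
+{map_js_vm_count, 8},
+{reduce_js_vm_count, 6},
+{hook_js_vm_count, 2},
+```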
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`)
+`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A given UTF-8 character was in a bad format | Only use correct UTF-8 characters for JavaScript code and arguments
+`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments
+ | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties
+`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | In OS X you may have compiled with `llvm-gcc` rather than `gcc`.
+
+### MapReduce
+
+These are possible errors logged by Riak's MapReduce implementation,
+both legacy as well as Pipe. If you never use or call MapReduce, you
+should not run across these.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it
+`bad_mapred_inputs` | | A bad value sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary
+`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce code placed in a Riak value must be stored before it can be executed
+`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct
+`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (both issues may require a node restart)
+`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has invalid input field | Fix MapReduce fields
+`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks. Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite them as Erlang functions
+`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post MapReduce request with at least one
+`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value
+`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long-running job hitting the MapReduce timeout; upgrade to Pipe
+`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has invalid query field | Fix MapReduce query
+`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set a reduce phase at all
+`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted
+ | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation; the most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects
+ | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen
+
+## Specific Messages
+
+Although you can put together many error causes with the tables above,
+here are some common yet esoteric messages with known causes and
+solutions.
+
+ Message | Resolution
+:--------|:----------
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The node has been changed, either through a change of IP or of the `vm.args` `-name`, without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files (`rm -rf /var/lib/riak/ring/*`) and rejoin to the cluster
+gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without)
+monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting `+zdbbl` higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1 msec).
+<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info; you can add `+swt very_low` to your `vm.args`
+(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/2.9.1/using/repair-recovery/errors/#more">Step 2</a>.
+enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core.
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause and resolve this kind of error; check for the existence of stale `beam.smp` processes.
+exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range, as the `riak` user will not have access to such ports.
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/2.9.1/using/reference/snmp) configuration.
+RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
+
+
+### More
+
+1. <a name="f1"></a>Ensure node inter-communication
+    - Check `riak-admin member-status` and ensure the cluster is valid.
+    - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected.
+    - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node.
+    - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster.
+    - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`).
+
+2. <a name="f2"></a>Run LevelDB compaction
+    1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting; more than one might be a strong indication of a hardware or OS bug)*
+    2. Stop Riak on the node: `riak stop`
+    3. Start an Erlang session (do not start Riak; we just want Erlang)
+    4. From the Erlang console, perform the following command to open the LevelDB database:
+
+        ```erlang
+        [application:set_env(eleveldb, Var, Val) || {Var, Val} <-
+            [{max_open_files, 2000},
+             {block_size, 1048576},
+             {cache_size, 20*1024*1024*1024},
+             {sync, false},
+             {data_root, "/var/db/riak/leveldb"}]].
+        ```
+    5. For each of the corrupted LevelDB databases (found by the `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` command above), run this command, substituting in the proper vnode number:
+
+        ```erlang
+        eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []).
+        ```
+    6. When all have finished successfully, you may restart the node: `riak start`
+    7. Check for proper operation by looking at log files in `/var/log/riak` and in the `LOG` files in the affected LevelDB vnodes.
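+
+        For a quick check, you can re-run the scan from step 1 and confirm that the repaired vnodes no longer report new compaction errors:
+
+        ```bash
+        find . -name "LOG" -exec grep -l 'Compaction error' {} \;
+        ```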
diff --git a/content/riak/kv/2.9.1/using/repair-recovery/failed-node.md b/content/riak/kv/2.9.1/using/repair-recovery/failed-node.md
new file mode 100644
index 0000000000..994a2c5154
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/repair-recovery/failed-node.md
@@ -0,0 +1,110 @@
+---
+title: "Recovering a Failed Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Recover a Failed Node"
+    identifier: "repair_recover_failed_node"
+    weight: 104
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/recovery/failed-node
+  - /riak/kv/2.9.1/ops/running/recovery/failed-node
+---
+
+## General Recovery Notes
+
+A Riak node can fail for many reasons, but a handful of checks enable you to
+uncover some of the most common problems that can lead to node failure,
+such as checking for RAID and filesystem consistency or faulty memory and
+ensuring that your network connections are fully functioning.
+
+When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster.
+
+During the recovery process, hinted handoff will kick in and update the data on
+the recovered node with updates accepted from other nodes in the cluster. Your
+cluster may temporarily return `not found` for objects that are currently
+being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on
+these scenarios, in particular how the system behaves while the failed node is
+not part of the cluster).
+
+## Node Name Changed
+
+If you are recovering from a scenario in which node name changes are out of
+your control, you'll want to notify the cluster of its *new* name using the
+following steps:
+
+1. Stop the node you wish to rename:
+
+    ```bash
+    riak stop
+    ```
+
+2. Mark the node down from another node in the cluster:
+
+    ```bash
+    riak-admin down <previous_node_name>
+    ```
+
+3. Update the node name in Riak's configuration files:
+
+    ```riakconf
+    nodename = <updated_node_name>
+    ```
+
+    ```vmargs
+    -name <updated_node_name>
+    ```
+
+4. Delete the ring state directory (usually `/var/lib/riak/ring`).
+
+5. Start the node again:
+
+    ```bash
+    riak start
+    ```
+
+6. Ensure that the node comes up as a single instance:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    The output should look something like this:
+
+    ```
+    ========================= Membership ==========================
+    Status     Ring    Pending    Node
+    ---------------------------------------------------------------
+    valid     100.0%      --      'dev-rel@127.0.0.1'
+    ---------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+7. Join the node to the cluster:
+
+    ```bash
+    riak-admin cluster join <node_name_of_a_member_of_the_cluster>
+    ```
+
+8. Replace the old instance of the node with the new:
+
+    ```bash
+    riak-admin cluster force-replace <previous_node_name> <new_node_name>
+    ```
+
+9. Review the changes:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Finally, commit those changes:
+
+    ```bash
+    riak-admin cluster commit
+    ```
diff --git a/content/riak/kv/2.9.1/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.9.1/using/repair-recovery/failure-recovery.md
new file mode 100644
index 0000000000..f9ffa591d8
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/repair-recovery/failure-recovery.md
@@ -0,0 +1,125 @@
+---
+title: "Failure & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Failure & Recovery"
+    identifier: "repair_recover_failure"
+    weight: 100
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/recovery/failure-recovery
+  - /riak/kv/2.9.1/ops/running/recovery/failure-recovery
+---
+
+Riak was built to withstand---or at the very least reduce the severity
+of---many types of system failure. Nonetheless, bugs are a reality,
+hardware does break, and occasionally Riak itself will fail. Here, we'll
+list some steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+## Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally and
+are not affected by a wider problem like a virtualization or network outage.
+Try to determine the cause of the problem from the data you have collected.
+
+## Data Loss
+
+Many failures incur no data loss or minimal loss that can be
+repaired automatically, without intervention. Outage of a single node
+does not necessarily cause data loss, as other replicas of every key are
+available elsewhere in the cluster. Once the node is detected as down,
+other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/2.9.1/learn/glossary/#hinted-handoff)).
+
+More severe data loss scenarios usually relate to hardware failure.
+If data is lost, several options are available for restoring it.
+
+1. **Restore from backup** --- A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but it can be used to partially restore data from
+   lost storage volumes. If running in a RAID configuration, rebuilding
+   the array may also be possible.
+2. **Restore from multi-cluster replication** --- If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the `riak-repl`
+   command.
+3. **Restore using intra-cluster repair** --- Riak versions 1.2 and greater
+   include a repair feature which will restore lost partitions with
+   data from other replicas. Currently, this must be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho Client Services Engineer.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho are strongly recommended.
+
+## Data Corruption
+
+Data at rest on disk can become corrupted by hardware failure or other
+events. Generally, the Riak storage backends are designed to handle
+cases of corruption in individual files or entries within files, and can
+repair them automatically or simply ignore the corrupted parts.
+Otherwise, clusters can recover from data corruption in roughly the same
+way that they recover from data loss.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully.
+
+Replacing the node with one that has greater RAM capacity may temporarily
+alleviate the problem, but out-of-memory (OOM) issues tend to be an indication
+that the cluster is under-provisioned.
+
+## High Latency / Request Timeout
+
+High latencies and timeouts can be caused by slow disks or networks or an
+overloaded node. Check `iostat` and `vmstat` or your monitoring system to
+determine the state of resource usage. If I/O utilization is high but
+throughput is low, this may indicate that the node is responsible for
+too much data and growing the cluster may be necessary. Additional RAM
+may also improve latency because more of the active dataset will be
+cached by the operating system.
+
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/2.9.1/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+the number of siblings, causing longer disk service times and slower
+network responses.
+
+Sibling explosion can be detected by examining the `node_get_fsm_siblings`
+and `node_get_fsm_objsize` statistics from the `riak-admin status` command.
+To recover from sibling explosion, the application should be throttled and
+the resolution policy might need to be invoked manually on offending keys.
+
+A Basho CSE can assist in manually finding large values, i.e. those that
+potentially have a sibling explosion problem, in the storage backend.
+
+MapReduce requests typically involve multiple I/O operations and are
+thus the most likely to time out. From the perspective of the client
+application, the success of MapReduce requests can be improved by reducing the
+number of inputs, supplying a longer request timeout, and reducing the usage
+of secondary indexes. Heavily loaded clusters may experience more MapReduce
+timeouts simply because many other requests are being serviced as well. Adding
+nodes to the cluster can reduce MapReduce failure in the long term by
+spreading load and increasing available CPU and IOPS.
+
+
+## Cluster Recovery From Backups
+
+See [Changing Cluster Information]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+
+{{% note title="Tip" %}}
+If you are a TI Tokyo Riak support customer and require assistance or
+further advice with a cluster recovery, please file a ticket with the
+<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>.
+{{% /note %}} diff --git a/content/riak/kv/2.9.1/using/repair-recovery/repairs.md b/content/riak/kv/2.9.1/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..2b98f505b7 --- /dev/null +++ b/content/riak/kv/2.9.1/using/repair-recovery/repairs.md @@ -0,0 +1,387 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.1/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.1/ops/running/recovery/repairing-indexes + - /riak/2.9.1/ops/running/recovery/failed-node + - /riak/kv/2.9.1/ops/running/recovery/failed-node + - /riak/2.9.1/ops/running/recovery/repairing-leveldb + - /riak/kv/2.9.1/ops/running/recovery/repairing-leveldb + - /riak/2.9.1/ops/running/recovery/repairing-partitions + - /riak/kv/2.9.1/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/2.9.1/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/2.9.1/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/2.9.1/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`.
+
+Compaction error messages take the following form:
+
+```
+<timestamp> Compaction Error: Corruption: corrupted compressed block contents
+```
+
+To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction Error` in each `LOG` file. Here is an example script:
+
+```bash
+find . -name "LOG" -exec grep -l 'Compaction error' {} \;
+```
+
+If there are compaction errors in any of your vnodes, those will be listed in the console. If any vnode has experienced such errors, you would see output like this:
+
+```
+./442446784738847563128068650529343492278651453440/LOG
+```
+
+
+{{% note %}}
+While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level.
+{{% /note %}}
+
+
+## Healing Corrupted LevelDBs
+
+When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not.
+
+Choose your setup below:
+
+1. [Just LevelDB](#leveldb)
+2. [LevelDB with tiered storage](#leveldb-with-tiered-storage)
+
+
+### LevelDB
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+3\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+4\. Then set `Options` equal to an empty list:
+
+```erlang
+Options = [].
+```
+
+5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB that you found using the [`find` command above](#checking-for-compaction-errors).
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`:
+
+```erlang
+%% Build the path for a vnode and log which one is being repaired
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+### LevelDB with Tiered Storage
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. Check your `riak.conf` file and make note of the following values:
+
+* `leveldb.tiered` (integer)
+* `leveldb.tiered.path.fast`
+* `leveldb.tiered.path.slow`
+
+3\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+4\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+5\. Then supply the information you noted in Step 2:
+
+```erlang
+Options = [
+    {tiered_slow_level, »leveldb.tiered value«},
+    {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"},
+    {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"}
+].
+```
+
+6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB partition that you found using the [`find` command above](#checking-for-compaction-errors), each given in double quotes.
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`:
+
+```erlang
+%% Build the path for a vnode and log which one is being repaired
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+
+## Repairing Partitions
+
+If you have experienced a loss of object replicas in your cluster, you
+may need to perform a repair operation on one or more of your data
+[partitions]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+run in situations where partitions or whole nodes are lost due to
+corruption or hardware failure. In these cases, nodes or partitions are
+brought back online without any data, which means that the need to
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/) is enabled.
+
+You will need to run a repair if the following are both true:
+
+* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* You have both non-expiring data and keys that are not accessed
+  frequently (which means that they are not likely to be subject to
+  [read repair]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+
+You will most likely not need to run a repair operation if _any_ of the
+following is true:
+
+* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/2.9.1/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Your entire key set is accessed frequently, allowing passive read
+  repair to repair the partitions
+* Your data expires frequently
+
+In most cases, we recommend either using active anti-entropy or, if
+necessary and only when necessary, running a repair operation using the
+instructions below.
+
+### Running a Repair
+
+The Riak KV repair operation will repair objects from a node's adjacent
+partitions on the ring, consequently fixing the index. This is done as
+efficiently as possible by generating a hash range for all the buckets
+and thus avoiding a preflist calculation for each key. Only a hash of
+each key is done, its range determined from a bucket->range map, and
+then the hash is checked against the range.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make them
+mutually exclusive events. If you join or remove a node, all repairs
+across the entire cluster will be killed.
+
+### Repairing a Single Partition
+
+In the case of data loss in a single partition, only that partition can
+be repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit **Enter** again to get a console prompt.
+
+2. Execute the repair for a single partition using the below command:
+
+    ```erlang
+    riak_kv_vnode:repair(»Partition ID«).
+    ```
+
+    where `»Partition_ID«` is replaced by the ID of the partition to
+    repair. For example:
+
+    ```erlang
+    riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
+    ```
+
+3. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Repairing All Partitions on a Node
+
+If a node is lost, all partitions currently owned by that node can be
+repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+2. Get a copy of the current Ring:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with ring record information.
+    You can safely ignore it.
+
+3. Get a list of partitions owned by the node that needs to be repaired.
+Replace `dev1@127.0.0.1` with the name of the node to be repaired. The
+name can be found in each node's `vm.args` file, specified as the
+`-name` parameter, if you are using the older configuration system; if
+you are using the newer, `riak.conf`-based system, the name is given by
+the `nodename` parameter.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    **Note**: The above is an [Erlang list
+    comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html)
+    that loops over each `{Partition, Node}` tuple in the ring and
+    extracts only the partitions that match the given node name, as a
+    list.
+
+
+4. Execute the repair on all the partitions. Executing the repairs all
+at once will cause a lot of `{shutdown, max_concurrency}` messages in
+the logs. These can be safely ignored, as it is just the transfers
+mechanism enforcing an upper limit on the number of concurrent
+transfers.
+
+    ```erlang
+    [riak_kv_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Monitoring Repairs
+
+The above repair commands can be monitored via the `riak-admin
+transfers` command.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This command can be executed from a `riak attach`
+session like below:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, and will
+look similar to:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Repairs on a node can also be killed remotely from another node in the
+cluster. From a `riak attach` session the below command can be used:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
diff --git a/content/riak/kv/2.9.1/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.9.1/using/repair-recovery/rolling-replaces.md
new file mode 100644
index 0000000000..d5e2667474
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/repair-recovery/rolling-replaces.md
@@ -0,0 +1,71 @@
+---
+title: "Rolling Replaces"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Rolling Replaces"
+    identifier: "repair_recover_replace"
+    weight: 106
+    parent: "managing_repair_recover"
+toc: true
+---
+
+[upgrade]: {{<baseurl>}}riak/kv/2.9.1/setup/upgrading/cluster/
+[rolling restarts]: {{<baseurl>}}riak/kv/2.9.1/using/repair-recovery/rolling-restart/
+[add node]: {{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes
+
+Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis.
+
+The following steps should be undertaken on each Riak KV node that you wish to replace:
+
+1\. Create a free node:
+
+  a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster.
+
+  b\. Or leave a node that is currently in the cluster:
+
+    ```bash
+    riak-admin cluster leave »nodename«
+    ```
+
+    After creating a node or leaving a node, wait for all transfers to complete:
+
+    ```bash
+    riak-admin transfers
+    ```
+
+2\. Join the free node to your cluster:
+
+```bash
+riak-admin cluster join »free_node«
+```
+
+3\. Next, replace the free node with an existing node:
+
+```bash
+riak-admin cluster replace »free_node« »nodename«
+```
+
+4\. Then review the cluster transition plan:
+
+```bash
+riak-admin cluster plan
+```
+
+5\. And commit the changes:
+
+```bash
+riak-admin cluster commit
+```
+
+6\. Wait for all transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+7\. Repeat steps 2-6 above until each node has been replaced (a consolidated sketch of one cycle is shown below).
+
+8\. Join the replaced node back into the cluster or decommission the additional node that was created.
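+
+For quick reference, steps 2-6 for a single node condense to the sketch below; the »«-marked names are placeholders, as elsewhere on this page:
+
+```bash
+# join the free node, then swap it in for the node being replaced
+riak-admin cluster join »free_node«
+riak-admin cluster replace »free_node« »nodename«
+
+# review the staged plan, then commit it
+riak-admin cluster plan
+riak-admin cluster commit
+
+# wait until this reports no active transfers before moving on
+riak-admin transfers
+```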
diff --git a/content/riak/kv/2.9.1/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.9.1/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..08130147a4 --- /dev/null +++ b/content/riak/kv/2.9.1/using/repair-recovery/rolling-restart.md @@ -0,0 +1,60 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.1/ops/running/recovery/rolling-restart + - /riak/kv/2.9.1/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/2.9.1/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. diff --git a/content/riak/kv/2.9.1/using/repair-recovery/secondary-indexes.md b/content/riak/kv/2.9.1/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..0088811947 --- /dev/null +++ b/content/riak/kv/2.9.1/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,138 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.1/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.1/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
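+
+If you first need to determine which partition IDs live on a given node, recent Riak KV releases (2.0+) ship a `riak-admin cluster partitions` command that lists them; `<nodename>` here is a placeholder, and you should verify that the command is available on your version:
+
+```bash
+# list the partitions owned by a particular node
+riak-admin cluster partitions --node <nodename>
+```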
+
+### Running a Repair
+
+The secondary indexes of a single partition can be repaired by executing:
+
+```bash
+riak-admin repair-2i <Partition_ID>
+```
+
+The secondary indexes of every partition can be repaired by executing the same command, without a partition ID:
+
+```bash
+riak-admin repair-2i
+```
+
+### Monitoring a Repair
+
+Repairs can be monitored using the below command:
+
+```bash
+riak-admin repair-2i status
+```
+
+### Killing a Repair
+
+In the event the secondary index repair operation needs to be halted, all repairs can be killed with:
+
+```bash
+riak-admin repair-2i kill
+```
+
+----
+
+## Repairing Search Indexes
+
+Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned.
+
+### Running a Repair
+
+If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index.
+
+This is done as efficiently as possible by generating a hash range for all the buckets and thus avoiding a preflist calculation for each key. Only a hash of each key is done, its range determined from a bucket→range map, and then the hash is checked against the range.
+
+This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly.
+
+1. From a cluster node with Riak installed, attach to the Riak console:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit enter again to get a console prompt.
+
+2. Get a list of partitions owned by the node that needs repair:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with Ring record information. You can safely ignore it.
+
+3. Then run the following code to get a list of partitions. Replace 'dev1@127.0.0.1' with the name of the node you need to repair.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the Ring and extracts only the partitions that match the given node name, as a list._
+
+4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` spam, but it's not anything to worry about. That is just the transfers mechanism enforcing an upper limit on the number of concurrent transfers.
+
+    ```erlang
+    [riak_search_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. When you're done, press `Ctrl-D` to disconnect the console. DO NOT RUN `q()`, which will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service; it does not stop the code from running.
+
+
+### Monitoring a Repair
+
+The above repair command can be slow, so if you reattach to the console, you can run the `repair_status` function. You can use the `Partitions` variable defined above to get the status of every partition.
+
+```erlang
+[{P, riak_search_vnode:repair_status(P)} || P <- Partitions].
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This means you'll either have to be attached to
+that node's console or you can use the `rpc` module to make a remote
+call. Here is an example of killing all repairs targeting partitions
+on the local node.
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, something akin to this:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Here is an example of executing the call remotely.
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make
+them mutually exclusive events. If you join or remove a node, all
+repairs across the entire cluster will be killed.
diff --git a/content/riak/kv/2.9.1/using/running-a-cluster.md b/content/riak/kv/2.9.1/using/running-a-cluster.md
new file mode 100644
index 0000000000..b08a2732f1
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/running-a-cluster.md
@@ -0,0 +1,335 @@
+---
+title: "Running a Cluster"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Running a Cluster"
+    identifier: "managing_running_a_cluster"
+    weight: 200
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/building/basic-cluster-setup
+  - /riak/kv/2.9.1/ops/building/basic-cluster-setup
+---
+
+Configuring a Riak cluster involves instructing each node to listen on a
+non-local interface, i.e. not `127.0.0.1`, and then joining all of the
+nodes together to participate in the cluster.
+
+Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/) located in your `rel/riak/etc` directory (if
+you compiled from source) or `/etc` (if you used a binary install of
+Riak).
+
+The commands below presume that you are running from a source install,
+but if you have installed Riak with a binary install, you can substitute
+the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin`
+with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts
+are located in the `/bin` directory of your installation.
+
+> **Note on changing the `name` value**
+>
+> If possible, you should avoid starting Riak prior to editing the name of
+a node. This setting corresponds to the `nodename` parameter in the
+`riak.conf` file if you are using the newer configuration system, and to
+the `-name` parameter in `vm.args` (as described below) if you are using
+the older configuration system. If you have already started Riak with
+the default settings, you cannot change the `-name` setting and then
+successfully restart the node.
+>
+> If you cannot restart after changing the `-name` value you have two
+options:
+>
+> * Discard the existing ring metadata by removing the contents of the
+`ring` directory (a minimal sketch of this is shown below). This will require rejoining all nodes into a
+cluster again.
+>
+> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
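+
+A minimal sketch of the first option, assuming the default ring directory used elsewhere in these docs (`/var/lib/riak/ring`); `<cluster_member_node>` is a placeholder for any existing member of the cluster:
+
+```bash
+riak stop
+
+# discard the ring metadata; the node forgets its cluster membership
+rm -rf /var/lib/riak/ring/*
+
+riak start
+
+# rejoin the node, then review and commit the change
+riak-admin cluster join <cluster_member_node>
+riak-admin cluster plan
+riak-admin cluster commit
+```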
+
+## Configure the First Node
+
+First, stop your Riak node if it is currently running:
+
+```bash
+riak stop
+```
+
+#### Select an IP address and port
+
+Let's say that the IP address for your cluster is 192.168.1.10 and that
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+interface due to performance gains), you should change your
+configuration file:
+
+```riakconf
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"127.0.0.1", 8087 },
+```
+
+becomes
+
+```riakconf
+listener.protobuf.internal = 192.168.1.10:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"192.168.1.10", 8087 },
+```
+
+{{% note title="Note on upgrading to 2.0" %}}
+If you are upgrading to Riak version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config`/`vm.args`
+configuration files or the newer `riak.conf` if you wish. If you have
+installed Riak 2.0 directly, you should use only `riak.conf`.
+
+Below, examples will be provided for both the old and new configuration
+systems. Bear in mind that you need to use either the older or the newer
+system, but never both simultaneously.
+
+More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference).
+{{% /note %}}
+
+If you're using the HTTP interface, you will need to alter your
+configuration in an analogous way:
+
+```riakconf
+listener.http.internal = 127.0.0.1:8098
+```
+
+```appconfig
+%% In the riak_core section:
+
+{http, [ {"127.0.0.1", 8098 } ]},
+```
+
+becomes
+
+```riakconf
+listener.http.internal = 192.168.1.10:8098
+```
+
+```appconfig
+{http, [ {"192.168.1.10", 8098 } ]},
+```
+
+#### Name your node
+
+Every node in Riak has a name associated with it. The default name is
+`riak@127.0.0.1`. Let's say that you want to change the name to
+`riak@192.168.1.10`:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+becomes
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+> **Node Names**
+>
+> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10`
+are both acceptable node naming schemes, but using the FQDN style is
+preferred.
+>
+> Once a node has been started, in order to change the name you must
+either remove ring files from the `/data/ring` directory or
+[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.1/using/admin/riak-admin/#cluster-force-replace) the node.
+
+#### Start the node
+
+Now that your node is properly configured, you can start it:
+
+```bash
+riak start
+```
+
+If the Riak node has been previously started, you must use the
+`riak-admin cluster replace` command to change the node name and update
+the node's ring file.
+
+```bash
+riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10
+```
+
+{{% note title="Note on single nodes" %}}
+If a node is started singly using default settings, as you might do when you
+are building your first test environment, you will need to remove the ring
+files from the data directory after you edit your configuration files.
+`riak-admin cluster replace` will not work since the node has not been joined
+to a cluster.
+{{% /note %}} + +As with all cluster changes, you need to view the planned changes by +running `riak-admin cluster plan` and then running `riak-admin cluster +commit` to finalize those changes. + +The node is now properly set up to join other nodes for cluster +participation. You can proceed to adding a second node to the cluster. + +## Add a Second Node to Your Cluster + +Repeat the above steps for a second host on the same network, providing +the second node with a host/port and node name. Once the second node has +started, use `riak-admin cluster join` to join the second node to the +first node, thereby creating an initial Riak cluster. Let's say that +we've named our second node `riak@192.168.1.11`. From the new node's +`/bin` directory: + +```bash +riak-admin cluster join riak@192.168.1.10 +``` + +Output from the above should resemble: + +``` +Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10` +``` + +Next, plan and commit the changes: + +```bash +riak-admin cluster plan +riak-admin cluster commit +``` + +After the last command, you should see: + +``` +Cluster changes committed +``` + +If your output was similar, then the second Riak node is now part of the +cluster and has begun syncing with the first node. Riak provides several +ways to determine the cluster's ring status. Here are two ways to +examine your Riak cluster's ring: + +1. Using the `riak-admin` command: + + ```bash + bin/riak-admin status | grep ring_members + ``` + + With output resembling the following: + + ```bash + ring_members : ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +2. Running the `riak attach` command. This will open up an Erlang shell, +into which you can type the following command: + + ```erlang + 1> {ok, R} = riak_core_ring_manager:get_my_ring(). + + %% Response: + + {ok,{chstate,'riak@192.168.1.10',......... + (riak@192.168.52.129)2> riak_core_ring:all_members(R). + ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +To join additional nodes to your cluster, repeat the above steps. You +can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/2.9.1/using/cluster-operations/adding-removing-nodes) from a cluster. + +> **Ring Creation Size** +> +> All nodes in the cluster +must have the same initial ring size setting in order to join, and +participate in cluster activity. This setting can be adjusted in your +configuration file using the `ring_creation_size` parameter if you're +using the older configuration system or `ring_size` in the new system. +> +> Check the value of all nodes if you receive a message like this: +> `Failed: riak@10.0.1.156 has a different ring_creation_size` + +## Running Multiple Nodes on One Host + +If you built Riak from source code, or if you are using the Mac OS X +pre-built package, then you can easily run multiple Riak nodes on the +same machine. The most common scenario for doing this is to experiment +with running a Riak cluster. + +**Note**: If you have installed the `.deb` or `.rpm` package, then you +will need to download and build Riak from source to follow the +directions below. + +To run multiple nodes, make copies of the `riak` directory. + +- If you ran `make all rel`, then this can be found in `./rel/riak` + under the Riak source root directory. +- If you are running Mac OS X, then this is the directory where you + unzipped the `.tar.gz` file. + +Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`, +`./rel/riak3`, and so on, you need to make two changes: + +1. 
Set your handoff port and your Protocol Buffers or HTTP port +(depending on which interface you are using) to different values on each +node. For example: + + ```riakconf + # For Protocol Buffers: + listener.protobuf.internal = 127.0.0.1:8187 + + # For HTTP: + listener.http.internal = 127.0.0.1:8198 + + # For either interface: + handoff.port = 8199 + ``` + + ```appconfig + %% In the pb section of riak_core: + {"127.0.0.1", 8187 } + + %% In the http section of riak_core: + {"127.0.0.1", 8198} + ``` + +2. Change the name of each node to a unique name. Now, start the nodes, +changing path names and nodes as appropriate: + +```bash +./rel/riak1/bin/riak start +./rel/riak2/bin/riak start +./rel/riak3/bin/riak start + +# etc +``` + +Next, join the nodes into a cluster: + +```bash +./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak2/bin/riak-admin cluster plan +./rel/riak2/bin/riak-admin cluster commit +``` + +## Multiple Clusters on One Host + +Using the above technique, it is possible to run multiple clusters on +one computer. If a node hasn’t joined an existing cluster, it will +behave just as a cluster would. Running multiple clusters on one +computer is simply a matter of having two or more distinct nodes or +groups of clustered nodes. diff --git a/content/riak/kv/2.9.1/using/security.md b/content/riak/kv/2.9.1/using/security.md new file mode 100644 index 0000000000..80c989608f --- /dev/null +++ b/content/riak/kv/2.9.1/using/security.md @@ -0,0 +1,195 @@ +--- +title: "Security & Firewalls" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Security" + identifier: "managing_security" + weight: 205 + parent: "managing" +toc: true +aliases: + - /riak/2.9.1/ops/advanced/security + - /riak/kv/2.9.1/ops/advanced/security +--- + +[config reference search]: {{<baseurl>}}riak/kv/2.9.1/configuring/reference/#search +[config search enabling]: {{<baseurl>}}riak/kv/2.9.1/configuring/search/#enabling-riak-search +[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.1/configuring/v3-multi-datacenter/ssl +[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html +[security basics]: {{<baseurl>}}riak/kv/2.9.1/using/security/basics +[security managing]: {{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/ +[Solr]: http://lucene.apache.org/solr/ +[usage search]: {{<baseurl>}}riak/kv/2.9.1/developing/usage/search + +> **Internal security** +> +> This document covers network-level security. For documentation on the +authentication and authorization features introduced in Riak 2.0, see +[Authentication and Authorization][security basics] and [Managing Security Sources][security managing] + +This article discusses standard configurations and port settings to use +when providing network security for a Riak Cluster. There are two +classes of access control for Riak: + +* Other Riak nodes participating in the cluster +* Clients making use of the Riak cluster + +The settings for both access groups are located in your cluster's +configuration settings. If you are using the newer configuration system, +you can set a host and port for each node in that node's `riak.conf` +file, setting `listener.protobuf` if you are using Riak's Protocol +Buffers interface or `listener.http` if you are using HTTP (or +`listener.https` if you are using SSL). 
If you are using the older
+configuration system, adjust the settings of `pb`, `http`, or `https`,
+depending on which client interface you are using.
+
+Make note of these configurations and set up your firewall to allow
+incoming TCP access to those ports or IP address/port combinations.
+Exceptions to this are the `handoff_ip` and `handoff_port` directives.
+Those are for communication between Riak nodes only.
+
+## Inter-node Communication
+
+Riak uses the Erlang distribution mechanism for most inter-node
+communication. Riak identifies other machines in the ring using Erlang
+identifiers (`riak@<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang
+resolves these node identifiers to a TCP port on a given machine via the
+Erlang Port Mapper daemon (epmd) running on each cluster node.
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. For inter-node communication, Erlang uses an unpredictable
+port by default; it binds to port 0, which means the first available
+port.
+
+For ease of firewall configuration, Riak can be configured
+to instruct the Erlang interpreter to use a limited range
+of ports. For example, to restrict the range of ports that Erlang will
+use for inter-Erlang node communication to 6000-7999, add the following
+lines to the configuration file on each Riak node:
+
+```riakconf
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+```
+
+```appconfig
+{ kernel, [
+           {inet_dist_listen_min, 6000},
+           {inet_dist_listen_max, 7999}
+          ]},
+```
+
+The above lines should be added into the top level list in app.config,
+at the same level as all the other applications (e.g. `riak_core`).
+Then configure your firewall to allow incoming access to TCP ports 6000
+through 7999 from whichever network(s) contain your Riak nodes.
+
+### Riak Node Ports
+
+Riak nodes in a cluster need to be able to communicate freely with one
+another on the following ports:
+
+* epmd listener: TCP:4369
+* handoff_port listener: TCP:8099
+* range of ports specified in `app.config` or `riak.conf`
+
+### Riak Client Ports
+
+Riak clients must be able to contact at least one machine in a Riak
+cluster on the following TCP ports:
+
+Protocol | Port
+:--------|:----
+<a href="../../developing/api/http">HTTP</a> | TCP port 8098
+<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087
+
+### Riak Search Ports
+
+Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs
+on each Riak node if search has been [enabled][config search enabling]. When
+Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well
+as some ephemeral ports. The well-known port is determined by the value of the
+`search.solr.jmx_port` in each node's [Search configuration][config reference search].
+The default is 8985.
+
+In addition to JMX ports, Solr also binds to a well-known port of its
+own, as determined by each node's `search.solr.port` setting, which is
+also located in each node's Search configuration. The default is 8093.
+
+# Riak Security Community
+
+## Riak
+
+Riak is a powerful open-source distributed database focused on scaling
+predictably and easily, while remaining highly available in the face of
+server crashes, network partitions or other (inevitable) disasters.
+
+## Commitment
+
+Data security is an important and sensitive issue to many of our users. 
+A real-world approach to security allows us to balance appropriate
+levels of security and related overhead while creating a fast, scalable,
+and operationally straightforward database.
+
+### Continuous Improvement
+
+Though we make every effort to thwart security vulnerabilities whenever
+possible (including through independent reviews), no system is
+completely secure. We will never claim that Riak is 100% secure (and you
+should seriously doubt anyone who claims their solution is). What we can
+promise is that we openly accept all vulnerabilities from the community.
+When appropriate, we'll publish and make every attempt to quickly
+address these concerns.
+
+### Balance
+
+More layers of security increase operational and administrative costs.
+Sometimes those costs are warranted, sometimes they are not. Our
+approach is to strike an appropriate balance between effort, cost, and
+security.
+
+For example, Riak does not have fine-grained role-based security. Though
+it can be an attractive bullet point in a database comparison chart,
+you're usually better off finely controlling data access through your
+application or a service layer.
+
+### Notifying Basho
+
+If you discover a potential security issue, please email us at
+**security@basho.com**, and allow us 48 hours to reply.
+
+We prefer to be contacted first rather than finding out about an issue
+from posts elsewhere on the Internet. This allows us to open a dialogue
+with the security community on how best to handle a possible exploit
+without putting any users at risk.
+
+## Security Best Practices
+
+### Authentication and Authorization
+
+For instructions on how to apply permissions and to require client
+authentication, please see our documentation on [Riak Security][security basics].
+
+### Network Configurations
+
+Being a distributed database means that much of Riak's security springs
+from how you configure your network. We have a few recommendations for
+[Security and Firewalls]({{<baseurl>}}riak/kv/2.9.1/using/security/).
+
+### Client Auth
+
+All of the Riak client libraries support encrypted TCP communication
+as well as authentication and authorization. For instructions on how
+to apply permissions and to require client authentication, please see
+our documentation on [Riak Security][security basics].
+
+### Multi-Datacenter Replication
+
+For those versions of Riak that support Multi Data Center (MDC)
+Replication, you can configure Riak 1.2+ to communicate over SSL to
+seamlessly encrypt the message traffic.
+
+See also: [Multi Data Center Replication: SSL][config v3 ssl]
diff --git a/content/riak/kv/2.9.1/using/security/basics.md b/content/riak/kv/2.9.1/using/security/basics.md
new file mode 100644
index 0000000000..3b7c719681
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/security/basics.md
@@ -0,0 +1,847 @@
+---
+title: "Security Basics"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Security Basics"
+    identifier: "security_basics"
+    weight: 100
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/authz
+  - /riak/kv/2.9.1/ops/running/authz
+---
+
+> **Note on Network security**
+>
+> This document covers only the 2.0 authentication and authorization
+features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.1/using/security/). 
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/2.9.1/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/2.9.1/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
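+
+For a quick overview, the three top-level operations are sketched
+below; each is covered in detail in the sections that follow:
+
+```bash
+riak-admin security status    # check whether security is enabled
+riak-admin security enable    # turn security on
+riak-admin security disable   # turn security off
+```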
+ +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak-admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak-admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak-admin security disable +``` + +While security is disabled, clients will need to be reconfigured to no +longer require TLS and send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak-admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics and granting users +selective access to Riak functionality (and also to revoke access). +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created. 
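+
+For example, a password and a group membership can be set in a single
+`add-user` call, as in the following sketch, which assumes a group
+named `dev` has already been created:
+
+```bash
+# Any number of <option>=<value> pairs can be appended to the command
+riak-admin security add-user riakuser password=Test1234 groups=dev
+```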
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak-admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak-admin security print-groups
+```
+
+Example output, assuming a user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups |       password       |           options            |
++----------+--------+----------------------+------------------------------+
+| riakuser |        |983e8ae1421574b8733824|              []              |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in encrypted form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and a `name` of
+`lucius`, the output would look like this:
+
+```
++----------+----------------+----------------------+---------------------+
+| username |     groups     |       password       |       options       |
++----------+----------------+----------------------+---------------------+
+| riakuser |      dev       |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` or `security print-group` (singular) commands
+can be used with a name as an argument to see the same information as
+above, but for only that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak-admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group  |   type   |  bucket  |                 grants                 |
++--------+----------+----------+----------------------------------------+
+| admin  |    *     |    *     |      riak_kv.get, riak_kv.delete,      |
+|        |          |          |              riak_kv.put               |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|    *     |      *      |      riak_kv.get, riak_kv.delete,      |
+|          |             |              riak_kv.put               |
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately. 
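+
+For example, if the name `admin` is used for both a user and a group,
+the grants for each can be inspected separately:
+
+```bash
+riak-admin security print-grants user/admin
+riak-admin security print-grants group/admin
+```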
+ +### Add Group + +For easier management of permissions across several users, it is +possible to create groups to be assigned to those users. + +```bash +riak-admin security add-group admin +``` + +### Add User + +To create a user with the username `riakuser`, we use the `add-user` +command: + +```bash +riak-admin security add-user riakuser +``` + +Using the command this way will create the user `riakuser` without _any_ +characteristics beyond a username, which is the only attribute that you +must assign upon user creation. + +Alternatively, a password---or other attributes---can be assigned to the +user upon creation. Here, we'll assign a password: + +```bash +riak-admin security add-user riakuser password=Test1234 +``` + +### Assigning a Password and Altering Existing User Characteristics + +While passwords and other characteristics can be set upon user creation, +it often makes sense to change user characteristics after the user has +already been created. Let's say that the user `riakuser` was created +without a password (or created _with_ a password that we'd like to +change). The `alter-user` command can be used to modify our `riakuser` +user: + +```bash +riak-admin security alter-user riakuser password=opensesame +``` + +When creating or altering a user, any number of `<option>=<value>` +pairs can be appended to the end of the command. Any non-standard +options will be stored and displayed via the `riak-admin security +print-users` command. + +```bash +riak-admin security alter-user riakuser name=bill age=47 fav_color=red +``` + +Now, the `print-users` command should return this: + +``` ++----------+--------+----------+--------------------------------------------------+ +| username | groups | password | options | ++----------+--------+----------+--------------------------------------------------+ +| riakuser | | |[{"fav_color","red"},{"age","47"},{"name","bill"}]| ++----------+--------+----------+--------------------------------------------------+ +``` + +**Note**: Usernames _cannot_ be changed using the `alter-user` command. +For example, running `riak-admin security alter-user riakuser +username=other-name`, will instead add the +`{"username","other-name"}` tuple to `riakuser`'s options. + +### Managing Groups for a User + +If we have a user `riakuser` and we'd like to assign her to the +`admin` group, we assign the value `admin` to the option `groups`: + +```bash +riak-admin security alter-user riakuser groups=admin +``` + +If we'd like to make the user `riakuser` both an `admin` and an +`archoverlord`: + +```bash +riak-admin security alter-user riakuser groups=admin,archoverlord +``` + +There is no way to incrementally add groups; even if `riakuser` was +already an `admin`, it is necessary to list it again when adding the +`archoverlord` group. Thus, to remove a group from a user, use +`alter-user` and list all *other* groups. + +If the user should be removed from all groups, use `groups=` with no +list: + +```bash +riak-admin security alter-user riakuser groups= +``` + +### Managing Groups for Groups + +Groups can be added to other groups for cascading permissions. 
+
+```bash
+riak-admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```bash
+riak-admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```bash
+riak-admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak-admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation
+:----------|:---------
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully. 
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/2.9.1/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/2.9.1/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/2.9.1/configuring/search/) document. 
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak-admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak-admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Check the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticate against the given pluggable authentication module (PAM) service
+`certificate` | Authenticate using a client certificate
+
+### Example: Adding a Trusted Source
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak-admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+|       users        |    cidr    |  source  | options  |
++--------------------+------------+----------+----------+
+|        all         |127.0.0.1/32|  trust   |    []    |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR. 
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/2.9.1/setup/installing/source/erlang). 
This issue should
+not affect Erlang 17.0 and later.
+
+## Enabling SSL
+
+In order to use any authentication or authorization features, you must
+enable SSL for Riak. **SSL is disabled by default**, but you will need
+to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#client-interfaces) for the node
+as well as a [certificate configuration](#certificate-configuration).
+
+If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/2.9.1/developing/api/http) for Riak and would like to
+configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host
+and port. The following configuration would establish port 8088 on
+`localhost` as the HTTPS port:
+
+```riakconf
+listener.https.$name = 127.0.0.1:8088
+
+# By default, "internal" is used as the "name" setting
+```
+
+```appconfig
+{riak_core, [
+             %% Other configs
+             {https, [{"127.0.0.1", 8088}]},
+             %% Other configs
+            ]}
+```
+
+## TLS Settings
+
+When using Riak security, you can choose which versions of SSL/TLS are
+allowed. By default, only TLS 1.2 is allowed, but this version can be
+disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#security) to `on` or `off`:
+
+* `tls_protocols.tlsv1`
+* `tls_protocols.tlsv1.1`
+* `tls_protocols.tlsv1.2`
+* `tls_protocols.sslv3`
+
+Three things to note:
+
+* Among the four available options, only TLS version 1.2 is enabled by
+  default
+* You can enable more than one protocol at a time
+* We strongly recommend that you do _not_ use SSL version 3 unless
+  absolutely necessary
+
+## Certificate Configuration
+
+If you are using any of the available [security sources]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#trust-based-authentication), you will need to do so
+over a secure SSL connection. In order to establish a secure connection,
+you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#security) point to the proper paths for your
+generated certs. By default, Riak assumes that all certs are stored in
+each node's `/etc` directory.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can change the location of the `/etc` directory by modifying the
+`platform_etc_dir`. More information can be found in our documentation
+on [configuring directories]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/#directories). 
+ +<table class="riak-conf"> + <thead> + <tr> + <th>Type</th> + <th>Parameter</th> + <th>Default</th> + </tr> + </thead> + <tbody> + <tr> + <td><strong>Signing authority</strong></td> + <td><code>ssl.cacertfile</code></td> + <td><code>#(platform_etc_dir)/cacertfile.pem</code></td> + </tr> + <tr> + <td><strong>Cert</strong></td> + <td><code>ssl.certfile</code></td> + <td><code>#(platform_etc_dir)/cert.pem</code></td> + </tr> + <tr> + <td><strong>Key file</strong></td> + <td><code>ssl.keyfile</code></td> + <td><code>#(platform_etc_dir)/key.pem</code></td> + </tr> + </tbody> +</table> + +If you are using the older, `app.config`-based configuration system, +these paths can be set in the `ssl` subsection of the `riak_core` +section. The corresponding parameters are shown in the example below: + +```appconfig +{riak_core, [ + %% Other configs + + {ssl, [ + {certfile, "./etc/cert.pem"}, + {keyfile, "./etc/key.pem"}, + {cacertfile, "./etc/cacertfile.pem"} + ]}, + + %% Other configs +]} +``` + +## Referer Checks and Certificate Revocation Lists + +In order to provide safeguards against +[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting) +(XSS) and +[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +attacks, Riak performs [secure referer +checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those +checks make it impossible to serve data directly from Riak. To disable +those checks, set the `secure_referer_check` parameter to `off`. + +If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/2.9.1/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +default. To disable this behavior, set the `check_crl` parameter to +`off`. diff --git a/content/riak/kv/2.9.1/using/security/best-practices.md b/content/riak/kv/2.9.1/using/security/best-practices.md new file mode 100644 index 0000000000..ea5717e0ee --- /dev/null +++ b/content/riak/kv/2.9.1/using/security/best-practices.md @@ -0,0 +1,80 @@ +--- +draft: true +title: "Security Best Practices" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Best Practices" + identifier: "security_best_practices" + weight: 102 + parent: "managing_security" +toc: true +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor
+regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris
+coniugis.
+
+## Troiana quoque
+
+Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe
+sanctique meum*; est. [Gente inimica
+premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret
+tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen
+ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret
+quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim
+suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis
+nimium in miserata?
+
+1. `In naribus aequos aberant`
+2. Naturae murmura te rimas suarum vulnus quod
+3. Socios leto loquor timide
+4. Ergo sub
+5. Patrias mihi consumite breve
+
+## Ruit huic movit luminibus excubias arma
+
+> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex
+vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat
+*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens
+cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec.
+
+1. Hic causam et dilecte nudae nec corpus
+2. Cor Si nive
+3. Petis equos perosa tu perterrita exitus non
+4. Per et et ire geminos parte
+5. Aqua coniunx cecidisse sonum
+
+```
+Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum
+potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et
+nec rubebant pietas, ipsa.
+```
diff --git a/content/riak/kv/2.9.1/using/security/managing-sources.md b/content/riak/kv/2.9.1/using/security/managing-sources.md
new file mode 100644
index 0000000000..644c09a710
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/security/managing-sources.md
@@ -0,0 +1,269 @@
+---
+title: "Managing Security Sources"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "Managing Security Sources"
+    identifier: "security_manage_sources"
+    weight: 101
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.1/ops/running/security-sources
+  - /riak/kv/2.9.1/ops/running/security-sources
+---
+
+If you're looking for more general information on Riak Security, it may
+be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/2.9.1/using/security/basics).
+
+This document provides more granular information on the four available
+authentication sources in Riak Security: trusted networks, password,
+pluggable authentication modules (PAM), and certificates. These sources
+correspond to `trust`, `password`, `pam`, and `certificate`,
+respectively, in the `riak-admin security` interface.
+
+The examples below will assume that the network in question is
+`127.0.0.1/32`, that a Riak user named `riakuser` has been
+[created]({{<baseurl>}}riak/kv/2.9.1/using/security/basics/#user-management), and that
+security has been [enabled]({{<baseurl>}}riak/kv/2.9.1/using/security/basics/#security-basics).
+
+{{% note title="Note on SSL connections" %}}
+If you use _any_ of the aforementioned security sources, even `trust`, you
+will need to do so via a secure SSL connection. 
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak-admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak-admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 password +riak-admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/2.9.1/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
+The default directory for certificates is `/etc`, though you can
+specify a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/), either by uncommenting the relevant lines (if you choose to use the defaults) or by setting the paths yourself:
+
+```riakconf
+ssl.certfile = /path/to/cert.pem
+ssl.keyfile = /path/to/key.pem
+ssl.cacertfile = /path/to/cacert.pem
+```
+
+In the client-side example above, the client's `CN` and Riak username
+needed to match. On the server (i.e. Riak) side, the `CN` specified _on
+each node_ must match the node's name as registered by Riak. You can
+find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/2.9.1/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is
+`riak-node-1`, you would need to generate your certificate with that in
+mind, as in this OpenSSL example:
+
+```bash
+openssl req -new ... '/CN=riak-node-1'
+```
+
+Once certificates have been properly generated and configured on all of
+the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/2.9.1/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client
+certificate that you generated for the user `riakuser`.
+
+How to use Riak clients in conjunction with OpenSSL and other
+certificates varies from client library to client library. We strongly
+recommend checking the documentation of your client library for further
+information.
+
+## PAM-based Authentication
+
+This section assumes that you have set up a PAM service bearing the name
+`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition
+specifying `auth` and/or other PAM services set up to authenticate a
+user named `riakuser`. As in the certificate-based authentication
+example above, the user's name must be the same in both your
+authentication module and in Riak Security.
+
+If we want the user `riakuser` to use this PAM service on `localhost`,
+we need to add a `pam` security source in Riak and specify the name of
+the service:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 pam service=riak_pam
+```
+
+**Note**: If you do not specify a name for your PAM service, Riak will
+use the default, which is `riak`.
+
+To verify that the source has been properly specified:
+
+```bash
+riak-admin security print-sources
+```
+
+That command should output the following:
+
+```
++--------------------+------------+----------+------------------------+
+|       users        |    cidr    |  source  |        options         |
++--------------------+------------+----------+------------------------+
+|      riakuser      |127.0.0.1/32|   pam    |[{"service","riak_pam"}]|
++--------------------+------------+----------+------------------------+
+```
+
+You can test that setup most easily by using `curl`. A normal request to
+Riak without specifying a user will return an `Unauthorized` message:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Response:
+
+```
+<html><head><title>401 Unauthorized</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></body></html>
+```
+
+If you identify yourself as `riakuser` and are successfully
+authenticated by your PAM service, you should get either `not found` or
+a Riak object if one is stored in the specified bucket type/bucket/key
+path:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## How Sources Are Applied
+
+When managing security sources---any of the sources explained
+above---you always have the option of applying a source to either a
+single user, multiple users, or all users (`all`). If specific users and
+`all` have no sources in common, this presents no difficulty. But what
+happens if one source is applied to `all` and a different source is
+applied to a specific user?
+
+The short answer is that the more specifically assigned source---i.e. to
+the user---will be considered the user's security source. We'll illustrate
+that with the following example, in which the `certificate` source is
+assigned to `all`, but the `password` source is assigned to `riakuser`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 certificate
+riak-admin security add-source riakuser 127.0.0.1/32 password
+```
+
+If we run `riak-admin security print-sources`, we'll get the following
+output:
+
+```
++--------------------+------------+-----------+----------+
+|       users        |    cidr    |  source   | options  |
++--------------------+------------+-----------+----------+
+|      riakuser      |127.0.0.1/32| password  |    []    |
+|                    |127.0.0.1/32|certificate|    []    |
+|        all         |127.0.0.1/32|certificate|    []    |
++--------------------+------------+-----------+----------+
+```
+
+As we can see, `password` is set as the security source for `riakuser`,
+whereas everyone else will authenticate using `certificate`.
diff --git a/content/riak/kv/2.9.1/using/security/v2-v3-ssl-ca.md b/content/riak/kv/2.9.1/using/security/v2-v3-ssl-ca.md
new file mode 100644
index 0000000000..4166819b4d
--- /dev/null
+++ b/content/riak/kv/2.9.1/using/security/v2-v3-ssl-ca.md
@@ -0,0 +1,80 @@
+---
+draft: true
+title: "V2 / V3 SSL & CA Validation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.1
+menu:
+  riak_kv-2.9.1:
+    name: "V2/V3 SSL & CA Validation"
+    identifier: "security_validation"
+    weight: 103
+    parent: "managing_security"
+toc: true
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. 
Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` diff --git a/content/riak/kv/2.9.1/using/troubleshooting.md b/content/riak/kv/2.9.1/using/troubleshooting.md new file mode 100644 index 0000000000..f16cd547b2 --- /dev/null +++ b/content/riak/kv/2.9.1/using/troubleshooting.md @@ -0,0 +1,23 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +--- + +[http 204]: ./http-204 + +## In This Section + +#### [HTTP 204][http 204] + +About the HTTP 204 response. + +[Learn More >>][http 204] diff --git a/content/riak/kv/2.9.1/using/troubleshooting/http-204.md b/content/riak/kv/2.9.1/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..6209f8534b --- /dev/null +++ b/content/riak/kv/2.9.1/using/troubleshooting/http-204.md @@ -0,0 +1,17 @@ +--- +title: "HTTP 204" +description: "" +project: "riak_kv" +project_version: 2.9.1 +menu: + riak_kv-2.9.1: + name: "HTTP 204" + identifier: "troubleshooting_http_204" + weight: 101 + parent: "managing_troubleshooting" +toc: true +--- + +In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers. + +If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored, otherwise you will receive a `204 No Content`. 
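+
+For example, a minimal sketch using `curl` against a local node (the
+bucket and key names are placeholders):
+
+```curl
+# Returns 204 No Content on success
+curl -XPUT -H "Content-Type: text/plain" \
+  -d "some value" \
+  "http://localhost:8098/buckets/test/keys/demo"
+
+# Returns 200 OK along with the stored value
+curl -XPUT -H "Content-Type: text/plain" \
+  -d "some value" \
+  "http://localhost:8098/buckets/test/keys/demo?returnbody=true"
+```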
diff --git a/content/riak/kv/2.9.10/_reference-links.md b/content/riak/kv/2.9.10/_reference-links.md
new file mode 100644
index 0000000000..c130e9b300
--- /dev/null
+++ b/content/riak/kv/2.9.10/_reference-links.md
@@ -0,0 +1,253 @@
+
+# Riak KV 2.9.10 Reference Links List
+
+
+## Common
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.10/downloads/
+[install index]: {{<baseurl>}}riak/kv/2.9.10/setup/installing
+[upgrade index]: {{<baseurl>}}riak/kv/2.9.10/upgrading
+[plan index]: {{<baseurl>}}riak/kv/2.9.10/planning
+[config index]: {{<baseurl>}}riak/kv/2.9.10/using/configuring/
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/
+[manage index]: {{<baseurl>}}riak/kv/2.9.10/using/managing
+[performance index]: {{<baseurl>}}riak/kv/2.9.10/using/performance
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode
+[contact basho]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+## Planning
+
+[plan index]: {{<baseurl>}}riak/kv/2.9.10/setup/planning
+[plan start]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/start
+[plan backend]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb
+[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveled
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/multi
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/cluster-capacity
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/bitcask-capacity-calc
+[plan best practices]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/best-practices
+[plan future]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/future
+
+
+## Installing
+
+[install index]: {{<baseurl>}}riak/kv/2.9.10/setup/installing
+[install aws]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/amazon-web-services
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/debian-ubuntu
+[install freebsd]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/freebsd
+[install mac osx]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/mac-osx
+[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/rhel-centos
+[install smartos]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/smartos
+[install solaris]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/solaris
+[install suse]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/suse
+[install windows azure]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/windows-azure
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source/erlang
+[install source jvm]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source/jvm
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/verify
+
+
+## Upgrading
+
+[upgrade index]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading
+[upgrade checklist]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading/checklist
+[upgrade version]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading/version
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading/cluster
+[upgrade mdc]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading/multi-datacenter
+[upgrade downgrade]: {{<baseurl>}}riak/kv/2.9.10/setup/downgrade
+
+
+## Configuring
+
+[config index]: {{<baseurl>}}riak/kv/2.9.10/configuring
+[config basic]: {{<baseurl>}}riak/kv/2.9.10/configuring/basic
+[config backend]: {{<baseurl>}}riak/kv/2.9.10/configuring/backend
+[config manage]: {{<baseurl>}}riak/kv/2.9.10/configuring/managing
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/
+[config strong consistency]: {{<baseurl>}}riak/kv/2.9.10/configuring/strong-consistency
+[config load balance]: {{<baseurl>}}riak/kv/2.9.10/configuring/load-balancing-proxy
+[config mapreduce]: {{<baseurl>}}riak/kv/2.9.10/configuring/mapreduce
+[config search]: {{<baseurl>}}riak/kv/2.9.10/configuring/search/
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter/ssl
+
+[config v2 mdc]: {{<baseurl>}}riak/kv/2.9.10/configuring/v2-multi-datacenter
+[config v2 nat]: {{<baseurl>}}riak/kv/2.9.10/configuring/v2-multi-datacenter/nat
+[config v2 quickstart]: {{<baseurl>}}riak/kv/2.9.10/configuring/v2-multi-datacenter/quick-start
+[config v2 ssl]: {{<baseurl>}}riak/kv/2.9.10/configuring/v2-multi-datacenter/ssl
+
+
+
+## Using
+
+[use index]: {{<baseurl>}}riak/kv/2.9.10/using/
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-admin-commands
+[use running cluster]: {{<baseurl>}}riak/kv/2.9.10/using/running-a-cluster
+
+### Reference
+
+[use ref custom code]: {{<baseurl>}}riak/kv/2.9.10/using/reference/custom-code
+[use ref handoff]: {{<baseurl>}}riak/kv/2.9.10/using/reference/handoff
+[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.10/using/reference/statistics-monitoring
+[use ref search]: {{<baseurl>}}riak/kv/2.9.10/using/reference/search
+[use ref 2i]: {{<baseurl>}}riak/kv/2.9.10/using/reference/secondary-indexes
+[use ref snmp]: {{<baseurl>}}riak/kv/2.9.10/using/reference/snmp
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/reference/strong-consistency
+[use ref jmx]: {{<baseurl>}}riak/kv/2.9.10/using/reference/jmx
+[use ref obj del]: {{<baseurl>}}riak/kv/2.9.10/using/reference/object-deletion/
+[use ref v3 mdc]: {{<baseurl>}}riak/kv/2.9.10/using/reference/v3-multi-datacenter
+[use ref v2 mdc]: {{<baseurl>}}riak/kv/2.9.10/using/reference/v2-multi-datacenter
+
+### Cluster Admin
+
+[use admin index]: {{<baseurl>}}riak/kv/2.9.10/using/admin/
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.10/using/admin/commands/
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-cli/
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-control/
+
+### Cluster Operations
+
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/inspecting-node
+[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/changing-cluster-info
+[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.10/configuring/load-balancing-proxy
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/bucket-types
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/handoff
+[cluster ops log]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/logging
+[cluster ops obj del]: {{<baseurl>}}riak/kv/2.9.10/using/reference/object-deletion
+[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/backing-up
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/strong-consistency
+[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.10/using/reference/secondary-indexes
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter
+[cluster ops v2 mdc]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/v2-multi-datacenter
+
+### Repair/Recover
+
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.10/using/repair-recovery
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.10/using/repair-recovery/failure-recovery/
+
+### Security
+
+[security index]: {{<baseurl>}}riak/kv/2.9.10/using/security/
+[security basics]: {{<baseurl>}}riak/kv/2.9.10/using/security/basics
+[security managing]: {{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/
+
+### Performance
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.10/using/performance/
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.10/using/performance/benchmarking
+[perf open files]: {{<baseurl>}}riak/kv/2.9.10/using/performance/open-files-limit/
+[perf erlang]: {{<baseurl>}}riak/kv/2.9.10/using/performance/erlang
+[perf aws]: {{<baseurl>}}riak/kv/2.9.10/using/performance/amazon-web-services
+[perf latency checklist]: {{<baseurl>}}riak/kv/2.9.10/using/performance/latency-reduction
+
+### Troubleshooting
+
+[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.10/using/troubleshooting/http-204
+
+
+## Developing
+
+[dev index]: {{<baseurl>}}riak/kv/2.9.10/developing
+[dev client libraries]: {{<baseurl>}}riak/kv/2.9.10/developing/client-libraries
+[dev data model]: {{<baseurl>}}riak/kv/2.9.10/developing/data-modeling
+[dev data types]: {{<baseurl>}}riak/kv/2.9.10/developing/data-types
+[dev kv model]: {{<baseurl>}}riak/kv/2.9.10/developing/key-value-modeling
+
+### Getting Started
+
+[getting started]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started
+[getting started java]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/java
+[getting started ruby]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/ruby
+[getting started python]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/python
+[getting started php]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/php
+[getting started csharp]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/csharp
+[getting started nodejs]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/nodejs
+[getting started erlang]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/erlang
+[getting started golang]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/golang
+
+[obj model java]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/java/object-modeling
+[obj model ruby]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/ruby/object-modeling
+[obj model python]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/python/object-modeling
+[obj model csharp]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/csharp/object-modeling
+[obj model nodejs]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/nodejs/object-modeling
+[obj model erlang]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/erlang/object-modeling
+[obj model golang]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started/golang/object-modeling
+
+### Usage
+
+[usage index]: {{<baseurl>}}riak/kv/2.9.10/developing/usage
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution
+[usage content types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/content-types
+[usage create objects]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/creating-objects
+[usage custom extractors]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/custom-extractors
+[usage delete objects]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/deleting-objects
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce
+[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search
+[usage search schema]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search-schemas
+[usage search data types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/searching-data-types
+[usage 2i]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/secondary-indexes
+[usage update objects]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/updating-objects
+
+### App Guide
+
+[apps mapreduce]: {{<baseurl>}}riak/kv/2.9.10/developing/app-guide/advanced-mapreduce
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.10/developing/app-guide/replication-properties
+[apps strong consistency]: 
{{}}riak/kv/2.9.10/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/2.9.10/developing/api/backend +[dev api http]: {{}}riak/kv/2.9.10/developing/api/http +[dev api http status]: {{}}riak/kv/2.9.10/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.9.10/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/2.9.10/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.9.10/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.9.10/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.9.10/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.9.10/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.9.10/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.9.10/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.9.10/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.9.10/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.9.10/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.9.10/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.9.10/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.9.10/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.9.10/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + + + + + diff --git a/content/riak/kv/2.9.10/add-ons.md b/content/riak/kv/2.9.10/add-ons.md new file mode 100644 index 0000000000..eb429d48db --- /dev/null +++ b/content/riak/kv/2.9.10/add-ons.md @@ -0,0 +1,23 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. + +* [Riak Redis Add-on]({{}}riak/kv/2.9.10/add-ons/redis/) + + + + diff --git a/content/riak/kv/2.9.10/add-ons/redis.md b/content/riak/kv/2.9.10/add-ons/redis.md new file mode 100644 index 0000000000..c452efc623 --- /dev/null +++ b/content/riak/kv/2.9.10/add-ons/redis.md @@ -0,0 +1,61 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +--- + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. 
+{{% /note %}}
+
+Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV.
+
+RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate.
+
+Whether you are looking to build out a session, shopping cart, advertisement or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV).
+
+## Compatibility
+
+RRA is supported on the following platforms:
+
+* RHEL/CentOS 6
+* RHEL/CentOS 7
+* Ubuntu 12.04 LTS "Precise Pangolin"
+* Ubuntu 14.04 LTS "Trusty Tahr"
+* Debian 7 "Wheezy"
+* Debian 8 "Jessie"
+
+RRA is compatible with the following services:
+
+* Riak KV Enterprise (2.1.4+)
+* Riak TS Enterprise (1.4.0+)
+* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster)
+  * Redis Cluster and RRA's consistent hashing are at odds; the conflict
+    surfaces as errors such as MOVED, ASK, and CROSSSLOT messages from
+    Redis, see (WIP): https://github.com/antirez/redis-rb-cluster
+
+## Get Started
+
+* [Set up RRA.][addon redis setup]
+* [Use RRA with various clients.][addon redis use]
+* [Develop with RRA.][addon redis develop]
+* [Learn about RRA's features.][addon redis features]
+
+
+
diff --git a/content/riak/kv/2.9.10/add-ons/redis/developing-rra.md b/content/riak/kv/2.9.10/add-ons/redis/developing-rra.md
new file mode 100644
index 0000000000..4744500163
--- /dev/null
+++ b/content/riak/kv/2.9.10/add-ons/redis/developing-rra.md
@@ -0,0 +1,328 @@
+---
+title: "Developing with Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Develop with Redis Add-on"
+    identifier: "add-ons_redis_develop"
+    weight: 403
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+---
+
+[redis-clients]: http://redis.io/clients
+[usage bucket types]: {{}}riak/kv/2.9.10/developing/usage/bucket-types/
+[dev api http]: {{}}riak/kv/2.9.10/developing/api/http
+[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/
+[apps replication properties]: {{}}riak/kv/2.9.10/developing/app-guide/replication-properties
+[usage commit hooks]: {{}}riak/kv/2.9.10/developing/usage/commit-hooks/
+[concept causal context]: {{}}riak/kv/2.9.10/learn/concepts/causal-context
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations.
+
+## Overview
+
+Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides access to Riak KV as a persistent data store, with Redis as a cache, through the various Redis client libraries and the command-line interface tool `redis-cli`.
+
+As with Riak KV, the cache proxy service almost always performs best and most
+predictably when you use the basic CRUD operations -- Create, Read, Update,
+Delete -- that you'd find in any key/value store. Learning these operations
+is a great place to start when beginning to develop applications that use
+RRA.
+
+The set of clients (including recommendations) for Redis are listed at
+[Redis clients][redis-clients]. For brevity's sake, the examples provided here
+are in:
+
+* Erlang (Eredis)
+* JavaScript (node_redis)
+* Python (redis-py)
+* Ruby (redis-rb)
+* Scala (lettuce)
+* Java, see the Scala examples.
+  The code intentionally uses as few Scala tricks as possible to focus on
+  the use of the Redis client.
+
+## Riak KV Setup
+
+While you can use Riak Redis Add-on with Riak KV configured so either `last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'.
+
+The cache proxy service is tested under both configurations. However, due to lack of support via the Redis protocol for returning multiple values for a single `GET`, effectively `last_write_wins` semantics apply.
+
+For a deeper explanation of Riak KV's configurable behaviors, see John Daily's
+blog series [part 4][config-behaviors].
+
+### Bucket Type Setup
+
+#### Create a Bucket Type
+
+If your application organizes data in a way that does not include a bucket type
+and instead only uses buckets to organize its keyspace, the `default` bucket type
+can be used by omitting the bucket-type portion of the colon-delimited
+hierarchical namespaced key. In other words, `test:food` is equivalent to
+`default:test:food`, where the bucket type is `default`, the bucket is `test`,
+and the key is `food`. For the examples here, we will use `rra:test:food` to clearly
+use a bucket type.
+
+If your application organizes data including a bucket type, ensure that the
+bucket type is created in Riak without specifying a data type, so the value is
+effectively opaque, i.e. a `string`. The following command provides an example of
+creating the bucket type `rra`:
+
+```sh
+if ! riak-admin bucket-type status rra >/dev/null 2>&1; then
+    riak-admin bucket-type create rra '{"props":{}}'
+    riak-admin bucket-type activate rra
+fi
+```
+
+#### Set Bucket Props
+
+The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false':
+
+```sh
+curl -XPUT -H 'Content-Type: application/json' \
+  -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \
+  'http://127.0.0.1:8098/types/rra/buckets/test/props'
+```
+
+For additional configuration options see [bucket properties][dev api http].
+
+## Object/Key Operations
+
+Riak KV organizes data into buckets, keys, and values, with
+[bucket types][usage bucket types] acting as an additional namespace in Riak KV
+versions 2.0 and greater. Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket.
+
+Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or other plaintext representations that can be parsed in the client application (e.g. YAML).
+
+While buckets are a flat namespace in Riak KV and you can name them
+whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy
+service, the Redis bucket_type:bucket:key is mapped to the Riak KV
+bucket_type/bucket/key, so bucket type and bucket names should not contain
+colons (`:`). When not specified, the bucket type defaults to "default".
+
+Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets.
+
+The same goes for naming keys: many objects can have the same key as long as they're in different buckets.
+There is no restriction on keys containing colons (`:`); representing a
+nested namespace this way is common practice in applications using Redis.
+
+Riak KV [bucket types][usage bucket types] enable you to provide common
+configurations for buckets (as many buckets as you wish). This means you can
+easily enable buckets to share common configurations, i.e. identical
+[replication properties][apps replication properties] or
+[commit hooks][usage commit hooks].
+
+
+## Reading Objects
+
+Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading-through to Riak KV, which results in greater resilience through node outages and network partitions.
+
+To request a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.get("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.get("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.get("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+var value = connection.get("rra:test:food")
+```
+
+### Get Configuration Parameters
+
+>**Note:** The cache proxy service read options (related to replication factor and
+consistency concerns) may optionally be set within `nutcracker.conf`, overriding
+the setting values at the bucket level in Riak KV.
+
+The following configuration parameters apply to `GET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` |
+|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` |
+|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) |
+|`timeout` | The number of milliseconds to await a response. | `0` (server specified) |
+
+
+### Sibling Resolution
+
+As the Redis protocol does not provide a means to return multiple siblings,
+the cache proxy service must provide server-side sibling resolution. At present, only last-write-wins sibling resolution is available. The result is an effective
+last-write-wins configuration for access through the cache proxy service.
+
+
+## Writing Objects
+
+Writes via the cache proxy service are analogous to a Redis `SET`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache. As with HTTP PUT, `SET` semantically covers both create and update
+operations.
+
+To set a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.set("rra:test:food", "apple", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.set("rra:test:food", "apple")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.set("rra:test:food", "apple")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.set("rra:test:food", "apple")
+```
+
+### Set Configuration Parameters
+
+>**Note:** The cache proxy service write options (related to replication factor and
+consistency concerns) may optionally be set within `nutcracker.conf`, overriding
+the setting values at the bucket level in Riak KV.
+
+The following configuration parameters apply to `SET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+### Sibling Explosion
+
+As noted in the section "Sibling Resolution" above, Riak KV provides for a line of
+descent (known as the [causal context][concept causal context]) for a value stored at a key. Clients
+performing write operations provide this causal context by setting the vector
+clock (VClock) that they last read.
+
+If a client does not provide the causal context, Riak KV makes no assumptions and treats the write as a new causal context, semantically equivalent to a
+create. In the case that a value is already stored at the key, this would lead
+to a sibling.
+
+Since the Redis protocol does not provide a means to pass a VClock, the cache
+proxy service needs to perform a read-before-write to obtain the current VClock, so the write can continue the causal context previously established and avoid
+"sibling explosion".
+
+Despite these efforts, in the event of a network partition, siblings will still
+be created, as clients writing to nodes on either side of the network partition
+can create divergent lines of descent. Sibling resolution remains the means
+to merge these lines of descent into a coherent causal context.
+
+## Deleting Objects
+
+Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache.
+
+To delete a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.del("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+# redis-py exposes the Redis DEL command as delete(), since del is a
+# reserved word in Python
+r.delete("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.del("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.del("rra:test:food")
+```
+
+### Delete Configuration Parameters
+
+The following configuration parameters apply to `DEL` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+
diff --git a/content/riak/kv/2.9.10/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.9.10/add-ons/redis/redis-add-on-features.md
new file mode 100644
index 0000000000..441c76dc2f
--- /dev/null
+++ b/content/riak/kv/2.9.10/add-ons/redis/redis-add-on-features.md
@@ -0,0 +1,134 @@
+---
+title: "Riak Redis Add-on Features"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Redis Add-on Features"
+    identifier: "add-ons_redis_features"
+    weight: 504
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+---
+
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[GET-sequence]: {{}}images/redis/GET_seq.msc.png
+[SET-sequence]: {{}}images/redis/SET_seq.msc.png
+[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png
+[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png
+[redis docs]: http://redis.io/commands
+[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md
+
+## Overview
+
+The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware.
+
+On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available:
+
+* [Pre-sharding](#pre-sharding)
+* [Connection Aggregation](#connection-aggregation)
+* [Command Pipelining](#command-pipelining)
+* [Read-through Cache](#read-through-cache)
+* [Write-around Cache](#write-around-cache)
+* [Commands](#commands)
+* [Object Lifetime](#object-lifetime)
+
+## Pre-sharding
+
+Pre-sharding with consistent hashing dispatches object reads and writes based
+on a configurable hash function, spreading load across multiple cache servers.
+The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then
+requests are routed to the Redis server that handles that portion of the key
+range.
+
+Redis with no persistence is used as the frontend cache proxy service, and
+Redis as a data server holds all data in memory. The addressable memory of
+a single cache server is therefore limited. By employing pre-sharding, the
+total addressable cache memory space is extended by the number of Redis servers.
+
+## Connection Aggregation
+
+Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers. This reduces the total required connections to the Redis server for the same key.
+
+Redis clients in various languages support specifying multiple servers, as well
+as implementing multiple methods of spreading load across those servers (e.g.
+round-robin load balancing or consistent hashing). Since the cache proxy service provides consistent hashing, any Redis client method of supporting multiple
+servers will suffice.
+ +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideal due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server during a write (DELETE or PUT) to the +backend server. + +A short `CACHE_TTL`, for example "15s", reduces a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three write cache strategies, the write-around cache strategy is the least +prone to race condition, but least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV utilizing the read-through + caching strategy with a TTL set at service configuration time. + +* SET - set the value of a key to Riak KV and invalidate cache, issue a PEXPIRE + to Redis. + +* DEL - delete the value of a key to Riak KV and invalidate cache, issue a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs]. 
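+
+As a quick illustration, the three augmented commands can be exercised with
+`redis-cli` pointed at the cache proxy. This is a sketch assuming an RRA
+instance on the default port 22122 and the `rra` bucket type used elsewhere
+in these pages:
+
+```bash
+# SET writes through to Riak KV, then issues a PEXPIRE to invalidate cache
+redis-cli -h 127.0.0.1 -p 22122 set rra:test:food apple
+
+# GET reads through: a miss is filled from Riak KV and cached for the
+# configured TTL; an immediate second GET should be a cache hit
+redis-cli -h 127.0.0.1 -p 22122 get rra:test:food
+
+# DEL deletes from Riak KV, then issues a PEXPIRE to invalidate cache
+redis-cli -h 127.0.0.1 -p 22122 del rra:test:food
+```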
+
+>**Important:** While the cache proxy service does support issuing DEL commands, PEXPIRE, with a small TTL, is suggested instead when the semantic intent is to remove an item from cache. With write-around, the DEL command will issue a delete to the Riak backend.
+
+## Object Lifetime
+
+With the combination of read-through and write-around cache strategies, the
+full object lifetime for a key-value is represented by the following
+sequence diagram:
+
+![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png)
+
+
+
diff --git a/content/riak/kv/2.9.10/add-ons/redis/set-up-rra.md b/content/riak/kv/2.9.10/add-ons/redis/set-up-rra.md
new file mode 100644
index 0000000000..3874e70c98
--- /dev/null
+++ b/content/riak/kv/2.9.10/add-ons/redis/set-up-rra.md
@@ -0,0 +1,283 @@
+---
+title: "Setting Up Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Set Up Redis Add-on"
+    identifier: "add-ons_redis_setup"
+    weight: 201
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis use]: ../using-rra
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[install index]: {{}}riak/kv/2.9.10/setup/installing
+[perf open files]: {{}}riak/kv/2.9.10/using/performance/open-files-limit/#changing-the-limit
+[lab ansible]: https://github.com/paegun/ansible-cache-proxy
+
+This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA.
+
+## Prerequisites
+
+Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index].
+
+While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go.
+
+This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used
+to configure the cache proxy.
+
+## In the Lab
+
+An ansible setup for the Riak Redis Add-on (RRA) was developed to provide a
+runnable example of an installation; see [ansible cache proxy][lab ansible].
+The remainder of this setup guide lists the commands required to install and
+configure RRA manually.
+
+## Installing
+
+1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files].
+2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed.
+3. Install Riak Redis Add-on.
+
+### Change the open-files limit
+
+As with Riak KV, both the total open-files limit and the per-user open-files limit
+must be high enough to allow Redis and Riak Redis Add-on (RRA) to function.
+
+For a complete guide on changing the limit in Riak KV, see
+[Changing the limit][perf open files].
+
+#### Linux
+
+On most Linux distributions, the total limit for open files is controlled by `sysctl`:
+
+```bash
+# raise the total open-files limit; add "fs.file-max=65536" to
+# /etc/sysctl.conf to make the change persistent
+sudo sysctl -w fs.file-max=65536
+sudo sysctl -p
+```
+
+To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
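+
+Once the edits are in place and you have logged back in, you can confirm the
+limits actually in effect. A quick check, assuming nothing beyond a standard
+shell:
+
+```bash
+# system-wide limit on open file handles
+sysctl fs.file-max
+
+# per-user soft and hard limits for the current shell
+ulimit -Sn
+ulimit -Hn
+```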
+
+#### CentOS
+
+On CentOS systems, set a proper limit for the user you're usually logging in with
+to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the
+executing user.
+
+#### Ubuntu
+
+On Ubuntu systems, the following settings are recommended:
+
+```config
+»USERNAME« hard nofile 65536
+»USERNAME« soft nofile 65536
+root hard nofile 65536
+root soft nofile 65536
+```
+
+>**Note:** You may need to log out of your shell and then log back in for these changes to take effect.
+
+
+### Install Redis
+
+>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on".
+
+#### Install on Ubuntu
+
+If you are on Ubuntu, run the following to install Redis (the dotdeb
+repository entries below are examples; adjust the suite name to match your
+release):
+
+```bash
+# add the dotdeb repositories to your APT sources.
+sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <<EOF
+deb http://packages.dotdeb.org squeeze all
+deb-src http://packages.dotdeb.org squeeze all
+EOF
+
+# update the APT cache and install Redis
+sudo apt-get update
+sudo apt-get install redis-server
+
+# verify that Redis is running and listening on its default port
+ss -nlp | grep [r]edis
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.
+
+### Install Riak Redis Add-on (RRA)
+
+>**Note:**
+>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums.
+
+If you are on CentOS, run the following to install RRA:
+
+```bash
+sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm
+```
+
+If you are on Ubuntu, run the following to install RRA:
+
+```bash
+sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb
+```
+
+## Configuring Riak Redis Add-on
+
+To configure Riak Redis Add-on (RRA), edit the configuration file `/etc/cache_proxy/cache_proxy_22122.yml`.
+
+The RRA configuration file is in YAML format. An example configuration
+file is provided in the install, and it contains all relevant configuration elements:
+
+```config
+»pool name« :
+  listen: 0.0.0.0:22122
+  hash: fnv1a_64
+  distribution: ketama
+  auto_eject_hosts: true
+  redis: true
+  server_retry_timeout: 2000
+  server_failure_limit: 1
+  server_ttl: 1h
+  servers:
+    - 127.0.0.1:6379:1
+  backend_type: riak
+  backend_max_resend: 2
+  backends:
+    - 127.0.0.1:8087
+```
+
+Set the `listen` configuration value to set the RRA listen port.
+
+To set the time-to-live (TTL) for values stored in cache, set the `server_ttl`
+configuration value. Human-readable time values can be specified,
+with the most likely units being `s` for seconds or `ms` for milliseconds.
+
+Set the list of Redis servers by listing the servers, one per line prefixed with `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional).
+
+Set the list of Riak KV servers by listing the servers, one per line prefixed with `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«`
+(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here.
+
+### Verify your configuration
+
+If you are on Ubuntu, run the following to start RRA:
+
+```bash
+sudo service cache_proxy start
+```
+
+If you are on CentOS, run the following to start RRA:
+
+```bash
+systemctl start cache_proxy
+```
+
+To verify RRA is running and listening on the expected port, run the
+following (using the loopback interface and the default RRA port 22122
+as an example):
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS
+redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on
+```
+
+Redis should respond with `SUCCESS`.
+
+If RRA is responding with the expected output, run the following to
+clean up and remove the test value:
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on
+```
+
+If you did not get the expected output, run the following
+to verify that RRA is running on the expected port:
+
+```bash
+ss -nlp |grep [n]utcracker
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.
+
+## Next Steps
+
+Get started with some [basic usage][addon redis use] or check out more info on [setting up for development (with examples)][addon redis develop].
+
+
+
diff --git a/content/riak/kv/2.9.10/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.9.10/add-ons/redis/set-up-rra/deployment-models.md
new file mode 100644
index 0000000000..550679566e
--- /dev/null
+++ b/content/riak/kv/2.9.10/add-ons/redis/set-up-rra/deployment-models.md
@@ -0,0 +1,141 @@
+---
+title: "Riak Redis Add-on Deployment Models"
+description: "Explore the various models for deploying Riak Redis Add-on"
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Redis Add-on Deployment Models"
+    identifier: "add-ons_redis_deployment"
+    weight: 201
+    parent: "add-ons_redis_setup"
+toc: true
+commercial_offering: true
+---
+
+[Local-deployment]: {{}}images/redis/rra_deployment_local.png
+[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png
+[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png
+
+## Deployment Models
+
+### Local Cache Deployment
+
+In a local cache deployment, the RRA and Redis are deployed to the application
+server.
+
+![Local-deployment]({{}}images/redis/rra_deployment_local.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are local.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections from Application Servers to Riak Nodes are distributed
+  and bounded to equal the number of Riak nodes _multiplied_ by the number of
+  Application Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Cache hits are extremely fast.
+
+Disadvantages:
+
+* Cache writes on one application server are *not* observed on other application
+  servers, so cache hit rates are likely lower unless some form of consistent
+  routing to the application server exists within the solution.
+* Redis competing for RAM with the application service may be problematic.
+
+### Colocated Cache Deployment
+
+In a colocated cache deployment, the RRA may be deployed either to the
+application server (suggested) or to the Riak servers, and Redis is deployed to
+the Riak servers.
+
+In the case of deploying the RRA to the application servers, the RRA features
+of reducing connections from the relatively high number of application service
+instances to the fewer Redis (cache) and Riak (persistent) data service
+instances allow for the greatest scale at the expense of the deployment cost
+of pushing a service and its configuration.
+
+In the case of deploying the RRA to the colocated Redis and Riak data servers,
+the maximum scale for the solution is constrained by the number of network
+connections from the application services, while deployment costs remain a matter
+of pushing a service and its configuration. In either case, deployment should
+be automated, so costs are not multiplied by the number of servers.
+
+![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are distributed and bounded to equal the number of Riak nodes
+  _multiplied_ by the number of Application Service instances.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections from RRA to Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _squared_.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so slightly increased latency compared to local.
+* Redis competing for RAM with Riak will likely be problematic. Redis should
+  be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis
+  so that Riak is allotted sufficient RAM to serve the more important
+  persistent data storage and retrieval services. See http://redis.io/topics/config
+* This model may seem to provide data locality, but in the case of faults in
+  either the Redis or Riak services, the fault tolerance mechanisms of RRA and
+  Riak will not match exactly, as communicating the information necessary to
+  support such lock-step fault tolerance would lead to greater mean latencies,
+  and Riak provides superior 99th percentile latency performance in the face
+  of faults.
+
+### Distributed Cache Deployment
+
+In a distributed cache deployment, the RRA is deployed to the application server
+and Redis is deployed to standalone servers, separate from Riak cluster nodes.
+
+![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are local.
+* Redis: The connections from RRA Service instances to Redis Service
+  instances are distributed and bounded to equal the number of Application
+  Servers _multiplied_ by the number of Redis Servers.
+* Riak: The connections from RRA to Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _multiplied_ by the number of Application
+  Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+* Keeps RRA near the application, reducing network connections.
+* Moves Redis to distinct servers, allowing the cache more RAM and not
+  constraining the RAM of either the application or persistent data services.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so increased latency compared to local.
+
+### Recommendation
+
+The relative advantages and disadvantages of the Distributed Cache Deployment,
+most notably the increased cache hit rate and reduced connection overhead,
+should make it the standout choice for applications requiring the scale and
+operational simplicity of Riak. For this reason, we recommend the Distributed
+Cache Deployment.
+
+
+
diff --git a/content/riak/kv/2.9.10/add-ons/redis/using-rra.md b/content/riak/kv/2.9.10/add-ons/redis/using-rra.md
new file mode 100644
index 0000000000..de3552868c
--- /dev/null
+++ b/content/riak/kv/2.9.10/add-ons/redis/using-rra.md
@@ -0,0 +1,245 @@
+---
+title: "Using Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Using Redis Addon"
+    identifier: "add-ons_redis_getstarted"
+    weight: 302
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/kv/2.9.10/add-ons/redis/get-started-with-rra
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis setup]: ../set-up-rra/
+[dev api http]: {{}}riak/kv/2.9.10/developing/api/http/
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+Now that you've [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client that supports `GET`, `SET` and `DEL` operations.
+
+This page will walk you through using RRA.
+
+## Prerequisites
+
+We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine.
+
+You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http].
+
+## Run the Read-Through Test
+
+Throughout this test example, the bucket "test" and key "foo" are used to
+demonstrate how to address the hierarchical namespace support in Riak KV
+through the flat Redis key. The bucket type is not specified in this example,
+so it is effectively the default bucket type, named "default". For additional
+information regarding the key namespace, see [develop Riak Redis Add-on (RRA)][addon redis develop].
+
+The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are:
+
+* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings.
+* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`.
+* GET the Riak object at the `test` bucket with the key `foo`.
+* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.)
+* Assert that the value obtained from the previous cache proxy GET is 'bar'.
+
+First, create a file named `read_through_test.sh` with the following content:
+
+```bash
+#!/usr/bin/env bash
+
+# set test environment
+RIAK_HTTP_IP="127.0.0.1"
+RIAK_HTTP_PORT="8098"
+CACHE_PROXY_IP="127.0.0.1"
+CACHE_PROXY_PORT="22122"
+CACHE_PROXY_STATISTICS_PORT="22123"
+RIAK_TEST_BUCKET="test"
+KEY="foo"
+VALUE="bar"
+
+# DELETE Riak object, ensure no siblings
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# PUT Riak object
+curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# GET Riak object
+RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY")
+
+# GET Cache Proxy value
+CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" get "$RIAK_TEST_BUCKET:$KEY")
+
+# DELETE Riak object, cleanup
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# Assert
+if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then
+    RESULT="Success"
+else
+    RESULT="FAIL"
+fi
+echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy."
+```
+
+Then, once you've created the file, make it executable and run it:
+
+```bash
+chmod +x read_through_test.sh
+./read_through_test.sh
+```
+
+### Exceptions
+
+If the test does not pass, verify that both Redis and RRA are running. You can do this by running:
+
+```bash
+ps aux |grep [r]edis
+ps aux |grep [n]utcracker
+```
+
+The result should list `redis` and `nutcracker` respectively.
+
+Also, verify that Riak KV is started and listening on the protocol buffer port specified:
+
+```bash
+sudo riak config effective |grep proto
+```
+
+If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following:
+
+```bash
+sudo service cache_proxy restart
+```
+
+If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows:
+
+1. Stop RRA.
+2. Stop Redis.
+3. *Optional* Restart Riak KV (this should only be necessary if Riak KV is not responding to protocol buffer requests).
+4. Start Redis.
+5. Start RRA.
+
+```bash
+sudo service cache_proxy stop
+sudo service redis stop
+
+# optional
+sudo riak restart
+
+sudo service redis start
+sudo service cache_proxy start
+```
+
+## Using Riak Redis Add-on
+
+Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA.
+
+For objects that should not be cached, interact with Riak KV as usual: issuing GET, PUT, and DELETE commands through the Riak client.
+
+For objects that should be cached, read from RRA: issuing GET, SET, and DEL commands through the Redis client.
+
+### Monitoring
+
+#### RRA
+
+Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service's process was killed or terminated by other means.
+
+The log file for RRA is stored by default in `/var/log/cache_proxy.log`. RRA is logrotate friendly, responding to the signal to reopen the log file following a rotate.
+
+For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+ +For example, running the following command (using the loopback interface and the default statistics port as an example): + +```bash +telnet 127.0.0.1 22123 +``` + +Returns statistic results: + +```json +{ + "bdp_cache_proxy": { + "192.168.50.2:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 216, + "requests": 9, + "response_bytes": 39, + "responses": 4, + "server_connections": 1, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.3:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 0, + "requests": 0, + "response_bytes": 0, + "responses": 0, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.4:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 90, + "requests": 5, + "response_bytes": 258, + "responses": 2, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "client_connections": 0, + "client_eof": 6, + "client_err": 0, + "forward_error": 0, + "fragments": 0, + "server_ejects": 0 + }, + "curr_connections": 4, + "service": "nutcracker", + "source": "vagrant", + "timestamp": 1438301846, + "total_connections": 10, + "uptime": 7227, + "version": "0.4.0" +} +``` + +Using the above results, you should be able to determine metrics changes that would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution. + +While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions: + +* Custom - https://github.com/gfranxman/NutcrackerMonitor +* NewRelic - http://newrelic.com/plugins/schoology/245 +* Nagios - https://github.com/schoology/twemproxy_nagios + +#### Redis + +Various Redis monitoring solutions exist in the market and, like monitoring RRA, these monitoring solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone. 
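+
+For example, the same numbers those monitoring solutions consume can be pulled
+directly with `redis-cli` (a sketch assuming a Redis server on its default
+port 6379):
+
+```bash
+# full server report
+redis-cli -h 127.0.0.1 -p 6379 info
+
+# or a single section, e.g. the keyspace hit/miss counters
+redis-cli -h 127.0.0.1 -p 6379 info stats
+```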
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details + + + diff --git a/content/riak/kv/2.9.10/configuring.md b/content/riak/kv/2.9.10/configuring.md new file mode 100644 index 0000000000..bcda5c04ce --- /dev/null +++ b/content/riak/kv/2.9.10/configuring.md @@ -0,0 +1,86 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
[Learn More >>][config search]

#### [V3 Multi-Datacenter][config v3 mdc]

A guide on configuring Riak's V3 Multi-Datacenter Replication.

[Learn More >>][config v3 mdc]

diff --git a/content/riak/kv/2.9.10/configuring/backend.md b/content/riak/kv/2.9.10/configuring/backend.md
new file mode 100644
index 0000000000..b83fdae8be
--- /dev/null
+++ b/content/riak/kv/2.9.10/configuring/backend.md
@@ -0,0 +1,645 @@
---
title: "Backend Configuration"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "Backend Configuration"
    identifier: "configuring_backend"
    weight: 110
    parent: "configuring"
toc: true
---

[plan backend leveldb]: {{}}riak/kv/2.9.10/setup/planning/backend/leveldb
[plan backend leveled]: {{}}riak/kv/2.9.10/setup/planning/backend/leveled
[plan backend bitcask]: {{}}riak/kv/2.9.10/setup/planning/backend/bitcask
[plan backend memory]: {{}}riak/kv/2.9.10/setup/planning/backend/memory
[plan backend multi]: {{}}riak/kv/2.9.10/setup/planning/backend/multi

## LevelDB

Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend.

> **Note on upgrading to 2.0**
>
> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and wish to use your old configuration files, i.e. `app.config` and `vm.args`, please note that you must set the `total_leveldb_mem_percent` setting in the `eleveldb` section of `app.config`. We recommend setting it to `70`. If you do not set this parameter, it will default to 15, which can lead to problems in some clusters.

Config | Description | Default
:------|:------------|:-------
`leveldb.block_cache_threshold` | This setting defines the limit past which block cache memory can no longer be released in favor of the page cache. This setting has no impact in favor of file cache. The value is set on a per-vnode basis. | `32MB`
`leveldb.compaction.trigger.tombstone_count` | Controls when a background compaction initiates solely due to the number of delete tombstones within an individual `.sst` table file. A value of `off` disables the feature. | `1000`
`leveldb.compression` | Enabling this setting (`on`), which is the default, saves disk space. Disabling it may reduce read latency but increase overall disk activity. This option can be changed at any time, but it will not impact data on disk until the next time a file requires compaction. | `on`
`leveldb.compression.algorithm` | Selects which compression algorithm is used when `leveldb.compression` is `on`. In new riak.conf files, this is explicitly set to `lz4`; however, when this setting is not provided, `snappy` will be used for backward compatibility. When you determine that you will no longer need backward compatibility, setting this to `lz4` will cause future compactions to use the LZ4 algorithm for compression. | `lz4` in new riak.conf files; `snappy` when not provided
`leveldb.data_root` | The directory in which LevelDB will store its data. | `./data/leveldb`
`leveldb.fadvise_willneed` | Option to override LevelDB's use of `fadvise(DONTNEED)` with `fadvise(WILLNEED)` instead. `WILLNEED` can reduce disk activity on systems where physical memory exceeds the database size. | `false`
`leveldb.maximum_memory` | This parameter defines the server memory (in bytes) to assign to LevelDB. Also see `leveldb.maximum_memory.percent` to set LevelDB memory as a percentage of system total. | `80`
`leveldb.maximum_memory.percent` | This parameter defines the percentage of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes to stay within this size. The memory size can alternately be assigned as a byte count via `leveldb.maximum_memory` instead. | `70`
`leveldb.threads` | The number of worker threads performing LevelDB operations. | `71`
`leveldb.verify_checksums` | Enables or disables the verification of the data fetched from LevelDB against internal checksums. | `on`
`leveldb.verify_compaction` | Enables or disables the verification of LevelDB data during compaction. | `on`
`leveldb.block.size_steps` | Defines the number of incremental adjustments to attempt between the `block.size` value and the maximum `block.size` for an `.sst` table file. A value of zero disables the underlying dynamic `block_size` feature. | `16`
`leveldb.block.restart_interval` | Defines the key count threshold for a new key entry in the key index for a block. Most deployments should leave this parameter alone. | `16`
`leveldb.block.size` | Defines the size threshold for a block/chunk of data within one `.sst` table file. Each new block gets an index entry in the `.sst` table file's master index. | `4KB`
`leveldb.bloomfilter` | Each database `.sst` table file can include an optional "bloom filter" that is highly effective in shortcutting data queries that are destined to not find the requested key. The Bloom filter typically increases the size of an `.sst` table file by about 2%. | `on`
`leveldb.write_buffer_size_min` | Each vnode first stores new key/value data in a memory-based write buffer. This write buffer is in parallel to the recovery log mentioned in the `sync` parameter. Riak creates each vnode with a randomly sized write buffer for performance reasons. The random size is somewhere between `write_buffer_size_min` and `write_buffer_size_max`. | `30MB`
`leveldb.write_buffer_size_max` | See `leveldb.write_buffer_size_min` directly above. | `60MB`
`leveldb.limited_developer_mem` | This is a Riak-specific option that is used when a developer is testing a high number of vnodes and/or several VMs on a machine with limited physical memory. Do not use this option if making performance measurements. This option overwrites values given to `write_buffer_size_min` and `write_buffer_size_max`. | `off`
`leveldb.sync_on_write` | Whether LevelDB will flush after every write. Note: if you are familiar with `fsync`, this is analogous to calling `fsync` after every write. | `off`
`leveldb.tiered` | The level number at which LevelDB data switches from the faster to the slower array. The default of `off` disables the feature. | `off`
`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`. | 
`leveldb.tiered.path.slow` | The path prefix for `.sst` files at or above the level set by `leveldb.tiered`. | 
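As an illustration, a handful of these parameters set together in `riak.conf` might look like the following (values are examples, not recommendations):

```riakconf
# Example LevelDB tuning block; adjust values to your hardware
leveldb.data_root = ./data/leveldb
leveldb.maximum_memory.percent = 70
leveldb.compression = on
leveldb.compression.algorithm = lz4
```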
## Leveled

Configurable parameters for Riak's [leveled][plan backend leveled] storage backend.

Config | Description | Default
:------|:------------|:-------
`leveled.data_root` | A path under which leveled data files will be stored. | `$(platform_data_dir)/leveled`
`leveled.sync_strategy` | Strategy for flushing data to disk. Can be set to `riak_sync`, `sync` (if OTP > 16) or `none`. Use `none`, and the OS will flush when most efficient. Use `riak_sync` or `sync` to flush after every PUT (not recommended without some hardware support, e.g. flash drives and/or flash-backed write caches). | `none`
`leveled.compression_method` | Can be `lz4` or `native` (which will use the Erlang native zlib compression) within term_to_binary. | `native`
`leveled.compression_point` | The point at which compression is applied to the Journal (the Ledger is always compressed). Use `on_receipt` or `on_compact`. `on_compact` is suitable when values are unlikely to yield much benefit from compression (compression is only attempted when compacting). | `on_receipt`
`leveled.log_level` | Can be `debug`, `info`, `warn`, `error` or `critical`. Sets the minimum log level to be used within leveled. Leveled will log many lines to allow for stats to be extracted by those using log indexers such as Splunk. | `info`
`leveled.journal_size` | The approximate size (in bytes) when a Journal file should be rolled. Normally keep this as around the size of o(100K) objects. | `1000000000`
`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. The higher the value, the more compaction runs, and the sooner space is recovered. But each run has a cost. | `24`
`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. Use a low hour of 0 and a high hour of 23 to have no compaction window (i.e. always compact regardless of time of day). | `0`
`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. If low hour > top hour then compaction will work overnight between low hour and top hour (inclusive). Timings rely on the server's view of local time. | `23`
`leveled.max_run_length` | In a single compaction run, the maximum number of consecutive files which may be compacted. | `4`
`leveled_reload_recalc` | Enable the `recalc` compaction strategy within the leveled backend in riak. | `disabled`

## Bitcask

Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend.
Config | Description | Default
:------|:------------|:-------
`bitcask.data_root` | The directory under which Bitcask will store its data. | `./data/bitcask`
`bitcask.io_mode` | Configure how Bitcask writes data to disk. If set to `erlang`, writes are made via Erlang's built-in file API; if set to `nif`, writes are made via direct calls to the POSIX C API. The `nif` mode provides higher throughput for certain workloads, but has the potential to negatively impact the Erlang VM, leading to higher worst-case latencies and possible throughput collapse. | `erlang`
`bitcask.expiry` | By default, Bitcask keeps all of your data around. If your data has limited time value, or if you need to purge data for space reasons, you can set the `expiry` option. For example, if you need to purge data automatically after 1 day, set the value to `1d`. `off` disables automatic expiration. | `off`
`bitcask.expiry.grace_time` | By default, Bitcask will trigger a merge whenever a data file contains an expired key. This may result in excessive merging under some usage patterns. To prevent this you can set the `bitcask.expiry.grace_time` option. Bitcask will defer triggering a merge solely for key expiry by the configured number of seconds. Setting this to `1h` effectively limits each cask to merging for expiry once per hour. | `0`
`bitcask.hintfile_checksums` | Whether to allow the CRC to be present at the end of hintfiles. Setting this to `allow_missing` runs Bitcask in a backwards-compatible mode in which old hint files will still be accepted without CRC signatures. | `strict`
`bitcask.fold.max_puts` | See the description for the `bitcask.fold.max_age` config directly below. | `0`
`bitcask.fold.max_age` | Fold keys thresholds will reuse the keydir if another fold was started less than `fold.max_age` ago and there were fewer than `fold.max_puts` updates. Otherwise, it will wait until all current fold keys complete and then start. Set either option to `unlimited` to disable. | `unlimited`
`bitcask.merge.thresholds.fragmentation` | Describes which ratio of dead keys to total keys in a file will cause it to be included in the merge. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 4 dead keys and 6 live keys, it will be included in the merge at the default ratio (which is 40). Increasing the value will cause fewer files to be merged, decreasing the value will cause more files to be merged. | `40`
`bitcask.merge.thresholds.dead_bytes` | Describes the minimum amount of data occupied by dead keys in a file to cause it to be included in the merge. Increasing the value will cause fewer files to be merged, whereas decreasing the value will cause more files to be merged. | `128MB`
`bitcask.merge.thresholds.small_file` | Describes the minimum size a file must have to be excluded from the merge. Files smaller than the threshold will be included. Increasing the value will cause more files to be merged, whereas decreasing the value will cause fewer files to be merged. | `10MB`
`bitcask.merge.triggers.dead_bytes` | Describes how much data stored for dead keys in a single file will trigger merging. If a file meets or exceeds the trigger value for dead bytes, merge will be triggered. Increasing the value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. When either of these constraints are met by any file in the directory, Bitcask will attempt to merge files. | `512MB`
`bitcask.merge.triggers.fragmentation` | Describes which ratio of dead keys to total keys in a file will trigger merging. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 6 dead keys and 4 live keys, then merge will be triggered at the default setting. Increasing this value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. | `60`
`bitcask.merge.window.end` | See the description of the `bitcask.merge.policy` config below. | `23`
`bitcask.merge.window.start` | See the description of the `bitcask.merge.policy` config below. | `0`
`bitcask.merge.policy` | Lets you specify when during the day merge operations are allowed to be triggered. Valid options are: `always`, meaning no restrictions; `never`, meaning that merging will never be attempted; and `window`, specifying the hours during which merging is permitted, where `bitcask.merge.window.start` and `bitcask.merge.window.end` are integers between 0 and 23. If merging has a significant impact on performance of your cluster, or your cluster has quiet periods in which little storage activity occurs, you may want to change this setting from the default. | `always`
`bitcask.merge_check_interval` | Bitcask periodically runs checks to determine whether merges are necessary. This parameter determines how often those checks take place. Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, etc. | `3m`
`bitcask.merge_check_jitter` | In order to prevent merge operations from taking place on different nodes at the same time, Riak can apply random variance to merge times, expressed as a percentage of `bitcask.merge_check_interval`. | `30%`
`bitcask.max_merge_size` | Maximum amount of data to merge in one go in the Bitcask backend. | `100GB`
`bitcask.max_file_size` | Describes the maximum permitted size for any single data file in the Bitcask directory. If a write causes the current file to exceed this size threshold then that file is closed, and a new file is opened for writes. | `2GB`
`bitcask.sync.interval` | See the description of the `bitcask.sync.strategy` directly below. | 
`bitcask.sync.strategy` | Changes the durability of writes by specifying when to synchronize data to disk. The default setting protects against data loss in the event of application failure (process death) but leaves open a small window in which data could be lost in the event of complete system failure (e.g. hardware, OS, or power). The default mode, `none`, writes data into operating system buffers which will be written to the disks when those buffers are flushed by the operating system. If the system fails, e.g. due to power loss or crash, that data is lost before those buffers are flushed to stable storage. This is prevented by the setting `o_sync`, which forces the operating system to flush to stable storage at every write. The effect of flushing each write is better durability, however write throughput will suffer as each write will have to wait for the write to complete. Available sync strategies: `none`, which will let the operating system manage syncing writes; `o_sync`, which uses the `O_SYNC` flag to force syncs on every write; and `interval`, which will force Bitcask to sync every `bitcask.sync.interval` seconds. | `none`
`bitcask.open_timeout` | Specifies the maximum time Bitcask will block on startup while attempting to create or open the data directory. You generally need not change this value. If for some reason the timeout is exceeded on open you'll see a log message of the form `Failed to start bitcask backend: ...`. Only then should you consider a longer timeout. | `4s`
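By way of example, a `riak.conf` fragment combining several of the parameters above might read as follows (illustrative values only, not tuning advice):

```riakconf
# Example Bitcask settings; values are placeholders
bitcask.data_root = ./data/bitcask
bitcask.expiry = 1d
bitcask.merge.policy = window
bitcask.merge.window.start = 0
bitcask.merge.window.end = 5
```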
## Memory Backend

Configurable parameters for Riak's [Memory][plan backend memory] backend.
Config | Description | Default
:------|:------------|:-------
`memory_backend.ttl` | Each value written will be written with this "time to live." Once that object's time is up, it will be deleted on the next read of its key. Minimum: `1s`. | 
`memory_backend.max_memory_per_vnode` | The maximum amount of memory consumed per vnode by the memory storage backend. Minimum: `1MB`. | 
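For example, to cap per-vnode memory use and expire objects after a day (illustrative values):

```riakconf
# Example memory backend settings
memory_backend.max_memory_per_vnode = 128MB
memory_backend.ttl = 1d
```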
## Multi Backend

Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster.

If you are using multiple backends, you can configure the backends individually by prepending the setting with `multi_backend.$name`, where `$name` is the name of the backend. `$name` can be any valid configuration word, like `customer_data`, `my_data`, `foo_bar_backend`, etc.

Below is the general form for setting multi-backend parameters:

```riakconf
multi_backend.$name.(existing_setting) = <setting>
# or
multi_backend.$name.$backend_type.(backend_specific_setting) = <setting>
```

Below is a listing of the available parameters:
Config | Description | Default
:------|:------------|:-------
`multi_backend.$name.storage_backend` | This parameter specifies the Erlang module defining the storage mechanism that will be used on this node. | `bitcask`
`multi_backend.default` | The default name of a backend when one is not specified. | 
+ +To give an example, if you have a LevelDB backend named +`customer_backend` and wish to set the `data_root` parameter to +`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would +do so as follows: + +```riakconf +multi_backend.customer_backend.storage_backend = leveldb +multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend +multi_backend.customer_backend.leveldb.maximum_memory.percent = 50 +``` + + + diff --git a/content/riak/kv/2.9.10/configuring/basic.md b/content/riak/kv/2.9.10/configuring/basic.md new file mode 100644 index 0000000000..e4c4a074bd --- /dev/null +++ b/content/riak/kv/2.9.10/configuring/basic.md @@ -0,0 +1,238 @@ +--- +title: "Basic Riak KV Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Basic Configuration" + identifier: "configuring_basic" + weight: 100 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.10/ops/building/configuration/ + - /riak/kv/2.9.10/ops/building/configuration/ +--- + +[config reference]: {{}}riak/kv/2.9.10/configuring/reference +[use running cluster]: {{}}riak/kv/2.9.10/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.9.10/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.9.10/using/performance/erlang +[plan start]: {{}}riak/kv/2.9.10/setup/planning/start +[plan best practices]: {{}}riak/kv/2.9.10/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.9.10/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.9.10/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.9.10/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.9.10/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.9.10/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.9.10/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.9.10/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.9.10/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.9.10/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.9.10/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.9.10/using/performance +[perf aws]: {{}}riak/kv/2.9.10/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.9.10/setup/planning/cluster-capacity/#ring-size-number-of-partitions + +This document covers the parameters that are commonly adjusted when +setting up a new cluster. We recommend that you also review the detailed +[Configuration Files][config reference] document before moving a cluster into +production. + +All configuration values discussed here are managed via the +configuration file on each node, and a node must be restarted for any +changes to take effect. + +> **Note** +> +> If you are upgrading to Riak KV version 2.0 or later from an pre-2.0 +release, you can use either your old `app.config` configuration file or +the newer `riak.conf` if you wish. +> +> If you have installed Riak KV 2.0 directly, you should use only +`riak.conf`. +> +> More on configuring Riak KV can be found in the [configuration files][config reference] +doc. + +We advise that you make as many of the changes below as practical +_before_ joining the nodes together into a cluster. 
Once your +configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process. + +Use [`riak-admin member-status`][use admin riak-admin#member-status] +to determine whether any given node is a member of a cluster. + +## Erlang VM Tunings + +Prior to building and starting a cluster, there are some +Erlang-VM-related changes that you should make to your configuration +files. If you are using the older, `vm.args`-based Erlang VM tunings, +you should set the following: + +```vmargs ++sfwi 500 ++scl false +``` + +If you are using the newer, `riak.conf`-based configuration system, we +recommend the following settings: + +```riakconf +erlang.schedulers.force_wakeup_interval = 500 +erlang.schedulers.compaction_of_load = false +``` + +More information can be found in [Erlang VM Tuning][perf erlang]. + +## Ring Size + +The ring size, in Riak parlance, is the number of data partitions that +comprise the cluster. This quantity impacts the scalability and +performance of a cluster and, importantly, **it should be established +before the cluster starts receiving data**. + +If the ring size is too large for the number of servers, disk I/O will +be negatively impacted by the excessive number of concurrent databases +running on each server; if the ring size is too small, the servers' other +resources (primarily CPU and RAM) will go underutilized. + +See [Cluster Capacity Planning] for more details on choosing a ring size. + +The steps involved in changing the ring size depend on whether the +servers (nodes) in the cluster have already been joined together. + +### Cluster joined, but no data needs to be preserved + +1. Change the ring creation size parameter by uncommenting it and then +setting it to the desired value, for example 64: + + ```riakconf + ring_size = 64 + ``` + + ```appconfig + %% In the riak_core section: + {ring_creation_size, 64} + ``` + +2. Stop all nodes +3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file) +4. Start all nodes +5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster] + +### New servers, have not yet joined a cluster + +1. Change the ring creation size parameter by uncommenting it and then +setting it to the desired value, for example 64: + + ```riakconf + ring_size = 64 + ``` + + ```appconfig + %% In the riak_core section: + {ring_creation_size, 64} + ``` + +2. Stop all nodes +3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for +the location of this file) +4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster] + +### Verifying ring size + +You can use the `riak-admin` command can verify the ring size: + +```bash +riak-admin status | grep ring +``` + +Console output: + +``` +ring_members : ['riak@10.160.13.252'] +ring_num_partitions : 8 +ring_ownership : <<"[{'riak@10.160.13.252',8}]">> +ring_creation_size : 8 +``` + +If `ring_num_partitions` and `ring_creation_size` do not agree, that +means that the `ring_creation_size` value was changed too late and that +the proper steps were not taken to start over with a new ring. + +**Note**: Riak will not allow two nodes with different ring sizes to be +joined into a cluster. + +## Backend + +Another critical decision to be made is the backend to use. 
The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.10/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. + + + diff --git a/content/riak/kv/2.9.10/configuring/global-object-expiration.md b/content/riak/kv/2.9.10/configuring/global-object-expiration.md new file mode 100644 index 0000000000..bed6c46efe --- /dev/null +++ b/content/riak/kv/2.9.10/configuring/global-object-expiration.md @@ -0,0 +1,88 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
+menu: + riak_kv-2.9.10: + name: "Global Object Expiration" + identifier: "config_expiry" + weight: 180 + parent: "configuring" +project: "riak_kv" +project_version: 2.9.10 +toc: true +--- + +[ttl]: https://en.wikipedia.org/wiki/Time_to_live + +By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data. + +Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value. + +## Enabling Expiry + +To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file: + +```riak.conf +leveldb.expiration = on +``` + +{{% note %}} +Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration. +{{% /note %}} + +## Setting Retention Time + +The `retention_time` setting is used to specify the time until objects expire. +Durations are set using a combination of an integer and a shortcut for the supported units: + +- Milliseconds - `ms` +- Seconds - `s` +- Minutes - `m` +- Hours - `h` +- Days - `d` +- Weeks - `w` +- Fortnight - `f` + +The following example configures objects to expire after 5 hours: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 5h +``` + +You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 8d9h +``` + +## Expiry Modes + +Global expiration supports two modes: + +- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired. +- `normal` - individual objects are removed as part of the usual compaction process. + +We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient. + +The following example configure objects to expire after 1 day: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 1d +leveldb.expiration.mode = whole_file +``` + +## Disable Expiry + +To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other 2 settings are ignored. For example: + +```riak.conf +leveldb.expiration = off +leveldb.expiration.retention_time = 1d +leveldb.expiration.mode = whole_file +``` + + + diff --git a/content/riak/kv/2.9.10/configuring/load-balancing-proxy.md b/content/riak/kv/2.9.10/configuring/load-balancing-proxy.md new file mode 100644 index 0000000000..5e644c1ff1 --- /dev/null +++ b/content/riak/kv/2.9.10/configuring/load-balancing-proxy.md @@ -0,0 +1,274 @@ +--- +title: "Load Balancing and Proxy Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Load Balancing & Proxy" + identifier: "configuring_load_balance" + weight: 150 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.10/ops/advanced/configs/load-balanacing-proxy/ + - /riak/kv/2.9.10/ops/advanced/configs/load-balanacing-proxy/ +--- + +[perf open files]: {{}}riak/kv/2.9.10/using/performance/open-files-limit + +The recommended best practice for operating Riak in production is to +place Riak behind a load-balancing or proxy solution, either hardware- +or software- based, while never directly exposing Riak to public network +interfaces. 
+ +Riak users have reported success in using Riak with a variety of load- +balancing and proxy solutions. Common solutions include proprietary +hardware-based load balancers, cloud-based load balancing options, such +as Amazon's Elastic Load Balancer, and open-source software based +projects like HAProxy and Nginx. + +This guide briefly explores the commonly used open-source software-based +solutions HAProxy and Nginx, and provides some configuration and +operational tips gathered from community users and operations oriented +engineers at Basho. + +While it is by no means an exhaustive overview of the topic, this guide +should provide a starting point for choosing and implementing your own +solution. + +## HAProxy + +[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source +solution for load balancing and proxying of HTTP- and TCP-based +application traffic. + +Users have reported success in using HAProxy in combination with Riak in +a number of configurations and scenarios. Much of the information and +example configuration for this section is drawn from experiences of +users in the Riak community in addition to suggestions from Basho +engineering. + +### Example Configuration + +The following is an example starting-point configuration for HAProxy to +act as a load balancer. The example cluster has 4 nodes and will be +accessed by Riak clients using both the Protocol Buffers and HTTP +interfaces. + +> **Note on open files limits** +> +> The operating system's open files limits need to be greater than 256000 +for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems. + +```config +global + log 127.0.0.1 local0 + log 127.0.0.1 local1 notice + maxconn 256000 + chroot /var/lib/haproxy + user haproxy + group haproxy + spread-checks 5 + daemon + quiet + +defaults + log global + option dontlognull + option redispatch + option allbackups + maxconn 256000 + timeout connect 5000 + +backend riak_rest_backend + mode http + balance roundrobin + option httpchk GET /ping + option httplog + server riak1 riak1.:8098 weight 1 maxconn 1024 check + server riak2 riak2.:8098 weight 1 maxconn 1024 check + server riak3 riak3.:8098 weight 1 maxconn 1024 check + server riak4 riak4.:8098 weight 1 maxconn 1024 check + +frontend riak_rest + bind 127.0.0.1:8098 + # Example bind for SSL termination + # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem + mode http + option contstats + default_backend riak_rest_backend + + +backend riak_protocol_buffer_backend + balance leastconn + mode tcp + option tcpka + option srvtcpka + server riak1 riak1.:8087 weight 1 maxconn 1024 check + server riak2 riak2.:8087 weight 1 maxconn 1024 check + server riak3 riak3.:8087 weight 1 maxconn 1024 check + server riak4 riak4.:8087 weight 1 maxconn 1024 check + + +frontend riak_protocol_buffer + bind 127.0.0.1:8087 + mode tcp + option tcplog + option contstats + mode tcp + option tcpka + option srvtcpka + default_backend riak_protocol_buffer_backend +``` + +A specific configuration detail worth noting from the example is the +commented option for SSL termination. HAProxy supports SSL directly as +of version 1.5. Provided that your HAProxy instance was built with +OpenSSL support, you can enable it by uncommenting the example line and +modifying it to suit your environment. More information is available in +the [HAProxy +documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl). 
Also note that the above example is considered a starting point and is a work in progress based upon [this example](https://gist.github.com/1507077). You should carefully examine the configuration and change it according to your specific environment.

### Maintaining Nodes Behind HAProxy

When using HAProxy with Riak, you can instruct HAProxy to ping each node in the cluster and automatically remove nodes that do not respond.

You can also specify a round-robin configuration in HAProxy and have your application handle connection failures by retrying after a timeout, thereby reaching a functioning node upon retrying the connection attempt.

HAProxy also has a standby system you can use to remove a node from rotation while allowing existing requests to finish. You can remove nodes from HAProxy directly from the command line by interacting with the HAProxy stats socket with a utility such as [socat](http://www.dest-unreach.org/socat/):

```bash
echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
```

At this point, you can perform maintenance on the node, down the node, and so on. When you've finished working with the node and it is again available for requests, you can re-enable it:

```bash
echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
```

Consult the following HAProxy documentation resources for more information on configuring HAProxy in your environment:

* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy)
* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)

## Nginx

Some users have reported success in using the [Nginx](http://nginx.org/) HTTP server to proxy requests for Riak clusters. An example that provides access to a Riak cluster *through GET requests only* is provided here for reference.

### Example Configuration

The following is an example starting-point configuration for Nginx to act as a front-end proxy to a 5-node Riak cluster.

This example forwards all GET requests to Riak nodes while rejecting all other HTTP operations.

{{% note title="Nginx version notes" %}}
This example configuration was verified on **Nginx version 1.2.3**. Please be aware that earlier versions of Nginx did not support any HTTP 1.1 semantics for upstream communication to backends. You should carefully examine this configuration and make changes appropriate to your specific environment before attempting to use it.
{{% /note %}}

Here is an example `nginx.conf` file:

```config
upstream riak_hosts {
  # server 10.0.1.10:8098;
  # server 10.0.1.11:8098;
  # server 10.0.1.12:8098;
  # server 10.0.1.13:8098;
  # server 10.0.1.14:8098;
}

server {
  listen   80;
  server_name  _;
  access_log  /var/log/nginx/riak.access.log;

  # your standard Nginx config for your site here...
  location / {
    root /var/www/nginx-default;
  }

  # Expose the /riak endpoint and allow queries for keys only
  location /riak/ {
      proxy_set_header Host $host;
      proxy_redirect off;

      client_max_body_size    10m;
      client_body_buffer_size 128k;

      proxy_connect_timeout   90;
      proxy_send_timeout      90;
      proxy_read_timeout      90;

      proxy_buffer_size    64k;   # If set to a smaller value,
                                  # nginx can complain with a
                                  # "too large headers" error
      proxy_buffers     4 64k;
      proxy_busy_buffers_size 64k;
      proxy_temp_file_write_size 64k;

      if ($request_method != GET) {
          return 405;
      }

      # Disallow any link with the MapReduce query format "bucket,tag,_"
      if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) {
          return 405;
      }

      if ($request_method = GET) {
          proxy_pass http://riak_hosts;
      }
  }
}
```

{{% note title="Note on access controls" %}}
Even when filtering and limiting requests to GETs only as done in the example, you should strongly consider additional access controls beyond what Nginx can provide directly, such as specific firewall rules to limit inbound connections to trusted sources.
{{% /note %}}

### Querying Secondary Indexes Over HTTP

When accessing Riak over HTTP and issuing Secondary Index queries, you can encounter an issue due to the default Nginx handling of HTTP header names containing underscore (`_`) characters.

By default, Nginx will issue errors for such queries, but you can instruct Nginx to handle such header names when doing Secondary Index queries over HTTP by adding the following directive to the appropriate `server` section of `nginx.conf`:

```
underscores_in_headers on;
```

diff --git a/content/riak/kv/2.9.10/configuring/managing.md b/content/riak/kv/2.9.10/configuring/managing.md
new file mode 100644
index 0000000000..16beb4ab73
--- /dev/null
+++ b/content/riak/kv/2.9.10/configuring/managing.md
@@ -0,0 +1,119 @@
---
title: "Managing Your Configuration"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "Managing Configuration"
    identifier: "configuring_managing"
    weight: 130
    parent: "configuring"
toc: true
---

[use admin riak cli]: {{}}riak/kv/2.9.10/using/admin/riak-cli
[use admin riak cli#chkconfig]: {{}}riak/kv/2.9.10/using/admin/riak-cli/#chkconfig
[config reference#search]: {{}}riak/kv/2.9.10/configuring/reference/#search

## Retrieving a Configuration Listing

At any time, you can get a snapshot of currently applied configurations through the command line. For a listing of *all* of the configs currently applied in the node:

```bash
riak config effective
```

This will output a long list of the following form:

```
anti_entropy = active
anti_entropy.bloomfilter = on
anti_entropy.concurrency_limit = 2
# and so on
```

For detailed information about a particular configuration variable, use the `config describe <variable>` command. This command will output a description of what the parameter configures, which datatype you should use to set the parameter (integer, string, enum, etc.), the default value of the parameter, the currently set value in the node, and the name of the parameter in `app.config` in older versions of Riak (if applicable).

For in-depth information about the `ring_size` variable, for example:

```bash
riak config describe ring_size
```

This will output the following:

```
Documentation for ring_size
Number of partitions in the cluster (only valid when first
creating the cluster).
Must be a power of 2, minimum 8 and maximum +1024. + + Datatype : [integer] + Default Value: 64 + Set Value : undefined + app.config : riak_core.ring_creation_size +``` + +## Checking Your Configuration + +The [`riak`][use admin riak cli] command line tool has a +[`chkconfig`][use admin riak cli#chkconfig] command that enables you to +determine whether the syntax in your configuration files is correct. + +```bash +riak chkconfig +``` + +If your configuration files are syntactically sound, you should see the +output `config is OK` followed by a listing of files that were checked. +You can safely ignore this listing. If, however, something is +syntactically awry, you'll see an error output that provides details +about what is wrong. To give an example, the `search.solr.jmx_port` +setting (in the [Search][config reference#search] section below) +must be set as an integer. Imagine that we set it to something else: + +```riakconf +search.solr.jmx_port = banana +``` + +If we run `riak chkconfig` now, we'll get an error: + +``` +[error] Error generating configuration in phase transform_datatypes +[error] Error transforming datatype for: search.solr.jmx_port +[error] "banana" can't be converted to an integer +``` + +The error message will specify which configurable parameters are +syntactically unsound and attempt to provide an explanation why. + +Please note that the `chkconfig` command only checks for syntax. It will +_not_ be able to discern if your configuration is otherwise unsound, +e.g. if your configuration will cause problems on your operating system +or doesn't activate subsystems that you would like to use. + +## Debugging Your Configuration + +If there is a problem with your configuration but you're having trouble +identifying the problem, there is a command that you can use to debug +your configuration: + +```bash +riak config generate -l debug +``` + +If there are issues with your configuration, you will see detailed +output that might provide a better sense of what has gone wrong in the +config generation process. + + + diff --git a/content/riak/kv/2.9.10/configuring/mapreduce.md b/content/riak/kv/2.9.10/configuring/mapreduce.md new file mode 100644 index 0000000000..a784d5ed6d --- /dev/null +++ b/content/riak/kv/2.9.10/configuring/mapreduce.md @@ -0,0 +1,199 @@ +--- +title: "MapReduce Settings" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "MapReduce Settings" + identifier: "configuring_mapreduce" + weight: 170 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.10/ops/advanced/configs/mapreduce/ + - /riak/kv/2.9.10/ops/advanced/configs/mapreduce/ +--- + +[usage mapreduce]: {{}}riak/kv/2.9.10/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.9.10/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.9.10/developing/usage/secondary-indexes + +## Configuring MapReduce + +[MapReduce (M/R)][usage mapreduce] is always enabled, but configurable +through the [app.config][config reference#appconfig] file as +follows under `riak_kv` + +```erlang +{riak_kv, [ +``` + +`mapred_name` is the URL directory used to submit M/R requests to Riak. +By default `mapred`, making the command path, for example: +`http://localhost:8098/mapred` + +```erlang + {mapred_name, "mapred"}, +``` + +`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes] +MapReduce inputs are queued in parallel in their own pipe (`true`), or +serially through a helper process (`false` or undefined). 
+ +> **Note**: Set to `false` or leave undefined during an upgrade from 1.0. + +```erlang + {mapred_2i_pipe, true}, +``` + +Each of these entries control how many Javascript virtual machines are +available for executing map, reduce, pre- and post-commit hook +functions. + +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {map_js_vm_count, 8 }, + {reduce_js_vm_count, 6 }, + {hook_js_vm_count, 2 }, +``` + +`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated +to the Javascript VMs. If unset, the default is 8MB. + +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {js_max_vm_mem, 8}, +``` + +`js_thread_stack` is the maximum amount of thread stack, in megabytes, +allocated to the Javascript VMs. If unset, the default is 16MB. + +> **Note**: This is not the same as the C thread stack. + +```erlang + {js_thread_stack, 16}, +``` + +`js_source_dir` should point to a directory containing Javascript source +files which will be loaded when Riak initializes Javascript VMs. + +```erlang + %{js_source_dir, "/tmp/js_source"}, +``` + + + +## Configuration Tuning for Javascript + +If you load larger JSON objects in your buckets there is a possibility you might encounter an error like the following: + +```json + {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"} +``` + + +You can increase the amount of memory allocated to the Javascript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB: + +```erlang +{js_thread_stack, 8} +``` + +becomes + +```erlang +{js_thread_stack, 32}, +``` + +In addition to increasing the amount of memory allocated to the stack you can increase the heap size as well by increasing the `js_max_vm_mem` from the default of 8MB. If you are collecting a large amount of results in a reduce phase you may need to increase this setting. + +## Configuration for Riak 1.0 + +Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config: + +```erlang +%% Use Riak Pipe to power MapReduce queries +{mapred_system, pipe}, +``` + +> **Warning:** +> +> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0. + +Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this: + +```erlang +%% Use the legacy MapReduce system +{mapred_system, legacy}, +``` + +## Configuration Tuning for Reduce Phases + +If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases. + +### Batch Size + +By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs. 
If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config: + +```erlang +%% Run reduce functions after 100 new inputs are received +{mapred_reduce_phase_batch_size, 100}, +``` + +You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_batch_size":150}}} +``` + +In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form: + +```erlang +{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep} +``` + +Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_only_1":true}}} +``` + +Similarly, in Erlang: + +```erlang +{reduce, FunSpec, [reduce_phase_only_1], Keep} +``` + +> **Warning:** +> +> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1. + +### Pre-Reduce + +If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced local to the partition that produced them, before being sent, as usual, to the final aggregate reduce. + +Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config: + +```erlang +%% Always pre-reduce between map and reduce phases +{mapred_always_prereduce, true} +``` + +Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag: + +```erlang +{map, FunSpec, [do_prereduce], Keep} +``` + +> **Warning:** +> +>A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1. + + + diff --git a/content/riak/kv/2.9.10/configuring/next-gen-replication.md b/content/riak/kv/2.9.10/configuring/next-gen-replication.md new file mode 100644 index 0000000000..9fa397bf2d --- /dev/null +++ b/content/riak/kv/2.9.10/configuring/next-gen-replication.md @@ -0,0 +1,61 @@ +--- +tile_supertext: "Configuring:" +title: "Next Gen Replication" +description: "" +project: "riak_kv" +project_version: "2.9.10" +menu: + riak_kv-2.9.10: + name: "Next Gen Replication" + identifier: "nextgen_rep" + weight: 200 + parent: "configuring" +version_history: + in: "2.9.1+" +toc: true +commercial_offering: true +--- + +The configuration for Next Gen Replication is kept in + the `riak.conf` configuration file. 
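As a minimal sketch, a node taking part in Tictac AAE full-sync might carry settings along these lines (the peer address and check schedule are placeholders; each parameter is described in the table below):

```riakconf
# Illustrative full-sync settings; adapt peer and schedule to your clusters
ttaaefs_scope = all
ttaaefs_localnval = 3
ttaaefs_remotenval = 3
ttaaefs_peerip = 10.0.0.2
ttaaefs_peerport = 8898
ttaaefs_peerprotocol = http
ttaaefs_allcheck = 24
```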
## Settings

Once your configuration is set, you can verify its correctness by running the `riak` command-line tool:

```bash
riak chkconfig
```

## riak.conf Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync does all data need to be sync'd, or should a specific bucket be sync'd (bucket), or a specific bucket type (type). Note that in most cases sync of all data is lower overhead than sync of a subset of data - as cached AAE trees will be used.
`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For tictac full-sync what registered queue name on this cluster should be used for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exclusive to full-sync (i.e. a real-time replication queue may be used as well).
`ttaaefs_maxresults` | `any` (integer) | `64` | For tictac full-sync what is the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair.
`ttaaefs_rangeboost` | `any` (integer) | `8` | For tictac full-sync what is the maximum number of AAE segments to be compared per exchange. When running a range_check query this will be the ttaaefs_maxresults * ttaaefs_rangeboost.
`ttaaefs_bucketfilter_name` | `any` (text) | `` | For Tictac bucket full-sync which bucket should be sync'd by this node. Only ascii string bucket definitions supported (which will be converted using list_to_binary).
`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync what is the bucket type of the bucket name. Only ascii string type bucket definitions supported (these definitions will be converted to binary using list_to_binary).
`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync which NVAL should be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval.
`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync which NVAL should be sync'd in the remote cluster.
`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node in the cluster to which this node will connect for full_sync purposes. If this peer node is unavailable, then this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes.
`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster.
`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when connecting to the peer in the remote cluster. Could be http or pb (but only http currently being tested).
`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24hour period should all the data be checked to confirm it is fully sync'd. When running a full (i.e. nval) sync this will check all the data under that nval between the clusters, and when the trees are out of alignment, will check across all data where the nval matches the specified nval.
`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24hour period should no data be checked to confirm it is fully sync'd. Use nochecks to align the number of checks done by each node - if each node has the same number of slots, they will naturally space their checks within the period of the slot.
`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24hour period should the last hour's data be checked to confirm it is fully sync'd.
`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24hour period should the last 24-hours of data be checked to confirm it is fully sync'd.
`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24hour period should a range_check be run.
`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, should each key that is repaired be logged.
`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable tictacaae. Note that disabling tictacaae will set the use of tictacaae_active only at startup - setting the environment variable at runtime will have no impact.
`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Set the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and not removed when that partition hands off (although the contents should be cleared).
`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if tictacaae is enabled, then the vnode will detect if the vnode backend has the capability to be a "native" store. If not, then parallel mode will be entered, and a parallel AAE keystore will be started. There are two potential parallel store backends - leveled_ko, and leveled_so.
`tictacaae_rebuildwait` | `` | `336` | This is the number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
`tictacaae_rebuilddelay` | `` | `345600` | Once the AAE system has expired (due to the rebuild wait), the rebuild will not be triggered until the rebuild delay which will be a random number up to the size of this delay (in seconds).
`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will skip when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans developing.
`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on the riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data, hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired each pass is defined by the tictacaae_maxresults.
diff --git a/content/riak/kv/2.9.10/configuring/reference.md b/content/riak/kv/2.9.10/configuring/reference.md new file mode 100644 index 0000000000..57fd96f4fe --- /dev/null +++ b/content/riak/kv/2.9.10/configuring/reference.md @@ -0,0 +1,2038 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.10/ops/advanced/configs/configuration-files/ + - /riak/kv/2.9.10/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. 
+
+## Node Metadata
+
+Every Riak node has a name and a cookie used to facilitate inter-node
+communication. The following parameters enable you to customize the name
+and cookie.
+
+Config | Description | Default
+:------|:------------|:-------
+`distributed_cookie` | Cookie for distributed node communication within a Riak cluster. All nodes in the same cluster should use the same cookie or they will not be able to communicate. | `riak`
+`nodename` | The name of the Riak node. | `riak@127.0.0.1`
+`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
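+
+As a quick illustration, a sketch of these settings in `riak.conf`;
+the node address and cookie value below are placeholders (use your own
+address and a private cookie):
+
+```riakconf
+nodename = riak@192.168.1.10
+distributed_cookie = my_private_cookie
+ring_size = 64
+```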
+
+## Ring
+
+Configurable parameters for your cluster's [ring][concept clusters].
+
+Config | Description | Default
+:------|:------------|:-------
+`ring.state_dir` | Default location of ringstate. | `./data/ring`
+`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
+`transfer_limit` | Number of concurrent node-to-node transfers allowed. | `2`
+
+## Storage Backend
+
+Riak enables you to choose from the following storage backends:
+
+* [Bitcask][plan backend bitcask]
+  - [configuration][config backend bitcask]
+* [LevelDB][plan backend leveldb]
+  - [configuration][config backend leveldb]
+* [Leveled][plan backend leveled]
+  - [configuration][config backend leveled]
+* [Memory][plan backend memory]
+  - [configuration][config backend memory]
+* [Multi][plan backend multi]
+  - [configuration][config backend multi]
+
+Config | Description | Default
+:------|:------------|:-------
+`storage_backend` | Specifies the storage engine used for Riak's key-value data and secondary indexes (if supported). The available options are `bitcask` (the default), `leveldb`, `memory`, `leveled` and `multi`. | `bitcask`
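+
+For example, selecting the LevelDB backend for a node is a one-line
+change in `riak.conf` (shown as an illustration only - choose the
+backend that suits your workload, and do not switch backends on a node
+that already holds data):
+
+```riakconf
+storage_backend = leveldb
+```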
+
+## Directories
+
+The directories in which Riak stores data, logs, dependencies,
+executables, and configuration files can be configured using the
+parameters below.
+
+Config | Description | Default
+:------|:------------|:-------
+`platform_bin_dir` | The directory in which the `riak-admin`, `riak-debug`, and now-deprecated `search-cmd` executables are stored. | `./bin`
+`platform_data_dir` | The directory in which Riak stores its storage backend data, as well as active anti-entropy data and cluster metadata. | `./data`
+`platform_etc_dir` | The directory in which Riak's configuration files are stored. | `./etc`
+`platform_lib_dir` | The directory in which Riak's dependencies are housed. | `./lib`
+`platform_log_dir` | The directory in which Riak's log files are stored, e.g. the `console.log`, `erlang.log`, and `crash.log` files. | `./log`
+
+Each of these directory parameters can be used to construct values for
+other parameters by placing it within a `$(...)`. Thus,
+`platform_log_dir` becomes `$(platform_log_dir)` and so on.
+
+To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the
+`anti_entropy.data_dir` parameter. When setting that parameter, you can
+specify an absolute directory, as below:
+
+```riakconf
+anti_entropy.data_dir = /path/to/anti_entropy
+```
+
+Or you can use the value of `platform_data_dir`:
+
+```riakconf
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```
+
+## Search
+
+Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`search` | `off` | `on` or `off`
+`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
+`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
+`search.dist_query` | `on` | `on` or `off`
+`search.index.error_threshold.failure_count` | `3` | Integer
+`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
+`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
+`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
+`search.queue.batch.maximum` | `100` | Integer
+`search.queue.batch.minimum` | `1` | Integer
+`search.queue.high_watermark` | `10000` | Integer
+`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
+`search.root_dir` | `./data/yz` | Directory
+`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
+`search.solr.jmx_port` | `8985` | Integer
+`search.solr.port` | `8093` | Integer
+`search.solr.start_timeout` | `30s` | Integer with time units (e.g. 2m)
+`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`
+
+
+## Riak Control
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters. The configurable parameters below enable you
+to turn the Riak Control subsystem on and off and to configure console
+authorization.
+
+Config | Description | Default
+:------|:------------|:-------
+`riak_control` | Set to `off` to disable the admin panel. | `off`
+`riak_control.auth.mode` | Authentication mode used for access to the admin panel. Options are `off` (which is the default) or `userlist`. | `off`
+`riak_control.auth.user.$username.password` | If Riak Control's authentication mode (`riak_control.auth.mode`) is set to `userlist`, this is the list of usernames and passwords for access to the admin panel. |
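+
+For instance, a sketch of turning the panel on with password
+authentication; the username and password below are placeholders, and
+the panel should only be exposed over HTTPS when authentication is
+enabled:
+
+```riakconf
+riak_control = on
+riak_control.auth.mode = userlist
+riak_control.auth.user.admin.password = example-password
+```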
+
+## Runtime Health
+
+Configurable parameters for interaction between Riak and the underlying
+operating system.
+
+Config | Description | Default
+:------|:------------|:-------
+`runtime_health.triggers.distribution_port` | Whether distribution ports with full input buffers will be counted as busy. Distribution ports connect Riak nodes within a single cluster. | `on`
+`runtime_health.triggers.port` | Whether ports with full input buffers will be counted as busy. Ports can represent open files or network sockets. | `on`
+`runtime_health.triggers.process.heap_size` | A process will become busy when its heap exceeds this size (in bytes). | `160444000`
+`runtime_health.triggers.process.garbage_collection` | A process will become busy when it exceeds this amount of time doing garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. Note: Enabling this setting can cause performance problems on multi-core systems. | `off`
+`runtime_health.triggers.process.long_schedule` | A process will become busy when it exceeds this amount of time during a single process scheduling and execution cycle. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. | `off`
+`runtime_health.thresholds.busy_ports` | The threshold at which a warning will be triggered about the number of ports that are overly busy. Ports with full input buffers count toward this threshold. | `2`
+`runtime_health.thresholds.busy_processes` | The threshold at which a warning will be triggered about the number of processes that are overly busy. Processes with large heaps or that take a long time to garbage collect will count toward this threshold. | `30`
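+
+As an illustrative sketch only (the raised thresholds below are
+placeholders, not tuning advice), a node expected to carry many busy
+connections might look like this:
+
+```riakconf
+runtime_health.triggers.distribution_port = on
+runtime_health.thresholds.busy_ports = 4
+runtime_health.thresholds.busy_processes = 60
+```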
+
+## Default Bucket Properties
+
+When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified.
+
+Config | Description | Default
+:------|:------------|:-------
+`buckets.default.allow_mult` | Whether or not siblings are allowed. Note: See [Conflict Resolution][usage conflict resolution] for a discussion of siblings. | `false`
+`buckets.default.basic_quorum` | Whether not-founds will invoke the "basic quorum" optimization. This setting will short-circuit fetches where the majority of replicas report that the key is not found. Only used when `notfound_ok` is set to `false`. | `false`
+`buckets.default.dw` | The number of replicas which must reply to a write request indicating that the write was committed to durable storage for the write to be deemed successful. | `quorum`
+`buckets.default.last_write_wins` | Whether conflicting writes resolve via timestamp. | `false`
+`buckets.default.merge_strategy` | The strategy used when merging objects that potentially have conflicts. The default is `2` in Riak 2.0 for typed buckets and `1` for non-typed buckets. This setting reduces sibling creation through additional metadata on each sibling (also known as Dotted Version Vectors). Setting this to `1` is the default for Riak 1.4 and earlier, and may duplicate siblings that originated in the same write. | `1`
+`buckets.default.n_val` | The number of replicas stored in **non-typed** buckets. For typed buckets, the default is `3` unless changed explicitly for that bucket type. Note: See Replication Properties for further discussion. | `3`
+`buckets.default.notfound_ok` | Whether not-founds will count toward a quorum of reads. | `true`
+`buckets.default.postcommit` | A space-delimited list of functions that will be run after a value is stored. Only Erlang functions are allowed, using the `module:function` format. |
+`buckets.default.precommit` | A space-delimited list of functions that will be run before a value is stored, and that can abort the write. Only Erlang functions are allowed, using the `module:function` format. |
+`buckets.default.pr` | The number of primary, non-fallback replicas that must reply to a read request. | `0`
+`buckets.default.pw` | The number of primary, non-fallback replicas which must reply to a write request. | `0`
+`buckets.default.r` | The number of replicas which must reply to a read request. | `quorum`
+`buckets.default.w` | The number of replicas which must reply to a write request, indicating that the write was received. | `quorum`
+`buckets.default.rw` | The number of replicas which must reply to a delete request. | `quorum`
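+
+A sketch of how these properties look when set explicitly in
+`riak.conf`; the values below simply restate the defaults from the
+table above:
+
+```riakconf
+buckets.default.allow_mult = false
+buckets.default.n_val = 3
+buckets.default.r = quorum
+buckets.default.w = quorum
+```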
+
+## Object Settings
+
+Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context].
+
+Config | Description | Default
+:------|:------------|:-------
+`object.format` | Controls which binary representation of a riak value is stored on disk. Options are `0`, which will use the original `erlang:term_to_binary` format but has a higher space overhead, or `1`, which will tell Riak to utilize a new format for more compact storage of small values. | `1`
+`object.siblings.maximum` | Writing an object with more than this number of siblings will send a failure to the client. | `100`
+`object.siblings.warning_threshold` | Writing an object with more than this number of siblings will generate a warning in the logs. | `25`
+`object.size.maximum` | Writing an object larger than this will send a failure to the client. | `50MB`
+`object.size.warning_threshold` | Reading or writing objects larger than this size will write a warning in the logs. | `5MB`
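+
+For example, a sketch restating the default object guard rails in
+`riak.conf`; lowering the warning thresholds is a common way to surface
+over-large objects earlier:
+
+```riakconf
+object.size.warning_threshold = 5MB
+object.size.maximum = 50MB
+object.siblings.warning_threshold = 25
+object.siblings.maximum = 100
+```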
+
+## Erlang VM
+
+In the older configuration system, the Erlang VM in which Riak runs was
+configured using a `vm.args` file. In the new, `riak.conf`-based
+system, the Erlang VM can be configured using the parameters in the
+table below.
+
+Config | Description | Default
+:------|:------------|:-------
+`erlang.async_threads` | The number of threads in the Erlang VM's asynchronous thread pool. The valid range is 0-1024. If thread support is not available, this parameter will have no impact; if thread support is available, the default value is 64. This is the equivalent of the `+A` flag. | `64` (if thread support is available)
+`erlang.async_threads.stack_size` | If thread support is available in your Erlang VM, this parameter sets the amount of memory allocated to each asynchronous thread, which you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, which translates to 64-32768 KB on 32-bit architectures. Although there is no default, we suggest a stack size of 16 kilowords, which translates to 64 KB. This small suggested size has been chosen because the number of asynchronous threads, set using the `erlang.async_threads` parameter explained above, might be quite large. The 64 KB size is enough for drivers delivered with Erlang/OTP but might not be large enough to accommodate drivers that use the `driver_async()` functionality. |
+`erlang.distribution.net_ticktime` | The net kernel is an Erlang system process that provides various forms of network monitoring. In a Riak cluster, one of the functions of the net kernel is to periodically check node liveness. Tick time is the frequency with which those checks happen; this parameter sets that frequency in seconds. If you set this parameter to 10, for example, the tick will occur once every 10 seconds. |
+`erlang.distribution.port_range.minimum` | For ease of firewall configuration, the Erlang distribution can be bound to a limited range of TCP ports. If this parameter is set, and `erlang.distribution.port_range.maximum` is not set, only this port will be used. If the minimum is unset, no restriction will be made on the port range; instead, Erlang will listen on a random high-numbered port. |
+`erlang.distribution.port_range.maximum` | See the description for `erlang.distribution.port_range.minimum` directly above. |
+`erlang.schedulers.force_wakeup_interval` | Sets the scheduler forced wakeup interval. All run queues will be scanned each time period specified (in milliseconds). While there are sleeping schedulers in the system, one scheduler will be woken for each non-empty run queue found. An interval of zero disables this feature, which is the default. This feature is a workaround for long-running native code, and native code that does not properly bump reductions. |
+`erlang.schedulers.compaction_of_load` | Enables or disables the Erlang scheduler's compaction of load. When enabled (which is the default), load balancing will strive to establish a load distribution that causes as many scheduler threads as possible to be fully loaded, i.e. not to run out of scheduled work. This is accomplished by migrating load, such as running processes, onto a smaller set of schedulers when schedulers frequently run out of work. When disabled, the frequency at which schedulers run out of work will not be taken into account by the load balancing logic. | `true` (enabled)
+`erlang.schedulers.utilization_balancing` | Enables or disables the Erlang scheduler's balancing of load. By default, scheduler utilization balancing is disabled while scheduler compaction of load is enabled, i.e. `erlang.schedulers.compaction_of_load` is set to `true`. In this state, the Erlang VM will strive for a load distribution which causes as many scheduler threads as possible to be fully loaded, i.e. to not run out of work. When load balancing is enabled using this setting, the system will instead attempt to spread scheduler utilization equally between schedulers. | `false` (disabled)
+`erlang.distribution_buffer_size` | For nodes with many `busy_dist_port` events, Basho recommends raising the sender-side network distribution buffer size. 32MB may not be sufficient for some workloads and is a suggested starting point. Erlangers may know this as `+zdbbl`. | `32MB`
+`erlang.process_limit` | Raises the default Erlang process limit. | `256000`
+`erlang.max_ets_tables` | Raises the ETS table limit. | `256000`
+`erlang.crash_dump` | Sets the location of crash dumps. | `./log/erl_crash.dump`
+`erlang.fullsweep_after` | A non-negative integer which indicates how many times generational garbage collections can be done without forcing a fullsweep collection. In low-memory systems (especially without virtual memory), setting the value to 0 can help to conserve memory. | `0`
+`erlang.max_ports` | The number of concurrent ports/sockets. The valid range is 1024 to 134217727. | `65536`
+`erlang.K` | Enables or disables the kernel poll functionality if the emulator supports it. If the emulator does not support kernel poll, and the `K` flag is passed to the emulator, a warning is issued at startup. | `on`
+`erlang.schedulers.total` | Sets the number of scheduler threads to create and scheduler threads to set online when `erlang.smp` support has been enabled. The maximum for both values is 1024. If the Erlang runtime system is able to determine the number of logical processors configured and logical processors available, `schedulers.total` will default to the number of logical processors configured, and `schedulers.online` will default to the number of logical processors available; otherwise, the default values will be 1. `schedulers.total` may be omitted if `schedulers.online` is not, and vice versa. If `schedulers.total` or `schedulers.online` is specified as a negative number, the value is subtracted from the default number of logical processors configured or logical processors available, respectively. Specifying the value 0 for `schedulers.total` or `schedulers.online` resets the number of scheduler threads or scheduler threads online, respectively, to its default value. This option is ignored if the emulator doesn't have SMP support enabled (see the `erlang.smp` flag). |
+`erlang.schedulers.online` | See the description for `erlang.schedulers.total` directly above. |
+`erlang.W` | Sets the mapping of warning messages for `error_logger`. Messages sent to the error logger using one of the warning routines can be mapped either to errors, warnings (`w`, which is the default), or info reports (`i`). | `w`
+`erlang.smp` | Starts the Erlang runtime system with SMP support enabled. This may fail if no runtime system with SMP support is available. The `auto` setting starts the Erlang runtime system with SMP support enabled if it is available and more than one logical processor is detected. A value of `disable` starts a runtime system without SMP support. Note: The runtime system with SMP support will not be available on all supported platforms. See also the `erlang.schedulers` settings. Some native extensions (NIFs) require use of the SMP emulator. | `enable`
+`erlang.shutdown_time` | Limits how long the Erlang VM spends shutting down. After the specified duration elapses, all existing processes are killed. | `10s`
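+
+As a sketch, the settings most commonly touched here are the
+distribution port range (for firewall configuration) and the
+distribution buffer; the port numbers below are placeholders:
+
+```riakconf
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+erlang.distribution_buffer_size = 32MB
+```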
+
+## JavaScript MapReduce
+
+Configurable parameters for Riak's now-deprecated JavaScript
+[MapReduce][usage mapreduce] system.
+
+Config | Description | Default
+:------|:------------|:-------
+`javascript.source_dir` | A directory containing the JavaScript source files which will be loaded by Riak when it initializes JavaScript VMs. |
+`javascript.maximum_stack_size` | The maximum amount of thread stack memory to allocate to each JavaScript virtual machine. | `16MB`
+`javascript.maximum_heap_size` | The maximum amount of memory allocated to each JavaScript virtual machine. | `8MB`
+`javascript.hook_pool_size` | The number of JavaScript virtual machines available for executing pre-commit hook functions. | `2`
+`javascript.reduce_pool_size` | The number of JavaScript virtual machines available for executing reduce functions. | `6`
+`javascript.map_pool_size` | The number of JavaScript virtual machines available for executing map functions. | `8`
+
+## Security
+
+Configurable parameters for [Riak KV Security][security index].
+
+Config | Description | Default
+:------|:------------|:-------
+`ssl.cacertfile` | The default signing authority location for HTTPS. | `$(platform_etc_dir)/cacertfile.pem`
+`ssl.keyfile` | Default key location for HTTPS. | `$(platform_etc_dir)/key.pem`
+`ssl.certfile` | Default cert location for HTTPS. | `$(platform_etc_dir)/cert.pem`
+`secure_referer_check` | Measures were added to Riak 1.2 to counteract cross-site scripting and request-forgery attacks. Some reverse proxies cannot remove the `Referer` header and make serving data directly from Riak impossible. Turning this setting to `off` disables this security check. | `on`
+`check_crl` | Whether to check the certificate revocation list (CRL) of a client certificate. This defaults to `on`, but some CAs may not maintain or define a CRL, so this can be disabled if no CRL is available. | `on`
+`tls_protocols.sslv3` | Determines which SSL/TLS versions are allowed. By default, only TLS 1.2 is allowed, but other versions can be enabled if clients don't support the latest TLS standard. It is strongly recommended that SSLv3 not be enabled unless absolutely necessary. More than one protocol can be enabled at once. The `tls_protocols` parameters below can be used to turn different versions on and off. | `off`
+`tls_protocols.tlsv1.2` | | `on`
+`tls_protocols.tlsv1.1` | | `off`
+`tls_protocols.tlsv1` | | `off`
+`honor_cipher_order` | Whether to prefer the order in which the server lists its ciphers. When set to `off`, the client's preferred cipher order dictates which cipher is chosen. | `on`
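+
+A sketch of a typical TLS arrangement; the certificate paths below are
+placeholders, and you should generate and manage your own key material:
+
+```riakconf
+tls_protocols.tlsv1.2 = on
+ssl.certfile = $(platform_etc_dir)/cert.pem
+ssl.keyfile = $(platform_etc_dir)/key.pem
+ssl.cacertfile = $(platform_etc_dir)/cacertfile.pem
+```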
+
+## Client Interfaces
+
+Configurable parameters for clients connecting to Riak either through
+Riak's Protocol Buffers or HTTP API.
+
+Config | Description | Default
+:------|:------------|:-------
+`protobuf.nagle` | Turns off Nagle's algorithm for Protocol Buffers connections. This is equivalent to setting the `TCP_NODELAY` option on the socket. | `off`
+`protobuf.backlog` | The maximum length to which the queue of pending connections may grow. If set, it must be an integer greater than zero. If you anticipate a huge number of connections being initialized simultaneously, set this number higher. | `128`
+`listener.protobuf.$name` | This is the IP address and TCP port to which the Riak Protocol Buffers interface will bind. | `{"127.0.0.1",8087}`
+`listener.http.$name` | This is the IP address and TCP port to which the Riak HTTP interface will bind. | `{"127.0.0.1",8098}`
+`listener.https.$name` | This is the IP address and TCP port to which the Riak HTTPS interface will bind. |
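+
+For example, a sketch binding named listeners to a private interface;
+the `internal` name and the IP address below are placeholders:
+
+```riakconf
+listener.protobuf.internal = 10.0.0.5:8087
+listener.http.internal = 10.0.0.5:8098
+```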
+
+## Logging
+
+Configurable parameters for [lager](https://github.com/basho/lager),
+Riak's logging system.
+
+Config | Description | Default
+:------|:------------|:-------
+`log.console` | Where to emit the default log messages (typically at `info` severity). Possible values: `off`, which disables console log messages; `file`, which specifies that log messages will be output to the file specified by `log.console.file`; `console`, which outputs messages to standard output (seen when using `riak attach-direct`); or `both`, which outputs messages both to the file specified in `log.console.file` and to standard out. | `file`
+`log.console.file` | When `log.console` is set to `file` or `both`, this parameter determines the path of the file to which console messages will be logged. | `./log/console.log`
+`log.console.level` | The severity level of the console log. Possible values: `debug`, `info`, `warning`, `error`. | `info`
+`log.crash` | Whether to enable the crash log. | `on`
+`log.crash.file` | If the crash log is enabled, the file where its messages will be written. | `./log/crash.log`
+`log.crash.maximum_message_size` | Maximum size of individual messages in the crash log. | `64KB`
+`log.crash.rotation` | The schedule on which to rotate the crash log. | `$D0`
+`log.crash.rotation.keep` | The number of rotated crash logs to keep. When set to `current`, only the current open log file is kept. Otherwise, an integer can be specified. | `5`
+`log.crash.size` | Maximum size of the crash log before it is rotated. | `10MB`
+`log.error.file` | The file where error messages will be logged. | `./log/error.log`
+`log.error.messages_per_second` | Maximum number of `error_logger` messages to handle per second. | `100`
+`log.error.redirect` | Whether to redirect `error_logger` messages into lager. | `on`
+`log.syslog` | When set to `on`, enables log output to syslog. | `off`
+`log.syslog.facility` | Sets the facility level of syslog output if `log.syslog` is set to `on`. Possible values: `auth`, `authpriv`, `clock`, `cron`, `daemon`, `ftp`, `kern`, `lpr`, `mail`, `news`, `syslog`, `user`, and `uucp`. In addition to these settings, you may also select `local0` through `local7`. | `daemon`
+`log.syslog.ident` | If `log.syslog` is set to `on`, this setting determines the prefix appended to each syslog message. | `riak`
+`log.syslog.level` | If `log.syslog` is set to `on`, this setting determines the log level of syslog output. Possible values: `alert`, `critical`, `debug`, `emergency`, `error`, `info`, `none`, `notice`, `warning`. | `info`
+`sasl` | Whether to enable `sasl`, Erlang's built-in error logger. | `off`
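+
+A sketch of routing Riak's logs to syslog; the values below restate
+the documented defaults apart from turning syslog on:
+
+```riakconf
+log.syslog = on
+log.syslog.facility = daemon
+log.syslog.ident = riak
+log.syslog.level = info
+```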
+
+## Active Anti-Entropy
+
+Configurable parameters for Riak's active anti-entropy subsystem.
+
+Config | Description | Default
+:------|:------------|:-------
+`anti_entropy` | How Riak will repair out-of-sync keys. If set to `active`, out-of-sync keys will be repaired in the background; if set to `passive`, out-of-sync keys are only repaired on read; and if set to `active-debug`, verbose debugging information will be output. | `active`
+`anti_entropy.throttle` | Whether the distributed throttle for active anti-entropy is enabled. | `on`
+`anti_entropy.throttle.$tier.mailbox_size` | Sets the throttling tiers for active anti-entropy. Each tier is a minimum vnode mailbox size and a time-delay that the throttle should observe at that size and above. For example, `anti_entropy.throttle.tier1.mailbox_size = 0`, `anti_entropy.throttle.tier1.delay = 0ms`, `anti_entropy.throttle.tier2.mailbox_size = 40`, `anti_entropy.throttle.tier2.delay = 5ms`, etc. If configured, there must be a tier which includes a mailbox size of 0. Both `.mailbox_size` and `.delay` must be set for each tier. |
+`anti_entropy.throttle.$tier.delay` | See the description for `anti_entropy.throttle.$tier.mailbox_size` above. |
+`anti_entropy.bloomfilter` | Bloom filters are highly effective in shortcutting data queries that are destined to not find the requested key, though they tend to entail a small performance cost. | `on`
+`anti_entropy.max_open_files` | | `20`
+`anti_entropy.write_buffer_size` | The LevelDB options used by active anti-entropy to generate the LevelDB-backed on-disk hashtrees. | `4MB`
+`anti_entropy.data_dir` | The directory where AAE hash trees are stored. | `./data/anti_entropy`
+`anti_entropy.trigger_interval` | The tick determines how often the active anti-entropy manager looks for work to do (building/expiring trees, triggering exchanges, etc.). Lowering this value will speed up the rate at which all replicas are synced across the cluster. Increasing the value is not recommended. | `15s`
+`anti_entropy.concurrency_limit` | Limit how many active anti-entropy exchanges or builds can happen concurrently. | `2`
+`anti_entropy.tree.expiry` | Determines how often hash trees are expired after being built. Periodically expiring a hash tree ensures that the on-disk hash tree data stays consistent with the actual K/V backend data. It also helps Riak identify silent disk failures and bit rot. However, expiration is not needed for normal active anti-entropy operations and should be infrequent for performance reasons. The time is specified in milliseconds. | `1w`
+`anti_entropy.tree.build_limit.per_timespan` | | `1h`
+`anti_entropy.tree.build_limit.number` | Restricts how fast AAE can build hash trees. Building the tree for a given partition requires a full scan over that partition's data. Once built, trees stay built until they are expired. `.number` is the number of builds; `.per_timespan` is the amount of time in which that number of builds occurs. | `1`
+`anti_entropy.use_background_manager` | Whether AAE is to use a background process to limit AAE tree rebuilds. If set to `on`, this will help to prevent system response degradation under times of heavy load from multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
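+
+Pulling the tier example from the table above into a single sketch, a
+two-tier throttle in `riak.conf` would look like this:
+
+```riakconf
+anti_entropy = active
+anti_entropy.throttle = on
+anti_entropy.throttle.tier1.mailbox_size = 0
+anti_entropy.throttle.tier1.delay = 0ms
+anti_entropy.throttle.tier2.mailbox_size = 40
+anti_entropy.throttle.tier2.delay = 5ms
+```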
+
+## TicTac Active Anti-Entropy
+
+Config | Description | Default
+:------|:------------|:-------
+`tictacaae_active` | Changes TicTac AAE from passive to active. If you want to run TicTac AAE alongside legacy AAE, set both to active. Can be `active` or `passive`. | `passive`
+`tictacaae_dataroot` | Path under which AAE data files will be stored. | `$(platform_data_dir)/tictac_aae`
+`tictacaae_parallelstore` | When running in parallel mode (which will be the default if the backend does not support native TicTac AAE, i.e. is not leveled), the type of parallel key store to be kept: `leveled_ko` (leveled and key-ordered) or `leveled_so` (leveled and segment-ordered). When running in native mode, this setting is ignored. | `leveled_ko`
+`tictacaae_rebuildwait` | The minimum number of hours to wait between rebuilds. | `336`
+`tictacaae_rebuilddelay` | The number of seconds which represents the length of the period in which the next rebuild will be scheduled. If all vnodes are scheduled to rebuild at the same time, they will actually rebuild at a random point between 0 and this many seconds after the rebuild time. | `345600`
+
+## Intra-Cluster Handoff
+
+Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].
+
+Config | Description | Default
+:------|:------------|:-------
+`handoff.max_rejects` | The maximum number of times that a secondary system within Riak, such as Riak Search, can block handoff of primary key/value data. The approximate maximum duration that a vnode can be blocked can be determined by multiplying this setting by `vnode_management_timer`. If you want to prevent handoff from ever being blocked by a secondary system, set this parameter to `0`. | `6`
+`handoff.inbound` | Whether inbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.outbound` | Whether outbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.port` | Specifies the TCP port that Riak uses for intra-cluster data handoff. | `8099`
+`handoff.ssl.certfile` | To encrypt `riak_core` intra-cluster data handoff traffic, uncomment this line and edit its path to an appropriate certfile and keyfile. |
+`handoff.ssl.keyfile` | The keyfile paired with the certfile specified in `.certfile`. |
+`handoff.use_background_manager` | Whether Riak will use a background manager to limit K/V handoff. This can help to prevent system response degradation during times of heavy load caused by multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
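+
+As a sketch, encrypting handoff traffic involves pointing both SSL
+settings at valid key material; the paths below are placeholders:
+
+```riakconf
+handoff.port = 8099
+handoff.ssl.certfile = $(platform_etc_dir)/cert.pem
+handoff.ssl.keyfile = $(platform_etc_dir)/key.pem
+```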
+
+## Riak Data Types
+
+Config | Description | Default
+:------|:------------|:-------
+`datatypes.compression_level` | Whether serialized Data Types will use compression, and at what level. When set to an integer, the parameter refers to the aggressiveness of compression, on a scale from 0 to 9. `on` is equivalent to 6, whereas `off` is equivalent to 0. Higher values for compression tend to be more CPU intensive. | `1`
+
+## SNMP
+
+Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher.
+
+## JMX
+
+Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher.
+
+## Strong Consistency
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment.
+
+Riak's strong consistency feature has a variety of tunable parameters
+that allow you to enable and disable strong consistency, modify the
+behavior of leaders and followers, set various timeouts, and more. More
+detailed information from an operations perspective can be found in our
+documentation on [managing strong consistency][cluster ops strong consistency].
+
+Strong consistency is disabled by default. The `strong_consistency`
+parameter enables you to turn it on. This setting is available in each
+node's `riak.conf` file.
+
+Config | Description | Default
+:------|:------------|:-------
+`strong_consistency` | Enables the consensus subsystem used for strongly consistent Riak operations if set to `on`. | `off`
+
+Unlike the `strong_consistency` setting, the settings listed below are
+available only in `advanced.config`, in the `riak_ensemble` section of
+that file. That section looks like this:
+
+```advancedconfig
+{riak_ensemble, [
+    {parameter1, value},
+    {parameter2, value}
+    %% Other settings
+  ]}
+```
+
+Further instructions on setting parameters in `advanced.config` can be
+found in the [advanced configuration](#advanced-configuration) section below.
+
+Using these settings properly demands a firm understanding of the basic
+architecture of Riak's implementation of strong consistency. We highly
+recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind
+strong consistency before changing the defaults on these parameters.
+
+Config | Description | Default
+:------|:------------|:-------
+`ensemble_tick` | The rate at which leaders perform their periodic duties, including refreshing the leader lease, in milliseconds. This setting must be lower than both the `lease_duration` and `follower_timeout` settings (both listed below). Lower values mean that leaders perform their duties more frequently, which can allow for faster convergence if a leader goes offline and then returns to the ensemble; higher values mean that leaders perform their duties less frequently, which can reduce network overhead. | `500`
+`lease_duration` | Determines how long a leader lease remains valid without being refreshed (in milliseconds). This should be set higher than the `ensemble_tick` setting (listed above) so that leaders have time to refresh their leases before they time out, and it must be set lower than the `follower_timeout` setting (listed below). | `ensemble_tick * 3/2`
+`follower_timeout` | Determines how long a follower waits to hear from a leader before it abandons the leader (in milliseconds). This must be set greater than the `lease_duration` setting. | `lease_duration * 4`
+`alive_tokens` | Determines the number of ticks the leader will wait to hear from its associated [vnode][glossary vnode] before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before `ensemble_tick * alive_tokens` milliseconds have elapsed, the leader will give up leadership. It may be necessary to raise this setting if your Riak vnodes are frequently stalling out on slow backend reads/writes. If this setting is too low, it may cause slow requests to time out earlier than the request timeout. | `2`
+`storage_delay` | Determines how long the consensus subsystem delays syncing to disk when performing certain metadata operations (in milliseconds). This delay allows multiple operations to be coalesced into a single disk write. We do not recommend that you change this setting. | `50`
+`storage_tick` | Determines how often the consensus subsystem writes data to disk that was requested to be written asynchronously (in milliseconds). We do not recommend that you change this setting. | `5000`
+`trust_lease` | Determines whether leader leases are used to optimize reads. When set to `true`, a leader with a valid lease will handle the read directly without contacting any followers; when set to `false`, the leader will always contact followers. For more information, see our internal documentation on leader leases. | `true`
+`peer_get_timeout` | Determines the timeout used internally for reading consistent data, in milliseconds. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_put_timeout` | Determines the timeout, in milliseconds, used internally for writing consistent data. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_workers` | The number of concurrent workers used by the leader to service requests. Increasing this setting may boost performance depending on the workload. | `1`
+`tree_validation` | Determines whether Riak considers peer Merkle trees to be trusted after a node restart. When validation is enabled (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted majority. This is the safest option, as it protects Riak against undetected corruption of the Merkle tree. However, this mode reduces Riak availability since it can sometimes require more than a simple majority of nodes to be online and reachable. | `true`
+`synchronous_tree_updates` | Determines whether the metadata updates to follower Merkle trees are handled synchronously or not. When set to `true`, Riak requires two quorum round trips to occur before replying back to the client: the first quorum request to write the actual object and the second to write the Merkle tree data. When set to `false`, Riak will respond back to the client after the first round trip, letting the metadata update happen asynchronously. It's important to note that the leader always updates its local Merkle tree before responding to the client; this setting only affects the metadata writes sent to followers. In principle, asynchronous updates are unsafe: if the leader crashes before sending the metadata updates, and all followers that had acknowledged the object write somehow revert to the object value immediately prior to a write request, a future read could return the immediately preceding value without realizing that it was incorrect. Given that this scenario is unlikely, this setting defaults to `false` in the name of improved performance. | `false`
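+
+To make the relationship between these timeouts concrete, here is a
+hypothetical `advanced.config` sketch that simply restates the default
+ratios (`ensemble_tick` < `lease_duration` < `follower_timeout`):
+
+```advancedconfig
+{riak_ensemble, [
+    %% all values in milliseconds
+    {ensemble_tick, 500},
+    {lease_duration, 750},   %% ensemble_tick * 3/2
+    {follower_timeout, 3000} %% lease_duration * 4
+]}
+```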
+
+
+## Miscellaneous
+
+Config | Description | Default
+:------|:------------|:-------
+`metadata_cache_size` | This setting controls the size of the metadata cache for each vnode. The cache can be disabled by setting it to `off` (this is the default). Enabling the cache should not be necessary in disk-based backends (i.e. LevelDB and Bitcask) but it can help performance in the Memory backend. Note that this setting adjusts the size of the ETS table rather than the actual data. Thus, more space may be used than the simple size * number-of-vnodes calculation would imply. Caution: This setting should not be changed without extensive benchmarking. | `off`
+`max_concurrent_requests` | The maximum number of concurrent requests of each type (GET or PUT) that is allowed. Setting this value to `infinite` disables overload protection. The `erlang.process_limit` should be at least 3 times this setting. | `50000`
+`dtrace` | Whether DTrace is enabled. Do not enable unless your Erlang/OTP runtime is compiled to support DTrace, which is available in R15B01 (supported by the official source package) and in R14B04 via a custom repository and branch. | `off`
+`vnode_management_timer` | Sets the frequency with which vnodes attempt to trigger handoff between this node and other nodes in the cluster. | `10s` (10 seconds)
+`retry_put_coordinator_failure` | When a PUT (i.e. write) request fails, Riak will retry the operation if this setting is set to `on`, which is the default. Setting it to `off` will speed response times on PUT requests in general, but at the risk of potentially increasing the likelihood of write failure. | `on`
+`background_manager` | Riak's background manager is a subsystem that coordinates access to shared resources from other Riak subsystems. The background manager can help to prevent system response degradation under times of heavy load caused by multiple background tasks. | `on`
+
+## Advanced Configuration
+
+The `advanced.config` file takes the same format as the `app.config`
+file familiar to users of versions of Riak prior to 2.0. Here is an
+example:
+
+```advancedconfig
+[
+ {riak_core,
+  [
+   {cluster_mgr, {"127.0.0.1", 8098 } }
+   %% more riak_core configs
+  ]},
+
+ {riak_repl,
+  [
+   {data_root, "/var/db/riak/riak_repl/"}
+   %% more riak_repl configs
+  ]
+ }
+].
+```
+
+The following settings are available in the `advanced.config` file:
+
+#### `riak_repl` settings
+
+Most settings that are configurable through `advanced.config` are
+related to Riak's `riak_repl` subsystem.
+
+Config | Description | Default
+:------|:------------|:-------
+`data_root` | Path (relative or absolute) to the working directory for the replication process. | `/var/db/riak/riak_repl/`
+`max_fssource_cluster` | The hard limit of fullsync workers that will be running on the source side of a cluster across all nodes on that cluster for a fullsync to a sink cluster. This means that if you have configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `5`
+`max_fssource_node` | This setting limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `1`
+`max_fssink_node` | This setting limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the sink cluster on which this parameter is defined, either via the configuration file or command line. | `1`
+`fullsync_on_connect` | Whether to initiate a fullsync on initial connection from the sink cluster. | `true`
+`fullsync_interval` | A single-integer value representing the duration to wait, in minutes, between fullsyncs, or a list of `{clustername, time_in_minutes}` pairs for each sink participating in fullsync replication. | `30`
+`rtq_max_bytes` | The maximum size, in bytes, to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync. | `104857600`
+`proxy_get` | Whether to enable Riak CS `proxy_get` and block filter. | `disabled`
+`rt_heartbeat_interval` | A heartbeat message is sent from the source to the sink every `rt_heartbeat_interval` seconds. Setting `rt_heartbeat_interval` to `undefined` disables the realtime heartbeat. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
+`rt_heartbeat_timeout` | If a heartbeat response is not received within the time period specified by this setting (in seconds), the source connection exits and will be re-established. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
+`realtime_connection_rebalance_max_delay_secs` | Should a server on the source cluster be restarted, this is the amount of time (in seconds) before the realtime connections are rebalanced by a change in the number of source nodes. | `300`
+`fullsync_use_background_manager` | By default, fullsync replication will attempt to coordinate with other Riak subsystems that may be contending for the same resources. This will help to prevent system response degradation during times of heavy load from multiple background tasks. To disable background coordination, set this parameter to `false`. This feature is available only in Riak KV Enterprise Edition 2.0 and later, as well as Riak KV 2.2.6 onwards. | `true`
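+
+As an illustration, here is a sketch of a `riak_repl` section that
+restates a few of the defaults from the table above:
+
+```advancedconfig
+{riak_repl, [
+    {data_root, "/var/db/riak/riak_repl/"},
+    {fullsync_on_connect, true},
+    {fullsync_interval, 30},
+    {max_fssource_node, 1}
+]}
+```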
+
+#### Upgrading Riak Search with `advanced.config`
+
+If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][use ref search] (codenamed Yokozuna), you will need to enable
+legacy Search while the upgrade is underway. You can add the following
+snippet to your `advanced.config` configuration to do so:
+
+```advancedconfig
+[
+  %% Other configs
+
+  {riak_search, [ {enabled, true} ]},
+  {merge_index, [
+    {data_root, "/var/lib/riak/merge_index"},
+    {buffer_rollover_size, 1048576},
+    {max_compact_segments, 20}
+  ]}
+
+  %% Other configs
+].
+```
+
+#### Other settings
+
+There are three non-`riak_repl` settings available in
+`advanced.config`.
+
+Config | Section | Description | Default
+:------|:--------|:------------|:-------
+`add_paths` | `riak_kv` | If you are installing custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies the paths to any compiled `.beam` files that you wish to use. This is expressed as a list of absolute paths on the node's filesystem, e.g. `[ "/tmp", "/other" ]`. |
+`cluster_mgr` | `riak_core` | The cluster manager listens for connections from remote clusters on the specified IP and port. Every node runs one cluster manager, but only the cluster manager running on the cluster leader will service requests. This can change as nodes enter and leave the cluster. | `{"127.0.0.1", 9080}`
+`delete_mode` | `riak_kv` | Specifies how Riak behaves after objects are marked for deletion with a tombstone. There are three possible settings: `keep` disables tombstone removal altogether; `immediate` removes objects' tombstones as soon as the delete request is received; and setting `delete_mode` to an integer value specifies the number of milliseconds to wait before removing tombstones. More information can be found in Object Deletion. | `3000` (3 seconds)
+`target_n_val` | `riak_core` | The highest `n_val` that you generally intend to use. This setting affects how partitions are distributed within the cluster, helping to ensure that "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. You will need to change this setting only in rare circumstances. Assuming that `ring_size` is a power of 2, the ideal value for this setting is both (a) greater than or equal to the largest `n_val` for any bucket type and (b) an even divisor of the number of partitions in the ring, i.e. `ring_size`. The default is `4`, and the number of physical nodes in your cluster must be greater than `target_n_val` for this setting to be effective at preventing hot spots. | `4`
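+
+For instance, a hypothetical `advanced.config` fragment that keeps
+tombstones for ten seconds instead of the default three:
+
+```advancedconfig
+{riak_kv, [
+    %% delete_mode in milliseconds; 10000 = 10 seconds
+    {delete_mode, 10000}
+]}
+```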
+
+## Cluster Job Controls
+
+{{% note title="Warning" %}}
+Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
+{{% /note %}}
+
+The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
+`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`
+
+
diff --git a/content/riak/kv/2.9.10/configuring/search.md b/content/riak/kv/2.9.10/configuring/search.md
new file mode 100644
index 0000000000..e4d18d806b
--- /dev/null
+++ b/content/riak/kv/2.9.10/configuring/search.md
@@ -0,0 +1,277 @@
+---
+title: "Riak Search Settings"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Riak Search Settings"
+    identifier: "configuring_search"
+    weight: 160
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/advanced/configs/search/
+  - /riak/kv/2.9.10/ops/advanced/configs/search/
+---
+
+[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search
+[usage search schema]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search-schemas
+[usage search data types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/searching-data-types
+[usage custom extractors]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/custom-extractors
+[cluster-ops aae throttle]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/active-anti-entropy/#throttling
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference
+[config reference#search]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/#search
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#active-anti-entropy-aae
+[security index]: {{<baseurl>}}riak/kv/2.9.10/using/security/
+
+[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
+[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation
+
+This page covers how to use Riak Search (with
+[Solr](http://lucene.apache.org/solr/) integration).
+
+For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].
+
+If you are looking to develop on or with Riak Search, take a look at:
+
+* [Using Search][usage search]
+* [Search Schema][usage search schema]
+* [Custom Search Extractors][usage custom extractors]
+* [Riak KV Data Types and Search][usage search data types]
+
+## Overview
+
+We'll be walking through:
+
+1. [Prerequisites](#prerequisites)
+2. [Enabling Riak Search](#enabling-riak-search)
+3. [Search Config Settings](#search-config-settings)
+4. [Additional Solr Information](#more-on-solr)
+
+## Prerequisites
+
+Because Solr is a Java application, you will need to install **Java 7
+or later** on every node. Installation packages can be found on the [Java SE Downloads
+page][java se downloads] and instructions in the [Java SE documentation site][java se docs].
+
+
+## Enabling Riak Search
+
+Riak Search is not enabled by default, so you must enable it in every
+node's [configuration file][config reference] as follows:
+
+```riakconf
+search = on
+```
+
+
+## Search Config Settings
+
+You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation.
+
+### `search`
+
+Enable or disable search; defaults to `off`.
+
+Valid values: `on` or `off`
+
+### `search.anti_entropy.data_dir`
+
+The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`.
+
+Valid values: a directory
+
+### `search.anti_entropy.throttle`
+
+Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`.
+
+Valid values: `on` or `off`
+
+You can read more about throttling [here][cluster-ops aae throttle].
+
+### `search.anti_entropy.throttle.$tier.delay`
+
+Set the throttling tier delay for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above.
+
+For example:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a `solrq_queue_length` of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.anti_entropy.throttle.$tier.solrq_queue_length`
+
+Set the throttling tiers for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle
+should observe at that size and above.
+
+For example:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a `solrq_queue_length` of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.dist_query`
+
+Enable this node in distributed query plans; defaults to `on`.
+
+If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak Search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long-running administrative operation (e.g.
reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results. + +This setting can also be changed via `riak-admin` by issuing one of the following commands: + +``` +riak-admin set search.dist_query=off +``` + or + +``` +riak-admin set search.dist_query=on +``` + +Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` feature. Setting `search.dis_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up. + +Valid values: `on` or `off` + +### `search.index.error_threshold.failure_count` + +The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`. + +Valid values: Integer + +### `search.index.error_threshold.failure_interval` + +The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`. + +If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed. + +Valid values: Milliseconds + +### `search.index.error_threshold.reset_interval` + +The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`. + +Valid values: Milliseconds + +### `search.queue.batch.flush_interval` + +The maximum delay between notification to flush batches to Solr; defaults to `1000` (milliseconds). + +This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds. + +Valid values: `ms`, `s`, `m`, or `h` + +### `search.queue.batch.maximum` + +The maximum batch size, in number of Riak objects; defaults to `500`. + +Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request. + +Valid values: Integer + +### `search.queue.batch.minimum` + +The minimum batch size, in number of Riak objects; defaults to `10`. + +Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`. + +Valid valus: Integer + +### `search.queue.high_watermark` + +The queue high water mark; defaults to `1000`. 
+
+If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem if writes into Solr start to fall behind.
+
+Valid values: Integer
+
+### `search.queue.high_watermark.purge_strategy`
+
+The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.
+
+Valid values: `purge_one`, `purge_index`, or `off`
+
+* `purge_one` removes the oldest item on the queue from an erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `purge_index` removes all items associated with one random erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `off` disables purging
+
+### `search.root_dir`
+
+The root directory in which index data and configuration is stored; defaults to `./data/yz`.
+
+Valid values: a directory
+
+### `search.solr.jvm_options`
+
+The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.
+
+Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.
+
+Valid values: Java command-line arguments
+
+### `search.solr.jmx_port`
+
+The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.
+
+Valid values: Integer
+
+**Note**: JMX ceased being a Riak feature in Riak KV 2.9.0p5. This setting is left here for reference but no longer affects anything.
+
+### `search.solr.port`
+
+The port number to which Solr binds (note: binds on every interface); defaults to `8093`.
+
+Valid values: Integer
+
+### `search.solr.start_timeout`
+
+How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.
+
+Values lower than 1s will be rounded up to 1s.
+
+Valid values: Integer with time units (e.g. 2m)
+
+
+## More on Solr
+
+### Solr JVM and Ports
+
+Riak Search runs one Solr process per node to manage its indexing and
+search functionality. While the underlying Yokozuna integration manages
+index distribution, node coverage for queries, active anti-entropy
+(AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.
+
+Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports
+to the outside world.
+
+### Solr for Operators
+
+For further information on Solr monitoring, tuning, and performance, we
+recommend the following documents for getting started:
+
+* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
+* [Solr Performance
+  Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
+* [Solr Performance
+  Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
+* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)
+
+A wide variety of other documentation is available from the Solr OSS
+community.
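+
+### Example Search Configuration
+
+For reference, the riak.conf fragment below collects the settings discussed on this page into one place. This is a minimal sketch; apart from `search = on`, the values shown are the documented defaults, so only copy the lines you actually intend to change:
+
+```riakconf
+search = on
+search.solr.port = 8093
+search.queue.batch.minimum = 10
+search.queue.batch.maximum = 500
+search.queue.batch.flush_interval = 1000ms
+search.queue.high_watermark = 1000
+search.queue.high_watermark.purge_strategy = purge_one
+```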
+ + + diff --git a/content/riak/kv/2.9.10/configuring/strong-consistency.md b/content/riak/kv/2.9.10/configuring/strong-consistency.md new file mode 100644 index 0000000000..293f793ac6 --- /dev/null +++ b/content/riak/kv/2.9.10/configuring/strong-consistency.md @@ -0,0 +1,703 @@ +--- +title: "Implementing Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Implementing Strong Consistency" + identifier: "configuring_strong_consistency" + weight: 190 + parent: "configuring" +toc: true +--- + +[apps strong consistency]: {{}}riak/kv/2.9.10/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.9.10/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.9.10/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.9.10/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.9.10/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.9.10/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.9.10/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.9.10/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.9.10/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.9.10/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.9.10/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.9.10/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.9.10/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.9.10/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.9.10/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.9.10/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.9.10/developing/data-types +[glossary aae]: {{}}riak/kv/2.9.10/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.9.10/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.9.10/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.9.10/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.9.10/developing/client-libraries + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. + +This document provides information on configuring and monitoring a Riak +cluster's optional strong consistency subsystem. Documentation for +developers building applications using Riak's strong consistency feature +can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical +treatment can be found in [Strong Consistency][concept strong consistency]. + +## Minimum Cluster Size + +In order to use strong consistency in Riak, **your cluster must consist +of at least three nodes**. If it does not, all strongly consistent +operations will fail. 
If your cluster is smaller than three nodes, you
+will need to [add more nodes][cluster ops add remove node] and make sure
+that strong consistency is [enabled](#enabling-strong-consistency) on all of them.
+
+Strongly consistent operations on a given key may also fail if a
+majority of object replicas in a given ensemble are unavailable, whether
+due to slowness, crashes, or network partitions. This means that you may
+see strongly consistent operations fail even if the minimum cluster size
+requirement has been met. More information on ensembles can be found in
+[Implementation Details](#implementation-details).
+
+While strong consistency requires at least three nodes, we have a
+variety of recommendations regarding cluster size, which can be found in
+[Fault Tolerance](#fault-tolerance).
+
+## Enabling Strong Consistency
+
+Strong consistency in Riak is disabled by default. You can enable it in
+each node's [configuration files][config reference#strong-cons].
+
+```riakconf
+strong_consistency = on
+```
+
+```appconfig
+%% In the older, app.config-based system, the strong consistency
+%% parameter is enable_consensus:
+
+{riak_core, [
+    % ...
+    {enable_consensus, true},
+    % ...
+    ]}
+```
+
+Remember that you must [restart your node][use admin riak cli] for
+configuration changes to take effect.
+
+For strong consistency requirements to be applied to specific keys,
+those keys must be in [buckets][concept buckets] bearing a bucket type with the
+`consistent` property set to `true`. More information can be found in
+[Using Bucket Types][cluster ops bucket types].
+
+If you enable strong consistency on all nodes in a cluster with fewer
+than three nodes, strong consistency will be **enabled** but not yet
+**active**. Strongly consistent operations are not possible in this
+state. Once at least three nodes with strong consistency enabled are
+detected in the cluster, the system will be activated and ready for use.
+You can check on the status of the strong consistency subsystem using
+the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command.
+
+## Fault Tolerance
+
+Strongly consistent operations in Riak are necessarily less highly
+available than [eventually consistent][concept eventual consistency] operations
+because strongly consistent operations can only succeed if a **quorum**
+of object replicas are currently reachable. A quorum can be expressed as
+N / 2 + 1 (or `n_val` / 2 + 1) using integer division, meaning that 3
+replicas constitute a quorum if N=5, 4 replicas if N=7, etc. If N=7 and
+4 replicas are unavailable, for example, no strongly consistent
+operations on that object can succeed.
+
+While Riak uses N=3 by default, bear in mind that **higher values of N
+will allow for more fault tolerance**. The table below shows the number
+of allowable missing replicas for assorted values of N:
+
+Replicas | Allowable missing replicas
+:--------|:--------------------------
+3 | 1
+5 | 2
+7 | 3
+9 | 4
+15 | 7
+
+Thus, we recommend setting `n_val` higher than the default of 3 for
+strongly consistent operations. More on `n_val` in the section below.
+
+### n_val Recommendations
+
+Due to the quorum requirements explained above, we recommend that you
+use _at least_ N=5 for strongly consistent data. You can set the value
+of N, i.e. `n_val`, for buckets
+[using bucket types][cluster ops bucket types].
For example, you
+can create and activate a bucket type with N set to 5 and strong
+consistency enabled---we'll call the bucket type
+`consistent_and_fault_tolerant`---using the following series of
+[commands][use admin riak-admin]:
+
+```bash
+riak-admin bucket-type create consistent_and_fault_tolerant \
+  '{"props": {"consistent":true,"n_val":5}}'
+riak-admin bucket-type activate consistent_and_fault_tolerant
+```
+
+If the `activate` command outputs `consistent_and_fault_tolerant has
+been activated`, the bucket type is now ready to provide strong
+consistency guarantees.
+
+#### Setting the target_n_val parameter
+
+The `target_n_val` parameter sets the highest `n_val` that you intend to
+use in an entire cluster. The purpose of this parameter is to ensure
+that so-called "hot spots" don't occur, i.e. that data is never stored
+more than once on the same physical node. This can happen when:
+
+* `target_n_val` is greater than the number of physical nodes, or
+* the `n_val` for a bucket is greater than `target_n_val`.
+
+A problem to be aware of if you're using strong consistency is that the
+default for `target_n_val` is 4, while our suggested minimum `n_val` for
+strongly consistent bucket types is 5. This means that you will need to
+raise `target_n_val` if you intend to use an `n_val` over 4 for _any_
+bucket type in your cluster. If you anticipate using an `n_val` of 7 as
+the largest `n_val` within your cluster, for example, you will need to
+set `target_n_val` to 7.
+
+This setting is not contained in `riak.conf`, and must instead be set in
+the `advanced.config` file. For more information, see our documentation
+on [advanced configuration][config reference#advanced].
+
+If you are using strong consistency in a cluster that has already been
+created with a `target_n_val` that is too low (remember that the default
+is too low), you will need to raise it to the desired higher value and
+restart each node.
+
+#### Note on Bucket Properties
+
+The `consistent` bucket property is one of two bucket properties,
+alongside [`datatype`][cluster ops bucket types], that cannot be changed once a
+bucket type has been created.
+
+Furthermore, if `consistent` is set to `true` for a bucket type, you
+cannot change the `n_val` for the bucket type once it's been created. If
+you attempt to do so, you'll see the following error:
+
+```
+Error updating bucket <bucket_type>:
+n_val cannot be modified for existing consistent type
+```
+
+If you've created a bucket type with a specific `n_val` and wish to
+change it, you will need to create a new bucket type with the
+appropriate `n_val` and use the new bucket type instead.
+
+### Fault Tolerance and Cluster Size
+
+From the standpoint of strongly consistent operations, larger clusters
+tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down.
+
+Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If
+two nodes go down, _all_ ensembles will lose quorum and will be unable
+to function. Strongly consistent operations on the entire keyspace will
+fail until at least one node is brought back online. And even when that
+one node is brought back online, a significant portion of the keyspace
+will continue to be unavailable for strongly consistent operations.
+
+For the sake of contrast, imagine a 50-node cluster in which all
+ensembles are N=5 (i.e. all objects are replicated to five nodes).
In
+this cluster, each node is involved in only 10% of the total ensembles;
+if a single node fails, that failure will thus impact only 10% of
+ensembles. In addition, because N is set to 5, that will not impact
+quorum for _any_ ensemble in the cluster; two additional node failures
+would need to occur for quorum to be lost for _any_ ensemble. And even
+in the case of three nodes failing, it is highly unlikely that that
+failure would impact the same ensembles; if it did, only those ensembles
+would become unavailable, affecting only 10% of the key space, as
+opposed to 100% in the example of a 3-node cluster consisting of N=3
+ensembles.
+
+These examples illustrate why we recommend higher values for N---again,
+at least N=5---as well as clusters with many nodes. The 50-node cluster
+example above is used only to illustrate why larger clusters are more
+fault tolerant. The definition of "many" nodes will vary according to your needs.
+For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].
+
+### Offline Node Recommendations
+
+In general, strongly consistent Riak is more sensitive to the number of
+nodes in the cluster than eventually consistent Riak, due to the quorum
+requirements described above. While Riak is designed to withstand a
+variety of failure scenarios that make nodes in the cluster unreachable,
+such as hardware or network failure, **we nonetheless recommend that you
+limit the number of nodes that you intentionally down or reboot**.
+Having multiple nodes leave the cluster at once can threaten quorum and
+thus affect the viability of some or all strongly consistent operations,
+depending on the size of the cluster.
+
+If you're using strong consistency and you do need to reboot multiple
+nodes, we recommend rebooting them very carefully. Rebooting nodes too
+quickly in succession can force the cluster to lose quorum and thus be
+unable to service strongly consistent operations. The best strategy is
+to reboot nodes one at a time and wait for each node to rejoin existing
+[ensembles][cluster ops strong consistency] before
+continuing to the next node. At any point in time, the state of
+currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].
+
+## Performance
+
+If you run into performance issues, bear in mind that the key space in a
+Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of
+that key space. Larger ring sizes allow more
+independent consensus groups to exist in a cluster, which can provide
+for more concurrency and higher throughput, and thus better performance.
+The ideal ring size, however, will also depend on the number of nodes in
+the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].
+
+Adding nodes to your cluster is another means of enhancing the
+performance of strongly consistent operations. Instructions on doing so
+can be found in [Adding and Removing Nodes][cluster ops add remove node].
+
+Your cluster's configuration can also affect strong consistency
+performance. See the section on [configuration][config reference#strong-cons] below.
+
+## riak-admin ensemble-status
+
+The [`riak-admin`][use admin riak-admin] interface
+used for general node/cluster management has an `ensemble-status`
+command that provides insight into the current status of the consensus
+subsystem undergirding strong consistency.
+ +Running the command by itself will provide the current state of the +subsystem: + +```bash +riak-admin ensemble-status +``` + +If strong consistency is not currently enabled, you will see `Note: The +consensus subsystem is not enabled.` in the output of the command; if +strong consistency is enabled, you will see output like this: + +``` +============================== Consensus System =============================== +Enabled: true +Active: true +Ring Ready: true +Validation: strong (trusted majority required) +Metadata: best-effort replication (asynchronous) + +================================== Ensembles ================================== + Ensemble Quorum Nodes Leader +------------------------------------------------------------------------------- + root 4 / 4 4 / 4 riak@riak1 + 2 3 / 3 3 / 3 riak@riak2 + 3 3 / 3 3 / 3 riak@riak4 + 4 3 / 3 3 / 3 riak@riak1 + 5 3 / 3 3 / 3 riak@riak2 + 6 3 / 3 3 / 3 riak@riak2 + 7 3 / 3 3 / 3 riak@riak4 + 8 3 / 3 3 / 3 riak@riak4 +``` + +### Interpreting ensemble-status Output + +The following table provides a guide to `ensemble-status` output: + +Item | Meaning +:----|:------- +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
+
+The following fields are displayed for each ensemble in the `Ensembles` listing:
+
+* **Ensemble**: The ID of the ensemble
+* **Quorum**: The number of ensemble peers that are either leading or following
+* **Nodes**: The number of nodes currently online
+* **Leader**: The current leader node for the ensemble
+
+**Note**: The **root ensemble**, designated by `root` in the sample
+output above, is a special ensemble that stores a list of nodes and
+ensembles in the cluster.
+
+More in-depth information on ensembles can be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+
+### Inspecting Specific Ensembles
+
+The `ensemble-status` command also enables you to directly inspect the
+status of specific ensembles in a cluster. The IDs for all current
+ensembles are displayed in the `Ensembles` section of the
+`ensemble-status` output described above.
+
+To inspect a specific ensemble, specify the ID:
+
+```bash
+riak-admin ensemble-status <id>
+```
+
+The following would inspect ensemble 2:
+
+```bash
+riak-admin ensemble-status 2
+```
+
+Below is sample output for a single ensemble:
+
+```
+================================= Ensemble #2 =================================
+Id: {kv,0,3}
+Leader: riak@riak2 (2)
+Leader ready: true
+
+==================================== Peers ====================================
+ Peer  Status     Trusted  Epoch  Node
+-------------------------------------------------------------------------------
+  1    following  yes      1      riak@riak1
+  2    leading    yes      1      riak@riak2
+  3    following  yes      1      riak@riak3
+```
+
+The table below provides a guide to the output:
+
+Item | Meaning
+:----|:-------
+`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. All ensembles are `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible.
+`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
+`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
+`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.
+
+The following fields are displayed for each peer in the `Peers` listing:
+
+* **Peer**: The ID of the peer
+* **Status**: Whether the peer is a leader or a follower
+* **Trusted**: Whether the peer's Merkle tree is currently considered trusted or not
+* **Epoch**: The current consensus epoch for the peer. The epoch is incremented each time the leader changes.
+* **Node**: The node on which the peer resides.
+
+More information on leaders, peers, Merkle trees, and other details can
+be found in [Implementation Details](#implementation-details) below.
+
+## Implementation Details
+
+Strong consistency in Riak is handled by a subsystem called
+[`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+This system functions differently from other systems in Riak in a number
+of ways, and many of these differences are important to bear in mind for
+operators configuring their cluster's usage of strong consistency.
+
+### Basic Operations
+
+The first major difference is that strongly consistent Riak involves a
+different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types
+of atomic operations on objects:
+
+* **Get** operations work just as they do against
+  non-strongly-consistent keys, but with two crucial differences:
+  1. Connecting clients are guaranteed to return the most recently
+     written value (which makes those operations CP, i.e. consistent and
+     partition tolerant)
+  2. Reads on strongly consistent keys *never* return siblings, hence
+     there is no need to develop any sort of conflict resolution
+     strategy for those keys
+* **Conditional put** operations write an object only if no object
+  currently exists in that key. The operation will fail if the key
+  already exists; if the key was never written or has been deleted, the
+  operation succeeds.
+* **Conditional modify** operations are compare-and-swap (CAS)
+  operations that succeed only if the value of a key has not changed
+  since it was previously read.
+* **Delete** operations work mostly like they do against
+  non-strongly-consistent keys, with the exception that
+  [tombstones][cluster ops obj del] are not harvested, which is
+  the equivalent of having `delete_mode` set to `keep`.
+
+**From the standpoint of clients connecting to Riak, there is little
+difference between strongly and non-strongly consistent data**. The
+operations performed on objects---reads, writes, deletes, etc.---are the
+same, which means that the client API for strong consistency is
+essentially the same as it is for eventually consistent operations, with
+the important exception of error handling.
+
+### Ensembles
+
+The main actors in Riak's implementation of strong consistency are
+**ensembles**, which are independent groups that watch over a portion of
+a Riak cluster's key space and coordinate strongly consistent operations
+across nodes. When watching over a given key space, ensembles must act
+upon multiple replicas of a given object, the number of which is
+specified by `n_val` (more on this in [Replication Properties][apps replication properties]).
+
+Eventually consistent Riak can service requests even when only a single
+object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it
+requires that a **quorum** of object replicas be online and reachable,
+where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not
+available for a key, all strongly consistent operations against that key
+will fail**.
+
+More information can be found in the section on Fault Tolerance above.
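+
+To make the conditional semantics described under Basic Operations above concrete, here is a minimal sketch using the HTTP API. It assumes a local node listening on port 8098 and the `consistent_and_fault_tolerant` bucket type created earlier; the bucket and key names are hypothetical:
+
+```bash
+# Conditional put: succeeds only because the key does not yet exist
+curl -X PUT -H "Content-Type: text/plain" -d "v1" \
+  http://localhost:8098/types/consistent_and_fault_tolerant/buckets/demo/keys/k1
+
+# Conditional modify: fetch the object first to obtain its causal context
+# (returned in the X-Riak-Vclock header), then re-supply it on the update
+curl -i http://localhost:8098/types/consistent_and_fault_tolerant/buckets/demo/keys/k1
+curl -X PUT -H "Content-Type: text/plain" -H "X-Riak-Vclock: <context from GET>" \
+  -d "v2" http://localhost:8098/types/consistent_and_fault_tolerant/buckets/demo/keys/k1
+```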
+
+### Peers, Leaders, Followers, and Workers
+
+All ensembles in strongly consistent Riak consist of agents called
+**peers**. The number of peers in an ensemble is defined by the `n_val`
+of that ensemble, i.e. the number of object replicas that the
+ensemble watches over. Amongst the peers in the ensemble, there are two
+basic actors: **leaders** and **followers**.
+
+Leaders and followers coordinate with one another on most requests.
+While leaders and followers coordinate on all writes, i.e. all puts and
+deletes, you can enable leaders to respond to gets without the need to
+coordinate with followers. This is known as granting a **leader lease**.
+Leader leases are enabled by default, and are disabled (or re-enabled)
+at the cluster level. A more in-depth account of ensemble behavior can
+be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/tree/develop/doc).
+
+In addition to leaders and followers, ensemble peers use lightweight
+Erlang processes called **workers** to perform long-running K/V
+operations, allowing peers to remain responsive to requests. The number
+of workers assigned to each peer depends on your configuration.
+
+These terms should be borne in mind in the sections on configuration
+below.
+
+### Integrity Checking
+
+An essential part of implementing a strong consistency subsystem in a
+distributed system is **integrity checking**, which is a process that
+guards against data corruption and inconsistency even in the face of
+network partitions and other adverse events that Riak was built to
+handle gracefully.
+
+Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency
+integrity checking utilizes [Merkle
+trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on
+disk. All peers in an ensemble, i.e. all leaders and followers, maintain
+their own Merkle trees and update those trees in the event of most
+strongly consistent operations. Those updates can occur synchronously or
+asynchronously from the standpoint of client operations, depending on
+the configuration that you specify.
+
+While integrity checking takes place automatically in Riak, there are
+important aspects of its behavior that you can configure. See the Merkle
+Tree Settings section below for more information on configurable
+parameters.
+
+## Configuring Strong Consistency
+
+The `riak_ensemble` subsystem provides a wide variety of tunable
+parameters that you can adjust to fit the needs of your Riak cluster.
+All `riak_ensemble`-specific parameters, with the exception of the
+`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency),
+must be set in each node's `advanced.config` file, _not_ in `riak.conf`
+or `app.config`.
+
+Information on the syntax and usage of `advanced.config` can be found in
+our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full
+listing of [strong-consistency-related configuration parameters][config reference#strong-cons].
+
+Please note that the sections below require a basic understanding of the
+following terms:
+
+* ensemble
+* peer
+* leader
+* follower
+* worker
+* integrity checking
+* Merkle tree
+
+For an explanation of these terms, see the [Implementation Details](#implementation-details) section
+above.
+
+### Leader Behavior
+
+The `trust_lease` setting determines whether leader leases are used to
+optimize reads. When set to `true`, a leader with a valid lease can
+handle reads directly without needing to contact any followers. When
+`false`, the leader will always contact followers, which can lead to
+degraded read performance. The default is `true`. We recommend leaving
+leader leases enabled for performance reasons.
+
+All leaders have periodic duties that they perform, including refreshing
+the leader lease. You can determine how frequently this occurs, in
+milliseconds, using the `ensemble_tick` setting. The default is 500
+milliseconds. Please note that this setting must be lower than both
+the `lease_duration` and `follower_timeout` settings.
+
+If you set `trust_lease` to `true`, you can also specify how long a
+leader lease remains valid without being refreshed using the
+`lease_duration` setting, which is specified in milliseconds. This
+setting should be higher than `ensemble_tick` to ensure that leaders
+have time to refresh their leases before they time out, and it _must_
+be lower than `follower_timeout`. The
+default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400,
+`lease_duration` will default to 600.
+
+### Worker Settings
+
+You can choose how many workers are assigned to each peer using the
+`peer_workers` setting. Workers are lightweight processes spawned by
+leaders and followers. While increasing the number of workers will make
+the strong consistency subsystem slightly more computationally
+expensive, more workers can mean improved performance in some cases,
+depending on the workload. The default is 1.
+
+### Timeouts
+
+You can establish timeouts for both reads and writes (puts and deletes)
+using the `peer_get_timeout` and `peer_put_timeout` settings,
+respectively. Both are expressed in milliseconds and default to 60000
+(1 minute).
+
+Longer timeouts will decrease the likelihood that read or write
+operations will fail due to long computation times; shorter timeouts
+entail shorter wait times for connecting clients, but at a higher risk
+of failed operations under heavy load.
+
+### Merkle Tree Settings
+
+Leaders and followers in Riak's strong consistency system maintain
+persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for
+all data stored by that peer. More information can be found in the
+**Integrity Checking** section above. The two sections directly below
+describe Merkle-tree-related parameters.
+
+#### Tree Validation
+
+The `tree_validation` parameter determines whether Riak considers Merkle
+trees to be trusted after peers are restarted (for whatever reason).
+When enabled, i.e. when `tree_validation` is set to `true` (the
+default), Riak does not trust peer trees after a restart, instead
+requiring the peer to sync with a trusted quorum. While this is the
+safest mode because it protects Riak against silent corruption in Merkle
+trees, it carries the drawback that it can reduce Riak availability by
+requiring more than a simple majority of nodes to be online and
+reachable when peers restart.
+
+If you are using ensembles with N=3, we strongly recommend setting
+`tree_validation` to `false`.
+
+#### Synchronous vs. Asynchronous Tree Updates
+
+Merkle tree updates can happen synchronously or asynchronously. This is
+determined by the `synchronous_tree_updates` parameter.
When set to
+`false`, which is the default, Riak responds to the client after the
+first roundtrip that updates the followers' data but before the second
+roundtrip required to update the followers' Merkle trees, allowing the
+Merkle tree update to happen asynchronously in the background; when set
+to `true`, Riak requires two quorum roundtrips to occur before replying
+back to the client, which can increase per-request latency.
+
+Please note that this setting applies only to Merkle tree updates sent
+to followers. Leaders _always_ update their local Merkle trees before
+responding to the client. Asynchronous updates can be unsafe in certain
+scenarios. For example, if a leader crashes before sending metadata
+updates to followers _and_ all followers that had acknowledged the write
+somehow revert the object value immediately prior to the write request,
+a future read could hypothetically return the immediately preceding
+value without realizing that the value was incorrect. Setting
+`synchronous_tree_updates` to `false` does bear this possibility, but it
+is highly unlikely.
+
+## Strong Consistency and Active Anti-Entropy
+
+Riak's [active anti-entropy][glossary aae] (AAE) feature _can_ repair strongly
+consistent data. Although it is not necessary to use active anti-entropy
+if you are using strong consistency, we nonetheless recommend doing so.
+
+Without AAE, all object conflicts are repaired via read repair.
+Read repair, however, cannot repair conflicts in so-called "cold data,"
+i.e. data that may not be read for long periods of time. While using AAE
+does entail small performance losses, not using AAE can lead to problems
+with silent on-disk corruption.
+
+## Strong Consistency and Bitcask
+
+One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability in objects' TTL. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in
+strongly consistent buckets only in situations when these occasional
+irregularities are acceptable.
+
+## Important Caveats
+
+The following Riak features are not currently available in strongly
+consistent buckets:
+
+* [Secondary indexes][cluster ops 2i]: If you do attach
+  secondary index metadata to objects in strongly consistent buckets,
+  strongly consistent operations can still proceed, but that metadata
+  will be silently ignored.
+* [Riak Data Types][dev data types]: Data Types can currently be
+  used only in an eventually consistent fashion.
+* [Using commit hooks][usage commit hooks]: Neither pre- nor post-commit
+  hooks are supported in strongly consistent buckets. If you do associate a
+  strongly consistent bucket with one or more commit hooks, strongly
+  consistent operations can proceed as normal in that bucket, but all
+  commit hooks will be silently ignored.
+
+Furthermore, you should also be aware that strong consistency guarantees
+are applied only at the level of single keys.
There is currently no
+support within Riak for strongly consistent operations against multiple
+keys, although it is always possible to incorporate client-side write
+and read locks in applications that use strong consistency.
+
+## Known Issues
+
+There are a few known issues that you should be aware of when using the
+latest version of strong consistency.
+
+* **Consistent reads of never-written keys create tombstones**: A
+  [tombstone][cluster ops obj del] will be written if you perform a read
+  against a key that a majority of peers claims to not exist. This is
+  necessary for certain corner cases in which offline or unreachable
+  replicas containing partially written data need to be rolled back in
+  the future.
+* **Consistent keys and key listing**: In Riak, key listing
+  operations, such as listing all the keys in a bucket, do not filter
+  out tombstones. While this is rarely a problem for
+  non-strongly-consistent keys, it does present an issue for strong
+  consistency due to the tombstone issues mentioned above.
+* **Secondary indexes not supported**: Strongly consistent
+  operations do not support [secondary indexes][cluster ops 2i] (2i) at
+  this time. Furthermore, any other metadata
+  attached to objects, even if not related to 2i, will be silently
+  ignored by Riak in strongly consistent buckets.
+* **Multi-Datacenter Replication not supported**: At this time,
+  consistent keys are *not* replicated across clusters using
+  Multi-Datacenter Replication (MDC). This is because MDC Replication
+  currently supports only eventually consistent replication across
+  clusters. Mixing strongly
+  consistent data within a cluster with eventually consistent data
+  between clusters is difficult to reason about from the perspective of
+  applications. In a future version of Riak, we will add support for
+  strongly consistent replication across multiple datacenters/clusters.
+* **Client library exceptions**: Basho's official [client
+  libraries][dev client libraries] convert errors returned by Riak into
+  generic exceptions, with a message derived from the returned
+  server-side error message.
+
+
+
diff --git a/content/riak/kv/2.9.10/configuring/v2-multi-datacenter.md b/content/riak/kv/2.9.10/configuring/v2-multi-datacenter.md
new file mode 100644
index 0000000000..fda324c682
--- /dev/null
+++ b/content/riak/kv/2.9.10/configuring/v2-multi-datacenter.md
@@ -0,0 +1,159 @@
+---
+title_supertext: "Configuring:"
+title: "V2 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "V2 Multi-Datacenter"
+    identifier: "configuring_v2"
+    weight: 210
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v2/configuration
+  - /riak/kv/2.9.10/ops/mdc/v2/configuration
+---
+
+[config v2 ssl]: {{}}riak/kv/2.9.10/configuring/v2-multi-datacenter/ssl
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.10/configuring/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication capabilities offer a
+variety of configurable parameters.
+
+## File
+
+The configuration for replication is kept in the `riak_repl` section of
+each node's `advanced.config`.
That section looks like this: + +```advancedconfig +{riak_repl, [ + {fullsync_on_connect, true}, + {fullsync_interval, 360}, + % Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + % Solaris: + % {data_root, "/opt/riak/data/riak_repl"}, + % FreeBSD/SmartOS: + % {data_root, "/var/db/riak/riak_repl"}, + {queue_size, 104857600}, + {server_max_pending, 5}, + {client_ack_frequency, 5} + ]} +``` + +## Usage + +These settings are configured using the standard Erlang config file +syntax, i.e. `{Setting, Value}`. For example, if you wished to set +`ssl_enabled` to `true`, you would insert the following line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{riak_repl, [ + % Other configs + {ssl_enabled, true}, + % Other configs + ]} +``` + +## Settings + +Once your configuration is set, you can verify its correctness by +running the following command: + +```bash +riak chkconfig +``` + +The output from this command will point you to syntactical and other +errors in your configuration files. + +A full list of configurable parameters can be found in the sections +below. + +## Fullsync Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster +`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.
+**Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See note 1 below.
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See note 4 below.
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects for which the leader will await acknowledgment from the remote location before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site
+
+## Client Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster
+`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs
+`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred
+
+## Buffer Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes
+`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes
+
+## Worker Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See note 2 below.
+`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `PUT` will be performed on the receiving side to store the object that was sent. See note 3 below.
+`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See note 2 below.
+`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `PUT` will be performed on the receiving side to store the object that was sent. See note 3 below.
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA and so on.
+
+2. Each get worker spawns 2 processes, one for the work and
+   one for the get FSM (an Erlang finite state machine implementation for `GET`
+   requests). Be sure that you don't run over the maximum number of allowed
+   processes in an Erlang VM (check `vm.args` for a `+P` property).
+
+3. Each put worker spawns 2 processes, one for the work, and
+   one for the put FSM (an Erlang finite state machine implementation for `PUT`
+   requests). Be sure that you don't run over the maximum number of allowed
+   processes in an Erlang VM (check `vm.args` for a `+P` property).
+
+4. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v2 ssl].
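+
+Putting the SSL settings above together, the `advanced.config` fragment below is a minimal sketch of an SSL-enabled replication setup. The certificate paths and the common name pattern are hypothetical; substitute your own:
+
+```advancedconfig
+{riak_repl, [
+    {ssl_enabled, true},
+    {keyfile, "/etc/riak/ssl/site.key.pem"},
+    {certfile, "/etc/riak/ssl/site.crt.pem"},
+    {cacertdir, "/etc/riak/ssl/ca"},
+    {ssl_depth, 1},
+    {peer_common_name_acl, ["*.example.com"]}
+  ]}
+```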
+ + + diff --git a/content/riak/kv/2.9.10/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.9.10/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..65ef12fe54 --- /dev/null +++ b/content/riak/kv/2.9.10/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,81 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.10/ops/mdc/v2/nat + - /riak/kv/2.9.10/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/2.9.10/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.10/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` + + + diff --git a/content/riak/kv/2.9.10/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.9.10/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..5a70e5c45f --- /dev/null +++ b/content/riak/kv/2.9.10/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,370 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.10/ops/mdc/v2/quick-start + - /riak/kv/2.9.10/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.10/configuring/v3-multi-datacenter/quick-start/) instead. 
+{{% /note %}}
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through
+the process of configuring Riak's version 2 Replication to perform
+replication between two sample Riak clusters in separate networks. This
+guide will also cover bidirectional replication, which is accomplished
+by setting up unidirectional replication in both directions between the
+clusters.
+
+## Prerequisites
+
+This Guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [Performing System Tuning][perf index]
+* [Reviewing Configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listener_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there is a `listener_<nodename>` entry
+for each listening node, and that `leader` and `server_stats` are populated.
+They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010" +leader: 'riak@192.168.1.21' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"172.16.1.11",9010}, + {state,wait_for_fullsync}]}}] +``` + +### Testing Realtime Replication + +That's all there is to it! When `PUT` requests are coordinated by +Cluster1, these operations will be replicated to Cluster2. + +You can use the following example script to verify that `PUT` operations +sent to Cluster1 are being replicated to Cluster2: + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C1 to C2 consistent +``` + +## Set Up Cluster2 → Cluster1 Replication + +### About Bidirectional Replication + +Multi-Datacenter support can also be configured to replicate in both +directions, ensuring eventual consistency between your two datacenters. +Setting up bidirectional replication is as simple as repeating the steps +above in the other direction, i.e. from Cluster2 to Cluster1. + +### Set Up the Listeners on Cluster2 (Source cluster) + +On a node in Cluster2, `node4` for example, identify the nodes that will +be listening to connections from replication clients with `riak-repl +add-listener ` for each node that will be +listening for replication clients. + +```bash +riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010 +riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010 +riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010 +``` + +### Set Up the Site on Cluster1 (Site cluster) + +On a node in Cluster1, `node1` for example, inform the replication +clients where the Source Listeners are with `riak-repl add-site + `. Use the IP address(es) and port(s) you configured in +the earlier step. For `sitename` enter **Cluster2**. + +```bash +riak-repl add-site 192.168.1.21 9010 Cluster2 +``` + +### Verify the Replication Configuration + +Verify the replication configuration using `riak-repl status` on a +Cluster1 node and a Cluster2 node. A full description of the `riak-repl +status` command's output can be found in the documentation for +`riak-repl`'s [status output][cluster ops v2 mdc#status]. + +On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and +`client_stats` are populated. 
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show `cancelled` in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
+
+
+
diff --git a/content/riak/kv/2.9.10/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.9.10/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..30f5c1b959
--- /dev/null
+++ b/content/riak/kv/2.9.10/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,163 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v2/ssl
+  - /riak/kv/2.9.10/ops/mdc/v2/ssl
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.10/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+]}
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.

+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
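+    %% Each ACL entry below is either an exact certificate CN or a
+    %% "*." wildcard pattern; matching is case-insensitive (see above).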
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_depth, ...}
+    % ...
+]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self-signed.
+
+For example:
+
+  * A depth of 0 indicates that the certificate must be signed directly
+    by a root certificate authority (CA)
+  * A depth of 1 indicates that the certificate may be signed by at most
+    one intermediate CA, followed by a root CA
+  * A depth of 2 indicates that the certificate may be signed by at most
+    two intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
+
+
+
diff --git a/content/riak/kv/2.9.10/configuring/v3-multi-datacenter.md b/content/riak/kv/2.9.10/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..ceb92612d9
--- /dev/null
+++ b/content/riak/kv/2.9.10/configuring/v3-multi-datacenter.md
@@ -0,0 +1,160 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v3/configuration
+  - /riak/kv/2.9.10/ops/mdc/v3/configuration
+---
+
+[config reference#advanced]: {{}}riak/kv/2.9.10/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{}}riak/kv/2.9.10/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file.
For more information and for a list +of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced]. + +Here is a sample of the syntax: + +```advancedconfig +{riak_core, [ + %% Every *node* runs one cluster_mgr + {cluster_mgr, {"0.0.0.0", 9080 }}, + % ... +]}, +{riak_repl, [ + %% Pick the correct data_root for your platform + %% Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + %% Solaris: + %% {data_root, "/opt/riak/data/riak_repl"}, + %% FreeBSD/SmartOS: + %% {data_root, "/var/db/riak/riak_repl"}, + {max_fssource_cluster, 5}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {fullsync_on_connect, false}, + % ... +]} +``` + +## Settings + +Riak MDC configuration is set using the standard Erlang config file +syntax `{Setting, Value}`. For example, if you wished to set +`fullsync_on_connect` to `false`, you would insert this line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{fullsync_on_connect, false} +``` + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak_repl Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number. +`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. 
+`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication.
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
+`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
+`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100MB. Dropped objects will need to be replicated with a fullsync.
+`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
+`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+
+
+## riak_core Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+
+## Heartbeat Settings
+
+There are two realtime-replication settings in the `riak_repl` section
+of `advanced.config` that control the periodic "heartbeat" sent from the
+source to the sink cluster to verify the sink cluster's
+liveness. The `rt_heartbeat_interval` setting determines how often the
+heartbeat is sent (in seconds). If a heartbeat is sent and a response is
+not received, Riak will wait `rt_heartbeat_timeout` seconds before
+attempting to re-connect to the sink; if any data is received from the
+sink, even if it is not heartbeat data, the timer will be reset. Setting
+`rt_heartbeat_interval` to `undefined` will disable the heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to. On the other hand,
+lengthening the timeout will make Riak less sensitive to cases in which
+the connection really has been compromised.
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA, and so on.
+
+2. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer].
+
+## Default Bucket Properties
+
+Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0.
+
+To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the `advanced.config` file:
+
+```advancedconfig
+{riak_repl, [
+    {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}]}
+    ]}
+]}
+```
+
+If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed.
+
+
+
diff --git a/content/riak/kv/2.9.10/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.9.10/configuring/v3-multi-datacenter/nat.md
new file mode 100644
index 0000000000..0f2c3c45c3
--- /dev/null
+++ b/content/riak/kv/2.9.10/configuring/v3-multi-datacenter/nat.md
@@ -0,0 +1,170 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "With NAT"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "With NAT"
+    identifier: "configuring_v3_replication_nat"
+    weight: 101
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v3/nat
+  - /riak/kv/2.9.10/ops/mdc/v3/nat
+---
+
+[config v3 ssl]: {{}}riak/kv/2.9.10/configuring/v3-multi-datacenter/ssl
+
+Riak's Version 3 Replication supports replication of data on
+networks that use static NAT.
+
+This can be used for replicating data over the internet where servers
+have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network).
+
+### Requirements
+
+In order for Replication to work on a server configured with NAT, the
+NAT addresses must be configured *statically*.
+
+## Configuration
+
+NAT rules can be configured at runtime, from the command line.
+
+* `riak-repl nat-map show`
+
+    Shows the current NAT mapping table
+
+* `riak-repl nat-map add <External IP>[:port] <Internal IP>`
+
+    Adds a NAT map from the external IP, with an optional port, to an
+    internal IP. The port number refers to a port that is automatically
+    mapped to the internal `cluster_mgr` port number.
+
+* `riak-repl nat-map del <External IP>[:port] <Internal IP>`
+
+    Deletes a specific NAT map entry.
+
+### Applying Changes at Runtime
+
+* Realtime NAT replication changes will be applied once realtime is
+  stopped and started using the following commands:
+
+    * `riak-repl realtime stop <clustername>`
+    * `riak-repl realtime start <clustername>`
+
+* Fullsync NAT replication changes will be applied on the next run of a
+  fullsync, or you can stop and start the current fullsync:
+
+    * `riak-repl fullsync stop <clustername>`
+    * `riak-repl fullsync start <clustername>`
+
+
+## Example
+
+* Cluster_A is the **source** of replicated data.
+* Cluster_B and Cluster_C are the **sinks** of the replicated data.
+
+### Cluster_A Setup
+
+Cluster_A is set up with nodes using the following **internal** IP
+addresses:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.1.20` | -
+`192.168.1.21` | -
+`192.168.1.22` | -
+`192.168.1.23` | -
+`192.168.1.24` | -
+
+### Cluster_B Setup
+
+The nodes in Cluster_B are configured as follows:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.2.40` | `50.16.238.120:5555`
+`192.168.2.41` | `50.16.238.121:5555`
+`192.168.2.42` | `50.16.238.122:5555`
+`192.168.2.43` | `50.16.238.123:5555`
+`192.168.2.44` | `50.16.238.124:5555`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT port listens on `5555`.
+
+### Cluster_C Setup
+
+The nodes in Cluster_C are set up with **static NAT**, configured with the
+following IP addresses:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.3.60` | `50.16.238.200:5550`
+`192.168.3.61` | `50.16.238.200:5551`
+`192.168.3.62` | `50.16.238.200:5552`
+`192.168.3.63` | `50.16.238.200:5553`
+`192.168.3.64` | `50.16.238.200:5554`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT ports listen on `5550` through `5554`.
+
+```bash
+# on any node of Cluster_A
+riak-repl clustername Cluster_A
+
+# on any node of Cluster_B
+riak-repl clustername Cluster_B
+
+# on any node of Cluster_C
+riak-repl clustername Cluster_C
+
+# on 50.16.238.120 of Cluster_B
+riak-repl nat-map add 50.16.238.120:5555 192.168.2.40
+# on 50.16.238.121 of Cluster_B
+riak-repl nat-map add 50.16.238.121:5555 192.168.2.41
+# on 50.16.238.122 of Cluster_B
+riak-repl nat-map add 50.16.238.122:5555 192.168.2.42
+# on 50.16.238.123 of Cluster_B
+riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
+# on 50.16.238.124 of Cluster_B
+riak-repl nat-map add 50.16.238.124:5555 192.168.2.44
+
+# on 192.168.3.60 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5550 192.168.3.60
+# on 192.168.3.61 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5551 192.168.3.61
+# on 192.168.3.62 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5552 192.168.3.62
+# on 192.168.3.63 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5553 192.168.3.63
+# on 192.168.3.64 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5554 192.168.3.64
+
+
+# Connect replication from Cluster_A to Cluster_B:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.120:5555
+# You can connect to any node in Cluster_B with NAT-mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
+
+# Connect replication from Cluster_A to Cluster_C:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.200:5550
+# You can connect to any node in Cluster_C with NAT-mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
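+
+# (Optional) on any node of Cluster_A, verify that both sink clusters
+# are now listed as connections:
+riak-repl connections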
+
+
+# on any node from Cluster_A
+riak-repl realtime enable Cluster_B
+riak-repl realtime enable Cluster_C
+
+riak-repl realtime start Cluster_B
+riak-repl realtime start Cluster_C
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.9.10/configuring/v3-multi-datacenter/quick-start.md
new file mode 100644
index 0000000000..326d437c45
--- /dev/null
+++ b/content/riak/kv/2.9.10/configuring/v3-multi-datacenter/quick-start.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "Quickstart"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Quickstart"
+    identifier: "configuring_v3_quickstart"
+    weight: 100
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v3/quick-start
+  - /riak/kv/2.9.10/ops/mdc/v3/quick-start
+---
+
+[install index]: {{}}riak/kv/2.9.10/setup/installing
+[perf index]: {{}}riak/kv/2.9.10/using/performance
+[config v3 mdc]: {{}}riak/kv/2.9.10/configuring/v3-multi-datacenter
+[cluster ops v3 mdc]: {{}}riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter
+
+This guide will walk you through the process of configuring Riak's v3
+Replication to perform replication between two sample Riak clusters on
+separate networks. This guide will also cover bidirectional replication,
+which is accomplished by setting up unidirectional replication in both
+directions between the clusters. It is important to note that both
+clusters must have the same ring size, but can have a different number
+of nodes.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* Install [Riak][install index]
+* Perform [System Tuning][perf index]
+* Review [Configuration][config v3 mdc]
+
+## About v3 Replication in 1.3 and higher
+
+In Riak's v3 Replication from Riak KV version 1.3 onwards, the nomenclature for Source and Site
+clusters has changed. To more accurately reflect the behavior of each of
+the clusters, "listeners" and "sites" are now known as "sources" and
+"sinks." Data transfer now originates at the "source" and replicates to
+the "sink"; initiation is always from the primary (source) to the backup
+(sink) data center.
+
+Additionally, knowledge of the state of each cluster is now managed by a
+**cluster manager** process, which greatly simplifies the setup and
+maintenance of Multi-Datacenter replication.
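+
+For reference, the cluster manager's listen address and port are set via
+the `cluster_mgr` entry in the `riak_core` section of `advanced.config`
+on each node. The port below is the default used throughout this guide;
+the bind address is an example:
+
+```advancedconfig
+{riak_core, [
+    %% Every node runs one cluster_mgr; remote clusters connect to
+    %% this address and port.
+    {cluster_mgr, {"0.0.0.0", 9080}}
+]}
+```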
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View Your Active Connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink        Cluster     Name         [Members]
+----        -------     ----         ---------
+Cluster2    Cluster2    <0.7985.0>   ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink        Cluster     Name         [Members]
+----        -------     ----         ---------
+Cluster1    Cluster1    <0.4456.0>   ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2.
+Once this is done, bidirectional replication should be operating.
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
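+
+As a compact recap, the complete bidirectional setup from this guide
+comes down to the following sequence (using the example IP addresses
+above):
+
+```bash
+# Name each cluster (run on any node of the respective cluster)
+riak-repl clustername Cluster1
+riak-repl clustername Cluster2
+
+# Cluster1 -> Cluster2 (run on a Cluster1 node)
+riak-repl connect 10.60.77.10:9080
+riak-repl realtime enable Cluster2
+riak-repl realtime start Cluster2
+
+# Cluster2 -> Cluster1 (run on a Cluster2 node; optional, for
+# bidirectional replication)
+riak-repl connect 10.60.67.149:9080
+riak-repl realtime enable Cluster1
+riak-repl realtime start Cluster1
+```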
+
+
+
diff --git a/content/riak/kv/2.9.10/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.9.10/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..f84d7b2df7
--- /dev/null
+++ b/content/riak/kv/2.9.10/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,173 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v3/ssl
+  - /riak/kv/2.9.10/ops/mdc/v3/ssl
+---
+
+[config reference#advanced.config]: {{}}riak/kv/2.9.10/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+]}
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_core` section of `advanced.config`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_depth, 3} % Sets the depth to 3
+    % ...
+]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. The
+intermediate certificates must not be self-signed.
+
+The following example depths illustrate this:
+
+  * a depth of `0` indicates that the certificate must be signed
+    directly by a root certificate authority (CA)
+  * a depth of `1` indicates that the certificate may be signed by at
+    most one intermediate CA, followed by a root CA
+  * a depth of `2` indicates that the certificate may be signed by at
+    most two intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL for *Version 3* is available in *Riak 1.4+*.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+Read how to [generate your own CA and
+keys](http://www.debian-administration.org/articles/618). Ensure that
+you remove the password protection from the keys you generate.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing.md b/content/riak/kv/2.9.10/developing.md
new file mode 100644
index 0000000000..d239bbd07c
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing.md
@@ -0,0 +1,77 @@
+---
+title: "Developing with Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Developing"
+    identifier: "developing"
+    weight: 300
+    pre: lambda
+toc: true
+---
+
+[getting started]: ../developing/getting-started
+[usage index]: ../developing/usage
+[client libraries]: ../developing/client-libraries
+[dev data types]: ../developing/data-types
+[dev data modeling]: ../developing/data-modeling
+[apps index]: ../developing/app-guide
+[dev api index]: ../developing/api
+[dev faq]: ../developing/faq
+
+## In This Section
+
+#### [Getting Started][getting started]
+
+Step-by-step guide for getting started developing with Riak KV.
+
+[Learn More >>][getting started]
+
+#### [Usage][usage index]
+
+A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types.
+
+[Learn More >>][usage index]
+
+#### [Client Libraries][client libraries]
+
+Overview of client libraries for a variety of programming languages and environments.
+
+[Learn More >>][client libraries]
+
+#### [Data Types][dev data types]
+
+Overview and guide to working with data types in Riak KV.
+
+[Learn More >>][dev data types]
+
+#### [Data Modeling][dev data modeling]
+
+Information on use cases and data models that are a good fit for Riak KV.
+
+[Learn More >>][dev data modeling]
+
+#### [Application Guide][apps index]
+
+A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV.
+
+[Learn More >>][apps index]
+
+#### [APIs Reference][dev api index]
+
+Information and reference material on Riak KV APIs.
+
+[Learn More >>][dev api index]
+
+#### [FAQ][dev faq]
+
+Frequently asked questions when developing applications with Riak KV.
+
+[Learn More >>][dev faq]
+
+
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api.md b/content/riak/kv/2.9.10/developing/api.md
new file mode 100644
index 0000000000..cba87d8e17
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api.md
@@ -0,0 +1,40 @@
+---
+title: "APIs"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "APIs"
+    identifier: "developing_apis"
+    weight: 107
+    parent: "developing"
+toc: true
+---
+
+[dev api http]: ./http
+[dev api backend]: ./backend
+[dev api pbc]: ./protocol-buffers/
+
+## In This Section
+
+#### [HTTP APIs][dev api http]
+
+Documentation on Riak KV's HTTP API.
+
+[Learn More >>][dev api http]
+
+#### [Protocol Buffers][dev api pbc]
+
+Information on Riak KV's Protocol Buffer Client API.
+
+[Learn More >>][dev api pbc]
+
+#### [Backend API][dev api backend]
+
+Overview of Riak KV's storage backend API.
+
+[Learn More >>][dev api backend]
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/backend.md b/content/riak/kv/2.9.10/developing/api/backend.md
new file mode 100644
index 0000000000..cba7a27813
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/backend.md
@@ -0,0 +1,117 @@
+---
+title: "Backend API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Backend API"
+    identifier: "apis_backend"
+    weight: 101
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/backend-api
+  - /riak/kv/2.9.10/dev/references/backend-api
+---
+
+[plan backend]: {{}}riak/kv/2.9.10/setup/planning/backend
+
+Riak's storage API uniformly applies to all of the
+[supported backends][plan backend]. This page presents the details of
+the storage backend API in the form of
+[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html)
+(specs).
+
+Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html),
+an Erlang static analysis tool. We recommend copying these specs into any
+custom backend modules and using them as a guide for development, to
+avoid errors and ensure full compatibility with Riak.
+
+Also included below is the function export list that can be pasted directly
+into a custom storage backend module.
+
+```erlang
+%% Riak Storage Backend API
+-export([api_version/0,
+         start/2,
+         stop/1,
+         get/3,
+         put/5,
+         delete/4,
+         drop/1,
+         fold_buckets/4,
+         fold_keys/4,
+         fold_objects/4,
+         is_empty/1,
+         status/1,
+         callback/3]).
+
+%% ===================================================================
+%% Public API
+%% ===================================================================
+
+%% @doc Return the major version of the
+%% current API and a capabilities list.
+%% The current valid capabilities are async_fold
+%% and indexes.
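+%% For example, a backend supporting async folds and secondary indexes
+%% might return {1, [async_fold, indexes]} (illustrative values).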
+-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. + +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http.md b/content/riak/kv/2.9.10/developing/api/http.md new file mode 100644 index 0000000000..91a101cbf4 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http.md @@ -0,0 +1,92 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.10/dev/references/http + - /riak/kv/2.9.10/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
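+
+For example, to fetch an object whose key contains a slash (the bucket
+and key names here are illustrative), escape the slash as `%2F`:
+
+```curl
+curl http://127.0.0.1:8098/buckets/test/keys/docs%2Freadme
+```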
+
+## Bucket-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.9.10/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.9.10/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.10/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.9.10/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.9.10/developing/api/http/list-keys)
+
+## Object-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.9.10/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{}}riak/kv/2.9.10/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.9.10/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.9.10/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.9.10/developing/api/http/delete-object)
+
+## Riak-Data-Type-related Operations
+
+Method | URL
+:------|:----
+`GET` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.9.10/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/2.9.10/developing/data-types/#usage-examples)
+and subpages e.g. [sets]({{}}riak/kv/2.9.10/developing/data-types/sets).
+
+Advanced users may consult the technical documentation inside the Riak
+KV internal module `riak_kv_wm_crdt`.
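+
+As a quick illustration of the Data Type endpoints, the following `curl`
+call increments a counter. It assumes a bucket type named `counters`,
+created with `datatype` set to `counter` and activated, plus an
+illustrative bucket and key:
+
+```curl
+curl -XPOST http://127.0.0.1:8098/types/counters/buckets/test/datatypes/hits \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 1}'
+```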
+
+## Query-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.9.10/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.10/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.10/developing/api/http/secondary-indexes)
+
+## Server-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.9.10/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.9.10/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.9.10/developing/api/http/list-resources)
+
+## Search-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{}}riak/kv/2.9.10/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.9.10/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{}}riak/kv/2.9.10/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{}}riak/kv/2.9.10/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{}}riak/kv/2.9.10/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.9.10/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{}}riak/kv/2.9.10/developing/api/http/store-search-schema)
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/http/counters.md b/content/riak/kv/2.9.10/developing/api/http/counters.md
new file mode 100644
index 0000000000..8b37e5b33a
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/http/counters.md
@@ -0,0 +1,81 @@
+---
+title: "HTTP Counters"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Counters"
+    identifier: "http_counters"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/http/counters
+  - /riak/kv/2.9.10/dev/references/http/counters
+---
+
+Riak counters are CRDTs (convergent replicated data types) that
+(eventually) converge to the correct total. You merely increment the
+counter by some integer, and any potential conflicts will be
+automatically resolved by Riak.
+
+## Setup
+
+Riak counters can only be used if the bucket has the `allow_mult` property
+set to `true`.
+
+```
+curl -XPUT localhost:8098/buckets/BUCKET/props \
+  -H "Content-Type: application/json" \
+  -d "{\"props\" : {\"allow_mult\": true}}"
+```
+
+If you attempt to use counters without setting the above, you'll get this
+message:
+
+```
+Counters require bucket property 'allow_mult=true'
+```
+
+## Request
+
+To increment a counter, POST an integer value to the `/counters`
+resource. This will increment the keyed value by the given amount.
+
+```
+POST /buckets/BUCKET/counters/KEY
+```
+
+To retrieve the current value, issue a GET against the same `/counters`
+resource:
+
+```
+GET /buckets/BUCKET/counters/KEY
+```
+
+## Response
+
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.9.10/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.9.10/developing/api/http/fetch-object)) responses apply here.
+
+Caveats: Counters have no support for Secondary Indexes (2i), Links, or Custom HTTP Metadata.
+
+## Example
+
+The body must be an integer (positive or negative).
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/delete-object.md b/content/riak/kv/2.9.10/developing/api/http/delete-object.md new file mode 100644 index 0000000000..b1a721f0c9 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/delete-object.md @@ -0,0 +1,78 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/delete-object + - /riak/kv/2.9.10/dev/references/http/delete-object +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/test/keys/test2 HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/http/delete-search-index.md b/content/riak/kv/2.9.10/developing/api/http/delete-search-index.md
new file mode 100644
index 0000000000..a68db8bda0
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/http/delete-search-index.md
@@ -0,0 +1,36 @@
+---
+title: "HTTP Delete Search Index"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Delete Search Index"
+    identifier: "http_delete_search_index"
+    weight: 116
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/http/delete-search-index
+  - /riak/kv/2.9.10/dev/references/http/delete-search-index
+---
+
+Deletes a Riak Search index.
+
+## Request
+
+```
+DELETE /search/index/<index_name>
+```
+
+## Normal Response Codes
+
+* `204 No Content` - The index was successfully deleted (also returned
+  if the index did not exist to begin with)
+
+## Typical Error Codes
+
+* `503 Service Unavailable` - The request timed out internally
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/http/fetch-object.md b/content/riak/kv/2.9.10/developing/api/http/fetch-object.md
new file mode 100644
index 0000000000..51ee3b3709
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/http/fetch-object.md
@@ -0,0 +1,245 @@
+---
+title: "HTTP Fetch Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Fetch Object"
+    identifier: "http_fetch_object"
+    weight: 105
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/http/fetch-object
+  - /riak/kv/2.9.10/dev/references/http/fetch-object
+---
+
+Reads an object from the specified bucket/key.
+
+## Request
+
+```bash
+GET /types/type/buckets/bucket/keys/key
+GET /buckets/bucket/keys/key
+```
+
+Important headers:
+
+* `Accept` - When `multipart/mixed` is the preferred content-type, objects with
+siblings will return all siblings in a single request. See [Siblings examples](#siblings-examples). See
+also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1).
+
+Optional headers:
+
+* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics,
+matching on the `ETag` and `Last-Modified` of the object, respectively. If the
+object fails one of the tests (that is, if the ETag is equal or the object is
+unmodified since the supplied timestamp), Riak will return a `304 Not Modified`
+response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5).
+
+Optional query parameters:
+
+* `r` - (read quorum) how many replicas need to agree when retrieving the
+object ([default is defined by the bucket]({{}}riak/kv/2.9.10/developing/api/http/set-bucket-props))
+* `pr` - how many primary replicas need to be online when doing the read
+([default is defined by the bucket]({{}}riak/kv/2.9.10/developing/api/http/set-bucket-props))
+* `basic_quorum` - whether to return early in some failure cases (e.g.
when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/2.9.10/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.9.10/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.10/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. +{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT +< ETag: 6dQBm9oYA1mxRSH0e96l5W +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"foo":"bar"} +``` + +## Siblings examples + +### Manually requesting siblings + +Simple call to fetch an object that has siblings: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 102 +< +Siblings: +16vic4eU9ny46o4KPiDz1f +4v5xOg4bVwUYZdMkqf0d6I +6nr5tDTmhxnwuAFJDd2s6G +6zRSZFUJlHXZ15o9CG0BYl +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +Now request one of the siblings directly: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT +< ETag: 16vic4eU9ny46o4KPiDz1f +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/x-www-form-urlencoded +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + +### Get all siblings in one request + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: multipart/mixed +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh +< Content-Length: 766 +< + +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/x-www-form-urlencoded +Link: ; rel="up" +Etag: 16vic4eU9ny46o4KPiDz1f +Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 4v5xOg4bVwUYZdMkqf0d6I +Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6nr5tDTmhxnwuAFJDd2s6G +Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6zRSZFUJlHXZ15o9CG0BYl +Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT + +{"foo":"bar"} +--YinLMzyUR9feB17okMytgKsylvh-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/fetch-search-index.md b/content/riak/kv/2.9.10/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..b1105145a7 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/fetch-search-index.md @@ -0,0 +1,50 @@ +--- +title: "HTTP Fetch Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Fetch Search Index" + identifier: "http_fetch_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/fetch-search-index + - /riak/kv/2.9.10/dev/references/http/fetch-search-index +--- + +Retrieves information about a Riak Search [index]({{}}riak/kv/2.9.10/developing/usage/search/#simple-setup). + +## Request + +``` +GET /search/index/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` - No Search index with that name is currently + available +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the index is found, Riak will output a JSON object describing the +index, including its name, the [`n_val`]({{}}riak/kv/2.9.10/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.9.10/developing/usage/search-schemas) used by the index. 
Here is an example: + +```json +{ + "name": "my_index", + "n_val": 3, + "schema": "_yz_default" +} +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.9.10/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..3048d6392c --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/fetch-search-schema.md @@ -0,0 +1,41 @@ +--- +title: "HTTP Fetch Search Schema" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Fetch Search Schema" + identifier: "http_fetch_search_schema" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/fetch-search-schema + - /riak/kv/2.9.10/dev/references/http/fetch-search-schema +--- + +Retrieves a Riak KV [search schema]({{}}riak/kv/2.9.10/developing/usage/search-schemas). + +## Request + +``` +GET /search/schema/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the schema is found, Riak will return the contents of the schema as +XML (all Riak Search schemas are XML). + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/get-bucket-props.md b/content/riak/kv/2.9.10/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..8973642252 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/get-bucket-props.md @@ -0,0 +1,85 @@ +--- +title: "HTTP Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Get Bucket Properties" + identifier: "http_get_bucket_props" + weight: 100 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/get-bucket-props + - /riak/kv/2.9.10/dev/references/http/get-bucket-props +--- + +Reads the bucket or bucket type properties. + +## Request + +```bash +GET /buckets/bucket/props +``` + +Or, to read bucket properties from a bucket in a bucket type: + +```bash +GET /types/type/buckets/bucket/props +``` + +Optional query parameters (only valid for the old format): + +* `props` - whether to return the bucket properties (`true` is the default) +* `keys` - whether to return the keys stored in the bucket. (`false` is the +default). See also [HTTP List Keys]({{}}riak/kv/2.9.10/developing/api/http/list-keys). + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` + +The JSON object in the response will contain up to two entries, `"props"` and +`"keys"`, which are present or missing, according to the optional query +parameters. The default is for only `"props"` to be present. + +See [HTTP Set Bucket Properties]({{}}riak/kv/2.9.10/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.9.10/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/props +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/link-walking.md b/content/riak/kv/2.9.10/developing/api/http/link-walking.md new file mode 100644 index 0000000000..78c5814a6a --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/link-walking.md @@ -0,0 +1,128 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/link-walking + - /riak/kv/2.9.10/dev/references/http/link-walking +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/2.9.10/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.9.10/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/2.9.10/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/list-buckets.md b/content/riak/kv/2.9.10/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..233fcf5c69 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/list-buckets.md @@ -0,0 +1,67 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/list-buckets + - /riak/kv/2.9.10/dev/references/http/list-buckets +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. 
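+
+For buckets under a non-default bucket type, the same query is issued against the typed URL; a minimal sketch, assuming a bucket type named `mytype` has already been created and activated:
+
+```curl
+curl -i http://localhost:8098/types/mytype/buckets?buckets=true
+```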
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/list-keys.md b/content/riak/kv/2.9.10/developing/api/http/list-keys.md new file mode 100644 index 0000000000..84218dd05a --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/list-keys.md @@ -0,0 +1,79 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/list-keys + - /riak/kv/2.9.10/dev/references/http/list-keys +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/list-resources.md b/content/riak/kv/2.9.10/developing/api/http/list-resources.md new file mode 100644 index 0000000000..b37ac0dd28 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/list-resources.md @@ -0,0 +1,83 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/list-resources + - /riak/kv/2.9.10/dev/references/http/list-resources +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
+ +The standard resources are: + +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.9.10/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.9.10/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.9.10/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.9.10/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.9.10/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.9.10/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.9.10/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.9.10/developing/api/http/status) + +## Request + +```bash +GET / +``` + +Headers: + +* `Accept` - `application/json` or `text/html` + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Link` - all resources that are described in the response body, but in Link +form + +## Example + +Request JSON response + +```curl +$ curl -i http://localhost:8098 -H "Accept: application/json" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:18:31 GMT +Content-Type: application/json +Content-Length: 398 + +{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"} + +# Request HTML response +curl -i http://localhost:8098 -H "Accept: text/html" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:20:05 GMT +Content-Type: text/html +Content-Length: 666 + + +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/mapreduce.md b/content/riak/kv/2.9.10/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..0391b93699 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/mapreduce.md @@ -0,0 +1,73 @@ +--- +title: "HTTP MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "MapReduce" + identifier: "http_mapreduce" + weight: 108 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/mapreduce + - /riak/kv/2.9.10/dev/references/http/mapreduce +--- + +[MapReduce]({{}}riak/kv/2.9.10/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which 
data will flow.
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{< baseurl >}}riak/kv/2.9.10/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+*This request must include an entity (body), which is the JSON form of the MapReduce query.*
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted.
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/http/ping.md b/content/riak/kv/2.9.10/developing/api/http/ping.md
new file mode 100644
index 0000000000..11f8d59157
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/http/ping.md
@@ -0,0 +1,56 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/http/ping
+  - /riak/kv/2.9.10/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load balancers, and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/http/reset-bucket-props.md b/content/riak/kv/2.9.10/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..cd43767399
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,60 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/http/reset-bucket-props
+  - /riak/kv/2.9.10/dev/references/http/reset-bucket-props
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/bucket/props HTTP/1.1
+> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue)
+< Date: Tue, 06 Nov 2012 21:56:17 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/http/search-index-info.md b/content/riak/kv/2.9.10/developing/api/http/search-index-info.md
new file mode 100644
index 0000000000..0a6caa1696
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/http/search-index-info.md
@@ -0,0 +1,55 @@
+---
+title: "HTTP Search Index Info"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Search Index Info"
+    identifier: "http_search_index_info"
+    weight: 114
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/http/search-index-info
+  - /riak/kv/2.9.10/dev/references/http/search-index-info
+---
+
+Retrieves information about all currently available [Search indexes]({{< baseurl >}}riak/kv/2.9.10/developing/usage/search) in JSON format.
+
+## Request
+
+```
+GET /search/index
+```
+
+## Response
+
+If there are no currently available Search indexes, a `200 OK` will be
+returned but with an empty list as the response value.
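+
+You can exercise the endpoint with a plain GET; a minimal sketch, assuming a node listening on the default HTTP port:
+
+```curl
+curl http://localhost:8098/search/index
+```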
+ +Below is the example output if there is one Search index, called +`test_index`, currently available: + +```json +[ + { + "n_val": 3, + "name": "test_index", + "schema": "_yz_default" + } +] +``` + +#### Normal Response Codes + +* `200 OK` + +#### Typical Error Codes + +* `404 Object Not Found` - Typically returned if Riak Search is not + currently enabled on the node +* `503 Service Unavailable` - The request timed out internally + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/search-query.md b/content/riak/kv/2.9.10/developing/api/http/search-query.md new file mode 100644 index 0000000000..8d9e05cf67 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/search-query.md @@ -0,0 +1,72 @@ +--- +title: "HTTP Search Query" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Search Query" + identifier: "http_search_query" + weight: 113 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/search-query + - /riak/kv/2.9.10/dev/references/http/search-query +--- + +Performs a [Riak KV Search]({{}}riak/kv/2.9.10/developing/usage/search) query. + +## Request + +``` +GET /search/query/ +``` + +## Optional Query Parameters + +* `wt` - The [response + writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers) + to be used when returning the Search payload. The currently + available options are `json` and `xml`. The default is `xml`. +* `q` - The actual Search query itself. Examples can be found in + [Using Search]({{}}riak/kv/2.9.10/developing/usage/search). If a query is not specified, Riak will return + information about the index itself, e.g. the number of documents + indexed. + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `400 Bad Request` - Returned when, for example, a malformed query is + supplied +* `404 Object Not Found` - Returned if the Search index you are + attempting to query does not exist +* `503 Service Unavailable` - The request timed out internally + +## Response + +If a `200 OK` is returned, then the Search query has been successful. +Below is an example JSON response from querying an index that currently +has no documents associated with it: + +```json +{ + "response": { + "docs": [], + "maxScore": 0.0, + "numFound": 0, + "start": 0 + }, + "responseHeader": { + "status": 0, + "QTime": 10, + "params": { /* internal info from the query */ } + } +} +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/secondary-indexes.md b/content/riak/kv/2.9.10/developing/api/http/secondary-indexes.md new file mode 100644 index 0000000000..471be83050 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/secondary-indexes.md @@ -0,0 +1,94 @@ +--- +title: "HTTP Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Secondary Indexes" + identifier: "http_2i" + weight: 109 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/secondary-indexes + - /riak/kv/2.9.10/dev/references/http/secondary-indexes +--- + +[Secondary Indexes]({{}}riak/kv/2.9.10/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
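+
+For instance, an object can be tagged at write time by adding `X-Riak-Index-*` headers to an ordinary store request; a minimal sketch, assuming a backend that supports secondary indexes (such as LevelDB) and illustrative bucket, key, and index names:
+
+```curl
+# Store an object indexed under field1_bin = val1
+curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "Content-Type: text/plain" \
+  -H "x-riak-index-field1_bin: val1" \
+  -d 'this is a test'
+```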
+
+## Request
+
+### Exact Match
+
+```bash
+GET /buckets/mybucket/index/myindex_bin/value
+```
+
+### Range Query
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end
+```
+
+#### Range query with terms
+
+To see the index values matched by the range, use `return_terms=true`.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true
+```
+
+### Pagination
+
+Add the `max_results` parameter for pagination. This limits the number of results returned and provides a `continuation` value for the next request.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=
+```
+
+### Streaming
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?stream=true
+```
+
+## Response
+
+Normal status codes:
+
++ `200 OK`
+
+Typical error codes:
+
++ `400 Bad Request` - if the index name or index value is invalid.
++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system.
++ `503 Service Unavailable` - if the job timed out before it could complete
+
+## Example
+
+```curl
+$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 19
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+{"keys":["mykey1"]}
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/http/set-bucket-props.md b/content/riak/kv/2.9.10/developing/api/http/set-bucket-props.md
new file mode 100644
index 0000000000..73ad3ba6bf
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/http/set-bucket-props.md
@@ -0,0 +1,115 @@
+---
+title: "HTTP Set Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Set Bucket Properties"
+    identifier: "http_set_bucket_props"
+    weight: 101
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/http/set-bucket-props
+  - /riak/kv/2.9.10/dev/references/http/set-bucket-props
+---
+
+Sets bucket properties like "n_val" and "allow_mult".
+
+## Request
+
+```bash
+PUT /buckets/bucket/props
+```
+
+Important headers:
+
+* `Content-Type` - `application/json`
+
+The body of the request should be a JSON object with a single entry "props".
+Unmodified bucket properties may be omitted.
+
+Available properties:
+
+* `n_val` (integer > 0) - the number of replicas for objects in this bucket
+* `allow_mult` (true or false) - whether to allow sibling objects to be created
+(concurrent updates)
+* `last_write_wins` (true or false) - whether to ignore object history (vector
+clock) when writing
+* `precommit` - [precommit hooks]({{< baseurl >}}riak/kv/2.9.10/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{< baseurl >}}riak/kv/2.9.10/developing/usage/commit-hooks)
+* `r, w, dw, rw` - default quorum values for operations on keys in the bucket.
+Valid values are:
+  * `"all"` - all nodes must respond
+  * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.*
+  * `"one"` - equivalent to 1
+  * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of diverse physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+{{% note title="Node Confirms" %}}
+`node_confirms` is a tunable for durability. When operating in a failure state, Riak will store replicas in fallback vnodes, and in some cases multiple fallbacks may be on the same physical node. `node_confirms` is an option that specifies how many distinct physical nodes must acknowledge a write for it to be considered successful.
+
+When Riak receives a 'put', it starts up a `riak_kv_put_fsm` (finite state machine). This prepares and then validates the options, then calls any precommit hooks, before executing a put to the local vnode in the preflist, which becomes the co-ordinating node. The co-ordinating node then waits for the local vnode response before executing the put request remotely on the two remaining nodes in the preflist.
+
+The FSM then waits for the remote vnode responses, and as it receives responses, it adds these results and checks whether enough results have been collected to satisfy the bucket properties such as 'dw' and 'pw'.
+When analysing the responses, Riak will count the number of different nodes from which results have been returned. The finite state machine can now be required to wait for a minimum number of confirmations from different nodes, whilst also ensuring all other configured options are satisfied.
+
+Once all options are satisfied, the response is returned, postcommit hooks are called, and the FSM finishes.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+  -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> Content-Length: 21 +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/status.md b/content/riak/kv/2.9.10/developing/api/http/status.md new file mode 100644 index 0000000000..9dca7dd9c8 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/status.md @@ -0,0 +1,172 @@ +--- +title: "HTTP Status" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Status" + identifier: "http_status" + weight: 111 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/status + - /riak/kv/2.9.10/dev/references/http/status +--- + +Reports about the performance and configuration of the Riak node to which it was requested. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active. + +## Performance + +Repeated requests to the `/stats` endpoint do not have a negative +performance impact as the statistics are cached internally in Riak. + +## Request + +```bash +GET /stats +``` + +Important headers: + +* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`. + +## Response + +Normal status codes: +* `200 OK` + +Typical error codes: +* `404 Not Found` - if `riak_kv_stat` is not enabled + +Important headers: +* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks) + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /stats HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: text/plain +> +< HTTP/1.1 200 OK +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 2102 +< +{ + "vnode_gets": 0, + "vnode_puts": 0, + "read_repairs": 0, + "vnode_gets_total": 0, + "vnode_puts_total": 0, + "node_gets": 0, + "node_gets_total": 0, + "node_get_fsm_time_mean": "undefined", + "node_get_fsm_time_median": "undefined", + "node_get_fsm_time_95": "undefined", + "node_get_fsm_time_99": "undefined", + "node_get_fsm_time_100": "undefined", + "node_puts": 0, + "node_puts_total": 0, + "node_put_fsm_time_mean": "undefined", + "node_put_fsm_time_median": "undefined", + "node_put_fsm_time_95": "undefined", + "node_put_fsm_time_99": "undefined", + "node_put_fsm_time_100": "undefined", + "read_repairs_total": 0, + "cpu_nprocs": 84, + "cpu_avg1": 251, + "cpu_avg5": 174, + "cpu_avg15": 110, + "mem_total": 7946684000.0, + "mem_allocated": 4340880000.0, + "nodename": "riak@127.0.0.1", + "connected_nodes": [ + + ], + "sys_driver_version": "1.5", + "sys_global_heaps_size": 0, + "sys_heap_type": "private", + "sys_logical_processors": 2, + "sys_otp_release": "R13B04", + "sys_process_count": 189, + "sys_smp_support": true, + "sys_system_version": "Erlang R13B04 (erts-5.7.5) [[source]] [[64-bit]] [[smp:2:2]] [[rq:2]] [[async-threads:5]] [[hipe]] [[kernel-poll:true]]", + "sys_system_architecture": "i386-apple-darwin10.3.0", + "sys_threads_enabled": true, + "sys_thread_pool_size": 5, + "sys_wordsize": 8, + "ring_members": [ + "riak@127.0.0.1" + ], + "ring_num_partitions": 64, + "ring_ownership": "[{'riak@127.0.0.1',64}]", + "ring_creation_size": 64, + "storage_backend": "riak_kv_bitcask_backend", + "pbc_connects_total": 0, + "pbc_connects": 0, + "pbc_active": 0, + "riak_kv_version": "0.11.0", + "riak_core_version": "0.11.0", + "bitcask_version": "1.0.1", + "luke_version": "0.1", + "webmachine_version": "1.7.1", + "mochiweb_version": "1.7.1", + "erlang_js_version": "0.4", + "runtime_tools_version": "1.8.3", + "crypto_version": "1.6.4", + "os_mon_version": "2.9.1", + "sasl_version": "2.1.9", + "stdlib_version": "1.16.5", + "kernel_version": "2.13.5" +} +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Output Explanation + +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.9.10/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
+
+Stat | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total | Total number of ignored gossip messages since node was started
+rings_reconciled_total | Total number of ring reconciliation operations since node was started
+rings_reconciled | Number of ring reconciliation operations in the last minute
+gossip_received | Number of gossip messages received in the last minute
+rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/http/store-object.md b/content/riak/kv/2.9.10/developing/api/http/store-object.md
new file mode 100644
index 0000000000..a16c3738d7
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/http/store-object.md
@@ -0,0 +1,149 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/http/store-object
+  - /riak/kv/2.9.10/dev/references/http/store-object
+---
+
+Stores an object under the specified bucket / key. Storing an object comes in +two forms, depending on whether you want to use a key of your choosing, or let +Riak assign a key to a new object. + +## Request + +```bash +POST /types/type/buckets/bucket/keys # Riak-defined key +PUT /types/type/buckets/bucket/keys/key # User-defined key +POST /buckets/bucket/keys # Riak-defined key +PUT /buckets/bucket/keys/key # User-defined key +``` + +For the sake of compatibility with older clients, `POST` is also acceptable in +the form where the key is specified. + +Important headers: + +* `Content-Type` must be set for the stored object. Set what you expect to +receive back when next requesting it. +* `X-Riak-Vclock` if the object already exists, the vector clock attached to the +object when read. +* `X-Riak-Meta-*` - any additional metadata headers that should be stored with +the object. +* `X-Riak-Index-*` - index entries under which this object should be indexed. +[Read more about Secondary Indexing]({{}}riak/kv/2.9.10/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.10/developing/api/http/link-walking) + +Optional headers (only valid on `PUT`): + +* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since` +invoke conditional request semantics, matching on the `ETag` and `Last-Modified` +of the existing object. These can be used to prevent overwriting a modified +object. If the test fails, you will receive a `412 Precondition Failed` +response. This does not prevent concurrent writes; it is possible for the +condition to evaluate to true for multiple requests if the requests occur at the +same time. + +Optional query parameters: + +* `w` (write quorum) how many replicas to write to before returning a successful +response (default is defined by the bucket level) +* `dw` (durable write quorum) how many replicas to commit to durable storage +before returning a successful response (default is defined at the bucket level) +* `pw` how many primary replicas must be online to attempt a write (default is +defined at the bucket level) +* `returnbody=[true|false]` whether to return the contents of the stored object. + +*This request must include a body (entity).* + +## Response + +Normal status codes: + +* `201 Created` (when submitting without a key) +* `200 OK` +* `204 No Content` +* `300 Multiple Choices` + +Typical error codes: + +* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N) +* `412 Precondition Failed` if one of the conditional request headers failed to +match (see above) + +Important headers: + +* `Location` a relative URL to the newly-created object (when submitting without +a key) + +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.9.10/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +may be returned if siblings existed or were created as part of the operation, +and the response can be dealt with similarly. + +## Example: Storing Without Key + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys \ + -H "Content-Type: text/plain" -d 'this is a test' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> POST /buckets/test/keys HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: text/plain +> Content-Length: 14 +> +< HTTP/1.1 201 Created +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49 +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Example: Storing With Key + +```curl +$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA== +> Content-Length: 13 +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/http/store-search-index.md b/content/riak/kv/2.9.10/developing/api/http/store-search-index.md new file mode 100644 index 0000000000..119d017b64 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/http/store-search-index.md @@ -0,0 +1,55 @@ +--- +title: "HTTP Store Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Store Search Index" + identifier: "http_store_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.10/dev/references/http/store-search-index + - /riak/kv/2.9.10/dev/references/http/store-search-index +--- + +Creates a new Riak Search [index]({{}}riak/kv/2.9.10/developing/usage/search/#simple-setup). + +## Request + +``` +PUT /search/index/ +``` + +## Optional Request Body + +If you run a `PUT` request to this endpoint without a request body, Riak +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.9.10/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. + +To specify a different schema, however, you must pass Riak a JSON object +as the request body in which the `schema` field specifies the name of +the schema to use. If you've [stored a schema]({{}}riak/kv/2.9.10/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +request would create an index called `my_index` that used that schema: + +```curl +curl -XPUT http://localhost:8098/search/index/my_index \ + -H "Content-Type: application/json" \ + -d '{"schema": "my_custom_schema"}' +``` + +More information can be found in [Using Search]({{}}riak/kv/2.9.10/developing/usage/search). 
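+
+If the default schema is all you need, the request body can simply be omitted; a minimal sketch (the index name `my_index` is illustrative):
+
+```curl
+curl -XPUT http://localhost:8098/search/index/my_index
+```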
+
+## Normal Response Codes
+
+* `204 No Content` - The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` - The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/http/store-search-schema.md b/content/riak/kv/2.9.10/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..13e3e2c375
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/http/store-search-schema.md
@@ -0,0 +1,53 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/http/store-search-schema
+  - /riak/kv/2.9.10/dev/references/http/store-search-schema
+---
+
+Creates a new Riak [Search schema]({{< baseurl >}}riak/kv/2.9.10/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/<schema_name>
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{< baseurl >}}riak/kv/2.9.10/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response
+
+* `204 No Content` - The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` - The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict` - The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..b63671f59d
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers.md
@@ -0,0 +1,188 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers
+  - /riak/kv/2.9.10/dev/references/protocol-buffers
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages.
+Messages are all encoded the same way, consisting of:
+
+* 32-bit length of message code + Protocol Buffers message in network
+  order
+* 8-bit message code to identify the Protocol Buffers message
+* N bytes of Protocol Buffers-encoded message
+
+### Example
+
+```
+00 00 00 07 09 0A 01 62 12 01 6B
+|----Len---|MC|----Message-----|
+
+Len = 0x07
+Message Code (MC) = 0x09 = RpbGetReq
+RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B
+
+Decoded Message:
+bucket: "b"
+key: "k"
+```
+
+## Message Codes
+
+Code | Message |
+:----|:--------|
+0 | `RpbErrorResp` |
+1 | `RpbPingReq` |
+2 | `RpbPingResp` |
+3 | `RpbGetClientIdReq` |
+4 | `RpbGetClientIdResp` |
+5 | `RpbSetClientIdReq` |
+6 | `RpbSetClientIdResp` |
+7 | `RpbGetServerInfoReq` |
+8 | `RpbGetServerInfoResp` |
+9 | `RpbGetReq` |
+10 | `RpbGetResp` |
+11 | `RpbPutReq` |
+12 | `RpbPutResp` |
+13 | `RpbDelReq` |
+14 | `RpbDelResp` |
+15 | `RpbListBucketsReq` |
+16 | `RpbListBucketsResp` |
+17 | `RpbListKeysReq` |
+18 | `RpbListKeysResp` |
+19 | `RpbGetBucketReq` |
+20 | `RpbGetBucketResp` |
+21 | `RpbSetBucketReq` |
+22 | `RpbSetBucketResp` |
+23 | `RpbMapRedReq` |
+24 | `RpbMapRedResp` |
+25 | `RpbIndexReq` |
+26 | `RpbIndexResp` |
+27 | `RpbSearchQueryReq` |
+28 | `RpbSearchQueryResp` |
+29 | `RpbResetBucketReq` |
+30 | `RpbResetBucketResp` |
+31 | `RpbGetBucketTypeReq` |
+32 | `RpbSetBucketTypeResp` |
+40 | `RpbCSBucketReq` |
+41 | `RpbCSUpdateReq` |
+50 | `RpbCounterUpdateReq` |
+51 | `RpbCounterUpdateResp` |
+52 | `RpbCounterGetReq` |
+53 | `RpbCounterGetResp` |
+54 | `RpbYokozunaIndexGetReq` |
+55 | `RpbYokozunaIndexGetResp` |
+56 | `RpbYokozunaIndexPutReq` |
+57 | `RpbYokozunaIndexPutResp` |
+58 | `RpbYokozunaSchemaGetReq` |
+59 | `RpbYokozunaSchemaGetResp` |
+60 | `RpbYokozunaSchemaPutReq` |
+80 | `DtFetchReq` |
+81 | `DtFetchResp` |
+82 | `DtUpdateReq` |
+83 | `DtUpdateResp` |
+253 | `RpbAuthReq` |
+254 | `RpbAuthResp` |
+255 | `RpbStartTls` |
+
+{{% note title="Message Definitions" %}}
+All Protocol Buffers messages are defined in the `riak.proto` and other
+`.proto` files in the `/src` directory of the
+RiakPB project.
+{{% /note %}}
+
+### Error Response
+
+If the request does not result in an error, Riak will return one of a
+variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`,
+depending on which request message is sent.
+
+If the server experiences an error processing a request, however, it
+will return an `RpbErrorResp` message instead of the response expected
+for the given request (e.g. `RpbGetResp` is the expected response to
+`RpbGetReq`). Error messages contain an error string and an error code,
+like this:
+
+```protobuf
+message RpbErrorResp {
+  required bytes errmsg = 1;
+  required uint32 errcode = 2;
+}
+```
+
+### Values
+
+* `errmsg` - A string representation of what went wrong
+* `errcode` - A numeric code. Currently, only `RIAKC_ERR_GENERAL=1`
+  is defined.
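+
+To make the framing concrete, below is a minimal Python sketch that pings a
+node over the raw protocol. It is an illustration only, assuming a local node
+on the default port 8087; `RpbPingReq` and `RpbPingResp` carry no protobuf
+payload, so no generated bindings are needed:
+
+```python
+import socket
+import struct
+
+MSG_PING_REQ = 1   # RpbPingReq (see the message-code table above)
+MSG_PING_RESP = 2  # RpbPingResp
+
+sock = socket.create_connection(("127.0.0.1", 8087))
+
+# Frame: 32-bit big-endian length (code byte + payload), then the code.
+payload = b""  # ping has no protobuf body
+sock.sendall(struct.pack("!IB", len(payload) + 1, MSG_PING_REQ) + payload)
+
+# Read the response frame back the same way. A real client would loop
+# on recv() until the full frame has arrived.
+length, = struct.unpack("!I", sock.recv(4))
+code = sock.recv(1)[0]
+body = sock.recv(length - 1) if length > 1 else b""
+print(code == MSG_PING_RESP)  # True if the node answered the ping
+sock.close()
+```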
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/yz-schema-put) + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..5a085a7ad0 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,33 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/auth-req + - /riak/kv/2.9.10/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
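+
+As a rough illustration, the `RpbAuthReq` message defined under **Request**
+below can be constructed with protoc-generated Python bindings. The module
+name `riak_pb2` is illustrative only (compile Riak's `.proto` files yourself),
+and the credentials are example values:
+
+```python
+# Hypothetical module produced by running protoc over riak.proto.
+from riak_pb2 import RpbAuthReq
+
+req = RpbAuthReq(user=b"riakuser", password=b"rosebud")  # example credentials
+payload = req.SerializeToString()
+# Frame `payload` with message code 253 (RpbAuthReq) as described in the
+# Protocol Buffers API overview, after first upgrading the connection
+# with RpbStartTls (code 255).
+```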
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.9.10/using/security/basics). + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..1391f51c80 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,81 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.9.10" +menu: + riak_kv-2.9.10: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/coverage-queries + - /riak/kv/2.9.10/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
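+
+A sketch of building this request with protoc-generated Python bindings
+follows. The module name and bucket name are illustrative assumptions:
+
+```python
+# Hypothetical module produced by running protoc over riak_kv.proto.
+from riak_kv_pb2 import RpbCoverageReq
+
+req = RpbCoverageReq()
+req.bucket = b"sensor_data"   # hypothetical bucket
+req.min_partitions = 16       # rounded up to a power of 2 >= ring size
+payload = req.SerializeToString()
+# `payload` is then framed and sent as described in the Protocol Buffers
+# API overview; each RpbCoverageEntry in the response yields an
+# (ip, port, cover_context) triple to attach to a 2i query.
+```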
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/delete-object.md
new file mode 100644
index 0000000000..cccd54afae
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/delete-object.md
@@ -0,0 +1,103 @@
+---
+title: "PBC Delete Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Delete Object"
+    identifier: "pbc_delete_object"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/delete-object
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/delete-object
+---
+
+Delete an object in the specified [bucket type]({{}}riak/kv/2.9.10/using/cluster-operations/bucket-types)/bucket/key location.
+
+## Request
+
+```protobuf
+message RpbDelReq {
+  required bytes bucket = 1;
+  required bytes key = 2;
+  optional uint32 rw = 3;
+  optional bytes vclock = 4;
+  optional uint32 r = 5;
+  optional uint32 w = 6;
+  optional uint32 pr = 7;
+  optional uint32 pw = 8;
+  optional uint32 dw = 9;
+  optional uint32 timeout = 10;
+  optional bool sloppy_quorum = 11;
+  optional uint32 n_val = 12;
+  optional bytes type = 13;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description |
+:---------|:------------|
+`bucket` | The name of the bucket in which the object is stored
+`key` | The key under which the object is stored
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that that integer value is less than or equal
+to N, _or_ a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description |
+:---------|:------------|
+`rw` | How many replicas to delete before returning a successful response
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Used to prevent the deletion of objects that have been modified since the last GET request (sent as a byte array)
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the delete request will be sent
+`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter.
+
+## Response
+
+Only the message code is returned.
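+
+For comparison with the wire-level example below, here is a rough sketch of
+the same delete issued through the official Python client, which frames
+`RpbDelReq` for you. Treat the exact constructor and keyword arguments as
+assumptions to verify against the client's documentation:
+
+```python
+from riak import RiakClient  # official Python client, assumed installed
+
+# Bucket and key names match the example below; the `rw` keyword
+# corresponds to the optional parameter described above.
+client = RiakClient(protocol="pbc", host="127.0.0.1", pb_port=8087)
+client.bucket("notabucket").delete("k", rw=1)
+```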
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65
+         74 12 01 6B 18 01
+Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>>
+
+RpbDelReq protoc decode:
+bucket: "notabucket"
+key: "k"
+rw: 1
+
+```
+
+#### Response
+
+```
+Hex      00 00 00 01 0E
+Erlang <<0,0,0,1,14>>
+
+RpbDelResp - only message code defined
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-counter-store.md
new file mode 100644
index 0000000000..c111d4e605
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-counter-store.md
@@ -0,0 +1,34 @@
+---
+title: "PBC Data Type Counter Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Data Type Counter Store"
+    identifier: "pbc_dt_counter_store"
+    weight: 117
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/dt-counter-store
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/dt-counter-store
+---
+
+An operation to update a [counter]({{}}riak/kv/2.9.10/developing/data-types).
+
+## Request
+
+```protobuf
+message CounterOp {
+  optional sint64 increment = 1;
+}
+```
+
+The `increment` value specifies how much the counter will be incremented
+or decremented, depending on whether the `increment` value is positive
+or negative. This operation can be used to update counters that are
+stored on their own in a key or [within a map]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-map-store).
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-fetch.md
new file mode 100644
index 0000000000..851345731d
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-fetch.md
@@ -0,0 +1,130 @@
+---
+title: "PBC Data Type Fetch"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Data Type Fetch"
+    identifier: "pbc_dt_fetch"
+    weight: 114
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/dt-fetch
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/dt-fetch
+---
+
+The equivalent of [`RpbGetReq`]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.9.10/developing/data-types). This request results in a `DtFetchResp`
+message (explained in the **Response** section below).
+
+## Request
+
+```protobuf
+message DtFetchReq {
+  required bytes bucket = 1;
+  required bytes key = 2;
+  required bytes type = 3;
+  optional uint32 r = 4;
+  optional uint32 pr = 5;
+  optional bool basic_quorum = 6;
+  optional bool notfound_ok = 7;
+  optional uint32 timeout = 8;
+  optional bool sloppy_quorum = 9;
+  optional uint32 n_val = 10;
+  optional bool include_context = 11 [default=true];
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket in which the Data Type is stored
+`key` | The key where the Data Type is stored
+`type` | The [bucket type]({{}}riak/kv/2.9.10/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)
+
+#### Optional Parameters
+
+> **Note on defaults and special values**
+>
+> All of the optional parameters below have default values determined on a
+> per-bucket basis.
+> Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-props) for more information.
+>
+> Furthermore, you can assign an integer value to the `r` and
+> `pr` parameters, provided that that integer value is less than or equal
+> to N, _or_ a special value denoting `one` (`4294967295-1`), `quorum`
+> (`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+
+Parameter | Description
+:---------|:-----------
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and one success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the fetch request will be sent
+`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client
+
+## Response
+
+The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+
+```protobuf
+message DtFetchResp {
+  enum DataType {
+    COUNTER = 1;
+    SET = 2;
+    MAP = 3;
+  }
+
+  optional bytes context = 1;
+  required DataType type = 2;
+  optional DtValue value = 3;
+}
+```
+
+If the `include_context` option is specified, an opaque "context" value
+will be returned along with the user-readable data. When sending an
+update request, the client should send this context as well, just as one
+would send a [vclock]({{}}riak/kv/2.9.10/learn/glossary/#vector-clock) for standard KV updates.
+
+The type of the Data Type is specified in the `type` field, and must be
+one of the three possible values of the `DataType` enum (`COUNTER`,
+`SET`, or `MAP`).
+
+The current value of the Data Type is contained in the `value` field,
+which itself contains a `DtValue` message. This message will have the
+following structure:
+
+```protobuf
+message DtValue {
+  optional sint64 counter_value = 1;
+  repeated bytes set_value = 2;
+  repeated MapEntry map_value = 3;
+}
+```
+
+If the Data Type queried is a counter, it will return an integer value
+for the counter; if a set, it will return the set's current value, in
+bytes; if a map, it will return a `MapEntry` message.
+`MapEntry` messages are structured as follows:
+
+```protobuf
+message MapEntry {
+  required MapField field = 1;
+  optional sint64 counter_value = 2;
+  repeated bytes set_value = 3;
+  optional bytes register_value = 4;
+  optional bool flag_value = 5;
+  repeated MapEntry map_value = 6;
+}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-map-store.md
new file mode 100644
index 0000000000..23124dc905
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-map-store.md
@@ -0,0 +1,76 @@
+---
+title: "PBC Data Type Map Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Data Type Map Store"
+    identifier: "pbc_dt_map_store"
+    weight: 119
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/dt-map-store
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/dt-map-store
+---
+
+An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc.
+
+## Request
+
+Operations on maps are requested using a `MapOp` message, which has the following structure:
+
+```protobuf
+message MapOp {
+  repeated MapField adds = 1;
+  repeated MapField removes = 2;
+  repeated MapUpdate updates = 3;
+}
+```
+
+In a `MapOp` message, you can add fields to or remove fields from the map, or update one or more existing fields. You can include as many field additions or removals and/or field updates as you wish.
+
+Adding or removing a field involves including a `MapField` message in your `MapOp` operation:
+
+```protobuf
+message MapField {
+  enum MapFieldType {
+    COUNTER = 1;
+    SET = 2;
+    REGISTER = 3;
+    FLAG = 4;
+    MAP = 5;
+  }
+  required bytes name = 1;
+  required MapFieldType type = 2;
+}
+```
+
+The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated.
+
+If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure:
+
+```protobuf
+message MapUpdate {
+  enum FlagOp {
+    ENABLE = 1;
+    DISABLE = 2;
+  }
+  required MapField field = 1;
+  optional CounterOp counter_op = 2;
+  optional SetOp set_op = 3;
+  optional bytes register_op = 4;
+  optional FlagOp flag_op = 5;
+  optional MapOp map_op = 6;
+}
+```
+
+The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-set-store).
+
+If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively).
+
+Updating a register does not involve sending a special message type.
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..40c8d262f9 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/dt-set-store + - /riak/kv/2.9.10/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..38555771d6 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,131 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/dt-store + - /riak/kv/2.9.10/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.9.10/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.9.10/using/cluster-operations/bucket-types). 
+
+Also required is a `DtOp` message that specifies which operation is to
+be performed, depending on whether the Data Type being updated is a
+[counter]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-map-store).
+
+```protobuf
+message DtOp {
+  optional CounterOp counter_op = 1;
+  optional SetOp set_op = 2;
+  optional MapOp map_op = 3;
+}
+```
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that that integer value is less than or equal
+to N, _or_ a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.9.10/learn/glossary/#vector-clock)
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_body` | Whether to return the contents of the stored object. Defaults to `false`.
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be included in the `DtUpdateResp` sent back to the client.
+
+## Response
+
+The response to a Data Type update request is analogous to
+[`RpbPutResp`]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/store-object) for KV operations. If
+`return_body` is set in the update request message (as explained above),
+the message will include the opaque context of the Data Type (`context`)
+and the new value of the Data Type _after_ the update has completed
+(depending on whether the Data Type is a counter, set, or map). If no
+key was specified in the update request, it will include the
+Riak-assigned key (`key`).
+
+```protobuf
+message DtUpdateResp {
+  optional bytes key = 1;
+  optional bytes context = 2;
+  optional sint64 counter_value = 3;
+  repeated bytes set_value = 4;
+  repeated MapEntry map_value = 5;
+}
+```
+
+Assuming `return_body` is set to `true`: if a counter is updated, the
+response will include an integer as the `counter_value`; if a set is
+updated, a list of binaries will be returned as the `set_value`; and if a
+map is updated, the returned `map_value` will be a `MapEntry` message.
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..2247cb407a --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,34 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/dt-union + - /riak/kv/2.9.10/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/dt-store) message. + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..246e6d29a5 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,184 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/fetch-object + - /riak/kv/2.9.10/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and one success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match
+`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it
+`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+
+## Response
+
+```protobuf
+message RpbGetResp {
+  repeated RpbContent content = 1;
+  optional bytes vclock = 2;
+  optional bool unchanged = 3;
+}
+```
+
+#### Values
+
+Value | Description
+:-----|:-----------
+`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty.
+`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings
+`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true`
+
+The content entries hold the object value and any metadata.
+Below is the structure of a RpbContent message, which is
+included in GET/PUT responses (`RpbGetResp` (above) and
+[`RpbPutResp`]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/store-object), respectively):
+
+```protobuf
+message RpbContent {
+  required bytes value = 1;
+  optional bytes content_type = 2;
+  optional bytes charset = 3;
+  optional bytes content_encoding = 4;
+  optional bytes vtag = 5;
+  repeated RpbLink links = 6;
+  optional uint32 last_mod = 7;
+  optional uint32 last_mod_usecs = 8;
+  repeated RpbPair usermeta = 9;
+  repeated RpbPair indexes = 10;
+  optional bool deleted = 11;
+}
+```
+
+From the above, we can see that an `RpbContent` message will always
+contain the binary `value` of the object. But it could also contain any
+of the following optional parameters:
+
+* `content_type` - The content type of the object, e.g. `text/plain`
+  or `application/json`
+* `charset` - The character encoding of the object, e.g. `utf-8`
+* `content_encoding` - The content encoding of the object, e.g.
+  `gzip`
+* `vtag` - The object's [vtag]({{}}riak/kv/2.9.10/learn/glossary/#vector-clock)
+* `links` - This parameter is associated with the now-deprecated link
+  walking feature and should not be used by Riak clients
+* `last_mod` - A timestamp for when the object was last modified, in
+  [Unix time](http://en.wikipedia.org/wiki/Unix_time) seconds
+* `last_mod_usecs` - The microseconds portion of the timestamp for when
+  the object was last modified
+* `usermeta` - This field stores user-specified key/value metadata
+  pairs to be associated with the object. `RpbPair` messages used to
+  send metadata of this sort are structured like this:
+
+    ```protobuf
+    message RpbPair {
+      required bytes key = 1;
+      optional bytes value = 2;
+    }
+    ```
+    Notice that an `RpbPair` can hold both a key and a value, or just a key.
+    `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.9.10/developing/usage/secondary-indexes) to objects (in the optional
+    `indexes` field).
+* `deleted` - Whether the object has been deleted (i.e. whether a + tombstone for the object has been found under the specified key) + +{{% note title="Note on missing keys" %}} +Remember: if a key is not stored in Riak, an `RpbGetResp` response without the +`content` and `vclock` fields will be returned. This should be mapped to +whatever convention the client language uses to return not found. The Erlang +client, for example, returns the atom `{error, notfound}`. +{{% /note %}} + +## Example + +#### Request + +``` +Hex 00 00 00 07 09 0A 01 62 12 01 6B +Erlang <<0,0,0,7,9,10,1,98,18,1,107>> + +RpbGetReq protoc decode: +bucket: "b" +key: "k" +``` + +#### Response + +``` +Hex 00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44 + 6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B + 49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B + CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65 + 30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00 +Erlang <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122, + 56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64, + 224,185,6,18,31,107,206,97,96,96,96,204,96,226,82,44,172,194,91,63, + 101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>> + +RpbGetResp protoc decode: +content { + value: "v2" + vtag: "3SDlf4INKz8hNdhyImKIru" + last_mod: 1271442363 + last_mod_usecs: 105696 +} +vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000" +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-props.md new file mode 100644 index 0000000000..f6099eb27e --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-props.md @@ -0,0 +1,113 @@ +--- +title: "PBC Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Get Bucket Properties" + identifier: "pbc_get_bucket_props" + weight: 102 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/get-bucket-props + - /riak/kv/2.9.10/dev/references/protocol-buffers/get-bucket-props +--- + +Fetch a bucket's properties. + +## Request + +```protobuf +message RpbGetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.9.10/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +the `default` bucket type will be used. 
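+
+As a rough illustration, here is a sketch of fetching bucket properties
+through the official Python client, which sends `RpbGetBucketReq` and decodes
+the response for you. The bucket name is a placeholder, and the exact client
+API is an assumption to verify against the client's documentation:
+
+```python
+from riak import RiakClient  # official Python client, assumed installed
+
+# Fetch the properties of a hypothetical bucket over PBC.
+client = RiakClient(protocol="pbc", host="127.0.0.1", pb_port=8087)
+props = client.bucket("my_bucket").get_properties()
+print(props.get("n_val"), props.get("allow_mult"))
+```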
+
+## Response
+
+When an `RpbGetBucketReq` message is sent to Riak, it will respond with
+an `RpbGetBucketResp` message, which returns the bucket's properties:
+
+```protobuf
+message RpbGetBucketResp {
+  required RpbBucketProps props = 1;
+}
+```
+
+The `RpbBucketProps` value itself is structured as follows:
+
+```protobuf
+message RpbBucketProps {
+  optional uint32 n_val = 1;
+  optional bool allow_mult = 2;
+  optional bool last_write_wins = 3;
+  repeated RpbCommitHook precommit = 4;
+  optional bool has_precommit = 5 [default = false];
+  repeated RpbCommitHook postcommit = 6;
+  optional bool has_postcommit = 7 [default = false];
+  optional RpbModFun chash_keyfun = 8;
+  optional RpbModFun linkfun = 9;
+  optional uint32 old_vclock = 10;
+  optional uint32 young_vclock = 11;
+  optional uint32 big_vclock = 12;
+  optional uint32 small_vclock = 13;
+  optional uint32 pr = 14;
+  optional uint32 r = 15;
+  optional uint32 w = 16;
+  optional uint32 pw = 17;
+  optional uint32 dw = 18;
+  optional uint32 rw = 19;
+  optional bool basic_quorum = 20;
+  optional bool notfound_ok = 21;
+  optional bytes backend = 22;
+  optional bool search = 23;
+  enum RpbReplMode {
+    FALSE = 0;
+    REALTIME = 1;
+    FULLSYNC = 2;
+    TRUE = 3;
+  }
+  optional RpbReplMode repl = 24;
+  optional bytes search_index = 25;
+  optional bytes datatype = 26;
+  optional bool consistent = 27;
+}
+```
+
+#### Optional Response Values
+
+Each `RpbBucketProps` message returns all of the properties associated
+with a particular bucket. Default values for bucket properties, as well
+as descriptions of all of the above properties, can be found in the
+[configuration file]({{}}riak/kv/2.9.10/configuring/reference/#default-bucket-properties) documentation.
+
+It should be noted that the value of an `RpbBucketProps` message may
+include other message types, such as `RpbModFun` (specifying
+module-function pairs for bucket properties that require them) and
+`RpbCommitHook` (specifying the module-function pair and name of a
+commit hook). Those message types are structured like this:
+
+```protobuf
+message RpbModFun {
+  required bytes module = 1;
+  required bytes function = 2;
+}
+
+message RpbCommitHook {
+  optional RpbModFun modfun = 1;
+  optional bytes name = 2;
+}
+```
+
+{{% note title="Note on `RpbReplMode`" %}}
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
+{{% /note %}}
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-type.md
new file mode 100644
index 0000000000..a3a5db03eb
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-type.md
@@ -0,0 +1,36 @@
+---
+title: "PBC Get Bucket Type"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Get Bucket Type"
+    identifier: "pbc_get_bucket_type"
+    weight: 112
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/get-bucket-type
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/get-bucket-type
+---
+
+Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.9.10/using/cluster-operations/bucket-types).
+
+## Request
+
+```protobuf
+message RpbGetBucketTypeReq {
+  required bytes type = 1;
+}
+```
+
+Only the name of the bucket type needs to be specified (under `type`).
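+
+A similar sketch with the official Python client, assuming a hypothetical
+bucket type name and that the client exposes
+`bucket_type(...).get_properties()`:
+
+```python
+from riak import RiakClient  # official Python client, assumed installed
+
+# Fetch the properties of a hypothetical bucket type over PBC.
+client = RiakClient(protocol="pbc", host="127.0.0.1", pb_port=8087)
+props = client.bucket_type("my_type").get_properties()
+print(props.get("datatype"))  # e.g. "counter" for a Data Type bucket type
+```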
+ +## Response + +A bucket type's properties will be sent to the client as part of an +[`RpbBucketProps`]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-props) message. + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/get-client-id.md new file mode 100644 index 0000000000..2e1bc7aac1 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/get-client-id.md @@ -0,0 +1,64 @@ +--- +title: "PBC Get Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Get Client ID" + identifier: "pbc_get_client_id" + weight: 127 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/get-client-id + - /riak/kv/2.9.10/dev/references/protocol-buffers/get-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Get the client id used for this connection. Client ids are used for +conflict resolution and each unique actor in the system should be +assigned one. A client id is assigned randomly when the socket is +connected and can be changed using [Set Client ID]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/set-client-id). + +## Request + +Just the `RpbGetClientIdReq` message code. No request message defined. + +## Response + +```protobuf +// Get ClientId Request - no message defined, just send RpbGetClientIdReq +message code +message RpbGetClientIdResp { + required bytes client_id = 1; // Client id in use for this connection +} +``` + +## Example + +Request + +``` +Hex 00 00 00 01 03 +Erlang <<0,0,0,1,3>> +``` + + +Response + +``` +Hex 00 00 00 07 04 0A 04 01 65 01 B5 +Erlang <<0,0,0,7,4,10,4,1,101,1,181>> + +RpbGetClientIdResp protoc decode: +client_id: "001e001265" +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..5153d3ea96 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,79 @@ +--- +title: "PBC List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "List Buckets" + identifier: "pbc_list_buckets" + weight: 100 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/list-buckets + - /riak/kv/2.9.10/dev/references/protocol-buffers/list-buckets +--- + +List all of the bucket names available. + +{{% note title="Caution" %}} +This call can be expensive for the server. Do not use in performance-sensitive +code. +{{% /note %}} + + +## Request + +Only the message code is required. 
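+
+Because the request is just a framed message code, it is easy to issue by
+hand. A minimal Python sketch, assuming a local node on the default port
+8087:
+
+```python
+import socket
+import struct
+
+# A bare RpbListBucketsReq is the framed message code 15 (see the
+# message-code table in the Protocol Buffers API overview).
+sock = socket.create_connection(("127.0.0.1", 8087))
+sock.sendall(struct.pack("!IB", 1, 15))  # length = 1, code = 15
+
+length, = struct.unpack("!I", sock.recv(4))
+code = sock.recv(1)[0]                   # 16 == RpbListBucketsResp
+body = sock.recv(length - 1)             # protobuf-encoded bucket list
+# Decode `body` with generated RpbListBucketsResp bindings; a real
+# client would also loop on recv() until `length - 1` bytes arrive.
+sock.close()
+```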
+
+## Response
+
+
+```protobuf
+message RpbListBucketsResp {
+  repeated bytes buckets = 1;
+}
+```
+
+
+Values
+
+* `buckets` - Buckets on the server
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 01 0F
+Erlang <<0,0,0,1,15>>
+
+RpbListBucketsReq - only message code defined
+```
+
+
+#### Response
+
+```bash
+Hex      00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62
+         34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02
+         62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37
+Erlang <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10,
+         3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>>
+
+RpbListBucketsResp protoc decode:
+buckets: "b1"
+buckets: "b5"
+buckets: "b4"
+buckets: "b8"
+buckets: "b3"
+buckets: "b10"
+buckets: "b9"
+buckets: "b2"
+buckets: "b6"
+buckets: "b7"
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/list-keys.md
new file mode 100644
index 0000000000..1e3abd8d5b
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/list-keys.md
@@ -0,0 +1,100 @@
+---
+title: "PBC List Keys"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "List Keys"
+    identifier: "pbc_list_keys"
+    weight: 101
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/list-keys
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/list-keys
+---
+
+List all of the keys in a bucket. This is a streaming call, with
+multiple response messages sent for each request.
+
+{{% note title="Not for production use" %}}
+This operation requires traversing all keys stored in the cluster and should
+not be used in production.
+{{% /note %}}
+
+## Request
+
+```protobuf
+message RpbListKeysReq {
+  required bytes bucket = 1;
+}
+```
+
+Required Parameters
+
+* `bucket` - bucket to get keys from
+
+## Response
+
+```protobuf
+message RpbListKeysResp {
+  repeated bytes keys = 1;
+  optional bool done = 2;
+}
+```
+
+#### Values
+
+* **keys** - batch of keys in the bucket.
+* **done** - set to `true` on the last response packet
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73
+Erlang <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>>
+
+RpbListKeysReq protoc decode:
+bucket: "listkeys"
+
+```
+
+#### Response Packet 1
+
+```bash
+Hex      00 00 00 04 12 0A 01 34
+Erlang <<0,0,0,4,18,10,1,52>>
+
+RpbListKeysResp protoc decode:
+keys: "4"
+
+```
+
+#### Response Packet 2
+
+```bash
+Hex      00 00 00 08 12 0A 02 31 30 0A 01 33
+Erlang <<0,0,0,8,18,10,2,49,48,10,1,51>>
+
+RpbListKeysResp protoc decode:
+keys: "10"
+keys: "3"
+```
+
+
+#### Response Packet 3
+
+```bash
+Hex      00 00 00 03 12 10 01
+Erlang <<0,0,0,3,18,16,1>>
+
+RpbListKeysResp protoc decode:
+done: true
+
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/mapreduce.md
new file mode 100644
index 0000000000..7fa69a2a5d
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/mapreduce.md
@@ -0,0 +1,152 @@
+---
+title: "PBC MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "MapReduce"
+    identifier: "pbc_mapreduce"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/mapreduce
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/mapreduce
+---
+
+Execute a MapReduce job.
+
+## Request
+
+
+```protobuf
+message RpbMapRedReq {
+  required bytes request = 1;
+  required bytes content_type = 2;
+}
+```
+
+
+Required Parameters
+
+* `request` - MapReduce job
+* `content_type` - Encoding for MapReduce job
+
+MapReduce jobs can be encoded in two different ways:
+
+* `application/json` - JSON-encoded MapReduce job
+* `application/x-erlang-binary` - Erlang external term format
+
+The JSON encoding is the same as [REST API]({{}}riak/kv/2.9.10/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{}}riak/kv/2.9.10/developing/app-guide/advanced-mapreduce/#erlang).
+
+## Response
+
+The results of the MapReduce job are returned for each phase that
+generates a result, encoded in the same format the job was submitted in.
+Multiple response messages will be returned followed by a final message
+at the end of the job.
+
+```protobuf
+message RpbMapRedResp {
+  optional uint32 phase = 1;
+  optional bytes response = 2;
+  optional bool done = 3;
+}
+```
+
+
+Values
+
+* `phase` - Phase number of the MapReduce job
+* `response` - Response encoded with the content_type submitted
+* `done` - Set `true` on the last response packet
+
+## Example
+
+Here is an example of submitting a JSON-encoded job to sum up a bucket
+full of JSON-encoded values.
+
+```
+{"inputs": "bucket_501653",
+ "query":
+    [{"map": {"arg": null,
+              "name": "Riak.mapValuesJson",
+              "language": "javascript",
+              "keep": false}},
+     {"reduce": {"arg": null,
+                 "name": "Riak.reduceSum",
+                 "language": "javascript",
+                 "keep": true}}]}
+```
+
+Request
+
+```bash
+Hex      00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
+         22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
+         33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
+         6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
+         6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
+         6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
+         2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
+         61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
+         70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
+         65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
+         6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
+         69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
+         22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
+         61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
+         3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
+         69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
+Erlang <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
+         117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
+         121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
+         110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
+         109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
+         110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
+         116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
+         44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
+         32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
+         46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
+         97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
+         34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
+         112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>
+
+RpbMapRedReq protoc decode:
+request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null,
+"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}},
+ {"reduce": {"arg": null, "name": "Riak.reduceSum", "language":
+"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/ping.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..0ba4fa6d5f --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/ping.md @@ -0,0 +1,45 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/ping + - /riak/kv/2.9.10/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..4310775958 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,62 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/2.9.10/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/2.9.10/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
+
+## Example
+
+Request to reset the properties for the bucket `friends`:
+
+#### Request
+
+```bash
+Hex      00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
+Erlang <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>
+
+RpbResetBucketReq protoc decode:
+bucket: "friends"
+
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 01 1E
+Erlang <<0,0,0,1,30>>
+
+RpbResetBucketResp - only message code defined
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/search.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..4674be2db6
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/search.md
@@ -0,0 +1,151 @@
+---
+title: "PBC Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Search"
+    identifier: "pbc_search"
+    weight: 109
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/search
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/search
+---
+
+Send a Search request to retrieve a list of documents, along with a few
+stats.
+
+## Request
+
+
+```protobuf
+message RpbSearchQueryReq {
+  required bytes q = 1;
+  required bytes index = 2;
+  optional uint32 rows = 3;
+  optional uint32 start = 4;
+  optional bytes sort = 5;
+  optional bytes filter = 6;
+  optional bytes df = 7;
+  optional bytes op = 8;
+  repeated bytes fl = 9;
+  optional bytes presort = 10;
+}
+```
+
+Required Parameters
+
+* `q` - The contents of the query
+* `index` - The name of the index to search
+
+Optional Parameters
+
+* `rows` - The maximum number of rows to return
+* `start` - A start offset, i.e. the number of keys to skip before
+  returning values
+* `sort` - How the search results are to be sorted
+* `filter` - Filters search with additional query scoped to inline
+  fields
+* `df` - Override the `default_field` setting in the schema file
+* `op` - `and` or `or`, to override the `default_op` operation setting
+  in the schema file
+* `fl` - The list of fields to return
+* `presort` - Presort. The options are `key` or `score`
+
+
+## Response
+
+The results of a search query are returned as a repeating list of 0 or
+more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
+more key/value pairs (`RpbPair`) that match the given request
+parameters. It also returns the maximum search score and the number of
+results.
+
+
+```protobuf
+// RpbPair is a generic key/value pair datatype used for
+// other message types
+message RpbPair {
+  required bytes key = 1;
+  optional bytes value = 2;
+}
+
+message RpbSearchDoc {
+  repeated RpbPair fields = 1;
+}
+
+message RpbSearchQueryResp {
+  repeated RpbSearchDoc docs = 1;
+  optional float max_score = 2;
+  optional uint32 num_found = 3;
+}
+```
+
+Values
+
+* `docs` - A list of docs that match the search request
+* `max_score` - The top score returned
+* `num_found` - Returns the total number of values matched by this
+  search
+
+
+## Example
+
+Request
+
+Here we search for any animals that begin with the string `pig`. We only
+want the first 100, and sort the values by a `name` field.
+
+```bash
+RpbSearchQueryReq protoc decode:
+q: "pig*"
+index: "animals"
+rows: 100
+start: 0
+sort: "name"
+
+Hex      00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E
+         69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65
+Erlang <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110,
+         105,109,97,108,115,24,100,32,0,42,4,110,97,
+         109,101>>
+```
+
+Response
+
+```bash
+Hex      00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D
+         61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65
+         12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69
+         6D 61 6C 12 06 70 69 67 65 6F 6E 18 02
+Erlang <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109,
+         97,108,18,3,112,105,103,10,12,10,4,110,97,
+         109,101,18,4,102,114,101,100,10,18,10,16,10,
+         6,97,110,105,109,97,108,18,6,112,105,103,
+         101,111,110,24,2>>
+
+RpbSearchQueryResp protoc decode:
+docs {
+  fields {
+    key: "animal"
+    value: "pig"
+  }
+  fields {
+    key: "name"
+    value: "fred"
+  }
+}
+docs {
+  fields {
+    key: "animal"
+    value: "pigeon"
+  }
+}
+num_found: 2
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/secondary-indexes.md
new file mode 100644
index 0000000000..afc63e0c42
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/secondary-indexes.md
@@ -0,0 +1,124 @@
+---
+title: "PBC Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Secondary Indexes"
+    identifier: "pbc_secondary_indexes"
+    weight: 108
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/secondary-indexes
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/secondary-indexes
+---
+
+Request a set of keys that match a secondary index query.
+
+## Request
+
+```protobuf
+message RpbIndexReq {
+  enum IndexQueryType {
+    eq = 0;
+    range = 1;
+  }
+  required bytes bucket = 1;
+  required bytes index = 2;
+  required IndexQueryType qtype = 3;
+  optional bytes key = 4;
+  optional bytes range_min = 5;
+  optional bytes range_max = 6;
+  optional bool return_terms = 7;
+  optional bool stream = 8;
+  optional uint32 max_results = 9;
+  optional bytes continuation = 10;
+  optional uint32 timeout = 11;
+  optional bytes type = 12;
+  optional bytes term_regex = 13;
+  optional bool pagination_sort = 14;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket in which the objects are stored
+`index` | The name of the index to be queried
+`qtype` | The type of index query to be performed. This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`key` | The index value to match if `qtype` is set to `eq`
+`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
+`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
+`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
+`max_results` | If pagination is turned on, the number of results to be returned to the client
+`continuation` | An opaque continuation value returned in a previous paginated response, used to retrieve the next page of results
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.9.10/developing/usage/bucket-types).
+`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
+`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
+
+## Response
+
+The results of a Secondary Index query are returned as a repeating list
+of 0 or more keys that match the given request parameters.
+
+```protobuf
+message RpbIndexResp {
+  repeated bytes keys = 1;
+  repeated RpbPair results = 2;
+  optional bytes continuation = 3;
+  optional bool done = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`keys` | A list of keys that match the index request
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/fetch-object).
+`continuation` | Used for paginated responses
+`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
+
+## Example
+
+#### Request
+
+Here we look for any exact matches of `chicken` on an `animal_bin` index
+for a bucket named `farm`.
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/server-info.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..d216d85939 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,61 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/server-info + - /riak/kv/2.9.10/dev/references/protocol-buffers/server-info +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..17850f866f --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,71 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/2.9.10/dev/references/protocol-buffers/set-bucket-props +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/2.9.10/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
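+
+From the official Erlang client, the same operation is a single call. The
+following is a sketch that assumes an open `riakc_pb_socket` connection
+(`Pid`) and the client's `set_bucket/3` wrapper:
+
+```erlang
+%% Sketch: set allow_mult to true on the bucket "friends".
+%% Assumes Pid is an open riakc_pb_socket connection.
+ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
+```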
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..da42f809f6 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,34 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/2.9.10/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.9.10/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/get-bucket-props). + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..99286775d0 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,65 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/set-client-id + - /riak/kv/2.9.10/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
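+
+Because client IDs are deprecated (see the notice above), most applications
+will never need this call. For completeness, here is a hedged sketch via the
+Erlang client, assuming it still exposes a `set_client_id/2` wrapper:
+
+```erlang
+%% Sketch only: client IDs are ignored by Riak 1.4+ for conflict
+%% resolution. Pid is an open riakc_pb_socket connection; the actor
+%% name is a placeholder.
+riakc_pb_socket:set_client_id(Pid, <<"my_actor">>).
+```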
+
+## Example
+
+Request
+
+```
+Hex      00 00 00 07 05 0A 04 01 65 01 B6
+Erlang <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "\001e\001\266"
+
+```
+
+
+Response
+
+```
+Hex      00 00 00 01 06
+Erlang <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..580e538285
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,153 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/store-object
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/store-object
+---
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/2.9.10/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.9.10/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.9.10/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/2.9.10/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/2.9.10/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+  required bytes bucket = 1;
+  optional bytes key = 2;
+  optional bytes vclock = 3;
+  required RpbContent content = 4;
+  optional uint32 w = 5;
+  optional uint32 dw = 6;
+  optional bool return_body = 7;
+  optional uint32 pw = 8;
+  optional bool if_not_modified = 9;
+  optional bool if_none_match = 10;
+  optional bool return_head = 11;
+  optional uint32 timeout = 12;
+  optional bool asis = 13;
+  optional bool sloppy_quorum = 14;
+  optional uint32 n_val = 15;
+  optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that the integer value is less than or equal
+to N, _or_ a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier RpbGetResp message. Omit if this is a new key or if you deliberately want to create a sibling.
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`. +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_head` | Return the metadata for the now-stored object without returning the value of the object +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored + +The `if_not_modified`, `if_none_match`, and `asis` parameters are set +only for messages sent between nodes in a Riak cluster and should not be +set by Riak clients. + +#### Response + +```bash +message RpbPutResp { + repeated RpbContent contents = 1; + optional bytes vclock = 2; + optional bytes key = 3; +} +``` + +If `return_body` is set to `true` on the PUT request, the `RpbPutResp` +will contain the current object after the PUT completes, in `contents`, +as well as the object's [causal context]({{}}riak/kv/2.9.10/learn/concepts/causal-context), in the `vclock` +field. The `key` will be sent only if the server generated a random key +for the object. + +If `return_body` is not set and no key is generated, the PUT response +will be empty. + +## Example + +#### Request + +``` +Hex 00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B + 22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01 +Erlang <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34, + 98,97,114,34,125,40,2,56,1>> + +RpbPutReq protoc decode: +bucket: "b" +key: "k" +content { + value: "{"foo":"bar"}" +} +w: 2 +return_body: true + +``` + +#### Response + +``` +Hex 00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A + 22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39 + 36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF + B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60 + CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC + 0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0 + 12 FA 20 89 2C 00 +Erlang <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125, + 42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86, + 106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96, + 96,96,202,96,226,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33, + 182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>> + +RpbPutResp protoc decode: +contents { + value: "{"foo":"bar"}" + vtag: "1caykOD96iNAhomyeVjOYC" + last_mod: 1271453743 + last_mod_usecs: 406416 +} +vclock: "k316a```312`312005R,,351014206031L211214y254014Z!266G371 +302l315I254rw|240022372 211,000" + +``` + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-delete.md new file mode 100644 index 0000000000..09ae9227b4 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-delete.md @@ -0,0 +1,36 @@ +--- +title: "PBC Yokozuna Index Delete" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Yokozuna Index Delete" + identifier: "pbc_yz_index_delete" + weight: 122 + parent: "apis_pbc" +toc: true +aliases: + - 
/riak/2.9.10/dev/references/protocol-buffers/yz-index-delete + - /riak/kv/2.9.10/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..3f22dc05e7 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,62 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/yz-index-get + - /riak/kv/2.9.10/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.10/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. + + + diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..1813a5e4c6 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,48 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.10/dev/references/protocol-buffers/yz-index-put + - /riak/kv/2.9.10/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. 
+
+```protobuf
+message RpbYokozunaIndex {
+  required bytes name = 1;
+  optional bytes schema = 2;
+  optional uint32 n_val = 3;
+}
+```
+
+Each message specifying an index must include the index's name as a
+binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.10/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/#message-codes) code with no data on success.
+
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-schema-get.md
new file mode 100644
index 0000000000..0fe149c5cc
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-schema-get.md
@@ -0,0 +1,51 @@
+---
+title: "PBC Yokozuna Schema Get"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Yokozuna Schema Get"
+    identifier: "pbc_yz_schema_get"
+    weight: 123
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/yz-schema-get
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/yz-schema-get
+---
+
+Fetch a [search schema]({{}}riak/kv/2.9.10/developing/usage/search-schemas) from Riak Search.
+
+## Request
+
+In a request message, you only need to specify the name of the schema as
+a binary (under `name`):
+
+```protobuf
+message RpbYokozunaSchemaGetReq {
+  required bytes name = 1;  // Schema name
+}
+```
+
+## Response
+
+```protobuf
+message RpbYokozunaSchemaGetResp {
+  required RpbYokozunaSchema schema = 1;
+}
+```
+
+The response message will include a `RpbYokozunaSchema` structure.
+
+```protobuf
+message RpbYokozunaSchema {
+  required bytes name = 1;
+  optional bytes content = 2;
+}
+```
+
+This message includes the schema `name` and its XML `content`.
+
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-schema-put.md
new file mode 100644
index 0000000000..ea81b88850
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/protocol-buffers/yz-schema-put.md
@@ -0,0 +1,44 @@
+---
+title: "PBC Yokozuna Schema Put"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Yokozuna Schema Put"
+    identifier: "pbc_yz_schema_put"
+    weight: 124
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/references/protocol-buffers/yz-schema-put
+  - /riak/kv/2.9.10/dev/references/protocol-buffers/yz-schema-put
+---
+
+Create a new Solr [search schema]({{}}riak/kv/2.9.10/developing/usage/search-schemas).
+
+## Request
+
+```protobuf
+message RpbYokozunaSchemaPutReq {
+  required RpbYokozunaSchema schema = 1;
+}
+```
+
+Each message must contain a `RpbYokozunaSchema` object structure.
+
+```protobuf
+message RpbYokozunaSchema {
+  required bytes name = 1;
+  optional bytes content = 2;
+}
+```
+
+This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.9.10/developing/usage/search-schemas) `content` as XML.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/#message-codes) code with no data on success.
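+
+As a sketch of what this looks like from the official Erlang client
+(assuming it exposes a `create_search_schema/3` wrapper and an open
+connection `Pid`):
+
+```erlang
+%% Sketch: upload a Solr schema named "my_schema" from a local file.
+%% The file path is a placeholder.
+{ok, SchemaXML} = file:read_file("my_schema.xml"),
+ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXML).
+```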
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/api/repl-hooks.md b/content/riak/kv/2.9.10/developing/api/repl-hooks.md
new file mode 100644
index 0000000000..c56fcb922e
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/api/repl-hooks.md
@@ -0,0 +1,195 @@
+---
+title_supertext: "Riak Multi-Datacenter Replication:"
+title: "Hooks API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Multi-Datacenter REPL Hooks API"
+    identifier: "apis_repl_hooks"
+    weight: 100
+    parent: "developing_apis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v2/hooks
+  - /riak/kv/2.9.10/ops/mdc/v2/hooks
+---
+[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl
+
+This document is a guide to developing extensions for Riak's
+Multi-Datacenter Replication feature.
+
+## Replication Hooks
+
+Riak allows applications to register replication hooks to control
+either of the following:
+
+* when extra objects need to be replicated along with the current object
+* when an object should _not_ be replicated.
+
+To register a hook, you must call the following function in an
+application-specific Erlang module, where `MyMod` is to be replaced
+with the name of your custom module:
+
+```erlang
+riak_core:register([{repl_helper, MyMod}]).
+```
+
+## Replication Hook API
+
+A replication hook must implement the following functions:
+
+### send_realtime/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook controls whether an [object][object]
+replicated in realtime should be sent. To send this object, return `ok`;
+to prevent the object from being sent, return `cancel`. You can also
+return a list of Riak objects to be replicated immediately *before* the
+current object. This is useful when you have an object that refers to
+other objects, e.g. a chunked file, and want to ensure that all of the
+dependency objects are replicated before the dependent object.
+
+### send/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook is used in fullsync replication. To send this
+[object][object],
+return `ok`; to prevent the object from being sent, return `cancel`. You
+can also return a list of Riak objects to be replicated immediately
+*before* the current object. This is useful when you have an object
+that refers to other objects, e.g. a chunked file, and want to ensure
+that all the dependency objects are replicated before the dependent
+object.
+
+### recv/1
+
+```erlang
+(riak_object) -> ok | cancel
+```
+
+When an [object][object]
+is received by the client site, this hook is run. You can use it to
+update metadata or to deny the object.
+
+## Implementing a Sample Replication Hook
+
+The following is a simple replication hook that will log when an object
+is received via replication. For more information about the functions in
+the sample, see the [Replication Hook API](#replication-hook-api) section above.
+
+Here is the relevant Erlang code:
+
+```erlang
+%% Riak Enterprise MDC replication hook sample
+
+-module(riak_replication_hook_sample).
+-export([register/0]).
+-export([recv/1, send/2, send_realtime/2]).
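+
+%% register/0 (below) wires this module into riak_core's repl_helper
+%% list; recv/1, send/2, and send_realtime/2 implement the hook API
+%% described above.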
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + + + + diff --git a/content/riak/kv/2.9.10/developing/app-guide.md b/content/riak/kv/2.9.10/developing/app-guide.md new file mode 100644 index 0000000000..d3aa7c2852 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/app-guide.md @@ -0,0 +1,419 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/2.9.10/dev/using/application-guide/ + - /riak/kv/2.9.10/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/2.9.10/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.9.10/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.9.10/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/2.9.10/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.9.10/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.9.10/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.9.10/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.9.10/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.9.10/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.9.10/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.9.10/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.9.10/developing/usage/search +[use ref search]: {{}}riak/kv/2.9.10/using/reference/search +[usage 2i]: {{}}riak/kv/2.9.10/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.9.10/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.9.10/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.9.10/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.9.10/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.9.10/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.9.10/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.9.10/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.9.10/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.10/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/2.9.10/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/2.9.10/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.10/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.10/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.10/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.10/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.10/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.10/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.9.10/using/reference/strong-consistency +[use ref strong consistency]: 
{{}}riak/kv/2.9.10/using/reference/strong-consistency
+[cluster ops strong consistency]: {{}}riak/kv/2.9.10/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/2.9.10/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/2.9.10/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/2.9.10/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/2.9.10/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/2.9.10/setup/installing
+[getting started]: {{}}riak/kv/2.9.10/developing/getting-started
+[usage index]: {{}}riak/kv/2.9.10/developing/usage
+[usage search schema]: {{}}riak/kv/2.9.10/developing/usage/search-schemas
+[glossary]: {{}}riak/kv/2.9.10/learn/glossary
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answers to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data** - While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects** - Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects** - Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys** - It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects. Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]** - If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so. But if
+  your data can be modeled as a [counter][dev data types#counters],
+  [set][dev data types#sets], or [map][dev data types#maps], you
+  should seriously consider using [Riak Data Types][dev data types],
+  which can speed application development and transfer a great deal of
+  complexity away from the application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you use it to store:
+
+* **Objects that exceed 1-2MB in size** - If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies** - If your data cannot be
+  easily denormalized or if it requires that objects can be easily
+  assembled into and accessible as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommendable for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search] - Getting started with Riak Search
+* [Search Details][use ref search] - A detailed overview of the concepts and design
+  considerations behind Riak Search
+* [Search Schema][usage search schema] - How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API** - Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon).
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you.
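+
+As a rough sketch of what a Search query looks like from the Erlang client
+(the `famous` index and `name_s` field here are hypothetical, and `search/3`
+is assumed to be the client's query wrapper):
+
+```erlang
+%% Sketch: Solr-style wildcard query against a search index named
+%% "famous". Pid is an open riakc_pb_socket connection.
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>).
+```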
+ +### When Not to Use Search + +* **When deep pagination is needed** - At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** - In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] - A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] - A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] - An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** - If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** - If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** - If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** - While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
If you have data that requires + a modeling solution that can't be covered, you should stick to + standard K/V operations. +* **When object size is of significant concern** - Riak Data Types + behave much like other Riak objects, but they tend to carry more + metadata than normal Riak objects, especially maps. In most cases the + metadata payload will be a small percentage of the object's total + size, but if you want to keep objects as lean as possible, it may be + better to stick to normal K/V operations. + +## MapReduce + +Riak's MapReduce feature enables you to perform batch processing jobs in +a way that leverages Riak's distributed nature. When a MapReduce job is +sent to Riak, Riak automatically distributes the processing work to +where the target data lives, which can reduce network bandwidth. Riak +comes equipped with a set of default MapReduce jobs that you can employ, +or you can write and run your own MapReduce jobs in +[Erlang](http://www.erlang.org/). + +* [Using MapReduce][usage mapreduce] - A general guide to using MapReduce +* [Advanced MapReduce][apps mapreduce] - A more in-depth guide to MapReduce, + including code samples and implementation details + +### When to Use MapReduce + +* **Batch processing only** - You should use MapReduce only when truly + truly necessary. MapReduce jobs are very computationally expensive and + can degrade performance in production clusters. You should restrict + MapReduce usage to infrequent batch processing operations, preferably + carried out at times when your cluster is experiencing load that is + well below average. + +### When Not to Use MapReduce + +* **When another Riak feature will do** - Before even considering + using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible + solutions to your needs. + +In general, you should not think of MapReduce as, for example, Hadoop +within Riak. While it can be useful for certain types of +non-primary-key-based queries, it is neither a "Big Data" processing +tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should +consider using Riak in conjunction with a more suitable data processing +tool. + +## Secondary Indexes (2i) + +Using basic key/value operations in Riak sometimes leads to the +following problem: how do I know which keys I should look for? Secondary +indexes (2i) provide a solution to this problem, enabling you to tag +objects with either binary or integer metadata and then query Riak for +all of the keys that share specific tags. 2i is especially useful if +you're storing binary data that is opaque to features like [Riak Search][usage search]. + +* [Using Secondary Indexes][usage 2i] - A general guide to using 2i, along + with code samples and information on 2i features like pagination, + streaming, and sorting +* [Advanced Secondary Indexes][use ref 2i] - Implementation details behind 2i + +### When to Use Secondary Indexes + +* **When you require deep pagination** - At the moment, 2i's + deep pagination capabilities are more performant than those offered + by Search if you require pagination of more than 3-5 pages. This + will change, however, in the future, at which point we will + recommend using Search instead. + +### When Not to Use Secondary Indexes + +* **For most querying purposes** - If your use case does not + involve deep pagination, we recommend Search over 2i for _all_ + querying purposes. 
+* **If you're using Bitcask** - 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy to fit one of those Data Types. To see if this
+feature might be a good fit for your application, we recommend checking
+out the following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers that want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable?
+
+Although Riak always performs best when storing and retrieving immutable
+data, Riak also handles mutable objects very ably using a variety of
+eventual consistency principles. 
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] - Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] - A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] - How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] - A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] - A listing of frequently used terms in Riak's + documentation + + + + diff --git a/content/riak/kv/2.9.10/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.9.10/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..75c2c4b5b2 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,801 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/2.9.10/dev/advanced/mapreduce/ + - /riak/kv/2.9.10/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/2.9.10/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.9.10/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.9.10/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.9.10/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.9.10/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.9.10/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to +run MapReduce jobs using Erlang or JavaScript. + +{{% note title="Deprecation Warning" %}} +Javascript MapReduce is deprecated and will be removed in a future version. +{{% /note %}} + + +### Why Do We Use MapReduce for Querying Riak KV? + +Key/value stores like Riak KV generally do not offer the kinds of complex +querying capabilities found in other data storage systems, such as +relational databases. MapReduce enables you to perform powerful queries +over the data stored in Riak KV but should be used with caution. + +The main goal of MapReduce is to spread the processing of a query across +many systems to take advantage of parallel processing power. This is +generally done by dividing the query into several steps, i.e. dividing +the dataset into several chunks and then running those step/chunk pairs +on separate physical hosts. Riak KV's MapReduce has an additional goal: +increasing data locality. When processing a large dataset, it's often +much more efficient to take the computation to the data than it is to +bring the data to the computation. + +"Map" and "Reduce" are phases in the query process. Map functions take +one piece of data as input and produce zero or more results as output. +If you're familiar with [mapping over a list][mapping list] +in functional programming languages, you're already familiar with the +"Map" steps in a MapReduce query. + +## MapReduce caveats + +MapReduce should generally be treated as a fallback rather than a +standard part of an application. There are often ways to model data +such that dynamic queries become single key retrievals, which are +dramatically faster and more reliable in Riak KV, and tools such as Riak +search and 2i are simpler to use and may place less strain on a +cluster. + +### R=1 + +One consequence of Riak KV's processing model is that MapReduce queries +have an effective `R` value of 1. The queries are distributed +to a representative sample of the cluster where the data is expected to +be found, and if one server lacks a copy of data it's supposed to have, +a MapReduce job will not attempt to look for it elsewhere. + +For more on the value of `R`, see our documentation on [replication properties][apps replication properties]. + +### Key lists + +Asking Riak KV to generate a list of all keys in a production environment +is generally a bad idea. It's an expensive operation. + +Attempting to constrain that operation to a bucket (e.g., +`mapred_bucket` as used below) does not help because Riak KV must still +pull all keys from storage to determine which ones are in the +specified bucket. + +If at all possible, run MapReduce against a list of known keys. + +### Code distribution + +As we'll discuss in this document, the functions invoked from Erlang +MapReduce must be available on all servers in the cluster unless +using the client library from an Erlang shell. + +### Security restrictions + +If Riak's security functionality is enabled, there are two +restrictions on MapReduce that come into play: + +* The `riak_kv.mapreduce` permission must be granted to the user (or + via the user's groups) +* Other than the module `riak_kv_mapreduce`, any Erlang modules + distributed with Riak KV will **not** be accessible to custom MapReduce + code unless made available via the `add_path` mechanism documented + in [Installing Custom Code][use ref custom code]. + +## How Riak KV's MapReduce Queries Are Specified + +MapReduce queries in Riak KV have two components: (1) a list of inputs and +(2) a list of "steps," or "phases." 
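+
+Both components are easiest to see in a concrete query. The following
+sketch uses the Erlang client's `mapred/3`; the bucket and keys are
+placeholders, and the two phase functions are from the
+`riak_kv_mapreduce` module that ships with Riak KV:
+
+```erlang
+%% Sketch: two explicit inputs, then a map phase and a reduce phase.
+%% The final `true` marks the reduce phase's output for inclusion in
+%% the query result.
+Inputs = [{<<"groceries">>, <<"mine">>}, {<<"groceries">>, <<"yours">>}],
+Query = [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+         {reduce, {modfun, riak_kv_mapreduce, reduce_set_union}, none, true}],
+{ok, Results} = riakc_pb_socket:mapred(Pid, Inputs, Query).
+```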
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the key-data annotation, if any was included with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it.
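+
+As an illustration of the first strategy, here is a sketch of a
+re-reduce-safe set-union phase: its output is a sorted, de-duplicated
+list, which is itself valid input to a later evaluation of the same
+function.
+
+```erlang
+%% Sketch only: lists:usort/1 both sorts and removes duplicates, so
+%% repeated applications over concatenated inputs converge on the union.
+fun(Values, _Arg) ->
+    lists:usort(Values)
+end.
+```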
+ +### How a Link Phase Works in Riak KV + +Link phases find links matching patterns specified in the query +definition. The patterns specify which buckets and tags links must have. + +"Following a link" means adding it to the output list of this phase. The +output of this phase is often most useful as input to a map phase or to +another reduce phase. + +## Invoking MapReduce + +To illustrate some key ideas, we'll define a simple module that +implements a map function to return the key value pairs contained in a +bucket and use it in a MapReduce query via Riak KV's HTTP API. + +Here is our example MapReduce function: + +```erlang +-module(mr_example). + +-export([get_keys/3]). + +% Returns bucket and key pairs from a map phase +get_keys(Value,_Keydata,_Arg) -> + [{riak_object:bucket(Value),riak_object:key(Value)}]. +``` + +Save this file as `mr_example.erl` and proceed to compiling the module. + +{{% note title="Note on the Erlang Compiler" %}} +You must use the Erlang compiler (`erlc`) associated with the +Riak KV installation or the version of Erlang used when compiling Riak KV from +source. +{{% /note %}} + +Compiling the module is a straightforward process: + +```bash +erlc mr_example.erl +``` + +Successful compilation will result in a new `.beam` file, `mr_example.beam`. + +Send this file to your operator, or read about [installing custom code][use ref custom code] +on your Riak KV nodes. Once your file has been installed, all that +remains is to try the custom function in a MapReduce query. For +example, let's return keys contained within a bucket named `messages` +(please pick a bucket which contains keys in your environment). + +```curl +curl -XPOST localhost:8098/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' +``` + +The result should be a JSON map of bucket and key names expressed as key/value pairs. + +{{% note %}} +Be sure to install the MapReduce function as described above on all of +the nodes in your cluster to ensure proper operation. +{{% /note %}} + + +## Phase functions + +MapReduce phase functions have the same properties, arguments, and +return values whether you write them in Javascript or Erlang. + +### Map phase functions + +Map functions take three arguments (in Erlang, arity-3 is required). +Those arguments are: + + 1. `Value`: the value found at a key. This will be a Riak object, which + in Erlang is defined and manipulated by the `riak_object` module. + In Javascript, a Riak object looks like this: + + ```javascript + { + "bucket_type" : BucketTypeAsString, + "bucket" : BucketAsString, + "key" : KeyAsString, + "vclock" : VclockAsString, + "values" : [ + { + "metadata" : { + "X-Riak-VTag":VtagAsString, + "X-Riak-Last-Modified":LastModAsString, + "Links":[...List of link objects], + // ...other metadata... + }, + "data" : ObjectData + }, + // ...other metadata/data values (siblings)... + ] + } + ``` + 2. *KeyData* : key data that was submitted with the inputs to the query or phase. + 3. *Arg* : a static argument for the entire phase that was submitted with the query. + +A map phase should produce a list of results. You will see errors if +the output of your map function is not a list. Return the empty list if +your map function chooses not to produce output. If your map phase is +followed by another map phase, the output of the function must be +compatible with the input to a map phase - a list of bucket-key pairs or +`bucket-key-keydata` triples. 
+ +#### Map function examples + +These map functions return the value (data) of the object being mapped: + +```erlang +fun(Value, _KeyData, _Arg) -> + [riak_object:get_value(Value)] +end. +``` + +These map functions filter their inputs based on the arg and return bucket-key pairs for a subsequent map phase: + +```erlang +fun(Value, _KeyData, Arg) -> + Key = riak_object:key(Value), + Bucket = riak_object:bucket(Value), + case erlang:byte_size(Key) of + L when L > Arg -> + [{Bucket,Key}]; + _ -> [] + end +end. +``` + +### Reduce phase functions + +Reduce functions take two arguments. Those arguments are: + +1. *ValueList*: the list of values produced by the preceding phase in the MapReduce query. +2. *Arg* : a static argument for the entire phase that was submitted with the query. + +A reduce function should produce a list of values, but it must also be +true that the function is commutative, associative, and idempotent. That +is, if the input list `[a,b,c,d]` is valid for a given F, then all of +the following must produce the same result: + + +```erlang + F([a,b,c,d]) + F([a,d] ++ F([c,b])) + F([F([a]),F([c]),F([b]),F([d])]) +``` + +#### Reduce function examples + +These reduce functions assume the values in the input are numbers and +sum them: + +```erlang +fun(Values, _Arg) -> + [lists:foldl(fun erlang:'+'/2, 0, Values)] +end. +``` + +These reduce functions sort their inputs: + +```erlang +fun(Values, _Arg) -> + lists:sort(Values) +end. +``` + +## MapReduce Examples + +Riak KV supports describing MapReduce queries in Erlang syntax through the +Protocol Buffers API. This section demonstrates how to do so using the +Erlang client. + +{{% note title="Distributing Erlang MapReduce Code" %}} +Any modules and functions you use in your Erlang MapReduce calls must be +available on all nodes in the cluster. Please read about +[installing custom code]({{}}riak/kv/2.9.10/using/reference/custom-code). +{{% /note %}} + +### Erlang Example + +Before running some MapReduce queries, let's create some objects to +run them on. Unlike the first example when we compiled +`mr_example.erl` and distributed it across the cluster, this time +we'll use the [Erlang client library][erlang client] and shell. + +```erlang +1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087). +2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>, + term_to_binary(["eggs", "bacon"])). +3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>, + term_to_binary(["bread", "bacon"])). +4> riakc_pb_socket:put(Client, Yours, [{w, 1}]). +5> riakc_pb_socket:put(Client, Mine, [{w, 1}]). +``` + +Now that we have a client and some data, let's run a query and count how +many occurrences of groceries. + +```erlang +6> Count = fun(G, undefined, none) -> + [dict:from_list([{I, 1} + || I <- binary_to_term(riak_object:get_value(G))])] + end. +7> Merge = fun(Gcounts, none) -> + [lists:foldl(fun(G, Acc) -> + dict:merge(fun(_, X, Y) -> X+Y end, + G, Acc) + end, + dict:new(), + Gcounts)] + end. +8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred( + Client, + [{<<"groceries">>, <<"mine">>}, + {<<"groceries">>, <<"yours">>}], + [{map, {qfun, Count}, none, false}, + {reduce, {qfun, Merge}, none, true}]). +9> L = dict:to_list(R). +``` + +{{% note title="Riak Object Representations" %}} +Note how the `riak_object` module is used in the MapReduce +function but the `riakc_obj` module is used on the client. +Riak objects are represented differently internally to the cluster than +they are externally. 
+{{% /note %}} + +Given the lists of groceries we created, the sequence of commands above +would result in L being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`. + +### Erlang Query Syntax + +`riakc_pb_socket:mapred/3` takes a client and two lists as arguments. +The first list contains bucket-key pairs. The second list contains +the phases of the query. + +`riakc_pb_socket:mapred_bucket/3` replaces the first list of +bucket-key pairs with the name of a bucket; see the warnings above +about using this in a production environment. + +#### Inputs + +The `mapred/3` input objects are given as a list of tuples in the +format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and +`Key` should be binaries, and `KeyData` can be any Erlang term. The +former form is equivalent to `{{Bucket,Key},undefined}`. + +#### Query + +The query is given as a list of map, reduce and link phases. Map and +reduce phases are each expressed as tuples in the following form: + + +```erlang +{Type, FunTerm, Arg, Keep} +``` + +`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument +(any Erlang term) to pass to each execution of the phase. `Keep` is +either `true` or `false` and determines whether results from the phase +will be included in the final value of the query. Riak KV assumes that the +final phase will return results. + +`FunTerm` is a reference to the function that the phase will execute and +takes any of the following forms: + +* `{modfun, Module, Function}` where `Module` and `Function` are atoms + that name an Erlang function in a specific module +* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous + function) +* `{jsfun,Name}` where `Name` is a binary that, when evaluated in + Javascript, points to a built-in Javascript function +* `{jsanon, Source}` where `Source` is a binary that, when evaluated in + Javascript is an anonymous function +* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains + the source for an anonymous Javascript function + +{{% note title="qfun Note" %}} +Using `qfun` in compiled applications can be a fragile +operation. Please keep the following points in mind: + +1. The module in which the function is defined must be present and +exactly the same version on both the client and Riak KV nodes. + +2. Any modules and functions used by this function (or any function in +the resulting call stack) must also be present on the Riak KV nodes. + +Errors about failures to ensure both 1 and 2 are often surprising, +usually seen as opaque missing-function or function-clause +errors. Especially in the case of differing module versions, this can be +difficult to diagnose without expecting the issue and knowing of +`Module:info/0`. + +When using the Erlang shell, anonymous MapReduce functions can be +defined and sent to Riak KV instead of deploying them to all servers in +advance, but condition #2 above still holds. +{{% /note %}} + +Link phases are expressed in the following form: + + +```erlang +{link, Bucket, Tag, Keep} +``` + + +`Bucket` is either a binary name of a bucket to match, or the atom `_`, +which matches any bucket. `Tag` is either a binary tag to match, or the +atom `_`, which matches any tag. `Keep` has the same meaning as in map +and reduce phases. + + +> There are a small group of prebuilt Erlang MapReduce functions available +with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl). 
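+
+As a sketch (reusing the `groceries` objects created above and assuming
+a connected `Client` pid), a query built entirely from those prebuilt
+functions might look like this:
+
+```erlang
+%% map_object_value/3 extracts each object's value; reduce_sort/2
+%% sorts the combined results. Both ship with Riak KV, so nothing
+%% needs to be deployed to the cluster first.
+{ok, [{1, Sorted}]} =
+    riakc_pb_socket:mapred(
+      Client,
+      [{<<"groceries">>, <<"mine">>}, {<<"groceries">>, <<"yours">>}],
+      [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+       {reduce, {modfun, riak_kv_mapreduce, reduce_sort}, none, true}]).
+```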
+ +## Bigger Data Examples + +### Loading Data + +This Erlang script will load historical stock-price data for Google +(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it. +Paste the code below into a file called `load_data.erl` inside the `dev` +directory (or download it below). + +```erlang +#!/usr/bin/env escript +%% -*- erlang -*- +main([]) -> + io:format("Requires one argument: filename with the CSV data~n"); +main([Filename]) -> + {ok, Data} = file:read_file(Filename), + Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])), + lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines). + +format_and_insert(Line) -> + JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line), + Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]), + io:format("Inserting: ~s~n", [hd(Line)]), + os:cmd(Command). +``` + +Make the script executable: + +```bash +chmod +x load_data.erl +``` + +Download the CSV file of stock data linked below and place it in the +`dev` directory where we've been working. + +* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) - Google historical stock data +* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) - Alternative script in Ruby to load the data +* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) - Erlang script to load data (as shown in snippet) + +Now load the data into Riak KV. + +```bash +./load_data.erl goog.csv +``` + + +### Map only: find the days on which the high was over $600.00 + +From the Erlang shell with the client library loaded, let's define a +function which will check each value in our `goog` bucket to see if +the stock's high for the day was above $600. + +```erlang +> HighFun = fun(O, _, LowVal) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> High = proplists:get_value(<<"High">>, Map, -1.0), +> case High > LowVal of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun +``` + +Now we'll use `mapred_bucket/3` to send that function to the cluster. + +```erlang +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]). + {ok,[{0, + [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>, + <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>, + <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>, + <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>, + <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>, + <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>, + <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>, + <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]} +``` + +#### Map only: find the days on which the close is lower than open + +This example is slightly more complicated: instead of comparing a +single field against a fixed value, we're looking for days when the +stock declined. + +```erlang +> CloseLowerFun = fun(O, _, _) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> Close = proplists:get_value(<<"Close">>, Map, -1.0), +> Open = proplists:get_value(<<"Open">>, Map, -2.0), +> case Close < Open of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun + +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]). 
+{ok,[{0, + [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>, + <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>, + <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>, + <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>, + <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>, + <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>, + <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>, + <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]} +``` + +#### Map and Reduce: find the maximum daily variance in price by month + +Here things start to get tricky. We'll use map to determine each day's +rise or fall, and our reduce phase will identify each month's largest +variance. + +```erlang +DailyMap = fun(O, _, _) -> + {struct, Map} = mochijson2:decode(riak_object:get_value(O)), + Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")), + High = proplists:get_value(<<"High">>, Map, 0.0), + Low = proplists:get_value(<<"Low">>, Map, 0.0), + Month = string:substr(Date, 1, 7), + [{Month, abs(High - Low)}] +end. + +MonthReduce = fun(List, _) -> + {Highs, _} = lists:foldl( + fun({Month, _Value}=Item, {Accum, PrevMonth}) -> + case Month of + PrevMonth -> + %% Highest value is always first in the list, so + %% skip over this one + {Accum, PrevMonth}; + _ -> + {[Item] ++ Accum, Month} + end + end, + {[], ""}, + List), + Highs + end. +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]). +{ok,[{1, + [{"2010-02",10.099999999999909}, + {"2006-02",11.420000000000016}, + {"2004-08",8.100000000000009}, + {"2008-08",14.490000000000009}, + {"2006-05",11.829999999999984}, + {"2005-10",4.539999999999964}, + {"2006-06",7.300000000000011}, + {"2008-06",9.690000000000055}, + {"2006-03",11.770000000000039}, + {"2006-12",4.880000000000052}, + {"2005-09",9.050000000000011}, + {"2008-03",15.829999999999984}, + {"2008-09",14.889999999999986}, + {"2010-04",9.149999999999977}, + {"2008-06",14.909999999999968}, + {"2008-05",13.960000000000036}, + {"2005-05",2.780000000000001}, + {"2005-07",6.680000000000007}, + {"2008-10",21.390000000000043}, + {"2009-09",4.180000000000007}, + {"2006-08",8.319999999999993}, + {"2007-08",5.990000000000009}, + {[...],...}, + {...}|...]}]} +``` + +#### A MapReduce Challenge + +Here is a scenario involving the data you already have loaded. + +MapReduce Challenge: Find the largest day for each month in terms of +dollars traded, and subsequently the largest overall day. + +*Hint*: You will need at least one each of map and reduce phases. + +## Streaming MapReduce + +Because Riak KV distributes the map phases across the cluster to increase +data locality, you can gain access to the results of those individual +computations as they finish via streaming. Streaming can be very +helpful when getting access to results from a high latency MapReduce job +that only contains map phases. Streaming of results from reduce phases +isn't as useful, but if your map phases return data (keep: true), they +will be returned to the client even if the reduce phases haven't +executed. This will let you use streaming with a reduce phase to collect +the results of the map phases while the jobs are run and then get the +result to the reduce phase at the end. + +### Streaming via the HTTP API + +You can enable streaming with MapReduce jobs submitted to the `/mapred` +resource by adding `?chunked=true` to the url. The response will be sent +using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`. 
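+
+For example, a streamed version of the earlier query (reusing the
+hypothetical `mr_example` module and `messages` bucket) would look like
+this:
+
+```curl
+curl -XPOST 'http://localhost:8098/mapred?chunked=true' \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}'
+```
+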
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error

+Internal Server Error
+
+The server encountered an error while processing this request:
+
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, typically the information you need is buried more deeply within the stack.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.9.10/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..69b43a23ce
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,70 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+---
+
+Cluster metadata is a subsystem inside of Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/2.9.10/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.9.10/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.9.10/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
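+
+As a rough sketch (the prefix, key, and value below are hypothetical;
+consult the module itself for the authoritative API), usage from within
+a `riak_core` application might look like this:
+
+```erlang
+%% Sketch only: full prefixes are {Prefix, SubPrefix} tuples, and
+%% values are opaque Erlang terms. This runs inside a Riak node,
+%% not from an external client.
+FullPrefix = {<<"my_app">>, <<"settings">>},
+
+%% Store a value; it is asynchronously broadcast to the whole cluster.
+ok = riak_core_metadata:put(FullPrefix, <<"log_level">>, info),
+
+%% Read it back from the local node's copy (no network round trip).
+info = riak_core_metadata:get(FullPrefix, <<"log_level">>),
+
+%% Remove it again.
+ok = riak_core_metadata:delete(FullPrefix, <<"log_level">>).
+```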
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/app-guide/reference.md b/content/riak/kv/2.9.10/developing/app-guide/reference.md
new file mode 100644
index 0000000000..6e5085be93
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/app-guide/reference.md
@@ -0,0 +1,19 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+#menu:
+#  riak_kv-2.9.10:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+---
+
+**TODO: Add content**
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/app-guide/replication-properties.md b/content/riak/kv/2.9.10/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..d2b461cf3d
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/app-guide/replication-properties.md
@@ -0,0 +1,583 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/replication-properties
+  - /riak/kv/2.9.10/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/2.9.10/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/2.9.10/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/2.9.10/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/2.9.10/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.9.10/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.9.10/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/2.9.10/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, as shown in
+the [client-level examples below]({{}}riak/kv/2.9.10/developing/app-guide/replication-properties#client-level-replication-settings).
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.9.10/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props` those properties will apply to it.
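+
+For example, a write via the HTTP API to a (hypothetical) bucket of
+that type would look like this:
+
+```curl
+curl -XPUT http://localhost:8098/types/custom_props/buckets/sensor_data/keys/reading1 \
+  -H "Content-Type: text/plain" \
+  -d "this write inherits n_val=5, r=3, w=3 from its bucket type"
+```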
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/2.9.10/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | Number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.9.10/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/2.9.10/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write
+replicas of the object to two different nodes.
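+
+If you want to confirm that the properties took effect, you can read
+the bucket's properties back via the HTTP API (a sketch, using the
+bucket from the write above):
+
+```curl
+curl http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/props
+```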
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location giraffeKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(giraffeKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only
+if 3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the success response reflects a write to only a
+single node, with replication to the other nodes still pending.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.9.10/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the *primary
+read* (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
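+
+As a sketch (the type name here is hypothetical), a bucket type that
+requires at least one primary vnode to respond on both reads and writes
+could be created and activated like so:
+
+```bash
+riak-admin bucket-type create primary_quorum '{"props":{"pr":1,"pw":1}}'
+riak-admin bucket-type activate primary_quorum
+```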
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/2.9.10/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.9.10/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.9.10/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify 
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
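+
+Expressed in Erlang (a trivial illustration, not Riak internals):
+
+```erlang
+%% quorum(N) = floor(N/2) + 1
+Quorum = fun(N) -> N div 2 + 1 end.
+%% Quorum(3) =:= 2, Quorum(5) =:= 3, Quorum(7) =:= 4, Quorum(8) =:= 5.
+```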
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
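+
+Symbolic names can be used anywhere an integer value would be. For
+example, here is a read (a sketch, reusing the `animal_facts` bucket
+from earlier) that requires responses from all replicas:
+
+```curl
+curl 'http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee?r=all'
+```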
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl 'http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true'
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  'http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2'
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/2.9.10/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/2.9.10/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.9.10/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
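+The last two write scenarios are why read/modify/write is the usual
+update pattern: an object fetched from Riak carries its causal context,
+and the client sends that context back with the write. Continuing the
+Python sketch above (again, the bucket and key names are illustrative):
+
+```python
+bucket = client.bucket('animals')
+
+# Blind write: no causal context accompanies the request. If the key
+# already exists and allow_mult is true, this creates a sibling.
+bucket.new('liger', data={'sightings': 1}).store()
+
+# Read/modify/write: the fetched object carries its causal context, so
+# the store() below is an update rather than a conflicting write.
+obj = bucket.get('liger')
+obj.data['sightings'] += 1
+obj.store()
+```
+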
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+Tuning CAP Controls in Riak from Basho Technologies on Vimeo.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/app-guide/strong-consistency.md b/content/riak/kv/2.9.10/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..795231d47b
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/app-guide/strong-consistency.md
@@ -0,0 +1,260 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/strong-consistency
+  - /riak/kv/2.9.10/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/2.9.10/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/2.9.10/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/2.9.10/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/2.9.10/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/2.9.10/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/2.9.10/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/2.9.10/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.10/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/2.9.10/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/2.9.10/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/2.9.10/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/2.9.10/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/2.9.10/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/2.9.10/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/2.9.10/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/2.9.10/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/2.9.10/developing/client-libraries
+[getting started]: {{}}riak/kv/2.9.10/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/2.9.10/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types, and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent plus partition
+tolerant) for all of the data in that bucket. You can store some or all
+of your data in strongly consistent buckets, depending on your use
+case. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability: a [quorum][use ref strong consistency#trade-offs] of the primary [vnodes][glossary vnode] responsible for the key must be online and reachable, or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
+
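+From a client's perspective, using such a bucket is only a matter of
+addressing it through the bucket type. A minimal sketch with the
+official Python client, assuming an already-configured `client` object
+(the bucket and key names are illustrative):
+
+```python
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+obj = bucket.new('user42', data={'balance': 100})
+obj.store()
+```
+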
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function quite differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations enable you to set a variety of
+[replication properties][apps replication properties], such as `r`, `pr`, `w`, and `rw`, either on
+each request or at the bucket level [using bucket types][usage bucket types], strongly
+consistent operations quietly ignore those settings. Two replication
+properties that _can_ be set, however, are `n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
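+
+Putting these points together, a strongly consistent bucket type with an
+`n_val` of 5 could be created and activated as follows. This sketch uses
+the same `riak-admin` syntax as above; the type name `consistent_n5` is
+illustrative:
+
+```bash
+riak-admin bucket-type create consistent_n5 \
+    '{"props":{"consistent":true,"n_val":5}}'
+riak-admin bucket-type activate consistent_n5
+```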
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients, but with important advantages.
+
+While we strongly recommend attaching context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---doing so is purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to write to nonexistent keys without attaching a context,
+we recommend doing this only if you are certain that the key does not
+yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
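+
+As a rough illustration, the read/modify/write pattern below satisfies
+the context requirement with the official Python client, because the
+fetched object carries its causal context and `store()` sends it back.
+This is a sketch only, assuming an already-configured `client` object;
+the bucket and key names are illustrative:
+
+```python
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+# Fetching first attaches the object's causal context to the client-side
+# object; storing without it would fail on a strongly consistent bucket.
+obj = bucket.get('user42')
+obj.data = {'balance': 150}
+obj.store()
+```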
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure,
+   default to supplying a context.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates.
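+
+Because of the third point above, applications typically wrap strongly
+consistent updates in a retry loop that repeats the whole
+read/modify/write cycle. The sketch below uses the official Python
+client; `update_with_retry` and `modify` are hypothetical helpers, and
+the exception handling is deliberately coarse:
+
+```python
+from riak import RiakError
+
+def update_with_retry(bucket, key, modify, attempts=5):
+    # Hypothetical helper: retry the full read/modify/write cycle,
+    # re-fetching each time so a fresh causal context is attached.
+    for _ in range(attempts):
+        obj = bucket.get(key)
+        obj.data = modify(obj.data)
+        try:
+            obj.store()
+            return obj
+        except RiakError:
+            continue  # a concurrent update won; fetch and try again
+    raise RuntimeError('update failed after %d attempts' % attempts)
+```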
+
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+non-strongly-consistent buckets. One important exception to this is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, and so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a non-empty key without including causal
+context, you will receive the following error:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+Precondition Failed
+
+mochiweb+webmachine web server
+``` + +> **Getting Started with Riak KV clients** +> +> If you are connecting to Riak using one of Basho's official +[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section. + +## Known Issue with Client Libraries + +All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no +problems, since many error conditions are normal when using Riak. + +When working with strong consistency, however, operations like +[conditional puts][config strong consistency#details] commonly +produce errors that are difficult for clients to interpret. For example, +it is expected behavior for conditional puts to fail in the case of +concurrent updates to an object. At present, the official Riak clients +will convert this failure into an exception that is no different from +other error conditions, i.e. they will not indicate any +strong-consistency-specific errors. + +The best solution to this problem at the moment is to catch these +exceptions on the application side and parse server-side error messages +to see if the error involved a conditional failure. If so, you should +set up your application to retry any updates, perhaps a specified number +of times or perhaps indefinitely, depending on the use case. + +If you do set up a retry logic of this sort, however, it is necessary +to retry the entire read/modify/put cycle, meaning that you will need +to fetch the object, modify it, and then write. If you perform a simple +put over and over again, without reading the object, the update will +continue to fail. + +A future version of Riak will address these issues by modifying the +server API to more accurately report errors specific to strongly +consistent operations. + + + diff --git a/content/riak/kv/2.9.10/developing/app-guide/write-once.md b/content/riak/kv/2.9.10/developing/app-guide/write-once.md new file mode 100644 index 0000000000..65df088463 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/app-guide/write-once.md @@ -0,0 +1,158 @@ +--- +title: "Write Once" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Write Once" + identifier: "app_guide_write_once" + weight: 102 + parent: "developing_app_guide" +toc: true +version_history: + in: "2.1.0+" +aliases: + - /riak/2.9.10/dev/advanced/write-once + - /riak/kv/2.9.10/dev/advanced/write-once +--- + +[glossary vnode]: {{}}riak/kv/2.9.10/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.9.10/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.9.10/developing/data-types +[strong consistency]: {{}}riak/kv/2.9.10/developing/app-guide/strong-consistency + +Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. + +{{% note %}} +Write-once buckets do not support Riak commit hooks. Because Riak objects are +inserted into the realtime queue using a postcommit hook, realtime replication +is unavailable for write-once buckets. 
Fullsync replication will, however, +replicate the data. +{{% /note %}} + +## Configuration + +When the new `write_once` [bucket type][bucket type] parameter is set to +`true`, buckets of type will treat all key/value entries as semantically "write +once;" once written, entries should not be modified or overwritten by the user. + +The `write_once` property is a boolean property applied to a bucket type and +may only be set at bucket creation time. Once a bucket type has been set with +this property and activated, the `write_once` property may not be modified. + +The `write_once` property is incompatible with [Riak data types][Riak data types] +and [strong consistency][strong consistency], This means that if you attempt +to create a bucket type with the `write_once` property set to `true`, any +attempt to set the `datatype` parameter or to set the `consistent` parameter +to `true` will fail. + +The `write_once` property may not be set on the default bucket type, and may +not be set on individual buckets. If you set the `lww` or `allow_mult` +parameters on a write-once bucket type, those settings will be ignored, as +sibling values are disallowed by default. + +The following example shows how to configure a bucket type with the +`write_once` property: + +```bash +riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}' +# my-bucket-type created + +riak-admin bucket-type activate my-bucket-type +# my-bucket-type has been activated + +riak-admin bucket-type status my-bucket-type +# my-bucket-type is active +... +write_once: true +... +``` + +## Quorum + +The write path used by write-once buckets supports the `w`, `pw`, and `dw` +configuration values. However, if `dw` is specified, then the value of `w` is +taken to be the maximum of the `w` and `dw` values. For example, for an `n_val` +of 3, if `dw` is set to `all`, then `w` will be `3`. + +This write additionally supports the `sloppy_quorum` property. If set to +`false`, only primary nodes will be selected for calculation of write quorum +nodes. + +## Runtime + +The write-once path circumvents the normal coordinated PUT code path, and +instead sends write requests directly to all [vnodes][glossary vnode] (or +vnode proxies) in the effective preference list for the write operation. + +In place of the `put_fsm` used in the normal path, we introduce a collection of +new intermediate worker processes (implementing `gen_server` behavior). The +role of these intermediate processes is to dispatch put requests to vnode or +vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`, +the write-once workers are long-lived for the lifecycle of the `riak_kv` +application. They are therefore stateful and store request state in a state- +local dictionary. + +The relationship between the `riak_client`, write-once workers, and vnode +proxies is illustrated in the following diagram: + +
+![Write Once]({{}}images/write_once.png)
+
+ +## Client Impacts + +Since the write-once code path is optimized for writes of data that will not +be updated and therefore may potentially issue asynchronous writes, some +client features might not work as expected. For example, PUT requests asking +for the object to be returned will behave like requests that do not +request the object to be returned when they are performed against write-once +buckets. + + +## Siblings + +As mentioned, entries in write-once buckets are intended to be written only +once---users who are not abusing the semantics of the bucket type should not be +updating or over-writing entries in buckets of this type. However, it is +possible for users to misuse the API, accidentally or otherwise, which might +result in incomparable entries for the same key. + +In the case of siblings, write-once buckets will resolve the conflict by +choosing the "least" entry, where sibling ordering is based on a deterministic +SHA-1 hash of the objects. While this algorithm is repeatable and deterministic +at the database level, it will have the appearance to the user of "random write +wins." + +{{% note %}} +As mentioned in [Configuration](#configuration), write-once buckets and Riak +Data Types are incompatible because of this. +{{% /note %}} + + +## Handoff + +The write-once path supports handoff scenarios, such that if a handoff occurs +during PUTs in a write-once bucket, the values that have been written will be +handed off to the newly added Riak node. + +## Asynchronous Writes + +For backends that support asynchronous writes, the write-once path will +dispatch a write request to the backend and handle the response +asynchronously. This behavior allows the vnode to free itself for other work +instead of waiting on the write response from the backend. + +At the time of writing, the only backend that supports asynchronous writes is +LevelDB. Riak will automatically fall back to synchronous writes with all other +backends. + +{{% note title="Note on the `multi` backend" %}} +The [Multi]({{}}riak/kv/2.9.10/setup/planning/backend/multi) backend does not +support asynchronous writes. Therefore, if LevelDB is used with the Multi +backend, it will be used in synchronous mode. +{{% /note %}} + + + diff --git a/content/riak/kv/2.9.10/developing/client-libraries.md b/content/riak/kv/2.9.10/developing/client-libraries.md new file mode 100644 index 0000000000..ff0b1d4586 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/client-libraries.md @@ -0,0 +1,293 @@ +--- +title: "Client Libraries" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Client Libraries" + identifier: "developing_client_libraries" + weight: 106 + parent: "developing" +toc: true +aliases: + - /riak/2.9.10/dev/using/libraries + - /riak/kv/2.9.10/dev/using/libraries +--- + +## Basho-Supported Libraries + +Basho officially supports a number of open-source client libraries for a +variety of programming languages and environments. 
+ +Language | Source | Documentation | Download +:--------|:-------|:--------------|:-------- +Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22) | +Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client) +Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads) +C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases) +Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases) +PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) +Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client) +Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client) + +**Note**: All official clients use the integrated issue tracker on +GitHub for bug reporting. + +In addition to the official clients, Basho provides some unofficial +client libraries, listed below. There are also many client libraries and +related [community projects]({{}}community/projects/). + + +## Community Libraries + +The Riak Community is developing at a break-neck pace, and the number of +community-contributed libraries and drivers is growing right along side +it. Here is a list of projects that may suit your programming needs or +curiosities. If you know of something that needs to be added or are +developing something that you wish to see added to this list, please +fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs) +and send us a pull request. + +{{% note title="Note on community-produced libraries" %}} +All of these projects and libraries are at various stages of completeness and +may not suit your application's needs based on their level of maturity and +activity. 
+{{% /note %}} + +### Client Libraries and Frameworks + +#### C/C++ + +* [riak-cpp](https://github.com/ajtack/riak-cpp) - A C++ Riak client + library for use with C++11 compilers +* [Riak C Driver](https://github.com/fenek/riak-c-driver) - A library + to communicate with Riak using cURL and Protocol Buffers +* [Riack](https://github.com/trifork/riack) - A simple C client + library +* [Riack++](https://github.com/TriKaspar/riack_cpp) - A C++ wrapper + around riack + +#### Clojure + +* [knockbox](https://github.com/reiddraper/knockbox) - An eventual + consistency toolbox for Clojure +* [Welle](http://clojureriak.info) - An expressive Clojure client with + batteries included +* [clj-riak](http://github.com/mmcgrana/clj-riak) - Clojure bindings + to the Riak Protocol Buffers API +* [sumo](https://github.com/reiddraper/sumo) - A Protocol + Buffer-specific client for Riak with KV, 2i, and MapReduce support +* [kria](https://github.com/bluemont/kria) - Riak 2.0 Asynchronous + (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer + API, Java 7. + +#### ColdFusion + +* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) - A Riak-backed cache extension for Railo/ColdFusion + +#### Common Lisp + +* [cl-riak (1)](https://github.com/whee/cl-riak) +* [cl-riak (2)](https://github.com/eriknomitch/cl-riak) + +#### Dart + +* [riak-dart](https://github.com/agilord/riak_dart_client) - HTTP + client for Riak written in Dart + +#### Django (Python) + +* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) - Riak-based Session Backend for Django +* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) - A Riak backend for Django + +#### Erlang + +* [Uriak Pool](https://github.com/unisontech/uriak_pool) - Erlang + connection pool library from the team at + [Unison](http://www.unison.com) +* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) - Riak + Protocol Buffer Client pool application +* [Pooly](https://github.com/aberman/pooly) - Riak Process Pool +* [riakpool](https://github.com/dweldon/riakpool) - Application for + maintaining a dynamic pool of Protocol Buffer client connections to a + Riak database +* [pooler](https://github.com/seth/pooler) - An OTP Process Pool + Application +* [krc](https://github.com/klarna/krc) - A simple wrapper around the + official Riak client for Erlang +* [riakc_pool](https://github.com/brb/riakc_pool) - A really simple + Riak client process pool based on poolboy + +#### Go + +* [riaken](https://github.com/riaken) - A fast and extendable Riak + Protocol Buffer Client +* [goriakpbc](https://github.com/tpjg/goriakpbc) - A Golang Riak + client inspired by the Ruby riak-client from Basho and riakpbc from mrb +* [riakpbc](https://github.com/mrb/riakpbc) - A Riak Protocol Buffer + client in Go +* [goriak](https://github.com/zegl/goriak) - Go language driver for Riak KV + +#### Grails + +* [Grails ORM for Riak](http://www.grails.org/plugin/riak) + +#### Griffon + +* [Riak Plugin for + Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin) + +#### Groovy + +* [spring-riak](https://github.com/jbrisbin/spring-riak) - Riak + support from Groovy and/or Java + +#### Haskell + +* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) - A fast Haskell client library from the team at MailRank. 
+ +#### Java + +* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) - Java Client Library for Riak based on the Protocol Buffers API +* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) - Asynchronous, NIO-based Protocol Buffers client for Riak +* [Riak Module for the Play + Framework](http://www.playframework.org/modules/riak-head/home) + +#### Lisp-flavored Erlang + +* [Gutenberg](https://github.com/dysinger/gutenberg/) - Riak MapReduce + examples written in LFE + +#### Node.js + +* [zukai](https://github.com/natural/zukai) - Riak ODM for Node.js + from Troy Melhase +* [riak-pb](https://github.com/CrowdProcess/riak-pb) - Riak Protocol + Buffers client for Node.js from the team at + [CrowdProcess](http://crowdprocess.com) +* [node_riak](https://github.com/mranney/node_riak) - Voxer's + production Node.js client for Riak. +* [riakpbc](https://github.com/nlf/riakpbc) - A simple Riak Protocol + Buffer client library for Node.js +* [nodiak](https://npmjs.org/package/nodiak) - Supports bulk + get/save/delete, sibling auto-resolution, MapReduce chaining, Search, + and 2i's +* [resourceful-riak](https://github.com/admazely/resourceful-riak) - A + Riak engine to the + [resourceful](https://github.com/flatiron/resourceful/) model + framework from [flatiron](https://github.com/flatiron/) +* [Connect-Riak](https://github.com/frank06/connect-riak) - Riak + session store for Connect backed by [Riak-js](http://riakjs.org/) +* [Riak-js](http://riakjs.com) - Node.js client for Riak with support + for HTTP and Protocol Buffers +* [Riakjs-model](https://github.com/dandean/riakjs-model) - a model + abstraction around riak-js +* [Node-Riak](http://github.com/orlandov/node-riak) - A wrapper around + Node's HTTP facilities for communicating with Riak +* [riak-dc](https://github.com/janearc/riak-dc) - A very thin, very small + http-based interface to Riak using promises intended to be used for small + tools like command-line applications; aims to have the "most-synchronous- + like" interface. 
+* [Nori](https://github.com/sgonyea/nori) - Experimental Riak HTTP + library for Node.js modeled after Ripple +* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) - Node-based server and database-frontend for Sproutcore +* [Chinood](https://npmjs.org/package/chinood) - Object data mapper + for Riak built on Nodiak +* [SimpleRiak](https://npmjs.org/package/simpleriak) - A very simple + Riak HTTP client + +#### OCaml + +* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) - Riak OCaml client +* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) - A Protocol + Buffers client for Riak + +#### Perl + +* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) - A Perl + interface to Riak +* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) - Non-blocking Riak adapter using anyevent +* [riak-tiny](https://github.com/tempire/riak-tiny) - Perl interface + to Riak without Moose +* [Riak::Light](https://metacpan.org/module/Riak::Light) - Fast and + lightweight Perl client for Riak (PBC only) + +#### PHP + +* [riak-client](https://github.com/php-riak/riak-client) - A Riak + 2.0-compliant PHP client with support for Protocol Buffers by [Fabio + Silva](https://github.com/FabioBatSilva) +* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) - A port of + Ripple to PHP +* [riiak](https://bitbucket.org/intel352/riiak) - A Riak PHP client + library for the [Yii Framework](http://www.yiiframework.com/) +* [riak-php](https://github.com/marksteele/riak-php) - A Riak PHP + client with support for Protocol Buffers +* [RiakBundle](https://github.com/remialvado/RiakBundle) - [Symfony](http://symfony.com) Bundle designed to ease interaction + with Riak +* [php_riak](https://github.com/TriKaspar/php_riak) - A PHP extension + written in C, Both Riak client and PHP session module + +#### Python + +* [Aioriak](https://github.com/rambler-digital-solutions/aioriak` - Asyncio PBC Riak 2.0+ client library. 
(Based on official Basho + python client) +* [Riakasaurus](https://github.com/calston/riakasaurus) - A Riak + client library for Twisted (based on txriak) +* [RiakKit](http://shuhaowu.com/riakkit) - A small Python ORM that + sits on top of riak-python-client, similar to mongokit and couchdbkit +* [riakalchemy](https://github.com/Linux2Go/riakalchemy) - Object + mapper for Riak written in Python +* [riak_crdt](https://github.com/ericmoritz/riak_crdt) - A CRDT + (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT + API](https://github.com/ericmoritz/crdt) +* [txriak](https://launchpad.net/txriak) - A Twisted module for + communicating with Riak via the HTTP interface +* [txriakidx](https://github.com/williamsjj/txriakidx) - Riak client + for Twisted Python that implements transparent indexes + +#### Racket + +* [riak.rkt](https://github.com/shofetim/riak.rkt) - Racket API to + Riak +* [Racket Riak](https://github.com/dkvasnicka/racket-riak) - Racket + 1.3.x API to Riak + +#### Ruby + +* [Risky](https://github.com/aphyr/risky) - A lightweight Ruby ORM for + Riak +* [riak_sessions](http://github.com/igorgue/riak_sessions) - Riak-backed session storage for Rack +* [Riaktor](http://github.com/benmyles/riaktor) - Ruby client and + object mapper for Riak +* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) - DataMapper adapter for Riak +* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) - Riak + Protocol Buffer Client in Ruby +* [Devise-Ripple](http://github.com/frank06/devise-ripple) - An ORM + strategy to use Devise with Riak +* [ripple-anaf](http://github.com/bkaney/ripple-anaf) - Accepts nested + attributes support for Ripple +* [Pabst](https://github.com/sgonyea/pabst) - Cross-platform Ruby + extension for Protocol Buffers written in both Objective-C and + Objective-C++ + +#### Scala + +* [Riakka](http://github.com/timperrett/riakka) - Scala library for + talking to Riak +* [Ryu](http://github.com/softprops/ryu) - A Tornado Whirlwind Kick + Scala client for the Riak raw HTTP interface +* [Raiku](https://github.com/gideondk/Raiku) - An Akka IO- and + Sentinel-driven Riak Scala client + +#### Smalltalk + +* [Phriak](http://www.squeaksource.com/Phriak/) - A Riak client for + Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface +* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) - A Pharo Smalltalk interface to Riak. There is also a blog post + with some additional info about the client + [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html). + + + diff --git a/content/riak/kv/2.9.10/developing/data-modeling.md b/content/riak/kv/2.9.10/developing/data-modeling.md new file mode 100644 index 0000000000..bb4e04f1e4 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/data-modeling.md @@ -0,0 +1,13 @@ +--- +layout: redirect +target: "riak/kv/2.9.10/learn/use-cases/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/, but -- to maintain +the git history of this (possibly malformed?) file -- we're going to start off +by using this generated redirect. 
+ + + diff --git a/content/riak/kv/2.9.10/developing/data-types.md b/content/riak/kv/2.9.10/developing/data-types.md new file mode 100644 index 0000000000..9855fe53de --- /dev/null +++ b/content/riak/kv/2.9.10/developing/data-types.md @@ -0,0 +1,278 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Data Types" + identifier: "developing_data_types" + weight: 102 + parent: "developing" +toc: true +aliases: + - /riak/2.9.10/dev/using/data-types + - /riak/kv/2.9.10/dev/using/data-types + - /riak/2.9.10/dev/data-modeling/data-types + - /riak/kv/2.9.10/dev/data-modeling/data-types +--- + +[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others +[concept crdt]: ../../learn/concepts/crdts +[ops bucket type]: ../../using/cluster-operations/bucket-types + +Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types: + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [GSets](./gsets) +- [Maps](./maps) + +Riak KV also has 1 context-free data type, that has similar usage but does not require contexts. + +- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places) + + +Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps). + +For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt]. + +## Getting Started with Riak Data Types + +The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: + +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). +2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). +3. [Activate the bucket type](#activate-bucket-type). + +### Creating a Bucket with a Riak Data Type + +First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to either `counter`, `map`, `set`, or `hll`. + +The following would create a separate bucket type for each of the four +bucket-level data types: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}' +riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}' +``` + +> **Note** +> +> The names `maps`, `sets`, `counters`, `hlls` and `gsets` are not reserved +terms. You are free to name bucket types whatever you like, with +the exception of `default`. + +### Confirm Bucket configuration + +Once you've created a bucket with a Riak data type, you can check +to make sure that the bucket property configuration associated with that +type is correct. This can be done through the `riak-admin` interface: + +```bash +riak-admin bucket-type status maps +``` + +This will return a list of bucket properties and their associated values +in the form of `property: value`. 
If our `maps` bucket type has been set +properly, we should see the following pair in our console output: + +``` +datatype: map +``` + +### Activate Bucket type + +If a bucket type has been properly constructed, it needs to be activated +to be usable in Riak. This can also be done using the `bucket-type` +command interface: + +```bash +riak-admin bucket-type activate maps +``` + +To check whether activation has been successful, simply use the same +`bucket-type status` command shown above. + +See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application. + +## Required Bucket Properties + +In order for Riak data types to work the bucket should have the following bucket properties: + +- `allow_mult = true` +- `last_write_wins = false` + +These settings are set by default and should not be changed. + +## Data Types and Context + +Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence. + +If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client). + +> **Note** +> +> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter. + +In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map): + +```java +// Using the "ahmedMap" Location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +System.out.prinntln(ctx.getValue().toString()) + +// An indecipherable string of Unicode characters should then appear +``` + +```ruby +bucket = client.bucket('users') +ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps') +ahmed_map.instance_variable_get(:@context) + +# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j" +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```python +bucket = client.bucket_type('maps').bucket('users') +ahmed_map = Map(bucket, 'ahmed_info') +ahmed_map.context + +# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Note: using a previous UpdateMap or FetchMap result +Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context)); + +// Output: +// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("context: '%s'", rslt.context.toString('base64')); +}); + +// Output: +// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo=' +``` + +```erlang +%% You cannot fetch a data type's context directly using the Erlang +%% client. This is actually quite all right, as the client automatically +%% manages contexts when making updates. 
+``` + +> **Context with the Ruby, Python, and Erlang clients** +> +> In the Ruby, Python, and Erlang clients, you will not need to manually +handle context when making data type updates. The clients will do it all +for you. The one exception amongst the official clients is the Java +client. We'll explain how to use data type contexts with the Java client +directly below. + +### Context with the Java and PHP Clients + +With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations: + +* Disabling a flag within a map +* Removing an item from a set (whether the set is on its own or within a + map) +* Removing a field from a map + +Without context, these operations simply will not succeed due to the +convergence logic driving Riak data types. The example below shows you +how to fetch a data type's context and then pass it back to Riak. More +specifically, we'll remove the `paid_account` flag from the map: + +```java +// This example uses our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate removePaidAccountField = new MapUpdate() + .removeFlag("paid_account"); +UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField) + .withContext(ctx) + .build(); +client.execute(update); +``` + + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +## Usage Examples + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [Maps](./maps) +- [GSets](./gsets) +- [Hyperloglogs](./hyperloglogs) + +The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section. + +All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish. + +## Data Types and Search + +Riak data types can be searched like any other object, but with the +added benefit that your data type is indexed as a different type by Solr, +the search platform behind Riak Search. + +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code +samples from each of our official client libraries. 
+ + + diff --git a/content/riak/kv/2.9.10/developing/data-types/counters.md b/content/riak/kv/2.9.10/developing/data-types/counters.md new file mode 100644 index 0000000000..7d6af858af --- /dev/null +++ b/content/riak/kv/2.9.10/developing/data-types/counters.md @@ -0,0 +1,634 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Counters" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Counters" + identifier: "data_types_counters" + weight: 100 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.10/dev/using/data-types/counters + - /riak/kv/2.9.10/dev/using/data-types/counters + - /riak/2.9.10/dev/data-modeling/data-types/counters + - /riak/kv/2.9.10/dev/data-modeling/data-types/counters +--- + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero. + +The examples in this section will show you how to use counters on their own. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter set to `counter`: + +```bash +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +``` + +> **Note** +> +> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status counters +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `counters` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: counter +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate counters +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status counters +``` + +After creating and activating our new `counters` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key +location that contains our counter. + +For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. 
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can +create this counter and ensure that the `counters` bucket will use our +`counters` bucket type like this: + +```java +// Using the countersBucket Namespace object from above: + +Location trafficTickets = new Location(countersBucket, "traffic_tickets"); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') + +# Alternatively, the Ruby client enables you to set a bucket type as +# being globally associated with a Riak data type. The following would +# set all counter buckets to use the counters bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters' + +# This would enable us to create our counter without specifying a bucket type +bucket = client.bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('traffic_tickets', $bucket); +``` + +```python +bucket = client.bucket_type('counters').bucket('traffic_tickets') +counter = bucket.new('traffic_tickets') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResult = cmd.Result; +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +``` + +```erlang +Counter = riakc_counter:new(). + +%% Counters in the Erlang client are opaque data structures that collect +%% operations as you mutate them. We will associate the data structure +%% with a bucket type, bucket, and key later on. +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +## Increment a Counter + +Now that our client knows which bucket/key pairing to use for our +counter, `traffic_tickets` will start out at 0 by default. If we happen +to get a ticket that afternoon, we can increment the counter: + +```java +// Using the "trafficTickets" Location from above: + +CounterUpdate cu = new CounterUpdate(1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment + +# This will increment the counter both on the application side and in +Riak +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment() + +# Updates are staged locally and have to be explicitly sent to Riak +# using the store() method. 
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 5}' +``` + +## Retrieve Counter Value + +We can retrieve the value of the counter and view how many tickets have accumulated: + +```java +// Using the "trafficTickets" Location from above: +FetchCounter fetch = new FetchCounter.Builder(trafficTickets) + .build(); +FetchCounter.Response response = client.execute(fetch); +RiakCounter counter = response.getDatatype(); +Long ticketsCount = counter.view(); +``` + +```ruby +counter.value +# Output will always be an integer +``` + +```php +$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getCounter(); + +$trafficTickets->getData(); # returns an integer +``` + +```python +counter.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, whereas the call above would return +# 6, the call below will return 0' since we started with an empty +# counter: + +counter.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any changes to the counter that have not yet been +# sent to Riak +counter.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +// response.Value has the counter value +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +client.fetchCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND", + options.bucketType, options.bucket, options.key); + } else { + logger.info("bt: %s, b: %s, k: %s, counter: %d", + options.bucketType, options.bucket, options.key, + rslt.counterValue); + } + } +); +``` + +```erlang +riakc_counter:dirty_value(Counter2). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, whereas the call above would return +%% '6', the call below will return '0' since we started with an empty +%% counter: + +riakc_counter:value(Counter2). + +%% To fetch the value stored on the server, use the call below: + +{ok, CounterX} = riakc_pb_socket:fetch_type(Pid, + {<<"counters">>, <<"counters">>}, + <<"traffic_tickets">>). +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets + +# Response: +{"type":"counter", "value": } +``` + +## Decrement a Counter + +Counters enable you to decrement values in addition to incrementing them as seen above. 
+
+For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record:
+
+```java
+// Using the "trafficTickets" Location from above:
+CounterUpdate cu = new CounterUpdate(-1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.decrement
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(-3)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+counter.decrement()
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var updateCmd = new UpdateCounter.Builder(-3)
+    .WithBucketType("counters")
+    .WithBucket("counters")
+    .WithKey("traffic_tickets")
+    .Build();
+
+rslt = client.Execute(updateCmd);
+response = updateCmd.Response;
+// response.Value is three less than before
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -1
+};
+client.updateCounter(options,
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+
+// As with incrementing, you can also decrement by more than one, e.g.:
+options.increment = -3;
+```
+
+```erlang
+Counter3 = riakc_counter:decrement(Counter2).
+
+%% As with incrementing, you can also decrement by more than one:
+
+Counter4 = riakc_counter:decrement(3, Counter3).
+
+%% At some point, we'll want to send our local updates to the server
+%% so they get recorded and are visible to others. Extract the update
+%% using the to_op/1 function, then pass it to
+%% riakc_pb_socket:update_type/4,5.
+
+riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>},
+                            <<"traffic_tickets">>,
+                            riakc_counter:to_op(Counter4)).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"decrement": 3}'
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/data-types/gsets.md b/content/riak/kv/2.9.10/developing/data-types/gsets.md
new file mode 100644
index 0000000000..0da5b30d8e
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/data-types/gsets.md
@@ -0,0 +1,630 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: GSets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "GSets"
+    identifier: "data_types_gsets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/using/data-types/gsets
+  - /riak/kv/2.9.10/dev/using/data-types/gsets
+  - /riak/2.9.10/dev/data-modeling/data-types/gsets
+  - /riak/kv/2.9.10/dev/data-modeling/data-types/gsets
+---
+
+GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps).
+
+GSets are collections of unique binary values (such as strings). The values in a gset are automatically sorted alphabetically, irrespective of the order in which they were added.
+
+For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
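+
+To make these semantics concrete, here is a minimal, Riak-independent sketch of
+a grow-only set in plain Python. The `GrowOnlySet` class is purely illustrative
+and is not part of any Riak client:
+
+```python
+# Illustrative only: models the add-only, duplicate-ignoring,
+# sorted-on-read behavior described above. Not a Riak client API.
+class GrowOnlySet:
+    def __init__(self):
+        self._elements = set()
+
+    def add(self, element):
+        # Adding an element that is already present is a no-op
+        self._elements.add(element)
+
+    def value(self):
+        # Reads come back sorted, irrespective of insertion order
+        return sorted(self._elements)
+
+gset = GrowOnlySet()
+gset.add('shovel')
+gset.add('spade')
+gset.add('shovel')  # ignored: 'shovel' is already in the gset
+print(gset.value())  # ['shovel', 'spade']
+```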
+
+Unlike sets, gsets only support the addition of elements: existing elements cannot be modified or removed.
+
+> **Known Issue**
+>
+> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `gset`:
+
+```bash
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `gsets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: gset
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate gsets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+After creating and activating our new `gsets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using gsets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a gset:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+gset = bucket.new('2019-11-17')
+
+# or
+
+from riak.datatypes import GSet
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with gsets
+// by building an Options object or using a Builder
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store a list of transactions that occur for an account number on a specific date. The gset will live in the key `2019-11-17` in the bucket `account-12345678`, using the `gsets` bucket type created previously. (The Java and Ruby examples below illustrate the same operations with a gset of `cities` stored in the `travel` bucket.)
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak set.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a set explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create an options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+GSet = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty.
+We can verify that it is empty at any time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+count($gset->getData());
+```
+
+```python
+len(gset) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+FetchGSet fetchGSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchGSetCommand);
+GSetResponse response = fetchGSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("gset '2019-11-17' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_gset:size(GSet) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% gset that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a GSet
+
+But let's say that a pair of transactions occurred today.
+Let's add them to our `2019-11-17` gset:
+
+```java
+// Using our "cities" Location from above:
+
+GSetUpdate su = new GSetUpdate()
+    .add("Toronto")
+    .add("Montreal");
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+    .build();
+client.execute(update);
+```
+
+```ruby
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('transaction a')
+    ->add('transaction b')
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+gset.add('transaction a')
+gset.add('transaction b')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "transaction a", "transaction b" };
+
+var builder = new UpdateGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17")
+    .WithAdditions(adds);
+
+UpdateGSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+GSetResponse response = cmd.Response;
+Assert.Contains("transaction a", response.AsStrings.ToArray());
+Assert.Contains("transaction b", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['transaction a', 'transaction b'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+GSet1 = riakc_gset:add_element(<<"transaction a">>, GSet),
+GSet2 = riakc_gset:add_element(<<"transaction b">>, GSet1).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["transaction a", "transaction b"]}'
+```
+
+## Remove from a GSet
+
+Removal from a GSet is not possible.
+
+## Retrieve a GSet
+
+Now, we can check which transactions are currently in our gset:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+    System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+# => #<Set: {"Montreal", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+var_dump($gset->getData());
+```
+
+```python
+gset.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['transaction a', 'transaction b']), the call below would
+# return frozenset([]).
+
+gset.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions.
+gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(GSet2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(GSet2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"gsets">>,<<"account-12345678">>},
+                                        <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use the standard JavaScript array method indexOf()
+
+var gsetValues = rslt.values;
+gsetValues.indexOf('transaction z'); // if present, index is >= 0
+gsetValues.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, GSet2 is the most "recent" gset from the standpoint
+%% of our application.
+
+riakc_gset:is_element(<<"transaction z">>, GSet2).
+riakc_gset:is_element(<<"transaction a">>, GSet2).
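+
+%% Both calls return booleans: we never added <<"transaction z">>, so
+%% the first call returns false, while the second returns true.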
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "binarySet" from the fetch above:
+
+int numberOfCities = binarySet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+response.Values.Count();
+```
+
+```javascript
+// Use the standard JavaScript array property length
+
+var gsetSize = rslt.values.length;
+```
+
+```erlang
+riakc_gset:size(GSet2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/data-types/hyperloglogs.md b/content/riak/kv/2.9.10/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..09193627e2
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/data-types/hyperloglogs.md
@@ -0,0 +1,642 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/using/data-types/hyperloglogs
+  - /riak/kv/2.9.10/dev/using/data-types/hyperloglogs
+  - /riak/2.9.10/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/2.9.10/dev/data-modeling/data-types/hyperloglogs
+---
+
+The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog.
+
+For this example we'll use the `hlls` bucket type created and activated above and a bucket called `my_hlls`:
+
+```erlang
+%% Buckets are simply named binaries in the Erlang client. See the
+%% examples below for more information
+```
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location hllLocation =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// Buckets and bucket types are simply strings in the Go client.
+
+// See the examples below for more information, or the full example at
+// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go
+
+// We will need the following imports to run the examples:
+import (
+    "fmt"
+    "os"
+    "time"
+
+    riak "github.com/basho/riak-go-client"
+    "errors"
+)
+```
+
+```csharp
+// In the C# client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```javascript
+// In the Node.js client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('<key>', '<bucket>', 'hlls')
+    ->build();
+```
+
+```ruby
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-Data-Type
+# requests, which end in /keys/<key>
+```
+
+
+## Create a HyperLogLog data type
+
+To create a hyperloglog data structure, you need to specify a bucket/key pair to
+hold that hyperloglog. Here is the general syntax for doing so:
+
+```erlang
+HLL = riakc_hll:new().
+
+%% Hyperloglogs in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location hllLocation =
+  new Location(new Namespace("hlls", "hello"), "darkness");
+
+// In the Java client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// In the Go client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```csharp
+// In the C# client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```javascript
+// In the Node.js client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+``` + +```php +// Note that "hlls" is just an example HLL bucket type name used +// in these examples + +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('gosabres poked you.') + ->add('phprocks viewed your profile.') + ->add('phprocks started following you.') + ->buildBucket('', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +key = "darkness" +hll = Riak::Crdt::HyperLogLog.new(bucket, key) +``` + +```curl +# You cannot create an empty hyperloglog data structure through the HTTP +# interface. +# Hyperloglogs can only be created when an element is added to them, as in the +# examples below. +``` + +Upon creation, our hyperloglog data structure is empty: + +```erlang +HLL. + +%% which will return: +%% {hll,0,[]} +``` + +```java +FetchHll fetch = new FetchHll.Builder(hllLocation) + .build(); +RiakHll hll = client.execute(fetch); +boolean isEmpty = hll.getCardinality() == 0; +``` + +```python +is_empty = hll.value == 0 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } +}); +// Prints "Not Found" to logger.info. +``` + +```csharp + var fetch = new FetchHll.Builder() + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .Build(); + +RiakResult rslt = client.Execute(fetch); +HllResponse response = fetch.Response; +if (response.NotFound) +{ + Console.WriteLine("Not Found"); +} +// Prints "Not Found" to the console. +``` + +```php +$command = (new Command\Builder\FetchHll($riak_client)) + ->buildLocation('darkness', 'hello', 'hlls') + ->build(); + +$response = $command->execute(); + +$response->getCode() == '404'; +``` + +```ruby +puts hll.cardinality +# Prints "0" +``` + +```curl +curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness + +# Response +{"type":"hll","error":"notfound"} +``` + +## Add elements to a HyperLogLog data type + +```erlang +HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1), + +HLL2. 
+
+%% which will return:
+%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]}
+```
+
+```java
+HllUpdate hllUpdate = new HllUpdate()
+    .add("Jokes")
+    .add("Are")
+    .addAll(Arrays.asList("Better", "Explained", "Jokes"));
+
+hllUpdate.getElementAdds();
+// Returns the set of ["Jokes", "Are", "Better", "Explained"]
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+// We will add values in the next example
+```
+
+```csharp
+// We will add values in the next example
+```
+
+```javascript
+// We will add values in the next example
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('gosabres poked you.')
+    ->add('phprocks viewed your profile.')
+    ->add('phprocks started following you.')
+    ->buildBucket('<bucket>', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["my", "old", "friend"]}'
+```
+
+However, when using a non-HTTP client, the approximate cardinality/value of our
+data structure will remain 0 locally until it is pushed to the server and then
+[fetched](#retrieve-a-hyperloglog-data-type) from the server.
+
+```erlang
+riakc_hll:value(HLL2) == 0.
+
+%% which will return:
+%% true
+
+Port = 8087,
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port),
+Key = <<"Holy Diver">>,
+BucketType = <<"hlls">>,
+Bucket = {BucketType, <<"rainbow in the dark">>},
+
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)).
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)).
+```
+
+```java
+// Using hllUpdate and hllLocation from above examples
+
+UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate)
+    .build();
+client.execute(update);
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+adds := [][]byte{
+    []byte("Jokes"),
+    []byte("Are"),
+    []byte("Better"),
+    []byte("Explained"),
+    []byte("Jokes"),
+}
+
+builder := riak.NewUpdateHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    WithAdditions(adds...).
+    Build()
+if err != nil {
+    return err
+}
+
+return cluster.Execute(cmd)
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness',
+    additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'],
+};
+
+client.updateHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```csharp
+var adds = new HashSet<string> { "Jokes", "Are", "Better", "Explained", "Jokes" };
+
+var update = new UpdateHll.Builder(adds)
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .WithReturnBody(true)
+    .Build();
+
+RiakResult rslt = client.Execute(update);
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('Jokes')
+    ->add('Are')
+    ->add('Better')
+    ->add('Explained')
+    ->add('Jokes')
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+hll.add('Jokes')
+hll.batch do |s|
+  s.add 'Are'
+  s.add 'Better'
+  s.add 'Explained'
+  s.add 'Jokes'
+end
+```
+
+## Retrieve a HyperLogLog data type
+
+Now, we can check the approximate count (a.k.a. the cardinality) of the
+elements added to our hyperloglog data structure:
+
+```erlang
+{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key),
+riakc_hll:value(HLL3) == 4.
+
+%% which would return:
+%% true
+
+%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the
+%% unique elements we've added to the data structure.
+```
+
+```java
+FetchHll hllFetchCmd = new FetchHll.Builder(hllLocation).build();
+RiakHll hll = client.execute(hllFetchCmd);
+hll.getCardinality();
+// Which returns 4
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = bucket.get('hll_one')
+# myhll.value == 4
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    } else {
+        logger.info("Hyperloglog cardinality is: " + rslt.cardinality);
+    }
+});
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($response->getHll()->getData()));
+$this->assertEquals(4, $response->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/data-types/maps.md b/content/riak/kv/2.9.10/developing/data-types/maps.md
new file mode 100644
index 0000000000..b6aad550d3
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/data-types/maps.md
@@ -0,0 +1,1884 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/using/data-types/maps
+  - /riak/kv/2.9.10/dev/using/data-types/maps
+  - /riak/2.9.10/dev/data-modeling/data-types/maps
+  - /riak/kv/2.9.10/dev/data-modeling/data-types/maps
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
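+
+For example, an abridged status listing might look something like the following
+(the exact set of properties and values varies by Riak KV version and
+configuration; this sample output is illustrative):
+
+```
+active: true
+allow_mult: true
+datatype: map
+n_val: 3
+```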
+
+If our `maps` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: map
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+After creating and activating our new `maps` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key location that contains our map.
+
+The syntax for creating a map is analogous to the
+syntax for creating other data types:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location map =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+map = Riak::Crdt::Map.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type');
+```
+
+```python
+# The client detects the bucket type's datatype and automatically
+# returns the right datatype for you, in this case a Map.
+map = bucket.new(key)
+
+# This way is also acceptable:
+from riak.datatypes import Map
+map = Map(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("<bucket_type>")
+    .WithBucket("<bucket>")
+    .WithKey("<key>");
+```
+
+```javascript
+// Options to pass to the various map methods
+var options = {
+    bucketType: '<bucket_type>',
+    bucket: '<bucket>',
+    key: '<key>'
+};
+```
+
+```erlang
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Map
+
+For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket.
+
+We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type:
+
+```java
+// In the Java client, you specify the location of data types
+// before you perform operations on them:
+
+Location ahmedMap =
+  new Location(new Namespace("maps", "customers"), "ahmed_info");
+```
+
+```ruby
+customers = client.bucket_type('maps').bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+
+# Alternatively, the Ruby client enables you to set a bucket type as being
+# globally associated with a Riak data type.
+# The following would set all map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+    .update("first_name", ru1)
+    .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Ahmed')
+    ->updateRegister('phone_number', '5551234567')
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved.
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+    .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_customer', false)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"enterprise_customer">>, flag},
+                        fun(F) -> riakc_flag:disable(F) end,
+                        Map3).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "enterprise_customer_flag": "disable"
+      },
+      "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+    }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"]
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map4).
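+
+%% The result is the map's fields as a list of {{FieldName, FieldType},
+%% Value} pairs, so the flag appears alongside the registers set earlier.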
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+    .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateCounter('page_visits', $updateCounter)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "page_visits_counter": 1
+      }
+    }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles.
+We'll store that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+    .add("robots")
+    .add("opera")
+    .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+    .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('robots')
+    ->add('opera')
+    ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{"interests":["motorcycles","opera","robots"]},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "interests_set": {
+          "add_all": [
+            "robots",
+            "opera",
+            "motorcycles"
+          ]
+        }
+      }
+    }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? interest
+  end
+end
+
+# This will return three Boolean values
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+$sets = $map->getSet('interests');
+var_dump($sets->getData());
+```
+
+```python
+reloaded_map = map.reload()
+for interest in ['robots', 'opera', 'motorcycles']:
+    interest in reloaded_map.sets['interests'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+
+// All of the following return true:
+ahmedMap.Sets.GetValue("interests").Contains("robots");
+ahmedMap.Sets.GetValue("interests").Contains("opera");
+ahmedMap.Sets.GetValue("interests").Contains("motorcycles");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    assert(rslt.map.sets['interests'].indexOf('robots') !== -1);
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map6).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false
+```
+
+We learn from a recent purchasing decision that Ahmed actually doesn't
+seem to like opera. He's much more keen on indie pop. Let's change the
+`interests` set to reflect that:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+    .remove("opera")
+    .add("indie pop");
+MapUpdate mu = new MapUpdate()
+    .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  m.sets['interests'].remove('opera')
+  m.sets['interests'].add('indie pop')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('indie pop')
+    ->remove('opera');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+map.sets['interests'].discard('opera')
+map.sets['interests'].add('indie pop')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", "indie pop");
+mapOperation.RemoveFromSet("interests", "opera");
+
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+Map ahmedMap = response.Value;
+
+// This is false
+ahmedMap.Sets.GetValue("interests").Contains("opera");
+
+// These are true
+ahmedMap.Sets.GetValue("interests").Contains("indie pop");
+ahmedMap.Sets.GetValue("interests").Contains("robots");
+ahmedMap.Sets.GetValue("interests").Contains("motorcycles");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.removeFromSet('interests', 'opera');
+    mapOp.addToSet('interests', 'indie pop');
+
+    options.context = rslt.context;
+    options.op = mapOp;
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
riakc_map:update({<<"interests">>, set},
+                 fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+                        Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of a wide variety of
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("first_name", ru1)
+        .update("last_name", ru2)
+        .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Annika')
+    ->updateRegister('last_name', 'Weiss')
+    ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.map('annika_info')
+    .setRegister('first_name', 'Annika')
+    .setRegister('last_name', 'Weiss')
+    .setRegister('phone_number', '5559876543');
+
+options.op = mapOp;
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map12 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"first_name">>, register},
+        fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end,
+    Map8),
+Map13 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"last_name">>, register},
+        fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end,
+    Map12),
+Map14 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"phone_number">>, register},
+        fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end,
+    Map13).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "first_name_register": "Annika",
+          "last_name_register": "Weiss",
+          "phone_number_register": "5559876543"
+        }
+      }
+    }
+  }
+  '
+```
+
+The value of a register in a map can be obtained without a special
+method:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+String annikaFirstName = response.getDatatype()
+        .getMap("annika_info")
+        .getRegister("first_name")
+        .view()
+        .toString();
+```
+
+```ruby
+map.maps['annika_info'].registers['first_name']
+
+# "Annika"
+```
+
+```php
+# with param 'returnbody' = 'true', we can fetch the map from our last response
+$map = $response->getMap();
+
+echo $map->getMap('annika_info')->getRegister('first_name'); // Annika
+```
+
+```python
+map.reload().maps['annika_info'].registers['first_name'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+ahmedMap = response.Value;
+ahmedMap.Maps["annika_info"].Registers.GetValue("first_name");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var annikaFirstName =
+        rslt.map.maps['annika_info'].registers['first_name'].toString('utf8');
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map14).
+```
+
+```curl
+# Specific values for fields inside of maps (or maps within maps, for that
+# matter), cannot be obtained directly through the HTTP interface.
+```
+
+Registers can also be removed:
+
+```java
+// This example uses our "ahmedMap" location from above.
Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+        .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .removeRegister("first_name");
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('first_name')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->removeRegister('first_name');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['first_name']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("first_name");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('first_name');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+    fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+    Map14).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "remove": ["phone_number_register"]
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+  }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("enterprise_plan", new FlagUpdate(false))
+        .update("family_plan", new FlagUpdate(false))
+        .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_plan', false)
+    ->updateFlag('family_plan', false)
+    ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"enterprise_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map15),
+Map17 = riakc_map:update(
{<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"family_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map16),
+Map18 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"free_plan">>, flag},
+        fun(F) -> riakc_flag:enable(F) end,
+        M) end,
+    Map17).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "enterprise_plan_flag": "disable",
+          "family_plan_flag": "disable",
+          "free_plan_flag": "enable"
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="
+  }
+  '
+```
+
+The value of a flag can be retrieved at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+boolean enterprisePlan = response.getDatatype()
+        .getMap("annika_info")
+        .getFlag("enterprise_plan")
+        .view();
+```
+
+```ruby
+map.maps['annika_info'].flags['enterprise_plan']
+
+# false
+```
+
+```php
+# with param 'returnbody' = 'true', we can fetch the map from our last response
+$map = $response->getMap();
+
+echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false
+```
+
+```python
+map.reload().maps['annika_info'].flags['enterprise_plan'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+ahmedMap = response.Value;
+ahmedMap.Maps["annika_info"].Flags["enterprise_plan"];
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var enterprisePlan =
+        rslt.map.maps.annika_info.flags.enterprise_plan;
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map18).
+```
+
+```curl
+# Specific values for fields inside of maps (or maps within maps, for that
+# matter), cannot be obtained directly through the HTTP interface.
+```
+
+It's also important to track the number of purchases that Annika has
+made with our company.
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "add": "tango dancing"
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+We can remove that interest in just the way that we would expect:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate().remove("tango dancing");
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("interests", su);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].sets['interests'].remove('tango dancing')
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('tango dancing');
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($response->getMap()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].sets['interests'].discard('tango dancing')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing");
+
+// Note: using Context from previous response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+client.Execute(builder.Build());
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.removeFromSet('interests', 'tango dancing');
+
+    options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map21 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"interests">>, set},
+        fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end,
+        M) end,
+    Map20).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "remove": "tango dancing"
+          }
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+  }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('first_purchase', true)
+    ->updateRegister('amount', '1271')
+    ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new
Error(err); + } +}); +``` + +```erlang +Map22 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"purchase">>, map}, + fun(M) -> riakc_map:update( + {<<"first_purchase">>, flag}, + fun(R) -> riakc_flag:enable(R) end, + M) end, + M) end, + Map21 +). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "purchase_map": { + "update": { + "first_purchase_flag": "enable", + "amount_register": "1271", + "items_set": { + "add": "large widget" + } + } + } + } + } + } + } + ' +``` + + + diff --git a/content/riak/kv/2.9.10/developing/data-types/sets.md b/content/riak/kv/2.9.10/developing/data-types/sets.md new file mode 100644 index 0000000000..9d5b9b8e89 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/data-types/sets.md @@ -0,0 +1,772 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Sets" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Sets" + identifier: "data_types_sets" + weight: 101 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.10/dev/using/data-types/sets + - /riak/kv/2.9.10/dev/using/data-types/sets + - /riak/2.9.10/dev/data-modeling/data-types/sets + - /riak/kv/2.9.10/dev/data-modeling/data-types/sets +--- + +Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps). + +Sets are collections of unique binary values (such as strings). All of +the values in a set are unique. + +For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter `set`: + +```bash +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +``` + +> **Note** +> +> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status sets +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `sets` bucket type has been set properly we should see the following pair in our console output: + +``` +datatype: set +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate sets +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status sets +``` + +After creating and activating our new `sets` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair. 
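+
+All of the examples that follow assume a client object that is already
+connected to Riak. As a minimal sketch of that setup in Python (the
+host and port values are illustrative; adjust them for your cluster):
+
+```python
+from riak import RiakClient
+
+# Connect over Protocol Buffers to a local node (assumed defaults)
+client = RiakClient(protocol='pbc', host='127.0.0.1', pb_port=8087)
+```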
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type.
The following would +# set all set buckets to use the sets bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets' + +# This would enable us to create our set without specifying a bucket +# type: +travel = client.bucket('travel') +cities_set = Riak::Crdt::Set.new(travel, 'cities') +``` + +```php +$location = new \Basho\Riak\Location('cities', 'travel', 'sets'); +``` + +```python +travel = client.bucket_type('sets').bucket('travel') + +# The client detects the bucket type's data type and automatically +# returns the right data type for you, in this case a Riak set. +cities_set = travel.new('cities') + +# You can also create a reference to a set explicitly: +from riak.datatypes import Set + +cities_set = Set(travel, 'cities') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Now we'll create a Builder object for the set with which we want to +// interact: +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); +``` + +```javascript +// Now we'll create a options object for the set with which we want to +// interact: +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +``` + +```erlang +CitiesSet = riakc_set:new(). + +%% Sets in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +# You cannot create an empty set through the HTTP interface. Sets can +# only be created when an element is added to them, as in the examples +# below. +``` + +Upon creation, our set is empty. We can verify that it is empty at any +time: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +RiakSet set = response.getDatatype(); +boolean isEmpty = set.viewAsSet().isEmpty(); +``` + +```ruby +cities_set.empty? +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($set->getData()); +``` + +```python +len(cities_set) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); + +FetchSet fetchSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchSetCommand); +SetResponse response = fetchSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("set 'cities' is not found!"); + } +}); +``` + +```erlang +riakc_set:size(CitiesSet) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% set that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a Set + +But let's say that we read a travel brochure saying that Toronto and +Montreal are nice places to go. 
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
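+
+To make the role of the context concrete, here is a minimal Python
+sketch of the fetch-then-remove cycle (the Python client carries the
+context from the last fetch for you):
+
+```python
+# reload() fetches the current value *and* the causal context;
+# discard()/add() stage operations locally, and store() sends them
+# to Riak together with that context. Removing an element from a
+# set that has never been fetched raises a context error.
+cities_set.reload()
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```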
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+        .remove("Montreal")
+        .add("Hamilton")
+        .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Hamilton')
+    ->add('Ottawa')
+    ->remove('Montreal')
+    ->atLocation($location)
+    ->withContext($response->getSet()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse response = cmd.Response;
+
+// using System.Linq
+var responseStrings = response.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4).
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="} + +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}' +``` + +## Retrieve a Set + +Now, we can check on which cities are currently in our set: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); +for (BinaryValue city : binarySet) { + System.out.println(city.toStringUtf8()); +} +``` + +```ruby +cities_set.members + +# +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +var_dump($set->getData()); +``` + +```python +cities_set.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, where the call above would return +# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would +# return frozenset([]). + +cities_set.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any unsent additions or deletions. +cities_set.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +foreach (var value in setResponse.AsStrings) +{ + Console.WriteLine("Cities Set Value: {0}", value); +} + +// Output: +// Cities Set Value: Hamilton +// Cities Set Value: Ottawa +// Cities Set Value: Toronto +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function(err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("cities set values: '%s'", + rslt.values.join(', ')); +}); + +// Output: +// info: cities set values: 'Hamilton, Ottawa, Toronto' +``` + +```erlang +riakc_set:dirty_value(CitiesSet5). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, where the call above would return +%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would +%% return []. These are essentially ordsets: + +riakc_set:value(CitiesSet5). + +%% To fetch the value stored on the server, use the call below: + +{ok, SetX} = riakc_pb_socket:fetch_type(Pid, + {<<"sets">>,<<"travel">>}, + <<"cities">>). 
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="} + +# You can also fetch the value of the set without the context included: +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false + +# Response +{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]} +``` + +## Find Set Member + +Or we can see whether our set includes a specific member: + +```java +// Using our "citiesSet" from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); + +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver"))); +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa"))); +``` + +```ruby +cities_set.include? 'Vancouver' +# false + +cities_set.include? 'Ottawa' +# true +``` + +```php +in_array('Vancouver', $set->getData()); # false + +in_array('Ottawa', $set->getData()); # true +``` + +```python +'Vancouver' in cities_set +# False + +'Ottawa' in cities_set +# True +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +using System.Linq; + +bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver"); +bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa"); +``` + +```javascript +// Use standard javascript array method indexOf() + +var cities_set = result.values; +cities_set.indexOf('Vancouver'); // if present, index is >= 0 +cities_set.indexOf('Ottawa'); // if present, index is >= 0 +``` + +```erlang +%% At this point, Set5 is the most "recent" set from the standpoint +%% of our application. + +riakc_set:is_element(<<"Vancouver">>, CitiesSet5). +riakc_set:is_element(<<"Ottawa">>, CitiesSet5). +``` + +```curl +# With the HTTP interface, this can be determined from the output of +# a fetch command like the one displayed in the example above +``` + +## Size of Set + +We can also determine the size of the set: + +```java +// Using our "citiesSet" from above: + +int numberOfCities = citiesSet.size(); +``` + +```ruby +cities_set.members.length +``` + +```php +count($set->getData()); +``` + +```python +len(cities_set) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +using System.Linq; + +// Note: this enumerates the IEnumerable +setResponse.Values.Count(); +``` + +```javascript +// Use standard javascript array property length + +var cities_set_size = result.values.length; +``` + +```erlang +riakc_set:size(CitiesSet5). 
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/faq.md b/content/riak/kv/2.9.10/developing/faq.md
new file mode 100644
index 0000000000..a7a7438e25
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/faq.md
@@ -0,0 +1,558 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Frequently Asked Questions"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Developing FAQ"
+    identifier: "developing_faq"
+    weight: 108
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.9.10/community/faqs/developing
+  - /riak/kv/2.9.10/community/faqs/developing
+---
+
+[Basho Bench]: {{}}riak/kv/2.9.10/using/performance/benchmarking
+[Bitcask]: {{}}riak/kv/2.9.10/setup/planning/backend/bitcask
+[Bucket Properties]: {{}}riak/kv/2.9.10/developing/usage
+[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{}}riak/kv/2.9.10/developing/usage/commit-hooks
+[Configuration Files]: {{}}riak/kv/2.9.10/configuring/reference
+[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{}}riak/kv/2.9.10/developing/client-libraries
+[MapReduce]: {{}}riak/kv/2.9.10/developing/usage/mapreduce
+[Memory]: {{}}riak/kv/2.9.10/setup/planning/backend/memory
+[Riak CS]: {{}}riak/cs/2.1.1
+[System Planning]: {{}}riak/kv/2.9.10/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{}}riak/kv/2.9.10/learn/concepts/causal-context#vector-clocks
+
+
+## General
+
+
+**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?**
+
+**A:**
+  If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example:
+
+  ```erlang
+  {bitcask, [
+      {data_root, "data/bitcask"},
+      {expiry_secs, 86400} %% Expire after a day
+  ]},
+  ```
+
+  There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0.
+
+  You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM.
+
+**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?**
+
+
+**A:**
+  Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free.
+
+  If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact.
+
+**Q: Can I list buckets or keys in production?**
+
+
+**A:**
+  It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size.
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3.
+
+  For this example, I am using small integers instead of the actual 160-bit partition index values for the sake of simplicity. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+<table>
+  <tr><td>0-1-2</td><td>1-2-3</td><td>2-3-4</td><td>3-4-5</td></tr>
+  <tr><td>4-5-6</td><td>5-6-7</td><td>6-7-8</td><td>7-8-9</td></tr>
+  <tr><td>8-9-10</td><td>9-10-11</td><td>10-11-12</td><td>11-12-13</td></tr>
+  <tr><td>12-13-14</td><td>13-14-15</td><td>14-15-0</td><td>15-0-1</td></tr>
+</table>
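+
+The preflist structure above is easy to generate programmatically. A
+quick Python sketch (using the same simplified integer partition IDs
+in place of the real 160-bit indexes):
+
+```python
+RING_SIZE = 16
+N_VAL = 3
+
+# Each preflist is a window of N_VAL consecutive partitions,
+# wrapping around the end of the ring.
+preflists = [tuple((i + j) % RING_SIZE for j in range(N_VAL))
+             for i in range(RING_SIZE)]
+print(preflists[0], preflists[15])  # (0, 1, 2) (15, 0, 1)
+```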
+ + Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes: + + + + + + + + + + + + + + + + +
+<table>
+  <tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-2-5-8-10-13</td></tr>
+  <tr><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-4-7-10-13</td><td>0-3-5-7-10-13</td></tr>
+  <tr><td>0-3-5-8-10-13</td><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-7-10-13</td></tr>
+  <tr><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td></tr>
+  <tr><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td></tr>
+  <tr><td>0-3-6-9-12-15</td><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td></tr>
+  <tr><td>1-3-6-9-11-14</td><td>1-3-6-9-12-14</td><td>1-3-6-9-12-15</td><td>1-4-5-8-11-14</td></tr>
+  <tr><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td></tr>
+  <tr><td>1-4-7-8-11-14</td><td>1-4-7-9-11-14</td><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td></tr>
+  <tr><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td></tr>
+  <tr><td>1-4-7-10-13-15</td><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-4-7-9-12-15</td></tr>
+  <tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-6-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+  <tr><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td><td>2-5-8-9-12-15</td><td>2-5-8-10-12-15</td></tr>
+  <tr><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-13-15</td><td>2-5-8-11-14-15</td></tr>
+</table>
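+
+The count of 56 can be verified by brute force. A small Python sketch
+of that check (self-contained, using the same simplified ring):
+
+```python
+from itertools import combinations
+
+RING_SIZE, N_VAL = 16, 3
+preflists = [{(i + j) % RING_SIZE for j in range(N_VAL)}
+             for i in range(RING_SIZE)]
+
+# A covering set is any choice of 6 vnodes that intersects
+# every one of the 16 preflists.
+covering_sets = [set(c) for c in combinations(range(RING_SIZE), 6)
+                 if all(pl & set(c) for pl in preflists)]
+print(len(covering_sets))  # 56
+```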
+
+  When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency.
+  A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for over half of them to respond. This results in consistent responses to GETs even when one of the vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may include only 1 member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+  Continuing with the above example, consider if node C is force removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+  In this new 4-node configuration, any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read-repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+
+  So, making a few assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+      ```
+      a - 0-1-2
+      b - 6-7-8
+      c - 10-11-12
+      ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys will vary depending on the nodes chosen for the coverage set. Of the 56 possible covering sets ...
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+<table>
+  <tr><td>0-2-5-8-10-13</td><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-5-8-10-13</td></tr>
+  <tr><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td></tr>
+  <tr><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td></tr>
+  <tr><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td><td>1-3-6-9-11-14</td></tr>
+  <tr><td>1-4-5-8-11-14</td><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-7-8-11-14</td></tr>
+</table>
+ + * 24 sets (42.9%) will return 2 of the 3 keys: + + + + + + + + + + + +
+<table>
+  <tr><th colspan="4"><code>{a,b}</code> (7 sets)</th></tr>
+  <tr><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td><td>0-3-6-9-12-15</td><td>1-3-6-9-12-14</td></tr>
+  <tr><td>1-3-6-9-12-15</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td><td> </td></tr>
+  <tr><th colspan="4"><code>{a,c}</code> (12 sets)</th></tr>
+  <tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-3-4-7-10-13</td></tr>
+  <tr><td>0-3-5-7-10-13</td><td>0-3-6-7-10-13</td><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td></tr>
+  <tr><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td><td>1-4-7-10-13-15</td><td>1-4-7-9-11-14</td></tr>
+  <tr><th colspan="4"><code>{b,c}</code> (5 sets)</th></tr>
+  <tr><td>2-5-8-10-12-15</td><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-14-15</td></tr>
+  <tr><td>2-5-8-11-13-15</td><td> </td><td> </td><td> </td></tr>
+</table>
+ + * 10 sets (17.8%) will return only one of the 3 keys: + + + + + + + +
+<table>
+  <tr><th colspan="4"><code>{a}</code> (2 sets)</th></tr>
+  <tr><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td><td> </td><td> </td></tr>
+  <tr><th colspan="4"><code>{b}</code> (4 sets)</th></tr>
+  <tr><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-5-6-9-12-15</td><td>2-5-8-9-12-15</td></tr>
+  <tr><th colspan="4"><code>{c}</code> (4 sets)</th></tr>
+  <tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td></tr>
+</table>
+ + * 2 sets (3.6%) will not return any of the 3 keys + + +
+<table>
+  <tr><td>2-4-7-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+</table>
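+
+  These tallies follow mechanically from the setup above. A Python
+  sketch that reproduces them, reusing `covering_sets` from the earlier
+  snippet (the 20/24/10/2 split corresponds to 3, 2, 1, and 0 visible
+  keys, respectively):
+
+```python
+from collections import Counter
+
+EMPTY = {2, 7, 12}  # vnodes emptied by the force-remove
+key_preflists = {'a': {0, 1, 2}, 'b': {6, 7, 8}, 'c': {10, 11, 12}}
+
+def visible_keys(cover):
+    # A key is returned only if the covering set touches a vnode
+    # from its preflist that still holds the data.
+    return {k for k, pl in key_preflists.items() if (pl & cover) - EMPTY}
+
+tally = Counter(len(visible_keys(c)) for c in covering_sets)
+print(tally)  # Counter({2: 24, 3: 20, 1: 10, 0: 2})
+```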
+**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories. + + ```erlang + ClusterNode = 'riak@127.0.0.1'. + + application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}). + {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []). + Ring2 = setelement(2, Ring, node()). + riak_core_ring_manager:set_my_ring(Ring2). + riak_core_ring_manager:write_ringfile(). + [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()]. + ``` + +**Q: Is there a limit on the size of files that can be stored on Riak?** + + +**A:** + There isn't a limit on object size, but we suggest you keep it to no more than 1-2MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose. + +**Q: Does the bucket name impact key storage size?** + + +**A:** + The storage per key is 40 bytes plus the key size and bucket name size. + + Example: + + Key size: 15 bytes. + Bucket Name size: 10 bytes. + + Total size = 40 + 15 + 10 = **65 bytes**. + + +**Q: Are Riak-generated keys unique within a bucket?** + + +**A:** + It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety. + +**Q: Where are bucket properties stored?** + + +**A:** + The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type. + Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster. + + The bucket properties stay in the ring and cluster metadata even if the bucket is empty. +**Q: Are Riak keys / buckets case sensitive?** + + +**A:** + Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`. + +**Q: Can I run my own Erlang applications in the same VM as Riak?** + + +**A:** + We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold: + + 1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability. + 2. You will be able to upgrade Riak and your application independently of one another. + 3. When your application or Riak needs more capacity, you can scale them separately to meet your production needs. + +**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?** + + +**A:** + Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console. + + +**Q: How do I spread requests across---i.e.
load balance---a Riak cluster?** + + +**A:** + There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**. + + For further information see [System Planning]. + + +**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?** + There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node a series of merges are kicked off and the total size of the data directory shrinks. Why does this happen? + + +**A:** + Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows: + + 1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`) + 2. Remove the currently active file from the list; the active file is the one being actively written + 3. Lookup file stats for each data file; this includes percent fragmentation and number of dead bytes + 4. If any of the stats exceed the defined triggers, the Bitcask directory is merged + + The default triggers for a Bitcask directory: + + * `{frag_merge_trigger, 60}, % >= 60% fragmentation` + * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB` + + In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory. + + If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default). + +**Q: When retrieving a list of siblings I am getting the same vtag multiple times.** + When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling? + + +**A:** + The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata. + + It is possible to get siblings with the same vtag during vector clock pruning and read/repair. + + See [vector clocks] for more information. + + +**Q: How should I structure larger data objects?** + I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs? + + +**A:** + The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include: + + 1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time? + 2. How likely are the objects to be updated at the same time by multiple processes? + + If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. Generally, you will want to add links to connect the objects for easy fetching and traversal. 
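+
+ For illustration, here is a minimal sketch of such a split using the Erlang client --- the bucket names, keys, and record shapes are hypothetical, and error handling is omitted. The parent object stores only the keys of its children, so each piece can be fetched and updated independently:
+
+ ```erlang
+ %% A sketch only: assumes a local node listening on the default PB port.
+ {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+
+ %% The parent holds references (keys) to its children rather than the
+ %% child data itself.
+ Parent = riakc_obj:new(<<"posts">>, <<"post_1">>,
+                        {post, <<"My first post">>, [<<"post_1_comment_1">>]}).
+ Child = riakc_obj:new(<<"comments">>, <<"post_1_comment_1">>,
+                       {comment, <<"Nice post!">>}).
+ riakc_pb_socket:put(Pid, Parent).
+ riakc_pb_socket:put(Pid, Child).
+
+ %% Updating a comment now rewrites only the small child object, not the
+ %% whole denormalized parent.
+ ```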
+ +**Q: Is there any way in Riak to limit access to a user or a group of users?** + + +**A:** + Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication. + + If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it. + +**Q: Is there a way to enforce a schema on data in a given bucket?** + Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error. + + +**A:** + Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. This document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in JavaScript that restricts non-JSON content. + +**Q: How does the Erlang Riak Client manage node failures?** + Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down? + + +**A:** + The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function. + + * `queue_if_disconnected` (default: `false`) - requests will be queued when the connection to the server is lost. + * `auto_reconnect` (default: `false`) - if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`. + + If these options are both false, connection errors will be returned to the process making requests as `{error, Reason}` tuples. + +**Q: Is there a limiting factor for the number of buckets in a cluster?** + + +**A:** + As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node. + + More on [Bucket Properties]. + +**Q: Is it possible to configure a single bucket's properties in `app.config`?** + + +**A:** + Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state. + + You can read more on `app.config` in [Configuration Files]. + +**Q: Is there a simple command to delete a bucket?** + + +**A:** + There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work: + + ```curl + curl -X DELETE http://your-host:8098/riak/your-bucket + ``` + +**Q: Can Riak be configured to fail an update instead of generating a conflict?** + + +**A:** + No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do. + +**Q: How can I limit the number of keys retrieved?** + + +**A:** + You'll need to use a [MapReduce] job for this. + + You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster.
It will only reduce load on your client. + +**Q: How is the real hash value for replicas calculated based on the preflist?** + + +**A:** + The hash is calculated first and then the next subsequent *N* partitions are chosen for the preflist. + +**Q: Do client libraries support load balancing/round robin?** + + +**A:** + + * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred. + * The Java client is strictly round robin, but with retries built in. + * The Python client also follows round robin without retries. + * The Erlang client does not support any load balancing. + +## MapReduce + + +**Q: Does the number of keys in a bucket affect the performance of MapReduce?** + + +**A:** + Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run. + +**Q: How do I filter out `not_found` from MapReduce results?** + If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys. + + +**A:** + There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list]. + +**Q: Is it possible to call a reduce function at specific intervals during a map function?** + When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often. + + +**A:** + Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce. + +**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?** + + +**A:** + Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase. + +**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?** + + +**A:** + This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com]. + +**Q: Why does MapReduce return a JSON object on occasion instead of an array?** + + +**A:** + `mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---is turned into a hash: + + ```erlang + list_to_binary(mochijson2:encode([{a , b}, {foo, bar}])). + <<"{\"a\":\"b\",\"foo\":\"bar\"}">> + ``` + + JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists. + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started.md b/content/riak/kv/2.9.10/developing/getting-started.md new file mode 100644 index 0000000000..6a77f4ec71 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started.md @@ -0,0 +1,49 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +--- + +[install index]: {{}}riak/kv/2.9.10/setup/installing +[dev client libraries]: {{}}riak/kv/2.9.10/developing/client-libraries + +Welcome, new Riak developer! 
This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
+* [Java]({{}}riak/kv/2.9.10/developing/getting-started/java)
+* [Ruby]({{}}riak/kv/2.9.10/developing/getting-started/ruby)
+* [Python]({{}}riak/kv/2.9.10/developing/getting-started/python)
+* [C Sharp]({{}}riak/kv/2.9.10/developing/getting-started/csharp)
+* [Node.js]({{}}riak/kv/2.9.10/developing/getting-started/nodejs)
+* [Erlang]({{}}riak/kv/2.9.10/developing/getting-started/erlang)
+* [PHP]({{}}riak/kv/2.9.10/developing/getting-started/php)
+* [Go]({{}}riak/kv/2.9.10/developing/getting-started/golang)
+ +### Community-supported Client Libraries + +Please see our [client libraries][dev client libraries] page for a listing of +community-supported clients. + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/csharp.md b/content/riak/kv/2.9.10/developing/getting-started/csharp.md new file mode 100644 index 0000000000..271eec6424 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/csharp.md @@ -0,0 +1,85 @@ +--- +title: "Getting Started with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "C Sharp" + identifier: "getting_started_csharp" + weight: 103 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/csharp + - /riak/kv/2.9.10/dev/taste-of-riak/csharp +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.10/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. + +### Client Setup + +Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager. + +{{% note title="Configuring for a remote cluster" %}} +By default, the Riak .NET Client will add a section to your `app.config` file +for a four node local cluster. If you are using a remote cluster, open up +`app.config` and change the `hostAddress` values to point to nodes in your +remote cluster. +{{% /note %}} + +### Connecting to Riak + +Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object. + +```csharp +using System; +using RiakClient; + +namespace TasteOfRiak +{ + class Program + { + static void Main(string[] args) + { + // don't worry, we'll use this string later + const string contributors = "contributors"; + IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig"); + IRiakClient client = cluster.CreateClient(); + } + } +} +``` + +This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndpoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak. + +Let's make sure the cluster is online. Add this to your `Main` method: + +```csharp +var pingResult = client.Ping(); + +if (pingResult.IsSuccess) +{ + Console.WriteLine("pong"); +} +else +{ + Console.WriteLine("Are you sure Riak is running?"); + Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage); +} +``` + +This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes. + +We are now ready to start interacting with Riak. 
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.10/developing/getting-started/csharp/crud-operations) + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/2.9.10/developing/getting-started/csharp/crud-operations.md new file mode 100644 index 0000000000..f3f32d63d3 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/csharp/crud-operations.md @@ -0,0 +1,146 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "CRUD Operations" + identifier: "getting_started_csharp_crud" + weight: 100 + parent: "getting_started_csharp" +toc: true +--- + +### Creating Objects In Riak + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak. + +The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At its most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak. + +Add the `RiakClient.Models` namespace to your using directive. Your usings should look like this: + +```csharp +using System; +using System.Collections.Generic; +using RiakClient; +using RiakClient.Models; +``` + +Add the `Person` class to the `TasteOfRiak` namespace: + +```csharp +public class Person +{ + public string EmailAddress { get; set; } + public string FirstName { get; set; } + public string LastName { get; set; } +} +``` + +Now let's create some people! + +```csharp +var people = new[] +{ + new Person { + EmailAddress = "bashoman@basho.com", + FirstName = "Basho", + LastName = "Man" + }, + new Person { + EmailAddress = "johndoe@gmail.com", + FirstName = "John", + LastName = "Doe" + } +}; + +foreach (var person in people) +{ + var o = new RiakObject(contributors, person.EmailAddress, person); + var putResult = client.Put(o); + + if (putResult.IsSuccess) + { + Console.WriteLine("Successfully saved {0} to bucket {1}", o.Key, o.Bucket); + } + else + { + Console.WriteLine("Are you *really* sure Riak is running?"); + Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage); + } +} +``` + +In this sample, we create a collection of `Person` objects and then save each `Person` to Riak. + +Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`. + +Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be an error message displaying the result code and a helpful error message. + +### Reading from Riak + +Let's find a person! + +```csharp +var result = client.Get(contributors, "bashoman@basho.com"); +if (result.IsSuccess) +{ + bashoman = result.Value.GetObject<Person>(); + Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage); +} +``` + +We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult` which, like other RiakResults, helpfully encapsulates the communication with Riak.
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/2.9.10/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..f1b21f14d9 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,110 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/2.9.10/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+ +| Bucket | Key Pattern | Example Key +|:-------|:------------|:----------- +| `Users` | `<user_name>` | `joeuser` +| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13` +| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>
`marketing_group_Inbox_2014-03-06` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. + +For the `Msgs` bucket, let's use a combination of the username and the +posting UTC datetime in an [ISO 8601][iso_8601] +format. This combination gives us the pattern `<user_name>_<datetime>`, +which produces keys like `joeuser_2014-03-05T23:20:28`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for +users, and `<group_name>_Inbox_<date>` for groups, which will look like +`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2MB. Objects larger than that can hurt +performance, especially when many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, please refer to +[this source code][2] for the repositories that we'll be using. + +[This console application][3] exercises the code that we've written. + +The repository pattern and `TimelineManager` help with a few things: + + - It helps us to see if an object exists before creating a new one + - It keeps our buckets and key names consistent + - It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest.
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/csharp/querying.md b/content/riak/kv/2.9.10/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..d95d334535 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/csharp/querying.md @@ -0,0 +1,213 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/querying-csharp + - /riak/kv/2.9.10/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+ +```csharp +Console.WriteLine("Creating Data"); +Customer customer = CreateCustomer(); +IEnumerable orders = CreateOrders(customer); +OrderSummary orderSummary = CreateOrderSummary(customer, orders); + +Console.WriteLine("Starting Client"); +using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = endpoint.CreateClient(); + + Console.WriteLine("Storing Data"); + + client.Put(ToRiakObject(customer)); + + foreach (Order order in orders) + { + // NB: this adds secondary index data as well + client.Put(ToRiakObject(order)); + } + + client.Put(ToRiakObject(orderSummary)); + + ... + ... + ... +} +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```csharp +Console.WriteLine("Fetching related data by shared key"); +string key = "1"; + +var result = client.Get(customersBucketName, key); +CheckResult(result); +Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result)); + +result = client.Get(orderSummariesBucketName, key); +CheckResult(result); +Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result)); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.10/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.10/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.10/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.10/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will make a note of where +secondary index data is added to our model objects. 
+ +```csharp +private static RiakObject ToRiakObject(Order order) +{ + var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString()); + var riakObject = new RiakObject(orderRiakObjectId, order); + + IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName); + salesPersonIdIndex.Add(order.SalesPersonId.ToString()); + + BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName); + orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd")); + + return riakObject; +} +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders. We'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +```csharp +// Query for order keys where the SalesPersonId index is set to 9000 +var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName); +RiakResult<RiakIndexResult> indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here. +CheckResult(indexRiakResult); +RiakIndexResult indexResult = indexRiakResult.Value; +Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +Jane's orders (key values): 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID. Next, let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```csharp +// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 +riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName); +indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here. +CheckResult(indexRiakResult); +indexResult = indexRiakResult.Value; +Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +October orders (key values): 1, 2 +``` + +We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/erlang.md b/content/riak/kv/2.9.10/developing/getting-started/erlang.md new file mode 100644 index 0000000000..8bc6364bea --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/erlang.md @@ -0,0 +1,58 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/erlang + - /riak/kv/2.9.10/dev/taste-of-riak/erlang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.10/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let’s create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.10/developing/getting-started/erlang/crud-operations) + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/2.9.10/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..04411c4707 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,170 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +--- + +## Creating Objects In Riak + +First, let’s create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let’s store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}. 
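+%% Like the integer above, this tuple is serialized with term_to_binary/1
+%% under the hood, so it is decoded with binary_to_term/1 when read back.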
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
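+%% This should print the original #book{} record, decoded from the
+%% stored binary.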
+``` + +Next let’s clean up our mess: + +```erlang +riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>). +riakc_pb_socket:stop(Pid). +``` + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.9.10/developing/getting-started/erlang/object-modeling.md new file mode 100644 index 0000000000..0193076a08 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/erlang/object-modeling.md @@ -0,0 +1,341 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Object Modeling" + identifier: "getting_started_erlang_object" + weight: 102 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/object-modeling-erlang + - /riak/kv/2.9.10/dev/taste-of-riak/object-modeling-erlang +--- + +To get started, let's create the records that we'll be using. + +{{% note title="Code Download" %}} +You can also download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema). + +The Github version includes Erlang type specifications which have been omitted +here for brevity. +{{% /note %}} + + +```erlang +%% msgy.hrl + +-define(USER_BUCKET, <<"Users">>). +-define(MSG_BUCKET, <<"Msgs">>). +-define(TIMELINE_BUCKET, <<"Timelines">>). +-define(INBOX, "Inbox"). +-define(SENT, "Sent"). + +-record(user, {user_name, full_name, email}). + +-record(msg, {sender, recipient, created, text}). + +-record(timeline, {owner, msg_type, msgs}). +``` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.10/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. + + +Bucket | Key Pattern | Example Key +:------|:------------|:----------- +`Users` | `<user_name>` | `joeuser` +`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z` +`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>
`marketing_group_Inbox_2014-03-06Z` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. For the +`Msgs` bucket, let's use a combination of the username and the posting +datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601) +format. This combination gives us the pattern `<user_name>_<datetime>`, +which produces keys like `joeuser_2014-03-05T23:20:28Z`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for +users, and `<group_name>_Inbox_<date>` for groups, which will look like +`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2 MB. Objects larger than that can +hurt performance, especially if many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, let's write some modules to +act as repositories that will help us create and work with these records +in Riak: + +```erlang +%% user_repository.erl + +-module(user_repository). +-export([save_user/2, + get_user/2]). +-include("msgy.hrl"). + +save_user(ClientPid, User) -> + RUser = riakc_obj:new(?USER_BUCKET, + list_to_binary(User#user.user_name), + User), + riakc_pb_socket:put(ClientPid, RUser). + +get_user(ClientPid, UserName) -> + {ok, RUser} = riakc_pb_socket:get(ClientPid, + ?USER_BUCKET, + list_to_binary(UserName)), + binary_to_term(riakc_obj:get_value(RUser)). +```
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
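+%% A small end-to-end test: create two users, post a message from one
+%% to the other, then read it back from the recipient's Inbox timeline.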
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/erlang/querying.md b/content/riak/kv/2.9.10/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..a1308d4faa --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/erlang/querying.md @@ -0,0 +1,307 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/querying-erlang + - /riak/kv/2.9.10/dev/taste-of-riak/querying-erlang +--- + + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.9.10/developing/key-value-modeling). 
+ +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle= Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +## Remember to replace the ip and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). + +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj). 
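+%% The orders and the rolled-up summary are stored the same way below,
+%% converting each integer id into a binary to use as the key.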
+ +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.10/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.10/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.10/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.10/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
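+
+%% A caveat worth noting: lists:concat/1 does not zero-pad, so the
+%% timestamp {{2013,10,1},{14,42,26}} becomes "2013101144226" rather
+%% than "20131001144226". For index entries that sort correctly in
+%% range queries, you would pad each field, e.g. with
+%% io_lib:format("~4..0B", [Year]) and io_lib:format("~2..0B", [Month]).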
+
+AddIndicesToOrder = fun(OrderKey) ->
+  {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket,
+                                    list_to_binary(integer_to_list(OrderKey))),
+
+  OrderData = binary_to_term(riakc_obj:get_value(Order)),
+  OrderMetadata = riakc_obj:get_update_metadata(Order),
+
+  MD1 = riakc_obj:set_secondary_index(OrderMetadata,
+                                      [{{binary_index, "order_date"},
+                                        [FormatDate(OrderData#order.order_date)]}]),
+
+  MD2 = riakc_obj:set_secondary_index(MD1,
+                                      [{{integer_index, "salesperson_id"},
+                                        [OrderData#order.salesperson_id]}]),
+
+  Order2 = riakc_obj:update_metadata(Order,MD2),
+  riakc_pb_socket:put(Pid,Order2)
+end.
+
+lists:foreach(AddIndicesToOrder, [1,2,3]).
+
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indices at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```erlang
+riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"3">>],
+                      undefined,undefined}}
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id. Next, let's use a "binary" index. Let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`order_date_bin` index for entries between `20131001` and `20131031`.
+
+```erlang
+riakc_pb_socket:get_index_range(Pid, OrderBucket,
+                                {binary_index, "order_date"},
+                                <<"20131001">>, <<"20131031">>).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"2">>],
+                      undefined,undefined}}
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So, to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indices can have either integer or binary (string) keys.
+* You can search for specific values, or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/getting-started/golang.md b/content/riak/kv/2.9.10/developing/getting-started/golang.md
new file mode 100644
index 0000000000..50b58d9715
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/golang.md
@@ -0,0 +1,81 @@
+---
+title: "Getting Started with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Go"
+    identifier: "getting_started_go"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/taste-of-riak/golang
+  - /riak/kv/2.9.10/dev/taste-of-riak/golang
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.10/using/running-a-cluster) first and ensure you have
+[a working installation of Go](http://golang.org/doc/install).
+
+## Client Setup
+
+First install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main

+import (
+	"encoding/binary"
+	"encoding/json"
+	"sync"
+
+	riak "github.com/basho/riak-go-client"
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+	var err error
+
+	// un-comment-out to enable debug logging
+	// riak.EnableDebugLogging = true
+
+	o := &riak.NewClientOptions{
+		RemoteAddresses: []string{util.GetRiakAddress()},
+	}
+
+	var c *riak.Client
+	c, err = riak.NewClient(o)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	defer func() {
+		if err := c.Stop(); err != nil {
+			util.ErrExit(err)
+		}
+	}()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.10/developing/getting-started/golang/crud-operations)
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/getting-started/golang/crud-operations.md b/content/riak/kv/2.9.10/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..0ff041744a
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,374 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+---
+
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+	val1 := uint32(1)
+	val1buf := make([]byte, 4)
+	binary.LittleEndian.PutUint32(val1buf, val1)
+
+	val2 := "two"
+
+	val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+	var val3json []byte
+	val3json, err = json.Marshal(val3)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	bucket := "test"
+
+	util.Log.Println("Creating Objects In Riak...")
+
+	objs := []*riak.Object{
+		{
+			Bucket:      bucket,
+			Key:         "one",
+			ContentType: "application/octet-stream",
+			Value:       val1buf,
+		},
+		{
+			Bucket:      bucket,
+			Key:         "two",
+			ContentType: "text/plain",
+			Value:       []byte(val2),
+		},
+		{
+			Bucket:      bucket,
+			Key:         "three",
+			ContentType: "application/json",
+			Value:       val3json,
+		},
+	}
+
+	var cmd riak.Command
+	wg := &sync.WaitGroup{}
+
+	for _, o := range objs {
+		cmd, err = riak.NewStoreValueCommandBuilder().
+			WithContent(o).
+			Build()
+		if err != nil {
+			util.ErrLog.Println(err)
+			continue
+		}
+		a := &riak.Async{
+			Command: cmd,
+			Wait:    wg,
+		}
+		if err := c.ExecuteAsync(a); err != nil {
+			util.ErrLog.Println(err)
+		}
+	}
+
+	wg.Wait()
+```
+
+In our first object, we have stored the integer 1 with the lookup key
+of `one`:
+
+```golang
+{
+	Bucket:      bucket,
+	Key:         "one",
+	ContentType: "application/octet-stream",
+	Value:       val1buf,
+}
+```
+
+For our second object, we stored a simple string value of `two` with a
+matching key:
+
+```golang
+{
+	Bucket:      bucket,
+	Key:         "two",
+	ContentType: "text/plain",
+	Value:       []byte(val2),
+}
+```
+
+Finally, the third object we stored was a bit of JSON:
+
+```golang
+{
+	Bucket:      bucket,
+	Key:         "three",
+	ContentType: "application/json",
+	Value:       val3json,
+}
+```
+
+## Reading Objects
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+Requesting the objects by key:
+
+```golang
+util.Log.Println("Reading Objects From Riak...")
+
+d := make(chan riak.Command, len(objs))
+
+for _, o := range objs {
+	cmd, err = riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(o.Key).
+		Build()
+	if err != nil {
+		util.ErrLog.Println(err)
+		continue
+	}
+	a := &riak.Async{
+		Command: cmd,
+		Wait:    wg,
+		Done:    d,
+	}
+	if err := c.ExecuteAsync(a); err != nil {
+		util.ErrLog.Println(err)
+	}
+}
+
+wg.Wait()
+close(d)
+```
+
+Draining the `Done` channel and checking that each fetched value
+matches what we stored:
+
+```golang
+for done := range d {
+	f := done.(*riak.FetchValueCommand)
+	/* un-comment to dump fetched object as JSON
+	if json, jerr := json.MarshalIndent(f.Response, "", "  "); jerr != nil {
+		util.ErrLog.Println(jerr)
+	} else {
+		util.Log.Println("fetched value: ", string(json))
+	}
+	*/
+	obj := f.Response.Values[0]
+	switch obj.Key {
+	case "one":
+		if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected {
+			util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+		}
+	case "two":
+		if actual, expected := string(obj.Value), val2; actual != expected {
+			util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+		}
+	case "three":
+		// obj3 is kept for the update example below; the full program
+		// declares it earlier as: var obj3 *riak.Object
+		obj3 = obj
+		val3.MyValue = 0
+		if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil {
+			util.ErrLog.Println(jerr)
+		} else {
+			if actual, expected := val3.MyValue, int(3); actual != expected {
+				util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+			}
+		}
+	default:
+		util.ErrLog.Printf("unrecognized key: %s", obj.Key)
+	}
+}
+```
+
+## Updating Objects
+
+While some data may be static, other forms of data need to be
+updated.
+
+Let’s update some values:
+
+```golang
+util.Log.Println("Updating Object Three In Riak...")
+
+val3.MyValue = 42
+obj3.Value, err = json.Marshal(val3)
+if err != nil {
+	util.ErrExit(err)
+}
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+	WithContent(obj3).
+	WithReturnBody(true).
+ Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} + +svcmd := cmd.(*riak.StoreValueCommand) +svrsp := svcmd.Response +obj3 = svrsp.Values[0] +val3.MyValue = 0 +if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) +} else { + if actual, expected := val3.MyValue, int(42); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected) + } +} +util.Log.Println("updated object key: ", obj3.Key) +util.Log.Println("updated object value: ", val3.MyValue) +``` + +## Deleting Objects + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called against either the bucket or the +object. + +```golang +for _, o := range objs { + cmd, err = riak.NewDeleteValueCommandBuilder(). + WithBucket(o.Bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. + +For example, this `struct` that represents some information about +a book: + +```golang +type Book struct { + ISBN string + Title string + Author string + Body string + CopiesOwned uint16 +} + +book := &Book{ + ISBN: "1111979723", + Title: "Moby Dick", + Author: "Herman Melville", + Body: "Call me Ishmael. Some years ago...", + CopiesOwned: 3, +} +``` + +We now have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```golang +var jbook []byte +jbook, err = json.Marshal(book) +if err != nil { + util.ErrExit(err) +} + +bookObj := &riak.Object{ + Bucket: "books", + Key: book.ISBN, + ContentType: "application/json", + Value: jbook, +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(bookObj). + WithReturnBody(false). + Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} +``` + +If we fetch our book back and print the data: + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket("books"). + WithKey(book.ISBN). + Build() +if err != nil { + util.ErrExit(err) +} +if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) +} + +fcmd := cmd.(*riak.FetchValueCommand) +bookObj = fcmd.Response.Values[0] +util.Log.Println(string(bookObj.Value)) +``` + +The result is: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +Now, let’s delete the book: + +```golang +... 
+``` + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.9.10/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..b5559087a7 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,551 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/object-modeling-golang + - /riak/kv/2.9.10/dev/taste-of-riak/object-modeling-golang +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.10/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z` <br> `marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period so
+that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+	"encoding/json"
+	"errors"
+
+	riak "github.com/basho/riak-go-client"
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+	Get(key string, notFoundOk bool) (models.Model, error)
+	Save(models.Model) (models.Model, error)
+	getBucketName() string
+	getModel() models.Model
+	getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+	client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+	return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(notFoundOk).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	fcmd := cmd.(*riak.FetchValueCommand)
+
+	if notFoundOk && len(fcmd.Response.Values) == 0 {
+		return nil, nil
+	}
+
+	if len(fcmd.Response.Values) > 1 {
+		// Siblings present that need resolution
+		// Here we'll just return an unexpected error
+		return nil, ErrUnexpectedSiblings
+	} else {
+		return buildModel(r.getModel(), fcmd.Response.Values[0])
+	}
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	key := m.GetId()
+
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(true).
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
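+
+With those helpers in place, a fetch comes back as a typed model. A
+quick sketch of the intended usage (hypothetical; the concrete
+repositories are defined next):
+
+```golang
+userRepo := NewUserRepository(client) // defined below
+
+// notFoundOk=true makes a missing key come back as (nil, nil) instead
+// of an error, so callers can tell "absent" apart from "failed".
+m, err := userRepo.Get("joeuser", true)
+if err != nil {
+	util.ErrExit(err)
+}
+if m == nil {
+	util.Log.Println("user does not exist yet")
+}
+```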
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
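+
+The test program at the end of this section also uses a small
+`TimelineManager` from the `managers` package. That file isn't shown in
+this chapter, so here is a rough sketch of it, inferred purely from how
+the test uses it together with the `TimelineRepository` defined next
+(the real implementation ships with the taste-of-riak repository):
+
+```golang
+package managers
+
+import (
+	"fmt"
+	"time"
+
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+	repos "github.com/basho/taste-of-riak/go/ch03/repositories"
+)
+
+type TimelineManager struct {
+	timelineRepo *repos.TimelineRepository
+	msgRepo      *repos.MsgRepository
+}
+
+func NewTimelineManager(t *repos.TimelineRepository, m *repos.MsgRepository) *TimelineManager {
+	return &TimelineManager{timelineRepo: t, msgRepo: m}
+}
+
+// timelineKey builds keys like "joeuser_Sent_2014-03-06Z".
+func timelineKey(owner string, tt models.TimelineType, when time.Time) string {
+	name := "Inbox"
+	if tt == models.TimelineType_SENT {
+		name = "Sent"
+	}
+	return fmt.Sprintf("%s_%s_%sZ", owner, name, when.UTC().Format("2006-01-02"))
+}
+
+// PostMsg stores the message, then appends its key to the recipient's
+// Inbox timeline and the sender's Sent timeline for that day.
+func (tm *TimelineManager) PostMsg(msg *models.Msg) error {
+	if _, err := tm.msgRepo.Save(msg); err != nil {
+		return err
+	}
+	msgKey := msg.GetId()
+	if err := tm.addToTimeline(msg.Recipient, models.TimelineType_INBOX, msg.Created, msgKey); err != nil {
+		return err
+	}
+	return tm.addToTimeline(msg.Sender, models.TimelineType_SENT, msg.Created, msgKey)
+}
+
+func (tm *TimelineManager) addToTimeline(owner string, tt models.TimelineType, when time.Time, msgKey string) error {
+	key := timelineKey(owner, tt, when)
+	m, err := tm.timelineRepo.Get(key, true) // nil, nil when absent
+	if err != nil {
+		return err
+	}
+	var tl *models.Timeline
+	if m == nil {
+		tl = models.NewTimeline(key)
+	} else {
+		tl = m.(*models.Timeline)
+	}
+	tl.AddMsg(msgKey)
+	_, err = tm.timelineRepo.Save(tl)
+	return err
+}
+
+// GetTimeline fetches the timeline for the given owner, type, and day.
+func (tm *TimelineManager) GetTimeline(owner string, tt models.TimelineType, when time.Time) (*models.Timeline, error) {
+	m, err := tm.timelineRepo.Get(timelineKey(owner, tt, when), false)
+	if err != nil {
+		return nil, err
+	}
+	return m.(*models.Timeline), nil
+}
+```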
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names.
+* How to choose natural keys based on how we want to partition our data.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/getting-started/golang/querying.md b/content/riak/kv/2.9.10/developing/getting-started/golang/querying.md
new file mode 100644
index 0000000000..120028829f
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/golang/querying.md
@@ -0,0 +1,579 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Querying"
+    identifier: "getting_started_go_query"
+    weight: 101
+    parent: "getting_started_go"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/taste-of-riak/querying-golang
+  - /riak/kv/2.9.10/dev/taste-of-riak/querying-golang
+---
+
+## Go Version Setup
+
+For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix OS.
+
+>A Quick Note on Querying and Schemas:
+>
+>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can range from using the same key across multiple buckets for different types of data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+### Denormalization
+
+If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+### Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled-up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it.
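+
+To make the denormalization idea above concrete before we dive in, a
+single struct can own all of its associated pieces (illustrative only;
+the `Customer` used in the worked example below is flat):
+
+```golang
+// Illustrative denormalized shape: the customer "owns" its addresses
+// and preferences, so everything travels in one Riak object.
+type DenormalizedCustomer struct {
+	Name        string
+	Addresses   []string
+	Preferences map[string]string
+}
+```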
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
+ Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +doneChan := make(chan riak.Command) +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Bucket { + case customersBucket: + util.Log.Printf("Customer 1: %v", string(obj.Value)) + case orderSummariesBucket: + util.Log.Printf("OrderSummary 1: %v", string(obj.Value)) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} +``` + +Which returns our amalgamated objects: + +```sh +2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]} +2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z" +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.10/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.10/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.10/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.10/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: + +```golang +util.Log.Println("Adding Index Data") + +// fetch orders to add index data +cmds = cmds[:0] + +for _, order := range orders { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(ordersBucket). + WithKey(order.Id). 
+		Build()
+	if err != nil {
+		util.ErrExit(err)
+	}
+	cmds = append(cmds, cmd)
+}
+
+errored = false
+for _, cmd := range cmds {
+	a := &riak.Async{
+		Command: cmd,
+		Done:    doneChan,
+	}
+	if eerr := c.ExecuteAsync(a); eerr != nil {
+		errored = true
+		util.ErrLog.Println(eerr)
+	}
+}
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+errored = false
+for i := 0; i < len(cmds); i++ {
+	select {
+	case d := <-doneChan:
+		if fv, ok := d.(*riak.FetchValueCommand); ok {
+			obj := fv.Response.Values[0]
+			switch obj.Key {
+			case "1":
+				obj.AddToIntIndex("SalespersonId_int", 9000)
+				obj.AddToIndex("OrderDate_bin", "2013-10-01")
+			case "2":
+				obj.AddToIntIndex("SalespersonId_int", 9001)
+				obj.AddToIndex("OrderDate_bin", "2013-10-15")
+			case "3":
+				obj.AddToIntIndex("SalespersonId_int", 9000)
+				obj.AddToIndex("OrderDate_bin", "2013-11-03")
+			}
+			scmd, serr := riak.NewStoreValueCommandBuilder().
+				WithContent(obj).
+				Build()
+			if serr != nil {
+				util.ErrExit(serr)
+			}
+			a := &riak.Async{
+				Command: scmd,
+				Wait:    wg,
+			}
+			if eerr := c.ExecuteAsync(a); eerr != nil {
+				errored = true
+				util.ErrLog.Println(eerr)
+			}
+		} else {
+			util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+		}
+	case <-time.After(5 * time.Second):
+		util.ErrExit(errors.New("fetch operations took too long"))
+	}
+}
+
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+wg.Wait()
+close(doneChan)
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`:
+
+```golang
+util.Log.Println("Index Queries")
+
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+	WithBucket(ordersBucket).
+	WithIndexName("SalespersonId_int").
+	WithIndexKey("9000").
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+	util.ErrExit(eerr)
+}
+
+qcmd := cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+	util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 Jane's Orders, key: 3
+2015/12/29 09:44:10 Jane's Orders, key: 1
+```
+
+Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id. Next, let's use a *binary* index.
+
+Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`:
+
+```golang
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+	WithBucket(ordersBucket).
+	WithIndexName("OrderDate_bin").
+	WithRange("2013-10-01", "2013-10-31").
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+	util.ErrExit(eerr)
+}
+
+qcmd = cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+	util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 October's Orders, key: 1
+2015/12/29 09:44:10 October's Orders, key: 2
+```
+
+Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either integer or binary (string) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/getting-started/java.md b/content/riak/kv/2.9.10/developing/getting-started/java.md
new file mode 100644
index 0000000000..399318c116
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/java.md
@@ -0,0 +1,92 @@
+---
+title: "Getting Started with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Java"
+    identifier: "getting_started_java"
+    weight: 100
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/taste-of-riak/java
+  - /riak/kv/2.9.10/dev/taste-of-riak/java
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.10/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Java is required.
+
+## Client Setup
+
+To include the Riak Java client in your project, add it to your
+project's dependencies. Here is a Maven example:
+
+```xml
+<dependencies>
+  <dependency>
+    <groupId>com.basho.riak</groupId>
+    <artifactId>riak-client</artifactId>
+    <version>2.1.1</version>
+  </dependency>
+</dependencies>
+```
+
+Next, download
+[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java)
+source code for this tutorial, and save it to your working directory.
+
+{{% note title="Configuring for a local cluster" %}}
+The `TasteOfRiak.java` file that you downloaded is set up to communicate with
+a 1-node Riak cluster listening on `localhost` port 10017. We recommend
+modifying the connection info directly within the `setUpCluster()` method.
+{{% /note %}}
+
+If you execute the `TasteOfRiak.java` file within your IDE, you should
+see the following:
+
+```
+Basic object created
+Location object created for quote object
+StoreValue operation created
+Client object successfully created
+Object storage operation successfully completed
+Success! The object we created and the object we fetched have the same value
+Quote object successfully deleted
+Book object created
+Moby Dick information now stored in Riak
+Book object successfully fetched
+Success! All of our tests check out
+```
+
+Since Java doesn’t have a REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting Up the Cluster
+
+The first step in using the Riak Java client is to create a cluster
+object to facilitate all interactions with Riak. You'll see this on line
+72:
+
+```java
+RiakCluster cluster = setUpCluster();
+```
+
+This calls the private `setUpCluster` method which begins on line 25.
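+
+While the exact contents live in `TasteOfRiak.java`, a `setUpCluster`-style
+method generally has this shape (a sketch only; the host and port shown
+are the tutorial's defaults, not necessarily the file's exact code):
+
+```java
+import com.basho.riak.client.core.RiakCluster;
+import com.basho.riak.client.core.RiakNode;
+
+public class ClusterSetupSketch {
+    // Hypothetical stand-in for TasteOfRiak's setUpCluster() method.
+    private static RiakCluster setUpCluster() throws Exception {
+        // Point a node at the tutorial's default protocol buffers address.
+        RiakNode node = new RiakNode.Builder()
+                .withRemoteAddress("127.0.0.1")
+                .withRemotePort(10017)
+                .build();
+
+        // A cluster wraps one or more nodes; start it before use.
+        RiakCluster cluster = new RiakCluster.Builder(node).build();
+        cluster.start();
+        return cluster;
+    }
+}
+```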
+
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.10/developing/getting-started/java/crud-operations)
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.9.10/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..eb31e5b7d1
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,204 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on down the line.
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Icemand");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{}}riak/kv/2.9.10/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.10/developing/usage/conflict-resolution/)
+documentation.
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book.
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the more simple
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed up above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+
+To update the POJO, we would use `UpdateValue` by
+extending a new `BookUpdate` class as follows:
+
+```java
+public static class BookUpdate extends UpdateValue.Update<Book> {
+    private final Book update;
+    public BookUpdate(Book update){
+        this.update = update;
+    }
+
+    @Override
+    public Book apply(Book t) {
+        if(t == null) {
+            t = new Book();
+        }
+
+        t.author = update.author;
+        t.body = update.body;
+        t.copiesOwned = update.copiesOwned;
+        t.isbn = update.isbn;
+        t.title = update.title;
+
+        return t;
+    }
+}
+```
+
+Then using the `BookUpdate` class with our `mobyDick` object:
+
+```java
+mobyDick.copiesOwned = 5;
+BookUpdate updatedBook = new BookUpdate(mobyDick);
+
+UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation)
+        .withUpdate(updatedBook).build();
+UpdateValue.Response response = client.execute(updateValue);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{}}riak/kv/2.9.10/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.10/developing/usage/conflict-resolution/)
+documentation.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/getting-started/java/object-modeling.md b/content/riak/kv/2.9.10/developing/getting-started/java/object-modeling.md
new file mode 100644
index 0000000000..d0ea940476
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/java/object-modeling.md
@@ -0,0 +1,431 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Object Modeling"
+    identifier: "getting_started_java_object"
+    weight: 102
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/taste-of-riak/object-modeling-java
+  - /riak/kv/2.9.10/dev/taste-of-riak/object-modeling-java
+---
+
+To get started, let's create the models that we'll be using.
+
+```java
+package com.basho.msgy.Models;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class Msg {
+    public String Sender;
+    public String Recipient;
+    public String Created;
+    public String Text;
+
+    public static Msg createNew(String sender, String recipient, String text) {
+        Msg msg = new Msg();
+        msg.Sender = sender;
+        msg.Recipient = recipient;
+        msg.Text = text;
+        msg.Created = GetCurrentISO8601Timestamp();
+        return msg;
+    }
+
+    private static String GetCurrentISO8601Timestamp() {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        // Java Dates don't have microsecond resolution :(
+        // Pad out to microseconds to match other examples.
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.convert.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "msgs";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z` <br> `marketing_group_Inbox_2014-03-06Z` |
`marketing_group_Inbox_2014-03-06Z` |

For the `Users` bucket, we can be certain that we will want each
username to be unique, so let's use the `username` as the key. With the
Java client, we can use the `@RiakKey` annotation to tell the client
that we want to use the `UserName` member as the key. It will
automatically use that value in the future, instead of having to pass the
key in as another parameter when storing a value.

For the `Msgs` bucket, let's use a combination of the username and the
posting datetime in an [ISO 8601
Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
gives us the pattern `<user_name>_<datetime>`, which produces keys like
`joeuser_2014-03-05T23:20:28Z`.

Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
timelines, so we can simply add that type into the key name. We will
also want to partition each collection object into some time period,
that way the object doesn't grow too large (see note below).

For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
users, and `<group_name>_Inbox_<date>` for groups, which will look like
`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
respectively.

{{% note title="Note" %}}
Riak performs best with objects under 1-2MB. Objects larger than that can hurt
performance, especially when many siblings are being created. We will cover
siblings, sibling resolution, and sibling explosions in the next chapter.
{{% /note %}}

#### Keeping our story straight with repositories

Now that we've figured out our object model, let's write some
repositories to help create and work with these objects in Riak:

```java
package com.basho.msgy.Repositories;

import com.basho.msgy.Models.Msg;
import com.basho.riak.client.api.RiakClient;
import com.basho.riak.client.api.commands.kv.FetchValue;
import com.basho.riak.client.api.commands.kv.StoreValue;
import com.basho.riak.client.core.query.Location;
import com.basho.riak.client.core.query.Namespace;

public class MsgRepository {

    static final String BUCKET_NAME = "Msgs";
    protected RiakClient client;

    public MsgRepository(RiakClient client) {
        this.client = client;
    }

    public Msg get(String msgKey) throws Exception {
        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
        FetchValue fetch = new FetchValue.Builder(key).build();
        FetchValue.Response response = client.execute(fetch);
        return response.getValue(Msg.class);
    }

    public String save(Msg msg) throws Exception {
        String msgKey = generateKey(msg);
        Location loc = new Location(new Namespace(BUCKET_NAME), msgKey);
        StoreValue store = new StoreValue.Builder(msg).withLocation(loc).build();
        client.execute(store);
        return msgKey;
    }

    private String generateKey(Msg msg) {
        return msg.Sender + "_" + msg.Created;
    }
}

// ----------------------------------------------------------------------------

package com.basho.msgy.Repositories;

import com.basho.msgy.Models.Msg;
import com.basho.msgy.Models.Timeline;
import com.basho.riak.client.api.RiakClient;
import com.basho.riak.client.api.commands.kv.FetchValue;
import com.basho.riak.client.api.commands.kv.StoreValue;
import com.basho.riak.client.core.query.Location;
import com.basho.riak.client.core.query.Namespace;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

public class TimelineRepository {

    static final String BUCKET_NAME = "Timelines";
    protected RiakClient client;
    protected MsgRepository msgRepo;

    public TimelineRepository(RiakClient client) {
        this.client = client;
        this.msgRepo = new MsgRepository(this.client);
    }

    public void postMsg(Msg msg) throws Exception {
        String msgKey = msgRepo.save(msg);

        // Post to recipient's Inbox timeline
        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);

        // Post to sender's Sent timeline
        addToTimeline(msg, Timeline.TimelineType.Sent, msgKey);
    }

    private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception {
        String timelineKey = generateKeyFromMsg(msg, type);

        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
        FetchValue fetch = new FetchValue.Builder(loc).build();
        Timeline timeline = client.execute(fetch).getValue(Timeline.class);

        if (timeline != null) {
            timeline = addToExistingTimeline(timeline, msgKey);
        } else {
            timeline = createNewTimeline(msg, type, msgKey);
        }

        StoreValue store = new StoreValue.Builder(timeline).withLocation(loc).build();
        client.execute(store);
    }

    public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) {
        String owner = getOwner(msg, type);

        Timeline newTimeline = new Timeline();
        newTimeline.Owner = owner;
        newTimeline.Type = type.toString();
        newTimeline.Msgs.add(msgKey);

        return newTimeline;
    }

    public Timeline addToExistingTimeline(Timeline timeline, String msgKey) {
        timeline.Msgs.add(msgKey);
        return timeline;
    }

    public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws Exception {
        String timelineKey = generateKey(ownerUsername, type, date);
        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
        FetchValue fetch = new FetchValue.Builder(loc).build();
        return client.execute(fetch).getValue(Timeline.class);
    }

    private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) {
        String owner = getOwner(msg, type);
        String dateString = msg.Created.substring(0, 10);
        return generateKey(owner, type, dateString);
    }

    private String getOwner(Msg msg, Timeline.TimelineType type) {
        if(type == Timeline.TimelineType.Inbox)
            return msg.Recipient;
        else
            return msg.Sender;
    }

    private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) {
        String dateString = getIso8601DateStringFromDate(date);
        return generateKey(ownerUsername, type, dateString);
    }

    private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) {
        return ownerUsername + "_" + type.toString() + "_" + dateString;
    }

    private String getIso8601DateStringFromDate(Date date) {
        TimeZone tz = TimeZone.getTimeZone("UTC");
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
        df.setTimeZone(tz);
        return df.format(date);
    }
}

// ----------------------------------------------------------------------------

package com.basho.msgy.Repositories;

import com.basho.msgy.Models.User;
import com.basho.riak.client.api.RiakClient;
import com.basho.riak.client.api.commands.kv.FetchValue;
import com.basho.riak.client.api.commands.kv.StoreValue;
import com.basho.riak.client.core.query.Location;
import com.basho.riak.client.core.query.Namespace;

public class UserRepository {
    static final String BUCKET_NAME = "Users";
    protected RiakClient client;

    public UserRepository(RiakClient client) {
        this.client = client;
    }

    public void save(User user) throws Exception {
        // The @RiakBucketName and @RiakKey annotations on User
        // tell the client where to store the object.
        StoreValue store = new StoreValue.Builder(user).build();
        client.execute(store);
    }

    public User get(String userName) throws Exception {
        Location loc = new Location(new Namespace(BUCKET_NAME), userName);
        FetchValue fetch = new FetchValue.Builder(loc).build();
        return client.execute(fetch).getValue(User.class);
    }
}
```

Finally, let's test them:

```java
package com.basho.msgy;

import com.basho.msgy.Models.Msg;
import com.basho.msgy.Models.Timeline;
import com.basho.msgy.Models.User;
import com.basho.msgy.Repositories.MsgRepository;
import com.basho.msgy.Repositories.TimelineRepository;
import com.basho.msgy.Repositories.UserRepository;
import com.basho.riak.client.api.RiakClient;

import java.util.Date;

public class MsgyMain {

    public static void main(String[] args) throws Exception {
        // Set up our repositories
        RiakClient client = RiakClient.newClient(10017, "127.0.0.1");

        UserRepository userRepo = new UserRepository(client);
        MsgRepository msgRepo = new MsgRepository(client);
        TimelineRepository timelineRepo = new TimelineRepository(client);

        // Create and save users
        User marleen = new User("marleenmgr",
                "Marleen Manager",
                "marleen.manager@basho.com");

        User joe = new User("joeuser",
                "Joe User",
                "joe.user@basho.com");

        userRepo.save(marleen);
        userRepo.save(joe);

        // Create a new Msg and post it to timelines
        Msg msg = Msg.createNew(marleen.UserName,
                joe.UserName,
                "Welcome to the company!");

        timelineRepo.postMsg(msg);

        // Get Joe's inbox for today, and fetch the first message
        Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName,
                Timeline.TimelineType.Inbox,
                new Date());

        Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0));

        System.out.println("From: " + joesFirstMsg.Sender);
        System.out.println("Msg : " + joesFirstMsg.Text);
        System.out.println("");

        client.shutdown();
    }
}
```

As you can see, the repository pattern helps us with a few things:

 - It helps us to see if an object exists before creating a new one
 - It keeps our buckets and key names consistent
 - It provides us with a consistent interface to work with.

While this set of repositories solves many of our problems, it is very
minimal and doesn't cover all the edge cases. For instance, what happens
if two different people try to create a user with the same username?

We can also easily "compute" key names now, but how do we quickly look
up the last 10 messages a user sent? Many of these answers will be
application dependent. If your application shows the last 10 messages in
reverse order, for example, you may want to store that set of data in
another collection object to make lookup faster. There are drawbacks to
every solution, but we recommend seeking out the key/value-based
solution first, as it will likely be the quickest.

So to recap, in this chapter we learned:

* How to choose bucket names
* How to choose natural keys based on how we want to partition our data

diff --git a/content/riak/kv/2.9.10/developing/getting-started/java/querying.md b/content/riak/kv/2.9.10/developing/getting-started/java/querying.md
new file mode 100644
index 0000000000..c338d338fc
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/java/querying.md
@@ -0,0 +1,279 @@
---
title_supertext: "Getting Started:"
title: "Querying with Java"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "Querying"
    identifier: "getting_started_java_query"
    weight: 101
    parent: "getting_started_java"
toc: true
aliases:
  - /riak/2.9.10/dev/taste-of-riak/querying-java
  - /riak/kv/2.9.10/dev/taste-of-riak/querying-java
---

## Java Version Setup

For the Java version, please download the source from GitHub by either
[cloning](https://github.com/basho/taste-of-riak) the source code
repository or downloading the [current zip of the master
branch](https://github.com/basho/taste-of-riak/archive/master.zip).
The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`.
You +may import this code into your favorite editor, or just run it from the +command line using the commands in `BuildAndRun.sh` if you are running +on a *nix* OS. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POJO's in `Customer.java`, `Order.java` and +`OrderSummaries.java`. Let's put some data into Riak so we can play +with it. 
```java
// From SipOfRiak.java

private static Customer createCustomer() {
    Customer customer = new Customer();
    customer.CustomerId = 1;
    customer.Name = "John Smith";
    customer.Address = "123 Main Street";
    customer.City = "Columbus";
    customer.State = "Ohio";
    customer.Zip = "43210";
    customer.Phone = "+1-614-555-5555";
    customer.CreatedDate = "2013-10-01 14:30:26";
    return customer;
}

private static ArrayList<Order> createOrders() {
    ArrayList<Order> orders = new ArrayList<Order>();

    Order order1 = new Order();
    order1.OrderId = 1;
    order1.CustomerId = 1;
    order1.SalespersonId = 9000;
    order1.Items.add(
        new Item("TCV37GIT4NJ",
                 "USB 3.0 Coffee Warmer",
                 15.99));
    order1.Items.add(
        new Item("PEG10BBF2PP",
                 "eTablet Pro; 24GB; Grey",
                 399.99));
    order1.Total = 415.98;
    order1.OrderDate = "2013-10-01 14:42:26";
    orders.add(order1);

    Order order2 = new Order();
    order2.OrderId = 2;
    order2.CustomerId = 1;
    order2.SalespersonId = 9001;
    order2.Items.add(
        new Item("OAX19XWN0QP",
                 "GoSlo Digital Camera",
                 359.99));
    order2.Total = 359.99;
    order2.OrderDate = "2013-10-15 16:43:16";
    orders.add(order2);

    Order order3 = new Order();
    order3.OrderId = 3;
    order3.CustomerId = 1;
    order3.SalespersonId = 9000;
    order3.Items.add(
        new Item("WYK12EPU5EZ",
                 "Call of Battle = Goats - Gamesphere 4",
                 69.99));
    order3.Items.add(
        new Item("TJB84HAA8OA",
                 "Bricko Building Blocks",
                 4.99));
    order3.Total = 74.98;
    order3.OrderDate = "2013-11-03 17:45:28";
    orders.add(order3);
    return orders;
}

private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
    OrderSummary orderSummary = new OrderSummary();
    orderSummary.CustomerId = 1;
    for(Order order: orders)
    {
        orderSummary.Summaries.add(new OrderSummaryItem(order));
    }
    return orderSummary;
}

public static void main(String[] args) throws RiakException {

    System.out.println("Creating Data");
    Customer customer = createCustomer();
    ArrayList<Order> orders = createOrders();
    OrderSummary orderSummary = createOrderSummary(orders);

    System.out.println("Starting Client");
    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);

    System.out.println("Creating Buckets");
    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();

    System.out.println("Storing Data");
    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
    for (Order order : orders) {
        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
    }
    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
```

While individual `Customer` and `Order` objects don't change much (or
shouldn't change), the `OrderSummaries` object will likely change often.
It will do double duty by acting as an index for all of a customer's
orders while also holding some relevant data, such as the order total.
If we show this information in our application often, it's only
one extra request to get all the info.
```java
    System.out.println("Fetching related data by shared key");
    String key = "1";
    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
    System.out.format("Customer 1: %s\n", fetchedCust);
    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
```

Which returns our amalgamated objects:

```bash
Fetching related data by shared key
Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
```

While this pattern is very easy and extremely fast with respect to
queries and complexity, it's up to the application to know about these
intrinsic relationships.

## Secondary Indexes

{{% note %}}
Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.10/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.10/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.10/setup/planning/backend/bitcask) does not support secondary indexes.

See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.10/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from an SQL world, Secondary Indexes (2i) are a lot
like SQL indexes. They are a way to quickly look up objects based on a
secondary key, without scanning through the whole dataset. This makes it
very easy to find groups of related data by values, or even ranges of
values. To properly show this off, we will now add some more data to our
application, and add some secondary index entries at the same time.

```java
    System.out.println("Adding Index Data");
    IRiakObject riakObj = ordersBucket.fetch("1").execute();
    riakObj.addIndex("SalespersonId", 9000);
    riakObj.addIndex("OrderDate", "2013-10-01");
    ordersBucket.store(riakObj).execute();

    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
    riakObj2.addIndex("SalespersonId", 9001);
    riakObj2.addIndex("OrderDate", "2013-10-15");
    ordersBucket.store(riakObj2).execute();

    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
    riakObj3.addIndex("SalespersonId", 9000);
    riakObj3.addIndex("OrderDate", "2013-11-03");
    ordersBucket.store(riakObj3).execute();
```

As you may have noticed, ordinary key/value data is opaque to 2i, so we
have to add entries to the indexes at the application level. Now let's
find all of Jane Appleseed's processed orders. We'll look up the orders
by searching the `SalespersonId` integer index for Jane's id of `9000`.

```java
    // Query for orders where the SalespersonId index is set to 9000
    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
                                           .withValue(9000).execute();

    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
```

Which returns:

```text
Jane's Orders: 1, 3
```

Jane processed orders 1 and 3. We used an "integer" index to reference
Jane's id; next, let's use a "binary" index. Now, let's say that the VP
of Sales wants to know how many orders came in during October 2013. In
this case, we can exploit 2i's range queries.
Let's search the
`OrderDate` binary index for entries between `2013-10-01` and
`2013-10-31`.

```java
    // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
    List<String> octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate"))
                                             .from("2013-10-01").to("2013-10-31").execute();

    System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders));
```

Which returns:

```text
October's Orders: 1, 2
```

Boom! Easy-peasy. We used 2i's range feature to search for a range of
values, and demonstrated binary indexes.

So to recap:

* You can use Secondary Indexes to quickly look up an object based on a
  secondary id other than the object's key.
* Indexes can have either Integer or Binary (String) keys
* You can search for specific values, or a range of values
* Riak will return a list of keys that match the index query

diff --git a/content/riak/kv/2.9.10/developing/getting-started/nodejs.md b/content/riak/kv/2.9.10/developing/getting-started/nodejs.md
new file mode 100644
index 0000000000..bbb1743466
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/nodejs.md
@@ -0,0 +1,103 @@
---
title: "Getting Started with NodeJS"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "NodeJS"
    identifier: "getting_started_nodejs"
    weight: 104
    parent: "developing_getting_started"
toc: true
aliases:
  - /riak/2.9.10/dev/taste-of-riak/nodejs
  - /riak/kv/2.9.10/dev/taste-of-riak/nodejs
---

[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js
[npm]: https://www.npmjs.com/package/basho-riak-client
[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation
[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki

If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.10/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of Node.js 0.12 or later is
required.

Code for these examples is available [here][introduction.js]. To run, follow
these directions:

```bash
git clone git://github.com/basho/riak-nodejs-client-examples
cd riak-nodejs-client-examples
npm install
node ./app.js
```

### Client Setup

Install [the Riak Node.js Client][node_js_installation] through [NPM][npm].

### Connecting to Riak

Connecting to Riak with the Riak Node.js Client requires creating a new client
object and using the callback argument to know when the client is fully
initialized:

```javascript
var Riak = require('basho-riak-client');
var nodes = [
    'riak-test:10017',
    'riak-test:10027',
    'riak-test:10037',
    'riak-test:10047'
];
var client = new Riak.Client(nodes, function (err, c) {
    // NB: at this point the client is fully initialized, and
    // 'client' and 'c' are the same object
});
```

This creates a new `Riak.Client` object which handles all the details of
tracking active nodes and also provides load balancing. The `Riak.Client`
object is used to send commands to Riak.
When your application is completely done with +Riak communications, the following method can be used to gracefully shut the +client down and exit Node.js: + +```javascript +client.stop(function (err, rslt) { + // NB: you may wish to check err + process.exit(); +}); +``` + +Let's make sure the cluster is online with a `Ping` request: + +```javascript +var assert = require('assert'); + +client.ping(function (err, rslt) { + if (err) { + throw new Error(err); + } else { + // On success, ping returns true + assert(rslt === true); + } +}); +``` + +This is some simple code to test that a node in a Riak cluster is online - we +send a simple ping message. Even if the cluster isn't present, the Riak Node.js +Client will return a response message. In the callback it is important to check +that your activity was successful by checking the `err` variable. + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.10/developing/getting-started/nodejs/crud-operations) + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/2.9.10/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..1c5c91f32f --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,136 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "CRUD Operations" + identifier: "getting_started_nodejs_crud" + weight: 100 + parent: "getting_started_nodejs" +toc: true +--- + +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + +### Creating Objects In Riak KV + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going +to want us to do productive work. Let's create some data to save in Riak. + +The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak +key/value objects. At the most basic, a `RiakObject` is responsible for +identifying your object and for translating it into a format that can be easily +saved to Riak. + +```javascript +var async = require('async'); + +var people = [ + { + emailAddress: "bashoman@basho.com", + firstName: "Basho", + lastName: "Man" + }, + { + emailAddress: "johndoe@gmail.com", + firstName: "John", + lastName: "Doe" + } +]; + +var storeFuncs = []; +people.forEach(function (person) { + // Create functions to execute in parallel to store people + storeFuncs.push(function (async_cb) { + client.storeValue({ + bucket: 'contributors', + key: person.emailAddress, + value: person + }, + function(err, rslt) { + async_cb(err, rslt); + } + ); + }); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +In this sample, we create a collection of `Person` objects and then save each +`Person` to Riak. Once again, we check the response from Riak. + +### Reading from Riak + +Let's find a person! + +```javascript +var logger = require('winston'); + +client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true }, + function (err, rslt) { + if (err) { + throw new Error(err); + } else { + var riakObj = rslt.values.shift(); + var bashoman = riakObj.value; + logger.info("I found %s in 'contributors'", bashoman.emailAddress); + } + } +); +``` + +We use `client.fetchValue` to retrieve an object from Riak. 
This returns an
array of `RiakObject` objects, which helpfully encapsulate the
communication with Riak.

After verifying that we've been able to communicate with Riak *and* that we have
a successful result, we use the `value` property to get the object, which has
already been converted to a JavaScript object due to the use of `convertToJs:
true` in the options.

### Modifying Existing Data

Let's say that Basho Man has decided to be known as Riak Man:

```javascript
bashoman.firstName = "Riak";
riakObj.setValue(bashoman);

client.storeValue({ value: riakObj }, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

Updating an object involves modifying a `RiakObject` then using
`client.storeValue` to save the existing object.

### Deleting Data

```javascript
client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

Just like other operations, we check the results that have come back from Riak
to make sure the object was successfully deleted.

The Riak Node.js Client has a lot of additional functionality that makes it easy
to build rich, complex applications with Riak. Check out the
[documentation][nodejs_wiki] to learn more about working with the Riak Node.js
Client and Riak.

diff --git a/content/riak/kv/2.9.10/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/2.9.10/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..f0a187b6af
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,122 @@
---
title_supertext: "Getting Started:"
title: "Object Modeling with NodeJS"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "Object Modeling"
    identifier: "getting_started_nodejs_object"
    weight: 102
    parent: "getting_started_nodejs"
toc: true
aliases:
  - /riak/2.9.10/dev/taste-of-riak/object-modeling-nodejs
  - /riak/kv/2.9.10/dev/taste-of-riak/object-modeling-nodejs
---

To get started, let's create the models that we'll be using.

* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)

To use these classes to store data, we will first have to create a user.
Then, when a user creates a message, we will append that message to one
or more timelines. If it's a private message, we'll append it to the
Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
a group message, we'll append it to the Group's timeline, as well as to
the User's `Sent` timeline.

#### Buckets and Keys Revisited

Now that we've worked out how we will differentiate data in the system,
let's figure out our bucket and key names.

The bucket names are straightforward. We can use `Users`, `Msgs`, and
`Timelines`. The key names, however, are a little trickier. In past
examples we've used sequential integers, but this presents a problem: we
would need a secondary service to hand out these IDs. This service could
easily become a bottleneck in the system, so let's use a natural key.
Natural keys are a great fit for key/value systems because both humans
and computers can easily construct them when needed, and most of the
time they can be made unique enough for a KV store.

| Bucket | Key Pattern | Example Key
|:-------|:------------|:-----------
| `Users` | `<user_name>` | `joeuser`
| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z`
| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br>
`marketing_group_INBOX_2014-03-06` |

For the `Users` bucket, we can be certain that we will want each
username to be unique, so let's use the `userName` as the key.

[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)

For the `Msgs` bucket, let's use a combination of the username and the
posting datetime in an [ISO 8601
Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
gives us the pattern `<user_name>_<datetime>`, which produces keys like
`joeuser_2014-03-05T23:20:28Z`.

[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)

Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
timelines, so we can simply add that type into the key name. We will
also want to partition each collection object into some time period,
that way the object doesn't grow too large (see note below).

For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
users, and `<group_name>_Inbox_<date>` for groups, which will look like
`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
respectively.

{{% note title="Note" %}}
Riak performs best with objects under 1-2MB. Objects larger than that can hurt
performance, especially when many siblings are being created. We will cover
siblings, sibling resolution, and sibling explosions in the next chapter.
{{% /note %}}

#### Keeping our story straight with repositories

Now that we've figured out our object model, let's write some
repositories to help create and work with these objects in Riak:

* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)

Finally, let's test them:

[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)

As you can see, the repository pattern helps us with a few things:

 - It helps us to see if an object exists before creating a new one
 - It keeps our buckets and key names consistent
 - It provides us with a consistent interface to work with.

While this set of repositories solves many of our problems, it is very
minimal and doesn't cover all the edge cases. For instance, what happens
if two different people try to create a user with the same username?

We can also easily "compute" key names now, but how do we quickly look
up the last 10 messages a user sent? Many of these answers will be
application dependent. If your application shows the last 10 messages in
reverse order, for example, you may want to store that set of data in
another collection object to make lookup faster. There are drawbacks to
every solution, but we recommend seeking out the key/value-based
solution first, as it will likely be the quickest.
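To make the key patterns above concrete, here is a minimal sketch (not part of
the sample repository; the function names are illustrative only) of how the
`Msgs` and `Timelines` keys could be derived:

```javascript
// Build the natural keys described in the table above.
// Assumes 'created' is an ISO 8601 string such as '2014-03-06T02:05:13.556Z'.
function makeMsgKey(sender, created) {
    // e.g. 'joeuser_2014-03-06T02:05:13.556Z'
    return sender + '_' + created;
}

function makeTimelineKey(owner, type, created) {
    // Keep only the date portion so one timeline object holds a single
    // day of messages, e.g. 'joeuser_SENT_2014-03-06'
    return owner + '_' + type.toUpperCase() + '_' + created.substring(0, 10);
}
```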
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.9.10/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..6c11e5f534 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/nodejs/querying.md @@ -0,0 +1,145 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/querying-nodejs + - /riak/kv/2.9.10/dev/taste-of-riak/querying-nodejs +--- + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. + +* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33) +* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262) + +While individual Customer and Order objects don't change much (or +shouldn't change), the "Order Summary" object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. 
If we show this information in our application often, it's only
one extra request to get all the info.

[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)

Which returns our amalgamated objects:

```bash
info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
```

While this pattern is very easy and extremely fast with respect to
queries and complexity, it's up to the application to know about these
intrinsic relationships.

## Secondary Indexes

{{% note %}}
Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.10/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.10/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.10/setup/planning/backend/bitcask) does not support secondary indexes.

See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.10/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from an SQL world, Secondary Indexes (2i) are a lot
like SQL indexes. They are a way to quickly look up objects based on a
secondary key, without scanning through the whole dataset. This makes it
very easy to find groups of related data by values, or even ranges of
values. To properly show this off, we will now add some more data to our
application, and add some secondary index entries at the same time.

[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)

As you may have noticed, ordinary key/value data is opaque to 2i, so we
have to add entries to the indexes at the application level. Now let's
find all of Jane Appleseed's processed orders. We'll look up the orders
by searching the `SalespersonId` integer index for Jane's id of `9000`.

[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)

Which returns:

```text
Jane's Orders: 1, 3
```

Jane processed orders 1 and 3. We used an "integer" index to reference
Jane's id; next, let's use a "binary" index. Now, let's say that the VP
of Sales wants to know how many orders came in during October 2013. In
this case, we can exploit 2i's range queries. Let's search the
`OrderDate` binary index for entries between `2013-10-01` and
`2013-10-31`.

[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175)

Which returns:

```text
October's Orders: 1, 2
```

Boom! Easy-peasy. We used 2i's range feature to search for a range of
values, and demonstrated binary indexes.

So to recap:

* You can use Secondary Indexes to quickly look up an object based on a
  secondary id other than the object's key.
* Indexes can have either Integer or Binary (String) keys
* You can search for specific values, or a range of values
* Riak will return a list of keys that match the index query

diff --git a/content/riak/kv/2.9.10/developing/getting-started/php.md b/content/riak/kv/2.9.10/developing/getting-started/php.md
new file mode 100644
index 0000000000..4296cf1a23
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/php.md
@@ -0,0 +1,79 @@
---
title: "Getting Started with PHP"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "PHP"
    identifier: "getting_started_php"
    weight: 107
    parent: "developing_getting_started"
toc: true
aliases:
  - /riak/2.9.10/dev/taste-of-riak/php
  - /riak/kv/2.9.10/dev/taste-of-riak/php
---

If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.10/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) must be installed to fetch the client library package.

## Client Setup

Download and unzip, or clone, the Taste of Riak sample code repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [GitHub repository](https://github.com/basho/taste-of-riak)).

From the `taste-of-riak` directory, use Composer to install the Riak PHP 2.0 client:

```bash
php path/to/your/composer.phar install

# If you did a global install of composer, run this instead:
composer install
```

If you set up a local Riak cluster using the five-minute install method, change line 11 from `->onPort(8098)` to `->onPort(10018)`.

Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:

```text
Reading Objects From Riak...
Updating Objects In Riak...
Deleting Objects From Riak...
Working With Complex Objects...
Serialized Object:
{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
```

Yay, success!

Since we didn't use PHP's REPL environment, let's walk through the code
to see what it actually did at each step.

## Setting up the PHP Client and connections

```php
include_once 'vendor/autoload.php';

use Basho\Riak;
use Basho\Riak\Node;
use Basho\Riak\Command;

$node = (new Node\Builder)
    ->atHost('127.0.0.1')
    ->onPort(8098)
    ->build();

$riak = new Riak([$node]);
```

This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.

We are now ready to start interacting with Riak.
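As a quick smoke test, here is a minimal sketch of issuing a first command with
the `$riak` object we just built; the bucket and key names are placeholders,
not part of the sample code:

```php
// 'my_bucket' and 'my_key' are hypothetical names for illustration only
$location = new Riak\Location('my_key', new Riak\Bucket('my_bucket'));

$response = (new Command\Builder\FetchObject($riak))
    ->atLocation($location)
    ->build()
    ->execute();

// isSuccess() is true when Riak answered the request without error
var_dump($response->isSuccess());
```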
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.10/developing/getting-started/php/crud-operations) + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.9.10/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..92ce2e1918 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/php/crud-operations.md @@ -0,0 +1,185 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with PHP" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "CRUD Operations" + identifier: "getting_started_php_crud" + weight: 100 + parent: "getting_started_php" +toc: true +--- + +## Creating Objects In Riak +First, let’s create a few objects and a bucket to keep them in. + +```php +$bucket = new Riak\Bucket('testBucket'); + +$val1 = 1; +$location1 = new Riak\Location('one', $bucket); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val1) + ->atLocation($location1) + ->build(); +$storeCommand1->execute(); +``` + +In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key. + +```php +$val2 = 'two'; +$location2 = new Riak\Location('two', $bucket); + +$storeCommand2 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val2) + ->atLocation($location2) + ->build(); +$storeCommand2->execute(); +``` + +That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now. + +```php +$val3 = ['myValue' => 3]; +$location3 = new Riak\Location('three', $bucket); + +$storeCommand3 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($val3) + ->atLocation($location3) + ->build(); +$storeCommand3->execute(); +``` + +## Reading Objects From Riak +Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect. + +```php +$response1 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location1) + ->build() + ->execute(); + +$response2 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location2) + ->build() + ->execute(); + +$response3 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location3) + ->withDecodeAsAssociative() + ->build() + ->execute(); + +print_r($response1->getObject()->getData()); +print_r($response2->getObject()->getData()); +print_r($response3->getObject()->getData()); +``` + +That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html). +For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object. + +In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data. + +## Updating Objects In Riak +While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42. 
+ +```php +$object3 = $response3->getObject(); +$data3 = $object3->getData(); + +$data3['myValue'] = 42; +$object3 = $object3->setData(json_encode($data3)); + +$updateCommand = (new Command\Builder\StoreObject($riak)) + ->withObject($object3) + ->atLocation($location3) + ->build(); + +$updateCommand->execute(); +``` + +First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object. +To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object. + +## Deleting Objects From Riak +As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it. + +```php +(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute(); +``` + +### Working With Complex Objects +Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take for example, this plain old PHP object(POPO) that encapsulates some knowledge about a book. + +```php +class Book +{ + var $title; + var $author; + var $body; + var $isbn; + var $copiesOwned; +} + +$book = new Book(); +$book->isbn = '1111979723'; +$book->title = 'Moby Dick'; +$book->author = 'Herman Melville'; +$book->body = 'Call me Ishmael. Some years ago...'; +$book->copiesOwned = 3; +``` + +Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now: + +```php +$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books')); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($book) + ->atLocation($bookLocation) + ->build(); + +$storeCommand1->execute(); +``` + +Some of you may be thinking “But how does the Riak client encode/decode my object”? If we fetch the binary version of our book back and print it as a string, we shall know: + +```php +$fetchBookResponse = (new Command\Builder\FetchObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); + +print('Serialized Object:' . PHP_EOL); +print($fetchBookResponse->getBody() . PHP_EOL); +``` + +```json +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder. 
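Decoding works the same way in reverse. As a small sketch using the same
`$bookLocation` as above, fetching the object and calling `getData()` hands
back a `stdClass` object with the original fields:

```php
// Fetch the stored book and decode the JSON back into an object
$fetchedBook = (new Command\Builder\FetchObject($riak))
    ->atLocation($bookLocation)
    ->build()
    ->execute()
    ->getObject()
    ->getData();

print($fetchedBook->title . PHP_EOL); // Moby Dick
```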
+ +Now that we’ve ruined the magic of object encoding, let’s clean up our mess: + +```php +(new Command\Builder\DeleteObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); +``` + +## Next Steps + +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.9.10/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/php/querying.md b/content/riak/kv/2.9.10/developing/getting-started/php/querying.md new file mode 100644 index 0000000000..7b7cca88e7 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/php/querying.md @@ -0,0 +1,407 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with PHP" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Querying" + identifier: "getting_started_php_query" + weight: 101 + parent: "getting_started_php" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/querying-php + - /riak/kv/2.9.10/dev/taste-of-riak/querying-php +--- + +## A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +## Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. 
```php
<?php

include_once 'vendor/autoload.php';

use Basho\Riak;
use Basho\Riak\Command;
use Basho\Riak\Location;
use Basho\Riak\Node;

$node = (new Node\Builder)
    ->atHost('127.0.0.1')
    ->onPort(8098)
    ->build();

$riak = new Riak([$node]);

// Class definitions for our models

class Customer
{
    var $customerId;
    var $name;
    var $address;
    var $city;
    var $state;
    var $zip;
    var $phone;
    var $createdDate;
}

class Order
{
    public function __construct()
    {
        $this->items = array();
    }
    var $orderId;
    var $customerId;
    var $salespersonId;
    var $items;
    var $total;
    var $orderDate;
}

class Item
{
    public function __construct($itemId, $title, $price)
    {
        $this->itemId = $itemId;
        $this->title = $title;
        $this->price = $price;
    }
    var $itemId;
    var $title;
    var $price;
}

class OrderSummary
{
    public function __construct()
    {
        $this->summaries = array();
    }
    var $customerId;
    var $summaries;
}

class OrderSummaryItem
{
    public function __construct(Order $order)
    {
        $this->orderId = $order->orderId;
        $this->total = $order->total;
        $this->orderDate = $order->orderDate;
    }
    var $orderId;
    var $total;
    var $orderDate;
}

// Creating Data
$customer = new Customer();
$customer->customerId = 1;
$customer->name = 'John Smith';
$customer->address = '123 Main Street';
$customer->city = 'Columbus';
$customer->state = 'Ohio';
$customer->zip = '43210';
$customer->phone = '+1-614-555-5555';
$customer->createdDate = '2013-10-01 14:30:26';

$orders = [];

$order1 = new Order();
$order1->orderId = 1;
$order1->customerId = 1;
$order1->salespersonId = 9000;
$order1->items = [
    new Item(
        'TCV37GIT4NJ',
        'USB 3.0 Coffee Warmer',
        15.99
    ),
    new Item(
        'PEG10BBF2PP',
        'eTablet Pro; 24GB; Grey',
        399.99
    )
];
$order1->total = 415.98;
$order1->orderDate = '2013-10-01 14:42:26';
$orders[] = $order1;

$order2 = new Order();
$order2->orderId = 2;
$order2->customerId = 1;
$order2->salespersonId = 9001;
$order2->items = [
    new Item(
        'OAX19XWN0QP',
        'GoSlo Digital Camera',
        359.99
    )
];
$order2->total = 359.99;
$order2->orderDate = '2013-10-15 16:43:16';
$orders[] = $order2;

$order3 = new Order();
$order3->orderId = 3;
$order3->customerId = 1;
$order3->salespersonId = 9000;
$order3->items = [
    new Item(
        'WYK12EPU5EZ',
        'Call of Battle = Goats - Gamesphere 4',
        69.99
    ),
    new Item(
        'TJB84HAA8OA',
        'Bricko Building Blocks',
        4.99
    )
];
$order3->total = 74.98;
$order3->orderDate = '2013-11-03 17:45:28';
$orders[] = $order3;

$orderSummary = new OrderSummary();
$orderSummary->customerId = 1;
foreach ($orders as $order) {
    $orderSummary->summaries[] = new OrderSummaryItem($order);
}
unset($order);

// Starting Client
$node = (new Node\Builder)
    ->atHost('127.0.0.1')
    ->onPort(8098)
    ->build();

$riak = new Riak([$node]);

// Creating Buckets
$customersBucket = new Riak\Bucket('Customers');
$ordersBucket = new Riak\Bucket('Orders');
$orderSummariesBucket = new Riak\Bucket('OrderSummaries');

// Storing Data
$storeCustomer = (new Command\Builder\StoreObject($riak))
    ->buildJsonObject($customer)
    ->atLocation(new Location($customer->customerId, $customersBucket))
    ->build();
$storeCustomer->execute();

foreach ($orders as $order) {
    $storeOrder = (new Command\Builder\StoreObject($riak))
        ->buildJsonObject($order)
        ->atLocation(new Location($order->orderId, $ordersBucket))
        ->build();
    $storeOrder->execute();
}
unset($order);

$storeSummary = (new Command\Builder\StoreObject($riak))
    ->buildJsonObject($orderSummary)
    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
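    // NB: the summary is keyed by customer id, so it shares its key with
    // the corresponding Customer object (same key, different bucket)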
->build(); +$storeSummary->execute(); +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```php +// Fetching related data by shared key +$fetched_customer = (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $customersBucket)) + ->build()->execute()->getObject()->getData(); + +$fetched_customer->orderSummary = + (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $orderSummariesBucket)) + ->build()->execute()->getObject()->getData(); + +print("Customer with OrderSummary data: \n"); +print_r($fetched_customer); +``` + +Which returns our amalgamated objects: + +```text +Customer with OrderSummary data: +stdClass Object +( + [customerId] => 1 + [name] => John Smith + [address] => 123 Main Street + [city] => Columbus + [state] => Ohio + [zip] => 43210 + [phone] => +1-614-555-5555 + [createdDate] => 2013-10-01 14:30:26 + [orderSummary] => stdClass Object + ( + [customerId] => 1 + [summaries] => Array + ( + [0] => stdClass Object + ( + [orderId] => 1 + [total] => 415.98 + [orderDate] => 2013-10-01 14:42:26 + ) + + [1] => stdClass Object + ( + [orderId] => 2 + [total] => 359.99 + [orderDate] => 2013-10-15 16:43:16 + ) + + [2] => stdClass Object + ( + [orderId] => 3 + [total] => 74.98 + [orderDate] => 2013-11-03 17:45:28 + ) + ) + ) +) +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.10/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.10/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.10/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.10/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```php +// Adding Index Data +$keys = array(1,2,3); +foreach ($keys as $key) { + $orderLocation = new Location($key, $ordersBucket); + $orderObject = (new Command\Builder\FetchObject($riak)) + ->atLocation($orderLocation) + ->build()->execute()->getObject(); + + $order = $orderObject->getData(); + + $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId); + $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate); + + $storeOrder = (new Command\Builder\StoreObject($riak)) + ->withObject($orderObject) + ->atLocation($orderLocation) + ->build(); + $storeOrder->execute(); +} +unset($key); + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. 
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders: 
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id. Next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+```
+
+Which returns:
+
+```text
+October's Orders: 
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which as you can see will return the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
+
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/getting-started/python.md b/content/riak/kv/2.9.10/developing/getting-started/python.md
new file mode 100644
index 0000000000..860639e95b
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/getting-started/python.md
@@ -0,0 +1,102 @@
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/taste-of-riak/python
+  - /riak/kv/2.9.10/dev/taste-of-riak/python
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.10/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`. 
+ +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` - Header files and a static library for Python +* `libffi-dev` - Foreign function interface library +* `libssl-dev` - libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.10/developing/getting-started/python/crud-operations) + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/python/crud-operations.md b/content/riak/kv/2.9.10/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..eea3fde009 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/python/crud-operations.md @@ -0,0 +1,148 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/python/object-modeling.md b/content/riak/kv/2.9.10/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..915b6d751b --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/python/object-modeling.md @@ -0,0 +1,263 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/object-modeling-python + - /riak/kv/2.9.10/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. 
To send this message we can append it to one or more
+`Timeline`s. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br> `marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_<type>_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```python
+class UserRepository:
+    BUCKET = 'Users'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, user):
+        riak_obj = self.client.bucket(self.BUCKET).get(user['user_name'])
+        riak_obj.data = user
+        return riak_obj.store()
+
+    def get(self, user_name):
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        return riak_obj.data
+
+
+class MsgRepository:
+    BUCKET = 'Msgs'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, msg):
+        msgs = self.client.bucket(self.BUCKET)
+        key = self._generate_key(msg)
+
+        riak_obj = msgs.get(key)
+
+        if not riak_obj.exists:
+            riak_obj.data = msg
+            riak_obj.store(if_none_match=True)
+
+        return riak_obj
+
+    def get(self, key):
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _generate_key(self, msg):
+        return msg['sender'] + '_' + msg['created']
+
+
+class TimelineRepository:
+    BUCKET = 'Timelines'
+    SENT = 'Sent'
+    INBOX = 'Inbox'
+
+    def __init__(self, client):
+        self.client = client
+        self.msg_repo = MsgRepository(client)
+
+    def post_message(self, msg):
+        # Save the canonical copy
+        saved_message = self.msg_repo.save(msg)
+        msg_key = saved_message.key
+
+        # Post to sender's Sent timeline
+        self._add_to_timeline(msg, self.SENT, msg_key)
+
+        # Post to recipient's Inbox timeline
+        self._add_to_timeline(msg, self.INBOX, msg_key)
+
+    def get_timeline(self, owner, msg_type, date):
+        key = self._generate_key(owner, msg_type, date)
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _add_to_timeline(self, msg, msg_type, msg_key):
+        timeline_key = self._generate_key_from_msg(msg, msg_type)
+        riak_obj = self.client.bucket(self.BUCKET).get(timeline_key)
+
+        if riak_obj.exists:
+            riak_obj = self._add_to_existing_timeline(riak_obj,
+                                                      msg_key)
+        else:
+            riak_obj = self._create_new_timeline(riak_obj,
+                                                 msg, msg_type,
+                                                 msg_key)
+
+        return riak_obj.store()
+
+    def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key):
+        owner = self._get_owner(msg, msg_type)
+        new_timeline = {'owner': owner,
+                        'msg_type': msg_type,
+                        'msgs': [msg_key]}
+
+        riak_obj.data = new_timeline
+        return riak_obj
+
+    def 
_add_to_existing_timeline(self, riak_obj, msg_key): + riak_obj.data['msgs'].append(msg_key) + return riak_obj + + def _get_owner(self, msg, msg_type): + if msg_type == self.INBOX: + return msg['recipient'] + else: + return msg['sender'] + + def _generate_key_from_msg(self, msg, msg_type): + owner = self._get_owner(msg, msg_type) + return self._generate_key(owner, msg_type, msg['created']) + + def _generate_key(self, owner, msg_type, datetimestr): + dateString = string.split(datetimestr, 'T', 1)[0] + return owner + '_' + msg_type + '_' + dateString + +``` + +Finally, let's test them: + +```python +# Setup our repositories +client = riak.RiakClient(pb_port=10017, protocol='pbc') +userRepo = UserRepository(client) +msgsRepo = MsgRepository(client) +timelineRepo = TimelineRepository(client) + +# Save users +userRepo.save(marleen) +userRepo.save(joe) + +# Post msg to timelines +timelineRepo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timelineRepo.get_timeline( + joe['user_name'], + TimelineRepository.INBOX, + datetime.utcnow().isoformat()) + +joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0]) + +print 'From: {0}\nMsg : {1}\n\n'.format( + joes_first_message['sender'], + joes_first_message['text']) + +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/python/querying.md b/content/riak/kv/2.9.10/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..a03b39e362 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/python/querying.md @@ -0,0 +1,239 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Python" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Querying" + identifier: "getting_started_python_query" + weight: 101 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/querying-python + - /riak/kv/2.9.10/dev/taste-of-riak/querying-python +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index for all customer orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. 
+
+```python
+customer = customer_bucket.get('1').data
+customer['order_summary'] = order_summary_bucket.get('1').data
+customer
+```
+
+Which returns our amalgamated objects:
+
+```python
+{
+  u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210',
+  u'created_date': u'2013-10-01 14:30:26',
+  'order_summary': {
+    u'customer_id': 1, u'summaries': [
+      {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98},
+      {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99},
+      {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98}
+    ]},
+  u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street',
+  u'customer_id': 1
+}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.10/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id. Next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/ruby.md b/content/riak/kv/2.9.10/developing/getting-started/ruby.md new file mode 100644 index 0000000000..d390463f1b --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/ruby.md @@ -0,0 +1,67 @@ +--- +title: "Getting Started with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Ruby" + identifier: "getting_started_ruby" + weight: 101 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/ruby + - /riak/kv/2.9.10/dev/taste-of-riak/ruby +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.10/using/running-a-cluster) first. To try this flavor +of Riak, a working installation of Ruby is required. + +## Client Setup + +First, install the Riak Ruby client via RubyGems. + +```bash +gem install riak-client +``` + +Start IRB, the Ruby REPL, and let’s get set up. Enter the following into +IRB: + +```ruby +require 'riak' +``` + +If you are using a single local Riak node, use the following to create a +new client instance, assuming that the node is running on `localhost` +port 8087: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087) + +# Since the Ruby Riak client uses the Protocol Buffers API by default, +# you can also just enter this: +client = Riak::Client.new(:pb_port => 8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017) + +# For the reasons explain in the snippet above, this will also work: +client = Riak::Client.new(:pb_port => 10017) +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.10/developing/getting-started/ruby/crud-operations) + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/2.9.10/developing/getting-started/ruby/crud-operations.md new file mode 100644 index 0000000000..be481cae96 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/ruby/crud-operations.md @@ -0,0 +1,149 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "CRUD Operations" + identifier: "getting_started_ruby_crud" + weight: 100 + parent: "getting_started_ruby" +toc: true +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```ruby +my_bucket = client.bucket("test") + +val1 = 1 +obj1 = my_bucket.new('one') +obj1.data = val1 +obj1.store() +``` + +In this first example we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```ruby +val2 = "two" +obj2 = my_bucket.new('two') +obj2.data = val2 +obj2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```ruby +val3 = { myValue: 3 } +obj3 = my_bucket.new('three') +obj3.data = val3 +obj3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. 
+ +```ruby +fetched1 = my_bucket.get('one') +fetched2 = my_bucket.get('two') +fetched3 = my_bucket.get('three') + +fetched1.data == val1 +fetched2.data == val2 +fetched3.data.to_json == val3.to_json +``` + +That was easy. we simply request the objects by key. in the last +example, we converted to JSON so we can compare a string key to a symbol +key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to 42. + +```ruby +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called either against the bucket or the +object. + +```ruby +my_bucket.delete('one') +obj2.delete() +obj3.delete() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this Ruby hash that encapsulates some knowledge about +a book. + +```ruby +book = { + :isbn => '1111979723', + :title => 'Moby Dick', + :author => 'Herman Melville', + :body => 'Call me Ishmael. Some years ago...', + :copies_owned => 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now. + +```ruby +books_bucket = client.bucket('books') +new_book = books_bucket.new(book[:isbn]) +new_book.data = book +new_book.store() +``` + +Some of you may be thinking, "But how does the Ruby Riak client +encode/decode my object?" If we fetch our book back and print the raw +data, we shall know: + +```ruby +fetched_book = books_bucket.get(book[:isbn]) +puts fetched_book.raw_data +``` + +Raw Data: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +JSON! The Ruby Riak client will serialize objects to JSON when it comes +across structured data like hashes. For more advanced control over +serialization you can use a library called +[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling +layer over the basic riak client. Ripple falls outside the scope of +this document but we shall visit it later. + +Now, let’s clean up our mess: + +```ruby +new_book.delete() +``` + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/2.9.10/developing/getting-started/ruby/object-modeling.md new file mode 100644 index 0000000000..4ceb7e3206 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/ruby/object-modeling.md @@ -0,0 +1,294 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Object Modeling" + identifier: "getting_started_ruby_object" + weight: 102 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/object-modeling-ruby + - /riak/kv/2.9.10/dev/taste-of-riak/object-modeling-ruby +--- + +To get started, let's create the models that we'll be using. Since the +[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses +hashes when converting to and from JSON, we'll use the library +[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically +coerce class properties to and from hashes. 
You can install this library
+with `gem install hashie`.
+
+```ruby
+# Encoding: utf-8
+
+require 'riak'
+require 'hashie'
+require 'time'
+
+class User < Hashie::Dash
+  property :user_name
+  property :full_name
+  property :email
+end
+
+class Msg < Hashie::Dash
+  property :from
+  property :to
+  property :created
+  property :text
+end
+
+class Timeline < Hashie::Dash
+  property :owner
+  property :type
+  property :msgs
+end
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br> `marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def 
add_to_existing_timeline(key, msg_key) + riak_obj = @client.bucket(BUCKET).get(key) + timeline = Timeline.new(riak_obj.data) + timeline.msgs << msg_key + riak_obj.data = timeline + riak_obj + end + + def get_owner(msg, type) + type == INBOX ? msg.to : msg.from + end + + def generate_key_from_msg(msg, type) + owner = get_owner(msg, type) + generate_key(owner, type, msg.created) + end + + def generate_key(owner, type, date) + owner + '_' + type + '_' + date.utc.strftime('%F') + end +end +``` + +Finally, let's test them: + +```ruby +# Setup our repositories +client = Riak::Client.new(protocol: 'pbc', pb_port: 10017) +user_repo = UserRepository.new(client) +msgs_repo = MsgRepository.new(client) +timeline_repo = TimelineRepository.new(client) + +# Create and save users +marleen = User.new(user_name: 'marleenmgr', + full_name: 'Marleen Manager', + email: 'marleen.manager@basho.com') + +joe = User.new(user_name: 'joeuser', + full_name: 'Joe User', + email: 'joe.user@basho.com') + +user_repo.save(marleen) +user_repo.save(joe) + +# Create new Msg, post to timelines +msg = Msg.new(from: marleen.user_name, + to: joe.user_name, + created: Time.now, + text: 'Welcome to the company!') + +timeline_repo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now) +joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first) + +puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}" +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + diff --git a/content/riak/kv/2.9.10/developing/getting-started/ruby/querying.md b/content/riak/kv/2.9.10/developing/getting-started/ruby/querying.md new file mode 100644 index 0000000000..f69e5ea963 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/getting-started/ruby/querying.md @@ -0,0 +1,255 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Querying" + identifier: "getting_started_ruby_query" + weight: 101 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/2.9.10/dev/taste-of-riak/querying-ruby + - /riak/kv/2.9.10/dev/taste-of-riak/querying-ruby +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. 
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```ruby +shared_key = '1' +customer = customer_bucket.get(shared_key).data +customer[:order_summary] = order_summary_bucket.get(shared_key).data +puts "Combined Customer and Order Summary: " +pp customer +``` + +Which returns our amalgamated objects: + +```ruby +# Combined Customer and Order Summary: +{"customer_id"=>1, + "name"=>"John Smith", + "address"=>"123 Main Street", + "city"=>"Columbus", + "state"=>"Ohio", + "zip"=>"43210", + "phone"=>"+1-614-555-5555", + "created_date"=>"2013-10-01 14:30:26 -0400", + :order_summary=> + {"customer_id"=>1, + "summaries"=> + [{"order_id"=>1, + "total"=>415.98, + "order_date"=>"2013-10-01 14:42:26 -0400"}, + {"order_id"=>2, + "total"=>359.99, + "order_date"=>"2013-10-15 16:43:16 -0400"}, + {"order_id"=>3, + "total"=>74.98, + "order_date"=>"2013-11-03 17:45:28 -0500"}]}} +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.10/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.10/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.10/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.10/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```ruby +(1..3).each do |i| + order = order_bucket.get(i.to_s) + # Initialize our secondary indices + order.indexes['salesperson_id_int'] = [] + order.indexes['order_date_bin'] = [] + + order.indexes['salesperson_id_int'] << order.data['salesperson_id'] + order.indexes['order_date_bin'] << Time.parse(order.data['order_date']) + .strftime('%Y%m%d') + order.store +end +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. +Now let's find all of Jane Appleseed's processed orders, we'll lookup the orders by searching the `saleperson_id_int` index for Jane's id of `9000`. + +```ruby +puts "#Jane's Orders: " +pp order_bucket.get_index('salesperson_id_int', 9000) +``` + +Which returns: + +```ruby +# Jane's Orders: +["1", "3"] +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id, next let's use a "binary" index. +Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`. 
+ +```ruby +puts "#October's Orders: " +pp order_bucket.get_index('order_date_bin', '20131001'..'20131031') +``` + +Which returns: + +```ruby +# October's Orders: +["1", "2"] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + diff --git a/content/riak/kv/2.9.10/developing/key-value-modeling.md b/content/riak/kv/2.9.10/developing/key-value-modeling.md new file mode 100644 index 0000000000..7864b0c9c1 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/key-value-modeling.md @@ -0,0 +1,534 @@ +--- +title: "Riak KV Key/Value Modeling" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Key/Value Modeling" + identifier: "developing_kv_model" + weight: 104 + parent: "developing" +toc: true +aliases: + - /riak/2.9.10/dev/data-modeling/key-value/ + - /riak/kv/2.9.10/dev/data-modeling/key-value/ +--- + +While Riak enables you to take advantage of a wide variety of features +that can be useful in application development, such as [Search]({{}}riak/kv/2.9.10/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.9.10/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.9.10/developing/data-types/), Riak almost always performs best when you +build your application around basic CRUD operations (create, read, +update, and delete) on objects, i.e. when you use Riak as a "pure" +key/value store. + +In this tutorial, we'll suggest some strategies for naming and modeling +for key/value object interactions with Riak. If you'd like to use some +of Riak's other features, we recommend checking out the documentation +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.9.10/developing/app-guide/) for a better sense of which features you might need. + +## Advantages of Key/Value Operations + +Riak's key/value architecture enables it to be more performant than +relational databases in many scenarios because Riak doesn't need to +perform lock, join, union, or other operations when working with +objects. Instead, it interacts with objects on a one-by-one basis, using +**primary key lookups**. + +Primary key lookups store and fetch objects in Riak on the basis of +three basic locators: + +* The object's [key]({{}}riak/kv/2.9.10/learn/concepts/keys-and-objects#keys), which can be anything you + want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/2.9.10/learn/concepts/buckets) which houses the object and its key (bucket + names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/2.9.10/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.9.10/developing/app-guide/replication-properties) and other properties + +It may be useful to think of this system as analogous to a nested +key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you +would find in most programming languages. Below is an example from +[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{<baseurl>}}riak/kv/2.9.10/developing/api/http)):
+
+```
+GET/PUT/DELETE /bucket/<bucket>/keys/<key>
+```
+
+The most important benefit of storing Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist? 
+ +One way to determine this is to [list all keys]({{}}riak/kv/2.9.10/developing/api/protocol-buffers/list-keys) in the +bucket `users`. This approach, however, is _not_ recommended, because +listing all keys in a bucket is a very expensive operation that should +not be used in production. And so another strategy must be employed. + +A better possibility is to use [Riak sets]({{}}riak/kv/2.9.10/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.9.10/developing/data-types) that enable you to store lists of binaries or strings in Riak. +Unlike normal Riak objects, you can interact with Riak sets much like +you interact with sets in most programming languages, i.e. you can add +and remove elements at will. + +Going back to our user data example, instead of simply storing user +records in our `users` bucket, we could set up our application to store +each key in a set when a new record is created. We'll store this set in +the bucket `user_info_sets` (we'll keep it simple) and in the key +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.9.10/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`sets`. + +We can interact with that set on the basis of its location: + +```java +Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames"); + +// With this Location, we can construct fetch operations like this: +FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build(); +``` + +```ruby +require 'riak' + +set_bucket = client.bucket('user_info_sets') + +# We'll make this set global because we'll use it +# inside of a function later on + +$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets') +``` + +```php +$command = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->buildLocation('usernames', 'user_info_sets', 'sets') + ->build(); +``` + +```python +from riak.datatypes import Set + +bucket = client.bucket_type('sets').bucket('user_info_sets') +user_id_set = Set(bucket, 'usernames') +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.9.10/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.9.10/developing/getting-started). 
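+
+As a quick check on the set itself, you can read its membership back at
+any time. Below is a minimal sketch in Python, reusing the `user_id_set`
+object from the Python snippet above; `reload()` and `value` are the same
+data type calls used later in this section, and the username checked is
+purely illustrative:
+
+```python
+# Refresh our local copy of the set from Riak
+user_id_set.reload()
+
+# value is a frozenset of everything stored in the set so far
+print(len(user_id_set.value))
+print('someuser' in user_id_set.value)
+```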
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+    public String username;
+    public String info;
+
+    public User(String username, String info) {
+        this.username = username;
+        this.info = info;
+    }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+    // User records themselves will be stored in the bucket "users"
+    Location userObjectLocation =
+        new Location(new Namespace("users"), user.username);
+    RiakObject userObject = new RiakObject()
+        // We'll keep it simple and store User object data as plain text
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create(user.info));
+    StoreValue store = new StoreValue.Builder(userObject)
+        .withLocation(userObjectLocation)
+        .build();
+    client.execute(store);
+
+    Location userIdSet =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    SetUpdate su = new SetUpdate()
+        .add(BinaryValue.create(user.username));
+    UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+        .build();
+    client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new(client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the set
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+    public $user_name;
+    public $info;
+
+    public function __construct($user_name, $info)
+    {
+        $this->user_name = $user_name;
+        $this->info = $info;
+    }
+}
+
+function store_user(User $user)
+{
+    (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->buildLocation($user->user_name, 'users')
+        ->buildJsonObject($user)
+        ->build()
+        ->execute();
+
+    (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->add($user->user_name)
+        ->build()
+        ->execute();
+}
+```
+
+```python
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    obj = RiakObject(client, client.bucket('users'), user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
+
+Now, let's say that we want to be able to pull up all user records in
+the bucket at once.
+We could do so by iterating through the usernames
+stored in our set and then fetching the object corresponding to each
+username:
+
+```java
+public Set<User> fetchAllUserRecords() throws Exception {
+    // Empty builder sets for usernames and User objects
+    Set<String> usernames = new HashSet<>();
+    Set<User> userSet = new HashSet<>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSetLocation =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSetLocation).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    set.viewAsSet().forEach((BinaryValue username) -> {
+        usernames.add(username.toString());
+    });
+
+    // Fetch User objects for each of the usernames stored in the set
+    usernames.forEach((String username) -> {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        try {
+            User user = client.execute(fetch).getValue(User.class);
+            userSet.add(user);
+        } catch (Exception e) {
+            // Handle fetch failures as appropriate for your application
+        }
+    });
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+    $users = [];
+
+    $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->build()
+        ->execute();
+
+    $user_names = $response->getSet()->getData();
+    foreach($user_names as $user_name) {
+        $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+            ->buildLocation($user_name, 'users')
+            ->build()
+            ->execute();
+
+        $users[$user_name] = $response->getObject()->getData();
+    }
+
+    return $users;
+}
+```
+
+```python
+# We'll create a generator object that will yield a list of Riak objects
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can retrieve that list of Riak objects later on
+list(fetch_all_user_records())
+```
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`.
+If an
+object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) throws Exception {
+    String usernameKey = String.format("user_%s", username);
+    Location loc = new Location(new Namespace("users"), usernameKey);
+    FetchValue fetchUser = new FetchValue.Builder(loc).build();
+    FetchValue.Response res = client.execute(fetchUser);
+    User userObject = res.getValue(User.class);
+    return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->buildLocation('user_' . $user_name, 'users')
+        ->build()
+        ->execute();
+
+    return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good' => {
+    'season X' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  },
+  'bad' => {
+    'season Y' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  }
+}
+```
+
+We can fetch the title of season 8, episode 6:
+
+```ruby
+# For the sake of example, we'll classify season 8 as good:
+
+simpsons['good']['season 8']['episode 6']
+
+# => "A Milhouse Divided"
+```
+
+If your data is best modeled as a three-layered hash, you may want to
+consider using bucket types in the way shown above.
+
+## Resources
+
+More on key/value modeling in Riak can be found in [this
+presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo)
+by Basho evangelist [Hector Castro](https://github.com/hectcastro), with
+the presentation slides available [on Speaker
+Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example).
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage.md b/content/riak/kv/2.9.10/developing/usage.md
new file mode 100644
index 0000000000..e78ab17c3e
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage.md
@@ -0,0 +1,136 @@
+---
+title: "Usage Overview"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Usage"
+    identifier: "developing_usage"
+    weight: 101
+    parent: "developing"
+toc: true
+---
+
+## In This Section
+
+#### [Creating Objects](./creating-objects)
+
+Creating and storing objects in Riak KV.
+
+[Learn More >>](./creating-objects)
+
+
+#### [Reading Objects](./reading-objects)
+
+Reading and fetching objects in Riak KV.
+
+[Learn More >>](./reading-objects)
+
+
+#### [Updating Objects](./updating-objects)
+
+Updating objects in Riak KV.
+
+[Learn More >>](./updating-objects)
+
+
+#### [Deleting Objects](./deleting-objects)
+
+Deleting objects in Riak KV.
+
+[Learn More >>](./deleting-objects)
+
+
+#### [Content Types](./content-types)
+
+Overview of content types and their usage.
+
+[Learn More >>](./content-types)
+
+
+#### [Using Search](./search)
+
+Tutorial on using search.
+
+[Learn More >>](./search)
+
+
+#### [Using MapReduce](./mapreduce)
+
+Guide to using MapReduce in applications.
+
+[Learn More >>](./mapreduce)
+
+
+#### [Using Secondary Indexes](./secondary-indexes)
+
+Overview and usage details of Secondary Indexes (2i).
+
+[Learn More >>](./secondary-indexes)
+
+
+#### [Bucket Types](./bucket-types)
+
+Describes how to use bucket properties.
+
+[Learn More >>](./bucket-types)
+
+
+#### [Using Commit Hooks](./commit-hooks)
+
+Tutorial on pre-commit and post-commit hook functions.
+
+[Learn More >>](./commit-hooks)
+
+
+#### [Creating Search Schemas](./search-schemas)
+
+Step-by-step guide on creating and using custom search schemas.
+
+[Learn More >>](./search-schemas)
+
+
+#### [Searching with Data Types](./searching-data-types)
+
+Guide on using search with Data Types.
+
+[Learn More >>](./searching-data-types)
+
+
+#### [Implementing a Document Store](./document-store)
+
+Tutorial on using Riak KV as a document store.
+
+[Learn More >>](./document-store)
+
+
+#### [Custom Extractors](./custom-extractors)
+
+Details on creating and registering custom extractors with Riak Search.
+
+[Learn More >>](./custom-extractors)
+
+
+#### [Client-side Security](./security)
+
+Overview of client-side security.
+
+[Learn More >>](./security)
+
+
+#### [Replication](./replication)
+
+Documentation on replication properties and their underlying implementation.
+
+[Learn More >>](./replication)
+
+
+#### [Conflict Resolution](./conflict-resolution)
+
+Guide to conflict resolution during object updates.
+
+[Learn More >>](./conflict-resolution)
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/bucket-types.md b/content/riak/kv/2.9.10/developing/usage/bucket-types.md
new file mode 100644
index 0000000000..040716c929
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/bucket-types.md
@@ -0,0 +1,101 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Bucket Types"
+    identifier: "usage_bucket_types"
+    weight: 108
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/bucket-types
+  - /riak/kv/2.9.10/dev/advanced/bucket-types
+---
+
+If you ever need to turn off indexing for a bucket, set the
+`search_index` property to the `_dont_index_` sentinel value.
+
+## Bucket Properties
+
+Although we recommend that you use all new buckets under a bucket type,
+if you have existing data with a type-free bucket (i.e. under the
+`default` bucket type) you can set the `search_index` property for a
+specific bucket.
+
+```java
+Namespace catsBucket = new Namespace("cats");
+StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket)
+    .withSearchIndex("famous")
+    .build();
+client.execute(storePropsOp);
+```
+
+```ruby
+bucket = client.bucket('cats')
+bucket.properties = {'search_index' => 'famous'}
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak))
+    ->withName('famous')
+    ->buildBucket('cats')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket('cats')
+bucket.set_properties({'search_index': 'famous'})
+```
+
+```csharp
+var properties = new RiakBucketProperties();
+properties.SetSearchIndex("famous");
+var rslt = client.SetBucketProperties("cats", properties);
+```
+
+```javascript
+var bucketProps_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    // success
+};
+
+var store = new Riak.Commands.KV.StoreBucketProps.Builder()
+    .withBucket("cats")
+    .withSearchIndex("famous")
+    .withCallback(bucketProps_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/buckets/cats/props \
+  -H'content-type:application/json' \
+  -d'{"props":{"search_index":"famous"}}'
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/commit-hooks.md b/content/riak/kv/2.9.10/developing/usage/commit-hooks.md
new file mode 100644
index 0000000000..02ccd978db
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/commit-hooks.md
@@ -0,0 +1,242 @@
+---
+title: "Using Commit Hooks"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Using Commit Hooks"
+    identifier: "usage_commit_hooks"
+    weight: 109
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/using/commit-hooks
+  - /riak/kv/2.9.10/dev/using/commit-hooks
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types
+
+Pre- and post-commit hooks are functions that are invoked before or
+after an object has been written to Riak.
+To provide a few examples, commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/2.9.10/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions are allowed three possible return values:
+
+- A Riak object - This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before it is written.
+- `fail` - The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/2.9.10/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` - The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
+
+Errors that occur when processing Erlang pre-commit hooks will be
+reported in the `sasl-error.log` file with lines that start with
+`problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5 MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object
+(`Object`) as its input and runs a pattern-matching operation on the
+object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
+function determines that the size of the object's value (returned by the
+`riak_object:get_value` function) is greater than 5,242,880 bytes (5 MB),
+then the hook will fail the commit with the message `Object is larger
+than 5 MB.` This will stop the write. If the object is not larger than
+5 MB, Riak will return the object and allow the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty
+list, meaning that no pre-commit hooks are specified by default. Adding
+one or more pre-commit hook functions to this list, as documented above,
+will cause Riak to start evaluating those hook functions when bucket
+entries are created, updated, or deleted. Riak stops evaluating
+pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way to use
+pre-commit hooks is to validate data before it is written to Riak.
+Below is an Erlang hook that validates JSON data before it is written
+to Riak.
+
+First, a sample JSON object that will be evaluated by the hook:
+
+```json
+{
+  "user_info": {
+    "name": "Mark Phillips",
+    "age": "25"
+  },
+  "session_info": {
+    "id": 3254425,
+    "items": [29, 37, 34]
+  }
+}
+```
+
+The following hook will validate the JSON object:
+
+```erlang
+validate(Object) ->
+  try
+    mochijson2:decode(riak_object:get_value(Object)),
+    Object
+  catch
+    throw:invalid_utf8 ->
+      {fail, "Invalid JSON: Illegal UTF-8 character"};
+    error:Error ->
+      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
+  end.
+```
+
+**Note**: All pre-commit hook functions are executed for each create and update operation.
+
+## Post-Commit Hooks
+
+Post-commit hooks are run after a write has completed successfully. More
+specifically, the hook function is called immediately before the calling
+process is notified of the successful write.
+
+Hook functions must accept a single argument: the object instance just
+written. The return value of the function is ignored. As with pre-commit
+hooks, deletes are considered writes, so post-commit hook functions will
+need to inspect the object's metadata for the presence of `X-Riak-Deleted`
+to determine whether a delete has occurred. Errors that occur when
+processing post-commit hooks are likewise reported in the
+`sasl-error.log` file with lines that start with `problem invoking hook`.
+
+#### Example
+
+The following post-commit hook creates a secondary index on the `email`
+field of a JSON object:
+
+```erlang
+postcommit_index_on_email(Object) ->
+    %% Determine the target bucket name
+    Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),
+
+    %% Decode the JSON body of the object
+    {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),
+
+    %% Extract the email field
+    {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),
+
+    %% Create a new object for the target bucket
+    %% NOTE: This doesn't handle the case where the
+    %% index object already exists!
+    IndexObj = riak_object:new(
+        Bucket, Key, <<>>, %% no object contents
+        dict:from_list(
+            [
+                {<<"content-type">>, "text/plain"},
+                {<<"Links">>,
+                    [
+                        {
+                            {riak_object:bucket(Object), riak_object:key(Object)},
+                            <<"indexed">>
+                        }]}
+            ]
+        )
+    ),
+
+    %% Get a riak client
+    {ok, C} = riak:local_client(),
+
+    %% Store the object
+    C:put(IndexObj).
+```
+
+### Chaining
+
+The default value of the bucket `postcommit` property is an empty list,
+meaning that no post-commit hooks are specified by default. Adding one
+or more post-commit hook functions to the list, as documented above,
+will cause Riak to start evaluating those hook functions immediately
+after data has been created, updated, or deleted. Each post-commit hook
+function runs in a separate process so it's possible for several hook
+functions, triggered by the same update, to execute in parallel.
+
+**Note**: All post-commit hook functions are executed for each create,
+update, or delete.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/conflict-resolution.md b/content/riak/kv/2.9.10/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..0bc66c2c8a
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/conflict-resolution.md
@@ -0,0 +1,680 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/using/conflict-resolution
+  - /riak/kv/2.9.10/dev/using/conflict-resolution
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a
+[clustered]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#node) is capable of receiving requests without requiring that
+every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
+unavoidable. Often, Riak can resolve these conflicts on its own
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+
+{{% note title="Important note on terminology" %}}
+In versions of Riak prior to 2.0, vector clocks were the only causal context
+mechanism available in Riak, which changed with the introduction of dotted
+version vectors in 2.0. Please note that you may frequently find terminology in
+client library APIs, internal Basho documentation, and more that uses the term
+"vector clock" interchangeably with causal context in general. Riak's HTTP API
+still uses an `X-Riak-Vclock` header, for example, even if you are using dotted
+version vectors.
+{{% /note %}}
+
+But even when you use causal context, Riak cannot always decide which
+value is most causally recent, especially in cases involving concurrent
+updates to an object. So how does Riak behave when it can't decide on a
+single most-up-to-date value? **That is your choice**. A full listing of
+available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now,
+though, please bear in mind that we strongly recommend one of the
+following two options:
+
+1. If your data can be modeled as one of the currently available [Riak
+   Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types), we recommend using one of these types,
+   because all of them have conflict resolution _built in_, completely
+   relieving applications of the need to engage in conflict resolution.
+2. If your data cannot be modeled as one of the available Data Types,
+   we recommend allowing Riak to generate [siblings](#siblings) and to design your application to resolve
+   conflicts in a way that fits your use case. Developing your own
+   **conflict resolution strategy** can be tricky, but it has clear
+   advantages over other approaches.
+
+Because Riak allows for a mixed approach when storing and managing data,
+you can apply multiple conflict resolution strategies within a cluster.
+
+> **Note on strong consistency**
+>
+> In versions of Riak 2.0 and later, you have the option of using Riak in
+a strongly consistent fashion. This document pertains to usage of Riak
+as an _eventually_ consistent system. If you'd like to use Riak's
+strong consistency feature, please refer to the following documents:
+>
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/strong-consistency) - A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/2.9.10/configuring/strong-consistency) - A guide for operators
+> * [strong consistency][use ref strong consistency] - A more theoretical explication of strong
+  consistency
+
+## Client- and Server-side Conflict Resolution
+
+Riak's eventual consistency model is powerful because Riak is
+fundamentally non-opinionated about how data resolution takes place.
+While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+approaches to conflict resolution that are available. In Riak, you can
+mix and match conflict resolution strategies at the bucket level,
+[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets)
+to consider when reasoning about conflict resolution are the
+`allow_mult` and `last_write_wins` properties.
+
+These properties provide you with the following basic options:
+
+### Timestamp-based Resolution
+
+If the [`allow_mult`](#siblings) parameter is set to
+`false`, Riak resolves all object replica conflicts internally and does
+not return siblings to the client.
+How Riak resolves those conflicts
+depends on the value that you set for a different bucket property,
+[`last_write_wins`]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets). If `last_write_wins` is set to `false`,
+Riak will resolve all conflicts on the basis of
+[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are
+attached to all Riak objects as metadata.
+
+The problem with timestamps is that they are not a reliable resolution
+mechanism in distributed systems, and they always bear the risk of data
+loss. A better yet still-problematic option is to adopt a
+last-write-wins strategy, described directly below.
+
+### Last-write-wins
+
+Another way to manage conflicts is to set `allow_mult` to `false`, as
+with timestamp-based resolution, while also setting the
+`last_write_wins` parameter to
+`true`. This produces a so-called last-write-wins (LWW) strategy whereby
+Riak foregoes the use of all internal conflict resolution strategies
+when making writes, effectively disregarding all previous writes.
+
+The problem with LWW is that it will necessarily drop some writes in the
+case of concurrent updates in the name of preventing sibling creation.
+If your use case requires that your application be able to reason about
+differing values produced in the case of concurrent updates, then we
+advise against LWW as a general conflict resolution strategy.
+
+However, LWW can be useful---and safe---if you are certain that there
+will be no concurrent updates. If you are storing immutable data in
+which each object is guaranteed to have its own key or engaging in
+operations related to bulk loading, you should consider LWW.
+
+{{% note title="Undefined behavior warning" %}}
+Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to
+unpredictable behavior and should always be avoided.
+{{% /note %}}
+
+### Resolve Conflicts on the Application Side
+
+While setting `allow_mult` to `false` unburdens applications from having
+to reason about siblings, delegating that responsibility to Riak itself,
+it bears all of the drawbacks explained above. On the other hand,
+setting `allow_mult` to `true` has the following benefits:
+
+* Riak will retain writes even in the case of concurrent updates to a
+  key, which enables you to capture the benefits of high availability
+  with a far lower risk of data loss
+* If your application encounters siblings, it can apply its own
+  use-case-specific conflict resolution logic
+
+Conflict resolution in Riak can be a complex business, but the presence
+of this variety of options means that requests to Riak can always be
+made in accordance with your data model(s), business needs, and use
+cases. For examples of client-side sibling resolution, see the following
+client-library-specific docs:
+
+* [Java]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/ruby)
+* [Python]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/python)
+* [C#]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/csharp)
+* [Node.js]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/nodejs)
+
+In Riak versions 2.0 and later, `allow_mult` is set to `true` by default
+for any [bucket types]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) that you create.
+This means
+that if you wish to avoid client-side sibling resolution, you have a few
+options:
+
+* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types)
+  that set `allow_mult` to `false`
+* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/2.9.10/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/2.9.10/configuring/reference#default-bucket-properties) for your
+  cluster. If you set the `buckets.default.allow_mult` parameter to
+  `false`, all bucket types that you create will have `allow_mult` set
+  to `false` by default.
+
+## Causal Context
+
+When a value is stored in Riak, it is tagged with a piece of metadata
+called a **causal context** which establishes the object's initial
+version. Causal context comes in one of two possible forms, depending
+on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context#vector-clocks) will be used.
+
+Causal context essentially enables Riak to compare the different values
+of objects stored in Riak and to determine a number of important things
+about those values:
+
+* Whether one value is a direct descendant of the other
+* Whether the values are direct descendants of a common parent
+* Whether the values are unrelated in recent heritage
+
+Using the information provided by causal context, Riak is frequently,
+though not always, able to resolve conflicts between values without
+producing siblings.
+
+Both vector clocks and dotted version vectors are non-human-readable and
+look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+If `allow_mult` is set to `true`, you should _always_ use causal context
+when updating objects, _unless you are certain that no object exists
+under that key_. Failing to use causal context with mutable data,
+especially for objects that are frequently updated, can lead to
+[sibling explosion]({{<baseurl>}}riak/kv/2.9.10/using/performance/latency-reduction#siblings), which can
+produce a variety of problems in your cluster. Fortunately, much of the
+work involved with using causal context is handled automatically by
+Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.10/developing/client-libraries). Examples can be found for each
+client library in the [Object Updates]({{<baseurl>}}riak/kv/2.9.10/developing/usage/updating-objects) document.
+
+## Siblings
+
+A **sibling** is created when Riak is unable to resolve the canonical
+version of an object being stored, i.e. when Riak is presented with
+multiple possible values for an object and can't figure out which one is
+most causally recent. The following scenarios can create sibling values
+inside of a single object:
+
+1. **Concurrent writes** - If two writes occur simultaneously from
+clients, Riak may not be able to choose a single value to store, in
+which case the object will be given a sibling. These writes could happen
+on the same node or on different nodes.
+2. **Stale causal context** - Writes from any client using a stale
+[causal context]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context).
+This is a less likely scenario if a client updates
+the object by reading the object first, fetching the causal context
+currently attached to the object, and then returning that causal context
+to Riak when performing the update (fortunately, our client libraries
+handle much of this automatically). However, even if a client follows
+this protocol when performing updates, a situation may occur in which an
+update happens from a different client while the read/write cycle is
+taking place. This may cause the first client to issue the write with an
+old causal context value and for a sibling to be created. A client is
+"misbehaved" if it habitually updates objects with a stale or no context
+object.
+3. **Missing causal context** - If an object is updated with no causal
+context attached, siblings are very likely to be created. This is an
+unlikely scenario if you're using a Basho client library, but it _can_
+happen if you are manipulating objects using a client like `curl` and
+forgetting to set the `X-Riak-Vclock` header.
+
+## Siblings in Action
+
+Let's have a more concrete look at how siblings work in Riak. First,
+we'll create a bucket type called `siblings_allowed` with `allow_mult`
+set to `true`:
+
+```bash
+riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
+riak-admin bucket-type activate siblings_allowed
+riak-admin bucket-type status siblings_allowed
+```
+
+If the type has been activated, running the `status` command should
+return `siblings_allowed is active`. Now, we'll create two objects and
+write both of them to the same key without first fetching the object
+(which obtains the causal context):
+
+```java
+Location bestCharacterKey =
+    new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+RiakObject obj1 = new RiakObject()
+    .setContentType("text/plain")
+    .setValue(BinaryValue.create("Ren"));
+RiakObject obj2 = new RiakObject()
+    .setContentType("text/plain")
+    .setValue(BinaryValue.create("Stimpy"));
+StoreValue store1 = new StoreValue.Builder(obj1)
+    .withLocation(bestCharacterKey)
+    .build();
+StoreValue store2 = new StoreValue.Builder(obj2)
+    .withLocation(bestCharacterKey)
+    .build();
+client.execute(store1);
+client.execute(store2);
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = Riak::RObject.new(bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'Ren'
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'Stimpy'
+obj2.store
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = RiakObject(client, bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.data = 'Ren'
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.data = 'Stimpy'
+obj2.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('siblings_allowed');
+obj1.setBucket('nickolodeon');
+obj1.setKey('best_character');
+obj1.setValue('Ren');
+
+var obj2 = new Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries]({{<baseurl>}}riak/kv/2.9.10/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.10/developing/getting-started) section.
+
+At this point, multiple objects have been stored in the same key without
+passing any causal context to Riak. Let's see what happens if we try to
+read contents of the object:
+
+```java
+Location bestCharacterKey =
+    new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed', bucket:
+        'nickolodeon', key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found.
+We should get this response:
+
+```java
+com.basho.riak.client.cap.UnresolvedConflictException: Siblings found
+```
+
+```ruby
+<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]>
+```
+
+```python
+[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>]
+```
+
+```csharp
+Sibling count: 2
+    VTag: 1DSVo7VED8AC6llS8IcDE6
+    VTag: 7EiwrlFAJI5VMLK87vU4tE
+```
+
+```javascript
+info: nickolodeon/best_character has '2' siblings
+```
+
+```curl
+Siblings:
+175xDv0I3UFCfGRC7K7U9z
+6zY2mUCFPEoL834vYCDmPe
+```
+
+As you can see, reading an object with sibling values will result in
+some form of "multiple choices" response (e.g. `300 Multiple Choices` in
+HTTP). If you're using the HTTP interface and want to view all sibling
+values, you can attach an `Accept: multipart/mixed` header to your
+request:
+
+```curl
+curl -H "Accept: multipart/mixed" \
+  http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Response (without headers):
+
+```
+Ren
+--WUnzXITIPJFwucNwfdaofMkEG7H
+
+Stimpy
+--WUnzXITIPJFwucNwfdaofMkEG7H--
+```
+
+If you select the first of the two siblings and retrieve its value, you
+should see `Ren` and not `Stimpy`.
+
+### Using Causal Context
+
+Once you are presented with multiple options for a single value, you
+must determine the correct value. In an application, this can be done
+either in an automatic fashion, using a use case-specific resolver, or
+by presenting the conflicting objects to the end user. For more
+information on application-side conflict resolution, see our
+client-library-specific documentation for the following languages:
+
+* [Java]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/ruby)
+* [Python]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/python)
+* [C#]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/csharp)
+* [Node.js]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/nodejs)
+
+We won't deal with conflict resolution in this section. Instead, we'll
+focus on how to use causal context.
+
+After having written several objects to Riak in the section above, we
+have values in our object: `Ren` and `Stimpy`. But let's say that we
+decide that `Stimpy` is the correct value based on our application's use
+case. In order to resolve the conflict, we need to do three things:
+
+1. Fetch the current object (which will return both siblings)
+2. Modify the value of the object, i.e. make the value `Stimpy`
+3. Write the object back to the `best_character` key
+
+What happens when we fetch the object first, prior to the update, is
+that the object handled by the client has a causal context attached. At
+that point, we can modify the object's value, and when we write the
+object back to Riak, _the causal context will automatically be attached
+to it_.
+Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+    new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(bestCharacterKey)
+    .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed',
+    bucket: 'nickolodeon',
+    key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var riakObj = rslt.values.shift();
+    riakObj.setValue('Stimpy');
+    client.storeValue({ value: riakObj, returnBody: true },
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+
+            assert(rslt.values.length === 1);
+        }
+    );
+});
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible to have two clients that are
+simultaneously engaging in conflict resolution. To avoid a pathological
+divergence, you should be sure to limit the number of reconciliations and fail
+once that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/2.9.10/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant volume of updates is performed on a single
+object in a small period of time.
+While updating a single object
+_extremely_ frequently is not recommended, you can tune Riak's vector
+clock pruning to prevent vector clocks from growing too large too
+quickly. More on pruning in the [section below](#vector-clock-pruning).
+
+### How does `last_write_wins` affect resolution?
+
+On the surface, it seems like setting `allow_mult` to `false`
+(the default) and `last_write_wins` to `true` would result in the same
+behavior, but there is a subtle distinction.
+
+Even though both settings return only one value to the client, setting
+`allow_mult` to `false` still uses vector clocks for resolution, whereas
+if `last_write_wins` is `true`, Riak reads the timestamp to determine
+the latest version. Deeper in the system, if `allow_mult` is `false`,
+Riak will still allow siblings to exist when they are created (via
+concurrent writes or network partitions), whereas setting
+`last_write_wins` to `true` means that Riak will overwrite the value
+with the one that has the later timestamp.
+
+When you don't care about sibling creation, setting `allow_mult` to
+`false` has the least surprising behavior: you get the latest value,
+but network partitions are handled gracefully. However, for cases in
+which keys are rewritten often (and quickly) and the new value isn't
+necessarily dependent on the old value, `last_write_wins` will provide
+better performance. Some use cases where you might want to use
+`last_write_wins` include caching, session storage, and insert-only
+(no updates).
+
+{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}}
+The combination of setting both the `allow_mult` and `last_write_wins`
+properties to `true` leads to undefined behavior and should not be used.
+{{% /note %}}
+
+## Vector Clock Pruning
+
+Riak regularly prunes vector clocks to prevent overgrowth based on four
+parameters which can be set for any bucket type that you create:
+
+Parameter | Default value | Description
+:---------|:--------------|:-----------
+`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned
+`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned
+`young_vclock` | `20` | If a vector clock entry is younger than this value (in seconds), it will not be pruned
+`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in seconds), it will be pruned
+
+This diagram shows how the values of these parameters dictate the vector
+clock pruning process:
+
+![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)
+
+## More Information
+
+Additional background information on vector clocks:
+
+* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock)
+* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/)
+* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/)
+* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563)
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/csharp.md
new file mode 100644
index 0000000000..6935a81d03
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/csharp.md
@@ -0,0 +1,122 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "C Sharp"
"C Sharp" + identifier: "usage_conflict_resolution_csharp" + weight: 103 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.10/dev/using/conflict-resolution/csharp + - /riak/kv/2.9.10/dev/using/conflict-resolution/csharp +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak .NET client][riak_dotnet_client]. + +## How the .NET Client Handles Conflict Resolution + +In the Riak .NET client, every Riak object has a `siblings` property that +provides access to a list of that object's sibling values. If there are no +siblings, that property will return an empty list. + +Here's an example of an object with siblings: + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +So what happens if the count of `obj.Siblings` is greater than 0, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `Siblings` list and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `Siblings` list and will +fetch, update and store the definitive value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Now, modify the object's value +obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain); + +// Then, store the object which has vector clock attached +var putRslt = client.Put(obj); +CheckResult(putRslt); + +obj = putRslt.Value; +// Voila, no more siblings! +Debug.Assert(obj.Siblings.Count == 0); +``` + +### Choosing a value from `Siblings` + +This example shows a basic sibling resolution strategy in which the first +sibling is chosen as the canonical value. 
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..564a0b5d15
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,61 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/using/conflict-resolution/golang
+  - /riak/kv/2.9.10/dev/using/conflict-resolution/golang
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, the result of a fetch can contain a slice of
+sibling objects in its `Values` field. If there are no siblings, the
+`Values` slice will contain a single value.
+
+[*Example:* creating an object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either: fetch, update, and store a
+canonical value; or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type.
+ +[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210) + + + diff --git a/content/riak/kv/2.9.10/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/java.md new file mode 100644 index 0000000000..37e2353757 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/java.md @@ -0,0 +1,275 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Java" + identifier: "usage_conflict_resolution_java" + weight: 100 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.10/dev/using/conflict-resolution/java + - /riak/kv/2.9.10/dev/using/conflict-resolution/java +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Java +client](https://github.com/basho/riak-java-client). + +## How the Java Client Handles Conflict Resolution + +The official Riak Java client provides a `ConflictResolver` interface +for handling sibling resolution. This interface requires that you +implement a `resolve` method that takes a Java `List` of objects of a +specific type that are stored in Riak and produces a single object of +that type, i.e. converts a `List<T>` to a single `T`. Once that +interface has been implemented, it can be registered as a singleton and +thereby applied to all read operations on a specific data type. Below is +an example resolver for the class `Foo`: + +```java +import com.basho.riak.client.api.cap.ConflictResolver; + +public class FooResolver implements ConflictResolver<Foo> { + @Override + public Foo resolve(List<Foo> siblings) { + // Insert your sibling resolution logic here + } +} +``` + +What happens within the `resolve` method is up to you and will always +depend on the use case at hand. You can implement a resolver that +selects a random `Foo` from the list, chooses the `Foo` with the most +recent timestamp (if you've set up the class `Foo` to have timestamps), +etc. In this tutorial we'll provide a simple example to get you started. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends" in the network. +Each user will bear the class `User`, which we'll create below. All of +the data for our application will be stored in buckets that bear the +[bucket type]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) `siblings`, and for this bucket type +`allow_mult` is set to `true`, which means that Riak will generate +siblings in certain cases---siblings that our application will need to +be equipped to resolve when they arise. + +The question that we need to ask ourselves now is this: if a given user +has sibling values, i.e. if there are multiple `friends` lists and Riak +can't decide which one is most causally recent, which list should be +deemed "correct" from the standpoint of the application? What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? 
This decision will always be yours to make. Here, though, we'll keep it
simple and say that the following criterion will hold: if conflicting
lists exist, _the longer list will be the one that our application deems
correct_. So if the user `user1234` has a sibling conflict where one
possible value has `friends` lists with 100, 75, and 10 friends,
respectively, the list of 100 friends will win out. While this might
not make sense in real-world applications, it's a good jumping-off
point. We'll explore the drawbacks of this approach, as well as a better
alternative, in this document as well.

### Creating Our Data Class

We'll start by creating a `User` class for each user's data. Each `User`
object will consist of a `username` as well as a `friends` property that
lists the usernames, as strings, of the user's friends. We'll use a
`Set` for the `friends` property to avoid duplicates.

```java
public class User {
    public String username;
    public Set<String> friends;

    public User(String username, Set<String> friends) {
        this.username = username;
        this.friends = friends;
    }
}
```

Here's an example of instantiating a new `User` object:

```java
Set<String> friends = new HashSet<String>();
friends.add("fred");
friends.add("barney");
User bashobunny = new User("bashobunny", friends);
```

### Implementing a Conflict Resolution Interface

So what happens if siblings are present and the user `bashobunny` has
different friend lists in different object replicas? For that we can
implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
need to implement that interface in a way that is specific to the need
at hand, i.e. taking a list of `User` objects and returning the `User`
object that has the longest `friends` list:

```java
import java.util.List;

import com.basho.riak.client.api.cap.ConflictResolver;

public class UserResolver implements ConflictResolver<User> {
    @Override
    public User resolve(List<User> siblings) {
        // If there are no objects present, return null
        if (siblings.size() == 0) {
            return null;
        // If there is only one User object present, return that object
        } else if (siblings.size() == 1) {
            return siblings.get(0);
        // And if there are multiple User objects, return the object
        // with the longest list
        } else {
            int longestList = 0;
            User userWithLongestList = null;

            // Iterate through the User objects to check for the longest
            // list
            for (User user : siblings) {
                if (user.friends.size() > longestList) {
                    userWithLongestList = user;
                    longestList = user.friends.size();
                }
            }
            // If all sibling User objects have a friends list with a length
            // of 0, it doesn't matter which sibling is selected, so we'll
            // simply select the first one in the list:
            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
        }
    }
}
```

### Registering a Conflict Resolver Class

To use a conflict resolver, we must register it:

```java
ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
factory.registerConflictResolver(User.class, new UserResolver());
```

With the resolver registered, the resolution logic that we have created
will resolve siblings automatically upon read. Registering a custom
conflict resolver can occur at any point in the application's lifecycle
and will be applied on all reads that involve that object type.
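With `UserResolver` registered, reads can hand back an already-resolved
`User`. Here is a minimal sketch of such a read, assuming (as in the other
clients' examples on these pages) that the object lives in the `users`
bucket of the `siblings` bucket type:

```java
import com.basho.riak.client.api.commands.kv.FetchValue;
import com.basho.riak.client.core.query.Location;
import com.basho.riak.client.core.query.Namespace;

Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
FetchValue fetch = new FetchValue.Builder(key).build();
FetchValue.Response response = client.execute(fetch);
// getValue(User.class) converts the stored value back into a User and
// runs the registered UserResolver if siblings are present
User bashobunny = response.getValue(User.class);
```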
## Conflict Resolution and Writes

In the above example, we created a conflict resolver that resolves a
list of discrepant `User` objects and returns a single `User`. It's
important to note, however, that this resolver will only provide the
application with a single "correct" value; it will _not_ write that
value back to Riak. That requires a separate step. When this step should
be undertaken depends on your application. In general, though, we
recommend writing objects to Riak only when the application is ready to
commit them, i.e. when all of the changes that need to be made to the
object have been made and the application is ready to persist the state
of the object in Riak.

Correspondingly, we recommend that updates to objects in Riak follow
these steps:

1. **Read** the object from Riak
2. **Resolve** sibling conflicts if they exist, allowing the
application to reason about one "correct" value for the object (this
step is the subject of this tutorial)
3. **Modify** the object
4. **Write** the object to Riak once the necessary changes have been
made

You can find more on writing objects to Riak, including examples from
the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.10/developing/usage) section.

## More Advanced Example

Resolving sibling `User` values on the basis of which user has the
longest `friends` list has the benefit of being simple but it's probably
not a good resolution strategy for our social networking application
because it means that unwanted data loss is inevitable. If one friends
list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
list containing `A`, `B`, and `C` will be chosen. So what about friends
`D` and `E`? Those usernames are essentially lost. In the sections
below, we'll implement some other conflict resolution strategies as
examples.

### Merging the Lists

To avoid losing data like this, a better strategy may be to merge the
lists. We can modify our original `resolve` function in our
`UserResolver` to accomplish precisely that:

```java
public class UserResolver implements ConflictResolver<User> {
    @Override
    public User resolve(List<User> siblings) {
        // We apply the same logic as before, returning null if the
        // key is empty and returning the one sibling if there is only
        // one User in the siblings list
        if (siblings.size() == 0) {
            return null;
        } else if (siblings.size() == 1) {
            return siblings.get(0);
        } else {
            // We begin with an empty Set
            Set<String> setBuilder = new HashSet<String>();

            // We know that all User objects in the List will have the
            // same username, since we used the username for the key, so
            // we can fetch the username of any User in the list:
            String username = siblings.get(0).username;

            // Now for each User object in the list we add the friends
            // list to our empty Set
            for (User user : siblings) {
                setBuilder.addAll(user.friends);
            }

            // Then we return a new User object that takes the Set we
            // built as the friends list
            return new User(username, setBuilder);
        }
    }
}
```

Since the `friends` list is a Java `Set`, we don't need to worry about
duplicate usernames.

The drawback to this approach is the following: with a conflict
resolution strategy like this, it's more or less inevitable that a user
will remove a friend from their friends list, and that that friend will
end up back on the list during a conflict resolution operation.
While
that's certainly not desirable, that is likely better than the
alternative proposed in the first example, which entails usernames being
simply dropped from friends lists. Sibling resolution strategies almost
always carry potential drawbacks of this sort.

## Riak Data Types

An important thing to always bear in mind when working with conflict
resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/) that have
specific conflict resolution mechanics built in. If you have data that
can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#maps), then you should seriously
consider using those Data Types instead of creating your own
application-side resolution logic.

In the example above, we were dealing with conflict resolution within a
set, in particular the `friends` list associated with each `User`
object. The merge operation that we built to handle conflict resolution
is analogous to the resolution logic that is built into Riak sets. For
more information on how you could potentially replace the client-side
resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets).

diff --git a/content/riak/kv/2.9.10/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/nodejs.md
new file mode 100644
index 0000000000..b6e32741b4
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/nodejs.md
@@ -0,0 +1,61 @@
---
title_supertext: "Conflict Resolution:"
title: "NodeJS"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "NodeJS"
    identifier: "usage_conflict_resolution_nodejs"
    weight: 104
    parent: "usage_conflict_resolution"
toc: true
aliases:
  - /riak/2.9.10/dev/using/conflict-resolution/nodejs
  - /riak/kv/2.9.10/dev/using/conflict-resolution/nodejs
---

For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
requires applications to resolve siblings according to use-case-specific
criteria. Here, we'll provide a brief guide to conflict resolution using the
official [Riak Node.js client](https://github.com/basho/riak-nodejs-client).

## How the Node.js Client Handles Conflict Resolution

In the Riak Node.js client, the result of a fetch (`rslt.values`) may be an
array of sibling objects. If there are no siblings, that array will contain
a single value.

[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68)

So what happens if the length of `rslt.values` is greater than 1, as in the case
above?

In order to resolve siblings, you need to either fetch, update and store a
canonical value, or choose a sibling from the `values` array and store that as
the canonical value.

## Basic Conflict Resolution Example

In this example, you will ignore the contents of the `values` array and will
fetch, update and store the definitive value.
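As a rough sketch of that flow (the bucket and key names are illustrative;
the example linked below is the authoritative version):

```javascript
// Sketch: fetch the conflicted object, overwrite its value, and store it
// back. The fetched object carries the causal context Riak needs in
// order to replace the siblings with the new value.
client.fetchValue({ bucketType: 'siblings', bucket: 'users', key: 'bashobunny' },
    function (err, rslt) {
        if (err) {
            throw new Error(err);
        }
        var riakObj = rslt.values.shift();
        riakObj.setValue('the definitive value');
        client.storeValue({
            bucketType: 'siblings', bucket: 'users', key: 'bashobunny',
            value: riakObj
        }, function (err, rslt) {
            if (err) {
                throw new Error(err);
            }
        });
    });
```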
[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)

### Choosing a value from `rslt.values`

This example shows a basic sibling resolution strategy in which the first
sibling is chosen as the canonical value.

[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)

### Using `conflictResolver`

This example shows a basic sibling resolution strategy in which the first
sibling is chosen as the canonical value via a conflict resolution function.

[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)

diff --git a/content/riak/kv/2.9.10/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..32b3139da4
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,243 @@
---
title_supertext: "Conflict Resolution:"
title: "PHP"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "PHP"
    identifier: "usage_conflict_resolution_php"
    weight: 105
    parent: "usage_conflict_resolution"
toc: true
aliases:
  - /riak/2.9.10/dev/using/conflict-resolution/php
  - /riak/kv/2.9.10/dev/using/conflict-resolution/php
---

For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution), we strongly recommend adopting a
conflict resolution strategy that requires applications to resolve
siblings according to use-case-specific criteria. Here, we'll provide a
brief guide to conflict resolution using the official [Riak PHP
client](https://github.com/basho/riak-php-client).

## How the PHP Client Handles Conflict Resolution

Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
object, which provides what is needed to handle object conflicts. If siblings exist
and have been returned from the server within the response body, they will be
available within the response object. See below:

```php
$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
  ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
  ->build()
  ->execute();

echo $response->getStatusCode(); // 300
echo $response->hasSiblings(); // 1
echo $response->getSiblings(); // \Basho\Riak\Object[]
```

## Basic Conflict Resolution Example

Let's say that we're building a social network application and storing
lists of usernames representing each user's "friends" in the network.
Each user will bear the class `User`, which we'll create below. All of
the data for our application will be stored in buckets that bear the
[bucket type]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) `siblings`, and for this bucket type
`allow_mult` is set to `true`, which means that Riak will generate
siblings in certain cases---siblings that our application will need to
be equipped to resolve when they arise.

The question that we need to ask ourselves now is this: if a given user
has sibling values, i.e. if there are multiple `friends` lists and Riak
can't decide which one is most causally recent, which list should be
deemed "correct" from the standpoint of the application?
What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? + +This decision will always be yours to make. Here, though, we'll keep it +simple and say that the following criterion will hold: if conflicting +lists exist, _the longer list will be the one that our application deems +correct_. So if the user `user1234` has a sibling conflict where one +possible value has `friends` lists with 100, 75, and 10 friends, +respectively, the list of 100 friends will win out. While this might +not make sense in real-world applications, it's a good jumping-off +point. We'll explore the drawbacks of this approach, as well as a better +alternative, in this document as well. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` as well as a `friends` property that +lists the usernames, as strings, of the user's friends. We'll use a +`Set` for the `friends` property to avoid duplicates. + +```php +class User { + public $username; + public $friends; + + public function __construct($username, array $friends = []) + { + $this->username = $username; + $this->friends = $friends; + } + + public function __toString() + { + return json_encode([ + 'username' => $this->username, + 'friends' => $this->friends, + 'friends_count' => count($this->friends) + ]); + } +} +``` + +Here's an example of instantiating a new `User` object: + +```php +$bashobunny = new User('bashobunny', ['fred', 'barney']); +``` + +### Implementing a Conflict Resolution Function + +Let's say that we've stored a bunch of `User` objects in Riak and that a +few concurrent writes have led to siblings. How is our application going +to deal with that? First, let's say that there's a `User` object stored +in the bucket `users` (which is of the bucket type `siblings`, as +explained above) under the key `bashobunny`. We can fetch the object +that is stored there and see if it has siblings: + +```php +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('bashobunny', 'users', 'siblings') + ->build() + ->execute(); + +echo $response->hasSiblings(); // 1 +``` + +If we get `true`, then there are siblings. So what do we do in that +case? At this point, we need to write a function that resolves the list +of siblings, i.e. reduces the `$response->getSiblings()` array down to one member. +In our case, we need a function that takes a Riak response object as its argument, +applies some logic to the list of values contained in the `siblings` property +of the object, and returns a single value. 
For our example use case here, we'll
return the sibling with the longest `friends` list:

```php
use \Basho\Riak;
use \Basho\Riak\Command;

function longest_friends_list_resolver(Command\Object\Response $response)
{
    if (!$response->hasSiblings()) {
        // No siblings, so return the single object from the response
        return $response->getObject();
    }

    $siblings = $response->getSiblings();
    $max_key = 0;
    foreach ($siblings as $key => $sibling) {
        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
            $max_key = $key;
        }
    }

    return $siblings[$max_key];
}
```

We can then embed this function into a more general function for fetching
objects from the users bucket:

```php
function fetch_user_by_username($username, Riak $riak)
{
    $response = (new Command\Builder\FetchObject($riak))
      ->buildLocation($username, 'users', 'siblings')
      ->build()
      ->execute();

    return longest_friends_list_resolver($response);
}

$bashobunny = fetch_user_by_username('bashobunny', $riak);
```

Now, when a `User` object is fetched (assuming that the username acts as
a key for the object), a single value is returned for the `friends`
list. This means that our application can now use a "correct" value
instead of having to deal with multiple values.

## Conflict Resolution and Writes

In the above example, we created a conflict resolver that resolves a
list of discrepant `User` objects and returns a single `User`. It's
important to note, however, that this resolver will only provide the
application with a single "correct" value; it will _not_ write that
value back to Riak. That requires a separate step. When this step should
be undertaken depends on your application. In general, though, we
recommend writing objects to Riak only when the application is ready to
commit them, i.e. when all of the changes that need to be made to the
object have been made and the application is ready to persist the state
of the object in Riak.

Correspondingly, we recommend that updates to objects in Riak follow
these steps:

1. **Read** the object from Riak
2. **Resolve** sibling conflicts if they exist, allowing the
application to reason about one "correct" value for the object (this
step is the subject of this tutorial)
3. **Modify** the object
4. **Write** the object to Riak once the necessary changes have been
made

You can find more on writing objects to Riak, including examples from
the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.10/developing/usage) section.

## More Advanced Example

Resolving sibling `User` values on the basis of which user has the longest
`friends` list has the benefit of being simple but it's probably not a
good resolution strategy for our social networking application because
it means that unwanted data loss is inevitable. If one friend list
contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
containing `A`, `B`, and `C` will be chosen. So what about friends `D`
and `E`? Those usernames are essentially lost. In the sections below,
we'll implement an alternative strategy as an example.

### Merging the Lists

To avoid losing data like this, a better strategy would be to merge the
lists. We can modify our original resolver function to accomplish
precisely that.
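Here is a sketch of what that merging resolver might look like, assuming
(as the resolver above does) that `getData()` returns the decoded array
written by `User::__toString()`; the function name is our own:

```php
function merging_friends_list_resolver(Command\Object\Response $response)
{
    if (!$response->hasSiblings()) {
        return $response->getObject();
    }

    $friends = [];
    $username = '';
    foreach ($response->getSiblings() as $sibling) {
        $data = $sibling->getData();
        $username = $data['username'];
        // Collect every sibling's friends into one list
        $friends = array_merge($friends, $data['friends']);
    }

    // array_unique drops the duplicate usernames introduced by the merge
    return new User($username, array_values(array_unique($friends)));
}
```

Writing the merged `User` back to Riak is then the separate write step
described in the Conflict Resolution and Writes section above.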
+ +The drawback to this approach is that it's more or less inevitable that a user +will remove a friend from their friends list, and then that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets). + + + diff --git a/content/riak/kv/2.9.10/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/python.md new file mode 100644 index 0000000000..c22c0cf06e --- /dev/null +++ b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/python.md @@ -0,0 +1,257 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Python" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Python" + identifier: "usage_conflict_resolution_python" + weight: 102 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.10/dev/using/conflict-resolution/python + - /riak/kv/2.9.10/dev/using/conflict-resolution/python +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Python +client](https://github.com/basho/riak-python-client). + +## How the Python Client Handles Conflict Resolution + +In the official Python client, every object of the `RiakObject` class +has a `siblings` property that provides access to a list of an object's +sibling values. If there are no siblings, that property will return a +list with only one item. Here's an example of an object with siblings: + +```python +bucket = client.bucket('seahawks') +obj = bucket.get('coach') +obj.siblings + +# The output: +[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x108x1da62c1>] +``` + +So what happens if the length of `obj.siblings` is greater than 1, as in +the case above? 
The easiest way to resolve siblings automatically with
the Python client is to create a conflict-resolving function that takes
a list of sibling values and returns a single value. Such resolution
functions can be registered either at the object level or the bucket
level. A more complete explanation can be found in the section directly
below.

## Basic Conflict Resolution Example

Let's say that we're building a social network application and storing
lists of usernames representing each user's "friends." Each user will
be of the class `User`, which we'll create below. All of the data for our
application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
to `true`, which means that Riak will generate siblings in certain
cases---siblings that our application will need to be equipped to
resolve when necessary.

The question that we need to ask ourselves at this point is the
following: if a given user has conflicting lists, which list should be
deemed more "correct"? What criteria should be applied? Should the lists
be merged? Should we pick a list at random and deem that list correct?
We'll keep it simple here and say that the following criterion will
hold: if multiple conflicting lists exist, _the longer list will be the one
that our application deems correct_. While this might not make sense in
real-world applications, it's a good jumping-off point.

### Creating Our Data Class

We'll start by creating a `User` class for each user's data. Each `User`
object will consist of a `friends` property that lists the usernames, as
strings, of the user's friends. We will also create a `to_json` method,
as we'll be storing each `User` object as JSON:

```python
class User(object):
    def __init__(self, username, friends):
        self.username = username
        self.friends = friends

    def to_json(self):
        return vars(self)
```

Now, we can create `User` objects and see what they look like as JSON:

```python
new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])

new_user.to_json()
# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
```

### Implementing and Registering a Conflict Resolution Function

Let's say that we've stored a bunch of `User` objects in Riak and that a
few concurrent writes have led to siblings. How is our application going
to deal with that? First, let's say that there's a `User` object stored
in the bucket `users` (which is of the bucket type `siblings`, as
explained above) under the key `bashobunny`. We can fetch the object
that is stored there and see if it has siblings:

```python
bucket = client.bucket_type('siblings').bucket('users')
obj = bucket.get('bashobunny')

print len(obj.siblings) > 1
```

If we get `True`, then there are siblings. So what do we do in that
case? The Python client allows us to write a conflict resolution hook
function that will be triggered any time siblings are found, i.e. any
time `len(obj.siblings) > 1`. A hook function like this needs to take a
single `RiakObject` object as its argument, apply some sort of logic to
the list of values contained in the `siblings` property, and ultimately
leave the `siblings` property holding a list with a single "correct" value.
For our example case, we'll
keep the value with the longest `friends` list:

```python
def longest_friends_list_resolver(riak_object):
    # We'll specify a lambda function that operates on the length of
    # each sibling's "friends" list:
    lm = lambda sibling: len(sibling.data['friends'])
    # Then we'll keep only the sibling with the maximum value for the
    # length of the "friends" list:
    riak_object.siblings = [max(riak_object.siblings, key=lm), ]
```

### Registering a Conflict Resolver Function

In the Python client, resolver functions can be registered at the object
level, as in this example:

```python
bucket = client.bucket_type('siblings').bucket('users')
obj = RiakObject(client, bucket, 'bashobunny')
obj.resolver = longest_friends_list_resolver

# Now, when the object is loaded from Riak, it will resolve to a single
# value instead of multiple values when both commands are executed:
obj.reload()
obj.store()
```

Alternatively, resolvers can be registered at the bucket level, so that
the resolution is applied to all objects in the bucket:

```python
bucket = client.bucket_type('siblings').bucket('users')
bucket.resolver = longest_friends_list_resolver

obj = RiakObject(client, bucket, 'bashobunny')
obj.reload()
obj.store()

# The resolver will also be applied if you perform operations using the
# bucket object:

bucket.get('bashobunny')
bucket.get('some_other_user')
```

## Conflict Resolution and Writes

In the above example, we created a conflict resolver that resolves a
list of discrepant `User` object values and returns a single value. It's
important to note, however, that this resolver will only provide the
application with a single "correct" value; it will _not_ write that
value back to Riak. That requires a separate step. When this step should
be undertaken depends on your application. In general, though, we
recommend writing objects to Riak only when the application is ready to
commit them, i.e. when all of the changes that need to be made to the
object have been made and the application is ready to persist the state
of the object in Riak.

Correspondingly, we recommend that updates to objects in Riak follow
these steps:

1. **Read** the object from Riak
2. **Resolve** sibling conflicts if they exist, allowing the
application to reason about one "correct" value for the object (this
step is the subject of this tutorial)
3. **Modify** the object
4. **Write** the object to Riak once the necessary changes have been
made

You can find more on writing objects to Riak, including code examples
from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.10/developing/usage) section.

## More Advanced Example

Resolving sibling `User` values on the basis of which user has the
longest `friends` list has the benefit of being simple but it's probably
not a good resolution strategy for our social networking application
because it means that unwanted data loss is inevitable. If one friend
list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
list containing `A`, `B`, and `C` will be chosen. So what about friends
`D` and `E`? Those usernames are essentially lost. In the sections
below, we'll implement an alternative strategy as an example.

### Merging the Lists

To avoid losing data like this, a better strategy would be to merge the
lists.
We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets). 
+ + + diff --git a/content/riak/kv/2.9.10/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/ruby.md new file mode 100644 index 0000000000..fc408b39d0 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/usage/conflict-resolution/ruby.md @@ -0,0 +1,253 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Ruby" + identifier: "usage_conflict_resolution_ruby" + weight: 101 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.10/dev/using/conflict-resolution/ruby + - /riak/kv/2.9.10/dev/using/conflict-resolution/ruby +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Ruby +client](https://github.com/basho/riak-ruby-client). + +## How the Ruby Client Handles Conflict Resolution + +In the official Ruby client, every Riak object has a `siblings` property +that provides access to a list of that object's sibling values. If there +are no siblings, that property will return an array with only one item. +Here's an example of an object with siblings: + +```ruby +bucket = client.bucket('seahawks') +obj = bucket.get('coach') +obj.siblings + +# The output: +[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">] +``` + +So what happens if the length of `obj.siblings` is greater than 1, as in +the case above? In order to resolve siblings, you need to create a +resolution function that takes a Riak object and reduces the `siblings` +array down to a single value. An example is provided in the section +below. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends." Each user will be +of the class `User`, which we'll create below. All of the data for our +application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +to `true`, which means that Riak will generate siblings in certain +cases---siblings that our application will need to be equipped to +resolve when necessary. + +The question that we need to ask ourselves at this point is the +following: if a given user has conflicting lists, which list should be +deemed more "correct?" What criteria should be applied? Should the lists +be merged? Should we pick a list at random and deem that list correct? +We'll keep it simple here and say that the following criterion will +hold: if multiple conflict lists exist, _the longer list will be the one +that our application deems correct_. While this might not make sense in +real-world applications, it's a good jumping-off point. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` and a `friends` property that lists +the usernames, as strings, of the user's friends. 
We will also create a
`to_json` method, as we'll be storing each `User` object as JSON:

```ruby
class User
  def initialize(username, friends)
    @username = username
    @friends = friends
  end

  def to_json
    { :username => @username, :friends => @friends }
  end
end
```

Now, we can create `User` objects and see what they look like as JSON:

```ruby
new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])

new_user.to_json
# => {:username=>"riakuser127", :friends=>["captheorem238", "siblingsrule572"]}
```

### Implementing a Conflict Resolution Function

Let's say that we've stored a bunch of `User` objects in Riak and that a
few concurrent writes have led to siblings. How is our application going
to deal with that? First, let's say that there's a `User` object stored
in the bucket `users` (which is of the bucket type `siblings`, as
explained above) under the key `bashobunny`. We can fetch the object
that is stored there and see if it has siblings:

```ruby
bucket = client.bucket('users')
obj = bucket.get('bashobunny', type: 'siblings')
p obj.siblings.length > 1
```

If we get `true`, then there are siblings. So what do we do in that
case? At this point, we need to write a function that resolves the list
of siblings, i.e. reduces the `obj.siblings` array down to one member.
In our case, we need a function that takes a single Riak object (or
`RObject` in the Ruby client) as its argument, applies some logic to the
list of values contained in the `siblings` property of the object, and
returns a single value. For our example use case here, we'll return the
sibling with the longest `friends` list:

```ruby
def longest_friends_list_resolver(riak_object)
  # The "conflict?" method is built into the Ruby client
  if riak_object.conflict?
    # The "max_by" method enables us to select the sibling with the
    # longest "friends" list
    riak_object.siblings.max_by{ |user| user.data['friends'].length }
  else
    # If there are no siblings, we can simply return the object's
    # "content" as is
    riak_object.content
  end
end
```

We can then embed this function into a more general function for
fetching objects from the `users` bucket:

```ruby
def fetch_user_by_username(username)
  bucket = client.bucket('users')
  user_object = bucket.get(username, type: 'siblings')
  longest_friends_list_resolver(user_object)
end

bashobunny = fetch_user_by_username('bashobunny')
```

Now, when a `User` object is fetched (assuming that the username acts as
a key for the object), a single value is returned for the `friends`
list. This means that our application can now use a "correct" value
instead of having to deal with multiple values.

## Conflict Resolution and Writes

In the above example, we created a conflict resolver that resolves a
list of discrepant `User` objects and returns a single `User`. It's
important to note, however, that this resolver will only provide the
application with a single "correct" value; it will _not_ write that
value back to Riak. That requires a separate step. When this step should
be undertaken depends on your application. In general, though, we
recommend writing objects to Riak only when the application is ready to
commit them, i.e. when all of the changes that need to be made to the
object have been made and the application is ready to persist the state
of the object in Riak.

Correspondingly, we recommend that updates to objects in Riak follow
these steps:

1. **Read** the object from Riak
2. **Resolve** sibling conflicts if they exist, allowing the
application to reason about one "correct" value for the object (this
step is the subject of this tutorial)
3. **Modify** the object
4. **Write** the object to Riak once the necessary changes have been
made

You can find more on writing objects to Riak, including examples from
the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.10/developing/usage) section.
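As a sketch of that cycle using the resolver above (the new friend's
username is illustrative, and we resolve by collapsing `siblings` to the
chosen value, just as the merge example below does):

```ruby
# Sketch: read -> resolve -> modify -> write, reusing
# longest_friends_list_resolver from above
obj = client.bucket('users').get('bashobunny', type: 'siblings')     # read
obj.siblings = [longest_friends_list_resolver(obj)] if obj.conflict? # resolve
obj.data['friends'] << 'some_new_friend'                             # modify
obj.store                                                            # write
```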
## More Advanced Example

Resolving sibling `User` values on the basis of which user has the longest
`friends` list has the benefit of being simple but it's probably not a
good resolution strategy for our social networking application because
it means that unwanted data loss is inevitable. If one friend list
contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
containing `A`, `B`, and `C` will be chosen. So what about friends `D`
and `E`? Those usernames are essentially lost. In the sections below,
we'll implement an alternative strategy as an example.

### Merging the Lists

To avoid losing data like this, a better strategy would be to merge the
lists. We can modify our original resolver function to accomplish
precisely that and will also store the resulting `User` object:

```ruby
def longest_friends_list_resolver(riak_object)
  # An empty array for use later on
  friends_list = []
  if riak_object.conflict?
    # The "friends" arrays for all siblings will be merged into one
    # array
    riak_object.siblings.each do |sibling|
      friends_list.concat(sibling.data['friends'])
    end

    # Then we make a new User object. First, we fetch the username from
    # any one of the siblings, then we pass in our new friends list,
    # calling the "uniq" method to eliminate duplicate usernames.
    username = riak_object.siblings[0].data['username']
    new_user = User.new(username, friends_list.uniq)

    # Now we reuse the first sibling as a container for the merged data
    riak_object.siblings[0].data = new_user.to_json

    # And finally we set the siblings property to include just the
    # single, resolved sibling
    riak_object.siblings = [riak_object.siblings[0]]
  else
    riak_object.content
  end
end
```

The drawback to this approach is the following: with a conflict
resolution strategy like this, it's more or less inevitable that a user
will remove a friend from their friends list, and that that friend will
end up back on the list during a conflict resolution operation. While
that's certainly not desirable, that is likely better than the
alternative proposed in the first example, which entails usernames being
simply dropped from friends lists. Sibling resolution strategies almost
always carry potential drawbacks of this sort.

## Riak Data Types

An important thing to always bear in mind when working with conflict
resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/) that have
specific conflict resolution mechanics built in. If you have data that
can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#maps), then you should seriously
consider using those Data Types instead of creating your own
application-side resolution logic.

In the example above, we were dealing with conflict resolution within a
set, in particular the `friends` list associated with each `User` object.
The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets). + + + diff --git a/content/riak/kv/2.9.10/developing/usage/content-types.md b/content/riak/kv/2.9.10/developing/usage/content-types.md new file mode 100644 index 0000000000..bebb44231a --- /dev/null +++ b/content/riak/kv/2.9.10/developing/usage/content-types.md @@ -0,0 +1,190 @@ +--- +title: "Content Types" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Content Types" + identifier: "usage_content_types" + weight: 104 + parent: "developing_usage" +toc: true +--- + +Riak KV is a fundamentally content-agnostic database. You can use it to +store anything you want, from JSON to XML to HTML to binaries to images +and beyond. It's important to note that _all_ objects stored in +Riak need a specified content type. If you don't specify a +content type, the reaction will vary based on your client library: + +```java +// In the Java client, the response when storing an object without +// specifying a content type will depend on what is being stored. If you +// store a Java Map, for example, the client will automatically specify +// that the object is "application/json"; if you store a String, the +// client will specify "application/x-www-form-urlencoded"; POJOs are +// stored as JSON by default, and so on. +``` + +```ruby +# In the Ruby client, you must always specify a content type. If you +# you don't, you'll see the following error: +ArgumentError: content_type is not defined! +``` + +```php +# PHP will default to cURLs default content-type for POST & PUT requests: +# application/x-www-form-urlencoded + +# If you use the StoreObject::buildJsonObject() method when building your command, +# it will store the item with application/json as the content-type +``` + +```python +# In the Python client, the default content type is "application/json". +# Because of this, you should always make sure to specify the content +# type when storing other types of data. +``` + +```csharp +// Using the Riak .NET Client, the response when storing an object without +// specifying a content type will depend on what is being stored. +// If you store a Dictionary, for example, the client will +// automatically specify that the object is "application/json"; +// POCOs are stored as JSON by default, and so on. +``` + +```javascript +// In the Node.js client, the default content type is "application/json". +// Because of this, you should always make sure to specify the content +// type when storing other types of data. +``` + +```erlang +%% In the Erlang client, the response when storing an object without +%% specify8ing a content type will depend on what is being stored. If +%% you store a simple binary, for example, the client will automatically +%% specify that the object is "application/octet-stream"; if you store a +%% string, the client will specify "application/x-erlang-binary"; and so +%% on. +``` + +```golang +// In the Go client, you must always specify a content type. +``` + +Because content type negotiation varies so widely from client to client, +we recommend consulting the documentation for your preferred client for +more information. 
## Specifying Content Type

For all writes to Riak, you will need to specify a content type, for
example `text/plain` or `application/json`.

```java
Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
RiakObject obj = new RiakObject()
        .setContentType("text/plain")
        .setValue(text);
StoreValue store = new StoreValue.Builder(obj)
        .withLocation(wildeGeniusQuote)
        .build();
client.execute(store);
```

```ruby
bucket = client.bucket_type('quotes').bucket('oscar_wilde')
obj = Riak::RObject.new(bucket, 'genius')
obj.content_type = 'text/plain'
obj.raw_data = 'I have nothing to declare but my genius'
obj.store
```

```php
$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('genius', 'oscar_wilde', 'quotes')
  ->buildObject('I have nothing to declare but my genius!', 'text/plain')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('quotes').bucket('oscar_wilde')
obj = RiakObject(client, bucket, 'genius')
obj.content_type = 'text/plain'
obj.data = 'I have nothing to declare but my genius'
obj.store()
```

```csharp
var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
var obj = new RiakObject(id, "I have nothing to declare but my genius",
    RiakConstants.ContentTypes.TextPlain);
var rslt = client.Put(obj);
```

```javascript
var riakObj = new Riak.Commands.KV.RiakObject();
riakObj.setContentType('text/plain');
riakObj.setValue('I have nothing to declare but my genius');
client.storeValue({
    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
    value: riakObj
}, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
                       <<"genius">>,
                       <<"I have nothing to declare but my genius">>,
                       <<"text/plain">>).
riakc_pb_socket:put(Pid, Object).
```

```golang
obj := &riak.Object{
    ContentType:     "text/plain",
    Charset:         "utf-8",
    ContentEncoding: "utf-8",
    Value:           []byte("I have nothing to declare but my genius"),
}

cmd, err := riak.NewStoreValueCommandBuilder().
    WithBucketType("quotes").
    WithBucket("oscar_wilde").
    WithKey("genius").
    WithContent(obj).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}

svc := cmd.(*riak.StoreValueCommand)
rsp := svc.Response
```

```curl
curl -XPUT \
  -H "Content-Type: text/plain" \
  -d "I have nothing to declare but my genius" \
  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius

# Please note that POST is also a valid method for writes, for the sake
# of compatibility
```
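To confirm what was stored, you can read the object back over HTTP and
inspect the `Content-Type` response header (a quick sketch using the
object stored above):

```curl
curl -i http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius

# Among the response headers you should see the content type that was
# specified at write time:
# Content-Type: text/plain
```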
diff --git a/content/riak/kv/2.9.10/developing/usage/creating-objects.md b/content/riak/kv/2.9.10/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..84dd0c7df7
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/creating-objects.md
@@ -0,0 +1,553 @@
---
title: "Creating Objects in Riak KV"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "Creating Objects"
    identifier: "usage_creating_objects"
    weight: 100
    parent: "developing_usage"
toc: true
---

[usage content types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/content-types

Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
requests. Here is the basic form of writes:

```
PUT /types/<type>/buckets/<bucket>/keys/<key>

# If you're using HTTP to interact with Riak, you can also use POST
```

As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/bucket-types).

The object we're storing will be very simple, just a basic text snippet
of something that Rufus might say. Let's build the object and then store
it.

```java
String quote = "WOOF!";
Namespace bucket = new Namespace("animals", "dogs");
Location rufusLocation = new Location(bucket, "rufus");
RiakObject rufusObject = new RiakObject()
        .setContentType("text/plain")
        .setValue(BinaryValue.create(quote));
StoreValue storeOp = new StoreValue.Builder(rufusObject)
        .withLocation(rufusLocation)
        .build();
client.execute(storeOp);
```

```ruby
bucket = client.bucket_type('animals').bucket('dogs')
obj = Riak::RObject.new(bucket, 'rufus')
obj.content_type = 'text/plain'
obj.data = 'WOOF!'
obj.store
```

```php
$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('rufus', 'dogs', 'animals')
  ->buildObject('WOOF!', 'text/plain')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('animals').bucket('dogs')
obj = RiakObject(client, bucket, 'rufus')
obj.content_type = 'text/plain'
obj.data = 'WOOF!'
obj.store()
```

```csharp
var id = new RiakObjectId("animals", "dogs", "rufus");
var obj = new RiakObject(id, "WOOF!", "text/plain");
var result = client.Put(obj);
```

```javascript
var riakObj = new Riak.Commands.KV.RiakObject();
riakObj.setContentType('text/plain');
riakObj.setValue('WOOF!');
client.storeValue({
    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
    value: riakObj
}, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```golang
obj := &riak.Object{
    ContentType:     "text/plain",
    Charset:         "utf-8",
    ContentEncoding: "utf-8",
    Value:           []byte("WOOF!"),
}

cmd, err := riak.NewStoreValueCommandBuilder().
    WithBucketType("animals").
    WithBucket("dogs").
    WithKey("rufus").
    WithContent(obj).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}

svc := cmd.(*riak.StoreValueCommand)
rsp := svc.Response
```

Notice that we specified both a value for the object, i.e. `WOOF!`, and
a content type, `text/plain`. See [content types][usage content types] for more information.

Now you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/2.9.10/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/bucket-types).

### Store an Object

Your application will often have its own method of generating the keys
for its data, e.g. on the basis of timestamps. If so, storing that data
is easy. The basic request looks like this.

```
PUT /types/TYPE/buckets/BUCKET/keys/KEY

# If you're using HTTP, POST can be used instead of PUT. The only
# difference between POST and PUT is that you should POST in cases where
# you want Riak to auto-generate a key. More on this can be found in the
# examples below.
```
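For instance, the Rufus write from the example above can be expressed as a
raw HTTP request (a curl sketch of the same operation):

```curl
curl -XPUT \
  -H "Content-Type: text/plain" \
  -d "WOOF!" \
  http://localhost:8098/types/animals/buckets/dogs/keys/rufus
```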
+
+There is no need to intentionally create buckets in Riak. They pop into
+existence when keys are added to them, and disappear when all keys have
+been removed from them. If you don't specify a bucket's type, the type
+[`default`]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) will be applied.
+
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(text);
+StoreValue store = new StoreValue.Builder(viperKey, obj)
+  .withOption(StoreOption.W, new Quorum(3))
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [{w, 3}]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. Like a `GET` request, `300 Multiple
+Choices` may be returned if siblings existed or were created as part of
+the operation, and the response can be dealt with similarly.
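+
+One practical reason to ask for the body back is that the response
+carries the object's freshly assigned causal context, so a follow-up
+update does not need a separate fetch. A Python sketch (this assumes
+the client exposes the stored vector clock as a `vclock` attribute):
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+
+# After the store, the object carries the vector clock Riak assigned,
+# which can accompany a subsequent update of the same key
+obj.vclock
+```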
+
+Normal HTTP status codes (responses will vary for client libraries):
+
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+For example, using the same object from above:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(text);
+StoreValue store = new StoreValue.Builder(viperKey, obj)
+  .withOption(StoreOption.W, new Quorum(3))
+  .withOption(StoreOption.RETURN_BODY, true)
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3, returnbody: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+options.SetReturnBody(true);
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, returnBody: true, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var viper = riakObj.value;
+    logger.info("dodge viper: %s", viper.toString('utf8'));
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
+```
+
+### Store a New Object and Assign a Random Key
+
+If your application would rather leave key-generation up to Riak, issue
+a `POST` request to the bucket URL instead of a PUT to a bucket/key
+pair:
+
+```
+POST /types/TYPE/buckets/BUCKET/keys
+```
+
+If you don't pass Riak a `key` name after the bucket, it will know to
+create one for you.
+
+Supported headers are the same as for bucket/key write requests, though
+`X-Riak-Vclock` will never be relevant for these POST requests.
+Supported query parameters are also the same as for bucket/key PUT
+requests.
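+
+For example, the write parameters from the table above can be combined
+with key generation. A minimal Python sketch (mirroring the
+`users`/`random_user_keys` examples below):
+
+```python
+bucket = client.bucket_type('users').bucket('random_user_keys')
+obj = RiakObject(client, bucket)  # no key supplied
+obj.content_type = 'application/json'
+obj.data = '{"user":"data"}'
+obj.store(w=3, return_body=True)
+
+obj.key  # the key that Riak generated for the object
+```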
+ +Normal status codes: + +* `201 Created` + +This command will store an object in the bucket `random_user_keys`, +which bears the bucket type `users`. + +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
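+    // Note: no WithKey() in this builder chain, so Riak generates the
+    // key; it is read from rsp.GeneratedKey below.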
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)
+
+// Output:
+// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
+```
+
+```curl
+curl -i -XPOST \
+  -H "Content-Type: text/plain" \
+  -d "this is a test" \
+  http://localhost:8098/types/users/buckets/random_user_keys/keys
+
+# In the output, you should see a Location header that will give you the
+# location of the object in Riak, with the key at the end:
+
+Location: /types/users/buckets/random_user_keys/keys/G7FYUXtTsEdru4NP32eijMIRK3o
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/custom-extractors.md b/content/riak/kv/2.9.10/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..07e7c5cb55
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/custom-extractors.md
@@ -0,0 +1,423 @@
+---
+title: "Custom Extractors"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Custom Extractors"
+    identifier: "usage_custom_extractors"
+    weight: 113
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/search/custom-extractors
+  - /riak/kv/2.9.10/dev/search/custom-extractors
+---
+
+Solr, and by extension Riak Search, has default extractors for a wide
+variety of data types, including JSON, XML, and plaintext. Riak Search
+ships with the following extractors:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+No specified type | `yz_noop_extractor`
+
+There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/usage/searching-data-types).
+
+If you're working with a data format that does not have a default Solr
+extractor, you can create your own and register it with Riak Search.
+We'll show you how to do so by way of example.
+
+## The Extractor Interface
+
+Creating a custom extractor involves creating an Erlang module that
+implements two functions:
+
+* `extract/1` - Takes the contents of the object and calls `extract/2`
+  with the same contents and an empty list
+* `extract/2` - Takes the contents of the object and returns an Erlang
+  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
+  single field name and a single value associated with that name
+
+The following extractor shows how a pure text extractor implements those
+two functions:
+
+```erlang
+-module(search_test_extractor).
+-include("yokozuna.hrl").
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+extract(Value, Opts) ->
+    FieldName = field_name(Opts),
+    [{FieldName, Value}].
+
+-spec field_name(proplist()) -> any().
+field_name(Opts) ->
+    proplists:get_value(field_name, Opts, text).
+```
+
+This extractor takes the contents of a `Value` and returns a proplist
+with a single field name (in this case `text`) and the single value.
+This function can be run in the Erlang shell. Let's run it providing the
+text `hello`:
+
+```erlang
+> c(search_test_extractor).
+%% {ok, search_test_extractor}
+
+> search_test_extractor:extract("hello").
+
+%% Console output:
+[{text, "hello"}]
+```
+
+Upon running this command, the value `hello` would be indexed in Solr
+under the field name `text`.
If you wanted to find all objects with a +`text` field that begins with `Fourscore`, you could use the +Solr query `text:Fourscore*`, to give just one example. + +## An Example Custom Extractor + +Let's say that we're storing HTTP header packet data in Riak. Here's an +example of such a packet: + +``` +GET http://www.google.com HTTP/1.1 +``` + +We want to register the following information in Solr: + +Field name | Value | Extracted value in this example +:----------|:------|:------------------------------- +`method` | The HTTP method | `GET` +`host` | The URL's host | `www.google.com` +`uri` | The URI, i.e. what comes after the host | `/` + +The example extractor below would provide the three desired +fields/values. It relies on the +[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3) +function from Erlang's standard library. + +```erlang +-module(yz_httpheader_extractor). +-compile(export_all). + +extract(Value) -> + extract(Value, []). + +%% In this example, we can ignore the Opts variable from the example +%% above, hence the underscore: +extract(Value, _Opts) -> + {ok, + {http_request, + Method, + {absoluteURI, http, Host, undefined, Uri}, + _Version}, + _Rest} = erlang:decode_packet(http, Value, []), + [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}]. +``` + +This file will be stored in a `yz_httpheader_extractor.erl` file (as +Erlang filenames must match the module name). Now that our extractor has +been written, it must be compiled and registered in Riak before it can +be used. + +## Registering Custom Extractors + +In order to use a custom extractor, you must create a compiled `.beam` +file out of your `.erl` extractor file and then tell Riak where that +file is located. Let's say that we have created a +`search_test_extractor.erl` file in the directory `/opt/beams`. First, +we need to compile that file: + +```bash +erlc search_test_extractor.erl +``` + +To instruct Riak where to find the resulting +`search_test_extractor.beam` file, we'll need to add a line to an +`advanced.config` file in the node's `/etc` directory (more information +can be found in our documentation on [advanced]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#advanced-configuration)). Here's an +example: + +```advancedconfig +[ + %% Other configs + {vm_args, [ + {"-pa /opt/beams", ""} + ]}, + %% Other configs +] +``` + +This will instruct the Erlang VM on which Riak runs to look for compiled +`.beam` files in the proper directory. You should re-start the node at +this point. Once the node has been re-started, you can use the node's +Erlang shell to register the `yz_httpheader_extractor`. First, attach to +the shell: + +```bash +riak attach +``` + +At this point, we need to choose a MIME type for our extractor. Let's +call it `application/httpheader`. Once you're in the shell: + +```erlang +> yz_extractor:register("application/httpheader", yz_httpheader_extractor). +``` + +If successful, this command will return a list of currently registered +extractors. 
It should look like this: + +```erlang +[{default,yz_noop_extractor}, + {"application/httpheader",yz_httpheader_extractor}, + {"application/json",yz_json_extractor}, + {"application/riak_counter",yz_dt_extractor}, + {"application/riak_map",yz_dt_extractor}, + {"application/riak_set",yz_dt_extractor}, + {"application/xml",yz_xml_extractor}, + {"text/plain",yz_text_extractor}, + {"text/xml",yz_xml_extractor}] +``` + +If the `application/httpheader` extractor is part of that list, then the +extractor has been successfully registered. + +## Verifying Our Custom Extractor + +Now that Riak Search knows how to decode and extract HTTP header packet +data, let's store some in Riak and then query it. We'll put the example +packet data from above in a `google_packet.bin` file. Then, we'll `PUT` +that binary to Riak's `/search/extract` endpoint: + +```curl +curl -XPUT $RIAK_HOST/search/extract \ + -H 'Content-Type: application/httpheader' \ # Note that we used our custom MIME type + --data-binary @google_packet.bin +``` + +That should return the following JSON: + +```json +{ + "method": "GET", + "host": "www.google.com", + "uri": "/" +} +``` + +We can also verify this in the Erlang shell (whether in a Riak node's +Erlang shell or otherwise): + +```erlang +yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor). + +%% Console output: +[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}] +``` + +## Indexing and Searching HTTP Header Packet Data + +Now that Solr knows how to extract HTTP header packet data, we need to +create a schema that extends the [default schema]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +to `<fields>` in the schema, which we'll name `http_header_schema` and +store in a `http_header_schema.xml` file: + +```xml +<?xml version="1.0" encoding="UTF-8" ?> +<schema name="http_header_schema" version="1.5"> +<fields> + <!-- other required fields here --> + + <field name="method" type="string" indexed="true" stored="true" multiValued="false"/> + <field name="host" type="string" indexed="true" stored="true" multiValued="false"/> + <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/> +</fields> +``` + +Now, we can store the schema: + +```java +import org.apache.commons.io.FileUtils + +File xml = new File("http_header_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_xml = File.read('http_header_schema.xml') +client.create_search_schema('http_header_schema', schema_xml) +``` + +```php +$schema_string = file_get_contents('http_header_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('http_header_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +import io + +schema_xml = open('http_header_schema.xml').read() +client.create_search_schema('http_header_schema', schema_xml) +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/http_header_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @http_header_schema.xml +``` + +Riak now has our schema stored and ready for use. 
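+
+If you'd like to double-check the upload before building an index on
+it, the schema can be read back. A Python sketch (assuming the client's
+`get_search_schema` method; the HTTP equivalent is a `GET` on
+`/search/schema/http_header_schema`):
+
+```python
+schema = client.get_search_schema('http_header_schema')
+schema['name']     # 'http_header_schema'
+schema['content']  # the raw XML uploaded above
+```
+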
+Let's create a search index called `header_data` that's associated with
+our new schema:
+
+```java
+YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
+StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
+  .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreIndex($riak))
+  ->withName('header_data')
+  ->usingSchema('http_header_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/header_data \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"http_header_schema"}'
+```
+
+Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types)
+for all of the HTTP header data that we plan to store. Any bucket that
+bears this type will be associated with our `header_data` search index.
+We'll call our bucket type `http_data_store`.
+
+```bash
+riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
+riak-admin bucket-type activate http_data_store
+```
+
+Let's use the same `google_packet.bin` file that we used previously and
+store it in a bucket with the `http_data_store` bucket type, making sure
+to use our custom `application/httpheader` MIME type:
+
+```java
+Location key = new Location(new Namespace("http_data_store", "packets"), "google");
+File packetData = new File("google_packet.bin");
+byte[] packetBinary = FileUtils.readFileToByteArray(packetData);
+
+RiakObject packetObject = new RiakObject()
+  .setContentType("application/httpheader")
+  .setValue(BinaryValue.create(packetBinary));
+
+StoreValue storeOp = new StoreValue.Builder(packetObject)
+  .withLocation(key)
+  .build();
+client.execute(storeOp);
+```
+
+```ruby
+packet_data = File.read('google_packet.bin')
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = Riak::RObject.new(bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.raw_data = packet_data
+obj.store
+```
+
+```php
+$object = new Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('google', 'packets', 'http_data_store')
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+packet_data = open('google_packet.bin').read()
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = RiakObject(client, bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.data = packet_data
+obj.store()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+Now that we have some header packet data stored, we can query our
+`header_data` index on whatever basis we'd like.
+First, let's verify that we'll get one result if we query for objects
+that have the HTTP method `GET`:
+
+```java
+// Using the same method from above:
+String query = "method:GET";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withQuery('method:GET')
+  ->withIndexName('header_data')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"
+
+# This should return a fairly large JSON object with a "num_found" field
+# The value of that field should be 1
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/deleting-objects.md b/content/riak/kv/2.9.10/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..a3296f9a28
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/deleting-objects.md
@@ -0,0 +1,155 @@
+---
+title: "Deleting Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Deleting Objects"
+    identifier: "usage_deleting_objects"
+    weight: 103
+    parent: "developing_usage"
+toc: true
+---
+
+The delete command follows a predictable pattern and looks like this:
+
+```
+DELETE /types/TYPE/buckets/BUCKET/keys/KEY
+```
+
+The normal HTTP response codes for `DELETE` operations are `204 No
+Content` and `404 Not Found`. 404 responses are *normal*, in the sense
+that `DELETE` operations are idempotent and not finding the resource has
+the same effect as deleting it.
+
+Let's try to delete the `genius` key from the `oscar_wilde` bucket
+(which bears the type `quotes`):
+
+```java
+Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
+client.execute(delete);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\DeleteObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+string key = rslt.Value.Key;
+id = new RiakObjectId("users", "random_user_keys", key);
+var del_rslt = client.Delete(id);
+```
+
+```javascript
+// continuing from above example
+options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    key: generatedKey
+};
+client.deleteValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>).
+```
+
+```golang
+// Continuing from above example
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithKey(rsp.GeneratedKey).
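+    // rsp.GeneratedKey was returned by the store command in the
+    // random-key example above, so this deletes that same object.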
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+```
+
+## Client Library Examples
+
+If you are updating an object that has been deleted---or if an update
+might target a deleted object---we recommend that
+you first fetch the [causal context]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context) of the object prior to updating.
+This can be done by setting the `deletedvclock` parameter to `true` as
+part of the [fetch operation]({{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers/fetch-object). This can also be done
+with the official Riak clients for Ruby, Java, and Erlang, as in the
+example below:
+
+```ruby
+object.delete
+deleted_object = bucket.get('key', deletedvclock: true)
+deleted_object.vclock
+```
+
+```python
+# It is not currently possible to fetch the causal context for a deleted
+# key in the Python client.
+```
+
+```java
+Location loc = new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+FetchValue fetch = new FetchValue.Builder(loc)
+  .withOption(Option.DELETED_VCLOCK, true)
+  .build();
+FetchValue.Response response = client.execute(fetch);
+System.out.println(response.getVclock().asString());
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"bucket_type">>, <<"bucket">>},
+                                <<"key">>,
+                                [deletedvclock]).
+
+%% In the Erlang client, the vector clock is accessible using the Obj
+%% object obtained above.
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type')
+  ->build()
+  ->execute();
+
+echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/document-store.md b/content/riak/kv/2.9.10/developing/usage/document-store.md
new file mode 100644
index 0000000000..4741f54fd8
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/document-store.md
@@ -0,0 +1,616 @@
+---
+title: "Implementing a Document Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Implementing a Document Store"
+    identifier: "usage_document_store"
+    weight: 112
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/search/document-store
+  - /riak/kv/2.9.10/dev/search/document-store
+---
+
+Although Riak wasn't explicitly created as a document store, two
+features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/)---make it possible to use Riak as a
+highly scalable document store with rich querying capabilities. In this
+tutorial, we'll build a basic implementation of a document store using
+[Riak maps]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#maps).
+
+## Basic Approach
+
+Riak Search enables you to implement a document store in Riak in a
+variety of ways. You could, for example, store and query JSON objects or
+XML and then retrieve them later via Solr queries. In this tutorial,
+however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#maps),
+index that data using Riak Search, and then run Solr queries against
+those stored objects.
+
+You can think of these Search indexes as **collections**.
Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/2.9.10/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. + +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). 
+Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @blog_post_schema.xml +``` + +With our schema uploaded, we can create an index called `blog_posts` and +associate that index with our schema: + +```java +YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema"); +StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('blog_posts') + ->usingSchema('blog_post_schema') + ->build() + ->execute(); +``` + +```python +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```csharp +var idx = new SearchIndex("blog_posts", "blog_post_schema"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: 'blog_post_schema', + indexName: 'blog_posts' +}; +client.storeIndex(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/blog_posts \ + -H 'Content-Type: application/json' \ + -d '{"schema": "blog_post_schema"}' +``` + +## How Collections will Work + +Collections are not a concept that is native to Riak but we can easily +mimic collections by thinking of a bucket type as a collection. When we +associate a bucket type with a Riak Search index, all of the objects +stored in any bucket of that bucket type will be queryable on the basis +of that one index. For this tutorial, we'll create a bucket type called +`cms` and think of that as a collection. 
+We could also restrict our `blog_posts` index to a single bucket just as
+easily and think of that as a queryable collection, but we will not do
+that in this tutorial.
+
+The advantage of the bucket-type-based approach is that we could store
+blog posts from different blogs in different buckets and query them all
+at once as part of the same index. It depends on the use case at
+hand. In this tutorial, we'll only be storing posts from one blog, which
+is called "Cat Pics Quarterly" and provides in-depth theoretical
+discussions of cat pics with a certain number of Reddit upvotes. All of
+the posts in this blog will be stored in the bucket
+`cat_pics_quarterly`.
+
+First, let's create our `cms` bucket type and associate it with the
+`blog_posts` index:
+
+```bash
+riak-admin bucket-type create cms \
+  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
+riak-admin bucket-type activate cms
+```
+
+Now, any object stored in any bucket of the type `cms` will be indexed
+as part of our "collection."
+
+## Storing Blog Posts as Maps
+
+Now that we know how each element of a blog post can be translated into
+one of the Riak Data Types, we can create an interface in our
+application to serve as that translation layer. Using the method
+described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-modeling), we can construct a
+class that looks like this:
+
+```java
+import java.util.Set;
+
+public class BlogPost {
+    private String title;
+    private String author;
+    private String content;
+    private Set<String> keywords;
+    private DateTime datePosted;
+    private Boolean published;
+    private static final String bucketType = "cms";
+
+    private Location location;
+
+    private RiakClient client;
+
+    public BlogPost(RiakClient client,
+                    String bucketName,
+                    String title,
+                    String author,
+                    String content,
+                    Set<String> keywords,
+                    DateTime datePosted,
+                    Boolean published) {
+        this.client = client;
+        this.location = new Location(new Namespace(bucketType, bucketName), null);
+        this.title = title;
+        this.author = author;
+        this.content = content;
+        this.keywords = keywords;
+        this.datePosted = datePosted;
+        this.published = published;
+    }
+
+    public void store() throws Exception {
+        RegisterUpdate titleUpdate = new RegisterUpdate(title);
+        RegisterUpdate authorUpdate = new RegisterUpdate(author);
+        RegisterUpdate contentUpdate = new RegisterUpdate(content);
+        SetUpdate keywordsUpdate = new SetUpdate();
+        for (String keyword : keywords) {
+            keywordsUpdate.add(keyword);
+        }
+        RegisterUpdate dateUpdate =
+            new RegisterUpdate(datePosted.toString("YYYY-MM-DD HH:MM"));
+        FlagUpdate publishedUpdate = new FlagUpdate(published);
+        MapUpdate mapUpdate = new MapUpdate()
+            .update("title", titleUpdate)
+            .update("author", authorUpdate)
+            .update("content", contentUpdate)
+            .update("keywords", keywordsUpdate)
+            .update("date", dateUpdate)
+            .update("published", publishedUpdate);
+        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
+            .build();
+        client.execute(storeBlogPost);
+    }
+}
+```
+
+```ruby
+class BlogPost
+  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
+    bucket = client.bucket_type('cms').bucket(bucket_name)
+    map = Riak::Crdt::Map.new(bucket, nil)
+    map.batch do |m|
+      m.registers['title'] = title
+      m.registers['author'] = author
+      m.registers['content'] = content
+      keywords.each do |k|
+        m.sets['keywords'].add(k)
+      end
+      m.registers['date'] = date_posted
+      if published
+        m.flags['published'] = true
+      end
+    end
+  end
+end
+```
+
+```php
+class BlogPost {
+  private $title = '';
+  private $author = '';
+  private $content = '';
+  private $keywords = [];
+  private $datePosted = '';
+  private $published = false;
+  private $bucketType = "cms";
+
+  private $bucket = null;
+
+  private $riak = null;
+
+  public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
+  {
+    $this->riak = $riak;
+    $this->bucket = new Bucket($bucket, $this->bucketType);
+    $this->title = $title;
+    $this->author = $author;
+    $this->content = $content;
+    $this->keywords = $keywords;
+    $this->datePosted = $date;
+    $this->published = $published;
+  }
+
+  public function store()
+  {
+    $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));
+
+    foreach($this->keywords as $keyword) {
+      $setBuilder->add($keyword);
+    }
+
+    (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
+      ->updateRegister('title', $this->title)
+      ->updateRegister('author', $this->author)
+      ->updateRegister('content', $this->content)
+      ->updateRegister('date', $this->datePosted)
+      ->updateFlag('published', $this->published)
+      ->updateSet('keywords', $setBuilder)
+      ->withBucket($this->bucket)
+      ->build()
+      ->execute();
+  }
+}
+```
+
+```python
+from riak.datatypes import Map
+
+class BlogPost:
+    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
+        bucket = client.bucket_type('cms').bucket(bucket_name)
+        self.map = Map(bucket, None)
+        self.map.registers['title'].assign(title)
+        self.map.registers['author'].assign(author)
+        self.map.registers['content'].assign(content)
+        for k in keywords:
+            self.map.sets['keywords'].add(k)
+        self.map.registers['date'].assign(date_posted)
+        if published:
+            self.map.flags['published'].enable()
+        self.map.store()
+```
+
+```csharp
+/*
+ * Please see the code in the RiakClientExamples project:
+ * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
+ */
+```
+
+```javascript
+/*
+ * Please see the code in the examples repository:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
+ */
+```
+
+Now, we can store some blog posts.
+We'll start with just one:
+
+```java
+Set<String> keywords = new HashSet<String>();
+keywords.add("adorbs");
+keywords.add("cheshire");
+
+BlogPost post1 = new BlogPost(client, // client object
+                              "cat_pics_quarterly", // bucket
+                              "This one is so lulz!", // title
+                              "Cat Stevens", // author
+                              "Please check out these cat pics!", // content
+                              keywords, // keywords
+                              new DateTime(), // date posted
+                              true); // published
+try {
+    post1.store();
+} catch (Exception e) {
+    System.out.println(e);
+}
+```
+
+```ruby
+keywords = ['adorbs', 'cheshire']
+date = Time.now.strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost.new('cat_pics_quarterly',
+                          'This one is so lulz!',
+                          'Cat Stevens',
+                          'Please check out these cat pics!',
+                          keywords,
+                          date,
+                          true)
+```
+
+```php
+$keywords = ['adorbs', 'cheshire'];
+$date = new \DateTime('now');
+
+$post1 = new BlogPost(
+  $riak, // client object
+  'cat_pics_quarterly', // bucket
+  'This one is so lulz!', // title
+  'Cat Stevens', // author
+  'Please check out these cat pics!', // content
+  $keywords, // keywords
+  $date, // date posted
+  true // published
+);
+```
+
+```python
+import datetime
+
+keywords = ['adorbs', 'cheshire']
+date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost('cat_pics_quarterly',
+                      'This one is so lulz!',
+                      'Cat Stevens',
+                      'Please check out these cat pics!',
+                      keywords,
+                      date,
+                      True)
+```
+
+```csharp
+var keywords = new HashSet<string> { "adorbs", "cheshire" };
+
+var post = new BlogPost(
+    "This one is so lulz!",
+    "Cat Stevens",
+    "Please check out these cat pics!",
+    keywords,
+    DateTime.Now,
+    true);
+
+var repo = new BlogPostRepository(client, "cat_pics_quarterly");
+string id = repo.Save(post);
+```
+
+```javascript
+var post = new BlogPost(
+    'This one is so lulz!',
+    'Cat Stevens',
+    'Please check out these cat pics!',
+    [ 'adorbs', 'cheshire' ],
+    new Date(),
+    true
+);
+
+var repo = new BlogPostRepository(client, 'cat_pics_quarterly');
+
+repo.save(post, function (err, rslt) {
+    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
+});
+```
+
+## Querying
+
+Now that we have some blog posts stored in our "collection," we can
+start querying for whatever we'd like. Let's say that we want to find
+all blog posts with the keyword `funny` (after all, some cat pics are
+quite serious, and we may not want those).
+ +```java +String index = "blog_posts"; +String query = "keywords_set:funny"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'keywords_set:funny') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('keywords_set:funny') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'keywords_set:funny') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('keywords_set:funny') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny" +``` + +Or we can find posts that contain the word `furry`: + +```java +String index = "blog_posts"; +String query = "content_register:furry"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'content_register:furry') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('content_register:furry') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'content_register:furry') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('content_register:furry') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry" +``` + +Here are some more possible queries: + +Info | Query +:----|:----- +Unpublished posts | `published_flag:false` +Titles that begin with `Loving*` | `title_register:Loving*` +Post bodies containing the words `furry` and `jumping` | `content_register:[furry AND jumping]` + + + diff --git a/content/riak/kv/2.9.10/developing/usage/mapreduce.md b/content/riak/kv/2.9.10/developing/usage/mapreduce.md new file mode 100644 index 0000000000..94d6d0553f --- /dev/null +++ b/content/riak/kv/2.9.10/developing/usage/mapreduce.md @@ -0,0 +1,245 @@ +--- +title: "Using MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Using MapReduce" + identifier: "usage_mapreduce" + weight: 106 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.10/dev/using/mapreduce + - /riak/kv/2.9.10/dev/using/mapreduce +--- + +[usage 2i]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/secondary-indexes +[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search +[usage types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types +[api http]: {{<baseurl>}}riak/kv/2.9.10/developing/api/http +[api pb]: {{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode +[guide mapreduce]: 
{{<baseurl>}}riak/kv/2.9.10/developing/app-guide/advanced-mapreduce + +{{% note title="Use MapReduce sparingly" %}} +In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive to the extent that they can degrade performance in +production clusters operating under load. Thus, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. +{{% /note %}} + +MapReduce (M/R) is a technique for dividing data processing work across +a distributed system. It takes advantage of the parallel processing +power of distributed systems and also reduces network bandwidth, as the +algorithm is passed around to where the data lives rather than +transferring a potentially huge dataset to a client algorithm. + +You can use MapReduce for things like: filtering documents by +tags, counting words in documents, and extracting links to related data. +In Riak KV, MapReduce is one method for querying that is not strictly based +on key querying, alongside [secondary indexes][usage 2i] +and [search][usage search]. MapReduce jobs can be submitted through the +[HTTP API][api http] or the [Protocol Buffers API][api pb], although we +strongly recommend using the Protocol Buffers API for performance +reasons. + +## Features + +* Map phases execute in parallel with data locality. +* Reduce phases execute in parallel on the node where the job was + submitted. +* MapReduce queries written in Erlang. + +## When to Use MapReduce + +* When you know the set of objects over which you want to MapReduce + (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key) +* When you want to return actual objects or pieces of objects and not + just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on + non-key-based queries, but they only return lists of keys and not + whole objects. +* When you need the utmost flexibility in querying your data. MapReduce + gives you full access to your object and lets you pick it apart any + way you want. + +## When Not to Use MapReduce + +* When you want to query data over an entire bucket. MapReduce uses a + list of keys, which can place a lot of demand on the cluster. +* When you want latency to be as predictable as possible. + +## How it Works + +The MapReduce framework helps developers divide a query into steps, +divide the dataset into chunks, and then run those step/chunk pairs in +separate physical hosts. + +There are two steps in a MapReduce query: + +* **Map** - The data collection phase, which breaks up large chunks of + work into smaller ones and then takes action on each chunk. Map + phases consist of a function and a list of objects on which the map + operation will operate. +* **Reduce** - The data collation or processing phase, which combines + the results from the map step into a single output. The reduce phase + is optional. + +Riak KV MapReduce queries have two components: + +* A list of inputs +* A list of phases + +The elements of the input list are object locations as specified by +[bucket type][usage types], bucket, and key. The elements of the +phases list are chunks of information related to a map, a reduce, or a +link function. + +A MapReduce query begins when a client makes the request to Riak KV. 
The +node that the client contacts to make the request becomes the +*coordinating node* responsible for the MapReduce job. As described +above, each job consists of a list of phases, where each phase is either +a map or a reduce phase. The coordinating node uses the list of phases +to route the object keys and the function that will operate over the +objects stored in those keys and instruct the proper [vnode][glossary vnode] to +run that function over the right objects. + +After running the map function, the results are sent back to the +coordinating node. This node then concatenates the list and passes that +information over to a reduce phase on the same coordinating node, +assuming that the next phase in the list is a reduce phase. + +The diagram below provides an illustration of how a coordinating vnode +orchestrates a MapReduce job. + +![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png) + +## Example + +In this example, we'll create four objects with the text "caremad" +repeated a varying number of times and store those objects in the bucket +`training` (which does not bear a [bucket type][usage types]). +An Erlang MapReduce function will be used to count the occurrences of +the word "caremad." + +### Data object input commands + +For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) +in conjunction with Riak KV's [HTTP API][api http] to store the objects: + +```curl +curl -XPUT http://localhost:8098/buckets/training/keys/foo \ + -H 'Content-Type: text/plain' \ + -d 'caremad data goes here' + +curl -XPUT http://localhost:8098/buckets/training/keys/bar \ + -H 'Content-Type: text/plain' \ + -d 'caremad caremad caremad caremad' + +curl -XPUT http://localhost:8098/buckets/training/keys/baz \ + -H 'Content-Type: text/plain' \ + -d 'nothing to see here' + +curl -XPUT http://localhost:8098/buckets/training/keys/bam \ + -H 'Content-Type: text/plain' \ + -d 'caremad caremad caremad' +``` + +### MapReduce invocation + +To invoke a MapReduce function from a compiled Erlang program requires +that the function be compiled and distributed to all nodes. + +For interactive use, however, it's not necessary to do so; instead, we +can invoke the client library from the +[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define +functions to send to Riak KV on the fly. + +First we defined the map function, which specifies that we want to get +the key for each object in the bucket `training` that contains the text +`caremad`. + +We're going to generalize and optimize it a bit by supplying a +compiled regular expression when we invoke MapReduce; our function +will expect that as the third argument. + +```erlang +ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of + {match, Matches} -> [{riak_object:key(O), length(Matches)}]; + nomatch -> [{riak_object:key(O), 0}] +end end. +``` + +Next, to call `ReFun` on all keys in the `training` bucket, we can do +the following in the Erlang shell. + +{{% note title="Warning" %}} +Do not use this in a production +environment; listing all keys to identify those in the `training` bucket +is a very expensive process. +{{% /note %}} + +```erlang +{ok, Re} = re:compile("caremad"). +``` + +That will return output along the following lines, verifying that +compilation has completed: + +``` +{ok,{re_pattern,0,0, + <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100, + ...>>}} +``` + +Then, we can create a socket link to our cluster: + +```erlang +{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087). 
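+%% 8087 is the default Protocol Buffers port for a local Riak node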
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+    [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
+
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/next-gen-replication.md b/content/riak/kv/2.9.10/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..11b672d954
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/next-gen-replication.md
@@ -0,0 +1,152 @@
+---
+title: "Next-Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.9.10"
+menu:
+  riak_kv-2.9.10:
+    name: "Next Gen Replication"
+    identifier: "learn_concepts_next_gen_replication"
+    weight: 108
+    parent: "learn_concepts"
+version_history:
+  in: "2.9.1+"
+toc: true
+aliases:
+  - /riak-docs/riak/2.9.10/dev/using/nextgenreplication
+---
+[concept TicTac aae]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/tictac-active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/replication
+
+## Next Generation Replication - How it Works
+
+### Replication Actors
+
+Each node in `riak_kv` starts three processes that manage inter-cluster replication: a tictac AAE full-sync manager, a replication queue source manager, and a replication queue sink manager. All processes are started by default (whether or not replication is enabled), but will only play an active role should replication be configured. Further details on the processes involved:
+
+* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`
+
+  * There is a single actor on each node that manages the full-sync reconciliation workload configured for that node.
+
+  * Each node is configured with the details of a peer node at a remote cluster. Each manager is responsible for controlling cluster-wide hashtree exchanges between the local node and the peer node, and for prompting any repairs required across the cluster (not just on this node). The information is exchanged between the peers, but that information represents the data across the whole cluster. Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`.
Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`.
+
+  * Each node is configured with a schedule to determine how frequently this manager will run its reconcile and repair operations.
+
+  * It is an administrator's responsibility to ensure the cluster AAE workload is distributed across nodes with sufficient diversity to ensure correct operation under failure. Work is not re-distributed between nodes in response to failure on either the local or remote cluster, so there must be other nodes already configured to share that workload to continue operation under failure conditions.
+
+  * Each node can only full-sync with one other cluster (via the one peer node). If the cluster needs to full-sync with more than one cluster, then the administrator should ensure different nodes have the different configurations necessary to achieve this.
+
+  * Scheduling of work to minimise concurrency of reconciliation operations is managed by this actor using a simple, coordination-free mechanism.
+
+  * The administrator may at run-time suspend or resume the regular running of full-sync operations on any given node via the `riak_kv_ttaaefs_manager`.
+
+* __Replication Queue Source-Side Manager__
+
+  * There is a single actor on each node that manages the queueing of replication object references to be consumed by other clusters. This actor runs a configurable number of queues, which contain pointers to data which is required to be consumed by different remote clusters.
+
+  * The general pattern is that each delta within a cluster will be published once via the `riak_kv_replrtq_src` on a node local to the discovery of the change. Each queue which is a source of updates will have multiple consumers spread across multiple sink nodes on the receiving cluster - where each sink-side node's consumers are being managed by a `riak_kv_replrtq_snk` process on that node.
+
+  * Queues may have data filtering rules to restrict what changes are distributed via that queue. The filters can restrict replication to a specific bucket, a bucket type, or a bucket name prefix, or can allow any change to be published to that queue.
+
+  * __Real-time replication__ changes (i.e. PUTs that have just been co-ordinated on this node within the cluster) are sent to the `riak_kv_replrtq_src` in one of the following formats:
+    * {Bucket, Key, Clock, {tombstone, Object}};
+    * {Bucket, Key, Clock, {object, Object}};
+    * {Bucket, Key, Clock, to_fetch}.
+
+  * Real-time replicated objects are the highest priority items to be queued, and are placed on __every queue whose data filtering rules are matched__ by the object. If the priority queue has grown beyond a limited number of items (the number being defined in `riak_kv.replrtq_srcobjectlimit`), then any {object, Object} reference is stripped and replaced with `to_fetch`. This is to help limit the memory consumed by the queue during failure conditions, i.e. when a sink has stopped consuming from the source queue.
+
+  * Changes identified by __AAE full-sync replication__ processes run by the `riak_kv_ttaaefs_manager` on the local node are sent to the `riak_kv_replrtq_src` as references, and queued as the second highest priority. These changes are queued only on __a single queue defined within the configuration__ of `riak_kv_ttaaefs_manager`. The changes queued are only references to the object (Bucket, Key and Clock), not the actual object.
+
+  * Changes identified by __AAE fold operations__ for administrator-initiated transition or repair operations (e.g. fold over a bucket or key-range, or for a given range of modified dates) are sent to the `riak_kv_replrtq_src` to be queued as the lowest priority onto __a single queue defined by the administrator when initiating the AAE fold operation__. The changes queued are only references to the object (Bucket, Key and Clock), not the actual object - and are only the changes discovered through the fold running on vnodes local to this node.
+
+  * Should the local node fail, all undelivered object references will be dropped.
+
+  * Queues are bounded, with limits set separately for each priority. Items are consumed from the queue in strict priority order, so a backlog of non-real-time replication events cannot cause a backlog or failure in real-time events.
+
+  * The queues are provided using the existing `riak_core_priority_queue` module in Riak.
+
+  * The administrator may at run-time suspend or resume the publishing of data to specific queues via the `riak_kv_replrtq_src` process.
+
+* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk`
+
+  * There is a single actor on each node that manages the process of consuming from queues on the `riak_kv_replrtq_src` on remote clusters.
+
+  * The `riak_kv_replrtq_snk` can be configured to consume from multiple queues, across an open-ended number of peers. For instance, if each node on Cluster A maintains a queue named `cluster_c_full`, and each node on Cluster B maintains a queue named `cluster_c_partial`, then `riak_kv_replrtq_snk` can be configured to consume from `cluster_c_full` on every node in Cluster A and from `cluster_c_partial` on every node in Cluster B.
+
+  * The `riak_kv_replrtq_snk` manages a finite number of workers for consuming from remote peers. The `riak_kv_replrtq_snk` tracks the results of work in order to back off slightly from peers regularly not returning results to consume requests (in favour of those peers indicating a backlog by regularly returning results). The `riak_kv_replrtq_snk` also tracks the results of work in order to back off severely from those peers returning errors (so as not to lock too many workers consuming from unreachable nodes).
+
+  * The administrator may at run-time suspend or resume the consuming of data from specific queues or peers via the `riak_kv_replrtq_snk`.
+
+### Real-time Replication - Step by Step
+
+Previous replication implementations initiate replication through a post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm` after "enough" responses have been received from other vnodes (based on the n, w, dw and pw values for the PUT). Without enough responses, the replication hook is not fired, although the client should receive an error and retry. This process of retrying may eventually fire the hook; however, it is possible for a PUT to fail and the hook never to fire, while a GET is locally successful (due to read-repair and anti-entropy) - leaving no clue that the object has not been replicated.
+
+In implementing the new replication solution, the point of firing off replication has been changed to the point at which the co-ordinated PUT is completed. So the replication of the PUT to other clusters may occur in parallel with the replication of the PUT to other nodes in the source cluster. This is the first opportunity where sufficient information is known (e.g.
the updated vector clock), and this reduces the size of the time-window of inconsistency between the clusters, and also reduces the window of opportunity for a PUT to succeed but not have replication triggered.
+
+Replication is fired within the `riak_kv_vnode` `actual_put/8`. On condition of the vnode being a co-ordinator of the PUT, and of `riak_kv.replrtq_enablesrc` being set to enabled (true), the following work is done:
+
+- The object reference to be replicated is determined; this is the type of reference to be placed on the replication queue.
+
+  - If the object is now a tombstone, the whole object is used as the replication reference. The whole object is used due to the small size of the object, and the need to avoid race conditions with reaping activity if `delete_mode` is not `keep` - the cluster may not be able to fetch the tombstone to replicate in the future. The whole object must be kept on the queue and not be filtered by the `riak_kv_replrtq_src` to be replaced with a `to_fetch` reference.
+
+  - If the object is below the `riak_kv.replrtq_srcobjectsize` (default 200KB) then the whole object will be sent to the `riak_kv_replrtq_src`, and it will be queued as a whole object as long as the current size of the priority real-time queue does not exceed the `riak_kv.replrtq_srcobjectlimit` (default 1000). If an object is over the size limit, a `to_fetch` reference will be sent instead of the object, and if the queue is too large the `riak_kv_replrtq_src` will substitute a `to_fetch` reference before queueing.
+
+- The `{Bucket, Key, Clock, ObjectReference}` is cast to the `riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the priority queue.
+
+- The queue has a configurable absolute limit that is applied individually for each priority. The limit is configured via `riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5 minutes of traffic at 1,000 PUTs per second). When this limit is reached, new replication references are discarded on receipt rather than queued - these discarded references will need to eventually be re-replicated via full-sync.
+
+The reference now needs to be handled by the `riak_kv_replrtq_src`. The task list for this process is:
+
+- Assign a priority to the replication event depending on what prompted the replication (e.g. highest priority to real-time events received from co-ordinator vnodes).
+
+- Add the reference to the tail of __every__ matching queue based on priority. Each queue is configured to either match `any` replication event, no real-time events (using the configuration `block_rtq`), or a subset of events (using either a bucket `type` filter or a `bucket` filter).
+
+In order to replicate the object, it must now be fetched from the queue by a sink. A sink-side cluster should have multiple consumers, on multiple nodes, consuming from each node in the source-side cluster. These workers are handed work items by the `riak_kv_replrtq_snk`, with a Riak client configured to communicate with the remote node, and the worker will initiate a `fetch` from that node.
+
+On receipt of the `fetch` request the source node should:
+
+- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in place of `{Bucket, Key}`.
+
+- The GET FSM should go directly into the `queue_fetch` state, and try to fetch the next replication reference from the given queue name via the `riak_kv_replrtq_src`.
+
+  - If the fetch from the queue returns `queue_empty`, this is relayed back to the sink-side worker, and ultimately to the `riak_kv_replrtq_snk`, which may then slow down the pace at which fetch requests are sent to this node/queue combination. To reduce the volume of individual requests when queues are mainly empty, the queue is only considered empty if it has reported empty 8 times from requests 4ms apart.
+
+  - If the fetch returns an actual object, this is relayed back to the sink worker.
+
+  - If the fetch returns a replication reference with the flag `to_fetch`, the `riak_kv_get_fsm` will continue down the standard path of states starting with `prepare`, and fetch the object, which will then be returned to the sink worker.
+
+- If a successful fetch is relayed back to the sink worker, it will replicate the PUT using a local `riak_client:push/4`. The push will complete a PUT of the object on the sink cluster - using a `riak_kv_put_fsm` with appropriate options (e.g. `asis`, `disable-hooks`).
+
+  - The code within the `riak_client:push/4` follows the behaviour of the existing `riak_repl` on receipt of a replicated object.
+
+- If the fetch and push request fails, the sink worker will report this back to the `riak_kv_replrtq_snk`, which should delay further requests to that node/queue so as to avoid rapidly tying up sink workers communicating with a failing node.
+
+
+### Full-Sync Reconciliation and Repair - Step by Step
+
+The `riak_kv_ttaaefs_manager` controls the full-sync replication activity of a node. Each node is configured with a single peer with which it is to run full-sync checks and repairs, assuming that across the cluster sufficient peers to sufficient clusters have been configured to complete the overall work necessary for that cluster. Ensuring there are sufficient peer relations is an administrator responsibility; there is no re-balancing or re-scaling of this work during failure scenarios.
+
+The `riak_kv_ttaaefs_manager` is a source-side process. It will not attempt to repair any discovered discrepancies where the remote cluster is ahead of the local cluster - the job of the process is to ensure that a remote cluster is up-to-date with the changes which have occurred in the local cluster. For mutual full-sync replication, there will need to be an equivalent configuration on the peer cluster.
+
+The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the configuration. The schedule contains "wants": the number of times per day that this manager should:
+
+- Reconcile changes across the whole cluster over all time;
+
+- Skip work for a schedule slot and do nothing;
+
+- Reconcile changes that have occurred in the past hour;
+
+- Reconcile changes that have occurred in the past day.
+
+On startup, the manager looks at these wants and provides a random distribution of work across slots. The day is divided into slots evenly distributed so there is a slot for each want in the schedule. It will run work for the slot at an offset from the start of the slot, based on the place this node has in the sorted list of currently active nodes. So if each node is configured with the same total number of wants, work will be synchronised to have limited overlapping work within the cluster.
+
+When, on a node, a scheduled piece of work comes due, the `riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work between the two clusters (using the peer configuration to reach the remote cluster).
Once the work is finished, it will schedule the next piece of work - unless the start time for the next piece of work has already passed, in which case the next work is skipped. When all the work in the schedule is complete, a new schedule is calculated from the wants.
+
+When starting an `aae_exchange`, the `riak_kv_ttaaefs_manager` must pass in a repair function. This function will compare clocks from identified discrepancies, and where the source cluster is ahead of the sink, send the `{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name on `riak_kv_replrtq_src`. These queued entries will then be replicated through being fetched by the `riak_kv_replrtq_snk` workers, although this will only occur when there is no higher priority work to replicate, i.e. real-time replication events prompted by locally co-ordinated PUTs.
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/reading-objects.md b/content/riak/kv/2.9.10/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..16f4a734f3
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/reading-objects.md
@@ -0,0 +1,250 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/#siblings) (more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object
+`pr` | `0` | How many [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+    .withOption(FetchOption.R, new Quorum(3))
+    .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` + + + diff --git a/content/riak/kv/2.9.10/developing/usage/replication.md b/content/riak/kv/2.9.10/developing/usage/replication.md new file mode 100644 index 0000000000..5e4efc854e --- /dev/null +++ b/content/riak/kv/2.9.10/developing/usage/replication.md @@ -0,0 +1,591 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.10/dev/advanced/replication-properties + - /riak/kv/2.9.10/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/2.9.10/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/2.9.10/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, at the time
+that a read or write is performed. Examples of this approach are given
+in the [client-level replication settings](#client-level-replication-settings)
+section below.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | Number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.10/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.10/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level.
You can use [bucket types]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(storyKey)
+  .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing this object will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak.
When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
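+
+For example, both settings can be passed as query parameters on a single
+read via the HTTP API. The bucket and key below are hypothetical; this is
+a sketch of a per-request read rather than a bucket-level setting:
+
+```curl
+# Hypothetical bucket/key; the URL is quoted so the shell does not
+# interpret the "&" between the two query parameters
+curl "http://localhost:8098/buckets/test_bucket/keys/missing_key?notfound_ok=false&basic_quorum=true"
+```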
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+  .withOption(FetchOption.R, new Quorum(2))
+  .withOption(FetchOption.NOTFOUND_OK, true)
+  .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+  .setContentType("application/json")
+  .setValue(BinaryValue.create("{'stats':{ ... large stats object ...
}}"));
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(michaelJordanKey)
+  .withOption(StoreOption.W, new Quorum(3))
+  .withOption(StoreOption.DW, new Quorum(2))
+  .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/2.9.10/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/2.9.10/developing/getting-started)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/2.9.10/learn/concepts/causal-context#siblings">siblings</a>
+2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/2.9.10/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All siblings are returned, optionally with their associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/search-schemas.md b/content/riak/kv/2.9.10/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..688c88a241
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/search-schemas.md
@@ -0,0 +1,510 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/search-schema
+  - /riak/kv/2.9.10/dev/advanced/search-schema
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/), and [more]({{<baseurl>}}riak/kv/2.9.10/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value.
Sufficiently sized objects can potentially take up tremendous
+amounts of disk space, so pay special attention to those indexes.
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('cartoons.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('cartoons.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+xml_file.close()
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon ThunderCats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above object into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+
+```
+name=Lion-o
+age=30
+leader=true
+aliases.name=León-O
+aliases.desc_es=Señor de los ThunderCats
+aliases.name=Starlion
+aliases.desc_fr=Le jeune seigneur des Cosmocats
+```
+
+This means that our schema should handle `name`, `age`, `leader`,
+`aliases.name` (a `dot` is a valid field character), and
+`aliases.desc_*`, which is a description in the language indicated by
+the suffix (Spanish and French).
+
+### Required Schema Fields
+
+Solr schemas can be very complex, containing many types and analyzers.
+Refer to the [Solr 4.7 reference
+guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf)
+for a complete list. You should be aware, however, that there are a few
+fields that are required by Riak Search in order to properly distribute
+an object across a [cluster][concept clusters]. These fields are all prefixed
+with `_yz`, which stands for
+[Yokozuna](https://github.com/basho/yokozuna), the original code name
+for Riak Search.
+
+Below is a bare-minimum skeleton Solr schema. It won't do much for you
+other than allow Riak Search to properly manage your stored objects.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+ </types>
+</schema>
+```
+
+If you're missing any of the above fields, Riak Search will reject your
+custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`.
+
+In the table below, you'll find a description of the various required
+fields. You'll rarely need to use any fields other than `_yz_rt` (bucket
+type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err`
+can be helpful if you suspect that your extractors are failing.
+Malformed JSON or XML will cause Riak Search to index a key and set
+`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-------|:-----|:-----------
+`_yz_id` | ID | Unique identifier of this Solr document
+`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy)
+`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag`| VTag | If the object has siblings, the vtag differentiates them
+`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Indicates whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you need to map the name and type of each field (a string,
+integer, boolean, etc.).
+
+When creating fields you can either create specific fields via the
+`field` element or an asterisk (`*`) wildcard field via `dynamicField`.
+A field that matches a specific field name wins; otherwise, Solr
+attempts to match it against the dynamic field patterns.
+
+Besides a field `type`, you also must decide if a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored`, you
+can get the value back as a result of a query, but doing so also doubles
+the storage of the field (once in Riak, again in Solr). If
+a single Riak object can have more than one copy of the same matching
+field, you also must set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+  <fields>
+    <field name="name" type="string" indexed="true" stored="true" />
+    <field name="age" type="int" indexed="true" stored="false" />
+    <field name="leader" type="boolean" indexed="true" stored="false" />
+    <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+    <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+    <dynamicField name="*_fr" type="text_fr" indexed="true" stored="true" multiValued="true" />
+
+    <!-- All of these fields are required by Riak Search -->
+    <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+    <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+    <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+    <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+    <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+    <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+    <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+    <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+    <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+  </fields>
+
+  <uniqueKey>_yz_id</uniqueKey>
+```
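+
+To see that matching rule as code, here is a small, purely illustrative
+Python sketch. The `matches` helper and the field lists are our own
+shorthand, not Solr APIs, but the precedence (exact `field` names first,
+then `dynamicField` patterns) is the same:
+
+```python
+from fnmatch import fnmatch
+
+fields = {"name", "age", "leader", "aliases.name"}
+dynamic_fields = ["*_es", "*_fr"]
+
+def matches(field_name):
+    # Exact field names win over dynamic (wildcard) patterns
+    if field_name in fields:
+        return "field: " + field_name
+    for pattern in dynamic_fields:
+        if fnmatch(field_name, pattern):
+            return "dynamicField: " + pattern
+    return "no match (rejected unless a catch-all field exists)"
+
+print(matches("aliases.name"))     # field: aliases.name
+print(matches("aliases.desc_es"))  # dynamicField: *_es
+print(matches("aliases.desc_xx"))  # no match ...
+```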
+
+Next, take note of the types you used in the fields and ensure that each
+type is defined as a `fieldType` under the `types`
+element. Basic types such as `string`, `boolean`, and `int` have matching
+Solr classes. There are dozens more types, including many kinds of
+number (`float`, `tdouble`, `random`), `date` fields, and even
+geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we mapped any field that ends with
+`*_es` to Spanish, and `*_fr` to French.
+
+```xml
+  <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- Spanish -->
+    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+        <filter class="solr.SpanishLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+      </analyzer>
+    </fieldType>
+
+    <!-- French -->
+    <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
+        <filter class="solr.FrenchLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
+      </analyzer>
+    </fieldType>
+  </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+provided to index without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to this will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+    at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+    at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+    at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+    at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+    at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+    ...
+    ...
+    ...
+```
+
+### Uploading
+
+Once you have finalized your custom schema in an `.xml` file, you can upload it to Riak KV as follows:
+
+```curl
+curl -v -XPUT $RIAK_HOST/search/schema/thundercats \
+  -H 'Content-Type:application/xml' \
+  --data-binary @thundercats_schema.xml
+```
+
+
+
+## Field Properties By Use Case
+
+Sometimes it can be tricky to decide whether a value should be `stored`,
+or whether `multiValued` is allowed. This handy table from the [Solr
+documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case)
+may help you pick field properties.
+
+An entry of `true` or `false` in the table indicates that the option
+must be set to the given value for the use case to function correctly.
+If no entry is provided, the setting of that attribute has no impact on
+the case.
+
+<table class="schemausecase">
+<thead>
+<tr>
+<th>Use Case</th>
+<th><code>indexed</code></th>
+<th><code>stored</code></th>
+<th><code>multiValued</code></th>
+<th><code>omitNorms</code></th>
+<th><code>termVectors</code></th>
+<th><code>termPositions</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>search within field</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>retrieve contents</td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use as unique key</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>sort on field</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td><code>true</code>[1](#notes)</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use field boosts[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>document boosts affect searches within field</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>highlighting</td>
+<td><code>true</code>[4](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td>[2](#notes)</td>
+<td><code>true</code>[3](#notes)</td>
+</tr>
+<tr>
+<td>faceting[5](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>add multiple values, maintaining order</td>
+<td></td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>field length affects doc score</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>MoreLikeThis[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>true</code>[6](#notes)</td>
+<td></td>
+</tr>
+</tbody></table>
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/search.md b/content/riak/kv/2.9.10/developing/usage/search.md
new file mode 100644
index 0000000000..4776de90b6
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/search.md
@@ -0,0 +1,1454 @@
+---
+title: "Using Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Searching"
+    identifier: "usage_searching"
+    weight: 105
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/using/search
+  - /riak/kv/2.9.10/dev/using/search
+---
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak search 2.0 is an integration of Solr (for indexing and querying)
+and
Riak (for storage and distribution). There are a few points of
+interest that a user of Riak search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak search uses AAE is in the
+[Riak search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you will query against.
+When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Below, we'll create an
+index named `famous` with the default schema.
+
+Both schema and index creation will be covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/2.9.10/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.10/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.10/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+    .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type
+property `search_index` to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create one without
+any properties and set individual buckets to be indexed.
The step below
+creates and activates the bucket type:
+
+```bash
+riak-admin bucket-type create animals '{"props":{}}'
+riak-admin bucket-type activate animals
+```
+
+And this step applies the index to the `cats` bucket, which bears the
+`animals` bucket type we just created and activated:
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"famous"}}'
+```
+
+Another possibility is to set the `search_index` as a default property
+of the bucket type. This means _any_ bucket under that type will
+inherit that setting and have its values indexed.
+
+```bash
+riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}'
+riak-admin bucket-type activate animals
+```
+
+If you ever need to turn off indexing for a bucket, set the
+`search_index` property to the `_dont_index_` sentinel value.
+
+### Associating an Index via Custom Bucket Properties
+
+Although we recommend that you use all new buckets under a bucket type,
+if you have existing data with a type-free bucket (i.e. under the
+default bucket type) you can set the `search_index` property for a
+specific bucket.
+
+```java
+Namespace catsBucket = new Namespace("cats");
+StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket)
+        .withSearchIndex("famous")
+        .build();
+client.execute(storePropsOp);
+```
+
+```ruby
+bucket = client.bucket('cats')
+bucket.properties = {'search_index' => 'famous'}
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak))
+    ->withName('famous')
+    ->buildBucket('cats')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket('cats')
+bucket.set_properties({'search_index': 'famous'})
+```
+
+```csharp
+var properties = new RiakBucketProperties();
+properties.SetSearchIndex("famous");
+var rslt = client.SetBucketProperties("cats", properties);
+```
+
+```javascript
+var bucketProps_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    // success
+};
+
+var store = new Riak.Commands.KV.StoreBucketProps.Builder()
+    .withBucket("cats")
+    .withSearchIndex("famous")
+    .withCallback(bucketProps_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>).
+```
+
+```golang
+// This section concerns type-free (default bucket type) buckets, so no
+// bucket type is set here
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucket("cats").
+    WithSearchIndex("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"famous"}}'
+```
+
+Once you have created the index association, any new data will be indexed on
+ingest according to your schema.
+
+## Riak Search Security Setup
+
+[Security]({{<baseurl>}}riak/kv/2.9.10/using/security/) is a new feature as of
+Riak 2.0 that lets an administrator limit access to certain resources.
+In the case of search, your options are to limit administration of
+schemas or indexes (the `search.admin` permission) to certain users, and
+to limit querying (the `search.query` permission) to any index or to a
+specific index. The example below shows the various options.
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Lion-o, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python the content type is automatically set for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+    ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket))
+    ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket))
+    ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket))
+    ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]);
+
+$storeObjectBuilder->build()->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+
+cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True})
+cat.store()
+
+cat = bucket.new('cheetara', {'name_s':'Cheetara', 'age_i':28, 'leader_b': False})
+cat.store()
+
+cat = bucket.new('snarf', {'name_s':'Snarf', 'age_i':43})
+cat.store()
+
+cat = bucket.new('panthro', {'name_s':'Panthro', 'age_i':36})
+cat.store()
+```
+
+```csharp
+var lionoId = new RiakObjectId("animals", "cats", "liono");
+var lionoObj = new { name_s = "Lion-o", age_i = 30, leader_b = true };
+var lionoRiakObj = new RiakObject(lionoId, lionoObj);
+
+var cheetaraId = new RiakObjectId("animals", "cats", "cheetara");
+var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader_b = false };
+var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj);
+
+var snarfId = new RiakObjectId("animals", "cats", "snarf");
+var snarfObj = new { name_s = "Snarf", age_i = 43, leader_b = false };
+var snarfRiakObj = new RiakObject(snarfId, snarfObj);
+
+var panthroId = new RiakObjectId("animals", "cats", "panthro");
+var panthroObj = new { name_s = "Panthro", age_i = 36, leader_b = false };
+var panthroRiakObj = new RiakObject(panthroId, panthroObj);
+
+var rslts = client.Put(new[] {
+    lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj
+});
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var objs = [
+    [ 'liono', { name_s: 'Lion-o', age_i: 30, leader_b: true } ],
+    [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader_b: false } ],
+    [ 'snarf', { name_s: 'Snarf', age_i: 43, leader_b: false } ],
+    [ 'panthro', { name_s: 'Panthro', age_i: 36, leader_b: false } ],
+];
+
+var storeFuncs = [];
+objs.forEach(function (o) {
+    var storeFunc = function (async_cb) {
+        var key = o[0];
+        var value = o[1];
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('application/json');
+        riakObj.setBucketType('animals');
+        riakObj.setBucket('cats');
+        riakObj.setKey(key);
+        riakObj.setValue(value);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    };
+    storeFuncs.push(storeFunc);
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+    // NB: all objects stored and indexed...
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/2.9.10/developing/usage/custom-extractors). 
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/2.9.10/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated into a
+Solr index document field. Solr will index any field that it
+recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` is an integer, `_b` is a boolean and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/2.9.10/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran.
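+
+For instance, you can target `_yz_rk` directly in a query. The brief
+sketch below uses the official Python client's `fulltext_search`
+function (shown throughout this page) and assumes the `famous` index
+and the cat objects stored above:
+
+```python
+# _yz_* fields can be queried like any other Solr field
+results = client.fulltext_search('famous', '_yz_rk:liono')
+for doc in results['docs']:
+    print(doc['_yz_rk'], doc['_yz_rb'])  # e.g.: liono cats
+```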
+
+Let's see this in detail by running some queries in the next section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match.
+
+```java
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "name_s:Lion*")
+        .build();
+cluster.execute(searchOp);
+// Get the results as a List of Maps:
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+// Print the matching documents:
+System.out.println(results);
+```
+
+```ruby
+results = client.search("famous", "name_s:Lion*")
+p results
+p results['docs']
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('name_s:Lion*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+
+var_dump($response->getDocs());
+```
+
+```python
+results = client.fulltext_search('famous', 'name_s:Lion*')
+print(results)
+print(results['docs'])
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "name_s")
+        .Search("Lion*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+foreach (RiakSearchResultDocument doc in searchResult.Documents)
+{
+    var args = new[] {
+        doc.BucketType,
+        doc.Bucket,
+        doc.Key,
+        string.Join(", ", doc.Fields.Select(f => f.Value).ToArray())
+    };
+    Debug.WriteLine(
+        format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}",
+        args: args);
+}
+```
+
+```javascript
+function search_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("docs:", JSON.stringify(rslt.docs));
+}
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('name_s:Lion*')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:fwrite("~p~n", [Results]),
+Docs = Results#search_results.docs,
+io:fwrite("~p~n", [Docs]).
+
+%% Please note that this example relies on an Erlang record definition
+%% for the search_result record found here:
+%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("name_s:Lion*").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+sc := cmd.(*riak.SearchCommand)
+if json, jerr := json.MarshalIndent(sc.Response.Docs, "", "  "); jerr != nil {
+    return jerr
+} else {
+    fmt.Println(string(json))
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp
+```
+
+The response to a query will be an object containing details about the
+response, such as a query's max score and a list of documents which
+match the given query. It's worth noting two things:
+
+* The documents returned are Search documents (a set of Solr
+  field/values), not a Riak value
+* The HTTP response is a direct Solr response, while the drivers use
+  Protocol Buffers and are encoded with different field names
+
+This is a common HTTP `response` value:
+
+```json
+{
+  "numFound": 1,
+  "start": 0,
+  "maxScore": 1.0,
+  "docs": [
+    {
+      "leader_b": true,
+      "age_i": 30,
+      "name_s": "Lion-o",
+      "_yz_id": "animals_cats_liono_37",
+      "_yz_rk": "liono",
+      "_yz_rt": "animals",
+      "_yz_rb": "cats"
+    }
+  ]
+}
+```
+
+The most important field returned is `docs`, which is the list of
+objects that each contain fields about matching index documents.
The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+        .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation($key, $bucket, $btype)
+    ->build()
+    ->execute()
+    ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print(object.data)
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO 30]`.
+If you wanted to find all cats 30 or older, you could include a
+glob as a top end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
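+
+Date and date-math ranges use the same bracket syntax. As an
+illustrative sketch (Python client; `pubdate_dt` is a hypothetical
+field, relying on the default schema mapping the `_dt` suffix to a Solr
+date type):
+
+```python
+# Everything published in the last year, using Solr date math
+results = client.fulltext_search(
+    'famous', 'pubdate_dt:[NOW-1YEAR/DAY TO NOW/DAY+1DAY]')
+print(results['num_found'])
+```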
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('leader_b:true AND age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+    .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new Command\Builder\Search\DeleteIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []),
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that index's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_`.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents is returned in non-overlapping
+sequential subsets (in other words, *pages*).
This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, where `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create("famous"), "*:*")
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .build();
+client.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('*:*')
+    ->withMaxRows($maxRows)
+    ->withStartRow($start)
+    ->build()
+    ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results.
+
+If you want to sort by score without repeating results then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query.
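+
+As an illustrative sketch of the key-sorted approach (Python client;
+extra keyword arguments such as `sort` are passed through to Solr, so
+treat the exact parameter plumbing as an assumption to verify against
+your client version):
+
+```python
+# Sorting on type/bucket/key gives a stable order across pages, since
+# these values are identical on every replica of an object.
+results = client.fulltext_search(
+    'famous', '*:*',
+    start=0, rows=2,
+    sort='_yz_rt asc, _yz_rb asc, _yz_rk asc')
+```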
+
+[This issue](https://github.com/basho/yokozuna/issues/355) is caused by
+the way Search must minimally distribute a query across multiple Solr
+nodes (called a *coverage plan*) and then filter duplicate results to
+retrieve a full result set. Since this plan is frequently recalculated,
+successive page queries may use a different plan, and thus calculate
+alternate `score`s or filter different `_yz_id` values. We have plans to
+fix this shortcoming in a future version of Riak.
+
+### MapReduce
+
+Riak Search allows for piping search results as inputs for
+[MapReduce]({{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce/) jobs. This is useful for
+performing post-calculations on results or aggregations of ad-hoc
+queries. The Riak Search MapReduce integration works similarly to
+regular MapReduce, with the notable exception that your input is not a
+bucket, but rather index and query arguments to the `yokozuna` module
+and `mapred_search` function (an Erlang `module:function` pair that adds
+the Riak Search hook to MapReduce).
+
+```json
+{
+  "inputs": {
+    "module": "yokozuna",
+    "function": "mapred_search",
+    "arg": ["famous","NOT leader_b:true"]
+  },
+  "query": [
+    {
+      "map": {
+        "language": "javascript",
+        "keep": false,
+        "source": "function(v) { return [1]; }"
+      }
+    },
+    {
+      "reduce": {
+        "language": "javascript",
+        "keep": true,
+        "name": "Riak.reduceSum"
+      }
+    }
+  ]
+}
+```
+
+In this example we're searching for all famous cats that are not
+leaders and counting up the results using JavaScript for both map and
+reduce. It should return the reduced sum of `[3]`.
+
+```curl
+curl -XPOST $RIAK_HOST/mapred \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}'
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/searching-data-types.md b/content/riak/kv/2.9.10/developing/usage/searching-data-types.md
new file mode 100644
index 0000000000..b4021937f1
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/searching-data-types.md
@@ -0,0 +1,1686 @@
+---
+title: "Searching with Data Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Searching with Data Types"
+    identifier: "usage_search_data_types"
+    weight: 111
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/search/search-data-types
+  - /riak/kv/2.9.10/dev/search/search-data-types
+---
+
+Although [Riak Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types) function differently from other
+Riak objects in some respects, when you're using Search you can think of
+them as normal Riak objects with special metadata attached (metadata
+that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#maps)
+can be indexed and have their contents searched just like other Riak
+objects.
+
+## Data Type MIME Types
+
+Like all objects stored in Riak, Riak Data Types are assigned content
+types. Unlike other Riak objects, this happens automatically. When you
+store, say, a counter in Riak, it will automatically be assigned the
+type `application/riak_counter`.
The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types has its own default schema, with the exception of
+maps, which means that the `_yz_default` schema will automatically index
+Data Types on the basis of their assigned content type. This means that
+there is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#data-types-and-search-examples) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#maps), there are four schemas
+for embedded, aka dynamic, fields.
Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued.
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings.
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+    .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('scores')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index.
So let's +start playing with some counters. All counters will be stored in the +bucket `people`, while the key for each counter will be the username of +each player: + +```java +Namespace peopleBucket = new Namespace("counters", "people"); + +Location christopherHitchensCounter = new Location(peopleBucket, "christ_hitchens"); +CounterUpdate cu = new CounterUpdate(10); +UpdateCounter update = new UpdateCounter.Builder(christopherHitchensCounter, cu) + .build(); +client.execute(update); + +Location joanRiversCounter = new Location(peopleBucket, "joan_rivers"); +CounterUpdate cu = new CounterUpdate(25); +UpdateCounter update = new UpdateCounter.Builder(joanRiversCounter, cu) + .build(); +client.execute(update); +``` + +```ruby +bucket = client.bucket('people') + +christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters') +christopher_hitchens_counter.increment(10) + +joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters') +joan_rivers_counter.increment(25) +``` + +```php +$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(10) + ->buildLocation('chris_hitchens', 'people', 'counters'); + +$builder->build->execute(); + +$builder->withIncrement(25) + ->buildLocation('joan_rivers', 'people', 'counters') + ->build() + ->execute(); +``` + +```python +from riak.datatypes import Counter + +bucket = client.bucket_type('counters').bucket('people') + +christopher_hitchens_counter = Counter(bucket, 'chris_hitchens') +christopher_hitchens_counter.increment(10) +christopher_hitchens_counter.store() + +joan_rivers_counter = Counter(bucket, 'joan_rivers') +joan_rivers_counter.increment(25) +joan_rivers_counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs + +var cmd = new UpdateCounter.Builder() + .WithBucketType("counters") + .WithBucket("people") + .WithKey("christ_hitchens") + .WithIncrement(10) + .Build(); +RiakResult rslt = client.Execute(cmd); + +cmd = new UpdateCounter.Builder() + .WithBucketType("counters") + .WithBucket("people") + .WithKey("joan_rivers") + .WithIncrement(25) + .Build(); +rslt = client.Execute(cmd); +``` + +```javascript +var funcs = [ + function (async_cb) { + var options = { + bucketType: 'counters', + bucket: 'people', + key: 'christ_hitchens', + increment: 10 + }; + + client.updateCounter(options, function (err, rslt) { + throwIfErr(err); + async_cb(); + }); + }, + function (async_cb) { + var options = { + bucketType: 'counters', + bucket: 'people', + key: 'joan_rivers', + increment: 25 + }; + + client.updateCounter(options, function (err, rslt) { + throwIfErr(err); + async_cb(); + }); + } +]; + +async.parallel(funcs, function (err, rslts) { + throwIfErr(err); +}); +``` + +```erlang +ChristopherHitchensCounter = riakc_counter:new(), +HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter), +JoanRiversCounter = riakc_counter:new(), +RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter), +riakc_pb_socket:update_type(Pid, + {<<"counters">>, <<"people">>}, + <<"chris_hitchens">>, + riakc_counter:to_op(HitchensCounter1)), +riakc_pb_socket:update_type(Pid, + {<<"counters">>, <<"people">>}, + <<"joan_rivers">>, + riakc_counter:to_op(RiversCounter1)). +``` + +```curl +# We do not recommend working with Riak Data Types via curl. Try using +# one of our client libraries instead. +``` + +So now we have two counters, one with a value of 10 and the other with a +value of 25. 
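If you'd like to double-check the stored values outside of Search, here's a
minimal read-back sketch using the Python client (reusing `client` from the
examples above; for counter-typed buckets, fetching a key returns a
`Counter`):

```python
# Sketch: fetch each counter by key and inspect its value directly.
bucket = client.bucket_type('counters').bucket('people')
print(bucket.get('chris_hitchens').value)  # 10
print(bucket.get('joan_rivers').value)     # 25
```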
Let's query to see how many counters have a value greater than 20, just to
be sure:

```java
String index = "scores";
String query = "counter:[20 TO *]";
SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
        .build();
cluster.execute(searchOp);
SearchOperation.Response results = searchOp.get();
```

```ruby
results = client.search('scores', 'counter:[20 TO *]')
# This should return a Hash with fields like 'num_found' and 'docs'

results['num_found']
# 1
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('scores')
  ->withQuery('counter:[20 TO *]')
  ->build()
  ->execute();

$response->getNumFound(); // 1
```

```python
results = client.fulltext_search('scores', 'counter:[20 TO *]')
# This should return a dict with fields like 'num_found' and 'docs'

results['num_found']
# 1
```

```csharp
var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
var rslt = client.Search(search);
RiakSearchResult searchResult = rslt.Value;
Console.WriteLine("Num found: {0}", searchResult.NumFound);
```

```javascript
function search_cb(err, rslt) {
    logger.info("counter numFound: '%d', docs: '%s'",
        rslt.numFound, JSON.stringify(rslt.docs));

    var doc = rslt.docs[0];
    var key = doc['_yz_rk'];
    var bucket = doc['_yz_rb'];
    var bucketType = doc['_yz_rt'];
}

var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('scores')
    .withQuery('counter:[20 TO *]')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
NumberFound = Results#search_results.num_found.
%% 1
```

```curl
curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[20 TO *]" | json_pp
```

And there we are: only one of our two stored counters has a value over 20.
To find out which counter that is, we can dig into our results:

```java
// Using the "results" object from above:
int numberFound = results.numResults();
Map<String, List<String>> foundObject = results.getAllResults().get(0);
String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
String bucket = foundObject.get("_yz_rb").get(0); // "people"
String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
```

```ruby
doc = results['docs'][0]

# The key
doc['_yz_rk'] # 'joan_rivers'

# The bucket
doc['_yz_rb'] # 'people'

# The bucket type
doc['_yz_rt'] # 'counters'
```

```php
$doc = $response->getDocs()[0];

// The key
$doc->_yz_rk; // 'joan_rivers'

// The bucket
$doc->_yz_rb; // 'people'

// The bucket type
$doc->_yz_rt; // 'counters'
```

```python
doc = results['docs'][0]

# The key
doc['_yz_rk'] # 'joan_rivers'

# The bucket
doc['_yz_rb'] # 'people'

# The bucket type
doc['_yz_rt'] # 'counters'
```

```csharp
var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
var rslt = client.Search(search);

RiakSearchResult searchResult = rslt.Value;
Console.WriteLine("Num found: {0}", searchResult.NumFound);

var firstDoc = searchResult.Documents.First();
Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
```

```javascript
var doc = rslt.docs[0];

var key = doc['_yz_rk'];
var bucket = doc['_yz_rb'];
var bucketType = doc['_yz_rt'];
```

```erlang
Docs = Results#search_results.docs,
Doc = lists:nth(1, Docs),
Key = proplists:get_value(<<"_yz_rk">>, Doc),
Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
```

```curl
# Use the JSON object from above to locate bucket, key, and bucket type
# information
```

Alternatively, we can see how many counters have values below 15:

```java
String index = "scores";
String query = "counter:[* TO 15]";
SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
        .build();
cluster.execute(searchOp);
SearchOperation.Response results = searchOp.get();
```

```ruby
results = client.search('scores', 'counter:[* TO 15]')
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('scores')
  ->withQuery('counter:[* TO 15]')
  ->build()
  ->execute();

$response->getNumFound(); // 1
```

```python
results = client.fulltext_search('scores', 'counter:[* TO 15]')
```

```csharp
var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('scores')
    .withQuery('counter:[* TO 15]')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
```

```curl
curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp
```

Or we can see how many counters have a value of 17 exactly:

```java
// Using the same method as above, just changing the query:
String query = "counter:17";
```

```ruby
results = client.search('scores', 'counter:17')
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('scores')
  ->withQuery('counter:17')
  ->build()
  ->execute();
```

```python
results = client.fulltext_search('scores', 'counter:17')
```

```csharp
var search = new RiakSearchRequest("scores", "counter:17");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('scores')
    .withQuery('counter:17')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>).
```

```curl
curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp
```

## Sets Example

Let's say that we're storing information about the hobbies of a group of
people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#sets) simply called `sets`,
like so:

```bash
riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
riak-admin bucket-type activate sets
```

Now, we'll create a Search index called `hobbies` that uses the default
schema (as in some of the examples above):

```java
YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies", "_yz_default");
StoreIndex storeIndex =
    new StoreIndex.Builder(hobbiesIndex).build();
client.execute(storeIndex);
```

```ruby
client.create_search_index('hobbies', '_yz_default')
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
  ->withName('hobbies')
  ->usingSchema('_yz_default')
  ->build()
  ->execute();
```

```python
client.create_search_index('hobbies', '_yz_default')
```

```csharp
var searchIndex = new SearchIndex("hobbies", "_yz_default");
var rslt = client.PutSearchIndex(searchIndex);
```

```javascript
var options = {
    schemaName: '_yz_default',
    indexName: 'hobbies'
};
client.storeIndex(options, function (err, rslt) {
});
```

```erlang
riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>).
```

```curl
curl -XPUT $RIAK_HOST/search/index/hobbies \
  -H 'Content-Type: application/json' \
  -d '{"schema": "_yz_default"}'
```

Now, we can modify our `sets` bucket type to associate that bucket type
with our `hobbies` index:

```bash
riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}'
```

Now, all of the sets that we store in any bucket with the bucket type
`sets` will be automatically indexed in our `hobbies` index.
So let's say that we store two sets for two different people describing
their respective hobbies, in the bucket `people`:

```java
Namespace peopleBucket = new Namespace("sets", "people");

Location mikeDitkaSet = new Location(peopleBucket, "ditka");
SetUpdate su1 = new SetUpdate()
        .add("football")
        .add("winning");
UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();

Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
SetUpdate su2 = new SetUpdate()
        .add("wailing")
        .add("rocking")
        .add("winning");
UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();

client.execute(update1);
client.execute(update2);
```

```ruby
bucket = client.bucket('people')

mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
mike_ditka_set.add('football')
mike_ditka_set.add('winning')

ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
ronnie_james_dio_set.add('wailing')
ronnie_james_dio_set.add('rocking')
ronnie_james_dio_set.add('winning')
```

```php
$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
  ->add('football')
  ->add('winning')
  ->buildLocation('ditka', 'people', 'sets');

$builder->build()->execute();

$builder->add('wailing')
  ->add('rocking')
  ->add('winning')
  ->buildLocation('dio', 'people', 'sets')
  ->build()
  ->execute();
```

```python
from riak.datatypes import Set

bucket = client.bucket_type('sets').bucket('people')

mike_ditka_set = Set(bucket, 'ditka')
mike_ditka_set.add('football')
mike_ditka_set.add('winning')
mike_ditka_set.store()

ronnie_james_dio_set = Set(bucket, 'dio')
ronnie_james_dio_set.add('wailing')
ronnie_james_dio_set.add('rocking')
ronnie_james_dio_set.add('winning')
ronnie_james_dio_set.store()
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs

var cmd = new UpdateSet.Builder()
    .WithBucketType("sets")
    .WithBucket("people")
    .WithKey("ditka")
    .WithAdditions(new[] { "football", "winning" })
    .Build();
RiakResult rslt = client.Execute(cmd);

cmd = new UpdateSet.Builder()
    .WithBucketType("sets")
    .WithBucket("people")
    .WithKey("dio")
    .WithAdditions(new[] { "wailing", "rocking", "winning" })
    .Build();
rslt = client.Execute(cmd);
```

```javascript
var funcs = [
    function (async_cb) {
        var options = {
            bucketType: 'sets',
            bucket: 'people',
            key: 'ditka',
            additions: ['football', 'winning']
        };

        client.updateSet(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    },
    function (async_cb) {
        var options = {
            bucketType: 'sets',
            bucket: 'people',
            key: 'dio',
            additions: ['wailing', 'rocking', 'winning']
        };

        client.updateSet(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    }
];

async.parallel(funcs, function (err, rslts) {
    throwIfErr(err);
});
```

```erlang
%% riakc_set operations are functional, so each add_element returns a
%% new set that must be rebound
MikeDitkaSet0 = riakc_set:new(),
MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet0),
MikeDitkaSet = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
RonnieJamesDioSet0 = riakc_set:new(),
RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet0),
RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
RonnieJamesDioSet = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),

riakc_pb_socket:update_type(Pid,
                            {<<"sets">>, <<"people">>},
                            <<"ditka">>,
                            riakc_set:to_op(MikeDitkaSet)),
riakc_pb_socket:update_type(Pid,
                            {<<"sets">>, <<"people">>},
                            <<"dio">>,
                            riakc_set:to_op(RonnieJamesDioSet)).
```

Now, we can query our `hobbies` index to see if anyone has the hobby
`football`:

```java
// Using the same method explained above, just changing the query:
String query = "set:football";
```

```ruby
results = client.search('hobbies', 'set:football')
# This should return a Hash with fields like 'num_found' and 'docs'
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('hobbies')
  ->withQuery('set:football')
  ->build()
  ->execute();
```

```python
results = client.fulltext_search('hobbies', 'set:football')
# This should return a dict with fields like 'num_found' and 'docs'
```

```csharp
var search = new RiakSearchRequest("hobbies", "set:football");
var rslt = client.Search(search);

RiakSearchResult searchResult = rslt.Value;
Console.WriteLine("Num found: {0}", searchResult.NumFound);

var firstDoc = searchResult.Documents.First();
Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
```

```javascript
function search_cb(err, rslt) {
    logger.info("sets numFound: '%d', docs: '%s'",
        rslt.numFound, JSON.stringify(rslt.docs));

    var doc = rslt.docs[0];
    var key = doc['_yz_rk'];
    var bucket = doc['_yz_rb'];
    var bucketType = doc['_yz_rt'];
}

var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('hobbies')
    .withQuery('set:football')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>).
```

```curl
curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp
```

Let's see how many sets contain the element `football`:

```java
// Using the same method explained above for getting search results:
int numberFound = results.numResults(); // 1
```

```ruby
results['num_found']
# 1
```

```php
$response->getNumFound(); // 1
```

```python
results['num_found']
# 1
```

```csharp
RiakSearchResult searchResult = rslt.Value;
Console.WriteLine("Num found: {0}", searchResult.NumFound);
```

```javascript
rslt.numFound;
// 1
```

```erlang
NumberFound = Results#search_results.num_found.
%% 1
```

```curl
# Use the "num_found" field in the JSON response to the query above
```

Success! We stored two sets, only one of which contains the element
`football`. Now, let's see how many sets contain the element `winning`:

```java
// Using the same method explained above, just changing the query:
String query = "set:winning";

// Again using the same method from above:
int numberFound = results.numResults(); // 2
```

```ruby
results = client.search('hobbies', 'set:winning')
results['num_found']
# 2
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('hobbies')
  ->withQuery('set:winning')
  ->build()
  ->execute();

$response->getNumFound(); // 2
```

```python
results = client.fulltext_search('hobbies', 'set:winning')
results['num_found']
# 2
```

```csharp
var search = new RiakSearchRequest("hobbies", "set:winning");
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('hobbies')
    .withQuery('set:winning')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>).
NumberFound = Results#search_results.num_found.
%% 2
```

Just as expected, both sets we stored contain the element `winning`.
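Before moving on to maps, note that the terms above can also be combined in
a single query, as in the `set:apple AND set:orange` row of the earlier
table. A quick sketch in Python (reusing `client` and the `hobbies` index;
the expected count follows from the two sets we just stored):

```python
# Sketch: only Mike Ditka's set contains both 'winning' and 'football'.
results = client.fulltext_search('hobbies', 'set:winning AND set:football')
results['num_found']
# 1
```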
+ +## Maps Example + +This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types) +tutorial. That tutorial walks you through storing CMS-style user data in +Riak [maps]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/#maps), and we'd suggest that you +familiarize yourself with that tutorial first. More specifically, user +data is stored in the following fields in each user's map: + +* first name in a `first_name` register +* last name in a `last_name` register +* whether the user is an enterprise customer in an `enterprise_customer` + flag +* the number of times the user has visited the company page in a + `page_visits` counter +* a list of the user's interests in an `interests` set + +First, let's create and activate a bucket type simply called `maps` that +is set up to store Riak maps: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type activate maps +``` + +Now, let's create a search index called `customers` using the default +schema: + +```java +YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default"); +StoreIndex storeIndex = + new StoreIndex.Builder(customersIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('customers', '_yz_default') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('customers') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('customers', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("customers", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'customers' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>). 
```

```curl
curl -XPUT $RIAK_HOST/search/index/customers \
  -H 'Content-Type: application/json' \
  -d '{"schema":"_yz_default"}'
```

With our index created, we can associate our new `customers` index with
our `maps` bucket type:

```bash
riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
```

Now we can create some maps along the lines suggested above:

```java
Namespace customersBucket = new Namespace("maps", "customers");

Location idrisElbaMap = new Location(customersBucket, "idris_elba");
MapUpdate mu1 = new MapUpdate()
        .update("first_name", new RegisterUpdate("Idris"))
        .update("last_name", new RegisterUpdate("Elba"))
        .update("enterprise_customer", new FlagUpdate(true))
        .update("page_visits", new CounterUpdate(10))
        .update("interests", new SetUpdate().add("acting").add("being Stringer Bell"));

Location joanJettMap = new Location(customersBucket, "joan_jett");
MapUpdate mu2 = new MapUpdate()
        .update("first_name", new RegisterUpdate("Joan"))
        .update("last_name", new RegisterUpdate("Jett"))
        // Joan Jett is not an enterprise customer, so we don't need to
        // explicitly disable the "enterprise_customer" flag, as all
        // flags are disabled by default
        .update("page_visits", new CounterUpdate(25))
        .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));

UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
client.execute(update1);
client.execute(update2);
```

```ruby
bucket = client.bucket('customers')

idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')

idris_elba.batch do |ie|
  ie.registers['first_name'] = 'Idris'
  ie.registers['last_name'] = 'Elba'
  ie.flags['enterprise_customer'] = true
  ie.counters['page_visits'].increment(10)
  ['acting', 'being Stringer Bell'].each do |interest|
    ie.sets['interests'].add(interest)
  end
end

joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
joan_jett.batch do |jj|
  jj.registers['first_name'] = 'Joan'
  jj.registers['last_name'] = 'Jett'
  ## Joan Jett is not an enterprise customer, so we don't need to
  ## explicitly disable this flag, as all flags are disabled by default
  jj.counters['page_visits'].increment(25)
  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
    jj.sets['interests'].add(interest)
  end
end
```

```php
$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
  ->withIncrement(10);

$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));

foreach(['acting', 'being Stringer Bell'] as $interest) {
  $setBuilder->add($interest);
}

(new \Basho\Riak\Command\Builder\UpdateMap($riak))
  ->updateRegister('first_name', 'Idris')
  ->updateRegister('last_name', 'Elba')
  ->updateFlag('enterprise_customer', true)
  ->updateSet('interests', $setBuilder)
  ->updateCounter('page_visits', $counterBuilder)
  ->buildLocation('idris_elba', 'customers', 'maps')
  ->build()
  ->execute();

$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));

foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
  $setBuilder->add($interest);
}

(new \Basho\Riak\Command\Builder\UpdateMap($riak))
  ->updateRegister('first_name', 'Joan')
  ->updateRegister('last_name', 'Jett')
  ->updateSet('interests', $setBuilder)
  ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
  ->buildLocation('joan_jett', 'customers', 'maps')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('maps').bucket('customers')

idris_elba = Map(bucket, 'idris_elba')
idris_elba.registers['first_name'].assign('Idris')
idris_elba.registers['last_name'].assign('Elba')
idris_elba.flags['enterprise_customer'].enable()
idris_elba.counters['page_visits'].increment(10)
for interest in ['acting', 'being Stringer Bell']:
    idris_elba.sets['interests'].add(interest)
idris_elba.store()

joan_jett = Map(bucket, 'joan_jett')
joan_jett.registers['first_name'].assign('Joan')
joan_jett.registers['last_name'].assign('Jett')
# Joan Jett is not an enterprise customer, so we don't need to
# explicitly disable this flag, as all flags are disabled by default
joan_jett.counters['page_visits'].increment(25)
for interest in ['loving rock and roll', 'being in the Blackhearts']:
    joan_jett.sets['interests'].add(interest)
joan_jett.store()
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs

// Note: similar code for Joan Jett

const string firstNameRegister = "first_name";
const string lastNameRegister = "last_name";
const string enterpriseCustomerFlag = "enterprise_customer";
const string pageVisitsCounter = "page_visits";
const string interestsSet = "interests";

var idrisAdds = new[] { "acting", "being Stringer Bell" };

var mapOp = new UpdateMap.MapOperation()
    .SetRegister(firstNameRegister, "Idris")
    .SetRegister(lastNameRegister, "Elba")
    .SetFlag(enterpriseCustomerFlag, true)
    .IncrementCounter(pageVisitsCounter, 10)
    .AddToSet(interestsSet, idrisAdds);

var cmd = new UpdateMap.Builder()
    .WithBucketType("maps")
    .WithBucket("customers")
    .WithKey("idris_elba")
    .WithMapOperation(mapOp)
    .Build();

RiakResult rslt = client.Execute(cmd);
```

```javascript
var funcs = [
    function (async_cb) {
        var options = {
            bucketType: 'maps',
            bucket: 'customers',
            key: 'idris_elba'
        };

        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
        mapOp.setRegister('first_name', 'Idris');
        mapOp.setRegister('last_name', 'Elba');
        mapOp.setFlag('enterprise_customer', true);
        mapOp.incrementCounter('page_visits', 10);
        mapOp.addToSet('interests', 'acting');
        mapOp.addToSet('interests', 'being Stringer Bell');

        options.op = mapOp;

        client.updateMap(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    },
    function (async_cb) {
        var options = {
            bucketType: 'maps',
            bucket: 'customers',
            key: 'joan_jett'
        };

        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
        mapOp.setRegister('first_name', 'Joan');
        mapOp.setRegister('last_name', 'Jett');
        mapOp.setFlag('enterprise_customer', false);
        mapOp.incrementCounter('page_visits', 25);
        mapOp.addToSet('interests', 'loving rock and roll');
        mapOp.addToSet('interests', 'being in the Blackhearts');

        options.op = mapOp;

        client.updateMap(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    }
];

async.parallel(funcs, function (err, rslts) {
    throwIfErr(err);
});
```

### Searching Counters Within Maps

We now have two maps stored in Riak that we can query. Let's query to
see how many users have page visit counters above 15.
Unlike the counters example above, we have to specify _which_ counter we're
querying:

```java
// Using the same method explained above, just changing the query:
String query = "page_visits_counter:[15 TO *]";

// Again using the same method from above:
int numberFound = results.numResults(); // 1
```

```ruby
results = client.search('customers', 'page_visits_counter:[15 TO *]')
results['num_found']
# 1
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('customers')
  ->withQuery('page_visits_counter:[15 TO *]')
  ->build()
  ->execute();

$response->getNumFound(); // 1
```

```python
results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
results['num_found']
# 1
```

```csharp
var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
var rslt = client.Search(search);
```

```javascript
function search_cb(err, rslt) {
    logger.info("numFound: '%d', docs: '%s'",
        rslt.numFound, JSON.stringify(rslt.docs));

    var doc = rslt.docs[0];
    var key = doc['_yz_rk'];
    var bucket = doc['_yz_rb'];
    var bucketType = doc['_yz_rt'];
}

var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('customers')
    .withQuery('page_visits_counter:[15 TO *]')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

As expected, one of our two stored maps has a `page_visits` counter
above 15. Let's make sure that we have the right result:

```java
// Using the same method from above:
String query = "page_visits_counter:[15 TO *]";

// Again using the same method from above:
String registerValue =
    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
```

```ruby
results['docs'][0]['first_name_register']
# 'Joan'
```

```php
$response->getDocs()[0]->first_name_register; // Joan
```

```python
results['docs'][0]['first_name_register']
# u'Joan'
```

```csharp
var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
var rslt = client.Search(search);
RiakSearchResult searchResult = rslt.Value;
var firstDoc = searchResult.Documents.First();
```

```javascript
var doc = rslt.docs[0];
doc.first_name_register;
// 'Joan'
```

Success! Now we can test out searching sets.

### Searching Sets Within Maps

Each of the maps we stored thus far has an `interests` set. First, let's
see how many of our maps even _have_ sets called `interests` using a
wildcard query:

```java
// Using the same method from above:
String query = "interests_set:*";
```

```ruby
results = client.search('customers', 'interests_set:*')
results['num_found']
# 2
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('customers')
  ->withQuery('interests_set:*')
  ->build()
  ->execute();

$response->getNumFound(); // 2
```

```python
results = client.fulltext_search('customers', 'interests_set:*')
results['num_found']
# 2
```

```csharp
var search = new RiakSearchRequest("customers", "interests_set:*");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('customers')
    .withQuery('interests_set:*')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

As expected, both stored maps have an `interests` set.
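Exact-match queries work on embedded sets, too. A small sketch in Python
(reusing `client` and the `customers` index from above; only the Idris Elba
map contains the element `acting`):

```python
# Sketch: exact match against one element of an embedded set.
results = client.fulltext_search('customers', 'interests_set:acting')
results['num_found']
# 1
```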
Now let's see how many maps have items in `interests` sets that begin with
`loving`:

```java
// Using the same method from above:
String query = "interests_set:loving*";

// Again using the same method from above:
int numberFound = results.numResults(); // 1
String registerValue =
    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
```

```ruby
results = client.search('customers', 'interests_set:loving*')
results['num_found'] # 1
results['docs'][0]['first_name_register'] # 'Joan'
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('customers')
  ->withQuery('interests_set:loving*')
  ->build()
  ->execute();

$response->getDocs()[0]->first_name_register; // Joan
```

```python
results = client.fulltext_search('customers', 'interests_set:loving*')
results['num_found'] # 1
results['docs'][0]['first_name_register'] # u'Joan'
```

```csharp
var search = new RiakSearchRequest("customers", "interests_set:loving*");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('customers')
    .withQuery('interests_set:loving*')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

As expected, only our Joan Jett map has one item in its `interests` set
that starts with `loving`.

### Searching Maps Within Maps

Before we can try to search maps within maps, we need to actually store
some. Let's add an `alter_ego` map to both of the maps we've stored thus
far. Each person's alter ego will have a name only.

```java
Location idrisElbaMap = new Location(customersBucket, "idris_elba");
MapUpdate alterEgoUpdateName = new MapUpdate()
        .update("name", new RegisterUpdate("John Luther"));
MapUpdate alterEgoUpdate = new MapUpdate()
        .update("alter_ego", alterEgoUpdateName);
UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate)
        .build();
client.execute(addSubMap);
// Similar code can be used for the joan_jett map
```

```ruby
idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'

joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
```

```php
$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
  ->updateRegister('name', 'John Luther');

(new \Basho\Riak\Command\Builder\UpdateMap($riak))
  ->updateMap('alter_ego', $mapBuilder)
  ->buildLocation('idris_elba', 'customers', 'maps')
  ->build()
  ->execute();

$mapBuilder->updateRegister('name', 'Robert Plant');

(new \Basho\Riak\Command\Builder\UpdateMap($riak))
  ->updateMap('alter_ego', $mapBuilder)
  ->buildLocation('joan_jett', 'customers', 'maps')
  ->build()
  ->execute();
```

```python
idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
idris_elba.store()

joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
joan_jett.store()
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs

const string nameRegister = "name";
const string alterEgoMap = "alter_ego";

var mapOp = new UpdateMap.MapOperation();
mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");

var cmd = new UpdateMap.Builder()
    .WithBucketType("maps")
    .WithBucket("customers")
    .WithKey("idris_elba")
    .WithMapOperation(mapOp)
    .Build();

RiakResult rslt = client.Execute(cmd);
```

```javascript
var funcs = [
    function (async_cb) {
        var options = {
            bucketType: 'maps',
            bucket: 'customers',
            key: 'idris_elba'
        };

        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
        var alterEgoMap = mapOp.map('alter_ego');
        alterEgoMap.setRegister('name', 'John Luther');

        options.op = mapOp;

        client.updateMap(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    },
    function (async_cb) {
        var options = {
            bucketType: 'maps',
            bucket: 'customers',
            key: 'joan_jett'
        };

        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
        var alterEgoMap = mapOp.map('alter_ego');
        alterEgoMap.setRegister('name', 'Robert Plant');

        options.op = mapOp;

        client.updateMap(options, function (err, rslt) {
            throwIfErr(err);
            async_cb();
        });
    }
];

async.parallel(funcs, function (err, rslts) {
    throwIfErr(err);
});
```

Querying maps within maps involves constructing queries that separate the
different levels of depth with a single dot. Here's an example query for
finding maps that have a `name` register embedded within an `alter_ego`
map:

```java
// Using the same method from above:
String query = "alter_ego_map.name_register:*";

// Again using the same method from above:
int numberFound = results.numResults(); // 2
```

```ruby
results = client.search('customers', 'alter_ego_map.name_register:*')
results['num_found'] # 2
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('customers')
  ->withQuery('alter_ego_map.name_register:*')
  ->build()
  ->execute();

$response->getNumFound(); // 2
```

```python
results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
results['num_found'] # 2
```

```csharp
var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('customers')
    .withQuery('alter_ego_map.name_register:*')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

Once we know how to query embedded fields like this, we can query them
just like any other field.
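For instance, exact matches against the embedded register work as you'd
expect. A brief sketch in Python (reusing `client` and the `customers`
index; the value is quoted because it contains a space):

```python
# Sketch: exact match on the name register inside the alter_ego map.
results = client.fulltext_search(
    'customers', 'alter_ego_map.name_register:"John Luther"')
results['num_found']
# 1
```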
Let's find out which maps have an `alter_ego`
sub-map that contains a `name` register that ends with `Plant`, and
display that customer's first name:

```java
// Using the same method from above:
String query = "alter_ego_map.name_register:*Plant";

// Again using the same method from above:
int numberFound = results.numResults(); // 1
String registerValue =
    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
```

```ruby
results = client.search('customers', 'alter_ego_map.name_register:*Plant')
results['num_found'] # 1
results['docs'][0]['first_name_register'] # 'Joan'
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('customers')
  ->withQuery('alter_ego_map.name_register:*Plant')
  ->build()
  ->execute();

$response->getNumFound(); // 1
$response->getDocs()[0]->first_name_register; // Joan
```

```python
results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
results['num_found'] # 1
results['docs'][0]['first_name_register'] # u'Joan'
```

```csharp
var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
var rslt = client.Search(search);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('customers')
    .withQuery('alter_ego_map.name_register:*Plant')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

Success! We've now queried not just maps but also maps within maps.

diff --git a/content/riak/kv/2.9.10/developing/usage/secondary-indexes.md b/content/riak/kv/2.9.10/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..537d6916c0
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/secondary-indexes.md
@@ -0,0 +1,2029 @@
---
title: "Using Secondary Indexes (2i)"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "Using Secondary Indexes"
    identifier: "usage_2i"
    weight: 107
    parent: "developing_usage"
toc: true
aliases:
  - /riak/2.9.10/dev/using/2i
  - /riak/kv/2.9.10/dev/using/2i
---

[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb
[plan backend memory]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/memory
[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/reference/strong-consistency

> **Note: Riak Search preferred for querying**
>
> If you're interested in non-primary-key-based querying in Riak, i.e. if
you're looking to go beyond straightforward K/V operations, we now
recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search/) rather than secondary indexes for
a variety of reasons. Most importantly, Riak Search has a far more
capacious querying API and can be used with all of Riak's storage
backends.

Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
at write time, with one or more queryable values. Those values can then
be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/#user-accounts), for example, you could tag each object
associated with that user with a username or other unique marker. Once
tagged, you could find all objects in a Riak bucket sharing that tag.
Secondary indexes can be either a binary or string, such as
`sensor_1_data` or `admin_user` or `click_event`, or an integer, such as
`99` or `141121`.
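To make that concrete before the full walkthrough below, here's a minimal
sketch using the Python client (the index name `username_bin` is
illustrative only; detailed, multi-language examples follow):

```python
# Sketch: tag an object with a queryable value at write time, then
# retrieve every key in the bucket that shares that tag.
bucket = client.bucket('users')
obj = bucket.new('john_smith', data='...user data...')
obj.add_index('username_bin', 'jsmith123')
obj.store()

bucket.get_index('username_bin', 'jsmith123').results  # ['john_smith']
```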
+ +[Riak Search]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/2.9.10/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
Let's say that an application would like to add a Twitter handle and an
email address to this object as secondary indexes.

```java
Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith");

// In the Java client (and all clients), if you do not specify a bucket type,
// the client will use the default type. And so the following location
// would be equivalent to the one above:
// Location johnSmithKey = new Location(new Namespace("users"), "john_smith");

RiakObject obj = new RiakObject()
        .setContentType("application/json")
        .setValue(BinaryValue.create("{'user_data':{ ... }}"));

obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123");
obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com");

StoreValue store = new StoreValue.Builder(obj)
        .withLocation(johnSmithKey)
        .build();
client.execute(store);
```

```ruby
bucket = client.bucket_type('default').bucket('users')
obj = Riak::RObject.new(bucket, 'john_smith')
obj.content_type = 'application/json'
obj.raw_data = '{"user_data":{ ... }}'

# String/binary indexes must be set as an array of strings
obj.indexes['twitter_bin'] = %w{ jsmith123 }
obj.indexes['email_bin'] = %w{ jsmith@basho.com }
obj.store

# In the Ruby client (and all clients), if you do not specify a bucket
# type, the client will use the default type. And so the following set
# of commands would be equivalent to the one above:

bucket = client.bucket('users')
# repeat the same commands for building the object
obj.store
```

```php
$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
  ->addValueToIndex('twitter_bin', 'jsmith123')
  ->addValueToIndex('email_bin', 'jsmith@basho.com');

(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->withObject($object)
  ->buildLocation('john_smith', 'users', 'default')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('default').bucket('users')
# In the Python client (and all clients), if you do not specify a bucket type,
# the client will use the default type. And so the following store command
# would be equivalent to the one above:
bucket = client.bucket('users')

obj = RiakObject(client, bucket, 'john_smith')
obj.content_type = 'text/plain'
obj.data = '...user data...'
obj.add_index('twitter_bin', 'jsmith123')
obj.add_index('email_bin', 'jsmith@basho.com')
obj.store()
```

```csharp
var id = new RiakObjectId("default", "users", "john_smith");
var obj = new RiakObject(id, "...user data...",
    RiakConstants.ContentTypes.TextPlain);
obj.BinIndex("twitter").Set("jsmith123");
obj.BinIndex("email").Set("jsmith@basho.com");
var rslt = client.Put(obj);
```

```javascript
var riakObj = new Riak.Commands.KV.RiakObject();
riakObj.setContentType('text/plain');
riakObj.setBucket('users');
riakObj.setKey('john_smith');
riakObj.setValue('...user data...');
riakObj.addToIndex('twitter_bin', 'jsmith123');
riakObj.addToIndex('email_bin', 'jsmith@basho.com');
client.storeValue({ value: riakObj }, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
Obj = riakc_obj:new({<<"default">>, <<"users">>},
                    <<"john_smith">>,
                    <<"...user data...">>,
                    <<"text/plain">>),
%% In the Erlang client (and all clients), if you do not specify a bucket type,
%% the client will use the default type.
And so the following object would be +%% equivalent to the one above: + +Obj = riakc_obj:new(<<"users">>, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index( + MD1, + [{{binary_index, "twitter"}, [<<"jsmith123">>]}, + {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]), +Obj2 = riakc_obj:update_metadata(Obj, MD2), +riakc_pb_socket:put(Pid, Obj2). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + BucketType: "indexes", + Bucket: "users", + Key: "john_smith", + Value: []byte("…user data…"), +} + +obj.AddToIndex("twitter_bin", "jsmith123") +obj.AddToIndex("email_bin", "jsmith@basho.com") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ + -H 'x-riak-index-twitter_bin: jsmith123' \ + -H 'x-riak-index-email_bin: jsmith@basho.com' \ + -H 'Content-Type: application/json' \ + -d '{"userData":"data"}' +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.10/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.10/developing/getting-started) section. + +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if 
(rslt.values.length > 0) {
        Array.prototype.push.apply(query_keys,
            rslt.values.map(function (value) {
                return value.objectKey;
            }));
    }
}

var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
    .withBucket('users')
    .withIndexName('twitter_bin')
    .withIndexKey('jsmith123')
    .withCallback(query_cb)
    .build();
client.execute(cmd);
```

```erlang
{ok, Results} =
    riakc_pb_socket:get_index(Pid,
                              <<"users">>, %% bucket
                              {binary_index, "twitter"}, %% index name
                              <<"jsmith123">>). %% index
```

```golang
cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucketType("indexes").
    WithBucket("users").
    WithIndexName("twitter_bin").
    WithIndexKey("jsmith123").
    Build()
if err != nil {
    return err
}

if err := cluster.Execute(cmd); err != nil {
    return err
}
```

```curl
curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
```

The response:

```java
john_smith
```

```ruby
["john_smith"]
```

```php
['john_smith']
```

```python
['john_smith']
```

```csharp
john_smith
```

```javascript
john_smith
```

```erlang
{ok,{index_results_v1,[<<"john_smith">>],
     undefined,undefined}}.
```

```golang
john_smith
```

```curl
{
  "keys": [
    "john_smith"
  ]
}
```

## Examples

To run the following examples, make sure that Riak is configured to use
an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].

## Indexing Objects

The following example indexes four different objects. Notice that we're
storing both integer and string (aka binary) fields. Field names are
automatically lowercased, some fields have multiple values, and
duplicate fields are automatically de-duplicated, as in the following
example:

```java
Namespace peopleBucket = new Namespace("indexes", "people");

RiakObject larry = new RiakObject()
        .setValue(BinaryValue.create("My name is Larry"));
larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
StoreValue storeLarry = new StoreValue.Builder(larry)
        .withLocation(new Location(peopleBucket, "larry"))
        .build();
client.execute(storeLarry);

RiakObject moe = new RiakObject()
        .setValue(BinaryValue.create("My name is Moe"));
moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
StoreValue storeMoe = new StoreValue.Builder(moe)
        .withLocation(new Location(peopleBucket, "moe"))
        .build();
client.execute(storeMoe);

RiakObject curly = new RiakObject()
        .setValue(BinaryValue.create("My name is Curly"));
curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
StoreValue storeCurly = new StoreValue.Builder(curly)
        .withLocation(new Location(peopleBucket, "curly"))
        .build();
client.execute(storeCurly);

RiakObject veronica = new RiakObject()
        .setValue(BinaryValue.create("My name is Veronica"));
veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
        .add("val4").add("val4");
veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
        .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
StoreValue storeVeronica = new StoreValue.Builder(veronica)
        .withLocation(new Location(peopleBucket, "veronica"))
        .build();
client.execute(storeVeronica);
```

```ruby
bucket = client.bucket_type('indexes').bucket('people')

obj1 = Riak::RObject.new(bucket, 'larry')
obj1.content_type = 'text/plain'
obj1.raw_data = 'My name is Larry'
obj1.indexes['field1_bin'] = %w{ val1 }
# Like binary/string indexes, integer indexes must be set as an array,
# even if you wish to add only a single index
obj1.indexes['field2_int'] = [1001]
obj1.store

obj2 = Riak::RObject.new(bucket, 'moe')
obj2.content_type = 'text/plain'
obj2.raw_data = 'My name is Moe'
obj2.indexes['Field1_bin'] = %w{ val2 }
obj2.indexes['Field2_int'] = [1002]
obj2.store

obj3 = Riak::RObject.new(bucket, 'curly')
obj3.content_type = 'text/plain'
obj3.raw_data = 'My name is Curly'
obj3.indexes['FIELD1_BIN'] = %w{ val3 }
obj3.indexes['FIELD2_INT'] = [1003]
obj3.store

obj4 = Riak::RObject.new(bucket, 'veronica')
obj4.content_type = 'text/plain'
obj4.raw_data = 'My name is Veronica'
# Reassigning the same index would overwrite earlier values, so all
# values (duplicates included) are set in a single array
obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1007]
obj4.store
```

```php
$bucket = new \Basho\Riak\Bucket('people', 'indexes');

$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
  ->addValueToIndex('field1_bin', 'val1')
  ->addValueToIndex('field2_int', 1001);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->withObject($object)
  ->withLocation(new \Basho\Riak\Location('larry', $bucket))
  ->build()
  ->execute();

$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
  ->addValueToIndex('Field1_bin', 'val2')
  ->addValueToIndex('Field2_int', 1002);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->withObject($object)
  ->withLocation(new \Basho\Riak\Location('moe', $bucket))
  ->build()
  ->execute();

$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
  ->addValueToIndex('FIELD1_BIN', 'val3')
  ->addValueToIndex('FIELD2_int', 1003);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->withObject($object)
  ->withLocation(new \Basho\Riak\Location('curly', $bucket))
  ->build()
  ->execute();

$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
  ->addValueToIndex('field1_bin', 'val4')
  ->addValueToIndex('field1_bin', 'val4')
  ->addValueToIndex('field1_bin', 'val4a')
  ->addValueToIndex('field1_bin', 'val4b')
  ->addValueToIndex('field2_int', 1004)
  ->addValueToIndex('field2_int', 1005)
  ->addValueToIndex('field2_int', 1006)
  ->addValueToIndex('field2_int', 1004)
  ->addValueToIndex('field2_int', 1004)
  ->addValueToIndex('field2_int', 1007);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->withObject($object)
  ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('indexes').bucket('people')

obj1 = RiakObject(client, bucket, 'larry')
obj1.content_type = 'text/plain'
obj1.data = 'My name is Larry'
obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
obj1.store()

obj2 = RiakObject(client, bucket, 'moe')
obj2.content_type = 'text/plain'
obj2.data = 'Moe'
obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
obj2.store()

obj3 = RiakObject(client, bucket, 'curly')
obj3.content_type = 'text/plain'
obj3.data = 'Curly'
obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
obj3.store()

obj4 = RiakObject(client, bucket,
'veronica') +obj4.content_type = 'text/plain' +obj4.data = 'Veronica' +obj4.add_index('field1_bin', 'val4').add_index('field1_bin', 'val4a').add_index('field1_bin', 'val4b').add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1005).add_index('field2_int', 1006).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1007) +obj4.store() +``` + +```csharp +var larryId = new RiakObjectId("indexes", "people", "larry"); +var larry = new RiakObject(larryId, "My name is Larry", + RiakConstants.ContentTypes.TextPlain); + +larry.BinIndex("field1").Set("val1"); +larry.IntIndex("field2").Set(1001); + +client.Put(larry); + +var moeId = new RiakObjectId("indexes", "people", "moe"); +var moe = new RiakObject(moeId, "My name is Moe", + RiakConstants.ContentTypes.TextPlain); + +moe.BinIndex("Field1").Set("val2"); +moe.IntIndex("Field2").Set(1002); + +client.Put(moe); + +var curlyId = new RiakObjectId("indexes", "people", "curly"); +var curly = new RiakObject(curlyId, "My name is Curly", + RiakConstants.ContentTypes.TextPlain); + +curly.BinIndex("FIELD1").Set("val3"); +curly.IntIndex("FIELD2").Set(1003); + +client.Put(curly); + +var veronicaId = new RiakObjectId("indexes", "people", "veronica"); +var veronica = new RiakObject(veronicaId, "My name is Veronica", + RiakConstants.ContentTypes.TextPlain); + +veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" }); +veronica.IntIndex("FIELD2").Set(new BigInteger[] { + 1004, 1005, 1006, 1004, 1004, 1007 +}); + +client.Put(veronica); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var storeFuncs = [ + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('larry'); + riakObj.setValue('My name is Larry'); + riakObj.addToIndex('field1_bin', 'val1'); + riakObj.addToIndex('field2_int', 1001); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('moe'); + riakObj.setValue('My name is Moe'); + riakObj.addToIndex('Field1_bin', 'val2'); + riakObj.addToIndex('Field2_int', 1002); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('curly'); + riakObj.setValue('My name is Curly'); + riakObj.addToIndex('FIELD1_BIN', 'val3'); + riakObj.addToIndex('FIELD2_INT', 1003); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('veronica'); + riakObj.setValue('My name is Veronica'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4a'); + riakObj.addToIndex('FIELD1_bin', 'val4b'); + riakObj.addToIndex('FIELD2_int', 1004); + 
riakObj.addToIndex('FIELD2_int', 1005); + riakObj.addToIndex('FIELD2_int', 1006); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1007); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + } +]; +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Larry = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"My name is Larry">>, + <<"text/plain">>), +LarryMetadata = riakc_obj:get_update_metadata(Larry), +LarryIndexes = riakc_obj:set_secondary_index( + LarryMetadata, + [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}] +), +LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes). + +Moe = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"moe">>, + <<"My name is Moe">>, + <<"text/plain">>), +MoeMetadata = riakc_obj:get_update_metadata(Moe), +MoeIndexes = riakc_obj:set_secondary_index( + MoeMetadata, + [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}] +), +MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes). + +Curly = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"curly">>, + <<"My name is Curly">>, + <<"text/plain">>), +CurlyMetadata = riakc_obj:get_update_metadata(Curly), +CurlyIndexes = riakc_obj:set_secondary_index( + CurlyMetadata, + [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}] +), +CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes). + +Veronica = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"veronica">>, + <<"My name is Veronica">>, + <<"text/plain">>), +VeronicaMetadata = riakc_obj:get_update_metadata(Veronica), +VeronicaIndexes = riakc_obj:set_secondary_index( + VeronicaMetadata, + [{{binary_index, "field1"}, [<<"val4">>]}, {{binary_index, "field1"}, [<<"val4">>]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1005]}, {{integer_index, "field2"}, [1006]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1007]}] +), +VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes). +``` + +```golang +o1 := &riak.Object{ + Key: "larry", + Value: []byte("My name is Larry"), +} +o1.AddToIndex("field1_bin", "val1") +o1.AddToIntIndex("field2_int", 1001) + +o2 := &riak.Object{ + Key: "moe", + Value: []byte("My name is Moe"), +} +o2.AddToIndex("Field1_bin", "val2") +o2.AddToIntIndex("Field2_int", 1002) + +o3 := &riak.Object{ + Key: "curly", + Value: []byte("My name is Curly"), +} +o3.AddToIndex("FIELD1_BIN", "val3") +o3.AddToIntIndex("FIELD2_INT", 1003) + +o4 := &riak.Object{ + Key: "veronica", + Value: []byte("My name is Veronica"), +} +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4a") +o4.AddToIndex("FIELD1_bin", "val4b") +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1005) +o4.AddToIntIndex("FIELD2_int", 1006) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1007) + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "text/plain" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithContent(obj). 
+ Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field1_bin: val1" \ + -H "x-riak-index-field2_int: 1001" \ + -d 'My name is Larry' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \ + -H "x-riak-index-Field1_bin: val2" \ + -H "x-riak-index-Field2_int: 1002" \ + -d 'My name is Moe' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \ + -H "X-RIAK-INDEX-FIELD1_BIN: val3" \ + -H "X-RIAK-INDEX-FIELD2_INT: 1003" \ + -d 'My name is Curly' + +curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \ + -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \ + -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1007" \ + -d 'My name is Veronica' +``` + +The above objects will end up having the following secondary indexes, +respectively: + +* `Larry` - Binary index `field1_bin` and integer index `field2_int` +* `Moe` - Binary index `field1_bin` and integer index `field2_int` + (note that the index names are set to lowercase by Riak) +* `Curly` - Binary index `field1_bin` and integer index `field2_int` + (note again that the index names are set to lowercase) +* `Veronica` - Binary index `field1_bin` with the values `val4`, + `val4a`, and `val4b` and integer index `field2_int` with the values + `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed) + +As these examples show, there are safeguards in Riak that both normalize +the names of indexes and prevent the accumulation of redundant indexes. + +## Invalid Field Names and Types + +The following examples demonstrate what happens when an index field is +specified with an invalid field name or type. The system responds with +`400 Bad Request` and a description of the error. + +Invalid field name: + +```java +// The Java client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_foo'] = [1001] + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter', 'jsmith123'); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_foo', 1001) + +# Result: +riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'." 
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
+>
+> To avoid issues such as the above, the `riak.conf` file includes an option
+> that lets you disable or enable a node's participation in 2i queries:
+> setting `participate_in_coverage = disabled` prevents the node in question
+> from participating. The recommended use of this feature is to keep newly
+> added nodes that have not yet received all of their data from participating
+> in 2i queries and returning inconsistent results. Changing the
+> `participate_in_coverage` setting requires a restart of Riak on that node
+> before the change takes effect. The default setting is `enabled`.
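+>
+> For example, a minimal sketch of the relevant `riak.conf` line on the node
+> you want to exclude (the setting name and value are taken from the
+> description above):
+>
+> ```
+> participate_in_coverage = disabled
+> ```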
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withScalarValue('val1')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},      %% index name
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withScalarValue(1001)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},     %% index name
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query on the binary index
+`field1_bin` and pipes the results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field1_bin",
+    "key": "val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index...
+ +```java +Namespace myBucket = new Namespace("indexes", "people"); +BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4") + .build(); +BinIndexQuery.Response response = client.execute(biq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field1_bin', 'val2'..'val4') +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('people', 'indexes') + ->withIndexName('field1_bin') + ->withRangeValue('val2', 'val4') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field1_bin', 'val2', 'val4') +``` + +```csharp +var riakIndexId = new RiakIndexId("indexes", "people", "field1"); +var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4"); +var indexResult = indexRiakResult.Value; +``` + +```javascript +var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field1_bin') + .withRange('val2', 'val4') + .withCallback(query_cb) + .build(); +client.execute(binIdxCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"people">>}, %% bucket type and bucket name + {binary_index, "field1"}, %% index name + <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4" +). +``` + +```golang +c1, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithIndexName("field1_bin"). + WithRange("val2", "val4"). + Build() +if err != nil { + return err +} +``` + +```curl +curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4 +``` + +Or query an integer index... + +```java +Namespace myBucket = new Namespace("indexes", "people"); +IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L) + .build(); +IntIndexQuery.Response response = client.execute(iiq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field2_int', 1002..1004) +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('people', 'indexes') + ->withIndexName('field2_int') + ->withRangeValue(1002, 1004) + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field2_int', 1002, 1004) +``` + +```csharp +var riakIndexId = new RiakIndexId("indexes", "people", "field2"); +var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004); +var indexResult = indexRiakResult.Value; +``` + +```javascript +var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_int') + .withRange(1002, 1004) + .withCallback(query_cb) + .build(); +client.execute(intIdxCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"people">>}, %% bucket type and bucket name + {integer_index, "field2"}, %% index name + 1002, 1004 %% range query for keys between "val2" and "val4" +). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithIndexName("field2_int"). + WithIntRange(1002, 1004). 
+    Build()
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query on the integer index
+`field2_int` and pipes the results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field2_int",
+    "start": 1002,
+    "end": 1004
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('rock', 'rocl')
+    ->withReturnTerms()
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"},    %% index name
+    <<"rock">>, <<"rocl">>,        %% range query for keys between "rock" and "rocl"
+    [{return_terms, true}]         %% return the matched terms with the keys
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withMaxResults(5)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5)
+```
+
+```csharp
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+function do_query(continuation) {
+    var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+        .withBucketType('indexes')
+        .withBucket('tweets')
+        .withIndexName('hashtags_bin')
+        .withRange('ri', 'ru')
+        .withMaxResults(5)
+        .withCallback(pagination_cb);
+
+    if (continuation) {
+        binIdxCmdBuilder.withContinuation(continuation);
+    }
+
+    client.execute(binIdxCmdBuilder.build());
+}
+
+var query_keys = [];
+function pagination_cb(err, rslt) {
+    if (err) {
+        logger.error("query_cb err: '%s'", err);
+        return;
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+        query_keys = [];
+
+        if (rslt.continuation) {
+            do_query(rslt.continuation);
+        }
+    }
+
+    if (rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+do_query();
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"},    %% index name
+    <<"ri">>, <<"ru">>,            %% range query from "ri" to "ru"
+    [{max_results, 5}]             %% options are passed as a list
+).
+```
+
+```golang
+func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error {
+    builder := riak.NewSecondaryIndexQueryCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("tweets").
+        WithIndexName("hashtags_bin").
+        WithRange("ri", "ru").
+        WithMaxResults(5)
+
+    if continuation != nil && len(continuation) > 0 {
+        builder.WithContinuation(continuation)
+    }
+
+    cmd, err := builder.Build()
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    printIndexQueryResults(cmd)
+
+    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
+    if sciq.Response == nil {
+        return errors.New("[DevUsing2i] expected response but did not get one")
+    }
+
+    rc := sciq.Response.Continuation
+    if rc != nil && len(rc) > 0 {
+        return doPaginatedQuery(cluster, sciq.Response.Continuation)
+    }
+
+    return nil
+}
+
+func queryingPagination(cluster *riak.Cluster) error {
+    return doPaginatedQuery(cluster, nil)
+}
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true"
+```
+
+Here is an example JSON response (your client-specific response may differ):
+
+```json
+{
+  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
+  "results": [
+    { "rice": "349222574510710785" },
+    { "rickross": "349222868095217664" },
+    { "ridelife": "349221819552763905" },
+    { "ripjake": "349220649341952001" },
+    { "ripjake": "349220687057129473" }
+  ]
+}
+```
+
+Take the continuation value from the previous result set and feed it
+back into the query.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
+        .withMaxResults(5)
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+  'hashtags_bin',
+  'ri'..'ru',
+  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+  max_results: 5,
+  return_terms: true
+)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+    'hashtags_bin',
+    'ri', 'ru',
+    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+    max_results=5,
+    return_terms=True
+)
+```
+
+```csharp
+// rslt is the previous 2i fetch result
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+// See above example
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"},    %% index name
+    <<"ri">>, <<"ru">>,            %% range query from "ri" to "ru"
+    [
+        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
+        {max_results, 5},
+        {return_terms, true}
+    ]
+).
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"},     %% index name and type
+    <<"foo">>,                     %% value of the index
+    [{stream, true}]               %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with `pagination` and `return_terms`.
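+
+For example, a sketch of combining all three over HTTP, using the same
+tweets data set and the query parameters documented in the preceding
+examples:
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?stream=true&max_results=5&return_terms=true"
+```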
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys. See the pagination example above: hash
+tags (2i keys) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages which contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index, as in the previous example, and pipes the results into a MapReduce
+job that counts the number of records in the `people` bucket. In order to
+improve efficiency, the batch size has been increased from the default
+size of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key": "people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/security.md b/content/riak/kv/2.9.10/developing/usage/security.md
new file mode 100644
index 0000000000..9c64c2a506
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/security.md
@@ -0,0 +1,102 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/client-security
+  - /riak/kv/2.9.10/dev/advanced/client-security
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/2.9.10/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, aka [security sources]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients authenticate using a CA-generated certificate
+  and private key
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified with the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis.
+This means that
+you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce/)
+operations to use certificate auth, while requiring clients performing
+[K/V Operations]({{<baseurl>}}riak/kv/2.9.10/developing/usage) to use username and
+password. The approach that you adopt will depend on your security needs.
+
+This document provides a general overview of how that works. For
+managing security in Riak itself, see the following documents:
+
+* [Authentication and Authorization]({{<baseurl>}}riak/kv/2.9.10/using/security/basics)
+* [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/)
+
+We also provide client-library-specific guides for the following
+officially supported clients:
+
+* [Java]({{<baseurl>}}riak/kv/2.9.10/developing/usage/security/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.9.10/developing/usage/security/ruby)
+* [PHP]({{<baseurl>}}riak/kv/2.9.10/developing/usage/security/php)
+* [Python]({{<baseurl>}}riak/kv/2.9.10/developing/usage/security/python)
+* [Erlang]({{<baseurl>}}riak/kv/2.9.10/developing/usage/security/erlang)
+
+## Certificates, Keys, and Authorities
+
+If Riak security is enabled, all client operations, regardless of the
+security source you choose for those clients, must be over a secure SSL
+connection. If you are using a self-generated Certificate Authority
+(CA), Riak and connecting clients will need to share that CA.
+
+To use certificate-based auth, you will need to create a Public Key
+Infrastructure (PKI) based on
+[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central
+foundation of your PKI should be a Certificate Authority (CA), created
+inside of a secure environment, that can be used to sign certificates.
+In addition to a CA, your client will need to have access to a private
+key shared only by the client and Riak as well as a CA-generated
+certificate.
+
+To prevent so-called [Man-in-the-Middle
+attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private
+keys should never be shared beyond Riak and connecting clients.
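+
+As an illustrative sketch only (generic OpenSSL commands, not a
+Basho-documented procedure), a CA and a CA-signed client certificate
+could be created along these lines; the file names match the defaults
+listed in the table below, and `riakuser` stands in for the client's
+Riak username:
+
+```bash
+# Create a self-signed CA (certificate plus private key)
+openssl req -new -x509 -newkey rsa:2048 -nodes -days 365 \
+  -keyout ca-key.pem -out cacertfile.pem -subj "/CN=riak-ca"
+
+# Create the client's private key and a certificate signing request
+openssl req -new -newkey rsa:2048 -nodes \
+  -keyout key.pem -out cert.csr -subj "/CN=riakuser"
+
+# Sign the client certificate with the CA
+openssl x509 -req -in cert.csr -CA cacertfile.pem -CAkey ca-key.pem \
+  -CAcreateserial -days 365 -out cert.pem
+```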
+
+> **HTTP not supported**
+>
+> Certificate-based authentication is available only through Riak's
+> [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers/) interface. It is not available through the
+> [HTTP API]({{<baseurl>}}riak/kv/2.9.10/developing/api/http).
+
+### Default Names
+
+In Riak's [configuration files]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#security), the
+default certificate file names are as follows:
+
+Cert | Filename
+:----|:-------
+Certificate authority (CA) | `cacertfile.pem`
+Private key | `key.pem`
+CA-generated cert | `cert.pem`
+
+These filenames will be used in the client-library-specific tutorials.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/security/erlang.md b/content/riak/kv/2.9.10/developing/usage/security/erlang.md
new file mode 100644
index 0000000000..3df9025588
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/security/erlang.md
@@ -0,0 +1,117 @@
+---
+title_supertext: "Client Security:"
+title: "Erlang"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Erlang"
+    identifier: "usage_security_erlang"
+    weight: 103
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/client-security/erlang
+  - /riak/kv/2.9.10/dev/advanced/client-security/erlang
+---
+
+This tutorial shows you how to set up a Riak Erlang client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/)- or
+[PAM]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#pam-based-authentication)-based
+authentication, you can use the security setup described
+[below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+                   {credentials, "riakuser", ""},
+                   {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+                  ],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+```
+
+Please note that you do not need to specify a password if you are not
+using password-based authentication. If you are using a different
+security source, Riak will ignore the password. You can enter an empty
+string (as in the example above) or anything you'd like.
+
+This client is not currently set up to use any of the available security
+sources, with the exception of trust-based authentication, provided that
+the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which the client is connecting has been specified as trusted. More
+on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `SecurityOptions` list from
+above. We'll use the password `rosebud` here and in the rest of the
+examples.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+                   {credentials, "riakuser", "rosebud"},
+                   {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+                  ],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.10/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + + + diff --git a/content/riak/kv/2.9.10/developing/usage/security/java.md b/content/riak/kv/2.9.10/developing/usage/security/java.md new file mode 100644 index 0000000000..6f84129490 --- /dev/null +++ b/content/riak/kv/2.9.10/developing/usage/security/java.md @@ -0,0 +1,120 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.10/dev/advanced/client-security/java + - /riak/kv/2.9.10/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to
+create a cluster object (we'll call it `cluster`), which will in turn be
+used to create a `client` object. The setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.RiakCluster;
+import com.basho.riak.client.api.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        // This will specify a username but no password or keystore:
+        .withAuth("riakuser", null, null)
+        .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+        .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        .withAuth("riakuser", "rosebud", ks)
+        .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/security/php.md b/content/riak/kv/2.9.10/developing/usage/security/php.md
new file mode 100644
index 0000000000..7d18e7ec9b
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/security/php.md
@@ -0,0 +1,121 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/client-security/php
+  - /riak/kv/2.9.10/dev/advanced/client-security/php
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics).
[Certificate]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/security/python.md b/content/riak/kv/2.9.10/developing/usage/security/python.md
new file mode 100644
index 0000000000..0bf183dd63
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/security/python.md
@@ -0,0 +1,175 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/client-security/python
+  - /riak/kv/2.9.10/dev/advanced/client-security/python
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#trust-based-authentication) or [PAM-]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#pam-based-authentication)based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+# SecurityCreds lives in the riak.security module
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+ +```python +client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds) +``` + +This client object is not currently set up to use any of the +available security sources with the exception of trust-based auth, +provided that the +[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from +which the client is connecting has been specified as trusted. More on +specifying trusted CIDRs can be found in [Trust-based +Authentication]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#Trust-based-Authentication). + +**Note**: The examples in the following sections specify certs on the +basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to +specifying certs by location, you can also provide OpenSSL objects +instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +information from the above, with the exception that we'll also specify a +password for the client in the `creds` object from above. We'll use the +password `rosebud` here and in the rest of the examples. + +```python +creds = SecurityCreds(username='riakuser', + cacert_file='/ssl_dir/cacertfile.pem', + password='rosebud') +``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.10/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificated-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```python +creds = SecurityCreds(username='riakuser', + cacert_file='/ssl_dir/cacertfile.pem', + cert_file='/ssl_dir/cert.pem', + pkey_file='/ssl_dir/key.pem') +``` + +## Specifying a Certificate Revocation List + +If you are using a CA-generated Certificate Revocation List (CRL), you +can specify its filepath using the `crl_file` parameter. + +```python +creds = SecurityCreds(username='riakuser', + # Using the cert information from above + crl_file='/ssl_dir/revocation.crl') +``` + +## Specifying Ciphers + +To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/2.9.10/using/security/basics/#security-ciphers), you can pass in a colon-delimited +string to the `ciphers` parameter: + +```python +creds = SecurityCreds(username='riakuser', + # Using the cert information from above + ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC') +``` + +## Using OpenSSL Objects + +Whenever you specify certs, you have the option of either passing in +file paths as strings (as in the examples above) or properly created +OpenSSL objects, e.g. objects created using the +[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If +you generate OpenSSL objects this way, you should note that they must +be specified differently when creating a `SecurityCreds` object. The +table below lists the appropriate parameter names for each method, as +well as the pyOpenSSL class to which each cert must belong if you create +OpenSSL objects. 
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `pkey_file` | `pkey` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify filepaths, the appropriate certs will be loaded and
+converted into the appropriate OpenSSL objects. The functions used for
+this are `OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/security/ruby.md b/content/riak/kv/2.9.10/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..b468755409
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/security/ruby.md
@@ -0,0 +1,161 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/client-security/ruby
+  - /riak/kv/2.9.10/dev/advanced/client-security/ruby
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+ +```ruby +require 'riak' + +client = Riak::Client.new( + host: '127.0.0.1', + pb_port: 8087, + authentication: { + ca_file: '/ssl_dir/cacertfile.pem', + user: 'riakuser' + } +) +``` + +This client object is currently not set up to use any of the available +security sources, except trust-based auth, provided that the CIDR from +which the client is connecting has been specified as trusted. More on +this in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#trust-based-authentication). + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +information from the example above, with the exception that we will +specify a password for the client in the `authentication` hash. We'll +use the password `rosebud` here and in the rest of the examples. + +```ruby +client = Riak::Client.new( + # Using the host and pb_port from above + authentication: { + ca_file: '/ssl_dir/cacertfile.pem', + user: 'riakuser', + password: 'rosebud' + } +) +``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.10/using/security/basics#user-management). + + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a CA (as with all security sources), a username, a +client-specific CA, a CA-generated cert, and a private key. We'll assume +that all certs are stored in `/ssl_dir`, as in the previous examples. + +```ruby +client = Riak::Client.new( + # Using the host and pb_port from above + authentication: { + ca_file: '/path/to/cacertfile.pem', + user: 'riakuser', + client_ca: '/path/to/client_cert.pem', + cert: '/path/to/cert.pem', + key: '/path/to/key.pem' + } +) +``` + +The `client_ca` must be specified if you intend to use a CA that is +different from the CA used by Riak, e.g. if you are integrating with +an existing single sign-on (SSO) system. If the client and server CA are +the same, you don't need to specify `client_ca`. The client cert and +key, however, must always be specified. + +The `client_ca`, `cert`, and `key` fields are all flexible in their +usage. You can use a string specifying a filename (as in the example +above), or you can pass in an appropriate OpenSSL object, e.g. an SSL +object created using the +[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html) +gem. If you use specify filenames, those files will be loaded and +converted into the appropriate OpenSSL object. + +## Specifying a Certificate Revocation List + +If you create certificates specifying a CA-signed Certificate Revocation +List (CRL), those certs will be checked against the CRLs specified. You +can specify the location of the list in the `authentication` hash: + +```ruby +client = Riak::Client.new( + # Using the host and pb_port from above + authentication: { + ca_file: '/ssl_dir/cacertfile.pem', + user: 'riakuser', + # Using the cert paths from above + crl_file: '/ssl_dir/revocation.crl' + } +) +``` + +CRL checking can sometimes be a slow process. To disable it, you can set +`crl` to `false` in the `authentication` hash when instantiating your +client object. 
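+
+As a quick sketch (reusing the `host`, `pb_port`, and cert paths from the
+examples above), disabling that check looks like this:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Skip the (potentially slow) CRL check for this client
+    crl: false
+  }
+)
+```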
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)),
+the OCSP endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
+
+
+
diff --git a/content/riak/kv/2.9.10/developing/usage/updating-objects.md b/content/riak/kv/2.9.10/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..212419c118
--- /dev/null
+++ b/content/riak/kv/2.9.10/developing/usage/updating-objects.md
@@ -0,0 +1,777 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/using/updates
+  - /riak/kv/2.9.10/dev/using/updates
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context). A context object tracks the causal history of its object.
+Context objects are attached to _all_ Riak objects as metadata, and they
+are not readable by humans. They may sound complex---and they are fairly
+complex behind the scenes---but using them in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+ +```java +Location currentChampion = new Location(new Namespace("sports", "nba"), "champion"); +FetchValue fetch = new FetchValue.Builder(currentChampion) + .build(); +FetchValue.Response response = client.execute(fetch); +RiakObject obj = response.getValue(RiakObject.class); +obj.setValue(BinaryValue.create("Harlem Globetrotters")) +``` + +```ruby +bucket = client.bucket_type('sports').bucket('nba') +obj = bucket.get('champion') +obj.raw_data = 'Harlem Globetrotters' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports')); +$object = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->withLocation($location) + ->build() + ->execute() + ->getObject(); + +$object->setData('Harlem Globetrotters'); + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withLocation($location) + ->withObject($object) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('sports').bucket('nba') +obj = bucket.get('champion') +obj.data = 'Harlem Globetrotters' +``` + +```csharp +var id = new RiakObjectId("sports", "nba", "champion"); +var obj = new RiakObject(id, "Washington Generals", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); + +rslt = client.Get(id); +obj = rslt.Value; +obj.SetObject("Harlem Globetrotters", + RiakConstants.ContentTypes.TextPlain); +rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setValue('Washington Generals'); + +var options = { + bucketType: 'sports', bucket: 'nba', key: 'champion', + value: riakObj +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + delete options.value; + client.fetchValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var fetchedObj = rslt.values.shift(); + fetchedObj.setValue('Harlem Globetrotters'); + options.value = fetchedObj; + options.returnBody = true; + client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var updatedObj = rslt.values.shift(); + logger.info("champion: %s", updatedObj.value.toString('utf8')); + }); + }); +}); +``` + +```erlang +%% In the Erlang client, you cannot view a context objectdirectly, but it +%% will be included in the output when you fetch an object: + +{ok, Obj} = riakc_pb_socket:get(Pid, + {<<"sports">>, <<"nba">>}, + <<"champion">>), +UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>), +{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Washington Generals"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("sports"). + WithBucket("nba"). + WithKey("champion"). + WithContent(obj). + WithReturnBody(true). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +svc := cmd.(*riak.StoreValueCommand) +rsp := svc.Response +obj = rsp.Values[0] +obj.Value = []byte("Harlem Globetrotters") + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithBucketType("sports"). + WithBucket("nba"). + WithKey("champion"). + WithContent(obj). + WithReturnBody(true). 
+ Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +svc = cmd.(*riak.StoreValueCommand) +rsp = svc.Response +obj = rsp.Values[0] +fmt.Printf("champion: %v", string(obj.Value)) +``` + +```curl +# When using curl, the context object is attached to the X-Riak-Vclock header + +curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion + +# In the resulting output, the header will look something like this: + +X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= + +# When performing a write to the same key, that same header needs to +# accompany the write for Riak to be able to use the context object +``` + +In the samples above, we didn't need to actually interact with the +context object, as retaining and passing along the context object was +accomplished automatically by the client. If, however, you do need +access to an object's context, the clients enable you to fetch it from +the object: + +```java +// Using the RiakObject obj from above: + +Vclock vClock = obj.getVclock(); +System.out.println(vClock.asString()); + +// The context object will look something like this: +// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```ruby +# Using the RObject obj from above: + +obj.vclock + +# The context object will look something like this: +# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```php +# Using the RObject obj from above: + +echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```python +# Using the RiakObject obj from above: + +obj.vclock + +# The context object will look something like this: +# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```csharp +// Using the RiakObject obj from above: +var vclock = result.Value.VectorClock; +Console.WriteLine(Convert.ToBase64String(vclock)); + +// The output will look something like this: +// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```javascript +// Using the RiakObject fetchedObj from above: +var fetchedObj = rslt.values.shift(); +logger.info("vclock: %s", fetchedObj.getVClock().toString('base64')); + +// The output will look something like this: +// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA= +``` + +```erlang +%% Using the Obj object from above: + +riakc_obj:vclock(Obj). + +%% The context object will look something like this in the Erlang shell: +%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126, +%% 6,175,157,255,57,131,41,145,49,143,149,225,240,...>> +``` + +```golang +svc := cmd.(*riak.StoreValueCommand) +rsp := svc.Response +fmt.Println(rsp.VClock) + +// Output: +// X3hNXFq3ythUqvvrG9eJEGbUyLS +``` + +## The Object Update Cycle + +If you decide that your application requires mutable data in Riak, we +recommend that you: + +* avoid high-frequency object updates to the same key (i.e. multiple + updates per second for long periods of time), as this will degrade + Riak performance; and that you +* follow a read-modify-write cycle when performing updates. + +That cycle looks something like this: + +1. **Read** the object from Riak. This step is important for updates +because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context), which +is the information that Riak uses to make decisions about which object +values are most recent (this is especially useful for objects that are +frequently updated). 
This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+    * [Java]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/java)
+    * [Ruby]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/ruby)
+    * [Python]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/python)
+    * [C#]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/csharp)
+    * [Go]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+[strong consistency documentation]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/2.9.10/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official Ruby,
+PHP, Python, .NET, Node.js, Erlang, and Go clients. Because updates with the
+official Java client function somewhat differently, those examples can be
+found in the [section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which will bear the bucket type
+`siblings`, which sets `allow_mult` to `true`. The key for each object
+is the name of the team, e.g. `giants`, `broncos`, etc. Each object will
+consist of the name of the coach in plain text.
Here's an example of +creating and storing such an object: + +```ruby +bucket = client.bucket('coaches') +obj = bucket.get_or_new('seahawks', type: 'siblings') +obj.content_type = 'text/plain' +obj.raw_data = 'Pete Carroll' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + +if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); +} else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); +} + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('siblings').bucket('coaches') +obj = RiakObject(client, bucket, 'seahawks') +obj.content_type = 'text/plain' +obj.data = 'Pete Carroll' +obj.store() +``` + +```csharp +var id = new RiakObjectId("siblings", "coaches", "seahawks"); +var obj = new RiakObject(id, "Pete Carroll", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('siblings'); +riakObj.setBucket('coaches'); +riakObj.setKey('seahawks'); +riakObj.setValue('Pete Carroll'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } else { + logger.info('Stored Pete Carroll'); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>}, + <<"seahawks">>, + <<"Pete Carroll">>, + <<"text/plain">>). +riakc_pb_socket:put(Pid, Obj). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Pete Carroll"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey("seahawks"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +fmt.Println("Stored Pete Carroll") +``` + +Every once in a while, though, head coaches change in the NFL, which +means that our data would need to be updated. 
Below is an example
+function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.raw_data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($riak, $team, $coach) {
+    $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+    // The read phase
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    // The modify phase
+    if ($response->isSuccess()) {
+        $object = $response->getObject();
+        $object->setData($coach);
+    } else {
+        $object = new \Basho\Riak\Object($coach, 'text/plain');
+    }
+
+    // The write phase
+    $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->withObject($object)
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    return $response->isSuccess();
+}
+
+echo update_coach($riak, 'packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Pid, Team, NewCoach) ->
+    %% The read phase
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    %% The modify phase
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    %% The write phase
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage
+update_coach(Pid, <<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak.
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If
+you're not certain, however, then we recommend always reading the object
+first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+  public String username;
+  public List<String> hobbies;
+
+  public User(String username, List<String> hobbies) {
+    this.username = username;
+    this.hobbies = hobbies;
+  }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` parameter:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that you can use to replace the existing
+object with a new object of the same type rather than changing one or
+more properties of the object.
Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above would update the object without fetching it. You
+could, however, use a no-operation update to _read_ an object as well if
+you set `return_body` to `true` in your request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withFetchOption(Option.RETURN_BODY, true)
+        .withUpdate(Update.noopUpdate())
+        .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
+
+
+
diff --git a/content/riak/kv/2.9.10/downloads.md b/content/riak/kv/2.9.10/downloads.md
new file mode 100644
index 0000000000..fb057ccbb1
--- /dev/null
+++ b/content/riak/kv/2.9.10/downloads.md
@@ -0,0 +1,26 @@
+---
+title: "Download for Riak KV 2.9.10"
+description: "Download some stuff!"
+menu: + riak_kv-2.9.10: + name: "Download Riak KV" + identifier: "download_riak_kv" + weight: 101 + pre: download-alt +project: "riak_kv" +project_version: 2.9.10 +toc: false +layout: downloads +listed_projects: + - project: "riak_kv" + version: 2.9.10 + title: "Riak KV" + install_instructions_set: "setup/installing" +aliases: + - /riak/2.9.10/downloads + - /riak/kv/2.9.10/downloads +--- + + + + diff --git a/content/riak/kv/2.9.10/index.md b/content/riak/kv/2.9.10/index.md new file mode 100644 index 0000000000..2f9e9a26c7 --- /dev/null +++ b/content/riak/kv/2.9.10/index.md @@ -0,0 +1,76 @@ +--- +title: "Riak KV 2.9.10" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Riak KV" + identifier: "index" + weight: 100 + pre: riak +toc: false +aliases: + - /riak/2.9.10/ +--- + +[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/ +[config index]: {{<baseurl>}}riak/kv/2.9.10/configuring +[downloads]: {{<baseurl>}}riak/kv/2.9.10/downloads/ +[install index]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/ +[plan index]: {{<baseurl>}}riak/kv/2.9.10/setup/planning +[perf open files]: {{<baseurl>}}riak/kv/2.9.10/using/performance/open-files-limit +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/debian-ubuntu +[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search +[getting started]: {{<baseurl>}}riak/kv/2.9.10/developing/getting-started +[dev client libraries]: {{<baseurl>}}riak/kv/2.9.10/developing/client-libraries + + + +Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data. + +## Supported Operating Systems + +- Amazon Linux 2016.09 (AWS) +- Amazon Linux 2 (AWS) +- CentOS 6 +- CentOS 7 +- CentOS 8 +- Debian 7.0 ("Wheezy") +- Debian 8.0 ("Jessie") +- Debian 9.0 ("Stretch") +- Red Hat Enterprise Linux 6 +- Red Hat Enterprise Linux 7 +- Red Hat Enterprise Linux 8 +- Raspbian Buster +- Ubuntu 12.04 ("Precise Pangolin") +- Ubuntu 14.04 ("Trusty Tahr") +- Ubuntu 16.04 ("Xenial Xerus") +- Ubuntu 18.04 ("Bionic Beaver") +- FreeBSD 10.4 +- FreeBSD 11.1 +- Mac OSX 10.11+ (development only) + +## Getting Started + +Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the below pages to get started: + +1. [Install Riak KV][install index] +2. [Plan your Riak KV setup][plan index] +3. [Configure Riak KV for your needs][config index] + +{{% note title="Developing with Riak KV" %}} +If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/2.9.10/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. +{{% /note %}} + +## Popular Docs + +1. [Open Files Limit][perf open files] +2. [Installing on Debian-Ubuntu][install debian & ubuntu] +3. [Developing with Riak KV: Searching][usage search] +4. [Developing with Riak KV: Getting Started][getting started] +5. 
[Developing with Riak KV: Client Libraries][dev client libraries] + + + + diff --git a/content/riak/kv/2.9.10/learn.md b/content/riak/kv/2.9.10/learn.md new file mode 100644 index 0000000000..e94c597b64 --- /dev/null +++ b/content/riak/kv/2.9.10/learn.md @@ -0,0 +1,51 @@ +--- +title: "Learn About Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Learning" + identifier: "learn" + weight: 400 + pre: beaker +toc: true +--- + +[learn why riak]: ./why-riak-kv/ +[learn use cases]: ./use-cases/ +[learn new nosql]: ./new-to-nosql/ +[glossary]: ./glossary/ +[concepts]: ./concepts/ + +## In This Section + +#### [Why Riak KV?][learn why riak] + +An overview of Riak KV and when to use it. + +[Learn More >>][learn why riak] + +#### [Use Cases][learn use cases] + +Details use cases and applications in which Riak KV excels. + +[Learn More >>][learn use cases] + + + +#### [Glossary][glossary] + +A list of terms relating to Riak used throughout the documentation. + +[Learn More >>][glossary] + +#### [Concepts][concepts] + +Provides definitions for, insight into, and high level information about the various parts of Riak KV + +[Learn More >>][concepts] + + + + diff --git a/content/riak/kv/2.9.10/learn/concepts.md b/content/riak/kv/2.9.10/learn/concepts.md new file mode 100644 index 0000000000..5f6c0a29ed --- /dev/null +++ b/content/riak/kv/2.9.10/learn/concepts.md @@ -0,0 +1,47 @@ +--- +title: "Concepts" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Concepts" + identifier: "learn_concepts" + weight: 104 + parent: "learn" +toc: true +--- + +[concept aae]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy +[concept buckets]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets +[concept cap neg]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/capability-negotiation +[concept causal context]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context +[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/reference/strong-consistency +[concept vnodes]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/vnodes +[config index]: {{<baseurl>}}riak/kv/2.9.10/configuring +[plan index]: {{<baseurl>}}riak/kv/2.9.10/setup/planning +[use index]: {{<baseurl>}}riak/kv/2.9.10/using/ + + +Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
+ +Learn more about: + +* [Active Anti-Entropy (AAE)][concept aae] +* [Buckets][concept buckets] +* [Capability Negotiation][concept cap neg] +* [Causal Context][concept causal context] +* [Clusters][concept clusters] +* [Convergent Replicated Data Types (CRDTs)][concept crdts] +* [Eventual Consistency][concept eventual consistency] +* [Keys and Objects][concept keys objects] +* [Replication][concept replication] +* [Virtual Nodes (vnodes)][concept vnodes] + + + diff --git a/content/riak/kv/2.9.10/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.9.10/learn/concepts/active-anti-entropy.md new file mode 100644 index 0000000000..0e0093cb77 --- /dev/null +++ b/content/riak/kv/2.9.10/learn/concepts/active-anti-entropy.md @@ -0,0 +1,110 @@ +--- +title: "Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Active Anti-Entropy" + identifier: "learn_concepts_aae" + weight: 100 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.10/theory/concepts/aae + - /riak/kv/2.9.10/theory/concepts/aae +--- + +[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency +[config aae]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/#active-anti-entropy +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode +[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree +[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search + + +In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored +on different nodes are an expected byproduct of node failure, concurrent +client updates, physical data loss and corruption, and other events that +distributed systems are built to handle. These conflicts occur when +objects are either + +* **missing**, as when one node holds a replica of the object and + another node does not, or +* **divergent**, as when the values of an existing object differ across + nodes. + +Riak KV offers two means of resolving object conflicts: read repair and +active anti-entropy (AAE). Both of these conflict resolution mechanisms +apply both to normal key/value data in Riak as well as to +[search indexes][usage search] + + +## Read Repair vs. Active Anti-Entropy + +In versions of Riak prior to 1.3, replica conflicts were healed via +[read repair][glossary read rep] which is a _passive_ +anti-entropy mechanism that heals object conflicts only when a read +request reaches Riak from a client. Under read repair, if the +[vnode][glossary vnode] coordinating the read request determines +that different nodes hold divergent values for the object, the repair +process will be set in motion. + +One advantage of using read repair alone is that it doesn't require any +kind of background process to take effect, which can cut down on CPU +resource usage. The drawback of the read repair-only approach, however, +is that the healing process only can only ever reach those objects that +are read by clients. Any conflicts in objects that are not read by +clients will go undetected. 
+ +The _active_ anti-entropy (AAE) subsystem was added to Riak in +versions 1.3 and later to enable conflict resolution to run as a +continuous background process, in contrast with read repair, which does +not run continuously. AAE is most useful in clusters containing so- +called "cold data" that may not be read for long periods of time, even +months or years, and is thus not reachable by read repair. + +Although AAE is enabled by default, it can be turned off if necessary. +See our documentation on [managing active anti-entropy][cluster ops aae] +for information on how to enable and disable AAE, as well as on configuring +and monitoring AAE. + +## Active Anti-Entropy and Hash Tree Exchange + +In order to compare object values between replicas without using more +resources than necessary, Riak relies on [Merkle +tree] hash exchanges between +nodes. + +Using this type of exchange enables Riak to compare a balanced tree of +Riak object hashes. Any difference at a higher level in the hierarchy +means that at least one value has changed at a lower level. AAE +recursively compares the tree, level by level, until it pinpoints exact +values with a difference between nodes. The result is that AAE is able +to run repair operations efficiently regardless of how many objects are +stored in a cluster, since it need only repair specific objects instead +of all objects. + +In contrast with related systems, Riak uses persistent, on-disk hash +trees instead of in-memory hash trees. The advantages of this approach +are twofold: + +* Riak can run AAE operations with a minimal impact on memory usage +* Riak nodes can be restarted without needing to rebuild hash trees + +In addition, hash trees are updated in real time as new writes come in, +which reduces the time that it takes to detect and repair missing or +divergent replicas. + +As an additional fallback measure, Riak periodically clears and +regenerates all hash trees from on-disk key/value data, which enables +Riak to detect silent data corruption to on-disk data arising from disk +failure, faulty hardware, and other sources. The default time period for +this regeneration is one week, but this can be adjusted in each node's +[configuration file][config aae]. 
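+
+As a rough sketch, the relevant `riak.conf` entries might look like the
+following. The setting names here are assumptions for illustration---they
+can vary between Riak versions, so consult the [configuration
+file][config aae] reference for your release:
+
+```riakconf
+## Turn the AAE subsystem on or off (assumed setting name)
+anti_entropy = active
+
+## How long before on-disk hash trees expire and are rebuilt from the
+## key/value data (the default regeneration period of one week)
+anti_entropy.tree.expiry = 1w
+```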
+ + + diff --git a/content/riak/kv/2.9.10/learn/concepts/buckets.md b/content/riak/kv/2.9.10/learn/concepts/buckets.md new file mode 100644 index 0000000000..c8e27fc35b --- /dev/null +++ b/content/riak/kv/2.9.10/learn/concepts/buckets.md @@ -0,0 +1,216 @@ +--- +title: "Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Buckets" + identifier: "learn_concepts_buckets" + weight: 101 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.10/theory/concepts/Buckets + - /riak/kv/2.9.10/theory/concepts/Buckets + - /riak/2.9.10/theory/concepts/buckets + - /riak/kv/2.9.10/theory/concepts/buckets +--- + +[apps cluster metadata]: {{<baseurl>}}riak/kv/2.9.10/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/bucket-types +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/strong-consistency +[concept causal context]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context +[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context/#siblings +[concept replication]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/reference/strong-consistency +[config basic]: {{<baseurl>}}riak/kv/2.9.10/configuring/basic +[dev api http]: {{<baseurl>}}riak/kv/2.9.10/developing/api/http +[dev data types]: {{<baseurl>}}riak/kv/2.9.10/developing/data-types +[glossary ring]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#ring +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask +[plan backend memory]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/memory +[plan backend multi]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/multi +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types +[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/commit-hooks +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution +[usage replication]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/replication + + +Buckets are used to define a virtual keyspace for storing Riak objects. +They enable you to define non-default configurations over that keyspace +concerning [replication properties][concept replication] and [other +parameters][config basic]. + +In certain respects, buckets can be compared to tables in relational +databases or folders in filesystems, respectively. From the standpoint +of performance, buckets with default configurations are essentially +"free," while non-default configurations, defined [using bucket +types][cluster ops bucket types], will be gossiped around [the ring][glossary read rep] using Riak's [cluster metadata][apps cluster metadata] subsystem. + +## Configuration + +Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables +you to create and modify sets of configurations and apply them to as +many buckets as you wish. With bucket types, you can configure the +following bucket-level parameters, overriding the default values if you +wish. + +#### allow_mult + +Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on +the context. See the documentation on [`allow_mult`][usage bucket types] for more +information. 
+ +#### n_val + +Specifies the number of copies of each object to be stored in the +cluster. See the documentation on [replication properties][usage replication]. Default: +`3`. + +#### last_write_wins + +Indicates if an object's timestamp will be used to decide the canonical +write in the case of a conflict. See the documentation on [vector +clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default: +`false`. + +#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum + +See the documentation on [replication properties][usage replication] for more information +on all of these properties. + +#### precommit + +A list of Erlang functions to be executed before writing an object. See +our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit +hooks, i.e. an empty list. + +#### postcommit + +A list of Erlang functions to be executed after writing an object. See +our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no post-commit +hooks, i.e. an empty list. + +#### old_vclock, young_vclock, small_vclock, big_vclock + +These settings enable you to manage [vector clock pruning][concept causal context]. + +#### backend + +If you are using the [Multi][plan backend multi] backend, this property enables you to +determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using +LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_ +buckets of all types will use the assigned backend. + +#### consistent + +If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets +bearing a type, this setting must be set to `true`. The default is +`false`. More information can be found in our documentation on [using +strong consistency][cluster ops strong consistency]. + +#### datatype + +If you are using [Riak data types][dev data types], this setting +determines which data type will be used in +buckets of this bucket type. Possible values: `counter`, `set`, or +`map`. + +#### dvv_enabled + +Whether [dotted version vectors][concept causal context] +will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`. + +#### chash_keyfun, linkfun + +These settings involve features that have been deprecated. You will not +need to adjust these values. + +## Fetching Bucket Properties + +If you'd like to see how a particular bucket has been configured, you +can do so using our official client libraries or through Riak's [HTTP +API][dev api http]. The following would fetch the properties for the bucket +`animals` if that bucket had a default configuration, i.e. 
the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak is running on "localhost" and the HTTP port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals', 'my_custom_type')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/learn/concepts/capability-negotiation.md b/content/riak/kv/2.9.10/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..2a6e1e6647
--- /dev/null
+++ b/content/riak/kv/2.9.10/learn/concepts/capability-negotiation.md
@@ -0,0 +1,35 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.10/theory/concepts/capability-negotiation
+  - /riak/kv/2.9.10/theory/concepts/capability-negotiation
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce
+
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
+
+Rolling upgrades no longer require you to disable and then re-enable features due to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + + + + diff --git a/content/riak/kv/2.9.10/learn/concepts/causal-context.md b/content/riak/kv/2.9.10/learn/concepts/causal-context.md new file mode 100644 index 0000000000..a1418afe55 --- /dev/null +++ b/content/riak/kv/2.9.10/learn/concepts/causal-context.md @@ -0,0 +1,288 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.10/theory/concepts/context + - /riak/kv/2.9.10/theory/concepts/context +--- + + +[concept aae]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/2.9.10/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/2.9.10/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/2.9.10/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+    b. Riak creates sibling values, aka **siblings**, for the object
+
+    c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by your configuration settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 1.4, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+ * Whether one object is a direct descendant of the other
+ * Whether the objects are direct descendants of a common parent
+ * Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects.
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks][concept causal context]. In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object but they
+can't identify which value was associated with each update.
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#siblings) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates,
+objects that use them should generally be smaller than objects that use
+vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence of interactions with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`. If you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion.
At the extreme, having an enormous object in a
+node can cause reads of that object to crash the entire node. Other
+issues include [undue latency][perf latency reduc] and
+out-of-memory errors.
+
+To prevent sibling explosion, we recommend the following:
+
+1. Use [dotted version vectors](#dotted-version-vectors)
+instead of vector clocks for causal
+context.
+2. Always update mutable objects within a read/modify/write cycle. More
+information can be found in the [Object Updates][usage updating objects] doc.
+
+## Resources
+
+* [Evaluating Dotted Version Vectors in Riak]
+* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]
+* [Dotted Version Vector Sets]
+* [A History of Time in Riak]
+
+
+
diff --git a/content/riak/kv/2.9.10/learn/concepts/clusters.md b/content/riak/kv/2.9.10/learn/concepts/clusters.md
new file mode 100644
index 0000000000..18159a2029
--- /dev/null
+++ b/content/riak/kv/2.9.10/learn/concepts/clusters.md
@@ -0,0 +1,116 @@
+---
+title: "Clusters"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Clusters"
+    identifier: "learn_concepts_clusters"
+    weight: 103
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.10/theory/concepts/Clusters
+  - /riak/kv/2.9.10/theory/concepts/Clusters
+  - /riak/2.9.10/theory/concepts/clusters
+  - /riak/kv/2.9.10/theory/concepts/clusters
+---
+
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets
+[concept keys objects]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication
+[glossary node]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode
+[learn dynamo]: {{<baseurl>}}riak/kv/2.9.10/learn/dynamo
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/replication
+
+
+Riak's default mode of operation is to work as a cluster consisting of
+multiple [nodes][glossary node], i.e. multiple well-connected data
+hosts.
+
+Each host in the cluster runs a single instance of Riak, referred to as
+a Riak node. Each Riak node manages a set of virtual nodes, or
+[vnodes][glossary vnode], that are responsible for storing a
+separate portion of the keys stored in the cluster.
+
+In contrast to some high-availability systems, Riak nodes are _not_
+clones of one another, and they do not all participate in fulfilling
+every request. Instead, you can configure, at runtime or at request
+time, the number of nodes on which data is to be replicated, as well as
+when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed.
+
+## The Ring
+
+Though much of this material is covered in our annotated discussion of
+the Amazon [Dynamo paper][learn dynamo], this section nonetheless provides a summary of
+how Riak implements the distribution of data throughout a cluster.
+
+Any client interface to Riak interacts with objects in terms of the
+[bucket][concept buckets] and [key][concept keys objects] in which a value is
+stored, as well as the [bucket type][usage bucket types] that is used
+to set the bucket's properties.
+
+Internally, Riak computes a 160-bit binary hash of each bucket/key pair
+and maps this value to a position on an ordered **ring** of all such
+values.
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes define a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![Riak Data Distribution]({{<baseurl>}}images/riak-data-distribution.png)
+
+When N is set to 3, the value `REM` is stored in the key `artist`. That
+key is assigned to 3 partitions out of 32 available partitions. When a
+read request is made to Riak, the ring state will be used to determine
+which partitions are responsible. From there, a variety of
+[configurable parameters][usage replication] determine how Riak
+will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates.
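+
+To make the tunable request parameters from the Intelligent Replication
+section above concrete, here is a minimal sketch using the official Python
+client. The bucket name is hypothetical, the key reuses the `artist`/`REM`
+example, and the quorum values shown are illustrative, not recommendations:
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)  # assumes a local node
+bucket = client.bucket('artists')       # hypothetical bucket name
+
+# Write: require W=2 vnode acknowledgements, DW=1 durable acknowledgement
+obj = bucket.new('artist', data={'name': 'REM'})
+obj.store(w=2, dw=1)
+
+# Read: wait for R=2 vnode responses before returning
+fetched = bucket.get('artist', r=2)
+print(fetched.data)
+```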
+
+
+
diff --git a/content/riak/kv/2.9.10/learn/concepts/crdts.md b/content/riak/kv/2.9.10/learn/concepts/crdts.md
new file mode 100644
index 0000000000..0d90c2d950
--- /dev/null
+++ b/content/riak/kv/2.9.10/learn/concepts/crdts.md
@@ -0,0 +1,251 @@
+---
+title_supertext: "Concept"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Data Types"
+    identifier: "learn_concepts_data_types"
+    weight: 104
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.10/theory/concepts/crdts
+  - /riak/kv/2.9.10/theory/concepts/crdts
+---
+
+[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
+[data types converg]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/crdts/#convergence
+[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html
+[data types impl]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/crdts/#implementation
+[concept causal context dvv]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context/#dotted-version-vectors
+[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context/#siblings
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context/#vector-clocks
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/strong-consistency
+[dev data types]: {{<baseurl>}}riak/kv/2.9.10/developing/data-types
+[riak_dt]: https://github.com/basho/riak_dt
+[dev data types context]: {{<baseurl>}}riak/kv/2.9.10/developing/data-types/#data-types-and-context
+[glossary node]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution
+
+Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections:
+
+- Counters
+- Flags
+- HyperLogLogs
+- Maps
+- Registers
+- Sets
+
+The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients.
+
+Instead of the usual create, read, update, and delete (CRUD) operations
+performed on key/value pairs, data types enable you to perform
+operations such as removing a register from a map, telling a counter to
+increment itself by 5, or enabling a flag that was previously disabled.
+
+It's important to note that Riak Data Types are operations-based from the standpoint of connecting clients. Like CRDTs, the [convergence logic][data types converg] is state-based behind the scenes.
+
+Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below.
+
+For more articles on CRDTs, check out this [reading list][crdts reading list].
+
+
+## Counters
+
+Counters are a bucket-level Riak data type that can be used on their own,
+associated with a bucket/key pair, or within a map. A counter's value is
+always an integer: positive, negative, or zero.
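+
+As a minimal sketch of counter operations, here is what an increment might
+look like with the official Python client, assuming a hypothetical bucket
+type named `counters` that was created with `datatype` set to `counter`:
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)  # assumes a local node
+bucket = client.bucket_type('counters').bucket('player_scores')
+
+counter = bucket.new('player_1')  # hypothetical key
+counter.increment(5)              # queue an increment of 5
+counter.store()                   # send the operation to Riak
+
+print(bucket.get('player_1').value)  # 5 on a fresh key
+```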
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, counters should not be used because uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, hyperloglogs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+    Counter: numberOfRetweets,
+    Register: username,
+    Register: tweetContent,
+    Flag: favorited?,
+    Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which includes
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+The only operation registers support is changing the binary stored within them. They can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.
+
+
+## Sets
+
+Sets are collections of unique binary values, such as strings. Because all of
+the values in a set are unique, if you attempt to add the
+element `shovel` to a set that already contains `shovel`, the operation
+will be ignored by Riak KV. Sets can be used either on their own or
+embedded in a map.
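+
+Here is a minimal sketch of set operations with the official Python client,
+assuming a hypothetical bucket type named `sets` created with `datatype` set
+to `set`:
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)  # assumes a local node
+bucket = client.bucket_type('sets').bucket('carts')
+
+cart = bucket.new('user_42')  # hypothetical key
+cart.add('shovel')
+cart.add('rake')
+cart.add('shovel')  # duplicate adds are ignored: set members are unique
+cart.store()
+
+print(bucket.get('user_42').value)  # frozenset({'rake', 'shovel'})
+```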
+
+Some examples of using sets:
+
+- Storing the UUIDs of a user's friends in a social network application
+- Storing items in an e-commerce shopping cart
+
+### Operations
+
+Sets are subject to four basic operations: add an element, remove an
+element, add multiple elements, or remove multiple elements.
+
+
+## Advantages and Disadvantages of Data Types
+
+[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider.
+
+One of the core purposes behind data types is to relieve developers
+using Riak KV of the burden of producing data convergence at the
+application level by absorbing a great deal of that complexity into Riak KV
+itself. Riak KV manages this complexity by building eventual consistency
+into the data types themselves instead of requiring clients to do so.
+
+You can still build applications with Riak KV that treat it as a highly
+available key/value store, and you will always have this choice. What
+Riak Data Types provide is additional flexibility and a broader range of
+choices.
+
+The trade-off that data types necessarily present is that they don't
+allow you to produce your own convergence logic. If your use case
+demands that you be able to create your own deterministic merge
+functions, then Riak Data Types might not be a good fit.
+
+
+## Implementation
+
+Conflicts between replicas are inevitable in a distributed system like
+Riak KV.
+
+For example, if a map is stored in the key `my_map`, it is always
+possible that the value of `my_map` will be different in nodes A and B.
+
+Without using data types, that conflict must be resolved using
+timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt].
+
+
+## Convergence
+
+The benefit of data types is that Riak KV knows how to resolve value
+conflicts by applying data type-specific rules.
+
+Riak KV does this by remembering the history of a value and broadcasting that
+history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct.
+
+### Example
+
+Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`, while on another node the set has only one element, `apple`.
+
+What happens when the two nodes communicate and note the divergence?
+
+In this case Riak KV would declare the set with two elements the winner.
+At that point, the node with the incorrect set would be told: "The set
+`fruits` should have elements `apple` and `orange`."
+
+In general, convergence involves the following stages:
+
+1. Check for divergence. If the data types have the same value, Riak KV
+   does nothing. But if divergence is noted...
+2. Riak KV applies data type-specific merge rules, like in the `fruits`
+   set example above, which will result in a "correct" value.
+3. 
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. + + + diff --git a/content/riak/kv/2.9.10/learn/concepts/eventual-consistency.md b/content/riak/kv/2.9.10/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..dfedc9111d --- /dev/null +++ b/content/riak/kv/2.9.10/learn/concepts/eventual-consistency.md @@ -0,0 +1,201 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.10/theory/concepts/Eventual-Consistency + - /riak/kv/2.9.10/theory/concepts/Eventual-Consistency + - /riak/2.9.10/theory/concepts/eventual-consistency + - /riak/kv/2.9.10/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these +systems need to rely on some form of conflict-resolution mechanism. + +One of the things that makes Riak's eventual consistency model powerful +is that Riak does not dictate how data resolution takes place. While +Riak does ship with a set of defaults regarding how data is +[replicated](#replication-properties-and-request-tuning) and how +[conflicts are resolved][usage conflict resolution], you can override these +defaults if you want to employ a different strategy. + +Among those strategies, you can enable Riak to resolve object conflicts +automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or +special eventually consistent [Data Types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types/), or you can resolve those +conflicts on the application side by employing a use case-specific logic +of your choosing. More information on this can be found in our guide to +[conflict resolution][usage conflict resolution]. + +This variety of options enables you to manage Riak's eventually +consistent behavior in accordance with your application's [data model +or models]({{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/). + +## Replication Properties and Request Tuning + +In addition to providing you different means of resolving conflicts, +Riak also enables you to fine-tune **replication properties**, which +determine things like the number of nodes on which data should be stored +and the number of nodes that are required to respond to read, write, and +other requests. + +An in-depth discussion of these behaviors and how they can be +implemented on the application side can be found in our guides to +[replication properties][concept replication] and [conflict resolution][usage conflict resolution]. + +In addition to our official documentation, we also recommend checking +out the [Understanding Riak's Configurable +Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) +series from [the Basho blog](https://riak.com/blog/). + +## A Simple Example of Eventual Consistency + +Let's assume for the moment that a sports news application is storing +all of its data in Riak. One thing that the application always needs to +be able to report to users is the identity of the current manager of +Manchester United, which is stored in the key `manchester-manager` in +the bucket `premier-league-managers`. This bucket has `allow_mult` set +to `false`, which means that Riak will resolve all conflicts by itself. + +Now let's say that a node in this cluster has recently recovered from +failure and has an old copy of the key `manchester-manager` stored in +it, with the value `Alex Ferguson`. The problem is that Sir Ferguson +stepped down in 2013 and is no longer the manager. Fortunately, the +other nodes in the cluster hold the value `David Moyes`, which is +correct. + +Shortly after the recovered node comes back online, other cluster +members recognize that it is available. Then, a read request for +`manchester-manager` arrives from the application. Regardless of which +order the responses arrive to the node that is coordinating this +request, `David Moyes` will be returned as the value to the client, +because `Alex Ferguson` is recognized as an older value. + +Why is this? How does Riak make this decision? Behind the scenes, after +`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the +older value on the node that just came back online. 
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because 1 of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we've been talking
+about the possibility of a node for the `manchester-manager` key having
+failed; to be more precise, we've been talking about a *primary* node,
+one that, when the cluster is perfectly healthy, would bear
+responsibility for that key.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure.
+
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3` the client indicated that 3 primary
+nodes must respond for the operation to be considered successful, which
+it wasn't, but there's no way to tell without performing another read
+whether the operation truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels, et. al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
+
+
+
diff --git a/content/riak/kv/2.9.10/learn/concepts/keys-and-objects.md b/content/riak/kv/2.9.10/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..47156e6fab
--- /dev/null
+++ b/content/riak/kv/2.9.10/learn/concepts/keys-and-objects.md
@@ -0,0 +1,52 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.10/theory/concepts/keys-and-values
+  - /riak/kv/2.9.10/theory/concepts/keys-and-values
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there are more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and occur when more than one actor updates an
+object, when a network partition occurs, or when a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
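+
+Because there are no partial fetches or updates, a typical update is a whole
+read/modify/write cycle. Here is a minimal sketch with the official Python
+client (the bucket and key names are illustrative); note that the client
+carries the vector clock from the fetch back to Riak on the store:
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)  # assumes a local node
+bucket = client.bucket('customers')
+
+obj = bucket.get('mariejohnston')  # fetch the entire object
+profile = obj.data or {}
+profile['phone'] = '555-1212'      # modify it locally
+obj.data = profile
+obj.store()                        # store the whole object back; the vector
+                                   # clock from the fetch goes with it
+```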
+
+
+
diff --git a/content/riak/kv/2.9.10/learn/concepts/replication.md b/content/riak/kv/2.9.10/learn/concepts/replication.md
new file mode 100644
index 0000000000..0e10f45da3
--- /dev/null
+++ b/content/riak/kv/2.9.10/learn/concepts/replication.md
@@ -0,0 +1,322 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.10/theory/concepts/Replication
+  - /riak/kv/2.9.10/theory/concepts/Replication
+  - /riak/2.9.10/theory/concepts/replication
+  - /riak/kv/2.9.10/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing security
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication][cluster ops v3 mdc] capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak chooses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
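+
+Because request-time quorums are bounded by N, it can be useful to check a
+bucket's `n_val` from the client before tuning requests. Here is a minimal
+sketch with the official Python client, assuming the `n_val_of_2` bucket type
+created in the next section (the bucket and key names are hypothetical):
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)  # assumes a local node
+bucket = client.bucket_type('n_val_of_2').bucket('transient_data')
+
+n_val = bucket.get_properties()['n_val']
+print(n_val)  # 2: also the largest usable R or W for this bucket
+
+# r may not exceed n_val; r=2 is the maximum valid read quorum here
+obj = bucket.get('some_key', r=2)
+```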
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up changing, leaving existing replicas on vnodes that are no
+longer part of the object's preflist.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/2.9.10/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/active-anti-entropy)
+
+
+## Read Repair
+
+Read repair occurs when a successful read occurs---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Alternatively, if you have [active
+anti-entropy][concept aae] enabled, your values will
+eventually be replicated as a background task.
+
+For each object that fails read (or the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean?
N=3 simply means that three copies of each piece of data will be stored
+in the cluster. That is, three different partitions/vnodes will receive
+copies of the data. **There are no guarantees that the three replicas
+will go to three separate physical nodes**; however, the built-in
+functions for determining where replicas go attempt to distribute the
+data evenly.
+
+As nodes are added and removed from the cluster, the ownership of
+partitions changes and may result in an uneven distribution of the data.
+On some rare occasions, Riak will also aggressively reshuffle ownership
+of the partitions to achieve a more even balance.
+
+For cases where the number of nodes is less than the N value, data will
+likely be duplicated on some nodes. For example, with N=3 and 2 nodes in
+the cluster, one node will likely have one replica, and the other node
+will have two replicas.
+
+## Understanding replication by example
+
+To better understand how data is replicated in Riak let's take a look at
+a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically
+we'll focus on two parts of the request: routing an object to a set of
+partitions and storing an object on a partition.
+
+### Routing an object to a set of partitions
+
+ * Assume we have 3 nodes
+ * Assume we store 3 replicas per object (N=3)
+ * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8)
+
+**Note**: It is not recommended that you use such a small ring size.
+This is for demonstration purposes only.
+
+With only 8 partitions our ring will look approximately as follows
+(response from `riak_core_ring_manager:get_my_ring/0` truncated for
+clarity):
+
+```erlang
+(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
+[{0,'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
+```
+
+The node handling this request hashes the bucket/key combination:
+
+```erlang
+(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+```
+
+The DocIdx hash is a 160-bit integer:
+
+```erlang
+(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx.
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+(dev1@127.0.0.1)6> I.
+1045375627425331784151332358177649483819648417632
+```
+
+The node looks up the hashed key in the ring, which returns a list of
+_preferred_ partitions for the given key.
+
+```erlang
+(dev1@127.0.0.1)8> Preflist = riak_core_ring:preflist(DocIdx, Ring).
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0, 'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]
+```
+
+The node chooses the first N partitions from the list. The remaining
+partitions of the "preferred" list are retained as fallbacks to use if
+any of the target partitions are unavailable.
+
+```erlang
+(dev1@127.0.0.1)8> N = 3.
+3
+(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist).
+{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0,'dev1@127.0.0.1'}],
+[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]}
+```
+
+The partition information returned from the ring contains a partition
+identifier and the parent node of that partition:
+
+```erlang
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}
+```
+
+The requesting node sends a message to each parent node with the object
+and partition identifier (pseudocode for clarity):
+
+```erlang
+'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232}
+'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+'dev1@127.0.0.1' ! {put, Object, 0}
+```
+
+If any of the target partitions fail, the node sends the object to one
+of the fallbacks. When the message is sent to the fallback node, the
+message references the object and original partition identifier. For
+example, if `dev2@127.0.0.1` were unavailable, the requesting node would
+then try each of the fallbacks. The fallbacks in this example are:
+
+```erlang
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}
+```
+
+The next available fallback node would be `dev3@127.0.0.1`. The
+requesting node would send a message to the fallback node with the
+object and original partition identifier:
+
+```erlang
+'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+```
+
+Note that the partition identifier in the message is the same one that
+was originally sent to `dev2@127.0.0.1`, only this time it is being sent
+to `dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node
+of that partition, it is smart enough to hold on to the object until
+`dev2@127.0.0.1` returns to the cluster.
+
+## Processing partition requests
+
+Processing requests per partition is fairly simple. Each node runs a
+single process (`riak_kv_vnode_master`) that distributes requests to
+individual partition processes (`riak_kv_vnode`). The
+`riak_kv_vnode_master` process maintains a list of partition identifiers
+and corresponding partition processes. If a process does not exist for a
+given partition identifier, a new process is spawned to manage that
+partition.
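+
+To make that dispatch pattern concrete, here is a minimal sketch (not
+Riak's actual implementation) of a master process that maps partition
+identifiers to per-partition processes and spawns them on demand; the
+module and function names are hypothetical:
+
+```erlang
+-module(vnode_master_sketch).
+-export([start/0, dispatch/3]).
+
+%% Start the master with an empty partition-to-pid map.
+start() ->
+    spawn(fun() -> loop(#{}) end).
+
+%% Route Request to the process managing Partition, via the master.
+dispatch(Master, Partition, Request) ->
+    Master ! {dispatch, Partition, Request}.
+
+loop(Vnodes) ->
+    receive
+        {dispatch, Partition, Request} ->
+            %% Reuse the partition's process if one exists; otherwise
+            %% spawn one, even if this node does not own the partition.
+            Pid = case maps:find(Partition, Vnodes) of
+                      {ok, P} -> P;
+                      error -> spawn(fun() -> vnode_loop(Partition) end)
+                  end,
+            Pid ! Request,
+            loop(Vnodes#{Partition => Pid})
+    end.
+
+%% Stand-in for riak_kv_vnode: handle requests for one partition.
+vnode_loop(Partition) ->
+    receive
+        {put, _Object, Partition} ->
+            %% Storage elided; a real vnode writes to its backend here.
+            vnode_loop(Partition)
+    end.
+```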
+
+The `riak_kv_vnode_master` process treats all requests the same and
+spawns partition processes as needed even when nodes receive requests
+for partitions they do not own. When a partition's parent node is
+unavailable, requests are sent to fallback nodes (handoff). The
+`riak_kv_vnode_master` process on the fallback node spawns a process to
+manage the partition even though the partition does not belong to the
+fallback node.
+
+The individual partition processes perform a hometest throughout the
+life of the process. The hometest checks if the current node (`node/0`)
+matches the parent node of the partition as defined in the ring. If the
+process determines that the partition it is managing belongs on another
+node (the parent node), it will attempt to contact that node. If that
+parent node responds, the process will hand off any objects it has
+processed for that partition and shut down. If that parent node does not
+respond, the process will continue to manage that partition and check
+the parent node again after a delay. The hometest is also run by
+partition processes to account for changes in the ring, such as the
+addition of nodes to, or the removal of nodes from, the cluster.
+
+
+
+
diff --git a/content/riak/kv/2.9.10/learn/concepts/strong-consistency.md b/content/riak/kv/2.9.10/learn/concepts/strong-consistency.md
new file mode 100644
index 0000000000..538054d134
--- /dev/null
+++ b/content/riak/kv/2.9.10/learn/concepts/strong-consistency.md
@@ -0,0 +1,104 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Strong Consistency"
+    identifier: "learn_concepts_strong_consistency"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.10/theory/concepts/strong-consistency
+  - /riak/kv/2.9.10/theory/concepts/strong-consistency
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency
+
+{{% note title="Please Note:" %}}
+Riak KV's strong consistency is an experimental feature and may be removed
+from the product in the future. Strong consistency is not commercially
+supported or production-ready. Strong consistency is incompatible with
+Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB
+Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its
+usage in any production environment.
+{{% /note %}}

+
+Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+(i.e. fault) tolerance and high read and write availability.
+
+While this focus on high availability is a great fit for many data
+storage needs, there are also many use cases for which strong data
+consistency is more important than availability. Basho introduced a new
+strong consistency option in version 2.0 to address these use cases.
+In Riak, strong consistency is applied [using bucket types][usage bucket types], which
+enables developers to apply strong consistency guarantees on a per-key
+basis.
+
+Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.10/configuring/strong-consistency) looking to manage,
+configure, and monitor strong consistency.
+
+## Strong vs. Eventual Consistency
+
+If you successfully write a value to a key in a strongly consistent
+system, the next successful read of that key is guaranteed to show that
+write. A client will never see out-of-date values. The drawback is that
+some operations may fail if an insufficient number of object replicas
+are available. More on this in the section on [trade-offs](#trade-offs).
+
+In an eventually consistent system, on the other hand, a read may return
+an out-of-date value, particularly during system or network failures.
+The advantage of this approach is that reads and writes can succeed even
+when a cluster is experiencing significant service degradation.
+
+### Example
+
+Building on the example presented in the [eventual consistency][concept eventual consistency] doc,
+imagine that information about who manages Manchester United is stored
+in Riak, in the key `manchester-manager`. In the eventual consistency
+example, the value associated with this key was originally
+`David Moyes`, meaning that this was the first successful write to that
+key. But then `Louis van Gaal` became Man U's manager, and a write was
+executed to change the value of `manchester-manager`.
+
+Now imagine that this write failed on one node in a multi-node cluster.
+Thus, all nodes report that the value of `manchester-manager` is `Louis
+van Gaal` except for one. On the errant node, the value of the
+`manchester-manager` key is still `David Moyes`. An eventually
+consistent system is one in which a get request will most likely return
+`Louis van Gaal` but could return the outdated value `David Moyes`.
+
+In a strongly consistent system, conversely, any successful read on
+`manchester-manager` will return `Louis van Gaal` and never `David Moyes`.
+Reads will return `Louis van Gaal` every single time until Man U gets a new
+manager and someone performs a successful write to `manchester-manager`
+to change its value.
+
+It might also be useful to imagine it a bit more abstractly. The
+following causal sequence would characterize a strongly consistent
+system:
+
+1. The value of the key `k` is set to `v`
+2. All successful reads on `k` return `v`
+3. The value of `k` is changed to `v2`
+4. All successful reads on `k` return `v2`
+5. And so forth
+
+At no point in time does this system return an out-of-date value.
+
+The following sequence could characterize an eventually consistent
+system:
+
+1. A write is made that sets the value of the key `k` to `v`
+2. Nearly all reads to `k` return `v`, but a small percentage return
+   `not found`
+3. A write to `k` changes the value to `v2`
+4. Nearly all reads to `k` now return `v2`, but a small number return
+   the outdated `v` (or even `not found`) because the newer value hasn't
+   yet been replicated to all nodes
+
+
+
diff --git a/content/riak/kv/2.9.10/learn/concepts/vnodes.md b/content/riak/kv/2.9.10/learn/concepts/vnodes.md
new file mode 100644
index 0000000000..e03f8f824e
--- /dev/null
+++ b/content/riak/kv/2.9.10/learn/concepts/vnodes.md
@@ -0,0 +1,159 @@
+---
+title: "Vnodes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Vnodes"
+    identifier: "learn_concepts_vnodes"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.10/theory/concepts/vnodes
+  - /riak/kv/2.9.10/theory/concepts/vnodes
+---
+
+
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context
+[concept clusters ring]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters/#the-ring
+[concept replication]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/strong-consistency
+[glossary node]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#ring
+[plan backend]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/cluster-capacity
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-cli
+
+
+Virtual nodes, more commonly referred to as **vnodes**, are processes
+that manage partitions in the Riak [ring][glossary ring]. Each data
+partition in a Riak cluster has a vnode that **claims** that partition.
+Vnodes perform a wide variety of operations, from K/V storage operations
+to guaranteeing [strong consistency][concept strong consistency] if you choose to use that
+feature.
+
+## The Number of Vnodes in a Cluster
+
+The term [node][glossary node] refers to a full instance of Riak,
+be it on its own physical machine or alongside others on a single
+machine, as in a development cluster on your laptop. Each Riak node
+contains multiple vnodes. The number per node is the [ring
+size][concept clusters ring] divided by the number of nodes in the cluster.
+
+This means that in some clusters different nodes will have different
+numbers of data partitions (and hence a different number of vnodes),
+because (ring size / number of nodes) will not always be a whole number.
+If the ring size of your cluster is 64 and you are running three nodes,
+two of your nodes will have 21 vnodes, while the third node holds 22
+vnodes.
+
+The output of the [`riak-admin member-status`][use admin riak cli]
+command shows this:
+
+```
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      34.4%      --      'dev1@127.0.0.1'
+valid      32.8%      --      'dev2@127.0.0.1'
+valid      32.8%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+```
+
+In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of
+64 partitions, while the other two nodes account for 32.8%, i.e. 21 out
+of 64 partitions. This is normal and expected behavior in Riak.
+
+We strongly recommend setting the appropriate ring size, and by
+extension the number of vnodes, prior to building a cluster. A full
+guide can be found in our [cluster planning][plan cluster capacity] documentation.
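+
+As a quick sanity check of the arithmetic in this section (an
+illustrative Erlang shell session; the variable names are ours):
+
+```erlang
+1> RingSize = 64, NodeCount = 3.
+3
+2> RingSize div NodeCount.  %% vnodes on each of two of the nodes
+21
+3> RingSize rem NodeCount.  %% left over, so one node runs 21 + 1 = 22
+1
+```
+
+22 of 64 partitions is the 34.4% shown for `dev1` above.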
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, participating in [strong consistency
+ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially a passive container for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1278813932664540053428224228626747642198940975104`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine: the data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent an imbalance like this, however, a node will not run
+multiple vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node.
+The report for a specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+           0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
+
+
+
diff --git a/content/riak/kv/2.9.10/learn/dynamo.md b/content/riak/kv/2.9.10/learn/dynamo.md
new file mode 100644
index 0000000000..2737467a04
--- /dev/null
+++ b/content/riak/kv/2.9.10/learn/dynamo.md
@@ -0,0 +1,1927 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/2.9.10/theory/dynamo
+  - /riak/kv/2.9.10/theory/dynamo
+---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.
+
+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV,
+> Cassandra and Voldemort come to mind. You may also remember Dynomite (which
+> predates all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios.
+It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use.
+
+Categories and Subject Descriptors
+
+* D.4.2 [Operating Systems]: Storage Management;
+* D.4.5 [Operating Systems]: Reliability;
+* D.4.2 [Operating Systems]: Performance;
+
+General Terms
+
+Algorithms, Management, Measurement, Performance, Design, Reliability.
+
+## 1. Introduction
+
+Amazon runs a world-wide e-commerce platform that serves tens of millions
+customers at peak times using tens of thousands of servers located in many data
+centers around the world. There are strict operational requirements on Amazon’s
+platform in terms of performance, reliability and efficiency, and to support
+continuous growth the platform needs to be highly scalable. Reliability is one
+of the most important requirements because even the slightest outage has
+significant financial consequences and impacts customer trust. In addition, to
+support continuous growth, the platform needs to be highly scalable.
+
+One of the lessons our organization has learned from operating Amazon’s platform
+is that the reliability and scalability of a system is dependent on how its
+application state is managed. Amazon uses a highly decentralized, loosely
+coupled, service oriented architecture consisting of hundreds of services. In
+this environment there is a particular need for storage technologies that are
+always available. For example, customers should be able to view and add items to
+their shopping cart even if disks are failing, network routes are flapping, or
+data centers are being destroyed by tornados. Therefore, the service responsible
+for managing shopping carts requires that it can always write to and read from
+its data store, and that its data needs to be available across multiple data
+centers.
+
+Dealing with failures in an infrastructure comprised of millions of components
+is our standard mode of operation; there are always a small but significant
+number of server and network components that are failing at any given time. As
+such Amazon’s software systems need to be constructed in a manner that treats
+failure handling as the normal case without impacting availability or
+performance.
+
+To meet the reliability and scaling needs, Amazon has developed a number of
+storage technologies, of which the Amazon Simple Storage Service (also available
+outside of Amazon and known as Amazon S3), is probably the best known. This
+paper presents the design and implementation of Dynamo, another highly available
+and scalable distributed data store built for Amazon’s platform. Dynamo is used
+to manage the state of services that have very high reliability requirements and
+need tight control over the tradeoffs between availability, consistency,
+cost-effectiveness and performance. Amazon’s platform has a very diverse set of
+applications with different storage requirements. A select set of applications
+requires a storage technology that is flexible enough to let application
+designers configure their data store appropriately based on these tradeoffs to
+achieve high availability and guaranteed performance in the most cost effective
+manner.
+
+There are many services on Amazon’s platform that only need primary-key access
+to a data store.
+For many services, such as those that provide best seller
+lists, shopping carts, customer preferences, session management, sales rank, and
+product catalog, the common pattern of using a relational database would lead to
+inefficiencies and limit scale and availability. Dynamo provides a simple
+primary-key only interface to meet the requirements of these applications.
+
+Dynamo uses a synthesis of well known techniques to achieve scalability and
+availability: Data is partitioned and replicated using consistent hashing [10],
+and consistency is facilitated by object versioning [12]. The consistency among
+replicas during updates is maintained by a quorum-like technique and a
+decentralized replica synchronization protocol. Dynamo employs a gossip based
+distributed failure detection and membership protocol. Dynamo is a completely
+decentralized system with minimal need for manual administration. Storage nodes
+can be added and removed from Dynamo without requiring any manual partitioning
+or redistribution.
+
+> Like Dynamo, Riak KV employs consistent hashing to partition and replicate
+> data around the ring. For the consistent hashing that takes place in
+> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs.
+>
+> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4
+> to read up on this in depth.
+>
+> Riak KV makes use of gossiping in the same way that Dynamo does: to
+> communicate ring state and node membership. See [Gossip Protocol] in our docs.
+>
+> And, nodes can be added and removed from your Riak cluster as needed.
+
+[Consistent Hashing]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#consistent-hashing
+[Gossip Protocol]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#gossiping
+
+In the past year, Dynamo has been the underlying storage technology for a number
+of the core services in Amazon’s e-commerce platform. It was able to scale to
+extreme peak loads efficiently without any downtime during the busy holiday
+shopping season. For example, the service that maintains shopping cart (Shopping
+Cart Service) served tens of millions requests that resulted in well over 3
+million checkouts in a single day and the service that manages session state
+handled hundreds of thousands of concurrently active sessions.
+
+The main contribution of this work for the research community is the evaluation
+of how different techniques can be combined to provide a single highly-available
+system. It demonstrates that an eventually-consistent storage system can be used
+in production with demanding applications. It also provides insight into the
+tuning of these techniques to meet the requirements of production systems with
+very strict performance demands.
+
+The paper is structured as follows. Section 2 presents the background and
+Section 3 presents the related work. Section 4 presents the system design and
+Section 5 describes the implementation. Section 6 details the experiences and
+insights gained by running Dynamo in production and Section 7 concludes the
+paper. There are a number of places in this paper where additional information
+may have been appropriate but where protecting Amazon’s business interests
+require us to reduce some level of detail. For this reason, the intra- and
+inter-datacenter latencies in section 6, the absolute request rates in section
+6.2 and outage lengths and workloads in section 6.3 are provided through
+aggregate measures instead of absolute details.
+
+
+## 2. Background
+
+Amazon’s e-commerce platform is composed of hundreds of services that work in
+concert to deliver functionality ranging from recommendations to order
+fulfillment to fraud detection. Each service is exposed through a well defined
+interface and is accessible over the network. These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated and diverged from
+> Dynamo's (as described in this paper).
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly-available, is efficient in its resource uses,
+> and has a simple scale out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services.
These services often have multiple dependencies, +which frequently are other services, and as such it is not uncommon for the call +graph of an application to have more than one level. To ensure that the page +rendering engine can maintain a clear bound on page delivery each service within +the call chain must obey its performance contract. + +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. 
One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. + + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. 
On the other hand, since the application is aware of the
+data schema it can decide on the conflict resolution method that is best suited
+for its client’s experience. For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbor. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks.
These networks employ a globally consistent protocol to +ensure that any node can efficiently route a search query to some peer that has +the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms +to ensure that queries can be answered within a bounded number of hops. + +To reduce the additional latency introduced by multi-hop routing, some P2P +systems (e.g., [14]) employ O(1) routing where each peer maintains enough +routing information locally so that it can route requests (to access a data +item) to the appropriate peer within a constant number of hops. + +> Riak KV's gossip protocol communicates between nodes with O(1) routing, and +> maintains local routing information. + +Various storage systems, such as Oceanstore [9] and PAST [17] were built on top +of these routing overlays. Oceanstore provides a global, transactional, +persistent storage service that supports serialized updates on widely replicated +data. To allow for concurrent updates while avoiding many of the problems +inherent with wide-area locking, it uses an update model based on conflict +resolution. Conflict resolution was introduced in [21] to reduce the number of +transaction aborts. Oceanstore resolves conflicts by processing a series of +updates, choosing a total order among them, and then applying them atomically in +that order. It is built for an environment where the data is replicated on an +untrusted infrastructure. By comparison, PAST provides a simple abstraction +layer on top of Pastry for persistent and immutable objects. It assumes that the +application can build the necessary storage semantics (such as mutable files) on +top of it. + +### 3.2 Distributed File Systems and Databases + +Distributing data for performance, availability and durability has been widely +studied in the file system and database systems community. Compared to P2P +storage systems that only support flat namespaces, distributed file systems +typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19] +replicate files for high availability at the expense of consistency. Update +conflicts are typically managed using specialized conflict resolution +procedures. The Farsite system [1] is a distributed file system that does not +use any centralized server like NFS. Farsite achieves high availability and +scalability using replication. The Google File System [6] is another distributed +file system built for hosting the state of Google’s internal applications. GFS +uses a simple design with a single master server for hosting the entire metadata +and where the data is split into chunks and stored in chunkservers. Bayou is a +distributed relational database system that allows disconnected operations and +provides eventual data consistency [21]. + +Among these systems, Bayou, Coda and Ficus allow disconnected operations and are +resilient to issues such as network partitions and outages. These systems differ +on their conflict resolution procedures. For instance, Coda and Ficus perform +system level conflict resolution and Bayou allows application level resolution. +All of them, however, guarantee eventual consistency. + +Similar to these systems, Dynamo allows read and write operations to continue +even during network partitions and resolves updated conflicts using different +conflict resolution mechanisms. Distributed block storage systems like FAB [18] +split large size objects into smaller blocks and stores each block in a highly +available manner. 
+In comparison to these systems, a key-value store is more
+suitable in this case because: (a) it is intended to store relatively small
+objects (size < 1M) and (b) key-value stores are easier to configure on a
+per-application basis. Antiquity is a wide-area distributed storage system designed
+to handle multiple server failures [23]. It uses a secure log to preserve data
+integrity, replicates each log on multiple servers for durability, and uses
+Byzantine fault tolerance protocols to ensure data consistency. In contrast to
+Antiquity, Dynamo does not focus on the problem of data integrity and security
+and is built for a trusted environment. Bigtable is a distributed storage system
+for managing structured data. It maintains a sparse, multi-dimensional sorted
+map and allows applications to access their data using multiple attributes [2].
+Compared to Bigtable, Dynamo targets applications that require only key/value
+access with primary focus on high availability where updates are not rejected
+even in the wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management.
Describing the details of each of the solutions is not possible, so +this paper focuses on the core distributed systems techniques used in Dynamo: +partitioning, replication, versioning, membership, failure handling and scaling. +<a href="#table-1">Table 1</a> presents a summary of the list of techniques +Dynamo uses and their respective advantages. + +<table id="table-1"> + <caption> + Table 1: Summary of techniques used in Dynamo and their advantages. + </caption> + <tr> + <th>Problem</th> + <th>Technique</th> + <th>Advantage</th> + </tr> + <tr> + <td>Partitioning</td> + <td>Consistent Hashing</td> + <td>Incremental Scalability</td> + </tr> + <tr> + <td>High Availability for writes</td> + <td>Vector clocks with reconciliation during reads</td> + <td>Version size is decoupled from update rates.</td> + </tr> + <tr> + <td>Handling temporary failures</td> + <td>Sloppy Quorum and hinted handoff</td> + <td>Provides high availability and durability guarantee when some of the + replicas are not available.</td> + </tr> + <tr> + <td>Recovering from permanent failures</td> + <td>Anti-entropy using Merkle trees</td> + <td>Synchronizes divergent replicas in the background.</td> + </tr> + <tr> + <td>Membership and failure detection</td> + <td>Gossip-based membership protocol and failure detection.</td> + <td>Preserves symmetry and avoids having a centralized registry for storing + membership and node liveness information.</td> + </tr> +</table> + +### 4.1 System Interface + +Dynamo stores objects associated with a key through a simple interface; it +exposes two operations: get() and put(). The get(key) operation locates the +object replicas associated with the key in the storage system and returns a +single object or a list of objects with conflicting versions along with a +context. The put(key, context, object) operation determines where the replicas +of the object should be placed based on the associated key, and writes the +replicas to disk. The context encodes system metadata about the object that is +opaque to the caller and includes information such as the version of the object. +The context information is stored along with the object so that the system can +verify the validity of the context object supplied in the put request. + +> Whereas Dynamo only has the concept of keys, we added a higher level of +> organization called a "bucket." Keys are stored in buckets and buckets are the +> level at which several Riak KV properties can be configured (primarily the "N" +> value, or the replication value.) In addition to the bucket+key identifier and +> value, Riak KV will also return the associated metadata for a given object +> with each get or put. +> +> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. + +[HTTP API]: {{<baseurl>}}riak/kv/2.9.10/developing/api/http/ +[Protocol Buffers API]: {{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers/ + +Dynamo treats both the key and the object supplied by the caller as an opaque +array of bytes. It applies a MD5 hash on the key to generate a 128-bit +identifier, which is used to determine the storage nodes that are responsible +for serving the key. + +> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash +> to generate a 160 bit identifier which is then used to determine where in the +> database each datum is stored. Riak KV treats data as an opaque binary, thus +> enabling users to store virtually anything. 
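+
+<!-- Random comment to add some padding between blockquotes -->
+
+> As a rough illustration of the hashing described in that note, here is a
+> hedged sketch (not Riak's exact internals, which go through
+> `riak_core_util:chash_key/1`):
+>
+> ```erlang
+> %% SHA-1 of the bucket and key, read back as a 160-bit integer that
+> %% positions the key on the ring.
+> hash_key(Bucket, Key) ->
+>     <<I:160/integer>> = crypto:hash(sha, <<Bucket/binary, Key/binary>>),
+>     I.
+> ```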
+
+
+### 4.2 Partitioning Algorithm
+
+One of the key design requirements for Dynamo is that it must scale
+incrementally. This requires a mechanism to dynamically partition the data over
+the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning
+scheme relies on consistent hashing to distribute the load across multiple
+storage hosts. In consistent hashing [10], the output range of a hash function
+is treated as a fixed circular space or “ring” (i.e. the largest hash value
+wraps around to the smallest hash value). Each node in the system is assigned a
+random value within this space which represents its “position” on the ring. Each
+data item identified by a key is assigned to a node by hashing the data item’s
+key to yield its position on the ring, and then walking the ring clockwise to
+find the first node with a position larger than the item’s position. Thus, each
+node becomes responsible for the region in the ring between it and its
+predecessor node on the ring. The principle advantage of consistent hashing is
+that departure or arrival of a node only affects its immediate neighbors and
+other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data around
+> the ring to partitions responsible for storing data. The ring has a maximum
+> key space of 2^160. Each bucket+key (and its associated value) is hashed to a
+> location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Each storage node will
+> optimistically handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo uses
+a variant of consistent hashing (similar to the one used in [10, 20]): instead
+of mapping a node to a single point in the circle, each node gets assigned to
+multiple points in the ring. To this end, Dynamo uses the concept of “virtual
+nodes”. A virtual node looks like a single node in the system, but each node can
+be responsible for more than one virtual node. Effectively, when a new node is
+added to the system, it is assigned multiple positions (henceforth, “tokens”) in
+the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed
+in Section 6.
+
+> Riak KV also has the concept of virtual nodes and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+If a node becomes unavailable (due to failures or routine maintenance), the load
+handled by this node is evenly dispersed across the remaining available nodes.
+
+When a node becomes available again, or a new node is added to the system, the
+newly available node accepts a roughly equivalent amount of load from each of
+the other available nodes.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+Finally, the number of virtual nodes that a node is responsible for can be
+decided based on its capacity, accounting for heterogeneity in the physical
+infrastructure.
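+
+> To make “walk the ring clockwise” concrete, here is a hedged sketch (not
+> Riak's actual lookup code) of the successor lookup over a sorted list of
+> `{Position, Node}` pairs; the function name is hypothetical:
+>
+> ```erlang
+> %% Find the owner of the first position clockwise of KeyHash; wrap to
+> %% the start of the ring when the hash is past the largest position.
+> next_owner(KeyHash, Ring) ->
+>     case [Node || {Pos, Node} <- Ring, Pos >= KeyHash] of
+>         [Owner | _] -> Owner;
+>         []          -> element(2, hd(Ring))
+>     end.
+> ```
+
+<!-- Random comment to add some padding between blockquotes -->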
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters/#the-ring
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts. Each data item is replicated at N hosts, where N is a parameter
+configured “per-instance”. Each key, k, is assigned to a coordinator node
+(described in the previous section). The coordinator is in charge of the
+replication of the data items that fall within its range. In addition to locally
+storing each key within its range, the coordinator replicates these keys at the
+N-1 clockwise successor nodes in the ring. This results in a system where each
+node is responsible for the region of the ring between it and its Nth
+predecessor. In <a href="#figure-2">Figure 2</a>, node B replicates the key k at
+nodes C and D in addition to storing it locally. Node D will store the keys that
+fall in the ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
+> the concept of a bucket we covered above? In Riak KV, the replication
+> parameter, "N" (also called "n_val"), is configurable at the bucket level.
+> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
+> store three replicas of your data on three different partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called the
+preference list. The system is designed, as will be explained in Section 4.8, so
+that every node in the system can determine which nodes should be in this list
+for any particular key. To account for node failures, the preference list
+contains more than N nodes. Note that with the use of virtual nodes, it is
+possible that the first N successor positions for a particular key may be owned
+by less than N distinct physical nodes (i.e. a node may hold more than one of
+the first N positions). To address this, the preference list for a key is
+constructed by skipping positions in the ring to ensure that the list contains
+only distinct physical nodes.
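+
+> Building on the toy ring above, constructing a preference list of distinct
+> physical nodes can be sketched like this (illustrative only):
+>
+> ```python
+> def preference_list(tokens, key_position, n):
+>     """Walk clockwise from the key's position, collecting N distinct
+>     physical nodes; extra vnodes of an already-chosen host are skipped."""
+>     start = 0
+>     for i, (token, _) in enumerate(tokens):
+>         if token > key_position:
+>             start = i
+>             break
+>     chosen = []
+>     for offset in range(len(tokens)):
+>         node = tokens[(start + offset) % len(tokens)][1]
+>         if node not in chosen:
+>             chosen.append(node)
+>         if len(chosen) == n:
+>             break
+>     return chosen
+>
+> # Toy ring of (token position, physical node) pairs, sorted by position:
+> tokens = [(10, "A"), (20, "B"), (30, "A"), (40, "C"), (50, "B"), (60, "D")]
+> print(preference_list(tokens, 22, 3))  # ['A', 'C', 'B']
+> ```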
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be propagated
+to all replicas asynchronously. A put() call may return to its caller before the
+update has been applied at all the replicas, which can result in scenarios where
+a subsequent get() operation may return an object that does not have the latest
+updates. If there are no failures then there is a bound on the update
+propagation times. However, under certain failure scenarios (e.g., server
+outages or network partitions), updates may not arrive at all replicas for an
+extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously and, as you would expect, this could result in a datum being
+> returned to the client that is out of date. But don't worry. We built in some
+> mechanisms to address this.
+
+There is a category of applications in Amazon’s platform that can tolerate such
+inconsistencies and can be constructed to operate under these conditions. For
+example, the shopping cart application requires that an “Add to Cart” operation
+can never be forgotten or rejected. If the most recent state of the cart is
+unavailable, and a user makes changes to an older version of the cart, that
+change is still meaningful and should be preserved. But at the same time it
+shouldn’t supersede the currently unavailable state of the cart, which itself
+may contain changes that should be preserved. Note that both “add to cart” and
+“delete item from cart” operations are translated into put requests to Dynamo.
+When a customer wants to add an item to (or remove from) a shopping cart and the
+latest version is not available, the item is added to (or removed from) the
+older version and the divergent versions are reconciled later.
+
+> Much like Dynamo was suited to the design of the shopping cart, Riak KV and
+> its tradeoffs are appropriate for a certain set of use cases. We happen to
+> feel that _most_ use cases can tolerate some level of eventual consistency.
+
+In order to provide this kind of guarantee, Dynamo treats the result of each
+modification as a new and immutable version of the data. It allows for multiple
+versions of an object to be present in the system at the same time. Most of the
+time, new versions subsume the previous version(s), and the system itself can
+determine the authoritative version (syntactic reconciliation). However, version
+branching may happen, in the presence of failures combined with concurrent
+updates, resulting in conflicting versions of an object. In these cases, the
+system cannot reconcile the multiple versions of the same object and the client
+must perform the reconciliation in order to collapse multiple branches of data
+evolution back into one (semantic reconciliation). A typical example of a
+collapse operation is “merging” different versions of a customer’s shopping
+cart. Using this reconciliation mechanism, an “add to cart” operation is never
+lost. However, deleted items can resurface.
+
+> The same holds true for Riak KV. If, by way of some failure and concurrent
+> update (rare but quite possible), there come to exist multiple versions of the
+> same object, Riak KV will push this decision down to the client (who are we to
+> tell you which is the authoritative object?). All that said, if your
+> application doesn't need this level of version control, we enable you to turn
+> the usage of vector clocks on and off at the bucket level.
+
+It is important to understand that certain failure modes can potentially result
+in the system having not just two but several versions of the same data. Updates
+in the presence of network partitions and node failures can potentially result
+in an object having distinct version sub-histories, which the system will need
+to reconcile in the future. This requires us to design applications that
+explicitly acknowledge the possibility of multiple versions of the same data (in
+order to never lose any updates).
+
+> Ditto.
+
+Dynamo uses vector clocks [12] in order to capture causality between different
+versions of the same object. A vector clock is effectively a list of (node,
+counter) pairs. One vector clock is associated with every version of every
+object. One can determine whether two versions of an object are on parallel
+branches or have a causal ordering by examining their vector clocks. If the
+counters on the first object’s clock are less-than-or-equal to all of the nodes
+in the second clock, then the first is an ancestor of the second and can be
+forgotten. Otherwise, the two changes are considered to be in conflict and
+require reconciliation.
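+
+> That comparison rule is easy to express in code. A minimal sketch, treating
+> a vector clock as a dict of node -> counter (illustrative, not Riak's
+> implementation):
+>
+> ```python
+> def descends(a: dict, b: dict) -> bool:
+>     """True if clock `a` has seen every update recorded in clock `b`."""
+>     return all(a.get(node, 0) >= counter for node, counter in b.items())
+>
+> def compare(a: dict, b: dict) -> str:
+>     if descends(a, b) and descends(b, a):
+>         return "identical"
+>     if descends(a, b):
+>         return "a supersedes b; b can be forgotten"
+>     if descends(b, a):
+>         return "b supersedes a; a can be forgotten"
+>     return "conflict; reconciliation required"
+>
+> # Two clocks with no causal ordering (cf. D3 and D4 in Figure 3 below):
+> print(compare({"Sx": 2, "Sy": 1}, {"Sx": 2, "Sz": 1}))
+> ```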
+
+> As you may have already figured out, Riak KV uses vector clocks for object
+> versioning, too. Here are a whole host of resources to keep you busy for a while:
+>
+> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vector-clock)
+>
+> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/)
+> |
+> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
+>
+> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
+>
+> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)
+
+In Dynamo, when a client wishes to update an object, it must specify which
+version it is updating. This is done by passing the context it obtained from an
+earlier read operation, which contains the vector clock information. Upon
+processing a read request, if Dynamo has access to multiple branches that cannot
+be syntactically reconciled, it will return all the objects at the leaves, with
+the corresponding version information in the context. An update using this
+context is considered to have reconciled the divergent versions and the branches
+are collapsed into a single new version.
+
+**<figure id="figure-3" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure3.png">
+  <figcaption>
+    Figure 3: Version evolution of an object over time.
+  </figcaption>
+</figure>**
+
+To illustrate the use of vector clocks, let us consider the example shown in
+<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say
+Sx) that handles the write for this key increases its sequence number and uses
+it to create the data's vector clock. The system now has the object D1 and its
+associated clock [(Sx, 1)]. The client updates the object. Assume the same node
+handles this request as well. The system now also has object D2 and its
+associated clock [(Sx, 2)]. D2 descends from D1 and therefore over-writes D1;
+however, there may be replicas of D1 lingering at nodes that have not yet seen
+D2. Let us assume that the same client updates the object again and a different
+server (say Sy) handles the request. The system now has data D3 and its
+associated clock [(Sx, 2), (Sy, 1)].
+
+Next assume a different client reads D2 and then tries to update it, and another
+node (say Sz) does the write. The system now has D4 (descendant of D2) whose
+version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2 could
+determine, upon receiving D4 and its clock, that D1 and D2 are overwritten by
+the new data and can be garbage collected. A node that is aware of D3 and
+receives D4 will find that there is no causal relation between them. In other
+words, there are changes in D3 and D4 that are not reflected in each other. Both
+versions of the data must be kept and presented to a client (upon a read) for
+semantic reconciliation.
+
+Now assume some client reads both D3 and D4 (the context will reflect that both
+values were found by the read).
+The read's context is a summary of the clocks of
+D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client performs the
+reconciliation and node Sx coordinates the write, Sx will update its sequence
+number in the clock. The new data D5 will have the following clock: [(Sx, 3),
+(Sy, 1), (Sz, 1)].
+
+A possible issue with vector clocks is that the size of vector clocks may grow
+if many servers coordinate the writes to an object. In practice, this is not
+likely because the writes are usually handled by one of the top N nodes in the
+preference list. In case of network partitions or multiple server failures,
+write requests may be handled by nodes that are not in the top N nodes in the
+preference list, causing the size of the vector clock to grow. In these
+scenarios, it is desirable to limit the size of the vector clock. To this end,
+Dynamo employs the following clock truncation scheme: Along with each (node,
+counter) pair, Dynamo stores a timestamp that indicates the last time the node
+updated the data item. When the number of (node, counter) pairs in the vector
+clock reaches a threshold (say 10), the oldest pair is removed from the clock.
+Clearly, this truncation scheme can lead to inefficiencies in reconciliation as
+the descendant relationships cannot be derived accurately. However, this problem
+has not surfaced in production and therefore this issue has not been thoroughly
+investigated.
+
+> Riak KV does a certain amount of vector clock pruning to keep their growth
+> under control.
+
+
+### 4.5 Execution of get() and put() operations
+
+Any storage node in Dynamo is eligible to receive client get and put operations
+for any key. In this section, for the sake of simplicity, we describe how these
+operations are performed in a failure-free environment, and in the subsequent
+section we describe how read and write operations are executed during failures.
+
+> Any node in the Riak KV ring can coordinate a request. The Riak KV information
+> in this section applies to a failure-free environment.
+
+Both get and put operations are invoked using Amazon’s infrastructure-specific
+request processing framework over HTTP. There are two strategies that a client
+can use to select a node: (1) route its request through a generic load balancer
+that will select a node based on load information, or (2) use a partition-aware
+client library that routes requests directly to the appropriate coordinator
+nodes. The advantage of the first approach is that the client does not have to
+link any code specific to Dynamo in its application, whereas the second strategy
+can achieve lower latency because it skips a potential forwarding step.
+
+A node handling a read or write operation is known as the coordinator.
+Typically, this is the first among the top N nodes in the preference list. If
+the requests are received through a load balancer, requests to access a key may
+be routed to any random node in the ring. In this scenario, the node that
+receives the request will not coordinate it if the node is not in the top N of
+the requested key’s preference list. Instead, that node will forward the request
+to the first among the top N nodes in the preference list.
+
+Read and write operations involve the first N healthy nodes in the preference
+list, skipping over those that are down or inaccessible. When all nodes are
+healthy, the top N nodes in a key’s preference list are accessed. When there are
+node failures or network partitions, nodes that are lower ranked in the
+preference list are accessed.
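+
+> The "first N healthy nodes" rule is simple enough to sketch (illustrative
+> only; `is_up` stands in for real failure detection):
+>
+> ```python
+> def first_n_healthy(preference_list, n, is_up):
+>     """First N reachable nodes; lower-ranked nodes fill in for failed ones."""
+>     return [node for node in preference_list if is_up(node)][:n]
+>
+> # With node "A" down and N=3, node "D" stands in for "A". The replica it
+> # accepts is the "hinted" replica discussed in Section 4.6 below.
+> print(first_n_healthy(["A", "B", "C", "D", "E"], 3, lambda node: node != "A"))
+> # ['B', 'C', 'D']
+> ```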
+
+To maintain consistency among its replicas, Dynamo uses a consistency protocol
+similar to those used in quorum systems. This protocol has two key configurable
+values: R and W. R is the minimum number of nodes that must participate in a
+successful read operation. W is the minimum number of nodes that must
+participate in a successful write operation. Setting R and W such that R + W > N
+yields a quorum-like system. In this model, the latency of a get (or put)
+operation is dictated by the slowest of the R (or W) replicas. For this reason,
+R and W are usually configured to be less than N, to provide better latency.
+
+> Riak KV makes use of the same values. But, thanks to our concept of buckets,
+> we made it a bit more customizable. The default R and W values are set at the
+> bucket level but can be configured at the request level if the developer deems
+> it necessary for certain data. "Quorum" as described in Dynamo is the default
+> setting in Riak KV.
+>
+> Some more resources on R and W:
+>
+> [REST API]({{<baseurl>}}riak/kv/2.9.10/developing/api/http/)
+>
+> [Writing Data]({{<baseurl>}}riak/kv/2.9.10/developing/usage/creating-objects/)
+>
+> [Reading Data]({{<baseurl>}}riak/kv/2.9.10/developing/usage/reading-objects/)
+
+Upon receiving a put() request for a key, the coordinator generates the vector
+clock for the new version and writes the new version locally. The coordinator
+then sends the new version (along with the new vector clock) to the N highest-
+ranked reachable nodes. If at least W-1 nodes respond then the write is
+considered successful.
+
+> In Riak KV a write is considered successful when the total number of
+> responding writes equals W. This need not be a durable write, which is a
+> separate value in Riak KV labeled DW.
+
+Similarly, for a get() request, the coordinator requests all existing versions
+of data for that key from the N highest-ranked reachable nodes in the preference
+list for that key, and then waits for R responses before returning the result to
+the client. If the coordinator ends up gathering multiple versions of the data,
+it returns all the versions it deems to be causally unrelated. The divergent
+versions are then reconciled and the reconciled version superseding the current
+versions is written back.
+
+> Same for Riak KV. Reconciling divergent versions in Riak KV is called
+> [Read Repair]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication/#read-repair).
+
+
+### 4.6 Handling Failures: Hinted Handoff
+
+If Dynamo used a traditional quorum approach it would be unavailable during
+server failures and network partitions, and would have reduced durability even
+under the simplest of failure conditions. To remedy this it does not enforce
+strict quorum membership and instead it uses a “sloppy quorum”; all read and
+write operations are performed on the first N healthy nodes from the preference
+list, which may not always be the first N nodes encountered while walking the
+consistent hashing ring.
+
+> [Hinted handoff] is built into Riak KV's core.
+>
+> You can get a glimpse of Riak KV's preference list (or *preflist*) calculation
+> in the [Replication] walkthrough.
+
+[Hinted handoff]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#hinted-handoff
+[Replication]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/replication/
+
+Consider the example of Dynamo configuration given in
+<a href="#figure-2">Figure 2</a> with N=3.
+In this example, if node A is temporarily down or unreachable
+during a write operation then a replica that would normally have lived on A will
+now be sent to node D. This is done to maintain the desired availability and
+durability guarantees. The replica sent to D will have a hint in its metadata
+that suggests which node was the intended recipient of the replica (in this case
+A). Nodes that receive hinted replicas will keep them in a separate local
+database that is scanned periodically. Upon detecting that A has recovered, D
+will attempt to deliver the replica to A. Once the transfer succeeds, D may
+delete the object from its local store without decreasing the total number of
+replicas in the system.
+
+Using hinted handoff, Dynamo ensures that read and write operations do not fail
+due to temporary node or network failures. Applications that need the highest
+level of availability can set W to 1, which ensures that a write is accepted as
+long as a single node in the system has durably written the key to its local
+store. Thus, the write request is only rejected if all nodes in the system are
+unavailable. However, in practice, most Amazon services in production set a
+higher W to meet the desired level of durability. A more detailed discussion of
+configuring N, R and W follows in section 6.
+
+> As mentioned previously, Riak KV does not require that a write be durable,
+> only that a vnode responds in the affirmative. If you require a durable write
+> in the way mentioned here, use DW.
+
+It is imperative that a highly available storage system be capable of handling
+the failure of an entire data center(s). Data center failures happen due to
+power outages, cooling failures, network failures, and natural disasters. Dynamo
+is configured such that each object is replicated across multiple data centers.
+In essence, the preference list of a key is constructed such that the storage
+nodes are spread across multiple data centers. These datacenters are connected
+through high speed network links. This scheme of replicating across multiple
+datacenters allows us to handle entire data center failures without a data
+outage.
+
+> [Multi Datacenter Replication] was previously only implemented in the commercial extension to
+> Riak KV, called [Riak KV Enterprise Edition]. Now it is available in all versions from Riak KV 2.9.10 onwards.
+
+[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/2.9.10/using/reference/v3-multi-datacenter/architecture/
+[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
+
+
+### 4.7 Handling permanent failures: Replica synchronization
+
+Hinted handoff works best if the system membership churn is low and node
+failures are transient. There are scenarios under which hinted replicas become
+unavailable before they can be returned to the original replica node. To handle
+this and other threats to durability, Dynamo implements an anti-entropy (replica
+synchronization) protocol to keep the replicas synchronized.
+
+> Read repair, mentioned above, is the simplest form of anti-entropy. But it is
+> passive, not active as this section describes.
+
+To detect the inconsistencies between replicas faster and to minimize the amount
+of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a hash tree
+where leaves are hashes of the values of individual keys. Parent nodes higher in
+the tree are hashes of their respective children. The principal advantage of a
+Merkle tree is that each branch of the tree can be checked independently without
+requiring nodes to download the entire tree or the entire data set. Moreover,
+Merkle trees help in reducing the amount of data that needs to be transferred
+while checking for inconsistencies among replicas. For instance, if the hash
+values of the root of two trees are equal, then the values of the leaf nodes in
+the tree are equal and the nodes require no synchronization. If not, it implies
+that the values of some replicas are different. In such cases, the nodes may
+exchange the hash values of children and the process continues until it reaches
+the leaves of the trees, at which point the hosts can identify the keys that are
+“out of sync”. Merkle trees minimize the amount of data that needs to be
+transferred for synchronization and reduce the number of disk reads performed
+during the anti-entropy process.
+
+> Riak KV implements Merkle-tree-based Active Anti-Entropy (*AAE*).
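+
+> As a sketch of the idea, here is a toy binary Merkle tree over a
+> power-of-two number of keys (not Riak's AAE implementation):
+>
+> ```python
+> import hashlib
+>
+> def h(data: bytes) -> bytes:
+>     return hashlib.sha1(data).digest()
+>
+> def build_tree(leaves):
+>     """Return the tree as a list of levels, from leaf hashes up to the root."""
+>     level = [h(value) for value in leaves]
+>     levels = [level]
+>     while len(level) > 1:
+>         level = [h(level[i] + level[i + 1]) for i in range(0, len(level), 2)]
+>         levels.append(level)
+>     return levels
+>
+> replica_a = build_tree([b"k1=v1", b"k2=v2", b"k3=v3", b"k4=v4"])
+> replica_b = build_tree([b"k1=v1", b"k2=STALE", b"k3=v3", b"k4=v4"])
+>
+> # Equal roots would mean no synchronization is needed; here they differ, so
+> # the replicas descend the tree and find the one divergent leaf (k2).
+> print(replica_a[-1] == replica_b[-1])  # False
+> diffs = [i for i, (x, y) in enumerate(zip(replica_a[0], replica_b[0])) if x != y]
+> print(diffs)  # [1]
+> ```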
+
+Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
+separate Merkle tree for each key range (the set of keys covered by a virtual
+node) it hosts. This allows nodes to compare whether the keys within a key range
+are up-to-date. In this scheme, two nodes exchange the root of the Merkle tree
+corresponding to the key ranges that they host in common. Subsequently, using
+the tree traversal scheme described above the nodes determine if they have any
+differences and perform the appropriate synchronization action. The disadvantage
+with this scheme is that many key ranges change when a node joins or leaves the
+system thereby requiring the tree(s) to be recalculated. This issue is
+addressed, however, by the refined partitioning scheme described in Section 6.2.
+
+
+### 4.8 Membership and Failure Detection
+
+> This section is well expressed in [Adding and Removing Nodes] and
+> [Failure Scenarios].
+
+[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency/
+
+#### 4.8.1 Ring Membership
+
+> Riak KV operators can trigger node management via the
+> [riak-admin command-line tool].
+
+[riak-admin command-line tool]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/
+
+In Amazon’s environment node outages (due to failures and maintenance tasks) are
+often transient but may last for extended intervals. A node outage rarely
+signifies a permanent departure and therefore should not result in rebalancing
+of the partition assignment or repair of the unreachable replicas. Similarly,
+manual error could result in the unintentional startup of new Dynamo nodes. For
+these reasons, it was deemed appropriate to use an explicit mechanism to
+initiate the addition and removal of nodes from a Dynamo ring. An administrator
+uses a command line tool or a browser to connect to a Dynamo node and issue a
+membership change to join a node to a ring or remove a node from a ring. The
+node that serves the request writes the membership change and its time of issue
+to persistent store. The membership changes form a history because nodes can be
+removed and added back multiple times.
+
+> Nodes are manually added using the `riak-admin cluster join` command.
+>
+> When a node permanently departs, rebalancing is triggered using the
+> `riak-admin cluster leave` command.
+
+A gossip-based protocol propagates membership changes and maintains an
+eventually consistent view of membership. Each node contacts a peer chosen at
+random every second and the two nodes efficiently reconcile their persisted
+membership change histories.
+
+> Riak KV's ring state holds membership information, and is propagated via
+> [gossiping], including random reconciliation, which defaults to once a minute.
+
+[gossiping]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#gossiping
+
+When a node starts for the first time, it chooses its set of tokens (virtual
+nodes in the consistent hash space) and maps nodes to their respective token
+sets. The mapping is persisted on disk and initially contains only the local
+node and token set. The mappings stored at different Dynamo nodes are reconciled
+during the same communication exchange that reconciles the membership change
+histories. Therefore, partitioning and placement information also propagates via
+the gossip-based protocol and each storage node is aware of the token ranges
+handled by its peers. This allows each node to forward a key’s read/write
+operations to the right set of nodes directly.
+
+> These tokens are vnodes (virtual nodes) in Riak KV.
+
+
+#### 4.8.2 External Discovery
+
+The mechanism described above could temporarily result in a logically
+partitioned Dynamo ring. For example, the administrator could contact node A to
+join A to the ring, then contact node B to join B to the ring. In this scenario,
+nodes A and B would each consider itself a member of the ring, yet neither would
+be immediately aware of the other. To prevent logical partitions, some Dynamo
+nodes play the role of seeds. Seeds are nodes that are discovered via an
+external mechanism and are known to all nodes. Because all nodes eventually
+reconcile their membership with a seed, logical partitions are highly unlikely.
+Seeds can be obtained either from static configuration or from a configuration
+service. Typically seeds are fully functional nodes in the Dynamo ring.
+
+> To rectify these sorts of logical partitions, multiple Riak cluster changes
+> are configured as one batch. Any changes must first be viewed with `riak-admin
+> cluster plan`; the changes are then committed with `riak-admin cluster
+> commit`. The new ring state is gossiped.
+>
+> See _[The Node Join Process]_ for more.
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+
+
+#### 4.8.3 Failure Detection
+
+Failure detection in Dynamo is used to avoid attempts to communicate with
+unreachable peers during get() and put() operations and when transferring
+partitions and hinted replicas. For the purpose of avoiding failed attempts at
+communication, a purely local notion of failure detection is entirely
+sufficient: node A may consider node B failed if node B does not respond to node
+A’s messages (even if B is responsive to node C's messages). In the presence of
+a steady rate of client requests generating inter-node communication in the
+Dynamo ring, a node A quickly discovers that a node B is unresponsive when B
+fails to respond to a message; Node A then uses alternate nodes to service
+requests that map to B's partitions; A periodically retries B to check for the
+latter's recovery. In the absence of client requests to drive traffic between
+two nodes, neither node really needs to know whether the other is reachable and
+responsive.
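+
+> That purely local notion of failure is small enough to sketch (illustrative
+> only; the interval and bookkeeping here are hypothetical):
+>
+> ```python
+> import time
+>
+> class LocalFailureDetector:
+>     """Node B is 'failed' from A's point of view simply because A's
+>     requests to B have been timing out; no global agreement is needed."""
+>
+>     def __init__(self, retry_after=10.0):
+>         self.retry_after = retry_after
+>         self.last_failure = {}  # peer -> time of last failed contact
+>
+>     def record_failure(self, peer):
+>         self.last_failure[peer] = time.monotonic()
+>
+>     def record_success(self, peer):
+>         self.last_failure.pop(peer, None)
+>
+>     def should_try(self, peer):
+>         # Skip recently failed peers, but retry periodically so a
+>         # recovered peer is eventually noticed.
+>         failed_at = self.last_failure.get(peer)
+>         return failed_at is None or time.monotonic() - failed_at >= self.retry_after
+> ```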
+
+Decentralized failure detection protocols use a simple gossip-style protocol
+that enables each node in the system to learn about the arrival (or departure)
+of other nodes. For detailed information on decentralized failure detectors and
+the parameters affecting their accuracy, the interested reader is referred to
+[8]. Early designs of Dynamo used a decentralized failure detector to maintain a
+globally consistent view of failure state. Later it was determined that the
+explicit node join and leave methods obviate the need for a global view of
+failure state. This is because nodes are notified of permanent node additions
+and removals by the explicit node join and leave methods and temporary node
+failures are detected by the individual nodes when they fail to communicate with
+others (while forwarding requests).
+
+> Riak KV follows the same mechanism, by manually triggering permanent ring
+> state changes, and gossiping the new state.
+
+
+### 4.9 Adding/Removing Storage Nodes
+
+When a new node (say X) is added into the system, it gets assigned a number of
+tokens that are randomly scattered on the ring. For every key range that is
+assigned to node X, there may be a number of nodes (less than or equal to N)
+that are currently in charge of handling keys that fall within its token range.
+Due to the allocation of key ranges to X, some existing nodes no longer have to
+store some of their keys, and these nodes transfer those keys to X. Let us
+consider a simple bootstrapping scenario where node X is added to the ring shown
+in <a href="#figure-2">Figure 2</a> between A and B. When X is added to the
+system, it is in charge of storing keys in the ranges (F, G], (G, A] and (A, X].
+As a consequence, nodes B, C and D no longer have to store the keys in these
+respective ranges. Therefore, nodes B, C, and D will offer to, and upon
+confirmation from X, transfer the appropriate set of keys. When a node is
+removed from the system, the reallocation of keys happens in a reverse process.
+
+> Riak KV does not randomly assign vnodes, but rather, iterates through the list
+> of partitions, assigning them to nodes in a round-robin style.
+
+Operational experience has shown that this approach distributes the load of key
+distribution uniformly across the storage nodes, which is important to meet the
+latency requirements and to ensure fast bootstrapping. Finally, by adding a
+confirmation round between the source and the destination, it is made sure that
+the destination node does not receive any duplicate transfers for a given key
+range.
+
+
+## 5. Implementation
+
+In Dynamo, each storage node has three main software components: request
+coordination, membership and failure detection, and a local persistence engine.
+All these components are implemented in Java.
+
+> Riak KV is implemented in Erlang. Request coordination and membership behavior
+> is defined by [riak_core] and implemented by [Riak KV].
+
+[riak_core]: http://github.com/basho/riak_core
+[Riak KV]: http://github.com/basho/riak_kv
+
+Dynamo’s local persistence component allows for different storage engines to be
+plugged in. Engines that are in use are Berkeley Database (BDB) Transactional
+Data Store, BDB Java Edition, MySQL, and an in-memory buffer with persistent
+backing store. The main reason for designing a pluggable persistence component
+is to choose the storage engine best suited for an application’s access
+patterns.
+For instance, BDB can handle objects typically in the order of tens of
+kilobytes whereas MySQL can handle objects of larger sizes. Applications choose
+Dynamo’s local persistence engine based on their object size distribution. The
+majority of Dynamo’s production instances use BDB Transactional Data Store.
+
+> Riak KV ships with various [backend options]. [Bitcask] is the default, but
+> [LevelDB] and Main [Memory] are also used heavily in production (in that
+> order). You can also use more than one backend in production via the [Multi]
+> backend configuration.
+>
+> Bitcask is a fast and reliable choice, but does have some limitations at very
+> large scales. For larger clusters, you may want to choose LevelDB (which also
+> supports [secondary indexes]). The Memory backend is an excellent choice when
+> speed is important and durability is not. It also has TTL support.
+
+[backend options]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/
+[Bitcask]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask/
+[LevelDB]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb/
+[Memory]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/memory/
+[Multi]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/multi/
+[secondary indexes]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/secondary-indexes/
+
+The request coordination component is built on top of an event-driven messaging
+substrate where the message processing pipeline is split into multiple stages
+similar to the SEDA architecture [24]. All communications are implemented using
+Java NIO channels. The coordinator executes the read and write requests on
+behalf of clients by collecting data from one or more nodes (in the case of
+reads) or storing data at one or more nodes (for writes). Each client request
+results in the creation of a state machine on the node that received the client
+request. The state machine contains all the logic for identifying the nodes
+responsible for a key, sending the requests, waiting for responses, potentially
+doing retries, processing the replies and packaging the response to the client.
+Each state machine instance handles exactly one client request. For instance, a
+read operation implements the following state machine: (i) send read requests to
+the nodes, (ii) wait for the minimum number of required responses, (iii) if too
+few replies were received within a given time bound, fail the request, (iv)
+otherwise gather all the data versions and determine the ones to be returned and
+(v) if versioning is enabled, perform syntactic reconciliation and generate an
+opaque write context that contains the vector clock that subsumes all the
+remaining versions. For the sake of brevity, the failure handling and retry
+states are left out.
+
+> Request coordination in Riak KV uses Erlang message passing, but follows a
+> similar state machine.
+
+After the read response has been returned to the caller the state machine waits
+for a small period of time to receive any outstanding responses. If stale
+versions were returned in any of the responses, the coordinator updates those
+nodes with the latest version. This process is called read repair because it
+repairs replicas that have missed a recent update at an opportunistic time and
+relieves the anti-entropy protocol from having to do it.
+
+> Riak KV implements [Read Repair].
+
+[Read Repair]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication/#read-repair
+
+As noted earlier, write requests are coordinated by one of the top N nodes in
+the preference list.
Although it is desirable always to have the first node +among the top N to coordinate the writes thereby serializing all writes at a +single location, this approach has led to uneven load distribution resulting in +SLA violations. This is because the request load is not uniformly distributed +across objects. To counter this, any of the top N nodes in the preference list +is allowed to coordinate the writes. In particular, since each write usually +follows a read operation, the coordinator for a write is chosen to be the node +that replied fastest to the previous read operation which is stored in the +context information of the request. This optimization enables us to pick the +node that has the data that was read by the preceding read operation thereby +increasing the chances of getting “read-your-writes” consistency. It also +reduces variability in the performance of the request handling which improves +the performance at the 99.9 percentile. + + +## 6. Experiences & Lessons Learned + +> Much of this section relates to benchmarks run against Dynamo. You can run +> [Basho Bench] against your own Riak cluster to discover your own +> optimal values. + +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.10/using/performance/benchmarking/ + +Dynamo is used by several services with different configurations. These +instances differ by their version reconciliation logic, and read/write quorum +characteristics. The following are the main patterns in which Dynamo is used: + +* Business logic specific reconciliation: This is a popular use case for Dynamo. +Each data object is replicated across multiple nodes. In case of divergent +versions, the client application performs its own reconciliation logic. The +shopping cart service discussed earlier is a prime example of this category. Its +business logic reconciles objects by merging different versions of a customer’s +shopping cart. + +> Riak KV currently supports simple conflict resolution by way of read-repair, +> remanding more complex reconciliation to the client. There are several tools +> to help simplify this task, such as [Statebox]. +> +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and +> counters. + +[Statebox]: https://github.com/mochi/statebox_riak +[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/2.9.10/developing/data-types/ + + +* Timestamp based reconciliation: This case differs from the previous one only +in the reconciliation mechanism. In case of divergent versions, Dynamo performs +simple timestamp based reconciliation logic of “last write wins”; i.e., the +object with the largest physical timestamp value is chosen as the correct +version. The service that maintains customer’s session information is a good +example of a service that uses this mode. + +> Riak also supports this for high-performance cases where accuracy is less +> important than speed. + +* High performance read engine: While Dynamo is built to be an “always +writeable” data store, a few services are tuning its quorum characteristics and +using it as a high performance read engine. Typically, these services have a +high read request rate and only a small number of updates. In this +configuration, typically R is set to be 1 and W to be N. For these services, +Dynamo provides the ability to partition and replicate their data across +multiple nodes thereby offering incremental scalability. 
Some of these instances
+function as the authoritative persistence cache for data stored in more heavy
+weight backing stores. Services that maintain product catalog and promotional
+items fit in this category.
+
+> Riak can be used in this manner.
+
+The main advantage of Dynamo is that its client applications can tune the values
+of N, R and W to achieve their desired levels of performance, availability and
+durability. For instance, the value of N determines the durability of each
+object. A typical value of N used by Dynamo’s users is 3.
+
+The values of W and R impact object availability, durability and consistency.
+For instance, if W is set to 1, then the system will never reject a write
+request as long as there is at least one node in the system that can
+successfully process a write request. However, low values of W and R can
+increase the risk of inconsistency as write requests are deemed successful and
+returned to the clients even if they are not processed by a majority of the
+replicas. This also introduces a vulnerability window for durability when a
+write request is successfully returned to the client even though it has been
+persisted at only a small number of nodes.
+
+Traditional wisdom holds that durability and availability go hand-in-hand.
+However, this is not necessarily true here. For instance, the vulnerability
+window for durability can be decreased by increasing W. This may increase the
+probability of rejecting requests (thereby decreasing availability) because more
+storage hosts need to be alive to process a write request.
+
+The common (N,R,W) configuration used by several instances of Dynamo is (3,2,2).
+These values are chosen to meet the necessary levels of performance, durability,
+consistency, and availability SLAs.
+
+All the measurements presented in this section were taken on a live system
+operating with a configuration of (3,2,2) and running a couple hundred nodes
+with homogenous hardware configurations. As mentioned earlier, each instance of
+Dynamo contains nodes that are located in multiple datacenters. These
+datacenters are typically connected through high speed network links. Recall
+that to generate a successful get (or put) response R (or W) nodes need to
+respond to the coordinator. Clearly, the network latencies between datacenters
+affect the response time and the nodes (and their datacenter locations) are
+chosen such that the applications’ target SLAs are met.
+
+> Ditto for Riak.
+
+### 6.1 Balancing Performance and Durability
+
+While Dynamo’s principal design goal is to build a highly available data store,
+performance is an equally important criterion in Amazon’s platform. As noted
+earlier, to provide a consistent customer experience, Amazon’s services set
+their performance targets at higher percentiles (such as the 99.9th or 99.99th
+percentiles). A typical SLA required of services that use Dynamo is that 99.9%
+of the read and write requests execute within 300ms.
+
+Since Dynamo is run on standard commodity hardware components that have far less
+I/O throughput than high-end enterprise servers, providing consistently high
+performance for read and write operations is a non-trivial task. The involvement
+of multiple storage nodes in read and write operations makes it even more
+challenging, since the performance of these operations is limited by the slowest
+of the R or W replicas.
+<a href="#figure-4">Figure 4</a> shows the average and
+99.9th percentile latencies of Dynamo’s read and write operations during a
+period of 30 days. As seen in the figure, the latencies exhibit a clear diurnal
+pattern which is a result of the diurnal pattern in the incoming request rate
+(i.e., there is a significant difference in request rate between the daytime and
+night). Moreover, the write latencies are higher than read latencies obviously
+because write operations always result in disk access. Also, the 99.9th
+percentile latencies are around 200 ms and are an order of magnitude higher than
+the averages. This is because the 99.9th percentile latencies are affected by
+several factors such as variability in request load, object sizes, and locality
+patterns.
+
+**<figure id="figure-4" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure4.png">
+  <figcaption>
+    Figure 4: Average and 99.9 percentiles of latencies for read and write
+    requests during our peak request season of December 2006. The intervals
+    between consecutive ticks in the x-axis correspond to 12 hours. Latencies
+    follow a diurnal pattern similar to the request rate and 99.9 percentile
+    latencies are an order of magnitude higher than averages.
+  </figcaption>
+</figure>**
+
+While this level of performance is acceptable for a number of services, a few
+customer-facing services required higher levels of performance. For these
+services, Dynamo provides the ability to trade-off durability guarantees for
+performance. In the optimization, each storage node maintains an object buffer
+in its main memory. Each write operation is stored in the buffer and gets
+periodically written to storage by a writer thread. In this scheme, read
+operations first check if the requested key is present in the buffer. If so, the
+object is read from the buffer instead of the storage engine.
+
+> This is more similar to Riak's W value, since only DW requires a durable write
+> to respond as a success.
+
+This optimization has resulted in lowering the 99.9th percentile latency by a
+factor of 5 during peak traffic even for a very small buffer of a thousand
+objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure,
+write buffering smooths out higher percentile latencies. Obviously, this scheme
+trades durability for performance. In this scheme, a server crash can result in
+missing writes that were queued up in the buffer. To reduce the durability risk,
+the write operation is refined to have the coordinator choose one out of the N
+replicas to perform a “durable write”. Since the coordinator waits only for W
+responses, the performance of the write operation is not affected by the
+performance of the durable write operation performed by a single replica.
+
+**<figure id="figure-5" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure5.png">
+  <figcaption>
+    Figure 5: Comparison of performance of 99.9th percentile latencies for
+    buffered vs. non-buffered writes over a period of 24 hours. The intervals
+    between consecutive ticks in the x-axis correspond to one hour.
+  </figcaption>
+</figure>**
+
+> Setting DW=1 will replicate this behavior.
+
+
+### 6.2 Ensuring Uniform Load Distribution
+
+Dynamo uses consistent hashing to partition its key space across its replicas
+and to ensure uniform load distribution. A uniform key distribution can help us
+achieve uniform load distribution assuming the access distribution of keys is
+not highly skewed.
+In particular, Dynamo’s design assumes that even where there is a significant
+skew in the access distribution there are enough keys in the popular end of the
+distribution so that the load of handling popular keys can be spread across the
+nodes uniformly through partitioning. This section discusses the load imbalance
+seen in Dynamo and the impact of different partitioning strategies on load
+distribution.
+
+> Riak follows SHA1-based consistent hashing for [partitioning].
+
+[partitioning]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication/#understanding-replication-by-example
+
+To study the load imbalance and its correlation with request load, the total
+number of requests received by each node was measured for a period of 24 hours -
+broken down into intervals of 30 minutes. In a given time window, a node is
+considered to be “in-balance” if the node’s request load deviates from the
+average load by less than a certain threshold (here 15%). Otherwise the node was
+deemed “out-of-balance”. <a href="#figure-6">Figure 6</a> presents the fraction
+of nodes that are “out-of-balance” (henceforth, “imbalance ratio”) during this
+time period. For reference, the corresponding request load received by the
+entire system during this time period is also plotted. As seen in the figure,
+the imbalance ratio decreases with increasing load. For instance, during low
+loads the imbalance ratio is as high as 20% and during high loads it is close to
+10%. Intuitively, this can be explained by the fact that under high loads, a
+large number of popular keys are accessed and due to uniform distribution of
+keys the load is evenly distributed. However, during low loads (where load is
+1/8th of the measured peak load), fewer popular keys are accessed, resulting in
+a higher load imbalance.
+
+**<figure id="figure-6" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure6.png">
+  <figcaption>
+    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
+    request load is above a certain threshold from the average system load) and
+    their corresponding request load. The interval between ticks in x-axis
+    corresponds to a time period of 30 minutes.
+  </figcaption>
+</figure>**
+
+<i>This section discusses how Dynamo’s partitioning scheme has evolved over time
+and its implications on load distribution.</i>
+
+<strong>Strategy 1:</strong> T random tokens per node and partition by token
+value: This was the initial strategy deployed in production (and described in
+Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
+at random from the hash space). The tokens of all nodes are ordered according to
+their values in the hash space. Every two consecutive tokens define a range. The
+last token and the first token form a range that "wraps" around from the highest
+value to the lowest value in the hash space. Because the tokens are chosen
+randomly, the ranges vary in size. As nodes join and leave the system, the token
+set changes and consequently the ranges change. Note that the space needed to
+maintain the membership at each node increases linearly with the number of nodes
+in the system.
+
+> Riak uses equal-sized partitions with a round-robin distribution--not
+> variably-sized partitions that are randomly distributed.
+
+While using this strategy, the following problems were encountered. First, when
+a new node joins the system, it needs to “steal” its key ranges from other
+nodes.
+However, the nodes handing the key ranges off to the new node have to
+scan their local persistence store to retrieve the appropriate set of data
+items. Note that performing such a scan operation on a production node is tricky
+as scans are highly resource intensive operations and they need to be executed
+in the background without affecting the customer performance. This requires us
+to run the bootstrapping task at the lowest priority. However, this
+significantly slows the bootstrapping process and during the busy shopping
+season, when the nodes are handling millions of requests a day, the
+bootstrapping has taken almost a day to complete. Second, when a node
+joins/leaves the system, the key ranges handled by many nodes change and the
+Merkle trees for the new ranges need to be recalculated, which is a non-trivial
+operation to perform on a production system. Finally, there was no easy way to
+take a snapshot of the entire key space due to the randomness in key ranges, and
+this made the process of archival complicated. In this scheme, archiving the
+entire key space requires us to retrieve the keys from each node separately,
+which is highly inefficient.
+
+The fundamental issue with this strategy is that the schemes for data
+partitioning and data placement are intertwined. For instance, in some cases, it
+is preferred to add more nodes to the system in order to handle an increase in
+request load. However, in this scenario, it is not possible to add nodes without
+affecting data partitioning. Ideally, it is desirable to use independent schemes
+for partitioning and placement. To this end, the following strategies were
+evaluated:
+
+<strong>Strategy 2:</strong> T random tokens per node and equal sized
+partitions: In this strategy, the hash space is divided into Q equally sized
+partitions/ranges and each node is assigned T random tokens. Q is usually set
+such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In
+this strategy, the tokens are only used to build the function that maps values
+in the hash space to the ordered lists of nodes and not to decide the
+partitioning. A partition is placed on the first N unique nodes that are
+encountered while walking the consistent hashing ring clockwise from the end of
+the partition. <a href="#figure-7">Figure 7</a> illustrates this strategy for
+N=3. In this example, nodes A, B, C are encountered while walking the ring from
+the end of the partition that contains key k1. The primary advantages of this
+strategy are: (i) decoupling of partitioning and partition placement, and (ii)
+enabling the possibility of changing the placement scheme at runtime.
+
+> As mentioned before, Riak uses equal-sized partitions, but not
+> random distribution.
+
+**<figure id="figure-7" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure7-small.png">
+  <figcaption>
+    Figure 7: Partitioning and placement of keys in the three strategies. A, B,
+    and C depict the three unique nodes that form the preference list for the
+    key k1 on the consistent hashing ring (N=3). The shaded area indicates the
+    key range for which nodes A, B, and C form the preference list. Dark arrows
+    indicate the token locations for various nodes.
+  </figcaption>
+</figure>**
+
+<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
+Similar to strategy 2, this strategy divides the hash space into Q equally sized
+partitions and the placement of partitions is decoupled from the partitioning
+scheme. Moreover, each node is assigned Q/S tokens where S is the number of
+nodes in the system. When a node leaves the system, its tokens are randomly
+distributed to the remaining nodes such that these properties are preserved.
+Similarly, when a node joins the system it "steals" tokens from nodes in the
+system in a way that preserves these properties.
+
+> Riak most closely follows strategy 3.
+>
+> See [The Node Join Process] and [Replacing a Node].
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+[Replacing a Node]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/replacing-node/
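+
+> The equal-partition, round-robin claim mentioned in the notes above can be
+> sketched like this (a toy model; riak_core's real claim algorithm also works
+> to keep adjacent partitions on distinct nodes):
+>
+> ```python
+> def claim_round_robin(num_partitions, nodes):
+>     """Assign Q equal-sized partitions to S nodes in round-robin order,
+>     leaving each node with roughly Q/S partitions."""
+>     return {p: nodes[p % len(nodes)] for p in range(num_partitions)}
+>
+> owners = claim_round_robin(8, ["node_a", "node_b", "node_c"])
+> print(owners[0], owners[1], owners[2], owners[3])  # node_a node_b node_c node_a
+> ```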
+
+The efficiency of these three strategies is evaluated for a system with S=30 and
+N=3. However, comparing these different strategies in a fair manner is hard as
+different strategies have different configurations to tune their efficiency. For
+instance, the load distribution property of strategy 1 depends on the number of
+tokens (i.e., T) while strategy 3 depends on the number of partitions (i.e., Q).
+One fair way to compare these strategies is to evaluate the skew in their load
+distribution while all strategies use the same amount of space to maintain their
+membership information. For instance, in strategy 1 each node needs to maintain
+the token positions of all the nodes in the ring and in strategy 3 each node
+needs to maintain the information regarding the partitions assigned to each
+node.
+
+In our next experiment, these strategies were evaluated by varying the relevant
+parameters (T and Q). The load balancing efficiency of each strategy was
+measured for different sizes of membership information that needs to be
+maintained at each node, where load balancing efficiency is defined as the ratio
+of the average number of requests served by each node to the maximum number of
+requests served by the hottest node.
+
+The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
+figure, strategy 3 achieves the best load balancing efficiency and strategy 2
+has the worst load balancing efficiency. For a brief time, Strategy 2 served as
+an interim setup during the process of migrating Dynamo instances from using
+Strategy 1 to Strategy 3. Compared to Strategy 1, Strategy 3 achieves better
+efficiency and reduces the size of membership information maintained at each
+node by three orders of magnitude. While storage is not a major issue, the nodes
+gossip the membership information periodically, and as such it is desirable to
+keep this information as compact as possible. In addition to this, strategy 3 is
+advantageous and simpler to deploy for the following reasons: (i) Faster
+bootstrapping/recovery: Since partition ranges are fixed, they can be stored in
+separate files, meaning a partition can be relocated as a unit by simply
+transferring the file (avoiding random accesses needed to locate specific
+items). This simplifies the process of bootstrapping and recovery. (ii) Ease of
+archival: Periodic archiving of the dataset is a mandatory requirement for most
+of Amazon storage services. Archiving the entire dataset stored by Dynamo is
+simpler in strategy 3 because the partition files can be archived separately.
+By contrast, in Strategy 1, the tokens are chosen randomly, and archiving the
+data stored in Dynamo requires retrieving the keys from individual nodes
+separately, which is usually inefficient and slow.
+The disadvantage of strategy 3 is that changing the node membership requires
+coordination in order to preserve the properties required of the assignment.
+
+**<figure id="figure-8" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure8.png">
+  <figcaption>
+    Figure 8: Comparison of the load distribution efficiency of different
+    strategies for system with 30 nodes and N=3 with equal amount of metadata
+    maintained at each node. The values of the system size and number of
+    replicas are based on the typical configuration deployed for majority of
+    our services.
+  </figcaption>
+</figure>**
+
+### 6.3 Divergent Versions: When and How Many?
+
+As noted earlier, Dynamo is designed to trade off consistency for availability.
+To understand the precise impact of different failures on consistency, detailed
+data is required on multiple factors: outage length, type of failure, component
+reliability, workload etc. Presenting these numbers in detail is outside of the
+scope of this paper. However, this section discusses a good summary metric: the
+number of divergent versions seen by the application in a live production
+environment.
+
+> This first statement should be read carefully. It's probably more correct to
+> say that Dynamo (and Riak) provides no consistency guarantees, and allows
+> users to trade availability for durability/latency.
+
+Divergent versions of a data item arise in two scenarios. The first is when the
+system is facing failure scenarios such as node failures, data center failures,
+and network partitions. The second is when the system is handling a large number
+of concurrent writers to a single data item and multiple nodes end up
+coordinating the updates concurrently. From both a usability and efficiency
+perspective, it is preferred to keep the number of divergent versions at any
+given time as low as possible. If the versions cannot be syntactically
+reconciled based on vector clocks alone, they have to be passed to the business
+logic for semantic reconciliation. Semantic reconciliation introduces additional
+load on services, so it is desirable to minimize the need for it.
+
+In our next experiment, the number of versions returned to the shopping cart
+service was profiled for a period of 24 hours. During this period, 99.94% of
+requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
+of requests saw 3 versions and 0.00009% of requests saw 4 versions. This shows
+that divergent versions are created rarely.
+
+Experience shows that the increase in the number of divergent versions is caused
+not by failures but by the increase in the number of concurrent writers. The
+increase in the number of concurrent writes is usually triggered by busy robots
+(automated client programs) and rarely by humans. This issue is not discussed in
+detail due to the sensitive nature of the story.
+
+### 6.4 Client-driven or Server-driven Coordination
+
+As mentioned in Section 5, Dynamo has a request coordination component that uses
+a state machine to handle incoming requests. Client requests are uniformly
+assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
+coordinator for a read request. Write requests on the other hand will be
+coordinated by a node in the key’s current preference list. This restriction is
+due to the fact that these preferred nodes have the added responsibility of
+creating a new version stamp that causally subsumes the version that has been
+updated by the write request.
Note that if Dynamo’s versioning scheme is based
+on physical timestamps, any node can coordinate a write request.
+
+> In Riak, a server-side load balancer is an optional configuration. You
+> generally use either virtual IPs or reverse proxies.
+>
+> See [Load Balancing] for more information.
+
+[Load Balancing]: {{<baseurl>}}riak/kv/2.9.10/configuring/load-balancing-proxy/
+
+An alternative approach to request coordination is to move the state machine to
+the client nodes. In this scheme, client applications use a library to perform
+request coordination locally. A client periodically picks a random Dynamo node
+and downloads its current view of Dynamo membership state. Using this
+information, the client can determine which set of nodes form the preference
+list for any given key. Read requests can be coordinated at the client node,
+thereby avoiding the extra network hop that is incurred if the request were
+assigned to a random Dynamo node by the load balancer. Writes will either be
+forwarded to a node in the key’s preference list or can be coordinated locally
+if Dynamo is using timestamp-based versioning.
+
+> Many [client libraries] provide built-in node request coordination.
+>
+> For example, using the Ruby driver, you could specify three nodes like this:
+>
+>     client = Riak::Client.new(nodes: [
+>       {host: '10.0.0.1'},
+>       {host: '10.0.0.2'},
+>       {host: '10.0.0.3'}
+>     ])
+>
+> Note that the Riak clients do not coordinate with Riak's preference list, but
+> simply round-robin requests, letting the Riak cluster handle routing.
+
+[client libraries]: {{<baseurl>}}riak/kv/2.9.10/developing/client-libraries/
+
+An important advantage of the client-driven coordination approach is that a load
+balancer is no longer required to uniformly distribute client load. Fair load
+distribution is implicitly guaranteed by the near-uniform assignment of keys to
+the storage nodes. Obviously, the efficiency of this scheme is dependent on how
+fresh the membership information is at the client. Currently, clients poll a
+random Dynamo node every 10 seconds for membership updates. A pull-based
+approach was chosen over a push-based one, as the former scales better with a
+large number of clients and requires very little state to be maintained at
+servers regarding clients. However, in the worst case the client can be exposed
+to stale membership for a duration of 10 seconds. If the client detects that its
+membership table is stale (for instance, when some members are unreachable), it
+will immediately refresh its membership information.
+
+<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
+percentile and averages that were observed for a period of 24 hours using
+client-driven coordination compared to the server-driven approach. As seen in
+the table, the client-driven coordination approach reduces the 99.9th percentile
+latencies by at least 30 milliseconds and decreases the average latencies
+by 3 to 4 milliseconds. The latency improvement is because the client-driven
+approach eliminates the overhead of the load balancer and the extra network hop
+that may be incurred when a request is assigned to a random node. As seen in the
+table, average latencies tend to be significantly lower than latencies at the
+99.9th percentile. This is because Dynamo’s storage engine caches and write
+buffer have good hit ratios.
Moreover, since the load balancers and network
+introduce additional variability to the response time, the gain in response time
+is higher for the 99.9th percentile than for the average.
+
+<table id="table-2">
+  <caption>
+    Table 2: Performance of client-driven and server-driven
+    coordination approaches.
+  </caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations would not be affected significantly. To this end,
+the background tasks were integrated with an admission control mechanism. Each
+of the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. the database) shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementing and maintaining Dynamo. Many Amazon internal services have
+used Dynamo for the past two years, and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of their requests, and no
+data loss event has occurred to date.
+
+Moreover, the primary advantage of Dynamo is that it provides the necessary
+knobs, in the form of the three parameters (N, R, W), that services can use to
+tune an instance based on their needs.
Unlike popular commercial data stores, Dynamo exposes data
+consistency and reconciliation logic issues to the developers. At the outset,
+one may expect the application logic to become more complex. However,
+historically, Amazon’s platform has been built for high availability, and many
+applications are designed to handle different failure modes and inconsistencies
+that may arise. Hence, porting such applications to use Dynamo was a relatively
+simple task. For new applications that want to use Dynamo, some analysis is
+required during the initial stages of development to pick the right conflict
+resolution mechanisms that meet the business case appropriately. Finally, Dynamo
+adopts a full membership model where each node is aware of the data hosted by
+its peers. To do this, each node actively gossips the full routing table with
+other nodes in the system. This model works well for a system that contains a
+couple of hundred nodes. However, scaling such a design to run with tens of
+thousands of nodes is not trivial because the overhead in maintaining the
+routing table increases with the system size. This limitation might be overcome
+by introducing hierarchical extensions to Dynamo. Also, note that this problem
+is actively addressed by O(1) DHT systems (e.g., [14]).
+
+> This is equally true for Riak. As mentioned above, consider running
+> [Basho Bench] to help discover your optimal setup. Nothing will give you
+> better numbers than real experimentation.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.10/using/performance/benchmarking/
+
+## 7. Conclusions
+
+> This paper was an overview of Riak from a Dynamo point-of-view. To get a
+> better sense of the Riak ecosystem, read our ever-expanding [documentation].
+
+[documentation]: {{<baseurl>}}
+
+This paper described Dynamo, a highly available and scalable data store, used
+for storing the state of a number of core services of Amazon.com’s e-commerce
+platform. Dynamo has provided the desired levels of availability and performance
+and has been successful in handling server failures, data center failures, and
+network partitions. Dynamo is incrementally scalable and allows service owners
+to scale up and down based on their current request load. Dynamo allows service
+owners to customize their storage system to meet their desired performance,
+durability, and consistency SLAs by allowing them to tune the parameters N, R,
+and W.
+
+The production use of Dynamo for the past year demonstrates that decentralized
+techniques can be combined to provide a single highly available system. Its
+success in one of the most challenging application environments shows that an
+eventually consistent storage system can be a building block for highly
+available applications.
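+
+> To make that last point concrete in Riak terms: R and W can be passed on a
+> per-request basis through most clients. The sketch below reuses the Ruby
+> `client` from the earlier example; the bucket name `carts`, the key, and the
+> values shown are hypothetical illustrations, not part of the paper.
+>
+>     bucket = client.bucket('carts')
+>
+>     # Read succeeds once 2 of the N replicas have replied.
+>     cart = bucket.get('user-1234', r: 2)
+>
+>     # Write returns once 2 replicas have acknowledged it.
+>     cart.data = {'items' => ['sku-1', 'sku-2']}
+>     cart.store(w: 2)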
+ + + diff --git a/content/riak/kv/2.9.10/learn/glossary.md b/content/riak/kv/2.9.10/learn/glossary.md new file mode 100644 index 0000000000..7a816fa77a --- /dev/null +++ b/content/riak/kv/2.9.10/learn/glossary.md @@ -0,0 +1,356 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.10/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/2.9.10/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/2.9.10/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/2.9.10/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/2.9.10/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.10/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/2.9.10/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/2.9.10/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/2.9.10/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
+ +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Hinted Handoff + +Hinted handoff is a technique for dealing with node failure in the Riak +cluster in which neighboring nodes temporarily take over storage +operations for the failed node. When the failed node returns to the +cluster, the updates received by the neighboring nodes are handed off to +it. + +Hinted handoff allows Riak to ensure database availability. When a node +fails, Riak can continue to handle requests as if the node were still +there. + +* [Recovering a Failed Node][repair recover failure recovery] + +## Key + +Keys are unique object identifiers in Riak and are scoped within buckets +and bucket types. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Lager + +[Lager] is an Erlang/OTP framework that +ships as Riak's default logger. + +## MapReduce + +Riak's MapReduce gives developers the capability to perform more +powerful queries over the data stored in their key/value data. + +* [Using MapReduce][usage mapreduce] + +## Node + +A node is analogous to a physical server. Nodes run a certain number of +vnodes, each of which claims a partition in the Riak Ring key space. + +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Object + +An object is another name for a value. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Partition + +Partitions are the spaces into which a Riak cluster is divided. Each +vnode in Riak is responsible for a partition. Data is stored on a set +number of partitions determined by the `n_val` setting, with the target +partitions chosen statically by applying consistent hashing to an +object's key. + +* [Clusters][concept clusters] +* [Eventual Consistency][concept eventual consistency] +* [Cluster Capacity Planning][plan cluster capacity] + +## Quorum + +Quorum in Riak has two meanings: + +* The quantity of replicas that must respond to a read or write request + before it is considered successful. This is defined as a bucket + property or as one of the relevant parameters to a single request + (R,W,DW,RW). +* A symbolic quantity for the above, `quorum`, which is equivalent to + `n_val` / 2 + 1. The default setting is `2`. + +* [Eventual Consistency][concept eventual consistency] +* [Replication properties][apps replication properties] +* [Understanding Riak's Configurable Behaviors] + +## Sloppy Quorum + +During failure scenarios, in which available nodes < total nodes, sloppy +quorum is used to ensure that Riak is still available to take writes. +When a primary node is unavailable, another node will accept its write +requests. When the node returns, data is transferred to the primary node +via the [Hinted Handoff](#hinted-handoff) process. + +## Read Repair + +Read repair is an anti-entropy mechanism that Riak uses to +optimistically update stale replicas when they reply to a read request +with stale data. + +* [More about Read Repair][concept replication] + +## Replica + +Replicas are copies of data stored in Riak. The number of replicas +required for both successful reads and writes is configurable in Riak +and should be set based on your application's consistency and +availability requirements. 
+ +* [Eventual Consistency][concept eventual consistency] +* [Understanding Riak's Configurable Behaviors] + +## Riak Core + +Riak Core is the modular distributed systems framework that serves as +the foundation for Riak's scalable architecture. + +* [Riak Core] +* [Where To Start With Riak Core] + +## Riak KV + +Riak KV is the key/value datastore for Riak. + +* [Riak KV] + +## Riak Pipe + +Riak Pipe is the processing layer that powers Riak's MapReduce. It's +best described as "UNIX pipes for Riak." + +* [Riak Pipe] +* [Riak Pipe - the New MapReduce Power] +* [Riak Pipe - Riak's Distributed Processing Framework] + +## Riak Search + +Riak Search is a distributed, scalable, failure-tolerant, realtime, +full-text search engine integrating [Apache +Solr](https://lucene.apache.org/solr/) with Riak KV. + +* [Using Search][usage search] + +## Ring + +The Riak Ring is a 160-bit integer space. This space is equally divided +into partitions, each of which is claimed by a vnode, which themselves +reside on actual physical server nodes. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Cluster Capacity Planning][plan cluster capacity] + +## Secondary Indexing (2i) + +Secondary Indexing in Riak gives developers the ability to tag an object +stored in Riak with one or more values which can then be queried. + +* [Using Secondary Indexes][usage secondary-indexes] +* [Repairing Indexes][repair recover repairs] + +## Strong Consistency + +While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater +enable you to apply strong consistency guarantees to some or all of your +data, thus using Riak as a CP (consistent plus partition-tolerant) +rather than AP (highly available plus partition-tolerant) system. + +* [Strong Consistency Concept][concept strong consistency] +* [Using Strong Consistency][cluster ops strong consistency] + +## Value + +Riak is best described as a key/value store. In versions of Riak prior +to 2.0, all "values" are opaque BLOBs (binary large objects) identified +with a unique key. Values can be any type of data, including a string, a +JSON object, a text document, etc. Modifying values involves fetching +the value that exists in Riak and substituting it for a new value; +operations on values are thus basic CRUD operations. + +[Riak Data Types][dev data types], added in version 2.0, are an important +exception to this. While still considered values---because they are +stored in bucket type/bucket/key locations, like anything in Riak---Riak +Data Types are not BLOBs and are modified by Data Type-specific +operations. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] +* [Data Types][dev data types] + + +## Vector Clock + +Riak utilizes vector clocks (or _vclocks_) to handle version control. +Since any node in a Riak cluster is able to handle a request, and not +all nodes need to participate, data versioning is required to keep track +of a current value. When a value is stored in Riak, it is tagged with a +vector clock and establishes the initial version. When it is updated, +the client provides the vector clock of the object being modified so +that this vector clock can be extended to reflect the update. Riak can +then compare vector clocks on different versions of the object and +determine certain attributes of the data. 
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + + + diff --git a/content/riak/kv/2.9.10/learn/new-to-nosql.md b/content/riak/kv/2.9.10/learn/new-to-nosql.md new file mode 100644 index 0000000000..de8f7b7258 --- /dev/null +++ b/content/riak/kv/2.9.10/learn/new-to-nosql.md @@ -0,0 +1,19 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: 2.9.10 +#menu: +# riak_kv-2.9.10: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +--- + +**TODO: Add content (not sure where this lives in existing docs)** + + + diff --git a/content/riak/kv/2.9.10/learn/use-cases.md b/content/riak/kv/2.9.10/learn/use-cases.md new file mode 100644 index 0000000000..4097a9bcce --- /dev/null +++ b/content/riak/kv/2.9.10/learn/use-cases.md @@ -0,0 +1,404 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/2.9.10/dev/data-modeling/ + - /riak/kv/2.9.10/dev/data-modeling/ +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/2.9.10/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/2.9.10/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/2.9.10/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. 
+ +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. This guide is intended to be illustrative only. + +## High Read/Write, Simple Applications + +The following are examples of Riak use cases that require high read/write +performance without necessarily utilizing complex data structures: + +* [Session Storage][dev data model sess storage] +* [Serving Advertisements][dev data model serve advertisements] +* [Log Data][dev data model log data] +* [Sensor Data][dev data model sensor data] + +## Content Management, Social Applications + +The following application types require more subtle relationships between +objects, e.g. one-to-many and many-to-many relationships. + +* [User Accounts][dev data model user acct] +* [User Settings and Preferences][dev data model user settings] +* [User Events and Timelines][dev data model user events] +* [Articles, Blog Posts, and Other Content][dev data model articles etc] + +## Session Storage + +Riak was originally created to serve as a highly scalable session store. This is +an ideal use case for Riak, which is always most performant and predictable when +used as a key/value store. Since user and session IDs are usually stored in +cookies or otherwise known at lookup time, Riak is able to serve these requests +with predictably low latency. Riak's content-type agnosticism also imposes no +restrictions on the value, so session data can be encoded in many ways and can +evolve without administrative changes to schemas. + +### Complex Session Storage Case + +Riak has features that allow for more complex session storage use cases. The +[Bitcask][plan backend bitcask] storage backend, for example, supports automatic +expiry of keys, which frees application developers from implementing manual +session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to +perform batch processing analysis on large bodies of session data, for example +to compute the average number of active users. If sessions must be retrieved +using multiple keys (e.g. a UUID or email address), +[using secondary indexes][usage secondary-indexes] can provide an easy solution. + +### Session Storage Community Examples + +<table class="use-cases__image-links"> + <tr> + <td> + <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip"> + <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/> + </a> + </td> + <td> + <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Riak at OpenX">Scaling Riak at Kiip</a> + <br> + In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon + Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they + are using Riak in production, and the road they took to get there. One of + the first subsystems they switched over to Riak was Sessions. You can also + read the blog post and catch the slides <a + href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak" + target="_blank">here.</a> + </td> + </tr> +</table> + +## Serving Advertisements + +Riak is often a good choice for serving advertising content to many different +web and mobile users simultaneously with low latency. Content of this sort, e.g. 
images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+Riak's tunable [apps replication properties][replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster that performs more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating the data to a secondary cluster to run heavy analytics jobs, either
+on another Riak cluster or on another solution such as Hadoop. Because the
+access pattern of reading and writing data to Riak is very different from the
+access pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
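+
+As a minimal sketch of the simple pattern described above, assuming the
+official Ruby client and hypothetical bucket and key names (one bucket per
+system, date-based keys):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+
+# One bucket per system; the date serves as the key.
+logs = client.bucket('system1_log_data')
+
+entry = logs.new('2012-03-01')
+entry.content_type = 'application/json'
+entry.data = {'events' => ['service started', 'request handled']}
+entry.store
+
+# Reading a day's log is then a single key lookup.
+todays_log = logs.get('2012-03-01').data
+```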
### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store the update data as the
+value.
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye on the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g.
creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
+      Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account, or if you
+have dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of modeling with
+user data. A common example would be storing data for assembling a social
+network timeline. To create a user timeline, you could use a `timeline` bucket
+in Riak and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs, which could then be
+used to retrieve the full information from another bucket, or perhaps the full
+status updates themselves. If you want to store additional data, such as a
+timestamp, category, or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+to the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process.
Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client. Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could then be stored
+under the same key as the post itself, but in the comments bucket, giving each
+a distinct bucket/key combination. Another possibility would be to store each
+comment with its own ID. Loading the full view with comments would require your
+application to read from both the posts and comments buckets to assemble the
+view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
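+
+As a rough sketch of that 2i tagging pattern, assuming the official Ruby client
+and hypothetical bucket, key, and index names (`_bin` indexes hold string values
+for exact matches; `_int` indexes hold integers for range queries):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+posts = client.bucket('posts')
+
+post = posts.new('my-first-post')
+post.content_type = 'application/json'
+post.data = {'title' => 'My First Post', 'body' => '...'}
+
+# Tag the post with 2i metadata at write time.
+post.indexes['topic_bin'] << 'riak'
+post.indexes['posted_int'] << 20120301
+post.store
+
+# Later: an exact-match query on the topic tag...
+riak_post_keys = posts.get_index('topic_bin', 'riak')
+
+# ...and a range query over the date tag.
+march_post_keys = posts.get_index('posted_int', 20120301..20120331)
+```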
+ +### Articles et al Community Examples + +<table class="use-cases__image-links"> + <tr> + <td> + <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" link target="_blank"> + <img src="/riak-docs/images/linkfluence-case-study.png" title="Milking Performance"> + </a> + </td> + <td> + Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>. + </td> + </tr> + <tr> + <td> + <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" link target="_blank"> + <img src="/riak-docs/images/ideeli-case-study.png" title="Milking Performance"> + </a> + </td> + <td> + ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>. + </td> + </tr> +</table> + + + + diff --git a/content/riak/kv/2.9.10/learn/why-riak-kv.md b/content/riak/kv/2.9.10/learn/why-riak-kv.md new file mode 100644 index 0000000000..30702fa0b9 --- /dev/null +++ b/content/riak/kv/2.9.10/learn/why-riak-kv.md @@ -0,0 +1,224 @@ +--- +title: "Why Riak KV?" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Why Riak KV?" + identifier: "learn_why_riak_kv" + weight: 100 + parent: "learn" +toc: true +aliases: + - /riak/2.9.10/theory/why-riak/ + - /riak/kv/2.9.10/theory/why-riak/ +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.10/developing/app-guide/replication-properties +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.10/using/performance/benchmarking +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency +[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[Datomic]: http://www.datomic.com/overview.html +[dev data types]: {{<baseurl>}}riak/kv/2.9.10/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#read-repair + + +## What is Riak? + +Riak is a distributed database designed to deliver maximum data +availability by distributing data across multiple servers. As long as +your Riak client can reach *one* Riak server, it should be able to write +data. + +Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data. + + +### Basho's goals for Riak + +Goal | Description +-------|------- +**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself are experiencing failure conditions +**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden +**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity +**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available + +### When Riak makes sense + +If your data does not fit on a single server and demands a distributed +database architecture, you should take a close look at Riak as a +potential solution to your data availability issues. 
Getting distributed +databases right is **very** difficult, and Riak was built to address the +problem of data availability with as few trade-offs and downsides as +possible. + +Riak's focus on availability makes it a good fit whenever downtime is +unacceptable. No one can promise 100% uptime, but Riak is designed to +survive network partitions and hardware failures that would +significantly disrupt most databases. + +A less-heralded feature of Riak is its predictable latency. Because its +fundamental operations---read, write, and delete---do not involve +complex data joins or locks, it services those requests promptly. Thanks +to this capability, Riak is often selected as a data storage backend for +data management software from a variety of paradigms, such as +[Datomic]. + +From the standpoint of the actual content of your data, Riak might also +be a good choice if your data can be modeled as one of Riak's currently +available [Data Types][dev data types]: flags, registers, counters, +sets, or maps. These Data Types enable you to take advantage of Riak's +high availability approach while simplifying application development. + +### When Riak is Less of a Good Fit + +We recommend running no fewer than 5 data servers in a cluster. +This means that Riak can be overkill for small databases. If you're not +already sure that you will need a distributed database, there's a good +chance that you won't need Riak. + +If explosive growth is a possibility, however, you are always highly +advised to prepare for that in advance. Scaling at Internet speeds is +sometimes compared to overhauling an airplane mid-flight. If you feel +that such a transition might be necessary in the future, then you might +want to consider Riak. + +Riak's simple data model, consisting of keys and values as its atomic +elements, means that your data must be denormalized if your system is to +be reasonably performant. For most applications this is not a serious +hurdle. But if your data simply cannot be effectively managed as keys +and values, Riak will most likely not be the best fit for you. + +Correspondingly, if your application demands a high query load by any +means other than key/value lookup---e.g. SQL-style `SELECT * FROM table` +operations---Riak will not be as efficient as other databases. If you +wish to compare Riak with other data technologies, Basho offers a tool +called [Basho Bench] to help measure its performance, so that you can +decide whether the availability and operational benefits of Riak +outweigh its disadvantages. + +## How Does a Riak Cluster Work? + +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. 
New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. + +## When Things Go Wrong + +Riak retains fault tolerance, data integrity, and availability even in +failure conditions such as hardware failure and network partitions. Riak +has a number of means of addressing these scenarios and other bumps in +the road, like version conflicts in data. + +### Hinted Handoff + +Hinted handoff enables Riak to handle node failure. If a node goes down, +a neighboring node will take over its storage operations. When the +failed node returns, the updates received by the neighboring node are +handed back to it. This ensures that availability for writes and updates +is maintained automatically, minimizing the operational burden of +failure conditions. + +### Version Conflicts + +In any system that replicates data, conflicts can arise, for example +when two clients update the same object at the exact same time or when +not all updates have yet reached hardware that is experiencing lag. + +In Riak, replicas are [eventually consistent][concept eventual consistency], +meaning that while data is always available, not all replicas may have +the most recent update at the exact same time, causing brief +periods---generally on the order of milliseconds---of inconsistency +while all state changes are synchronized. + +Riak addresses data conflicts as follows: When you make a read request, +Riak looks up all replicas for that object. By default, Riak will return +the most recently updated version, determined by looking at the object's +vector clock. Vector clocks are metadata attached to each replica when +it is created. They are extended each time a replica is updated to keep +track of versions. You can also allow clients to resolve conflicts +themselves if that is a better fit for your use case. + +### Riak Data Types + +If you are not interested in dealing with version conflicts on the +application side, [Riak Data Types][dev data types] offer a powerful +yet easy-to-use means of storing certain types of data while allowing +Riak to handle merge conflicts. These conflicts are resolved +automatically by Riak using Data Type-specific algorithms inspired by +research into [convergent replicated data types]. 
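+
+As a brief, hedged sketch of what this looks like in practice, assuming the
+official Ruby client and a cluster with a counter bucket type already created
+(the type name `counters`, bucket, and key below are hypothetical):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+bucket = client.bucket('page-visits')
+
+# Increments converge automatically across replicas; no sibling
+# resolution is needed on the application side.
+counter = Riak::Crdt::Counter.new(bucket, 'homepage', 'counters')
+counter.increment
+puts counter.value
+```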
+ +### Read Repair + +When an outdated replica is returned as part of a read request, Riak +will automatically update the out-of-sync replica to make it consistent. +[Read repair][glossary read rep], a self-healing property of +the database, will even update a replica that returns a `not_found` in +the event that a node loses the data due to physical failure. + +### Reading and Writing Data in Failure Conditions + +In Riak, you can set an R value for reads and a W value for writes. +These values give you control over how many replicas must respond to a +request for it to succeed. + +Let's say that you have an N value of 3 (aka `n_val=3`) for a particular +key/value pair, but one of the physical nodes responsible for a replica +is down. With an `r=2` setting, only 2 replicas must return results for +read to be deemed successful. This allows Riak to provide read +availability even when nodes are down or laggy. The same applies for the +W in writes. If this value is not specified, Riak defaults to `quorum`, +according to which the majority of nodes must respond. + +There is more on [replication properties][apps replication properties] elsewhere in the +documentation. + + + diff --git a/content/riak/kv/2.9.10/release-notes.md b/content/riak/kv/2.9.10/release-notes.md new file mode 100644 index 0000000000..4e9fd192a1 --- /dev/null +++ b/content/riak/kv/2.9.10/release-notes.md @@ -0,0 +1,35 @@ +--- +title: "Riak KV 2.9.10 Release Notes" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Release Notes" + identifier: "index_release_notes" + weight: 101 + parent: index +toc: false +aliases: + - /riak/2.9.10/community/release-notes + - /riak/kv/2.9.10/intro-v20 + - /riak/2.9.10/intro-v20 + - /riak/kv/2.9.10/introduction +--- + +Released Aug 16, 2020. + + +## Overview + +Fix to critical issue in leveled when using (non-default, but recommended, option): [leveled_reload_recalc = enabled](https://github.com/basho/riak_kv/blob/33add2a29b6880b680a407dc91828736f54c7911/priv/riak_kv.schema#L1156-L1174) + +If using this option, it is recommended to rebuild the ledger on each vnode at some stage after updating. + +## Previous Release Notes + +Please see the KV 2.9.9 release notes [here]({{<baseurl>}}riak/kv/2.9.9/release-notes/). + + + + diff --git a/content/riak/kv/2.9.10/setup.md b/content/riak/kv/2.9.10/setup.md new file mode 100644 index 0000000000..473e83d242 --- /dev/null +++ b/content/riak/kv/2.9.10/setup.md @@ -0,0 +1,49 @@ +--- +title: "Setup Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Setup" + identifier: "setup_index" + weight: 110 + pre: install +toc: false +--- + +[plan index]: ../setup/planning +[install index]: ../setup/installing +[upgrade index]: ../setup/upgrading +[downgrade]: ../setup/downgrade + +## In This Section + +#### [Planning][plan index] + +Information on planning your Riak KV cluster including software & hardware recommendations. + +[Learn More >>][plan index] + +#### [Installing][install index] + +Step-by-step tutorials on installing Riak KV. + +[Learn More >>][install index] + +#### [Upgrading][upgrade index] + +Guides on upgrading your Riak KV cluster. + +[Learn More >>][upgrade index] + +#### [Downgrading][downgrade] + +A guide on downgrading your Riak KV cluster. 
+ +[Learn More >>][downgrade] + + + + + diff --git a/content/riak/kv/2.9.10/setup/downgrade.md b/content/riak/kv/2.9.10/setup/downgrade.md new file mode 100644 index 0000000000..3045a483d0 --- /dev/null +++ b/content/riak/kv/2.9.10/setup/downgrade.md @@ -0,0 +1,178 @@ +--- +title: "Downgrading" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Downgrading" + identifier: "downgrading" + weight: 103 + parent: "setup_index" +toc: true +aliases: + - /riak/2.9.10/ops/upgrading/rolling-downgrades/ + - /riak/kv/2.9.10/ops/upgrading/rolling-downgrades/ +--- + +[rolling upgrade]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading/cluster +[config ref]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference +[concept aae]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/ +[aae status]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#aae-status + +Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade]. + +Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the upgrade on on each node. These steps are related to changes or new features that are not present in the downgraded version. + +## Overview + +For every node in the cluster: + +1. Stop Riak KV. +2. Back up Riak's `etc` and `data` directories. +3. Downgrade the Riak KV. +4. Remove Riak search index and temporary data. +5. Reconfigure Solr cores. +6. Start Riak KV and disable Riak search. +7. Monitor the reindex of the data. +8. Finalize process and restart Riak KV & Riak search. + +### Guidelines + +* Riak control should be disabled throughout the rolling downgrade process. +* [Configuration Files][config ref] must be replaced with those of the version being downgraded to. + + +### Components That Complicate Downgrades + +| Feature | automatic | required | Notes | +|:---|:---:|:---:|:---| +|Migration to Solr 4.10.4 |✔ | ✔| Applies to all clusters using Riak Search. +| Active Anti-Entropy file format changes | ✔ | | Can be opted out using a [capability](#aae_tree_capability) + + +### When Downgrading is No Longer an Option + +If you enabled LZ4 compression in LevelDB and/or enabled global expiration in LevelDB when you installed KV 2.9.10, you cannot downgrade. + + +## General Process + +{{% note %}} +While the cluster contains mixed version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync). + +This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete. +{{% /note %}} + +### Stop Riak KV and remove Riak search index & temporary data + +1\. Stop Riak KV: + +```bash +riak stop +``` +2\. Back up your Riak KV /etc and /data directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Downgrade Riak KV: + +```RHEL/CentOS +sudo rpm -Uvh »riak_package_name«.rpm +``` + +```Ubuntu +sudo dpkg -i »riak_package_name«.deb +``` + +4\. Remove the Riak search index data and AAE data: + + 1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`. + + ```bash + rm -rf /var/lib/riak/yz_temp/solr-webapp + ``` + 2. 
### Prepare to Re-index Solr Cores

5\. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency-sensitive application, you should adjust these settings with care.

```riak.conf
anti_entropy.concurrency_limit = 8
anti_entropy.tree.build_limit.number = 4
anti_entropy.tree.build_limit.per_timespan = 5m
```

### Start the node and disable Yokozuna

6\. Start Riak KV:
{{% note %}}
Search results will be inconsistent until **Step 8.1** is complete.
{{% /note %}}

```bash
riak start
```

7\. Wait for Riak search to start by running the following command:

```bash
riak-admin wait-for-service yokozuna
```

8\. Run `riak attach`.

    1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:

    ```
    riak_core_node_watcher:service_down(yokozuna).
    ```

    2. Expire the Yokozuna AAE trees:

    ```
    yz_entropy_mgr:expire_trees().
    ```

    3. Exit the attach session by pressing **Ctrl-G** then **q**.

### Monitor the reindex of the data

9\. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands.

The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output.

Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.

### Finalize process and restart Yokozuna

10\. If you raised the AAE concurrency settings in riak.conf during **Step 5**, stop the node and remove the increased AAE thresholds.

11\. If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet:

```erlang
riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)).
```

12\. Exit the attach session by pressing **Ctrl-G** then **q**.

13\. 
Verify that transfers have completed: + +```bash +riak-admin transfers +``` + + + + diff --git a/content/riak/kv/2.9.10/setup/installing.md b/content/riak/kv/2.9.10/setup/installing.md new file mode 100644 index 0000000000..c80ce8fff1 --- /dev/null +++ b/content/riak/kv/2.9.10/setup/installing.md @@ -0,0 +1,60 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/2.9.10/ops/building/installing + - /riak/kv/2.9.10/ops/building/installing + - /riak/2.9.10/installing/ + - /riak/kv/2.9.10/installing/ +--- + +[install aws]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/rhel-centos +[install suse]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. + + + + diff --git a/content/riak/kv/2.9.10/setup/installing/amazon-web-services.md b/content/riak/kv/2.9.10/setup/installing/amazon-web-services.md new file mode 100644 index 0000000000..2b02ac6a90 --- /dev/null +++ b/content/riak/kv/2.9.10/setup/installing/amazon-web-services.md @@ -0,0 +1,152 @@ +--- +title_supertext: "Installing on" +title: "Amazon Web Services" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Amazon Web Services" + identifier: "installing_amazon_web_services" + weight: 301 + parent: "installing" +toc: true +aliases: + - /riak/2.9.10/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/kv/2.9.10/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/2.9.10/installing/amazon-web-services/ + - /riak/kv/2.9.10/installing/amazon-web-services/ +--- + + +## Launching Riak VMs via the AWS Marketplace + +{{% note title="Note" %}} +The AWS Marketplace does not always have the most recent versions of Riak available. 
To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below.
{{% /note %}}

In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account.

1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account.

2. Locate Riak in the **Databases & Caching** category or search for Riak from any page.

3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair.

    ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png)

4. Click the **Accept Terms and Launch with 1-Click** button.

### Security Group Settings

Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak.

1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM.

2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports:

    * 22 (SSH)
    * 8087 (Riak Protocol Buffers Interface)
    * 8098 (Riak HTTP Interface)

3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab).

    * Port range: 4369
    * Port range: 6000-7999
    * Port range: 8099

4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.

    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)

We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.10/using/security/).

## Clustering Riak on AWS

You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the `ec2-user`.

You can find more information on connecting to an instance in the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).

{{% note title="Note" %}}
The following clustering setup will _not_ be resilient to instance restarts
unless deployed in Amazon VPC.
{{% /note %}}

{{% note title="Note on Package Based Installation" %}}
If installing to AWS by package, further configuration of _riak.conf_ to set the node name and listening IP addresses is necessary for the steps below to function.
{{% /note %}}

1. On the first node, obtain the internal IP address:

    ```bash
    curl http://169.254.169.254/latest/meta-data/local-ipv4
    ```

2. For all other nodes, use the internal IP address of the first node:

    ```bash
    sudo riak-admin cluster join riak@<ip.of.first.node>
    ```

3. After all of the nodes are joined, execute the following:

    ```bash
    sudo riak-admin cluster plan
    ```

    If this looks good:

    ```bash
    sudo riak-admin cluster commit
    ```

    To check the status of clustering use:

    ```bash
    sudo riak-admin member_status
    ```

You now have a Riak cluster running on AWS.
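The join/plan/commit cycle above is easy to script across a fleet. The sketch below is illustrative only: it assumes it is run on each joining node, that `FIRST_NODE_IP` has been filled in with the first node's internal IP (from the metadata query in step 1), and that `riak-admin` is on the `PATH`.

```bash
#!/bin/bash
# Hypothetical helper: join this node to the cluster seeded by the first node.
FIRST_NODE_IP="10.0.0.1"   # replace with the first node's internal IP

sudo riak-admin cluster join "riak@${FIRST_NODE_IP}"

# Normally you would inspect the plan output by hand before committing.
sudo riak-admin cluster plan
sudo riak-admin cluster commit

# Confirm this node appears as a valid member.
sudo riak-admin member_status
```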
## Installing From Package

#### AWS (2)

You can install Riak on Amazon Linux 2 using `yum`, which we recommend:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2/riak-2.9.10-1.amzn2x86_64.rpm
sudo yum localinstall -y riak-2.9.10-1.amzn2x86_64.rpm
```

Or you can install the `.rpm` package manually:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2/riak-2.9.10-1.amzn2x86_64.rpm
sudo rpm -i riak-2.9.10-1.amzn2x86_64.rpm
```

#### AWS (2016.09)

You can install Riak on Amazon Linux 2016.09 using `yum`, which we recommend:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2016.09/riak-2.9.10-1.amzn1x86_64.rpm
sudo yum localinstall -y riak-2.9.10-1.amzn1x86_64.rpm
```

Or you can install the `.rpm` package manually:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2016.09/riak-2.9.10-1.amzn1x86_64.rpm
sudo rpm -i riak-2.9.10-1.amzn1x86_64.rpm
```

## Next Steps

Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].

[install verify]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/verify

diff --git a/content/riak/kv/2.9.10/setup/installing/debian-ubuntu.md b/content/riak/kv/2.9.10/setup/installing/debian-ubuntu.md
new file mode 100644
index 0000000000..0a744e5770
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/installing/debian-ubuntu.md
@@ -0,0 +1,170 @@
---
title_supertext: "Installing on"
title: "Debian and Ubuntu"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "Debian & Ubuntu"
    identifier: "installing_debian_ubuntu"
    weight: 302
    parent: "installing"
toc: true
aliases:
  - /riak/2.9.10/ops/building/installing/Installing-on-Debian-and-Ubuntu
  - /riak/kv/2.9.10/ops/building/installing/Installing-on-Debian-and-Ubuntu
  - /riak/2.9.10/installing/debian-ubuntu/
  - /riak/kv/2.9.10/installing/debian-ubuntu/
---

[install source index]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source/
[security index]: {{<baseurl>}}riak/kv/2.9.10/using/security/
[install source erlang]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source/erlang
[install verify]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/verify

Riak KV can be installed on Debian or Ubuntu-based systems using a binary
package or by compiling from source code.

The following steps have been tested to work with Riak KV on:

- Ubuntu 18.04
- Ubuntu 16.04
- Ubuntu 14.04
- Ubuntu 12.04
- Debian 9.2
- Debian 8.6
- Debian 7.6
- Raspbian Buster

> **Note on Debian 7**
>
> If you wish to install Riak on Debian 7, you may need to install
[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
later, which in turn requires upgrading your system to
[sid](https://www.debian.org/releases/sid/). Installation instructions
can be found
[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
>
> Once sid has been installed, you can install libc6 with the following
command:
>
>```bash
apt-get -t sid install libc6 libc6-dev libc6-dbg
```

## Installing From Package

If you wish to install the deb packages by hand, follow these
instructions.

### Installing on Non-LTS Ubuntu Releases

Typically we only package Riak for LTS releases to keep our build and
testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
there are changes that affect how Riak is packaged, so we will release a
separate package for that non-LTS release. In most other cases, however,
if you are running a non-LTS release (such as Ubuntu 12.10), it is safe to
follow the instructions below for the LTS release prior to yours. For
Ubuntu 12.10, for example, that means the Ubuntu 12.04 instructions. The
sketch below shows one way to script this mapping.
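A rough sketch of that mapping, assuming `lsb_release` is available and using the package URLs from the next section; the codename-to-directory table is an assumption based on the releases listed below:

```bash
#!/bin/bash
# Map the running Ubuntu release to the matching LTS package directory.
case "$(lsb_release -sc)" in
  bionic) DIR="bionic64" ;;
  xenial) DIR="xenial64" ;;
  trusty) DIR="trusty64" ;;
  precise|quantal) DIR="precise64" ;;  # non-LTS 12.10 falls back to 12.04
  *) echo "No package mapping for this release" >&2; exit 1 ;;
esac

wget "https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/${DIR}/riak-2.9.10-1_amd64.deb"
sudo dpkg -i riak-2.9.10-1_amd64.deb
```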
### PAM Library Requirement for Ubuntu

One dependency that may be missing on your machine is the `libpam0g-dev`
package used for Pluggable Authentication Module (PAM) authentication,
associated with [Riak security][security index].

To install:

```bash
sudo apt-get install libpam0g-dev
```

### Riak 64-bit Installation

#### Ubuntu Bionic Beaver (18.04)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/bionic64/riak-2.9.10-1_amd64.deb
sudo dpkg -i riak-2.9.10-1_amd64.deb
```

#### Ubuntu Xenial Xerus (16.04)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/xenial64/riak-2.9.10-1_amd64.deb
sudo dpkg -i riak-2.9.10-1_amd64.deb
```

#### Ubuntu Trusty Tahr (14.04)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/trusty64/riak-2.9.10-1_amd64.deb
sudo dpkg -i riak-2.9.10-1_amd64.deb
```

#### Ubuntu Precise Pangolin (12.04)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/precise64/riak-2.9.10-1_amd64.deb
sudo dpkg -i riak-2.9.10-1_amd64.deb
```

#### Debian Stretch (9.0)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/9/riak-2.9.10-1_amd64.deb
sudo dpkg -i riak-2.9.10-1_amd64.deb
```

#### Debian Jessie (8.0)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/8/riak-2.9.10-1_amd64.deb
sudo dpkg -i riak-2.9.10-1_amd64.deb
```

#### Debian Wheezy (7.0)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/7/riak-2.9.10-1_amd64.deb
sudo dpkg -i riak-2.9.10-1_amd64.deb
```

#### Raspbian Buster

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/raspbian/buster/riak-2.9.10-1_armhf.deb
sudo dpkg -i riak-2.9.10-1_armhf.deb
```

## Installing From Source

First, install Riak dependencies using apt:

```bash
sudo apt-get install build-essential libc6-dev-i386 git
```

Riak requires an [Erlang](http://www.erlang.org/) installation.
Instructions can be found in [Installing Erlang][install source erlang].

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/riak-2.9.10.tar.gz
tar zxvf riak-2.9.10.tar.gz
cd riak-2.9.10
make rel
```

If the build was successful, a fresh build of Riak will exist in the
`rel/riak` directory.

## Next Steps

Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
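As a quick post-install sanity check, you can confirm that the package registered with `dpkg` and that the node answers a ping. A minimal sketch; the `dpkg -s` line applies only to package-based installs:

```bash
# Confirm the package is installed (package-based installs only).
dpkg -s riak | grep '^Status'

# Start the node and confirm it responds.
riak start
riak ping    # prints "pong" when the node is up
```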
+ + + + diff --git a/content/riak/kv/2.9.10/setup/installing/freebsd.md b/content/riak/kv/2.9.10/setup/installing/freebsd.md new file mode 100644 index 0000000000..9bd218eecc --- /dev/null +++ b/content/riak/kv/2.9.10/setup/installing/freebsd.md @@ -0,0 +1,132 @@ +--- +title_supertext: "Installing on" +title: "FreeBSD" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "FreeBSD" + identifier: "installing_freebsd" + weight: 303 + parent: "installing" +toc: true +aliases: + - /riak/2.9.10/ops/building/installing/Installing-on-FreeBSD + - /riak/kv/2.9.10/ops/building/installing/Installing-on-FreeBSD + - /riak/2.9.10/installing/freebsd/ + - /riak/kv/2.9.10/installing/freebsd/ +--- + + + +[install source erlang]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source/erlang +[downloads]: {{<baseurl>}}riak/kv/2.9.10/downloads/ +[install verify]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/verify + +You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. + +## Installing From Binary Package + +Installing Riak from a binary package is the simplest method with least required dependencies, and requires less time to complete than building from source. + +### Prerequisites and Dependencies + +Riak depends on `sudo` to be installed if the Riak command line tools are to be executed by users other than the *riak* user. Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package. + +### Installation + +You can install the Riak binary package on FreeBSD remotely using the +`pkg_add` remote option. For this example, we're installing `riak-2.9.10.txz`. + +### For FreeBSD 11.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.10/freebsd/11.1/riak-2.9.10.txz +``` + + +### For FreeBSD 10.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.10/freebsd/10.4/riak-2.9.10.txz +``` + +When Riak is installed, a message is displayed with information about the installation and available documentation. + +``` +Thank you for installing Riak. + +Riak has been installed in /usr/local owned by user:group riak:riak + +The primary directories are: + + {platform_bin_dir, "/usr/local/sbin"} + {platform_data_dir, "/var/db/riak"} + {platform_etc_dir, "/usr/local/etc/riak"} + {platform_lib_dir, "/usr/local/lib/riak"} + {platform_log_dir, "/var/log/riak"} + +These can be configured and changed in the platform_etc_dir/app.config. + +Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly. + +Man pages are available for riak(1) and riak-admin(1) +``` + +## Installing From Source + +Installing Riak from source on FreeBSD is a straightforward process which requires installation of more dependencies (such as Erlang) prior to building, and requires more time than a binary package installation. + +That said, installing from source provides for greater flexibility with respect to configuration, data root locations, and more fine grained control over specific dependency versions. + +### Prerequisites and Dependencies + +When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build. + +If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. 
* Erlang ([Installing Erlang][install source erlang])
* Curl
* Git
* OpenSSL (version 1.0.0_7)
* Python
* sudo
* flex

### Installation

First download the version you wish to install from the [downloads page][downloads].

Next, unpack and build a release from source:

```bash
tar zxf riak-x.x.x.tar.gz
cd riak-x.x.x
gmake rel
```

Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories:

```bash
bin # Riak binaries
data # Riak data and metadata
erts-5.9.2 # Erlang Run-Time System
etc # Riak Configuration
lib # Third party libraries
log # Operational logs
releases # Release information
```

If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this:

```bash
gmake devrel
```

## Next Steps

Now that Riak is installed, check out [Verifying a Riak Installation][install verify].

diff --git a/content/riak/kv/2.9.10/setup/installing/mac-osx.md b/content/riak/kv/2.9.10/setup/installing/mac-osx.md
new file mode 100644
index 0000000000..60b451f84c
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/installing/mac-osx.md
@@ -0,0 +1,120 @@
---
title_supertext: "Installing on"
title: "Mac OS X"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "Mac OS X"
    identifier: "installing_macosx"
    weight: 303
    parent: "installing"
toc: true
aliases:
  - /riak/2.9.10/ops/building/installing/Installing-on-Mac-OS-X
  - /riak/kv/2.9.10/ops/building/installing/Installing-on-Mac-OS-X
  - /riak/2.9.10/installing/mac-osx/
  - /riak/kv/2.9.10/installing/mac-osx/
---

[perf open files]: {{<baseurl>}}riak/kv/2.9.10/using/performance/open-files-limit
[install source erlang]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source/erlang
[install verify]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/verify

The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball.

> **`ulimit` on OS X**
>
> OS X gives you a very small limit on open file handles, so even with a
backend that uses very few file handles, it's possible to run out. See
[Open Files Limit][perf open files] for more information about changing the limit.

## From Precompiled Tarballs

To run Riak from our precompiled tarball, run these commands for the
appropriate platform:

### 64-bit

```bash
curl -O https://files.tiot.jp/riak/kv/2.9/2.9.10/osx/10.11/riak-2.9.10-OSX-x86_64.tar.gz
tar xzvf riak-2.9.10-OSX-x86_64.tar.gz
```

After the release is untarred, you will be able to `cd` into the `riak`
directory and execute `bin/riak start` to start the Riak node.

## Homebrew

{{% note title="Warning: Homebrew not always up to date" %}}
Homebrew's Riak recipe is community supported, and thus is not always up to
date with the latest Riak package. Please ensure that the current recipe is
using the latest supported code (and don't be afraid to update it if it's
not).
{{% /note %}}

Installing Riak 2.9.10 with [Homebrew](http://brew.sh/) is easy:

```bash
brew install --devrel riak
```

By default, this will place a `2.9.10` folder in
`/usr/local/Cellar/riak`.
+ +Be aware that you will most likely see the following message after +running `brew install`: + +``` +Error: The `brew link` step did not complete successfully +The formula built, but is not symlinked into /usr/local + +You can try again using: + brew link riak +``` + +We do not recommend using `brew link` with Riak. Instead, we recommend +either copying that directory to a desired location on your machine, +aliasing the executables in the `/bin` directory, or interacting with +the Riak installation directory via environment variables. + +**Note**: Homebrew will install Erlang if you don't have it already. + +## Installing From Source + +You must have Xcode tools installed from [Apple's Developer +website](http://developer.apple.com/). + +{{% note title="Note on Clang" %}} +Riak has had problems compiling with Clang in the past. As of Riak KV +2.9.0p5 and Clang 902.0.39.1, Clang can build Riak. +{{% /note %}} + +Riak requires [Erlang](http://www.erlang.org/) R16B02+. + +If you do not have Erlang already installed, see [Installing Erlang][install source erlang]. + +Next, download and unpack the source distribution. + +```bash +curl -O https://files.tiot.jp/riak/kv/2.9/2.9.10/riak-2.9.10.tar.gz +tar zxvf riak-2.9.10.tar.gz +cd riak-2.9.10 +make rel +``` + +If you receive errors when building about "incompatible architecture," +please verify that you built Erlang with the same architecture as your +system (Snow Leopard and higher: 64bit). + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + diff --git a/content/riak/kv/2.9.10/setup/installing/rhel-centos.md b/content/riak/kv/2.9.10/setup/installing/rhel-centos.md new file mode 100644 index 0000000000..35c10f5a34 --- /dev/null +++ b/content/riak/kv/2.9.10/setup/installing/rhel-centos.md @@ -0,0 +1,133 @@ +--- +title_supertext: "Installing on" +title: "RHEL and CentOS" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "RHEL & CentOS" + identifier: "installing_rhel_centos" + weight: 304 + parent: "installing" +toc: true +aliases: + - /riak/2.9.10/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/kv/2.9.10/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/2.9.10/installing/rhel-centos/ + - /riak/kv/2.9.10/installing/rhel-centos/ +--- + + + +[install source index]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source +[install source erlang]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/verify + +Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary +package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on +CentOS/RHEL 6.9, 7.5.1804 and 8.1.1911 . + +> **Note on SELinux** +> +> CentOS enables SELinux by default, so you may need to disable SELinux if +you encounter errors. + +## Installing From Package + +If you wish to install the RHEL/CentOS packages by hand, follow these +instructions. 
### For CentOS 8 / RHEL 8

Before installing Riak on CentOS 8/RHEL 8, we need to satisfy some Erlang dependencies
from EPEL by first installing the EPEL repository:

```bash
sudo yum install -y epel-release
```

Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using `yum`, which we recommend:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/8/riak-2.9.10-1.el8.x86_64.rpm
sudo yum localinstall -y riak-2.9.10-1.el8.x86_64.rpm
```

Or you can install the `.rpm` package manually:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/8/riak-2.9.10-1.el8.x86_64.rpm
sudo rpm -Uvh riak-2.9.10-1.el8.x86_64.rpm
```

### For CentOS 7 / RHEL 7

You can install Riak on CentOS 7/RHEL 7 using `yum`, which we recommend:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/7/riak-2.9.10-1.el7.x86_64.rpm
sudo yum localinstall -y riak-2.9.10-1.el7.x86_64.rpm
```

Or you can install the `.rpm` package manually:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/7/riak-2.9.10-1.el7.x86_64.rpm
sudo rpm -Uvh riak-2.9.10-1.el7.x86_64.rpm
```

### For CentOS 6 / RHEL 6

You can install Riak on CentOS 6/RHEL 6 using `yum`, which we recommend:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/6/riak-2.9.10-1.el6.x86_64.rpm
sudo yum localinstall -y riak-2.9.10-1.el6.x86_64.rpm
```

Or you can install the `.rpm` package manually:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/6/riak-2.9.10-1.el6.x86_64.rpm
sudo rpm -Uvh riak-2.9.10-1.el6.x86_64.rpm
```

## Installing From Source

Riak requires an [Erlang](http://www.erlang.org/) installation.
Instructions can be found in [Installing Erlang][install source erlang].

Building from source will require the following packages:

* `gcc`
* `gcc-c++`
* `glibc-devel`
* `make`
* `pam-devel`

You can install these with yum:

```bash
sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
```

Now we can download and install Riak:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.10/riak-2.9.10.tar.gz
tar zxvf riak-2.9.10.tar.gz
cd riak-2.9.10
make rel
```

You will now have a fresh build of Riak in the `rel/riak` directory.

## Next Steps

Now that Riak is installed, check out [Verifying a Riak Installation][install verify].

diff --git a/content/riak/kv/2.9.10/setup/installing/smartos.md b/content/riak/kv/2.9.10/setup/installing/smartos.md
new file mode 100644
index 0000000000..d9df0a1e5b
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/installing/smartos.md
@@ -0,0 +1,118 @@
---
title_supertext: "Installing on"
title: "SmartOS"
description: ""
project: "riak_kv"
project_version: "2.9.10"
menu:
  riak_kv-2.9.10:
    name: "SmartOS"
    identifier: "installing_smartos"
    weight: 305
    parent: "installing"
toc: true
aliases:
  - /riak/2.9.10/ops/building/installing/Installing-on-SmartOS
  - /riak/kv/2.9.10/ops/building/installing/Installing-on-SmartOS
  - /riak/2.9.10/installing/smartos/
  - /riak/kv/2.9.10/installing/smartos/
---

[install verify]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/verify

{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}} + +The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open +files limit is at least 65536. Check the current limits to verify this: + +```bash +ulimit -a +``` + +To temporarily increase this limit *for the life of your session*, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`: + +```bash +set rlim_fd_max=65536 +``` + +## Choosing a Version + +SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is the way to determine which Riak package to use. + +The thing that really matters for Riak is what dataset was used to make the SmartOS VM. These datasets come from joyent and appear like this with the `dsadm` command: + +``` +fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4 +f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1 +``` + +This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use. + +For Joyent Cloud users who don't know what dataset was used, in the guest zone type: + +``` +cat /opt/local/etc/pkgin/repositories.conf +``` + +* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* you need to use the `1.8` download. +* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* you need to use the `1.6` download. + +## Download and Install + +Download your version of the Riak binary package for SmartOS: + +```bash +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz +``` + +Next, install the package: + +``` +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz +``` + +After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: + +```bash +svcadm -v enable -r riak +``` + +Finally, after enabling the services, check to see that they are online: + +``` +svcs -a | grep -E 'epmd|riak' +``` + +Output from the above command should resemble the following: + +``` +online 17:17:16 svc:/network/epmd:default +online 17:17:16 svc:/application/riak:default +``` + +Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed and configured Riak as service on SmartOS. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
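The repository check above can be wrapped in a short shell test. A sketch, assuming a Joyent guest zone with the standard pkgsrc configuration file:

```bash
# Choose the Riak package flavor from the pkgsrc repository in use.
REPO_CONF=/opt/local/etc/pkgin/repositories.conf
if grep -q 2012Q2 "$REPO_CONF"; then
  echo "Use the 1.8 download"
elif grep -q 2011 "$REPO_CONF"; then
  echo "Use the 1.6 download"
else
  echo "Unknown pkgsrc repository; check the dataset with dsadm" >&2
fi
```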
diff --git a/content/riak/kv/2.9.10/setup/installing/solaris.md b/content/riak/kv/2.9.10/setup/installing/solaris.md new file mode 100644 index 0000000000..9c2d02f8b2 --- /dev/null +++ b/content/riak/kv/2.9.10/setup/installing/solaris.md @@ -0,0 +1,90 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: "2.9.10" +menu: + riak_kv-2.9.10: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/2.9.10/ops/building/installing/Installing-on-Solaris + - /riak/kv/2.9.10/ops/building/installing/Installing-on-Solaris + - /riak/2.9.10/installing/solaris/ + - /riak/kv/2.9.10/installing/solaris/ +--- + + + +[install verify]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
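To confirm that the `/etc/system` change from the Open Files Limit section survives a restart, a quick check (a minimal sketch, assuming the settings were added exactly as shown above):

```bash
# Verify the persisted descriptor limits and the effective value.
grep rlim_fd /etc/system
ulimit -n    # should report 65536 after a restart
```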
diff --git a/content/riak/kv/2.9.10/setup/installing/source.md b/content/riak/kv/2.9.10/setup/installing/source.md
new file mode 100644
index 0000000000..9006653430
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/installing/source.md
@@ -0,0 +1,109 @@
---
title_supertext: "Installing"
title: "Riak KV From Source"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "Installing From Source"
    identifier: "installing_source"
    weight: 310
    parent: "installing"
toc: true
aliases:
  - /riak/2.9.10/ops/building/Installing-Riak-from-Source
  - /riak/kv/2.9.10/ops/building/Installing-Riak-from-Source
  - /riak/2.9.10/installing/source/
  - /riak/kv/2.9.10/installing/source/
---

[install source erlang]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/source/erlang
[downloads]: {{<baseurl>}}riak/kv/2.9.10/downloads/
[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/debian-ubuntu/#installing-from-source
[install freebsd#source]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/freebsd/#installing-from-source
[install mac osx#source]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/mac-osx/#installing-from-source
[install rhel & centos#source]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/rhel-centos/#installing-from-source
[install verify]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/verify

Riak should be installed from source if you are building on a platform
for which a package does not exist or if you are interested in
contributing to Riak.

## Dependencies

### Erlang

To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release.

See [Installing Erlang][install source erlang] for instructions.

### Git

Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build.

### GCC

Riak will not compile with Clang. Please make sure your default C/C++
compiler is [GCC](https://gcc.gnu.org/).

## Installation

The following instructions generate a complete, self-contained build of
Riak in `$RIAK/rel/riak`, where `$RIAK` is the location of the unpacked
or cloned source.

### Installing from source package

Download the Riak source package from the [Download Center][downloads] and build:

```bash
curl -O https://files.tiot.jp/riak/kv/2.9/2.9.10/riak-2.9.10.tar.gz
tar zxvf riak-2.9.10.tar.gz
cd riak-2.9.10
make locked-deps
make rel
```

### Installing from GitHub

The [Riak GitHub repository](http://github.com/basho/riak) has much
more information on building and installing Riak from source. To clone
and build Riak from source, follow the steps below.

Clone the repository using [Git](http://git-scm.com) and build:

```bash
git clone git://github.com/basho/riak.git
cd riak
make locked-deps
make rel
```

## Platform-Specific Instructions

For instructions about specific platforms, see:

  * [Debian & Ubuntu][install debian & ubuntu#source]
  * [FreeBSD][install freebsd#source]
  * [Mac OS X][install mac osx#source]
  * [RHEL & CentOS][install rhel & centos#source]

If you are running Riak on a platform not in the list above and need
some help getting it up and running, join The Riak Mailing List and
inquire about it there.
We are happy to help you get up and running with
Riak.

### Windows

Riak is not currently supported on Microsoft Windows.

## Next Steps

Now that Riak is installed, check out [Verifying a Riak Installation][install verify].

diff --git a/content/riak/kv/2.9.10/setup/installing/source/erlang.md b/content/riak/kv/2.9.10/setup/installing/source/erlang.md
new file mode 100644
index 0000000000..9a28d3a6c3
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/installing/source/erlang.md
@@ -0,0 +1,570 @@
---
title: "Installing Erlang"
description: ""
project: "riak_kv"
project_version: 2.9.10
menu:
  riak_kv-2.9.10:
    name: "Installing Erlang"
    identifier: "installing_source_erlang"
    weight: 301
    parent: "installing_source"
toc: true
aliases:
  - /riak/2.9.10/ops/building/installing/erlang
  - /riak/kv/2.9.10/ops/building/installing/erlang
  - /riak/2.9.10/installing/source/erlang/
  - /riak/kv/2.9.10/installing/source/erlang/
---

[install index]: {{<baseurl>}}riak/kv/2.9.10/setup/installing
[security basics]: {{<baseurl>}}riak/kv/2.9.10/using/security/basics

Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].**

> **Note on Official Support**
>
> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package.

## Prerequisites

#### Contents

* [kerl](#kerl-prerequisites)
* [Debian/Ubuntu](#debian-ubuntu-prerequisites)
* [FreeBSD/Solaris](#freebsd-solaris-prerequisites)
* [Mac OS X](#mac-os-x-prerequisites)
* [RHEL/CentOS](#rhel-centos-prerequisites)

To build and install Erlang you must have a GNU-compatible build system and these tools:

**Unpacking**

* [GNU unzip](http://www.gzip.org/) or a modern uncompressing utility.
* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives.

**Building**

* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts.
* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program.
* [gcc](https://gcc.gnu.org/): for compiling C.
* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces.
* [OpenSSL](https://www.openssl.org/): toolkit that implements the SSL and TLS protocols.
* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java.

## kerl Prerequisites

[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems.

Install kerl by running the following commands:

```bash
curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl
chmod a+x kerl
```

If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl.

Otherwise, continue with [Installing with kerl](#installing-with-kerl).
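Once kerl is installed, its `list` and `active` subcommands (part of kerl's standard interface; the output shown is illustrative) are useful for checking what is already built and activated:

```bash
# Show builds and installations kerl is tracking.
./kerl list builds
./kerl list installations

# Show which installation, if any, is currently active.
./kerl active
```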
### Configuring kerl on FreeBSD/Solaris

Start by creating a `~/.kerlrc` file:

```bash
touch ~/.kerlrc
```

Next add the following contents to your `~/.kerlrc` file:

```shell
KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
 --enable-kernel-poll --without-odbc"
```

Then check for the presence of autoconf by running:

```shell
which autoconf
```

If this returns `autoconf not found`, install autoconf by running:

```shell
sudo pkg update
sudo pkg install autoconf
```

Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).

### Configuring kerl on Mac OS X

To compile Erlang as 64-bit on Mac OS X you need to instruct kerl to pass the correct flags to the `configure` command.

Start by creating a `~/.kerlrc` file:

```bash
touch ~/.kerlrc
```

Next add the following contents to your `~/.kerlrc` file:

```shell
KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
 --enable-kernel-poll --without-odbc --enable-darwin-64bit"
```

On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).

Check for the presence of autoconf by running:

```shell
which autoconf
```

If this returns `autoconf not found`, install autoconf either with Homebrew:

```shell
brew install autoconf
```

Or with curl:

```shell
curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
tar zxvf autoconf-2.69.tar.gz
cd autoconf-2.69
./configure && make && sudo make install
```

Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).

## Debian/Ubuntu Prerequisites

### Dependencies

To install the required dependencies run the following `apt-get` commands:

```bash
sudo apt-get update
sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
```

### GUI Dependencies

If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.

> **Note on build output**
>
> These packages are not required for operation of a Riak node.
Notes in the build output about missing support for wxWidgets can be
safely ignored when installing Riak in a typical non-graphical server
environment.

To install packages for graphics support use the following `apt-get` command:

```bash
sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
```

### Next Steps

Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).

## FreeBSD/Solaris Prerequisites

### Dependencies

To install the required dependencies run the following `pkg` commands:

```bash
sudo pkg update
sudo pkg install gcc autoconf gmake flex
```

### GUI Dependencies

If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.

To install packages for graphics support use the following `pkg` command:

```bash
sudo pkg install wx28-gtk2-2.8.12_4
```

### Next Steps

Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).

## Mac OS X Prerequisites

* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
* [Homebrew](http://brew.sh/) (*optional*) - Package Manager.
First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X.

We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is optional and is not required to install Erlang.

Next, if you are running OS X 10.9 (Mavericks) or later, you may need to
install [autoconf](https://www.gnu.org/software/autoconf/). To check for
the presence of autoconf run:

```bash
which autoconf
```

If this returns `autoconf not found`, install autoconf either with Homebrew:

```bash
brew install autoconf
```

Or with curl:

```bash
curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
tar zxvf autoconf-2.69.tar.gz
cd autoconf-2.69
./configure && make && sudo make install
```

Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x).

## RHEL/CentOS Prerequisites

### Dependencies

To install the required dependencies run the following `yum` command:

```bash
sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
```

### GUI Dependencies

If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.

To install packages for graphics support use the following `yum` command:

```bash
sudo yum install wxBase.x86_64
```

### Next Steps

Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos).

## Installation

* [Installing with kerl](#installing-with-kerl)
* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu)
* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris)
* [Installing on Mac OS X](#installing-on-mac-os-x)
* [Installing on RHEL/CentOS](#installing-on-rhel-centos)

## Installing with kerl

First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites).

With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of
Erlang [from GitHub](https://github.com/basho/otp) using the following
command:

```bash
./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10
```

This builds the Erlang distribution and performs all of the steps
required to manually install Erlang for you.

After Erlang is successfully built, you can install the build as follows:

```bash
./kerl install R16B02-basho10 ~/erlang/R16B02-basho10
. ~/erlang/R16B02-basho10/activate
```

The last line activates the Erlang build that was just installed into
`~/erlang/R16B02-basho10`.

> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands.

Confirm Erlang installed to the correct location:

```bash
which erl
```

And start Erlang from your terminal with:

```bash
erl
```

## Installing on Debian/Ubuntu

First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites).

Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz).
+ +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). + +You can install Erlang in several ways on OS X: + +* [From Source](#installing-on-mac-os-x-from-source) +* [Homebrew](#installing-on-mac-os-x-with-homebrew) +* [MacPorts](#installing-on-mac-os-x-with-macports) + +## Installing on Mac OS X from Source + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Follow the steps below to configure Erlang for your operating system. + +#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7) + +If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion +(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang. 
Using LLVM:

```bash
CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \
--enable-kernel-poll --enable-darwin-64bit
```

Or if you prefer GCC:

```bash
CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \
./configure --disable-hipe --enable-smp-support --enable-threads \
--enable-kernel-poll --enable-darwin-64bit
```

#### Configuring Erlang on Snow Leopard (OS X 10.6)

If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an
Intel processor:

```bash
./configure --disable-hipe --enable-smp-support --enable-threads \
--enable-kernel-poll --enable-darwin-64bit
```

#### Configuring Erlang on older versions of OS X

If you're on a non-Intel processor or older version of OS X:

```bash
./configure --disable-hipe --enable-smp-support --enable-threads \
--enable-kernel-poll
```

After you've configured your system, `cd` into the unpacked directory, then build and install Erlang with:

```bash
cd OTP_R16B02_basho10
./otp_build autoconf
./configure && make && sudo make install
```

Confirm Erlang installed to the correct location by running:

```bash
which erl
```

And start Erlang from your terminal with:

```bash
erl
```

## Installing on Mac OS X with Homebrew

To install Erlang with Homebrew, use this command:

```bash
brew install erlang
```

Confirm Erlang installed to the correct location by running:

```bash
which erl
```

And start Erlang from your terminal with:

```bash
erl
```

## Installing on Mac OS X with MacPorts

Installing with MacPorts:

```bash
port install erlang +ssl
```

Confirm Erlang installed to the correct location by running:

```bash
which erl
```

And start Erlang from your terminal with:

```bash
erl
```

## Installing on RHEL/CentOS

First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites).

Next, download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz) using `wget`:

```bash
wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
```

Then unpack the download with:

```bash
tar zxvf otp_src_R16B02-basho10.tar.gz
```

Next `cd` into the unpacked directory, then build and install Erlang with:

```bash
cd OTP_R16B02_basho10
./otp_build autoconf
./configure && make && sudo make install
```

> **Note for RHEL6/CentOS6**
>
> In certain versions of RHEL6 and CentOS6 the `openssl-devel` package
ships with Elliptic Curve Cryptography partially disabled. To
communicate this to Erlang and prevent compile- and run-time errors, the
environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to
Erlang's `./configure` call.
+> +> The full `make` invocation then becomes +> +> ```bash +CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + + + diff --git a/content/riak/kv/2.9.10/setup/installing/source/jvm.md b/content/riak/kv/2.9.10/setup/installing/source/jvm.md new file mode 100644 index 0000000000..f22d180a99 --- /dev/null +++ b/content/riak/kv/2.9.10/setup/installing/source/jvm.md @@ -0,0 +1,54 @@ +--- +title: "Installing the JVM" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Installing the JVM" + identifier: "installing_source_jvm" + weight: 302 + parent: "installing_source" +toc: true +aliases: + - /riak/2.9.10/ops/building/installing/jvm + - /riak/kv/2.9.10/ops/building/installing/jvm + - /riak/2.9.10/ops/building/installing/Installing-the-JVM + - /riak/kv/2.9.10/ops/building/installing/Installing-the-JVM + - /riak/2.9.10/installing/source/jvm/ + - /riak/kv/2.9.10/installing/source/jvm/ +--- + +[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search + +If you are using [Riak Search 2.0][usage search], codename Yokozuna, +you will need to install **Java 1.6 or later** to run [Apache +Solr](https://lucene.apache.org/solr/), the search platform that powers +Riak Search. + +We recommend using Oracle's [JDK +7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html). +Installation packages can be found on the [Java SE 7 Downloads +page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR) +and instructions on the [documentation +page](http://www.oracle.com/technetwork/java/javase/documentation/index.html). + +## Installing Solr on OS X + +If you're using Riak Search on Mac OS X, you may see the following +error: + +```java +java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME> +``` + +If you encounter this error, we recommend manually setting the hostname +for `localhost` using +[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html). + +```bash +scutil --set HostName "localhost" +``` + + + diff --git a/content/riak/kv/2.9.10/setup/installing/suse.md b/content/riak/kv/2.9.10/setup/installing/suse.md new file mode 100644 index 0000000000..1e8fcaa88c --- /dev/null +++ b/content/riak/kv/2.9.10/setup/installing/suse.md @@ -0,0 +1,51 @@ +--- +title_supertext: "Installing on" +title: "SUSE" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "SUSE" + identifier: "installing_suse" + weight: 307 + parent: "installing" +toc: false +aliases: + - /riak/2.9.10/ops/building/installing/Installing-on-SUSE + - /riak/kv/2.9.10/ops/building/installing/Installing-on-SUSE + - /riak/2.9.10/installing/suse/ + - /riak/kv/2.9.10/installing/suse/ +--- + +[install verify]: {{<baseurl>}}riak/kv/2.9.10/setup/installing/verify + +{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}} +SUSE is no longer supported in Riak KV 2.9.10+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +Riak KV can be installed on OpenSuse and SLES systems using a binary package. 
The following steps have been tested to work with Riak on +the following x86/x86_64 flavors of SuSE: + +* SLES11-SP1 +* SLES11-SP2 +* SLES11-SP3 +* SLES11-SP4 +* OpenSUSE 11.2 +* OpenSUSE 11.3 +* OpenSUSE 11.4 + +## Installing with rpm + +```bash +wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm +sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + diff --git a/content/riak/kv/2.9.10/setup/installing/verify.md b/content/riak/kv/2.9.10/setup/installing/verify.md new file mode 100644 index 0000000000..5d7c92ca63 --- /dev/null +++ b/content/riak/kv/2.9.10/setup/installing/verify.md @@ -0,0 +1,168 @@ +--- +title: "Verifying a Riak KV Installation" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Verifying an Installation" + identifier: "installing_verify" + weight: 311 + parent: "installing" +toc: true +aliases: + - /riak/2.9.10/ops/installing/Post-Installation + - /riak/kv/2.9.10/ops/installing/Post-Installation + - /riak/2.9.10/installing/verify-install/ + - /riak/kv/2.9.10/installing/verify-install/ +--- + +[client libraries]: {{<baseurl>}}riak/kv/2.9.10/developing/client-libraries +[perf open files]: {{<baseurl>}}riak/kv/2.9.10/using/performance/open-files-limit +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/inspecting-node + +After you've installed Riak KV, we recommend checking the liveness of +each node to ensure that requests are being properly served. + +In this document, we cover ways of verifying that your Riak nodes are operating +correctly. After you've determined that your nodes are functioning and you're +ready to put Riak KV to work, be sure to check out the resources in the +**Now What?** section below. + +## Starting a Riak Node + +> **Note about source installations** +> +> To start a Riak KV node that was installed by compiling the source code, you +can add the Riak KV binary directory from the installation directory you've +chosen to your `PATH`. +> +> For example, if you compiled Riak KV from source in +the `/home/riak` directory, then you can add the binary directory +(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation. + +To start a Riak node, use the `riak start` command: + +```bash +riak start +``` + +A successful start will return no output. If there is a problem starting the +node, an error message is printed to standard error. + +To run Riak with an attached interactive Erlang console: + +```bash +riak console +``` + +A Riak node is typically started in console mode as part of debugging or +troubleshooting to gather more detailed information from the Riak startup +sequence. Note that if you start a Riak node in this manner, it is running as +a foreground process that will be exited when the console is closed. + +You can close the console by issuing this command at the Erlang prompt: + +```erlang +q(). +``` + +Once your node has started, you can initially check that it is running with +the `riak ping` command: + +```bash +riak ping +``` + +The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not. 
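+
+If you are standing up several nodes at once, it can save time to sweep them
+all with `riak ping`. The following is a minimal sketch, not part of the
+official tooling: it assumes passwordless SSH access and made-up hostnames
+(`riak1.example.com`, and so on), and it relies on `riak ping` exiting
+non-zero when a node is not responding:
+
+```bash
+#!/usr/bin/env bash
+# Hypothetical liveness sweep; hostnames and SSH access are assumptions,
+# not part of the Riak documentation.
+for host in riak1.example.com riak2.example.com riak3.example.com; do
+  if ssh "$host" riak ping >/dev/null 2>&1; then
+    echo "$host: pong"
+  else
+    echo "$host: not responding"
+  fi
+done
+```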
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the [bucket type][cluster ops bucket types] `default`:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+*   Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /riak/test HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To run Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node!
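+
+As one final end-to-end check, you can write an object over the HTTP API and
+read it back. This is a minimal sketch using the `default` bucket type; the
+bucket and key names (`smoke`, `hello`) are arbitrary choices for
+illustration:
+
+```bash
+# Store a small text object (bucket "smoke" and key "hello" are made up)
+curl -XPUT http://127.0.0.1:8098/types/default/buckets/smoke/keys/hello \
+  -H "Content-Type: text/plain" \
+  -d "it works"
+
+# Read it back; the response body should be "it works"
+curl http://127.0.0.1:8098/types/default/buckets/smoke/keys/hello
+```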
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
+
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/installing/windows-azure.md b/content/riak/kv/2.9.10/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..64571f6d9c
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/installing/windows-azure.md
@@ -0,0 +1,196 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/2.9.10/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/2.9.10/installing/windows-azure/
+  - /riak/kv/2.9.10/installing/windows-azure/
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported, and thus are not always up to
+date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Login to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+    - Provide a "Virtual Machine Name", such as "testlinuxvm".
+    - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+    - In the "New Password" box, type a strong password.
+    - In the "Confirm Password" box, retype the password.
+    - Select the appropriate "Size" from the drop down list.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+    - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list.
+ - In the "DNS Name" box, type a valid DNS address, e.g "testlinuxvm". + - In the "Storage Account" box, select "Use Automatically Generated Storage Account". + - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. + - Click the next arrow to continue. + + ![]({{<baseurl>}}images/vmconfiguration2.png) + +6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. + + ![]({{<baseurl>}}images/vmconfiguration3.png) + +7. Wait while Windows Azure prepares your virtual machine. + +### Configure Endpoints + +Once the virtual machine is created you must configure endpoints in order to remotely connect. + +1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints". + +2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows: + - Name: https + - Protocol: leave set to 'TCP' + - Public Port: 443 + - private Port: 8069 + +## Connect to CentOS VMs using PuTTY or SSH + +When the virtual machine has been provisioned and the endpoints configured you can connect to it using SSH or PuTTY. + +### Connecting Using SSH + +**For Linux & Mac Users:** + +```bash +ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180 +``` +Enter the user's password. + +**For Windows Users, use PuTTY:** + +If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html). + +1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe. + +2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. + + ![]({{<baseurl>}}images/putty.png) + +## Install Riak and configure using a shell script + +1. **On each node**, once you've connected using the steps above, execute: + +```bash +sudo su - +curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh +``` + +## Configure Riak using Riak Control + +You can either use Riak Control or the command line to add nodes to your Riak Cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line" + +1. Find the dns name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For Example: + - **dns:** basho-example.cloudapp.net + - **Deployment ID:** 7ea145743aeb4402a088da1234567890 + +2. Visit https://dns-name.cloudapp.net/admin in your browser + +3. Enter 'admin' as the username, and the "Deployment ID" as the password. + +4. Select 'Cluster' on the left. + +5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each vm, not the DNS name. For Example: + - riak@basho-centos1 + +You now have a Riak cluster on Azure + +## Configure Riak using Command Line + +If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section. 
+
+First, SSH into the second (and subsequent nodes) and execute:
+
+```bash
+riak-admin cluster join riak@yourhostnamehere
+```
+
+(Where 'yourhostnamehere' is the short name of the **first node** in your cluster)
+
+(NOTE: The host you choose can actually be any host that has already joined the cluster. The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak-admin cluster plan
+```
+
+Verify all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak-admin cluster commit
+```
+
+To check the status of clustering, use:
+
+```bash
+riak-admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/planning.md b/content/riak/kv/2.9.10/setup/planning.md
new file mode 100644
index 0000000000..2c9bf94409
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/planning.md
@@ -0,0 +1,59 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster.
+
+[Learn More >>][plan best practices]
+
+
+
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/planning/backend.md b/content/riak/kv/2.9.10/setup/planning/backend.md
new file mode 100644
index 0000000000..1688fce241
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/planning/backend.md
@@ -0,0 +1,59 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/building/planning/backends/
+  - /riak/kv/2.9.10/ops/building/planning/backends/
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/multi
+[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveled
+[dev api backend]: {{<baseurl>}}riak/kv/2.9.10/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+* [Leveled][plan backend leveled]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic                       |Bitcask|LevelDB|Memory|
+:-----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend                         |✓      |       |      |
+Persistent                                      |✓      |✓      |      |
+Keyspace in RAM                                 |✓      |       |✓     |
+Keyspace can be greater than available RAM      |       |✓      |      |
+Keyspace loaded into RAM on startup<sup>1</sup> |✓      |       |      |
+Objects in RAM                                  |       |       |✓     |
+Object expiration                               |✓      |       |✓     |
+Secondary indexes                               |       |✓      |✓     |
+Tiered storage                                  |       |✓      |      |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces.
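+
+If you're unsure which backend an existing node is actually running, you can
+ask the node itself. A quick check, using the `riak config effective` command
+covered in the Bitcask documentation:
+
+```bash
+# Prints the effective storage_backend setting for this node
+riak config effective | grep backend
+```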
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/planning/backend/bitcask.md b/content/riak/kv/2.9.10/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..d386c0c34d
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/planning/backend/bitcask.md
@@ -0,0 +1,993 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/advanced/backends/bitcask/
+  - /riak/kv/2.9.10/ops/advanced/backends/bitcask/
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/2.9.10/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: (1) data that is
+  written to Bitcask doesn't need to be ordered on disk, and (2) the
+  log-structured design of Bitcask allows for minimal disk head
+  movement during writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value, and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask,
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append only and write once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes. 
Recovery operations need to review only the last + record or two written and verify CRC data to ensure that the data is + consistent. + +* **Easy Backup** + + In most systems, backup can be very complicated. Bitcask simplifies + this process due to its append-only, write-once disk format. Any + utility that archives or copies files in disk-block order will + properly back up or copy a Bitcask database. + +## Weaknesses + +* Keys must fit in memory + + Bitcask keeps all keys in memory at all times, which means that your + system must have enough memory to contain your entire keyspace, plus + additional space for other operational components and operating- + system-resident filesystem buffer space. + +## Installing Bitcask + +Bitcask is the default storage engine for Riak. You can verify that +Bitcask is currently being used as the storage backend with the +[`riak`][use admin riak cli] command interface: + +```bash +riak config effective | grep backend +``` + +If this operation returns anything other than `bitcask`, read +the following section for instructions on switching the backend to Bitcask. + +## Enabling Bitcask + +You can set Bitcask as the storage engine using each node's +[configuration files][config reference]: + +```riakconf +storage_backend = bitcask +``` + +```appconfig +{riak_kv, [ + {storage_backend, riak_kv_bitcask_backend}, + %% Other riak_kv settings... + + ]}, +``` + +## Configuring Bitcask + +Bitcask enables you to configure a wide variety of its behaviors, from +filesystem sync strategy to merge settings and more. + +> **Note on configuration systems** +> +> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file. +> Instructions for both systems will be included below. Narrative +descriptions of the various settings will be tailored to the newer +configuration system, whereas instructions for the older system will +largely be contained in the code tabs. + +The default configuration values for Bitcask are as follows: + +```riakconf +bitcask.data_root = ./data/bitcask +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + {data_root, "/var/lib/riak/bitcask"}, + {io_mode, erlang}, + + %% Other Bitcask-specific settings + ]} +``` + +All of the other available settings listed below can be added to your +configuration files. + +### Open Timeout + +The open timeout setting specifies the maximum time Bitcask will block +on startup while attempting to create or open the Bitcask data +directory. The default is 4 seconds. + +In general, you will not need to adjust this setting. If, however, you +begin to receive log messages of the form `Failed to start bitcask +backend: ...`, you may want to consider using a longer timeout. + +Open timeout is specified using the `bitcask.sync.open_timeout` +parameter, and can be set in terms of seconds, minutes, hours, etc. +The following example sets the parameter to 10 seconds: + +```riakconf +bitcask.sync.open_timeout = 10s +``` + +```appconfig +{bitcask, [ + ..., + {open_timeout, 10} %% This value must be expressed in seconds + ... + ]} +``` + +### Sync Strategy + +Bitcask enables you to configure the durability of writes by specifying +when to synchronize data to disk, i.e. by choosing a sync strategy. The +default setting (`none`) writes data into operating system buffers that +will be written to disk when those buffers are flushed by the operating +system. 
If the system fails before those buffers are flushed, e.g. due +to power loss, that data is lost. This possibility holds for any +database in which values are asynchronously flushed to disk. + +Thus, using the default setting of `none` protects against data loss in +the event of application failure, i.e. process death, but leaves open a +small window in which data could be lost in the event of a complete +system failure, e.g. hardware or OS failure. + +This possibility can be prevented by choosing the `o_sync` sync +strategy, which forces the operating system to flush to stable storage +at write time for every write. The effect of flushing each write is +better durability, although it should be noted that write throughput +will suffer because each write will have to wait for the write to +complete. + +The following sync strategies are available: + + * `none` - lets the operating system manage syncing writes + (default) + * `o_sync` - uses the `O_SYNC` flag, which forces syncs on every + write + * Time interval - Riak will force Bitcask to sync at specified + intervals + +The following are possible configurations: + + +```riakconf +bitcask.sync.strategy = none +bitcask.sync.strategy = o_sync + +bitcask.sync.strategy = interval +bitcask.sync.interval = 65s +``` + +```appconfig +{bitcask, [ + ..., + {sync_strategy, none}, + {sync_strategy, o_sync}, + {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds + ... + ]} +``` + +> **Sync strategy interval limitations** +> +> Setting the sync interval to a value lower or equal to + `riak_core.vnode_inactivity_timeout` (default: 60 seconds), will + prevent Riak from performing handoffs. +> +> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive. + +### Max File Size + +The `max_file_size` setting describes the maximum permitted size for any +single data file in the Bitcask directory. If a write causes the current +file to exceed this size threshold then that file is closed, and a new +file is opened for writes. The default is 2 GB. + +Increasing `max_file_size` will cause Bitcask to create fewer, larger +files that are merged less frequently, while decreasing it will cause +Bitcask to create more numerous, smaller files that are merged more +frequently. + +To give an example, if your ring size is 16, your servers could see as +much as 32 GB of data in the bitcask directories before the first merge +is triggered, irrespective of your working set size. You should plan +storage accordingly and be aware that it is possible to see disk data +sizes that are larger than the working set. + +The `max_file_size` setting can be specified using kilobytes, megabytes, +etc. The following example sets the max file size to 1 GB: + +```riakconf +bitcask.max_file_size = 1GB +``` + +```appconfig +%% The max_file_size setting must be expressed in bytes, as in the +%% example below + +{bitcask, [ + ..., + {max_file_size, 16#40000000}, %% 1 GB expressed in bytes + ... + ]} +``` + +### Hint File CRC Check + +During startup, Bitcask will read from `.hint` files in order to build +its in-memory representation of the key space, falling back to `.data` +files if necessary. This reduces the amount of data that must be read +from the disk during startup, thereby also reducing the time required to +start up. 
You can configure Bitcask to either disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) - Writes are made via Erlang's built-in file API
+* `nif` - Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif` and is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met, at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory. 
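+
+One way to observe this trade-off on a live node is to watch the size of the
+per-vnode Bitcask directories before and after a merge. A minimal sketch,
+assuming the default `./data/bitcask` data root relative to Riak's working
+directory (package installs typically place it elsewhere, e.g. under
+`/var/lib/riak`):
+
+```bash
+# Report the on-disk size of each vnode's Bitcask directory, largest last;
+# totals should shrink after a merge reclaims dead keys.
+du -sh ./data/bitcask/* | sort -h
+```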
+ +The merge process is affected by all of the settings described in the +sections below. In those sections, "dead" refers to keys that no longer +contain the most up-to-date values, while "live" refers to keys that do +contain the most up-to-date value and have not been deleted. + +### Merge Policy + +Bitcask enables you to select a merge policy, i.e. when during the day +merge operations are allowed to be triggered. The valid options are: + +* `always` - No restrictions on when merge operations can occur + (default) +* `never` - Merge will never be attempted +* `window` - Merge operations occur during specified hours + +If you are using the newer, `riak.conf`-based configuration system, you +can select a merge policy using the `merge.policy` setting. The +following example sets the merge policy to `never`: + +```riakconf +bitcask.merge.policy = never +``` + +```appconfig +{bitcask, [ + ..., + {merge_window, never}, + ... + ]} +``` + +If you opt to specify start and end hours for merge operations, you can +do so with the `merge.window.start` and `merge.window.end` +settings in addition to setting the merge policy to `window`. +Each setting is an integer between 0 and 23 for hours on a 24h clock, +with 0 meaning midnight and 23 standing for 11 pm. +The merge window runs from the first minute of the `merge.window.start` hour +to the last minute of the `merge.window.end` hour. +The following example enables merging between 3 am and 4:59 pm: + +```riakconf +bitcask.merge.policy = window +bitcask.merge.window.start = 3 +bitcask.merge.window.end = 17 +``` + +```appconfig +%% In the app.config-based system, you specify the merge window using +%% a tuple, as in the following example: + +{bitcask, [ + ..., + {merge_window, {3, 17}}, + ... + ]} +``` + +> **`merge_window` and the Multi backend** +> +>If you are using the older configuration system and using Bitcask with +the [Multi][plan backend multi] backend, please note that if you +wish to use a merge window, you _must_ set it in the global `bitcask` +section of your configuration file. `merge_window` settings +in per-backend sections are ignored. + +If merging has a significant impact on performance of your cluster, or +if your cluster has quiet periods in which little storage activity +occurs, you may want to change this setting from the default. + +A common way to limit the impact of merging is to create separate merge +windows for each node in the cluster and ensure that these windows do +not overlap. This ensures that at most one node at a time can be +affected by merging, leaving the remaining nodes to handle requests. +The main drawback of this approach is that merges will occur less +frequently, leading to increased disk space usage. + +### Merge Triggers + +Merge triggers determine the conditions under which merging will be +invoked. These conditions fall into two basic categories: + +* **Fragmentation** - This describes the ratio of dead keys to total + keys in a file that will trigger merging. The value of this setting is + an integer percentage (0-100). For example, if a data file contains 6 + dead keys and 4 live keys, a merge will be triggered by the default + setting (60%). Increasing this value will cause merging to occur less + often, whereas decreasing the value will cause merging to happen more + often. + +* **Dead Bytes** - This setting describes how much data stored for + dead keys in a single file will trigger merging. If a file meets or + exceeds the trigger value for dead bytes, a merge will be triggered. 
+
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+  When either of these constraints is met by any file in the directory,
+  Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** - This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in the
+  merge. The value of this setting is a percentage (0-100). For example,
+  if a data file contains 4 dead keys and 6 live keys, it will be
+  included in the merge at the default ratio (40%). Increasing the value
+  will cause fewer files to be merged, while decreasing the value will
+  cause more files to be merged.
+
+* **Dead Bytes** - This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be included in
+  the merge. Increasing this value will cause fewer files to be merged,
+  while decreasing this value will cause more files to be merged. The
+  default is 128 MB.
+
+* **Small File** - This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_. 
If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +Fold keys thresholds will reuse the keydir (a) if another fold was +started less than a specified time interval ago and (b) there were fewer +than a specified number of updates. Otherwise, Bitcask will wait until +all current fold keys complete and then start. The default time interval +is 0, while the default number of updates is unlimited. Both thresholds +can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. Each threshold can be disabled by setting the value to +`unlimited`. 
The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value or if you need to purge data for space reasons, you can
+configure object expiration, aka expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting
+and either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time.
+Bitcask will defer a merge triggered solely by key expiry for the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting,
+expressed in terms of minutes, hours, days, etc. The following example
+sets the grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If search queries returning
+expired keys is a problem for your use case, then we would recommend not
+using automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can help you make Bitcask as stable and reliable as
+possible while minimizing latency and maximizing throughput. 
+
+### Tips & Tricks
+
+  * **Bitcask depends on filesystem caches**
+
+    Some data storage layers implement their own page/block buffer cache
+    in-memory, but Bitcask does not. Instead, it depends on the
+    filesystem's cache. Adjusting the caching characteristics of your
+    filesystem can impact performance.
+
+  * **Be aware of file handle limits**
+
+    Review the documentation on [open files limit][perf open files].
+
+  * **Avoid the overhead of updating file metadata (such as last access
+    time) on every read or write operation**
+
+    You can achieve a substantial speed boost by adding the `noatime`
+    mounting option to Linux's `/etc/fstab`. This will disable the
+    recording of the last accessed time for all files, which results
+    in fewer disk head seeks. If you need last access times but you'd
+    like some of the benefits of this optimization, you can try
+    `relatime`.
+
+    ```
+    /dev/sda5    /data           ext3    noatime  1 1
+    /dev/sdb1    /data/inno-log  ext3    noatime  1 2
+    ```
+
+  * **Small number of frequently changed keys**
+
+    When keys are changed frequently, fragmentation rapidly increases.
+    To counteract this, you should lower the fragmentation trigger and
+    threshold.
+
+  * **Limited disk space**
+
+    When disk space is limited, limiting the space occupied by dead keys
+    is of paramount importance. Lower the dead bytes threshold and
+    trigger to counteract wasted space.
+
+  * **Purging stale entries after a fixed period**
+
+    To automatically purge stale values, set the object expiry value to
+    the desired cutoff time. Keys that are not modified for a period
+    equal to or greater than this time interval will become
+    inaccessible.
+
+  * **High number of partitions per node**
+
+    Because each node runs many partitions, Bitcask will have
+    many [open files][perf open files]. To reduce the number of open
+    files, we suggest increasing the max file size so that larger files
+    will be written. You could also decrease the fragmentation and
+    dead-bytes settings and increase the small file threshold so that
+    merging will keep the number of open files small.
+
+  * **High daytime traffic, low nighttime traffic**
+
+    In order to cope with a high volume of writes without performance
+    degradation during the day, you might want to limit merging to
+    non-peak periods. Setting the merge window to hours of the day
+    when traffic is low will help.
+
+  * **Multi-cluster replication**
+
+    If you are using Riak with the replication feature enabled, your clusters might experience
+    higher rates of fragmentation and dead bytes. Additionally,
+    because the fullsync feature operates across entire partitions, it
+    will be made more efficient by accessing data as sequentially as
+    possible (across fewer files). Lowering both the fragmentation and
+    dead-bytes settings will improve performance.
+
+## FAQ
+
+  * [[Why does it seem that Bitcask merging is only triggered when a
+    Riak node is restarted?|Developing on Riak
+    FAQs#why-does-it-seem-that-bitc]]
+  * [[If the size of key index exceeds the amount of memory, how does
+    Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+  * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time. 
The file being
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, while
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. This data management strategy
+may use up a lot of space over time, since Bitcask writes new values
+without touching the old ones.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033322.9.1044576013453623296
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+9819464125817003981681007469812.9.03831329677312
+```
+
+Note that at startup, directories are created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 116463411724806326294356135107078803128832.9.0184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered. 
+ +``` +bitcask/ +|-- 0-1317147619996589 +| |-- 1317147974.bitcask.data +| |-- 1317147974.bitcask.hint +| |-- 1317221578.bitcask.data +| |-- 1317221578.bitcask.hint +| |-- 1317221869.bitcask.data +| |-- 1317221869.bitcask.hint +| |-- 1317222847.bitcask.data +| |-- 1317222847.bitcask.hint +| |-- 1317222868.bitcask.data +| |-- 1317222868.bitcask.hint +| |-- 1317223014.bitcask.data +| `-- 1317223014.bitcask.hint +|-- 1004782375664995756265033322.9.1044576013453623296-1317147628760580 +| |-- 1317147693.bitcask.data +| |-- 1317147693.bitcask.hint +| |-- 13172.9.05.bitcask.data +| |-- 13172.9.05.bitcask.hint +| |-- 1317222514.bitcask.data +| |-- 1317222514.bitcask.hint +| |-- 1317223035.bitcask.data +| |-- 1317223035.bitcask.hint +| |-- 1317223411.bitcask.data +| `-- 1317223411.bitcask.hint +|-- 1027618338748291114361965898003636498195577569280-1317223690337865 +|-- 1050454301831586472458898473514828420377701515264-1317223690151365 + +... etc ... + +``` + +This is normal operational behavior for Bitcask. + + + diff --git a/content/riak/kv/2.9.10/setup/planning/backend/leveldb.md b/content/riak/kv/2.9.10/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..53d58dfb3c --- /dev/null +++ b/content/riak/kv/2.9.10/setup/planning/backend/leveldb.md @@ -0,0 +1,505 @@ +--- +title: "LevelDB" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "LevelDB" + identifier: "planning_backend_leveldb" + weight: 101 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.10/ops/advanced/backends/leveldb/ + - /riak/kv/2.9.10/ops/advanced/backends/leveldb/ +--- + +[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}} +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode +[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference +[perf index]: {{<baseurl>}}riak/kv/2.9.10/using/performance +[config reference#aae]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/#active-anti-entropy + +> **Note on upgrading to 2.0** +> +> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0, +and wish to keep using your old `app.config` file for configuration, +make sure to follow the steps for setting the +`total_leveldb_mem_percent` parameter in the +[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB]. + +[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application +that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an +open-source, on-disk key/value store created by Google Fellows Jeffrey +Dean and Sanjay Ghemawat. + +LevelDB is a relatively new entrant into the growing list of key/value +database libraries, but it has some very interesting qualities that we +believe make it an ideal candidate for use in Riak. LevelDB's storage +architecture is more like +[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable +model than it is like Bitcask. This design and implementation provide +the possibility of a storage engine without Bitcask's RAM limitation. + +> **Note:** Riak uses a fork of LevelDB. The code can be found +[on Github](https://github.com/basho/leveldb). + +A number of changes have been introduced in the LevelDB backend in Riak +2.0: + +* There is now only _one_ performance-related setting that Riak users + need to define---`leveldb.total_mem_percent`---as LevelDB now + dynamically sizes the file cache and block sizes based upon active + [vnodes][glossary vnode] assigned to the node. 
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model + for background compaction work on `.sst` table files. The new model + has increased throughput by at least 10% in all test scenarios. +* Delete operations now receive priority handling in compaction + selection, which means more aggressive reclaiming of disk space than + in previous versions of Riak's LevelDB backend. +* Nodes storing massive key datasets (e.g. in the billions of keys) now + receive increased throughput due to automatic management of LevelDB's + block size parameter. This parameter is slowly raised to increase the + number of files that can open simultaneously, improving random read + performance. + +## Strengths + +1. **License** - The LevelDB and eLevelDB licenses are the [New BSD + License](http://www.opensource.org/licenses/bsd-license.php) and the + [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html), + respectively. We'd like to thank Google and the authors of LevelDB at + Google for choosing a completely FLOSS license so that everyone can + benefit from this innovative storage engine. +2. **Data compression** - LevelDB provides two compression algorithms + to reduce storage size and increase efficient use of storage bandwidth: + * Google's [Snappy](https://code.google.com/p/snappy/) data compression + * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data + compression + + Enabling compression means more CPU usage but less disk space. Compression + is especially good for text data, including raw text, Base64, JSON, etc. + +## Weaknesses + +1. Read access can be slow when there are many levels to search +2. LevelDB may have to do a few disk seeks to satisfy a read; one disk + seek per level and, if 10% of the database fits in memory, one seek + for the last level (since all of the earlier levels should end up + cached in the OS buffer cache for most filesystems) whereas if 1% + fits in memory, LevelDB will need two seeks. + +## Installing eLevelDB + +Riak ships with eLevelDB included within the distribution, so there is +no separate installation required. However, Riak is configured to use +the Bitcask storage engine by default. To switch to eLevelDB, set the +`storage_backend` variable in [`riak.conf`][config reference] to +`leveldb`: + +```riakconf +storage_backend = leveldb +``` + +```appconfig +{riak_kv, [ + %% ... + {storage_backend, riak_kv_eleveldb_backend}, + %% ... + ]} +``` + +## Configuring eLevelDB + +eLevelDb's default behavior can be modified by adding/changing +parameters in the `eleveldb` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB. + +The configuration values that can be set in your +[`riak.conf`][config reference] for eLevelDB are as follows: + +Config | Description | Default +:------|:------------|:------- +`leveldb.data_root` | LevelDB data root | `./data/leveldb` +`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size. 
| `70` + +If you are using the older, `app.config`-based system, the equivalent to +the `leveldb.data_root` is the `data_root` setting, as in the following +example: + +```appconfig +{eleveldb, [ + {data_root, "/path/to/leveldb"}, + + %% Other eleveldb-specific settings +]} +``` + +The `leveldb.maximum_memory.percent` setting is only available in the +newer configuration system. + +### Recommended Settings + +Below are **general** configuration recommendations for Linux +distributions. Individual users may need to tailor these settings for +their application. + +#### sysctl + +For production environments, please see [System Performance Tuning][perf index] +for the recommended `/etc/sysctl.conf` settings. + +#### Block Device Scheduler + +Beginning with the 2.6 kernel, Linux gives you a choice of four I/O +[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We +recommend using the NOOP elevator. You can do this by changing the +scheduler on the Linux boot line: `elevator=noop`. + +#### ext4 Options + +The ext4 filesystem defaults include two options that increase integrity +but slow performance. Because Riak's integrity is based on multiple +nodes holding the same data, these two options can be changed to boost +LevelDB's performance. We recommend setting: `barrier`=0 and +`data`=writeback. + +#### CPU Throttling + +If CPU throttling is enabled, disabling it can boost LevelDB performance +in some cases. + +#### No Entropy + +If you are using https protocol, the 2.6 kernel is widely known for +stalling programs waiting for SSL entropy bits. If you are using https, +we recommend installing the +[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for +pseudorandom number generation. + +#### clocksource + +We recommend setting `clocksource=hpet` on your Linux kernel's `boot` +line. The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a +Google-sponsored open source project that has been incorporated into an +Erlang application and integrated into Riak for storage of key/value +information on disk. The implementation of LevelDB is similar in spirit +to the representation of a single Bigtable tablet (section 5.3). + +### How Levels Are Managed + +LevelDB is a memtable/sstable design. The set of sorted tables is +organized into a sequence of levels. Each level stores approximately ten +times as much data as the level before it. The sorted table generated +from a flush is placed in a special young level (also called level-0). +When the number of young files exceeds a certain threshold (currently +four), all of the young files are merged together with all of the +overlapping level-1 files to produce a sequence of new level-1 files (a +new level-1 file is created for every 2MB of data.) + +Files in the young level may contain overlapping keys. However files in +other levels have distinct non-overlapping key ranges. Consider level +number L where L >= 1. When the combined size of files in level-L +exceeds (10^L) MB (i.e. 
10MB for level-1, 100MB for level-2, ...), one
+file in level-L, and all of the overlapping files in level-(L+1) are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-L+1). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 seconds.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0, the only special case to consider is that after
+picking the primary L0 file to compact, LevelDB will check other L0
+files to determine the degree to which they overlap. This is an attempt
+to avoid some I/O; in practice, we can expect L0 compactions to usually,
+if not always, be "all L0 files".
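+
+To make the scoring rule concrete, here is a small illustrative
+calculation (the level sizes are invented for the example; the 10^L MB
+targets follow from the level-management rules described above):
+
+```bash
+# Illustrative only: compute LevelDB-style compaction scores for
+# hypothetical level sizes, using 10^L MB as the target size of level L.
+for L in 1 2 3; do
+  case $L in
+    1) bytes_mb=25 ;;    # hypothetical current size of level-1
+    2) bytes_mb=80 ;;    # hypothetical current size of level-2
+    3) bytes_mb=500 ;;   # hypothetical current size of level-3
+  esac
+  target_mb=$((10 ** L))
+  echo "level-$L score: $(echo "scale=2; $bytes_mb / $target_mb" | bc)"
+done
+```
+
+Here level-1's score (2.50) is the highest, so it would be compacted
+first, even though level-3 holds more raw data.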
+
+See the PickCompaction routine in
+[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level, since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition ring
+which results in 64 separate directories, each with its own LevelDB
+database:
+
+```bash
+leveldb/
+|-- 0
+| |-- 000003.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| `-- MANIFEST-000002
+|-- 1004782375664995756265033322.9.1044576013453623296
+| |-- 000005.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| |-- LOG.old
+| `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+| |-- 000005.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| |-- LOG.old
+| `-- MANIFEST-000004
+
+... etc ...
+
+`-- 9819464125817003981681007469812.9.03831329677312
+ |-- 000005.log
+ |-- CURRENT
+ |-- LOCK
+ |-- LOG
+ |-- LOG.old
+ `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033322.9.1044576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files in a
+single database directory. In Riak 1.3, the original LevelDB code was
+modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in the name of speeding up
+database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels.
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows, for each level, three
+approximate sizes (in megabytes): the amount of raw data stored in the
+level, the cumulative size of all levels up to and including that
+level, and the cumulative size including active anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be roughly 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above. Select the third column if you are not
+  using active anti-entropy or the fourth column if you are (i.e. if the
+  `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative column in
+  each row in the table. The first result that exceeds your fast storage
+  array capacity will provide the level number that should be used for
+  your `leveldb.tiered` setting.
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting the configuration described
+above. The rest is automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/planning/backend/leveled.md b/content/riak/kv/2.9.10/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..f4bf24923c
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/planning/backend/leveled.md
@@ -0,0 +1,140 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/advanced/backends/leveled/
+  - /riak/kv/2.9.10/ops/advanced/backends/leveled/
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/2.9.10/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/#active-anti-entropy
+
+[Leveled](https://github.com/martinsumner/leveled) is a simple Key-Value
+store based on the concept of Log-Structured Merge Trees, with the
+following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+    - Splits the storage of each value between keys/metadata and body
+      (assuming some definition of metadata is provided);
+    - Allows the application to define what constitutes object metadata
+      and what constitutes the body (value-part) of the object, and to
+      assign tags to objects to manage multiple object-types with
+      different extraction rules;
+    - Stores keys/metadata in a merge tree and the full object in a
+      journal of CDB files, allowing for HEAD requests which have lower
+      overheads than GET requests; and
+    - Supports queries which traverse keys/metadata with fewer side
+      effects on the page cache than folds over keys/objects.
+- Support for tagging of object types and the implementation of
+  alternative store behaviour based on type:
+    - Allows changes to extract specific information as metadata to be
+      returned from HEAD requests;
+    - Potentially usable for objects with special retention or merge
+      properties.
+- Support for low-cost clones without locking to provide for scanning
+  queries (e.g. secondary indexes), with low cost specifically where
+  there is a need to scan across keys and metadata (not values).
+- Written in Erlang as a message-passing system between Actors.
+
+
+## Strengths
+
+1. Leveled was developed specifically as a potential backend for Riak,
+   with features such as:
+    * Support for secondary indexes
+    * Multiple fold types
+    * Auto expiry of objects
+
+    Leveled also supports data compression (see the
+    `leveled.compression_method` setting below). Enabling compression
+    means more CPU usage but less disk space. Compression
+    is especially good for text data, including raw text, Base64, JSON,
+    etc.
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and is more likely to suffer from edge-case issues than Bitcask or LevelDB, simply because those backends have been around longer and have been more thoroughly tested through use in customer environments.
+2. Leveled works better with medium to large objects. It works perfectly well with small objects, but the additional disk space overhead may make LevelDB a better choice if disk space is at a premium and all of your data will be limited to a few KB or less. This may change as Leveled matures, though.
+
+## Installing leveled
+
+Leveled is included with Riak KV 2.9.10 and beyond, so there is no need
+to install anything further. To switch to leveled, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_leveled_backend},
+    %% ...
+    ]}
+```
+
+## Configuring leveled
+
+Leveled's default behavior can be modified by adding/changing
+parameters in the `leveled` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | Leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | Compression method. | `native`
+`leveled.compression_point` | Compression point - the point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | Log level - set the minimum log level to be used within leveled. | `info`
+`leveled.journal_size` | The approximate size (in bytes) when a Journal file should be rolled. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
+`leveled.max_run_length` | Max journal files per compaction run. | `4`
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### No Entropy
+
+If you are using the HTTPS protocol, the 2.6 kernel is widely known for
+stalling programs waiting for SSL entropy bits. If you are using HTTPS,
+we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line.
The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[Leveled](https://github.com/martinsumner/leveled) is an open source project that has been developed specifically as a backend option for Riak, rather than a generic backend. + + + diff --git a/content/riak/kv/2.9.10/setup/planning/backend/memory.md b/content/riak/kv/2.9.10/setup/planning/backend/memory.md new file mode 100644 index 0000000000..6225e56379 --- /dev/null +++ b/content/riak/kv/2.9.10/setup/planning/backend/memory.md @@ -0,0 +1,146 @@ +--- +title: "Memory" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Memory" + identifier: "planning_backend_memory" + weight: 102 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.10/ops/advanced/backends/memory/ + - /riak/kv/2.9.10/ops/advanced/backends/memory/ +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference +[plan backend multi]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/multi +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb + +The Memory storage backend uses in-memory tables to store all data. +This data is never persisted to disk or to any other storage mechanism. +The Memory storage engine is best used for testing Riak clusters or for +storing small amounts of transient state in production systems. + +Internally, the Memory backend uses Erlang Ets tables to manage data. +More information can be found in the +[official Erlang documentation](http://www.erlang.org/doc/man/ets.html). + +## Enabling the Memory Backend + +To enable the memory backend, edit your [configuration files][config reference] +for each Riak node and specify the Memory backend as shown in the following +example: + +```riakconf +storage_backend = memory +``` + +```appconfig +{riak_kv, [ + ..., + {storage_backend, riak_kv_memory_backend}, + ... + ]} +``` + +**Note**: If you *replace* the existing specified backend by removing it +or commenting it out as shown in the above example, data belonging to +the previously specified backend will still be preserved on the +filesystem but will no longer be accessible through Riak unless the +backend is enabled again. + +If you require multiple backends in your configuration, please consult +the [Multi backend documentation][plan backend multi]. + +## Configuring the Memory Backend + +The Memory backend enables you to configure two fundamental aspects of +object storage: maximum memory usage per [vnode][glossary vnode] +and object expiry. + +### Max Memory + +This setting specifies the maximum amount of memory consumed by the +Memory backend. It's important to note that this setting acts on a +*per-vnode basis*, not on a per-node or per-cluster basis. This should +be taken into account when planning for memory usage with the Memory +backend, as the total memory used will be max memory times the number +of vnodes in the cluster. 
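+
+As a rough worked example of that multiplication (a sketch; the ring
+size, node count, and per-vnode limit below are assumptions, not
+recommendations):
+
+```bash
+# Hypothetical inputs; substitute your own cluster's values.
+ring_size=128                 # total partitions (vnodes) in the cluster
+nodes=5                       # number of nodes in the cluster
+max_memory_per_vnode_mb=512   # memory_backend.max_memory_per_vnode
+
+vnodes_per_node=$(( (ring_size + nodes - 1) / nodes ))   # ~26, rounded up
+echo "up to $(( vnodes_per_node * max_memory_per_vnode_mb ))MB per node"
+```
+
+With these numbers, the Memory backend alone could consume roughly 13GB
+on each node, which is why the per-vnode basis of this setting matters
+when sizing.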
+
+When the threshold value that you set has been met in a particular
+vnode, Riak will begin discarding objects, beginning with the oldest
+object and proceeding until memory usage returns below the allowable
+threshold.
+
+You can configure maximum memory using the
+`memory_backend.max_memory_per_vnode` setting. You can specify
+`max_memory_per_vnode` however you'd like, using kilobytes, megabytes,
+or even gigabytes.
+
+The following are all possible settings:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 500KB
+memory_backend.max_memory_per_vnode = 10MB
+memory_backend.max_memory_per_vnode = 2GB
+```
+
+```appconfig
+%% In the app.config-based system, the equivalent setting is max_memory,
+%% which must be expressed in megabytes:
+
+{riak_kv, [
+    %% storage_backend specifies the Erlang module defining the storage
+    %% mechanism that will be used on this node.
+
+    {storage_backend, riak_kv_memory_backend},
+    {memory_backend, [
+        ...,
+        {max_memory, 4096}, %% 4GB in megabytes
+        ...
+    ]}
+]}
+```
+
+To determine an optimal max memory setting, we recommend consulting the
+documentation on [LevelDB cache size][plan backend leveldb].
+
+### TTL
+
+The time-to-live (TTL) parameter specifies the amount of time an object
+remains in memory before it expires. The minimum time is one second.
+
+In the newer, `riak.conf`-based configuration system, you can specify
+`ttl` in seconds, minutes, hours, days, etc. The following are all
+possible settings:
+
+```riakconf
+memory_backend.ttl = 1s
+memory_backend.ttl = 10m
+memory_backend.ttl = 3h
+```
+
+```appconfig
+%% In the app.config-based system, the ttl setting must be expressed in
+%% seconds:
+
+{memory_backend, [
+    %% other settings
+    {ttl, 86400}, %% Set to 1 day
+    %% other settings
+    ]}
+```
+
+> **Dynamically Changing `ttl`**
+>
+> There is currently no way to dynamically change the `ttl` setting for a
+bucket or bucket type. The current workaround would be to define
+multiple Memory backends using the Multi backend, each with different
+`ttl` values. For more information, consult the documentation on the
+[Multi][plan backend multi] backend.
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/planning/backend/multi.md b/content/riak/kv/2.9.10/setup/planning/backend/multi.md
new file mode 100644
index 0000000000..0cacd497ee
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/planning/backend/multi.md
@@ -0,0 +1,229 @@
+---
+title: "Multi-backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Multi-backend"
+    identifier: "planning_backend_multi"
+    weight: 103
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/advanced/backends/multi/
+  - /riak/kv/2.9.10/ops/advanced/backends/multi/
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/memory
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types
+[use admin riak-admin cli]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin
+
+Riak allows you to run multiple backends within a single Riak cluster.
+Selecting the Multi backend enables you to use different storage
+backends for different [buckets][concept buckets]. Any combination of the three
+available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used.
+
+## Configuring Multiple Backends
+
+You can set up your cluster to use the Multi backend using Riak's
+[configuration files][config reference].
+
+```riakconf
+storage_backend = multi
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_multi_backend},
+    %% ...
+]},
+```
+
+Remember that you must stop and then re-start each node when you change
+storage backends or modify any other configuration.
+
+## Using Multiple Backends
+
+In Riak 2.0 and later, we recommend using multiple backends by applying
+them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process
+involves three steps:
+
+1. Creating a bucket type that enables buckets of that type to use the
+   desired backends
+2. Activating that bucket type
+3. Setting up your application to use that type
+
+Let's say that we've set up our cluster to use the Multi backend and we
+want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface.
+
+We'll call our bucket types `leveldb_backend` and `memory_backend`, but
+you can use whichever names you wish.
+
+```bash
+riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}'
+riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}'
+```
+
+Then, we must activate those bucket types so that they can be used in
+our cluster:
+
+```bash
+riak-admin bucket-type activate leveldb_backend
+riak-admin bucket-type activate memory_backend
+```
+
+Once those types have been activated, any objects stored in buckets
+bearing the type `leveldb_backend` will be stored in LevelDB, whereas
+all objects stored in buckets of the type `memory_backend` will be
+stored in the Memory backend.
+
+More information can be found in our documentation on [using bucket types][usage bucket types].
+
+## Configuring Individual Backends
+
+Once you've set up your cluster to use multiple backends, you can
+configure each backend on its own. All configuration options for
+LevelDB, Bitcask, and Memory are available to you when using the
+Multi backend.
+
+#### Using the Newer Configuration System
+
+If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by
+prefacing each configuration with `multi_backend`.
+ +Here is an example of the general form for configuring multiple +backends: + +```riakconf +multi_backend.$name.$setting_name = setting +``` + +If you are using, for example, the LevelDB and Bitcask backends and wish +to set LevelDB's `bloomfilter` setting to `off` and the Bitcask +backend's `io_mode` setting to `nif`, you would do that as follows: + +```riakconf +multi_backend.leveldb.bloomfilter = off +multi_backend.bitcask.io_mode = nif +``` + +#### Using the Older Configuration System + +If you are using the older, `app.config`-based configuration system, +configuring multiple backends involves adding one or more backend- +specific sections to your `riak_kv` settings (in addition to setting +the `storage_backend` setting to `riak_kv_multi_backend`, as shown +above). + +> **Note**: If you are defining multiple file-based backends of the same +type, each of these must have a separate `data_root` directory defined. + +While all configuration parameters can be placed anywhere within the +`riak_kv` section of `app.config`, in general we recommend that you +place them in the section containing other backend-related settings to +keep the settings organized. + +Below is the general form for your `app.config` file: + +```appconfig +{riak_kv, [ + %% ... + {multi_backend_default, <<"bitcask_mult">>}, + {multi_backend, [ + %% Here's where you set the individual multiplexed backends + {<<"bitcask_mult">>, riak_kv_bitcask_backend, [ + %% bitcask configuration + {data_root, "/var/lib/riak/bitcask_mult/"}, + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [ + %% bitcask configuration + {data_root, "/var/lib/riak/bitcask_expiry_mult/"}, + {expiry_secs, 86400}, + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [ + %% eleveldb configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [ + %% eleveldb with a different configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"memory_mult">>, riak_kv_memory_backend, [ + %% memory configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]} + ]}, + %% ... +]}, +``` + +Note that in each of the subsections of the `multi_backend` setting, the +name of each backend you wish to configure can be anything you would +like. Directly after naming the backend, you must specify which of the +backends corresponds to that name, i.e. `riak_kv_bitcask_backend`, +`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have +done that, the various configurations for each named backend can be set +as objects in an Erlang list. + +## Example Configuration + +Imagine that you are using both Bitcask and LevelDB in your cluster, and +you would like storage to default to Bitcask. The following +configuration would create two backend configurations, named +`bitcask_mult` and `leveldb_mult`, respectively, while also setting the +data directory for each backend and specifying that `bitcask_mult` is +the default. + +```riakconf +storage_backend = multi + +multi_backend.bitcask_mult.storage_backend = bitcask +multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult + +multi_backend.leveldb_mult.storage_backend = leveldb +multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult + +multi_backend.default = bitcask_mult +``` + +```appconfig +{riak_kv, [ + %% ... 
+  {multi_backend_default, <<"bitcask_mult">>},
+  {multi_backend, [
+    {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+      {data_root, "/var/lib/riak/bitcask"}
+    ]},
+    {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+      {data_root, "/var/lib/riak/leveldb"}
+    ]}
+  ]}
+  %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or for the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is at or
+below 50%. For example, using three backends with each set to 50%
+memory usage will inevitably lead to memory problems.
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/planning/best-practices.md b/content/riak/kv/2.9.10/setup/planning/best-practices.md
new file mode 100644
index 0000000000..0d5ad8178a
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/planning/best-practices.md
@@ -0,0 +1,144 @@
+---
+title: "Scaling and Operating Riak Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Best Practices"
+    identifier: "planning_best_practices"
+    weight: 105
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/building/planning/best-practices
+  - /riak/kv/2.9.10/ops/building/planning/best-practices
+---
+
+[use ref handoff]: {{<baseurl>}}riak/kv/2.9.10/using/reference/handoff
+[config mapreduce]: {{<baseurl>}}riak/kv/2.9.10/configuring/mapreduce
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#active-anti-entropy-aae
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes
+
+Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster.
+
+## Disk Capacity
+
+Filling up disks is a serious problem in Riak. In general, you should
+add capacity under the following conditions:
+
+* a disk becomes more than 80% full
+* you have fewer than 10 days of capacity remaining at current rates of
+  growth
+
+## RAID Levels
+
+Riak provides resilience through its built-in redundancy.
+
+* RAID0 can be used to increase performance at the expense of
+  single-node reliability
+* RAID5/6 can be used to increase the reliability over RAID0 but still
+  offers higher performance than single disks
+* You should choose a RAID level (or no RAID) that you’re comfortable
+  with
+
+## Disk Leeway
+
+* Adding new nodes instantly increases the total capacity of the
+  cluster, but you should allow enough internal network capacity that
+  [handing off][use ref handoff] existing data outpaces the arrival of new
+  data.
+* Once you’ve reached a scale at which the amount of new data arriving
+  is a small fraction of the cluster's total capacity, you can add new
+  nodes when you need them. You should be aware, however, that adding
+  new nodes can actually _increase_ disk usage on existing nodes in the
+  short term as data is rebalanced within the cluster.
+* If you are certain that you are likely to run out of capacity, we
+  recommend allowing a week or two of leeway so that you have plenty of
+  time to add nodes and for [handoff][use ref handoff] to occur before the disks reach
+  capacity
+* For large volumes of storage it's usually prudent to add more capacity
+  once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+  processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you’ll have spare
+  capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be “bursty,” i.e. it tends to vary both quite
+  a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times the amount of intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+  fill up
+* the current node's IO/CPU activity is higher than average for an
+  extended period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node uses 1 TB but the
+  rest use 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node].
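+
+As a sketch, adding three nodes in a single staged operation might look
+like the following (node names are placeholders):
+
+```bash
+# Run on each NEW node, pointing it at any node already in the cluster:
+riak-admin cluster join riak@node1.example.com
+
+# Then, from any one node, review and commit all staged changes at once:
+riak-admin cluster plan
+riak-admin cluster commit
+```
+
+Staging every join before a single `plan`/`commit` keeps the rebalance
+to one transition rather than several.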
+ +## Scaling + +* All large-scale systems are bound by the availability of some + resources +* From a stability point of view, the best state for a busy Riak cluster + to maintain is the following: + * New network connections are limited to ensure that existing network + connections consume most network bandwidth + * CPU at < 30% + * Disk IO at < 90% +* You should use HAProxy or your application servers to limit new + network connections to keep network and IO below 90% and CPU below + 30%. + + + diff --git a/content/riak/kv/2.9.10/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.9.10/setup/planning/bitcask-capacity-calc.md new file mode 100644 index 0000000000..9fb767a894 --- /dev/null +++ b/content/riak/kv/2.9.10/setup/planning/bitcask-capacity-calc.md @@ -0,0 +1,103 @@ +--- +title: "Bitcask Capacity Calculator" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Bitcask Capacity Calculator" + identifier: "planning_cluster_bitcask_capacity" + weight: 104 + parent: "planning" +toc: true +aliases: + - /riak/2.9.10/ops/building/planning/bitcask + - /riak/kv/2.9.10/ops/building/planning/bitcask +--- + +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask + +These calculators will assist you in sizing your cluster if you plan to +use the default [Bitcask][plan backend bitcask] storage back end. + +This page is designed to give you a rough estimate when sizing your +cluster. The calculations are a _best guess_, and they tend to be a bit +on the conservative side. It's important to include a bit of head room +as well as room for unexpected growth so that if demand exceeds +expectations you'll be able to add more nodes to the cluster and stay +ahead of your requirements. 
+
+<div id="node_info" class="calc_info"></div>
+<div class="calculator">
+  <ul>
+    <li>
+      <label for="n_total_keys">Total Number of Keys:</label>
+      <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input">
+      <span class="error_span" id="n_total_keys_error"></span>
+    </li>
+    <li>
+      <label for="n_bucket_size">Average Bucket Size (Bytes):</label>
+      <input id="n_bucket_size" type="text" size="7" name="n_bucket_size" value="" class="calc_input">
+      <span class="error_span" id="n_bucket_size_error"></span>
+    </li>
+    <li>
+      <label for="n_key_size">Average Key Size (Bytes):</label>
+      <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input">
+      <span class="error_span" id="n_key_size_error"></span>
+    </li>
+    <li>
+      <label for="n_record_size">Average Value Size (Bytes):</label>
+      <input id="n_record_size" type="text" size="7" name="n_record_size" value="" class="calc_input">
+      <span class="error_span" id="n_record_size_error"></span>
+    </li>
+    <li>
+      <label for="n_ram">RAM Per Node (in GB):</label>
+      <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input">
+      <span class="error_span" id="n_ram_error"></span>
+    </li>
+    <li>
+      <label for="n_nval"><i>N</i> (Number of Write Copies):</label>
+      <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input">
+      <span class="error_span" id="n_nval_error"></span>
+    </li>
+</ul>
+</div>
+
+## Recommendations
+
+<span id="recommend"></span>
+
+## Details on Bitcask RAM Calculation
+
+With the above information in mind, the following variables will factor
+into your RAM calculation:
+
+Variable | Description
+:--------|:-----------
+Static Bitcask per-key overhead | 44.5 bytes per key
+Estimated average bucket-plus-key length | The combined number of characters your bucket + keynames will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The Actual Equation
+
+Approximate RAM Needed for Bitcask = (static bitcask per-key overhead +
+estimated average bucket+key length in bytes) * estimated total number of
+keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+The amount of RAM you would need for Bitcask is about **9.78 GB** across
+your entire cluster.
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
+ + + diff --git a/content/riak/kv/2.9.10/setup/planning/cluster-capacity.md b/content/riak/kv/2.9.10/setup/planning/cluster-capacity.md new file mode 100644 index 0000000000..38d949328d --- /dev/null +++ b/content/riak/kv/2.9.10/setup/planning/cluster-capacity.md @@ -0,0 +1,237 @@ +--- +title: "Cluster Capacity Planning" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Cluster Capacity" + identifier: "planning_cluster_capacity" + weight: 103 + parent: "planning" +toc: true +aliases: + - /riak/2.9.10/ops/building/planning/cluster + - /riak/kv/2.9.10/ops/building/planning/cluster +--- + +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb +[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/bitcask-capacity-calc +[plan index]: {{<baseurl>}}riak/kv/2.9.10/setup/planning +[concept replication]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/replication +[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster +[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference +[perf benchmark]: {{<baseurl>}}riak/kv/2.9.10/using/performance/benchmarking +[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux) + + +This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster. + +## RAM + +[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users. Memory is essential for running complex MapReduce queries or caching data to provide low-latency request times. + +### Bitcask and Memory Requirements + +Your choice of local storage backend for Riak impacts your RAM +needs. Though Riak has pluggable backend storage, Bitcask is the +default. Why? Because it's built for: + +* low-latency request times +* high throughput +* the ability to handle data sets much larger than RAM w/o degradation + +Bitcask's one major requirement, however, is that it must keep the +entire **keydir** in memory. The keydir is a hash table that maps each +concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for +each file contained within each Bitcask backend) to a fixed-size +structure giving the file, offset, and size of the most recently written +entry for that bucket + key on disk. + +To learn about Bitcask see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper. + +If your calculated RAM needs will exceed your hardware resources--in other words, if you can't afford the RAM to use Bitcask---we recommend that you use LevelDB. + +Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster. + +### LevelDB + +If RAM requirements for Bitcask are prohibitive, we recommend use of +the LevelDB backend. While LevelDB doesn't require a large amount of RAM +to operate, supplying it with the maximum amount of memory available leads to higher performance. + +For more information see [LevelDB][plan backend leveldb]. + +## Disk + +Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. 
Below is an equation to help you calculate disk space needs: + +#### Estimated Total Objects * Average Object Size * n_val + +For example: + +* 50,000,000 objects +* an average object size of two kilobytes (2,048 bytes) +* the default `n_val` of 3 + +Then you would need just over approximately **286 GB** of disk space in the entire cluster to accommodate your data. + +We believe that databases should be durable out of the box. When we +built Riak, we did so in a way that you could write to disk while +keeping response times below your users' expectations. So this +calculation assumes that you'll be keeping the entire data set on disk. + +Many of the considerations taken when configuring a machine to serve a +database apply to configuring a node for Riak as well. Mounting +disks with noatime and having separate disks for your OS and Riak data +lead to much better performance. See [Planning for a +Riak System](../start) for more information. + +### Disk Space Planning and Ownership Handoff + +When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is that the other nodes require more intensive disk space usage; in rare cases filling the disk of one or more of those nodes. + +When making disk space planning decisions, we recommend that you: + +* assume that one or more nodes may be down at any time +* monitor your disk space usage and add additional space when usage + exceeds 50-60% of available space. + +Another possibility worth considering is using Riak with a filesystem +that allows for growth, for example +[LVM], +[RAID](http://en.wikipedia.org/wiki/RAID), or +[ZFS](http://en.wikipedia.org/wiki/ZFS). + +## Read/Write Profile + +Read/write ratios, as well as the distribution of key access, should +influence the configuration and design of your cluster. If your use case +is write heavy, you will need less RAM for caching, and if only a +certain portion of keys is accessed regularly, such as a [Pareto +distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you +won't need as much RAM available to cache those keys' values. + +## Number of Nodes + +The number of nodes (i.e. physical servers) in your Riak Cluster depends +on the number of times data is [replicated][concept replication] across the +cluster. To ensure that the cluster is always available to respond to +read and write requests, we recommend a "sane default" of N=3 +replicas. This requirement can be met with a 3 or 4-node +cluster. + +For production deployments, however, we recommend using no fewer than 5 +nodes, as node failures in smaller clusters can compromise the +fault-tolerance of the system. Additionally, in clusters smaller than 5 +nodes, a high percentage of the nodes (75-100% of them) will need to +respond to each request, putting undue load on the cluster that may +degrade performance. For more details on this recommendation, see our +blog post on [Why Your Riak Cluster Should Have at Least Five +Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/). + +## Scaling + +Riak can be scaled in two ways: vertically, via improved hardware, and +horizontally, by adding more nodes. Both ways can provide performance +and capacity benefits, but should be used in different circumstances. +The [riak-admin cluster command][use admin riak-admin#cluster] can +assist scaling in both directions. 
+
+#### Vertical Scaling
+
+Vertical scaling, or improving the capabilities of a node/server,
+provides greater capacity to the node but does not decrease the overall
+load on existing members of the cluster. That is, the ability of the
+improved node to handle existing load is increased but the load itself
+is unchanged. Reasons to scale vertically include increasing IOPS (I/O
+Operations Per Second), increasing CPU/RAM capacity, and increasing disk
+capacity.
+
+#### Horizontal Scaling
+
+Horizontal scaling, or increasing the number of nodes in the cluster,
+reduces the responsibilities of each member node by reducing the number
+of partitions and providing additional endpoints for client connections.
+That is, the capacity of each individual node does not change but its
+load is decreased. Reasons to scale horizontally include increasing I/O
+concurrency, reducing the load on existing nodes, and increasing disk
+capacity.
+
+> **Note on horizontal scaling**
+>
+> When scaling horizontally, it's best to add all planned nodes at once
+with multiple `riak-admin cluster join` commands followed by
+a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster.
+
+#### Reducing Horizontal Scale
+
+If a Riak cluster is over-provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command.
+
+## Ring Size/Number of Partitions
+
+Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference].
+
+The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes we recommend a larger ring size.
+
+The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes.
+
+There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses. We recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine, while a 10-node cluster should work well with a ring size of 128 or 256 (64 is too small, while 512 is likely too large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency IO access can drastically decrease performance. Before
+putting your Riak cluster in production, it is recommended that you gain a
+full understanding of your environment's behavior so that you know how
+your cluster performs under load for an extended period of time. Doing
+so will help you size your cluster for future growth and lead to optimal
+performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend that you keep latency
+between nodes as low as possible, as high latency leads to
+sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the least
+latency possible using SATA drives or SSDs, for example.
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/planning/future.md b/content/riak/kv/2.9.10/setup/planning/future.md
new file mode 100644
index 0000000000..6776f3045a
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/planning/future.md
@@ -0,0 +1,19 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+#menu:
+#  riak_kv-2.9.10:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+---
+
+**TODO: Add content**
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/planning/operating-system.md b/content/riak/kv/2.9.10/setup/planning/operating-system.md
new file mode 100644
index 0000000000..31b1649792
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/planning/operating-system.md
@@ -0,0 +1,28 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+---
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.10/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/planning/start.md b/content/riak/kv/2.9.10/setup/planning/start.md
new file mode 100644
index 0000000000..1c4b26f06e
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/planning/start.md
@@ -0,0 +1,60 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/building/planning/system-planning
+  - /riak/kv/2.9.10/ops/building/planning/system-planning
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring
+your Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have
+strengths and weaknesses, so if you are unsure of which backend you
+need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests
+across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP
+implementations. We don't recommend VRRP behavior for the VIP because
+you'll lose the benefit of spreading client query load to all nodes in a
+ring.
+
+For **reverse-proxy** configurations (HTTP interface), any one of the
+following should work adequately:
+
+* haproxy
+* squid
+* varnish
+* nginx
+* lighttpd
+* Apache
+
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/search.md b/content/riak/kv/2.9.10/setup/search.md
new file mode 100644
index 0000000000..fd40910d9e
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/search.md
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/upgrading.md b/content/riak/kv/2.9.10/setup/upgrading.md
new file mode 100644
index 0000000000..84261c18af
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/upgrading.md
@@ -0,0 +1,36 @@
+---
+title: "Upgrading Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Upgrading"
+    identifier: "upgrading"
+    weight: 102
+    parent: "setup_index"
+toc: true
+---
+
+[upgrade checklist]: ./checklist
+[upgrade version]: ./version
+[upgrade cluster]: ./cluster
+[upgrade mdc]: ./multi-datacenter
+[upgrade search]: ./search
+
+## In This Section
+
+### [Production Checklist][upgrade checklist]
+
+An overview of what to consider before upgrading Riak KV in a production environment.
+
+[Learn More >>][upgrade checklist]
+
+### [Upgrading to Riak KV 2.9.10][upgrade version]
+
+A tutorial on updating to Riak KV 2.9.10.
+
+[Learn More >>][upgrade version]
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/upgrading/checklist.md b/content/riak/kv/2.9.10/setup/upgrading/checklist.md
new file mode 100644
index 0000000000..03312f43be
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/upgrading/checklist.md
@@ -0,0 +1,224 @@
+---
+title: "Production Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Production Checklist"
+    identifier: "upgrading_checklist"
+    weight: 100
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/upgrading/production-checklist/
+  - /riak/kv/2.9.10/ops/upgrading/production-checklist/
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/2.9.10/using/performance/open-files-limit
+[perf index]: {{<baseurl>}}riak/kv/2.9.10/using/performance
+[ntp]: http://www.ntp.org/
+[security basics]: {{<baseurl>}}riak/kv/2.9.10/using/security/basics
+[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.10/configuring/load-balancing-proxy
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference
+[config backend]: {{<baseurl>}}riak/kv/2.9.10/configuring/backend
+[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.10/developing/app-guide/replication-properties
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/reference/strong-consistency
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/bucket-types
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.10/using/admin/commands
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-control
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/inspecting-node
+[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.10/using/troubleshooting/http-204
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin
+[SANs]: http://en.wikipedia.org/wiki/Storage_area_network
+
+Deploying Riak KV to a live production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using iperf to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [apps replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
+    * If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][concept strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state?
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak-admin`][use admin riak-admin])
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median, 95th, and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs?
+* Are the client libraries in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying?
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak-admin diag <check>`, where `check` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak-admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production
+    systems
+  - High is for problems that impact production or soon-to-be-production
+    systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those
+    issues that are likely to turn into production outages very soon.
+    On-call engineers respond to urgent requests within 30 minutes,
+    24/7.
+* Does your team know how to gather `riak-debug` results from the whole
+  cluster when opening tickets? If not, that process goes something like
+  this:
+  - SSH into each machine, run `riak-debug`, and grab the resultant
+    `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open
+    a new High- or Urgent-priority ticket
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at
+the metrics gathered above. Based on the numbers you're seeing so far,
+configure alerting thresholds on your latencies, disk consumption, and
+memory. These are the places most likely to give you advance warning of
+trouble.
+
+When you go to increase capacity down the line, historical metrics will
+give you clear evidence of resolved scaling problems, as well as a basis
+for understanding what to upgrade and when.
+
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/upgrading/cluster.md b/content/riak/kv/2.9.10/setup/upgrading/cluster.md
new file mode 100644
index 0000000000..07d558fb22
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/upgrading/cluster.md
@@ -0,0 +1,302 @@
+---
+title: "Upgrading a Cluster"
+description: ""
+project: "riak_kv"
+project_version: "2.9.10"
+menu:
+  riak_kv-2.9.10:
+    name: "Upgrading a Cluster"
+    identifier: "upgrading_cluster"
+    weight: 102
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.9.10/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.10/ops/upgrading/rolling-upgrades/
+---
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.10/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/secondary-indexes
+[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.10/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.10/using/reference/snmp
+
+{{% note title="Note on upgrading Riak KV from older versions" %}}
+Riak KV upgrades are tested and supported for two feature release versions.
+For example, upgrades from 1.1.x to 1.3.x are tested and supported,
+while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new
+version of Riak KV that is more than two feature releases ahead, we
+recommend first upgrading to an intermediate version. For example, in an
+upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x
+before upgrading to 1.4.x.
+
+If you run [Riak Control]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-control), you should disable it during the rolling upgrade process.
+{{% /note %}}
+
+Riak KV nodes negotiate with each other to determine supported
+operating modes. This allows clusters containing mixed versions of Riak KV
+to properly interoperate without special configuration, and simplifies
+rolling upgrades.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading.
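+
+If Riak Control is enabled on any node, a one-line riak.conf change like
+the following turns it off before you begin (a sketch, assuming the
+newer riak.conf format; re-enable Riak Control once the upgrade is
+complete):
+
+```riakconf
+riak_control = off
+```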
+
+## Debian/Ubuntu
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Debian/Ubuntu packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up the Riak KV node's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo dpkg -i <riak_package_name>.deb
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7. The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+  * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_jmx` - See [JMX Monitoring][jmx monitor] for more information.
+  * `snmp` - See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should ensure that any custom patches contained in
+the `basho-patches` directory are examined to determine their
+applicability to the upgraded version. If you find that patches no longer
+apply to the upgraded version, you should remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
+
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/upgrading/multi-datacenter.md b/content/riak/kv/2.9.10/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..6f5b57e636
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,22 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+#menu:
+#  riak_kv-2.9.10:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
+
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/upgrading/search.md b/content/riak/kv/2.9.10/setup/upgrading/search.md
new file mode 100644
index 0000000000..29d4f60bb8
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/upgrading/search.md
@@ -0,0 +1,280 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.9.10"
+menu:
+  riak_kv-2.9.10:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.9.10/ops/advanced/upgrading-search-2
+  - /riak/kv/2.9.10/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e. no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown that Solr generally uses less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage, as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM. A good start is 2 GB, but more will be required for heavier
+workloads. However, do not make the heap too large, as that could
+cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index, and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data, as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+>Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU and network, and especially more disk space in merge index, as its GC
+algorithm is bad at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the setting is called
+   `yokozuna`; if you've moved to the new `riak.conf` format, the option
+   is called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+        %% Other configs
+        {enabled, true},
+        %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+    To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+   instructions to learn how to define your XML file. Once you've created
+   the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+    Once a bucket is associated with the new Search, all objects that are
+   written or modified in Riak will be indexed by **both** legacy and new
+   Search. However, the HTTP and client query interfaces will still
+   continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+    Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+   the following code into the shell. It clears the Search hash trees for
+   each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
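+    %% rpc_every_member_ann/4 runs yz_entropy_mgr:clear_trees/0 on every
+    %% member of the cluster, so each node's Search hash trees are
+    %% rebuilt from scratch on the next AAE pass.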
+    ```
+
+    Press `Ctrl-D` to exit from the attached shell.
+
+    In the background AAE will rebuild the hash trees and exchange them
+   with KV. These exchanges will notice objects are missing and index
+   them in new Search.
+
+    <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+has occurred on every node.
+
+    ```bash
+    riak-admin search aae-status
+    ```
+
+    First, you must wait until all trees are rebuilt. This may take a
+   while, as each node is configured, by default, to build a maximum of
+   one tree per hour. You can determine when a tree is built by looking
+   at the `Entropy Trees` section. When a tree is not built it will show
+   `--` under the `Built (ago)` column. Otherwise, it will list how long
+   ago the tree was built in a human-friendly format. Here is an example
+   of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   --
+    319703483166135013357056057156686910549735243776   --
+    ...
+    ```
+
+    Here is an example of built trees:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.3 hr
+    319703483166135013357056057156686910549735243776   5.3 hr
+    ...
+    ```
+
+    After all the trees are built, you then have to wait for a full
+   exchange round to occur for every partition on every node. That is,
+   the full exchange round must be **NEWER** than the time the tree was
+   built. That way you know the exchange was based on the latest tree.
+   The exchange information is found under the `Exchanges` section.
+   Under that section there are two columns: `Last (ago)` and `All
+   (ago)`. In this case you want to wait until the `All (ago)` value is
+   newer than the value of `Built (ago)` in the `Entropy Trees` section.
+   For example, given the entropy tree output above, the following output
+   would indicate both partitions have had a full exchange round since the
+   latest tree was built:
+
+    ```
+    ================================== Exchanges ==================================
+    Index                                              Last (ago)    All (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+    319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+    ...
+    ```
+
+    Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than
+   `5.3 hr`. Once the exchange is newer for every partition on every
+   node, you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command, which will give HTTP and PB query
+control to the new Riak Search:
+
+    ```bash
+    riak-admin search switch-to-new-search
+    ```
+
+    <div class="note">
+    <div class="title">Check Results Before Switching (Optional)</div>
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After `switch-to-new-search` is run, all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching, then you can use its dedicated
+    HTTP resource at `/search/query/<index>?q=...`.
+    </div>
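+
+    For example, you could spot-check the new index through that
+    resource before switching (a sketch; the index name `my_index` and
+    the match-all query are illustrative):
+
+    ```curl
+    curl "http://localhost:8098/search/query/my_index?wt=json&q=*:*"
+    ```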
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+        %% Other configs
+        {enabled, false},
+        %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search, this causes
+the commit hooks to persist even when legacy Search is disabled and the
+search property is set to false.
+
+    New Search has code to expunge the legacy hooks from the raw ring, but
+   this only occurs during start-up and requires that legacy Search be
+   disabled in the configuration. Thus, the easiest way to fix things is
+   to disable legacy Search (in step 9) and then perform a rolling
+   restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than in the middle of a migration.
+
+
+
+
diff --git a/content/riak/kv/2.9.10/setup/upgrading/version.md b/content/riak/kv/2.9.10/setup/upgrading/version.md
new file mode 100644
index 0000000000..6b96f6aeef
--- /dev/null
+++ b/content/riak/kv/2.9.10/setup/upgrading/version.md
@@ -0,0 +1,250 @@
+---
+title: "Upgrading to Riak KV 2.9.10"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Upgrading to 2.9.10"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.10/upgrade-v20/
+  - /riak/kv/2.9.10/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.10/setup/upgrading/cluster/
+---
+
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.10/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/2.9.10/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.10/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.10/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/2.9.10/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 2.9.10 by following the instructions below.
+
+{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+    * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to potentially downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config.
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to override the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Transitioning to Leveled backend
+
+
+[Riak KV 2.9][release notes] introduced a new backend written specifically for Riak: Leveled.
+
+The leveled backend is not compatible with other backends in terms of the serialised disk format, and there is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+* First transition to 2.9 with the current backend in place, minimising the time spent running mis-matched versions in parallel;
+* Then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.10, but only if you plan to use Riak search.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features are either considered so important that we automatically opt users in on upgrade, or offer no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted-in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:--- |
+|Migration to Solr 4.10.4 |✔ | ✔| | Applies to all clusters using Riak search.
+| Active anti-entropy file format changes | ✔ | | | Can opt-out using a capability.
+| LZ4 compression in LevelDB | | | ✔ |
+| Global expiration in LevelDB | | | ✔ |
+| HyperLogLog data type | | |✔| On downgrade data written in HLL format is unreadable.|
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade to version 2.9, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new Leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+
+## Upgrading process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories.
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+
+4\. Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package right now, before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+    ```advanced.config
+    {riak_kv, [
+      {override_capability, [
+        {object_hash_version, [{use, legacy}] }
+      ]}
+    ]}
+    ```
+
+5.b\. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+    ```riak.conf
+    leveldb.compression.algorithm=snappy
+    ```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS 2.2.3 or earlier, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+    ```advanced.config
+    {riak_repl, [
+      {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+      ]}
+    ]}
+    ```
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.10 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf. This data is transferred to the node when it becomes available.
+
+10\. Repeat the process for the remaining nodes in the cluster.
+
+
+### Basho Patches
+
+After upgrading, you should ensure that any custom patches contained in the `basho-patches` directory are examined to determine their applicability to the upgraded version. You can find this information in the [Release Notes].
+
+If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+### Riaknostic
+
+It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal node operation.
+
+
+
+
diff --git a/content/riak/kv/2.9.10/using.md b/content/riak/kv/2.9.10/using.md
new file mode 100644
index 0000000000..e9d5956f11
--- /dev/null
+++ b/content/riak/kv/2.9.10/using.md
@@ -0,0 +1,76 @@
+---
+title: "Using Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Using"
+    identifier: "managing"
+    weight: 201
+    pre: database
+toc: true
+---
+
+[use running cluster]: ../using/running-a-cluster
+[use admin index]: ../using/admin/
+[cluster ops index]: ../using/cluster-operations
+[repair recover index]: ../using/repair-recovery
+[security index]: ../using/security
+[perf index]: ../using/performance
+[troubleshoot index]: ../using/troubleshooting
+[use ref]: ../using/reference
+
+## In This Section
+
+#### [Running a Cluster][use running cluster]
+
+A guide on basic cluster setup.
+
+[Learn More >>][use running cluster]
+
+#### [Cluster Administration][use admin index]
+
+Tutorials and reference documentation on cluster administration commands as well as command-line tools.
+
+[Learn More >>][use admin index]
+
+#### [Cluster Operations][cluster ops index]
+
+Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups.
+
+[Learn More >>][cluster ops index]
+
+#### [Repair & Recovery][repair recover index]
+
+Contains documentation on repairing a cluster, recovering from failure, and common errors.
+
+[Learn More >>][repair recover index]
+
+#### [Security][security index]
+
+Information on securing your Riak KV cluster.
+
+[Learn More >>][security index]
+
+#### [Performance][perf index]
+
+Articles on benchmarking your Riak KV cluster and improving performance.
+
+[Learn More >>][perf index]
+
+#### [Troubleshooting][troubleshoot index]
+
+Guides on troubleshooting issues and current product advisories.
+
+[Learn More >>][troubleshoot index]
+
+#### [Reference][use ref]
+
+Articles providing background information and implementation details on topics such as logging, bucket types, and search.
+
+[Learn More >>][use ref]
+
+
+
+
diff --git a/content/riak/kv/2.9.10/using/admin.md b/content/riak/kv/2.9.10/using/admin.md
new file mode 100644
index 0000000000..091c231648
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/admin.md
@@ -0,0 +1,50 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/cluster-admin
+  - /riak/kv/2.9.10/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
+
+
+
diff --git a/content/riak/kv/2.9.10/using/admin/commands.md b/content/riak/kv/2.9.10/using/admin/commands.md
new file mode 100644
index 0000000000..d0fafde960
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/admin/commands.md
@@ -0,0 +1,377 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/cluster-admin
+  - /riak/kv/2.9.10/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command. This will execute the changes that
+have been staged and reviewed.
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
+interface are also available as self-standing commands. The `riak-admin
+member-status` command is now the `riak-admin cluster status` command,
+`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+|        node        |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` - There are five possible values for status:
+  * `valid` - The node has begun participating in cluster operations
+  * `leaving` - The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` - The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` - The node is in the process of joining the cluster but
+    has not yet completed the join process
+  * `down` - The node is not currently responding
+* `avail` - There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` - What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` - The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant node][cluster ops add remove node] will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
+[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple joins before planning/committing.
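+
+For example, to bring several new nodes into the cluster in a single
+transition (node names are illustrative), stage a join on each new node,
+then review and commit the plan from any one node:
+
+```bash
+# On each joining node:
+riak-admin cluster join riak1@127.0.0.1
+
+# Then, on any single node in the cluster:
+riak-admin cluster plan
+riak-admin cluster commit
+```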
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+The output of `riak-admin cluster plan` can be complex, depending on the
+staged changes.
+ +If there is no current cluster plan, the output will be `There are no +staged changes`. + +If there is a staged change (or changes), however, you +will see a detailed listing of what will take place upon commit, what +the cluster will look like afterward, etc. + +For example, if a `cluster leave` operation is staged in a 3-node cluster the output will look something like this: + +``` +=============================== Staged Changes ================================ +Action Details(s) +------------------------------------------------------------------------------- +leave 'dev2@127.0.0.1' +------------------------------------------------------------------------------- + + +NOTE: Applying these changes will result in 2 cluster transitions + +############################################################################### + After cluster transition 1/2 +############################################################################### + +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +leaving 32.8% 0.0% 'dev2@127.0.0.1' +valid 34.4% 50.0% 'dev1@127.0.0.1' +valid 32.8% 50.0% 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0 + +WARNING: Not all replicas will be on distinct nodes + +Transfers resulting from cluster changes: 38 + 6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1' + 11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1' + 5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1' + 16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1' + +############################################################################### + After cluster transition 2/2 +############################################################################### + +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +valid 50.0% -- 'dev1@127.0.0.1' +valid 50.0% -- 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + +WARNING: Not all replicas will be on distinct nodes +``` + +Notice that there are distinct sections of the output for each of the +transitions that the cluster will undergo, including warnings, planned +data transfers, etc. + +## commit + +Commits the currently staged cluster changes. Staged cluster changes +must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed. + +```bash +riak-admin cluster commit +``` + +## clear + +Clears the currently staged cluster changes. + +```bash +riak-admin cluster clear +``` + +## partitions + +Prints primary, secondary, and stopped partition indices and IDs either +for the current node or for another, specified node. 
The following
+prints that information for the current node:
+
+```bash
+riak-admin cluster partitions
+```
+
+This would print the partition information for a different node in the
+cluster:
+
+```bash
+riak-admin cluster partitions --node=<node>
+```
+
+Partition information is contained in a table like this:
+
+```
+Partitions owned by 'dev1@127.0.0.1':
++---------+-------------------------------------------------+--+
+|  type   |                      index                      |id|
++---------+-------------------------------------------------+--+
+| primary |                        0                        |0 |
+| primary | 91343852333181432387730302044767688728495783936 |4 |
+| primary |182687704666362864775460604089535377456991567872 |8 |
+|   ...   |                      ....                       |..|
+| primary |1438665674247607560106752257205091097473808596992|63|
+|secondary|                       --                        |--|
+| stopped |                       --                        |--|
++---------+-------------------------------------------------+--+
+```
+
+## partition-count
+
+Displays the current partition count either for the whole cluster or for
+a particular node. This would display the partition count for the
+cluster:
+
+```bash
+riak-admin cluster partition-count
+```
+
+This would display the count for a node:
+
+```bash
+riak-admin cluster partition-count --node=<node>
+```
+
+When retrieving the partition count for a node, you'll see a table like
+this:
+
+```
++--------------+----------+-----+
+|     node     |partitions| pct |
++--------------+----------+-----+
+|dev1@127.0.0.1|    22    | 34.4|
++--------------+----------+-----+
+```
+
+The `partitions` column displays the number of partitions claimed by the
+node, while the `pct` column displays the percentage of the ring claimed.
+
+## partition
+
+The `cluster partition` command enables you to convert partition IDs to
+indexes and vice versa using the `partition id` and `partition index`
+commands, respectively. Let's say that you run the `riak-admin cluster
+partitions` command and see that you have a variety of partitions, one
+of which has an index of
+`1004782375664995756265033322492444576013453623296`.
You can convert
+that index to an ID like this:
+
+```bash
+riak-admin cluster partition index=1004782375664995756265033322492444576013453623296
+```
+
+Conversely, if you have a partition with an ID of 20, you can retrieve
+the corresponding index:
+
+```bash
+riak-admin cluster partition id=20
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/using/admin/riak-admin.md b/content/riak/kv/2.9.10/using/admin/riak-admin.md
new file mode 100644
index 0000000000..ff1137f2fa
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/admin/riak-admin.md
@@ -0,0 +1,720 @@
+---
+title: "riak-admin Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "riak-admin CLI"
+    identifier: "cluster_admin_cli"
+    weight: 101
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/tools/riak-admin
+  - /riak/kv/2.9.10/ops/running/tools/riak-admin
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.10/using/admin/commands
+[use admin commands#join]: {{<baseurl>}}riak/kv/2.9.10/using/admin/commands/#join
+[use admin commands#leave]: {{<baseurl>}}riak/kv/2.9.10/using/admin/commands/#leave
+[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/backing-up
+[config reference#node-metadata]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/#node-metadata
+[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/changing-cluster-info
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/commit-hooks
+[config reference#ring]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/#ring
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/inspecting-node
+[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.10/using/reference/statistics-monitoring
+[downgrade]: {{<baseurl>}}riak/kv/2.9.10/setup/downgrade
+[security index]: {{<baseurl>}}riak/kv/2.9.10/using/security/
+[security managing]: {{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/bucket-types
+[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.10/using/reference/secondary-indexes
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.10/using/repair-recovery
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/strong-consistency
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/handoff
+[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#stats
+
+## `riak-admin`
+
+The riak-admin command performs operations unrelated to node liveness, including
+node membership, backup, and basic status reporting. The node must be
+running for most of these commands to work.
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up, respectively.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. Reip should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs have been fixed related to reip in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded Javascript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+**Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Change the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Output system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10
+riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
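+
+When rebalancing or repairing a cluster, it can help to poll this report
+rather than re-running it by hand; a minimal sketch using the standard
+`watch` utility (assuming it is installed on the node):
+
+```bash
+# Refresh the AAE status report every 60 seconds
+watch -n 60 riak-admin aae-status
+```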
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
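+
+As a quick sketch of the workflow (the user name, password, and subnet
+below are illustrative, and security should only be enabled once all
+clients are configured for TLS):
+
+```bash
+# Check whether security is currently enabled
+riak-admin security status
+
+# Create a user and allow password-based logins from one subnet
+riak-admin security add-user alice password=rosebud
+riak-admin security add-source alice 10.0.0.0/24 password
+
+# Turn security on once users and sources are in place
+riak-admin security enable
+```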
+
+## bucket-type
+
+Bucket types are a means of managing bucket properties introduced in
+Riak 2.0, as well as an additional namespace in Riak alongside
+buckets and keys. This command enables you to create and modify bucket
+types, provide the status of currently available bucket types, and
+activate created bucket types.
+
+```bash
+riak-admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak-admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak-admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak-admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+        Total partitions: 64
+        Finished partitions: 44
+        Speed: 100
+        Total 2i items scanned: 0
+        Total tree objects: 0
+        Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak-admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak-admin repair-2i kill
+```
+
+## search
+
+The search command provides sub-commands for various administrative
+work related to the new Riak Search.
+
+```bash
+riak-admin search <command>
+```
+
+### aae-status
+
+```bash
+riak-admin search aae-status
+```
+
+Output active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtree don't match for a particular key. The `Last` column is the
+number of keys repaired during the last exchange.
The `Mean` column is +the average number of keys repaired for all exchange rounds since the +node has started. The `Max` column is the maximum number of keys +repaired for a given exchange round since the node has started. + +### switch-to-new-search + +{{% note title="Only For Legacy Migration" %}} +This is only needed when migrating from legacy riak search to the new Search +(Yokozuna). +{{% /note %}} + +```bash +riak-admin search switch-to-new-search +``` + +Switch handling of the HTTP `/solr/<index>/select` resource and +protocol buffer query messages from legacy Riak Search to new Search +(Yokozuna). + +## services + +Lists available services on the node (e.g. `riak_kv`). + +```bash +riak-admin services +``` + +## ensemble-status + +This command is used to provide insight into the current status of the +consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature. + +```bash +riak-admin ensemble-status +``` + +This command can also be used to check on the status of a specific +consensus group in your cluster: + +```bash +riak-admin ensemble-status <group id> +``` + +Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency]. + +## handoff + +Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff]. + +## set + +Enables you to change the value of one of Riak's configuration +parameters on the fly, without needing to stop and restart the node. + +```bash +riak-admin set <variable>=<value> +``` + +The set command can only be used for the following +parameters: + +* `transfer_limit` +* `handoff.outbound` +* `handoff.inbound` +* `search.dist_query=off` will disable distributed query for the node +* `search.dist_query=on` will enable distributed query for the node +* `search.dist_query` will get the status of distributed query for the node + +The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted. + + +## show + +Whereas the [`riak-admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak +cluster, the `show` command enables you to view only some of those +statistics. + +```bash +riak-admin show <variable> +``` + +## describe + +Provides a brief description of one of Riak's [configurable parameters][config reference]. 
+
+```bash
+riak-admin describe <variable>
+```
+
+If you want to know the meaning of the `nodename` parameter:
+
+```bash
+riak-admin describe nodename
+```
+
+That will produce the following output:
+
+```
+nodename:
+  Name of the Erlang node
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/using/admin/riak-cli.md b/content/riak/kv/2.9.10/using/admin/riak-cli.md
new file mode 100644
index 0000000000..e8fd84a825
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/admin/riak-cli.md
@@ -0,0 +1,203 @@
+---
+title: "riak Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "riak CLI"
+    identifier: "cluster_admin_riak_cli"
+    weight: 102
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/tools/riak
+  - /riak/kv/2.9.10/ops/running/tools/riak
+---
+
+[configuration file]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/
+[escript]: http://www.erlang.org/doc/man/escript.html
+[`riak-admin`]: {{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#top
+[configuration]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/
+
+## riak
+
+This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands:
+
+```bash
+Usage: riak «command»
+where «command» is one of the following:
+    { help | start | stop | restart | ping | console | attach
+      attach-direct | ertspath | chkconfig | escript | version | getpid
+      top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } |
+      config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+## help
+
+Provides a brief description of all available commands.
+
+## start
+
+Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given.
+
+```bash
+riak start
+```
+
+## stop
+
+Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak stop
+```
+
+## restart
+
+Stops and then starts the running Riak node without exiting the Erlang VM.
+Prints `ok` when successful, or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak restart
+```
+
+## ping
+
+Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding.
+
+```bash
+riak ping
+```
+
+## console
+
+Starts the Riak node in the foreground, giving access to the Erlang shell and
+runtime messages. Prints `Node is already running - use 'riak attach' instead`
+when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice.
+
+```bash
+riak console
+```
+
+## attach
+
+Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached.
+
+```bash
+riak attach
+```
+
+## attach-direct
+
+Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**.
+
+```bash
+riak attach-direct
+```
+
+## ertspath
+
+Outputs the path of the Riak Erlang runtime environment:
+
+```bash
+riak ertspath
+```
+
+## chkconfig
+
+Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output.
+
+```bash
+riak chkconfig
+```
+
+## escript
+
+Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment:
+
+```bash
+riak escript <filename>
+```
+
+## version
+
+Outputs the Riak version identifier:
+
+```bash
+riak version
+```
+
+## getpid
+
+Outputs the process identifier for the currently-running instance of Riak:
+
+```bash
+riak getpid
+```
+
+## top
+
+The `riak top` command is the direct equivalent of `riak-admin top`:
+
+```bash
+riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N]
+```
+
+More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation.
+
+## config
+
+Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration.
+
+```bash
+riak config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however, `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files.
+  The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example:
+
+    ```
+    -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args
+    ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message:
+
+    ```
+    -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args
+    ```
+
+* `effective` prints the effective configuration in the following syntax:
+
+    ```
+    parameter1 = value1
+    parameter2 = value2
+    ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error:
+
+    ```
+    Disabling cuttlefish, legacy configuration files found:
+      /etc/riak/app.config
+      /etc/riak/vm.args
+    Effective config is only visible for cuttlefish conf files.
+    ```
+
+* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following:
+
+    ```
+    Documentation for storage_backend
+    Specifies the storage engine used for Riak's key-value data
+    and secondary indexes (if supported).
+
+    Valid Values:
+      - one of: bitcask, leveldb, memory, multi, prefix_multi
+    Default Value : bitcask
+    Set Value     : bitcask
+    Internal key  : riak_kv.storage_backend
+    ```
+
+Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem.
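+
+For example, a typical edit-and-verify loop after changing riak.conf
+might look like the following sketch (the `grep` filter is merely
+illustrative):
+
+```bash
+# Validate the edited riak.conf before restarting the node
+riak chkconfig
+
+# Regenerate the derived config files and inspect one effective setting
+riak config generate
+riak config effective | grep storage_backend
+```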
+
+
+
diff --git a/content/riak/kv/2.9.10/using/admin/riak-control.md b/content/riak/kv/2.9.10/using/admin/riak-control.md
new file mode 100644
index 0000000000..cb730ee4ec
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/admin/riak-control.md
@@ -0,0 +1,236 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/advanced/riak-control
+  - /riak/kv/2.9.10/ops/advanced/riak-control
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+or
+
+listener.https.<name> = 127.0.0.1:8069
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+
+```
+
+```appconfig
+ {riak_api,
+  [
+    %% Other configs
+    ... if HTTP is configured ...
+    {http,[{"127.0.0.1",8098}]},
+    ... if HTTPS is configured ...
+    {https,[{"127.0.0.1",8069}]},
+    %% Other configs
+  ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found in the document below.
+{{% /note %}}
+
+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, false},
+    %% Other configs
+    ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, true},
+    %% Other configs
+    ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>.
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/2.9.10/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
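+
+A minimal riak.conf sketch for an HTTPS listener with a certificate pair
+(the port and file paths below are example values, not defaults):
+
+```riakconf
+listener.https.internal = 127.0.0.1:8069
+ssl.certfile = /etc/riak/cert.pem
+ssl.keyfile = /etc/riak/key.pem
+```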
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
+
+## Authentication
+
+Riak Control provides you the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {auth, userlist}, %% The only other available option is "none"
+    %% Other configs
+    ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {userlist, [
+        {"bob", "bob_is_the_coolest"},
+        {"polly", "h4x0r123"},
+        {"riakrocks", "cap_theorem_4_life"}
+        ]}
+    %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to https://ip_address_of_https_listener:https_port/admin.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[ ![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png) ] ({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+ +Staged changes to the cluster: + +[ ![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png) ] ({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[ ![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png) ] ({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[ ![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png) ] ({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[ ![Node Management]({{<baseurl>}}images/control_node_management.png) ] ({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode). + +[ ![Ring View]({{<baseurl>}}images/control_current_ring.png) ] ({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. + + + diff --git a/content/riak/kv/2.9.10/using/cluster-operations.md b/content/riak/kv/2.9.10/using/cluster-operations.md new file mode 100644 index 0000000000..35c1cfab9e --- /dev/null +++ b/content/riak/kv/2.9.10/using/cluster-operations.md @@ -0,0 +1,107 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak-admin handoff` interface to enable and disable handoff. 
+ +[Learn More >>][ops handoff] + + +#### [Monitoring Strong Consistency][ops strong consistency] + +Overview of the various statistics used in monitoring strong consistency. + +[Learn More >>][ops strong consistency] + + +#### [V3 Multi-Datacenter][ops v3 mdc] + +Explains how to manage V3 replication with the `riak-repl` command. + +[Learn More >>][ops v3 mdc] + + +#### [V2 Multi-Datacenter][ops v2 mdc] + +Explains how to manage V2 replication with the `riak-repl` command. + +[Learn More >>][ops v2 mdc] + + + diff --git a/content/riak/kv/2.9.10/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.9.10/using/cluster-operations/active-anti-entropy.md new file mode 100644 index 0000000000..55340bdcf3 --- /dev/null +++ b/content/riak/kv/2.9.10/using/cluster-operations/active-anti-entropy.md @@ -0,0 +1,288 @@ +--- +title: "Managing Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Managing Active Anti-Entropy" + identifier: "cluster_operations_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +version_history: + in: "2.9.1+" +aliases: + - /riak/kv/2.9.10/ops/advanced/aae/ + - /riak/2.9.10/ops/advanced/aae/ +--- +[config search#throttledelay]: {{<baseurl>}}riak/kv/2.9.10/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{<baseurl>}}riak/kv/2.9.10/configuring/search/#search-anti-entropy-throttle + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak. + +## Enabling Active Anti-Entropy + +Whether AAE is currently enabled in a node is determined by the value of +the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/). + +In Riak versions 2.0 and later, AAE is turned on by default. + +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. 
+
+```bash
+rm -Rf <path_to_riak_node>/data/anti_entropy/*
+```
+
+The default directory for AAE data is `./data/anti_entropy`, as in the
+example above, but this can be changed. See the section below titled
+**Data Directory**.
+
+Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect.
+
+The directory deletion method above can also be used to force a
+rebuilding of hash trees.
+
+## Monitoring AAE
+
+Riak's command-line interface includes a command that provides insight
+into AAE-related processes and performance:
+
+```bash
+riak-admin aae-status
+```
+
+When you run this command on a node, the output will look like this
+(shortened for the sake of brevity):
+
+```
+================================== Exchanges ==================================
+Index                                              Last (ago)    All (ago)
+-------------------------------------------------------------------------------
+0                                                  19.0 min      20.3 min
+22835963083295358096932575511191922182123945984   18.0 min      20.3 min
+45671926166590716193865151022383844364247891968   17.3 min      19.8 min
+68507889249886074290797726533575766546371837952   16.5 min      18.3 min
+91343852333181432387730302044767688728495783936   15.8 min      17.3 min
+...
+
+================================ Entropy Trees ================================
+Index                                              Built (ago)
+-------------------------------------------------------------------------------
+0                                                  5.7 d
+22835963083295358096932575511191922182123945984   5.6 d
+45671926166590716193865151022383844364247891968   5.5 d
+68507889249886074290797726533575766546371837952   4.3 d
+91343852333181432387730302044767688728495783936   4.8 d
+
+================================ Keys Repaired ================================
+Index                                                Last      Mean      Max
+-------------------------------------------------------------------------------
+0                                                     0         0         0
+22835963083295358096932575511191922182123945984      0         0         0
+45671926166590716193865151022383844364247891968      0         0         0
+68507889249886074290797726533575766546371837952      0         0         0
+91343852333181432387730302044767688728495783936      0         0         0
+
+```
+
+Each of these three tables contains information for each
+[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories:
+
+Category | Measures | Description
+:--------|:---------|:-----------
+**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed
+ | `All` | How long it has been since a partition exchanged with all of its replicas
+**Entropy Trees** | `Built` | When the hash trees for a given partition were created
+**Keys Repaired** | `Last` | The number of keys repaired during the most recent key exchange
+ | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart
+ | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart
+
+All AAE status information obtainable using the `riak-admin aae-status`
+command is stored in-memory and is reset when a node is restarted, with
+the exception of hash tree build information, which is persisted on disk
+(because hash trees themselves are persisted on disk).
+
+## Configuring AAE
+
+Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and
+off but also to fine-tune your cluster's use of AAE, e.g. how
+much memory AAE processes should consume, how frequently specific
+processes should be run, etc.
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach the length
+specified by the `search.anti_entropy.throttle.$tier.solrq_queue_length`
+parameter, with the length of each delay set via the
+[`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on
+that in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc.). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but
+entails a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data corruption.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built.
You can set the frequency using the
+`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
+default is every hour (`1h`); the number of hash tree builds is
+specified by `anti_entropy.tree.build_limit.number`, for which the
+default is 1.
+
+### Write Buffer Size
+
+While you are free to choose the backend for data storage in Riak,
+background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
+write buffer used by LevelDB for hash tree generation using the
+`anti_entropy.write_buffer_size` parameter. The default is `4MB`.
+
+### Open Files and Concurrency Limits
+
+The `anti_entropy.concurrency_limit` parameter determines how many AAE
+cross-node information exchanges or hash tree builds can happen
+concurrently. The default is `2`.
+
+The `anti_entropy.max_open_files` parameter sets an open-files limit for
+AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.
+
+## AAE and Riak Search
+
+Riak's AAE subsystem works to repair object inconsistencies both for
+normal key/value objects and for data related to [Riak Search](../../../developing/usage/search). In particular, AAE acts on indexes stored in
+[Solr](http://lucene.apache.org/solr/), the search platform that drives
+Riak Search. Implementation details for AAE and Search can be found in
+the [Search Details](../../reference/search/#active-anti-entropy-aae)
+documentation.
+
+You can check on the status of Search-related AAE using the following
+command:
+
+```bash
+riak-admin search aae-status
+```
+
+The output from that command can be interpreted just like the output
+discussed in the section on [monitoring](#monitoring-aae) above.
+
+
+
diff --git a/content/riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes.md
new file mode 100644
index 0000000000..9e22375af1
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes.md
@@ -0,0 +1,197 @@
+---
+title: "Adding / Removing Nodes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Adding/Removing Nodes"
+    identifier: "cluster_operations_add_remove_nodes"
+    weight: 100
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/nodes/adding-removing
+  - /riak/kv/2.9.10/ops/running/nodes/adding-removing
+---
+
+[use running cluster]: {{<baseurl>}}riak/kv/2.9.10/using/running-a-cluster
+
+This page describes the process of adding and removing nodes to and from
+a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster].
+
+## Start the Node
+
+Just like the initial configuration steps, this step has to be repeated
+for every node in your cluster. Before a node can join an existing
+cluster it needs to be started. Depending on your mode of installation,
+use either the init scripts installed by the Riak binary packages or
+simply the script [`riak`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-cli/):
+
+```bash
+/etc/init.d/riak start
+```
+
+or
+
+```bash
+bin/riak start
+```
+
+When the node starts, it will look for a cluster description, known as
+the **ring file**, in its data directory.
If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. This command +would stage a join to that cluster: + +```bash +bin/riak-admin cluster join riak@192.168.2.2 +``` + +If the join request is successful, you should see the following: + +``` +Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2' +``` + +If you have multiple nodes that you would like to join to an existing +cluster, repeat this process for each of them. + +## Joining Nodes to Form a Cluster + +The process of joining a cluster involves several steps, including +staging the proposed cluster nodes, reviewing the cluster plan, and +committing the changes. + +After staging each of the cluster nodes with `riak-admin cluster join` +commands, as in the section above, the next step in forming a cluster is +to review the proposed plan of changes. This can be done with the +`riak-admin cluster plan` command, which is shown in the example below. 
+
+```
+=============================== Staged Changes ================================
+Action         Nodes(s)
+-------------------------------------------------------------------------------
+join           'riak@192.168.2.3'
+join           'riak@192.168.2.4'
+join           'riak@192.168.2.5'
+join           'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+                         After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid     100.0%     20.3%    'riak@192.168.2.2'
+valid       0.0%     20.3%    'riak@192.168.2.3'
+valid       0.0%     20.3%    'riak@192.168.2.4'
+valid       0.0%     20.3%    'riak@192.168.2.5'
+valid       0.0%     18.8%    'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+  12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from the cluster in two ways. The first assumes
+that the node is being decommissioned, for example, because its capacity
+is no longer needed or because it is being replaced with a new one. The
+second is relevant for failure scenarios in which a node has crashed and
+is irrecoverable, and thus must be removed from the cluster via another
+node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster.
+
+As when joining a node, after executing `riak-admin cluster leave`
+the cluster plan must be reviewed with `riak-admin cluster plan` and
+the changes committed with `riak-admin cluster commit`.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing. Running
+`riak-admin cluster leave` without an argument simply selects the
+current node for you automatically.
+
+As with `riak-admin cluster leave`, the plan to have a node leave the
+cluster must first be reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before any changes will
+actually take place.
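+
+Putting these commands together, a typical decommissioning session might
+look like the following sketch, using only the commands described above:
+
+```bash
+# On the node that should leave the cluster, stage its departure
+riak-admin cluster leave
+
+# Review the proposed ring transition, then commit it
+riak-admin cluster plan
+riak-admin cluster commit
+```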
+ + +## Pausing a `join` or `leave` + +{{% note title="Warning" %}} +Pausing may impact cluster health and is not recommended for more than a short period of time. +{{% /note %}} + +To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0: + +```bash +riak-admin transfer-limit <node> 0 +``` + + + diff --git a/content/riak/kv/2.9.10/using/cluster-operations/backend.md b/content/riak/kv/2.9.10/using/cluster-operations/backend.md new file mode 100644 index 0000000000..f12a4aec67 --- /dev/null +++ b/content/riak/kv/2.9.10/using/cluster-operations/backend.md @@ -0,0 +1,19 @@ +--- +draft: true +title: "Backend" +description: "" +project: "riak_kv" +project_version: 2.9.10 +#menu: +# riak_kv-2.9.10: +# name: "Backend" +# identifier: "cluster_operations_backend" +# weight: 112 +# parent: "managing_cluster_operations" +toc: true +--- + +**TODO: Add content** + + + diff --git a/content/riak/kv/2.9.10/using/cluster-operations/backing-up.md b/content/riak/kv/2.9.10/using/cluster-operations/backing-up.md new file mode 100644 index 0000000000..95aa5704d7 --- /dev/null +++ b/content/riak/kv/2.9.10/using/cluster-operations/backing-up.md @@ -0,0 +1,270 @@ +--- +title: "Backing Up" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Backing Up" + identifier: "cluster_operations_backing_up" + weight: 106 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.10/ops/running/backups + - /riak/kv/2.9.10/ops/running/backups +--- + +[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters +[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/reference/strong-consistency +[concept aae]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/ +[aae read repair]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy + +Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. + +Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios. + +This page covers how to perform backups of Riak KV data. + +## Overview + +Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar. + +Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes. + +The basic process for getting a backup of Riak KV from a node is as follows: + +1. Stop Riak KV with `riak stop`. +2. Backup the appropriate data, ring, and configuration directories. +3. Start Riak KV. + +Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting. + +{{% note title="Backups and eventual consistency" %}} +Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. + +Data could exist on some nodes and not others at the exact time a backup is made. 
Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy).
+{{% /note %}}
+
+## OS-Specific Directory Locations
+
+The default Riak KV data, ring, and configuration directories for each of the supported operating systems are as follows:
+
+#### Debian and Ubuntu
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### Fedora and RHEL
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### FreeBSD
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/usr/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### OS X
+
+Data | Directory
+:----|:---------
+Bitcask | `./data/bitcask`
+LevelDB | `./data/leveldb`
+Ring | `./data/riak/ring`
+Configuration | `./etc`
+Cluster Metadata | `./data/riak/cluster_meta`
+Search | `./data/riak/yz`
+Strong consistency | `./data/ensembles`
+
+**Note**: OS X paths are relative to the directory in which the package
+was extracted.
+
+#### SmartOS
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/opt/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### Solaris
+
+Data | Directory
+:----|:---------
+Bitcask | `/opt/riak/data/bitcask`
+LevelDB | `/opt/riak/data/leveldb`
+Ring | `/opt/riak/ring`
+Configuration | `/opt/riak/etc`
+Cluster Metadata | `/opt/riak/cluster_meta`
+Search | `/opt/riak/yz`
+Strong consistency | `/opt/riak/data/ensembles`
+
+## Performing Backups
+
+{{% note title="Deprecation notice" %}}
+In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#backup) command commonly used for
+backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
+{{% /note %}}
+
+Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.
+
+Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
+installation.
+
+The following examples use `tar`:
+
+{{% note %}}
+Backups must be performed while Riak KV is stopped to prevent data loss.
+{{% /note %}} + +### Bitcask + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak +``` + +### LevelDB + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak +``` + +### Cluster Metadata + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/cluster_meta +``` + +### Search / Solr Data + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/yz +``` + +### Strong Consistency Data + +Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature +can be stored in an analogous fashion: + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/ensembles +``` + +## Restoring a Node + +The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment. + +If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process: + +1. Install Riak on the new node. +2. Restore your old node's configuration files, data directory, and ring + directory. +3. Start the node and verify proper operation with `riak ping`, + `riak-admin status`, and other methods you use to check node health. + +If the node name of a restored node (`-name` argument in `vm.args` or +`nodename` parameter in `riak.conf`) is different than the name of the +node that the restored backup was taken from, you will need to +additionally: + +1. Mark the original instance down in the cluster using + [`riak-admin down <node>`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#down) +2. Join the restored node to the cluster using + [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster-join) +3. Replace the original instance with the renamed instance with + [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster-force-replace) +4. Plan the changes to the cluster with `riak-admin cluster plan` +5. Finally, commit the cluster changes with `riak-admin cluster commit` + +{{% note %}} +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/2.9.10/using/admin/). +{{% /note %}} + +For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. + +1. Join to any existing cluster node. + + ```bash + riak-admin cluster join riak@riak2.example.com + ``` + +2. Mark the old instance down. + + ```bash + riak-admin down riak@riak1.example.com + ``` + +3. Force-replace the original instance with the new one. + + ```bash + riak-admin cluster force-replace \ + riak@riak1.example.com riak@riak6.example.com + ``` + +4. Display and review the cluster change plan. + + ```bash + riak-admin cluster plan + ``` + +5. Commit the changes to the cluster. 
+
+   ```bash
+   riak-admin cluster commit
+   ```
+
+In addition to running the commands above, your [configuration files][config reference] should also be changed to match the new name (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change but the hostnames are used for the node names and stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member-status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/2.9.10/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
+
+
+
diff --git a/content/riak/kv/2.9.10/using/cluster-operations/bucket-types.md b/content/riak/kv/2.9.10/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..94c0152ac1
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/cluster-operations/bucket-types.md
@@ -0,0 +1,61 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional setup step on the
+command line.
+
+## Creating a Bucket Type
+
+You can create a new bucket type without any properties and then set
+individual buckets to be indexed.
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + + + diff --git a/content/riak/kv/2.9.10/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.9.10/using/cluster-operations/changing-cluster-info.md new file mode 100644 index 0000000000..8fb5b2ab7e --- /dev/null +++ b/content/riak/kv/2.9.10/using/cluster-operations/changing-cluster-info.md @@ -0,0 +1,457 @@ +--- +title: "Changing Cluster Information" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Changing Cluster Info" + identifier: "cluster_operations_change_info" + weight: 101 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.10/ops/running/nodes/renaming + - /riak/kv/2.9.10/ops/running/nodes/renaming +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference + +## Change the Node Name + +The node name is an important setting for the Erlang VM, especially when +you want to build a cluster of nodes, as the node name identifies both +the Erlang application and the host name on the network. All nodes in +the Riak cluster need these node names to communicate and coordinate +with each other. + +In your configuration files, the node name defaults to `riak@127.0.0.1`. +To change the node name, change the following line: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +Change it to something that corresponds to either the IP address or a +resolvable host name for this particular node, like so: + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +## Change the HTTP and Protocol Buffers binding address + +By default, Riak's HTTP and Protocol Buffers services are bound to the +local interface, i.e. 127.0.0.1, and are therefore unable to serve +requests from the outside network. The relevant setting is in your +[configuration files][config reference]: + +```riakconf +# For HTTP +listener.http.internal = 127.0.0.1:8098 + +# For Protocol Buffers +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +% In the riak_api section + +% For HTTP +{http, [ {"127.0.0.1", 8098 } ]}, + +% For Protocol Buffers +{pb, [ {"127.0.0.1", 8087} ] }, +``` + +Either change it to use an IP address that corresponds to one of the +server's network interfaces, or 0.0.0.0 to allow access from all +interfaces and networks, e.g.: + +```riakconf +listener.http.internal = 0.0.0.0:8098 +``` + +```appconfig +% In the riak_core section +{http, [ {"0.0.0.0", 8098 } ]}, +``` + +The same configuration should be changed for the Protocol Buffers +interface if you intend on using it (which we recommend). 
Change the +following line: + +```riakconf +listener.protobuf.internal = 0.0.0.0:8087 +``` + +```appconfig +% In the riak_core section +{pb, [ {"0.0.0.0", 8087} ] }, +``` + +## Rename Single Node Clusters + +To rename a single-node development cluster: + +1. Stop the node with `riak stop`. + +2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args` to the new name. + +3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`. + +4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`. + +5. Start Riak on the node with `riak start`. + + +## Rename Multi-Node Clusters + +For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. + +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. + +There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. + +The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method. + +### Example Scenario + +For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration: + +* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11 +* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12 +* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13 +* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14 +* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15 + +The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use. + +The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation. + +### Process + +This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section. + +1. [Down the node to be reconfigured](#down) +2. [Reconfigure node to use new address](#reconfigure) +3. [Repeat previous steps on each node](#repeat) + + +<a id="down"></a> +#### Down the Node + +1. Stop Riak on `node1.localdomain`: + + ```bash + riak stop + ``` + + The output should look like this: + + ``` + Attempting to restart script through sudo -H -u riak + ok + ``` + +2. 
From the `node2.localdomain` node, mark `riak@10.1.42.11` down:
+
+   ```bash
+   riak-admin down riak@10.1.42.11
+   ```
+
+   Successfully marking the node down should produce output like this:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   Success: "riak@10.1.42.11" marked as down
+   ```
+
+   This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node.
+
+<a id="reconfigure"></a>
+#### Reconfigure Node to Use New Address
+
+Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps:
+
+1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example:
+
+   `riak.conf`: `nodename = riak@192.168.17.11`
+   `vm.args` : `-name riak@192.168.17.11`
+
+2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Rename Single Node Clusters](#rename-single-node-clusters).
+
+3. Rename the node's `ring` directory, the location of which is described in step 4 of [Rename Single Node Clusters](#rename-single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process.
+
+4. Start Riak on `node1.localdomain`.
+
+   ```bash
+   riak start
+   ```
+
+5. Join the node back into the cluster.
+
+   ```bash
+   riak-admin cluster join riak@10.1.42.12
+   ```
+
+   Successful staging of the join request should have output like this:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12'
+   ```
+
+6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`:
+
+   ```bash
+   riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11
+   ```
+
+   Successful force replacement staging output looks like this:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11'
+   ```
+
+7. 
Review the new changes with `riak-admin cluster plan`:
+
+   ```bash
+   riak-admin cluster plan
+   ```
+
+   Example output:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   =========================== Staged Changes ============================
+   Action         Nodes(s)
+   -----------------------------------------------------------------------
+   join           'riak@192.168.17.11'
+   force-replace  'riak@10.1.42.11' with 'riak@192.168.17.11'
+   -----------------------------------------------------------------------
+
+   WARNING: All of 'riak@10.1.42.11' replicas will be lost
+
+   NOTE: Applying these changes will result in 1 cluster transition
+
+   #######################################################################
+                        After cluster transition 1/1
+   #######################################################################
+
+   ============================= Membership ==============================
+   Status     Ring    Pending    Node
+   -----------------------------------------------------------------------
+   valid     20.3%      --      'riak@192.168.17.11'
+   valid     20.3%      --      'riak@10.1.42.12'
+   valid     20.3%      --      'riak@10.1.42.13'
+   valid     20.3%      --      'riak@10.1.42.14'
+   valid     18.8%      --      'riak@10.1.42.15'
+   -----------------------------------------------------------------------
+   Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+   Partitions reassigned from cluster changes: 13
+     13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11'
+   ```
+
+8. Commit the new changes to the cluster with `riak-admin cluster commit`:
+
+   ```bash
+   riak-admin cluster commit
+   ```
+
+   Output from the command should resemble this example:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   Cluster changes committed
+   ```
+
+9. Check that the node is participating in the cluster and functioning as expected:
+
+   ```bash
+   riak-admin member-status
+   ```
+
+   Output should resemble this example:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   ============================= Membership ==============================
+   Status     Ring    Pending    Node
+   -----------------------------------------------------------------------
+   valid     20.3%      --      'riak@192.168.17.11'
+   valid     20.3%      --      'riak@10.1.42.12'
+   valid     20.3%      --      'riak@10.1.42.13'
+   valid     20.3%      --      'riak@10.1.42.14'
+   valid     18.8%      --      'riak@10.1.42.15'
+   -----------------------------------------------------------------------
+   Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+   ```
+
+10. Monitor hinted handoff transfers using the `riak-admin transfers` command to ensure that they have finished.
+
+11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed.
+
+{{% note title="Note" %}}
+When using the `riak-admin cluster force-replace` command, you will always get a
+warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be
+lost`. Since we didn't delete any data files and we are replacing the node
+with itself under a new name, we will not lose any replicas.
+{{% /note %}}
+
+<a id="repeat"></a>
+#### Repeat previous steps on each node
+
+Repeat the steps above for each of the remaining nodes in the cluster.
+
+Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster.
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the steps below can be used to rename nodes in a cluster that is being restored from backups. These steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (or `-name` in `vm.args`) from `riak@10.1.42.11` to your new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would mark `riak@10.1.42.12` down with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+   ```bash
+   ================================= Membership ==================================
+   Status     Ring    Pending    Node
+   -------------------------------------------------------------------------------
+   valid     20.3%      --      'riak@192.168.17.11'
+   down      20.3%      --      'riak@10.1.42.12'
+   down      20.3%      --      'riak@10.1.42.13'
+   down      20.3%      --      'riak@10.1.42.14'
+   down      18.8%      --      'riak@10.1.42.15'
+   -------------------------------------------------------------------------------
+   Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+
+   ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+   ```bash
+   ================================== Claimant ===================================
+   Claimant:  'riak@192.168.17.11'
+   Status:     up
+   Ring Ready: true
+
+   ============================== Ownership Handoff ==============================
+   No pending changes.
+
+   ============================== Unreachable Nodes ==============================
+   All nodes are up and reachable
+   ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args`, as described above.
+
+2. Move aside the ring directory. As in [Rename Multi-Node Clusters](#rename-multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. 
Force replace each node with its old node name. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`. + +6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to below: + + ```bash + =============================== Staged Changes ================================ + Action Details(s) + ------------------------------------------------------------------------------- + force-replace 'riak@10.1.42.12' with 'riak@192.168.17.12' + force-replace 'riak@10.1.42.13' with 'riak@192.168.17.13' + force-replace 'riak@10.1.42.14' with 'riak@192.168.17.14' + force-replace 'riak@10.1.42.15' with 'riak@192.168.17.15' + join 'riak@192.168.17.12' + join 'riak@192.168.17.13' + join 'riak@192.168.17.14' + join 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.12' replicas will be lost + WARNING: All of 'riak@10.1.42.13' replicas will be lost + WARNING: All of 'riak@10.1.42.14' replicas will be lost + WARNING: All of 'riak@10.1.42.15' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ############################################################################### + After cluster transition 1/1 + ############################################################################### + + ================================= Membership ================================== + Status Ring Pending Node + ------------------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@192.168.17.12' + valid 20.3% -- 'riak@192.168.17.13' + valid 20.3% -- 'riak@192.168.17.14' + valid 18.8% -- 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 51 + 13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12' + 13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13' + 13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14' + 12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15' + ``` + +7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`. + +8. 
Once the cluster transition has completed, all node names should be changed and marked as valid in `riak-admin member-status`, like below:
+
+   ```bash
+   ================================= Membership ==================================
+   Status     Ring    Pending    Node
+   -------------------------------------------------------------------------------
+   valid     20.3%      --      'riak@192.168.17.11'
+   valid     20.3%      --      'riak@192.168.17.12'
+   valid     20.3%      --      'riak@192.168.17.13'
+   valid     20.3%      --      'riak@192.168.17.14'
+   valid     18.8%      --      'riak@192.168.17.15'
+   -------------------------------------------------------------------------------
+   Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+   ```
+
+
+
diff --git a/content/riak/kv/2.9.10/using/cluster-operations/handoff.md b/content/riak/kv/2.9.10/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..ab814deab1
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/cluster-operations/handoff.md
@@ -0,0 +1,119 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/handoff
+  - /riak/kv/2.9.10/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to change your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command requires
+that you specify both a node or nodes to be targeted by the command and
+whether you'd like to disable inbound handoff, outbound handoff, or
+both. The `disable` command works just like `enable`. This command
+would disable all forms of handoff on all nodes, to give just one
+example:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster.
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table that will provide the following information
+about each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, you will see details for each ongoing
+transfer.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the sections above.
+
+
+
diff --git a/content/riak/kv/2.9.10/using/cluster-operations/inspecting-node.md b/content/riak/kv/2.9.10/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..50a557d01f
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,495 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/nodes/inspecting
+  - /riak/kv/2.9.10/ops/running/nodes/inspecting
+---
+
+When you need to inspect a Riak node to gather performance metrics or to
+investigate potential issues, a number of tools are available to help.
+These tools are either included with Riak itself or made available
+through the Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status of a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note that for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required for statistics to be generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node.
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET
+FSM Siblings are inextricably linked. These are one-minute stats.
+
+Stat | Description
+------------------------------|---------------------------------------------------
+`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute
+
+### Total Stats
+
+Total Stats represent the total number of times a particular activity
+has occurred since this node was started.
+
+Stat | Description
+---------------------------------------|---------------------------------------------------
+`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes
+`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes
+`vnode_gets_total` | Total number of GETs coordinated by local vnodes
+`vnode_puts_total` | Total number of PUTs coordinated by local vnodes
+`read_repairs_total` | Total number of Read Repairs this node has coordinated
+`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination
+`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy
+`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads
+`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes
+`vnode_index_writes_postings_total` | Total number of individual secondary index values written
+`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes
+`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted
+`pbc_connects_total` | Total number of Protocol Buffers connections made
+`precommit_fail` | Total number of pre-commit hook failures
+`postcommit_fail` | Total number of post-commit hook failures
+`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection
+`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection
+`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas
+`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas
+`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas
+`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas
+
+### Timestamps
+
+Some of the Erlang applications that make up Riak contribute
+statistics to `riak-admin status`. The below timestamps record, in
+Epoch time, the last time statistics for that application were
+generated.
+
+Stat | Description
+--------------------|---------------------------------------------------
+`riak_kv_stat_ts` | The last time Riak KV stats were generated.
+`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated.
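+
+Since `riak-admin status` prints each stat as its own `name : value` line, a
+quick way to spot-check a few of the stats described above is to filter the
+full listing. A minimal sketch (the stat names are taken from the tables in
+this section):
+
+```bash
+riak-admin status | grep -E '^(node_gets|node_puts|read_repairs_total|riak_kv_stat_ts)'
+```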
+
+### Ring
+
+General ring information is reported in `riak-admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members` | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership` | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang's cpu_sup module,
+documentation for which can be found at [ErlDocs:
+cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1` | The average number of active processes for the last 1 minute (equivalent to top(1) command's load average when divided by 256)
+`cpu_avg5` | The average number of active processes for the last 5 minutes (equivalent to top(1) command's load average when divided by 256)
+`cpu_avg15` | The average number of active processes for the last 15 minutes (equivalent to top(1) command's load average when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine,
+documentation for which can be found at [ErlDocs:
+Memory](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total` | Total allocated memory (sum of processes and system)
+`memory_processes` | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system` | Total allocated memory that is not directly related to an Erlang process
+`memory_atom` | Total amount of memory currently allocated for atom storage
+`memory_atom_used` | Total amount of memory currently used for atom storage
+`memory_binary` | Total amount of memory used for binaries
+`memory_code` | Total amount of memory allocated for Erlang code
+`memory_ets` | Total memory allocated for Erlang Term Storage
+`mem_total` | Total available system memory
+`mem_allocated` | Total memory allocated for this node
+
+### Erlang VM
+
+The below statistics describe properties of the Erlang VM.
+ +Stat | Description +--------------------------|--------------------------------------------------- +`nodename` | The name this node uses to identify itself +`connected_nodes` | A list of the nodes that this node is aware of at this time +`sys_driver_version` | String representing the Erlang driver version in use by the runtime system +`sys_global_heaps_size` | Current size of the shared global heap +`sys_heap_type` | String representing the heap type in use (one of private, shared, hybrid) +`sys_logical_processors` | Number of logical processors available on the system +`sys_otp_release` | Erlang OTP release version in use on the node +`sys_process_count` | Number of processes currently running in the Erlang VM +`sys_smp_support` | Boolean value representing whether symmetric multi-processing (SMP) is available +`sys_system_version` | Detailed Erlang version information +`sys_system_architecture` | The node operating system and hardware architecture +`sys_threads_enabled` | Boolean value representing whether threads are enabled +`sys_thread_pool_size` | Number of threads in the asynchronous thread pool +`sys_wordsize` | Size of Erlang term words in bytes as an integer, for examples, on 32-bit architectures 4 is returned and on 64-bit architectures 8 is returned + +### Miscellaneous Information + +Miscellaneous Information provide additional details particular to this +node. + +Stat | Description +---------------------------|--------------------------------------------------- +`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as undefined if LevelDB is not being used. +`disk` | Information about the disk, taken from Erlang's disksup module. Reported as [{"ID",KBytes_Used,Percent_Util}]. +`storage_backend` | The storage backend currently in use. + +### Pipeline Metrics + +The following metrics from from riak_pipe are generated during MapReduce +operations. + +Stat | Description +--------------------------------|--------------------------------------------------- +`pipeline_active` | The number of pipelines active in the last 60 seconds +`pipeline_create_count` | The total number of pipelines created since the node was started +`pipeline_create_error_count` | The total number of pipeline creation errors since the node was started +`pipeline_create_error_one` | The number of pipeline creation errors in the last 60 seconds +`pipeline_create_one` | The number of pipelines created in the last 60 seconds + +### Application and Subsystem Versions + +The specific version of each Erlang application and subsystem which +makes up a Riak node is present in the `riak-admin status` output. Each +application is linked below next to it's version identifier. 
+ +Stat | Description +------------------------|--------------------------------------------------- +`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl) +`riak_control_version` | [Riak Control](http://github.com/basho/riak_control) +`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info) +`riak_search_version` | [Riak Search](http://github.com/basho/riak_search) +`merge_index_version` | [Merge Index](http://github.com/basho/merge_index) +`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv) +`sidejob_version` | [Sidejob](http://github.com/basho/sidejob) +`riak_api_version` | [Riak API](http://github.com/basho/riak_api) +`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe) +`riak_core_version` | [Riak Core](http://github.com/basho/riak_core) +`bitcask_version` | [Bitcask](http://github.com/basho/bitcask) +`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats) + `webmachine_version` | [Webmachine](http://github.com/basho/webmachine) +`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb) +`inets_version` | [inets](http://erlang.org/doc/apps/inets/) +`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js) +`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/) +`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/) +`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon) +`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/) +`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/) +`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/) +`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/) +`lager_version` | [Lager](http://github.com/DeadZen/lager) +`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush) +`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/) +`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/) +`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/) +`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/) + +### Riak Search Statistics + +The following statistics related to Riak Search message queues are +available. 
+ +Stat | Description +-----------------------------|--------------------------------------------------- +`riak_search_vnodeq_max` | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_mean` | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_median` | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_min` | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_total` | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started +`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem + +Note that under ideal operation and with the exception of +`riak_search_vnodes_running` these statistics should contain low values +(e.g., 0-10). Presence of higher values could be indicative of an issue. + +## `riak-debug` + +The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes. + +`riak-debug` also runs `riak-admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well. + +{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}} +The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. It should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion. +{{% /note %}} + +## Strong Consistency Stats + +Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The table below lists those stats. 
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +## riak-admin diag + +Running `riak-admin diag` by itself will perform a check of all of the +data partitions in your cluster. 
It will return a listing of partitions
+that have been checked, each of which looks something like this:
+
+```
+{1392993748081016843912887106182707253109560705024, % the partition checked
+ 'dev-rel@127.0.0.1'},                              % that partition's nodename
+```
+
+At the end of that (potentially very long) listing of checked
+partitions, it will print notices, warnings, and other pieces of
+information about issues that it has found, including date/time, message
+type, and a detailed description. Here's an example:
+
+```
+15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
+15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
+```
+
+Messages bear the following types (derived from
+[syslog](http://en.wikipedia.org/wiki/Syslog) security levels):
+
+* `debug`
+* `info`
+* `notice`
+* `warning`
+* `error`
+* `critical`
+* `alert`
+* `emergency`
+
+#### Command flags
+
+Attaching the `--help` flag will return a list of flags and commands
+that can be used with Riaknostic:
+
+```
+Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]
+
+-h, --help Display help/usage dialogue
+-d, --level Minimum message severity level (default: notice)
+-l, --list Describe available diagnostic tasks
+--export Package system info in '/export.zip'
+check_name A specific check to run
+```
+
+Running `riak-admin diag` with the `--list` flag will return a list of
+available diagnostic checks. The following checks are available:
+
+Check | Description
+:-----|:-----------
+`disk` | Data directory permissions and atime
+`dumps` | Find crash dumps
+`memory_use` | Measure memory usage
+`nodes_connected` | Cluster node liveness
+`ring_membership` | Cluster membership validity
+`ring_preflists` | Check if the ring satisfies `n_val`
+`ring_size` | Check if the ring size is valid
+`search` | Check whether Riak Search is enabled on all nodes
+
+The `--level` flag enables you to specify the log level and thus to
+filter messages based on type. You can pass in any of the message types
+listed above (`debug`, `info`, etc.).
+
+The `--level` flag can be used when running `riak-admin diag` with or
+without specifying a diagnostic check.
+
+#### Contributing
+
+Do you have an idea that would help us improve Riaknostic? If so, fork
+the [GitHub repository](https://github.com/basho/riaknostic) and send us
+a pull request with your changes. The code is documented with
+[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
+Docs a read before you contribute.
+
+If you want to run the Riaknostic script while developing and you don't
+have it hooked up to your local Riak installation, you can invoke it
+directly like so:
+
+```bash
+./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
+```
+
+Those extra options are usually assigned by the `riak-admin` script for
+you, but here's how to set them:
+
+* `--etc` - The location of your Riak configuration directory (usually
+  `/etc`). In the example above, configuration is in the generated
+  directory of a source checkout of Riak.
+* `--base` - The "base" directory of Riak, usually the root of the
+  generated directory or `/usr/lib/riak` on Linux. Scan the
+  `riak-admin` script for how the `RUNNER_BASE_DIR` variable is
+  assigned on your platform.
+* `--user` - The user/UID as which the Riak node runs. In a source
+  checkout, it's the current user; on most systems, it's `riak`.
+
+## Related Resources
+
+* [The riak-admin configuration management tool](../../admin/riak-admin/)
+* [Riaknostic](http://riaknostic.basho.com/)
+* [HTTP API Status](../../../developing/api/http/status/)
+
+
+
diff --git a/content/riak/kv/2.9.10/using/cluster-operations/load-balancing.md b/content/riak/kv/2.9.10/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..b466cf47df
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/cluster-operations/load-balancing.md
@@ -0,0 +1,19 @@
+---
+draft: true
+title: "Load Balancing"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+#menu:
+#  riak_kv-2.9.10:
+#    name: "Load Balancing"
+#    identifier: "cluster_operations_load_balancing"
+#    weight: 111
+#    parent: "managing_cluster_operations"
+toc: true
+---
+
+**TODO: Add content (not sure where this exists in docs)**
+
+
+
diff --git a/content/riak/kv/2.9.10/using/cluster-operations/logging.md b/content/riak/kv/2.9.10/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..eb64125f8f
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/cluster-operations/logging.md
@@ -0,0 +1,45 @@
+---
+title: "Enabling and Disabling Debug Logging"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Logging"
+    identifier: "cluster_operations_logging"
+    weight: 105
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+If you'd like to enable debug logging on the current node, i.e. set the
+console log level to `debug`, you can do so without restarting the node
+by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:
+
+```erlang
+lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
+```
+
+You should replace the file location above (`/var/log/riak/console.log`)
+with your platform-specific location, e.g. `./log/console.log` for a
+source installation. This location is specified by the
+`log.console.file` parameter.
+
+If you'd like to enable debug logging on _all_ nodes instead of just one
+node, you can enter the Erlang console of any running node by running
+`riak attach` and entering the following:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
+```
+
+As before, use the appropriate log file location for your cluster.
+
+At any time, you can set the log level back to `info`:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/using/cluster-operations/replacing-node.md b/content/riak/kv/2.9.10/using/cluster-operations/replacing-node.md
new file mode 100644
index 0000000000..22114a7d35
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/cluster-operations/replacing-node.md
@@ -0,0 +1,98 @@
+---
+title: "Replacing a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Replacing a Node"
+    identifier: "cluster_operations_replace_node"
+    weight: 102
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+At some point, for various reasons, you might need to replace a node in
+your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/2.9.10/using/repair-recovery)). Here is the recommended way to go
+about replacing a node.
+
+1. Back up your data directory on the node in question. In this example
+scenario, we'll call the node `riak4`:
+
+    ```bash
+    sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+    ```
+
+    If you have any unforeseen issues at any point in the node
+    replacement process, you can restore the node's data from this
+    backup.
+
+2. Download and install Riak on the new node you wish to bring into the
+cluster and have it replace the `riak4` node. We'll call the new node
+`riak7` for the purpose of this example.
+
+3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-cli/#start):
+
+    ```bash
+    riak start
+    ```
+
+4. Plan the join of the new `riak7` node to an existing node already
+participating in the cluster (for example, `riak0`) with the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster) command executed on the new `riak7` node:
+
+    ```bash
+    riak-admin cluster join riak0
+    ```
+
+5. Plan the replacement of the existing `riak4` node with the new
+`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster replace riak4 riak7
+    ```
+
+    <div class=info>
+    <div class=title>Single Nodes</div>
+    If a node is started singly using default settings (as, for example,
+    you might do when you are building your first test environment), you
+    will need to remove the ring files from the data directory after you
+    edit `/etc/vm.args`. `riak-admin cluster replace` will not work as
+    the node has not been joined to a cluster.
+    </div>
+
+6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster) command executed on the new
+`riak7` node:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+7. If the changes are correct, you can commit them with the
+[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster):
+
+    ```bash
+    riak-admin cluster clear
+    ```
+
+Once you have successfully replaced the node, the old node (`riak4` in
+this example) should begin leaving the cluster. You can check on ring
+readiness after replacing the node with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#ringready)
+and [`riak-admin member-status`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#member-status)
+commands.
+
+{{% note title="Ring Settling" %}}
+You'll need to make sure that no other ring changes occur between the time
+when you start the new node and the time when the ring settles with the new
+IP info.
+
+The ring is considered settled when the new node reports `true` when you run
+the `riak-admin ringready` command.
+{{% /note %}} + + + diff --git a/content/riak/kv/2.9.10/using/cluster-operations/secondary-indexes.md b/content/riak/kv/2.9.10/using/cluster-operations/secondary-indexes.md new file mode 100644 index 0000000000..9236fad45b --- /dev/null +++ b/content/riak/kv/2.9.10/using/cluster-operations/secondary-indexes.md @@ -0,0 +1,83 @@ +--- +draft: true +title: "Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.10 +#menu: +# riak_kv-2.9.10: +# name: "Secondary Indexes" +# identifier: "cluster_operations_2i" +# weight: 109 +# parent: "managing_cluster_operations" +toc: true +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + diff --git a/content/riak/kv/2.9.10/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.9.10/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..622d314660 --- /dev/null +++ b/content/riak/kv/2.9.10/using/cluster-operations/strong-consistency.md @@ -0,0 +1,74 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +--- + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats. 
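+
+Because every strong consistency stat shares the `consistent_` prefix,
+one illustrative way to review them all at once is to filter the
+`riak-admin status` output. This is just a sketch and assumes the
+conventional one-stat-per-line output format:
+
+```bash
+# List all strong consistency stats reported by this node
+riak-admin status | grep consistent_
+```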
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response + + + diff --git a/content/riak/kv/2.9.10/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/2.9.10/using/cluster-operations/tictac-active-anti-entropy.md new file mode 100644 index 0000000000..2c14d56c64 --- /dev/null +++ b/content/riak/kv/2.9.10/using/cluster-operations/tictac-active-anti-entropy.md @@ -0,0 +1,33 @@ +--- +title: "TicTac Active Anti-Entropy" +description: "An Active Anti-Entropy library" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "TicTac Active Anti-Entropy" + 
identifier: "TicTac_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/kv/2.9.10/ops/advanced/tictacaae/ + - /riak/2.9.10/ops/advanced/ticktacaae/ +--- + + + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +## TicTac AAE + +The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to full integrate the library into the KV 3.0 release. + +TicTac Active Anti-Entropy makes two changes to the way Anti-Entropy has previously worked in Riak. The first change is to the way Merkle Trees are contructed so that they are built incrementally. The second change allows the underlying Anti-entropy key store to be key-ordered while still allowing faster access to keys via their Merkle tree location or the last modified date of the object. + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and +off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements. + + diff --git a/content/riak/kv/2.9.10/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.9.10/using/cluster-operations/v2-multi-datacenter.md new file mode 100644 index 0000000000..8f3caae168 --- /dev/null +++ b/content/riak/kv/2.9.10/using/cluster-operations/v2-multi-datacenter.md @@ -0,0 +1,262 @@ +--- +title_supertext: "V2 Multi-Datacenter" +title: "Replication Operations" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "V2 Multi-Datacenter" + identifier: "cluster_operations_v2" + weight: 115 + parent: "managing_cluster_operations" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.10/ops/mdc/v2/operations + - /riak/kv/2.9.10/ops/mdc/v2/operations +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter/) instead. +{{% /note %}} + +Riak's Multi-Datacenter Replication system is largely +controlled by the `riak-repl` command. The sections below detail the +available subcommands. + +## add-listener + +Adds a listener (primary) to the given node, IP address, and port. + +```bash +riak-repl add-listener <nodename> <listen_ip> <port> +``` + +Below is an example usage: + +```bash +riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010 +``` + +## add-nat-listener + +Adds a NAT-aware listener (primary) to the given node, IP address, port, +NAT IP, and NAT port. If a non-NAT listener already exists with the same +internal IP and port, it is "upgraded” to a NAT Listener. + +```bash +riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port> +``` + +Below is an example usage: + +```bash +riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010 +``` + +## del-listener + +Removes and shuts down a listener (primary) on the given node, IP +address, and port. + +```bash +riak-repl del-listener <nodename> <listen_ip> <port> +``` + +Below is an example usage: + +```bash +riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010 +``` + +## add-site + +Adds a site (secondary) to the local node, connecting to the specified +listener. 
+ +```bash +riak-repl add-site <ipaddr> <portnum> <sitename> +``` + +Below is an example usage: + +```bash +riak-repl add-site 10.0.1.156 9010 newyork +``` + +## del-site + +Removes a site (secondary) from the local node by name. + +```bash +riak-repl del-site <sitename> +``` + +Below is an example usage: + +```bash +riak-repl del-site newyork +``` + +## status + +Obtains status information about replication. Reports counts on how much +data has been transmitted, transfer rates, message queue lengths of +clients and servers, number of fullsync operations, and connection +status. This command only displays useful information on the leader +node. + +```bash +riak-repl status +``` + +## start-fullsync + +Manually initiates a fullsync operation with connected sites. + +```bash +riak-repl start-fullsync +``` + +## cancel-fullsync + +Cancels any fullsync operations in progress. If a partition is in +progress, synchronization will stop after that partition completes. +During cancellation, `riak-repl status` will show `cancelled` in the +status. + +```bash +riak-repl cancel-fullsync +``` + +## pause-fullsync + +Pauses any fullsync operations in progress. If a partition is in +progress, synchronization will pause after that partition completes. +While paused, `riak-repl status` will show `paused` in the status +information. Fullsync may be cancelled while paused. + +```bash +riak-repl pause-fullsync +``` + +## resume-fullsync + +Resumes any fullsync operations that were paused. If a fullsync +operation was running at the time of the pause, the next partition will +be synchronized. If not, it will wait until the next `start-fullsync` +command or `fullsync_interval`. + +```bash +riak-repl resume-fullsync +``` + +## riak-repl Status Output + +The following definitions describe the output of the `riak-repl status` +command. Please note that many of these statistics will only appear on +the current leader node, and that all counts will be reset to 0 upon +restarting Riak. + +### Client + +Field | Description +:-----|:----------- +`client_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.10/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a> +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected sites +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of site connections made to this node +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. 
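+
+To spot-check the client-side counters above without reading the full
+status dump, the `riak-repl status` output can be filtered and polled.
+A sketch, assuming the stats are printed one per line and that the
+`watch` utility is available on your system:
+
+```bash
+# Re-run riak-repl status once a minute and show only client stats,
+# which is roughly how often the kbps snapshots are updated
+watch -n 60 "riak-repl status | grep client_"
+```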
+
+### Server
+
+Field | Description
+:-----|:-----------
+`server_bytes_recv` | The total number of bytes the server (listener) has received
+`server_bytes_sent` | The total number of bytes the server (listener) has sent
+`server_connect_errors` | The number of listener to site connection errors
+`server_connects` | The number of times the listener connects to the client site
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.10/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a>
+
+### Elections and Objects
+
+Field | Description
+:-----|:-----------
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+
+### Other
+
+Field | Description
+:-----|:-----------
+`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>`
+`[sitename]_ips` | Defines a replication site
+`leader` | Which node is the current leader of the cluster
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the client (site) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The listeners currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>`connected` - The IP address and port of a connected client (site)</li><li>`cluster_name` - The name of the connected client (site)</li><li>`connecting` - The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | State shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul>
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/2.9.10/configuring/v2-multi-datacenter/) guide for more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the server (listener) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/2.9.10/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above
+`state` | State shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+## Keylist Strategy
+
+The following fields appear under both the `keylist_server` and
+`keylist_client` fields. Any differences are described in the table.
+
+Field | Description
+------|------------
+`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
+`partition_start` | The number of elapsed seconds since replication has started on a given partition
+`stage_start` | The number of elapsed seconds since replication has started on a given stage
+`get_pool_size` | The number of Riak get finite state workers available to process requests
+
+
+
diff --git a/content/riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter.md
new file mode 100644
index 0000000000..471d897f3a
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter.md
@@ -0,0 +1,424 @@
+---
+title_supertext: "V3 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "V3 Multi-Datacenter"
+    identifier: "cluster_operations_v3"
+    weight: 114
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v3/operations
+  - /riak/kv/2.9.10/ops/mdc/v3/operations
+---
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{<baseurl>}}riak/kv/2.9.10/using/reference/multi-datacenter/statistics
+
+This document explains how to manage replication with the `riak-repl`
+command. The behavior of some of these commands can be altered by
+setting the appropriate [configuration][config v3 mdc] values.
+
+All commands need to be run only once on a single node of a cluster for
+the changes to propagate to all other nodes. All changes will persist
+across node restarts and will automatically take effect when nodes are
+added to the cluster.
+
+## Cluster Connectivity
+
+#### clustername
+
+Set the `clustername` for all nodes in a Riak cluster.
+
+* Without a parameter, returns the current name of the cluster
+* With a parameter, names the current cluster
+
+To **set** the `clustername`:
+
+* Syntax: `riak-repl clustername <clustername>`
+* Example: `riak-repl clustername Boston`
+
+To **get** the `clustername`:
+
+* Syntax: `riak-repl clustername`
+* Example: `riak-repl clustername`
+
+#### connect
+
+The `connect` command establishes communications from a source cluster
+to a sink cluster of the same ring size. The `host:port` of the sink
+cluster is used for this. The IP and port to connect to can be found in
+the `advanced.config` of the remote cluster, under `riak_core` and
+`cluster_mgr`.
+
+The `host` can be either an IP address
+
+* Syntax: `riak-repl connect <ip>:<port>`
+* Example: `riak-repl connect 192.168.2.1:9080`
+
+...or a hostname that will resolve to an IP address.
+
+* Syntax: `riak-repl connect <host>:<port>`
+* Example: `riak-repl connect Austin:9080`
+
+#### disconnect
+
+Disconnects a source cluster from a sink cluster.
+
+You may define a `host:port` combination
+
+* Syntax: `riak-repl disconnect <host>:<port>`
+* Example: `riak-repl disconnect 192.168.2.1:9080`
+
+...or use the *name* of the cluster.
+
+* Syntax: `riak-repl disconnect <sink_clustername>`
+* Example: `riak-repl disconnect Austin`
+
+#### connections
+
+Display a list of connections between source and sink clusters.
+
+* Syntax: `riak-repl connections`
+* Example: `riak-repl connections`
+
+#### clusterstats
+
+Displays current cluster stats using an optional `ip:port` as well as an
+optional `protocol-id`.
+
+`protocol-id` can be one of the following:
+
+* `cluster_mgr`
+* `rt_repl`
+* `fs_repl`
+
+The `clusterstats` command in use:
+
+* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>`
+* Example: `riak-repl clusterstats 192.168.2.1:9080`
+* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl`
+
+
+## Realtime Replication Commands
+
+#### realtime enable
+
+Enable realtime replication from a source cluster to sink clusters.
+
+This will start queuing updates for replication. The cluster will still
+require an invocation of `realtime start` for replication to occur.
+
+* Syntax: `riak-repl realtime enable <sink_clustername>`
+* Example: `riak-repl realtime enable Austin`
+
+#### realtime disable
+
+Disable realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime disable <sink_clustername>`
+* Example: `riak-repl realtime disable Austin`
+
+
+#### realtime start
+
+Start realtime replication connections from a source cluster to sink
+clusters. See also `realtime enable` (above).
+
+* Syntax: `riak-repl realtime start <sink_clustername>`
+* Example: `riak-repl realtime start Austin`
+
+#### realtime stop
+
+Stop realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime stop <sink_clustername>`
+* Example: `riak-repl realtime stop Austin`
+
+
+## Fullsync Replication Commands
+
+These behaviors can be altered by using the `advanced.config`
+`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information.
+
+#### fullsync enable
+
+Enable fullsync replication from a source cluster to sink clusters. By
+default, a fullsync will begin as soon as a connection to the remote
+cluster is established.
+
+* Syntax: `riak-repl fullsync enable <sink_clustername>`
+* Example: `riak-repl fullsync enable Austin`
+
+#### fullsync disable
+
+Disables fullsync for a cluster.
+
+* Syntax: `riak-repl fullsync disable <sink_clustername>`
+* Example: `riak-repl fullsync disable Austin`
+
+#### fullsync start
+
+Starts a fullsync. If the application configuration
+`fullsync_on_connect` is set to `false`, a fullsync needs to be started
+manually. This is also used to trigger a periodic fullsync using a cron
+job. While a fullsync is in progress, a `start` command is ignored and a
+message is logged.
+
+* Syntax: `riak-repl fullsync start <sink_clustername>`
+* Example: `riak-repl fullsync start Austin`
+
+#### fullsync stop
+
+Stops a fullsync.
+
+* Syntax: `riak-repl fullsync stop <sink_clustername>`
+* Example: `riak-repl fullsync stop Austin`
+
+## Cascading Realtime Writes
+
+#### realtime cascades
+
+Shows the current cascading realtime setting.
+
+* Syntax: `realtime cascades`
+* Example: `riak-repl realtime cascades`
+
+#### realtime cascades always
+
+Enable realtime cascading writes.
+
+* Syntax: `realtime cascades always`
+* Example: `riak-repl realtime cascades always`
+
+#### realtime cascades never
+
+Disable realtime cascading writes.
+
+* Syntax: `realtime cascades never`
+* Example: `riak-repl realtime cascades never`
+
+
+## NAT
+
+**Note**: See the [V3 Multi Data Center Replication With NAT][config v3 nat] for more information.
+
+#### nat-map show
+
+Show the current NAT mapping table.
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### `proxy-get enable`
+
+Enable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provide a redirection to the `<to-cluster-id>` for `proxy_get` if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+
+Show the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+
+Deletes an existing redirect so that proxy_gets go to the original
+provider cluster id again.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Display this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+    ```bash
+    riak-repl show-local-cluster-id
+    ```
+
+    Possible output:
+
+    ```
+    local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+    ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster. This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster across all nodes on that cluster for a fullsync
+to a sink cluster. This means that if one has configured fullsync for two
+different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync
+workers can be in progress. This only affects nodes on the source cluster
+on which this parameter is defined via the configuration file or the
+command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node’s number of maximum connections. This only affects nodes on the
+sink cluster on which this parameter is defined via the configuration
+file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
+(Version 3) separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+    ```bash
+    riak-repl modes mode_repl12 mode_repl13
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+    ```bash
+    riak-repl modes
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and realtime replication replicate data from source clusters to sink clusters,
+but some configurations and metadata (such as search indices and bucket properties) will
+not be replicated.
+
+Non-replication of certain configurations and metadata supports
+heterogeneous cluster configurations in replication, but there are operational
+steps you can take when you want homogeneous cluster configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you would change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Bucket and bucket type properties on the source cluster
+will _not_ be replicated from source clusters to sink clusters.
+
+If you want the properties for buckets or bucket types
+present on the source cluster to be propagated to sink clusters,
+you should update this data for each cluster at the same
+time you would change the source cluster.
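+
+For example, to keep a search index and a bucket type's properties in
+step across clusters, apply the same change to a node in each cluster.
+A hypothetical sketch (`source-node` and `sink-node` are placeholder
+hostnames; `my_index` and `mytype` are placeholder names):
+
+```bash
+# Create the same search index on both clusters, since replication
+# will not propagate the index definition
+curl -XPUT http://source-node:8098/search/index/my_index
+curl -XPUT http://sink-node:8098/search/index/my_index
+
+# Likewise, apply any bucket type property change on a node in each
+# cluster, not just on the source
+riak-admin bucket-type update mytype '{"props":{"n_val":5}}'
+```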
+
+
+
diff --git a/content/riak/kv/2.9.10/using/performance.md b/content/riak/kv/2.9.10/using/performance.md
new file mode 100644
index 0000000000..f2c7e1b7f9
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/performance.md
@@ -0,0 +1,267 @@
+---
+title: "Improving Performance"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Performance"
+    identifier: "managing_performance"
+    weight: 206
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/kv/2.9.10/ops/tuning/linux/
+  - /riak/2.9.10/ops/tuning/linux/
+---
+
+Many Unix-like operating systems and distributions are tuned for desktop
+or light use out of the box and not for a production database. This
+guide describes recommended system performance tunings for operators of
+new and existing Riak clusters. The tunings presented in this guide
+should be considered a starting point. It is important to make note of
+what changes are made, and when, in order to measure the impact of those
+changes.
+
+For performance and tuning recommendations specific to running Riak
+clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.10/using/performance/amazon-web-services).
+
+{{% note title="Note on other operating systems" %}}
+Unless otherwise specified, the tunings recommended below are for Linux
+distributions. Users implementing Riak on BSD and Solaris distributions can
+use these tuning recommendations to make analogous changes in those operating
+systems.
+{{% /note %}}
+
+## Storage and File System Tuning
+
+### Virtual Memory
+
+Due to the heavily I/O-focused profile of Riak, swap usage can result in
+the entire server becoming unresponsive. We recommend setting
+`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much
+as possible:
+
+```config
+vm.swappiness = 0
+```
+
+Ideally, you should disable swap entirely to ensure that Riak's process
+pages are never swapped. With swap disabled, Riak will instead crash in
+situations where it runs out of memory. This will leave a crash dump
+file, named `erl_crash.dump`, in the `/var/log/riak` directory which can
+be used to determine the cause of the memory usage.
+
+### Transparent Huge Pages (THP)
+
+Owing to the way that THP handles memory, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately, this operation is rather OS-specific. As many of our customers are running Red Hat 6, we have included instructions for it below. If you are using a different operating system, please refer to the documentation for your OS.
+
+In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line:
+
+```
+transparent_hugepage=never
+```
+
+For the change to become effective, a server reboot is required.
+
+{{% note title="Note on Kernel Tuning Tools" %}}
+Some kernel tuning tools, such as ktune, specify that THP should be enabled. This can cause THP to appear enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the kernel tuning tool you are using as to how to disable THP.
+{{% /note %}}
+
+### Mounts
+
+Riak makes heavy use of disk I/O for its storage operations. It is
+important that you mount volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
+touched when read. This flag can be set temporarily using the following
+command:
+
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` flag can be set in `/etc/fstab` to make the
+mount option permanent.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is completely fair queuing
+or `cfq`, which is designed for desktop use. While `cfq` is a good
+general-purpose scheduler, it is not designed to provide the kind of
+throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* The `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* The `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+
+To check the scheduler depth for block device `sda`, use the following
+command:
+
+```bash
+cat /sys/block/sda/queue/nr_requests
+```
+
+To increase the scheduler depth to 1024, use the following command:
+
+```bash
+echo 1024 > /sys/block/sda/queue/nr_requests
+```
+
+### Filesystem
+
+Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and
+[XFS](http://xfs.org/index.php/Main_Page) are recommended on some
+operating systems for greater reliability and recoverability.
+
+At this time, Basho can recommend using ZFS on Solaris, SmartOS, and
+OmniOS. ZFS may work well with Riak on direct Solaris clones like
+IllumOS, but we cannot yet recommend this. [ZFS on
+Linux](http://zfsonlinux.org) is still too early in its project lifetime
+to be recommendable for production use due to concerns that have been
+raised about excessive memory use. ZFS on FreeBSD is more mature than
+ZFS on Linux, but Basho has not yet performed sufficient performance and
+reliability testing to recommend using ZFS and Riak on FreeBSD.
+
+In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and
+[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on
+operating systems on which ZFS or XFS are not available or recommended.
+
+The ext4 file system defaults include two options that increase
+integrity but slow performance. Because Riak's integrity is based on
+multiple nodes holding the same data, these two options can be changed
+to boost I/O performance. We recommend setting `barrier=0` and
+`data=writeback` when using the ext4 filesystem.
+
+Similarly, the XFS file system defaults can be optimized to improve
+performance. We recommend setting `nobarrier`, `logbufs=8`,
+`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem.
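+
+A sketch of what the corresponding `/etc/fstab` entries might look like;
+the device name and mount point are illustrative, and each volume would
+use only one of the two filesystems:
+
+```config
+# ext4 with the recommended options
+/dev/sdb1  /var/lib/riak  ext4  noatime,barrier=0,data=writeback  0 0
+
+# XFS with the recommended options
+/dev/sdb1  /var/lib/riak  xfs  noatime,nobarrier,logbufs=8,logbsize=256k,allocsize=2M  0 0
+```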
+
+As with the `noatime` setting, these options should be added to
+`/etc/fstab` (as sketched above) so that they persist across server
+restarts.
+
+## Kernel and Network Tuning
+
+The following settings are minimally sufficient to improve many aspects
+of Riak usage on Linux, and should be added or updated in
+`/etc/sysctl.conf`:
+
+```config
+net.ipv4.tcp_max_syn_backlog = 40000
+net.core.somaxconn = 40000
+net.core.wmem_default = 8388608
+net.core.rmem_default = 8388608
+net.ipv4.tcp_sack = 1
+net.ipv4.tcp_window_scaling = 1
+net.ipv4.tcp_fin_timeout = 15
+net.ipv4.tcp_keepalive_intvl = 30
+net.ipv4.tcp_tw_reuse = 1
+net.ipv4.tcp_moderate_rcvbuf = 1
+```
+
+{{% note title="Note on system default" %}}
+In general, these recommended values should be compared with the system
+defaults and only changed if benchmarks or other performance metrics indicate
+that networking is the bottleneck.
+{{% /note %}}
+
+The following settings are optional, but may improve performance on a
+10Gb network:
+
+```config
+net.core.rmem_max = 134217728
+net.core.wmem_max = 134217728
+net.ipv4.tcp_mem = 134217728 134217728 134217728
+net.ipv4.tcp_rmem = 4096 277750 134217728
+net.ipv4.tcp_wmem = 4096 277750 134217728
+net.core.netdev_max_backlog = 300000
+```
+
+Certain network interfaces ship with on-board features that have been
+shown to hinder Riak network performance. These features can be disabled
+via `ethtool`.
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Benchmark and re-tune after any change to these values, as they affect
+all network operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho in nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, it is necessary to
+increase the open files limit. See [Open Files Limit]({{<baseurl>}}riak/kv/2.9.10/using/performance/open-files-limit/) for more
+details.
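+
+As a quick sketch (the user name and limits below are illustrative, not
+recommendations), you can check the limit in the current shell and raise
+it persistently in `/etc/security/limits.conf`:
+
+```bash
+# Check the open-files limit in the current shell
+ulimit -n
+
+# Example /etc/security/limits.conf entries for the user running Riak:
+# riak soft nofile 65536
+# riak hard nofile 200000
+```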
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.10/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/2.9.10/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/2.9.10/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/2.9.10/using/performance/open-files-limit/)
+
+
+
diff --git a/content/riak/kv/2.9.10/using/performance/amazon-web-services.md b/content/riak/kv/2.9.10/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..1b2a45cc69
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/performance/amazon-web-services.md
@@ -0,0 +1,246 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/tuning/aws
+  - /riak/kv/2.9.10/ops/tuning/aws
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types which encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are Disk I/O, RAM, and Network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used.
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-optimized EC2 instances, which provide between 500 and 1,000
+megabits per second of throughput to [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available and are recommended for use with
+Provisioned IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they would
+physical nodes to compensate for the performance variability caused by
+shared, virtualized resources. Plan to have more EC2-instance-based nodes
+than physical server nodes when estimating cluster size with respect to
+node count.
+{{% /note %}}
+
+## Operating System
+
+### Clocks
+
+NTP is configured by default on Amazon EC2 Linux instances. Please
+refer to the [Set the Time for an
+Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html)
+section of the EC2 documentation for steps on verifying if NTP is
+working properly. If NTP is not working properly, significant clock
+drift can occur.
+
+### Mounts and Scheduler
+
+On EBS volumes, the **deadline** scheduler should be used. To check the
+scheduler in use for block device `xvdf`, for example, use the following
+command:
+
+```bash
+cat /sys/block/xvdf/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/xvdf/queue/scheduler
+```
+
+More information on the disk scheduler is available in [Improving Performance](../).
+
+### Virtual Memory Subsystem
+
+EBS volumes have considerably less bandwidth than hardware disks. To
+avoid saturating EBS bandwidth and inducing IO latency spikes, it is
+recommended to tune the Linux virtual memory subsystem to flush smaller
+amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings).
+
+### Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally
+and are not affected by a wider problem like an AWS service outage. Try
+to determine the cause of the problem from the data you have collected.
+If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or re-sold under Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk.
+
+Have your collected data ready when contacting TI Tokyo Client Services. A
+Client Services Engineer (CSE) might request log files, configuration
+files, or other information.
+
+## Data Loss
+
+Many failures either do not entail data loss or have minimal loss that
+can be repaired automatically, without intervention. Outage of a single
+node does not necessarily cause data loss, as other replicas of every
+key are available elsewhere in the cluster. Once the node is detected as
+down, other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called hinted handoff).
+
+The more severe data loss scenarios usually relate to hardware failure
+(in the case of AWS, service failure or instance termination). In the
+cases where data is lost, several options are available for restoring
+the data:
+
+1. Restore from backup. A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but can be used to partially restore data from
+   lost EBS volumes. If running in a RAID configuration, rebuilding the
+   array may also be possible.
+2. Restore from Multi-Datacenter Replication. If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the
+   `riak-repl` command.
+3. Restore using intra-cluster repair. Riak versions 1.2 and greater
+   include a "repair" feature which will restore lost partitions with
+   data from other replicas. This currently has to be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho CSE.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho is strongly recommended.
+
+## Benchmarking
+
+Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that
+simulates application operations by constructing approximately
+compatible data payloads and communicating them to the Riak cluster
+directly.
+
+Benchmarking is critical to determining the appropriate EC2 instance
+types, and strongly recommended. More information is available on
+benchmarking Riak clusters with [Basho Bench](../benchmarking).
+
+Besides running Basho Bench, we also advise that you load test Riak with
+your own tests to ensure that the load imparted by MapReduce queries,
+full-text queries, and index queries is within the expected range.
+
+## Simulating Upgrades, Scaling, and Failure States
+
+In addition to simply measuring performance, it is also important to
+measure how performance degrades when the cluster is not in
+steady state. While the cluster is under a simulation of live load, the
+following states might be induced:
+
+1. Stop one or more nodes normally and restart them after a few moments
+   (simulates [rolling upgrade](../../../setup/upgrading/cluster)).
+2. Join two or more nodes to the cluster.
+3. Leave nodes from the cluster (after step #2).
+4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then
+   restart it.
+5. Hard-reboot a node's instance using the AWS console and then
+   restart it.
+6. Hard-stop and destroy a node's instance and build a new one from
+   backup.
+7. Via networking, e.g. firewall, partition one or more nodes from
+   the rest of the cluster and then restore the original
+   configuration.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. While the Riak node is out, other nodes may also
+be at risk if free capacity is low on the rest of the cluster, so
+monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to replace
+it.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart.
+In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance, generally performs better, but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network-attached storage that
+persists after instances go down. Along with EBS, you can optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
+
+
+
diff --git a/content/riak/kv/2.9.10/using/performance/benchmarking.md b/content/riak/kv/2.9.10/using/performance/benchmarking.md
new file mode 100644
index 0000000000..c5c682beab
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/performance/benchmarking.md
@@ -0,0 +1,601 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/building/benchmarking
+  - /riak/kv/2.9.10/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   `basho_bench`. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster. Do not run the `basho_bench` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download `basho_bench`
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/2.9.10/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building `basho_bench` from source.
+  Later versions of CentOS (6 and 7) have difficulty with installing
+  and enabling certain parts of the `erlang-crypto` package, which
+  is required by `basho_bench`.
+* Install `git` (to check out the `basho_bench` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory in which to generate the results:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed `basho_bench` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common `~/` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+`/home/username/bench_results/current/`.
+
+If you built `basho_bench` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput - Operations per second over the duration of the test.
+* Latency at 99th percentile, 99.9th percentile, and max latency for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running `basho_bench`, and the performance data can be copied (via
+`rsync`, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
+
+#### Installing R on Ubuntu
+
+```bash
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed `basho_bench` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+`/home/username/bench_results/summary.png`.
+
+If you have built `basho_bench` from source, you can just use
+`make`. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the `basho_bench/README`.
+
+## How does it work?
+
+When Basho Bench starts (`basho_bench.erl`), it reads the
+configuration (`basho_bench_config.erl`), creates a new results
+directory, and then sets up the test (`basho_bench_app.erl` and
+`basho_bench_sup.erl`).
+
+During test setup, Basho Bench creates the following:
+
+* One **stats process** (`basho_bench_stats.erl`). This process
+  receives notifications when an operation completes, plus the
+  elapsed time of the operation, and stores it in a histogram. At
+  regular intervals, the histograms are dumped to `summary.csv` as
+  well as operation-specific latency CSVs (e.g. `put_latencies.csv`
+  for the PUT operation).
+* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting
+  (`basho_bench_worker.erl`). The worker process wraps a driver
+  module, specified by the [driver](#driver)
+  configuration setting. The driver is randomly invoked using the
+  distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the
+  driver invokes operations is governed by the [mode](#mode) setting.
+
+Once these processes have been created and initialized, Basho Bench
+sends a run command to all worker processes, causing them to begin the
+test. Each worker is initialized with a common seed value for random
+number generation to ensure that the generated workload is reproducible
+at a later date.
+
+During the test, the workers repeatedly call `driver:run/4`, passing in
+the next operation to run, a keygen function, a valuegen function, and
+the last state of the driver. The worker process times the operation,
+and reports this to the stats process when the operation has completed.
+
+Finally, once the test has been run for the duration specified in the
+config file, all workers and stats processes are terminated and the
+benchmark ends. The measured latency and throughput of the test can be
+found in `./tests/current/`. Previous results are in timestamped
+directories of the form `./tests/YYYYMMDD-HHMMSS/`.
+
+## Configuration
+
+Basho Bench ships with a number of sample configuration files, available
+in the `/examples` directory.
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`driver:run/4` function with a new operation. There are two possible
+values:
+
+* `max` - generate as many ops per second as possible
+* `{rate, N}` - generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e.: as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                          [{{{basho_bench_driver_null,run,
+                                              [asdfput,
+                                               #Fun<basho_bench_keygen.4.4674>,
+                                               #Fun<basho_bench_valgen.0.1334>,
+                                               undefined]}}},
+                                           {{{basho_bench_worker,
+                                             worker_next_op,1}}},
+                                           {{{basho_bench_worker,
+                                             max_worker_run_loop,1}}}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` - Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` - Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` - Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` - Directly invokes the Bitcask API
+* `basho_bench_driver_dets` - Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` - operation completed successfully
+* `{error, Reason, NewState}` - operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` - operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` - operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` - generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` - the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` - the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` - selects an integer from uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` - selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` - the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a key generator function.
+  The worker `Id` will be prepended to `Args` when the function is
+  called.
+* `{int_to_bin, Generator}` - takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` - takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` - generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` - generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` - generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed
+% starting at 1000 bytes and a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+Default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+```
+
+#### riakclient_replies
+
+This value is used for R-values during a get operation, and W-values
+during a put operation.
+
+```erlang
+% Expect 1 reply.
+{riakclient_replies, 1}.
+```
+
+#### riakclient_bucket
+
+The Riak bucket to use for reading and writing values. The default is
+`<<"test">>`.
+
+```erlang
+% Use the "bench" bucket.
+{riakclient_bucket, <<"bench">>}.
+```
+
+### basho_bench_driver_riakc_pb Settings
+
+#### riakc_pb_ips
+
+A list of IP addresses to connect the workers to. A random IP will be
+chosen for each worker.
+
+The default is `{riakc_pb_ips, [{127,0,0,1}]}`
+
+```erlang
+% Connect to a cluster of 3 machines
+{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]}
+```
+
+#### riakc_pb_port
+
+The port on which to connect to the PBC interface.
+
+The default is `{riakc_pb_port, 8087}`
+
+#### riakc_pb_bucket
+
+The bucket to use for testing.
+
+The default is `{riakc_pb_bucket, <<"test">>}`
+
+### basho_bench_driver_http_raw Settings
+
+#### http_raw_ips
+
+A list of IP addresses to connect the workers to. Each worker makes
+requests to each IP in a round-robin fashion.
+
+The default is `{http_raw_ips, ["127.0.0.1"]}`
+
+```erlang
+% Connect to a cluster of machines in the 10.x network
+{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}.
+```
+
+#### http_raw_port
+
+Select the default port to connect to for the HTTP server.
+
+The default is `{http_raw_port, 8098}`.
+
+```erlang
+% Connect on port 8090
+{http_raw_port, 8090}.
+```
+
+#### http_raw_path
+
+The base path to use for accessing Riak, usually `"/riak/<bucket>"`.
+
+The default is `{http_raw_path, "/riak/test"}`.
+
+```erlang
+% Place test data in another_bucket
+{http_raw_path, "/riak/another_bucket"}.
+```
+
+#### http_raw_params
+
+Additional parameters to add to the end of the URL. This can be used
+to set the `r`/`w`/`dw`/`rw` parameters as desired.
+
+The default is `{http_raw_params, ""}`.
+
+```erlang
+% Set R=1, W=1 for testing a system with n_val set to 1
+{http_raw_params, "?r=1&w=1"}.
+```
+
+#### http_raw_disconnect_frequency
+
+How often, in seconds or number of operations, the HTTP clients
+(workers) should forcibly disconnect from the server.
+
+The default is `{http_raw_disconnect_frequency, infinity}` (which
+means that Basho Bench should never forcibly disconnect).
+
+```erlang
+% Disconnect after 60 seconds
+{http_raw_disconnect_frequency, 60}.
+
+% Disconnect after 200 operations
+{http_raw_disconnect_frequency, {ops, 200}}.
+```
+
+## Custom Driver
+
+A custom driver must expose the following callbacks.
+
+```erlang
+% Create the worker
+% ID is an integer
+new(ID) -> {ok, State} or {error, Reason}.
+
+% Run an operation
+run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}.
+```
+
+See the [existing
+drivers](https://github.com/basho/basho_bench/tree/master/src) for
+more details.
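+
+As an illustration of the callback contract above, here is a minimal,
+hypothetical no-op driver (not one of the shipped drivers):
+
+```erlang
+-module(basho_bench_driver_noop).
+-export([new/1, run/4]).
+
+% Create per-worker state; here the state is just the worker ID.
+new(Id) ->
+    {ok, Id}.
+
+% "Run" an operation by doing nothing and reporting success.
+run(_Op, _KeyGen, _ValueGen, State) ->
+    {ok, State}.
+```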
+
+
+
diff --git a/content/riak/kv/2.9.10/using/performance/erlang.md b/content/riak/kv/2.9.10/using/performance/erlang.md
new file mode 100644
index 0000000000..a8a48261dc
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/performance/erlang.md
@@ -0,0 +1,370 @@
+---
+title: "Erlang VM Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Erlang VM"
+    identifier: "performance_erlang"
+    weight: 105
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/tuning/erlang
+  - /riak/kv/2.9.10/ops/tuning/erlang
+---
+
+Riak was written almost exclusively in [Erlang](http://www.erlang.org)
+and runs on an Erlang virtual machine (VM), which makes proper Erlang VM
+tuning an important part of optimizing Riak performance. The Erlang VM
+itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm).
+
+The table below lists some of the parameters that are available, showing
+both their names as used in Erlang and their names as Riak parameters.
+
+Erlang parameter | Riak parameter
+:----------------|:--------------
+[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads`
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K`
+[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit`
+[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports`
+[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online`
+[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W`
+[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size`
+[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables`
+[`+scl`](http://www.erlang.org/doc/man/erl.html#+scl) | `erlang.schedulers.compaction_of_load`
+[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval`
+[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp`
+[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing`
+[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size`
+[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime`
+[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after`
+[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump`
+[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables`
+`-name` | `nodename`
+
+{{% note title="Note on upgrading to 2.0" %}}
+In versions of Riak prior to 2.0, Erlang VM-related parameters were specified
+in a `vm.args` configuration file; in versions 2.0 and later, all
+Erlang-VM-specific parameters are set in the `riak.conf` file. If you're
+upgrading to 2.0 from an earlier version, you can still use your old `vm.args`
+if you wish. Please note, however, that if you set one or more parameters in
+both `vm.args` and in `riak.conf`, the settings in `vm.args` will override
+those in `riak.conf`.
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
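+
+For example, on a 16-core machine you might set the thread counts
+explicitly (the values here are illustrative, not recommendations):
+
+```riakconf
+erlang.schedulers.total = 16
+erlang.schedulers.online = 16
+```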
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` parameter to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. The Erlang distribution itself uses an unpredictable port
+for inter-node communication by default, binding to port 0, which means
+that it uses the first available port. This can make it difficult to
+configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively.
The +following would set the range to ports between 3000 and 5000: + +```riakconf +erlang.distribution.port_range.minimum = 3000 +erlang.distribution.port_range.maximum = 5000 +``` + +```appconfig +%% The older, app.config-based system uses different parameter names +%% for specifying the minimum and maximum port + +{kernel, [ + % ... + {inet_dist_listen_min, 3000}, + {inet_dist_listen_max, 5000} + % ... + ]} +``` + +You can set the Erlang VM to use a single port by setting the minimum to +the desired port while setting no maximum. The following would set the +port to 5000: + +```riakconf +erlang.distribution.port_range.minimum = 5000 +``` + +```appconfig +{kernel, [ + % ... + {inet_dist_listen_min, 5000}, + % ... + ]} +``` + +If the minimum port is unset, the Erlang VM will listen on a random +high-numbered port. + +### Maximum Ports + +You can set the maximum number of concurrent ports/sockets used by the +Erlang VM using the `erlang.max_ports` setting. Possible values range +from 1024 to 134217727. The default is 65536. In `vm.args` you can use +either `+Q` or `-env ERL_MAX_PORTS`. + +## Asynchronous Thread Pool + +If thread support is available in your Erlang VM, you can set the number +of asynchronous threads in the Erlang VM's asynchronous thread pool +using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0 +to 1024. If thread support is available on your OS, the default is 64. +Below is an example setting the number of async threads to 600: + +```riakconf +erlang.async_threads = 600 +``` + +```vmargs ++A 600 +``` + +### Stack Size + +In addition to the number of asynchronous threads, you can determine the +memory allocated to each thread using the +`erlang.async_threads.stack_size` parameter, which corresponds to the +`+a` Erlang flag. You can determine that size in Riak using KB, MB, GB, +etc. The valid range is 16-8192 kilowords, which translates to 64-32768 +KB on 32-bit architectures. While there is no default, we suggest a +stack size of 16 kilowords, which translates to 64 KB. We suggest such a +small size because the number of asynchronous threads, as determined by +`erlang.async_threads` might be quite large in your Erlang VM. The 64 KB +default is enough for drivers delivered with Erlang/OTP but might not be +large enough to accommodate drivers that use the `driver_async()` +functionality, documented +[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend +setting higher values with caution, always keeping the number of +available threads in mind. + +## Kernel Polling + +You can utilize kernel polling in your Erlang distribution if your OS +supports it. Kernel polling can improve performance if many file +descriptors are in use; the more file descriptors, the larger an effect +kernel polling may have on performance. Kernel polling is enabled by +default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`. +This corresponds to the +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the +Erlang VM. You can disable it by setting `erlang.K` to `off`. + +## Warning Messages + +Erlang's +[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an +event manager that registers error, warning, and info events from the +Erlang runtime. By default, events from the `error_logger` are mapped as +warnings, but you can also set messages to be mapped as errors or info +reports using the `erlang.W` parameter (or `+W` in `vm.args`). The +possible values are `w` (warnings), `errors`, or `i` (info reports). 
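+
+For instance, to map `error_logger` events to info reports in `vm.args`
+(a sketch; with the newer system you would set `erlang.W` instead):
+
+```vmargs
++W i
+```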
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) \(Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.process_limit` (explained in the section [above](#process-limit)) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can determine that frequency using the
+`erlang.distribution.net_ticktime`. The tick will occur every N seconds,
+where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can determine how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
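+
+For a test cluster that is stopped and started frequently, for example,
+you might shorten the shutdown window (the value is illustrative):
+
+```riakconf
+erlang.shutdown_time = 5s
+```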
+
+
+
diff --git a/content/riak/kv/2.9.10/using/performance/latency-reduction.md b/content/riak/kv/2.9.10/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..effe2f6cc2
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/performance/latency-reduction.md
@@ -0,0 +1,266 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/tuning/latency-reduction
+  - /riak/kv/2.9.10/ops/tuning/latency-reduction
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about them.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB. Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric | Explanation
+:-----------------------------|:-----------
+`fsm_node_get_objsize_mean` | The mean object size encountered by this node in the last minute
+`fsm_node_get_objsize_median` | The median object size encountered by this node in the last minute
+`fsm_node_get_objsize_95` | The 95th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_99` | The 99th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_100` | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and re-start your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes).
This increases the size of the +distributed Erlang buffer from its default of 1024 KB. Re-start your +node when configuration changes have been made. + +Large objects can also impact latency even if they're only present on +some nodes. If increased latency occurs only on N nodes, where N is your +[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes. + +If large objects are suspected, you should also audit the behavior of +siblings in your cluster, as explained in the [next section](#siblings). + +## Siblings + +In Riak, object conflicts are handled by keeping multiple versions of +the object in the cluster either until a client takes action to resolve +the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects). + +### Mitigation + +The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor +object size (or via an HTTP `GET` request to `/stats`). In the output of +`riak-admin status` in each node, you'll see the following +sibling-related statistics: + +Metric | Explanation +:------------------------------|:----------- +`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute + +Is there an upward trend in these statistics over time? Are there any +large outliers? Do these trends correspond to your observed latency +spikes? + +If you believe that sibling creation problems could be responsible for +latency issues in your cluster, you can start by checking the following: + +* If `allow_mult` is set to `true` for some or all of your buckets, be + sure that your application is correctly resolving siblings. Be sure to + read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate. + If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly. +* Application errors are a common source of problems with + siblings. Updating the same key over and over without passing a + [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. 
If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) \(DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W are set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. This setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn per merge), while setting it higher will
+induce less frequent merges with more I/O churn per merge. To find
+settings that are ideal for your use case, we recommend checking out our
+guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments.
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak is a heavily I/O- and network-intensive system. Bottlenecks on
+either front can lead to undue latency in your cluster. We recommend an
+active monitoring strategy to detect problems immediately when they
+arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+at your disposal, including
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat).
+
+To help diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+You can also set an object size threshold past which a write will
+succeed but will register a warning in the logs by adjusting the
+`object.size.warning_threshold` parameter. The default is `5MB`.
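+
+As a quick sketch, here is how those two settings might look together in
+`riak.conf` (the values shown are the defaults discussed above; adjust
+them to your own object-size expectations):
+
+```riakconf
+## Writes above this size fail outright
+object.size.maximum = 50MB
+
+## Writes above this size succeed but log a warning
+object.size.warning_threshold = 5MB
+```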
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs an error (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
+
+
+
diff --git a/content/riak/kv/2.9.10/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.9.10/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..b88e589333
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,45 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+---
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.10/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/using/performance/open-files-limit.md b/content/riak/kv/2.9.10/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..2c351386a0
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/performance/open-files-limit.md
@@ -0,0 +1,350 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/tuning/open-files-limit/
+  - /riak/kv/2.9.10/ops/tuning/open-files-limit/
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation.
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session required pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session required pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+        "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
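+
+If you would rather apply the new limits without a full restart, you can
+usually load the daemon by hand and then re-check the limits. This is a
+hedged sketch, since behavior can vary between macOS releases:
+
+```bash
+sudo launchctl load -w /Library/LaunchDaemons/limit.maxfiles.plist
+launchctl limit maxfiles
+```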
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file.
+
+4\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```bash
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.9.10/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..f889452a5d
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,48 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times.
First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference.md b/content/riak/kv/2.9.10/using/reference.md
new file mode 100644
index 0000000000..950e867588
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference.md
@@ -0,0 +1,133 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/architecture.md b/content/riak/kv/2.9.10/using/reference/architecture.md
new file mode 100644
index 0000000000..036c0f8d14
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/architecture.md
@@ -0,0 +1,19 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+#menu:
+#  riak_kv-2.9.10:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+---
+
+<!-- TODO: Content -->
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/bucket-types.md b/content/riak/kv/2.9.10/using/reference/bucket-types.md
new file mode 100644
index 0000000000..1f73f5c36a
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/bucket-types.md
@@ -0,0 +1,821 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+---
+
+Bucket types allow groups of buckets to share configuration details and
+allow Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
+cluster to a pre-2.0 version _as long as you have not created and activated a
+bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/2.9.10/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, related to
+  [Riak data types]({{<baseurl>}}riak/kv/2.9.10/developing/data-types) and [strong consistency]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/strong-consistency) respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types/#bucket-properties-and-operations).
You cannot simply take a +bucket `my_bucket` and assign it a type the way that you would, say, +set `allow_mult` to `false` or `n_val` to `5`, because there is no +`type` parameter contained within the bucket's properties (i.e. +`props`). + +Instead, bucket types are applied to buckets _on the basis of how those +buckets are queried_. Queries involving bucket types take the following +form: + +``` +GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key> +``` + +In the older system, only bucket and key are specified in queries: + +``` +GET/PUT/DELETE /buckets/<bucket>/keys/<key> +``` + +## When to Use Bucket Types + +In many respects, bucket types are a major improvement over the older +system of bucket configuration, including the following: + +* Bucket types are more flexible because they enable you to define a + bucket configuration and then change it if you need to. +* Bucket types are more reliable because the buckets that bear a given + type only have their properties changed when the type is changed. + Previously, it was possible to change the properties of a bucket only + through client requests. +* Whereas bucket properties can only be altered by clients interacting + with Riak, bucket types are more of an operational concept. The + `riak-admin bucket-type` interface (discussed in depth below) enables + you to manage bucket configurations on the operations side, without + recourse to Riak clients. + +For these reasons, we recommend _always_ using bucket types in versions +of Riak 2.0 and later. + +## Managing Bucket Types Through the Command Line + +Bucket types are created, updated, activated, and more through the +`riak-admin bucket-type` interface. + +Below is a full list of available sub-commands: + +Command | Action | Form | +:-------|:-------|:-----| +`create` | Create or modify a bucket type before activation | `create <type> <json>` | +`activate` | Activate a bucket type | `activate <type>` | +`list` | List all currently available bucket types and their activation status | `list` | +`status` | Display the status and properties of a specific bucket type | `status <type>` | +`update` | Update a bucket type after activation | `update <type> <json>` | + +### Creating a Bucket Type + +Creating new bucket types involves using the `create <type> <json>` +command, where `<type>` is the name of the type and `<json>` is a JSON +object of the following form: + +```json +{ + "props": { + "prop1": "val1", + "prop2": "val2", + ... + } +} +``` + + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.10/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.10/developing/getting-started) section. + +If creation is successful, you should see the following output: + +``` +type_using_defaults created +``` + +{{% note %}} +The `create` command can be run multiple times prior to a bucket type being +activated. Riak will persist only those properties contained in the final call +of the command. +{{% /note %}} + +Creating bucket types that assign properties _always_ involves passing +stringified JSON to the `create` command. One way to do that is to pass +a JSON string directly. 
The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak-admin bucket-type create from_json_file "$(cat props.json)"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```bash
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock: 20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after a bucket
+type is created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then this will
+remain true of the bucket type for its entire lifetime.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+{{% /note %}}
+
+## Buckets as Namespaces
+
+In versions of Riak prior to 2.0, all queries are made to a bucket/key
+pair, as in the following example read request:
+
+```java
+Location myKey = new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch = new FetchValue.Builder(myKey).build();
+client.execute(fetch);
+```
+
+```ruby
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```php
+$location = new Location('my_key', new Bucket('my_bucket'));
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```csharp
+var id = new RiakObjectId("my_bucket", "my_key");
+client.Get(id);
+```
+
+```javascript
+client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Object} = riakc_pb_socket:get(Pid,
+                                   <<"my_bucket">>,
+                                   <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+```
+
+With the addition of bucket types in Riak 2.0, bucket types can be used
+as _an additional namespace_ on top of buckets and keys. The same bucket
+name can be associated with completely different data if it is used in
+accordance with a different type. Thus, the following two requests will
+be made to _completely different objects_, even though the bucket and key
+names are the same:
+
+```java
+Location key1 =
+  new Location(new Namespace("type1", "my_bucket"), "my_key");
+Location key2 =
+  new Location(new Namespace("type2", "my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(key1).build();
+FetchValue fetch2 = new FetchValue.Builder(key2).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type2'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("type1", "my_bucket", "my_key");
+var id2 = new RiakObjectId("type2", "my_bucket", "my_key");
+var rslt1 = client.Get(id1);
+var rslt2 = client.Get(id2);
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'type1', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+
+client.fetchValue({
+    bucketType: 'type2', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"type1">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 {<<"type2">>, <<"my_bucket">>},
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key
+```
+
+{{% note title="Note on object location" %}}
+In Riak 2.x, _all requests_ must be made to a location specified by a bucket
+type, bucket, and key rather than to a bucket/key pair, as in previous
+versions.
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
+```
+
+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`.
+
+In version 2.0, this is changing in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolution in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind when using versions of Riak 2.0 and
+later any time that you create, activate, and use your own bucket types.
+It is still possible to set `allow_mult` to `false` in any given bucket
+type, but it must be done explicitly. If we wanted to set
+`allow_mult` to `false` in our `n_val_of_2` bucket type from above, we
+would need to create or modify the already existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/2.9.10/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/2.9.10/developing/usage/commit-hooks/#post-commit-hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again.
If it still does not
+    work, then there may be a network partition or other issues that need
+    to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key =
+  new Location(new Namespace("no_siblings", "sensitive_user_data"), "user19735");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj).withLocation(key).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{ ... user data ... }")
+  ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` with the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket.
If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj).withLocation(allYourBaseKey).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type from
+above if we didn't want to deal with siblings, vclocks, and the like,
+and we could use a `siblings_allowed` bucket type (with all of the
+default properties except `allow_mult` set to `true`). This would give
+us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
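+
+To illustrate that isolation, here is a hedged `curl` sketch, assuming both
+types have been created and activated and that Riak's HTTP interface is
+listening on the default port 8098. The same bucket/key pair resolves to two
+different objects under the two types:
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "old value" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "new value" \
+  http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+
+# These two reads return different values for the same bucket/key pair
+curl http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+```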
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/custom-code.md b/content/riak/kv/2.9.10/using/reference/custom-code.md
new file mode 100644
index 0000000000..6c1acba6af
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/custom-code.md
@@ -0,0 +1,134 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/advanced/install-custom-code/
+  - /riak/kv/2.9.10/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/2.9.10/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/2.9.10/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. You should note that in Erlang, a
+file must have the same name as the module it contains. So if you are given
+a file named `validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+        on supported platforms
+
+Compiling the module is a straightforward process.
+
+```text
+erlc validate_json.erl
+```
+
+Next, you'll need to define a path from which compiled modules can be stored
+and loaded. For our example, we'll use a temporary directory `/tmp/beams`,
+but you should choose a directory for production functions based on your
+own requirements such that they will be available where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+### Configure
+
+Copy the `validate_json.beam` file to the `/tmp/beams` directory.
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+  %% ...
+  {add_paths, ["/tmp/beams/"]},
+  %% ...
+```
+
+After updating `app.config`, Riak must be restarted. In production cases, you
+should ensure that, if you are adding configuration changes to multiple nodes,
+you do so in a rolling fashion, taking time to ensure that the Riak key
+value store has fully initialized and become available for use.
+
+This is done with the `riak-admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure riak_kv is active before restarting the next
+node.
+{{% /note %}} + + + diff --git a/content/riak/kv/2.9.10/using/reference/failure-recovery.md b/content/riak/kv/2.9.10/using/reference/failure-recovery.md new file mode 100644 index 0000000000..d998cda3cd --- /dev/null +++ b/content/riak/kv/2.9.10/using/reference/failure-recovery.md @@ -0,0 +1,83 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/handoff.md b/content/riak/kv/2.9.10/using/reference/handoff.md
new file mode 100644
index 0000000000..0b7f709836
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/handoff.md
@@ -0,0 +1,200 @@
+---
+title: "Handoff Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Handoff"
+    identifier: "managing_ref_handoff"
+    weight: 101
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/handoff/
+  - /riak/kv/2.9.10/ops/running/handoff/
+---
+
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/handoff
+
+Riak is a distributed system built with two essential goals in mind:
+
+* **fault tolerance**, whereby a Riak cluster can withstand node
+  failure, network partitions, and other events in a way that does not
+  disrupt normal functioning, and
+* **scalability**, whereby operators can gracefully add nodes to and
+  remove nodes from a Riak cluster
+
+Both of these goals demand that Riak be able to either temporarily or
+permanently re-assign responsibility for portions of the keyspace. That
+re-assignment is referred to as **intra-cluster handoff** (or simply
+**handoff** in our documentation).
+
+## Types of Handoff
+
+Intra-cluster handoff typically takes one of two forms: **hinted
+handoff** and **ownership transfer**.
+
+Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick
+up the slack, so to speak, assuming responsibility for node C's
+operations. When node C comes back online, responsibility will be handed
+back to the original vnodes.
+
+Ownership transfer is different because it is meant to be permanent.
+It occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very
+makeup of a cluster changes, e.g. when nodes are added to or removed from
+the cluster. In this case, responsibility for portions of the keyspace
+needs to be fundamentally re-assigned.
+
+Both types of handoff are handled automatically by Riak. Operators do
+have the option, however, of enabling and disabling handoff on
+particular nodes or all nodes and of configuring key aspects of Riak's
+handoff behavior. More information can be found below.
+
+## Configuring Handoff
+
+A full listing of configurable parameters can be found in our
+[configuration files]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#intra-cluster-handoff)
+document. The sections below provide a more narrative description of
+handoff configuration.
+
+### SSL
+
+If you want to encrypt handoff traffic within a Riak cluster, you need
+to provide each node with appropriate paths for an SSL certfile (and
+potentially a keyfile). The configuration below would designate a
+certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`:
+
+```riakconf
+handoff.ssl.certfile = /ssl_dir/cert.pem
+handoff.ssl.keyfile = /ssl_dir/key.pem
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_ssl_options, [
+      {certfile, "/ssl_dir/cert.pem"},
+      {keyfile, "/ssl_dir/key.pem"}
+    ]},
+    %% Other configs
+]}
+```
+
+### Port
+
+You can set the port used by Riak for handoff-related interactions using
+the `handoff.port` parameter. The default is 8099.
+This would change the port to 9000:
+
+```riakconf
+handoff.port = 9000
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_port, 9000},
+    %% Other configs
+]}
+```
+
+### Background Manager
+
+Riak has an optional background manager that limits handoff activity in
+the name of saving resources. The manager can help prevent system
+response degradation during times of heavy load, when multiple
+background tasks may contend for the same system resources. The
+background manager is disabled by default. The following will enable it:
+
+```riakconf
+handoff.use_background_manager = on
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_use_background_manager, on},
+    %% Other configs
+]}
+```
+
+### Maximum Rejects
+
+If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search/),
+those subsystems can block handoff of primary key/value data, i.e. data
+that you interact with via normal reads and writes.
+
+The `handoff.max_rejects` setting determines the maximum duration that a
+[vnode]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode) can be blocked: that duration is the product of
+`handoff.max_rejects` and the value of
+[`vnode_management_timer`]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#vnode_management_timer).
+Thus, if you set `handoff.max_rejects` to 10 and
+`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems
+can block K/V handoff for a maximum of 50 seconds. The default for
+`handoff.max_rejects` is 6, while the default for
+`vnode_management_timer` is `10s`. This would set `max_rejects` to 10:
+
+```riakconf
+handoff.max_rejects = 10
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_rejected_max, 10},
+    %% Other configs
+]}
+```
+
+### Transfer Limit
+
+You can adjust the number of node-to-node transfers (which includes
+handoff) using the `transfer_limit` parameter. The default is 2. Setting
+this higher will allow more concurrent transfers at the expense of
+greater resource consumption. This would set `transfer_limit` to 5:
+
+```riakconf
+transfer_limit = 5
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_concurrency, 5},
+    %% Other configs
+]}
+```
+
+## Enabling and Disabling Handoff
+
+Handoff can be enabled and disabled in two ways: via configuration or
+on the command line.
+
+### Enabling and Disabling via Configuration
+
+You can enable and disable both outbound and inbound handoff on a node
+using the `handoff.outbound` and `handoff.inbound` settings,
+respectively. Both are enabled by default. The following would disable
+both:
+
+```riakconf
+handoff.outbound = off
+handoff.inbound = off
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {disable_outbound_handoff, true},
+    {disable_inbound_handoff, true},
+    %% Other configs
+]}
+```
+
+### Enabling and Disabling Through the Command Line
+
+Check out [Cluster Operations: Handoff][cluster ops handoff] for steps
+on enabling and disabling handoff via the command line; a brief sketch
+follows below.
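+
+As a quick orientation, a minimal sketch of the command-line approach
+(consult the linked page for the authoritative set of subcommands and
+flags):
+
+```bash
+# Disable, then re-enable, both handoff directions on the local node
+riak-admin handoff disable both
+riak-admin handoff enable both
+
+# Review ongoing and queued transfers across the cluster
+riak-admin handoff summary
+```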
+ + + diff --git a/content/riak/kv/2.9.10/using/reference/jmx.md b/content/riak/kv/2.9.10/using/reference/jmx.md new file mode 100644 index 0000000000..d03eea212f --- /dev/null +++ b/content/riak/kv/2.9.10/using/reference/jmx.md @@ -0,0 +1,189 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.10/ops/running/monitoring/jmx + - /riak/kv/2.9.10/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td> + <td>float</td> + <td>Mean PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMedian</tt></td> + <td>float</td> + <td>Median PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePuts</tt></td> + <td>int</td> + <td>Number of PUTs in past minute</td> + </tr> + <tr> + <td><tt>NodePutsTotal</tt></td> + <td>int</td> + <td>Number of PUTs since node start</td> + </tr> + <tr> + <td><tt>PBCActive</tt></td> + <td>int</td> + <td>Number of active Protocol Buffers connections</td> + </tr> + <tr> + <td><tt>PBCConnects</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections in past minute</td> + </tr> + <tr> + <td><tt>PBCConnectsTotal</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections since node start</td> + </tr> + <tr> + <td><tt>RingCreationSize</tt></td> + <td>int</td> + <td>Number of partitions in Riak ring</td> + </tr> + <tr> + <td><tt>VnodeGets</tt></td> + <td>int</td> + <td>Number of vnode-level GETs in past minute</td> + </tr> + <tr> + <td><tt>VnodeGetsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level GETs since node start</td> + </tr> + <tr> + <td><tt>VnodePuts</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs in past minute</td> + </tr> + <tr> + <td><tt>VnodePutsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs since node start</td> + </tr> +</table> + + + diff --git a/content/riak/kv/2.9.10/using/reference/logging.md b/content/riak/kv/2.9.10/using/reference/logging.md new file mode 100644 index 0000000000..d85c312e5e --- /dev/null +++ b/content/riak/kv/2.9.10/using/reference/logging.md @@ -0,0 +1,300 @@ +--- +title: "Logging Reference" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Logging" + identifier: "managing_ref_logging" + weight: 100 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.10/ops/running/logging + - /riak/kv/2.9.10/ops/running/logging +--- + +[cluster ops log]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/logging + +Logging in Riak KV is handled by a Basho-produced logging framework for +[Erlang](http://www.erlang.org) called +[lager](https://github.com/basho/lager). + +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. + +## Log Directory + +Riak's log files are stored in a `/log` directory on each node. The +location of that directory differs from platform to platform. The table +below shows you where log files are stored on all supported operating +systems. + +OS | Directory +:--|:--------- +Ubuntu, Debian, CentOS, RHEL | `/var/log/riak` +Solaris, OpenSolaris | `/opt/riak/log` +Source install and Mac OS X | `./log` (where the `.` represents the root installation directory) + +## Log Files + +Below is a list of files that can be found in each node's `/log` +directory: + +File | Significance +:----|:------------ +`console.log` | Console log output +`crash.log` | Crash logs +`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. +`run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. 
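+
+For a quick look at recent problems on a packaged install (assuming the
+Ubuntu/Debian log location from the table above), you might follow the
+console log and filter for error-level entries:
+
+```bash
+# Follow the console log, surfacing [error]-level lines as they arrive
+tail -F /var/log/riak/console.log | grep -i '\[error\]'
+```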
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.9.10, a few of the log messages may contain newline
+characters, preventing reliable identification of the end of each log
+message when ingesting log files with external tools.
+
+A known workaround is to ingest not the logs enabled by the
+`log.console` configurable parameter but rather the logs enabled by the
+`log.syslog` configurable parameter and processed by syslog, e.g. using
+the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option - or its equivalent in other syslog implementations - (e.g. see
+[this StackExchange topic
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474)).
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log Files
+
+In each node's `/log` directory, you will see at least one of each of
+the following:
+
+File | Contents
+:----|:--------
+`console.log` | General messages from all Riak subsystems
+`crash.log` | Catastrophic events, such as node failures, running out of disk space, etc.
+`erlang.log` | Events from the Erlang VM on which Riak runs
+`run_erl.log` | The command-line arguments used when starting Riak
+
+### Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable it
+and disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+By default, Riak stores error messages in `./log/error.log`. You can
+change this using the `log.error.file` parameter.
+Here is an
+example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated either when a certain size threshold is reached and/or at
+designated times.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` - Every night at midnight
+* `$D23` - Every day at 23:00 (11 pm)
+* `$W0D20` - Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` - On the first day of every month at midnight
+* `$M5D6` - On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, using any size denomination
+you wish, e.g. `KB` or `MB`. The default is 64 KB. The following would
+set that maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix to be prepended to
+each syslog message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages amongst the available levels, which are listed below. The
+default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to a log file (you can choose the name
+and location of that file), to standard output, or to neither.
+This is determined by the value that you give to the
+`log.console` parameter, which gives you one of four options:
+
+* `file` - Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/2.9.10/setup/installing/source), but will differ on platform-specific installations,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` - Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-cli/#attach-direct) command
+* `both` - Console logs will be emitted both to a file and to standard
+  output
+* `off` - Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log]
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/multi-datacenter.md b/content/riak/kv/2.9.10/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..246c74fa58
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/multi-datacenter.md
@@ -0,0 +1,51 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling and disabling of per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][ref mdc comparison]
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.9.10/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..9d76b5d358
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,99 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/mdc/comparison
+  - /riak/kv/2.9.10/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/2.9.10/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/2.9.10/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destinations of replication data. Sites and listeners are
+  manually configured on each node in a cluster. This can be a burden to
+  the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity arrived in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters are exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes, and the sink has N nodes, there will be M
+  realtime connections.
+  Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/2.9.10/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..3563bf5ee8
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,169 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/mdc/monitoring
+  - /riak/kv/2.9.10/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identification and trending of issues or delays in realtime replication
+is important for identifying a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute.
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The value of `rt_dirty` quantifies the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue backups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of sustained
+high load, the maximum size of the queue (displayed in the
+`realtime_queue_stats` section of the replication status output as
+`max_bytes`) can be increased to accommodate the expected load.
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
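+
+As a concrete starting point, a minimal polling sketch against the HTTP
+stats endpoint (the host, and the use of `jq`, are assumptions; the
+field paths follow the section names described above, so verify them
+against your own cluster's output, and note the once-per-minute
+guidance above):
+
+```bash
+# Sample rt_dirty and the realtime queue depth once per minute
+while true; do
+  curl -s http://127.0.0.1:8098/riak-repl/stats \
+    | jq '{rt_dirty, pending: .realtime_queue_stats.pending}'
+  sleep 60
+done
+```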
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+1. Perform a GET for your canary object on both your source and sink
+   clusters, noting their states. The state of the object in each cluster
+   can be referred to as state `S0`, or the object's initial state.
+2. PUT an update for your canary object to the source cluster, updating
+   the state of the object to the next state, `S1`.
+3. Perform a GET for your canary on the sink cluster, comparing the state
+   of the object on the source cluster to the state of the object on the
+   sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+Getting a rough estimate of how long it takes an object PUT to a source
+cluster to be replicated to a sink cluster can be done by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
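+
+A minimal sketch of the canary round-trip measurement using Riak's HTTP
+API (the hosts, bucket, and key are examples; a production version would
+need error handling and a bounded number of retries):
+
+```bash
+SOURCE=http://source-node:8098
+SINK=http://sink-node:8098
+KEY=buckets/canary/keys/repl-test
+
+STATE=$(date +%s)   # next state S1: a value not yet seen on the sink
+START=$(date +%s)
+curl -s -X PUT -H "Content-Type: text/plain" -d "$STATE" "$SOURCE/$KEY"
+
+# Poll the sink until the canary reaches state S1, then report the delay
+until [ "$(curl -s "$SINK/$KEY")" = "$STATE" ]; do sleep 1; done
+echo "Replicated in $(( $(date +%s) - START )) seconds"
+```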
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/2.9.10/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..3ae845ebe6
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,65 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/mdc/per-bucket
+  - /riak/kv/2.9.10/ops/mdc/per-bucket
+---
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+The set of valid `repl` values changed between Riak Enterprise 1.1 and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+ * `true` - Enable replication (realtime + fullsync)
+ * `false` - Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+ * `realtime` - Replication only occurs in realtime for this bucket
+ * `fullsync` - Replication only occurs during a fullsync operation
+ * `both` - Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket/props \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket/props \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this goes for properties such as `backend`.
+If the bucket doesn't exist in the destination cluster, Riak will create
+it with the default backend and _not_ with the backend used in the
+source cluster.
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/multi-datacenter/statistics.md b/content/riak/kv/2.9.10/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..ee42691773
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,243 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/mdc/statistics
+  - /riak/kv/2.9.10/ops/mdc/statistics
+---
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
+
+1. Many of these statistics will appear only on the current
+   leader node
+2. The counts for all statistics will be reset to 0 upon restarting Riak
+   unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | The number of sink errors detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | The number of source errors detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+These values are under `realtime_queue_stats`.
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster.
+`fullsync_start_time` | The time the current fullsync to the specified cluster began.
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync.
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When completed, this will be the same as the total number of partitions in the ring.
+`error_exits` | If a sync failed or was aborted, the partition will be queued again and retried later
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
+`strategy` | The strategy that fulfills fullsync replication. In previous versions of replication, different values could be configured. This value could be changed depending on your replication needs.
+`fullsync_worker` | The Erlang process ID of the fullsync worker.
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received to the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received to the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication sink
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node.
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The name of the connected site (sink) as configured. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for
+more information.
+
+[config v2 mdc]: {{<baseurl>}}riak/kv/2.9.10/configuring/v2-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+
+## Accessing Replication Web-Based Statistics
+
+These stats can be accessed via the command line with the following
+command:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats
+```
+
+A simple way to view formatted statistics is to use a command such as:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/object-deletion.md b/content/riak/kv/2.9.10/using/reference/object-deletion.md
new file mode 100644
index 0000000000..b1b32ace26
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/object-deletion.md
@@ -0,0 +1,120 @@
+---
+title: "Object Deletion Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Object Deletion"
+    identifier: "managing_ref_object_deletion"
+    weight: 103
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/advanced/deletion
+---
+
+[concept eventual consistency]: ../../../learn/concepts/eventual-consistency
+[concept clusters]: ../../../learn/concepts/clusters
+[glossary vnode]: ../../../learn/glossary/#vnode
+[usage delete objects]: ../../../developing/usage/deleting-objects
+[developing keylist]: ../../../developing/api/http/list-keys
+[developing mapreduce]: ../../../developing/usage/mapreduce
+[cluster mdc]: ../../cluster-operations/v3-multi-datacenter
+[config advanced]: ../../../configuring/reference/#advanced-configuration
+[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum
+[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings
+[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction
+
+In single-server, non-clustered data storage systems, object deletion
+is a trivial process.
+object deletion is far less trivial because objects live on multiple
+[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend.
+
+## Object Deletion Example
+
+The problem of object deletion in distributed systems can be illustrated more concretely using the following example:
+
+* An object is stored on nodes A, B, and C
+* Node C suddenly goes offline due to a network failure
+* A client sends a delete request to node A, which forwards that
+  request to node B, but it cannot reach node C
+* On nodes A and B, the object is deleted
+* Node C comes back online
+* A client attempts to read the object, and the request hits node C
+* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object.
+
+The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred:
+
+1. the object was deleted on A & B but not on C
+2. the object was created on C but not on A & B
+
+To get around this problem, Riak uses *tombstones*.
+
+## Tombstones
+
+Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it.
+
+This allows Riak to understand the difference between an object that has been deleted and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*.
+
+The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**.
+
+After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to).
+
+## Configuring Object Deletion
+
+The `delete_mode` setting in a cluster's [configuration files][config advanced] determines how long a tombstone will remain before being reaped.
+
+There are three possible settings:
+
+* `keep` - Disables tombstone removal
+* `immediate` - The tombstone is removed as soon as the request is
+  received
+* Custom time interval - How long to wait until the tombstone is
+  removed, expressed in milliseconds. The default is `3000`, i.e. to
+  wait 3 seconds
+
+In general, we recommend setting the `delete_mode` parameter to `keep`
+if you plan to delete and recreate objects under the same key. This protects against failure scenarios in which a deleted object may be resurrected.
+
+Setting `delete_mode` to `immediate` can be useful in situations in
+which an aggressive space reclamation process is necessary, such as
+when running [MapReduce jobs][developing mapreduce], but we do not recommend
+this in general.
+
+Setting `delete_mode` to a longer time duration than the default can be
+useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when
+network connectivity is an issue.
+
+## Deletion from Backends
+
+When attempting to reclaim disk space, deleting data may seem like the obvious first step. However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion, when a Riak tombstone is created, and later, when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging]; for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping, and prior to garbage collection delete operations will actually cause disk space usage to rise slightly.
+
+## Tombstones & Reporting
+
+When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:
+
+* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
+* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain on the node until a further request finds it. At this time, a new reap timer will be initiated.
+* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
+* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too.
+* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.
+
+## Client Library Examples
+
+Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
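+
+If you want to poke at tombstone behavior without a client library, deletes can also be issued directly against the HTTP API. A minimal sketch, assuming a local node listening on port 8098 and a hypothetical bucket `test` and key `my_key`:
+
+```curl
+# Bucket and key here are hypothetical; substitute your own
+curl -XDELETE http://localhost:8098/buckets/test/keys/my_key
+```
+
+An immediate follow-up GET on the same key should return `not found`, even while the tombstone may still be present internally.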
+
+## Resources
+
+* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html)
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/runtime-interaction.md b/content/riak/kv/2.9.10/using/reference/runtime-interaction.md
new file mode 100644
index 0000000000..fabf57196f
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/runtime-interaction.md
@@ -0,0 +1,69 @@
+---
+title: "Runtime Interaction Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Runtime Interaction"
+    identifier: "managing_ref_runtime_interaction"
+    weight: 104
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/advanced/runtime
+  - /riak/kv/2.9.10/ops/advanced/runtime
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters
+
+Riak's [configuration files][config reference] provide a variety of parameters that
+enable you to fine-tune how Riak interacts with two important elements
+of the underlying operating system: distribution ports and OS
+processes/garbage collection.
+
+## Ports
+
+Distribution ports connect Riak nodes within a [cluster][concept clusters]. The
+following port-related parameters are available:
+
+* `runtime_health.triggers.distribution_port` - Whether distribution
+  ports with full input buffers will be counted as busy.
+  * Default: `on`
+* `runtime_health.triggers.port` - Whether ports with full input
+  buffers will be counted as busy. Ports can represent open files or network sockets.
+  * Default: `on`
+* `runtime_health.thresholds.busy_ports` - The threshold at which a
+  warning will be triggered about the number of ports that are overly
+  busy. Ports with full input buffers count toward this threshold.
+  * Default: `2`
+
+## Processes
+
+Riak will log warnings related to busy operating system processes and
+garbage collection. You can specify the conditions under which warnings are
+triggered using the following parameters:
+
+* `runtime_health.thresholds.busy_processes` - The threshold at which
+  a warning will be triggered about the number of processes that are
+  overly busy. Processes with large heaps or that take a long time to
+  garbage collect will count toward this threshold.
+  * Default: `30`
+* `runtime_health.triggers.process.heap_size` - A process will be
+  marked as busy when its heap exceeds this size (in bytes).
+  * Default: `160444000`
+* `runtime_health.triggers.process.garbage_collection` - A process
+  will be marked as busy when it exceeds this amount of time doing
+  garbage collection. Enabling this setting can cause performance
+  problems on multi-core systems.
+  * Default: `off`
+  * Example when enabled: `50ms`
+* `runtime_health.triggers.process.long_schedule` - A process will be
+  marked as busy when it exceeds this length of time during a single
+  process scheduling and execution cycle.
+  * Default: `off`
+  * Example when enabled: `20ms`
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/search.md b/content/riak/kv/2.9.10/using/reference/search.md
new file mode 100644
index 0000000000..430348b16e
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/search.md
@@ -0,0 +1,456 @@
+---
+title: "Search Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Search"
+    identifier: "managing_ref_search"
+    weight: 109
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/search
+  - /riak/kv/2.9.10/dev/advanced/search
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters
+[configuring search]: {{<baseurl>}}riak/kv/2.9.10/configuring/search
+
+> **Note on search 2.0 vs. legacy search**
+>
+> This document refers to Riak search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+The project that implements Riak search is codenamed Yokozuna. This
+document is a more detailed overview of the concepts and reasons behind
+the design of Yokozuna, for those interested. If you're simply looking
+to use Riak search, you should check out the [Using Search]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search) document.
+
+![Yokozuna]({{<baseurl>}}images/yokozuna.png)
+
+## Riak Search is Erlang
+
+In Erlang OTP, an "application" is a group of modules and Erlang
+processes which together perform a specific task. The word application
+is confusing because most people think of an application as an entire
+program such as Emacs or Photoshop. But Riak Search is just a subsystem
+of Riak itself. Erlang applications are often stand-alone, but Riak
+Search is more like an appendage of Riak. It requires other subsystems
+like Riak Core and KV, but also extends their functionality by providing
+search capabilities for KV data.
+
+The purpose of Riak Search is to bring more sophisticated and robust
+query and search support to Riak. Many people consider Lucene and
+programs built on top of it, such as Solr, as the standard for
+open-source search. There are many successful applications built on
+Lucene/Solr, and it sets the standard for the feature set that
+developers and users expect. Meanwhile, Riak has a great story as a
+highly-available, distributed key/value store. Riak Search takes
+advantage of the fact that Riak already knows how to do the distributed
+bits, combining its feature set with that of Solr, taking advantage of
+the strengths of each.
+
+Riak Search is a mediator between Riak and Solr. There is nothing
+stopping a user from deploying these two programs separately, but this
+would leave the user responsible for the glue between them. That glue
+can be tricky to write. It requires dealing with monitoring, querying,
+indexing, and dissemination of information.
+
+Unlike Solr by itself, Riak Search knows how to do all of the following:
+
+* Listen for changes in key/value (KV) data and make the appropriate
+  changes to indexes that live in Solr. It also knows how to take a user
+  query on any node and convert it to a Solr distributed search, which
+  will correctly cover the entire index without overlap in replicas.
+* Take index creation commands and disseminate that information across
+  the cluster.
+* Communicate with and monitor the Solr OS process.
+
+## Solr/JVM OS Process
+
+Every node in a Riak [cluster][concept clusters] has a corresponding operating
+system (OS) process running a JVM which hosts Solr on the Jetty
+application server. 
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
+
+Because of this mismatch between KV and Solr, Riak Search must act as a
+mediator between the two, meaning it must have a way to inspect a KV
+object and create a structure which Solr can ingest for indexing. In
+Solr this structure is called a **document**. This task of creating a
+Solr document from a Riak object is the job of the **extractor**. To
+perform this task, two things must be considered.
+
+**Note**: This isn't quite right: the fields created by the extractor
+are only a subset of the fields in the final document. Special fields
+needed for Yokozuna to properly query data, as well as tagging fields,
+are also created. This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a
+   Solr document?
+2. If so, how is the object's value mapped from one to the other?
+   For example, the value may be `application/json` which contains
+   nested objects. This must somehow be transformed into a flat
+   structure.
+
+The first question is answered by the _extractor mapping_. By default
+Yokozuna ships with extractors for several common data types. Below is a
+table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation
+of the extractor module. Every extractor must conform to the
+following Erlang specification:
+
+```erlang
+-spec extract(ObjectValue::binary(), Options::proplist()) -> fields() | {error, term()}.
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+```
+
+The value of the object is passed along with options specific to each
+extractor. Assuming the extractor correctly parses the value, it will
+return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the
+object's value verbatim and associate it with the field name `text`.
+For example, an object with the value "How much wood could a woodchuck
+chuck if a woodchuck could chuck wood?" would result in the following
+fields list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier.
+JSON can be nested arbitrarily. That is, the key of a top-level object
+can have an object as a value, and this object can have another object
+nested inside, and so on. Yokozuna's JSON extractor must have some method
+of converting this arbitrary nesting into a flat list. It does this by
+concatenating nested object fields with a separator. The default
+separator is `.`. An example should make this clearer.
+
+Below is JSON that represents a person, what city they are from, and what
+cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice:
+
+* Nested objects have their field names concatenated to form a field
+  name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your
+  schema define this field as multi-valued.
+
+The XML extractor works in a very similar fashion to the JSON extractor
+except it also has element attributes to worry about. To see the
+document created for an object, without actually writing the object, you
+can use the extract HTTP endpoint. This will do a dry-run extraction and
+return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+  -H 'Content-Type: application/json' \
+  --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and
+types. For each document stored, every field must have a matching name
+in the schema, used to determine the field's type, which in turn
+determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempts to hide any details of the Solr
+schema: a user creates a schema for Yokozuna just as she would for Solr.
+Here is the general structure of a schema.
+
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding
+options are declared. Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
+called `_yz_default`. This is an extremely general schema which makes
+heavy use of dynamic fields---it is intended for development and
+testing. In production, a schema should be tailored to the data being
+indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+correcting entropy (divergence) between the data stored in Riak's
+key-value backend and the indexes stored in Solr. The impetus for AAE is
+that failures come in all shapes and sizes---disk failure, dropped
+messages, network partitions, timeouts, overflowing queues, segmentation
+faults, power outages, etc. Failures range from obvious to invisible.
+Failure prevention is fraught with failure, as well. How do you prevent
+your prevention system from failing? You don't. Code for detection, not
+prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite
+expensive. To minimize the overall cost of detection, AAE makes use of
+hashtrees. Every partition has a pair of hashtrees: one for KV and
+another for Yokozuna. As data is written, the hashtrees are updated in
+real time.
+
+Each tree stores the hash of each object. Periodically a partition is
+selected and the pair of hashtrees is _exchanged_. First the root hashes
+are compared. If they are equal, then there is no more work to do. You could have
+millions of keys in one partition and verifying they **all** agree takes
+the same time as comparing two hashes. If they don't match, then the
+root's children are checked, and this process continues until the
+individual discrepancies are found. If either side is missing a key or
+the hashes for a key do not match, then _repair_ is invoked on that key.
+Repair converges the KV data and its indexes, removing the entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the
+hashtrees themselves may contain some entropy. For example, what if the
+root hashes agree but a divergence exists in the actual data? Simple:
+you assume you can never fully trust the hashtrees, so periodically you
+_expire_ them. When expired, a tree is completely destroyed and then
+re-built from scratch. This requires folding all data for a partition,
+which can be expensive and take some time. For this reason, by default,
+expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
+[screencast](http://coffee.jtuple.com/video/AAE.html).
+
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a
+stream of tokens. Solr allows many different methods of analysis,
+an important fact because different field values may represent
+different types of data. For data like unique identifiers, dates, and
+categories you want to index the value verbatim---it shouldn't be
+analyzed at all. For text like product summaries, or a blog post,
+you want to split the value into individual words so that they may be
+queried individually. You may also want to remove common words,
+lowercase words, or perform stemming. This is the process of
+_analysis_.
+
+Solr provides many different field types which analyze data in different
+ways, and custom analyzer chains may be built by stringing together XML
+in the schema file, allowing custom analysis for each field. For more
+information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via
+Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata
+   about it that should be indexed, for example storing an image with
+   location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must
+   be added _without_ modifying the object's value.
+
+See
+[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
+for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index
+entries for a given Riak Object are co-located on the same physical
+machine. To query the entire index, all partitions must be contacted.
+Adjacent partitions keep replicas of the same object. Replication allows
+the entire index to be considered by only contacting a subset of the
+partitions. The process of finding a covering set of partitions is known
+as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can
+be thought of as a unique set of nodes along with a covering set of
+partitions. Yokozuna treats the node list as physical hostnames and
+passes them to Solr's distributed search via the `shards` parameter.
+Partitions, on the other hand, are treated logically in Yokozuna. All
+partitions for a given node are stored in the same index, unlike KV,
+which uses _partition_ as a physical separation. To properly filter out
+overlapping replicas, the partition data from the cover plan is passed to
+Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very
+expensive operation, as much computation is done symbolically, and the
+process amounts to a knapsack problem. The larger the ring, the more
+expensive. Yokozuna takes advantage of the fact that it has no physical
+partitions by computing a coverage plan asynchronously every few
+seconds, caching the plan for query use. In the case of node failure or
+ownership change, this could mean a delay between cluster state and the
+cached plan. This is, however, a good trade-off given the performance
+benefits, especially since even without caching there is a race, albeit
+one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, measured from the time a batch is sent to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, measured from the time a batch is sent to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, measured from the time a drain is initiated to the time the drain is completed.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees. Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr.
+
+* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting.
+
+* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window.
+
+* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window.
+
+* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of query latency, measured from the time a request is sent to Solr to the time the response is received from Solr.
+
+* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window.
+
+* `search_index_bad_entry_count` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+
+* `search_index_bad_entry_one` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute.
+
+* `search_index_extract_fail_count` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak.
+
+* `search_index_extract_fail_one` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute.
+
+While most of the default values are sufficient, you may have to
+increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start.
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/secondary-indexes.md b/content/riak/kv/2.9.10/using/reference/secondary-indexes.md
new file mode 100644
index 0000000000..ca402cecff
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/secondary-indexes.md
@@ -0,0 +1,75 @@
+---
+title: "Secondary Indexes Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Secondary Indexes"
+    identifier: "managing_ref_2i"
+    weight: 110
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.10/dev/advanced/2i
+  - /riak/kv/2.9.10/dev/advanced/2i
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.10/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends.
+
+This document provides implementation and other details for Riak's
+[secondary indexes]({{<baseurl>}}riak/kv/2.9.10/developing/usage/secondary-indexes/) \(2i) feature.
+
+## How It Works
+
+Secondary indexes use **document-based partitioning**, a system where
+indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode). This
+system is also known as a local index. Secondary indexes are a list of key/value
+pairs that are similar to HTTP headers. At write time, objects are
+tagged with index entries consisting of key/value metadata. This
+metadata can be queried to retrieve the matching keys.
+
+![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png)
+
+Indexes reside on multiple machines. Since indexes for an object are
+stored on the same partition as the object itself, query-time
+performance issues might arise. When issuing a query, the system must
+read from a "covering" set of partitions and then merge the results.
+The system looks at how many replicas of data are stored---the N value
+or `n_val`---and determines the minimum number of partitions that it
+must examine (1 / `n_val`) to retrieve a full set of results, also
+taking into account any offline nodes.
+
+An application can modify the indexes for an object by reading an
+object, adding or removing index entries, and then writing the object.
+Finally, an object is automatically removed from all indexes when it is
+deleted. The object's value and its indexes should be thought of as a
+single unit. There is no way to alter the indexes of an object
+independently from the value of an object, and vice versa. Indexing is
+atomic, and is updated in real time when writing an object. This means
+that an object will be present in future index queries as soon as the
+write operation completes.
+
+Riak stores 3 replicas of all objects by default, although this can be
+changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/replication-properties). The system is capable of generating a full set of results
+from one third of the system’s partitions as long as it chooses the
+right set of partitions. The query is sent to each partition, the index
+data is read, and a list of keys is generated and then sent back to the
+requesting node.
+
+> **Note on 2i and strong consistency**
+>
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
+secondary index metadata to those objects, you can still perform
+strongly consistent operations on those objects but the secondary
+indexes will be ignored.
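+
+To make the query path described above concrete, secondary indexes can be queried directly over the HTTP API. A minimal sketch, assuming a local node on port 8098, a hypothetical bucket `users`, and a binary index `twitter_bin` attached to objects at write time:
+
+```curl
+# Exact-match query; the bucket and index names here are hypothetical
+curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```
+
+Range queries follow the same pattern, with a start and an end value appended to the URL in place of the single match value.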
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/snmp.md b/content/riak/kv/2.9.10/using/reference/snmp.md
new file mode 100644
index 0000000000..3276029f1d
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/snmp.md
@@ -0,0 +1,165 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/running/monitoring/snmp
+  - /riak/kv/2.9.10/ops/running/monitoring/snmp
+---
+
+Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document covers only SNMP v2c, which was the last supported version. After the release of Riak KV 2.2.3 Enterprise Edition, support for SNMP was dropped. The configuration examples below are left for people analysing legacy settings and only work with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (e.g. `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: The commas in the IP are in the correct format
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+     [{agent,
+       [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+        {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+     [{agent,
+       [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                  {force_load, true}]},
+        {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings, you can start your Riak node and will be able to snmpwalk the node to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak, you will need to reference the MIB shipped with Riak. For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+    -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+    192.168.52.129:4000 RIAK
+```
+
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/statistics-monitoring.md b/content/riak/kv/2.9.10/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..18b78df9fd
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/statistics-monitoring.md
@@ -0,0 +1,394 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/stats-and-monitoring
+  - /riak/kv/2.9.10/ops/running/stats-and-monitoring
+---
+
+Riak provides data related to current operating status, which includes
+statistics in the form of counters and histograms. These statistics
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/2.9.10/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered
+statistics, as well as numerous solutions for monitoring and gathering
+statistics that our customers and community report using successfully
+in Riak cluster environments. You can learn more about the specific
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/2.9.10/developing/api/http/status) documentation.
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with
+diagnostics and early warnings of potential problems, as well as help
+guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor Load
+* Available Memory
+* Available disk space
+* Used file descriptors
+* Swap Usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory and
+writebacks. Things like massive flushes of dirty pages or steadily
+climbing writeback volumes can indicate poor virtual memory tuning.
+More information can be found [here][sysctl_vm_txt] and in our
+documentation on [system tuning]({{<baseurl>}}riak/kv/2.9.10/using/performance/#storage-and-file-system-tuning).
+
+## Riak Metrics to Graph
+
+Riak metrics fall into several general categories:
+
+1. Throughput metrics
+2. Latency metrics
+3. Erlang resource usage metrics
+4. General Riak load/health metrics
+
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/inspecting-node) is
+not practical, you should pick a minimum relevant subset from these
+categories. Some of the most helpful metrics are discussed below.
+
+### Throughput Metrics
+
+Graphing the throughput stats relevant to your use case is often
+helpful for capacity planning and usage trend analysis. In addition,
+it helps you establish an expected baseline -- that way, you can
+investigate unexpected spikes or dips in the throughput. The
+following stats are recorded for operations that happened *during the
+last minute*.
+
+Metric | Relevance | Operations (for the last minute)
+:--------|:--------|:--------------------------------
+```node_gets``` | K/V | Reads coordinated by this node
+```node_puts``` | K/V | Writes coordinated by this node
+```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes
+```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes
+```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes
+```search_query_throughput_one``` | Search | Search queries on the node
+```search_index_throughput_one``` | Search | Documents indexed by Search
+```consistent_gets``` | Strong Consistency | Consistent reads on this node
+```consistent_puts``` | Strong Consistency | Consistent writes on this node
+```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads
+
+Note that there are no separate stats for updates to Flags or
+Registers, as these are included in ```vnode_map_update```.
+
+### Latency Metrics
+
+As with the throughput metrics, keeping an eye on average (and max)
+latency times will help detect usage patterns, and provide advanced
+warnings for potential problems.
+
+{{% note title="Note on FSM Time Stats" %}}
+FSM Time Stats represent the amount of time in microseconds required to
+traverse the GET or PUT Finite State Machine code, offering a picture of
+general node health. From your application's perspective, FSM Time effectively
+represents experienced latency. Mean, Median, and 95th-, 99th-, and
+100th-percentile (Max) counters are displayed. These are one-minute stats.
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute
+```pbc_active``` | | Number of currently active protocol buffer connections
+```pbc_connects``` | | Number of new protocol buffer connections established during the last minute
+```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes)
+```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0)
+```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection
+```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection
+
+### General Riak Search Load/Health Metrics
+
+These various stats give a picture of the general level of activity or
+load on the Riak node at any given moment.
+
+Metric | Description
+:------|:------------
+`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+`search_index_bad_entry_one` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute.
+`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last start of Riak.
+`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute.
+
+
+## Command-line Interface
+
+The [`riak-admin`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/) tool provides two
+interfaces for retrieving statistics and other information: `status`
+and `stat`.
+
+### status
+
+Running the `riak-admin status` command will return all of the
+currently available information from a running node.
+
+```bash
+riak-admin status
+```
+
+This will return a list of over 300 key/value pairs, like this:
+
+```
+1-minute stats for 'dev1@127.0.0.1'
+-------------------------------------------
+connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1']
+consistent_get_objsize_100 : 0
+consistent_get_objsize_195 : 0
+... etc ...
+```
+
+A comprehensive list of available stats can be found in the
+[Inspecting a Node]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+
+### stat
+
+The `riak-admin stat` command is related to the `riak-admin status`
+command but provides a more fine-grained interface for interacting with
+stats and information. Full documentation of this command can be found
+in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#stat) document.
+
+## Statistics and Monitoring Tools
+
+There are many open source, self-hosted, and service-based solutions for
+aggregating and analyzing statistics and log data for the purposes of
+monitoring, alerting, and trend analysis on a Riak cluster. Some
+solutions provide Riak-specific modules or plugins as noted.
+
+The following are solutions which customers and community members have
+reported success with when used for monitoring the operational status of
+their Riak clusters. Community and open source projects are presented
+along with commercial and hosted services.
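+
+Most of the tools below consume the same HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.10/developing/api/http/status) endpoint mentioned above, so before wiring one up it can be useful to confirm that statistics are flowing. A minimal sketch, assuming a local node listening on port 8098:
+
+```curl
+curl -s http://127.0.0.1:8098/stats | json_pp
+```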
+
+{{% note title="Note on Riak 2.x Statistics Support" %}}
+Many of the below tools were either created by third parties or Basho
+engineers for general usage, and have been passed to the community for further
+updates. As such, many of the below only aggregate the statistics and messages
+that were output by Riak 1.4.x.
+
+Like all code under [Basho Labs](https://github.com/basho-labs/), the below
+tools are "best effort" and have no dedicated Basho support. We both
+appreciate and need your contribution to keep these tools stable and up to
+date. Please open up a GitHub issue on the repository if you'd like to be a
+maintainer.
+
+Look for banners calling out the tools we've verified to support the latest
+Riak 2.x statistics!
+{{% /note %}}
+
+### Self-Hosted Monitoring Tools
+
+#### Riaknostic
+
+[Riaknostic](http://riaknostic.basho.com) is a growing suite of
+diagnostic checks that can be run against your Riak node to discover
+common problems and recommend how to resolve them. These checks are
+derived from the experience of the Basho Client Services Team as well as
+numerous public discussions on the mailing list, IRC room, and other
+online media.
+
+Riaknostic integrates into the `riak-admin` command via a `diag`
+subcommand, and is a great first step in the process of diagnosing and
+troubleshooting issues on Riak nodes.
+
+#### Riak Control
+
+[Riak Control]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-control/) is Basho's REST-driven user interface for managing Riak
+clusters. It is designed to give you quick insight into the health of
+your cluster and allow for easy management of nodes.
+
+While Riak Control does not currently offer specific monitoring and
+statistics aggregation or analysis functionality, it does offer features
+which provide immediate insight into overall cluster health, node
+status, and handoff operations.
+
+#### collectd
+
+[collectd](http://collectd.org) gathers statistics about the system it
+is running on and stores them. The statistics are then typically graphed
+to find current performance bottlenecks, predict system load, and
+analyze trends.
+
+#### Ganglia
+
+[Ganglia](http://ganglia.info) is a monitoring system specifically
+designed for large, high-performance groups of computers, such as
+clusters and grids. Customers and community members using Riak have
+reported success in using Ganglia to monitor Riak clusters.
+
+A [Riak Ganglia module][riak_ganglia] for collecting statistics from
+the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.10/developing/api/http/status) endpoint is also available.
+
+#### Nagios
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x.**
+{{% /note %}}
+
+[Nagios](http://www.nagios.org) is a monitoring and alerting solution
+that can provide information on the status of Riak cluster nodes, in
+addition to various types of alerting when particular events occur.
+Nagios also offers logging and reporting of events and can be used for
+identifying trends and capacity planning.
+
+A collection of [reusable Riak-specific scripts][riak_nagios] are
+available to the community for use with Nagios.
+
+#### OpenTSDB
+
+[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database
+(TSDB) used to store, index, and serve metrics from various sources. It can
+collect data at a large scale and graph these metrics on the fly.
+
+A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of
+the [tcollector framework][tcollector].
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project
+consisting of small programs for sending data to Riemann provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have
+reported successfully using for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.10/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/2.9.10/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. These data are then available for real time graphing,
+search, and other visualization ideal for troubleshooting complex issues
+and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistic information which can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ + + + diff --git a/content/riak/kv/2.9.10/using/reference/strong-consistency.md b/content/riak/kv/2.9.10/using/reference/strong-consistency.md new file mode 100644 index 0000000000..ac2cf698f2 --- /dev/null +++ b/content/riak/kv/2.9.10/using/reference/strong-consistency.md @@ -0,0 +1,148 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.10/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + +## Making the Strong vs. Eventual Decision + +The first system described above may sound like the undisputed champion, +and the second system undesirable. However: + +1. Reads and writes on the first system will often be slower---if only + by a few milliseconds---because the system needs to manage reads and + writes more carefully. If performance is of primary concern, the + first system might not be worth the sacrifice. +2. Reads and writes on the first system may fail entirely if enough + servers are unavailable. If high availability is the top priority, + then the second system has a significant advantage. + +So when deciding whether to use strong consistency in Riak, the +following question needs to be asked: + +#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value? + +If the answer is yes, then you should seriously consider using Riak in a +strongly consistent way for the data that demands it, while bearing in +mind that other data can still be stored in Riak in an eventually +consistent way. 
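+
+If the answer is yes, strong consistency is opted into per bucket type.
+As a minimal sketch (the type name here is illustrative, and this
+assumes strong consistency has already been enabled on the cluster per
+the operators' guide linked earlier), you would create and activate a
+bucket type whose `consistent` property is `true`:
+
+```bash
+# Create a bucket type whose buckets are strongly consistent
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+
+# Activate the type so that clients can store data under it
+riak-admin bucket-type activate strongly_consistent
+```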
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable
+trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available
+than eventually consistent operations because they require a **quorum**
+of available object replicas to succeed. Quorum is defined as N / 2 + 1,
+or `n_val` / 2 + 1, using integer division. If N is set to 7, at least 4
+object replicas must be available; if N is set to 3, at least 2 must be
+available.
+
+If there is a network partition that leaves less than a quorum of object
+replicas available within an ensemble, strongly consistent operations
+against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault
+tolerance. Consistent operations can still succeed when a minority of
+replicas in each ensemble are offline, faulty, or unreachable. In
+other words, **strongly consistent operations will succeed as long as
+quorum is maintained**. A fuller discussion can be found in the
+[operations]({{<baseurl>}}riak/kv/2.9.10/configuring/strong-consistency/#fault-tolerance)
+documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/2.9.10/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+which can entail a performance hit of varying proportions, depending on
+a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/2.9.10/configuring/strong-consistency/#performance).
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.9.10/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..b92c1f96cb
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,38 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.10/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+
+[Learn More >>][v2 mdc fullsync]
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.9.10/using/reference/v2-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..a45f9eb47d
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/v2-multi-datacenter/architecture.md
@@ -0,0 +1,129 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Architecture"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Architecture"
+    identifier: "managing_ref_v2_architecture"
+    weight: 100
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v2/architecture
+  - /riak/kv/2.9.10/ops/mdc/v2/architecture
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.10/using/reference/v3-multi-datacenter/architecture/) instead.
+{{% /note %}}
+
+
+This document provides a basic overview of the architecture undergirding
+Riak's Multi-Datacenter Replication capabilities.
+
+## How Replication Works
+
+When Multi-Datacenter Replication is implemented, one Riak cluster acts
+as a **primary cluster**. The primary cluster handles replication
+requests from one or more **secondary clusters** (generally located in
+datacenters in other regions or countries). If the datacenter with the
+primary cluster goes down, a secondary cluster can take over as the
+primary cluster. In this sense, Riak's multi-datacenter capabilities are
+masterless.
+
+Multi-Datacenter Replication has two primary modes of operation:
+**fullsync** and **realtime**. In fullsync mode, a complete
+synchronization occurs between primary and secondary cluster(s); in
+realtime mode, continual, incremental synchronization occurs, i.e.
+replication is triggered by new updates.
+
+Fullsync is performed upon initial connection of a secondary cluster,
+and then periodically thereafter (every 360 minutes is the default, but
+this can be modified). Fullsync is also triggered if the TCP connection
+between primary and secondary cluster is severed and then recovered.
+
+Both fullsync and realtime modes are described in detail below.
+But first, a few key concepts.
+
+## Concepts
+
+### Listener Nodes
+
+Listeners, also called **servers**, are Riak nodes in the primary
+cluster that listen on an external IP address for replication requests.
+Any node in a Riak cluster can participate as a listener. Adding more
+nodes will increase the fault tolerance of the replication process in
+the event of individual node failures. If a listener node goes down,
+another node can take its place.
+
+### Site Nodes
+
+Site nodes, also called **clients**, are Riak nodes on a secondary
+cluster that connect to listener nodes and send replication initiation
+requests. Site nodes are paired with a listener node when started.
+
+### Leadership
+
+Only one node in each cluster will serve as the lead site (client) or
+listener (server) node. Riak replication uses a leadership-election
+protocol to determine which node in the cluster will participate in
+replication. If a site connects to a node in the primary cluster that is
+not the leader, it will be redirected to the listener node that is
+currently the leader.
+
+## Fullsync Replication
+
+Riak performs the following steps during fullsync
+replication, as illustrated in the figure below.
+
+1. 
A TCP connection is established between the primary and secondary
+   clusters.
+2. The site node in the secondary cluster initiates fullsync replication
+   with the primary node by sending a message to the listener node in
+   the primary cluster.
+3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode) in their respective clusters and compute a hash for
+   each key's object value. The site node on the secondary cluster sends
+   its complete list of key/hash pairs to the listener node in the
+   primary cluster. The listener node then sequentially compares those
+   pairs with its own key/hash pairs, identifying any objects that are
+   missing or in need of an update in the secondary cluster.
+4. The listener node streams the missing objects/updates to the
+   secondary cluster.
+5. The secondary cluster replicates the updates within the cluster to
+   achieve the new object values, completing the fullsync cycle.
+
+<br>
+![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png)
+<br>
+
+## Realtime Replication
+
+Riak performs the following steps during realtime
+replication, as illustrated in the figure below.
+
+1. The secondary cluster establishes a TCP connection to the primary
+   cluster.
+2. Realtime replication of a key/object is initiated when an update is
+   sent from a client to the primary cluster.
+3. The primary cluster replicates the object locally.
+4. The listener node on the primary cluster streams an update to the
+   secondary cluster.
+5. The site node within the secondary cluster receives and replicates
+   the update.
+
+<br>
+![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png)
+<br>
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/2.9.10/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+source and sink cluster.
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.10/using/reference/v2-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..d62103b02d
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/v2-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,52 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v2_fullsync"
+    weight: 101
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v2/scheduling-fullsync
+  - /riak/kv/2.9.10/ops/mdc/v2/scheduling-fullsync
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.10/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead.
+{{% /note %}}
+
+
+## Scheduling Fullsync Operation
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
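+For example, crontab entries along these lines (the times and script
+paths here are hypothetical) would open a sync window at 23:00 and
+close it at 05:00, with the scripts they invoke shown next:
+
+```bash
+# Start of sync window: resume/start fullsync at 23:00
+0 23 * * * /usr/local/bin/repl-window-start.sh
+
+# End of sync window: pause fullsync at 05:00
+0 5 * * * /usr/local/bin/repl-window-end.sh
+```
+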
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` + + + diff --git a/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter.md b/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..69ede7e4df --- /dev/null +++ b/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter.md @@ -0,0 +1,50 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] + + + diff --git a/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..3988c85d53 --- /dev/null +++ b/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,128 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.10/ops/mdc/v3/aae + - /riak/kv/2.9.10/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak
+Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a
+technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in
+LevelDB, so if AAE is already active, there is no additional startup
+delay for enabling the `aae` fullsync strategy. AAE can also be enabled
+for the first time on a cluster, in which case some custom settings can
+help AAE trees be built more quickly. See
+[Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later installed on source and sink
+  clusters
+* Riak MDC Replication Version 3 enabled on source and sink
+  clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the
+  `advanced.config` configuration file must be set to `aae` on both
+  source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In
+  the event that an AAE tree is not built on both the source and sink,
+  fullsync will default to the `keylist` fullsync strategy for that
+  partition.
+
+## Configuration
+
+If you are using Riak version 2.0, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To enable [active anti-entropy][glossary aae] \(AAE), you must enable it on both source and sink clusters. If it is not
+enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all
+of the necessary hash trees, because the default **build rate** is
+1 partition per hour, per node. With a
+[ring size][concept clusters] of 256 and 5 nodes, that works out to
+roughly 2 days.
+
+Changing the rate of tree building can speed up this process, with the
+caveat that rebuilding a tree takes processing time from the cluster,
+and this should not be done without assessing the possible impact on
+get/put latencies for normal cluster operations. For a production
+cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a
+partition has not had its AAE tree built yet, it will default to using
+the `keylist` replication strategy. Instructions on these settings can
+be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting
+the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit`
+settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on
+both source and sink clusters.
If not, the `keylist` replication
+strategy will be used.
+
+To enable AAE with Version 3 MDC Replication:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {fullsync_strategy, aae},
+    % ...
+    ]}
+```
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..f61d6321ac
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/architecture.md
@@ -0,0 +1,185 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Architecture"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Architecture"
+    identifier: "managing_ref_v3_architecture"
+    weight: 100
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v3/architecture
+  - /riak/kv/2.9.10/ops/mdc/v3/architecture
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters
+
+## How Version 3 Replication Works
+
+In Multi-Datacenter (MDC) Replication, a cluster can act as either a
+
+* **source cluster**, which sends replication data to one or more
+* **sink clusters**, which are generally located in datacenters in other
+  regions or countries.
+
+Bidirectional replication can easily be established by making a cluster
+both a source and sink to other clusters. Riak
+Multi-Datacenter Replication is considered "masterless" in that all
+clusters participating will resolve replicated writes via the normal
+resolution methods available in Riak.
+
+In Multi-Datacenter Replication, there are two primary modes of
+operation:
+
+* **Fullsync** replication is a complete synchronization that occurs
+  between source and sink cluster(s), which can be performed upon
+  initial connection of a sink cluster if you wish
+* **Realtime** replication is a continual, incremental synchronization
+  triggered by successful writing of new updates on the source cluster
+
+Fullsync and realtime replication modes are described in detail below.
+
+## Concepts
+
+### Sources
+
+A source refers to a cluster that is the primary producer of replication
+data. A source can also refer to any node that is part of the source
+cluster. Source clusters push data to sink clusters.
+
+### Sinks
+
+A sink refers to a cluster that is the primary consumer of replication
+data. A sink can also refer to any node that is part of the sink
+cluster. Sink clusters receive data from source clusters.
+
+### Cluster Manager
+
+The cluster manager is a Riak service that provides
+information regarding nodes and protocols supported by the sink and
+source clusters. This information is primarily consumed by the
+`riak-repl connect` command.
+
+### Fullsync Coordinator
+
+In fullsync replication, a node on the source cluster is elected to be
+the *fullsync coordinator*. This node is responsible for starting and
+stopping replication to the sink cluster. It also communicates with the
+sink cluster to exchange key lists and ultimately transfer data across a
+TCP connection. If a fullsync coordinator is terminated as the result of
+an error, it will automatically restart on the current node. If the node
+becomes unresponsive, a leader election will take place within 5 seconds
+to select a new node from the cluster to become the coordinator. In the
+event of a coordinator restart, a fullsync will have to restart.
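+
+To make these concepts concrete, connecting a source cluster to a sink
+is done with the `riak-repl` command. A minimal sketch is below; the
+cluster names and address are hypothetical, with the address being the
+sink's configured cluster manager (`cluster_mgr`) host and port:
+
+```bash
+# On a sink-cluster node: name the cluster
+riak-repl clustername sink_cluster
+
+# On a source-cluster node: name the cluster, then connect
+# to the sink's cluster manager
+riak-repl clustername source_cluster
+riak-repl connect 192.168.2.10:9080
+
+# Verify the connection from the source side
+riak-repl connections
+```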
+
+## Fullsync Replication
+
+Fullsync replication scans through the list of partitions in a Riak
+cluster and determines which objects in the sink cluster need to be
+updated. A source partition is synchronized to a node on the sink
+cluster containing the current partition.
+
+## Realtime Replication
+
+In realtime replication, a node in the source cluster will forward data
+to the sink cluster. A node in the source cluster does not necessarily
+connect to a node containing the same [vnode][glossary vnode] on
+the sink cluster. This allows Riak to spread out realtime replication
+across the entire cluster, thus improving throughput and making
+replication more fault tolerant.
+
+### Initialization
+
+Before a source cluster can begin pushing realtime updates to a sink,
+the following commands must be issued:
+
+1. `riak-repl realtime enable <sink_cluster>`
+
+    After this command, the realtime queues (one for each Riak node) are
+    populated with updates to the source cluster, ready to be pushed to
+    the sink.
+
+2. `riak-repl realtime start <sink_cluster>`
+
+    This instructs the Riak connection manager to contact the sink
+    cluster.
+
+    <br />
+    ![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime1.png)
+    <br />
+
+    At this point realtime replication commences.
+
+<ol start="3">
+<li>Nodes with queued updates establish connections to the sink cluster
+and replication begins.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime2.png)
+<br />
+
+### Realtime Queueing and Synchronization
+
+Once initialized, realtime replication continues to use the queues to
+store data updates for synchronization.
+
+<ol start="4">
+<li>The client sends an object to store on the source cluster.</li>
+<li>Riak writes N replicas on the source cluster.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime3.png)
+<br />
+
+<ol start="6">
+<li>The new object is stored in the realtime queue.</li>
+<li>The object is copied to the sink cluster.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime4.png)
+<br />
+
+<ol start="8">
+<li>The destination node on the sink cluster writes the object to N
+nodes.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime5.png)
+<br />
+
+<ol start="9">
+<li>The successful write of the object to the sink cluster is
+acknowledged and the object removed from the realtime queue.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime6.png)
+<br />
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size][concept clusters]; if you are using fullsync
+replication, every bucket's `n_val` must be the same in both the
+source and sink cluster.
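+
+A quick way to verify the ring size restriction is to compare the
+`ring_size` setting in each cluster's `riak.conf`; the value below is
+only an example, and the two clusters simply need to match:
+
+```riakconf
+ring_size = 128
+```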
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/cascading-writes.md
new file mode 100644
index 0000000000..b715131555
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -0,0 +1,101 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Cascading Realtime Writes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Cascading Writes"
+    identifier: "managing_ref_v3_cascading_writes"
+    weight: 102
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v3/cascading-writes
+  - /riak/kv/2.9.10/ops/mdc/v3/cascading-writes
+---
+
+## Introduction
+
+Riak includes a feature that cascades realtime writes across
+multiple clusters.
+
+Cascading Realtime Writes is enabled by default on new clusters running
+Riak. It will need to be manually enabled on existing clusters.
+
+Cascading realtime requires the `{riak_repl, rtq_meta}` capability to
+function.
+
+{{% note title="Note on cascading tracking" %}}
+Cascading tracking is a simple list of where an object has been written. This
+works well for most common configurations. Larger installations, however, may
+have writes cascade to clusters to which other clusters have already written.
+{{% /note %}}
+
+
+```
++---+     +---+     +---+
+| A | <-> | B | <-> | C |
++---+     +---+     +---+
+  ^                   ^
+  |                   |
+  V                   V
++---+     +---+     +---+
+| F | <-> | E | <-> | D |
++---+     +---+     +---+
+```
+
+In the diagram above, a write at cluster A will begin two cascades. One
+goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and
+finally B. Each cascade will loop around to A again, sending a
+replication request even if the same request has already occurred from
+the opposite direction, creating 3 extra write requests.
+
+This can be mitigated by disabling cascading in a cluster. If cascading
+were disabled on cluster D, a write at A would begin two cascades. One
+would go through B, C, and D, the other through F, E, and D. This
+reduces the number of extraneous write requests to 1.
+
+A different topology can also prevent extra write requests:
+
+```
++---+                     +---+
+| A |                     | E |
++---+                     +---+
+ ^  ^                      ^  ^
+ |   \   +---+     +---+  /   |
+ |    >  | C | <-> | D | <    |
+ |   /   +---+     +---+  \   |
+ V  V                      V  V
++---+                     +---+
+| B |                     | F |
++---+                     +---+
+```
+
+A write at A will cascade to C and B. B will not cascade to C because
+A will have already added C to the list of clusters where the write has
+occurred. C will then cascade to D. D then cascades to E and F. E and F
+see that the other was sent a write request (by D), and so they do not
+cascade.
+
+## Usage
+
+Riak Cascading Writes can be enabled and disabled using the
+`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/v3-multi-datacenter) for more information.
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
+
+
+
diff --git a/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..8065bd0f59
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,71 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.10/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/2.9.10/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak_repl`
+section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes,
+  between fullsyncs, _or_
+* a list of `{"clustername", time_in_minutes}` pairs, one for each sink
+  participating in fullsync replication. Note the commas separating each
+  pair, and the `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    {fullsync_interval, 90} %% fullsync runs every 90 minutes
+    % ...
+    ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    % clusters sink_boston + sink_newyork have different intervals (in minutes)
+    {fullsync_interval, [
+        {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+        {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+    ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been
+  completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the
+  specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last
+  completed fullsync.
+
+
+
diff --git a/content/riak/kv/2.9.10/using/repair-recovery.md b/content/riak/kv/2.9.10/using/repair-recovery.md
new file mode 100644
index 0000000000..9d3259b015
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/repair-recovery.md
@@ -0,0 +1,51 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details the most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
+
+
+
diff --git a/content/riak/kv/2.9.10/using/repair-recovery/errors.md b/content/riak/kv/2.9.10/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..8aabb5ed9a
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/repair-recovery/errors.md
@@ -0,0 +1,365 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/recovery/errors
+  - /riak/kv/2.9.10/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may
+encounter -- screws fall out all of the time, the world is an imperfect
+place. This is an attempt at capturing the most common recent errors
+that users encounter, and at describing some non-critical error atoms
+that you may find in the logs.
+
+Discovering the source of an error can take some detective work, since
+one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error
+messages may appear in. Depending upon your log configuration, some may
+appear more often (e.g., if you set the log to debug), while others may
+output to your console (e.g., if you teed your output or started as `riak
+console`).
+
+You can optionally customize your log message format via the
+`lager_default_formatter` field under `lager` in `app.config`. If you
+do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so that you can look up portions of a
+log message, since printing every variation would be a bit unwieldy. For
+example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+starts with a date (`12:34:27.999`), followed by the log severity
+(`[error]`), with a message formatted by Lager (found in the Lager table
+below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the Lager project, so it's good to note
+some of the more common message formats. In almost every case the
+reasons for the error are described as variables, such as `Reason` or
+`Mod` (meaning the Erlang module which is generally the source of the
+error).
+
+Riak does not format all error messages that it receives into
+human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in
+this table, where the Erlang `Mod` value is `riak_core_capability` and
+the reason was an Erlang error: `no function clause matching
+orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang programming favors a "happy path/fail fast" style, one
+of the more common error log strings you might encounter contains
+`{error,{badmatch,{...`. This is Erlang's way of telling you that an
+unexpected value was assigned, so these errors can prefix the more
+descriptive parts. In this case, `{error,{badmatch,{...` prefixes the
+more interesting `insufficient_vnodes_available` error, which can be
+found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or, if Riak is already running, check for `beam.smp`; or epmd may think Riak is running, so check for and kill the epmd process
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.10/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.10/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | A write was attempted on a file/directory that resides on a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large an object or you simply don't have enough RAM. The suggested minimum RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang
+atoms (and quite often wrapped within an `{error,{badmatch,{...` tuple,
+as described in the [Erlang Errors](#erlang-errors) section
+above). This table lays out those terse error codes and related log
+messages, if they exist.
+
+### Riak Core
+
+Riak Core is the underlying implementation for KV. These are errors
+originating from that framework, and can appear whether you use KV,
+Search, or any Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g. 
backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check `riak-admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check | 
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections, and ensure that all nodes share the same Erlang cookie (`vm.args` `-setcookie`)
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use (e.g., `{riak_kv_stat, true}`)
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possibly a
corrupted ring
+`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure the receiver node is still up and running
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN` and the sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.10/using/repair-recovery/errors/#more"> Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive a `xfer_complete` status
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be
+Riak proper.
This is the source of most of the code, and consequently,
+most of the error messages.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status
+`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
+`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that the coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.10/using/repair-recovery/errors/#more"> Step 1</a>.
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/2.9.10/using/repair-recovery/errors/#more"> Step 1</a>.
+`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone
+`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value
+`{field_parsing_failed, {Field, Value}}` | `Could not parse field
+<Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer
+`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug
+`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend
+`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`.
+`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings
+`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe`
+`invalid_message` | | Unknown event sent to module | Ensure you're running similar versions of Riak (and specifically poolboy) across all nodes
+`{invalid_range, Args}` | | Index range query has Start > End | Fix your query
+`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result
+`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files
+`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key
+`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk.
If this happens repeatedly, stop/start the Riak node to force a memory realloc
+`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N
+`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized
+`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe`
+`notfound` | | No value found | Value was deleted, or was not yet stored or replicated
+`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high
+`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value
+`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code
+`{pw_val_unsatisfied, PR, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high
+`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value
+`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high
+`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value
+`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called
+`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value
+`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false`
+`timeout` | | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.10/using/repair-recovery/errors/#more"> Step 1</a>. Or check that you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production.
+`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format
+`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format
+`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value
+`too_many_results` | | Too many results are attempted to be returned | This is a protective error.
+`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin`
+`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value; contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high
+`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value
+ | `Invalid equality query <SKey>` | An equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query
+ | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query
+ | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for the given `Reason` | There are several possible reasons for failure; read the attached reason for insight into resolution
+
+### Backend Errors
+
+These errors tend to stem from server-based problems. Backends are
+sensitive to low or corrupted disk and memory resources, native code, and
+configuration differences between nodes. Conversely, a network issue is
+unlikely to affect a backend.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config
+`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory in which to store Bitcask data, under the `bitcask` section
+`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap the `multi_backend` config value in a list
+`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config`
+`{invalid_config_setting, multi_backend_default, backend_not_found}` | | The configured default backend was not found in the `multi_backend` list | Choose a valid backend type to configure
+`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config`
+`not_loaded` | | Native driver not loading | Ensure your native drivers exist (`.dll` or `.so` files under `lib/<project>/priv`, where `<project>` is most likely `eleveldb`)
+`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket
+`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production
+
+### JavaScript
+
+These are some errors related to JavaScript pre-commit functions,
+MapReduce functions, or simply the management of the pool of JavaScript
+VMs. If you do not use JavaScript, these should not be encountered. If
+they are, check your configuration for high `*js_vm*` values, or treat
+them as an epiphenomenon of a real issue, such as low resources.
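+
+As a point of reference only, the JavaScript VM pool sizes referred to in the
+table below live in the `riak_kv` section of `app.config`. The counts in this
+sketch are hypothetical and should be sized to your own workload:
+
+```appconfig
+%% Hypothetical example: JavaScript VM pool sizes in the riak_kv section.
+{riak_kv, [
+    %% VMs available for JavaScript map functions
+    {map_js_vm_count, 24},
+    %% VMs available for JavaScript reduce functions
+    {reduce_js_vm_count, 18},
+    %% VMs available for JavaScript pre-commit hooks
+    {hook_js_vm_count, 4}
+]}
+```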
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase the number of JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`)
+`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A given UTF-8 character was badly formatted | Only use correct UTF-8 characters for JavaScript code and arguments
+`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments
+ | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if they are invalid | Fix the bucket properties
+`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | On OS X you may have compiled with `llvm-gcc` rather than `gcc`.
+
+### MapReduce
+
+These are possible errors logged by Riak's MapReduce implementation,
+both legacy as well as Pipe. If you never use or call MapReduce, you
+should not run across these.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it
+`bad_mapred_inputs` | | A bad value was sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly encoded as binaries
+`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce query code placed as a Riak value must be stored before execution
+`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct
+`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (both issues may require a node restart)
+`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has an invalid `inputs` field | Fix the MapReduce `inputs` field
+`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks. Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite them as Erlang functions
+`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an `inputs` or a `query` field is required | Post the MapReduce request with at least one of them
+`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value
+`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`{no_candidate_nodes, exhausted_preflist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long-running job hitting the MapReduce timeout; upgrade to Pipe
+`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has an invalid `query` field | Fix the MapReduce query
+`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set a reduce phase at all
+`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted
+ | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation; the most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects
+ | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen
+
+## Specific Messages
+
+Although you can put together many error causes with the tables above,
+here are some common yet esoteric messages with known causes and
+solutions.
+
+Message | Resolution
+:-------|:----------
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The node has been changed, either through a change of IP or of the `vm.args` `-name` value, without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files (`rm -rf /var/lib/riak/ring/*`) and rejoin the node to the cluster
+gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a `ttl` setting on one node's memory backend and not on another's)
+monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting `+zdbbl` higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high-bandwidth network is congested, try setting `RTO_min` down to 0 msec (or 1 msec).
+<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info; you can add `+swt very_low` to your `vm.args`
+(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/2.9.10/using/repair-recovery/errors/#more">Step 2</a>.
+enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support, and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core.
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the address that the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause when resolving this kind of error, and check for the existence of stale `beam.smp` processes.
+exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify the HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range, as the `riak` user will not have access to such ports.
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node had its original `-name` value in `vm.args` changed by simply editing the file, rather than properly through `riak-admin cluster replace`.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/2.9.10/using/reference/snmp) configuration.
+RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ... | This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
+
+
+### More
+
+1. <a name="f1"></a>Ensure node inter-communication
+    - Check `riak-admin member-status` and ensure the cluster is valid.
+    - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected.
+    - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node.
+    - Your remote `vm.args` `-setcookie` value must be the same for every node in the cluster.
+    - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`).
+
+2. <a name="f2"></a>Run LevelDB compaction
+    1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting; more than one might be a strong indication of a hardware or OS bug. A reusable sketch of this check appears after these steps.)*
+    2. Stop Riak on the node: `riak stop`
+    3. Start an Erlang session (do not start Riak, we just want Erlang)
+    4. From the Erlang console, perform the following command to open the LevelDB database
+
+        ```erlang
+        [application:set_env(eleveldb, Var, Val) || {Var, Val} <-
+            [{max_open_files, 2000},
+             {block_size, 1048576},
+             {cache_size, 20*1024*1024*1024},
+             {sync, false},
+             {data_root, "/var/db/riak/leveldb"}]].
+        ```
+    5. For each of the corrupted LevelDB databases (found by `find . -name "LOG" -exec grep -l 'Compaction error' {} \;`) run this command, substituting in the proper vnode number.
+
+        ```erlang
+        eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []).
+        ```
+    6. When all have finished successfully, you may restart the node: `riak start`
+    7. Check for proper operation by looking at the log files in `/var/log/riak` and the `LOG` files in the affected LevelDB vnodes.
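+
+To make the compaction-error check from step 1 repeatable, the `find`/`grep`
+pipeline can be wrapped in a small script. This is a sketch only; it assumes
+the data root used in the examples above (`/var/db/riak/leveldb`), which you
+should adjust to your own installation:
+
+```bash
+#!/bin/sh
+# Print the vnode directories whose LevelDB LOG records a compaction error.
+# The data root below is an assumption; point it at your leveldb directory.
+LEVELDB_ROOT="/var/db/riak/leveldb"
+
+find "$LEVELDB_ROOT" -name "LOG" -exec grep -l 'Compaction error' {} \; |
+while read -r log; do
+  dirname "$log"    # each line printed is a vnode in need of repair
+done
+```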
+ + + diff --git a/content/riak/kv/2.9.10/using/repair-recovery/failed-node.md b/content/riak/kv/2.9.10/using/repair-recovery/failed-node.md new file mode 100644 index 0000000000..2fd97013d5 --- /dev/null +++ b/content/riak/kv/2.9.10/using/repair-recovery/failed-node.md @@ -0,0 +1,113 @@ +--- +title: "Recovering a Failed Node" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Recover a Failed Node" + identifier: "repair_recover_failed_node" + weight: 104 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.10/ops/running/recovery/failed-node + - /riak/kv/2.9.10/ops/running/recovery/failed-node +--- + +## General Recovery Notes + +A Riak node can fail for many reasons, but a handful of checks enable you to +uncover some of the most common problems that can lead to node failure, +such as checking for RAID and filesystem consistency or faulty memory and +ensuring that your network connections are fully functioning. + +When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster. + +During the recovery process, hinted handoff will kick in and update the data on +the recovered node with updates accepted from other nodes in the cluster. Your +cluster may temporarily return `not found` for objects that are currently +being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on +these scenarios, in particular how the system behaves while the failed node is +not part of the cluster). + +## Node Name Changed + +If you are recovering from a scenario in which node name changes are out of +your control, you'll want to notify the cluster of its *new* name using the +following steps: + +1. Stop the node you wish to rename: + + ```bash + riak stop + ``` + + +2. Mark the node down from another node in the cluster: + + ```bash + riak-admin down <previous_node_name> + ``` + +3. Update the node name in Riak's configuration files: + + ```riakconf + nodename = <updated_node_name> + ``` + + ```vmargs + -name <updated_node_name> + ``` + +4. Delete the ring state directory (usually `/var/lib/riak/ring`). + +5. Start the node again: + + ```bash + riak start + ``` + +6. Ensure that the node comes up as a single instance: + + ```bash + riak-admin member-status + ``` + + The output should look something like this: + + ``` + ========================= Membership ========================== +Status Ring Pending Node +--------------------------------------------------------------- +valid 100.0% -- 'dev-rel@127.0.0.1' +--------------------------------------------------------------- +Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +7. Join the node to the cluster: + + ```bash + riak-admin cluster join <node_name_of_a_member_of_the_cluster> + ``` + +8. Replace the old instance of the node with the new: + + ```bash + riak-admin cluster force-replace <previous_node_name> <new_node_name> + ``` + +9. 
Review the changes: + + ```bash + riak-admin cluster plan + ``` + + Finally, commit those changes: + + ```bash + riak-admin cluster commit + ``` + + + diff --git a/content/riak/kv/2.9.10/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.9.10/using/repair-recovery/failure-recovery.md new file mode 100644 index 0000000000..6fc9e02180 --- /dev/null +++ b/content/riak/kv/2.9.10/using/repair-recovery/failure-recovery.md @@ -0,0 +1,128 @@ +--- +title: "Failure & Recovery" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Failure & Recovery" + identifier: "repair_recover_failure" + weight: 100 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.10/ops/running/recovery/failure-recovery + - /riak/kv/2.9.10/ops/running/recovery/failure-recovery +--- + +Riak was built to withstand---or at the very least reduce the severity +of---many types of system failure. Nonetheless, bugs are a reality, +hardware does break, and occasionally Riak itself will fail. Here, we'll +list some steps that can be taken to minimize the harm caused by a general +cluster failure. + +## Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, backup log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally and +are not affected by a wider problem like a virtualization or network outage. +Try to determine the cause of the problem from the data you have collected. + +## Data Loss + +Many failures incur no data loss or minimal loss that can be +repaired automatically, without intervention. Outage of a single node +does not necessarily cause data loss, as other replicas of every key are +available elsewhere in the cluster. Once the node is detected as down, +other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/2.9.10/learn/glossary/#hinted-handoff)). + +More severe data loss scenarios usually relate to hardware failure. +If data is lost, several options are available for restoring it. + +1. **Restore from backup** - A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but it can be used to partially restore data from + lost storage volumes. If running in a RAID configuration, rebuilding + the array may also be possible. +2. **Restore from multi-cluster replication** - If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the `riak-repl` + command. +3. **Restore using intra-cluster repair** - Riak versions 1.2 and greater + include a repair feature which will restore lost partitions with + data from other replicas. Currently, this must be invoked manually + using the Riak console and should be performed with guidance from a + Basho Client Services Engineer. + +Once data has been restored, normal operations should continue. If +multiple nodes completely lose their data, consultation and assistance +from Basho are strongly recommended. + +## Data Corruption + +Data at rest on disk can become corrupted by hardware failure or other +events. 
Generally, the Riak storage backends are designed to handle
+cases of corruption in individual files or entries within files, and can
+repair them automatically or simply ignore the corrupted parts.
+Otherwise, clusters can recover from data corruption in roughly the same
+way that they recover from data loss.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully.
+
+Replacing the node with one that has greater RAM capacity may temporarily
+alleviate the problem, but out-of-memory (OOM) issues tend to be an indication
+that the cluster is under-provisioned.
+
+## High Latency / Request Timeout
+
+High latencies and timeouts can be caused by slow disks or networks or an
+overloaded node. Check `iostat` and `vmstat` or your monitoring system to
+determine the state of resource usage. If I/O utilization is high but
+throughput is low, this may indicate that the node is responsible for
+too much data and growing the cluster may be necessary. Additional RAM
+may also improve latency because more of the active dataset will be
+cached by the operating system.
+
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/2.9.10/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+the number of siblings, causing longer disk service times and slower
+network responses.
+
+Sibling explosion can be detected by examining the `node_get_fsm_siblings`
+and `node_get_fsm_objsize` statistics from the `riak-admin status` command.
+To recover from sibling explosion, the application should be throttled, and
+the resolution policy might need to be invoked manually on offending keys.
+
+A Basho CSE can assist in manually finding large values in the storage
+backend, i.e. those that potentially have a sibling explosion problem.
+
+MapReduce requests typically involve multiple I/O operations and are
+thus the most likely to time out. From the perspective of the client
+application, the success of MapReduce requests can be improved by reducing the
+number of inputs, supplying a longer request timeout, and reducing the usage
+of secondary indexes. Heavily loaded clusters may experience more MapReduce
+timeouts simply because many other requests are being serviced as well. Adding
+nodes to the cluster can reduce MapReduce failure in the long term by
+spreading load and increasing available CPU and IOPS.
+
+
+## Cluster Recovery From Backups
+
+See [Changing Cluster Information]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+
+{{% note title="Tip" %}}
+If you are a TI Tokyo Riak support customer and require assistance or
+further advice with a cluster recovery, please file a ticket with the
+<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>.
+{{% /note %}} + + + diff --git a/content/riak/kv/2.9.10/using/repair-recovery/repairs.md b/content/riak/kv/2.9.10/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..d623e39341 --- /dev/null +++ b/content/riak/kv/2.9.10/using/repair-recovery/repairs.md @@ -0,0 +1,390 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.10/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.10/ops/running/recovery/repairing-indexes + - /riak/2.9.10/ops/running/recovery/failed-node + - /riak/kv/2.9.10/ops/running/recovery/failed-node + - /riak/2.9.10/ops/running/recovery/repairing-leveldb + - /riak/kv/2.9.10/ops/running/recovery/repairing-leveldb + - /riak/2.9.10/ops/running/recovery/repairing-partitions + - /riak/kv/2.9.10/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/2.9.10/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/2.9.10/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/2.9.10/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. 
The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. The default is `./data`.
+
+Compaction error messages take the following form:
+
+```
+<timestamp> Compaction Error: Corruption: corrupted compressed block contents
+```
+
+To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction Error` in each `LOG` file. Here is an example script:
+
+```bash
+find . -name "LOG" -exec grep -l 'Compaction error' {} \;
+```
+
+If there are compaction errors in any of your vnodes, they will be listed in the console. If any vnode has experienced such errors, you will see output like this:
+
+```
+./442446784738847563128068650529343492278651453440/LOG
+```
+
+
+{{% note %}}
+While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level.
+{{% /note %}}
+
+
+## Healing Corrupted LevelDBs
+
+When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not.
+
+Choose your setup below:
+
+1. [Just LevelDB](#leveldb)
+2. [LevelDB with tiered storage](#leveldb-with-tiered-storage)
+
+
+### LevelDB
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note that you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+3\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+4\. Then set `Options` equal to an empty list:
+
+```erlang
+Options = [].
+```
+
+5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB vnode ID that you found using the [`find` command above](#checking-for-compaction-errors).
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+### LevelDB with Tiered Storage
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. Check your `riak.conf` file and make note of the following values (an illustrative sketch follows this list):
+
+* `leveldb.tiered` (integer)
+* `leveldb.tiered.path.fast`
+* `leveldb.tiered.path.slow`
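+
+As an illustration, a tiered configuration in `riak.conf` might look like the
+following sketch. The setting names are the ones listed above; the level and
+the paths are hypothetical values only:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_ssd/riak
+leveldb.tiered.path.slow = /mnt/slow_disk/riak
+```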
+
+3\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note that you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+4\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+5\. Then supply the information you noted in Step 2:
+
+```erlang
+Options = [
+    {tiered_slow_level, »leveldb.tiered value«},
+    {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"},
+    {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"}
+].
+```
+
+6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of the corrupted LevelDB partitions that you found using the [`find` command above](#checking-for-compaction-errors), provided in double quotes.
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+
+## Repairing Partitions
+
+If you have experienced a loss of object replicas in your cluster, you
+may need to perform a repair operation on one or more of your data
+[partitions]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+run in situations where partitions or whole nodes are lost due to
+corruption or hardware failure. In these cases, nodes or partitions are
+brought back online without any data, which means that the need to
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/) is enabled.
+
+You will need to run a repair if the following are both true:
+
+* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* You have both non-expiring data and keys that are not accessed
+  frequently (which means that they are not likely to be subject to
+  [read repair]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+
+You will most likely not need to run a repair operation if _any_ of the
+following is true:
+
+* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/2.9.10/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Your entire key set is accessed frequently, allowing passive read
+  repair to repair the partitions
+* Your data expires frequently
+
+In most cases, we recommend either using active anti-entropy or, if
+necessary and only when necessary, running a repair operation using the
+instructions below.
+
+### Running a Repair
+
+The Riak KV repair operation will repair objects from a node's adjacent
+partitions on the ring, consequently fixing the index. This is done as
+efficiently as possible by generating a hash range for all the buckets
+and thus avoiding a preflist calculation for each key. Only a hash of
+each key is computed, its range is determined from a bucket-to-range map,
+and then the hash is checked against the range.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make them
+mutually exclusive events. If you join or remove a node, all repairs
+across the entire cluster will be killed.
+
+### Repairing a Single Partition
+
+In the case of data loss in a single partition, only that partition can
+be repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit **Enter** again to get a console prompt.
+
+2. Execute the repair for a single partition using the below command:
+
+    ```erlang
+    riak_kv_vnode:repair(»Partition ID«).
+    ```
+
+    where `»Partition_ID«` is replaced by the ID of the partition to
+    repair. For example:
+
+    ```erlang
+    riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
+    ```
+
+3. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Repairing All Partitions on a Node
+
+If a node is lost, all partitions currently owned by that node can be
+repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+2. Get a copy of the current Ring:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with ring record information.
+    You can safely ignore it.
+
+3. Get a list of partitions owned by the node that needs to be repaired.
+Replace `dev1@127.0.0.1` with the name of the node to be repaired. The
+name can be found in each node's `vm.args` file, specified as the
+`-name` parameter, if you are using the older configuration system; if
+you are using the newer, `riak.conf`-based system, the name is given by
+the `nodename` parameter.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    **Note**: The above is an [Erlang list
+    comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html)
+    that loops over each `{Partition, Node}` tuple in the ring and
+    extracts only the partitions that match the given node name, as a
+    list.
+
+4. Execute the repair on all the partitions. Executing the repairs all
+at once will cause a lot of `{shutdown, max_concurrency}` messages in
+the logs. These can be safely ignored, as it is just the transfers
+mechanism enforcing an upper limit on the number of concurrent
+transfers.
+
+    ```erlang
+    [riak_kv_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Monitoring Repairs
+
+The above repair commands can be monitored via the `riak-admin
+transfers` command.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This command can be executed from a `riak attach`
+session like below:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, and will
+look similar to:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Repairs on a node can also be killed remotely from another node in the
+cluster. From a `riak attach` session the below command can be used:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.10/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.9.10/using/repair-recovery/rolling-replaces.md
new file mode 100644
index 0000000000..a413e2019c
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/repair-recovery/rolling-replaces.md
@@ -0,0 +1,74 @@
+---
+title: "Rolling Replaces"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Rolling Replaces"
+    identifier: "repair_recover_replace"
+    weight: 106
+    parent: "managing_repair_recover"
+toc: true
+---
+
+[upgrade]: {{<baseurl>}}riak/kv/2.9.10/setup/upgrading/cluster/
+[rolling restarts]: {{<baseurl>}}riak/kv/2.9.10/using/repair-recovery/rolling-restart/
+[add node]: {{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes
+
+Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis.
+
+The following steps should be undertaken on each Riak KV node that you wish to replace:
+
+1\. Create a free node:
+
+  a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster.
+
+  b\. Or have a node that is currently in the cluster leave it:
+
+  ```bash
+  riak-admin cluster leave »nodename«
+  ```
+
+  After creating a node or leaving a node, wait for all transfers to complete:
+
+  ```bash
+  riak-admin transfers
+  ```
+
+2\. Join the free node to your cluster:
+
+```bash
+riak-admin cluster join »free_node«
+```
+
+3\. Next, replace the free node with an existing node:
+
+```bash
+riak-admin cluster replace »free_node« »nodename«
+```
+
+4\. Then review the cluster transition plan:
+
+```bash
+riak-admin cluster plan
+```
+
+5\. And commit the changes:
+
+```bash
+riak-admin cluster commit
+```
+
+6\. Wait for all transfers to complete (a polling sketch follows these steps):
+
+```bash
+riak-admin transfers
+```
+
+7\. Repeat steps 2-6 above until each node has been replaced.
+
+8\. Join the replaced node back into the cluster or decommission the additional node that was created.
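+
+When waiting for transfers to complete in steps 1 and 6, you can poll instead
+of re-running the command by hand. A minimal sketch, using the same check as
+the rolling-restart instructions:
+
+```bash
+while ! riak-admin transfers | grep -iqF 'No transfers active'
+do
+  echo 'Transfers in progress'
+  sleep 5
+done
+```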
+ + + diff --git a/content/riak/kv/2.9.10/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.9.10/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..a3fdfcf408 --- /dev/null +++ b/content/riak/kv/2.9.10/using/repair-recovery/rolling-restart.md @@ -0,0 +1,63 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.10/ops/running/recovery/rolling-restart + - /riak/kv/2.9.10/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/2.9.10/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. + + + diff --git a/content/riak/kv/2.9.10/using/repair-recovery/secondary-indexes.md b/content/riak/kv/2.9.10/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..5011f5e05c --- /dev/null +++ b/content/riak/kv/2.9.10/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,140 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.10/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.10/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
+
+### Running a Repair
+
+The secondary indexes of a single partition can be repaired by executing:
+
+```bash
+riak-admin repair-2i <Partition_ID>
+```
+
+The secondary indexes of every partition can be repaired by executing the same command, without a partition ID:
+
+```bash
+riak-admin repair-2i
+```
+
+### Monitoring a Repair
+
+Repairs can be monitored using the below command:
+
+```bash
+riak-admin repair-2i status
+```
+
+### Killing a Repair
+
+In the event the secondary index repair operation needs to be halted, all repairs can be killed with:
+
+```bash
+riak-admin repair-2i kill
+```
+----
+
+## Repairing Search Indexes
+
+Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load-balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned.
+
+### Running a Repair
+
+If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index.
+
+This is done as efficiently as possible by generating a hash range for all the buckets and thus avoiding a preflist calculation for each key. Only a hash of each key is computed, its range is determined from a bucket→range map, and then the hash is checked against the range.
+
+This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly.
+
+1. From a cluster node with Riak installed, attach to the Riak console:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit **Enter** again to get a console prompt.
+
+2. Get a list of partitions owned by the node that needs repair:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with Ring record information. You can safely ignore it.
+
+3. Then run the following code to get a list of partitions. Replace `'dev1@127.0.0.1'` with the name of the node you need to repair.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the Ring and extracts only the partitions that match the given node name, as a list._
+
+4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` spam, but it's not anything to worry about. That is just the transfers mechanism enforcing an upper limit on the number of concurrent transfers.
+
+    ```erlang
+    [riak_search_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. When you're done, press `Ctrl-D` to disconnect the console. DO NOT RUN `q()`, which will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service; it does not stop the code from running.
+
+
+### Monitoring a Repair
+
+The above repair command can be slow, so if you reattach to the console, you can run the `repair_status` function. You can use the `Partitions` variable defined above to get the status of every partition.
+
+```erlang
+[{P, riak_search_vnode:repair_status(P)} || P <- Partitions].
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node.
This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This means you'll either have to be attached to
+that node's console, or you can use the `rpc` module to make a remote
+call. Here is an example of killing all repairs targeting partitions
+on the local node:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, something akin to this:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Here is an example of executing the call remotely:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make
+them mutually exclusive events. If you join or remove a node, all
+repairs across the entire cluster will be killed.
+
+
+
diff --git a/content/riak/kv/2.9.10/using/running-a-cluster.md b/content/riak/kv/2.9.10/using/running-a-cluster.md
new file mode 100644
index 0000000000..de8902efcb
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/running-a-cluster.md
@@ -0,0 +1,338 @@
+---
+title: "Running a Cluster"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Running a Cluster"
+    identifier: "managing_running_a_cluster"
+    weight: 200
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/building/basic-cluster-setup
+  - /riak/kv/2.9.10/ops/building/basic-cluster-setup
+---
+
+Configuring a Riak cluster involves instructing each node to listen on a
+non-local interface, i.e. not `127.0.0.1`, and then joining all of the
+nodes together to participate in the cluster.
+
+Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/) located in your `rel/riak/etc` directory (if
+you compiled from source) or `/etc` (if you used a binary install of
+Riak).
+
+The commands below presume that you are running from a source install,
+but if you have installed Riak with a binary install, you can substitute
+the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin`
+with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts
+are located in the `/bin` directory of your installation.
+
+> **Note on changing the `name` value**
+>
+> If possible, you should avoid starting Riak prior to editing the name of
+a node. This setting corresponds to the `nodename` parameter in the
+`riak.conf` file if you are using the newer configuration system, and to
+the `-name` parameter in `vm.args` (as described below) if you are using
+the older configuration system. If you have already started Riak with
+the default settings, you cannot change the `-name` setting and then
+successfully restart the node.
+>
+> If you cannot restart after changing the `-name` value you have two
+options:
+>
+> * Discard the existing ring metadata by removing the contents of the
+`ring` directory. This will require rejoining all nodes into a
+cluster again (a short sketch follows this note).
+>
+> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
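+
+For the first option, a minimal sketch of discarding the ring metadata
+follows; the ring directory shown is typical of package installs, so adjust
+the path to your own platform:
+
+```bash
+riak stop
+# Assumes the default ring directory location; yours may differ.
+# After this, the node must be rejoined to the cluster.
+rm -rf /var/lib/riak/ring/*
+riak start
+```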
+
+## Configure the First Node
+
+First, stop your Riak node if it is currently running:
+
+```bash
+riak stop
+```
+
+#### Select an IP address and port
+
+Let's say that the IP address for your cluster is 192.168.1.10 and that
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+interface due to performance gains), you should change your
+configuration file:
+
+```riakconf
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"127.0.0.1", 8087 },
+```
+
+becomes
+
+```riakconf
+listener.protobuf.internal = 192.168.1.10:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"192.168.1.10", 8087 },
+```
+
+{{% note title="Note on upgrading to 2.0" %}}
+If you are upgrading to Riak version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config`/`vm.args`
+configuration files or the newer `riak.conf` if you wish. If you have
+installed Riak 2.0 directly, you should use only `riak.conf`.
+
+Below, examples will be provided for both the old and new configuration
+systems. Bear in mind that you need to use either the older or the newer
+system, but never both simultaneously.
+
+More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference).
+{{% /note %}}
+
+If you're using the HTTP interface, you will need to alter your
+configuration in an analogous way:
+
+```riakconf
+listener.http.internal = 127.0.0.1:8098
+```
+
+```appconfig
+%% In the riak_core section:
+
+{http, [ {"127.0.0.1", 8098 } ]},
+```
+
+becomes
+
+```riakconf
+listener.http.internal = 192.168.1.10:8098
+```
+
+```appconfig
+{http, [ {"192.168.1.10", 8098 } ]},
+```
+
+#### Name your node
+
+Every node in Riak has a name associated with it. The default name is
+`riak@127.0.0.1`. Let's say that you want to change the name to
+`riak@192.168.1.10`:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+becomes
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+> **Node Names**
+>
+> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10`
+are both acceptable node naming schemes, but using the FQDN style is
+preferred.
+>
+> Once a node has been started, in order to change the name you must
+either remove ring files from the `/data/ring` directory or
+[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.10/using/admin/riak-admin/#cluster-force-replace) the node.
+
+#### Start the node
+
+Now that your node is properly configured, you can start it:
+
+```bash
+riak start
+```
+
+If the Riak node has been previously started, you must use the
+`riak-admin cluster replace` command to change the node name and update
+the node's ring file.
+
+```bash
+riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10
+```
+
+{{% note title="Note on single nodes" %}}
+If a node is started singly using default settings, as you might do when you
+are building your first test environment, you will need to remove the ring
+files from the data directory after you edit your configuration files.
+`riak-admin cluster replace` will not work since the node has not been joined
+to a cluster.
+{{% /note %}} + +As with all cluster changes, you need to view the planned changes by +running `riak-admin cluster plan` and then running `riak-admin cluster +commit` to finalize those changes. + +The node is now properly set up to join other nodes for cluster +participation. You can proceed to adding a second node to the cluster. + +## Add a Second Node to Your Cluster + +Repeat the above steps for a second host on the same network, providing +the second node with a host/port and node name. Once the second node has +started, use `riak-admin cluster join` to join the second node to the +first node, thereby creating an initial Riak cluster. Let's say that +we've named our second node `riak@192.168.1.11`. From the new node's +`/bin` directory: + +```bash +riak-admin cluster join riak@192.168.1.10 +``` + +Output from the above should resemble: + +``` +Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10` +``` + +Next, plan and commit the changes: + +```bash +riak-admin cluster plan +riak-admin cluster commit +``` + +After the last command, you should see: + +``` +Cluster changes committed +``` + +If your output was similar, then the second Riak node is now part of the +cluster and has begun syncing with the first node. Riak provides several +ways to determine the cluster's ring status. Here are two ways to +examine your Riak cluster's ring: + +1. Using the `riak-admin` command: + + ```bash + bin/riak-admin status | grep ring_members + ``` + + With output resembling the following: + + ```bash + ring_members : ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +2. Running the `riak attach` command. This will open up an Erlang shell, +into which you can type the following command: + + ```erlang + 1> {ok, R} = riak_core_ring_manager:get_my_ring(). + + %% Response: + + {ok,{chstate,'riak@192.168.1.10',......... + (riak@192.168.52.129)2> riak_core_ring:all_members(R). + ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +To join additional nodes to your cluster, repeat the above steps. You +can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/2.9.10/using/cluster-operations/adding-removing-nodes) from a cluster. + +> **Ring Creation Size** +> +> All nodes in the cluster +must have the same initial ring size setting in order to join, and +participate in cluster activity. This setting can be adjusted in your +configuration file using the `ring_creation_size` parameter if you're +using the older configuration system or `ring_size` in the new system. +> +> Check the value of all nodes if you receive a message like this: +> `Failed: riak@10.0.1.156 has a different ring_creation_size` + +## Running Multiple Nodes on One Host + +If you built Riak from source code, or if you are using the Mac OS X +pre-built package, then you can easily run multiple Riak nodes on the +same machine. The most common scenario for doing this is to experiment +with running a Riak cluster. + +**Note**: If you have installed the `.deb` or `.rpm` package, then you +will need to download and build Riak from source to follow the +directions below. + +To run multiple nodes, make copies of the `riak` directory. + +- If you ran `make all rel`, then this can be found in `./rel/riak` + under the Riak source root directory. +- If you are running Mac OS X, then this is the directory where you + unzipped the `.tar.gz` file. + +Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`, +`./rel/riak3`, and so on, you need to make two changes: + +1. 
Set your handoff port and your Protocol Buffers or HTTP port +(depending on which interface you are using) to different values on each +node. For example: + + ```riakconf + # For Protocol Buffers: + listener.protobuf.internal = 127.0.0.1:8187 + + # For HTTP: + listener.http.internal = 127.0.0.1:8198 + + # For either interface: + handoff.port = 8199 + ``` + + ```appconfig + %% In the pb section of riak_core: + {"127.0.0.1", 8187 } + + %% In the http section of riak_core: + {"127.0.0.1", 8198} + ``` + +2. Change the name of each node to a unique name. Now, start the nodes, +changing path names and nodes as appropriate: + +```bash +./rel/riak1/bin/riak start +./rel/riak2/bin/riak start +./rel/riak3/bin/riak start + +# etc +``` + +Next, join the nodes into a cluster: + +```bash +./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak2/bin/riak-admin cluster plan +./rel/riak2/bin/riak-admin cluster commit +``` + +## Multiple Clusters on One Host + +Using the above technique, it is possible to run multiple clusters on +one computer. If a node hasn’t joined an existing cluster, it will +behave just as a cluster would. Running multiple clusters on one +computer is simply a matter of having two or more distinct nodes or +groups of clustered nodes. + + + diff --git a/content/riak/kv/2.9.10/using/security.md b/content/riak/kv/2.9.10/using/security.md new file mode 100644 index 0000000000..632fce2634 --- /dev/null +++ b/content/riak/kv/2.9.10/using/security.md @@ -0,0 +1,198 @@ +--- +title: "Security & Firewalls" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Security" + identifier: "managing_security" + weight: 205 + parent: "managing" +toc: true +aliases: + - /riak/2.9.10/ops/advanced/security + - /riak/kv/2.9.10/ops/advanced/security +--- + +[config reference search]: {{<baseurl>}}riak/kv/2.9.10/configuring/reference/#search +[config search enabling]: {{<baseurl>}}riak/kv/2.9.10/configuring/search/#enabling-riak-search +[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.10/configuring/v3-multi-datacenter/ssl +[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html +[security basics]: {{<baseurl>}}riak/kv/2.9.10/using/security/basics +[security managing]: {{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/ +[Solr]: http://lucene.apache.org/solr/ +[usage search]: {{<baseurl>}}riak/kv/2.9.10/developing/usage/search + +> **Internal security** +> +> This document covers network-level security. For documentation on the +authentication and authorization features introduced in Riak 2.0, see +[Authentication and Authorization][security basics] and [Managing Security Sources][security managing] + +This article discusses standard configurations and port settings to use +when providing network security for a Riak Cluster. There are two +classes of access control for Riak: + +* Other Riak nodes participating in the cluster +* Clients making use of the Riak cluster + +The settings for both access groups are located in your cluster's +configuration settings. If you are using the newer configuration system, +you can set a host and port for each node in that node's `riak.conf` +file, setting `listener.protobuf` if you are using Riak's Protocol +Buffers interface or `listener.http` if you are using HTTP (or +`listener.https` if you are using SSL). 
If you are using the older +configuration system, adjust the settings of `pb`, `http`, or `https`, +depending on which client interface you are using. + +Make note of these configurations and set up your firewall to allow +incoming TCP access to those ports or IP address/port combinations. +Exceptions to this are the `handoff_ip` and `handoff_port` directives. +Those are for communication between Riak nodes only. + +## Inter-node Communication + +Riak uses the Erlang distribution mechanism for most inter-node +communication. Riak identifies other machines in the ring using Erlang +identifiers (`<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves +these node identifiers to a TCP port on a given machine via the Erlang +Port Mapper daemon (epmd) running on each cluster node. + +By default, epmd binds to TCP port 4369 and listens on the wildcard +interface. For inter-node communication, Erlang uses an unpredictable +port by default; it binds to port 0, which means the first available +port. + +For ease of firewall configuration, Riak can be configured +to instruct the Erlang interpreter to use a limited range +of ports. For example, to restrict the range of ports that Erlang will +use for inter-Erlang node communication to 6000-7999, add the following +lines to the configuration file on each Riak node: + +```riakconf +erlang.distribution.port_range.minimum = 6000 +erlang.distribution.port_range.maximum = 7999 +``` + +```appconfig +{ kernel, [ + {inet_dist_listen_min, 6000}, + {inet_dist_listen_max, 7999} + ]}, +``` + +The above lines should be added into the top level list in app.config, +at the same level as all the other applications (e.g. `riak_core`). +Then configure your firewall to allow incoming access to TCP ports 6000 +through 7999 from whichever network(s) contain your Riak nodes. + +### Riak Node Ports + +Riak nodes in a cluster need to be able to communicate freely with one +another on the following ports: + +* epmd listener: TCP:4369 +* handoff_port listener: TCP:8099 +* range of ports specified in `app.config` or `riak.conf` + +### Riak Client Ports + +Riak clients must be able to contact at least one machine in a Riak +cluster on the following TCP ports: + +Protocol | Port +:--------|:---- +<a href="../../developing/api/http">HTTP</a> | TCP port 8098 +<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087 + +### Riak Search Ports + +Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs +on each Riak node if security has been [enabled][config search enabling]. When +Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well +as some ephemeral ports. The well-known port is determined by the value of the +`search.solr.jmx_port` in each node's [Search configuration][config reference search]. +The default is 8985. + +In addition to JMX ports, Solr also binds to a well-known port of its +own, as determined by each node's `search.solr.port` setting, which is +also located in each node's Search configuration. The default is 8093. + +# Riak Security Community + +## Riak + +Riak is a powerful open-source distributed database focused on scaling +predictably and easily, while remaining highly available in the face of +server crashes, network partitions or other (inevitable) disasters. + +## Commitment + +Data security is an important and sensitive issue to many of our users. 
+A real-world approach to security allows us to balance appropriate
+levels of security and related overhead while creating a fast, scalable,
+and operationally straightforward database.
+
+### Continuous Improvement
+
+Though we make every effort to thwart security vulnerabilities whenever
+possible (including through independent reviews), no system is
+completely secure. We will never claim that Riak is 100% secure (and you
+should seriously doubt anyone who claims their solution is). What we can
+promise is that we openly accept all vulnerabilities from the community.
+When appropriate, we'll publish and make every attempt to quickly
+address these concerns.
+
+### Balance
+
+More layers of security increase operational and administrative costs.
+Sometimes those costs are warranted, sometimes they are not. Our
+approach is to strike an appropriate balance between effort, cost, and
+security.
+
+For example, Riak does not have fine-grained role-based security. Though
+it can be an attractive bullet point in a database comparison chart,
+you're usually better off finely controlling data access through your
+application or a service layer.
+
+### Notifying Basho
+
+If you discover a potential security issue, please email us at
+**security@basho.com**, and allow us 48 hours to reply.
+
+We prefer to be contacted first, rather than searching for blog posts
+over the Internet. This allows us to open a dialogue with the security
+community on how best to handle a possible exploit without putting any
+users at risk.
+
+## Security Best Practices
+
+### Authentication and Authorization
+
+For instructions on how to apply permissions and to require client
+authentication, please see our documentation on [Riak Security][security basics].
+
+### Network Configurations
+
+Being a distributed database means that much of Riak's security springs
+from how you configure your network. We have a few recommendations for
+[Security and Firewalls][security basics].
+
+### Client Auth
+
+All of the Riak client libraries support encrypted TCP communication
+as well as authentication and authorization. For instructions on how
+to apply permissions and to require client authentication, please see
+our documentation on [Riak Security][security basics].
+
+### Multi-Datacenter Replication
+
+For those versions of Riak that support Multi Data Center (MDC)
+Replication, you can configure Riak 1.2+ to communicate over SSL to
+seamlessly encrypt the message traffic.
+
+See also: [Multi Data Center Replication: SSL][config v3 ssl]
+
+
+
diff --git a/content/riak/kv/2.9.10/using/security/basics.md b/content/riak/kv/2.9.10/using/security/basics.md
new file mode 100644
index 0000000000..7868df1934
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/security/basics.md
@@ -0,0 +1,850 @@
+---
+title: "Security Basics"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "Security Basics"
+    identifier: "security_basics"
+    weight: 100
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.10/ops/running/authz
+  - /riak/kv/2.9.10/ops/running/authz
+---
+
+> **Note on Network security**
+>
+> This document covers only the 2.0 authentication and authorization
+features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.10/using/security/).
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/2.9.10/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/2.9.10/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
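+
+Taken together, the subcommands covered below form a short lifecycle that
+can be run from any one node and takes effect across the cluster:
+
+```bash
+# Check whether security is currently enforced
+riak-admin security status
+
+# Turn enforcement on (see the warning under Enabling Security first)
+riak-admin security enable
+
+# Turn enforcement back off
+riak-admin security disable
+```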
+ +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak-admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak-admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak-admin security disable +``` + +While security is disabled, clients will need to be reconfigured to no +longer require TLS and send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak-admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics and granting users +selective access to Riak functionality (and also to revoke access). +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created. 
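+
+For example, a user can be created in a single command with a standard
+characteristic (`password`) alongside a free-form one; `department` below
+is a hypothetical option that Riak will store and display but not
+interpret:
+
+```bash
+riak-admin security add-user riakuser password=Test1234 department=ops
+```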
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak-admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak-admin security print-groups
+```
+
+Example output, assuming a user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups |       password       |           options            |
++----------+--------+----------------------+------------------------------+
+| riakuser |        |983e8ae1421574b8733824|              []              |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in encrypted form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and a `name` of
+`lucius`, the output would look like this:
+
+```bash
++----------+----------------+----------------------+---------------------+
+| username |     groups     |       password       |       options       |
++----------+----------------+----------------------+---------------------+
+| riakuser |      dev       |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` or `security print-group` (singular) commands
+can be used with a name as an argument to see the same information as
+above, but for only that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak-admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```bash
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group  |   type   |  bucket  |                 grants                 |
++--------+----------+----------+----------------------------------------+
+| admin  |    *     |    *     | riak_kv.get, riak_kv.delete,           |
+|        |          |          | riak_kv.put                            |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|   ANY    |shopping_list| riak_kv.get                            |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|    *     |      *      | riak_kv.get, riak_kv.delete,           |
+|          |             | riak_kv.put                            |
+|   ANY    |shopping_list| riak_kv.get                            |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately.
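+
+If, for instance, `admin` existed as both a user and a group, the two
+sets of grants could be inspected separately:
+
+```bash
+riak-admin security print-grants user/admin
+riak-admin security print-grants group/admin
+```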
+ +### Add Group + +For easier management of permissions across several users, it is +possible to create groups to be assigned to those users. + +```bash +riak-admin security add-group admin +``` + +### Add User + +To create a user with the username `riakuser`, we use the `add-user` +command: + +```bash +riak-admin security add-user riakuser +``` + +Using the command this way will create the user `riakuser` without _any_ +characteristics beyond a username, which is the only attribute that you +must assign upon user creation. + +Alternatively, a password---or other attributes---can be assigned to the +user upon creation. Here, we'll assign a password: + +```bash +riak-admin security add-user riakuser password=Test1234 +``` + +### Assigning a Password and Altering Existing User Characteristics + +While passwords and other characteristics can be set upon user creation, +it often makes sense to change user characteristics after the user has +already been created. Let's say that the user `riakuser` was created +without a password (or created _with_ a password that we'd like to +change). The `alter-user` command can be used to modify our `riakuser` +user: + +```bash +riak-admin security alter-user riakuser password=opensesame +``` + +When creating or altering a user, any number of `<option>=<value>` +pairs can be appended to the end of the command. Any non-standard +options will be stored and displayed via the `riak-admin security +print-users` command. + +```bash +riak-admin security alter-user riakuser name=bill age=47 fav_color=red +``` + +Now, the `print-users` command should return this: + +``` ++----------+--------+----------+--------------------------------------------------+ +| username | groups | password | options | ++----------+--------+----------+--------------------------------------------------+ +| riakuser | | |[{"fav_color","red"},{"age","47"},{"name","bill"}]| ++----------+--------+----------+--------------------------------------------------+ +``` + +**Note**: Usernames _cannot_ be changed using the `alter-user` command. +For example, running `riak-admin security alter-user riakuser +username=other-name`, will instead add the +`{"username","other-name"}` tuple to `riakuser`'s options. + +### Managing Groups for a User + +If we have a user `riakuser` and we'd like to assign her to the +`admin` group, we assign the value `admin` to the option `groups`: + +```bash +riak-admin security alter-user riakuser groups=admin +``` + +If we'd like to make the user `riakuser` both an `admin` and an +`archoverlord`: + +```bash +riak-admin security alter-user riakuser groups=admin,archoverlord +``` + +There is no way to incrementally add groups; even if `riakuser` was +already an `admin`, it is necessary to list it again when adding the +`archoverlord` group. Thus, to remove a group from a user, use +`alter-user` and list all *other* groups. + +If the user should be removed from all groups, use `groups=` with no +list: + +```bash +riak-admin security alter-user riakuser groups= +``` + +### Managing Groups for Groups + +Groups can be added to other groups for cascading permissions. 
+
+```bash
+riak-admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```
+riak-admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```
+riak-admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak-admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation
+:----------|:---------
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully.
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/2.9.10/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/2.9.10/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/2.9.10/configuring/search/) document. 
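+
+As a reminder, Search is switched on via the `search` setting in each
+node's `riak.conf` (followed by a node restart), after which the grants
+below will succeed; a minimal sketch:
+
+```riakconf
+search = on
+```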
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak-admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak-admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define the security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Checks the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticates against the given pluggable authentication module (PAM) service
+`certificate` | Authenticates using a client certificate
+
+### Example: Adding a Trusted Source
+
+Security sources can be added either to a specific user, multiple users,
+or all users (`all`).
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak-admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+|       users        |    cidr    |  source  | options  |
++--------------------+------------+----------+----------+
+|        all         |127.0.0.1/32|  trust   |    []    |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR.
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/2.9.10/setup/installing/source/erlang). 
This issue should
+not affect Erlang 17.0 and later.
+
+## Enabling SSL
+
+In order to use any authentication or authorization features, you must
+enable SSL for Riak. **SSL is disabled by default**, but you will need
+to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#client-interfaces) for the node
+as well as a [certificate configuration](#certificate-configuration).
+
+If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/2.9.10/developing/api/http) for Riak and would like to
+configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host
+and port. The following configuration would establish port 8088 on
+`localhost` as the HTTPS port:
+
+```riakconf
+listener.https.$name = 127.0.0.1:8088
+
+# By default, "internal" is used as the "name" setting
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {https, [{"127.0.0.1", 8088}]},
+    %% Other configs
+    ]}
+```
+
+## TLS Settings
+
+When using Riak security, you can choose which versions of SSL/TLS are
+allowed. By default, only TLS 1.2 is allowed, but this version can be
+disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#security) to `on` or `off`:
+
+* `tls_protocols.tlsv1`
+* `tls_protocols.tlsv1.1`
+* `tls_protocols.tlsv1.2`
+* `tls_protocols.sslv3`
+
+Three things to note:
+
+* Among the four available options, only TLS version 1.2 is enabled by
+  default
+* You can enable more than one protocol at a time
+* We strongly recommend that you do _not_ use SSL version 3 unless
+  absolutely necessary
+
+## Certificate Configuration
+
+If you are using any of the available [security sources]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#trust-based-authentication), you will need to do so
+over a secure SSL connection. In order to establish a secure connection,
+you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#security) point to the proper paths for your
+generated certs. By default, Riak assumes that all certs are stored in
+each node's `/etc` directory.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can change the location of the `/etc` directory by modifying the
+`platform_etc_dir` setting. More information can be found in our
+documentation on [configuring directories]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/#directories).
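+
+For example, to relocate that directory (and with it the default
+certificate locations listed in the table below), you might set the
+following in `riak.conf`; the path shown is purely illustrative:
+
+```riakconf
+platform_etc_dir = /opt/riak/etc
+```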
+ +<table class="riak-conf"> + <thead> + <tr> + <th>Type</th> + <th>Parameter</th> + <th>Default</th> + </tr> + </thead> + <tbody> + <tr> + <td><strong>Signing authority</strong></td> + <td><code>ssl.cacertfile</code></td> + <td><code>#(platform_etc_dir)/cacertfile.pem</code></td> + </tr> + <tr> + <td><strong>Cert</strong></td> + <td><code>ssl.certfile</code></td> + <td><code>#(platform_etc_dir)/cert.pem</code></td> + </tr> + <tr> + <td><strong>Key file</strong></td> + <td><code>ssl.keyfile</code></td> + <td><code>#(platform_etc_dir)/key.pem</code></td> + </tr> + </tbody> +</table> + +If you are using the older, `app.config`-based configuration system, +these paths can be set in the `ssl` subsection of the `riak_core` +section. The corresponding parameters are shown in the example below: + +```appconfig +{riak_core, [ + %% Other configs + + {ssl, [ + {certfile, "./etc/cert.pem"}, + {keyfile, "./etc/key.pem"}, + {cacertfile, "./etc/cacertfile.pem"} + ]}, + + %% Other configs +]} +``` + +## Referer Checks and Certificate Revocation Lists + +In order to provide safeguards against +[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting) +(XSS) and +[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +attacks, Riak performs [secure referer +checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those +checks make it impossible to serve data directly from Riak. To disable +those checks, set the `secure_referer_check` parameter to `off`. + +If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/2.9.10/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +default. To disable this behavior, set the `check_crl` parameter to +`off`. + + + diff --git a/content/riak/kv/2.9.10/using/security/best-practices.md b/content/riak/kv/2.9.10/using/security/best-practices.md new file mode 100644 index 0000000000..8df176c725 --- /dev/null +++ b/content/riak/kv/2.9.10/using/security/best-practices.md @@ -0,0 +1,83 @@ +--- +draft: true +title: "Security Best Practices" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Best Practices" + identifier: "security_best_practices" + weight: 102 + parent: "managing_security" +toc: true +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + diff --git a/content/riak/kv/2.9.10/using/security/managing-sources.md b/content/riak/kv/2.9.10/using/security/managing-sources.md new file mode 100644 index 0000000000..b1900d29d0 --- /dev/null +++ b/content/riak/kv/2.9.10/using/security/managing-sources.md @@ -0,0 +1,272 @@ +--- +title: "Managing Security Sources" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Managing Security Sources" + identifier: "security_manage_sources" + weight: 101 + parent: "managing_security" +toc: true +aliases: + - /riak/2.9.10/ops/running/security-sources + - /riak/kv/2.9.10/ops/running/security-sources +--- + +If you're looking for more general information on Riak Security, it may +be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/2.9.10/using/security/basics). + +This document provides more granular information on the four available +authentication sources in Riak Security: trusted networks, password, +pluggable authentication modules (PAM), and certificates. These sources +correspond to `trust`, `password`, `pam`, and `certificate`, +respectively, in the `riak-admin security` interface. + +The examples below will assume that the network in question is +`127.0.0.1/32` and that a Riak user named `riakuser` has been +[created]({{<baseurl>}}riak/kv/2.9.10/using/security/basics/#user-management) and that +security has been [enabled]({{<baseurl>}}riak/kv/2.9.10/using/security/basics/#the-basics). + +{{% note title="Note on SSL connections" %}} +If you use _any_ of the aforementioned security sources, even `trust`, you +will need to do so via a secure SSL connection. 
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak-admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak-admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 password +riak-admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/2.9.10/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
+The default directory for certificates is `/etc`, though you can specify +a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: + +```riakconf +ssl.certfile = /path/to/cert.pem +ssl.keyfile = /path/to/key.pem +ssl.cacertfile = /path/to/cacert.pem +``` + +In the client-side example above, the client's `CN` and Riak username +needed to match. On the server (i.e. Riak) side, the `CN` specified _on +each node_ must match the node's name as registered by Riak. You can +find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/2.9.10/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +`riak-node-1`, you would need to generate your certificate with that in +mind, as in this OpenSSL example: + +```bash +openssl req -new ... '/CN=riak-node-1' +``` + +Once certificates have been properly generated and configured on all of +the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/2.9.10/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +certificate that you generated for the user `riakuser`. + +How to use Riak clients in conjunction with OpenSSL and other +certificates varies from client library to client library. We strongly +recommend checking the documentation of your client library for further +information. + +## PAM-based Authentication + +This section assumes that you have set up a PAM service bearing the name +`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition +specifying `auth` and/or other PAM services set up to authenticate a +user named `riakuser`. As in the certificate-based authentication +example above, the user's name must be the same in both your +authentication module and in Riak Security. + +If we want the user `riakuser` to use this PAM service on `localhost`, +we need to add a `pam` security source in Riak and specify the name of +the service: + +```bash +riak-admin security add-source all 127.0.0.1/32 pam service=riak_pam +``` + +**Note**: If you do not specify a name for your PAM service, Riak will +use the default, which is `riak`. + +To verify that the source has been properly specified: + +```bash +riak-admin security print-sources +``` + +That command should output the following: + +``` ++--------------------+------------+----------+------------------------+ +| users | cidr | source | options | ++--------------------+------------+----------+------------------------+ +| riakuser |127.0.0.1/32| pam |[{"service","riak_pam"}]| ++--------------------+------------+----------+------------------------+ +``` + +You can test that setup most easily by using `curl`. A normal request to +Riak without specifying a user will return an `Unauthorized` message: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +Response: + +``` +<html><head><title>401 Unauthorized

+</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></body></html>
+```
+
+If you identify yourself as `riakuser` and are successfully
+authenticated by your PAM service, you should get either `not found` or
+a Riak object if one is stored in the specified bucket type/bucket/key
+path:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## How Sources Are Applied
+
+When managing security sources---any of the sources explained
+above---you always have the option of applying a source to either a
+single user, multiple users, or all users (`all`). If specific users and
+`all` have no sources in common, this presents no difficulty. But what
+happens if one source is applied to `all` and a different source is
+applied to a specific user?
+
+The short answer is that the more specifically assigned source---i.e.
+the one assigned to the user---will be considered that user's security
+source. We'll illustrate that with the following example, in which the
+`certificate` source is assigned to `all`, but the `password` source is
+assigned to `riakuser`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 certificate
+riak-admin security add-source riakuser 127.0.0.1/32 password
+```
+
+If we run `riak-admin security print-sources`, we'll get the following
+output:
+
+```
++--------------------+------------+-----------+----------+
+|       users        |    cidr    |  source   | options  |
++--------------------+------------+-----------+----------+
+|      riakuser      |127.0.0.1/32| password  |    []    |
+|                    |127.0.0.1/32|certificate|    []    |
+|        all         |127.0.0.1/32|certificate|    []    |
++--------------------+------------+-----------+----------+
+```
+
+As we can see, `password` is set as the security source for `riakuser`,
+whereas everyone else will authenticate using `certificate`.
+
+
+
diff --git a/content/riak/kv/2.9.10/using/security/v2-v3-ssl-ca.md b/content/riak/kv/2.9.10/using/security/v2-v3-ssl-ca.md
new file mode 100644
index 0000000000..9a54293fd9
--- /dev/null
+++ b/content/riak/kv/2.9.10/using/security/v2-v3-ssl-ca.md
@@ -0,0 +1,83 @@
+---
+draft: true
+title: "V2 / V3 SSL & CA Validation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.10
+menu:
+  riak_kv-2.9.10:
+    name: "V2/V3 SSL & CA Validation"
+    identifier: "security_validation"
+    weight: 103
+    parent: "managing_security"
+toc: true
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**.
Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + diff --git a/content/riak/kv/2.9.10/using/troubleshooting.md b/content/riak/kv/2.9.10/using/troubleshooting.md new file mode 100644 index 0000000000..8b6df117c2 --- /dev/null +++ b/content/riak/kv/2.9.10/using/troubleshooting.md @@ -0,0 +1,26 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +--- + +[http 204]: ./http-204 + +## In This Section + +#### [HTTP 204][http 204] + +About the HTTP 204 response. + +[Learn More >>][http 204] + + + diff --git a/content/riak/kv/2.9.10/using/troubleshooting/http-204.md b/content/riak/kv/2.9.10/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..c9facdd134 --- /dev/null +++ b/content/riak/kv/2.9.10/using/troubleshooting/http-204.md @@ -0,0 +1,20 @@ +--- +title: "HTTP 204" +description: "" +project: "riak_kv" +project_version: 2.9.10 +menu: + riak_kv-2.9.10: + name: "HTTP 204" + identifier: "troubleshooting_http_204" + weight: 101 + parent: "managing_troubleshooting" +toc: true +--- + +In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers. + +If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored, otherwise you will receive a `204 No Content`. 
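+
+A minimal illustration against Riak's HTTP API, assuming a node
+listening on `localhost:8098` and an existing bucket named `test`:
+
+```curl
+# Returns 204 No Content on success
+curl -i -XPUT http://localhost:8098/buckets/test/keys/doc1 \
+  -H 'Content-Type: text/plain' \
+  -d 'hello'
+
+# Returns 200 OK along with the stored value
+curl -i -XPUT 'http://localhost:8098/buckets/test/keys/doc1?returnbody=true' \
+  -H 'Content-Type: text/plain' \
+  -d 'hello'
+```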
+
+
+
diff --git a/content/riak/kv/2.9.2/_reference-links.md b/content/riak/kv/2.9.2/_reference-links.md
new file mode 100644
index 0000000000..5a96240776
--- /dev/null
+++ b/content/riak/kv/2.9.2/_reference-links.md
@@ -0,0 +1,249 @@
+
+# Riak KV 2.9.2 Reference Links List
+
+
+## Common
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.2/downloads/
+[install index]: {{<baseurl>}}riak/kv/2.9.2/setup/installing
+[upgrade index]: {{<baseurl>}}riak/kv/2.9.2/upgrading
+[plan index]: {{<baseurl>}}riak/kv/2.9.2/planning
+[config index]: {{<baseurl>}}riak/kv/2.9.2/using/configuring/
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/
+[manage index]: {{<baseurl>}}riak/kv/2.9.2/using/managing
+[performance index]: {{<baseurl>}}riak/kv/2.9.2/using/performance
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode
+[contact basho]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+## Planning
+
+[plan index]: {{<baseurl>}}riak/kv/2.9.2/setup/planning
+[plan start]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/start
+[plan backend]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb
+[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveled
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/multi
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/cluster-capacity
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/bitcask-capacity-calc
+[plan best practices]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/best-practices
+[plan future]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/future
+
+
+## Installing
+
+[install index]: {{<baseurl>}}riak/kv/2.9.2/setup/installing
+[install aws]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/amazon-web-services
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/debian-ubuntu
+[install freebsd]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/freebsd
+[install mac osx]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/mac-osx
+[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/rhel-centos
+[install smartos]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/smartos
+[install solaris]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/solaris
+[install suse]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/suse
+[install windows azure]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/windows-azure
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source/erlang
+[install source jvm]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source/jvm
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/verify
+
+
+## Upgrading
+
+[upgrade index]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading
+[upgrade checklist]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading/checklist
+[upgrade version]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading/version
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading/cluster
+[upgrade mdc]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading/multi-datacenter
+[upgrade downgrade]: {{<baseurl>}}riak/kv/2.9.2/setup/downgrade
+
+
+## Configuring
+
+[config index]: {{<baseurl>}}riak/kv/2.9.2/configuring
+[config basic]: {{<baseurl>}}riak/kv/2.9.2/configuring/basic
+[config backend]: {{<baseurl>}}riak/kv/2.9.2/configuring/backend
+[config manage]: {{<baseurl>}}riak/kv/2.9.2/configuring/managing
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/
+[config strong consistency]: {{<baseurl>}}riak/kv/2.9.2/configuring/strong-consistency
+[config load balance]: {{<baseurl>}}riak/kv/2.9.2/configuring/load-balancing-proxy
+[config mapreduce]: {{<baseurl>}}riak/kv/2.9.2/configuring/mapreduce
+[config search]: {{<baseurl>}}riak/kv/2.9.2/configuring/search/
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter/ssl
+
+[config v2 mdc]: {{<baseurl>}}riak/kv/2.9.2/configuring/v2-multi-datacenter
+[config v2 nat]: {{<baseurl>}}riak/kv/2.9.2/configuring/v2-multi-datacenter/nat
+[config v2 quickstart]: {{<baseurl>}}riak/kv/2.9.2/configuring/v2-multi-datacenter/quick-start
+[config v2 ssl]: {{<baseurl>}}riak/kv/2.9.2/configuring/v2-multi-datacenter/ssl
+
+
+
+## Using
+
+[use index]: {{<baseurl>}}riak/kv/2.9.2/using/
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-admin-commands
+[use running cluster]: {{<baseurl>}}riak/kv/2.9.2/using/running-a-cluster
+
+### Reference
+
+[use ref custom code]: {{<baseurl>}}riak/kv/2.9.2/using/reference/custom-code
+[use ref handoff]: {{<baseurl>}}riak/kv/2.9.2/using/reference/handoff
+[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.2/using/reference/statistics-monitoring
+[use ref search]: {{<baseurl>}}riak/kv/2.9.2/using/reference/search
+[use ref 2i]: {{<baseurl>}}riak/kv/2.9.2/using/reference/secondary-indexes
+[use ref snmp]: {{<baseurl>}}riak/kv/2.9.2/using/reference/snmp
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/reference/strong-consistency
+[use ref jmx]: {{<baseurl>}}riak/kv/2.9.2/using/reference/jmx
+[use ref obj del]: {{<baseurl>}}riak/kv/2.9.2/using/reference/object-deletion/
+[use ref v3 mdc]: {{<baseurl>}}riak/kv/2.9.2/using/reference/v3-multi-datacenter
+[use ref v2 mdc]: {{<baseurl>}}riak/kv/2.9.2/using/reference/v2-multi-datacenter
+
+### Cluster Admin
+
+[use admin index]: {{<baseurl>}}riak/kv/2.9.2/using/admin/
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.2/using/admin/commands/
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-cli/
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-control/
+
+### Cluster Operations
+
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/inspecting-node
+[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/changing-cluster-info
+[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.2/configuring/load-balancing-proxy
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/bucket-types
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/handoff
+[cluster ops log]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/logging
+[cluster ops obj del]: {{<baseurl>}}riak/kv/2.9.2/using/reference/object-deletion
+[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/backing-up
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/strong-consistency
+[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.2/using/reference/secondary-indexes
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter
+[cluster ops v2 mdc]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/v2-multi-datacenter
+
+### Repair/Recover
+
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.2/using/repair-recovery
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.2/using/repair-recovery/failure-recovery/
+
+### Security
+
+[security index]: {{<baseurl>}}riak/kv/2.9.2/using/security/
+[security basics]: {{<baseurl>}}riak/kv/2.9.2/using/security/basics
+[security managing]: {{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/
+
+### Performance
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.2/using/performance/
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.2/using/performance/benchmarking
+[perf open files]: {{<baseurl>}}riak/kv/2.9.2/using/performance/open-files-limit/
+[perf erlang]: {{<baseurl>}}riak/kv/2.9.2/using/performance/erlang
+[perf aws]: {{<baseurl>}}riak/kv/2.9.2/using/performance/amazon-web-services
+[perf latency checklist]: {{<baseurl>}}riak/kv/2.9.2/using/performance/latency-reduction
+
+### Troubleshooting
+
+[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.2/using/troubleshooting/http-204
+
+
+## Developing
+
+[dev index]: {{<baseurl>}}riak/kv/2.9.2/developing
+[dev client libraries]: {{<baseurl>}}riak/kv/2.9.2/developing/client-libraries
+[dev data model]: {{<baseurl>}}riak/kv/2.9.2/developing/data-modeling
+[dev data types]: {{<baseurl>}}riak/kv/2.9.2/developing/data-types
+[dev kv model]: {{<baseurl>}}riak/kv/2.9.2/developing/key-value-modeling
+
+### Getting Started
+
+[getting started]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started
+[getting started java]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/java
+[getting started ruby]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/ruby
+[getting started python]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/python
+[getting started php]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/php
+[getting started csharp]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/csharp
+[getting started nodejs]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/nodejs
+[getting started erlang]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/erlang
+[getting started golang]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/golang
+
+[obj model java]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/java/object-modeling
+[obj model ruby]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/ruby/object-modeling
+[obj model python]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/python/object-modeling
+[obj model csharp]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/csharp/object-modeling
+[obj model nodejs]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/nodejs/object-modeling
+[obj model erlang]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/erlang/object-modeling
+[obj model golang]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started/golang/object-modeling
+
+### Usage
+
+[usage index]: {{<baseurl>}}riak/kv/2.9.2/developing/usage
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution
+[usage content types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/content-types
+[usage create objects]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/creating-objects
+[usage custom extractors]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/custom-extractors
+[usage delete objects]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/deleting-objects
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce
+[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search
+[usage search schema]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search-schemas
+[usage search data types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/searching-data-types
+[usage 2i]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes
+[usage update objects]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/updating-objects
+
+### App Guide
+
+[apps mapreduce]: {{<baseurl>}}riak/kv/2.9.2/developing/app-guide/advanced-mapreduce
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.2/developing/app-guide/replication-properties
+[apps strong consistency]: {{<baseurl>}}riak/kv/2.9.2/developing/app-guide/strong-consistency
+
+### API
+
+[dev api backend]: {{<baseurl>}}riak/kv/2.9.2/developing/api/backend
+[dev api http]:
{{}}riak/kv/2.9.2/developing/api/http +[dev api http status]: {{}}riak/kv/2.9.2/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.9.2/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/2.9.2/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.9.2/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.9.2/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.9.2/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.9.2/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.9.2/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.9.2/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.9.2/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.9.2/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.9.2/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.9.2/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.9.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.9.2/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.9.2/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + diff --git a/content/riak/kv/2.9.2/add-ons.md b/content/riak/kv/2.9.2/add-ons.md new file mode 100644 index 0000000000..336c781dc6 --- /dev/null +++ b/content/riak/kv/2.9.2/add-ons.md @@ -0,0 +1,19 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. + +* [Riak Redis Add-on]({{}}riak/kv/2.9.2/add-ons/redis/) diff --git a/content/riak/kv/2.9.2/add-ons/redis.md b/content/riak/kv/2.9.2/add-ons/redis.md new file mode 100644 index 0000000000..967cd498c0 --- /dev/null +++ b/content/riak/kv/2.9.2/add-ons/redis.md @@ -0,0 +1,58 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +--- + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. +{{% /note %}} + +Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV. 
+ +RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate. + +Whether you are looking to build out a session store, shopping cart, advertisement or other dynamically rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV). + +## Compatibility + +RRA is supported on the following platforms: + +* RHEL/CentOS 6 +* RHEL/CentOS 7 +* Ubuntu 12.04 LTS "Precise Pangolin" +* Ubuntu 14.04 LTS "Trusty Tahr" +* Debian 7 "Wheezy" +* Debian 8 "Jessie" + +RRA is compatible with the following services: + +* Riak KV Enterprise (2.1.4+) +* Riak TS Enterprise (1.4.0+) +* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster) + * Redis Cluster and RRA's consistent hash are at odds, which surface as errors + such as MOVED, ASK, and CROSSSLOT messages from Redis, see (WIP): + https://github.com/antirez/redis-rb-cluster + +## Get Started + +* [Set up RRA.][addon redis setup] +* [Use RRA with various clients.][addon redis use] +* [Develop with RRA.][addon redis develop] +* [Learn about RRA's features.][addon redis features] diff --git a/content/riak/kv/2.9.2/add-ons/redis/developing-rra.md b/content/riak/kv/2.9.2/add-ons/redis/developing-rra.md new file mode 100644 index 0000000000..987b819d2a --- /dev/null +++ b/content/riak/kv/2.9.2/add-ons/redis/developing-rra.md @@ -0,0 +1,325 @@ +--- +title: "Developing with Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Develop with Redis Add-on" + identifier: "add-ons_redis_develop" + weight: 403 + parent: "add-ons_redis" +toc: true +commercial_offering: true +--- + +[redis-clients]: http://redis.io/clients +[usage bucket types]: {{}}riak/kv/2.9.2/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.9.2/developing/api/http +[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ +[apps replication properties]: {{}}riak/kv/2.9.2/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.9.2/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.9.2/learn/concepts/causal-context +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. + +## Overview + +Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides access to Riak KV, as a persistent data store, with Redis, as a cache, through the various Redis client libraries and the command-line interface tool `redis-cli`. + +As with Riak KV, the cache proxy service almost always performs best and most +predictably when you use the basic CRUD operations -- Create, Read, Update, +Delete -- that you'd find in any key/value store. Learning these operations +is a great place to start when beginning to develop applications that use +RRA. + +The set of clients (including recommendations) for Redis is listed at +[Redis clients][redis-clients]. For brevity's sake, examples provided here are +in: + +* Erlang (Eredis) +* JavaScript (node_redis) +* Python (redis-py) +* Ruby (redis-rb) +* Scala (lettuce) +* Java: see the Scala examples. The code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client.
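Before wiring up one of the client libraries, you can sanity-check your RRA installation with `redis-cli`; a minimal sketch, assuming the cache proxy service is listening on its default port 22122:

```bash
# read a key through the cache proxy; a key that does not exist returns (nil)
redis-cli -h 127.0.0.1 -p 22122 get rra:test:food
```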
+ +## Riak KV Setup + +While you can use Riak Redis Add-on with Riak KV configured with either `last_write_wins` set to 'true' or `allow_mult` set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'. + +The cache proxy service is tested under both configurations. However, because the Redis protocol provides no way to return multiple values for a single `GET`, effectively `last_write_wins` semantics apply. + +For a deeper explanation of Riak KV's configurable behaviors, see John Daily's +blog series [part 4][config-behaviors]. + +### Bucket Type Setup + +#### Create a Bucket Type + +If your application organizes data in a way that does not include a bucket type +and instead uses only buckets to organize its keyspace, the `default` bucket type +can be used by omitting the bucket-type portion of the colon-delimited +hierarchical namespaced key. In other words, `test:food` is equivalent to +`default:test:food` where the bucket-type is `default`, the bucket is `test`, +and the key is `food`. For examples here, we will use `rra:test:food` to clearly +use a bucket-type. + +If your application organizes data including a bucket type, ensure that the +bucket type is created in Riak without specifying the data type, so values are +effectively opaque, i.e. a `string`. The following command provides an example of +creating the bucket-type `rra`: + +```sh +if ! riak-admin bucket-type status rra >/dev/null 2>&1; then + riak-admin bucket-type create rra '{"props":{}}' + riak-admin bucket-type activate rra +fi +``` + +#### Set Bucket Props + +The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false': + +```sh +curl -XPUT -H 'Content-Type: application/json' \ + -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \ + 'http://127.0.0.1:8098/types/rra/buckets/test/props' +``` + +For additional configuration options see [bucket properties][dev api http]. + +## Object/Key Operations + +Riak KV organizes data into buckets, keys, and values, with +[bucket types][usage bucket types] acting as an additional namespace in Riak KV +versions 2.0 and greater. Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket. + +Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or other plaintext representations that can be parsed in the client application (e.g. YAML). + +While buckets are a flat namespace in Riak KV and you can name them +whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy +service, Redis bucket_type:bucket:key is mapped to Riak KV +bucket_type/bucket/key, so bucket type and bucket names should not contain +colon (`:`). When not specified, bucket type defaults to "default". + +Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets. + +The same goes for naming keys: many objects can have the same key as long as they're in different buckets. There is no restriction on keys containing colons (`:`), and this practice of representing a nested namespace is common in applications using Redis.
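To make the mapping concrete, the following sketch shows the same object addressed through both interfaces, assuming the `rra` bucket type from above, RRA listening on its default port 22122, and Riak KV's default HTTP port 8098:

```bash
# write through the cache proxy using the flat, colon-delimited Redis key
redis-cli -h 127.0.0.1 -p 22122 set rra:test:food apple

# the same object is addressable in Riak KV as bucket type "rra",
# bucket "test", key "food"
curl 'http://127.0.0.1:8098/types/rra/buckets/test/keys/food'
```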
+ +Riak KV [bucket types][usage bucket types] enable you to provide common +configurations for buckets (as many buckets as you wish). This means you can +easily enable buckets to share common configurations, i.e. identical +[replication properties][apps replication properties] or +[commit hooks][usage commit hooks]. + + +## Reading Objects + +Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading-through to Riak KV which results in greater resilience through node outages and network partitions. + +To request a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.get("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.get("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.get("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +var value = connection.get("rra:test:food") +``` + +### Get Configuration Parameters + +>**Note:** The cache proxy service read option (related to replication factor and +consistency concern) may optionally be set within the nutcracker.conf. This will result in an override of the setting value at the bucket-level in Riak KV. + +The following configuration parameters apply to `GET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` | +|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` | +|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | +|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) | +|`timeout` | The number of milliseconds to await a response. | `0` (server specified) | + + +### Sibling Resolution + +As the Redis protocol does not provide a means to return multiple siblings, +the cache proxy service must provide server-side sibling resolution. At present, only last-write-wins sibling resolution is available. The result is an effective +last-write-wins configuration for access through the cache proxy service. + + +## Writing Objects + +Writes via the cache proxy service are analogous to a Redis `SET`, with the added +benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating +cache. As with HTTP PUT, `SET` semantically covers both create and update +operations. + +To set a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.set("rra:test:food", "apple", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.set("rra:test:food", "apple") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.set("rra:test:food", "apple") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +connection.set("rra:test:food", "apple") +``` + +### Set Configuration Parameters + +>**Note:** The cache proxy service write option (related to replication factor and +consistency concern) may optionally be set within the nutcracker.conf, resulting +in an override of the setting value at the bucket-level in Riak KV. + +The following configuration parameters apply to `SET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | + + +### Sibling Explosion + +As noted in the section "Sibling Resolution" above, Riak KV provides for a line of +descent (known as the [causal context][concept causal context]) for a value stored at a key. Clients +performing write operations provide this causal context by setting the vector +clock (VClock) that they last read. + +If a client does not provide the causal context, Riak KV makes no assumptions and treats the write as a new causal context, semantically equivalent to a +create. In the case that a value is already stored at the key, this would lead +to a sibling. + +Since the Redis protocol does not provide a means to pass a VClock, the cache +proxy service needs to perform a read-before-write to obtain the current VClock so the write can continue the causal context previously established and avoid +"sibling explosion". + +Despite these efforts, in the event of a network partition, siblings will still +be created as clients writing to nodes on either side of the network partition +can create divergent lines of descent. Sibling resolution remains the means +to merge these lines of descent into a coherent causal context. + +## Deleting Objects + +Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added +benefit of issuing the delete to Riak KV followed by a `PEXPIRE` to Redis, invalidating +cache. + +To delete a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.del("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.delete("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.del("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +connection.del("rra:test:food") +``` + +### Delete Configuration Parameters + +The following configuration parameters apply to `DEL` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | diff --git a/content/riak/kv/2.9.2/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.9.2/add-ons/redis/redis-add-on-features.md new file mode 100644 index 0000000000..614ae94abd --- /dev/null +++ b/content/riak/kv/2.9.2/add-ons/redis/redis-add-on-features.md @@ -0,0 +1,131 @@ +--- +title: "Riak Redis Add-on Features" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Redis Add-on Features" + identifier: "add-ons_redis_features" + weight: 504 + parent: "add-ons_redis" +toc: true +commercial_offering: true +--- + +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png +[redis docs]: http://redis.io/commands +[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md + +## Overview + +The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware. + +On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available: + +* [Pre-sharding](#pre-sharding) +* [Connection Aggregation](#connection-aggregation) +* [Command Pipelining](#command-pipelining) +* [Read-through Cache](#read-through-cache) +* [Write-around Cache](#write-around-cache) +* [Commands](#commands) +* [Object Lifetime](#object-lifetime) + +## Pre-sharding + +Pre-sharding with consistent hashing dispatches object reads and writes based +on a configurable hash function, spreading load across multiple cache servers. +The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then +requests are routed to the Redis server that handles that portion of the key +range. + +Redis, used with no persistence as the frontend cache, holds all data in +memory, so the addressable memory of a single cache server is limited. By +employing pre-sharding, the total addressable cache memory space is extended +in proportion to the number of Redis servers. + +## Connection Aggregation + +Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers. This reduces the total required connections to the Redis server for the same key. + +Redis clients in various languages support specifying multiple servers, as well +as implementing multiple methods of spreading load across those servers (e.g. +round-robin load balancing or consistent hashing). Since the cache proxy service provides consistent hashing, any Redis client method of supporting multiple +servers will suffice.
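A rough way to observe both behaviors is to drive a handful of keys through a single proxy connection and then inspect each Redis server directly. The following is only a sketch: the proxy address and the two Redis server addresses are placeholders, and it relies on the read-through `GET` populating the cache as described below.

```bash
# write, then read, several keys through one proxy connection; the
# read-through GET caches each value on whichever Redis server the
# consistent hash assigns it to
for k in a b c d e f; do
  redis-cli -h 127.0.0.1 -p 22122 set "test:$k" "value-$k" > /dev/null
  redis-cli -h 127.0.0.1 -p 22122 get "test:$k" > /dev/null
done

# each Redis server should now hold a different subset of the keys
redis-cli -h 192.168.50.2 -p 6379 keys 'test:*'
redis-cli -h 192.168.50.3 -p 6379 keys 'test:*'
```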
+ +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is an ideal place for it due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server after a write (DELETE or PUT) is +issued directly to that server. + +A short `CACHE_TTL`, for example "15s", removes a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three common write cache strategies (write-through, write-around, and write-back), the write-around cache strategy is the least +prone to race conditions, but least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV, utilizing the read-through + caching strategy with a TTL set at service configuration time. + +* SET - set the value of a key in Riak KV and invalidate cache by issuing a PEXPIRE + to Redis. + +* DEL - delete the value of a key in Riak KV and invalidate cache by issuing a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs].
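For example, one way to see the augmented GET's read-through TTL in action is to fetch a key through the proxy and then ask the backing Redis server for the key's remaining lifetime; a sketch, with all addresses and the key as placeholders:

```bash
# populate the cache via the proxy's read-through GET
redis-cli -h 127.0.0.1 -p 22122 get test:foo

# ask the Redis server holding the key for its remaining TTL in
# milliseconds; it should be at or below the configured server_ttl
redis-cli -h 192.168.50.2 -p 6379 pttl test:foo
```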
+ +>**Important:** While the cache proxy service does support issuing DEL commands, PEXPIRE, with a small TTL, is suggested instead when the semantic intent is to remove an item from cache. With write-around, the DEL command will issue a delete to the Riak backend. + +## Object Lifetime + +With the combination of read-through and write-around cache strategies, the +full object lifetime for a key-value is represented by the following +sequence diagram: + +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/kv/2.9.2/add-ons/redis/set-up-rra.md b/content/riak/kv/2.9.2/add-ons/redis/set-up-rra.md new file mode 100644 index 0000000000..a94c1c00e8 --- /dev/null +++ b/content/riak/kv/2.9.2/add-ons/redis/set-up-rra.md @@ -0,0 +1,280 @@ +--- +title: "Setting Up Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Set Up Redis Add-on" + identifier: "add-ons_redis_setup" + weight: 201 + parent: "add-ons_redis" +toc: true +commercial_offering: true +--- + +[addon redis develop]: ../developing-rra/ +[addon redis use]: ../using-rra +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[install index]: {{}}riak/kv/2.9.2/setup/installing +[perf open files]: {{}}riak/kv/2.9.2/using/performance/open-files-limit/#changing-the-limit +[lab ansible]: https://github.com/paegun/ansible-cache-proxy + +This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. + +## Prerequisites + +Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index]. + +While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go. + +This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used +to configure the cache proxy. + +## In the Lab + +An ansible setup for the Riak Redis Add-on (RRA) was developed to provide a +runnable example of an installation; see [ansible cache proxy][lab ansible]. +The remainder of this setup guide lists the commands required to install and +configure RRA manually. + +## Installing + +1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files]. +2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed. +3. Install Riak Redis Add-on. + +### Change the open-files limit + +As with Riak KV, both the total open-files limit and the per-user open-files limit +must be high enough to allow Redis and Riak Redis Add-on (RRA) to function. + +For a complete guide on changing the limit in Riak KV, see +[Changing the limit][perf open files]. + +#### Linux + +On most Linux distributions, the total limit for open files is controlled by `sysctl`. + +```bash +# persist the new limit, then reload the sysctl settings +echo "fs.file-max = 65536" | sudo tee -a /etc/sysctl.conf +sudo sysctl -p +``` + +To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
+ +#### CentOS + +On CentOS systems, set a proper limit for the user you're usually logging in with +to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the +executing user. + +#### Ubuntu + +On Ubuntu systems, the following settings are recommended: + +```config +»USERNAME« hard nofile 65536 +»USERNAME« soft nofile 65536 +root hard nofile 65536 +root soft nofile 65536 +``` + +>**Note:** You may need to log out of your shell and then log back in for these changes to take effect. + + +### Install Redis + +>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on". + +#### Install on Ubuntu + +If you are on Ubuntu, run the following to install Redis: + +```bash +# add the dotdeb repositories to your APT sources. +sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" +``` + +>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well. + +### Install Riak Redis Add-on (RRA) + +>**Note:** +>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums. + +If you are on CentOS, run the following to install RRA: + +```bash +sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm +``` + +If you are on Ubuntu, run the following to install RRA: + +```bash +sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb +``` + +## Configuring Riak Redis Add-on + +To configure Riak Redis Add-on (RRA), edit the configuration file: `/etc/cache_proxy/cache_proxy_22122.yml`. + +The RRA configuration file is in YAML format. An example configuration +file is provided in the install, and it contains all relevant configuration elements: + +```config +»pool name« : + listen: 0.0.0.0:22122 + hash: fnv1a_64 + distribution: ketama + auto_eject_hosts: true + redis: true + server_retry_timeout: 2000 + server_failure_limit: 1 + server_ttl: 1h + servers: + - 127.0.0.1:6379:1 + backend_type: riak + backend_max_resend: 2 + backends: + - 127.0.0.1:8087 +``` + +Set the `listen` configuration value to set the RRA listen port. + +To set the time-to-live (TTL) for values stored in cache, set the `server_ttl` +configuration value. Human-readable time values can be specified, +with the most likely units being `s` for seconds or `ms` for milliseconds. + +Set the list of Redis servers by listing the servers, each prefixed by `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional). + +Set the list of Riak KV servers by listing the servers, each prefixed by `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«` +(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here. + +### Verify your configuration + +If you are on Ubuntu, run the following to start RRA: + +```bash +sudo service cache_proxy start +``` + +If you are on CentOS, run the following to start RRA: + +```bash +sudo systemctl start cache_proxy +``` + +To verify RRA is running and listening on the expected port, run the +following (using the loopback interface and the default RRA port 22122 +as an example): + +```bash +redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS +redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on +``` + +Redis should respond with `SUCCESS`.
+ +If RRA is responding with the expected output, run the following to +clean up and remove the test value: + +```bash +redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on +``` + +If you did not get the expected output, run the following +to verify that RRA is running on the expected port: + +```bash +ss -nlp |grep [n]utcracker +``` + +>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well. + +## Next Steps + +Get started with some [basic usage][addon redis use] or check out more info on [setting up for development (with examples)][addon redis develop]. diff --git a/content/riak/kv/2.9.2/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.9.2/add-ons/redis/set-up-rra/deployment-models.md new file mode 100644 index 0000000000..c0266bdbd3 --- /dev/null +++ b/content/riak/kv/2.9.2/add-ons/redis/set-up-rra/deployment-models.md @@ -0,0 +1,138 @@ +--- +title: "Riak Redis Add-on Deployment Models" +description: "Explore the various models for deploying Riak Redis Add-on" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Redis Add-on Deployment Models" + identifier: "add-ons_redis_deployment" + weight: 201 + parent: "add-ons_redis_setup" +toc: true +commercial_offering: true +--- + +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png + +## Deployment Models + +### Local Cache Deployment + +In a local cache deployment, the RRA and Redis are deployed to the application +server. + +![Local-deployment]({{}}images/redis/rra_deployment_local.png) + +Connections: + +* RRA: The connections between Application Service instances and the RRA Service + instance are local. +* Redis: The connection between the RRA Service instance and Redis Service + instance is local. +* Riak: The connections from Application Servers to Riak Nodes are distributed + and bounded to equal the number of Riak nodes _multiplied_ by the number of + Application Servers since they are aggregated at the RRA Service instance. + +Advantages: + +* Cache hits are extremely fast. + +Disadvantages: + +* Cache writes on one application server are *not* observed on other application + servers, so cache hit rates are likely lower unless some form of consistent + routing to the application server exists within the solution. +* Redis competing for RAM with the application service may be problematic. + +### Colocated Cache Deployment + +In a colocated cache deployment, the RRA may be deployed either to the +application server (suggested) or to the Riak servers, and Redis is deployed to +the Riak servers. + +In the case of deploying the RRA to the application servers, the RRA features +of reducing connections from the relatively high number of application service +instances to the fewer Redis (cache) and Riak (persistent) data service +instances allow for the greatest scale at the expense of the deployment cost +of pushing a service and its configuration. + +In the case of deploying the RRA to the colocated Redis and Riak data servers, +the maximum scale for the solution is constrained by the number of network +connections from the application services while deployment costs remain a matter +of pushing a service and its configuration. In either case, deployment should +be automated, so these costs are not multiplied by the number of servers.
+ +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) + +Connections: + +* RRA: The connections between Application Service instances and the RRA Service + instance are distributed and bounded to equal the number of Riak nodes + _multiplied_ by the number of Application Service instances. +* Redis: The connection between the RRA Service instance and Redis Service + instance is local. +* Riak: The connections from RRA to Riak Nodes are distributed and bounded to + equal the number of Riak nodes _squared_. + +Advantages: + +* Increases the cache hit rate as a cache write from one application server + will lead to a cache hit by all other application servers. + +Disadvantages: + +* Typically increased distance between the application service and Redis and + Riak services, so slightly increased latency compared to local. +* Redis competing for RAM with Riak will likely be problematic. Redis should + be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis + to ensure Riak is allotted sufficient RAM to serve the more important + persistent data storage and retrieval services. See http://redis.io/topics/config +* This model may seem to provide data locality, but in the case of faults in + either Redis or Riak services, the fault tolerance mechanisms of RRA and + Riak will not match exactly, as communicating the necessary information to + support such lock-step fault tolerance would lead to greater mean latencies, + and Riak provides superior 99th percentile latency performance in the face + of faults. + + +### Distributed Cache Deployment + +In a distributed cache deployment, the RRA is deployed to the application server +and Redis is deployed to standalone servers, separate from Riak cluster nodes. + +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) + +Connections: + +* RRA: The connections between Application Service instances and the RRA Service + instance are local. +* Redis: The connections between the RRA Service instance and Redis Service + instances are distributed and bounded to equal the number of Application + Servers _multiplied_ by the number of Redis Servers. +* Riak: The connections from RRA to Riak Nodes are distributed and bounded to + equal the number of Riak nodes _multiplied_ by the number of Application + Servers since they are aggregated at the RRA Service instance. + +Advantages: + +* Increases the cache hit rate as a cache write from one application server + will lead to a cache hit by all other application servers. +* Keeps RRA near the application, reducing network connections. +* Moves Redis to distinct servers, allowing the cache more RAM and not + constraining the RAM of either application or persistent data services. + +Disadvantages: + +* Typically increased distance between the application service and Redis and + Riak services, so increased latency compared to local. + +### Recommendation + +The relative advantages and disadvantages of the Distributed Cache Deployment, +most notably the increased cache hit rate and reduced connection overhead, +should make it the standout choice for applications requiring the scale and +operational simplicity of Riak. For this reason, we recommend the Distributed +Cache Deployment.
diff --git a/content/riak/kv/2.9.2/add-ons/redis/using-rra.md b/content/riak/kv/2.9.2/add-ons/redis/using-rra.md new file mode 100644 index 0000000000..8cab701c08 --- /dev/null +++ b/content/riak/kv/2.9.2/add-ons/redis/using-rra.md @@ -0,0 +1,242 @@ +--- +title: "Using Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Using Redis Addon" + identifier: "add-ons_redis_getstarted" + weight: 302 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: + - /riak/kv/2.9.2/add-ons/redis/get-started-with-rra +--- + +[addon redis develop]: ../developing-rra/ +[addon redis setup]: ../set-up-rra/ +[dev api http]: {{}}riak/kv/2.9.2/developing/api/http/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + + +Now that you’ve [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client which supports `GET`, `SET` and `DEL` operations. + +This page will walk you through using RRA. + +## Prerequisites + +We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine. + +You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http]. + +## Run the Read-Through Test + +Throughout this test example, the bucket "test" and key "foo" are used to +demonstrate how to address the hierarchical namespace support in Riak KV +through the flat Redis key. The bucket type is not specified in this example, +so is effectively the default bucket type, named "default". For additional +information regarding key namespace, see [develop Riak Redis Add-on (RRA)][addon redis develop]. + +The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are: + +* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings. +* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`. +* GET the Riak object at the `test` bucket with the key `foo`. +* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.) +* Assert that the value obtained from the previous cache proxy GET is 'bar'.
+ +First, create a file named `read_through_test.sh` with the following content: + +```bash +# set test environment +RIAK_HTTP_IP="127.0.0.1" +RIAK_HTTP_PORT="8098" +CACHE_PROXY_IP="127.0.0.1" +CACHE_PROXY_PORT="22122" +CACHE_PROXY_STATISTICS_PORT="22123" +RIAK_TEST_BUCKET="test" +KEY="foo" +VALUE="bar" + +# DELETE Riak object, ensure no siblings +curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# PUT Riak object +curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# GET Riak object +RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY") + +# GET Cache Proxy value +CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" get "$RIAK_TEST_BUCKET:$KEY") + +# DELETE Riak object, cleanup +curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# Assert +if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then + RESULT="Success" +else + RESULT="FAIL" +fi +echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy." +``` + +Then, once you've created the file, run it as follows: + +```bash +./read_through_test.sh 22122 8098 test +``` + +### Exceptions + +If the test does not pass, verify that both Redis and RRA are running. You can do this by running: + +```bash +ps aux |grep [r]edis +ps aux |grep [n]utcracker +``` + +The result should list `redis` and `nutcracker` respectively. + +Also, verify that Riak KV is started and listening on the protocol buffer port specified: + +```bash +sudo riak config effective |grep proto +``` + +If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following: + +```bash +sudo service cache_proxy restart +``` + +If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows: + +1. Stop RRA. +2. Stop Redis. +3. *Optional* Restart Riak KV (This should only be necessary if Riak KV is not responding to protocol buffer requests.) +4. Start Redis. +5. Start RRA. + +```bash +sudo service cache_proxy stop +sudo service redis stop + +# optional +sudo riak restart + +sudo service redis start +sudo service cache_proxy start +``` + +## Using Riak Redis Add-on + +Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA. + +For objects that should not be cached, interact with Riak KV as usual: issuing GET, PUT, and DELETE commands through the Riak client. + +For objects that should be cached, read from RRA: issuing GET, SET, and DEL commands through the Redis client. + +### Monitoring + +#### RRA + +Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service’s process was killed or terminated by other means. + +The log file for RRA is stored by default in `/var/log/cache_proxy.log`. RRA is logrotate-friendly, responding to the signal to reopen the log file following a rotate. + +For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+ +For example, running the following command (using the loopback interface and the default statistics port as an example): + +```bash +telnet 127.0.0.1 22123 +``` + +Returns statistic results: + +```json +{ + "bdp_cache_proxy": { + "192.168.50.2:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 216, + "requests": 9, + "response_bytes": 39, + "responses": 4, + "server_connections": 1, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.3:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 0, + "requests": 0, + "response_bytes": 0, + "responses": 0, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.4:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 90, + "requests": 5, + "response_bytes": 258, + "responses": 2, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "client_connections": 0, + "client_eof": 6, + "client_err": 0, + "forward_error": 0, + "fragments": 0, + "server_ejects": 0 + }, + "curr_connections": 4, + "service": "nutcracker", + "source": "vagrant", + "timestamp": 1438301846, + "total_connections": 10, + "uptime": 7227, + "version": "0.4.0" +} +``` + +Using the above results, you should be able to determine metrics changes that would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution. + +While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions: + +* Custom - https://github.com/gfranxman/NutcrackerMonitor +* NewRelic - http://newrelic.com/plugins/schoology/245 +* Nagios - https://github.com/schoology/twemproxy_nagios + +#### Redis + +Various Redis monitoring solutions exist in the market and, like monitoring RRA, these monitoring solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone. 
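For example, a minimal direct check against a single Redis server (the host and port are placeholders):

```bash
# pull the Stats section of Redis server statistics directly from Redis
redis-cli -h 192.168.50.2 -p 6379 info stats
```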
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details diff --git a/content/riak/kv/2.9.2/configuring.md b/content/riak/kv/2.9.2/configuring.md new file mode 100644 index 0000000000..b4e8eb5273 --- /dev/null +++ b/content/riak/kv/2.9.2/configuring.md @@ -0,0 +1,82 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+
+[Learn More >>][config search]
+
+#### [V3 Multi-Datacenter][config v3 mdc]
+
+A guide on configuring Riak's V3 Multi-Datacenter Replication.
+
+[Learn More >>][config v3 mdc]
+
diff --git a/content/riak/kv/2.9.2/configuring/backend.md b/content/riak/kv/2.9.2/configuring/backend.md
new file mode 100644
index 0000000000..3171fa5c5a
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/backend.md
@@ -0,0 +1,642 @@
+---
+title: "Backend Configuration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Backend Configuration"
+    identifier: "configuring_backend"
+    weight: 110
+    parent: "configuring"
+toc: true
+---
+
+[plan backend leveldb]: {{}}riak/kv/2.9.2/setup/planning/backend/leveldb
+[plan backend leveled]: {{}}riak/kv/2.9.2/setup/planning/backend/leveled
+[plan backend bitcask]: {{}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[plan backend memory]: {{}}riak/kv/2.9.2/setup/planning/backend/memory
+[plan backend multi]: {{}}riak/kv/2.9.2/setup/planning/backend/multi
+
+## LevelDB
+
+Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend.
+
+> **Note on upgrading to 2.0**
+>
+> If you are upgrading to Riak 2.0+ from a 1.x version, are using LevelDB, and
+> wish to use your old configuration files, i.e. `app.config` and
+> `vm.args`, please note that you must set the `total_leveldb_mem_percent`
+> setting in the `eleveldb` section of `app.config`. We recommend setting
+> it to `70`. If you do not set this parameter, it will default to 15,
+> which can lead to problems in some clusters.
+
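+For instance, a minimal `app.config` fragment applying the recommended value might look like the following sketch (the surrounding `eleveldb` section is assumed to already exist in your file):
+
+```appconfig
+%% In app.config -- set the eleveldb memory ceiling explicitly when
+%% carrying 1.x configuration files forward to Riak 2.0+.
+{eleveldb, [
+    {total_leveldb_mem_percent, 70}
+]}
+```
+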
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.block_cache_threshold` | Defines the limit past which block cache memory can no longer be released in favor of the page cache. This setting does not affect the file cache. The value is set on a per-vnode basis. | `32MB`
+`leveldb.compaction.trigger.tombstone_count` | Controls when a background compaction initiates solely due to the number of delete tombstones within an individual `.sst` table file. A value of `off` disables the feature. | `1000`
+`leveldb.compression` | Enabling this setting (`on`), which is the default, saves disk space. Disabling it may reduce read latency but increase overall disk activity. This option can be changed at any time, but it will not impact data on disk until the next time a file requires compaction. | `on`
+`leveldb.compression.algorithm` | Selects which compression algorithm is used when `leveldb.compression` is `on`. In new riak.conf files this is explicitly set to `lz4`; when the setting is not provided, `snappy` will be used for backward compatibility. When you determine that you no longer need backward compatibility, setting this to `lz4` will cause future compactions to use the LZ4 algorithm. | `lz4` in new riak.conf files; `snappy` when not provided
+`leveldb.data_root` | The directory in which LevelDB will store its data. | `./data/leveldb`
+`leveldb.fadvise_willneed` | Option to override LevelDB's use of `fadvise(DONTNEED)` with `fadvise(WILLNEED)` instead. WILLNEED can reduce disk activity on systems where physical memory exceeds the database size. | `false`
+`leveldb.maximum_memory` | Defines the server memory (in bytes) to assign to LevelDB. Also see `leveldb.maximum_memory.percent` to set LevelDB memory as a percentage of system total. | `80`
+`leveldb.maximum_memory.percent` | Defines the percentage of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes to stay within this size. The memory size can alternately be assigned as a byte count via `leveldb.maximum_memory` instead. | `70`
+`leveldb.threads` | The number of worker threads performing LevelDB operations. | `71`
+`leveldb.verify_checksums` | Enables or disables the verification of the data fetched from LevelDB against internal checksums. | `on`
+`leveldb.verify_compaction` | Enables or disables the verification of LevelDB data during compaction. | `on`
+`leveldb.block.size_steps` | Defines the number of incremental adjustments to attempt between the `block.size` value and the maximum `block.size` for an `.sst` table file. A value of zero disables the underlying dynamic block_size feature. | `16`
+`leveldb.block.restart_interval` | Defines the key count threshold for a new key entry in the key index for a block. Most deployments should leave this parameter alone. | `16`
+`leveldb.block.size` | Defines the size threshold for a block/chunk of data within one `.sst` table file. Each new block gets an index entry in the `.sst` table file's master index. | `4KB`
+`leveldb.bloomfilter` | Each database `.sst` table file can include an optional "bloom filter" that is highly effective in shortcutting data queries that are destined to not find the requested key. The Bloom filter typically increases the size of an `.sst` table file by about 2%. | `on`
+`leveldb.write_buffer_size_min` | Each vnode first stores new key/value data in a memory-based write buffer. This write buffer is in parallel to the recovery log mentioned in the `sync` parameter. Riak creates each vnode with a randomly sized write buffer for performance reasons. The random size is somewhere between `write_buffer_size_min` and `write_buffer_size_max`. | `30MB`
+`leveldb.write_buffer_size_max` | See `leveldb.write_buffer_size_min` directly above. | `60MB`
+`leveldb.limited_developer_mem` | A Riak-specific option that is used when a developer is testing a high number of vnodes and/or several VMs on a machine with limited physical memory. Do not use this option when making performance measurements. This option overwrites values given to `write_buffer_size_min` and `write_buffer_size_max`. | `off`
+`leveldb.sync_on_write` | Whether LevelDB will flush after every write. Note: if you are familiar with fsync, this is analogous to calling fsync after every write. | `off`
+`leveldb.tiered` | The level number at which LevelDB data switches from the faster to the slower array. The default of `off` disables the feature. | `off`
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`. | 
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at or above the level set by `leveldb.tiered`. | 
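+
+As a quick illustration, a riak.conf fragment tuning a few of the parameters above might look like this sketch (the values shown are illustrative, not recommendations):
+
+```riakconf
+## Cap LevelDB at half of system memory and compress with LZ4.
+leveldb.maximum_memory.percent = 50
+leveldb.compression = on
+leveldb.compression.algorithm = lz4
+```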
+
+## Leveled
+
+Configurable parameters for Riak's [leveled][plan backend leveled] storage backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | A path under which leveled data files will be stored. | `$(platform_data_dir)/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. Can be set to `riak_sync`, `sync` (if OTP > 16) or `none`. Use `none`, and the OS will flush when most efficient. Use `riak_sync` or `sync` to flush after every PUT (not recommended without some hardware support, e.g. flash drives and/or flash-backed write caches). | `none`
+`leveled.compression_method` | Can be `lz4` or `native` (which will use the Erlang native zlib compression) within term_to_binary. | `native`
+`leveled.compression_point` | The point at which compression is applied to the Journal (the Ledger is always compressed). Use `on_receipt` or `on_compact`. `on_compact` is suitable when values are unlikely to yield much benefit from compression (compression is only attempted when compacting). | `on_receipt`
+`leveled.log_level` | Can be `debug`, `info`, `warn`, `error` or `critical`. Sets the minimum log level to be used within leveled. Leveled will log many lines to allow stats to be extracted by those using log indexers such as Splunk. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. Normally keep this at around the combined size of roughly 100K objects. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. The higher the value, the more compaction runs, and the sooner space is recovered - but each run has a cost. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. Use a low hour of 0 and a top hour of 23 to have no compaction window (i.e. always compact regardless of time of day). | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. If low hour > top hour, then compaction will work overnight between low hour and top hour (inclusive). Timings rely on the server's view of local time. | `23`
+`leveled.max_run_length` | The maximum number of consecutive files which may be compacted in a single compaction run. | `4`
+`leveled_reload_recalc` | Enable the `recalc` compaction strategy within the leveled backend in Riak. | `disabled`
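+
+For example, a compaction schedule confined to the small hours could be sketched as follows (the values are illustrative assumptions, not recommendations):
+
+```riakconf
+## Run up to 16 journal compactions per vnode per day, between 01:00 and 06:00.
+leveled.compaction_runs_perday = 16
+leveled.compaction_low_hour = 1
+leveled.compaction_top_hour = 6
+```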
+
+## Bitcask
+
+Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`bitcask.data_root` | The directory under which Bitcask will store its data. | `./data/bitcask`
+`bitcask.io_mode` | Configures how Bitcask writes data to disk. If set to `erlang`, writes are made via Erlang's built-in file API; if set to `nif`, writes are made via direct calls to the POSIX C API. The `nif` mode provides higher throughput for certain workloads, but has the potential to negatively impact the Erlang VM, leading to higher worst-case latencies and possible throughput collapse. | `erlang`
+`bitcask.expiry` | By default, Bitcask keeps all of your data around. If your data has limited time value, or if you need to purge data for space reasons, you can set the `expiry` option. For example, if you need to purge data automatically after 1 day, set the value to `1d`. `off` disables automatic expiration. | `off`
+`bitcask.expiry.grace_time` | By default, Bitcask will trigger a merge whenever a data file contains an expired key. This may result in excessive merging under some usage patterns. To prevent this you can set the `bitcask.expiry.grace_time` option. Bitcask will defer triggering a merge solely for key expiry by the configured number of seconds. Setting this to `1h` effectively limits each cask to merging for expiry once per hour. | `0`
+`bitcask.hintfile_checksums` | Whether to allow the CRC to be present at the end of hintfiles. Setting this to `allow_missing` runs Bitcask in a backwards-compatible mode in which old hint files will still be accepted without CRC signatures. | `strict`
+`bitcask.fold.max_puts` | See the description for the `bitcask.fold.max_age` config directly below. | `0`
+`bitcask.fold.max_age` | Fold keys thresholds will reuse the keydir if another fold was started less than `fold.max_age` ago and there were fewer than `fold.max_puts` updates. Otherwise, it will wait until all current fold keys complete and then start. Set either option to `unlimited` to disable. | `unlimited`
+`bitcask.merge.thresholds.fragmentation` | Describes which ratio of dead keys to total keys in a file will cause it to be included in the merge. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 4 dead keys and 6 live keys, it will be included in the merge at the default ratio (which is 40). Increasing the value will cause fewer files to be merged; decreasing the value will cause more files to be merged. | `40`
+`bitcask.merge.thresholds.dead_bytes` | Describes the minimum amount of data occupied by dead keys in a file to cause it to be included in the merge. Increasing the value will cause fewer files to be merged, whereas decreasing the value will cause more files to be merged. | `128MB`
+`bitcask.merge.thresholds.small_file` | Describes the minimum size a file must have to be excluded from the merge. Files smaller than the threshold will be included. Increasing the value will cause more files to be merged, whereas decreasing the value will cause fewer files to be merged. | `10MB`
+`bitcask.merge.triggers.dead_bytes` | Describes how much data stored for dead keys in a single file will trigger merging. If a file meets or exceeds the trigger value for dead bytes, merge will be triggered. Increasing the value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. When either of these constraints is met by any file in the directory, Bitcask will attempt to merge files. | `512MB`
+`bitcask.merge.triggers.fragmentation` | Describes which ratio of dead keys to total keys in a file will trigger merging. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 6 dead keys and 4 live keys, then merge will be triggered at the default setting. Increasing this value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. | `60`
+`bitcask.merge.window.end` | See the description of the `bitcask.merge.policy` config below. | `23`
+`bitcask.merge.window.start` | See the description of the `bitcask.merge.policy` config below. | `0`
+`bitcask.merge.policy` | Lets you specify when during the day merge operations are allowed to be triggered. Valid options are: `always`, meaning no restrictions; `never`, meaning that merging will never be attempted; and `window`, specifying the hours during which merging is permitted, where `bitcask.merge.window.start` and `bitcask.merge.window.end` are integers between 0 and 23. If merging has a significant impact on performance of your cluster, or your cluster has quiet periods in which little storage activity occurs, you may want to change this setting from the default. | `always`
+`bitcask.merge_check_interval` | Bitcask periodically runs checks to determine whether merges are necessary. This parameter determines how often those checks take place. Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, etc. | `3m`
+`bitcask.merge_check_jitter` | In order to prevent merge operations from taking place on different nodes at the same time, Riak can apply random variance to merge times, expressed as a percentage of `bitcask.merge_check_interval`. | `30%`
+`bitcask.max_merge_size` | Maximum amount of data to merge in one go in the Bitcask backend. | `100GB`
+`bitcask.max_file_size` | Describes the maximum permitted size for any single data file in the Bitcask directory. If a write causes the current file to exceed this size threshold then that file is closed, and a new file is opened for writes. | `2GB`
+`bitcask.sync.interval` | See the description of the `bitcask.sync.strategy` directly below. | 
+`bitcask.sync.strategy` | Changes the durability of writes by specifying when to synchronize data to disk. The default setting protects against data loss in the event of application failure (process death) but leaves open a small window in which data could be lost in the event of complete system failure (e.g. hardware, OS, or power). The default mode, `none`, writes data into operating system buffers which will be written to the disks when those buffers are flushed by the operating system. If the system fails, e.g. due to power loss or crash, that data is lost before those buffers are flushed to stable storage. This is prevented by the setting `o_sync`, which forces the operating system to flush to stable storage at every write. The effect of flushing each write is better durability, but write throughput will suffer as each write will have to wait for the write to complete. Available sync strategies: `none`, which will let the operating system manage syncing writes; `o_sync`, which uses the `O_SYNC` flag to force syncs on every write; and `interval`, which will force Bitcask to sync every `bitcask.sync.interval` seconds. | `none`
+`bitcask.open_timeout` | Specifies the maximum time Bitcask will block on startup while attempting to create or open the data directory. You generally need not change this value. If for some reason the timeout is exceeded on open you'll see a log message of the form `Failed to start bitcask backend: ...`. Only then should you consider a longer timeout. | `4s`
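+
+As an illustration, a riak.conf fragment restricting merges to a quiet overnight window might look like this sketch (the window and file size are assumptions, not recommendations):
+
+```riakconf
+## Only merge between midnight and 05:00, and roll data files at 1GB.
+bitcask.merge.policy = window
+bitcask.merge.window.start = 0
+bitcask.merge.window.end = 5
+bitcask.max_file_size = 1GB
+```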
+
+## Memory Backend
+
+Configurable parameters for Riak's [Memory][plan backend memory] backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`memory_backend.ttl` | Each value written will be written with this "time to live." Once that object's time is up, it will be deleted on the next read of its key. Minimum: 1s. | 
+`memory_backend.max_memory_per_vnode` | The maximum amount of memory consumed per vnode by the memory storage backend. Minimum: 1MB. | 
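+
+A minimal sketch of setting both parameters in riak.conf (this assumes the memory backend is already selected as the storage backend; the values are illustrative):
+
+```riakconf
+## Expire objects after one day and cap each vnode's store at 128MB.
+memory_backend.ttl = 1d
+memory_backend.max_memory_per_vnode = 128MB
+```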
+
+## Multi Backend
+
+Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster.
+
+If you are using multiple backends, you can configure the backends
+individually by prepending the setting with `multi_backend.$name`, where
+`$name` is the name of the backend. `$name` can be any valid
+configuration word, like `customer_data`, `my_data`, `foo_bar_backend`,
+etc.
+
+Below is the general form for setting multi-backend parameters:
+
+```riakconf
+multi_backend.$name.(existing_setting) = <setting>
+# or
+multi_backend.$name.$backend_type.(backend_specific_setting) = <setting>
+```
+
+Below is a listing of the available parameters:
+
+Config | Description | Default
+:------|:------------|:-------
+`multi_backend.$name.storage_backend` | Specifies the Erlang module defining the storage mechanism that will be used on this node. | `bitcask`
+`multi_backend.default` | The default name of a backend when one is not specified. | 
+
+To give an example, if you have a LevelDB backend named
+`customer_backend` and wish to set the `data_root` parameter to
+`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would
+do so as follows:
+
+```riakconf
+multi_backend.customer_backend.storage_backend = leveldb
+multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend
+multi_backend.customer_backend.leveldb.maximum_memory.percent = 50
+```
diff --git a/content/riak/kv/2.9.2/configuring/basic.md b/content/riak/kv/2.9.2/configuring/basic.md
new file mode 100644
index 0000000000..0168d3bbc6
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/basic.md
@@ -0,0 +1,235 @@
+---
+title: "Basic Riak KV Configuration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Basic Configuration"
+    identifier: "configuring_basic"
+    weight: 100
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/configuration/
+  - /riak/kv/2.9.2/ops/building/configuration/
+---
+
+[config reference]: {{}}riak/kv/2.9.2/configuring/reference
+[use running cluster]: {{}}riak/kv/2.9.2/using/running-a-cluster
+[use admin riak-admin#member-status]: {{}}riak/kv/2.9.2/using/admin/riak-admin/#member-status
+[perf erlang]: {{}}riak/kv/2.9.2/using/performance/erlang
+[plan start]: {{}}riak/kv/2.9.2/setup/planning/start
+[plan best practices]: {{}}riak/kv/2.9.2/setup/planning/best-practices
+[cluster ops backup]: {{}}riak/kv/2.9.2/using/cluster-operations/backing-up
+[cluster ops add remove node]: {{}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes
+[plan backend]: {{}}riak/kv/2.9.2/setup/planning/backend
+[plan backend multi]: {{}}riak/kv/2.9.2/setup/planning/backend/multi
+[plan backend bitcask]: {{}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[usage bucket types]: {{}}riak/kv/2.9.2/developing/usage/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.2/developing/app-guide/replication-properties
+[concept buckets]: {{}}riak/kv/2.9.2/learn/concepts/buckets
+[concept eventual consistency]: {{}}riak/kv/2.9.2/learn/concepts/eventual-consistency
+[perf benchmark]: {{}}riak/kv/2.9.2/using/performance/benchmarking
+[perf open files]: {{}}riak/kv/2.9.2/using/performance/open-files-limit
+[perf index]: {{}}riak/kv/2.9.2/using/performance
+[perf aws]: {{}}riak/kv/2.9.2/using/performance/amazon-web-services
+[Cluster Capacity Planning]: {{}}riak/kv/2.9.2/setup/planning/cluster-capacity/#ring-size-number-of-partitions
+
+This document covers the parameters that are commonly adjusted when
+setting up a new cluster. We recommend that you also review the detailed
+[Configuration Files][config reference] document before moving a cluster into
+production.
+
+All configuration values discussed here are managed via the
+configuration file on each node, and a node must be restarted for any
+changes to take effect.
+
+> **Note**
+>
+> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0
+> release, you can use either your old `app.config` configuration file or
+> the newer `riak.conf` if you wish.
+>
+> If you have installed Riak KV 2.0 directly, you should use only
+> `riak.conf`.
+>
+> More on configuring Riak KV can be found in the [configuration files][config reference]
+> doc.
+
+We advise that you make as many of the changes below as practical
+_before_ joining the nodes together into a cluster.
+Once your
+configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process.
+
+Use [`riak-admin member-status`][use admin riak-admin#member-status]
+to determine whether any given node is a member of a cluster.
+
+## Erlang VM Tunings
+
+Prior to building and starting a cluster, there are some
+Erlang-VM-related changes that you should make to your configuration
+files. If you are using the older, `vm.args`-based Erlang VM tunings,
+you should set the following:
+
+```vmargs
++sfwi 500
++scl false
+```
+
+If you are using the newer, `riak.conf`-based configuration system, we
+recommend the following settings:
+
+```riakconf
+erlang.schedulers.force_wakeup_interval = 500
+erlang.schedulers.compaction_of_load = false
+```
+
+More information can be found in [Erlang VM Tuning][perf erlang].
+
+## Ring Size
+
+The ring size, in Riak parlance, is the number of data partitions that
+comprise the cluster. This quantity impacts the scalability and
+performance of a cluster and, importantly, **it should be established
+before the cluster starts receiving data**.
+
+If the ring size is too large for the number of servers, disk I/O will
+be negatively impacted by the excessive number of concurrent databases
+running on each server; if the ring size is too small, the servers' other
+resources (primarily CPU and RAM) will go underutilized.
+
+See [Cluster Capacity Planning] for more details on choosing a ring size.
+
+The steps involved in changing the ring size depend on whether the
+servers (nodes) in the cluster have already been joined together.
+
+### Cluster joined, but no data needs to be preserved
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
+4. Start all nodes
+5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### New servers, have not yet joined a cluster
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for
+the location of this file)
+4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### Verifying ring size
+
+You can use the `riak-admin` command to verify the ring size:
+
+```bash
+riak-admin status | grep ring
+```
+
+Console output:
+
+```
+ring_members : ['riak@10.160.13.252']
+ring_num_partitions : 8
+ring_ownership : <<"[{'riak@10.160.13.252',8}]">>
+ring_creation_size : 8
+```
+
+If `ring_num_partitions` and `ring_creation_size` do not agree, that
+means that the `ring_creation_size` value was changed too late and that
+the proper steps were not taken to start over with a new ring.
+
+**Note**: Riak will not allow two nodes with different ring sizes to be
+joined into a cluster.
+
+## Backend
+
+Another critical decision to be made is the backend to use.
The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.2/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. diff --git a/content/riak/kv/2.9.2/configuring/global-object-expiration.md b/content/riak/kv/2.9.2/configuring/global-object-expiration.md new file mode 100644 index 0000000000..217a80aa7d --- /dev/null +++ b/content/riak/kv/2.9.2/configuring/global-object-expiration.md @@ -0,0 +1,85 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
+menu:
+  riak_kv-2.9.2:
+    name: "Global Object Expiration"
+    identifier: "config_expiry"
+    weight: 180
+    parent: "configuring"
+project: "riak_kv"
+project_version: 2.9.2
+toc: true
+---
+
+[ttl]: https://en.wikipedia.org/wiki/Time_to_live
+
+By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data.
+
+Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value.
+
+## Enabling Expiry
+
+To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file:
+
+```riak.conf
+leveldb.expiration = on
+```
+
+{{% note %}}
+Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration.
+{{% /note %}}
+
+## Setting Retention Time
+
+The `retention_time` setting is used to specify the time until objects expire.
+Durations are set using a combination of an integer and a shortcut for the supported units:
+
+- Milliseconds - `ms`
+- Seconds - `s`
+- Minutes - `m`
+- Hours - `h`
+- Days - `d`
+- Weeks - `w`
+- Fortnights - `f`
+
+The following example configures objects to expire after 5 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 5h
+```
+
+You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 8d9h
+```
+
+## Expiry Modes
+
+Global expiration supports two modes:
+
+- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired.
+- `normal` - individual objects are removed as part of the usual compaction process.
+
+We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient.
+
+The following example configures objects to expire after 1 day:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+## Disable Expiry
+
+To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other two settings are ignored. For example:
+
+```riak.conf
+leveldb.expiration = off
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
diff --git a/content/riak/kv/2.9.2/configuring/load-balancing-proxy.md b/content/riak/kv/2.9.2/configuring/load-balancing-proxy.md
new file mode 100644
index 0000000000..c06ae38a5a
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/load-balancing-proxy.md
@@ -0,0 +1,271 @@
+---
+title: "Load Balancing and Proxy Configuration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Load Balancing & Proxy"
+    identifier: "configuring_load_balance"
+    weight: 150
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/configs/load-balanacing-proxy/
+  - /riak/kv/2.9.2/ops/advanced/configs/load-balanacing-proxy/
+---
+
+[perf open files]: {{}}riak/kv/2.9.2/using/performance/open-files-limit
+
+The recommended best practice for operating Riak in production is to
+place Riak behind a load-balancing or proxy solution, either hardware-
+or software-based, while never directly exposing Riak to public network
+interfaces.
+
+Riak users have reported success in using Riak with a variety of
+load-balancing and proxy solutions. Common solutions include proprietary
+hardware-based load balancers, cloud-based load balancing options, such
+as Amazon's Elastic Load Balancer, and open-source software-based
+projects like HAProxy and Nginx.
+
+This guide briefly explores the commonly used open-source software-based
+solutions HAProxy and Nginx, and provides some configuration and
+operational tips gathered from community users and operations-oriented
+engineers at Basho.
+
+While it is by no means an exhaustive overview of the topic, this guide
+should provide a starting point for choosing and implementing your own
+solution.
+
+## HAProxy
+
+[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source
+solution for load balancing and proxying of HTTP- and TCP-based
+application traffic.
+
+Users have reported success in using HAProxy in combination with Riak in
+a number of configurations and scenarios. Much of the information and
+example configuration for this section is drawn from experiences of
+users in the Riak community in addition to suggestions from Basho
+engineering.
+
+### Example Configuration
+
+The following is an example starting-point configuration for HAProxy to
+act as a load balancer. The example cluster has 4 nodes and will be
+accessed by Riak clients using both the Protocol Buffers and HTTP
+interfaces.
+
+> **Note on open files limits**
+>
+> The operating system's open files limits need to be greater than 256000
+> for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems.
+
+```config
+global
+    log 127.0.0.1 local0
+    log 127.0.0.1 local1 notice
+    maxconn 256000
+    chroot /var/lib/haproxy
+    user haproxy
+    group haproxy
+    spread-checks 5
+    daemon
+    quiet
+
+defaults
+    log global
+    option dontlognull
+    option redispatch
+    option allbackups
+    maxconn 256000
+    timeout connect 5000
+
+backend riak_rest_backend
+    mode http
+    balance roundrobin
+    option httpchk GET /ping
+    option httplog
+    server riak1 riak1.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak2 riak2.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak3 riak3.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak4 riak4.<fqdn>:8098 weight 1 maxconn 1024 check
+
+frontend riak_rest
+    bind 127.0.0.1:8098
+    # Example bind for SSL termination
+    # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem
+    mode http
+    option contstats
+    default_backend riak_rest_backend
+
+
+backend riak_protocol_buffer_backend
+    balance leastconn
+    mode tcp
+    option tcpka
+    option srvtcpka
+    server riak1 riak1.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak2 riak2.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak3 riak3.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak4 riak4.<fqdn>:8087 weight 1 maxconn 1024 check
+
+
+frontend riak_protocol_buffer
+    bind 127.0.0.1:8087
+    mode tcp
+    option tcplog
+    option contstats
+    option tcpka
+    option srvtcpka
+    default_backend riak_protocol_buffer_backend
+```
+
+A specific configuration detail worth noting from the example is the
+commented option for SSL termination. HAProxy supports SSL directly as
+of version 1.5. Provided that your HAProxy instance was built with
+OpenSSL support, you can enable it by uncommenting the example line and
+modifying it to suit your environment. More information is available in
+the [HAProxy
+documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl).
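+
+Before (re)loading a modified configuration, you can ask HAProxy to validate it first; a quick sketch, assuming the configuration lives at the conventional `/etc/haproxy/haproxy.cfg` path:
+
+```bash
+# Parse and validate the configuration without starting the proxy.
+haproxy -c -f /etc/haproxy/haproxy.cfg
+```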
+
+Also note that the above example is considered a starting point and is a
+work in progress based upon [this
+example](https://gist.github.com/1507077). You should carefully examine
+the configuration and change it according to your specific environment.
+
+### Maintaining Nodes Behind HAProxy
+
+When using HAProxy with Riak, you can instruct HAProxy to ping each node
+in the cluster and automatically remove nodes that do not respond.
+
+You can also specify a round-robin configuration in HAProxy and have
+your application handle connection failures by retrying after a timeout,
+thereby reaching a functioning node upon retrying the connection
+attempt.
+
+HAProxy also has a standby system you can use to remove a node from
+rotation while allowing existing requests to finish. You can remove
+nodes from HAProxy directly from the command line by interacting with
+the HAProxy stats socket with a utility such as
+[socat](http://www.dest-unreach.org/socat/):
+
+```bash
+echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+At this point, you can perform maintenance on the node, down the node,
+and so on. When you've finished working with the node and it is again
+available for requests, you can re-enable it:
+
+```bash
+echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+Consult the following HAProxy documentation resources for more
+information on configuring HAProxy in your environment:
+
+* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy)
+* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)
+
+## Nginx
+
+Some users have reported success in using the [Nginx](http://nginx.org/)
+HTTP server to proxy requests for Riak clusters. An example that
+provides access to a Riak cluster *through GET requests only* is
+provided here for reference.
+
+### Example Configuration
+
+The following is an example starting-point configuration for Nginx to
+act as a front-end proxy to a 5-node Riak cluster.
+
+This example forwards all GET requests to Riak nodes while rejecting all
+other HTTP operations.
+
+{{% note title="Nginx version notes" %}}
+This example configuration was verified on **Nginx version 1.2.3**. Please be
+aware that earlier versions of Nginx did not support any HTTP 1.1 semantics
+for upstream communication to backends. You should carefully examine this
+configuration and make changes appropriate to your specific environment before
+attempting to use it.
+{{% /note %}}
+
+Here is an example `nginx.conf` file:
+
+```config
+upstream riak_hosts {
+  # server 10.0.1.10:8098;
+  # server 10.0.1.11:8098;
+  # server 10.0.1.12:8098;
+  # server 10.0.1.13:8098;
+  # server 10.0.1.14:8098;
+}
+
+server {
+  listen       80;
+  server_name  _;
+  access_log   /var/log/nginx/riak.access.log;
+
+  # your standard Nginx config for your site here...
+  location / {
+    root /var/www/nginx-default;
+  }
+
+  # Expose the /riak endpoint and allow queries for keys only
+  location /riak/ {
+      proxy_set_header Host $host;
+      proxy_redirect off;
+
+      client_max_body_size     10m;
+      client_body_buffer_size  128k;
+
+      proxy_connect_timeout  90;
+      proxy_send_timeout     90;
+      proxy_read_timeout     90;
+
+      proxy_buffer_size    64k;  # If set to a smaller value,
+                                 # nginx can complain with a
+                                 # "too large headers" error
+      proxy_buffers     4  64k;
+      proxy_busy_buffers_size   64k;
+      proxy_temp_file_write_size 64k;
+
+      if ($request_method != GET) {
+          return 405;
+      }
+
+      # Disallow any link with the MapReduce query format "bucket,tag,_"
+      if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) {
+          return 405;
+      }
+
+      if ($request_method = GET) {
+          proxy_pass http://riak_hosts;
+      }
+  }
+}
+```
+
+{{% note title="Note on access controls" %}}
+Even when filtering and limiting requests to GETs only as done in the example,
+you should strongly consider additional access controls beyond what Nginx can
+provide directly, such as specific firewall rules to limit inbound connections
+to trusted sources.
+{{% /note %}}
+
+### Querying Secondary Indexes Over HTTP
+
+When accessing Riak over HTTP and issuing Secondary Index queries, you
+can encounter an issue due to the default Nginx handling of HTTP header
+names containing underscore (`_`) characters.
+
+By default, Nginx will issue errors for such queries, but you can
+instruct Nginx to handle such header names when doing Secondary Index
+queries over HTTP by adding the following directive to the appropriate
+`server` section of `nginx.conf`:
+
+```
+underscores_in_headers on;
+```
diff --git a/content/riak/kv/2.9.2/configuring/managing.md b/content/riak/kv/2.9.2/configuring/managing.md
new file mode 100644
index 0000000000..075e786d08
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/managing.md
@@ -0,0 +1,116 @@
+---
+title: "Managing Your Configuration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Managing Configuration"
+    identifier: "configuring_managing"
+    weight: 130
+    parent: "configuring"
+toc: true
+---
+
+[use admin riak cli]: {{}}riak/kv/2.9.2/using/admin/riak-cli
+[use admin riak cli#chkconfig]: {{}}riak/kv/2.9.2/using/admin/riak-cli/#chkconfig
+[config reference#search]: {{}}riak/kv/2.9.2/configuring/reference/#search
+
+## Retrieving a Configuration Listing
+
+At any time, you can get a snapshot of currently applied configurations
+through the command line. For a listing of *all* of the configs
+currently applied on the node:
+
+```bash
+riak config effective
+```
+
+This will output a long list of the following form:
+
+```
+anti_entropy = active
+anti_entropy.bloomfilter = on
+anti_entropy.concurrency_limit = 2
+# and so on
+```
+
+For detailed information about a particular configuration variable, use
+the `config describe <variable>` command. This command will output a
+description of what the parameter configures, which datatype you should
+use to set the parameter (integer, string, enum, etc.), the default
+value of the parameter, the currently set value in the node, and the
+name of the parameter in `app.config` in older versions of Riak (if
+applicable).
+
+For in-depth information about the `ring_size` variable, for example:
+
+```bash
+riak config describe ring_size
+```
+
+This will output the following:
+
+```
+Documentation for ring_size
+Number of partitions in the cluster (only valid when first
+creating the cluster). Must be a power of 2, minimum 8 and maximum
+1024.
+
+   Datatype     : [integer]
+   Default Value: 64
+   Set Value    : undefined
+   app.config   : riak_core.ring_creation_size
+```
+
+## Checking Your Configuration
+
+The [`riak`][use admin riak cli] command line tool has a
+[`chkconfig`][use admin riak cli#chkconfig] command that enables you to
+determine whether the syntax in your configuration files is correct.
+
+```bash
+riak chkconfig
+```
+
+If your configuration files are syntactically sound, you should see the
+output `config is OK` followed by a listing of files that were checked.
+You can safely ignore this listing. If, however, something is
+syntactically awry, you'll see an error output that provides details
+about what is wrong. To give an example, the `search.solr.jmx_port`
+setting (in the [Search][config reference#search] section below)
+must be set as an integer. Imagine that we set it to something else:
+
+```riakconf
+search.solr.jmx_port = banana
+```
+
+If we run `riak chkconfig` now, we'll get an error:
+
+```
+[error] Error generating configuration in phase transform_datatypes
+[error] Error transforming datatype for: search.solr.jmx_port
+[error] "banana" can't be converted to an integer
+```
+
+The error message will specify which configurable parameters are
+syntactically unsound and attempt to provide an explanation why.
+
+Please note that the `chkconfig` command only checks for syntax. It will
+_not_ be able to discern if your configuration is otherwise unsound,
+e.g. if your configuration will cause problems on your operating system
+or doesn't activate subsystems that you would like to use.
+
+## Debugging Your Configuration
+
+If there is a problem with your configuration but you're having trouble
+identifying the problem, there is a command that you can use to debug
+your configuration:
+
+```bash
+riak config generate -l debug
+```
+
+If there are issues with your configuration, you will see detailed
+output that might provide a better sense of what has gone wrong in the
+config generation process.
diff --git a/content/riak/kv/2.9.2/configuring/mapreduce.md b/content/riak/kv/2.9.2/configuring/mapreduce.md
new file mode 100644
index 0000000000..c902c888e4
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/mapreduce.md
@@ -0,0 +1,196 @@
+---
+title: "MapReduce Settings"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "MapReduce Settings"
+    identifier: "configuring_mapreduce"
+    weight: 170
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/configs/mapreduce/
+  - /riak/kv/2.9.2/ops/advanced/configs/mapreduce/
+---
+
+[usage mapreduce]: {{}}riak/kv/2.9.2/developing/usage/mapreduce
+[config reference#appconfig]: {{}}riak/kv/2.9.2/configuring/reference/#app-config
+[usage secondary-indexes]: {{}}riak/kv/2.9.2/developing/usage/secondary-indexes
+
+## Configuring MapReduce
+
+[MapReduce (M/R)][usage mapreduce] is always enabled, but it is configurable
+through the [app.config][config reference#appconfig] file as follows, under
+the `riak_kv` section:
+
+```erlang
+{riak_kv, [
+```
+
+`mapred_name` is the URL directory used to submit M/R requests to Riak.
+The default is `mapred`, making the request path, for example:
+`http://localhost:8098/mapred`
+
+```erlang
+    {mapred_name, "mapred"},
+```
+
+`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes]
+MapReduce inputs are queued in parallel in their own pipe (`true`), or
+serially through a helper process (`false` or undefined).
+
+> **Note**: Set to `false` or leave undefined during an upgrade from 1.0.
+
+```erlang
+    {mapred_2i_pipe, true},
+```
+
+Each of the following entries controls how many JavaScript virtual machines are
+available for executing map, reduce, and pre- and post-commit hook
+functions.
+
+This is largely relevant only if you are writing JavaScript M/R jobs.
+
+```erlang
+    {map_js_vm_count, 8 },
+    {reduce_js_vm_count, 6 },
+    {hook_js_vm_count, 2 },
+```
+
+`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated
+to the JavaScript VMs. If unset, the default is 8MB.
+
+This is largely relevant only if you are writing JavaScript M/R jobs.
+
+```erlang
+    {js_max_vm_mem, 8},
+```
+
+`js_thread_stack` is the maximum amount of thread stack, in megabytes,
+allocated to the JavaScript VMs. If unset, the default is 16MB.
+
+> **Note**: This is not the same as the C thread stack.
+
+```erlang
+    {js_thread_stack, 16},
+```
+
+`js_source_dir` should point to a directory containing JavaScript source
+files which will be loaded when Riak initializes JavaScript VMs.
+
+```erlang
+    %{js_source_dir, "/tmp/js_source"},
+```
+
+## Configuration Tuning for Javascript
+
+If you load larger JSON objects in your buckets, there is a possibility you might encounter an error like the following:
+
+```json
+    {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"}
+```
+
+You can increase the amount of memory allocated to the JavaScript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB:
+
+```erlang
+{js_thread_stack, 8}
+```
+
+becomes
+
+```erlang
+{js_thread_stack, 32},
+```
+
+In addition to increasing the amount of memory allocated to the stack, you can increase the heap size as well by raising `js_max_vm_mem` from the default of 8MB. If you are collecting a large number of results in a reduce phase, you may need to increase this setting.
+
+## Configuration for Riak 1.0
+
+Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config:
+
+```erlang
+%% Use Riak Pipe to power MapReduce queries
+{mapred_system, pipe},
+```
+
+> **Warning:**
+>
+> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0.
+
+Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this:
+
+```erlang
+%% Use the legacy MapReduce system
+{mapred_system, legacy},
+```
+
+## Configuration Tuning for Reduce Phases
+
+If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases.
+
+### Batch Size
+
+By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs.
+If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config:
+
+```erlang
+%% Run reduce functions after 100 new inputs are received
+{mapred_reduce_phase_batch_size, 100},
+```
+
+You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this:
+
+```json
+{"reduce":
+  {...language, etc. as usual...
+   "arg":{"reduce_phase_batch_size":150}}}
+```
+
+In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form:
+
+```erlang
+{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep}
+```
+
+Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead:
+
+```json
+{"reduce":
+  {...language, etc. as usual...
+   "arg":{"reduce_phase_only_1":true}}}
+```
+
+Similarly, in Erlang:
+
+```erlang
+{reduce, FunSpec, [reduce_phase_only_1], Keep}
+```
+
+> **Warning:**
+>
+> A known bug in Riak 1.0.0 means that it is possible that a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1.
+
+### Pre-Reduce
+
+If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced local to the partition that produced them, before being sent, as usual, to the final aggregate reduce.
+
+Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config:
+
+```erlang
+%% Always pre-reduce between map and reduce phases
+{mapred_always_prereduce, true}
+```
+
+Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag:
+
+```erlang
+{map, FunSpec, [do_prereduce], Keep}
+```
+
+> **Warning:**
+>
+> A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1.
diff --git a/content/riak/kv/2.9.2/configuring/next-gen-replication.md b/content/riak/kv/2.9.2/configuring/next-gen-replication.md
new file mode 100644
index 0000000000..3c65d7dd86
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/next-gen-replication.md
@@ -0,0 +1,61 @@
+---
+title_supertext: "Configuring:"
+title: "Next Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.9.2"
+menu:
+  riak_kv-2.9.2:
+    name: "Next Gen Replication"
+    identifier: "nextgen_rep"
+    weight: 200
+    parent: "configuring"
+version_history:
+  in: "2.9.1+"
+toc: true
+commercial_offering: true
+---
+
+The configuration for Next Gen Replication is kept in
+the `riak.conf` configuration file.
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by
+running the `riak` command-line tool:
+
+```bash
+riak chkconfig
+```
+
+## riak.conf Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync, does all data need to be sync'd, or should a specific bucket be sync'd (bucket), or a specific bucket type (type)? Note that in most cases sync of all data is lower overhead than sync of a subset of data, as cached AAE trees will be used.
+`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For Tictac full-sync, the registered queue name on this cluster to be used for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exclusive to full-sync (i.e. a real-time replication queue may be used as well).
+`ttaaefs_maxresults` | `any` (integer) | `64` | For Tictac full-sync, the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair.
+`ttaaefs_rangeboost` | `any` (integer) | `8` | For Tictac full-sync, when running a range_check query the maximum number of AAE segments to be compared per exchange will be `ttaaefs_maxresults` * `ttaaefs_rangeboost`.
+`ttaaefs_bucketfilter_name` | `any` (text) | `` | For Tictac bucket full-sync, which bucket should be sync'd by this node. Only ascii string bucket definitions are supported (which will be converted using list_to_binary).
+`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync, the bucket type of the bucket name. Only ascii string type bucket definitions are supported (these definitions will be converted to binary using list_to_binary).
+`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync, which NVAL should be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval.
+`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync, which NVAL should be sync'd in the remote cluster.
+`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node in the cluster with which this node will connect for full_sync purposes. If this peer node is unavailable, then this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes.
+`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster.
+`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when connecting to the peer in the remote cluster. Can be http or pb (but only http is currently tested).
+`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24-hour period all the data should be checked to confirm it is fully sync'd. When running a full (i.e. nval) sync this will check all the data under that nval between the clusters, and when the trees are out of alignment, will check across all data where the nval matches the specified nval.
+`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24-hour period no data should be checked. Use no-checks to align the number of checks done by each node - if each node has the same number of slots, they will naturally space their checks within the period of the slot.
+`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24-hour period the last hour's data should be checked to confirm it is fully sync'd.
+`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24-hour period the last 24 hours of data should be checked to confirm it is fully sync'd.
+`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24-hour period a range_check should be run.
+`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, should each key that is repaired be logged.
+`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable tictacaae. Note that disabling tictacaae will set the use of tictacaae_active only at startup - setting the environment variable at runtime will have no impact.
+`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
+`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Set the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and not removed when that partition hands off (although the contents should be cleared).
+`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if tictacaae is enabled, the vnode will detect whether the vnode backend has the capability to be a "native" store. If not, then parallel mode will be entered, and a parallel AAE keystore will be started. There are two potential parallel store backends - leveled_ko and leveled_so.
+`tictacaae_rebuildwait` | `` | `336` | The number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
+`tictacaae_rebuilddelay` | `` | `345600` | Once the AAE system has expired (due to the rebuild wait), the rebuild will not be triggered until a further delay has passed, chosen as a random number of seconds up to the size of this value.
+`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
+`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will be skipped when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans developing.
+`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on the riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
+`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data; hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired each pass is defined by tictacaae_maxresults.
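+
+To make the table concrete, here is a minimal sketch of a full-sync configuration in riak.conf; the peer address and the check schedule are illustrative assumptions, not recommendations:
+
+```riakconf
+## Illustrative full-sync setup: sync all data against one peer node.
+ttaaefs_scope = all
+ttaaefs_localnval = 3
+ttaaefs_remotenval = 3
+ttaaefs_peerip = 10.0.0.2
+ttaaefs_peerport = 8898
+ttaaefs_peerprotocol = http
+## 6 full checks and 18 no-check slots per 24-hour period.
+ttaaefs_allcheck = 6
+ttaaefs_nocheck = 18
+```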
\ No newline at end of file diff --git a/content/riak/kv/2.9.2/configuring/reference.md b/content/riak/kv/2.9.2/configuring/reference.md new file mode 100644 index 0000000000..5c702c0785 --- /dev/null +++ b/content/riak/kv/2.9.2/configuring/reference.md @@ -0,0 +1,2030 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.2/ops/advanced/configs/configuration-files/ + - /riak/kv/2.9.2/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. 
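+
+For orientation, `riak.conf` entries are simple `setting = value` lines. A minimal illustrative sketch (these particular values are just the defaults listed in the tables below):
+
+```riakconf
+nodename = riak@127.0.0.1
+distributed_cookie = riak
+ring_size = 64
+```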
+ +## Node Metadata + +Every Riak node has a name and a cookie used to facilitate inter-node +communication. The following parameters enable you to customize the name +and cookie. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
distributed_cookieCookie for distributed node communication within a Riak cluster. +All nodes in the same cluster should use the same cookie or they will +not be able to communicate.riak
nodenameThe name of the Riak node.riak@127.0.0.1
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
+ +## Ring + +Configurable parameters for your cluster's [ring][concept clusters]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ring.state_dirDefault location of ringstate../data/ring
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
transfer_limitNumber of concurrent node-to-node transfers allowed.2
+ +## Storage Backend + +Riak enables you to choose from the following storage backends: + +* [Bitcask][plan backend bitcask] --- [configuration][config backend bitcask] +* [LevelDB][plan backend leveldb] --- [configuration][config backend leveldb] +* [Leveled][plan backend leveled] --- [configuration][config backend leveled] +* [Memory][plan backend memory] --- [configuration][config backend memory] +* [Multi][plan backend multi] --- [configuration][config backend multi] + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
storage_backendSpecifies the storage engine used for Riak's key-value data and +secondary indexes (if supported).

The available options are +bitcask (the default), leveldb, +memory, leveled and multi.
bitcask
+ +## Directories + +The directories in which Riak stores data, logs, dependencies, +executables, and configuration files can be configured using the +parameters below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
platform_bin_dirThe directory in which the riak-admin, +riak-debug, and now-deprecated search-cmd +executables are stored../bin
platform_data_dirThe directory in which Riak stores its storage backend data, as well +as active anti-entropy data, and cluster metadata../data
platform_etc_dirThe directory in which Riak's configuration files are stored../etc
platform_lib_dirThe directory in which Riak's dependencies are housed../lib
platform_log_dirThe directory in which Riak's log files are stored, e.g. +console.log, erlang.log, and +crash.log files../log
+ +Each of these directory parameters can be used to construct values for +other parameters by placing it within a `$(...)`. Thus, +`platform_log_dir` becomes `$(platform_log_dir)` and so on. + +To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the +`anti_entropy.data_dir` parameter. When setting that parameter, you can +specify an absolute directory, as below: + +```riakconf +anti_entropy.data_dir = /path/to/anti_entropy +``` + +Or you can use the value of `platform_data_dir`: + +```riakconf +anti_entropy.data_dir = $(platform_data_dir)/anti_entropy +``` + +## Search + +Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings]. + +Field | Default | Valid values | +:-----|:--------|:-------------| +`search` | `off` | `on` or `off` +`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory +`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer +`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer +`search.dist_query` | `on` | `on` or `off` +`search.index.error_threshold.failure_count` | `3` | Integer +`search.index.error_threshold.failure_interval` | `5000` | Milliseconds +`search.index.error_threshold.reset_interval` | `30000` | Milliseconds +`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h` +`search.queue.batch.maximum`| `100` | Integer +`search.queue.batch.minimum` | `1` | Integer +`search.queue.high_watermark` | `10000` | Integer +`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off` +`search.root_dir` | `./data/yz` | Directory +`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments +`search.solr.jmx_port` | `8985` | Integer +`search.solr.jmx_port` | `8985` | Integer +`search.solr.port` | `8093` | Integer +`search.solr.start_timeout` | `30s` | Integer with time units (eg. 2m) +`yokozuna.aae_throttle_enabled` | `on` | `on` or `off` + + +## Riak Control + +Riak Control is a web-based administrative console for inspecting and +manipulating Riak clusters. The configurable parameters below enable you +to turn the Riak Control subsystem on and off and to configure console +authorization. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
riak_controlSet to off to disable the admin panel.off
riak_control.auth.modeAuthentication mode used for access to the admin panel. Options are +off (which is the default) or userlist.off
riak_control.auth.user.$username.passwordIf Riak Control's authentication mode +(riak_control.auth.mode) is set to userlist, +this is the list of usernames and passwords for access to the admin +panel.
+ +## Runtime Health + +Configurable parameters for interaction between Riak and the underlying +operating system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
runtime_health.triggers.distribution_portWhether distribution ports with full input buffers will be counted +as busy. Distribution ports connect Riak nodes within a single cluster. +on
runtime_health.triggers.portWhether ports with full input buffers will be counted as busy. +Ports can represent open files or network sockets.on
runtime_health.triggers.process.heap_sizeA process will become busy when its heap exceeds this size +(in bytes).160444000
runtime_health.triggers.process.garbage_collectionA process will become busy when it exceeds this amount of time doing +garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 +milliseconds, `5s` for 5 seconds, etc.Note: Enabling +this setting can cause performance problems on multi-core systems.off
runtime_health.triggers.process.long_scheduleA process will become busy when it exceeds this amount of time +during a single process scheduling and execution cycle. Set as an integer +plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, +etc.off
runtime_health.thresholds.busy_portsThe threshold at which a warning will be triggered about the number +of ports that are overly busy. Ports with full input buffers count +toward this threshold.2
runtime_health.thresholds.busy_processesThe threshold at which to warn a warning will be triggered about the +number of processes that are overly busy. Processes with large heaps or +that take a long time to garbage collect will count toward this +threshold.30
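+
+As a hedged example (these are the defaults from the table above, shown only to illustrate the syntax):
+
+```riakconf
+runtime_health.triggers.port = on
+runtime_health.thresholds.busy_ports = 2
+runtime_health.thresholds.busy_processes = 30
+```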
+ +## Default Bucket Properties + +When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
buckets.default.allow_multWhether or not siblings are allowed +

+Note: See +Conflict Resolution for a discussion of siblings.
false
buckets.default.basic_quorumWhether not-founds will invoke the "basic quorum" optimization. +This setting will short-circuit fetches where the majority of replicas +report that the key is not found. Only used when +notfound_ok is set to false.false
buckets.default.dwThe number of replicas which must reply to a write request +indicating that the write was committed to durable storage for the write +to be deemed successful.quorum
buckets.default.last_write_winsWhether conflicting writes resolve via timestamp.false
buckets.default.merge_strategyThe strategy used when merging objects that potentially have +conflicts. The default is 2 in Riak 2.0 for typed buckets +and 1 for non-typed buckets. This setting reduces sibling +creation through additional metadata on each sibling (also known as Dotted +Version Vectors). Setting this to 1 is the default for +Riak 1.4 and earlier, and may duplicate siblings that originated in the +same write.1
buckets.default.n_valThe number of replicas stored in **non-typed** buckets. For typed buckets, the default is 3 unless changed explicitly for that bucket type. +

+Note: See +Replication Properties +for further discussion.
3
buckets.default.notfound_okWhether not-founds will count toward a quorum of reads.true
buckets.default.postcommitA space-delimited list of functions that will be run after a value +is stored. Only Erlang functions are allowed, using the +module:function format.
buckets.default.precommitA space-delimited list of functions that will be run before a value +is stored, and that can abort the write. Only Erlang functions are +allowed, using the module:function format.
buckets.default.prThe number of primary, non-fallback replicas that must reply to a +read request.0
buckets.default.pwThe number of primary, non-fallback replicas which must reply to a +write request.0
buckets.default.rThe number of replicas which must reply to a read request.quorum
buckets.default.wThe number of replicas which must reply to a write request, +indicating that the write was received.quorum
buckets.default.rwThe number of replicas which must reply to a delete request.quorum
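+
+For illustration, the stock defaults above expressed as riak.conf lines (not a recommendation to change them):
+
+```riakconf
+buckets.default.allow_mult = false
+buckets.default.n_val = 3
+buckets.default.r = quorum
+buckets.default.w = quorum
+```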
+ +## Object Settings + +Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
object.formatControls which binary representation of a riak value is stored on +disk. Options are 0, which will use the original +erlang:term_to_binary format but has a higher space +overhead, or 1, which will tell Riak to utilize a new +format for more compact storage of small values.1
object.siblings.maximumWriting an object with more than this number of siblings will send +a failure to the client.100
object.siblings.warning_thresholdWriting an object with more than this number of siblings will +generate a warning in the logs.25
object.size.maximumWriting an object larger than this will send a failure to the +client.50MB
object.size.warning_thresholdReading or writing objects larger than this size will write a +warning in the logs.5MB
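+
+A sketch of the sibling and object-size guardrails above, using the listed defaults:
+
+```riakconf
+object.siblings.warning_threshold = 25
+object.siblings.maximum = 100
+object.size.warning_threshold = 5MB
+object.size.maximum = 50MB
+```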
+ +## Erlang VM + +In the older configuration system, the Erlang VM in which Riak runs was +configured using a `vm.args` file. In the new, `riak.conf`-based +system, the Erlang VM can be configured using the parameters in the +table below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
erlang.async_threadsThe number of threads in the Erlang VM's asynchronous thread pool. +The valid range is 0-1024. If thread support is not available, this +parameter will have no impact; if thread support is available, the +default value is 64. This is the equivalent of the +A flag. +More information can be found here. +64 (if thread support is available)
erlang.async_threads.stack_sizeIf thread support is available in your Erlang VM, this parameter +sets the amount of memory allocated to each asynchronous thread, which +you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, +which translates to 64-32768 KB on 32-bit architectures. Although there +is no default, we suggest a stack size of 16 kilowords, which translates +to 64 KB. This small default size has been chosen because the number of +asynchronous threads, set using the erlang.async_threads +parameter explained above, might be quite large. The 64 KB default is +enough for drivers delivered with Erlang/OTP but might not be large +enough to accommodate drivers that use the driver_async() +functionality, documented here.
erlang.distribution.net_ticktimeThe net kernel is an Erlang system process that provides various +forms of network monitoring. In a Riak cluster, one of the functions of +the net kernel is to periodically check node liveness. Tick +time is the frequency with which those checks happen. This +parameter determines that frequency for every N. If you set +this parameter to 10, for example, the tick will occur once +every 10 seconds.
erlang.distribution.port_range.minimumFor ease of firewall configuration, the Erlang distribution can be +bound to a limited range of TCP ports. If this parameter is set, and +erlang.distribution.port_range.maximum is not set, only +this port will be used. If the minimum is unset, no restriction will be +made on the port range. Instead, Erlang will listen on a random +high-numbered port. More information here and here.
erlang.distribution.port_range.maximumSee the description for +erlang.distribution.port_range.minimum directly above. +
erlang.schedulers.force_wakeup_intervalSet the scheduler forced wakeup interval. All run queues will be +scanned each time period specified (in milliseconds). While there are +sleeping schedulers in the system, one scheduler will be woken for each +non-empty run queue found. An interval of zero disables this feature, +which is the default. This feature is a workaround for lengthy executing +native code, and native code that does not properly bump reductions. +More information here.
erlang.schedulers.compaction_of_loadEnables or disables the Erlang scheduler's compaction of load. When +enabled (which is the default), load balancing will strive to establish +a load distribution that causes as many scheduler threads as possible to +be fully loaded, i.e. not to run out of scheduled work. This is +accomplished by migrating load, such as running processes, into a +smaller set of schedulers when schedulers frequently run out of work. +When disabled, the frequency at which schedulers run out of work will +not be taken into account by the load balancing logic.true (enabled)
erlang.schedulers.utilization_balancingEnables or disables the Erlang scheduler's balancing of load. By +default, scheduler utilization of balancing is disabled while scheduler +compaction of load is enabled, i.e. +erlang.schedulers.compaction_of_load is set to +true. In this state, the Erlang VM will strive for a load +distribution which causes as many scheduler threads as possible to be +fully loaded, i.e. to not run out of work. When load balancing is +enabled using this setting, the system will attempt to equally scheduler +utilization between schedulers.false (disabled)
erlang.distribution_buffer_sizeFor nodes with many busy_dist_port events, Basho +recommends raising the sender-side network distribution buffer size. +32MB may not be sufficient for some workloads and is a suggested +starting point. Erlangers may know this as +zdbbl. See more +here +.32MB
erlang.process_limitRaises the default Erlang process limit256000
erlang.max_ets_tablesRaises the ETS table limit256000
erlang.crash_dumpSets the location of crash dumps./log/erl_crash.dump
erlang.fullsweep_afterA non-negative integer which indicates how many times generational +garbage collections can be done without forcing a fullsweep collection. +In low-memory systems (especially without virtual memory), setting the +value to 0 can help to conserve memory. More information here. +0
erlang.max_portsThe number of concurrent ports/sockets. The valid range is 1024 to +134217727.65536
erlang.KEnables or disables the kernel poll functionality if the emulator +supports it. If the emulator does not support kernel poll, and the +K flag is passed to the emulator, a warning is issued at +startup. Similar information here.on
erlang.schedulers.totalSets the number of scheduler threads to create and scheduler +threads to set online when erlang.smp support has been +enabled. The maximum for both values is 1024. If the Erlang runtime +system is able to determine the amount of logical processors configured +and logical processors available, schedulers.total will +default to logical processors configured, and +schedulers.online will default to the number of logical +processors available. Otherwise, the default values will be 1. +Schedulers may be omitted if schedulers.online is not and +vice versa. If schedulers.total or +schedulers.online is specified as a negative number, the +value is subtracted from the default number of logical processors +configured or logical processors available, respectively. Specifying +the value 0 for Schedulers or +SchedulersOnline resets the number of scheduler threads or +scheduler threads online respective to its default value. This option +is ignored if the emulator doesn't have SMP support enabled (see the +erlang.smp flag). More information +here. +
erlang.schedulers.onlineSee the description for erlang.schedulers.total +directly above.
erlang.WSets the mapping of warning messages for error_logger. +Messages sent to the error logger using one of the warning routines can +be mapped either to errors, warnings (w, +which is the default), or info reports (i).w
erlang.smpStarts the Erlang runtime system with SMP support enabled. This may +fail if no runtime system with SMP support is available. The +auto setting starts the Erlang runtime system with SMP +support enabled if it is available and more than one logical processor +is detected. A value of disable starts a runtime system +without SMP support. Note: The runtime system with SMP +support will not be available on all supported platforms. See also the +erlang.schedulers settings. Some native extensions (NIFs) +require use of the SMP emulator. More information here.enable
erlang.shutdown_timeLimits how long the Erlang VM spends shutting down. After the +specified duration elapses, all existing processes are killed.10s
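+
+An illustrative (not prescriptive) sketch of commonly tuned Erlang VM settings from the table above, using their listed defaults:
+
+```riakconf
+erlang.distribution_buffer_size = 32MB
+erlang.process_limit = 256000
+erlang.max_ports = 65536
+erlang.crash_dump = ./log/erl_crash.dump
+```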
+ +## JavaScript MapReduce + +Configurable parameters for Riak's now-deprecated JavaScript +[MapReduce][usage mapreduce] system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
javascript.source_dirA directory containing the Javascript source files which will be +loaded by Riak when it initializes Javascript VMs.
javascript.maximum_stack_sizeThe maximum amount of thread stack memory to allocate to each +JavaScript virtual machine.16MB
javascript.maximum_heap_sizeThe maximum amount of memory allocated to each JavaScript virtual +machine.8MB
javascript.hook_pool_sizeThe number of JavaScript virtual machines available for executing +pre-commit hook functions.2
javascript.reduce_pool_sizeThe number of JavaScript virtual machines available for executing +reduce functions.6
javascript.map_pool_sizeThe number of JavaScript virtual machines available for executing +map functions.8
+ +## Security + +Configurable parameters for [Riak KV Security][security index]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ssl.cacertfileThe default signing authority location for HTTPS.#(platform_etc_dir)/cacertfile.pem
ssl.keyfileDefault key location for HTTPS.#(platform_etc_dir)/key.pem
ssl.certfileDefault cert location for HTTPS.#(platform_etc_dir)/cert.pem
secure_referer_checkMeasures were added to Riak 1.2 to counteract cross-site scripting +and request-forgery attacks. Some reverse proxies cannot remove the +Referer header and make serving data directly from Riak +impossible. Turning this setting to off disables this +security check.on
check_crlWhether to check the certificate +revocation list (CRL) of a client certificate. This defaults to +on but some CAs may not maintain or define a CRL, so this +can be disabled if no CRL is available.on
tls_protocols.sslv3Determine which SSL/TLS versions are allowed. By default, only TLS +1.2 is allowed, but other versions can be enabled if clients don't +support the latest TLS standard. It is strongly recommended that SSLv3 +not be enabled unless absolutely necessary. More than one protocol can +be enabled at once. The tls_protocols parameters below can +be used to turn different versions on and off.off
tls_protocols.tlsv1.2on
tls_protocols.tlsv1.1off
tls_protocols.tlsv1off
honor_cipher_orderWhether to prefer the order in which the server lists its ciphers. +When set to off, the client's preferred cipher order +dictates which cipher is chosen.on
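+
+A hedged sketch of the TLS-related settings above, restating the listed defaults (TLS 1.2 only):
+
+```riakconf
+tls_protocols.sslv3 = off
+tls_protocols.tlsv1 = off
+tls_protocols.tlsv1.1 = off
+tls_protocols.tlsv1.2 = on
+check_crl = on
+honor_cipher_order = on
+```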
+ +## Client Interfaces + +Configurable parameters for clients connecting to Riak either through +Riak's Protocol Buffers or HTTP API. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
protobuf.nagleTurns off Nagle's algorithm for Protocol Buffers connections. This +is equivalent to setting the TCP_NODELAY option on the +socket.off
protobuf.backlogThe maximum length to which the queue of pending connections may +grow. If set, it must be an integer greater than zero. If you +anticipate a huge number of connections being initialized +simultaneously, set this number higher.128
listener.protobuf.$nameThis is the IP address and TCP port to which the Riak Protocol +Buffers interface will bind.{"127.0.0.1",8087}
listener.http.$nameThis is the IP address and TCP port to which the Riak HTTP +interface will bind.{"127.0.0.1",8098}
listener.https.$nameThis is the IP address and TCP port to which the Riak HTTPS +interface will bind.
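+
+For example, to bind the Protocol Buffers and HTTP listeners to their default addresses and ports (the `internal` listener name is illustrative):
+
+```riakconf
+listener.protobuf.internal = 127.0.0.1:8087
+listener.http.internal = 127.0.0.1:8098
+```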
+ +## Logging + +Configurable parameters for [lager](https://github.com/basho/lager), +Riak's logging system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
log.consoleWhere to emit the default log messages (typically at +info severity). Possible values: off, which +disables console log messages; file, which specifies that +log messages will be output to the file specified by +log.console.file; console, which outputs +messages to standard output (seen when using riak +attach-direct); or both, which outputs messages both +to the file specified in log.console.file and to standard +out.file
log.console.fileWhen log.console is set to file or +both, this parameter determines the path of the file to +which console messages will be logged../log/console.log
log.console.levelThe severity level of the console log. Possible +values: +
    +
  • debug
  • +
  • info
  • +
  • warning
  • +
  • error
  • +
info
log.crashWhether to enable the crash logon
log.crash.fileIf the crash log is enabled, the file where its messages will be +written./log/crash.log
log.crash.maximum_message_sizeMaximum size of individual messages in the crash log64KB
log.crash.rotationThe schedule on which to rotate the crash log. More information here. +$D0
log.crash.rotation.keepThe number of rotated crash logs to keep. When set to +current, only the current open log file is kept. +Otherwise, an integer can be specified.5
log.crash.sizeMaximum size of the crash log before it is rotated10MB
log.error.fileThe file where error messages will be logged../log/error.log
log.error.messages_per_secondMaximum number of error_logger messages to handle per +second100
log.error.redirectWhether to redirect error_logger messages into +lageron
log.syslogWhen set to on, enables log output to syslogoff
log.syslog.facilitySets the facility +level of syslog output if log.syslog is set to +on. Possible values: +
  • auth
  • authpriv
  • +
  • clock
  • cron
  • +
  • daemon
  • ftp
  • +
  • kern
  • lpr
  • +
  • mail
  • news
  • +
  • syslog
  • user
  • +
  • uucp
+In addition to these settings, you may also select local0 +through local7.
daemon
log.syslog.identIf log.syslog is set to on, this setting +determines the prefix appended to each syslog message.riak
log.syslog.levelIf log.syslog is set to on, this setting +determines the log level of syslog output. Possible values: +
  • alert
  • critical
  • +
  • debug
  • emergency
  • +
  • error
  • info
  • none
  • notice
  • +
  • warning
info
saslWhether to enable sasl, Erlang's +built-in error loggeroff
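+
+A sketch combining the logging parameters above (the syslog lines are illustrative; the rest are the listed defaults):
+
+```riakconf
+log.console = file
+log.console.level = info
+log.syslog = on
+log.syslog.facility = daemon
+```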
+ +## Active Anti-Entropy + +Configurable parameters for Riak's active anti-entropy subsystem. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
anti_entropyHow Riak will repair out-of-sync keys. If set to +active, out-of-sync keys will be repaired in the +background; if set to passive, out-of-sync keys are only +repaired on read; and if set to active-debug, verbose +debugging information will be output.active
search.anti_entropy.throttleWhether the distributed throttle for Active Anti-Entropy is +enabled.on
search.anti_entropy.throttle.$tier.solrq_queue_lengthSets the throttling tiers for Active Anti-Entropy. Each tier is a +minimum vnode mailbox size and a time-delay that the throttle should +observe at that size and above. For example, +anti_entropy.throttle.tier1.mailbox_size = 0, +anti_entropy.throttle.tier1.delay = 0ms, +anti_entropy.throttle.tier2.mailbox_size = 40, +anti_entropy.throttle.tier2.delay = 5ms, etc. If +configured, there must be a tier which includes a mailbox size of 0. +Both .mailbox_size and .delay must be set for +each tier.
search.anti_entropy.throttle.$tier.delaySee the description for +anti_entropy.throttle.$tier.mailbox_size above.
anti_entropy.bloomfilterBloom filters are highly effective in shortcutting data queries +that are destined to not find the requested key, though they tend to +entail a small performance cost.on
anti_entropy.max_open_files20
anti_entropy.write_buffer_sizeThe LevelDB options used by Active Anti-Entropy to generate the +LevelDB-backed on-disk hashtrees.4MB
anti_entropy.data_dirThe directory where AAE hash trees are stored../data/anti_entropy
anti_entropy.trigger_intervalThe tick determines how often the Active Anti-Entropy manager looks +for work to do (building/expiring trees, triggering exchanges, etc). +Lowering this value will speed up the rate at which all replicas are +synced across the cluster. Increasing the value is not recommended. +15s
anti_entropy.concurrency_limitLimit how many Active Anti-Entropy exchanges or builds can happen +concurrently.2
anti_entropy.tree.expiryDetermines how often hash trees are expired after being built. +Periodically expiring a hash tree ensures that the on-disk hash tree +data stays consistent with the actual K/V backend data. It also helps +Riak identify silent disk failures and bit rot. However, expiration is +not needed for normal active anti-entropy operations and should be +infrequent for performance reasons. The time is specified in +milliseconds.1w
anti_entropy.tree.build_limit.per_timespan1h
anti_entropy.tree.build_limit.numberRestrict how fast AAE can build hash trees. Building the tree for a +given partition requires a full scan over that partition's data. Once +built, trees stay built until they are expired. .number is +the number of builds; .per_timespan is the amount of time +in which that number of builds occurs.1
anti_entropy.use_background_managerWhether AAE is to use a background process to limit AAE tree +rebuilds. If set to on, this will help to prevent system +response degradation under times of heavy load from multiple background +tasks that contend for the same system resources; setting this parameter +to off can cut down on system resource usage. +off
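+
+For illustration, the hash-tree AAE settings above might be expressed as follows (values are the listed defaults):
+
+```riakconf
+anti_entropy = active
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+anti_entropy.concurrency_limit = 2
+anti_entropy.tree.expiry = 1w
+```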
+ +## TicTac Active Anti-Entropy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +## Intra-Cluster Handoff + +Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff]. + +
ConfigDescriptionDefault
tictacaae_activeChanges TicTacAAE from Passive or Active. If you want to run TicTac AAE alongside legacy AAE, set both to Active. Can be active or passive +passive
tictacaae_datarootPath under which aae datafiles will be stored (platform_data_dir)/tictac_aae
tictacaae_parallelstoreWhen running in parallel mode, which will be the default if the backend does not support native tictac aae (i.e. is not leveled), what type of parallel key store should be kept - leveled_ko (leveled and key-ordered), or leveled_so (leveled and segment ordered). When running in native mode, this setting is ignored. Acceptable values are leveled_ko or leveled_soleveled_ko
tictacaae_rebuildwaitThe minimum number of hours to wait between rebuilds.336
tictacaae_rebuilddelayThe number of seconds which represents the length of the period in which the next rebuild will be scheduled. So if all vnodes are scheduled to rebuild at the same time, they will actually rebuild randomly between 0 and this value (in seconds) after the rebuild time.345600
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
handoff.max_rejectsThe maximum number of times that a secondary system within Riak, +such as Riak Search, can block handoff +of primary key/value data. The approximate maximum duration that a vnode +can be blocked can be determined by multiplying this setting by +vnode_management_timer. If you want to prevent handoff from +ever being blocked by a secondary system, set this parameter to +0.6
handoff.inboundWhether inbound handoff is enabled on the node. Possible values are +on or off.on
handoff.outboundWhether outbound handoff is enabled on the node. Possible values are +on or off.on
handoff.portSpecifies the TCP port that Riak uses for intra-cluster data +handoff.8099
handoff.ssl.certfileTo encrypt riak_core intra-cluster data handoff +traffic, uncomment this line and edit its path to an appropriate +certfile and keyfile.
handoff.ssl.keyfileThe keyfile paired with the certfile specified in +.certfile.
handoff.use_background_managerWhether Riak will use a background manager to limit K/V handoff. +This can help to prevent system response degradation during times of +heavy load caused by multiple background tasks that contend for the same +system resources; setting this parameter to off can cut +down on system resource usage.off
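+
+A hedged sketch of the handoff settings above, restating the listed defaults:
+
+```riakconf
+handoff.port = 8099
+handoff.inbound = on
+handoff.outbound = on
+handoff.use_background_manager = off
+```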
+ +## Riak Data Types + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
datatypes.compression_levelWhether serialized Data Types will use compression and at what +level. When set to an integer, the parameter refers to the +aggressiveness of compression, on a scale from 0 to 9. on +is equivalent to 6, whereas off is equivalent to 0. Higher +values for compression tend to be more CPU intensive.1
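+
+For illustration only, raising the compression level (on the 0-9 scale described above; `9` here is just an example, and higher levels are more CPU intensive):
+
+```riakconf
+datatypes.compression_level = 9
+```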
+ +## SNMP + +Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher. + +## JMX + +Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher. + +## Strong Consistency + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment. + +Riak's strong consistency feature has a variety of tunable parameters +that allow you to enable and disable strong consistency, modify the +behavior of leaders and followers, set various timeouts, and more. More +detailed information from an operations perspective can be found in our +documentation on [managing strong consistency][cluster ops strong consistency]. + +Strong consistency is disabled by default. The `strong_consistency` +parameter enables you to turn it on. This setting is available in each +node's `riak.conf` file. + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
strong_consistencyEnables the consensus subsystem used for strongly consistent Riak +operations if set to on.off
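+
+To enable the subsystem on a node (bearing in mind the experimental status described above), the single riak.conf line is:
+
+```riakconf
+strong_consistency = on
+```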
+ +Unlike the `strong_consistency` setting, the settings listed below are +available only in `advanced.config`, in the `riak_ensemble` section of +that file. That section looks like this: + +```advancedconfig +{riak_ensemble, [ + {parameter1, value}, + {parameter2, value}, + %% Other setting + ]} +``` + +Further instructions on setting parameters in `advanced.config` can be +found in the [advanced configuration](#advanced-configuration) section below. + +Using these settings properly demands a firm understanding of the basic +architecture of Riak's implementation of strong consistency. We highly +recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind +strong consistency before changing the defaults on these parameters. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ensemble_tickThe rate at which leaders perform their periodic duties, including +refreshing the leader lease, in milliseconds. This setting must be lower +than both the lease_duration and +follower_timeout settings (both listed below). Lower values +mean that leaders perform their duties more frequently, which can allow +for faster convergence if a leader goes offline and then returns to the +ensemble; higher values mean that leaders perform their duties less +frequently, which can reduce network overhead.500
lease_durationDetermines how long a leader lease remains valid without being +refreshed (in milliseconds). This should be set higher than the +ensemble_tick setting (listed above) so that leaders have +time to refresh their leases before they time out, and it must be set +lower than the follower_timeout setting (listed below). +ensemble_tick * 3/2
follower_timeoutDetermines how long a follower waits to hear from a leader before it +abandons the leader (in milliseconds). This must be set greater than the +lease_duration setting.lease_duration * 4
alive_tokensDetermines the number of ticks the leader will wait to hear from its +associated vnode before assuming that the vnode +is unhealthy and stepping down as leader. If the vnode does not respond +to the leader before ensemble_tick * +alive_tokens milliseconds have elapsed, the leader will +give up leadership. It may be necessary to raise this setting if your +Riak vnodes are frequently stalling out on slow backend reads/writes. If +this setting is too low, it may cause slow requests to time out earlier +than the request timeout.2
storage_delayDetermines how long the consensus subsystem delays syncing to disk +when performing certain metadata operations (in milliseconds). This +delay allows multiple operations to be coalesced into a single disk +write. We do not recommend that you change this setting.50
storage_tickDetermines how often the consensus subsystem writes data to disk +that was requested to be written asynchronously (in milliseconds). We do +not recommend that you change this setting.5000
trust_leaseDetermines whether leader leases are used to optimize reads. When +set to true, a leader with a valid lease will handle the +read directly without contacting any followers; when set to +false, the leader will always contact followers. For more +information, see our internal documentation on + +leader leases.true
peer_get_timeoutDetermines the timeout used internally for reading consistent data, +in milliseconds. This setting must be greater than the highest request +timeout used by your application.60000 (1 minute)
peer_put_timeoutDetermines the timeout, in milliseconds, used internally for writing +consistent data. This setting must be greater than the highest request +timeout used by your application.60000 (1 minute)
peer_workersThe number of concurrent workers used by the leader to service +requests. Increasing this setting may boost performance depending on the +workload.1
tree_validationDetermines whether Riak considers peer Merkle trees to be trusted +after a node restart. When validation is enabled (the default), Riak +does not trust peer trees after a restart, instead requiring the peer to +sync with a trusted majority. This is the safest option, as it protects +Riak against undetected corruption of the Merkle tree. However, this +mode reduces Riak availability since it can sometimes require more than +a simple majority of nodes to be online and reachable.true
synchronous_tree_updatesDetermines whether the metadata updates to follower Merkle trees are +handled synchronously or not. When set to true, Riak +requires two quorum round trips to occur before replying back to the +client, the first quorum request to write the actual object and the +second to write the Merkle tree data. When set to false, +Riak will respond back to the client after the first round trip, letting +the metadata update happen asynchronously.

It's important to +note that the leader always updates its local Merkle tree +before responding to the client. This setting only affects the metadata +writes sent to followers.

In principle, asynchronous updates +are unsafe. If the leader crashes before sending the metadata updates +and all followers that had acknowledged the object write somehow revert +to the object value immediately prior to a write request, a future read +could return the immediately preceding value without realizing that it +was incorrect. Given that this scenario is unlikely, this setting +defaults to false in the name of improved performance.
false
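+
+Purely as a syntax illustration (these values are the defaults described above, which we do not recommend changing without study), the `riak_ensemble` section might read:
+
+```advancedconfig
+{riak_ensemble, [
+    {ensemble_tick, 500},
+    {lease_duration, 750},
+    {follower_timeout, 3000}
+  ]}
+```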
+ + +## Miscellaneous + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
metadata_cache_sizeThis setting controls the size of the metadata cache for each vnode. +The cache can be disabled by setting it to off (this is the +default). Enabling the cache should not be necessary in disk-based +backends (i.e. LevelDB and Bitcask) but it can help performance in the +Memory backend. Note that this setting adjusts the size of the ETS table +rather than the actual data. Thus, more space may be used than the +simple size * number-of-vnodes calculation would imply. +

+Caution: This setting should not be changed without +extensive benchmarking.
off
max_concurrent_requestsThe maximum number of concurrent requests of each type (GET or PUT) +that is allowed. Setting this value to infinite disables +overload protection. The erlang.process_limit should be at +least 3 times this setting.50000
dtraceWhether DTrace is enabled. +Do not enable unless your Erlang/OTP runtime is compiled to support +DTrace, which is available in R15B01 (supported by the official source +package) and in R14B04 via a custom repository and branch.off
vnode_management_timerSets the frequency with which vnodes attempt to trigger handoff between +this node and other nodes in the cluster.10s (10 seconds)
retry_put_coordinator_failureWhen a PUT (i.e. write) request fails, Riak will retry the operation +if this setting is set to on, which is the default. Setting +it to off will speed response times on PUT requests in +general, but at the risk of potentially increasing the likelihood of +write failure.on
background_managerRiak's background manager is a subsystem that coordinates access to +shared resources from other Riak subsystems. The background manager can +help to prevent system response degradation under times of heavy load +caused by multiple background tasks.on
+ +## Advanced Configuration + +The `advanced.config` file takes the same format as the `app.config` +file familiar to users of versions of Riak prior to 2.0. Here is an +example: + +```advancedconfig +[ + {riak_core, + [ + {cluster_mgr, {"127.0.0.1", 8098 } }, + %% more riak_core configs + ]}, + + {riak_repl, + [ + {data_root, "/var/db/riak/riak_repl/"}, + %% more riak_repl configs + ] + } +]. +``` + +The following settings are available in the `advanced.config` file: + +#### `riak_repl` settings + +Most settings that are configurable through `advanced.config` are +related to Riak's `riak_repl` subsystem. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
data_rootPath (relative or absolute) to the working directory for the +replication process./var/db/riak/riak_repl/
max_fssource_clusterThe hard limit of fullsync workers that will be running on the +source side of a cluster across all nodes on that cluster for a fullsync +to a sink cluster. This means that if you have configured fullsync for +two different clusters, both with a max_fssource_cluster of +5, 10 fullsync workers can be in progress. This only affects nodes on +the source cluster on which this parameter is defined, either via the +configuration file or command line.5
max_fssource_nodeThis setting limits the number of fullsync workers that will be +running on each individual node in a source cluster. This is a hard +limit for all fullsyncs enabled; additional fullsync configurations will +not increase the number of fullsync workers allowed to run on any node. +This only affects nodes on the source cluster on which this parameter is +defined, either via the configuration file or command line. +1
max_fssink_nodeThis setting limits the number of fullsync workers allowed to run on +each individual node in a sink cluster. This is a hard limit for all +fullsyncs enabled; additional fullsync configurations will not increase +the number of fullsync workers allowed to run on any node. This only +affects nodes on the source cluster on which this parameter is defined, +either via the configuration file or command line.1
fullsync_on_connectWhether to initiate a fullsync on initial connection from the sink +cluster.true
fullsync_intervalA single-integer value representing the duration to wait, in +minutes, between fullsyncs, or a list of {clustername, +time_in_minutes} pairs for each sink participating in fullsync +replication.30
rtq_max_bytesThe maximum size, in bytes, to which the realtime replication queue +can grow before new objects are dropped. Dropped objects will need to be +replicated with a fullsync.104857600
proxy_getWhether to enable Riak CS proxy_get and block +filter.disabled
rt_heartbeat_intervalA heartbeat message is sent from the source to the sink every +rt_heartbeat_interval seconds. Setting +rt_heartbeat_interval to undefined disables +the realtime heartbeat. This feature is available only in Riak KV +Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards.15
rt_heartbeat_timeoutIf a heartbeat response is not received within the time period +specified by this setting (in seconds), the source connection exits and +will be re-established. This feature is available only in Riak KV +Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards.15
realtime_connection_rebalance_max_delay_secsShould a server on the source cluster be restarted, this is +the amount of time (in seconds), before the realtime connections are +rebalanced by a change in the number of source nodes.300
fullsync_use_background_managerBy default, fullsync replication will attempt to coordinate with +other Riak subsystems that may be contending for the same resources. +This will help to prevent system response degradations during times of +heavy load from multiple background tasks. To disable background +coordination, set this parameter to `false`. This feature is available +only in Riak KV Enterprise Edition 2.0 and later as well as Riak KV 2.2.6 onwards.true
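+
+A hedged sketch of a `riak_repl` section using the defaults above (the `data_root` path is the listed default, not a requirement):
+
+```advancedconfig
+{riak_repl, [
+    {data_root, "/var/db/riak/riak_repl/"},
+    {fullsync_on_connect, true},
+    {fullsync_interval, 30},
+    {max_fssource_cluster, 5},
+    {max_fssource_node, 1},
+    {max_fssink_node, 1}
+  ]}
+```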
+ +#### Upgrading Riak Search with `advanced.config` + +If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][use ref search]\(codename Yokozuna), you will need to enable +legacy Search while the upgrade is underway. You can add the following +snippet to your `advanced.config` configuration to do so: + +```advancedconfig +[ + %% Other configs + + {riak_search, [ {enabled, true} ]}, + {merge_index, [ + {data_root, "/var/lib/riak/merge_index"}, + {buffer_rollover_size, 1048576}, + {max_compact_segments, 20} + ]}, + + %% Other configs +]. +``` + +#### Other settings + +There are three non-`riak_repl` settings available in +`advanced.config`. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigSectionDescriptionDefault
add_pathsriak_kvIf you are installing +custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies +the paths to any compiled .beam files that you wish to use. +This is expressed as a list of absolute paths on the node's filesystem, +e.g. [ "/tmp", "/other" ].
cluster_mgrriak_coreThe cluster manager listens for connections from remote clusters on +the specified IP and port. Every node runs one cluster manager, but only +the cluster manager running on the cluster leader will service requests. +This can change as nodes enter and leave the cluster.{"127.0.0.1", 9080}
delete_moderiak_kvSpecifies how Riak behaves after objects are marked for deletion +with a tombstone. There are three possible settings: keep +disables tombstone removal altogether; immediate removes +objects' tombstones as soon as the delete request is received; and +setting delete_mode to an integer value specifies the +number of milliseconds to wait before removing tombstones. More +information can be found in Object +Deletion.3000 (3 seconds)
target_n_valriak_coreThe highest n_val that you generally intend to use. +This setting affects how partitions are distributed within the cluster, +helping to ensure that "hot spots" don't occur, i.e. that data is never +stored more than once on the same physical node. You will need to change +this setting only in rare circumstances. Assuming that +ring_size is a power of 2, the ideal value for this setting +is both (a) greater than or equal to the largest n_val for +any bucket type and (b) an even divisor of the number of partitions in +the ring, i.e. ring_size. The default is 4, +and the number of physical nodes in your cluster must be greater than +target_n_val for this setting to be effective at preventing +hot spots.4
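+
+As an illustrative sketch only (values are the listed defaults; the `add_paths` entries are placeholders), these settings sit in their respective sections of `advanced.config`:
+
+```advancedconfig
+[
+  {riak_kv, [
+    {add_paths, ["/tmp", "/other"]},
+    {delete_mode, 3000}
+  ]},
+  {riak_core, [
+    {target_n_val, 4}
+  ]}
+].
+```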
+
+## Cluster Job Controls
+
+{{% note title="Warning" %}}
+Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
+{{% /note %}}
+
+The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
+`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`
diff --git a/content/riak/kv/2.9.2/configuring/search.md b/content/riak/kv/2.9.2/configuring/search.md
new file mode 100644
index 0000000000..2728ebb36e
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/search.md
@@ -0,0 +1,274 @@
+---
+title: "Riak Search Settings"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Riak Search Settings"
+    identifier: "configuring_search"
+    weight: 160
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/configs/search/
+  - /riak/kv/2.9.2/ops/advanced/configs/search/
+---
+
+[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search
+[usage search schema]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search-schemas
+[usage search data types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/searching-data-types
+[usage custom extractors]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/custom-extractors
+[cluster-ops aae throttle]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/active-anti-entropy/#throttling
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[config reference#search]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/#search
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#active-anti-entropy-aae
+[security index]: {{<baseurl>}}riak/kv/2.9.2/using/security/
+
+[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
+[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation
+
+This page covers how to use Riak Search (with
+[Solr](http://lucene.apache.org/solr/) integration).
+
+For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].
+
+If you are looking to develop on or with Riak Search, take a look at:
+
+* [Using Search][usage search]
+* [Search Schema][usage search schema]
+* [Custom Search Extractors][usage custom extractors]
+* [Riak KV Data Types and Search][usage search data types]
+
+## Overview
+
+We'll be walking through:
+
+1. [Prerequisites](#prerequisites)
+2. [Enable Riak Search](#enabling-riak-search)
+3. [Search Configuration Settings](#search-config-settings)
+4. [Additional Solr Information](#more-on-solr)
+
+## Prerequisites
+
+Because Solr is a Java application, you will need to install **Java 7
+or later** on every node. Installation packages can be found on the [Java SE Downloads
+page][java se downloads] and instructions in the [Java SE documentation site][java se docs].
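+
+To confirm the Java requirement is met on a node, a quick check (output format varies by JVM vendor) is:
+
+```bash
+java -version
+```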
+
+
+## Enabling Riak Search
+
+Riak Search is not enabled by default, so you must enable it in every
+node's [configuration file][config reference] as follows:
+
+```riakconf
+search = on
+```
+
+
+## Search Config Settings
+
+You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation.
+
+### `search`
+
+Enable or disable search; defaults to `off`.
+
+Valid values: `on` or `off`
+
+### `search.anti_entropy.data_dir`
+
+The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`.
+
+Valid values: a directory
+
+### `search.anti_entropy.throttle`
+
+Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`.
+
+Valid values: `on` or `off`
+
+You can read more about throttling [here][cluster-ops aae throttle].
+
+### `search.anti_entropy.throttle.$tier.delay`
+
+Set the throttling tier delays for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above.
+
+For example:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a `solrq_queue_length` of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.anti_entropy.throttle.$tier.solrq_queue_length`
+
+Set the throttling tiers for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle
+should observe at that size and above.
+
+For example:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a `solrq_queue_length` of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.dist_query`
+
+Enable this node in distributed query plans; defaults to `on`.
+
+If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak Search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long-running administrative operation (e.g.
reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.
+
+This setting can also be changed via `riak-admin` by issuing one of the following commands:
+
+```
+riak-admin set search.dist_query=off
+```
+ or
+
+```
+riak-admin set search.dist_query=on
+```
+
+Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` feature. Setting `search.dist_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up.
+
+Valid values: `on` or `off`
+
+### `search.index.error_threshold.failure_count`
+
+The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.
+
+Valid values: Integer
+
+### `search.index.error_threshold.failure_interval`
+
+The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.
+
+If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.
+
+Valid values: Milliseconds
+
+### `search.index.error_threshold.reset_interval`
+
+The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.
+
+Valid values: Milliseconds
+
+### `search.queue.batch.flush_interval`
+
+The maximum delay between notifications to flush batches to Solr; defaults to `1000` (milliseconds).
+
+This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.
+
+Valid values: `ms`, `s`, `m`, or `h`
+
+### `search.queue.batch.maximum`
+
+The maximum batch size, in number of Riak objects; defaults to `500`.
+
+Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.
+
+Valid values: Integer
+
+### `search.queue.batch.minimum`
+
+The minimum batch size, in number of Riak objects; defaults to `10`.
+
+Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.
+
+Valid values: Integer
+
+### `search.queue.high_watermark`
+
+The queue high water mark; defaults to `1000`.
+
+If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem, if writes into Solr start to fall behind.
+
+Valid values: Integer
+
+### `search.queue.high_watermark.purge_strategy`
+
+The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.
+
+Valid values: `purge_one`, `purge_index`, or `off`
+
+* `purge_one` removes the oldest item on the queue from an erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `purge_index` removes all items associated with one random erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `off` disables purging
+
+### `search.root_dir`
+
+The root directory in which index data and configuration is stored; defaults to `./data/yz`.
+
+Valid values: a directory
+
+### `search.solr.jvm_options`
+
+The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.
+
+Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.
+
+Valid values: Java command-line arguments
+
+### `search.solr.jmx_port`
+
+The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.
+
+Valid values: Integer
+
+**Note:** JMX ceased being a Riak feature in Riak KV 2.9.0p5. This setting is left here for reference but no longer has any effect.
+
+### `search.solr.port`
+
+The port number to which Solr binds (note: binds on every interface); defaults to `8093`.
+
+Valid values: Integer
+
+### `search.solr.start_timeout`
+
+How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.
+
+Values lower than 1s will be rounded up to 1s.
+
+Valid values: Integer with time units (e.g. 2m)
+
+
+## More on Solr
+### Solr JVM and Ports
+
+Riak Search runs one Solr process per node to manage its indexing and
+search functionality. While the underlying project manages
+index distribution, node coverage for queries, active anti-entropy
+(AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.
+
+Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports
+to the outside world.
+
+### Solr for Operators
+
+For further information on Solr monitoring, tuning, and performance, we
+recommend the following documents for getting started:
+
+* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
+* [Solr Performance
+  Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
+* [Solr Performance
+  Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
+* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)
+
+A wide variety of other documentation is available from the Solr OSS
+community.
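+
+To tie the settings above together, the following sketch shows what the search-related portion of a `riak.conf` might look like. The values are illustrative assumptions rather than recommendations; tune them against your own workload and hardware.
+
+```riakconf
+# Enable Riak Search (required for everything below)
+search = on
+# Solr binds to this port on every interface (8093 is the default)
+search.solr.port = 8093
+# Give the Solr JVM more headroom than the 1 GB default (assumed sizing)
+search.solr.jvm_options = -d64 -Xms2g -Xmx2g -XX:+UseCompressedOops
+# Batching: flush batches smaller than the minimum within 500ms
+search.queue.batch.minimum = 10
+search.queue.batch.maximum = 500
+search.queue.batch.flush_interval = 500ms
+# Flow control between Riak KV and the Solr batching subsystem
+search.queue.high_watermark = 1000
+search.queue.high_watermark.purge_strategy = purge_one
+```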
diff --git a/content/riak/kv/2.9.2/configuring/strong-consistency.md b/content/riak/kv/2.9.2/configuring/strong-consistency.md
new file mode 100644
index 0000000000..9b1b857b2d
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/strong-consistency.md
@@ -0,0 +1,666 @@
+---
+title: "Implementing Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Implementing Strong Consistency"
+    identifier: "configuring_strong_consistency"
+    weight: 190
+    parent: "configuring"
+toc: true
+---
+
+[apps strong consistency]: {{}}riak/kv/2.9.2/developing/app-guide/strong-consistency
+[concept strong consistency]: {{}}riak/kv/2.9.2/using/reference/strong-consistency
+[cluster ops add remove node]: {{}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes
+[config reference#strong-cons]: {{}}riak/kv/2.9.2/configuring/reference/#strong-consistency
+[use admin riak cli]: {{}}riak/kv/2.9.2/using/admin/riak-cli
+[concept eventual consistency]: {{}}riak/kv/2.9.2/learn/concepts/eventual-consistency
+[concept clusters]: {{}}riak/kv/2.9.2/learn/concepts/clusters
+[plan backend bitcask]: {{}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[glossary vnode]: {{}}riak/kv/2.9.2/learn/glossary/#vnode
+[concept buckets]: {{}}riak/kv/2.9.2/learn/concepts/buckets
+[cluster ops bucket types]: {{}}riak/kv/2.9.2/using/cluster-operations/bucket-types
+[use admin riak-admin#ensemble]: {{}}riak/kv/2.9.2/using/admin/riak-admin/#ensemble-status
+[use admin riak-admin]: {{}}riak/kv/2.9.2/using/admin/riak-admin
+[config reference#advanced]: {{}}riak/kv/2.9.2/configuring/reference/#advanced-configuration
+[plan cluster capacity]: {{}}riak/kv/2.9.2/setup/planning/cluster-capacity
+[cluster ops strong consistency]: {{}}riak/kv/2.9.2/using/cluster-operations/strong-consistency
+[apps replication properties]: {{}}riak/kv/2.9.2/developing/app-guide/replication-properties
+[concept causal context]: {{}}riak/kv/2.9.2/learn/concepts/causal-context
+[dev data types]: {{}}riak/kv/2.9.2/developing/data-types
+[glossary aae]: {{}}riak/kv/2.9.2/learn/glossary/#active-anti-entropy-aae
+[cluster ops 2i]: {{}}riak/kv/2.9.2/using/reference/secondary-indexes
+[usage commit hooks]: {{}}riak/kv/2.9.2/developing/usage/commit-hooks
+[usage conflict resolution]: {{}}riak/kv/2.9.2/developing/usage/conflict-resolution
+[cluster ops obj del]: {{}}riak/kv/2.9.2/using/reference/object-deletion
+[dev client libraries]: {{}}riak/kv/2.9.2/developing/client-libraries
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types, and Commit Hooks. We do not recommend its usage in any production environment.
+
+This document provides information on configuring and monitoring a Riak
+cluster's optional strong consistency subsystem. Documentation for
+developers building applications using Riak's strong consistency feature
+can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical
+treatment can be found in [Strong Consistency][concept strong consistency].
+
+## Minimum Cluster Size
+
+In order to use strong consistency in Riak, **your cluster must consist
+of at least three nodes**. If it does not, all strongly consistent
+operations will fail.
If your cluster is smaller than three nodes, you
+will need to [add more nodes][cluster ops add remove node] and make sure
+that strong consistency is [enabled](#enabling-strong-consistency) on all of them.
+
+Strongly consistent operations on a given key may also fail if a
+majority of object replicas in a given ensemble are unavailable, whether
+due to slowness, crashes, or network partitions. This means that you may
+see strongly consistent operations fail even if the minimum cluster size
+requirement has been met. More information on ensembles can be found in
+[Implementation Details](#implementation-details).
+
+While strong consistency requires at least three nodes, we have a
+variety of recommendations regarding cluster size, which can be found in
+[Fault Tolerance](#fault-tolerance).
+
+## Enabling Strong Consistency
+
+Strong consistency in Riak is disabled by default. You can enable it in
+each node's [configuration files][config reference#strong-cons].
+
+```riakconf
+strong_consistency = on
+```
+
+```appconfig
+%% In the older, app.config-based system, the strong consistency
+%% parameter is enable_consensus:
+
+{riak_core, [
+    % ...
+    {enable_consensus, true},
+    % ...
+    ]}
+```
+
+Remember that you must [restart your node][use admin riak cli] for
+configuration changes to take effect.
+
+For strong consistency requirements to be applied to specific keys,
+those keys must be in [buckets][concept buckets] bearing a bucket type with the
+`consistent` property set to `true`. More information can be found in
+[Using Bucket Types][cluster ops bucket types].
+
+If you enable strong consistency on all nodes in a cluster with fewer
+than three nodes, strong consistency will be **enabled** but not yet
+**active**. Strongly consistent operations are not possible in this
+state. Once at least three nodes with strong consistency enabled are
+detected in the cluster, the system will be activated and ready for use.
+You can check on the status of the strong consistency subsystem using
+the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command.
+
+## Fault Tolerance
+
+Strongly consistent operations in Riak are necessarily less highly
+available than [eventually consistent][concept eventual consistency] operations
+because strongly consistent operations can only succeed if a **quorum**
+of object replicas are currently reachable. A quorum can be expressed as
+N / 2 + 1 (or `n_val` / 2 + 1, using integer division), meaning that 3 replicas
+constitute a quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are
+unavailable, for example, no strongly consistent operations on that
+object can succeed.
+
+While Riak uses N=3 by default, bear in mind that **higher values of N
+will allow for more fault tolerance**. The table below shows the number
+of allowable missing replicas for assorted values of N:
+
+Replicas | Allowable missing replicas
+:--------|:--------------------------
+3 | 1
+5 | 2
+7 | 3
+9 | 4
+15 | 7
+
+Thus, we recommend setting `n_val` higher than the default of 3 for
+strongly consistent operations. More on `n_val` in the section below.
+
+### n_val Recommendations
+
+Due to the quorum requirements explained above, we recommend that you
+use _at least_ N=5 for strongly consistent data. You can set the value
+of N, i.e. `n_val`, for buckets
+[using bucket types][cluster ops bucket types].
For example, you
+can create and activate a bucket type with N set to 5 and strong
+consistency enabled---we'll call the bucket type
+`consistent_and_fault_tolerant`---using the following series of
+[commands][use admin riak-admin]:
+
+```bash
+riak-admin bucket-type create consistent_and_fault_tolerant \
+  '{"props": {"consistent":true,"n_val":5}}'
+riak-admin bucket-type activate consistent_and_fault_tolerant
+```
+
+If the `activate` command outputs `consistent_and_fault_tolerant has
+been activated`, the bucket type is now ready to provide strong
+consistency guarantees.
+
+#### Setting the target_n_val parameter
+
+The `target_n_val` parameter sets the highest `n_val` that you intend to
+use in an entire cluster. The purpose of this parameter is to ensure
+that so-called "hot spots" don't occur, i.e. that data is never stored
+more than once on the same physical node. This can happen when:
+
+* `target_n_val` is greater than the number of physical nodes, or
+* the `n_val` for a bucket is greater than `target_n_val`.
+
+A problem to be aware of if you're using strong consistency is that the
+default for `target_n_val` is 4, while our suggested minimum `n_val` for
+strongly consistent bucket types is 5. This means that you will need to
+raise `target_n_val` if you intend to use an `n_val` over 4 for _any_
+bucket type in your cluster. If you anticipate using an `n_val` of 7 as
+the largest `n_val` within your cluster, for example, you will need to
+set `target_n_val` to 7.
+
+This setting is not contained in `riak.conf`, and must instead be set in
+the `advanced.config` file. For more information, see our documentation
+on [advanced configuration][config reference#advanced].
+
+If you are using strong consistency in a cluster that has already been
+created with a `target_n_val` that is too low (remember that the default
+is too low), you will need to raise it to the desired higher value and
+restart each node.
+
+#### Note on Bucket Properties
+
+The `consistent` bucket property is one of two bucket properties,
+alongside [`datatype`][cluster ops bucket types], that cannot be changed once a
+bucket type has been created.
+
+Furthermore, if `consistent` is set to `true` for a bucket type, you
+cannot change the `n_val` for the bucket type once it's been created. If
+you attempt to do so, you'll see the following error:
+
+```
+Error updating bucket <bucket_type>:
+n_val cannot be modified for existing consistent type
+```
+
+If you've created a bucket type with a specific `n_val` and wish to
+change it, you will need to create a new bucket type with the
+appropriate `n_val` and use the new bucket type instead.
+
+### Fault Tolerance and Cluster Size
+
+From the standpoint of strongly consistent operations, larger clusters
+tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down.
+
+Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If
+two nodes go down, _all_ ensembles will lose quorum and will be unable
+to function. Strongly consistent operations on the entire keyspace will
+fail until at least one node is brought back online. And even when that
+one node is brought back online, a significant portion of the keyspace
+will continue to be unavailable for strongly consistent operations.
+
+For the sake of contrast, imagine a 50-node cluster in which all
+ensembles are N=5 (i.e. all objects are replicated to five nodes).
In
+this cluster, each node is involved in only 10% of the total ensembles;
+if a single node fails, that failure will thus impact only 10% of
+ensembles. In addition, because N is set to 5, that single failure will not impact
+quorum for _any_ ensemble in the cluster; two additional node failures
+would need to occur for quorum to be lost for _any_ ensemble. And even
+in the case of three nodes failing, it is highly unlikely that that
+failure would impact the same ensembles; if it did, only those ensembles
+would become unavailable, affecting only 10% of the key space, as
+opposed to 100% in the example of a 3-node cluster consisting of N=3
+ensembles.
+
+These examples illustrate why we recommend higher values for N---again,
+at least N=5---as well as clusters with many nodes. The 50-node cluster
+example above is used only to illustrate why larger clusters are more
+fault tolerant. The definition of "many" nodes will vary according to your needs.
+For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].
+
+### Offline Node Recommendations
+
+In general, strongly consistent Riak is more sensitive to the number of
+nodes in the cluster than eventually consistent Riak, due to the quorum
+requirements described above. While Riak is designed to withstand a
+variety of failure scenarios that make nodes in the cluster unreachable,
+such as hardware or network failure, **we nonetheless recommend that you
+limit the number of nodes that you intentionally down or reboot**.
+Having multiple nodes leave the cluster at once can threaten quorum and
+thus affect the viability of some or all strongly consistent operations,
+depending on the size of the cluster.
+
+If you're using strong consistency and you do need to reboot multiple
+nodes, we recommend rebooting them very carefully. Rebooting nodes too
+quickly in succession can force the cluster to lose quorum and thus be
+unable to service strongly consistent operations. The best strategy is
+to reboot nodes one at a time and wait for each node to rejoin existing
+[ensembles][cluster ops strong consistency] before
+continuing to the next node. At any point in time, the state of
+currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].
+
+## Performance
+
+If you run into performance issues, bear in mind that the key space in a
+Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of
+that key space. Larger [ring sizes][concept clusters] allow more
+independent consensus groups to exist in a cluster, which can provide
+for more concurrency and higher throughput, and thus better performance.
+The ideal ring size, however, will also depend on the number of nodes in
+the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].
+
+Adding nodes to your cluster is another means of enhancing the
+performance of strongly consistent operations. Instructions on doing so
+can be found in [Adding and Removing Nodes][cluster ops add remove node].
+
+Your cluster's configuration can also affect strong consistency
+performance. See the section on [configuration][config reference#strong-cons] below.
+
+## riak-admin ensemble-status
+
+The [`riak-admin`][use admin riak-admin] interface
+used for general node/cluster management has an `ensemble-status`
+command that provides insight into the current status of the consensus
+subsystem undergirding strong consistency.
+
+Running the command by itself will provide the current state of the
+subsystem:
+
+```bash
+riak-admin ensemble-status
+```
+
+If strong consistency is not currently enabled, you will see `Note: The
+consensus subsystem is not enabled.` in the output of the command; if
+strong consistency is enabled, you will see output like this:
+
+```
+============================== Consensus System ===============================
+Enabled:     true
+Active:      true
+Ring Ready:  true
+Validation:  strong (trusted majority required)
+Metadata:    best-effort replication (asynchronous)
+
+================================== Ensembles ==================================
+ Ensemble     Quorum        Nodes      Leader
+-------------------------------------------------------------------------------
+   root       4 / 4         4 / 4      riak@riak1
+    2         3 / 3         3 / 3      riak@riak2
+    3         3 / 3         3 / 3      riak@riak4
+    4         3 / 3         3 / 3      riak@riak1
+    5         3 / 3         3 / 3      riak@riak2
+    6         3 / 3         3 / 3      riak@riak2
+    7         3 / 3         3 / 3      riak@riak4
+    8         3 / 3         3 / 3      riak@riak4
+```
+
+### Interpreting ensemble-status Output
+
+The following table provides a guide to `ensemble-status` output:
+
+Item | Meaning
+:----|:-------
+`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on [enabling strong consistency](#enabling-strong-consistency).
+`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes.
+`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change.
+`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`.
+`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`.
+`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.<br>• Ensemble --- The ID of the ensemble<br>• Quorum --- The number of ensemble peers that are either leading or following<br>• Nodes --- The number of nodes currently online<br>• Leader --- The current leader node for the ensemble
+
+**Note**: The **root ensemble**, designated by `root` in the sample
+output above, is a special ensemble that stores a list of nodes and
+ensembles in the cluster.
+
+More in-depth information on ensembles can be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+
+### Inspecting Specific Ensembles
+
+The `ensemble-status` command also enables you to directly inspect the
+status of specific ensembles in a cluster. The IDs for all current
+ensembles are displayed in the `Ensembles` section of the
+`ensemble-status` output described above.
+
+To inspect a specific ensemble, specify the ID:
+
+```bash
+riak-admin ensemble-status <id>
+```
+
+The following would inspect ensemble 2:
+
+```bash
+riak-admin ensemble-status 2
+```
+
+Below is sample output for a single ensemble:
+
+```
+================================= Ensemble #2 =================================
+Id:           {kv,0,3}
+Leader:       riak@riak2 (2)
+Leader ready: true
+
+==================================== Peers ====================================
+ Peer  Status     Trusted          Epoch         Node
+-------------------------------------------------------------------------------
+  1    following    yes              1           riak@riak1
+  2    leading      yes              1           riak@riak2
+  3    following    yes              1           riak@riak3
+```
+
+The table below provides a guide to the output:
+
+Item | Meaning
+:----|:-------
+`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. All ensembles are `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible.
+`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
+`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
+`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.<br>• Peer --- The ID of the peer<br>• Status --- Whether the peer is a leader or a follower<br>• Trusted --- Whether the peer's Merkle tree is currently considered trusted or not<br>• Epoch --- The current consensus epoch for the peer. The epoch is incremented each time the leader changes.<br>• Node --- The node on which the peer resides.
+
+More information on leaders, peers, Merkle trees, and other details can
+be found in [Implementation Details](#implementation-details) below.
+
+## Implementation Details
+
+Strong consistency in Riak is handled by a subsystem called
+[`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+This system functions differently from other systems in Riak in a number
+of ways, and many of these differences are important to bear in mind for
+operators configuring their cluster's usage of strong consistency.
+
+### Basic Operations
+
+The first major difference is that strongly consistent Riak involves a
+different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types
+of atomic operations on objects:
+
+* **Get** operations work just as they do against
+  non-strongly-consistent keys, but with two crucial differences:
+  1. Connecting clients are guaranteed to receive the most recently
+     written value (which makes those operations CP, i.e. consistent and
+     partition tolerant)
+  2. Reads on strongly consistent keys *never* return siblings, hence
+     there is no need to develop any sort of [conflict resolution][usage conflict resolution]
+     strategy for those keys
+* **Conditional put** operations write an object only if no object
+  currently exists under that key. The operation will fail if the key
+  already exists; if the key was never written or has been deleted, the
+  operation succeeds.
+* **Conditional modify** operations are compare-and-swap (CAS)
+  operations that succeed only if the value of a key has not changed
+  since it was previously read.
+* **Delete** operations work mostly like they do against
+  non-strongly-consistent keys, with the exception that
+  [tombstones][cluster ops obj del] are not harvested, which is
+  the equivalent of having `delete_mode` set to `keep`.
+
+**From the standpoint of clients connecting to Riak, there is little
+difference between strongly and non-strongly consistent data**. The
+operations performed on objects---reads, writes, deletes, etc.---are the
+same, which means that the client API for strong consistency is
+essentially the same as it is for eventually consistent operations, with
+the important exception of error handling.
+
+### Ensembles
+
+The main actors in Riak's implementation of strong consistency are
+**ensembles**, which are independent groups that watch over a portion of
+a Riak cluster's key space and coordinate strongly consistent operations
+across nodes. When watching over a given key space, ensembles must act
+upon multiple replicas of a given object, the number of which is
+specified by `n_val` (more on this in [Replication Properties][apps replication properties]).
+
+Eventually consistent Riak can service requests even when only a single
+object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it
+requires that a **quorum** of object replicas be online and reachable,
+where a quorum is defined as `n_val` / 2 + 1 (using integer division). **If a quorum is not
+available for a key, all strongly consistent operations against that key
+will fail**.
+
+More information can be found in the section on Fault Tolerance above.
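+
+To make the quorum rule concrete, the arithmetic can be sketched in a few lines of shell (this merely restates `n_val` / 2 + 1 with integer division; it is not a Riak command):
+
+```bash
+# Quorum sizes for common n_val settings
+for n_val in 3 5 7 9; do
+  echo "n_val=${n_val} quorum=$(( n_val / 2 + 1 ))"
+done
+# Prints quorums of 2, 3, 4, and 5, respectively
+```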
+
+### Peers, Leaders, Followers, and Workers
+
+All ensembles in strongly consistent Riak consist of agents called
+**peers**. The number of peers in an ensemble is defined by the `n_val`
+of that ensemble, i.e. the number of object replicas that the
+ensemble watches over. Amongst the peers in the ensemble, there are two
+basic actors: **leaders** and **followers**.
+
+Leaders and followers coordinate with one another on most requests.
+While leaders and followers coordinate on all writes, i.e. all puts and
+deletes, you can enable leaders to respond to gets without the need to
+coordinate with followers. This is known as granting a **leader lease**.
+Leader leases are enabled by default, and are disabled (or re-enabled)
+at the cluster level. A more in-depth account of ensemble behavior can
+be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/tree/develop/doc).
+
+In addition to leaders and followers, ensemble peers use lightweight
+Erlang processes called **workers** to perform long-running K/V
+operations, allowing peers to remain responsive to requests. The number
+of workers assigned to each peer depends on your configuration.
+
+These terms should be borne in mind in the sections on configuration
+below.
+
+### Integrity Checking
+
+An essential part of implementing a strong consistency subsystem in a
+distributed system is **integrity checking**, which is a process that
+guards against data corruption and inconsistency even in the face of
+network partitions and other adverse events that Riak was built to
+handle gracefully.
+
+Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency
+integrity checking utilizes [Merkle
+trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on
+disk. All peers in an ensemble, i.e. all leaders and followers, maintain
+their own Merkle trees and update those trees in the event of most
+strongly consistent operations. Those updates can occur synchronously or
+asynchronously from the standpoint of client operations, depending on
+the configuration that you specify.
+
+While integrity checking takes place automatically in Riak, there are
+important aspects of its behavior that you can configure. See the
+[Merkle Tree Settings](#merkle-tree-settings) section below for more
+information on configurable parameters.
+
+## Configuring Strong Consistency
+
+The `riak_ensemble` subsystem provides a wide variety of tunable
+parameters that you can adjust to fit the needs of your Riak cluster.
+All `riak_ensemble`-specific parameters, with the exception of the
+`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency),
+must be set in each node's `advanced.config` file, _not_ in `riak.conf`
+or `app.config`.
+
+Information on the syntax and usage of `advanced.config` can be found in
+our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full
+listing of [strong-consistency-related configuration parameters][config reference#strong-cons].
+
+Please note that the sections below require a basic understanding of the
+following terms:
+
+* ensemble
+* peer
+* leader
+* follower
+* worker
+* integrity checking
+* Merkle tree
+
+For an explanation of these terms, see the [Implementation Details](#implementation-details) section
+above.
+
+### Leader Behavior
+
+The `trust_lease` setting determines whether leader leases are used to
+optimize reads.
When set to `true`, a leader with a valid lease can
+handle reads directly without needing to contact any followers. When
+`false`, the leader will always contact followers, which can lead to
+degraded read performance. The default is `true`. We recommend leaving
+leader leases enabled for performance reasons.
+
+All leaders have periodic duties that they perform, including refreshing
+the leader lease. You can determine how frequently this occurs, in
+milliseconds, using the `ensemble_tick` setting. The default is 500
+milliseconds. Please note that this setting must be lower than both
+the `lease_duration` and `follower_timeout` settings (both explained
+below).
+
+If you set `trust_lease` to `true`, you can also specify how long a
+leader lease remains valid without being refreshed using the
+`lease_duration` setting, which is specified in milliseconds. This
+setting should be higher than `ensemble_tick` to ensure that leaders
+have time to refresh their leases before they time out, and it _must_
+be lower than `follower_timeout`, explained in the section below. The
+default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400,
+`lease_duration` will default to 600.
+
+### Worker Settings
+
+You can choose how many workers are assigned to each peer using the
+`peer_workers` setting. Workers are lightweight processes spawned by
+leaders and followers. While increasing the number of workers will make
+the strong consistency subsystem slightly more computationally
+expensive, more workers can mean improved performance in some cases,
+depending on the workload. The default is 1.
+
+### Timeouts
+
+You can establish timeouts for both reads and writes (puts and deletes)
+using the `peer_get_timeout` and `peer_put_timeout` settings,
+respectively. Both are expressed in milliseconds and default to 60000
+(1 minute).
+
+Longer timeouts will decrease the likelihood that read or write
+operations will fail due to long computation times; shorter timeouts
+entail shorter wait times for connecting clients, but at a higher risk
+of failed operations under heavy load.
+
+### Merkle Tree Settings
+
+
+Leaders and followers in Riak's strong consistency system maintain
+persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for
+all data stored by each peer. More information can be found in the
+**Integrity Checking** section above. The two sections directly below
+describe Merkle-tree-related parameters.
+
+#### Tree Validation
+
+The `tree_validation` parameter determines whether Riak considers Merkle
+trees to be trusted after peers are restarted (for whatever reason).
+When enabled, i.e. when `tree_validation` is set to `true` (the
+default), Riak does not trust peer trees after a restart, instead
+requiring the peer to sync with a trusted quorum. While this is the
+safest mode because it protects Riak against silent corruption in Merkle
+trees, it carries the drawback that it can reduce Riak availability by
+requiring more than a simple majority of nodes to be online and
+reachable when peers restart.
+
+If you are using ensembles with N=3, we strongly recommend setting
+`tree_validation` to `false`.
+
+#### Synchronous vs. Asynchronous Tree Updates
+
+Merkle tree updates can happen synchronously or asynchronously. This is
+determined by the `synchronous_tree_updates` parameter.
When set to
+`false`, which is the default, Riak responds to the client after the
+first roundtrip that updates the followers' data but before the second
+roundtrip required to update the followers' Merkle trees, allowing the
+Merkle tree update to happen asynchronously in the background; when set
+to `true`, Riak requires two quorum roundtrips to occur before replying
+back to the client, which can increase per-request latency.
+
+Please note that this setting applies only to Merkle tree updates sent
+to followers. Leaders _always_ update their local Merkle trees before
+responding to the client. Asynchronous updates can be unsafe in certain
+scenarios. For example, if a leader crashes before sending metadata
+updates to followers _and_ all followers that had acknowledged the write
+somehow revert the object value immediately prior to the write request,
+a future read could hypothetically return the immediately preceding
+value without realizing that the value was incorrect. Setting
+`synchronous_tree_updates` to `false` does bear this possibility, but it
+is highly unlikely.
+
+## Strong Consistency and Active Anti-Entropy
+
+Riak's [active anti-entropy][glossary aae] \(AAE) feature _can_ repair strongly
+consistent data. Although it is not necessary to use active anti-entropy
+if you are using strong consistency, we nonetheless recommend doing so.
+
+Without AAE, all object conflicts are repaired via read repair.
+Read repair, however, cannot repair conflicts in so-called "cold data,"
+i.e. data that may not be read for long periods of time. While using AAE
+does entail small performance losses, not using AAE can lead to problems
+with silent on-disk corruption.
+
+## Strong Consistency and Bitcask
+
+One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability in objects' TTL. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in
+strongly consistent buckets only in situations when these occasional
+irregularities are acceptable.
+
+## Important Caveats
+
+The following Riak features are not currently available in strongly
+consistent buckets:
+
+* [Secondary indexes][cluster ops 2i] --- If you do attach
+  secondary index metadata to objects in strongly consistent buckets,
+  strongly consistent operations can still proceed, but that metadata
+  will be silently ignored.
+* [Riak Data Types][dev data types] --- Data Types can currently be
+  used only in an eventually consistent fashion.
+* [Using commit hooks][usage commit hooks] --- Neither pre- nor post-commit hooks are supported in strongly consistent buckets. If you do associate a
+  strongly consistent bucket with one or more commit hooks, strongly
+  consistent operations can proceed as normal in that bucket, but all
+  commit hooks will be silently ignored.
+
+Furthermore, you should also be aware that strong consistency guarantees
+are applied only at the level of single keys.
There is currently no
+support within Riak for strongly consistent operations against multiple
+keys, although it is always possible to incorporate client-side write
+and read locks in applications that use strong consistency.
+
+## Known Issues
+
+There are a few known issues that you should be aware of when using the
+latest version of strong consistency.
+
+* **Consistent reads of never-written keys create tombstones** --- A
+  [tombstone][cluster ops obj del] will be written if you perform a read
+  against a key that a majority of peers claim does not exist. This is
+  necessary for certain corner cases in which offline or unreachable
+  replicas containing partially written data need to be rolled back in
+  the future.
+* **Consistent keys and key listing** --- In Riak, key listing
+  operations, such as listing all the keys in a bucket, do not filter
+  out tombstones. While this is rarely a problem for
+  non-strongly-consistent keys, it does present an issue for strong
+  consistency due to the tombstone issues mentioned above.
+* **Secondary indexes not supported** --- Strongly consistent
+  operations do not support [secondary indexes][cluster ops 2i] \(2i) at this time. Furthermore, any other metadata
+  attached to objects, even if not related to 2i, will be silently
+  ignored by Riak in strongly consistent buckets.
+* **Multi-Datacenter Replication not supported** --- At this time,
+  consistent keys are *not* replicated across clusters using
+  Multi-Datacenter Replication \(MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly
+  consistent data within a cluster with eventually consistent data
+  between clusters is difficult to reason about from the perspective of
+  applications. In a future version of Riak, we will add support for
+  strongly consistent replication across multiple datacenters/clusters.
+* **Client library exceptions** --- Basho's official [client
+  libraries][dev client libraries] convert errors returned by Riak into generic exceptions,
+  with a message derived from the returned server-side error message.
diff --git a/content/riak/kv/2.9.2/configuring/v2-multi-datacenter.md b/content/riak/kv/2.9.2/configuring/v2-multi-datacenter.md
new file mode 100644
index 0000000000..6f87b26edb
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/v2-multi-datacenter.md
@@ -0,0 +1,156 @@
+---
+title_supertext: "Configuring:"
+title: "V2 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "V2 Multi-Datacenter"
+    identifier: "configuring_v2"
+    weight: 210
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.2/ops/mdc/v2/configuration
+  - /riak/kv/2.9.2/ops/mdc/v2/configuration
+---
+
+[config v2 ssl]: {{}}riak/kv/2.9.2/configuring/v2-multi-datacenter/ssl
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.2/configuring/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication capabilities offer a
+variety of configurable parameters.
+
+## File
+
+The configuration for replication is kept in the `riak_repl` section of
+each node's `advanced.config`.
That section looks like this:
+
+```advancedconfig
+{riak_repl, [
+    {fullsync_on_connect, true},
+    {fullsync_interval, 360},
+    % Debian/Centos/RHEL:
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    % Solaris:
+    % {data_root, "/opt/riak/data/riak_repl"},
+    % FreeBSD/SmartOS:
+    % {data_root, "/var/db/riak/riak_repl"},
+    {queue_size, 104857600},
+    {server_max_pending, 5},
+    {client_ack_frequency, 5}
+  ]}
+```
+
+## Usage
+
+These settings are configured using the standard Erlang config file
+syntax, i.e. `{Setting, Value}`. For example, if you wished to set
+`ssl_enabled` to `true`, you would insert the following line into the
+`riak_repl` section (appending a comma if you have more settings to
+follow):
+
+```advancedconfig
+{riak_repl, [
+    % Other configs
+    {ssl_enabled, true},
+    % Other configs
+  ]}
+```
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by
+running the following command:
+
+```bash
+riak chkconfig
+```
+
+The output from this command will point you to syntactical and other
+errors in your configuration files.
+
+A full list of configurable parameters can be found in the sections
+below.
+
+## Fullsync Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster
+`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.<br>**Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects for which the leader will wait for an acknowledgment from the remote location before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site
+
+## Client Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster
+`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs
+`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred
+
+## Buffer Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes
+`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes
+
+## Worker Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2).
+`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `PUT` will be performed to store the transferred object on the client site. See [3](#f3).
+`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2).
+`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `PUT` will be performed to store the transferred object on the client site. See [3](#f3).
+
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA and so on.
+
+2. Each get worker spawns 2 processes, one for the work and
+   one for the get FSM (an Erlang finite state machine implementation for `GET`
+   requests). Be sure that you don't run over the maximum number of allowed
+   processes in an Erlang VM (check `vm.args` for a `+P` property).
+
+3. Each put worker spawns 2 processes, one for the work, and
+   one for the put FSM (an Erlang finite state machine implementation for `PUT`
+   requests). Be sure that you don't run over the maximum number of allowed
+   processes in an Erlang VM (check `vm.args` for a `+P` property).
+
+4. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v2 ssl].
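+
+As a closing illustration, the sketch below combines several of the settings documented above into a single `riak_repl` section for `advanced.config`. The values, paths, and ACL pattern are assumptions for illustration only, not recommendations; adjust them for your environment.
+
+```advancedconfig
+{riak_repl, [
+    %% Fullsync: skip the sync on connect and run one every 2 hours
+    {fullsync_on_connect, false},
+    {fullsync_interval, 120},
+    %% SSL: encrypt replication traffic and restrict peers by
+    %% certificate common name (example paths and pattern)
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"},
+    {peer_common_name_acl, ["*.corp.example.com"]},
+    %% Queue size (bytes) and fullsync worker pool bounds
+    {queue_size, 104857600},
+    {max_get_workers, 50},
+    {max_put_workers, 50}
+  ]}
+```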
diff --git a/content/riak/kv/2.9.2/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.9.2/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..198750ed5d --- /dev/null +++ b/content/riak/kv/2.9.2/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,78 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.2/ops/mdc/v2/nat + - /riak/kv/2.9.2/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/2.9.2/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.2/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` diff --git a/content/riak/kv/2.9.2/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.9.2/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..cc0a620dfb --- /dev/null +++ b/content/riak/kv/2.9.2/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,367 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.2/ops/mdc/v2/quick-start + - /riak/kv/2.9.2/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.2/configuring/v3-multi-datacenter/quick-start/) instead. 
+{{% /note %}}
+
+[install index]: {{}}riak/kv/2.9.2/setup/installing
+[perf index]: {{}}riak/kv/2.9.2/using/performance
+[config v2 mdc]: {{}}riak/kv/2.9.2/configuring/v2-multi-datacenter
+[cluster ops v2 mdc#status]: {{}}riak/kv/2.9.2/using/cluster-operations/v2-multi-datacenter#status
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through
+the process of configuring Riak's version 2 Replication to perform
+replication between two sample Riak clusters in separate networks. This
+guide will also cover bidirectional replication, which is accomplished
+by setting up unidirectional replication in both directions between the
+clusters.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [Performing system tuning][perf index]
+* [Reviewing configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there are `listener_<nodename>`s for
+each listening node, and that `leader` and `server_stats` are populated.
+They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following:
+
+```
+Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010"
+leader: 'riak@192.168.1.21'
+client_stats: [{<8051.3902.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster1"},
+                         {strategy,riak_repl_keylist_client},
+                         {fullsync_worker,<8051.3909.0>},
+                         {put_pool_size,5},
+                         {connected,"172.16.1.11",9010},
+                         {state,wait_for_fullsync}]}}]
+```
+
+### Testing Realtime Replication
+
+That's all there is to it! When `PUT` requests are coordinated by
+Cluster1, these operations will be replicated to Cluster2.
+
+You can use the following example script to verify that `PUT` operations
+sent to Cluster1 are being replicated to Cluster2:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=172.16.1.11
+CLUSTER_2_IP=192.168.1.21
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
+  echo "C1 PUT Successful"
+else
+  echo "C1 PUT Failed"
+  exit 1
+fi
+
+CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent
+  C1:${CHECKPUT_C1}
+  C2:${CHECKREPL_C1_TO_C2}"
+  exit 1
+fi
+
+exit 0
+```
+
+You will have to change some of the above variables for your own
+environment, such as IP addresses or ports.
+
+If you run this script and things are working as expected, you will get
+the following output:
+
+```
+C1 PUT Successful
+C1 to C2 consistent
+```
+
+## Set Up Cluster2 → Cluster1 Replication
+
+### About Bidirectional Replication
+
+Multi-Datacenter support can also be configured to replicate in both
+directions, ensuring eventual consistency between your two datacenters.
+Setting up bidirectional replication is as simple as repeating the steps
+above in the other direction, i.e. from Cluster2 to Cluster1.
+
+### Set Up the Listeners on Cluster2 (Source cluster)
+
+On a node in Cluster2, `node4` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010
+riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010
+riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010
+```
+
+### Set Up the Site on Cluster1 (Site cluster)
+
+On a node in Cluster1, `node1` for example, inform the replication
+clients where the Source Listeners are with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you configured in
+the earlier step. For `sitename` enter **Cluster2**.
+
+```bash
+riak-repl add-site 192.168.1.21 9010 Cluster2
+```
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
During cancellation, `riak-repl status` will show 'cancelled' in the
status.

```bash
riak-repl cancel-fullsync
```

Fullsync operations may also be paused, resumed, or scheduled for
certain times using cron jobs. A complete list of fullsync commands is
available in the [MDC Operations][cluster ops v2 mdc] documentation.
diff --git a/content/riak/kv/2.9.2/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.9.2/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..db525c1e6a
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,160 @@
---
title_supertext: "V2 Multi-Datacenter Replication:"
title: "SSL"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "SSL"
    identifier: "configuring_v2_replication_ssl"
    weight: 103
    parent: "configuring_v2"
toc: true
commercial_offering: true
aliases:
  - /riak/2.9.2/ops/mdc/v2/ssl
  - /riak/kv/2.9.2/ops/mdc/v2/ssl
---

{{% note title="Deprecation Warning" %}}
v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{< baseurl >}}riak/kv/2.9.2/configuring/v3-multi-datacenter/ssl/) instead.
{{% /note %}}

## Features

Riak REPL SSL support consists of the following items:

 * Encryption of replication data
 * SSL certificate chain validation
 * SSL common name whitelisting support

## SSL Configuration

To configure SSL, you will need to include the following four settings
in the `riak_repl` section of your `advanced.config`:

```advancedconfig
{riak_repl, [
    % ...
    {ssl_enabled, true},
    {certfile, "/full/path/to/site1-cert.pem"},
    {keyfile, "/full/path/to/site1-key.pem"},
    {cacertdir, "/full/path/to/cacertsdir"}
    % ...
  ]}

```

The `cacertdir` is a directory containing all of the CA certificates
needed to verify the CA chain back to the root.

## Verifying Peer Certificates

Verification of a peer's certificate common name *(CN)* is enabled by using
the `peer_common_name_acl` property in the `riak_repl` section of your
`advanced.config` to specify an Access Control List *(ACL)*.

The ACL is a list of one or more *patterns*, separated by commas. Each
pattern may be either the exact CN of a certificate to allow, or a
wildcard in the form `*.some.domain.name`. Pattern comparison is
case-insensitive, and a CN matching any of the patterns is allowed to connect.

For example, `["*.corp.com"]` would match `site3.corp.com` but not
`foo.bar.corp.com` or `corp.com`. If the ACL were
`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
would be allowed to connect, but `corp.com` still would not.

If no ACL (or only the special value `"*"`) is specified, no CN filtering
is performed, except as described below.

{{% note title="Identical Local and Peer Common Names" %}}
As a special case supporting the view that a host's CN is a fully-qualified
domain name that uniquely identifies a single network device, if the CNs of
the local and peer certificates are the same, the nodes will *NOT* be allowed
to connect.

This evaluation supersedes ACL checks, so it cannot be overridden with any
setting of the `peer_common_name_acl` property.
{{% /note %}}

### Examples

The following example will only allow connections from peer certificate
names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:

```advancedconfig
{riak_repl, [
    % ...
    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
    % ...
  ]}
```

The following example will allow connections from peer certificate names
like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
peer certificate name like `db.backup.bashosamplecorp.com`:

```advancedconfig
{riak_repl, [
    % ...
    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
    % ...
  ]}

```

This example will match any peer certificate name (and is the default):

```advancedconfig
{riak_repl, [
    % ...
    {peer_common_name_acl, "*"}
    % ...
  ]}
```

## SSL CA Validation

You can adjust the way CA certificates are validated by adding the
following to the `riak_repl` section of your `advanced.config`:

```advancedconfig
{riak_repl, [
    % ...
    {ssl_depth, ...}
    % ...
  ]}
```

**Note**: `ssl_depth` takes an integer parameter.

The depth specifies the maximum number of intermediate certificates that
may follow the peer certificate in a valid certification path. By default,
no more than one (1) intermediate certificate is allowed between the peer
certificate and root CA. By definition, intermediate certificates cannot
be self-signed.

For example:

 * A depth of 0 indicates that the certificate must be signed directly
   by a root certificate authority (CA)
 * A depth of 1 indicates that the certificate may be signed by at most
   one intermediate CA, followed by a root CA
 * A depth of 2 indicates that the certificate may be signed by at most
   two intermediate CAs, followed by a root CA

## Compatibility

Replication SSL is ONLY available in Riak 1.2+.

If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
1.1 node, the connection will be denied and an error will be logged.

### Self-Signed Certificates

You can generate your own CA and keys by using [this
guide](http://www.debian-administration.org/articles/618).

Make sure that you remove the password protection from the keys you
generate.
diff --git a/content/riak/kv/2.9.2/configuring/v3-multi-datacenter.md b/content/riak/kv/2.9.2/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..cabb641d55
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/v3-multi-datacenter.md
@@ -0,0 +1,157 @@
---
title_supertext: "Configuring:"
title: "V3 Multi-Datacenter Replication"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "V3 Multi-Datacenter"
    identifier: "configuring_v3"
    weight: 200
    parent: "configuring"
toc: true
commercial_offering: true
aliases:
  - /riak/2.9.2/ops/mdc/v3/configuration
  - /riak/kv/2.9.2/ops/mdc/v3/configuration
---

[config reference#advanced]: {{< baseurl >}}riak/kv/2.9.2/configuring/reference/#advanced-configuration
[config v3 ssl#verify-peer]: {{< baseurl >}}riak/kv/2.9.2/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates

> **Note on the `cluster_mgr` setting**
>
> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.

The configuration for Multi-Datacenter (MDC) Replication is kept in
both the `riak_core` and `riak_repl` sections of the `app.config`
configuration file.

If you are using Riak KV version 2.0 or later, configuration is managed
using the `advanced.config` files on
each node. The semantics of the `advanced.config` file are similar to
the formerly used `app.config` file. For more information and for a list
of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+ +Here is a sample of the syntax: + +```advancedconfig +{riak_core, [ + %% Every *node* runs one cluster_mgr + {cluster_mgr, {"0.0.0.0", 9080 }}, + % ... +]}, +{riak_repl, [ + %% Pick the correct data_root for your platform + %% Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + %% Solaris: + %% {data_root, "/opt/riak/data/riak_repl"}, + %% FreeBSD/SmartOS: + %% {data_root, "/var/db/riak/riak_repl"}, + {max_fssource_cluster, 5}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {fullsync_on_connect, false}, + % ... +]} +``` + +## Settings + +Riak MDC configuration is set using the standard Erlang config file +syntax `{Setting, Value}`. For example, if you wished to set +`fullsync_on_connect` to `false`, you would insert this line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{fullsync_on_connect, false} +``` + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak_repl Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number. +`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. +`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster +`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process +`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication. 
`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100MB. Dropped objects will need to be replicated with a fullsync.
`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).


## riak_core Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`keyfile` | `path` (string) | `undefined` | Fully qualified path to an ssl `.pem` key file
`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).


## Heartbeat Settings

There are two realtime-replication-related settings in the `riak_repl`
section of `advanced.config` related to the periodic "heartbeat" that is sent
from the source to the sink cluster to verify the sink cluster's
liveness. The `rt_heartbeat_interval` setting determines how often the
heartbeat is sent (in seconds). If a heartbeat is sent and a response is
not received, Riak will wait `rt_heartbeat_timeout` seconds before
attempting to re-connect to the sink; if any data is received from the
sink, even if it is not heartbeat data, the timer will be reset. Setting
`rt_heartbeat_interval` to `undefined` will disable the heartbeat.

One of the consequences of lowering the timeout threshold arises when
connections are working properly but are slow to respond (perhaps due to
heavy load). In this case, shortening the timeout means that Riak may
attempt to re-connect more often than it needs to. On the other hand,
lengthening the timeout will make Riak less sensitive to cases in which
the connection really has been compromised.

1. SSL depth is the maximum number of non-self-issued
   intermediate certificates that may follow the peer certificate in a valid
   certificate chain. If depth is `0`, the PEER must be signed by the trusted
   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
   then PEER, CA, CA, ROOT-CA and so on.

2. If the ACL is specified and not the special value `*`,
   peers presenting certificates not matching any of the patterns will not be
   allowed to connect.
   If no ACLs are configured, no checks on the common name are done, except
   as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer].
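Returning to the heartbeat settings described above, here is a minimal sketch of how they might appear in the `riak_repl` section of `advanced.config`; the values shown are simply the defaults from the settings table:

```advancedconfig
{riak_repl, [
    % ...
    %% send a heartbeat to the sink every 15 seconds
    {rt_heartbeat_interval, 15},
    %% wait up to 15 seconds for a response (or any data from the sink)
    %% before attempting to re-connect
    {rt_heartbeat_timeout, 15}
    % ...
  ]}
```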
## Default Bucket Properties

Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0.

To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the advanced.config file:

```advancedconfig
{riak_repl, [
  {override_capability, [
    {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
  ]}
]}
```

If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed.
diff --git a/content/riak/kv/2.9.2/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.9.2/configuring/v3-multi-datacenter/nat.md
new file mode 100644
index 0000000000..eed9ce9f92
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/v3-multi-datacenter/nat.md
@@ -0,0 +1,167 @@
---
title_supertext: "V3 Multi-Datacenter Replication:"
title: "With NAT"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "With NAT"
    identifier: "configuring_v3_replication_nat"
    weight: 101
    parent: "configuring_v3"
toc: true
commercial_offering: true
aliases:
  - /riak/2.9.2/ops/mdc/v3/nat
  - /riak/kv/2.9.2/ops/mdc/v3/nat
---

[config v3 ssl]: {{< baseurl >}}riak/kv/2.9.2/configuring/v3-multi-datacenter/ssl

Riak's Version 3 Replication supports replication of data on
networks that use static NAT.

This can be used for replicating data over the public Internet where servers
have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network).

### Requirements

In order for Replication to work on a server configured with NAT, the
NAT addresses must be configured *statically*.

## Configuration

NAT rules can be configured at runtime, from the command line.

* `riak-repl nat-map show`

  Shows the current NAT mapping table

* `riak-repl nat-map add <external_ip>[:port] <internal_ip>`

  Adds a NAT map from the external IP, with an optional port, to an
  internal IP. The port number refers to a port that is automatically
  mapped to the internal `cluster_mgr` port number.

* `riak-repl nat-map del <external_ip>[:port] <internal_ip>`

  Deletes a specific NAT map entry.

### Applying Changes at Runtime

* Realtime NAT replication changes will be applied once realtime is
  stopped and started using the following commands:

  * `riak-repl realtime stop <clustername>`
  * `riak-repl realtime start <clustername>`

* Fullsync NAT replication changes will be applied on the next run of a
  fullsync, or you can stop and start the current fullsync.

  * `riak-repl fullsync stop <clustername>`
  * `riak-repl fullsync start <clustername>`


## Example

* Cluster_A is the **source** of replicated data.
* Cluster_B and Cluster_C are the **sinks** of the replicated data.
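Before walking through the full scenario below, a single hypothetical mapping illustrates the `nat-map` syntax, including the optional port (the addresses here are placeholders, not part of the scenario):

```bash
# map the public address 203.0.113.10, port 5555, to this node's internal IP
riak-repl nat-map add 203.0.113.10:5555 192.168.2.40
# confirm the mapping was recorded
riak-repl nat-map show
```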
### Cluster_A Setup

Cluster_A is set up with nodes using the following **internal** IP
addresses:

Internal IP | Public IP
---------------|-------------------
`192.168.1.20` | -
`192.168.1.21` | -
`192.168.1.22` | -
`192.168.1.23` | -
`192.168.1.24` | -

### Cluster_B Setup

Cluster_B is set up with nodes configured with the following IP
addresses:

Internal IP | Public IP
---------------|-------------------
`192.168.2.40` | `50.16.238.120:5555`
`192.168.2.41` | `50.16.238.121:5555`
`192.168.2.42` | `50.16.238.122:5555`
`192.168.2.43` | `50.16.238.123:5555`
`192.168.2.44` | `50.16.238.124:5555`

In this example, the `cluster_mgr` port number is the default of `9080`,
while the configured NAT port listens on `5555`.

### Cluster_C Setup

Cluster_C is set up with **static NAT**, configured with the
following IP addresses:

Internal IP | Public IP
---------------|-------------------
`192.168.3.60` | `50.16.238.200:5550`
`192.168.3.61` | `50.16.238.200:5551`
`192.168.3.62` | `50.16.238.200:5552`
`192.168.3.63` | `50.16.238.200:5553`
`192.168.3.64` | `50.16.238.200:5554`

In this example, the `cluster_mgr` port number is the default of `9080`,
while the configured NAT ports listen on `5550` through `5554`.

```bash
# on any node of Cluster_A
riak-repl clustername Cluster_A

# on any node of Cluster_B
riak-repl clustername Cluster_B

# on any node of Cluster_C
riak-repl clustername Cluster_C

# on 50.16.238.120 of Cluster_B
riak-repl nat-map add 50.16.238.120:5555 192.168.2.40
# on 50.16.238.121 of Cluster_B
riak-repl nat-map add 50.16.238.121:5555 192.168.2.41
# on 50.16.238.122 of Cluster_B
riak-repl nat-map add 50.16.238.122:5555 192.168.2.42
# on 50.16.238.123 of Cluster_B
riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
# on 50.16.238.124 of Cluster_B
riak-repl nat-map add 50.16.238.124:5555 192.168.2.44

# on 192.168.3.60 of Cluster_C
riak-repl nat-map add 50.16.238.200:5550 192.168.3.60
# on 192.168.3.61 of Cluster_C
riak-repl nat-map add 50.16.238.200:5551 192.168.3.61
# on 192.168.3.62 of Cluster_C
riak-repl nat-map add 50.16.238.200:5552 192.168.3.62
# on 192.168.3.63 of Cluster_C
riak-repl nat-map add 50.16.238.200:5553 192.168.3.63
# on 192.168.3.64 of Cluster_C
riak-repl nat-map add 50.16.238.200:5554 192.168.3.64


# Connect replication from Cluster_A to Cluster_B:
# on any node of Cluster_A
riak-repl connect 50.16.238.120:5555
# You can connect to any node in Cluster_B with NAT-mapped IPs/ports
# This command only needs to be run *once* for a cluster.

# Connect replication from Cluster_A to Cluster_C:
# on any node of Cluster_A
riak-repl connect 50.16.238.200:5550
# You can connect to any node in Cluster_C with NAT-mapped IPs/ports
# This command only needs to be run *once* for a cluster.
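
# Optionally, verify that both sink clusters are now connected
# before enabling realtime replication:
# on any node of Cluster_A
riak-repl connections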

# on any node from Cluster_A
riak-repl realtime enable Cluster_B
riak-repl realtime enable Cluster_C

riak-repl realtime start Cluster_B
riak-repl realtime start Cluster_C
```
diff --git a/content/riak/kv/2.9.2/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.9.2/configuring/v3-multi-datacenter/quick-start.md
new file mode 100644
index 0000000000..0587f09320
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/v3-multi-datacenter/quick-start.md
@@ -0,0 +1,168 @@
---
title_supertext: "V3 Multi-Datacenter Replication:"
title: "Quickstart"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "Quickstart"
    identifier: "configuring_v3_quickstart"
    weight: 100
    parent: "configuring_v3"
toc: true
commercial_offering: true
aliases:
  - /riak/2.9.2/ops/mdc/v3/quick-start
  - /riak/kv/2.9.2/ops/mdc/v3/quick-start
---

[install index]: {{< baseurl >}}riak/kv/2.9.2/setup/installing
[perf index]: {{< baseurl >}}riak/kv/2.9.2/using/performance
[config v3 mdc]: {{< baseurl >}}riak/kv/2.9.2/configuring/v3-multi-datacenter
[cluster ops v3 mdc]: {{< baseurl >}}riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter

This guide will walk you through the process of configuring Riak's v3
Replication to perform replication between two sample Riak clusters on
separate networks. This guide will also cover bidirectional replication,
which is accomplished by setting up unidirectional replication in both
directions between the clusters. It is important to note that both
clusters must have the same ring size, but can have a different number
of nodes.

## Prerequisites

This guide assumes that you have completed the following steps:

* Install [Riak][install index]
* Perform [System Tuning][perf index]
* Review [Configuration][config v3 mdc]

## About v3 Replication in 1.3 and higher

In Riak's v3 Replication, from Riak KV version 1.3 onwards, the
nomenclature for Source and Site clusters has changed. To more
accurately reflect the behavior of each of the clusters, "listeners"
and "sites" are now known as "sources" and "sinks." Data transfer now
originates at the "source" and replicates to the "sink"; initiation is
always from the primary (source) to the backup (sink) data center.

Additionally, knowledge of the state of each cluster is now managed by a
**cluster manager** process, which greatly simplifies the setup and
maintenance of Multi-Datacenter replication.
## Scenario

Configure Riak MDC to perform replication, given the following two
Riak clusters, each of which consists of three nodes:

### Cluster 1

Name | IP | Node name
:-----|:-------------|:----------------
`node1` | `10.60.67.149` | `riak@10.60.67.149`
`node2` | `10.60.83.39` | `riak@10.60.83.39`
`node3` | `10.60.90.252` | `riak@10.60.90.252`

### Cluster 2

Name | IP | Node name
:-----|:------------|:----------------
`node4` | `10.60.77.10` | `riak@10.60.77.10`
`node5` | `10.60.84.41` | `riak@10.60.84.41`
`node6` | `10.60.92.44` | `riak@10.60.92.44`


### Set up Cluster1 → Cluster2 Connection

#### Set up the Source on Cluster1

On a node in Cluster1, `node1` for example, initiate and name this
cluster with `riak-repl clustername <clustername>`:

```bash
riak-repl clustername Cluster1
```

#### Set up the Sink on Cluster2

On a node in Cluster2, `node4` for example, initiate and name this
cluster with `riak-repl clustername <clustername>`:

```bash
riak-repl clustername Cluster2
```

#### Connect the Source to the Sink

From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
connect <ip>:<port>`:

```bash
riak-repl connect 10.60.77.10:9080
```

> The port can be found in the `riak_core` section of the `advanced.config`
> under `cluster_mgr`.

#### View Your Active Connections

From Cluster1, view your active connections with `riak-repl
connections`:

```
Sink             Cluster Name         <Ctrl-Pid>      [Members]
----             ------------         ----------      ---------
Cluster2         Cluster2             <0.7985.0>      ["10.60.77.10:9080"] (via 10.60.77.10:9080)
```

### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)

#### Connect the Source to the Sink

From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
connect <ip>:<port>`:

```bash
riak-repl connect 10.60.67.149:9080
```

#### View Your Active Connections

From Cluster2, view your active connections with `riak-repl
connections`:

```
Sink             Cluster Name         <Ctrl-Pid>      [Members]
----             ------------         ----------      ---------
Cluster1         Cluster1             <0.4456.0>      ["10.60.67.149:9080"] (via 10.60.67.149:9080)
```

{{% note title="Note on connections" %}}
At this point, if you do not have connections, replication will not work.
Check your IP bindings by running `netstat -a` on all nodes. You should see
`*:9080 LISTENING`. If not, you have configuration problems.
{{% /note %}}

### Enable Realtime Replication

From Cluster1, run `riak-repl realtime enable <clustername>` to start
queuing updates on Cluster1 for replication:

```bash
riak-repl realtime enable Cluster2
```

Also on Cluster1, run `riak-repl realtime start <clustername>` to
establish connectivity from Cluster1 to Cluster2 to push queued updates:

```bash
riak-repl realtime start Cluster2
```

To enable bidirectional replication, do the reverse from Cluster2.
Once this is done, bidirectional replication should be operating.

## More information

For a full list of commands, you may enter `riak-repl` to see full
instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
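As a quick sanity check of realtime replication, you can write a key to Cluster1 over HTTP and read it back from Cluster2. This is only a rough sketch: it assumes the default HTTP port of `8098`, the node IPs from the scenario above, and an illustrative `replCheck` bucket, so adjust it for your environment:

```bash
# write a timestamped value to a node in Cluster1
curl -s -X PUT -d "$(date)" http://10.60.67.149:8098/buckets/replCheck/keys/c1

# shortly afterwards, the same key should be readable from Cluster2
curl -s http://10.60.77.10:8098/buckets/replCheck/keys/c1
```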
diff --git a/content/riak/kv/2.9.2/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.9.2/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..a7c7258bf8
--- /dev/null
+++ b/content/riak/kv/2.9.2/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,170 @@
---
title_supertext: "V3 Multi-Datacenter Replication:"
title: "SSL"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "SSL"
    identifier: "configuring_v3_replication_ssl"
    weight: 103
    parent: "configuring_v3"
toc: true
commercial_offering: true
aliases:
  - /riak/2.9.2/ops/mdc/v3/ssl
  - /riak/kv/2.9.2/ops/mdc/v3/ssl
---

[config reference#advanced.config]: {{< baseurl >}}riak/kv/2.9.2/configuring/reference/#the-advanced-config-file

## Features

Riak Multi-Datacenter (MDC) Replication SSL consists of the following
items:

 * Encryption of replication data
 * SSL certificate chain validation
 * SSL common name whitelisting support

> **Note on cross-internet traffic**
>
> As an alternative to Riak's built-in SSL capabilities, we
recommend using [stunnel](https://www.stunnel.org/index.html) or a
virtual private network (VPN) for inter-datacenter connections.

## SSL Configuration

To configure SSL, you will need to include the following four settings in
the `riak_core` section of [`advanced.config`][config reference#advanced.config]:

```advancedconfig
{riak_core, [
    % ...
    {ssl_enabled, true},
    {certfile, "/full/path/to/site1-cert.pem"},
    {keyfile, "/full/path/to/site1-key.pem"},
    {cacertdir, "/full/path/to/cacertsdir"}
    % ...
  ]}

```

The `cacertdir` is a directory containing all the CA certificates
needed to verify the CA chain back to the root.

{{% note title="Note on configuration" %}}
In Version 3 replication, the SSL settings need to be placed in the
`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
used by Version 2 replication.
{{% /note %}}

## Verifying Peer Certificates

Verification of a peer's certificate common name *(CN)* is enabled by using
the `peer_common_name_acl` property in the `riak_core` section of your
`advanced.config` to specify an Access Control List *(ACL)*.

The ACL is a list of one or more *patterns*, separated by commas. Each
pattern may be either the exact CN of a certificate to allow, or a
wildcard in the form `*.some.domain.name`. Pattern comparison is
case-insensitive, and a CN matching any of the patterns is allowed to connect.

For example, `["*.corp.com"]` would match `site3.corp.com` but not
`foo.bar.corp.com` or `corp.com`. If the ACL were
`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
would be allowed to connect, but `corp.com` still would not.

If no ACL (or only the special value `"*"`) is specified, no CN filtering
is performed, except as described below.

{{% note title="Identical Local and Peer Common Names" %}}
As a special case supporting the view that a host's CN is a fully-qualified
domain name that uniquely identifies a single network device, if the CNs of
the local and peer certificates are the same, the nodes will *NOT* be allowed
to connect.

This evaluation supersedes ACL checks, so it cannot be overridden with any
setting of the `peer_common_name_acl` property.
{{% /note %}}

### Examples

The following example will only allow connections from peer certificate
names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:

```advancedconfig
{riak_core, [
    % ...
    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
    % ...
  ]}

```

The following example will allow connections from peer certificate names
like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
peer certificate name like `db.backup.bashosamplecorp.com`:

```advancedconfig
{riak_core, [
    % ...
    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
    % ...
  ]}

```

This example will match any peer certificate name (and is the default):

```advancedconfig
{riak_core, [
    % ...
    {peer_common_name_acl, "*"}
    % ...
  ]}

```

## SSL CA Validation

You can adjust the way CA certificates are validated by adding the
following to the `riak_core` section of `advanced.config`:

```advancedconfig
{riak_core, [
    % ...
    {ssl_depth, 3} % Sets the depth to 3
    % ...
  ]}

```

**Note**: `ssl_depth` takes an integer parameter.

The depth specifies the maximum number of intermediate certificates that
may follow the peer certificate in a valid certification path. The
intermediate certificates must not be self-signed.

The following example depths illustrate this:

 * a depth of `0` indicates that the certificate must be signed
   directly by a root certificate authority (CA)
 * a depth of `1` indicates that the certificate may be signed by at
   most one intermediate CA, followed by a root CA
 * a depth of `2` indicates that the certificate may be signed by at
   most two intermediate CAs, followed by a root CA

## Compatibility

Replication SSL for *Version 3* is available in *Riak 1.4+*.

If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
1.1 node, the connection will be denied and an error will be logged.

### Self-Signed Certificates

Read how to [generate your own CA and
keys](http://www.debian-administration.org/articles/618). Ensure that
you remove the password protection from the keys you generate.
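If an existing key is passphrase-protected, one way to strip the passphrase is with OpenSSL (the file names here are illustrative, not part of the Riak configuration):

```bash
# rewrite the RSA key without its passphrase so Riak can load it unattended;
# you will be prompted once for the current passphrase
openssl rsa -in site1-key-protected.pem -out site1-key.pem
chmod 600 site1-key.pem
```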
diff --git a/content/riak/kv/2.9.2/developing.md b/content/riak/kv/2.9.2/developing.md
new file mode 100644
index 0000000000..a786dc7020
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing.md
@@ -0,0 +1,73 @@
---
title: "Developing with Riak KV"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "Developing"
    identifier: "developing"
    weight: 300
    pre: lambda
toc: true
---

[getting started]: ../developing/getting-started
[usage index]: ../developing/usage
[client libraries]: ../developing/client-libraries
[dev data types]: ../developing/data-types
[dev data modeling]: ../developing/data-modeling
[apps index]: ../developing/app-guide
[dev api index]: ../developing/api
[dev faq]: ../developing/faq

## In This Section

#### [Getting Started][getting started]

Step-by-step guide for getting started developing with Riak KV.

[Learn More >>][getting started]

#### [Usage][usage index]

A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types.

[Learn More >>][usage index]

#### [Client Libraries][client libraries]

Overview of client libraries for a variety of programming languages and environments.

[Learn More >>][client libraries]

#### [Data Types][dev data types]

Overview and guide to working with data types in Riak KV.

[Learn More >>][dev data types]

#### [Data Modeling][dev data modeling]

Information on use cases and data models that are a good fit for Riak KV.

[Learn More >>][dev data modeling]

#### [Application Guide][apps index]

A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV.

[Learn More >>][apps index]

#### [APIs Reference][dev api index]

Information and reference material on Riak KV APIs.

[Learn More >>][dev api index]

#### [FAQ][dev faq]

Frequently asked questions when developing applications with Riak KV.

[Learn More >>][dev faq]

diff --git a/content/riak/kv/2.9.2/developing/api.md b/content/riak/kv/2.9.2/developing/api.md
new file mode 100644
index 0000000000..633d80f69c
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api.md
@@ -0,0 +1,37 @@
---
title: "APIs"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "APIs"
    identifier: "developing_apis"
    weight: 107
    parent: "developing"
toc: true
---

[dev api http]: ./http
[dev api backend]: ./backend
[dev api pbc]: ./protocol-buffers/

## In This Section

#### [HTTP APIs][dev api http]

Documentation on Riak KV's HTTP API.

[Learn More >>][dev api http]

#### [Protocol Buffers][dev api pbc]

Information on Riak KV's Protocol Buffer Client API.

[Learn More >>][dev api pbc]

#### [Backend API][dev api backend]

Overview of Riak KV's storage backend API.

[Learn More >>][dev api backend]
diff --git a/content/riak/kv/2.9.2/developing/api/backend.md b/content/riak/kv/2.9.2/developing/api/backend.md
new file mode 100644
index 0000000000..28e2b15ac1
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/backend.md
@@ -0,0 +1,114 @@
---
title: "Backend API"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "Backend API"
    identifier: "apis_backend"
    weight: 101
    parent: "developing_apis"
toc: true
aliases:
  - /riak/2.9.2/dev/references/backend-api
  - /riak/kv/2.9.2/dev/references/backend-api
---

[plan backend]: {{< baseurl >}}riak/kv/2.9.2/setup/planning/backend

Riak's storage API uniformly applies to all of the
[supported backends][plan backend]. This page presents the details of
the storage backend API in the form of
[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html)
(specs).

Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html),
an Erlang static analysis tool. We recommend copying these specs into any
custom backend modules and using them as a guide for development to
avoid errors and ensure full compatibility with Riak.

Also included below is the function export list that can be pasted directly
into a custom storage backend module.

```erlang
%% Riak Storage Backend API
-export([api_version/0,
         start/2,
         stop/1,
         get/3,
         put/5,
         delete/4,
         drop/1,
         fold_buckets/4,
         fold_keys/4,
         fold_objects/4,
         is_empty/1,
         status/1,
         callback/3]).

%% ===================================================================
%% Public API
%% ===================================================================

%% @doc Return the major version of the
%% current API and a capabilities list.
%% The current valid capabilities are async_fold
%% and indexes.
-spec api_version() -> {integer(), [atom()]}.

%% @doc Start the backend
-spec start(integer(), config()) -> {ok, state()} | {error, term()}.

%% @doc Stop the backend
-spec stop(state()) -> ok.
+ +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` diff --git a/content/riak/kv/2.9.2/developing/api/http.md b/content/riak/kv/2.9.2/developing/api/http.md new file mode 100644 index 0000000000..2b66de71ce --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http.md @@ -0,0 +1,89 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.2/dev/references/http + - /riak/kv/2.9.2/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
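For example, a key that literally contains a slash (the bucket and key names here are hypothetical) must have the slash percent-encoded:

```curl
# the key "docs/readme" must be requested as docs%2Freadme
curl http://127.0.0.1:8098/buckets/my_bucket/keys/docs%2Freadme
```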
## Bucket-related Operations

Method | URL | Doc
:------|:----|:---
`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/get-bucket-props)
`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/set-bucket-props)
`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/reset-bucket-props)
`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/list-buckets)
`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/list-keys)

## Object-related Operations

Method | URL | Doc
:------|:----|:---
`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/fetch-object)
`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/store-object)
`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/store-object)
`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/store-object)
`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/delete-object)

## Riak-Data-Type-related Operations

Method | URL
:------|:----
`GET` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
`POST` | `/types/<type>/buckets/<bucket>/datatypes`
`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`

For documentation on the HTTP API for [Riak Data Types]({{< baseurl >}}riak/kv/2.9.2/learn/concepts/crdts),
see the `curl` examples in [Using Data Types]({{< baseurl >}}riak/kv/2.9.2/developing/data-types/#usage-examples)
and subpages e.g. [sets]({{< baseurl >}}riak/kv/2.9.2/developing/data-types/sets).

Advanced users may consult the technical documentation inside the Riak
KV internal module `riak_kv_wm_crdt`.
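To illustrate the data type endpoints above, a counter can be incremented and read back with plain `curl`. This is a sketch rather than a complete reference: it assumes a bucket type named `counters` that was created with the `counter` datatype and activated beforehand:

```curl
# increment the counter at key "traffic_tickets" by 5
curl -XPOST http://127.0.0.1:8098/types/counters/buckets/my_counters/datatypes/traffic_tickets \
  -H "Content-Type: application/json" \
  -d '{"increment": 5}'

# read it back; the response is JSON such as {"type":"counter","value":5}
curl http://127.0.0.1:8098/types/counters/buckets/my_counters/datatypes/traffic_tickets
```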
## Query-related Operations

Method | URL | Doc
:------|:----|:---
`POST` | `/mapred` | [HTTP MapReduce]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/mapreduce)
`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<value>` | [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/secondary-indexes)
`GET` | `/types/<type>/buckets/<bucket>/index/<field>/<start>/<end>` | [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/secondary-indexes)

## Server-related Operations

Method | URL | Doc
:------|:----|:---
`GET` | `/ping` | [HTTP Ping]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/ping)
`GET` | `/stats` | [HTTP Status]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/status)
`GET` | `/` | [HTTP List Resources]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/list-resources)

## Search-related Operations

Method | URL | Doc
:------|:----|:---
`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/search-query)
`GET` | `/search/index` | [HTTP Search Index Info]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/search-index-info)
`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/fetch-search-index)
`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/store-search-index)
`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/delete-search-index)
`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/fetch-search-schema)
`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/store-search-schema)
diff --git a/content/riak/kv/2.9.2/developing/api/http/counters.md b/content/riak/kv/2.9.2/developing/api/http/counters.md
new file mode 100644
index 0000000000..a8b2ee11f2
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/http/counters.md
@@ -0,0 +1,78 @@
---
title: "HTTP Counters"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "Counters"
    identifier: "http_counters"
    weight: 118
    parent: "apis_http"
toc: true
aliases:
  - /riak/2.9.2/dev/references/http/counters
  - /riak/kv/2.9.2/dev/references/http/counters
---

Riak counters are CRDTs (convergent replicated data types) that (eventually)
converge to the correct total. You merely increment the counter by some
integer, and any potential conflicts will be automatically resolved by Riak.

## Setup

Riak counters can only be used if the bucket has the `allow_mult` property
set to `true`.

```
curl -XPUT localhost:8098/buckets/BUCKET/props \
  -H "Content-Type: application/json" \
  -d "{\"props\" : {\"allow_mult\": true}}"
```

If you attempt to use counters without setting the above, you'll get this
message:

```
Counters require bucket property 'allow_mult=true'
```

## Request

To increment a counter, POST an integer value using the `/counters` resource.
This will increment the keyed value by the given amount.

```
POST /buckets/BUCKET/counters/KEY
```

To retrieve the current value, issue a GET against `/counters`:

```
GET /buckets/BUCKET/counters/KEY
```

## Response

The regular POST/PUT ([HTTP Store Object]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/fetch-object)) responses apply here.

Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata.

## Example

The body must be an integer (positive or negative).
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/delete-object.md b/content/riak/kv/2.9.2/developing/api/http/delete-object.md new file mode 100644 index 0000000000..69f4f997b8 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/delete-object.md @@ -0,0 +1,75 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/delete-object + - /riak/kv/2.9.2/dev/references/http/delete-object +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected
* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
> DELETE /buckets/test/keys/test2 HTTP/1.1
> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
> Host: 127.0.0.1:8098
> Accept: */*
>
< HTTP/1.1 204 No Content
< Vary: Accept-Encoding
< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
< Date: Fri, 30 Sep 2011 15:24:35 GMT
< Content-Type: application/json
< Content-Length: 0
<
* Connection #0 to host 127.0.0.1 left intact
* Closing connection #0
```
diff --git a/content/riak/kv/2.9.2/developing/api/http/delete-search-index.md b/content/riak/kv/2.9.2/developing/api/http/delete-search-index.md
new file mode 100644
index 0000000000..5c5571d17f
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/http/delete-search-index.md
@@ -0,0 +1,33 @@
---
title: "HTTP Delete Search Index"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "Delete Search Index"
    identifier: "http_delete_search_index"
    weight: 116
    parent: "apis_http"
toc: true
aliases:
  - /riak/2.9.2/dev/references/http/delete-search-index
  - /riak/kv/2.9.2/dev/references/http/delete-search-index
---

Deletes a Riak Search index.

## Request

```
DELETE /search/index/<index_name>
```

## Normal Response Codes

* `204 No Content` --- The index was successfully deleted (also returned
  if the index did not exist to begin with)

## Typical Error Codes

* `503 Service Unavailable` --- The request timed out internally
diff --git a/content/riak/kv/2.9.2/developing/api/http/fetch-object.md b/content/riak/kv/2.9.2/developing/api/http/fetch-object.md
new file mode 100644
index 0000000000..c2275d8509
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/http/fetch-object.md
@@ -0,0 +1,242 @@
---
title: "HTTP Fetch Object"
description: ""
project: "riak_kv"
project_version: 2.9.2
menu:
  riak_kv-2.9.2:
    name: "Fetch Object"
    identifier: "http_fetch_object"
    weight: 105
    parent: "apis_http"
toc: true
aliases:
  - /riak/2.9.2/dev/references/http/fetch-object
  - /riak/kv/2.9.2/dev/references/http/fetch-object
---

Reads an object from the specified bucket/key.

## Request

```bash
GET /types/type/buckets/bucket/keys/key
GET /buckets/bucket/keys/key
```

Important headers:

* `Accept` - When `multipart/mixed` is the preferred content-type, objects with
siblings will return all siblings in a single request. See [Siblings examples](#siblings-examples). See
also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1).

Optional headers:

* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics,
matching on the `ETag` and `Last-Modified` of the object, respectively. If the
object fails one of the tests (that is, if the ETag is equal or the object is
unmodified since the supplied timestamp), Riak will return a `304 Not Modified`
response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5).

Optional query parameters:

* `r` - (read quorum) how many replicas need to agree when retrieving the
object ([default is defined by the bucket]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/set-bucket-props))
* `pr` - how many primary replicas need to be online when doing the read
([default is defined by the bucket]({{< baseurl >}}riak/kv/2.9.2/developing/api/http/set-bucket-props))
* `basic_quorum` - whether to return early in some failure cases (e.g.,
when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/2.9.2/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.9.2/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.2/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. +{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected
* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
> GET /buckets/test/keys/doc2 HTTP/1.1
> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
> Host: 127.0.0.1:8098
> Accept: */*
>
< HTTP/1.1 200 OK
< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA==
< Vary: Accept-Encoding
< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
< Link: </buckets/test>; rel="up"
< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT
< ETag: 6dQBm9oYA1mxRSH0e96l5W
< Date: Fri, 30 Sep 2011 15:24:35 GMT
< Content-Type: application/json
< Content-Length: 13
<
* Connection #0 to host 127.0.0.1 left intact
* Closing connection #0
{"foo":"bar"}
```

## Siblings examples

### Manually requesting siblings

Simple call to fetch an object that has siblings:

```curl
$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc
* About to connect() to 127.0.0.1 port 8098 (#0)
* Trying 127.0.0.1... connected
* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
> GET /buckets/test/keys/doc HTTP/1.1
> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
> Host: 127.0.0.1:8098
> Accept: */*
>
< HTTP/1.1 300 Multiple Choices
< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA==
< Vary: Accept, Accept-Encoding
< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
< Date: Fri, 30 Sep 2011 15:24:35 GMT
< Content-Type: text/plain
< Content-Length: 102
<
Siblings:
16vic4eU9ny46o4KPiDz1f
4v5xOg4bVwUYZdMkqf0d6I
6nr5tDTmhxnwuAFJDd2s6G
6zRSZFUJlHXZ15o9CG0BYl
* Connection #0 to host 127.0.0.1 left intact
* Closing connection #0
```

Now request one of the siblings directly:

```curl
$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f
* About to connect() to 127.0.0.1 port 8098 (#0)
* Trying 127.0.0.1... connected
* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1
> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
> Host: 127.0.0.1:8098
> Accept: */*
>
< HTTP/1.1 200 OK
< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA==
< Vary: Accept-Encoding
< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
< Link: </buckets/test>; rel="up"
< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT
< ETag: 16vic4eU9ny46o4KPiDz1f
< Date: Fri, 30 Sep 2011 15:24:35 GMT
< Content-Type: application/x-www-form-urlencoded
< Content-Length: 13
<
* Connection #0 to host 127.0.0.1 left intact
* Closing connection #0
{"bar":"baz"}
```

### Get all siblings in one request

```curl
$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed"
* About to connect() to 127.0.0.1 port 8098 (#0)
* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: multipart/mixed +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh +< Content-Length: 766 +< + +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/x-www-form-urlencoded +Link: ; rel="up" +Etag: 16vic4eU9ny46o4KPiDz1f +Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 4v5xOg4bVwUYZdMkqf0d6I +Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6nr5tDTmhxnwuAFJDd2s6G +Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6zRSZFUJlHXZ15o9CG0BYl +Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT + +{"foo":"bar"} +--YinLMzyUR9feB17okMytgKsylvh-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/fetch-search-index.md b/content/riak/kv/2.9.2/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..5f1eef75eb --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/fetch-search-index.md @@ -0,0 +1,47 @@ +--- +title: "HTTP Fetch Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Fetch Search Index" + identifier: "http_fetch_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/fetch-search-index + - /riak/kv/2.9.2/dev/references/http/fetch-search-index +--- + +Retrieves information about a Riak Search [index]({{}}riak/kv/2.9.2/developing/usage/search/#simple-setup). + +## Request + +``` +GET /search/index/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` --- No Search index with that name is currently + available +* `503 Service Unavailable` --- The request timed out internally + +## Response + +If the index is found, Riak will output a JSON object describing the +index, including its name, the [`n_val`]({{}}riak/kv/2.9.2/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.9.2/developing/usage/search-schemas) used by the index. 
Here is an example: + +```json +{ + "name": "my_index", + "n_val": 3, + "schema": "_yz_default" +} +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.9.2/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..a78416cf29 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/fetch-search-schema.md @@ -0,0 +1,38 @@ +--- +title: "HTTP Fetch Search Schema" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Fetch Search Schema" + identifier: "http_fetch_search_schema" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/fetch-search-schema + - /riak/kv/2.9.2/dev/references/http/fetch-search-schema +--- + +Retrieves a Riak KV [search schema]({{}}riak/kv/2.9.2/developing/usage/search-schemas). + +## Request + +``` +GET /search/schema/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +* `503 Service Unavailable` --- The request timed out internally + +## Response + +If the schema is found, Riak will return the contents of the schema as +XML (all Riak Search schemas are XML). diff --git a/content/riak/kv/2.9.2/developing/api/http/get-bucket-props.md b/content/riak/kv/2.9.2/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..1d229c04f3 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/get-bucket-props.md @@ -0,0 +1,82 @@ +--- +title: "HTTP Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Get Bucket Properties" + identifier: "http_get_bucket_props" + weight: 100 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/get-bucket-props + - /riak/kv/2.9.2/dev/references/http/get-bucket-props +--- + +Reads the bucket or bucket type properties. + +## Request + +```bash +GET /buckets/bucket/props +``` + +Or, to read bucket properties from a bucket in a bucket type: + +```bash +GET /types/type/buckets/bucket/props +``` + +Optional query parameters (only valid for the old format): + +* `props` - whether to return the bucket properties (`true` is the default) +* `keys` - whether to return the keys stored in the bucket. (`false` is the +default). See also [HTTP List Keys]({{}}riak/kv/2.9.2/developing/api/http/list-keys). + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` + +The JSON object in the response will contain up to two entries, `"props"` and +`"keys"`, which are present or missing, according to the optional query +parameters. The default is for only `"props"` to be present. + +See [HTTP Set Bucket Properties]({{}}riak/kv/2.9.2/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.9.2/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/props +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/link-walking.md b/content/riak/kv/2.9.2/developing/api/http/link-walking.md new file mode 100644 index 0000000000..1440b1d277 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/link-walking.md @@ -0,0 +1,125 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/link-walking + - /riak/kv/2.9.2/dev/references/http/link-walking +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/2.9.2/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.9.2/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/2.9.2/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/list-buckets.md b/content/riak/kv/2.9.2/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..ec17ddf03b --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/list-buckets.md @@ -0,0 +1,64 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/list-buckets + - /riak/kv/2.9.2/dev/references/http/list-buckets +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. 
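+
+For buckets under a non-default bucket type, the same query is issued beneath
+the `/types` prefix. A minimal sketch, assuming a bucket type named `my_type`
+has already been created and activated and holds a single bucket (both names
+and the response are illustrative only):
+
+```curl
+# list buckets under the hypothetical bucket type "my_type"
+$ curl http://localhost:8098/types/my_type/buckets?buckets=true
+
+{"buckets":["logs"]}
+```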
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/list-keys.md b/content/riak/kv/2.9.2/developing/api/http/list-keys.md new file mode 100644 index 0000000000..1df6d87b1d --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/list-keys.md @@ -0,0 +1,76 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/list-keys + - /riak/kv/2.9.2/dev/references/http/list-keys +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/list-resources.md b/content/riak/kv/2.9.2/developing/api/http/list-resources.md new file mode 100644 index 0000000000..b732e843e5 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/list-resources.md @@ -0,0 +1,80 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/list-resources + - /riak/kv/2.9.2/dev/references/http/list-resources +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
+ +The standard resources are: + +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.9.2/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.9.2/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.9.2/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.9.2/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.9.2/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.9.2/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.9.2/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.9.2/developing/api/http/status) + +## Request + +```bash +GET / +``` + +Headers: + +* `Accept` - `application/json` or `text/html` + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Link` - all resources that are described in the response body, but in Link +form + +## Example + +Request JSON response + +```curl +$ curl -i http://localhost:8098 -H "Accept: application/json" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:18:31 GMT +Content-Type: application/json +Content-Length: 398 + +{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"} + +# Request HTML response +curl -i http://localhost:8098 -H "Accept: text/html" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:20:05 GMT +Content-Type: text/html +Content-Length: 666 + + +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/mapreduce.md b/content/riak/kv/2.9.2/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..1263dd6a90 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/mapreduce.md @@ -0,0 +1,70 @@ +--- +title: "HTTP MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "MapReduce" + identifier: "http_mapreduce" + weight: 108 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/mapreduce + - /riak/kv/2.9.2/dev/references/http/mapreduce +--- + +[MapReduce]({{}}riak/kv/2.9.2/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. 
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.9.2/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+*This request must include an entity (body), which is the JSON form of the MapReduce query.*
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
diff --git a/content/riak/kv/2.9.2/developing/api/http/ping.md b/content/riak/kv/2.9.2/developing/api/http/ping.md
new file mode 100644
index 0000000000..2be78052e1
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/http/ping.md
@@ -0,0 +1,53 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/http/ping
+  - /riak/kv/2.9.2/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load balancers, and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
diff --git a/content/riak/kv/2.9.2/developing/api/http/reset-bucket-props.md b/content/riak/kv/2.9.2/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..8bfb912ada
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/http/reset-bucket-props
+  - /riak/kv/2.9.2/dev/references/http/reset-bucket-props
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/bucket/props HTTP/1.1
+> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue)
+< Date: Tue, 06 Nov 2012 21:56:17 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+```
diff --git a/content/riak/kv/2.9.2/developing/api/http/search-index-info.md b/content/riak/kv/2.9.2/developing/api/http/search-index-info.md
new file mode 100644
index 0000000000..6a3ff22848
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/http/search-index-info.md
@@ -0,0 +1,52 @@
+---
+title: "HTTP Search Index Info"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Search Index Info"
+    identifier: "http_search_index_info"
+    weight: 114
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/http/search-index-info
+  - /riak/kv/2.9.2/dev/references/http/search-index-info
+---
+
+Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.9.2/developing/usage/search) in JSON format.
+
+## Request
+
+```
+GET /search/index
+```
+
+## Response
+
+If there are no currently available Search indexes, a `200 OK` will be
+returned but with an empty list as the response value.
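+
+For instance, a node on which no Search indexes have been created yet might
+respond like this (a hypothetical sketch; the exact formatting of the body may
+differ):
+
+```curl
+# no Search indexes exist yet, so the list is empty
+$ curl http://localhost:8098/search/index
+
+[]
+```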
+ +Below is the example output if there is one Search index, called +`test_index`, currently available: + +```json +[ + { + "n_val": 3, + "name": "test_index", + "schema": "_yz_default" + } +] +``` + +#### Normal Response Codes + +* `200 OK` + +#### Typical Error Codes + +* `404 Object Not Found` --- Typically returned if Riak Search is not + currently enabled on the node +* `503 Service Unavailable` --- The request timed out internally diff --git a/content/riak/kv/2.9.2/developing/api/http/search-query.md b/content/riak/kv/2.9.2/developing/api/http/search-query.md new file mode 100644 index 0000000000..f90d9a66da --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/search-query.md @@ -0,0 +1,69 @@ +--- +title: "HTTP Search Query" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Search Query" + identifier: "http_search_query" + weight: 113 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/search-query + - /riak/kv/2.9.2/dev/references/http/search-query +--- + +Performs a [Riak KV Search]({{}}riak/kv/2.9.2/developing/usage/search) query. + +## Request + +``` +GET /search/query/ +``` + +## Optional Query Parameters + +* `wt` --- The [response + writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers) + to be used when returning the Search payload. The currently + available options are `json` and `xml`. The default is `xml`. +* `q` --- The actual Search query itself. Examples can be found in + [Using Search]({{}}riak/kv/2.9.2/developing/usage/search). If a query is not specified, Riak will return + information about the index itself, e.g. the number of documents + indexed. + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `400 Bad Request` --- Returned when, for example, a malformed query is + supplied +* `404 Object Not Found` --- Returned if the Search index you are + attempting to query does not exist +* `503 Service Unavailable` --- The request timed out internally + +## Response + +If a `200 OK` is returned, then the Search query has been successful. +Below is an example JSON response from querying an index that currently +has no documents associated with it: + +```json +{ + "response": { + "docs": [], + "maxScore": 0.0, + "numFound": 0, + "start": 0 + }, + "responseHeader": { + "status": 0, + "QTime": 10, + "params": { /* internal info from the query */ } + } +} +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/secondary-indexes.md b/content/riak/kv/2.9.2/developing/api/http/secondary-indexes.md new file mode 100644 index 0000000000..da337c28a0 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/secondary-indexes.md @@ -0,0 +1,91 @@ +--- +title: "HTTP Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Secondary Indexes" + identifier: "http_2i" + weight: 109 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/secondary-indexes + - /riak/kv/2.9.2/dev/references/http/secondary-indexes +--- + +[Secondary Indexes]({{}}riak/kv/2.9.2/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
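+
+As a quick orientation before the reference material below, the typical round
+trip is to tag an object with an `X-Riak-Index-*` header at write time and then
+query by that index. A minimal sketch, assuming a backend that supports
+secondary indexes (e.g. LevelDB); the bucket, key, and index names are
+illustrative only:
+
+```curl
+# store an object tagged with a binary secondary index
+$ curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "Content-Type: text/plain" \
+  -H "x-riak-index-field1_bin: val1" \
+  -d 'some data'
+
+# query the index for matching keys
+$ curl http://localhost:8098/buckets/mybucket/index/field1_bin/val1
+
+{"keys":["mykey1"]}
+```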
+ +## Request + +### Exact Match + +```bash +GET /buckets/mybucket/index/myindex_bin/value +``` + +### Range Query + +``` +GET /buckets/mybucket/index/myindex_bin/start/end +``` + +#### Range query with terms + +To see the index values matched by the range, use `return_terms=true`. + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true +``` + +### Pagination + +Add the parameter `max_results` for pagination. This will limit the results and provide for the next request a `continuation` value. + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500 +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM= +``` + +### Streaming + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?stream=true +``` + +## Response + +Normal status codes: + ++ `200 OK` + +Typical error codes: + ++ `400 Bad Request` - if the index name or index value is invalid. ++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system. ++ `503 Service Unavailable` - if the job timed out before it could complete + +## Example + +```curl +$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1 +* About to connect() to localhost port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to localhost (127.0.0.1) port 8098 (#0) +> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3 +> Host: localhost:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 19 +< +* Connection #0 to host localhost left intact +* Closing connection #0 +{"keys":["mykey1"]}% +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/set-bucket-props.md b/content/riak/kv/2.9.2/developing/api/http/set-bucket-props.md new file mode 100644 index 0000000000..fb030782a1 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/set-bucket-props.md @@ -0,0 +1,112 @@ +--- +title: "HTTP Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Set Bucket Properties" + identifier: "http_set_bucket_props" + weight: 101 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/set-bucket-props + - /riak/kv/2.9.2/dev/references/http/set-bucket-props +--- + +Sets bucket properties like "n_val" and "allow_mult". + +## Request + +```bash +PUT /buckets/bucket/props +``` + +Important headers: + +* `Content-Type` - `application/json` + +The body of the request should be a JSON object with a single entry "props". +Unmodified bucket properties may be omitted. + +Available properties: + +* `n_val` (integer > 0) - the number of replicas for objects in this bucket +* `allow_mult` (true or false) - whether to allow sibling objects to be created +(concurrent updates) +* `last_write_wins` (true or false) - whether to ignore object history (vector +clock) when writing +* `precommit` - [precommit hooks]({{}}riak/kv/2.9.2/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/2.9.2/developing/usage/commit-hooks) +* `r, w, dw, rw` - default quorum values for operations on keys in the bucket. 
+Valid values are:
+  * `"all"` - all nodes must respond
+  * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.*
+  * `"one"` - equivalent to 1
+  * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of diverse physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+{{% note title="Node Confirms" %}}
+`node_confirms` is a tunable for durability. When operating in a failure state, Riak will store replicas in fallback vnodes, and in some cases multiple fallbacks may be on the same physical node. `node_confirms` is an option that specifies how many distinct physical nodes must acknowledge a write for it to be considered successful.
+
+When Riak receives a put, it starts up a `riak_kv_put_fsm` (finite state machine). The FSM prepares and validates the options, calls any precommit hooks, and then executes the put against the local vnode in the preflist, which becomes the coordinating node. It waits for the local vnode response before executing the put request remotely on the two remaining nodes in the preflist.
+
+The FSM then waits for the remote vnode responses and, as each response arrives, records the result and checks whether enough results have been collected to satisfy bucket properties such as `dw` and `pw`.
+When analysing the responses, Riak counts the number of different nodes from which results have been returned. With `node_confirms` set, the FSM can be required to wait for a minimum number of confirmations from different nodes, whilst also ensuring all other configured options are satisfied.
+
+Once all options are satisfied, the response is returned, postcommit hooks are called, and the FSM finishes.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+  -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/props HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4
+OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 21
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
diff --git a/content/riak/kv/2.9.2/developing/api/http/status.md b/content/riak/kv/2.9.2/developing/api/http/status.md
new file mode 100644
index 0000000000..511fab92d4
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/http/status.md
@@ -0,0 +1,169 @@
+---
+title: "HTTP Status"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Status"
+    identifier: "http_status"
+    weight: 111
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/http/status
+  - /riak/kv/2.9.2/dev/references/http/status
+---
+
+Reports on the performance and configuration of the Riak node that receives the request. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active.
+
+## Performance
+
+Repeated requests to the `/stats` endpoint do not have a negative
+performance impact as the statistics are cached internally in Riak.
+
+## Request
+
+```bash
+GET /stats
+```
+
+Important headers:
+
+* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`.
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `404 Not Found` - if `riak_kv_stat` is not enabled
+
+Important headers:
+* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks)
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /stats HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: text/plain +> +< HTTP/1.1 200 OK +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 2102 +< +{ + "vnode_gets": 0, + "vnode_puts": 0, + "read_repairs": 0, + "vnode_gets_total": 0, + "vnode_puts_total": 0, + "node_gets": 0, + "node_gets_total": 0, + "node_get_fsm_time_mean": "undefined", + "node_get_fsm_time_median": "undefined", + "node_get_fsm_time_95": "undefined", + "node_get_fsm_time_99": "undefined", + "node_get_fsm_time_100": "undefined", + "node_puts": 0, + "node_puts_total": 0, + "node_put_fsm_time_mean": "undefined", + "node_put_fsm_time_median": "undefined", + "node_put_fsm_time_95": "undefined", + "node_put_fsm_time_99": "undefined", + "node_put_fsm_time_100": "undefined", + "read_repairs_total": 0, + "cpu_nprocs": 84, + "cpu_avg1": 251, + "cpu_avg5": 174, + "cpu_avg15": 110, + "mem_total": 7946684000.0, + "mem_allocated": 4340880000.0, + "nodename": "riak@127.0.0.1", + "connected_nodes": [ + + ], + "sys_driver_version": "1.5", + "sys_global_heaps_size": 0, + "sys_heap_type": "private", + "sys_logical_processors": 2, + "sys_otp_release": "R13B04", + "sys_process_count": 189, + "sys_smp_support": true, + "sys_system_version": "Erlang R13B04 (erts-5.7.5) [[source]] [[64-bit]] [[smp:2:2]] [[rq:2]] [[async-threads:5]] [[hipe]] [[kernel-poll:true]]", + "sys_system_architecture": "i386-apple-darwin10.3.0", + "sys_threads_enabled": true, + "sys_thread_pool_size": 5, + "sys_wordsize": 8, + "ring_members": [ + "riak@127.0.0.1" + ], + "ring_num_partitions": 64, + "ring_ownership": "[{'riak@127.0.0.1',64}]", + "ring_creation_size": 64, + "storage_backend": "riak_kv_bitcask_backend", + "pbc_connects_total": 0, + "pbc_connects": 0, + "pbc_active": 0, + "riak_kv_version": "0.11.0", + "riak_core_version": "0.11.0", + "bitcask_version": "1.0.1", + "luke_version": "0.1", + "webmachine_version": "1.7.1", + "mochiweb_version": "1.7.1", + "erlang_js_version": "0.4", + "runtime_tools_version": "1.8.3", + "crypto_version": "1.6.4", + "os_mon_version": "2.9.1", + "sasl_version": "2.1.9", + "stdlib_version": "1.16.5", + "kernel_version": "2.13.5" +} +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Output Explanation + +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.9.2/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
+ +Stat | Description +------------------------------|--------------------------------------------------- +riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated +ignored_gossip_total | Total number of ignored gossip messages since node was started +rings_reconciled_total | Total number of ring reconciliation operations since node was started +rings_reconciled | Number of ring reconciliation operations in the last minute +gossip_received | Number of gossip messages received in the last minute +rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started +handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started +dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started +converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes +converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes +converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes +converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes +rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change +rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change +rebalance_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes +rebalance_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes +riak_kv_vnodes_running | Number of local Riak KV virtual nodes running +riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute +riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute +riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute +riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute +riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute +riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running +riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute +riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute +riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute +riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute +riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute diff --git a/content/riak/kv/2.9.2/developing/api/http/store-object.md b/content/riak/kv/2.9.2/developing/api/http/store-object.md new file mode 100644 index 0000000000..f1767d6c1d --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/store-object.md @@ -0,0 +1,146 @@ +--- +title: "HTTP Store Object" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Store Object" + identifier: "http_store_object" + weight: 106 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/store-object + - /riak/kv/2.9.2/dev/references/http/store-object +--- + +Stores an 
object under the specified bucket / key. Storing an object comes in +two forms, depending on whether you want to use a key of your choosing, or let +Riak assign a key to a new object. + +## Request + +```bash +POST /types/type/buckets/bucket/keys # Riak-defined key +PUT /types/type/buckets/bucket/keys/key # User-defined key +POST /buckets/bucket/keys # Riak-defined key +PUT /buckets/bucket/keys/key # User-defined key +``` + +For the sake of compatibility with older clients, `POST` is also acceptable in +the form where the key is specified. + +Important headers: + +* `Content-Type` must be set for the stored object. Set what you expect to +receive back when next requesting it. +* `X-Riak-Vclock` if the object already exists, the vector clock attached to the +object when read. +* `X-Riak-Meta-*` - any additional metadata headers that should be stored with +the object. +* `X-Riak-Index-*` - index entries under which this object should be indexed. +[Read more about Secondary Indexing]({{}}riak/kv/2.9.2/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.2/developing/api/http/link-walking) + +Optional headers (only valid on `PUT`): + +* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since` +invoke conditional request semantics, matching on the `ETag` and `Last-Modified` +of the existing object. These can be used to prevent overwriting a modified +object. If the test fails, you will receive a `412 Precondition Failed` +response. This does not prevent concurrent writes; it is possible for the +condition to evaluate to true for multiple requests if the requests occur at the +same time. + +Optional query parameters: + +* `w` (write quorum) how many replicas to write to before returning a successful +response (default is defined by the bucket level) +* `dw` (durable write quorum) how many replicas to commit to durable storage +before returning a successful response (default is defined at the bucket level) +* `pw` how many primary replicas must be online to attempt a write (default is +defined at the bucket level) +* `returnbody=[true|false]` whether to return the contents of the stored object. + +*This request must include a body (entity).* + +## Response + +Normal status codes: + +* `201 Created` (when submitting without a key) +* `200 OK` +* `204 No Content` +* `300 Multiple Choices` + +Typical error codes: + +* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N) +* `412 Precondition Failed` if one of the conditional request headers failed to +match (see above) + +Important headers: + +* `Location` a relative URL to the newly-created object (when submitting without +a key) + +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.9.2/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +may be returned if siblings existed or were created as part of the operation, +and the response can be dealt with similarly. + +## Example: Storing Without Key + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys \ + -H "Content-Type: text/plain" -d 'this is a test' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> POST /buckets/test/keys HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: text/plain +> Content-Length: 14 +> +< HTTP/1.1 201 Created +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49 +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Example: Storing With Key + +```curl +$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA== +> Content-Length: 13 +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` diff --git a/content/riak/kv/2.9.2/developing/api/http/store-search-index.md b/content/riak/kv/2.9.2/developing/api/http/store-search-index.md new file mode 100644 index 0000000000..fbeb360a59 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/http/store-search-index.md @@ -0,0 +1,52 @@ +--- +title: "HTTP Store Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Store Search Index" + identifier: "http_store_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.2/dev/references/http/store-search-index + - /riak/kv/2.9.2/dev/references/http/store-search-index +--- + +Creates a new Riak Search [index]({{}}riak/kv/2.9.2/developing/usage/search/#simple-setup). + +## Request + +``` +PUT /search/index/ +``` + +## Optional Request Body + +If you run a `PUT` request to this endpoint without a request body, Riak +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.9.2/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. + +To specify a different schema, however, you must pass Riak a JSON object +as the request body in which the `schema` field specifies the name of +the schema to use. If you've [stored a schema]({{}}riak/kv/2.9.2/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +request would create an index called `my_index` that used that schema: + +```curl +curl -XPUT http://localhost:8098/search/index/my_index \ + -H "Content-Type: application/json" \ + -d '{"schema": "my_custom_schema"}' +``` + +More information can be found in [Using Search]({{}}riak/kv/2.9.2/developing/usage/search). 
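+
+For comparison, creating an index that uses the default schema requires no
+request body at all. A minimal sketch (assuming Riak is listening on
+`localhost:8098` and no index named `my_index` exists yet):
+
+```curl
+# create an index backed by the default schema (_yz_default)
+curl -XPUT http://localhost:8098/search/index/my_index
+```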
+
+## Normal Response Codes
+
+* `204 No Content` --- The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` --- The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` --- The request timed out internally
diff --git a/content/riak/kv/2.9.2/developing/api/http/store-search-schema.md b/content/riak/kv/2.9.2/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..ce3d63cfcb
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/http/store-search-schema.md
@@ -0,0 +1,50 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/http/store-search-schema
+  - /riak/kv/2.9.2/dev/references/http/store-search-schema
+---
+
+Creates a new Riak [Search schema]({{}}riak/kv/2.9.2/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.9.2/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response Codes
+
+* `204 No Content` --- The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` --- The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict` --- The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable` --- The request timed out internally
diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..adcf8471a4
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers.md
@@ -0,0 +1,185 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/protocol-buffers
+  - /riak/kv/2.9.2/dev/references/protocol-buffers
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and
+one or more response messages. Messages are all encoded the same way,
+consisting of:
+
+* 32-bit length of message code + Protocol Buffers message in network
+  order
+* 8-bit message code to identify the Protocol Buffers message
+* N bytes of Protocol Buffers-encoded message
+
+### Example
+
+```
+00 00 00 07 09 0A 01 62 12 01 6B
+|----Len---|MC|----Message-----|
+
+Len = 0x07
+Message Code (MC) = 0x09 = RpbGetReq
+RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B
+
+Decoded Message:
+bucket: "b"
+key: "k"
+```
+
+## Message Codes
+
+Code | Message |
+:----|:--------|
+0 | `RpbErrorResp` |
+1 | `RpbPingReq` |
+2 | `RpbPingResp` |
+3 | `RpbGetClientIdReq` |
+4 | `RpbGetClientIdResp` |
+5 | `RpbSetClientIdReq` |
+6 | `RpbSetClientIdResp` |
+7 | `RpbGetServerInfoReq` |
+8 | `RpbGetServerInfoResp` |
+9 | `RpbGetReq` |
+10 | `RpbGetResp` |
+11 | `RpbPutReq` |
+12 | `RpbPutResp` |
+13 | `RpbDelReq` |
+14 | `RpbDelResp` |
+15 | `RpbListBucketsReq` |
+16 | `RpbListBucketsResp` |
+17 | `RpbListKeysReq` |
+18 | `RpbListKeysResp` |
+19 | `RpbGetBucketReq` |
+20 | `RpbGetBucketResp` |
+21 | `RpbSetBucketReq` |
+22 | `RpbSetBucketResp` |
+23 | `RpbMapRedReq` |
+24 | `RpbMapRedResp` |
+25 | `RpbIndexReq` |
+26 | `RpbIndexResp` |
+27 | `RpbSearchQueryReq` |
+28 | `RpbSearchQueryResp` |
+29 | `RpbResetBucketReq` |
+30 | `RpbResetBucketResp` |
+31 | `RpbGetBucketTypeReq` |
+32 | `RpbSetBucketTypeReq` |
+40 | `RpbCSBucketReq` |
+41 | `RpbCSUpdateReq` |
+50 | `RpbCounterUpdateReq` |
+51 | `RpbCounterUpdateResp` |
+52 | `RpbCounterGetReq` |
+53 | `RpbCounterGetResp` |
+54 | `RpbYokozunaIndexGetReq` |
+55 | `RpbYokozunaIndexGetResp` |
+56 | `RpbYokozunaIndexPutReq` |
+57 | `RpbYokozunaIndexPutResp` |
+58 | `RpbYokozunaSchemaGetReq` |
+59 | `RpbYokozunaSchemaGetResp` |
+60 | `RpbYokozunaSchemaPutReq` |
+80 | `DtFetchReq` |
+81 | `DtFetchResp` |
+82 | `DtUpdateReq` |
+83 | `DtUpdateResp` |
+253 | `RpbAuthReq` |
+254 | `RpbAuthResp` |
+255 | `RpbStartTls` |
+
+{{% note title="Message Definitions" %}}
+All Protocol Buffers messages are defined in the `riak.proto` and other
+`.proto` files in the `/src` directory of the
+RiakPB project.
+{{% /note %}}
+
+### Error Response
+
+If the request does not result in an error, Riak will return one of a
+variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`,
+depending on which request message is sent.
+
+If the server experiences an error processing a request, however, it
+will return an `RpbErrorResp` message instead of the response expected
+for the given request (e.g. `RpbGetResp` is the expected response to
+`RpbGetReq`). Error messages contain an error string and an error code,
+like this:
+
+```protobuf
+message RpbErrorResp {
+    required bytes errmsg = 1;
+    required uint32 errcode = 2;
+}
+```
+
+### Values
+
+* `errmsg` --- A string representation of what went wrong
+* `errcode` --- A numeric code. Currently, only `RIAKC_ERR_GENERAL=1`
+  is defined.
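+
+Framed as described in the Protocol section above, an error response carrying
+a hypothetical error string `"overload"` and `errcode` 1 would look like this
+on the wire (a sketch for illustration, not output captured from a live node):
+
+```
+00 00 00 0D 00 0A 08 6F 76 65 72 6C 6F 61 64 10 01
+|----Len---|MC|-------------Message--------------|
+
+Len = 0x0D
+Message Code (MC) = 0x00 = RpbErrorResp
+
+Decoded Message:
+errmsg: "overload"
+errcode: 1
+```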
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/yz-schema-put) diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..aa45ad3776 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,30 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/auth-req + - /riak/kv/2.9.2/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.9.2/using/security/basics). diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..507e6ff937 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,78 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.9.2" +menu: + riak_kv-2.9.2: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/coverage-queries + - /riak/kv/2.9.2/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
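+
+As a rough end-to-end sketch of the flow described above, assuming a
+`riak_pb2` module generated from the `.proto` definitions with `protoc`
+(a hypothetical module name, not an official client API); the message
+codes for `RpbCoverageReq`/`RpbCoverageResp` are believed to be 70 and
+71, but check `riak_pb_messages.csv` for your release:
+
+```python
+import socket
+import struct
+
+import riak_pb2  # hypothetical protoc output compiled from the riak_pb .proto files
+
+def call(sock, code, payload):
+    # Send one framed request and read one framed response
+    sock.sendall(struct.pack("!IB", len(payload) + 1, code) + payload)
+    header = b""
+    while len(header) < 5:
+        header += sock.recv(5 - len(header))
+    length, resp_code = struct.unpack("!IB", header)
+    body = b""
+    while len(body) < length - 1:
+        body += sock.recv(length - 1 - len(body))
+    return resp_code, body
+
+sock = socket.create_connection(("127.0.0.1", 8087))
+req = riak_pb2.RpbCoverageReq(bucket=b"mybucket", min_partitions=16)
+code, body = call(sock, 70, req.SerializeToString())  # 70 = RpbCoverageReq (assumed)
+
+resp = riak_pb2.RpbCoverageResp.FromString(body)
+for entry in resp.entries:
+    # Each entry names a host/port plus the opaque cover_context to
+    # include with the 2i query sent to that endpoint
+    print(entry.ip, entry.port, entry.cover_context)
+```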
diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/delete-object.md
new file mode 100644
index 0000000000..b5288f42a5
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/delete-object.md
@@ -0,0 +1,100 @@
+---
+title: "PBC Delete Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Delete Object"
+    identifier: "pbc_delete_object"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/protocol-buffers/delete-object
+  - /riak/kv/2.9.2/dev/references/protocol-buffers/delete-object
+---
+
+Delete an object in the specified [bucket type]({{}}riak/kv/2.9.2/using/cluster-operations/bucket-types)/bucket/key location.
+
+## Request
+
+```protobuf
+message RpbDelReq {
+    required bytes bucket = 1;
+    required bytes key = 2;
+    optional uint32 rw = 3;
+    optional bytes vclock = 4;
+    optional uint32 r = 5;
+    optional uint32 w = 6;
+    optional uint32 pr = 7;
+    optional uint32 pw = 8;
+    optional uint32 dw = 9;
+    optional uint32 timeout = 10;
+    optional bool sloppy_quorum = 11;
+    optional uint32 n_val = 12;
+    optional bytes type = 13;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description |
+:---------|:------------|
+`bucket` | The name of the bucket in which the object is stored
+`key` | The key under which the object is stored
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that that integer value is less than or equal
+to N, _or_ a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description |
+:---------|:------------|
+`rw` | How many replicas to delete before returning a successful response
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message, sent as a byte array. Used to prevent the deletion of an object that has been modified since the last GET request.
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the delete request will be sent
+`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter.
+
+## Response
+
+Only the message code is returned.
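+
+As a rough sketch of building this request with classes generated from
+`riak.proto` via `protoc` (the module name `riak_pb2` is an assumption,
+not part of any official client), the following produces exactly the
+frame shown in the Example below:
+
+```python
+import riak_pb2  # hypothetical protoc output compiled from riak.proto
+
+# Same request as the Example below: bucket "notabucket", key "k", rw = 1
+req = riak_pb2.RpbDelReq(bucket=b"notabucket", key=b"k", rw=1)
+payload = req.SerializeToString()
+
+# Message code 13 = RpbDelReq (see the Message Codes table)
+frame = (len(payload) + 1).to_bytes(4, "big") + bytes([13]) + payload
+```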
+ +## Example + +#### Request + +``` +Hex 00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65 + 74 12 01 6B 18 01 +Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>> + +RpbDelReq protoc decode: +bucket: "notabucket" +key: "k" +rw: 1 + +``` + +#### Response + +``` +Hex 00 00 00 01 0E +Erlang <<0,0,0,1,14>> + +RpbDelResp - only message code defined +``` diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-counter-store.md new file mode 100644 index 0000000000..1dbefa0b64 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-counter-store.md @@ -0,0 +1,31 @@ +--- +title: "PBC Data Type Counter Store" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Data Type Counter Store" + identifier: "pbc_dt_counter_store" + weight: 117 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/dt-counter-store + - /riak/kv/2.9.2/dev/references/protocol-buffers/dt-counter-store +--- + +An operation to update a [counter]({{}}riak/kv/2.9.2/developing/data-types). + +## Request + +```protobuf +message CounterOp { + optional sint64 increment = 1; +} +``` + +The `increment` value specifies how much the counter will be incremented +or decremented, depending on whether the `increment` value is positive +or negative. This operation can be used to update counters that are +stored on their own in a key or [within a map]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-map-store). diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..a136f165ad --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,127 @@ +--- +title: "PBC Data Type Fetch" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Data Type Fetch" + identifier: "pbc_dt_fetch" + weight: 114 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/dt-fetch + - /riak/kv/2.9.2/dev/references/protocol-buffers/dt-fetch +--- + +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.9.2/developing/data-types). This request results in a `DtFetchResp` +message (explained in the **Response** section below). + +## Request + +```protobuf +message DtFetchReq { + required bytes bucket = 1; + required bytes key = 2; + required bytes type = 3; + optional uint32 r = 4; + optional uint32 pr = 5; + optional bool basic_quorum = 6; + optional bool notfound_ok = 7; + optional uint32 timeout = 8; + optional bool sloppy_quorum = 9; + optional uint32 n_val = 10; + optional bool include_context = 11 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`key` | The key where the Data Type is stored +`type` | The [Using Bucket Types]({{}}riak/kv/2.9.2/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) + +#### Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. 
Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `r` and
+`pr` parameters, provided that that integer value is less than or equal
+to N, _or_ a special value denoting `one`
+(`4294967295-1`), `quorum`
+(`4294967295-2`), `all`
+(`4294967295-3`), or `default`
+(`4294967295-4`).
+
+Parameter | Description
+:---------|:-----------
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the fetch request will be sent
+`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client
+
+## Response
+
+The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+
+```protobuf
+message DtFetchResp {
+    enum DataType {
+        COUNTER = 1;
+        SET = 2;
+        MAP = 3;
+    }
+
+    optional bytes context = 1;
+    required DataType type = 2;
+    optional DtValue value = 3;
+}
+```
+
+If the `include_context` option is specified, an opaque "context" value
+will be returned along with the user-readable data. When sending an
+update request, the client should send this context as well, just as one
+would send a [vclock]({{}}riak/kv/2.9.2/learn/glossary/#vector-clock) for standard KV updates.
+
+The type of the Data Type is specified in the `type` field, and must be
+one of the three possible values of the `DataType` enum (`COUNTER`,
+`SET`, or `MAP`).
+
+The current value of the Data Type is contained in the `value` field,
+which itself contains a `DtValue` message. This message will have the
+following structure:
+
+```protobuf
+message DtValue {
+    optional sint64 counter_value = 1;
+    repeated bytes set_value = 2;
+    repeated MapEntry map_value = 3;
+}
+```
+
+If the Data Type queried is a counter, it will return an integer value
+for the counter; if a set, it will return the set's current value, as a
+list of binaries; and if a map, it will return a `MapEntry` message.
`MapEntry` messages +are structured as follows: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-map-store.md new file mode 100644 index 0000000000..e6c48d1c32 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-map-store.md @@ -0,0 +1,73 @@ +--- +title: "PBC Data Type Map Store" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Data Type Map Store" + identifier: "pbc_dt_map_store" + weight: 119 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/dt-map-store + - /riak/kv/2.9.2/dev/references/protocol-buffers/dt-map-store +--- + +An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc. + +## Request + +Operations on maps are requested using a `MapOp` message, which has the following structure: + +```protobuf +message MapOp { + repeated MapField adds = 1; + repeated MapField removes = 2; + repeated MapUpdate updates = 3; +} +``` + +In a `MapOp` message, you can either add or remove fields (sets, counters, or maps) to or from the map or update a field or multiple fields. You can include as many field additions or removals and/or field updates as you wish. + +Adding or removing a field involves including a `MapField` message in your `MapOp` operation: + +```protobuf +message MapField { + enum MapFieldType { + COUNTER = 1; + SET = 2; + REGISTER = 3; + FLAG = 4; + MAP = 5; + } + required bytes name = 1; + required MapFieldType type = 2; +} +``` + +The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated. + +If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure: + +```protobuf +message MapUpdate { + enum FlagOp { + ENABLE = 1; + DISABLE = 2; + } + required MapField field = 1; + optional CounterOp counter_op = 2; + optional SetOp set_op = 3; + optional bytes register_op = 4; + optional FlagOp flag_op = 5; + optional MapOp map_op = 6; +} +``` + +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-set-store). + +If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). + +Updating a register does not involve sending a special message type. 
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..7288756c4f --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,32 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/dt-set-store + - /riak/kv/2.9.2/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..b210218f40 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,128 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/dt-store + - /riak/kv/2.9.2/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.9.2/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.9.2/using/cluster-operations/bucket-types). 
+ +Also required is a `DtOp` message that specifies which operation is to +be performed, depending on whether the Data Type being updated is a +[counter]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-map-store). + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description +:---------|:----------- +`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.9.2/learn/glossary/#vector-clock) +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_body` | Whether to return the contents of the stored object. Defaults to `false`. +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored +`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be returned to the client when the `DtUpdateResp` is sent to the client. + +## Response + +The response to a Data Type update request is analogous to +[`RpbPutResp`]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/store-object) for KV operations. If the +`return_body` is set in the update request message (as explained above), +the message will include the opaque context of the Data Type (`context`) +and the new value of the Data Type _after_ the update has completed +(depending on whether the Data Type is a counter, set, or map). If no +key was specified in the update request, it will include the +Riak-assigned key (`key`). + +```protobuf +message DtUpdateResp { + optional bytes key = 1; + optional bytes context = 2; + optional sint64 counter_value = 3; + repeated bytes set_value = 4; + repeated MapEntry map_value = 5; +} +``` + +Assuming `return_body` is set to `true`: if a counter is updated, the +response will include an integer as the `counter_value`; if a set is +updated, a list of binaries will be return as the `set_value`; and if a +map is updated, the returned `map_value` will be a `MapEntry` message. 
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..32b61c6b4d --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,31 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/dt-union + - /riak/kv/2.9.2/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/dt-store) message. diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..60cc51f91c --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,181 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/fetch-object + - /riak/kv/2.9.2/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success basic_quorum=true would return an error +`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R +`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match +`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it +`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable + +## Response + +```protobuf +message RpbGetResp { + repeated RpbContent content = 1; + optional bytes vclock = 2; + optional bool unchanged = 3; +} +``` + +#### Values + +Value | Description +:-----|:----------- +`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty. +`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings +`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true` + +The content entries hold the object value and any metadata. +Below is the structure of a RpbContent message, which is +included in GET/PUT responses (`RpbGetResp` (above) and +[`RpbPutResp`]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/store-object), respectively): + +```protobuf +message RpbContent { + required bytes value = 1; + optional bytes content_type = 2; + optional bytes charset = 3; + optional bytes content_encoding = 4; + optional bytes vtag = 5; + repeated RpbLink links = 6; + optional uint32 last_mod = 7; + optional uint32 last_mod_usecs = 8; + repeated RpbPair usermeta = 9; + repeated RpbPair indexes = 10; + optional bool deleted = 11; +} +``` + +From the above, we can see that an `RpbContent` message will always +contain the binary `value` of the object. But it could also contain any +of the following optional parameters: + +* `content_type` --- The content type of the object, e.g. `text/plain` + or `application/json` +* `charset` --- The character encoding of the object, e.g. `utf-8` +* `content_encoding` --- The content encoding of the object, e.g. + `video/mp4` +* `vtag` --- The object's [vtag]({{}}riak/kv/2.9.2/learn/glossary/#vector-clock) +* `links` --- This parameter is associated with the now-deprecated link + walking feature and should not be used by Riak clients +* `last_mod` --- A timestamp for when the object was last modified, in + [ISO 8601 time](http://en.wikipedia.org/wiki/ISO_8601) +* `last_mod_usecs` --- A timestamp for when the object was last modified, + in [Unix time](http://en.wikipedia.org/wiki/Unix_time) +* `usermeta` --- This field stores user-specified key/value metadata + pairs to be associated with the object. `RpbPair` messages used to + send metadata of this sort are structured like this: + + ```protobuf + message RpbPair { + required bytes key = 1; + optional bytes value = 2; + } + ``` + Notice that both a key and value can be stored or just a key. + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.9.2/developing/usage/secondary-indexes) to objects (in the optional + `indexes` field). 
+* `deleted` --- Whether the object has been deleted (i.e. whether a + tombstone for the object has been found under the specified key) + +{{% note title="Note on missing keys" %}} +Remember: if a key is not stored in Riak, an `RpbGetResp` response without the +`content` and `vclock` fields will be returned. This should be mapped to +whatever convention the client language uses to return not found. The Erlang +client, for example, returns the atom `{error, notfound}`. +{{% /note %}} + +## Example + +#### Request + +``` +Hex 00 00 00 07 09 0A 01 62 12 01 6B +Erlang <<0,0,0,7,9,10,1,98,18,1,107>> + +RpbGetReq protoc decode: +bucket: "b" +key: "k" +``` + +#### Response + +``` +Hex 00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44 + 6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B + 49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B + CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65 + 30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00 +Erlang <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122, + 56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64, + 224,185,6,18,31,107,206,97,96,96,96,204,96,226,82,44,172,194,91,63, + 101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>> + +RpbGetResp protoc decode: +content { + value: "v2" + vtag: "3SDlf4INKz8hNdhyImKIru" + last_mod: 1271442363 + last_mod_usecs: 105696 +} +vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000" +``` diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-props.md new file mode 100644 index 0000000000..1cd16478a0 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-props.md @@ -0,0 +1,110 @@ +--- +title: "PBC Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Get Bucket Properties" + identifier: "pbc_get_bucket_props" + weight: 102 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/get-bucket-props + - /riak/kv/2.9.2/dev/references/protocol-buffers/get-bucket-props +--- + +Fetch a bucket's properties. + +## Request + +```protobuf +message RpbGetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.9.2/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +the `default` bucket type will be used. 
+
+## Response
+
+When an `RpbGetBucketReq` message is sent to Riak, it will respond with
+an `RpbGetBucketResp` message, which returns the bucket's properties:
+
+```protobuf
+message RpbGetBucketResp {
+    required RpbBucketProps props = 1;
+}
+```
+
+The `RpbBucketProps` value itself is structured as follows:
+
+```protobuf
+message RpbBucketProps {
+    optional uint32 n_val = 1;
+    optional bool allow_mult = 2;
+    optional bool last_write_wins = 3;
+    repeated RpbCommitHook precommit = 4;
+    optional bool has_precommit = 5 [default = false];
+    repeated RpbCommitHook postcommit = 6;
+    optional bool has_postcommit = 7 [default = false];
+    optional RpbModFun chash_keyfun = 8;
+    optional RpbModFun linkfun = 9;
+    optional uint32 old_vclock = 10;
+    optional uint32 young_vclock = 11;
+    optional uint32 big_vclock = 12;
+    optional uint32 small_vclock = 13;
+    optional uint32 pr = 14;
+    optional uint32 r = 15;
+    optional uint32 w = 16;
+    optional uint32 pw = 17;
+    optional uint32 dw = 18;
+    optional uint32 rw = 19;
+    optional bool basic_quorum = 20;
+    optional bool notfound_ok = 21;
+    optional bytes backend = 22;
+    optional bool search = 23;
+    enum RpbReplMode {
+        FALSE = 0;
+        REALTIME = 1;
+        FULLSYNC = 2;
+        TRUE = 3;
+    }
+    optional RpbReplMode repl = 24;
+    optional bytes search_index = 25;
+    optional bytes datatype = 26;
+    optional bool consistent = 27;
+}
+```
+
+#### Optional Response Values
+
+Each `RpbBucketProps` message returns all of the properties associated
+with a particular bucket. Default values for bucket properties, as well
+as descriptions of all of the above properties, can be found in the
+[configuration file]({{}}riak/kv/2.9.2/configuring/reference/#default-bucket-properties) documentation.
+
+It should be noted that the value of an `RpbBucketProps` message may
+include other message types, such as `RpbModFun` (specifying
+module-function pairs for bucket properties that require them) and
+`RpbCommitHook` (specifying the module-function pair and name of a
+commit hook). Those message types are structured like this:
+
+```protobuf
+message RpbModFun {
+    required bytes module = 1;
+    required bytes function = 2;
+}
+
+message RpbCommitHook {
+    optional RpbModFun modfun = 1;
+    optional bytes name = 2;
+}
+```
+
+{{% note title="Note on `RpbReplMode`" %}}
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
+{{% /note %}}
diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-type.md
new file mode 100644
index 0000000000..30698e9204
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-type.md
@@ -0,0 +1,33 @@
+---
+title: "PBC Get Bucket Type"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Get Bucket Type"
+    identifier: "pbc_get_bucket_type"
+    weight: 112
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/protocol-buffers/get-bucket-type
+  - /riak/kv/2.9.2/dev/references/protocol-buffers/get-bucket-type
+---
+
+Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.9.2/using/cluster-operations/bucket-types).
+
+## Request
+
+```protobuf
+message RpbGetBucketTypeReq {
+    required bytes type = 1;
+}
+```
+
+Only the name of the bucket type needs to be specified (in the `type`
+field).
+ +## Response + +A bucket type's properties will be sent to the client as part of an +[`RpbBucketProps`]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-props) message. diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/get-client-id.md new file mode 100644 index 0000000000..44bd276652 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/get-client-id.md @@ -0,0 +1,61 @@ +--- +title: "PBC Get Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Get Client ID" + identifier: "pbc_get_client_id" + weight: 127 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/get-client-id + - /riak/kv/2.9.2/dev/references/protocol-buffers/get-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Get the client id used for this connection. Client ids are used for +conflict resolution and each unique actor in the system should be +assigned one. A client id is assigned randomly when the socket is +connected and can be changed using [Set Client ID]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/set-client-id). + +## Request + +Just the `RpbGetClientIdReq` message code. No request message defined. + +## Response + +```protobuf +// Get ClientId Request - no message defined, just send RpbGetClientIdReq +message code +message RpbGetClientIdResp { + required bytes client_id = 1; // Client id in use for this connection +} +``` + +## Example + +Request + +``` +Hex 00 00 00 01 03 +Erlang <<0,0,0,1,3>> +``` + + +Response + +``` +Hex 00 00 00 07 04 0A 04 01 65 01 B5 +Erlang <<0,0,0,7,4,10,4,1,101,1,181>> + +RpbGetClientIdResp protoc decode: +client_id: "001e001265" +``` diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..1781989924 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,76 @@ +--- +title: "PBC List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "List Buckets" + identifier: "pbc_list_buckets" + weight: 100 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/list-buckets + - /riak/kv/2.9.2/dev/references/protocol-buffers/list-buckets +--- + +List all of the bucket names available. + +{{% note title="Caution" %}} +This call can be expensive for the server. Do not use in performance-sensitive +code. +{{% /note %}} + + +## Request + +Only the message code is required. 
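+
+Since this request carries no Protocol Buffers payload, the frame is
+just a length of 1 plus the message code. A minimal Python sketch
+(standard library only, not an official client) of issuing it:
+
+```python
+import socket
+import struct
+
+sock = socket.create_connection(("127.0.0.1", 8087))
+# 15 = RpbListBucketsReq (see the Message Codes table); empty payload
+sock.sendall(struct.pack("!IB", 1, 15))
+
+# Read the response frame header; the payload that follows is an
+# RpbListBucketsResp, whose definition is shown below
+header = b""
+while len(header) < 5:
+    header += sock.recv(5 - len(header))
+length, code = struct.unpack("!IB", header)
+assert code == 16  # 16 = RpbListBucketsResp
+```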
+
+## Response
+
+
+```protobuf
+message RpbListBucketsResp {
+    repeated bytes buckets = 1;
+}
+```
+
+
+Values
+
+* `buckets` --- Buckets on the server
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 01 0F
+Erlang <<0,0,0,1,15>>
+
+RpbListBucketsReq - only message code defined
+```
+
+
+#### Response
+
+```bash
+Hex      00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62
+         34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02
+         62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37
+Erlang <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10,
+         3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>>
+
+RpbListBucketsResp protoc decode:
+buckets: "b1"
+buckets: "b5"
+buckets: "b4"
+buckets: "b8"
+buckets: "b3"
+buckets: "b10"
+buckets: "b9"
+buckets: "b2"
+buckets: "b6"
+buckets: "b7"
+```
diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/list-keys.md
new file mode 100644
index 0000000000..5ef43de8ef
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/list-keys.md
@@ -0,0 +1,97 @@
+---
+title: "PBC List Keys"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "List Keys"
+    identifier: "pbc_list_keys"
+    weight: 101
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/protocol-buffers/list-keys
+  - /riak/kv/2.9.2/dev/references/protocol-buffers/list-keys
+---
+
+List all of the keys in a bucket. This is a streaming call, with
+multiple response messages sent for each request.
+
+{{% note title="Not for production use" %}}
+This operation requires traversing all keys stored in the cluster and should
+not be used in production.
+{{% /note %}}
+
+## Request
+
+```protobuf
+message RpbListKeysReq {
+    required bytes bucket = 1;
+}
+```
+
+Required Parameters
+
+* `bucket` --- The bucket to get keys from
+
+## Response
+
+```protobuf
+message RpbListKeysResp {
+    repeated bytes keys = 1;
+    optional bool done = 2;
+}
+```
+
+#### Values
+
+* **keys** --- A batch of keys in the bucket
+* **done** --- Set to `true` on the last response packet
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73
+Erlang <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>>
+
+RpbListKeysReq protoc decode:
+bucket: "listkeys"
+
+```
+
+#### Response Packet 1
+
+```bash
+Hex      00 00 00 04 12 0A 01 34
+Erlang <<0,0,0,4,18,10,1,52>>
+
+RpbListKeysResp protoc decode:
+keys: "4"
+
+```
+
+#### Response Packet 2
+
+```bash
+Hex      00 00 00 08 12 0A 02 31 30 0A 01 33
+Erlang <<0,0,0,8,18,10,2,49,48,10,1,51>>
+
+RpbListKeysResp protoc decode:
+keys: "10"
+keys: "3"
+```
+
+
+#### Response Packet 3
+
+```bash
+Hex      00 00 00 03 12 10 01
+Erlang <<0,0,0,3,18,16,1>>
+
+RpbListKeysResp protoc decode:
+done: true
+
+```
diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/mapreduce.md
new file mode 100644
index 0000000000..8db958ff79
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/mapreduce.md
@@ -0,0 +1,149 @@
+---
+title: "PBC MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "MapReduce"
+    identifier: "pbc_mapreduce"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/protocol-buffers/mapreduce
+  - /riak/kv/2.9.2/dev/references/protocol-buffers/mapreduce
+---
+
+Execute a MapReduce job.
+
+## Request
+
+
+```protobuf
+message RpbMapRedReq {
+    required bytes request = 1;
+    required bytes content_type = 2;
+}
+```
+
+
+Required Parameters
+
+* `request` --- MapReduce job
+* `content_type` --- Encoding for the MapReduce job
+
+MapReduce jobs can be encoded in two different ways:
+
+* `application/json` --- JSON-encoded MapReduce job
+* `application/x-erlang-binary` --- Erlang external term format
+
+The JSON encoding is the same as the [REST API]({{}}riak/kv/2.9.2/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{}}riak/kv/2.9.2/developing/app-guide/advanced-mapreduce/#erlang).
+
+## Response
+
+The results of the MapReduce job are returned for each phase that
+generates a result, encoded in the same format the job was submitted in.
+Multiple response messages will be returned, followed by a final message
+at the end of the job.
+
+```protobuf
+message RpbMapRedResp {
+    optional uint32 phase = 1;
+    optional bytes response = 2;
+    optional bool done = 3;
+}
+```
+
+
+Values
+
+* `phase` --- Phase number of the MapReduce job
+* `response` --- Response encoded with the `content_type` submitted
+* `done` --- Set to `true` on the last response packet
+
+## Example
+
+Here is an example of submitting a JSON-encoded job that sums up a
+bucket full of JSON-encoded values:
+
+```
+{"inputs": "bucket_501653",
+ "query":
+    [{"map": {"arg": null,
+              "name": "Riak.mapValuesJson",
+              "language": "javascript",
+              "keep": false}},
+     {"reduce": {"arg": null,
+                 "name": "Riak.reduceSum",
+                 "language": "javascript",
+                 "keep": true}}]}
+```
+
+Request
+
+```bash
+Hex      00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
+         22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
+         33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
+         6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
+         6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
+         6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
+         2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
+         61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
+         70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
+         65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
+         6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
+         69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
+         22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
+         61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
+         3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
+         69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
+Erlang <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
+         117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
+         121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
+         110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
+         109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
+         110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
+         116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
+         44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
+         32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
+         46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
+         97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
+         34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
+         112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>
+
+RpbMapRedReq protoc decode:
+request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null,
+"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}},
+ {"reduce": {"arg": null, "name": "Riak.reduceSum",
"language": +"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/ping.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..e41024373e --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/ping.md @@ -0,0 +1,42 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/ping + - /riak/kv/2.9.2/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..d3d9d686af --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,59 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/2.9.2/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/2.9.2/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
+
+## Example
+
+Request to reset the properties for the bucket `friends`:
+
+#### Request
+
+```bash
+Hex      00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
+Erlang <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>
+
+RpbResetBucketReq protoc decode:
+bucket: "friends"
+
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 01 1E
+Erlang <<0,0,0,1,30>>
+
+RpbResetBucketResp - only message code defined
+```
diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/search.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..18643135de
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/search.md
@@ -0,0 +1,148 @@
+---
+title: "PBC Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Search"
+    identifier: "pbc_search"
+    weight: 109
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/protocol-buffers/search
+  - /riak/kv/2.9.2/dev/references/protocol-buffers/search
+---
+
+Send a Search request to retrieve a list of documents, along with a few
+stats.
+
+## Request
+
+
+```protobuf
+message RpbSearchQueryReq {
+    required bytes q = 1;
+    required bytes index = 2;
+    optional uint32 rows = 3;
+    optional uint32 start = 4;
+    optional bytes sort = 5;
+    optional bytes filter = 6;
+    optional bytes df = 7;
+    optional bytes op = 8;
+    repeated bytes fl = 9;
+    optional bytes presort = 10;
+}
+```
+
+Required Parameters
+
+* `q` --- The contents of the query
+* `index` --- The name of the index to search
+
+Optional Parameters
+
+* `rows` --- The maximum number of rows to return
+* `start` --- A start offset, i.e. the number of keys to skip before
+  returning values
+* `sort` --- How the search results are to be sorted
+* `filter` --- Filters search with additional query scoped to inline
+  fields
+* `df` --- Override the `default_field` setting in the schema file
+* `op` --- `and` or `or`, to override the `default_op` operation setting
+  in the schema file
+* `fl` --- Limits the fields returned
+* `presort` --- Presort. The options are `key` or `score`
+
+
+## Response
+
+The results of a search query are returned as a repeating list of 0 or
+more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
+more key/value pairs (`RpbPair`) that match the given request
+parameters. It also returns the maximum search score and the number of
+results.
+
+
+```protobuf
+// RpbPair is a generic key/value pair datatype used for
+// other message types
+message RpbPair {
+    required bytes key = 1;
+    optional bytes value = 2;
+}
+
+message RpbSearchDoc {
+    repeated RpbPair fields = 1;
+}
+
+message RpbSearchQueryResp {
+    repeated RpbSearchDoc docs = 1;
+    optional float max_score = 2;
+    optional uint32 num_found = 3;
+}
+```
+
+Values
+
+* `docs` --- A list of docs that match the search request
+* `max_score` --- The top score returned
+* `num_found` --- Returns the total number of values matched by this
+  search
+
+
+## Example
+
+Request
+
+Here we search for any animals that begin with the string `pig`. We only
+want the first 100, and sort the values by a `name` field.
+ +```bash +RpbSearchQueryReq protoc decode: +q: "pig*" +index: "animals" +rows: 100 +start: 0 +sort: "name" + +Hex 00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E + 69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65 +Erlang <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110, + 105,109,97,108,115,24,100,32,0,42,4,110,97, + 109,101>> +``` + +Response + +```bash +Hex 00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D + 61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65 + 12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69 + 6D 61 6C 12 06 70 69 67 65 6F 6E 18 02 +Erlang <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109, + 97,108,18,3,112,105,103,10,12,10,4,110,97, + 109,101,18,4,102,114,101,100,10,18,10,16,10, + 6,97,110,105,109,97,108,18,6,112,105,103, + 101,111,110,24,2>> + +RpbSearchQueryResp protoc decode: +docs { + fields { + key: "animal" + value: "pig" + } + fields { + key: "name" + value: "fred" + } +} +docs { + fields { + key: "animal" + value: "pigeon" + } +} +num_found: 2 +``` diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/secondary-indexes.md new file mode 100644 index 0000000000..6608f80fab --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/secondary-indexes.md @@ -0,0 +1,121 @@ +--- +title: "PBC Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Secondary Indexes" + identifier: "pbc_secondary_indexes" + weight: 108 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/secondary-indexes + - /riak/kv/2.9.2/dev/references/protocol-buffers/secondary-indexes +--- + +Request a set of keys that match a secondary index query. + +## Request + +```protobuf +message RpbIndexReq { + enum IndexQueryType { + eq = 0; + range = 1; + } + required bytes bucket = 1; + required bytes index = 2; + required IndexQueryType qtype = 3; + optional bytes key = 4; + optional bytes range_min = 5; + optional bytes range_max = 6; + optional bool return_terms = 7; + optional bool stream = 8; + optional uint32 max_results = 9; + optional bytes continuation = 10; + optional uint32 timeout = 11; + optional bytes type = 12; + optional bytes term_regex = 13; + optional bool pagination_sort = 14; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`index` | The name of the index to be queried +`qtype` | The type of index query to be performed. 
This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`key` | The exact index value to match if `qtype` is set to `eq`
+`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
+`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
+`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
+`max_results` | If pagination is turned on, the number of results to be returned to the client
+`continuation` | The opaque continuation value returned in a previous paginated response, used to retrieve the next page of results
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.9.2/developing/usage/bucket-types).
+`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
+`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
+
+## Response
+
+The results of a Secondary Index query are returned as a repeating list
+of 0 or more keys that match the given request parameters.
+
+```protobuf
+message RpbIndexResp {
+    repeated bytes keys = 1;
+    repeated RpbPair results = 2;
+    optional bytes continuation = 3;
+    optional bool done = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`keys` | A list of keys that match the index request
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/fetch-object).
+`continuation` | Used for paginated responses
+`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
+
+## Example
+
+#### Request
+
+Here we look for any exact matches of `chicken` on an `animal_bin` index
+for a bucket named `farm`.
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/server-info.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..9fce1f8b36 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,58 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/server-info + - /riak/kv/2.9.2/dev/references/protocol-buffers/server-info +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..36497a87bd --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,68 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/2.9.2/dev/references/protocol-buffers/set-bucket-props +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/2.9.2/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
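+
+If you're using the official Erlang client rather than hand-encoding
+messages, the client assembles `RpbSetBucketReq` and decodes
+`RpbSetBucketResp` for you. A minimal sketch, assuming a local node
+listening on the default protocol buffers port (8087):
+
+```erlang
+%% Sketch: set allow_mult=true on the bucket <<"friends">> via the
+%% Erlang client, which sends RpbSetBucketReq under the hood.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
+```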
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..2af1031637 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,31 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/2.9.2/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.9.2/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/get-bucket-props). diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..1e66d1c4ca --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,62 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/set-client-id + - /riak/kv/2.9.2/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
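+
+For illustration only (keeping the deprecation notice above in mind),
+a sketch of setting a client ID from the Erlang client, assuming your
+client version still exposes the deprecated call and using a
+hypothetical actor name:
+
+```erlang
+%% Sketch: set an opaque client ID for this connection. Deprecated in
+%% Riak 1.4+; assumes riakc_pb_socket:set_client_id/2 is available.
+%% <<"my-actor-1">> is a placeholder for your own actor identifier.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+riakc_pb_socket:set_client_id(Pid, <<"my-actor-1">>).
+```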
+
+## Example
+
+Request
+
+```
+Hex      00 00 00 07 05 0A 04 01 65 01 B6
+Erlang   <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "001e001266"
+
+```
+
+
+Response
+
+```
+Hex      00 00 00 01 06
+Erlang   <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..3edeed5b6f
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,150 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/protocol-buffers/store-object
+  - /riak/kv/2.9.2/dev/references/protocol-buffers/store-object
+---
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/2.9.2/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.9.2/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.9.2/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/2.9.2/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/2.9.2/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+    required bytes bucket = 1;
+    optional bytes key = 2;
+    optional bytes vclock = 3;
+    required RpbContent content = 4;
+    optional uint32 w = 5;
+    optional uint32 dw = 6;
+    optional bool return_body = 7;
+    optional uint32 pw = 8;
+    optional bool if_not_modified = 9;
+    optional bool if_none_match = 10;
+    optional bool return_head = 11;
+    optional uint32 timeout = 12;
+    optional bool asis = 13;
+    optional bool sloppy_quorum = 14;
+    optional uint32 n_val = 15;
+    optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, and
+`pw` parameters, provided that the integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Omit if this is a new key or if you deliberately want to create a sibling. 
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`.
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_head` | Return the metadata for the now-stored object without returning the value of the object
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+
+The `if_not_modified`, `if_none_match`, and `asis` parameters are set
+only for messages sent between nodes in a Riak cluster and should not be
+set by Riak clients.
+
+#### Response
+
+```protobuf
+message RpbPutResp {
+    repeated RpbContent contents = 1;
+    optional bytes vclock = 2;
+    optional bytes key = 3;
+}
+```
+
+If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
+will contain the current object after the PUT completes, in `contents`,
+as well as the object's [causal context]({{}}riak/kv/2.9.2/learn/concepts/causal-context), in the `vclock`
+field. The `key` will be sent only if the server generated a random key
+for the object.
+
+If `return_body` is not set and no key is generated, the PUT response
+will be empty.
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B
+         22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01
+Erlang   <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34,
+           98,97,114,34,125,40,2,56,1>>
+
+RpbPutReq protoc decode:
+bucket: "b"
+key: "k"
+content {
+  value: "{"foo":"bar"}"
+}
+w: 2
+return_body: true
+
+```
+
+#### Response
+
+```
+Hex      00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A
+         22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39
+         36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF
+         B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60
+         CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC
+         0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0
+         12 FA 20 89 2C 00
+Erlang   <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125,
+           42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86,
+           106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96,
+           96,96,202,96,226,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33,
+           182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>>
+
+RpbPutResp protoc decode:
+contents {
+  value: "{"foo":"bar"}"
+  vtag: "1caykOD96iNAhomyeVjOYC"
+  last_mod: 1271453743
+  last_mod_usecs: 406416
+}
+vclock: "k316a```312`312005R,,351014206031L211214y254014Z!266G371
+302l315I254rw|240022372 211,000"
+
+```
diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-delete.md
new file mode 100644
index 0000000000..eac0e1de8d
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-delete.md
@@ -0,0 +1,33 @@
+---
+title: "PBC Yokozuna Index Delete"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Yokozuna Index Delete"
+    identifier: "pbc_yz_index_delete"
+    weight: 122
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/protocol-buffers/yz-index-delete
+ 
- /riak/kv/2.9.2/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/#message-codes) code with no data on success. + diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..fc853008f7 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,59 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/yz-index-get + - /riak/kv/2.9.2/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..18777fa817 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,45 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.2/dev/references/protocol-buffers/yz-index-put + - /riak/kv/2.9.2/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). 
Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/#message-codes) code with no data on success.
+
diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-schema-get.md
new file mode 100644
index 0000000000..321cf1b538
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-schema-get.md
@@ -0,0 +1,48 @@
+---
+title: "PBC Yokozuna Schema Get"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Yokozuna Schema Get"
+    identifier: "pbc_yz_schema_get"
+    weight: 123
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/protocol-buffers/yz-schema-get
+  - /riak/kv/2.9.2/dev/references/protocol-buffers/yz-schema-get
+---
+
+Fetch a [search schema]({{}}riak/kv/2.9.2/developing/usage/search-schemas) from Riak Search.
+
+## Request
+
+In a request message, you only need to specify the name of the schema as
+a binary (under `name`):
+
+```protobuf
+message RpbYokozunaSchemaGetReq {
+    required bytes name = 1;  // Schema name
+}
+```
+
+## Response
+
+```protobuf
+message RpbYokozunaSchemaGetResp {
+    required RpbYokozunaSchema schema = 1;
+}
+```
+
+The response message will include a `RpbYokozunaSchema` structure.
+
+```protobuf
+message RpbYokozunaSchema {
+    required bytes name = 1;
+    optional bytes content = 2;
+}
+```
+
+This message includes the schema `name` and its XML `content`.
diff --git a/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-schema-put.md
new file mode 100644
index 0000000000..164f3cb7ce
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/protocol-buffers/yz-schema-put.md
@@ -0,0 +1,41 @@
+---
+title: "PBC Yokozuna Schema Put"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Yokozuna Schema Put"
+    identifier: "pbc_yz_schema_put"
+    weight: 124
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/references/protocol-buffers/yz-schema-put
+  - /riak/kv/2.9.2/dev/references/protocol-buffers/yz-schema-put
+---
+
+Create a new Solr [search schema]({{}}riak/kv/2.9.2/developing/usage/search-schemas).
+
+## Request
+
+```protobuf
+message RpbYokozunaSchemaPutReq {
+    required RpbYokozunaSchema schema = 1;
+}
+```
+
+Each message must contain a `RpbYokozunaSchema` object structure.
+
+```protobuf
+message RpbYokozunaSchema {
+    required bytes name = 1;
+    optional bytes content = 2;
+}
+```
+
+This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.9.2/developing/usage/search-schemas) `content` as XML.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.2/developing/api/protocol-buffers/#message-codes) code with no data on success. 
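+
+As a point of reference, the official Erlang client exposes this
+operation as `create_search_schema/3`. A sketch, in which the schema
+name and the `my_schema.xml` file are hypothetical placeholders:
+
+```erlang
+%% Sketch: upload a custom Solr schema; the client wraps
+%% RpbYokozunaSchemaPutReq. "my_schema.xml" is a placeholder file.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, SchemaXML} = file:read_file("my_schema.xml"),
+ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXML).
+```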
diff --git a/content/riak/kv/2.9.2/developing/api/repl-hooks.md b/content/riak/kv/2.9.2/developing/api/repl-hooks.md
new file mode 100644
index 0000000000..fdbd001bfa
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/api/repl-hooks.md
@@ -0,0 +1,192 @@
+---
+title_supertext: "Riak Multi-Datacenter Replication:"
+title: "Hooks API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Multi-Datacenter REPL Hooks API"
+    identifier: "apis_repl_hooks"
+    weight: 100
+    parent: "developing_apis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.2/ops/mdc/v2/hooks
+  - /riak/kv/2.9.2/ops/mdc/v2/hooks
+---
+[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl
+
+This document is a guide to developing extensions for Riak's
+Multi-Datacenter Replication feature.
+
+## Replication Hooks
+
+Riak allows applications to register replication hooks to control
+either of the following:
+
+* when extra objects need to be replicated along with the current object
+* when an object should _not_ be replicated.
+
+To register a hook, you must call the following function in an
+application-specific Erlang module, where `MyMod` is to be replaced
+with the name of your custom module:
+
+```erlang
+riak_core:register([{repl_helper, MyMod}]).
+```
+
+## Replication Hook API
+
+A replication hook must implement the following functions:
+
+### send_realtime/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook controls whether an [object][object]
+replicated in realtime should be sent. To send this object, return `ok`;
+to prevent the object from being sent, return `cancel`. You can also
+return a list of Riak objects to be replicated immediately *before* the
+current object. This is useful when you have an object that refers to
+other objects, e.g. a chunked file, and want to ensure that all of the
+dependency objects are replicated before the dependent object.
+
+### send/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook is used in fullsync replication. To send this
+[object][object],
+return `ok`; to prevent the object from being sent, return `cancel`. You
+can also return a list of Riak objects to be replicated immediately
+*before* the current object. This is useful when you have an object
+that refers to other objects, e.g. a chunked file, and want to ensure
+that all the dependency objects are replicated before the dependent object.
+
+### recv/1
+
+```erlang
+(riak_object) -> ok | cancel
+```
+
+When an [object][object]
+is received by the client site, this hook is run. You can use it to
+update metadata or to deny the object.
+
+## Implementing a Sample Replication Hook
+
+The following is a simple replication hook that will log when an object
+is received via replication. For more information about the functions in
+the sample, see the [Replication Hook API](#replication-hook-api) section above.
+
+Here is the relevant Erlang code:
+
+```erlang
+%% Riak Enterprise MDC replication hook sample
+
+-module(riak_replication_hook_sample).
+-export([register/0]).
+-export([recv/1, send/2, send_realtime/2]). 
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + diff --git a/content/riak/kv/2.9.2/developing/app-guide.md b/content/riak/kv/2.9.2/developing/app-guide.md new file mode 100644 index 0000000000..d7af5d8831 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/app-guide.md @@ -0,0 +1,416 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/2.9.2/dev/using/application-guide/ + - /riak/kv/2.9.2/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/2.9.2/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.9.2/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.9.2/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/2.9.2/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.9.2/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.9.2/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.9.2/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.9.2/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.9.2/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.9.2/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.9.2/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.9.2/developing/usage/search +[use ref search]: {{}}riak/kv/2.9.2/using/reference/search +[usage 2i]: {{}}riak/kv/2.9.2/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.9.2/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.9.2/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.9.2/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.9.2/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.9.2/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.9.2/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.9.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.9.2/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.2/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/2.9.2/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/2.9.2/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.2/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.2/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.2/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.2/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.2/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.2/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.9.2/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.9.2/using/reference/strong-consistency +[cluster 
ops strong consistency]: {{}}riak/kv/2.9.2/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/2.9.2/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/2.9.2/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/2.9.2/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/2.9.2/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/2.9.2/setup/installing
+[getting started]: {{}}riak/kv/2.9.2/developing/getting-started
+[usage index]: {{}}riak/kv/2.9.2/developing/usage
+[usage search schema]: {{}}riak/kv/2.9.2/developing/usage/search-schemas
+[glossary]: {{}}riak/kv/2.9.2/learn/glossary
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answers to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data** --- While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects** --- Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects** --- Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys** --- It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects. Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]** --- If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so. 
But if
+  your data can be modeled as a [counter][dev data types#counters],
+  [set][dev data types#sets], or [map][dev data types#maps], you
+  should seriously consider using [Riak Data Types][dev data types],
+  which can speed application development and transfer a great deal of
+  complexity away from the application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you use it to store:
+
+* **Objects that exceed 1-2MB in size** --- If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies** --- If your data cannot be
+  easily denormalized or if it requires that objects can be easily
+  assembled into and accessible as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommendable for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search] --- Getting started with Riak Search
+* [Search Details][use ref search] --- A detailed overview of the concepts and design
+  considerations behind Riak Search
+* [Search Schema][usage search schema] --- How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API** --- Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon).
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you. 
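+
+As a quick illustration of how lightweight querying can be once an
+index exists, here's a sketch using the official Erlang client; the
+`famous` index and the `name_s` field are hypothetical:
+
+```erlang
+%% Sketch: run a Solr-style query against a hypothetical "famous" index.
+%% Results arrive as a search_results record of matched documents.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>).
+```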
+ +### When Not to Use Search + +* **When deep pagination is needed** --- At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** --- In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] --- A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] --- A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] --- An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** --- If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** --- If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** --- If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** --- While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
If you have data that requires
+  a modeling solution that can't be covered, you should stick to
+  standard K/V operations.
+* **When object size is of significant concern** --- Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth. Riak
+comes equipped with a set of default MapReduce jobs that you can employ,
+or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce] --- A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce] --- A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only** --- You should use MapReduce only when truly
+  necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do** --- Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i] --- A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i] --- Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination** --- At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead.
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes** --- If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes. 
+* **If you're using Bitcask** --- 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy accordingly. To see if this feature might be a
+good fit for your application, we recommend checking out the
+following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What Are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers that want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable?
+
+Although Riak always performs best when storing and retrieving immutable
+data, Riak also handles mutable objects very ably using a variety of
+eventual consistency principles. 
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] --- Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] --- A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] --- How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] --- A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] --- A listing of frequently used terms in Riak's + documentation + diff --git a/content/riak/kv/2.9.2/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.9.2/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..c8c9f0c4c1 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,798 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/2.9.2/dev/advanced/mapreduce/ + - /riak/kv/2.9.2/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/2.9.2/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.9.2/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.9.2/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.9.2/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.9.2/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.9.2/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to
+run MapReduce jobs using Erlang or JavaScript.
+
+{{% note title="Deprecation Warning" %}}
+JavaScript MapReduce is deprecated and will be removed in a future version.
+{{% /note %}}
+
+
+### Why Do We Use MapReduce for Querying Riak KV?
+
+Key/value stores like Riak KV generally do not offer the kinds of complex
+querying capabilities found in other data storage systems, such as
+relational databases. MapReduce enables you to perform powerful queries
+over the data stored in Riak KV but should be used with caution.
+
+The main goal of MapReduce is to spread the processing of a query across
+many systems to take advantage of parallel processing power. This is
+generally done by dividing the query into several steps, i.e. dividing
+the dataset into several chunks and then running those step/chunk pairs
+on separate physical hosts. Riak KV's MapReduce has an additional goal:
+increasing data locality. When processing a large dataset, it's often
+much more efficient to take the computation to the data than it is to
+bring the data to the computation.
+
+"Map" and "Reduce" are phases in the query process. Map functions take
+one piece of data as input and produce zero or more results as output.
+If you're familiar with [mapping over a list][mapping list]
+in functional programming languages, you're already familiar with the
+"Map" steps in a MapReduce query.
+
+## MapReduce caveats
+
+MapReduce should generally be treated as a fallback rather than a
+standard part of an application. There are often ways to model data
+such that dynamic queries become single key retrievals, which are
+dramatically faster and more reliable in Riak KV, and tools such as Riak
+Search and 2i are simpler to use and may place less strain on a
+cluster.
+
+### R=1
+
+One consequence of Riak KV's processing model is that MapReduce queries
+have an effective `R` value of 1. The queries are distributed
+to a representative sample of the cluster where the data is expected to
+be found, and if one server lacks a copy of data it's supposed to have,
+a MapReduce job will not attempt to look for it elsewhere.
+
+For more on the value of `R`, see our documentation on [replication properties][apps replication properties].
+
+### Key lists
+
+Asking Riak KV to generate a list of all keys in a production environment
+is generally a bad idea. It's an expensive operation.
+
+Attempting to constrain that operation to a bucket (e.g., a
+`mapred_bucket`) does not help because Riak KV must still
+pull all keys from storage to determine which ones are in the
+specified bucket.
+
+If at all possible, run MapReduce against a list of known keys.
+
+### Code distribution
+
+As we'll discuss in this document, the functions invoked from Erlang
+MapReduce must be available on all servers in the cluster unless
+using the client library from an Erlang shell.
+
+### Security restrictions
+
+If Riak's security functionality is enabled, there are two
+restrictions on MapReduce that come into play:
+
+* The `riak_kv.mapreduce` permission must be granted to the user (or
+  via the user's groups)
+* Other than the module `riak_kv_mapreduce`, any Erlang modules
+  distributed with Riak KV will **not** be accessible to custom MapReduce
+  code unless made available via the `add_path` mechanism documented
+  in [Installing Custom Code][use ref custom code].
+
+## How Riak KV's MapReduce Queries Are Specified
+
+MapReduce queries in Riak KV have two components: (1) a list of inputs and
+(2) a list of "steps," or "phases." 
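+
+Concretely, a query specification in the Erlang client's notation might
+look like the sketch below; `my_bucket` and the keys are hypothetical
+placeholders (under the `default` bucket type), and the phase functions
+are stock helpers from Riak KV's `riak_kv_mapreduce` module:
+
+```erlang
+%% Sketch: a two-phase query over two known keys. The map phase extracts
+%% each object's value; the reduce phase sorts the combined results and
+%% keeps them (the 'true' flag) as the query's final output.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+Inputs = [{<<"my_bucket">>, <<"key1">>},
+          {<<"my_bucket">>, <<"key2">>}],
+Query = [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+         {reduce, {modfun, riak_kv_mapreduce, reduce_sort}, none, true}],
+{ok, Results} = riakc_pb_socket:mapred(Pid, Inputs, Query).
+```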
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any is included, with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it. 
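+
+To make the `set-union` example above concrete, here is a sketch of a
+reduce function whose output is "the same shape" as its input, so
+repeated (re-reduce) applications are safe:
+
+```erlang
+%% Sketch: set union as a reduce function. lists:usort/1 sorts and
+%% deduplicates, so F([1,2,2,3]) -> [1,2,3], and re-reducing the merged
+%% list F([1,2,3,3,4,5]) -> [1,2,3,4,5].
+fun(Values, _Arg) ->
+    lists:usort(Values)
+end.
+```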
+
+### How a Link Phase Works in Riak KV
+
+Link phases find links matching patterns specified in the query
+definition. The patterns specify which buckets and tags links must have.
+
+"Following a link" means adding it to the output list of this phase. The
+output of this phase is often most useful as input to a map phase or to
+another reduce phase.
+
+## Invoking MapReduce
+
+To illustrate some key ideas, we'll define a simple module that
+implements a map function to return the bucket/key pairs contained in a
+bucket and use it in a MapReduce query via Riak KV's HTTP API.
+
+Here is our example MapReduce function:
+
+```erlang
+-module(mr_example).
+
+-export([get_keys/3]).
+
+% Returns bucket and key pairs from a map phase
+get_keys(Value,_Keydata,_Arg) ->
+  [{riak_object:bucket(Value),riak_object:key(Value)}].
+```
+
+Save this file as `mr_example.erl` and proceed to compiling the module.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the
+Riak KV installation or the version of Erlang used when compiling Riak KV from
+source.
+{{% /note %}}
+
+Compiling the module is a straightforward process:
+
+```bash
+erlc mr_example.erl
+```
+
+Successful compilation will result in a new `.beam` file, `mr_example.beam`.
+
+Send this file to your operator, or read about [installing custom code][use ref custom code]
+on your Riak KV nodes. Once your file has been installed, all that
+remains is to try the custom function in a MapReduce query. For
+example, let's return keys contained within a bucket named `messages`
+(please pick a bucket that contains keys in your environment).
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}'
+```
+
+The result should be a JSON map of bucket and key names expressed as key/value pairs.
+
+{{% note %}}
+Be sure to install the MapReduce function as described above on all of
+the nodes in your cluster to ensure proper operation.
+{{% /note %}}
+
+## Phase functions
+
+MapReduce phase functions have the same properties, arguments, and
+return values whether you write them in JavaScript or Erlang.
+
+### Map phase functions
+
+Map functions take three arguments (in Erlang, arity-3 is required).
+Those arguments are:
+
+ 1. `Value`: the value found at a key. This will be a Riak object, which
+    in Erlang is defined and manipulated by the `riak_object` module.
+    In JavaScript, a Riak object looks like this:
+
+    ```javascript
+    {
+      "bucket_type" : BucketTypeAsString,
+      "bucket" : BucketAsString,
+      "key" : KeyAsString,
+      "vclock" : VclockAsString,
+      "values" : [
+        {
+          "metadata" : {
+            "X-Riak-VTag":VtagAsString,
+            "X-Riak-Last-Modified":LastModAsString,
+            "Links":[...List of link objects],
+            // ...other metadata...
+          },
+          "data" : ObjectData
+        },
+        // ...other metadata/data values (siblings)...
+      ]
+    }
+    ```
+ 2. `KeyData`: key data that was submitted with the inputs to the query or phase.
+ 3. `Arg`: a static argument for the entire phase that was submitted with the query.
+
+A map phase should produce a list of results. You will see errors if
+the output of your map function is not a list. Return the empty list if
+your map function chooses not to produce output. If your map phase is
+followed by another map phase, the output of the function must be
+compatible with the input to a map phase: a list of bucket-key pairs or
+`bucket-key-keydata` triples.
+
+#### Map function examples
+
+This map function returns the value (data) of the object being mapped:
+
+```erlang
+fun(Value, _KeyData, _Arg) ->
+    [riak_object:get_value(Value)]
+end.
+```
+
+This map function filters its inputs based on the arg and returns
+bucket-key pairs for a subsequent map phase:
+
+```erlang
+fun(Value, _KeyData, Arg) ->
+    Key = riak_object:key(Value),
+    Bucket = riak_object:bucket(Value),
+    case erlang:byte_size(Key) of
+        L when L > Arg ->
+            [{Bucket,Key}];
+        _ -> []
+    end
+end.
+```
+
+### Reduce phase functions
+
+Reduce functions take two arguments. Those arguments are:
+
+1. `ValueList`: the list of values produced by the preceding phase in the MapReduce query.
+2. `Arg`: a static argument for the entire phase that was submitted with the query.
+
+A reduce function should produce a list of values, but it must also be
+true that the function is commutative, associative, and idempotent. That
+is, if the input list `[a,b,c,d]` is valid for a given F, then all of
+the following must produce the same result:
+
+```erlang
+F([a,b,c,d])
+F([a,d] ++ F([c,b]))
+F([F([a]),F([c]),F([b]),F([d])])
+```
+
+#### Reduce function examples
+
+This reduce function assumes the values in the input are numbers and
+sums them:
+
+```erlang
+fun(Values, _Arg) ->
+    [lists:foldl(fun erlang:'+'/2, 0, Values)]
+end.
+```
+
+This reduce function sorts its inputs:
+
+```erlang
+fun(Values, _Arg) ->
+    lists:sort(Values)
+end.
+```
+
+## MapReduce Examples
+
+Riak KV supports describing MapReduce queries in Erlang syntax through the
+Protocol Buffers API. This section demonstrates how to do so using the
+Erlang client.
+
+{{% note title="Distributing Erlang MapReduce Code" %}}
+Any modules and functions you use in your Erlang MapReduce calls must be
+available on all nodes in the cluster. Please read about
+[installing custom code]({{}}riak/kv/2.9.2/using/reference/custom-code).
+{{% /note %}}
+
+### Erlang Example
+
+Before running some MapReduce queries, let's create some objects to
+run them on. Unlike the first example, where we compiled
+`mr_example.erl` and distributed it across the cluster, this time
+we'll use the [Erlang client library][erlang client] and shell.
+
+```erlang
+1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087).
+2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>,
+                        term_to_binary(["eggs", "bacon"])).
+3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>,
+                         term_to_binary(["bread", "bacon"])).
+4> riakc_pb_socket:put(Client, Yours, [{w, 1}]).
+5> riakc_pb_socket:put(Client, Mine, [{w, 1}]).
+```
+
+Now that we have a client and some data, let's run a query that counts
+how many times each grocery item occurs.
+
+```erlang
+6> Count = fun(G, undefined, none) ->
+       [dict:from_list([{I, 1}
+           || I <- binary_to_term(riak_object:get_value(G))])]
+   end.
+7> Merge = fun(Gcounts, none) ->
+       [lists:foldl(fun(G, Acc) ->
+                        dict:merge(fun(_, X, Y) -> X+Y end,
+                                   G, Acc)
+                    end,
+                    dict:new(),
+                    Gcounts)]
+   end.
+8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred(
+       Client,
+       [{<<"groceries">>, <<"mine">>},
+        {<<"groceries">>, <<"yours">>}],
+       [{map, {qfun, Count}, none, false},
+        {reduce, {qfun, Merge}, none, true}]).
+9> L = dict:to_list(R).
+```
+
+{{% note title="Riak Object Representations" %}}
+Note how the `riak_object` module is used in the MapReduce
+function but the `riakc_obj` module is used on the client.
+Riak objects are represented differently internally to the cluster than
+they are externally.
+{{% /note %}}
+
+Given the lists of groceries we created, the sequence of commands above
+would result in `L` being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`.
+
+### Erlang Query Syntax
+
+`riakc_pb_socket:mapred/3` takes a client and two lists as arguments.
+The first list contains bucket-key pairs. The second list contains
+the phases of the query.
+
+`riakc_pb_socket:mapred_bucket/3` replaces the first list of
+bucket-key pairs with the name of a bucket; see the warnings above
+about using this in a production environment.
+
+#### Inputs
+
+The `mapred/3` input objects are given as a list of tuples in the
+format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and
+`Key` should be binaries, and `KeyData` can be any Erlang term. The
+former form is equivalent to `{{Bucket,Key},undefined}`.
+
+#### Query
+
+The query is given as a list of map, reduce, and link phases. Map and
+reduce phases are each expressed as tuples in the following form:
+
+```erlang
+{Type, FunTerm, Arg, Keep}
+```
+
+`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument
+(any Erlang term) to pass to each execution of the phase. `Keep` is
+either `true` or `false` and determines whether results from the phase
+will be included in the final value of the query. Riak KV assumes that the
+final phase will return results.
+
+`FunTerm` is a reference to the function that the phase will execute and
+takes any of the following forms:
+
+* `{modfun, Module, Function}` where `Module` and `Function` are atoms
+  that name an Erlang function in a specific module
+* `{qfun, Fun}` where `Fun` is a callable fun term (closure or anonymous
+  function)
+* `{jsfun, Name}` where `Name` is a binary that, when evaluated in
+  JavaScript, points to a built-in JavaScript function
+* `{jsanon, Source}` where `Source` is a binary that, when evaluated in
+  JavaScript, is an anonymous function
+* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains
+  the source for an anonymous JavaScript function
+
+{{% note title="qfun Note" %}}
+Using `qfun` in compiled applications can be a fragile
+operation. Please keep the following points in mind:
+
+1. The module in which the function is defined must be present and
+exactly the same version on both the client and Riak KV nodes.
+
+2. Any modules and functions used by this function (or any function in
+the resulting call stack) must also be present on the Riak KV nodes.
+
+Errors caused by failing to meet both conditions are often surprising,
+usually seen as opaque missing-function or function-clause
+errors. Especially in the case of differing module versions, this can be
+difficult to diagnose without anticipating the issue and knowing of
+`Module:module_info/0`.
+
+When using the Erlang shell, anonymous MapReduce functions can be
+defined and sent to Riak KV instead of deploying them to all servers in
+advance, but condition #2 above still holds.
+{{% /note %}}
+
+Link phases are expressed in the following form:
+
+```erlang
+{link, Bucket, Tag, Keep}
+```
+
+`Bucket` is either a binary name of a bucket to match, or the atom `_`,
+which matches any bucket. `Tag` is either a binary tag to match, or the
+atom `_`, which matches any tag. `Keep` has the same meaning as in map
+and reduce phases.
+
+> There is a small group of prebuilt Erlang MapReduce functions available
+> with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl).
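+
+As a brief, hedged sketch of using those prebuilt functions (reusing
+the `groceries` objects and `Client` created above; the function names
+are as exported by `riak_kv_mapreduce`):
+
+```erlang
+%% Count the number of input objects using prebuilt phase functions
+%% instead of qfun closures. The expected result is {ok, [{1, [2]}]}.
+riakc_pb_socket:mapred(
+    Client,
+    [{<<"groceries">>, <<"mine">>},
+     {<<"groceries">>, <<"yours">>}],
+    [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+     {reduce, {modfun, riak_kv_mapreduce, reduce_count_inputs}, none, true}]).
+```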
+
+## Bigger Data Examples
+
+### Loading Data
+
+This Erlang script will load historical stock-price data for Google
+(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it.
+Paste the code below into a file called `load_data.erl` inside the `dev`
+directory (or download it below).
+
+```erlang
+#!/usr/bin/env escript
+%% -*- erlang -*-
+main([]) ->
+    io:format("Requires one argument: filename with the CSV data~n");
+main([Filename]) ->
+    {ok, Data} = file:read_file(Filename),
+    Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])),
+    lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines).
+
+format_and_insert(Line) ->
+    JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line),
+    Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]),
+    io:format("Inserting: ~s~n", [hd(Line)]),
+    os:cmd(Command).
+```
+
+Make the script executable:
+
+```bash
+chmod +x load_data.erl
+```
+
+Download the CSV file of stock data linked below and place it in the
+`dev` directory where we've been working.
+
+* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) --- Google historical stock data
+* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) --- Alternative script in Ruby to load the data
+* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) --- Erlang script to load data (as shown in snippet)
+
+Now load the data into Riak KV.
+
+```bash
+./load_data.erl goog.csv
+```
+
+### Map only: find the days on which the high was over $600.00
+
+From the Erlang shell with the client library loaded, let's define a
+function that will check each value in our `goog` bucket to see if
+the stock's high for the day was above $600.
+
+```erlang
+> HighFun = fun(O, _, LowVal) ->
+>   {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+>   High = proplists:get_value(<<"High">>, Map, -1.0),
+>   case High > LowVal of
+>     true -> [riak_object:key(O)];
+>     false -> []
+>   end end.
+#Fun
+```
+
+Now we'll use `mapred_bucket/3` to send that function to the cluster.
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]).
+{ok,[{0,
+      [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>,
+       <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>,
+       <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>,
+       <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>,
+       <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>,
+       <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>,
+       <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>,
+       <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]}
+```
+
+### Map only: find the days on which the close was lower than the open
+
+This example is slightly more complicated: instead of comparing a
+single field against a fixed value, we're looking for days when the
+stock declined.
+
+```erlang
+> CloseLowerFun = fun(O, _, _) ->
+>   {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+>   Close = proplists:get_value(<<"Close">>, Map, -1.0),
+>   Open = proplists:get_value(<<"Open">>, Map, -2.0),
+>   case Close < Open of
+>     true -> [riak_object:key(O)];
+>     false -> []
+>   end end.
+#Fun
+
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]).
+{ok,[{0,
+      [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>,
+       <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>,
+       <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>,
+       <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>,
+       <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>,
+       <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>,
+       <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>,
+       <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]}
+```
+
+### Map and Reduce: find the maximum daily variance in price by month
+
+Here things start to get tricky. We'll use a map phase to compute each
+day's variance between high and low, and our reduce phase will identify
+each month's largest variance.
+
+```erlang
+DailyMap = fun(O, _, _) ->
+    {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+    Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")),
+    High = proplists:get_value(<<"High">>, Map, 0.0),
+    Low = proplists:get_value(<<"Low">>, Map, 0.0),
+    Month = string:substr(Date, 1, 7),
+    [{Month, abs(High - Low)}]
+end.
+
+MonthReduce = fun(List, _) ->
+    {Highs, _} = lists:foldl(
+        fun({Month, _Value}=Item, {Accum, PrevMonth}) ->
+            case Month of
+                PrevMonth ->
+                    %% Highest value is always first in the list, so
+                    %% skip over this one
+                    {Accum, PrevMonth};
+                _ ->
+                    {[Item] ++ Accum, Month}
+            end
+        end,
+        {[], ""},
+        List),
+    Highs
+end.
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]).
+{ok,[{1,
+      [{"2010-02",10.099999999999909},
+       {"2006-02",11.420000000000016},
+       {"2004-08",8.100000000000009},
+       {"2008-08",14.490000000000009},
+       {"2006-05",11.829999999999984},
+       {"2005-10",4.539999999999964},
+       {"2006-06",7.300000000000011},
+       {"2008-06",9.690000000000055},
+       {"2006-03",11.770000000000039},
+       {"2006-12",4.880000000000052},
+       {"2005-09",9.050000000000011},
+       {"2008-03",15.829999999999984},
+       {"2008-09",14.889999999999986},
+       {"2010-04",9.149999999999977},
+       {"2008-06",14.909999999999968},
+       {"2008-05",13.960000000000036},
+       {"2005-05",2.780000000000001},
+       {"2005-07",6.680000000000007},
+       {"2008-10",21.390000000000043},
+       {"2009-09",4.180000000000007},
+       {"2006-08",8.319999999999993},
+       {"2007-08",5.990000000000009},
+       {[...],...},
+       {...}|...]}]}
+```
+
+### A MapReduce Challenge
+
+Here is a scenario involving the data you already have loaded.
+
+MapReduce Challenge: Find the largest day for each month in terms of
+dollars traded, and subsequently the largest overall day.
+
+*Hint*: You will need at least one each of map and reduce phases.
+
+## Streaming MapReduce
+
+Because Riak KV distributes the map phases across the cluster to increase
+data locality, you can gain access to the results of those individual
+computations as they finish via streaming. Streaming can be very
+helpful for getting access to the results of a high-latency MapReduce
+job that contains only map phases. Streaming results from reduce phases
+isn't as useful, but if your map phases return data (`keep: true`),
+those results will be returned to the client even if the reduce phases
+haven't executed. This lets you use streaming with a reduce phase to
+collect the results of the map phases while the job runs and then
+receive the reduce phase's result at the end.
+
+### Streaming via the HTTP API
+
+You can enable streaming with MapReduce jobs submitted to the `/mapred`
+resource by adding `?chunked=true` to the URL. The response will be sent
+using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`.
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error
+
+Internal Server Error
+
+The server encountered an error while processing this request:
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, the
+information you need is typically buried deeper in the stack trace.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
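+
+A quick check in the shell confirms this (a sketch; the exact exception
+format varies by Erlang release):
+
+```erlang
+> string:substr("2009-06-10", 1, 7).
+"2009-06"
+> string:substr("2009-06-10", 0, 7).
+** exception error: no function clause matching string:substr("2009-06-10",0,7)
+```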
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
diff --git a/content/riak/kv/2.9.2/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.9.2/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..e710051965
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,67 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+---
+
+Cluster metadata is a subsystem inside Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/2.9.2/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.9.2/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.9.2/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
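+
+As a hedged sketch of what that interface looks like (the
+`{riak_test, config}` prefix and the keys below are hypothetical;
+function names are as exported by `riak_core_metadata.erl`):
+
+```erlang
+%% A minimal sketch from an attached Erlang console. Values are opaque
+%% Erlang terms addressed by a {Prefix, SubPrefix} pair plus a key.
+FullPrefix = {riak_test, config},
+ok = riak_core_metadata:put(FullPrefix, max_workers, 16),
+16 = riak_core_metadata:get(FullPrefix, max_workers),
+%% Enumerate every key/value pair stored under the prefix
+Pairs = riak_core_metadata:to_list(FullPrefix).
+```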
diff --git a/content/riak/kv/2.9.2/developing/app-guide/reference.md b/content/riak/kv/2.9.2/developing/app-guide/reference.md
new file mode 100644
index 0000000000..9ef60b58eb
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/app-guide/reference.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+#menu:
+#  riak_kv-2.9.2:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+---
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.9.2/developing/app-guide/replication-properties.md b/content/riak/kv/2.9.2/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..3044946b56
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/app-guide/replication-properties.md
@@ -0,0 +1,580 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/advanced/replication-properties
+  - /riak/kv/2.9.2/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/2.9.2/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/2.9.2/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/2.9.2/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/2.9.2/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.9.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.9.2/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/2.9.2/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on each individual read or write
+request, as shown in the [section below]({{}}riak/kv/2.9.2/developing/app-guide/replication-properties#client-level-replication-settings).
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.9.2/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
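+
+For example, a write like the following (the bucket and key names here
+are hypothetical) would be performed with `n_val=5`, `r=3`, and `w=3`:
+
+```curl
+curl -XPUT http://localhost:8098/types/custom_props/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "this write uses the custom_props replication settings"
+```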
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/2.9.2/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.9.2/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/2.9.2/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location giraffeKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(giraffeKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_object:new({<<"w_equals_3">>, <<"animal_facts">>},
+                       <<"giraffe">>,
+                       <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.9.2/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the *primary
+read* (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
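+
+On a per-request basis, PR and PW can be supplied as the `pr` and `pw`
+query parameters of the HTTP API. A brief sketch, reusing the
+`animal_facts` bucket from the examples above:
+
+```curl
+curl http://localhost:8098/buckets/animal_facts/keys/chimpanzee?pr=2
+```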
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
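+
+As with the other parameters, DW can be set through a bucket type,
+following the same pattern as the examples above (the type name here is
+arbitrary):
+
+```bash
+riak-admin bucket-type create dw_equals_2 '{"props":{"dw":2}}'
+riak-admin bucket-type activate dw_equals_2
+```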
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/2.9.2/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.9.2/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.9.2/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
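+
+For example, a read that sets both parameters on a per-request basis
+via the HTTP API (the bucket and key here are hypothetical):
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?notfound_ok=false&basic_quorum=true"
+```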
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` --- All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` --- This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` --- A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` --- Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
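+
+Symbolic names can be used anywhere integer values can, including in
+bucket type properties. A sketch (the type name here is arbitrary):
+
+```bash
+riak-admin bucket-type create all_writes '{"props":{"w":"all"}}'
+riak-admin bucket-type activate all_writes
+```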
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{'stats':{ ... large stats object ... }}")
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/2.9.2/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/2.9.2/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.9.2/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+
+*Tuning CAP Controls in Riak* from Basho Technologies on Vimeo.
diff --git a/content/riak/kv/2.9.2/developing/app-guide/strong-consistency.md b/content/riak/kv/2.9.2/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..384ef9fbfd
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/app-guide/strong-consistency.md
@@ -0,0 +1,257 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/advanced/strong-consistency
+  - /riak/kv/2.9.2/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/2.9.2/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/2.9.2/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/2.9.2/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/2.9.2/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/2.9.2/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/2.9.2/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/2.9.2/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.2/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/2.9.2/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/2.9.2/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/2.9.2/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/2.9.2/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/2.9.2/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/2.9.2/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/2.9.2/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/2.9.2/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/2.9.2/developing/client-libraries
+[getting started]: {{}}riak/kv/2.9.2/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/2.9.2/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent and partition
+tolerant) for the data in those buckets. Depending on your use case, you
+can store some or all of your data in strongly consistent buckets.
+Strong consistency was added to complement Riak's standard
+[eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability: a [quorum][use ref strong consistency#trade-offs] of the primary [vnodes][glossary vnode] responsible for the key must be online and reachable, or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
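+
+In brief (the linked document has the complete procedure and its
+prerequisites), enabling the subsystem comes down to one setting in
+`riak.conf` on each node; note that the consensus subsystem will not
+start until the cluster has at least three nodes:
+
+```riakconf
+strong_consistency = on
+```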
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
+
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function quite differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Eventually consistent operations enable you to set a variety of
+[replication properties][apps replication properties] either on each request or at the
+bucket level, [using bucket types][usage bucket types]; for strongly consistent
+operations, these settings, including `r`, `pr`, `w`, `rw`, and others,
+are quietly ignored. Two replication properties that _can_ be set,
+however, are `n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5: because a majority (quorum) of the primary vnodes must be
+available for each request, an `n_val` of 5 can tolerate the loss of two
+primaries, whereas the default of 3 can tolerate only one. More on this
+recommendation can be found in [Fault Tolerance][config strong consistency#fault].
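+
+Putting the two properties together, a consistent bucket type with a
+larger `n_val` might be created like this (the `consistent_n5` name is
+purely illustrative):
+
+```bash
+riak-admin bucket-type create consistent_n5 \
+    '{"props":{"consistent":true,"n_val":5}}'
+riak-admin bucket-type activate consistent_n5
+```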
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients, but with important advantages over vector
+clocks.
+
+While we strongly recommend attaching context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---they are purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to write to a non-existent key without attaching a context, we
+recommend doing so only if you are certain that the key does not yet
+exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
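+
+To make the cycle concrete, here is a minimal sketch over HTTP; the
+bucket, key, payload, and context value are all illustrative. The
+context is returned by the fetch in the `X-Riak-Vclock` header and must
+be echoed back on the update:
+
+```curl
+# 1. Fetch the object; note the X-Riak-Vclock header in the response
+curl -v http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice
+
+# 2. Update the object, passing the fetched context back to Riak
+curl -XPUT http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice \
+  -H "Content-Type: application/json" \
+  -H "X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cg==" \
+  -d '{"balance":100}'
+```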
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure,
+   you should default to supplying a context.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will always fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates. A retry
+   sketch follows this list.
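+
+Because of point 3, a common pattern is to retry the full
+read/modify/write cycle when an update fails. Below is a minimal sketch
+using the official Python client; the bucket type, bucket, and key names
+are illustrative, and the error handling is deliberately coarse:
+
+```python
+import riak
+
+client = riak.RiakClient()
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+def update_with_retry(key, modify, attempts=5):
+    for _ in range(attempts):
+        obj = bucket.get(key)        # fetch the object and its causal context
+        obj.data = modify(obj.data)  # apply the change locally
+        try:
+            return obj.store()       # conditional put; can fail on concurrent updates
+        except riak.RiakError:
+            continue                 # retry the whole read/modify/write cycle
+    raise RuntimeError('update failed after {} attempts'.format(attempts))
+```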
+
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+non-strongly-consistent buckets. One important exception is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, and so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a key that already holds an object without
+including a causal context, you will receive an error like the following
+(shown here for each client):
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+Precondition Failed
+
+mochiweb+webmachine web server
+``` + +> **Getting Started with Riak KV clients** +> +> If you are connecting to Riak using one of Basho's official +[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section. + +## Known Issue with Client Libraries + +All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no +problems, since many error conditions are normal when using Riak. + +When working with strong consistency, however, operations like +[conditional puts][config strong consistency#details] commonly +produce errors that are difficult for clients to interpret. For example, +it is expected behavior for conditional puts to fail in the case of +concurrent updates to an object. At present, the official Riak clients +will convert this failure into an exception that is no different from +other error conditions, i.e. they will not indicate any +strong-consistency-specific errors. + +The best solution to this problem at the moment is to catch these +exceptions on the application side and parse server-side error messages +to see if the error involved a conditional failure. If so, you should +set up your application to retry any updates, perhaps a specified number +of times or perhaps indefinitely, depending on the use case. + +If you do set up a retry logic of this sort, however, it is necessary +to retry the entire read/modify/put cycle, meaning that you will need +to fetch the object, modify it, and then write. If you perform a simple +put over and over again, without reading the object, the update will +continue to fail. + +A future version of Riak will address these issues by modifying the +server API to more accurately report errors specific to strongly +consistent operations. diff --git a/content/riak/kv/2.9.2/developing/app-guide/write-once.md b/content/riak/kv/2.9.2/developing/app-guide/write-once.md new file mode 100644 index 0000000000..6aa4b5127a --- /dev/null +++ b/content/riak/kv/2.9.2/developing/app-guide/write-once.md @@ -0,0 +1,155 @@ +--- +title: "Write Once" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Write Once" + identifier: "app_guide_write_once" + weight: 102 + parent: "developing_app_guide" +toc: true +version_history: + in: "2.1.0+" +aliases: + - /riak/2.9.2/dev/advanced/write-once + - /riak/kv/2.9.2/dev/advanced/write-once +--- + +[glossary vnode]: {{}}riak/kv/2.9.2/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.9.2/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.9.2/developing/data-types +[strong consistency]: {{}}riak/kv/2.9.2/developing/app-guide/strong-consistency + +Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. + +{{% note %}} +Write-once buckets do not support Riak commit hooks. Because Riak objects are +inserted into the realtime queue using a postcommit hook, realtime replication +is unavailable for write-once buckets. 
Fullsync replication will, however, +replicate the data. +{{% /note %}} + +## Configuration + +When the new `write_once` [bucket type][bucket type] parameter is set to +`true`, buckets of type will treat all key/value entries as semantically "write +once;" once written, entries should not be modified or overwritten by the user. + +The `write_once` property is a boolean property applied to a bucket type and +may only be set at bucket creation time. Once a bucket type has been set with +this property and activated, the `write_once` property may not be modified. + +The `write_once` property is incompatible with [Riak data types][Riak data types] +and [strong consistency][strong consistency], This means that if you attempt +to create a bucket type with the `write_once` property set to `true`, any +attempt to set the `datatype` parameter or to set the `consistent` parameter +to `true` will fail. + +The `write_once` property may not be set on the default bucket type, and may +not be set on individual buckets. If you set the `lww` or `allow_mult` +parameters on a write-once bucket type, those settings will be ignored, as +sibling values are disallowed by default. + +The following example shows how to configure a bucket type with the +`write_once` property: + +```bash +riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}' +# my-bucket-type created + +riak-admin bucket-type activate my-bucket-type +# my-bucket-type has been activated + +riak-admin bucket-type status my-bucket-type +# my-bucket-type is active +... +write_once: true +... +``` + +## Quorum + +The write path used by write-once buckets supports the `w`, `pw`, and `dw` +configuration values. However, if `dw` is specified, then the value of `w` is +taken to be the maximum of the `w` and `dw` values. For example, for an `n_val` +of 3, if `dw` is set to `all`, then `w` will be `3`. + +This write additionally supports the `sloppy_quorum` property. If set to +`false`, only primary nodes will be selected for calculation of write quorum +nodes. + +## Runtime + +The write-once path circumvents the normal coordinated PUT code path, and +instead sends write requests directly to all [vnodes][glossary vnode] (or +vnode proxies) in the effective preference list for the write operation. + +In place of the `put_fsm` used in the normal path, we introduce a collection of +new intermediate worker processes (implementing `gen_server` behavior). The +role of these intermediate processes is to dispatch put requests to vnode or +vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`, +the write-once workers are long-lived for the lifecycle of the `riak_kv` +application. They are therefore stateful and store request state in a state- +local dictionary. + +The relationship between the `riak_client`, write-once workers, and vnode +proxies is illustrated in the following diagram: + +
+![Write Once]({{}}images/write_once.png) +
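+
+From a client's perspective, a put to a write-once bucket looks like any
+other Riak write; the differences are entirely in the server-side path
+described above. A minimal sketch over HTTP, reusing the `my-bucket-type`
+type created earlier (the bucket and key names are illustrative):
+
+```curl
+curl -XPUT http://localhost:8098/types/my-bucket-type/buckets/events/keys/event-0001 \
+  -H "Content-Type: application/json" \
+  -d '{"ts": "2019-11-17T12:00:00Z", "level": "info"}'
+```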
+ +## Client Impacts + +Since the write-once code path is optimized for writes of data that will not +be updated and therefore may potentially issue asynchronous writes, some +client features might not work as expected. For example, PUT requests asking +for the object to be returned will behave like requests that do not +request the object to be returned when they are performed against write-once +buckets. + + +## Siblings + +As mentioned, entries in write-once buckets are intended to be written only +once---users who are not abusing the semantics of the bucket type should not be +updating or over-writing entries in buckets of this type. However, it is +possible for users to misuse the API, accidentally or otherwise, which might +result in incomparable entries for the same key. + +In the case of siblings, write-once buckets will resolve the conflict by +choosing the "least" entry, where sibling ordering is based on a deterministic +SHA-1 hash of the objects. While this algorithm is repeatable and deterministic +at the database level, it will have the appearance to the user of "random write +wins." + +{{% note %}} +As mentioned in [Configuration](#configuration), write-once buckets and Riak +Data Types are incompatible because of this. +{{% /note %}} + + +## Handoff + +The write-once path supports handoff scenarios, such that if a handoff occurs +during PUTs in a write-once bucket, the values that have been written will be +handed off to the newly added Riak node. + +## Asynchronous Writes + +For backends that support asynchronous writes, the write-once path will +dispatch a write request to the backend and handle the response +asynchronously. This behavior allows the vnode to free itself for other work +instead of waiting on the write response from the backend. + +At the time of writing, the only backend that supports asynchronous writes is +LevelDB. Riak will automatically fall back to synchronous writes with all other +backends. + +{{% note title="Note on the `multi` backend" %}} +The [Multi]({{}}riak/kv/2.9.2/setup/planning/backend/multi) backend does not +support asynchronous writes. Therefore, if LevelDB is used with the Multi +backend, it will be used in synchronous mode. +{{% /note %}} diff --git a/content/riak/kv/2.9.2/developing/client-libraries.md b/content/riak/kv/2.9.2/developing/client-libraries.md new file mode 100644 index 0000000000..79bd00114f --- /dev/null +++ b/content/riak/kv/2.9.2/developing/client-libraries.md @@ -0,0 +1,304 @@ +--- +title: "Client Libraries" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Client Libraries" + identifier: "developing_client_libraries" + weight: 106 + parent: "developing" +toc: true +aliases: + - /riak/2.9.2/dev/using/libraries + - /riak/kv/2.9.2/dev/using/libraries +--- + +## Basho-Supported Libraries + +Basho officially supports a number of open-source client libraries for a +variety of programming languages and environments. 
+ +Language | Source | Documentation | Download +:--------|:-------|:--------------|:-------- +Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22) | +Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client) +Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads) +C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases) +Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases) +PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) +Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client) +Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client) + +**Note**: All official clients use the integrated issue tracker on +GitHub for bug reporting. + +In addition to the official clients, Basho provides some unofficial +client libraries, listed below. There are also many client libraries and +related [community projects]({{}}community/projects/). + + +## Community Libraries + +The Riak Community is developing at a break-neck pace, and the number of +community-contributed libraries and drivers is growing right along side +it. Here is a list of projects that may suit your programming needs or +curiosities. If you know of something that needs to be added or are +developing something that you wish to see added to this list, please +fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs) +and send us a pull request. + +{{% note title="Note on community-produced libraries" %}} +All of these projects and libraries are at various stages of completeness and +may not suit your application's needs based on their level of maturity and +activity. 
+{{% /note %}} + +### Client Libraries and Frameworks + +#### C/C++ + +* [riak-cpp](https://github.com/ajtack/riak-cpp) --- A C++ Riak client + library for use with C++11 compilers +* [Riak C Driver](https://github.com/fenek/riak-c-driver) --- A library + to communicate with Riak using cURL and Protocol Buffers +* [Riack](https://github.com/trifork/riack) --- A simple C client + library +* [Riack++](https://github.com/TriKaspar/riack_cpp) --- A C++ wrapper + around riack + +#### Clojure + +* [knockbox](https://github.com/reiddraper/knockbox) --- An eventual + consistency toolbox for Clojure +* [Welle](http://clojureriak.info) --- An expressive Clojure client with + batteries included +* [clj-riak](http://github.com/mmcgrana/clj-riak) --- Clojure bindings + to the Riak Protocol Buffers API +* [sumo](https://github.com/reiddraper/sumo) --- A Protocol + Buffer-specific client for Riak with KV, 2i, and MapReduce support +* [kria](https://github.com/bluemont/kria) --- Riak 2.0 Asynchronous + (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer + API, Java 7. + +#### ColdFusion + +* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) + --- A Riak-backed cache extension for Railo/ColdFusion + +#### Common Lisp + +* [cl-riak (1)](https://github.com/whee/cl-riak) +* [cl-riak (2)](https://github.com/eriknomitch/cl-riak) + +#### Dart + +* [riak-dart](https://github.com/agilord/riak_dart_client) --- HTTP + client for Riak written in Dart + +#### Django (Python) + +* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) + --- Riak-based Session Backend for Django +* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) + --- A Riak backend for Django + +#### Erlang + +* [Uriak Pool](https://github.com/unisontech/uriak_pool) --- Erlang + connection pool library from the team at + [Unison](http://www.unison.com) +* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) --- Riak + Protocol Buffer Client pool application +* [Pooly](https://github.com/aberman/pooly) --- Riak Process Pool +* [riakpool](https://github.com/dweldon/riakpool) --- Application for + maintaining a dynamic pool of Protocol Buffer client connections to a + Riak database +* [pooler](https://github.com/seth/pooler) --- An OTP Process Pool + Application +* [krc](https://github.com/klarna/krc) --- A simple wrapper around the + official Riak client for Erlang +* [riakc_pool](https://github.com/brb/riakc_pool) --- A really simple + Riak client process pool based on poolboy + +#### Go + +* [riaken](https://github.com/riaken) --- A fast and extendable Riak + Protocol Buffer Client +* [goriakpbc](https://github.com/tpjg/goriakpbc) --- A Golang Riak + client inspired by the Ruby riak-client from Basho and riakpbc from mrb +* [riakpbc](https://github.com/mrb/riakpbc) --- A Riak Protocol Buffer + client in Go +* [goriak](https://github.com/zegl/goriak) --- Go language driver for Riak KV + +#### Grails + +* [Grails ORM for Riak](http://www.grails.org/plugin/riak) + +#### Griffon + +* [Riak Plugin for + Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin) + +#### Groovy + +* [spring-riak](https://github.com/jbrisbin/spring-riak) --- Riak + support from Groovy and/or Java + +#### Haskell + +* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) + --- A fast Haskell client library from the team at MailRank. 
+ +#### Java + +* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) + --- Java Client Library for Riak based on the Protocol Buffers API +* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) + --- Asynchronous, NIO-based Protocol Buffers client for Riak +* [Riak Module for the Play + Framework](http://www.playframework.org/modules/riak-head/home) + +#### Lisp-flavored Erlang + +* [Gutenberg](https://github.com/dysinger/gutenberg/) --- Riak MapReduce + examples written in LFE + +#### Node.js + +* [zukai](https://github.com/natural/zukai) --- Riak ODM for Node.js + from Troy Melhase +* [riak-pb](https://github.com/CrowdProcess/riak-pb) --- Riak Protocol + Buffers client for Node.js from the team at + [CrowdProcess](http://crowdprocess.com) +* [node_riak](https://github.com/mranney/node_riak) --- Voxer's + production Node.js client for Riak. +* [riakpbc](https://github.com/nlf/riakpbc) --- A simple Riak Protocol + Buffer client library for Node.js +* [nodiak](https://npmjs.org/package/nodiak) --- Supports bulk + get/save/delete, sibling auto-resolution, MapReduce chaining, Search, + and 2i's +* [resourceful-riak](https://github.com/admazely/resourceful-riak) --- A + Riak engine to the + [resourceful](https://github.com/flatiron/resourceful/) model + framework from [flatiron](https://github.com/flatiron/) +* [Connect-Riak](https://github.com/frank06/connect-riak) --- Riak + session store for Connect backed by [Riak-js](http://riakjs.org/) +* [Riak-js](http://riakjs.com) --- Node.js client for Riak with support + for HTTP and Protocol Buffers +* [Riakjs-model](https://github.com/dandean/riakjs-model) --- a model + abstraction around riak-js +* [Node-Riak](http://github.com/orlandov/node-riak) --- A wrapper around + Node's HTTP facilities for communicating with Riak +* [riak-dc](https://github.com/janearc/riak-dc) --- A very thin, very small + http-based interface to Riak using promises intended to be used for small + tools like command-line applications; aims to have the "most-synchronous- + like" interface. 
+* [Nori](https://github.com/sgonyea/nori) --- Experimental Riak HTTP + library for Node.js modeled after Ripple +* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) --- + Node-based server and database-frontend for Sproutcore +* [Chinood](https://npmjs.org/package/chinood) --- Object data mapper + for Riak built on Nodiak +* [SimpleRiak](https://npmjs.org/package/simpleriak) --- A very simple + Riak HTTP client + +#### OCaml + +* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) --- + Riak OCaml client +* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) --- A Protocol + Buffers client for Riak + +#### Perl + +* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) --- A Perl + interface to Riak +* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) + --- Non-blocking Riak adapter using anyevent +* [riak-tiny](https://github.com/tempire/riak-tiny) --- Perl interface + to Riak without Moose +* [Riak::Light](https://metacpan.org/module/Riak::Light) --- Fast and + lightweight Perl client for Riak (PBC only) + +#### PHP + +* [riak-client](https://github.com/php-riak/riak-client) --- A Riak + 2.0-compliant PHP client with support for Protocol Buffers by [Fabio + Silva](https://github.com/FabioBatSilva) +* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) --- A port of + Ripple to PHP +* [riiak](https://bitbucket.org/intel352/riiak) --- A Riak PHP client + library for the [Yii Framework](http://www.yiiframework.com/) +* [riak-php](https://github.com/marksteele/riak-php) --- A Riak PHP + client with support for Protocol Buffers +* [RiakBundle](https://github.com/remialvado/RiakBundle) --- + [Symfony](http://symfony.com) Bundle designed to ease interaction + with Riak +* [php_riak](https://github.com/TriKaspar/php_riak) --- A PHP extension + written in C, Both Riak client and PHP session module + +#### Python + +* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) + --- Asyncio PBC Riak 2.0+ client library. 
(Based on official Basho + python client) +* [Riakasaurus](https://github.com/calston/riakasaurus) --- A Riak + client library for Twisted (based on txriak) +* [RiakKit](http://shuhaowu.com/riakkit) --- A small Python ORM that + sits on top of riak-python-client, similar to mongokit and couchdbkit +* [riakalchemy](https://github.com/Linux2Go/riakalchemy) --- Object + mapper for Riak written in Python +* [riak_crdt](https://github.com/ericmoritz/riak_crdt) --- A CRDT + (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT + API](https://github.com/ericmoritz/crdt) +* [txriak](https://launchpad.net/txriak) --- A Twisted module for + communicating with Riak via the HTTP interface +* [txriakidx](https://github.com/williamsjj/txriakidx) --- Riak client + for Twisted Python that implements transparent indexes + +#### Racket + +* [riak.rkt](https://github.com/shofetim/riak.rkt) --- Racket API to + Riak +* [Racket Riak](https://github.com/dkvasnicka/racket-riak) --- Racket + 1.3.x API to Riak + +#### Ruby + +* [Risky](https://github.com/aphyr/risky) --- A lightweight Ruby ORM for + Riak +* [riak_sessions](http://github.com/igorgue/riak_sessions) --- + Riak-backed session storage for Rack +* [Riaktor](http://github.com/benmyles/riaktor) --- Ruby client and + object mapper for Riak +* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) --- + DataMapper adapter for Riak +* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) --- Riak + Protocol Buffer Client in Ruby +* [Devise-Ripple](http://github.com/frank06/devise-ripple) --- An ORM + strategy to use Devise with Riak +* [ripple-anaf](http://github.com/bkaney/ripple-anaf) --- Accepts nested + attributes support for Ripple +* [Pabst](https://github.com/sgonyea/pabst) --- Cross-platform Ruby + extension for Protocol Buffers written in both Objective-C and + Objective-C++ + +#### Scala + +* [Riakka](http://github.com/timperrett/riakka) --- Scala library for + talking to Riak +* [Ryu](http://github.com/softprops/ryu) --- A Tornado Whirlwind Kick + Scala client for the Riak raw HTTP interface +* [Raiku](https://github.com/gideondk/Raiku) --- An Akka IO- and + Sentinel-driven Riak Scala client + +#### Smalltalk + +* [Phriak](http://www.squeaksource.com/Phriak/) --- A Riak client for + Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface +* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) + --- A Pharo Smalltalk interface to Riak. There is also a blog post + with some additional info about the client + [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html). diff --git a/content/riak/kv/2.9.2/developing/data-modeling.md b/content/riak/kv/2.9.2/developing/data-modeling.md new file mode 100644 index 0000000000..a0d74ef791 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/data-modeling.md @@ -0,0 +1,10 @@ +--- +layout: redirect +target: "riak/kv/2.9.2/learn/use-cases/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/, but -- to maintain +the git history of this (possibly malformed?) file -- we're going to start off +by using this generated redirect. 
diff --git a/content/riak/kv/2.9.2/developing/data-types.md b/content/riak/kv/2.9.2/developing/data-types.md new file mode 100644 index 0000000000..db44730e1e --- /dev/null +++ b/content/riak/kv/2.9.2/developing/data-types.md @@ -0,0 +1,275 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Data Types" + identifier: "developing_data_types" + weight: 102 + parent: "developing" +toc: true +aliases: + - /riak/2.9.2/dev/using/data-types + - /riak/kv/2.9.2/dev/using/data-types + - /riak/2.9.2/dev/data-modeling/data-types + - /riak/kv/2.9.2/dev/data-modeling/data-types +--- + +[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others +[concept crdt]: ../../learn/concepts/crdts +[ops bucket type]: ../../using/cluster-operations/bucket-types + +Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types: + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [GSets](./gsets) +- [Maps](./maps) + +Riak KV also has 1 context-free data type, that has similar usage but does not require contexts. + +- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places) + + +Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps). + +For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt]. + +## Getting Started with Riak Data Types + +The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: + +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). +2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). +3. [Activate the bucket type](#activate-bucket-type). + +### Creating a Bucket with a Riak Data Type + +First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to either `counter`, `map`, `set`, or `hll`. + +The following would create a separate bucket type for each of the four +bucket-level data types: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}' +riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}' +``` + +> **Note** +> +> The names `maps`, `sets`, `counters`, `hlls` and `gsets` are not reserved +terms. You are free to name bucket types whatever you like, with +the exception of `default`. + +### Confirm Bucket configuration + +Once you've created a bucket with a Riak data type, you can check +to make sure that the bucket property configuration associated with that +type is correct. This can be done through the `riak-admin` interface: + +```bash +riak-admin bucket-type status maps +``` + +This will return a list of bucket properties and their associated values +in the form of `property: value`. 
If our `maps` bucket type has been set +properly, we should see the following pair in our console output: + +``` +datatype: map +``` + +### Activate Bucket type + +If a bucket type has been properly constructed, it needs to be activated +to be usable in Riak. This can also be done using the `bucket-type` +command interface: + +```bash +riak-admin bucket-type activate maps +``` + +To check whether activation has been successful, simply use the same +`bucket-type status` command shown above. + +See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application. + +## Required Bucket Properties + +In order for Riak data types to work the bucket should have the following bucket properties: + +- `allow_mult = true` +- `last_write_wins = false` + +These settings are set by default and should not be changed. + +## Data Types and Context + +Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence. + +If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client). + +> **Note** +> +> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter. + +In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map): + +```java +// Using the "ahmedMap" Location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +System.out.prinntln(ctx.getValue().toString()) + +// An indecipherable string of Unicode characters should then appear +``` + +```ruby +bucket = client.bucket('users') +ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps') +ahmed_map.instance_variable_get(:@context) + +# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j" +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```python +bucket = client.bucket_type('maps').bucket('users') +ahmed_map = Map(bucket, 'ahmed_info') +ahmed_map.context + +# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Note: using a previous UpdateMap or FetchMap result +Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context)); + +// Output: +// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("context: '%s'", rslt.context.toString('base64')); +}); + +// Output: +// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo=' +``` + +```erlang +%% You cannot fetch a data type's context directly using the Erlang +%% client. This is actually quite all right, as the client automatically +%% manages contexts when making updates. 
+``` + +> **Context with the Ruby, Python, and Erlang clients** +> +> In the Ruby, Python, and Erlang clients, you will not need to manually +handle context when making data type updates. The clients will do it all +for you. The one exception amongst the official clients is the Java +client. We'll explain how to use data type contexts with the Java client +directly below. + +### Context with the Java and PHP Clients + +With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations: + +* Disabling a flag within a map +* Removing an item from a set (whether the set is on its own or within a + map) +* Removing a field from a map + +Without context, these operations simply will not succeed due to the +convergence logic driving Riak data types. The example below shows you +how to fetch a data type's context and then pass it back to Riak. More +specifically, we'll remove the `paid_account` flag from the map: + +```java +// This example uses our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate removePaidAccountField = new MapUpdate() + .removeFlag("paid_account"); +UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField) + .withContext(ctx) + .build(); +client.execute(update); +``` + + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +## Usage Examples + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [Maps](./maps) +- [GSets](./gsets) +- [Hyperloglogs](./hyperloglogs) + +The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section. + +All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish. + +## Data Types and Search + +Riak data types can be searched like any other object, but with the +added benefit that your data type is indexed as a different type by Solr, +the search platform behind Riak Search. + +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code +samples from each of our official client libraries. 
diff --git a/content/riak/kv/2.9.2/developing/data-types/counters.md b/content/riak/kv/2.9.2/developing/data-types/counters.md new file mode 100644 index 0000000000..36bc5633e4 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/data-types/counters.md @@ -0,0 +1,631 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Counters" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Counters" + identifier: "data_types_counters" + weight: 100 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.2/dev/using/data-types/counters + - /riak/kv/2.9.2/dev/using/data-types/counters + - /riak/2.9.2/dev/data-modeling/data-types/counters + - /riak/kv/2.9.2/dev/data-modeling/data-types/counters +--- + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero. + +The examples in this section will show you how to use counters on their own. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter set to `counter`: + +```bash +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +``` + +> **Note** +> +> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status counters +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `counters` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: counter +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate counters +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status counters +``` + +After creating and activating our new `counters` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key +location that contains our counter. + +For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. 
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can +create this counter and ensure that the `counters` bucket will use our +`counters` bucket type like this: + +```java +// Using the countersBucket Namespace object from above: + +Location trafficTickets = new Location(countersBucket, "traffic_tickets"); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') + +# Alternatively, the Ruby client enables you to set a bucket type as +# being globally associated with a Riak data type. The following would +# set all counter buckets to use the counters bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters' + +# This would enable us to create our counter without specifying a bucket type +bucket = client.bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('traffic_tickets', $bucket); +``` + +```python +bucket = client.bucket_type('counters').bucket('traffic_tickets') +counter = bucket.new('traffic_tickets') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResult = cmd.Result; +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +``` + +```erlang +Counter = riakc_counter:new(). + +%% Counters in the Erlang client are opaque data structures that collect +%% operations as you mutate them. We will associate the data structure +%% with a bucket type, bucket, and key later on. +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +## Increment a Counter + +Now that our client knows which bucket/key pairing to use for our +counter, `traffic_tickets` will start out at 0 by default. If we happen +to get a ticket that afternoon, we can increment the counter: + +```java +// Using the "trafficTickets" Location from above: + +CounterUpdate cu = new CounterUpdate(1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment + +# This will increment the counter both on the application side and in +Riak +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment() + +# Updates are staged locally and have to be explicitly sent to Riak +# using the store() method. 
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 5}' +``` + +## Retrieve Counter Value + +We can retrieve the value of the counter and view how many tickets have accumulated: + +```java +// Using the "trafficTickets" Location from above: +FetchCounter fetch = new FetchCounter.Builder(trafficTickets) + .build(); +FetchCounter.Response response = client.execute(fetch); +RiakCounter counter = response.getDatatype(); +Long ticketsCount = counter.view(); +``` + +```ruby +counter.value +# Output will always be an integer +``` + +```php +$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getCounter(); + +$trafficTickets->getData(); # returns an integer +``` + +```python +counter.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, whereas the call above would return +# 6, the call below will return 0' since we started with an empty +# counter: + +counter.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any changes to the counter that have not yet been +# sent to Riak +counter.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +// response.Value has the counter value +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +client.fetchCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND", + options.bucketType, options.bucket, options.key); + } else { + logger.info("bt: %s, b: %s, k: %s, counter: %d", + options.bucketType, options.bucket, options.key, + rslt.counterValue); + } + } +); +``` + +```erlang +riakc_counter:dirty_value(Counter2). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, whereas the call above would return +%% '6', the call below will return '0' since we started with an empty +%% counter: + +riakc_counter:value(Counter2). + +%% To fetch the value stored on the server, use the call below: + +{ok, CounterX} = riakc_pb_socket:fetch_type(Pid, + {<<"counters">>, <<"counters">>}, + <<"traffic_tickets">>). +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets + +# Response: +{"type":"counter", "value": } +``` + +## Decrement a Counter + +Counters enable you to decrement values in addition to incrementing them as seen above. 
+ +For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(-1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.decrement + +# Just like incrementing, you can also decrement by more than one, e.g.: +counter.decrement(3) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(-3) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.decrement() + +# Just like incrementing, you can also decrement by more than one, e.g.: +counter.decrement(3) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var updateCmd = new UpdateCounter.Builder(-3) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .Build(); + +rslt = client.Execute(updateCmd); +response = updateCmd.Response; +// response.Value is three less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counter', + key: 'traffic_tickets', + increment: -1 +}; + +// As with incrementing, you can also decrement by more than one, e.g.: +var options = { + bucketType: 'counters', + bucket: 'counter', + key: 'traffic_tickets', + increment: -3 +}; +``` + +```erlang +Counter3 = riakc_counter:decrement(Counter2). + +%% As with incrementing, you can also decrement by more than one: + +Counter4 = riakc_counter:decrement(3, Counter3). + +%% At some point, we'll want to send our local updates to the server +%% so they get recorded and are visible to others. Extract the update +%% using the to_op/1 function, then pass it to +%% riakc_pb_socket:update_type/4,5. + +riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>}, + <<"traffic_tickets">>, + riakc_counter:to_op(Counter4)). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"decrement": 3}' +``` diff --git a/content/riak/kv/2.9.2/developing/data-types/gsets.md b/content/riak/kv/2.9.2/developing/data-types/gsets.md new file mode 100644 index 0000000000..ef29a582d5 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/data-types/gsets.md @@ -0,0 +1,627 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types:GSets" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "GSets" + identifier: "data_types_gsets" + weight: 101 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.2/dev/using/data-types/gsets + - /riak/kv/2.9.2/dev/using/data-types/gsets + - /riak/2.9.2/dev/data-modeling/data-types/gsets + - /riak/kv/2.9.2/dev/data-modeling/data-types/gsets +--- + +GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps). + +GSets are collections of unique binary values (such as strings). All of the values in a gset are unique and are automatically sorted alphabetically irresepective of the order they were added. + +For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV. 
+
+Unlike sets, gset elements can only be added; existing elements cannot be modified or removed.
+
+> **Known Issue**
+>
+> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `gset`:
+
+```bash
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `gsets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: gset
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate gsets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+After creating and activating our new `gsets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using gsets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a gset:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket_name>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Assuming a bucket from the gsets bucket type, e.g.:
+# bucket = client.bucket_type('gsets').bucket('account-12345678')
+gset = bucket.new('2019-11-17')
+
+# or
+
+from riak.datatypes import GSet
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with gsets
+// by building an Options object or using a Builder
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, gsets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store a list of transactions that occur for an account number on a specific date, under the key `2019-11-17` in the bucket `account-12345678`. (The Java and Ruby examples below illustrate the same operations with a gset of cities, stored in the key `cities` in the bucket `travel`.)
+Let's create a Riak gset using the `gsets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak gset.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a gset explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create an options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+GSet20191117 = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty. 
We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+count($gset->getData());
+```
+
+```python
+len(gset) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+FetchGSet fetchGSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchGSetCommand);
+GSetResponse response = fetchGSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("gset '2019-11-17' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_gset:size(GSet20191117) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% gset that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a GSet
+
+Let's say that a pair of transactions occurred today. 
Let's add them to our `2019-11-17` gset:
+
+```java
+// Using our "cities" Location from above:
+
+GSetUpdate su = new GSetUpdate()
+        .add("Toronto")
+        .add("Montreal");
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .build();
+client.execute(update);
+```
+
+```ruby
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('transaction a')
+  ->add('transaction b')
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+gset.add('transaction a')
+gset.add('transaction b')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "transaction a", "transaction b" };
+
+var builder = new UpdateGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17")
+    .WithAdditions(adds);
+
+UpdateGSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+GSetResponse response = cmd.Response;
+Assert.Contains("transaction a", response.AsStrings.ToArray());
+Assert.Contains("transaction b", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['transaction a', 'transaction b'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+GSet20191117_1 = riakc_gset:add_element(<<"transaction a">>, GSet20191117),
+GSet20191117_2 = riakc_gset:add_element(<<"transaction b">>, GSet20191117_1).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["transaction a", "transaction b"]}'
+```
+
+## Remove from a GSet
+
+Removal from a GSet is not possible.
+
+## Retrieve a GSet
+
+Now we can check which transactions are currently in our gset:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+# Returns the members of the gset as a Ruby ::Set
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+var_dump($gset->getData());
+```
+
+```python
+gset.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['transaction a', 'transaction b']), the call below would
+# return frozenset([]).
+
+gset.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions.
+gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(GSet20191117_2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(GSet20191117_2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"gsets">>, <<"account-12345678">>},
+                                        <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var gset20191117 = rslt.values;
+gset20191117.indexOf('transaction z'); // if present, index is >= 0
+gset20191117.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, GSet20191117_2 is the most "recent" gset from the
+%% standpoint of our application.
+
+riakc_gset:is_element(<<"transaction z">>, GSet20191117_2).
+riakc_gset:is_element(<<"transaction a">>, GSet20191117_2).
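+
+%% These calls return false and true, respectively: <<"transaction z">>
+%% was never added to the gset, while <<"transaction a">> was.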
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "binarySet" from the fetch above:
+
+int numberOfCities = binarySet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+response.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var gset20191117Size = rslt.values.length;
+```
+
+```erlang
+riakc_gset:size(GSet20191117_2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
diff --git a/content/riak/kv/2.9.2/developing/data-types/hyperloglogs.md b/content/riak/kv/2.9.2/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..63a1b3df00
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/data-types/hyperloglogs.md
@@ -0,0 +1,639 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/data-types/hyperloglogs
+  - /riak/kv/2.9.2/dev/using/data-types/hyperloglogs
+  - /riak/2.9.2/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/2.9.2/dev/data-modeling/data-types/hyperloglogs
+---
+
+The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog. 
+
+For this example we'll use the `hlls` bucket type created and activated above and a bucket called `my_hlls`:
+
+```erlang
+%% Buckets are simply named binaries in the Erlang client. See the
+%% examples below for more information
+```
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location hllLocation =
+  new Location(new Namespace("<bucket_type>", "<bucket_name>"), "<key>");
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// Buckets and bucket types are simply strings in the Go client.
+
+// See the examples below for more information, or the full example at
+// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go
+
+// We will need the following imports to run the examples:
+import (
+    "fmt"
+    "os"
+    "time"
+
+    riak "github.com/basho/riak-go-client"
+    "errors"
+)
+```
+
+```csharp
+// In the C# client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```javascript
+// In the Node.js client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('<key>', '<bucket>', 'hlls')
+  ->build();
+```
+
+```ruby
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-Data-Type
+# requests, which end in /keys/<key>
+```
+
+
+## Create a HyperLogLog data type
+
+To create a hyperloglog data structure, you need to specify a bucket/key pair to
+hold that hyperloglog. Here is the general syntax for doing so:
+
+```erlang
+HLL = riakc_hll:new().
+
+%% Hyperloglogs in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location hllLocation =
+  new Location(new Namespace("hlls", "hello"), "darkness");
+
+// In the Java client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// In the Go client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```csharp
+// In the C# client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```javascript
+// In the Node.js client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```php
+// Note that "hlls" is just an example HLL bucket type name used
+// in these examples
+
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('gosabres poked you.')
+  ->add('phprocks viewed your profile.')
+  ->add('phprocks started following you.')
+  ->buildBucket('<bucket_name>', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+key = "darkness"
+hll = Riak::Crdt::HyperLogLog.new(bucket, key)
+```
+
+```curl
+# You cannot create an empty hyperloglog data structure through the HTTP
+# interface.
+# Hyperloglogs can only be created when an element is added to them, as in the
+# examples below.
+```
+
+Upon creation, our hyperloglog data structure is empty:
+
+```erlang
+HLL.
+
+%% which will return:
+%% {hll,0,[]}
+```
+
+```java
+FetchHll fetch = new FetchHll.Builder(hllLocation)
+    .build();
+RiakHll hll = client.execute(fetch);
+boolean isEmpty = hll.getCardinality() == 0;
+```
+
+```python
+is_empty = hll.value == 0
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+});
+// Prints "Not Found" to logger.info.
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+// Prints "Not Found" to the console.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+
+$response->getCode() == '404';
+```
+
+```ruby
+puts hll.cardinality
+# Prints "0"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","error":"notfound"}
+```
+
+## Add elements to a HyperLogLog data type
+
+```erlang
+HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
+RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
+HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1),
+
+HLL2. 
+
+%% which will return:
+%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]}
+```
+
+```java
+HllUpdate hllUpdate = new HllUpdate()
+    .add("Jokes")
+    .add("Are")
+    .addAll(Arrays.asList("Better", "Explained", "Jokes"));
+
+hllUpdate.getElementAdds();
+// Returns the set of ["Jokes", "Are", "Better", "Explained"]
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+// We will add values in the next example
+```
+
+```csharp
+// We will add values in the next example
+```
+
+```javascript
+// We will add values in the next example
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('Jokes')
+  ->add('Are')
+  ->add('Better')
+  ->add('Explained')
+  ->add('Jokes')
+  ->buildBucket('my_hlls', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+# The Ruby client sends additions to the server as they are made, so
+# there is no separate local-mutation step; see the next example.
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["my", "old", "friend"]}'
+```
+
+However, when using a non-HTTP client, the approximate cardinality/value of our
+data structure will be 0, locally, until it's pushed to the server and then
+[fetched](#retrieve-a-hyperloglog-data-type) from the server.
+
+```erlang
+riakc_hll:value(HLL2) == 0.
+
+%% which will return:
+%% true
+
+Port = 8087,
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port),
+Key = <<"Holy Diver">>,
+BucketType = <<"hlls">>,
+Bucket = {BucketType, <<"rainbow in the dark">>},
+
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)).
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)).
+```
+
+```java
+// Using hllUpdate and hllLocation from above examples
+
+UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate)
+    .build();
+client.execute(update);
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+adds := [][]byte{
+    []byte("Jokes"),
+    []byte("Are"),
+    []byte("Better"),
+    []byte("Explained"),
+    []byte("Jokes"),
+}
+
+builder := riak.NewUpdateHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    WithAdditions(adds...).
+    Build()
+if err != nil {
+    return err
+}
+
+return cluster.Execute(cmd)
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness',
+    additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'],
+};
+
+client.updateHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```csharp
+var adds = new HashSet<string> { "Jokes", "Are", "Better", "Explained", "Jokes" };
+
+var update = new UpdateHll.Builder(adds)
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .WithReturnBody(true)
+    .Build();
+
+RiakResult rslt = client.Execute(update);
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('Jokes')
+  ->add('Are')
+  ->add('Better')
+  ->add('Explained')
+  ->add('Jokes')
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+hll.add('Jokes')
+hll.batch do |s|
+  s.add 'Are'
+  s.add 'Better'
+  s.add 'Explained'
+  s.add 'Jokes'
+end
+```
+
+## Retrieve a HyperLogLog data type
+
+Now we can check the approximate count (that is, the cardinality) of the
+elements we have added to our hyperloglog data structure:
+
+```erlang
+{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key),
+riakc_hll:value(HLL3) == 4.
+
+%% which would return:
+%% true
+
+%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the
+%% unique elements we've added to the data structure.
+```
+
+```java
+FetchHll hllFetchCmd = new FetchHll.Builder(hllLocation).build();
+RiakHll hll = client.execute(hllFetchCmd);
+hll.getCardinality();
+// Which returns 4
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = bucket.get('hll_one')
+# myhll.value == 4
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+    logger.info("Hyperloglog cardinality is: " + rslt.cardinality);
+});
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
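+
+// Note that a hyperloglog's value is an estimate: for a small collection
+// like this one it will usually be exact, but for large collections it is
+// only approximately equal to the number of unique elements added.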
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($response->getHll()->getData()));
+$this->assertEquals(4, $response->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
diff --git a/content/riak/kv/2.9.2/developing/data-types/maps.md b/content/riak/kv/2.9.2/developing/data-types/maps.md
new file mode 100644
index 0000000000..8ec23f50c2
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/data-types/maps.md
@@ -0,0 +1,1881 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/data-types/maps
+  - /riak/kv/2.9.2/dev/using/data-types/maps
+  - /riak/2.9.2/dev/data-modeling/data-types/maps
+  - /riak/kv/2.9.2/dev/data-modeling/data-types/maps
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`. 
+
+If our `maps` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: map
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+After creating and activating our new `maps` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key location that contains our map.
+
+The syntax for creating a map is analogous to the
+syntax for creating other data types:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location map =
+  new Location(new Namespace("<bucket_type>", "<bucket_name>"), "<key>");
+```
+
+```ruby
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+map = Riak::Crdt::Map.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type');
+```
+
+```python
+# The client detects the bucket type's datatype and automatically
+# returns the right datatype for you, in this case a Map.
+map = bucket.new(key)
+
+# This way is also acceptable:
+from riak.datatypes import Map
+map = Map(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("<bucket_type>")
+    .WithBucket("<bucket_name>")
+    .WithKey("<key>");
+```
+
+```javascript
+// Options to pass to the various map methods
+var options = {
+    bucketType: '<bucket_type>',
+    bucket: '<bucket_name>',
+    key: '<key>'
+};
+```
+
+```erlang
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Map
+
+For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket.
+
+We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type:
+
+```java
+// In the Java client, you specify the location of data types
+// before you perform operations on them:
+
+Location ahmedMap =
+  new Location(new Namespace("maps", "customers"), "ahmed_info");
+```
+
+```ruby
+customers = client.bucket_type('maps').bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+
+# Alternatively, the Ruby client enables you to set a bucket type as being
+# globally associated with a Riak data type. 
The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+        .update("first_name", ru1)
+        .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Ahmed')
+  ->updateRegister('phone_number', '5551234567')
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved. 
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+        .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('enterprise_customer', false)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"enterprise_customer">>, flag},
+                        fun(F) -> riakc_flag:disable(F) end,
+                        Map3).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "enterprise_customer_flag": "disable"
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+  }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"]
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map4).
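+
+%% dirty_value/1 returns the map's fields as a list of
+%% {{FieldName, FieldType}, Value} pairs, for example:
+%% [{{<<"enterprise_customer">>, flag}, false}, ...]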
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+        .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateCounter('page_visits', $updateCounter)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "page_visits_counter": 1
+    }
+  }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles. 
We'll store
+that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+        .add("robots")
+        .add("opera")
+        .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+        .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('robots')
+  ->add('opera')
+  ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{"interests":["motorcycles","opera","robots"]},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "add_all": [
+          "robots",
+          "opera",
+          "motorcycles"
+        ]
+      }
+    }
+  }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? 
interest
+  end
+end
+
+# This will return three Boolean values
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+$sets = $map->getSet('interests');
+var_dump($sets->getData());
+```
+
+```python
+reloaded_map = map.reload()
+for interest in ['robots', 'opera', 'motorcycles']:
+    interest in reloaded_map.sets['interests'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+
+// All of the following return true:
+ahmedMap.Sets.GetValue("interests").Contains("robots");
+ahmedMap.Sets.GetValue("interests").Contains("opera");
+ahmedMap.Sets.GetValue("interests").Contains("motorcycles");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    assert(rslt.map.sets['interests'].indexOf('robots') !== -1);
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map6).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false
+```
+
+We learn from a recent purchasing decision that Ahmed actually doesn't
+seem to like opera. He's much more keen on indie pop. Let's change the
+`interests` set to reflect that:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+        .remove("opera")
+        .add("indie pop");
+MapUpdate mu = new MapUpdate()
+        .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  m.sets['interests'].remove('opera')
+  m.sets['interests'].add('indie pop')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('indie pop')
+  ->remove('opera');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->withContext($map->getContext())
+  ->build()
+  ->execute();
+```
+
+```python
+map.sets['interests'].discard('opera')
+map.sets['interests'].add('indie pop')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", "indie pop");
+mapOperation.RemoveFromSet("interests", "opera");
+
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+response = cmd.Response;
+Map ahmedMap = response.Value;
+
+// This is false
+ahmedMap.Sets.GetValue("interests").Contains("opera");
+
+// These are true
+ahmedMap.Sets.GetValue("interests").Contains("indie pop");
+ahmedMap.Sets.GetValue("interests").Contains("robots");
+ahmedMap.Sets.GetValue("interests").Contains("motorcycles");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.removeFromSet('interests', 'opera');
+    mapOp.addToSet('interests', 'indie pop');
+
+    options.context = rslt.context;
+    options.op = mapOp;
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map7 = 
riakc_map:update({<<"interests">>, set},
+               fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+                        Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of a wide variety of
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("first_name", ru1)
+        .update("last_name", ru2)
+        .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Annika')
+  ->updateRegister('last_name', 'Weiss')
+  ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); 
+mapOp.map('annika_info') + .setRegister('first_name', 'Annika') + .setRegister('last_name', 'Weiss') + .setRegister('phone_number', '5559876543'); + +options.op = mapOp; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map12 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end, + Map11), +Map13 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"last_name">>, register}, + fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end, + Map12), +Map14 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end, + Map13). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "first_name_register": "Annika", + "last_name_register": "Weiss", + "phone_number_register": "5559876543" + } + } + } + } + ' +``` + +The value of a register in a map can be obtained without a special +method: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +String annikaFirstName = response.getDatatype() + .getMap("annika_info") + .getRegister("first_name") + .view() + .toString(); +``` + +```ruby +map.maps['annika_info'].registers['first_name'] + +# "Annika" +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getRegister('first_name'); // Annika +``` + +```python +map.reload().maps['annika_info'].registers['first_name'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Registers.GetValue("first_name"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var annikaFirstName = + rslt.map.maps['annika_info'].registers['first_name'].toString('utf8'); +}); +``` + +```erlang +riakc_map:dirty_value(Map14). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +Registers can also be removed: + +```java +// This example uses our "ahmedMap" location from above. 
Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .removeRegister("phone_number");
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('phone_number')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->removeRegister('phone_number');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['phone_number']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("phone_number");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('phone_number');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+    fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+    Map14).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "remove": ["phone_number_register"]
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+  }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("enterprise_plan", new FlagUpdate(false))
+        .update("family_plan", new FlagUpdate(false))
+        .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_plan', false)
+    ->updateFlag('family_plan', false)
+    ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"enterprise_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map15),
+Map17 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"family_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map16),
+Map18 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"free_plan">>, flag},
+        fun(F) -> riakc_flag:enable(F) end,
+        M) end,
+    Map17).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "enterprise_plan_flag": "disable",
+          "family_plan_flag": "disable",
+          "free_plan_flag": "enable"
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="
+  }
+  '
+```
+
+The value of a flag can be retrieved at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+boolean enterprisePlan = response.getDatatype()
+        .getMap("annika_info")
+        .getFlag("enterprise_plan")
+        .view();
+```
+
+```ruby
+map.maps['annika_info'].flags['enterprise_plan']
+
+# false
+```
+
+```php
+# with param 'returnbody' = 'true', we can fetch the map from our last response
+$map->getMap();
+
+echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false
+```
+
+```python
+map.reload().maps['annika_info'].flags['enterprise_plan'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+ahmedMap = response.Value;
+bool enterprisePlan = ahmedMap.Maps["annika_info"].Flags["enterprise_plan"];
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var enterprisePlan =
+        rslt.map.maps.annika_info.flags.enterprise_plan;
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map18).
+```
+
+```curl
+# Specific values for fields inside of maps (or maps within maps, for that
+# matter) cannot be obtained directly through the HTTP interface.
+```
+
+It's also important to track the number of purchases that Annika has
+made with our company.
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
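+
+%% Counter increments never require a causal context, so this update can
+%% be sent without fetching the map first (unlike the field removals shown
+%% earlier, which do need a fetched context).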
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "add": "tango dancing"
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+We can remove that interest in just the way that we would expect:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate().remove("tango dancing");
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("interests", su);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].sets['interests'].remove('tango dancing')
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('tango dancing');
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($response->getMap()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].sets['interests'].discard('tango dancing')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing");
+
+// Note: using Context from previous response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+client.Execute(builder.Build());
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.removeFromSet('interests', 'tango dancing');
+
+    options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map21 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"interests">>, set},
+        fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end,
+        M) end,
+    Map20).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "remove": "tango dancing"
+          }
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+  }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('first_purchase', true)
+    ->updateRegister('amount', '1271')
+    ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map22 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"purchase">>, map},
+        fun(P) -> riakc_map:update(
+            {<<"first_purchase">>, flag},
+            fun(F) -> riakc_flag:enable(F) end,
+            P) end,
+        M) end,
+    Map21
+).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "purchase_map": {
+            "update": {
+              "first_purchase_flag": "enable",
+              "amount_register": "1271",
+              "items_set": {
+                "add": "large widget"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  '
+```
diff --git a/content/riak/kv/2.9.2/developing/data-types/sets.md b/content/riak/kv/2.9.2/developing/data-types/sets.md
new file mode 100644
index 0000000000..c4fb6b9235
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/data-types/sets.md
@@ -0,0 +1,769 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Sets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Sets"
+    identifier: "data_types_sets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/data-types/sets
+  - /riak/kv/2.9.2/dev/using/data-types/sets
+  - /riak/2.9.2/dev/data-modeling/data-types/sets
+  - /riak/kv/2.9.2/dev/data-modeling/data-types/sets
+---
+
+Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps).
+
+Sets are collections of unique binary values (such as strings); each value can appear in a given set only once.
+
+For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `set`:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+```
+
+> **Note**
+>
+> The `sets` bucket type name provided above is only an example. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `sets` bucket type has been set properly, we should see the following pair in our console output:
+
+```
+datatype: set
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate sets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+After creating and activating our new `sets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('cities', 'travel', 'sets');
+```
+
+```python
+travel = client.bucket_type('sets').bucket('travel')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak set.
+cities_set = travel.new('cities')
+
+# You can also create a reference to a set explicitly:
+from riak.datatypes import Set
+
+cities_set = Set(travel, 'cities')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the set with which we want to
+// interact:
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+```
+
+```javascript
+// Now we'll create an options object for the set with which we want to
+// interact:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+CitiesSet = riakc_set:new().
+
+%% Sets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty set through the HTTP interface. Sets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our set is empty. We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+count($set->getData());
+```
+
+```python
+len(cities_set) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+FetchSet fetchSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchSetCommand);
+SetResponse response = fetchSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("set 'cities' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_set:size(CitiesSet) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% set that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a Set
+
+But let's say that we read a travel brochure saying that Toronto and
+Montreal are nice places to go.
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+        .remove("Montreal")
+        .add("Hamilton")
+        .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Hamilton')
+    ->add('Ottawa')
+    ->remove('Montreal')
+    ->atLocation($location)
+    ->withContext($response->getSet()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse updateResponse = cmd.Response;
+
+// using System.Linq
+var responseStrings = updateResponse.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4).
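+
+%% Note: removals need the set's causal context, so in a real application
+%% CitiesSet2 should descend from a set fetched from Riak (see fetch_type/3
+%% below); riakc_set:del_element/2 raises context_required when called on
+%% a set created locally with riakc_set:new/0.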
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="} + +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}' +``` + +## Retrieve a Set + +Now, we can check on which cities are currently in our set: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); +for (BinaryValue city : binarySet) { + System.out.println(city.toStringUtf8()); +} +``` + +```ruby +cities_set.members + +# +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +var_dump($set->getData()); +``` + +```python +cities_set.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, where the call above would return +# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would +# return frozenset([]). + +cities_set.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any unsent additions or deletions. +cities_set.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +foreach (var value in setResponse.AsStrings) +{ + Console.WriteLine("Cities Set Value: {0}", value); +} + +// Output: +// Cities Set Value: Hamilton +// Cities Set Value: Ottawa +// Cities Set Value: Toronto +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function(err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("cities set values: '%s'", + rslt.values.join(', ')); +}); + +// Output: +// info: cities set values: 'Hamilton, Ottawa, Toronto' +``` + +```erlang +riakc_set:dirty_value(CitiesSet5). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, where the call above would return +%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would +%% return []. These are essentially ordsets: + +riakc_set:value(CitiesSet5). + +%% To fetch the value stored on the server, use the call below: + +{ok, SetX} = riakc_pb_socket:fetch_type(Pid, + {<<"sets">>,<<"travel">>}, + <<"cities">>). 
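+
+%% To write the locally accumulated operations back to Riak (a sketch that
+%% assumes the same Pid as above), convert them with to_op/1 and send them
+%% with update_type/4:
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"travel">>},
+                            <<"cities">>,
+                            riakc_set:to_op(CitiesSet5)).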
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="} + +# You can also fetch the value of the set without the context included: +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false + +# Response +{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]} +``` + +## Find Set Member + +Or we can see whether our set includes a specific member: + +```java +// Using our "citiesSet" from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); + +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver"))); +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa"))); +``` + +```ruby +cities_set.include? 'Vancouver' +# false + +cities_set.include? 'Ottawa' +# true +``` + +```php +in_array('Vancouver', $set->getData()); # false + +in_array('Ottawa', $set->getData()); # true +``` + +```python +'Vancouver' in cities_set +# False + +'Ottawa' in cities_set +# True +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +using System.Linq; + +bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver"); +bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa"); +``` + +```javascript +// Use standard javascript array method indexOf() + +var cities_set = result.values; +cities_set.indexOf('Vancouver'); // if present, index is >= 0 +cities_set.indexOf('Ottawa'); // if present, index is >= 0 +``` + +```erlang +%% At this point, Set5 is the most "recent" set from the standpoint +%% of our application. + +riakc_set:is_element(<<"Vancouver">>, CitiesSet5). +riakc_set:is_element(<<"Ottawa">>, CitiesSet5). +``` + +```curl +# With the HTTP interface, this can be determined from the output of +# a fetch command like the one displayed in the example above +``` + +## Size of Set + +We can also determine the size of the set: + +```java +// Using our "citiesSet" from above: + +int numberOfCities = citiesSet.size(); +``` + +```ruby +cities_set.members.length +``` + +```php +count($set->getData()); +``` + +```python +len(cities_set) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +using System.Linq; + +// Note: this enumerates the IEnumerable +setResponse.Values.Count(); +``` + +```javascript +// Use standard javascript array property length + +var cities_set_size = result.values.length; +``` + +```erlang +riakc_set:size(CitiesSet5). 
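+
+%% size/1 is computed over the immutable value fetched from the server, so
+%% it returns 0 for a set that has only been built locally; count unsent
+%% local changes with the dirty value instead:
+length(riakc_set:dirty_value(CitiesSet5)).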
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
diff --git a/content/riak/kv/2.9.2/developing/faq.md b/content/riak/kv/2.9.2/developing/faq.md
new file mode 100644
index 0000000000..acad3433cc
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/faq.md
@@ -0,0 +1,654 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Frequently Asked Questions"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Developing FAQ"
+    identifier: "developing_faq"
+    weight: 108
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.9.2/community/faqs/developing
+  - /riak/kv/2.9.2/community/faqs/developing
+---
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.2/using/performance/benchmarking
+[Bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[Bucket Properties]: {{<baseurl>}}riak/kv/2.9.2/developing/usage
+[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/commit-hooks
+[Configuration Files]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{<baseurl>}}riak/kv/2.9.2/developing/client-libraries
+[MapReduce]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce
+[Memory]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/memory
+[Riak CS]: {{<baseurl>}}riak/cs/2.1.1
+[System Planning]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context#vector-clocks
+
+
+## General
+
+
+**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?**
+
+**A:**
+  If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example:
+
+  ```erlang
+  {bitcask, [
+      {data_root, "data/bitcask"},
+      {expiry_secs, 86400} %% Expire after a day
+  ]},
+  ```
+
+  There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0.
+
+  You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM.
+
+
+---
+
+**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?**
+
+
+**A:**
+  Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free.
+
+  If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact.
+
+
+---
+
+**Q: Can I list buckets or keys in production?**
+
+
+**A:**
+  It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size.
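+
+  For example, listing buckets is a one-liner in the Erlang client (a
+  sketch; assumes an open `riakc_pb_socket` connection `Pid`), but the
+  cluster still has to scan its entire keyspace to answer it:
+
+  ```erlang
+  %% Fine for development and debugging; avoid on a production cluster:
+  {ok, Buckets} = riakc_pb_socket:list_buckets(Pid).
+  ```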
+
+  Buckets are not like directories on a file system or tables in a
+  database; rather, they are logical properties applied to objects, i.e.
+  there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order
+  to find those residing in a particular bucket. Buckets are intended
+  for configuration purposes (e.g. replication properties) rather than
+  for general queries.
+
+  To keep track of groups of objects there are several options with
+  various trade-offs: secondary indexes, search, or a list using links.
+
+
+---
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the
+  partitions in the ring. In order to minimize synchronization issues
+  with secondary indexes, Riak stores index information in the same
+  partition as the data values.
+
+  When a node fails or is taken out of the cluster without using
+  `riak-admin leave`, all of the data held by that node is lost to the
+  cluster. This leaves `n_val` - 1 consistent replicas of the data. If
+  `riak-admin force-remove` is used to remove the downed node, the
+  remaining nodes will claim the partitions the failed node previously
+  held. The data in the newly claimed vnodes will be made consistent one
+  key at a time through the read-repair mechanism as each key is
+  accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3.
+
+  For the sake of simplicity, this example uses small integers instead
+  of the actual 160-bit partition index values. The partitions are
+  assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to
+  determine its first primary partition, and the value is stored in that
+  partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key, and the next
+  `n_val` - 1 vnodes in the ring, in order. In this scenario there are
+  16 preflists:
+
+```
+0-1-2     1-2-3     2-3-4     3-4-5
+4-5-6     5-6-7     6-7-8     7-8-9
+8-9-10    9-10-11   10-11-12  11-12-13
+12-13-14  13-14-15  14-15-0   15-0-1
+```
+ + Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes: + + + + + + + + + + + + + + + + +
+
+```
+0-1-4-7-10-13   0-2-4-7-10-13   0-2-5-7-10-13   0-2-5-8-10-13
+0-2-5-8-11-13   0-2-5-8-11-14   0-3-4-7-10-13   0-3-5-7-10-13
+0-3-5-8-10-13   0-3-5-8-11-13   0-3-5-8-11-14   0-3-6-7-10-13
+0-3-6-8-10-13   0-3-6-8-11-13   0-3-6-8-11-14   0-3-6-9-10-13
+0-3-6-9-11-13   0-3-6-9-11-14   0-3-6-9-12-13   0-3-6-9-12-14
+0-3-6-9-12-15   1-2-5-8-11-14   1-3-5-8-11-14   1-3-6-8-11-14
+1-3-6-9-11-14   1-3-6-9-12-14   1-3-6-9-12-15   1-4-5-8-11-14
+1-4-6-8-11-14   1-4-6-9-11-14   1-4-6-9-12-14   1-4-6-9-12-15
+1-4-7-8-11-14   1-4-7-9-11-14   1-4-7-9-12-14   1-4-7-9-12-15
+1-4-7-10-11-14  1-4-7-10-12-14  1-4-7-10-12-15  1-4-7-10-13-14
+1-4-7-10-13-15  2-3-6-9-12-15   2-4-6-9-12-15   2-4-7-9-12-15
+2-4-7-10-12-15  2-4-7-10-13-15  2-5-6-9-12-15   2-5-7-9-12-15
+2-5-7-10-12-15  2-5-7-10-13-15  2-5-8-9-12-15   2-5-8-10-12-15
+2-5-8-10-13-15  2-5-8-11-12-15  2-5-8-11-13-15  2-5-8-11-14-15
+```
+
+  When a node fails or is marked down, its vnodes will not be considered
+  for coverage queries. Fallback vnodes will be created on other nodes so
+  that PUT and GET operations can be handled, but only primary vnodes are
+  considered for secondary index coverage queries. If a covering set
+  cannot be found, `{error, insufficient_vnodes}` will be returned. Thus,
+  the reply will either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without
+  transferring its data to other nodes, and the remaining nodes then
+  claim the unowned partitions, designating new primary replicas to
+  comply with `n_val`, but they do not immediately populate the data or
+  indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or
+  Active Anti-Entropy, will eventually repopulate the data, restoring
+  consistency.
+  A GET operation for a key will request the data from all of the vnodes
+  in its preflist, by default waiting for over half of them to respond.
+  This results in consistent responses to GETs even when one of the
+  vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may
+  include only 1 member of the preflist. If that vnode is empty due to
+  the `force-remove` operation, none of the keys from that preflist will
+  be returned.
+
+  Continuing with the above example, consider if node C is force-removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary
+  partitions that do not contain any values or index information.
+
+  In this new 4-node configuration, any coverage set that includes
+  vnodes 2, 7, or 12 will return incomplete results until consistency is
+  restored via read-repair or AAE, because not all vnodes will contain
+  the data that would otherwise be present.
+
+  So making a couple of assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+      ```
+      a - 0-1-2
+      b - 6-7-8
+      c - 10-11-12
+      ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries
+     are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the
+  client) will attempt to spread the load by not using the same
+  partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys
+  will vary depending on the nodes chosen for the coverage set. Of the 56
+  possible covering sets ...
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+```
+0-2-5-8-10-13  0-2-5-8-11-13  0-2-5-8-11-14  0-3-5-8-10-13
+0-3-5-8-11-13  0-3-5-8-11-14  0-3-6-8-10-13  0-3-6-8-11-13
+0-3-6-8-11-14  0-3-6-9-10-13  0-3-6-9-11-13  0-3-6-9-11-14
+1-2-5-8-11-14  1-3-5-8-11-14  1-3-6-8-11-14  1-3-6-9-11-14
+1-4-5-8-11-14  1-4-6-8-11-14  1-4-6-9-11-14  1-4-7-8-11-14
+```
+
+  * 24 sets (42.9%) will return 2 of the 3 keys:
+
+  `{a,b}` (7 sets):
+
+```
+0-3-6-9-12-13  0-3-6-9-12-14  0-3-6-9-12-15  1-3-6-9-12-14
+1-3-6-9-12-15  1-4-6-9-12-14  1-4-6-9-12-15
+```
+
+  `{a,c}` (12 sets):
+
+```
+0-1-4-7-10-13   0-2-4-7-10-13   0-2-5-7-10-13   0-3-4-7-10-13
+0-3-5-7-10-13   0-3-6-7-10-13   1-4-7-10-11-14  1-4-7-10-12-14
+1-4-7-10-12-15  1-4-7-10-13-14  1-4-7-10-13-15  1-4-7-9-11-14
+```
+
+  `{b,c}` (5 sets):
+
+```
+2-5-8-10-12-15  2-5-8-10-13-15  2-5-8-11-12-15  2-5-8-11-14-15
+2-5-8-11-13-15
+```
+
+  * 10 sets (17.8%) will return only one of the 3 keys:
+
+  `{a}` (2 sets):
+
+```
+1-4-7-9-12-14  1-4-7-9-12-15
+```
+
+  `{b}` (4 sets):
+
+```
+2-3-6-9-12-15  2-4-6-9-12-15  2-5-6-9-12-15  2-5-8-9-12-15
+```
+
+  `{c}` (4 sets):
+
+```
+2-4-7-10-12-15  2-4-7-10-13-15  2-5-7-10-12-15  2-5-7-10-13-15
+```
+
+  * 2 sets (3.6%) will not return any of the 3 keys:
+
+```
+2-4-7-9-12-15  2-5-7-9-12-15
+```
+
+
+---
+
+**Q: How do I load 3rd-party JavaScript libraries for use in MapReduce functions?**
+  Is it possible to load third-party JavaScript libraries (like Underscore.js) to be available in MapReduce functions?
+
+
+**A:**
+  Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings:
+
+  ```erlang
+  {js_source_dir, "/etc/riak/javascript"},
+  ```
+
+  For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section:
+
+  ```erlang
+  {add_paths, "/etc/riak/erlang"},
+  ```
+
+  You can find more details in the [Configuration Files] document.
+
+---
+
+**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?**
+  When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk?
+
+
+**A:**
+  Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following:
+
+  ```json
+  {
+    "inputs": {
+      "bucket": "test",
+      "key_filters": [
+        ["ends_with","1"]
+      ]
+    },
+    "query": [
+      {
+        "reduce": {
+          "language": "erlang",
+          "module": "riak_kv_mapreduce",
+          "function": "reduce_identity"
+        }
+      }
+    ]
+  }
+  ```
+
+  There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk:
+
+  ```json
+  {
+    "inputs": {
+      "bucket": "test",
+      "key_filters": [
+        [
+          "ends_with","1"
+        ]
+      ]
+    },
+    "query": [
+      {
+        "reduce": {
+          "language": "erlang",
+          "module": "riak_kv_mapreduce",
+          "function": "reduce_count_inputs"
+        }
+      }
+    ]
+  }
+  ```
+
+
+---
+
+**Q: How can I observe object sizes and sibling counts?**
+
+
+**A:**
+  `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts.
+
+  ```
+  node_get_fsm_siblings_mean : 0
+  node_get_fsm_siblings_median : 0
+  node_get_fsm_siblings_95 : 0
+  node_get_fsm_siblings_99 : 0
+  node_get_fsm_siblings_100 : 0
+  node_get_fsm_objsize_mean : 0
+  node_get_fsm_objsize_median : 0
+  node_get_fsm_objsize_95 : 0
+  node_get_fsm_objsize_99 : 0
+  node_get_fsm_objsize_100 : 0
+  ```
+
+
+---
+
+**Q: A node left the cluster before handing off all data. How can I resolve this?**
+
+
+**A:**
+  In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0.
+
+  If you encounter this issue, you can rely upon read repair to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed.
+
+  Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a Basho support contract who can ask for help if anything goes wrong.
+
+  **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories.
+
+  ```erlang
+  ClusterNode = 'riak@127.0.0.1'.
+
+  application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+  {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+  Ring2 = setelement(2, Ring, node()).
+  riak_core_ring_manager:set_my_ring(Ring2).
+  riak_core_ring_manager:write_ringfile().
+  [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()].
+  ```
+
+
+---
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+  There isn't a limit on object size, but we suggest you keep it to no more than 1-2MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+
+---
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+  The storage per key is 40 bytes plus the key size and bucket name size.
+
+  Example:
+
+  Key size: 15 bytes.
+  Bucket Name size: 10 bytes.
+
+  Total size = 40 + 15 + 10 = **65 bytes**.
+
+
+
+---
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+  It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+
+---
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+  The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+  Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster.
+
+  The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+
+---
+
+**Q: Are Riak keys / buckets case sensitive?**
+
+
+**A:**
+  Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+
+---
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+  We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+  1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+  2. You will be able to upgrade Riak and your application independently of one another.
+  3. When your application or Riak needs more capacity, you can scale them separately to meet your production needs.
+
+
+---
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+  Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+
+
+---
+
+**Q: How do I spread requests across---i.e.
+
+
+**A:**
+  There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse proxies**.
+
+  For further information see [System Planning].
+
+
+---
+
+
+**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?**
+  There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node, a series of merges are kicked off and the total size of the data directory shrinks. Why does this happen?
+
+
+**A:**
+  Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows:
+
+  1. List all of the data files in the Bitcask directory; note that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`)
+  2. Remove the currently active file from the list; the active file is the one being actively written
+  3. Look up file stats for each data file; this includes percent fragmentation and number of dead bytes
+  4. If any of the stats exceed the defined triggers, the Bitcask directory is merged
+
+  The default triggers for a Bitcask directory:
+
+  * `{frag_merge_trigger, 60}, % >= 60% fragmentation`
+  * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB`
+
+  In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory.
+
+  If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default).
+
+
+---
+
+**Q: When retrieving a list of siblings I am getting the same vtag multiple times.**
+  When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling?
+
+
+**A:**
+  The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata.
+
+  It is possible to get siblings with the same vtag during vector clock pruning and read/repair.
+
+  See [vector clocks] for more information.
+
+
+
+---
+
+**Q: How should I structure larger data objects?**
+  I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs?
+
+
+**A:**
+  The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include:
+
+  1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time?
+  2. How likely are the objects to be updated at the same time by multiple processes?
+
+  If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts.
+  Generally, you will want to add links to connect the objects for easy fetching and traversal.
+
+
+---
+
+**Q: Is there any way in Riak to limit access to a user or a group of users?**
+
+
+**A:**
+  Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication.
+
+  If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it.
+
+
+---
+
+**Q: Is there a way to enforce a schema on data in a given bucket?**
+  Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error.
+
+
+**A:**
+  Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. That document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in JavaScript that restricts non-JSON content.
+
+
+---
+
+**Q: How does the Erlang Riak Client manage node failures?**
+  Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down?
+
+
+**A:**
+  The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function.
+
+  * `queue_if_disconnected` (default: `false`) --- requests will be queued when the connection to the server is lost.
+  * `auto_reconnect` (default: `false`) --- if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`.
+
+  If both options are `false`, connection errors will be returned to the process making requests as `{error, Reason}` tuples.
+
+
+---
+
+**Q: Is there a limiting factor for the number of buckets in a cluster?**
+
+
+**A:**
+  As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node.
+
+  More on [Bucket Properties].
+
+
+---
+
+**Q: Is it possible to configure a single bucket's properties in `app.config`?**
+
+
+**A:**
+  Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state.
+
+  You can read more on `app.config` in [Configuration Files].
+
+
+---
+
+**Q: Is there a simple command to delete a bucket?**
+
+
+**A:**
+  There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work:
+
+  ```curl
+  curl -X DELETE http://your-host:8098/riak/your-bucket
+  ```
+
+
+---
+
+**Q: Can Riak be configured to fail an update instead of generating a conflict?**
+
+
+**A:**
+  No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do.
+
+
+---
+
+**Q: How can I limit the number of keys retrieved?**
+
+
+**A:**
+  You'll need to use a [MapReduce] job for this, as sketched below.
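+
+  For illustration, a reduce-only job along these lines (our own sketch, not part of the original FAQ) can truncate the result list; note that reduce phases may run more than once, so the function must tolerate being re-applied to its own output:
+
+  ```json
+  {
+    "inputs": "test",
+    "query": [
+      {
+        "reduce": {
+          "language": "javascript",
+          "source": "function(values, arg) { return values.slice(0, arg); }",
+          "arg": 10
+        }
+      }
+    ]
+  }
+  ```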
+
+  You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster. It will only reduce load on your client.
+
+
+---
+
+**Q: How is the real hash value for replicas calculated based on the preflist?**
+
+
+**A:**
+  The hash is calculated first, and then the subsequent *N* partitions are chosen for the preflist.
+
+
+---
+
+**Q: Do client libraries support load balancing/round robin?**
+
+
+**A:**
+
+  * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred.
+  * The Java client is strictly round robin, but with retries built in.
+  * The Python client also follows round robin without retries.
+  * The Erlang client does not support any load balancing.
+
+## MapReduce
+
+
+**Q: Does the number of keys in a bucket affect the performance of MapReduce?**
+
+
+**A:**
+  Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run.
+
+
+---
+
+**Q: How do I filter out `not_found` from MapReduce results?**
+  If I want to filter out the `not_found` entries in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys.
+
+
+**A:**
+  There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` in the [built-in functions list].
+
+
+---
+
+**Q: Is it possible to call a reduce function at specific intervals during a map function?**
+  When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory, and it could be reduced if I could call the following reduce step more often.
+
+
+**A:**
+  Not currently. The reduce function is run occasionally as the bucket is processed, and MapReduce doesn't wait for the whole map process to finish before running the reduce.
+
+
+---
+
+**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?**
+
+
+**A:**
+  Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase.
+
+
+---
+
+**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?**
+
+
+**A:**
+  This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com].
+
+
+---
+
+**Q: Why does MapReduce return a JSON object on occasion instead of an array?**
+
+
+**A:**
+  `mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---is turned into a hash:
+
+  ```erlang
+  list_to_binary(mochijson2:encode([{a, b}, {foo, bar}])).
+  <<"{\"a\":\"b\",\"foo\":\"bar\"}">>
+  ```
+
+  JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists.
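+
+  For example, encoding the same pairs as length-2 lists yields nested arrays instead (our own illustration; the output shown is what we would expect from `mochijson2`):
+
+  ```erlang
+  list_to_binary(mochijson2:encode([[a, b], [foo, bar]])).
+  <<"[[\"a\",\"b\"],[\"foo\",\"bar\"]]">>
+  ```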
diff --git a/content/riak/kv/2.9.2/developing/getting-started.md b/content/riak/kv/2.9.2/developing/getting-started.md new file mode 100644 index 0000000000..4253bd59d9 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started.md @@ -0,0 +1,46 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +--- + +[install index]: {{}}riak/kv/2.9.2/setup/installing +[dev client libraries]: {{}}riak/kv/2.9.2/developing/client-libraries + +Welcome, new Riak developer! This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
    +
+* [C Sharp]({{}}riak/kv/2.9.2/developing/getting-started/csharp)
+* [Erlang]({{}}riak/kv/2.9.2/developing/getting-started/erlang)
+* [Go]({{}}riak/kv/2.9.2/developing/getting-started/golang)
+ +### Community-supported Client Libraries + +Please see our [client libraries][dev client libraries] page for a listing of +community-supported clients. diff --git a/content/riak/kv/2.9.2/developing/getting-started/csharp.md b/content/riak/kv/2.9.2/developing/getting-started/csharp.md new file mode 100644 index 0000000000..f8b9a19a91 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/csharp.md @@ -0,0 +1,82 @@ +--- +title: "Getting Started with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "C Sharp" + identifier: "getting_started_csharp" + weight: 103 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/csharp + - /riak/kv/2.9.2/dev/taste-of-riak/csharp +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.2/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. + +### Client Setup + +Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager. + +{{% note title="Configuring for a remote cluster" %}} +By default, the Riak .NET Client will add a section to your `app.config` file +for a four node local cluster. If you are using a remote cluster, open up +`app.config` and change the `hostAddress` values to point to nodes in your +remote cluster. +{{% /note %}} + +### Connecting to Riak + +Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object. + +```csharp +using System; +using RiakClient; + +namespace TasteOfRiak +{ + class Program + { + static void Main(string[] args) + { + // don't worry, we'll use this string later + const string contributors = "contributors"; + IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig"); + IRiakClient client = cluster.CreateClient(); + } + } +} +``` + +This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndpoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak. + +Let's make sure the cluster is online. Add this to your `Main` method: + +```csharp +var pingResult = client.Ping(); + +if (pingResult.IsSuccess) +{ + Console.WriteLine("pong"); +} +else +{ + Console.WriteLine("Are you sure Riak is running?"); + Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage); +} +``` + +This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes. + +We are now ready to start interacting with Riak. 
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.2/developing/getting-started/csharp/crud-operations)
diff --git a/content/riak/kv/2.9.2/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/2.9.2/developing/getting-started/csharp/crud-operations.md
new file mode 100644
index 0000000000..1ba1f14e5a
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/csharp/crud-operations.md
@@ -0,0 +1,143 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_csharp_crud"
+    weight: 100
+    parent: "getting_started_csharp"
+toc: true
+---
+
+### Creating Objects In Riak
+
+Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak.
+
+The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At its most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak.
+
+Add the `RiakClient.Models` namespace to your using directives. Your usings should look like this:
+
+```csharp
+using System;
+using System.Collections.Generic;
+using RiakClient;
+using RiakClient.Models;
+```
+
+Add the `Person` class to the `TasteOfRiak` namespace:
+
+```csharp
+public class Person
+{
+    public string EmailAddress { get; set; }
+    public string FirstName { get; set; }
+    public string LastName { get; set; }
+}
+```
+
+Now let's create some people!
+
+```csharp
+var people = new[]
+{
+    new Person {
+        EmailAddress = "bashoman@basho.com",
+        FirstName = "Basho",
+        LastName = "Man"
+    },
+    new Person {
+        EmailAddress = "johndoe@gmail.com",
+        FirstName = "John",
+        LastName = "Doe"
+    }
+};
+
+foreach (var person in people)
+{
+    var o = new RiakObject(contributors, person.EmailAddress, person);
+    var putResult = client.Put(o);
+
+    if (putResult.IsSuccess)
+    {
+        Console.WriteLine("Successfully saved {0} to bucket {1}", o.Key, o.Bucket);
+    }
+    else
+    {
+        Console.WriteLine("Are you *really* sure Riak is running?");
+        Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage);
+    }
+}
+```
+
+In this sample, we create a collection of `Person` objects and then save each `Person` to Riak.
+
+Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`.
+
+Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be a message displaying the result code and a helpful error message.
+
+### Reading from Riak
+
+Let's find a person!
+
+```csharp
+var result = client.Get(contributors, "bashoman@basho.com");
+if (result.IsSuccess)
+{
+    bashoman = result.Value.GetObject<Person>();
+    Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors);
+}
+else
+{
+    Console.WriteLine("Something went wrong!");
+    Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage);
+}
+```
+
+We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult` which, like other RiakResults, helpfully encapsulates the communication with Riak.
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. diff --git a/content/riak/kv/2.9.2/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/2.9.2/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..c2edac38f7 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,107 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/2.9.2/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting UTC datetime in an [ISO 8601][iso_8601]
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, please refer to
+[this source code][2] for the repositories that we'll be using.
+
+[This console application][3] exercises the code that we've written.
+
+The repository pattern and `TimelineManager` help with a few things:
+
+  - They help us to see if an object exists before creating a new one
+  - They keep our buckets and key names consistent
+  - They provide us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
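+
+As a quick illustration of these key patterns, here is a tiny helper of our own (`TimelineKey` is a hypothetical name, not part of the Taste of Riak source):
+
+```csharp
+// Builds Timelines keys of the form <owner>_<type>_<date>.
+static string TimelineKey(string owner, string msgType, DateTime timestamp)
+{
+    return string.Format("{0}_{1}_{2:yyyy-MM-dd}", owner, msgType, timestamp);
+}
+
+// TimelineKey("joeuser", "Sent", new DateTime(2014, 3, 6)) => "joeuser_Sent_2014-03-06"
+```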
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + diff --git a/content/riak/kv/2.9.2/developing/getting-started/csharp/querying.md b/content/riak/kv/2.9.2/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..8342c09f70 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/csharp/querying.md @@ -0,0 +1,210 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/querying-csharp + - /riak/kv/2.9.2/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+
+```csharp
+Console.WriteLine("Creating Data");
+Customer customer = CreateCustomer();
+IEnumerable<Order> orders = CreateOrders(customer);
+OrderSummary orderSummary = CreateOrderSummary(customer, orders);
+
+Console.WriteLine("Starting Client");
+using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig"))
+{
+    IRiakClient client = endpoint.CreateClient();
+
+    Console.WriteLine("Storing Data");
+
+    client.Put(ToRiakObject(customer));
+
+    foreach (Order order in orders)
+    {
+        // NB: this adds secondary index data as well
+        client.Put(ToRiakObject(order));
+    }
+
+    client.Put(ToRiakObject(orderSummary));
+
+    ...
+    ...
+    ...
+}
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```csharp
+Console.WriteLine("Fetching related data by shared key");
+string key = "1";
+
+var result = client.Get(customersBucketName, key);
+CheckResult(result);
+Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result));
+
+result = client.Get(orderSummariesBucketName, key);
+CheckResult(result);
+Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result));
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.2/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will make a note of where
+secondary index data is added to our model objects.
+
+```csharp
+private static RiakObject ToRiakObject(Order order)
+{
+    var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString());
+    var riakObject = new RiakObject(orderRiakObjectId, order);
+
+    IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName);
+    salesPersonIdIndex.Add(order.SalesPersonId.ToString());
+
+    BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName);
+    orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd"));
+
+    return riakObject;
+}
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalesPersonId` integer index for Jane's ID of `9000`.
+
+```csharp
+// Query for order keys where the SalesPersonId index is set to 9000
+var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName);
+RiakResult<RiakIndexResult> indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as an integer here.
+CheckResult(indexRiakResult);
+RiakIndexResult indexResult = indexRiakResult.Value;
+Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
+```
+
+Which returns:
+
+```text
+Jane's orders (key values): 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID. Next, let's use a "binary" index. Let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+```csharp
+// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
+riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName);
+indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here.
+CheckResult(indexRiakResult);
+indexResult = indexRiakResult.Value;
+Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
+```
+
+Which returns:
+
+```text
+October orders (key values): 1, 2
+```
+
+We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip diff --git a/content/riak/kv/2.9.2/developing/getting-started/erlang.md b/content/riak/kv/2.9.2/developing/getting-started/erlang.md new file mode 100644 index 0000000000..3d784437d2 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/erlang.md @@ -0,0 +1,55 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/erlang + - /riak/kv/2.9.2/dev/taste-of-riak/erlang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.2/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let’s create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.2/developing/getting-started/erlang/crud-operations) diff --git a/content/riak/kv/2.9.2/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/2.9.2/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..94233033bb --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,167 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +--- + +## Creating Objects In Riak + +First, let’s create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let’s store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}. 
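+%% Note: non-binary values such as this tuple are serialized by the
+%% client with term_to_binary/1 when stored, so we will decode with
+%% binary_to_term/1 when we read it back (shown below).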
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
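+%% With the #book{} record definition loaded via rd/2 in the shell,
+%% rp/1 pretty-prints the decoded value as the original record.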
+```
+
+Next let’s clean up our mess:
+
+```erlang
+riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>).
+riakc_pb_socket:stop(Pid).
+```
diff --git a/content/riak/kv/2.9.2/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.9.2/developing/getting-started/erlang/object-modeling.md
new file mode 100644
index 0000000000..9bf648a302
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/erlang/object-modeling.md
@@ -0,0 +1,338 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Erlang"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Object Modeling"
+    identifier: "getting_started_erlang_object"
+    weight: 102
+    parent: "getting_started_erlang"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/taste-of-riak/object-modeling-erlang
+  - /riak/kv/2.9.2/dev/taste-of-riak/object-modeling-erlang
+---
+
+To get started, let's create the records that we'll be using.
+
+{{% note title="Code Download" %}}
+You can also download the code for this chapter at
+[GitHub](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema).
+
+The GitHub version includes Erlang type specifications which have been omitted
+here for brevity.
+{{% /note %}}
+
+
+```erlang
+%% msgy.hrl
+
+-define(USER_BUCKET, <<"Users">>).
+-define(MSG_BUCKET, <<"Msgs">>).
+-define(TIMELINE_BUCKET, <<"Timelines">>).
+-define(INBOX, "Inbox").
+-define(SENT, "Sent").
+
+-record(user, {user_name, full_name, email}).
+
+-record(msg, {sender, recipient, created, text}).
+
+-record(timeline, {owner, msg_type, msgs}).
+```
+
+We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.2/developing/usage/bucket-types) here, so we don't need to specify one.
+
+To use these records to store data, we will first have to create a user
+record. Then, when a user creates a message, we will append that message
+to one or more timelines. If it's a private message, we'll append it to
+the Recipient's `Inbox` timeline and to the User's own `Sent` timeline.
+If it's a group message, we'll append it to the Group's timeline, as
+well as to the User's `Sent` timeline.
+
+#### Buckets and keys revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```erlang
+%% user_repository.erl
+
+-module(user_repository).
+-export([save_user/2,
+         get_user/2]).
+-include("msgy.hrl").
+
+save_user(ClientPid, User) ->
+    RUser = riakc_obj:new(?USER_BUCKET,
+                          list_to_binary(User#user.user_name),
+                          User),
+    riakc_pb_socket:put(ClientPid, RUser).
+
+get_user(ClientPid, UserName) ->
+    {ok, RUser} = riakc_pb_socket:get(ClientPid,
+                                      ?USER_BUCKET,
+                                      list_to_binary(UserName)),
+    binary_to_term(riakc_obj:get_value(RUser)).
+```
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + diff --git a/content/riak/kv/2.9.2/developing/getting-started/erlang/querying.md b/content/riak/kv/2.9.2/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..6d6ef35943 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/erlang/querying.md @@ -0,0 +1,303 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/querying-erlang + - /riak/kv/2.9.2/dev/taste-of-riak/querying-erlang +--- + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.9.2/developing/key-value-modeling). 
+ +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle= Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +## Remember to replace the ip and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). + +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj). 
+ +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.2/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
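+%% Caveat: lists:concat/1 does not zero-pad, so {{2013,10,1},{14,42,26}}
+%% becomes "2013101144226" rather than "20131001144226". Zero-padding each
+%% field (e.g. io_lib:format("~4..0B~2..0B~2..0B", [Year, Month, Day]))
+%% would yield keys that sort cleanly in the range queries below.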
+
+AddIndicesToOrder = fun(OrderKey) ->
+    {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket,
+                                      list_to_binary(integer_to_list(OrderKey))),
+
+    OrderData = binary_to_term(riakc_obj:get_value(Order)),
+    OrderMetadata = riakc_obj:get_update_metadata(Order),
+
+    MD1 = riakc_obj:set_secondary_index(OrderMetadata,
+                                        [{{binary_index, "order_date"},
+                                          [FormatDate(OrderData#order.order_date)]}]),
+
+    MD2 = riakc_obj:set_secondary_index(MD1,
+                                        [{{integer_index, "salesperson_id"},
+                                          [OrderData#order.salesperson_id]}]),
+
+    Order2 = riakc_obj:update_metadata(Order, MD2),
+    riakc_pb_socket:put(Pid, Order2)
+end.
+
+lists:foreach(AddIndicesToOrder, [1,2,3]).
+
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```erlang
+riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"3">>],
+                      undefined,undefined}}
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id; next, let's use a "binary" index. Now let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`order_date_bin` index for entries between `20131001` and `20131031`.
+
+```erlang
+riakc_pb_socket:get_index_range(Pid, OrderBucket,
+                                {binary_index, "order_date"},
+                                <<"20131001">>, <<"20131031">>).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"2">>],
+                      undefined,undefined}}
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So, to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either integer or binary (string) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
diff --git a/content/riak/kv/2.9.2/developing/getting-started/golang.md b/content/riak/kv/2.9.2/developing/getting-started/golang.md
new file mode 100644
index 0000000000..3873a2d874
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/golang.md
@@ -0,0 +1,78 @@
+---
+title: "Getting Started with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Go"
+    identifier: "getting_started_go"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/taste-of-riak/golang
+  - /riak/kv/2.9.2/dev/taste-of-riak/golang
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.2/using/running-a-cluster) first and ensure you have
+[a working installation of Go](http://golang.org/doc/install).
+
+## Client Setup
+
+First install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"sync"
+
+	riak "github.com/basho/riak-go-client"
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+	var err error
+
+	// un-comment-out to enable debug logging
+	// riak.EnableDebugLogging = true
+
+	o := &riak.NewClientOptions{
+		RemoteAddresses: []string{util.GetRiakAddress()},
+	}
+
+	var c *riak.Client
+	c, err = riak.NewClient(o)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	defer func() {
+		if err := c.Stop(); err != nil {
+			util.ErrExit(err)
+		}
+	}()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.2/developing/getting-started/golang/crud-operations)
diff --git a/content/riak/kv/2.9.2/developing/getting-started/golang/crud-operations.md b/content/riak/kv/2.9.2/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..a271553514
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,370 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+---
+
+## Creating Objects
+
+First let's create a few objects and a bucket to keep them in:
+
+```golang
+	val1 := uint32(1)
+	val1buf := make([]byte, 4)
+	binary.LittleEndian.PutUint32(val1buf, val1)
+
+	val2 := "two"
+
+	val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+	var val3json []byte
+	val3json, err = json.Marshal(val3)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	bucket := "test"
+
+	util.Log.Println("Creating Objects In Riak...")
+
+	objs := []*riak.Object{
+		{
+			Bucket:      bucket,
+			Key:         "one",
+			ContentType: "application/octet-stream",
+			Value:       val1buf,
+		},
+		{
+			Bucket:      bucket,
+			Key:         "two",
+			ContentType: "text/plain",
+			Value:       []byte(val2),
+		},
+		{
+			Bucket:      bucket,
+			Key:         "three",
+			ContentType: "application/json",
+			Value:       val3json,
+		},
+	}
+
+	var cmd riak.Command
+	wg := &sync.WaitGroup{}
+
+	for _, o := range objs {
+		cmd, err = riak.NewStoreValueCommandBuilder().
+			WithContent(o).
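+			// Build() validates the options and returns a riak.Command;
+			// each command is executed concurrently via riak.Async below.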
+ Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } + } + + wg.Wait() +``` + +In our first object, we have stored the integer 1 with the lookup key +of `one`: + +```golang +{ + Bucket: bucket, + Key: "one", + ContentType: "application/octet-stream", + Value: val1buf, +} +``` + +For our second object, we stored a simple string value of `two` with a +matching key: + +```golang +{ + Bucket: bucket, + Key: "two", + ContentType: "text/plain", + Value: []byte(val2), +} +``` + +Finally, the third object we stored was a bit of JSON: + +```golang +{ + Bucket: bucket, + Key: "three", + ContentType: "application/json", + Value: val3json, +} +``` + +## Reading Objects + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +Requesting the objects by key: + +```golang +var cmd riak.Command +wg := &sync.WaitGroup{} + +for _, o := range objs { + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(o). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() + +util.Log.Println("Reading Objects From Riak...") + +d := make(chan riak.Command, len(objs)) + +for _, o := range objs { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + Done: d, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +close(d) +``` + +Converting to JSON to compare a string key to a symbol +key: + +```golang +for done := range d { + f := done.(*riak.FetchValueCommand) + /* un-comment to dump fetched object as JSON + if json, jerr := json.MarshalIndent(f.Response, "", " "); err != nil { + util.ErrLog.Println(jerr) + } else { + util.Log.Println("fetched value: ", string(json)) + } + */ + obj := f.Response.Values[0] + switch obj.Key { + case "one": + if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "two": + if actual, expected := string(obj.Value), val2; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "three": + obj3 = obj + val3.MyValue = 0 + if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) + } else { + if actual, expected := val3.MyValue, int(3); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + } + default: + util.ErrLog.Printf("unrecognized key: %s", obj.Key) + } +} +``` + +## Updating Objects + +While some data may be static, other forms of data need to be +updated. + +Let’s update some values: + +```golang +util.Log.Println("Updating Object Three In Riak...") + +val3.MyValue = 42 +obj3.Value, err = json.Marshal(val3) +if err != nil { + util.ErrExit(err) +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj3). + WithReturnBody(true). 
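+	// WithReturnBody(true) asks Riak to send the stored object back, so
+	// the updated value can be verified below without a second fetch.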
+ Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} + +svcmd := cmd.(*riak.StoreValueCommand) +svrsp := svcmd.Response +obj3 = svrsp.Values[0] +val3.MyValue = 0 +if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) +} else { + if actual, expected := val3.MyValue, int(42); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected) + } +} +util.Log.Println("updated object key: ", obj3.Key) +util.Log.Println("updated object value: ", val3.MyValue) +``` + +## Deleting Objects + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called against either the bucket or the +object. + +```golang +for _, o := range objs { + cmd, err = riak.NewDeleteValueCommandBuilder(). + WithBucket(o.Bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. + +For example, this `struct` that represents some information about +a book: + +```golang +type Book struct { + ISBN string + Title string + Author string + Body string + CopiesOwned uint16 +} + +book := &Book{ + ISBN: "1111979723", + Title: "Moby Dick", + Author: "Herman Melville", + Body: "Call me Ishmael. Some years ago...", + CopiesOwned: 3, +} +``` + +We now have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```golang +var jbook []byte +jbook, err = json.Marshal(book) +if err != nil { + util.ErrExit(err) +} + +bookObj := &riak.Object{ + Bucket: "books", + Key: book.ISBN, + ContentType: "application/json", + Value: jbook, +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(bookObj). + WithReturnBody(false). + Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} +``` + +If we fetch our book back and print the data: + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket("books"). + WithKey(book.ISBN). + Build() +if err != nil { + util.ErrExit(err) +} +if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) +} + +fcmd := cmd.(*riak.FetchValueCommand) +bookObj = fcmd.Response.Values[0] +util.Log.Println(string(bookObj.Value)) +``` + +The result is: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +Now, let’s delete the book: + +```golang +... 
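+// A minimal sketch of the delete, assuming the same client and book as
+// above and reusing the DeleteValue builder shown in "Deleting Objects":
+cmd, err = riak.NewDeleteValueCommandBuilder().
+	WithBucket("books").
+	WithKey(book.ISBN).
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+if err := c.Execute(cmd); err != nil {
+	util.ErrLog.Println(err)
+}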
+``` diff --git a/content/riak/kv/2.9.2/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.9.2/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..01dc0c1401 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,548 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/object-modeling-golang + - /riak/kv/2.9.2/dev/taste-of-riak/object-modeling-golang +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.2/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`,
+`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+	"encoding/json"
+	"errors"
+
+	riak "github.com/basho/riak-go-client"
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+	Get(key string, notFoundOk bool) (models.Model, error)
+	Save(models.Model) (models.Model, error)
+	getBucketName() string
+	getModel() models.Model
+	getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+	client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+	return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(notFoundOk).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	fcmd := cmd.(*riak.FetchValueCommand)
+
+	if notFoundOk && len(fcmd.Response.Values) == 0 {
+		return nil, nil
+	}
+
+	if len(fcmd.Response.Values) > 1 {
+		// Siblings present that need resolution
+		// Here we'll just return an unexpected error
+		return nil, ErrUnexpectedSiblings
+	} else {
+		return buildModel(r.getModel(), fcmd.Response.Values[0])
+	}
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	key := m.GetId()
+
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(true).
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
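+The generic `get` and `save` helpers above do the bucket-level work, so
+each concrete repository below only has to supply its bucket name and
+model type.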
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names. +* How to choose natural keys based on how we want to partition our data. + + diff --git a/content/riak/kv/2.9.2/developing/getting-started/golang/querying.md b/content/riak/kv/2.9.2/developing/getting-started/golang/querying.md new file mode 100644 index 0000000000..9342dc7c7d --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/golang/querying.md @@ -0,0 +1,576 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Go" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Querying" + identifier: "getting_started_go_query" + weight: 101 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/querying-golang + - /riak/kv/2.9.2/dev/taste-of-riak/querying-golang +--- + +## Go Version Setup + +For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix* OS. + +>A Quick Note on Querying and Schemas: +> +>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it. 
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
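+			// Each order is stored under its explicit Id; the commands are
+			// collected and executed concurrently below.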
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
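+	// The OrderSummary shares the customer's generated key, so the same
+	// id fetches both objects.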
+ Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +doneChan := make(chan riak.Command) +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Bucket { + case customersBucket: + util.Log.Printf("Customer 1: %v", string(obj.Value)) + case orderSummariesBucket: + util.Log.Printf("OrderSummary 1: %v", string(obj.Value)) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} +``` + +Which returns our amalgamated objects: + +```sh +2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]} +2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z" +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.2/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: + +```golang +util.Log.Println("Adding Index Data") + +// fetch orders to add index data +cmds = cmds[:0] + +for _, order := range orders { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(ordersBucket). + WithKey(order.Id). 
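+		// 2i entries live in each object's metadata, so we fetch every
+		// order, attach its indexes, and store it again below.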
+		Build()
+	if err != nil {
+		util.ErrExit(err)
+	}
+	cmds = append(cmds, cmd)
+}
+
+errored = false
+for _, cmd := range cmds {
+	a := &riak.Async{
+		Command: cmd,
+		Done:    doneChan,
+	}
+	if eerr := c.ExecuteAsync(a); eerr != nil {
+		errored = true
+		util.ErrLog.Println(eerr)
+	}
+}
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+errored = false
+for i := 0; i < len(cmds); i++ {
+	select {
+	case d := <-doneChan:
+		if fv, ok := d.(*riak.FetchValueCommand); ok {
+			obj := fv.Response.Values[0]
+			switch obj.Key {
+			case "1":
+				obj.AddToIntIndex("SalespersonId_int", 9000)
+				obj.AddToIndex("OrderDate_bin", "2013-10-01")
+			case "2":
+				obj.AddToIntIndex("SalespersonId_int", 9001)
+				obj.AddToIndex("OrderDate_bin", "2013-10-15")
+			case "3":
+				obj.AddToIntIndex("SalespersonId_int", 9000)
+				obj.AddToIndex("OrderDate_bin", "2013-11-03")
+			}
+			scmd, serr := riak.NewStoreValueCommandBuilder().
+				WithContent(obj).
+				Build()
+			if serr != nil {
+				util.ErrExit(serr)
+			}
+			a := &riak.Async{
+				Command: scmd,
+				Wait:    wg,
+			}
+			if eerr := c.ExecuteAsync(a); eerr != nil {
+				errored = true
+				util.ErrLog.Println(eerr)
+			}
+		} else {
+			util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+		}
+	case <-time.After(5 * time.Second):
+		util.ErrExit(errors.New("fetch operations took too long"))
+	}
+}
+
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+wg.Wait()
+close(doneChan)
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`:
+
+```golang
+util.Log.Println("Index Queries")
+
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+	WithBucket(ordersBucket).
+	WithIndexName("SalespersonId_int").
+	WithIndexKey("9000").
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+	util.ErrExit(eerr)
+}
+
+qcmd := cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+	util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 Jane's Orders, key: 3
+2015/12/29 09:44:10 Jane's Orders, key: 1
+```
+
+Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id; next, let's use a *binary* index.
+
+Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`:
+
+```golang
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+	WithBucket(ordersBucket).
+	WithIndexName("OrderDate_bin").
+	WithRange("2013-10-01", "2013-10-31").
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+	util.ErrExit(eerr)
+}
+
+qcmd = cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+	util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 October's Orders, key: 1
+2015/12/29 09:44:10 October's Orders, key: 2
+```
+
+Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys. +* You can search for specific values or a range of values. +* Riak will return a list of keys that match the index query. diff --git a/content/riak/kv/2.9.2/developing/getting-started/java.md b/content/riak/kv/2.9.2/developing/getting-started/java.md new file mode 100644 index 0000000000..13b1463220 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/java.md @@ -0,0 +1,89 @@ +--- +title: "Getting Started with Java" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Java" + identifier: "getting_started_java" + weight: 100 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/java + - /riak/kv/2.9.2/dev/taste-of-riak/java +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.2/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Java is required. + +## Client Setup + +To include the Riak Java client in your project, add it to your +project's dependencies. Here is a Maven example: + +```xml + + + com.basho.riak + riak-client + 2.1.1 + +``` + +Next, download +[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java) +source code for this tutorial, and save it to your working directory. + +{{% note title="Configuring for a local cluster" %}} +The `TasteOfRiak.java` file that you downloaded is set up to communicate with +a 1-node Riak cluster listening on `localhost` port 10017. We recommend +modifying the connection info directly within the `setUpCluster()` method. +{{% /note %}} + +If you execute the `TasteOfRiak.java` file within your IDE, you should +see the following: + +``` +Basic object created +Location object created for quote object +StoreValue operation created +Client object successfully created +Object storage operation successfully completed +Success! The object we created and the object we fetched have the same value +Quote object successfully deleted +Book object created +Moby Dick information now stored in Riak +Book object successfully fetched +Success! All of our tests check out +``` + +Since Java doesn’t have a REPL environment, let's walk through the code +to see what it actually did at each step. + +## Setting Up the Cluster + +The first step in using the Riak Java client is to create a cluster +object to facilitate all interactions with Riak. You'll see this on line +72: + +```java +RiakCluster cluster = setUpCluster(); +``` + +This calls the private `setUpCluster` method which begins on line 25. 
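+
+As a rough sketch (not the exact contents of `TasteOfRiak.java`), a
+minimal `setUpCluster()` method can look like this:
+
+```java
+private static RiakCluster setUpCluster() throws UnknownHostException {
+    // Connect to the single local node mentioned in the note above
+    RiakNode node = new RiakNode.Builder()
+            .withRemoteAddress("127.0.0.1")
+            .withRemotePort(10017)
+            .build();
+
+    RiakCluster cluster = new RiakCluster.Builder(node).build();
+
+    // The cluster must be started before it can execute any commands
+    cluster.start();
+    return cluster;
+}
+```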
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.2/developing/getting-started/java/crud-operations)
diff --git a/content/riak/kv/2.9.2/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.9.2/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..bf91222111
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,201 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on:
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Iceman");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see the [Updating Objects]({{}}riak/kv/2.9.2/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.2/developing/usage/conflict-resolution/)
+documentation.
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let's see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book:
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the simpler
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+ +To update the POJO, we would use `UpdateValue` by +extending a new `BookUpdate` class as follows: + +```java +public static class BookUpdate extends UpdateValue.Update { + private final Book update; + public BookUpdate(Book update){ + this.update = update; + } + + @Override + public Book apply(Book t) { + if(t == null) { + t = new Book(); + } + + t.author = update.author; + t.body = update.body; + t.copiesOwned = update.copiesOwned; + t.isbn = update.isbn; + t.title = update.title; + + return t; + } +} +``` + +Then using the `BookUpdate` class with our `mobyDick` object: + +```java +mobyDick.copiesOwned = 5; +BookUpdate updatedBook = new BookUpdate(mobyDick); + +UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation) + .withUpdate(updatedBook).build(); +UpdateValue.Response response = client.execute(updateValue); +``` + +For more in depth information on updating objects and sibling resolution in +Riak, see [Updating Objects]({{}}riak/kv/2.9.2/developing/usage/updating-objects/) +and [Conflict Resolution]({{}}riak/kv/2.9.2/developing/usage/conflict-resolution/) +documention. diff --git a/content/riak/kv/2.9.2/developing/getting-started/java/object-modeling.md b/content/riak/kv/2.9.2/developing/getting-started/java/object-modeling.md new file mode 100644 index 0000000000..91371b310d --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/java/object-modeling.md @@ -0,0 +1,428 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Java" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Object Modeling" + identifier: "getting_started_java_object" + weight: 102 + parent: "getting_started_java" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/object-modeling-java + - /riak/kv/2.9.2/dev/taste-of-riak/object-modeling-java +--- + +To get started, let's create the models that we'll be using. + +```java +package com.basho.msgy.Models; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.TimeZone; + +public class Msg { + public String Sender; + public String Recipient; + public String Created; + public String Text; + + public static Msg createNew(String sender, String recipient, String text) { + Msg msg = new Msg(); + msg.Sender = sender; + msg.Recipient = recipient; + msg.Text = text; + msg.Created = GetCurrentISO8601Timestamp(); + return msg; + } + + private static String GetCurrentISO8601Timestamp() { + TimeZone tz = TimeZone.getTimeZone("UTC"); + // Java Dates don't have microsecond resolution :( + // Pad out to microseconds to match other examples. 
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.convert.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "Users";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`,
+`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. With the
+Java client, we can use the `@RiakKey` annotation to tell the client
+that we want to use the `UserName` member as the key. It will
+automatically use that value in the future, instead of having to pass the
+key in as another parameter when storing a value.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<username>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```java
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.riak.client.IRiakClient;
+import com.basho.riak.client.RiakRetryFailedException;
+import com.basho.riak.client.bucket.Bucket;
+
+public class MsgRepository {
+
+    static final String BUCKET_NAME = "Msgs";
+    protected RiakClient client;
+
+    public MsgRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public Msg get(String msgKey) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        FetchValue.Response response = client.execute(fetch);
+        return response.getValue(Msg.class);
+    }
+
+    public String save(Msg msg) throws Exception {
+        StoreValue store = new StoreValue.Builder(msg).build();
+        client.execute(store);
+        return generateKey(msg);
+    }
+
+    private String generateKey(Msg msg) {
+        return msg.Sender + "_" + msg.Created;
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.riak.client.IRiakClient;
+import com.basho.riak.client.RiakRetryFailedException;
+import com.basho.riak.client.bucket.Bucket;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class TimelineRepository {
+
+    static final String BUCKET_NAME = "Timelines";
+    protected RiakClient client;
+    protected MsgRepository msgRepo;
+
+    public TimelineRepository(RiakClient client) {
+        this.client = client;
+        this.msgRepo = new MsgRepository(this.client);
+    }
+
+    public void postMsg(Msg msg) throws Exception {
+        String msgKey = msgRepo.save(msg);
+
+        // Post to recipient's Inbox timeline
+        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);
+
+        // Post to sender's Sent timeline
+        addToTimeline(msg, Timeline.TimelineType.Sent, msgKey);
+    }
+
+    private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception {
+        String timelineKey = generateKeyFromMsg(msg, type);
+
+        // Fetch the existing timeline for this owner/type/day, if any
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        Timeline timeline = client.execute(fetch).getValue(Timeline.class);
+
+        if (timeline != null) {
+            timeline = addToExistingTimeline(timeline, msgKey);
+        } else {
+            timeline = createNewTimeline(msg, type, msgKey);
+        }
+
+        // Store the updated timeline back at the same location
+        StoreValue store = new StoreValue.Builder(timeline)
+                .withLocation(loc)
+                .build();
+        client.execute(store);
+    }
+
+    public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) {
+        String owner = getOwner(msg, type);
+
+        Timeline newTimeline = new Timeline();
+        newTimeline.Owner = owner;
+        newTimeline.Type = type.toString();
+        newTimeline.Msgs.add(msgKey);
+
+        return newTimeline;
+    }
+
+    public Timeline addToExistingTimeline(Timeline timeline, String msgKey) {
+        timeline.Msgs.add(msgKey);
+        return timeline;
+    }
+
+    public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws Exception {
+        String timelineKey = generateKey(ownerUsername, type, date);
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        return client.execute(fetch).getValue(Timeline.class);
+    }
+
+    private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) {
+        String owner = getOwner(msg, type);
+        String dateString = msg.Created.substring(0, 10);
+        return generateKey(owner, type, dateString);
+    }
+
+    private String getOwner(Msg msg, Timeline.TimelineType type) {
+        if(type == Timeline.TimelineType.Inbox)
+            return msg.Recipient;
+        else
+            return msg.Sender;
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) {
+        String dateString = getIso8601DateStringFromDate(date);
+        return generateKey(ownerUsername, type, dateString);
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) {
+        return ownerUsername + "_" + type.toString() + "_" + dateString;
+    }
+
+    private String getIso8601DateStringFromDate(Date date) {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
+        df.setTimeZone(tz);
+        return df.format(date);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.User;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class UserRepository {
+    static final String BUCKET_NAME = "Users";
+    protected RiakClient client;
+
+    public UserRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public void save(User user) throws Exception {
+        // The @RiakBucketName and @RiakKey annotations on User tell the
+        // client where to store the object, so no Location is needed.
+        StoreValue store = new StoreValue.Builder(user).build();
+        client.execute(store);
+    }
+
+    public User get(String userName) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), userName);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        return client.execute(fetch).getValue(User.class);
+    }
+}
+```
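+
+Before testing the full flow, it's worth seeing how a caller can turn a
+fetched timeline back into full messages. The `TimelineReader` class
+below is not part of the original example; it's a minimal sketch,
+assuming the `MsgRepository` and `TimelineRepository` classes above, of
+how an application might hydrate the message keys stored in a timeline
+into `Msg` objects.
+
+```java
+package com.basho.msgy;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.msgy.Repositories.MsgRepository;
+import com.basho.msgy.Repositories.TimelineRepository;
+
+public class TimelineReader {
+
+    private final MsgRepository msgRepo;
+    private final TimelineRepository timelineRepo;
+
+    public TimelineReader(MsgRepository msgRepo, TimelineRepository timelineRepo) {
+        this.msgRepo = msgRepo;
+        this.timelineRepo = timelineRepo;
+    }
+
+    // Fetch one day's timeline and resolve each stored key to its Msg
+    public List<Msg> getMsgs(String owner, Timeline.TimelineType type, Date day) throws Exception {
+        List<Msg> msgs = new ArrayList<Msg>();
+        Timeline timeline = timelineRepo.getTimeline(owner, type, day);
+        if (timeline != null) {
+            for (String msgKey : timeline.Msgs) {
+                msgs.add(msgRepo.get(msgKey));
+            }
+        }
+        return msgs;
+    }
+}
+```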
+
+Finally, let's test them:
+
+```java
+package com.basho.msgy;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.msgy.Models.User;
+import com.basho.msgy.Repositories.MsgRepository;
+import com.basho.msgy.Repositories.TimelineRepository;
+import com.basho.msgy.Repositories.UserRepository;
+import com.basho.riak.client.api.RiakClient;
+
+import java.util.Date;
+
+public class MsgyMain {
+
+    public static void main(String[] args) throws Exception {
+        // Set up our repositories (10017 is the PB port of a devrel node)
+        RiakClient client = RiakClient.newClient(10017, "127.0.0.1");
+
+        UserRepository userRepo = new UserRepository(client);
+        MsgRepository msgRepo = new MsgRepository(client);
+        TimelineRepository timelineRepo = new TimelineRepository(client);
+
+        // Create and save users
+        User marleen = new User("marleenmgr",
+                "Marleen Manager",
+                "marleen.manager@basho.com");
+
+        User joe = new User("joeuser",
+                "Joe User",
+                "joe.user@basho.com");
+
+        userRepo.save(marleen);
+        userRepo.save(joe);
+
+        // Create a new Msg and post it to timelines
+        Msg msg = Msg.createNew(marleen.UserName,
+                joe.UserName,
+                "Welcome to the company!");
+
+        timelineRepo.postMsg(msg);
+
+        // Get Joe's inbox for today, and fetch the first message
+        Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName,
+                Timeline.TimelineType.Inbox,
+                new Date());
+
+        Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0));
+
+        System.out.println("From: " + joesFirstMsg.Sender);
+        System.out.println("Msg : " + joesFirstMsg.Text);
+        System.out.println("");
+
+        client.shutdown();
+    }
+}
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data
diff --git a/content/riak/kv/2.9.2/developing/getting-started/java/querying.md b/content/riak/kv/2.9.2/developing/getting-started/java/querying.md
new file mode 100644
index 0000000000..9f859a0805
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/java/querying.md
@@ -0,0 +1,276 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Querying"
+    identifier: "getting_started_java_query"
+    weight: 101
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/taste-of-riak/querying-java
+  - /riak/kv/2.9.2/dev/taste-of-riak/querying-java
+---
+
+## Java Version Setup
+
+For the Java version, please download the source from GitHub by either
+[cloning](https://github.com/basho/taste-of-riak) the source code
+repository or downloading the [current zip of the master
+branch](https://github.com/basho/taste-of-riak/archive/master.zip).
+The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`.
+You may import this code into your favorite editor, or just run it from
+the command line using the commands in `BuildAndRun.sh` if you are
+running on a *nix OS.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can be as simple
+as using the same key across multiple buckets for different types of
+data to having fields in your data that are related by name. These
+querying methods will introduce you to some ways of laying out your data
+in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled up info about orders such as total, etc. You can find the source
+for these POJOs in `Customer.java`, `Order.java`, and
+`OrderSummaries.java`. Let's put some data into Riak so we can play
+with it.
+
+```java
+// From SipOfRiak.java
+
+private static Customer createCustomer() {
+    Customer customer = new Customer();
+    customer.CustomerId = 1;
+    customer.Name = "John Smith";
+    customer.Address = "123 Main Street";
+    customer.City = "Columbus";
+    customer.State = "Ohio";
+    customer.Zip = "43210";
+    customer.Phone = "+1-614-555-5555";
+    customer.CreatedDate = "2013-10-01 14:30:26";
+    return customer;
+}
+
+private static ArrayList<Order> createOrders() {
+    ArrayList<Order> orders = new ArrayList<Order>();
+
+    Order order1 = new Order();
+    order1.OrderId = 1;
+    order1.CustomerId = 1;
+    order1.SalespersonId = 9000;
+    order1.Items.add(
+        new Item("TCV37GIT4NJ",
+                 "USB 3.0 Coffee Warmer",
+                 15.99));
+    order1.Items.add(
+        new Item("PEG10BBF2PP",
+                 "eTablet Pro; 24GB; Grey",
+                 399.99));
+    order1.Total = 415.98;
+    order1.OrderDate = "2013-10-01 14:42:26";
+    orders.add(order1);
+
+    Order order2 = new Order();
+    order2.OrderId = 2;
+    order2.CustomerId = 1;
+    order2.SalespersonId = 9001;
+    order2.Items.add(
+        new Item("OAX19XWN0QP",
+                 "GoSlo Digital Camera",
+                 359.99));
+    order2.Total = 359.99;
+    order2.OrderDate = "2013-10-15 16:43:16";
+    orders.add(order2);
+
+    Order order3 = new Order();
+    order3.OrderId = 3;
+    order3.CustomerId = 1;
+    order3.SalespersonId = 9000;
+    order3.Items.add(
+        new Item("WYK12EPU5EZ",
+                 "Call of Battle = Goats - Gamesphere 4",
+                 69.99));
+    order3.Items.add(
+        new Item("TJB84HAA8OA",
+                 "Bricko Building Blocks",
+                 4.99));
+    order3.Total = 74.98;
+    order3.OrderDate = "2013-11-03 17:45:28";
+    orders.add(order3);
+    return orders;
+}
+
+private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
+    OrderSummary orderSummary = new OrderSummary();
+    orderSummary.CustomerId = 1;
+    for(Order order: orders)
+    {
+        orderSummary.Summaries.add(new OrderSummaryItem(order));
+    }
+    return orderSummary;
+}
+
+public static void main(String[] args) throws RiakException {
+
+    System.out.println("Creating Data");
+    Customer customer = createCustomer();
+    ArrayList<Order> orders = createOrders();
+    OrderSummary orderSummary = createOrderSummary(orders);
+
+    System.out.println("Starting Client");
+    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);
+
+    System.out.println("Creating Buckets");
+    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
+    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
+    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();
+
+    System.out.println("Storing Data");
+    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
+    for (Order order : orders) {
+        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
+    }
+    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```java
+    System.out.println("Fetching related data by shared key");
+    String key = "1";
+    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
+    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
+    System.out.format("Customer 1: %s\n", fetchedCust);
+    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.2/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```java
+    System.out.println("Adding Index Data");
+    IRiakObject riakObj = ordersBucket.fetch("1").execute();
+    riakObj.addIndex("SalespersonId", 9000);
+    riakObj.addIndex("OrderDate", "2013-10-01");
+    ordersBucket.store(riakObj).execute();
+
+    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
+    riakObj2.addIndex("SalespersonId", 9001);
+    riakObj2.addIndex("OrderDate", "2013-10-15");
+    ordersBucket.store(riakObj2).execute();
+
+    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
+    riakObj3.addIndex("SalespersonId", 9000);
+    riakObj3.addIndex("OrderDate", "2013-11-03");
+    ordersBucket.store(riakObj3).execute();
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+```java
+    // Query for orders where the SalespersonId index is set to 9000
+    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
+                                           .withValue(9000).execute();
+
+    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
+```
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries.
Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```java + // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 + List octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate")) + .from("2013-10-01").to("2013-10-31").execute(); + + System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders)); +``` + +Which returns: + +```text +October's Orders: 1, 2 +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.9.2/developing/getting-started/nodejs.md b/content/riak/kv/2.9.2/developing/getting-started/nodejs.md new file mode 100644 index 0000000000..ab7e378b9a --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/nodejs.md @@ -0,0 +1,100 @@ +--- +title: "Getting Started with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "NodeJS" + identifier: "getting_started_nodejs" + weight: 104 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/nodejs + - /riak/kv/2.9.2/dev/taste-of-riak/nodejs +--- + +[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js +[npm]: https://www.npmjs.com/package/basho-riak-client +[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.2/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Node.js 0.12 or later is +required. + +Code for these examples is available [here][introduction.js]. To run, follow +these directions: + +```bash +git clone git://github.com/basho/riak-nodejs-client-examples +cd riak-nodejs-client-examples +npm install +node ./app.js +``` + +### Client Setup + +Install [the Riak Node.js Client][node_js_installation] through [NPM][npm]. + +### Connecting to Riak + +Connecting to Riak with the Riak Node.js Client requires creating a new client +object and using the callback argument to know when the client is fully +initialized: + +```javascript +var Riak = require('basho-riak-client'); +var nodes = [ + 'riak-test:10017', + 'riak-test:10027', + 'riak-test:10037', + 'riak-test:10047' +]; +var client = new Riak.Client(nodes, function (err, c) { + // NB: at this point the client is fully initialized, and + // 'client' and 'c' are the same object +}); +``` + +This creates a new `Riak.Client` object which handles all the details of +tracking active nodes and also provides load balancing. The `Riak.Client` object +is used to send commands to Riak. 
+When your application is completely done with Riak communications, the
+following method can be used to gracefully shut the client down and exit
+Node.js:
+
+```javascript
+client.stop(function (err, rslt) {
+    // NB: you may wish to check err
+    process.exit();
+});
+```
+
+Let's make sure the cluster is online with a `Ping` request:
+
+```javascript
+var assert = require('assert');
+
+client.ping(function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    } else {
+        // On success, ping returns true
+        assert(rslt === true);
+    }
+});
+```
+
+This is some simple code to test that a node in a Riak cluster is
+online - we send a simple ping message. Even if the cluster isn't
+reachable, the Riak Node.js Client will still invoke your callback, so
+in the callback it is important to check that your activity was
+successful by checking the `err` variable.
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.2/developing/getting-started/nodejs/crud-operations)
diff --git a/content/riak/kv/2.9.2/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/2.9.2/developing/getting-started/nodejs/crud-operations.md
new file mode 100644
index 0000000000..8f3e4a70f9
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/nodejs/crud-operations.md
@@ -0,0 +1,133 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_nodejs_crud"
+    weight: 100
+    parent: "getting_started_nodejs"
+toc: true
+---
+
+[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki
+
+### Creating Objects In Riak KV
+
+Pinging a Riak cluster sounds like a lot of fun, but eventually someone
+is going to want us to do productive work. Let's create some data to
+save in Riak.
+
+The Riak Node.js Client makes use of a `RiakObject` class to encapsulate
+Riak key/value objects. At the most basic, a `RiakObject` is responsible
+for identifying your object and for translating it into a format that
+can be easily saved to Riak.
+
+```javascript
+var async = require('async');
+
+var people = [
+    {
+        emailAddress: "bashoman@basho.com",
+        firstName: "Basho",
+        lastName: "Man"
+    },
+    {
+        emailAddress: "johndoe@gmail.com",
+        firstName: "John",
+        lastName: "Doe"
+    }
+];
+
+var storeFuncs = [];
+people.forEach(function (person) {
+    // Create functions to execute in parallel to store people
+    storeFuncs.push(function (async_cb) {
+        client.storeValue({
+                bucket: 'contributors',
+                key: person.emailAddress,
+                value: person
+            },
+            function(err, rslt) {
+                async_cb(err, rslt);
+            }
+        );
+    });
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+In this sample, we create a collection of `Person` objects and then save
+each `Person` to Riak. Once again, we check the response from Riak.
+
+### Reading from Riak
+
+Let's find a person!
+
+```javascript
+var logger = require('winston');
+
+client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true },
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        } else {
+            var riakObj = rslt.values.shift();
+            var bashoman = riakObj.value;
+            logger.info("I found %s in 'contributors'", bashoman.emailAddress);
+        }
+    }
+);
+```
+
+We use `client.fetchValue` to retrieve an object from Riak.
+This returns an array of `RiakObject` instances, which helpfully
+encapsulate the communication with Riak.
+
+After verifying that we've been able to communicate with Riak *and* that
+we have a successful result, we use the `value` property to get the
+object, which has already been converted to a JavaScript object due to
+the use of `convertToJs: true` in the options.
+
+### Modifying Existing Data
+
+Let's say that Basho Man has decided to be known as Riak Man:
+
+```javascript
+bashoman.firstName = "Riak";
+riakObj.setValue(bashoman);
+
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Updating an object involves modifying a `RiakObject` then using
+`client.storeValue` to save the existing object.
+
+### Deleting Data
+
+```javascript
+client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Just like other operations, we check the results that have come back
+from Riak to make sure the object was successfully deleted.
+
+The Riak Node.js Client has a lot of additional functionality that makes
+it easy to build rich, complex applications with Riak. Check out the
+[documentation][nodejs_wiki] to learn more about working with the Riak
+Node.js Client and Riak.
diff --git a/content/riak/kv/2.9.2/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/2.9.2/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..6e5431f8c8
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,119 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Object Modeling"
+    identifier: "getting_started_nodejs_object"
+    weight: 102
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/kv/2.9.2/dev/taste-of-riak/object-modeling-nodejs
+---
+
+To get started, let's create the models that we'll be using.
+
+* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
+* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
+* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+| Bucket | Key Pattern | Example Key |
+|:-------|:------------|:------------|
+| `Users` | `<user_name>` | `joeuser` |
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z` |
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br>`marketing_group_INBOX_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period so
+that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + diff --git a/content/riak/kv/2.9.2/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.9.2/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..8c83b15d0f --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/nodejs/querying.md @@ -0,0 +1,142 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/querying-nodejs + - /riak/kv/2.9.2/dev/taste-of-riak/querying-nodejs +--- + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. + +* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33) +* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262) + +While individual Customer and Order objects don't change much (or +shouldn't change), the "Order Summary" object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. 
+If we showed this information in our application often, it's only one
+extra request to get all the info.
+
+[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)
+
+Which returns our amalgamated objects:
+
+```bash
+info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
+info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.2/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
+2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-L175)
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
diff --git a/content/riak/kv/2.9.2/developing/getting-started/php.md b/content/riak/kv/2.9.2/developing/getting-started/php.md
new file mode 100644
index 0000000000..9506b5e641
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/php.md
@@ -0,0 +1,76 @@
+---
+title: "Getting Started with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "PHP"
+    identifier: "getting_started_php"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/taste-of-riak/php
+  - /riak/kv/2.9.2/dev/taste-of-riak/php
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.2/using/running-a-cluster) first.
+
+To try this flavor of Riak, you will need a working installation of PHP, along with [Composer](https://getcomposer.org/) to fetch the client library package.
+
+## Client Setup
+Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)).
+
+From the `taste-of-riak` directory, use Composer to install the Riak PHP 2.0 client.
+
+```bash
+php path/to/your/composer.phar install
+
+# If you did a global install of composer, run this instead:
+composer install
+```
+
+If you set up a local Riak cluster using the five-minute install method, change line 11 from `->onPort(8098)` to `->onPort(10018)`.
+
+Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:
+
+```text
+Reading Objects From Riak...
+Updating Objects In Riak...
+Deleting Objects From Riak...
+Working With Complex Objects...
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+Yay, success!
+
+Since we didn't use PHP's REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting up the PHP Client and connections
+
+```php
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+```
+
+This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
+Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.
+
+We are now ready to start interacting with Riak.
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.2/developing/getting-started/php/crud-operations) diff --git a/content/riak/kv/2.9.2/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.9.2/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..62bcb2cd35 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/php/crud-operations.md @@ -0,0 +1,182 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with PHP" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "CRUD Operations" + identifier: "getting_started_php_crud" + weight: 100 + parent: "getting_started_php" +toc: true +--- + +## Creating Objects In Riak +First, let’s create a few objects and a bucket to keep them in. + +```php +$bucket = new Riak\Bucket('testBucket'); + +$val1 = 1; +$location1 = new Riak\Location('one', $bucket); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val1) + ->atLocation($location1) + ->build(); +$storeCommand1->execute(); +``` + +In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key. + +```php +$val2 = 'two'; +$location2 = new Riak\Location('two', $bucket); + +$storeCommand2 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val2) + ->atLocation($location2) + ->build(); +$storeCommand2->execute(); +``` + +That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now. + +```php +$val3 = ['myValue' => 3]; +$location3 = new Riak\Location('three', $bucket); + +$storeCommand3 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($val3) + ->atLocation($location3) + ->build(); +$storeCommand3->execute(); +``` + +## Reading Objects From Riak +Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect. + +```php +$response1 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location1) + ->build() + ->execute(); + +$response2 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location2) + ->build() + ->execute(); + +$response3 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location3) + ->withDecodeAsAssociative() + ->build() + ->execute(); + +print_r($response1->getObject()->getData()); +print_r($response2->getObject()->getData()); +print_r($response3->getObject()->getData()); +``` + +That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html). +For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object. + +In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data. + +## Updating Objects In Riak +While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42. 
+
+```php
+$object3 = $response3->getObject();
+$data3 = $object3->getData();
+
+$data3['myValue'] = 42;
+$object3 = $object3->setData(json_encode($data3));
+
+$updateCommand = (new Command\Builder\StoreObject($riak))
+    ->withObject($object3)
+    ->atLocation($location3)
+    ->build();
+
+$updateCommand->execute();
+```
+
+First, we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object.
+To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object.
+
+## Deleting Objects From Riak
+As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it.
+
+```php
+(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute();
+(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute();
+(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute();
+```
+
+### Working With Complex Objects
+Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take, for example, this plain old PHP object (POPO) that encapsulates some knowledge about a book.
+
+```php
+class Book
+{
+    var $title;
+    var $author;
+    var $body;
+    var $isbn;
+    var $copiesOwned;
+}
+
+$book = new Book();
+$book->isbn = '1111979723';
+$book->title = 'Moby Dick';
+$book->author = 'Herman Melville';
+$book->body = 'Call me Ishmael. Some years ago...';
+$book->copiesOwned = 3;
+```
+
+OK, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now:
+
+```php
+$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books'));
+
+$storeCommand1 = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($book)
+    ->atLocation($bookLocation)
+    ->build();
+
+$storeCommand1->execute();
+```
+
+Some of you may be thinking, “But how does the Riak client encode/decode my object?” If we fetch the binary version of our book back and print it as a string, we shall know:
+
+```php
+$fetchBookResponse = (new Command\Builder\FetchObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute();
+
+print('Serialized Object:' . PHP_EOL);
+print($fetchBookResponse->getBody() . PHP_EOL);
+```
+
+```json
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder.
+
+Now that we’ve ruined the magic of object encoding, let’s clean up our mess:
+
+```php
+(new Command\Builder\DeleteObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute();
+```
+
+## Next Steps
+
+More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.9.2/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents.
diff --git a/content/riak/kv/2.9.2/developing/getting-started/php/querying.md b/content/riak/kv/2.9.2/developing/getting-started/php/querying.md
new file mode 100644
index 0000000000..551380e047
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/php/querying.md
@@ -0,0 +1,404 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Querying"
+    identifier: "getting_started_php_query"
+    weight: 101
+    parent: "getting_started_php"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/taste-of-riak/querying-php
+  - /riak/kv/2.9.2/dev/taste-of-riak/querying-php
+---
+
+## A Quick Note on Querying and Schemas
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+## Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it.
+
+```php
+<?php
+
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Command;
+use Basho\Riak\Location;
+use Basho\Riak\Node;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Class definitions for our models
+
+class Customer
+{
+    var $customerId;
+    var $name;
+    var $address;
+    var $city;
+    var $state;
+    var $zip;
+    var $phone;
+    var $createdDate;
+}
+
+class Order
+{
+    public function __construct()
+    {
+        $this->items = array();
+    }
+    var $orderId;
+    var $customerId;
+    var $salespersonId;
+    var $items;
+    var $total;
+    var $orderDate;
+}
+
+class Item
+{
+    public function __construct($itemId, $title, $price)
+    {
+        $this->itemId = $itemId;
+        $this->title = $title;
+        $this->price = $price;
+    }
+    var $itemId;
+    var $title;
+    var $price;
+}
+
+class OrderSummary
+{
+    public function __construct()
+    {
+        $this->summaries = array();
+    }
+    var $customerId;
+    var $summaries;
+}
+
+class OrderSummaryItem
+{
+    public function __construct(Order $order)
+    {
+        $this->orderId = $order->orderId;
+        $this->total = $order->total;
+        $this->orderDate = $order->orderDate;
+    }
+    var $orderId;
+    var $total;
+    var $orderDate;
+}
+
+// Creating Data
+$customer = new Customer();
+$customer->customerId = 1;
+$customer->name = 'John Smith';
+$customer->address = '123 Main Street';
+$customer->city = 'Columbus';
+$customer->state = 'Ohio';
+$customer->zip = '43210';
+$customer->phone = '+1-614-555-5555';
+$customer->createdDate = '2013-10-01 14:30:26';
+
+$orders = [];
+
+$order1 = new Order();
+$order1->orderId = 1;
+$order1->customerId = 1;
+$order1->salespersonId = 9000;
+$order1->items = [
+    new Item(
+        'TCV37GIT4NJ',
+        'USB 3.0 Coffee Warmer',
+        15.99
+    ),
+    new Item(
+        'PEG10BBF2PP',
+        'eTablet Pro; 24GB; Grey',
+        399.99
+    )
+];
+$order1->total = 415.98;
+$order1->orderDate = '2013-10-01 14:42:26';
+$orders[] = $order1;
+
+$order2 = new Order();
+$order2->orderId = 2;
+$order2->customerId = 1;
+$order2->salespersonId = 9001;
+$order2->items = [
+    new Item(
+        'OAX19XWN0QP',
+        'GoSlo Digital Camera',
+        359.99
+    )
+];
+$order2->total = 359.99;
+$order2->orderDate = '2013-10-15 16:43:16';
+$orders[] = $order2;
+
+$order3 = new Order();
+$order3->orderId = 3;
+$order3->customerId = 1;
+$order3->salespersonId = 9000;
+$order3->items = [
+    new Item(
+        'WYK12EPU5EZ',
+        'Call of Battle = Goats - Gamesphere 4',
+        69.99
+    ),
+    new Item(
+        'TJB84HAA8OA',
+        'Bricko Building Blocks',
+        4.99
+    )
+];
+$order3->total = 74.98;
+$order3->orderDate = '2013-11-03 17:45:28';
+$orders[] = $order3;
+
+$orderSummary = new OrderSummary();
+$orderSummary->customerId = 1;
+foreach ($orders as $order) {
+    $orderSummary->summaries[] = new OrderSummaryItem($order);
+}
+unset($order);
+
+// Starting Client
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Creating Buckets
+$customersBucket = new Riak\Bucket('Customers');
+$ordersBucket = new Riak\Bucket('Orders');
+$orderSummariesBucket = new Riak\Bucket('OrderSummaries');
+
+// Storing Data
+$storeCustomer = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($customer)
+    ->atLocation(new Location($customer->customerId, $customersBucket))
+    ->build();
+$storeCustomer->execute();
+
+foreach ($orders as $order) {
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($order)
+        ->atLocation(new Location($order->orderId, $ordersBucket))
+        ->build();
+    $storeOrder->execute();
+}
+unset($order);
+
+$storeSummary = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($orderSummary)
+    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
+    ->build();
+$storeSummary->execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```php
+// Fetching related data by shared key
+$fetched_customer = (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $customersBucket))
+    ->build()->execute()->getObject()->getData();
+
+$fetched_customer->orderSummary =
+    (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $orderSummariesBucket))
+    ->build()->execute()->getObject()->getData();
+
+print("Customer with OrderSummary data: \n");
+print_r($fetched_customer);
+```
+
+Which returns our amalgamated objects:
+
+```text
+Customer with OrderSummary data:
+stdClass Object
+(
+    [customerId] => 1
+    [name] => John Smith
+    [address] => 123 Main Street
+    [city] => Columbus
+    [state] => Ohio
+    [zip] => 43210
+    [phone] => +1-614-555-5555
+    [createdDate] => 2013-10-01 14:30:26
+    [orderSummary] => stdClass Object
+        (
+            [customerId] => 1
+            [summaries] => Array
+                (
+                    [0] => stdClass Object
+                        (
+                            [orderId] => 1
+                            [total] => 415.98
+                            [orderDate] => 2013-10-01 14:42:26
+                        )
+
+                    [1] => stdClass Object
+                        (
+                            [orderId] => 2
+                            [total] => 359.99
+                            [orderDate] => 2013-10-15 16:43:16
+                        )
+
+                    [2] => stdClass Object
+                        (
+                            [orderId] => 3
+                            [total] => 74.98
+                            [orderDate] => 2013-11-03 17:45:28
+                        )
+                )
+        )
+)
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.2/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```php
+// Adding Index Data
+$keys = array(1,2,3);
+foreach ($keys as $key) {
+    $orderLocation = new Location($key, $ordersBucket);
+    $orderObject = (new Command\Builder\FetchObject($riak))
+        ->atLocation($orderLocation)
+        ->build()->execute()->getObject();
+
+    $order = $orderObject->getData();
+
+    $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId);
+    $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate);
+
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->withObject($orderObject)
+        ->atLocation($orderLocation)
+        ->build();
+    $storeOrder->execute();
+}
+unset($key);
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's ID of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's ID; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+?>
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which, as you can see, will return the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
diff --git a/content/riak/kv/2.9.2/developing/getting-started/python.md b/content/riak/kv/2.9.2/developing/getting-started/python.md
new file mode 100644
index 0000000000..80e83ae1d2
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/python.md
@@ -0,0 +1,99 @@
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/taste-of-riak/python
+  - /riak/kv/2.9.2/dev/taste-of-riak/python
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.2/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`.
+ +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` --- Header files and a static library for Python +* `libffi-dev` --- Foreign function interface library +* `libssl-dev` --- libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.2/developing/getting-started/python/crud-operations) diff --git a/content/riak/kv/2.9.2/developing/getting-started/python/crud-operations.md b/content/riak/kv/2.9.2/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..f16747bcdf --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/python/crud-operations.md @@ -0,0 +1,145 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` diff --git a/content/riak/kv/2.9.2/developing/getting-started/python/object-modeling.md b/content/riak/kv/2.9.2/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..dd26887709 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/python/object-modeling.md @@ -0,0 +1,260 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/object-modeling-python + - /riak/kv/2.9.2/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group>_<type>_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`,
+respectively.
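+
+Before we write any repository code, here is a minimal sketch (ours, with
+made-up values) of how these natural keys can be built in Python; the
+repository classes below wrap the same logic in their `_generate_key`
+helpers:
+
+```python
+from datetime import datetime
+
+user_name = 'joeuser'
+created = datetime.utcnow()
+
+# Users bucket: the username itself is the key
+user_key = user_name
+
+# Msgs bucket: <username>_<datetime>
+msg_key = user_name + '_' + created.isoformat()
+
+# Timelines bucket: <owner>_<type>_<date>, partitioned by day
+timeline_key = user_name + '_Sent_' + created.strftime('%Y-%m-%d')
+```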
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```python
+class UserRepository:
+    BUCKET = 'Users'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, user):
+        riak_obj = self.client.bucket(self.BUCKET).get(user['user_name'])
+        riak_obj.data = user
+        return riak_obj.store()
+
+    def get(self, user_name):
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        return riak_obj.data
+
+
+class MsgRepository:
+    BUCKET = 'Msgs'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, msg):
+        msgs = self.client.bucket(self.BUCKET)
+        key = self._generate_key(msg)
+
+        riak_obj = msgs.get(key)
+
+        if not riak_obj.exists:
+            riak_obj.data = msg
+            riak_obj.store(if_none_match=True)
+
+        return riak_obj
+
+    def get(self, key):
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _generate_key(self, msg):
+        return msg['sender'] + '_' + msg['created']
+
+
+class TimelineRepository:
+    BUCKET = 'Timelines'
+    SENT = 'Sent'
+    INBOX = 'Inbox'
+
+    def __init__(self, client):
+        self.client = client
+        self.msg_repo = MsgRepository(client)
+
+    def post_message(self, msg):
+        # Save the canonical copy
+        saved_message = self.msg_repo.save(msg)
+        msg_key = saved_message.key
+
+        # Post to sender's Sent timeline
+        self._add_to_timeline(msg, self.SENT, msg_key)
+
+        # Post to recipient's Inbox timeline
+        self._add_to_timeline(msg, self.INBOX, msg_key)
+
+    def get_timeline(self, owner, msg_type, date):
+        key = self._generate_key(owner, msg_type, date)
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _add_to_timeline(self, msg, msg_type, msg_key):
+        timeline_key = self._generate_key_from_msg(msg, msg_type)
+        riak_obj = self.client.bucket(self.BUCKET).get(timeline_key)
+
+        if riak_obj.exists:
+            riak_obj = self._add_to_existing_timeline(riak_obj,
+                                                      msg_key)
+        else:
+            riak_obj = self._create_new_timeline(riak_obj,
+                                                 msg, msg_type,
+                                                 msg_key)
+
+        return riak_obj.store()
+
+    def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key):
+        owner = self._get_owner(msg, msg_type)
+        new_timeline = {'owner': owner,
+                        'msg_type': msg_type,
+                        'msgs': [msg_key]}
+
+        riak_obj.data = new_timeline
+        return riak_obj
+
+    def 
_add_to_existing_timeline(self, riak_obj, msg_key): + riak_obj.data['msgs'].append(msg_key) + return riak_obj + + def _get_owner(self, msg, msg_type): + if msg_type == self.INBOX: + return msg['recipient'] + else: + return msg['sender'] + + def _generate_key_from_msg(self, msg, msg_type): + owner = self._get_owner(msg, msg_type) + return self._generate_key(owner, msg_type, msg['created']) + + def _generate_key(self, owner, msg_type, datetimestr): + dateString = string.split(datetimestr, 'T', 1)[0] + return owner + '_' + msg_type + '_' + dateString + +``` + +Finally, let's test them: + +```python +# Setup our repositories +client = riak.RiakClient(pb_port=10017, protocol='pbc') +userRepo = UserRepository(client) +msgsRepo = MsgRepository(client) +timelineRepo = TimelineRepository(client) + +# Save users +userRepo.save(marleen) +userRepo.save(joe) + +# Post msg to timelines +timelineRepo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timelineRepo.get_timeline( + joe['user_name'], + TimelineRepository.INBOX, + datetime.utcnow().isoformat()) + +joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0]) + +print 'From: {0}\nMsg : {1}\n\n'.format( + joes_first_message['sender'], + joes_first_message['text']) + +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + diff --git a/content/riak/kv/2.9.2/developing/getting-started/python/querying.md b/content/riak/kv/2.9.2/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..60c62a9bba --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/python/querying.md @@ -0,0 +1,236 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Python" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Querying" + identifier: "getting_started_python_query" + weight: 101 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/querying-python + - /riak/kv/2.9.2/dev/taste-of-riak/querying-python +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index
+for all of a customer's orders, and also holding some relevant data such
+as the order total, etc. If we showed this information in our
+application often, it's only one extra request to get all the info.
+
+```python
+customer = customer_bucket.get('1').data
+customer['order_summary'] = order_summary_bucket.get('1').data
+customer
+```
+
+Which returns our amalgamated objects:
+
+```python
+{
+  u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210',
+  u'created_date': u'2013-10-01 14:30:26',
+  'order_summary': {
+    u'customer_id': 1, u'summaries': [
+      {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98},
+      {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99},
+      {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98}
+    ]},
+  u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street',
+  u'customer_id': 1
+}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id. Next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
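+
+One optional refinement before we recap (a sketch of ours, not required by
+the example above): the Python client's `get_index` also accepts a
+`return_terms` flag, which hands back the matching index values alongside
+the keys:
+
+```python
+# Fetch October's orders together with the matching order_date_bin terms
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31",
+                                        return_terms=True)
+
+# Each result is now a (term, key) pair, e.g. ('2013-10-01 14:42:26', '1')
+october_orders.results
+```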
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
diff --git a/content/riak/kv/2.9.2/developing/getting-started/ruby.md b/content/riak/kv/2.9.2/developing/getting-started/ruby.md
new file mode 100644
index 0000000000..f27084feda
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/ruby.md
@@ -0,0 +1,64 @@
+---
+title: "Getting Started with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Ruby"
+    identifier: "getting_started_ruby"
+    weight: 101
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/taste-of-riak/ruby
+  - /riak/kv/2.9.2/dev/taste-of-riak/ruby
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.2/using/running-a-cluster) first. To try this flavor
+of Riak, a working installation of Ruby is required.
+
+## Client Setup
+
+First, install the Riak Ruby client via RubyGems.
+
+```bash
+gem install riak-client
+```
+
+Start IRB, the Ruby REPL, and let’s get set up. Enter the following into
+IRB:
+
+```ruby
+require 'riak'
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance, assuming that the node is running on `localhost`
+port 8087:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087)
+
+# Since the Ruby Riak client uses the Protocol Buffers API by default,
+# you can also just enter this:
+client = Riak::Client.new(:pb_port => 8087)
+```
+
+If you set up a local Riak cluster using the [[five-minute install]]
+method, use this code snippet instead:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017)
+
+# For the reasons explained in the snippet above, this will also work:
+client = Riak::Client.new(:pb_port => 10017)
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{<baseurl>}}riak/kv/2.9.2/developing/getting-started/ruby/crud-operations)
diff --git a/content/riak/kv/2.9.2/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/2.9.2/developing/getting-started/ruby/crud-operations.md
new file mode 100644
index 0000000000..97ea4932d2
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/ruby/crud-operations.md
@@ -0,0 +1,146 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_ruby_crud"
+    weight: 100
+    parent: "getting_started_ruby"
+toc: true
+---
+
+## Creating Objects In Riak
+
+First, let’s create a few objects and a bucket to keep them in.
+
+```ruby
+my_bucket = client.bucket("test")
+
+val1 = 1
+obj1 = my_bucket.new('one')
+obj1.data = val1
+obj1.store()
+```
+
+In this first example we have stored the integer 1 with the lookup key
+of `one`. Next, let’s store a simple string value of `two` with a
+matching key.
+
+```ruby
+val2 = "two"
+obj2 = my_bucket.new('two')
+obj2.data = val2
+obj2.store()
+```
+
+That was easy. Finally, let’s store a bit of JSON. You will probably
+recognize the pattern by now.
+
+```ruby
+val3 = { myValue: 3 }
+obj3 = my_bucket.new('three')
+obj3.data = val3
+obj3.store()
+```
+
+## Reading Objects From Riak
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+```ruby
+fetched1 = my_bucket.get('one')
+fetched2 = my_bucket.get('two')
+fetched3 = my_bucket.get('three')
+
+fetched1.data == val1
+fetched2.data == val2
+fetched3.data.to_json == val3.to_json
+```
+
+That was easy. We simply request the objects by key. In the last
+example, we converted to JSON so we can compare a string key to a symbol
+key.
+
+## Updating Objects In Riak
+
+While some data may be static, other forms of data may need to be
+updated. This is also easy to accomplish. Let’s update the value of
+myValue in the 3rd example to `42`.
+
+```ruby
+fetched3.data["myValue"] = 42
+fetched3.store()
+```
+
+## Deleting Objects From Riak
+
+As a last step, we’ll demonstrate how to delete data. You’ll see that
+the delete message can be called either against the bucket or the
+object.
+
+```ruby
+my_bucket.delete('one')
+obj2.delete()
+obj3.delete()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this Ruby hash that encapsulates some knowledge about
+a book.
+
+```ruby
+book = {
+  :isbn => '1111979723',
+  :title => 'Moby Dick',
+  :author => 'Herman Melville',
+  :body => 'Call me Ishmael. Some years ago...',
+  :copies_owned => 3
+}
+```
+
+All right, so we have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now.
+
+```ruby
+books_bucket = client.bucket('books')
+new_book = books_bucket.new(book[:isbn])
+new_book.data = book
+new_book.store()
+```
+
+Some of you may be thinking, "But how does the Ruby Riak client
+encode/decode my object?" If we fetch our book back and print the raw
+data, we shall know:
+
+```ruby
+fetched_book = books_bucket.get(book[:isbn])
+puts fetched_book.raw_data
+```
+
+Raw Data:
+
+```json
+{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville",
+"body":"Call me Ishmael. Some years ago...","copies_owned":3}
+```
+
+JSON! The Ruby Riak client will serialize objects to JSON when it comes
+across structured data like hashes. For more advanced control over
+serialization you can use a library called
+[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling
+layer over the basic Riak client. Ripple falls outside the scope of
+this document, but we shall visit it later.
+
+Now, let’s clean up our mess:
+
+```ruby
+new_book.delete()
+```
diff --git a/content/riak/kv/2.9.2/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/2.9.2/developing/getting-started/ruby/object-modeling.md
new file mode 100644
index 0000000000..8c2e6c1c8f
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/getting-started/ruby/object-modeling.md
@@ -0,0 +1,291 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Object Modeling"
+    identifier: "getting_started_ruby_object"
+    weight: 102
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/taste-of-riak/object-modeling-ruby
+  - /riak/kv/2.9.2/dev/taste-of-riak/object-modeling-ruby
+---
+
+To get started, let's create the models that we'll be using. Since the
+[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses
+hashes when converting to and from JSON, we'll use the library
+[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically
+coerce class properties to and from hashes. You can install this library
You can install this library +with `gem install hashie`. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'hashie' +require 'time' + +class User < Hashie::Dash + property :user_name + property :full_name + property :email +end + +class Msg < Hashie::Dash + property :from + property :to + property :created + property :text +end + +class Timeline < Hashie::Dash + property :owner + property :type + property :msgs +end +``` + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. + +Bucket | Key Pattern | Example Key +:------|:------------|:----------- +`Users` | `` | `joeuser` +`Msgs` | `_` | `joeuser_2014-03-06T02:05:13.223556Z` +`Timelines` | `__` | `joeuser_Sent_2014-03-06Z`
`marketing_group_Inbox_2014-03-06Z` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. For the +`Msgs` bucket, let's use a combination of the username and the posting +datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601) +format. This combination gives us the pattern `_`, +which produces keys like `joeuser_2014-03-05T23:20:28`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `_Inbox_` for groups, which will look like +`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2MB. Objects larger than that can hurt +performance, especially many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object models, let's write some +repositories to help create and work with these objects in Riak: + +```ruby +class UserRepository + BUCKET = 'Users' + + def initialize(client) + @client = client + end + + def save(user) + users = @client.bucket(BUCKET) + key = user.user_name + + riak_obj = users.get_or_new(key) + riak_obj.data = user + riak_obj.content_type = 'application/json' + riak_obj.store + end + + def get(user_name) + riak_obj = @client.bucket(BUCKET)[user_name] + User.new(riak_obj.data) + end +end + +class MsgRepository + BUCKET = 'Msgs' + + def initialize(client) + @client = client + end + + def save(msg) + msgs = @client.bucket(BUCKET) + key = generate_key(msg) + + return msgs.get(key) if msgs.exists?(key) + riak_obj = msgs.new(key) + riak_obj.data = msg + riak_obj.content_type = 'application/json' + riak_obj.prevent_stale_writes = true + riak_obj.store(returnbody: true) + end + + def get(key) + riak_obj = @client.bucket(BUCKET).get(key) + Msg.new(riak_obj.data) + end + + def generate_key(msg) + msg.from + '_' + msg.created.utc.iso8601(6) + end +end + +class TimelineRepository + BUCKET = 'Timelines' + SENT = 'Sent' + INBOX = 'Inbox' + + def initialize(client) + @client = client + @msg_repo = MsgRepository.new(client) + end + + def post_message(msg) + # Save the canonical copy + saved_message = @msg_repo.save(msg) + # Post to sender's Sent timeline + add_to_timeline(msg, SENT, saved_message.key) + # Post to recipient's Inbox timeline + add_to_timeline(msg, INBOX, saved_message.key) + end + + def get_timeline(owner, type, date) + riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date)) + Timeline.new(riak_obj.data) + end + + private + + def add_to_timeline(msg, type, msg_key) + timeline_key = generate_key_from_msg(msg, type) + riak_obj = nil + + if @client.bucket(BUCKET).exists?(timeline_key) + riak_obj = add_to_existing_timeline(timeline_key, msg_key) + else + riak_obj = create_new_timeline(timeline_key, msg, type, msg_key) + end + + riak_obj.store + end + + def create_new_timeline(key, msg, type, msg_key) + owner = get_owner(msg, type) + riak_obj = @client.bucket(BUCKET).new(key) + riak_obj.data = Timeline.new(owner: owner, + type: type, + msgs: [msg_key]) + riak_obj.content_type = 'application/json' + riak_obj + end + + def 
add_to_existing_timeline(key, msg_key) + riak_obj = @client.bucket(BUCKET).get(key) + timeline = Timeline.new(riak_obj.data) + timeline.msgs << msg_key + riak_obj.data = timeline + riak_obj + end + + def get_owner(msg, type) + type == INBOX ? msg.to : msg.from + end + + def generate_key_from_msg(msg, type) + owner = get_owner(msg, type) + generate_key(owner, type, msg.created) + end + + def generate_key(owner, type, date) + owner + '_' + type + '_' + date.utc.strftime('%F') + end +end +``` + +Finally, let's test them: + +```ruby +# Setup our repositories +client = Riak::Client.new(protocol: 'pbc', pb_port: 10017) +user_repo = UserRepository.new(client) +msgs_repo = MsgRepository.new(client) +timeline_repo = TimelineRepository.new(client) + +# Create and save users +marleen = User.new(user_name: 'marleenmgr', + full_name: 'Marleen Manager', + email: 'marleen.manager@basho.com') + +joe = User.new(user_name: 'joeuser', + full_name: 'Joe User', + email: 'joe.user@basho.com') + +user_repo.save(marleen) +user_repo.save(joe) + +# Create new Msg, post to timelines +msg = Msg.new(from: marleen.user_name, + to: joe.user_name, + created: Time.now, + text: 'Welcome to the company!') + +timeline_repo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now) +joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first) + +puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}" +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + diff --git a/content/riak/kv/2.9.2/developing/getting-started/ruby/querying.md b/content/riak/kv/2.9.2/developing/getting-started/ruby/querying.md new file mode 100644 index 0000000000..a9b55333db --- /dev/null +++ b/content/riak/kv/2.9.2/developing/getting-started/ruby/querying.md @@ -0,0 +1,252 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Querying" + identifier: "getting_started_ruby_query" + weight: 101 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/2.9.2/dev/taste-of-riak/querying-ruby + - /riak/kv/2.9.2/dev/taste-of-riak/querying-ruby +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. 
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
change), the `Order Summaries` object will likely change often. It will
+do double duty by acting as an index for all a customer's orders, and
+also holding some relevant data such as the order total, etc. If we
+showed this information in our application often, it's only one extra
+request to get all the info.
+
+```ruby
+shared_key = '1'
+customer = customer_bucket.get(shared_key).data
+customer[:order_summary] = order_summary_bucket.get(shared_key).data
+puts "Combined Customer and Order Summary: "
+pp customer
+```
+
+Which returns our amalgamated objects:
+
+```ruby
+# Combined Customer and Order Summary:
+{"customer_id"=>1,
+ "name"=>"John Smith",
+ "address"=>"123 Main Street",
+ "city"=>"Columbus",
+ "state"=>"Ohio",
+ "zip"=>"43210",
+ "phone"=>"+1-614-555-5555",
+ "created_date"=>"2013-10-01 14:30:26 -0400",
+ :order_summary=>
+  {"customer_id"=>1,
+   "summaries"=>
+    [{"order_id"=>1,
+      "total"=>415.98,
+      "order_date"=>"2013-10-01 14:42:26 -0400"},
+     {"order_id"=>2,
+      "total"=>359.99,
+      "order_date"=>"2013-10-15 16:43:16 -0400"},
+     {"order_id"=>3,
+      "total"=>74.98,
+      "order_date"=>"2013-11-03 17:45:28 -0500"}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+                                         .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "#Jane's Orders: "
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id. Next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`.
+ +```ruby +puts "#October's Orders: " +pp order_bucket.get_index('order_date_bin', '20131001'..'20131031') +``` + +Which returns: + +```ruby +# October's Orders: +["1", "2"] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query diff --git a/content/riak/kv/2.9.2/developing/key-value-modeling.md b/content/riak/kv/2.9.2/developing/key-value-modeling.md new file mode 100644 index 0000000000..9c34a06a2a --- /dev/null +++ b/content/riak/kv/2.9.2/developing/key-value-modeling.md @@ -0,0 +1,531 @@ +--- +title: "Riak KV Key/Value Modeling" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Key/Value Modeling" + identifier: "developing_kv_model" + weight: 104 + parent: "developing" +toc: true +aliases: + - /riak/2.9.2/dev/data-modeling/key-value/ + - /riak/kv/2.9.2/dev/data-modeling/key-value/ +--- + +While Riak enables you to take advantage of a wide variety of features +that can be useful in application development, such as [Search]({{}}riak/kv/2.9.2/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.9.2/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.9.2/developing/data-types/), Riak almost always performs best when you +build your application around basic CRUD operations (create, read, +update, and delete) on objects, i.e. when you use Riak as a "pure" +key/value store. + +In this tutorial, we'll suggest some strategies for naming and modeling +for key/value object interactions with Riak. If you'd like to use some +of Riak's other features, we recommend checking out the documentation +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.9.2/developing/app-guide/) for a better sense of which features you might need. + +## Advantages of Key/Value Operations + +Riak's key/value architecture enables it to be more performant than +relational databases in many scenarios because Riak doesn't need to +perform lock, join, union, or other operations when working with +objects. Instead, it interacts with objects on a one-by-one basis, using +**primary key lookups**. + +Primary key lookups store and fetch objects in Riak on the basis of +three basic locators: + +* The object's [key]({{}}riak/kv/2.9.2/learn/concepts/keys-and-objects#keys), which can be anything you + want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/2.9.2/learn/concepts/buckets) which houses the object and its key (bucket + names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/2.9.2/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.9.2/developing/app-guide/replication-properties) and other properties + +It may be useful to think of this system as analogous to a nested +key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you +would find in most programming languages. Below is an example from +[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{<baseurl>}}riak/kv/2.9.2/developing/api/http)):
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+The most important benefit of sorting Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
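+
+To make those naming schemes concrete, here is a small Python sketch
+(ours; the sensor and user names are invented) that generates keys along
+these lines:
+
+```python
+import uuid
+from datetime import datetime
+
+# Timestamp-based key for a sensor reading, e.g. sensor1_2013-11-05T08:15:30
+sensor_key = 'sensor1_' + datetime.now().isoformat()
+
+# UUID-based key for a user record, e.g. user_9b1899b5-eb8c-47e4-83c9-...
+user_key = 'user_' + str(uuid.uuid4())
+```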
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist?
+
+One way to determine this is to [list all keys]({{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers/list-keys) in the
+bucket `users`. This approach, however, is _not_ recommended, because
+listing all keys in a bucket is a very expensive operation that should
+not be used in production. And so another strategy must be employed.
+
+A better possibility is to use [Riak sets]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets) to
+store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{<baseurl>}}riak/kv/2.9.2/developing/data-types) that enables you to store lists of binaries or strings in Riak.
+Unlike normal Riak objects, you can interact with Riak sets much like
+you interact with sets in most programming languages, i.e. you can add
+and remove elements at will.
+
+Going back to our user data example, instead of simply storing user
+records in our `users` bucket, we could set up our application to store
+each key in a set when a new record is created. We'll store this set in
+the bucket `user_info_sets` (we'll keep it simple) and in the key
+`usernames`. The following will also assume that we've [set up a bucket type]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called
+`sets`.
+
+We can interact with that set on the basis of its location:
+
+```java
+Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames");
+
+// With this Location, we can construct fetch operations like this:
+FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build();
+```
+
+```ruby
+require 'riak'
+
+set_bucket = client.bucket('user_info_sets')
+
+# We'll make this set global because we'll use it
+# inside of a function later on
+
+$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets')
+```
+
+```php
+$command = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->buildLocation('usernames', 'user_info_sets', 'sets')
+    ->build();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('user_info_sets')
+user_id_set = Set(bucket, 'usernames')
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.2/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.2/developing/getting-started).
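+
+Because sets are a Riak Data Type, adding and removing elements is a
+matter of staging changes and storing them. A short sketch (ours), using
+the `user_id_set` object from the Python snippet above with a made-up
+username:
+
+```python
+# Stage and commit an addition to the set
+user_id_set.add('bashoman')  # 'bashoman' is a hypothetical username
+user_id_set.store()
+
+# Removals need the set's current context, so reload it first
+user_id_set.reload()
+user_id_set.discard('bashoman')
+user_id_set.store()
+
+# value is a frozenset of the set's current members
+print(user_id_set.reload().value)
+```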
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+  public String username;
+  public String info;
+
+  public User(String username, String info) {
+    this.username = username;
+    this.info = info;
+  }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+  // User records themselves will be stored in the bucket "users"
+  Location userObjectLocation =
+    new Location(new Namespace("users"), user.username);
+  RiakObject userObject = new RiakObject()
+    // We'll keep it simple and store User object data as plain text
+    .setContentType("text/plain")
+    .setValue(BinaryValue.create(user.info));
+  StoreValue store = new StoreValue.Builder(userObject)
+    .withLocation(userObjectLocation)
+    .build();
+  client.execute(store);
+
+  Location userIdSet =
+    new Location(new Namespace("sets", "user_info_sets"), "usernames");
+  SetUpdate su = new SetUpdate()
+    .add(BinaryValue.create(user.username));
+  UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+    .build();
+  client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new(client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the set
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+    public $user_name;
+    public $info;
+
+    public function __construct($user_name, $info)
+    {
+        $this->user_name = $user_name;
+        $this->info = $info;
+    }
+}
+
+function store_user(User $user)
+{
+    (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->buildLocation($user->user_name, 'users')
+        ->buildJsonObject($user)
+        ->build()
+        ->execute();
+
+    (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->add($user->user_name)
+        ->build()
+        ->execute();
+}
+```
+
+```python
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    obj = RiakObject(client, client.bucket('users'), user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
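+
+A quick usage sketch (ours; the username and info string are invented)
+for the Python version of the function:
+
+```python
+# Create a hypothetical user record and store it along with its set entry
+new_user = User('bashoman', 'Basho Man enjoys distributed systems.')
+store_record(new_user)
+```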
We could do so by iterating through the usernames
+stored in our set and then fetching the object corresponding to each
+username:
+
+```java
+public Set<User> fetchAllUserRecords() {
+    // Empty builder sets for usernames and User objects
+    Set<String> userIdSet = new HashSet<>();
+    Set<User> userSet = new HashSet<>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSetLocation =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSetLocation).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    set.viewAsSet().forEach((BinaryValue username) -> {
+        userIdSet.add(username.toString());
+    });
+
+    // Fetch User objects for each of the usernames stored in the set
+    userIdSet.forEach((String username) -> {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        User user = client.execute(fetch).getValue(User.class);
+        userSet.add(user);
+    });
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+  $users = [];
+
+  $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->buildLocation('usernames', 'user_info_sets', 'sets')
+    ->build()
+    ->execute();
+
+  $user_names = $response->getSet()->getData();
+  foreach($user_names as $user_name) {
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+      ->buildLocation($user_name, 'users')
+      ->build()
+      ->execute();
+
+    $users[$user_name] = $response->getObject()->getData();
+  }
+
+  return $users;
+}
+```
+
+```python
+# We'll create a generator object that will yield a list of Riak objects
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can retrieve that list of Riak objects later on
+list(fetch_all_user_records())
+```
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`.
If an
+object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) {
+    String usernameKey = String.format("user_%s", username);
+    Location loc = new Location(new Namespace("users"), usernameKey);
+    FetchValue fetchUser = new FetchValue.Builder(loc).build();
+    FetchValue.Response res = client.execute(fetchUser);
+    User userObject = res.getValue(User.class);
+    return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->buildLocation("user_{$user_name}", 'users')
+        ->build()
+        ->execute();
+
+    return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<bucket type>/buckets/<bucket>/keys/<key>
+```
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good': {
+    'season X': {
+      'episode 1': '<title>',
+      # ...
+    }
+  },
+  'bad': {
+    'season Y': {
+      'episode 1': '<title>',
+      # ...
+    }
+  }
+}
+```
+
+We can fetch the title of season 8, episode 6:
+
+```ruby
+# For the sake of example, we'll classify season 8 as good:
+
+simpsons['good']['season 8']['episode 6']
+
+# => "A Milhouse Divided"
+```
+
+If your data is best modeled as a three-layered hash, you may want to
+consider using bucket types in the way shown above.
+
+## Resources
+
+More on key/value modeling in Riak can be found in [this
+presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo)
+by Basho evangelist [Hector Castro](https://github.com/hectcastro), with
+the presentation slides available [on Speaker
+Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example).
diff --git a/content/riak/kv/2.9.2/developing/usage.md b/content/riak/kv/2.9.2/developing/usage.md
new file mode 100644
index 0000000000..24adae6030
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage.md
@@ -0,0 +1,133 @@
+---
+title: "Usage Overview"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Usage"
+    identifier: "developing_usage"
+    weight: 101
+    parent: "developing"
+toc: true
+---
+
+## In This Section
+
+#### [Creating Objects](./creating-objects)
+
+Creating and storing objects in Riak KV.
+
+[Learn More >>](./creating-objects)
+
+
+#### [Reading Objects](./reading-objects)
+
+Reading and fetching objects in Riak KV.
+
+[Learn More >>](./reading-objects)
+
+
+#### [Updating Objects](./updating-objects)
+
+Updating objects in Riak KV.
+
+[Learn More >>](./updating-objects)
+
+
+#### [Deleting Objects](./deleting-objects)
+
+Deleting objects in Riak KV.
+
+[Learn More >>](./deleting-objects)
+
+
+#### [Content Types](./content-types)
+
+Overview of content types and their usage.
+
+[Learn More >>](./content-types)
+
+
+#### [Using Search](./search)
+
+Tutorial on using search.
+
+[Learn More >>](./search)
+
+
+#### [Using MapReduce](./mapreduce)
+
+Guide to using MapReduce in applications.
+
+[Learn More >>](./mapreduce)
+
+
+#### [Using Secondary Indexes](./secondary-indexes)
+
+Overview and usage details of Secondary Indexes (2i).
+
+[Learn More >>](./secondary-indexes)
+
+
+#### [Bucket Types](./bucket-types)
+
+Describes how to use bucket properties.
+
+[Learn More >>](./bucket-types)
+
+
+#### [Using Commit Hooks](./commit-hooks)
+
+Tutorial on pre-commit and post-commit hook functions.
+
+[Learn More >>](./commit-hooks)
+
+
+#### [Creating Search Schemas](./search-schemas)
+
+Step-by-step guide on creating and using custom search schemas.
+
+[Learn More >>](./search-schemas)
+
+
+#### [Searching with Data Types](./searching-data-types)
+
+Guide on using search with Data Types.
+
+[Learn More >>](./searching-data-types)
+
+
+#### [Implementing a Document Store](./document-store)
+
+Tutorial on using Riak KV as a document store.
+
+[Learn More >>](./document-store)
+
+
+#### [Custom Extractors](./custom-extractors)
+
+Details on creating and registering custom extractors with Riak Search.
+
+[Learn More >>](./custom-extractors)
+
+
+#### [Client-side Security](./security)
+
+Overview of client-side security.
+
+[Learn More >>](./security)
+
+
+#### [Replication](./replication)
+
+Documentation on replication properties and their underlying implementation.
+
+[Learn More >>](./replication)
+
+
+#### [Conflict Resolution](./conflict-resolution)
+
+Guide to conflict resolution during object updates.
+ +[Learn More >>](./conflict-resolution) diff --git a/content/riak/kv/2.9.2/developing/usage/bucket-types.md b/content/riak/kv/2.9.2/developing/usage/bucket-types.md new file mode 100644 index 0000000000..768451878d --- /dev/null +++ b/content/riak/kv/2.9.2/developing/usage/bucket-types.md @@ -0,0 +1,98 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Bucket Types" + identifier: "usage_bucket_types" + weight: 108 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.2/dev/advanced/bucket-types + - /riak/kv/2.9.2/dev/advanced/bucket-types +--- + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +## Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +`default` bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` diff --git a/content/riak/kv/2.9.2/developing/usage/commit-hooks.md b/content/riak/kv/2.9.2/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..061d50520b --- /dev/null +++ b/content/riak/kv/2.9.2/developing/usage/commit-hooks.md @@ -0,0 +1,239 @@ +--- +title: "Using Commit Hooks" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Using Commit Hooks" + identifier: "usage_commit_hooks" + weight: 109 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.2/dev/using/commit-hooks + - /riak/kv/2.9.2/dev/using/commit-hooks +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types + +Pre- and post-commit hooks are functions that are invoked before or +after an object has been written to Riak. 
To provide a few examples,
+commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and then the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/2.9.2/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions are allowed three possible return values:
+
+- A Riak object --- This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before it is written.
+- `fail` --- The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/2.9.2/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` --- The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
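+
+To make the first return form concrete, here is a minimal sketch of a
+pre-commit hook that modifies an object before the write proceeds by
+lowercasing its plain-text value. The function name is illustrative, and
+it assumes the object's value is a latin-1/ASCII binary:
+
+```erlang
+%% Hypothetical example: normalize plain-text values to lowercase.
+%% Returning the updated object allows the (modified) write to proceed.
+precommit_lowercase(Object) ->
+    Value = riak_object:get_value(Object),
+    Lower = list_to_binary(string:to_lower(binary_to_list(Value))),
+    riak_object:update_value(Object, Lower).
+```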
+
+Errors that occur when processing Erlang pre-commit hooks will be
+reported in the `sasl-error.log` file with lines that start with
+`problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object
+(`Object`) as its input and runs a pattern-matching operation on the
+object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
+function determines that the object's size (determined by the `riak_object:get_value`
+function) is greater than 5,242,880 (5 MB in bytes), then the commit
+will fail with the message `Object is larger than 5MB.`
+This will stop the write. If the object is not larger than 5 MB, Riak
+will return the object and allow the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty
+list, meaning that no pre-commit hooks are specified by default. Adding
+one or more pre-commit hook functions to this list, as documented above,
+will cause Riak to start evaluating those hook functions when bucket
+entries are created, updated, or deleted. Riak stops evaluating
+pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way to use
+pre-commit hooks is to validate data before it is written to Riak.
+Below is an example that uses Erlang to validate a JSON object
+before it is written to Riak.
+
+First, here is a sample JSON object that will be evaluated by the hook:
+
+```json
+{
+  "user_info": {
+    "name": "Mark Phillips",
+    "age": "25"
+  },
+  "session_info": {
+    "id": 3254425,
+    "items": [29, 37, 34]
+  }
+}
+```
+
+The following hook will validate the JSON object:
+
+```erlang
+validate(Object) ->
+  try
+    mochijson2:decode(riak_object:get_value(Object)),
+    Object
+  catch
+    throw:invalid_utf8 ->
+      {fail, "Invalid JSON: Illegal UTF-8 character"};
+    error:Error ->
+      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
+  end.
+```
+
+**Note**: All pre-commit hook functions are executed for each create and update operation.
+
+## Post-Commit Hooks
+
+Post-commit hooks are run after a write has completed successfully. More
+specifically, the hook function is called immediately before the calling
+process is notified of the successful write.
+
+Hook functions must accept a single argument: the object instance just
+written. The return value of the function is ignored. As with pre-commit
+hooks, deletes are considered writes, so post-commit hook functions will
+need to inspect the object's metadata for the presence of `X-Riak-Deleted`
+to determine whether a delete has occurred. As with pre-commit hooks,
+errors that occur when processing post-commit hooks will be reported in
+the `sasl-error.log` file with lines that start with `problem invoking hook`.
+
+#### Example
+
+The following post-commit hook creates a secondary index on the `email`
+field of a JSON object:
+
+```erlang
+postcommit_index_on_email(Object) ->
+    %% Determine the target bucket name
+    Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),
+
+    %% Decode the JSON body of the object
+    {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),
+
+    %% Extract the email field
+    {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),
+
+    %% Create a new object for the target bucket
+    %% NOTE: This doesn't handle the case where the
+    %%       index object already exists!
+    IndexObj = riak_object:new(
+        Bucket, Key, <<>>, %% no object contents
+        dict:from_list(
+            [
+                {<<"content-type">>, "text/plain"},
+                {<<"Links">>,
+                    [
+                        {
+                            {riak_object:bucket(Object), riak_object:key(Object)},
+                            <<"indexed">>
+                        }]}
+            ]
+        )
+    ),
+
+    %% Get a riak client
+    {ok, C} = riak:local_client(),
+
+    %% Store the object
+    C:put(IndexObj).
+```
+
+
+### Chaining
+
+The default value of the bucket `postcommit` property is an empty list,
+meaning that no post-commit hooks are specified by default. Adding one
+or more post-commit hook functions to the list, as documented above,
+will cause Riak to start evaluating those hook functions immediately
+after data has been created, updated, or deleted. Each post-commit hook
+function runs in a separate process, so it's possible for several hook
+functions, triggered by the same update, to execute in parallel.
+
+**Note**: All post-commit hook functions are executed for each create,
+update, or delete.
diff --git a/content/riak/kv/2.9.2/developing/usage/conflict-resolution.md b/content/riak/kv/2.9.2/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..95be10de50
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/conflict-resolution.md
@@ -0,0 +1,677 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/conflict-resolution
+  - /riak/kv/2.9.2/dev/using/conflict-resolution
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#node) is capable of receiving requests without requiring that
+every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
+unavoidable. Often, Riak can resolve these conflicts on its own
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+
+{{% note title="Important note on terminology" %}}
+In versions of Riak prior to 2.0, vector clocks were the only causal context
+mechanism available in Riak, which changed with the introduction of dotted
+version vectors in 2.0. Please note that you may frequently find terminology in
+client library APIs, internal Basho documentation, and more that uses the term
+"vector clock" interchangeably with causal context in general. Riak's HTTP API
+still uses an `X-Riak-Vclock` header, for example, even if you are using dotted
+version vectors.
+{{% /note %}}
+
+But even when you use causal context, Riak cannot always decide which
+value is most causally recent, especially in cases involving concurrent
+updates to an object. So how does Riak behave when it can't decide on a
+single most-up-to-date value? **That is your choice**. A full listing of
+available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now,
+though, please bear in mind that we strongly recommend one of the
+following two options:
+
+1. If your data can be modeled as one of the currently available [Riak
+   Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types), we recommend using one of these types,
+   because all of them have conflict resolution _built in_, completely
+   relieving applications of the need to engage in conflict resolution.
+2. If your data cannot be modeled as one of the available Data Types,
+   we recommend allowing Riak to generate [siblings](#siblings) and to design your application to resolve
+   conflicts in a way that fits your use case. Developing your own
+   **conflict resolution strategy** can be tricky, but it has clear
+   advantages over other approaches.
+
+Because Riak allows for a mixed approach when storing and managing data,
+you can apply multiple conflict resolution strategies within a cluster.
+
+> **Note on strong consistency**
+>
+> In versions of Riak 2.0 and later, you have the option of using Riak in
+a strongly consistent fashion. This document pertains to usage of Riak
+as an _eventually_ consistent system. If you'd like to use Riak's
+strong consistency feature, please refer to the following documents:
+>
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/strong-consistency) --- A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/2.9.2/configuring/strong-consistency) --- A guide for operators
+> * [strong consistency][use ref strong consistency] --- A more theoretical explication of strong
+  consistency
+
+## Client- and Server-side Conflict Resolution
+
+Riak's eventual consistency model is powerful because Riak is
+fundamentally non-opinionated about how data resolution takes place.
+While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+approaches to conflict resolution that are available. In Riak, you can
+mix and match conflict resolution strategies at the bucket level,
+[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets)
+to consider when reasoning about conflict resolution are the
+`allow_mult` and `last_write_wins` properties.
+
+These properties provide you with the following basic options:
+
+### Timestamp-based Resolution
+
+If the [`allow_mult`](#siblings) parameter is set to
+`false`, Riak resolves all object replica conflicts internally and does
+not return siblings to the client.
How Riak resolves those conflicts +depends on the value that you set for a different bucket property, +[`last_write_wins`]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets). If `last_write_wins` is set to `false`, +Riak will resolve all conflicts on the basis of +[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are +attached to all Riak objects as metadata. + +The problem with timestamps is that they are not a reliable resolution +mechanism in distributed systems, and they always bear the risk of data +loss. A better yet still-problematic option is to adopt a +last-write-wins strategy, described directly below. + +### Last-write-wins + +Another way to manage conflicts is to set `allow_mult` to `false`, as +with timestamp-based resolution, while also setting the +`last_write_wins` parameter to +`true`. This produces a so-called last-write-wins (LWW) strategy whereby +Riak foregoes the use of all internal conflict resolution strategies +when making writes, effectively disregarding all previous writes. + +The problem with LWW is that it will necessarily drop some writes in the +case of concurrent updates in the name of preventing sibling creation. +If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. For examples of client-side sibling resolution, see the following +client-library-specific docs: + +* [Java]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/nodejs) + +In Riak versions 2.0 and later, `allow_mult` is set to `true` by default +for any [bucket types]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) that you create. 
This means +that if you wish to avoid client-side sibling resolution, you have a few +options: + +* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) + that set `allow_mult` to `false` +* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/2.9.2/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/2.9.2/configuring/reference#default-bucket-properties) for your + cluster. If you set the `buckets.default.allow_mult` parameter to + `false`, all bucket types that you create will have `allow_mult` set + to `false` by default. + +## Causal Context + +When a value is stored in Riak, it is tagged with a piece of metadata +called a **causal context** which establishes the object's initial +version. Causal context comes in one of two possible forms, depending +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context#vector-clocks) will be used. + +Causal context essentially enables Riak to compare the different values +of objects stored in Riak and to determine a number of important things +about those values: + + * Whether one value is a direct descendant of the other + * Whether the values are direct descendants of a common parent + * Whether the values are unrelated in recent heritage + +Using the information provided by causal context, Riak is frequently, +though not always, able to resolve conflicts between values without +producing siblings. + +Both vector clocks and dotted version vectors are non human readable and +look something like this: + +``` +a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA= +``` + +If `allow_mult` is set to `true`, you should _always_ use causal context +when updating objects, _unless you are certain that no object exists +under that key_. Failing to use causal context with mutable data, +especially for objects that are frequently updated, can lead to +[sibling explosion]({{<baseurl>}}riak/kv/2.9.2/using/performance/latency-reduction#siblings), which can +produce a variety of problems in your cluster. Fortunately, much of the +work involved with using causal context is handled automatically by +Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.2/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{<baseurl>}}riak/kv/2.9.2/developing/usage/updating-objects) document. + +## Siblings + +A **sibling** is created when Riak is unable to resolve the canonical +version of an object being stored, i.e. when Riak is presented with +multiple possible values for an object and can't figure out which one is +most causally recent. The following scenarios can create sibling values +inside of a single object: + +1. **Concurrent writes** --- If two writes occur simultaneously from +clients, Riak may not be able to choose a single value to store, in +which case the object will be given a sibling. These writes could happen +on the same node or on different nodes. +2. **Stale causal context** --- Writes from any client using a stale +[causal context]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context). 
This is a less likely scenario if a client updates +the object by reading the object first, fetching the causal context +currently attached to the object, and then returning that causal context +to Riak when performing the update (fortunately, our client libraries +handle much of this automatically). However, even if a client follows +this protocol when performing updates, a situation may occur in which an +update happens from a different client while the read/write cycle is +taking place. This may cause the first client to issue the write with an +old causal context value and for a sibling to be created. A client is +"misbehaved" if it habitually updates objects with a stale or no context +object. +3. **Missing causal context** --- If an object is updated with no causal +context attached, siblings are very likely to be created. This is an +unlikely scenario if you're using a Basho client library, but it _can_ +happen if you are manipulating objects using a client like `curl` and +forgetting to set the `X-Riak-Vclock` header. + +## Siblings in Action + +Let's have a more concrete look at how siblings work in Riak. First, +we'll create a bucket type called `siblings_allowed` with `allow_mult` +set to `true`: + +```bash +riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}' +riak-admin bucket-type activate siblings_allowed +riak-admin bucket-type status siblings_allowed +``` + +If the type has been activated, running the `status` command should +return `siblings_allowed is active`. Now, we'll create two objects and +write both of them to the same key without first fetching the object +(which obtains the causal context): + +```java +Location bestCharacterKey = + new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character"); + +RiakObject obj1 = new RiakObject() + .withContentType("text/plain") + .withValue(BinaryValue.create("Ren")); +RiakObject obj2 = new RiakObject() + .withContentType("text/plain") + .withValue(BinaryValue.create("Stimpy")); +StoreValue store1 = new StoreValue.Builder(obj1) + .withLocation(bestCharacterKey) + .build(); +StoreValue store2 = new StoreValue.Builder(obj2) + .withLocation(bestCharacterKey) + .build(); +client.execute(store1); +client.execute(store2); +``` + +```ruby +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj1 = Riak::RObject.new(bucket, 'best_character') +obj1.content_type = 'text/plain' +obj1.raw_data = 'Ren' +obj1.store + +obj2 = Riak::RObject.new(bucket, 'best_character') +obj2.content_type = 'text/plain' +obj2.raw_data = 'Stimpy' +obj2.store +``` + +```python +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj1 = RiakObject(client, bucket, 'best_character') +obj1.content_type = 'text/plain' +obj1.data = 'Ren' +obj1.store() + +obj2 = RiakObject(client, bucket, 'best_character') +obj2.content_type = 'text/plain' +obj2.data = 'Stimpy' +obj2.store() +``` + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); +``` + +```javascript +var obj1 = new Riak.Commands.KV.RiakObject(); +obj1.setContentType('text/plain'); +obj1.setBucketType('siblings_allowed'); +obj1.setBucket('nickolodeon'); +obj1.setKey('best_character'); +obj1.setValue('Ren'); + +var obj2 = new 
Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries]({{<baseurl>}}riak/kv/2.9.2/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.2/developing/getting-started) section.
+
+At this point, multiple objects have been stored in the same key without
+passing any causal context to Riak. Let's see what happens if we try to
+read the contents of the object:
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed',
+    bucket: 'nickolodeon',
+    key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found.
We should get this response: + +```java +com.basho.riak.client.cap.UnresolvedConflictException: Siblings found +``` + +```ruby +<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]> +``` + +```python +[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>] +``` + +```csharp +Sibling count: 2 + VTag: 1DSVo7VED8AC6llS8IcDE6 + VTag: 7EiwrlFAJI5VMLK87vU4tE +``` + +```javascript +info: nickolodeon/best_character has '2' siblings +``` + +```curl +Siblings: +175xDv0I3UFCfGRC7K7U9z +6zY2mUCFPEoL834vYCDmPe +``` + +As you can see, reading an object with sibling values will result in +some form of "multiple choices" response (e.g. `300 Multiple Choices` in +HTTP). If you're using the HTTP interface and want to view all sibling +values, you can attach an `Accept: multipart/mixed` header to your +request: + +```curl +curl -H "Accept: multipart/mixed" \ + http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character +``` + +Response (without headers): + +``` +ren +--WUnzXITIPJFwucNwfdaofMkEG7H + +stimpy +--WUnzXITIPJFwucNwfdaofMkEG7H-- +``` + +If you select the first of the two siblings and retrieve its value, you +should see `Ren` and not `Stimpy`. + +### Using Causal Context + +Once you are presented with multiple options for a single value, you +must determine the correct value. In an application, this can be done +either in an automatic fashion, using a use case-specific resolver, or +by presenting the conflicting objects to the end user. For more +information on application-side conflict resolution, see our +client-library-specific documentation for the following languages: + +* [Java]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/nodejs) + +We won't deal with conflict resolution in this section. Instead, we'll +focus on how to use causal context. + +After having written several objects to Riak in the section above, we +have values in our object: `Ren` and `Stimpy`. But let's say that we +decide that `Stimpy` is the correct value based on our application's use +case. In order to resolve the conflict, we need to do three things: + +1. Fetch the current object (which will return both siblings) +2. Modify the value of the object, i.e. make the value `Stimpy` +3. Write the object back to the `best_character` key + +What happens when we fetch the object first, prior to the update, is +that the object handled by the client has a causal context attached. At +that point, we can modify the object's value, and when we write the +object back to Riak, _the causal context will automatically be attached +to it_. 
Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+        bucketType: 'siblings_allowed',
+        bucket: 'nickolodeon',
+        key: 'best_character'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue('Stimpy');
+        client.storeValue({ value: riakObj, returnBody: true },
+            function (err, rslt) {
+                if (err) {
+                    throw new Error(err);
+                }
+
+                assert(rslt.values.length === 1);
+            }
+        );
+    }
+);
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible to have two clients that are
+simultaneously engaging in conflict resolution. To avoid a pathological
+divergence, you should be sure to limit the number of reconciliations and fail
+once that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/2.9.2/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant volume of updates is performed on a single
+object in a small period of time.
While updating a single object
+_extremely_ frequently is not recommended, you can tune Riak's vector
+clock pruning to prevent vector clocks from growing too large too
+quickly. More on pruning in the [section below](#vector-clock-pruning).
+
+### How does `last_write_wins` affect resolution?
+
+On the surface, it seems like setting `allow_mult` to `false`
+(the default) and `last_write_wins` to `true` would result in the same
+behavior, but there is a subtle distinction.
+
+Even though both settings return only one value to the client, setting
+`allow_mult` to `false` still uses vector clocks for resolution, whereas
+if `last_write_wins` is `true`, Riak reads the timestamp to determine
+the latest version. Deeper in the system, if `allow_mult` is `false`,
+Riak will still allow siblings to exist when they are created (via
+concurrent writes or network partitions), whereas setting
+`last_write_wins` to `true` means that Riak will overwrite the value
+with the one that has the later timestamp.
+
+When you don't care about sibling creation, setting `allow_mult` to
+`false` has the least surprising behavior: you get the latest value,
+but network partitions are handled gracefully. However, for cases in
+which keys are rewritten often (and quickly) and the new value isn't
+necessarily dependent on the old value, `last_write_wins` will provide
+better performance. Some use cases where you might want to use
+`last_write_wins` include caching, session storage, and insert-only
+(no updates).
+
+{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}}
+The combination of setting both the `allow_mult` and `last_write_wins`
+properties to `true` leads to undefined behavior and should not be used.
+{{% /note %}}
+
+## Vector Clock Pruning
+
+Riak regularly prunes vector clocks to prevent overgrowth based on four
+parameters which can be set for any bucket type that you create:
+
+Parameter | Default value | Description
+:---------|:--------------|:-----------
+`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned
+`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned
+`young_vclock` | `20` | If a vector clock entry is younger than this value (in seconds), it will not be pruned
+`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in seconds), it will be pruned
+
+This diagram shows how the values of these parameters dictate the vector
+clock pruning process:
+
+![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)
+
+## More Information
+
+Additional background information on vector clocks:
+
+* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock)
+* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/)
+* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/)
+* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563)
diff --git a/content/riak/kv/2.9.2/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/csharp.md
new file mode 100644
index 0000000000..fd62d3cdca
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/csharp.md
@@ -0,0 +1,119 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "C Sharp"
+    
identifier: "usage_conflict_resolution_csharp" + weight: 103 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.2/dev/using/conflict-resolution/csharp + - /riak/kv/2.9.2/dev/using/conflict-resolution/csharp +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak .NET client][riak_dotnet_client]. + +## How the .NET Client Handles Conflict Resolution + +In the Riak .NET client, every Riak object has a `siblings` property that +provides access to a list of that object's sibling values. If there are no +siblings, that property will return an empty list. + +Here's an example of an object with siblings: + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +So what happens if the count of `obj.Siblings` is greater than 0, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `Siblings` list and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `Siblings` list and will +fetch, update and store the definitive value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Now, modify the object's value +obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain); + +// Then, store the object which has vector clock attached +var putRslt = client.Put(obj); +CheckResult(putRslt); + +obj = putRslt.Value; +// Voila, no more siblings! +Debug.Assert(obj.Siblings.Count == 0); +``` + +### Choosing a value from `Siblings` + +This example shows a basic sibling resolution strategy in which the first +sibling is chosen as the canonical value. 
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
diff --git a/content/riak/kv/2.9.2/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..3c07a9d66a
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,58 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/conflict-resolution/golang
+  - /riak/kv/2.9.2/dev/using/conflict-resolution/golang
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, a fetch can return multiple sibling objects in its
+`Values` slice. If there are no siblings, the `Values` slice will contain a
+single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either: fetch, update, and store a
+canonical value; or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L125-L146)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type.
+ +[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210) diff --git a/content/riak/kv/2.9.2/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/java.md new file mode 100644 index 0000000000..6b3472ac33 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/java.md @@ -0,0 +1,272 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Java" + identifier: "usage_conflict_resolution_java" + weight: 100 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.2/dev/using/conflict-resolution/java + - /riak/kv/2.9.2/dev/using/conflict-resolution/java +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Java +client](https://github.com/basho/riak-java-client). + +## How the Java Client Handles Conflict Resolution + +The official Riak Java client provides a `ConflictResolver` interface +for handling sibling resolution. This interface requires that you +implement a `resolve` method that takes a Java `List` of objects of a +specific type that are stored in Riak and produces a single object of +that type, i.e. converts a `List<T>` to a single `T`. Once that +interface has been implemented, it can be registered as a singleton and +thereby applied to all read operations on a specific data type. Below is +an example resolver for the class `Foo`: + +```java +import com.basho.riak.client.api.cap.ConflictResolver; + +public class FooResolver implements ConflictResolver<Foo> { + @Override + public Foo resolve(List<Foo> siblings) { + // Insert your sibling resolution logic here + } +} +``` + +What happens within the `resolve` method is up to you and will always +depend on the use case at hand. You can implement a resolver that +selects a random `Foo` from the list, chooses the `Foo` with the most +recent timestamp (if you've set up the class `Foo` to have timestamps), +etc. In this tutorial we'll provide a simple example to get you started. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends" in the network. +Each user will bear the class `User`, which we'll create below. All of +the data for our application will be stored in buckets that bear the +[bucket type]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) `siblings`, and for this bucket type +`allow_mult` is set to `true`, which means that Riak will generate +siblings in certain cases---siblings that our application will need to +be equipped to resolve when they arise. + +The question that we need to ask ourselves now is this: if a given user +has sibling values, i.e. if there are multiple `friends` lists and Riak +can't decide which one is most causally recent, which list should be +deemed "correct" from the standpoint of the application? What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? + +This decision will always be yours to make. 
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+competing values have `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+import java.util.Set;
+
+public class User {
+    public String username;
+    public Set<String> friends;
+
+    public User(String username, Set<String> friends) {
+        this.username = username;
+        this.friends = friends;
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
+need to implement that interface in a way that is specific to the need
+at hand, i.e. taking a list of `User` objects and returning the `User`
+object that has the longest `friends` list:
+
+```java
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+import java.util.List;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            // Initialized to null so we can detect the all-empty case below
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied on all reads that involve that object type.
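+
+Once the resolver is registered, an ordinary fetch returns an
+already-resolved value. Here is a brief sketch of what that looks
+like---it assumes a connected `RiakClient` named `client` and a bucket
+named `users` (of the `siblings` bucket type), neither of which is
+prescribed by the client itself:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
+FetchValue fetch = new FetchValue.Builder(key).build();
+FetchValue.Response response = client.execute(fetch);
+
+// getValue() applies the registered UserResolver, so any siblings are
+// collapsed into a single User before the value is returned
+User bashobunny = response.getValue(User.class);
+```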
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.2/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // siblings list is empty and returning the one sibling if there
+        // is only one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets).
diff --git a/content/riak/kv/2.9.2/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/nodejs.md
new file mode 100644
index 0000000000..acb5f926a5
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/nodejs.md
@@ -0,0 +1,58 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "NodeJS"
+    identifier: "usage_conflict_resolution_nodejs"
+    weight: 104
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/conflict-resolution/nodejs
+  - /riak/kv/2.9.2/dev/using/conflict-resolution/nodejs
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Node.js client](https://github.com/basho/riak-nodejs-client).
+
+## How the Node.js Client Handles Conflict Resolution
+
+In the Riak Node.js client, the result of a fetch exposes any siblings
+through its `values` property. If there are no siblings, that property
+will contain an array with a single value in it.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68)
+
+So what happens if the length of `rslt.values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `values` array and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `values` array and will
+fetch, update, and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
diff --git a/content/riak/kv/2.9.2/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..4df351bae2
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,240 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/conflict-resolution/php
+  - /riak/kv/2.9.2/dev/using/conflict-resolution/php
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides what is needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+  ->build()
+  ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will be represented by the class `User`, which we'll create
+below. All of the data for our application will be stored in buckets
+that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? What criteria
+should be applied in making that decision? Should the lists be merged?
+Should we pick a `User` object at random?
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+competing values have `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll store the
+`friends` property as a PHP array and remove any duplicates when we
+merge lists.
+
+```php
+class User {
+    public $username;
+    public $friends;
+
+    public function __construct($username, array $friends = [])
+    {
+        $this->username = $username;
+        $this->friends = $friends;
+    }
+
+    public function __toString()
+    {
+        return json_encode([
+            'username' => $this->username,
+            'friends' => $this->friends,
+            'friends_count' => count($this->friends)
+        ]);
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```php
+$bashobunny = new User('bashobunny', ['fred', 'barney']);
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('bashobunny', 'users', 'siblings')
+  ->build()
+  ->execute();
+
+echo $response->hasSiblings(); // 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `$response->getSiblings()` array down to one member.
+In our case, we need a function that takes a Riak response object as its argument,
+applies some logic to the list of values contained in the `siblings` property
+of the object, and returns a single value. For our example use case here, we'll
+return the sibling with the longest `friends` list:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    // If there are no siblings, the response already carries a single
+    // canonical object
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $max_key = 0;
+    foreach ($siblings as $key => $sibling) {
+        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+            $max_key = $key;
+        }
+    }
+
+    return $siblings[$max_key];
+}
+```
+
+We can then embed this function into a more general function for fetching
+objects from the `users` bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+      ->buildLocation($username, 'users', 'siblings')
+      ->build()
+      ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.2/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friends list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that; the merged `User` object can then be written back to
+Riak as a separate step, as discussed above.
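+
+What follows is one possible sketch of such a resolver rather than code
+from the official client documentation; it assumes the `User` class and
+the `getData()` accessor used earlier on this page:
+
+```php
+use \Basho\Riak\Command;
+
+function merging_friends_list_resolver(Command\Object\Response $response)
+{
+    // With no siblings present, the single object is already canonical
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+
+    // Union the friends lists from all siblings; array_unique drops
+    // any usernames that appear in more than one list
+    $friends = [];
+    foreach ($siblings as $sibling) {
+        $friends = array_merge($friends, $sibling->getData()['friends']);
+    }
+    $friends = array_values(array_unique($friends));
+
+    // Every sibling shares the same username, since the username
+    // doubles as the key
+    $username = $siblings[0]->getData()['username'];
+
+    return new User($username, $friends);
+}
+```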
+
+The drawback to this approach is that it's more or less inevitable that a user
+will remove a friend from their friends list, and then that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets).
diff --git a/content/riak/kv/2.9.2/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/python.md
new file mode 100644
index 0000000000..73f7eb3e75
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/python.md
@@ -0,0 +1,254 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Python"
+    identifier: "usage_conflict_resolution_python"
+    weight: 102
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/conflict-resolution/python
+  - /riak/kv/2.9.2/dev/using/conflict-resolution/python
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Python
+client](https://github.com/basho/riak-python-client).
+
+## How the Python Client Handles Conflict Resolution
+
+In the official Python client, every object of the `RiakObject` class
+has a `siblings` property that provides access to a list of an object's
+sibling values. If there are no siblings, that property will return a
+list with only one item. Here's an example of an object with siblings:
+
+```python
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x1081da62c1>]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? The easiest way to resolve siblings automatically with
+the Python client is to create a conflict-resolving function that
+reduces an object's list of sibling values down to a single value. Such
+resolution functions can be registered either at the object level or the
+bucket level. A more complete explanation can be found in the section
+directly below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will
+be of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `friends` property that lists the usernames, as
+strings, of the user's friends. We will also create a `to_json` method,
+as we'll be storing each `User` object as JSON:
+
+```python
+class User(object):
+    def __init__(self, username, friends):
+        self.username = username
+        self.friends = friends
+
+    def to_json(self):
+        return vars(self)
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```python
+new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json()
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing and Registering a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = bucket.get('bashobunny')
+
+print(len(obj.siblings) > 1)
+```
+
+If we get `True`, then there are siblings. So what do we do in that
+case? The Python client allows us to write a conflict resolution hook
+function that will be triggered any time siblings are found, i.e. any
+time `len(obj.siblings) > 1`. A hook function like this needs to take a
+single `RiakObject` object as its argument, apply some sort of logic to
+the list of values contained in the `siblings` property, and ultimately
+reduce that list to a single "correct" value. For our example case, we'll
+return the value with the longest `friends` list:
+
+```python
+def longest_friends_list_resolver(riak_object):
+    # We'll specify a lambda function that operates on the length of
+    # each sibling's "friends" list:
+    lm = lambda sibling: len(sibling.data['friends'])
+    # Then we'll replace the siblings list with one that contains only
+    # the sibling with the longest "friends" list:
+    riak_object.siblings = [max(riak_object.siblings, key=lm), ]
+```
+
+### Registering a Conflict Resolver Function
+
+In the Python client, resolver functions can be registered at the object
+level, as in this example:
+
+```python
+from riak import RiakObject
+
+bucket = client.bucket_type('siblings').bucket('users')
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.resolver = longest_friends_list_resolver
+
+# Now, when the object is loaded from Riak, it will resolve to a single
+# value, and that single value is what gets stored:
+obj.reload()
+obj.store()
+```
+
+Alternatively, resolvers can be registered at the bucket level, so that
+the resolution is applied to all objects in the bucket:
+
+```python
+from riak import RiakObject
+
+bucket = client.bucket_type('siblings').bucket('users')
+bucket.resolver = longest_friends_list_resolver
+
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.reload()
+obj.store()
+
+# The resolver will also be applied if you perform operations using the
+# bucket object:
+
+bucket.get('bashobunny')
+bucket.get('some_other_user')
+```
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` object values and returns a single value. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including code examples
+from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.2/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists.
We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets). 
diff --git a/content/riak/kv/2.9.2/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/ruby.md
new file mode 100644
index 0000000000..8b8c54ad0f
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/conflict-resolution/ruby.md
@@ -0,0 +1,250 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Ruby"
+    identifier: "usage_conflict_resolution_ruby"
+    weight: 101
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/conflict-resolution/ruby
+  - /riak/kv/2.9.2/dev/using/conflict-resolution/ruby
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Ruby
+client](https://github.com/basho/riak-ruby-client).
+
+## How the Ruby Client Handles Conflict Resolution
+
+In the official Ruby client, every Riak object has a `siblings` property
+that provides access to a list of that object's sibling values. If there
+are no siblings, that property will return an array with only one item.
+Here's an example of an object with siblings:
+
+```ruby
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? In order to resolve siblings, you need to create a
+resolution function that takes a Riak object and reduces the `siblings`
+array down to a single value. An example is provided in the section
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will be
+of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` and a `friends` property that lists
+the usernames, as strings, of the user's friends. We will also create a
+`to_json` method, as we'll be storing each `User` object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
+```
+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username, type: 'siblings')
+  # Return the resolved content rather than the raw, possibly
+  # conflicted object
+  longest_friends_list_resolver(user_object)
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.2/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friends list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be merged into one
+    # array; "concat" appends the elements themselves rather than
+    # nesting one array inside another
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets).
diff --git a/content/riak/kv/2.9.2/developing/usage/content-types.md b/content/riak/kv/2.9.2/developing/usage/content-types.md
new file mode 100644
index 0000000000..d7885fe22c
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/content-types.md
@@ -0,0 +1,187 @@
+---
+title: "Content Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Content Types"
+    identifier: "usage_content_types"
+    weight: 104
+    parent: "developing_usage"
+toc: true
+---
+
+Riak KV is a fundamentally content-agnostic database. You can use it to
+store anything you want, from JSON to XML to HTML to binaries to images
+and beyond. It's important to note that _all_ objects stored in
+Riak need a specified content type. If you don't specify a
+content type, the reaction will vary based on your client library:
+
+```java
+// In the Java client, the response when storing an object without
+// specifying a content type will depend on what is being stored. If you
+// store a Java Map, for example, the client will automatically specify
+// that the object is "application/json"; if you store a String, the
+// client will specify "application/x-www-form-urlencoded"; POJOs are
+// stored as JSON by default, and so on.
+```
+
+```ruby
+# In the Ruby client, you must always specify a content type. If you
+# don't, you'll see the following error:
+ArgumentError: content_type is not defined!
+```
+
+```php
+# PHP will default to cURL's default content type for POST & PUT requests:
+# application/x-www-form-urlencoded
+
+# If you use the StoreObject::buildJsonObject() method when building your command,
+# it will store the item with application/json as the content-type
+```
+
+```python
+# In the Python client, the default content type is "application/json".
+# Because of this, you should always make sure to specify the content
+# type when storing other types of data.
+```
+
+```csharp
+// Using the Riak .NET Client, the response when storing an object without
+// specifying a content type will depend on what is being stored.
+// If you store a Dictionary, for example, the client will
+// automatically specify that the object is "application/json";
+// POCOs are stored as JSON by default, and so on.
+```
+
+```javascript
+// In the Node.js client, the default content type is "application/json".
+// Because of this, you should always make sure to specify the content
+// type when storing other types of data.
+```
+
+```erlang
+%% In the Erlang client, the response when storing an object without
+%% specifying a content type will depend on what is being stored. If
+%% you store a simple binary, for example, the client will automatically
+%% specify that the object is "application/octet-stream"; if you store a
+%% string, the client will specify "application/x-erlang-binary"; and so
+%% on.
+```
+
+```golang
+// In the Go client, you must always specify a content type.
+```
+
+Because content type negotiation varies so widely from client to client,
+we recommend consulting the documentation for your preferred client for
+more information.
+
+## Specifying Content Type
+
+For all writes to Riak, you will need to specify a content type, for
+example `text/plain` or `application/json`.
+
+```java
+Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(wildeGeniusQuote)
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = Riak::RObject.new(bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.raw_data = 'I have nothing to declare but my genius'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->buildObject('I have nothing to declare but my genius!', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = RiakObject(client, bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.data = 'I have nothing to declare but my genius'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
+var obj = new RiakObject(id, "I have nothing to declare but my genius",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('I have nothing to declare but my genius');
+client.storeValue({
+    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
+                       <<"genius">>,
+                       <<"I have nothing to declare but my genius">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("I have nothing to declare but my genius"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("quotes").
+    WithBucket("oscar_wilde").
+    WithKey("genius").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "I have nothing to declare but my genius" \
+  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Please note that POST is also a valid method for writes, for the sake
+# of compatibility
+```
diff --git a/content/riak/kv/2.9.2/developing/usage/creating-objects.md b/content/riak/kv/2.9.2/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..af82d3c477
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/creating-objects.md
@@ -0,0 +1,550 @@
+---
+title: "Creating Objects in Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Creating Objects"
+    identifier: "usage_creating_objects"
+    weight: 100
+    parent: "developing_usage"
+toc: true
+---
+
+[usage content types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/content-types
+
+Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
+requests. Here is the basic form of writes:
+
+```
+PUT /types/<type>/buckets/<bucket>/keys/<key>
+
+# If you're using HTTP to interact with Riak, you can also use POST
+```
+
+As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/bucket-types).
+
+The object we're storing will be very simple, just a basic text snippet
+of something that Rufus might say. Let's build the object and then store
+it.
+
+``` java
+String quote = "WOOF!";
+Namespace bucket = new Namespace("animals", "dogs");
+Location rufusLocation = new Location(bucket, "rufus");
+RiakObject rufusObject = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(BinaryValue.create(quote));
+StoreValue storeOp = new StoreValue.Builder(rufusObject)
+  .withLocation(rufusLocation)
+  .build();
+client.execute(storeOp);
+```
+
+``` ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = Riak::RObject.new(bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store
+```
+
+``` php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->buildObject('WOOF!', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+``` python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```
+
+``` csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var obj = new RiakObject(id, "WOOF!", "text/plain");
+var result = client.Put(obj);
+```
+
+``` javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('WOOF!');
+client.storeValue({
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+``` golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("WOOF!"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+Notice that we specified both a value for the object, i.e. `WOOF!`, and
+a content type, `text/plain`. See [content types][usage content types] for more information.
+
+Now, you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/2.9.2/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/bucket-types).
+
+### Store an Object
+
+Your application will often have its own method of generating the keys
+for its data, e.g. on the basis of timestamps. If so, storing that data
+is easy. The basic request looks like this:
+
+```
+PUT /types/TYPE/buckets/BUCKET/keys/KEY
+
+# If you're using HTTP, POST can be used instead of PUT. The only
+# difference between POST and PUT is that you should POST in cases where
+# you want Riak to auto-generate a key. More on this can be found in the
+# examples below.
+```
+
+There is no need to intentionally create buckets in Riak. They pop into
+existence when keys are added to them, and disappear when all keys have
+been removed from them. If you don't specify a bucket's type, the type
+[`default`]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) will be applied.
+
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(viperKey)
+  .withOption(StoreOption.W, new Quorum(3))
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+%% The write quorum is passed as an option to the put operation:
+riakc_pb_socket:put(Pid, Object, [{w, 3}]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3"
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. Like a `GET` request, `300 Multiple
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. As with a `GET` request, `300 Multiple
+Choices` may be returned if siblings existed or were created as part of
+the operation, and the response can be dealt with similarly.
+
+Normal HTTP status codes (responses will vary for client libraries):
+
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+For example, using the same object from above:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.RETURN_BODY, true)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3, returnbody: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+options.SetReturnBody(true);
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, returnBody: true, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var viper = riakObj.value;
+    logger.info("dodge viper: %s", viper.toString('utf8'));
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
+```
+
+### Store a New Object and Assign a Random Key
+
+If your application would rather leave key generation up to Riak, issue a
+`POST` request to the bucket URL instead of a `PUT` to a bucket/key
+pair:
+
+```
+POST /types/TYPE/buckets/BUCKET/keys
+```
+
+If you don't pass Riak a `key` name after the bucket, it will know to
+create one for you.
+
+Supported headers are the same as for bucket/key write requests, though
+`X-Riak-Vclock` will never be relevant for these POST requests.
+Supported query parameters are also the same as for bucket/key PUT
+requests.
+ +Normal status codes: + +* `201 Created` + +This command will store an object in the bucket `random_user_keys`, +which bears the bucket type `users`. + +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)
+
+// Output:
+// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
+```
+
+```curl
+curl -i -XPOST \
+  -H "Content-Type: text/plain" \
+  -d "this is a test" \
+  http://localhost:8098/types/users/buckets/random_user_keys/keys
+
+# In the output, you should see a Location header that will give you the
+# location of the object in Riak, with the key at the end:
+
+Location: /buckets/random_user_keys/keys/G7FYUXtTsEdru4NP32eijMIRK3o
+```
diff --git a/content/riak/kv/2.9.2/developing/usage/custom-extractors.md b/content/riak/kv/2.9.2/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..63c8e799fc
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/custom-extractors.md
@@ -0,0 +1,420 @@
+---
+title: "Custom Extractors"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Custom Extractors"
+    identifier: "usage_custom_extractors"
+    weight: 113
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/search/custom-extractors
+  - /riak/kv/2.9.2/dev/search/custom-extractors
+---
+
+Solr, and by extension Riak Search, has default extractors for a wide
+variety of data types, including JSON, XML, and plaintext. Riak Search
+ships with the following extractors:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+No specified type | `yz_noop_extractor`
+
+There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/usage/searching-data-types).
+
+If you're working with a data format that does not have a default Solr
+extractor, you can create your own and register it with Riak Search.
+We'll show you how to do so by way of example.
+
+## The Extractor Interface
+
+Creating a custom extractor involves creating an Erlang module that
+implements two functions:
+
+* `extract/1` --- Takes the contents of the object and calls `extract/2`
+  with the same contents and an empty list
+* `extract/2` --- Takes the contents of the object and returns an Erlang
+  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
+  single field name and a single value associated with that name
+
+The following extractor shows how a pure text extractor implements those
+two functions:
+
+```erlang
+-module(search_test_extractor).
+-include("yokozuna.hrl").
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+extract(Value, Opts) ->
+    FieldName = field_name(Opts),
+    [{FieldName, Value}].
+
+-spec field_name(proplist()) -> any().
+field_name(Opts) ->
+    proplists:get_value(field_name, Opts, text).
+```
+
+This extractor takes the contents of a `Value` and returns a proplist
+with a single field name (in this case `text`) and the single value.
+This function can be run in the Erlang shell. Let's run it providing the
+text `hello`:
+
+```erlang
+> c(search_test_extractor).
+%% {ok, search_test_extractor}
+
+> search_test_extractor:extract("hello").
+
+%% Console output:
+[{text, "hello"}]
+```
+
+Upon running this command, the value `hello` would be indexed in Solr
+under the field name `text`.
+If you wanted to find all objects with a `text` field that begins with
+`Fourscore`, you could use the Solr query `text:Fourscore*`, to give
+just one example.
+
+## An Example Custom Extractor
+
+Let's say that we're storing HTTP header packet data in Riak. Here's an
+example of such a packet:
+
+```
+GET http://www.google.com HTTP/1.1
+```
+
+We want to register the following information in Solr:
+
+Field name | Value | Extracted value in this example
+:----------|:------|:-------------------------------
+`method` | The HTTP method | `GET`
+`host` | The URL's host | `www.google.com`
+`uri` | The URI, i.e. what comes after the host | `/`
+
+The example extractor below would provide the three desired
+fields/values. It relies on the
+[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3)
+function from Erlang's standard library.
+
+```erlang
+-module(yz_httpheader_extractor).
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+%% In this example, we can ignore the Opts variable from the example
+%% above, hence the underscore:
+extract(Value, _Opts) ->
+    {ok,
+        {http_request,
+         Method,
+         {absoluteURI, http, Host, undefined, Uri},
+         _Version},
+        _Rest} = erlang:decode_packet(http, Value, []),
+    [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}].
+```
+
+This module will be stored in a file named `yz_httpheader_extractor.erl`
+(as Erlang filenames must match the module name). Now that our extractor
+has been written, it must be compiled and registered in Riak before it
+can be used.
+
+## Registering Custom Extractors
+
+In order to use a custom extractor, you must create a compiled `.beam`
+file out of your `.erl` extractor file and then tell Riak where that
+file is located. Let's say that we have placed our
+`yz_httpheader_extractor.erl` file in the directory `/opt/beams`. First,
+we need to compile that file:
+
+```bash
+erlc yz_httpheader_extractor.erl
+```
+
+To instruct Riak where to find the resulting
+`yz_httpheader_extractor.beam` file, we'll need to add a line to an
+`advanced.config` file in the node's `/etc` directory (more information
+can be found in our documentation on [advanced configuration]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#advanced-configuration)). Here's an
+example:
+
+```advancedconfig
+[
+    %% Other configs
+    {vm_args, [
+        {"-pa /opt/beams", ""}
+    ]},
+    %% Other configs
+]
+```
+
+This will instruct the Erlang VM on which Riak runs to look for compiled
+`.beam` files in the proper directory. You should restart the node at
+this point. Once the node has been restarted, you can use the node's
+Erlang shell to register the `yz_httpheader_extractor`. First, attach to
+the shell:
+
+```bash
+riak attach
+```
+
+At this point, we need to choose a MIME type for our extractor. Let's
+call it `application/httpheader`. Once you're in the shell:
+
+```erlang
+> yz_extractor:register("application/httpheader", yz_httpheader_extractor).
+```
+
+If successful, this command will return a list of currently registered
+extractors.
+It should look like this:
+
+```erlang
+[{default,yz_noop_extractor},
+ {"application/httpheader",yz_httpheader_extractor},
+ {"application/json",yz_json_extractor},
+ {"application/riak_counter",yz_dt_extractor},
+ {"application/riak_map",yz_dt_extractor},
+ {"application/riak_set",yz_dt_extractor},
+ {"application/xml",yz_xml_extractor},
+ {"text/plain",yz_text_extractor},
+ {"text/xml",yz_xml_extractor}]
+```
+
+If the `application/httpheader` extractor is part of that list, then the
+extractor has been successfully registered.
+
+## Verifying Our Custom Extractor
+
+Now that Riak Search knows how to decode and extract HTTP header packet
+data, let's store some in Riak and then query it. We'll put the example
+packet data from above in a `google_packet.bin` file. Then, we'll `PUT`
+that binary to Riak's `/search/extract` endpoint, making sure to use our
+custom MIME type:
+
+```curl
+curl -XPUT $RIAK_HOST/search/extract \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+That should return the following JSON:
+
+```json
+{
+  "method": "GET",
+  "host": "www.google.com",
+  "uri": "/"
+}
+```
+
+We can also verify this in the Erlang shell (whether in a Riak node's
+Erlang shell or otherwise):
+
+```erlang
+yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor).
+
+%% Console output:
+[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}]
+```
+
+## Indexing and Searching HTTP Header Packet Data
+
+Now that Solr knows how to extract HTTP header packet data, we need to
+create a schema that extends the [default schema]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
+to `<fields>` in the schema, which we'll name `http_header_schema` and
+store in a `http_header_schema.xml` file:
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="http_header_schema" version="1.5">
+<fields>
+  <!-- other required fields here -->
+
+  <field name="method" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="host" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/>
+</fields>
+```
+
+Now, we can store the schema:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("http_header_schema.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_xml = File.read('http_header_schema.xml')
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```php
+$schema_string = file_get_contents('http_header_schema.xml');
+(new \Basho\Riak\Command\Builder\StoreSchema($riak))
+  ->withName('http_header_schema')
+  ->withSchemaString($schema_string)
+  ->build()
+  ->execute();
+```
+
+```python
+schema_xml = open('http_header_schema.xml').read()
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/http_header_schema \
+  -H 'Content-Type: application/xml' \
+  --data-binary @http_header_schema.xml
+```
+
+Riak now has our schema stored and ready for use.
+Let's create a search index called `header_data` that's associated with
+our new schema:
+
+```java
+YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
+StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreIndex($riak))
+  ->withName('header_data')
+  ->usingSchema('http_header_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/header_data \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"http_header_schema"}'
+```
+
+Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types)
+for all of the HTTP header data that we plan to store. Any bucket that
+bears this type will be associated with our `header_data` search index.
+We'll call our bucket type `http_data_store`.
+
+```bash
+riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
+riak-admin bucket-type activate http_data_store
+```
+
+Let's use the same `google_packet.bin` file that we used previously and
+store it in a bucket with the `http_data_store` bucket type, making sure
+to use our custom `application/httpheader` MIME type:
+
+```java
+Location key = new Location(new Namespace("http_data_store", "packets"), "google");
+File packetData = new File("google_packet.bin");
+byte[] packetBinary = FileUtils.readFileToByteArray(packetData);
+
+RiakObject packetObject = new RiakObject()
+        .setContentType("application/httpheader")
+        .setValue(BinaryValue.create(packetBinary));
+
+StoreValue storeOp = new StoreValue.Builder(packetObject)
+        .withLocation(key)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+packet_data = File.read('google_packet.bin')
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = Riak::RObject.new(bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.raw_data = packet_data
+obj.store
+```
+
+```php
+$object = new \Basho\Riak\Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('google', 'packets', 'http_data_store')
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+packet_data = open('google_packet.bin').read()
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = RiakObject(client, bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.data = packet_data
+obj.store()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+Now that we have some header packet data stored, we can query our
+`header_data` index on whatever basis we'd like.
+First, let's verify that we'll get one result if we query for objects
+that have the HTTP method `GET`:
+
+```java
+// Using the same method from above:
+String query = "method:GET";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withQuery('method:GET')
+  ->withIndexName('header_data')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"
+
+# This should return a fairly large JSON object with a "num_found" field.
+# The value of that field should be 1.
+```
diff --git a/content/riak/kv/2.9.2/developing/usage/deleting-objects.md b/content/riak/kv/2.9.2/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..3a3d8f612a
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/deleting-objects.md
@@ -0,0 +1,152 @@
+---
+title: "Deleting Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Deleting Objects"
+    identifier: "usage_deleting_objects"
+    weight: 103
+    parent: "developing_usage"
+toc: true
+---
+
+The delete command follows a predictable pattern and looks like this:
+
+```
+DELETE /types/TYPE/buckets/BUCKET/keys/KEY
+```
+
+The normal HTTP response codes for `DELETE` operations are `204 No
+Content` and `404 Not Found`. 404 responses are *normal*, in the sense
+that `DELETE` operations are idempotent and not finding the resource has
+the same effect as deleting it.
+
+Let's try to delete the `genius` key from the `oscar_wilde` bucket
+(which bears the type `quotes`):
+
+```java
+Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
+client.execute(delete);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\DeleteObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+string key = rslt.Value.Key;
+id = new RiakObjectId("users", "random_user_keys", key);
+var del_rslt = client.Delete(id);
+```
+
+```javascript
+// continuing from above example
+options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    key: generatedKey
+};
+client.deleteValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>).
+```
+
+```golang
+// Continuing from above example
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithKey(rsp.GeneratedKey).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+```
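+
+Because 404s are normal responses here, you can verify a deletion simply
+by trying to read the key again. A quick sketch over HTTP, assuming the
+same local node as above:
+
+```curl
+curl -i http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Once the delete has been processed, the response should begin with:
+# HTTP/1.1 404 Object Not Found
+```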
+
+## Client Library Examples
+
+If you are updating an object that has been deleted---or if an update
+might target a deleted object---we recommend that you first fetch the
+[causal context]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context) of the object prior to updating.
+This can be done by setting the `deletedvclock` parameter to `true` as
+part of the [fetch operation]({{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers/fetch-object). This can also be done
+with the official Riak clients for Ruby, Java, and Erlang, as in the
+examples below:
+
+```ruby
+object.delete
+deleted_object = bucket.get('key', deletedvclock: true)
+deleted_object.vclock
+```
+
+```python
+# It is not currently possible to fetch the causal context for a deleted
+# key in the Python client.
+```
+
+```java
+Location loc = new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+FetchValue fetch = new FetchValue.Builder(loc)
+        .withOption(Option.DELETED_VCLOCK, true)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+System.out.println(response.getVclock().asString());
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"bucket_type">>, <<"bucket">>},
+                                <<"key">>,
+                                [deletedvclock]).
+
+%% In the Erlang client, the vector clock is accessible using the Obj
+%% object obtained above.
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type')
+  ->build()
+  ->execute();
+
+echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA
+```
diff --git a/content/riak/kv/2.9.2/developing/usage/document-store.md b/content/riak/kv/2.9.2/developing/usage/document-store.md
new file mode 100644
index 0000000000..0a9384c293
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/document-store.md
@@ -0,0 +1,613 @@
+---
+title: "Implementing a Document Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Implementing a Document Store"
+    identifier: "usage_document_store"
+    weight: 112
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/search/document-store
+  - /riak/kv/2.9.2/dev/search/document-store
+---
+
+Although Riak wasn't explicitly created as a document store, two
+features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/)---make it possible to use Riak as a
+highly scalable document store with rich querying capabilities. In this
+tutorial, we'll build a basic implementation of a document store using
+[Riak maps]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#maps).
+
+## Basic Approach
+
+Riak Search enables you to implement a document store in Riak in a
+variety of ways. You could, for example, store and query JSON objects or
+XML and then retrieve them later via Solr queries. In this tutorial,
+however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#maps),
+index that data using Riak Search, and then run Solr queries against
+those stored objects.
+
+You can think of these Search indexes as **collections**. 
Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/2.9.2/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. + +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). 
+Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @blog_post_schema.xml +``` + +With our schema uploaded, we can create an index called `blog_posts` and +associate that index with our schema: + +```java +YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema"); +StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('blog_posts') + ->usingSchema('blog_post_schema') + ->build() + ->execute(); +``` + +```python +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```csharp +var idx = new SearchIndex("blog_posts", "blog_post_schema"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: 'blog_post_schema', + indexName: 'blog_posts' +}; +client.storeIndex(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/blog_posts \ + -H 'Content-Type: application/json' \ + -d '{"schema": "blog_post_schema"}' +``` + +## How Collections will Work + +Collections are not a concept that is native to Riak but we can easily +mimic collections by thinking of a bucket type as a collection. When we +associate a bucket type with a Riak Search index, all of the objects +stored in any bucket of that bucket type will be queryable on the basis +of that one index. For this tutorial, we'll create a bucket type called +`cms` and think of that as a collection. 
We could also restrict our
+`blog_posts` index to a single bucket just as easily and think of that
+as a queryable collection, but we will not do that in this tutorial.
+
+The advantage of the bucket-type-based approach is that we could store
+blog posts from different blogs in different buckets and query them all
+at once as part of the same index. It depends on the use case at
+hand. In this tutorial, we'll only be storing posts from one blog, which
+is called "Cat Pics Quarterly" and provides in-depth theoretical
+discussions of cat pics with a certain number of Reddit upvotes. All of
+the posts in this blog will be stored in the bucket
+`cat_pics_quarterly`.
+
+First, let's create our `cms` bucket type and associate it with the
+`blog_posts` index:
+
+```bash
+riak-admin bucket-type create cms \
+  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
+riak-admin bucket-type activate cms
+```
+
+Now, any object stored in any bucket of the type `cms` will be indexed
+as part of our "collection."
+
+## Storing Blog Posts as Maps
+
+Now that we know how each element of a blog post can be translated into
+one of the Riak Data Types, we can create an interface in our
+application to serve as that translation layer. Using the method
+described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-modeling), we can construct a
+class that looks like this:
+
+```java
+import java.util.Set;
+
+public class BlogPost {
+    private String title;
+    private String author;
+    private String content;
+    private Set<String> keywords;
+    private DateTime datePosted;
+    private Boolean published;
+    private static final String bucketType = "cms";
+
+    private Location location;
+
+    private RiakClient client;
+
+    public BlogPost(RiakClient client,
+                    String bucketName,
+                    String title,
+                    String author,
+                    String content,
+                    Set<String> keywords,
+                    DateTime datePosted,
+                    Boolean published) {
+        this.client = client;
+        this.location = new Location(new Namespace(bucketType, bucketName), null);
+        this.title = title;
+        this.author = author;
+        this.content = content;
+        this.keywords = keywords;
+        this.datePosted = datePosted;
+        this.published = published;
+    }
+
+    public void store() throws Exception {
+        RegisterUpdate titleUpdate = new RegisterUpdate(title);
+        RegisterUpdate authorUpdate = new RegisterUpdate(author);
+        RegisterUpdate contentUpdate = new RegisterUpdate(content);
+        SetUpdate keywordsUpdate = new SetUpdate();
+        for (String keyword : keywords) {
+            keywordsUpdate.add(keyword);
+        }
+        RegisterUpdate dateUpdate =
+            new RegisterUpdate(datePosted.toString("yyyy-MM-dd HH:mm"));
+        FlagUpdate publishedUpdate = new FlagUpdate(published);
+        MapUpdate mapUpdate = new MapUpdate()
+            .update("title", titleUpdate)
+            .update("author", authorUpdate)
+            .update("content", contentUpdate)
+            .update("keywords", keywordsUpdate)
+            .update("date", dateUpdate)
+            .update("published", publishedUpdate);
+        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
+            .build();
+        client.execute(storeBlogPost);
+    }
+}
+```
+
+```ruby
+class BlogPost
+  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
+    bucket = client.bucket_type('cms').bucket(bucket_name)
+    map = Riak::Crdt::Map.new(bucket, nil)
+    map.batch do |m|
+      m.registers['title'] = title
+      m.registers['author'] = author
+      m.registers['content'] = content
+      keywords.each do |k|
+        m.sets['keywords'].add(k)
+      end
+      m.registers['date'] = date_posted
+      if published
+        m.flags['published'] = true
+      end
+    end
+  end
+end
+```
+
+```php
+class BlogPost {
+    private $title = '';
+    private $author = '';
+    private $content = '';
+    private $keywords = [];
+    private $datePosted = '';
+    private $published = false;
+    private $bucketType = "cms";
+
+    private $bucket = null;
+
+    private $riak = null;
+
+    public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
+    {
+        $this->riak = $riak;
+        $this->bucket = new Bucket($bucket, $this->bucketType);
+        $this->title = $title;
+        $this->author = $author;
+        $this->content = $content;
+        $this->keywords = $keywords;
+        $this->datePosted = $date;
+        $this->published = $published;
+    }
+
+    public function store()
+    {
+        $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));
+
+        foreach($this->keywords as $keyword) {
+            $setBuilder->add($keyword);
+        }
+
+        (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
+            ->updateRegister('title', $this->title)
+            ->updateRegister('author', $this->author)
+            ->updateRegister('content', $this->content)
+            ->updateRegister('date', $this->datePosted)
+            ->updateFlag('published', $this->published)
+            ->updateSet('keywords', $setBuilder)
+            ->withBucket($this->bucket)
+            ->build()
+            ->execute();
+    }
+}
+```
+
+```python
+from riak.datatypes import Map
+
+class BlogPost:
+    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
+        bucket = client.bucket_type('cms').bucket(bucket_name)
+        self.map = Map(bucket, None)
+        self.map.registers['title'].assign(title)
+        self.map.registers['author'].assign(author)
+        self.map.registers['content'].assign(content)
+        for k in keywords:
+            self.map.sets['keywords'].add(k)
+        self.map.registers['date'].assign(date_posted)
+        if published:
+            self.map.flags['published'].enable()
+        self.map.store()
+```
+
+```csharp
+/*
+ * Please see the code in the RiakClientExamples project:
+ * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
+ */
+```
+
+```javascript
+/*
+ * Please see the code in the examples repository:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
+ */
+```
+
+Now, we can store some blog posts. We'll start with just one:
+
+```java
+Set<String> keywords = new HashSet<String>();
+keywords.add("adorbs");
+keywords.add("cheshire");
+
+BlogPost post1 = new BlogPost(client, // client object
+                              "cat_pics_quarterly", // bucket
+                              "This one is so lulz!", // title
+                              "Cat Stevens", // author
+                              "Please check out these cat pics!", // content
+                              keywords, // keywords
+                              new DateTime(), // date posted
+                              true); // published
+try {
+    post1.store();
+} catch (Exception e) {
+    System.out.println(e);
+}
+```
+
+```ruby
+keywords = ['adorbs', 'cheshire']
+date = Time.now.strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost.new('cat_pics_quarterly',
+                          'This one is so lulz!',
+                          'Cat Stevens',
+                          'Please check out these cat pics!',
+                          keywords,
+                          date,
+                          true)
+```
+
+```php
+$keywords = ['adorbs', 'cheshire'];
+$date = new \DateTime('now');
+
+$post1 = new BlogPost(
+    $riak, // client object
+    'cat_pics_quarterly', // bucket
+    'This one is so lulz!', // title
+    'Cat Stevens', // author
+    'Please check out these cat pics!', // content
+    $keywords, // keywords
+    $date, // date posted
+    true // published
+);
+```
+
+```python
+import datetime
+
+keywords = ['adorbs', 'cheshire']
+date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost('cat_pics_quarterly',
+                      'This one is so lulz!',
+                      'Cat Stevens',
+                      'Please check out these cat pics!',
+                      keywords,
+                      date,
+                      True)
+```
+
+```csharp
+var keywords = new HashSet<string> { "adorbs", "cheshire" };
+
+var post = new BlogPost(
+    "This one is so lulz!",
+    "Cat Stevens",
+    "Please check out these cat pics!",
+    keywords,
+    DateTime.Now,
+    true);
+
+var repo = new BlogPostRepository(client, "cat_pics_quarterly");
+string id = repo.Save(post);
+```
+
+```javascript
+var post = new BlogPost(
+    'This one is so lulz!',
+    'Cat Stevens',
+    'Please check out these cat pics!',
+    [ 'adorbs', 'cheshire' ],
+    new Date(),
+    true
+);
+
+var repo = new BlogPostRepository(client, 'cat_pics_quarterly');
+
+repo.save(post, function (err, rslt) {
+    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
+});
+```
+
+## Querying
+
+Now that we have some blog posts stored in our "collection," we can
+start querying for whatever we'd like. Let's say that we want to find
+all blog posts with the keyword `funny` (after all, some cat pics are
+quite serious, and we may not want those).
+
+```java
+String index = "blog_posts";
+String query = "keywords_set:funny";
+
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'keywords_set:funny')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('keywords_set:funny')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'keywords_set:funny')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('keywords_set:funny')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny"
+```
+
+Or we can find posts that contain the word `furry`:
+
+```java
+String index = "blog_posts";
+String query = "content_register:furry";
+
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'content_register:furry')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('content_register:furry')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'content_register:furry')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('content_register:furry')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry"
+```
+
+Here are some more possible queries:
+
+Info | Query
+:----|:-----
+Unpublished posts | `published_flag:false`
+Titles that begin with `Loving*` | `title_register:Loving*`
+Post bodies containing the words `furry` and `jumping` | `content_register:(furry AND jumping)`
diff --git a/content/riak/kv/2.9.2/developing/usage/mapreduce.md b/content/riak/kv/2.9.2/developing/usage/mapreduce.md
new file mode 100644
index 0000000000..7b7a140b4f
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/mapreduce.md
@@ -0,0 +1,242 @@
+---
+title: "Using MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Using MapReduce"
+    identifier: "usage_mapreduce"
+    weight: 106
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/mapreduce
+  - /riak/kv/2.9.2/dev/using/mapreduce
+---
+
+[usage 2i]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes
+[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search
+[usage types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types
+[api http]: {{<baseurl>}}riak/kv/2.9.2/developing/api/http
+[api pb]: {{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode
+[guide mapreduce]: 
{{<baseurl>}}riak/kv/2.9.2/developing/app-guide/advanced-mapreduce + +{{% note title="Use MapReduce sparingly" %}} +In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive to the extent that they can degrade performance in +production clusters operating under load. Thus, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. +{{% /note %}} + +MapReduce (M/R) is a technique for dividing data processing work across +a distributed system. It takes advantage of the parallel processing +power of distributed systems and also reduces network bandwidth, as the +algorithm is passed around to where the data lives rather than +transferring a potentially huge dataset to a client algorithm. + +You can use MapReduce for things like: filtering documents by +tags, counting words in documents, and extracting links to related data. +In Riak KV, MapReduce is one method for querying that is not strictly based +on key querying, alongside [secondary indexes][usage 2i] +and [search][usage search]. MapReduce jobs can be submitted through the +[HTTP API][api http] or the [Protocol Buffers API][api pb], although we +strongly recommend using the Protocol Buffers API for performance +reasons. + +## Features + +* Map phases execute in parallel with data locality. +* Reduce phases execute in parallel on the node where the job was + submitted. +* MapReduce queries written in Erlang. + +## When to Use MapReduce + +* When you know the set of objects over which you want to MapReduce + (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key) +* When you want to return actual objects or pieces of objects and not + just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on + non-key-based queries, but they only return lists of keys and not + whole objects. +* When you need the utmost flexibility in querying your data. MapReduce + gives you full access to your object and lets you pick it apart any + way you want. + +## When Not to Use MapReduce + +* When you want to query data over an entire bucket. MapReduce uses a + list of keys, which can place a lot of demand on the cluster. +* When you want latency to be as predictable as possible. + +## How it Works + +The MapReduce framework helps developers divide a query into steps, +divide the dataset into chunks, and then run those step/chunk pairs in +separate physical hosts. + +There are two steps in a MapReduce query: + +* **Map** --- The data collection phase, which breaks up large chunks of + work into smaller ones and then takes action on each chunk. Map + phases consist of a function and a list of objects on which the map + operation will operate. +* **Reduce** --- The data collation or processing phase, which combines + the results from the map step into a single output. The reduce phase + is optional. + +Riak KV MapReduce queries have two components: + +* A list of inputs +* A list of phases + +The elements of the input list are object locations as specified by +[bucket type][usage types], bucket, and key. The elements of the +phases list are chunks of information related to a map, a reduce, or a +link function. + +A MapReduce query begins when a client makes the request to Riak KV. 
The +node that the client contacts to make the request becomes the +*coordinating node* responsible for the MapReduce job. As described +above, each job consists of a list of phases, where each phase is either +a map or a reduce phase. The coordinating node uses the list of phases +to route the object keys and the function that will operate over the +objects stored in those keys and instruct the proper [vnode][glossary vnode] to +run that function over the right objects. + +After running the map function, the results are sent back to the +coordinating node. This node then concatenates the list and passes that +information over to a reduce phase on the same coordinating node, +assuming that the next phase in the list is a reduce phase. + +The diagram below provides an illustration of how a coordinating vnode +orchestrates a MapReduce job. + +![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png) + +## Example + +In this example, we'll create four objects with the text "caremad" +repeated a varying number of times and store those objects in the bucket +`training` (which does not bear a [bucket type][usage types]). +An Erlang MapReduce function will be used to count the occurrences of +the word "caremad." + +### Data object input commands + +For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) +in conjunction with Riak KV's [HTTP API][api http] to store the objects: + +```curl +curl -XPUT http://localhost:8098/buckets/training/keys/foo \ + -H 'Content-Type: text/plain' \ + -d 'caremad data goes here' + +curl -XPUT http://localhost:8098/buckets/training/keys/bar \ + -H 'Content-Type: text/plain' \ + -d 'caremad caremad caremad caremad' + +curl -XPUT http://localhost:8098/buckets/training/keys/baz \ + -H 'Content-Type: text/plain' \ + -d 'nothing to see here' + +curl -XPUT http://localhost:8098/buckets/training/keys/bam \ + -H 'Content-Type: text/plain' \ + -d 'caremad caremad caremad' +``` + +### MapReduce invocation + +To invoke a MapReduce function from a compiled Erlang program requires +that the function be compiled and distributed to all nodes. + +For interactive use, however, it's not necessary to do so; instead, we +can invoke the client library from the +[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define +functions to send to Riak KV on the fly. + +First we defined the map function, which specifies that we want to get +the key for each object in the bucket `training` that contains the text +`caremad`. + +We're going to generalize and optimize it a bit by supplying a +compiled regular expression when we invoke MapReduce; our function +will expect that as the third argument. + +```erlang +ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of + {match, Matches} -> [{riak_object:key(O), length(Matches)}]; + nomatch -> [{riak_object:key(O), 0}] +end end. +``` + +Next, to call `ReFun` on all keys in the `training` bucket, we can do +the following in the Erlang shell. + +{{% note title="Warning" %}} +Do not use this in a production +environment; listing all keys to identify those in the `training` bucket +is a very expensive process. +{{% /note %}} + +```erlang +{ok, Re} = re:compile("caremad"). +``` + +That will return output along the following lines, verifying that +compilation has completed: + +``` +{ok,{re_pattern,0,0, + <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100, + ...>>}} +``` + +Then, we can create a socket link to our cluster: + +```erlang +{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087). 
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+    [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
+
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
diff --git a/content/riak/kv/2.9.2/developing/usage/next-gen-replication.md b/content/riak/kv/2.9.2/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..f5ce9d905c
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/next-gen-replication.md
@@ -0,0 +1,150 @@
+---
+title: "Next-Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.9.2"
+menu:
+  riak_kv-2.9.2:
+    name: "Next Gen Replication"
+    identifier: "learn_concepts_next_gen_replication"
+    weight: 108
+    parent: "learn_concepts"
+version_history:
+  in: "2.9.1+"
+toc: true
+aliases:
+  - /riak-docs/riak/2.9.2/dev/using/nextgenreplication
+---
+[concept TicTac aae]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/tictac-active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/replication
+
+## Next Generation Replication - How it Works
+
+### Replication Actors
+
+Each node in `riak_kv` starts three processes that manage the inter-cluster replication: a TicTac AAE full-sync manager, a replication queue source manager, and a replication queue sink manager. All processes are started by default (whether or not replication is enabled), but will only play an active role should replication be configured. Further details on the processes involved:
+
+* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`
+
+  * There is a single actor on each node that manages the full-sync reconciliation workload configured for that node.
+
+  * Each node is configured with the details of a peer node at a remote cluster. Each manager is responsible for controlling cluster-wide hashtree exchanges between the local node and the peer node, and to prompt any repairs required across the cluster (not just on this node). The information is exchanged between the peers, but that information represents the data across the whole cluster. Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`. 
+
+  * Each node is configured with a schedule to determine how frequently this manager will run its reconcile and repair operations.
+
+  * It is an administrator's responsibility to ensure the cluster AAE workload is distributed across nodes with sufficient diversity to ensure correct operation under failure. Work is not re-distributed between nodes in response to failure on either the local or remote cluster, so there must be other nodes already configured to share that workload to continue operation under failure conditions.
+
+  * Each node can only full-sync with one other cluster (via the one peer node). If the cluster needs to full-sync with more than one cluster, then the administrator should ensure different nodes have the different configurations necessary to achieve this.
+
+  * Scheduling of work to minimise concurrency of reconciliation operations is managed by this actor using a simple, coordination-free mechanism.
+
+  * The administrator may at run-time suspend or resume the regular running of full-sync operations on any given node via the `riak_kv_ttaaefs_manager`.
+
+* __Replication Queue Source-Side Manager__
+
+  * There is a single actor on each node that manages the queueing of replication object references to be consumed from other clusters. This actor runs a configurable number of queues, which contain pointers to data which is required to be consumed by different remote clusters.
+
+  * The general pattern is that each delta within a cluster will be published once via the `riak_kv_replrtq_src` on a node local to the discovery of the change. Each queue which is a source of updates will have multiple consumers spread across multiple sink nodes on the receiving cluster - where each sink-side node's consumers are being managed by a `riak_kv_replrtq_snk` process on that node.
+
+  * Queues may have data filtering rules to restrict what changes are distributed via that queue. The filters can restrict replication to a specific bucket or bucket type, or a bucket name prefix, or allow any change to be published to that queue.
+
+  * __Real-time replication__ changes (i.e. PUTs that have just been co-ordinated on this node within the cluster), are sent to the `riak_kv_replrtq_src` in one of the following formats:
+    * {Bucket, Key, Clock, {tombstone, Object}};
+    * {Bucket, Key, Clock, {object, Object}};
+    * {Bucket, Key, Clock, to_fetch}.
+
+  * Real-time replicated objects are the highest priority items to be queued, and are placed on __every queue whose data filtering rules are matched__ by the object. If the priority queue has grown beyond a limited number of items (the number being defined in `riak_kv.replrtq_srcobjectlimit`), then any {object, Object} reference is stripped and replaced with `to_fetch`. This is to help limit the memory consumed by the queue during failure conditions, i.e. when a sink has stopped consuming from the source queue.
+
+  * Changes identified by __AAE full-sync replication__ processes run by the `riak_kv_ttaaefs` manager on the local node are sent to the `riak_kv_replrtq_src` as references, and queued as the second highest priority. These changes are queued only on __a single queue defined within the configuration__ of `riak_kv_ttaaefs_manager`. The changes queued are only references to the object (Bucket, Key and Clock) not the actual object.
+
+  * Changes identified by __AAE fold operations__ for administrator initiated transition or repair operations (e.g. 
+fold over a bucket or key-range, or for a given range of modified dates) are sent to the `riak_kv_replrtq_src` to be queued as the lowest priority onto __a single queue defined by the administrator when initiating the AAE fold operation__. The changes queued are only references to the object (Bucket, Key and Clock), not the actual object - and are only the changes discovered through the fold running on vnodes local to this node.
+
+  * Should the local node fail, all undelivered object references will be dropped.
+
+  * Queues are bounded, with limits set separately for each priority. Items are consumed from the queue in strict priority order, so a backlog of non-real-time replication events cannot cause a backlog or failure in real-time events.
+
+  * The queues are provided using the existing `riak_core_priority_queue` module in Riak.
+
+  * The administrator may at run-time suspend or resume the publishing of data to specific queues via the `riak_kv_replrtq_src` process.
+
+* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk`
+
+  * There is a single actor on each node that manages the process of consuming from queues on the `riak_kv_replrtq_src` on remote clusters.
+
+  * The `riak_kv_replrtq_snk` can be configured to consume from multiple queues, across an open-ended number of peers. For instance, if each node on Cluster A maintains a queue named `cluster_c_full`, and each node on Cluster B maintains a queue named `cluster_c_partial`, then `riak_kv_replrtq_snk` can be configured to consume from `cluster_c_full` on every node in Cluster A and from `cluster_c_partial` on every node in Cluster B.
+
+  * The `riak_kv_replrtq_snk` manages a finite number of workers for consuming from remote peers. The `riak_kv_replrtq_snk` tracks the results of work in order to back off slightly from peers regularly not returning results to consume requests (in favour of those peers indicating a backlog by regularly returning results). The `riak_kv_replrtq_snk` also tracks the results of work in order to back off severely from those peers returning errors (so as not to lock too many workers consuming from unreachable nodes).
+
+  * The administrator may at run-time suspend or resume the consuming of data from specific queues or peers via the `riak_kv_replrtq_snk`.
+
+### Real-time Replication - Step by Step
+
+Previous replication implementations initiate replication through a post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm` after "enough" responses have been received from other vnodes (based on the n, w, dw and pw values for the PUT). Without enough responses, the replication hook is not fired, although the client should receive an error and retry. This process of retrying may eventually fire the hook - although it is possible for a PUT to fail, the hook never to be fired, and yet a subsequent GET to succeed locally (due to read-repair and anti-entropy), leaving no clue that the object has not been replicated.
+
+In implementing the new replication solution, the point of firing off replication has been changed to the point at which the co-ordinated PUT is completed. So the replication of the PUT to the remote clusters may occur in parallel to the replication of the PUT to other nodes in the source cluster. This is the first opportunity where sufficient information is known (e.g. the updated vector clock); replicating here reduces the size of the time-window of inconsistency between the clusters, and also reduces the window of opportunity for a PUT to succeed but not have replication triggered.
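+
+For orientation before the step-by-step detail, the source-side behaviour described below is driven by a handful of `riak_kv` settings. Here is a minimal sketch in `advanced.config` form - the setting names are those referenced later in this section, but the exact format, units, and defaults are assumptions to be verified against the configuration schema shipped with your release:
+
+```erlang
+%% Illustrative sketch only - verify names, units and defaults against
+%% the riak.conf schema for this release before use.
+{riak_kv, [
+    %% Allow this node to act as a source of real-time replication
+    {replrtq_enablesrc, true},
+
+    %% Objects at or below this size are queued whole; larger objects
+    %% are queued as to_fetch references (default 200KB)
+    {replrtq_srcobjectsize, 204800},
+
+    %% Beyond this queue length, whole objects are stripped back to
+    %% to_fetch references (default 1000)
+    {replrtq_srcobjectlimit, 1000},
+
+    %% Absolute per-priority queue limit, beyond which new references
+    %% are discarded (default 300,000)
+    {replrtq_srcqueuelimit, 300000}
+]}.
+```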
+
+Replication is fired within the `riak_kv_vnode` `actual_put/8`. On condition of the vnode being a co-ordinator of the put, and of `riak_kv.replrtq_enablesrc` being set to enabled (true), the following work is done:
+
+- The object reference to be replicated is determined; this is the type of reference to be placed on the replication queue.
+
+  - If the object is now a tombstone, the whole object is used as the replication reference. The whole object is used due to the small size of the object, and the need to avoid race conditions with reaping activity if `delete_mode` is not `keep` - the cluster may not be able to fetch the tombstone to replicate in the future. The whole object must be kept on the queue and not be filtered by the `riak_kv_replrtq_src` to be replaced with a `to_fetch` reference.
+
+  - If the object is below the `riak_kv.replrtq_srcobjectsize` (default 200KB) then the whole object will be sent to the `riak_kv_replrtq_src`, and it will be queued as a whole object as long as the current size of the priority real-time queue does not exceed the `riak_kv.replrtq_srcobjectlimit` (default 1000). If an object is over the size limit, a `to_fetch` reference will be sent instead of the object, and if the queue is too large the `riak_kv_replrtq_src` will substitute a `to_fetch` reference before queueing.
+
+- The `{Bucket, Key, Clock, ObjectReference}` is cast to the `riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the priority queue.
+
+- The queue has a configurable absolute limit that is applied individually for each priority. The limit is configured via `riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5 minutes of traffic at 1,000 PUTs per second). When this limit is reached, new replication references are discarded on receipt rather than queued - these discarded references will need to eventually be re-replicated via full-sync.
+
+The reference now needs to be handled by the `riak_kv_replrtq_src`. The task list for this process is:
+
+- Assign a priority to the replication event depending on what prompted the replication (e.g. highest priority to real-time events received from co-ordinator vnodes).
+
+- Add the reference to the tail of __every__ matching queue based on priority. Each queue is configured to either match `any` replication event, no real-time events (using the configuration `block_rtq`), or a subset of events (using either a bucket `type` filter or a `bucket` filter).
+
+In order to replicate the object, it must now be fetched from the queue by a sink. A sink-side cluster should have multiple consumers, on multiple nodes, consuming from each node in the source-side cluster. These workers are handed work items by the `riak_kv_replrtq_snk`, with a Riak client configured to communicate to the remote node, and the worker will initiate a `fetch` from that node.
+
+On receipt of the `fetch` request the source node should:
+
+- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in place of `{Bucket, Key}`.
+
+- The GET FSM should go directly into the `queue_fetch` state, and try to fetch the next replication reference from the given queue name via the `riak_kv_replrtq_src`.
+
+  - If the fetch from the queue returns `queue_empty` this is relayed back to the sink-side worker, and ultimately the `riak_kv_replrtq_snk`, which may then slow down the pace at which fetch requests are sent to this node/queue combination.
+To reduce the volume of individual requests when queues are mainly empty, the queue is only considered empty if it has reported empty 8 times from requests 4ms apart.
+
+  - If the fetch returns an actual object, this is relayed back to the sink worker.
+
+  - If the fetch returns a replication reference with the flag `to_fetch`, the `riak_kv_get_fsm` will continue down the standard path of states starting with `prepare`, and fetch the object, which will then be returned to the sink worker.
+
+- If a successful fetch is relayed back to the sink worker, it will replicate the PUT using a local `riak_client:push/4`. The push will complete a PUT of the object on the sink cluster - using a `riak_kv_put_fsm` with appropriate options (e.g. `asis`, `disable-hooks`).
+
+  - The code within the `riak_client:push/4` follows the behaviour of the existing `riak_repl` on receipt of a replicated object.
+
+- If a fetch or push request fails, the sink worker will report this back to the `riak_kv_replrtq_snk`, which should delay further requests to that node/queue so as to avoid rapidly locking up sink workers communicating with a failing node.
+
+### Full-Sync Reconciliation and Repair - Step by Step
+
+The `riak_kv_ttaaefs_manager` controls the full-sync replication activity of a node. Each node is configured with a single peer with which it is to run full-sync checks and repairs, assuming that across the cluster sufficient peers to sufficient clusters have been configured to complete the overall work necessary for that cluster. Ensuring there are sufficient peer relations is an administrator responsibility; peer relations are not re-balanced or re-scaled in response to failure.
+
+The `riak_kv_ttaaefs_manager` is a source-side process. It will not attempt to repair any discovered discrepancies where the remote cluster is ahead of the local cluster - the job of the process is to ensure that a remote cluster is up-to-date with the changes which have occurred in the local cluster. For mutual full-sync replication, an equivalent configuration is needed on the peer cluster.
+
+The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the configuration. The schedule has `wants` - the number of times per day that this manager should:
+
+- Reconcile changes across the whole cluster over all time;
+
+- Skip work for a schedule slot and do nothing;
+
+- Reconcile changes that have occurred in the past hour;
+
+- Reconcile changes that have occurred in the past day.
+
+On startup, the manager looks at these wants and provides a random distribution of work across slots. The day is divided into slots evenly distributed so there is a slot for each want in the schedule. It will run work for the slot at an offset from the start of the slot, based on the place this node has in the sorted list of currently active nodes. So if each node is configured with the same total number of wants, work will be synchronised to have limited overlapping work within the cluster.
+
+When, on a node, a scheduled piece of work comes due, the `riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work between the two clusters (using the peer configuration to reach the remote cluster). Once the work is finished, it will schedule the next piece of work - unless the start time for the next piece of work has already passed, in which case the next work is skipped. When all the work in the schedule is complete, a new schedule is calculated from the wants.
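+
+As a concrete illustration, one node's schedule and peer could be expressed in `advanced.config` terms roughly as follows. This is a sketch only: the `ttaaefs_*` setting names and value formats are assumptions to be checked against the configuration schema shipped with your release, and the peer address is hypothetical:
+
+```erlang
+%% Illustrative sketch of one node's full-sync configuration.
+{riak_kv, [
+    %% Peer node at the remote cluster used for this node's full-sync work
+    {ttaaefs_scope, all},
+    {ttaaefs_peerip, "10.0.0.1"},      %% hypothetical address
+    {ttaaefs_peerport, 8087},
+    {ttaaefs_peerprotocol, pb},
+
+    %% The wants: how many slots per day for each type of work
+    {ttaaefs_allcheck, 2},   %% whole cluster, over all time
+    {ttaaefs_hourcheck, 12}, %% changes from the past hour
+    {ttaaefs_daycheck, 4},   %% changes from the past day
+    {ttaaefs_nocheck, 6}     %% skip the slot and do nothing
+]}.
+```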
+
+When starting an `aae_exchange` the `riak_kv_ttaaefs_manager` must pass in a repair function. This function will compare clocks from identified discrepancies, and where the source cluster is ahead of the sink, send the `{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name on `riak_kv_replrtq_src`. These queued entries will then be replicated through being fetched by the `riak_kv_replrtq_snk` workers, although this will only occur when there is no higher-priority work to replicate, i.e. real-time replication events prompted by locally co-ordinated PUTs.
\ No newline at end of file
diff --git a/content/riak/kv/2.9.2/developing/usage/reading-objects.md b/content/riak/kv/2.9.2/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..40d0c2440a
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/reading-objects.md
@@ -0,0 +1,247 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object
+`pr` | `0` | How many [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+        .withOption(FetchOption.R, new Quorum(3))
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->withParameter('r', 3)
+  ->build()
+  ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` diff --git a/content/riak/kv/2.9.2/developing/usage/replication.md b/content/riak/kv/2.9.2/developing/usage/replication.md new file mode 100644 index 0000000000..a0781facf7 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/usage/replication.md @@ -0,0 +1,588 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.2/dev/advanced/replication-properties + - /riak/kv/2.9.2/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/2.9.2/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
+Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/2.9.2/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties at the time of the read or write,
+on a per-request basis; the [section below](#client-level-replication-settings)
+walks through examples of this.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.2/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.2/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent of setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level.
You can use [bucket types]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write for it to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
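+
+W can also be supplied on an individual request rather than baked into a bucket type. As a minimal sketch with the Erlang client (assuming `Pid` is a connected `riakc_pb_socket` process and `Obj` is a `riakc_obj`, as in the other examples on this page):
+
+```erlang
+%% Per-request override: require 2 write acknowledgements and 1
+%% durable write for this PUT only, regardless of bucket defaults.
+riakc_pb_socket:put(Pid, Obj, [{w, 2}, {dw, 1}]).
+```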
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location giraffeKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(giraffeKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+This write will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
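+
+As a minimal sketch of opting into this behaviour on a single read with the Erlang client (assuming `Pid` is a connected `riakc_pb_socket` process; the bucket and key names are hypothetical):
+
+```erlang
+%% Thorough read: only report notfound once a quorum of vnodes has
+%% responded notfound, rather than after the first response.
+{error, notfound} = riakc_pb_socket:get(Pid,
+                                        <<"my_bucket">>,
+                                        <<"missing_key">>,
+                                        [{notfound_ok, false},
+                                         {basic_quorum, true}]).
+```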
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` --- All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` --- This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` --- A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2; an N value of 5 calculates to 3, and so on.
+* `default` --- Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{'stats':{ ... large stats object ... }}")
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/2.9.2/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/2.9.2/developing/getting-started)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/2.9.2/learn/concepts/causal-context#siblings">siblings</a>
+2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/2.9.2/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
diff --git a/content/riak/kv/2.9.2/developing/usage/search-schemas.md b/content/riak/kv/2.9.2/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..c3a7cd9333
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/search-schemas.md
@@ -0,0 +1,507 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/advanced/search-schema
+  - /riak/kv/2.9.2/dev/advanced/search-schema
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/), and [more]({{<baseurl>}}riak/kv/2.9.2/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value.
+Sufficiently large objects can potentially take up tremendous
+amounts of disk space, so pay special attention to those indexes.
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('cartoons.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('cartoons.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+xml_file.close()
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon Thundercats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above object into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+
+```
+name=Lion-o
+age=30
+leader=true
+aliases.name=León-O
+aliases.desc_es=Señor de los ThunderCats
+aliases.name=Starlion
+aliases.desc_fr=Le jeune seigneur des Cosmocats
+```
+
+This means that our schema should handle `name`, `age`, `leader`,
+`aliases.name` (a dot (`.`) is a valid field character), and
+`aliases.desc_*`, which is a description in the language indicated by
+the suffix (Spanish and French).
+
+### Required Schema Fields
+
+Solr schemas can be very complex, containing many types and analyzers.
+Refer to the [Solr 4.7 reference
+guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf)
+for a complete list. You should be aware, however, that there are a few
+fields that are required by Riak Search in order to properly distribute
+an object across a [cluster][concept clusters]. These fields are all prefixed
+with `_yz`, which stands for
+[Yokozuna](https://github.com/basho/yokozuna), the original code name
+for Riak Search.
+
+Below is a bare-minimum skeleton Solr schema. It won't do much for you
+other than allow Riak Search to properly manage your stored objects.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+
+ <types>
+   <!-- YZ String: Used for non-analyzed fields -->
+   <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+ </types>
+</schema>
+```
+
+If you're missing any of the above fields, Riak Search will reject your
+custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`.
+
+In the table below, you'll find a description of the various required
+fields. You'll rarely need to use any fields other than `_yz_rt` (bucket
+type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err`
+can be helpful if you suspect that your extractors are failing.
+Malformed JSON or XML will cause Riak Search to index a key and set
+`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-------|:-----|:-----------
+`_yz_id` | ID | Unique identifier of this Solr document
+`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy)
+`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them
+`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Indicates whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you need to map the name and type of each field (a string,
+integer, boolean, etc).
+
+When creating fields you can either create specific fields via the
+`field` element or an asterisk (`*`) wildcard field via `dynamicField`.
+Any field that matches a specific field name will use that field's
+definition; failing that, Riak will attempt to match it against a
+dynamic field pattern.
+
+Besides a field `type`, you also must decide if a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored` that
+means that you can get the value back as a result of a query, but it
+also doubles the storage of the field (once in Riak, again in Solr). If
+a single Riak object can have more than one copy of the same matching
+field, you also must set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.0">
+ <fields>
+   <field name="name" type="string" indexed="true" stored="true" />
+   <field name="age" type="int" indexed="true" stored="false" />
+   <field name="leader" type="boolean" indexed="true" stored="false" />
+   <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" />
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+```
+
+Next, take note of the types you used in the fields and ensure that each
+of the field types is defined as a `fieldType` under the `types`
+element.
+Basic types such as `string`, `boolean`, and `int` have matching
+Solr classes. There are dozens more types, including many kinds of
+number (`float`, `tdouble`, `random`), `date` fields, and even
+geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we mapped any field that ends with
+`*_es` to Spanish, and `*_de` to German.
+
+```xml
+ <types>
+   <!-- YZ String: Used for non-analyzed fields -->
+   <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+   <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+   <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+   <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+   <!-- Spanish -->
+   <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+     <analyzer>
+       <tokenizer class="solr.StandardTokenizerFactory"/>
+       <filter class="solr.LowerCaseFilterFactory"/>
+       <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+       <filter class="solr.SpanishLightStemFilterFactory"/>
+       <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+     </analyzer>
+   </fieldType>
+
+   <!-- German -->
+   <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+     <analyzer>
+       <tokenizer class="solr.StandardTokenizerFactory"/>
+       <filter class="solr.LowerCaseFilterFactory"/>
+       <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
+       <filter class="solr.GermanNormalizationFilterFactory"/>
+       <filter class="solr.GermanLightStemFilterFactory"/>
+       <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+       <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+     </analyzer>
+   </fieldType>
+ </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+provided to index without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to this will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+        at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+        at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+        at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+        at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+        at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+        ...
+        ...
+        ...
+
+### Uploading
+
+Once you have finalized your custom schema and saved it as an `.xml`
+file, you can upload it to Riak KV as follows:
+
+```curl
+curl -v -XPUT $RIAK_HOST/search/schema/thundercats \
+  -H 'Content-Type:application/xml' \
+  --data-binary @thundercats_schema.xml
+```
+
+
+
+## Field Properties By Use Case
+
+Sometimes it can be tricky to decide whether a value should be `stored`,
+or whether `multiValued` is allowed. This handy table from the [Solr
+documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case)
+may help you pick field properties.
+
+An entry of `true` or `false` in the table indicates that the option
+must be set to the given value for the use case to function correctly.
+If no entry is provided, the setting of that attribute has no impact on
+the case.
+
+<table class="schemausecase">
+<thead>
+<tr>
+<th>Use Case</th>
+<th><code>indexed</code></th>
+<th><code>stored</code></th>
+<th><code>multiValued</code></th>
+<th><code>omitNorms</code></th>
+<th><code>termVectors</code></th>
+<th><code>termPositions</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>search within field</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>retrieve contents</td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use as unique key</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>sort on field</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td><code>true</code>[1](#notes)</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use field boosts[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>document boosts affect searches within field</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>highlighting</td>
+<td><code>true</code>[4](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td>[2](#notes)</td>
+<td><code>true</code>[3](#notes)</td>
+</tr>
+<tr>
+<td>faceting[5](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>add multiple values, maintaining order</td>
+<td></td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>field length affects doc score</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>MoreLikeThis[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>true</code>[6](#notes)</td>
+<td></td>
+</tr>
+</tbody></table>
diff --git a/content/riak/kv/2.9.2/developing/usage/search.md b/content/riak/kv/2.9.2/developing/usage/search.md
new file mode 100644
index 0000000000..b2505dac72
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/search.md
@@ -0,0 +1,1451 @@
+---
+title: "Using Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Searching"
+    identifier: "usage_searching"
+    weight: 105
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/search
+  - /riak/kv/2.9.2/dev/using/search
+---
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak search 2.0 is an integration of Solr (for indexing and querying)
+and Riak (for storage and distribution). There are a few points of
+interest that a user of Riak search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak search uses AAE is in the
+[Riak search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you use to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Here we'll create an
+index named `famous` with the default schema.
+
+Both schema and index creation will be covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/2.9.2/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.2/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.2/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+    .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type
+property `search_index` to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create it without any
+properties and set individual buckets to be indexed.
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +### Associating an Index via Custom Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +default bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + +Once you have created the index association, any new data will be indexed on +ingest according to your schema. + +## Riak Search Security Setup + +[Security]({{<baseurl>}}riak/kv/2.9.2/using/security/) is a new feature as of +Riak 2.0 that lets an administrator limit access to certain resources. +In the case of search, your options are to limit administration of +schemas or indexes (the `search.admin` permission) to certain users, and +to limit querying (the `search.query` permission) to any index or to a +specific index. The example below shows the various options. 
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Liono, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python the content type is automatically set for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+    ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket))
+    ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket))
+    ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket))
+    ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]);
+
+$storeObjectBuilder->build()->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+
+cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True})
+cat.store()
+
+cat = bucket.new('cheetara', {'name_s': 'Cheetara', 'age_i': 28, 'leader_b': False})
+cat.store()
+
+cat = bucket.new('snarf', {'name_s': 'Snarf', 'age_i': 43})
+cat.store()
+
+cat = bucket.new('panthro', {'name_s': 'Panthro', 'age_i': 36})
+cat.store()
+```
+
+```csharp
+var lionoId = new RiakObjectId("animals", "cats", "liono");
+var lionoObj = new { name_s = "Lion-o", age_i = 30, leader_b = true };
+var lionoRiakObj = new RiakObject(lionoId, lionoObj);
+
+var cheetaraId = new RiakObjectId("animals", "cats", "cheetara");
+var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader_b = false };
+var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj);
+
+var snarfId = new RiakObjectId("animals", "cats", "snarf");
+var snarfObj = new { name_s = "Snarf", age_i = 43, leader_b = false };
+var snarfRiakObj = new RiakObject(snarfId, snarfObj);
+
+var panthroId = new RiakObjectId("animals", "cats", "panthro");
+var panthroObj = new { name_s = "Panthro", age_i = 36, leader_b = false };
+var panthroRiakObj = new RiakObject(panthroId, panthroObj);
+
+var rslts = client.Put(new[] {
+    lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj
+});
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var objs = [
+    [ 'liono', { name_s: 'Lion-o', age_i: 30, leader_b: true } ],
+    [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader_b: false } ],
+    [ 'snarf', { name_s: 'Snarf', age_i: 43, leader_b: false } ],
+    [ 'panthro', { name_s: 'Panthro', age_i: 36, leader_b: false } ],
+];
+
+var storeFuncs = [];
+objs.forEach(function (o) {
+    var storeFunc = function (async_cb) {
+        var key = o[0];
+        var value = o[1];
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('application/json');
+        riakObj.setBucketType('animals');
+        riakObj.setBucket('cats');
+        riakObj.setKey(key);
+        riakObj.setValue(value);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    };
+    storeFuncs.push(storeFunc);
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+    // NB: all objects stored and indexed...
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/2.9.2/developing/usage/custom-extractors). 
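+
+To make extraction concrete before we look at the supported content
+types, here is a rough, illustrative Python sketch of the kind of
+flattening the JSON extractor performs (the exact rules, including the
+dot-separated nesting and multi-valued lists, are described later in
+this section; the real extractors are Erlang modules that run inside
+Riak, so this sketch is not the actual implementation):
+
+```python
+import json
+
+def flatten(value, prefix=''):
+    """Turn a nested value into flat (field, value) pairs: nested names
+    are joined with dots, and lists repeat the same field name, which
+    Solr treats as a multi-valued field."""
+    pairs = []
+    if isinstance(value, dict):
+        for key, inner in value.items():
+            name = prefix + '.' + key if prefix else key
+            pairs.extend(flatten(inner, name))
+    elif isinstance(value, list):
+        for inner in value:
+            pairs.extend(flatten(inner, prefix))
+    else:
+        pairs.append((prefix, value))
+    return pairs
+
+print(flatten(json.loads('{"pets": {"pet": {"name_s": "Spot"}}}')))
+# [('pets.pet.name_s', 'Spot')]
+print(flatten(json.loads('{"people_ss": ["Ryan", "Eric", "Brett"]}')))
+# [('people_ss', 'Ryan'), ('people_ss', 'Eric'), ('people_ss', 'Brett')]
+```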
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/2.9.2/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated into
+a field insert on the Solr index document. Solr will index any field
+that it recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` is an integer, `_b` is a boolean, and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/2.9.2/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match.
+
+```java
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "name_s:Lion*")
+        .build();
+cluster.execute(searchOp);
+// This will display the actual results as a List of Maps:
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+// This will display the number of results:
+System.out.println(results.size());
+```
+
+```ruby
+results = client.search("famous", "name_s:Lion*")
+p results
+p results['docs']
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('name_s:Lion*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+
+var_dump($response->getDocs());
+```
+
+```python
+results = client.fulltext_search('famous', 'name_s:Lion*')
+print results
+print results['docs']
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "name_s")
+        .Search("Lion*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+foreach (RiakSearchResultDocument doc in searchResult.Documents)
+{
+    var args = new[] {
+        doc.BucketType,
+        doc.Bucket,
+        doc.Key,
+        string.Join(", ", doc.Fields.Select(f => f.Value).ToArray())
+    };
+    Debug.WriteLine(
+        format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}",
+        args: args);
+}
+```
+
+```javascript
+function search_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("docs:", JSON.stringify(rslt.docs));
+}
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('name_s:Lion*')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:fwrite("~p~n", [Results]),
+Docs = Results#search_results.docs,
+io:fwrite("~p~n", [Docs]).
+
+%% Please note that this example relies on an Erlang record definition
+%% for the search_result record found here:
+%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("name_s:Lion*").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+sc := cmd.(*riak.SearchCommand)
+if json, jerr := json.MarshalIndent(sc.Response.Docs, "", "  "); jerr != nil {
+    return jerr
+} else {
+    fmt.Println(string(json))
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp
+```
+
+The response to a query will be an object containing details about the
+response, such as a query's max score and a list of documents which
+match the given query. It's worth noting two things:
+
+* The documents returned are Search documents (a set of Solr
+  field/values), not a Riak value
+* The HTTP response is a direct Solr response, while the drivers use
+  Protocol Buffers and are encoded with different field names
+
+A typical HTTP `response` value looks like this:
+
+```json
+{
+  "numFound": 1,
+  "start": 0,
+  "maxScore": 1.0,
+  "docs": [
+    {
+      "leader_b": true,
+      "age_i": 30,
+      "name_s": "Lion-o",
+      "_yz_id": "animals_cats_liono_37",
+      "_yz_rk": "liono",
+      "_yz_rt": "animals",
+      "_yz_rb": "cats"
+    }
+  ]
+}
+```
+
+The most important field returned is `docs`, which is the list of
+objects that each contain fields about matching index documents.
+The values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+        .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation($key, $bucket, $btype)
+    ->build()
+    ->execute()
+    ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO 30]`. If
+you wanted to find all cats 30 or older, you could include a glob as
+the top end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('leader_b:true AND age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+        .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new Command\Builder\Search\DeleteIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []),
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that bucket's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_` before the index can be deleted.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents is returned in non-overlapping
+sequential subsets (in other words, *pages*).
+This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, where `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "*:*")
+        .withStart(start)
+        .withNumRows(rowsPerPage)
+        .build();
+client.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('*:*')
+    ->withMaxRows($maxRows)
+    ->withStartRow($start)
+    ->build()
+    ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results.
+
+If you want to sort by score without repeating results then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query.
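+
+A hedged Python sketch of that key-ordered approach (assuming the
+official Python client, whose `fulltext_search` passes extra Solr
+parameters such as `sort` straight through to Solr):
+
+```python
+import riak
+
+client = riak.RiakClient()
+
+rows_per_page = 2
+page = 2
+start = rows_per_page * (page - 1)
+
+# Sorting on type/bucket/key gives a stable order across coverage
+# plans, so successive pages will not repeat or drop documents.
+results = client.fulltext_search(
+    'famous', '*:*',
+    sort='_yz_rt asc, _yz_rb asc, _yz_rk asc',
+    start=start, rows=rows_per_page)
+
+for doc in results['docs']:
+    print(doc['_yz_rk'])
+```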
+
+[This issue](https://github.com/basho/yokozuna/issues/355) is caused by
+the way Search must minimally distribute a query across multiple Solr
+nodes (called a *coverage plan*) and then filter duplicate results to
+retrieve a full result set. Since this plan is frequently recalculated,
+successive page queries may use a different plan, and thus calculate
+different `score` values or filter different `_yz_id` values. We have
+plans to fix this shortcoming in a future version of Riak.
+
+### MapReduce
+
+Riak Search allows for piping search results as inputs for
+[MapReduce]({{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce/) jobs. This is useful for
+performing post-calculations on results or aggregations of ad-hoc
+queries. The Riak Search MapReduce integration works similarly to
+regular MapReduce, with the notable exception that your input is not a
+bucket, but rather index and query arguments to the `yokozuna` module
+and `mapred_search` function (an Erlang `module:function` pair that adds
+the Riak Search hook to MapReduce).
+
+```json
+{
+  "inputs": {
+    "module": "yokozuna",
+    "function": "mapred_search",
+    "arg": ["famous","NOT leader_b:true"]
+  },
+  "query": [
+    {
+      "map": {
+        "language": "javascript",
+        "keep": false,
+        "source": "function(v) { return [1]; }"
+      }
+    },
+    {
+      "reduce": {
+        "language": "javascript",
+        "keep": true,
+        "name": "Riak.reduceSum"
+      }
+    }
+  ]
+}
+```
+
+In this example we're searching for all famous cats that are not
+leaders and counting up the results using JavaScript for both map and
+reduce. It should return the reduced sum of `[3]`.
+
+```curl
+curl -XPOST $RIAK_HOST/mapred \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}'
+```
diff --git a/content/riak/kv/2.9.2/developing/usage/searching-data-types.md b/content/riak/kv/2.9.2/developing/usage/searching-data-types.md
new file mode 100644
index 0000000000..360e47cb26
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/searching-data-types.md
@@ -0,0 +1,1683 @@
+---
+title: "Searching with Data Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Searching with Data Types"
+    identifier: "usage_search_data_types"
+    weight: 111
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/search/search-data-types
+  - /riak/kv/2.9.2/dev/search/search-data-types
+---
+
+Although [Riak Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types) function differently from other
+Riak objects in some respects, when you're using Search you can think of
+them as normal Riak objects with special metadata attached (metadata
+that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#maps)
+can be indexed and have their contents searched just like other Riak
+objects.
+
+## Data Type MIME Types
+
+Like all objects stored in Riak, Riak Data Types are assigned content
+types. Unlike with other Riak objects, this happens automatically. When
+you store, say, a counter in Riak, it will automatically be assigned the
+type `application/riak_counter`.
+The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types has its own default schema, with the exception of
+maps, which means that the `_yz_default` schema will automatically index
+Data Types on the basis of their assigned content type. This means that
+there is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#data-types-and-search-examples) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#maps), there are four schemas
+for embedded, aka dynamic, fields.
+Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued.
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings.
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('scores')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index.
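+
+If you want to confirm the setup before writing data, a minimal Python
+sketch (assuming the official Python client; `get_search_index` and the
+bucket type's property fetch are available there):
+
+```python
+import riak
+
+client = riak.RiakClient()
+
+# Confirm the index exists (raises an error for an unknown index).
+print(client.get_search_index('scores'))
+
+# Confirm the bucket type is associated with the index.
+props = client.bucket_type('counters').get_properties()
+print(props['search_index'])  # scores
+```
+
+Note that, as with any Riak Search indexing, there is typically a short
+(roughly one-second) delay before newly written values appear in query
+results.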
+So let's start playing with some counters. All counters will be stored
+in the bucket `people`, while the key for each counter will be the
+username of each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu = new CounterUpdate(10);
+UpdateCounter update = new UpdateCounter.Builder(christopherHitchensCounter, cu)
+        .build();
+client.execute(update);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+cu = new CounterUpdate(25);
+update = new UpdateCounter.Builder(joanRiversCounter, cu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10)
+    ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+    ->buildLocation('joan_rivers', 'people', 'counters')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"chris_hitchens">>,
+                            riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"joan_rivers">>,
+                            riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25.
+Let's query to see how many counters have a value greater
+than 20, just to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[20 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:%5B20%20TO%20*%5D" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over
+20.
+To find out which set that is, we can dig into our results: + +```java +// Using the "results" object from above: +int numberFound = results.numResults(); +Map<String, List<String>> foundObject = results.getAllResults().get(0); +String key = foundObject.get("_yz_rk").get(0); // "joan_rivers" +String bucket = foundObject.get("_yz_rb").get(0); // "people" +String bucketType = foundObject.get("_yz_rt").get(0); // "counters" +``` + +```ruby +doc = results['docs'][0] + +# The key +doc['_yz_rk'] # 'joan_rivers' + +# The bucket +doc['_yz_rb'] # 'people' + +# The bucket type +doc['_yz_rt'] # 'counters' +``` + +```php +$doc = $response->getDocs()[0]; + +# The key +$doc['_yz_rk'] # 'joan_rivers' + +# The bucket +$doc['_yz_rb'] # 'people' + +# The bucket type +$doc['_yz_rt'] # 'counters' +``` + +```python +doc = results['docs'][0] + +# The key +doc['_yz_rk'] # 'joan_rivers' + +# The bucket +doc['_yz_rb'] # 'people' + +# The bucket type +doc['_yz_rt'] # 'counters' +``` + +```csharp +var search = new RiakSearchRequest("scores", "counter:[20 TO *]"); +var rslt = client.Search(search); + +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); + +var firstDoc = searchResult.Documents.First(); +Console.WriteLine("Key: {0} Bucket: {1} Type: {2}", + firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType); +``` + +```javascript +var doc = rslt.docs[0]; + +var key = doc['_yz_rk']; +var bucket = doc['_yz_rb']; +var bucketType = doc['_yz_rt']; +``` + +```erlang +Doc = lists:nth(1, Docs), +Key = proplists:get_value(<<"_yz_rk">>, Doc), +Bucket = proplists:get_value(<<"_yz_rb">>, Doc), +BucketType = proplists:get_value(<<"_yz_rt", Doc). +``` + +```curl +# Use the JSON object from above to locate bucket, key, and bucket type +# information +``` + +Alternatively, we can see how many counters have values below 15: + +```java +String index = "scores"; +String query = "counter:[* TO 15]"; +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create("scores"), "counter:[* TO 15]") + .build(); +cluster.execute(searchOp); +SearchOperation.Response results = searchOp.get(); +``` + +```ruby +results = client.search('scores', 'counter:[* TO 15]') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('scores') + ->withQuery('counter:[* TO 15]') + ->build() + ->execute(); + +$response->getNumFound(); // 1 +``` + +```python +results = client.fulltext_search('scores', 'counter:[* TO 15]') +``` + +```csharp +var search = new RiakSearchRequest("scores", "counter:[* TO 15]"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('scores') + .withQuery('counter:[* TO 15]') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]"). 
+``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp +``` + +Or we can see how many counters have a value of 17 exactly: + +```java +// Using the same method as above, just changing the query: +String query = "counter:17"; +``` + +```ruby +results = client.search('scores', 'counter:17') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('scores') + ->withQuery('counter:17') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('scores', 'counter:17') +``` + +```csharp +var search = new RiakSearchRequest("scores", "counter:17"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('scores') + .withQuery('counter:17') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>). +``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp +``` + +## Sets Example + +Let's say that we're storing information about the hobbies of a group of +people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#sets) simply called `sets`, +like so: + +```bash +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type activate sets +``` + +Now, we'll create a Search index called `hobbies` that uses the default +schema (as in some of the examples above): + +```java +YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies"); +StoreIndex storeIndex = + new StoreIndex.Builder(hobbiesIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('hobbies', '_yz_default') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak)) + ->withName('hobbies') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('hobbies', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("hobbies", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'hobbies' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/hobbies \ + -H 'Content-Type: application/json' \ + -d '{"schema": "_yz_default"}' +``` + +Now, we can modify our `sets` bucket type to associate that bucket type +with our `hobbies` index: + +```bash +riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}' +``` + +Now, all of the sets that we store in any bucket with the bucket type +`sets` will be automatically indexed as a set. 
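+
+Before storing any sets, you can optionally confirm that the
+association took effect. A minimal sketch, in Python only and assuming
+the same client as above along with its bucket type properties API:
+
+```python
+# Inspect the properties of the 'sets' bucket type; both the datatype
+# and the search index we just associated should be present
+props = client.bucket_type('sets').get_properties()
+props['datatype']
+# 'set'
+props['search_index']
+# 'hobbies'
+```
+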
+So let's say that we
+store two sets for two different people describing their respective
+hobbies, in the bucket `people`:
+
+```java
+Namespace peopleBucket = new Namespace("sets", "people");
+
+Location mikeDitkaSet = new Location(peopleBucket, "ditka");
+SetUpdate su1 = new SetUpdate()
+        .add("football")
+        .add("winning");
+UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();
+
+Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
+SetUpdate su2 = new SetUpdate()
+        .add("wailing")
+        .add("rocking")
+        .add("winning");
+UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();
+
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+
+ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('football')
+    ->add('winning')
+    ->buildLocation('ditka', 'people', 'sets');
+
+$builder->build()->execute();
+
+$builder->add('wailing')
+    ->add('rocking')
+    ->add('winning')
+    ->buildLocation('dio', 'people', 'sets')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('people')
+
+mike_ditka_set = Set(bucket, 'ditka')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+mike_ditka_set.store()
+
+ronnie_james_dio_set = Set(bucket, 'dio')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+ronnie_james_dio_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("ditka")
+    .WithAdditions(new[] { "football", "winning" })
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("dio")
+    .WithAdditions(new[] { "wailing", "rocking", "winning" })
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'ditka',
+            additions: ['football', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'dio',
+            additions: ['wailing', 'rocking', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+%% riakc_set:add_element/2 returns a new set, so bind each result
+MikeDitkaSet = riakc_set:new(),
+MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet),
+MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
+RonnieJamesDioSet = riakc_set:new(),
+RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet),
+RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
+RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),
+
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"ditka">>,
+                            riakc_set:to_op(MikeDitkaSet2)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"dio">>,
+                            riakc_set:to_op(RonnieJamesDioSet3)).
+```
+
+Now, we can query our `hobbies` index to see if anyone has the hobby
+`football`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:football";
+```
+
+```ruby
+results = client.search('hobbies', 'set:football')
+# This should return a Hash with fields like 'num_found' and 'docs'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('hobbies')
+    ->withQuery('set:football')
+    ->build()
+    ->execute();
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:football')
+# This should return a dict with fields like 'num_found' and 'docs'
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:football");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("sets numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:football')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp
+```
+
+Let's see how many sets contain the element `football`:
+
+```java
+// Using the same method explained above for getting search results:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results['num_found']
+# 1
+```
+
+```php
+$response->getNumFound(); // 1
+```
+
+```python
+results['num_found']
+# 1
+```
+
+```csharp
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+rslt.numFound;
+// 1
+```
+
+```erlang
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+# The count appears in the "numFound" field of the JSON returned by the
+# query above.
+```
+
+Success! We stored two sets, only one of which contains the element
+`football`. Now, let's see how many sets contain the element `winning`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:winning";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('hobbies')
+    ->withQuery('set:winning')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:winning");
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:winning')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>).
+NumberFound = Results#search_results.num_found.
+%% 2
+```
+
+Just as expected, both sets we stored contain the element `winning`.
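+
+Because these queries are handed to Solr, terms can also be combined
+with boolean operators instead of being run one at a time. As a sketch
+(Python only, reusing the `hobbies` index and the `fulltext_search`
+call from above), we can ask for sets that contain both `rocking` and
+`winning`, which should match only the Ronnie James Dio set:
+
+```python
+# A standard Solr AND query across two values of the same set
+results = client.fulltext_search('hobbies', 'set:rocking AND set:winning')
+results['num_found']
+# 1
+results['docs'][0]['_yz_rk']
+# 'dio'
+```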
+ +## Maps Example + +This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types) +tutorial. That tutorial walks you through storing CMS-style user data in +Riak [maps]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/#maps), and we'd suggest that you +familiarize yourself with that tutorial first. More specifically, user +data is stored in the following fields in each user's map: + +* first name in a `first_name` register +* last name in a `last_name` register +* whether the user is an enterprise customer in an `enterprise_customer` + flag +* the number of times the user has visited the company page in a + `page_visits` counter +* a list of the user's interests in an `interests` set + +First, let's create and activate a bucket type simply called `maps` that +is set up to store Riak maps: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type activate maps +``` + +Now, let's create a search index called `customers` using the default +schema: + +```java +YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default"); +StoreIndex storeIndex = + new StoreIndex.Builder(customersIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('customers', '_yz_default') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('customers') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('customers', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("customers", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'customers' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>). 
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Idris"))
+        .update("last_name", new RegisterUpdate("Elba"))
+        .update("enterprise_customer", new FlagUpdate(true))
+        .update("page_visits", new CounterUpdate(10))
+        .update("interests", new SetUpdate().add("acting").add("being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Joan"))
+        .update("last_name", new RegisterUpdate("Jett"))
+        // Joan Jett is not an enterprise customer, so we don't need to
+        // explicitly disable the "enterprise_customer" flag, as all
+        // flags are disabled by default
+        .update("page_visits", new CounterUpdate(25))
+        .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Idris')
+    ->updateRegister('last_name', 'Elba')
+    ->updateFlag('enterprise_customer', true)
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Joan')
+    ->updateRegister('last_name', 'Jett')
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, true)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', true);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters of 15 or more.
+Unlike the
+counters example above, we have to specify _which_ counter we're
+querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('page_visits_counter:[15 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter of
+15 or more. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register; // 'Joan'
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we stored thus far had an `interests` set. First, let's
+see how many of our maps even _have_ sets called `interests` using a
+wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set.
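+
+To double-check which maps matched, a short sketch (Python only,
+reusing the wildcard query above) can pull the Riak key out of each
+returned document via the `_yz_rk` field we used earlier:
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+# The documents come back in no guaranteed order
+for doc in results['docs']:
+    print(doc['_yz_rk'])
+# idris_elba
+# joan_jett
+```
+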
+Now let's see how
+many maps have items in `interests` sets that begin with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:loving*')
+    ->build()
+    ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has an item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will have a `name` register only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+        .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+        .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate)
+        .build();
+client.execute(addSubMap);
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'John Luther');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'Robert Plant');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+Querying maps within maps involves constructing queries that separate the
+different levels of depth with a single dot. Here's an example query for
+finding maps that have a `name` register embedded within an `alter_ego`
+map:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Once we know how to query embedded fields like this, we can query them
+just like any other. Let's find out which maps have an `alter_ego`
+sub-map that contains a `name` register that ends with `Plant`, and
+display that customer's first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*Plant')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
diff --git a/content/riak/kv/2.9.2/developing/usage/secondary-indexes.md b/content/riak/kv/2.9.2/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..36b6f8c8ae
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/secondary-indexes.md
@@ -0,0 +1,2026 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/2i
+  - /riak/kv/2.9.2/dev/using/2i
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary index values can be either binaries or strings, such as
+`sensor_1_data`, `admin_user`, or `click_event`, or integers, such as
+`99` or `141121`.
+ +[Riak Search]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/2.9.2/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
Let's say that an application would like to add a Twitter handle and an
+email address to this object as secondary indexes.
+
+```java
+Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith");
+
+// In the Java client (and all clients), if you do not specify a bucket type,
+// the client will use the default type. And so the following store command
+// would be equivalent to the one above:
+Location johnSmithKey = new Location(new Namespace("users"), "john_smith");
+
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'user_data':{ ... }}"));
+
+obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123");
+obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com");
+
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(johnSmithKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('default').bucket('users')
+obj = Riak::RObject.new(bucket, 'john_smith')
+obj.content_type = 'application/json'
+obj.raw_data = '{"user_data":{ ... }}'
+
+# String/binary indexes must be set as an array of strings
+obj.indexes['twitter_bin'] = %w{ jsmith123 }
+obj.indexes['email_bin'] = %w{ jsmith@basho.com }
+obj.store
+
+# In the Ruby client (and all clients), if you do not specify a bucket
+# type, the client will use the default type. And so the following set
+# of commands would be equivalent to the one above:
+
+bucket = client.bucket('users')
+# repeat the same commands for building the object
+obj.store
+```
+
+```php
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter_bin', 'jsmith123')
+    ->addValueToIndex('email_bin', 'jsmith@basho.com');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->buildLocation('john_smith', 'users', 'default')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('default').bucket('users')
+# In the Python client (and all clients), if you do not specify a bucket type,
+# the client will use the default type. And so the following store command
+# would be equivalent to the one above:
+bucket = client.bucket('users')
+
+obj = RiakObject(client, bucket, 'john_smith')
+obj.content_type = 'text/plain'
+obj.data = '...user data...'
+obj.add_index('twitter_bin', 'jsmith123')
+obj.add_index('email_bin', 'jsmith@basho.com')
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("default", "users", "john_smith");
+var obj = new RiakObject(id, "...user data...",
+    RiakConstants.ContentTypes.TextPlain);
+obj.BinIndex("twitter").Set("jsmith123");
+obj.BinIndex("email").Set("jsmith@basho.com");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucket('users');
+riakObj.setKey('john_smith');
+riakObj.setValue('...user data...');
+riakObj.addToIndex('twitter_bin', 'jsmith123');
+riakObj.addToIndex('email_bin', 'jsmith@basho.com');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"default">>, <<"users">>},
+                    <<"john_smith">>,
+                    <<"...user data...">>,
+                    <<"text/plain">>),
+%% In the Erlang client (and all clients), if you do not specify a bucket type,
+%% the client will use the default type.
And so the following object would be +%% equivalent to the one above: + +Obj = riakc_obj:new(<<"users">>, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index( + MD1, + [{{binary_index, "twitter"}, [<<"jsmith123">>]}, + {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]), +Obj2 = riakc_obj:update_metadata(Obj, MD2), +riakc_pb_socket:put(Pid, Obj2). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + BucketType: "indexes", + Bucket: "users", + Key: "john_smith", + Value: []byte("…user data…"), +} + +obj.AddToIndex("twitter_bin", "jsmith123") +obj.AddToIndex("email_bin", "jsmith@basho.com") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ + -H 'x-riak-index-twitter_bin: jsmith123' \ + -H 'x-riak-index-email_bin: jsmith@basho.com' \ + -H 'Content-Type: application/json' \ + -d '{"userData":"data"}' +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.2/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.2/developing/getting-started) section. + +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if 
(rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucket('users')
+    .withIndexName('twitter_bin')
+    .withIndexKey('jsmith123')
+    .withCallback(query_cb)
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+{ok, Results} =
+    riakc_pb_socket:get_index(Pid,
+                              <<"users">>, %% bucket
+                              {binary_index, "twitter"}, %% index name
+                              <<"jsmith123">>). %% index
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("users").
+    WithIndexName("twitter_bin").
+    WithIndexKey("jsmith123").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```
+
+The response:
+
+```java
+john_smith
+```
+
+```ruby
+["john_smith"]
+```
+
+```php
+['john_smith']
+```
+
+```python
+['john_smith']
+```
+
+```csharp
+john_smith
+```
+
+```javascript
+john_smith
+```
+
+```erlang
+{ok,{index_results_v1,[<<"john_smith">>],
+                      undefined,undefined}}.
+```
+
+```golang
+john_smith
+```
+
+```curl
+{
+  "keys": [
+    "john_smith"
+  ]
+}
+```
+
+## Examples
+
+To run the following examples, make sure that Riak is configured to use
+an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].
+
+## Indexing Objects
+
+The following example indexes four different objects. Notice that we're
+storing both integer and string (aka binary) fields. Field names are
+automatically lowercased, some fields have multiple values, and
+duplicate fields are automatically de-duplicated, as in the following
+example:
+
+```java
+Namespace peopleBucket = new Namespace("indexes", "people");
+
+RiakObject larry = new RiakObject()
+        .setValue(BinaryValue.create("My name is Larry"));
+larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
+larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
+StoreValue storeLarry = new StoreValue.Builder(larry)
+        .withLocation(new Location(peopleBucket, "larry"))
+        .build();
+client.execute(storeLarry);
+
+RiakObject moe = new RiakObject()
+        .setValue(BinaryValue.create("My name is Moe"));
+moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
+moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
+StoreValue storeMoe = new StoreValue.Builder(moe)
+        .withLocation(new Location(peopleBucket, "moe"))
+        .build();
+client.execute(storeMoe);
+
+RiakObject curly = new RiakObject()
+        .setValue(BinaryValue.create("My name is Curly"));
+curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
+curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
+StoreValue storeCurly = new StoreValue.Builder(curly)
+        .withLocation(new Location(peopleBucket, "curly"))
+        .build();
+client.execute(storeCurly);
+
+RiakObject veronica = new RiakObject()
+        .setValue(BinaryValue.create("My name is Veronica"));
+veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
+        .add("val4").add("val4");
+veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
+        .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
+StoreValue storeVeronica = new StoreValue.Builder(veronica)
+        .withLocation(new Location(peopleBucket, "veronica"))
+        .build();
+client.execute(storeVeronica);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = Riak::RObject.new(bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'My name is Larry'
+obj1.indexes['field1_bin'] = %w{ val1 }
+# Like binary/string indexes, integer indexes must be set as an array,
+# even if you wish to add only a single index
+obj1.indexes['field2_int'] = [1001]
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'My name is Moe'
+obj2.indexes['Field1_bin'] = %w{ val2 }
+obj2.indexes['Field2_int'] = [1002]
+obj2.store
+
+obj3 = Riak::RObject.new(bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.raw_data = 'My name is Curly'
+obj3.indexes['FIELD1_BIN'] = %w{ val3 }
+obj3.indexes['FIELD2_INT'] = [1003]
+obj3.store
+
+obj4 = Riak::RObject.new(bucket, 'veronica')
+obj4.content_type = 'text/plain'
+obj4.raw_data = 'My name is Veronica'
+obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
+# Assigning to the indexes hash replaces any existing values, so list
+# all of the (possibly duplicated) values in a single array
+obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1004, 1007]
+obj4.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('people', 'indexes');
+
+$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val1')
+    ->addValueToIndex('field2_int', 1001);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('larry', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('Field1_bin', 'val2')
+    ->addValueToIndex('Field2_int', 1002);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('moe', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('FIELD1_BIN', 'val3')
+    ->addValueToIndex('FIELD2_int', 1003);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('curly', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4a')
+    ->addValueToIndex('field1_bin', 'val4b')
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1005)
+    ->addValueToIndex('field2_int', 1006)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1007);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = RiakObject(client, bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.data = 'My name is Larry'
+obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.data = 'My name is Moe'
+obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
+obj2.store()
+
+obj3 = RiakObject(client, bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.data = 'My name is Curly'
+obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
+obj3.store()
+
+obj4 = RiakObject(client, bucket,
'veronica') +obj4.content_type = 'text/plain' +obj4.data = 'Veronica' +obj4.add_index('field1_bin', 'val4').add_index('field1_bin', 'val4a').add_index('field1_bin', 'val4b').add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1005).add_index('field2_int', 1006).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1007) +obj4.store() +``` + +```csharp +var larryId = new RiakObjectId("indexes", "people", "larry"); +var larry = new RiakObject(larryId, "My name is Larry", + RiakConstants.ContentTypes.TextPlain); + +larry.BinIndex("field1").Set("val1"); +larry.IntIndex("field2").Set(1001); + +client.Put(larry); + +var moeId = new RiakObjectId("indexes", "people", "moe"); +var moe = new RiakObject(moeId, "My name is Moe", + RiakConstants.ContentTypes.TextPlain); + +moe.BinIndex("Field1").Set("val2"); +moe.IntIndex("Field2").Set(1002); + +client.Put(moe); + +var curlyId = new RiakObjectId("indexes", "people", "curly"); +var curly = new RiakObject(curlyId, "My name is Curly", + RiakConstants.ContentTypes.TextPlain); + +curly.BinIndex("FIELD1").Set("val3"); +curly.IntIndex("FIELD2").Set(1003); + +client.Put(curly); + +var veronicaId = new RiakObjectId("indexes", "people", "veronica"); +var veronica = new RiakObject(veronicaId, "My name is Veronica", + RiakConstants.ContentTypes.TextPlain); + +veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" }); +veronica.IntIndex("FIELD2").Set(new BigInteger[] { + 1004, 1005, 1006, 1004, 1004, 1007 +}); + +client.Put(veronica); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var storeFuncs = [ + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('larry'); + riakObj.setValue('My name is Larry'); + riakObj.addToIndex('field1_bin', 'val1'); + riakObj.addToIndex('field2_int', 1001); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('moe'); + riakObj.setValue('My name is Moe'); + riakObj.addToIndex('Field1_bin', 'val2'); + riakObj.addToIndex('Field2_int', 1002); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('curly'); + riakObj.setValue('My name is Curly'); + riakObj.addToIndex('FIELD1_BIN', 'val3'); + riakObj.addToIndex('FIELD2_INT', 1003); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('veronica'); + riakObj.setValue('My name is Veronica'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4a'); + riakObj.addToIndex('FIELD1_bin', 'val4b'); + riakObj.addToIndex('FIELD2_int', 1004); + 
riakObj.addToIndex('FIELD2_int', 1005); + riakObj.addToIndex('FIELD2_int', 1006); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1007); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + } +]; +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Larry = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"My name is Larry">>, + <<"text/plain">>), +LarryMetadata = riakc_obj:get_update_metadata(Larry), +LarryIndexes = riakc_obj:set_secondary_index( + LarryMetadata, + [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}] +), +LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes). + +Moe = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"moe">>, + <<"My name is Moe">>, + <<"text/plain">>), +MoeMetadata = riakc_obj:get_update_metadata(Moe), +MoeIndexes = riakc_obj:set_secondary_index( + MoeMetadata, + [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}] +), +MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes). + +Curly = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"curly">>, + <<"My name is Curly">>, + <<"text/plain">>), +CurlyMetadata = riakc_obj:get_update_metadata(Curly), +CurlyIndexes = riakc_obj:set_secondary_index( + CurlyMetadata, + [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}] +), +CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes). + +Veronica = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"veronica">>, + <<"My name is Veronica">>, + <<"text/plain">>), +VeronicaMetadata = riakc_obj:get_update_metadata(Veronica), +VeronicaIndexes = riakc_obj:set_secondary_index( + VeronicaMetadata, + [{{binary_index, "field1"}, [<<"val4">>]}, {{binary_index, "field1"}, [<<"val4">>]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1005]}, {{integer_index, "field2"}, [1006]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1007]}] +), +VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes). +``` + +```golang +o1 := &riak.Object{ + Key: "larry", + Value: []byte("My name is Larry"), +} +o1.AddToIndex("field1_bin", "val1") +o1.AddToIntIndex("field2_int", 1001) + +o2 := &riak.Object{ + Key: "moe", + Value: []byte("My name is Moe"), +} +o2.AddToIndex("Field1_bin", "val2") +o2.AddToIntIndex("Field2_int", 1002) + +o3 := &riak.Object{ + Key: "curly", + Value: []byte("My name is Curly"), +} +o3.AddToIndex("FIELD1_BIN", "val3") +o3.AddToIntIndex("FIELD2_INT", 1003) + +o4 := &riak.Object{ + Key: "veronica", + Value: []byte("My name is Veronica"), +} +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4a") +o4.AddToIndex("FIELD1_bin", "val4b") +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1005) +o4.AddToIntIndex("FIELD2_int", 1006) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1007) + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "text/plain" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithContent(obj). 
+ Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field1_bin: val1" \ + -H "x-riak-index-field2_int: 1001" \ + -d 'My name is Larry' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \ + -H "x-riak-index-Field1_bin: val2" \ + -H "x-riak-index-Field2_int: 1002" \ + -d 'My name is Moe' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \ + -H "X-RIAK-INDEX-FIELD1_BIN: val3" \ + -H "X-RIAK-INDEX-FIELD2_INT: 1003" \ + -d 'My name is Curly' + +curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \ + -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \ + -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1007" \ + -d 'My name is Veronica' +``` + +The above objects will end up having the following secondary indexes, +respectively: + +* `Larry` --- Binary index `field1_bin` and integer index `field2_int` +* `Moe` --- Binary index `field1_bin` and integer index `field2_int` + (note that the index names are set to lowercase by Riak) +* `Curly` --- Binary index `field1_bin` and integer index `field2_int` + (note again that the index names are set to lowercase) +* `Veronica` --- Binary index `field1_bin` with the values `val4`, + `val4a`, and `val4b` and integer index `field2_int` with the values + `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed) + +As these examples show, there are safeguards in Riak that both normalize +the names of indexes and prevent the accumulation of redundant indexes. + +## Invalid Field Names and Types + +The following examples demonstrate what happens when an index field is +specified with an invalid field name or type. The system responds with +`400 Bad Request` and a description of the error. + +Invalid field name: + +```java +// The Java client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_foo'] = [1001] + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter', 'jsmith123'); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_foo', 1001) + +# Result: +riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'." 
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
+> 
+> To avoid issues such as the above, a new option has been added to the `riak.conf` file to allow you to disable or enable node participation in 2i queries. `participate_in_coverage=disabled` will prevent the node in question from participating. This feature is typically used to stop newly added nodes that have not yet received all of their data from participating in 2i queries and returning inconsistent results. Changing the `participate_in_coverage` setting requires Riak to be restarted on that node for the change to take effect. The default setting is `enabled`.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field1_bin')
+  ->withScalarValue('val1')
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
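+
+Over HTTP, the response is a JSON object containing a `keys` array. Given
+the example data stored above, the query for `val1` should return
+something like the following (a sketch; exact formatting may vary):
+
+```json
+{
+  "keys": ["larry"]
+}
+```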
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+    .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field2_int')
+  ->withScalarValue(1001)
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field1_bin",
+    "key":"val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language":"erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4")
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2'..'val4')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field1_bin')
+  ->withRangeValue('val2', 'val4')
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2', 'val4')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withRange('val2', 'val4')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"}, %% index name
+    <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4"
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithRange("val2", "val4").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4
+```
+
+Or query an integer index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L)
+    .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002..1004)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field2_int')
+  ->withRangeValue(1002, 1004)
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002, 1004)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withRange(1002, 1004)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"}, %% index name
+    1002, 1004 %% range query for keys between 1002 and 1004
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntRange(1002, 1004).
+    Build()
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field2_int",
+    "start": "1002",
+    "end": "1004"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+    .withKeyAndIndex(true)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('tweets', 'indexes')
+  ->withIndexName('hashtags_bin')
+  ->withRangeValue('rock', 'rocl')
+  ->withReturnTerms()
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">> %% range query for keys between "rock" and "rocl"
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+    .withMaxResults(5)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('tweets', 'indexes')
+  ->withIndexName('hashtags_bin')
+  ->withRangeValue('ri', 'ru')
+  ->withMaxResults(5)
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5)
+```
+
+```csharp
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+function do_query(continuation) {
+    var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+        .withBucketType('indexes')
+        .withBucket('tweets')
+        .withIndexName('hashtags_bin')
+        .withRange('ri', 'ru')
+        .withMaxResults(5)
+        .withCallback(pagination_cb);
+
+    if (continuation) {
+        binIdxCmdBuilder.withContinuation(continuation);
+    }
+
+    client.execute(binIdxCmdBuilder.build());
+}
+
+var query_keys = [];
+function pagination_cb(err, rslt) {
+    if (err) {
+        logger.error("query_cb err: '%s'", err);
+        return;
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+        query_keys = [];
+
+        if (rslt.continuation) {
+            do_query(rslt.continuation);
+        }
+    }
+
+    if (rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+do_query();
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [{max_results, 5}]
+).
+```
+
+```golang
+func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error {
+    builder := riak.NewSecondaryIndexQueryCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("tweets").
+        WithIndexName("hashtags_bin").
+        WithRange("ri", "ru").
+        WithMaxResults(5)
+
+    if continuation != nil && len(continuation) > 0 {
+        builder.WithContinuation(continuation)
+    }
+
+    cmd, err := builder.Build()
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    printIndexQueryResults(cmd)
+
+    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
+    if sciq.Response == nil {
+        return errors.New("[DevUsing2i] expected response but did not get one")
+    }
+
+    rc := sciq.Response.Continuation
+    if rc != nil && len(rc) > 0 {
+        return doPaginatedQuery(cluster, sciq.Response.Continuation)
+    }
+
+    return nil
+}
+
+func queryingPagination(cluster *riak.Cluster) error {
+    return doPaginatedQuery(cluster, nil)
+}
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true"
+```
+
+Here is an example JSON response (your client-specific response may differ):
+
+```json
+{
+  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
+  "results": [
+    { "rice": "349222574510710785" },
+    { "rickross": "349222868095217664" },
+    { "ridelife": "349221819552763905" },
+    { "ripjake": "349220649341952001" },
+    { "ripjake": "349220687057129473" }
+  ]
+}
+```
+
+Take the continuation value from the previous result set and feed it
+back into the query.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+    .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
+    .withMaxResults(5)
+    .withKeyAndIndex(true)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+  'hashtags_bin',
+  'ri'..'ru',
+  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+  max_results: 5,
+  return_terms: true
+)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('tweets', 'indexes')
+  ->withIndexName('hashtags_bin')
+  ->withRangeValue('ri', 'ru')
+  ->withMaxResults(5)
+  ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+    'hashtags_bin',
+    'ri', 'ru',
+    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+    max_results=5,
+    return_terms=True
+)
+```
+
+```csharp
+// rslt is the previous 2i fetch result
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+// See above example
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [
+        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
+        {max_results, 5},
+        {return_terms, true}
+    ]
+).
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with pagination and `return_terms`.
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys. See the pagination example above: hash
+tags (2i keys) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages which contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
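+
+The same lookup can also be issued from the client libraries by passing
+`$bucket` as the index name. Below is a minimal sketch using the Python
+client from the earlier examples (a sketch, not client-specific
+documentation; over Protocol Buffers the match key for `$bucket` is the
+bucket name):
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+# The special $bucket index has an entry for every key in the bucket,
+# keyed on the bucket name, so this returns all keys in 'people'
+keys = bucket.get_index('$bucket', 'people')
+```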
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index, as in the previous example, and pipes the results into a MapReduce
+job that counts the number of records in the `people` bucket. In order
+to improve efficiency, the batch size has been increased from the
+default size of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key":"people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
diff --git a/content/riak/kv/2.9.2/developing/usage/security.md b/content/riak/kv/2.9.2/developing/usage/security.md
new file mode 100644
index 0000000000..9c8732faba
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/security.md
@@ -0,0 +1,99 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/advanced/client-security
+  - /riak/kv/2.9.2/dev/advanced/client-security
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/2.9.2/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, also known as [security sources]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients present a CA-generated certificate
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified using the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis.
This means that +you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{<baseurl>}}riak/kv/2.9.2/developing/usage) have to use username and password. The approach +that you adopt will depend on your security needs. + +This document provides a general overview of how that works. For +managing security in Riak itself, see the following documents: + +* [Authentication and Authorization]({{<baseurl>}}riak/kv/2.9.2/using/security/basics) +* [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/) + +We also provide client-library-specific guides for the following +officially supported clients: + +* [Java]({{<baseurl>}}riak/kv/2.9.2/developing/usage/security/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.2/developing/usage/security/ruby) +* [PHP]({{<baseurl>}}riak/kv/2.9.2/developing/usage/security/php) +* [Python]({{<baseurl>}}riak/kv/2.9.2/developing/usage/security/python) +* [Erlang]({{<baseurl>}}riak/kv/2.9.2/developing/usage/security/erlang) + +## Certificates, Keys, and Authorities + +If Riak security is enabled, all client operations, regardless of the +security source you choose for those clients, must be over a secure SSL +connection. If you are using a self-generated Certificate Authority +(CA), Riak and connecting clients will need to share that CA. + +To use certificate-based auth, you will need to create a Public Key +Infrastructure (PKI) based on +[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central +foundation of your PKI should be a Certificate Authority (CA), created +inside of a secure environment, that can be used to sign certificates. +In addition to a CA, your client will need to have access to a private +key shared only by the client and Riak as well as a CA-generated +certificate. + +To prevent so-called [Man-in-the-Middle +attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private +keys should never be shared beyond Riak and connecting clients. + +> **HTTP not supported** +> +> Certificate-based authentication is available only through Riak's +[Protocol Buffers]({{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{<baseurl>}}riak/kv/2.9.2/developing/api/http). + +### Default Names + +In Riak's [configuration files]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#security), the +default certificate file names are as follows: + +Cert | Filename +:----|:------- +Certificate authority (CA) | `cacertfile.pem` +Private key | `key.pem` +CA-generated cert | `cert.pem` + +These filenames will be used in the client-library-specific tutorials. diff --git a/content/riak/kv/2.9.2/developing/usage/security/erlang.md b/content/riak/kv/2.9.2/developing/usage/security/erlang.md new file mode 100644 index 0000000000..8c5f91adf9 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/usage/security/erlang.md @@ -0,0 +1,114 @@ +--- +title_supertext: "Client Security:" +title: "Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Erlang" + identifier: "usage_security_erlang" + weight: 103 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.2/dev/advanced/client-security/erlang + - /riak/kv/2.9.2/dev/advanced/client-security/erlang +--- + +This tutorial shows you how to set up a Riak Erlang client to +authenticate itself when connecting to Riak. 
+
+If you are using [trust]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#trust-based-authentication)- or [PAM]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", ""},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+```
+
+Please note that you do not need to specify a password if you are not
+using password-based authentication. If you are using a different
+security source, Riak will ignore the password. You can enter an empty
+string (as in the example above) or anything you'd like.
+
+This client is not currently set up to use any of the available security
+sources, with the exception of trust-based authentication, provided that
+the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which the client is connecting has been specified as trusted. More
+on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `SecurityOptions` list from
+above. We'll use the password `rosebud` here and in the rest of the
+examples.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", "rosebud"},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.2/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` diff --git a/content/riak/kv/2.9.2/developing/usage/security/java.md b/content/riak/kv/2.9.2/developing/usage/security/java.md new file mode 100644 index 0000000000..6d8eac60d9 --- /dev/null +++ b/content/riak/kv/2.9.2/developing/usage/security/java.md @@ -0,0 +1,117 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.2/dev/advanced/client-security/java + - /riak/kv/2.9.2/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to
+create a cluster object (we'll call it `cluster`), which will in turn be
+used to create a `client` object. The setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.RiakCluster;
+import com.basho.riak.client.api.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+    .withRemoteAddress("127.0.0.1")
+    .withRemotePort(8087)
+    // This will specify a username but no password or keystore:
+    .withAuth("riakuser", null, null)
+    .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+    .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+builder rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+    .withRemoteAddress("127.0.0.1")
+    .withRemotePort(8087)
+    .withAuth("riakuser", "rosebud", ks)
+    .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
diff --git a/content/riak/kv/2.9.2/developing/usage/security/php.md b/content/riak/kv/2.9.2/developing/usage/security/php.md
new file mode 100644
index 0000000000..c56a22d458
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/security/php.md
@@ -0,0 +1,118 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/advanced/client-security/php
+  - /riak/kv/2.9.2/dev/advanced/client-security/php
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics).
[Certificate]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
diff --git a/content/riak/kv/2.9.2/developing/usage/security/python.md b/content/riak/kv/2.9.2/developing/usage/security/python.md
new file mode 100644
index 0000000000..998fa7e554
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/security/python.md
@@ -0,0 +1,172 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/advanced/client-security/python
+  - /riak/kv/2.9.2/dev/advanced/client-security/python
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#trust-based-authentication)- or [PAM]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g.  Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources, with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the above, with the exception that we'll also specify a
+password for the client in the `creds` object from above. We'll use the
+password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.2/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a general CA (as with all security sources), a username, a
+CA-generated cert, and a private key. We'll assume that all certs are
+stored in `/ssl_dir`, as in the previous examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      cert_file='/ssl_dir/cert.pem',
+                      pkey_file='/ssl_dir/key.pem')
+```
+
+## Specifying a Certificate Revocation List
+
+If you are using a CA-generated Certificate Revocation List (CRL), you
+can specify its filepath using the `crl_file` parameter.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      crl_file='/ssl_dir/revocation.crl')
+```
+
+## Specifying Ciphers
+
+To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/2.9.2/using/security/basics/#security-ciphers), you can pass in a colon-delimited
+string to the `ciphers` parameter:
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC')
+```
+
+## Using OpenSSL Objects
+
+Whenever you specify certs, you have the option of either passing in
+file paths as strings (as in the examples above) or properly created
+OpenSSL objects, e.g. objects created using the
+[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If
+you generate OpenSSL objects this way, you should note that they must
+be specified differently when creating a `SecurityCreds` object. The
+table below lists the appropriate parameter names for each method, as
+well as the pyOpenSSL class to which each cert must belong if you create
+OpenSSL objects.
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `pkey_file` | `pkey` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify filepaths, the appropriate certs will be loaded and
+converted into the appropriate OpenSSL object. The functions used for
+this are `OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
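+
+For example, here is a minimal sketch of passing pyOpenSSL objects
+instead of file paths, assuming the `/ssl_dir` certs from the examples
+above and the parameter names from the table:
+
+```python
+from OpenSSL import crypto
+
+from riak import RiakClient
+from riak.security import SecurityCreds
+
+# Load each PEM file into a pyOpenSSL object ourselves...
+with open('/ssl_dir/cacertfile.pem') as f:
+    cacert = crypto.load_certificate(crypto.FILETYPE_PEM, f.read())
+with open('/ssl_dir/cert.pem') as f:
+    cert = crypto.load_certificate(crypto.FILETYPE_PEM, f.read())
+with open('/ssl_dir/key.pem') as f:
+    pkey = crypto.load_privatekey(crypto.FILETYPE_PEM, f.read())
+
+# ...and hand the objects, rather than file paths, to SecurityCreds
+creds = SecurityCreds(username='riakuser',
+                      cacert=cacert,
+                      cert=cert,
+                      pkey=pkey)
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```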
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `pkey_file` | `pkey` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify filepaths, the appropriate certs will be loaded and
+converted into the appropriate OpenSSL object. The functions used for
+this are `OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
diff --git a/content/riak/kv/2.9.2/developing/usage/security/ruby.md b/content/riak/kv/2.9.2/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..f7ed0ff9df
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/security/ruby.md
@@ -0,0 +1,158 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/advanced/client-security/ruby
+  - /riak/kv/2.9.2/dev/advanced/client-security/ruby
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/)- or [PAM]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we will
+specify a password for the client in the `authentication` hash. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.2/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/ssl_dir/client_cert.pem',
+    cert: '/ssl_dir/cert.pem',
+    key: '/ssl_dir/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL object.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object.
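+
+For example, here is a minimal sketch (reusing the connection and cert
+settings from above) that turns CRL checking off:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Skip the potentially slow CRL check described above
+    crl: false
+  }
+)
+```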
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)),
+the OCSP endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
diff --git a/content/riak/kv/2.9.2/developing/usage/updating-objects.md b/content/riak/kv/2.9.2/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..91f8124c56
--- /dev/null
+++ b/content/riak/kv/2.9.2/developing/usage/updating-objects.md
@@ -0,0 +1,774 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/using/updates
+  - /riak/kv/2.9.2/dev/using/updates
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context). Context objects track the causal history of Riak objects.
+They are attached to _all_ Riak objects as metadata, and they are not
+readable by humans. They may sound complex---and they are fairly complex
+behind the scenes---but using them in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(currentChampion)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->withLocation($location)
+  ->build()
+  ->execute()
+  ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withLocation($location)
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but it
+%% will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+obj = rsp.Values[0]
+obj.Value = []byte("Harlem Globetrotters")
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc = cmd.(*riak.StoreValueCommand)
+rsp = svc.Response
+obj = rsp.Values[0]
+fmt.Printf("champion: %v", string(obj.Value))
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+```
+
+In the samples above, we didn't need to actually interact with the
+context object, as retaining and passing along the context object was
+accomplished automatically by the client. If, however, you do need
+access to an object's context, the clients enable you to fetch it from
+the object:
+
+```java
+// Using the RiakObject obj from above:
+
+Vclock vClock = obj.getVclock();
+System.out.println(vClock.asString());
+
+// The context object will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```ruby
+# Using the RObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```php
+# Using the $object from above:
+
+echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```python
+# Using the RiakObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```csharp
+// Using the RiakObject obj from above:
+var vclock = rslt.Value.VectorClock;
+Console.WriteLine(Convert.ToBase64String(vclock));
+
+// The output will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```javascript
+// Using the RiakObject fetchedObj from above:
+var fetchedObj = rslt.values.shift();
+logger.info("vclock: %s", fetchedObj.getVClock().toString('base64'));
+
+// The output will look something like this:
+// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA=
+```
+
+```erlang
+%% Using the Obj object from above:
+
+riakc_obj:vclock(Obj).
+
+%% The context object will look something like this in the Erlang shell:
+%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126,
+%%   6,175,157,255,57,131,41,145,49,143,149,225,240,...>>
+```
+
+```golang
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Println(rsp.VClock)
+
+// Output:
+// X3hNXFq3ythUqvvrG9eJEGbUyLS
+```
+
+## The Object Update Cycle
+
+If you decide that your application requires mutable data in Riak, we
+recommend that you:
+
+* avoid high-frequency object updates to the same key (i.e. multiple
+  updates per second for long periods of time), as this will degrade
+  Riak performance; and that you
+* follow a read-modify-write cycle when performing updates.
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated).
+This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+  * [Java]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/java)
+  * [Ruby]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/ruby)
+  * [Python]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/python)
+  * [C#]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/csharp)
+  * [Go]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+> [strong consistency documentation]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/2.9.2/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official
+Ruby, PHP, Python, .NET, Node.js, Erlang, and Go clients. Because updates
+with the official Java client function somewhat differently, those
+examples can be found in the [section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which will bear the bucket type
+`siblings`, which sets `allow_mult` to `true`. The key for each object
+is the name of the team, e.g. `giants`, `broncos`, etc. Each object will
+consist of the name of the coach in plain text.
+Here's an example of
+creating and storing such an object:
+
+```ruby
+bucket = client.bucket('coaches')
+obj = bucket.get_or_new('seahawks', type: 'siblings')
+obj.content_type = 'text/plain'
+obj.raw_data = 'Pete Carroll'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings'));
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+
+if ($response->isSuccess()) {
+    $object = $response->getObject();
+    $object->setData('Pete Carroll');
+} else {
+    $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain');
+}
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withObject($object)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+from riak import RiakObject
+
+bucket = client.bucket_type('siblings').bucket('coaches')
+obj = RiakObject(client, bucket, 'seahawks')
+obj.content_type = 'text/plain'
+obj.data = 'Pete Carroll'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings", "coaches", "seahawks");
+var obj = new RiakObject(id, "Pete Carroll",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucketType('siblings');
+riakObj.setBucket('coaches');
+riakObj.setKey('seahawks');
+riakObj.setValue('Pete Carroll');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    } else {
+        logger.info('Stored Pete Carroll');
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>},
+                    <<"seahawks">>,
+                    <<"Pete Carroll">>,
+                    <<"text/plain">>).
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Pete Carroll"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("siblings").
+    WithBucket("coaches").
+    WithKey("seahawks").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fmt.Println("Stored Pete Carroll")
+```
+
+Every once in a while, though, head coaches change in the NFL, which
+means that our data would need to be updated.
+Below is an example function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($team, $coach) {
+    global $riak;
+
+    // The read phase
+    $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+      ->atLocation($location)
+      ->build()
+      ->execute();
+
+    // The modify phase
+    if ($response->isSuccess()) {
+        $object = $response->getObject();
+        $object->setData($coach);
+    } else {
+        $object = new \Basho\Riak\Object($coach, 'text/plain');
+    }
+
+    // The write phase
+    $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+      ->withObject($object)
+      ->atLocation($location)
+      ->build()
+      ->execute();
+
+    return $response->isSuccess();
+}
+
+echo update_coach('packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Team, NewCoach) ->
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage, passing the team and new coach as binaries:
+update_coach(<<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak.
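+
+If you find yourself repeating this pattern, it can be handy to wrap all
+three phases in a small helper. The sketch below is hypothetical (it is
+not part of the official client API) and assumes a connected `client`,
+an existing key, and a bucket type with `allow_mult` enabled; the block
+computes the new value:
+
+```ruby
+def read_modify_write(client, type, bucket_name, key)
+  bucket = client.bucket_type(type).bucket(bucket_name)
+  # Read: fetches the stored value along with its causal context
+  obj = bucket.get(key)
+  # Modify: the caller's block computes the new value
+  obj.raw_data = yield(obj.raw_data)
+  # Write: the fetched context is passed back to Riak automatically
+  obj.store
+end
+
+# Example usage
+read_modify_write(client, 'siblings', 'coaches', 'packers') { |_old| 'Vince Lombardi' }
+```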
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/#sensor-data)
+in Riak and using timestamps as keys, for example, then you can be sure
+that keys are not repeated. In that case, making writes to Riak without
+first reading the object is fine. If you're not certain, however, then
+we recommend always reading the object first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+    public String username;
+    public List<String> hobbies;
+
+    public User(String username, List<String> hobbies) {
+        this.username = username;
+        this.hobbies = hobbies;
+    }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` parameter:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that you can use to replace the existing
+object with a new object of the same type rather than changing one or
+more properties of the object.
+Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution#siblings). If
+you have an object---or many objects, for that matter---with siblings,
+a no-operation update will fetch the object _and its causal context_
+and write the object back to Riak with the same, fetched context. This
+has the effect of telling Riak that you deem this value to be most
+current. Riak can then use this information in internal sibling
+resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above would update the object without fetching it. You
+could, however, use a no-operation update to _read_ an object as well if
+you set `return_body` to `true` in your request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withFetchOption(Option.RETURN_BODY, true)
+        .withUpdate(Update.noopUpdate())
+        .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
diff --git a/content/riak/kv/2.9.2/downloads.md b/content/riak/kv/2.9.2/downloads.md
new file mode 100644
index 0000000000..8cd5444cda
--- /dev/null
+++ b/content/riak/kv/2.9.2/downloads.md
@@ -0,0 +1,22 @@
+---
+title: "Download for Riak KV 2.9.2"
+description: "Download some stuff!"
+menu:
+  riak_kv-2.9.2:
+    name: "Download Riak KV"
+    identifier: "download_riak_kv"
+    weight: 101
+    pre: download-alt
+project: "riak_kv"
+project_version: 2.9.2
+toc: false
+layout: downloads
+listed_projects:
+  - project: "riak_kv"
+    version: 2.9.2
+    title: "Riak KV"
+    install_instructions_set: "setup/installing"
+aliases:
+  - /riak/2.9.2/downloads
+  - /riak/kv/2.9.2/downloads
+---
diff --git a/content/riak/kv/2.9.2/index.md b/content/riak/kv/2.9.2/index.md
new file mode 100644
index 0000000000..d03e54520c
--- /dev/null
+++ b/content/riak/kv/2.9.2/index.md
@@ -0,0 +1,72 @@
+---
+title: "Riak KV 2.9.2"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Riak KV"
+    identifier: "index"
+    weight: 100
+    pre: riak
+toc: false
+aliases:
+  - /riak/2.9.2/
+---
+
+[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/
+[config index]: {{<baseurl>}}riak/kv/2.9.2/configuring
+[downloads]: {{<baseurl>}}riak/kv/2.9.2/downloads/
+[install index]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/
+[plan index]: {{<baseurl>}}riak/kv/2.9.2/setup/planning
+[perf open files]: {{<baseurl>}}riak/kv/2.9.2/using/performance/open-files-limit
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/debian-ubuntu
+[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search
+[getting started]: {{<baseurl>}}riak/kv/2.9.2/developing/getting-started
+[dev client libraries]: {{<baseurl>}}riak/kv/2.9.2/developing/client-libraries
+
+Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data.
+
+## Supported Operating Systems
+
+- Amazon Linux 2016.09 (AWS)
+- Amazon Linux 2 (AWS)
+- CentOS 6
+- CentOS 7
+- CentOS 8
+- Debian 7.0 ("Wheezy")
+- Debian 8.0 ("Jessie")
+- Debian 9.0 ("Stretch")
+- Red Hat Enterprise Linux 6
+- Red Hat Enterprise Linux 7
+- Red Hat Enterprise Linux 8
+- Raspbian Buster
+- Ubuntu 12.04 ("Precise Pangolin")
+- Ubuntu 14.04 ("Trusty Tahr")
+- Ubuntu 16.04 ("Xenial Xerus")
+- Ubuntu 18.04 ("Bionic Beaver")
+- FreeBSD 10.4
+- FreeBSD 11.1
+- Mac OSX 10.11+ (development only)
+
+## Getting Started
+
+Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started:
+
+1. [Install Riak KV][install index]
+2. [Plan your Riak KV setup][plan index]
+3. [Configure Riak KV for your needs][config index]
+
+{{% note title="Developing with Riak KV" %}}
+If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/2.9.2/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more.
+{{% /note %}}
+
+## Popular Docs
+
+1. [Open Files Limit][perf open files]
+2. [Installing on Debian-Ubuntu][install debian & ubuntu]
+3. [Developing with Riak KV: Searching][usage search]
+4. [Developing with Riak KV: Getting Started][getting started]
+5. [Developing with Riak KV: Client Libraries][dev client libraries]
diff --git a/content/riak/kv/2.9.2/learn.md b/content/riak/kv/2.9.2/learn.md
new file mode 100644
index 0000000000..01f19413bb
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn.md
@@ -0,0 +1,47 @@
+---
+title: "Learn About Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Learning"
+    identifier: "learn"
+    weight: 400
+    pre: beaker
+toc: true
+---
+
+[learn why riak]: ./why-riak-kv/
+[learn use cases]: ./use-cases/
+[learn new nosql]: ./new-to-nosql/
+[glossary]: ./glossary/
+[concepts]: ./concepts/
+
+## In This Section
+
+#### [Why Riak KV?][learn why riak]
+
+An overview of Riak KV and when to use it.
+
+[Learn More >>][learn why riak]
+
+#### [Use Cases][learn use cases]
+
+Details use cases and applications in which Riak KV excels.
+
+[Learn More >>][learn use cases]
+
+#### [Glossary][glossary]
+
+A list of terms relating to Riak used throughout the documentation.
+
+[Learn More >>][glossary]
+
+#### [Concepts][concepts]
+
+Provides definitions for, insight into, and high-level information about
+the various parts of Riak KV.
+
+[Learn More >>][concepts]
diff --git a/content/riak/kv/2.9.2/learn/concepts.md b/content/riak/kv/2.9.2/learn/concepts.md
new file mode 100644
index 0000000000..cff2f5f0e8
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/concepts.md
@@ -0,0 +1,44 @@
+---
+title: "Concepts"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Concepts"
+    identifier: "learn_concepts"
+    weight: 104
+    parent: "learn"
+toc: true
+---
+
+[concept aae]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets
+[concept cap neg]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/capability-negotiation
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+[concept crdts]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/crdts
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency
+[concept keys objects]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/reference/strong-consistency
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/vnodes
+[config index]: {{<baseurl>}}riak/kv/2.9.2/configuring
+[plan index]: {{<baseurl>}}riak/kv/2.9.2/setup/planning
+[use index]: {{<baseurl>}}riak/kv/2.9.2/using/
+
+Riak KV has many great features, functions, and guiding principles that
+inform how the product works. This section provides definitions for,
+insight into, and high-level information about the various parts of Riak
+KV you will encounter as you [plan][plan index], [configure][config index],
+and [use][use index] Riak.
+
+Learn more about:
+
+* [Active Anti-Entropy (AAE)][concept aae]
+* [Buckets][concept buckets]
+* [Capability Negotiation][concept cap neg]
+* [Causal Context][concept causal context]
+* [Clusters][concept clusters]
+* [Convergent Replicated Data Types (CRDTs)][concept crdts]
+* [Eventual Consistency][concept eventual consistency]
+* [Keys and Objects][concept keys objects]
+* [Replication][concept replication]
+* [Virtual Nodes (vnodes)][concept vnodes]
diff --git a/content/riak/kv/2.9.2/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.9.2/learn/concepts/active-anti-entropy.md
new file mode 100644
index 0000000000..9fab8dd8a5
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/concepts/active-anti-entropy.md
@@ -0,0 +1,107 @@
+---
+title: "Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Active Anti-Entropy"
+    identifier: "learn_concepts_aae"
+    weight: 100
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.2/theory/concepts/aae
+  - /riak/kv/2.9.2/theory/concepts/aae
+---
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter
+[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/active-anti-entropy
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency
+[config aae]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/#active-anti-entropy
+[glossary read rep]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#read-repair
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode
+[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree
+[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search
+
+In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored
+on different nodes are an expected byproduct of node failure, concurrent
+client updates, physical data loss and corruption, and other events that
+distributed systems are built to handle. These conflicts occur when
+objects are either
+
+* **missing**, as when one node holds a replica of the object and
+  another node does not, or
+* **divergent**, as when the values of an existing object differ across
+  nodes.
+
+Riak KV offers two means of resolving object conflicts: read repair and
+active anti-entropy (AAE). Both of these conflict resolution mechanisms
+apply to normal key/value data in Riak as well as to
+[search indexes][usage search].
+
+## Read Repair vs. Active Anti-Entropy
+
+In versions of Riak prior to 1.3, replica conflicts were healed via
+[read repair][glossary read rep], which is a _passive_
+anti-entropy mechanism that heals object conflicts only when a read
+request reaches Riak from a client. Under read repair, if the
+[vnode][glossary vnode] coordinating the read request determines
+that different nodes hold divergent values for the object, the repair
+process will be set in motion.
+
+One advantage of using read repair alone is that it doesn't require any
+kind of background process to take effect, which can cut down on CPU
+resource usage. The drawback of the read repair-only approach, however,
+is that the healing process can only ever reach those objects that
+are read by clients. Any conflicts in objects that are not read by
+clients will go undetected.
+
+The _active_ anti-entropy (AAE) subsystem was added to Riak in
+version 1.3 to enable conflict resolution to run as a
+continuous background process, in contrast with read repair, which does
+not run continuously. AAE is most useful in clusters containing
+so-called "cold data" that may not be read for long periods of time,
+even months or years, and is thus not reachable by read repair.
+
+Although AAE is enabled by default, it can be turned off if necessary.
+See our documentation on [managing active anti-entropy][cluster ops aae]
+for information on how to enable and disable AAE, as well as on configuring
+and monitoring AAE.
+
+## Active Anti-Entropy and Hash Tree Exchange
+
+In order to compare object values between replicas without using more
+resources than necessary, Riak relies on [Merkle
+tree] hash exchanges between
+nodes.
+
+Using this type of exchange enables Riak to compare a balanced tree of
+Riak object hashes. Any difference at a higher level in the hierarchy
+means that at least one value has changed at a lower level. AAE
+recursively compares the tree, level by level, until it pinpoints exact
+values with a difference between nodes. The result is that AAE is able
+to run repair operations efficiently regardless of how many objects are
+stored in a cluster, since it need only repair specific objects instead
+of all objects.
+
+In contrast with related systems, Riak uses persistent, on-disk hash
+trees instead of in-memory hash trees. The advantages of this approach
+are twofold:
+
+* Riak can run AAE operations with a minimal impact on memory usage
+* Riak nodes can be restarted without needing to rebuild hash trees
+
+In addition, hash trees are updated in real time as new writes come in,
+which reduces the time that it takes to detect and repair missing or
+divergent replicas.
+
+As an additional fallback measure, Riak periodically clears and
+regenerates all hash trees from on-disk key/value data, which enables
+Riak to detect silent data corruption to on-disk data arising from disk
+failure, faulty hardware, and other sources. The default time period for
+this regeneration is one week, but this can be adjusted in each node's
+[configuration file][config aae].
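+
+To build intuition for this exchange, here is a toy Ruby sketch (not
+Riak's actual implementation) that builds a hash tree over each
+replica's values and descends only into subtrees whose hashes differ:
+
+```ruby
+require 'digest'
+
+# Build a list of tree levels, root first, leaves last.
+def build_tree(leaves)
+  level = leaves.map { |v| Digest::SHA1.hexdigest(v.to_s) }
+  tree = [level]
+  while level.size > 1
+    level = level.each_slice(2).map { |pair| Digest::SHA1.hexdigest(pair.join) }
+    tree.unshift(level)
+  end
+  tree
+end
+
+# Return the indices of leaves that differ between two replicas' trees,
+# comparing hashes level by level and skipping identical subtrees.
+def diff_leaves(a, b, level = 0, index = 0)
+  return [] if a[level][index] == b[level][index]
+  return [index] if level == a.length - 1
+  children = [index * 2, index * 2 + 1].select { |i| i < a[level + 1].length }
+  children.flat_map { |i| diff_leaves(a, b, level + 1, i) }
+end
+
+node_a = build_tree(%w[v1 v2 v3 v4])
+node_b = build_tree(%w[v1 v2 v3-stale v4])
+diff_leaves(node_a, node_b) # => [2], i.e. only the third object needs repair
+```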
diff --git a/content/riak/kv/2.9.2/learn/concepts/buckets.md b/content/riak/kv/2.9.2/learn/concepts/buckets.md
new file mode 100644
index 0000000000..7b7bb76a0f
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/concepts/buckets.md
@@ -0,0 +1,213 @@
+---
+title: "Buckets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Buckets"
+    identifier: "learn_concepts_buckets"
+    weight: 101
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.2/theory/concepts/Buckets
+  - /riak/kv/2.9.2/theory/concepts/Buckets
+  - /riak/2.9.2/theory/concepts/buckets
+  - /riak/kv/2.9.2/theory/concepts/buckets
+---
+
+[apps cluster metadata]: {{<baseurl>}}riak/kv/2.9.2/developing/app-guide/cluster-metadata
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/bucket-types
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/strong-consistency
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context
+[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context/#siblings
+[concept replication]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/reference/strong-consistency
+[config basic]: {{<baseurl>}}riak/kv/2.9.2/configuring/basic
+[dev api http]: {{<baseurl>}}riak/kv/2.9.2/developing/api/http
+[dev data types]: {{<baseurl>}}riak/kv/2.9.2/developing/data-types
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#ring
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/multi
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/replication
+
+Buckets are used to define a virtual keyspace for storing Riak objects.
+They enable you to define non-default configurations over that keyspace
+concerning [replication properties][concept replication] and [other
+parameters][config basic].
+
+In certain respects, buckets can be compared to tables in relational
+databases or to folders in filesystems. From the standpoint
+of performance, buckets with default configurations are essentially
+"free," while non-default configurations, defined [using bucket
+types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.
+
+## Configuration
+
+Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables
+you to create and modify sets of configurations and apply them to as
+many buckets as you wish. With bucket types, you can configure the
+following bucket-level parameters, overriding the default values if you
+wish.
+
+#### allow_mult
+
+Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on
+the context. See the documentation on [`allow_mult`][usage bucket types] for more
+information.
+
+#### n_val
+
+Specifies the number of copies of each object to be stored in the
+cluster. See the documentation on [replication properties][usage replication]. Default:
+`3`.
+
+#### last_write_wins
+
+Indicates if an object's timestamp will be used to decide the canonical
+write in the case of a conflict. See the documentation on [vector
+clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default:
+`false`.
+
+#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum
+
+See the documentation on [replication properties][usage replication] for more information
+on all of these properties.
+
+#### precommit
+
+A list of Erlang functions to be executed before writing an object. See
+our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit
+hooks, i.e. an empty list.
+
+#### postcommit
+
+A list of Erlang functions to be executed after writing an object. See
+our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit
+hooks, i.e. an empty list.
+
+#### old_vclock, young_vclock, small_vclock, big_vclock
+
+These settings enable you to manage [vector clock pruning][concept causal context].
+
+#### backend
+
+If you are using the [Multi][plan backend multi] backend, this property enables you to
+determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using
+LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_
+buckets of all types will use the assigned backend.
+
+#### consistent
+
+If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets
+bearing a type, this setting must be set to `true`. The default is
+`false`. More information can be found in our documentation on [using
+strong consistency][cluster ops strong consistency].
+
+#### datatype
+
+If you are using [Riak data types][dev data types], this setting
+determines which data type will be used in
+buckets of this bucket type. Possible values: `counter`, `set`, or
+`map`.
+
+#### dvv_enabled
+
+Whether [dotted version vectors][concept causal context]
+will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.
+
+#### chash_keyfun, linkfun
+
+These settings involve features that have been deprecated. You will not
+need to adjust these values.
+
+## Fetching Bucket Properties
+
+If you'd like to see how a particular bucket has been configured, you
+can do so using our official client libraries or through Riak's [HTTP
+API][dev api http]. The following would fetch the properties for the bucket
+`animals` if that bucket had a default configuration, i.e.
+the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak's HTTP API is running on "localhost" and port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals', 'my_custom_type')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
diff --git a/content/riak/kv/2.9.2/learn/concepts/capability-negotiation.md b/content/riak/kv/2.9.2/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..257d30ebbe
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/concepts/capability-negotiation.md
@@ -0,0 +1,32 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.2/theory/concepts/capability-negotiation
+  - /riak/kv/2.9.2/theory/concepts/capability-negotiation
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
+
+Rolling upgrades no longer require you to disable and then re-enable features, because the *capability negotiation* subsystem automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
+
+{{% note title="Note on Mixed Versions" %}}
+The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations.
+{{% /note %}}
diff --git a/content/riak/kv/2.9.2/learn/concepts/causal-context.md b/content/riak/kv/2.9.2/learn/concepts/causal-context.md
new file mode 100644
index 0000000000..2171c6e66f
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/concepts/causal-context.md
@@ -0,0 +1,285 @@
+---
+title: "Causal Context"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Causal Context"
+    identifier: "learn_concepts_causal_context"
+    weight: 103
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.2/theory/concepts/context
+  - /riak/kv/2.9.2/theory/concepts/context
+---
+
+[concept aae]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency
+[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management
+[dev api http]: {{<baseurl>}}riak/kv/2.9.2/developing/api/http
+[dev key value]: {{<baseurl>}}riak/kv/2.9.2/developing/key-value-modeling
+[glossary read rep]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#read-repair
+[perf latency reduc]: {{<baseurl>}}riak/kv/2.9.2/using/performance/latency-reduction
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution
+[usage protocol buffers]: {{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers
+[usage updating objects]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/updating-objects
+[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock
+[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/
+[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/
+[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563
+[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf
+[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf
+[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors
+[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM
+
+Because Riak is an [eventually consistent][concept eventual consistency],
+[clustered][concept clusters] database, [conflicts][usage conflict resolution] between
+object replicas stored on different nodes are inevitable, particularly
+when multiple clients update an object simultaneously.
+
+## The Problem of Conflicting Values
+
+To illustrate this problem, imagine that you're building a
+[CRM]
+application and storing customer information in Riak. Now imagine that
+information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`.
+What happens if Marie has two browser windows open and changes her phone
+number to 555-1337 in one window and saves it, and then also changes it
+to 555-1212 in another window and saves it?
+
+This means that two different values are sent into Riak. So what
+happens at that point? There are several possible outcomes:
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+    b. Riak creates sibling values, aka **siblings**, for the object
+
+    c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by your settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 1.4, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+* Whether one object is a direct descendant of the other
+* Whether the objects are direct descendants of a common parent
+* Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects.
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks](#vector-clocks). In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object but they
+can't identify which value was associated with each update.
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#sibling-explosion) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates,
+objects that use them should generally be smaller than objects that use
+vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence when interacting with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`, so if you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion.
At the extreme, having an enormous object in a +node can cause reads of that object to crash the entire node. Other +issues include [undue latency][perf latency reduc] and +out-of-memory errors. + +To prevent sibling explosion, we recommend the following: + +1. Use [dotted version vectors](#dotted-version-vectors) +instead of vector clocks for causal +context. +2. Always update mutable objects within a read/modify/write cycle. More +information can be found in the [Object Updates][usage updating objects] doc. + +## Resources + +* [Evaluating Dotted Version Vectors in Riak] +* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study] +* [Dotted Version Vector Sets] +* [A History of Time in Riak] diff --git a/content/riak/kv/2.9.2/learn/concepts/clusters.md b/content/riak/kv/2.9.2/learn/concepts/clusters.md new file mode 100644 index 0000000000..508c9397d9 --- /dev/null +++ b/content/riak/kv/2.9.2/learn/concepts/clusters.md @@ -0,0 +1,113 @@ +--- +title: "Clusters" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Clusters" + identifier: "learn_concepts_clusters" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.2/theory/concepts/Clusters + - /riak/kv/2.9.2/theory/concepts/Clusters + - /riak/2.9.2/theory/concepts/clusters + - /riak/kv/2.9.2/theory/concepts/clusters +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.2/learn/dynamo +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution +[usage replication]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/replication + + +Riak's default mode of operation is to work as a cluster consisting of +multiple [nodes][glossary node], i.e. multiple well-connected data +hosts. + +Each host in the cluster runs a single instance of Riak, referred to as +a Riak node. Each Riak node manages a set of virtual nodes, or +[vnodes][glossary vnode], that are responsible for storing a +separate portion of the keys stored in the cluster. + +In contrast to some high-availability systems, Riak nodes are _not_ +clones of one another, and they do not all participate in fulfilling +every request. Instead, you can configure, at runtime or at request +time, the number of nodes on which data is to be replicated, as well as +when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed. + +## The Ring + +Though much of this section is discussed in our annotated discussion of +the Amazon [Dynamo paper][learn dynamo], it nonetheless provides a summary of +how Riak implements the distribution of data throughout a cluster. + +Any client interface to Riak interacts with objects in terms of the +[bucket][concept buckets] and [key][concept keys objects] in which a value is +stored, as well as the [bucket type][usage bucket types] that is used +to set the bucket's properties. + +Internally, Riak computes a 160-bit binary hash of each bucket/key pair +and maps this value to a position on an ordered **ring** of all such +values. 
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes make up a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![A Riak Ring]({{<baseurl>}}images/riak-data-distribution.png)
+
+When N is set to 3, the value `REM` is stored in the key `artist`. That
+key is assigned to 3 partitions out of 32 available partitions. When a
+read request is made to Riak, the ring state will be used to determine
+which partitions are responsible. From there, a variety of
+[configurable parameters][usage replication] determine how Riak
+will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates.
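+
+If you want to see the ring state that nodes are gossiping, you can inspect it from an Erlang shell attached to a running node (e.g. via `riak attach`). The sketch below uses `riak_core_ring_manager:get_my_ring/0`, which also appears in the replication examples later in these docs; the `riak_core_ring:all_members/1` and `riak_core_ring:num_partitions/1` calls are our assumptions about the internal riak_core API and are shown for illustration only:
+
+```erlang
+%% Grab this node's current view of the gossiped ring state
+{ok, Ring} = riak_core_ring_manager:get_my_ring(),
+
+%% List the nodes that are members of the ring (assumed internal API)
+Members = riak_core_ring:all_members(Ring),
+
+%% Count the partitions the ring is divided into, i.e. the ring size
+NumPartitions = riak_core_ring:num_partitions(Ring).
+```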
diff --git a/content/riak/kv/2.9.2/learn/concepts/crdts.md b/content/riak/kv/2.9.2/learn/concepts/crdts.md new file mode 100644 index 0000000000..0cd88bf935 --- /dev/null +++ b/content/riak/kv/2.9.2/learn/concepts/crdts.md @@ -0,0 +1,248 @@ +--- +title_supertext: "Concept" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Data Types" + identifier: "learn_concepts_data_types" + weight: 104 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.2/theory/concepts/crdts + - /riak/kv/2.9.2/theory/concepts/crdts +--- + +[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[data types converg]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/crdts/#convergence +[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html +[data types impl]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/strong-consistency +[dev data types]: {{<baseurl>}}riak/kv/2.9.2/developing/data-types +[riak_dt]: https://github.com/basho/riak_dt +[dev data types context]: {{<baseurl>}}riak/kv/2.9.2/developing/data-types/#data-types-and-context +[glossary node]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution + +Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections: + +- Counters +- Flags +- HyperLogLogs +- Maps +- Registers +- Sets + +The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients. + +Instead of the usual create, read, update, and delete (CRUD) operations +performed on key/value pairs, data types enable you to perform +operations such as removing a register from a map, telling a counter to +increment itself by 5, or enabling a flag that was previously disabled. + +It's important to note that Riak Data Types are operations-based from the standpoint of connecting clients. Like CRDTs, the [convergence logic][data types converg] is state-based behind the scenes. + +Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below. + +For more articles on CRDTs, check out this [reading list][crdts reading list]. + + +## Counters + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used within a map. A counter’s value can only be a positive integer, negative integer, or zero. 
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, counters should not be used, because uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, HyperLogLogs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+    Counter: numberOfRetweets,
+    Register: username,
+    Register: tweetContent,
+    Flag: favorited?,
+    Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which includes
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+Registers can only have the binaries stored within them changed. They can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.
+
+
+## Sets
+
+Sets are collections of unique binary values, such as strings. For
+example, if you attempt to add the element `shovel` to a set that
+already contains `shovel`, the operation will be ignored by Riak KV.
+Sets can be used either on their own or embedded in a map.
+ +Some examples of using sets: + +- Storing the UUIDs of a user's friends in a social network application +- Storing items in an e-commerce shopping cart + +### Operations + +Sets are subject to four basic operations: add an element, remove an +element, add multiple elements, or remove multiple elements. + + +## Advantages and Disadvantages of Data Types + +[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider. + +One of the core purposes behind data types is to relieve developers +using Riak KV of the burden of producing data convergence at the +application level by absorbing a great deal of that complexity into Riak KV +itself. Riak KV manages this complexity by building eventual consistency +into the data types themselves instead of requiring clients to do so. + +You can still build applications with Riak KV that treat it as a highly +available key/value store, and you will always have this choice. What +Riak Data Types provide is additional flexibility and a broader choice +palette. + +The trade-off that data types necessarily present is that they don't +allow you to produce your own convergence logic. If your use case +demands that you be able to create your own deterministic merge +functions, then Riak Data Types might not be a good fit. + + +## Implementation + +Conflicts between replicas are inevitable in a distributed system like +Riak KV. + +For example, if a map is stored in the key `my_map`, it is always +possible that the value of `my_map` will be different in nodes A and B. + +Without using data types, that conflict must be resolved using +timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt]. + + +## Convergence + +The benefit of data types is that Riak KV knows how to resolve value +conflicts by applying data type-specific rules. + +Riak KV does this by remembering the history of a value and broadcasting that +history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct. + +### Example + +Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`. While on another node the set has only one element, `apple`. + +What happens when the two nodes communicate and note the divergence? + +In this case Riak KV would declare the set with two elements the winner. +At that point, the node with the incorrect set would be told: "The set +`fruits` should have elements `apple` and `orange`." + +In general, convergence involves the following stages: + +1. Check for divergence. If the data types have the same value, Riak KV + does nothing. But if divergence is noted... +2. Riak KV applies data type-specific merge rules, like in the `fruits` + set example above, which will result in a "correct" value. +3. 
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. diff --git a/content/riak/kv/2.9.2/learn/concepts/eventual-consistency.md b/content/riak/kv/2.9.2/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..b6cefb2a76 --- /dev/null +++ b/content/riak/kv/2.9.2/learn/concepts/eventual-consistency.md @@ -0,0 +1,198 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.2/theory/concepts/Eventual-Consistency + - /riak/kv/2.9.2/theory/concepts/Eventual-Consistency + - /riak/2.9.2/theory/concepts/eventual-consistency + - /riak/kv/2.9.2/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these +systems need to rely on some form of conflict-resolution mechanism. + +One of the things that makes Riak's eventual consistency model powerful +is that Riak does not dictate how data resolution takes place. While +Riak does ship with a set of defaults regarding how data is +[replicated](#replication-properties-and-request-tuning) and how +[conflicts are resolved][usage conflict resolution], you can override these +defaults if you want to employ a different strategy. + +Among those strategies, you can enable Riak to resolve object conflicts +automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or +special eventually consistent [Data Types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types/), or you can resolve those +conflicts on the application side by employing a use case-specific logic +of your choosing. More information on this can be found in our guide to +[conflict resolution][usage conflict resolution]. + +This variety of options enables you to manage Riak's eventually +consistent behavior in accordance with your application's [data model +or models]({{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/). + +## Replication Properties and Request Tuning + +In addition to providing you different means of resolving conflicts, +Riak also enables you to fine-tune **replication properties**, which +determine things like the number of nodes on which data should be stored +and the number of nodes that are required to respond to read, write, and +other requests. + +An in-depth discussion of these behaviors and how they can be +implemented on the application side can be found in our guides to +[replication properties][concept replication] and [conflict resolution][usage conflict resolution]. + +In addition to our official documentation, we also recommend checking +out the [Understanding Riak's Configurable +Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) +series from [the Basho blog](https://riak.com/blog/). + +## A Simple Example of Eventual Consistency + +Let's assume for the moment that a sports news application is storing +all of its data in Riak. One thing that the application always needs to +be able to report to users is the identity of the current manager of +Manchester United, which is stored in the key `manchester-manager` in +the bucket `premier-league-managers`. This bucket has `allow_mult` set +to `false`, which means that Riak will resolve all conflicts by itself. + +Now let's say that a node in this cluster has recently recovered from +failure and has an old copy of the key `manchester-manager` stored in +it, with the value `Alex Ferguson`. The problem is that Sir Ferguson +stepped down in 2013 and is no longer the manager. Fortunately, the +other nodes in the cluster hold the value `David Moyes`, which is +correct. + +Shortly after the recovered node comes back online, other cluster +members recognize that it is available. Then, a read request for +`manchester-manager` arrives from the application. Regardless of which +order the responses arrive to the node that is coordinating this +request, `David Moyes` will be returned as the value to the client, +because `Alex Ferguson` is recognized as an older value. + +Why is this? How does Riak make this decision? Behind the scenes, after +`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the +older value on the node that just came back online. 
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because 1 of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we've been talking
+about the possibility of a node responsible for the `manchester-manager`
+key having failed. To be more precise, we've been talking about a
+*primary* node, one that would bear responsibility for that key when the
+cluster is perfectly healthy.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure.
+
+>
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3` the client indicated that 3 primary
+nodes must respond for the operation to be considered successful, which
+it wasn't, but there's no way to tell without performing another read
+whether the operation truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels, et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
diff --git a/content/riak/kv/2.9.2/learn/concepts/keys-and-objects.md b/content/riak/kv/2.9.2/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..8f2f0b1ded
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/concepts/keys-and-objects.md
@@ -0,0 +1,49 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.2/theory/concepts/keys-and-values
+  - /riak/kv/2.9.2/theory/concepts/keys-and-values
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there is more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and occur when more than one actor updates an
+object, when a network partition occurs, or when a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
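+
+To make those parts concrete, here is a minimal sketch using the official Erlang client (`riakc`), whose `riakc_pb_socket` calls appear elsewhere in these docs. The host, port, bucket, and key here are assumptions for illustration:
+
+```erlang
+%% Connect to a local node's Protocol Buffers port (8087 by default)
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+
+%% Fetch one Riak Object, identified by bucket and key
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"customers">>, <<"mariejohnston">>),
+
+%% The causal context (vector clock) attached to this Object
+Vclock = riakc_obj:vclock(Obj),
+
+%% The list of metadata-value pairs; more than one entry means siblings
+Contents = riakc_obj:get_contents(Obj),
+NumSiblings = riakc_obj:value_count(Obj).
+```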
diff --git a/content/riak/kv/2.9.2/learn/concepts/replication.md b/content/riak/kv/2.9.2/learn/concepts/replication.md
new file mode 100644
index 0000000000..19ce297e4d
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/concepts/replication.md
@@ -0,0 +1,319 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.2/theory/concepts/Replication
+  - /riak/kv/2.9.2/theory/concepts/Replication
+  - /riak/2.9.2/theory/concepts/replication
+  - /riak/kv/2.9.2/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing assurance
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication]({{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/multi) capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak uses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
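+
+In the Erlang client, these per-request parameters are passed as options on individual operations. This is a minimal sketch under stated assumptions: a connection `Pid` as in the other `riakc` examples in these docs, and a bucket/key chosen for illustration; the `r`, `w`, and `dw` option names follow the riak-erlang-client API:
+
+```erlang
+%% Build an object for the illustrative bucket/key pair
+Obj = riakc_obj:new(<<"my_bucket">>, <<"my_key">>, <<"some value">>),
+
+%% Write, requiring at least 2 vnodes to acknowledge, 1 of them durably
+ok = riakc_pb_socket:put(Pid, Obj, [{w, 2}, {dw, 1}]),
+
+%% Read, requiring 2 of the N replicas to respond before returning
+{ok, Fetched} = riakc_pb_socket:get(Pid, <<"my_bucket">>, <<"my_key">>, [{r, 2}]).
+```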
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable, data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up no longer including the vnodes on which existing replicas of
+the object are actually stored.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy][cluster ops v3 mdc]
+
+
+## Read Repair
+
+Read repair occurs when a successful read occurs---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Alternatively, if you have [active
+anti-entropy][usage replication] enabled, your values will
+eventually replicate as a background task.
+
+For each object that fails read (or the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean?
+ +N=3 simply means that three copies of each piece of data will be stored +in the cluster. That is, three different partitions/vnodes will receive +copies of the data. **There are no guarantees that the three replicas +will go to three separate physical nodes**; however, the built-in +functions for determining where replicas go attempts to distribute the +data evenly. + +As nodes are added and removed from the cluster, the ownership of +partitions changes and may result in an uneven distribution of the data. +On some rare occasions, Riak will also aggressively reshuffle ownership +of the partitions to achieve a more even balance. + +For cases where the number of nodes is less than the N value, data will +likely be duplicated on some nodes. For example, with N=3 and 2 nodes in +the cluster, one node will likely have one replica, and the other node +will have two replicas. + +## Understanding replication by example + +To better understand how data is replicated in Riak let's take a look at +a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically +we'll focus on two parts of the request: routing an object to a set of +partitions and storing an object on a partition. + +### Routing an object to a set of partitions + + * Assume we have 3 nodes + * Assume we store 3 replicas per object (N=3) + * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8) + +**Note**: It is not recommended that you use such a small ring size. +This is for demonstration purposes only. + +With only 8 partitions our ring will look approximately as follows +(response from `riak_core_ring_manager:get_my_ring/0` truncated for +clarity): + +```erlang +(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring(). +[{0,'dev1@127.0.0.1'}, +{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}, +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}, +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}, +{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'}, +{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}, +{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}, +{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}] +``` + +The node handling this request hashes the bucket/key combination: + +```erlang +(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}). +<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>> +``` + +The DocIdx hash is a 160-bit integer: + +```erlang +(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx. +<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>> +(dev1@127.0.0.1)6> I. +1045375627425331784151332358177649483819648417632 +``` + +The node looks up the hashed key in the ring, which returns a list of +_preferred_ partitions for the given key. + +```erlang +(node1@127.0.0.1)> Preflist = riak_core_ring:preflist(DocIdx, Ring). 
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}, +{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}, +{0, 'dev1@127.0.0.1'}, +{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}, +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}, +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}, +{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'}, +{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}] +``` + +The node chooses the first N partitions from the list. The remaining +partitions of the "preferred" list are retained as fallbacks to use if +any of the target partitions are unavailable. + +```erlang +(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist). +{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}, +{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}, +{0,'dev1@127.0.0.1'}], +[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}, +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}, +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}, +{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'}, +{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]} +``` + +The partition information returned from the ring contains a partition +identifier and the parent node of that partition: + +```erlang +{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'} +``` + +The requesting node sends a message to each parent node with the object +and partition identifier (pseudocode for clarity): + +```erlang +'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232} +'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104} +'dev1@127.0.0.1' ! {put, Object, 0} +``` + +If any of the target partitions fail, the node sends the object to one +of the fallbacks. When the message is sent to the fallback node, the +message references the object and original partition identifier. For +example, if `dev2@127.0.0.1` were unavailable, the requesting node would +then try each of the fallbacks. The fallbacks in this example are: + +```erlang +{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'} +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'} +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'} +``` + +The next available fallback node would be `dev3@127.0.0.1`. The +requesting node would send a message to the fallback node with the +object and original partition identifier: + +```erlang +'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104} +``` + +Note that the partition identifier in the message is the same that was +originally sent to `dev2@127.0.0.1` only this time it is being sent to +`dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node of +that partition, it is smart enough to hold on to the object until +`dev2@127.0.0.1` returns to the cluster. + +## Processing partition requests + +Processing requests per partition is fairly simple. Each node runs a +single process (`riak_kv_vnode_master`) that distributes requests to +individual partition processes (`riak_kv_vnode`). The +`riak_kv_vnode_master` process maintains a list of partition identifiers +and corresponding partition processes. If a process does not exist for a +given partition identifier a new process is spawned to manage that +partition. 
+
+The `riak_kv_vnode_master` process treats all requests the same and
+spawns partition processes as needed even when nodes receive requests
+for partitions they do not own. When a partition's parent node is
+unavailable, requests are sent to fallback nodes (handoff). The
+`riak_kv_vnode_master` process on the fallback node spawns a process to
+manage the partition even though the partition does not belong to the
+fallback node.
+
+The individual partition processes perform hometests throughout the life
+of the process. The hometest checks if the current node (`node/0`)
+matches the parent node of the partition as defined in the ring. If the
+process determines that the partition it is managing belongs on another
+node (the parent node), it will attempt to contact that node. If that
+parent node responds, the process will hand off any objects it has
+processed for that partition and shut down. If that parent node does not
+respond, the process will continue to manage that partition and check
+the parent node again after a delay. The hometest is also run by
+partition processes to account for changes in the ring, such as the
+addition of nodes to, or removal of nodes from, the cluster.
+
diff --git a/content/riak/kv/2.9.2/learn/concepts/strong-consistency.md b/content/riak/kv/2.9.2/learn/concepts/strong-consistency.md
new file mode 100644
index 0000000000..baf2ae8ee4
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/concepts/strong-consistency.md
@@ -0,0 +1,101 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Strong Consistency"
+    identifier: "learn_concepts_strong_consistency"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.2/theory/concepts/strong-consistency
+  - /riak/kv/2.9.2/theory/concepts/strong-consistency
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency
+
+{{% note title="Please Note:" %}}
+Riak KV's strong consistency is an experimental feature and may be removed
+from the product in the future. Strong consistency is not commercially
+supported or production-ready. Strong consistency is incompatible with
+Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB
+Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its
+usage in any production environment.
+{{% /note %}}
+
+Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+(i.e. fault) tolerance and high read and write availability.
+
+While this focus on high availability is a great fit for many data
+storage needs, there are also many use cases for which strong data
+consistency is more important than availability. Basho introduced a new
+strong consistency option in version 2.0 to address these use cases.
+In Riak, strong consistency is applied [using bucket types][usage bucket types], which
+enables developers to apply strong consistency guarantees on a per-key
+basis.
+
+Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.2/configuring/strong-consistency) looking to manage,
+configure, and monitor strong consistency.
+
+## Strong vs. Eventual Consistency
+
+If you successfully write a value to a key in a strongly consistent
+system, the next successful read of that key is guaranteed to show that
+write. A client will never see out-of-date values. The drawback is that
+some operations may fail if an insufficient number of object replicas
+are available. More on this in the section on [trade-offs](#trade-offs).
+
+In an eventually consistent system, on the other hand, a read may return
+an out-of-date value, particularly during system or network failures.
+The advantage of this approach is that reads and writes can succeed even
+when a cluster is experiencing significant service degradation.
+
+### Example
+
+Building on the example presented in the [eventual consistency][concept eventual consistency] doc,
+imagine that information about who manages Manchester United is stored
+in Riak, in the key `manchester-manager`. In the eventual consistency
+example, the value associated with this key was originally
+`David Moyes`, meaning that that was the first successful write to that
+key. But then `Louis van Gaal` became Man U's manager, and a write was
+executed to change the value of `manchester-manager`.
+
+Now imagine that this write failed on one node in a multi-node cluster.
+Thus, all nodes report that the value of `manchester-manager` is `Louis
+van Gaal` except for one. On the errant node, the value of the
+`manchester-manager` key is still `David Moyes`. An eventually
+consistent system is one in which a get request will most likely return
+`Louis van Gaal` but could return the outdated value `David Moyes`.
+
+In a strongly consistent system, conversely, any successful read on
+`manchester-manager` will return `Louis van Gaal` and never `David Moyes`.
+Reads will return `Louis van Gaal` every single time until Man U gets a new
+manager and someone performs a successful write to `manchester-manager`
+to change its value.
+
+It might also be useful to imagine it a bit more abstractly. The
+following causal sequence would characterize a strongly consistent
+system:
+
+1. The value of the key `k` is set to `v`
+2. All successful reads on `k` return `v`
+3. The value of `k` is changed to `v2`
+4. All successful reads on `k` return `v2`
+5. And so forth
+
+At no point in time does this system return an out-of-date value.
+
+The following sequence could characterize an eventually consistent
+system:
+
+1. A write is made that sets the value of the key `k` to `v`
+2. Nearly all reads to `k` return `v`, but a small percentage return
+   `not found`
+3. A write to `k` changes the value to `v2`
+4. Nearly all reads to `k` now return `v2`, but a small number return
+   the outdated `v` (or even `not found`) because the newer value hasn't
+   yet been replicated to all nodes
diff --git a/content/riak/kv/2.9.2/learn/concepts/vnodes.md b/content/riak/kv/2.9.2/learn/concepts/vnodes.md
new file mode 100644
index 0000000000..0f0c450338
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/concepts/vnodes.md
@@ -0,0 +1,156 @@
+---
+title: "Vnodes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Vnodes"
+    identifier: "learn_concepts_vnodes"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.2/theory/concepts/vnodes
+  - /riak/kv/2.9.2/theory/concepts/vnodes
+---
+
+
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context
+[concept clusters ring]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters/#the-ring
+[concept replication]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/strong-consistency
+[glossary node]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#ring
+[plan backend]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/cluster-capacity
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-cli
+
+
+Virtual nodes, more commonly referred to as **vnodes**, are processes
+that manage partitions in the Riak [ring][glossary ring]. Each data
+partition in a Riak cluster has a vnode that **claims** that partition.
+Vnodes perform a wide variety of operations, from K/V storage operations
+to guaranteeing [strong consistency][concept strong consistency] if you choose to use that
+feature.
+
+## The Number of Vnodes in a Cluster
+
+The term [node][glossary node] refers to a full instance of Riak,
+be it on its own physical machine or alongside others on a single
+machine, as in a development cluster on your laptop. Each Riak node
+contains multiple vnodes. The number per node is the [ring
+size][concept clusters ring] divided by the number of nodes in the cluster.
+
+This means that in some clusters different nodes will have different
+numbers of data partitions (and hence a different number of vnodes),
+because (ring size / number of nodes) will not always produce an even
+integer. If the ring size of your cluster is 64 and you are running
+three nodes, two of your nodes will have 21 vnodes, while the third node
+holds 22 vnodes.
+
+The output of the [`riak-admin member-status`][use admin riak cli]
+command shows this:
+
+```
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      34.4%      --      'dev1@127.0.0.1'
+valid      32.8%      --      'dev2@127.0.0.1'
+valid      32.8%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+```
+
+In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of
+64 partitions, while the other two nodes account for 32.8%, i.e. 21 out
+of 64 partitions. This is normal and expected behavior in Riak.
+
+We strongly recommend setting the appropriate ring size, and by
+extension the number of vnodes, prior to building a cluster. A full
+guide can be found in our [cluster planning][plan cluster capacity] documentation.
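+
+If you want to sanity-check that arithmetic, the uneven split is easy to
+reproduce in an Erlang shell. This is illustrative only; Riak's actual
+claiming algorithm is more sophisticated than dealing partitions out
+round-robin:
+
+```erlang
+1> Nodes = [dev1, dev2, dev3],
+   lists:foldl(fun(P, Acc) ->
+                   Node = lists:nth((P rem length(Nodes)) + 1, Nodes),
+                   maps:update_with(Node, fun(C) -> C + 1 end, 1, Acc)
+               end, #{}, lists:seq(0, 63)).
+#{dev1 => 22,dev2 => 21,dev3 => 21}
+```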
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, acting as [strong consistency
+ensembles][concept strong consistency] and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1004782375664995756265033322492444576013453623296`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node. The report for a specific
+vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+           0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
diff --git a/content/riak/kv/2.9.2/learn/dynamo.md b/content/riak/kv/2.9.2/learn/dynamo.md
new file mode 100644
index 0000000000..681c5e4bc6
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/dynamo.md
@@ -0,0 +1,1924 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/2.9.2/theory/dynamo
+  - /riak/kv/2.9.2/theory/dynamo
---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.
+
+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV, Cassandra
+> and Voldemort come to mind. You may also remember Dynomite (which predates
+> all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use.
+ +Categories and Subject Descriptors + +* D.4.2 [Operating Systems]: Storage Management; +* D.4.5 [Operating Systems]: Reliability; +* D.4.2 [Operating Systems]: Performance; + +General Terms + +Algorithms, Management, Measurement, Performance, Design, Reliability. + +## 1. Introduction + +Amazon runs a world-wide e-commerce platform that serves tens of millions +customers at peak times using tens of thousands of servers located in many data +centers around the world. There are strict operational requirements on Amazon’s +platform in terms of performance, reliability and efficiency, and to support +continuous growth the platform needs to be highly scalable. Reliability is one +of the most important requirements because even the slightest outage has +significant financial consequences and impacts customer trust. In addition, to +support continuous growth, the platform needs to be highly scalable. + +One of the lessons our organization has learned from operating Amazon’s platform +is that the reliability and scalability of a system is dependent on how its +application state is managed. Amazon uses a highly decentralized, loosely +coupled, service oriented architecture consisting of hundreds of services. In +this environment there is a particular need for storage technologies that are +always available. For example, customers should be able to view and add items to +their shopping cart even if disks are failing, network routes are flapping, or +data centers are being destroyed by tornados. Therefore, the service responsible +for managing shopping carts requires that it can always write to and read from +its data store, and that its data needs to be available across multiple data +centers. + +Dealing with failures in an infrastructure comprised of millions of components +is our standard mode of operation; there are always a small but significant +number of server and network components that are failing at any given time. As +such Amazon’s software systems need to be constructed in a manner that treats +failure handling as the normal case without impacting availability or +performance. + +To meet the reliability and scaling needs, Amazon has developed a number of +storage technologies, of which the Amazon Simple Storage Service (also available +outside of Amazon and known as Amazon S3), is probably the best known. This +paper presents the design and implementation of Dynamo, another highly available +and scalable distributed data store built for Amazon’s platform. Dynamo is used +to manage the state of services that have very high reliability requirements and +need tight control over the tradeoffs between availability, consistency, cost- +effectiveness and performance. Amazon’s platform has a very diverse set of +applications with different storage requirements. A select set of applications +requires a storage technology that is flexible enough to let application +designers configure their data store appropriately based on these tradeoffs to +achieve high availability and guaranteed performance in the most cost effective +manner. + +There are many services on Amazon’s platform that only need primary-key access +to a data store. For many services, such as those that provide best seller +lists, shopping carts, customer preferences, session management, sales rank, and +product catalog, the common pattern of using a relational database would lead to +inefficiencies and limit scale and availability. Dynamo provides a simple +primary-key only interface to meet the requirements of these applications. 
+ +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. It also provides insight into the +tuning of these techniques to meet the requirements of production systems with +very strict performance demands. + +The paper is structured as follows. Section 2 presents the background and +Section 3 presents the related work. Section 4 presents the system design and +Section 5 describes the implementation. Section 6 details the experiences and +insights gained by running Dynamo in production and Section 7 concludes the +paper. There are a number of places in this paper where additional information +may have been appropriate but where protecting Amazon’s business interests +require us to reduce some level of detail. For this reason, the intra- and +inter-datacenter latencies in section 6, the absolute request rates in section +6.2 and outage lengths and workloads in section 6.3 are provided through +aggregate measures instead of absolute details. + + +## 2. Background + +Amazon’s e-commerce platform is composed of hundreds of services that work in +concert to deliver functionality ranging from recommendations to order +fulfillment to fraud detection. Each service is exposed through a well defined +interface and is accessible over the network. 
These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated and diverged from
+> Dynamo's (as described in this paper).
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly-available, is efficient in its resource uses,
+> and has a simple scale out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
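+>
+> As a rough illustration of the first item in that list, basic key/value
+> access from Erlang might look like the following sketch, which assumes the
+> Riak Erlang client (`riakc`) is available and a node is listening on the
+> default Protocol Buffers port:
+>
+> ```erlang
+> %% Sketch only: store one object, then read it back.
+> {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+> Obj = riakc_obj:new(<<"bucket">>, <<"key">>, <<"value">>),
+> ok = riakc_pb_socket:put(Pid, Obj),
+> {ok, Fetched} = riakc_pb_socket:get(Pid, <<"bucket">>, <<"key">>),
+> <<"value">> = riakc_obj:get_value(Fetched).
+> ```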
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract.
+ +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. 
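+
+> A quick note on the percentile math above: measuring at the 99.9th
+> percentile simply means sorting the latency samples and reading off the
+> value below which 99.9% of them fall. A minimal sketch (ours, not Amazon's
+> or Riak KV's actual instrumentation):
+>
+> ```erlang
+> %% Nearest-rank 99.9th percentile of a non-empty list of latencies.
+> p999(Samples) when Samples =/= [] ->
+>     Sorted = lists:sort(Samples),
+>     Index = max(1, ceil(0.999 * length(Sorted))),
+>     lists:nth(Index, Sorted).
+> ```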
+ + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. On the other hand, since the application is aware of the +data schema it can decide on the conflict resolution method that is best suited +for its client’s experience. 
For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbor. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops.
+ +To reduce the additional latency introduced by multi-hop routing, some P2P +systems (e.g., [14]) employ O(1) routing where each peer maintains enough +routing information locally so that it can route requests (to access a data +item) to the appropriate peer within a constant number of hops. + +> Riak KV's gossip protocol communicates between nodes with O(1) routing, and +> maintains local routing information. + +Various storage systems, such as Oceanstore [9] and PAST [17] were built on top +of these routing overlays. Oceanstore provides a global, transactional, +persistent storage service that supports serialized updates on widely replicated +data. To allow for concurrent updates while avoiding many of the problems +inherent with wide-area locking, it uses an update model based on conflict +resolution. Conflict resolution was introduced in [21] to reduce the number of +transaction aborts. Oceanstore resolves conflicts by processing a series of +updates, choosing a total order among them, and then applying them atomically in +that order. It is built for an environment where the data is replicated on an +untrusted infrastructure. By comparison, PAST provides a simple abstraction +layer on top of Pastry for persistent and immutable objects. It assumes that the +application can build the necessary storage semantics (such as mutable files) on +top of it. + +### 3.2 Distributed File Systems and Databases + +Distributing data for performance, availability and durability has been widely +studied in the file system and database systems community. Compared to P2P +storage systems that only support flat namespaces, distributed file systems +typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19] +replicate files for high availability at the expense of consistency. Update +conflicts are typically managed using specialized conflict resolution +procedures. The Farsite system [1] is a distributed file system that does not +use any centralized server like NFS. Farsite achieves high availability and +scalability using replication. The Google File System [6] is another distributed +file system built for hosting the state of Google’s internal applications. GFS +uses a simple design with a single master server for hosting the entire metadata +and where the data is split into chunks and stored in chunkservers. Bayou is a +distributed relational database system that allows disconnected operations and +provides eventual data consistency [21]. + +Among these systems, Bayou, Coda and Ficus allow disconnected operations and are +resilient to issues such as network partitions and outages. These systems differ +on their conflict resolution procedures. For instance, Coda and Ficus perform +system level conflict resolution and Bayou allows application level resolution. +All of them, however, guarantee eventual consistency. + +Similar to these systems, Dynamo allows read and write operations to continue +even during network partitions and resolves updated conflicts using different +conflict resolution mechanisms. Distributed block storage systems like FAB [18] +split large size objects into smaller blocks and stores each block in a highly +available manner. In comparison to these systems, a key-value store is more +suitable in this case because: (a) it is intended to store relatively small +objects (size < 1M) and (b) key-value stores are easier to configure on a per- +application basis. Antiquity is a wide-area distributed storage system designed +to handle multiple server failures [23]. 
It uses a secure log to preserve data
+integrity, replicates each log on multiple servers for durability, and uses
+Byzantine fault tolerance protocols to ensure data consistency. In contrast to
+Antiquity, Dynamo does not focus on the problem of data integrity and security
+and is built for a trusted environment. Bigtable is a distributed storage system
+for managing structured data. It maintains a sparse, multi-dimensional sorted
+map and allows applications to access their data using multiple attributes [2].
+Compared to Bigtable, Dynamo targets applications that require only key/value
+access with primary focus on high availability where updates are not rejected
+even in the wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages.
+ +<table id="table-1"> + <caption> + Table 1: Summary of techniques used in Dynamo and their advantages. + </caption> + <tr> + <th>Problem</th> + <th>Technique</th> + <th>Advantage</th> + </tr> + <tr> + <td>Partitioning</td> + <td>Consistent Hashing</td> + <td>Incremental Scalability</td> + </tr> + <tr> + <td>High Availability for writes</td> + <td>Vector clocks with reconciliation during reads</td> + <td>Version size is decoupled from update rates.</td> + </tr> + <tr> + <td>Handling temporary failures</td> + <td>Sloppy Quorum and hinted handoff</td> + <td>Provides high availability and durability guarantee when some of the + replicas are not available.</td> + </tr> + <tr> + <td>Recovering from permanent failures</td> + <td>Anti-entropy using Merkle trees</td> + <td>Synchronizes divergent replicas in the background.</td> + </tr> + <tr> + <td>Membership and failure detection</td> + <td>Gossip-based membership protocol and failure detection.</td> + <td>Preserves symmetry and avoids having a centralized registry for storing + membership and node liveness information.</td> + </tr> +</table> + +### 4.1 System Interface + +Dynamo stores objects associated with a key through a simple interface; it +exposes two operations: get() and put(). The get(key) operation locates the +object replicas associated with the key in the storage system and returns a +single object or a list of objects with conflicting versions along with a +context. The put(key, context, object) operation determines where the replicas +of the object should be placed based on the associated key, and writes the +replicas to disk. The context encodes system metadata about the object that is +opaque to the caller and includes information such as the version of the object. +The context information is stored along with the object so that the system can +verify the validity of the context object supplied in the put request. + +> Whereas Dynamo only has the concept of keys, we added a higher level of +> organization called a "bucket." Keys are stored in buckets and buckets are the +> level at which several Riak KV properties can be configured (primarily the "N" +> value, or the replication value.) In addition to the bucket+key identifier and +> value, Riak KV will also return the associated metadata for a given object +> with each get or put. +> +> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. + +[HTTP API]: {{<baseurl>}}riak/kv/2.9.2/developing/api/http/ +[Protocol Buffers API]: {{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers/ + +Dynamo treats both the key and the object supplied by the caller as an opaque +array of bytes. It applies a MD5 hash on the key to generate a 128-bit +identifier, which is used to determine the storage nodes that are responsible +for serving the key. + +> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash +> to generate a 160 bit identifier which is then used to determine where in the +> database each datum is stored. Riak KV treats data as an opaque binary, thus +> enabling users to store virtually anything. + + +### 4.2 Partitioning Algorithm + +One of the key design requirements for Dynamo is that it must scale +incrementally. This requires a mechanism to dynamically partition the data over +the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning +scheme relies on consistent hashing to distribute the load across multiple +storage hosts. 
In consistent hashing [10], the output range of a hash function
+is treated as a fixed circular space or “ring” (i.e. the largest hash value
+wraps around to the smallest hash value). Each node in the system is assigned a
+random value within this space which represents its “position” on the ring. Each
+data item identified by a key is assigned to a node by hashing the data item’s
+key to yield its position on the ring, and then walking the ring clockwise to
+find the first node with a position larger than the item’s position. Thus, each
+node becomes responsible for the region in the ring between it and its
+predecessor node on the ring. The principle advantage of consistent hashing is
+that departure or arrival of a node only affects its immediate neighbors and
+other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data around
+> the ring to partitions responsible for storing data. The ring has a maximum
+> key space of 2^160. Each bucket+key (and its associated value) is hashed to a
+> location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Each storage node will
+> optimistically handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo uses
+a variant of consistent hashing (similar to the one used in [10, 20]): instead
+of mapping a node to a single point in the circle, each node gets assigned to
+multiple points in the ring. To this end, Dynamo uses the concept of “virtual
+nodes”. A virtual node looks like a single node in the system, but each node can
+be responsible for more than one virtual node. Effectively, when a new node is
+added to the system, it is assigned multiple positions (henceforth, “tokens”) in
+the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed
+in Section 6.
+
+> Riak KV also has the concept of virtual nodes and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+If a node becomes unavailable (due to failures or routine maintenance), the load
+handled by this node is evenly dispersed across the remaining available nodes.
+
+When a node becomes available again, or a new node is added to the system, the
+newly available node accepts a roughly equivalent amount of load from each of
+the other available nodes.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+The number of virtual nodes that a node is responsible for can be decided based
+on its capacity, accounting for heterogeneity in the physical infrastructure.
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters/#the-ring
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts. Each data item is replicated at N hosts, where N is a parameter
+configured “per-instance”. Each key, k, is assigned to a coordinator node
+(described in the previous section). The coordinator is in charge of the
+replication of the data items that fall within its range. In addition to locally
+storing each key within its range, the coordinator replicates these keys at the
+N-1 clockwise successor nodes in the ring. This results in a system where each
+node is responsible for the region of the ring between it and its Nth
+predecessor. In <a href="#figure-2">Figure 2</a>, node B replicates the key k at
+nodes C and D in addition to storing it locally. Node D will store the keys that
+fall in the ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
+> the concept of a bucket we covered above? In Riak KV, the replication
+> parameter, "N" (also called "n_val"), is configurable at the bucket level.
+> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
+> store three replicas of your data on three different partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called the
+preference list. The system is designed, as will be explained in Section 4.8, so
+that every node in the system can determine which nodes should be in this list
+for any particular key. To account for node failures, preference list contains
+more than N nodes. Note that with the use of virtual nodes, it is possible that
+the first N successor positions for a particular key may be owned by less than N
+distinct physical nodes (i.e. a node may hold more than one of the first N
+positions). To address this, the preference list for a key is constructed by
+skipping positions in the ring to ensure that the list contains only distinct
+physical nodes.
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be propagated
+to all replicas asynchronously. A put() call may return to its caller before the
+update has been applied at all the replicas, which can result in scenarios where
+a subsequent get() operation may return an object that does not have the latest
+updates. If there are no failures then there is a bound on the update
+propagation times. However, under certain failure scenarios (e.g., server
+outages or network partitions), updates may not arrive at all replicas for an
+extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously and, as you would expect, this could result in a datum being
+> returned to the client that is out of date. But don't worry. We built in some
+> mechanisms to address this.
+
+There is a category of applications in Amazon’s platform that can tolerate such
+inconsistencies and can be constructed to operate under these conditions. For
+example, the shopping cart application requires that an “Add to Cart” operation
+can never be forgotten or rejected. If the most recent state of the cart is
+unavailable, and a user makes changes to an older version of the cart, that
+change is still meaningful and should be preserved. But at the same time it
+shouldn’t supersede the currently unavailable state of the cart, which itself
+may contain changes that should be preserved. Note that both “add to cart” and
+“delete item from cart” operations are translated into put requests to Dynamo.
+When a customer wants to add an item to (or remove from) a shopping cart and the
+latest version is not available, the item is added to (or removed from) the
+older version and the divergent versions are reconciled later.
+
+> Much like Dynamo was suited to the design of the shopping cart, Riak KV, and
+> its tradeoffs, are appropriate for a certain set of use cases. We happen to
+> feel that _most_ use cases can tolerate some level of eventual consistency.
+
+In order to provide this kind of guarantee, Dynamo treats the result of each
+modification as a new and immutable version of the data. It allows for multiple
+versions of an object to be present in the system at the same time. Most of the
+time, new versions subsume the previous version(s), and the system itself can
+determine the authoritative version (syntactic reconciliation). However, version
+branching may happen, in the presence of failures combined with concurrent
+updates, resulting in conflicting versions of an object. In these cases, the
+system cannot reconcile the multiple versions of the same object and the client
+must perform the reconciliation in order to collapse multiple branches of data
+evolution back into one (semantic reconciliation). A typical example of a
+collapse operation is “merging” different versions of a customer’s shopping
+cart. Using this reconciliation mechanism, an “add to cart” operation is never
+lost. However, deleted items can resurface.
+
+> The same holds true for Riak KV. If, by way of some failure and concurrent
+> update (rare but quite possible), there come to exist multiple versions of the
+> same object, Riak KV will push this decision down to the client (who are we to
+> tell you which is the authoritative object?). All that said, if your
+> application doesn't need this level of version control, we enable you to turn
+> the usage of vector clocks on and off at the bucket level.
+
+It is important to understand that certain failure modes can potentially result
+in the system having not just two but several versions of the same data. Updates
+in the presence of network partitions and node failures can potentially result
+in an object having distinct version sub-histories, which the system will need
+to reconcile in the future. This requires us to design applications that
+explicitly acknowledge the possibility of multiple versions of the same data (in
+order to never lose any updates).
+
+> Ditto.
+
+Dynamo uses vector clocks [12] in order to capture causality between different
+versions of the same object. A vector clock is effectively a list of (node,
+counter) pairs. One vector clock is associated with every version of every
+object. One can determine whether two versions of an object are on parallel
+branches or have a causal ordering, by examining their vector clocks. If the
+counters on the first object’s clock are less-than-or-equal to all of the nodes
+in the second clock, then the first is an ancestor of the second and can be
+forgotten. Otherwise, the two changes are considered to be in conflict and
+require reconciliation.
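+
+> The ancestor test described in the paragraph above is simple enough to
+> sketch directly. This is an illustration only (Riak KV has its own `vclock`
+> module): clock `A` is an ancestor of clock `B` if every `{Node, Counter}`
+> entry in `A` is less than or equal to the matching counter in `B`.
+>
+> ```erlang
+> %% true if clock A is an ancestor of clock B
+> ancestor(A, B) ->
+>     lists:all(fun({Node, CountA}) ->
+>                   case lists:keyfind(Node, 1, B) of
+>                       {Node, CountB} -> CountA =< CountB;
+>                       false          -> false
+>                   end
+>               end, A).
+>
+> %% two clocks conflict when neither is an ancestor of the other
+> concurrent(A, B) ->
+>     (not ancestor(A, B)) andalso (not ancestor(B, A)).
+> ```
+>
+> For example, `ancestor([{sx,1}], [{sx,2}])` returns `true`, while
+> `[{sx,2},{sy,1}]` and `[{sx,2},{sz,1}]` are `concurrent`.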
+ +> As you may have already figured out, Riak KV uses vector clocks for object +> versioning, too. Here are a whole host of resources to keep you busy for a while: +> +> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vector-clock) +> +> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) +> | +> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/) +> +> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/) +> +> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock) + +In Dynamo, when a client wishes to update an object, it must specify which +version it is updating. This is done by passing the context it obtained from an +earlier read operation, which contains the vector clock information. Upon +processing a read request, if Dynamo has access to multiple branches that cannot +be syntactically reconciled, it will return all the objects at the leaves, with +the corresponding version information in the context. An update using this +context is considered to have reconciled the divergent versions and the branches +are collapsed into a single new version. + +**<figure id="figure-3" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure3.png"> + <figcaption> + Figure 3: Version evolution of an object over time. + </figcaption> +</figure>** + +To illustrate the use of vector clocks, let us consider the example shown in +<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say +Sx) that handles the write for this key increases its sequence number and uses +it to create the data's vector clock. The system now has the object D1 and its +associated clock [(Sx, 1)]. The client updates the object. Assume the same node +handles this request as well. The system now also has object D2 and its +associated clock [(Sx, 2)]. D2 descends from D1 and therefore over-writes D1, +however there may be replicas of D1 lingering at nodes that have not yet seen +D2. Let us assume that the same client updates the object again and a different +server (say Sy) handles the request. The system now has data D3 and its +associated clock [(Sx, 2), (Sy, 1)]. + +Next assume a different client reads D2 and then tries to update it, and another +node (say Sz) does the write. The system now has D4 (descendant of D2) whose +version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2 could +determine, upon receiving D4 and its clock, that D1 and D2 are overwritten by +the new data and can be garbage collected. A node that is aware of D3 and +receives D4 will find that there is no causal relation between them. In other +words, there are changes in D3 and D4 that are not reflected in each other. Both +versions of the data must be kept and presented to a client (upon a read) for +semantic reconciliation. + +Now assume some client reads both D3 and D4 (the context will reflect that both +values were found by the read). The read's context is a summary of the clocks of +D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client performs the +reconciliation and node Sx coordinates the write, Sx will update its sequence +number in the clock. The new data D5 will have the following clock: [(Sx, 3), +(Sy, 1), (Sz, 1)]. + +A possible issue with vector clocks is that the size of vector clocks may grow +if many servers coordinate the writes to an object. 
In practice, this is not
+likely because the writes are usually handled by one of the top N nodes in the
+preference list. In case of network partitions or multiple server failures,
+write requests may be handled by nodes that are not in the top N nodes in the
+preference list causing the size of vector clock to grow. In these scenarios, it
+is desirable to limit the size of vector clock. To this end, Dynamo employs the
+following clock truncation scheme: Along with each (node, counter) pair, Dynamo
+stores a timestamp that indicates the last time the node updated the data item.
+When the number of (node, counter) pairs in the vector clock reaches a threshold
+(say 10), the oldest pair is removed from the clock. Clearly, this truncation
+scheme can lead to inefficiencies in reconciliation as the descendant
+relationships cannot be derived accurately. However, this problem has not
+surfaced in production and therefore this issue has not been thoroughly
+investigated.
+
+> Riak KV does a certain amount of vector clock pruning to keep vector clock
+> growth under control.
+
+
+### 4.5 Execution of get () and put () operations
+
+Any storage node in Dynamo is eligible to receive client get and put operations
+for any key. In this section, for the sake of simplicity, we describe how these
+operations are performed in a failure-free environment and in the subsequent
+section we describe how read and write operations are executed during failures.
+
+> Any node in the Riak KV ring can coordinate a request. The Riak KV information
+> in this section applies to a failure-free environment.
+
+Both get and put operations are invoked using Amazon’s infrastructure-specific
+request processing framework over HTTP. There are two strategies that a client
+can use to select a node: (1) route its request through a generic load balancer
+that will select a node based on load information, or (2) use a partition-aware
+client library that routes requests directly to the appropriate coordinator
+nodes. The advantage of the first approach is that the client does not have to
+link any code specific to Dynamo in its application, whereas the second strategy
+can achieve lower latency because it skips a potential forwarding step.
+
+A node handling a read or write operation is known as the coordinator.
+Typically, this is the first among the top N nodes in the preference list. If
+the requests are received through a load balancer, requests to access a key may
+be routed to any random node in the ring. In this scenario, the node that
+receives the request will not coordinate it if the node is not in the top N of
+the requested key’s preference list. Instead, that node will forward the request
+to the first among the top N nodes in the preference list.
+
+Read and write operations involve the first N healthy nodes in the preference
+list, skipping over those that are down or inaccessible. When all nodes are
+healthy, the top N nodes in a key’s preference list are accessed. When there are
+node failures or network partitions, nodes that are lower ranked in the
+preference list are accessed.
+
+To maintain consistency among its replicas, Dynamo uses a consistency protocol
+similar to those used in quorum systems. This protocol has two key configurable
+values: R and W. R is the minimum number of nodes that must participate in a
+successful read operation. W is the minimum number of nodes that must
+participate in a successful write operation. Setting R and W such that R + W > N
+yields a quorum-like system. In this model, the latency of a get (or put)
+operation is dictated by the slowest of the R (or W) replicas. For this reason,
+R and W are usually configured to be less than N, to provide better latency.
+
+> Riak KV makes use of the same values. But, thanks to our concept of buckets,
+> we made it a bit more customizable. The default R and W values are set at the
+> bucket level but can be configured at the request level if the developer deems
+> it necessary for certain data. "Quorum" as described in Dynamo is the default
+> setting in Riak KV.
+>
+> Some more resources on R and W:
+>
+> [REST API]({{<baseurl>}}riak/kv/2.9.2/developing/api/http/)
+>
+> [Writing Data]({{<baseurl>}}riak/kv/2.9.2/developing/usage/creating-objects/)
+>
+> [Reading Data]({{<baseurl>}}riak/kv/2.9.2/developing/usage/reading-objects/)
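+
+> As a hedged illustration of that request-level tuning, the Ruby driver lets
+> you pass r and w per operation. The bucket and key below are assumptions for
+> the sake of the example:
+>
+>     require 'riak'
+>
+>     client = Riak::Client.new   # defaults to a local node
+>     bucket = client.bucket('carts')
+>
+>     # Read with R=2: two replicas must answer before the read returns
+>     cart = bucket.get_or_new('user-123', r: 2)
+>
+>     # Write with W=2: two vnodes must acknowledge the write
+>     cart.data = { 'items' => ['sku-1', 'sku-2'] }
+>     cart.store(w: 2)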
+
+Upon receiving a put() request for a key, the coordinator generates the vector
+clock for the new version and writes the new version locally. The coordinator
+then sends the new version (along with the new vector clock) to the N highest-
+ranked reachable nodes. If at least W-1 nodes respond then the write is
+considered successful.
+
+> In Riak KV a write is considered successful when the total number of
+> responding writes equals W. This need not be a durable write, which is a
+> separate value in Riak KV labeled DW.
+
+Similarly, for a get() request, the coordinator requests all existing versions
+of data for that key from the N highest-ranked reachable nodes in the preference
+list for that key, and then waits for R responses before returning the result to
+the client. If the coordinator ends up gathering multiple versions of the data,
+it returns all the versions it deems to be causally unrelated. The divergent
+versions are then reconciled and the reconciled version superseding the current
+versions is written back.
+
+> Same for Riak KV. Reconciling divergent versions in Riak KV is called
+> [Read Repair]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication/#read-repair).
+
+
+### 4.6 Handling Failures: Hinted Handoff
+
+If Dynamo used a traditional quorum approach it would be unavailable during
+server failures and network partitions, and would have reduced durability even
+under the simplest of failure conditions. To remedy this it does not enforce
+strict quorum membership and instead it uses a “sloppy quorum”; all read and
+write operations are performed on the first N healthy nodes from the preference
+list, which may not always be the first N nodes encountered while walking the
+consistent hashing ring.
+
+> [Hinted handoff] is built into Riak KV's core.
+>
+> You can get a glimpse of Riak KV's preference list (or *preflist*) calculation
+> in the [Replication] walkthrough.
+
+[Hinted handoff]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#hinted-handoff
+[Replication]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/replication/
+
+Consider the example of Dynamo configuration given in <a href="#figure-2">Figure
+2</a> with N=3. In this example, if node A is temporarily down or unreachable
+during a write operation then a replica that would normally have lived on A will
+now be sent to node D. This is done to maintain the desired availability and
+durability guarantees. The replica sent to D will have a hint in its metadata
+that suggests which node was the intended recipient of the replica (in this case
+A). Nodes that receive hinted replicas will keep them in a separate local
+database that is scanned periodically.
Upon detecting that A has recovered, D
+will attempt to deliver the replica to A. Once the transfer succeeds, D may
+delete the object from its local store without decreasing the total number of
+replicas in the system.
+
+Using hinted handoff, Dynamo ensures that the read and write operations are not
+failed due to temporary node or network failures. Applications that need the
+highest level of availability can set W to 1, which ensures that a write is
+accepted as long as a single node in the system has durably written the key to
+its local store. Thus, the write request is only rejected if all nodes in the
+system are unavailable. However, in practice, most Amazon services in production
+set a higher W to meet the desired level of durability. A more detailed
+discussion of configuring N, R and W follows in section 6.
+
+> As mentioned previously, Riak KV does not require that a write be durable,
+> only that a vnode responds in the affirmative. If you require a durable write
+> in the way mentioned here, use DW.
+
+It is imperative that a highly available storage system be capable of handling
+the failure of an entire data center(s). Data center failures happen due to
+power outages, cooling failures, network failures, and natural disasters. Dynamo
+is configured such that each object is replicated across multiple data centers.
+In essence, the preference list of a key is constructed such that the storage
+nodes are spread across multiple data centers. These datacenters are connected
+through high speed network links. This scheme of replicating across multiple
+datacenters allows us to handle entire data center failures without a data
+outage.
+
+> [Multi Datacenter Replication] was previously only implemented in the
+> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. Now it
+> is available in all versions from Riak KV 2.9.2 onwards.
+
+[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/2.9.2/using/reference/v3-multi-datacenter/architecture/
+[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
+
+
+### 4.7 Handling permanent failures: Replica synchronization
+
+Hinted handoff works best if the system membership churn is low and node
+failures are transient. There are scenarios under which hinted replicas become
+unavailable before they can be returned to the original replica node. To handle
+this and other threats to durability, Dynamo implements an anti-entropy (replica
+synchronization) protocol to keep the replicas synchronized.
+
+> Read repair, mentioned above, is the simplest form of anti-entropy. But it is
+> passive, not active as this section describes.
+
+To detect the inconsistencies between replicas faster and to minimize the amount
+of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a hash tree
+where leaves are hashes of the values of individual keys. Parent nodes higher in
+the tree are hashes of their respective children. The principal advantage of
+Merkle tree is that each branch of the tree can be checked independently without
+requiring nodes to download the entire tree or the entire data set. Moreover,
+Merkle trees help in reducing the amount of data that needs to be transferred
+while checking for inconsistencies among replicas. For instance, if the hash
+values of the root of two trees are equal, then the values of the leaf nodes in
+the tree are equal and the nodes require no synchronization. If not, it implies
+that the values of some replicas are different. In such cases, the nodes may
+exchange the hash values of children and the process continues until it reaches
+the leaves of the trees, at which point the hosts can identify the keys that are
+“out of sync”. Merkle trees minimize the amount of data that needs to be
+transferred for synchronization and reduce the number of disk reads performed
+during the anti-entropy process.
+
+> Riak KV implements a Merkle-tree based Active Anti-Entropy (*AAE*).
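+
+> The exchange can be pictured with a toy example. The Ruby sketch below is
+> illustrative only (it is not Riak KV's AAE code): it hashes a sorted list of
+> key/value pairs into a binary tree and walks two replicas' trees, descending
+> only into branches whose hashes differ:
+>
+>     require 'digest'
+>
+>     # Hash a (sorted) slice of [key, value] pairs into one digest
+>     def tree_hash(pairs)
+>       return Digest::SHA1.hexdigest(pairs.first.join(':')) if pairs.size == 1
+>       left, right = pairs.each_slice((pairs.size / 2.0).ceil).to_a
+>       Digest::SHA1.hexdigest(tree_hash(left) + tree_hash(right))
+>     end
+>
+>     # Compare two replicas, recursing only where branch hashes differ
+>     def out_of_sync(a, b)
+>       return [] if tree_hash(a) == tree_hash(b)
+>       return (a.map(&:first) | b.map(&:first)) if a.size == 1 || b.size == 1
+>       la, ra = a.each_slice((a.size / 2.0).ceil).to_a
+>       lb, rb = b.each_slice((b.size / 2.0).ceil).to_a
+>       out_of_sync(la, lb) + out_of_sync(ra, rb)
+>     end
+>
+>     replica1 = [['k1', 'v1'], ['k2', 'v2'], ['k3', 'v3']]
+>     replica2 = [['k1', 'v1'], ['k2', 'v2'], ['k3', 'CHANGED']]
+>     out_of_sync(replica1, replica2)  #=> ["k3"]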
+
+Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
+separate Merkle tree for each key range (the set of keys covered by a virtual
+node) it hosts. This allows nodes to compare whether the keys within a key range
+are up-to-date. In this scheme, two nodes exchange the root of the Merkle tree
+corresponding to the key ranges that they host in common. Subsequently, using
+the tree traversal scheme described above the nodes determine if they have any
+differences and perform the appropriate synchronization action. The disadvantage
+with this scheme is that many key ranges change when a node joins or leaves the
+system thereby requiring the tree(s) to be recalculated. This issue is
+addressed, however, by the refined partitioning scheme described in Section 6.2.
+
+
+### 4.8 Membership and Failure Detection
+
+> This section is well expressed in [Adding and Removing Nodes] and
+> [Failure Scenarios].
+
+[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency/
+
+#### 4.8.1 Ring Membership
+
+> Riak KV operators can trigger node management via the
+> [riak-admin command-line tool].
+
+[riak-admin command-line tool]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/
+
+In Amazon’s environment node outages (due to failures and maintenance tasks) are
+often transient but may last for extended intervals. A node outage rarely
+signifies a permanent departure and therefore should not result in rebalancing
+of the partition assignment or repair of the unreachable replicas. Similarly,
+manual error could result in the unintentional startup of new Dynamo nodes. For
+these reasons, it was deemed appropriate to use an explicit mechanism to
+initiate the addition and removal of nodes from a Dynamo ring. An administrator
+uses a command line tool or a browser to connect to a Dynamo node and issue a
+membership change to join a node to a ring or remove a node from a ring. The
+node that serves the request writes the membership change and its time of issue
+to persistent store. The membership changes form a history because nodes can be
+removed and added back multiple times.
+
+> Nodes are manually added using the `riak-admin cluster join` command.
+>
+> When a node permanently departs, rebalancing is triggered using the
+> `riak-admin cluster leave` command.
+
+A gossip-based protocol propagates membership changes and maintains an
+eventually consistent view of membership. Each node contacts a peer chosen at
+random every second and the two nodes efficiently reconcile their persisted
+membership change histories.
+
+> Riak KV's ring state holds membership information, and is propagated via
+> [gossiping], including random reconciliation, defaulting to once a minute.
+
+[gossiping]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#gossiping
+
+When a node starts for the first time, it chooses its set of tokens (virtual
+nodes in the consistent hash space) and maps nodes to their respective token
+sets. The mapping is persisted on disk and initially contains only the local
+node and token set. The mappings stored at different Dynamo nodes are reconciled
+during the same communication exchange that reconciles the membership change
+histories. Therefore, partitioning and placement information also propagates via
+the gossip-based protocol and each storage node is aware of the token ranges
+handled by its peers. This allows each node to forward a key’s read/write
+operations to the right set of nodes directly.
+
+> These tokens are vnodes (virtual nodes) in Riak KV.
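+
+> A simplified sketch of Riak KV's round-robin style of vnode assignment (an
+> approximation, not the actual claim algorithm) might look like this in Ruby:
+>
+>     RING_SIZE = 64  # number of partitions (vnodes); a power of 2 in Riak KV
+>
+>     nodes = ['riak@node1', 'riak@node2', 'riak@node3']
+>
+>     # Walk the partitions in order, handing each to the next node in turn,
+>     # so ownership stays balanced at roughly RING_SIZE / nodes.size each
+>     ownership = (0...RING_SIZE).map { |p| [p, nodes[p % nodes.size]] }.to_h
+>
+>     ownership[0]  #=> "riak@node1"
+>     ownership[1]  #=> "riak@node2"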
+
+
+#### 4.8.2 External Discovery
+
+The mechanism described above could temporarily result in a logically
+partitioned Dynamo ring. For example, the administrator could contact node A to
+join A to the ring, then contact node B to join B to the ring. In this scenario,
+nodes A and B would each consider itself a member of the ring, yet neither would
+be immediately aware of the other. To prevent logical partitions, some Dynamo
+nodes play the role of seeds. Seeds are nodes that are discovered via an
+external mechanism and are known to all nodes. Because all nodes eventually
+reconcile their membership with a seed, logical partitions are highly unlikely.
+Seeds can be obtained either from static configuration or from a configuration
+service. Typically seeds are fully functional nodes in the Dynamo ring.
+
+> To rectify these sorts of logical partitions, multiple Riak cluster changes
+> are configured as one batch. Any changes must first be viewed with `riak-admin
+> cluster plan`, then the changes are committed with `riak-admin cluster
+> commit`. The new ring state is gossiped.
+>
+> See _[The Node Join Process]_ for more.
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+
+
+#### 4.8.3 Failure Detection
+
+Failure detection in Dynamo is used to avoid attempts to communicate with
+unreachable peers during get() and put() operations and when transferring
+partitions and hinted replicas. For the purpose of avoiding failed attempts at
+communication, a purely local notion of failure detection is entirely
+sufficient: node A may consider node B failed if node B does not respond to node
+A’s messages (even if B is responsive to node C’s messages). In the presence of
+a steady rate of client requests generating inter-node communication in the
+Dynamo ring, a node A quickly discovers that a node B is unresponsive when B
+fails to respond to a message; Node A then uses alternate nodes to service
+requests that map to B's partitions; A periodically retries B to check for the
+latter's recovery. In the absence of client requests to drive traffic between
+two nodes, neither node really needs to know whether the other is reachable and
+responsive.
+
+Decentralized failure detection protocols use a simple gossip-style protocol
+that enables each node in the system to learn about the arrival (or departure)
+of other nodes. For detailed information on decentralized failure detectors and
+the parameters affecting their accuracy, the interested reader is referred to
+[8]. Early designs of Dynamo used a decentralized failure detector to maintain
+a globally consistent view of failure state. Later it was determined that the
+explicit node join and leave methods obviate the need for a global view of
+failure state.
This is because nodes are notified of permanent node additions
+and removals by the explicit node join and leave methods and temporary node
+failures are detected by the individual nodes when they fail to communicate with
+others (while forwarding requests).
+
+> Riak KV follows the same mechanism, by manually triggering permanent ring
+> state changes, and gossiping the new state.
+
+
+### 4.9 Adding/Removing Storage Nodes
+
+When a new node (say X) is added into the system, it gets assigned a number of
+tokens that are randomly scattered on the ring. For every key range that is
+assigned to node X, there may be a number of nodes (less than or equal to N)
+that are currently in charge of handling keys that fall within its token range.
+Due to the allocation of key ranges to X, some existing nodes no longer have to
+store some of their keys and these nodes transfer those keys to X. Let us
+consider a simple bootstrapping scenario where node X is added to the ring shown
+in <a href="#figure-2">Figure 2</a> between A and B. When X is added to the
+system, it is in charge of storing keys in the ranges (F, G], (G, A] and (A, X].
+As a consequence, nodes B, C and D no longer have to store the keys in these
+respective ranges. Therefore, nodes B, C, and D will offer to and upon
+confirmation from X transfer the appropriate set of keys. When a node is removed
+from the system, the reallocation of keys happens in a reverse process.
+
+> Riak KV does not randomly assign vnodes, but rather, iterates through the list
+> of partitions, assigning them to nodes in a round-robin style.
+
+Operational experience has shown that this approach distributes the load of key
+distribution uniformly across the storage nodes, which is important to meet the
+latency requirements and to ensure fast bootstrapping. Finally, by adding a
+confirmation round between the source and the destination, it is made sure that
+the destination node does not receive any duplicate transfers for a given key
+range.
+
+
+## 5. Implementation
+
+In Dynamo, each storage node has three main software components: request
+coordination, membership and failure detection, and a local persistence engine.
+All these components are implemented in Java.
+
+> Riak KV is implemented in Erlang. Request coordination and membership behavior
+> is defined by [riak_core] and implemented by [Riak KV].
+
+[riak_core]: http://github.com/basho/riak_core
+[Riak KV]: http://github.com/basho/riak_kv
+
+Dynamo’s local persistence component allows for different storage engines to be
+plugged in. Engines that are in use are Berkeley Database (BDB) Transactional
+Data Store, BDB Java Edition, MySQL, and an in-memory buffer with persistent
+backing store. The main reason for designing a pluggable persistence component
+is to choose the storage engine best suited for an application’s access
+patterns. For instance, BDB can handle objects typically in the order of tens of
+kilobytes whereas MySQL can handle objects of larger sizes. Applications choose
+Dynamo’s local persistence engine based on their object size distribution. The
+majority of Dynamo’s production instances use BDB Transactional Data Store.
+
+> Riak KV ships with various [backend options]. [Bitcask] is the default, but
+> [LevelDB] and Main [Memory] are also used heavily in production (in that
+> order). You can also use more than one backend in production via the
+> [Multi]({{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/multi/) backend
+> configuration.
+>
+> Bitcask is a fast and reliable choice, but does have some limitations at very
+> large scales. 
For larger clusters, you may want to choose LevelDB (which also +> supports [secondary indexes]). The Memory backend is an excellent choice when +> speed is important and durability is not. It also has TTL support. + +[backend options]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/ +[Bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask/ +[LevelDB]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb/ +[Memory]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/memory/ +[secondary indexes]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes/ + +The request coordination component is built on top of an event-driven messaging +substrate where the message processing pipeline is split into multiple stages +similar to the SEDA architecture [24]. All communications are implemented using +Java NIO channels. The coordinator executes the read and write requests on +behalf of clients by collecting data from one or more nodes (in the case of +reads) or storing data at one or more nodes (for writes). Each client request +results in the creation of a state machine on the node that received the client +request. The state machine contains all the logic for identifying the nodes +responsible for a key, sending the requests, waiting for responses, potentially +doing retries, processing the replies and packaging the response to the client. +Each state machine instance handles exactly one client request. For instance, a +read operation implements the following state machine: (i) send read requests to +the nodes, (ii) wait for minimum number of required responses, (iii) if too few +replies were received within a given time bound, fail the request, (iv) +otherwise gather all the data versions and determine the ones to be returned and +(v) if versioning is enabled, perform syntactic reconciliation and generate an +opaque write context that contains the vector clock that subsumes all the +remaining versions. For the sake of brevity the failure handling and retry +states are left out. + +> Request coordination in Riak KV uses Erlang message passing, but follows a +> similar state machine. + +After the read response has been returned to the caller the state machine waits +for a small period of time to receive any outstanding responses. If stale +versions were returned in any of the responses, the coordinator updates those +nodes with the latest version. This process is called read repair because it +repairs replicas that have missed a recent update at an opportunistic time and +relieves the anti-entropy protocol from having to do it. + +> Riak KV implements [Read Repair]. + +[Read Repair]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication/#read-repair + +As noted earlier, write requests are coordinated by one of the top N nodes in +the preference list. Although it is desirable always to have the first node +among the top N to coordinate the writes thereby serializing all writes at a +single location, this approach has led to uneven load distribution resulting in +SLA violations. This is because the request load is not uniformly distributed +across objects. To counter this, any of the top N nodes in the preference list +is allowed to coordinate the writes. In particular, since each write usually +follows a read operation, the coordinator for a write is chosen to be the node +that replied fastest to the previous read operation which is stored in the +context information of the request. 
This optimization enables us to pick the +node that has the data that was read by the preceding read operation thereby +increasing the chances of getting “read-your-writes” consistency. It also +reduces variability in the performance of the request handling which improves +the performance at the 99.9 percentile. + + +## 6. Experiences & Lessons Learned + +> Much of this section relates to benchmarks run against Dynamo. You can run +> [Basho Bench] against your own Riak cluster to discover your own +> optimal values. + +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.2/using/performance/benchmarking/ + +Dynamo is used by several services with different configurations. These +instances differ by their version reconciliation logic, and read/write quorum +characteristics. The following are the main patterns in which Dynamo is used: + +* Business logic specific reconciliation: This is a popular use case for Dynamo. +Each data object is replicated across multiple nodes. In case of divergent +versions, the client application performs its own reconciliation logic. The +shopping cart service discussed earlier is a prime example of this category. Its +business logic reconciles objects by merging different versions of a customer’s +shopping cart. + +> Riak KV currently supports simple conflict resolution by way of read-repair, +> remanding more complex reconciliation to the client. There are several tools +> to help simplify this task, such as [Statebox]. +> +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and +> counters. + +[Statebox]: https://github.com/mochi/statebox_riak +[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/2.9.2/developing/data-types/ + + +* Timestamp based reconciliation: This case differs from the previous one only +in the reconciliation mechanism. In case of divergent versions, Dynamo performs +simple timestamp based reconciliation logic of “last write wins”; i.e., the +object with the largest physical timestamp value is chosen as the correct +version. The service that maintains customer’s session information is a good +example of a service that uses this mode. + +> Riak also supports this for high-performance cases where accuracy is less +> important than speed. + +* High performance read engine: While Dynamo is built to be an “always +writeable” data store, a few services are tuning its quorum characteristics and +using it as a high performance read engine. Typically, these services have a +high read request rate and only a small number of updates. In this +configuration, typically R is set to be 1 and W to be N. For these services, +Dynamo provides the ability to partition and replicate their data across +multiple nodes thereby offering incremental scalability. Some of these instances +function as the authoritative persistence cache for data stored in more heavy +weight backing stores. Services that maintain product catalog and promotional +items fit in this category. + +> Riak can be used in this manner. + +The main advantage of Dynamo is that its client applications can tune the values +of N, R and W to achieve their desired levels of performance, availability and +durability. For instance, the value of N determines the durability of each +object. A typical value of N used by Dynamo’s users is 3. + +The values of W and R impact object availability, durability and consistency. 
+For instance, if W is set to 1, then the system will never reject a write +request as long as there is at least one node in the system that can +successfully process a write request. However, low values of W and R can +increase the risk of inconsistency as write requests are deemed successful and +returned to the clients even if they are not processed by a majority of the +replicas. This also introduces a vulnerability window for durability when a +write request is successfully returned to the client even though it has been +persisted at only a small number of nodes. + +Traditional wisdom holds that durability and availability go hand-in-hand. +However, this is not necessarily true here. For instance, the vulnerability +window for durability can be decreased by increasing W. This may increase the +probability of rejecting requests (thereby decreasing availability) because more +storage hosts need to be alive to process a write request. + +The common (N,R,W) configuration used by several instances of Dynamo is (3,2,2). +These values are chosen to meet the necessary levels of performance, durability, +consistency, and availability SLAs. + +All the measurements presented in this section were taken on a live system +operating with a configuration of (3,2,2) and running a couple hundred nodes +with homogenous hardware configurations. As mentioned earlier, each instance of +Dynamo contains nodes that are located in multiple datacenters. These +datacenters are typically connected through high speed network links. Recall +that to generate a successful get (or put) response R (or W) nodes need to +respond to the coordinator. Clearly, the network latencies between datacenters +affect the response time and the nodes (and their datacenter locations) are +chosen such that the applications target SLAs are met. + +> Ditto for Riak. + +### 6.1 Balancing Performance and Durability + +While Dynamo’s principle design goal is to build a highly available data store, +performance is an equally important criterion in Amazon’s platform. As noted +earlier, to provide a consistent customer experience, Amazon’s services set +their performance targets at higher percentiles (such as the 99.9th or 99.99th +percentiles). A typical SLA required of services that use Dynamo is that 99.9% +of the read and write requests execute within 300ms. + +Since Dynamo is run on standard commodity hardware components that have far less +I/O throughput than high-end enterprise servers, providing consistently high +performance for read and write operations is a non-trivial task. The involvement +of multiple storage nodes in read and write operations makes it even more +challenging, since the performance of these operations is limited by the slowest +of the R or W replicas. <a href="#figure-4">Figure 4</a> shows the average and +99.9th percentile latencies of Dynamo’s read and write operations during a +period of 30 days. As seen in the figure, the latencies exhibit a clear diurnal +pattern which is a result of the diurnal pattern in the incoming request rate +(i.e., there is a significant difference in request rate between the daytime and +night). Moreover, the write latencies are higher than read latencies obviously +because write operations always results in disk access. Also, the 99.9th +percentile latencies are around 200 ms and are an order of magnitude higher than +the averages. 
This is because the 99.9th percentile latencies are affected by +several factors such as variability in request load, object sizes, and locality +patterns. + +**<figure id="figure-4" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure4.png"> + <figcaption> + Figure 4: Average and 99.9 percentiles of latencies for read and write + requests during our peak request season of December 2006. The intervals + between consecutive ticks in the x-axis correspond to 12 hours. Latencies + follow a diurnal pattern similar to the request rate and 99.9 percentile + latencies are an order of magnitude higher than averages. + </figcaption> +</figure>** + +While this level of performance is acceptable for a number of services, a few +customer-facing services required higher levels of performance. For these +services, Dynamo provides the ability to trade-off durability guarantees for +performance. In the optimization each storage node maintains an object buffer in +its main memory. Each write operation is stored in the buffer and gets +periodically written to storage by a writer thread. In this scheme, read +operations first check if the requested key is present in the buffer. If so, the +object is read from the buffer instead of the storage engine. + +> This is more similar to Riak's W value, since only DW requires a durable write +> to respond as a success. + +This optimization has resulted in lowering the 99.9th percentile latency by a +factor of 5 during peak traffic even for a very small buffer of a thousand +objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure, +write buffering smoothes out higher percentile latencies. Obviously, this scheme +trades durability for performance. In this scheme, a server crash can result in +missing writes that were queued up in the buffer. To reduce the durability risk, +the write operation is refined to have the coordinator choose one out of the N +replicas to perform a “durable write”. Since the coordinator waits only for W +responses, the performance of the write operation is not affected by the +performance of the durable write operation performed by a single replica. + +**<figure id="figure-5" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure5.png"> + <figcaption> + Figure 5: Comparison of performance of 99.9th percentile latencies for + buffered vs. non-buffered writes over a period of 24 hours. The intervals + between consecutive ticks in the x-axis correspond to one hour. + </figcaption> +</figure>** + +> Setting DW=1 will replicate this behavior. + + +### 6.2 Ensuring Uniform Load distribution + +Dynamo uses consistent hashing to partition its key space across its replicas +and to ensure uniform load distribution. A uniform key distribution can help us +achieve uniform load distribution assuming the access distribution of keys is +not highly skewed. In particular, Dynamo’s design assumes that even where there +is a significant skew in the access distribution there are enough keys in the +popular end of the distribution so that the load of handling popular keys can be +spread across the nodes uniformly through partitioning. This section discusses +the load imbalance seen in Dynamo and the impact of different partitioning +strategies on load distribution. + +> Riak follows a SHA1 based consistent hashing for [partitioning]. 
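+
+> To sketch the idea in Ruby (a toy model, not Riak KV's exact ring math): hash
+> the bucket/key pair with SHA-1, read the digest as an integer position on the
+> ring, and map that position to one of Q equal-sized partitions:
+>
+>     require 'digest'
+>
+>     RING_SIZE = 64        # Q equal-sized partitions
+>     HASH_SPACE = 2**160   # size of the SHA-1 output space
+>
+>     def partition_for(bucket, key)
+>       position = Digest::SHA1.hexdigest("#{bucket}/#{key}").to_i(16)
+>       position / (HASH_SPACE / RING_SIZE)
+>     end
+>
+>     partition_for('carts', 'user-123')  #=> an integer in 0..63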
+
+[partitioning]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication/#understanding-replication-by-example
+
+To study the load imbalance and its correlation with request load, the total
+number of requests received by each node was measured for a period of 24 hours -
+broken down into intervals of 30 minutes. In a given time window, a node is
+considered to be “in-balance”, if the node’s request load deviates from the
+average load by a value less than a certain threshold (here 15%). Otherwise
+the node was deemed “out-of-balance”. <a href="#figure-6">Figure 6</a> presents
+the fraction of nodes that are “out-of-balance” (henceforth, “imbalance ratio”)
+during this time period. For reference, the corresponding request load received
+by the entire system during this time period is also plotted. As seen in the
+figure, the imbalance ratio decreases with increasing load. For instance, during
+low loads the imbalance ratio is as high as 20% and during high loads it is
+close to 10%. Intuitively, this can be explained by the fact that under high
+loads, a large number of popular keys are accessed and due to uniform
+distribution of keys the load is evenly distributed. However, during low loads
+(where load is 1/8th of the measured peak load), fewer popular keys are
+accessed, resulting in a higher load imbalance.
+
+**<figure id="figure-6" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure6.png">
+  <figcaption>
+    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
+    request load is above a certain threshold from the average system load) and
+    their corresponding request load. The interval between ticks in x-axis
+    corresponds to a time period of 30 minutes.
+  </figcaption>
+</figure>**
+
+<i>This section discusses how Dynamo’s partitioning scheme has evolved over time
+and its implications on load distribution.</i>
+
+<strong>Strategy 1:</strong> T random tokens per node and partition by token
+value: This was the initial strategy deployed in production (and described in
+Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
+at random from the hash space). The tokens of all nodes are ordered according to
+their values in the hash space. Every two consecutive tokens define a range. The
+last token and the first token form a range that "wraps" around from the highest
+value to the lowest value in the hash space. Because the tokens are chosen
+randomly, the ranges vary in size. As nodes join and leave the system, the token
+set changes and consequently the ranges change. Note that the space needed to
+maintain the membership at each node increases linearly with the number of nodes
+in the system.
+
+> Riak uses equal-sized partitions with a round-robin distribution--not
+> variably-sized partitions that are randomly distributed.
+
+While using this strategy, the following problems were encountered. First, when
+a new node joins the system, it needs to “steal” its key ranges from other
+nodes. However, the nodes handing the key ranges off to the new node have to
+scan their local persistence store to retrieve the appropriate set of data
+items. Note that performing such a scan operation on a production node is tricky
+as scans are highly resource intensive operations and they need to be executed
+in the background without affecting the customer performance. This requires us
+to run the bootstrapping task at the lowest priority. 
However, this
+significantly slows the bootstrapping process and during busy shopping season,
+when the nodes are handling millions of requests a day, the bootstrapping has
+taken almost a day to complete. Second, when a node joins/leaves the system, the
+key ranges handled by many nodes change and the Merkle trees for the new ranges
+need to be recalculated, which is a non-trivial operation to perform on a
+production system. Finally, there was no easy way to take a snapshot of the
+entire key space due to the randomness in key ranges, and this made the process
+of archival complicated. In this scheme, archiving the entire key space requires
+us to retrieve the keys from each node separately, which is highly inefficient.
+
+The fundamental issue with this strategy is that the schemes for data
+partitioning and data placement are intertwined. For instance, in some cases, it
+is preferred to add more nodes to the system in order to handle an increase in
+request load. However, in this scenario, it is not possible to add nodes without
+affecting data partitioning. Ideally, it is desirable to use independent schemes
+for partitioning and placement. To this end, the following strategies were
+evaluated:
+
+<strong>Strategy 2:</strong> T random tokens per node and equal sized
+partitions: In this strategy, the hash space is divided into Q equally sized
+partitions/ranges and each node is assigned T random tokens. Q is usually set
+such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In
+this strategy, the tokens are only used to build the function that maps values
+in the hash space to the ordered lists of nodes and not to decide the
+partitioning. A partition is placed on the first N unique nodes that are
+encountered while walking the consistent hashing ring clockwise from the end of
+the partition. <a href="#figure-7">Figure 7</a> illustrates this strategy for
+N=3. In this example, nodes A, B, C are encountered while walking the ring from
+the end of the partition that contains key k1. The primary advantages of this
+strategy are: (i) decoupling of partitioning and partition placement, and (ii)
+enabling the possibility of changing the placement scheme at runtime.
+
+> As mentioned before, Riak uses equal-sized partitions, but not
+> random distribution.
+
+**<figure id="figure-7" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure7-small.png">
+  <figcaption>
+    Figure 7: Partitioning and placement of keys in the three strategies. A, B,
+    and C depict the three unique nodes that form the preference list for the
+    key k1 on the consistent hashing ring (N=3). The shaded area indicates the
+    key range for which nodes A, B, and C form the preference list. Dark arrows
+    indicate the token locations for various nodes.
+  </figcaption>
+</figure>**
+
+<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
+Similar to strategy 2, this strategy divides the hash space into Q equally sized
+partitions and the placement of partition is decoupled from the partitioning
+scheme. Moreover, each node is assigned Q/S tokens where S is the number of
+nodes in the system. When a node leaves the system, its tokens are randomly
+distributed to the remaining nodes such that these properties are preserved.
+Similarly, when a node joins the system it "steals" tokens from nodes in the
+system in a way that preserves these properties.
+
+> Riak most closely follows strategy 3.
+>
+> See [The Node Join Process] and [Replacing a Node]. 
+ +[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/replacing-node/ + +The efficiency of these three strategies is evaluated for a system with S=30 and +N=3. However, comparing these different strategies in a fair manner is hard as +different strategies have different configurations to tune their efficiency. For +instance, the load distribution property of strategy 1 depends on the number of +tokens (i.e., T) while strategy 3 depends on the number of partitions (i.e., Q). +One fair way to compare these strategies is to evaluate the skew in their load +distribution while all strategies use the same amount of space to maintain their +membership information. For instance, in strategy 1 each node needs to maintain +the token positions of all the nodes in the ring and in strategy 3 each node +needs to maintain the information regarding the partitions assigned to each +node. + +In our next experiment, these strategies were evaluated by varying the relevant +parameters (T and Q). The load balancing efficiency of each strategy was +measured for different sizes of membership information that needs to be +maintained at each node, where Load balancing efficiency is defined as the ratio +of average number of requests served by each node to the maximum number of +requests served by the hottest node. + +The results are given in <a href="#figure-8">Figure 8</a>. As seen in the +figure, strategy 3 achieves the best load balancing efficiency and strategy 2 +has the worst load balancing efficiency. For a brief time, Strategy 2 served as +an interim setup during the process of migrating Dynamo instances from using +Strategy 1 to Strategy 3. Compared to Strategy 1, Strategy 3 achieves better +efficiency and reduces the size of membership information maintained at each +node by three orders of magnitude. While storage is not a major issue the nodes +gossip the membership information periodically and as such it is desirable to +keep this information as compact as possible. In addition to this, strategy 3 is +advantageous and simpler to deploy for the following reasons: (i) Faster +bootstrapping/recovery: Since partition ranges are fixed, they can be stored in +separate files, meaning a partition can be relocated as a unit by simply +transferring the file (avoiding random accesses needed to locate specific +items). This simplifies the process of bootstrapping and recovery. (ii) Ease of +archival: Periodical archiving of the dataset is a mandatory requirement for +most of Amazon storage services. Archiving the entire dataset stored by Dynamo +is simpler in strategy 3 because the partition files can be archived separately. +By contrast, in Strategy 1, the tokens are chosen randomly and, archiving the +data stored in Dynamo requires retrieving the keys from individual nodes +separately and is usually inefficient and slow. The disadvantage of strategy 3 +is that changing the node membership requires coordination in order to preserve +the properties required of the assignment. + +**<figure id="figure-8" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure8.png"> + <figcaption> + Figure 8: Comparison of the load distribution efficiency of different + strategies for system with 30 nodes and N=3 with equal amount of metadata + maintained at each node. 
The values of the system size and number of + replicas are based on the typical configuration deployed for majority of + our services. + </figcaption> +</figure>** + +### 6.3 Divergent Versions: When and How Many? + +As noted earlier, Dynamo is designed to tradeoff consistency for availability. +To understand the precise impact of different failures on consistency, detailed +data is required on multiple factors: outage length, type of failure, component +reliability, workload etc. Presenting these numbers in detail is outside of the +scope of this paper. However, this section discusses a good summary metric: the +number of divergent versions seen by the application in a live production +environment. + +> This first statement should be read carefully. It's probably more correct to +> say that Dynamo (and Riak) provides no consistency guarantees, and allows +> users to trade availability for durability/latency. + +Divergent versions of a data item arise in two scenarios. The first is when the +system is facing failure scenarios such as node failures, data center failures, +and network partitions. The second is when the system is handling a large number +of concurrent writers to a single data item and multiple nodes end up +coordinating the updates concurrently. From both a usability and efficiency +perspective, it is preferred to keep the number of divergent versions at any +given time as low as possible. If the versions cannot be syntactically +reconciled based on vector clocks alone, they have to be passed to the business +logic for semantic reconciliation. Semantic reconciliation introduces additional +load on services, so it is desirable to minimize the need for it. + +In our next experiment, the number of versions returned to the shopping cart +service was profiled for a period of 24 hours. During this period, 99.94% of +requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047% +of requests saw 3 versions and 0.00009% of requests saw 4 versions. This shows +that divergent versions are created rarely. + +Experience shows that the increase in the number of divergent versions is +contributed not by failures but due to the increase in number of concurrent +writers. The increase in the number of concurrent writes is usually triggered by +busy robots (automated client programs) and rarely by humans. This issue is not +discussed in detail due to the sensitive nature of the story. + +### 6.4 Client-driven or Server-driven Coordination + +As mentioned in Section 5, Dynamo has a request coordination component that uses +a state machine to handle incoming requests. Client requests are uniformly +assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a +coordinator for a read request. Write requests on the other hand will be +coordinated by a node in the key’s current preference list. This restriction is +due to the fact that these preferred nodes have the added responsibility of +creating a new version stamp that causally subsumes the version that has been +updated by the write request. Note that if Dynamo’s versioning scheme is based +on physical timestamps, any node can coordinate a write request. + +> In Riak, a server-side load-balancer is an optional configuration. You +> generally use either virtual IPs or reverse-proxies. +> +> See [Load Balancing] for more information. 
+
+[Load Balancing]: {{<baseurl>}}riak/kv/2.9.2/configuring/load-balancing-proxy/
+
+An alternative approach to request coordination is to move the state machine to
+the client nodes. In this scheme client applications use a library to perform
+request coordination locally. A client periodically picks a random Dynamo node
+and downloads its current view of Dynamo membership state. Using this
+information the client can determine which set of nodes form the preference list
+for any given key. Read requests can be coordinated at the client node thereby
+avoiding the extra network hop that is incurred if the request were assigned to
+a random Dynamo node by the load balancer. Writes will either be forwarded to a
+node in the key’s preference list or can be coordinated locally if Dynamo is
+using timestamps based versioning.
+
+> Many [client libraries] provide built-in node request coordination.
+>
+> For example, using the Ruby driver, you could specify three nodes like this:
+>
+>     client = Riak::Client.new(nodes: [
+>       {host: '10.0.0.1'},
+>       {host: '10.0.0.2'},
+>       {host: '10.0.0.3'}
+>     ])
+>
+> Note that the Riak clients do not coordinate with Riak's preference list, but
+> simply round-robin requests, letting the Riak cluster handle routing.
+
+[client libraries]: {{<baseurl>}}riak/kv/2.9.2/developing/client-libraries/
+
+An important advantage of the client-driven coordination approach is that a load
+balancer is no longer required to uniformly distribute client load. Fair load
+distribution is implicitly guaranteed by the near uniform assignment of keys to
+the storage nodes. Obviously, the efficiency of this scheme is dependent on how
+fresh the membership information is at the client. Currently clients poll a
+random Dynamo node every 10 seconds for membership updates. A pull based
+approach was chosen over a push based one as the former scales better with large
+number of clients and requires very little state to be maintained at servers
+regarding clients. However, in the worst case the client can be exposed to stale
+membership for duration of 10 seconds. In case the client detects its membership
+table is stale (for instance, when some members are unreachable), it will
+immediately refresh its membership information.
+
+<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
+percentile and averages that were observed for a period of 24 hours using
+client-driven coordination compared to the server-driven approach. As seen in
+the table, the client-driven coordination approach reduces the latencies by at
+least 30 milliseconds for 99.9th percentile latencies and decreases the average
+by 3 to 4 milliseconds. The latency improvement is because the client-driven
+approach eliminates the overhead of the load balancer and the extra network hop
+that may be incurred when a request is assigned to a random node. As seen in the
+table, average latencies tend to be significantly lower than latencies at the
+99.9th percentile. This is because Dynamo’s storage engine caches and write
+buffer have good hit ratios. Moreover, since the load balancers and network
+introduce additional variability to the response time, the gain in response time
+is higher for the 99.9th percentile than the average.
+
+<table id="table-2">
+  <caption>
+    Table 2: Performance of client-driven and server-driven
+    coordination approaches. 
+  </caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations are not affected significantly. To this end, the
+background tasks were integrated with an admission control mechanism. Each of
+the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock-contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementation and maintenance of Dynamo. Many Amazon internal services have
+used Dynamo for the past two years and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of its requests and no
+data loss event has occurred to date.
+
+Moreover, the primary advantage of Dynamo is that it provides the necessary
+knobs using the three parameters of (N,R,W) to tune their instance based on
+their needs. Unlike popular commercial data stores, Dynamo exposes data
+consistency and reconciliation logic issues to the developers. At the outset,
+one may expect the application logic to become more complex. However,
+historically, Amazon’s platform is built for high availability and many
+applications are designed to handle different failure modes and inconsistencies
+that may arise. 
Hence, porting such applications to use Dynamo was a relatively
+simple task. For new applications that want to use Dynamo, some analysis is
+required during the initial stages of the development to pick the right conflict
+resolution mechanisms that meet the business case appropriately. Finally, Dynamo
+adopts a full membership model where each node is aware of the data hosted by
+its peers. To do this, each node actively gossips the full routing table with
+other nodes in the system. This model works well for a system that contains a
+couple of hundred nodes. However, scaling such a design to run with tens of
+thousands of nodes is not trivial because the overhead in maintaining the
+routing table increases with the system size. This limitation might be overcome
+by introducing hierarchical extensions to Dynamo. Also, note that this problem
+is actively addressed by O(1) DHT systems (e.g., [14]).
+
+> This is equally true for Riak. As mentioned above, consider running
+> [Basho Bench] to help discover your optimal setup. Nothing will give you
+> better numbers than real experimentation.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.2/using/performance/benchmarking/
+
+## 7. Conclusions
+
+> This paper was an overview of Riak from a Dynamo point-of-view. To get a
+> better sense of the Riak ecosystem, read our ever-expanding [documentation].
+
+[documentation]: {{<baseurl>}}
+
+This paper described Dynamo, a highly available and scalable data store, used
+for storing state of a number of core services of Amazon.com’s e-commerce
+platform. Dynamo has provided the desired levels of availability and performance
+and has been successful in handling server failures, data center failures and
+network partitions. Dynamo is incrementally scalable and allows service owners
+to scale up and down based on their current request load. Dynamo allows service
+owners to customize their storage system to meet their desired performance,
+durability and consistency SLAs by allowing them to tune the parameters N, R,
+and W.
+
+The production use of Dynamo for the past year demonstrates that decentralized
+techniques can be combined to provide a single highly-available system. Its
+success in one of the most challenging application environments shows that an
+eventually consistent storage system can be a building block for
+highly-available applications. 
diff --git a/content/riak/kv/2.9.2/learn/glossary.md b/content/riak/kv/2.9.2/learn/glossary.md new file mode 100644 index 0000000000..fc529b796a --- /dev/null +++ b/content/riak/kv/2.9.2/learn/glossary.md @@ -0,0 +1,353 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.2/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/2.9.2/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/2.9.2/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/2.9.2/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/2.9.2/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.2/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/2.9.2/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/2.9.2/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/2.9.2/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
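+
+To see the cluster state that gossip maintains, you can ask any node via the
+`riak-admin` tool (this assumes a default installation with `riak-admin` on
+your path):
+
+```bash
+# Show cluster membership as this node currently sees it
+riak-admin member-status
+
+# Show ring ownership and report any gossip/ownership problems
+riak-admin ring-status
+```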
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Hinted Handoff
+
+Hinted handoff is a technique for dealing with node failure in the Riak
+cluster in which neighboring nodes temporarily take over storage
+operations for the failed node. When the failed node returns to the
+cluster, the updates received by the neighboring nodes are handed off to
+it.
+
+Hinted handoff allows Riak to ensure database availability. When a node
+fails, Riak can continue to handle requests as if the node were still
+there.
+
+* [Recovering a Failed Node][repair recover failure recovery]
+
+## Key
+
+Keys are unique object identifiers in Riak and are scoped within buckets
+and bucket types.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Lager
+
+[Lager] is an Erlang/OTP framework that
+ships as Riak's default logger.
+
+## MapReduce
+
+Riak's MapReduce gives developers the capability to perform more
+powerful queries over the data stored in their key/value store.
+
+* [Using MapReduce][usage mapreduce]
+
+## Node
+
+A node is analogous to a physical server. Nodes run a certain number of
+vnodes, each of which claims a partition in the Riak Ring key space.
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Object
+
+An object is another name for a value.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Partition
+
+Partitions are the spaces into which a Riak cluster is divided. Each
+vnode in Riak is responsible for a partition. Data is stored on a set
+number of partitions determined by the `n_val` setting, with the target
+partitions chosen statically by applying consistent hashing to an
+object's key.
+
+* [Clusters][concept clusters]
+* [Eventual Consistency][concept eventual consistency]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Quorum
+
+Quorum in Riak has two meanings:
+
+* The quantity of replicas that must respond to a read or write request
+  before it is considered successful. This is defined as a bucket
+  property or as one of the relevant parameters to a single request
+  (R,W,DW,RW).
+* A symbolic quantity for the above, `quorum`, which is equivalent to
+  `n_val` / 2 + 1. The default setting is `2`.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Replication properties][apps replication properties]
+* [Understanding Riak's Configurable Behaviors]
+
+## Sloppy Quorum
+
+During failure scenarios, in which available nodes < total nodes, sloppy
+quorum is used to ensure that Riak is still available to take writes.
+When a primary node is unavailable, another node will accept its write
+requests. When the node returns, data is transferred to the primary node
+via the [Hinted Handoff](#hinted-handoff) process.
+
+## Read Repair
+
+Read repair is an anti-entropy mechanism that Riak uses to
+optimistically update stale replicas when they reply to a read request
+with stale data.
+
+* [More about Read Repair][concept replication]
+
+## Replica
+
+Replicas are copies of data stored in Riak. The number of replicas
+required for both successful reads and writes is configurable in Riak
+and should be set based on your application's consistency and
+availability requirements. 
+
+* [Eventual Consistency][concept eventual consistency]
+* [Understanding Riak's Configurable Behaviors]
+
+## Riak Core
+
+Riak Core is the modular distributed systems framework that serves as
+the foundation for Riak's scalable architecture.
+
+* [Riak Core]
+* [Where To Start With Riak Core]
+
+## Riak KV
+
+Riak KV is the key/value datastore for Riak.
+
+* [Riak KV]
+
+## Riak Pipe
+
+Riak Pipe is the processing layer that powers Riak's MapReduce. It's
+best described as "UNIX pipes for Riak."
+
+* [Riak Pipe]
+* [Riak Pipe - the New MapReduce Power]
+* [Riak Pipe - Riak's Distributed Processing Framework]
+
+## Riak Search
+
+Riak Search is a distributed, scalable, failure-tolerant, realtime,
+full-text search engine integrating [Apache
+Solr](https://lucene.apache.org/solr/) with Riak KV.
+
+* [Using Search][usage search]
+
+## Ring
+
+The Riak Ring is a 160-bit integer space. This space is equally divided
+into partitions, each of which is claimed by a vnode, which themselves
+reside on actual physical server nodes.
+
+* [Clusters][concept clusters]
+* [Dynamo][learn dynamo]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Secondary Indexing (2i)
+
+Secondary Indexing in Riak gives developers the ability to tag an object
+stored in Riak with one or more values which can then be queried.
+
+* [Using Secondary Indexes][usage secondary-indexes]
+* [Repairing Indexes][repair recover repairs]
+
+## Strong Consistency
+
+While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater
+enable you to apply strong consistency guarantees to some or all of your
+data, thus using Riak as a CP (consistent plus partition-tolerant)
+rather than AP (highly available plus partition-tolerant) system.
+
+* [Strong Consistency Concept][concept strong consistency]
+* [Using Strong Consistency][cluster ops strong consistency]
+
+## Value
+
+Riak is best described as a key/value store. In versions of Riak prior
+to 2.0, all "values" are opaque BLOBs (binary large objects) identified
+with a unique key. Values can be any type of data, including a string, a
+JSON object, a text document, etc. Modifying values involves fetching
+the value that exists in Riak and replacing it with a new value;
+operations on values are thus basic CRUD operations.
+
+[Riak Data Types][dev data types], added in version 2.0, are an important
+exception to this. While still considered values---because they are
+stored in bucket type/bucket/key locations, like anything in Riak---Riak
+Data Types are not BLOBs and are modified by Data Type-specific
+operations.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+* [Data Types][dev data types]
+
+
+## Vector Clock
+
+Riak utilizes vector clocks (or _vclocks_) to handle version control.
+Since any node in a Riak cluster is able to handle a request, and not
+all nodes need to participate, data versioning is required to keep track
+of a current value. When a value is stored in Riak, it is tagged with a
+vector clock, establishing its initial version. When it is updated, the
+client provides the vector clock of the object being modified so that
+this vector clock can be extended to reflect the update. Riak can then
+compare vector clocks on different versions of the object and determine
+certain attributes of the data. 
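+
+For illustration, here is roughly how that round trip looks with the
+[HTTP API][dev api http]; the host, bucket, key, and value below are
+hypothetical:
+
+```bash
+# Fetch the object; the response carries its current vector clock in
+# the X-Riak-Vclock header
+curl -i http://localhost:8098/buckets/users/keys/alice
+
+# Write the updated value back with that same header so Riak can extend
+# the vector clock to reflect this update
+curl -XPUT http://localhost:8098/buckets/users/keys/alice \
+  -H 'Content-Type: application/json' \
+  -H 'X-Riak-Vclock: <opaque-vclock-from-the-GET-response>' \
+  -d '{"name": "Alice"}'
+```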
+
+* [Vector clocks][concept causal context vc]
+
+## Vnode
+
+Vnodes, or "virtual nodes," are responsible for claiming a partition in
+the Riak Ring, and they coordinate requests for these partitions.
+
+* [vnodes][glossary vnode]
+* [Clusters][concept clusters]
+* [Dynamo][learn dynamo]
diff --git a/content/riak/kv/2.9.2/learn/new-to-nosql.md b/content/riak/kv/2.9.2/learn/new-to-nosql.md
new file mode 100644
index 0000000000..94b8eb03c4
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/new-to-nosql.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "New to NoSQL?"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+#menu:
+#  riak_kv-2.9.2:
+#    name: "New to NoSQL?"
+#    identifier: "learn_new_nosql"
+#    weight: 102
+#    parent: "learn"
+toc: true
+---
+
+**TODO: Add content (not sure where this lives in existing docs)**
diff --git a/content/riak/kv/2.9.2/learn/use-cases.md b/content/riak/kv/2.9.2/learn/use-cases.md
new file mode 100644
index 0000000000..17a2eb1dac
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/use-cases.md
@@ -0,0 +1,401 @@
+---
+title: "Use Cases For Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Use Cases"
+    identifier: "learn_use_cases"
+    weight: 101
+    parent: "learn"
+toc: true
+aliases:
+  - /riak/2.9.2/dev/data-modeling/
+  - /riak/kv/2.9.2/dev/data-modeling/
+---
+
+
+[dev data model articles etc]: {{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/#articles-blog-posts-and-other-content
+[dev data model log data]: {{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/#log-data
+[dev data model sensor data]: {{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/#sensor-data
+[dev data model serve advertisements]: {{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/#serving-advertisements
+[dev data model sess storage]: {{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/#session-storage
+[dev data model user acct]: {{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/#user-accounts
+[dev data model user events]: {{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/#user-events-and-timelines
+[dev data model user settings]: {{<baseurl>}}riak/kv/2.9.2/developing/data-modeling/#user-settings-and-preferences
+[dev data types]: {{<baseurl>}}riak/kv/2.9.2/developing/data-types
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[replication properties]: {{<baseurl>}}riak/kv/2.9.2/developing/app-guide/replication-properties
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce
+[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes
+
+Riak is a flexible data storage technology capable of addressing a wide variety
+of problems in a scalable way. In this guide, we'll list a number of use cases
+and data models that are a good fit for Riak. All of these use cases are already
+being used in production for projects large and small. We'll also suggest
+possibilities for implementation and provide links to videos and documentation
+for further exploration.
+
+How you structure your application to run on Riak should take into account the
+unique needs of your use case, including access patterns such as read/write
+distribution, latency differences between various operations, use of Riak
+features including [Data Types][dev data types], [MapReduce][usage mapreduce],
+[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and
+more. 
This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships.
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+Riak's tunable [replication properties][replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster that performs more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+patterns of reading and writing data to Riak are very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value. 
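+
+As a minimal sketch of this scheme (the host, bucket name, key format, and
+payload below are all illustrative), each reading could be written via the HTTP
+API with the interval as the key:
+
+```bash
+# Store one reading for a hypothetical device "sensor-42", keyed by an
+# ISO 8601 timestamp for the interval
+curl -XPUT http://localhost:8098/buckets/sensor-42/keys/2020-04-08T10:00:00Z \
+  -H 'Content-Type: application/json' \
+  -d '{"temperature_c": 21.3, "humidity": 0.47}'
+
+# Fetch the reading back later by reconstructing the interval key
+curl http://localhost:8098/buckets/sensor-42/keys/2020-04-08T10:00:00Z
+```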
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye out for the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator. 
+      Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type of object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account or have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of user data
+modeling. A common example would be storing data for assembling a social network
+timeline. To create a user timeline, you could use a `timeline` bucket in Riak
+and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs which could then be
+used to retrieve the full information from another bucket, or perhaps containing
+the full status update. If you want to store additional data, such as a
+timestamp, category, or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+in the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client. 
Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the content post, though with a different
+bucket/key combination. Another possibility would be to store each comment with
+its own ID. Loading the full view with comments would require your application
+to call from the posts and comments buckets to assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
diff --git a/content/riak/kv/2.9.2/learn/why-riak-kv.md b/content/riak/kv/2.9.2/learn/why-riak-kv.md
new file mode 100644
index 0000000000..672b12c5f6
--- /dev/null
+++ b/content/riak/kv/2.9.2/learn/why-riak-kv.md
@@ -0,0 +1,221 @@
+---
+title: "Why Riak KV?" 
+description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Why Riak KV?" + identifier: "learn_why_riak_kv" + weight: 100 + parent: "learn" +toc: true +aliases: + - /riak/2.9.2/theory/why-riak/ + - /riak/kv/2.9.2/theory/why-riak/ +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.2/developing/app-guide/replication-properties +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.2/using/performance/benchmarking +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency +[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[Datomic]: http://www.datomic.com/overview.html +[dev data types]: {{<baseurl>}}riak/kv/2.9.2/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#read-repair + + +## What is Riak? + +Riak is a distributed database designed to deliver maximum data +availability by distributing data across multiple servers. As long as +your Riak client can reach *one* Riak server, it should be able to write +data. + +Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data. + + +### Basho's goals for Riak + +Goal | Description +-------|------- +**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself are experiencing failure conditions +**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden +**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity +**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available + +### When Riak makes sense + +If your data does not fit on a single server and demands a distributed +database architecture, you should take a close look at Riak as a +potential solution to your data availability issues. Getting distributed +databases right is **very** difficult, and Riak was built to address the +problem of data availability with as few trade-offs and downsides as +possible. + +Riak's focus on availability makes it a good fit whenever downtime is +unacceptable. No one can promise 100% uptime, but Riak is designed to +survive network partitions and hardware failures that would +significantly disrupt most databases. + +A less-heralded feature of Riak is its predictable latency. Because its +fundamental operations---read, write, and delete---do not involve +complex data joins or locks, it services those requests promptly. Thanks +to this capability, Riak is often selected as a data storage backend for +data management software from a variety of paradigms, such as +[Datomic]. + +From the standpoint of the actual content of your data, Riak might also +be a good choice if your data can be modeled as one of Riak's currently +available [Data Types][dev data types]: flags, registers, counters, +sets, or maps. These Data Types enable you to take advantage of Riak's +high availability approach while simplifying application development. + +### When Riak is Less of a Good Fit + +We recommend running no fewer than 5 data servers in a cluster. +This means that Riak can be overkill for small databases. 
If you're not +already sure that you will need a distributed database, there's a good +chance that you won't need Riak. + +If explosive growth is a possibility, however, you are always highly +advised to prepare for that in advance. Scaling at Internet speeds is +sometimes compared to overhauling an airplane mid-flight. If you feel +that such a transition might be necessary in the future, then you might +want to consider Riak. + +Riak's simple data model, consisting of keys and values as its atomic +elements, means that your data must be denormalized if your system is to +be reasonably performant. For most applications this is not a serious +hurdle. But if your data simply cannot be effectively managed as keys +and values, Riak will most likely not be the best fit for you. + +Correspondingly, if your application demands a high query load by any +means other than key/value lookup---e.g. SQL-style `SELECT * FROM table` +operations---Riak will not be as efficient as other databases. If you +wish to compare Riak with other data technologies, Basho offers a tool +called [Basho Bench] to help measure its performance, so that you can +decide whether the availability and operational benefits of Riak +outweigh its disadvantages. + +## How Does a Riak Cluster Work? + +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. 
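+
+As a sketch of how this is configured (the bucket name and value below are
+illustrative), `n_val` can be set through the bucket properties endpoint of the
+HTTP API:
+
+```bash
+# Raise the replication factor for a "sensors" bucket from the default
+# of 3 to 5, assuming a local node listening for HTTP on port 8098
+curl -XPUT http://localhost:8098/buckets/sensors/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props": {"n_val": 5}}'
+```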
+
+## When Things Go Wrong
+
+Riak retains fault tolerance, data integrity, and availability even in
+failure conditions such as hardware failure and network partitions. Riak
+has a number of means of addressing these scenarios and other bumps in
+the road, like version conflicts in data.
+
+### Hinted Handoff
+
+Hinted handoff enables Riak to handle node failure. If a node goes down,
+a neighboring node will take over its storage operations. When the
+failed node returns, the updates received by the neighboring node are
+handed back to it. This ensures that availability for writes and updates
+is maintained automatically, minimizing the operational burden of
+failure conditions.
+
+### Version Conflicts
+
+In any system that replicates data, conflicts can arise, for example
+when two clients update the same object at the exact same time or when
+not all updates have yet reached hardware that is experiencing lag.
+
+In Riak, replicas are [eventually consistent][concept eventual consistency],
+meaning that while data is always available, not all replicas may have
+the most recent update at the exact same time, causing brief
+periods---generally on the order of milliseconds---of inconsistency
+while all state changes are synchronized.
+
+Riak addresses data conflicts as follows: When you make a read request,
+Riak looks up all replicas for that object. By default, Riak will return
+the most recently updated version, determined by looking at the object's
+vector clock. Vector clocks are metadata attached to each replica when
+it is created. They are extended each time a replica is updated to keep
+track of versions. You can also allow clients to resolve conflicts
+themselves if that is a better fit for your use case.
+
+### Riak Data Types
+
+If you are not interested in dealing with version conflicts on the
+application side, [Riak Data Types][dev data types] offer a powerful
+yet easy-to-use means of storing certain types of data while allowing
+Riak to handle merge conflicts. These conflicts are resolved
+automatically by Riak using Data Type-specific algorithms inspired by
+research into [convergent replicated data types].
+
+### Read Repair
+
+When an outdated replica is returned as part of a read request, Riak
+will automatically update the out-of-sync replica to make it consistent.
+[Read repair][glossary read rep], a self-healing property of
+the database, will even update a replica that returns a `not_found` in
+the event that a node loses the data due to physical failure.
+
+### Reading and Writing Data in Failure Conditions
+
+In Riak, you can set an R value for reads and a W value for writes.
+These values give you control over how many replicas must respond to a
+request for it to succeed.
+
+Let's say that you have an N value of 3 (aka `n_val=3`) for a particular
+key/value pair, but one of the physical nodes responsible for a replica
+is down. With an `r=2` setting, only 2 replicas must return results for
+a read to be deemed successful. This allows Riak to provide read
+availability even when nodes are down or laggy. The same applies for the
+W in writes. If this value is not specified, Riak defaults to `quorum`,
+according to which the majority of nodes must respond.
+
+There is more on [replication properties][apps replication properties] elsewhere in the
+documentation. 
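+
+To make the R and W discussion above concrete, here is a sketch of passing
+these values per request through the HTTP API (the host, bucket, key, and
+value are illustrative):
+
+```bash
+# Succeed as soon as 2 of the 3 replicas respond to the read
+curl "http://localhost:8098/buckets/users/keys/alice?r=2"
+
+# The same idea applies to writes, via the w parameter
+curl -XPUT "http://localhost:8098/buckets/users/keys/alice?w=2" \
+  -H 'Content-Type: application/json' \
+  -d '{"name": "Alice"}'
+```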
diff --git a/content/riak/kv/2.9.2/release-notes.md b/content/riak/kv/2.9.2/release-notes.md
new file mode 100644
index 0000000000..b071c74be4
--- /dev/null
+++ b/content/riak/kv/2.9.2/release-notes.md
@@ -0,0 +1,42 @@
+---
+title: "Riak KV 2.9.2 Release Notes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Release Notes"
+    identifier: "index_release_notes"
+    weight: 101
+    parent: index
+toc: false
+aliases:
+  - /riak/2.9.2/community/release-notes
+  - /riak/kv/2.9.2/intro-v20
+  - /riak/2.9.2/intro-v20
+  - /riak/kv/2.9.2/introduction
+---
+
+Released Apr 08, 2020.
+
+
+## Overview
+
+This release includes:
+
+- An extension to the node_confirms feature so that node_confirms can be tracked on GETs as well as PUTs. This is provided so that if an attempt to PUT with a node_confirms value failed, a read can be made which will only succeed if responses from sufficient nodes are received. This does not confirm absolutely that the actual returned response is on sufficient nodes, but does confirm that nodes are now up, so that anti-entropy mechanisms will soon resolve any missing data.
+
+- Support for building leveldb on 32bit platforms.
+
+- Improvements to reduce the cost of journal compaction in leveled when there are large numbers of files containing mainly skeleton key-changes objects. The cost of scoring all of these files could have a notable impact on read loads when spinning HDDs are used (although this could be mitigated by running the journal compaction less frequently, or out of hours). Now an attempt is made to reduce this scoring cost by reading the keys to be scored in order, and scoring keys relatively close together. This will reduce the size of the disk head movements required to complete the scoring process.
+
+- The ability to switch the configuration of leveled journal compaction to using recalc mode, and hence avoid using skeleton key-changes objects altogether. The default remains retain mode; the switch from retain mode to enabling recalc is supported without any data modification (just a restart required). There is, though, no safe way other than leaving the node from the cluster (and rejoining) to revert from recalc back to retain. The use of the recalc strategy can be enabled via configuration. The use of recalc mode has outperformed retain in tests, both when running journal compaction jobs and when recovering empty ledgers via journal reloads.
+
+- An improvement to the efficiency of compaction in the leveled LSM-tree based ledger with large numbers of tombstones (or modified index entries), achieved by using a grooming selection strategy 50% of the time when selecting files to merge, rather than selecting files at random each time. The grooming selection takes a sample of files and merges the one with the most tombstones. The use of the grooming strategy is not configurable, and will have no impact until the vast majority of SST files have been re-written under this release.
+
+[Previous Release Notes](#previous-release-notes)
+
+
+## Previous Release Notes
+
+Please see the KV 2.9.1 release notes [here]({{<baseurl>}}riak/kv/2.9.1/release-notes/), and the KV 2.9.0p5 release notes [here]({{<baseurl>}}riak/kv/2.9.0p5/release-notes/). 
diff --git a/content/riak/kv/2.9.2/setup.md b/content/riak/kv/2.9.2/setup.md
new file mode 100644
index 0000000000..7b1983eb11
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup.md
@@ -0,0 +1,45 @@
+---
+title: "Setup Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Setup"
+    identifier: "setup_index"
+    weight: 110
+    pre: install
+toc: false
+---
+
+[plan index]: ../setup/planning
+[install index]: ../setup/installing
+[upgrade index]: ../setup/upgrading
+[downgrade]: ../setup/downgrade
+
+## In This Section
+
+#### [Planning][plan index]
+
+Information on planning your Riak KV cluster, including software & hardware recommendations.
+
+[Learn More >>][plan index]
+
+#### [Installing][install index]
+
+Step-by-step tutorials on installing Riak KV.
+
+[Learn More >>][install index]
+
+#### [Upgrading][upgrade index]
+
+Guides on upgrading your Riak KV cluster.
+
+[Learn More >>][upgrade index]
+
+#### [Downgrading][downgrade]
+
+A guide on downgrading your Riak KV cluster.
+
+[Learn More >>][downgrade]
+
diff --git a/content/riak/kv/2.9.2/setup/downgrade.md b/content/riak/kv/2.9.2/setup/downgrade.md
new file mode 100644
index 0000000000..2efe47c894
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/downgrade.md
@@ -0,0 +1,174 @@
+---
+title: "Downgrading"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Downgrading"
+    identifier: "downgrading"
+    weight: 103
+    parent: "setup_index"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/upgrading/rolling-downgrades/
+  - /riak/kv/2.9.2/ops/upgrading/rolling-downgrades/
+---
+
+[rolling upgrade]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading/cluster
+[config ref]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[concept aae]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/
+[aae status]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#aae-status
+
+Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade].
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove the Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak search.
+7. Monitor the reindex of the data.
+8. Finalize the process and restart Riak KV & Riak search.
+
+### Guidelines
+
+* Riak Control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:---|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ | | Can be opted out of using a [capability](#aae_tree_capability). |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression in LevelDB and/or enabled global expiration in LevelDB when you installed KV 2.9.2, you cannot downgrade. 
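+
+For reference, LevelDB settings along these lines in `riak.conf` are what rule
+out a downgrade; the values shown here are illustrative:
+
+```riak.conf
+leveldb.compression.algorithm = lz4
+leveldb.expiration = on
+leveldb.expiration.retention_time = 30d
+```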
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).
+
+This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}
+
+### Stop Riak KV and remove the Riak search index & temporary data
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+2\. Back up your Riak KV `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Remove the Riak search index data and AAE data:
+
+  1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+      ```bash
+      rm -rf /var/lib/riak/yz_temp/solr-webapp
+      ```
+  2. Delete the Solr cores located in the yz directory. If you have custom solrconfig.xml files, you will need to restore the cores from backup instead.
+
+      For example:
+
+      ```bash
+      rm -rf /var/lib/riak/yz/example_core1
+      rm -rf /var/lib/riak/yz/example_core2
+      ```
+
+### Prepare to Re-index Solr Cores
+
+5\. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency-sensitive application, you should adjust these settings with care.
+
+```riak.conf
+anti_entropy.concurrency_limit = 8
+anti_entropy.tree.build_limit.number = 4
+anti_entropy.tree.build_limit.per_timespan = 5m
+```
+
+### Start the node and disable Yokozuna
+
+6\. Start Riak KV:
+{{% note %}}
+Search results will be inconsistent until **Step 8.1** is complete.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7\. Wait for Riak search to start by running the following command:
+
+```bash
+riak-admin wait-for-service yokozuna
+```
+
+8\. Run `riak attach`.
+
+  1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:
+
+      ```
+      riak_core_node_watcher:service_down(yokozuna).
+      ```
+
+  2. Expire the Yokozuna AAE Trees:
+
+      ```
+      yz_entropy_mgr:expire_trees().
+      ```
+
+  3. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+### Monitor the reindex of the data
+
+9\. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands.
+
+The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output.
+
+Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.
+
+### Finalize the process and restart Yokozuna
+
+
+10\. If you raised the AAE concurrency settings in riak.conf during **Step 5**, stop the node and remove the increased AAE thresholds.
+
+11\. 
If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet: + +```erlang +riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)). +``` + +12\. Exit the attach session by pressing **Ctrl-G** then **q**. + +13\. Verify that transfers have completed: + +```bash +riak-admin transfers +``` diff --git a/content/riak/kv/2.9.2/setup/installing.md b/content/riak/kv/2.9.2/setup/installing.md new file mode 100644 index 0000000000..79a2b1f972 --- /dev/null +++ b/content/riak/kv/2.9.2/setup/installing.md @@ -0,0 +1,56 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/2.9.2/ops/building/installing + - /riak/kv/2.9.2/ops/building/installing + - /riak/2.9.2/installing/ + - /riak/kv/2.9.2/installing/ +--- + +[install aws]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/rhel-centos +[install suse]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. 
diff --git a/content/riak/kv/2.9.2/setup/installing/amazon-web-services.md b/content/riak/kv/2.9.2/setup/installing/amazon-web-services.md
new file mode 100644
index 0000000000..a059214b6c
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/installing/amazon-web-services.md
@@ -0,0 +1,148 @@
+---
+title_supertext: "Installing on"
+title: "Amazon Web Services"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Amazon Web Services"
+    identifier: "installing_amazon_web_services"
+    weight: 301
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/installing/Installing-on-AWS-Marketplace
+  - /riak/kv/2.9.2/ops/building/installing/Installing-on-AWS-Marketplace
+  - /riak/2.9.2/installing/amazon-web-services/
+  - /riak/kv/2.9.2/installing/amazon-web-services/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/verify
+
+## Launching Riak VMs via the AWS Marketplace
+
+{{% note title="Note" %}}
+The AWS Marketplace does not always have the most recent versions of Riak available. To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below.
+{{% /note %}}
+
+In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account.
+
+1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account.
+
+2. Locate Riak in the **Databases & Caching** category or search for Riak from any page.
+
+3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair.
+
+    ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png)
+
+4. Click the **Accept Terms and Launch with 1-Click** button.
+
+### Security Group Settings
+
+Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak.
+
+1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM.
+
+2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports:
+
+    * 22 (SSH)
+    * 8087 (Riak Protocol Buffers Interface)
+    * 8098 (Riak HTTP Interface)
+
+3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab).
+
+    * Port range: 4369
+    * Port range: 6000-7999
+    * Port range: 8099
+
+4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.
+
+    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)
+
+We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.2/using/security/).
+
+## Clustering Riak on AWS
+
+You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the `ec2-user`.
+
+You can find more information on connecting to an instance on the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).
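+
+For example, a connection to one of the instances might look like this (a sketch; the key file and hostname are placeholders for your own values):
+
+```bash
+# SSH to an instance as the ec2-user, authenticating with your key pair.
+ssh -i ~/.ssh/mykey.pem ec2-user@ec2-203-0-113-25.compute-1.amazonaws.com
+```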
+ +{{% note title="Note" %}} +The following clustering setup will _not_ be resilient to instance restarts +unless deployed in Amazon VPC. +{{% /note %}} + +{{% note title="Note on Package Based Installation" %}} + If installing to AWS by package, further configuration to _riak.conf_ to set the node name and listening IP addresses is necessary for the below steps to function. +{{% /note %}} + +1. On the first node, obtain the internal IP address: + + ```bash + curl http://169.254.169.254/latest/meta-data/local-ipv4 + ``` + +2. For all other nodes, use the internal IP address of the first node: + + ```bash + sudo riak-admin cluster join riak@<ip.of.first.node> + ``` + +3. After all of the nodes are joined, execute the following: + + ```bash + sudo riak-admin cluster plan + ``` + + If this looks good: + + ```bash + sudo riak-admin cluster commit + ``` + + To check the status of clustering use: + + ```bash + sudo riak-admin member_status + ``` + +You now have a Riak cluster running on AWS. + + +## Installing From Package + +#### AWS (2) + +You can install on AWS 2 using yum, which we recommend: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2/riak-2.9.2-1.amzn2x86_64.rpm +sudo yum localinstall -y riak-2.9.2-1.amzn2x86_64.rpm +``` + +Or you can install the `.rpm` package manually: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2/riak-2.9.2-1.amzn2x86_64.rpm +sudo rpm -i riak-2.9.2-1.amzn2x86_64.rpm +``` + + +#### AWS (2016.09) + +You can install on AWS 2016.09 using yum, which we recommend: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2016.09/riak-2.9.2-1.amzn1x86_64.rpm +sudo yum localinstall -y riak-2.9.2-1.amzn1x86_64.rpm +``` + +Or you can install the `.rpm` package manually: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2016.09/riak-2.9.2-1.amzn1x86_64.rpm +sudo rpm -i riak-2.9.2-1.amzn1x86_64.rpm +``` +## Next Steps + +Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.9.2/setup/installing/debian-ubuntu.md b/content/riak/kv/2.9.2/setup/installing/debian-ubuntu.md new file mode 100644 index 0000000000..198cc7ef58 --- /dev/null +++ b/content/riak/kv/2.9.2/setup/installing/debian-ubuntu.md @@ -0,0 +1,166 @@ +--- +title_supertext: "Installing on" +title: "Debian and Ubuntu" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Debian & Ubuntu" + identifier: "installing_debian_ubuntu" + weight: 302 + parent: "installing" +toc: true +aliases: + - /riak/2.9.2/ops/building/installing/Installing-on-Debian-and-Ubuntu + - /riak/kv/2.9.2/ops/building/installing/Installing-on-Debian-and-Ubuntu + - /riak/2.9.2/installing/debian-ubuntu/ + - /riak/kv/2.9.2/installing/debian-ubuntu/ +--- + +[install source index]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source/ +[security index]: {{<baseurl>}}riak/kv/2.9.2/using/security/ +[install source erlang]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/verify + +Riak KV can be installed on Debian or Ubuntu-based systems using a binary +package or by compiling from source code. 
+
+The following steps have been tested to work with Riak KV on:
+
+- Ubuntu 18.04
+- Ubuntu 16.04
+- Ubuntu 14.04
+- Ubuntu 12.04
+- Debian 9.2
+- Debian 8.6
+- Debian 7.6
+- Raspbian Buster
+
+> **Note on Debian 7**
+>
+> If you wish to install Riak on Debian 7, you may need to install
+[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
+later, which in turn requires upgrading your system to
+[sid](https://www.debian.org/releases/sid/). Installation instructions
+can be found
+[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
+>
+> Once sid has been installed, you can install libc6 with the following
+command:
+>
+>```bash
+apt-get -t sid install libc6 libc6-dev libc6-dbg
+```
+
+## Installing From Package
+
+If you wish to install the deb packages by hand, follow these
+instructions.
+
+### Installing on Non-LTS Ubuntu Releases
+
+Typically we only package Riak for LTS releases to keep our build and
+testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
+there are changes that affect how Riak is packaged, so we will release a
+separate package for that non-LTS release. In most other cases, however,
+if you are running a non-LTS release (such as Ubuntu 12.10), it is safe to
+follow the instructions below for the most recent LTS release prior to
+yours. For Ubuntu 12.10, for example, use the installation instructions for
+Ubuntu 12.04.
+
+### PAM Library Requirement for Ubuntu
+
+One dependency that may be missing on your machine is the `libpam0g-dev`
+package, used for Pluggable Authentication Module (PAM) authentication,
+associated with [Riak security][security index].
+
+To install:
+
+```bash
+sudo apt-get install libpam0g-dev
+```
+
+### Riak 64-bit Installation
+
+#### Ubuntu Bionic Beaver (18.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/bionic64/riak-2.9.2-1_amd64.deb
+sudo dpkg -i riak-2.9.2-1_amd64.deb
+```
+
+#### Ubuntu Xenial Xerus (16.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/xenial64/riak-2.9.2-1_amd64.deb
+sudo dpkg -i riak-2.9.2-1_amd64.deb
+```
+
+#### Ubuntu Trusty Tahr (14.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/trusty64/riak-2.9.2-1_amd64.deb
+sudo dpkg -i riak-2.9.2-1_amd64.deb
+```
+
+#### Ubuntu Precise Pangolin (12.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/precise64/riak-2.9.2-1_amd64.deb
+sudo dpkg -i riak-2.9.2-1_amd64.deb
+```
+
+#### Debian Stretch (9.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/9/riak-2.9.2-1_amd64.deb
+sudo dpkg -i riak-2.9.2-1_amd64.deb
+```
+
+#### Debian Jessie (8.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/8/riak-2.9.2-1_amd64.deb
+sudo dpkg -i riak-2.9.2-1_amd64.deb
+```
+
+#### Debian Wheezy (7.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/7/riak-2.9.2-1_amd64.deb
+sudo dpkg -i riak-2.9.2-1_amd64.deb
+```
+
+#### Raspbian Buster
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/raspbian/buster/riak-2.9.2-1_armhf.deb
+sudo dpkg -i riak-2.9.2-1_armhf.deb
+```
+
+
+## Installing From Source
+
+First, install Riak dependencies using apt:
+
+```bash
+sudo apt-get install build-essential libc6-dev-i386 git
+```
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
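+
+Before building, you can sanity-check that an Erlang runtime is on your `PATH` (a quick sketch; Riak 2.9.2 expects Basho's R16B02 line):
+
+```bash
+# Print the OTP release of whichever erl is found first on the PATH.
+erl -noshell -eval 'io:format("~s~n", [erlang:system_info(otp_release)]), halt().'
+```
+
+With Erlang in place, download and build Riak: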
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/riak-2.9.2.tar.gz
+tar zxvf riak-2.9.2.tar.gz
+cd riak-2.9.2
+make rel
+```
+
+If the build was successful, a fresh build of Riak will exist in the
+`rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.2/setup/installing/freebsd.md b/content/riak/kv/2.9.2/setup/installing/freebsd.md
new file mode 100644
index 0000000000..6c9a09ccd3
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/installing/freebsd.md
@@ -0,0 +1,128 @@
+---
+title_supertext: "Installing on"
+title: "FreeBSD"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "FreeBSD"
+    identifier: "installing_freebsd"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/installing/Installing-on-FreeBSD
+  - /riak/kv/2.9.2/ops/building/installing/Installing-on-FreeBSD
+  - /riak/2.9.2/installing/freebsd/
+  - /riak/kv/2.9.2/installing/freebsd/
+---
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/2.9.2/downloads/
+[install verify]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/verify
+
+You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code.
+
+## Installing From Binary Package
+
+Installing Riak from a binary package is the simplest method: it has the fewest dependencies and takes less time to complete than building from source.
+
+### Prerequisites and Dependencies
+
+Riak depends on `sudo` being installed if the Riak command line tools are to be executed by users other than the *riak* user. Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package.
+
+### Installation
+
+You can install the Riak binary package on FreeBSD remotely by passing a
+package URL to `pkg add`. For this example, we're installing `riak-2.9.2.txz`.
+
+### For FreeBSD 11.x
+
+```bash
+sudo pkg add https://files.tiot.jp/riak/kv/2.9/2.9.2/freebsd/11.1/riak-2.9.2.txz
+```
+
+
+### For FreeBSD 10.x
+
+```bash
+sudo pkg add https://files.tiot.jp/riak/kv/2.9/2.9.2/freebsd/10.4/riak-2.9.2.txz
+```
+
+When Riak is installed, a message is displayed with information about the installation and available documentation.
+
+```
+Thank you for installing Riak.
+
+Riak has been installed in /usr/local owned by user:group riak:riak
+
+The primary directories are:
+
+    {platform_bin_dir, "/usr/local/sbin"}
+    {platform_data_dir, "/var/db/riak"}
+    {platform_etc_dir, "/usr/local/etc/riak"}
+    {platform_lib_dir, "/usr/local/lib/riak"}
+    {platform_log_dir, "/var/log/riak"}
+
+These can be configured and changed in the platform_etc_dir/app.config.
+
+Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly.
+
+Man pages are available for riak(1) and riak-admin(1)
+```
+
+## Installing From Source
+
+Installing Riak from source on FreeBSD is straightforward, but it requires more dependencies (such as Erlang) to be installed prior to building, and it takes more time than a binary package installation.
+
+That said, installing from source provides greater flexibility with respect to configuration, data root locations, and more fine-grained control over specific dependency versions.
+ +### Prerequisites and Dependencies + +When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build. + +If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. + +* Erlang ([Installing Erlang][install source erlang]) +* Curl +* Git +* OpenSSL (version 1.0.0_7) +* Python +* sudo +* flex + +### Installation +First download the version you wish to install from the [downloads][downloads]. + +Next, unpack and build a release from source: + +```bash +tar zxf <riak-x.x.x> +cd riak-x.x.x +gmake rel +``` + +Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories: + +```bash +bin # Riak binaries +data # Riak data and metadata +erts-5.9.2 # Erlang Run-Time System +etc # Riak Configuration +lib # Third party libraries +log # Operational logs +releases # Release information +``` + +If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this: + +```bash +gmake devrel +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.9.2/setup/installing/mac-osx.md b/content/riak/kv/2.9.2/setup/installing/mac-osx.md new file mode 100644 index 0000000000..9b933b1f68 --- /dev/null +++ b/content/riak/kv/2.9.2/setup/installing/mac-osx.md @@ -0,0 +1,116 @@ +--- +title_supertext: "Installing on" +title: "Mac OS X" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Mac OS X" + identifier: "installing_macosx" + weight: 303 + parent: "installing" +toc: true +aliases: + - /riak/2.9.2/ops/building/installing/Installing-on-Mac-OS-X + - /riak/kv/2.9.2/ops/building/installing/Installing-on-Mac-OS-X + - /riak/2.9.2/installing/mac-osx/ + - /riak/kv/2.9.2/installing/mac-osx/ +--- + + + +[perf open files]: {{<baseurl>}}riak/kv/2.9.2/using/performance/open-files-limit +[install source erlang]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/verify + +The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball. + +> **`ulimit` on OS X** +> +> OS X gives you a very small limit on open file handles, so even with a +backend that uses very few file handles, it's possible to run out. See +[Open Files Limit][perf open files] for more information about changing the limit. + + +## From Precompiled Tarballs + +To run Riak from our precompiled tarball, run these commands for the +appropriate platform: + +### 64-bit + +```bash +curl -O https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.11/riak-2.9.2-OSX-x86_64.tar.gz +tar xzvf riak-2.9.2-osx-x86_64.tar.gz +``` + +After the release is untarred, you will be able to `cd` into the `riak` +directory and execute `bin/riak start` to start the Riak node. + +## Homebrew + +{{% note title="Warning: Homebrew not always up to date" %}} +Homebrew's Riak recipe is community supported, and thus is not always up to +date with the latest Riak package. Please ensure that the current recipe is +using the latest supported code (and don't be afraid to update it if it's +not). 
+{{% /note %}}
+
+Installing Riak 2.9.2 with [Homebrew](http://brew.sh/) is easy:
+
+```bash
+brew install --devel riak
+```
+
+By default, this will place a `2.9.2` folder in
+`/usr/local/Cellar/riak`.
+
+Be aware that you will most likely see the following message after
+running `brew install`:
+
+```
+Error: The `brew link` step did not complete successfully
+The formula built, but is not symlinked into /usr/local
+
+You can try again using:
+  brew link riak
+```
+
+We do not recommend using `brew link` with Riak. Instead, we recommend
+either copying that directory to a desired location on your machine,
+aliasing the executables in the `/bin` directory, or interacting with
+the Riak installation directory via environment variables.
+
+**Note**: Homebrew will install Erlang if you don't have it already.
+
+## Installing From Source
+
+You must have Xcode tools installed from [Apple's Developer
+website](http://developer.apple.com/).
+
+{{% note title="Note on Clang" %}}
+Riak has had problems compiling with Clang in the past. As of Riak KV
+2.9.0p5 and Clang 902.0.39.1, Clang can build Riak.
+{{% /note %}}
+
+Riak requires [Erlang](http://www.erlang.org/) R16B02 or later.
+
+If you do not have Erlang already installed, see [Installing Erlang][install source erlang].
+
+Next, download and unpack the source distribution:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.2/riak-2.9.2.tar.gz
+tar zxvf riak-2.9.2.tar.gz
+cd riak-2.9.2
+make rel
+```
+
+If you receive errors when building about "incompatible architecture,"
+please verify that you built Erlang with the same architecture as your
+system (Snow Leopard and higher: 64-bit).
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.2/setup/installing/rhel-centos.md b/content/riak/kv/2.9.2/setup/installing/rhel-centos.md
new file mode 100644
index 0000000000..1adda5e6dc
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/installing/rhel-centos.md
@@ -0,0 +1,129 @@
+---
+title_supertext: "Installing on"
+title: "RHEL and CentOS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "RHEL & CentOS"
+    identifier: "installing_rhel_centos"
+    weight: 304
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/kv/2.9.2/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/2.9.2/installing/rhel-centos/
+  - /riak/kv/2.9.2/installing/rhel-centos/
+---
+
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/verify
+
+Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary
+package or by [compiling Riak from source code][install source index]. The
+following steps have been tested to work with Riak on CentOS/RHEL 6.9,
+7.5.1804, and 8.1.1911.
+
+> **Note on SELinux**
+>
+> CentOS enables SELinux by default, so you may need to disable SELinux if
+you encounter errors.
+
+## Installing From Package
+
+If you wish to install the RHEL/CentOS packages by hand, follow these
+instructions.
+
+### For CentOS 8 / RHEL 8
+
+Before installing Riak on CentOS 8/RHEL 8, we need to satisfy some Erlang
+dependencies by first installing the EPEL repository:
+
+```bash
+sudo yum install -y epel-release
+```
+
+Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/8/riak-2.9.2-1.el8.x86_64.rpm
+sudo yum localinstall -y riak-2.9.2-1.el8.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:

+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/8/riak-2.9.2-1.el8.x86_64.rpm
+sudo rpm -Uvh riak-2.9.2-1.el8.x86_64.rpm
+```
+
+### For CentOS 7 / RHEL 7
+
+You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/7/riak-2.9.2-1.el7.x86_64.rpm
+sudo yum localinstall -y riak-2.9.2-1.el7.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/7/riak-2.9.2-1.el7.x86_64.rpm
+sudo rpm -Uvh riak-2.9.2-1.el7.x86_64.rpm
+```
+
+### For CentOS 6 / RHEL 6
+
+You can install using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/6/riak-2.9.2-1.el6.x86_64.rpm
+sudo yum localinstall -y riak-2.9.2-1.el6.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/6/riak-2.9.2-1.el6.x86_64.rpm
+sudo rpm -Uvh riak-2.9.2-1.el6.x86_64.rpm
+```
+
+## Installing From Source
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+Building from source will require the following packages:
+
+* `gcc`
+* `gcc-c++`
+* `glibc-devel`
+* `make`
+* `pam-devel`
+
+You can install these with yum:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
+```
+
+Now we can download and install Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.2/riak-2.9.2.tar.gz
+tar zxvf riak-2.9.2.tar.gz
+cd riak-2.9.2
+make rel
+```
+
+You will now have a fresh build of Riak in the `rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.2/setup/installing/smartos.md b/content/riak/kv/2.9.2/setup/installing/smartos.md
new file mode 100644
index 0000000000..b0c45ba4f8
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/installing/smartos.md
@@ -0,0 +1,118 @@
+---
+title_supertext: "Installing on"
+title: "SmartOS"
+description: ""
+project: "riak_kv"
+project_version: "2.9.2"
+menu:
+  riak_kv-2.9.2:
+    name: "SmartOS"
+    identifier: "installing_smartos"
+    weight: 305
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/installing/Installing-on-SmartOS
+  - /riak/kv/2.9.2/ops/building/installing/Installing-on-SmartOS
+  - /riak/2.9.2/installing/smartos/
+  - /riak/kv/2.9.2/installing/smartos/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/verify
+
+{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
+SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**.
They demonstrate installation of a Riak node on SmartOS as the root user. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open +files limit is at least 65536. Check the current limits to verify this: + +```bash +ulimit -a +``` + +To temporarily increase this limit *for the life of your session*, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`: + +```bash +set rlim_fd_max=65536 +``` + +## Choosing a Version + +SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is the way to determine which Riak package to use. + +The thing that really matters for Riak is what dataset was used to make the SmartOS VM. These datasets come from joyent and appear like this with the `dsadm` command: + +``` +fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4 +f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1 +``` + +This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use. + +For Joyent Cloud users who don't know what dataset was used, in the guest zone type: + +``` +cat /opt/local/etc/pkgin/repositories.conf +``` + +* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* you need to use the `1.8` download. +* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* you need to use the `1.6` download. + +## Download and Install + +Download your version of the Riak binary package for SmartOS: + +```bash +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz +``` + +Next, install the package: + +``` +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz +``` + +After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: + +```bash +svcadm -v enable -r riak +``` + +Finally, after enabling the services, check to see that they are online: + +``` +svcs -a | grep -E 'epmd|riak' +``` + +Output from the above command should resemble the following: + +``` +online 17:17:16 svc:/network/epmd:default +online 17:17:16 svc:/application/riak:default +``` + +Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed and configured Riak as service on SmartOS. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
diff --git a/content/riak/kv/2.9.2/setup/installing/solaris.md b/content/riak/kv/2.9.2/setup/installing/solaris.md new file mode 100644 index 0000000000..7658e87453 --- /dev/null +++ b/content/riak/kv/2.9.2/setup/installing/solaris.md @@ -0,0 +1,90 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: "2.9.2" +menu: + riak_kv-2.9.2: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/2.9.2/ops/building/installing/Installing-on-Solaris + - /riak/kv/2.9.2/ops/building/installing/Installing-on-Solaris + - /riak/2.9.2/installing/solaris/ + - /riak/kv/2.9.2/installing/solaris/ +--- + + + +[install verify]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
diff --git a/content/riak/kv/2.9.2/setup/installing/source.md b/content/riak/kv/2.9.2/setup/installing/source.md
new file mode 100644
index 0000000000..d215b6736e
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/installing/source.md
@@ -0,0 +1,105 @@
+---
+title_supertext: "Installing"
+title: "Riak KV From Source"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Installing From Source"
+    identifier: "installing_source"
+    weight: 310
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/Installing-Riak-from-Source
+  - /riak/kv/2.9.2/ops/building/Installing-Riak-from-Source
+  - /riak/2.9.2/installing/source/
+  - /riak/kv/2.9.2/installing/source/
+---
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/2.9.2/downloads/
+[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/debian-ubuntu/#installing-from-source
+[install freebsd#source]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/freebsd/#installing-from-source
+[install mac osx#source]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/mac-osx/#installing-from-source
+[install rhel & centos#source]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/rhel-centos/#installing-from-source
+[install verify]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/verify
+
+Riak should be installed from source if you are building on a platform
+for which a package does not exist or if you are interested in
+contributing to Riak.
+
+## Dependencies
+
+### Erlang
+
+To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release.
+
+See [Installing Erlang][install source erlang] for instructions.
+
+### Git
+
+Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build.
+
+### GCC
+
+Riak will not compile with Clang. Please make sure your default C/C++
+compiler is [GCC](https://gcc.gnu.org/).
+
+## Installation
+
+The following instructions generate a complete, self-contained build of
+Riak in `$RIAK/rel/riak`, where `$RIAK` is the location of the unpacked
+or cloned source.
+
+### Installing from source package
+
+Download the Riak source package from the [Download Center][downloads] and build:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.2/riak-2.9.2.tar.gz
+tar zxvf riak-2.9.2.tar.gz
+cd riak-2.9.2
+make locked-deps
+make rel
+```
+
+### Installing from GitHub
+
+The [Riak GitHub repository](http://github.com/basho/riak) has much
+more information on building and installing Riak from source. To clone
+and build Riak from source, follow the steps below.
+
+Clone the repository using [Git](http://git-scm.com) and build:
+
+```bash
+git clone git://github.com/basho/riak.git
+cd riak
+make locked-deps
+make rel
+```
+
+## Platform-Specific Instructions
+
+For instructions about specific platforms, see:
+
+  * [Debian & Ubuntu][install debian & ubuntu#source]
+  * [FreeBSD][install freebsd#source]
+  * [Mac OS X][install mac osx#source]
+  * [RHEL & CentOS][install rhel & centos#source]
+
+If you are running Riak on a platform not in the list above and need
+some help getting it up and running, join the Riak Mailing List and
+inquire about it there. We are happy to help you get up and running with
+Riak.
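+
+Whichever route you take, the build leaves a complete, self-contained release under `rel/riak`. A minimal smoke test of that release might look like this (a sketch, run from the source directory):
+
+```bash
+# Start the freshly built node, confirm it responds, then stop it.
+rel/riak/bin/riak start
+rel/riak/bin/riak ping   # expect "pong"
+rel/riak/bin/riak stop
+```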
+
+### Windows
+
+Riak is not currently supported on Microsoft Windows.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/2.9.2/setup/installing/source/erlang.md b/content/riak/kv/2.9.2/setup/installing/source/erlang.md
new file mode 100644
index 0000000000..7ca3b33cf5
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/installing/source/erlang.md
@@ -0,0 +1,566 @@
+---
+title: "Installing Erlang"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Installing Erlang"
+    identifier: "installing_source_erlang"
+    weight: 301
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/installing/erlang
+  - /riak/kv/2.9.2/ops/building/installing/erlang
+  - /riak/2.9.2/installing/source/erlang/
+  - /riak/kv/2.9.2/installing/source/erlang/
+---
+
+[install index]: {{<baseurl>}}riak/kv/2.9.2/setup/installing
+[security basics]: {{<baseurl>}}riak/kv/2.9.2/using/security/basics
+
+Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].**
+
+> **Note on Official Support**
+>
+> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package.
+
+## Prerequisites
+
+#### Contents
+
+* [kerl](#kerl-prerequisites)
+* [Debian/Ubuntu](#debian-ubuntu-prerequisites)
+* [FreeBSD/Solaris](#freebsd-solaris-prerequisites)
+* [Mac OS X](#mac-os-x-prerequisites)
+* [RHEL/CentOS](#rhel-centos-prerequisites)
+
+To build and install Erlang you must have a GNU-compatible build system and these tools:
+
+**Unpacking**
+
+* [GNU gzip](http://www.gzip.org/) or a modern uncompressing utility.
+* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives.
+
+**Building**
+
+* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts.
+* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program.
+* [gcc](https://gcc.gnu.org/): for compiling C.
+* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces.
+* [OpenSSL](https://www.openssl.org/): toolkit that implements the SSL and TLS protocols.
+* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java.
+
+
+## kerl Prerequisites
+
+[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems.
+
+Install kerl by running the following commands:
+
+```bash
+curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl
+chmod a+x kerl
+```
+
+If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl.
+
+Otherwise, continue with [Installing with kerl](#installing-with-kerl).
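+
+Once the script is executable, you can sanity-check it before configuring anything (a minimal sketch; run it from the directory where you downloaded `kerl`, and note that it needs network access):
+
+```bash
+# List the Erlang releases kerl knows how to build.
+./kerl list releases
+```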
+
+### Configuring kerl on FreeBSD/Solaris
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next, add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+                        --enable-kernel-poll --without-odbc"
+```
+
+Then check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf by running:
+
+```shell
+sudo pkg update
+sudo pkg install autoconf
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+### Configuring kerl on Mac OS X
+
+To compile Erlang as 64-bit on Mac OS X, you need to instruct kerl to pass the correct flags to the `configure` command.
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next, add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+                        --enable-kernel-poll --without-odbc --enable-darwin-64bit"
+```
+
+On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).
+
+Check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf:
+
+With Homebrew:
+
+```shell
+brew install autoconf
+```
+
+Or with curl:
+
+```shell
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+
+## Debian/Ubuntu Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `apt-get` commands:
+
+```bash
+sudo apt-get update
+sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+> **Note on build output**
+>
+> These packages are not required for operation of a Riak node.
+Notes in the build output about missing support for wxWidgets can be
+safely ignored when installing Riak in a typical non-graphical server
+environment.
+
+To install packages for graphics support, use the following `apt-get` command:
+
+```bash
+sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).
+
+
+
+## FreeBSD/Solaris Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `pkg` commands:
+
+```bash
+sudo pkg update
+sudo pkg install gcc autoconf gmake flex
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support, use the following `pkg` command:
+
+```bash
+sudo pkg install wx28-gtk2-2.8.12_4
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).
+
+
+
+## Mac OS X Prerequisites
+
+* [Xcode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
+* [Homebrew](http://brew.sh/) (*optional*) - Package Manager.
+
+First, install [Xcode Developer Tools](http://developer.apple.com/). Xcode is a set of software development tools for developing on OS X.
+
+We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is optional and not required to install Erlang.
+
+Next, if you are running OS X 10.9 (Mavericks) or later, you may need to
+install [autoconf](https://www.gnu.org/software/autoconf/). To check for
+the presence of autoconf, run:
+
+```bash
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf:
+
+With Homebrew:
+
+```bash
+brew install autoconf
+```
+
+Or with curl:
+
+```bash
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x).
+
+## RHEL/CentOS Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `yum` command:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support, use the following `yum` command:
+
+```bash
+sudo yum install wxBase.x86_64
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos).
+
+
+
+## Installation
+
+* [Installing with kerl](#installing-with-kerl)
+* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu)
+* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris)
+* [Installing on Mac OS X](#installing-on-mac-os-x)
+* [Installing on RHEL/CentOS](#installing-on-rhel-centos)
+
+## Installing with kerl
+
+First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites).
+
+With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of
+Erlang [from GitHub](https://github.com/basho/otp) using the following
+command:
+
+```bash
+./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10
+```
+
+This builds the Erlang distribution and performs all of the steps
+required to manually install Erlang for you.
+
+After Erlang is successfully built, you can install the build as follows:
+
+```bash
+./kerl install R16B02-basho10 ~/erlang/R16B02-basho10
+. ~/erlang/R16B02-basho10/activate
+```
+
+The last line activates the Erlang build that was just installed into
+`~/erlang/R16B02-basho10`.
+
+> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands.
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
+## Installing on Debian/Ubuntu
+
+First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites).
+
+Next, download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz).
+ +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). + +You can install Erlang in several ways on OS X: + +* [From Source](#installing-on-mac-os-x-from-source) +* [Homebrew](#installing-on-mac-os-x-with-homebrew) +* [MacPorts](#installing-on-mac-os-x-with-macports) + +## Installing on Mac OS X from Source + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Follow the steps below to configure Erlang for your operating system. + +#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7) + +If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion +(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang. 
+
+Using LLVM:
+
+```bash
+CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+Or if you prefer GCC:
+
+```bash
+CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on Snow Leopard (OS X 10.6)
+
+If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an
+Intel processor:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on older versions of OS X
+
+If you're on a non-Intel processor or an older version of OS X:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll
+```
+
+After you've configured your system, `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with Homebrew
+
+To install Erlang with Homebrew, use this command:
+
+```bash
+brew install erlang
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with MacPorts
+
+Installing with MacPorts:
+
+```bash
+port install erlang +ssl
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on RHEL/CentOS
+
+First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites).
+
+Using `wget`:
+
+```bash
+wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next, `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+> **Note for RHEL6/CentOS6**
+>
+> In certain versions of RHEL6 and CentOS6, the `openssl-devel` package
+ships with Elliptic Curve Cryptography partially disabled. To
+communicate this to Erlang and prevent compile- and run-time errors, the
+environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to
+Erlang's `./configure` call.
+> +> The full `make` invocation then becomes +> +> ```bash +CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` diff --git a/content/riak/kv/2.9.2/setup/installing/source/jvm.md b/content/riak/kv/2.9.2/setup/installing/source/jvm.md new file mode 100644 index 0000000000..5740226879 --- /dev/null +++ b/content/riak/kv/2.9.2/setup/installing/source/jvm.md @@ -0,0 +1,51 @@ +--- +title: "Installing the JVM" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Installing the JVM" + identifier: "installing_source_jvm" + weight: 302 + parent: "installing_source" +toc: true +aliases: + - /riak/2.9.2/ops/building/installing/jvm + - /riak/kv/2.9.2/ops/building/installing/jvm + - /riak/2.9.2/ops/building/installing/Installing-the-JVM + - /riak/kv/2.9.2/ops/building/installing/Installing-the-JVM + - /riak/2.9.2/installing/source/jvm/ + - /riak/kv/2.9.2/installing/source/jvm/ +--- + +[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search + +If you are using [Riak Search 2.0][usage search], codename Yokozuna, +you will need to install **Java 1.6 or later** to run [Apache +Solr](https://lucene.apache.org/solr/), the search platform that powers +Riak Search. + +We recommend using Oracle's [JDK +7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html). +Installation packages can be found on the [Java SE 7 Downloads +page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR) +and instructions on the [documentation +page](http://www.oracle.com/technetwork/java/javase/documentation/index.html). + +## Installing Solr on OS X + +If you're using Riak Search on Mac OS X, you may see the following +error: + +```java +java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME> +``` + +If you encounter this error, we recommend manually setting the hostname +for `localhost` using +[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html). + +```bash +scutil --set HostName "localhost" +``` diff --git a/content/riak/kv/2.9.2/setup/installing/suse.md b/content/riak/kv/2.9.2/setup/installing/suse.md new file mode 100644 index 0000000000..67cd96ad83 --- /dev/null +++ b/content/riak/kv/2.9.2/setup/installing/suse.md @@ -0,0 +1,47 @@ +--- +title_supertext: "Installing on" +title: "SUSE" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "SUSE" + identifier: "installing_suse" + weight: 307 + parent: "installing" +toc: false +aliases: + - /riak/2.9.2/ops/building/installing/Installing-on-SUSE + - /riak/kv/2.9.2/ops/building/installing/Installing-on-SUSE + - /riak/2.9.2/installing/suse/ + - /riak/kv/2.9.2/installing/suse/ +--- + +[install verify]: {{<baseurl>}}riak/kv/2.9.2/setup/installing/verify + +{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}} +SUSE is no longer supported in Riak KV 2.9.2+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +Riak KV can be installed on OpenSuse and SLES systems using a binary package. 
The following steps have been tested to work with Riak on +the following x86/x86_64 flavors of SuSE: + +* SLES11-SP1 +* SLES11-SP2 +* SLES11-SP3 +* SLES11-SP4 +* OpenSUSE 11.2 +* OpenSUSE 11.3 +* OpenSUSE 11.4 + +## Installing with rpm + +```bash +wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm +sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.9.2/setup/installing/verify.md b/content/riak/kv/2.9.2/setup/installing/verify.md new file mode 100644 index 0000000000..0f66e4f245 --- /dev/null +++ b/content/riak/kv/2.9.2/setup/installing/verify.md @@ -0,0 +1,164 @@ +--- +title: "Verifying a Riak KV Installation" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Verifying an Installation" + identifier: "installing_verify" + weight: 311 + parent: "installing" +toc: true +aliases: + - /riak/2.9.2/ops/installing/Post-Installation + - /riak/kv/2.9.2/ops/installing/Post-Installation + - /riak/2.9.2/installing/verify-install/ + - /riak/kv/2.9.2/installing/verify-install/ +--- + +[client libraries]: {{<baseurl>}}riak/kv/2.9.2/developing/client-libraries +[perf open files]: {{<baseurl>}}riak/kv/2.9.2/using/performance/open-files-limit +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/inspecting-node + +After you've installed Riak KV, we recommend checking the liveness of +each node to ensure that requests are being properly served. + +In this document, we cover ways of verifying that your Riak nodes are operating +correctly. After you've determined that your nodes are functioning and you're +ready to put Riak KV to work, be sure to check out the resources in the +**Now What?** section below. + +## Starting a Riak Node + +> **Note about source installations** +> +> To start a Riak KV node that was installed by compiling the source code, you +can add the Riak KV binary directory from the installation directory you've +chosen to your `PATH`. +> +> For example, if you compiled Riak KV from source in +the `/home/riak` directory, then you can add the binary directory +(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation. + +To start a Riak node, use the `riak start` command: + +```bash +riak start +``` + +A successful start will return no output. If there is a problem starting the +node, an error message is printed to standard error. + +To run Riak with an attached interactive Erlang console: + +```bash +riak console +``` + +A Riak node is typically started in console mode as part of debugging or +troubleshooting to gather more detailed information from the Riak startup +sequence. Note that if you start a Riak node in this manner, it is running as +a foreground process that will be exited when the console is closed. + +You can close the console by issuing this command at the Erlang prompt: + +```erlang +q(). +``` + +Once your node has started, you can initially check that it is running with +the `riak ping` command: + +```bash +riak ping +``` + +The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not. 
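+
+Because `riak ping` reports its result on standard output, it is simple to script; below is a minimal sketch suitable for monitoring glue:
+
+```bash
+# Exit 0 when the local node answers with "pong", 1 otherwise.
+if riak ping | grep -q '^pong'; then
+    echo "Riak node is up"
+else
+    echo "Riak node is down" >&2
+    exit 1
+fi
+```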
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the `default` [bucket type][cluster ops bucket types]:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+*   Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /types/default/props HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node!
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
diff --git a/content/riak/kv/2.9.2/setup/installing/windows-azure.md b/content/riak/kv/2.9.2/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..771cc6f623
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/installing/windows-azure.md
@@ -0,0 +1,192 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/2.9.2/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/2.9.2/installing/windows-azure/
+  - /riak/kv/2.9.2/installing/windows-azure/
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported and thus are not always up to
+date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Login to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+    - Provide a "Virtual Machine Name", such as "testlinuxvm".
+    - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+    - In the "New Password" box, type a strong password.
+    - In the "Confirm Password" box, retype the password.
+    - Select the appropriate "Size" from the drop down list.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+    - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list.
+    - In the "DNS Name" box, type a valid DNS address, e.g., "testlinuxvm".
+    - In the "Storage Account" box, select "Use Automatically Generated Storage Account".
+    - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration2.png)
+
+6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration3.png)
+
+7. Wait while Windows Azure prepares your virtual machine.
+
+### Configure Endpoints
+
+Once the virtual machine is created you must configure endpoints in order to remotely connect.
+
+1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints".
+
+2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows:
+    - Name: https
+    - Protocol: leave set to 'TCP'
+    - Public Port: 443
+    - Private Port: 8069
+
+## Connect to CentOS VMs using PuTTY or SSH
+
+When the virtual machine has been provisioned and the endpoints configured, you can connect to it using SSH or PuTTY.
+
+### Connecting Using SSH
+
+**For Linux & Mac Users:**
+
+```bash
+ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180
+```
+
+Enter the user's password.
+
+**For Windows Users, use PuTTY:**
+
+If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html).
+
+1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe.
+
+2. Enter the SSH details as found on the node's dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port.
+
+    ![]({{<baseurl>}}images/putty.png)
+
+## Install Riak and configure using a shell script
+
+1. **On each node**, once you've connected using the steps above, execute:
+
+```bash
+sudo su -
+curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh
+```
+
+## Configure Riak using Riak Control
+
+You can either use Riak Control or the command line to add nodes to your Riak cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line".
+
+1. Find the DNS name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For example:
+    - **DNS:** basho-example.cloudapp.net
+    - **Deployment ID:** 7ea145743aeb4402a088da1234567890
+
+2. Visit https://dns-name.cloudapp.net/admin in your browser.
+
+3. Enter 'admin' as the username, and the "Deployment ID" as the password.
+
+4. Select 'Cluster' on the left.
+
+5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each VM, not the DNS name. For example:
+    - riak@basho-centos1
+
+You now have a Riak cluster on Azure.
+
+## Configure Riak using Command Line
+
+If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section.
+
+First, SSH into the second (and subsequent) nodes and execute:
+
+```bash
+riak-admin cluster join riak@yourhostnamehere
+```
+
+(Where 'yourhostnamehere' is the short name of the **first node** in your cluster.)
+
+(NOTE: The host you choose can actually be any host that has already joined the cluster.
The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak-admin cluster plan
+```
+
+Verify all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak-admin cluster commit
+```
+
+To check the status of clustering, use:
+
+```bash
+riak-admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
diff --git a/content/riak/kv/2.9.2/setup/planning.md b/content/riak/kv/2.9.2/setup/planning.md
new file mode 100644
index 0000000000..e3985a7e11
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning.md
@@ -0,0 +1,55 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster.
+
+[Learn More >>][plan best practices]
+
+
diff --git a/content/riak/kv/2.9.2/setup/planning/backend.md b/content/riak/kv/2.9.2/setup/planning/backend.md
new file mode 100644
index 0000000000..8399eb7528
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/backend.md
@@ -0,0 +1,56 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/planning/backends/
+  - /riak/kv/2.9.2/ops/building/planning/backends/
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/multi
+[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveled
+[dev api backend]: {{<baseurl>}}riak/kv/2.9.2/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+* [Leveled][plan backend leveled]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic                      |Bitcask|LevelDB|Memory|
+:----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend                        |✓      |       |      |
+Persistent                                     |✓      |✓      |      |
+Keyspace in RAM                                |✓      |       |✓     |
+Keyspace can be greater than available RAM     |       |✓      |      |
+Keyspace loaded into RAM on startup<sup>1</sup>|✓      |       |      |
+Objects in RAM                                 |       |       |✓     |
+Object expiration                              |✓      |       |✓     |
+Secondary indexes                              |       |✓      |✓     |
+Tiered storage                                 |       |✓      |      |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces.
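+
+If you are leaning toward Bitcask, remember that the entire keyspace must
+fit in RAM. The arithmetic below is only a back-of-the-envelope sketch: the
+per-key overhead figure is an assumption for illustration, so use the
+[Bitcask Capacity Calculator]({{<baseurl>}}riak/kv/2.9.2/setup/planning/bitcask-capacity-calc) for real sizing:
+
+```bash
+# Rough keydir sizing: keys x (average key size + per-key overhead).
+# All three numbers are hypothetical placeholders for your own values.
+TOTAL_KEYS=100000000      # e.g. 100 million keys
+AVG_KEY_BYTES=36          # average bucket/key name size in bytes
+OVERHEAD_PER_KEY=45       # assumed static keydir cost per key
+echo "$(( TOTAL_KEYS * (AVG_KEY_BYTES + OVERHEAD_PER_KEY) / 1024 / 1024 )) MB (approx.) of RAM for the keydir"
+```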
diff --git a/content/riak/kv/2.9.2/setup/planning/backend/bitcask.md b/content/riak/kv/2.9.2/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..8b7c44a60b
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/backend/bitcask.md
@@ -0,0 +1,990 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/backends/bitcask/
+  - /riak/kv/2.9.2/ops/advanced/backends/bitcask/
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/2.9.2/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: (1) data written to Bitcask
+  doesn't need to be ordered on disk, and (2) the log-structured
+  design of Bitcask allows for minimal disk head movement during
+  writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value, and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append only and write once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes.
Recovery operations need to review only the last + record or two written and verify CRC data to ensure that the data is + consistent. + +* **Easy Backup** + + In most systems, backup can be very complicated. Bitcask simplifies + this process due to its append-only, write-once disk format. Any + utility that archives or copies files in disk-block order will + properly back up or copy a Bitcask database. + +## Weaknesses + +* Keys must fit in memory + + Bitcask keeps all keys in memory at all times, which means that your + system must have enough memory to contain your entire keyspace, plus + additional space for other operational components and operating- + system-resident filesystem buffer space. + +## Installing Bitcask + +Bitcask is the default storage engine for Riak. You can verify that +Bitcask is currently being used as the storage backend with the +[`riak`][use admin riak cli] command interface: + +```bash +riak config effective | grep backend +``` + +If this operation returns anything other than `bitcask`, read +the following section for instructions on switching the backend to Bitcask. + +## Enabling Bitcask + +You can set Bitcask as the storage engine using each node's +[configuration files][config reference]: + +```riakconf +storage_backend = bitcask +``` + +```appconfig +{riak_kv, [ + {storage_backend, riak_kv_bitcask_backend}, + %% Other riak_kv settings... + + ]}, +``` + +## Configuring Bitcask + +Bitcask enables you to configure a wide variety of its behaviors, from +filesystem sync strategy to merge settings and more. + +> **Note on configuration systems** +> +> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file. +> Instructions for both systems will be included below. Narrative +descriptions of the various settings will be tailored to the newer +configuration system, whereas instructions for the older system will +largely be contained in the code tabs. + +The default configuration values for Bitcask are as follows: + +```riakconf +bitcask.data_root = ./data/bitcask +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + {data_root, "/var/lib/riak/bitcask"}, + {io_mode, erlang}, + + %% Other Bitcask-specific settings + ]} +``` + +All of the other available settings listed below can be added to your +configuration files. + +### Open Timeout + +The open timeout setting specifies the maximum time Bitcask will block +on startup while attempting to create or open the Bitcask data +directory. The default is 4 seconds. + +In general, you will not need to adjust this setting. If, however, you +begin to receive log messages of the form `Failed to start bitcask +backend: ...`, you may want to consider using a longer timeout. + +Open timeout is specified using the `bitcask.sync.open_timeout` +parameter, and can be set in terms of seconds, minutes, hours, etc. +The following example sets the parameter to 10 seconds: + +```riakconf +bitcask.sync.open_timeout = 10s +``` + +```appconfig +{bitcask, [ + ..., + {open_timeout, 10} %% This value must be expressed in seconds + ... + ]} +``` + +### Sync Strategy + +Bitcask enables you to configure the durability of writes by specifying +when to synchronize data to disk, i.e. by choosing a sync strategy. The +default setting (`none`) writes data into operating system buffers that +will be written to disk when those buffers are flushed by the operating +system. 
If the system fails before those buffers are flushed, e.g. due +to power loss, that data is lost. This possibility holds for any +database in which values are asynchronously flushed to disk. + +Thus, using the default setting of `none` protects against data loss in +the event of application failure, i.e. process death, but leaves open a +small window in which data could be lost in the event of a complete +system failure, e.g. hardware or OS failure. + +This possibility can be prevented by choosing the `o_sync` sync +strategy, which forces the operating system to flush to stable storage +at write time for every write. The effect of flushing each write is +better durability, although it should be noted that write throughput +will suffer because each write will have to wait for the write to +complete. + +The following sync strategies are available: + + * `none` --- lets the operating system manage syncing writes + (default) + * `o_sync` --- uses the `O_SYNC` flag, which forces syncs on every + write + * Time interval --- Riak will force Bitcask to sync at specified + intervals + +The following are possible configurations: + + +```riakconf +bitcask.sync.strategy = none +bitcask.sync.strategy = o_sync + +bitcask.sync.strategy = interval +bitcask.sync.interval = 65s +``` + +```appconfig +{bitcask, [ + ..., + {sync_strategy, none}, + {sync_strategy, o_sync}, + {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds + ... + ]} +``` + +> **Sync strategy interval limitations** +> +> Setting the sync interval to a value lower or equal to + `riak_core.vnode_inactivity_timeout` (default: 60 seconds), will + prevent Riak from performing handoffs. +> +> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive. + +### Max File Size + +The `max_file_size` setting describes the maximum permitted size for any +single data file in the Bitcask directory. If a write causes the current +file to exceed this size threshold then that file is closed, and a new +file is opened for writes. The default is 2 GB. + +Increasing `max_file_size` will cause Bitcask to create fewer, larger +files that are merged less frequently, while decreasing it will cause +Bitcask to create more numerous, smaller files that are merged more +frequently. + +To give an example, if your ring size is 16, your servers could see as +much as 32 GB of data in the bitcask directories before the first merge +is triggered, irrespective of your working set size. You should plan +storage accordingly and be aware that it is possible to see disk data +sizes that are larger than the working set. + +The `max_file_size` setting can be specified using kilobytes, megabytes, +etc. The following example sets the max file size to 1 GB: + +```riakconf +bitcask.max_file_size = 1GB +``` + +```appconfig +%% The max_file_size setting must be expressed in bytes, as in the +%% example below + +{bitcask, [ + ..., + {max_file_size, 16#40000000}, %% 1 GB expressed in bytes + ... + ]} +``` + +### Hint File CRC Check + +During startup, Bitcask will read from `.hint` files in order to build +its in-memory representation of the key space, falling back to `.data` +files if necessary. This reduces the amount of data that must be read +from the disk during startup, thereby also reducing the time required to +start up. 
You can configure Bitcask to either disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) --- Writes are made via Erlang's built-in file API
+* `nif` --- Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif` and is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met, at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory.
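+
+You can watch this trade-off on a live node by checking how many data
+files each vnode's Bitcask directory holds and how much space they
+consume before and after merges. The loop below is a sketch that assumes
+the default package data path of `/var/lib/riak/bitcask`; adjust it to
+your configured data directory:
+
+```bash
+# Report the data-file count and total size of each vnode's Bitcask dir.
+# Assumes the default data path; change it to match your data_root.
+for dir in /var/lib/riak/bitcask/*/; do
+  count=$(ls "$dir" | grep -c '\.data$')
+  size=$(du -sh "$dir" | cut -f1)
+  echo "$dir: $count data files, $size"
+done
+```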
+
+The merge process is affected by all of the settings described in the
+sections below. In those sections, "dead" refers to keys that no longer
+contain the most up-to-date values, while "live" refers to keys that do
+contain the most up-to-date value and have not been deleted.
+
+### Merge Policy
+
+Bitcask enables you to select a merge policy, i.e. when during the day
+merge operations are allowed to be triggered. The valid options are:
+
+* `always` --- No restrictions on when merge operations can occur
+  (default)
+* `never` --- Merge will never be attempted
+* `window` --- Merge operations occur during specified hours
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can select a merge policy using the `merge.policy` setting. The
+following example sets the merge policy to `never`:
+
+```riakconf
+bitcask.merge.policy = never
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {merge_window, never},
+    ...
+    ]}
+```
+
+If you opt to specify start and end hours for merge operations, you can
+do so with the `merge.window.start` and `merge.window.end`
+settings in addition to setting the merge policy to `window`.
+Each setting is an integer between 0 and 23 for hours on a 24h clock,
+with 0 meaning midnight and 23 standing for 11 pm.
+The merge window runs from the first minute of the `merge.window.start` hour
+to the last minute of the `merge.window.end` hour.
+The following example enables merging between 3:00 am and 5:59 pm:
+
+```riakconf
+bitcask.merge.policy = window
+bitcask.merge.window.start = 3
+bitcask.merge.window.end = 17
+```
+
+```appconfig
+%% In the app.config-based system, you specify the merge window using
+%% a tuple, as in the following example:
+
+{bitcask, [
+    ...,
+    {merge_window, {3, 17}},
+    ...
+    ]}
+```
+
+> **`merge_window` and the Multi backend**
+>
+>If you are using the older configuration system and using Bitcask with
+the [Multi][plan backend multi] backend, please note that if you
+wish to use a merge window, you _must_ set it in the global `bitcask`
+section of your configuration file. `merge_window` settings
+in per-backend sections are ignored.
+
+If merging has a significant impact on the performance of your cluster, or
+if your cluster has quiet periods in which little storage activity
+occurs, you may want to change this setting from the default.
+
+A common way to limit the impact of merging is to create separate merge
+windows for each node in the cluster and ensure that these windows do
+not overlap. This ensures that at most one node at a time can be
+affected by merging, leaving the remaining nodes to handle requests.
+The main drawback of this approach is that merges will occur less
+frequently, leading to increased disk space usage.
+
+### Merge Triggers
+
+Merge triggers determine the conditions under which merging will be
+invoked. These conditions fall into two basic categories:
+
+* **Fragmentation** --- This describes the ratio of dead keys to total
+  keys in a file that will trigger merging. The value of this setting is
+  an integer percentage (0-100). For example, if a data file contains 6
+  dead keys and 4 live keys, a merge will be triggered by the default
+  setting (60%). Increasing this value will cause merging to occur less
+  often, whereas decreasing the value will cause merging to happen more
+  often.
+
+* **Dead Bytes** --- This setting describes how much data stored for
+  dead keys in a single file will trigger merging. If a file meets or
+  exceeds the trigger value for dead bytes, a merge will be triggered.
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+  When either of these constraints is met by any file in the directory,
+  Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** --- This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in the
+  merge. The value of this setting is a percentage (0-100). For example,
+  if a data file contains 4 dead keys and 6 live keys, it will be
+  included in the merge at the default ratio (40%). Increasing the value
+  will cause fewer files to be merged, while decreasing the value will
+  cause more files to be merged.
+
+* **Dead Bytes** --- This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be included in
+  the merge. Increasing this value will cause fewer files to be merged,
+  while decreasing this value will cause more files to be merged. The
+  default is 128 MB.
+
+* **Small File** --- This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_.
If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +Fold keys thresholds will reuse the keydir (a) if another fold was +started less than a specified time interval ago and (b) there were fewer +than a specified number of updates. Otherwise, Bitcask will wait until +all current fold keys complete and then start. The default time interval +is 0, while the default number of updates is unlimited. Both thresholds +can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. Each threshold can be disabled by setting the value to +`unlimited`. 
The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value or if you need to purge data for space reasons, you can
+configure object expiration, aka expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting
+and either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time.
+Bitcask will defer a merge triggered solely by key expiry for the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting and
+in terms of minutes, hours, days, etc. The following example sets the
+grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If search queries returning
+expired keys is a problem for your use case, then we would recommend not
+using automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can assist you in making Bitcask as stable and reliable as
+possible, minimizing latency, and maximizing throughput.
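+
+A quick first check, before working through the tips below, is whether
+the node is anywhere near its open-file limit, since Bitcask can hold
+many files open at once. The following is a Linux-only sketch that
+assumes the Erlang VM process is named `beam.smp` and that you have
+sufficient privileges to read `/proc`:
+
+```bash
+# Compare the Riak VM's file descriptors in use to its allowed maximum.
+riak_pid=$(pgrep -f beam.smp | head -n 1)
+echo "descriptors in use: $(ls /proc/"$riak_pid"/fd | wc -l)"
+grep 'Max open files' /proc/"$riak_pid"/limits
+```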
+
+### Tips & Tricks
+
+  * **Bitcask depends on filesystem caches**
+
+    Some data storage layers implement their own page/block buffer cache
+    in-memory, but Bitcask does not. Instead, it depends on the
+    filesystem's cache. Adjusting the caching characteristics of your
+    filesystem can impact performance.
+
+  * **Be aware of file handle limits**
+
+    Review the documentation on [open files limit][perf open files].
+
+  * **Avoid the overhead of updating file metadata (such as last access
+    time) on every read or write operation**
+
+    You can achieve a substantial speed boost by adding the `noatime`
+    mounting option to Linux's `/etc/fstab`. This will disable the
+    recording of the last accessed time for all files, which results
+    in fewer disk head seeks. If you need last access times but you'd
+    like some of the benefits of this optimization, you can try
+    `relatime`.
+
+    ```
+    /dev/sda5    /data           ext3    noatime  1 1
+    /dev/sdb1    /data/inno-log  ext3    noatime  1 2
+    ```
+
+  * **Small number of frequently changed keys**
+
+    When keys are changed frequently, fragmentation rapidly increases.
+    To counteract this, you should lower the fragmentation trigger and
+    threshold.
+
+  * **Limited disk space**
+
+    When disk space is limited, limiting the space occupied by dead keys
+    is of paramount importance. Lower the dead bytes threshold and
+    trigger to counteract wasted space.
+
+  * **Purging stale entries after a fixed period**
+
+    To automatically purge stale values, set the object expiry value to
+    the desired cutoff time. Keys that are not modified for a period
+    equal to or greater than this time interval will become
+    inaccessible.
+
+  * **High number of partitions per node**
+
+    Because each node runs many partitions, Bitcask will have
+    many [open files][perf open files]. To reduce the number of open
+    files, we suggest increasing the max file size so that larger files
+    will be written. You could also decrease the fragmentation and
+    dead-bytes settings and increase the small file threshold so that
+    merging will keep the number of open files small in number.
+
+  * **High daytime traffic, low nighttime traffic**
+
+    In order to cope with a high volume of writes without performance
+    degradation during the day, you might want to limit merging to
+    non-peak periods. Setting the merge window to hours of the day
+    when traffic is low will help.
+
+  * **Multi-cluster replication**
+
+    If you are using Riak with the replication feature enabled, your clusters might experience
+    higher rates of fragmentation and dead bytes. Additionally,
+    because the fullsync feature operates across entire partitions, it
+    will be made more efficient by accessing data as sequentially as
+    possible (across fewer files). Lowering both the fragmentation and
+    dead-bytes settings will improve performance.
+
+## FAQ
+
+  * [[Why does it seem that Bitcask merging is only triggered when a
+    Riak node is restarted?|Developing on Riak
+    FAQs#why-does-it-seem-that-bitc]]
+  * [[If the size of key index exceeds the amount of memory, how does
+    Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+  * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time.
The file being
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, while
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. This data management strategy
+may use up a lot of space over time, since Bitcask writes new values
+without touching the old ones.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033322492444576013453623296
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+9819464125817003981681007469812.9.03831329677312
+```
+
+Note that when starting up, directories are created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 116463411724806326294356135107078803128832.9.0184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered.
+ +``` +bitcask/ +|-- 0-1317147619996589 +| |-- 1317147974.bitcask.data +| |-- 1317147974.bitcask.hint +| |-- 1317221578.bitcask.data +| |-- 1317221578.bitcask.hint +| |-- 1317221869.bitcask.data +| |-- 1317221869.bitcask.hint +| |-- 1317222847.bitcask.data +| |-- 1317222847.bitcask.hint +| |-- 1317222868.bitcask.data +| |-- 1317222868.bitcask.hint +| |-- 1317223014.bitcask.data +| `-- 1317223014.bitcask.hint +|-- 1004782375664995756265033322492444576013453623296-1317147628760580 +| |-- 1317147693.bitcask.data +| |-- 1317147693.bitcask.hint +| |-- 13172.9.05.bitcask.data +| |-- 13172.9.05.bitcask.hint +| |-- 1317222514.bitcask.data +| |-- 1317222514.bitcask.hint +| |-- 1317223035.bitcask.data +| |-- 1317223035.bitcask.hint +| |-- 1317223411.bitcask.data +| `-- 1317223411.bitcask.hint +|-- 1027618338748291114361965898003636498195577569280-1317223690337865 +|-- 1050454301831586472458898473514828420377701515264-1317223690151365 + +... etc ... + +``` + +This is normal operational behavior for Bitcask. diff --git a/content/riak/kv/2.9.2/setup/planning/backend/leveldb.md b/content/riak/kv/2.9.2/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..e20d16da37 --- /dev/null +++ b/content/riak/kv/2.9.2/setup/planning/backend/leveldb.md @@ -0,0 +1,502 @@ +--- +title: "LevelDB" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "LevelDB" + identifier: "planning_backend_leveldb" + weight: 101 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.2/ops/advanced/backends/leveldb/ + - /riak/kv/2.9.2/ops/advanced/backends/leveldb/ +--- + +[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}} +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode +[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference +[perf index]: {{<baseurl>}}riak/kv/2.9.2/using/performance +[config reference#aae]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/#active-anti-entropy + +> **Note on upgrading to 2.0** +> +> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0, +and wish to keep using your old `app.config` file for configuration, +make sure to follow the steps for setting the +`total_leveldb_mem_percent` parameter in the +[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB]. + +[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application +that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an +open-source, on-disk key/value store created by Google Fellows Jeffrey +Dean and Sanjay Ghemawat. + +LevelDB is a relatively new entrant into the growing list of key/value +database libraries, but it has some very interesting qualities that we +believe make it an ideal candidate for use in Riak. LevelDB's storage +architecture is more like +[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable +model than it is like Bitcask. This design and implementation provide +the possibility of a storage engine without Bitcask's RAM limitation. + +> **Note:** Riak uses a fork of LevelDB. The code can be found +[on Github](https://github.com/basho/leveldb). + +A number of changes have been introduced in the LevelDB backend in Riak +2.0: + +* There is now only _one_ performance-related setting that Riak users + need to define---`leveldb.total_mem_percent`---as LevelDB now + dynamically sizes the file cache and block sizes based upon active + [vnodes][glossary vnode] assigned to the node. 
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model
+  for background compaction work on `.sst` table files. The new model
+  has increased throughput by at least 10% in all test scenarios.
+* Delete operations now receive priority handling in compaction
+  selection, which means more aggressive reclaiming of disk space than
+  in previous versions of Riak's LevelDB backend.
+* Nodes storing massive key datasets (e.g. in the billions of keys) now
+  receive increased throughput due to automatic management of LevelDB's
+  block size parameter. This parameter is slowly raised to increase the
+  number of files that can be open simultaneously, improving random read
+  performance.
+
+## Strengths
+
+1. **License** --- The LevelDB and eLevelDB licenses are the [New BSD
+   License](http://www.opensource.org/licenses/bsd-license.php) and the
+   [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html),
+   respectively. We'd like to thank Google and the authors of LevelDB at
+   Google for choosing a completely FLOSS license so that everyone can
+   benefit from this innovative storage engine.
+2. **Data compression** --- LevelDB provides two compression algorithms
+   to reduce storage size and increase efficient use of storage bandwidth:
+    * Google's [Snappy](https://code.google.com/p/snappy/) data compression
+    * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data
+      compression
+
+   Enabling compression means more CPU usage but less disk space. Compression
+   is especially good for text data, including raw text, Base64, JSON, etc.
+
+## Weaknesses
+
+1. Read access can be slow when there are many levels to search
+2. LevelDB may have to do a few disk seeks to satisfy a read; one disk
+   seek per level and, if 10% of the database fits in memory, one seek
+   for the last level (since all of the earlier levels should end up
+   cached in the OS buffer cache for most filesystems) whereas if 1%
+   fits in memory, LevelDB will need two seeks.
+
+## Installing eLevelDB
+
+Riak ships with eLevelDB included within the distribution, so there is
+no separate installation required. However, Riak is configured to use
+the Bitcask storage engine by default. To switch to eLevelDB, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveldb`:
+
+```riakconf
+storage_backend = leveldb
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_eleveldb_backend},
+    %% ...
+    ]}
+```
+
+## Configuring eLevelDB
+
+eLevelDB's default behavior can be modified by adding/changing the
+`leveldb`-prefixed parameters in [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for eLevelDB are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.data_root` | LevelDB data root | `./data/leveldb`
+`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size.
| `70` + +If you are using the older, `app.config`-based system, the equivalent to +the `leveldb.data_root` is the `data_root` setting, as in the following +example: + +```appconfig +{eleveldb, [ + {data_root, "/path/to/leveldb"}, + + %% Other eleveldb-specific settings +]} +``` + +The `leveldb.maximum_memory.percent` setting is only available in the +newer configuration system. + +### Recommended Settings + +Below are **general** configuration recommendations for Linux +distributions. Individual users may need to tailor these settings for +their application. + +#### sysctl + +For production environments, please see [System Performance Tuning][perf index] +for the recommended `/etc/sysctl.conf` settings. + +#### Block Device Scheduler + +Beginning with the 2.6 kernel, Linux gives you a choice of four I/O +[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We +recommend using the NOOP elevator. You can do this by changing the +scheduler on the Linux boot line: `elevator=noop`. + +#### ext4 Options + +The ext4 filesystem defaults include two options that increase integrity +but slow performance. Because Riak's integrity is based on multiple +nodes holding the same data, these two options can be changed to boost +LevelDB's performance. We recommend setting: `barrier`=0 and +`data`=writeback. + +#### CPU Throttling + +If CPU throttling is enabled, disabling it can boost LevelDB performance +in some cases. + +#### No Entropy + +If you are using https protocol, the 2.6 kernel is widely known for +stalling programs waiting for SSL entropy bits. If you are using https, +we recommend installing the +[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for +pseudorandom number generation. + +#### clocksource + +We recommend setting `clocksource=hpet` on your Linux kernel's `boot` +line. The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a +Google-sponsored open source project that has been incorporated into an +Erlang application and integrated into Riak for storage of key/value +information on disk. The implementation of LevelDB is similar in spirit +to the representation of a single Bigtable tablet (section 5.3). + +### How Levels Are Managed + +LevelDB is a memtable/sstable design. The set of sorted tables is +organized into a sequence of levels. Each level stores approximately ten +times as much data as the level before it. The sorted table generated +from a flush is placed in a special young level (also called level-0). +When the number of young files exceeds a certain threshold (currently +four), all of the young files are merged together with all of the +overlapping level-1 files to produce a sequence of new level-1 files (a +new level-1 file is created for every 2MB of data.) + +Files in the young level may contain overlapping keys. However files in +other levels have distinct non-overlapping key ranges. Consider level +number L where L >= 1. When the combined size of files in level-L +exceeds (10^L) MB (i.e. 
+file in level-L, and all of the overlapping files in level-(L+1), are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-L+1). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 seconds.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0, the only special case to consider is that, after
+picking the primary L0 file to compact, LevelDB will check other L0
+files to determine the degree to which they overlap. This is an attempt
+to avoid some I/O; in practice, we can expect L0 compactions to usually,
+if not always, involve all of the L0 files.
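+
+As a rough sketch (illustrative Python, not Riak's actual C++
+implementation), the level-selection rule described above might look
+like this:
+
+```python
+# Toy model of LevelDB's compaction scoring: each level is scored by how
+# full it is relative to its target size (10^L MB), level-0 by file
+# count, and the highest-scoring level is compacted next.
+L0_TARGET_FILES = 4
+
+def pick_compaction_level(l0_file_count, level_sizes_mb):
+    """level_sizes_mb maps level number (>= 1) to its current size in MB."""
+    scores = {0: l0_file_count / L0_TARGET_FILES}
+    for level, size_mb in level_sizes_mb.items():
+        scores[level] = size_mb / (10 ** level)  # target size is 10^L MB
+    return max(scores, key=scores.get)
+
+# Example: level-1 holds 15MB against a 10MB target, so it wins out over
+# a level-0 that is only three files deep.
+print(pick_compaction_level(3, {1: 15, 2: 40}))  # => 1
+```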
+
+See the `PickCompaction` routine in
+[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level, since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition ring,
+which results in 64 separate directories, each with its own LevelDB
+database:
+
+```bash
+leveldb/
+|-- 0
+|   |-- 000003.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   `-- MANIFEST-000002
+|-- 1004782375664995756265033322492444576013453623296
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+
+... etc ...
+
+`-- 9819464125817003981681007469812.9.03831329677312
+    |-- 000005.log
+    |-- CURRENT
+    |-- LOCK
+    |-- LOG
+    |-- LOG.old
+    `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033322492444576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files in a
+single database directory. In Riak 1.3, the original LevelDB code was
+modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in the name of speeding up
+database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels.
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows three approximate sizes (in
+megabytes): the amount of raw data stored in the level, the cumulative
+size of all levels up to the specified level, and the cumulative size
+including active anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above. Select the third column if you are not
+  using active anti-entropy or the fourth column if you are (i.e. if the
+  `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative column in
+  each row in the table. The first result that exceeds your fast storage
+  array capacity will provide the level number that should be used for
+  your `leveldb.tiered` setting.
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
diff --git a/content/riak/kv/2.9.2/setup/planning/backend/leveled.md b/content/riak/kv/2.9.2/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..0609ed680e
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/backend/leveled.md
@@ -0,0 +1,137 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/backends/leveled/
+  - /riak/kv/2.9.2/ops/advanced/backends/leveled/
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/2.9.2/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/#active-anti-entropy
+
+[Leveled](https://github.com/martinsumner/leveled) is a simple Key-Value store based on the concept of Log-Structured Merge Trees, with the following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+  - Splits the storage of value between keys/metadata and body (assuming some definition of metadata is provided);
+  - Allows for the application to define what constitutes object metadata and what constitutes the body (value-part) of the object, and to assign tags to objects to manage multiple object-types with different extraction rules.
+- Stores keys/metadata in a merge tree and the full object in a journal of CDB files:
+  - Allowing for HEAD requests which have lower overheads than GET requests; and
+  - Queries which traverse keys/metadata to be supported with fewer side effects on the page cache than folds over keys/objects.
+- Support for tagging of object types and the implementation of alternative store behaviour based on type:
+  - Allows for changes to extract specific information as metadata to be returned from HEAD requests;
+  - Potentially usable for objects with special retention or merge properties.
+- Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes):
+  - Low cost specifically where there is a need to scan across keys and metadata (not values).
+- Written in Erlang as a message passing system between Actors.
+
+## Strengths
+
+1. leveled was developed specifically as a potential backend for Riak, with features such as:
+   * Support for secondary indexes
+   * Multiple fold types
+   * Auto expiry of objects
+
+   Leveled also supports data compression (see the `leveled.compression_method` setting below).
+   Enabling compression means more CPU usage but less disk space. Compression
+   is especially good for text data, including raw text, Base64, JSON, etc.
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and more likely to suffer from edge case issues than Bitcask or LevelDB, simply because they've been around longer and have been more thoroughly tested via usage in customer environments.
+2. Leveled works better with medium to larger sized objects. It works perfectly well with small objects, but the additional disk space overhead may render LevelDB a better choice if disk space is at a premium and all of your data will be exclusively limited to a few KB or less. This may change as Leveled matures, though.
+
+## Installing leveled
+
+Leveled is included with Riak KV 2.9.2 and beyond, so there is no need to install anything further. To switch to leveled, set the `storage_backend` variable in [`riak.conf`][config reference] to `leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_leveled_backend},
+    %% ...
+    ]}
+```
+
+## Configuring leveled
+
+Leveled's default behavior can be modified by adding/changing
+parameters in the `leveled` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | Compression method. | `native`
+`leveled.compression_point` | The point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | The minimum log level to be used within leveled. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
+`leveled.max_run_length` | The maximum number of journal files per compaction run. | `4`
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### No Entropy
+
+If you are using the HTTPS protocol, note that the 2.6 kernel is widely
+known for stalling programs as they wait for SSL entropy bits. If you are
+using HTTPS, we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line. The TSC clocksource has been identified to cause issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[Leveled](https://github.com/martinsumner/leveled) is an open source project that has been developed specifically as a backend option for Riak, rather than as a generic backend.
diff --git a/content/riak/kv/2.9.2/setup/planning/backend/memory.md b/content/riak/kv/2.9.2/setup/planning/backend/memory.md
new file mode 100644
index 0000000000..0d225fc77b
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/backend/memory.md
@@ -0,0 +1,143 @@
+---
+title: "Memory"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Memory"
+    identifier: "planning_backend_memory"
+    weight: 102
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/backends/memory/
+  - /riak/kv/2.9.2/ops/advanced/backends/memory/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/multi
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb
+
+The Memory storage backend uses in-memory tables to store all data.
+This data is never persisted to disk or to any other storage mechanism.
+The Memory storage engine is best used for testing Riak clusters or for
+storing small amounts of transient state in production systems.
+
+Internally, the Memory backend uses Erlang ETS tables to manage data.
+More information can be found in the
+[official Erlang documentation](http://www.erlang.org/doc/man/ets.html).
+
+## Enabling the Memory Backend
+
+To enable the memory backend, edit your [configuration files][config reference]
+for each Riak node and specify the Memory backend as shown in the following
+example:
+
+```riakconf
+storage_backend = memory
+```
+
+```appconfig
+{riak_kv, [
+    ...,
+    {storage_backend, riak_kv_memory_backend},
+    ...
+    ]}
+```
+
+**Note**: If you *replace* an existing backend with the Memory backend
+(by removing or commenting out the previous `storage_backend` setting),
+data belonging to the previously specified backend will still be
+preserved on the filesystem but will no longer be accessible through
+Riak unless that backend is enabled again.
+
+If you require multiple backends in your configuration, please consult
+the [Multi backend documentation][plan backend multi].
+
+## Configuring the Memory Backend
+
+The Memory backend enables you to configure two fundamental aspects of
+object storage: maximum memory usage per [vnode][glossary vnode]
+and object expiry.
+
+### Max Memory
+
+This setting specifies the maximum amount of memory consumed by the
+Memory backend. It's important to note that this setting acts on a
+*per-vnode basis*, not on a per-node or per-cluster basis. This should
+be taken into account when planning for memory usage with the Memory
+backend, as the total memory used will be max memory times the number
+of vnodes in the cluster.
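+
+For example, assuming a ring size of 64 spread across 5 nodes (roughly
+13 vnodes per node), a maximum of 1GB per vnode would allow the Memory
+backend to consume roughly 13GB on each node and about 64GB across the
+cluster as a whole.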
+
+When the threshold value that you set has been met in a particular
+vnode, Riak will begin discarding objects, beginning with the oldest
+object and proceeding until memory usage returns below the allowable
+threshold.
+
+You can configure maximum memory using the
+`memory_backend.max_memory_per_vnode` setting. You can specify
+`max_memory_per_vnode` however you'd like, using kilobytes, megabytes,
+or even gigabytes.
+
+The following are all possible settings:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 500KB
+memory_backend.max_memory_per_vnode = 10MB
+memory_backend.max_memory_per_vnode = 2GB
+```
+
+```appconfig
+%% In the app.config-based system, the equivalent setting is max_memory,
+%% which must be expressed in megabytes:
+
+{riak_kv, [
+    %% storage_backend specifies the Erlang module defining the storage
+    %% mechanism that will be used on this node.
+
+    {storage_backend, riak_kv_memory_backend},
+    {memory_backend, [
+        ...,
+        {max_memory, 4096}, %% 4GB in megabytes
+        ...
+    ]}
+]}
+```
+
+To determine an optimal max memory setting, we recommend consulting the
+documentation on [LevelDB cache size][plan backend leveldb].
+
+### TTL
+
+The time-to-live (TTL) parameter specifies the amount of time an object
+remains in memory before it expires. The minimum time is one second.
+
+In the newer, `riak.conf`-based configuration system, you can specify
+`ttl` in seconds, minutes, hours, days, etc. The following are all
+possible settings:
+
+```riakconf
+memory_backend.ttl = 1s
+memory_backend.ttl = 10m
+memory_backend.ttl = 3h
+```
+
+```appconfig
+%% In the app.config-based system, the ttl setting must be expressed in
+%% seconds:
+
+{memory_backend, [
+    %% other settings
+    {ttl, 86400}, %% Set to 1 day
+    %% other settings
+    ]}
+```
+
+> **Dynamically Changing `ttl`**
+>
+> There is currently no way to dynamically change the `ttl` setting for a
+bucket or bucket type. The current workaround would be to define
+multiple Memory backends using the Multi backend, each with different
+`ttl` values. For more information, consult the documentation on the
+[Multi][plan backend multi] backend.
diff --git a/content/riak/kv/2.9.2/setup/planning/backend/multi.md b/content/riak/kv/2.9.2/setup/planning/backend/multi.md
new file mode 100644
index 0000000000..fb1da7a971
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/backend/multi.md
@@ -0,0 +1,226 @@
+---
+title: "Multi-backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Multi-backend"
+    identifier: "planning_backend_multi"
+    weight: 103
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/backends/multi/
+  - /riak/kv/2.9.2/ops/advanced/backends/multi/
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/memory
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types
+[use admin riak-admin cli]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin
+
+Riak allows you to run multiple backends within a single Riak cluster.
+Selecting the Multi backend enables you to use different storage
+backends for different [buckets][concept buckets]. Any combination of the three
+available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used.
+
+## Configuring Multiple Backends
+
+You can set up your cluster to use the Multi backend using Riak's
+[configuration files][config reference].
+
+```riakconf
+storage_backend = multi
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_multi_backend},
+    %% ...
+]},
+```
+
+Remember that you must stop and then restart each node when you change
+storage backends or modify any other configuration.
+
+## Using Multiple Backends
+
+In Riak 2.0 and later, we recommend using multiple backends by applying
+them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process
+involves three steps:
+
+1. Creating a bucket type that enables buckets of that type to use the
+   desired backends
+2. Activating that bucket type
+3. Setting up your application to use that type
+
+Let's say that we've set up our cluster to use the Multi backend and we
+want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface.
+
+We'll call our bucket types `leveldb_backend` and `memory_backend`, but
+you can use whichever names you wish.
+
+```bash
+riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}'
+riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}'
+```
+
+Then, we must activate those bucket types so that they can be used in
+our cluster:
+
+```bash
+riak-admin bucket-type activate leveldb_backend
+riak-admin bucket-type activate memory_backend
+```
+
+Once those types have been activated, any objects stored in buckets
+bearing the type `leveldb_backend` will be stored in LevelDB, whereas
+all objects stored in buckets of the type `memory_backend` will be
+stored in the Memory backend.
+
+More information can be found in our documentation on [using bucket types][usage bucket types].
+
+## Configuring Individual Backends
+
+Once you've set up your cluster to use multiple backends, you can
+configure each backend on its own. All configuration options available
+for LevelDB, Bitcask, and Memory are available to you when using the
+Multi backend.
+
+#### Using the Newer Configuration System
+
+If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by
+prefacing each configuration with `multi_backend`.
+
+Here is an example of the general form for configuring multiple
+backends:
+
+```riakconf
+multi_backend.$name.$setting_name = setting
+```
+
+If you are using, for example, the LevelDB and Bitcask backends and wish
+to set LevelDB's `bloomfilter` setting to `off` and the Bitcask
+backend's `io_mode` setting to `nif`, you would do that as follows:
+
+```riakconf
+multi_backend.leveldb.bloomfilter = off
+multi_backend.bitcask.io_mode = nif
+```
+
+#### Using the Older Configuration System
+
+If you are using the older, `app.config`-based configuration system,
+configuring multiple backends involves adding one or more
+backend-specific sections to your `riak_kv` settings (in addition to
+setting the `storage_backend` setting to `riak_kv_multi_backend`, as
+shown above).
+
+> **Note**: If you are defining multiple file-based backends of the same
+type, each of these must have a separate `data_root` directory defined.
+
+While all configuration parameters can be placed anywhere within the
+`riak_kv` section of `app.config`, in general we recommend that you
+place them in the section containing other backend-related settings to
+keep the settings organized.
+
+Below is the general form for your `app.config` file:
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        %% Here's where you set the individual multiplexed backends
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_mult/"},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_expiry_mult/"},
+            {expiry_secs, 86400},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb with a different configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"memory_mult">>, riak_kv_memory_backend, [
+            %% memory configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]}
+    ]},
+    %% ...
+]},
+```
+
+Note that in each of the subsections of the `multi_backend` setting, the
+name of each backend you wish to configure can be anything you would
+like. Directly after naming the backend, you must specify which of the
+backends corresponds to that name, i.e. `riak_kv_bitcask_backend`,
+`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have
+done that, the various configurations for each named backend can be set
+as objects in an Erlang list.
+
+## Example Configuration
+
+Imagine that you are using both Bitcask and LevelDB in your cluster, and
+you would like storage to default to Bitcask. The following
+configuration would create two backend configurations, named
+`bitcask_mult` and `leveldb_mult`, respectively, while also setting the
+data directory for each backend and specifying that `bitcask_mult` is
+the default.
+
+```riakconf
+storage_backend = multi
+
+multi_backend.bitcask_mult.storage_backend = bitcask
+multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult
+
+multi_backend.leveldb_mult.storage_backend = leveldb
+multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult
+
+multi_backend.default = bitcask_mult
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            {data_root, "/var/lib/riak/bitcask"}
+        ]},
+        {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+            {data_root, "/var/lib/riak/leveldb"}
+        ]}
+    ]}
+    %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or for the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is at 50%
+or less. For example, using three backends with each set to 50% memory
+usage will inevitably lead to memory problems.
diff --git a/content/riak/kv/2.9.2/setup/planning/best-practices.md b/content/riak/kv/2.9.2/setup/planning/best-practices.md
new file mode 100644
index 0000000000..d28e49210d
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/best-practices.md
@@ -0,0 +1,141 @@
+---
+title: "Scaling and Operating Riak Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Best Practices"
+    identifier: "planning_best_practices"
+    weight: 105
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/planning/best-practices
+  - /riak/kv/2.9.2/ops/building/planning/best-practices
+---
+
+[use ref handoff]: {{<baseurl>}}riak/kv/2.9.2/using/reference/handoff
+[config mapreduce]: {{<baseurl>}}riak/kv/2.9.2/configuring/mapreduce
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#active-anti-entropy-aae
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes
+
+Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster.
+
+## Disk Capacity
+
+Filling up disks is a serious problem in Riak. In general, you should
+add capacity under the following conditions:
+
+* a disk becomes more than 80% full
+* you have fewer than 10 days of capacity remaining at current rates of
+  growth
+
+## RAID Levels
+
+Riak provides resilience through its built-in redundancy.
+
+* RAID0 can be used to increase performance at the expense of
+  single-node reliability
+* RAID5/6 can be used to increase reliability over RAID0 while still
+  offering higher performance than single disks
+* You should choose a RAID level (or no RAID) that you’re comfortable
+  with
+
+## Disk Leeway
+
+* Adding new nodes instantly increases the total capacity of the
+  cluster, but you should allow enough internal network capacity that
+  [handing off][use ref handoff] existing data outpaces the arrival of new
+  data.
+* Once you’ve reached a scale at which the amount of new data arriving
+  is a small fraction of the cluster's total capacity, you can add new
+  nodes when you need them. You should be aware, however, that adding
+  new nodes can actually _increase_ disk usage on existing nodes in the
+  short term as data is rebalanced within the cluster.
+* If you are certain that you are likely to run out of capacity, we
+  recommend allowing a week or two of leeway so that you have plenty of
+  time to add nodes and for [handoff][use ref handoff] to occur before the disks reach
+  capacity
+* For large volumes of storage it's usually prudent to add more capacity
+  once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+  processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you’ll have spare
+  capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be “bursty,” i.e. it tends to vary both quite
+  a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times as much intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have fewer than 10 days of leeway before you expect the cluster to
+  fill up
+* your current nodes' IO/CPU activity is higher than average for an
+  extended period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node uses 1 TB but the
+  rest use 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node].
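+
+When you do add multiple nodes, stage all of the joins as a single
+planned change so that the ring is rebalanced only once. A sketch, using
+hypothetical node names:
+
+```bash
+# Stage every join first...
+riak-admin cluster join riak@node4.example.com
+riak-admin cluster join riak@node5.example.com
+
+# ...then review and commit the whole change as one operation
+riak-admin cluster plan
+riak-admin cluster commit
+```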
+
+## Scaling
+
+* All large-scale systems are bound by the availability of some
+  resources
+* From a stability point of view, the best state for a busy Riak cluster
+  to maintain is the following:
+  * New network connections are limited to ensure that existing network
+    connections consume most network bandwidth
+  * CPU at < 30%
+  * Disk IO at < 90%
+* You should use HAProxy or your application servers to limit new
+  network connections to keep network and IO below 90% and CPU below
+  30%.
diff --git a/content/riak/kv/2.9.2/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.9.2/setup/planning/bitcask-capacity-calc.md
new file mode 100644
index 0000000000..aab3840642
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/bitcask-capacity-calc.md
@@ -0,0 +1,100 @@
+---
+title: "Bitcask Capacity Calculator"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Bitcask Capacity Calculator"
+    identifier: "planning_cluster_bitcask_capacity"
+    weight: 104
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/planning/bitcask
+  - /riak/kv/2.9.2/ops/building/planning/bitcask
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask
+
+These calculators will assist you in sizing your cluster if you plan to
+use the default [Bitcask][plan backend bitcask] storage backend.
+
+This page is designed to give you a rough estimate when sizing your
+cluster. The calculations are a _best guess_, and they tend to be a bit
+on the conservative side. It's important to include a bit of head room
+as well as room for unexpected growth so that if demand exceeds
+expectations you'll be able to add more nodes to the cluster and stay
+ahead of your requirements.
+
+<div id="node_info" class="calc_info"></div>
+<div class="calculator">
+  <ul>
+    <li>
+      <label for="n_total_keys">Total Number of Keys:</label>
+      <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input">
+      <span class="error_span" id="n_total_keys_error"></span>
+    </li>
+    <li>
+      <label for="n_bucket_size">Average Bucket Size (Bytes):</label>
+      <input id="n_bucket_size" type="text" size="7" name="n_bucket_size" value="" class="calc_input">
+      <span class="error_span" id="n_bucket_size_error"></span>
+    </li>
+    <li>
+      <label for="n_key_size">Average Key Size (Bytes):</label>
+      <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input">
+      <span class="error_span" id="n_key_size_error"></span>
+    </li>
+    <li>
+      <label for="n_record_size">Average Value Size (Bytes):</label>
+      <input id="n_record_size" type="text" size="7" name="n_record_size" value="" class="calc_input">
+      <span class="error_span" id="n_record_size_error"></span>
+    </li>
+    <li>
+      <label for="n_ram">RAM Per Node (in GB):</label>
+      <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input">
+      <span class="error_span" id="n_ram_error"></span>
+    </li>
+    <li>
+      <label for="n_nval"><i>N</i> (Number of Write Copies):</label>
+      <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input">
+      <span class="error_span" id="n_nval_error"></span>
+    </li>
+</ul>
+</div>
+
+## Recommendations
+
+<span id="recommend"></span>
+
+## Details on Bitcask RAM Calculation
+
+With the above information in mind, the following variables will factor
+into your RAM calculation:
+
+Variable | Description
+:--------|:-----------
+Static Bitcask per-key overhead | 44.5 bytes per key
+Estimated average bucket-plus-key length | The combined number of characters your bucket + key names will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The Actual Equation
+
+Approximate RAM Needed for Bitcask = (static Bitcask per-key overhead +
+estimated average bucket+key length in bytes) * estimated total number of
+keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+That works out to (44.5 + 30) bytes * 50,000,000 keys * 3 replicas, or
+roughly 11.2 billion bytes: the amount of RAM you would need for Bitcask
+is about **10.4 GB across your entire cluster.**
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
diff --git a/content/riak/kv/2.9.2/setup/planning/cluster-capacity.md b/content/riak/kv/2.9.2/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..bac04e5799
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/cluster-capacity.md
@@ -0,0 +1,234 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/planning/cluster
+  - /riak/kv/2.9.2/ops/building/planning/cluster
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/2.9.2/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.2/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users. Memory is essential for running complex MapReduce queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM without degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory. The keydir is a hash table that maps each
+concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for
+each file contained within each Bitcask backend) to a fixed-size
+structure giving the file, offset, and size of the most recently written
+entry for that bucket + key on disk.
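+
+As a rough sketch of that arithmetic (the numbers here are purely
+illustrative): a cluster holding 100,000,000 keys with an average
+bucket + key length of 25 bytes and an `n_val` of 3 would need
+approximately (44.5 + 25) * 100,000,000 * 3 bytes of keydir space, or
+roughly 19.4 GB of RAM spread across the cluster.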
+
+To learn about Bitcask, see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.
+
+If your calculated RAM needs will exceed your hardware resources---in other words, if you can't afford the RAM to use Bitcask---we recommend that you use LevelDB.
+
+Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.
+
+### LevelDB
+
+If RAM requirements for Bitcask are prohibitive, we recommend use of
+the LevelDB backend. While LevelDB doesn't require a large amount of RAM
+to operate, supplying it with the maximum amount of memory available leads to higher performance.
+
+For more information see [LevelDB][plan backend leveldb].
+
+## Disk
+
+Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs:
+
+#### Estimated Total Objects * Average Object Size * n_val
+
+For example:
+
+* 50,000,000 objects
+* an average object size of two kilobytes (2,048 bytes)
+* the default `n_val` of 3
+
+Then you would need approximately **286 GB** of disk space in the entire cluster to accommodate your data.
+
+We believe that databases should be durable out of the box. When we
+built Riak, we did so in a way that you could write to disk while
+keeping response times below your users' expectations. So this
+calculation assumes that you'll be keeping the entire data set on disk.
+
+Many of the considerations taken when configuring a machine to serve a
+database apply to configuring a node for Riak as well. Mounting
+disks with `noatime` and having separate disks for your OS and Riak data
+lead to much better performance. See [Planning for a
+Riak System](../start) for more information.
+
+### Disk Space Planning and Ownership Handoff
+
+When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is that the other nodes require more intensive disk space usage; in rare cases, this can fill the disk of one or more of those nodes.
+
+When making disk space planning decisions, we recommend that you:
+
+* assume that one or more nodes may be down at any time
+* monitor your disk space usage and add additional space when usage
+  exceeds 50-60% of available space.
+
+Another possibility worth considering is using Riak with a filesystem
+that allows for growth, for example
+[LVM],
+[RAID](http://en.wikipedia.org/wiki/RAID), or
+[ZFS](http://en.wikipedia.org/wiki/ZFS).
+
+## Read/Write Profile
+
+Read/write ratios, as well as the distribution of key access, should
+influence the configuration and design of your cluster. If your use case
+is write heavy, you will need less RAM for caching, and if only a
+certain portion of keys is accessed regularly, such as a [Pareto
+distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you
+won't need as much RAM available to cache those keys' values.
+
+## Number of Nodes
+
+The number of nodes (i.e. physical servers) in your Riak Cluster depends
+on the number of times data is [replicated][concept replication] across the
+cluster. To ensure that the cluster is always available to respond to
+read and write requests, we recommend a "sane default" of N=3
+replicas. This requirement can be met with a 3- or 4-node
+cluster.
+
+For production deployments, however, we recommend using no fewer than 5
+nodes, as node failures in smaller clusters can compromise the
+fault-tolerance of the system. Additionally, in clusters smaller than 5
+nodes, a high percentage of the nodes (75-100% of them) will need to
+respond to each request, putting undue load on the cluster that may
+degrade performance. For more details on this recommendation, see our
+blog post on [Why Your Riak Cluster Should Have at Least Five
+Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/).
+
+## Scaling
+
+Riak can be scaled in two ways: vertically, via improved hardware, and
+horizontally, by adding more nodes. Both ways can provide performance
+and capacity benefits, but should be used in different circumstances.
+The [riak-admin cluster command][use admin riak-admin#cluster] can
+assist scaling in both directions.
+
+#### Vertical Scaling
+
+Vertical scaling, or improving the capabilities of a node/server,
+provides greater capacity to the node but does not decrease the overall
+load on existing members of the cluster. That is, the ability of the
+improved node to handle existing load is increased but the load itself
+is unchanged. Reasons to scale vertically include increasing IOPS (I/O
+Operations Per Second), increasing CPU/RAM capacity, and increasing disk
+capacity.
+
+#### Horizontal Scaling
+
+Horizontal scaling, or increasing the number of nodes in the cluster,
+reduces the responsibilities of each member node by reducing the number
+of partitions and providing additional endpoints for client connections.
+That is, the capacity of each individual node does not change but its
+load is decreased. Reasons to scale horizontally include increasing I/O
+concurrency, reducing the load on existing nodes, and increasing disk
+capacity.
+
+> **Note on horizontal scaling**
+>
+> When scaling horizontally, it's best to add all planned nodes at once
+with multiple `riak-admin cluster join` commands followed by
+a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster.
+
+#### Reducing Horizontal Scale
+
+If a Riak cluster is over-provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command.
+
+## Ring Size/Number of Partitions
+
+Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference].
+
+The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes we recommend a larger ring size.
+
+The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes.
+
+There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses. We recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine, while a 10-node cluster should work well with a ring size of 128 or 256 (64 is too small, while 512 is likely too large).
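+
+Ring size is set in each node's configuration file before the cluster is
+first started. A minimal sketch in `riak.conf`:
+
+```riakconf
+ring_size = 128
+```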
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency IO access can drastically decrease performance. Before
+putting your Riak cluster in production, it is recommended that you gain a
+full understanding of your environment's behavior so that you know how
+your cluster performs under load for an extended period of time. Doing
+so will help you size your cluster for future growth and lead to optimal
+performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend that you keep the latency
+between nodes as low as possible, as high latency leads to
+sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the least
+latency possible using SATA drives or SSDs, for example.
diff --git a/content/riak/kv/2.9.2/setup/planning/future.md b/content/riak/kv/2.9.2/setup/planning/future.md
new file mode 100644
index 0000000000..d69de0f75b
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/future.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+#menu:
+#  riak_kv-2.9.2:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+---
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.9.2/setup/planning/operating-system.md b/content/riak/kv/2.9.2/setup/planning/operating-system.md
new file mode 100644
index 0000000000..85c41f8d7b
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/operating-system.md
@@ -0,0 +1,25 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+---
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.2/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
diff --git a/content/riak/kv/2.9.2/setup/planning/start.md b/content/riak/kv/2.9.2/setup/planning/start.md
new file mode 100644
index 0000000000..6f1c70f94a
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/planning/start.md
@@ -0,0 +1,57 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/planning/system-planning
+  - /riak/kv/2.9.2/ops/building/planning/system-planning
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring your
+Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have
+strengths and weaknesses, so if you are unsure of which backend you
+need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests
+across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP
+implementations. We don't recommend VRRP behavior for the VIP because
+you'll lose the benefit of spreading client query load to all nodes in a
+ring.
+
+For **reverse-proxy** configurations (HTTP interface), any one of the
+following should work adequately:
+
+* haproxy
+* squid
+* varnish
+* nginx
+* lighttpd
+* Apache
+
diff --git a/content/riak/kv/2.9.2/setup/search.md b/content/riak/kv/2.9.2/setup/search.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/content/riak/kv/2.9.2/setup/upgrading.md b/content/riak/kv/2.9.2/setup/upgrading.md
new file mode 100644
index 0000000000..e8f2a205de
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/upgrading.md
@@ -0,0 +1,33 @@
+---
+title: "Upgrading Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Upgrading"
+    identifier: "upgrading"
+    weight: 102
+    parent: "setup_index"
+toc: true
+---
+
+[upgrade checklist]: ./checklist
+[upgrade version]: ./version
+[upgrade cluster]: ./cluster
+[upgrade mdc]: ./multi-datacenter
+[upgrade search]: ./search
+
+## In This Section
+
+### [Production Checklist][upgrade checklist]
+
+An overview of what to consider before upgrading Riak KV in a production environment.
+
+[Learn More >>][upgrade checklist]
+
+### [Upgrading to Riak KV 2.9.2][upgrade version]
+
+A tutorial on upgrading to Riak KV 2.9.2.
+
+[Learn More >>][upgrade version]
\ No newline at end of file
diff --git a/content/riak/kv/2.9.2/setup/upgrading/checklist.md b/content/riak/kv/2.9.2/setup/upgrading/checklist.md
new file mode 100644
index 0000000000..4479bf292c
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/upgrading/checklist.md
@@ -0,0 +1,220 @@
+---
+title: "Production Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Production Checklist"
+    identifier: "upgrading_checklist"
+    weight: 100
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/upgrading/production-checklist/
+  - /riak/kv/2.9.2/ops/upgrading/production-checklist/
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/2.9.2/using/performance/open-files-limit
+[perf index]: {{<baseurl>}}riak/kv/2.9.2/using/performance
+[ntp]: http://www.ntp.org/
+[security basics]: {{<baseurl>}}riak/kv/2.9.2/using/security/basics
+[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.2/configuring/load-balancing-proxy
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[config backend]: {{<baseurl>}}riak/kv/2.9.2/configuring/backend
+[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.2/developing/app-guide/replication-properties
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/reference/strong-consistency
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/bucket-types
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.2/using/admin/commands
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-control
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/inspecting-node
+[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.2/using/troubleshooting/http-204
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin
+[SANs]: http://en.wikipedia.org/wiki/Storage_area_network
+
+Deploying Riak KV to a real-time production environment from a development or testing environment can be a complex process.
While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using iperf to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
+    * If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][concept strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state?
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak-admin`][use admin riak-admin])
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median and 95th and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs?
+* Are the client libraries in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying?
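+
+As a convenience, the per-node checks above can be swept across the whole
+cluster from a single machine. The sketch below assumes passwordless SSH
+access to every node and a (hypothetical) `nodes.txt` file listing one
+hostname per line; adjust it to your own tooling:
+
+```bash
+#!/usr/bin/env bash
+# Run the basic pre-production health checks on every node.
+while read -r host; do
+  echo "== ${host} =="
+  ssh "${host}" 'riak ping'              # expect: pong
+  ssh "${host}" 'riak-admin ringready'   # expect: TRUE ALL nodes agree...
+  ssh "${host}" 'riak-admin transfers'   # expect: No transfers active
+done < nodes.txt
+```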
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak-admin diag <check>`, where `check` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak-admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production
+    systems
+  - High is for problems that impact production or soon-to-be-production
+    systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those
+    issues that are likely to turn into production outages very soon.
+    On-call engineers respond to urgent requests within 30 minutes,
+    24/7.
+* Does your team know how to gather `riak-debug` results from the whole
+  cluster when opening tickets? If not, that process goes something like
+  this:
+  - SSH into each machine, run `riak-debug`, and grab the resulting
+    `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open
+    a new High- or Urgent-priority ticket
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at
+the metrics gathered above. Based on the numbers you're seeing so far,
+configure alerting thresholds on your latencies, disk consumption, and
+memory. These are the places most likely to give you advance warning of
+trouble.
+
+When you go to increase capacity down the line, having historic metrics
+will give you very clear indicators of having resolved scaling problems,
+as well as metrics for understanding what to upgrade and when.
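+
+As one hedged example of gathering these numbers, the `/stats` HTTP
+endpoint mentioned above can be polled with `curl` and filtered with `jq`
+(assuming both tools are installed and Riak's HTTP interface listens on
+`localhost:8098`); the same values can equally be fed into whatever
+monitoring system you already run:
+
+```bash
+# Pull 95th/99th percentile GET and PUT FSM latencies (in microseconds)
+# from a single node's stats endpoint.
+curl -s http://localhost:8098/stats | \
+  jq '{node_get_fsm_time_95, node_get_fsm_time_99,
+       node_put_fsm_time_95, node_put_fsm_time_99}'
+```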
diff --git a/content/riak/kv/2.9.2/setup/upgrading/cluster.md b/content/riak/kv/2.9.2/setup/upgrading/cluster.md new file mode 100644 index 0000000000..0a3d9f8995 --- /dev/null +++ b/content/riak/kv/2.9.2/setup/upgrading/cluster.md @@ -0,0 +1,298 @@ +--- +title: "Upgrading a Cluster" +description: "" +project: "riak_kv" +project_version: "2.9.2" +menu: + riak_kv-2.9.2: + name: "Upgrading a Cluster" + identifier: "upgrading_cluster" + weight: 102 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/2.9.2/ops/upgrading/rolling-upgrades/ + - /riak/kv/2.9.2/ops/upgrading/rolling-upgrades/ +--- + +[production checklist]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading/checklist +[use admin riak control]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-control +[use admin commands]: {{<baseurl>}}riak/kv/2.9.2/using/admin/commands +[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes +[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter +[jmx monitor]: {{<baseurl>}}riak/kv/2.9.2/using/reference/jmx +[snmp]: {{<baseurl>}}riak/kv/2.9.2/using/reference/snmp + +{{% note title="Note on upgrading Riak KV from older versions" %}} +Riak KV upgrades are tested and supported for two feature release versions. +For example, upgrades from 1.1.x to 1.3.x are tested and supported, +while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new +version of Riak KV that is more than two feature releases ahead, we +recommend first upgrading to an intermediate version. For example, in an +upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x +before upgrading to 1.4.x. + +If you run [Riak Control]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-control), you should disable it during the rolling upgrade process. +{{% /note %}} + +Riak KV nodes negotiate with each other to determine supported +operating modes. This allows clusters containing mixed-versions of Riak KV +to properly interoperate without special configuration, and simplifies +rolling upgrades. + +Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading. + +## Debian/Ubuntu + +The following example demonstrates upgrading a Riak KV node that has been +installed with the Debian/Ubuntu packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +2\. Back up the Riak KV node's `/etc` and `/data` directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Upgrade Riak KV: + +```bash +sudo dpkg -i <riak_package_name>.deb +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +5\. Verify Riak KV is running the new version: + +```bash +riak version +``` + +6\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +* `»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +* While the node was offline, other nodes may have accepted writes on its +behalf. 
This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7. 
The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+   * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+   * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+   * `riak_jmx` --- See [JMX Monitoring][jmx monitor] for more information.
+   * `snmp` --- See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should ensure that any custom patches contained in
+the `basho-patches` directory are examined to determine their
+application to the upgraded version. If you find that patches no longer
+apply to the upgraded version, you should remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
diff --git a/content/riak/kv/2.9.2/setup/upgrading/multi-datacenter.md b/content/riak/kv/2.9.2/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..6b6d68a1aa
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,18 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+#menu:
+#  riak_kv-2.9.2:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
diff --git a/content/riak/kv/2.9.2/setup/upgrading/search.md b/content/riak/kv/2.9.2/setup/upgrading/search.md
new file mode 100644
index 0000000000..fe66659fa1
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/upgrading/search.md
@@ -0,0 +1,276 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.9.2"
+menu:
+  riak_kv-2.9.2:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.9.2/ops/advanced/upgrading-search-2
+  - /riak/kv/2.9.2/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e.
no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage, as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM. A good start is 2 GB, but more will be required for heavier
+workloads. However, do not make the heap too large, as that could
+cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data, as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+>Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space from merge index, as its GC
+algorithm is poor at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the setting is called
+   `yokozuna`. If you've switched to the new `riak.conf` configuration
+   file, it's called `search`.
+
+   ```riakconf
+   search = on
+   ```
+   ```appconfig
+   {yokozuna, [
+     %% Other configs
+     {enabled, true},
+     %% Other configs
+   ]}
+   ```
+
+   <div class="note">
+   <div class="title">Upgrade First</div>
+   Don't proceed until all nodes have been upgraded to the newest
+   version. This way all nodes have new Search capabilities before
+   running the next steps, which require them.
+   </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+   To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+   instructions to learn how to define your XML file. Once you've created
+   the file, you can upload it to the cluster.
+
+   ```curl
+   curl -XPUT http://localhost:8098/search/schema/my_schema \
+     -H 'Content-Type: application/xml' \
+     --data-binary @my_schema.xml
+   ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+   ```curl
+   curl -XPUT http://localhost:8098/search/index/my_index \
+     -H 'Content-Type: application/json' \
+     -d '{"schema":"my_schema"}'
+   ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+   ```curl
+   curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+     -H 'Content-Type: application/json' \
+     -d '{"props":{"search_index":"my_index"}}'
+   ```
+
+   Once a bucket is associated with the new Search, all objects that are
+   written or modified in Riak will be indexed by **both** legacy and new
+   Search. However, the HTTP and client query interfaces will still
+   continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+   Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+   the following code into the shell. It clears the Search hash trees for
+   each node in the cluster.
+
+   ```erlang
+   riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+   ```
+
+   Press `Ctrl-D` to exit from the attached shell.
+
+   In the background, AAE will rebuild the hash trees and exchange them
+   with KV. These exchanges will notice that objects are missing and index
+   them in new Search.
+
+   <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+has occurred on every node.
+
+   ```bash
+   riak-admin search aae-status
+   ```
+
+   First, you must wait until all trees are rebuilt. This may take a
+   while, as each node is configured, by default, to build a maximum of
+   one tree per hour. You can determine when a tree is built by looking
+   at the `Entropy Trees` section. When a tree is not built, it will show
+   `--` under the `Built (ago)` column. Otherwise, it will list how long
+   ago the tree was built in a human-friendly format.
Here is an example
+   of trees that are not built:
+
+   ```
+   ================================ Entropy Trees ================================
+   Index                                              Built (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   --
+   319703483166135013357056057156686910549735243776   --
+   ...
+   ```
+
+   Here is an example of built trees:
+
+   ```
+   ================================ Entropy Trees ================================
+   Index                                              Built (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   12.3 hr
+   319703483166135013357056057156686910549735243776   5.3 hr
+   ...
+   ```
+
+   After all the trees are built, you then have to wait for a full
+   exchange round to occur for every partition on every node. That is,
+   the full exchange round must be **NEWER** than the time the tree was
+   built. That way you know the exchange was based on the latest tree.
+   The exchange information is found under the `Exchanges` section.
+   Under that section there are two columns: `Last (ago)` and `All
+   (ago)`. In this case, you want to wait until the `All (ago)` value is
+   newer than the value of `Built (ago)` in the `Entropy Trees` section.
+   For example, given the entropy tree output above, this output would
+   indicate that both partitions have had a full exchange round since the
+   latest tree was built:
+
+   ```
+   ================================== Exchanges ==================================
+   Index                                              Last (ago)    All (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+   319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+   ...
+   ```
+
+   Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than
+   `5.3 hr`. Once the exchange is newer for every partition on every
+   node, you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command to hand HTTP and PB query
+control over to the new Riak Search.
+
+   ```bash
+   riak-admin search switch-to-new-search
+   ```
+
+   <div class="note">
+   <div class="title">Check Results Before Switching (Optional)</div>
+   Up until this point, all incoming queries are serviced by the legacy
+   Search system. After `switch-to-new-search` is run, all queries
+   will be handled by new Search. If you first want to verify the
+   results of new Search before switching, you can use its dedicated
+   HTTP resource at `/search/query/<index>?q=...`.
+   </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+   ```curl
+   curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+     -H 'Content-Type: application/json' \
+     -d '{"props":{"search": false}}'
+   ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+   ```appconfig
+   {riak_search, [
+     %% Other configs
+     {enabled, false},
+     %% Other configs
+   ]},
+   ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search, it causes
+the commit hooks to persist even when legacy Search is disabled and the
+search property is set to false.
+
+    New Search has code to expunge the legacy hooks from the raw ring, but
+    this only occurs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than during the middle of a migration.
diff --git a/content/riak/kv/2.9.2/setup/upgrading/version.md b/content/riak/kv/2.9.2/setup/upgrading/version.md
new file mode 100644
index 0000000000..f4a75a45b2
--- /dev/null
+++ b/content/riak/kv/2.9.2/setup/upgrading/version.md
@@ -0,0 +1,246 @@
+---
+title: "Upgrading to Riak KV 2.9.2"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Upgrading to 2.9.2"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.2/upgrade-v20/
+  - /riak/kv/2.9.2/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.2/setup/upgrading/cluster/
+
+---
+
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.2/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/2.9.2/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.2/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.2/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/2.9.2/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 2.9.2 by following the instructions below.
+
+{{% note title="Tip" %}} Riak KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+   * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to be able to downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config.
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to override the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Transitioning to the Leveled Backend
+
+
+[Riak KV 2.9][release notes] introduced a new backend specifically for Riak: Leveled.
+
+The leveled backend is not compatible with other backends in terms of the serialised disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+* First transition to 2.9 with the current backend in place, minimising the time spent running mismatched versions in parallel;
+* Then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.2 if you plan to use Riak search.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features may be introduced that we either believe are so important that we automatically opt-in users on upgrade or there is no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted-in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:--- |
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade, data written in HLL format is unreadable. |
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade to version 2.9, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new Leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+
+## Upgrading Process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories.
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+
+4\. 
Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package right now, before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+   ```advanced.config
+   {riak_kv, [
+     {override_capability, [
+       {object_hash_version, [{use, legacy}] }
+     ]}
+   ]}
+   ```
+
+5.b\. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+   ```riak.conf
+   leveldb.compression.algorithm=snappy
+   ```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS 2.2.3 or earlier, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` --- the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` --- See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+   ```advanced.config
+   {riak_repl, [
+     {override_capability, [
+       {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+     ]}
+   ]}
+   ```
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.2 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf.
This data is transferred to the node when it becomes available. + +10\. Repeat the process for the remaining nodes in the cluster. + + +### Basho Patches + +After upgrading, you should ensure that any custom patches contained in the `basho-patches` directory are examined to determine their application to the upgraded version. You can find this information in the [Release Notes]. + +If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production. + +The following lists locations of the `basho-patches` directory for +each supported operating system: + +- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches` +- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches` +- FreeBSD: `/usr/local/lib/riak/lib/basho-patches` +- SmartOS: `/opt/local/lib/riak/lib/basho-patches` +- Solaris 10: `/opt/riak/lib/basho-patches` + +### Riaknostic + +It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility Riaknostic. + +Ensure that Riak KV is running on the node, and issue the following command: + +```bash +riak-admin diag +``` + +Make the recommended changes from the command output to ensure optimal node operation. diff --git a/content/riak/kv/2.9.2/using.md b/content/riak/kv/2.9.2/using.md new file mode 100644 index 0000000000..ebc2b64c2e --- /dev/null +++ b/content/riak/kv/2.9.2/using.md @@ -0,0 +1,72 @@ +--- +title: "Using Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Using" + identifier: "managing" + weight: 201 + pre: database +toc: true +--- + +[use running cluster]: ../using/running-a-cluster +[use admin index]: ../using/admin/ +[cluster ops index]: ../using/cluster-operations +[repair recover index]: ../using/repair-recovery +[security index]: ../using/security +[perf index]: ../using/performance +[troubleshoot index]: ../using/troubleshooting +[use ref]: ../using/reference + +## In This Section + +#### [Running a Cluster][use running cluster] + +A guide on basic cluster setup. + +[Learn More >>][use running cluster] + +#### [Cluster Administration][use admin index] + +Tutorials and reference documentation on cluster administration commands as well as command-line tools. + +[Learn More >>][use admin index] + +#### [Cluster Operations][cluster ops index] + +Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups. + +[Learn More >>][cluster ops index] + +#### [Repair & Recovery][repair recover index] + +Contains documentation on repairing a cluster, recovering from failure, and common errors. + +[Learn More >>][repair recover index] + +#### [Security][security index] + +Information on securing your Riak KV cluster. + +[Learn More >>][security index] + +#### [Performance][perf index] + +Articles on benchmarking your Riak KV cluster and improving performance. + +[Learn More >>][perf index] + +#### [Troubleshooting][troubleshoot index] + +Guides on troubleshooting issues and current product advisories. + +[Learn More >>][troubleshoot index] + +#### [Reference][use ref] + +Articles providing background information and implementation details on topics such as logging, bucket types, and search. 
+
+[Learn More >>][use ref]
diff --git a/content/riak/kv/2.9.2/using/admin.md b/content/riak/kv/2.9.2/using/admin.md
new file mode 100644
index 0000000000..837b774cab
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/admin.md
@@ -0,0 +1,47 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/cluster-admin
+  - /riak/kv/2.9.2/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
diff --git a/content/riak/kv/2.9.2/using/admin/commands.md b/content/riak/kv/2.9.2/using/admin/commands.md
new file mode 100644
index 0000000000..618619a66d
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/admin/commands.md
@@ -0,0 +1,374 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/cluster-admin
+  - /riak/kv/2.9.2/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command. This will execute the changes that
This will execute the changes that +have been staged and reviewed. + +> **Note on command names** +> +> Many of the commands available through the `riak-admin cluster` +interface are also available as self-standing commands. The `riak-admin +member-status` command is now the `riak-admin cluster status` command, +`riak-admin join` is now `riak-admin cluster join`, etc. +> +> We recommend using the `riak-admin cluster` interface over the older, +deprecated commands. You will receive a deprecation warning if you use +the older commands. + +## status + +Displays a variety of information about the cluster. + +```bash +riak-admin cluster status +``` + +This will return output like the following in a 3-node cluster: + +``` +---- Cluster Status ---- +Ring ready: true + ++--------------------+------+-------+-----+-------+ +| node |status| avail |ring |pending| ++--------------------+------+-------+-----+-------+ +| (C) dev1@127.0.0.1 |valid | up | 34.4| -- | +| dev2@127.0.0.1 |valid | up | 32.8| -- | +| dev3@127.0.0.1 |valid | up | 32.8| -- | ++--------------------+------+-------+-----+-------+ +``` + +In the above output, `Ring ready` denotes whether or not the cluster +agrees on [the ring][concept clusters], i.e. whether the cluster is +ready to begin taking requests. + +The following information is then displayed for each node, by nodename +(in this case `dev1@127.0.0.1`, etc.): + +* `status` --- There are five possible values for status: + * `valid` --- The node has begun participating in cluster operations + * `leaving` --- The node is is currently unloading ownership of its + [data partitions][concept clusters] to other nodes + * `exiting` --- The node's ownership transfers are complete and it is + currently shutting down + * `joining` --- The node is in the process of joining the cluster but + but has not yet completed the join process + * `down` --- The node is not currently responding +* `avail` --- There are two possible values: `up` if the node is + available and taking requests and `down!` if the node is unavailable +* `ring` --- What percentage of the Riak [ring][concept clusters] the + node is responsible for +* `pending` --- The number of pending transfers to or from the node + +In addition, the cluster's [claimant node][cluster ops add remove node] node will have a `(C)` next +to it. + +## join + +Joins the current node to another node in the cluster. + +```bash +riak-admin cluster join <node> +``` + +You _must_ specify a node to join to by nodename. You can join to any +node in the cluster. The following would join the current node to +`riak1@127.0.0.1`: + +```bash +riak-admin cluster join riak1@127.0.0.1 +``` + +Once a node joins, all of the operations necessary to establish +communication with all other nodes proceeds automatically. + +> **Note**: As with all cluster-level actions, the changes made when you +run the `cluster join` command will take effect only after you have both +planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running +[`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. +You can stage multiple joins before planning/committing. + +## leave + +Instructs the current node to hand off its +[data partitions][concept clusters], leave the cluster, and shut down. 
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+Note that *clearing* staged changes (see [clear](#clear) below) behaves differently depending on what has been staged:
+
+* If a `leave` operation has been staged, clearing it will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, clearing it will shut down the joining node after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* Clearing staged changes for a node that remains in the cluster will leave that node unaffected.
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
+
+If there is a staged change (or changes), however, you
+will see a detailed listing of what will take place upon commit, what
+the cluster will look like afterward, etc.
+
+For example, if a `cluster leave` operation is staged in a 3-node cluster the output will look something like this:
+
+```
+=============================== Staged Changes ================================
+Action Details(s)
+-------------------------------------------------------------------------------
+leave 'dev2@127.0.0.1'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 2 cluster transitions
+
+###############################################################################
+ After cluster transition 1/2
+###############################################################################
+
+================================= Membership ==================================
+Status Ring Pending Node
+-------------------------------------------------------------------------------
+leaving 32.8% 0.0% 'dev2@127.0.0.1'
+valid 34.4% 50.0% 'dev1@127.0.0.1'
+valid 32.8% 50.0% 'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+
+Transfers resulting from cluster changes: 38
+ 6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1'
+ 11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1'
+ 5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1'
+ 16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1'
+
+###############################################################################
+ After cluster transition 2/2
+###############################################################################
+
+================================= Membership ==================================
+Status Ring Pending Node
+-------------------------------------------------------------------------------
+valid 50.0% -- 'dev1@127.0.0.1'
+valid 50.0% -- 'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+```
+
+Notice that there are distinct sections of the output for each of the
+transitions that the cluster will undergo, including warnings, planned
+data transfers, etc.
+
+## commit
+
+Commits the currently staged cluster changes. Staged cluster changes
+must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed.
+
+```bash
+riak-admin cluster commit
+```
+
+## clear
+
+Clears the currently staged cluster changes.
+
+```bash
+riak-admin cluster clear
+```
+
+What `riak-admin cluster clear` does depends on the staged changes:
+
+* If a `leave` operation has been staged, `riak-admin cluster clear` will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* Running `riak-admin cluster clear` on a node that remains in the cluster leaves that node otherwise unaffected.
+
+## partitions
+
+Prints primary, secondary, and stopped partition indices and IDs either
+for the current node or for another, specified node.
The following +prints that information for the current node: + +```bash +riak-admin cluster partitions +``` + +This would print the partition information for a different node in the +cluster: + +```bash +riak-admin cluster partitions --node=<node> +``` + +Partition information is contained in a table like this: + +``` +Partitions owned by 'dev1@127.0.0.1': ++---------+-------------------------------------------------+--+ +| type | index |id| ++---------+-------------------------------------------------+--+ +| primary | 0 |0 | +| primary | 91343852333181432387730302044767688728495783936 |4 | +| primary |182687704666362864775460604089535377456991567872 |8 | +| ... | .... |..| +| primary |1438665674247607560106752257205091097473808596992|63| +|secondary| -- |--| +| stopped | -- |--| ++---------+-------------------------------------------------+--+ +``` + +## partition-count + +Displays the current partition count either for the whole cluster or for +a particular node. This would display the partition count for the +cluster: + +```bash +riak-admin cluster partition-count +``` + +This would display the count for a node: + +```bash +riak-admin cluster partition-count --node=<node> +``` + +When retrieving the partition count for a node, you'll see a table like +this: + +``` ++--------------+----------+-----+ +| node |partitions| pct | ++--------------+----------+-----+ +|dev1@127.0.0.1| 22 | 34.4| ++--------------+----------+-----+ +``` + +The `partitions` column displays the number of partitions claimed by the +node, while the `pct` column displays the percentage of the ring claimed. + +## partition + +The `cluster partition` command enables you to convert partition IDs to +indexes and vice versa using the `partition id` and `partition index` +commands, respectively. Let's say that you run the `riak-admin cluster +partitions` command and see that you have a variety of partitions, one +of which has an index of +`1004782375664995756265033322492444576013453623296`. 
You can convert +that index to an ID like this: + +```bash +riak-admin cluster partition index=1004782375664995756265033322492444576013453623296 +``` + +Conversely, if you have a partition with an ID of 20, you can retrieve +the corresponding index: + +```bash +riak-admin cluster partition id=20 +``` diff --git a/content/riak/kv/2.9.2/using/admin/riak-admin.md b/content/riak/kv/2.9.2/using/admin/riak-admin.md new file mode 100644 index 0000000000..2d5bda41f3 --- /dev/null +++ b/content/riak/kv/2.9.2/using/admin/riak-admin.md @@ -0,0 +1,717 @@ +--- +title: "riak-admin Command Line Interface" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "riak-admin CLI" + identifier: "cluster_admin_cli" + weight: 101 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/2.9.2/ops/running/tools/riak-admin + - /riak/kv/2.9.2/ops/running/tools/riak-admin +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference +[use admin commands]: {{<baseurl>}}riak/kv/2.9.2/using/admin/commands +[use admin commands#join]: {{<baseurl>}}riak/kv/2.9.2/using/admin/commands/#join +[use admin commands#leave]: {{<baseurl>}}riak/kv/2.9.2/using/admin/commands/#leave +[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/backing-up +[config reference#node-metadata]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/#node-metadata +[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce +[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/commit-hooks +[config reference#ring]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/#ring +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/inspecting-node +[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.2/using/reference/statistics-monitoring +[downgrade]: {{<baseurl>}}riak/kv/2.9.2/setup/downgrade +[security index]: {{<baseurl>}}riak/kv/2.9.2/using/security/ +[security managing]: {{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/bucket-types +[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.2/using/reference/secondary-indexes +[repair recover index]: {{<baseurl>}}riak/kv/2.9.2/using/repair-recovery +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/handoff +[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#stats + +## `riak-admin` + +The riak-admin command performs operations unrelated to node liveness, including: +node membership, backup, and basic status reporting. The node must be +running for most of these commands to work. 
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. Reip should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs have been fixed related to reip in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded Javascript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
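+
+For example, the following sketch (with an illustrative nodename) raises
+the limit to `4` on a single node until its next restart:
+
+```bash
+riak-admin transfer-limit riak@192.168.1.10 4
+```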
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Output system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10 riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
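+
+Because `riak-admin aae-status` reports on the node where it is run, one
+way to survey an entire cluster is to invoke it on each node in turn. A
+sketch, assuming SSH access and illustrative hostnames:
+
+```bash
+# Sketch: gather AAE status from every node in a small cluster
+for host in riak1 riak2 riak3; do
+  echo "== $host =="
+  ssh "$host" riak-admin aae-status
+done
+```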
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```bash
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
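+
+As a sketch of the kinds of subcommands available (the username, source,
+and permissions below are purely illustrative):
+
+```bash
+# Sketch: create a user, allow password auth from localhost,
+# grant permissions, and switch security on.
+riak-admin security add-user alice password=secret
+riak-admin security add-source alice 127.0.0.1/32 password
+riak-admin security grant riak_kv.get,riak_kv.put on any to alice
+riak-admin security enable
+```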
+
+## bucket-type
+
+Bucket types are a means of managing bucket properties introduced in
+Riak 2.0, as well as an additional namespace in Riak in addition to
+buckets and keys. This command enables you to create and modify bucket
+types, provide the status of currently available bucket types, and
+activate created bucket types.
+
+```bash
+riak-admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak-admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak-admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak-admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+ Total partitions: 64
+ Finished partitions: 44
+ Speed: 100
+ Total 2i items scanned: 0
+ Total tree objects: 0
+ Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak-admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak-admin repair-2i kill
+```
+
+## search
+
+The search command provides sub-commands for various administrative
+work related to the new Riak Search.
+
+```bash
+riak-admin search <command>
+```
+
+### aae-status
+
+```bash
+riak-admin search aae-status
+```
+
+Output active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtrees don't match for a particular key. The `Last` column is the
+number of keys repaired during the last exchange.
The `Mean` column is +the average number of keys repaired for all exchange rounds since the +node has started. The `Max` column is the maximum number of keys +repaired for a given exchange round since the node has started. + +### switch-to-new-search + +{{% note title="Only For Legacy Migration" %}} +This is only needed when migrating from legacy riak search to the new Search +(Yokozuna). +{{% /note %}} + +```bash +riak-admin search switch-to-new-search +``` + +Switch handling of the HTTP `/solr/<index>/select` resource and +protocol buffer query messages from legacy Riak Search to new Search +(Yokozuna). + +## services + +Lists available services on the node (e.g. `riak_kv`). + +```bash +riak-admin services +``` + +## ensemble-status + +This command is used to provide insight into the current status of the +consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature. + +```bash +riak-admin ensemble-status +``` + +This command can also be used to check on the status of a specific +consensus group in your cluster: + +```bash +riak-admin ensemble-status <group id> +``` + +Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency]. + +## handoff + +Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff]. + +## set + +Enables you to change the value of one of Riak's configuration +parameters on the fly, without needing to stop and restart the node. + +```bash +riak-admin set <variable>=<value> +``` + +The set command can only be used for the following +parameters: + +* `transfer_limit` +* `handoff.outbound` +* `handoff.inbound` +* `search.dist_query=off` will disable distributed query for the node +* `search.dist_query=on` will enable distributed query for the node +* `search.dist_query` will get the status of distributed query for the node + +The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted. + + +## show + +Whereas the [`riak-admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak +cluster, the `show` command enables you to view only some of those +statistics. + +```bash +riak-admin show <variable> +``` + +## describe + +Provides a brief description of one of Riak's [configurable parameters][config reference]. 
+
+```bash
+riak-admin describe <variable>
+```
+
+If you want to know the meaning of the `nodename` parameter:
+
+```bash
+riak-admin describe nodename
+```
+
+That will produce the following output:
+
+```
+nodename:
+  Name of the Erlang node
+```
diff --git a/content/riak/kv/2.9.2/using/admin/riak-cli.md b/content/riak/kv/2.9.2/using/admin/riak-cli.md
new file mode 100644
index 0000000000..d96720eaa2
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/admin/riak-cli.md
@@ -0,0 +1,200 @@
+---
+title: "riak Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "riak CLI"
+    identifier: "cluster_admin_riak_cli"
+    weight: 102
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/tools/riak
+  - /riak/kv/2.9.2/ops/running/tools/riak
+---
+
+[configuration file]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/
+[escript]: http://www.erlang.org/doc/man/escript.html
+[`riak-admin`]: {{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#top
+[configuration]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/
+
+## riak
+
+This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands:
+
+```bash
+Usage: riak «command»
+where «command» is one of the following:
+    { help | start | stop | restart | ping | console | attach
+      attach-direct | ertspath | chkconfig | escript | version | getpid
+      top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } |
+      config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+## help
+
+Provides a brief description of all available commands.
+
+## start
+
+Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given.
+
+```bash
+riak start
+```
+
+## stop
+
+Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak stop
+```
+
+## restart
+
+Stops and then starts the running Riak node without exiting the Erlang VM.
+Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak restart
+```
+
+## ping
+
+Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding.
+
+```bash
+riak ping
+```
+
+## console
+
+Starts the Riak node in the foreground, giving access to the Erlang shell and
+runtime messages. Prints `Node is already running - use 'riak attach' instead`
+when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice.
+
+```bash
+riak console
+```
+
+## attach
+
+Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached.
+
+```bash
+riak attach
+```
+
+## attach-direct
+
+Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**.
+
+```bash
+riak attach-direct
+```
+
+## ertspath
+
+Outputs the path of the Riak Erlang runtime environment:
+
+```bash
+riak ertspath
+```
+
+## chkconfig
+
+Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output.
+
+```bash
+riak chkconfig
+```
+
+## escript
+
+Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment:
+
+```bash
+riak escript <filename>
+```
+
+## version
+
+Outputs the Riak version identifier:
+
+```bash
+riak version
+```
+
+## getpid
+
+Outputs the process identifier for the currently-running instance of Riak:
+
+```bash
+riak getpid
+```
+
+## top
+
+The `riak top` command is the direct equivalent of `riak-admin top`:
+
+```bash
+riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N]
+```
+
+More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation.
+
+## config
+
+Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration.
+
+```bash
+riak config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however, `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files.
+  The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example:
+
+  ```
+  -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args
+  ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message:
+
+  ```
+  -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args
+  ```
+
+* `effective` prints the effective configuration in the following syntax:
+
+  ```
+  parameter1 = value1
+  parameter2 = value2
+  ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error:
+
+  ```
+  Disabling cuttlefish, legacy configuration files found:
+    /etc/riak/app.config
+    /etc/riak/vm.args
+  Effective config is only visible for cuttlefish conf files.
+  ```
+
+* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following:
+
+  ```
+  Documentation for storage_backend
+  Specifies the storage engine used for Riak's key-value data
+  and secondary indexes (if supported).
+
+  Valid Values:
+    - one of: bitcask, leveldb, memory, multi, prefix_multi
+  Default Value : bitcask
+  Set Value     : bitcask
+  Internal key  : riak_kv.storage_backend
+  ```
+
+Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem.
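+
+A common sequence after editing riak.conf, shown here as a sketch, is to
+validate the file and then confirm the effective value of the setting
+you changed:
+
+```bash
+# Sketch: validate the configuration, then inspect one effective setting
+riak chkconfig
+riak config effective | grep storage_backend
+```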
diff --git a/content/riak/kv/2.9.2/using/admin/riak-control.md b/content/riak/kv/2.9.2/using/admin/riak-control.md
new file mode 100644
index 0000000000..579dd02b8d
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/admin/riak-control.md
@@ -0,0 +1,233 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/riak-control
+  - /riak/kv/2.9.2/ops/advanced/riak-control
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+or
+
+listener.https.<name> = 127.0.0.1:8069
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+
+```
+
+```appconfig
+ {riak_api,
+  [
+    %% Other configs
+    ... if HTTP is configured ...
+    {http,[{"127.0.0.1",8098}]},
+    ... if HTTPS is configured ...
+    {https,[{"127.0.0.1",8069}]},
+    %% Other configs
+  ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found in the document below.
+{{% /note %}}
+
+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, false},
+    %% Other configs
+    ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, true},
+    %% Other configs
+    ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>.
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/2.9.2/using/security/basics#enabling-ssl).
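+
+The exact settings depend on your certificate setup, but a minimal
+sketch in riak.conf might look like this (the listener name, port, and
+file paths are illustrative):
+
+```riakconf
+listener.https.internal = 127.0.0.1:8069
+ssl.certfile = /etc/riak/cert.pem
+ssl.keyfile = /etc/riak/key.pem
+```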
+
+Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
+
+## Authentication
+
+Riak Control provides you the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {auth, userlist}, %% The only other available option is "none"
+    %% Other configs
+    ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {userlist, [
+        {"bob", "bob_is_the_coolest"},
+        {"polly", "h4x0r123"},
+        {"riakrocks", "cap_theorem_4_life"}
+        ]}
+    %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to https://ip_address_of_https_listener:https_port/admin.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[ ![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png) ] ({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+ +Staged changes to the cluster: + +[ ![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png) ] ({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[ ![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png) ] ({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[ ![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png) ] ({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[ ![Node Management]({{<baseurl>}}images/control_node_management.png) ] ({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode). + +[ ![Ring View]({{<baseurl>}}images/control_current_ring.png) ] ({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. diff --git a/content/riak/kv/2.9.2/using/cluster-operations.md b/content/riak/kv/2.9.2/using/cluster-operations.md new file mode 100644 index 0000000000..0d5ed6086d --- /dev/null +++ b/content/riak/kv/2.9.2/using/cluster-operations.md @@ -0,0 +1,104 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak-admin handoff` interface to enable and disable handoff. 
+
+[Learn More >>][ops handoff]
+
+
+#### [Monitoring Strong Consistency][ops strong consistency]
+
+Overview of the various statistics used in monitoring strong consistency.
+
+[Learn More >>][ops strong consistency]
+
+
+#### [V3 Multi-Datacenter][ops v3 mdc]
+
+Explains how to manage V3 replication with the `riak-repl` command.
+
+[Learn More >>][ops v3 mdc]
+
+
+#### [V2 Multi-Datacenter][ops v2 mdc]
+
+Explains how to manage V2 replication with the `riak-repl` command.
+
+[Learn More >>][ops v2 mdc]
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/TicTac-Active-anti-entropy.md b/content/riak/kv/2.9.2/using/cluster-operations/TicTac-Active-anti-entropy.md
new file mode 100644
index 0000000000..bef7028d28
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/TicTac-Active-anti-entropy.md
@@ -0,0 +1,31 @@
+---
+title: "TicTac Active Anti-Entropy"
+description: "An Active Anti-Entropy library"
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "TicTac Active Anti-Entropy"
+    identifier: "TicTac_aae"
+    weight: 111
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/kv/2.9.2/ops/advanced/tictacaae/
+  - /riak/2.9.2/ops/advanced/ticktacaae/
+---
+
+
+
+Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning.
+
+## TicTac AAE
+
+The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to fully integrate the library into the KV 3.0 release.
+
+TicTac Active Anti-Entropy makes two changes to the way anti-entropy has previously worked in Riak. The first change is to the way Merkle trees are constructed, so that they are built incrementally. The second change allows the underlying anti-entropy key store to be key-ordered while still allowing faster access to keys via their Merkle tree location or the last modified date of the object.
+
+## Configuring AAE
+
+Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and
+off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements.
\ No newline at end of file
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.9.2/using/cluster-operations/active-anti-entropy.md
new file mode 100644
index 0000000000..44da2c34f0
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/active-anti-entropy.md
@@ -0,0 +1,285 @@
+---
+title: "Managing Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Managing Active Anti-Entropy"
+    identifier: "cluster_operations_aae"
+    weight: 111
+    parent: "managing_cluster_operations"
+toc: true
+version_history:
+  in: "2.9.1+"
+aliases:
+  - /riak/kv/2.9.2/ops/advanced/aae/
+  - /riak/2.9.2/ops/advanced/aae/
+---
+[config search#throttledelay]: {{<baseurl>}}riak/kv/2.9.2/configuring/search/#search-anti-entropy-throttle-tier-delay
+[config search#throttle]: {{<baseurl>}}riak/kv/2.9.2/configuring/search/#search-anti-entropy-throttle
+
+Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes.
Riak operators can turn AAE on and off and configure and monitor its functioning. + +In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak. + +## Enabling Active Anti-Entropy + +Whether AAE is currently enabled in a node is determined by the value of +the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/). + +In Riak versions 2.0 and later, AAE is turned on by default. + +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. + +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak-admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... 
+
+================================ Entropy Trees ================================
+Index Built (ago)
+-------------------------------------------------------------------------------
+0 5.7 d
+22835963083295358096932575511191922182123945984 5.6 d
+45671926166590716193865151022383844364247891968 5.5 d
+68507889249886074290797726533575766546371837952 4.3 d
+91343852333181432387730302044767688728495783936 4.8 d
+
+================================ Keys Repaired ================================
+Index Last Mean Max
+-------------------------------------------------------------------------------
+0 0 0 0
+22835963083295358096932575511191922182123945984 0 0 0
+45671926166590716193865151022383844364247891968 0 0 0
+68507889249886074290797726533575766546371837952 0 0 0
+91343852333181432387730302044767688728495783936 0 0 0
+
+```
+
+Each of these three tables contains information for each
+[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories:
+
+Category | Measures | Description
+:--------|:---------|:-----------
+**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed
+ | `All` | How long it has been since a partition exchanged with all of its replicas
+**Entropy Trees** | `Built` | When the hash trees for a given partition were created
+**Keys Repaired** | `Last` | The number of keys repaired during the most recent key exchange
+ | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart
+ | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart
+
+All AAE status information obtainable using the `riak-admin aae-status`
+command is stored in-memory and is reset when a node is restarted, with
+the exception of hash tree build information, which is persisted on disk
+(because hash trees themselves are persisted on disk).
+
+## Configuring AAE
+
+Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and
+off but also to fine-tune your cluster's use of AAE, e.g. how
+much memory AAE processes should consume, how frequently specific
+processes should be run, etc.
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach the length
+specified by the [`search.anti_entropy.throttle.$tier.solrq_queue_length`][config search#throttledelay] parameter (more on
+that in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g.
`tier1`, `large_mailbox_tier`, etc). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but
+entails a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data corruption.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built. You can set the frequency using the
+`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
+default is every hour (`1h`); the number of hash tree builds is
+specified by `anti_entropy.tree.build_limit.number`, for which the
+default is 1.
+
+### Write Buffer Size
+
+While you are free to choose the backend for data storage in Riak,
+background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
+write buffer used by LevelDB for hash tree generation using the
+`anti_entropy.write_buffer_size` parameter. The default is `4MB`.
+
+### Open Files and Concurrency Limits
+
+The `anti_entropy.concurrency_limit` parameter determines how many AAE
+cross-node information exchanges or hash tree builds can happen
+concurrently. The default is `2`.
+
+The `anti_entropy.max_open_files` parameter sets an open-files limit for
+AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.
+
+## AAE and Riak Search
+
+Riak's AAE subsystem works to repair object inconsistencies both for
+normal key/value objects and for data related to [Riak Search](../../../developing/usage/search).
In particular, AAE acts on indexes stored in +[Solr](http://lucene.apache.org/solr/), the search platform that drives +Riak Search. Implementation details for AAE and Search can be found in +the [Search Details](../../reference/search/#active-anti-entropy-aae) +documentation. + +You can check on the status of Search-related AAE using the following +command: + +```bash +riak-admin search aae-status +``` + +The output from that command can be interpreted just like the output +discussed in the section on [monitoring](#monitoring-aae) above. diff --git a/content/riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes.md new file mode 100644 index 0000000000..bff8ba8291 --- /dev/null +++ b/content/riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes.md @@ -0,0 +1,194 @@ +--- +title: "Adding / Removing Nodes" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Adding/Removing Nodes" + identifier: "cluster_operations_add_remove_nodes" + weight: 100 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.2/ops/running/nodes/adding-removing + - /riak/kv/2.9.2/ops/running/nodes/adding-removing +--- + +[use running cluster]: {{<baseurl>}}riak/kv/2.9.2/using/running-a-cluster + +This page describes the process of adding and removing nodes to and from +a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. + +## Start the Node + +Just like the initial configuration steps, this step has to be repeated +for every node in your cluster. Before a node can join an existing +cluster it needs to be started. Depending on your mode of installation, +use either the init scripts installed by the Riak binary packages or +simply the script [`riak`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-cli/): + +```bash +/etc/init.d/riak start +``` + +or + +```bash +bin/riak start +``` + +When the node starts, it will look for a cluster description, known as +the **ring file**, in its data directory. If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. 
This command
+would stage a join to that cluster:
+
+```bash
+bin/riak-admin cluster join riak@192.168.2.2
+```
+
+If the join request is successful, you should see the following:
+
+```
+Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2'
+```
+
+If you have multiple nodes that you would like to join to an existing
+cluster, repeat this process for each of them.
+
+## Joining Nodes to Form a Cluster
+
+The process of joining a cluster involves several steps, including
+staging the proposed cluster nodes, reviewing the cluster plan, and
+committing the changes.
+
+After staging each of the cluster nodes with `riak-admin cluster join`
+commands, as in the section above, the next step in forming a cluster is
+to review the proposed plan of changes. This can be done with the
+`riak-admin cluster plan` command, which is shown in the example below.
+
+```
+=============================== Staged Changes ================================
+Action         Nodes(s)
+-------------------------------------------------------------------------------
+join           'riak@192.168.2.3'
+join           'riak@192.168.2.4'
+join           'riak@192.168.2.5'
+join           'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+                         After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring       Pending    Node
+-------------------------------------------------------------------------------
+valid     100.0%      20.3%      'riak@192.168.2.2'
+valid       0.0%      20.3%      'riak@192.168.2.3'
+valid       0.0%      20.3%      'riak@192.168.2.4'
+valid       0.0%      20.3%      'riak@192.168.2.5'
+valid       0.0%      18.8%      'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+  12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from the cluster in two ways. One assumes that a
+node is decommissioned, for example, because its added capacity is not
+needed anymore or because it's explicitly replaced with a new one. The
+second is relevant for failure scenarios in which a node has crashed and
+is irrecoverable and thus must be removed from the cluster from another
+node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster.
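+
+Taken together, a typical decommission looks like the following sketch
+(a minimal example run on the departing node; the review-and-commit
+steps are explained below):
+
+```bash
+# Run on the node that is leaving the cluster
+riak-admin cluster leave
+
+# Review the proposed ownership transfers, then commit them
+riak-admin cluster plan
+riak-admin cluster commit
+```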
+
+As with joining a node, after executing `riak-admin cluster leave` the
+proposed change must be reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before anything actually
+takes place.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing. Running
+`riak-admin cluster leave` without an argument simply selects the
+current node for you automatically. In either case, the plan to have a
+node leave the cluster must first be reviewed with `riak-admin cluster
+plan` and committed with `riak-admin cluster commit` before any changes
+actually take place.
+
+
+## Pausing a `join` or `leave`
+
+{{% note title="Warning" %}}
+Pausing may impact cluster health and is not recommended for more than a short period of time.
+{{% /note %}}
+
+To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0:
+
+```bash
+riak-admin transfer-limit <node> 0
+```
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/backend.md b/content/riak/kv/2.9.2/using/cluster-operations/backend.md
new file mode 100644
index 0000000000..4a7a93c922
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/backend.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+#menu:
+#  riak_kv-2.9.2:
+#    name: "Backend"
+#    identifier: "cluster_operations_backend"
+#    weight: 112
+#    parent: "managing_cluster_operations"
+toc: true
+---
+
+**TODO: Add content**
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/backing-up.md b/content/riak/kv/2.9.2/using/cluster-operations/backing-up.md
new file mode 100644
index 0000000000..8e2e1321e0
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/backing-up.md
@@ -0,0 +1,267 @@
+---
+title: "Backing Up"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Backing Up"
+    identifier: "cluster_operations_backing_up"
+    weight: 106
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/backups
+  - /riak/kv/2.9.2/ops/running/backups
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/bitcask
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/reference/strong-consistency
+[concept aae]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/
+[aae read repair]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy
+
+Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios.
+
+Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios.
+
+This page covers how to perform backups of Riak KV data.
+ +## Overview + +Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar. + +Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes. + +The basic process for getting a backup of Riak KV from a node is as follows: + +1. Stop Riak KV with `riak stop`. +2. Backup the appropriate data, ring, and configuration directories. +3. Start Riak KV. + +Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting. + +{{% note title="Backups and eventual consistency" %}} +Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. + +Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +{{% /note %}} + +## OS-Specific Directory Locations + +The default Riak KV data, ring, and configuration directories for each of the supported operating systems is as follows: + +#### Debian and Ubuntu + +Data | Directory +:----|:--------- +Bitcask | `/var/lib/riak/bitcask` +LevelDB | `/var/lib/riak/leveldb` +Ring | `/var/lib/riak/ring` +Configuration | `/etc/riak` +Cluster Metadata | `/var/lib/riak/cluster_meta` +Search | `/var/lib/riak/yz` +Strong consistency | `/var/lib/riak/ensembles` + +#### Fedora and RHEL + +Data | Directory +:----|:--------- +Bitcask | `/var/lib/riak/bitcask` +LevelDB | `/var/lib/riak/leveldb` +Ring | `/var/lib/riak/ring` +Configuration | `/etc/riak` +Cluster Metadata | `/var/lib/riak/cluster_meta` +Search | `/var/lib/riak/yz` +Strong consistency | `/var/lib/riak/ensembles` + +#### FreeBSD + +Data | Directory +:----|:--------- +Bitcask | `/var/db/riak/bitcask` +LevelDB | `/var/db/riak/leveldb` +Ring | `/var/db/riak/ring` +Configuration | `/usr/local/etc/riak` +Cluster Metadata | `/var/db/riak/cluster_meta` +Search | `/var/db/riak/yz` +Strong consistency | `/var/db/riak/ensembles` + +#### OS X + +Data | Directory +:----|:--------- +Bitcask | `./data/bitcask` +LevelDB | `./data/leveldb` +Ring | `./data/riak/ring` +Configuration | `./etc` +Cluster Metadata | `./data/riak/cluster_meta` +Search | `./data/riak/yz` +Strong consistency | `./data/ensembles` + +**Note**: OS X paths are relative to the directory in which the package +was extracted. + +#### SmartOS + +Data | Directory +:----|:--------- +Bitcask | `/var/db/riak/bitcask` +LevelDB | `/var/db/riak/leveldb` +Ring | `/var/db/riak/ring` +Configuration | `/opt/local/etc/riak` +Cluster Metadata | `/var/db/riak/cluster_meta` +Search | `/var/db/riak/yz` +Strong consistency | `/var/db/riak/ensembles` + +#### Solaris + +Data | Directory +:----|:--------- +Bitcask | `/opt/riak/data/bitcask` +LevelDB | `/opt/riak/data/leveldb` +Ring | `/opt/riak/ring` +Configuration | `/opt/riak/etc` +Cluster Metadata | `/opt/riak/cluster_meta` +Search | `/opt/riak/yz` +Strong consistency | `/opt/riak/data/ensembles` + +## Performing Backups + +{{% note title="Deprecation notice" %}} +In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#backup) command commonly used for +backups. 
This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
+{{% /note %}}
+
+Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.
+
+Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
+installation.
+
+The following examples use `tar`:
+
+{{% note %}}
+Backups must be performed while Riak KV is stopped to prevent data loss.
+{{% /note %}}
+
+### Bitcask
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak
+```
+
+### LevelDB
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak
+```
+
+### Cluster Metadata
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/cluster_meta
+```
+
+### Search / Solr Data
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/yz
+```
+
+### Strong Consistency Data
+
+Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature
+can be backed up in an analogous fashion:
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/ensembles
+```
+
+## Restoring a Node
+
+The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment.
+
+If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process:
+
+1. Install Riak on the new node.
+2. Restore your old node's configuration files, data directory, and ring
+   directory.
+3. Start the node and verify proper operation with `riak ping`,
+   `riak-admin status`, and other methods you use to check node health.
+
+If the node name of a restored node (`-name` argument in `vm.args` or
+`nodename` parameter in `riak.conf`) is different from the name of the
+node that the restored backup was taken from, you will need to
+additionally:
+
+1. Mark the original instance down in the cluster using
+   [`riak-admin down <node>`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#down)
+2. Join the restored node to the cluster using
+   [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster-join)
+3. Replace the original instance with the renamed instance with
+   [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster-force-replace)
+4. Plan the changes to the cluster with `riak-admin cluster plan`
+5. Finally, commit the cluster changes with `riak-admin cluster commit`
+
+{{% note %}}
+For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/2.9.2/using/admin/).
+{{% /note %}}
+
+For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`.
+
+1. Join to any existing cluster node.
+
+    ```bash
+    riak-admin cluster join riak@riak2.example.com
+    ```
+
+2. Mark the old instance down.
+
+    ```bash
+    riak-admin down riak@riak1.example.com
+    ```
+
+3. Force-replace the original instance with the new one.
+
+    ```bash
+    riak-admin cluster force-replace \
+      riak@riak1.example.com riak@riak6.example.com
+    ```
+
+4. Display and review the cluster change plan.
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+5. Commit the changes to the cluster.
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+Your [configuration files][config reference] should also be changed to match the new name in addition to running the commands (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change, but the hostnames are used for the node names and the hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member-status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/2.9.2/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/bucket-types.md b/content/riak/kv/2.9.2/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..62b390c72a
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/bucket-types.md
@@ -0,0 +1,58 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) to namespace and configure all buckets you use.
Bucket types have a lower overhead within the cluster than the +default bucket namespace but require an additional setup step on the +command line. + +## Creating a Bucket Type + +When creating a new bucket type, you can create a bucket type without +any properties and set individual buckets to be indexed. The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` diff --git a/content/riak/kv/2.9.2/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.9.2/using/cluster-operations/changing-cluster-info.md new file mode 100644 index 0000000000..012c6a987f --- /dev/null +++ b/content/riak/kv/2.9.2/using/cluster-operations/changing-cluster-info.md @@ -0,0 +1,454 @@ +--- +title: "Changing Cluster Information" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Changing Cluster Info" + identifier: "cluster_operations_change_info" + weight: 101 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.2/ops/running/nodes/renaming + - /riak/kv/2.9.2/ops/running/nodes/renaming +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference + +## Change the Node Name + +The node name is an important setting for the Erlang VM, especially when +you want to build a cluster of nodes, as the node name identifies both +the Erlang application and the host name on the network. All nodes in +the Riak cluster need these node names to communicate and coordinate +with each other. + +In your configuration files, the node name defaults to `riak@127.0.0.1`. +To change the node name, change the following line: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +Change it to something that corresponds to either the IP address or a +resolvable host name for this particular node, like so: + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +## Change the HTTP and Protocol Buffers binding address + +By default, Riak's HTTP and Protocol Buffers services are bound to the +local interface, i.e. 127.0.0.1, and are therefore unable to serve +requests from the outside network. 
The relevant setting is in your
+[configuration files][config reference]:
+
+```riakconf
+# For HTTP
+listener.http.internal = 127.0.0.1:8098
+
+# For Protocol Buffers
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+% For HTTP, in the riak_core section
+{http, [ {"127.0.0.1", 8098 } ]},
+
+% For Protocol Buffers, in the riak_api section
+{pb, [ {"127.0.0.1", 8087} ] },
+```
+
+Either change it to use an IP address that corresponds to one of the
+server's network interfaces, or 0.0.0.0 to allow access from all
+interfaces and networks, e.g.:
+
+```riakconf
+listener.http.internal = 0.0.0.0:8098
+```
+
+```appconfig
+% In the riak_core section
+{http, [ {"0.0.0.0", 8098 } ]},
+```
+
+The same configuration should be changed for the Protocol Buffers
+interface if you intend on using it (which we recommend). Change the
+following line:
+
+```riakconf
+listener.protobuf.internal = 0.0.0.0:8087
+```
+
+```appconfig
+% In the riak_api section
+{pb, [ {"0.0.0.0", 8087} ] },
+```
+
+## Rename Single Node Clusters
+
+To rename a single-node development cluster:
+
+1. Stop the node with `riak stop`.
+
+2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to the new name.
+
+3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`.
+
+4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`.
+
+5. Start Riak on the node with `riak start`.
+
+
+## Rename Multi-Node Clusters
+
+For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node.
+
+Prior to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime.
+
+There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario.
+
+The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method.
+
+### Example Scenario
+
+For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration:
+
+* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11
+* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12
+* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13
+* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14
+* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15
+
+The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use.
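+
+Before reconfiguring anything, it can be useful to confirm each node's
+current name. `riak-admin status` reports it as the `nodename`
+statistic (documented later in this guide), so a minimal check, assuming
+`riak-admin` is on the `PATH`, is:
+
+```bash
+# Print the node's current Erlang node name as reported by riak-admin status
+riak-admin status | grep -E '^nodename'
+```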
+
+The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation.
+
+### Process
+
+This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section.
+
+1. [Down the node to be reconfigured](#down)
+2. [Reconfigure node to use new address](#reconfigure)
+3. [Repeat previous steps on each node](#repeat)
+
+
+<a id="down"></a>
+#### Down the Node
+
+1. Stop Riak on `node1.localdomain`:
+
+    ```bash
+    riak stop
+    ```
+
+    The output should look like this:
+
+    ```
+    Attempting to restart script through sudo -H -u riak
+    ok
+    ```
+
+2. From the `node2.localdomain` node, mark `riak@10.1.42.11` down:
+
+    ```bash
+    riak-admin down riak@10.1.42.11
+    ```
+
+    Successfully marking the node down should produce output like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: "riak@10.1.42.11" marked as down
+    ```
+
+    This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node.
+
+<a id="reconfigure"></a>
+#### Reconfigure Node to Use New Address
+
+Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps:
+
+1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example:
+
+    `riak.conf`: `nodename = riak@192.168.17.11`
+    `vm.args`: `-name riak@192.168.17.11`
+
+2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Rename Single Node Clusters](#rename-single-node-clusters).
+
+3. Rename the node's `ring` directory, the location of which is described in step 4 of [Rename Single Node Clusters](#rename-single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process.
+
+4. Start Riak on `node1.localdomain`.
+
+    ```bash
+    riak start
+    ```
+
+5. Join the node back into the cluster.
+
+    ```bash
+    riak-admin cluster join riak@10.1.42.12
+    ```
+
+    Successful staging of the join request should have output like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12'
+    ```
+
+6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`:
+
+    ```bash
+    riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11
+    ```
+
+    Successful force replacement staging output looks like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11'
+    ```
+
+7. Review the new changes with `riak-admin cluster plan`:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Example output:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    =========================== Staged Changes ============================
+    Action         Nodes(s)
+    -----------------------------------------------------------------------
+    join           'riak@192.168.17.11'
+    force-replace  'riak@10.1.42.11' with 'riak@192.168.17.11'
+    -----------------------------------------------------------------------
+
+    WARNING: All of 'riak@10.1.42.11' replicas will be lost
+
+    NOTE: Applying these changes will result in 1 cluster transition
+
+    #######################################################################
+                         After cluster transition 1/1
+    #######################################################################
+
+    ============================= Membership ==============================
+    Status     Ring    Pending    Node
+    -----------------------------------------------------------------------
+    valid     20.3%      --      'riak@192.168.17.11'
+    valid     20.3%      --      'riak@10.1.42.12'
+    valid     20.3%      --      'riak@10.1.42.13'
+    valid     20.3%      --      'riak@10.1.42.14'
+    valid     18.8%      --      'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    Partitions reassigned from cluster changes: 13
+      13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11'
+    ```
+
+8. Commit the new changes to the cluster with `riak-admin cluster commit`:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    Output from the command should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Cluster changes committed
+    ```
+
+9. Check that the node is participating in the cluster and functioning as expected:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    Output should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    ============================= Membership ==============================
+    Status     Ring    Pending    Node
+    -----------------------------------------------------------------------
+    valid     20.3%      --      'riak@192.168.17.11'
+    valid     20.3%      --      'riak@10.1.42.12'
+    valid     20.3%      --      'riak@10.1.42.13'
+    valid     20.3%      --      'riak@10.1.42.14'
+    valid     18.8%      --      'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+10. Monitor hinted handoff transfers with the `riak-admin transfers` command to ensure that they have finished.
+
+11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed.
+
+{{% note title="Note" %}}
+When using the `riak-admin cluster force-replace` command, you will always get a
+warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be
+lost`. Since we didn't delete any data files and we are replacing the node
+with itself under a new name, we will not lose any replicas.
+{{% /note %}}
+
+<a id="repeat"></a>
+#### Repeat previous steps on each node
+
+Repeat the steps above for each of the remaining nodes in the cluster.
+
+Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster.
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the below steps can be used to rename nodes in a cluster that is being restored from backups. The below steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (or `-name` in `vm.args`) from `riak@10.1.42.11` to the new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid     20.3%      --      'riak@192.168.17.11'
+    down      20.3%      --      'riak@10.1.42.12'
+    down      20.3%      --      'riak@10.1.42.13'
+    down      20.3%      --      'riak@10.1.42.14'
+    down      18.8%      --      'riak@10.1.42.15'
+    -------------------------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+    ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+    ```bash
+    ================================== Claimant ===================================
+    Claimant:  'riak@192.168.17.11'
+    Status:     up
+    Ring Ready: true
+
+    ============================== Ownership Handoff ==============================
+    No pending changes.
+
+    ============================== Unreachable Nodes ==============================
+    All nodes are up and reachable
+    ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args`, as described above.
+
+2. Move aside the ring directory. As in [Rename Multi-Node Clusters](#rename-multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. 
Force replace each node with its old node name. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`. + +6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to below: + + ```bash + =============================== Staged Changes ================================ + Action Details(s) + ------------------------------------------------------------------------------- + force-replace 'riak@10.1.42.12' with 'riak@192.168.17.12' + force-replace 'riak@10.1.42.13' with 'riak@192.168.17.13' + force-replace 'riak@10.1.42.14' with 'riak@192.168.17.14' + force-replace 'riak@10.1.42.15' with 'riak@192.168.17.15' + join 'riak@192.168.17.12' + join 'riak@192.168.17.13' + join 'riak@192.168.17.14' + join 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.12' replicas will be lost + WARNING: All of 'riak@10.1.42.13' replicas will be lost + WARNING: All of 'riak@10.1.42.14' replicas will be lost + WARNING: All of 'riak@10.1.42.15' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ############################################################################### + After cluster transition 1/1 + ############################################################################### + + ================================= Membership ================================== + Status Ring Pending Node + ------------------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@192.168.17.12' + valid 20.3% -- 'riak@192.168.17.13' + valid 20.3% -- 'riak@192.168.17.14' + valid 18.8% -- 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 51 + 13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12' + 13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13' + 13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14' + 12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15' + ``` + +7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`. + +8. 
Once the cluster transition has completed, all node names should be changed and be marked as valid in `riak-admin member-status`, like below:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid     20.3%      --      'riak@192.168.17.11'
+    valid     20.3%      --      'riak@192.168.17.12'
+    valid     20.3%      --      'riak@192.168.17.13'
+    valid     20.3%      --      'riak@192.168.17.14'
+    valid     18.8%      --      'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/handoff.md b/content/riak/kv/2.9.2/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..d13f6b52a7
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/handoff.md
@@ -0,0 +1,116 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/handoff
+  - /riak/kv/2.9.2/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to change your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command requires
+that you specify both a node or nodes to be targeted by the command and
+whether you'd like to disable inbound handoff, outbound handoff, or
+both. The `disable` command works just like `enable`. This command
+would disable all forms of handoff on all nodes, to give just one
+example:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster.
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table providing the following information
+about each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, you will see details for each ongoing
+transfer.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the configuration
+reference linked above.
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/inspecting-node.md b/content/riak/kv/2.9.2/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..5156ca5283
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,492 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/nodes/inspecting
+  - /riak/kv/2.9.2/ops/running/nodes/inspecting
+---
+
+When you need to inspect a Riak node to gather metrics on performance or
+to diagnose potential issues, a number of tools are available to help;
+they are either included with Riak itself or made available through the
+Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note that, for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required for statistics to be generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact, as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node.
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET +FSM Siblings are inextricably linked. These are one-minute stats. + +Stat | Description +------------------------------|--------------------------------------------------- +`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute + +### Total Stats + +Total Stats represent the total number of times a particular activity +has occurred since this node was started. + +Stat | Description +---------------------------------------|--------------------------------------------------- +`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes +`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes +`vnode_gets_total` | Total number of GETs coordinated by local vnodes +`vnode_puts_total` | Total number of PUTS coordinated by local vnodes +`read_repairs_total` | Total number of Read Repairs this node has coordinated +`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination +`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy +`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads +`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes +`vnode_index_writes_postings_total` | Total number of individual secondary index values written +`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes +`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted +`pbc_connects_total` | Total number of Protocol Buffers connections made +`precommit_fail` | Total number of pre-commit hook failures +`postcommit_fail` | Total number of post-commit hook failures +`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection +`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection +`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas +`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas +`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas +`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas + +### Timestamps + +Some of the Erlang applications that Riak is comprised of contribute +statistics to `riak-admin status`. The below timestamps record, in +Epoch time, the last time statistics for that application were +generated. + +Stat | Description +--------------------|--------------------------------------------------- +`riak_kv_stat_ts` | The last time Riak KV stats were generated. +`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated. 
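+
+All of the statistics above come from the same `riak-admin status`
+listing, so a single counter can be watched by filtering that output in
+the shell. Below is a minimal sketch, assuming the stat name is one of
+those documented above and following the 90-120 second sampling
+recommendation:
+
+```bash
+# Sample one counter from riak-admin status every two minutes
+while true; do
+  riak-admin status | grep -E '^node_gets '
+  sleep 120
+done
+```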
+
+### Ring
+
+General ring information is reported in `riak-admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members` | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership` | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang's cpu_sup module,
+documentation for which can be found at [ErlDocs:
+cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1` | The average number of active processes for the last 1 minute (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg5` | The average number of active processes for the last 5 minutes (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg15` | The average number of active processes for the last 15 minutes (equivalent to the top(1) command's load average when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine,
+documentation for which can be found at [ErlDocs:
+Memory](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total` | Total allocated memory (sum of processes and system)
+`memory_processes` | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system` | Total allocated memory that is not directly related to an Erlang process
+`memory_atom` | Total amount of memory currently allocated for atom storage
+`memory_atom_used` | Total amount of memory currently used for atom storage
+`memory_binary` | Total amount of memory used for binaries
+`memory_code` | Total amount of memory allocated for Erlang code
+`memory_ets` | Total memory allocated for Erlang Term Storage
+`mem_total` | Total available system memory
+`mem_allocated` | Total memory allocated for this node
+
+### Erlang VM
+
+The below statistics describe properties of the Erlang VM.
+
+Stat | Description
+--------------------------|---------------------------------------------------
+`nodename` | The name this node uses to identify itself
+`connected_nodes` | A list of the nodes that this node is aware of at this time
+`sys_driver_version` | String representing the Erlang driver version in use by the runtime system
+`sys_global_heaps_size` | Current size of the shared global heap
+`sys_heap_type` | String representing the heap type in use (one of private, shared, hybrid)
+`sys_logical_processors` | Number of logical processors available on the system
+`sys_otp_release` | Erlang OTP release version in use on the node
+`sys_process_count` | Number of processes currently running in the Erlang VM
+`sys_smp_support` | Boolean value representing whether symmetric multi-processing (SMP) is available
+`sys_system_version` | Detailed Erlang version information
+`sys_system_architecture` | The node operating system and hardware architecture
+`sys_threads_enabled` | Boolean value representing whether threads are enabled
+`sys_thread_pool_size` | Number of threads in the asynchronous thread pool
+`sys_wordsize` | Size of Erlang term words in bytes as an integer; for example, 4 is returned on 32-bit architectures and 8 is returned on 64-bit architectures
+
+### Miscellaneous Information
+
+Miscellaneous Information provides additional details particular to this
+node.
+
+Stat | Description
+---------------------------|---------------------------------------------------
+`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as `undefined` if LevelDB is not being used.
+`disk` | Information about the disk, taken from Erlang's `disksup` module. Reported as `[{"ID",KBytes_Used,Percent_Util}]`.
+`storage_backend` | The storage backend currently in use.
+
+### Pipeline Metrics
+
+The following metrics from `riak_pipe` are generated during MapReduce
+operations.
+
+Stat | Description
+--------------------------------|---------------------------------------------------
+`pipeline_active` | The number of pipelines active in the last 60 seconds
+`pipeline_create_count` | The total number of pipelines created since the node was started
+`pipeline_create_error_count` | The total number of pipeline creation errors since the node was started
+`pipeline_create_error_one` | The number of pipeline creation errors in the last 60 seconds
+`pipeline_create_one` | The number of pipelines created in the last 60 seconds
+
+### Application and Subsystem Versions
+
+The specific version of each Erlang application and subsystem which
+makes up a Riak node is present in the `riak-admin status` output. Each
+application is linked below next to its version identifier.
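+
+Since every stat in the following table ends in `_version`, a sorted
+listing of the whole set is easy to produce; again, this is just an
+illustrative one-liner:
+
+```bash
+riak-admin status | grep '_version' | sort
+```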
+
+Stat | Description
+------------------------|---------------------------------------------------
+`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl)
+`riak_control_version` | [Riak Control](http://github.com/basho/riak_control)
+`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info)
+`riak_search_version` | [Riak Search](http://github.com/basho/riak_search)
+`merge_index_version` | [Merge Index](http://github.com/basho/merge_index)
+`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv)
+`sidejob_version` | [Sidejob](http://github.com/basho/sidejob)
+`riak_api_version` | [Riak API](http://github.com/basho/riak_api)
+`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe)
+`riak_core_version` | [Riak Core](http://github.com/basho/riak_core)
+`bitcask_version` | [Bitcask](http://github.com/basho/bitcask)
+`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats)
+`webmachine_version` | [Webmachine](http://github.com/basho/webmachine)
+`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb)
+`inets_version` | [inets](http://erlang.org/doc/apps/inets/)
+`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js)
+`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/)
+`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/)
+`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon)
+`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/)
+`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/)
+`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/)
+`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/)
+`lager_version` | [Lager](http://github.com/DeadZen/lager)
+`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush)
+`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/)
+`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/)
+`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/)
+`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/)
+
+### Riak Search Statistics
+
+The following statistics related to Riak Search message queues are
+available.
+
+Stat | Description
+-----------------------------|---------------------------------------------------
+`riak_search_vnodeq_max` | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_mean` | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_median` | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_min` | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_total` | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started
+`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem
+
+Note that, under ideal operation and with the exception of
+`riak_search_vnodes_running`, these statistics should contain low values
+(e.g., 0-10). The presence of higher values could be indicative of an issue.
+
+## `riak-debug`
+
+The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes.
+
+`riak-debug` also runs `riak-admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well.
+
+{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}}
+The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion.
+{{% /note %}}
+
+## Strong Consistency Stats
+
+Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The tables below list those stats.
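+
+Because they all carry the `consistent_` prefix, they are easy to
+isolate. One hedged example using the HTTP [status
+endpoint](../../../developing/api/http/status/), assuming the node's
+HTTP interface is listening on the default port 8098 and that `jq` is
+installed:
+
+```bash
+curl -s http://127.0.0.1:8098/stats | jq 'with_entries(select(.key | startswith("consistent_")))'
+```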
+
+### GET-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
+`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
+`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+
+### PUT-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
+`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
+`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+
+## `riak-admin diag`
+
+Running `riak-admin diag` by itself will perform a check of all of the
+data partitions in your cluster.
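+
+For example, invoked with no arguments:
+
+```bash
+riak-admin diag
+```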
+
+It will return a listing of partitions
+that have been checked, each of which looks something like this:
+
+```
+{1392993748081016843912887106182707253109560705024, % the partition checked
+ 'dev-rel@127.0.0.1'}, % that partition's nodename
+```
+
+At the end of that (potentially very long) listing of checked
+partitions, it will print notices, warnings, and other pieces of
+information about issues that it has found, including date/time, message
+type, and a detailed description. Here's an example:
+
+```
+15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
+15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
+```
+
+Messages bear the following types (derived from
+[syslog](http://en.wikipedia.org/wiki/Syslog) security levels):
+
+* `debug`
+* `info`
+* `notice`
+* `warning`
+* `error`
+* `critical`
+* `alert`
+* `emergency`
+
+#### Command flags
+
+Passing the `--help` flag will return a list of flags and commands
+that can be used with Riaknostic:
+
+```
+Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]
+
+-h, --help    Display help/usage dialogue
+-d, --level   Minimum message severity level (default: notice)
+-l, --list    Describe available diagnostic tasks
+--export      Package system info in '/export.zip'
+check_name    A specific check to run
+```
+
+Running `riak-admin diag` with the `--list` flag will return a list of
+available diagnostic checks. The following checks are available:
+
+Check | Description
+:-----|:-----------
+`disk` | Data directory permissions and atime
+`dumps` | Find crash dumps
+`memory_use` | Measure memory usage
+`nodes_connected` | Cluster node liveness
+`ring_membership` | Cluster membership validity
+`ring_preflists` | Check if the ring satisfies `n_val`
+`ring_size` | Check if the ring size is valid
+`search` | Check whether Riak Search is enabled on all nodes
+
+The `--level` flag enables you to specify the log level and thus to
+filter messages based on type. You can pass in any of the message types
+listed above (`debug`, `info`, etc.).
+
+The `--level` flag can be used when running `riak-admin diag` with or
+without specifying a diagnostic check.
+
+#### Contributing
+
+Do you have an idea that would help us improve Riaknostic? If so, fork
+the [GitHub repository](https://github.com/basho/riaknostic) and send us
+a pull request with your changes. The code is documented with
+[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
+Docs a read before you contribute.
+
+If you want to run the Riaknostic script while developing and you don't
+have it hooked up to your local Riak installation, you can invoke it
+directly like so:
+
+```bash
+./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
+```
+
+Those extra options are usually assigned by the `riak-admin` script for
+you, but here's how to set them:
+
+* `--etc` --- The location of your Riak configuration directory (usually
+  `/etc`). In the example above, configuration is in the generated
+  directory of a source checkout of Riak.
+* `--base` --- The "base" directory of Riak, usually the root of the
+  generated directory or `/usr/lib/riak` on Linux. Scan the
+  `riak-admin` script for how the `RUNNER_BASE_DIR` variable is
+  assigned on your platform.
+* `--user` --- The user/UID as which the Riak node runs. In a source
+  checkout, it's the current user; on most systems, it's `riak`.
+
+## Related Resources
+
+* [The riak-admin configuration management tool](../../admin/riak-admin/)
+* [Riaknostic](http://riaknostic.basho.com/)
+* [HTTP API Status](../../../developing/api/http/status/)
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/load-balancing.md b/content/riak/kv/2.9.2/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..bdd0cc27d2
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/load-balancing.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Load Balancing"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+#menu:
+#  riak_kv-2.9.2:
+#    name: "Load Balancing"
+#    identifier: "cluster_operations_load_balancing"
+#    weight: 111
+#    parent: "managing_cluster_operations"
+toc: true
+---
+
+**TODO: Add content (not sure where this exists in docs)**
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/logging.md b/content/riak/kv/2.9.2/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..72e998de9a
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/logging.md
@@ -0,0 +1,42 @@
+---
+title: "Enabling and Disabling Debug Logging"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Logging"
+    identifier: "cluster_operations_logging"
+    weight: 105
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+If you'd like to enable debug logging on the current node, i.e., set the
+console log level to `debug`, you can do so without restarting the node
+by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:
+
+```erlang
+lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
+```
+
+You should replace the file location above (`/var/log/riak/console.log`)
+with your platform-specific location, e.g. `./log/console.log` for a
+source installation. This location is specified by the
+`log.console.file` parameter explained above.
+
+If you'd like to enable debug logging on _all_ nodes instead of just one
+node, you can enter the Erlang console of any running node by running
+`riak attach` and enter the following:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
+```
+
+As before, use the appropriate log file location for your cluster.
+
+At any time, you can set the log level back to `info`:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
+```
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/replacing-node.md b/content/riak/kv/2.9.2/using/cluster-operations/replacing-node.md
new file mode 100644
index 0000000000..dd8422ff9b
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/replacing-node.md
@@ -0,0 +1,95 @@
+---
+title: "Replacing a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Replacing a Node"
+    identifier: "cluster_operations_replace_node"
+    weight: 102
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+At some point, for various reasons, you might need to replace a node in
+your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/2.9.2/using/repair-recovery)). Here is the recommended way to go
+about replacing a node.
+
+1. Back up your data directory on the node in question. In this example
+scenario, we'll call the node `riak4`:
+
+    ```bash
+    sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+    ```
+
+    If you have any unforeseen issues at any point in the node
+    replacement process, you can restore the node's data from this
+    backup.
+
+2. Download and install Riak on the new node you wish to bring into the
+cluster and have it replace the `riak4` node. We'll call the new node
+`riak7` for the purpose of this example.
+
+3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-cli/#start):
+
+    ```bash
+    riak start
+    ```
+
+4. Plan the join of the new `riak7` node to an existing node already
+participating in the cluster (for example, `riak0`) with the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster) command executed on the new `riak7` node:
+
+    ```bash
+    riak-admin cluster join riak0
+    ```
+
+5. Plan the replacement of the existing `riak4` node with the new
+`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster replace riak4 riak7
+    ```
+
+    <div class=info>
+    <div class=title>Single Nodes</div>
+    If a node is started singly using default settings (as, for example,
+    you might do when you are building your first test environment), you
+    will need to remove the ring files from the data directory after you
+    edit `/etc/vm.args`. `riak-admin cluster replace` will not work as
+    the node has not been joined to a cluster.
+    </div>
+
+6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster) command executed on the new
+`riak7` node:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+7. If the changes are correct, you can commit them with the
+[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster):
+
+    ```bash
+    riak-admin cluster clear
+    ```
+
+Once you have successfully replaced the node, the old `riak4` node should
+begin leaving the cluster. You can check on ring readiness after replacing
+the node with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#ringready)
+and [`riak-admin member-status`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#member-status)
+commands.
+
+{{% note title="Ring Settling" %}}
+You'll need to make sure that no other ring changes occur between the time
+when you start the new node and the ring settles with the new IP info.
+
+The ring is considered settled when the new node reports `true` when you run
+the `riak-admin ringready` command.
+{{% /note %}}
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/secondary-indexes.md b/content/riak/kv/2.9.2/using/cluster-operations/secondary-indexes.md
new file mode 100644
index 0000000000..a376f50099
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/secondary-indexes.md
@@ -0,0 +1,80 @@
+---
+draft: true
+title: "Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+#menu:
+#  riak_kv-2.9.2:
+#    name: "Secondary Indexes"
+#    identifier: "cluster_operations_2i"
+#    weight: 109
+#    parent: "managing_cluster_operations"
+toc: true
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida.
+Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor
+regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris
+coniugis.
+
+## Troiana quoque
+
+Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe
+sanctique meum*; est. [Gente inimica
+premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret
+tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen
+ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret
+quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim
+suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis
+nimium in miserata?
+
+1. `In naribus aequos aberant`
+2. Naturae murmura te rimas suarum vulnus quod
+3. Socios leto loquor timide
+4. Ergo sub
+5. Patrias mihi consumite breve
+
+## Ruit huic movit luminibus excubias arma
+
+> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex
+vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat
+*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens
+cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec.
+
+1. Hic causam et dilecte nudae nec corpus
+2. Cor Si nive
+3. Petis equos perosa tu perterrita exitus non
+4. Per et et ire geminos parte
+5. Aqua coniunx cecidisse sonum
+
+```
+Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum
+potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et
+nec rubebant pietas, ipsa.
+```
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.9.2/using/cluster-operations/strong-consistency.md
new file mode 100644
index 0000000000..b330f5f226
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/strong-consistency.md
@@ -0,0 +1,71 @@
+---
+title: "Monitoring Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Monitoring Strong Consistency"
+    identifier: "cluster_operations_strong_consistency"
+    weight: 110
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+{{% note title="Please Note:" %}}
+Riak KV's strong consistency is an experimental feature and may be removed
+from the product in the future.
+Strong consistency is not commercially
+supported or production-ready. Strong consistency is incompatible with
+Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB
+Secondary Indexes, Riak Data Types, and Commit Hooks. We do not recommend its
+usage in any production environment.
+{{% /note %}}
+
+## Monitoring Strong Consistency
+
+Riak provides a wide variety of data related to the current operating
+status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#status) command. That data now
+includes statistics specific to strongly consistent operations.
+
+A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/inspecting-node).
+All strong consistency-related stats are prefixed with `consistent_`,
+e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are
+so-called "one-minute stats," meaning that they reflect node activity in
+the last minute.
+
+Strong consistency stats fall into two categories: GET-related and
+PUT-related stats.
+
+### GET-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
+`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
+`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute
+`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+
+### PUT-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
+`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
+`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute
+`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.9.2/using/cluster-operations/v2-multi-datacenter.md
new file mode 100644
index 0000000000..a93fe2c8e4
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/v2-multi-datacenter.md
@@ -0,0 +1,259 @@
+---
+title_supertext: "V2 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "V2 Multi-Datacenter"
+    identifier: "cluster_operations_v2"
+    weight: 115
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.2/ops/mdc/v2/operations
+  - /riak/kv/2.9.2/ops/mdc/v2/operations
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication system is largely
+controlled by the `riak-repl` command. The sections below detail the
+available subcommands.
+
+## add-listener
+
+Adds a listener (primary) to the given node, IP address, and port.
+
+```bash
+riak-repl add-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-nat-listener
+
+Adds a NAT-aware listener (primary) to the given node, IP address, port,
+NAT IP, and NAT port. If a non-NAT listener already exists with the same
+internal IP and port, it is "upgraded" to a NAT Listener.
+
+```bash
+riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010
+```
+
+## del-listener
+
+Removes and shuts down a listener (primary) on the given node, IP
+address, and port.
+
+```bash
+riak-repl del-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-site
+
+Adds a site (secondary) to the local node, connecting to the specified
+listener.
+
+```bash
+riak-repl add-site <ipaddr> <portnum> <sitename>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-site 10.0.1.156 9010 newyork
+```
+
+## del-site
+
+Removes a site (secondary) from the local node by name.
+
+```bash
+riak-repl del-site <sitename>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-site newyork
+```
+
+## status
+
+Obtains status information about replication. Reports counts on how much
+data has been transmitted, transfer rates, message queue lengths of
+clients and servers, number of fullsync operations, and connection
+status. This command only displays useful information on the leader
+node.
+
+```bash
+riak-repl status
+```
+
+## start-fullsync
+
+Manually initiates a fullsync operation with connected sites.
+
+```bash
+riak-repl start-fullsync
+```
+
+## cancel-fullsync
+
+Cancels any fullsync operations in progress. If a partition is in
+progress, synchronization will stop after that partition completes.
+During cancellation, `riak-repl status` will show `cancelled` in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+## pause-fullsync
+
+Pauses any fullsync operations in progress. If a partition is in
+progress, synchronization will pause after that partition completes.
+While paused, `riak-repl status` will show `paused` in the status
+information. Fullsync may be cancelled while paused.
+
+```bash
+riak-repl pause-fullsync
+```
+
+## resume-fullsync
+
+Resumes any fullsync operations that were paused. If a fullsync
+operation was running at the time of the pause, the next partition will
+be synchronized. If not, it will wait until the next `start-fullsync`
+command or `fullsync_interval`.
+
+```bash
+riak-repl resume-fullsync
+```
+
+## riak-repl Status Output
+
+The following definitions describe the output of the `riak-repl status`
+command. Please note that many of these statistics will only appear on
+the current leader node, and that all counts will be reset to 0 upon
+restarting Riak.
+
+### Client
+
+Field | Description
+:-----|:-----------
+`client_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.2/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a>
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected sites
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of site connections made to this node
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+
+### Server
+
+Field | Description
+:-----|:-----------
+`server_bytes_recv` | The total number of bytes the server (listener) has received
+`server_bytes_sent` | The total number of bytes the server (listener) has sent
+`server_connect_errors` | The number of listener to site connection errors
+`server_connects` | The number of times the listener connects to the client site
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.2/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a>
+
+### Elections and Objects
+
+Field | Description
+:-----|:-----------
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+
+### Other
+
+Field | Description
+:-----|:-----------
+`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>`
+`[sitename]_ips` | Defines a replication site
+`leader` | Which node is the current leader of the cluster
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the client (site) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The listeners currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>`connected` --- The IP address and port of a connected client (site)</li><li>`cluster_name` --- The name of the connected client (site)</li><li>`connecting` --- The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul>
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/2.9.2/configuring/v2-multi-datacenter/) guide for more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the server (listener) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/2.9.2/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+## Keylist Strategy
+
+The following fields appear under both the `keylist_server` and
+`keylist_client` fields. Any differences between the two are described
+in the table.
+
+Field | Description
+------|------------
+`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
+`partition_start` | The number of elapsed seconds since replication has started on a given partition
+`stage_start` | The number of elapsed seconds since replication has started on a given stage
+`get_pool_size` | The number of Riak get finite state workers available to process requests
diff --git a/content/riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter.md
new file mode 100644
index 0000000000..568bc145f6
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter.md
@@ -0,0 +1,421 @@
+---
+title_supertext: "V3 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "V3 Multi-Datacenter"
+    identifier: "cluster_operations_v3"
+    weight: 114
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.2/ops/mdc/v3/operations
+  - /riak/kv/2.9.2/ops/mdc/v3/operations
+---
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{<baseurl>}}riak/kv/2.9.2/using/reference/multi-datacenter/statistics
+
+This document explains how to manage replication with the `riak-repl`
+command. The behavior of some of these commands can be altered by
+setting appropriate [configuration][config v3 mdc] values.
+
+All commands need to be run only once on a single node of a cluster for
+the changes to propagate to all other nodes.
+All changes will persist across node restarts and will automatically
+take effect when nodes are added to the cluster.
+
+## Cluster Connectivity
+
+#### clustername
+
+Set the `clustername` for all nodes in a Riak cluster.
+
+* Without a parameter, returns the current name of the cluster
+* With a parameter, names the current cluster
+
+To **set** the `clustername`:
+
+* Syntax: `riak-repl clustername <clustername>`
+* Example: `riak-repl clustername Boston`
+
+To **get** the `clustername`:
+
+* Syntax: `riak-repl clustername`
+* Example: `riak-repl clustername`
+
+#### connect
+
+The `connect` command establishes communications from a source cluster
+to a sink cluster of the same ring size. The `host:port` of the sink
+cluster is used for this. The IP and port to connect to can be found in
+the `advanced.config` of the remote cluster, under `riak_core` and
+`cluster_mgr`.
+
+The `host` can be either an IP address
+
+* Syntax: `riak-repl connect <ip>:<port>`
+* Example: `riak-repl connect 192.168.2.1:9080`
+
+...or a hostname that will resolve to an IP address.
+
+* Syntax: `riak-repl connect <host>:<port>`
+* Example: `riak-repl connect Austin:9080`
+
+#### disconnect
+
+Disconnects a source cluster from a sink cluster.
+
+You may define a `host:port` combination
+
+* Syntax: `riak-repl disconnect <host>:<port>`
+* Example: `riak-repl disconnect 192.168.2.1:9080`
+
+...or use the *name* of the cluster.
+
+* Syntax: `riak-repl disconnect <sink_clustername>`
+* Example: `riak-repl disconnect Austin`
+
+#### connections
+
+Display a list of connections between source and sink clusters.
+
+* Syntax: `riak-repl connections`
+* Example: `riak-repl connections`
+
+#### clusterstats
+
+Displays current cluster stats using an optional `ip:port` as well as an
+optional `protocol-id`.
+
+`protocol-id` can be one of the following:
+
+* `cluster_mgr`
+* `rt_repl`
+* `fs_repl`
+
+The `clusterstats` command in use:
+
+* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>`
+* Example: `riak-repl clusterstats 192.168.2.1:9080`
+* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl`
+
+
+## Realtime Replication Commands
+
+#### realtime enable
+
+Enable realtime replication from a source cluster to sink clusters.
+
+This will start queuing updates for replication. The cluster will still
+require an invocation of `realtime start` for replication to occur.
+
+* Syntax: `riak-repl realtime enable <sink_clustername>`
+* Example: `riak-repl realtime enable Austin`
+
+#### realtime disable
+
+Disable realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime disable <sink_clustername>`
+* Example: `riak-repl realtime disable Austin`
+
+
+#### realtime start
+
+Start realtime replication connections from a source cluster to sink
+clusters. See also `realtime enable` (above).
+
+* Syntax: `riak-repl realtime start <sink_clustername>`
+* Example: `riak-repl realtime start Austin`
+
+#### realtime stop
+
+Stop realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime stop <sink_clustername>`
+* Example: `riak-repl realtime stop Austin`
+
+
+## Fullsync Replication Commands
+
+These behaviors can be altered by using the `advanced.config`
+`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information.
+
+#### fullsync enable
+
+Enable fullsync replication from a source cluster to sink clusters. By
+default, a fullsync will begin as soon as a connection to the remote
+cluster is established.
+
+* Syntax: `riak-repl fullsync enable <sink_clustername>`
+* Example: `riak-repl fullsync enable Austin`
+
+#### fullsync disable
+
+Disables fullsync for a cluster.
+
+* Syntax: `riak-repl fullsync disable <sink_clustername>`
+* Example: `riak-repl fullsync disable Austin`
+
+#### fullsync start
+
+Starts a fullsync. If the application configuration
+`fullsync_on_connect` is set to `false`, a fullsync needs to be started
+manually. This is also used to trigger a periodic fullsync using a cron
+job. While a fullsync is in progress, a `start` command is ignored and a
+message is logged.
+
+* Syntax: `riak-repl fullsync start <sink_clustername>`
+* Example: `riak-repl fullsync start Austin`
+
+#### fullsync stop
+
+Stops a fullsync.
+
+* Syntax: `riak-repl fullsync stop <sink_clustername>`
+* Example: `riak-repl fullsync stop Austin`
+
+## Cascading Realtime Writes
+
+#### realtime cascades
+
+Shows the current cascading realtime setting.
+
+* Syntax: `realtime cascades`
+* Example: `riak-repl realtime cascades`
+
+#### realtime cascades always
+
+Enable realtime cascading writes.
+
+* Syntax: `realtime cascades always`
+* Example: `riak-repl realtime cascades always`
+
+#### realtime cascades never
+
+Disable realtime cascading writes.
+
+* Syntax: `realtime cascades never`
+* Example: `riak-repl realtime cascades never`
+
+
+## NAT
+
+**Note**: See the [V3 Multi Data Center Replication With NAT][config v3 nat] for more information.
+
+#### nat-map show
+
+Show the current NAT mapping table.
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### proxy-get enable
+
+Enable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provide a redirection to the `<to-cluster-id>` for `proxy_get` if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+
+Show the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+
+Delete an existing redirect such that proxy_gets go again to the original
+provider cluster id.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Display this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+    ```bash
+    riak-repl show-local-cluster-id
+    ```
+
+    Possible output:
+
+    ```
+    local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+    ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster. This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster across all nodes on that cluster for a fullsync
+to a sink cluster. This means if one has configured fullsync for two
+different clusters, both with a `max_fssource_cluster` of 5, then 10
+fullsync workers can be in progress. This only affects nodes on the
+source cluster on which this parameter is defined via the configuration
+file or the command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node's maximum number of connections. This only affects nodes on
+the sink cluster on which this parameter is defined via the
+configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
+(Version 3) separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+    ```bash
+    riak-repl modes mode_repl12 mode_repl13
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+    ```bash
+    riak-repl modes
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and realtime replication replicate data from source clusters to sink clusters,
+but some configurations and metadata (such as search indices and bucket properties) will
+not be replicated.
+
+Non-replication of certain configurations and metadata supports
+heterogeneous cluster configurations in Replication, but there are operational
+things you can do when you want homogeneous cluster configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you would change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Buckets and Bucket Type properties on the source cluster
+will _not_ be replicated from source clusters to sink clusters.
+
+If you want the properties for Buckets or Bucket Types
+present on the source cluster to be propagated to sink clusters,
+you should update this data for each cluster at the same
+time you would change the source cluster.
diff --git a/content/riak/kv/2.9.2/using/performance.md b/content/riak/kv/2.9.2/using/performance.md
new file mode 100644
index 0000000000..1fdcb7e4c1
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/performance.md
@@ -0,0 +1,264 @@
+---
+title: "Improving Performance"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Performance"
+    identifier: "managing_performance"
+    weight: 206
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/kv/2.9.2/ops/tuning/linux/
+  - /riak/2.9.2/ops/tuning/linux/
+---
+
+Many Unix-like operating systems and distributions are tuned for desktop
+or light use out of the box and not for a production database. This
+guide describes recommended system performance tunings for operators of
+new and existing Riak clusters. The tunings present in this guide should
+be considered a starting point. It is important to make note of what
+changes are made and when in order to measure the impact of those
+changes.
+
+For performance and tuning recommendations specific to running Riak
+clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.2/using/performance/amazon-web-services).
+
+{{% note title="Note on other operating systems" %}}
+Unless otherwise specified, the tunings recommended below are for Linux
+distributions. Users implementing Riak on BSD and Solaris distributions can
+use these tuning recommendations to make analogous changes in those operating
+systems.
+{{% /note %}}
+
+## Storage and File System Tuning
+
+### Virtual Memory
+
+Due to the heavily I/O-focused profile of Riak, swap usage can result in
+the entire server becoming unresponsive. We recommend setting
+`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much
+as possible:
+
+```config
+vm.swappiness = 0
+```
+
+Ideally, you should disable swap to ensure that Riak's process pages are
+not swapped.
+Disabling swap will allow Riak to crash in situations where
+it runs out of memory. This will leave a crash dump file, named
+`erl_crash.dump`, in the `/var/log/riak` directory, which can be used to
+determine the cause of the memory usage.
+
+### Transparent Huge Pages (THP)
+
+Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately, this operation is rather OS-specific. As many of our customers are running Red Hat 6, we have included instructions on how to do so below. If you are using a different operating system, please refer to documentation for your OS.
+
+In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line:
+
+```
+transparent_hugepage=never
+```
+
+For the change to become effective, a server reboot is required.
+
+{{% note title="Note on Kernel Tuning Tools" %}}
+Some Kernel tuning tools such as ktune specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the Kernel tuning tool you are using as to how to disable THP.
+{{% /note %}}
+
+### Mounts
+
+Riak makes heavy use of disk I/O for its storage operations. It is
+important that you mount volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inode](http://en.wikipedia.org/wiki/Inode) access times on the volume
+will not be updated when files are read. This flag can be set
+temporarily using the following command:
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` flag can be set in `/etc/fstab` to mount
+permanently.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is completely fair queuing
+or `cfq`, which is designed for desktop use. While it is a good
+general-purpose scheduler, it is not designed to provide the kind of
+throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* The `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* The `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+ +To check the scheduler depth for block device `sda`, use the following +command: + +```bash +cat /sys/block/sda/queue/nr_requests +``` + +To increase the scheduler depth to 1024, use the following command: + +```bash +echo 1024 > /sys/block/sda/queue/nr_requests +``` + +### Filesystem + +Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and +[XFS](http://xfs.org/index.php/Main_Page) are recommended on some +operating systems for greater reliability and recoverability. + +At this time, Basho can recommend using ZFS on Solaris, SmartOS, and +OmniOS. ZFS may work well with Riak on direct Solaris clones like +IllumOS, but we cannot yet recommend this. [ZFS on +Linux](http://zfsonlinux.org) is still too early in its project lifetime +to be recommendable for production use due to concerns that have been +raised about excessive memory use. ZFS on FreeBSD is more mature than +ZFS on Linux, but Basho has not yet performed sufficient performance and +reliability testing to recommend using ZFS and Riak on FreeBSD. + +In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and +[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on +operating systems on which ZFS or XFS are not available or recommended. + +The ext4 file system defaults include two options that increase +integrity but slow performance. Because Riak's integrity is based on +multiple nodes holding the same data, these two options can be changed +to boost I/O performance. We recommend setting `barrier=0` and +`data=writeback` when using the ext4 filesystem. + +Similarly, the XFS file system defaults can be optimized to improve +performance. We recommend setting `nobarrier`, `logbufs=8`, +`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem. + +As with the `noatime` setting, these settings should be added to +`/etc/fstab` so that they are persisted across server restarts. + +## Kernel and Network Tuning + +The following settings are minimally sufficient to improve many aspects +of Riak usage on Linux, and should be added or updated in +`/etc/sysctl.conf`: + +```config +net.ipv4.tcp_max_syn_backlog = 40000 +net.core.somaxconn = 40000 +net.core.wmem_default = 8388608 +net.core.rmem_default = 8388608 +net.ipv4.tcp_sack = 1 +net.ipv4.tcp_window_scaling = 1 +net.ipv4.tcp_fin_timeout = 15 +net.ipv4.tcp_keepalive_intvl = 30 +net.ipv4.tcp_tw_reuse = 1 +net.ipv4.tcp_moderate_rcvbuf = 1 +``` + +{{% note title="Note on system default" %}} +In general, these recommended values should be compared with the system +defaults and only changed if benchmarks or other performance metrics indicate +that networking is the bottleneck. +{{% /note %}} + +The following settings are optional, but may improve performance on a +10Gb network: + +```config +net.core.rmem_max = 134217728 +net.core.wmem_max = 134217728 +net.ipv4.tcp_mem = 134217728 134217728 134217728 +net.ipv4.tcp_rmem = 4096 277750 134217728 +net.ipv4.tcp_wmem = 4096 277750 134217728 +net.core.netdev_max_backlog = 300000 +``` + +Certain network interfaces ship with on-board features that have been +shown to hinder Riak network performance. These features can be disabled +via `ethtool`. 
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Re-test and re-tune whenever these values are changed, as they affect all
+network operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho in nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, it is necessary to
+increase the open files limit. See [Open Files Limit]({{<baseurl>}}riak/kv/2.9.2/using/performance/open-files-limit/) for more
+details.
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.2/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/2.9.2/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/2.9.2/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/2.9.2/using/performance/open-files-limit/)
diff --git a/content/riak/kv/2.9.2/using/performance/amazon-web-services.md b/content/riak/kv/2.9.2/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..7b47dd251d
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/performance/amazon-web-services.md
@@ -0,0 +1,243 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/tuning/aws
+  - /riak/kv/2.9.2/ops/tuning/aws
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types which encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are Disk I/O, RAM, and Network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used.
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-Optimized EC2 instances, which provide between 500 Megabits per
+second and 1,000 Megabits per second of throughput with [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available and recommended for use with Provisioned
+IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they
+would use physical nodes, to compensate for the performance variability
+caused by shared, virtualized resources. When estimating cluster size with
+respect to node count, plan to have more EC2 instance-based nodes than you
+would physical server nodes.
+{{% /note %}}
+
+## Operating System
+
+### Clocks
+
+NTP is configured by default on Amazon EC2 Linux instances. Please
+refer to the [Set the Time for an
+Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html)
+section of the EC2 documentation for steps on verifying if NTP is
+working properly. If NTP is not working properly, significant clock
+drift can occur.
+
+### Mounts and Scheduler
+
+On EBS volumes, the **deadline** scheduler should be used. To check the
+scheduler in use for block device `xvdf`, for example, use the following
+command:
+
+```bash
+cat /sys/block/xvdf/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/xvdf/queue/scheduler
+```
+
+More information on the disk scheduler is available in [Improving Performance](../).
+
+### Virtual Memory Subsystem
+
+EBS volumes have considerably less bandwidth than hardware disks. To
+avoid saturating EBS bandwidth and inducing IO latency spikes, it is
+recommended to tune the Linux virtual memory subsystem to flush smaller
+amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings).
+
+### Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally
+and are not affected by a wider problem like an AWS service outage. Try
+to determine the cause of the problem from the data you have collected.
+If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or resold through Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk.
+
+Have your collected data ready when contacting TI Tokyo Client Services. A
+Client Services Engineer (CSE) might request log files, configuration
+files, or other information.
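+
+A minimal sketch of gathering such a diagnostic bundle before opening a
+ticket; the paths below are illustrative assumptions, so adjust them to
+wherever your platform keeps its system and Riak logs:
+
+```bash
+STAMP=$(date +%Y%m%d-%H%M%S)
+
+# Capture the kernel ring buffer, which is lost on reboot
+dmesg > /tmp/dmesg-$STAMP.txt
+
+# Bundle system logs, Riak logs, and Riak configuration
+# (use /var/log/messages instead of /var/log/syslog on RHEL-like systems)
+tar czf /tmp/riak-forensics-$STAMP.tar.gz \
+    /tmp/dmesg-$STAMP.txt /var/log/syslog /var/log/riak /etc/riak
+```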
+
+## Data Loss
+
+Many failures either do not entail data loss or have minimal loss that
+can be repaired automatically, without intervention. The outage of a single
+node does not necessarily cause data loss, as other replicas of every
+key are available elsewhere in the cluster. Once the node is detected as
+down, other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (a process called hinted handoff).
+
+The more severe data loss scenarios usually relate to hardware failure
+(in the case of AWS, service failure or instance termination). In the
+cases where data is lost, several options are available for restoring
+the data:
+
+1. Restore from backup. A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but can be used to partially restore data from
+   lost EBS volumes. If running in a RAID configuration, rebuilding the
+   array may also be possible.
+2. Restore from Multi-Datacenter Replication. If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the
+   `riak-repl` command.
+3. Restore using intra-cluster repair. Riak versions 1.2 and greater
+   include a "repair" feature which will restore lost partitions with
+   data from other replicas. This currently has to be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho CSE.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho is strongly recommended.
+
+## Benchmarking
+
+Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that
+simulates application operations by constructing approximately
+compatible data payloads and exchanging them with the Riak cluster
+directly.
+
+Benchmarking is critical to determining the appropriate EC2 instance
+types, and strongly recommended. More information is available on
+benchmarking Riak clusters with [Basho Bench](../benchmarking).
+
+Besides running Basho Bench, we also advise that you load test Riak with
+your own tests to ensure that the load imparted by MapReduce queries,
+full-text queries, and index queries is within the expected range.
+
+## Simulating Upgrades, Scaling, and Failure States
+
+In addition to simply measuring performance, it is also important to
+measure how performance degrades when the cluster is not in a
+steady state. While under a simulation of live load, the following
+states might be simulated:
+
+1. Stop one or more nodes normally and restart them after a few moments
+   (this simulates a [rolling upgrade](../../../setup/upgrading/cluster)).
+2. Join two or more nodes to the cluster.
+3. Leave nodes from the cluster (after step #2).
+4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then
+   restart it.
+5. Hard-reboot a node's instance using the AWS console and then
+   restart it.
+6. Hard-stop and destroy a node's instance and build a new one from
+   backup.
+7. Via networking, e.g. firewall rules, partition one or more nodes from
+   the rest of the cluster and then restore the original
+   configuration.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM.
+While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. While the Riak node is out, other nodes may also
+be at risk if free capacity is low on the rest of the cluster, so
+monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to replace
+it.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart. In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance, generally performs better, but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. Along with EBS, you can optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
diff --git a/content/riak/kv/2.9.2/using/performance/benchmarking.md b/content/riak/kv/2.9.2/using/performance/benchmarking.md
new file mode 100644
index 0000000000..80ba64e009
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/performance/benchmarking.md
@@ -0,0 +1,598 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/benchmarking
+  - /riak/kv/2.9.2/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   `basho_bench`. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster.
+   Do not run the `basho_bench` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download `basho_bench`
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/2.9.2/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building `basho_bench` from source. Later
+  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the `erlang-crypto` package, which
+  is required by `basho_bench`.
+* Install `git` (to check out the `basho_bench` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory in which to generate the results:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed `basho_bench` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common `~/` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+`/home/username/bench_results/current/`.
+
+If you built `basho_bench` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput --- Operations per second over the duration of the test.
+* Latency at 99th percentile, 99.9th percentile, and max latency for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running `basho_bench`, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
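+
+For example, a minimal sketch of pulling the latest results down to a
+workstation; the hostname and paths here are illustrative assumptions:
+
+```bash
+# Copy the current benchmark results from the load-generating machine
+rsync -avz user@loadgen:/home/username/bench_results/current/ \
+    ~/bench_results/current/
+```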
+
+#### Installing R on Ubuntu
+
+```
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed `basho_bench` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+`/home/username/bench_results/summary.png`.
+
+If you have built `basho_bench` from source, you can just use
+`make`. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the `basho_bench/README`.
+
+## How does it work?
+
+When Basho Bench starts (`basho_bench.erl`), it reads the
+configuration (`basho_bench_config.erl`), creates a new results
+directory, and then sets up the test (`basho_bench_app.erl` and
+`basho_bench_sup.erl`).
+
+During test setup, Basho Bench creates the following:
+
+* One **stats process** (`basho_bench_stats.erl`). This process
+  receives notifications when an operation completes, plus the
+  elapsed time of the operation, and stores it in a histogram. At
+  regular intervals, the histograms are dumped to `summary.csv` as
+  well as operation-specific latency CSVs (e.g. `put_latencies.csv`
+  for the PUT operation).
+* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting
+  (`basho_bench_worker.erl`). The worker process wraps a driver
+  module, specified by the [driver](#driver)
+  configuration setting. The driver is randomly invoked using the
+  distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the
+  driver invokes operations is governed by the [mode](#mode) setting.
+
+Once these processes have been created and initialized, Basho Bench
+sends a run command to all worker processes, causing them to begin the
+test. Each worker is initialized with a common seed value for random
+number generation to ensure that the generated workload is reproducible
+at a later date.
+
+During the test, the workers repeatedly call `driver:run/4`, passing in
+the next operation to run, a keygen function, a valuegen function, and
+the last state of the driver. The worker process times the operation,
+and reports this to the stats process when the operation has completed.
+
+Finally, once the test has been run for the duration specified in the
+config file, all workers and stats processes are terminated and the
+benchmark ends. The measured latency and throughput of the test can be
+found in `./tests/current/`. Previous results are in timestamped
+directories of the form `./tests/YYYYMMDD-HHMMSS/`.
+
+## Configuration
+
+Basho Bench ships with a number of sample configuration files, available
+in the `/examples` directory.
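+
+Before diving into the individual settings, here is a minimal sketch of
+a complete config file assembled only from options documented below;
+the IP, bucket, and workload mix are illustrative assumptions:
+
+```erlang
+% Run flat out for 5 minutes with 10 workers against a local Riak node,
+% using an 80/20 read/write mix of 512-byte values.
+{mode, max}.
+{duration, 5}.
+{concurrent, 10}.
+{driver, basho_bench_driver_riakc_pb}.
+{operations, [{get, 4}, {put, 1}]}.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+{value_generator, {fixed_bin, 512}}.
+{riakc_pb_ips, [{127,0,0,1}]}.
+{riakc_pb_bucket, <<"test">>}.
+```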
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`driver:run/4` function with a new operation. There are two possible
+values:
+
+* `max` --- generate as many ops per second as possible
+* `{rate, N}` --- generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e.: as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE, respectively.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                          [{{{basho_bench_driver_null,run,
+                                              [asdfput,
+                                               #Fun<basho_bench_keygen.4.4674>,
+                                               #Fun<basho_bench_valgen.0.1334>,
+                                               undefined]}}},
+                                           {{{basho_bench_worker,
+                                              worker_next_op,1}}},
+                                           {{{basho_bench_worker,
+                                              max_worker_run_loop,1}}}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` --- Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` --- Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` --- Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` --- Directly invokes the Bitcask API
+* `basho_bench_driver_dets` --- Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` --- operation completed successfully
+* `{error, Reason, NewState}` --- operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` --- operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` --- operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run.
+Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` --- generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` --- the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` --- the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` --- selects an integer from a uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` --- selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` --- the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` --- specifies an external
+  function that should return a key generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` --- takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` --- takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` --- generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` --- generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` --- generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` --- specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed-size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
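+
+% Generate a random binary whose size is uniformly distributed
+% between 512 and 4096 bytes (uniform_bin, as documented above)
+{value_generator, {uniform_bin, 512, 4096}}.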
+
+% Generate a random binary whose size is exponentially distributed,
+% starting at 1000 bytes with a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+Default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+```
+
+#### riakclient_replies
+
+This value is used for R-values during a get operation, and W-values
+during a put operation.
+
+```erlang
+% Expect 1 reply.
+{riakclient_replies, 1}.
+```
+
+#### riakclient_bucket
+
+The Riak bucket to use for reading and writing values. The default is
+`<<"test">>`.
+
+```erlang
+% Use the "bench" bucket.
+{riakclient_bucket, <<"bench">>}.
+```
+
+### basho_bench_driver_riakc_pb Settings
+
+#### riakc_pb_ips
+
+A list of IP addresses to connect the workers to. A random IP will be
+chosen for each worker.
+
+The default is `{riakc_pb_ips, [{127,0,0,1}]}`
+
+```erlang
+% Connect to a cluster of 3 machines
+{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]}
+```
+
+#### riakc_pb_port
+
+The port on which to connect to the PBC interface.
+
+The default is `{riakc_pb_port, 8087}`
+
+#### riakc_pb_bucket
+
+The bucket to use for testing.
+
+The default is `{riakc_pb_bucket, <<"test">>}`
+
+### basho_bench_driver_http_raw Settings
+
+#### http_raw_ips
+
+A list of IP addresses to connect the workers to. Each worker makes
+requests to each IP in a round-robin fashion.
+
+The default is `{http_raw_ips, ["127.0.0.1"]}`
+
+```erlang
+% Connect to a cluster of machines in the 10.x network
+{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}.
+```
+
+#### http_raw_port
+
+Select the default port to connect to for the HTTP server.
+
+The default is `{http_raw_port, 8098}`.
+
+```erlang
+% Connect on port 8090
+{http_raw_port, 8090}.
+```
+
+#### http_raw_path
+
+The base path to use for accessing Riak, usually `"/riak/<bucket>"`.
+
+The default is `{http_raw_path, "/riak/test"}`.
+
+```erlang
+% Place test data in another_bucket
+{http_raw_path, "/riak/another_bucket"}.
+```
+
+#### http_raw_params
+
+Additional parameters to add to the end of the URL. This can be used
+to set the `r`/`w`/`dw`/`rw` parameters as desired.
+
+The default is `{http_raw_params, ""}`.
+ +```erlang +% Set R=1, W=1 for testing a system with n_val set to 1 +{http_raw_params, "?r=1&w=1"}. +``` + +#### http_raw_disconnect_frequency + +How often, in seconds or number of operations, the HTTP clients +(workers) should forcibly disconnect from the server. + +The default is `{http_raw_disconnect_frequency, infinity}` (which +means that Basho Bench should never forcibly disconnect). + +```erlang +% Disconnect after 60 seconds +{http_raw_disconnect_frequency, 60}. + +% Disconnect after 200 operations +{http_raw_disconnect_frequency, {ops, 200}}. +``` + +## Custom Driver + +A custom driver must expose the following callbacks. + +```erlang +% Create the worker +% ID is an integer +new(ID) -> {ok, State} or {error, Reason}. + +% Run an operation +run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}. +``` + +See the [existing +drivers](https://github.com/basho/basho_bench/tree/master/src) for +more details. diff --git a/content/riak/kv/2.9.2/using/performance/erlang.md b/content/riak/kv/2.9.2/using/performance/erlang.md new file mode 100644 index 0000000000..573b21cd99 --- /dev/null +++ b/content/riak/kv/2.9.2/using/performance/erlang.md @@ -0,0 +1,367 @@ +--- +title: "Erlang VM Tuning" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Erlang VM" + identifier: "performance_erlang" + weight: 105 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.9.2/ops/tuning/erlang + - /riak/kv/2.9.2/ops/tuning/erlang +--- + +Riak was written almost exclusively in [Erlang](http://www.erlang.org) +and runs on an Erlang virtual machine (VM), which makes proper Erlang VM +tuning an important part of optimizing Riak performance. The Erlang VM +itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm). + +The table below lists some of the parameters that are available, showing +both their names as used in Erlang and their names as Riak parameters. 
+
+Erlang parameter | Riak parameter
+:----------------|:--------------
+[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads`
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K`
+[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit`
+[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports`
+[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online`
+[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W`
+[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size`
+[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables`
+[`+scl`](http://www.erlang.org/doc/main/erl.html#+scl) | `erlang.schedulers.compaction_of_load`
+[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval`
+[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp`
+[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing`
+[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size`
+[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime`
+[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after`
+[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump`
+[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables`
+`-name` | `nodename`
+
+{{% note title="Note on upgrading to 2.0" %}}
+In versions of Riak prior to 2.0, Erlang VM-related parameters were specified
+in a `vm.args` configuration file; in versions 2.0 and later, all
+Erlang-VM-specific parameters are set in the `riak.conf` file. If you're
+upgrading to 2.0 from an earlier version, you can still use your old `vm.args`
+if you wish. Please note, however, that if you set one or more parameters in
+both `vm.args` and in `riak.conf`, the settings in `vm.args` will override
+those in `riak.conf`.
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Riak is supported on some operating systems that do not provide SMP
+support, so make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system.
+If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work.
+To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` parameter to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. epmd uses an unpredictable port for inter-node communication
+by default, binding to port 0, which means that it uses the first
+available port. This can make it difficult to configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively. The
+following would set the range to ports between 3000 and 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 3000
+erlang.distribution.port_range.maximum = 5000
+```
+
+```appconfig
+%% The older, app.config-based system uses different parameter names
+%% for specifying the minimum and maximum port
+
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 3000},
+          {inet_dist_listen_max, 5000}
+          % ...
+         ]}
+```
+
+You can set the Erlang VM to use a single port by setting the minimum to
+the desired port while setting no maximum. The following would set the
+port to 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 5000
+```
+
+```appconfig
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 5000},
+          % ...
+         ]}
+```
+
+If the minimum port is unset, the Erlang VM will listen on a random
+high-numbered port.
+
+### Maximum Ports
+
+You can set the maximum number of concurrent ports/sockets used by the
+Erlang VM using the `erlang.max_ports` setting. Possible values range
+from 1024 to 134217727. The default is 65536. In `vm.args` you can use
+either `+Q` or `-env ERL_MAX_PORTS`.
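+
+For example, a sketch of raising the limit to 262144 in both
+configuration systems (the value here is illustrative; pick one
+appropriate to your workload):
+
+```riakconf
+erlang.max_ports = 262144
+```
+
+```vmargs
++Q 262144
+```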
+
+## Asynchronous Thread Pool
+
+If thread support is available in your Erlang VM, you can set the number
+of asynchronous threads in the Erlang VM's asynchronous thread pool
+using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0
+to 1024. If thread support is available on your OS, the default is 64.
+Below is an example setting the number of async threads to 600:
+
+```riakconf
+erlang.async_threads = 600
+```
+
+```vmargs
++A 600
+```
+
+### Stack Size
+
+In addition to the number of asynchronous threads, you can determine the
+memory allocated to each thread using the
+`erlang.async_threads.stack_size` parameter, which corresponds to the
+`+a` Erlang flag. You can determine that size in Riak using KB, MB, GB,
+etc. The valid range is 16-8192 kilowords, which translates to 64-32768
+KB on 32-bit architectures. While there is no default, we suggest a
+stack size of 16 kilowords, which translates to 64 KB. We suggest such a
+small size because the number of asynchronous threads, as determined by
+`erlang.async_threads`, might be quite large in your Erlang VM. The 64 KB
+default is enough for drivers delivered with Erlang/OTP but might not be
+large enough to accommodate drivers that use the `driver_async()`
+functionality, documented
+[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend
+setting higher values with caution, always keeping the number of
+available threads in mind.
+
+## Kernel Polling
+
+You can utilize kernel polling in your Erlang distribution if your OS
+supports it. Kernel polling can improve performance if many file
+descriptors are in use; the more file descriptors, the larger an effect
+kernel polling may have on performance. Kernel polling is enabled by
+default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`.
+This corresponds to the
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the
+Erlang VM. You can disable it by setting `erlang.K` to `off`.
+
+## Warning Messages
+
+Erlang's
+[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an
+event manager that registers error, warning, and info events from the
+Erlang runtime. By default, events from the `error_logger` are mapped as
+warnings, but you can also set messages to be mapped as errors or info
+reports using the `erlang.W` parameter (or `+W` in `vm.args`). The
+possible values are `w` (warnings), `e` (errors), or `i` (info reports).
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage.
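+
+For example, a sketch of doubling the default in both configuration
+systems; `+zdbbl` is specified in kilobytes, so 64 MB is 65536:
+
+```riakconf
+erlang.distribution_buffer_size = 64MB
+```
+
+```vmargs
++zdbbl 65536
+```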
+You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) \(Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.distribution_buffer_size` (explained in the section [above](#distribution-buffer)) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can determine that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every N seconds,
+where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can determine how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
diff --git a/content/riak/kv/2.9.2/using/performance/latency-reduction.md b/content/riak/kv/2.9.2/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..7be52f1c68
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/performance/latency-reduction.md
@@ -0,0 +1,263 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/tuning/latency-reduction
+  - /riak/kv/2.9.2/ops/tuning/latency-reduction
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about them.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB.
+Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric | Explanation
+:-----------------------------|:-----------
+`fsm_node_get_objsize_mean` | The mean object size encountered by this node in the last minute
+`fsm_node_get_objsize_median` | The median object size encountered by this node in the last minute
+`fsm_node_get_objsize_95` | The 95th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_99` | The 99th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_100` | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and re-start your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes). This increases the size of the
+distributed Erlang buffer from its default of 1024 KB. Re-start your
+node when configuration changes have been made.
+
+Large objects can also impact latency even if they're only present on
+some nodes. If increased latency occurs only on N nodes, where N is your
+[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes.
+
+If large objects are suspected, you should also audit the behavior of
+siblings in your cluster, as explained in the [next section](#siblings).
+
+## Siblings
+
+In Riak, object conflicts are handled by keeping multiple versions of
+the object in the cluster either until a client takes action to resolve
+the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects).
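+
+Both the object-size metrics above and the sibling metrics described in
+the next section are exposed by the same `/stats` endpoint. A minimal
+sketch of pulling them for trending, assuming the default HTTP port
+8098 and the `jq` utility (the metric names are those listed in the
+tables in this guide):
+
+```bash
+# Fetch the 99th-percentile object size and sibling count from one node
+curl -s http://localhost:8098/stats | \
+  jq '{objsize_99: .fsm_node_get_objsize_99,
+       siblings_99: .node_get_fsm_siblings_99}'
+```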
+
+### Mitigation
+
+The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor
+object size (or via an HTTP `GET` request to `/stats`). In the output of
+`riak-admin status` on each node, you'll see the following
+sibling-related statistics:
+
+Metric | Explanation
+:------------------------------|:-----------
+`node_get_fsm_siblings_mean`   | The mean number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_95`     | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_99`     | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_100`    | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+
+Is there an upward trend in these statistics over time? Are there any
+large outliers? Do these trends correspond to your observed latency
+spikes?
+
+If you believe that sibling creation problems could be responsible for
+latency issues in your cluster, you can start by checking the following:
+
+* If `allow_mult` is set to `true` for some or all of your buckets, be
+  sure that your application is correctly resolving siblings. Be sure to
+  read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate.
+  If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly.
+* Application errors are a common source of problems with
+  siblings. Updating the same key over and over without passing a
+  [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) \(DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W is set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node among the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. This setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+frequently, with smaller and less I/O-intensive merges, while setting it
+higher will induce less frequent but more I/O-intensive merges. To find
+settings that are ideal for your use case, we recommend checking out our
+guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments.
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak makes heavy use of both disk I/O and network resources.
+Bottlenecks on either front can lead to undue latency in your cluster.
+We recommend an active monitoring strategy to detect problems
+immediately when they arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+and metrics at your disposal, including
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat).
+
+To diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+You can also set an object size threshold past which a write will
+succeed but will register a warning in the logs by adjusting the
+`object.size.warning_threshold` parameter. The default is `5MB`.
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs a warning (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
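+
+Taken together, the object settings discussed in this section live in
+`riak.conf`. A minimal sketch showing the defaults named above (adjust
+the values to suit your own latency and data-model requirements):
+
+```riakconf
+object.size.maximum = 50MB
+object.size.warning_threshold = 5MB
+object.siblings.maximum = 100
+object.siblings.warning_threshold = 25
+object.format = 1
+```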
diff --git a/content/riak/kv/2.9.2/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.9.2/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..2734383380
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,42 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+---
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.2/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
diff --git a/content/riak/kv/2.9.2/using/performance/open-files-limit.md b/content/riak/kv/2.9.2/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..644d094df4
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/performance/open-files-limit.md
@@ -0,0 +1,347 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/tuning/open-files-limit/
+  - /riak/kv/2.9.2/ops/tuning/open-files-limit/
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation. The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there's no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session    required   pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there's no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session    required   pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles    65536          65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+        "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles    65536          65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+ + +## Mac OS X Yosemite + +Start by checking the current open file limit values with: + +```bash +launchctl limit maxfiles +``` + +The response should look something like this: + +```bash +maxfiles 65536 65536 +``` + +The first column is the soft limit and the last column is the hard limit. + +To change the open files limits on Mac OS X Yosemite, perform these steps: + +1\. Add the following line to your .bash_profile or analogous file: + +```bash +ulimit -n 65536 200000 +``` + +2\. Save and close the file. Next edit the /etc/launchd.conf file and add: + +```/etc/launchd.conf +limit maxfiles 200000 +``` + +3\. Save and close the file. + +4\. After restarting, verify the new limits by running: + +```bash +launchctl limit maxfiles +``` + +The response output should look something like this: + +```bash +maxfiles 65536 200000 +``` + +## Mac OS X Older Versions + +Start by checking the current open file limit values with: + +```bash +launchctl limit maxfiles +``` + +The response should look something like this: + +```bash +maxfiles 10240 10240 +``` + +The first column is the soft limit and the last column is the hard limit. + +To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps: + +1\. Edit (or create) /etc/launchd.conf and increase the limits by adding: + +```bash +limit maxfiles 65536 200000 +``` + +2\. Save the file and restart the system for the new limits to take effect. + +3\. After restarting, verify the new limits by running: + +```bash +launchctl limit maxfiles +``` + +The response output should look something like this: + +```bash +maxfiles 65536 200000 +``` diff --git a/content/riak/kv/2.9.2/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.9.2/using/performance/v2-scheduling-fullsync.md new file mode 100644 index 0000000000..2f134686f9 --- /dev/null +++ b/content/riak/kv/2.9.2/using/performance/v2-scheduling-fullsync.md @@ -0,0 +1,45 @@ +--- +title: "V2 Scheduling Fullsync" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "V2 Scheduling Fullsync" + identifier: "performance_v2_scheduling_fullsync" + weight: 103 + parent: "managing_performance" +toc: true +commercial_offering: true +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. +{{% /note %}} + + +With the `pause` and `resume` commands it is possible to limit the +fullsync operation to off-peak times. First, disable `fullsync_interval` +and set `fullsync_on_connect` to `false`. Then, using cron or something +similar, execute the commands below at the start of the sync window. 
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
diff --git a/content/riak/kv/2.9.2/using/reference.md b/content/riak/kv/2.9.2/using/reference.md
new file mode 100644
index 0000000000..67d7affcb8
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference.md
@@ -0,0 +1,130 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
diff --git a/content/riak/kv/2.9.2/using/reference/architecture.md b/content/riak/kv/2.9.2/using/reference/architecture.md
new file mode 100644
index 0000000000..d824f12372
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/architecture.md
@@ -0,0 +1,16 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+#menu:
+#  riak_kv-2.9.2:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+---
+
+<!-- TODO: Content -->
diff --git a/content/riak/kv/2.9.2/using/reference/bucket-types.md b/content/riak/kv/2.9.2/using/reference/bucket-types.md
new file mode 100644
index 0000000000..74bc2c7c49
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/bucket-types.md
@@ -0,0 +1,818 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+---
+
+Bucket types allow groups of buckets to share configuration details and
+allow Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
+cluster to a pre-2.0 version _as long as you have not created and activated a
+bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}

+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/2.9.2/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, related to
+  [Riak data types]({{<baseurl>}}riak/kv/2.9.2/developing/data-types) and [strong consistency]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/strong-consistency) respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types/#bucket-properties-and-operations). You cannot simply take a
+bucket `my_bucket` and assign it a type the way that you would, say,
+set `allow_mult` to `false` or `n_val` to `5`, because there is no
+`type` parameter contained within the bucket's properties (i.e.
+`props`).
+
+Instead, bucket types are applied to buckets _on the basis of how those
+buckets are queried_. Queries involving bucket types take the following
+form:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+In the older system, only bucket and key are specified in queries:
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+## When to Use Bucket Types
+
+In many respects, bucket types are a major improvement over the older
+system of bucket configuration, including the following:
+
+* Bucket types are more flexible because they enable you to define a
+  bucket configuration and then change it if you need to.
+* Bucket types are more reliable because the buckets that bear a given
+  type only have their properties changed when the type is changed.
+  Previously, it was possible to change the properties of a bucket only
+  through client requests.
+* Whereas bucket properties can only be altered by clients interacting
+  with Riak, bucket types are more of an operational concept. The
+  `riak-admin bucket-type` interface (discussed in depth below) enables
+  you to manage bucket configurations on the operations side, without
+  recourse to Riak clients.
+
+For these reasons, we recommend _always_ using bucket types in versions
+of Riak 2.0 and later.
+
+## Managing Bucket Types Through the Command Line
+
+Bucket types are created, updated, activated, and more through the
+`riak-admin bucket-type` interface.
+
+Below is a full list of available sub-commands:
+
+Command | Action | Form |
+:-------|:-------|:-----|
+`create` | Create or modify a bucket type before activation | `create <type> <json>` |
+`activate` | Activate a bucket type | `activate <type>` |
+`list` | List all currently available bucket types and their activation status | `list` |
+`status` | Display the status and properties of a specific bucket type | `status <type>` |
+`update` | Update a bucket type after activation | `update <type> <json>` |
+
+### Creating a Bucket Type
+
+Creating new bucket types involves using the `create <type> <json>`
+command, where `<type>` is the name of the type and `<json>` is a JSON
+object of the following form:
+
+```json
+{
+  "props": {
+    "prop1": "val1",
+    "prop2": "val2",
+    ...
+  }
+}
+```
+
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.2/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.2/developing/getting-started) section.
+
+If creation is successful, the console will respond with the name of the
+new type followed by `created`, as in this example output for a type
+named `type_using_defaults`:
+
+```
+type_using_defaults created
+```
+
+{{% note %}}
+The `create` command can be run multiple times prior to a bucket type being
+activated. Riak will persist only those properties contained in the final call
+of the command.
+{{% /note %}}
+
+Creating bucket types that assign properties _always_ involves passing
+stringified JSON to the `create` command. One way to do that is to pass
+a JSON string directly. The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak-admin bucket-type create from_json_file "$(cat props.json)"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
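+
+Putting the sub-commands together, a minimal end-to-end sketch of the
+type lifecycle (reusing the `n_equals_1` example from above) looks like
+this; each step is covered in detail in the sections that follow:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+riak-admin bucket-type status n_equals_1    # wait until the type is ready
+riak-admin bucket-type activate n_equals_1
+riak-admin bucket-type list                 # confirm it shows as active
+```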
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock: 20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after a bucket is
+created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then these
+properties are fixed for the type and for all buckets that bear it.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+{{% /note %}} + +## Buckets as Namespaces + +In versions of Riak prior to 2.0, all queries are made to a bucket/key +pair, as in the following example read request: + +```java +Location myKey = new Location(new Namespace("my_bucket"), "my_key"); +FetchValue fetch = new FetchValue.Builder(myKey).build(); +client.execute(fetch); +``` + +```ruby +bucket = client.bucket('my_bucket') +bucket.get('my_key') +``` + +```php +$location = new Location('my_key', new Bucket('my_bucket')); +(new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('my_bucket') +bucket.get('my_key') +``` + +```csharp +var id = new RiakObjectId("my_bucket", "my_key"); +client.Get(id); +``` + +```javascript +client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) { +}); +``` + +```erlang +{ok, Object} = riakc_pb_socket:get(Pid, + <<"my_bucket">>, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/buckets/my_bucket/keys/my_key +``` + +With the addition of bucket types in Riak 2.0, bucket types can be used +as _an additional namespace_ on top of buckets and keys. The same bucket +name can be associated with completely different data if it used in +accordance with a different type. Thus, the following two requests will +be made to _completely different objects_, even though the bucket and key +names are the same: + +```java +Location key1 = + new Location(new Namespace("type1", "my_bucket"), "my_key"); +Location key2 = + new Location(new Namespace("type2", "my_bucket"), "my_key"); +FetchValue fetch1 = new FetchValue.Builder(key1).build(); +FetchValue fetch2 = new FetchValue.Builder(key2).build(); +client.execute(fetch1); +client.execute(fetch2); +``` + +```ruby +bucket1 = client.bucket_type('type1').bucket('my_bucket') +bucket2 = client.bucket_type('type2').bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```php +$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1')); +$location2 = new Location('my_key', new Bucket('my_bucket', 'type2')); +$builder = new \Basho\Riak\Command\Builder\FetchObject($riak); +$builder->atLocation($location1) + ->build() + ->execute(); +$builder->atLocation($location2) + ->build() + ->execute(); +``` + +```python +bucket1 = client.bucket_type('type1').bucket('my_bucket') +bucket2 = client.bucket_type('type2').bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```csharp +var id1 = new RiakObjectId("type1", "my_bucket", "my_key"); +var id2 = new RiakObjectId("type2", "my_bucket", "my_key"); +var rslt1 = client.Get(id1); +var rslt2 = client.Get(id2); +``` + +```javascript +client.fetchValue({ + bucketType: 'type1', bucket: 'my_bucket', key: 'my_key' +}, function (err, rslt) { +}); + +client.fetchValue({ + bucketType: 'type2', bucket: 'my_bucket', key: 'my_key' +}, function (err, rslt) { +}); +``` + +```erlang +{ok, Obj1} = riakc_pb_socket:get(Pid, + {<<"type1">>, <<"my_bucket">>}, + <<"my_key">>), +{ok, Obj2} = riakc_pb_socket:get(Pid, + {<<"type2">>, <<"my_bucket">>}, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key +curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key +``` + +{{% note title="Note on object location" %}} +In Riak 2.x, _all requests_ must be made to a location specified by a bucket +type, bucket, and key rather than to a bucket/key pair, as in previous +versions. 
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
+```
+
+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` was set to `true`. The default `allow_mult` setting was `false`.
+
+In version 2.0, this changed in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolution in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind when using versions of Riak 2.0 and
+later any time that you create, activate, and use your own bucket types.
+It is still possible to set `allow_mult` to `false` in any given bucket
+type, but it must be done explicitly. If we wanted to set
+`allow_mult` to `false` in our `n_val_of_2` bucket type from above, we
+would need to update the existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/2.9.2/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/2.9.2/developing/usage/commit-hooks/#post-commit-hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again.
+    If it still does not work, then there may be a network partition or
+    other issues that need to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key =
+  new Location(new Namespace("no_siblings", "sensitive_user_data"), "user19735");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(key)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildJsonObject("{ ... user data ... }")
+    ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` using the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket. If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(allYourBaseKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+    ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type from
+above if we didn't want to deal with siblings, vclocks, and the like,
+and we could use a `siblings_allowed` bucket type (with all of the
+default properties except `allow_mult` set to `true`). This would give
+us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
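+
+To illustrate, here is a minimal sketch of reads against all four
+keyspaces (assuming a local node on the default HTTP port; each request
+can return a different value for the same bucket/key pair):
+
+```bash
+curl http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/no_siblings/buckets/new_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/new_memes/keys/favorite_meme
+```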
diff --git a/content/riak/kv/2.9.2/using/reference/custom-code.md b/content/riak/kv/2.9.2/using/reference/custom-code.md
new file mode 100644
index 0000000000..b19dc143ba
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/custom-code.md
@@ -0,0 +1,131 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/install-custom-code/
+  - /riak/kv/2.9.2/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/2.9.2/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/2.9.2/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. You should note that in Erlang, a file
+must have the same name as the module it contains. So if you are given a file
+named `validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}} + +<table style="width: 100%; border-spacing: 0px;"> +<tbody> +<tr align="left" valign="top"> +<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td> +<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"> +<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p> +</td> +</tr> +<tr align="left" valign="top"> +<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td> +<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"> +<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p> +</td> +</tr> +<tr align="left" valign="top"> +<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td> +<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"> +<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p> +</td> +</tr> +<tr align="left" valign="top"> +<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td> +<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"> +<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p> +</td> +</tr> +<tr align="left" valign="top"> +<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td> +<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"> +<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p> +</td> +</tr> +</tbody> +</table> + +Table 1: Erlang compiler executable location for packaged Riak installations + on supported platforms + +Compiling the module is a straightforward process. + +```text +erlc validate_json.erl +``` + +Next, you'll need to define a path from which compiled modules can be stored +and loaded. For our example, we'll use a temporary directory `/tmp/beams`, +but you should choose a directory for production functions based on your +own requirements such that they will be available where and when needed. + +{{% note %}} +Ensure that the directory chosen above can be read by the `riak` user. +{{% /note %}} + +Successful compilation will result in a new `.beam` file, +`validate_json.beam`. + +### Configure + +Take the `validate_json.beam` and copy this file to the `/tmp/beams` directory. + +```text +cp validate_json.beam /tmp/beams/ +``` + +After copying the compiled module into `/tmp/beams/`, you must update +`app.config` and configure Riak to allow loading of compiled modules from +the directory where they're stored (again in our example case, `/tmp/beams`). + +Edit `app.config` and insert an `add_paths` setting into the `riak_kv` +section as shown: + +```erlang +{riak_kv, [ + %% ... + {add_paths, ["/tmp/beams/"]}, + %% ... +``` + +After updating `app.config`, Riak must be restarted. In production cases, you +should ensure that if you are adding configuration changes to multiple nodes, +that you do so in a rolling fashion, taking time to ensure that the Riak key +value store has fully initialized and become available for use. + +This is done with the `riak-admin wait-for-service` command as detailed +in the [Commands documentation]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#wait-for-service). + +{{% note %}} +It is important that you ensure riak_kv is active before restarting the next +node. 
+{{% /note %}} diff --git a/content/riak/kv/2.9.2/using/reference/failure-recovery.md b/content/riak/kv/2.9.2/using/reference/failure-recovery.md new file mode 100644 index 0000000000..cbe48a11d7 --- /dev/null +++ b/content/riak/kv/2.9.2/using/reference/failure-recovery.md @@ -0,0 +1,80 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` diff --git a/content/riak/kv/2.9.2/using/reference/handoff.md b/content/riak/kv/2.9.2/using/reference/handoff.md new file mode 100644 index 0000000000..34c9db0d34 --- /dev/null +++ b/content/riak/kv/2.9.2/using/reference/handoff.md @@ -0,0 +1,197 @@ +--- +title: "Handoff Reference" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Handoff" + identifier: "managing_ref_handoff" + weight: 101 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.2/ops/running/handoff/ + - /riak/kv/2.9.2/ops/running/handoff/ +--- + +[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/handoff + +Riak is a distributed system built with two essential goals in mind: + +* **fault tolerance**, whereby a Riak cluster can withstand node + failure, network partitions, and other events in a way that does not + disrupt normal functioning, and +* **scalability**, whereby operators can gracefully add and remove nodes + to/from a Riak cluster + +Both of these goals demand that Riak is able to either temporarily or +permanently re-assign responsibility for portions of the keyspace. That +re-assigning is referred to as **intra-cluster handoff** (or simply +**handoff** in our documentation). + +## Types of Handoff + +Intra-cluster handoff typically takes one of two forms: **hinted +handoff** and **ownership transfer**. + +Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +up the slack, so to speak, assuming responsibility for node C's +operations. When node C comes back online, responsibility will be handed +back to the original vnodes. + +Ownership transfer is different because it is meant to be permanent. +It occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +makeup of a cluster changes, e.g. when nodes are added or removed from +the cluster. In this case, responsibility for portions of the keyspace +needs to be fundamentally re-assigned. + +Both types of handoff are handled automatically by Riak. Operators do +have the option, however, of enabling and disabling handoff on +particular nodes or all nodes and of configuring key aspects of Riak's +handoff behavior. More information can be found below. + +## Configuring Handoff + +A full listing of configurable parameters can be found in our +[configuration files]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#intra-cluster-handoff) +document. The sections below provide a more narrative description of +handoff configuration. + +### SSL + +If you want to encrypt handoff behavior within a Riak cluster, you need +to provide each node with appropriate paths for an SSL certfile (and +potentially a keyfile). The configuration below would designate a +certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`: + +```riakconf +handoff.ssl.certfile = /ssl_dir/cert.pem +handoff.ssl.keyfile = /ssl_dir/key.pem +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_ssl_options, [ + {certfile, "/ssl_dir/cert.pem"}, + {keyfile, "/ssl_dir/key.pem"} + ]}, + %% Other configs +]} +``` + +### Port + +You can set the port used by Riak for handoff-related interactions using +the `handoff.port` parameter. The default is 8099. 
This would change the
+port to 9000:
+
+```riakconf
+handoff.port = 9000
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_port, 9000},
+    %% Other configs
+]}
+```
+
+### Background Manager
+
+Riak has an optional background manager that limits handoff activity in
+the name of saving resources. The manager can help prevent system
+response degradation during times of heavy load, when multiple
+background tasks may contend for the same system resources. The
+background manager is disabled by default. The following will enable it:
+
+```riakconf
+handoff.use_background_manager = on
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_use_background_manager, on},
+    %% Other configs
+]}
+```
+
+### Maximum Rejects
+
+If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search/),
+those subsystems can block handoff of primary key/value data, i.e. data
+that you interact with via normal reads and writes.
+
+The `handoff.max_rejects` setting enables you to set the maximum
+duration that a [vnode]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode) can be blocked by multiplying the
+`handoff.max_rejects` setting by the value of
+[`vnode_management_timer`]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#vnode_management_timer).
+Thus, if you set `handoff.max_rejects` to 10 and
+`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems
+can block K/V handoff for a maximum of 50 seconds. The default for
+`handoff.max_rejects` is 6, while the default for
+`vnode_management_timer` is `10s`. This would set `max_rejects` to 10:
+
+```riakconf
+handoff.max_rejects = 10
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_rejected_max, 10},
+    %% Other configs
+]}
+```
+
+### Transfer Limit
+
+You can adjust the number of node-to-node transfers (which includes
+handoff) using the `transfer_limit` parameter. The default is 2. Setting
+this higher will increase node-to-node transfer concurrency, at the
+expense of greater resource consumption. This would set `transfer_limit`
+to 5:
+
+```riakconf
+transfer_limit = 5
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_concurrency, 5},
+    %% Other configs
+]}
+```
+
+## Enabling and Disabling Handoff
+
+Handoff can be enabled and disabled in two ways: via configuration or
+on the command line.
+
+### Enabling and Disabling via Configuration
+
+You can enable and disable both outbound and inbound handoff on a node
+using the `handoff.outbound` and `handoff.inbound` settings,
+respectively. Both are enabled by default. The following would disable
+both:
+
+```riakconf
+handoff.outbound = off
+handoff.inbound = off
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {disable_outbound_handoff, true},
+    {disable_inbound_handoff, true},
+    %% Other configs
+]}
+```
+
+### Enabling and Disabling Through the Command Line
+
+Check out the [Cluster Operations: Handoff][cluster ops handoff] page for steps on enabling and disabling handoff via the command line.
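+
+As a quick orientation, enabling and disabling handoff from the shell looks
+roughly like this (see the page above for the authoritative list of
+subcommands and their options):
+
+```bash
+# Disable outbound handoff on the local node
+riak-admin handoff disable outbound
+
+# Re-enable inbound and outbound handoff
+riak-admin handoff enable both
+
+# Summarize handoff activity across the cluster
+riak-admin handoff summary
+```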
diff --git a/content/riak/kv/2.9.2/using/reference/jmx.md b/content/riak/kv/2.9.2/using/reference/jmx.md new file mode 100644 index 0000000000..fae69e599c --- /dev/null +++ b/content/riak/kv/2.9.2/using/reference/jmx.md @@ -0,0 +1,186 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.2/ops/running/monitoring/jmx + - /riak/kv/2.9.2/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td>
+    <td>float</td>
+    <td>Mean PUT time (microseconds)</td>
+  </tr>
+  <tr>
+    <td><tt>NodePutFsmTimeMedian</tt></td>
+    <td>float</td>
+    <td>Median PUT time (microseconds)</td>
+  </tr>
+  <tr>
+    <td><tt>NodePuts</tt></td>
+    <td>int</td>
+    <td>Number of PUTs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>NodePutsTotal</tt></td>
+    <td>int</td>
+    <td>Number of PUTs since node start</td>
+  </tr>
+  <tr>
+    <td><tt>PBCActive</tt></td>
+    <td>int</td>
+    <td>Number of active Protocol Buffers connections</td>
+  </tr>
+  <tr>
+    <td><tt>PBCConnects</tt></td>
+    <td>int</td>
+    <td>Number of Protocol Buffers connections in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>PBCConnectsTotal</tt></td>
+    <td>int</td>
+    <td>Number of Protocol Buffers connections since node start</td>
+  </tr>
+  <tr>
+    <td><tt>RingCreationSize</tt></td>
+    <td>int</td>
+    <td>Number of partitions in Riak ring</td>
+  </tr>
+  <tr>
+    <td><tt>VnodeGets</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level GETs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>VnodeGetsTotal</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level GETs since node start</td>
+  </tr>
+  <tr>
+    <td><tt>VnodePuts</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level PUTs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>VnodePutsTotal</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level PUTs since node start</td>
+  </tr>
+</table>
diff --git a/content/riak/kv/2.9.2/using/reference/logging.md b/content/riak/kv/2.9.2/using/reference/logging.md
new file mode 100644
index 0000000000..fcb2bf8d72
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/logging.md
@@ -0,0 +1,297 @@
+---
+title: "Logging Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Logging"
+    identifier: "managing_ref_logging"
+    weight: 100
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/logging
+  - /riak/kv/2.9.2/ops/running/logging
+---
+
+[cluster ops log]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/logging
+
+Logging in Riak KV is handled by a Basho-produced logging framework for
+[Erlang](http://www.erlang.org) called
+[lager](https://github.com/basho/lager).
+
+lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document.
+
+## Log Directory
+
+Riak's log files are stored in a `/log` directory on each node. The
+location of that directory differs from platform to platform. The table
+below shows you where log files are stored on all supported operating
+systems.
+
+OS | Directory
+:--|:---------
+Ubuntu, Debian, CentOS, RHEL | `/var/log/riak`
+Solaris, OpenSolaris | `/opt/riak/log`
+Source install and Mac OS X | `./log` (where the `.` represents the root installation directory)
+
+## Log Files
+
+Below is a list of the files that can be found in each node's `/log`
+directory:
+
+File | Significance
+:----|:------------
+`console.log` | Console log output: general messages from all Riak subsystems
+`crash.log` | Crash logs: catastrophic events such as node failures, running out of disk space, etc.
+`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs.
+`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak.
+`run_erl.log` | The log file for an Erlang process called `run_erl`, including the command-line arguments used when starting Riak. This file can typically be ignored.
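+
+If you simply need to watch a node's activity, tailing the console log from
+the directory above is often all you need; for example, on a Debian or
+Ubuntu package install:
+
+```bash
+tail -f /var/log/riak/console.log
+```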
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.9.2, a few of the log messages may contain newline
+characters, preventing external tools from reliably identifying the end
+of each log entry when ingesting log files.
+
+A known workaround is to ingest not the logs enabled by the
+`log.console` configurable parameter but rather the logs enabled by the
+`log.syslog` configurable parameter, after they have been processed by
+syslog, e.g. using the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option (see [this StackExchange topic
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474))
+- or its equivalent - in other syslog implementations.
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable and
+disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+Riak stores error messages in `./log/error.log` by default.
+You can change this using the `log.error.file` parameter.
Here is an
+example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated either when a certain size threshold is reached and/or at
+designated times.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` --- Every night at midnight
+* `$D23` --- Every day at 23:00 (11 pm)
+* `$W0D20` --- Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` --- On the first day of every month at midnight
+* `$M5D6` --- On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, using any size denomination
+you wish, e.g. `KB` or `MB`. The default is 64 KB. The following would
+set that maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix for each syslog
+message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages amongst the available levels, which are listed below. The
+default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither.
This is determined by the value that you give to the
+`log.console` parameter, which gives you one of four options:
+
+* `file` --- Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/2.9.2/setup/installing/source), but will differ on platform-specific installations,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` --- Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-cli/#attach-direct) command
+* `both` --- Console logs will be emitted both to a file and to standard
+  output
+* `off` --- Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log].
diff --git a/content/riak/kv/2.9.2/using/reference/multi-datacenter.md b/content/riak/kv/2.9.2/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..dd6ae6e72c
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/multi-datacenter.md
@@ -0,0 +1,48 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling & disabling of per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][ref mdc comparison]
diff --git a/content/riak/kv/2.9.2/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.9.2/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..ac806e277e
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,96 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/mdc/comparison
+  - /riak/kv/2.9.2/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/2.9.2/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/2.9.2/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destination of replication data. Sites and listeners are
+  manually configured on each node in a cluster. This can be a burden to
+  the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity was achieved in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters are exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes, and the sink has N nodes, there will be M
+  realtime connections.
Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
diff --git a/content/riak/kv/2.9.2/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/2.9.2/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..cbb8735afc
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,170 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/mdc/monitoring
+  - /riak/kv/2.9.2/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identification and trending of issues or delays in realtime replication
+is important for identifying a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute.
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+---
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The size of `rt_dirty` can quantify the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). As with realtime queue backups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+the maximum size of the queue (displayed in the `realtime_queue_stats`
+section of the replication status output as `max_bytes`) can be
+increased to accommodate expected periods of high load.
+
+---
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
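+
+As an illustration of the checks described above, here is a minimal
+monitoring sketch against the HTTP `/riak-repl/stats` endpoint. It assumes
+the `jq` utility is installed, and that `rt_dirty` and
+`realtime_queue_stats.pending` appear at these JSON paths in your version's
+output; verify both against your own cluster before relying on it:
+
+```bash
+#!/bin/sh
+# Alert if realtime replication has seen errors or the queue is backing up.
+STATS=$(curl -s http://127.0.0.1:8098/riak-repl/stats)
+RT_DIRTY=$(echo "$STATS" | jq '.rt_dirty // 0')
+PENDING=$(echo "$STATS" | jq '.realtime_queue_stats.pending // 0')
+
+if [ "$RT_DIRTY" -gt 0 ]; then
+  echo "rt_dirty=$RT_DIRTY: errors detected, schedule a fullsync" >&2
+fi
+if [ "$PENDING" -gt 1000 ]; then  # threshold is illustrative
+  echo "realtime queue backed up: pending=$PENDING" >&2
+fi
+```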
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+1. Perform a GET for your canary object on both your source and sink
+   clusters, noting their states. The state of the object in each cluster
+   can be referred to as state `S0`, or the object's initial state.
+2. PUT an update for your canary object to the source cluster, updating
+   the state of the object to the next state, `S1`.
+3. Perform a GET for your canary on the sink cluster, comparing the state
+   of the object on the source cluster to the state of the object on the
+   sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+Getting a rough estimate of how long it takes an object PUT to a source
+cluster to be replicated to a sink cluster can be done by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
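+
+To make the canary procedure concrete, below is a minimal sketch over
+Riak's HTTP API. The host names, bucket, key, and 30-second SLA window are
+all illustrative assumptions to be adapted to your environment:
+
+```bash
+#!/bin/sh
+# Canary replication check: PUT a new state to the source, poll the sink.
+SOURCE=http://source-node:8098
+SINK=http://sink-node:8098
+STATE=$(date +%s)   # the canary's next state, S1
+
+curl -s -X PUT -H "Content-Type: text/plain" \
+  -d "$STATE" "$SOURCE/buckets/canary/keys/repl-canary"
+
+# Poll the sink until the states match or the SLA window expires
+i=0
+while [ $i -lt 30 ]; do
+  if [ "$(curl -s "$SINK/buckets/canary/keys/repl-canary")" = "$STATE" ]; then
+    echo "canary replicated in ~${i}s"
+    exit 0
+  fi
+  sleep 1
+  i=$((i + 1))
+done
+echo "SLA exceeded: canary not replicated within 30s" >&2
+exit 1
+```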
diff --git a/content/riak/kv/2.9.2/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/2.9.2/using/reference/multi-datacenter/per-bucket-replication.md new file mode 100644 index 0000000000..3da3cf84e0 --- /dev/null +++ b/content/riak/kv/2.9.2/using/reference/multi-datacenter/per-bucket-replication.md @@ -0,0 +1,62 @@ +--- +title: "Multi-Datacenter Replication Reference: Per Bucket" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Per Bucket" + identifier: "managing_ref_mdc_per_bucket" + weight: 101 + parent: "managing_ref_mdc" +toc: true +aliases: + - /riak/2.9.2/ops/mdc/per-bucket + - /riak/kv/2.9.2/ops/mdc/per-bucket +--- + +To enable or disable replication per bucket, you can use the `repl` +bucket property. + +Some changes have occurred between 1.1 and 1.2. + +These `repl` values are available in Riak Enterprise version 1.1 and +above: + + * `true` --- Enable replication (realtime + fullsync) + * `false` --- Disable replication (realtime + fullsync) + +These option values are only available in Riak Enterprise version 1.2 +and above: + + * `realtime` --- Replication only occurs in realtime for this bucket + * `fullsync` --- Replication only occurs during a fullsync operation + * `both` --- Replication occurs in realtime and during fullsync + +### Example of Disabling + +```curl +curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \ + -H "Content-Type: application/json" \ + -d '{"props":{"repl":false}}' +``` + +### Example of Enabling + +```curl +curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \ + -H "Content-Type: application/json" \ + -d '{"props":{"repl":true}}' +``` + +## How Bucket Properties Work in Riak KV + +When using Multi-Datacenter Replication, each bucket's write properties +are derived from the bucket's properties in the destination cluster. If +the bucket doesn't exist, the default properties of the destination +cluster are used. + +It's important to note that this goes for properties such as `backend`. +If the bucket doesn't exist in the destination cluster, Riak will create +it with the default backend and _not_ with the backend used in the +source cluster. diff --git a/content/riak/kv/2.9.2/using/reference/multi-datacenter/statistics.md b/content/riak/kv/2.9.2/using/reference/multi-datacenter/statistics.md new file mode 100644 index 0000000000..1e083e4397 --- /dev/null +++ b/content/riak/kv/2.9.2/using/reference/multi-datacenter/statistics.md @@ -0,0 +1,240 @@ +--- +title: "Multi-Datacenter Replication Reference: Statistics" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Statistics" + identifier: "managing_ref_mdc_stats" + weight: 100 + parent: "managing_ref_mdc" +toc: true +aliases: + - /riak/2.9.2/ops/mdc/statistics + - /riak/kv/2.9.2/ops/mdc/statistics +--- + +The following definitions describe the output of `riak-repl status`. +Both Version 2 and Version 3 Replication statistics can be obtained +using the `riak-repl status` command. + +There are two things that you should note: + +1. Many of these statistics will appear only on the current + leader node +2. 
The counts for all statistics will be reset to 0 upon restarting Riak
+   unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | A sink error has been detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | A source error has been detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+These values are under `realtime_queue_stats`.
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster
+`fullsync_start_time` | The time the current fullsync to the specified cluster began
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When completed, this will be the same as the total number of partitions in the ring.
+`error_exits` | If a sync failed or was aborted, the partition will be re-queued and tried again later
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
+`strategy` | The strategy that fulfills fullsync replication.
In previous versions of replication, different values could be configured, and changed depending on your replication needs.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received to the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received to the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication sink
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | State shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The name of the connected site (sink) as configured. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | State shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used.
They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for
+more information.
+
+[config v2 mdc]: {{<baseurl>}}riak/kv/2.9.2/configuring/v2-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more
+
+
+## Accessing Replication Web-Based Statistics
+
+These stats can be accessed via the command line with the following
+command:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats
+```
+
+A simple way to view formatted statistics is to use a command such as:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp
+```
diff --git a/content/riak/kv/2.9.2/using/reference/object-deletion.md b/content/riak/kv/2.9.2/using/reference/object-deletion.md
new file mode 100644
index 0000000000..a7bb87b018
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/object-deletion.md
@@ -0,0 +1,117 @@
+---
+title: "Object Deletion Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Object Deletion"
+    identifier: "managing_ref_object_deletion"
+    weight: 103
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/deletion
+---
+
+[concept eventual consistency]: ../../../learn/concepts/eventual-consistency
+[concept clusters]: ../../../learn/concepts/clusters
+[glossary vnode]: ../../../learn/glossary/#vnode
+[usage delete objects]: ../../../developing/usage/deleting-objects
+[developing keylist]: ../../../developing/api/http/list-keys
+[developing mapreduce]: ../../../developing/usage/mapreduce
+[cluster mdc]: ../../cluster-operations/v3-multi-datacenter
+[config advanced]: ../../../configuring/reference/#advanced-configuration
+[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum
+[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings
+[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction
+
+In single-server, non-clustered data storage systems, object deletion
+is a trivial process.
In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however, +object deletion is far less trivial because objects live on multiple +[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend. + +## Object Deletion Example + +The problem of object deletion in distributed systems can be illustrated more concretely using the following example: + +* An object is stored on nodes A, B, and C +* Node C suddenly goes offline due to a network failure +* A client sends a delete request to node A, which forwards that + request to node B, but it cannot reach node C +* On nodes A and B, the object is deleted +* Node C comes back online +* A client attempts to read the object, and the request hits node C +* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object. + +The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred: + +1. the object was deleted on A & B but not on C +2. the object was created on C but not on A & B + +To get around this problem, Riak uses *Tombstones*. + +## Tombstones + +Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it. + +This allows Riak to understand the difference between an object that has been deleted, and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*. + +The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**. + +After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to). + +## Configuring Object Deletion + +The `delete_mode` setting in a cluster's [configuration files][config advanced] will determine how long a tombstone will remain before being reaped. + +There are three possible settings: + +* `keep` --- Disables tombstone removal +* `immediate` --- The tombstone is removed as soon as the request is + received +* Custom time interval --- How long to wait until the tombstone is + removed, expressed in milliseconds. The default is `3000`, i.e. to + wait 3 seconds + +In general, we recommend setting the `delete_mode` parameter to `keep` +if you plan to delete and recreate objects under the same key. This protects against failure scenario cases in which a deleted object may be resurrected. + +Setting `delete_mode` to `immediate` can be useful in situations in +which an aggressive space reclamation process is necessary, such as +when running [MapReduce jobs][developing mapreduce], but we do not recommend +this in general. + +Setting `delete_mode` to a longer time duration than the default can be +useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when +network connectivity is an issue. + +## Deletion from Backends + +When attempting to reclaim disk space, deleting data may seem like the obvious first step. 
However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion when a Riak tombstone is created, and later when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging], for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping; prior to garbage collection, delete operations will actually cause disk space usage to rise slightly.
+
+## Tombstones & Reporting
+
+When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:
+
+* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
+* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain on the node until a further request finds it. At this time, a new reap timer will be initiated.
+* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
+* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too.
+* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.
+
+## Client Library Examples
+
+Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
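+
+If you just want to see the mechanics, deletion is also exposed directly over HTTP. Below is a minimal sketch, assuming a local node listening on port 8098; the `users` bucket and `alice` key are hypothetical:
+
+```curl
+# Delete the object; a 204 No Content response indicates success
+curl -i -XDELETE http://localhost:8098/buckets/users/keys/alice
+
+# Once the tombstone is in place, a subsequent read returns 404 Not Found
+curl -i http://localhost:8098/buckets/users/keys/alice
+```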
+
+## Resources
+
+* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html)
diff --git a/content/riak/kv/2.9.2/using/reference/runtime-interaction.md b/content/riak/kv/2.9.2/using/reference/runtime-interaction.md
new file mode 100644
index 0000000000..5bf17433c2
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/runtime-interaction.md
@@ -0,0 +1,66 @@
+---
+title: "Runtime Interaction Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Runtime Interaction"
+    identifier: "managing_ref_runtime_interaction"
+    weight: 104
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/runtime
+  - /riak/kv/2.9.2/ops/advanced/runtime
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+
+Riak's [configuration files][config reference] provide a variety of parameters that
+enable you to fine-tune how Riak interacts with two important elements
+of the underlying operating system: distribution ports and OS
+processes/garbage collection.
+
+## Ports
+
+Distribution ports connect Riak nodes within a [cluster][concept clusters]. The
+following port-related parameters are available:
+
+* `runtime_health.triggers.distribution_port` --- Whether distribution
+  ports with full input buffers will be counted as busy.
+  * Default: `on`
+* `runtime_health.triggers.port` --- Whether ports with full input
+  buffers will be counted as busy. Ports can represent open files or network sockets.
+  * Default: `on`
+* `runtime_health.thresholds.busy_ports` --- The threshold at which a
+  warning will be triggered about the number of ports that are overly
+  busy. Ports with full input buffers count toward this threshold.
+  * Default: `2`
+
+## Processes
+
+Riak will log warnings related to busy operating system processes and
+garbage collection. You can specify the conditions in which warnings are
+triggered using the following parameters:
+
+* `runtime_health.thresholds.busy_processes` --- The threshold at which
+  a warning will be triggered about the number of processes that are
+  overly busy. Processes with large heaps or that take a long time to
+  garbage collect will count toward this threshold.
+  * Default: `30`
+* `runtime_health.triggers.process.heap_size` --- A process will be
+  marked as busy when its heap exceeds this size (in bytes).
+  * Default: `160444000`
+* `runtime_health.triggers.process.garbage_collection` --- A process
+  will be marked as busy when it exceeds this amount of time doing
+  garbage collection. Enabling this setting can cause performance
+  problems on multi-core systems.
+  * Default: `off`
+  * Example when enabled: `50ms`
+* `runtime_health.triggers.process.long_schedule` --- A process will
+  become busy when it exceeds this length of time during a single
+  process scheduling and execution cycle.
+ * Default: `off` + * Example when enabled: `20ms` diff --git a/content/riak/kv/2.9.2/using/reference/search.md b/content/riak/kv/2.9.2/using/reference/search.md new file mode 100644 index 0000000000..70b3c9b26a --- /dev/null +++ b/content/riak/kv/2.9.2/using/reference/search.md @@ -0,0 +1,454 @@ +--- +title: "Search Reference" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Search" + identifier: "managing_ref_search" + weight: 109 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.2/dev/advanced/search + - /riak/kv/2.9.2/dev/advanced/search +--- + +[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters +[configuring search]: {{<baseurl>}}riak/kv/2.9.2/configuring/search + +> **Note on search 2.0 vs. legacy search** +> +> This document refers to Riak search 2.0 with +[Solr](http://lucene.apache.org/solr/) integration (codenamed +Yokozuna). + +The project that implements Riak search is codenamed Yokozuna. This is a +more detailed overview of the concepts and reasons behind the design of +Yokozuna, for those interested. If you're simply looking to use Riak +search, you should check out the [Using Search]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search) document. + +![Yokozuna]({{<baseurl>}}images/yokozuna.png) + +## Riak Search is Erlang + +In Erlang OTP, an "application" is a group of modules and Erlang +processes which together perform a specific task. The word application +is confusing because most people think of an application as an entire +program such as Emacs or Photoshop. But Riak Search is just a sub-system +in Riak itself. Erlang applications are often stand-alone, but Riak +Search is more like an appendage of Riak. It requires other subsystems +like Riak Core and KV, but also extends their functionality by providing +search capabilities for KV data. + +The purpose of Riak Search is to bring more sophisticated and robust +query and search support to Riak. Many people consider Lucene and +programs built on top of it, such as Solr, as the standard for +open-source search. There are many successful applications built on +Lucene/Solr, and it sets the standard for the feature set that +developers and users expect. Meanwhile, Riak has a great story as a +highly-available, distributed key/value store. Riak Search takes +advantage of the fact that Riak already knows how to do the distributed +bits, combining its feature set with that of Solr, taking advantage of +the strengths of each. + +Riak Search is a mediator between Riak and Solr. There is nothing +stopping a user from deploying these two programs separately, but this +would leave the user responsible for the glue between them. That glue +can be tricky to write. It requires dealing with monitoring, querying, +indexing, and dissemination of information. + +Unlike Solr by itself, Riak Search knows how to do all of the following: + +* Listen for changes in key/value (KV) data and to make the appropriate + changes to indexes that live in Solr. It also knows how to take a user + query on any node and convert it to a Solr distributed search, which + will correctly cover the entire index without overlap in replicas. +* Take index creation commands and disseminate that information across + the cluster. +* Communicate and monitor the Solr OS process. + +## Solr/JVM OS Process + +Every node in a Riak [cluster][concept clusters] has a corresponding operating +system (OS) process running a JVM which hosts Solr on the Jetty +application server. 
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
+
+Because of this mismatch between KV and Solr, Riak Search must act as a
+mediator between the two, meaning it must have a way to inspect a KV
+object and create a structure which Solr can ingest for indexing. In
+Solr this structure is called a **document**. This task of creating a
+Solr document from a Riak object is the job of the **extractor**. To
+perform this task, two things must be considered.
+
+**Note**: This isn't quite right: the fields created by the extractor
+are only a subset of the fields in the final document. Special fields needed for
+Yokozuna to properly query data and tagging fields are also created.
+This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a
+   Solr document?
+2. If so, how is the object's value mapped from one to the other?
+   For example, the value may be `application/json` which contains
+   nested objects. This must somehow be transformed into a flat
+   structure.
+
+The first question is answered by the _extractor mapping_. By default
+Yokozuna ships with extractors for several common data types. Below is a
+table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation
+of the extractor module. Every extractor must conform to the
+following Erlang specification:
+
+```erlang
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+
+-spec extract(ObjectValue::binary(), Options::proplist()) -> fields() | {error, term()}.
+```
+
+The value of the object is passed along with options specific to each
+extractor. Assuming the extractor correctly parses the value, it will
+return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the
+object's value verbatim and associate it with the field name `text`.
+For example, an object with the value "How much wood could a woodchuck
+chuck if a woodchuck could chuck wood?" would result in the following
+field list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier.
+JSON can be nested arbitrarily. That is, the key of a top-level object
+can have an object as a value, and this object can have another object
+nested inside, and so on. Yokozuna's JSON extractor must have some method
+of converting this arbitrary nesting into a flat list. It does this by
+concatenating nested object fields with a separator. The default
+separator is `.`. An example should make this more clear.
+
+Below is JSON that represents a person, what city they are from and what
+cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice.
+
+* Nested objects have their field names concatenated to form a field
+  name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your
+  schema defines this field as multi-valued.
+
+The XML extractor works in a very similar fashion to the JSON extractor
+except it also has element attributes to worry about. To see the
+document created for an object, without actually writing the object, you
+can use the extract HTTP endpoint. This will do a dry-run extraction and
+return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+  -H 'Content-Type: application/json' \
+  --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and
+types. For each document stored, every field must have a matching name
+in the schema, used to determine the field's type, which in turn
+determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempt to hide any details of the Solr
+schema: a user creates a schema for Yokozuna just as she would for Solr.
+Here is the general structure of a schema.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding
+options are declared. Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
+called `_yz_default`. This is an extremely general schema which makes
+heavy use of dynamic fields---it is intended for development and
+testing. In production, a schema should be tailored to the data being
+indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+correcting entropy (divergence) between the data stored in Riak's
+key-value backend and the indexes stored in Solr. The impetus for AAE is
+that failures come in all shapes and sizes---disk failure, dropped
+messages, network partitions, timeouts, overflowing queues, segmentation
+faults, power outages, etc. Failures range from obvious to invisible.
+Failure prevention is fraught with failure, as well. How do you prevent
+your prevention system from failing? You don't. Code for detection, not
+prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite
+expensive. To minimize the overall cost of detection, AAE makes use of
+hashtrees. Every partition has a pair of hashtrees; one for KV and
+another for Yokozuna. As data is written the hashtrees are updated in
+real-time.
+
+Each tree stores the hash of the object. Periodically a partition is
+selected and the pair of hashtrees is _exchanged_. First the root hashes
+are compared. If equal then there is no more work to do.
You could have
+millions of keys in one partition and verifying they **all** agree takes
+the same time as comparing two hashes. If they don't match then the
+root's children are checked and this process continues until the
+individual discrepancies are found. If either side is missing a key or
+the hashes for a key do not match then _repair_ is invoked on that key.
+Repair converges the KV data and its indexes, removing the entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the
+hashtrees themselves may contain some entropy. For example, what if the
+root hashes agree but a divergence exists in the actual data? Simple:
+you assume you can never fully trust the hashtrees, so periodically you
+_expire_ them. When expired, a tree is completely destroyed and then
+re-built from scratch. This requires folding all data for a partition,
+which can be expensive and take some time. For this reason, by default,
+expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
+[screencast](http://coffee.jtuple.com/video/AAE.html).
+
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a
+stream of tokens. Solr allows many different methods of analysis,
+an important fact because different field values may represent
+different types of data. For data like unique identifiers, dates, and
+categories you want to index the value verbatim---it shouldn't be
+analyzed at all. For text like product summaries, or a blog post,
+you want to split the value into individual words so that they may be
+queried individually. You may also want to remove common words,
+lowercase words, or perform stemming. This is the process of
+_analysis_.
+
+Solr provides many different field types which analyze data in different
+ways, and custom analyzer chains may be built by stringing together XML
+in the schema file, allowing custom analysis for each field. For more
+information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via
+Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata
+   about it that should be indexed, for example storing an image with
+   location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must
+   be added _without_ modifying the object's value.
+
+See
+[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
+for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index
+entries for a given Riak Object are co-located on the same physical
+machine. To query the entire index all partitions must be contacted.
+Adjacent partitions keep replicas of the same object. Replication allows
+the entire index to be considered by only contacting a subset of the
+partitions. The process of finding a covering set of partitions is known
+as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can
+be thought of as a unique set of nodes along with a covering set of
+partitions. Yokozuna treats the node list as physical hostnames and
+passes them to Solr's distributed search via the `shards` parameter.
+Partitions, on the other hand, are treated logically in Yokozuna. All
+partitions for a given node are stored in the same index; unlike KV,
+which uses _partition_ as a physical separation.
To properly filter out
+overlapping replicas, the partition data from the cover plan is passed to
+Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very
+expensive operation as much computation is done symbolically, and the
+process amounts to a knapsack problem. The larger the ring, the more
+expensive. Yokozuna takes advantage of the fact that it has no physical
+partitions by computing a coverage plan asynchronously every few
+seconds, caching the plan for query use. In the case of node failure or
+ownership change this could mean a delay between cluster state and the
+cached plan. This is, however, a good trade-off given the performance
+benefits, especially since even without caching there is a race, albeit
+one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time it takes to initiate a drain to the time the drain is completed.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees.
Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr. + +* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting. + +* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window. + +* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window. + +* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of querying latency, as measured from the time it takes to send a request to Solr to the time the response is received from Solr. + +* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window. + +* `search_index_bad_entry_count` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. + +* `search_index_bad_entry_one` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute. + +* `search_index_extract_fail_count` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak. + +* `search_index_extract_fail_one` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute. + +While most of the default values are sufficient, you may have to +increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start. \ No newline at end of file diff --git a/content/riak/kv/2.9.2/using/reference/secondary-indexes.md b/content/riak/kv/2.9.2/using/reference/secondary-indexes.md new file mode 100644 index 0000000000..d6878c8bc2 --- /dev/null +++ b/content/riak/kv/2.9.2/using/reference/secondary-indexes.md @@ -0,0 +1,72 @@ +--- +title: "Secondary Indexes Reference" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Secondary Indexes" + identifier: "managing_ref_2i" + weight: 110 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.2/dev/advanced/2i + - /riak/kv/2.9.2/dev/advanced/2i +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.2/using/reference/strong-consistency + +> **Note: Riak Search preferred for querying** +> +> If you're interested in non-primary-key-based querying in Riak, i.e. if +you're looking to go beyond straightforward K/V operations, we now +recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. 
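+
+As a point of comparison, a Search query is a single HTTP call against an index. Below is a minimal sketch, assuming a local node on port 8098; the `famous` index name is hypothetical:
+
+```curl
+curl "http://localhost:8098/search/query/famous?wt=json&q=name_s:Lion*"
+```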
+ +This document provides implementation and other details for Riak's +[secondary indexes]({{<baseurl>}}riak/kv/2.9.2/developing/usage/secondary-indexes/) \(2i) feature. + +## How It Works + +Secondary indexes use **document-based partitioning**, a system where +indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode). This +system is also a local index. Secondary indexes are a list of key/value +pairs that are similar to HTTP headers. At write time, objects are +tagged with index entries consisting of key/value metadata. This +metadata can be queried to retrieve the matching keys. + +![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png) + +Indexes reside on multiple machines. Since indexes for an object are +stored on the same partition as the object itself, query-time +performance issues might arise. When issuing a query, the system must +read from a "covering" set of partitions and then merge the results. +The system looks at how many replicas of data are stored---the N value +or `n_val`---and determines the minimum number of partitions that it +must examine (1 / `n_val`) to retrieve a full set of results, also +taking into account any offline nodes. + +An application can modify the indexes for an object by reading an +object, adding or removing index entries, and then writing the object. +Finally, an object is automatically removed from all indexes when it is +deleted. The object's value and its indexes should be thought of as a +single unit. There is no way to alter the indexes of an object +independently from the value of an object, and vice versa. Indexing is +atomic, and is updated in real time when writing an object. This means +that an object will be present in future index queries as soon as the +write operation completes. + +Riak stores 3 replicas of all objects by default, although this can be +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/replication-properties). The system is capable of generating a full set of results +from one third of the system’s partitions as long as it chooses the +right set of partitions. The query is sent to each partition, the index +data is read, and a list of keys is generated and then sent back to the +requesting node. + +> **Note on 2i and strong consistency** +> +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. 
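+
+For illustration, the HTTP API exposes both exact-match and range queries over index fields. Below is a minimal sketch, assuming a local node on port 8098; the bucket and index names are hypothetical:
+
+```curl
+# Exact match on a binary index
+curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith123
+
+# Range query on an integer index
+curl http://localhost:8098/buckets/users/index/age_int/21/35
+```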
diff --git a/content/riak/kv/2.9.2/using/reference/snmp.md b/content/riak/kv/2.9.2/using/reference/snmp.md
new file mode 100644
index 0000000000..20067803e2
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/snmp.md
@@ -0,0 +1,162 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.2/ops/running/monitoring/snmp
+  - /riak/kv/2.9.2/ops/running/monitoring/snmp
+---
+
+Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document covers only SNMP v2c, the last supported version. Support for SNMP was dropped after the release of Riak KV 2.2.3 Enterprise Edition. The configuration examples below are left for people analyzing legacy settings and only work with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (Ex: `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: The commas in the IP are in the correct format
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                   {force_load, true}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings you can start your Riak node and will be able to snmpwalk the node to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak you will need to reference the MIB shipped with Riak.
For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+    -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+    192.168.52.129:4000 RIAK
+```
+
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
diff --git a/content/riak/kv/2.9.2/using/reference/statistics-monitoring.md b/content/riak/kv/2.9.2/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..05be0e862d
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/statistics-monitoring.md
@@ -0,0 +1,391 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/stats-and-monitoring
+  - /riak/kv/2.9.2/ops/running/stats-and-monitoring
+---
+
+Riak provides data related to current operating status, which includes
+statistics in the form of counters and histograms. These statistics
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/2.9.2/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered
+statistics, as well as numerous solutions for monitoring and gathering
+statistics that our customers and community report using successfully
+in Riak cluster environments. You can learn more about the specific
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/2.9.2/developing/api/http/status) documentation.
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with
+diagnostics and early warnings of potential problems, as well as help
+guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor Load
+* Available Memory
+* Available disk space
+* Used file descriptors
+* Swap Usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory subsystem and
+writebacks. Things like massive flushes of dirty pages or steadily
+climbing writeback volumes can indicate poor virtual memory tuning.
+More information can be found [here][sysctl_vm_txt] and in our
+documentation on [system tuning]({{<baseurl>}}riak/kv/2.9.2/using/performance/#storage-and-file-system-tuning).
+
+## Riak Metrics to Graph
+Riak metrics fall into several general categories:
+
+1. Throughput metrics
+2. Latency metrics
+3. Erlang resource usage metrics
+4. General Riak load/health metrics
+
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/inspecting-node) is
+not practical, you should pick a minimum relevant subset from these
+categories. Some of the most helpful metrics are discussed below.
+
+### Throughput Metrics
+
+Graphing the throughput stats relevant to your use case is often
+helpful for capacity planning and usage trend analysis. In addition,
+it helps you establish an expected baseline -- that way, you can
+investigate unexpected spikes or dips in the throughput. The
+following stats are recorded for operations that happened *during the
+last minute*.
+
+Metric | Relevance | Operations (for the last minute)
+:--------|:--------|:--------------------------------
+```node_gets``` | K/V | Reads coordinated by this node
+```node_puts``` | K/V | Writes coordinated by this node
+```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes
+```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes
+```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes
+```search_query_throughput_one``` | Search | Search queries on the node
+```search_index_throughput_one``` | Search | Documents indexed by Search
+```consistent_gets``` | Strong Consistency | Consistent reads on this node
+```consistent_puts``` | Strong Consistency | Consistent writes on this node
+```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads
+
+Note that there are no separate stats for updates to Flags or
+Registers, as these are included in ```vnode_map_update```.
+
+### Latency Metrics
+
+As with the throughput metrics, keeping an eye on average (and max)
+latency times will help detect usage patterns, and provide advanced
+warnings for potential problems.
+
+{{% note title="Note on FSM Time Stats" %}}
+FSM Time Stats represent the amount of time in microseconds required to
+traverse the GET or PUT Finite State Machine code, offering a picture of
+general node health. From your application's perspective, FSM Time effectively
+represents experienced latency. Mean, Median, and 95th-, 99th-, and
+100th-percentile (Max) counters are displayed. These are one-minute stats.
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute +```pbc_active``` | | Number of currently active protocol buffer connections +```pbc_connects``` | | Number of new protocol buffer connections established during the last minute +```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes) +```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0) +```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection +```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### General Riak Search Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Description +:------|:------------ +`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. +`search_index_bad_entry_one ` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute. +`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last start of Riak. +`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute. + + +## Command-line Interface + +The [`riak-admin`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/) tool provides two +interfaces for retrieving statistics and other information: `status` +and `stat`. + +### status + +Running the `riak-admin status` command will return all of the +currently available information from a running node. + +```bash +riak-admin status +``` + +This will return a list of over 300 key/value pairs, like this: + +``` +1-minute stats for 'dev1@127.0.0.1' +------------------------------------------- +connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1'] +consistent_get_objsize_100 : 0 +consistent_get_objsize_195 : 0 +... etc ... +``` + +A comprehensive list of available stats can be found in the +[Inspecting a Node]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/inspecting-node/#riak-admin-status) document. + +### stat + +The `riak-admin stat` command is related to the `riak-admin status` +command but provides a more fine-grained interface for interacting with +stats and information. Full documentation of this command can be found +in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#stat) document. + +## Statistics and Monitoring Tools + +There are many open source, self-hosted, and service-based solutions for +aggregating and analyzing statistics and log data for the purposes of +monitoring, alerting, and trend analysis on a Riak cluster. Some +solutions provide Riak-specific modules or plugins as noted. + +The following are solutions which customers and community members have +reported success with when used for monitoring the operational status of +their Riak clusters. Community and open source projects are presented +along with commercial and hosted services. 
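+
+Whichever tool you choose, most of them ultimately poll the same HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.2/developing/api/http/status) endpoint. Below is a minimal sketch of that polling pattern, assuming a local node and the `jq` utility; the metric selection is just an example:
+
+```bash
+# Scrape a few key metrics from the stats endpoint once a minute
+while true; do
+  curl -s http://localhost:8098/stats | \
+    jq '{node_gets, node_puts, node_get_fsm_time_95, node_put_fsm_time_95}'
+  sleep 60
+done
+```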
+ +{{% note title="Note on Riak 2.x Statistics Support" %}} +Many of the below tools were either created by third-parties or Basho +engineers for general usage, and have been passed to the community for further +updates. As such, many of the below only aggregate the statistics and messages +that were output by Riak 1.4.x. + +Like all code under [Basho Labs](https://github.com/basho-labs/), the below +tools are "best effort" and have no dedicated Basho support. We both +appreciate and need your contribution to keep these tools stable and up to +date. Please open up a GitHub issue on the repository if you'd like to be a +maintainer. + +Look for banners calling out the tools we've verified that support the latest +Riak 2.x statistics! +{{% /note %}} + +### Self-Hosted Monitoring Tools + +#### Riaknostic + +[Riaknostic](http://riaknostic.basho.com) is a growing suite of +diagnostic checks that can be run against your Riak node to discover +common problems and recommend how to resolve them. These checks are +derived from the experience of the Basho Client Services Team as well as +numerous public discussions on the mailing list, IRC room, and other +online media. + +Riaknostic integrates into the `riak-admin` command via a `diag` +subcommand, and is a great first step in the process of diagnosing and +troubleshooting issues on Riak nodes. + +#### Riak Control + +[Riak Control]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +clusters. It is designed to give you quick insight into the health of +your cluster and allow for easy management of nodes. + +While Riak Control does not currently offer specific monitoring and +statistics aggregation or analysis functionality, it does offer features +which provide immediate insight into overall cluster health, node +status, and handoff operations. + +#### collectd + +[collectd](http://collectd.org) gathers statistics about the system it +is running on and stores them. The statistics are then typically graphed +to find current performance bottlenecks, predict system load, and +analyze trends. + +#### Ganglia + +[Ganglia](http://ganglia.info) is a monitoring system specifically +designed for large, high-performance groups of computers, such as +clusters and grids. Customers and community members using Riak have +reported success in using Ganglia to monitor Riak clusters. + +A [Riak Ganglia module][riak_ganglia] for collecting statistics from +the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.2/developing/api/http/status) endpoint is also available. + +#### Nagios + +{{% note %}} +**Tested and Verified Support for Riak 2.x.** +{{% /note %}} + +[Nagios](http://www.nagios.org) is a monitoring and alerting solution +that can provide information on the status of Riak cluster nodes, in +addition to various types of alerting when particular events occur. +Nagios also offers logging and reporting of events and can be used for +identifying trends and capacity planning. + +A collection of [reusable Riak-specific scripts][riak_nagios] are +available to the community for use with Nagios. + +#### OpenTSDB + +[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database +(TSDB) used to store, index, and serve metrics from various sources. It can +collect data at a large scale and graph these metrics on the fly. + +A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of +the [tcollector framework][tcollector]. 
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project
+consisting of small programs for sending data to Riemann provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have
+reported successfully using for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.2/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/2.9.2/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. These data are then available for real time graphing,
+search, and other visualization ideal for troubleshooting complex issues
+and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistic information which can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ diff --git a/content/riak/kv/2.9.2/using/reference/strong-consistency.md b/content/riak/kv/2.9.2/using/reference/strong-consistency.md new file mode 100644 index 0000000000..2efcdedb08 --- /dev/null +++ b/content/riak/kv/2.9.2/using/reference/strong-consistency.md @@ -0,0 +1,145 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.2/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + +## Making the Strong vs. Eventual Decision + +The first system described above may sound like the undisputed champion, +and the second system undesirable. However: + +1. Reads and writes on the first system will often be slower---if only + by a few milliseconds---because the system needs to manage reads and + writes more carefully. If performance is of primary concern, the + first system might not be worth the sacrifice. +2. Reads and writes on the first system may fail entirely if enough + servers are unavailable. If high availability is the top priority, + then the second system has a significant advantage. + +So when deciding whether to use strong consistency in Riak, the +following question needs to be asked: + +#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value? + +If the answer is yes, then you should seriously consider using Riak in a +strongly consistent way for the data that demands it, while bearing in +mind that other data can still be stored in Riak in an eventually +consistent way. 
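+
+As a brief sketch of what this looks like in practice: strong consistency
+is applied via bucket types, so you would typically create and activate a
+bucket type with the `consistent` property set to `true` (the type name
+`strongly_consistent` below is only an example):
+
+```bash
+## Create a bucket type whose keys will be managed by the consensus subsystem
+riak-admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'
+
+## Once the type has propagated to all nodes, activate it
+riak-admin bucket-type activate strongly_consistent
+```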
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable
+trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available
+than eventually consistent operations because they require a **quorum**
+of available object replicas to succeed. Quorum is defined as N / 2 + 1
+(using integer division), where N is the bucket's `n_val`. If N is set
+to 7, at least 4 object replicas must be available; if N is 3, at
+least 2.
+
+If a network partition leaves fewer than a quorum of object
+replicas available within an ensemble, strongly consistent operations
+against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault
+tolerance. Consistent operations can still succeed when a minority of
+replicas in each ensemble is offline, faulty, or unreachable. In
+other words, **strongly consistent operations will succeed as long as
+quorum is maintained**. A fuller discussion can be found in the
+[operations]({{<baseurl>}}riak/kv/2.9.2/configuring/strong-consistency/#fault-tolerance)
+documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/2.9.2/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+which can entail a performance hit of varying proportions, depending on
+a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/2.9.2/configuring/strong-consistency/#performance).
diff --git a/content/riak/kv/2.9.2/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.9.2/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..a70e94a125
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,35 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.2/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+
+[Learn More >>][v2 mdc fullsync]
diff --git a/content/riak/kv/2.9.2/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.9.2/using/reference/v2-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..39e5352221
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/v2-multi-datacenter/architecture.md
@@ -0,0 +1,126 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Architecture"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Architecture"
+    identifier: "managing_ref_v2_architecture"
+    weight: 100
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/mdc/v2/architecture
+  - /riak/kv/2.9.2/ops/mdc/v2/architecture
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.2/using/reference/v3-multi-datacenter/architecture/) instead.
+{{% /note %}}
+
+
+This document provides a basic overview of the architecture undergirding
+Riak's Multi-Datacenter Replication capabilities.
+
+## How Replication Works
+
+When Multi-Datacenter Replication is implemented, one Riak cluster acts
+as a **primary cluster**. The primary cluster handles replication
+requests from one or more **secondary clusters** (generally located in
+datacenters in other regions or countries). If the datacenter with the
+primary cluster goes down, a secondary cluster can take over as the
+primary cluster. In this sense, Riak's multi-datacenter capabilities are
+masterless.
+
+Multi-Datacenter Replication has two primary modes of operation:
+**fullsync** and **realtime**. In fullsync mode, a complete
+synchronization occurs between primary and secondary cluster(s); in
+realtime mode, continual, incremental synchronization occurs, i.e.
+replication is triggered by new updates.
+
+Fullsync is performed upon initial connection of a secondary cluster,
+and then periodically thereafter (every 360 minutes is the default, but
+this can be modified). Fullsync is also triggered if the TCP connection
+between the primary and secondary clusters is severed and then recovered.
+
+Both fullsync and realtime modes are described in detail below.
+But first, a few key concepts.
+
+## Concepts
+
+### Listener Nodes
+
+Listeners, also called **servers**, are Riak nodes in the primary
+cluster that listen on an external IP address for replication requests.
+Any node in a Riak cluster can participate as a listener. Adding more
+nodes will increase the fault tolerance of the replication process in
+the event of individual node failures. If a listener node goes down,
+another node can take its place.
+
+### Site Nodes
+
+Site nodes, also called **clients**, are Riak nodes on a secondary
+cluster that connect to listener nodes and send replication initiation
+requests. Site nodes are paired with a listener node when started.
+
+### Leadership
+
+Only one node in each cluster will serve as the lead site (client) or
+listener (server) node. Riak replication uses a leadership-election
+protocol to determine which node in the cluster will participate in
+replication. If a site connects to a node in the primary cluster that is
+not the leader, it will be redirected to the listener node that is
+currently the leader.
+
+## Fullsync Replication
+
+Riak performs the following steps during fullsync
+replication, as illustrated in the figure below.
+
+1. A TCP connection is established between the primary and secondary
+   clusters.
+2. 
The site node in the secondary cluster initiates fullsync replication
+   with the primary node by sending a message to the listener node in
+   the primary cluster.
+3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode) in their respective clusters and compute a hash for
+   each key's object value. The site node on the secondary cluster sends
+   its complete list of key/hash pairs to the listener node in the
+   primary cluster. The listener node then sequentially compares those
+   key/hash pairs with its own, identifying any missing objects or
+   updates needed in the secondary cluster.
+4. The listener node streams the missing objects/updates to the
+   secondary cluster.
+5. The secondary cluster replicates the updates within the cluster to
+   achieve the new object values, completing the fullsync cycle.
+
+<br>
+![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png)
+<br>
+
+## Realtime Replication
+
+Riak performs the following steps during realtime
+replication, as illustrated in the figure below.
+
+1. The secondary cluster establishes a TCP connection to the primary
+   cluster.
+2. Realtime replication of a key/object is initiated when an update is
+   sent from a client to the primary cluster.
+3. The primary cluster replicates the object locally.
+4. The listener node on the primary cluster streams an update to the
+   secondary cluster.
+5. The site node within the secondary cluster receives and replicates
+   the update.
+
+<br>
+![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png)
+<br>
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/2.9.2/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+source and sink cluster.
diff --git a/content/riak/kv/2.9.2/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.2/using/reference/v2-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..8fd667453a
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/v2-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,49 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v2_fullsync"
+    weight: 101
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/mdc/v2/scheduling-fullsync
+  - /riak/kv/2.9.2/ops/mdc/v2/scheduling-fullsync
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.2/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead.
+{{% /note %}}
+
+
+## Scheduling Fullsync Operation
+
+With the `pause` and `resume` commands, it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` diff --git a/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter.md b/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..ff67272543 --- /dev/null +++ b/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter.md @@ -0,0 +1,47 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] diff --git a/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..76981578dd --- /dev/null +++ b/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,125 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.2/ops/mdc/v3/aae + - /riak/kv/2.9.2/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak
+Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a
+technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in
+LevelDB, so if AAE is already active, there is no additional startup
+delay for enabling the `aae` fullsync strategy. AAE can also be enabled
+for the first time on a cluster, although some custom settings can help
+AAE trees build more quickly in this case. See
+[Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later (or any open source release
+  after 2.2.3) installed on source and sink clusters
+* Riak MDC Replication Version 3 enabled on source and sink
+  clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the
+  `advanced.config` configuration file must be set to `aae` on both
+  source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In
+  the event that an AAE tree is not built on both the source and sink,
+  fullsync will default to the `keylist` fullsync strategy for that
+  partition.
+
+## Configuration
+
+If you are using Riak version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To use the `aae` fullsync strategy, [active anti-entropy][glossary aae]
+\(AAE) must be enabled in both source and sink clusters. If it is not
+enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all
+of the necessary hash trees because the default **build rate** of trees
+is 1 partition per hour, per node. With a
+[ring size][concept clusters] of 256 and 5 nodes, that is 2 days.
+
+Changing the rate of tree building can speed up this process, with the
+caveat that rebuilding a tree takes processing time from the cluster,
+and this should not be done without assessing the possible impact on
+get/put latencies for normal cluster operations. For a production
+cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a
+partition has not had its AAE tree built yet, it will default to using
+the `keylist` replication strategy. Instructions on these settings can
+be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting
+the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit`
+settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on
+both source and sink clusters.
If not, the `keylist` replication
+strategy will be used.
+
+To enable AAE with Version 3 MDC Replication:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {fullsync_strategy, aae},
+    % ...
+  ]}
+```
diff --git a/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..438a3a3a4a
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/architecture.md
@@ -0,0 +1,182 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Architecture"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Architecture"
+    identifier: "managing_ref_v3_architecture"
+    weight: 100
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.2/ops/mdc/v3/architecture
+  - /riak/kv/2.9.2/ops/mdc/v3/architecture
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters
+
+## How Version 3 Replication Works
+
+In Multi-Datacenter (MDC) Replication, a cluster can act as either the
+
+* **source cluster**, which sends replication data to one or more
+* **sink clusters**, which are generally located in datacenters in other
+  regions or countries.
+
+Bidirectional replication can easily be established by making a cluster
+both a source and sink to other clusters. Riak
+Multi-Datacenter Replication is considered "masterless" in that all
+clusters participating will resolve replicated writes via the normal
+resolution methods available in Riak.
+
+In Multi-Datacenter Replication, there are two primary modes of
+operation:
+
+* **Fullsync** replication is a complete synchronization that occurs
+  between source and sink cluster(s), which can be performed upon
+  initial connection of a sink cluster if you wish
+* **Realtime** replication is a continual, incremental synchronization
+  triggered by successful writing of new updates on the source cluster
+
+Fullsync and realtime replication modes are described in detail below.
+
+## Concepts
+
+### Sources
+
+A source refers to a cluster that is the primary producer of replication
+data. A source can also refer to any node that is part of the source
+cluster. Source clusters push data to sink clusters.
+
+### Sinks
+
+A sink refers to a cluster that is the primary consumer of replication
+data. A sink can also refer to any node that is part of the sink
+cluster. Sink clusters receive data from source clusters.
+
+### Cluster Manager
+
+The cluster manager is a Riak service that provides
+information regarding nodes and protocols supported by the sink and
+source clusters. This information is primarily consumed by the
+`riak-repl connect` command.
+
+### Fullsync Coordinator
+
+In fullsync replication, a node on the source cluster is elected to be
+the *fullsync coordinator*. This node is responsible for starting and
+stopping replication to the sink cluster. It also communicates with the
+sink cluster to exchange key lists and ultimately transfer data across a
+TCP connection. If a fullsync coordinator is terminated as the result of
+an error, it will automatically restart on the current node. If the node
+becomes unresponsive, a leader election will take place within 5 seconds
+to select a new node from the cluster to become the coordinator. In the
+event of a coordinator restart, a fullsync will have to restart.
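+
+As a rough sketch of how these pieces fit together on the command line
+(the cluster name, host, and port below are examples; 9080 is assumed to
+be the sink's cluster manager port):
+
+```bash
+## Name this cluster so that sinks can identify it
+riak-repl clustername boston_source
+
+## Ask the sink's cluster manager for its nodes and supported protocols
+riak-repl connect sink.example.com:9080
+
+## Verify the connection the cluster manager negotiated
+riak-repl connections
+```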
+ +## Fullsync Replication + +Fullsync replication scans through the list of partitions in a Riak +cluster and determines which objects in the sink cluster need to be +updated. A source partition is synchronized to a node on the sink +cluster containing the current partition. + +## Realtime Replication + +In realtime replication, a node in the source cluster will forward data +to the sink cluster. A node in the source cluster does not necessarily +connect to a node containing the same [vnode][glossary vnode] on +the sink cluster. This allows Riak to spread out realtime replication +across the entire cluster, thus improving throughput and making +replication more fault tolerant. + +### Initialization + +Before a source cluster can begin pushing realtime updates to a sink, +the following commands must be issued: + +1. `riak-repl realtime enable <sink_cluster>` + + After this command, the realtime queues (one for each Riak node) are + populated with updates to the source cluster, ready to be pushed to + the sink. + +2. `riak-repl realtime start <sink_cluster>` + + This instructs the Riak connection manager to contact the sink + cluster. + + <br /> + ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png) + <br /> + + At this point realtime replication commences. + +<ol start="3"> +<li>Nodes with queued updates establish connections to the sink cluster +and replication begins.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png) +<br /> + +### Realtime queueing and synchronization + +Once initialized, realtime replication continues to use the queues to +store data updates for synchronization. + +<ol start="4"> +<li>The client sends an object to store on the source cluster.</li> +<li>Riak writes N replicas on the source cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png) +<br /> + +<ol start="6"> +<li>The new object is stored in the realtime queue.</li> +<li>The object is copied to the sink cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png) +<br /> + +<ol start="8"> +<li>The destination node on the sink cluster writes the object to N +nodes.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png) +<br /> + +<ol start="9"> +<li>The successful write of the object to the sink cluster is +acknowledged and the object removed from the realtime queue.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png) +<br /> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size][concept clusters]; if you are using fullsync +replication, every bucket's `n_val` must be the same in both the +source and sink cluster. 
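+
+Putting the initialization steps above together, enabling realtime
+replication to a previously connected sink is a two-command operation
+(the sink name `newyork_sink` is only an example):
+
+```bash
+## Begin populating the realtime queues with updates for the sink
+riak-repl realtime enable newyork_sink
+
+## Tell the connection manager to contact the sink and start streaming
+riak-repl realtime start newyork_sink
+```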
+
diff --git a/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/cascading-writes.md
new file mode 100644
index 0000000000..37fe7e91f8
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -0,0 +1,98 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Cascading Realtime Writes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Cascading Writes"
+    identifier: "managing_ref_v3_cascading_writes"
+    weight: 102
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.2/ops/mdc/v3/cascading-writes
+  - /riak/kv/2.9.2/ops/mdc/v3/cascading-writes
+---
+
+## Introduction
+
+Riak includes a feature that cascades realtime writes across
+multiple clusters.
+
+Cascading Realtime Writes is enabled by default on new clusters running
+Riak. It will need to be manually enabled on existing clusters.
+
+Cascading realtime requires the `{riak_repl, rtq_meta}` capability to
+function.
+
+{{% note title="Note on cascading tracking" %}}
+Cascading tracking is a simple list of where an object has been written. This
+works well for most common configurations. Larger installations, however, may
+have writes cascade to clusters to which other clusters have already written.
+{{% /note %}}
+
+
+```
++---+     +---+     +---+
+| A | <-> | B | <-> | C |
++---+     +---+     +---+
+  ^                   ^
+  |                   |
+  V                   V
++---+     +---+     +---+
+| F | <-> | E | <-> | D |
++---+     +---+     +---+
+```
+
+In the diagram above, a write at cluster A will begin two cascades. One
+goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and
+finally B. Each cascade will loop around to A again, sending a
+replication request even if the same request has already occurred from
+the opposite direction, creating 3 extra write requests.
+
+This can be mitigated by disabling cascading in a cluster. If cascading
+were disabled on cluster D, a write at A would begin two cascades. One
+would go through B, C, and D, the other through F, E, and D. This
+reduces the number of extraneous write requests to 1.
+
+A different topology can also prevent extra write requests:
+
+```
++---+                     +---+
+| A |                     | E |
++---+                     +---+
+  ^ ^                     ^ ^
+  |  \  +---+     +---+  /  |
+  |   > | C | <-> | D | <   |
+  |  /  +---+     +---+  \  |
+  V V                     V V
++---+                     +---+
+| B |                     | F |
++---+                     +---+
+```
+
+A write at A will cascade to C and B. B will not cascade to C because
+A will have already added C to the list of clusters where the write has
+occurred. C will then cascade to D. D then cascades to E and F. E and F
+see that the other was sent a write request (by D), and so they do not
+cascade.
+
+## Usage
+
+Riak Cascading Writes can be enabled and disabled using the
+`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/v3-multi-datacenter) for more information.
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
diff --git a/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..96eab6cbf2
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,68 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.2/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/2.9.2/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak-repl`
+section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes,
+  between fullsyncs, _or_
+* a list of `{"clustername", time_in_minutes}` pairs, one for each sink
+  participating in fullsync replication. Note the commas separating each
+  pair, and the `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    {fullsync_interval, 90} %% fullsync runs every 90 minutes
+    % ...
+  ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    % clusters sink_boston + sink_newyork have different intervals (in minutes)
+    {fullsync_interval, [
+        {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+        {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+  ]}
```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been
+  completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the
+  specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last
+  completed fullsync.
diff --git a/content/riak/kv/2.9.2/using/repair-recovery.md b/content/riak/kv/2.9.2/using/repair-recovery.md
new file mode 100644
index 0000000000..71ab93afa5
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/repair-recovery.md
@@ -0,0 +1,48 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
diff --git a/content/riak/kv/2.9.2/using/repair-recovery/errors.md b/content/riak/kv/2.9.2/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..6450921b88
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/repair-recovery/errors.md
@@ -0,0 +1,362 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/recovery/errors
+  - /riak/kv/2.9.2/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may
+encounter -- screws fall out all of the time, the world is an imperfect
+place. This is an attempt at capturing the most common errors that
+users encounter, as well as describing the non-critical error atoms
+you may find in the logs.
+
+Discovering the source of an error can take some detective work, since
+one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error
+messages may appear in. Depending upon your log configuration, some may
+appear more often (e.g., if you set the log level to debug), while others
+may output to your console (e.g., if you tee'd your output or started
+Riak with `riak console`).
+
+You can optionally customize your log message format via the
+`lager_default_formatter` field under `lager` in `app.config`. If you
+do, your messages will look different from those shown in this document.
+
+Finally, this document is organized to let you look up portions of a
+log message, since printing every variation would be a bit unwieldy. For
+example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+starts with a date (`12:34:27.999`), followed by the log severity
+(`[error]`), with a message formatted by Lager (found in the Lager table
+below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the Lager project, so it's good to note
+some of the more common message formats. In almost every case the
+reasons for the error are described as variables, such as `Reason` or
+`Mod` (meaning the Erlang module which is generally the source of the
+error).
+
+Riak does not format all error messages that it receives into
+human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in
+this table, where the Erlang `Mod` value is `riak_core_capability` and
+the reason was an Erlang error: `no function clause matching
+orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang programming favors a "happy path/fail fast" style, one
+of the more common error log strings you might encounter contains
+`{error,{badmatch,{...`. This is Erlang's way of telling you that an
+unexpected value was assigned, so these errors can prefix the more
+descriptive parts. In this case, `{error,{badmatch,{...` prefixes the
+more interesting `insufficient_vnodes_available` error, which can be
+found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or, if Riak is already running, check for `beam.smp`; or epmd may think Riak is running, so check for and kill the epmd process
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.2/using/repair-recovery/errors/#more">Step 1</a>.
`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.2/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large of an object or that you simply don't have enough RAM. Base minimum suggested RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang
+atoms (and quite often wrapped within an `{error,{badmatch,{...` tuple,
+as described in the [Erlang Errors](#erlang-errors) section
+above). This table lays out those terse error codes and related log
+messages, if they exist.
+
+### Riak Core
+
+Riak Core is the underlying implementation for KV. These are errors
+originating from that framework, and can appear whether you use KV,
+Search, or any Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g. 
backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Run `riak-admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections and ensure that the Erlang cookie (`vm.args` `-setcookie`) matches on all nodes
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use, e.g. `{riak_kv_stat, true}`
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possible 
corrupted ring
+`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN` and the sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.2/using/repair-recovery/errors/#more"> Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive a `xfer_complete` status
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be
+Riak proper. 
This is the source of most of the code, and consequently,
+most of the error messages.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status
+`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
+`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that the coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.2/using/repair-recovery/errors/#more"> Step 1</a>.
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/2.9.2/using/repair-recovery/errors/#more"> Step 1</a>.
+`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone
+`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value
+`{field_parsing_failed, {Field, Value}}` | `Could not parse field
+<Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer
+`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug
+`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend
+`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`.
+`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings
+`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe`
+`invalid_message` | | Unknown event sent to module | Ensure you're running the same versions of Riak (and specifically poolboy) across all nodes
+`{invalid_range, Args}` | | Index range query has Start > End | Fix your query
+`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result
+`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files
+`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key
+`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk. 
If this happens repetitively, stop/start the riak node, forcing a memory realloc
+`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N
+`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized
+`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe`
+`notfound` | | No value found | Value was deleted, or was not yet stored or replicated
+`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high
+`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value
+`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code
+`{pw_val_unsatisfied, PR, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high
+`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value
+`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high
+`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value
+`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called
+`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value
+`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false`
+`timeout` | | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.2/using/repair-recovery/errors/#more"> Step 1</a>. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production.
+`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format
+`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format
+`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value
+`too_many_results` | | Too many results are attempted to be returned | This is a protective error. 
+`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin`
+`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high
+`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value
+ | `Invalid equality query <SKey>` | Equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query
+ | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query
+ | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for the given `Reason` | Several possible reasons for failure; read the attached reason for insight into resolution
+
+### Backend Errors
+
+These errors tend to stem from server-based problems. Backends are
+sensitive to low or corrupted disk or memory resources, native code, and
+configuration differences between nodes. Conversely, a network issue is
+unlikely to affect a backend.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config
+`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where bitcask data is stored, under the `bitcask` section
+`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list
+`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config`
+`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure
+`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config`
+`not_loaded` | | Native driver not loading | Ensure your native drivers exist (`.dll` or `.so` files under lib/`project`/priv, where `project` is most likely eleveldb)
+`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket
+`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production
+
+### JavaScript
+
+These are some errors related to JavaScript pre-commit functions,
+MapReduce functions, or simply the management of the pool of JavaScript
+VMs. If you do not use JavaScript, these should not be encountered. If
+they are, check your configuration for overly high `*js_vm*` values, or
+treat them as a symptom of a deeper issue, such as low resources.
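+
+If JavaScript VM exhaustion (the `no_vms` error below) is your recurring
+problem, the VM pool sizes can be raised in the `riak_kv` section of
+`app.config`. A minimal sketch follows; the counts are illustrative
+placeholders, not recommendations, and should be tuned against available
+memory:
+
+```appconfig
+{riak_kv, [
+    %% VMs available for JavaScript map functions
+    {map_js_vm_count, 24},
+    %% VMs available for JavaScript reduce functions
+    {reduce_js_vm_count, 18},
+    %% VMs available for JavaScript pre-commit hooks
+    {hook_js_vm_count, 8}
+]},
+```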
+
+Error | Message | Description | Resolution
+---------|---------|-------------|-------
+`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`)
+`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A given UTF-8 character was in a bad format | Only use correct UTF-8 characters for JavaScript code and arguments
+`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments
+ | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties
+`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | On OS X you may have compiled with `llvm-gcc` rather than `gcc`.
+
+### MapReduce
+
+These are possible errors logged by Riak's MapReduce implementation,
+both legacy and Pipe. If you never use or call MapReduce, you
+should not run across these.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it
+`bad_mapred_inputs` | | A bad value sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary
+`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce query code placed in a Riak value must be stored before execution
+`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct
+`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (both issues may require a node restart)
+`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has invalid input field | Fix MapReduce fields
+`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks. Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite them as Erlang functions
+`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an `inputs` or `query` field is required | Post the MapReduce request with at least one of them
+`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value
+`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long-running job hitting the MapReduce timeout; upgrade to Pipe
+`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has invalid query field | Fix the MapReduce query
+`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set a reduce phase at all
+`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted
+ | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation; the most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects
+ | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen
+
+## Specific messages
+
+Although you can put together many error causes with the tables above,
+here are some common yet esoteric messages with known causes and
+solutions.
+
+ Message | Resolution
+:--------|:----------
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The Node has been changed, either through a change of IP or of `vm.args` `-name`, without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files (`rm -rf /var/lib/riak/ring/*`) and rejoin the node to the cluster
+gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without)
+monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting `+zdbbl` higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 ms (or 1 ms).
+<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info; you can add `+swt very_low` to your `vm.args`
+(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/2.9.2/using/repair-recovery/errors/#more">Step 2</a>.
+enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core.
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address the starting process is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause and resolve this kind of error; check for stale `beam.smp` processes.
+exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify the HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range, as the `riak` user will not have access to such ports.
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that the `-name` value in a previously running Riak node's `vm.args` has been changed directly rather than properly through `riak-admin cluster replace`.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/2.9.2/using/reference/snmp) configuration.
+RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused by attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
+
+
+### More
+
+1. <a name="f1"></a>Ensure node inter-communication
+  - Check `riak-admin member-status` and ensure the cluster is valid.
+  - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected.
+  - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node.
+  - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster.
+  - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`).
+
+2. <a name="f2"></a>Run LevelDB compaction
+  1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting; more than one might be a strong indication of a hardware or OS bug)*
+  2. Stop Riak on the node: `riak stop`
+  3. Start an Erlang session (do not start Riak; we just want Erlang)
+  4. From the Erlang console, perform the following command to open the LevelDB database
+
+      ```erlang
+      [application:set_env(eleveldb, Var, Val) || {Var, Val} <-
+          [{max_open_files, 2000},
+           {block_size, 1048576},
+           {cache_size, 20*1024*1024*1024},
+           {sync, false},
+           {data_root, "/var/db/riak/leveldb"}]].
+      ```
+  5. For each of the corrupted LevelDB databases (found by the `find` command in sub-step 1), run this command, substituting in the proper vnode number (a sketch that loops over all of them follows this list).
+
+      ```erlang
+      eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []).
+      ```
+  6. When all have finished successfully, you may restart the node: `riak start`
+  7. Check for proper operation by looking at log files in `/var/log/riak` and in the `LOG` files in the affected LevelDB vnodes.
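+
+If `find` reported several corrupted vnodes, repeating sub-step 5 by hand
+gets tedious. The following Erlang sketch runs the same repair over each
+of them; it assumes the application environment was set as in sub-step 4,
+that `/var/db/riak/leveldb` is your LevelDB data root, and that the vnode
+IDs in `VNodeList` are placeholders for the ones `find` reported:
+
+```erlang
+%% Placeholder vnode directory names reported by the find command.
+VNodeList = ["442446784738847563128068650529343492278651453440",
+             "»another corrupted vnode id«"].
+%% Repair each corrupted vnode in turn, printing progress as we go.
+[begin
+     Path = "/var/db/riak/leveldb/" ++ V,
+     io:format("Repairing ~s.~n", [Path]),
+     eleveldb:repair(Path, [])
+ end || V <- VNodeList].
+```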
diff --git a/content/riak/kv/2.9.2/using/repair-recovery/failed-node.md b/content/riak/kv/2.9.2/using/repair-recovery/failed-node.md
new file mode 100644
index 0000000000..df9bef890b
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/repair-recovery/failed-node.md
@@ -0,0 +1,110 @@
+---
+title: "Recovering a Failed Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Recover a Failed Node"
+    identifier: "repair_recover_failed_node"
+    weight: 104
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/recovery/failed-node
+  - /riak/kv/2.9.2/ops/running/recovery/failed-node
+---
+
+## General Recovery Notes
+
+A Riak node can fail for many reasons, but a handful of checks will help you
+uncover some of the most common causes of node failure: check RAID and
+filesystem consistency, check for faulty memory, and ensure that your
+network connections are fully functioning.
+
+When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster.
+
+During the recovery process, hinted handoff will kick in and update the data on
+the recovered node with updates accepted from other nodes in the cluster. Your
+cluster may temporarily return `not found` for objects that are currently
+being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on
+these scenarios, in particular how the system behaves while the failed node is
+not part of the cluster).
+
+## Node Name Changed
+
+If you are recovering from a scenario in which node name changes are out of
+your control, you'll want to notify the cluster of its *new* name using the
+following steps:
+
+1. Stop the node you wish to rename:
+
+    ```bash
+    riak stop
+    ```
+
+
+2. Mark the node down from another node in the cluster:
+
+    ```bash
+    riak-admin down <previous_node_name>
+    ```
+
+3. Update the node name in Riak's configuration files:
+
+    ```riakconf
+    nodename = <updated_node_name>
+    ```
+
+    ```vmargs
+    -name <updated_node_name>
+    ```
+
+4. Delete the ring state directory (usually `/var/lib/riak/ring`).
+
+5. Start the node again:
+
+    ```bash
+    riak start
+    ```
+
+6. Ensure that the node comes up as a single instance:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    The output should look something like this:
+
+    ```
+    ========================= Membership ==========================
+    Status     Ring      Pending    Node
+    ---------------------------------------------------------------
+    valid     100.0%       --       'dev-rel@127.0.0.1'
+    ---------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+7. Join the node to the cluster:
+
+    ```bash
+    riak-admin cluster join <node_name_of_a_member_of_the_cluster>
+    ```
+
+8. Replace the old instance of the node with the new:
+
+    ```bash
+    riak-admin cluster force-replace <previous_node_name> <new_node_name>
+    ```
+
+9. 
Review the changes:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Finally, commit those changes:
+
+    ```bash
+    riak-admin cluster commit
+    ```
diff --git a/content/riak/kv/2.9.2/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.9.2/using/repair-recovery/failure-recovery.md
new file mode 100644
index 0000000000..842e054773
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/repair-recovery/failure-recovery.md
@@ -0,0 +1,125 @@
+---
+title: "Failure & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Failure & Recovery"
+    identifier: "repair_recover_failure"
+    weight: 100
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/recovery/failure-recovery
+  - /riak/kv/2.9.2/ops/running/recovery/failure-recovery
+---
+
+Riak was built to withstand---or at the very least reduce the severity
+of---many types of system failure. Nonetheless, bugs are a reality,
+hardware does break, and occasionally Riak itself will fail. Here, we'll
+list some steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+## Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally and
+are not affected by a wider problem like a virtualization or network outage.
+Try to determine the cause of the problem from the data you have collected.
+
+## Data Loss
+
+Many failures incur no data loss, or only minimal loss that can be
+repaired automatically, without intervention. Outage of a single node
+does not necessarily cause data loss, as other replicas of every key are
+available elsewhere in the cluster. Once the node is detected as down,
+other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/2.9.2/learn/glossary/#hinted-handoff)).
+
+More severe data loss scenarios usually relate to hardware failure.
+If data is lost, several options are available for restoring it.
+
+1. **Restore from backup** --- A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but it can be used to partially restore data from
+   lost storage volumes. If running in a RAID configuration, rebuilding
+   the array may also be possible.
+2. **Restore from multi-cluster replication** --- If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the `riak-repl`
+   command.
+3. **Restore using intra-cluster repair** --- Riak versions 1.2 and greater
+   include a repair feature which will restore lost partitions with
+   data from other replicas. Currently, this must be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho Client Services Engineer.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho are strongly recommended.
+
+## Data Corruption
+
+Data at rest on disk can become corrupted by hardware failure or other
+events.
Generally, the Riak storage backends are designed to handle
+cases of corruption in individual files or entries within files, and can
+repair them automatically or simply ignore the corrupted parts.
+Otherwise, clusters can recover from data corruption in roughly the same
+way that they recover from data loss.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully.
+
+Replacing the node with one that has greater RAM capacity may temporarily
+alleviate the problem, but out-of-memory (OOM) issues tend to be an indication
+that the cluster is under-provisioned.
+
+## High Latency / Request Timeout
+
+High latencies and timeouts can be caused by slow disks or networks or an
+overloaded node. Check `iostat` and `vmstat` or your monitoring system to
+determine the state of resource usage. If I/O utilization is high but
+throughput is low, this may indicate that the node is responsible for
+too much data and growing the cluster may be necessary. Additional RAM
+may also improve latency because more of the active dataset will be
+cached by the operating system.
+
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/2.9.2/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+the number of siblings, causing longer disk service times and slower
+network responses.
+
+Sibling explosion can be detected by examining the `node_get_fsm_siblings`
+and `node_get_fsm_objsize` statistics from the `riak-admin status` command.
+To recover from sibling explosion, the application should be throttled and
+the resolution policy might need to be invoked manually on offending keys.
+
+A Basho CSE can assist in manually finding large values, i.e. those that
+potentially have a sibling explosion problem, in the storage backend.
+
+MapReduce requests typically involve multiple I/O operations and are
+thus the most likely to time out. From the perspective of the client
+application, the success of MapReduce requests can be improved by reducing the
+number of inputs, supplying a longer request timeout, and reducing the usage
+of secondary indexes. Heavily loaded clusters may experience more MapReduce
+timeouts simply because many other requests are being serviced as well. Adding
+nodes to the cluster can reduce MapReduce failure in the long term by
+spreading load and increasing available CPU and IOPS.
+
+
+## Cluster Recovery From Backups
+
+See [Changing Cluster Information]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+
+{{% note title="Tip" %}}
+If you are a TI Tokyo Riak support customer and require assistance or
+further advice with a cluster recovery, please file a ticket with the
+<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>.
+{{% /note %}} diff --git a/content/riak/kv/2.9.2/using/repair-recovery/repairs.md b/content/riak/kv/2.9.2/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..876d64bda5 --- /dev/null +++ b/content/riak/kv/2.9.2/using/repair-recovery/repairs.md @@ -0,0 +1,387 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.2/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.2/ops/running/recovery/repairing-indexes + - /riak/2.9.2/ops/running/recovery/failed-node + - /riak/kv/2.9.2/ops/running/recovery/failed-node + - /riak/2.9.2/ops/running/recovery/repairing-leveldb + - /riak/kv/2.9.2/ops/running/recovery/repairing-leveldb + - /riak/2.9.2/ops/running/recovery/repairing-partitions + - /riak/kv/2.9.2/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/2.9.2/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/2.9.2/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/2.9.2/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`.
+
+Compaction error messages take the following form:
+
+```
+<timestamp> Compaction Error: Corruption: corrupted compressed block contents
+```
+
+To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction error` in each `LOG` file. Here is an example script:
+
+```bash
+find . -name "LOG" -exec grep -l 'Compaction error' {} \;
+```
+
+If there are compaction errors in any of your vnodes, those will be listed in the console. If any vnode has experienced such errors, you would see output like this:
+
+```
+./442446784738847563128068650529343492278651453440/LOG
+```
+
+
+{{% note %}}
+While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level.
+{{% /note %}}
+
+
+## Healing Corrupted LevelDBs
+
+When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not.
+
+Choose your setup below:
+
+1. [Just LevelDB](#leveldb)
+2. [LevelDB with tiered storage](#leveldb-with-tiered-storage)
+
+
+### LevelDB
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+3\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+4\. Then set `Options` equal to an empty list:
+
+```erlang
+Options = [].
+```
+
+5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB vnode that you found using the [`find` command above](#checking-for-compaction-errors).
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`:
+
+```erlang
+RepairPath = fun(Root, VNode) ->
+    Path = lists:flatten(Root ++ "/" ++ VNode),
+    io:format("Repairing ~s.~n", [Path]),
+    Path
+end.
+[eleveldb:repair(RepairPath(DataRoot, VNode), Options) || VNode <- VNodeList].
+```
+
+7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+### LevelDB with Tiered Storage
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. Check your riak.conf file and make note of the following values:
+
+* `leveldb.tiered` (integer)
+* `leveldb.tiered.path.fast`
+* `leveldb.tiered.path.slow`
+
+3\. 
To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+4\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+5\. Then supply the information you noted in Step 2:
+
+```erlang
+Options = [
+  {tiered_slow_level, »leveldb.tiered value«},
+  {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"},
+  {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"}
+].
+```
+
+6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB partition that you found using the [`find` command above](#checking-for-compaction-errors), each provided in double quotes.
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`:
+
+```erlang
+RepairPath = fun(Root, VNode) ->
+    Path = lists:flatten(Root ++ "/" ++ VNode),
+    io:format("Repairing ~s.~n", [Path]),
+    Path
+end.
+[eleveldb:repair(RepairPath(DataRoot, VNode), Options) || VNode <- VNodeList].
+```
+
+8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+
+## Repairing Partitions
+
+If you have experienced a loss of object replicas in your cluster, you
+may need to perform a repair operation on one or more of your data
+[partitions]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+run in situations where partitions or whole nodes are lost due to
+corruption or hardware failure. In these cases, nodes or partitions are
+brought back online without any data, which means that the need to
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/) is enabled.
+
+You will need to run a repair if the following are both true:
+
+* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* You have both non-expiring data and keys that are not accessed
+  frequently (which means that they are not likely to be subject to
+  [read repair]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+
+You will most likely not need to run a repair operation if _any_ of the
+following is true:
+
+* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/2.9.2/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Your entire key set is accessed frequently, allowing passive read
+  repair to repair the partitions
+* Your data expires frequently
+
+In most cases, we recommend either using active anti-entropy or, if
+necessary and only when necessary, running a repair operation using the
+instructions below.
+
+### Running a Repair
+
+The Riak KV repair operation will repair objects from a node's adjacent
+partitions on the ring, consequently fixing the index. This is done as
+efficiently as possible by generating a hash range for all the buckets
+and thus avoiding a preflist calculation for each key. Only a hash of
+each key is done, its range determined from a bucket->range map, and
+then the hash is checked against the range.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make them
+mutually exclusive events. If you join or remove a node, all repairs
+across the entire cluster will be killed.
+
+### Repairing a Single Partition
+
+In the case of data loss in a single partition, only that partition can
+be repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit **Enter** again to get a console prompt.
+
+2. Execute the repair for a single partition using the below command:
+
+    ```erlang
+    riak_kv_vnode:repair(»Partition ID«).
+    ```
+
+    where `»Partition ID«` is replaced by the ID of the partition to
+    repair. For example:
+
+    ```erlang
+    riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
+    ```
+
+3. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Repairing All Partitions on a Node
+
+If a node is lost, all partitions currently owned by that node can be
+repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+2. Get a copy of the current Ring:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with ring record information.
+    You can safely ignore it.
+
+3. Get a list of partitions owned by the node that needs to be repaired.
+Replace `dev1@127.0.0.1` with the name of the node to be repaired. The
+name can be found in each node's `vm.args` file, specified as the
+`-name` parameter, if you are using the older configuration system; if
+you are using the newer, `riak.conf`-based system, the name is given by
+the `nodename` parameter.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    **Note**: The above is an [Erlang list
+    comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html)
+    that loops over each `{Partition, Node}` tuple in the ring and
+    extracts only the partitions that match the given node name, as a
+    list.
+
+
+4. 
Execute the repair on all the partitions. Executing the repairs all
+at once will cause a lot of `{shutdown, max_concurrency}` messages in
+the logs. These can be safely ignored, as it is just the transfers
+mechanism enforcing an upper limit on the number of concurrent
+transfers.
+
+    ```erlang
+    [riak_kv_vnode:repair(P) || P <- Partitions].
+    ```
+5. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Monitoring Repairs
+
+The above repair commands can be monitored via the `riak-admin
+transfers` command.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This command can be executed from a `riak attach`
+session like below:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, and will
+look similar to:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Repairs on a node can also be killed remotely from another node in the
+cluster. From a `riak attach` session the below command can be used:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
diff --git a/content/riak/kv/2.9.2/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.9.2/using/repair-recovery/rolling-replaces.md
new file mode 100644
index 0000000000..236f37e755
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/repair-recovery/rolling-replaces.md
@@ -0,0 +1,71 @@
+---
+title: "Rolling Replaces"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Rolling Replaces"
+    identifier: "repair_recover_replace"
+    weight: 106
+    parent: "managing_repair_recover"
+toc: true
+---
+
+[upgrade]: {{<baseurl>}}riak/kv/2.9.2/setup/upgrading/cluster/
+[rolling restarts]: {{<baseurl>}}riak/kv/2.9.2/using/repair-recovery/rolling-restart/
+[add node]: {{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes
+
+Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis.
+
+The following steps should be undertaken on each Riak KV node that you wish to replace:
+
+1\. Create a free node:
+
+  a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster.
+
+  b\. Or free up a node that is currently in the cluster by having it leave:
+
+    ```bash
+    riak-admin cluster leave »nodename«
+    ```
+
+    After creating a node or leaving a node, wait for all transfers to complete:
+
+    ```bash
+    riak-admin transfers
+    ```
+
+2\. Join the free node to your cluster:
+
+```bash
+riak-admin cluster join »free_node«
+```
+
+3\. Next, replace the free node with an existing node:
+
+```bash
+riak-admin cluster replace »free_node« »nodename«
+```
+
+4\. Then review the cluster transition plan:
+
+```bash
+riak-admin cluster plan
+```
+
+5\. And commit the changes:
+
+```bash
+riak-admin cluster commit
+```
+
+6\. Wait for all transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+7\. Repeat steps 2-6 above until each node has been replaced (a combined sketch of these steps follows this list).
+
+8\. Join the replaced node back into the cluster or decommission the additional node that was created.
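+
+For a single replacement, steps 2 through 6 can be combined into a small
+shell sketch like the one below. The node names are illustrative
+placeholders, the commands are exactly those from the steps above, and
+the polling loop mirrors the one used for [rolling restarts]:
+
+```bash
+#!/bin/bash
+# Placeholder node names; substitute your own.
+FREE_NODE=riak@10.0.0.9
+OLD_NODE=riak@10.0.0.2
+
+riak-admin cluster join "$FREE_NODE"
+riak-admin cluster replace "$FREE_NODE" "$OLD_NODE"
+riak-admin cluster plan
+riak-admin cluster commit
+
+# Wait for all transfers to complete before replacing the next node.
+while ! riak-admin transfers | grep -iqF 'No transfers active'
+do
+  echo 'Transfers in progress'
+  sleep 5
+done
+```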
diff --git a/content/riak/kv/2.9.2/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.9.2/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..8b06fbf4de --- /dev/null +++ b/content/riak/kv/2.9.2/using/repair-recovery/rolling-restart.md @@ -0,0 +1,60 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.2/ops/running/recovery/rolling-restart + - /riak/kv/2.9.2/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/2.9.2/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. diff --git a/content/riak/kv/2.9.2/using/repair-recovery/secondary-indexes.md b/content/riak/kv/2.9.2/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..75fa94af8d --- /dev/null +++ b/content/riak/kv/2.9.2/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,138 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.2 +menu: + riak_kv-2.9.2: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.2/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.2/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
+
+### Running a Repair
+
+The secondary indexes of a single partition can be repaired by executing:
+
+```bash
+riak-admin repair-2i <Partition_ID>
+```
+
+The secondary indexes of every partition can be repaired by executing the same command, without a partition ID:
+
+```bash
+riak-admin repair-2i
+```
+
+### Monitoring a Repair
+
+Repairs can be monitored using the below command:
+
+```bash
+riak-admin repair-2i status
+```
+
+### Killing a Repair
+
+In the event the secondary index repair operation needs to be halted, all repairs can be killed with:
+
+```bash
+riak-admin repair-2i kill
+```
+
+----
+
+## Repairing Search Indexes
+
+Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned.
+
+### Running a Repair
+
+If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index.
+
+This is done as efficiently as possible by generating a hash range for all the buckets and thus avoiding a preflist calculation for each key. Only a hash of each key is done, its range determined from a bucket->range map, and then the hash is checked against the range.
+
+This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly.
+
+1. From a cluster node with Riak installed, attach to the Riak console:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit enter again to get a console prompt.
+
+2. Get a list of partitions owned by the node that needs repair:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with Ring record information. You can safely ignore it.
+
+3. Then run the following code to get a list of partitions. Replace 'dev1@127.0.0.1' with the name of the node you need to repair.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the Ring and extracts only the partitions that match the given node name, as a list._
+
+4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` spam, but it's not anything to worry about. That is just the transfers mechanism enforcing an upper limit on the number of concurrent transfers.
+
+    ```erlang
+    [riak_search_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. When you're done, press `Ctrl-D` to disconnect the console. DO NOT RUN `q()`, which will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service; it does not stop the code from running.
+
+
+### Monitoring a Repair
+
+The above repair command can be slow, so if you reattach to the console, you can run the `repair_status` function. You can use the `Partitions` variable defined above to get the status of every partition.
+
+```erlang
+[{P, riak_search_vnode:repair_status(P)} || P <- Partitions].
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node.
This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This means you'll either have to be attached to
+that node's console, or you can use the `rpc` module to make a remote
+call. Here is an example of killing all repairs targeting partitions
+on the local node.
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, something akin to this:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Here is an example of executing the call remotely.
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make
+them mutually exclusive events. If you join or remove a node, all
+repairs across the entire cluster will be killed.
diff --git a/content/riak/kv/2.9.2/using/running-a-cluster.md b/content/riak/kv/2.9.2/using/running-a-cluster.md
new file mode 100644
index 0000000000..0bedd1c60d
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/running-a-cluster.md
@@ -0,0 +1,335 @@
+---
+title: "Running a Cluster"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Running a Cluster"
+    identifier: "managing_running_a_cluster"
+    weight: 200
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/building/basic-cluster-setup
+  - /riak/kv/2.9.2/ops/building/basic-cluster-setup
+---
+
+Configuring a Riak cluster involves instructing each node to listen on a
+non-local interface, i.e. not `127.0.0.1`, and then joining all of the
+nodes together to participate in the cluster.
+
+Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/) located in your `rel/riak/etc` directory (if
+you compiled from source) or `/etc` (if you used a binary install of
+Riak).
+
+The commands below presume that you are running from a source install,
+but if you have installed Riak with a binary install, you can substitute
+the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin`
+with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts
+are located in the `/bin` directory of your installation.
+
+> **Note on changing the `name` value**
+>
+> If possible, you should avoid starting Riak prior to editing the name of
+a node. This setting corresponds to the `nodename` parameter in the
+`riak.conf` file if you are using the newer configuration system, and to
+the `-name` parameter in `vm.args` (as described below) if you are using
+the older configuration system. If you have already started Riak with
+the default settings, you cannot change the `-name` setting and then
+successfully restart the node.
+>
+> If you cannot restart after changing the `-name` value you have two
+options:
+>
+> * Discard the existing ring metadata by removing the contents of the
+`ring` directory. This will require rejoining all nodes into a
+cluster again (a sketch of this follows the note).
+>
+> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
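+
+A minimal sketch of the first option (discarding the ring metadata); it
+assumes the default ring directory of `/var/lib/riak/ring` (adjust for
+your install) and a placeholder cluster member to rejoin:
+
+```bash
+riak stop
+# Remove the stale ring metadata so the node forgets its old identity.
+rm -rf /var/lib/riak/ring/*
+riak start
+# Rejoin the node to the cluster; the target node name is a placeholder.
+riak-admin cluster join riak@192.168.1.10
+riak-admin cluster plan
+riak-admin cluster commit
+```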
+
+## Configure the First Node
+
+First, stop your Riak node if it is currently running:
+
+```bash
+riak stop
+```
+
+#### Select an IP address and port
+
+Let's say that the IP address for your cluster is 192.168.1.10 and that
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+interface due to performance gains), you should change your
+configuration file:
+
+```riakconf
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"127.0.0.1", 8087 },
+```
+
+becomes
+
+```riakconf
+listener.protobuf.internal = 192.168.1.10:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"192.168.1.10", 8087 },
+```
+
+{{% note title="Note on upgrading to 2.0" %}}
+If you are upgrading to Riak version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config`/`vm.args`
+configuration files or the newer `riak.conf` if you wish. If you have
+installed Riak 2.0 directly, you should use only `riak.conf`.
+
+Below, examples will be provided for both the old and new configuration
+systems. Bear in mind that you need to use either the older or the newer
+system, but never both simultaneously.
+
+More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference).
+{{% /note %}}
+
+If you're using the HTTP interface, you will need to alter your
+configuration in an analogous way:
+
+```riakconf
+listener.http.internal = 127.0.0.1:8098
+```
+
+```appconfig
+%% In the riak_core section:
+
+{http, [ {"127.0.0.1", 8098 } ]},
+```
+
+becomes
+
+```riakconf
+listener.http.internal = 192.168.1.10:8098
+```
+
+```appconfig
+{http, [ {"192.168.1.10", 8098 } ]},
+```
+
+#### Name your node
+
+Every node in Riak has a name associated with it. The default name is
+`riak@127.0.0.1`. Let's say that you want to change the name to
+`riak@192.168.1.10`:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+becomes
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+> **Node Names**
+>
+> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10`
+are both acceptable node naming schemes, but using the FQDN style is
+preferred.
+>
+> Once a node has been started, in order to change the name you must
+either remove ring files from the `/data/ring` directory or
+[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.2/using/admin/riak-admin/#cluster-force-replace) the node.
+
+#### Start the node
+
+Now that your node is properly configured, you can start it:
+
+```bash
+riak start
+```
+
+If the Riak node has been previously started, you must use the
+`riak-admin cluster replace` command to change the node name and update
+the node's ring file.
+
+```bash
+riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10
+```
+
+{{% note title="Note on single nodes" %}}
+If a node is started singly using default settings, as you might do when you
+are building your first test environment, you will need to remove the ring
+files from the data directory after you edit your configuration files.
+`riak-admin cluster replace` will not work since the node has not been joined
+to a cluster.
+{{% /note %}}
+
+As with all cluster changes, you need to view the planned changes by
+running `riak-admin cluster plan` and then running `riak-admin cluster
+commit` to finalize those changes.
+
+The node is now properly set up to join other nodes for cluster
+participation. You can proceed to adding a second node to the cluster.
+
+## Add a Second Node to Your Cluster
+
+Repeat the above steps for a second host on the same network, providing
+the second node with a host/port and node name. Once the second node has
+started, use `riak-admin cluster join` to join the second node to the
+first node, thereby creating an initial Riak cluster. Let's say that
+we've named our second node `riak@192.168.1.11`. From the new node's
+`/bin` directory:
+
+```bash
+riak-admin cluster join riak@192.168.1.10
+```
+
+Output from the above should resemble:
+
+```
+Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10`
+```
+
+Next, plan and commit the changes:
+
+```bash
+riak-admin cluster plan
+riak-admin cluster commit
+```
+
+After the last command, you should see:
+
+```
+Cluster changes committed
+```
+
+If your output was similar, then the second Riak node is now part of the
+cluster and has begun syncing with the first node. Riak provides several
+ways to determine the cluster's ring status. Here are two ways to
+examine your Riak cluster's ring:
+
+1. Using the `riak-admin` command:
+
+    ```bash
+    bin/riak-admin status | grep ring_members
+    ```
+
+    With output resembling the following:
+
+    ```bash
+    ring_members : ['riak@192.168.1.10','riak@192.168.1.11']
+    ```
+
+2. Running the `riak attach` command. This will open up an Erlang shell,
+into which you can type the following command:
+
+    ```erlang
+    1> {ok, R} = riak_core_ring_manager:get_my_ring().
+
+    %% Response:
+
+    {ok,{chstate,'riak@192.168.1.10',.........
+    (riak@192.168.1.10)2> riak_core_ring:all_members(R).
+    ['riak@192.168.1.10','riak@192.168.1.11']
+    ```
+
+To join additional nodes to your cluster, repeat the above steps. You
+can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/2.9.2/using/cluster-operations/adding-removing-nodes) from a cluster.
+
+> **Ring Creation Size**
+>
+> All nodes in the cluster
+must have the same initial ring size setting in order to join and
+participate in cluster activity. This setting can be adjusted in your
+configuration file using the `ring_creation_size` parameter if you're
+using the older configuration system or `ring_size` in the new system.
+>
+> Check the value of all nodes if you receive a message like this:
+> `Failed: riak@10.0.1.156 has a different ring_creation_size`
+
+## Running Multiple Nodes on One Host
+
+If you built Riak from source code, or if you are using the Mac OS X
+pre-built package, then you can easily run multiple Riak nodes on the
+same machine. The most common scenario for doing this is to experiment
+with running a Riak cluster.
+
+**Note**: If you have installed the `.deb` or `.rpm` package, then you
+will need to download and build Riak from source to follow the
+directions below.
+
+To run multiple nodes, make copies of the `riak` directory.
+
+- If you ran `make all rel`, then this can be found in `./rel/riak`
+  under the Riak source root directory.
+- If you are running Mac OS X, then this is the directory where you
+  unzipped the `.tar.gz` file.
+
+Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`,
+`./rel/riak3`, and so on, you need to make two changes:
+
+1. 
Set your handoff port and your Protocol Buffers or HTTP port
+(depending on which interface you are using) to different values on each
+node. For example:
+
+    ```riakconf
+    # For Protocol Buffers:
+    listener.protobuf.internal = 127.0.0.1:8187
+
+    # For HTTP:
+    listener.http.internal = 127.0.0.1:8198
+
+    # For either interface:
+    handoff.port = 8199
+    ```
+
+    ```appconfig
+    %% In the pb section of riak_core:
+    {"127.0.0.1", 8187 }
+
+    %% In the http section of riak_core:
+    {"127.0.0.1", 8198}
+    ```
+
+2. Change the name of each node to a unique name. Now, start the nodes,
+changing path names and nodes as appropriate:
+
+```bash
+./rel/riak1/bin/riak start
+./rel/riak2/bin/riak start
+./rel/riak3/bin/riak start
+
+# etc
+```
+
+Next, join the nodes into a cluster:
+
+```bash
+./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1
+./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1
+./rel/riak2/bin/riak-admin cluster plan
+./rel/riak2/bin/riak-admin cluster commit
+```
+
+## Multiple Clusters on One Host
+
+Using the above technique, it is possible to run multiple clusters on
+one computer. If a node hasn’t joined an existing cluster, it will
+behave just as a single-node cluster would. Running multiple clusters on
+one computer is simply a matter of having two or more distinct nodes or
+groups of clustered nodes.
diff --git a/content/riak/kv/2.9.2/using/security.md b/content/riak/kv/2.9.2/using/security.md
new file mode 100644
index 0000000000..21d2db4d33
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/security.md
@@ -0,0 +1,195 @@
+---
+title: "Security & Firewalls"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Security"
+    identifier: "managing_security"
+    weight: 205
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/advanced/security
+  - /riak/kv/2.9.2/ops/advanced/security
+---
+
+[config reference search]: {{<baseurl>}}riak/kv/2.9.2/configuring/reference/#search
+[config search enabling]: {{<baseurl>}}riak/kv/2.9.2/configuring/search/#enabling-riak-search
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.2/configuring/v3-multi-datacenter/ssl
+[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html
+[security basics]: {{<baseurl>}}riak/kv/2.9.2/using/security/basics
+[security managing]: {{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/
+[Solr]: http://lucene.apache.org/solr/
+[usage search]: {{<baseurl>}}riak/kv/2.9.2/developing/usage/search
+
+> **Internal security**
+>
+> This document covers network-level security. For documentation on the
+authentication and authorization features introduced in Riak 2.0, see
+[Authentication and Authorization][security basics] and [Managing Security Sources][security managing].
+
+This article discusses standard configurations and port settings to use
+when providing network security for a Riak Cluster. There are two
+classes of access control for Riak:
+
+* Other Riak nodes participating in the cluster
+* Clients making use of the Riak cluster
+
+The settings for both access groups are located in your cluster's
+configuration settings. If you are using the newer configuration system,
+you can set a host and port for each node in that node's `riak.conf`
+file, setting `listener.protobuf` if you are using Riak's Protocol
+Buffers interface or `listener.http` if you are using HTTP (or
+`listener.https` if you are using SSL).
If you are using the older
+configuration system, adjust the settings of `pb`, `http`, or `https`,
+depending on which client interface you are using.
+
+Make note of these configurations and set up your firewall to allow
+incoming TCP access to those ports or IP address/port combinations.
+Exceptions to this are the `handoff_ip` and `handoff_port` directives.
+Those are for communication between Riak nodes only.
+
+## Inter-node Communication
+
+Riak uses the Erlang distribution mechanism for most inter-node
+communication. Riak identifies other machines in the ring using Erlang
+identifiers (`<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves
+these node identifiers to a TCP port on a given machine via the Erlang
+Port Mapper daemon (epmd) running on each cluster node.
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. For inter-node communication, Erlang uses an unpredictable
+port by default; it binds to port 0, which means the first available
+port.
+
+For ease of firewall configuration, Riak can be configured
+to instruct the Erlang interpreter to use a limited range
+of ports. For example, to restrict the range of ports that Erlang will
+use for inter-Erlang node communication to 6000-7999, add the following
+lines to the configuration file on each Riak node:
+
+```riakconf
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+```
+
+```appconfig
+{ kernel, [
+            {inet_dist_listen_min, 6000},
+            {inet_dist_listen_max, 7999}
+          ]},
+```
+
+The above lines should be added into the top level list in app.config,
+at the same level as all the other applications (e.g. `riak_core`).
+Then configure your firewall to allow incoming access to TCP ports 6000
+through 7999 from whichever network(s) contain your Riak nodes.
+
+### Riak Node Ports
+
+Riak nodes in a cluster need to be able to communicate freely with one
+another on the following ports:
+
+* epmd listener: TCP:4369
+* handoff_port listener: TCP:8099
+* range of ports specified in `app.config` or `riak.conf`
+
+### Riak Client Ports
+
+Riak clients must be able to contact at least one machine in a Riak
+cluster on the following TCP ports:
+
+Protocol | Port
+:--------|:----
+<a href="../../developing/api/http">HTTP</a> | TCP port 8098
+<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087
+
+### Riak Search Ports
+
+Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs
+on each Riak node if search has been [enabled][config search enabling]. When
+Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well
+as some ephemeral ports. The well-known port is determined by the value of
+`search.solr.jmx_port` in each node's [Search configuration][config reference search].
+The default is 8985.
+
+In addition to JMX ports, Solr also binds to a well-known port of its
+own, as determined by each node's `search.solr.port` setting, which is
+also located in each node's Search configuration. The default is 8093.
+
+## Riak Security Community
+
+### Riak
+
+Riak is a powerful open-source distributed database focused on scaling
+predictably and easily, while remaining highly available in the face of
+server crashes, network partitions, or other (inevitable) disasters.
+
+### Commitment
+
+Data security is an important and sensitive issue to many of our users.
+A real-world approach to security allows us to balance appropriate
+levels of security and related overhead while creating a fast, scalable,
+and operationally straightforward database.
+
+#### Continuous Improvement
+
+Though we make every effort to thwart security vulnerabilities whenever
+possible (including through independent reviews), no system is
+completely secure. We will never claim that Riak is 100% secure (and you
+should seriously doubt anyone who claims their solution is). What we can
+promise is that we openly accept all vulnerabilities from the community.
+When appropriate, we'll publish them and make every attempt to address
+these concerns quickly.
+
+#### Balance
+
+More layers of security increase operational and administrative costs.
+Sometimes those costs are warranted; sometimes they are not. Our
+approach is to strike an appropriate balance between effort, cost, and
+security.
+
+For example, Riak does not have fine-grained role-based security. Though
+it can be an attractive bullet-point in a database comparison chart,
+you're usually better off finely controlling data access through your
+application or a service layer.
+
+#### Notifying Basho
+
+If you discover a potential security issue, please email us at
+**security@basho.com**, and allow us 48 hours to reply.
+
+We prefer to be contacted first, rather than learning about an issue
+from a blog post on the Internet. This allows us to open a dialogue with
+the security community on how best to handle a possible exploit without
+putting any users at risk.
+
+### Security Best Practices
+
+#### Authentication and Authorization
+
+For instructions on how to apply permissions and to require client
+authentication, please see our documentation on [Riak Security][security basics].
+
+#### Network Configurations
+
+Being a distributed database means that much of Riak's security springs
+from how you configure your network. We have a few recommendations for
+[Security and Firewalls][security basics].
+
+#### Client Auth
+
+All of the Riak client libraries support encrypted TCP communication
+as well as authentication and authorization. For instructions on how
+to apply permissions and to require client authentication, please see
+our documentation on [Riak Security][security basics].
+
+#### Multi-Datacenter Replication
+
+For those versions of Riak that support Multi Data Center (MDC)
+Replication, you can configure Riak 1.2+ to communicate over SSL and
+seamlessly encrypt the message traffic.
+
+See also: [Multi Data Center Replication: SSL][config v3 ssl]
diff --git a/content/riak/kv/2.9.2/using/security/basics.md b/content/riak/kv/2.9.2/using/security/basics.md
new file mode 100644
index 0000000000..43adebe542
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/security/basics.md
@@ -0,0 +1,847 @@
+---
+title: "Security Basics"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Security Basics"
+    identifier: "security_basics"
+    weight: 100
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/authz
+  - /riak/kv/2.9.2/ops/running/authz
+---
+
+> **Note on Network security**
+>
+> This document covers only the 2.0 authentication and authorization
+features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.2/using/security/). 
+
+As of version 2.0, Riak administrators can selectively apportion
+access to a wide variety of Riak's functionality, including accessing,
+modifying, and deleting objects, changing bucket properties, and
+running MapReduce jobs.
+
+## Terminology
+
+* **Authentication** is the process of identifying a user.
+* **Authorization** is verifying whether a user has access to perform
+  the requested operation.
+* **Groups** can have permissions assigned to them, but cannot be
+  authenticated.
+* **Users** can be authenticated and authorized; permissions
+  (authorization) may be granted directly or via group membership.
+* **Sources** are used to define authentication mechanisms. A user
+  cannot be authenticated to Riak until a source is defined.
+
+## Security Checklist
+
+There are a few key steps that all applications will need to undertake
+when turning on Riak security. Missing one of these steps will almost
+certainly break your application, so make sure that you have done each
+of the following **before** enabling security:
+
+1. Make certain that the original Riak Search (version 1) and link
+   walking are not required. Enabling security will break both. If you
+   wish to use security and Search together, you
+   will need to use the [new Search feature]({{<baseurl>}}riak/kv/2.9.2/developing/usage/search/).
+1. Because Riak security requires a secure SSL connection, you will need
+   to generate appropriate SSL certs, [enable SSL](#enabling-ssl), and establish a [certificate configuration](#certificate-configuration) on each node. **If you
+   enable security without having established a functioning SSL
+   connection, all requests to Riak will fail**.
+1. Define [users](#user-management)
+   and, optionally, [groups](#add-group).
+1. Define an [authentication source](#managing-sources) for each user.
+1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group).
+1. Check any Erlang MapReduce code for invocations of Riak modules other
+   than `riak_kv_mapreduce`. Enabling security will prevent those from
+   succeeding unless those modules are available via the `add_path`
+   mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/2.9.2/using/reference/custom-code).
+1. Make sure that your client software will work properly:
+    * It must pass authentication information with each request.
+    * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers/)
+      traffic.
+    * If HTTPS is used, the proper port (presumably 443) must be open
+      from client to server.
+    * Code that uses Riak's deprecated link walking feature **will
+      not work** with security enabled.
+1. If you have applications that rely on an already existing Riak
+   cluster, make sure that those applications are prepared to gracefully
+   transition into using Riak security once security is enabled.
+
+Security should be enabled only after all of the above steps have been
+performed and your security setup has been properly vetted.
+
+Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers/) will typically have to be
+reconfigured/restarted with the proper credentials once security is
+enabled.
+
+## Security Basics
+
+Riak security may be checked, enabled, or disabled by an administrator
+through the command line. This allows an administrator to change
+security settings for the whole cluster quickly without needing to
+change settings on a node-by-node basis.
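+
+Putting the checklist above into concrete terms, a minimal end-to-end
+setup might look like the following sketch. The user name, network
+range, and permissions are illustrative assumptions only; each command
+is described in detail in the sections below:
+
+```bash
+# Stage a user, an authentication source, and permissions while
+# security is still disabled (none of this impacts live traffic yet)
+riak-admin security add-user myapp password=MySecretPassword
+riak-admin security add-source myapp 10.0.0.0/24 password
+riak-admin security grant riak_kv.get,riak_kv.put on any to myapp
+
+# Only then enable security and verify the cluster-wide status
+riak-admin security enable
+riak-admin security status
+```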
+
+**Note**: Currently, Riak security commands can be run only through
+the command line, using the `riak-admin security` command. In future
+versions of Riak, administrators may have the option of issuing
+those commands through the Protocol Buffers and HTTP interfaces.
+
+### Enabling Security
+
+> **Warning: Enable security with caution**
+>
+> Enabling security will change the way your client libraries and
+your applications interact with Riak.
+>
+> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment.
+
+Riak security is disabled by default. To enable it:
+
+```bash
+riak-admin security enable
+```
+
+**As per the warning above, do not enable security in production without
+taking the appropriate precautions.**
+
+All users, groups, authentication sources, and permissions can be
+configured while security is disabled, allowing you to create a
+security configuration of any level of complexity without prematurely
+impacting the service. This should be borne in mind when you are
+[managing users](#user-management) and [managing sources](#managing-sources).
+
+### Disabling Security
+
+Disabling security turns off all of the permission checks that take
+place when executing operations against Riak. Users, groups, and other
+security attributes remain available for configuration while security
+is disabled, and will be applied if and when security is re-enabled.
+
+```bash
+riak-admin security disable
+```
+
+While security is disabled, clients will need to be reconfigured so
+that they no longer use TLS or send credentials.
+
+### Checking Security Status
+
+To check whether security is currently enabled for the cluster, use the
+`status` command:
+
+```bash
+riak-admin security status
+```
+
+This command will usually return `Enabled` or `Disabled`, but if
+security is enabled on a mixed-mode cluster (running a combination of
+Riak 2.0 and older versions) it will indicate that security is enabled
+but not yet available.
+
+## User Management
+
+Riak security enables you to control _authorization_ by creating,
+modifying, and deleting user characteristics and granting users
+selective access to Riak functionality (as well as revoking that
+access). Users can be assigned one or more of the following
+characteristics:
+
+* `username`
+* `groups`
+* `password`
+
+You may also assign users characteristics beyond those listed
+above---e.g., listing email addresses or other information---but those
+values will carry no special significance for Riak.
+
+**Note**: The `username` is the one user characteristic that cannot be
+changed once a user has been created.
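+
+Because permissions may be granted either directly to a user or via
+group membership, a common pattern is to grant a permission to a group
+once and then add users to that group. A brief sketch, using a
+hypothetical `devs` group (the individual commands are described in
+the sections below):
+
+```bash
+# Create a group and grant it read access to all buckets
+riak-admin security add-group devs
+riak-admin security grant riak_kv.get on any to devs
+
+# Any user placed in the group inherits that permission
+riak-admin security alter-user riakuser groups=devs
+riak-admin security print-grants riakuser
+```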
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak-admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak-admin security print-groups
+```
+
+Example output, assuming a user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups | password | options |
++----------+--------+----------------------+------------------------------+
+| riakuser | |983e8ae1421574b8733824| [] |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in encrypted form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and a `name` of
+`lucius`, the output would look like this:
+
+```
++----------+----------------+----------------------+---------------------+
+| username | groups | password | options |
++----------+----------------+----------------------+---------------------+
+| riakuser | dev |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` and `security print-group` (singular) commands
+can be used with a name as an argument to see the same information as
+above, but only for that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak-admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group | type | bucket | grants |
++--------+----------+----------+----------------------------------------+
+| admin | * | * | riak_kv.get, riak_kv.delete, |
+| | | | riak_kv.put |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+| type | bucket | grants |
++----------+-------------+----------------------------------------+
+| ANY |shopping_list| riak_kv.get |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+| type | bucket | grants |
++----------+-------------+----------------------------------------+
+| * | * | riak_kv.get, riak_kv.delete, |
+| | | | riak_kv.put |
+| ANY |shopping_list| riak_kv.get |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately.
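+
+For example, if both a user and a group named `admin` existed, the
+following commands would inspect each of them separately:
+
+```bash
+riak-admin security print-grants user/admin
+riak-admin security print-grants group/admin
+```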
+
+### Add Group
+
+For easier management of permissions across several users, it is
+possible to create groups to be assigned to those users.
+
+```bash
+riak-admin security add-group admin
+```
+
+### Add User
+
+To create a user with the username `riakuser`, we use the `add-user`
+command:
+
+```bash
+riak-admin security add-user riakuser
+```
+
+Using the command this way will create the user `riakuser` without _any_
+characteristics beyond a username, which is the only attribute that you
+must assign upon user creation.
+
+Alternatively, a password---or other attributes---can be assigned to the
+user upon creation. Here, we'll assign a password:
+
+```bash
+riak-admin security add-user riakuser password=Test1234
+```
+
+### Assigning a Password and Altering Existing User Characteristics
+
+While passwords and other characteristics can be set upon user creation,
+it often makes sense to change user characteristics after the user has
+already been created. Let's say that the user `riakuser` was created
+without a password (or created _with_ a password that we'd like to
+change). The `alter-user` command can be used to modify our `riakuser`
+user:
+
+```bash
+riak-admin security alter-user riakuser password=opensesame
+```
+
+When creating or altering a user, any number of `<option>=<value>`
+pairs can be appended to the end of the command. Any non-standard
+options will be stored and displayed via the `riak-admin security
+print-users` command.
+
+```bash
+riak-admin security alter-user riakuser name=bill age=47 fav_color=red
+```
+
+Now, the `print-users` command should return this:
+
+```
++----------+--------+----------+--------------------------------------------------+
+| username | groups | password | options |
++----------+--------+----------+--------------------------------------------------+
+| riakuser | | |[{"fav_color","red"},{"age","47"},{"name","bill"}]|
++----------+--------+----------+--------------------------------------------------+
+```
+
+**Note**: Usernames _cannot_ be changed using the `alter-user` command.
+For example, running `riak-admin security alter-user riakuser
+username=other-name` will instead add the
+`{"username","other-name"}` tuple to `riakuser`'s options.
+
+### Managing Groups for a User
+
+If we have a user `riakuser` and we'd like to assign her to the
+`admin` group, we assign the value `admin` to the option `groups`:
+
+```bash
+riak-admin security alter-user riakuser groups=admin
+```
+
+If we'd like to make the user `riakuser` both an `admin` and an
+`archoverlord`:
+
+```bash
+riak-admin security alter-user riakuser groups=admin,archoverlord
+```
+
+There is no way to incrementally add groups; even if `riakuser` was
+already in the `admin` group, it is necessary to list it again when
+adding the `archoverlord` group. Thus, to remove a group from a user,
+use `alter-user` and list all *other* groups.
+
+If the user should be removed from all groups, use `groups=` with no
+list:
+
+```bash
+riak-admin security alter-user riakuser groups=
+```
+
+### Managing Groups for Groups
+
+Groups can be added to other groups for cascading permissions.
+
+```bash
+riak-admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```bash
+riak-admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```bash
+riak-admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak-admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation |
+:----------|:----------|
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully.
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/2.9.2/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/2.9.2/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/2.9.2/configuring/search/) document. 
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak-admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak-admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Check the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticate against the given pluggable authentication module (PAM) service
+`certificate` | Authenticate using a client certificate
+
+### Example: Adding a Trusted Source
+
+As noted above, sources can be added to a specific user, to multiple
+users, or to all users (`all`).
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak-admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+| users | cidr | source | options |
++--------------------+------------+----------+----------+
+| all |127.0.0.1/32| trust | [] |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR.
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/2.9.2/setup/installing/source/erlang). 
This issue should
+not affect Erlang 17.0 and later.
+
+## Enabling SSL
+
+In order to use any authentication or authorization features, you must
+enable SSL for Riak. **SSL is disabled by default**; you will need to
+enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#client-interfaces) for the node
+as well as a [certificate configuration](#certificate-configuration).
+
+If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/2.9.2/developing/api/http) for Riak and would like to
+configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host
+and port. The following configuration would establish port 8088 on
+`localhost` as the HTTPS port:
+
+```riakconf
+listener.https.$name = 127.0.0.1:8088
+
+# By default, "internal" is used as the "name" setting
+```
+
+```appconfig
+{riak_core, [
+             %% Other configs
+             {https, [{"127.0.0.1", 8088}]},
+             %% Other configs
+            ]}
+```
+
+## TLS Settings
+
+When using Riak security, you can choose which versions of SSL/TLS are
+allowed. By default, only TLS 1.2 is allowed, but this version can be
+disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#security) to `on` or `off`:
+
+* `tls_protocols.tlsv1`
+* `tls_protocols.tlsv1.1`
+* `tls_protocols.tlsv1.2`
+* `tls_protocols.sslv3`
+
+Three things to note:
+
+* Among the four available options, only TLS version 1.2 is enabled by
+  default
+* You can enable more than one protocol at a time
+* We strongly recommend that you do _not_ use SSL version 3 unless
+  absolutely necessary
+
+## Certificate Configuration
+
+If you are using any of the available [security sources]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#trust-based-authentication), you will need to do so
+over a secure SSL connection. In order to establish a secure connection,
+you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#security) point to the proper paths for your
+generated certs. By default, Riak assumes that all certs are stored in
+each node's `/etc` directory.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can change the location of the `/etc` directory by modifying the
+`platform_etc_dir`. More information can be found in our documentation
+on [configuring directories]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/#directories).
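+
+If you simply want to experiment with SSL in a test environment, a
+self-signed CA and node certificate can be generated with OpenSSL along
+the following lines. This is a sketch only: the file names are chosen to
+match the defaults shown in the table below, and production clusters
+should use certificates issued by a proper certificate authority:
+
+```bash
+# Generate a self-signed CA certificate and key (names are illustrative)
+openssl req -new -x509 -days 365 -nodes -subj '/CN=test-ca' \
+  -keyout cakey.pem -out cacertfile.pem
+
+# Generate a key and certificate signing request for this node
+openssl req -new -nodes -subj '/CN=riak-node-1' \
+  -keyout key.pem -out cert.csr
+
+# Sign the node certificate with the CA
+openssl x509 -req -days 365 -in cert.csr -CA cacertfile.pem \
+  -CAkey cakey.pem -CAcreateserial -out cert.pem
+```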
+
+<table class="riak-conf">
+  <thead>
+    <tr>
+      <th>Type</th>
+      <th>Parameter</th>
+      <th>Default</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><strong>Signing authority</strong></td>
+      <td><code>ssl.cacertfile</code></td>
+      <td><code>#(platform_etc_dir)/cacertfile.pem</code></td>
+    </tr>
+    <tr>
+      <td><strong>Cert</strong></td>
+      <td><code>ssl.certfile</code></td>
+      <td><code>#(platform_etc_dir)/cert.pem</code></td>
+    </tr>
+    <tr>
+      <td><strong>Key file</strong></td>
+      <td><code>ssl.keyfile</code></td>
+      <td><code>#(platform_etc_dir)/key.pem</code></td>
+    </tr>
+  </tbody>
+</table>
+
+If you are using the older, `app.config`-based configuration system,
+these paths can be set in the `ssl` subsection of the `riak_core`
+section. The corresponding parameters are shown in the example below:
+
+```appconfig
+{riak_core, [
+    %% Other configs
+
+    {ssl, [
+           {certfile, "./etc/cert.pem"},
+           {keyfile, "./etc/key.pem"},
+           {cacertfile, "./etc/cacertfile.pem"}
+          ]},
+
+    %% Other configs
+]}
+```
+
+## Referer Checks and Certificate Revocation Lists
+
+In order to provide safeguards against
+[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting)
+(XSS) and
+[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery)
+attacks, Riak performs [secure referer
+checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those
+checks make it impossible to serve data directly from Riak. To disable
+those checks, set the `secure_referer_check` parameter to `off`.
+
+If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/2.9.2/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of each
+connecting client's certificate by default. To disable this behavior,
+set the `check_crl` parameter to `off`.
diff --git a/content/riak/kv/2.9.2/using/security/best-practices.md b/content/riak/kv/2.9.2/using/security/best-practices.md
new file mode 100644
index 0000000000..6cc22d6dfc
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/security/best-practices.md
@@ -0,0 +1,80 @@
+---
+draft: true
+title: "Security Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Best Practices"
+    identifier: "security_best_practices"
+    weight: 102
+    parent: "managing_security"
+toc: true
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor
+regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris
+coniugis.
+
+## Troiana quoque
+
+Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe
+sanctique meum*; est. [Gente inimica
+premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret
+tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen
+ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret
+quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim
+suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis
+nimium in miserata?
+
+1. `In naribus aequos aberant`
+2. Naturae murmura te rimas suarum vulnus quod
+3. Socios leto loquor timide
+4. Ergo sub
+5. Patrias mihi consumite breve
+
+## Ruit huic movit luminibus excubias arma
+
+> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex
+vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat
+*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens
+cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec.
+
+1. Hic causam et dilecte nudae nec corpus
+2. Cor Si nive
+3. Petis equos perosa tu perterrita exitus non
+4. Per et et ire geminos parte
+5. Aqua coniunx cecidisse sonum
+
+```
+Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum
+potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et
+nec rubebant pietas, ipsa.
+```
diff --git a/content/riak/kv/2.9.2/using/security/managing-sources.md b/content/riak/kv/2.9.2/using/security/managing-sources.md
new file mode 100644
index 0000000000..88c174e674
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/security/managing-sources.md
@@ -0,0 +1,269 @@
+---
+title: "Managing Security Sources"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Managing Security Sources"
+    identifier: "security_manage_sources"
+    weight: 101
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.2/ops/running/security-sources
+  - /riak/kv/2.9.2/ops/running/security-sources
+---
+
+If you're looking for more general information on Riak Security, it may
+be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/2.9.2/using/security/basics).
+
+This document provides more granular information on the four available
+authentication sources in Riak Security: trusted networks, password,
+pluggable authentication modules (PAM), and certificates. These sources
+correspond to `trust`, `password`, `pam`, and `certificate`,
+respectively, in the `riak-admin security` interface.
+
+The examples below will assume that the network in question is
+`127.0.0.1/32`, that a Riak user named `riakuser` has been
+[created]({{<baseurl>}}riak/kv/2.9.2/using/security/basics/#user-management), and that
+security has been [enabled]({{<baseurl>}}riak/kv/2.9.2/using/security/basics/#security-basics).
+
+{{% note title="Note on SSL connections" %}}
+If you use _any_ of the aforementioned security sources, even `trust`, you
+will need to do so via a secure SSL connection. 
+{{% /note %}}
+
+## Trust-based Authentication
+
+This form of authentication enables you to specify trusted
+[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which all clients will be authenticated by default.
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+Here, we have specified that anyone connecting to Riak from the
+designated CIDR (in this case `localhost`) will be successfully
+authenticated:
+
+```curl
+curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+If this request returns `not found` or a Riak object, then things have
+been set up appropriately. You can specify any number of trusted
+networks in the same fashion.
+
+You can also specify users as trusted users, as in the following
+example:
+
+```bash
+riak-admin security add-source riakuser 127.0.0.1/32 trust
+```
+
+Now, `riakuser` can interact with Riak without providing credentials.
+Here's an example in which only the username is passed to Riak:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## Password-based Authentication
+
+Authenticating via the `password` source requires that our `riakuser` be
+given a password. `riakuser` can be assigned a password upon creation,
+as in this example:
+
+```bash
+riak-admin security add-user riakuser password=captheorem4life
+```
+
+Or a password can be assigned to an already existing user by modifying
+that user's characteristics:
+
+```bash
+riak-admin security alter-user riakuser password=captheorem4life
+```
+
+You can specify that _all_ users must authenticate themselves via
+password when connecting to Riak from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 password
+```
+
+Or you can specify that any number of specific users must do so:
+
+```bash
+riak-admin security add-source riakuser 127.0.0.1/32 password
+riak-admin security add-source otheruser 127.0.0.1/32 password
+
+# etc
+```
+
+Now, our `riakuser` must enter a username and password to have any
+access to Riak whatsoever:
+
+```curl
+curl -u riakuser:captheorem4life \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## Certificate-based Authentication
+
+This form of authentication (`certificate`) requires that Riak and a
+specified client---or clients---interacting with Riak bear certificates
+signed by the same [Root Certificate
+Authority](http://en.wikipedia.org/wiki/Root_certificate).
+
+> **Note**
+>
+> At this time, client certificates are not supported in Riak's HTTP
+interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/2.9.2/developing/api/protocol-buffers/).
+
+Let's specify that our user `riakuser` is going to be authenticated
+using a certificate on `localhost`:
+
+```bash
+riak-admin security add-source riakuser 127.0.0.1/32 certificate
+```
+
+When the `certificate` source is used, the username `riakuser` must also
+match the common name (`CN`) that you specified when you generated your
+certificate, as in the following OpenSSL example:
+
+```bash
+openssl req -new ... '/CN=riakuser'
+```
+
+You can add a `certificate` source to any number of clients, as long as
+their `CN` and Riak username match.
+
+On the server side, you need to configure Riak by specifying a path to
+your certificates. First, copy all relevant files to your Riak cluster. 
+The default directory for certificates is `/etc`, though you can specify
+a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/), either by uncommenting those lines (if you choose to use the defaults) or by setting the paths yourself:
+
+```riakconf
+ssl.certfile = /path/to/cert.pem
+ssl.keyfile = /path/to/key.pem
+ssl.cacertfile = /path/to/cacert.pem
+```
+
+In the client-side example above, the client's `CN` and Riak username
+needed to match. On the server (i.e. Riak) side, the `CN` specified _on
+each node_ must match the node's name as registered by Riak. You can
+find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/2.9.2/configuring/reference/) under the parameter `nodename`. So if the `nodename` for a cluster is
+`riak-node-1`, you would need to generate your certificate with that in
+mind, as in this OpenSSL example:
+
+```bash
+openssl req -new ... '/CN=riak-node-1'
+```
+
+Once certificates have been properly generated and configured on all of
+the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/2.9.2/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client
+certificate that you generated for the user `riakuser`.
+
+How to use Riak clients in conjunction with OpenSSL and other
+certificates varies from client library to client library. We strongly
+recommend checking the documentation of your client library for further
+information.
+
+## PAM-based Authentication
+
+This section assumes that you have set up a PAM service named
+`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition with
+`auth` and/or other PAM entries that can authenticate a user named
+`riakuser`. As in the certificate-based authentication example above,
+the user's name must be the same in both your authentication module and
+in Riak Security.
+
+If we want the user `riakuser` to use this PAM service on `localhost`,
+we need to add a `pam` security source in Riak and specify the name of
+the service:
+
+```bash
+riak-admin security add-source riakuser 127.0.0.1/32 pam service=riak_pam
+```
+
+**Note**: If you do not specify a name for your PAM service, Riak will
+use the default, which is `riak`.
+
+To verify that the source has been properly specified:
+
+```bash
+riak-admin security print-sources
+```
+
+That command should output the following:
+
+```
++--------------------+------------+----------+------------------------+
+| users | cidr | source | options |
++--------------------+------------+----------+------------------------+
+| riakuser |127.0.0.1/32| pam |[{"service","riak_pam"}]|
++--------------------+------------+----------+------------------------+
+```
+
+You can test that setup most easily by using `curl`. A request that is
+not successfully authenticated by your PAM service will return an
+`Unauthorized` message:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Response:
+
+```
+<html><head><title>401 Unauthorized</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></body></html>
+```
+
+If you identify yourself as `riakuser` and are successfully
+authenticated by your PAM service, you should get either `not found` or
+a Riak object if one is stored in the specified bucket type/bucket/key
+path:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## How Sources Are Applied
+
+When managing security sources---any of the sources explained
+above---you always have the option of applying a source to either a
+single user, multiple users, or all users (`all`). If specific users and
+`all` have no sources in common, this presents no difficulty. But what
+happens if one source is applied to `all` and a different source is
+applied to a specific user?
+
+The short answer is that the more specifically assigned source---i.e. to
+the user---will be considered the user's security source. We'll illustrate
+that with the following example, in which the `certificate` source is
+assigned to `all`, but the `password` source is assigned to `riakuser`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 certificate
+riak-admin security add-source riakuser 127.0.0.1/32 password
+```
+
+If we run `riak-admin security print-sources`, we'll get the following
+output:
+
+```
++--------------------+------------+-----------+----------+
+| users | cidr | source | options |
++--------------------+------------+-----------+----------+
+| riakuser |127.0.0.1/32| password | [] |
+| |127.0.0.1/32|certificate| [] |
+| all |127.0.0.1/32|certificate| [] |
++--------------------+------------+-----------+----------+
+```
+
+As we can see, `password` is set as the security source for `riakuser`,
+whereas everyone else will authenticate using `certificate`.
diff --git a/content/riak/kv/2.9.2/using/security/v2-v3-ssl-ca.md b/content/riak/kv/2.9.2/using/security/v2-v3-ssl-ca.md
new file mode 100644
index 0000000000..2ea004bbb5
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/security/v2-v3-ssl-ca.md
@@ -0,0 +1,80 @@
+---
+draft: true
+title: "V2 / V3 SSL & CA Validation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "V2/V3 SSL & CA Validation"
+    identifier: "security_validation"
+    weight: 103
+    parent: "managing_security"
+toc: true
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. 
Et amor
+regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris
+coniugis.
+
+## Troiana quoque
+
+Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe
+sanctique meum*; est. [Gente inimica
+premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret
+tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen
+ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret
+quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim
+suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis
+nimium in miserata?
+
+1. `In naribus aequos aberant`
+2. Naturae murmura te rimas suarum vulnus quod
+3. Socios leto loquor timide
+4. Ergo sub
+5. Patrias mihi consumite breve
+
+## Ruit huic movit luminibus excubias arma
+
+> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex
+vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat
+*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens
+cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec.
+
+1. Hic causam et dilecte nudae nec corpus
+2. Cor Si nive
+3. Petis equos perosa tu perterrita exitus non
+4. Per et et ire geminos parte
+5. Aqua coniunx cecidisse sonum
+
+```
+Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum
+potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et
+nec rubebant pietas, ipsa.
+```
diff --git a/content/riak/kv/2.9.2/using/troubleshooting.md b/content/riak/kv/2.9.2/using/troubleshooting.md
new file mode 100644
index 0000000000..1c3c3c22c7
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/troubleshooting.md
@@ -0,0 +1,23 @@
+---
+title: "Troubleshooting"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "Troubleshooting"
+    identifier: "managing_troubleshooting"
+    weight: 207
+    parent: "managing"
+toc: true
+---
+
+[http 204]: ./http-204
+
+## In This Section
+
+#### [HTTP 204][http 204]
+
+About the HTTP 204 response.
+
+[Learn More >>][http 204]
diff --git a/content/riak/kv/2.9.2/using/troubleshooting/http-204.md b/content/riak/kv/2.9.2/using/troubleshooting/http-204.md
new file mode 100644
index 0000000000..53b469b3d8
--- /dev/null
+++ b/content/riak/kv/2.9.2/using/troubleshooting/http-204.md
@@ -0,0 +1,17 @@
+---
+title: "HTTP 204"
+description: ""
+project: "riak_kv"
+project_version: 2.9.2
+menu:
+  riak_kv-2.9.2:
+    name: "HTTP 204"
+    identifier: "troubleshooting_http_204"
+    weight: 101
+    parent: "managing_troubleshooting"
+toc: true
+---
+
+In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers.
+
+If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored; otherwise you will receive a `204 No Content`.
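+
+For example (a sketch, assuming a local node listening on the default
+HTTP port 8098 and an illustrative bucket and key):
+
+```curl
+# Returns 204 No Content
+curl -i -XPUT http://localhost:8098/buckets/test/keys/hello \
+  -H "Content-Type: text/plain" \
+  -d "hello world"
+
+# Returns 200 OK along with the stored object
+curl -i -XPUT "http://localhost:8098/buckets/test/keys/hello?returnbody=true" \
+  -H "Content-Type: text/plain" \
+  -d "hello world"
+```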
diff --git a/content/riak/kv/2.9.4/_reference-links.md b/content/riak/kv/2.9.4/_reference-links.md new file mode 100644 index 0000000000..404a12c362 --- /dev/null +++ b/content/riak/kv/2.9.4/_reference-links.md @@ -0,0 +1,251 @@ + +# Riak KV 2.9.4 Reference Links List + + +## Common + +[downloads]: {{}}riak/kv/2.9.4/downloads/ +[install index]: {{}}riak/kv/2.9.4/setup/installing +[upgrade index]: {{}}riak/kv/2.9.4/upgrading +[plan index]: {{}}riak/kv/2.9.4/planning +[config index]: {{}}riak/kv/2.9.4/using/configuring/ +[config reference]: {{}}riak/kv/2.9.4/configuring/reference/ +[manage index]: {{}}riak/kv/2.9.4/using/managing +[performance index]: {{}}riak/kv/2.9.4/using/performance +[glossary vnode]: {{}}riak/kv/2.9.4/learn/glossary/#vnode +[contact basho]: https://www.tiot.jp/en/about-us/contact-us/ + + +## Planning + +[plan index]: {{}}riak/kv/2.9.4/setup/planning +[plan start]: {{}}riak/kv/2.9.4/setup/planning/start +[plan backend]: {{}}riak/kv/2.9.4/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.9.4/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.9.4/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/2.9.4/setup/planning/backend/leveled +[plan backend memory]: {{}}riak/kv/2.9.4/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.9.4/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/2.9.4/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.9.4/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.9.4/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.9.4/setup/planning/future + + +## Installing + +[install index]: {{}}riak/kv/2.9.4/setup/installing +[install aws]: {{}}riak/kv/2.9.4/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.9.4/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.9.4/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.9.4/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.9.4/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.9.4/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.9.4/setup/installing/solaris +[install suse]: {{}}riak/kv/2.9.4/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.9.4/setup/installing/windows-azure + +[install source index]: {{}}riak/kv/2.9.4/setup/installing/source +[install source erlang]: {{}}riak/kv/2.9.4/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.9.4/setup/installing/source/jvm + +[install verify]: {{}}riak/kv/2.9.4/setup/installing/verify + + +## Upgrading + +[upgrade index]: {{}}riak/kv/2.9.4/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.9.4/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.9.4/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.9.4/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.9.4/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.9.4/setup/downgrade + + +## Configuring + +[config index]: {{}}riak/kv/2.9.4/configuring +[config basic]: {{}}riak/kv/2.9.4/configuring/basic +[config backend]: {{}}riak/kv/2.9.4/configuring/backend +[config manage]: {{}}riak/kv/2.9.4/configuring/managing +[config reference]: {{}}riak/kv/2.9.4/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.9.4/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.9.4/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.9.4/configuring/mapreduce +[config search]: {{}}riak/kv/2.9.4/configuring/search/ + 
+[config v3 mdc]: {{}}riak/kv/2.9.4/configuring/v3-multi-datacenter
+[config v3 nat]: {{}}riak/kv/2.9.4/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{}}riak/kv/2.9.4/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{}}riak/kv/2.9.4/configuring/v3-multi-datacenter/ssl
+
+[config v2 mdc]: {{}}riak/kv/2.9.4/configuring/v2-multi-datacenter
+[config v2 nat]: {{}}riak/kv/2.9.4/configuring/v2-multi-datacenter/nat
+[config v2 quickstart]: {{}}riak/kv/2.9.4/configuring/v2-multi-datacenter/quick-start
+[config v2 ssl]: {{}}riak/kv/2.9.4/configuring/v2-multi-datacenter/ssl
+
+
+## Using
+
+[use index]: {{}}riak/kv/2.9.4/using/
+[use admin commands]: {{}}riak/kv/2.9.4/using/cluster-admin-commands
+[use running cluster]: {{}}riak/kv/2.9.4/using/running-a-cluster
+
+### Reference
+
+[use ref custom code]: {{}}riak/kv/2.9.4/using/reference/custom-code
+[use ref handoff]: {{}}riak/kv/2.9.4/using/reference/handoff
+[use ref monitoring]: {{}}riak/kv/2.9.4/using/reference/statistics-monitoring
+[use ref search]: {{}}riak/kv/2.9.4/using/reference/search
+[use ref 2i]: {{}}riak/kv/2.9.4/using/reference/secondary-indexes
+[use ref snmp]: {{}}riak/kv/2.9.4/using/reference/snmp
+[use ref strong consistency]: {{}}riak/kv/2.9.4/using/reference/strong-consistency
+[use ref jmx]: {{}}riak/kv/2.9.4/using/reference/jmx
+[use ref obj del]: {{}}riak/kv/2.9.4/using/reference/object-deletion/
+[use ref v3 mdc]: {{}}riak/kv/2.9.4/using/reference/v3-multi-datacenter
+[use ref v2 mdc]: {{}}riak/kv/2.9.4/using/reference/v2-multi-datacenter
+
+### Cluster Admin
+
+[use admin index]: {{}}riak/kv/2.9.4/using/admin/
+[use admin commands]: {{}}riak/kv/2.9.4/using/admin/commands/
+[use admin riak cli]: {{}}riak/kv/2.9.4/using/admin/riak-cli/
+[use admin riak-admin]: {{}}riak/kv/2.9.4/using/admin/riak-admin/
+[use admin riak control]: {{}}riak/kv/2.9.4/using/admin/riak-control/
+
+### Cluster Operations
+
+[cluster ops add remove node]: {{}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes
+[cluster ops inspect node]: {{}}riak/kv/2.9.4/using/cluster-operations/inspecting-node
+[cluster ops change info]: {{}}riak/kv/2.9.4/using/cluster-operations/changing-cluster-info
+[cluster ops load balance]: {{}}riak/kv/2.9.4/configuring/load-balancing-proxy
+[cluster ops bucket types]: {{}}riak/kv/2.9.4/using/cluster-operations/bucket-types
+[cluster ops handoff]: {{}}riak/kv/2.9.4/using/cluster-operations/handoff
+[cluster ops log]: {{}}riak/kv/2.9.4/using/cluster-operations/logging
+[cluster ops obj del]: {{}}riak/kv/2.9.4/using/reference/object-deletion
+[cluster ops backup]: {{}}riak/kv/2.9.4/using/cluster-operations/backing-up
+[cluster ops mdc]: {{}}riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter
+[cluster ops strong consistency]: {{}}riak/kv/2.9.4/using/cluster-operations/strong-consistency
+[cluster ops 2i]: {{}}riak/kv/2.9.4/using/reference/secondary-indexes
+[cluster ops v3 mdc]: {{}}riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter
+[cluster ops v2 mdc]: {{}}riak/kv/2.9.4/using/cluster-operations/v2-multi-datacenter
+
+### Repair/Recover
+
+[repair recover index]: {{}}riak/kv/2.9.4/using/repair-recovery
+[repair recover fail]: {{}}riak/kv/2.9.4/using/repair-recovery/failure-recovery/
+
+### Security
+
+[security index]: {{}}riak/kv/2.9.4/using/security/
+[security basics]: {{}}riak/kv/2.9.4/using/security/basics
+[security managing]: {{}}riak/kv/2.9.4/using/security/managing-sources/
+
+### Performance
+
+[perf index]: {{}}riak/kv/2.9.4/using/performance/
+[perf 
benchmark]: {{}}riak/kv/2.9.4/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.9.4/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.9.4/using/performance/erlang +[perf aws]: {{}}riak/kv/2.9.4/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.9.4/using/performance/latency-reduction + +### Troubleshooting + +[troubleshoot http]: {{}}riak/kv/2.9.4/using/troubleshooting/http-204 + + +## Developing + +[dev index]: {{}}riak/kv/2.9.4/developing +[dev client libraries]: {{}}riak/kv/2.9.4/developing/client-libraries +[dev data model]: {{}}riak/kv/2.9.4/developing/data-modeling +[dev data types]: {{}}riak/kv/2.9.4/developing/data-types +[dev kv model]: {{}}riak/kv/2.9.4/developing/key-value-modeling + +### Getting Started + +[getting started]: {{}}riak/kv/2.9.4/developing/getting-started +[getting started java]: {{}}riak/kv/2.9.4/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.9.4/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.9.4/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.9.4/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.9.4/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.9.4/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.9.4/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.9.4/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.9.4/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.4/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.4/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.4/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.4/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.4/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.4/developing/getting-started/golang/object-modeling + +### Usage + +[usage index]: {{}}riak/kv/2.9.4/developing/usage +[usage bucket types]: {{}}riak/kv/2.9.4/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.9.4/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.9.4/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.9.4/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.9.4/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.9.4/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.9.4/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.9.4/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.9.4/developing/usage/search +[usage search schema]: {{}}riak/kv/2.9.4/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.9.4/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.9.4/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.9.4/developing/usage/updating-objects + +### App Guide + +[apps mapreduce]: {{}}riak/kv/2.9.4/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.9.4/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.9.4/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/2.9.4/developing/api/backend +[dev api http]: {{}}riak/kv/2.9.4/developing/api/http +[dev api http status]: 
{{}}riak/kv/2.9.4/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.9.4/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/2.9.4/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.9.4/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.9.4/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.9.4/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.9.4/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.9.4/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.9.4/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.9.4/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.9.4/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.9.4/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.9.4/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.9.4/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.9.4/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.9.4/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + + + diff --git a/content/riak/kv/2.9.4/add-ons.md b/content/riak/kv/2.9.4/add-ons.md new file mode 100644 index 0000000000..3529241818 --- /dev/null +++ b/content/riak/kv/2.9.4/add-ons.md @@ -0,0 +1,22 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +aliases: +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. + +* [Riak Redis Add-on]({{}}riak/kv/2.9.4/add-ons/redis/) + + diff --git a/content/riak/kv/2.9.4/add-ons/redis.md b/content/riak/kv/2.9.4/add-ons/redis.md new file mode 100644 index 0000000000..6f31b9cd0f --- /dev/null +++ b/content/riak/kv/2.9.4/add-ons/redis.md @@ -0,0 +1,59 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +--- + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. +{{% /note %}} + +Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV. + +RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. 
This type of caching is most effective for keys that are immutable or have an infrequent change rate.
+
+Whether you are looking to build out a session store, shopping cart, advertisement, or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV).
+
+## Compatibility
+
+RRA is supported on the following platforms:
+
+* RHEL/CentOS 6
+* RHEL/CentOS 7
+* Ubuntu 12.04 LTS "Precise Pangolin"
+* Ubuntu 14.04 LTS "Trusty Tahr"
+* Debian 7 "Wheezy"
+* Debian 8 "Jessie"
+
+RRA is compatible with the following services:
+
+* Riak KV Enterprise (2.1.4+)
+* Riak TS Enterprise (1.4.0+)
+* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster)
+  * Redis Cluster and RRA's consistent hash are at odds, which surfaces as errors
+    such as MOVED, ASK, and CROSSSLOT messages from Redis; see (WIP):
+    https://github.com/antirez/redis-rb-cluster
+
+## Get Started
+
+* [Set up RRA.][addon redis setup]
+* [Use RRA with various clients.][addon redis use]
+* [Develop with RRA.][addon redis develop]
+* [Learn about RRA's features.][addon redis features]
+
diff --git a/content/riak/kv/2.9.4/add-ons/redis/developing-rra.md b/content/riak/kv/2.9.4/add-ons/redis/developing-rra.md
new file mode 100644
index 0000000000..79a2ca4106
--- /dev/null
+++ b/content/riak/kv/2.9.4/add-ons/redis/developing-rra.md
@@ -0,0 +1,326 @@
+---
+title: "Developing with Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Develop with Redis Add-on"
+    identifier: "add-ons_redis_develop"
+    weight: 403
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+---
+
+[redis-clients]: http://redis.io/clients
+[usage bucket types]: {{}}riak/kv/2.9.4/developing/usage/bucket-types/
+[dev api http]: {{}}riak/kv/2.9.4/developing/api/http
+[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/
+[apps replication properties]: {{}}riak/kv/2.9.4/developing/app-guide/replication-properties
+[usage commit hooks]: {{}}riak/kv/2.9.4/developing/usage/commit-hooks/
+[concept causal context]: {{}}riak/kv/2.9.4/learn/concepts/causal-context
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations.
+
+## Overview
+
+Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides access to Riak KV, as a persistent data store, through Redis, as a cache, via the various Redis client libraries and the command-line interface tool `redis-cli`.
+
+As with Riak KV, the cache proxy service almost always performs best and most
+predictably when you use the basic CRUD operations -- Create, Read, Update,
+Delete -- that you'd find in any key/value store. Learning these operations
+is a great place to start when beginning to develop applications that use
+RRA.
+
+The set of clients (including recommendations) for Redis are listed at
+[Redis clients][redis-clients]. For brevity's sake, the examples provided here are
+in:
+
+* Erlang (Eredis)
+* JavaScript (node_redis)
+* Python (redis-py)
+* Ruby (redis-rb)
+* Scala (lettuce)
+* Java: see the Scala examples. The code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client.
+
+## Riak KV Setup
+
+While you can use Riak Redis Add-on with Riak KV configured with either `last_write_wins` set to 'true' or `allow_mult` set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'.
+
+The cache proxy service is tested under both configurations. However, due to lack of support via the Redis protocol for returning multiple values for a single `GET`, effectively `last_write_wins` semantics apply.
+
+For a deeper explanation of Riak KV's configurable behaviors, see John Daily's
+blog series [part 4][config-behaviors].
+
+### Bucket Type Setup
+
+#### Create a Bucket Type
+
+If your application organizes data in a way that does not include bucket-type
+and instead only uses bucket to organize its keyspace, the `default` bucket-type
+can be used by omitting the bucket-type portion of the colon-delimited
+hierarchical namespaced key. In other words, `test:food` is equivalent to
+`default:test:food`, where the bucket-type is `default`, the bucket is `test`,
+and the key is `food`. For examples here, we will use `rra:test:food` to clearly
+use a bucket-type.
+
+If your application organizes data including a bucket-type, ensure that the
+bucket-type is created in Riak without specifying a data type, so that values
+are treated as opaque, i.e. a `string`. The following command provides an
+example of creating the bucket-type `rra`:
+
+```sh
+if ! riak-admin bucket-type status rra >/dev/null 2>&1; then
+  riak-admin bucket-type create rra '{"props":{}}'
+  riak-admin bucket-type activate rra
+fi
+```
+
+#### Set Bucket Props
+
+The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false':
+
+```sh
+curl -XPUT -H 'Content-Type: application/json' \
+  -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \
+  'http://127.0.0.1:8098/types/rra/buckets/test/props'
+```
+
+For additional configuration options see [bucket properties][dev api http].
+
+## Object/Key Operations
+
+Riak KV organizes data into buckets, keys, and values, with
+[bucket types][usage bucket types] acting as an additional namespace in Riak KV
+versions 2.0 and greater. Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket.
+
+Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or other plaintext representations that can be parsed in the client application (e.g. YAML).
+
+While buckets are a flat namespace in Riak KV and you can name them
+whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy
+service the Redis key `bucket_type:bucket:key` is mapped to the Riak KV
+`bucket_type/bucket/key`, so bucket type and bucket names should not contain
+colon (`:`). When not specified, bucket type defaults to "default".
+
+Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets.
+
+The same goes for naming keys: many objects can have the same key as long as they're in different buckets. There is no restriction on keys containing colons (`:`), and this practice of representing a nested namespace is common in applications using Redis.
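+
+To make the namespace mapping concrete, the following is a minimal sketch that
+reads the same object through both interfaces. The host and ports are
+hypothetical, assuming RRA listens on its default port (22122) and Riak KV's
+HTTP API on 8098:
+
+```bash
+# read through RRA: bucket-type "rra", bucket "test", key "food"
+redis-cli -h 127.0.0.1 -p 22122 GET rra:test:food
+
+# the equivalent read directly from Riak KV over HTTP
+curl http://127.0.0.1:8098/types/rra/buckets/test/keys/food
+
+# omitting the bucket-type portion implies the "default" bucket type,
+# so "test:food" is equivalent to "default:test:food"
+redis-cli -h 127.0.0.1 -p 22122 GET test:food
+```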
+ +Riak KV [bucket types][usage bucket types] enable you to provide common +configurations for buckets (as many buckets as you wish). This means you can +easily enable buckets to share common configurations, i.e. identical +[replication properties][apps replication properties] or +[commit hooks][usage commit hooks]. + + +## Reading Objects + +Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading-through to Riak KV which results in greater resilience through node outages and network partitions. + +To request a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.get("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.get("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.get("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +var value = connection.get("rra:test:food") +``` + +### Get Configuration Parameters + +>**Note:** The cache proxy service read option (related to replication factor and +consistency concern) may optionally be set within the nutcracker.conf. This will result in an override of the setting value at the bucket-level in Riak KV. + +The following configuration parameters apply to `GET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` |
+|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` |
+|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) |
+|`timeout` | The number of milliseconds to await a response. | `0` (server specified) |
+
+
+### Sibling Resolution
+
+As the Redis protocol does not provide a means to return multiple siblings,
+the cache proxy service must provide server-side sibling resolution. At present,
+only last-write-wins sibling resolution is available. The result is an effective
+last-write-wins configuration for access through the cache proxy service.
+
+
+## Writing Objects
+
+Writes via the cache proxy service are analogous to a Redis `SET`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache. As with HTTP PUT, `SET` semantically covers both create and update
+operations.
+
+To set a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.set("rra:test:food", "apple", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.set("rra:test:food", "apple")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.set("rra:test:food", "apple")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.set("rra:test:food", "apple")
+```
+
+### Set Configuration Parameters
+
+>**Note:** The cache proxy service write option (related to replication factor and
+consistency concern) may optionally be set within the nutcracker.conf, resulting
+in an override of the setting value at the bucket-level in Riak KV.
+
+The following configuration parameters apply to `SET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+### Sibling Explosion
+
+As noted in the section "Sibling Resolution" above, Riak KV provides for a line of
+descent (known as the [causal context][concept causal context]) for a value stored at a key. Clients
+performing write operations provide this causal context by setting the vector
+clock (VClock) that they last read.
+
+If a client does not provide the causal context, Riak KV makes no assumptions
+and treats the write as a new causal context, semantically equivalent to a
+create. In the case that a value is already stored at the key, this would lead
+to a sibling.
+
+Since the Redis protocol does not provide a means to pass a VClock, the cache
+proxy service needs to perform a read-before-write to obtain the current VClock
+so the write can continue the causal context previously established and avoid
+"sibling explosion".
+
+Despite these efforts, in the event of a network partition, siblings will still
+be created as clients writing to nodes on either side of the network partition
+can create divergent lines of descent. Sibling resolution remains the means
+to merge these lines of descent into a coherent causal context.
+
+## Deleting Objects
+
+Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache.
+
+To delete a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.del("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+# redis-py exposes the DEL command as delete(), since `del` is a reserved
+# word in Python
+r.delete("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.del("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.del("rra:test:food")
+```
+
+### Delete Configuration Parameters
+
+The following configuration parameters apply to `DEL` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | + diff --git a/content/riak/kv/2.9.4/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.9.4/add-ons/redis/redis-add-on-features.md new file mode 100644 index 0000000000..43382dae61 --- /dev/null +++ b/content/riak/kv/2.9.4/add-ons/redis/redis-add-on-features.md @@ -0,0 +1,132 @@ +--- +title: "Riak Redis Add-on Features" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Redis Add-on Features" + identifier: "add-ons_redis_features" + weight: 504 + parent: "add-ons_redis" +toc: true +commercial_offering: true +--- + +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png +[redis docs]: http://redis.io/commands +[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md + +## Overview + +The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware. + +On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available: + +* [Pre-sharding](#pre-sharding) +* [Connection Aggregation](#connection-aggregation) +* [Command Pipelining](#command-pipelining) +* [Read-through Cache](#read-through-cache) +* [Write-around Cache](#write-around-cache) +* [Commands](#commands) +* [Object Lifetime](#object-lifetime) + +## Pre-sharding + +Pre-sharding with consistent hashing dispatches object reads and writes based +on a configurable hash function, spreading load across multiple cache servers. +The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then +requests are routed to the Redis server that handles that portion of the key +range. + +Redis with no persistence is used as the frontend cache proxy service, and +Redis as a data server holds all data in memory. The addressable memory of +cache proxy is limited. By employing pre-sharding, the total addressable cache +memory space is extended by the number of Redis servers. + +## Connection Aggregation + +Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers. This reduces the total required connections to the Redis server for the same key. + +Redis clients in various languages support specifying multiple servers, as well +as implementing multiple methods of spreading load across those servers (i.e. +round-robin load balancing or consistent hashing). Since the cache proxy service is providing consistent hashing, any Redis client method of supporting multiple +servers will suffice. 
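+
+As a hypothetical illustration, assuming two RRA instances (`rra-host-1` and
+`rra-host-2`, both on the default port 22122) are configured with the same
+Redis servers and Riak backends, a client may connect to either instance and
+observe the same keyspace:
+
+```bash
+# both instances hash the key to the same Redis/Riak servers, because they
+# share the same consistent-hash configuration
+redis-cli -h rra-host-1 -p 22122 SET rra:test:food apple
+redis-cli -h rra-host-2 -p 22122 GET rra:test:food   # returns "apple"
+```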
+ +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideal due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server during a write (DELETE or PUT) to the +backend server. + +A short `CACHE_TTL`, for example "15s", reduces a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three write cache strategies, the write-around cache strategy is the least +prone to race condition, but least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV utilizing the read-through + caching strategy with a TTL set at service configuration time. + +* SET - set the value of a key to Riak KV and invalidate cache, issue a PEXPIRE + to Redis. + +* DEL - delete the value of a key to Riak KV and invalidate cache, issue a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs]. 
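+
+The following sketch ties the read-through and write-around strategies together
+at the command line; it assumes RRA on the default port 22122 with a short
+`server_ttl` configured at service setup:
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 SET rra:test:food apple   # write-around: persisted to Riak KV, then PEXPIRE invalidates cache
+redis-cli -h 127.0.0.1 -p 22122 GET rra:test:food         # cache miss: read-through to Riak KV, result cached with the TTL
+redis-cli -h 127.0.0.1 -p 22122 GET rra:test:food         # cache hit: served from Redis until the TTL expires
+```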
+
+>**Important:** While the cache proxy service does support issuing DEL commands, PEXPIRE, with a small TTL, is suggested instead when the semantic intent is to remove an item from cache. With write-around, the DEL command will issue a delete to the Riak backend.
+
+## Object Lifetime
+
+With the combination of read-through and write-around cache strategies, the
+full object lifetime for a key-value is represented by the following
+sequence diagram:
+
+![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png)
+
diff --git a/content/riak/kv/2.9.4/add-ons/redis/set-up-rra.md b/content/riak/kv/2.9.4/add-ons/redis/set-up-rra.md
new file mode 100644
index 0000000000..477d01d87f
--- /dev/null
+++ b/content/riak/kv/2.9.4/add-ons/redis/set-up-rra.md
@@ -0,0 +1,281 @@
+---
+title: "Setting Up Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Set Up Redis Add-on"
+    identifier: "add-ons_redis_setup"
+    weight: 201
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis use]: ../using-rra
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[install index]: {{}}riak/kv/2.9.4/setup/installing
+[perf open files]: {{}}riak/kv/2.9.4/using/performance/open-files-limit/#changing-the-limit
+[lab ansible]: https://github.com/paegun/ansible-cache-proxy
+
+This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA.
+
+## Prerequisites
+
+Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index].
+
+While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go.
+
+This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used
+to configure the cache proxy.
+
+## In the Lab
+
+An Ansible setup for the Riak Redis Add-on (RRA) was developed to provide a
+runnable example of an installation; see [ansible cache proxy][lab ansible].
+The remainder of this setup guide lists the commands required to install and
+configure RRA manually.
+
+## Installing
+
+1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files].
+2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed.
+3. Install Riak Redis Add-on.
+
+### Change the open-files limit
+
+As with Riak KV, both the total open-files limit and the per-user open-files limit
+must be high enough to allow Redis and Riak Redis Add-on (RRA) to function.
+
+For a complete guide on changing the limit in Riak KV, see
+[Changing the limit][perf open files].
+
+#### Linux
+
+On most Linux distributions, the total limit for open files is controlled by `sysctl`:
+
+```bash
+# check the current limit
+sysctl fs.file-max
+
+# raise the limit and persist it across reboots
+echo "fs.file-max = 65536" | sudo tee -a /etc/sysctl.conf
+sudo sysctl -p
+```
+
+To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
+
+#### CentOS
+
+On CentOS systems, set a proper limit for the user you're usually logging in with
+to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the
+executing user.
+
+#### Ubuntu
+
+On Ubuntu systems, the following settings are recommended:
+
+```config
+»USERNAME« hard nofile 65536
+»USERNAME« soft nofile 65536
+root hard nofile 65536
+root soft nofile 65536
+```
+
+>**Note:** You may need to log out of your shell and then log back in for these changes to take effect.
+
+
+### Install Redis
+
+>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on".
+
+#### Install on Ubuntu
+
+If you are on Ubuntu, run the following to install Redis:
+
+```bash
+# add the dotdeb repositories to your APT sources.
+sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <<EOF
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.
+
+### Install Riak Redis Add-on (RRA)
+
+>**Note:**
+>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums.
+
+If you are on CentOS, run the following to install RRA:
+
+```bash
+sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm
+```
+
+If you are on Ubuntu, run the following to install RRA:
+
+```bash
+sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb
+```
+
+## Configuring Riak Redis Add-on
+
+To configure Riak Redis Add-on (RRA), edit the configuration file `/etc/cache_proxy/cache_proxy_22122.yml`.
+
+The RRA configuration file is in YAML format. An example configuration
+file is provided in the install, and it contains all relevant configuration elements:
+
+```config
+» XML node name« :
+  listen: 0.0.0.0:22122
+  hash: fnv1a_64
+  distribution: ketama
+  auto_eject_hosts: true
+  redis: true
+  server_retry_timeout: 2000
+  server_failure_limit: 1
+  server_ttl: 1h
+  servers:
+    - 127.0.0.1:6379:1
+  backend_type: riak
+  backend_max_resend: 2
+  backends:
+    - 127.0.0.1:8087
+```
+
+Set the `listen` configuration value to set the RRA listen port.
+
+To set the time-to-live (TTL) for values stored in cache, set the `server_ttl`
+configuration value. Human-readable time values can be specified,
+with the most likely units being `s` for seconds or `ms` for milliseconds.
+
+Set the list of Redis servers by listing the servers, separated by `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional).
+
+Set the list of Riak KV servers by listing the servers, separated by `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«`
+(weight is optional). Make sure to list the Riak KV protobuf (pb) port here.
+
+### Verify your configuration
+
+If you are on Ubuntu, run the following to start RRA:
+
+```bash
+sudo service cache_proxy start
+```
+
+If you are on CentOS, run the following to start RRA:
+
+```bash
+sudo systemctl start cache_proxy
+```
+
+To verify RRA is running and listening on the expected port, run the
+following (using the loopback interface and the default RRA port 22122
+as an example):
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS
+redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on
+```
+
+Redis should respond with `SUCCESS`.
+
+If RRA is responding with the expected output, run the following to
+clean up and remove the test value:
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on
+```
+
+If you did not get the expected output, run the following
+to verify that RRA is running on the expected port:
+
+```bash
+ss -nlp |grep [n]utcracker
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.
+
+## Next Steps
+
+Get started with some [basic usage][addon redis use] or check out more info on [setting up for development (with examples)][addon redis develop].
+
diff --git a/content/riak/kv/2.9.4/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.9.4/add-ons/redis/set-up-rra/deployment-models.md
new file mode 100644
index 0000000000..e6cbf547ee
--- /dev/null
+++ b/content/riak/kv/2.9.4/add-ons/redis/set-up-rra/deployment-models.md
@@ -0,0 +1,139 @@
+---
+title: "Riak Redis Add-on Deployment Models"
+description: "Explore the various models for deploying Riak Redis Add-on"
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Redis Add-on Deployment Models"
+    identifier: "add-ons_redis_deployment"
+    weight: 201
+    parent: "add-ons_redis_setup"
+toc: true
+commercial_offering: true
+---
+
+[Local-deployment]: {{}}images/redis/rra_deployment_local.png
+[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png
+[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png
+
+## Deployment Models
+
+### Local Cache Deployment
+
+In a local cache deployment, the RRA and Redis are deployed to the application
+server.
+
+![Local-deployment]({{}}images/redis/rra_deployment_local.png)
+
+Connections:
+
+* RRA: The connections between Application Service instances and the RRA Service
+  instance are local.
+* Redis: The connection between the RRA Service instance and Redis Service
+  instance is local.
+* Riak: The connections between Application Servers and Riak Nodes are distributed
+  and bounded to equal the number of Riak nodes _multiplied_ by the number of
+  Application Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Cache hits are extremely fast.
+
+Disadvantages:
+
+* Cache writes on one application server are *not* observed on other application
+  servers, so cache hit rates are likely lower unless some form of consistent
+  routing to the application server exists within the solution.
+* Redis competing for RAM with the application service may be problematic.
+
+### Colocated Cache Deployment
+
+In a colocated cache deployment, the RRA may be deployed either to the
+application server (suggested) or to the Riak servers, and Redis is deployed to
+the Riak servers.
+
+In the case of deploying the RRA to the application servers, the RRA features
+of reducing connections from the relatively high number of application service
+instances to the fewer Redis (cache) and Riak (persistent) data service
+instances allow for the greatest scale at the expense of the deployment cost
+of pushing a service and its configuration.
+
+In the case of deploying the RRA to the colocated Redis and Riak data servers,
+the maximum scale for the solution is constrained by the number of network
+connections from the application services, while deployment costs remain a matter
+of pushing a service and its configuration. In either case, deployment should
+be automated, so costs are not multiplied by the number of servers.
+
+![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png)
+
+Connections:
+
+* RRA: The connections between Application Service instances and the RRA Service
+  instance are distributed and bounded to equal the number of Riak nodes
+  _multiplied_ by the number of Application Service instances.
+* Redis: The connection between the RRA Service instance and Redis Service
+  instance is local.
+* Riak: The connections between RRA and Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _squared_.
+
+Advantages:
+
+* Increases the cache hit rate as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+
+Disadvantages:
+
+* Typically increased distance between the application service and Redis and
+  Riak services, so slightly increased latency compared to local.
+* Redis competing for RAM with Riak will likely be problematic. Redis should
+  be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis
+  to ensure Riak is allotted sufficient RAM to serve the more important
+  persistent data storage and retrieval services. See http://redis.io/topics/config
+* This model may seem to provide data locality, but in the case of faults in
+  either Redis or Riak services, the fault tolerance mechanisms of RRA and
+  Riak will not match exactly, as communicating the necessary information to
+  support such a lock-step fault tolerance would lead to greater mean latencies,
+  and Riak provides superior 99th percentile latency performance in the face
+  of faults.
+
+
+### Distributed Cache Deployment
+
+In a distributed cache deployment, the RRA is deployed to the application server
+and Redis is deployed to standalone servers, separate from Riak cluster nodes.
+
+![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png)
+
+Connections:
+
+* RRA: The connections between Application Service instances and the RRA Service
+  instance are local.
+* Redis: The connections between RRA Service instances and Redis Service
+  instances are distributed and bounded to equal the number of Application
+  Servers _multiplied_ by the number of Redis Servers.
+* Riak: The connections between RRA and Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _multiplied_ by the number of Application
+  Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Increases the cache hit rate as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+* Keeps RRA near the application, reducing network connections.
+* Moves Redis to distinct servers, allowing the cache more RAM and not
+  constraining the RAM of either application or persistent data services.
+
+Disadvantages:
+
+* Typically increased distance between the application service and Redis and
+  Riak services, so increased latency compared to local.
+
+### Recommendation
+
+The relative advantages and disadvantages of the Distributed Cache Deployment,
+most notably the increased cache hit rate and reduced connection overhead,
+should make it the standout choice for applications requiring the scale and
+operational simplicity of Riak. For this reason, we recommend the Distributed
+Cache Deployment.
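+
+As a concrete starting point, a minimal cache proxy pool for the distributed
+model might look like the following sketch. The pool name and host addresses
+are hypothetical; the option names mirror the example configuration shown in
+the setup guide:
+
+```config
+distributed_cache_proxy:
+  listen: 0.0.0.0:22122
+  hash: fnv1a_64
+  distribution: ketama
+  redis: true
+  server_ttl: 15s
+  servers:
+    - 10.0.1.10:6379:1
+    - 10.0.1.11:6379:1
+  backend_type: riak
+  backends:
+    - 10.0.2.10:8087
+    - 10.0.2.11:8087
+    - 10.0.2.12:8087
+```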
+
diff --git a/content/riak/kv/2.9.4/add-ons/redis/using-rra.md b/content/riak/kv/2.9.4/add-ons/redis/using-rra.md
new file mode 100644
index 0000000000..4bba4e8904
--- /dev/null
+++ b/content/riak/kv/2.9.4/add-ons/redis/using-rra.md
@@ -0,0 +1,243 @@
+---
+title: "Using Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Using Redis Addon"
+    identifier: "add-ons_redis_getstarted"
+    weight: 302
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/kv/2.9.4/add-ons/redis/get-started-with-rra
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis setup]: ../set-up-rra/
+[dev api http]: {{}}riak/kv/2.9.4/developing/api/http/
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+Now that you’ve [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client which supports `GET`, `SET` and `DEL` operations.
+
+This page will walk you through using RRA.
+
+## Prerequisites
+
+We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine.
+
+You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http].
+
+## Run the Read-Through Test
+
+Throughout this test example, the bucket "test" and key "foo" are used to
+demonstrate how to address the hierarchical namespace support in Riak KV
+through the flat Redis key. The bucket type is not specified in this example,
+so is effectively the default bucket type, named "default". For additional
+information regarding the key namespace, see [develop Riak Redis Add-on (RRA)][addon redis develop].
+
+The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are:
+
+* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings.
+* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`.
+* GET the Riak object at the `test` bucket with the key `foo`.
+* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.)
+* Assert that the value obtained from the previous cache proxy GET is 'bar'.
+
+First, create a file named `read_through_test.sh` with the following content:
+
+```bash
+#!/usr/bin/env bash
+
+# set test environment
+RIAK_HTTP_IP="127.0.0.1"
+RIAK_HTTP_PORT="8098"
+CACHE_PROXY_IP="127.0.0.1"
+CACHE_PROXY_PORT="22122"
+CACHE_PROXY_STATISTICS_PORT="22123"
+RIAK_TEST_BUCKET="test"
+KEY="foo"
+VALUE="bar"
+
+# DELETE Riak object, ensure no siblings
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# PUT Riak object
+curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# GET Riak object
+RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY")
+
+# GET Cache Proxy value
+CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" GET "$RIAK_TEST_BUCKET:$KEY")
+
+# DELETE Riak object, cleanup
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# Assert
+if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then
+    RESULT="Success"
+else
+    RESULT="FAIL"
+fi
+echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy."
+```
+
+Then, once you've created the file, run it as follows:
+
+```bash
+./read_through_test.sh 22122 8098 test
+```
+
+### Exceptions
+
+If the test does not pass, verify that both Redis and RRA are running. You can do this by running:
+
+```bash
+ps aux |grep [r]edis
+ps aux |grep [n]utcracker
+```
+
+The result should list `redis` and `nutcracker` respectively.
+
+Also, verify that Riak KV is started and listening on the protocol buffer port specified:
+
+```bash
+sudo riak config effective |grep proto
+```
+
+If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following:
+
+```bash
+sudo service cache_proxy restart
+```
+
+If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows:
+
+1. Stop RRA.
+2. Stop Redis.
+3. *Optional:* Restart Riak KV. (This should only be necessary if Riak KV is not responding to protocol buffer requests.)
+4. Start Redis.
+5. Start RRA.
+
+```bash
+sudo service cache_proxy stop
+sudo service redis stop
+
+# optional
+sudo riak restart
+
+sudo service redis start
+sudo service cache_proxy start
+```
+
+## Using Riak Redis Add-on
+
+Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA.
+
+For objects that should not be cached, interact with Riak KV as usual: issuing GET, PUT, and DELETE commands through the Riak client.
+
+For objects that should be cached, read from RRA: issuing GET, SET, and DEL commands through the Redis client.
+
+### Monitoring
+
+#### RRA
+
+Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service’s process was killed or terminated by other means.
+
+The log file for RRA is stored by default in `/var/log/cache_proxy.log`. RRA is logrotate friendly, responding to the signal to reopen the log file following a rotate.
+
+For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+ +For example, running the following command (using the loopback interface and the default statistics port as an example): + +```bash +telnet 127.0.0.1 22123 +``` + +Returns statistic results: + +```json +{ + "bdp_cache_proxy": { + "192.168.50.2:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 216, + "requests": 9, + "response_bytes": 39, + "responses": 4, + "server_connections": 1, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.3:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 0, + "requests": 0, + "response_bytes": 0, + "responses": 0, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.4:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 90, + "requests": 5, + "response_bytes": 258, + "responses": 2, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "client_connections": 0, + "client_eof": 6, + "client_err": 0, + "forward_error": 0, + "fragments": 0, + "server_ejects": 0 + }, + "curr_connections": 4, + "service": "nutcracker", + "source": "vagrant", + "timestamp": 1438301846, + "total_connections": 10, + "uptime": 7227, + "version": "0.4.0" +} +``` + +Using the above results, you should be able to determine metrics changes that would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution. + +While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions: + +* Custom - https://github.com/gfranxman/NutcrackerMonitor +* NewRelic - http://newrelic.com/plugins/schoology/245 +* Nagios - https://github.com/schoology/twemproxy_nagios + +#### Redis + +Various Redis monitoring solutions exist in the market and, like monitoring RRA, these monitoring solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone. 
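+
+For a quick manual look at the counters such a monitor collects, you can query
+Redis directly; this assumes Redis is listening on its default port, 6379:
+
+```bash
+redis-cli -h 127.0.0.1 -p 6379 info stats
+```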
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details + diff --git a/content/riak/kv/2.9.4/configuring.md b/content/riak/kv/2.9.4/configuring.md new file mode 100644 index 0000000000..5b0ad14faa --- /dev/null +++ b/content/riak/kv/2.9.4/configuring.md @@ -0,0 +1,85 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +aliases: +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+ +[Learn More >>][config search] + +#### [V3 Multi-Datacenter][config v3 mdc] + +A guide on configuring Riak's V3 Multi-Datacenter Replication + +[Learn More >>][config v3 mdc] + + + diff --git a/content/riak/kv/2.9.4/configuring/backend.md b/content/riak/kv/2.9.4/configuring/backend.md new file mode 100644 index 0000000000..167dd14848 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/backend.md @@ -0,0 +1,643 @@ +--- +title: "Backend Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Backend Configuration" + identifier: "configuring_backend" + weight: 110 + parent: "configuring" +toc: true +--- + +[plan backend leveldb]: {{}}riak/kv/2.9.4/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/2.9.4/setup/planning/backend/leveled +[plan backend bitcask]: {{}}riak/kv/2.9.4/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.4/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.9.4/setup/planning/backend/multi + +## LevelDB + +Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend. + +> **Note on upgrading to 2.0** +> +> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and +wish to use your old configuration files, i.e. `app.config` and +`vm.args`, please note that you must set the `total_leveldb_mem_percent` +setting in the `eleveldb` section of `app.config`. We recommend setting +it to `70`. If you do not set this parameter, it will default to 15, +which can lead to problems in some clusters. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
leveldb.block_cache_thresholdThis setting defines the limit past which block cache memory can no longer be released in favor of the page cache. This setting has no impact on release in favor of the file cache. The value is set on a per-vnode basis.32MB
leveldb.compaction.trigger.tombstone_countControls when a background compaction initiates solely due to the +number of delete tombstones within an individual .sst table +file. A value of off disables the feature.1000
leveldb.compressionEnabling this setting (on), which is the default, +saves disk space. Disabling it may reduce read latency but increase +overall disk activity. This option can be changed at any time, but it +will not impact data on disk until the next time a file requires +compaction.on
leveldb.compression.algorithmThis setting selects which compression algorithm is used when leveldb.compression is on. In new riak.conf files, this is explicitly set to lz4; however, when this setting is not provided, snappy will be used for backward-compatibility.

+ When you determine that you will no longer need backward-compatibility, + setting this to lz4 will cause future compactions + to use the LZ4 algorithm for compression.
lz4 in new riak.conf files

+ snappy when not provided +
leveldb.data_rootThe directory in which LevelDB will store its data../data/leveldb
leveldb.fadvise_willneedOption to override LevelDB's use of fadvise(DONTNEED) +with fadvise(WILLNEED) instead. WILLNEED can +reduce disk activity on systems where physical memory exceeds the +database size.false
leveldb.maximum_memoryThis parameter defines the server memory (in bytes) to assign to +LevelDB. Also see leveldb.maximum_memory.percent to set +LevelDB memory as a percentage of system total.80
leveldb.maximum_memory.percentThis parameter defines the percentage of total server memory to +assign to LevelDB. LevelDB will dynamically adjust its internal cache +sizes to stay within this size. The memory size can alternately be +assigned as a byte count via leveldb.maximum_memory +instead.70
leveldb.threadsThe number of worker threads performing LevelDB operations.71
leveldb.verify_checksumsEnables or disables the verification of the data fetched from +LevelDB against internal checksums.on
leveldb.verify_compactionEnables or disables the verification of LevelDB data during +compaction.on
leveldb.block.size_stepsDefines the number of incremental adjustments to attempt between the +block.size value and the maximum block.size +for an .sst table file. A value of zero disables the +underlying dynamic block_size feature.16
leveldb.block.restart_intervalDefines the key count threshold for a new key entry in the key +index for a block. Most deployments should leave this parameter alone. +16
leveldb.block.sizeDefines the size threshold for a block/chunk of data within one +.sst table file. Each new block gets an index entry in the +.sst table file's master index.4KB
leveldb.bloomfilterEach database .sst table file can include an optional +"bloom filter" that is highly effective in shortcutting data queries +that are destined to not find the requested key. The Bloom filter +typically increases the size of an .sst table file by about +2%.on
leveldb.write_buffer_size_minEach vnode first stores new key/value data in a memory-based write +buffer. This write buffer is in parallel to the recovery log mentioned +in the sync parameter. Riak creates each vnode with a +randomly sized write buffer for performance reasons. The random size is +somewhere between write_buffer_size_min and +write_buffer_size_max.30MB
leveldb.write_buffer_size_maxSee leveldb.write_buffer_size_min directly above.60MB
leveldb.limited_developer_memThis is a Riak-specific option that is used when a developer is +testing a high number of vnodes and/or several VMs on a machine with +limited physical memory. Do not use this option if making +performance measurements. This option overwrites values given to +write_buffer_size_min and +write_buffer_size_max.off
leveldb.sync_on_writeWhether LevelDB will flush after every write.

+Note: If you are familiar with fsync, this is analogous +to calling fsync after every write.
off
leveldb.tieredThe level number at which LevelDB data switches from the faster to +the slower array. The default of off disables the +feature.off
leveldb.tiered.path.fastThe path prefix for .sst files below the level set by +leveldb.tiered.
leveldb.tiered.path.slowThe path prefix for .sst files at or above the level set by leveldb.tiered.
+ +## Leveled + +Configurable Parameters for Riak's [leveled][plan backend leveled] storage backend + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +## Bitcask + +Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend. + +
ConfigDescriptionDefault
leveled.data_rootA path under which leveled data files will be stored.$(platform_data_dir)/leveled +
leveled.sync_strategy Strategy for flushing data to disk - can be set to riak_sync, sync (if OTP > 16) or none. Use none, and the OS will flush when most efficient. Use riak_sync or sync to flush after every PUT (not recommended without some hardware support, e.g. flash drives and/or Flash-backed Write Caches)none
leveled.compression_methodCan be lz4 or native (which will use the Erlang native zlib compression within term_to_binary)native
leveled.compression_pointThe point at which compression is applied to the Journal (the Ledger is always compressed). Use on_receipt or on_compact. on_compact is suitable when values are unlikely to yield much benefit from compression (compression is only attempted when compacting)on_receipt
leveled.log_levelCan be debug, info, warn, error or critical. Set the minimum log level to be used within leveled. Leveled will log many lines to allow for stats to be extracted by those using log indexers such as Splunkinfo
leveled.journal_size The approximate size (in bytes) at which a Journal file should be rolled. Normally keep this at around the size of o(100K) objects.1000000000
leveled.compaction_runs_perdayThe number of journal compactions per vnode per day. The higher the value, the more compaction runs and the sooner space is recovered, but each run has a cost.24
leveled.compaction_low_hourThe hour of the day at which journal compaction can start. Use a low hour of 0 and a high hour of 23 to have no compaction window (i.e. always compact regardless of time of day)0
leveled.compaction_top_hourThe hour of the day after which journal compaction should stop. If low hour > top hour, then compaction will work overnight between low hour and top hour (inclusive). Timings rely on the server's view of local time23
leveled.max_run_lengthThe maximum number of consecutive files which may be compacted in a single compaction run.4
leveled_reload_recalcEnable the `recalc` compaction strategy within the leveled backend in Riak.disabled
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
bitcask.data_rootThe directory under which Bitcask will store its data../data/bitcask
bitcask.io_modeConfigure how Bitcask writes data to disk. If set to +erlang, writes are made via Erlang's built-in file API; if +set to nif, writes are made via direct calls to the POSIX C +API. The nif mode provides higher throughput for certain +workloads, but has the potential to negatively impact the Erlang VM, +leading to higher worst-case latencies and possible throughput collapse +erlang
bitcask.expiryBy default, Bitcask keeps all of your data around. If your data has +limited time value, or if you need to purge data for space reasons, you +can set the expiry option. For example, if you need to +purge data automatically after 1 day, set the value to 1d. +off disables automatic expirationoff
bitcask.expiry.grace_timeBy default, Bitcask will trigger a merge whenever a data file +contains an expired key. This may result in excessive merging under some +usage patterns. To prevent this you can set the +bitcask.expiry.grace_time option. Bitcask will defer +triggering a merge solely for key expiry by the configured number of +seconds. Setting this to 1h effectively limits each cask to +merging for expiry once per hour.0
bitcask.hintfile_checksumsWhether to allow the CRC to be present at the end of hintfiles. +Setting this to allow_missing runs Bitcask in a +backwards-compatible mode in which old hint files will still be accepted +without CRC signatures.strict
bitcask.fold.max_putsSee the description for the bitcask.fold.max_age +config directly below.0
bitcask.fold.max_ageFold keys thresholds will reuse the keydir if another fold was +started less than fold.max_age ago and there were fewer +than fold.max_puts updates. Otherwise, it will wait until +all current fold keys complete and then start. Set either option to +unlimited to disable.unlimited
bitcask.merge.thresholds.fragmentationDescribes which ratio of dead keys to total keys in a file will +cause it to be included in the merge. The value of this setting is a +percentage from 0 to 100. For example, if a data file contains 4 dead +keys and 6 live keys, it will be included in the merge at the default +ratio (which is 40). Increasing the value will cause fewer files to be +merged, decreasing the value will cause more files to be merged.40
bitcask.merge.thresholds.dead_bytesDescribes the minimum amount of data occupied by dead keys in a file +to cause it to be included in the merge. Increasing the value will cause +fewer files to be merged, whereas decreasing the value will cause more +files to be merged.128MB
bitcask.merge.thresholds.small_fileDescribes the minimum size a file must have to be excluded from the +merge. Files smaller than the threshold will be included. Increasing +the value will cause more files to be merged, whereas decreasing the +value will cause fewer files to be merged.10MB
bitcask.merge.triggers.dead_bytesDescribes how much data stored for dead keys in a single file will +trigger merging. If a file meets or exceeds the trigger value for dead +bytes, merge will be triggered. Increasing the value will cause merging +to occur less often, whereas decreasing the value will cause merging to +happen more often. When either of these constraints are met by any file +in the directory, Bitcask will attempt to merge files.512MB
bitcask.merge.triggers.fragmentationDescribes which ratio of dead keys to total keys in a file will +trigger merging. The value of this setting is a percentage from 0 to +100. For example, if a data file contains 6 dead keys and 4 live keys, +then merge will be triggered at the default setting. Increasing this +value will cause merging to occur less often, whereas decreasing the +value will cause merging to happen more often.60
bitcask.merge.window.endSee the description of the bitcask.merge.policy config +below.23
bitcask.merge.window.startSee the description of the bitcask.merge.policy config +below.0
bitcask.merge.policyLets you specify when during the day merge operations are allowed to +be triggered. Valid options are: always, meaning no +restrictions; never, meaning that merging will never be +attempted; and window, specifying the hours during which +merging is permitted, where bitcask.merge.window.start and +bitcask.merge.window.end are integers between 0 and 23. If +merging has a significant impact on performance of your cluster, or your +cluster has quiet periods in which little storage activity occurs, you +may want to change this setting from the default.always
bitcask.merge_check_intervalBitcask periodically runs checks to determine whether merges are +necessary. This parameter determines how often those checks take place. +Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, +etc.3m
bitcask.merge_check_jitterIn order to prevent merge operations from taking place on different +nodes at the same time, Riak can apply random variance to merge times, +expressed as a percentage of bitcask.merge_check_interval. +30%
bitcask.max_merge_sizeMaximum amount of data to merge in one go in the Bitcask backend. +100GB
bitcask.max_file_sizeDescribes the maximum permitted size for any single data file in the +Bitcask directory. If a write causes the current file to exceed this +size threshold then that file is closed, and a new file is opened for +writes.2GB
bitcask.sync.intervalSee the description of the bitcask.sync.strategy +directly below.
bitcask.sync.strategyChanges the durability of writes by specifying when to synchronize data to disk. The default setting protects against data loss in the event of application failure (process death) but leaves open a small window in which data could be lost in the event of complete system failure (e.g. hardware, OS, or power). The default mode, none, writes data into operating system buffers which will be written to the disks when those buffers are flushed by the operating system. If the system fails, e.g. due to power loss or crash, that data is lost before those buffers are flushed to stable storage. This is prevented by the setting o_sync, which forces the operating system to flush to stable storage at every write. The effect of flushing each write is better durability; however, write throughput will suffer as each write will have to wait for the write to complete. Available sync strategies: none, which will let the operating system manage syncing writes; o_sync, which uses the O_SYNC flag to force syncs on every write; and interval, which will force Bitcask to sync every bitcask.sync.interval seconds.none
bitcask.open_timeoutSpecifies the maximum time Bitcask will block on startup while attempting to create or open the data directory. You generally need not change this value. If for some reason the timeout is exceeded on open, you'll see a log message of the form Failed to start bitcask backend: .... Only then should you consider a longer timeout. 4s
+ +## Memory Backend + +Configurable parameters for Riak's [Memory][plan backend memory] backend. + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
memory_backend.ttlEach value written will be written with this "time to live." Once +that object's time is up, it will be deleted on the next read of its +key. Minimum: 1s.
memory_backend.max_memory_per_vnodeThe maximum amount of memory consumed per vnode by the memory +storage backend. Minimum: 1MB.
+ +## Multi Backend + +Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster. + +If you are using multiple backends, you can configure the backends +individually by prepending the setting with `multi_backend.$name`, where +`$name` is the name of the backend. `$name` can be any valid +configuration word, like `customer_data`, `my_data`, `foo_bar_backend`, +etc. + +Below is the general form for setting multi-backend parameters: + +```riakconf +multi_backend.$name.(existing_setting) = +# or +multi_backend.$name.$backend_type.(backend_specific_setting) = +``` + +Below is a listing of the available parameters: + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
multi_backend.$name.storage_backendThis parameter specifies the Erlang module defining the storage +mechanism that will be used on this node.bitcask
multi_backend.defaultThe default name of a backend when one is not specified.
+ +To give an example, if you have a LevelDB backend named +`customer_backend` and wish to set the `data_root` parameter to +`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would +do so as follows: + +```riakconf +multi_backend.customer_backend.storage_backend = leveldb +multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend +multi_backend.customer_backend.leveldb.maximum_memory.percent = 50 +``` + diff --git a/content/riak/kv/2.9.4/configuring/basic.md b/content/riak/kv/2.9.4/configuring/basic.md new file mode 100644 index 0000000000..ecf2342e59 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/basic.md @@ -0,0 +1,236 @@ +--- +title: "Basic Riak KV Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Basic Configuration" + identifier: "configuring_basic" + weight: 100 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.4/ops/building/configuration/ + - /riak/kv/2.9.4/ops/building/configuration/ +--- + +[config reference]: {{}}riak/kv/2.9.4/configuring/reference +[use running cluster]: {{}}riak/kv/2.9.4/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.9.4/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.9.4/using/performance/erlang +[plan start]: {{}}riak/kv/2.9.4/setup/planning/start +[plan best practices]: {{}}riak/kv/2.9.4/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.9.4/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.9.4/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.9.4/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.9.4/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.9.4/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.9.4/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.9.4/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.9.4/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.9.4/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.9.4/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.9.4/using/performance +[perf aws]: {{}}riak/kv/2.9.4/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.9.4/setup/planning/cluster-capacity/#ring-size-number-of-partitions + +This document covers the parameters that are commonly adjusted when +setting up a new cluster. We recommend that you also review the detailed +[Configuration Files][config reference] document before moving a cluster into +production. + +All configuration values discussed here are managed via the +configuration file on each node, and a node must be restarted for any +changes to take effect. + +> **Note** +> +> If you are upgrading to Riak KV version 2.0 or later from an pre-2.0 +release, you can use either your old `app.config` configuration file or +the newer `riak.conf` if you wish. +> +> If you have installed Riak KV 2.0 directly, you should use only +`riak.conf`. +> +> More on configuring Riak KV can be found in the [configuration files][config reference] +doc. + +We advise that you make as many of the changes below as practical +_before_ joining the nodes together into a cluster. 
Once your configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process.

Use [`riak-admin member-status`][use admin riak-admin#member-status] to determine whether any given node is a member of a cluster.

## Erlang VM Tunings

Prior to building and starting a cluster, there are some Erlang-VM-related changes that you should make to your configuration files. If you are using the older, `vm.args`-based Erlang VM tunings, you should set the following:

```vmargs
+sfwi 500
+scl false
```

If you are using the newer, `riak.conf`-based configuration system, we recommend the following settings:

```riakconf
erlang.schedulers.force_wakeup_interval = 500
erlang.schedulers.compaction_of_load = false
```

More information can be found in [Erlang VM Tuning][perf erlang].

## Ring Size

The ring size, in Riak parlance, is the number of data partitions that comprise the cluster. This quantity impacts the scalability and performance of a cluster and, importantly, **it should be established before the cluster starts receiving data**.

If the ring size is too large for the number of servers, disk I/O will be negatively impacted by the excessive number of concurrent databases running on each server; if the ring size is too small, the servers' other resources (primarily CPU and RAM) will go underutilized.

See [Cluster Capacity Planning] for more details on choosing a ring size.

The steps involved in changing the ring size depend on whether the servers (nodes) in the cluster have already been joined together.

### Cluster joined, but no data needs to be preserved

1. Change the ring creation size parameter by uncommenting it and then setting it to the desired value, for example 64:

    ```riakconf
    ring_size = 64
    ```

    ```appconfig
    %% In the riak_core section:
    {ring_creation_size, 64}
    ```

2. Stop all nodes
3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
4. Start all nodes
5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]

### New servers, have not yet joined a cluster

1. Change the ring creation size parameter by uncommenting it and then setting it to the desired value, for example 64:

    ```riakconf
    ring_size = 64
    ```

    ```appconfig
    %% In the riak_core section:
    {ring_creation_size, 64}
    ```

2. Stop all nodes
3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]

### Verifying ring size

You can use the `riak-admin` command to verify the ring size:

```bash
riak-admin status | grep ring
```

Console output:

```
ring_members : ['riak@10.160.13.252']
ring_num_partitions : 8
ring_ownership : <<"[{'riak@10.160.13.252',8}]">>
ring_creation_size : 8
```

If `ring_num_partitions` and `ring_creation_size` do not agree, that means that the `ring_creation_size` value was changed too late and that the proper steps were not taken to start over with a new ring.

**Note**: Riak will not allow two nodes with different ring sizes to be joined into a cluster.

## Backend

Another critical decision to be made is the backend to use.
The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.4/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. + diff --git a/content/riak/kv/2.9.4/configuring/global-object-expiration.md b/content/riak/kv/2.9.4/configuring/global-object-expiration.md new file mode 100644 index 0000000000..4f41f7c6d2 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/global-object-expiration.md @@ -0,0 +1,86 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
+menu: + riak_kv-2.9.4: + name: "Global Object Expiration" + identifier: "config_expiry" + weight: 180 + parent: "configuring" +project: "riak_kv" +project_version: 2.9.4 +toc: true +--- + +[ttl]: https://en.wikipedia.org/wiki/Time_to_live + +By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data. + +Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value. + +## Enabling Expiry + +To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file: + +```riak.conf +leveldb.expiration = on +``` + +{{% note %}} +Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration. +{{% /note %}} + +## Setting Retention Time + +The `retention_time` setting is used to specify the time until objects expire. +Durations are set using a combination of an integer and a shortcut for the supported units: + +- Milliseconds - `ms` +- Seconds - `s` +- Minutes - `m` +- Hours - `h` +- Days - `d` +- Weeks - `w` +- Fortnight - `f` + +The following example configures objects to expire after 5 hours: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 5h +``` + +You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 8d9h +``` + +## Expiry Modes + +Global expiration supports two modes: + +- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired. +- `normal` - individual objects are removed as part of the usual compaction process. + +We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient. + +The following example configure objects to expire after 1 day: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 1d +leveldb.expiration.mode = whole_file +``` + +## Disable Expiry + +To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other 2 settings are ignored. For example: + +```riak.conf +leveldb.expiration = off +leveldb.expiration.retention_time = 1d +leveldb.expiration.mode = whole_file +``` + diff --git a/content/riak/kv/2.9.4/configuring/load-balancing-proxy.md b/content/riak/kv/2.9.4/configuring/load-balancing-proxy.md new file mode 100644 index 0000000000..0de38ccc25 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/load-balancing-proxy.md @@ -0,0 +1,272 @@ +--- +title: "Load Balancing and Proxy Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Load Balancing & Proxy" + identifier: "configuring_load_balance" + weight: 150 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.4/ops/advanced/configs/load-balanacing-proxy/ + - /riak/kv/2.9.4/ops/advanced/configs/load-balanacing-proxy/ +--- + +[perf open files]: {{}}riak/kv/2.9.4/using/performance/open-files-limit + +The recommended best practice for operating Riak in production is to +place Riak behind a load-balancing or proxy solution, either hardware- +or software- based, while never directly exposing Riak to public network +interfaces. 
+ +Riak users have reported success in using Riak with a variety of load- +balancing and proxy solutions. Common solutions include proprietary +hardware-based load balancers, cloud-based load balancing options, such +as Amazon's Elastic Load Balancer, and open-source software based +projects like HAProxy and Nginx. + +This guide briefly explores the commonly used open-source software-based +solutions HAProxy and Nginx, and provides some configuration and +operational tips gathered from community users and operations oriented +engineers at Basho. + +While it is by no means an exhaustive overview of the topic, this guide +should provide a starting point for choosing and implementing your own +solution. + +## HAProxy + +[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source +solution for load balancing and proxying of HTTP- and TCP-based +application traffic. + +Users have reported success in using HAProxy in combination with Riak in +a number of configurations and scenarios. Much of the information and +example configuration for this section is drawn from experiences of +users in the Riak community in addition to suggestions from Basho +engineering. + +### Example Configuration + +The following is an example starting-point configuration for HAProxy to +act as a load balancer. The example cluster has 4 nodes and will be +accessed by Riak clients using both the Protocol Buffers and HTTP +interfaces. + +> **Note on open files limits** +> +> The operating system's open files limits need to be greater than 256000 +for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems. + +```config +global + log 127.0.0.1 local0 + log 127.0.0.1 local1 notice + maxconn 256000 + chroot /var/lib/haproxy + user haproxy + group haproxy + spread-checks 5 + daemon + quiet + +defaults + log global + option dontlognull + option redispatch + option allbackups + maxconn 256000 + timeout connect 5000 + +backend riak_rest_backend + mode http + balance roundrobin + option httpchk GET /ping + option httplog + server riak1 riak1.:8098 weight 1 maxconn 1024 check + server riak2 riak2.:8098 weight 1 maxconn 1024 check + server riak3 riak3.:8098 weight 1 maxconn 1024 check + server riak4 riak4.:8098 weight 1 maxconn 1024 check + +frontend riak_rest + bind 127.0.0.1:8098 + # Example bind for SSL termination + # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem + mode http + option contstats + default_backend riak_rest_backend + + +backend riak_protocol_buffer_backend + balance leastconn + mode tcp + option tcpka + option srvtcpka + server riak1 riak1.:8087 weight 1 maxconn 1024 check + server riak2 riak2.:8087 weight 1 maxconn 1024 check + server riak3 riak3.:8087 weight 1 maxconn 1024 check + server riak4 riak4.:8087 weight 1 maxconn 1024 check + + +frontend riak_protocol_buffer + bind 127.0.0.1:8087 + mode tcp + option tcplog + option contstats + mode tcp + option tcpka + option srvtcpka + default_backend riak_protocol_buffer_backend +``` + +A specific configuration detail worth noting from the example is the +commented option for SSL termination. HAProxy supports SSL directly as +of version 1.5. Provided that your HAProxy instance was built with +OpenSSL support, you can enable it by uncommenting the example line and +modifying it to suit your environment. More information is available in +the [HAProxy +documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl). 
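If you do enable SSL termination, a minimal sketch of the resulting frontend (assuming HAProxy 1.5+ built with OpenSSL and a combined certificate/key file at the path used in the commented line above) might look like:

```config
frontend riak_rest_ssl
    # Terminate SSL here and forward plain HTTP to the Riak REST backend
    bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem
    mode http
    option contstats
    default_backend riak_rest_backend
```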
Also note that the above example is considered a starting point and is a work in progress based upon [this example](https://gist.github.com/1507077). You should carefully examine the configuration and change it according to your specific environment.

### Maintaining Nodes Behind HAProxy

When using HAProxy with Riak, you can instruct HAProxy to ping each node in the cluster and automatically remove nodes that do not respond.

You can also specify a round-robin configuration in HAProxy and have your application handle connection failures by retrying after a timeout, thereby reaching a functioning node upon retrying the connection attempt.

HAProxy also has a standby system you can use to remove a node from rotation while allowing existing requests to finish. You can remove nodes from HAProxy directly from the command line by interacting with the HAProxy stats socket with a utility such as [socat](http://www.dest-unreach.org/socat/):

```bash
echo "disable server /" | socat stdio /etc/haproxy/haproxysock
```

At this point, you can perform maintenance on the node, down the node, and so on. When you've finished working with the node and it is again available for requests, you can re-enable it:

```bash
echo "enable server /" | socat stdio /etc/haproxy/haproxysock
```

Consult the following HAProxy documentation resources for more information on configuring HAProxy in your environment:

* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy)
* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)

## Nginx

Some users have reported success in using the [Nginx](http://nginx.org/) HTTP server to proxy requests for Riak clusters. An example that provides access to a Riak cluster *through GET requests only* is provided here for reference.

### Example Configuration

The following is an example starting-point configuration for Nginx to act as a front-end proxy to a 5-node Riak cluster.

This example forwards all GET requests to Riak nodes while rejecting all other HTTP operations.

{{% note title="Nginx version notes" %}}
This example configuration was verified on **Nginx version 1.2.3**. Please be aware that earlier versions of Nginx did not support any HTTP 1.1 semantics for upstream communication to backends. You should carefully examine this configuration and make changes appropriate to your specific environment before attempting to use it.
{{% /note %}}

Here is an example `nginx.conf` file:

```config
upstream riak_hosts {
  # server 10.0.1.10:8098;
  # server 10.0.1.11:8098;
  # server 10.0.1.12:8098;
  # server 10.0.1.13:8098;
  # server 10.0.1.14:8098;
}

server {
  listen 80;
  server_name _;
  access_log /var/log/nginx/riak.access.log;

  # your standard Nginx config for your site here...
+ location / { + root /var/www/nginx-default; + } + + # Expose the /riak endpoint and allow queries for keys only + location /riak/ { + proxy_set_header Host $host; + proxy_redirect off; + + client_max_body_size 10m; + client_body_buffer_size 128k; + + proxy_connect_timeout 90; + proxy_send_timeout 90; + proxy_read_timeout 90; + + proxy_buffer_size 64k; # If set to a smaller value, + # nginx can complain with an + # "too large headers" error + proxy_buffers 4 64k; + proxy_busy_buffers_size 64k; + proxy_temp_file_write_size 64k; + + if ($request_method != GET) { + return 405; + } + + # Disallow any link with the MapReduce query format "bucket,tag,_" + if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) { + return 405; + } + + if ($request_method = GET) { + proxy_pass http://riak_hosts; + } + } +} +``` + +{{% note title="Note on access controls" %}} +Even when filtering and limiting requests to GETs only as done in the example, +you should strongly consider additional access controls beyond what Nginx can +provide directly, such as specific firewall rules to limit inbound connections +to trusted sources. +{{% /note %}} + +### Querying Secondary Indexes Over HTTP + +When accessing Riak over HTTP and issuing Secondary Index queries, you +can encounter an issue due to the default Nginx handling of HTTP header +names containing underscore (`_`) characters. + +By default, Nginx will issue errors for such queries, but you can +instruct Nginx to handle such header names when doing Secondary Index +queries over HTTP by adding the following directive to the appropriate +`server` section of `nginx.conf`: + +``` +underscores_in_headers on; +``` + diff --git a/content/riak/kv/2.9.4/configuring/managing.md b/content/riak/kv/2.9.4/configuring/managing.md new file mode 100644 index 0000000000..d80efdfc29 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/managing.md @@ -0,0 +1,117 @@ +--- +title: "Managing Your Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Managing Configuration" + identifier: "configuring_managing" + weight: 130 + parent: "configuring" +toc: true +--- + +[use admin riak cli]: {{}}riak/kv/2.9.4/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.9.4/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.9.4/configuring/reference/#search + +## Retrieving a Configuration Listing + +At any time, you can get a snapshot of currently applied configurations +through the command line. For a listing of *all* of the configs +currently applied in the node: + +```bash +riak config effective +``` + +This will output a long list of the following form: + +``` +anti_entropy = active +anti_entropy.bloomfilter = on +anti_entropy.concurrency_limit = 2 +# and so on +``` + +For detailed information about a particular configuration variable, use +the `config describe ` command. This command will output a +description of what the parameter configures, which datatype you should +use to set the parameter (integer, string, enum, etc.), the default +value of the parameter, the currently set value in the node, and the +name of the parameter in `app.config` in older versions of Riak (if +applicable). + +For in-depth information about the `ring_size` variable, for example: + +```bash +riak config describe ring_size +``` + +This will output the following: + +``` +Documentation for ring_size +Number of partitions in the cluster (only valid when first +creating the cluster). Must be a power of 2, minimum 8 and maximum +1024. 
+ + Datatype : [integer] + Default Value: 64 + Set Value : undefined + app.config : riak_core.ring_creation_size +``` + +## Checking Your Configuration + +The [`riak`][use admin riak cli] command line tool has a +[`chkconfig`][use admin riak cli#chkconfig] command that enables you to +determine whether the syntax in your configuration files is correct. + +```bash +riak chkconfig +``` + +If your configuration files are syntactically sound, you should see the +output `config is OK` followed by a listing of files that were checked. +You can safely ignore this listing. If, however, something is +syntactically awry, you'll see an error output that provides details +about what is wrong. To give an example, the `search.solr.jmx_port` +setting (in the [Search][config reference#search] section below) +must be set as an integer. Imagine that we set it to something else: + +```riakconf +search.solr.jmx_port = banana +``` + +If we run `riak chkconfig` now, we'll get an error: + +``` +[error] Error generating configuration in phase transform_datatypes +[error] Error transforming datatype for: search.solr.jmx_port +[error] "banana" can't be converted to an integer +``` + +The error message will specify which configurable parameters are +syntactically unsound and attempt to provide an explanation why. + +Please note that the `chkconfig` command only checks for syntax. It will +_not_ be able to discern if your configuration is otherwise unsound, +e.g. if your configuration will cause problems on your operating system +or doesn't activate subsystems that you would like to use. + +## Debugging Your Configuration + +If there is a problem with your configuration but you're having trouble +identifying the problem, there is a command that you can use to debug +your configuration: + +```bash +riak config generate -l debug +``` + +If there are issues with your configuration, you will see detailed +output that might provide a better sense of what has gone wrong in the +config generation process. + diff --git a/content/riak/kv/2.9.4/configuring/mapreduce.md b/content/riak/kv/2.9.4/configuring/mapreduce.md new file mode 100644 index 0000000000..71ddfb4669 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/mapreduce.md @@ -0,0 +1,197 @@ +--- +title: "MapReduce Settings" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "MapReduce Settings" + identifier: "configuring_mapreduce" + weight: 170 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.4/ops/advanced/configs/mapreduce/ + - /riak/kv/2.9.4/ops/advanced/configs/mapreduce/ +--- + +[usage mapreduce]: {{}}riak/kv/2.9.4/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.9.4/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.9.4/developing/usage/secondary-indexes + +## Configuring MapReduce + +[MapReduce (M/R)][usage mapreduce] is always enabled, but configurable +through the [app.config][config reference#appconfig] file as +follows under `riak_kv` + +```erlang +{riak_kv, [ +``` + +`mapred_name` is the URL directory used to submit M/R requests to Riak. +By default `mapred`, making the command path, for example: +`http://localhost:8098/mapred` + +```erlang + {mapred_name, "mapred"}, +``` + +`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes] +MapReduce inputs are queued in parallel in their own pipe (`true`), or +serially through a helper process (`false` or undefined). + +> **Note**: Set to `false` or leave undefined during an upgrade from 1.0. 
```erlang
  {mapred_2i_pipe, true},
```

Each of these entries controls how many Javascript virtual machines are available for executing map, reduce, pre- and post-commit hook functions.

This is largely relevant only if you are writing JavaScript M/R jobs.

```erlang
  {map_js_vm_count, 8 },
  {reduce_js_vm_count, 6 },
  {hook_js_vm_count, 2 },
```

`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated to the Javascript VMs. If unset, the default is 8MB.

This is largely relevant only if you are writing JavaScript M/R jobs.

```erlang
  {js_max_vm_mem, 8},
```

`js_thread_stack` is the maximum amount of thread stack, in megabytes, allocated to the Javascript VMs. If unset, the default is 16MB.

> **Note**: This is not the same as the C thread stack.

```erlang
  {js_thread_stack, 16},
```

`js_source_dir` should point to a directory containing Javascript source files which will be loaded when Riak initializes Javascript VMs.

```erlang
  %{js_source_dir, "/tmp/js_source"},
```

## Configuration Tuning for Javascript

If you load larger JSON objects in your buckets, there is a possibility you might encounter an error like the following:

```json
  {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"}
```

You can increase the amount of memory allocated to the Javascript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB:

```erlang
{js_thread_stack, 8}
```

becomes

```erlang
{js_thread_stack, 32},
```

In addition to increasing the amount of memory allocated to the stack, you can increase the heap size as well by increasing `js_max_vm_mem` from the default of 8MB. If you are collecting a large number of results in a reduce phase, you may need to increase this setting.

## Configuration for Riak 1.0

Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config:

```erlang
%% Use Riak Pipe to power MapReduce queries
{mapred_system, pipe},
```

> **Warning:**
>
> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0.

Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this:

```erlang
%% Use the legacy MapReduce system
{mapred_system, legacy},
```

## Configuration Tuning for Reduce Phases

If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases.

### Batch Size

By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs.
If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config: + +```erlang +%% Run reduce functions after 100 new inputs are received +{mapred_reduce_phase_batch_size, 100}, +``` + +You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_batch_size":150}}} +``` + +In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form: + +```erlang +{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep} +``` + +Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_only_1":true}}} +``` + +Similarly, in Erlang: + +```erlang +{reduce, FunSpec, [reduce_phase_only_1], Keep} +``` + +> **Warning:** +> +> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1. + +### Pre-Reduce + +If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced local to the partition that produced them, before being sent, as usual, to the final aggregate reduce. + +Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config: + +```erlang +%% Always pre-reduce between map and reduce phases +{mapred_always_prereduce, true} +``` + +Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag: + +```erlang +{map, FunSpec, [do_prereduce], Keep} +``` + +> **Warning:** +> +>A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1. + diff --git a/content/riak/kv/2.9.4/configuring/next-gen-replication.md b/content/riak/kv/2.9.4/configuring/next-gen-replication.md new file mode 100644 index 0000000000..29f7ffd2d4 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/next-gen-replication.md @@ -0,0 +1,61 @@ +--- +tile_supertext: "Configuring:" +title: "Next Gen Replication" +description: "" +project: "riak_kv" +project_version: "2.9.4" +menu: + riak_kv-2.9.4: + name: "Next Gen Replication" + identifier: "nextgen_rep" + weight: 200 + parent: "configuring" +version_history: + in: "2.9.1+" +toc: true +commercial_offering: true +--- + +The configuration for Next Gen Replication is kept in + the `riak.conf` configuration file. 
## Settings

Once your configuration is set, you can verify its correctness by running the `riak` command-line tool:

```bash
riak chkconfig
```

## riak.conf Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync, whether all data should be sync'd, or only a specific bucket (bucket) or a specific bucket type (type). Note that in most cases sync of all data is lower overhead than sync of a subset of data, as cached AAE trees will be used.
`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For Tictac full-sync, the registered queue name on this cluster to be used for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exclusive to full-sync (i.e. a real-time replication queue may be used as well).
`ttaaefs_maxresults` | `any` (integer) | `64` | For Tictac full-sync, the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair.
`ttaaefs_rangeboost` | `any` (integer) | `8` | For Tictac full-sync, when running a range_check query the maximum number of AAE segments to be compared per exchange will be ttaaefs_maxresults * ttaaefs_rangeboost.
`ttaaefs_bucketfilter_name` | `any` (text) | `` | For Tictac bucket full-sync, the bucket to be sync'd by this node. Only ascii string bucket definitions are supported (these will be converted using list_to_binary).
`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync, the bucket type of the bucket name. Only ascii string type bucket definitions are supported (these definitions will be converted to binary using list_to_binary).
`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync, the NVAL to be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval.
`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync, the NVAL to be sync'd in the remote cluster.
`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node to which this node will connect for full-sync purposes. If this peer node is unavailable, then this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes.
`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster.
`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when connecting to the peer in the remote cluster. Can be http or pb (but only http is currently tested).
`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24-hour period all the data should be checked to confirm it is fully sync'd. When running a full (i.e. nval) sync this will check all the data under that nval between the clusters, and when the trees are out of alignment, will check across all data where the nval matches the specified nval.
`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24-hour period no data should be checked. Use no-checks to align the number of checks done by each node - if each node has the same number of slots, they will naturally space their checks within the period of the slot.
`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24-hour period the last hour's data should be checked to confirm it is fully sync'd.
`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24-hour period the last 24 hours of data should be checked to confirm it is fully sync'd.
`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24-hour period a range_check should be run.
`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, whether each repaired key should be logged.
`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable tictacaae. Note that disabling tictacaae will set the use of tictacaae_active only at startup - setting the environment variable at runtime will have no impact.
`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Set the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and not removed when that partition hands off (although the contents should be cleared).
`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if tictacaae is enabled, the vnode will detect if the vnode backend has the capability to be a "native" store. If not, then parallel mode will be entered, and a parallel AAE keystore will be started. There are two potential parallel store backends: leveled_ko and leveled_so.
`tictacaae_rebuildwait` | `` | `336` | This is the number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
`tictacaae_rebuilddelay` | `` | `345600` | Once the AAE system has expired (due to the rebuild wait), the rebuild will not be triggered until after the rebuild delay, which will be a random number of seconds up to the size of this value.
`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will skip when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans developing.
`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data; hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired each pass is defined by tictacaae_maxresults.
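To illustrate how these settings fit together, the following is a minimal sketch of a riak.conf fragment enabling full-sync of all data against a single peer; the values are drawn from the defaults and examples in the table above, and the peer address is a placeholder to adjust for your environment:

```riakconf
ttaaefs_scope = all
ttaaefs_localnval = 3
ttaaefs_remotenval = 3
ttaaefs_peerip = 127.0.0.1
ttaaefs_peerport = 8898
ttaaefs_peerprotocol = http
ttaaefs_allcheck = 24
```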
\ No newline at end of file diff --git a/content/riak/kv/2.9.4/configuring/reference.md b/content/riak/kv/2.9.4/configuring/reference.md new file mode 100644 index 0000000000..0ccd6ebb30 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/reference.md @@ -0,0 +1,2041 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.4/ops/advanced/configs/configuration-files/ + - /riak/kv/2.9.4/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. 
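+
+As a quick illustration of the flat `setting = value` syntax used
+throughout `riak.conf`, the lines below set three of the parameters
+documented later on this page (the values shown are simply the
+defaults):
+
+```riakconf
+ring_size = 64
+storage_backend = bitcask
+anti_entropy = active
+```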
+ +## Node Metadata + +Every Riak node has a name and a cookie used to facilitate inter-node +communication. The following parameters enable you to customize the name +and cookie. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
distributed_cookieCookie for distributed node communication within a Riak cluster. +All nodes in the same cluster should use the same cookie or they will +not be able to communicate.riak
nodenameThe name of the Riak node.riak@127.0.0.1
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
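+
+For example, to give a node a routable name and a shared cluster cookie
+(the host address and cookie value below are placeholders):
+
+```riakconf
+nodename = riak@10.0.0.1
+distributed_cookie = examplecookie
+```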
+ +## Ring + +Configurable parameters for your cluster's [ring][concept clusters]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ring.state_dirDefault location of ringstate../data/ring
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
transfer_limitNumber of concurrent node-to-node transfers allowed.2
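+
+A sketch of these settings in `riak.conf`, using illustrative values
+for a new cluster (remember that `ring_size` is only valid when the
+cluster is first created):
+
+```riakconf
+ring_size = 128
+transfer_limit = 4
+```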
+
+## Storage Backend
+
+Riak enables you to choose from the following storage backends:
+
+* [Bitcask][plan backend bitcask] --- [configuration][config backend bitcask]
+* [LevelDB][plan backend leveldb] --- [configuration][config backend leveldb]
+* [Leveled][plan backend leveled] --- [configuration][config backend leveled]
+* [Memory][plan backend memory] --- [configuration][config backend memory]
+* [Multi][plan backend multi] --- [configuration][config backend multi]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
ConfigDescriptionDefault
storage_backendSpecifies the storage engine used for Riak's key-value data and +secondary indexes (if supported).

The available options are +bitcask (the default), leveldb, +memory, leveled and multi.
bitcask
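+
+For example, to select the LevelDB backend (one of the options listed
+above) in `riak.conf`:
+
+```riakconf
+storage_backend = leveldb
+```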
+ +## Directories + +The directories in which Riak stores data, logs, dependencies, +executables, and configuration files can be configured using the +parameters below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
platform_bin_dirThe directory in which the riak-admin, +riak-debug, and now-deprecated search-cmd +executables are stored../bin
platform_data_dirThe directory in which Riak stores its storage backend data, as well +as active anti-entropy data, and cluster metadata../data
platform_etc_dirThe directory in which Riak's configuration files are stored../etc
platform_lib_dirThe directory in which Riak's dependencies are housed../lib
platform_log_dirThe directory in which Riak's log files are stored, e.g. +console.log, erlang.log, and +crash.log files../log
+
+Each of these directory parameters can be used to construct values for
+other parameters by placing it within a `$(...)`. Thus,
+`platform_log_dir` becomes `$(platform_log_dir)` and so on.
+
+To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the
+`anti_entropy.data_dir` parameter. When setting that parameter, you can
+specify an absolute directory, as below:
+
+```riakconf
+anti_entropy.data_dir = /path/to/anti_entropy
+```
+
+Or you can use the value of `platform_data_dir`:
+
+```riakconf
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```
+
+## Search
+
+Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`search` | `off` | `on` or `off`
+`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
+`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
+`search.dist_query` | `on` | `on` or `off`
+`search.index.error_threshold.failure_count` | `3` | Integer
+`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
+`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
+`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
+`search.queue.batch.maximum` | `100` | Integer
+`search.queue.batch.minimum` | `1` | Integer
+`search.queue.high_watermark` | `10000` | Integer
+`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
+`search.root_dir` | `./data/yz` | Directory
+`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
+`search.solr.jmx_port` | `8985` | Integer
+`search.solr.port` | `8093` | Integer
+`search.solr.start_timeout` | `30s` | Integer with time units (e.g. 2m)
+`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`
+
+
+## Riak Control
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters. The configurable parameters below enable you
+to turn the Riak Control subsystem on and off and to configure console
+authorization.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
ConfigDescriptionDefault
riak_controlSet to off to disable the admin panel.off
riak_control.auth.modeAuthentication mode used for access to the admin panel. Options are +off (which is the default) or userlist.off
riak_control.auth.user.$username.passwordIf Riak Control's authentication mode +(riak_control.auth.mode) is set to userlist, +this is the list of usernames and passwords for access to the admin +panel.
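+
+For example, to enable Riak Control with userlist authentication; the
+username and password shown here are placeholders and should never be
+used as-is:
+
+```riakconf
+riak_control = on
+riak_control.auth.mode = userlist
+riak_control.auth.user.admin.password = changeme
+```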
+ +## Runtime Health + +Configurable parameters for interaction between Riak and the underlying +operating system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
runtime_health.triggers.distribution_portWhether distribution ports with full input buffers will be counted +as busy. Distribution ports connect Riak nodes within a single cluster. +on
runtime_health.triggers.portWhether ports with full input buffers will be counted as busy. +Ports can represent open files or network sockets.on
runtime_health.triggers.process.heap_sizeA process will become busy when its heap exceeds this size +(in bytes).160444000
runtime_health.triggers.process.garbage_collectionA process will become busy when it exceeds this amount of time doing +garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 +milliseconds, `5s` for 5 seconds, etc.Note: Enabling +this setting can cause performance problems on multi-core systems.off
runtime_health.triggers.process.long_scheduleA process will become busy when it exceeds this amount of time +during a single process scheduling and execution cycle. Set as an integer +plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, +etc.off
runtime_health.thresholds.busy_portsThe threshold at which a warning will be triggered about the number +of ports that are overly busy. Ports with full input buffers count +toward this threshold.2
runtime_health.thresholds.busy_processesThe threshold at which a warning will be triggered about the
+number of processes that are overly busy. Processes with large heaps or
+that take a long time to garbage collect will count toward this
+threshold.30
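+
+A sketch of how these triggers and thresholds look in `riak.conf`; the
+first two values are the documented defaults and the third uses the
+illustrative `50ms` delay mentioned above:
+
+```riakconf
+runtime_health.thresholds.busy_ports = 2
+runtime_health.thresholds.busy_processes = 30
+runtime_health.triggers.process.garbage_collection = 50ms
+```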
+ +## Default Bucket Properties + +When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
buckets.default.allow_multWhether or not siblings are allowed +

+Note: See +Conflict Resolution for a discussion of siblings.
false
buckets.default.basic_quorumWhether not-founds will invoke the "basic quorum" optimization. +This setting will short-circuit fetches where the majority of replicas +report that the key is not found. Only used when +notfound_ok is set to false.false
buckets.default.dwThe number of replicas which must reply to a write request +indicating that the write was committed to durable storage for the write +to be deemed successful.quorum
buckets.default.last_write_winsWhether conflicting writes resolve via timestamp.false
buckets.default.merge_strategyThe strategy used when merging objects that potentially have +conflicts. The default is 2 in Riak 2.0 for typed buckets +and 1 for non-typed buckets. This setting reduces sibling +creation through additional metadata on each sibling (also known as Dotted +Version Vectors). Setting this to 1 is the default for +Riak 1.4 and earlier, and may duplicate siblings that originated in the +same write.1
buckets.default.n_valThe number of replicas stored in **non-typed** buckets. For typed buckets, the default is 3 unless changed explicitly for that bucket type. +

+Note: See +Replication Properties +for further discussion.
3
buckets.default.notfound_okWhether not-founds will count toward a quorum of reads.true
buckets.default.postcommitA space-delimited list of functions that will be run after a value +is stored. Only Erlang functions are allowed, using the +module:function format.
buckets.default.precommitA space-delimited list of functions that will be run before a value +is stored, and that can abort the write. Only Erlang functions are +allowed, using the module:function format.
buckets.default.prThe number of primary, non-fallback replicas that must reply to a +read request.0
buckets.default.pwThe number of primary, non-fallback replicas which must reply to a +write request.0
buckets.default.rThe number of replicas which must reply to a read request.quorum
buckets.default.wThe number of replicas which must reply to a write request, +indicating that the write was received.quorum
buckets.default.rwThe number of replicas which must reply to a delete request.quorum
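+
+For example, a node that keeps siblings for client-side conflict
+resolution while spelling out the default quorum values might set
+(illustrative):
+
+```riakconf
+buckets.default.allow_mult = true
+buckets.default.n_val = 3
+buckets.default.r = quorum
+buckets.default.w = quorum
+```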
+ +## Object Settings + +Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
object.formatControls which binary representation of a riak value is stored on +disk. Options are 0, which will use the original +erlang:term_to_binary format but has a higher space +overhead, or 1, which will tell Riak to utilize a new +format for more compact storage of small values.1
object.siblings.maximumWriting an object with more than this number of siblings will send +a failure to the client.100
object.siblings.warning_thresholdWriting an object with more than this number of siblings will +generate a warning in the logs.25
object.size.maximumWriting an object larger than this will send a failure to the +client.50MB
object.size.warning_thresholdReading or writing objects larger than this size will write a +warning in the logs.5MB
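+
+A sketch of the object safety limits in `riak.conf`; the values below
+are the documented defaults, shown only for illustration:
+
+```riakconf
+object.size.warning_threshold = 5MB
+object.size.maximum = 50MB
+object.siblings.warning_threshold = 25
+object.siblings.maximum = 100
+```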
+ +## Erlang VM + +In the older configuration system, the Erlang VM in which Riak runs was +configured using a `vm.args` file. In the new, `riak.conf`-based +system, the Erlang VM can be configured using the parameters in the +table below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
erlang.async_threadsThe number of threads in the Erlang VM's asynchronous thread pool. +The valid range is 0-1024. If thread support is not available, this +parameter will have no impact; if thread support is available, the +default value is 64. This is the equivalent of the +A flag. +More information can be found here. +64 (if thread support is available)
erlang.async_threads.stack_sizeIf thread support is available in your Erlang VM, this parameter +sets the amount of memory allocated to each asynchronous thread, which +you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, +which translates to 64-32768 KB on 32-bit architectures. Although there +is no default, we suggest a stack size of 16 kilowords, which translates +to 64 KB. This small default size has been chosen because the number of +asynchronous threads, set using the erlang.async_threads +parameter explained above, might be quite large. The 64 KB default is +enough for drivers delivered with Erlang/OTP but might not be large +enough to accommodate drivers that use the driver_async() +functionality, documented here.
erlang.distribution.net_ticktimeThe net kernel is an Erlang system process that provides various
+forms of network monitoring. In a Riak cluster, one of the functions of
+the net kernel is to periodically check node liveness. Tick
+time is the frequency with which those checks happen. This
+parameter determines that frequency: if you set
+this parameter to 10, for example, the tick will occur once
+every 10 seconds.
erlang.distribution.port_range.minimumFor ease of firewall configuration, the Erlang distribution can be +bound to a limited range of TCP ports. If this parameter is set, and +erlang.distribution.port_range.maximum is not set, only +this port will be used. If the minimum is unset, no restriction will be +made on the port range. Instead, Erlang will listen on a random +high-numbered port. More information here and here.
erlang.distribution.port_range.maximumSee the description for +erlang.distribution.port_range.minimum directly above. +
erlang.schedulers.force_wakeup_intervalSet the scheduler forced wakeup interval. All run queues will be +scanned each time period specified (in milliseconds). While there are +sleeping schedulers in the system, one scheduler will be woken for each +non-empty run queue found. An interval of zero disables this feature, +which is the default. This feature is a workaround for lengthy executing +native code, and native code that does not properly bump reductions. +More information here.
erlang.schedulers.compaction_of_loadEnables or disables the Erlang scheduler's compaction of load. When +enabled (which is the default), load balancing will strive to establish +a load distribution that causes as many scheduler threads as possible to +be fully loaded, i.e. not to run out of scheduled work. This is +accomplished by migrating load, such as running processes, into a +smaller set of schedulers when schedulers frequently run out of work. +When disabled, the frequency at which schedulers run out of work will +not be taken into account by the load balancing logic.true (enabled)
erlang.schedulers.utilization_balancingEnables or disables the Erlang scheduler's balancing of load. By
+default, scheduler utilization balancing is disabled while scheduler
+compaction of load is enabled, i.e.
+erlang.schedulers.compaction_of_load is set to
+true. In this state, the Erlang VM will strive for a load
+distribution which causes as many scheduler threads as possible to be
+fully loaded, i.e. to not run out of work. When load balancing is
+enabled using this setting, the system will instead attempt to spread
+scheduler utilization equally between schedulers.false (disabled)
erlang.distribution_buffer_sizeFor nodes with many busy_dist_port events, Basho +recommends raising the sender-side network distribution buffer size. +32MB may not be sufficient for some workloads and is a suggested +starting point. Erlangers may know this as +zdbbl. See more +here +.32MB
erlang.process_limitRaises the default Erlang process limit256000
erlang.max_ets_tablesRaises the ETS table limit256000
erlang.crash_dumpSets the location of crash dumps./log/erl_crash.dump
erlang.fullsweep_afterA non-negative integer which indicates how many times generational +garbage collections can be done without forcing a fullsweep collection. +In low-memory systems (especially without virtual memory), setting the +value to 0 can help to conserve memory. More information here. +0
erlang.max_portsThe number of concurrent ports/sockets. The valid range is 1024 to +134217727.65536
erlang.KEnables or disables the kernel poll functionality if the emulator +supports it. If the emulator does not support kernel poll, and the +K flag is passed to the emulator, a warning is issued at +startup. Similar information here.on
erlang.schedulers.totalSets the number of scheduler threads to create and scheduler +threads to set online when erlang.smp support has been +enabled. The maximum for both values is 1024. If the Erlang runtime +system is able to determine the amount of logical processors configured +and logical processors available, schedulers.total will +default to logical processors configured, and +schedulers.online will default to the number of logical +processors available. Otherwise, the default values will be 1. +Schedulers may be omitted if schedulers.online is not and +vice versa. If schedulers.total or +schedulers.online is specified as a negative number, the +value is subtracted from the default number of logical processors +configured or logical processors available, respectively. Specifying +the value 0 for Schedulers or +SchedulersOnline resets the number of scheduler threads or +scheduler threads online respective to its default value. This option +is ignored if the emulator doesn't have SMP support enabled (see the +erlang.smp flag). More information +here. +
erlang.schedulers.onlineSee the description for erlang.schedulers.total +directly above.
erlang.WSets the mapping of warning messages for error_logger. +Messages sent to the error logger using one of the warning routines can +be mapped either to errors, warnings (w, +which is the default), or info reports (i).w
erlang.smpStarts the Erlang runtime system with SMP support enabled. This may +fail if no runtime system with SMP support is available. The +auto setting starts the Erlang runtime system with SMP +support enabled if it is available and more than one logical processor +is detected. A value of disable starts a runtime system +without SMP support. Note: The runtime system with SMP +support will not be available on all supported platforms. See also the +erlang.schedulers settings. Some native extensions (NIFs) +require use of the SMP emulator. More information here.enable
erlang.shutdown_timeLimits how long the Erlang VM spends shutting down. After the +specified duration elapses, all existing processes are killed.10s
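+
+For example, a node seeing frequent busy_dist_port events might
+raise the distribution buffer and pin the distribution port range for
+easier firewalling; the buffer size is the suggested starting point
+from the table above and the port numbers are placeholders:
+
+```riakconf
+erlang.distribution_buffer_size = 32MB
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+```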
+ +## JavaScript MapReduce + +Configurable parameters for Riak's now-deprecated JavaScript +[MapReduce][usage mapreduce] system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
javascript.source_dirA directory containing the Javascript source files which will be +loaded by Riak when it initializes Javascript VMs.
javascript.maximum_stack_sizeThe maximum amount of thread stack memory to allocate to each +JavaScript virtual machine.16MB
javascript.maximum_heap_sizeThe maximum amount of memory allocated to each JavaScript virtual +machine.8MB
javascript.hook_pool_sizeThe number of JavaScript virtual machines available for executing +pre-commit hook functions.2
javascript.reduce_pool_sizeThe number of JavaScript virtual machines available for executing +reduce functions.6
javascript.map_pool_sizeThe number of JavaScript virtual machines available for executing +map functions.8
+ +## Security + +Configurable parameters for [Riak KV Security][security index]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ssl.cacertfileThe default signing authority location for HTTPS.$(platform_etc_dir)/cacertfile.pem
ssl.keyfileDefault key location for HTTPS.$(platform_etc_dir)/key.pem
ssl.certfileDefault cert location for HTTPS.$(platform_etc_dir)/cert.pem
secure_referer_checkMeasures were added to Riak 1.2 to counteract cross-site scripting +and request-forgery attacks. Some reverse proxies cannot remove the +Referer header and make serving data directly from Riak +impossible. Turning this setting to off disables this +security check.on
check_crlWhether to check the certificate +revocation list (CRL) of a client certificate. This defaults to +on but some CAs may not maintain or define a CRL, so this +can be disabled if no CRL is available.on
tls_protocols.sslv3Determine which SSL/TLS versions are allowed. By default, only TLS +1.2 is allowed, but other versions can be enabled if clients don't +support the latest TLS standard. It is strongly recommended that SSLv3 +not be enabled unless absolutely necessary. More than one protocol can +be enabled at once. The tls_protocols parameters below can +be used to turn different versions on and off.off
tls_protocols.tlsv1.2on
tls_protocols.tlsv1.1off
tls_protocols.tlsv1off
honor_cipher_orderWhether to prefer the order in which the server lists its ciphers. +When set to off, the client's preferred cipher order +dictates which cipher is chosen.on
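+
+For example, pointing Riak at certificate files using the
+`$(platform_etc_dir)` substitution described earlier, and keeping only
+TLS 1.2 enabled; the file names are placeholders:
+
+```riakconf
+ssl.cacertfile = $(platform_etc_dir)/cacertfile.pem
+ssl.certfile = $(platform_etc_dir)/cert.pem
+ssl.keyfile = $(platform_etc_dir)/key.pem
+tls_protocols.tlsv1.2 = on
+```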
+ +## Client Interfaces + +Configurable parameters for clients connecting to Riak either through +Riak's Protocol Buffers or HTTP API. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
protobuf.nagleTurns off Nagle's algorithm for Protocol Buffers connections. This +is equivalent to setting the TCP_NODELAY option on the +socket.off
protobuf.backlogThe maximum length to which the queue of pending connections may +grow. If set, it must be an integer greater than zero. If you +anticipate a huge number of connections being initialized +simultaneously, set this number higher.128
listener.protobuf.$nameThis is the IP address and TCP port to which the Riak Protocol +Buffers interface will bind.{"127.0.0.1",8087}
listener.http.$nameThis is the IP address and TCP port to which the Riak HTTP +interface will bind.{"127.0.0.1",8098}
listener.https.$nameThis is the IP address and TCP port to which the Riak HTTPS +interface will bind.
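+
+For example, binding the Protocol Buffers and HTTP listeners to all
+interfaces rather than loopback, a common change for multi-node
+clusters; the listener name internal is the conventional
+default name:
+
+```riakconf
+listener.protobuf.internal = 0.0.0.0:8087
+listener.http.internal = 0.0.0.0:8098
+```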
+ +## Logging + +Configurable parameters for [lager](https://github.com/basho/lager), +Riak's logging system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
log.consoleWhere to emit the default log messages (typically at +info severity). Possible values: off, which +disables console log messages; file, which specifies that +log messages will be output to the file specified by +log.console.file; console, which outputs +messages to standard output (seen when using riak +attach-direct); or both, which outputs messages both +to the file specified in log.console.file and to standard +out.file
log.console.fileWhen log.console is set to file or +both, this parameter determines the path of the file to +which console messages will be logged../log/console.log
log.console.levelThe severity level of the console log. Possible
+values:
+  • debug
+  • info
+  • warning
+  • error
info
log.crashWhether to enable the crash logon
log.crash.fileIf the crash log is enabled, the file where its messages will be +written./log/crash.log
log.crash.maximum_message_sizeMaximum size of individual messages in the crash log64KB
log.crash.rotationThe schedule on which to rotate the crash log. More information here. +$D0
log.crash.rotation.keepThe number of rotated crash logs to keep. When set to +current, only the current open log file is kept. +Otherwise, an integer can be specified.5
log.crash.sizeMaximum size of the crash log before it is rotated10MB
log.error.fileThe file where error messages will be logged../log/error.log
log.error.messages_per_secondMaximum number of error_logger messages to handle per +second100
log.error.redirectWhether to redirect error_logger messages into +lageron
log.syslogWhen set to on, enables log output to syslogoff
log.syslog.facilitySets the facility
+level of syslog output if log.syslog is set to
+on. Possible values:
+  • auth
+  • authpriv
+  • clock
+  • cron
+  • daemon
+  • ftp
+  • kern
+  • lpr
+  • mail
+  • news
+  • syslog
+  • user
+  • uucp
+In addition to these settings, you may also select local0
+through local7.
daemon
log.syslog.identIf log.syslog is set to on, this setting +determines the prefix appended to each syslog message.riak
log.syslog.levelIf log.syslog is set to on, this setting
+determines the log level of syslog output. Possible values:
+  • alert
+  • critical
+  • debug
+  • emergency
+  • error
+  • info
+  • none
+  • notice
+  • warning
info
saslWhether to enable sasl, Erlang's +built-in error loggeroff
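+
+A sketch of sending console output both to file and to standard out
+while also enabling syslog; the values are illustrative:
+
+```riakconf
+log.console = both
+log.console.level = info
+log.syslog = on
+log.syslog.facility = daemon
+```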
+ +## Active Anti-Entropy + +Configurable parameters for Riak's active anti-entropy subsystem. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
anti_entropyHow Riak will repair out-of-sync keys. If set to +active, out-of-sync keys will be repaired in the +background; if set to passive, out-of-sync keys are only +repaired on read; and if set to active-debug, verbose +debugging information will be output.active
anti_entropy.throttleWhether the distributed throttle for Active Anti-Entropy is
+enabled.on
anti_entropy.throttle.$tier.mailbox_sizeSets the throttling tiers for Active Anti-Entropy. Each tier is a
+minimum vnode mailbox size and a time-delay that the throttle should
+observe at that size and above. For example,
+anti_entropy.throttle.tier1.mailbox_size = 0,
+anti_entropy.throttle.tier1.delay = 0ms,
+anti_entropy.throttle.tier2.mailbox_size = 40,
+anti_entropy.throttle.tier2.delay = 5ms, etc. If
+configured, there must be a tier which includes a mailbox size of 0.
+Both .mailbox_size and .delay must be set for
+each tier.
anti_entropy.throttle.$tier.delaySee the description for
+anti_entropy.throttle.$tier.mailbox_size above.
anti_entropy.bloomfilterBloom filters are highly effective in shortcutting data queries +that are destined to not find the requested key, though they tend to +entail a small performance cost.on
anti_entropy.max_open_files20
anti_entropy.write_buffer_sizeThe LevelDB options used by Active Anti-Entropy to generate the +LevelDB-backed on-disk hashtrees.4MB
anti_entropy.data_dirThe directory where AAE hash trees are stored../data/anti_entropy
anti_entropy.trigger_intervalThe tick determines how often the Active Anti-Entropy manager looks +for work to do (building/expiring trees, triggering exchanges, etc). +Lowering this value will speed up the rate at which all replicas are +synced across the cluster. Increasing the value is not recommended. +15s
anti_entropy.concurrency_limitLimit how many Active Anti-Entropy exchanges or builds can happen +concurrently.2
anti_entropy.tree.expiryDetermines how often hash trees are expired after being built. +Periodically expiring a hash tree ensures that the on-disk hash tree +data stays consistent with the actual K/V backend data. It also helps +Riak identify silent disk failures and bit rot. However, expiration is +not needed for normal active anti-entropy operations and should be +infrequent for performance reasons. The time is specified in +milliseconds.1w
anti_entropy.tree.build_limit.per_timespan1h
anti_entropy.tree.build_limit.numberRestrict how fast AAE can build hash trees. Building the tree for a +given partition requires a full scan over that partition's data. Once +built, trees stay built until they are expired. .number is +the number of builds; .per_timespan is the amount of time +in which that number of builds occurs.1
anti_entropy.use_background_managerWhether AAE is to use a background process to limit AAE tree +rebuilds. If set to on, this will help to prevent system +response degradation under times of heavy load from multiple background +tasks that contend for the same system resources; setting this parameter +to off can cut down on system resource usage. +off
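+
+For example, enabling active anti-entropy with the default concurrency
+limit and placing hash trees under the platform data directory:
+
+```riakconf
+anti_entropy = active
+anti_entropy.concurrency_limit = 2
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```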
+
+## TicTac Active Anti-Entropy
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
ConfigDescriptionDefault
tictacaae_activeSwitches TicTac AAE between passive and active. If you want to run TicTac AAE alongside legacy AAE, set both to active. Can be active or passive.
+passive
tictacaae_datarootPath under which aae datafiles will be stored (platform_data_dir)/tictac_aae
tictacaae_parallelstoreWhen running in parallel mode, which will be the default if the backend does not support native tictac aae (i.e. is not leveled), what type of parallel key store should be kept - leveled_ko (leveled and key-ordered), or leveled_so (leveled and segment ordered). When running in native mode, this setting is ignored. Acceptable values are leveled_ko or leveled_soleveled_ko
tictacaae_rebuildwaitThe minimum number of hours to wait between rebuilds.336
tictacaae_rebuilddelayThe number of seconds which represents the length of the period in which the next rebuild will be scheduled. So if all vnodes are scheduled to rebuild at the same time, they will actually rebuild randomly between 0 and this value (in seconds) after the rebuild time.345600
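+
+For example, enabling TicTac AAE with a key-ordered parallel store;
+the rebuild values below are the documented defaults, shown only for
+illustration:
+
+```riakconf
+tictacaae_active = active
+tictacaae_parallelstore = leveled_ko
+tictacaae_rebuildwait = 336
+tictacaae_rebuilddelay = 345600
+```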
+
+
+## Intra-Cluster Handoff
+
+Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
ConfigDescriptionDefault
handoff.max_rejectsThe maximum number of times that a secondary system within Riak, +such as Riak Search, can block handoff +of primary key/value data. The approximate maximum duration that a vnode +can be blocked can be determined by multiplying this setting by +vnode_management_timer. If you want to prevent handoff from +ever being blocked by a secondary system, set this parameter to +0.6
handoff.inboundWhether inbound handoff is enabled on the node. Possible values are +on or off.on
handoff.outboundWhether outbound handoff is enabled on the node. Possible values are +on or off.on
handoff.portSpecifies the TCP port that Riak uses for intra-cluster data +handoff.8099
handoff.ssl.certfileTo encrypt riak_core intra-cluster data handoff +traffic, uncomment this line and edit its path to an appropriate +certfile and keyfile.
handoff.ssl.keyfileThe keyfile paired with the certfile specified in +.certfile.
handoff.use_background_managerWhether Riak will use a background manager to limit K/V handoff. +This can help to prevent system response degradation during times of +heavy load caused by multiple background tasks that contend for the same +system resources; setting this parameter to off can cut +down on system resource usage.off
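+
+A sketch of typical handoff settings; the port shown is the documented
+default:
+
+```riakconf
+handoff.port = 8099
+handoff.inbound = on
+handoff.outbound = on
+```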
+ +## Riak Data Types + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
datatypes.compression_levelWhether serialized Data Types will use compression and at what +level. When set to an integer, the parameter refers to the +aggressiveness of compression, on a scale from 0 to 9. on +is equivalent to 6, whereas off is equivalent to 0. Higher +values for compression tend to be more CPU intensive.1
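+
+For example, trading some CPU for smaller serialized Data Types by
+raising the compression level (an illustrative value on the 0-9 scale
+described above):
+
+```riakconf
+datatypes.compression_level = 6
+```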
+ +## SNMP + +Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher. + +## JMX + +Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher. + +## Strong Consistency + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment. + +Riak's strong consistency feature has a variety of tunable parameters +that allow you to enable and disable strong consistency, modify the +behavior of leaders and followers, set various timeouts, and more. More +detailed information from an operations perspective can be found in our +documentation on [managing strong consistency][cluster ops strong consistency]. + +Strong consistency is disabled by default. The `strong_consistency` +parameter enables you to turn it on. This setting is available in each +node's `riak.conf` file. + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
strong_consistencyEnables the consensus subsystem used for strongly consistent Riak +operations if set to on.off
+ +Unlike the `strong_consistency` setting, the settings listed below are +available only in `advanced.config`, in the `riak_ensemble` section of +that file. That section looks like this: + +```advancedconfig +{riak_ensemble, [ + {parameter1, value}, + {parameter2, value}, + %% Other setting + ]} +``` + +Further instructions on setting parameters in `advanced.config` can be +found in the [advanced configuration](#advanced-configuration) section below. + +Using these settings properly demands a firm understanding of the basic +architecture of Riak's implementation of strong consistency. We highly +recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind +strong consistency before changing the defaults on these parameters. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ensemble_tickThe rate at which leaders perform their periodic duties, including +refreshing the leader lease, in milliseconds. This setting must be lower +than both the lease_duration and +follower_timeout settings (both listed below). Lower values +mean that leaders perform their duties more frequently, which can allow +for faster convergence if a leader goes offline and then returns to the +ensemble; higher values mean that leaders perform their duties less +frequently, which can reduce network overhead.500
lease_durationDetermines how long a leader lease remains valid without being +refreshed (in milliseconds). This should be set higher than the +ensemble_tick setting (listed above) so that leaders have +time to refresh their leases before they time out, and it must be set +lower than the follower_timeout setting (listed below). +ensemble_tick * 3/2
follower_timeoutDetermines how long a follower waits to hear from a leader before it +abandons the leader (in milliseconds). This must be set greater than the +lease_duration setting.lease_duration * 4
alive_tokensDetermines the number of ticks the leader will wait to hear from its +associated vnode before assuming that the vnode +is unhealthy and stepping down as leader. If the vnode does not respond +to the leader before ensemble_tick * +alive_tokens milliseconds have elapsed, the leader will +give up leadership. It may be necessary to raise this setting if your +Riak vnodes are frequently stalling out on slow backend reads/writes. If +this setting is too low, it may cause slow requests to time out earlier +than the request timeout.2
storage_delayDetermines how long the consensus subsystem delays syncing to disk +when performing certain metadata operations (in milliseconds). This +delay allows multiple operations to be coalesced into a single disk +write. We do not recommend that you change this setting.50
storage_tickDetermines how often the consensus subsystem writes data to disk +that was requested to be written asynchronously (in milliseconds). We do +not recommend that you change this setting.5000
trust_leaseDetermines whether leader leases are used to optimize reads. When +set to true, a leader with a valid lease will handle the +read directly without contacting any followers; when set to +false, the leader will always contact followers. For more +information, see our internal documentation on + +leader leases.true
peer_get_timeoutDetermines the timeout used internally for reading consistent data, +in milliseconds. This setting must be greater than the highest request +timeout used by your application.60000 (1 minute)
peer_put_timeoutDetermines the timeout, in milliseconds, used internally for writing +consistent data. This setting must be greater than the highest request +timeout used by your application.60000 (1 minute)
peer_workersThe number of concurrent workers used by the leader to service +requests. Increasing this setting may boost performance depending on the +workload.1
tree_validationDetermines whether Riak considers peer Merkle trees to be trusted +after a node restart. When validation is enabled (the default), Riak +does not trust peer trees after a restart, instead requiring the peer to +sync with a trusted majority. This is the safest option, as it protects +Riak against undetected corruption of the Merkle tree. However, this +mode reduces Riak availability since it can sometimes require more than +a simple majority of nodes to be online and reachable.true
synchronous_tree_updatesDetermines whether the metadata updates to follower Merkle trees are +handled synchronously or not. When set to true, Riak +requires two quorum round trips to occur before replying back to the +client, the first quorum request to write the actual object and the +second to write the Merkle tree data. When set to false, +Riak will respond back to the client after the first round trip, letting +the metadata update happen asynchronously.

It's important to +note that the leader always updates its local Merkle tree +before responding to the client. This setting only affects the metadata +writes sent to followers.

In principle, asynchronous updates +are unsafe. If the leader crashes before sending the metadata updates +and all followers that had acknowledged the object write somehow revert +to the object value immediately prior to a write request, a future read +could return the immediately preceding value without realizing that it +was incorrect. Given that this scenario is unlikely, this setting +defaults to false in the name of improved performance.
false
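+
+A sketch of how these parameters sit inside the riak_ensemble
+section of advanced.config; the values shown are the documented
+defaults, included only for illustration:
+
+```advancedconfig
+{riak_ensemble, [
+    {ensemble_tick, 500},
+    {trust_lease, true},
+    {peer_workers, 1}
+  ]}
+```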
+ + +## Miscellaneous + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
metadata_cache_sizeThis setting controls the size of the metadata cache for each vnode. +The cache can be disabled by setting it to off (this is the +default). Enabling the cache should not be necessary in disk-based +backends (i.e. LevelDB and Bitcask) but it can help performance in the +Memory backend. Note that this setting adjusts the size of the ETS table +rather than the actual data. Thus, more space may be used than the +simple size * number-of-vnodes calculation would imply. +

+Caution: This setting should not be changed without +extensive benchmarking.
off
max_concurrent_requestsThe maximum number of concurrent requests of each type (GET or PUT) +that is allowed. Setting this value to infinite disables +overload protection. The erlang.process_limit should be at +least 3 times this setting.50000
dtraceWhether DTrace is enabled. +Do not enable unless your Erlang/OTP runtime is compiled to support +DTrace, which is available in R15B01 (supported by the official source +package) and in R14B04 via a custom repository and branch.off
vnode_management_timerSets the frequency with which vnodes attempt to trigger handoff between +this node and other nodes in the cluster.10s (10 seconds)
retry_put_coordinator_failureWhen a PUT (i.e. write) request fails, Riak will retry the operation +if this setting is set to on, which is the default. Setting +it to off will speed response times on PUT requests in +general, but at the risk of potentially increasing the likelihood of +write failure.on
background_managerRiak's background manager is a subsystem that coordinates access to +shared resources from other Riak subsystems. The background manager can +help to prevent system response degradation under times of heavy load +caused by multiple background tasks.on
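+
+For illustration, the miscellaneous settings above take the usual flat
+form in `riak.conf`; the values shown are the documented defaults:
+
+```riakconf
+metadata_cache_size = off
+max_concurrent_requests = 50000
+vnode_management_timer = 10s
+```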
+ +## Advanced Configuration + +The `advanced.config` file takes the same format as the `app.config` +file familiar to users of versions of Riak prior to 2.0. Here is an +example: + +```advancedconfig +[ + {riak_core, + [ + {cluster_mgr, {"127.0.0.1", 8098 } }, + %% more riak_core configs + ]}, + + {riak_repl, + [ + {data_root, "/var/db/riak/riak_repl/"}, + %% more riak_repl configs + ] + } +]. +``` + +The following settings are available in the `advanced.config` file: + +#### `riak_repl` settings + +Most settings that are configurable through `advanced.config` are +related to Riak's `riak_repl` subsystem. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
data_rootPath (relative or absolute) to the working directory for the +replication process./var/db/riak/riak_repl/
max_fssource_clusterThe hard limit of fullsync workers that will be running on the +source side of a cluster across all nodes on that cluster for a fullsync +to a sink cluster. This means that if you have configured fullsync for +two different clusters, both with a max_fssource_cluster of +5, 10 fullsync workers can be in progress. This only affects nodes on +the source cluster on which this parameter is defined, either via the +configuration file or command line.5
max_fssource_nodeThis setting limits the number of fullsync workers that will be +running on each individual node in a source cluster. This is a hard +limit for all fullsyncs enabled; additional fullsync configurations will +not increase the number of fullsync workers allowed to run on any node. +This only affects nodes on the source cluster on which this parameter is +defined, either via the configuration file or command line. +1
max_fssink_nodeThis setting limits the number of fullsync workers allowed to run on +each individual node in a sink cluster. This is a hard limit for all +fullsyncs enabled; additional fullsync configurations will not increase +the number of fullsync workers allowed to run on any node. This only +affects nodes on the source cluster on which this parameter is defined, +either via the configuration file or command line.1
fullsync_on_connectWhether to initiate a fullsync on initial connection from the sink +cluster.true
fullsync_intervalA single-integer value representing the duration to wait, in +minutes, between fullsyncs, or a list of {clustername, +time_in_minutes} pairs for each sink participating in fullsync +replication.30
rtq_max_bytesThe maximum size, in bytes, to which the realtime replication queue +can grow before new objects are dropped. Dropped objects will need to be +replicated with a fullsync.104857600
proxy_getWhether to enable Riak CS proxy_get and block +filter.disabled
rt_heartbeat_intervalA heartbeat message is sent from the source to the sink every +rt_heartbeat_interval seconds. Setting +rt_heartbeat_interval to undefined disables +the realtime heartbeat. This feature is available only in Riak KV +Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards.15
rt_heartbeat_timeoutIf a heartbeat response is not received within the time period +specified by this setting (in seconds), the source connection exits and +will be re-established. This feature is available only in Riak KV +Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards.15
realtime_connection_rebalance_max_delay_secsShould a server on the source cluster be restarted, this is +the amount of time (in seconds), before the realtime connections are +rebalanced by a change in the number of source nodes.300
fullsync_use_background_managerBy default, fullsync replication will attempt to coordinate with +other Riak subsystems that may be contending for the same resources. +This will help to prevent system response degradations during times of +heavy load from multiple background tasks. To disable background +coordination, set this parameter to `false`. This feature is available +only in Riak KV Enterprise Edition 2.0 and later as well as Riak KV 2.2.6 onwards.true
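+
+A sketch of a riak_repl section using the settings above; the
+values are the documented defaults and the path is illustrative:
+
+```advancedconfig
+{riak_repl, [
+    {data_root, "/var/db/riak/riak_repl/"},
+    {fullsync_interval, 30},
+    {max_fssource_cluster, 5},
+    {max_fssource_node, 1}
+  ]}
+```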
+
+#### Upgrading Riak Search with `advanced.config`
+
+If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][Search Settings] (codename Yokozuna), you will need to enable
+legacy Search while the upgrade is underway. You can add the following
+snippet to your `advanced.config` configuration to do so:
+
+```advancedconfig
+[
+  %% Other configs
+
+  {riak_search, [ {enabled, true} ]},
+  {merge_index, [
+    {data_root, "/var/lib/riak/merge_index"},
+    {buffer_rollover_size, 1048576},
+    {max_compact_segments, 20}
+  ]},
+
+  %% Other configs
+].
+```
+
+#### Other settings
+
+There are four non-`riak_repl` settings available in
+`advanced.config`.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
ConfigSectionDescriptionDefault
add_pathsriak_kvIf you are installing +custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies +the paths to any compiled .beam files that you wish to use. +This is expressed as a list of absolute paths on the node's filesystem, +e.g. [ "/tmp", "/other" ].
cluster_mgrriak_coreThe cluster manager listens for connections from remote clusters on +the specified IP and port. Every node runs one cluster manager, but only +the cluster manager running on the cluster leader will service requests. +This can change as nodes enter and leave the cluster.{"127.0.0.1", 9080}
delete_moderiak_kvSpecifies how Riak behaves after objects are marked for deletion +with a tombstone. There are three possible settings: keep +disables tombstone removal altogether; immediate removes +objects' tombstones as soon as the delete request is received; and +setting delete_mode to an integer value specifies the +number of milliseconds to wait before removing tombstones. More +information can be found in Object +Deletion.3000 (3 seconds)
target_n_valriak_coreThe highest n_val that you generally intend to use. +This setting affects how partitions are distributed within the cluster, +helping to ensure that "hot spots" don't occur, i.e. that data is never +stored more than once on the same physical node. You will need to change +this setting only in rare circumstances. Assuming that +ring_size is a power of 2, the ideal value for this setting +is both (a) greater than or equal to the largest n_val for +any bucket type and (b) an even divisor of the number of partitions in +the ring, i.e. ring_size. The default is 4, +and the number of physical nodes in your cluster must be greater than +target_n_val for this setting to be effective at preventing +hot spots.4
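+
+For example, a riak_kv section that delays tombstone removal by
+three seconds and loads custom .beam files; the path is a
+placeholder:
+
+```advancedconfig
+{riak_kv, [
+    {delete_mode, 3000},
+    {add_paths, ["/opt/riak/custom_beams"]}
+  ]}
+```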
+
+## Cluster Job Controls
+
+{{% note title="Warning" %}}
+Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
+{{% /note %}}
+
+The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
+`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`
+
diff --git a/content/riak/kv/2.9.4/configuring/search.md b/content/riak/kv/2.9.4/configuring/search.md
new file mode 100644
index 0000000000..73b4b52d16
--- /dev/null
+++ b/content/riak/kv/2.9.4/configuring/search.md
@@ -0,0 +1,275 @@
+---
+title: "Riak Search Settings"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Riak Search Settings"
+    identifier: "configuring_search"
+    weight: 160
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/advanced/configs/search/
+  - /riak/kv/2.9.4/ops/advanced/configs/search/
+---
+
+[usage search]: {{}}riak/kv/2.9.4/developing/usage/search
+[usage search schema]: {{}}riak/kv/2.9.4/developing/usage/search-schemas
+[usage search data types]: {{}}riak/kv/2.9.4/developing/usage/searching-data-types
+[usage custom extractors]: {{}}riak/kv/2.9.4/developing/usage/custom-extractors
+[cluster-ops aae throttle]: {{}}riak/kv/2.9.4/using/cluster-operations/active-anti-entropy/#throttling
+[config reference]: {{}}riak/kv/2.9.4/configuring/reference
+[config reference#search]: {{}}riak/kv/2.9.4/configuring/reference/#search
+[glossary aae]: {{}}riak/kv/2.9.4/learn/glossary/#active-anti-entropy-aae
+[security index]: {{}}riak/kv/2.9.4/using/security/
+
+[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
+[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation
+
+This page covers how to use Riak Search (with
+[Solr](http://lucene.apache.org/solr/) integration).
+
+For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].
+
+If you are looking to develop on or with Riak Search, take a look at:
+
+* [Using Search][usage search]
+* [Search Schema][usage search schema]
+* [Custom Search Extractors][usage custom extractors]
+* [Riak KV Data Types and Search][usage search data types]
+
+## Overview
+
+We'll be walking through:
+
+1. [Prerequisites](#prerequisites)
+2. [Enabling Riak Search](#enabling-riak-search)
+3. [Search Configuration Settings](#search-config-settings)
+4. [Additional Solr Information](#more-on-solr)
+
+## Prerequisites
+
+Because Solr is a Java application, you will need to install **Java 7
+or later** on every node. Installation packages can be found on the [Java SE Downloads
+page][java se downloads] and instructions in the [Java SE documentation site][java se docs].
+
+
+## Enabling Riak Search
+
+Riak Search is not enabled by default, so you must enable it in every
+node's [configuration file][config reference] as follows:
+
+```riakconf
+search = on
+```
+
+
+## Search Config Settings
+
+You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation.
+
+### `search`
+
+Enable or disable search; defaults to `off`.
+
+Valid values: `on` or `off`
+
+### `search.anti_entropy.data_dir`
+
+The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`.
+
+Valid values: a directory
+
+### `search.anti_entropy.throttle`
+
+Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`.
+
+Valid values: `on` or `off`
+
+You can read more about throttling [here][cluster-ops aae throttle].
+
+### `search.anti_entropy.throttle.$tier.delay`
+
+Set the throttling tiers delay for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above.
+
+For example:
+
+```
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a `solrq_queue_length` of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.anti_entropy.throttle.$tier.solrq_queue_length`
+
+Set the throttling tiers for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle
+should observe at that size and above.
+
+For example:
+
+```
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a `solrq_queue_length` of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.dist_query`
+
+Enable this node in distributed query plans; defaults to `on`.
+
+If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long running administrative operation (e.g.
reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.
+
+This setting can also be changed via `riak-admin` by issuing one of the following commands:
+
+```
+riak-admin set search.dist_query=off
+```
+
+or
+
+```
+riak-admin set search.dist_query=on
+```
+
+Setting this value in riak.conf is useful when you are restarting a node that was removed from search queries via `riak-admin`. Setting `search.dist_query` to `off` in riak.conf will prevent the node from being included in search queries until it is fully spun up.
+
+Valid values: `on` or `off`
+
+### `search.index.error_threshold.failure_count`
+
+The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.
+
+Valid values: Integer
+
+### `search.index.error_threshold.failure_interval`
+
+The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.
+
+If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.
+
+Valid values: Milliseconds
+
+### `search.index.error_threshold.reset_interval`
+
+The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.
+
+Valid values: Milliseconds
+
+### `search.queue.batch.flush_interval`
+
+The maximum delay between notifications to flush batches to Solr; defaults to `1000` (milliseconds).
+
+This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.
+
+Valid values: Integer with a time unit (`ms`, `s`, `m`, or `h`)
+
+### `search.queue.batch.maximum`
+
+The maximum batch size, in number of Riak objects; defaults to `500`.
+
+Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.
+
+Valid values: Integer
+
+### `search.queue.batch.minimum`
+
+The minimum batch size, in number of Riak objects; defaults to `10`.
+
+Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.
+
+Valid values: Integer
+
+### `search.queue.high_watermark`
+
+The queue high water mark; defaults to `1000`.
+
+If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem if writes into Solr start to fall behind.
+
+Valid values: Integer
+
+### `search.queue.high_watermark.purge_strategy`
+
+The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.
+
+Valid values: `purge_one`, `purge_index`, or `off`
+
+* `purge_one` removes the oldest item on the queue from an erroring index (one whose internal "fuse" has blown, in the code's terminology) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `purge_index` removes all items associated with one random erroring index (one whose internal "fuse" has blown, in the code's terminology) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `off` disables purging
+
+### `search.root_dir`
+
+The root directory in which index data and configuration is stored; defaults to `./data/yz`.
+
+Valid values: a directory
+
+### `search.solr.jvm_options`
+
+The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.
+
+Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.
+
+Valid values: Java command-line arguments
+
+### `search.solr.jmx_port`
+
+The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.
+
+Valid values: Integer
+
+**Note**: JMX ceased being a Riak feature in Riak KV 2.9.0p5. This setting is left here for reference but no longer has any effect.
+
+### `search.solr.port`
+
+The port number to which Solr binds (note: binds on every interface); defaults to `8093`.
+
+Valid values: Integer
+
+### `search.solr.start_timeout`
+
+How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.
+
+Values lower than 1s will be rounded up to 1s.
+
+Valid values: Integer with time units (e.g. 2m)
+
+## More on Solr
+
+### Solr JVM and Ports
+
+Riak Search runs one Solr process per node to manage its indexing and search functionality. While the underlying Yokozuna subsystem manages index distribution, node coverage for queries, active anti-entropy (AAE), and JVM process management, you should still provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.
+
+Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports to the outside world.
+
+### Solr for Operators
+
+For further information on Solr monitoring, tuning, and performance, we recommend the following documents for getting started:
+
+* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
+* [Solr Performance Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
+* [Solr Performance Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
+* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)
+
+A wide variety of other documentation is available from the Solr OSS community.
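+
+To recap the batching subsystem covered on this page, here is a sketch of the relevant riak.conf fragment. The values shown are simply the documented defaults, not tuning advice; adjust them against your own write volume.
+
+```riak.conf
+search = on
+# Batches smaller than the minimum wait up to the flush interval
+search.queue.batch.minimum = 10
+# Larger batches are split at the maximum
+search.queue.batch.maximum = 500
+search.queue.batch.flush_interval = 1000ms
+# Vnodes block once a worker queue reaches the high water mark
+search.queue.high_watermark = 1000
+```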
+
diff --git a/content/riak/kv/2.9.4/configuring/strong-consistency.md b/content/riak/kv/2.9.4/configuring/strong-consistency.md
new file mode 100644
index 0000000000..bf85729c83
--- /dev/null
+++ b/content/riak/kv/2.9.4/configuring/strong-consistency.md
@@ -0,0 +1,701 @@
+---
+title: "Implementing Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Implementing Strong Consistency"
+    identifier: "configuring_strong_consistency"
+    weight: 190
+    parent: "configuring"
+toc: true
+---
+
+[apps strong consistency]: {{}}riak/kv/2.9.4/developing/app-guide/strong-consistency
+[concept strong consistency]: {{}}riak/kv/2.9.4/using/reference/strong-consistency
+[cluster ops add remove node]: {{}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes
+[config reference#strong-cons]: {{}}riak/kv/2.9.4/configuring/reference/#strong-consistency
+[use admin riak cli]: {{}}riak/kv/2.9.4/using/admin/riak-cli
+[concept eventual consistency]: {{}}riak/kv/2.9.4/learn/concepts/eventual-consistency
+[concept clusters]: {{}}riak/kv/2.9.4/learn/concepts/clusters
+[plan backend bitcask]: {{}}riak/kv/2.9.4/setup/planning/backend/bitcask
+[glossary vnode]: {{}}riak/kv/2.9.4/learn/glossary/#vnode
+[concept buckets]: {{}}riak/kv/2.9.4/learn/concepts/buckets
+[cluster ops bucket types]: {{}}riak/kv/2.9.4/using/cluster-operations/bucket-types
+[use admin riak-admin#ensemble]: {{}}riak/kv/2.9.4/using/admin/riak-admin/#ensemble-status
+[use admin riak-admin]: {{}}riak/kv/2.9.4/using/admin/riak-admin
+[config reference#advanced]: {{}}riak/kv/2.9.4/configuring/reference/#advanced-configuration
+[plan cluster capacity]: {{}}riak/kv/2.9.4/setup/planning/cluster-capacity
+[cluster ops strong consistency]: {{}}riak/kv/2.9.4/using/cluster-operations/strong-consistency
+[apps replication properties]: {{}}riak/kv/2.9.4/developing/app-guide/replication-properties
+[concept causal context]: {{}}riak/kv/2.9.4/learn/concepts/causal-context
+[dev data types]: {{}}riak/kv/2.9.4/developing/data-types
+[glossary aae]: {{}}riak/kv/2.9.4/learn/glossary/#active-anti-entropy-aae
+[cluster ops 2i]: {{}}riak/kv/2.9.4/using/reference/secondary-indexes
+[usage commit hooks]: {{}}riak/kv/2.9.4/developing/usage/commit-hooks
+[usage conflict resolution]: {{}}riak/kv/2.9.4/developing/usage/conflict-resolution
+[cluster ops obj del]: {{}}riak/kv/2.9.4/using/reference/object-deletion
+[dev client libraries]: {{}}riak/kv/2.9.4/developing/client-libraries
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types, and Commit Hooks. We do not recommend its usage in any production environment.
+
+This document provides information on configuring and monitoring a Riak cluster's optional strong consistency subsystem. Documentation for developers building applications using Riak's strong consistency feature can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical treatment can be found in [Strong Consistency][concept strong consistency].
+
+## Minimum Cluster Size
+
+In order to use strong consistency in Riak, **your cluster must consist of at least three nodes**. If it does not, all strongly consistent operations will fail.
If your cluster is smaller than three nodes, you will need to [add more nodes][cluster ops add remove node] and make sure that strong consistency is [enabled](#enabling-strong-consistency) on all of them.
+
+Strongly consistent operations on a given key may also fail if a majority of object replicas in a given ensemble are unavailable, whether due to slowness, crashes, or network partitions. This means that you may see strongly consistent operations fail even if the minimum cluster size requirement has been met. More information on ensembles can be found in [Implementation Details](#implementation-details).
+
+While strong consistency requires at least three nodes, we have a variety of recommendations regarding cluster size, which can be found in [Fault Tolerance](#fault-tolerance).
+
+## Enabling Strong Consistency
+
+Strong consistency in Riak is disabled by default. You can enable it in each node's [configuration file][config reference#strong-cons].
+
+```riakconf
+strong_consistency = on
+```
+
+```appconfig
+%% In the older, app.config-based system, the strong consistency
+%% parameter is enable_consensus:
+
+{riak_core, [
+    % ...
+    {enable_consensus, true},
+    % ...
+    ]}
+```
+
+Remember that you must [restart your node][use admin riak cli] for configuration changes to take effect.
+
+For strong consistency requirements to be applied to specific keys, those keys must be in [buckets][concept buckets] bearing a bucket type with the `consistent` property set to `true`. More information can be found in [Using Bucket Types][cluster ops bucket types].
+
+If you enable strong consistency on all nodes in a cluster with fewer than three nodes, strong consistency will be **enabled** but not yet **active**. Strongly consistent operations are not possible in this state. Once at least three nodes with strong consistency enabled are detected in the cluster, the system will be activated and ready for use. You can check on the status of the strong consistency subsystem using the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command.
+
+## Fault Tolerance
+
+Strongly consistent operations in Riak are necessarily less highly available than [eventually consistent][concept eventual consistency] operations because strongly consistent operations can only succeed if a **quorum** of object replicas are currently reachable. A quorum is a majority of an object's replicas, i.e. N / 2 + 1 rounded down (with N being `n_val`), meaning that 3 replicas constitute a quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are unavailable, for example, no strongly consistent operations on that object can succeed.
+
+While Riak uses N=3 by default, bear in mind that **higher values of N will allow for more fault tolerance**. The table below shows the number of allowable missing replicas for assorted values of N:
+
+Replicas | Allowable missing replicas
+:--------|:--------------------------
+3 | 1
+5 | 2
+7 | 3
+9 | 4
+15 | 7
+
+Thus, we recommend setting `n_val` higher than the default of 3 for strongly consistent operations. More on `n_val` in the section below.
+
+### n_val Recommendations
+
+Due to the quorum requirements explained above, we recommend that you use _at least_ N=5 for strongly consistent data. You can set the value of N, i.e. `n_val`, for buckets [using bucket types][cluster ops bucket types].
For example, you can create and activate a bucket type with N set to 5 and strong consistency enabled---we'll call the bucket type `consistent_and_fault_tolerant`---using the following series of [commands][use admin riak-admin]:
+
+```bash
+riak-admin bucket-type create consistent_and_fault_tolerant \
+  '{"props": {"consistent":true,"n_val":5}}'
+riak-admin bucket-type activate consistent_and_fault_tolerant
+```
+
+If the `activate` command outputs `consistent_and_fault_tolerant has been activated`, the bucket type is now ready to provide strong consistency guarantees.
+
+#### Setting the target_n_val parameter
+
+The `target_n_val` parameter sets the highest `n_val` that you intend to use in an entire cluster. The purpose of this parameter is to ensure that so-called "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. This can happen when:
+
+* `target_n_val` is greater than the number of physical nodes, or
+* the `n_val` for a bucket is greater than `target_n_val`.
+
+A problem to be aware of if you're using strong consistency is that the default for `target_n_val` is 4, while our suggested minimum `n_val` for strongly consistent bucket types is 5. This means that you will need to raise `target_n_val` if you intend to use an `n_val` over 4 for _any_ bucket type in your cluster. If you anticipate using an `n_val` of 7 as the largest `n_val` within your cluster, for example, you will need to set `target_n_val` to 7.
+
+This setting is not contained in `riak.conf`, and must instead be set in the `advanced.config` file. For more information, see our documentation on [advanced configuration][config reference#advanced].
+
+If you are using strong consistency in a cluster that has already been created with a `target_n_val` that is too low (remember that the default is too low), you will need to raise it to the desired higher value and restart each node.
+
+#### Note on Bucket Properties
+
+The `consistent` bucket property is one of two bucket properties, alongside [`datatype`][cluster ops bucket types], that cannot be changed once a bucket type has been created.
+
+Furthermore, if `consistent` is set to `true` for a bucket type, you cannot change the `n_val` for the bucket type once it's been created. If you attempt to do so, you'll see the following error:
+
+```
+Error updating bucket <bucket_type>:
+n_val cannot be modified for existing consistent type
+```
+
+If you've created a bucket type with a specific `n_val` and wish to change it, you will need to create a new bucket type with the appropriate `n_val` and use the new bucket type instead.
+
+### Fault Tolerance and Cluster Size
+
+From the standpoint of strongly consistent operations, larger clusters tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down.
+
+Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If two nodes go down, _all_ ensembles will lose quorum and will be unable to function. Strongly consistent operations on the entire keyspace will fail until at least one node is brought back online. And even when that one node is brought back online, a significant portion of the keyspace will continue to be unavailable for strongly consistent operations.
+
+For the sake of contrast, imagine a 50-node cluster in which all ensembles are N=5 (i.e. all objects are replicated to five nodes).
In this cluster, each node is involved in only 10% of the total ensembles; if a single node fails, that failure will thus impact only 10% of ensembles. In addition, because N is set to 5, that single failure will not cost quorum for _any_ ensemble in the cluster; two additional node failures would need to occur for quorum to be lost for _any_ ensemble. And even in the case of three nodes failing, it is highly unlikely that those failures would impact the same ensembles; if they did, only those ensembles would become unavailable, affecting only 10% of the key space, as opposed to 100% in the example of a 3-node cluster consisting of N=3 ensembles.
+
+These examples illustrate why we recommend higher values for N---again, at least N=5---as well as clusters with many nodes. The 50-node cluster example above is used only to illustrate why larger clusters are more fault tolerant. The definition of "many" nodes will vary according to your needs. For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].
+
+### Offline Node Recommendations
+
+In general, strongly consistent Riak is more sensitive to the number of nodes in the cluster than eventually consistent Riak, due to the quorum requirements described above. While Riak is designed to withstand a variety of failure scenarios that make nodes in the cluster unreachable, such as hardware or network failure, **we nonetheless recommend that you limit the number of nodes that you intentionally down or reboot**. Having multiple nodes leave the cluster at once can threaten quorum and thus affect the viability of some or all strongly consistent operations, depending on the size of the cluster.
+
+If you're using strong consistency and you do need to reboot multiple nodes, we recommend rebooting them very carefully. Rebooting nodes too quickly in succession can force the cluster to lose quorum and thus be unable to service strongly consistent operations. The best strategy is to reboot nodes one at a time and wait for each node to rejoin existing [ensembles][cluster ops strong consistency] before continuing to the next node. At any point in time, the state of currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].
+
+## Performance
+
+If you run into performance issues, bear in mind that the key space in a Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of that key space. Larger [ring sizes][concept clusters] allow more independent consensus groups to exist in a cluster, which can provide for more concurrency and higher throughput, and thus better performance. The ideal ring size, however, will also depend on the number of nodes in the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].
+
+Adding nodes to your cluster is another means of enhancing the performance of strongly consistent operations. Instructions on doing so can be found in [Adding and Removing Nodes][cluster ops add remove node].
+
+Your cluster's configuration can also affect strong consistency performance. See the section on [configuration][config reference#strong-cons] below.
+
+## riak-admin ensemble-status
+
+The [`riak-admin`][use admin riak-admin] interface used for general node/cluster management has an `ensemble-status` command that provides insight into the current status of the consensus subsystem undergirding strong consistency.
+
+Running the command by itself will provide the current state of the subsystem:
+
+```bash
+riak-admin ensemble-status
+```
+
+If strong consistency is not currently enabled, you will see `Note: The consensus subsystem is not enabled.` in the output of the command; if strong consistency is enabled, you will see output like this:
+
+```
+============================== Consensus System ===============================
+Enabled: true
+Active: true
+Ring Ready: true
+Validation: strong (trusted majority required)
+Metadata: best-effort replication (asynchronous)
+
+================================== Ensembles ==================================
+ Ensemble     Quorum        Nodes      Leader
+-------------------------------------------------------------------------------
+   root       4 / 4         4 / 4      riak@riak1
+    2         3 / 3         3 / 3      riak@riak2
+    3         3 / 3         3 / 3      riak@riak4
+    4         3 / 3         3 / 3      riak@riak1
+    5         3 / 3         3 / 3      riak@riak2
+    6         3 / 3         3 / 3      riak@riak2
+    7         3 / 3         3 / 3      riak@riak4
+    8         3 / 3         3 / 3      riak@riak4
+```
+
+### Interpreting ensemble-status Output
+
+The following table provides a guide to `ensemble-status` output:
+
+Item | Meaning
+:----|:-------
+`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on [enabling strong consistency](#enabling-strong-consistency).
+`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes.
+`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change.
+`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`.
+`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)`, then `synchronous_tree_updates` is set to `true`.
+`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.<br><br> • **Ensemble** --- The ID of the ensemble<br> • **Quorum** --- The number of ensemble peers that are either leading or following<br> • **Nodes** --- The number of nodes currently online<br> • **Leader** --- The current leader node for the ensemble
+
+**Note**: The **root ensemble**, designated by `root` in the sample output above, is a special ensemble that stores a list of nodes and ensembles in the cluster.
+
+More in-depth information on ensembles can be found in our [internal documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+
+### Inspecting Specific Ensembles
+
+The `ensemble-status` command also enables you to directly inspect the status of specific ensembles in a cluster. The IDs for all current ensembles are displayed in the `Ensembles` section of the `ensemble-status` output described above.
+
+To inspect a specific ensemble, specify the ID:
+
+```bash
+riak-admin ensemble-status <id>
+```
+
+The following would inspect ensemble 2:
+
+```bash
+riak-admin ensemble-status 2
+```
+
+Below is sample output for a single ensemble:
+
+```
+================================= Ensemble #2 =================================
+Id: {kv,0,3}
+Leader: riak@riak2 (2)
+Leader ready: true
+
+==================================== Peers ====================================
+ Peer  Status     Trusted     Epoch     Node
+-------------------------------------------------------------------------------
+  1    following    yes         1       riak@riak1
+  2    leading      yes         1       riak@riak2
+  3    following    yes         1       riak@riak3
+```
+
+The table below provides a guide to the output:
+
+Item | Meaning
+:----|:-------
+`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. All ensembles are `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible.
+`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
+`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
+`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.<br><br> • **Peer** --- The ID of the peer<br> • **Status** --- Whether the peer is a leader or a follower<br> • **Trusted** --- Whether the peer's Merkle tree is currently considered trusted or not<br> • **Epoch** --- The current consensus epoch for the peer. The epoch is incremented each time the leader changes.<br> • **Node** --- The node on which the peer resides.
+
+More information on leaders, peers, Merkle trees, and other details can be found in [Implementation Details](#implementation-details) below.
+
+## Implementation Details
+
+Strong consistency in Riak is handled by a subsystem called [`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md). This system functions differently from other systems in Riak in a number of ways, and many of these differences are important to bear in mind for operators configuring their cluster's usage of strong consistency.
+
+### Basic Operations
+
+The first major difference is that strongly consistent Riak involves a different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types of atomic operations on objects:
+
+* **Get** operations work just as they do against non-strongly-consistent keys, but with two crucial differences:
+  1. Connecting clients are guaranteed to return the most recently written value (which makes those operations CP, i.e. consistent and partition tolerant)
+  2. Reads on strongly consistent keys *never* return siblings, hence there is no need to develop any sort of [conflict resolution][usage conflict resolution] strategy for those keys
+* **Conditional put** operations write an object only if no object currently exists at that key. The operation will fail if the key already exists; if the key was never written or has been deleted, the operation succeeds.
+* **Conditional modify** operations are compare-and-swap (CAS) operations that succeed only if the value of a key has not changed since it was previously read.
+* **Delete** operations work mostly like they do against non-strongly-consistent keys, with the exception that [tombstones][cluster ops obj del] are not harvested, which is the equivalent of having `delete_mode` set to `keep`.
+
+**From the standpoint of clients connecting to Riak, there is little difference between strongly and non-strongly consistent data**. The operations performed on objects---reads, writes, deletes, etc.---are the same, which means that the client API for strong consistency is essentially the same as it is for eventually consistent operations, with the important exception of error handling.
+
+### Ensembles
+
+The main actors in Riak's implementation of strong consistency are **ensembles**, which are independent groups that watch over a portion of a Riak cluster's key space and coordinate strongly consistent operations across nodes. When watching over a given key space, ensembles must act upon multiple replicas of a given object, the number of which is specified by `n_val` (more on this in [Replication Properties][apps replication properties]).
+
+Eventually consistent Riak can service requests even when only a single object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it requires that a **quorum** of object replicas be online and reachable, where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not available for a key, all strongly consistent operations against that key will fail**.
+
+More information can be found in the section on [Fault Tolerance](#fault-tolerance) above.
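+
+Because quorum availability is what gates strongly consistent operations, it can be helpful to verify the state of the consensus subsystem from a shell before routing traffic to a cluster. The following is only a sketch: it greps the human-readable `ensemble-status` output shown above, which is not a stable machine interface, so adjust the patterns if your version's output differs.
+
+```bash
+#!/bin/bash
+# Wait until the consensus subsystem reports itself active.
+# Parses the human-oriented ensemble-status output; not a stable API.
+until riak-admin ensemble-status | grep -q '^Active: *true'; do
+  echo "Consensus subsystem not active yet; retrying in 5s..."
+  sleep 5
+done
+echo "Consensus subsystem is active."
+```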
+
+### Peers, Leaders, Followers, and Workers
+
+All ensembles in strongly consistent Riak consist of agents called **peers**. The number of peers in an ensemble is defined by the `n_val` of that ensemble, i.e. the number of object replicas that the ensemble watches over. Amongst the peers in the ensemble, there are two basic actors: **leaders** and **followers**.
+
+Leaders and followers coordinate with one another on most requests. While leaders and followers coordinate on all writes, i.e. all puts and deletes, you can enable leaders to respond to gets without the need to coordinate with followers. This is known as granting a **leader lease**. Leader leases are enabled by default, and are disabled (or re-enabled) at the cluster level. A more in-depth account of ensemble behavior can be found in our [internal documentation](https://github.com/basho/riak_ensemble/tree/develop/doc).
+
+In addition to leaders and followers, ensemble peers use lightweight Erlang processes called **workers** to perform long-running K/V operations, allowing peers to remain responsive to requests. The number of workers assigned to each peer depends on your configuration.
+
+These terms should be borne in mind in the sections on configuration below.
+
+### Integrity Checking
+
+An essential part of implementing a strong consistency subsystem in a distributed system is **integrity checking**, which is a process that guards against data corruption and inconsistency even in the face of network partitions and other adverse events that Riak was built to handle gracefully.
+
+Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency integrity checking utilizes [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on disk. All peers in an ensemble, i.e. all leaders and followers, maintain their own Merkle trees and update those trees in the event of most strongly consistent operations. Those updates can occur synchronously or asynchronously from the standpoint of client operations, depending on the configuration that you specify.
+
+While integrity checking takes place automatically in Riak, there are important aspects of its behavior that you can configure. See the [Merkle Tree Settings](#merkle-tree-settings) section below for more information on configurable parameters.
+
+## Configuring Strong Consistency
+
+The `riak_ensemble` subsystem provides a wide variety of tunable parameters that you can adjust to fit the needs of your Riak cluster. All `riak_ensemble`-specific parameters, with the exception of the `strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency), must be set in each node's `advanced.config` file, _not_ in `riak.conf` or `app.config`.
+
+Information on the syntax and usage of `advanced.config` can be found in our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full listing of [strong-consistency-related configuration parameters][config reference#strong-cons].
+
+Please note that the sections below require a basic understanding of the following terms:
+
+* ensemble
+* peer
+* leader
+* follower
+* worker
+* integrity checking
+* Merkle tree
+
+For an explanation of these terms, see the [Implementation Details](#implementation-details) section above.
+
+### Leader Behavior
+
+The `trust_lease` setting determines whether leader leases are used to optimize reads.
When set to `true`, a leader with a valid lease can handle reads directly without needing to contact any followers. When `false`, the leader will always contact followers, which can lead to degraded read performance. The default is `true`. We recommend leaving leader leases enabled for performance reasons.
+
+All leaders have periodic duties that they perform, including refreshing the leader lease. You can determine how frequently this occurs, in milliseconds, using the `ensemble_tick` setting. The default is 500 milliseconds. Please note that this setting must be lower than both the `lease_duration` and `follower_timeout` settings (both explained below).
+
+If you set `trust_lease` to `true`, you can also specify how long a leader lease remains valid without being refreshed using the `lease_duration` setting, which is specified in milliseconds. This setting should be higher than `ensemble_tick` to ensure that leaders have time to refresh their leases before they time out, and it _must_ be lower than `follower_timeout`, explained in the section below. The default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400, `lease_duration` will default to 600.
+
+### Worker Settings
+
+You can choose how many workers are assigned to each peer using the `peer_workers` setting. Workers are lightweight processes spawned by leaders and followers. While increasing the number of workers will make the strong consistency subsystem slightly more computationally expensive, more workers can mean improved performance in some cases, depending on the workload. The default is 1.
+
+### Timeouts
+
+You can establish timeouts for both reads and writes (puts and deletes) using the `peer_get_timeout` and `peer_put_timeout` settings, respectively. Both are expressed in milliseconds and default to 60000 (1 minute).
+
+Longer timeouts will decrease the likelihood that read or write operations will fail due to long computation times; shorter timeouts entail shorter wait times for connecting clients, but at a higher risk of failed operations under heavy load.
+
+### Merkle Tree Settings
+
+Leaders and followers in Riak's strong consistency system maintain persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for all data stored by that peer. More information can be found in the **Integrity Checking** section above. The two sections directly below describe Merkle-tree-related parameters.
+
+#### Tree Validation
+
+The `tree_validation` parameter determines whether Riak considers Merkle trees to be trusted after peers are restarted (for whatever reason). When enabled, i.e. when `tree_validation` is set to `true` (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted quorum. While this is the safest mode because it protects Riak against silent corruption in Merkle trees, it carries the drawback that it can reduce Riak availability by requiring more than a simple majority of nodes to be online and reachable when peers restart.
+
+If you are using ensembles with N=3, we strongly recommend setting `tree_validation` to `false`.
+
+#### Synchronous vs. Asynchronous Tree Updates
+
+Merkle tree updates can happen synchronously or asynchronously. This is determined by the `synchronous_tree_updates` parameter.
When set to `false`, which is the default, Riak responds to the client after the first roundtrip that updates the followers' data but before the second roundtrip required to update the followers' Merkle trees, allowing the Merkle tree update to happen asynchronously in the background; when set to `true`, Riak requires two quorum roundtrips to occur before replying back to the client, which can increase per-request latency.
+
+Please note that this setting applies only to Merkle tree updates sent to followers. Leaders _always_ update their local Merkle trees before responding to the client. Asynchronous updates can be unsafe in certain scenarios. For example, if a leader crashes before sending metadata updates to followers _and_ all followers that had acknowledged the write somehow revert the object value immediately prior to the write request, a future read could hypothetically return the immediately preceding value without realizing that the value was incorrect. Setting `synchronous_tree_updates` to `false` does bear this possibility, but it is highly unlikely.
+
+## Strong Consistency and Active Anti-Entropy
+
+Riak's [active anti-entropy][glossary aae] \(AAE) feature _can_ repair strongly consistent data. Although it is not necessary to use active anti-entropy if you are using strong consistency, we nonetheless recommend doing so.
+
+Without AAE, all object conflicts are repaired via read repair. Read repair, however, cannot repair conflicts in so-called "cold data," i.e. data that may not be read for long periods of time. While using AAE does entail small performance losses, not using AAE can lead to problems with silent on-disk corruption.
+
+## Strong Consistency and Bitcask
+
+One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability in objects' TTL. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in strongly consistent buckets only in situations when these occasional irregularities are acceptable.
+
+## Important Caveats
+
+The following Riak features are not currently available in strongly consistent buckets:
+
+* [Secondary indexes][cluster ops 2i] --- If you do attach secondary index metadata to objects in strongly consistent buckets, strongly consistent operations can still proceed, but that metadata will be silently ignored.
+* [Riak Data Types][dev data types] --- Data Types can currently be used only in an eventually consistent fashion.
+* [Using commit hooks][usage commit hooks] --- Neither pre- nor post-commit hooks are supported in strongly consistent buckets. If you do associate a strongly consistent bucket with one or more commit hooks, strongly consistent operations can proceed as normal in that bucket, but all commit hooks will be silently ignored.
+
+Furthermore, you should also be aware that strong consistency guarantees are applied only at the level of single keys.
There is currently no support within Riak for strongly consistent operations against multiple keys, although it is always possible to incorporate client-side write and read locks in applications that use strong consistency.
+
+## Known Issues
+
+There are a few known issues that you should be aware of when using the latest version of strong consistency.
+
+* **Consistent reads of never-written keys create tombstones** --- A [tombstone][cluster ops obj del] will be written if you perform a read against a key that a majority of peers claims to not exist. This is necessary for certain corner cases in which offline or unreachable replicas containing partially written data need to be rolled back in the future.
+* **Consistent keys and key listing** --- In Riak, key listing operations, such as listing all the keys in a bucket, do not filter out tombstones. While this is rarely a problem for non-strongly-consistent keys, it does present an issue for strong consistency due to the tombstone issues mentioned above.
+* **Secondary indexes not supported** --- Strongly consistent operations do not support [secondary indexes][cluster ops 2i] \(2i) at this time. Furthermore, any other metadata attached to objects, even if not related to 2i, will be silently ignored by Riak in strongly consistent buckets.
+* **Multi-Datacenter Replication not supported** --- At this time, consistent keys are *not* replicated across clusters using Multi-Datacenter Replication \(MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly consistent data within a cluster with eventually consistent data between clusters is difficult to reason about from the perspective of applications. In a future version of Riak, we will add support for strongly consistent replication across multiple datacenters/clusters.
+* **Client library exceptions** --- Basho's official [client libraries][dev client libraries] convert errors returned by Riak into generic exceptions, with a message derived from the returned server-side error message.
+
diff --git a/content/riak/kv/2.9.4/configuring/v2-multi-datacenter.md b/content/riak/kv/2.9.4/configuring/v2-multi-datacenter.md
new file mode 100644
index 0000000000..da9e66516d
--- /dev/null
+++ b/content/riak/kv/2.9.4/configuring/v2-multi-datacenter.md
@@ -0,0 +1,157 @@
+---
+title_supertext: "Configuring:"
+title: "V2 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "V2 Multi-Datacenter"
+    identifier: "configuring_v2"
+    weight: 210
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.4/ops/mdc/v2/configuration
+  - /riak/kv/2.9.4/ops/mdc/v2/configuration
+---
+
+[config v2 ssl]: {{}}riak/kv/2.9.4/configuring/v2-multi-datacenter/ssl
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.4/configuring/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication capabilities offer a variety of configurable parameters.
+
+## File
+
+The configuration for replication is kept in the `riak_repl` section of each node's `advanced.config`.
That section looks like this:
+
+```advancedconfig
+{riak_repl, [
+    {fullsync_on_connect, true},
+    {fullsync_interval, 360},
+    % Debian/Centos/RHEL:
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    % Solaris:
+    % {data_root, "/opt/riak/data/riak_repl"},
+    % FreeBSD/SmartOS:
+    % {data_root, "/var/db/riak/riak_repl"},
+    {queue_size, 104857600},
+    {server_max_pending, 5},
+    {client_ack_frequency, 5}
+  ]}
+```
+
+## Usage
+
+These settings are configured using the standard Erlang config file syntax, i.e. `{Setting, Value}`. For example, if you wished to set `ssl_enabled` to `true`, you would insert the following line into the `riak_repl` section (appending a comma if you have more settings to follow):
+
+```advancedconfig
+{riak_repl, [
+    % Other configs
+    {ssl_enabled, true},
+    % Other configs
+  ]}
+```
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by running the following command:
+
+```bash
+riak chkconfig
+```
+
+The output from this command will point you to syntactical and other errors in your configuration files.
+
+A full list of configurable parameters can be found in the sections below.
+
+## Fullsync Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster
+`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.<br>**Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects the leader will wait to get an acknowledgment from, from the remote location, before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site + +## Client Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster +`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs +`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred + +## Buffer Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes +`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes + +## Worker Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). +`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). + + +1. SSL depth is the maximum number of non-self-issued + intermediate certificates that may follow the peer certificate in a valid + certificate chain. If depth is `0`, the PEER must be signed by the trusted + ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2` + then PEER, CA, CA, ROOT-CA and so on. + +2. Each get worker spawns 2 processes, one for the work and + one for the get FSM (an Erlang finite state machine implementation for `GET` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +3. Each put worker spawns 2 processes, one for the work, and + one for the put FSM (an Erlang finite state machine implementation for `PUT` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +4. If the ACL is specified and not the special value `*`, + peers presenting certificates not matching any of the patterns will not be + allowed to connect. + If no ACLs are configured, no checks on the common name are done, except + as described for [Identical Local and Peer Common Names][config v2 ssl]. 
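+
+To illustrate how the SSL settings above fit together, here is a sketch of an `advanced.config` fragment that enables SSL for replication. The file paths and the ACL pattern are placeholders, not recommended values; substitute your own certificate material.
+
+```advancedconfig
+{riak_repl, [
+    % Encrypt replication traffic between clusters
+    {ssl_enabled, true},
+    % Placeholder paths; point these at your actual key/cert files
+    {keyfile, "/etc/riak/ssl/key.pem"},
+    {certfile, "/etc/riak/ssl/cert.pem"},
+    {cacertdir, "/etc/riak/ssl/ca"},
+    % Only accept peers whose certificate common name matches this pattern
+    {peer_common_name_acl, ["*.example.com"]}
+  ]}
+```
+
+After editing, `riak chkconfig` (shown earlier) can be used to catch syntax errors before restarting the node.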
+ diff --git a/content/riak/kv/2.9.4/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.9.4/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..8502e4d14d --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,79 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.4/ops/mdc/v2/nat + - /riak/kv/2.9.4/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/2.9.4/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.4/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` + diff --git a/content/riak/kv/2.9.4/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.9.4/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..d44ee5d746 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,368 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.4/ops/mdc/v2/quick-start + - /riak/kv/2.9.4/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.4/configuring/v3-multi-datacenter/quick-start/) instead. 
{{% /note %}}
+
+[install index]: {{}}riak/kv/2.9.4/setup/installing
+[perf index]: {{}}riak/kv/2.9.4/using/performance
+[config v2 mdc]: {{}}riak/kv/2.9.4/configuring/v2-multi-datacenter
+[cluster ops v2 mdc#status]: {{}}riak/kv/2.9.4/using/cluster-operations/v2-multi-datacenter
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through the process of configuring Riak's version 2 Replication to perform replication between two sample Riak clusters in separate networks. This guide will also cover bidirectional replication, which is accomplished by setting up unidirectional replication in both directions between the clusters.
+
+## Prerequisites
+
+This Guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [Performing system tuning][perf index]
+* [Reviewing configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following 3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived, non-routable addresses. In real-world applications, however, these addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will listen for connections from replication clients, running `riak-repl add-listener <nodename> <listen_ip> <port>` once for each listening node.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication clients where the Source Listeners are located with `riak-repl add-site <ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single Site needs to be added on the Site cluster. Once connected to the Source cluster, it will get the locations of the rest of the Listeners in the Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a Cluster1 node and a Cluster2 node. A full description of the `riak-repl status` command's output can be found in the documentation for `riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there are `listener_<nodename>` entries for each listening node, and that `leader` and `server_stats` are populated. They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and `client_stats` are populated.
They should look similar to the following: + +``` +Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010" +leader: 'riak@192.168.1.21' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"172.16.1.11",9010}, + {state,wait_for_fullsync}]}}] +``` + +### Testing Realtime Replication + +That's all there is to it! When `PUT` requests are coordinated by +Cluster1, these operations will be replicated to Cluster2. + +You can use the following example script to verify that `PUT` operations +sent to Cluster1 are being replicated to Cluster2: + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C1 to C2 consistent +``` + +## Set Up Cluster2 → Cluster1 Replication + +### About Bidirectional Replication + +Multi-Datacenter support can also be configured to replicate in both +directions, ensuring eventual consistency between your two datacenters. +Setting up bidirectional replication is as simple as repeating the steps +above in the other direction, i.e. from Cluster2 to Cluster1. + +### Set Up the Listeners on Cluster2 (Source cluster) + +On a node in Cluster2, `node4` for example, identify the nodes that will +be listening to connections from replication clients with `riak-repl +add-listener ` for each node that will be +listening for replication clients. + +```bash +riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010 +riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010 +riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010 +``` + +### Set Up the Site on Cluster1 (Site cluster) + +On a node in Cluster1, `node1` for example, inform the replication +clients where the Source Listeners are with `riak-repl add-site + `. Use the IP address(es) and port(s) you configured in +the earlier step. For `sitename` enter **Cluster2**. + +```bash +riak-repl add-site 192.168.1.21 9010 Cluster2 +``` + +### Verify the Replication Configuration + +Verify the replication configuration using `riak-repl status` on a +Cluster1 node and a Cluster2 node. A full description of the `riak-repl +status` command's output can be found in the documentation for +`riak-repl`'s [status output][cluster ops v2 mdc#status]. + +On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and +`client_stats` are populated. 
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show 'cancelled' in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
+
diff --git a/content/riak/kv/2.9.4/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.9.4/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..65df40d33e
--- /dev/null
+++ b/content/riak/kv/2.9.4/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,161 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.4/ops/mdc/v2/ssl
+  - /riak/kv/2.9.4/ops/mdc/v2/ssl
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.4/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+ * Encryption of replication data
+ * SSL certificate chain validation
+ * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+  % ...
+  {ssl_enabled, true},
+  {certfile, "/full/path/to/site1-cert.pem"},
+  {keyfile, "/full/path/to/site1-key.pem"},
+  {cacertdir, "/full/path/to/cacertsdir"}
+  % ...
+ ]}
+
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+  % ...
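+  % Exact CNs only: no wildcards are used here, so a peer presenting,
+  % say, foo.db.bashosamplecorp.com would be rejected.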
+  {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+  % ...
+ ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+  % ...
+  {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+  % ...
+ ]}
+
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+  % ...
+  {peer_common_name_acl, "*"}
+  % ...
+ ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+  % ...
+  {ssl_depth, ...}
+  % ...
+ ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self-signed.
+
+For example:
+
+ * A depth of 0 indicates that the certificate must be signed directly
+   by a root certificate authority (CA)
+ * A depth of 1 indicates that the certificate may be signed by at most
+   1 intermediate CA, followed by a root CA
+ * A depth of 2 indicates that the certificate may be signed by at most
+   2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
+
diff --git a/content/riak/kv/2.9.4/configuring/v3-multi-datacenter.md b/content/riak/kv/2.9.4/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..5b964ecfdf
--- /dev/null
+++ b/content/riak/kv/2.9.4/configuring/v3-multi-datacenter.md
@@ -0,0 +1,158 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.4/ops/mdc/v3/configuration
+  - /riak/kv/2.9.4/ops/mdc/v3/configuration
+---
+
+[config reference#advanced]: {{}}riak/kv/2.9.4/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{}}riak/kv/2.9.4/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+ +Here is a sample of the syntax: + +```advancedconfig +{riak_core, [ + %% Every *node* runs one cluster_mgr + {cluster_mgr, {"0.0.0.0", 9080 }}, + % ... +]}, +{riak_repl, [ + %% Pick the correct data_root for your platform + %% Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + %% Solaris: + %% {data_root, "/opt/riak/data/riak_repl"}, + %% FreeBSD/SmartOS: + %% {data_root, "/var/db/riak/riak_repl"}, + {max_fssource_cluster, 5}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {fullsync_on_connect, false}, + % ... +]} +``` + +## Settings + +Riak MDC configuration is set using the standard Erlang config file +syntax `{Setting, Value}`. For example, if you wished to set +`fullsync_on_connect` to `false`, you would insert this line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{fullsync_on_connect, false} +``` + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak_repl Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number. +`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. +`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster +`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process +`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication. 
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync. +`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again. +`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100MB. Dropped objects will need to be replicated with a fullsync. +`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter. +`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings). +`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings). + + +## riak_core Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`keyfile` | `path` (string) | `undefined` | Fully qualified path to an ssl `.pem` key file +`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root +`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file +`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1). +`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications +`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2). + + +## Heartbeat Settings + +There are two realtime-replication-related settings in the `riak_repl` +section of `advanced.config` related to the periodic "heartbeat" that is sent +from the source to the sink cluster to verify the sink cluster's +liveness. The `rt_heartbeat_interval` setting determines how often the +heartbeat is sent (in seconds). If a heartbeat is sent and a response is +not received, Riak will wait `rt_heartbeat_timeout` seconds before +attempting to re-connect to the sink; if any data is received from the +sink, even if it is not heartbeat data, the timer will be reset. Setting +`rt_heartbeat_interval` to `undefined` will disable the heartbeat. + +One of the consequences of lowering the timeout threshold arises when +connections are working properly but are slow to respond (perhaps due to +heavy load). In this case, shortening the timeout means that Riak may +attempt to re-connect more often that it needs to. On the other hand, +lengthening the timeout will make Riak less sensitive to cases in which +the connection really has been compromised. + +1. SSL depth is the maximum number of non-self-issued + intermediate certificates that may follow the peer certificate in a valid + certificate chain. If depth is `0`, the PEER must be signed by the trusted + ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2` + then PEER, CA, CA, ROOT-CA and so on. + +2. If the ACL is specified and not the special value `*`, + peers presenting certificates not matching any of the patterns will not be + allowed to connect. 
+ If no ACLs are configured, no checks on the common name are done, except + as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer]. + +## Default Bucket Properties + +Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0. + +To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the advanced.config file: + +```advanced.config +{riak_repl, [ + {override_capability, [ + {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] } + ]} +]} +``` + +If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed. + diff --git a/content/riak/kv/2.9.4/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.9.4/configuring/v3-multi-datacenter/nat.md new file mode 100644 index 0000000000..56d768c0c0 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/v3-multi-datacenter/nat.md @@ -0,0 +1,168 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "With NAT" + identifier: "configuring_v3_replication_nat" + weight: 101 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.4/ops/mdc/v3/nat + - /riak/kv/2.9.4/ops/mdc/v3/nat +--- + +[config v3 ssl]: {{}}riak/kv/2.9.4/configuring/v3-multi-datacenter/ssl + +Riak's Version 3 Replication supports replication of data on +networks that use static NAT. + +This can be used for replicating data over the internet where servers +have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network). + +### Requirements + +In order for Replication to work on a server configured with NAT, the +NAT addresses must be configured *statically*. + +## Configuration + +NAT rules can be configured at runtime, from the command line. + +* `riak-repl nat-map show` + + Shows the current NAT mapping table + +* `riak-repl nat-map add [:port] ` + + Adds a NAT map from the external IP, with an optional port, to an + internal IP. The port number refers to a port that is automatically + mapped to the internal `cluster_mgr` port number. + +* `riak-repl nat-map del [:port] ` + + Deletes a specific NAT map entry. + +### Applying Changes at Runtime + +* Realtime NAT replication changes will be applied once realtime is + stopped and started using the following command: + + * `riak-repl realtime stop ` + * `riak-repl realtime start ` + +* Fullsync NAT replication changes will be applied on the next run of a + fullsync, or you can stop and start the current fullsync. + + * `riak-repl fullsync stop ` + * `riak-repl fullsync start ` + + +## Example + +* Cluster_A is the **source** of replicated data. +* Cluster_B and Cluster_C are the **sinks** of the replicated data. 
+ +### Cluster_A Setup + +Cluster_A is set up with nodes using the following **internal** IP +addresses: + +Internal IP | Public IP +---------------|------------------- +`192.168.1.20` | - +`192.168.1.21` | - +`192.168.1.22` | - +`192.168.1.23` | - +`192.168.1.24` | - + +### Cluster_B Setup + +A node from Cluster_B will be configured as follows: + +Internal IP | Public IP +---------------|------------------- +`192.168.2.40` | `50.16.238.120:5555` +`192.168.2.41` | `50.16.238.121:5555` +`192.168.2.42` | `50.16.238.122:5555` +`192.168.2.43` | `50.16.238.123:5555` +`192.168.2.44` | `50.16.238.124:5555` + +In this example, the `cluster_mgr` port number is the default of `9080`, +while the configured NAT port listens on `5555`. + +### Cluster_C Setup + +A node from Cluster_C is set up with **static NAT**, configured with the +following IP addresses: + +Internal IP | Public IP +---------------|------------------- +`192.168.3.60` | `50.16.238.200:5550` +`192.168.3.61` | `50.16.238.200:5551` +`192.168.3.62` | `50.16.238.200:5552` +`192.168.3.63` | `50.16.238.200:5553` +`192.168.3.64` | `50.16.238.200:5554` + +In this example, the `cluster_mgr` port number is the default of `9080`, +while the configured NAT port listens on `5566`. + +```bash +# on any node of Cluster_A +riak-repl clustername Server_A + +# on any node of Cluster_B +riak-repl clustername Server_B + +# on any node of Cluster_C +riak-repl clustername Server_C + +# on 50.16.238.120 of Cluster_B +riak-repl nat-map add 50.16.238.120:5555 192.168.2.40 +# on 50.16.238.121 of Cluster_B +riak-repl nat-map add 50.16.238.121:5555 192.168.2.41 +# on 50.16.238.122 of Cluster_B +riak-repl nat-map add 50.16.238.122:5555 192.168.2.42 +# on 50.16.238.123 of Cluster_B +riak-repl nat-map add 50.16.238.123:5555 192.168.2.43 +# on 50.16.238.124 of Cluster_B +riak-repl nat-map add 50.16.238.124:5555 192.168.2.44 + +# on 192.168.3.60 of Cluster_C +riak-repl nat-map add 50.16.238.200:5550 192.168.3.60 +# on 192.168.3.61 of Cluster_C +riak-repl nat-map add 50.16.238.200:5551 192.168.3.61 +# on 192.168.3.62 of Cluster_C +riak-repl nat-map add 50.16.238.200:5552 192.168.3.62 +# on 192.168.3.63 of Cluster_C +riak-repl nat-map add 50.16.238.200:5553 192.168.3.63 +# on 192.168.3.64 of Cluster_C +riak-repl nat-map add 50.16.238.200:5554 192.168.3.64 + + +# Connect replication from Cluster_A to Cluster_B: +# on any node of Cluster_A +riak-repl connect 50.16.238.120:5555 +# You can connect to any node in Cluster_B with NAT mapped IP's/ports +# This command only needs to be run *once* for a cluster. + +# Connect replication from Cluster_A to Cluster_C: +# on any node of Cluster_A +riak-repl connect 50.16.238.200:5550 +# You can connect to any node in Cluster_C with NAT mapped IP's/ports +# This command only needs to be run *once* for a cluster. 
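+
+# Finally, enable and start realtime replication from the source to
+# each connected sink: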
+ + +# on any node from Cluster_A +riak-repl realtime enable Cluster_B +riak-repl realtime enable Cluster_C + +riak-repl realtime start Cluster_B +riak-repl realtime start Cluster_C +``` + diff --git a/content/riak/kv/2.9.4/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.9.4/configuring/v3-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..3a653ce662 --- /dev/null +++ b/content/riak/kv/2.9.4/configuring/v3-multi-datacenter/quick-start.md @@ -0,0 +1,169 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Quickstart" + identifier: "configuring_v3_quickstart" + weight: 100 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.4/ops/mdc/v3/quick-start + - /riak/kv/2.9.4/ops/mdc/v3/quick-start +--- + +[perf index]: {{}}riak/kv/2.9.4/using/performance +[config v3 mdc]: {{}}riak/kv/2.9.4/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter + +This guide will walk you through the process of configuring Riak's v3 +Replication to perform replication between two sample Riak clusters on +separate networks. This guide will also cover bidirectional replication, +which is accomplished by setting up unidirectional replication in both +directions between the clusters. It is important to note that both +clusters must have the same ring size, but can have a different number +of nodes. + +## Prerequisites + +This guide assumes that you have completed the following steps: + +* Install [Riak][install index] +* Perform [System Tuning][perf index] +* Review [Configuration][config v3 mdc] + +## About v3 Replication in 1.3 and higher + +In Riak's v3 Replication from Riak KV version 1.3 onwards, the nomenclature for Source and Site +clusters has changed. To more accurately reflect the behavior of each of +the clusters, "listeners" and "sites" are now known as "sources" and +"sinks." Data transfer now originates at the "source" and replicates to +the "sink;" initiation is always from the primary (source) to the backup +(sink) data center. + +Additionally, knowledge of the state of each cluster is now managed by a +**cluster manager** process, which greatly simplifies the setup and +maintenance of Multi-Datacenter replication. 
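+
+For example, the address and port the cluster manager listens on are set
+per node under `cluster_mgr` in the `riak_core` section of the node's
+configuration. A minimal sketch of that setting (assuming the default
+port `9080`) looks like this:
+
+```advancedconfig
+{riak_core, [
+  %% Every node runs one cluster manager; sinks are connected to
+  %% this address with `riak-repl connect`.
+  {cluster_mgr, {"0.0.0.0", 9080}}
+]}
+```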
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|:-----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View Your Active Connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name       <Ctrl-Pid>   [Members]
+----             ------------       ----------   ---------
+Cluster2         Cluster2           <0.7985.0>   ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name       <Ctrl-Pid>   [Members]
+----             ------------       ----------   ---------
+Cluster1         Cluster1           <0.4456.0>   ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2.
+Once this is done, bidirectional replication should be operating.
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
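+
+For example, fullsync replication against a connected sink can be enabled
+and started with the same command shape as realtime; a brief sketch,
+reusing the `Cluster2` sink name from above:
+
+```bash
+# enable fullsync replication to the named sink cluster
+riak-repl fullsync enable Cluster2
+# start a fullsync exchange now rather than waiting for the interval
+riak-repl fullsync start Cluster2
+```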
+
+
diff --git a/content/riak/kv/2.9.4/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.9.4/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..a8ac956bfa
--- /dev/null
+++ b/content/riak/kv/2.9.4/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.4/ops/mdc/v3/ssl
+  - /riak/kv/2.9.4/ops/mdc/v3/ssl
+---
+
+[config reference#advanced.config]: {{}}riak/kv/2.9.4/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+ * Encryption of replication data
+ * SSL certificate chain validation
+ * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+  % ...
+  {ssl_enabled, true},
+  {certfile, "/full/path/to/site1-cert.pem"},
+  {keyfile, "/full/path/to/site1-key.pem"},
+  {cacertdir, "/full/path/to/cacertsdir"}
+  % ...
+ ]}
+
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+  % ...
+ {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]} + % ... + ]} + +``` + +The following example will allow connections from peer certificate names +like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a +peer certificate name like `db.backup.bashosamplecorp.com`. + +```advancedconfig +{riak_core, [ + % ... + {peer_common_name_acl, ["*.bashosamplecorp.com"]} + % ... + ]} + +``` + +This example will match any peer certificate name (and is the default): + +```advancedconfig +{riak_core, [ + % ... + {peer_common_name_acl, "*"} + % ... + ]} + +``` + +## SSL CA Validation + +You can adjust the way CA certificates are validated by adding the +following to the `riak_repl` section of `advanced.config`: + +```advancedconfig +{riak_core, [ + % ... + {ssl_depth, 3} % Sets the depth to 3 + % ... + ]} + +``` + +**Note**: `ssl_depth` takes an integer parameter. + +The depth specifies the maximum number of intermediate certificates that +may follow the peer certificate in a valid certification path. The +intermediate certificates must not be self signed. + +The following example depths illustrate this: + + * a depth of `0` indicates that the certificate must be signed + directly by a root certificate authority (CA) + * a depth of `1` indicates that the certificate may be signed by at + most 1 intermediate CA's, followed by a root CA + * a depth of `2` indicates that the certificate may be signed by at + most 2 intermediate CA's, followed by a root CA + +## Compatibility + +Replication SSL for *Version 3* is available in *Riak 1.4+*. + +If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or +1.1 node, the connection will be denied and an error will be logged. + +### Self-Signed Certificates + +Read how to [generate your own CA and +keys](http://www.debian-administration.org/articles/618). Ensure that +you remove the password protection from the keys you generate. + diff --git a/content/riak/kv/2.9.4/developing.md b/content/riak/kv/2.9.4/developing.md new file mode 100644 index 0000000000..b383b83718 --- /dev/null +++ b/content/riak/kv/2.9.4/developing.md @@ -0,0 +1,76 @@ +--- +title: "Developing with Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Developing" + identifier: "developing" + weight: 300 + pre: lambda +toc: true +aliases: +--- + +[getting started]: ../developing/getting-started +[usage index]: ../developing/usage +[client libraries]: ../developing/client-libraries +[dev data types]: ../developing/data-types +[dev data modeling]: ../developing/data-modeling +[apps index]: ../developing/app-guide +[dev api index]: ../developing/api +[dev faq]: ../developing/faq + +## In This Section + +#### [Getting Started][getting started] + +Step-by-step guide for getting started developing with Riak KV. + +[Learn More >>][getting started] + +#### [Usage][usage index] + +A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types. + +[Learn More >>][usage index] + +#### [Client Libraries][client libraries] + +Overview of client libraries for a variety of programming languages and environments. + +[Learn More >>][client libraries] + +#### [Data Types][dev data types] + +Overview and guide to working with data types in Riak KV. + +[Learn More >>][dev data types] + +#### [Data Modeling][dev data modeling] + +Information on use cases and data models that are a good fit for Riak KV. 
+ +[Learn More >>][dev data modeling] + +#### [Application Guide][apps index] + +A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV. + +[Learn More >>][apps index] + +#### [APIs Reference][dev api index] + +Information and reference material on Riak KV APIs. + +[Learn More >>][dev api index] + +#### [FAQ][dev faq] + +Frequently asked questions when developing applications with Riak KV. + +[Learn More >>][dev faq] + + + diff --git a/content/riak/kv/2.9.4/developing/api.md b/content/riak/kv/2.9.4/developing/api.md new file mode 100644 index 0000000000..f0670f5383 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api.md @@ -0,0 +1,38 @@ +--- +title: "APIs" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "APIs" + identifier: "developing_apis" + weight: 107 + parent: "developing" +toc: true +--- + +[dev api http]: ./http +[dev api backend]: ./backend +[dev api pbc]: ./protocol-buffers/ + +## In This Section + +#### [HTTP APIs][dev api http] + +Documentation on Riak KV's HTTP API. + +[Learn More >>][dev api http] + +#### [Protocol Buffers][dev api pbc] + +Information on Riak KV's Protocol Buffer Client API + +[Learn More >>][dev api pbc] + +#### [Backend API][dev api backend] + +Overview of Riak KV's storage backend API. + +[Learn More >>][dev api backend] + diff --git a/content/riak/kv/2.9.4/developing/api/backend.md b/content/riak/kv/2.9.4/developing/api/backend.md new file mode 100644 index 0000000000..215c8f2c7d --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/backend.md @@ -0,0 +1,115 @@ +--- +title: "Backend API" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Backend API" + identifier: "apis_backend" + weight: 101 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.4/dev/references/backend-api + - /riak/kv/2.9.4/dev/references/backend-api +--- + +[plan backend]: {{}}riak/kv/2.9.4/setup/planning/backend + +Riak's storage API uniformly applies to all of the +[supported backends][plan backend]. This page presents the details of +the storage backend API in the form of +[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html) +(specs). + +Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html), +an Erlang static analysis tool. We recommend copying these specs into any +custom backend modules and use them as a guide for development to +avoid errors and ensure full compatibility with Riak. + +Also included below is the function export list that can be pasted directly +into a custom storage backend module. + +```erlang +%% Riak Storage Backend API +-export([api_version/0, + start/2, + stop/1, + get/3, + put/5, + delete/4, + drop/1, + fold_buckets/4, + fold_keys/4, + fold_objects/4, + is_empty/1, + status/1, + callback/3]). + +%% =================================================================== +%% Public API +%% =================================================================== + +%% @doc Return the major version of the +%% current API and a capabilities list. +%% The current valid capabilities are async_fold +%% and indexes. +-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. 
+ +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http.md b/content/riak/kv/2.9.4/developing/api/http.md new file mode 100644 index 0000000000..51c2c713f8 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http.md @@ -0,0 +1,90 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.4/dev/references/http + - /riak/kv/2.9.4/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
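+
+For example, a hypothetical key `docs/readme` in bucket `test` would be
+fetched with the slash escaped (the bucket and key names here are
+illustrative):
+
+```curl
+curl http://127.0.0.1:8098/buckets/test/keys/docs%2Freadme
+```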
+ +## Bucket-related Operations + +Method | URL | Doc +:------|:----|:--- +`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.9.4/developing/api/http/get-bucket-props) +`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.9.4/developing/api/http/set-bucket-props) +`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.4/developing/api/http/reset-bucket-props) +`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.9.4/developing/api/http/list-buckets) +`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.9.4/developing/api/http/list-keys) + +## Object-related Operations + +Method | URL | Doc +:------|:----|:--- +`GET` | `/types//buckets//keys/` | [HTTP Fetch Object]({{}}riak/kv/2.9.4/developing/api/http/fetch-object) +`POST` | `/types//buckets//keys` | [HTTP Store Object]({{}}riak/kv/2.9.4/developing/api/http/store-object) +`PUT` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.9.4/developing/api/http/store-object) +`POST` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.9.4/developing/api/http/store-object) +`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object]({{}}riak/kv/2.9.4/developing/api/http/delete-object) + +## Riak-Data-Type-related Operations + +Method | URL +:------|:---- +`GET` | `/types//buckets//datatypes/` +`POST` | `/types//buckets//datatypes` +`POST` | `/types//buckets//datatypes/` + +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.9.4/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.9.4/developing/data-types/#usage-examples) +and subpages e.g. [sets]({{}}riak/kv/2.9.4/developing/data-types/sets). + +Advanced users may consult the technical documentation inside the Riak +KV internal module `riak_kv_wm_crdt`. 
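+
+As a quick illustration of the URL scheme above, fetching a counter
+stored under a hypothetical bucket type `counters` would look like this
+(the type, bucket, and key names are assumptions for the example):
+
+```curl
+curl http://127.0.0.1:8098/types/counters/buckets/my_bucket/datatypes/my_key
+```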
+ +## Query-related Operations + +Method | URL | Doc +:------|:----|:--- +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.9.4/developing/api/http/mapreduce) +`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.4/developing/api/http/secondary-indexes) +`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.4/developing/api/http/secondary-indexes) + +## Server-related Operations + +Method | URL | Doc +:------|:----|:--- +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.9.4/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.9.4/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.9.4/developing/api/http/list-resources) + +## Search-related Operations + +Method | URL | Doc +:------|:----|:--- +`GET` | `/search/query/` | [HTTP Search Query]({{}}riak/kv/2.9.4/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.9.4/developing/api/http/search-index-info) +`GET` | `/search/index/` | [HTTP Fetch Search Index]({{}}riak/kv/2.9.4/developing/api/http/fetch-search-index) +`PUT` | `/search/index/` | [HTTP Store Search Index]({{}}riak/kv/2.9.4/developing/api/http/store-search-index) +`DELETE` | `/search/index/` | [HTTP Delete Search Index]({{}}riak/kv/2.9.4/developing/api/http/delete-search-index) +`GET` | `/search/schema/` | [HTTP Fetch Search Schema]({{}}riak/kv/2.9.4/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/` | [HTTP Store Search Schema]({{}}riak/kv/2.9.4/developing/api/http/store-search-schema) + diff --git a/content/riak/kv/2.9.4/developing/api/http/counters.md b/content/riak/kv/2.9.4/developing/api/http/counters.md new file mode 100644 index 0000000000..8d52f1c472 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/counters.md @@ -0,0 +1,79 @@ +--- +title: "HTTP Counters" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Counters" + identifier: "http_counters" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/counters + - /riak/kv/2.9.4/dev/references/http/counters +--- + +Riak counters are a CRDT (convergent replicated data type) that (eventually) +converge to the correct total. You merely increment the counter with some +integer, and any potential conflicts will be automatically resolved by Riak. + +## Setup + +Riak counters can only be used if the bucket has the `allow_mult` property +set to `true`. + +``` +curl -XPUT localhost:8098/buckets/BUCKET/props \ + -H "Content-Type: application/json" \ + -d "{\"props\" : {\"allow_mult\": true}}" +``` + +If you attempt to use counters without setting the above, you'll get this +message: + +``` +Counters require bucket property 'allow_mult=true' +``` + +## Request + +To insert just POST an integer value using the `/counters` resource. This will +increment that keyed value by the given amount. + +``` +POST /buckets/BUCKET/counters/KEY +``` + +To receive the current value is a GET using `/counters` + +``` +GET /buckets/BUCKET/counters/KEY +``` + +## Response + +The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.9.4/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.9.4/developing/api/http/fetch-object)) responses apply here. + +Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. + +## Example + +The body must be an integer (positive or negative). 
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/delete-object.md b/content/riak/kv/2.9.4/developing/api/http/delete-object.md new file mode 100644 index 0000000000..681bf67609 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/delete-object.md @@ -0,0 +1,76 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/delete-object + - /riak/kv/2.9.4/dev/references/http/delete-object +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> DELETE /buckets/test/keys/test2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/delete-search-index.md b/content/riak/kv/2.9.4/developing/api/http/delete-search-index.md new file mode 100644 index 0000000000..2358c6fdeb --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/delete-search-index.md @@ -0,0 +1,38 @@ +--- +title: "HTTP Delete Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Delete Search Index" + identifier: "http_delete_search_index" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/delete-search-index + - /riak/kv/2.9.4/dev/references/http/delete-search-index +--- + +Deletes a Riak Search index. + +## Request + +``` +DELETE /search/index/ +``` + +## Normal Response Codes + +* `204 No Content` +--- +The index was successfully deleted (also returned + if the index did not exist to begin with) + +## Typical Error Codes + +* `503 Service Unavailable` +--- +The request timed out internally + diff --git a/content/riak/kv/2.9.4/developing/api/http/fetch-object.md b/content/riak/kv/2.9.4/developing/api/http/fetch-object.md new file mode 100644 index 0000000000..cbf5a12ea1 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/fetch-object.md @@ -0,0 +1,243 @@ +--- +title: "HTTP Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Fetch Object" + identifier: "http_fetch_object" + weight: 105 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/fetch-object + - /riak/kv/2.9.4/dev/references/http/fetch-object +--- + +Reads an object from the specified bucket/key. + +## Request + +```bash +GET /types/type/buckets/bucket/keys/key +GET /buckets/bucket/keys/key +``` + +Important headers: + +* `Accept` - When `multipart/mixed` is the preferred content-type, objects with +siblings will return all siblings in single request. See [Siblings examples](#siblings-examples). See +also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1). + +Optional headers: + +* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics, +matching on the `ETag` and `Last-Modified` of the object, respectively. If the +object fails one of the tests (that is, if the ETag is equal or the object is +unmodified since the supplied timestamp), Riak will return a `304 Not Modified` +response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5). + +Optional query parameters: + +* `r` - (read quorum) how many replicas need to agree when retrieving the +object ([default is defined by the bucket]({{}}riak/kv/2.9.4/developing/api/http/set-bucket-props)) +* `pr` - how many primary replicas need to be online when doing the read +([default is defined by the bucket]({{}}riak/kv/2.9.4/developing/api/http/set-bucket-props)) +* `basic_quorum` - whether to return early in some failure cases (eg. 
when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/2.9.4/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.9.4/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.4/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. +{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT +< ETag: 6dQBm9oYA1mxRSH0e96l5W +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"foo":"bar"} +``` + +## Siblings examples + +### Manually requesting siblings + +Simple call to fetch an object that has siblings: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 102 +< +Siblings: +16vic4eU9ny46o4KPiDz1f +4v5xOg4bVwUYZdMkqf0d6I +6nr5tDTmhxnwuAFJDd2s6G +6zRSZFUJlHXZ15o9CG0BYl +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +Now request one of the siblings directly: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT +< ETag: 16vic4eU9ny46o4KPiDz1f +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/x-www-form-urlencoded +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + +### Get all siblings in one request + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: multipart/mixed +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh +< Content-Length: 766 +< + +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/x-www-form-urlencoded +Link: ; rel="up" +Etag: 16vic4eU9ny46o4KPiDz1f +Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 4v5xOg4bVwUYZdMkqf0d6I +Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6nr5tDTmhxnwuAFJDd2s6G +Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6zRSZFUJlHXZ15o9CG0BYl +Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT + +{"foo":"bar"} +--YinLMzyUR9feB17okMytgKsylvh-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/fetch-search-index.md b/content/riak/kv/2.9.4/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..aa45064e48 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/fetch-search-index.md @@ -0,0 +1,52 @@ +--- +title: "HTTP Fetch Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Fetch Search Index" + identifier: "http_fetch_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/fetch-search-index + - /riak/kv/2.9.4/dev/references/http/fetch-search-index +--- + +Retrieves information about a Riak Search [index]({{}}riak/kv/2.9.4/developing/usage/search/#simple-setup). + +## Request + +``` +GET /search/index/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +--- +No Search index with that name is currently + available +* `503 Service Unavailable` +--- +The request timed out internally + +## Response + +If the index is found, Riak will output a JSON object describing the +index, including its name, the [`n_val`]({{}}riak/kv/2.9.4/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.9.4/developing/usage/search-schemas) used by the index. 
Here is an example: + +```json +{ + "name": "my_index", + "n_val": 3, + "schema": "_yz_default" +} +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.9.4/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..02de5fbc8d --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/fetch-search-schema.md @@ -0,0 +1,41 @@ +--- +title: "HTTP Fetch Search Schema" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Fetch Search Schema" + identifier: "http_fetch_search_schema" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/fetch-search-schema + - /riak/kv/2.9.4/dev/references/http/fetch-search-schema +--- + +Retrieves a Riak KV [search schema]({{}}riak/kv/2.9.4/developing/usage/search-schemas). + +## Request + +``` +GET /search/schema/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +* `503 Service Unavailable` +--- +The request timed out internally + +## Response + +If the schema is found, Riak will return the contents of the schema as +XML (all Riak Search schemas are XML). + diff --git a/content/riak/kv/2.9.4/developing/api/http/get-bucket-props.md b/content/riak/kv/2.9.4/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..e773ad6c56 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/get-bucket-props.md @@ -0,0 +1,83 @@ +--- +title: "HTTP Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Get Bucket Properties" + identifier: "http_get_bucket_props" + weight: 100 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/get-bucket-props + - /riak/kv/2.9.4/dev/references/http/get-bucket-props +--- + +Reads the bucket or bucket type properties. + +## Request + +```bash +GET /buckets/bucket/props +``` + +Or, to read bucket properties from a bucket in a bucket type: + +```bash +GET /types/type/buckets/bucket/props +``` + +Optional query parameters (only valid for the old format): + +* `props` - whether to return the bucket properties (`true` is the default) +* `keys` - whether to return the keys stored in the bucket. (`false` is the +default). See also [HTTP List Keys]({{}}riak/kv/2.9.4/developing/api/http/list-keys). + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` + +The JSON object in the response will contain up to two entries, `"props"` and +`"keys"`, which are present or missing, according to the optional query +parameters. The default is for only `"props"` to be present. + +See [HTTP Set Bucket Properties]({{}}riak/kv/2.9.4/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.9.4/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/props +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/link-walking.md b/content/riak/kv/2.9.4/developing/api/http/link-walking.md new file mode 100644 index 0000000000..9c7728209b --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/link-walking.md @@ -0,0 +1,126 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/link-walking + - /riak/kv/2.9.4/dev/references/http/link-walking +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/2.9.4/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.9.4/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/2.9.4/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/list-buckets.md b/content/riak/kv/2.9.4/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..079aa1c80c --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/list-buckets.md @@ -0,0 +1,65 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/list-buckets + - /riak/kv/2.9.4/dev/references/http/list-buckets +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. 
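+
+For scripting, the response is easy to post-process. A minimal sketch, assuming a
+local node on port 8098 and the `jq` utility (an assumption, not a Riak dependency):
+
+```bash
+# Print one bucket name per line
+curl -s http://localhost:8098/buckets?buckets=true | jq -r '.buckets[]'
+```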
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/list-keys.md b/content/riak/kv/2.9.4/developing/api/http/list-keys.md new file mode 100644 index 0000000000..72d5e042cc --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/list-keys.md @@ -0,0 +1,77 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/list-keys + - /riak/kv/2.9.4/dev/references/http/list-keys +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/list-resources.md b/content/riak/kv/2.9.4/developing/api/http/list-resources.md new file mode 100644 index 0000000000..ce3b771e1c --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/list-resources.md @@ -0,0 +1,81 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/list-resources + - /riak/kv/2.9.4/dev/references/http/list-resources +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
+ +The standard resources are: + +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.9.4/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.9.4/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.9.4/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.9.4/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.9.4/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.9.4/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.9.4/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.9.4/developing/api/http/status) + +## Request + +```bash +GET / +``` + +Headers: + +* `Accept` - `application/json` or `text/html` + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Link` - all resources that are described in the response body, but in Link +form + +## Example + +Request JSON response + +```curl +$ curl -i http://localhost:8098 -H "Accept: application/json" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:18:31 GMT +Content-Type: application/json +Content-Length: 398 + +{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"} + +# Request HTML response +curl -i http://localhost:8098 -H "Accept: text/html" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:20:05 GMT +Content-Type: text/html +Content-Length: 666 + + +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/mapreduce.md b/content/riak/kv/2.9.4/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..deede2993d --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/mapreduce.md @@ -0,0 +1,71 @@ +--- +title: "HTTP MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "MapReduce" + identifier: "http_mapreduce" + weight: 108 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/mapreduce + - /riak/kv/2.9.4/dev/references/http/mapreduce +--- + +[MapReduce]({{}}riak/kv/2.9.4/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will flow. 
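+
+As a quick illustration before the formal request/response details below, here is a
+minimal job with a single map phase (a sketch, assuming a local node on port 8098 and
+an existing JSON object stored at bucket `test`, key `doc`):
+
+```curl
+# Run the built-in Riak.mapValuesJson function over one bucket/key input pair
+curl -XPOST http://127.0.0.1:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d '{"inputs":[["test","doc"]],
+       "query":[{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}'
+```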
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.9.4/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+**This request must include an entity (body), which is the JSON form of the MapReduce query.**
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted.
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
+
diff --git a/content/riak/kv/2.9.4/developing/api/http/ping.md b/content/riak/kv/2.9.4/developing/api/http/ping.md
new file mode 100644
index 0000000000..b8f3be0b6a
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/http/ping.md
@@ -0,0 +1,54 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/http/ping
+  - /riak/kv/2.9.4/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load balancers, and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
+
diff --git a/content/riak/kv/2.9.4/developing/api/http/reset-bucket-props.md b/content/riak/kv/2.9.4/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..a2b50eeefe
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,58 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/http/reset-bucket-props
+  - /riak/kv/2.9.4/dev/references/http/reset-bucket-props
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/bucket/props HTTP/1.1
+> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue)
+< Date: Tue, 06 Nov 2012 21:56:17 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+```
+
diff --git a/content/riak/kv/2.9.4/developing/api/http/search-index-info.md b/content/riak/kv/2.9.4/developing/api/http/search-index-info.md
new file mode 100644
index 0000000000..a5fa8d1c43
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/http/search-index-info.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Search Index Info"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Search Index Info"
+    identifier: "http_search_index_info"
+    weight: 114
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/http/search-index-info
+  - /riak/kv/2.9.4/dev/references/http/search-index-info
+---
+
+Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.9.4/developing/usage/search) in JSON format.
+
+## Request
+
+```
+GET /search/index
+```
+
+## Response
+
+If there are no currently available Search indexes, a `200 OK` will be
+returned, but with an empty list as the response value.
+ +Below is the example output if there is one Search index, called +`test_index`, currently available: + +```json +[ + { + "n_val": 3, + "name": "test_index", + "schema": "_yz_default" + } +] +``` + +#### Normal Response Codes + +* `200 OK` + +#### Typical Error Codes + +* `404 Object Not Found` +--- +Typically returned if Riak Search is not + currently enabled on the node +* `503 Service Unavailable` +--- +The request timed out internally + diff --git a/content/riak/kv/2.9.4/developing/api/http/search-query.md b/content/riak/kv/2.9.4/developing/api/http/search-query.md new file mode 100644 index 0000000000..5058640771 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/search-query.md @@ -0,0 +1,80 @@ +--- +title: "HTTP Search Query" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Search Query" + identifier: "http_search_query" + weight: 113 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/search-query + - /riak/kv/2.9.4/dev/references/http/search-query +--- + +Performs a [Riak KV Search]({{}}riak/kv/2.9.4/developing/usage/search) query. + +## Request + +``` +GET /search/query/ +``` + +## Optional Query Parameters + +* `wt` +--- +The [response + writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers) + to be used when returning the Search payload. The currently + available options are `json` and `xml`. The default is `xml`. +* `q` +--- +The actual Search query itself. Examples can be found in + [Using Search]({{}}riak/kv/2.9.4/developing/usage/search). If a query is not specified, Riak will return + information about the index itself, e.g. the number of documents + indexed. + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `400 Bad Request` +--- +Returned when, for example, a malformed query is + supplied +* `404 Object Not Found` +--- +Returned if the Search index you are + attempting to query does not exist +* `503 Service Unavailable` +--- +The request timed out internally + +## Response + +If a `200 OK` is returned, then the Search query has been successful. +Below is an example JSON response from querying an index that currently +has no documents associated with it: + +```json +{ + "response": { + "docs": [], + "maxScore": 0.0, + "numFound": 0, + "start": 0 + }, + "responseHeader": { + "status": 0, + "QTime": 10, + "params": { /* internal info from the query */ } + } +} +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/secondary-indexes.md b/content/riak/kv/2.9.4/developing/api/http/secondary-indexes.md new file mode 100644 index 0000000000..89b4e05c0a --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/secondary-indexes.md @@ -0,0 +1,92 @@ +--- +title: "HTTP Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Secondary Indexes" + identifier: "http_2i" + weight: 109 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/secondary-indexes + - /riak/kv/2.9.4/dev/references/http/secondary-indexes +--- + +[Secondary Indexes]({{}}riak/kv/2.9.4/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
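+
+For example, the full round trip of tagging an object and then querying the index
+looks like this (a sketch, assuming a local node, the default bucket type, and a
+backend that supports 2i, such as LevelDB):
+
+```curl
+# Store an object with a binary secondary index entry
+curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "Content-Type: text/plain" \
+  -H "x-riak-index-field1_bin: val1" \
+  -d 'some data'
+
+# Query the index for matching keys
+curl http://localhost:8098/buckets/mybucket/index/field1_bin/val1
+```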
+
+## Request
+
+### Exact Match
+
+```bash
+GET /buckets/mybucket/index/myindex_bin/value
+```
+
+### Range Query
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end
+```
+
+#### Range query with terms
+
+To see the index values matched by the range, use `return_terms=true`.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true
+```
+
+### Pagination
+
+Add the parameter `max_results` for pagination. This will limit the results and, if
+more results are available, provide a `continuation` value to include with the next request.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=
+```
+
+### Streaming
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?stream=true
+```
+
+## Response
+
+Normal status codes:
+
++ `200 OK`
+
+Typical error codes:
+
++ `400 Bad Request` - if the index name or index value is invalid.
++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system.
++ `503 Service Unavailable` - if the job timed out before it could complete
+
+## Example
+
+```curl
+$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 19
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+{"keys":["mykey1"]}
+```
+
diff --git a/content/riak/kv/2.9.4/developing/api/http/set-bucket-props.md b/content/riak/kv/2.9.4/developing/api/http/set-bucket-props.md
new file mode 100644
index 0000000000..e3f7101d7a
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/http/set-bucket-props.md
@@ -0,0 +1,113 @@
+---
+title: "HTTP Set Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Set Bucket Properties"
+    identifier: "http_set_bucket_props"
+    weight: 101
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/http/set-bucket-props
+  - /riak/kv/2.9.4/dev/references/http/set-bucket-props
+---
+
+Sets bucket properties like "n_val" and "allow_mult".
+
+## Request
+
+```bash
+PUT /buckets/bucket/props
+```
+
+Important headers:
+
+* `Content-Type` - `application/json`
+
+The body of the request should be a JSON object with a single entry "props".
+Unmodified bucket properties may be omitted.
+
+Available properties:
+
+* `n_val` (integer > 0) - the number of replicas for objects in this bucket
+* `allow_mult` (true or false) - whether to allow sibling objects to be created
+(concurrent updates)
+* `last_write_wins` (true or false) - whether to ignore object history (vector
+clock) when writing
+* `precommit` - [precommit hooks]({{}}riak/kv/2.9.4/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{}}riak/kv/2.9.4/developing/usage/commit-hooks)
+* `r, w, dw, rw` - default quorum values for operations on keys in the bucket.
+Valid values are:
+  * `"all"` - all nodes must respond
+  * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.*
+  * `"one"` - equivalent to 1
+  * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of diverse physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+{{% note title="Node Confirms" %}}
+`node_confirms` is a tunable for durability. When operating in a failure state, Riak will store replicas in fallback vnodes, and in some cases multiple fallbacks may be on the same physical node. `node_confirms` is an option that specifies how many distinct physical nodes must acknowledge a write for it to be considered successful.
+
+When Riak receives a PUT, it starts up a riak_kv_put_fsm (finite state machine). This prepares and then validates the options, then calls any precommit hooks, before executing a put to the local vnode in the preflist, which becomes the coordinating node. This then waits for the local vnode response before executing the put request remotely on the two remaining nodes in the preflist.
+
+The fsm then waits for the remote vnode responses; as it receives responses, it adds these results and checks whether enough results have been collected to satisfy the bucket properties such as `dw` and `pw`.
+When analysing the responses, Riak counts the number of different nodes from which results have been returned. The finite state machine can now be required to wait for a minimum number of confirmations from different nodes, whilst also ensuring all other configured options are satisfied.
+
+Once all options are satisfied, the response is returned, postcommit hooks are called, and the fsm finishes.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+  -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> Content-Length: 21 +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/status.md b/content/riak/kv/2.9.4/developing/api/http/status.md new file mode 100644 index 0000000000..c32e526b71 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/status.md @@ -0,0 +1,170 @@ +--- +title: "HTTP Status" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Status" + identifier: "http_status" + weight: 111 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/status + - /riak/kv/2.9.4/dev/references/http/status +--- + +Reports about the performance and configuration of the Riak node to which it was requested. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active. + +## Performance + +Repeated requests to the `/stats` endpoint do not have a negative +performance impact as the statistics are cached internally in Riak. + +## Request + +```bash +GET /stats +``` + +Important headers: + +* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`. + +## Response + +Normal status codes: +* `200 OK` + +Typical error codes: +* `404 Not Found` - if `riak_kv_stat` is not enabled + +Important headers: +* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks) + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /stats HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: text/plain +> +< HTTP/1.1 200 OK +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 2102 +< +{ + "vnode_gets": 0, + "vnode_puts": 0, + "read_repairs": 0, + "vnode_gets_total": 0, + "vnode_puts_total": 0, + "node_gets": 0, + "node_gets_total": 0, + "node_get_fsm_time_mean": "undefined", + "node_get_fsm_time_median": "undefined", + "node_get_fsm_time_95": "undefined", + "node_get_fsm_time_99": "undefined", + "node_get_fsm_time_100": "undefined", + "node_puts": 0, + "node_puts_total": 0, + "node_put_fsm_time_mean": "undefined", + "node_put_fsm_time_median": "undefined", + "node_put_fsm_time_95": "undefined", + "node_put_fsm_time_99": "undefined", + "node_put_fsm_time_100": "undefined", + "read_repairs_total": 0, + "cpu_nprocs": 84, + "cpu_avg1": 251, + "cpu_avg5": 174, + "cpu_avg15": 110, + "mem_total": 7946684000.0, + "mem_allocated": 4340880000.0, + "nodename": "riak@127.0.0.1", + "connected_nodes": [ + + ], + "sys_driver_version": "1.5", + "sys_global_heaps_size": 0, + "sys_heap_type": "private", + "sys_logical_processors": 2, + "sys_otp_release": "R13B04", + "sys_process_count": 189, + "sys_smp_support": true, + "sys_system_version": "Erlang R13B04 (erts-5.7.5) [[source]] [[64-bit]] [[smp:2:2]] [[rq:2]] [[async-threads:5]] [[hipe]] [[kernel-poll:true]]", + "sys_system_architecture": "i386-apple-darwin10.3.0", + "sys_threads_enabled": true, + "sys_thread_pool_size": 5, + "sys_wordsize": 8, + "ring_members": [ + "riak@127.0.0.1" + ], + "ring_num_partitions": 64, + "ring_ownership": "[{'riak@127.0.0.1',64}]", + "ring_creation_size": 64, + "storage_backend": "riak_kv_bitcask_backend", + "pbc_connects_total": 0, + "pbc_connects": 0, + "pbc_active": 0, + "riak_kv_version": "0.11.0", + "riak_core_version": "0.11.0", + "bitcask_version": "1.0.1", + "luke_version": "0.1", + "webmachine_version": "1.7.1", + "mochiweb_version": "1.7.1", + "erlang_js_version": "0.4", + "runtime_tools_version": "1.8.3", + "crypto_version": "1.6.4", + "os_mon_version": "2.9.1", + "sasl_version": "2.1.9", + "stdlib_version": "1.16.5", + "kernel_version": "2.13.5" +} +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Output Explanation + +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.9.4/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
Stat | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total | Total number of ignored gossip messages since node was started
+rings_reconciled_total | Total number of ring reconciliation operations since node was started
+rings_reconciled | Number of ring reconciliation operations in the last minute
+gossip_received | Number of gossip messages received in the last minute
+rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute
+
diff --git a/content/riak/kv/2.9.4/developing/api/http/store-object.md b/content/riak/kv/2.9.4/developing/api/http/store-object.md
new file mode 100644
index 0000000000..81cdc9f4c9
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/http/store-object.md
@@ -0,0 +1,147 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/http/store-object
+  - /riak/kv/2.9.4/dev/references/http/store-object
+---
+
+Stores an
object under the specified bucket / key. Storing an object comes in +two forms, depending on whether you want to use a key of your choosing, or let +Riak assign a key to a new object. + +## Request + +```bash +POST /types/type/buckets/bucket/keys # Riak-defined key +PUT /types/type/buckets/bucket/keys/key # User-defined key +POST /buckets/bucket/keys # Riak-defined key +PUT /buckets/bucket/keys/key # User-defined key +``` + +For the sake of compatibility with older clients, `POST` is also acceptable in +the form where the key is specified. + +Important headers: + +* `Content-Type` must be set for the stored object. Set what you expect to +receive back when next requesting it. +* `X-Riak-Vclock` if the object already exists, the vector clock attached to the +object when read. +* `X-Riak-Meta-*` - any additional metadata headers that should be stored with +the object. +* `X-Riak-Index-*` - index entries under which this object should be indexed. +[Read more about Secondary Indexing]({{}}riak/kv/2.9.4/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.4/developing/api/http/link-walking) + +Optional headers (only valid on `PUT`): + +* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since` +invoke conditional request semantics, matching on the `ETag` and `Last-Modified` +of the existing object. These can be used to prevent overwriting a modified +object. If the test fails, you will receive a `412 Precondition Failed` +response. This does not prevent concurrent writes; it is possible for the +condition to evaluate to true for multiple requests if the requests occur at the +same time. + +Optional query parameters: + +* `w` (write quorum) how many replicas to write to before returning a successful +response (default is defined by the bucket level) +* `dw` (durable write quorum) how many replicas to commit to durable storage +before returning a successful response (default is defined at the bucket level) +* `pw` how many primary replicas must be online to attempt a write (default is +defined at the bucket level) +* `returnbody=[true|false]` whether to return the contents of the stored object. + +*This request must include a body (entity).* + +## Response + +Normal status codes: + +* `201 Created` (when submitting without a key) +* `200 OK` +* `204 No Content` +* `300 Multiple Choices` + +Typical error codes: + +* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N) +* `412 Precondition Failed` if one of the conditional request headers failed to +match (see above) + +Important headers: + +* `Location` a relative URL to the newly-created object (when submitting without +a key) + +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.9.4/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +may be returned if siblings existed or were created as part of the operation, +and the response can be dealt with similarly. + +## Example: Storing Without Key + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys \ + -H "Content-Type: text/plain" -d 'this is a test' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> POST /buckets/test/keys HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: text/plain +> Content-Length: 14 +> +< HTTP/1.1 201 Created +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49 +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Example: Storing With Key + +```curl +$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA== +> Content-Length: 13 +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + diff --git a/content/riak/kv/2.9.4/developing/api/http/store-search-index.md b/content/riak/kv/2.9.4/developing/api/http/store-search-index.md new file mode 100644 index 0000000000..0c9814286a --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/http/store-search-index.md @@ -0,0 +1,59 @@ +--- +title: "HTTP Store Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Store Search Index" + identifier: "http_store_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.4/dev/references/http/store-search-index + - /riak/kv/2.9.4/dev/references/http/store-search-index +--- + +Creates a new Riak Search [index]({{}}riak/kv/2.9.4/developing/usage/search/#simple-setup). + +## Request + +``` +PUT /search/index/ +``` + +## Optional Request Body + +If you run a `PUT` request to this endpoint without a request body, Riak +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.9.4/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. + +To specify a different schema, however, you must pass Riak a JSON object +as the request body in which the `schema` field specifies the name of +the schema to use. If you've [stored a schema]({{}}riak/kv/2.9.4/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +request would create an index called `my_index` that used that schema: + +```curl +curl -XPUT http://localhost:8098/search/index/my_index \ + -H "Content-Type: application/json" \ + -d '{"schema": "my_custom_schema"}' +``` + +More information can be found in [Using Search]({{}}riak/kv/2.9.4/developing/usage/search). 
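+
+Once created, an index can be read back with a `GET` to the same path. A minimal
+sketch, assuming the `my_index` example above:
+
+```curl
+# Should return the index's name, n_val, and schema as JSON
+curl http://localhost:8098/search/index/my_index
+```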
+
+## Normal Response Codes
+
+* `204 No Content`
+---
+The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict`
+---
+The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable`
+---
+The request timed out internally
+
diff --git a/content/riak/kv/2.9.4/developing/api/http/store-search-schema.md b/content/riak/kv/2.9.4/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..132166ee68
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/http/store-search-schema.md
@@ -0,0 +1,59 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/http/store-search-schema
+  - /riak/kv/2.9.4/dev/references/http/store-search-schema
+---
+
+Creates a new Riak [Search schema]({{}}riak/kv/2.9.4/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.9.4/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response Codes
+
+* `204 No Content`
+---
+The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request`
+---
+The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict`
+---
+The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable`
+---
+The request timed out internally
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..90cb4a8bc6
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers.md
@@ -0,0 +1,190 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers
+  - /riak/kv/2.9.4/dev/references/protocol-buffers
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages. Messages are all encoded the same way, consisting of:
+
+* 32-bit length of message code + Protocol Buffers message in network
+  order
+* 8-bit message code to identify the Protocol Buffers message
+* N bytes of Protocol Buffers-encoded message
+
+### Example
+
+```
+00 00 00 07 09 0A 01 62 12 01 6B
+|----Len---|MC|----Message-----|
+
+Len = 0x07
+Message Code (MC) = 0x09 = RpbGetReq
+RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B
+
+Decoded Message:
+bucket: "b"
+key: "k"
+```
+
+## Message Codes
+
+Code | Message |
+:----|:--------|
+0 | `RpbErrorResp` |
+1 | `RpbPingReq` |
+2 | `RpbPingResp` |
+3 | `RpbGetClientIdReq` |
+4 | `RpbGetClientIdResp` |
+5 | `RpbSetClientIdReq` |
+6 | `RpbSetClientIdResp` |
+7 | `RpbGetServerInfoReq` |
+8 | `RpbGetServerInfoResp` |
+9 | `RpbGetReq` |
+10 | `RpbGetResp` |
+11 | `RpbPutReq` |
+12 | `RpbPutResp` |
+13 | `RpbDelReq` |
+14 | `RpbDelResp` |
+15 | `RpbListBucketsReq` |
+16 | `RpbListBucketsResp` |
+17 | `RpbListKeysReq` |
+18 | `RpbListKeysResp` |
+19 | `RpbGetBucketReq` |
+20 | `RpbGetBucketResp` |
+21 | `RpbSetBucketReq` |
+22 | `RpbSetBucketResp` |
+23 | `RpbMapRedReq` |
+24 | `RpbMapRedResp` |
+25 | `RpbIndexReq` |
+26 | `RpbIndexResp` |
+27 | `RpbSearchQueryReq` |
+28 | `RpbSearchQueryResp` |
+29 | `RpbResetBucketReq` |
+30 | `RpbResetBucketResp` |
+31 | `RpbGetBucketTypeReq` |
+32 | `RpbSetBucketTypeResp` |
+40 | `RpbCSBucketReq` |
+41 | `RpbCSUpdateReq` |
+50 | `RpbCounterUpdateReq` |
+51 | `RpbCounterUpdateResp` |
+52 | `RpbCounterGetReq` |
+53 | `RpbCounterGetResp` |
+54 | `RpbYokozunaIndexGetReq` |
+55 | `RpbYokozunaIndexGetResp` |
+56 | `RpbYokozunaIndexPutReq` |
+57 | `RpbYokozunaIndexPutResp` |
+58 | `RpbYokozunaSchemaGetReq` |
+59 | `RpbYokozunaSchemaGetResp` |
+60 | `RpbYokozunaSchemaPutReq` |
+80 | `DtFetchReq` |
+81 | `DtFetchResp` |
+82 | `DtUpdateReq` |
+83 | `DtUpdateResp` |
+253 | `RpbAuthReq` |
+254 | `RpbAuthResp` |
+255 | `RpbStartTls` |
+
+{{% note title="Message Definitions" %}}
+All Protocol Buffers messages are defined in the `riak.proto` and other
+`.proto` files in the `/src` directory of the
+RiakPB project.
+{{% /note %}}
+
+### Error Response
+
+If the request does not result in an error, Riak will return one of a
+variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`,
+depending on which request message is sent.
+
+If the server experiences an error processing a request, however, it
+will return an `RpbErrorResp` message instead of the response expected
+for the given request (e.g. `RpbGetResp` is the expected response to
+`RpbGetReq`). Error messages contain an error string and an error code,
+like this:
+
+```protobuf
+message RpbErrorResp {
+  required bytes errmsg = 1;
+  required uint32 errcode = 2;
+}
+```
+
+### Values
+
+* `errmsg`
+---
+A string representation of what went wrong
+* `errcode`
+---
+A numeric code. Currently, only `RIAKC_ERR_GENERAL=1`
+  is defined.
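+
+The framing is simple enough to exercise from a shell. A minimal sketch of a ping,
+assuming a local node on the default PBC port 8087 and a GNU netcat (`-q 1`; a BSD
+netcat would use `-w 1` instead): `RpbPingReq` is message code 1 with an empty body,
+so the framed request is a 4-byte length of 1 followed by the code byte.
+
+```bash
+# 00 00 00 01 = length (one byte follows), 01 = RpbPingReq
+# xxd is assumed to be available for hex-dumping the reply
+printf '\x00\x00\x00\x01\x01' | nc -q 1 127.0.0.1 8087 | xxd
+# Expected reply bytes: 00 00 00 01 02, i.e. a framed RpbPingResp
+```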
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/yz-schema-put) + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..d6035cea8e --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,31 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/auth-req + - /riak/kv/2.9.4/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.9.4/using/security/basics). + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..ada4c5b4cb --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,79 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.9.4" +menu: + riak_kv-2.9.4: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/coverage-queries + - /riak/kv/2.9.4/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
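+
+Because the request above is an ordinary Protocol Buffers message, a client
+can build it with the usual wire encoding. Below is a minimal, hedged Python
+sketch; note that the `RpbCoverageReq`/`RpbCoverageResp` message codes are
+not listed in the message-code table earlier in this document, so the code
+used here is an assumption to verify against your `riak_pb` release.
+
+```python
+import struct
+
+# Assumed message code for RpbCoverageReq -- not listed in the table in
+# this document; check riak_pb's message-code registry for your release.
+MSG_RPB_COVERAGE_REQ = 70
+
+def bytes_field(field_number: int, value: bytes) -> bytes:
+    # Length-delimited protobuf field (values shorter than 128 bytes).
+    return bytes([(field_number << 3) | 2, len(value)]) + value
+
+# RpbCoverageReq with only the required bucket (field 2) set.
+payload = bytes_field(2, b"mybucket")
+request = struct.pack(">I", len(payload) + 1) + bytes([MSG_RPB_COVERAGE_REQ]) + payload
+```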
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/delete-object.md
new file mode 100644
index 0000000000..03e491042a
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/delete-object.md
@@ -0,0 +1,101 @@
+---
+title: "PBC Delete Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Delete Object"
+    identifier: "pbc_delete_object"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/delete-object
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/delete-object
+---
+
+Delete an object in the specified [bucket type]({{}}riak/kv/2.9.4/using/cluster-operations/bucket-types)/bucket/key location.
+
+## Request
+
+```protobuf
+message RpbDelReq {
+  required bytes bucket = 1;
+  required bytes key = 2;
+  optional uint32 rw = 3;
+  optional bytes vclock = 4;
+  optional uint32 r = 5;
+  optional uint32 w = 6;
+  optional uint32 pr = 7;
+  optional uint32 pw = 8;
+  optional uint32 dw = 9;
+  optional uint32 timeout = 10;
+  optional bool sloppy_quorum = 11;
+  optional uint32 n_val = 12;
+  optional bytes type = 13;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description |
+:---------|:------------|
+`bucket` | The name of the bucket in which the object is stored
+`key` | The key under which the object is stored
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that that integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description |
+:---------|:------------|
+`rw` | How many replicas to delete before returning a successful response
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Used to prevent the deletion of objects that have been modified since the last GET request (sent as a byte array)
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the delete request will be sent
+`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter.
+
+## Response
+
+Only the message code is returned.
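+
+As a concrete illustration of the request above, here is a minimal Python
+sketch (standard library only, with the Protocol Buffers fields encoded by
+hand; an illustration, not part of any official client) that reproduces
+exactly the request frame shown in the Example below.
+
+```python
+import struct
+
+def bytes_field(field_number: int, value: bytes) -> bytes:
+    # Length-delimited protobuf field (values shorter than 128 bytes).
+    return bytes([(field_number << 3) | 2, len(value)]) + value
+
+def varint_field(field_number: int, value: int) -> bytes:
+    # Protobuf varint field (single-byte values only, for brevity).
+    assert 0 <= value < 128
+    return bytes([field_number << 3, value])
+
+# RpbDelReq (message code 13) for bucket "notabucket", key "k", rw = 1.
+payload = (
+    bytes_field(1, b"notabucket")  # bucket
+    + bytes_field(2, b"k")         # key
+    + varint_field(3, 1)           # rw
+)
+request = struct.pack(">I", len(payload) + 1) + bytes([13]) + payload
+
+# The same bytes as the request in the Example below.
+assert request == bytes.fromhex("000000120d0a0a6e6f74616275636b657412016b1801")
+```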
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65
+         74 12 01 6B 18 01
+Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>>
+
+RpbDelReq protoc decode:
+bucket: "notabucket"
+key: "k"
+rw: 1
+
+```
+
+#### Response
+
+```
+Hex      00 00 00 01 0E
+Erlang <<0,0,0,1,14>>
+
+RpbDelResp - only message code defined
+```
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-counter-store.md
new file mode 100644
index 0000000000..2401e5a744
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-counter-store.md
@@ -0,0 +1,32 @@
+---
+title: "PBC Data Type Counter Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Data Type Counter Store"
+    identifier: "pbc_dt_counter_store"
+    weight: 117
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/dt-counter-store
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/dt-counter-store
+---
+
+An operation to update a [counter]({{}}riak/kv/2.9.4/developing/data-types).
+
+## Request
+
+```protobuf
+message CounterOp {
+  optional sint64 increment = 1;
+}
+```
+
+The `increment` value specifies how much the counter will be incremented
+or decremented, depending on whether the value is positive or negative.
+This operation can be used to update counters that are stored on their
+own in a key or [within a map]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-map-store).
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-fetch.md
new file mode 100644
index 0000000000..7912f25c28
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-fetch.md
@@ -0,0 +1,128 @@
+---
+title: "PBC Data Type Fetch"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Data Type Fetch"
+    identifier: "pbc_dt_fetch"
+    weight: 114
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/dt-fetch
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/dt-fetch
+---
+
+The equivalent of [`RpbGetReq`]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.9.4/developing/data-types). This request results in a `DtFetchResp`
+message (explained in the **Response** section below).
+
+## Request
+
+```protobuf
+message DtFetchReq {
+  required bytes bucket = 1;
+  required bytes key = 2;
+  required bytes type = 3;
+  optional uint32 r = 4;
+  optional uint32 pr = 5;
+  optional bool basic_quorum = 6;
+  optional bool notfound_ok = 7;
+  optional uint32 timeout = 8;
+  optional bool sloppy_quorum = 9;
+  optional uint32 n_val = 10;
+  optional bool include_context = 11 [default=true];
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket in which the Data Type is stored
+`key` | The key where the Data Type is stored
+`type` | The [bucket type]({{}}riak/kv/2.9.4/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)
+
+#### Optional Parameters
+
+> **Note on defaults and special values**
+>
+> All of the optional parameters below have default values determined on a
+> per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-props) for more information.
+>
+> Furthermore, you can assign an integer value to the `r` and
+> `pr` parameters, provided that that integer value is less than or equal
+> to N, _or_ a special value denoting `one`
+> (`4294967295-1`), `quorum`
+> (`4294967295-2`), `all`
+> (`4294967295-3`), or `default`
+> (`4294967295-4`).
+
+Parameter | Description
+:---------|:-----------
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the fetch request will be sent
+`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client
+
+## Response
+
+The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+
+```protobuf
+message DtFetchResp {
+  enum DataType {
+    COUNTER = 1;
+    SET = 2;
+    MAP = 3;
+  }
+
+  optional bytes context = 1;
+  required DataType type = 2;
+  optional DtValue value = 3;
+}
+```
+
+If the `include_context` option is specified, an opaque "context" value
+will be returned along with the user-readable data. When sending an
+update request, the client should send this context as well, just as one
+would send a [vclock]({{}}riak/kv/2.9.4/learn/glossary/#vector-clock) for standard KV updates.
+
+The type of the Data Type is specified in the `type` field, and must be
+one of the three possible values of the `DataType` enum (`COUNTER`,
+`SET`, or `MAP`).
+
+The current value of the Data Type is contained in the `value` field,
+which itself contains a `DtValue` message. This message will have the
+following structure:
+
+```protobuf
+message DtValue {
+  optional sint64 counter_value = 1;
+  repeated bytes set_value = 2;
+  repeated MapEntry map_value = 3;
+}
+```
+
+If the Data Type queried is a counter, it will return an integer value
+for the counter; if a set, it will return the set's current value as a
+list of binaries; if a map, it will return a `MapEntry` message.
`MapEntry` messages
+are structured as follows:
+
+```protobuf
+message MapEntry {
+  required MapField field = 1;
+  optional sint64 counter_value = 2;
+  repeated bytes set_value = 3;
+  optional bytes register_value = 4;
+  optional bool flag_value = 5;
+  repeated MapEntry map_value = 6;
+}
+```
+
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-map-store.md
new file mode 100644
index 0000000000..e3dbfa34c2
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-map-store.md
@@ -0,0 +1,74 @@
+---
+title: "PBC Data Type Map Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Data Type Map Store"
+    identifier: "pbc_dt_map_store"
+    weight: 119
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/dt-map-store
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/dt-map-store
+---
+
+An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc.
+
+## Request
+
+Operations on maps are requested using a `MapOp` message, which has the following structure:
+
+```protobuf
+message MapOp {
+  repeated MapField adds = 1;
+  repeated MapField removes = 2;
+  repeated MapUpdate updates = 3;
+}
+```
+
+In a `MapOp` message, you can either add or remove fields (counters, sets, registers, flags, or maps) to or from the map, or update one or more fields. You can include as many field additions or removals and/or field updates as you wish.
+
+Adding or removing a field involves including a `MapField` message in your `MapOp` operation:
+
+```protobuf
+message MapField {
+  enum MapFieldType {
+    COUNTER = 1;
+    SET = 2;
+    REGISTER = 3;
+    FLAG = 4;
+    MAP = 5;
+  }
+  required bytes name = 1;
+  required MapFieldType type = 2;
+}
+```
+
+The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated.
+
+If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure:
+
+```protobuf
+message MapUpdate {
+  enum FlagOp {
+    ENABLE = 1;
+    DISABLE = 2;
+  }
+  required MapField field = 1;
+  optional CounterOp counter_op = 2;
+  optional SetOp set_op = 3;
+  optional bytes register_op = 4;
+  optional FlagOp flag_op = 5;
+  optional MapOp map_op = 6;
+}
+```
+
+The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-set-store).
+
+If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively).
+
+Updating a register does not involve sending a special message type.
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..d2a9fda24d --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,33 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/dt-set-store + - /riak/kv/2.9.4/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..c7da1940cb --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,129 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/dt-store + - /riak/kv/2.9.4/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.9.4/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.9.4/using/cluster-operations/bucket-types). 
+
+Also required is a `DtOp` message that specifies which operation is to
+be performed, depending on whether the Data Type being updated is a
+[counter]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-map-store).
+
+```protobuf
+message DtOp {
+  optional CounterOp counter_op = 1;
+  optional SetOp set_op = 2;
+  optional MapOp map_op = 3;
+}
+```
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, and
+`pw` parameters, provided that that integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.9.4/learn/glossary/#vector-clock)
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_body` | Whether to return the contents of the stored object. Defaults to `false`.
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be included in the `DtUpdateResp` sent to the client.
+
+## Response
+
+The response to a Data Type update request is analogous to
+[`RpbPutResp`]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/store-object) for KV operations. If the
+`return_body` is set in the update request message (as explained above),
+the message will include the opaque context of the Data Type (`context`)
+and the new value of the Data Type _after_ the update has completed
+(depending on whether the Data Type is a counter, set, or map). If no
+key was specified in the update request, it will include the
+Riak-assigned key (`key`).
+
+```protobuf
+message DtUpdateResp {
+  optional bytes key = 1;
+  optional bytes context = 2;
+  optional sint64 counter_value = 3;
+  repeated bytes set_value = 4;
+  repeated MapEntry map_value = 5;
+}
+```
+
+Assuming `return_body` is set to `true`: if a counter is updated, the
+response will include an integer as the `counter_value`; if a set is
+updated, a list of binaries will be returned as the `set_value`; and if a
+map is updated, the returned `map_value` will be a `MapEntry` message.
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..ea638c7a41 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,32 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/dt-union + - /riak/kv/2.9.4/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/dt-store) message. + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..1651b0a8ec --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,200 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/fetch-object + - /riak/kv/2.9.4/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match
+`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it
+`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+
+## Response
+
+```protobuf
+message RpbGetResp {
+  repeated RpbContent content = 1;
+  optional bytes vclock = 2;
+  optional bool unchanged = 3;
+}
+```
+
+#### Values
+
+Value | Description
+:-----|:-----------
+`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty.
+`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings
+`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true`
+
+The content entries hold the object value and any metadata.
+Below is the structure of an `RpbContent` message, which is
+included in GET/PUT responses (`RpbGetResp` (above) and
+[`RpbPutResp`]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/store-object), respectively):
+
+```protobuf
+message RpbContent {
+  required bytes value = 1;
+  optional bytes content_type = 2;
+  optional bytes charset = 3;
+  optional bytes content_encoding = 4;
+  optional bytes vtag = 5;
+  repeated RpbLink links = 6;
+  optional uint32 last_mod = 7;
+  optional uint32 last_mod_usecs = 8;
+  repeated RpbPair usermeta = 9;
+  repeated RpbPair indexes = 10;
+  optional bool deleted = 11;
+}
+```
+
+From the above, we can see that an `RpbContent` message will always
+contain the binary `value` of the object. But it could also contain any
+of the following optional parameters:
+
+* `content_type`
+---
+The content type of the object, e.g. `text/plain`
+  or `application/json`
+* `charset`
+---
+The character encoding of the object, e.g. `utf-8`
+* `content_encoding`
+---
+The content encoding of the object, e.g.
+  `video/mp4`
+* `vtag`
+---
+The object's [vtag]({{}}riak/kv/2.9.4/learn/glossary/#vector-clock)
+* `links`
+---
+This parameter is associated with the now-deprecated link
+  walking feature and should not be used by Riak clients
+* `last_mod`
+---
+A timestamp for when the object was last modified, in
+  [Unix time](http://en.wikipedia.org/wiki/Unix_time) seconds
+* `last_mod_usecs`
+---
+The microseconds portion of the last-modified
+  timestamp
+* `usermeta`
+---
+This field stores user-specified key/value metadata
+  pairs to be associated with the object. `RpbPair` messages used to
+  send metadata of this sort are structured like this:
+
+  ```protobuf
+  message RpbPair {
+    required bytes key = 1;
+    optional bytes value = 2;
+  }
+  ```
+  Notice that you can store both a key and a value, or just a key.
+ `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.9.4/developing/usage/secondary-indexes) to objects (in the optional + `indexes` field). +* `deleted` +--- +Whether the object has been deleted (i.e. whether a + tombstone for the object has been found under the specified key) + +{{% note title="Note on missing keys" %}} +Remember: if a key is not stored in Riak, an `RpbGetResp` response without the +`content` and `vclock` fields will be returned. This should be mapped to +whatever convention the client language uses to return not found. The Erlang +client, for example, returns the atom `{error, notfound}`. +{{% /note %}} + +## Example + +#### Request + +``` +Hex 00 00 00 07 09 0A 01 62 12 01 6B +Erlang <<0,0,0,7,9,10,1,98,18,1,107>> + +RpbGetReq protoc decode: +bucket: "b" +key: "k" +``` + +#### Response + +``` +Hex 00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44 + 6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B + 49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B + CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65 + 30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00 +Erlang <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122, + 56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64, + 224,185,6,18,31,107,206,97,96,96,96,204,96,226,82,44,172,194,91,63, + 101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>> + +RpbGetResp protoc decode: +content { + value: "v2" + vtag: "3SDlf4INKz8hNdhyImKIru" + last_mod: 1271442363 + last_mod_usecs: 105696 +} +vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000" +``` + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-props.md new file mode 100644 index 0000000000..cf54be2071 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-props.md @@ -0,0 +1,111 @@ +--- +title: "PBC Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Get Bucket Properties" + identifier: "pbc_get_bucket_props" + weight: 102 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/get-bucket-props + - /riak/kv/2.9.4/dev/references/protocol-buffers/get-bucket-props +--- + +Fetch a bucket's properties. + +## Request + +```protobuf +message RpbGetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.9.4/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +the `default` bucket type will be used. 
+
+## Response
+
+When an `RpbGetBucketReq` message is sent to Riak, it will respond with
+an `RpbGetBucketResp` message, which returns the bucket's properties:
+
+```protobuf
+message RpbGetBucketResp {
+  required RpbBucketProps props = 1;
+}
+```
+
+The `RpbBucketProps` value itself is structured as follows:
+
+```protobuf
+message RpbBucketProps {
+  optional uint32 n_val = 1;
+  optional bool allow_mult = 2;
+  optional bool last_write_wins = 3;
+  repeated RpbCommitHook precommit = 4;
+  optional bool has_precommit = 5 [default = false];
+  repeated RpbCommitHook postcommit = 6;
+  optional bool has_postcommit = 7 [default = false];
+  optional RpbModFun chash_keyfun = 8;
+  optional RpbModFun linkfun = 9;
+  optional uint32 old_vclock = 10;
+  optional uint32 young_vclock = 11;
+  optional uint32 big_vclock = 12;
+  optional uint32 small_vclock = 13;
+  optional uint32 pr = 14;
+  optional uint32 r = 15;
+  optional uint32 w = 16;
+  optional uint32 pw = 17;
+  optional uint32 dw = 18;
+  optional uint32 rw = 19;
+  optional bool basic_quorum = 20;
+  optional bool notfound_ok = 21;
+  optional bytes backend = 22;
+  optional bool search = 23;
+  enum RpbReplMode {
+    FALSE = 0;
+    REALTIME = 1;
+    FULLSYNC = 2;
+    TRUE = 3;
+  }
+  optional RpbReplMode repl = 24;
+  optional bytes search_index = 25;
+  optional bytes datatype = 26;
+  optional bool consistent = 27;
+}
+```
+
+#### Optional Response Values
+
+Each `RpbBucketProps` message returns all of the properties associated
+with a particular bucket. Default values for bucket properties, as well
+as descriptions of all of the above properties, can be found in the
+[configuration file]({{}}riak/kv/2.9.4/configuring/reference/#default-bucket-properties) documentation.
+
+It should be noted that the value of an `RpbBucketProps` message may
+include other message types, such as `RpbModFun` (specifying
+module-function pairs for bucket properties that require them) and
+`RpbCommitHook` (specifying the module-function pair and name of a
+commit hook). Those message types are structured like this:
+
+```protobuf
+message RpbModFun {
+  required bytes module = 1;
+  required bytes function = 2;
+}
+
+message RpbCommitHook {
+  optional RpbModFun modfun = 1;
+  optional bytes name = 2;
+}
+```
+
+{{% note title="Note on `RpbReplMode`" %}}
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
+{{% /note %}}
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-type.md
new file mode 100644
index 0000000000..147859aa2b
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-type.md
@@ -0,0 +1,34 @@
+---
+title: "PBC Get Bucket Type"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Get Bucket Type"
+    identifier: "pbc_get_bucket_type"
+    weight: 112
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/get-bucket-type
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/get-bucket-type
+---
+
+Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.9.4/using/cluster-operations/bucket-types).
+
+## Request
+
+```protobuf
+message RpbGetBucketTypeReq {
+  required bytes type = 1;
+}
+```
+
+Only the name of the bucket type needs to be specified (under `type`).
+
+## Response
+
+A bucket type's properties will be sent to the client as part of an
+[`RpbBucketProps`]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-props) message.
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/get-client-id.md
new file mode 100644
index 0000000000..fc13a49e24
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/get-client-id.md
@@ -0,0 +1,62 @@
+---
+title: "PBC Get Client ID"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Get Client ID"
+    identifier: "pbc_get_client_id"
+    weight: 127
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/get-client-id
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/get-client-id
+---
+
+{{% note title="Deprecation notice" %}}
+The use of client IDs in conflict resolution is now deprecated in Riak. If you
+are building or maintaining a Riak client that is intended to be compatible
+with Riak 1.4 or later, you can safely ignore client IDs.
+{{% /note %}}
+
+Get the client id used for this connection. Client ids are used for
+conflict resolution and each unique actor in the system should be
+assigned one. A client id is assigned randomly when the socket is
+connected and can be changed using [Set Client ID]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/set-client-id).
+
+## Request
+
+Just the `RpbGetClientIdReq` message code. No request message defined.
+
+## Response
+
+```protobuf
+// Get ClientId Request - no message defined, just send the
+// RpbGetClientIdReq message code
+message RpbGetClientIdResp {
+    required bytes client_id = 1; // Client id in use for this connection
+}
+```
+
+## Example
+
+Request
+
+```
+Hex      00 00 00 01 03
+Erlang <<0,0,0,1,3>>
+```
+
+
+Response
+
+```
+Hex      00 00 00 07 04 0A 04 01 65 01 B5
+Erlang <<0,0,0,7,4,10,4,1,101,1,181>>
+
+RpbGetClientIdResp protoc decode:
+client_id: "\001e\001\265"
+```
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/list-buckets.md
new file mode 100644
index 0000000000..3a1ea94257
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/list-buckets.md
@@ -0,0 +1,79 @@
+---
+title: "PBC List Buckets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "List Buckets"
+    identifier: "pbc_list_buckets"
+    weight: 100
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/list-buckets
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/list-buckets
+---
+
+List all of the bucket names available.
+
+{{% note title="Caution" %}}
+This call can be expensive for the server. Do not use in performance-sensitive
+code.
+{{% /note %}}
+
+
+## Request
+
+Only the message code is required.
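+
+As a quick sketch (standard library Python, not an official client), the
+entire request frame can be built in one line, since `RpbListBucketsReq` is
+message code 15 with no payload; the bytes match the Example request below.
+
+```python
+import struct
+
+# RpbListBucketsReq: 4-byte length (just the message code), then code 15.
+request = struct.pack(">I", 1) + bytes([15])
+assert request == bytes.fromhex("000000010f")
+```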
+
+## Response
+
+
+```protobuf
+message RpbListBucketsResp {
+  repeated bytes buckets = 1;
+}
+```
+
+
+Values
+
+* `buckets`
+---
+Buckets on the server
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 01 0F
+Erlang <<0,0,0,1,15>>
+
+RpbListBucketsReq - only message code defined
+```
+
+
+#### Response
+
+```bash
+Hex      00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62
+         34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02
+         62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37
+Erlang <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10,
+         3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>>
+
+RpbListBucketsResp protoc decode:
+buckets: "b1"
+buckets: "b5"
+buckets: "b4"
+buckets: "b8"
+buckets: "b3"
+buckets: "b10"
+buckets: "b9"
+buckets: "b2"
+buckets: "b6"
+buckets: "b7"
+```
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/list-keys.md
new file mode 100644
index 0000000000..b19e42ec84
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/list-keys.md
@@ -0,0 +1,100 @@
+---
+title: "PBC List Keys"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "List Keys"
+    identifier: "pbc_list_keys"
+    weight: 101
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/list-keys
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/list-keys
+---
+
+List all of the keys in a bucket. This is a streaming call, with
+multiple response messages sent for each request.
+
+{{% note title="Not for production use" %}}
+This operation requires traversing all keys stored in the cluster and should
+not be used in production.
+{{% /note %}}
+
+## Request
+
+```protobuf
+message RpbListKeysReq {
+  required bytes bucket = 1;
+}
+```
+
+Required Parameters
+
+* `bucket`
+---
+The bucket to get keys from
+
+## Response
+
+```protobuf
+message RpbListKeysResp {
+  repeated bytes keys = 1;
+  optional bool done = 2;
+}
+```
+
+#### Values
+
+* `keys`
+---
+Batch of keys in the bucket
+* `done`
+---
+Set to `true` on the last response packet
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73
+Erlang <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>>
+
+RpbListKeysReq protoc decode:
+bucket: "listkeys"
+
+```
+
+#### Response Packet 1
+
+```bash
+Hex      00 00 00 04 12 0A 01 34
+Erlang <<0,0,0,4,18,10,1,52>>
+
+RpbListKeysResp protoc decode:
+keys: "4"
+
+```
+
+#### Response Packet 2
+
+```bash
+Hex      00 00 00 08 12 0A 02 31 30 0A 01 33
+Erlang <<0,0,0,8,18,10,2,49,48,10,1,51>>
+
+RpbListKeysResp protoc decode:
+keys: "10"
+keys: "3"
+```
+
+
+#### Response Packet 3
+
+```bash
+Hex      00 00 00 03 12 10 01
+Erlang <<0,0,0,3,18,16,1>>
+
+RpbListKeysResp protoc decode:
+done: true
+
+```
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/mapreduce.md
new file mode 100644
index 0000000000..064c22869d
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/mapreduce.md
@@ -0,0 +1,158 @@
+---
+title: "PBC MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "MapReduce"
+    identifier: "pbc_mapreduce"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/mapreduce
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/mapreduce
+---
+
+Execute a MapReduce job.
+
+## Request
+
+
+```protobuf
+message RpbMapRedReq {
+  required bytes request = 1;
+  required bytes content_type = 2;
+}
+```
+
+
+Required Parameters
+
+* `request`
+---
+The MapReduce job
+* `content_type`
+---
+The encoding of the MapReduce job
+
+MapReduce jobs can be encoded in two different ways:
+
+* `application/json`
+---
+JSON-encoded MapReduce job
+* `application/x-erlang-binary`
+---
+Erlang external term format
+
+The JSON encoding is the same as the [REST API]({{}}riak/kv/2.9.4/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{}}riak/kv/2.9.4/developing/app-guide/advanced-mapreduce/#erlang).
+
+## Response
+
+The results of the MapReduce job are returned for each phase that
+generates a result, encoded in the same format the job was submitted in.
+Multiple response messages will be returned followed by a final message
+at the end of the job.
+
+```protobuf
+message RpbMapRedResp {
+  optional uint32 phase = 1;
+  optional bytes response = 2;
+  optional bool done = 3;
+}
+```
+
+
+Values
+
+* `phase`
+---
+Phase number of the MapReduce job
+* `response`
+---
+Response encoded with the `content_type` submitted
+* `done`
+---
+Set to `true` on the last response packet
+
+## Example
+
+Here is an example of submitting a JSON-encoded job to sum up a bucket
+full of JSON-encoded values:
+
+```
+{"inputs": "bucket_501653",
+ "query":
+    [{"map": {"arg": null,
+              "name": "Riak.mapValuesJson",
+              "language": "javascript",
+              "keep": false}},
+     {"reduce": {"arg": null,
+                 "name": "Riak.reduceSum",
+                 "language": "javascript",
+                 "keep": true}}]}
+```
+
+Request
+
+```bash
+Hex      00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
+         22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
+         33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
+         6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
+         6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
+         6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
+         2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
+         61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
+         70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
+         65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
+         6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
+         69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
+         22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
+         61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
+         3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
+         69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
+Erlang <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
+         117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
+         121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
+         110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
+         109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
+         110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
+         116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
+         44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
+         32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
+         46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
+         97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
+         34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
+         112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>
+
+RpbMapRedReq protoc decode:
+request: "{\"inputs\": \"bucket_501653\", \"query\": [{\"map\": {\"arg\": null,
+\"name\": \"Riak.mapValuesJson\", \"language\": \"javascript\", \"keep\": false}},
+ {\"reduce\": {\"arg\": null, \"name\": \"Riak.reduceSum\",
"language": +"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/ping.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..d5aa2f6b5f --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/ping.md @@ -0,0 +1,43 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/ping + - /riak/kv/2.9.4/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..344d8b09cf --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,60 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/2.9.4/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/2.9.4/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
+
+## Example
+
+Request to reset the properties for the bucket `friends`:
+
+#### Request
+
+```bash
+Hex      00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
+Erlang <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>
+
+RpbResetBucketReq protoc decode:
+bucket: "friends"
+
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 01 1E
+Erlang <<0,0,0,1,30>>
+
+RpbResetBucketResp - only message code defined
+```
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/search.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..69643470c0
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/search.md
@@ -0,0 +1,175 @@
+---
+title: "PBC Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Search"
+    identifier: "pbc_search"
+    weight: 109
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/search
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/search
+---
+
+Send a Search request to retrieve a list of documents, along with a few
+stats.
+
+## Request
+
+
+```protobuf
+message RpbSearchQueryReq {
+  required bytes q = 1;
+  required bytes index = 2;
+  optional uint32 rows = 3;
+  optional uint32 start = 4;
+  optional bytes sort = 5;
+  optional bytes filter = 6;
+  optional bytes df = 7;
+  optional bytes op = 8;
+  repeated bytes fl = 9;
+  optional bytes presort = 10;
+}
+```
+
+Required Parameters
+
+* `q`
+---
+The contents of the query
+* `index`
+---
+The name of the index to search
+
+Optional Parameters
+
+* `rows`
+---
+The maximum number of rows to return
+* `start`
+---
+A start offset, i.e. the number of keys to skip before
+  returning values
+* `sort`
+---
+How the search results are to be sorted
+* `filter`
+---
+Filters the search with an additional query scoped to inline
+  fields
+* `df`
+---
+Override the `default_field` setting in the schema file
+* `op`
+---
+`and` or `or`, to override the `default_op` operation setting
+  in the schema file
+* `fl`
+---
+The list of fields to return
+* `presort`
+---
+Presort the results. The options are `key` or `score`
+
+
+## Response
+
+The results of a search query are returned as a repeating list of 0 or
+more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
+more key/value pairs (`RpbPair`) that match the given request
+parameters. The response also includes the maximum search score and the
+number of results.
+
+
+```protobuf
+// RpbPair is a generic key/value pair datatype used for
+// other message types
+message RpbPair {
+  required bytes key = 1;
+  optional bytes value = 2;
+}
+
+message RpbSearchDoc {
+  repeated RpbPair fields = 1;
+}
+
+message RpbSearchQueryResp {
+  repeated RpbSearchDoc docs = 1;
+  optional float max_score = 2;
+  optional uint32 num_found = 3;
+}
+```
+
+Values
+
+* `docs`
+---
+A list of docs that match the search request
+* `max_score`
+---
+The top score returned
+* `num_found`
+---
+The total number of values matched by this
+  search
+
+
+## Example
+
+Request
+
+Here we search for any animals whose names begin with the string `pig`. We only
+want the first 100, and sort the values by a `name` field.
+ +```bash +RpbSearchQueryReq protoc decode: +q: "pig*" +index: "animals" +rows: 100 +start: 0 +sort: "name" + +Hex 00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E + 69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65 +Erlang <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110, + 105,109,97,108,115,24,100,32,0,42,4,110,97, + 109,101>> +``` + +Response + +```bash +Hex 00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D + 61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65 + 12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69 + 6D 61 6C 12 06 70 69 67 65 6F 6E 18 02 +Erlang <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109, + 97,108,18,3,112,105,103,10,12,10,4,110,97, + 109,101,18,4,102,114,101,100,10,18,10,16,10, + 6,97,110,105,109,97,108,18,6,112,105,103, + 101,111,110,24,2>> + +RpbSearchQueryResp protoc decode: +docs { + fields { + key: "animal" + value: "pig" + } + fields { + key: "name" + value: "fred" + } +} +docs { + fields { + key: "animal" + value: "pigeon" + } +} +num_found: 2 +``` + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/secondary-indexes.md new file mode 100644 index 0000000000..4007efa926 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/secondary-indexes.md @@ -0,0 +1,122 @@ +--- +title: "PBC Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Secondary Indexes" + identifier: "pbc_secondary_indexes" + weight: 108 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/secondary-indexes + - /riak/kv/2.9.4/dev/references/protocol-buffers/secondary-indexes +--- + +Request a set of keys that match a secondary index query. + +## Request + +```protobuf +message RpbIndexReq { + enum IndexQueryType { + eq = 0; + range = 1; + } + required bytes bucket = 1; + required bytes index = 2; + required IndexQueryType qtype = 3; + optional bytes key = 4; + optional bytes range_min = 5; + optional bytes range_max = 6; + optional bool return_terms = 7; + optional bool stream = 8; + optional uint32 max_results = 9; + optional bytes continuation = 10; + optional uint32 timeout = 11; + optional bytes type = 12; + optional bytes term_regex = 13; + optional bool pagination_sort = 14; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`index` | The name of the index to be queried +`qtype` | The type of index query to be performed. 
This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`key` | The exact index value to match if `qtype` is set to `eq`
+`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
+`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
+`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
+`max_results` | If pagination is turned on, the number of results to be returned to the client
+`continuation` | The opaque continuation value returned by a previous paginated query, used to retrieve the next page of results
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.9.4/developing/usage/bucket-types).
+`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
+`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
+
+## Response
+
+The results of a Secondary Index query are returned as a repeating list
+of 0 or more keys that match the given request parameters.
+
+```protobuf
+message RpbIndexResp {
+  repeated bytes keys = 1;
+  repeated RpbPair results = 2;
+  optional bytes continuation = 3;
+  optional bool done = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`keys` | A list of keys that match the index request
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/fetch-object).
+`continuation` | Used for paginated responses
+`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
+
+## Example
+
+#### Request
+
+Here we look for any exact matches of `chicken` on an `animal_bin` index
+for a bucket named `farm`. 
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/server-info.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..229297bf29 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,59 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/server-info + - /riak/kv/2.9.4/dev/references/protocol-buffers/server-info +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..364bcacfec --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,69 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/2.9.4/dev/references/protocol-buffers/set-bucket-props +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/2.9.4/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
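+
+In practice you will usually set bucket properties through a client
+library rather than by building this message by hand. The following is
+a minimal sketch using the official Erlang client (`riakc`), assuming
+the client is on your code path and a node is listening on
+`127.0.0.1:8087`; it mirrors the wire-level example below:
+
+```erlang
+%% Connect and enable allow_mult on the bucket "friends"
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
+```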
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..cf6a83fc8f --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,32 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/2.9.4/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.9.4/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/get-bucket-props). + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..c401ace8b9 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,63 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/set-client-id + - /riak/kv/2.9.4/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
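+
+For reference, here is a minimal sketch using the official Erlang
+client (`riakc`), which still exposes this deprecated operation,
+assuming a node is listening on `127.0.0.1:8087`:
+
+```erlang
+%% Set the client ID for this connection. Client IDs are deprecated
+%% and safe to ignore on Riak 1.4 and later (see the note above).
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+riakc_pb_socket:set_client_id(Pid, <<"my_client_id">>).
+```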
+ 
+## Example
+
+Request
+
+```bash
+Hex      00 00 00 07 05 0A 04 01 65 01 B6
+Erlang   <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "001e001266"
+
+```
+
+
+Response
+
+```bash
+Hex      00 00 00 01 06
+Erlang   <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
+
diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..fa274c345e
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,151 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/references/protocol-buffers/store-object
+  - /riak/kv/2.9.4/dev/references/protocol-buffers/store-object
+---
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/2.9.4/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.9.4/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.9.4/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/2.9.4/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/2.9.4/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+  required bytes bucket = 1;
+  optional bytes key = 2;
+  optional bytes vclock = 3;
+  required RpbContent content = 4;
+  optional uint32 w = 5;
+  optional uint32 dw = 6;
+  optional bool return_body = 7;
+  optional uint32 pw = 8;
+  optional bool if_not_modified = 9;
+  optional bool if_none_match = 10;
+  optional bool return_head = 11;
+  optional uint32 timeout = 12;
+  optional bool asis = 13;
+  optional bool sloppy_quorum = 14;
+  optional uint32 n_val = 15;
+  optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw`, provided that the integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier RpbGetResp message. Omit if this is a new key or if you deliberately want to create a sibling. 
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`. +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_head` | Return the metadata for the now-stored object without returning the value of the object +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored + +The `if_not_modified`, `if_none_match`, and `asis` parameters are set +only for messages sent between nodes in a Riak cluster and should not be +set by Riak clients. + +#### Response + +```bash +message RpbPutResp { + repeated RpbContent contents = 1; + optional bytes vclock = 2; + optional bytes key = 3; +} +``` + +If `return_body` is set to `true` on the PUT request, the `RpbPutResp` +will contain the current object after the PUT completes, in `contents`, +as well as the object's [causal context]({{}}riak/kv/2.9.4/learn/concepts/causal-context), in the `vclock` +field. The `key` will be sent only if the server generated a random key +for the object. + +If `return_body` is not set and no key is generated, the PUT response +will be empty. + +## Example + +#### Request + +``` +Hex 00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B + 22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01 +Erlang <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34, + 98,97,114,34,125,40,2,56,1>> + +RpbPutReq protoc decode: +bucket: "b" +key: "k" +content { + value: "{"foo":"bar"}" +} +w: 2 +return_body: true + +``` + +#### Response + +``` +Hex 00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A + 22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39 + 36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF + B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60 + CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC + 0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0 + 12 FA 20 89 2C 00 +Erlang <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125, + 42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86, + 106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96, + 96,96,202,96,226,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33, + 182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>> + +RpbPutResp protoc decode: +contents { + value: "{"foo":"bar"}" + vtag: "1caykOD96iNAhomyeVjOYC" + last_mod: 1271453743 + last_mod_usecs: 406416 +} +vclock: "k316a```312`312005R,,351014206031L211214y254014Z!266G371 +302l315I254rw|240022372 211,000" + +``` + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-delete.md new file mode 100644 index 0000000000..9503e80c02 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-delete.md @@ -0,0 +1,34 @@ +--- +title: "PBC Yokozuna Index Delete" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Yokozuna Index Delete" + identifier: "pbc_yz_index_delete" + weight: 122 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/yz-index-delete 
+ - /riak/kv/2.9.4/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/#message-codes) code with no data on success. + + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..32e4e74925 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,60 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/yz-index-get + - /riak/kv/2.9.4/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..459a1a6e35 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,46 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/yz-index-put + - /riak/kv/2.9.4/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). 
Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/#message-codes) code with no data on success. + + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-schema-get.md new file mode 100644 index 0000000000..3e14019c9c --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-schema-get.md @@ -0,0 +1,49 @@ +--- +title: "PBC Yokozuna Schema Get" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Yokozuna Schema Get" + identifier: "pbc_yz_schema_get" + weight: 123 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/yz-schema-get + - /riak/kv/2.9.4/dev/references/protocol-buffers/yz-schema-get +--- + +Fetch a [search schema]({{}}riak/kv/2.9.4/developing/usage/search-schemas) from Riak Search. + +## Request + +In a request message, you only need to specify the name of the schema as +a binary (under `name`); + +```protobuf +message RpbYokozunaSchemaGetReq { + required bytes name = 1; // Schema name +} +``` + +## Response + +```protobuf +message RpbYokozunaSchemaGetResp { + required RpbYokozunaSchema schema = 1; +} +``` + +The response message will include a `RpbYokozunaSchema` structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message includes the schema `name` and its xml `content`. + diff --git a/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-schema-put.md new file mode 100644 index 0000000000..07a5230f5a --- /dev/null +++ b/content/riak/kv/2.9.4/developing/api/protocol-buffers/yz-schema-put.md @@ -0,0 +1,42 @@ +--- +title: "PBC Yokozuna Schema Put" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Yokozuna Schema Put" + identifier: "pbc_yz_schema_put" + weight: 124 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.4/dev/references/protocol-buffers/yz-schema-put + - /riak/kv/2.9.4/dev/references/protocol-buffers/yz-schema-put +--- + +Create a new Solr [search schema]({{}}riak/kv/2.9.4/developing/usage/search-schemas). + +## Request + +```protobuf +message RpbYokozunaSchemaPutReq { + required RpbYokozunaSchema schema = 1; +} +``` + +Each message must contain a `RpbYokozunaSchema` object structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.9.4/developing/usage/search-schemas) `content` as XML. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/#message-codes) code with no data on success. 
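+
+As a minimal sketch of storing a schema through the official Erlang
+client (`riakc`), assuming a schema file named `my_schema.xml` exists on
+disk (an illustrative name) and a node is listening on `127.0.0.1:8087`:
+
+```erlang
+%% Read the Solr schema XML and upload it under the name "my_schema"
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+{ok, SchemaXml} = file:read_file("my_schema.xml").
+riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXml).
+```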
+
+
diff --git a/content/riak/kv/2.9.4/developing/api/repl-hooks.md b/content/riak/kv/2.9.4/developing/api/repl-hooks.md
new file mode 100644
index 0000000000..26b2b01152
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/api/repl-hooks.md
@@ -0,0 +1,193 @@
+---
+title_supertext: "Riak Multi-Datacenter Replication:"
+title: "Hooks API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Multi-Datacenter REPL Hooks API"
+    identifier: "apis_repl_hooks"
+    weight: 100
+    parent: "developing_apis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.4/ops/mdc/v2/hooks
+  - /riak/kv/2.9.4/ops/mdc/v2/hooks
+---
+[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl
+
+This document is a guide to developing extensions for Riak's
+Multi-Datacenter Replication feature.
+
+## Replication Hooks
+
+Riak allows applications to register replication hooks to control
+either of the following:
+
+* when extra objects need to be replicated along with the current object
+* when an object should _not_ be replicated.
+
+To register a hook, you must call the following function in an
+application-specific Erlang module, where `MyMod` is to be replaced
+with the name of your custom module:
+
+```erlang
+riak_core:register([{repl_helper, MyMod}]).
+```
+
+## Replication Hook API
+
+A replication hook must implement the following functions:
+
+### send_realtime/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook controls whether an [object][object]
+replicated in realtime should be sent. To send this object, return `ok`;
+to prevent the object from being sent, return `cancel`. You can also
+return a list of Riak objects to be replicated immediately *before* the
+current object. This is useful when you have an object that refers to
+other objects, e.g. a chunked file, and want to ensure that all of the
+dependency objects are replicated before the dependent object.
+
+### send/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook is used in fullsync replication. To send this
+[object][object],
+return `ok`; to prevent the object from being sent, return `cancel`. You
+can also return a list of Riak objects to be replicated immediately
+*before* the current object. This is useful when you have an object
+that refers to other objects, e.g. a chunked file, and want to ensure
+that all of the dependency objects are replicated before the dependent
+object.
+
+### recv/1
+
+```erlang
+(riak_object) -> ok | cancel
+```
+
+When an [object][object]
+is received by the client site, this hook is run. You can use it to
+update metadata or to deny the object.
+
+## Implementing a Sample Replication Hook
+
+The following is a simple replication hook that will log when an object
+is received via replication. For more information about the functions in
+the sample, see the [Replication Hook API](#replication-hook-api) section above.
+
+Here is the relevant Erlang code:
+
+```erlang
+%% Riak Enterprise MDC replication hook sample
+
+-module(riak_replication_hook_sample).
+-export([register/0]).
+-export([recv/1, send/2, send_realtime/2]). 
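+
+%% register/0 wires this module into riak_core as a repl_helper;
+%% recv/1, send/2, and send_realtime/2 implement the hook API
+%% described in the previous section.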
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + + diff --git a/content/riak/kv/2.9.4/developing/app-guide.md b/content/riak/kv/2.9.4/developing/app-guide.md new file mode 100644 index 0000000000..9ccd5e71f6 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/app-guide.md @@ -0,0 +1,487 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/2.9.4/dev/using/application-guide/ + - /riak/kv/2.9.4/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/2.9.4/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.9.4/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.9.4/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/2.9.4/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.9.4/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.9.4/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.9.4/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.9.4/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.9.4/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.9.4/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.9.4/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.9.4/developing/usage/search +[use ref search]: {{}}riak/kv/2.9.4/using/reference/search +[usage 2i]: {{}}riak/kv/2.9.4/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.9.4/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.9.4/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.9.4/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.9.4/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.9.4/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.9.4/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.9.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.9.4/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.4/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/2.9.4/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/2.9.4/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.4/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.4/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.4/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.4/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.4/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.4/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.9.4/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.9.4/using/reference/strong-consistency +[cluster 
ops strong consistency]: {{}}riak/kv/2.9.4/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/2.9.4/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/2.9.4/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/2.9.4/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/2.9.4/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/2.9.4/setup/installing
+[getting started]: {{}}riak/kv/2.9.4/developing/getting-started
+[usage index]: {{}}riak/kv/2.9.4/developing/usage
+[glossary]: {{}}riak/kv/2.9.4/learn/glossary
+[usage search schema]: {{}}riak/kv/2.9.4/developing/usage/search-schemas
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answers to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data**
+---
+While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects**
+---
+Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects**
+---
+Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys**
+---
+It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects. Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]**
+---
+If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so. 
But if
+  your data can be modeled as a [counter][dev data types#counters],
+  [set][dev data types#sets], or [map][dev data types#maps], you
+  should seriously consider using [Riak Data Types][dev data types],
+  which can speed application development and transfer a great deal of
+  complexity away from the application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you use it to store:
+
+* **Objects that exceed 1-2MB in size**
+---
+If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies**
+---
+If your data cannot be
+  easily denormalized or if it requires that objects can be easily
+  assembled into and accessible as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommendable for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search]
+---
+Getting started with Riak Search
+* [Search Details][use ref search]
+---
+A detailed overview of the concepts and design
+  consideration behind Riak Search
+* [Search Schema][usage search schema]
+---
+How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API**
+---
+Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon).
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you. 
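+
+As a minimal sketch of what this looks like from code, here is a Search
+query issued through the official Erlang client (`riakc`), assuming an
+index named `famous` has already been created and associated with a
+bucket, and a node is listening on `127.0.0.1:8087`:
+
+```erlang
+%% Query the "famous" index for names beginning with "Lion"
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>).
+%% Results carries the matching docs plus max_score and num_found
+```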
+ +### When Not to Use Search + +* **When deep pagination is needed** +--- +At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** +--- +In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] +--- +A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] +--- +A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] +--- +An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** +--- +If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** +--- +If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** +--- +If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** +--- +While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
If you have data that requires
+  a modeling solution that can't be covered, you should stick to
+  standard K/V operations.
+* **When object size is of significant concern**
+---
+Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth. Riak
+comes equipped with a set of default MapReduce jobs that you can employ,
+or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce]
+---
+A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce]
+---
+A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only**
+---
+You should use MapReduce only when
+  truly necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do**
+---
+Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i]
+---
+A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i]
+---
+Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination**
+---
+At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead. 
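+
+As a minimal sketch of paginated 2i access through the official Erlang
+client (`riakc`), assuming a `users` bucket with a `field1_bin` index
+(both names are illustrative) and a node listening on `127.0.0.1:8087`:
+
+```erlang
+%% Fetch the first page of up to 100 keys whose field1_bin index
+%% matches <<"val1">>; the result includes a continuation that can
+%% be passed back via {continuation, C} to fetch the next page
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+{ok, Results} = riakc_pb_socket:get_index_eq(Pid, <<"users">>,
+                    {binary_index, "field1"}, <<"val1">>,
+                    [{max_results, 100}]).
+```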
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes**
+---
+If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes.
+* **If you're using Bitcask**
+---
+2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+    - [Java][obj model java]
+    - [Ruby][obj model ruby]
+    - [Python][obj model python]
+    - [C#][obj model csharp]
+    - [NodeJS][obj model nodejs]
+    - [Erlang][obj model erlang]
+    - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy in accordance with these Data Types. To see if
+this feature might be a good fit for your application, we recommend
+checking out the following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers that want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable? 
+ +Although Riak always performs best when storing and retrieving immutable +data, Riak also handles mutable objects very ably using a variety of +eventual consistency principles. Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] +--- +Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] +--- +A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] +--- +How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] +--- +A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] +--- +A listing of frequently used terms in Riak's + documentation + + diff --git a/content/riak/kv/2.9.4/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.9.4/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..15ceaeb690 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,805 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/2.9.4/dev/advanced/mapreduce/ + - /riak/kv/2.9.4/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/2.9.4/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.9.4/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.9.4/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.9.4/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.9.4/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.9.4/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. 
+ +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. Riak KV allows you to +run MapReduce jobs using Erlang or JavaScript. + +{{% note title="Deprecation Warning" %}} +Javascript MapReduce is deprecated and will be removed in a future version. +{{% /note %}} + + +### Why Do We Use MapReduce for Querying Riak KV? + +Key/value stores like Riak KV generally do not offer the kinds of complex +querying capabilities found in other data storage systems, such as +relational databases. MapReduce enables you to perform powerful queries +over the data stored in Riak KV but should be used with caution. + +The main goal of MapReduce is to spread the processing of a query across +many systems to take advantage of parallel processing power. This is +generally done by dividing the query into several steps, i.e. dividing +the dataset into several chunks and then running those step/chunk pairs +on separate physical hosts. Riak KV's MapReduce has an additional goal: +increasing data locality. When processing a large dataset, it's often +much more efficient to take the computation to the data than it is to +bring the data to the computation. + +"Map" and "Reduce" are phases in the query process. Map functions take +one piece of data as input and produce zero or more results as output. +If you're familiar with [mapping over a list][mapping list] +in functional programming languages, you're already familiar with the +"Map" steps in a MapReduce query. + +## MapReduce caveats + +MapReduce should generally be treated as a fallback rather than a +standard part of an application. There are often ways to model data +such that dynamic queries become single key retrievals, which are +dramatically faster and more reliable in Riak KV, and tools such as Riak +search and 2i are simpler to use and may place less strain on a +cluster. + +### R=1 + +One consequence of Riak KV's processing model is that MapReduce queries +have an effective `R` value of 1. The queries are distributed +to a representative sample of the cluster where the data is expected to +be found, and if one server lacks a copy of data it's supposed to have, +a MapReduce job will not attempt to look for it elsewhere. + +For more on the value of `R`, see our documentation on [replication properties][apps replication properties]. + +### Key lists + +Asking Riak KV to generate a list of all keys in a production environment +is generally a bad idea. It's an expensive operation. + +Attempting to constrain that operation to a bucket (e.g., +`mapred_bucket` as used below) does not help because Riak KV must still +pull all keys from storage to determine which ones are in the +specified bucket. + +If at all possible, run MapReduce against a list of known keys. + +### Code distribution + +As we'll discuss in this document, the functions invoked from Erlang +MapReduce must be available on all servers in the cluster unless +using the client library from an Erlang shell. + +### Security restrictions + +If Riak's security functionality is enabled, there are two +restrictions on MapReduce that come into play: + +* The `riak_kv.mapreduce` permission must be granted to the user (or + via the user's groups) +* Other than the module `riak_kv_mapreduce`, any Erlang modules + distributed with Riak KV will **not** be accessible to custom MapReduce + code unless made available via the `add_path` mechanism documented + in [Installing Custom Code][use ref custom code]. 
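+
+For example, granting the MapReduce permission to an existing user might
+look like the following (a sketch; the user name `dev` is illustrative,
+and your security configuration may differ):
+
+```bash
+riak-admin security grant riak_kv.mapreduce on any to dev
+```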
+
+## How Riak KV's MapReduce Queries Are Specified
+
+MapReduce queries in Riak KV have two components: (1) a list of inputs and
+(2) a list of "steps," or "phases."
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any is included, with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. 
+For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it.
+
+### How a Link Phase Works in Riak KV
+
+Link phases find links matching patterns specified in the query
+definition. The patterns specify which buckets and tags links must have.
+
+"Following a link" means adding it to the output list of this phase. The
+output of this phase is often most useful as input to a map phase or to
+another reduce phase.
+
+## Invoking MapReduce
+
+To illustrate some key ideas, we'll define a simple module that
+implements a map function to return the bucket/key pairs contained in a
+bucket and use it in a MapReduce query via Riak KV's HTTP API.
+
+Here is our example MapReduce function:
+
+```erlang
+-module(mr_example).
+
+-export([get_keys/3]).
+
+% Returns bucket and key pairs from a map phase
+get_keys(Value, _KeyData, _Arg) ->
+    [{riak_object:bucket(Value), riak_object:key(Value)}].
+```
+
+Save this file as `mr_example.erl` and proceed to compiling the module.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the
+Riak KV installation or the version of Erlang used when compiling Riak KV from
+source.
+{{% /note %}}
+
+Compiling the module is a straightforward process:
+
+```bash
+erlc mr_example.erl
+```
+
+Successful compilation will result in a new `.beam` file, `mr_example.beam`.
+
+Send this file to your operator, or read about [installing custom code][use ref custom code]
+on your Riak KV nodes. Once your file has been installed, all that
+remains is to try the custom function in a MapReduce query. For
+example, let's return the keys contained in a bucket named `messages`
+(substitute a bucket that contains keys in your environment):
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}'
+```
+
+The result should be a JSON map of bucket and key names expressed as key/value pairs.
+
+{{% note %}}
+Be sure to install the MapReduce function as described above on all of
+the nodes in your cluster to ensure proper operation.
+{{% /note %}}
+
+
+## Phase functions
+
+MapReduce phase functions have the same properties, arguments, and
+return values whether you write them in JavaScript or Erlang.
+
+### Map phase functions
+
+Map functions take three arguments (in Erlang, arity-3 is required).
+Those arguments are:
+
+ 1. `Value`: the value found at a key. This will be a Riak object, which
+    in Erlang is defined and manipulated by the `riak_object` module.
+    In JavaScript, a Riak object looks like this:
+
+    ```javascript
+    {
+      "bucket_type" : BucketTypeAsString,
+      "bucket" : BucketAsString,
+      "key" : KeyAsString,
+      "vclock" : VclockAsString,
+      "values" : [
+        {
+          "metadata" : {
+            "X-Riak-VTag":VtagAsString,
+            "X-Riak-Last-Modified":LastModAsString,
+            "Links":[...List of link objects],
+            // ...other metadata...
+          },
+          "data" : ObjectData
+        },
+        // ...other metadata/data values (siblings)...
+      ]
+    }
+    ```
+ 2. `KeyData`: key data that was submitted with the inputs to the query or phase.
+ 3. `Arg`: a static argument for the entire phase that was submitted with the query.
+
+A map phase should produce a list of results. You will see errors if
+the output of your map function is not a list. Return the empty list if
+your map function chooses not to produce output. If your map phase is
+followed by another map phase, the output of the function must be
+compatible with the input to a map phase: a list of bucket-key pairs or
+`bucket-key-keydata` triples.
+
+#### Map function examples
+
+These map functions return the value (data) of the object being mapped:
+
+```erlang
+fun(Value, _KeyData, _Arg) ->
+    [riak_object:get_value(Value)]
+end.
+```
+
+These map functions filter their inputs based on the arg and return bucket-key pairs for a subsequent map phase:
+
+```erlang
+fun(Value, _KeyData, Arg) ->
+    Key = riak_object:key(Value),
+    Bucket = riak_object:bucket(Value),
+    case erlang:byte_size(Key) of
+        L when L > Arg ->
+            [{Bucket,Key}];
+        _ -> []
+    end
+end.
+```
+
+### Reduce phase functions
+
+Reduce functions take two arguments. Those arguments are:
+
+1. `ValueList`: the list of values produced by the preceding phase in the MapReduce query.
+2. `Arg`: a static argument for the entire phase that was submitted with the query.
+
+A reduce function should produce a list of values, but it must also be
+true that the function is commutative, associative, and idempotent. That
+is, if the input list `[a,b,c,d]` is valid for a given F, then all of
+the following must produce the same result:
+
+```erlang
+  F([a,b,c,d])
+  F([a,d] ++ F([c,b]))
+  F([F([a]),F([c]),F([b]),F([d])])
+```
+
+#### Reduce function examples
+
+These reduce functions assume the values in the input are numbers and
+sum them:
+
+```erlang
+fun(Values, _Arg) ->
+    [lists:foldl(fun erlang:'+'/2, 0, Values)]
+end.
+```
+
+These reduce functions sort their inputs:
+
+```erlang
+fun(Values, _Arg) ->
+    lists:sort(Values)
+end.
+```
+
+## MapReduce Examples
+
+Riak KV supports describing MapReduce queries in Erlang syntax through the
+Protocol Buffers API. This section demonstrates how to do so using the
+Erlang client.
+
+{{% note title="Distributing Erlang MapReduce Code" %}}
+Any modules and functions you use in your Erlang MapReduce calls must be
+available on all nodes in the cluster. Please read about
+[installing custom code]({{}}riak/kv/2.9.4/using/reference/custom-code).
+{{% /note %}}
+
+### Erlang Example
+
+Before running some MapReduce queries, let's create some objects to
+run them on. Unlike the first example when we compiled
+`mr_example.erl` and distributed it across the cluster, this time
+we'll use the [Erlang client library][erlang client] and shell.
+
+```erlang
+1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087).
+2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>,
+                        term_to_binary(["eggs", "bacon"])).
+3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>,
+                         term_to_binary(["bread", "bacon"])).
+4> riakc_pb_socket:put(Client, Yours, [{w, 1}]).
+5> riakc_pb_socket:put(Client, Mine, [{w, 1}]).
+```
+
+Now that we have a client and some data, let's run a query that counts
+the occurrences of each grocery item.
+
+```erlang
+6> Count = fun(G, undefined, none) ->
+               [dict:from_list([{I, 1}
+                   || I <- binary_to_term(riak_object:get_value(G))])]
+           end.
+7> Merge = fun(Gcounts, none) ->
+               [lists:foldl(fun(G, Acc) ->
+                                dict:merge(fun(_, X, Y) -> X+Y end,
+                                           G, Acc)
+                            end,
+                            dict:new(),
+                            Gcounts)]
+           end.
+8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred(
+                          Client,
+                          [{<<"groceries">>, <<"mine">>},
+                           {<<"groceries">>, <<"yours">>}],
+                          [{map, {qfun, Count}, none, false},
+                           {reduce, {qfun, Merge}, none, true}]).
+9> L = dict:to_list(R).
+```
+
+{{% note title="Riak Object Representations" %}}
+Note how the `riak_object` module is used in the MapReduce
+function but the `riakc_obj` module is used on the client.
+Riak objects are represented differently internally to the cluster than
+they are externally.
+{{% /note %}}
+
+Given the lists of groceries we created, the sequence of commands above
+would result in `L` being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`.
+
+### Erlang Query Syntax
+
+`riakc_pb_socket:mapred/3` takes a client and two lists as arguments.
+The first list contains bucket-key pairs. The second list contains
+the phases of the query.
+
+`riakc_pb_socket:mapred_bucket/3` replaces the first list of
+bucket-key pairs with the name of a bucket; see the warnings above
+about using this in a production environment.
+
+#### Inputs
+
+The `mapred/3` input objects are given as a list of tuples in the
+format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and
+`Key` should be binaries, and `KeyData` can be any Erlang term. The
+former form is equivalent to `{{Bucket,Key},undefined}`.
+
+#### Query
+
+The query is given as a list of map, reduce, and link phases. Map and
+reduce phases are each expressed as tuples in the following form:
+
+```erlang
+{Type, FunTerm, Arg, Keep}
+```
+
+`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument
+(any Erlang term) to pass to each execution of the phase. `Keep` is
+either `true` or `false` and determines whether results from the phase
+will be included in the final value of the query. Riak KV assumes that the
+final phase will return results.
+
+`FunTerm` is a reference to the function that the phase will execute and
+takes any of the following forms:
+
+* `{modfun, Module, Function}` where `Module` and `Function` are atoms
+  that name an Erlang function in a specific module
+* `{qfun, Fun}` where `Fun` is a callable fun term (closure or anonymous
+  function)
+* `{jsfun, Name}` where `Name` is a binary that, when evaluated in
+  JavaScript, points to a built-in JavaScript function
+* `{jsanon, Source}` where `Source` is a binary that, when evaluated in
+  JavaScript, is an anonymous function
+* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains
+  the source for an anonymous JavaScript function
+
+{{% note title="qfun Note" %}}
+Using `qfun` in compiled applications can be a fragile
+operation. Please keep the following points in mind:
+
+1. The module in which the function is defined must be present and
+exactly the same version on both the client and Riak KV nodes.
+
+2. Any modules and functions used by this function (or any function in
+the resulting call stack) must also be present on the Riak KV nodes.
+
+Failures to ensure points 1 and 2 often surface as surprising, opaque
+missing-function or function-clause errors. Especially in the case of
+differing module versions, this can be difficult to diagnose without
+expecting the issue and knowing about `Module:module_info/0`.
+
+When using the Erlang shell, anonymous MapReduce functions can be
+defined and sent to Riak KV instead of deploying them to all servers in
+advance, but condition #2 above still holds.
+{{% /note %}}
+
+Link phases are expressed in the following form:
+
+```erlang
+{link, Bucket, Tag, Keep}
+```
+
+`Bucket` is either a binary name of a bucket to match, or the atom `_`,
+which matches any bucket. `Tag` is either a binary tag to match, or the
+atom `_`, which matches any tag. `Keep` has the same meaning as in map
+and reduce phases.
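+
+For illustration, here is a minimal sketch that chains a link phase into
+a map phase using the Erlang client. The `people` bucket and `friend`
+tag are hypothetical; `riak_kv_mapreduce:map_object_value/3` is one of
+the prebuilt functions mentioned below.
+
+```erlang
+%% A sketch, assuming objects in a hypothetical <<"people">> bucket
+%% carry links tagged <<"friend">>: follow the links, then map over
+%% the linked objects to return their values.
+riakc_pb_socket:mapred(
+    Pid,
+    [{<<"people">>, <<"alice">>}],
+    [{link, <<"people">>, <<"friend">>, false},
+     {map, {modfun, riak_kv_mapreduce, map_object_value}, none, true}]).
+```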
+
+
+> There is a small group of prebuilt Erlang MapReduce functions available
+> with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl).
+
+## Bigger Data Examples
+
+### Loading Data
+
+This Erlang script will load historical stock-price data for Google
+(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it.
+Paste the code below into a file called `load_data.erl` inside the `dev`
+directory (or download it below).
+
+```erlang
+#!/usr/bin/env escript
+%% -*- erlang -*-
+main([]) ->
+    io:format("Requires one argument: filename with the CSV data~n");
+main([Filename]) ->
+    {ok, Data} = file:read_file(Filename),
+    Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])),
+    lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines).
+
+format_and_insert(Line) ->
+    JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line),
+    Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]),
+    io:format("Inserting: ~s~n", [hd(Line)]),
+    os:cmd(Command).
+```
+
+Make the script executable:
+
+```bash
+chmod +x load_data.erl
+```
+
+Download the CSV file of stock data linked below and place it in the
+`dev` directory where we've been working.
+
+* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv)
+---
+Google historical stock data
+* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb)
+---
+Alternative script in Ruby to load the data
+* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl)
+---
+Erlang script to load data (as shown in snippet)
+
+Now load the data into Riak KV.
+
+```bash
+./load_data.erl goog.csv
+```
+
+### Map only: find the days on which the high was over $600.00
+
+From the Erlang shell with the client library loaded, let's define a
+function that will check each value in our `goog` bucket to see if
+the stock's high for the day was above $600.
+
+```erlang
+> HighFun = fun(O, _, LowVal) ->
+>     {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+>     High = proplists:get_value(<<"High">>, Map, -1.0),
+>     case High > LowVal of
+>         true -> [riak_object:key(O)];
+>         false -> []
+>     end end.
+#Fun
+```
+
+Now we'll use `mapred_bucket/3` to send that function to the cluster.
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]).
+ {ok,[{0,
+      [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>,
+       <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>,
+       <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>,
+       <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>,
+       <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>,
+       <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>,
+       <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>,
+       <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]}
+```
+
+### Map only: find the days on which the close is lower than open
+
+This example is slightly more complicated: instead of comparing a
+single field against a fixed value, we're looking for days when the
+stock declined.
+
+```erlang
+> CloseLowerFun = fun(O, _, _) ->
+>     {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+>     Close = proplists:get_value(<<"Close">>, Map, -1.0),
+>     Open = proplists:get_value(<<"Open">>, Map, -2.0),
+>     case Close < Open of
+>         true -> [riak_object:key(O)];
+>         false -> []
+>     end end.
+#Fun
+
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]).
+{ok,[{0,
+      [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>,
+       <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>,
+       <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>,
+       <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>,
+       <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>,
+       <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>,
+       <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>,
+       <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]}
+```
+
+### Map and Reduce: find the maximum daily variance in price by month
+
+Here things start to get tricky. We'll use map to determine each day's
+rise or fall, and our reduce phase will identify each month's largest
+variance.
+
+```erlang
+DailyMap = fun(O, _, _) ->
+    {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+    Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")),
+    High = proplists:get_value(<<"High">>, Map, 0.0),
+    Low = proplists:get_value(<<"Low">>, Map, 0.0),
+    Month = string:substr(Date, 1, 7),
+    [{Month, abs(High - Low)}]
+end.
+
+MonthReduce = fun(List, _) ->
+    {Highs, _} = lists:foldl(
+        fun({Month, _Value}=Item, {Accum, PrevMonth}) ->
+            case Month of
+                PrevMonth ->
+                    %% Highest value is always first in the list, so
+                    %% skip over this one
+                    {Accum, PrevMonth};
+                _ ->
+                    {[Item] ++ Accum, Month}
+            end
+        end,
+        {[], ""},
+        List),
+    Highs
+  end.
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]).
+{ok,[{1,
+      [{"2010-02",10.099999999999909},
+       {"2006-02",11.420000000000016},
+       {"2004-08",8.100000000000009},
+       {"2008-08",14.490000000000009},
+       {"2006-05",11.829999999999984},
+       {"2005-10",4.539999999999964},
+       {"2006-06",7.300000000000011},
+       {"2008-06",9.690000000000055},
+       {"2006-03",11.770000000000039},
+       {"2006-12",4.880000000000052},
+       {"2005-09",9.050000000000011},
+       {"2008-03",15.829999999999984},
+       {"2008-09",14.889999999999986},
+       {"2010-04",9.149999999999977},
+       {"2008-06",14.909999999999968},
+       {"2008-05",13.960000000000036},
+       {"2005-05",2.780000000000001},
+       {"2005-07",6.680000000000007},
+       {"2008-10",21.390000000000043},
+       {"2009-09",4.180000000000007},
+       {"2006-08",8.319999999999993},
+       {"2007-08",5.990000000000009},
+       {[...],...},
+       {...}|...]}]}
+```
+
+### A MapReduce Challenge
+
+Here is a scenario involving the data you already have loaded.
+
+MapReduce Challenge: Find the largest day for each month in terms of
+dollars traded, and subsequently the largest overall day.
+
+*Hint*: You will need at least one each of map and reduce phases.
+
+## Streaming MapReduce
+
+Because Riak KV distributes the map phases across the cluster to increase
+data locality, you can gain access to the results of those individual
+computations as they finish via streaming. Streaming can be very
+helpful when getting access to results from a high-latency MapReduce job
+that contains only map phases. Streaming of results from reduce phases
+isn't as useful, but if your map phases return data (`keep: true`), they
+will be returned to the client even if the reduce phases haven't
+executed.
+This lets you use streaming with a reduce phase to collect the results
+of the map phases while the job runs and then receive the reduce result
+at the end.
+
+### Streaming via the HTTP API
+
+You can enable streaming with MapReduce jobs submitted to the `/mapred`
+resource by adding `?chunked=true` to the URL. The response will be sent
+using HTTP/1.1 chunked transfer encoding with `Content-Type: multipart/mixed`.
+Be aware that if you are streaming a set of serialized objects (like
+JSON objects), the chunks are not guaranteed to be separated along the
+same boundaries that your serialized objects are. For example, a chunk
+may end in the middle of a string representing a JSON object, so you
+will need to decode and parse your responses appropriately in the
+client.
+
+### Streaming via the Erlang API
+
+You can use streaming with Erlang via the Riak KV local client or the
+Erlang Protocol Buffers API. In either case, you will provide the call
+to `mapred_stream` with a `Pid` that will receive the streaming results.
+
+For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl).
+
+
+## Troubleshooting MapReduce, illustrated
+
+The most important advice: when developing Erlang MapReduce against
+Riak KV, prototype against a development environment using the Erlang
+shell. The shell allows for rapid feedback and iteration; once code
+needs to be deployed to a server for production use, changing it is
+more time-consuming.
+
+### Module not in path
+
+```bash
+$ curl -XPOST localhost:8098/mapred \
+> -H 'Content-Type: application/json' \
+> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}'
+
+{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"}
+```
+
+### Node in process of starting
+
+If a query is submitted while a node is still starting up, you may see a
+`function_clause` error from `chashbin` like the one below; wait for the
+node to finish starting and retry the request.
+
+```bash
+$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}'
+
+500 Internal Server Error
+
+Internal Server Error
+
+The server encountered an error while processing this request:
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, typically the information you need is buried more deeply within the stack.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
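+
+A quick check in the shell confirms the off-by-one: extracting the
+month prefix of a date string requires starting at position 1.
+
+```erlang
+> string:substr("2009-06-10", 1, 7).
+"2009-06"
+```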
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
+
diff --git a/content/riak/kv/2.9.4/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.9.4/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..9ef1783487
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,68 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+---
+
+Cluster metadata is a subsystem inside Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/2.9.4/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.9.4/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.9.4/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
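+
+For illustration only, here is a minimal sketch of that interface, run
+from a Riak node's Erlang shell; the `{<<"myapp">>, <<"settings">>}`
+prefix and the key are hypothetical:
+
+```erlang
+%% A sketch, assuming a prefix and key of our own invention; values
+%% can be arbitrary Erlang terms.
+FullPrefix = {<<"myapp">>, <<"settings">>},
+ok = riak_core_metadata:put(FullPrefix, max_widgets, 100),
+100 = riak_core_metadata:get(FullPrefix, max_widgets),
+ok = riak_core_metadata:delete(FullPrefix, max_widgets).
+```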
+
diff --git a/content/riak/kv/2.9.4/developing/app-guide/reference.md b/content/riak/kv/2.9.4/developing/app-guide/reference.md
new file mode 100644
index 0000000000..cc6daf91a7
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/app-guide/reference.md
@@ -0,0 +1,17 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+#menu:
+#  riak_kv-2.9.4:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+---
+
+**TODO: Add content**
+
diff --git a/content/riak/kv/2.9.4/developing/app-guide/replication-properties.md b/content/riak/kv/2.9.4/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..13308af4f9
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/app-guide/replication-properties.md
@@ -0,0 +1,589 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/advanced/replication-properties
+  - /riak/kv/2.9.4/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/2.9.4/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/2.9.4/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/2.9.4/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/2.9.4/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.9.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.9.4/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/2.9.4/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on each read or write request, as
+shown in the [section below]({{}}riak/kv/2.9.4/developing/app-guide/replication-properties#client-level-replication-settings) on client-level replication settings.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.9.4/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
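+
+For example, here is a hedged sketch with the Erlang client: the
+`accounts` bucket and the key are hypothetical, and the write inherits
+`n_val=5`, `r=3`, and `w=3` from the bucket type.
+
+```erlang
+%% A sketch; any bucket of type <<"custom_props">> inherits the
+%% replication properties set on the type.
+Obj = riakc_obj:new({<<"custom_props">>, <<"accounts">>},
+                    <<"alice">>,
+                    <<"some data">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```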
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/2.9.4/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.9.4/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/2.9.4/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_object:new({<<"w_equals_3">>, <<"animal_facts">>},
+                       <<"giraffe">>,
+                       <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.9.4/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
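+
+To experiment with strict quorum on a per-request basis, here is a
+minimal sketch using the Erlang client (the bucket and key are
+hypothetical):
+
+```erlang
+%% A sketch: require two primary vnodes for the read and the write.
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"accounts">>, <<"alice">>,
+                                [{pr, 2}]),
+ok = riakc_pb_socket:put(Pid, riakc_obj:update_value(Obj, <<"new value">>),
+                         [{pw, 2}]).
+```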
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
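+
+Per-request DW works the same way as W; a minimal sketch with the
+Erlang client, assuming `Obj` is an object you have already built:
+
+```erlang
+%% A sketch: require a durable write on two vnodes before success.
+riakc_pb_socket:put(Pid, Obj, [{dw, 2}]).
+```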
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/2.9.4/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.9.4/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.9.4/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
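+
+Both settings can also be supplied per request; a sketch with the
+Erlang client (the bucket and key are hypothetical):
+
+```erlang
+%% A sketch: search thoroughly, but give up after a quorum of
+%% not-found responses rather than waiting for all N.
+riakc_pb_socket:get(Pid, <<"animal_facts">>, <<"unicorn">>,
+                    [{notfound_ok, false}, {basic_quorum, true}]).
+```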
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all`
+---
+All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one`
+---
+This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum`
+---
+A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default`
+---
+Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
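+
+The Erlang client accepts these symbolic names as atoms wherever an
+integer value would go; a brief sketch (the bucket and key are
+hypothetical):
+
+```erlang
+%% A sketch: read with R=all, then write back with W=one.
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"animal_facts">>, <<"chimpanzee">>,
+                                [{r, all}]),
+riakc_pb_socket:put(Pid, Obj, [{w, one}]).
+```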
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{\"stats\":{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{\"stats\":{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/2.9.4/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/2.9.4/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.9.4/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+
+*Tuning CAP Controls in Riak* from Basho Technologies on Vimeo.
+
diff --git a/content/riak/kv/2.9.4/developing/app-guide/strong-consistency.md b/content/riak/kv/2.9.4/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..506d1e599e
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/app-guide/strong-consistency.md
@@ -0,0 +1,258 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/advanced/strong-consistency
+  - /riak/kv/2.9.4/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/2.9.4/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/2.9.4/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/2.9.4/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/2.9.4/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/2.9.4/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/2.9.4/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/2.9.4/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.4/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/2.9.4/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/2.9.4/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/2.9.4/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/2.9.4/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/2.9.4/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/2.9.4/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/2.9.4/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/2.9.4/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/2.9.4/developing/client-libraries
+[getting started]: {{}}riak/kv/2.9.4/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/2.9.4/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent and partition
+tolerant) for the data stored in those buckets. Depending on your use
+case, you can store some or all of your data in strongly consistent
+buckets. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability because a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable, or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
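+
+For reference, the core of that procedure is a single setting in each
+node's `riak.conf` (the linked page covers the full requirements, such
+as the minimum cluster size):
+
+```riakconf
+strong_consistency = on
+```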
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
+
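+For example, an initial write to a key in a bucket of this type might
+look like the following over HTTP (the `accounts` bucket and `alice`
+key are illustrative; because the key does not yet exist, no causal
+context is required):
+
+```curl
+curl -XPUT http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice \
+  -H "Content-Type: application/json" \
+  -d '{"balance": 100}'
+```
+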
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak work quite differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations enable you to set values for a
+variety of [replication properties][apps replication properties] either on each request or at the
+bucket level [using bucket types][usage bucket types], those settings are silently ignored
+for strongly consistent operations. These settings include `r`, `pr`,
+`w`, `rw`, and others. Two replication properties that _can_ be set,
+however, are `n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
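+
+For example, a bucket type that follows this recommendation might be
+created as follows (the `strongly_consistent_n5` name is illustrative):
+
+```bash
+riak-admin bucket-type create strongly_consistent_n5 \
+    '{"props":{"consistent":true,"n_val":5}}'
+```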
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients, but with important advantages over vector
+clocks.
+
+While we strongly recommend attaching context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---they are purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to make writes to non-existing keys without attaching context,
+we recommend doing this only if you are certain that the key does not
+yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
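+
+As a minimal illustration over HTTP (bucket, key, and header value here
+are illustrative), the context arrives as the `X-Riak-Vclock` header on
+a fetch and is passed back with the subsequent write:
+
+```curl
+# Fetch the object; the response carries its causal context
+curl -i http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice
+#
+# ...
+# X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# Pass the same context back with the update
+curl -XPUT http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice \
+  -H "Content-Type: application/json" \
+  -H "X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=" \
+  -d '{"balance": 200}'
+```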
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure,
+   you should default to supplying a context.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not
+   supply one, the update will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates. A retry
+   sketch for this case is shown below.
+
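+Below is a minimal sketch of retry logic for the third point, using the
+official Python client (the bucket, key, and function names are
+illustrative). Note that the entire fetch/modify/store cycle is retried:
+repeating a bare put without re-reading the object will simply keep
+failing, because the causal context it carries is stale.
+
+```python
+import riak
+
+client = riak.RiakClient()
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+def update_with_retry(key, modify, retries=5):
+    for _ in range(retries):
+        # Re-fetch each time so the write carries a fresh causal context
+        obj = bucket.get(key)
+        obj.data = modify(obj.data)
+        try:
+            obj.store()  # fails if another client updated the key first
+            return obj
+        except riak.RiakError:
+            continue  # lost the race; fetch again and retry
+    raise RuntimeError('strongly consistent update failed after retries')
+```
+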
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+buckets that are not strongly consistent. One important exception is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, and so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a key that already holds an object without
+including a causal context, you will receive the following error:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+<html><head><title>412 Precondition Failed</title></head><body><h1>Precondition Failed</h1>Precondition Failed<p><hr><address>mochiweb+webmachine web server</address></body></html>
+``` + +> **Getting Started with Riak KV clients** +> +> If you are connecting to Riak using one of Basho's official +[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section. + +## Known Issue with Client Libraries + +All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no +problems, since many error conditions are normal when using Riak. + +When working with strong consistency, however, operations like +[conditional puts][config strong consistency#details] commonly +produce errors that are difficult for clients to interpret. For example, +it is expected behavior for conditional puts to fail in the case of +concurrent updates to an object. At present, the official Riak clients +will convert this failure into an exception that is no different from +other error conditions, i.e. they will not indicate any +strong-consistency-specific errors. + +The best solution to this problem at the moment is to catch these +exceptions on the application side and parse server-side error messages +to see if the error involved a conditional failure. If so, you should +set up your application to retry any updates, perhaps a specified number +of times or perhaps indefinitely, depending on the use case. + +If you do set up a retry logic of this sort, however, it is necessary +to retry the entire read/modify/put cycle, meaning that you will need +to fetch the object, modify it, and then write. If you perform a simple +put over and over again, without reading the object, the update will +continue to fail. + +A future version of Riak will address these issues by modifying the +server API to more accurately report errors specific to strongly +consistent operations. + diff --git a/content/riak/kv/2.9.4/developing/app-guide/write-once.md b/content/riak/kv/2.9.4/developing/app-guide/write-once.md new file mode 100644 index 0000000000..ae309f5dda --- /dev/null +++ b/content/riak/kv/2.9.4/developing/app-guide/write-once.md @@ -0,0 +1,156 @@ +--- +title: "Write Once" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Write Once" + identifier: "app_guide_write_once" + weight: 102 + parent: "developing_app_guide" +toc: true +version_history: + in: "2.1.0+" +aliases: + - /riak/2.9.4/dev/advanced/write-once + - /riak/kv/2.9.4/dev/advanced/write-once +--- + +[glossary vnode]: {{}}riak/kv/2.9.4/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.9.4/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.9.4/developing/data-types +[strong consistency]: {{}}riak/kv/2.9.4/developing/app-guide/strong-consistency + +Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. + +{{% note %}} +Write-once buckets do not support Riak commit hooks. Because Riak objects are +inserted into the realtime queue using a postcommit hook, realtime replication +is unavailable for write-once buckets. 
Fullsync replication will, however, +replicate the data. +{{% /note %}} + +## Configuration + +When the new `write_once` [bucket type][bucket type] parameter is set to +`true`, buckets of type will treat all key/value entries as semantically "write +once;" once written, entries should not be modified or overwritten by the user. + +The `write_once` property is a boolean property applied to a bucket type and +may only be set at bucket creation time. Once a bucket type has been set with +this property and activated, the `write_once` property may not be modified. + +The `write_once` property is incompatible with [Riak data types][Riak data types] +and [strong consistency][strong consistency], This means that if you attempt +to create a bucket type with the `write_once` property set to `true`, any +attempt to set the `datatype` parameter or to set the `consistent` parameter +to `true` will fail. + +The `write_once` property may not be set on the default bucket type, and may +not be set on individual buckets. If you set the `lww` or `allow_mult` +parameters on a write-once bucket type, those settings will be ignored, as +sibling values are disallowed by default. + +The following example shows how to configure a bucket type with the +`write_once` property: + +```bash +riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}' +# my-bucket-type created + +riak-admin bucket-type activate my-bucket-type +# my-bucket-type has been activated + +riak-admin bucket-type status my-bucket-type +# my-bucket-type is active +... +write_once: true +... +``` + +## Quorum + +The write path used by write-once buckets supports the `w`, `pw`, and `dw` +configuration values. However, if `dw` is specified, then the value of `w` is +taken to be the maximum of the `w` and `dw` values. For example, for an `n_val` +of 3, if `dw` is set to `all`, then `w` will be `3`. + +This write additionally supports the `sloppy_quorum` property. If set to +`false`, only primary nodes will be selected for calculation of write quorum +nodes. + +## Runtime + +The write-once path circumvents the normal coordinated PUT code path, and +instead sends write requests directly to all [vnodes][glossary vnode] (or +vnode proxies) in the effective preference list for the write operation. + +In place of the `put_fsm` used in the normal path, we introduce a collection of +new intermediate worker processes (implementing `gen_server` behavior). The +role of these intermediate processes is to dispatch put requests to vnode or +vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`, +the write-once workers are long-lived for the lifecycle of the `riak_kv` +application. They are therefore stateful and store request state in a state- +local dictionary. + +The relationship between the `riak_client`, write-once workers, and vnode +proxies is illustrated in the following diagram: + +
+![Write Once]({{}}images/write_once.png)
+ +## Client Impacts + +Since the write-once code path is optimized for writes of data that will not +be updated and therefore may potentially issue asynchronous writes, some +client features might not work as expected. For example, PUT requests asking +for the object to be returned will behave like requests that do not +request the object to be returned when they are performed against write-once +buckets. + + +## Siblings + +As mentioned, entries in write-once buckets are intended to be written only +once---users who are not abusing the semantics of the bucket type should not be +updating or over-writing entries in buckets of this type. However, it is +possible for users to misuse the API, accidentally or otherwise, which might +result in incomparable entries for the same key. + +In the case of siblings, write-once buckets will resolve the conflict by +choosing the "least" entry, where sibling ordering is based on a deterministic +SHA-1 hash of the objects. While this algorithm is repeatable and deterministic +at the database level, it will have the appearance to the user of "random write +wins." + +{{% note %}} +As mentioned in [Configuration](#configuration), write-once buckets and Riak +Data Types are incompatible because of this. +{{% /note %}} + + +## Handoff + +The write-once path supports handoff scenarios, such that if a handoff occurs +during PUTs in a write-once bucket, the values that have been written will be +handed off to the newly added Riak node. + +## Asynchronous Writes + +For backends that support asynchronous writes, the write-once path will +dispatch a write request to the backend and handle the response +asynchronously. This behavior allows the vnode to free itself for other work +instead of waiting on the write response from the backend. + +At the time of writing, the only backend that supports asynchronous writes is +LevelDB. Riak will automatically fall back to synchronous writes with all other +backends. + +{{% note title="Note on the `multi` backend" %}} +The [Multi]({{}}riak/kv/2.9.4/setup/planning/backend/multi) backend does not +support asynchronous writes. Therefore, if LevelDB is used with the Multi +backend, it will be used in synchronous mode. +{{% /note %}} + diff --git a/content/riak/kv/2.9.4/developing/client-libraries.md b/content/riak/kv/2.9.4/developing/client-libraries.md new file mode 100644 index 0000000000..5297bde650 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/client-libraries.md @@ -0,0 +1,456 @@ +--- +title: "Client Libraries" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Client Libraries" + identifier: "developing_client_libraries" + weight: 106 + parent: "developing" +toc: true +aliases: + - /riak/2.9.4/dev/using/libraries + - /riak/kv/2.9.4/dev/using/libraries +--- + +## Basho-Supported Libraries + +Basho officially supports a number of open-source client libraries for a +variety of programming languages and environments. 
+ +Language | Source | Documentation | Download +:--------|:-------|:--------------|:-------- +Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22) | +Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client) +Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads) +C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases) +Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases) +PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) +Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client) +Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client) + +**Note**: All official clients use the integrated issue tracker on +GitHub for bug reporting. + +In addition to the official clients, Basho provides some unofficial +client libraries, listed below. There are also many client libraries and +related [community projects]({{}}community/projects/). + + +## Community Libraries + +The Riak Community is developing at a break-neck pace, and the number of +community-contributed libraries and drivers is growing right along side +it. Here is a list of projects that may suit your programming needs or +curiosities. If you know of something that needs to be added or are +developing something that you wish to see added to this list, please +fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs) +and send us a pull request. + +{{% note title="Note on community-produced libraries" %}} +All of these projects and libraries are at various stages of completeness and +may not suit your application's needs based on their level of maturity and +activity. 
+{{% /note %}} + +### Client Libraries and Frameworks + +#### C/C++ + +* [riak-cpp](https://github.com/ajtack/riak-cpp) +--- +A C++ Riak client + library for use with C++11 compilers +* [Riak C Driver](https://github.com/fenek/riak-c-driver) +--- +A library + to communicate with Riak using cURL and Protocol Buffers +* [Riack](https://github.com/trifork/riack) +--- +A simple C client + library +* [Riack++](https://github.com/TriKaspar/riack_cpp) +--- +A C++ wrapper + around riack + +#### Clojure + +* [knockbox](https://github.com/reiddraper/knockbox) +--- +An eventual + consistency toolbox for Clojure +* [Welle](http://clojureriak.info) +--- +An expressive Clojure client with + batteries included +* [clj-riak](http://github.com/mmcgrana/clj-riak) +--- +Clojure bindings + to the Riak Protocol Buffers API +* [sumo](https://github.com/reiddraper/sumo) +--- +A Protocol + Buffer-specific client for Riak with KV, 2i, and MapReduce support +* [kria](https://github.com/bluemont/kria) +--- +Riak 2.0 Asynchronous + (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer + API, Java 7. + +#### ColdFusion + +* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) + +--- +A Riak-backed cache extension for Railo/ColdFusion + +#### Common Lisp + +* [cl-riak (1)](https://github.com/whee/cl-riak) +* [cl-riak (2)](https://github.com/eriknomitch/cl-riak) + +#### Dart + +* [riak-dart](https://github.com/agilord/riak_dart_client) +--- +HTTP + client for Riak written in Dart + +#### Django (Python) + +* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) + +--- +Riak-based Session Backend for Django +* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) + +--- +A Riak backend for Django + +#### Erlang + +* [Uriak Pool](https://github.com/unisontech/uriak_pool) +--- +Erlang + connection pool library from the team at + [Unison](http://www.unison.com) +* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) +--- +Riak + Protocol Buffer Client pool application +* [Pooly](https://github.com/aberman/pooly) +--- +Riak Process Pool +* [riakpool](https://github.com/dweldon/riakpool) +--- +Application for + maintaining a dynamic pool of Protocol Buffer client connections to a + Riak database +* [pooler](https://github.com/seth/pooler) +--- +An OTP Process Pool + Application +* [krc](https://github.com/klarna/krc) +--- +A simple wrapper around the + official Riak client for Erlang +* [riakc_pool](https://github.com/brb/riakc_pool) +--- +A really simple + Riak client process pool based on poolboy + +#### Go + +* [riaken](https://github.com/riaken) +--- +A fast and extendable Riak + Protocol Buffer Client +* [goriakpbc](https://github.com/tpjg/goriakpbc) +--- +A Golang Riak + client inspired by the Ruby riak-client from Basho and riakpbc from mrb +* [riakpbc](https://github.com/mrb/riakpbc) +--- +A Riak Protocol Buffer + client in Go +* [goriak](https://github.com/zegl/goriak) +--- +Go language driver for Riak KV + +#### Grails + +* [Grails ORM for Riak](http://www.grails.org/plugin/riak) + +#### Griffon + +* [Riak Plugin for + Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin) + +#### Groovy + +* [spring-riak](https://github.com/jbrisbin/spring-riak) +--- +Riak + support from Groovy and/or Java + +#### Haskell + +* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) + +--- +A fast Haskell client library from the team at MailRank. 
+ +#### Java + +* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) + +--- +Java Client Library for Riak based on the Protocol Buffers API +* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) + +--- +Asynchronous, NIO-based Protocol Buffers client for Riak +* [Riak Module for the Play + Framework](http://www.playframework.org/modules/riak-head/home) + +#### Lisp-flavored Erlang + +* [Gutenberg](https://github.com/dysinger/gutenberg/) +--- +Riak MapReduce + examples written in LFE + +#### Node.js + +* [zukai](https://github.com/natural/zukai) +--- +Riak ODM for Node.js + from Troy Melhase +* [riak-pb](https://github.com/CrowdProcess/riak-pb) +--- +Riak Protocol + Buffers client for Node.js from the team at + [CrowdProcess](http://crowdprocess.com) +* [node_riak](https://github.com/mranney/node_riak) +--- +Voxer's + production Node.js client for Riak. +* [riakpbc](https://github.com/nlf/riakpbc) +--- +A simple Riak Protocol + Buffer client library for Node.js +* [nodiak](https://npmjs.org/package/nodiak) +--- +Supports bulk + get/save/delete, sibling auto-resolution, MapReduce chaining, Search, + and 2i's +* [resourceful-riak](https://github.com/admazely/resourceful-riak) +--- +A + Riak engine to the + [resourceful](https://github.com/flatiron/resourceful/) model + framework from [flatiron](https://github.com/flatiron/) +* [Connect-Riak](https://github.com/frank06/connect-riak) +--- +Riak + session store for Connect backed by [Riak-js](http://riakjs.org/) +* [Riak-js](http://riakjs.com) +--- +Node.js client for Riak with support + for HTTP and Protocol Buffers +* [Riakjs-model](https://github.com/dandean/riakjs-model) +--- +a model + abstraction around riak-js +* [Node-Riak](http://github.com/orlandov/node-riak) +--- +A wrapper around + Node's HTTP facilities for communicating with Riak +* [riak-dc](https://github.com/janearc/riak-dc) +--- +A very thin, very small + http-based interface to Riak using promises intended to be used for small + tools like command-line applications; aims to have the "most-synchronous- + like" interface. 
+* [Nori](https://github.com/sgonyea/nori) +--- +Experimental Riak HTTP + library for Node.js modeled after Ripple +* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) +--- + Node-based server and database-frontend for Sproutcore +* [Chinood](https://npmjs.org/package/chinood) +--- +Object data mapper + for Riak built on Nodiak +* [SimpleRiak](https://npmjs.org/package/simpleriak) +--- +A very simple + Riak HTTP client + +#### OCaml + +* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) +--- + Riak OCaml client +* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) +--- +A Protocol + Buffers client for Riak + +#### Perl + +* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) +--- +A Perl + interface to Riak +* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) + +--- +Non-blocking Riak adapter using anyevent +* [riak-tiny](https://github.com/tempire/riak-tiny) +--- +Perl interface + to Riak without Moose +* [Riak::Light](https://metacpan.org/module/Riak::Light) +--- +Fast and + lightweight Perl client for Riak (PBC only) + +#### PHP + +* [riak-client](https://github.com/php-riak/riak-client) +--- +A Riak + 2.0-compliant PHP client with support for Protocol Buffers by [Fabio + Silva](https://github.com/FabioBatSilva) +* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) +--- +A port of + Ripple to PHP +* [riiak](https://bitbucket.org/intel352/riiak) +--- +A Riak PHP client + library for the [Yii Framework](http://www.yiiframework.com/) +* [riak-php](https://github.com/marksteele/riak-php) +--- +A Riak PHP + client with support for Protocol Buffers +* [RiakBundle](https://github.com/remialvado/RiakBundle) +--- + [Symfony](http://symfony.com) Bundle designed to ease interaction + with Riak +* [php_riak](https://github.com/TriKaspar/php_riak) +--- +A PHP extension + written in C, Both Riak client and PHP session module + +#### Python + +* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) + +--- +Asyncio PBC Riak 2.0+ client library. 
(Based on official Basho + python client) +* [Riakasaurus](https://github.com/calston/riakasaurus) +--- +A Riak + client library for Twisted (based on txriak) +* [RiakKit](http://shuhaowu.com/riakkit) +--- +A small Python ORM that + sits on top of riak-python-client, similar to mongokit and couchdbkit +* [riakalchemy](https://github.com/Linux2Go/riakalchemy) +--- +Object + mapper for Riak written in Python +* [riak_crdt](https://github.com/ericmoritz/riak_crdt) +--- +A CRDT + (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT + API](https://github.com/ericmoritz/crdt) +* [txriak](https://launchpad.net/txriak) +--- +A Twisted module for + communicating with Riak via the HTTP interface +* [txriakidx](https://github.com/williamsjj/txriakidx) +--- +Riak client + for Twisted Python that implements transparent indexes + +#### Racket + +* [riak.rkt](https://github.com/shofetim/riak.rkt) +--- +Racket API to + Riak +* [Racket Riak](https://github.com/dkvasnicka/racket-riak) +--- +Racket + 1.3.x API to Riak + +#### Ruby + +* [Risky](https://github.com/aphyr/risky) +--- +A lightweight Ruby ORM for + Riak +* [riak_sessions](http://github.com/igorgue/riak_sessions) +--- + Riak-backed session storage for Rack +* [Riaktor](http://github.com/benmyles/riaktor) +--- +Ruby client and + object mapper for Riak +* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) +--- + DataMapper adapter for Riak +* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) +--- +Riak + Protocol Buffer Client in Ruby +* [Devise-Ripple](http://github.com/frank06/devise-ripple) +--- +An ORM + strategy to use Devise with Riak +* [ripple-anaf](http://github.com/bkaney/ripple-anaf) +--- +Accepts nested + attributes support for Ripple +* [Pabst](https://github.com/sgonyea/pabst) +--- +Cross-platform Ruby + extension for Protocol Buffers written in both Objective-C and + Objective-C++ + +#### Scala + +* [Riakka](http://github.com/timperrett/riakka) +--- +Scala library for + talking to Riak +* [Ryu](http://github.com/softprops/ryu) +--- +A Tornado Whirlwind Kick + Scala client for the Riak raw HTTP interface +* [Raiku](https://github.com/gideondk/Raiku) +--- +An Akka IO- and + Sentinel-driven Riak Scala client + +#### Smalltalk + +* [Phriak](http://www.squeaksource.com/Phriak/) +--- +A Riak client for + Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface +* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) + +--- +A Pharo Smalltalk interface to Riak. There is also a blog post + with some additional info about the client + [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html). + diff --git a/content/riak/kv/2.9.4/developing/data-modeling.md b/content/riak/kv/2.9.4/developing/data-modeling.md new file mode 100644 index 0000000000..aae1fc26e8 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/data-modeling.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/kv/2.9.4/learn/use-cases/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/, but -- to maintain +the git history of this (possibly malformed?) file -- we're going to start off +by using this generated redirect. 
+ diff --git a/content/riak/kv/2.9.4/developing/data-types.md b/content/riak/kv/2.9.4/developing/data-types.md new file mode 100644 index 0000000000..484b05b834 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/data-types.md @@ -0,0 +1,276 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Data Types" + identifier: "developing_data_types" + weight: 102 + parent: "developing" +toc: true +aliases: + - /riak/2.9.4/dev/using/data-types + - /riak/kv/2.9.4/dev/using/data-types + - /riak/2.9.4/dev/data-modeling/data-types + - /riak/kv/2.9.4/dev/data-modeling/data-types +--- + +[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others +[concept crdt]: ../../learn/concepts/crdts +[ops bucket type]: ../../using/cluster-operations/bucket-types + +Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types: + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [GSets](./gsets) +- [Maps](./maps) + +Riak KV also has 1 context-free data type, that has similar usage but does not require contexts. + +- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places) + + +Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps). + +For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt]. + +## Getting Started with Riak Data Types + +The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: + +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). +2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). +3. [Activate the bucket type](#activate-bucket-type). + +### Creating a Bucket with a Riak Data Type + +First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to either `counter`, `map`, `set`, or `hll`. + +The following would create a separate bucket type for each of the four +bucket-level data types: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}' +riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}' +``` + +> **Note** +> +> The names `maps`, `sets`, `counters`, `hlls` and `gsets` are not reserved +terms. You are free to name bucket types whatever you like, with +the exception of `default`. + +### Confirm Bucket configuration + +Once you've created a bucket with a Riak data type, you can check +to make sure that the bucket property configuration associated with that +type is correct. This can be done through the `riak-admin` interface: + +```bash +riak-admin bucket-type status maps +``` + +This will return a list of bucket properties and their associated values +in the form of `property: value`. 
If our `maps` bucket type has been set +properly, we should see the following pair in our console output: + +``` +datatype: map +``` + +### Activate Bucket type + +If a bucket type has been properly constructed, it needs to be activated +to be usable in Riak. This can also be done using the `bucket-type` +command interface: + +```bash +riak-admin bucket-type activate maps +``` + +To check whether activation has been successful, simply use the same +`bucket-type status` command shown above. + +See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application. + +## Required Bucket Properties + +In order for Riak data types to work the bucket should have the following bucket properties: + +- `allow_mult = true` +- `last_write_wins = false` + +These settings are set by default and should not be changed. + +## Data Types and Context + +Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence. + +If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client). + +> **Note** +> +> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter. + +In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map): + +```java +// Using the "ahmedMap" Location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +System.out.prinntln(ctx.getValue().toString()) + +// An indecipherable string of Unicode characters should then appear +``` + +```ruby +bucket = client.bucket('users') +ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps') +ahmed_map.instance_variable_get(:@context) + +# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j" +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```python +bucket = client.bucket_type('maps').bucket('users') +ahmed_map = Map(bucket, 'ahmed_info') +ahmed_map.context + +# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Note: using a previous UpdateMap or FetchMap result +Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context)); + +// Output: +// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("context: '%s'", rslt.context.toString('base64')); +}); + +// Output: +// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo=' +``` + +```erlang +%% You cannot fetch a data type's context directly using the Erlang +%% client. This is actually quite all right, as the client automatically +%% manages contexts when making updates. 
+``` + +> **Context with the Ruby, Python, and Erlang clients** +> +> In the Ruby, Python, and Erlang clients, you will not need to manually +handle context when making data type updates. The clients will do it all +for you. The one exception amongst the official clients is the Java +client. We'll explain how to use data type contexts with the Java client +directly below. + +### Context with the Java and PHP Clients + +With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations: + +* Disabling a flag within a map +* Removing an item from a set (whether the set is on its own or within a + map) +* Removing a field from a map + +Without context, these operations simply will not succeed due to the +convergence logic driving Riak data types. The example below shows you +how to fetch a data type's context and then pass it back to Riak. More +specifically, we'll remove the `paid_account` flag from the map: + +```java +// This example uses our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate removePaidAccountField = new MapUpdate() + .removeFlag("paid_account"); +UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField) + .withContext(ctx) + .build(); +client.execute(update); +``` + + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +## Usage Examples + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [Maps](./maps) +- [GSets](./gsets) +- [Hyperloglogs](./hyperloglogs) + +The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section. + +All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish. + +## Data Types and Search + +Riak data types can be searched like any other object, but with the +added benefit that your data type is indexed as a different type by Solr, +the search platform behind Riak Search. + +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code +samples from each of our official client libraries. 
+ diff --git a/content/riak/kv/2.9.4/developing/data-types/counters.md b/content/riak/kv/2.9.4/developing/data-types/counters.md new file mode 100644 index 0000000000..60c9f5ee8f --- /dev/null +++ b/content/riak/kv/2.9.4/developing/data-types/counters.md @@ -0,0 +1,632 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Counters" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Counters" + identifier: "data_types_counters" + weight: 100 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.4/dev/using/data-types/counters + - /riak/kv/2.9.4/dev/using/data-types/counters + - /riak/2.9.4/dev/data-modeling/data-types/counters + - /riak/kv/2.9.4/dev/data-modeling/data-types/counters +--- + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero. + +The examples in this section will show you how to use counters on their own. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter set to `counter`: + +```bash +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +``` + +> **Note** +> +> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status counters +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `counters` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: counter +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate counters +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status counters +``` + +After creating and activating our new `counters` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key +location that contains our counter. + +For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. 
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can +create this counter and ensure that the `counters` bucket will use our +`counters` bucket type like this: + +```java +// Using the countersBucket Namespace object from above: + +Location trafficTickets = new Location(countersBucket, "traffic_tickets"); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') + +# Alternatively, the Ruby client enables you to set a bucket type as +# being globally associated with a Riak data type. The following would +# set all counter buckets to use the counters bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters' + +# This would enable us to create our counter without specifying a bucket type +bucket = client.bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('traffic_tickets', $bucket); +``` + +```python +bucket = client.bucket_type('counters').bucket('traffic_tickets') +counter = bucket.new('traffic_tickets') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResult = cmd.Result; +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +``` + +```erlang +Counter = riakc_counter:new(). + +%% Counters in the Erlang client are opaque data structures that collect +%% operations as you mutate them. We will associate the data structure +%% with a bucket type, bucket, and key later on. +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +## Increment a Counter + +Now that our client knows which bucket/key pairing to use for our +counter, `traffic_tickets` will start out at 0 by default. If we happen +to get a ticket that afternoon, we can increment the counter: + +```java +// Using the "trafficTickets" Location from above: + +CounterUpdate cu = new CounterUpdate(1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment + +# This will increment the counter both on the application side and in +Riak +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment() + +# Updates are staged locally and have to be explicitly sent to Riak +# using the store() method. 
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 5}' +``` + +## Retrieve Counter Value + +We can retrieve the value of the counter and view how many tickets have accumulated: + +```java +// Using the "trafficTickets" Location from above: +FetchCounter fetch = new FetchCounter.Builder(trafficTickets) + .build(); +FetchCounter.Response response = client.execute(fetch); +RiakCounter counter = response.getDatatype(); +Long ticketsCount = counter.view(); +``` + +```ruby +counter.value +# Output will always be an integer +``` + +```php +$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getCounter(); + +$trafficTickets->getData(); # returns an integer +``` + +```python +counter.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, whereas the call above would return +# 6, the call below will return 0' since we started with an empty +# counter: + +counter.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any changes to the counter that have not yet been +# sent to Riak +counter.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +// response.Value has the counter value +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +client.fetchCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND", + options.bucketType, options.bucket, options.key); + } else { + logger.info("bt: %s, b: %s, k: %s, counter: %d", + options.bucketType, options.bucket, options.key, + rslt.counterValue); + } + } +); +``` + +```erlang +riakc_counter:dirty_value(Counter2). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, whereas the call above would return +%% '6', the call below will return '0' since we started with an empty +%% counter: + +riakc_counter:value(Counter2). + +%% To fetch the value stored on the server, use the call below: + +{ok, CounterX} = riakc_pb_socket:fetch_type(Pid, + {<<"counters">>, <<"counters">>}, + <<"traffic_tickets">>). +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets + +# Response: +{"type":"counter", "value": } +``` + +## Decrement a Counter + +Counters enable you to decrement values in addition to incrementing them as seen above. 
+ +For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(-1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.decrement + +# Just like incrementing, you can also decrement by more than one, e.g.: +counter.decrement(3) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(-3) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.decrement() + +# Just like incrementing, you can also decrement by more than one, e.g.: +counter.decrement(3) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var updateCmd = new UpdateCounter.Builder(-3) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .Build(); + +rslt = client.Execute(updateCmd); +response = updateCmd.Response; +// response.Value is three less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counter', + key: 'traffic_tickets', + increment: -1 +}; + +// As with incrementing, you can also decrement by more than one, e.g.: +var options = { + bucketType: 'counters', + bucket: 'counter', + key: 'traffic_tickets', + increment: -3 +}; +``` + +```erlang +Counter3 = riakc_counter:decrement(Counter2). + +%% As with incrementing, you can also decrement by more than one: + +Counter4 = riakc_counter:decrement(3, Counter3). + +%% At some point, we'll want to send our local updates to the server +%% so they get recorded and are visible to others. Extract the update +%% using the to_op/1 function, then pass it to +%% riakc_pb_socket:update_type/4,5. + +riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>}, + <<"traffic_tickets">>, + riakc_counter:to_op(Counter4)). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"decrement": 3}' +``` + diff --git a/content/riak/kv/2.9.4/developing/data-types/gsets.md b/content/riak/kv/2.9.4/developing/data-types/gsets.md new file mode 100644 index 0000000000..d165469e1c --- /dev/null +++ b/content/riak/kv/2.9.4/developing/data-types/gsets.md @@ -0,0 +1,628 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types:GSets" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "GSets" + identifier: "data_types_gsets" + weight: 101 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.4/dev/using/data-types/gsets + - /riak/kv/2.9.4/dev/using/data-types/gsets + - /riak/2.9.4/dev/data-modeling/data-types/gsets + - /riak/kv/2.9.4/dev/data-modeling/data-types/gsets +--- + +GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps). + +GSets are collections of unique binary values (such as strings). All of the values in a gset are unique and are automatically sorted alphabetically irresepective of the order they were added. + +For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV. 
+
+Unlike regular sets, elements can only be added to a gset; existing elements cannot be modified or removed.
+
+> **Known Issue**
+>
+> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `gset`:
+
+```bash
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `gsets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: gset
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate gsets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+After creating and activating our new `gsets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using gsets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a gset:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby client and Ruby's standard library have a
+# class called Set. Make sure that you refer to the standard library
+# version as ::Set and the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+gset = bucket.new('2019-11-17')
+
+# or
+
+from riak.datatypes import GSet
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with gsets
+// by building an Options object or using a Builder
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store a list of transactions that occur for an account number on a specific date.
+Most of the clients below store the gset in the key `2019-11-17` in the bucket `account-12345678`, using the `gsets` bucket type created previously; the Java and Ruby snippets illustrate the same pattern with a `cities` key in a `travel` bucket:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('gsets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the gsets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'gsets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak gset.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a gset explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create an options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+Gset = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty. We can verify that it is empty at any time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+count($gset->getData());
+```
+
+```python
+len(gset) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+FetchGSet fetchGSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchGSetCommand);
+GSetResponse response = fetchGSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("gset '2019-11-17' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_gset:size(Gset) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% gset that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a GSet
+
+But let's say that a pair of transactions occurred today. Let's add them to our `2019-11-17` set:
+
+```java
+// Using our "cities" Location from above:
+
+GSetUpdate su = new GSetUpdate()
+        .add("Toronto")
+        .add("Montreal");
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .build();
+client.execute(update);
+```
+
+```ruby
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('transaction a')
+  ->add('transaction b')
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+gset.add('transaction a')
+gset.add('transaction b')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "transaction a", "transaction b" };
+
+var builder = new UpdateGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17")
+    .WithAdditions(adds);
+
+UpdateGSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+GSetResponse response = cmd.Response;
+Assert.Contains("transaction a", response.AsStrings.ToArray());
+Assert.Contains("transaction b", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['transaction a', 'transaction b'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+Gset1 = riakc_gset:add_element(<<"transaction a">>, Gset),
+Gset2 = riakc_gset:add_element(<<"transaction b">>, Gset1).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["transaction a", "transaction b"]}'
+```
+
+## Remove from a GSet
+
+Removal from a GSet is not possible.
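+
+As a minimal illustration, assuming the Python client mirrors the server's grow-only semantics and exposes no removal method at all, an attempted removal simply fails (the `discard` call below is hypothetical and not part of the gset API):
+
+```python
+try:
+    gset.discard('transaction a')  # no such method on a gset
+except AttributeError:
+    print("gsets do not support removal")
+```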
+
+## Retrieve a GSet
+
+Now, we can check on which transactions are currently in our gset:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+#<Set: {"Toronto", "Montreal"}>
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+var_dump($gset->getData());
+```
+
+```python
+gset.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['transaction a', 'transaction b']), the call below would
+# return frozenset([]).
+
+gset.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions.
+gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(Gset2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(Gset2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"gsets">>,<<"account-12345678">>},
+                                        <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Toronto")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Toronto'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use the standard JavaScript array method indexOf()
+
+var gset20191117 = rslt.values;
+gset20191117.indexOf('transaction z'); // if present, index is >= 0
+gset20191117.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, Gset2 is the most "recent" gset from the standpoint
+%% of our application.
+
+riakc_gset:is_element(<<"transaction z">>, Gset2).
+riakc_gset:is_element(<<"transaction a">>, Gset2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+int numberOfCities = response.getDatatype().view().size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable<string>
+response.Values.Count();
+```
+
+```javascript
+// Use the standard JavaScript array property length
+
+var gset20191117Size = rslt.values.length;
+```
+
+```erlang
+riakc_gset:size(Gset2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
diff --git a/content/riak/kv/2.9.4/developing/data-types/hyperloglogs.md b/content/riak/kv/2.9.4/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..2089b6f05b
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/data-types/hyperloglogs.md
@@ -0,0 +1,640 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/using/data-types/hyperloglogs
+  - /riak/kv/2.9.4/dev/using/data-types/hyperloglogs
+  - /riak/2.9.4/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/2.9.4/dev/data-modeling/data-types/hyperloglogs
+---
+
+The examples in this section will show you how to use hyperloglogs on their own.
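+
+As a quick sketch of the core idea, using the Python client API shown below (and assuming a connected `client` plus the active `hlls` bucket type created in the next section; the `daily-uniques` key is illustrative), duplicate additions do not inflate the estimate:
+
+```python
+hll = client.bucket_type('hlls').bucket('my_hlls').new('daily-uniques')
+
+# 'alice' and 'bob' are each added twice, but only distinct values count:
+for visitor in ['alice', 'bob', 'alice', 'carol', 'bob']:
+    hll.add(visitor)
+hll.store()
+
+print(hll.value)  # approximately 3
+```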
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog.
+
+For this example we'll use the `hlls` bucket type created and activated above and a bucket called `my_hlls`:
+
+```erlang
+%% Buckets are simply named binaries in the Erlang client. See the
+%% examples below for more information
+```
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location hllLocation =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// Buckets and bucket types are simply strings in the Go client.
+
+// See the examples below for more information, or the full example at
+// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go
+
+// We will need the following imports to run the examples:
+import (
+    "fmt"
+    "os"
+    "time"
+
+    riak "github.com/basho/riak-go-client"
+    "errors"
+)
+```
+
+```csharp
+// In the C# client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```javascript
+// In the Node.js client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('<key>', '<bucket>', 'hlls')
+  ->build();
+```
+
+```ruby
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-Data-Type
+# requests, which end in /keys/<key>
+```
+
+
+## Create a HyperLogLog data type
+
+To create a hyperloglog data structure, you need to specify a bucket/key pair to
+hold that hyperloglog. Here is the general syntax for doing so:
+
+```erlang
+HLL = riakc_hll:new().
+
+%% Hyperloglogs in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location hllLocation =
+  new Location(new Namespace("hlls", "hello"), "darkness");
+
+// In the Java client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// In the Go client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```csharp
+// In the C# client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```javascript
+// In the Node.js client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```php
+// Note that "hlls" is just an example HLL bucket type name used
+// in these examples
+
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('gosabres poked you.')
+  ->add('phprocks viewed your profile.')
+  ->add('phprocks started following you.')
+  ->buildBucket('<bucket>', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+key = "darkness"
+hll = Riak::Crdt::HyperLogLog.new(bucket, key)
+```
+
+```curl
+# You cannot create an empty hyperloglog data structure through the HTTP
+# interface.
+# Hyperloglogs can only be created when an element is added to them, as in the
+# examples below.
+```
+
+Upon creation, our hyperloglog data structure is empty:
+
+```erlang
+HLL.
+
+%% which will return:
+%% {hll,0,[]}
+```
+
+```java
+FetchHll fetch = new FetchHll.Builder(hllLocation)
+    .build();
+RiakHll hll = client.execute(fetch);
+boolean isEmpty = hll.getCardinality() == 0;
+```
+
+```python
+is_empty = hll.value == 0
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+});
+// Prints "Not Found" to logger.info.
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+// Prints "Not Found" to the console.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+
+$response->getCode() == '404';
+```
+
+```ruby
+puts hll.cardinality
+# Prints "0"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","error":"notfound"}
+```
+
+## Add elements to a HyperLogLog data type
+
+```erlang
+HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
+RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
+HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1),
+
+HLL2.
+
+%% which will return:
+%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]}
+```
+
+```java
+HllUpdate hllUpdate = new HllUpdate()
+        .add("Jokes")
+        .add("Are")
+        .addAll(Arrays.asList("Better", "Explained", "Jokes"));
+
+hllUpdate.getElementAdds();
+// Returns the set of ["Jokes", "Are", "Better", "Explained"]
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+// We will add values in the next example
+```
+
+```csharp
+// We will add values in the next example
+```
+
+```javascript
+// We will add values in the next example
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('Jokes')
+  ->add('Are')
+  ->add('Better')
+  ->add('Explained')
+  ->add('Jokes')
+  ->buildBucket('my_hlls', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["my", "old", "friend"]}'
+```
+
+However, when using a non-HTTP client, the approximate cardinality/value of our
+data structure will be 0, locally, until it's pushed to the server and then
+[fetched](#retrieve-a-hyperloglog-datatype) from the server.
+
+```erlang
+riakc_hll:value(HLL2) == 0.
+
+%% which will return:
+%% true
+
+Port = 8087,
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port),
+Key = <<"Holy Diver">>,
+BucketType = <<"hlls">>,
+Bucket = {BucketType, <<"rainbow in the dark">>},
+
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)).
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)).
+```
+
+```java
+// Using hllUpdate and hllLocation from above examples
+
+UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate)
+    .build();
+client.execute(update);
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+adds := [][]byte{
+    []byte("Jokes"),
+    []byte("Are"),
+    []byte("Better"),
+    []byte("Explained"),
+    []byte("Jokes"),
+}
+
+builder := riak.NewUpdateHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    WithAdditions(adds...).
+    Build()
+if err != nil {
+    return err
+}
+
+return cluster.Execute(cmd)
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness',
+    additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'],
+};
+
+client.updateHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```csharp
+var adds = new HashSet<string> { "Jokes", "Are", "Better", "Explained", "Jokes" };
+
+var update = new UpdateHll.Builder(adds)
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .WithReturnBody(true)
+    .Build();
+
+RiakResult rslt = client.Execute(update);
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('Jokes')
+  ->add('Are')
+  ->add('Better')
+  ->add('Explained')
+  ->add('Jokes')
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+hll.add('Jokes')
+hll.batch do |s|
+  s.add 'Are'
+  s.add 'Better'
+  s.add 'Explained'
+  s.add 'Jokes'
+end
+```
+
+## Retrieve a HyperLogLog data type
+
+Now, we can check the approximate count of (i.e. the cardinality of) the
+elements added to our hyperloglog data structure:
+
+```erlang
+{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key),
+riakc_hll:value(HLL3) == 4.
+
+%% which would return:
+%% true
+
+%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the
+%% unique elements we've added to the data structure.
+```
+
+```java
+FetchHll hllFetchCmd = new FetchHll.Builder(location).build();
+RiakHll hll = client.execute(hllFetchCmd);
+hll.getCardinality();
+// Which returns 4
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = bucket.get('hll_one')
+# myhll.value == 4
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+    logger.info("Hyperloglog cardinality is: " + rslt.cardinality);
+});
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($response->getHll()->getData()));
+$this->assertEquals(4, $response->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
+
diff --git a/content/riak/kv/2.9.4/developing/data-types/maps.md b/content/riak/kv/2.9.4/developing/data-types/maps.md
new file mode 100644
index 0000000000..fb37781ed7
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/data-types/maps.md
@@ -0,0 +1,1882 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/using/data-types/maps
+  - /riak/kv/2.9.4/dev/using/data-types/maps
+  - /riak/2.9.4/dev/data-modeling/data-types/maps
+  - /riak/kv/2.9.4/dev/data-modeling/data-types/maps
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
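+
+For a quick taste of that versatility, here is a minimal sketch using the Python client (the names are illustrative; it assumes a connected `client` and the `maps` bucket type created in the next section):
+
+```python
+customer = client.bucket_type('maps').bucket('customers').new('user0001')
+
+# Registers, counters, sets, flags, and nested maps all live side by side:
+customer.registers['name'].assign('Ada')
+customer.counters['logins'].increment()
+customer.sets['tags'].add('beta-tester')
+customer.flags['active'].enable()
+customer.maps['address'].registers['city'].assign('London')
+customer.store()
+```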
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `maps` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: map
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+After creating and activating our new `maps` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key location that contains our map.
+
+The syntax for creating a map is analogous to the
+syntax for creating other data types:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location map =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+map = Riak::Crdt::Map.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type');
+```
+
+```python
+# The client detects the bucket type's datatype and automatically
+# returns the right datatype for you, in this case a Map.
+map = bucket.new(key)
+
+# This way is also acceptable:
+from riak.datatypes import Map
+map = Map(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("<bucket_type>")
+    .WithBucket("<bucket>")
+    .WithKey("<key>");
+```
+
+```javascript
+// Options to pass to the various map methods
+var options = {
+    bucketType: '<bucket_type>',
+    bucket: '<bucket>',
+    key: '<key>'
+};
+```
+
+```erlang
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Map
+
+For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket.
+
+We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type:
+
+```java
+// In the Java client, you specify the location of data types
+// before you perform operations on them:
+
+Location ahmedMap =
+  new Location(new Namespace("maps", "customers"), "ahmed_info");
+```
+
+```ruby
+customers = client.bucket_type('maps').bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+
+# Alternatively, the Ruby client enables you to set a bucket type as being
+# globally associated with a Riak data type. The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+        .update("first_name", ru1)
+        .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+          .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Ahmed')
+  ->updateRegister('phone_number', '5551234567')
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved. 
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+        .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('enterprise_customer', false)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"enterprise_customer">>, flag},
+                        fun(F) -> riakc_flag:disable(F) end,
+                        Map3).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "enterprise_customer_flag": "disable"
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+  }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"];
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map4).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+        .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateCounter('page_visits', $updateCounter)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "page_visits_counter": 1
+    }
+  }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
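+
+A small sketch of that behavior using the Python client, continuing the `map` object from the examples above and assuming the fetched counter exposes a `.value` attribute like the flag shown earlier:
+
+```python
+# Fetching the map after the update shows the newly created counter:
+print(map.reload().counters['page_visits'].value)  # 1
+```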
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles. We'll store
+that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+        .add("robots")
+        .add("opera")
+        .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+        .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('robots')
+  ->add('opera')
+  ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{"interests":["motorcycles","opera","robots"]},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "add_all": [
+          "robots",
+          "opera",
+          "motorcycles"
+        ]
+      }
+    }
+  }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? 
interest + end +end + +# This will return three Boolean values +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$sets = $map->getSet('interests'); +var_dump($sets->getData()); +``` + +```python +reloaded_map = map.reload() +for interest in ['robots', 'opera', 'motorcycles']: + interest in reloaded_map.sets['interests'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +Map ahmedMap = response.Value; + +// All of the following return true: +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("opera"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + assert(rslt.map.sets['interests'].indexOf('robots') !== -1); +}); +``` + +```erlang +riakc_map:dirty_value(Map6). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false +``` + +We learn from a recent purchasing decision that Ahmed actually doesn't +seem to like opera. He's much more keen on indie pop. Let's change the +`interests` set to reflect that: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate() + .remove("opera") + .add("indie pop"); +MapUpdate mu = new MapUpdate() + .update("interests", su); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.batch do |m| + m.sets['interests'].remove('opera') + m.sets['interests'].add('indie pop') +end +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('indie pop') + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +```python +map.sets['interests'].discard('opera') +map.sets['interests'].add('indie pop') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.AddToSet("interests", "indie pop"); +mapOperation.RemoveFromSet("interests", "opera"); + +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); + +MapResponse response = cmd.Response; +Map ahmedMap = response.Value; + +// This is false +ahmedMap.Sets.GetValue("interests").Contains("opera"); + +// These are true +ahmedMap.Sets.GetValue("interests").Contains("indie pop"); +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + mapOp.removeFromSet('interests', 'opera'); + mapOp.addToSet('interests', 'indie pop'); + + options.context = rslt.context; + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map7 = 
riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+                        Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of a wide variety of
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("first_name", ru1)
+        .update("last_name", ru2)
+        .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Annika')
+  ->updateRegister('last_name', 'Weiss')
+  ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.map('annika_info')
+    .setRegister('first_name', 'Annika')
+    .setRegister('last_name', 'Weiss')
+    .setRegister('phone_number', '5559876543');
+
+options.op = mapOp;
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map12 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"first_name">>, register},
+        fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end,
+    Map11),
+Map13 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"last_name">>, register},
+        fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end,
+    Map12),
+Map14 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"phone_number">>, register},
+        fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end,
+    Map13).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "first_name_register": "Annika",
+          "last_name_register": "Weiss",
+          "phone_number_register": "5559876543"
+        }
+      }
+    }
+  }
+  '
+```
+
+The value of a register in a map can be obtained without a special
+method:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+String annikaFirstName = response.getDatatype()
+        .getMap("annika_info")
+        .getRegister("first_name")
+        .view()
+        .toString();
+```
+
+```ruby
+map.maps['annika_info'].registers['first_name']
+
+# "Annika"
+```
+
+```php
+# with param 'returnbody' = 'true', we can fetch the map from our last response
+$map = $response->getMap();
+
+echo $map->getMap('annika_info')->getRegister('first_name'); // Annika
+```
+
+```python
+map.reload().maps['annika_info'].registers['first_name'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+ahmedMap = response.Value;
+ahmedMap.Maps["annika_info"].Registers.GetValue("first_name");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var annikaFirstName =
+        rslt.map.maps['annika_info'].registers['first_name'].toString('utf8');
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map14).
+```
+
+```curl
+# Specific values for fields inside of maps (or maps within maps, for that
+# matter), cannot be obtained directly through the HTTP interface.
+```
+
+Registers can also be removed:
+
+```java
+// This example uses our "ahmedMap" location from above. Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .removeRegister("first_name");
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('first_name')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->removeRegister('first_name');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['first_name']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("first_name");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('first_name');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+    fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+    Map14).
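+
+%% Note: this Erlang example erases the phone_number register (matching
+%% the curl example that follows), whereas the other clients above
+%% removed the first_name register.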
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "remove": ["phone_number_register"]
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+  }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("enterprise_plan", new FlagUpdate(false))
+        .update("family_plan", new FlagUpdate(false))
+        .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_plan', false)
+    ->updateFlag('family_plan', false)
+    ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"enterprise_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map15),
+Map17 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"family_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map16),
+Map18 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"free_plan">>, flag},
+        fun(F) -> riakc_flag:enable(F) end,
+        M) end,
+    Map17).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "enterprise_plan_flag": "disable",
+          "family_plan_flag": "disable",
+          "free_plan_flag": "enable"
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="
+  }
+  '
+```
+
+The value of a flag can be retrieved at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+boolean enterprisePlan = response.getDatatype()
+        .getMap("annika_info")
+        .getFlag("enterprise_plan")
+        .view();
+```
+
+```ruby
+map.maps['annika_info'].flags['enterprise_plan']
+
+# false
+```
+
+```php
+# with param 'returnbody' = 'true', we can fetch the map from our last response
+$map = $response->getMap();
+
+echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false
+```
+
+```python
+map.reload().maps['annika_info'].flags['enterprise_plan'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+ahmedMap = response.Value;
+ahmedMap.Maps["annika_info"].Flags["enterprise_plan"];
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var enterprisePlan =
+        rslt.map.maps.annika_info.flags.enterprise_plan;
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map18).
+```
+
+```curl
+# Specific values for fields inside of maps (or maps within maps, for that
+# matter), cannot be obtained directly through the HTTP interface.
+```
+
+It's also important to track the number of purchases that Annika has
+made with our company.
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
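+
+%% Counter increments within a map, like standalone counter increments,
+%% do not require the map's causal context.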
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
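+
+%% The nested interests set is created implicitly by this first
+%% addition; no separate create step is needed.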
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "add": "tango dancing"
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+We can remove that interest in just the way that we would expect:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate().remove("tango dancing");
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("interests", su);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].sets['interests'].remove('tango dancing')
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('tango dancing');
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($response->getMap()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].sets['interests'].discard('tango dancing')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing");
+
+// Note: using Context from previous response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+client.Execute(builder.Build());
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.removeFromSet('interests', 'tango dancing');
+
+    options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map21 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"interests">>, set},
+        fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end,
+        M) end,
+    Map20).
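+
+%% Removals are the delicate case: del_element/2 should be applied to a
+%% map that was fetched from Riak, so that the causal context from the
+%% fetch accompanies the update.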
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "remove": "tango dancing"
+          }
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+  }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('first_purchase', true)
+    ->updateRegister('amount', '1271')
+    ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map22 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"purchase">>, map},
+        fun(M) -> riakc_map:update(
+            {<<"first_purchase">>, flag},
+            fun(R) -> riakc_flag:enable(R) end,
+            M) end,
+        M) end,
+    Map21).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "purchase_map": {
+            "update": {
+              "first_purchase_flag": "enable",
+              "amount_register": "1271",
+              "items_set": {
+                "add": "large widget"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
diff --git a/content/riak/kv/2.9.4/developing/data-types/sets.md b/content/riak/kv/2.9.4/developing/data-types/sets.md
new file mode 100644
index 0000000000..4e3c8dd031
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/data-types/sets.md
@@ -0,0 +1,770 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Sets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Sets"
+    identifier: "data_types_sets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/using/data-types/sets
+  - /riak/kv/2.9.4/dev/using/data-types/sets
+  - /riak/2.9.4/dev/data-modeling/data-types/sets
+  - /riak/kv/2.9.4/dev/data-modeling/data-types/sets
+---
+
+Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps).
+
+Sets are collections of unique binary values (such as strings); a given
+value can appear in a set only once.
+
+For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `set`:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+```
+
+> **Note**
+>
+> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `sets` bucket type has been set properly, we should see the following pair in our console output:
+
+```
+datatype: set
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate sets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+After creating and activating our new `sets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('cities', 'travel', 'sets');
+```
+
+```python
+travel = client.bucket_type('sets').bucket('travel')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak set.
+cities_set = travel.new('cities')
+
+# You can also create a reference to a set explicitly:
+from riak.datatypes import Set
+
+cities_set = Set(travel, 'cities')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the set with which we want to
+// interact:
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+```
+
+```javascript
+// Now we'll create an options object for the set with which we want to
+// interact:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+CitiesSet = riakc_set:new().
+
+%% Sets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty set through the HTTP interface. Sets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our set is empty. We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+count($set->getData());
+```
+
+```python
+len(cities_set) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+FetchSet fetchSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchSetCommand);
+SetResponse response = fetchSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("set 'cities' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_set:size(CitiesSet) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% set that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a Set
+
+Let's say that we read a travel brochure saying that Toronto and
+Montreal are nice places to go.
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+        .remove("Montreal")
+        .add("Hamilton")
+        .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Hamilton')
+    ->add('Ottawa')
+    ->remove('Montreal')
+    ->atLocation($location)
+    ->withContext($response->getSet()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse response = cmd.Response;
+
+// using System.Linq
+var responseStrings = response.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4).
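+
+%% Note: a set built locally with riakc_set:new/0 carries no causal
+%% context, so in practice you would fetch the set from Riak (as shown
+%% below) before removing elements from it.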
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}
+
+curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \
+  -H "Content-Type: application/json" \
+  -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}'
+```
+
+## Retrieve a Set
+
+Now, we can check which cities are currently in our set:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+# <Set: {"Hamilton", "Ottawa", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+var_dump($set->getData());
+```
+
+```python
+cities_set.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would
+# return frozenset([]).
+
+cities_set.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions or deletions.
+cities_set.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in setResponse.AsStrings)
+{
+    Console.WriteLine("Cities Set Value: {0}", value);
+}
+
+// Output:
+// Cities Set Value: Hamilton
+// Cities Set Value: Ottawa
+// Cities Set Value: Toronto
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("cities set values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: cities set values: 'Hamilton, Ottawa, Toronto'
+```
+
+```erlang
+riakc_set:dirty_value(CitiesSet5).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_set:value(CitiesSet5).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"sets">>,<<"travel">>},
+                                        <<"cities">>).
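+
+%% riakc_set:value(SetX) now returns the members exactly as stored on
+%% the server.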
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="}
+
+# You can also fetch the value of the set without the context included:
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false
+
+# Response
+{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]}
+```
+
+## Find Set Member
+
+Or we can see whether our set includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('Vancouver', $set->getData()); # false
+
+in_array('Ottawa', $set->getData()); # true
+```
+
+```python
+'Vancouver' in cities_set
+# False
+
+'Ottawa' in cities_set
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver");
+bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var cities_set = rslt.values;
+cities_set.indexOf('Vancouver'); // if present, index is >= 0
+cities_set.indexOf('Ottawa'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, CitiesSet5 is the most "recent" set from the
+%% standpoint of our application.
+
+riakc_set:is_element(<<"Vancouver">>, CitiesSet5).
+riakc_set:is_element(<<"Ottawa">>, CitiesSet5).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of Set
+
+We can also determine the size of the set:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+int numberOfCities = response.getDatatype().view().size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($set->getData());
+```
+
+```python
+len(cities_set)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+setResponse.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var cities_set_size = rslt.values.length;
+```
+
+```erlang
+riakc_set:size(CitiesSet5).
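+
+%% As noted earlier, size/1 operates on the value fetched from the
+%% server, not on locally buffered additions.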
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
diff --git a/content/riak/kv/2.9.4/developing/faq.md b/content/riak/kv/2.9.4/developing/faq.md
new file mode 100644
index 0000000000..e53f03dd01
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/faq.md
@@ -0,0 +1,659 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Frequently Asked Questions"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Developing FAQ"
+    identifier: "developing_faq"
+    weight: 108
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.9.4/community/faqs/developing
+  - /riak/kv/2.9.4/community/faqs/developing
+---
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.4/using/performance/benchmarking
+[Bitcask]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask
+[Bucket Properties]: {{<baseurl>}}riak/kv/2.9.4/developing/usage
+[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/commit-hooks
+[Configuration Files]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference
+[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{<baseurl>}}riak/kv/2.9.4/developing/client-libraries
+[MapReduce]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce
+[Memory]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/memory
+[Riak CS]: {{<baseurl>}}riak/cs/2.1.1
+[System Planning]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context#vector-clocks
+
+
+## General
+
+
+**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?**
+
+**A:**
+  If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example:
+
+  ```erlang
+  {bitcask, [
+      {data_root, "data/bitcask"},
+      {expiry_secs, 86400} %% Expire after a day
+  ]},
+  ```
+
+  There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0.
+
+  You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM.
+
+
+---
+
+**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?**
+
+
+**A:**
+  Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free.
+
+  If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact.
+
+
+---
+
+**Q: Can I list buckets or keys in production?**
+
+
+**A:**
+  It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size.
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+
+---
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster, leaving `n_val` - 1 consistent replicas of the affected data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3
+
+  For this example, we use small integers instead of the actual 160-bit partition index values for the sake of simplicity. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists (a short derivation sketch follows the table):
+
+<table>
+<tr><td>0-1-2</td><td>1-2-3</td><td>2-3-4</td><td>3-4-5</td></tr>
+<tr><td>4-5-6</td><td>5-6-7</td><td>6-7-8</td><td>7-8-9</td></tr>
+<tr><td>8-9-10</td><td>9-10-11</td><td>10-11-12</td><td>11-12-13</td></tr>
+<tr><td>12-13-14</td><td>13-14-15</td><td>14-15-0</td><td>15-0-1</td></tr>
+</table>
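+
+  For readers who want to see the arithmetic, the preflists above can be
+  derived with a few lines of Python (an illustrative sketch, not part
+  of any Riak client; `RING_SIZE` and `N_VAL` mirror the example
+  cluster):
+
+  ```python
+  # Each partition heads a preflist made of itself plus the next
+  # n_val - 1 partitions in the ring, wrapping around at the end.
+  RING_SIZE = 16
+  N_VAL = 3
+
+  preflists = [
+      [(p + i) % RING_SIZE for i in range(N_VAL)]
+      for p in range(RING_SIZE)
+  ]
+
+  for pl in preflists:
+      print("-".join(str(v) for v in pl))
+  # Prints 0-1-2, 1-2-3, ..., 15-0-1 -- the 16 preflists shown above.
+  ```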
+ + Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes: + + + + + + + + + + + + + + + + +
+<table>
+<tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-2-5-8-10-13</td></tr>
+<tr><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-4-7-10-13</td><td>0-3-5-7-10-13</td></tr>
+<tr><td>0-3-5-8-10-13</td><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-7-10-13</td></tr>
+<tr><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td></tr>
+<tr><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td></tr>
+<tr><td>0-3-6-9-12-15</td><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td></tr>
+<tr><td>1-3-6-9-11-14</td><td>1-3-6-9-12-14</td><td>1-3-6-9-12-15</td><td>1-4-5-8-11-14</td></tr>
+<tr><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td></tr>
+<tr><td>1-4-7-8-11-14</td><td>1-4-7-9-11-14</td><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td></tr>
+<tr><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td></tr>
+<tr><td>1-4-7-10-13-15</td><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-4-7-9-12-15</td></tr>
+<tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-6-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+<tr><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td><td>2-5-8-9-12-15</td><td>2-5-8-10-12-15</td></tr>
+<tr><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-13-15</td><td>2-5-8-11-14-15</td></tr>
+</table>
+
+  When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned; the reply will therefore either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency.
+  A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for more than half of them to respond. This yields consistent responses to GETs even when one of the vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may include only 1 member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+  Continuing with the above example, consider what happens if node C is force-removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+  In this new 4-node configuration, any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+
+  So, making a few assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+      ```
+      a - 0-1-2
+      b - 6-7-8
+      c - 10-11-12
+      ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys will therefore vary with the nodes chosen for the coverage set; the short simulation sketch after these tables reproduces the tally. Of the 56 possible covering sets ...
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+<table>
+<tr><td>0-2-5-8-10-13</td><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-5-8-10-13</td></tr>
+<tr><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td></tr>
+<tr><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td></tr>
+<tr><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td><td>1-3-6-9-11-14</td></tr>
+<tr><td>1-4-5-8-11-14</td><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-7-8-11-14</td></tr>
+</table>
+ + * 24 sets (42.9%) will return 2 of the 3 keys: + + + + + + + + + + + +
+<table>
+<tr><th colspan="4"><code>{a,b}</code> (7 sets)</th></tr>
+<tr><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td><td>0-3-6-9-12-15</td><td>1-3-6-9-12-14</td></tr>
+<tr><td>1-3-6-9-12-15</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td><td></td></tr>
+<tr><th colspan="4"><code>{a,c}</code> (12 sets)</th></tr>
+<tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-3-4-7-10-13</td></tr>
+<tr><td>0-3-5-7-10-13</td><td>0-3-6-7-10-13</td><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td></tr>
+<tr><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td><td>1-4-7-10-13-15</td><td>1-4-7-9-11-14</td></tr>
+<tr><th colspan="4"><code>{b,c}</code> (5 sets)</th></tr>
+<tr><td>2-5-8-10-12-15</td><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-14-15</td></tr>
+<tr><td>2-5-8-11-13-15</td><td></td><td></td><td></td></tr>
+</table>
+ + * 10 sets (17.8%) will return only one of the 3 keys: + + + + + + + +
+<table>
+<tr><th colspan="4"><code>{a}</code> (2 sets)</th></tr>
+<tr><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td><td></td><td></td></tr>
+<tr><th colspan="4"><code>{b}</code> (4 sets)</th></tr>
+<tr><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-5-6-9-12-15</td><td>2-5-8-9-12-15</td></tr>
+<tr><th colspan="4"><code>{c}</code> (4 sets)</th></tr>
+<tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td></tr>
+</table>
+ + * 2 sets (3.6%) will not return any of the 3 keys + + +
+<table>
+<tr><td>2-4-7-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+</table>
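+
+  The tally above can be verified by brute force. Here is a short,
+  illustrative Python sketch (not part of any Riak client or tool;
+  the vnode numbers mirror the example) that enumerates the covering
+  sets and counts which keys each one can still see:
+
+  ```python
+  from collections import Counter
+  from itertools import combinations
+
+  RING_SIZE = 16
+  COVER_SIZE = 6  # minimum covering set: 1/n_val of 16 vnodes, rounded up
+
+  # Every preflist is a partition plus the next two partitions in the ring.
+  preflists = [{p, (p + 1) % RING_SIZE, (p + 2) % RING_SIZE}
+               for p in range(RING_SIZE)]
+
+  # A covering set of six vnodes must intersect every preflist.
+  covers = [set(c) for c in combinations(range(RING_SIZE), COVER_SIZE)
+            if all(pl & set(c) for pl in preflists)]
+  print(len(covers))  # 56
+
+  empty = {2, 7, 12}  # new, not-yet-repaired primaries after force-remove
+  keys = {'a': {0, 1, 2}, 'b': {6, 7, 8}, 'c': {10, 11, 12}}
+
+  # A key is returned only if the cover touches a non-empty vnode from
+  # its preflist.
+  tally = Counter(
+      frozenset(k for k, pl in keys.items() if (pl - empty) & cover)
+      for cover in covers
+  )
+  for found in sorted(tally, key=len, reverse=True):
+      print(sorted(found), tally[found])
+  # All three keys: 20 covers; two keys: 24; one key: 10; none: 2.
+  ```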
+ +--- + +**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. + +--- + +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + + +--- + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + + +--- + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
Handoff should then restart, but there may be no visual indicator.
+  Simply leave the node running for a while. It should eventually hand
+  off all data and then shut down. Verify handoff by once again checking
+  the size of your data directories.
+
+  ```erlang
+  ClusterNode = 'riak@127.0.0.1'.
+
+  application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+  {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+  Ring2 = setelement(2, Ring, node()).
+  riak_core_ring_manager:set_my_ring(Ring2).
+  riak_core_ring_manager:write_ringfile().
+  [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()].
+  ```
+
+
+---
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+  There isn't a limit on object size, but we suggest you keep it to no more than 1-2 MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or, if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+
+---
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+  The storage per key is 40 bytes plus the key size and bucket name size.
+
+  Example:
+
+  Key size: 15 bytes.
+  Bucket Name size: 10 bytes.
+
+  Total size = 40 + 15 + 10 = **65 bytes**.
+
+
+
+---
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+  It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+
+---
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+  The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+  Bucket properties for non-default bucket types are stored in the cluster metadata system, which is a more efficient way of replicating this information around a Riak cluster.
+
+  The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+
+---
+
+**Q: Are Riak keys / buckets case sensitive?**
+
+
+**A:**
+  Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+
+---
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+  We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+  1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+  2. You will be able to upgrade Riak and your application independently of one another.
+  3. When your application or Riak needs more capacity, you can scale them separately to meet your production needs.
+
+
+---
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+  Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+
+
+---
+
+**Q: How do I spread requests across---i.e.

---

**Q: How do I spread requests across---i.e. load balance---a Riak cluster?**


**A:**
There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**.

For further information see [System Planning].


---


**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?**
There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node, a series of merges is kicked off and the total size of the data directory shrinks. Why does this happen?


**A:**
Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows:

1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`)
2. Remove the currently active file from the list; the active file is the one being actively written
3. Look up file stats for each data file; this includes percent fragmentation and number of dead bytes
4. If any of the stats exceed the defined triggers, the Bitcask directory is merged

The default triggers for a Bitcask directory:

* `{frag_merge_trigger, 60}, % >= 60% fragmentation`
* `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB`

In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory.

If Riak were never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default).


---

**Q: When retrieving a list of siblings I am getting the same vtag multiple times.**
When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling?


**A:**
The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata.

It is possible to get siblings with the same vtag during vector clock pruning and read/repair.

See [vector clocks] for more information.



---

**Q: How should I structure larger data objects?**
I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs?


**A:**
The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include:

1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time?
2. How likely are the objects to be updated at the same time by multiple processes?

If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. Generally, you will want to add links to connect the objects for easy fetching and traversal.


---

**Q: Is there any way in Riak to limit access to a user or a group of users?**


**A:**
Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication.

If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it.


---

**Q: Is there a way to enforce a schema on data in a given bucket?**
Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error.


**A:**
Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. This document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in JavaScript that restricts non-JSON content.


---

**Q: How does the Erlang Riak Client manage node failures?**
Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down?


**A:**
The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function.

* `queue_if_disconnected` (default: `false`) --- requests will be queued when the connection to the server is lost.
* `auto_reconnect` (default: `false`) --- if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`.

If these options are both false, connection errors will be returned to the process making requests as `{error, Reason}` tuples.


---

**Q: Is there a limiting factor for the number of buckets in a cluster?**


**A:**
As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node.

More on [Bucket Properties].


---

**Q: Is it possible to configure a single bucket's properties in `app.config`?**


**A:**
Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state.

You can read more on `app.config` in [Configuration Files].


---

**Q: Is there a simple command to delete a bucket?**


**A:**
There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work:

```curl
curl -X DELETE http://your-host:8098/riak/your-bucket
```


---

**Q: Can Riak be configured to fail an update instead of generating a conflict?**


**A:**
No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do.


---

**Q: How can I limit the number of keys retrieved?**


**A:**
You'll need to use a [MapReduce] job for this.

You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster. It will only reduce load on your client.


---

**Q: How is the real hash value for replicas calculated based on the preflist?**


**A:**
The hash is calculated first, and then the next *N* partitions are chosen for the preflist.


---

**Q: Do client libraries support load balancing/round robin?**


**A:**

* The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred.
* The Java client is strictly round-robin, but with retries built in.
* The Python client also follows round-robin without retries.
* The Erlang client does not support any load balancing.

## MapReduce


**Q: Does the number of keys in a bucket affect the performance of MapReduce?**


**A:**
Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run.


---

**Q: How do I filter out `not_found` from MapReduce results?**
If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys.


**A:**
There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list].


---

**Q: Is it possible to call a reduce function at specific intervals during a map function?**
When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often.


**A:**
Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce.


---

**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?**


**A:**
Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase.


---

**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?**


**A:**
This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com].


---

**Q: Why does MapReduce return a JSON object on occasion instead of an array?**


**A:**
`mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---is turned into a hash:

```erlang
list_to_binary(mochijson2:encode([{a, b}, {foo, bar}])).
<<"{\"a\":\"b\",\"foo\":\"bar\"}">>
```

JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists.
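
A minimal sketch of that workaround, assuming the stock `mochijson2` module shown above: encoding the pairs as length-2 lists keeps them as a JSON array of arrays rather than collapsing them into an object.

```erlang
%% Length-2 lists are encoded as plain JSON arrays, so each pair
%% survives as a pair instead of becoming an object field:
list_to_binary(mochijson2:encode([[a, b], [foo, bar]])).
%% <<"[[\"a\",\"b\"],[\"foo\",\"bar\"]]">>
```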
+ diff --git a/content/riak/kv/2.9.4/developing/getting-started.md b/content/riak/kv/2.9.4/developing/getting-started.md new file mode 100644 index 0000000000..7e2db53201 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/getting-started.md @@ -0,0 +1,47 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +--- + +[install index]: {{}}riak/kv/2.9.4/setup/installing +[dev client libraries]: {{}}riak/kv/2.9.4/developing/client-libraries + +Welcome, new Riak developer! This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
* [Java]({{}}riak/kv/2.9.4/developing/getting-started/java)
* [Ruby]({{}}riak/kv/2.9.4/developing/getting-started/ruby)
* [Python]({{}}riak/kv/2.9.4/developing/getting-started/python)
* [C Sharp]({{}}riak/kv/2.9.4/developing/getting-started/csharp)
* [Node.js]({{}}riak/kv/2.9.4/developing/getting-started/nodejs)
* [Erlang]({{}}riak/kv/2.9.4/developing/getting-started/erlang)
* [PHP]({{}}riak/kv/2.9.4/developing/getting-started/php)
* [Go]({{}}riak/kv/2.9.4/developing/getting-started/golang)

### Community-supported Client Libraries

Please see our [client libraries][dev client libraries] page for a listing of
community-supported clients.

diff --git a/content/riak/kv/2.9.4/developing/getting-started/csharp.md b/content/riak/kv/2.9.4/developing/getting-started/csharp.md
new file mode 100644
index 0000000000..b1afc203b6
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/csharp.md
@@ -0,0 +1,83 @@
---
title: "Getting Started with C Sharp"
description: ""
project: "riak_kv"
project_version: 2.9.4
menu:
  riak_kv-2.9.4:
    name: "C Sharp"
    identifier: "getting_started_csharp"
    weight: 103
    parent: "developing_getting_started"
toc: true
aliases:
  - /riak/2.9.4/dev/taste-of-riak/csharp
  - /riak/kv/2.9.4/dev/taste-of-riak/csharp
---

If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.4/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of the .NET Framework or Mono is required.

### Client Setup

Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager.

{{% note title="Configuring for a remote cluster" %}}
By default, the Riak .NET Client will add a section to your `app.config` file
for a four node local cluster. If you are using a remote cluster, open up
`app.config` and change the `hostAddress` values to point to nodes in your
remote cluster.
{{% /note %}}

### Connecting to Riak

Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object.

```csharp
using System;
using RiakClient;

namespace TasteOfRiak
{
    class Program
    {
        static void Main(string[] args)
        {
            // don't worry, we'll use this string later
            const string contributors = "contributors";
            IRiakEndPoint cluster = RiakCluster.FromConfig("riakConfig");
            IRiakClient client = cluster.CreateClient();
        }
    }
}
```

This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndPoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak.

Let's make sure the cluster is online. Add this to your `Main` method:

```csharp
var pingResult = client.Ping();

if (pingResult.IsSuccess)
{
    Console.WriteLine("pong");
}
else
{
    Console.WriteLine("Are you sure Riak is running?");
    Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage);
}
```

This is some simple code to test that a node in a Riak cluster is online --- we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes.

We are now ready to start interacting with Riak.

## Next Steps

[CRUD Operations]({{}}riak/kv/2.9.4/developing/getting-started/csharp/crud-operations)

diff --git a/content/riak/kv/2.9.4/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/2.9.4/developing/getting-started/csharp/crud-operations.md
new file mode 100644
index 0000000000..785df3485e
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/csharp/crud-operations.md
@@ -0,0 +1,144 @@
---
title_supertext: "Getting Started:"
title: "CRUD Operations with C Sharp"
description: ""
project: "riak_kv"
project_version: 2.9.4
menu:
  riak_kv-2.9.4:
    name: "CRUD Operations"
    identifier: "getting_started_csharp_crud"
    weight: 100
    parent: "getting_started_csharp"
toc: true
---

### Creating Objects In Riak

Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak.

The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At the most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak.

Add the `RiakClient.Models` namespace to your using directives. Your usings should look like this:

```csharp
using System;
using System.Collections.Generic;
using RiakClient;
using RiakClient.Models;
```

Add the `Person` class to the `TasteOfRiak` namespace:

```csharp
public class Person
{
    public string EmailAddress { get; set; }
    public string FirstName { get; set; }
    public string LastName { get; set; }
}
```

Now let's create some people!

```csharp
var people = new[]
{
    new Person {
        EmailAddress = "bashoman@basho.com",
        FirstName = "Basho",
        LastName = "Man"
    },
    new Person {
        EmailAddress = "johndoe@gmail.com",
        FirstName = "John",
        LastName = "Doe"
    }
};

foreach (var person in people)
{
    var o = new RiakObject(contributors, person.EmailAddress, person);
    var putResult = client.Put(o);

    if (putResult.IsSuccess)
    {
        Console.WriteLine("Successfully saved {0} to bucket {1}", o.Key, o.Bucket);
    }
    else
    {
        Console.WriteLine("Are you *really* sure Riak is running?");
        Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage);
    }
}
```

In this sample, we create a collection of `Person` objects and then save each `Person` to Riak.

Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`.

Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, you'll see an error message displaying the result code and a description of the error.

### Reading from Riak

Let's find a person!

```csharp
// declared here so we can reuse bashoman when modifying the object below
Person bashoman = null;

var result = client.Get(contributors, "bashoman@basho.com");
if (result.IsSuccess)
{
    bashoman = result.Value.GetObject<Person>();
    Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors);
}
else
{
    Console.WriteLine("Something went wrong!");
    Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage);
}
```

We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult<RiakObject>` which, like other `RiakResult` objects, helpfully encapsulates the communication with Riak.
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. + diff --git a/content/riak/kv/2.9.4/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/2.9.4/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..fa477a1d01 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,108 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.4/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/2.9.4/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 

| Bucket | Key Pattern | Example Key
|:-------|:------------|:-----------
| `Users` | `<user_name>` | `joeuser`
| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13`
| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br/>`marketing_group_Inbox_2014-03-06` |

For the `Users` bucket, we can be certain that we will want each
username to be unique, so let's use the `username` as the key.

For the `Msgs` bucket, let's use a combination of the username and the
posting UTC datetime in an [ISO 8601][iso_8601]
format. This combination gives us the pattern `<user_name>_<datetime>`,
which produces keys like `joeuser_2014-03-05T23:20:28`.

Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
timelines, so we can simply add that type into the key name. We will
also want to partition each collection object into some time period,
that way the object doesn't grow too large (see note below).

For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
users, and `<group_name>_Inbox_<date>` for groups, which will look like
`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`,
respectively.

{{% note title="Note" %}}
Riak performs best with objects under 1-2MB. Objects larger than that can hurt
performance, especially when many siblings are being created. We will cover
siblings, sibling resolution, and sibling explosions in the next chapter.
{{% /note %}}

#### Keeping our story straight with repositories

Now that we've figured out our object model, please refer to
[this source code][2] for the repositories that we'll be using.

[This console application][3] exercises the code that we've written.

The repository pattern and `TimelineManager` help with a few things:

 - They help us to see if an object exists before creating a new one
 - They keep our buckets and key names consistent
 - They provide us with a consistent interface to work with.

While this set of repositories solves many of our problems, it is very
minimal and doesn't cover all the edge cases. For instance, what happens
if two different people try to create a user with the same username?

We can also easily "compute" key names now, but how do we quickly look
up the last 10 messages a user sent? Many of these answers will be
application dependent. If your application shows the last 10 messages in
reverse order, for example, you may want to store that set of data in
another collection object to make lookup faster. There are drawbacks to
every solution, but we recommend seeking out the key/value-based
solution first, as it will likely be the quickest.
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + + diff --git a/content/riak/kv/2.9.4/developing/getting-started/csharp/querying.md b/content/riak/kv/2.9.4/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..c18687c019 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/getting-started/csharp/querying.md @@ -0,0 +1,211 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.4/dev/taste-of-riak/querying-csharp + - /riak/kv/2.9.4/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 

```csharp
Console.WriteLine("Creating Data");
Customer customer = CreateCustomer();
IEnumerable<Order> orders = CreateOrders(customer);
OrderSummary orderSummary = CreateOrderSummary(customer, orders);

Console.WriteLine("Starting Client");
using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig"))
{
    IRiakClient client = endpoint.CreateClient();

    Console.WriteLine("Storing Data");

    client.Put(ToRiakObject(customer));

    foreach (Order order in orders)
    {
        // NB: this adds secondary index data as well
        client.Put(ToRiakObject(order));
    }

    client.Put(ToRiakObject(orderSummary));

    ...
    ...
    ...
}
```

While individual `Customer` and `Order` objects don't change much (or
shouldn't change), the `OrderSummaries` object will likely change often.
It will do double duty by acting as an index for all a customer's
orders, and also holding some relevant data such as the order total,
etc. If we showed this information in our application often, it's only
one extra request to get all the info.

```csharp
Console.WriteLine("Fetching related data by shared key");
string key = "1";

var result = client.Get(customersBucketName, key);
CheckResult(result);
Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result));

result = client.Get(orderSummariesBucketName, key);
CheckResult(result);
Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result));
```

Which returns our amalgamated objects:

```bash
Fetching related data by shared key
Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
```

While this pattern is very easy and extremely fast with respect to
queries and complexity, it's up to the application to know about these
intrinsic relationships.

## Secondary Indexes

{{% note %}}
Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.4/setup/planning/backend/bitcask) does not support secondary indexes.

See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
{{% /note %}}

If you're coming from an SQL world, Secondary Indexes (2i) are a lot
like SQL indexes. They are a way to quickly look up objects based on a
secondary key, without scanning through the whole dataset. This makes it
very easy to find groups of related data by values, or even ranges of
values. To properly show this off, we will make a note of where
secondary index data is added to our model objects.

```csharp
private static RiakObject ToRiakObject(Order order)
{
    var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString());
    var riakObject = new RiakObject(orderRiakObjectId, order);

    IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName);
    salesPersonIdIndex.Add(order.SalesPersonId.ToString());

    BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName);
    orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd"));

    return riakObject;
}
```

As you may have noticed, ordinary key/value data is opaque to 2i, so we
have to add entries to the indexes at the application level. Now let's
find all of Jane Appleseed's processed orders. We'll look up the orders
by searching the `SalespersonId` integer index for Jane's id of `9000`.

```csharp
// Query for order keys where the SalesPersonId index is set to 9000
var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName);
RiakResult<RiakIndexResult> indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here.
CheckResult(indexRiakResult);
RiakIndexResult indexResult = indexRiakResult.Value;
Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
```

Which returns:

```text
Jane's orders (key values): 1, 3
```

Jane processed orders 1 and 3. We used an "integer" index to reference
Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
of Sales wants to know how many orders came in during October 2013. In
this case, we can exploit 2i's range queries. Let's search the
`OrderDate` binary index for entries between `2013-10-01` and
`2013-10-31`.

```csharp
// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName);
indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here.
CheckResult(indexRiakResult);
indexResult = indexRiakResult.Value;
Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
```

Which returns:

```text
October orders (key values): 1, 2
```

We used 2i's range feature to search for a range of values, and demonstrated binary indexes.

So to recap:

* You can use Secondary Indexes to quickly look up an object based on a
  secondary id other than the object's key.
* Indexes can have either Integer or Binary(String) keys
* You can search for specific values, or a range of values
* Riak will return a list of keys that match the index query


[taste_of_riak]: https://github.com/basho/taste-of-riak
[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip

diff --git a/content/riak/kv/2.9.4/developing/getting-started/erlang.md b/content/riak/kv/2.9.4/developing/getting-started/erlang.md
new file mode 100644
index 0000000000..324551636b
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/erlang.md
@@ -0,0 +1,56 @@
---
title: "Getting Started with Erlang"
description: ""
project: "riak_kv"
project_version: 2.9.4
menu:
  riak_kv-2.9.4:
    name: "Erlang"
    identifier: "getting_started_erlang"
    weight: 105
    parent: "developing_getting_started"
toc: true
aliases:
  - /riak/2.9.4/dev/taste-of-riak/erlang
  - /riak/kv/2.9.4/dev/taste-of-riak/erlang
---

If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.4/using/running-a-cluster) first.

To try this flavor of Riak, a working installation of Erlang is
required. You can also use the `erts` Erlang installation that comes
with Riak.

## Client Setup

Download the latest Erlang client from GitHub
([zip](https://github.com/basho/riak-erlang-client/archive/master.zip),
[GitHub repository](https://github.com/basho/riak-erlang-client/)) and
extract it to your working directory.

Next, open the Erlang console with the client library paths included.

```bash
erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin
```

Now let's create a link to the Riak node. If you are using a single
local Riak node, use the following to create the link:

```erlang
{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
```

If you set up a local Riak cluster using the five-minute install
method, use this code snippet instead:

```erlang
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017).
```

We are now ready to start interacting with Riak.

## Next Steps

[CRUD Operations]({{}}riak/kv/2.9.4/developing/getting-started/erlang/crud-operations)

diff --git a/content/riak/kv/2.9.4/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/2.9.4/developing/getting-started/erlang/crud-operations.md
new file mode 100644
index 0000000000..37e870949a
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/erlang/crud-operations.md
@@ -0,0 +1,168 @@
---
title_supertext: "Getting Started:"
title: "CRUD Operations with Erlang"
description: ""
project: "riak_kv"
project_version: 2.9.4
menu:
  riak_kv-2.9.4:
    name: "CRUD Operations"
    identifier: "getting_started_erlang_crud"
    weight: 100
    parent: "getting_started_erlang"
toc: true
---

## Creating Objects In Riak

First, let's create a few Riak objects. For these examples we'll be
using the bucket `test`.

```erlang
MyBucket = <<"test">>.

Val1 = 1.
Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1).
riakc_pb_socket:put(Pid, Obj1).
```

In this first example, we have stored the integer 1 with the lookup key
of `one`. Next, let's store a simple string value of `two` with a
matching key.

```erlang
Val2 = <<"two">>.
Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2).
riakc_pb_socket:put(Pid, Obj2).
```

That was easy. Finally, let's store something more complex, a tuple this
time. You will probably recognize the pattern by now.

```erlang
Val3 = {value, 3}.
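%% Non-binary terms like this tuple are serialized by the client with
%% term_to_binary/1, which is why we call binary_to_term/1 when reading
%% them back below.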
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
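%% This prints the original #book{} record.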
```

Next let's clean up our mess:

```erlang
riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>).
riakc_pb_socket:stop(Pid).
```

diff --git a/content/riak/kv/2.9.4/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.9.4/developing/getting-started/erlang/object-modeling.md
new file mode 100644
index 0000000000..f082baabc9
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/erlang/object-modeling.md
@@ -0,0 +1,339 @@
---
title_supertext: "Getting Started:"
title: "Object Modeling with Erlang"
description: ""
project: "riak_kv"
project_version: 2.9.4
menu:
  riak_kv-2.9.4:
    name: "Object Modeling"
    identifier: "getting_started_erlang_object"
    weight: 102
    parent: "getting_started_erlang"
toc: true
aliases:
  - /riak/2.9.4/dev/taste-of-riak/object-modeling-erlang
  - /riak/kv/2.9.4/dev/taste-of-riak/object-modeling-erlang
---

To get started, let's create the records that we'll be using.

{{% note title="Code Download" %}}
You can also download the code for this chapter at
[Github](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema).

The Github version includes Erlang type specifications which have been omitted
here for brevity.
{{% /note %}}


```erlang
%% msgy.hrl

-define(USER_BUCKET, <<"Users">>).
-define(MSG_BUCKET, <<"Msgs">>).
-define(TIMELINE_BUCKET, <<"Timelines">>).
-define(INBOX, "Inbox").
-define(SENT, "Sent").

-record(user, {user_name, full_name, email}).

-record(msg, {sender, recipient, created, text}).

-record(timeline, {owner, msg_type, msgs}).
```

We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.4/developing/usage/bucket-types) here, so we don't need to specify one.

To use these records to store data, we will first have to create a user
record. Then, when a user creates a message, we will append that message
to one or more timelines. If it's a private message, we'll append it to
the Recipient's `Inbox` timeline and to the User's own `Sent` timeline.
If it's a group message, we'll append it to the Group's timeline, as
well as to the User's `Sent` timeline.

#### Buckets and keys revisited

Now that we've worked out how we will differentiate data in the system,
let's figure out our bucket and key names.

The bucket names are straightforward. We can use `Users`, `Msgs`, and
`Timelines`. The key names, however, are a little more tricky. In past
examples we've used sequential integers, but this presents a problem: we
would need a secondary service to hand out these IDs. This service could
easily be a future bottleneck in the system, so let's use a natural key.
Natural keys are a great fit for key/value systems because both humans
and computers can easily construct them when needed, and most of the
time they can be made unique enough for a KV store.


Bucket | Key Pattern | Example Key
:------|:------------|:-----------
`Users` | `<user_name>` | `joeuser`
`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br/>`marketing_group_Inbox_2014-03-06Z` |

For the `Users` bucket, we can be certain that we will want each
username to be unique, so let's use the `username` as the key. For the
`Msgs` bucket, let's use a combination of the username and the posting
datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
format. This combination gives us the pattern `<user_name>_<datetime>`,
which produces keys like `joeuser_2014-03-05T23:20:28Z`.

Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
timelines, so we can simply add that type into the key name. We will
also want to partition each collection object into some time period,
that way the object doesn't grow too large (see note below).

For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
users, and `<group_name>_Inbox_<date>` for groups, which will look like
`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
respectively.

{{% note title="Note" %}}
Riak performs best with objects under 1-2 MB. Objects larger than that can
hurt performance, especially if many siblings are being created. We will cover
siblings, sibling resolution, and sibling explosions in the next chapter.
{{% /note %}}

#### Keeping our story straight with repositories

Now that we've figured out our object model, let's write some modules to
act as repositories that will help us create and work with these records
in Riak:

```erlang
%% user_repository.erl

-module(user_repository).
-export([save_user/2,
         get_user/2]).
-include("msgy.hrl").

save_user(ClientPid, User) ->
    RUser = riakc_obj:new(?USER_BUCKET,
                          list_to_binary(User#user.user_name),
                          User),
    riakc_pb_socket:put(ClientPid, RUser).

get_user(ClientPid, UserName) ->
    {ok, RUser} = riakc_pb_socket:get(ClientPid,
                                      ?USER_BUCKET,
                                      list_to_binary(UserName)),
    binary_to_term(riakc_obj:get_value(RUser)).
```
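
As a quick check of the user repository, a hypothetical shell session might look like the following. It assumes `Pid` is a `riakc_pb_socket` connection as shown earlier, and that the record definitions have been loaded into the shell (for example with `rr("msgy.hrl").`):

```erlang
%% Hypothetical shell usage; assumes Pid from riakc_pb_socket:start_link/2
%% and records loaded with rr("msgy.hrl").
Joe = #user{user_name="joeuser", full_name="Joe User", email="joe.user@basho.com"}.
user_repository:save_user(Pid, Joe).
user_repository:get_user(Pid, "joeuser").
%% => #user{user_name = "joeuser", full_name = "Joe User",
%%          email = "joe.user@basho.com"}
```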
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
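%% End-to-end walkthrough: create two users, post a private message,
%% then read the recipient's inbox timeline back out of Riak.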
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + diff --git a/content/riak/kv/2.9.4/developing/getting-started/erlang/querying.md b/content/riak/kv/2.9.4/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..84daae3e2d --- /dev/null +++ b/content/riak/kv/2.9.4/developing/getting-started/erlang/querying.md @@ -0,0 +1,305 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.4/dev/taste-of-riak/querying-erlang + - /riak/kv/2.9.4/dev/taste-of-riak/querying-erlang +--- + + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.9.4/developing/key-value-modeling). 

## Denormalization

If you're coming from a relational database, the easiest way to get your
application's feet wet with NoSQL is to denormalize your data into
related chunks. For example, with a customer database, you might have
separate tables for customers, addresses, preferences, etc. In Riak, you
can denormalize all that associated data into a single object and store
it into a `Customer` bucket. You can keep pulling in associated
data until you hit one of the big denormalization walls:

* Size limits (objects greater than 1MB)
* Shared/referential data (data that the object doesn't "own")
* Differences in access patterns (objects that get read/written once vs.
  often)

At one of these points we will have to split the model.

## Same Keys, Different Buckets

The simplest way to split up data would be to use the same identity key
across different buckets. A good example of this would be a `Customer`
object, an `Order` object, and an `OrderSummaries` object that keeps
rolled up info about orders such as total, etc.

Let's put some data into Riak so we can play with it. Fire up your
Erlang REPL with the client library in the path, and enter in the
following:

```erlang
rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}).
rd(item, {item_id, title, price}).
rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}).
rd(order_summary_entry, {order_id, total, order_date}).
rd(order_summary, {customer_id, summaries}).


Customer = #customer{ customer_id= 1,
                      name= "John Smith",
                      address= "123 Main Street",
                      city= "Columbus",
                      state= "Ohio",
                      zip= "43210",
                      phone= "+1-614-555-5555",
                      created_date= {{2013,10,1},{14,30,26}}}.

Orders = [ #order{
             order_id= 1,
             customer_id= 1,
             salesperson_id= 9000,
             items= [
               #item{
                 item_id= "TCV37GIT4NJ",
                 title= "USB 3.0 Coffee Warmer",
                 price= 15.99 },
               #item{
                 item_id= "PEG10BBF2PP",
                 title= "eTablet Pro, 24GB, Grey",
                 price= 399.99 }],
             total= 415.98,
             order_date= {{2013,10,1},{14,42,26}}},

           #order{
             order_id= 2,
             customer_id= 1,
             salesperson_id= 9001,
             items= [
               #item{
                 item_id= "OAX19XWN0QP",
                 title= "GoSlo Digital Camera",
                 price= 359.99 }],
             total= 359.99,
             order_date= {{2013,10,15},{16,43,16}}},

           #order {
             order_id= 3,
             customer_id= 1,
             salesperson_id= 9000,
             items= [
               #item{
                 item_id= "WYK12EPU5EZ",
                 title= "Call of Battle= Goats - Gamesphere 4",
                 price= 69.99 },
               #item{
                 item_id= "TJB84HAA8OA",
                 title= "Bricko Building Blocks",
                 price= 4.99 }],
             total= 74.98,
             order_date= {{2013,11,3},{17,45,28}}}
         ].

OrderSummary = #order_summary{
                 customer_id= 1,
                 summaries= [
                   #order_summary_entry{
                     order_id= 1,
                     total= 415.98,
                     order_date= {{2013,10,1},{14,42,26}}
                   },
                   #order_summary_entry{
                     order_id= 2,
                     total= 359.99,
                     order_date= {{2013,10,15},{16,43,16}}
                   },
                   #order_summary_entry{
                     order_id= 3,
                     total= 74.98,
                     order_date= {{2013,11,3},{17,45,28}}}]}.

%% Remember to replace the IP and port parameters with those that match your cluster.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017).

CustomerBucket = <<"Customers">>.
OrderBucket = <<"Orders">>.
OrderSummariesBucket = <<"OrderSummaries">>.

CustObj = riakc_obj:new(CustomerBucket,
                        list_to_binary(
                          integer_to_list(
                            Customer#customer.customer_id)),
                        Customer).

riakc_pb_socket:put(Pid, CustObj).
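%% Store each order under its order_id, converted to a binary, as the key.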
+ +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.4/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
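%% For each order: fetch it, attach a binary "order_date" index and an
%% integer "salesperson_id" index to its metadata, then write it back.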
+
+AddIndicesToOrder = fun(OrderKey) ->
+  {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket,
+                                    list_to_binary(integer_to_list(OrderKey))),
+
+  OrderData = binary_to_term(riakc_obj:get_value(Order)),
+  OrderMetadata = riakc_obj:get_update_metadata(Order),
+
+  MD1 = riakc_obj:set_secondary_index(OrderMetadata,
+                                      [{{binary_index, "order_date"},
+                                        [FormatDate(OrderData#order.order_date)]}]),
+
+  MD2 = riakc_obj:set_secondary_index(MD1,
+                                      [{{integer_index, "salesperson_id"},
+                                        [OrderData#order.salesperson_id]}]),
+
+  Order2 = riakc_obj:update_metadata(Order, MD2),
+  riakc_pb_socket:put(Pid, Order2)
+end.
+
+lists:foreach(AddIndicesToOrder, [1,2,3]).
+
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indices at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```erlang
+riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"3">>],
+                      undefined,undefined}}
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id; next, let's use a "binary" index. Let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`order_date_bin` index for entries between `20131001` and `20131031`.
+
+```erlang
+riakc_pb_socket:get_index_range(Pid, OrderBucket,
+                                {binary_index, "order_date"},
+                                <<"20131001">>, <<"20131031">>).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"2">>],
+                      undefined,undefined}}
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So, to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indices can have either Integer or Binary (String) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/golang.md b/content/riak/kv/2.9.4/developing/getting-started/golang.md
new file mode 100644
index 0000000000..afdaf23553
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/golang.md
@@ -0,0 +1,79 @@
+---
+title: "Getting Started with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Go"
+    identifier: "getting_started_go"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/golang
+  - /riak/kv/2.9.4/dev/taste-of-riak/golang
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.4/using/running-a-cluster) first and ensure you have
+[a working installation of Go](http://golang.org/doc/install).
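+
+If you want to confirm the toolchain is ready first, a quick sanity
+check (any reasonably recent Go release should work) is:
+
+```bash
+go version
+```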
+
+## Client Setup
+
+First install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"sync"
+
+	riak "github.com/basho/riak-go-client"
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+	var err error
+
+	// un-comment-out to enable debug logging
+	// riak.EnableDebugLogging = true
+
+	o := &riak.NewClientOptions{
+		RemoteAddresses: []string{util.GetRiakAddress()},
+	}
+
+	var c *riak.Client
+	c, err = riak.NewClient(o)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	defer func() {
+		if err := c.Stop(); err != nil {
+			util.ErrExit(err)
+		}
+	}()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.4/developing/getting-started/golang/crud-operations)
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/golang/crud-operations.md b/content/riak/kv/2.9.4/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..25e85f25a2
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,372 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+---
+
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+	val1 := uint32(1)
+	val1buf := make([]byte, 4)
+	binary.LittleEndian.PutUint32(val1buf, val1)
+
+	val2 := "two"
+
+	val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+	var val3json []byte
+	val3json, err = json.Marshal(val3)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	bucket := "test"
+
+	util.Log.Println("Creating Objects In Riak...")
+
+	objs := []*riak.Object{
+		{
+			Bucket:      bucket,
+			Key:         "one",
+			ContentType: "application/octet-stream",
+			Value:       val1buf,
+		},
+		{
+			Bucket:      bucket,
+			Key:         "two",
+			ContentType: "text/plain",
+			Value:       []byte(val2),
+		},
+		{
+			Bucket:      bucket,
+			Key:         "three",
+			ContentType: "application/json",
+			Value:       val3json,
+		},
+	}
+
+	var cmd riak.Command
+	wg := &sync.WaitGroup{}
+
+	for _, o := range objs {
+		cmd, err = riak.NewStoreValueCommandBuilder().
+			WithContent(o).
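+			// Build() validates the accumulated options and returns an
+			// error instead of panicking, which is why each iteration
+			// checks err before handing the command to ExecuteAsync.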
+ Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } + } + + wg.Wait() +``` + +In our first object, we have stored the integer 1 with the lookup key +of `one`: + +```golang +{ + Bucket: bucket, + Key: "one", + ContentType: "application/octet-stream", + Value: val1buf, +} +``` + +For our second object, we stored a simple string value of `two` with a +matching key: + +```golang +{ + Bucket: bucket, + Key: "two", + ContentType: "text/plain", + Value: []byte(val2), +} +``` + +Finally, the third object we stored was a bit of JSON: + +```golang +{ + Bucket: bucket, + Key: "three", + ContentType: "application/json", + Value: val3json, +} +``` + +## Reading Objects + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +Requesting the objects by key: + +```golang +var cmd riak.Command +wg := &sync.WaitGroup{} + +for _, o := range objs { + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(o). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() + +util.Log.Println("Reading Objects From Riak...") + +d := make(chan riak.Command, len(objs)) + +for _, o := range objs { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + Done: d, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +close(d) +``` + +Converting to JSON to compare a string key to a symbol +key: + +```golang +for done := range d { + f := done.(*riak.FetchValueCommand) + /* un-comment to dump fetched object as JSON + if json, jerr := json.MarshalIndent(f.Response, "", " "); err != nil { + util.ErrLog.Println(jerr) + } else { + util.Log.Println("fetched value: ", string(json)) + } + */ + obj := f.Response.Values[0] + switch obj.Key { + case "one": + if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "two": + if actual, expected := string(obj.Value), val2; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "three": + obj3 = obj + val3.MyValue = 0 + if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) + } else { + if actual, expected := val3.MyValue, int(3); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + } + default: + util.ErrLog.Printf("unrecognized key: %s", obj.Key) + } +} +``` + +## Updating Objects + +While some data may be static, other forms of data need to be +updated. + +Let’s update some values: + +```golang +util.Log.Println("Updating Object Three In Riak...") + +val3.MyValue = 42 +obj3.Value, err = json.Marshal(val3) +if err != nil { + util.ErrExit(err) +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj3). + WithReturnBody(true). 
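+	// WithReturnBody(true) asks Riak to include the stored object in
+	// the response, so the updated value can be read back from the
+	// StoreValueCommand below without a second fetch.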
+ Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} + +svcmd := cmd.(*riak.StoreValueCommand) +svrsp := svcmd.Response +obj3 = svrsp.Values[0] +val3.MyValue = 0 +if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) +} else { + if actual, expected := val3.MyValue, int(42); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected) + } +} +util.Log.Println("updated object key: ", obj3.Key) +util.Log.Println("updated object value: ", val3.MyValue) +``` + +## Deleting Objects + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called against either the bucket or the +object. + +```golang +for _, o := range objs { + cmd, err = riak.NewDeleteValueCommandBuilder(). + WithBucket(o.Bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. + +For example, this `struct` that represents some information about +a book: + +```golang +type Book struct { + ISBN string + Title string + Author string + Body string + CopiesOwned uint16 +} + +book := &Book{ + ISBN: "1111979723", + Title: "Moby Dick", + Author: "Herman Melville", + Body: "Call me Ishmael. Some years ago...", + CopiesOwned: 3, +} +``` + +We now have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```golang +var jbook []byte +jbook, err = json.Marshal(book) +if err != nil { + util.ErrExit(err) +} + +bookObj := &riak.Object{ + Bucket: "books", + Key: book.ISBN, + ContentType: "application/json", + Value: jbook, +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(bookObj). + WithReturnBody(false). + Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} +``` + +If we fetch our book back and print the data: + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket("books"). + WithKey(book.ISBN). + Build() +if err != nil { + util.ErrExit(err) +} +if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) +} + +fcmd := cmd.(*riak.FetchValueCommand) +bookObj = fcmd.Response.Values[0] +util.Log.Println(string(bookObj.Value)) +``` + +The result is: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +Now, let’s delete the book: + +```golang +... 
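+
+// The original snippet elides the delete call; a minimal sketch
+// (our assumption, mirroring the "Deleting Objects" section above):
+cmd, err = riak.NewDeleteValueCommandBuilder().
+	WithBucket("books").
+	WithKey(book.ISBN).
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+if err := c.Execute(cmd); err != nil {
+	util.ErrLog.Println(err)
+}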
+``` + diff --git a/content/riak/kv/2.9.4/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.9.4/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..4ea4690c09 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,549 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.4/dev/taste-of-riak/object-modeling-golang + - /riak/kv/2.9.4/dev/taste-of-riak/object-modeling-golang +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.4/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<username>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+	"encoding/json"
+	"errors"
+
+	riak "github.com/basho/riak-go-client"
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+	Get(key string, notFoundOk bool) (models.Model, error)
+	Save(models.Model) (models.Model, error)
+	getBucketName() string
+	getModel() models.Model
+	getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+	client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+	return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(notFoundOk).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	fcmd := cmd.(*riak.FetchValueCommand)
+
+	if notFoundOk && len(fcmd.Response.Values) == 0 {
+		return nil, nil
+	}
+
+	if len(fcmd.Response.Values) > 1 {
+		// Siblings present that need resolution
+		// Here we'll just return an unexpected error
+		return nil, ErrUnexpectedSiblings
+	} else {
+		return buildModel(r.getModel(), fcmd.Response.Values[0])
+	}
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	key := m.GetId()
+
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(true).
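+		// Fetching before storing lets save() reuse any existing value;
+		// notFoundOk=true makes a missing key come back as an empty
+		// response rather than an error, so inserts need no special case.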
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names. +* How to choose natural keys based on how we want to partition our data. + + + diff --git a/content/riak/kv/2.9.4/developing/getting-started/golang/querying.md b/content/riak/kv/2.9.4/developing/getting-started/golang/querying.md new file mode 100644 index 0000000000..1a64323713 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/getting-started/golang/querying.md @@ -0,0 +1,577 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Go" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Querying" + identifier: "getting_started_go_query" + weight: 101 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.4/dev/taste-of-riak/querying-golang + - /riak/kv/2.9.4/dev/taste-of-riak/querying-golang +--- + +## Go Version Setup + +For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix* OS. + +>A Quick Note on Querying and Schemas: +> +>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it. 
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
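+			// Each Order gets its own key (order.Id) in the Orders bucket;
+			// the store commands are collected in cmds and executed
+			// concurrently below via riak.Async with a shared WaitGroup.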
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
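+	// The OrderSummary shares the customer's generated key, so both
+	// related objects can be fetched in parallel with no index lookup:
+	// the "same keys, different buckets" pattern in action.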
+ Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +doneChan := make(chan riak.Command) +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Bucket { + case customersBucket: + util.Log.Printf("Customer 1: %v", string(obj.Value)) + case orderSummariesBucket: + util.Log.Printf("OrderSummary 1: %v", string(obj.Value)) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} +``` + +Which returns our amalgamated objects: + +```sh +2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]} +2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z" +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.4/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: + +```golang +util.Log.Println("Adding Index Data") + +// fetch orders to add index data +cmds = cmds[:0] + +for _, order := range orders { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(ordersBucket). + WithKey(order.Id). 
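+		// 2i entries ride along in object metadata, so each order is
+		// fetched here and re-stored below with its secondary index
+		// entries attached.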
+		Build()
+	if err != nil {
+		util.ErrExit(err)
+	}
+	cmds = append(cmds, cmd)
+}
+
+errored = false
+for _, cmd := range cmds {
+	a := &riak.Async{
+		Command: cmd,
+		Done:    doneChan,
+	}
+	if eerr := c.ExecuteAsync(a); eerr != nil {
+		errored = true
+		util.ErrLog.Println(eerr)
+	}
+}
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+errored = false
+for i := 0; i < len(cmds); i++ {
+	select {
+	case d := <-doneChan:
+		if fv, ok := d.(*riak.FetchValueCommand); ok {
+			obj := fv.Response.Values[0]
+			switch obj.Key {
+			case "1":
+				obj.AddToIntIndex("SalespersonId_int", 9000)
+				obj.AddToIndex("OrderDate_bin", "2013-10-01")
+			case "2":
+				obj.AddToIntIndex("SalespersonId_int", 9001)
+				obj.AddToIndex("OrderDate_bin", "2013-10-15")
+			case "3":
+				obj.AddToIntIndex("SalespersonId_int", 9000)
+				obj.AddToIndex("OrderDate_bin", "2013-11-03")
+			}
+			scmd, serr := riak.NewStoreValueCommandBuilder().
+				WithContent(obj).
+				Build()
+			if serr != nil {
+				util.ErrExit(serr)
+			}
+			a := &riak.Async{
+				Command: scmd,
+				Wait:    wg,
+			}
+			if eerr := c.ExecuteAsync(a); eerr != nil {
+				errored = true
+				util.ErrLog.Println(eerr)
+			}
+		} else {
+			util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+		}
+	case <-time.After(5 * time.Second):
+		util.ErrExit(errors.New("fetch operations took too long"))
+	}
+}
+
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+wg.Wait()
+close(doneChan)
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`:
+
+```golang
+util.Log.Println("Index Queries")
+
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+	WithBucket(ordersBucket).
+	WithIndexName("SalespersonId_int").
+	WithIndexKey("9000").
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+	util.ErrExit(eerr)
+}
+
+qcmd := cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+	util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 Jane's Orders, key: 3
+2015/12/29 09:44:10 Jane's Orders, key: 1
+```
+
+Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id; next, let's use a *binary* index.
+
+Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`:
+
+```golang
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+	WithBucket(ordersBucket).
+	WithIndexName("OrderDate_bin").
+	WithRange("2013-10-01", "2013-10-31").
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+	util.ErrExit(eerr)
+}
+
+qcmd = cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+	util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 October's Orders, key: 1
+2015/12/29 09:44:10 October's Orders, key: 2
+```
+
+Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/java.md b/content/riak/kv/2.9.4/developing/getting-started/java.md
new file mode 100644
index 0000000000..938ad06ebc
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/java.md
@@ -0,0 +1,90 @@
+---
+title: "Getting Started with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Java"
+    identifier: "getting_started_java"
+    weight: 100
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/java
+  - /riak/kv/2.9.4/dev/taste-of-riak/java
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.4/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Java is required.
+
+## Client Setup
+
+To include the Riak Java client in your project, add it to your
+project's dependencies. Here is a Maven example:
+
+```xml
+<dependencies>
+  <dependency>
+    <groupId>com.basho.riak</groupId>
+    <artifactId>riak-client</artifactId>
+    <version>2.1.1</version>
+  </dependency>
+</dependencies>
+```
+
+Next, download the
+[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java)
+source code for this tutorial, and save it to your working directory.
+
+{{% note title="Configuring for a local cluster" %}}
+The `TasteOfRiak.java` file that you downloaded is set up to communicate with
+a 1-node Riak cluster listening on `localhost` port 10017. We recommend
+modifying the connection info directly within the `setUpCluster()` method.
+{{% /note %}}
+
+If you execute the `TasteOfRiak.java` file within your IDE, you should
+see the following:
+
+```
+Basic object created
+Location object created for quote object
+StoreValue operation created
+Client object successfully created
+Object storage operation successfully completed
+Success! The object we created and the object we fetched have the same value
+Quote object successfully deleted
+Book object created
+Moby Dick information now stored in Riak
+Book object successfully fetched
+Success! All of our tests check out
+```
+
+Since Java doesn’t have a REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting Up the Cluster
+
+The first step in using the Riak Java client is to create a cluster
+object to facilitate all interactions with Riak. You'll see this on line
+72:
+
+```java
+RiakCluster cluster = setUpCluster();
+```
+
+This calls the private `setUpCluster` method which begins on line 25.
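+
+As a rough sketch (the real code lives in `TasteOfRiak.java`; the
+connection values below are the assumed local defaults), `setUpCluster()`
+builds a `RiakNode` pointing at your node and wraps it in a started
+`RiakCluster`:
+
+```java
+private static RiakCluster setUpCluster() throws UnknownHostException {
+    // Assumed connection info for a local 1-node cluster
+    RiakNode node = new RiakNode.Builder()
+            .withRemoteAddress("127.0.0.1")
+            .withRemotePort(10017)
+            .build();
+
+    RiakCluster cluster = new RiakCluster.Builder(node).build();
+    // The cluster must be started before it can execute commands
+    cluster.start();
+    return cluster;
+}
+```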
+
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.4/developing/getting-started/java/crud-operations)
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.9.4/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..23547320fc
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,202 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on down the line:
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Iceman");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see the [Updating Objects]({{}}riak/kv/2.9.4/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.4/developing/usage/conflict-resolution/)
+documentation.
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book:
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the simpler
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+
+To update the POJO, we would use `UpdateValue` by
+extending a new `BookUpdate` class as follows:
+
+```java
+public static class BookUpdate extends UpdateValue.Update<Book> {
+    private final Book update;
+    public BookUpdate(Book update){
+        this.update = update;
+    }
+
+    @Override
+    public Book apply(Book t) {
+        if(t == null) {
+            t = new Book();
+        }
+
+        t.author = update.author;
+        t.body = update.body;
+        t.copiesOwned = update.copiesOwned;
+        t.isbn = update.isbn;
+        t.title = update.title;
+
+        return t;
+    }
+}
+```
+
+Then using the `BookUpdate` class with our `mobyDick` object:
+
+```java
+mobyDick.copiesOwned = 5;
+BookUpdate updatedBook = new BookUpdate(mobyDick);
+
+UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation)
+        .withUpdate(updatedBook).build();
+UpdateValue.Response response = client.execute(updateValue);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see the [Updating Objects]({{}}riak/kv/2.9.4/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.4/developing/usage/conflict-resolution/)
+documentation.
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/java/object-modeling.md b/content/riak/kv/2.9.4/developing/getting-started/java/object-modeling.md
new file mode 100644
index 0000000000..43f5c5aa49
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/java/object-modeling.md
@@ -0,0 +1,429 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Object Modeling"
+    identifier: "getting_started_java_object"
+    weight: 102
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/object-modeling-java
+  - /riak/kv/2.9.4/dev/taste-of-riak/object-modeling-java
+---
+
+To get started, let's create the models that we'll be using:
+
+```java
+package com.basho.msgy.Models;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class Msg {
+    public String Sender;
+    public String Recipient;
+    public String Created;
+    public String Text;
+
+    public static Msg createNew(String sender, String recipient, String text) {
+        Msg msg = new Msg();
+        msg.Sender = sender;
+        msg.Recipient = recipient;
+        msg.Text = text;
+        msg.Created = GetCurrentISO8601Timestamp();
+        return msg;
+    }
+
+    private static String GetCurrentISO8601Timestamp() {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        // Java Dates don't have microsecond resolution :(
+        // Pad out to microseconds to match other examples.
+ DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'"); + df.setTimeZone(tz); + return df.format(new Date()); + } +} + +// ---------------------------------------------------------------------------- + +import java.util.ArrayList; + +public class Timeline { + + public enum TimelineType + { + Inbox, + Sent; + + @Override + public String toString() { + if(this == Inbox) + return "Inbox"; + else + return "Sent"; + } + } + + public Timeline() { + Msgs = new ArrayList(); + } + + public String Owner; + public String Type; + public ArrayList Msgs; +} + +// ---------------------------------------------------------------------------- + +package com.basho.msgy.Models; + +import com.basho.riak.client.convert.RiakKey; + +public class User { + @RiakKey + public String UserName; + + @RiakBucketName + final String bucketName = "msgs"; + + public String FullName; + public String Email; + + public User() {} + + public User(String userName, String fullName, String email) { + this.UserName = userName; + this.FullName = fullName; + this.Email = email; + } +} +``` + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. + + +| Bucket | Key Pattern | Example Key +|:-------|:------------|:----------- +| `Users` | `` | `joeuser` +| `Msgs` | `_` | `joeuser_2014-03-06T02:05:13.223556Z` +| `Timelines` | `__` | `joeuser_Sent_2014-03-06Z`
`marketing_group_Inbox_2014-03-06Z` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. With the +Java client, we can use the `@RiakKey` annotation to tell the client +that we want to use the `UserName` member as the key. It will +automatically use that value in the future, instead of having to pass the +key in as another parameter when storing a value. + +For the `Msgs` bucket, let's use a combination of the username and the +posting datetime in an [ISO 8601 +Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination +gives us the pattern `_`, which produces keys like +`joeuser_2014-03-05T23:20:28Z`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `_Inbox_` for groups, which will look like +`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2MB. Objects larger than that can hurt +performance, especially many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, let's write some +repositories to help create and work with these objects in Riak: + +```java +package com.basho.msgy.Repositories; + +import com.basho.msgy.Models.Msg; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakRetryFailedException; +import com.basho.riak.client.bucket.Bucket; + +public class MsgRepository { + + static final String BUCKET_NAME = "Msgs"; + protected RiakClient client; + + public MsgRepository(RiakClient client) { + this.client = client; + } + + public Msg get(String msgKey) throws Exception { + Location key = new Location(new Namespace(BUCKET_NAME), msgKey); + FetchValue fetch = new FetchValue.Builder(key).build(); + FetchValue.Response response = client.execute(fetch); + return response.getValue(Msg.class); + } + + public String save(Msg msg) throws Exception { + StoreValue store = new StoreValue.Builder(msg).build(); + client.execute(store); + return generateKey(msg); + } + + private String generateKey(Msg msg) { + return msg.Sender + "_" + msg.Created; + } +} + +// ---------------------------------------------------------------------------- + +package com.basho.msgy.Repositories; + +import com.basho.msgy.Models.Msg; +import com.basho.msgy.Models.Timeline; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakRetryFailedException; +import com.basho.riak.client.bucket.Bucket; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.TimeZone; + +public class TimelineRepository { + + static final String BUCKET_NAME = "Timelines"; + protected RiakClient client; + protected MsgRepository msgRepo; + + public TimelineRepository(RiakClient client) { + this.client = client; + this.msgRepo = new MsgRepository(this.client); + } + + public void postMsg(Msg msg) throws Exception { + String msgKey = msgRepo.save(msg); + + // Post to recipient's Inbox timeline + addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey); + + // Post to sender's Sent timeline + 
+        addToTimeline(msg, Timeline.TimelineType.Sent, msgKey);
+    }
+
+    private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception {
+        String timelineKey = generateKeyFromMsg(msg, type);
+
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        Timeline timeline = client.execute(fetch).getValue(Timeline.class);
+
+        if (timeline != null) {
+            timeline = addToExistingTimeline(timeline, msgKey);
+        } else {
+            timeline = createNewTimeline(msg, type, msgKey);
+        }
+
+        StoreValue store = new StoreValue.Builder(timeline).withLocation(loc).build();
+        client.execute(store);
+    }
+
+    public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) {
+        String owner = getOwner(msg, type);
+
+        Timeline newTimeline = new Timeline();
+        newTimeline.Owner = owner;
+        newTimeline.Type = type.toString();
+        newTimeline.Msgs.add(msgKey);
+
+        return newTimeline;
+    }
+
+    public Timeline addToExistingTimeline(Timeline timeline, String msgKey) {
+        timeline.Msgs.add(msgKey);
+        return timeline;
+    }
+
+    public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws Exception {
+        String timelineKey = generateKey(ownerUsername, type, date);
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        return client.execute(fetch).getValue(Timeline.class);
+    }
+
+    private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) {
+        String owner = getOwner(msg, type);
+        String dateString = msg.Created.substring(0, 10);
+        return generateKey(owner, type, dateString);
+    }
+
+    private String getOwner(Msg msg, Timeline.TimelineType type) {
+        if (type == Timeline.TimelineType.Inbox)
+            return msg.Recipient;
+        else
+            return msg.Sender;
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) {
+        String dateString = getIso8601DateStringFromDate(date);
+        return generateKey(ownerUsername, type, dateString);
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) {
+        return ownerUsername + "_" + type.toString() + "_" + dateString;
+    }
+
+    private String getIso8601DateStringFromDate(Date date) {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
+        df.setTimeZone(tz);
+        return df.format(date);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.User;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class UserRepository {
+    static final String BUCKET_NAME = "Users";
+    protected RiakClient client;
+
+    public UserRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public void save(User user) throws Exception {
+        // User carries its bucket and key via the @RiakBucketName and
+        // @RiakKey annotations, so no explicit Location is needed here.
+        StoreValue store = new StoreValue.Builder(user).build();
+        client.execute(store);
+    }
+
+    public User get(String userName) throws Exception {
+        Location loc = new Location(new Namespace(BUCKET_NAME), userName);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        return client.execute(fetch).getValue(User.class);
+    }
+}
+
+```
+
+Finally, let's test them:
+
+```java
+package com.basho.msgy;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.msgy.Models.User;
+import com.basho.msgy.Repositories.MsgRepository;
+import com.basho.msgy.Repositories.TimelineRepository;
+import com.basho.msgy.Repositories.UserRepository;
+import com.basho.riak.client.api.RiakClient;
+
+import java.util.Date;
+
+public class MsgyMain {
+
+    public static void main(String[] args) throws Exception {
+        // Set up our repositories; 10017 is the protocol buffers port
+        // of a local devrel node
+        RiakClient client = RiakClient.newClient(10017, "127.0.0.1");
+
+        UserRepository userRepo = new UserRepository(client);
+        MsgRepository msgRepo = new MsgRepository(client);
+        TimelineRepository timelineRepo = new TimelineRepository(client);
+
+        // Create and save users
+        User marleen = new User("marleenmgr",
+                "Marleen Manager",
+                "marleen.manager@basho.com");
+
+        User joe = new User("joeuser",
+                "Joe User",
+                "joe.user@basho.com");
+
+        userRepo.save(marleen);
+        userRepo.save(joe);
+
+        // Create a new Msg and post it to the relevant timelines
+        Msg msg = Msg.createNew(marleen.UserName,
+                joe.UserName,
+                "Welcome to the company!");
+
+        timelineRepo.postMsg(msg);
+
+        // Get Joe's inbox for today, and fetch the first message
+        Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName,
+                Timeline.TimelineType.Inbox,
+                new Date());
+
+        Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0));
+
+        System.out.println("From: " + joesFirstMsg.Sender);
+        System.out.println("Msg : " + joesFirstMsg.Text);
+        System.out.println("");
+
+        client.shutdown();
+    }
+}
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our bucket and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application-dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/java/querying.md b/content/riak/kv/2.9.4/developing/getting-started/java/querying.md
new file mode 100644
index 0000000000..0f177b7ac5
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/java/querying.md
@@ -0,0 +1,277 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Querying"
+    identifier: "getting_started_java_query"
+    weight: 101
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/querying-java
+  - /riak/kv/2.9.4/dev/taste-of-riak/querying-java
+---
+
+## Java Version Setup
+
+For the Java version, please download the source from GitHub by either
+[cloning](https://github.com/basho/taste-of-riak) the source code
+repository or downloading the [current zip of the master
+branch](https://github.com/basho/taste-of-riak/archive/master.zip).
+The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`.
+You may import this code into your favorite editor, or just run it from
+the command line using the commands in `BuildAndRun.sh` if you are
+running on a *nix OS.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can be as simple
+as using the same key across multiple buckets for different types of
+data, or as involved as having fields in your data that are related by
+name. These querying methods will introduce you to some ways of laying
+out your data in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled-up info about orders, such as the total. You can find the source
+for these POJOs in `Customer.java`, `Order.java`, and
+`OrderSummaries.java`. Let's put some data into Riak so we can play
+with it.
+
+```java
+// From SipOfRiak.java
+
+private static Customer createCustomer() {
+    Customer customer = new Customer();
+    customer.CustomerId = 1;
+    customer.Name = "John Smith";
+    customer.Address = "123 Main Street";
+    customer.City = "Columbus";
+    customer.State = "Ohio";
+    customer.Zip = "43210";
+    customer.Phone = "+1-614-555-5555";
+    customer.CreatedDate = "2013-10-01 14:30:26";
+    return customer;
+}
+
+private static ArrayList<Order> createOrders() {
+    ArrayList<Order> orders = new ArrayList<Order>();
+
+    Order order1 = new Order();
+    order1.OrderId = 1;
+    order1.CustomerId = 1;
+    order1.SalespersonId = 9000;
+    order1.Items.add(
+        new Item("TCV37GIT4NJ",
+                 "USB 3.0 Coffee Warmer",
+                 15.99));
+    order1.Items.add(
+        new Item("PEG10BBF2PP",
+                 "eTablet Pro; 24GB; Grey",
+                 399.99));
+    order1.Total = 415.98;
+    order1.OrderDate = "2013-10-01 14:42:26";
+    orders.add(order1);
+
+    Order order2 = new Order();
+    order2.OrderId = 2;
+    order2.CustomerId = 1;
+    order2.SalespersonId = 9001;
+    order2.Items.add(
+        new Item("OAX19XWN0QP",
+                 "GoSlo Digital Camera",
+                 359.99));
+    order2.Total = 359.99;
+    order2.OrderDate = "2013-10-15 16:43:16";
+    orders.add(order2);
+
+    Order order3 = new Order();
+    order3.OrderId = 3;
+    order3.CustomerId = 1;
+    order3.SalespersonId = 9000;
+    order3.Items.add(
+        new Item("WYK12EPU5EZ",
+                 "Call of Battle = Goats - Gamesphere 4",
+                 69.99));
+    order3.Items.add(
+        new Item("TJB84HAA8OA",
+                 "Bricko Building Blocks",
+                 4.99));
+    order3.Total = 74.98;
+    order3.OrderDate = "2013-11-03 17:45:28";
+    orders.add(order3);
+    return orders;
+}
+
+private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
+    OrderSummary orderSummary = new OrderSummary();
+    orderSummary.CustomerId = 1;
+    for (Order order : orders)
+    {
+        orderSummary.Summaries.add(new OrderSummaryItem(order));
+    }
+    return orderSummary;
+}
+
+public static void main(String[] args) throws RiakException {
+
+    System.out.println("Creating Data");
+    Customer customer = createCustomer();
+    ArrayList<Order> orders = createOrders();
+    OrderSummary orderSummary = createOrderSummary(orders);
+
+    System.out.println("Starting Client");
+    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);
+
+    System.out.println("Creating Buckets");
+    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
+    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
+    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();
+
+    System.out.println("Storing Data");
+    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
+    for (Order order : orders) {
+        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
+    }
+    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```java
+    System.out.println("Fetching related data by shared key");
+    String key = "1";
+    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
+    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
+    System.out.format("Customer 1: %s\n", fetchedCust);
+    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```java
+    System.out.println("Adding Index Data");
+    IRiakObject riakObj = ordersBucket.fetch("1").execute();
+    riakObj.addIndex("SalespersonId", 9000);
+    riakObj.addIndex("OrderDate", "2013-10-01");
+    ordersBucket.store(riakObj).execute();
+
+    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
+    riakObj2.addIndex("SalespersonId", 9001);
+    riakObj2.addIndex("OrderDate", "2013-10-15");
+    ordersBucket.store(riakObj2).execute();
+
+    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
+    riakObj3.addIndex("SalespersonId", 9000);
+    riakObj3.addIndex("OrderDate", "2013-11-03");
+    ordersBucket.store(riakObj3).execute();
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+```java
+    // Query for orders where the SalespersonId index is set to 9000
+    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
+                                           .withValue(9000).execute();
+
+    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
+```
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+```java
+    // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
+    List<String> octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate"))
+                                             .from("2013-10-01").to("2013-10-31").execute();
+
+    System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders));
+```
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary ID other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/nodejs.md b/content/riak/kv/2.9.4/developing/getting-started/nodejs.md
new file mode 100644
index 0000000000..40f717e20e
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/nodejs.md
@@ -0,0 +1,101 @@
+---
+title: "Getting Started with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "NodeJS"
+    identifier: "getting_started_nodejs"
+    weight: 104
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/nodejs
+  - /riak/kv/2.9.4/dev/taste-of-riak/nodejs
+---
+
+[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js
+[npm]: https://www.npmjs.com/package/basho-riak-client
+[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation
+[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.4/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Node.js 0.12 or later is
+required.
+
+Code for these examples is available [here][introduction.js]. To run, follow
+these directions:
+
+```bash
+git clone git://github.com/basho/riak-nodejs-client-examples
+cd riak-nodejs-client-examples
+npm install
+node ./app.js
+```
+
+### Client Setup
+
+Install [the Riak Node.js Client][node_js_installation] through [NPM][npm].
+
+### Connecting to Riak
+
+Connecting to Riak with the Riak Node.js Client requires creating a new client
+object and using the callback argument to know when the client is fully
+initialized:
+
+```javascript
+var Riak = require('basho-riak-client');
+var nodes = [
+    'riak-test:10017',
+    'riak-test:10027',
+    'riak-test:10037',
+    'riak-test:10047'
+];
+var client = new Riak.Client(nodes, function (err, c) {
+    // NB: at this point the client is fully initialized, and
+    // 'client' and 'c' are the same object
+});
+```
+
+This creates a new `Riak.Client` object which handles all the details of
+tracking active nodes and also provides load balancing. The `Riak.Client` object
+is used to send commands to Riak. 
When your application is completely done with
+Riak communications, the following method can be used to gracefully shut the
+client down and exit Node.js:
+
+```javascript
+client.stop(function (err, rslt) {
+    // NB: you may wish to check err
+    process.exit();
+});
+```
+
+Let's make sure the cluster is online with a `Ping` request:
+
+```javascript
+var assert = require('assert');
+
+client.ping(function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    } else {
+        // On success, ping returns true
+        assert(rslt === true);
+    }
+});
+```
+
+This is some simple code to test that a node in a Riak cluster is online - we
+send a simple ping message. Even if the cluster isn't present, the Riak Node.js
+Client will return a response message. In the callback it is important to check
+that your activity was successful by checking the `err` variable.
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{<baseurl>}}riak/kv/2.9.4/developing/getting-started/nodejs/crud-operations)
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/2.9.4/developing/getting-started/nodejs/crud-operations.md
new file mode 100644
index 0000000000..76ebb733dd
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/nodejs/crud-operations.md
@@ -0,0 +1,134 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "CRUD Operations"
+    identifier: "getting_started_nodejs_crud"
+    weight: 100
+    parent: "getting_started_nodejs"
+toc: true
+---
+
+[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki
+
+### Creating Objects In Riak KV
+
+Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going
+to want us to do productive work. Let's create some data to save in Riak.
+
+The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak
+key/value objects. At the most basic, a `RiakObject` is responsible for
+identifying your object and for translating it into a format that can be easily
+saved to Riak.
+
+```javascript
+var async = require('async');
+
+var people = [
+    {
+        emailAddress: "bashoman@basho.com",
+        firstName: "Basho",
+        lastName: "Man"
+    },
+    {
+        emailAddress: "johndoe@gmail.com",
+        firstName: "John",
+        lastName: "Doe"
+    }
+];
+
+var storeFuncs = [];
+people.forEach(function (person) {
+    // Create functions to execute in parallel to store people
+    storeFuncs.push(function (async_cb) {
+        client.storeValue({
+                bucket: 'contributors',
+                key: person.emailAddress,
+                value: person
+            },
+            function(err, rslt) {
+                async_cb(err, rslt);
+            }
+        );
+    });
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+In this sample, we create a collection of `Person` objects and then save each
+`Person` to Riak. Once again, we check the response from Riak.
+
+### Reading from Riak
+
+Let's find a person!
+
+```javascript
+var logger = require('winston');
+
+client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true },
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        } else {
+            var riakObj = rslt.values.shift();
+            var bashoman = riakObj.value;
+            logger.info("I found %s in 'contributors'", bashoman.emailAddress);
+        }
+    }
+);
+```
+
+We use `client.fetchValue` to retrieve an object from Riak. 
This returns an
+array of `RiakObject` objects which helpfully encapsulates the communication
+with Riak.
+
+After verifying that we've been able to communicate with Riak *and* that we have
+a successful result, we use the `value` property to get the object, which has
+already been converted to a JavaScript object due to the use of `convertToJs:
+true` in the options.
+
+### Modifying Existing Data
+
+Let's say that Basho Man has decided to be known as Riak Man:
+
+```javascript
+bashoman.firstName = "Riak";
+riakObj.setValue(bashoman);
+
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Updating an object involves modifying a `RiakObject` and then using
+`client.storeValue` to save the existing object.
+
+### Deleting Data
+
+```javascript
+client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Just like other operations, we check the results that have come back from Riak
+to make sure the object was successfully deleted.
+
+The Riak Node.js Client has a lot of additional functionality that makes it easy
+to build rich, complex applications with Riak. Check out the
+[documentation][nodejs_wiki] to learn more about working with the Riak Node.js
+Client and Riak.
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/2.9.4/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..0947c054fe
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,120 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Object Modeling"
+    identifier: "getting_started_nodejs_object"
+    weight: 102
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/kv/2.9.4/dev/taste-of-riak/object-modeling-nodejs
+---
+
+To get started, let's create the models that we'll be using.
+
+* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
+* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
+* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br> `marketing_group_INBOX_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our bucket and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application-dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
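+
+The model and repository implementations live in the linked GitHub files,
+but the key patterns above are simple enough to sketch inline. The helper
+names below are hypothetical and shown only for illustration; the linked
+`Msg` and `Timeline` models implement their own versions:
+
+```javascript
+// Hypothetical helpers illustrating the key patterns above
+function userKey(userName) {
+    // Users bucket: <user_name>
+    return userName;
+}
+
+function msgKey(sender, createdIso) {
+    // Msgs bucket: <user_name>_<datetime>
+    return sender + '_' + createdIso;
+}
+
+function timelineKey(owner, type, createdIso) {
+    // Timelines bucket: <owner>_<type>_<date>, keeping only the date portion
+    return owner + '_' + type.toUpperCase() + '_' + createdIso.substring(0, 10);
+}
+
+// timelineKey('joeuser', 'Sent', new Date().toISOString())
+// => something like 'joeuser_SENT_2014-03-06'
+```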
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.9.4/developing/getting-started/nodejs/querying.md
new file mode 100644
index 0000000000..023e39777b
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/nodejs/querying.md
@@ -0,0 +1,143 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Querying"
+    identifier: "getting_started_nodejs_query"
+    weight: 101
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/querying-nodejs
+  - /riak/kv/2.9.4/dev/taste-of-riak/querying-nodejs
+---
+
+## Node.js Version Setup
+
+For the Node.js version, please download the source from GitHub by either
+[cloning](https://github.com/basho/taste-of-riak) the source code
+repository or downloading the [current zip of the master
+branch](https://github.com/basho/taste-of-riak/archive/master.zip).
+The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be
+sure to run `npm install` in this directory prior to running `node
+./app.js` to run the code.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can be as simple
+as using the same key across multiple buckets for different types of
+data, or as involved as having fields in your data that are related by
+name. These querying methods will introduce you to some ways of laying
+out your data in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled-up info about orders, such as the total. Let's put some data into
+Riak so we can play with it.
+
+* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33)
+* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262)
+
+While individual Customer and Order objects don't change much (or
+shouldn't change), the "Order Summary" object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc.
+If we showed this information in our application often, it's only one
+extra request to get all the info.
+
+[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)
+
+Which returns our amalgamated objects:
+
+```bash
+info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
+info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
+2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175)
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary ID other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/php.md b/content/riak/kv/2.9.4/developing/getting-started/php.md
new file mode 100644
index 0000000000..844fa48e06
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/php.md
@@ -0,0 +1,77 @@
+---
+title: "Getting Started with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "PHP"
+    identifier: "getting_started_php"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/php
+  - /riak/kv/2.9.4/dev/taste-of-riak/php
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.4/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of PHP is required, along with [Composer](https://getcomposer.org/) to fetch the client library package.
+
+## Client Setup
+Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)).
+
+From the `taste-of-riak` directory, use Composer to install the Riak PHP 2.0 client.
+
+```bash
+php path/to/your/composer.phar install
+
+# If you did a global install of composer, run this instead:
+composer install
+```
+
+If you set up a local Riak cluster using the [Running A Cluster]({{<baseurl>}}riak/kv/2.9.4/using/running-a-cluster) instructions, change line 11 from `->onPort(8098)` to `->onPort(10018)`.
+
+Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:
+
+```json
+Reading Objects From Riak...
+Updating Objects In Riak...
+Deleting Objects From Riak...
+Working With Complex Objects...
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+Yay, success!
+
+Since we didn't use PHP's REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting Up the PHP Client and Connections
+
+```php
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+```
+
+This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
+Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.
+
+We are now ready to start interacting with Riak. 
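+
+The examples in this guide run against a single local node, but the `Riak`
+constructor accepts an array of any number of nodes. A minimal sketch,
+assuming two hypothetical hostnames (substitute the members of your own
+cluster):
+
+```php
+use Basho\Riak;
+use Basho\Riak\Node;
+
+// Hypothetical hostnames; replace with your own cluster members
+$node1 = (new Node\Builder)
+    ->atHost('riak1.example.com')
+    ->onPort(8098)
+    ->build();
+
+$node2 = (new Node\Builder)
+    ->atHost('riak2.example.com')
+    ->onPort(8098)
+    ->build();
+
+// Commands issued through $riak are balanced across both nodes
+$riak = new Riak([$node1, $node2]);
+```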
+
+## Next Steps
+
+[CRUD Operations]({{<baseurl>}}riak/kv/2.9.4/developing/getting-started/php/crud-operations)
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.9.4/developing/getting-started/php/crud-operations.md
new file mode 100644
index 0000000000..6c079e9786
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/php/crud-operations.md
@@ -0,0 +1,183 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "CRUD Operations"
+    identifier: "getting_started_php_crud"
+    weight: 100
+    parent: "getting_started_php"
+toc: true
+---
+
+## Creating Objects In Riak
+First, let’s create a few objects and a bucket to keep them in.
+
+```php
+$bucket = new Riak\Bucket('testBucket');
+
+$val1 = 1;
+$location1 = new Riak\Location('one', $bucket);
+
+$storeCommand1 = (new Command\Builder\StoreObject($riak))
+    ->buildObject($val1)
+    ->atLocation($location1)
+    ->build();
+$storeCommand1->execute();
+```
+
+In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key.
+
+```php
+$val2 = 'two';
+$location2 = new Riak\Location('two', $bucket);
+
+$storeCommand2 = (new Command\Builder\StoreObject($riak))
+    ->buildObject($val2)
+    ->atLocation($location2)
+    ->build();
+$storeCommand2->execute();
+```
+
+That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now.
+
+```php
+$val3 = ['myValue' => 3];
+$location3 = new Riak\Location('three', $bucket);
+
+$storeCommand3 = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($val3)
+    ->atLocation($location3)
+    ->build();
+$storeCommand3->execute();
+```
+
+## Reading Objects From Riak
+Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect.
+
+```php
+$response1 = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location1)
+    ->build()
+    ->execute();
+
+$response2 = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location2)
+    ->build()
+    ->execute();
+
+$response3 = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location3)
+    ->withDecodeAsAssociative()
+    ->build()
+    ->execute();
+
+print_r($response1->getObject()->getData());
+print_r($response2->getObject()->getData());
+print_r($response3->getObject()->getData());
+```
+
+That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html).
+For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object.
+
+In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation and the result data.
+
+## Updating Objects In Riak
+While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of `myValue` in the 3rd example to 42.
+
+```php
+$object3 = $response3->getObject();
+$data3 = $object3->getData();
+
+$data3['myValue'] = 42;
+$object3 = $object3->setData(json_encode($data3));
+
+$updateCommand = (new Command\Builder\StoreObject($riak))
+    ->withObject($object3)
+    ->atLocation($location3)
+    ->build();
+
+$updateCommand->execute();
+```
+
+First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object.
+To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object.
+
+## Deleting Objects From Riak
+As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it.
+
+```php
+(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute();
+(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute();
+(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute();
+```
+
+## Working With Complex Objects
+Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take, for example, this plain old PHP object (POPO) that encapsulates some knowledge about a book.
+
+```php
+class Book
+{
+    var $title;
+    var $author;
+    var $body;
+    var $isbn;
+    var $copiesOwned;
+}
+
+$book = new Book();
+$book->isbn = '1111979723';
+$book->title = 'Moby Dick';
+$book->author = 'Herman Melville';
+$book->body = 'Call me Ishmael. Some years ago...';
+$book->copiesOwned = 3;
+```
+
+Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now:
+
+```php
+$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books'));
+
+$storeCommand1 = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($book)
+    ->atLocation($bookLocation)
+    ->build();
+
+$storeCommand1->execute();
+```
+
+Some of you may be thinking, “But how does the Riak client encode/decode my object?” If we fetch the binary version of our book back and print it as a string, we shall know:
+
+```php
+$fetchBookResponse = (new Command\Builder\FetchObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute();
+
+print('Serialized Object:' . PHP_EOL);
+print($fetchBookResponse->getBody() . PHP_EOL);
+```
+
+```json
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder.
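+
+If you would rather have the decoded object than the raw JSON string, the
+same fetch-and-`getData()` pattern from the reading section above applies.
+A short sketch, reusing `$bookLocation` from the example above:
+
+```php
+$fetchedBook = (new Command\Builder\FetchObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute()
+    ->getObject()
+    ->getData();
+
+// getData() returns a stdClass object decoded from the stored JSON
+print($fetchedBook->title . ' by ' . $fetchedBook->author . PHP_EOL);
+```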
+
+Now that we’ve ruined the magic of object encoding, let’s clean up our mess:
+
+```php
+(new Command\Builder\DeleteObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute();
+```
+
+## Next Steps
+
+More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{<baseurl>}}riak/kv/2.9.4/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents.
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/php/querying.md b/content/riak/kv/2.9.4/developing/getting-started/php/querying.md
new file mode 100644
index 0000000000..747b16efe2
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/php/querying.md
@@ -0,0 +1,405 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Querying"
+    identifier: "getting_started_php_query"
+    weight: 101
+    parent: "getting_started_php"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/querying-php
+  - /riak/kv/2.9.4/dev/taste-of-riak/querying-php
+---
+
+## A Quick Note on Querying and Schemas
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, or as involved as having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+## Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled-up info about orders, such as the total. Let's put some data into Riak so we can play with it.
+
+```php
+<?php
+
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Command;
+use Basho\Riak\Location;
+use Basho\Riak\Node;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Class definitions for our models
+
+class Customer
+{
+    var $customerId;
+    var $name;
+    var $address;
+    var $city;
+    var $state;
+    var $zip;
+    var $phone;
+    var $createdDate;
+}
+
+class Order
+{
+    public function __construct()
+    {
+        $this->items = array();
+    }
+    var $orderId;
+    var $customerId;
+    var $salespersonId;
+    var $items;
+    var $total;
+    var $orderDate;
+}
+
+class Item
+{
+    public function __construct($itemId, $title, $price)
+    {
+        $this->itemId = $itemId;
+        $this->title = $title;
+        $this->price = $price;
+    }
+    var $itemId;
+    var $title;
+    var $price;
+}
+
+class OrderSummary
+{
+    public function __construct()
+    {
+        $this->summaries = array();
+    }
+    var $customerId;
+    var $summaries;
+}
+
+class OrderSummaryItem
+{
+    public function __construct(Order $order)
+    {
+        $this->orderId = $order->orderId;
+        $this->total = $order->total;
+        $this->orderDate = $order->orderDate;
+    }
+    var $orderId;
+    var $total;
+    var $orderDate;
+}
+
+
+// Creating Data
+$customer = new Customer();
+$customer->customerId = 1;
+$customer->name = 'John Smith';
+$customer->address = '123 Main Street';
+$customer->city = 'Columbus';
+$customer->state = 'Ohio';
+$customer->zip = '43210';
+$customer->phone = '+1-614-555-5555';
+$customer->createdDate = '2013-10-01 14:30:26';
+
+
+$orders = [];
+
+$order1 = new Order();
+$order1->orderId = 1;
+$order1->customerId = 1;
+$order1->salespersonId = 9000;
+$order1->items = [
+    new Item(
+        'TCV37GIT4NJ',
+        'USB 3.0 Coffee Warmer',
+        15.99
+    ),
+    new Item(
+        'PEG10BBF2PP',
+        'eTablet Pro; 24GB; Grey',
+        399.99
+    )
+];
+$order1->total = 415.98;
+$order1->orderDate = '2013-10-01 14:42:26';
+$orders[] = $order1;
+
+$order2 = new Order();
+$order2->orderId = 2;
+$order2->customerId = 1;
+$order2->salespersonId = 9001;
+$order2->items = [
+    new Item(
+        'OAX19XWN0QP',
+        'GoSlo Digital Camera',
+        359.99
+    )
+];
+$order2->total = 359.99;
+$order2->orderDate = '2013-10-15 16:43:16';
+$orders[] = $order2;
+
+$order3 = new Order();
+$order3->orderId = 3;
+$order3->customerId = 1;
+$order3->salespersonId = 9000;
+$order3->items = [
+    new Item(
+        'WYK12EPU5EZ',
+        'Call of Battle = Goats - Gamesphere 4',
+        69.99
+    ),
+    new Item(
+        'TJB84HAA8OA',
+        'Bricko Building Blocks',
+        4.99
+    )
+];
+$order3->total = 74.98;
+$order3->orderDate = '2013-11-03 17:45:28';
+$orders[] = $order3;
+
+
+$orderSummary = new OrderSummary();
+$orderSummary->customerId = 1;
+foreach ($orders as $order) {
+    $orderSummary->summaries[] = new OrderSummaryItem($order);
+}
+unset($order);
+
+
+
+// Starting Client
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Creating Buckets
+$customersBucket = new Riak\Bucket('Customers');
+$ordersBucket = new Riak\Bucket('Orders');
+$orderSummariesBucket = new Riak\Bucket('OrderSummaries');
+
+// Storing Data
+$storeCustomer = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($customer)
+    ->atLocation(new Location($customer->customerId, $customersBucket))
+    ->build();
+$storeCustomer->execute();
+
+foreach ($orders as $order) {
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($order)
+        ->atLocation(new Location($order->orderId, $ordersBucket))
+        ->build();
+    $storeOrder->execute();
+}
+unset($order);
+
+$storeSummary = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($orderSummary)
+    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
+    ->build();
+$storeSummary->execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `OrderSummaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```php
+// Fetching related data by shared key
+$fetched_customer = (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $customersBucket))
+    ->build()->execute()->getObject()->getData();
+
+$fetched_customer->orderSummary =
+    (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $orderSummariesBucket))
+    ->build()->execute()->getObject()->getData();
+
+print("Customer with OrderSummary data: \n");
+print_r($fetched_customer);
+```
+
+Which returns our amalgamated objects:
+
+```text
+Customer with OrderSummary data:
+stdClass Object
+(
+    [customerId] => 1
+    [name] => John Smith
+    [address] => 123 Main Street
+    [city] => Columbus
+    [state] => Ohio
+    [zip] => 43210
+    [phone] => +1-614-555-5555
+    [createdDate] => 2013-10-01 14:30:26
+    [orderSummary] => stdClass Object
+        (
+            [customerId] => 1
+            [summaries] => Array
+                (
+                    [0] => stdClass Object
+                        (
+                            [orderId] => 1
+                            [total] => 415.98
+                            [orderDate] => 2013-10-01 14:42:26
+                        )
+
+                    [1] => stdClass Object
+                        (
+                            [orderId] => 2
+                            [total] => 359.99
+                            [orderDate] => 2013-10-15 16:43:16
+                        )
+
+                    [2] => stdClass Object
+                        (
+                            [orderId] => 3
+                            [total] => 74.98
+                            [orderDate] => 2013-11-03 17:45:28
+                        )
+                )
+        )
+)
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```php
+// Adding Index Data
+$keys = array(1,2,3);
+foreach ($keys as $key) {
+    $orderLocation = new Location($key, $ordersBucket);
+    $orderObject = (new Command\Builder\FetchObject($riak))
+        ->atLocation($orderLocation)
+        ->build()->execute()->getObject();
+
+    $order = $orderObject->getData();
+
+    $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId);
+    $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate);
+
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->withObject($orderObject)
+        ->atLocation($orderLocation)
+        ->build();
+    $storeOrder->execute();
+}
+unset($key);
+
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. 
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's ID of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();

+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's ID; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+?>
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which, as you can see, will return the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary ID other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/python.md b/content/riak/kv/2.9.4/developing/getting-started/python.md
new file mode 100644
index 0000000000..c517c8ca6b
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/python.md
@@ -0,0 +1,106 @@
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/python
+  - /riak/kv/2.9.4/dev/taste-of-riak/python
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.4/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`.
+
+## Prerequisites
+
+First, you must install some packages needed by the Riak Python client:
+
+* `python-dev` - Header files and a static library for Python
+* `libffi-dev` - Foreign function interface library
+* `libssl-dev` - libssl and libcrypto development libraries
+
+### Ubuntu (12.04 & 14.04)
+
+```bash
+sudo apt-get install python-dev libffi-dev libssl-dev
+```
+
+## Client Setup
+
+The easiest way to install the client is with `easy_install` or `pip`.
+Either of the commands below will ensure that the client and all its
+dependencies are installed and on the load path. Depending on where your
+Python libraries are held, these may require `sudo`.
+
+```bash
+easy_install riak
+pip install riak
+```
+
+To install from source, download the latest Python client from GitHub
+([zip](https://github.com/basho/riak-python-client/archive/master.zip),
+[GitHub repository](https://github.com/basho/riak-python-client)), and
+extract it to your working directory.
+
+Now, let's build the client.
+
+```bash
+python setup.py install
+```
+
+## Connecting to Riak
+
+Now, let's start the Python REPL and get set up. Enter the following
+into the Python REPL:
+
+```python
+import riak
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```python
+myClient = riak.RiakClient(pb_port=8087, protocol='pbc')
+
+# Because the Python client uses the Protocol Buffers interface by
+# default, the following will work the same:
+myClient = riak.RiakClient(pb_port=8087)
+```
+
+If you set up a local Riak cluster using the [[five-minute install]]
+method, use this code snippet instead:
+
+```python
+myClient = riak.RiakClient(pb_port=10017, protocol='pbc')
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.4/developing/getting-started/python/crud-operations)
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/python/crud-operations.md b/content/riak/kv/2.9.4/developing/getting-started/python/crud-operations.md
new file mode 100644
index 0000000000..56f659ae16
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/python/crud-operations.md
@@ -0,0 +1,146 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "CRUD Operations"
+    identifier: "getting_started_python_crud"
+    weight: 100
+    parent: "getting_started_python"
+toc: true
+---
+
+## Creating Objects In Riak
+
+First, let’s create a few objects and a bucket to keep them in.
+
+```python
+myBucket = myClient.bucket('test')
+
+val1 = 1
+key1 = myBucket.new('one', data=val1)
+key1.store()
+```
+
+In this first example, we have stored the integer 1 with the lookup key
+of `one`. Next, let’s store a simple string value of `two` with a
+matching key.
+
+```python
+val2 = "two"
+key2 = myBucket.new('two', data=val2)
+key2.store()
+```
+
+That was easy. Finally, let’s store a bit of JSON. You will probably
+recognize the pattern by now.
+
+```python
+val3 = {"myValue": 3}
+key3 = myBucket.new('three', data=val3)
+key3.store()
+```
+
+## Reading Objects From Riak
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+```python
+fetched1 = myBucket.get('one')
+fetched2 = myBucket.get('two')
+fetched3 = myBucket.get('three')
+
+assert val1 == fetched1.data
+assert val2 == fetched2.data
+assert val3 == fetched3.data
+```
+
+That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` + diff --git a/content/riak/kv/2.9.4/developing/getting-started/python/object-modeling.md b/content/riak/kv/2.9.4/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..a11c733e93 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/getting-started/python/object-modeling.md @@ -0,0 +1,261 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.4/dev/taste-of-riak/object-modeling-python + - /riak/kv/2.9.4/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br /> `marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period, so
+that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```python
+class UserRepository:
+    BUCKET = 'Users'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, user):
+        riak_obj = self.client.bucket(self.BUCKET).get(user['user_name'])
+        riak_obj.data = user
+        return riak_obj.store()
+
+    def get(self, user_name):
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        return riak_obj.data
+
+
+class MsgRepository:
+    BUCKET = 'Msgs'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, msg):
+        msgs = self.client.bucket(self.BUCKET)
+        key = self._generate_key(msg)
+
+        riak_obj = msgs.get(key)
+
+        if not riak_obj.exists:
+            riak_obj.data = msg
+            riak_obj.store(if_none_match=True)
+
+        return riak_obj
+
+    def get(self, key):
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _generate_key(self, msg):
+        return msg['sender'] + '_' + msg['created']
+
+
+class TimelineRepository:
+    BUCKET = 'Timelines'
+    SENT = 'Sent'
+    INBOX = 'Inbox'
+
+    def __init__(self, client):
+        self.client = client
+        self.msg_repo = MsgRepository(client)
+
+    def post_message(self, msg):
+        # Save the canonical copy
+        saved_message = self.msg_repo.save(msg)
+        msg_key = saved_message.key
+
+        # Post to sender's Sent timeline
+        self._add_to_timeline(msg, self.SENT, msg_key)
+
+        # Post to recipient's Inbox timeline
+        self._add_to_timeline(msg, self.INBOX, msg_key)
+
+    def get_timeline(self, owner, msg_type, date):
+        key = self._generate_key(owner, msg_type, date)
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _add_to_timeline(self, msg, msg_type, msg_key):
+        timeline_key = self._generate_key_from_msg(msg, msg_type)
+        riak_obj = self.client.bucket(self.BUCKET).get(timeline_key)
+
+        if riak_obj.exists:
+            riak_obj = self._add_to_existing_timeline(riak_obj,
+                                                      msg_key)
+        else:
+            riak_obj = self._create_new_timeline(riak_obj,
+                                                 msg, msg_type,
+                                                 msg_key)
+
+        return riak_obj.store()
+
+    def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key):
+        owner = self._get_owner(msg, msg_type)
+        new_timeline = {'owner': owner,
+                        'msg_type': msg_type,
+                        'msgs': [msg_key]}
+
+        riak_obj.data = new_timeline
+        return riak_obj
+
+    def 
_add_to_existing_timeline(self, riak_obj, msg_key): + riak_obj.data['msgs'].append(msg_key) + return riak_obj + + def _get_owner(self, msg, msg_type): + if msg_type == self.INBOX: + return msg['recipient'] + else: + return msg['sender'] + + def _generate_key_from_msg(self, msg, msg_type): + owner = self._get_owner(msg, msg_type) + return self._generate_key(owner, msg_type, msg['created']) + + def _generate_key(self, owner, msg_type, datetimestr): + dateString = string.split(datetimestr, 'T', 1)[0] + return owner + '_' + msg_type + '_' + dateString + +``` + +Finally, let's test them: + +```python +# Setup our repositories +client = riak.RiakClient(pb_port=10017, protocol='pbc') +userRepo = UserRepository(client) +msgsRepo = MsgRepository(client) +timelineRepo = TimelineRepository(client) + +# Save users +userRepo.save(marleen) +userRepo.save(joe) + +# Post msg to timelines +timelineRepo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timelineRepo.get_timeline( + joe['user_name'], + TimelineRepository.INBOX, + datetime.utcnow().isoformat()) + +joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0]) + +print 'From: {0}\nMsg : {1}\n\n'.format( + joes_first_message['sender'], + joes_first_message['text']) + +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + diff --git a/content/riak/kv/2.9.4/developing/getting-started/python/querying.md b/content/riak/kv/2.9.4/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..4a0290f180 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/getting-started/python/querying.md @@ -0,0 +1,237 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Python" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Querying" + identifier: "getting_started_python_query" + weight: 101 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.4/dev/taste-of-riak/querying-python + - /riak/kv/2.9.4/dev/taste-of-riak/querying-python +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index for all of a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```python
+customer = customer_bucket.get('1').data
+customer['order_summary'] = order_summary_bucket.get('1').data
+customer
+```
+
+Which returns our amalgamated objects:
+
+```python
+{
+  u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210',
+  u'created_date': u'2013-10-01 14:30:26',
+  'order_summary': {
+    u'customer_id': 1, u'summaries': [
+      {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98},
+      {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99},
+      {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98}
+    ]},
+  u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street',
+  u'customer_id': 1
+}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.4/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id. Next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key. 
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/ruby.md b/content/riak/kv/2.9.4/developing/getting-started/ruby.md
new file mode 100644
index 0000000000..93c6b51f39
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/ruby.md
@@ -0,0 +1,65 @@
+---
+title: "Getting Started with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Ruby"
+    identifier: "getting_started_ruby"
+    weight: 101
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/ruby
+  - /riak/kv/2.9.4/dev/taste-of-riak/ruby
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.4/using/running-a-cluster) first. To try this flavor
+of Riak, a working installation of Ruby is required.
+
+## Client Setup
+
+First, install the Riak Ruby client via RubyGems.
+
+```bash
+gem install riak-client
+```
+
+Start IRB, the Ruby REPL, and let’s get set up. Enter the following into
+IRB:
+
+```ruby
+require 'riak'
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance, assuming that the node is running on `localhost`
+port 8087:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087)
+
+# Since the Ruby Riak client uses the Protocol Buffers API by default,
+# you can also just enter this:
+client = Riak::Client.new(:pb_port => 8087)
+```
+
+If you set up a local Riak cluster using the [[five-minute install]]
+method, use this code snippet instead:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017)
+
+# For the reasons explained in the snippet above, this will also work:
+client = Riak::Client.new(:pb_port => 10017)
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.4/developing/getting-started/ruby/crud-operations)
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/2.9.4/developing/getting-started/ruby/crud-operations.md
new file mode 100644
index 0000000000..9241ee1657
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/ruby/crud-operations.md
@@ -0,0 +1,147 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "CRUD Operations"
+    identifier: "getting_started_ruby_crud"
+    weight: 100
+    parent: "getting_started_ruby"
+toc: true
+---
+
+## Creating Objects In Riak
+
+First, let’s create a few objects and a bucket to keep them in.
+
+```ruby
+my_bucket = client.bucket("test")
+
+val1 = 1
+obj1 = my_bucket.new('one')
+obj1.data = val1
+obj1.store()
+```
+
+In this first example, we have stored the integer 1 with the lookup key
+of `one`. Next, let’s store a simple string value of `two` with a
+matching key.
+
+```ruby
+val2 = "two"
+obj2 = my_bucket.new('two')
+obj2.data = val2
+obj2.store()
+```
+
+That was easy. Finally, let’s store a bit of JSON. You will probably
+recognize the pattern by now.
+
+```ruby
+val3 = { myValue: 3 }
+obj3 = my_bucket.new('three')
+obj3.data = val3
+obj3.store()
+```
+
+## Reading Objects From Riak
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect. 
+
+```ruby
+fetched1 = my_bucket.get('one')
+fetched2 = my_bucket.get('two')
+fetched3 = my_bucket.get('three')
+
+fetched1.data == val1
+fetched2.data == val2
+fetched3.data.to_json == val3.to_json
+```
+
+That was easy. We simply request the objects by key. In the last
+example, we converted to JSON so we can compare a string key to a symbol
+key.
+
+## Updating Objects In Riak
+
+While some data may be static, other forms of data may need to be
+updated. This is also easy to accomplish. Let’s update the value of
+myValue in the 3rd example to 42.
+
+```ruby
+fetched3.data["myValue"] = 42
+fetched3.store()
+```
+
+## Deleting Objects From Riak
+
+As a last step, we’ll demonstrate how to delete data. You’ll see that
+the delete message can be called either against the bucket or the
+object.
+
+```ruby
+my_bucket.delete('one')
+obj2.delete()
+obj3.delete()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this Ruby hash that encapsulates some knowledge about
+a book.
+
+```ruby
+book = {
+  :isbn => '1111979723',
+  :title => 'Moby Dick',
+  :author => 'Herman Melville',
+  :body => 'Call me Ishmael. Some years ago...',
+  :copies_owned => 3
+}
+```
+
+All right, so we have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now.
+
+```ruby
+books_bucket = client.bucket('books')
+new_book = books_bucket.new(book[:isbn])
+new_book.data = book
+new_book.store()
+```
+
+Some of you may be thinking, "But how does the Ruby Riak client
+encode/decode my object?" If we fetch our book back and print the raw
+data, we shall know:
+
+```ruby
+fetched_book = books_bucket.get(book[:isbn])
+puts fetched_book.raw_data
+```
+
+Raw Data:
+
+```json
+{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville",
+"body":"Call me Ishmael. Some years ago...","copies_owned":3}
+```
+
+JSON! The Ruby Riak client will serialize objects to JSON when it comes
+across structured data like hashes. For more advanced control over
+serialization you can use a library called
+[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling
+layer over the basic Riak client. Ripple falls outside the scope of
+this document but we shall visit it later.
+
+Now, let’s clean up our mess:
+
+```ruby
+new_book.delete()
+```
+
diff --git a/content/riak/kv/2.9.4/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/2.9.4/developing/getting-started/ruby/object-modeling.md
new file mode 100644
index 0000000000..43905bef06
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/getting-started/ruby/object-modeling.md
@@ -0,0 +1,292 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Object Modeling"
+    identifier: "getting_started_ruby_object"
+    weight: 102
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/taste-of-riak/object-modeling-ruby
+  - /riak/kv/2.9.4/dev/taste-of-riak/object-modeling-ruby
+---
+
+To get started, let's create the models that we'll be using. Since the
+[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses
+hashes when converting to and from JSON, we'll use the library
+[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically
+coerce class properties to and from hashes. 
You can install this library
+with `gem install hashie`.
+
+```ruby
+# Encoding: utf-8
+
+require 'riak'
+require 'hashie'
+require 'time'
+
+class User < Hashie::Dash
+  property :user_name
+  property :full_name
+  property :email
+end
+
+class Msg < Hashie::Dash
+  property :from
+  property :to
+  property :created
+  property :text
+end
+
+class Timeline < Hashie::Dash
+  property :owner
+  property :type
+  property :msgs
+end
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br /> `marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period, so
+that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def 
add_to_existing_timeline(key, msg_key) + riak_obj = @client.bucket(BUCKET).get(key) + timeline = Timeline.new(riak_obj.data) + timeline.msgs << msg_key + riak_obj.data = timeline + riak_obj + end + + def get_owner(msg, type) + type == INBOX ? msg.to : msg.from + end + + def generate_key_from_msg(msg, type) + owner = get_owner(msg, type) + generate_key(owner, type, msg.created) + end + + def generate_key(owner, type, date) + owner + '_' + type + '_' + date.utc.strftime('%F') + end +end +``` + +Finally, let's test them: + +```ruby +# Setup our repositories +client = Riak::Client.new(protocol: 'pbc', pb_port: 10017) +user_repo = UserRepository.new(client) +msgs_repo = MsgRepository.new(client) +timeline_repo = TimelineRepository.new(client) + +# Create and save users +marleen = User.new(user_name: 'marleenmgr', + full_name: 'Marleen Manager', + email: 'marleen.manager@basho.com') + +joe = User.new(user_name: 'joeuser', + full_name: 'Joe User', + email: 'joe.user@basho.com') + +user_repo.save(marleen) +user_repo.save(joe) + +# Create new Msg, post to timelines +msg = Msg.new(from: marleen.user_name, + to: joe.user_name, + created: Time.now, + text: 'Welcome to the company!') + +timeline_repo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now) +joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first) + +puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}" +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + diff --git a/content/riak/kv/2.9.4/developing/getting-started/ruby/querying.md b/content/riak/kv/2.9.4/developing/getting-started/ruby/querying.md new file mode 100644 index 0000000000..78a504f3b9 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/getting-started/ruby/querying.md @@ -0,0 +1,253 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Querying" + identifier: "getting_started_ruby_query" + weight: 101 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/2.9.4/dev/taste-of-riak/querying-ruby + - /riak/kv/2.9.4/dev/taste-of-riak/querying-ruby +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. 
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all of a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```ruby
+shared_key = '1'
+customer = customer_bucket.get(shared_key).data
+customer[:order_summary] = order_summary_bucket.get(shared_key).data
+puts "Combined Customer and Order Summary: "
+pp customer
+```
+
+Which returns our amalgamated objects:
+
+```ruby
+# Combined Customer and Order Summary:
+{"customer_id"=>1,
+ "name"=>"John Smith",
+ "address"=>"123 Main Street",
+ "city"=>"Columbus",
+ "state"=>"Ohio",
+ "zip"=>"43210",
+ "phone"=>"+1-614-555-5555",
+ "created_date"=>"2013-10-01 14:30:26 -0400",
+ :order_summary=>
+  {"customer_id"=>1,
+   "summaries"=>
+    [{"order_id"=>1,
+      "total"=>415.98,
+      "order_date"=>"2013-10-01 14:42:26 -0400"},
+     {"order_id"=>2,
+      "total"=>359.99,
+      "order_date"=>"2013-10-15 16:43:16 -0400"},
+     {"order_id"=>3,
+      "total"=>74.98,
+      "order_date"=>"2013-11-03 17:45:28 -0500"}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.4/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+                                         .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "#Jane's Orders: "
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id. Next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`. 
+
+```ruby
+puts "#October's Orders: "
+pp order_bucket.get_index('order_date_bin', '20131001'..'20131031')
+```
+
+Which returns:
+
+```ruby
+# October's Orders:
+["1", "2"]
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
diff --git a/content/riak/kv/2.9.4/developing/key-value-modeling.md b/content/riak/kv/2.9.4/developing/key-value-modeling.md
new file mode 100644
index 0000000000..7c22aec051
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/key-value-modeling.md
@@ -0,0 +1,532 @@
+---
+title: "Riak KV Key/Value Modeling"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Key/Value Modeling"
+    identifier: "developing_kv_model"
+    weight: 104
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/data-modeling/key-value/
+  - /riak/kv/2.9.4/dev/data-modeling/key-value/
+---
+
+While Riak enables you to take advantage of a wide variety of features
+that can be useful in application development, such as [Search]({{}}riak/kv/2.9.4/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.9.4/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.9.4/developing/data-types/), Riak almost always performs best when you
+build your application around basic CRUD operations (create, read,
+update, and delete) on objects, i.e. when you use Riak as a "pure"
+key/value store.
+
+In this tutorial, we'll suggest some strategies for naming and modeling
+for key/value object interactions with Riak. If you'd like to use some
+of Riak's other features, we recommend checking out the documentation
+for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.9.4/developing/app-guide/) for a better sense of which features you might need.
+
+## Advantages of Key/Value Operations
+
+Riak's key/value architecture enables it to be more performant than
+relational databases in many scenarios because Riak doesn't need to
+perform lock, join, union, or other operations when working with
+objects. Instead, it interacts with objects on a one-by-one basis, using
+**primary key lookups**.
+
+Primary key lookups store and fetch objects in Riak on the basis of
+three basic locators:
+
+* The object's [key]({{}}riak/kv/2.9.4/learn/concepts/keys-and-objects#keys), which can be anything you
+  want as long as it is [Unicode compliant](http://www.unicode.org/)
+* The [bucket]({{}}riak/kv/2.9.4/learn/concepts/buckets) which houses the object and its key (bucket
+  names are also Unicode compliant)
+* The [bucket type]({{}}riak/kv/2.9.4/developing/usage/bucket-types) that determines the bucket's
+  [replication]({{}}riak/kv/2.9.4/developing/app-guide/replication-properties) and other properties
+
+It may be useful to think of this system as analogous to a nested
+key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you
+would find in most programming languages. Below is an example from
+[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{}}riak/kv/2.9.4/developing/api/http)):
+
+```
+GET/PUT/DELETE /bucket/<bucket>/keys/<key>
+```
+
+The most important benefit of storing Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist? 
+ +One way to determine this is to [list all keys]({{}}riak/kv/2.9.4/developing/api/protocol-buffers/list-keys) in the +bucket `users`. This approach, however, is _not_ recommended, because +listing all keys in a bucket is a very expensive operation that should +not be used in production. And so another strategy must be employed. + +A better possibility is to use [Riak sets]({{}}riak/kv/2.9.4/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.9.4/developing/data-types) that enable you to store lists of binaries or strings in Riak. +Unlike normal Riak objects, you can interact with Riak sets much like +you interact with sets in most programming languages, i.e. you can add +and remove elements at will. + +Going back to our user data example, instead of simply storing user +records in our `users` bucket, we could set up our application to store +each key in a set when a new record is created. We'll store this set in +the bucket `user_info_sets` (we'll keep it simple) and in the key +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.9.4/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`sets`. + +We can interact with that set on the basis of its location: + +```java +Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames"); + +// With this Location, we can construct fetch operations like this: +FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build(); +``` + +```ruby +require 'riak' + +set_bucket = client.bucket('user_info_sets') + +# We'll make this set global because we'll use it +# inside of a function later on + +$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets') +``` + +```php +$command = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->buildLocation('usernames', 'user_info_sets', 'sets') + ->build(); +``` + +```python +from riak.datatypes import Set + +bucket = client.bucket_type('sets').bucket('user_info_sets') +user_id_set = Set(bucket, 'usernames') +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.9.4/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.9.4/developing/getting-started). 
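+
+Because Riak sets behave like the set type in your language's standard
+library, maintaining this index is mostly a matter of simple element
+operations. Below is a minimal sketch, assuming the Python client setup
+and the `user_id_set` object from above; the username shown is purely
+hypothetical:
+
+```python
+# Add a (hypothetical) username to the local set, then persist the change
+user_id_set.add('bashowhisperer123')
+user_id_set.store()
+
+# Removing an element works the same way
+user_id_set.discard('bashowhisperer123')
+user_id_set.store()
+
+# reload() fetches the current server-side value; .value is a frozenset
+print(len(user_id_set.reload().value))
+```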
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+    public String username;
+    public String info;
+
+    public User(String username, String info) {
+        this.username = username;
+        this.info = info;
+    }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+    // User records themselves will be stored in the bucket "users"
+    Location userObjectLocation =
+        new Location(new Namespace("users"), user.username);
+    RiakObject userObject = new RiakObject()
+        // We'll keep it simple and store User object data as plain text
+        .setContentType("text/plain")
+        .setValue(user.info);
+    StoreValue store = new StoreValue.Builder(userObjectLocation, userObject)
+        .build();
+    client.execute(store);
+
+    Location userIdSet =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    SetUpdate su = new SetUpdate()
+        .add(BinaryValue.create(user.username));
+    UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+        .build();
+    client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new(client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the global set
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+    public $user_name;
+    public $info;
+
+    public function __construct($user_name, $info)
+    {
+        $this->user_name = $user_name;
+        $this->info = $info;
+    }
+}
+
+function store_user(User $user)
+{
+    (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->buildLocation($user->user_name, 'users')
+        ->buildJsonObject($user)
+        ->build()
+        ->execute();
+
+    (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->add($user->user_name)
+        ->build()
+        ->execute();
+}
+```
+
+```python
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    obj = RiakObject(client, 'users', user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
+
+Now, let's say that we want to be able to pull up all user records in
+the bucket at once. 
+stored in our set and then fetching the object corresponding to each
+username:
+
+```java
+public Set<User> fetchAllUserRecords() throws Exception {
+    // Empty builder sets for usernames and User objects
+    Set<String> usernames = new HashSet<>();
+    Set<User> userSet = new HashSet<>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSetLocation =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSetLocation).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    set.viewAsSet().forEach((BinaryValue username) -> {
+        usernames.add(username.toString());
+    });
+
+    // Fetch User objects for each of the usernames stored in the set
+    for (String username : usernames) {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        User user = client.execute(fetch).getValue(User.class);
+        userSet.add(user);
+    }
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+    $users = [];
+
+    $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->build()
+        ->execute();
+
+    $user_names = $response->getSet()->getData();
+    foreach($user_names as $user_name) {
+        $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+            ->buildLocation($user_name, 'users')
+            ->build()
+            ->execute();
+
+        $users[$user_name] = $response->getObject()->getData();
+    }
+
+    return $users;
+}
+```
+
+```python
+# We'll create a generator object that will yield a list of Riak objects
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can retrieve that list of Riak objects later on
+list(fetch_all_user_records())
+```
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`. If an
+object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) throws Exception {
+    String usernameKey = String.format("user_%s", username);
+    Location loc = new Location(new Namespace("users"), usernameKey);
+    FetchValue fetchUser = new FetchValue.Builder(loc).build();
+    FetchValue.Response res = client.execute(fetchUser);
+    User userObject = res.getValue(User.class);
+    return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->buildLocation('user_' . $user_name, 'users')
+        ->build()
+        ->execute();
+
+    return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good' => {
+    'season X' => {
+      'episode 1' => '<title>',
+      # ...
+ } + }, + 'bad': { + 'season Y': { + { 'episode 1': '<title>' }, + # ... + } + } +} +``` + +We can fetch the title of season 8, episode 6: + +```ruby +# For the sake of example, we'll classify season 8 as good: + +simpsons['good']['season 8']['episode 6'] + +# => "A Milhouse Divided" +``` + +If your data is best modeled as a three-layered hash, you may want to +consider using bucket types in the way shown above. + +## Resources + +More on key/value modeling in Riak can be found in [this +presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo) +by Basho evangelist [Hector Castro](https://github.com/hectcastro), with +the presentation slides available [on Speaker +Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example). + diff --git a/content/riak/kv/2.9.4/developing/usage.md b/content/riak/kv/2.9.4/developing/usage.md new file mode 100644 index 0000000000..1620ad4834 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage.md @@ -0,0 +1,134 @@ +--- +title: "Usage Overview" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Usage" + identifier: "developing_usage" + weight: 101 + parent: "developing" +toc: true +--- + +## In This Section + +#### [Creating Objects](./creating-objects) + +Creating and storing objects in Riak KV. + +[Learn More >>](./creating-objects) + + +#### [Reading Objects](./reading-objects) + +Reading and fetching objects in Riak KV. + +[Learn More >>](./reading-objects) + + +#### [Updating Objects](./updating-objects) + +Updating objects in Riak KV. + +[Learn More >>](./updating-objects) + + +#### [Deleting Objects](./deleting-objects) + +Deleting objects in Riak KV. + +[Learn More >>](./deleting-objects) + + +#### [Content Types](./content-types) + +Overview of content types and their usage. + +[Learn More >>](./content-types) + + +#### [Using Search](./search) + +Tutorial on using search. + +[Learn More >>](./search) + + +#### [Using MapReduce](./mapreduce) + +Guide to using MapReduce in applications. + +[Learn More >>](./mapreduce) + + +#### [Using Secondary Indexes](./secondary-indexes) + +Overview and usage details of Secondary Indexes (2i). + +[Learn More >>](./secondary-indexes) + + +#### [Bucket Types](./bucket-types) + +Describes how to use bucket properties. + +[Learn More >>](./bucket-types) + + +#### [Using Commit Hooks](./commit-hooks) + +Tutorial on pre-commit and post-commit hook functions. + +[Learn More >>](./commit-hooks) + + +#### [Creating Search Schemas](./search-schemas) + +Step-by-step guide on creating and using custom search schemas. + +[Learn More >>](./search-schemas) + + +#### [Searching with Data Types](./searching-data-types) + +Guide on using search with Data Types. + +[Learn More >>](./searching-data-types) + + +#### [Implementing a Document Store](./document-store) + +Tutorial on using Riak KV as a document store. + +[Learn More >>](./document-store) + + +#### [Custom Extractors](./custom-extractors) + +Details on creating and registering custom extractors with Riak Search. + +[Learn More >>](./custom-extractors) + + +#### [Client-side Security](./security) + +Overview of client-side security. + +[Learn More >>](./security) + + +#### [Replication](./replication) + +Documentation on replication properties and their underlying implementation. + +[Learn More >>](./replication) + + +#### [Conflict Resolution](./conflict-resolution) + +Guide to conflict resolution during object updates. 
+ +[Learn More >>](./conflict-resolution) + diff --git a/content/riak/kv/2.9.4/developing/usage/bucket-types.md b/content/riak/kv/2.9.4/developing/usage/bucket-types.md new file mode 100644 index 0000000000..c34a8a619c --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/bucket-types.md @@ -0,0 +1,99 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Bucket Types" + identifier: "usage_bucket_types" + weight: 108 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.4/dev/advanced/bucket-types + - /riak/kv/2.9.4/dev/advanced/bucket-types +--- + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +## Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +`default` bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + diff --git a/content/riak/kv/2.9.4/developing/usage/commit-hooks.md b/content/riak/kv/2.9.4/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..7e27da3dca --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/commit-hooks.md @@ -0,0 +1,246 @@ +--- +title: "Using Commit Hooks" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Using Commit Hooks" + identifier: "usage_commit_hooks" + weight: 109 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.4/dev/using/commit-hooks + - /riak/kv/2.9.4/dev/using/commit-hooks +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types + +Pre- and post-commit hooks are functions that are invoked before or +after an object has been written to Riak. 
To provide a few examples,
+commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and then the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/2.9.4/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions are allowed three possible return values:
+
+- **A Riak object** - This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before it is written.
+- **`fail`** - The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/2.9.4/developing/api/http)) along with a generic error message about why the write was blocked.
+- **`{fail, Reason}`** - The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
+
+Errors that occur when processing Erlang pre-commit hooks will be
+reported in the `sasl-error.log` file with lines that start with
+`problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object
+(`Object`) as its input and runs a pattern-matching operation on the
+object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
+function determines that the object's size (as returned by the `riak_object:get_value`
+function) is greater than 5,242,880 (5 MB in bytes), then the hook
+will fail the commit with the message `Object is larger than 5MB.`
+This will stop the write. If the object is not larger than 5 MB, the
+hook will return the object and allow the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty
+list, meaning that no pre-commit hooks are specified by default. Adding
+one or more pre-commit hook functions to this list, as documented above,
+will cause Riak to start evaluating those hook functions when bucket
+entries are created, updated, or deleted. Riak stops evaluating
+pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way to use
+pre-commit hooks is to validate data before it is written to Riak.
+Below is an example that uses Erlang to validate a JSON object
+before it is written to Riak.
+
+Below is a sample JSON object that will be evaluated by the hook:
+
+```json
+{
+  "user_info": {
+    "name": "Mark Phillips",
+    "age": "25"
+  },
+  "session_info": {
+    "id": 3254425,
+    "items": [29, 37, 34]
+  }
+}
+```
+
+The following hook will validate the JSON object:
+
+```erlang
+validate(Object) ->
+  try
+    mochijson2:decode(riak_object:get_value(Object)),
+    Object
+  catch
+    throw:invalid_utf8 ->
+      {fail, "Invalid JSON: Illegal UTF-8 character"};
+    error:Error ->
+      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
+  end.
+```
+
+**Note**: All pre-commit hook functions are executed for each create and update operation.
+
+## Post-Commit Hooks
+
+Post-commit hooks are run after a write has completed successfully. More
+specifically, the hook function is called immediately before the calling
+process is notified of the successful write.
+
+Hook functions must accept a single argument: the object instance just
+written. The return value of the function is ignored. As with pre-commit
+hooks, deletes are considered writes, so post-commit hook functions will
+need to inspect the object's metadata for the presence of `X-Riak-Deleted`
+to determine whether a delete has occurred. As with pre-commit hooks,
+errors that occur when processing post-commit hooks will be reported in
+the `sasl-error.log` file with lines that start with `problem invoking hook`.
+
+#### Example
+
+The following post-commit hook creates a secondary index on the `email`
+field of a JSON object:
+
+```erlang
+postcommit_index_on_email(Object) ->
+    %% Determine the target bucket name
+    Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),
+
+    %% Decode the JSON body of the object
+    {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),
+
+    %% Extract the email field
+    {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),
+
+    %% Create a new object for the target bucket
+    %% NOTE: This doesn't handle the case where the
+    %% index object already exists!
+    IndexObj = riak_object:new(
+        Bucket, Key, <<>>, %% no object contents
+        dict:from_list(
+            [
+                {<<"content-type">>, "text/plain"},
+                {<<"Links">>,
+                    [
+                        {
+                            {riak_object:bucket(Object), riak_object:key(Object)},
+                            <<"indexed">>
+                        }]}
+            ]
+        )
+    ),
+
+    %% Get a riak client
+    {ok, C} = riak:local_client(),
+
+    %% Store the object
+    C:put(IndexObj).
+```
+
+
+### Chaining
+
+The default value of the bucket `postcommit` property is an empty list,
+meaning that no post-commit hooks are specified by default. Adding one
+or more post-commit hook functions to the list, as documented above,
+will cause Riak to start evaluating those hook functions immediately
+after data has been created, updated, or deleted. Each post-commit hook
+function runs in a separate process so it's possible for several hook
+functions, triggered by the same update, to execute in parallel.
+
+**Note**: All post-commit hook functions are executed for each create,
+update, or delete.
+
diff --git a/content/riak/kv/2.9.4/developing/usage/conflict-resolution.md b/content/riak/kv/2.9.4/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..9a8366d378
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/conflict-resolution.md
@@ -0,0 +1,690 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/using/conflict-resolution
+  - /riak/kv/2.9.4/dev/using/conflict-resolution
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#node) is capable of receiving requests without requiring that
+every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
+unavoidable. Often, Riak can resolve these conflicts on its own
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+ +{{% note title="Important note on terminology" %}} +In versions of Riak prior to 2.0, vector clocks were the only causal context +mechanism available in Riak, which changed with the introduction of dotted +version vectors in 2.0. Please note that you may frequent find terminology in +client library APIs, internal Basho documentation, and more that uses the term +"vector clock" interchangeably with causal context in general. Riak's HTTP API +still uses a `X-Riak-Vclock` header, for example, even if you are using dotted +version vectors. +{{% /note %}} + +But even when you use causal context, Riak cannot always decide which +value is most causally recent, especially in cases involving concurrent +updates to an object. So how does Riak behave when it can't decide on a +single most-up-to-date value? **That is your choice**. A full listing of +available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now, +though, please bear in mind that we strongly recommend one of the +following two options: + +1. If your data can be modeled as one of the currently available [Riak + Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types), we recommend using one of these types, + because all of them have conflict resolution _built in_, completely + relieving applications of the need to engage in conflict resolution. +2. If your data cannot be modeled as one of the available Data Types, + we recommend allowing Riak to generate [siblings](#siblings) and to design your application to resolve + conflicts in a way that fits your use case. Developing your own + **conflict resolution strategy** can be tricky, but it has clear + advantages over other approaches. + +Because Riak allows for a mixed approach when storing and managing data, +you can apply multiple conflict resolution strategies within a cluster. + +> **Note on strong consistency** +> +> In versions of Riak 2.0 and later, you have the option of using Riak in +a strongly consistent fashion. This document pertains to usage of Riak +as an _eventually_ consistent system. If you'd like to use Riak's +strong consistency feature, please refer to the following documents: +> +> * [Using Strong Consistency]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/strong-consistency) +--- +A guide for developers +> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/2.9.4/configuring/strong-consistency) +--- +A guide for operators +> * [strong consistency][use ref strong consistency] +--- +A more theoretical explication of strong + consistency + +## Client- and Server-side Conflict Resolution + +Riak's eventual consistency model is powerful because Riak is +fundamentally non-opinionated about how data resolution takes place. +While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/replication-properties#available-parameters), there are a variety of general +approaches to conflict resolution that are available. In Riak, you can +mix and match conflict resolution strategies at the bucket level, +[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets) +to consider when reasoning about conflict resolution are the +`allow_mult` and `last_write_wins` properties. 
+ +These properties provide you with the following basic options: + +### Timestamp-based Resolution + +If the [`allow_mult`](#siblings) parameter is set to +`false`, Riak resolves all object replica conflicts internally and does +not return siblings to the client. How Riak resolves those conflicts +depends on the value that you set for a different bucket property, +[`last_write_wins`]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets). If `last_write_wins` is set to `false`, +Riak will resolve all conflicts on the basis of +[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are +attached to all Riak objects as metadata. + +The problem with timestamps is that they are not a reliable resolution +mechanism in distributed systems, and they always bear the risk of data +loss. A better yet still-problematic option is to adopt a +last-write-wins strategy, described directly below. + +### Last-write-wins + +Another way to manage conflicts is to set `allow_mult` to `false`, as +with timestamp-based resolution, while also setting the +`last_write_wins` parameter to +`true`. This produces a so-called last-write-wins (LWW) strategy whereby +Riak foregoes the use of all internal conflict resolution strategies +when making writes, effectively disregarding all previous writes. + +The problem with LWW is that it will necessarily drop some writes in the +case of concurrent updates in the name of preventing sibling creation. +If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. 
For examples of client-side sibling resolution, see the following
+client-library-specific docs:
+
+* [Java]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/ruby)
+* [Python]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/python)
+* [C#]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/csharp)
+* [Node.js]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/nodejs)
+
+In Riak versions 2.0 and later, `allow_mult` is set to `true` by default
+for any [bucket types]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) that you create. This means
+that if you wish to avoid client-side sibling resolution, you have a few
+options:
+
+* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types)
+  that set `allow_mult` to `false`
+* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/2.9.4/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/2.9.4/configuring/reference#default-bucket-properties) for your
+  cluster. If you set the `buckets.default.allow_mult` parameter to
+  `false`, all bucket types that you create will have `allow_mult` set
+  to `false` by default.
+
+## Causal Context
+
+When a value is stored in Riak, it is tagged with a piece of metadata
+called a **causal context** which establishes the object's initial
+version. Causal context comes in one of two possible forms, depending
+on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context#vector-clocks) will be used.
+
+Causal context essentially enables Riak to compare the different values
+of objects stored in Riak and to determine a number of important things
+about those values:
+
+ * Whether one value is a direct descendant of the other
+ * Whether the values are direct descendants of a common parent
+ * Whether the values are unrelated in recent heritage
+
+Using the information provided by causal context, Riak is frequently,
+though not always, able to resolve conflicts between values without
+producing siblings.
+
+Neither vector clocks nor dotted version vectors are human-readable; both
+look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+If `allow_mult` is set to `true`, you should _always_ use causal context
+when updating objects, _unless you are certain that no object exists
+under that key_. Failing to use causal context with mutable data,
+especially for objects that are frequently updated, can lead to
+[sibling explosion]({{<baseurl>}}riak/kv/2.9.4/using/performance/latency-reduction#siblings), which can
+produce a variety of problems in your cluster. Fortunately, much of the
+work involved with using causal context is handled automatically by
+Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.4/developing/client-libraries). Examples can be found for each
+client library in the [Object Updates]({{<baseurl>}}riak/kv/2.9.4/developing/usage/updating-objects) document.
+
+## Siblings
+
+A **sibling** is created when Riak is unable to resolve the canonical
+version of an object being stored, i.e. when Riak is presented with
+multiple possible values for an object and can't figure out which one is
+most causally recent.
The following scenarios can create sibling values
+inside of a single object:
+
+1. **Concurrent writes** - If two writes occur simultaneously from
+clients, Riak may not be able to choose a single value to store, in
+which case the object will be given siblings. These writes could happen
+on the same node or on different nodes.
+2. **Stale causal context** - Writes from any client using a stale
+[causal context]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context). This is a less likely scenario if a client updates
+the object by reading the object first, fetching the causal context
+currently attached to the object, and then returning that causal context
+to Riak when performing the update (fortunately, our client libraries
+handle much of this automatically). However, even if a client follows
+this protocol when performing updates, a situation may occur in which an
+update happens from a different client while the read/write cycle is
+taking place. This may cause the first client to issue the write with an
+old causal context value and for a sibling to be created. A client is
+"misbehaved" if it habitually updates objects with a stale or no context
+object.
+3. **Missing causal context** - If an object is updated with no causal
+context attached, siblings are very likely to be created. This is an
+unlikely scenario if you're using a Basho client library, but it _can_
+happen if you are manipulating objects using a client like `curl` and
+forgetting to set the `X-Riak-Vclock` header.
+
+## Siblings in Action
+
+Let's have a more concrete look at how siblings work in Riak. First,
+we'll create a bucket type called `siblings_allowed` with `allow_mult`
+set to `true`:
+
+```bash
+riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
+riak-admin bucket-type activate siblings_allowed
+riak-admin bucket-type status siblings_allowed
+```
+
+If the type has been activated, running the `status` command should
+return `siblings_allowed is active`.
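+
+If you'd rather confirm the type's properties from application code than
+from the command line, a quick check is possible as well. The following
+is only a sketch: it assumes the Java client's `FetchBucketProperties`
+command and a `getAllowMulti()` accessor on the returned properties:
+
+```java
+// Fetch the properties of any bucket under the siblings_allowed type
+Namespace ns = new Namespace("siblings_allowed", "nickolodeon");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(ns).build();
+FetchBucketPropsOperation.Response response = client.execute(fetchProps);
+System.out.println("allow_mult: " + response.getBucketProperties().getAllowMulti());
+```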
+
+Now, we'll create two objects and
+write both of them to the same key without first fetching the object
+(which obtains the causal context):
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+RiakObject obj1 = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("Ren"));
+RiakObject obj2 = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("Stimpy"));
+StoreValue store1 = new StoreValue.Builder(obj1)
+        .withLocation(bestCharacterKey)
+        .build();
+StoreValue store2 = new StoreValue.Builder(obj2)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store1);
+client.execute(store2);
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = Riak::RObject.new(bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'Ren'
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'Stimpy'
+obj2.store
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = RiakObject(client, bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.data = 'Ren'
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.data = 'Stimpy'
+obj2.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('siblings_allowed');
+obj1.setBucket('nickolodeon');
+obj1.setKey('best_character');
+obj1.setValue('Ren');
+
+var obj2 = new Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries]({{<baseurl>}}riak/kv/2.9.4/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.4/developing/getting-started) section.
+
+At this point, multiple objects have been stored in the same key without
+passing any causal context to Riak. Let's see what happens if we try to
+read contents of the object:
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed', bucket:
+        'nickolodeon', key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found. We should get this response:
+
+```java
+com.basho.riak.client.cap.UnresolvedConflictException: Siblings found
+```
+
+```ruby
+<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]>
+```
+
+```python
+[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>]
+```
+
+```csharp
+Sibling count: 2
+    VTag: 1DSVo7VED8AC6llS8IcDE6
+    VTag: 7EiwrlFAJI5VMLK87vU4tE
+```
+
+```javascript
+info: nickolodeon/best_character has '2' siblings
+```
+
+```curl
+Siblings:
+175xDv0I3UFCfGRC7K7U9z
+6zY2mUCFPEoL834vYCDmPe
+```
+
+As you can see, reading an object with sibling values will result in
+some form of "multiple choices" response (e.g. `300 Multiple Choices` in
+HTTP). If you're using the HTTP interface and want to view all sibling
+values, you can attach an `Accept: multipart/mixed` header to your
+request:
+
+```curl
+curl -H "Accept: multipart/mixed" \
+  http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Response (without headers):
+
+```
+Ren
+--WUnzXITIPJFwucNwfdaofMkEG7H
+
+Stimpy
+--WUnzXITIPJFwucNwfdaofMkEG7H--
+```
+
+If you select the first of the two siblings and retrieve its value, you
+should see `Ren` and not `Stimpy`.
+
+### Using Causal Context
+
+Once you are presented with multiple options for a single value, you
+must determine the correct value. In an application, this can be done
+either in an automatic fashion, using a use case-specific resolver, or
+by presenting the conflicting objects to the end user.
For more
+information on application-side conflict resolution, see our
+client-library-specific documentation for the following languages:
+
+* [Java]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/ruby)
+* [Python]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/python)
+* [C#]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/csharp)
+* [Node.js]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/nodejs)
+
+We won't deal with conflict resolution in this section. Instead, we'll
+focus on how to use causal context.
+
+After having written several objects to Riak in the section above, we
+have values in our object: `Ren` and `Stimpy`. But let's say that we
+decide that `Stimpy` is the correct value based on our application's use
+case. In order to resolve the conflict, we need to do three things:
+
+1. Fetch the current object (which will return both siblings)
+2. Modify the value of the object, i.e. make the value `Stimpy`
+3. Write the object back to the `best_character` key
+
+What happens when we fetch the object first, prior to the update, is
+that the object handled by the client has a causal context attached. At
+that point, we can modify the object's value, and when we write the
+object back to Riak, _the causal context will automatically be attached
+to it_. Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+        bucketType: 'siblings_allowed',
+        bucket: 'nickolodeon',
+        key: 'best_character'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue('Stimpy');
+        client.storeValue({ value: riakObj, returnBody: true },
+            function (err, rslt) {
+                if (err) {
+                    throw new Error(err);
+                }
+
+                assert(rslt.values.length === 1);
+            }
+        );
+    }
+);
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible to have two clients that are
+simultaneously engaging in conflict resolution. To avoid a pathological
+divergence, you should be sure to limit the number of reconciliations and fail
+once that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/2.9.4/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant volume of updates is performed on a single
+object in a short period of time. While updating a single object
+_extremely_ frequently is not recommended, you can tune Riak's vector
+clock pruning to prevent vector clocks from growing too large too
+quickly. More on pruning in the [section below](#vector-clock-pruning).
+
+### How does `last_write_wins` affect resolution?
+
+On the surface, it seems like setting `allow_mult` to `false`
+(the default) and `last_write_wins` to `true` would result in the same
+behavior, but there is a subtle distinction.
+
+Even though both settings return only one value to the client, setting
+`allow_mult` to `false` still uses vector clocks for resolution, whereas
+if `last_write_wins` is `true`, Riak reads the timestamp to determine
+the latest version. Deeper in the system, if `allow_mult` is `false`,
+Riak will still allow siblings to exist when they are created (via
+concurrent writes or network partitions), whereas setting
+`last_write_wins` to `true` means that Riak will overwrite the value
+with the one that has the later timestamp.
+
+When you don't care about sibling creation, setting `allow_mult` to
+`false` has the least surprising behavior: you get the latest value,
+but network partitions are handled gracefully. However, for cases in
+which keys are rewritten often (and quickly) and the new value isn't
+necessarily dependent on the old value, `last_write_wins` will provide
+better performance. Some use cases where you might want to use
+`last_write_wins` include caching, session storage, and insert-only
+(no updates).
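+
+If you want to configure an LWW bucket from application code rather than
+with `riak-admin`, a sketch like the following may help. It assumes the
+Java client's `StoreBucketProperties` command and its
+`withAllowMulti`/`withLastWriteWins` builder options, along with a
+hypothetical `session_data` bucket:
+
+```java
+// Configure a last-write-wins bucket: allow_mult off, last_write_wins on
+Namespace ns = new Namespace("session_data");
+StoreBucketProperties storeProps = new StoreBucketProperties.Builder(ns)
+    .withAllowMulti(false)
+    .withLastWriteWins(true)
+    .build();
+client.execute(storeProps);
+```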
+ +{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}} +The combination of setting both the `allow_mult` and `last_write_wins` +properties to `true` leads to undefined behavior and should not be used. +{{% /note %}} + +## Vector Clock Pruning + +Riak regularly prunes vector clocks to prevent overgrowth based on four +parameters which can be set for any bucket type that you create: + +Parameter | Default value | Description +:---------|:--------------|:----------- +`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned +`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned +`young_vclock` | `20` | If a vector clock entry is younger than this value (in milliseconds), it will not be pruned +`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in milliseconds), it will be pruned + +This diagram shows how the values of these parameters dictate the vector +clock pruning process: + +![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png) + +## More Information + +Additional background information on vector clocks: + +* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock) +* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/) +* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/) +* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563) + diff --git a/content/riak/kv/2.9.4/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/csharp.md new file mode 100644 index 0000000000..63246e688b --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/csharp.md @@ -0,0 +1,120 @@ +--- +title_supertext: "Conflict Resolution:" +title: "C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "C Sharp" + identifier: "usage_conflict_resolution_csharp" + weight: 103 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.4/dev/using/conflict-resolution/csharp + - /riak/kv/2.9.4/dev/using/conflict-resolution/csharp +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak .NET client][riak_dotnet_client]. + +## How the .NET Client Handles Conflict Resolution + +In the Riak .NET client, every Riak object has a `siblings` property that +provides access to a list of that object's sibling values. If there are no +siblings, that property will return an empty list. 
+ +Here's an example of an object with siblings: + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +So what happens if the count of `obj.Siblings` is greater than 0, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `Siblings` list and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `Siblings` list and will +fetch, update and store the definitive value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Now, modify the object's value +obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain); + +// Then, store the object which has vector clock attached +var putRslt = client.Put(obj); +CheckResult(putRslt); + +obj = putRslt.Value; +// Voila, no more siblings! +Debug.Assert(obj.Siblings.Count == 0); +``` + +### Choosing a value from `Siblings` + +This example shows a basic sibling resolution strategy in which the first +sibling is chosen as the canonical value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Pick the first sibling +RiakObject chosenSibling = getResult.Value.Siblings.First(); + +// Then, store the chosen object +var putRslt = client.Put(chosenSibling); +CheckResult(putRslt); + +RiakObject updatedObject = putRslt.Value; +// Voila, no more siblings! 
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
+
diff --git a/content/riak/kv/2.9.4/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..4ece91d843
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,59 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/using/conflict-resolution/golang
+  - /riak/kv/2.9.4/dev/using/conflict-resolution/golang
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, a fetch may return multiple sibling objects in the
+result's `Values` slice. If there are no siblings, the `Values` slice will
+contain a single object.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L125-L146)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type.
+ +[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210) + diff --git a/content/riak/kv/2.9.4/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/java.md new file mode 100644 index 0000000000..57b0763081 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/java.md @@ -0,0 +1,273 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Java" + identifier: "usage_conflict_resolution_java" + weight: 100 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.4/dev/using/conflict-resolution/java + - /riak/kv/2.9.4/dev/using/conflict-resolution/java +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Java +client](https://github.com/basho/riak-java-client). + +## How the Java Client Handles Conflict Resolution + +The official Riak Java client provides a `ConflictResolver` interface +for handling sibling resolution. This interface requires that you +implement a `resolve` method that takes a Java `List` of objects of a +specific type that are stored in Riak and produces a single object of +that type, i.e. converts a `List<T>` to a single `T`. Once that +interface has been implemented, it can be registered as a singleton and +thereby applied to all read operations on a specific data type. Below is +an example resolver for the class `Foo`: + +```java +import com.basho.riak.client.api.cap.ConflictResolver; + +public class FooResolver implements ConflictResolver<Foo> { + @Override + public Foo resolve(List<Foo> siblings) { + // Insert your sibling resolution logic here + } +} +``` + +What happens within the `resolve` method is up to you and will always +depend on the use case at hand. You can implement a resolver that +selects a random `Foo` from the list, chooses the `Foo` with the most +recent timestamp (if you've set up the class `Foo` to have timestamps), +etc. In this tutorial we'll provide a simple example to get you started. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends" in the network. +Each user will bear the class `User`, which we'll create below. All of +the data for our application will be stored in buckets that bear the +[bucket type]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) `siblings`, and for this bucket type +`allow_mult` is set to `true`, which means that Riak will generate +siblings in certain cases---siblings that our application will need to +be equipped to resolve when they arise. + +The question that we need to ask ourselves now is this: if a given user +has sibling values, i.e. if there are multiple `friends` lists and Riak +can't decide which one is most causally recent, which list should be +deemed "correct" from the standpoint of the application? What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? 
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+possible values have `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, later in this document.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+public class User {
+    public String username;
+    public Set<String> friends;
+
+    public User(String username, Set<String> friends) {
+        this.username = username;
+        this.friends = friends;
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
+need to implement that interface in a way that is specific to the need
+at hand, i.e. taking a list of `User` objects and returning the `User`
+object that has the longest `friends` list:
+
+```java
+import java.util.List;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied on all reads that involve that object type.
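+
+As a brief illustration of the effect, here is a minimal sketch (an
+assumption-laden example, not part of the official tutorial: it presumes
+a `client` configured as in the examples above and a `User` object stored
+in a bucket named `users` under the bucket type `siblings`). Once
+`UserResolver` is registered, an ordinary fetch comes back already
+resolved:
+
+```java
+// Because UserResolver is registered for User.class, getValue() hands
+// back a single resolved User even if the stored object had siblings
+Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
+FetchValue fetch = new FetchValue.Builder(key).build();
+User bashobunny = client.execute(fetch).getValue(User.class);
+```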
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.4/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // key is empty and returning the one sibling if there is only
+        // one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation.
While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets). + diff --git a/content/riak/kv/2.9.4/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/nodejs.md new file mode 100644 index 0000000000..3728a3416e --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/nodejs.md @@ -0,0 +1,59 @@ +--- +title_supertext: "Conflict Resolution:" +title: "NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "NodeJS" + identifier: "usage_conflict_resolution_nodejs" + weight: 104 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.4/dev/using/conflict-resolution/nodejs + - /riak/kv/2.9.4/dev/using/conflict-resolution/nodejs +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). + +## How the Node.js Client Handles Conflict Resolution + +In the Riak Node.js client, the result of a fetch can possibly return an array +of sibling objects. If there are no siblings, that property will return an +array with one value in it. + +[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68) + +So what happens if the length of `rslt.values` is greater than 1, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `values` array and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `values` array and will +fetch, update and store the definitive value. 
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
+
diff --git a/content/riak/kv/2.9.4/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..5b016245f4
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,241 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/using/conflict-resolution/php
+  - /riak/kv/2.9.4/dev/using/conflict-resolution/php
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides everything needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+    ->build()
+    ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? What criteria
+should be applied in making that decision?
Should the lists be merged?
+Should we pick a `User` object at random?
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+possible values have `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, later in this document.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. Since PHP arrays
+don't prevent duplicates on their own, we'll de-duplicate the `friends`
+array whenever we modify it.
+
+```php
+class User {
+    public $username;
+    public $friends;
+
+    public function __construct($username, array $friends = [])
+    {
+        $this->username = $username;
+        $this->friends = $friends;
+    }
+
+    public function __toString()
+    {
+        return json_encode([
+            'username' => $this->username,
+            'friends' => $this->friends,
+            'friends_count' => count($this->friends)
+        ]);
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```php
+$bashobunny = new User('bashobunny', ['fred', 'barney']);
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('bashobunny', 'users', 'siblings')
+    ->build()
+    ->execute();
+
+echo $response->hasSiblings(); // 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `$response->getSiblings()` array down to one member.
+In our case, we need a function that takes a Riak response object as its argument,
+applies some logic to the list of values contained in the `siblings` property
+of the object, and returns a single value.
For our example use case here, we'll
+return the sibling with the longest `friends` list:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    // If there are no siblings, simply hand back the lone object
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $max_key = 0;
+    foreach ($siblings as $key => $sibling) {
+        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+            $max_key = $key;
+        }
+    }
+
+    return $siblings[$max_key];
+}
+```
+
+We can then embed this function into a more general function for fetching
+objects from the users bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.4/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object.
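+
+Here is one possible shape for that merged resolver. This is a sketch
+only, assuming the same `Response` methods (`hasSiblings()`,
+`getSiblings()`, `getObject()`) and builder classes used throughout this
+page; the function name `merging_friends_list_resolver` is chosen purely
+for illustration:
+
+```php
+function merging_friends_list_resolver(Command\Object\Response $response, Riak $riak)
+{
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+
+    // Merge every sibling's friends list into one de-duplicated array
+    $friends = [];
+    foreach ($siblings as $sibling) {
+        $friends = array_merge($friends, $sibling->getData()['friends']);
+    }
+    $merged = new User($siblings[0]->getData()['username'],
+                       array_values(array_unique($friends)));
+
+    // Store the merged value back, as recommended in the section above
+    (new Command\Builder\StoreObject($riak))
+        ->buildLocation($merged->username, 'users', 'siblings')
+        ->buildObject((string)$merged, 'application/json')
+        ->build()
+        ->execute();
+
+    return $merged;
+}
+```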
+
+The drawback to this approach is that it's more or less inevitable that a user
+will remove a friend from their friends list, and then that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets).
+
diff --git a/content/riak/kv/2.9.4/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/python.md
new file mode 100644
index 0000000000..a6b1002cb1
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/python.md
@@ -0,0 +1,255 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Python"
+    identifier: "usage_conflict_resolution_python"
+    weight: 102
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/using/conflict-resolution/python
+  - /riak/kv/2.9.4/dev/using/conflict-resolution/python
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Python
+client](https://github.com/basho/riak-python-client).
+
+## How the Python Client Handles Conflict Resolution
+
+In the official Python client, every object of the `RiakObject` class
+has a `siblings` property that provides access to a list of an object's
+sibling values. If there are no siblings, that property will return a
+list with only one item. Here's an example of an object with siblings:
+
+```python
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x106cc5290>]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above?
The easiest way to resolve siblings automatically with
+the Python client is to create a conflict-resolving function that takes
+a list of sibling values and returns a single value. Such resolution
+functions can be registered either at the object level or the bucket
+level. A more complete explanation can be found in the section directly
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will
+be of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct?" What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the one
+that our application deems correct_. While this might not make sense in
+real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `friends` property that lists the usernames, as
+strings, of the user's friends. We will also create a `to_json` method,
+as we'll be storing each `User` object as JSON:
+
+```python
+class User(object):
+    def __init__(self, username, friends):
+        self.username = username
+        self.friends = friends
+
+    def to_json(self):
+        return vars(self)
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```python
+new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json()
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing and Registering a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = bucket.get('bashobunny')
+
+print len(obj.siblings) > 1
+```
+
+If we get `True`, then there are siblings. So what do we do in that
+case? The Python client allows us to write a conflict resolution hook
+function that will be triggered any time siblings are found, i.e. any
+time `len(obj.siblings) > 1`. A hook function like this needs to take a
+single `RiakObject` object as its argument, apply some sort of logic to
+the list of values contained in the `siblings` property, and ultimately
+return a list with a single "correct" value.
For our example case, we'll +return the value with the longest `friends` list: + +```python +def longest_friends_list_resolver(riak_object): + # We'll specify a lambda function that operates on the length of + # each sibling's "friends" list: + lm = lambda sibling: len(sibling.data['friends']) + # Then we'll return a list that contains only the object with the + # maximum value for the length of the "friends" list: + riak_object.siblings = [max(riak_object.siblings, key=lm), ] +``` + +### Registering a Conflict Resolver Function + +In the Python client, resolver functions can be registered at the object +level, as in this example: + +```python +bucket = client.bucket_type('siblings').bucket('users') +obj = RiakObject(client, bucket, 'bashobunny') +obj.resolver = longest_friends_list_resolver + +# Now, when the object is loaded from Riak, it will resolve to a single +# value instead of multiple values when both commands are executed: +obj.reload() +obj.store() +``` + +Alternatively, resolvers can be registered at the bucket level, so that +the resolution is applied to all objects in the bucket: + +```python +bucket = client.bucket_type('siblings').bucket('users') +bucket.resolver = longest_friends_list_resolver + +obj = RiakObject(client, bucket, 'bashobunny') +obj.reload() +obj.store() + +# The resolver will also be applied if you perform operations using the +# bucket object: + +bucket.get('bashobunny') +bucket.get('some_other_user') +``` + +## Conflict Resolution and Writes + +In the above example, we created a conflict resolver that resolves a +list of discrepant `User` object values and returns a single value. It's +important to note, however, that this resolver will only provide the +application with a single "correct" value; it will _not_ write that +value back to Riak. That requires a separate step. When this step should +be undertaken depends on your application. In general, though, we +recommend writing objects to Riak only when the application is ready to +commit them, i.e. when all of the changes that need to be made to the +object have been made and the application is ready to persist the state +of the object in Riak. + +Correspondingly, we recommend that updates to objects in Riak follow +these steps: + +1. **Read** the object from Riak +2. **Resolving sibling conflicts** if they exist, allowing the +application to reason about one "correct" value for the object (this +step is the subject of this tutorial) +3. **Modify** the object +4. **Write** the object to Riak once the necessary changes have been +made + +You can find more on writing objects to Riak, including code examples +from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.4/developing/usage) section. + +## More Advanced Example + +Resolving sibling `User` values on the basis of which user has the +longest `friends` list has the benefit of being simple but it's probably +not a good resolution strategy for our social networking application +because it means that unwanted data loss is inevitable. If one friend +list contains `A`, `B`, and `C` and the other contains `D` and `E`, the +list containing `A`, `B`, and `C` will be chosen. So what about friends +`D` and `E`? Those usernames are essentially lost. In the sections +below, we'll implement an alternative strategy as an example. + +### Merging the Lists + +To avoid losing data like this, a better strategy would be to merge the +lists. 
We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets). 
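+
+To give a flavor of what that replacement might look like, here is a
+minimal sketch using the Python client's set support. It assumes a
+bucket type named `sets` that has been created with `datatype = set`;
+the bucket and key names are illustrative only:
+
+```python
+from riak.datatypes import Set
+
+# Sets live in buckets whose bucket type carries datatype = set
+bucket = client.bucket_type('sets').bucket('friends')
+
+friends = Set(bucket, 'bashobunny')
+friends.add('fred')
+friends.add('barney')
+friends.store()
+
+# Concurrent adds from other clients are merged by Riak itself, so a
+# later fetch returns the union of all adds with no application-side
+# resolver involved.
+```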
+ diff --git a/content/riak/kv/2.9.4/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/ruby.md new file mode 100644 index 0000000000..035a687cfe --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/conflict-resolution/ruby.md @@ -0,0 +1,251 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Ruby" + identifier: "usage_conflict_resolution_ruby" + weight: 101 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.4/dev/using/conflict-resolution/ruby + - /riak/kv/2.9.4/dev/using/conflict-resolution/ruby +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Ruby +client](https://github.com/basho/riak-ruby-client). + +## How the Ruby Client Handles Conflict Resolution + +In the official Ruby client, every Riak object has a `siblings` property +that provides access to a list of that object's sibling values. If there +are no siblings, that property will return an array with only one item. +Here's an example of an object with siblings: + +```ruby +bucket = client.bucket('seahawks') +obj = bucket.get('coach') +obj.siblings + +# The output: +[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">] +``` + +So what happens if the length of `obj.siblings` is greater than 1, as in +the case above? In order to resolve siblings, you need to create a +resolution function that takes a Riak object and reduces the `siblings` +array down to a single value. An example is provided in the section +below. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends." Each user will be +of the class `User`, which we'll create below. All of the data for our +application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +to `true`, which means that Riak will generate siblings in certain +cases---siblings that our application will need to be equipped to +resolve when necessary. + +The question that we need to ask ourselves at this point is the +following: if a given user has conflicting lists, which list should be +deemed more "correct?" What criteria should be applied? Should the lists +be merged? Should we pick a list at random and deem that list correct? +We'll keep it simple here and say that the following criterion will +hold: if multiple conflict lists exist, _the longer list will be the one +that our application deems correct_. While this might not make sense in +real-world applications, it's a good jumping-off point. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` and a `friends` property that lists +the usernames, as strings, of the user's friends. 
We will also create a
+`to_json` method, as we'll be storing each `User` object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# => {:username=>"riakuser127", :friends=>["captheorem238", "siblingsrule572"]}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
```

+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username, type: 'siblings')
+  longest_friends_list_resolver(user_object)
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.4/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be merged into one
+    # array
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object.
The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets).
+
diff --git a/content/riak/kv/2.9.4/developing/usage/content-types.md b/content/riak/kv/2.9.4/developing/usage/content-types.md
new file mode 100644
index 0000000000..bab54981d5
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/content-types.md
@@ -0,0 +1,188 @@
+---
+title: "Content Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Content Types"
+    identifier: "usage_content_types"
+    weight: 104
+    parent: "developing_usage"
+toc: true
+---
+
+Riak KV is a fundamentally content-agnostic database. You can use it to
+store anything you want, from JSON to XML to HTML to binaries to images
+and beyond. It's important to note that _all_ objects stored in
+Riak need a specified content type. If you don't specify a
+content type, the behavior will vary based on your client library:
+
+```java
+// In the Java client, the response when storing an object without
+// specifying a content type will depend on what is being stored. If you
+// store a Java Map, for example, the client will automatically specify
+// that the object is "application/json"; if you store a String, the
+// client will specify "application/x-www-form-urlencoded"; POJOs are
+// stored as JSON by default, and so on.
+```
+
+```ruby
+# In the Ruby client, you must always specify a content type. If you
+# don't, you'll see the following error:
+ArgumentError: content_type is not defined!
+```
+
+```php
+# PHP will default to cURL's default content type for POST & PUT requests:
+# application/x-www-form-urlencoded
+
+# If you use the StoreObject::buildJsonObject() method when building your command,
+# it will store the item with application/json as the content-type
+```
+
+```python
+# In the Python client, the default content type is "application/json".
+# Because of this, you should always make sure to specify the content
+# type when storing other types of data.
+```
+
+```csharp
+// Using the Riak .NET Client, the response when storing an object without
+// specifying a content type will depend on what is being stored.
+// If you store a Dictionary, for example, the client will
+// automatically specify that the object is "application/json";
+// POCOs are stored as JSON by default, and so on.
+```
+
+```javascript
+// In the Node.js client, the default content type is "application/json".
+// Because of this, you should always make sure to specify the content
+// type when storing other types of data.
+```
+
+```erlang
+%% In the Erlang client, the response when storing an object without
+%% specifying a content type will depend on what is being stored. If
+%% you store a simple binary, for example, the client will automatically
+%% specify that the object is "application/octet-stream"; if you store a
+%% string, the client will specify "application/x-erlang-binary"; and so
+%% on.
+```
+
+```golang
+// In the Go client, you must always specify a content type.
+```
+
+Because content type negotiation varies so widely from client to client,
+we recommend consulting the documentation for your preferred client for
+more information.
+
+## Specifying Content Type
+
+For all writes to Riak, you will need to specify a content type, for
+example `text/plain` or `application/json`.
+
+```java
+Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(wildeGeniusQuote)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = Riak::RObject.new(bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.raw_data = 'I have nothing to declare but my genius'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('genius', 'oscar_wilde', 'quotes')
+    ->buildObject('I have nothing to declare but my genius!', 'text/plain')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = RiakObject(client, bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.data = 'I have nothing to declare but my genius'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
+var obj = new RiakObject(id, "I have nothing to declare but my genius",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('I have nothing to declare but my genius');
+client.storeValue({
+    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
+                       <<"genius">>,
+                       <<"I have nothing to declare but my genius">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("I have nothing to declare but my genius"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("quotes").
+    WithBucket("oscar_wilde").
+    WithKey("genius").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "I have nothing to declare but my genius" \
+  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Please note that POST is also a valid method for writes, for the sake
+# of compatibility
+```
+
diff --git a/content/riak/kv/2.9.4/developing/usage/creating-objects.md b/content/riak/kv/2.9.4/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..4505b34ef9
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/creating-objects.md
@@ -0,0 +1,551 @@
+---
+title: "Creating Objects in Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Creating Objects"
+    identifier: "usage_creating_objects"
+    weight: 100
+    parent: "developing_usage"
+toc: true
+---
+
+[usage content types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/content-types
+
+Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
+requests.
Here is the basic form of writes:
+
+```
+PUT /types/<type>/buckets/<bucket>/keys/<key>
+
+# If you're using HTTP to interact with Riak, you can also use POST
+```
+
+As an example, let's store an object containing information about a dog named Rufus. We'll store that object under the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/bucket-types).
+
+The object we're storing will be very simple, just a basic text snippet
+of something that Rufus might say. Let's build the object and then store
+it.
+
+``` java
+String quote = "WOOF!";
+Namespace bucket = new Namespace("animals", "dogs");
+Location rufusLocation = new Location(bucket, "rufus");
+RiakObject rufusObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create(quote));
+StoreValue storeOp = new StoreValue.Builder(rufusObject)
+        .withLocation(rufusLocation)
+        .build();
+client.execute(storeOp);
+```
+
+``` ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = Riak::RObject.new(bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store
+```
+
+``` php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('rufus', 'dogs', 'animals')
+    ->buildObject('WOOF!', 'text/plain')
+    ->build()
+    ->execute();
+```
+
+``` python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```
+
+``` csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var obj = new RiakObject(id, "WOOF!", "text/plain");
+var result = client.Put(obj);
+```
+
+``` javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('WOOF!');
+client.storeValue({
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+``` golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("WOOF!"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+Notice that we specified both a value for the object, i.e. `WOOF!`, and
+a content type, `text/plain`. See [content types][usage content types] for more information.
+
+Now, you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/2.9.4/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/bucket-types).
+
+### Store an Object
+
+Your application will often have its own method of generating the keys
+for its data, e.g. on the basis of timestamps. If so, storing that data
+is easy. The basic request looks like this.
+
+```
+PUT /types/TYPE/buckets/BUCKET/keys/KEY
+
+# If you're using HTTP, POST can be used instead of PUT. The only
+# difference between POST and PUT is that you should POST in cases where
+# you want Riak to auto-generate a key.
More on this can be found in the
+# examples below.
+```
+
+There is no need to intentionally create buckets in Riak. They pop into
+existence when keys are added to them, and disappear when all keys have
+been removed from them. If you don't specify a bucket's type, the type
+[`default`]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) will be applied.
+
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('viper', 'dodge', 'cars')
+    ->buildObject('vroom', 'text/plain')
+    ->withParameter('w', 3)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [{w, 3}]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3"
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present.
#### Return Body

If `returnbody` is set to `true`, any of the response headers expected
from a read request may be present. As with a `GET` request, `300 Multiple
Choices` may be returned if siblings existed or were created as part of
the operation, and the response can be dealt with similarly.

Normal HTTP status codes (responses will vary for client libraries):

* `200 OK`
* `204 No Content`
* `300 Multiple Choices`

For example, using the same object from above:

```java
Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
BinaryValue text = BinaryValue.create("vroom");
RiakObject obj = new RiakObject()
        .setContentType("text/plain")
        .setValue(text);
StoreValue store = new StoreValue.Builder(obj)
        .withLocation(viperKey)
        .withOption(StoreOption.W, new Quorum(3))
        .withOption(StoreOption.RETURN_BODY, true)
        .build();
client.execute(store);
```

```ruby
bucket = client.bucket_type('cars').bucket('dodge')
obj = Riak::RObject.new(bucket, 'viper')
obj.content_type = 'text/plain'
obj.raw_data = 'vroom'
obj.store(w: 3, returnbody: true)
```

```php
(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('viper', 'dodge', 'cars')
  ->buildObject('vroom', 'text/plain')
  ->withParameter('w', 3)
  ->withParameter('returnbody', 'true')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('cars').bucket('dodge')
obj = RiakObject(client, bucket, 'viper')
obj.content_type = 'text/plain'
obj.data = 'vroom'
obj.store(w=3, return_body=True)
```

```csharp
var id = new RiakObjectId("cars", "dodge", "viper");
var obj = new RiakObject(id, "vroom", "text/plain");
var options = new RiakPutOptions();
options.SetW(new Quorum(3));
options.SetReturnBody(true);
var result = client.Put(obj, options);
```

```javascript
var riakObj = new Riak.Commands.KV.RiakObject();
riakObj.setContentType('text/plain');
riakObj.setValue('vroom');

var options = {
    bucketType: 'cars', bucket: 'dodge', key: 'viper',
    w: 3, returnBody: true, value: riakObj
};
client.storeValue(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
    var riakObj = rslt.values.shift();
    var viper = riakObj.value;
    logger.info("dodge viper: %s", viper.toString('utf8'));
});
```

```erlang
Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
                       <<"viper">>,
                       <<"vroom">>,
                       <<"text/plain">>).
riakc_pb_socket:put(Pid, Object, [return_body]).
```

```golang
obj := &riak.Object{
    ContentType:     "text/plain",
    Charset:         "utf-8",
    ContentEncoding: "utf-8",
    Value:           []byte("vroom"),
}

cmd, err := riak.NewStoreValueCommandBuilder().
    WithBucketType("cars").
    WithBucket("dodge").
    WithKey("viper").
    WithW(3).
    WithContent(obj).
    WithReturnBody(true).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}
```

```curl
curl -XPUT \
  -H "Content-Type: text/plain" \
  -d "vroom" \
  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
```

### Store a New Object and Assign a Random Key

If your application would rather leave key generation up to Riak, issue
a `POST` request to the bucket URL instead of a `PUT` to a bucket/key
pair:

```
POST /types/TYPE/buckets/BUCKET/keys
```

If you don't pass Riak a `key` name after the bucket, it will know to
create one for you.

Supported headers are the same as for bucket/key write requests, though
`X-Riak-Vclock` will never be relevant for these `POST` requests.
Supported query parameters are also the same as for bucket/key `PUT`
requests.
Normal status codes:

* `201 Created`

This command will store an object in the bucket `random_user_keys`,
which bears the bucket type `users`.

```java
Namespace locationWithoutKey = new Namespace("users", "random_user_keys");
BinaryValue text = BinaryValue.create("{'user':'data'}");
RiakObject obj = new RiakObject()
        .setContentType("application/json")
        .setValue(text);
StoreValue store = new StoreValue.Builder(locationWithoutKey, obj)
        .build();
String key = client.execute(store).getLocation().getKeyAsString();

// The Java client will assign a random key along the following lines:
// "ZPFF18PUqGW9efVou7EHhfE6h8a"
```

```ruby
bucket = client.bucket_type('users').bucket('random_user_keys')
obj = Riak::RObject.new(bucket)
obj.content_type = 'application/json'
obj.raw_data = '{"user":"data"}'

obj.store

# The client will assign a key like the following:
obj.key # => "GB8fW6DDZtXogK19OLmaJf247DN"
```

```php
$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildBucket('random_user_keys', 'users')
  ->buildJsonObject(['user'=>'data'])
  ->build()
  ->execute();

echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN
```

```python
bucket = client.bucket_type('users').bucket('random_user_keys')
obj = RiakObject(client, bucket)
obj.content_type = 'application/json'
obj.data = '{"user":"data"}'
obj.store()

# The Python client will assign a random key along the following lines:
obj.key  # 'ZPFF18PUqGW9efVou7EHhfE6h8a'
```

```csharp
var id = new RiakObjectId("users", "random_user_keys", null);
var obj = new RiakObject(id, @"{'user':'data'}",
    RiakConstants.ContentTypes.ApplicationJson);
var rslt = client.Put(obj);
Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key);

// The .NET client will output a random key similar to this:
// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q
```

```javascript
var user = {
    user: 'data'
};
var options = {
    bucketType: 'users', bucket: 'random_user_keys',
    returnBody: true, value: user
};
client.storeValue(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
    var riakObj = rslt.values.shift();
    var generatedKey = riakObj.getKey();
    logger.info("Generated key: %s", generatedKey);
});

// The Node.js client will output a random key similar to this:
// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2
```

```erlang
Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>).
riakc_pb_socket:put(Pid, Object).

%% The key can be retrieved from the output of the above call.
%% It will look something like this:

{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>},
    <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined,
    undefined}}
```
```golang
obj := &riak.Object{
    ContentType:     "application/json",
    Charset:         "utf-8",
    ContentEncoding: "utf-8",
    Value:           []byte("{'user':'data'}"),
}

cmd, err := riak.NewStoreValueCommandBuilder().
    WithBucketType("users").
    WithBucket("random_user_keys").
    WithContent(obj).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}

svc := cmd.(*riak.StoreValueCommand)
rsp := svc.Response
fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)

// Output:
// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
```

```curl
curl -i -XPOST \
  -H "Content-Type: text/plain" \
  -d "this is a test" \
  http://localhost:8098/types/users/buckets/random_user_keys/keys

# In the output, you should see a Location header that will give you the
# location of the object in Riak, with the key at the end:

Location: /buckets/random_user_keys/keys/G7FYUXtTsEdru4NP32eijMIRK3o
```

diff --git a/content/riak/kv/2.9.4/developing/usage/custom-extractors.md b/content/riak/kv/2.9.4/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..3485c61dff
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/custom-extractors.md
@@ -0,0 +1,425 @@
---
title: "Custom Extractors"
description: ""
project: "riak_kv"
project_version: 2.9.4
menu:
  riak_kv-2.9.4:
    name: "Custom Extractors"
    identifier: "usage_custom_extractors"
    weight: 113
    parent: "developing_usage"
toc: true
aliases:
  - /riak/2.9.4/dev/search/custom-extractors
  - /riak/kv/2.9.4/dev/search/custom-extractors
---

Solr, and by extension Riak Search, has default extractors for a wide
variety of data types, including JSON, XML, and plaintext. Riak Search
ships with the following extractors:

Content Type | Erlang Module
:------------|:-------------
`application/json` | `yz_json_extractor`
`application/xml` | `yz_xml_extractor`
`text/plain` | `yz_text_extractor`
`text/xml` | `yz_xml_extractor`
No specified type | `yz_noop_extractor`

There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/usage/searching-data-types).

If you're working with a data format that does not have a default Solr
extractor, you can create your own and register it with Riak Search.
We'll show you how to do so by way of example.

## The Extractor Interface

Creating a custom extractor involves creating an Erlang module that
implements two functions:

* `extract/1` --- Takes the contents of the object and calls `extract/2`
  with the same contents and an empty list
* `extract/2` --- Takes the contents of the object and returns an Erlang
  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
  single field name and a single value associated with that name

The following extractor shows how a pure text extractor implements those
two functions:

```erlang
-module(search_test_extractor).
-include("yokozuna.hrl").
-compile(export_all).

extract(Value) ->
    extract(Value, []).

extract(Value, Opts) ->
    FieldName = field_name(Opts),
    [{FieldName, Value}].

-spec field_name(proplist()) -> any().
field_name(Opts) ->
    proplists:get_value(field_name, Opts, text).
```

This extractor takes the contents of a `Value` and returns a proplist
with a single field name (in this case `text`) and the single value.
This function can be run in the Erlang shell. Let's run it providing the
text `hello`:

```erlang
> c(search_test_extractor).
%% {ok, search_test_extractor}

> search_test_extractor:extract("hello").

%% Console output:
[{text, "hello"}]
```

Upon running this command, the value `hello` would be indexed in Solr
under the field name `text`. If you wanted to find all objects with a
`text` field that begins with `Fourscore`, you could use the
Solr query `text:Fourscore*`, to give just one example.
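To make that concrete, such a query could be run against Riak Search's HTTP query endpoint. The following is only an illustrative sketch: it assumes a hypothetical index named `my_index` that you would first need to create and associate with a bucket, as shown later in this document:

```curl
curl "$RIAK_HOST/search/query/my_index?wt=json&q=text:Fourscore*"
```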
## An Example Custom Extractor

Let's say that we're storing HTTP header packet data in Riak. Here's an
example of such a packet:

```
GET http://www.google.com HTTP/1.1
```

We want to register the following information in Solr:

Field name | Value | Extracted value in this example
:----------|:------|:-------------------------------
`method` | The HTTP method | `GET`
`host` | The URL's host | `www.google.com`
`uri` | The URI, i.e. what comes after the host | `/`

The example extractor below would provide the three desired
fields/values. It relies on the
[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3)
function from Erlang's standard library.

```erlang
-module(yz_httpheader_extractor).
-compile(export_all).

extract(Value) ->
    extract(Value, []).

%% In this example, we can ignore the Opts variable from the example
%% above, hence the underscore:
extract(Value, _Opts) ->
    {ok,
        {http_request,
         Method,
         {absoluteURI, http, Host, undefined, Uri},
         _Version},
        _Rest} = erlang:decode_packet(http, Value, []),
    [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}].
```

This module will be stored in a `yz_httpheader_extractor.erl` file (as
Erlang filenames must match the module name). Now that our extractor has
been written, it must be compiled and registered in Riak before it can
be used.

## Registering Custom Extractors

In order to use a custom extractor, you must create a compiled `.beam`
file out of your `.erl` extractor file and then tell Riak where that
file is located. Let's say that we have placed our
`yz_httpheader_extractor.erl` file in the directory `/opt/beams`. First,
we need to compile that file:

```bash
erlc yz_httpheader_extractor.erl
```

To instruct Riak where to find the resulting
`yz_httpheader_extractor.beam` file, we'll need to add a line to an
`advanced.config` file in the node's `/etc` directory (more information
can be found in our documentation on [advanced configuration]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#advanced-configuration)). Here's an
example:

```advancedconfig
[
  %% Other configs
  {vm_args, [
    {"-pa /opt/beams", ""}
  ]},
  %% Other configs
]
```

This will instruct the Erlang VM on which Riak runs to look for compiled
`.beam` files in the proper directory. You should restart the node at
this point. Once the node has been restarted, you can use the node's
Erlang shell to register the `yz_httpheader_extractor`. First, attach to
the shell:

```bash
riak attach
```

At this point, we need to choose a MIME type for our extractor. Let's
call it `application/httpheader`. Once you're in the shell:

```erlang
> yz_extractor:register("application/httpheader", yz_httpheader_extractor).
```

If successful, this command will return a list of currently registered
extractors.
It should look like this:

```erlang
[{default,yz_noop_extractor},
 {"application/httpheader",yz_httpheader_extractor},
 {"application/json",yz_json_extractor},
 {"application/riak_counter",yz_dt_extractor},
 {"application/riak_map",yz_dt_extractor},
 {"application/riak_set",yz_dt_extractor},
 {"application/xml",yz_xml_extractor},
 {"text/plain",yz_text_extractor},
 {"text/xml",yz_xml_extractor}]
```

If the `application/httpheader` extractor is part of that list, then the
extractor has been successfully registered.

## Verifying Our Custom Extractor

Now that Riak Search knows how to decode and extract HTTP header packet
data, let's store some in Riak and then query it. We'll put the example
packet data from above in a `google_packet.bin` file. Then, we'll `PUT`
that binary to Riak's `/search/extract` endpoint, making sure to use our
custom MIME type:

```curl
curl -XPUT $RIAK_HOST/search/extract \
  -H 'Content-Type: application/httpheader' \
  --data-binary @google_packet.bin
```

That should return the following JSON:

```json
{
  "method": "GET",
  "host": "www.google.com",
  "uri": "/"
}
```

We can also verify this in the Erlang shell (whether in a Riak node's
Erlang shell or otherwise):

```erlang
yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor).

%% Console output:
[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}]
```

## Indexing and Searching HTTP Header Packet Data

Now that Solr knows how to extract HTTP header packet data, we need to
create a schema that extends the [default schema]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
to `<fields>` in the schema, which we'll name `http_header_schema` and
store in a `http_header_schema.xml` file:

```xml
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="http_header_schema" version="1.5">
<fields>
  <!-- other required fields here -->

  <field name="method" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="host" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/>
</fields>
</schema>
```

Now, we can store the schema:

```java
import org.apache.commons.io.FileUtils

File xml = new File("http_header_schema.xml");
String xmlString = FileUtils.readFileToString(xml);
YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString);
StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
client.execute(storeSchemaOp);
```

```ruby
schema_xml = File.read('http_header_schema.xml')
client.create_search_schema('http_header_schema', schema_xml)
```

```php
$schema_string = file_get_contents('http_header_schema.xml');
(new \Basho\Riak\Command\Builder\StoreSchema($riak))
  ->withName('http_header_schema')
  ->withSchemaString($schema_string)
  ->build()
  ->execute();
```

```python
schema_xml = open('http_header_schema.xml').read()
client.create_search_schema('http_header_schema', schema_xml)
```

```curl
curl -XPUT $RIAK_HOST/search/schema/http_header_schema \
  -H 'Content-Type: application/xml' \
  --data-binary @http_header_schema.xml
```

Riak now has our schema stored and ready for use.
Let's create a search
index called `header_data` that's associated with our new schema:

```java
YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
        .build();
client.execute(storeIndex);
```

```ruby
client.create_search_index('header_data', 'http_header_schema')
```

```php
(new \Basho\Riak\Command\Builder\StoreIndex($riak))
  ->withName('header_data')
  ->usingSchema('http_header_schema')
  ->build()
  ->execute();
```

```python
client.create_search_index('header_data', 'http_header_schema')
```

```curl
curl -XPUT $RIAK_HOST/search/index/header_data \
  -H 'Content-Type: application/json' \
  -d '{"schema":"http_header_schema"}'
```

Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types)
for all of the HTTP header data that we plan to store. Any bucket that
bears this type will be associated with our `header_data` search index.
We'll call our bucket type `http_data_store`.

```bash
riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
riak-admin bucket-type activate http_data_store
```

Let's use the same `google_packet.bin` file that we used previously and
store it in a bucket with the `http_data_store` bucket type, making sure
to use our custom `application/httpheader` MIME type:

```java
Location key = new Location(new Namespace("http_data_store", "packets"), "google");
File packetData = new File("google_packet.bin");
byte[] packetBinary = FileUtils.readFileToByteArray(packetData);

RiakObject packetObject = new RiakObject()
        .setContentType("application/httpheader")
        .setValue(BinaryValue.create(packetBinary));

StoreValue storeOp = new StoreValue.Builder(packetObject)
        .setLocation(key)
        .build();
client.execute(storeOp);
```

```ruby
packet_data = File.read('google_packet.bin')
bucket = client.bucket_type('http_data_store').bucket('packets')
obj = Riak::RObject.new(bucket, 'google')
obj.content_type = 'application/httpheader'
obj.raw_data = packet_data
obj.store
```

```php
$object = new \Basho\Riak\Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('google', 'packets', 'http_data_store')
  ->withObject($object)
  ->build()
  ->execute();
```

```python
packet_data = open('google_packet.bin').read()
bucket = client.bucket_type('http_data_store').bucket('packets')
obj = RiakObject(client, bucket, 'google')
obj.content_type = 'application/httpheader'
obj.data = packet_data
obj.store()
```

```curl
curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
  -H 'Content-Type: application/httpheader' \
  --data-binary @google_packet.bin
```
Now that we have some header packet data stored, we can query our
`header_data` index on whatever basis we'd like. First, let's verify
that we'll get one result if we query for objects that have the HTTP
method `GET`:

```java
// Using the same method from above:
String query = "method:GET";

// Again using the same method from above:
int numberFound = results.numResults(); // 1
```

```ruby
results = client.search('header_data', 'method:GET')
results['num_found'] # 1
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withQuery('method:GET')
  ->withIndexName('header_data')
  ->build()
  ->execute();

$response->getNumFound(); // 1
```

```python
results = client.fulltext_search('header_data', 'method:GET')
results['num_found'] # 1
```

```curl
curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"

# This should return a fairly large JSON object with a "num_found" field
# The value of that field should be 1
```

diff --git a/content/riak/kv/2.9.4/developing/usage/deleting-objects.md b/content/riak/kv/2.9.4/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..33190c00fe
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/deleting-objects.md
@@ -0,0 +1,153 @@
---
title: "Deleting Objects"
description: ""
project: "riak_kv"
project_version: 2.9.4
menu:
  riak_kv-2.9.4:
    name: "Deleting Objects"
    identifier: "usage_deleting_objects"
    weight: 103
    parent: "developing_usage"
toc: true
---

The delete command follows a predictable pattern and looks like this:

```
DELETE /types/TYPE/buckets/BUCKET/keys/KEY
```

The normal HTTP response codes for `DELETE` operations are `204 No
Content` and `404 Not Found`. 404 responses are *normal*, in the sense
that `DELETE` operations are idempotent and not finding the resource has
the same effect as deleting it.

Let's try to delete the `genius` key from the `oscar_wilde` bucket
(which bears the type `quotes`):

```java
Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
client.execute(delete);
```

```ruby
bucket = client.bucket_type('quotes').bucket('oscar_wilde')
bucket.delete('genius')
```

```php
(new \Basho\Riak\Command\Builder\DeleteObject($riak))
  ->buildLocation('genius', 'oscar_wilde', 'quotes')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('quotes').bucket('oscar_wilde')
bucket.delete('genius')
```

```csharp
var id = new RiakObjectId("users", "random_user_keys", null);
var obj = new RiakObject(id, @"{'user':'data'}",
    RiakConstants.ContentTypes.ApplicationJson);
var rslt = client.Put(obj);
string key = rslt.Value.Key;
id = new RiakObjectId("users", "random_user_keys", key);
var del_rslt = client.Delete(id);
```

```javascript
// continuing from above example
options = {
    bucketType: 'users', bucket: 'random_user_keys',
    key: generatedKey
};
client.deleteValue(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>).
```

```golang
// Continuing from above example
cmd, err = riak.NewDeleteValueCommandBuilder().
    WithBucketType("users").
    WithBucket("random_user_keys").
    WithKey(rsp.GeneratedKey).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}
```

```curl
curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
```
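Because 404s are the expected response for missing keys, a follow-up read is a quick way to confirm the delete took effect. A minimal sketch, assuming the same local node:

```curl
curl -i http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius

# After a successful delete, this should return a 404 Not Found status
```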
## Client Library Examples

If you are updating an object that has been deleted---or if an update
might target a deleted object---we recommend that
you first fetch the [causal context]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context) of the object prior to updating.
This can be done by setting the `deletedvclock` parameter to `true` as
part of the [fetch operation]({{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers/fetch-object). This can also be done
with the official Riak clients for Ruby, Java, and Erlang, as in the
example below:

```ruby
object.delete
deleted_object = bucket.get('key', deletedvclock: true)
deleted_object.vclock
```

```python
# It is not currently possible to fetch the causal context for a deleted
# key in the Python client.
```

```java
Location loc = new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
FetchValue fetch = new FetchValue.Builder(loc)
        .withOption(Option.DELETED_VCLOCK, true)
        .build();
FetchValue.Response response = client.execute(fetch);
System.out.println(response.getVclock().asString());
```

```erlang
{ok, Obj} = riakc_pb_socket:get(Pid,
                                {<<"bucket_type">>, <<"bucket">>},
                                <<"key">>,
                                [deletedvclock]).

%% In the Erlang client, the vector clock is accessible using the Obj
%% object obtained above.
```

```php
$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
  ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type')
  ->build()
  ->execute();

echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA
```

diff --git a/content/riak/kv/2.9.4/developing/usage/document-store.md b/content/riak/kv/2.9.4/developing/usage/document-store.md
new file mode 100644
index 0000000000..0c7ff7a755
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/document-store.md
@@ -0,0 +1,614 @@
---
title: "Implementing a Document Store"
description: ""
project: "riak_kv"
project_version: 2.9.4
menu:
  riak_kv-2.9.4:
    name: "Implementing a Document Store"
    identifier: "usage_document_store"
    weight: 112
    parent: "developing_usage"
toc: true
aliases:
  - /riak/2.9.4/dev/search/document-store
  - /riak/kv/2.9.4/dev/search/document-store
---

Although Riak wasn't explicitly created as a document store, two
features added to Riak---[Riak Search]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/)---make it possible to use Riak as a
highly scalable document store with rich querying capabilities. In this
tutorial, we'll build a basic implementation of a document store using
[Riak maps]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#maps).

## Basic Approach

Riak Search enables you to implement a document store in Riak in a
variety of ways. You could, for example, store and query JSON objects or
XML and then retrieve them later via Solr queries. In this tutorial,
however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#maps),
index that data using Riak Search, and then run Solr queries against
those stored objects.

You can think of these Search indexes as **collections**.
Each indexed
document will have an ID generated automatically by Search, and because
we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/2.9.4/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects.

## Use Case

Let's say that we're building a WordPress-style CMS and storing blog
posts in Riak. We will be storing the following information about each
post:

* Title
* Author
* Content (the body of the post)
* Keywords associated with the post
* Date posted
* Whether the post has been published on the site

For each of those pieces of information, we'll need to decide on (a)
which Riak Data Type most directly corresponds and (b) which Solr type
we want to associate with the info. It's important to bear in mind that
Riak Data Types can be indexed as a wide variety of things, e.g.
registers as Solr text fields, sets as multi-valued datetimes, etc. The
table below shows which Riak Data Type and Solr type we'll be using for
each field in our Riak maps.

Info | Riak Data Type | Solr type
:----|:---------------|:---------
Post title | Register | String
Post author | Register | String
Post content | Register | Text
Keywords | Set | Multi-valued string
Date posted | Register | Datetime
Whether the post is currently in draft form | Flag | Boolean

Before we start actually creating and storing blog posts, let's set up
Riak Search with an appropriate index and schema.

## Creating a Schema and Index

In the documentation on [search schemas]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search-schemas), you'll find a
baseline schema to be used for creating custom schemas. We'll use that
baseline schema here and add the following fields to the `<fields>`
list:

```xml
<field name="title_register" type="string" indexed="true" stored="true" />
<field name="author_register" type="string" indexed="true" stored="true" />
<field name="content_register" type="text" indexed="true" stored="true" />
<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" />
<field name="date_register" type="datetime" indexed="true" stored="true" />
<field name="published_flag" type="boolean" indexed="true" stored="true" />
```

You can see the full schema [on
GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml).
Let's store that schema in a file called `blog_post_schema.xml` and
upload that schema to Riak:

```java
import org.apache.commons.io.FileUtils;

File xml = new File("blog_post_schema.xml");
String xmlString = FileUtils.readFileToString(xml);
YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString);
StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
client.execute(storeSchemaOp);
```

```ruby
schema_data = File.read('blog_post_schema.xml')
client.create_search_schema('blog_post_schema', schema_data)
```

```php
$schema_string = file_get_contents('blog_post_schema.xml');
(new \Basho\Riak\Command\Builder\StoreSchema($riak))
  ->withName('blog_post_schema')
  ->withSchemaString($schema_string)
  ->build()
  ->execute();
```

```python
xml_file = open('blog_post_schema.xml', 'r')
schema_data = xml_file.read()
client.create_search_schema('blog_post_schema', schema_data)
xml_file.close()
```

```csharp
var schemaXml = File.ReadAllText("blog_post_schema.xml");
var schema = new SearchSchema("blog_post_schema", schemaXml);
var rslt = client.PutSearchSchema(schema);
```

```javascript
/*
 * Full example here:
 * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js
 *
 */
var options = {
    schemaName: 'blog_post_schema',
    schema: schemaXml
};
client.storeSchema(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
{ok, SchemaData} = file:read_file("blog_post_schema.xml"),
riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData).
```

```curl
curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \
  -H 'Content-Type: application/xml' \
  --data-binary @blog_post_schema.xml
```

With our schema uploaded, we can create an index called `blog_posts` and
associate that index with our schema:

```java
YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema");
StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build();
client.execute(storeIndex);
```

```ruby
client.create_search_index('blog_posts', 'blog_post_schema')
```

```php
(new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
  ->withName('blog_posts')
  ->usingSchema('blog_post_schema')
  ->build()
  ->execute();
```

```python
client.create_search_index('blog_posts', 'blog_post_schema')
```

```csharp
var idx = new SearchIndex("blog_posts", "blog_post_schema");
var rslt = client.PutSearchIndex(idx);
```

```javascript
var options = {
    schemaName: 'blog_post_schema',
    indexName: 'blog_posts'
};
client.storeIndex(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []).
```

```curl
curl -XPUT $RIAK_HOST/search/index/blog_posts \
  -H 'Content-Type: application/json' \
  -d '{"schema": "blog_post_schema"}'
```
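Index creation propagates to Solr asynchronously, so before moving on you may want to confirm that the index is in place. A quick sketch (using the same `$RIAK_HOST` variable): a `GET` on the index endpoint should return a small JSON object naming the index and its associated schema.

```curl
curl $RIAK_HOST/search/index/blog_posts
```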
## How Collections will Work

Collections are not a concept that is native to Riak, but we can easily
mimic collections by thinking of a bucket type as a collection. When we
associate a bucket type with a Riak Search index, all of the objects
stored in any bucket of that bucket type will be queryable on the basis
of that one index. For this tutorial, we'll create a bucket type called
`cms` and think of that as a collection. We could also restrict our
`blog_posts` index to a single bucket just as easily and think of that
as a queryable collection, but we will not do that in this tutorial.

The advantage of the bucket-type-based approach is that we could store
blog posts from different blogs in different buckets and query them
all at once as part of the same index. It depends on the use case at
hand. In this tutorial, we'll only be storing posts from one blog, which
is called "Cat Pics Quarterly" and provides in-depth theoretical
discussions of cat pics with a certain number of Reddit upvotes. All of
the posts in this blog will be stored in the bucket
`cat_pics_quarterly`.

First, let's create our `cms` bucket type and associate it with the
`blog_posts` index:

```bash
riak-admin bucket-type create cms \
  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
riak-admin bucket-type activate cms
```

Now, any object stored in any bucket of the type `cms` will be indexed
as part of our "collection."

## Storing Blog Posts as Maps

Now that we know how each element of a blog post can be translated into
one of the Riak Data Types, we can create an interface in our
application to serve as that translation layer. Using the method
described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-modeling), we can construct a
class that looks like this:

```java
import java.util.Set;

public class BlogPost {
    private String title;
    private String author;
    private String content;
    private Set<String> keywords;
    private DateTime datePosted;
    private Boolean published;
    private static final String bucketType = "cms";

    private Location location;

    private RiakClient client;

    public BlogPost(RiakClient client,
                    String bucketName,
                    String title,
                    String author,
                    String content,
                    Set<String> keywords,
                    DateTime datePosted,
                    Boolean published) {
        this.client = client;
        this.location = new Location(new Namespace(bucketType, bucketName), null);
        this.title = title;
        this.author = author;
        this.content = content;
        this.keywords = keywords;
        this.datePosted = datePosted;
        this.published = published;
    }

    public void store() throws Exception {
        RegisterUpdate titleUpdate = new RegisterUpdate(title);
        RegisterUpdate authorUpdate = new RegisterUpdate(author);
        RegisterUpdate contentUpdate = new RegisterUpdate(content);
        SetUpdate keywordsUpdate = new SetUpdate();
        for (String keyword : keywords) {
            keywordsUpdate.add(keyword);
        }
        RegisterUpdate dateUpdate =
            new RegisterUpdate(datePosted.toString("YYYY-MM-DD HH:MM"));
        FlagUpdate publishedUpdate = new FlagUpdate(published);
        MapUpdate mapUpdate = new MapUpdate()
            .update("title", titleUpdate)
            .update("author", authorUpdate)
            .update("content", contentUpdate)
            .update("keywords", keywordsUpdate)
            .update("date", dateUpdate)
            .update("published", publishedUpdate);
        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
            .build();
        client.execute(storeBlogPost);
    }
}
```
```ruby
class BlogPost
  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
    bucket = client.bucket_type('cms').bucket(bucket_name)
    map = Riak::Crdt::Map.new(bucket, nil)
    map.batch do |m|
      m.registers['title'] = title
      m.registers['author'] = author
      m.registers['content'] = content
      keywords.each do |k|
        m.sets['keywords'].add(k)
      end
      m.registers['date'] = date_posted
      if published
        m.flags['published'] = true
      end
    end
  end
end
```

```php
class BlogPost {
    private $title = '';
    private $author = '';
    private $content = '';
    private $keywords = [];
    private $datePosted = '';
    private $published = false;
    private $bucketType = "cms";

    private $bucket = null;

    private $riak = null;

    public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
    {
        $this->riak = $riak;
        $this->bucket = new Bucket($bucket, $this->bucketType);
        $this->title = $title;
        $this->author = $author;
        $this->content = $content;
        $this->keywords = $keywords;
        $this->datePosted = $date;
        $this->published = $published;
    }

    public function store()
    {
        $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));

        foreach($this->keywords as $keyword) {
            $setBuilder->add($keyword);
        }

        (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
            ->updateRegister('title', $this->title)
            ->updateRegister('author', $this->author)
            ->updateRegister('content', $this->content)
            ->updateRegister('date', $this->datePosted)
            ->updateFlag('published', $this->published)
            ->updateSet('keywords', $setBuilder)
            ->withBucket($this->bucket)
            ->build()
            ->execute();
    }
}
```

```python
from riak.datatypes import Map

class BlogPost:
    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
        bucket = client.bucket_type('cms').bucket(bucket_name)
        self.map = Map(bucket, None)
        self.map.registers['title'].assign(title)
        self.map.registers['author'].assign(author)
        self.map.registers['content'].assign(content)
        for k in keywords:
            self.map.sets['keywords'].add(k)
        self.map.registers['date'].assign(date_posted)
        if published:
            self.map.flags['published'].enable()
        self.map.store()
```

```csharp
/*
 * Please see the code in the RiakClientExamples project:
 * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
 */
```

```javascript
/*
 * Please see the code in the examples repository:
 * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
 */
```
Now, we can store some blog posts. We'll start with just one:

```java
Set<String> keywords = new HashSet<String>();
keywords.add("adorbs");
keywords.add("cheshire");

BlogPost post1 = new BlogPost(client, // client object
                              "cat_pics_quarterly", // bucket
                              "This one is so lulz!", // title
                              "Cat Stevens", // author
                              "Please check out these cat pics!", // content
                              keywords, // keywords
                              new DateTime(), // date posted
                              true); // published
try {
    post1.store();
} catch (Exception e) {
    System.out.println(e);
}
```

```ruby
keywords = ['adorbs', 'cheshire']
date = Time.now.strftime('%Y-%m-%d %H:%M')
blog_post1 = BlogPost.new('cat_pics_quarterly',
                          'This one is so lulz!',
                          'Cat Stevens',
                          'Please check out these cat pics!',
                          keywords,
                          date,
                          true)
```

```php
$keywords = ['adorbs', 'cheshire'];
$date = new \DateTime('now');

$post1 = new BlogPost(
    $riak, // client object
    'cat_pics_quarterly', // bucket
    'This one is so lulz!', // title
    'Cat Stevens', // author
    'Please check out these cat pics!', // content
    $keywords, // keywords
    $date, // date posted
    true // published
);
```

```python
import datetime

keywords = ['adorbs', 'cheshire']
date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
blog_post1 = BlogPost('cat_pics_quarterly',
                      'This one is so lulz!',
                      'Cat Stevens',
                      'Please check out these cat pics!',
                      keywords,
                      date,
                      True)
```

```csharp
var keywords = new HashSet<string> { "adorbs", "cheshire" };

var post = new BlogPost(
    "This one is so lulz!",
    "Cat Stevens",
    "Please check out these cat pics!",
    keywords,
    DateTime.Now,
    true);

var repo = new BlogPostRepository(client, "cat_pics_quarterly");
string id = repo.Save(post);
```

```javascript
var post = new BlogPost(
    'This one is so lulz!',
    'Cat Stevens',
    'Please check out these cat pics!',
    [ 'adorbs', 'cheshire' ],
    new Date(),
    true
);

var repo = new BlogPostRepository(client, 'cat_pics_quarterly');

repo.save(post, function (err, rslt) {
    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
});
```

## Querying

Now that we have some blog posts stored in our "collection," we can
start querying for whatever we'd like. Let's say that we want to find
all blog posts with the keyword `funny` (after all, some cat pics are
quite serious, and we may not want those).
```java
String index = "blog_posts";
String query = "keywords_set:funny";

SearchOperation searchOp = new SearchOperation
        .Builder(BinaryValue.create(index), query)
        .build();
cluster.execute(searchOp);
List<Map<String, List<String>>> results = searchOp.get().getAllResults();
```

```ruby
results = client.search('blog_posts', 'keywords_set:funny')
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('blog_posts')
  ->withQuery('keywords_set:funny')
  ->build()
  ->execute();
```

```python
results = client.fulltext_search('blog_posts', 'keywords_set:funny')
```

```csharp
var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny");
var rslt = client.Search(searchRequest);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('blog_posts')
    .withQuery('keywords_set:funny')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```curl
curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny"
```

Or we can find posts that contain the word `furry`:

```java
String index = "blog_posts";
String query = "content_register:furry";

SearchOperation searchOp = new SearchOperation
        .Builder(BinaryValue.create(index), query)
        .build();
cluster.execute(searchOp);
List<Map<String, List<String>>> results = searchOp.get().getAllResults();
```

```ruby
results = client.search('blog_posts', 'content_register:furry')
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withIndexName('blog_posts')
  ->withQuery('content_register:furry')
  ->build()
  ->execute();
```

```python
results = client.fulltext_search('blog_posts', 'content_register:furry')
```

```csharp
var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry");
var rslt = client.Search(searchRequest);
```

```javascript
var searchCmd = new Riak.Commands.YZ.Search.Builder()
    .withIndexName('blog_posts')
    .withQuery('content_register:furry')
    .withCallback(search_cb)
    .build();

client.execute(searchCmd);
```

```curl
curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry"
```

Here are some more possible queries:

Info | Query
:----|:-----
Unpublished posts | `published_flag:false`
Titles that begin with `Loving` | `title_register:Loving*`
Post bodies containing the words `furry` and `jumping` | `content_register:(furry AND jumping)`
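As with the queries above, each of these can be run against the HTTP search endpoint. For example, a sketch for the unpublished-posts query:

```curl
curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=published_flag:false"
```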
diff --git a/content/riak/kv/2.9.4/developing/usage/mapreduce.md b/content/riak/kv/2.9.4/developing/usage/mapreduce.md
new file mode 100644
index 0000000000..d945b1d3c9
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/mapreduce.md
@@ -0,0 +1,247 @@
---
title: "Using MapReduce"
description: ""
project: "riak_kv"
project_version: 2.9.4
menu:
  riak_kv-2.9.4:
    name: "Using MapReduce"
    identifier: "usage_mapreduce"
    weight: 106
    parent: "developing_usage"
toc: true
aliases:
  - /riak/2.9.4/dev/using/mapreduce
  - /riak/kv/2.9.4/dev/using/mapreduce
---

[usage 2i]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes
[usage search]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/search
[usage types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types
[api http]: {{<baseurl>}}riak/kv/2.9.4/developing/api/http
[api pb]: {{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers
[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
[guide mapreduce]: {{<baseurl>}}riak/kv/2.9.4/developing/app-guide/advanced-mapreduce

{{% note title="Use MapReduce sparingly" %}}
In Riak KV, MapReduce is the primary method for non-primary-key-based
querying. Although useful for tasks such as batch
processing jobs, MapReduce operations can be very computationally
expensive, to the extent that they can degrade performance in
production clusters operating under load. Thus, we recommend running
MapReduce operations in a controlled, rate-limited fashion and never for
realtime querying purposes.
{{% /note %}}

MapReduce (M/R) is a technique for dividing data processing work across
a distributed system. It takes advantage of the parallel processing
power of distributed systems and also reduces network bandwidth, as the
algorithm is passed around to where the data lives rather than
transferring a potentially huge dataset to a client algorithm.

You can use MapReduce for things like: filtering documents by
tags, counting words in documents, and extracting links to related data.
In Riak KV, MapReduce is one method for querying that is not strictly based
on key querying, alongside [secondary indexes][usage 2i]
and [search][usage search]. MapReduce jobs can be submitted through the
[HTTP API][api http] or the [Protocol Buffers API][api pb], although we
strongly recommend using the Protocol Buffers API for performance
reasons.

## Features

* Map phases execute in parallel with data locality.
* Reduce phases execute in parallel on the node where the job was
  submitted.
* MapReduce queries are written in Erlang.

## When to Use MapReduce

* When you know the set of objects over which you want to MapReduce
  (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key)
* When you want to return actual objects or pieces of objects and not
  just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on
  non-key-based queries, but they only return lists of keys and not
  whole objects.
* When you need the utmost flexibility in querying your data. MapReduce
  gives you full access to your object and lets you pick it apart any
  way you want.

## When Not to Use MapReduce

* When you want to query data over an entire bucket. MapReduce uses a
  list of keys, which can place a lot of demand on the cluster.
* When you want latency to be as predictable as possible.

## How it Works

The MapReduce framework helps developers divide a query into steps,
divide the dataset into chunks, and then run those step/chunk pairs in
separate physical hosts.

There are two steps in a MapReduce query:

* **Map** --- The data collection phase, which breaks up large chunks of
  work into smaller ones and then takes action on each chunk. Map
  phases consist of a function and a list of objects on which the map
  operation will operate.
* **Reduce** --- The data collation or processing phase, which combines
  the results from the map step into a single output. The reduce phase
  is optional.

Riak KV MapReduce queries have two components:

* A list of inputs
* A list of phases

The elements of the input list are object locations as specified by
[bucket type][usage types], bucket, and key. The elements of the
phases list are chunks of information related to a map, a reduce, or a
link function.

A MapReduce query begins when a client makes the request to Riak KV.
The
node that the client contacts to make the request becomes the
*coordinating node* responsible for the MapReduce job. As described
above, each job consists of a list of phases, where each phase is either
a map or a reduce phase. The coordinating node uses the list of phases
to route the object keys and the function that will operate over the
objects stored in those keys, and instructs the proper [vnode][glossary vnode] to
run that function over the right objects.

After running the map function, the results are sent back to the
coordinating node. This node then concatenates the list and passes that
information over to a reduce phase on the same coordinating node,
assuming that the next phase in the list is a reduce phase.

The diagram below provides an illustration of how a coordinating vnode
orchestrates a MapReduce job.

![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png)

## Example

In this example, we'll create four objects with the text "caremad"
repeated a varying number of times and store those objects in the bucket
`training` (which does not bear a [bucket type][usage types]).
An Erlang MapReduce function will be used to count the occurrences of
the word "caremad."

### Data object input commands

For the sake of simplicity, we'll use [curl](http://curl.haxx.se/)
in conjunction with Riak KV's [HTTP API][api http] to store the objects:

```curl
curl -XPUT http://localhost:8098/buckets/training/keys/foo \
  -H 'Content-Type: text/plain' \
  -d 'caremad data goes here'

curl -XPUT http://localhost:8098/buckets/training/keys/bar \
  -H 'Content-Type: text/plain' \
  -d 'caremad caremad caremad caremad'

curl -XPUT http://localhost:8098/buckets/training/keys/baz \
  -H 'Content-Type: text/plain' \
  -d 'nothing to see here'

curl -XPUT http://localhost:8098/buckets/training/keys/bam \
  -H 'Content-Type: text/plain' \
  -d 'caremad caremad caremad'
```
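As noted above, MapReduce jobs can also be submitted as JSON to the HTTP API's `/mapred` endpoint. The sketch below does not use the custom Erlang function developed in the rest of this example; instead it uses the built-in `riak_kv_mapreduce:map_object_value` function simply to return the value stored under one of the keys created above:

```curl
curl -XPOST http://localhost:8098/mapred \
  -H 'Content-Type: application/json' \
  -d '{"inputs":[["training","foo"]],
       "query":[{"map":{"language":"erlang",
                        "module":"riak_kv_mapreduce",
                        "function":"map_object_value"}}]}'
```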
### MapReduce invocation

To invoke a MapReduce function from a compiled Erlang program requires
that the function be compiled and distributed to all nodes.

For interactive use, however, it's not necessary to do so; instead, we
can invoke the client library from the
[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define
functions to send to Riak KV on the fly.

First we define the map function, which returns the key of each object
in the bucket `training` along with the number of times the text
`caremad` occurs in that object's value.

We're going to generalize and optimize it a bit by supplying a
compiled regular expression when we invoke MapReduce; our function
will expect that as the third argument.

```erlang
ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of
    {match, Matches} -> [{riak_object:key(O), length(Matches)}];
    nomatch -> [{riak_object:key(O), 0}]
end end.
```

Next, to call `ReFun` on all keys in the `training` bucket, we can do
the following in the Erlang shell.

{{% note title="Warning" %}}
Do not use this in a production
environment; listing all keys to identify those in the `training` bucket
is a very expensive process.
{{% /note %}}

```erlang
{ok, Re} = re:compile("caremad").
```

That will return output along the following lines, verifying that
compilation has completed:

```
{ok,{re_pattern,0,0,
                <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100,
                  ...>>}}
```

Then, we can create a socket link to our cluster:

```erlang
{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087).

%% This should return a process ID:
%% {ok,<0.34.0>}
```

Then we can run the compiled MapReduce job on the `training` bucket:

```erlang
riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
                              [{map, {qfun, ReFun}, Re, true}]).
```

If your bucket is part of a bucket type, you would use the following:

```erlang
B = {<<"my_bucket_type">>, <<"training">>},
Args = [{map, {qfun, ReFun}, Re, true}],
riakc_pb_socket:mapred_bucket(Riak, B, Args).
```

That will return a list of tuples. The first element in each tuple is
the key for each object in the bucket, while the second element displays
the number of instances of the word "caremad" in the object:

```
{ok,[{0,
      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
```

### Recap

In this tutorial, we ran an Erlang MapReduce function against a total of
four objects in the `training` bucket. This job took each key/value
object in the bucket and searched the text for the word "caremad,"
counting the number of instances of the word.

## Advanced MapReduce Queries

For more detailed information on MapReduce queries in Riak KV, we recommend
checking out our [Advanced MapReduce][guide mapreduce] guide.

diff --git a/content/riak/kv/2.9.4/developing/usage/next-gen-replication.md b/content/riak/kv/2.9.4/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..59ddd0b802
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/next-gen-replication.md
@@ -0,0 +1,150 @@
---
title: "Next-Gen Replication"
description: ""
project: "riak_kv"
project_version: "2.9.4"
menu:
  riak_kv-2.9.4:
    name: "Next Gen Replication"
    identifier: "learn_concepts_next_gen_replication"
    weight: 108
    parent: "learn_concepts"
version_history:
  in: "2.9.1+"
toc: true
aliases:
  - /riak-docs/riak/2.9.4/dev/using/nextgenreplication
---
[concept TicTac aae]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/tictac-active-anti-entropy
[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context/#vector-clocks
[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters
[concept vnodes]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/vnodes
[glossary node]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#node
[glossary ring]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#ring
[usage replication]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/replication

## Next Generation Replication - How it Works

### Replication Actors

Each node in `riak_kv` starts three processes that manage inter-cluster replication: a tictac AAE full-sync manager, a replication queue source manager, and a replication queue sink manager. All processes are started by default (whether or not replication is enabled), but will only play an active role should replication be configured. Further details on the processes involved:

* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`

  * There is a single actor on each node that manages the full-sync reconciliation workload configured for that node.

  * Each node is configured with the details of a peer node at a remote cluster. Each manager is responsible for controlling cluster-wide hashtree exchanges between the local node and the peer node, and for prompting any repairs required across the cluster (not just on this node). The information is exchanged between the peers, but that information represents the data across the whole cluster. Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`.
  * Each node is configured with a schedule to determine how frequently this manager will run its reconcile and repair operations.

  * It is an administrator responsibility to ensure the cluster AAE workload is distributed across nodes with sufficient diversity to ensure correct operation under failure. Work is not re-distributed between nodes in response to failure on either the local or remote cluster, so there must be other nodes already configured to share that workload to continue operation under failure conditions.

  * Each node can only full-sync with one other cluster (via the one peer node). If the cluster needs to full-sync with more than one cluster, then the administrator should ensure different nodes have the different configurations necessary to achieve this.

  * Scheduling of work to minimise concurrency of reconciliation operations is managed by this actor using a simple, coordination-free mechanism.

  * The administrator may at run-time suspend or resume the regular running of full-sync operations on any given node via the `riak_kv_ttaaefs_manager`.

* __Replication Queue Source-Side Manager__

  * There is a single actor on each node that manages the queueing of replication object references to be consumed from other clusters. This actor runs a configurable number of queues, which contain pointers to data which is required to be consumed by different remote clusters.

  * The general pattern is that each delta within a cluster will be published once via the `riak_kv_replrtq_src` on a node local to the discovery of the change. Each queue which is a source of updates will have multiple consumers spread across multiple sink nodes on the receiving cluster - where each sink-side node's consumers are being managed by a `riak_kv_replrtq_snk` process on that node.

  * Queues may have data filtering rules to restrict what changes are distributed via that queue. The filters can restrict replication to a specific bucket or bucket type, to a bucket name prefix, or allow for any change to be published to that queue.

  * __Real-time replication__ changes (i.e. PUTs that have just been co-ordinated on this node within the cluster) are sent to the `riak_kv_replrtq_src` in one of the following formats:
    * {Bucket, Key, Clock, {tombstone, Object}};
    * {Bucket, Key, Clock, {object, Object}};
    * {Bucket, Key, Clock, to_fetch}.

  * Real-time replicated objects are the highest priority items to be queued, and are placed on __every queue whose data filtering rules are matched__ by the object. If the priority queue has grown beyond a limited number of items (the number being defined in `riak_kv.replrtq_srcobjectlimit`), then any {object, Object} reference is stripped and replaced with `to_fetch`. This is to help limit the memory consumed by the queue during failure conditions, i.e. when a sink has stopped consuming from the source queue.

  * Changes identified by __AAE full-sync replication__ processes run by the `riak_kv_ttaaefs` manager on the local node are sent to the `riak_kv_replrtq_src` as references, and queued as the second highest priority. These changes are queued only on __a single queue defined within the configuration__ of `riak_kv_ttaaefs_manager`. The changes queued are only references to the object (Bucket, Key and Clock), not the actual object.

  * Changes identified by __AAE fold operations__ for administrator-initiated transition or repair operations (e.g.
fold over a bucket or key-range, or for a given range of modified dates), are sent to the `riak_kv_replrtq_src` to be queued as the lowest priority onto __a single queue defined by the administrator when initiating the AAE fold operation__. The changes queued are only references to the object (Bucket, Key and Clock) not the actual object - and are only the changes discovered through the fold running on vnodes local to this node. + + * Should the local node fail, all undelivered object references will be dropped. + + * Queues are bounded, with limits set separately for each priority. Items are consumed from the queue in strict priority order. So a backlog of non-real-time replication events cannot cause a backlog or failure in real-time events. + + * The queues are provided using the existing `riak_core_priority_queue` module in Riak. + + * The administrator may at run-time suspend or resume the publishing of data to specific queues via the `riak_kv_replrtq_src` process. + +* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk` + + * There is a single actor on each node that manages the process of consuming from queues on the `riak_kv_replrtq_src` on remote clusters. + + * The `riak_kv_replrtq_snk` can be configured to consume from multiple queues, across an open-ended number of peers. For instance if each node on Cluster A maintains a queue named `cluster_c_full`, and each node on Cluster B maintains a queue named `cluster_c_partial` - then `riak_kv_replrtq_snk` can be configured to consume from the `cluster_c_full` from every node in Cluster A and from `cluster_c_partial` from every node in Cluster B. + + * The `riak_kv_replrtq_snk` manages a finite number of workers for consuming from remote peers. The `riak_kv_replrtq_snk` tracks the results of work in order to back-off slightly from peers regularly not returning results to consume requests (in favour of those peers indicating a backlog by regularly returning results). The `riak_kv_replrtq_snk` also tracks the results of work in order to back-off severely from those peers returning errors (so as not to lock too many workers consuming from unreachable nodes). + + * The administrator may at run-time suspend or resume the consuming of data from specific queues or peers via the `riak_kv_replrtq_snk`. + +### Real-time Replication - Step by Step + +Previous replication implementations initiate replication through a post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm` after "enough" responses have been received from other vnodes (based on n, w, dw and pw values for the PUT). Without enough responses, the replication hook is not fired, although the client should receive an error and retry. This process of retrying may eventually fire the hook - although it is possible for a PUT to fail, the hook not to be fired, but a GET be locally successful (due to read-repair and anti-entropy) and there be no clue that the object has not been replicated. + +In implementing the new replication solution, the point of firing off replication has been changed to the point that the co-ordinated PUT is completed. So the replication of the PUT to the clusters may occur in parallel to the replication of the PUT to other nodes in the source cluster. This is the first opportunity where sufficient information is known (e.g. the updated vector clock), and reduces the size of the time-window of inconsistency between the clusters, and also reduce the window of opportunity for a PUT to succeed but not have replication triggered. 
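+
+Before stepping through the mechanics, it may help to see the handful of
+`riak_kv` settings that drive the behaviour described below. This is an
+illustrative sketch only, using the defaults discussed in this section;
+the `replrtq_*` names follow the riak.conf style used for these
+processes, and should be verified against the configuration reference
+for your release:
+
+```riakconf
+## Sketch only - enable this node as a real-time replication source.
+replrtq_enablesrc = enabled
+## Objects at or below this size are queued whole (default 200KB).
+replrtq_srcobjectsize = 200KB
+## Beyond this queue depth, whole objects are queued as to_fetch
+## references instead (default 1000).
+replrtq_srcobjectlimit = 1000
+## Absolute bound applied to each priority on the queue (default 300000).
+replrtq_srcqueuelimit = 300000
+```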
+
+Replication is fired within the `riak_kv_vnode` `actual_put/8`. On condition of the vnode being a co-ordinator of the PUT, and of `riak_kv.replrtq_enablesrc` being set to enabled (true), the following work is done:
+
+- The object reference to be replicated is determined; this is the type of reference to be placed on the replication queue.
+
+  - If the object is now a tombstone, the whole object is used as the replication reference. The whole object is used due to the small size of the object, and the need to avoid race conditions with reaping activity if `delete_mode` is not `keep` - the cluster may not be able to fetch the tombstone to replicate in the future. The whole object must be kept on the queue and not be filtered by the `riak_kv_replrtq_src` to be replaced with a `to_fetch` reference.
+
+  - If the object is below the `riak_kv.replrtq_srcobjectsize` (default 200KB) then the whole object will be sent to the `riak_kv_replrtq_src`, and it will be queued as a whole object as long as the current size of the priority real-time queue does not exceed the `riak_kv.replrtq_srcobjectlimit` (default 1000). If an object is over the size limit, a `to_fetch` reference will be sent instead of the object, and if the queue is too large the `riak_kv_replrtq_src` will substitute a `to_fetch` reference before queueing.
+
+- The `{Bucket, Key, Clock, ObjectReference}` is cast to the `riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the priority queue.
+
+- The queue has a configurable absolute limit that is applied individually for each priority. The limit is configured via `riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5 minutes of traffic at 1,000 PUTs per second). When this limit is reached, new replication references are discarded on receipt rather than queued - these discarded references will need to eventually be re-replicated via full-sync.
+
+The reference now needs to be handled by the `riak_kv_replrtq_src`. The task list for this process is:
+
+- Assign a priority to the replication event depending on what prompted the replication (e.g. highest priority to real-time events received from co-ordinator vnodes).
+
+- Add the reference to the tail of __every__ matching queue, based on priority. Each queue is configured to either match `any` replication event, no real-time events (using the configuration `block_rtq`), or a subset of events (using either a bucket `type` filter or a `bucket` filter).
+
+In order to replicate the object, it must now be fetched from the queue by a sink. A sink-side cluster should have multiple consumers, on multiple nodes, consuming from each node in the source-side cluster. These workers are handed work items by the `riak_kv_replrtq_snk`, with a Riak client configured to communicate to the remote node, and the worker will initiate a `fetch` from that node.
+
+On receipt of the `fetch` request the source node should:
+
+- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in place of `{Bucket, Key}`.
+
+- The GET FSM should go directly into the `queue_fetch` state, and try to fetch the next replication reference from the given queue name via the `riak_kv_replrtq_src`.
+
+  - If the fetch from the queue returns `queue_empty`, this is relayed back to the sink-side worker, and ultimately the `riak_kv_replrtq_snk`, which may then slow down the pace at which fetch requests are sent to this node/queue combination. To reduce the volume of individual requests when queues are mainly empty, the queue is only considered empty if it has reported empty 8 times from requests 4ms apart.
+
+  - If the fetch returns an actual object, this is relayed back to the sink worker.
+
+  - If the fetch returns a replication reference with the flag `to_fetch`, the `riak_kv_get_fsm` will continue down the standard path of states starting with `prepare`, and fetch the object, which will then be returned to the sink worker.
+
+- If a successful fetch is relayed back to the sink worker, it will replicate the PUT using a local `riak_client:push/4`. The push will complete a PUT of the object on the sink cluster - using a `riak_kv_put_fsm` with appropriate options (e.g. `asis`, `disable-hooks`).
+
+  - The code within `riak_client:push/4` follows the behaviour of the existing `riak_repl` on receipt of a replicated object.
+
+- If the fetch and push request fails, the sink worker will report this back to the `riak_kv_replrtq_snk`, which should delay further requests to that node/queue so as to avoid rapidly locking sink workers up communicating with a failing node.
+
+
+### Full-Sync Reconciliation and Repair - Step by Step
+
+The `riak_kv_ttaaefs_manager` controls the full-sync replication activity of a node. Each node is configured with a single peer with which it is to run full-sync checks and repairs, on the assumption that, across the cluster, sufficient peers to sufficient clusters have been configured to complete the overall work necessary for that cluster. Ensuring there are sufficient peer relations is an administrator responsibility; there is no re-balancing or re-scaling of this work during failure scenarios.
+
+The `riak_kv_ttaaefs_manager` is a source-side process. It will not attempt to repair any discovered discrepancies where the remote cluster is ahead of the local cluster - the job of the process is to ensure that a remote cluster is up-to-date with the changes which have occurred in the local cluster. For mutual full-sync replication, there will need to be an equivalent configuration on the peer cluster.
+
+The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the configuration. The schedule consists of _wants_: the number of times per day that it is desired that this manager will:
+
+- Reconcile changes across the whole cluster over all time;
+
+- Skip work for a schedule slot and do nothing;
+
+- Reconcile changes that have occurred in the past hour;
+
+- Reconcile changes that have occurred in the past day.
+
+On startup, the manager looks at these wants and provides a random distribution of work across slots. The day is divided into evenly distributed slots, so that there is a slot for each want in the schedule. Work for a slot is run at an offset from the start of the slot, based on the place this node has in the sorted list of currently active nodes. So if each node is configured with the same total number of wants, work will be synchronised so as to limit overlapping work within the cluster.
+
+When, on a node, a scheduled piece of work comes due, the `riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work between the two clusters (using the peer configuration to reach the remote cluster). Once the work is finished, it will schedule the next piece of work - unless the start time for the next piece of work has already passed, in which case the next work is skipped. When all the work in the schedule is complete, a new schedule is calculated from the wants.
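+
+To make the schedule concrete, a node's full-sync configuration might be
+expressed along the following lines. This is an illustrative sketch only;
+the `ttaaefs_*` names follow the riak.conf style used for these
+processes, and the exact names, value formats, peer details and queue
+name shown here are assumptions to be confirmed against the
+configuration reference for your release:
+
+```riakconf
+## Sketch only - full-sync wants for one node, against one remote peer.
+ttaaefs_scope = all
+ttaaefs_peerip = 10.0.0.1
+ttaaefs_peerport = 8087
+ttaaefs_peerprotocol = pb
+## Queue on which discovered repairs are to be published.
+ttaaefs_queuename = q1_ttaaefs
+## Wants - how many slots per day run each type of check.
+ttaaefs_allcheck = 2
+ttaaefs_hourcheck = 12
+ttaaefs_daycheck = 4
+ttaaefs_nocheck = 6
+```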
+
+When starting an `aae_exchange`, the `riak_kv_ttaaefs_manager` must pass in a repair function. This function will compare clocks from identified discrepancies, and where the source cluster is ahead of the sink, send the `{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name on `riak_kv_replrtq_src`. These queued entries will then be replicated through being fetched by the `riak_kv_replrtq_snk` workers, although this will only occur when there is no higher priority work to replicate, i.e. real-time replication events prompted by locally co-ordinated PUTs.
diff --git a/content/riak/kv/2.9.4/developing/usage/reading-objects.md b/content/riak/kv/2.9.4/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..c527b9ff36
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/reading-objects.md
@@ -0,0 +1,248 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/#siblings) (more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object
+`pr` | `0` | How many [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+        .withOption(FetchOption.R, new Quorum(3))
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->withParameter('r', 3)
+  ->build()
+  ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code is:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` + diff --git a/content/riak/kv/2.9.4/developing/usage/replication.md b/content/riak/kv/2.9.4/developing/usage/replication.md new file mode 100644 index 0000000000..b79d342e8d --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/replication.md @@ -0,0 +1,597 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.4/dev/advanced/replication-properties + - /riak/kv/2.9.4/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/2.9.4/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/2.9.4/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, as options
+attached to an individual read or write. Examples are provided in the
+[section on client-level settings](#client-level-replication-settings)
+below.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props` those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.4/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.4/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. 
You can use [bucket types]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+This write will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects, but at the cost of slower
+response times---a problem that can be mitigated by setting `basic_quorum`
+to `true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
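+
+Expressed as a quick Erlang sketch (this is simply the floor(N/2) + 1
+rule from the paragraph above, not an API call):
+
+```erlang
+%% Quorum for a given N value, per floor(N/2) + 1.
+Quorum = fun(N) -> (N div 2) + 1 end,
+3 = Quorum(5),
+4 = Quorum(7),
+5 = Quorum(8).
+```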
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` --- All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` --- This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` --- A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` --- Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2
+```
+
+All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/2.9.4/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/2.9.4/developing/getting-started)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/2.9.4/learn/concepts/causal-context#siblings">siblings</a>
+2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/2.9.4/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All values are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
+
diff --git a/content/riak/kv/2.9.4/developing/usage/search-schemas.md b/content/riak/kv/2.9.4/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..32489fd14e
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/search-schemas.md
@@ -0,0 +1,508 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/advanced/search-schema
+  - /riak/kv/2.9.4/dev/advanced/search-schema
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/), and [more]({{<baseurl>}}riak/kv/2.9.4/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value. Sufficiently large objects can potentially take up tremendous
+amounts of disk space, so pay special attention to those indexes.
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('path/to/file.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('cartoons.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+xml_file.close()
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon Thundercats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above objects into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+
+```
+name=Lion-o
+age=30
+leader=true
+aliases.name=León-O
+aliases.desc_es=Señor de los ThunderCats
+aliases.name=Starlion
+aliases.desc_fr=Le jeune seigneur des Cosmocats
+```
+
+This means that our schema should handle `name`, `age`, `leader`,
+`aliases.name` (a `.` is a valid field character), and
+`aliases.desc_*`, where the suffix indicates the language of the
+description (Spanish and French).
+
+### Required Schema Fields
+
+Solr schemas can be very complex, containing many types and analyzers.
+Refer to the [Solr 4.7 reference
+guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf)
+for a complete list. You should be aware, however, that there are a few
+fields that are required by Riak Search in order to properly distribute
+an object across a [cluster][concept clusters]. These fields are all prefixed
+with `_yz`, which stands for
+[Yokozuna](https://github.com/basho/yokozuna), the original code name
+for Riak Search.
+
+Below is a bare-minimum skeleton Solr schema. It won't do much for you
+other than allow Riak Search to properly manage your stored objects.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id"   type="_yz_str" indexed="true" stored="true"  multiValued="false" required="true"/>
+   <field name="_yz_ed"   type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn"   type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn"  type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk"   type="_yz_str" indexed="true" stored="true"  multiValued="false"/>
+   <field name="_yz_rt"   type="_yz_str" indexed="true" stored="true"  multiValued="false"/>
+   <field name="_yz_rb"   type="_yz_str" indexed="true" stored="true"  multiValued="false"/>
+   <field name="_yz_err"  type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+ </types>
+</schema>
+```
+
+If you're missing any of the above fields, Riak Search will reject your
+custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`.
+
+In the table below, you'll find a description of the various required
+fields. You'll rarely need to use any fields other than `_yz_rt` (bucket
+type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err`
+can be helpful if you suspect that your extractors are failing.
+Malformed JSON or XML will cause Riak Search to index a key and set
+`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-------|:-----|:-----------
+`_yz_id`  | ID | Unique identifier of this Solr document
+`_yz_ed`  | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy)
+`_yz_pn`  | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them
+`_yz_rk`  | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt`  | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb`  | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Flag indicating whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you need to map the name and type of each field (a string,
+integer, boolean, etc.).
+
+When creating fields you can either create specific fields via the
+`field` element or an asterisk (`*`) wildcard field via `dynamicField`.
+A value that matches a specific field name will use that field's
+definition; failing that, Riak Search will attempt to match it against a
+dynamic field pattern.
+
+Besides a field `type`, you also must decide if a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored`, that
+means that you can get the value back as a result of a query, but it
+also doubles the storage of the field (once in Riak, again in Solr). If
+a single Riak object can have more than one copy of the same matching
+field, you also must set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.0">
+ <fields>
+   <field name="name"   type="string"  indexed="true" stored="true" />
+   <field name="age"    type="int"     indexed="true" stored="false" />
+   <field name="leader" type="boolean" indexed="true" stored="false" />
+   <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" />
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id"   type="_yz_str" indexed="true" stored="true"  multiValued="false" required="true"/>
+   <field name="_yz_ed"   type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn"   type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn"  type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk"   type="_yz_str" indexed="true" stored="true"  multiValued="false"/>
+   <field name="_yz_rt"   type="_yz_str" indexed="true" stored="true"  multiValued="false"/>
+   <field name="_yz_rb"   type="_yz_str" indexed="true" stored="true"  multiValued="false"/>
+   <field name="_yz_err"  type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+```
+
+Next, take note of the types you used in the fields and ensure that each
+of the field types is defined as a `fieldType` under the `types`
+element. 
Basic types such as `string`, `boolean`, `int` have matching +Solr classes. There are dozens more types, including many kinds of +number (`float`, `tdouble`, `random`), `date` fields, and even +geolocation types. + +Besides simple field types, you can also customize analyzers for +different languages. In our example, we mapped any field that ends with +`*_es` to Spanish, and `*_de` to German. + +```xml + <types> + <!-- YZ String: Used for non-analyzed fields --> + <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" /> + + <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> + <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> + <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/> + + <!-- Spanish --> + <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> + <analyzer> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> + <filter class="solr.SpanishLightStemFilterFactory"/> + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> + </analyzer> + </fieldType> + + <!-- German --> + <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> + <analyzer> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> + <filter class="solr.GermanNormalizationFilterFactory"/> + <filter class="solr.GermanLightStemFilterFactory"/> + <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> + </analyzer> + </fieldType> + </types> +</schema> +``` + +### "Catch-All" Field + +Without a catch-all field, an exception will be thrown if data is +provided to index without a corresponding `<field>` element. The +following is the catch-all field from the default Yokozuna schema and +can be used in a custom schema as well. + +```xml +<dynamicField name="*" type="ignored" /> +``` + +The following is required to be a child of the `types` element in the +schema: + +```xml +<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> +``` + +### Dates + +The format of strings that represents a date/time is important as Solr +only understands [ISO8601 UTC date/time +values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html). +An example of a correctly formatted date/time string is +`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted +date/time value, an exception similar to this will be logged to +`solr.log`: + +```log +2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014' + at org.apache.solr.schema.DateField.parseMath(DateField.java:182) + at org.apache.solr.schema.TrieField.createField(TrieField.java:611) + at org.apache.solr.schema.TrieField.createFields(TrieField.java:650) + at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157) + at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47) + ... + ... + ... 
+``` + +### Uploading + +Once you have decided on the format of your custom schema as an .xml file, it can be uploaded to Riak KV as follows: + +```curl +curl -v -XPUT $RIAK_HOST/search/schema/thundercats \ + -H 'Content-Type:application/xml' \ + --data-binary @thundercats_schema.xml +``` + + + +## Field Properties By Use Case + +Sometimes it can be tricky to decide whether a value should be `stored`, +or whether `multiValued` is allowed. This handy table from the [Solr +documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case) +may help you pick field properties. + +An entry of `true` or `false` in the table indicates that the option +must be set to the given value for the use case to function correctly. +If no entry is provided, the setting of that attribute has no impact on +the case. + +<table class="schemausecase"> +<thead> +<tr> +<th>Use Case</th> +<th><code>indexed</code></th> +<th><code>stored</code></th> +<th><code>multiValued</code></th> +<th><code>omitNorms</code></th> +<th><code>termVectors</code></th> +<th><code>termPositions</code></th> +</tr> +</thead> +<tbody> +<tr> +<td>search within field</td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>retrieve contents</td> +<td></td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>use as unique key</td> +<td><code>true</code></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>sort on field</td> +<td><code>true</code></td> +<td></td> +<td><code>false</code></td> +<td><code>true</code>[1](#notes)</td> +<td></td> +<td></td> +</tr> +<tr> +<td>use field boosts[5](#notes)</td> +<td></td> +<td></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +</tr> +<tr> +<td>document boosts affect searches within field</td> +<td></td> +<td></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +</tr> +<tr> +<td>highlighting</td> +<td><code>true</code>[4](#notes)</td> +<td><code>true</code></td> +<td></td> +<td></td> +<td>[2](#notes)</td> +<td><code>true</code>[3](#notes)</td> +</tr> +<tr> +<td>faceting[5](#notes)</td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>add multiple values, maintaining order</td> +<td></td> +<td></td> +<td><code>true</code></td> +<td></td> +<td></td> +<td></td> +</tr> +<tr> +<td>field length affects doc score</td> +<td></td> +<td></td> +<td></td> +<td><code>false</code></td> +<td></td> +<td></td> +</tr> +<tr> +<td>MoreLikeThis[5](#notes)</td> +<td></td> +<td></td> +<td></td> +<td></td> +<td><code>true</code>[6](#notes)</td> +<td></td> +</tr> +</tbody></table> + +```erlang +{analyzer_factory, {erlang, text_analyzers, noop_analyzer_factory}}} +``` + diff --git a/content/riak/kv/2.9.4/developing/usage/search.md b/content/riak/kv/2.9.4/developing/usage/search.md new file mode 100644 index 0000000000..854e010a35 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/search.md @@ -0,0 +1,1452 @@ +--- +title: "Using Search" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Searching" + identifier: "usage_searching" + weight: 105 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.4/dev/using/search + - /riak/kv/2.9.4/dev/using/search +--- + +[usage search schema]: ../search-schemas/ +[bucket types]: ../bucket-types/ + +## Setup + +Riak search 2.0 is an integration of Solr (for indexing and querying) +and Riak (for 
storage and distribution). There are a few points of
+interest that a user of Riak search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak search uses AAE is in the
+[Riak search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document assume the default schema. You can read
+more about creating custom schemas in [Search Schema][usage search schema]; you'll likely want a custom schema in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you query against. When
+creating an index, you can optionally provide a schema; if you do not,
+the default schema will be used. Below we'll create an index named
+`famous` that uses the default schema.
+
+Both schema and index creation are covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/2.9.4/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>). 
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.4/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.4/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+    .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+     -H 'Content-Type: application/json' \
+     -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type's
+`search_index` property to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create it without
+any properties and set individual buckets to be indexed. 
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +### Associating an Index via Custom Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +default bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + +Once you have created the index association, any new data will be indexed on +ingest according to your schema. + +## Riak Search Security Setup + +[Security]({{<baseurl>}}riak/kv/2.9.4/using/security/) is a new feature as of +Riak 2.0 that lets an administrator limit access to certain resources. +In the case of search, your options are to limit administration of +schemas or indexes (the `search.admin` permission) to certain users, and +to limit querying (the `search.query` permission) to any index or to a +specific index. The example below shows the various options. 
+ +```bash +riak-admin security grant search.admin on schema to username +riak-admin security grant search.admin on index to username +riak-admin security grant search.query on index to username +riak-admin security grant search.query on index famous to username +``` + +Those permissions can also be revoked: + +```bash +riak-admin security revoke search.admin on schema from username +riak-admin security revoke search.admin on index from username +riak-admin security revoke search.query on index from username +riak-admin security revoke search.query on index famous from username +``` + +## Indexing Values + +> **Note on indexing and lag times** +> +> There is typically a one-second delay between storing an object in Riak +and that object being available in Search queries. You should take this +into account when writing Riak client tests, benchmarking, and so on. +More information can be found in the [Solr +documentation](http://wiki.apache.org/solr/SolrPerformanceFactors). + +With a Solr schema, index, and association in place (and possibly a +security setup as well), we're ready to start using Riak Search. First, +populate the `cat` bucket with values, in this case information about +four cats: Liono, Cheetara, Snarf, and Panthro. + +Depending on the driver you use, you may have to specify the content +type, which for this example is `application/json`. In the case of Ruby +and Python the content type is automatically set for you based on the +object given. + +```java +Namespace animalsBucket = new Namespace("animals"); +String json = "application/json"; + +RiakObject liono = new RiakObject() + .setContentType(json) + .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}")); +RiakObject cheetara = new RiakObject() + .setContentType(json) + .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}")); +RiakObject snarf = new RiakObject() + .setContentType(json) + .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}")); +RiakObject panthro = new RiakObject() + .setContentType(json) + .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}")); +Location lionoLoc = new Location(animalsBucket, "liono"); +Location cheetaraLoc = new Location(animalsBucket, "cheetara"); +Location snarfLoc = new Location(animalsBucket, "snarf"); +Location panthroLoc = new Location(animalsBucket, "panthro"); + +StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build(); +// The other StoreValue operations can be built the same way + +client.execute(lionoStore); +// The other storage operations can be performed the same way +``` + +```ruby +bucket = client.bucket_type('animals').bucket("cats") + +cat = bucket.get_or_new("liono") +cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true} +cat.store + +cat = bucket.get_or_new("cheetara") +cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false} +cat.store + +cat = bucket.get_or_new("snarf") +cat.data = {"name_s" => "Snarf", "age_i" => 43} +cat.store + +cat = bucket.get_or_new("panthro") +cat.data = {"name_s" => "Panthro", "age_i" => 36} +cat.store +``` + +```php +$bucket = new \Basho\Riak\Bucket('cats', 'animals'); + +$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withLocation(new \Basho\Riak\Location('liono', $bucket)) + ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]); + +$storeObjectBuilder->build()->execute(); + 
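+// Reuse the same builder for the remaining cats, swapping in a new
+// location and JSON object each time before building and executing.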
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket)) + ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]); + +$storeObjectBuilder->build()->execute(); + +$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket)) + ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]); + +$storeObjectBuilder->build()->execute(); + +$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket)) + ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]); + +$storeObjectBuilder->build()->execute(); +``` + +```python +bucket = client.bucket_type('animals').bucket('cats') + +cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True}) +cat.store() + +cat = bucket.new('cheetara', {'name_s':'Cheetara', 'age_i':28, 'leader_b': True}) +cat.store() + +cat = bucket.new('snarf', {'name_s':'Snarf', 'age_i':43}) +cat.store() + +cat = bucket.new('panthro', {'name_s':'Panthro', 'age_i':36}) +cat.store() +``` + +```csharp +var lionoId = new RiakObjectId("animals", "cats", "liono"); +var lionoObj = new { name_s = "Lion-o", age_i = 30, leader = true }; +var lionoRiakObj = new RiakObject(lionoId, lionoObj); + +var cheetaraId = new RiakObjectId("animals", "cats", "cheetara"); +var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader = false }; +var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj); + +var snarfId = new RiakObjectId("animals", "cats", "snarf"); +var snarfObj = new { name_s = "Snarf", age_i = 43, leader = false }; +var snarfRiakObj = new RiakObject(snarfId, snarfObj); + +var panthroId = new RiakObjectId("animals", "cats", "panthro"); +var panthroObj = new { name_s = "Panthro", age_i = 36, leader = false }; +var panthroRiakObj = new RiakObject(panthroId, panthroObj); + +var rslts = client.Put(new[] { + lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj +}); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var objs = [ + [ 'liono', { name_s: 'Lion-o', age_i: 30, leader: true } ], + [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader: false } ], + [ 'snarf', { name_s: 'Snarf', age_i: 43, leader: false } ], + [ 'panthro', { name_s: 'Panthro', age_i: 36, leader: false } ], +]; + +var storeFuncs = []; +objs.forEach(function (o) { + var storeFunc = function (async_cb) { + var key = o[0]; + var value = o[1]; + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('application/json'); + riakObj.setBucketType('animals'); + riakObj.setBucket('cats'); + riakObj.setKey(key); + riakObj.setValue(value); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }; + storeFuncs.push(storeFunc); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } + // NB: all objects stored and indexed... 
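+    // All four stores were issued concurrently. Keep in mind that Solr
+    // indexing is asynchronous, so allow for the roughly one-second
+    // delay noted above before querying for these objects.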
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/2.9.4/developing/usage/custom-extractors). 
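+
+To make this concrete, below is a rough Python sketch of the kind of
+transformation an extractor performs. It is illustrative only: the real
+extractors are Erlang modules that run inside Riak, and the function
+name here is invented for the example.
+
+```python
+import json
+
+def extract_json_fields(value, prefix=''):
+    # Illustrative sketch: flatten a decoded JSON value into Solr
+    # (field, value) pairs, joining nested keys with dots and treating
+    # lists as multi-valued fields.
+    pairs = []
+    if isinstance(value, dict):
+        for key, val in value.items():
+            name = '%s.%s' % (prefix, key) if prefix else key
+            pairs.extend(extract_json_fields(val, name))
+    elif isinstance(value, list):
+        for item in value:
+            pairs.extend(extract_json_fields(item, prefix))
+    else:
+        pairs.append((prefix, value))
+    return pairs
+
+print(extract_json_fields(json.loads('{"name_s": "Lion-o", "age_i": 30}')))
+# [('name_s', 'Lion-o'), ('age_i', 30)]
+```
+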
+ +Our current example uses the JSON extractor, but Riak Search also +extracts indexable fields from the following content types: + +* JSON (`application/json`) +* XML (`application/xml`, `text/xml`) +* Plain text (`text/plain`) +* [Riak Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/) + * counter (`application/riak_counter`) + * map (`application/riak_map`) + * set (`application/riak_set`) +* noop (unknown content type) + +More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/2.9.4/developing/usage/searching-data-types). + +In the examples we've seen, the JSON field `name_s` is translated to a +Solr index document field insert. Solr will index any field that it +recognizes, based on the index's schema. The default schema +(`_yz_default`) uses the suffix to decide the field type (`_s` +represents a string, `_i` is an integer, `_b` is binary and so on). + +If the content type allows for nested values (e.g. JSON and XML), the +extractors will flatten each field, separated by dots. For example, if +you have this XML: + +```xml +<person> + <pets> + <pet> + <name_s>Spot</name_s> + </pet> + </pets> +</person> +``` + +The extractor will convert it to the Solr field `person.pets.pet.name_s` +with value `Spot`. Lists of values are assumed to be Solr multi-valued +fields. + +```json +{"people_ss":["Ryan", "Eric", "Brett"]} +``` + +The above JSON will insert a list of three values into Solr to be +indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`. + +You can also create your own custom extractors if your data doesn't fit +one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/2.9.4/developing/usage/custom-extractors). + +### Automatic Fields + +When a Riak object is indexed, Riak Search automatically inserts a few +extra fields as well. These are necessary for a variety of technical +reasons, and for the most part you don't need to think about them. +However, there are a few fields which you may find useful: + +- `_yz_rk` (Riak key) +- `_yz_rt` (Riak bucket type) +- `_yz_rb` (Riak bucket) +- `_yz_err` (extraction error) + +You can query on the basis of these fields, just like any other normal +Solr fields. Most of the time, however, you'll use `_yz_rk` as a query +result, which tells you the Riak key that matches the query you just +ran. Let's see this in detail by running some queries in the next +section. + +## Querying + +After the schema, index, association, and population/extraction/indexing +are taken care of, you can get down to the fun part of querying your +data. + +### Simple Query + +The basic query parameter is `q` via HTTP, or the first parameter of +your chosen driver's `search` function (there are examples from all of +our client libraries below). All distributed Solr queries are supported, +which actually includes most of the single-node Solr queries. This +example searches for all documents in which the `name_s` value begins +with `Lion` by means of a glob (wildcard) match. 
+```java
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "name_s:Lion*")
+        .build();
+cluster.execute(searchOp);
+// This will display the actual results as a List of Maps:
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+// This will display the number of results:
+System.out.println(results.size());
+```
+
+```ruby
+results = client.search("famous", "name_s:Lion*")
+p results
+p results['docs']
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('name_s:Lion*')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+
+var_dump($response->getDocs());
+```
+
+```python
+results = client.fulltext_search('famous', 'name_s:Lion*')
+print results
+print results['docs']
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "name_s")
+        .Search("Lion*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+foreach (RiakSearchResultDocument doc in searchResult.Documents)
+{
+    var args = new[] {
+        doc.BucketType,
+        doc.Bucket,
+        doc.Key,
+        string.Join(", ", doc.Fields.Select(f => f.Value).ToArray())
+    };
+    Debug.WriteLine(
+        format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}",
+        args: args);
+}
+```
+
+```javascript
+function search_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("docs:", JSON.stringify(rslt.docs));
+}
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('name_s:Lion*')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:fwrite("~p~n", [Results]),
+Docs = Results#search_results.docs,
+io:fwrite("~p~n", [Docs]).
+
+%% Please note that this example relies on an Erlang record definition
+%% for the search_result record found here:
+%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("name_s:Lion*").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+sc := cmd.(*riak.SearchCommand)
+if json, jerr := json.MarshalIndent(sc.Response.Docs, "", "  "); jerr != nil {
+    return jerr
+} else {
+    fmt.Println(string(json))
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp
+```
+
+The response to a query will be an object containing details such as the
+query's max score and a list of documents matching the given query. It's
+worth noting two things:
+
+* The documents returned are Search documents (a set of Solr
+  field/values), not a Riak value
+* The HTTP response is a direct Solr response, while the drivers use
+  Protocol Buffers and are encoded with different field names
+
+This is a common HTTP `response` value:
+
+```json
+{
+  "numFound": 1,
+  "start": 0,
+  "maxScore": 1.0,
+  "docs": [
+    {
+      "leader_b": true,
+      "age_i": 30,
+      "name_s": "Lion-o",
+      "_yz_id": "animals_cats_liono_37",
+      "_yz_rk": "liono",
+      "_yz_rt": "animals",
+      "_yz_rb": "cats"
+    }
+  ]
+}
+```
+
+The most important field returned is `docs`, which is the list of
+objects that each contain fields about matching index documents. 
The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+    .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation($key, $bucket, $btype)
+  ->build()
+  ->execute()
+  ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger, query `age_i:[0 TO
+30]`. If you wanted to find all cats 30 or older, you could use a
+wildcard as the top end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('age_i:[30 TO *]')
+  ->build()
+  ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`. 
+ +```java +String index = "famous"; +String query = "leader_b:true AND age_i:[30 TO *]"; +Search searchOp = new Search.Builder(index, query).build(); +cluster.execute(searchOp); +SearchOperation.Response results = searchOp.get(); +``` + +```ruby +client.search("famous", "leader_b:true AND age_i:[30 TO *]") +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('famous') + ->withQuery('leader_b:true AND age_i:[30 TO *]') + ->build() + ->execute(); +``` + +```python +client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]') +``` + +```csharp +var search = new RiakSearchRequest +{ + Query = new RiakFluentSearch("famous", "leader_b") + .Search("true").AndBetween("age_i", "30", "*") + .Build() +}; +``` + +```javascript +var search = new Riak.Commands.YZ.Search.Builder() + .withIndexName('famous') + .withQuery('leader_b:true AND age_i:[30 TO *]') + .withCallback(search_cb) + .build(); +client.execute(search); +``` + +```erlang +riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>), +``` + +```golang +cmd, err := riak.NewSearchCommandBuilder(). + WithIndexName("famous"). + WithQuery("leader_b:true AND age_i:[30 TO *]"). + Build(); +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B25%20TO%20*%5D" | json_pp +``` + +### Deleting Indexes + +Indexes may be deleted if they have no buckets associated with them: + +```java +String index = "famous"; +YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index) + .build(); +cluster.execute(deleteOp); +``` + +```ruby +client.delete_search_index('famous') +``` + +```php +(new Command\Builder\Search\DeleteIndex($riak)) + ->withName('famous') + ->build() + ->execute(); +``` + +```python +client.delete_search_index('famous') +``` + +```csharp +var rslt = client.DeleteSearchIndex("famous"); +``` + +```javascript +function delete_cb(err, rslt) { + if (err) { + throw new Error(err); + } + if (rslt === true) { + // success + } else { + // error + } +} + +// NB: first make sure that no bucket types or buckets are using the index +var search = new Riak.Commands.YZ.DeleteIndex.Builder() + .withIndexName('famous') + .withCallback(delete_cb) + .build(); +client.execute(search); +``` + +```erlang +riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []), +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("_dont_index_"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} + +cmd, err = riak.NewDeleteIndexCommandBuilder(). + WithIndexName("famous"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XDELETE $RIAK_HOST/search/index/famous +``` + +If an index does have a bucket associated with it, then that index's +`search_index` property must be changed to either a different index name +or to the sentinel value `_dont_index_`. + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"_dont_index_"}}' +``` + +#### Pagination + +A common requirement you may face is paginating searches, where an +ordered set of matching documents are returned in non-overlapping +sequential subsets (in other words, *pages*). 
This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, with `start` calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "*:*")
+        .withStart(start)
+        .withNumRows(rowsPerPage)
+        .build();
+client.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('*:*')
+  ->withMaxRows($maxRows)
+  ->withStartRow($start)
+  ->build()
+  ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results, as sketched below.
+
+If you want to sort by score without repeating results then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query.
+
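+As a minimal sketch of that key-ordered approach, here is one way to do
+it with the Python client. The `sort` keyword is an assumption that your
+client passes extra Solr query parameters through (as the Python
+client's `fulltext_search` does with keyword arguments):
+
+```python
+ROWS_PER_PAGE = 2
+
+def fetch_page(page):
+    # Offset of the first document on this page
+    start = ROWS_PER_PAGE * (page - 1)
+    # Sort on type/bucket/key, which are stable across replicas,
+    # rather than on score or _yz_id
+    return client.fulltext_search('famous', '*:*',
+                                  start=start,
+                                  rows=ROWS_PER_PAGE,
+                                  sort='_yz_rt asc, _yz_rb asc, _yz_rk asc')
+
+page_two = fetch_page(2)
+print(page_two['docs'])
+```
+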
+ +[This issue](https://github.com/basho/yokozuna/issues/355) is caused by +the way Search must minimally distribute a query across multiple Solr +nodes (called a *coverage plan*) and then filter duplicate results to +retrieve a full result set. Since this plan is frequently recalculated, +successive page queries may use a different plan, and thus calculate +alternate `score`s or filter different `_yz_id` values. We have plans to +fix this shortcoming in a future version of Riak. + +### MapReduce + +Riak Search allows for piping search results as inputs for +[MapReduce]({{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce/) jobs. This is a useful cross-section for +performing post-calculations of results or aggregations of ad-hoc +queries. The Riak Search MapReduce integration works similarly to +regular MapReduce, with the notable exception that your input is not a +bucket, but rather index and query arguments to the `yokozuna` module +and `mapred_search` function (an Erlang `module:function` pair that adds +the Riak Search hook to MapReduce). + +```json +{ + "inputs": { + "module": "yokozuna", + "function": "mapred_search", + "arg": ["famous","NOT leader_b:true"] + }, + "query": [ + { + "map": { + "language": "javascript", + "keep": false, + "source": "function(v) { return [1]; }" + } + }, + { + "reduce": { + "language": "javascript", + "keep": true, + "name": "Riak.reduceSum" + } + } + ] +} +``` + +In this example we're searching for all famous cats that are not +leaders and counting up the results using Javascript for both map and +reduce. It should return the reduced sum of `[3]`. + +```curl +curl -XPOST $RIAK_HOST/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}' +``` + diff --git a/content/riak/kv/2.9.4/developing/usage/searching-data-types.md b/content/riak/kv/2.9.4/developing/usage/searching-data-types.md new file mode 100644 index 0000000000..e62b5c7445 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/searching-data-types.md @@ -0,0 +1,1684 @@ +--- +title: "Searching with Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Searching with Data Types" + identifier: "usage_search_data_types" + weight: 111 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.4/dev/search/search-data-types + - /riak/kv/2.9.4/dev/search/search-data-types +--- + +Although [Riak Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types) function differently from other +Riak objects in some respects, when you're using Search you can think of +them as normal Riak objects with special metadata attached (metadata +that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#maps) +can be indexed and have their contents searched just like other Riak +objects. + +## Data Type MIME Types + +Like all objects stored in Riak, Riak Data Types are assigned content +types. Unlike other Riak objects, this happens automatically. When you +store, say, a counter in Riak, it will automatically be assigned the +type `application/riak_counter`. 
The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types (with the exception of maps) has its own default
+schema, which means that the `_yz_default` schema will automatically
+index Data Types on the basis of their assigned content type. This means
+that there is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#data-types-and-search-examples) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#maps), there are four schemas
+for embedded, aka dynamic, fields. 
Flags are indexed as booleans: + +```xml +<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" /> +``` + +Counters, like their top-level counterparts, are indexed as integers: + +```xml +<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" /> +``` + +Registers are indexed as strings, but unlike sets they are not +multi-valued. + +```xml +<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" /> +``` + +Finally, sets at the embedded level are indexed as multi-valued strings. + +```xml +<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" /> +``` + +To query embedded fields, you must provide the name of the field. The +table below provides some examples: + +Query | Syntax +:-----|:------ +Maps containing a set called `hobbies` | `hobbies_set:*` +Maps containing a `score` counter over 50 | `score_counter:[50 TO *]` +Maps containing disabled `advanced` flags | `advanced_flag:false` +Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]` + +You can also query maps within maps, which is covered in the **Querying +maps within maps** section below. + +## Data Types and Search Examples + +In this section, we'll start with two simple examples, one involving +counters and the other involving sets. Later on, we'll introduce a +slightly more complex map example. + +## Counters Example + +Let's say that we're storing scores in a multiplayer online game in +Riak. The game is called Boulderdash and it involves smashing digital +boulders armed with nothing but witty retorts and arcane trivia +knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#counters) simply called +`counters`, like so: + +```bash +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +riak-admin bucket-type activate counters +``` + +Now, we'll create a search index called `scores` that uses the default +schema (as in some of the examples above): + +```java +YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default"); +StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex) + .build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('scores', '_yz_default') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak)) + ->withName('scores') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('scores', '_yz_default') +``` + +```csharp +var idx = new SearchIndex("scores", "_yz_default"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'scores' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/hobbies \ + -H 'Content-Type: application/json' \ + -d '{"schema":"_yz_default"}' +``` + +Now, we can modify our `counters` bucket type to associate that bucket +type with our `scores` index: + +```bash +riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}' +``` + +At this point, all of the counters that we stored in any bucket with the +bucket type `counters` will be indexed in our `scores` index. 
So let's
+start playing with some counters. All counters will be stored in the
+bucket `people`, while the key for each counter will be the username of
+each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu = new CounterUpdate(10);
+UpdateCounter update = new UpdateCounter.Builder(christopherHitchensCounter, cu)
+    .build();
+client.execute(update);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+cu = new CounterUpdate(25);
+update = new UpdateCounter.Builder(joanRiversCounter, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(10)
+  ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+  ->buildLocation('joan_rivers', 'people', 'counters')
+  ->build()
+  ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"chris_hitchens">>,
+                            riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"joan_rivers">>,
+                            riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25. 
Let's query to see how many counters have a value greater
+than 20, just to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('scores')
+  ->withQuery('counter:[20 TO *]')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[20 TO *]" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over 20.
+
+To find out which counter that is, we can dig into our results:
+
+```java
+// Using the "results" object from above:
+int numberFound = results.numResults();
+Map<String, List<String>> foundObject = results.getAllResults().get(0);
+String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
+String bucket = foundObject.get("_yz_rb").get(0); // "people"
+String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
+```
+
+```ruby
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```php
+$doc = $response->getDocs()[0];
+
+// The key
+$doc->_yz_rk; // 'joan_rivers'
+
+// The bucket
+$doc->_yz_rb; // 'people'
+
+// The bucket type
+$doc->_yz_rt; // 'counters'
+```
+
+```python
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+var doc = rslt.docs[0];
+
+var key = doc['_yz_rk'];
+var bucket = doc['_yz_rb'];
+var bucketType = doc['_yz_rt'];
+```
+
+```erlang
+Docs = Results#search_results.docs,
+Doc = lists:nth(1, Docs),
+Key = proplists:get_value(<<"_yz_rk">>, Doc),
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
+BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
+```
+
+```curl
+# Use the JSON object from above to locate bucket, key, and bucket type
+# information
+```
+
+Alternatively, we can see how many counters have values below 15:
+
+```java
+String index = "scores";
+String query = "counter:[* TO 15]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[* TO 15]')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[* TO 15]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[* TO 15]')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[* TO 15]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
+``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp +``` + +Or we can see how many counters have a value of 17 exactly: + +```java +// Using the same method as above, just changing the query: +String query = "counter:17"; +``` + +```ruby +results = client.search('scores', 'counter:17') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('scores') + ->withQuery('counter:17') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('scores', 'counter:17') +``` + +```csharp +var search = new RiakSearchRequest("scores", "counter:17"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('scores') + .withQuery('counter:17') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>). +``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp +``` + +## Sets Example + +Let's say that we're storing information about the hobbies of a group of +people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#sets) simply called `sets`, +like so: + +```bash +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type activate sets +``` + +Now, we'll create a Search index called `hobbies` that uses the default +schema (as in some of the examples above): + +```java +YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies"); +StoreIndex storeIndex = + new StoreIndex.Builder(hobbiesIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('hobbies', '_yz_default') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak)) + ->withName('hobbies') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('hobbies', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("hobbies", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'hobbies' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/hobbies \ + -H 'Content-Type: application/json' \ + -d '{"schema": "_yz_default"}' +``` + +Now, we can modify our `sets` bucket type to associate that bucket type +with our `hobbies` index: + +```bash +riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}' +``` + +Now, all of the sets that we store in any bucket with the bucket type +`sets` will be automatically indexed as a set. 
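+
+If you want to double-check that both properties took effect before
+writing any data, `riak-admin bucket-type status` prints a bucket
+type's current properties, which should now list both the `datatype`
+and the `search_index` settings:
+
+```bash
+riak-admin bucket-type status sets
+```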
+So let's say that we store three sets for two different people
+describing their respective hobbies, in the bucket `people`:
+
+```java
+Namespace peopleBucket = new Namespace("sets", "people");
+
+Location mikeDitkaSet = new Location(peopleBucket, "ditka");
+SetUpdate su1 = new SetUpdate()
+        .add("football")
+        .add("winning");
+UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();
+
+Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
+SetUpdate su2 = new SetUpdate()
+        .add("wailing")
+        .add("rocking")
+        .add("winning");
+UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();
+
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+
+ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('football')
+    ->add('winning')
+    ->buildLocation('ditka', 'people', 'sets');
+
+$builder->build()->execute();
+
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('wailing')
+    ->add('rocking')
+    ->add('winning')
+    ->buildLocation('dio', 'people', 'sets');
+
+$builder->build()->execute();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('people')
+
+mike_ditka_set = Set(bucket, 'ditka')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+mike_ditka_set.store()
+
+ronnie_james_dio_set = Set(bucket, 'dio')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+ronnie_james_dio_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("ditka")
+    .WithAdditions(new[] { "football", "winning" })
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("dio")
+    .WithAdditions(new[] { "wailing", "rocking", "winning" })
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'ditka',
+            additions: ['football', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'dio',
+            additions: ['wailing', 'rocking', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+%% riakc_set values are immutable, so each add_element returns a new set
+MikeDitkaSet = riakc_set:new(),
+MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet),
+MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
+RonnieJamesDioSet = riakc_set:new(),
+RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet),
+RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
+RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),
+
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"ditka">>,
+                            riakc_set:to_op(MikeDitkaSet2)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"dio">>,
+                            riakc_set:to_op(RonnieJamesDioSet3)).
+``` + +Now, we can query our `hobbies` index to see if anyone has the hobby +`football`: + +```java +// Using the same method explained above, just changing the query: +String query = "set:football"; +``` + +```ruby +results = client.search('hobbies', 'set:football') +# This should return a dict with fields like 'num_found' and 'docs' +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('hobbies') + ->withQuery('set:football') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('hobbies', 'set:football') +# This should return a dict with fields like 'num_found' and 'docs' +``` + +```csharp +var search = new RiakSearchRequest("hobbies", "set:football"); +var rslt = client.Search(search); + +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); + +var firstDoc = searchResult.Documents.First(); +Console.WriteLine("Key: {0} Bucket: {1} Type: {2}", + firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType); +``` + +```javascript +function search_cb(err, rslt) { + logger.info("sets numFound: '%d', docs: '%s'", + rslt.numFound, JSON.stringify(rslt.docs)); + + var doc = rslt.docs[0]; + var key = doc['_yz_rk']; + var bucket = doc['_yz_rb']; + var bucketType = doc['_yz_rt']; +} + +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('hobbies') + .withQuery('set:football') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>). +``` + +```curl +curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp +``` + +Let's see how many sets contain the element `football`: + +```java +// Using the same method explained above for getting search results: +int numberFound = results.numResults(); // 1 +``` + +```ruby +results['num_found'] +# 1 +``` + +```php +$response->getNumFound(); // 1 +``` + +```python +results['num_found'] +# 1 +``` + +```csharp +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); +``` + +```javascript +rslt.numFound; +// 1 +``` + +```erlang +NumberFound = Results#search_results.num_found. +%% 1 +``` + +```curl +``` + +Success! We stored two sets, only one of which contains the element +`football`. Now, let's see how many sets contain the element `winning`: + +```java +// Using the same method explained above, just changing the query: +String query = "set:winning"; + +// Again using the same method from above: +int numberFound = results.numResults(); // 2 +``` + +```ruby +results = client.search('hobbies', 'set:winning') +results['num_found'] +# 2 +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('hobbies') + ->withQuery('set:winning') + ->build() + ->execute(); + +$response->getNumFound(); // 2 +``` + +```python +results = client.fulltext_search('hobbies', 'set:winning') +results['num_found'] +# 2 +``` + +```csharp +var search = new RiakSearchRequest("hobbies", "set:winning"); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('hobbies') + .withQuery('set:winning') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>). +NumberFound = Results#search_results.num_found. +%% 2 +``` + +Just as expected, both sets we stored contain the element `winning`. 
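+The same query can also be run over HTTP, following the pattern of the
+earlier curl examples:
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:winning" | json_pp
+```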
+ +## Maps Example + +This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types) +tutorial. That tutorial walks you through storing CMS-style user data in +Riak [maps]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/#maps), and we'd suggest that you +familiarize yourself with that tutorial first. More specifically, user +data is stored in the following fields in each user's map: + +* first name in a `first_name` register +* last name in a `last_name` register +* whether the user is an enterprise customer in an `enterprise_customer` + flag +* the number of times the user has visited the company page in a + `page_visits` counter +* a list of the user's interests in an `interests` set + +First, let's create and activate a bucket type simply called `maps` that +is set up to store Riak maps: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type activate maps +``` + +Now, let's create a search index called `customers` using the default +schema: + +```java +YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default"); +StoreIndex storeIndex = + new StoreIndex.Builder(customersIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('customers', '_yz_default') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('customers') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('customers', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("customers", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'customers' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>). 
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Idris"))
+        .update("last_name", new RegisterUpdate("Elba"))
+        .update("enterprise_customer", new FlagUpdate(false))
+        .update("page_visits", new CounterUpdate(10))
+        .update("interests", new SetUpdate().add("acting").add("being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Joan"))
+        .update("last_name", new RegisterUpdate("Jett"))
+        // Joan Jett is not an enterprise customer, so we don't need to
+        // explicitly disable the "enterprise_customer" flag, as all
+        // flags are disabled by default
+        .update("page_visits", new CounterUpdate(25))
+        .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Idris')
+    ->updateRegister('last_name', 'Elba')
+    ->updateFlag('enterprise_customer', true)
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Joan')
+    ->updateRegister('last_name', 'Jett')
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, false)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters above 15.
+Unlike the counters example above, we have to specify _which_ counter
+we're querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('page_visits_counter:[15 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter
+above 15. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register; // 'Joan'
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we stored thus far had an `interests` set. First, let's
+see how many of our maps even _have_ sets called `interests` using a
+wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set.
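+
+The equivalent HTTP query follows the same pattern as the earlier curl
+examples:
+
+```curl
+curl "$RIAK_HOST/search/query/customers?wt=json&q=interests_set:*" | json_pp
+```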
+Now let's see how many maps have items in `interests` sets that begin
+with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:loving*')
+    ->build()
+    ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has one item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will have a first name only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+        .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+        .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate)
+        .build();
+client.execute(addSubMap);
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'John Luther');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'Robert Plant');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+Querying maps within maps involves constructing queries that separate the
+different levels of depth with a single dot. Here's an example query for
+finding maps that have a `name` register embedded within an `alter_ego`
+map:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Once we know how to query embedded fields like this, we can query those
+just like any other.
+Let's find out which maps have an `alter_ego` sub-map that contains a
+`name` register that ends with `Plant`, and display that customer's
+first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*Plant')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
+
diff --git a/content/riak/kv/2.9.4/developing/usage/secondary-indexes.md b/content/riak/kv/2.9.4/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..1be1dc8492
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/secondary-indexes.md
@@ -0,0 +1,2035 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/using/2i
+  - /riak/kv/2.9.4/dev/using/2i
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary indexes can be either a binary or string, such as
+`sensor_1_data` or `admin_user` or `click_event`, or an integer, such as
+`99` or `141121`.
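+
+As a quick preview of the fuller examples later in this document,
+attaching an index over HTTP is just a matter of adding an
+`x-riak-index-*` header at write time (this sketch assumes a local
+node listening on port 8098):
+
+```curl
+curl -XPOST localhost:8098/buckets/users/keys/john_smith \
+  -H 'x-riak-index-twitter_bin: jsmith123' \
+  -H 'Content-Type: application/json' \
+  -d '{"userData":"data"}'
+```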
+ +[Riak Search]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/2.9.4/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
Let's say that an
+application would like to add a Twitter handle and an email address to
+this object as secondary indexes.
+
+```java
+Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith");
+
+// In the Java client (and all clients), if you do not specify a bucket type,
+// the client will use the default type. And so the following store command
+// would be equivalent to the one above:
+johnSmithKey = new Location(new Namespace("users"), "john_smith");
+
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'user_data':{ ... }}"));
+
+obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123");
+obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com");
+
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(johnSmithKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('default').bucket('users')
+obj = Riak::RObject.new(bucket, 'john_smith')
+obj.content_type = 'application/json'
+obj.raw_data = '{"user_data":{ ... }}'
+
+# String/binary indexes must be set as an array of strings
+obj.indexes['twitter_bin'] = %w{ jsmith123 }
+obj.indexes['email_bin'] = %w{ jsmith@basho.com }
+obj.store
+
+# In the Ruby client (and all clients), if you do not specify a bucket
+# type, the client will use the default type. And so the following set
+# of commands would be equivalent to the one above:
+
+bucket = client.bucket('users')
+# repeat the same commands for building the object
+obj.store
+```
+
+```php
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter_bin', 'jsmith123')
+    ->addValueToIndex('email_bin', 'jsmith@basho.com');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->buildLocation('john_smith', 'users', 'default')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('default').bucket('users')
+# In the Python client (and all clients), if you do not specify a bucket type,
+# the client will use the default type. And so the following store command
+# would be equivalent to the one above:
+bucket = client.bucket('users')
+
+obj = RiakObject(client, bucket, 'john_smith')
+obj.content_type = 'text/plain'
+obj.data = '...user data...'
+obj.add_index('twitter_bin', 'jsmith123')
+obj.add_index('email_bin', 'jsmith@basho.com')
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("default", "users", "john_smith");
+var obj = new RiakObject(id, "...user data...",
+    RiakConstants.ContentTypes.TextPlain);
+obj.BinIndex("twitter").Set("jsmith123");
+obj.BinIndex("email").Set("jsmith@basho.com");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucket('users');
+riakObj.setKey('john_smith');
+riakObj.setValue('...user data...');
+riakObj.addToIndex('twitter_bin', 'jsmith123');
+riakObj.addToIndex('email_bin', 'jsmith@basho.com');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"default">>, <<"users">>},
+                    <<"john_smith">>,
+                    <<"...user data...">>,
+                    <<"text/plain">>),
+%% In the Erlang client (and all clients), if you do not specify a bucket type,
+%% the client will use the default type. 
And so the following object would be +%% equivalent to the one above: + +Obj = riakc_obj:new(<<"users">>, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index( + MD1, + [{{binary_index, "twitter"}, [<<"jsmith123">>]}, + {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]), +Obj2 = riakc_obj:update_metadata(Obj, MD2), +riakc_pb_socket:put(Pid, Obj2). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + BucketType: "indexes", + Bucket: "users", + Key: "john_smith", + Value: []byte("…user data…"), +} + +obj.AddToIndex("twitter_bin", "jsmith123") +obj.AddToIndex("email_bin", "jsmith@basho.com") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ + -H 'x-riak-index-twitter_bin: jsmith123' \ + -H 'x-riak-index-email_bin: jsmith@basho.com' \ + -H 'Content-Type: application/json' \ + -d '{"userData":"data"}' +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.4/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.4/developing/getting-started) section. + +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if 
(rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucket('users')
+    .withIndexName('twitter_bin')
+    .withIndexKey('jsmith123')
+    .withCallback(query_cb)
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+{ok, Results} =
+    riakc_pb_socket:get_index(Pid,
+                              <<"users">>, %% bucket
+                              {binary_index, "twitter"}, %% index name
+                              <<"jsmith123">>). %% index
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("users").
+    WithIndexName("twitter_bin").
+    WithIndexKey("jsmith123").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```
+
+The response:
+
+```java
+john_smith
+```
+
+```ruby
+["john_smith"]
+```
+
+```php
+['john_smith']
+```
+
+```python
+['john_smith']
+```
+
+```csharp
+john_smith
+```
+
+```javascript
+john_smith
+```
+
+```erlang
+{ok,{index_results_v1,[<<"john_smith">>],
+                      undefined,undefined}}.
+```
+
+```golang
+john_smith
+```
+
+```curl
+{
+  "keys": [
+    "john_smith"
+  ]
+}
+```
+
+## Examples
+
+To run the following examples, make sure that Riak is configured to use
+an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].
+
+## Indexing Objects
+
+The following example indexes four different objects. Notice that we're
+storing both integer and string (aka binary) fields. Field names are
+automatically lowercased, some fields have multiple values, and
+duplicate fields are automatically de-duplicated, as in the following
+example:
+
+```java
+Namespace peopleBucket = new Namespace("indexes", "people");
+
+RiakObject larry = new RiakObject()
+        .setValue(BinaryValue.create("My name is Larry"));
+larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
+larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
+StoreValue storeLarry = new StoreValue.Builder(larry)
+        .withLocation(peopleBucket.setKey("larry"))
+        .build();
+client.execute(storeLarry);
+
+RiakObject moe = new RiakObject()
+        .setValue(BinaryValue.create("My name is Moe"));
+moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
+moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
+StoreValue storeMoe = new StoreValue.Builder(moe)
+        .withLocation(peopleBucket.setKey("moe"))
+        .build();
+client.execute(storeMoe);
+
+RiakObject curly = new RiakObject()
+        .setValue(BinaryValue.create("My name is Curly"));
+curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
+curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
+StoreValue storeCurly = new StoreValue.Builder(curly)
+        .withLocation(peopleBucket.setKey("curly"))
+        .build();
+client.execute(storeCurly);
+
+RiakObject veronica = new RiakObject()
+        .setValue(BinaryValue.create("My name is Veronica"));
+veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
+        .add("val4").add("val4").add("val4a").add("val4b");
+veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
+        .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
+StoreValue storeVeronica = new StoreValue.Builder(veronica)
+        .withLocation(peopleBucket.setKey("veronica"))
+        .build();
+client.execute(storeVeronica);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = Riak::RObject.new(bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'My name is Larry'
+obj1.indexes['field1_bin'] = %w{ val1 }
+# Like binary/string indexes, integer indexes must be set as an array,
+# even if you wish to add only a single index
+obj1.indexes['field2_int'] = [1001]
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'My name is Moe'
+obj2.indexes['Field1_bin'] = %w{ val2 }
+obj2.indexes['Field2_int'] = [1002]
+obj2.store
+
+obj3 = Riak::RObject.new(bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.raw_data = 'My name is Curly'
+obj3.indexes['FIELD1_BIN'] = %w{ val3 }
+obj3.indexes['FIELD2_INT'] = [1003]
+obj3.store
+
+obj4 = Riak::RObject.new(bucket, 'veronica')
+obj4.content_type = 'text/plain'
+obj4.raw_data = 'My name is Veronica'
+# Each assignment to an index replaces its previous value, so all
+# values for an index must be set in a single array
+obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
+obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1004, 1007]
+obj4.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('people', 'indexes');
+
+$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val1')
+    ->addValueToIndex('field2_int', 1001);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('larry', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('Field1_bin', 'val2')
+    ->addValueToIndex('Field2_int', 1002);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('moe', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('FIELD1_BIN', 'val3')
+    ->addValueToIndex('FIELD2_int', 1003);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('curly', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4a')
+    ->addValueToIndex('field1_bin', 'val4b')
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1005)
+    ->addValueToIndex('field2_int', 1006)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1007);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = RiakObject(client, bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.data = 'My name is Larry'
+obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.data = 'My name is Moe'
+obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
+obj2.store()
+
+obj3 = RiakObject(client, bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.data = 'My name is Curly'
+obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
+obj3.store()
+
+obj4 = RiakObject(client, bucket, 
'veronica') +obj4.content_type = 'text/plain' +obj4.data = 'Veronica' +obj4.add_index('field1_bin', 'val4').add_index('field1_bin', 'val4a').add_index('field1_bin', 'val4b').add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1005).add_index('field2_int', 1006).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1007) +obj4.store() +``` + +```csharp +var larryId = new RiakObjectId("indexes", "people", "larry"); +var larry = new RiakObject(larryId, "My name is Larry", + RiakConstants.ContentTypes.TextPlain); + +larry.BinIndex("field1").Set("val1"); +larry.IntIndex("field2").Set(1001); + +client.Put(larry); + +var moeId = new RiakObjectId("indexes", "people", "moe"); +var moe = new RiakObject(moeId, "My name is Moe", + RiakConstants.ContentTypes.TextPlain); + +moe.BinIndex("Field1").Set("val2"); +moe.IntIndex("Field2").Set(1002); + +client.Put(moe); + +var curlyId = new RiakObjectId("indexes", "people", "curly"); +var curly = new RiakObject(curlyId, "My name is Curly", + RiakConstants.ContentTypes.TextPlain); + +curly.BinIndex("FIELD1").Set("val3"); +curly.IntIndex("FIELD2").Set(1003); + +client.Put(curly); + +var veronicaId = new RiakObjectId("indexes", "people", "veronica"); +var veronica = new RiakObject(veronicaId, "My name is Veronica", + RiakConstants.ContentTypes.TextPlain); + +veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" }); +veronica.IntIndex("FIELD2").Set(new BigInteger[] { + 1004, 1005, 1006, 1004, 1004, 1007 +}); + +client.Put(veronica); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var storeFuncs = [ + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('larry'); + riakObj.setValue('My name is Larry'); + riakObj.addToIndex('field1_bin', 'val1'); + riakObj.addToIndex('field2_int', 1001); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('moe'); + riakObj.setValue('My name is Moe'); + riakObj.addToIndex('Field1_bin', 'val2'); + riakObj.addToIndex('Field2_int', 1002); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('curly'); + riakObj.setValue('My name is Curly'); + riakObj.addToIndex('FIELD1_BIN', 'val3'); + riakObj.addToIndex('FIELD2_INT', 1003); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('veronica'); + riakObj.setValue('My name is Veronica'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4a'); + riakObj.addToIndex('FIELD1_bin', 'val4b'); + riakObj.addToIndex('FIELD2_int', 1004); + 
riakObj.addToIndex('FIELD2_int', 1005); + riakObj.addToIndex('FIELD2_int', 1006); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1007); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + } +]; +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Larry = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"My name is Larry">>, + <<"text/plain">>), +LarryMetadata = riakc_obj:get_update_metadata(Larry), +LarryIndexes = riakc_obj:set_secondary_index( + LarryMetadata, + [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}] +), +LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes). + +Moe = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"moe">>, + <<"My name is Moe">>, + <<"text/plain">>), +MoeMetadata = riakc_obj:get_update_metadata(Moe), +MoeIndexes = riakc_obj:set_secondary_index( + MoeMetadata, + [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}] +), +MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes). + +Curly = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"curly">>, + <<"My name is Curly">>, + <<"text/plain">>), +CurlyMetadata = riakc_obj:get_update_metadata(Curly), +CurlyIndexes = riakc_obj:set_secondary_index( + CurlyMetadata, + [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}] +), +CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes). + +Veronica = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"veronica">>, + <<"My name is Veronica">>, + <<"text/plain">>), +VeronicaMetadata = riakc_obj:get_update_metadata(Veronica), +VeronicaIndexes = riakc_obj:set_secondary_index( + VeronicaMetadata, + [{{binary_index, "field1"}, [<<"val4">>]}, {{binary_index, "field1"}, [<<"val4">>]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1005]}, {{integer_index, "field2"}, [1006]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1007]}] +), +VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes). +``` + +```golang +o1 := &riak.Object{ + Key: "larry", + Value: []byte("My name is Larry"), +} +o1.AddToIndex("field1_bin", "val1") +o1.AddToIntIndex("field2_int", 1001) + +o2 := &riak.Object{ + Key: "moe", + Value: []byte("My name is Moe"), +} +o2.AddToIndex("Field1_bin", "val2") +o2.AddToIntIndex("Field2_int", 1002) + +o3 := &riak.Object{ + Key: "curly", + Value: []byte("My name is Curly"), +} +o3.AddToIndex("FIELD1_BIN", "val3") +o3.AddToIntIndex("FIELD2_INT", 1003) + +o4 := &riak.Object{ + Key: "veronica", + Value: []byte("My name is Veronica"), +} +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4a") +o4.AddToIndex("FIELD1_bin", "val4b") +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1005) +o4.AddToIntIndex("FIELD2_int", 1006) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1007) + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "text/plain" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithContent(obj). 
+
+## Invalid Field Names and Types
+
+The following examples demonstrate what happens when an index field is
+specified with an invalid field name or type. The system responds with
+`400 Bad Request` and a description of the error.
+
+Invalid field name:
+
+```java
+// The Java client will not allow you to provide invalid index names,
+// because you are not required to add "_bin" or "_int" to the end of
+// those names
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+obj = Riak::RObject.new(bucket, 'larry')
+obj.indexes['field2_foo'] = [1001]
+
+# The Ruby client will let you get away with this...at first. But when
+# you attempt to store the object, you will get an error response such
+# as this:
+
+NoMethodError: undefined method 'map' for 1001:Fixnum
+```
+
+```php
+// throws \InvalidArgumentException
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter', 'jsmith123');
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+obj = RiakObject(client, bucket, 'larry')
+obj.add_index('field2_foo', 1001)
+
+# Result:
+riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'."
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
+>
+> To avoid such issues, an option has been added to the `riak.conf` file that allows you to enable or disable a node's participation in 2i queries. Setting `participate_in_coverage = disabled` will prevent the node in question from participating. The recommended use of this feature is to keep newly added nodes, which have not yet received all of their data, from participating in 2i queries and returning inconsistent results. Changing the `participate_in_coverage` setting requires Riak to be restarted on that node for the change to take effect. The default setting is `enabled`.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field1_bin')
+  ->withScalarValue('val1')
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field2_int')
+  ->withScalarValue(1001)
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": ["indexes", "people"],
+    "index": "field1_bin",
+    "key": "val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4")
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2'..'val4')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field1_bin')
+  ->withRangeValue('val2', 'val4')
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2', 'val4')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withRange('val2', 'val4')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"}, %% index name
+    <<"val2">>, <<"val4">> %% range query for index values between "val2" and "val4"
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithRange("val2", "val4").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4
+```
+
+Or query an integer index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002..1004)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('people', 'indexes')
+  ->withIndexName('field2_int')
+  ->withRangeValue(1002, 1004)
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002, 1004)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withRange(1002, 1004)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"}, %% index name
+    1002, 1004 %% range query for index values between 1002 and 1004
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntRange(1002, 1004).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": ["indexes", "people"],
+    "index": "field2_int",
+    "start": 1002,
+    "end": 1004
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('tweets', 'indexes')
+  ->withIndexName('hashtags_bin')
+  ->withRangeValue('rock', 'rocl')
+  ->withReturnTerms()
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">>, %% range query for index values between "rock" and "rocl"
+    [{return_terms, true}] %% return the matched terms along with the keys
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
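+
+In the client libraries the matched terms are usually surfaced directly
+in the result entries. As a sketch, the Python client yields
+`(term, key)` pairs when `return_terms` is set (treat the exact result
+shape as client-specific):
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+
+# Each entry pairs the matched index value (the hash tag) with the
+# key of the object that carries it
+for term, key in bucket.get_index('hashtags_bin', 'rock', 'rocl',
+                                  return_terms=True):
+    print(term, key)
+```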
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withMaxResults(5)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('tweets', 'indexes')
+  ->withIndexName('hashtags_bin')
+  ->withRangeValue('ri', 'ru')
+  ->withMaxResults(5)
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5)
+```
+
+```csharp
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+function do_query(continuation) {
+    var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+        .withBucketType('indexes')
+        .withBucket('tweets')
+        .withIndexName('hashtags_bin')
+        .withRange('ri', 'ru')
+        .withMaxResults(5)
+        .withCallback(pagination_cb);
+
+    if (continuation) {
+        binIdxCmdBuilder.withContinuation(continuation);
+    }
+
+    client.execute(binIdxCmdBuilder.build());
+}
+
+var query_keys = [];
+function pagination_cb(err, rslt) {
+    if (err) {
+        logger.error("query_cb err: '%s'", err);
+        return;
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+        query_keys = [];
+
+        if (rslt.continuation) {
+            do_query(rslt.continuation);
+        }
+    }
+
+    if (rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+do_query();
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [{max_results, 5}]
+).
+```
+
+```golang
+func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error {
+    builder := riak.NewSecondaryIndexQueryCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("tweets").
+        WithIndexName("hashtags_bin").
+        WithRange("ri", "ru").
+        WithMaxResults(5)
+
+    if continuation != nil && len(continuation) > 0 {
+        builder.WithContinuation(continuation)
+    }
+
+    cmd, err := builder.Build()
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    printIndexQueryResults(cmd)
+
+    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
+    if sciq.Response == nil {
+        return errors.New("[DevUsing2i] expected response but did not get one")
+    }
+
+    rc := sciq.Response.Continuation
+    if rc != nil && len(rc) > 0 {
+        return doPaginatedQuery(cluster, sciq.Response.Continuation)
+    }
+
+    return nil
+}
+
+func queryingPagination(cluster *riak.Cluster) error {
+    return doPaginatedQuery(cluster, nil)
+}
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true"
+```
+
+Here is an example JSON response (your client-specific response may differ):
+
+```json
+{
+  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
+  "results": [
+    { "rice": "349222574510710785" },
+    { "rickross": "349222868095217664" },
+    { "ridelife": "349221819552763905" },
+    { "ripjake": "349220649341952001" },
+    { "ripjake": "349220687057129473" }
+  ]
+}
+```
+
+Take the continuation value from the previous result set and feed it
+back into the query.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
+        .withMaxResults(5)
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+  'hashtags_bin',
+  'ri'..'ru',
+  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+  max_results: 5,
+  return_terms: true
+)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('tweets', 'indexes')
+  ->withIndexName('hashtags_bin')
+  ->withRangeValue('ri', 'ru')
+  ->withMaxResults(5)
+  ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+    'hashtags_bin',
+    'ri', 'ru',
+    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+    max_results=5,
+    return_terms=True
+)
+```
+
+```csharp
+// rslt is the previous 2i fetch result
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+// See above example
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [
+        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
+        {max_results, 5},
+        {return_terms, true}
+    ]
+).
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
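+
+Putting the pieces together, a paginated query is simply a loop that
+feeds each page's continuation back into the next request until none is
+returned. Here is a sketch using the Python client, whose `get_index`
+call returns a page object with a `continuation` attribute, as used
+above (treat the attribute name as client-specific):
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+
+continuation = None
+while True:
+    page = bucket.get_index('hashtags_bin', 'ri', 'ru',
+                            max_results=5, return_terms=True,
+                            continuation=continuation)
+    for term, key in page:
+        print(term, key)
+    continuation = page.continuation
+    if continuation is None:
+        break  # no more pages remain
+```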
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with pagination (`max_results` and
+`continuation`) and with `return_terms`.
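+
+For instance, with the Python client the streaming call accepts the same
+options as the blocking one, so a single sketch can combine all three
+(treat the exact option support as client- and version-specific):
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+
+# Stream the first page of at most 5 (term, key) pairs; pass the
+# resulting continuation back in via continuation= to resume
+for term, key in bucket.stream_index('hashtags_bin', 'ri', 'ru',
+                                     return_terms=True, max_results=5):
+    print(term, key)
+```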
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and on object keys. See the pagination example above: hash
+tags (2i values) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages which contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index, as in the previous example, and pipes this into a MapReduce job that
+counts the number of records in the `people` bucket. In order to
+improve efficiency, the batch size has been increased from the default
+size of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key": "people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
+
diff --git a/content/riak/kv/2.9.4/developing/usage/security.md b/content/riak/kv/2.9.4/developing/usage/security.md
new file mode 100644
index 0000000000..a49296bfee
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/security.md
@@ -0,0 +1,100 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/advanced/client-security
+  - /riak/kv/2.9.4/dev/advanced/client-security
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/2.9.4/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, also known as [security sources]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients present a CA-generated certificate when connecting
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified using the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis. 
This means that +you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{<baseurl>}}riak/kv/2.9.4/developing/usage) have to use username and password. The approach +that you adopt will depend on your security needs. + +This document provides a general overview of how that works. For +managing security in Riak itself, see the following documents: + +* [Authentication and Authorization]({{<baseurl>}}riak/kv/2.9.4/using/security/basics) +* [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/) + +We also provide client-library-specific guides for the following +officially supported clients: + +* [Java]({{<baseurl>}}riak/kv/2.9.4/developing/usage/security/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.4/developing/usage/security/ruby) +* [PHP]({{<baseurl>}}riak/kv/2.9.4/developing/usage/security/php) +* [Python]({{<baseurl>}}riak/kv/2.9.4/developing/usage/security/python) +* [Erlang]({{<baseurl>}}riak/kv/2.9.4/developing/usage/security/erlang) + +## Certificates, Keys, and Authorities + +If Riak security is enabled, all client operations, regardless of the +security source you choose for those clients, must be over a secure SSL +connection. If you are using a self-generated Certificate Authority +(CA), Riak and connecting clients will need to share that CA. + +To use certificate-based auth, you will need to create a Public Key +Infrastructure (PKI) based on +[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central +foundation of your PKI should be a Certificate Authority (CA), created +inside of a secure environment, that can be used to sign certificates. +In addition to a CA, your client will need to have access to a private +key shared only by the client and Riak as well as a CA-generated +certificate. + +To prevent so-called [Man-in-the-Middle +attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private +keys should never be shared beyond Riak and connecting clients. + +> **HTTP not supported** +> +> Certificate-based authentication is available only through Riak's +[Protocol Buffers]({{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{<baseurl>}}riak/kv/2.9.4/developing/api/http). + +### Default Names + +In Riak's [configuration files]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#security), the +default certificate file names are as follows: + +Cert | Filename +:----|:------- +Certificate authority (CA) | `cacertfile.pem` +Private key | `key.pem` +CA-generated cert | `cert.pem` + +These filenames will be used in the client-library-specific tutorials. + diff --git a/content/riak/kv/2.9.4/developing/usage/security/erlang.md b/content/riak/kv/2.9.4/developing/usage/security/erlang.md new file mode 100644 index 0000000000..96682f4c13 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/security/erlang.md @@ -0,0 +1,115 @@ +--- +title_supertext: "Client Security:" +title: "Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Erlang" + identifier: "usage_security_erlang" + weight: 103 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.4/dev/advanced/client-security/erlang + - /riak/kv/2.9.4/dev/advanced/client-security/erlang +--- + +This tutorial shows you how to set up a Riak Erlang client to +authenticate itself when connecting to Riak. 
+
+If you are using [trust]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/)- or [PAM]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", ""},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+```
+
+Please note that you do not need to specify a password if you are not
+using password-based authentication. If you are using a different
+security source, Riak will ignore the password. You can enter an empty
+string (as in the example above) or anything you'd like.
+
+This client is not currently set up to use any of the available security
+sources, with the exception of trust-based authentication, provided that
+the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which the client is connecting has been specified as trusted. More
+on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `SecurityOptions` list from
+above. We'll use the password `rosebud` here and in the rest of the
+examples.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", "rosebud"},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.4/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + diff --git a/content/riak/kv/2.9.4/developing/usage/security/java.md b/content/riak/kv/2.9.4/developing/usage/security/java.md new file mode 100644 index 0000000000..24a56d4d78 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/security/java.md @@ -0,0 +1,118 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.4/dev/advanced/client-security/java + - /riak/kv/2.9.4/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to +create a cluster object (we'll call it `cluster`), which will in turn be +used to create a `client` object. The setup below does not specify a CA: + +```java +import com.basho.riak.client.api.RiakClient; +import com.basho.riak.client.api.RiakCluster; +import com.basho.riak.client.api.RiakNode; + +RiakNode node = new RiakNode.Builder() + .withRemoteAddress("127.0.0.1") + .withRemotePort(8087) + // This will specify a username but no password or keystore: + .withAuth("riakuser", null, null) + .build(); + +RiakCluster cluster = new RiakCluster.Builder(node) + .build(); + +RiakClient client = new RiakClient(cluster); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `withAuth` method in the `node` object's +constructor rather than leaving it as `null`. We will also pass a +`KeyStore` object into that method. + +```java +import java.io.FileInputStream; +import java.io.InputStream; +import java.security.KeyStore; +import java.security.cert.CertificateFactory; +import java.security.cert.X509Certificate; + +// Generate an InputStream from the CA cert +InputStream inputStream = new InputStream("/ssl_dir/cacertfile.pem"); + +// Generate an X509Certificate from the InputStream and close the stream +CertificateFactory certFactory = CertificateFactory.getInstance("X.509"); +X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream); +inputStream.close(); + +// Generate a KeyStore object +KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType()); +ks.load(null, "password".toCharArray()); +ks.setCertificateEntry("cacert", caCert); + +RiakNode node = new RiakNode.Builder() + .withRemoteAddress("127.0.0.1") + .withRemotePort(8087) + .withAuth("riakuser", "rosebud", ks) + .build(); + +// Construct the cluster and client object in the same fashion as above +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak Java client. + diff --git a/content/riak/kv/2.9.4/developing/usage/security/php.md b/content/riak/kv/2.9.4/developing/usage/security/php.md new file mode 100644 index 0000000000..bd325ab647 --- /dev/null +++ b/content/riak/kv/2.9.4/developing/usage/security/php.md @@ -0,0 +1,119 @@ +--- +title_supertext: "Client Security:" +title: "PHP" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "PHP" + identifier: "usage_security_php" + weight: 104 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.4/dev/advanced/client-security/php + - /riak/kv/2.9.4/dev/advanced/client-security/php +--- + +This tutorial shows you how to set up a Riak PHP client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#php-client-basics). 
[Certificate]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
+
diff --git a/content/riak/kv/2.9.4/developing/usage/security/python.md b/content/riak/kv/2.9.4/developing/usage/security/python.md
new file mode 100644
index 0000000000..df544b1a4b
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/security/python.md
@@ -0,0 +1,173 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/advanced/client-security/python
+  - /riak/kv/2.9.4/dev/advanced/client-security/python
---

+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/)- or [PAM]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the above, with the exception that we'll also specify a
+password for the client in the `creds` object from above. We'll use the
+password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.4/using/security/basics/#user-management).
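+
+In other words, the credentials themselves look the same as in the
+password-based example; only the authentication source configured on the
+Riak side differs. A sketch:
+
+```python
+from riak.security import SecurityCreds
+
+# For a PAM-authenticated user, supply the CA plus the username and
+# password registered with Riak (values here are illustrative)
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```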
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `pkey_file` | `pkey` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify filepaths, the certs will be loaded and converted into
+the corresponding OpenSSL objects. The functions used for this are
+`OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
+
diff --git a/content/riak/kv/2.9.4/developing/usage/security/ruby.md b/content/riak/kv/2.9.4/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..d6c6815420
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/security/ruby.md
@@ -0,0 +1,159 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/advanced/client-security/ruby
+  - /riak/kv/2.9.4/dev/advanced/client-security/ruby
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we will
+specify a password for the client in the `authentication` hash. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.4/using/security/basics#user-management).
+
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/ssl_dir/client_cert.pem',
+    cert: '/ssl_dir/cert.pem',
+    key: '/ssl_dir/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL objects.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object.
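+
+For example, here is a minimal sketch, reusing the host, port, and cert
+information from above, that turns CRL checking off:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Skip CRL checking entirely
+    crl: false
+  }
+)
+```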
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)),
+the OCSP endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
+
diff --git a/content/riak/kv/2.9.4/developing/usage/updating-objects.md b/content/riak/kv/2.9.4/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..bebd6827a8
--- /dev/null
+++ b/content/riak/kv/2.9.4/developing/usage/updating-objects.md
@@ -0,0 +1,775 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.4/dev/using/updates
+  - /riak/kv/2.9.4/dev/using/updates
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context). Causal context tracks the causal history of an object.
+It is attached to _all_ Riak objects as metadata, and it is not readable
+by humans. It may sound complex---and it is fairly complex behind the
+scenes---but using it in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(currentChampion)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->withLocation($location)
+    ->build()
+    ->execute()
+    ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation($location)
+    ->withObject($object)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but
+%% it will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+obj = rsp.Values[0]
+obj.Value = []byte("Harlem Globetrotters")
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc = cmd.(*riak.StoreValueCommand)
+rsp = svc.Response
+obj = rsp.Values[0]
+fmt.Printf("champion: %v", string(obj.Value))
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+```
+
+In the samples above, we didn't need to actually interact with the
+context object, as retaining and passing along the context object was
+accomplished automatically by the client. If, however, you do need
+access to an object's context, the clients enable you to fetch it from
+the object:
+
+```java
+// Using the RiakObject obj from above:
+
+Vclock vClock = obj.getVclock();
+System.out.println(vClock.asString());
+
+// The context object will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```ruby
+# Using the RObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```php
+// Using the $object from above:
+
+echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```python
+# Using the RiakObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```csharp
+// Using the RiakResult rslt from above:
+var vclock = rslt.Value.VectorClock;
+Console.WriteLine(Convert.ToBase64String(vclock));
+
+// The output will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```javascript
+// Using the RiakObject fetchedObj from above:
+logger.info("vclock: %s", fetchedObj.getVClock().toString('base64'));
+
+// The output will look something like this:
+// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA=
+```
+
+```erlang
+%% Using the Obj object from above:
+
+riakc_obj:vclock(Obj).
+
+%% The context object will look something like this in the Erlang shell:
+%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126,
+%% 6,175,157,255,57,131,41,145,49,143,149,225,240,...>>
+```
+
+```golang
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Println(rsp.VClock)
+
+// Output:
+// X3hNXFq3ythUqvvrG9eJEGbUyLS
+```
+
+## The Object Update Cycle
+
+If you decide that your application requires mutable data in Riak, we
+recommend that you:
+
+* avoid high-frequency object updates to the same key (i.e. multiple
+  updates per second for long periods of time), as this will degrade
+  Riak performance; and that you
+* follow a read-modify-write cycle when performing updates.
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated).
This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+    * [Java]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/java)
+    * [Ruby]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/ruby)
+    * [Python]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/python)
+    * [C#]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/csharp)
+    * [Go]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+[strong consistency documentation]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/2.9.4/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official
+Ruby, PHP, Python, .NET, Node.js, Erlang, and Go clients. Because
+updates with the official Java client function somewhat differently,
+those examples can be found in the [section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which will bear the bucket type
+`siblings`, which sets `allow_mult` to `true`. The key for each object
+is the name of the team, e.g. `giants`, `broncos`, etc. Each object will
+consist of the name of the coach in plain text.
Here's an example of +creating and storing such an object: + +```ruby +bucket = client.bucket('coaches') +obj = bucket.get_or_new('seahawks', type: 'siblings') +obj.content_type = 'text/plain' +obj.raw_data = 'Pete Carroll' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + +if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); +} else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); +} + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('siblings').bucket('coaches') +obj = RiakObject(client, bucket, 'seahawks') +obj.content_type = 'text/plain' +obj.data = 'Pete Carroll' +obj.store() +``` + +```csharp +var id = new RiakObjectId("siblings", "coaches", "seahawks"); +var obj = new RiakObject(id, "Pete Carroll", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('siblings'); +riakObj.setBucket('coaches'); +riakObj.setKey('seahawks'); +riakObj.setValue('Pete Carroll'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } else { + logger.info('Stored Pete Carroll'); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>}, + <<"seahawks">>, + <<"Pete Carroll">>, + <<"text/plain">>). +riakc_pb_socket:put(Pid, Obj). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Pete Carroll"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey("seahawks"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +fmt.Println("Stored Pete Carroll") +``` + +Every once in a while, though, head coaches change in the NFL, which +means that our data would need to be updated. 
Below is an example
+function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.raw_data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($team, $coach) {
+    $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    if ($response->isSuccess()) {
+        $object = $response->getObject();
+        $object->setData($coach);
+    } else {
+        $object = new \Basho\Riak\Object($coach, 'text/plain');
+    }
+
+    $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->withObject($object)
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    return $response->isSuccess();
+}
+
+echo update_coach('packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Team, NewCoach) ->
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage
+update_coach(<<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak.
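+
+If you want to see the causal context moving through this cycle, one
+option is to inspect the object's vector clock before and after the
+write. Here is a minimal Ruby sketch of that idea, reusing the
+`coaches` bucket from above (the new coach's name is just an example
+value):
+
+```ruby
+bucket = client.bucket('coaches')
+# The read phase also fetches the object's causal context
+obj = bucket.get_or_new('seahawks', type: 'siblings')
+puts "context before update: #{obj.vclock}"
+# The modify and write phases
+obj.raw_data = 'Jim Mora'
+obj.store
+# Reading the key again shows that the context has moved forward
+updated = bucket.get_or_new('seahawks', type: 'siblings')
+puts "context after update: #{updated.vclock}"
+```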
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you
+can be sure that keys are not repeated. In that case, making writes to
+Riak without first reading the object is fine. If you're not certain,
+however, then we recommend always reading the object first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+    public String username;
+    public List<String> hobbies;
+
+    public User(String username, List<String> hobbies) {
+        this.username = username;
+        this.hobbies = hobbies;
+    }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` field:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that you can use to replace the existing
+object with a new object of the same type rather than changing one or
+more properties of the object.
Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above would update the object without returning its value
+to the client. You could, however, use a no-operation update to _read_
+an object as well if you set `return_body` to `true` in your request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withFetchOption(Option.RETURN_BODY, true)
+        .withUpdate(Update.noopUpdate())
+        .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
+
diff --git a/content/riak/kv/2.9.4/downloads.md b/content/riak/kv/2.9.4/downloads.md
new file mode 100644
index 0000000000..28b5374c02
--- /dev/null
+++ b/content/riak/kv/2.9.4/downloads.md
@@ -0,0 +1,24 @@
+---
+title: "Download for Riak KV 2.9.4"
+description: "Download Riak KV 2.9.4"
+menu:
+  riak_kv-2.9.4:
+    name: "Download Riak KV"
+    identifier: "download_riak_kv"
+    weight: 101
+    pre: download-alt
+project: "riak_kv"
+project_version: 2.9.4
+toc: false
+layout: downloads
+listed_projects:
+  - project: "riak_kv"
+    version: 2.9.4
+    title: "Riak KV"
+    install_instructions_set: "setup/installing"
+aliases:
+  - /riak/2.9.4/downloads
+  - /riak/kv/2.9.4/downloads
+---
+
+
diff --git a/content/riak/kv/2.9.4/index.md b/content/riak/kv/2.9.4/index.md
new file mode 100644
index 0000000000..a13663fc71
--- /dev/null
+++ b/content/riak/kv/2.9.4/index.md
@@ -0,0 +1,74 @@
+---
+title: "Riak KV 2.9.4"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Riak KV"
+    identifier: "index"
+    weight: 100
+    pre: riak
+toc: false
+aliases:
+  - /riak/2.9.4/
+---
+
+[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/
+[config index]: {{<baseurl>}}riak/kv/2.9.4/configuring
+[downloads]: {{<baseurl>}}riak/kv/2.9.4/downloads/
+[install index]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/
+[plan index]: {{<baseurl>}}riak/kv/2.9.4/setup/planning
+[perf open files]: {{<baseurl>}}riak/kv/2.9.4/using/performance/open-files-limit
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/debian-ubuntu
+[usage search]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/search
+[getting started]: {{<baseurl>}}riak/kv/2.9.4/developing/getting-started
+[dev client libraries]: {{<baseurl>}}riak/kv/2.9.4/developing/client-libraries
+
+
+
+Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data.
+
+## Supported Operating Systems
+
+- Amazon Linux 2016.09 (AWS)
+- Amazon Linux 2 (AWS)
+- CentOS 6
+- CentOS 7
+- CentOS 8
+- Debian 7.0 ("Wheezy")
+- Debian 8.0 ("Jessie")
+- Debian 9.0 ("Stretch")
+- Red Hat Enterprise Linux 6
+- Red Hat Enterprise Linux 7
+- Red Hat Enterprise Linux 8
+- Raspbian Buster
+- Ubuntu 12.04 ("Precise Pangolin")
+- Ubuntu 14.04 ("Trusty Tahr")
+- Ubuntu 16.04 ("Xenial Xerus")
+- Ubuntu 18.04 ("Bionic Beaver")
+- FreeBSD 10.4
+- FreeBSD 11.1
+- Mac OS X 10.11+ (development only)
+
+## Getting Started
+
+Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started:
+
+1. [Install Riak KV][install index]
+2. [Plan your Riak KV setup][plan index]
+3. [Configure Riak KV for your needs][config index]
+
+{{% note title="Developing with Riak KV" %}}
+If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/2.9.4/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more.
+{{% /note %}}
+
+## Popular Docs
+
+1. [Open Files Limit][perf open files]
+2. [Installing on Debian-Ubuntu][install debian & ubuntu]
+3. [Developing with Riak KV: Searching][usage search]
+4. [Developing with Riak KV: Getting Started][getting started]
+5. [Developing with Riak KV: Client Libraries][dev client libraries]
+
+
diff --git a/content/riak/kv/2.9.4/learn.md b/content/riak/kv/2.9.4/learn.md
new file mode 100644
index 0000000000..d110e7222c
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn.md
@@ -0,0 +1,50 @@
+---
+title: "Learn About Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Learning"
+    identifier: "learn"
+    weight: 400
+    pre: beaker
+toc: true
+aliases:
+---
+
+[learn why riak]: ./why-riak-kv/
+[learn use cases]: ./use-cases/
+[learn new nosql]: ./new-to-nosql/
+[glossary]: ./glossary/
+[concepts]: ./concepts/
+
+## In This Section
+
+#### [Why Riak KV?][learn why riak]
+
+An overview of Riak KV and when to use it.
+
+[Learn More >>][learn why riak]
+
+#### [Use Cases][learn use cases]
+
+Details use cases and applications in which Riak KV excels.
+
+[Learn More >>][learn use cases]
+
+
+
+#### [Glossary][glossary]
+
+A list of terms relating to Riak used throughout the documentation.
+
+[Learn More >>][glossary]
+
+#### [Concepts][concepts]
+
+Provides definitions for, insight into, and high-level information
+about the various parts of Riak KV.
+
+[Learn More >>][concepts]
+
+
diff --git a/content/riak/kv/2.9.4/learn/concepts.md b/content/riak/kv/2.9.4/learn/concepts.md
new file mode 100644
index 0000000000..4164cf6c8c
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn/concepts.md
@@ -0,0 +1,45 @@
+---
+title: "Concepts"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Concepts"
+    identifier: "learn_concepts"
+    weight: 104
+    parent: "learn"
+toc: true
+---
+
+[concept aae]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets
+[concept cap neg]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/capability-negotiation
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters
+[concept crdts]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/crdts
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency
+[concept keys objects]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/reference/strong-consistency
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/vnodes
+[config index]: {{<baseurl>}}riak/kv/2.9.4/configuring
+[plan index]: {{<baseurl>}}riak/kv/2.9.4/setup/planning
+[use index]: {{<baseurl>}}riak/kv/2.9.4/using/
+
+
+Riak KV has many great features, functions, and guiding principles that
+inform how the product works. This section provides definitions for,
+insight into, and high-level information about the various parts of
+Riak KV you will encounter as you [plan][plan index],
+[configure][config index], and [use][use index] Riak.
+
+Learn more about:
+
+* [Active Anti-Entropy (AAE)][concept aae]
+* [Buckets][concept buckets]
+* [Capability Negotiation][concept cap neg]
+* [Causal Context][concept causal context]
+* [Clusters][concept clusters]
+* [Convergent Replicated Data Types (CRDTs)][concept crdts]
+* [Eventual Consistency][concept eventual consistency]
+* [Keys and Objects][concept keys objects]
+* [Replication][concept replication]
+* [Virtual Nodes (vnodes)][concept vnodes]
+
diff --git a/content/riak/kv/2.9.4/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.9.4/learn/concepts/active-anti-entropy.md
new file mode 100644
index 0000000000..c91f562618
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn/concepts/active-anti-entropy.md
@@ -0,0 +1,108 @@
+---
+title: "Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Active Anti-Entropy"
+    identifier: "learn_concepts_aae"
+    weight: 100
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.4/theory/concepts/aae
+  - /riak/kv/2.9.4/theory/concepts/aae
+---
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter
+[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/active-anti-entropy
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency
+[config aae]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/#active-anti-entropy
+[glossary read rep]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#read-repair
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
+[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree
+[usage search]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/search
+
+
+In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored
+on different nodes are an expected byproduct of node failure, concurrent
+client updates, physical data loss and corruption, and other events that
+distributed systems are built to handle. These conflicts occur when
+objects are either
+
+* **missing**, as when one node holds a replica of the object and
+  another node does not, or
+* **divergent**, as when the values of an existing object differ across
+  nodes.
+
+Riak KV offers two means of resolving object conflicts: read repair and
+active anti-entropy (AAE). Both of these conflict resolution mechanisms
+apply to normal key/value data in Riak as well as to
+[search indexes][usage search].
+
+
+## Read Repair vs. Active Anti-Entropy
+
+In versions of Riak prior to 1.3, replica conflicts were healed via
+[read repair][glossary read rep], which is a _passive_
+anti-entropy mechanism that heals object conflicts only when a read
+request reaches Riak from a client. Under read repair, if the
+[vnode][glossary vnode] coordinating the read request determines
+that different nodes hold divergent values for the object, the repair
+process will be set in motion.
+
+One advantage of using read repair alone is that it doesn't require any
+kind of background process to take effect, which can cut down on CPU
+resource usage. The drawback of the read repair-only approach, however,
+is that the healing process can only ever reach those objects that
+are read by clients. Any conflicts in objects that are not read by
+clients will go undetected.
+
+The _active_ anti-entropy (AAE) subsystem was added to Riak in
+versions 1.3 and later to enable conflict resolution to run as a
+continuous background process, in contrast with read repair, which does
+not run continuously. AAE is most useful in clusters containing
+so-called "cold data" that may not be read for long periods of time,
+even months or years, and is thus not reachable by read repair.
+
+Although AAE is enabled by default, it can be turned off if necessary.
+See our documentation on [managing active anti-entropy][cluster ops aae]
+for information on how to enable and disable AAE, as well as on configuring
+and monitoring AAE.
+
+## Active Anti-Entropy and Hash Tree Exchange
+
+In order to compare object values between replicas without using more
+resources than necessary, Riak relies on [Merkle tree] hash exchanges
+between nodes.
+
+Using this type of exchange enables Riak to compare a balanced tree of
+Riak object hashes. Any difference at a higher level in the hierarchy
+means that at least one value has changed at a lower level. AAE
+recursively compares the tree, level by level, until it pinpoints exact
+values with a difference between nodes. The result is that AAE is able
+to run repair operations efficiently regardless of how many objects are
+stored in a cluster, since it need only repair specific objects instead
+of all objects.
+
+In contrast with related systems, Riak uses persistent, on-disk hash
+trees instead of in-memory hash trees. The advantages of this approach
+are twofold:
+
+* Riak can run AAE operations with a minimal impact on memory usage
+* Riak nodes can be restarted without needing to rebuild hash trees
+
+In addition, hash trees are updated in real time as new writes come in,
+which reduces the time that it takes to detect and repair missing or
+divergent replicas.
+
+As an additional fallback measure, Riak periodically clears and
+regenerates all hash trees from on-disk key/value data, which enables
+Riak to detect silent data corruption to on-disk data arising from disk
+failure, faulty hardware, and other sources. The default time period for
+this regeneration is one week, but this can be adjusted in each node's
+[configuration file][config aae].
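+
+To make the comparison mechanism more concrete, here is a small,
+self-contained Ruby sketch of a Merkle-style exchange. It is only an
+illustration of the general technique, not Riak's actual implementation
+(which uses deeper, persisted, on-disk trees):
+
+```ruby
+require 'digest'
+
+# Build a minimal two-level "tree": one hash per key/value pair, plus a
+# root hash summarizing all of them.
+def merkle(data)
+  leaves = data.sort.map { |k, v| [k, Digest::SHA256.hexdigest("#{k}:#{v}")] }.to_h
+  { root: Digest::SHA256.hexdigest(leaves.values.join), leaves: leaves }
+end
+
+def divergent_keys(a, b)
+  # Matching root hashes prove the replicas agree without comparing keys
+  return [] if a[:root] == b[:root]
+  # Otherwise descend a level and compare hashes to pinpoint differences
+  (a[:leaves].keys | b[:leaves].keys).select { |k| a[:leaves][k] != b[:leaves][k] }
+end
+
+replica1 = merkle('champion' => 'Washington Generals', 'mvp' => 'Curly Neal')
+replica2 = merkle('champion' => 'Harlem Globetrotters', 'mvp' => 'Curly Neal')
+
+p divergent_keys(replica1, replica2) # => ["champion"]
+```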
+
diff --git a/content/riak/kv/2.9.4/learn/concepts/buckets.md b/content/riak/kv/2.9.4/learn/concepts/buckets.md
new file mode 100644
index 0000000000..6e7f475ff6
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn/concepts/buckets.md
@@ -0,0 +1,214 @@
+---
+title: "Buckets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Buckets"
+    identifier: "learn_concepts_buckets"
+    weight: 101
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.4/theory/concepts/Buckets
+  - /riak/kv/2.9.4/theory/concepts/Buckets
+  - /riak/2.9.4/theory/concepts/buckets
+  - /riak/kv/2.9.4/theory/concepts/buckets
+---
+
+[apps cluster metadata]: {{<baseurl>}}riak/kv/2.9.4/developing/app-guide/cluster-metadata
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/bucket-types
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/strong-consistency
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context
+[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context/#siblings
+[concept replication]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/reference/strong-consistency
+[config basic]: {{<baseurl>}}riak/kv/2.9.4/configuring/basic
+[dev api http]: {{<baseurl>}}riak/kv/2.9.4/developing/api/http
+[dev data types]: {{<baseurl>}}riak/kv/2.9.4/developing/data-types
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#ring
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/multi
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/replication
+
+
+Buckets are used to define a virtual keyspace for storing Riak objects.
+They enable you to define non-default configurations over that keyspace
+concerning [replication properties][concept replication] and [other
+parameters][config basic].
+
+In certain respects, buckets can be compared to tables in relational
+databases or to folders in filesystems. From the standpoint
+of performance, buckets with default configurations are essentially
+"free," while non-default configurations, defined [using bucket
+types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.
+
+## Configuration
+
+Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables
+you to create and modify sets of configurations and apply them to as
+many buckets as you wish. With bucket types, you can configure the
+following bucket-level parameters, overriding the default values if you
+wish.
+
+#### allow_mult
+
+Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on
+the context. See the documentation on [`allow_mult`][usage bucket types] for more
+information.
+
+#### n_val
+
+Specifies the number of copies of each object to be stored in the
+cluster. See the documentation on [replication properties][usage replication]. Default:
+`3`.
+
+#### last_write_wins
+
+Indicates if an object's timestamp will be used to decide the canonical
+write in the case of a conflict. See the documentation on [vector
+clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default:
+`false`.
+
+#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum
+
+See the documentation on [replication properties][usage replication] for more information
+on all of these properties.
+
+#### precommit
+
+A list of Erlang functions to be executed before writing an object. See
+our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit
+hooks, i.e. an empty list.
+
+#### postcommit
+
+A list of Erlang functions to be executed after writing an object. See
+our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit
+hooks, i.e. an empty list.
+
+#### old_vclock, young_vclock, small_vclock, big_vclock
+
+These settings enable you to manage [vector clock pruning][concept causal context].
+
+#### backend
+
+If you are using the [Multi][plan backend multi] backend, this property enables you to
+determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using
+LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_
+buckets of all types will use the assigned backend.
+
+#### consistent
+
+If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets
+bearing a type, this setting must be set to `true`. The default is
+`false`. More information can be found in our documentation on [using
+strong consistency][cluster ops strong consistency].
+
+#### datatype
+
+If you are using [Riak data types][dev data types], this setting
+determines which data type will be used in
+buckets of this bucket type. Possible values: `counter`, `set`, or
+`map`.
+
+#### dvv_enabled
+
+Whether [dotted version vectors][concept causal context]
+will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.
+
+#### chash_keyfun, linkfun
+
+These settings involve features that have been deprecated. You will not
+need to adjust these values.
+
+## Fetching Bucket Properties
+
+If you'd like to see how a particular bucket has been configured, you
+can do so using our official client libraries or through Riak's [HTTP
+API][dev api http]. The following would fetch the properties for the bucket
+`animals` if that bucket had a default configuration, i.e. the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+    ->buildBucket('animals')
+    ->build()
+    ->execute()
+    ->getBucket()
+    ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak's HTTP API is running on "localhost" and port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+    ->buildBucket('animals', 'my_custom_type')
+    ->build()
+    ->execute()
+    ->getBucket()
+    ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
+
diff --git a/content/riak/kv/2.9.4/learn/concepts/capability-negotiation.md b/content/riak/kv/2.9.4/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..c7f2b87538
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn/concepts/capability-negotiation.md
@@ -0,0 +1,33 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.4/theory/concepts/capability-negotiation
+  - /riak/kv/2.9.4/theory/concepts/capability-negotiation
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.4/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce
+
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
+
+Rolling upgrades no longer require you to disable and then re-enable features due to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + + diff --git a/content/riak/kv/2.9.4/learn/concepts/causal-context.md b/content/riak/kv/2.9.4/learn/concepts/causal-context.md new file mode 100644 index 0000000000..831dc1ea23 --- /dev/null +++ b/content/riak/kv/2.9.4/learn/concepts/causal-context.md @@ -0,0 +1,286 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.4/theory/concepts/context + - /riak/kv/2.9.4/theory/concepts/context +--- + + +[concept aae]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/2.9.4/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/2.9.4/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/2.9.4/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+    b. Riak creates sibling values, aka **siblings**, for the object
+
+    c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by bucket properties. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 1.4, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+ * Whether one object is a direct descendant of the other
+ * Whether the objects are direct descendants of a common parent
+ * Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects.
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks](#vector-clocks). In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object, but they
+can't identify which value was associated with each update.
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#sibling-explosion) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates,
+objects that use DVVs should generally be smaller than objects that use
+vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence of interactions with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`. So if you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion.
At the extreme, having an enormous object in a
+node can cause reads of that object to crash the entire node. Other
+issues include [undue latency][perf latency reduc] and
+out-of-memory errors.
+
+To prevent sibling explosion, we recommend the following:
+
+1. Use [dotted version vectors](#dotted-version-vectors)
+instead of vector clocks for causal
+context.
+2. Always update mutable objects within a read/modify/write cycle. More
+information can be found in the [Object Updates][usage updating objects] doc.
+
+## Resources
+
+* [Evaluating Dotted Version Vectors in Riak]
+* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]
+* [Dotted Version Vector Sets]
+* [A History of Time in Riak]
+
diff --git a/content/riak/kv/2.9.4/learn/concepts/clusters.md b/content/riak/kv/2.9.4/learn/concepts/clusters.md
new file mode 100644
index 0000000000..e2c09841df
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn/concepts/clusters.md
@@ -0,0 +1,114 @@
+---
+title: "Clusters"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Clusters"
+    identifier: "learn_concepts_clusters"
+    weight: 103
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.4/theory/concepts/Clusters
+  - /riak/kv/2.9.4/theory/concepts/Clusters
+  - /riak/2.9.4/theory/concepts/clusters
+  - /riak/kv/2.9.4/theory/concepts/clusters
+---
+
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets
+[concept keys objects]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication
+[glossary node]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
+[learn dynamo]: {{<baseurl>}}riak/kv/2.9.4/learn/dynamo
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/replication
+
+
+Riak's default mode of operation is to work as a cluster consisting of
+multiple [nodes][glossary node], i.e. multiple well-connected data
+hosts.
+
+Each host in the cluster runs a single instance of Riak, referred to as
+a Riak node. Each Riak node manages a set of virtual nodes, or
+[vnodes][glossary vnode], that are responsible for storing a
+separate portion of the keys stored in the cluster.
+
+In contrast to some high-availability systems, Riak nodes are _not_
+clones of one another, and they do not all participate in fulfilling
+every request. Instead, you can configure, at runtime or at request
+time, the number of nodes on which data is to be replicated, as well as
+when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed.
+
+## The Ring
+
+Though much of this material is also covered in our annotated discussion of
+the Amazon [Dynamo paper][learn dynamo], this section provides a summary of
+how Riak implements the distribution of data throughout a cluster.
+
+Any client interface to Riak interacts with objects in terms of the
+[bucket][concept buckets] and [key][concept keys objects] in which a value is
+stored, as well as the [bucket type][usage bucket types] that is used
+to set the bucket's properties.
+
+Internally, Riak computes a 160-bit binary hash of each bucket/key pair
+and maps this value to a position on an ordered **ring** of all such
+values.
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes define a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![A Riak Ring]({{<baseurl>}}images/riak-data-distribution.png)
+
+When N is set to 3, the value `REM` is stored in the key `artist`. That
+key is assigned to 3 partitions out of 32 available partitions. When a
+read request is made to Riak, the ring state will be used to determine
+which partitions are responsible. From there, a variety of
+[configurable parameters][usage replication] determine how Riak
+will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates.
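+
+To make the N, R, W, and DW parameters from the Intelligent Replication section above concrete, here is a minimal sketch using the Erlang client. The bucket and key names are invented for illustration, and `Pid` is assumed to be a connection opened with `riakc_pb_socket:start_link/2`:
+
+```erlang
+%% Assumes: {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+Obj = riakc_obj:new(<<"artists">>, <<"artist">>, <<"REM">>),
+
+%% Write: require at least 2 of the N vnodes to acknowledge (w), and
+%% 1 of them to confirm a durable write (dw). Both are bounded by N.
+ok = riakc_pb_socket:put(Pid, Obj, [{w, 2}, {dw, 1}]),
+
+%% Read: respond as soon as 2 of the N replicas have answered (r).
+{ok, Fetched} = riakc_pb_socket:get(Pid, <<"artists">>, <<"artist">>, [{r, 2}]).
+```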
+
diff --git a/content/riak/kv/2.9.4/learn/concepts/crdts.md b/content/riak/kv/2.9.4/learn/concepts/crdts.md
new file mode 100644
index 0000000000..665eda47c7
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn/concepts/crdts.md
@@ -0,0 +1,249 @@
+---
+title_supertext: "Concept"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Data Types"
+    identifier: "learn_concepts_data_types"
+    weight: 104
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.4/theory/concepts/crdts
+  - /riak/kv/2.9.4/theory/concepts/crdts
+---
+
+[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
+[data types converg]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/crdts/#convergence
+[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html
+[data types impl]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/crdts/#implementation
+[concept causal context dvv]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context/#dotted-version-vectors
+[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context/#siblings
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context/#vector-clocks
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/strong-consistency
+[dev data types]: {{<baseurl>}}riak/kv/2.9.4/developing/data-types
+[riak_dt]: https://github.com/basho/riak_dt
+[dev data types context]: {{<baseurl>}}riak/kv/2.9.4/developing/data-types/#data-types-and-context
+[glossary node]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution
+
+Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections:
+
+- Counters
+- Flags
+- HyperLogLogs
+- Maps
+- Registers
+- Sets
+
+The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients.
+
+Instead of the usual create, read, update, and delete (CRUD) operations
+performed on key/value pairs, data types enable you to perform
+operations such as removing a register from a map, telling a counter to
+increment itself by 5, or enabling a flag that was previously disabled.
+
+While clients interact with Riak Data Types through operations, the [convergence logic][data types converg] is state-based behind the scenes, as with other state-based CRDTs.
+
+Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below.
+
+For more articles on CRDTs, check out this [reading list][crdts reading list].
+
+
+## Counters
+
+Counters are a bucket-level Riak data type that can be used either on
+their own, i.e. associated with a bucket/key pair, or within a map. A
+counter's value can only be a positive integer, negative integer, or zero.
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, counters should not be used, because uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, hyperloglogs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+    Counter: numberOfRetweets,
+    Register: username,
+    Register: tweetContent,
+    Flag: favorited?,
+    Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which includes
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+The only operation registers support is changing the binary stored
+within them. Registers can be added to and removed from maps, but those
+operations take place on the map in which the register is nested, and
+not on the register itself.
+
+
+## Sets
+
+Sets are collections of unique binary values, such as strings. For
+example, if you attempt to add the element `shovel` to a set that
+already contains `shovel`, the operation will be ignored by Riak KV.
+Sets can be used either on their own or embedded in a map.
+
+Some examples of using sets:
+
+- Storing the UUIDs of a user's friends in a social network application
+- Storing items in an e-commerce shopping cart
+
+### Operations
+
+Sets are subject to four basic operations: add an element, remove an
+element, add multiple elements, or remove multiple elements.
+
+
+## Advantages and Disadvantages of Data Types
+
+[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider.
+
+One of the core purposes behind data types is to relieve developers
+using Riak KV of the burden of producing data convergence at the
+application level by absorbing a great deal of that complexity into Riak KV
+itself. Riak KV manages this complexity by building eventual consistency
+into the data types themselves instead of requiring clients to do so.
+
+You can still build applications with Riak KV that treat it as a highly
+available key/value store, and you will always have this choice. What
+Riak Data Types provide is additional flexibility and a broader choice
+palette.
+
+The trade-off that data types necessarily present is that they don't
+allow you to produce your own convergence logic. If your use case
+demands that you be able to create your own deterministic merge
+functions, then Riak Data Types might not be a good fit.
+
+
+## Implementation
+
+Conflicts between replicas are inevitable in a distributed system like
+Riak KV.
+
+For example, if a map is stored in the key `my_map`, it is always
+possible that the value of `my_map` will be different in nodes A and B.
+
+Without using data types, that conflict must be resolved using
+timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt].
+
+
+## Convergence
+
+The benefit of data types is that Riak KV knows how to resolve value
+conflicts by applying data type-specific rules.
+
+Riak KV does this by remembering the history of a value and broadcasting that
+history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vector][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct.
+
+### Example
+
+Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`, while on another node the set has only one element, `apple`.
+
+What happens when the two nodes communicate and note the divergence?
+
+In this case Riak KV would declare the set with two elements the winner.
+At that point, the node with the incorrect set would be told: "The set
+`fruits` should have elements `apple` and `orange`."
+
+In general, convergence involves the following stages:
+
+1. Check for divergence. If the data types have the same value, Riak KV
+   does nothing. But if divergence is noted...
+2. Riak KV applies data type-specific merge rules, like in the `fruits`
+   set example above, which will result in a "correct" value.
+3.
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. + diff --git a/content/riak/kv/2.9.4/learn/concepts/eventual-consistency.md b/content/riak/kv/2.9.4/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..7635cc46c2 --- /dev/null +++ b/content/riak/kv/2.9.4/learn/concepts/eventual-consistency.md @@ -0,0 +1,199 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.4/theory/concepts/Eventual-Consistency + - /riak/kv/2.9.4/theory/concepts/Eventual-Consistency + - /riak/2.9.4/theory/concepts/eventual-consistency + - /riak/kv/2.9.4/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these
+systems need to rely on some form of conflict-resolution mechanism.
+
+One of the things that makes Riak's eventual consistency model powerful
+is that Riak does not dictate how data resolution takes place. While
+Riak does ship with a set of defaults regarding how data is
+[replicated](#replication-properties-and-request-tuning) and how
+[conflicts are resolved][usage conflict resolution], you can override these
+defaults if you want to employ a different strategy.
+
+Among those strategies, you can enable Riak to resolve object conflicts
+automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or
+special eventually consistent [Data Types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types/), or you can resolve those
+conflicts on the application side by employing a use case-specific logic
+of your choosing. More information on this can be found in our guide to
+[conflict resolution][usage conflict resolution].
+
+This variety of options enables you to manage Riak's eventually
+consistent behavior in accordance with your application's [data model
+or models]({{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/).
+
+## Replication Properties and Request Tuning
+
+In addition to providing you with different means of resolving conflicts,
+Riak also enables you to fine-tune **replication properties**, which
+determine things like the number of nodes on which data should be stored
+and the number of nodes that are required to respond to read, write, and
+other requests.
+
+An in-depth discussion of these behaviors and how they can be
+implemented on the application side can be found in our guides to
+[replication properties][concept replication] and [conflict resolution][usage conflict resolution].
+
+In addition to our official documentation, we also recommend checking
+out the [Understanding Riak's Configurable
+Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+series from [the Basho blog](https://riak.com/blog/).
+
+## A Simple Example of Eventual Consistency
+
+Let's assume for the moment that a sports news application is storing
+all of its data in Riak. One thing that the application always needs to
+be able to report to users is the identity of the current manager of
+Manchester United, which is stored in the key `manchester-manager` in
+the bucket `premier-league-managers`. This bucket has `allow_mult` set
+to `false`, which means that Riak will resolve all conflicts by itself.
+
+Now let's say that a node in this cluster has recently recovered from
+failure and has an old copy of the key `manchester-manager` stored in
+it, with the value `Alex Ferguson`. The problem is that Sir Alex
+stepped down in 2013 and is no longer the manager. Fortunately, the
+other nodes in the cluster hold the value `David Moyes`, which is
+correct.
+
+Shortly after the recovered node comes back online, other cluster
+members recognize that it is available. Then, a read request for
+`manchester-manager` arrives from the application. Regardless of the
+order in which the responses arrive at the node that is coordinating
+this request, `David Moyes` will be returned as the value to the client,
+because `Alex Ferguson` is recognized as an older value.
+
+Why is this? How does Riak make this decision? Behind the scenes, after
+`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the
+older value on the node that just came back online.
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because 1 of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we considered the
+possibility of a node for the `manchester-manager` key having failed;
+but to be more precise, we've been talking about a *primary* node, one
+that when the cluster is perfectly healthy would bear responsibility for
+that key.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure.
+
+>
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3` the client indicated that 3 primary
+nodes must respond for the operation to be considered successful, which
+it wasn't, but there's no way to tell without performing another read
+whether the operation truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels, et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
+
diff --git a/content/riak/kv/2.9.4/learn/concepts/keys-and-objects.md b/content/riak/kv/2.9.4/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..6252140488
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn/concepts/keys-and-objects.md
@@ -0,0 +1,50 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.4/theory/concepts/keys-and-values
+  - /riak/kv/2.9.4/theory/concepts/keys-and-values
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there is more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and occur when more than one actor updates an
+object, when a network partition occurs, or when a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
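+
+Because the Object is the smallest unit of storage, a client always fetches the whole thing: bucket, key, causal context, and one or more metadata/value pairs. Here is a minimal sketch with the Erlang client, using invented bucket and key names and an assumed connection `Pid`:
+
+```erlang
+%% Assumes: {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"customers">>, <<"mariejohnston">>),
+
+%% The full object comes back; inspect its metadata/value pairs:
+N = riakc_obj:value_count(Obj),          %% N > 1 means siblings exist
+Contents = riakc_obj:get_contents(Obj),  %% [{Metadata, Value}, ...]
+Vclock = riakc_obj:vclock(Obj).          %% opaque causal context
+```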
+
diff --git a/content/riak/kv/2.9.4/learn/concepts/replication.md b/content/riak/kv/2.9.4/learn/concepts/replication.md
new file mode 100644
index 0000000000..48e2b101ac
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn/concepts/replication.md
@@ -0,0 +1,320 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.4/theory/concepts/Replication
+  - /riak/kv/2.9.4/theory/concepts/Replication
+  - /riak/2.9.4/theory/concepts/replication
+  - /riak/kv/2.9.4/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing security
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/multi) capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak chooses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
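+
+Since request-time values are bounded by N, it can be useful to check what N actually is for a bucket before tuning them. Here is a quick sketch with the Erlang client, using a made-up bucket name and an assumed connection `Pid`:
+
+```erlang
+%% Assumes: {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"my_bucket">>),
+NVal = proplists:get_value(n_val, Props).
+%% With NVal = 3, request-time quorums such as r, w, pr, and pw
+%% can each be at most 3.
+```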
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable, data. This can happen because
+objects' preflists, i.e. the lists of [vnodes][concept vnodes] responsible
+for each object, can end up changing.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy][cluster ops v3 mdc]
+
+
+## Read Repair
+
+Read repair occurs when a successful read occurs---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Or if you have [active
+anti-entropy][usage replication] enabled, your values will
+eventually replicate as a background task.
+
+For each object that fails read (or the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean?
+
+N=3 simply means that three copies of each piece of data will be stored
+in the cluster. That is, three different partitions/vnodes will receive
+copies of the data. **There are no guarantees that the three replicas
+will go to three separate physical nodes**; however, the built-in
+functions for determining where replicas go attempt to distribute the
+data evenly.
+
+As nodes are added and removed from the cluster, the ownership of
+partitions changes and may result in an uneven distribution of the data.
+On some rare occasions, Riak will also aggressively reshuffle ownership
+of the partitions to achieve a more even balance.
+
+For cases where the number of nodes is less than the N value, data will
+likely be duplicated on some nodes. For example, with N=3 and 2 nodes in
+the cluster, one node will likely have one replica, and the other node
+will have two replicas.
+
+## Understanding replication by example
+
+To better understand how data is replicated in Riak, let's take a look at
+a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically
+we'll focus on two parts of the request: routing an object to a set of
+partitions and storing an object on a partition.
+
+### Routing an object to a set of partitions
+
+ * Assume we have 3 nodes
+ * Assume we store 3 replicas per object (N=3)
+ * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8)
+
+**Note**: It is not recommended that you use such a small ring size.
+This is for demonstration purposes only.
+
+With only 8 partitions our ring will look approximately as follows
+(response from `riak_core_ring_manager:get_my_ring/0` truncated for
+clarity):
+
+```erlang
+(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
+[{0,'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
+```
+
+The node handling this request hashes the bucket/key combination:
+
+```erlang
+(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+```
+
+The DocIdx hash is a 160-bit integer:
+
+```erlang
+(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx.
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+(dev1@127.0.0.1)6> I.
+1045375627425331784151332358177649483819648417632
+```
+
+The node looks up the hashed key in the ring, which returns a list of
+_preferred_ partitions for the given key.
+
+```erlang
+(node1@127.0.0.1)> Preflist = riak_core_ring:preflist(DocIdx, Ring).
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0, 'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]
+```
+
+The node chooses the first N partitions from the list. The remaining
+partitions of the "preferred" list are retained as fallbacks to use if
+any of the target partitions are unavailable.
+
+```erlang
+(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist).
+{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0,'dev1@127.0.0.1'}],
+[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]}
+```
+
+The partition information returned from the ring contains a partition
+identifier and the parent node of that partition:
+
+```erlang
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}
+```
+
+The requesting node sends a message to each parent node with the object
+and partition identifier (pseudocode for clarity):
+
+```erlang
+'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232}
+'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+'dev1@127.0.0.1' ! {put, Object, 0}
+```
+
+If any of the target partitions fail, the node sends the object to one
+of the fallbacks. When the message is sent to the fallback node, the
+message references the object and original partition identifier. For
+example, if `dev2@127.0.0.1` were unavailable, the requesting node would
+then try each of the fallbacks. The fallbacks in this example are:
+
+```erlang
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}
+```
+
+The next available fallback node would be `dev3@127.0.0.1`. The
+requesting node would send a message to the fallback node with the
+object and original partition identifier:
+
+```erlang
+'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+```
+
+Note that the partition identifier in the message is the same that was
+originally sent to `dev2@127.0.0.1`, only this time it is being sent to
+`dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node of
+that partition, it is smart enough to hold on to the object until
+`dev2@127.0.0.1` returns to the cluster.
+
+## Processing partition requests
+
+Processing requests per partition is fairly simple. Each node runs a
+single process (`riak_kv_vnode_master`) that distributes requests to
+individual partition processes (`riak_kv_vnode`). The
+`riak_kv_vnode_master` process maintains a list of partition identifiers
+and corresponding partition processes. If a process does not exist for a
+given partition identifier, a new process is spawned to manage that
+partition.
+ +The `riak_kv_vnode_master` process treats all requests the same and +spawns partition processes as needed even when nodes receive requests +for partitions they do not own. When a partition's parent node is +unavailable, requests are sent to fallback nodes (handoff). The +`riak_kv_vnode_master` process on the fallback node spawns a process to +manage the partition even though the partition does not belong to the +fallback node. + +The individual partition processes perform hometests throughout the life +of the process. The hometest checks if the current node (`node/0`) +matches the parent node of the partition as defined in the ring. If the +process determines that the partition it is managing belongs on another +node (the parent node), it will attempt to contact that node. If that +parent node responds, the process will hand off any objects it has +processed for that partition and shut down. If that parent node does not +respond, the process will continue to manage that partition and check +the parent node again after a delay. The hometest is also run by +partition processes to account for changes in the ring, such as the +addition or removal of nodes to the cluster. + + diff --git a/content/riak/kv/2.9.4/learn/concepts/strong-consistency.md b/content/riak/kv/2.9.4/learn/concepts/strong-consistency.md new file mode 100644 index 0000000000..8859d3129f --- /dev/null +++ b/content/riak/kv/2.9.4/learn/concepts/strong-consistency.md @@ -0,0 +1,102 @@ +--- +title: "Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Strong Consistency" + identifier: "learn_concepts_strong_consistency" + weight: 109 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.4/theory/concepts/strong-consistency + - /riak/kv/2.9.4/theory/concepts/strong-consistency +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.4/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. 
Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. 
Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + diff --git a/content/riak/kv/2.9.4/learn/concepts/vnodes.md b/content/riak/kv/2.9.4/learn/concepts/vnodes.md new file mode 100644 index 0000000000..173d662015 --- /dev/null +++ b/content/riak/kv/2.9.4/learn/concepts/vnodes.md @@ -0,0 +1,157 @@ +--- +title: "Vnodes" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Vnodes" + identifier: "learn_concepts_vnodes" + weight: 109 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.4/theory/concepts/vnodes + - /riak/kv/2.9.4/theory/concepts/vnodes +--- + + +[concept causal context]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context +[concept clusters ring]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters/#the-ring +[concept replication]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/strong-consistency +[glossary node]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#node +[glossary ring]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#ring +[plan backend]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/cluster-capacity +[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-cli + + +Virtual nodes, more commonly referred to as **vnodes**, are processes +that manage partitions in the Riak [ring][glossary ring]. Each data +partition in a Riak cluster has a vnode that **claims** that partition. +Vnodes perform a wide variety of operations, from K/V storage operations +to guaranteeing [strong consistency][concept strong consistency] if you choose to use that +feature. + +## The Number of Vnodes in a Cluster + +The term [node][glossary node] refers to a full instance of Riak, +be it on its own physical machine or alongside others on a single +machine, as in a development cluster on your laptop. Each Riak node +contains multiple vnodes. The number per node is the [ring +size][concept clusters ring] divided by the number of nodes in the cluster. + +This means that in some clusters different nodes will have different +numbers of data partitions (and hence a different number of vnodes), +because (ring size / number of nodes) will not produce an even integer. +If the ring size of your cluster is 64 and you are running three nodes, +two of your nodes will have 21 vnodes, while the third node holds 22 +vnodes. + +The output of the [`riak-admin member-status`][use admin riak cli] +command shows this: + +``` +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +valid 34.4% -- 'dev1@127.0.0.1' +valid 32.8% -- 'dev2@127.0.0.1' +valid 32.8% -- 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 +``` + +In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of +64 partitions, while the other two nodes account for 32.8%, i.e. 21 out +of 64 partitions. This is normal and expected behavior in Riak. + +We strongly recommend setting the appropriate ring size, and by +extension the number of vnodes, prior to building a cluster. A full +guide can be found in our [cluster planning][plan cluster capacity] documentation. 
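+
+To make that arithmetic concrete, here is an illustrative Erlang shell
+session working through the three-node, 64-partition example above:
+
+```erlang
+1> 64 div 3.             %% two nodes claim 21 partitions each...
+21
+2> 64 rem 3.             %% ...and one node claims the leftover partition
+1
+3> (21 + 1) / 64 * 100.  %% the node holding 22 vnodes owns ~34.4% of the ring
+34.375
+```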
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, acting as [strong consistency
+ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). There you'll find a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1004782375664995756265033322492444576013453623296`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of an apparent user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node. 
The report for a
+specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+ 0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
+
diff --git a/content/riak/kv/2.9.4/learn/dynamo.md b/content/riak/kv/2.9.4/learn/dynamo.md
new file mode 100644
index 0000000000..be64ab7b9d
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn/dynamo.md
@@ -0,0 +1,1925 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/2.9.4/theory/dynamo
+  - /riak/kv/2.9.4/theory/dynamo
---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.
+
+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV,
+> Cassandra and Voldemort come to mind. You may also remember Dynomite (which
+> predates all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use. 
+ +Categories and Subject Descriptors + +* D.4.2 [Operating Systems]: Storage Management; +* D.4.5 [Operating Systems]: Reliability; +* D.4.2 [Operating Systems]: Performance; + +General Terms + +Algorithms, Management, Measurement, Performance, Design, Reliability. + +## 1. Introduction + +Amazon runs a world-wide e-commerce platform that serves tens of millions +customers at peak times using tens of thousands of servers located in many data +centers around the world. There are strict operational requirements on Amazon’s +platform in terms of performance, reliability and efficiency, and to support +continuous growth the platform needs to be highly scalable. Reliability is one +of the most important requirements because even the slightest outage has +significant financial consequences and impacts customer trust. In addition, to +support continuous growth, the platform needs to be highly scalable. + +One of the lessons our organization has learned from operating Amazon’s platform +is that the reliability and scalability of a system is dependent on how its +application state is managed. Amazon uses a highly decentralized, loosely +coupled, service oriented architecture consisting of hundreds of services. In +this environment there is a particular need for storage technologies that are +always available. For example, customers should be able to view and add items to +their shopping cart even if disks are failing, network routes are flapping, or +data centers are being destroyed by tornados. Therefore, the service responsible +for managing shopping carts requires that it can always write to and read from +its data store, and that its data needs to be available across multiple data +centers. + +Dealing with failures in an infrastructure comprised of millions of components +is our standard mode of operation; there are always a small but significant +number of server and network components that are failing at any given time. As +such Amazon’s software systems need to be constructed in a manner that treats +failure handling as the normal case without impacting availability or +performance. + +To meet the reliability and scaling needs, Amazon has developed a number of +storage technologies, of which the Amazon Simple Storage Service (also available +outside of Amazon and known as Amazon S3), is probably the best known. This +paper presents the design and implementation of Dynamo, another highly available +and scalable distributed data store built for Amazon’s platform. Dynamo is used +to manage the state of services that have very high reliability requirements and +need tight control over the tradeoffs between availability, consistency, cost- +effectiveness and performance. Amazon’s platform has a very diverse set of +applications with different storage requirements. A select set of applications +requires a storage technology that is flexible enough to let application +designers configure their data store appropriately based on these tradeoffs to +achieve high availability and guaranteed performance in the most cost effective +manner. + +There are many services on Amazon’s platform that only need primary-key access +to a data store. For many services, such as those that provide best seller +lists, shopping carts, customer preferences, session management, sales rank, and +product catalog, the common pattern of using a relational database would lead to +inefficiencies and limit scale and availability. Dynamo provides a simple +primary-key only interface to meet the requirements of these applications. 
+ +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. It also provides insight into the +tuning of these techniques to meet the requirements of production systems with +very strict performance demands. + +The paper is structured as follows. Section 2 presents the background and +Section 3 presents the related work. Section 4 presents the system design and +Section 5 describes the implementation. Section 6 details the experiences and +insights gained by running Dynamo in production and Section 7 concludes the +paper. There are a number of places in this paper where additional information +may have been appropriate but where protecting Amazon’s business interests +require us to reduce some level of detail. For this reason, the intra- and +inter-datacenter latencies in section 6, the absolute request rates in section +6.2 and outage lengths and workloads in section 6.3 are provided through +aggregate measures instead of absolute details. + + +## 2. Background + +Amazon’s e-commerce platform is composed of hundreds of services that work in +concert to deliver functionality ranging from recommendations to order +fulfillment to fraud detection. Each service is exposed through a well defined +interface and is accessible over the network. 
These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated Dynamo's (as
+> described in this paper) and where they diverged.
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly available, is efficient in its resource use,
+> and has a simple scale out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB. 
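+
+<!-- Random comment to add some padding between blockquotes -->
+
+> As a rough illustration of that first access pattern, here is what a
+> basic put and get look like from Basho's Erlang client (`riakc`); the
+> host, port, bucket, and key below are placeholders:
+>
+> ```erlang
+> %% Connect to a node's Protocol Buffers port (8087 by default).
+> {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+>
+> %% PUT: create an object and store it under bucket/key.
+> Obj = riakc_obj:new(<<"test-bucket">>, <<"test-key">>, <<"hello">>),
+> ok = riakc_pb_socket:put(Pid, Obj),
+>
+> %% GET: fetch the object back, metadata and all.
+> {ok, Fetched} =
+>     riakc_pb_socket:get(Pid, <<"test-bucket">>, <<"test-key">>).
+> ```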
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract. 
+ +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. 
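+
+> To make the percentile arithmetic concrete, here is a small Erlang
+> helper using the nearest-rank method (our illustrative sketch, not
+> anything from the paper or from Riak KV):
+>
+> ```erlang
+> %% percentile(Samples, P) -> the smallest sample S such that at
+> %% least a fraction P of all samples are =< S (e.g. P = 0.999).
+> percentile(Samples, P) when P > 0, P =< 1 ->
+>     Sorted = lists:sort(Samples),
+>     N = length(Sorted),
+>     Rank = min(N, max(1, ceil(P * N))),
+>     lists:nth(Rank, Sorted).
+> ```
+>
+> With 10,000 recorded request latencies, `percentile(Latencies, 0.999)`
+> returns the 9,990th-smallest value; an SLA such as “300ms at 99.9%”
+> holds as long as that value stays at or below 300ms.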
+ + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. On the other hand, since the application is aware of the +data schema it can decide on the conflict resolution method that is best suited +for its client’s experience. 
For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbor. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops. 
+ +To reduce the additional latency introduced by multi-hop routing, some P2P +systems (e.g., [14]) employ O(1) routing where each peer maintains enough +routing information locally so that it can route requests (to access a data +item) to the appropriate peer within a constant number of hops. + +> Riak KV's gossip protocol communicates between nodes with O(1) routing, and +> maintains local routing information. + +Various storage systems, such as Oceanstore [9] and PAST [17] were built on top +of these routing overlays. Oceanstore provides a global, transactional, +persistent storage service that supports serialized updates on widely replicated +data. To allow for concurrent updates while avoiding many of the problems +inherent with wide-area locking, it uses an update model based on conflict +resolution. Conflict resolution was introduced in [21] to reduce the number of +transaction aborts. Oceanstore resolves conflicts by processing a series of +updates, choosing a total order among them, and then applying them atomically in +that order. It is built for an environment where the data is replicated on an +untrusted infrastructure. By comparison, PAST provides a simple abstraction +layer on top of Pastry for persistent and immutable objects. It assumes that the +application can build the necessary storage semantics (such as mutable files) on +top of it. + +### 3.2 Distributed File Systems and Databases + +Distributing data for performance, availability and durability has been widely +studied in the file system and database systems community. Compared to P2P +storage systems that only support flat namespaces, distributed file systems +typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19] +replicate files for high availability at the expense of consistency. Update +conflicts are typically managed using specialized conflict resolution +procedures. The Farsite system [1] is a distributed file system that does not +use any centralized server like NFS. Farsite achieves high availability and +scalability using replication. The Google File System [6] is another distributed +file system built for hosting the state of Google’s internal applications. GFS +uses a simple design with a single master server for hosting the entire metadata +and where the data is split into chunks and stored in chunkservers. Bayou is a +distributed relational database system that allows disconnected operations and +provides eventual data consistency [21]. + +Among these systems, Bayou, Coda and Ficus allow disconnected operations and are +resilient to issues such as network partitions and outages. These systems differ +on their conflict resolution procedures. For instance, Coda and Ficus perform +system level conflict resolution and Bayou allows application level resolution. +All of them, however, guarantee eventual consistency. + +Similar to these systems, Dynamo allows read and write operations to continue +even during network partitions and resolves updated conflicts using different +conflict resolution mechanisms. Distributed block storage systems like FAB [18] +split large size objects into smaller blocks and stores each block in a highly +available manner. In comparison to these systems, a key-value store is more +suitable in this case because: (a) it is intended to store relatively small +objects (size < 1M) and (b) key-value stores are easier to configure on a per- +application basis. Antiquity is a wide-area distributed storage system designed +to handle multiple server failures [23]. 
It uses a secure log to preserve data
+integrity, replicates each log on multiple servers for durability, and uses
+Byzantine fault tolerance protocols to ensure data consistency. In contrast to
+Antiquity, Dynamo does not focus on the problem of data integrity and security
+and is built for a trusted environment. Bigtable is a distributed storage system
+for managing structured data. It maintains a sparse, multi-dimensional sorted
+map and allows applications to access their data using multiple attributes [2].
+Compared to Bigtable, Dynamo targets applications that require only key/value
+access with primary focus on high availability where updates are not rejected
+even in the wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages. 
+ +<table id="table-1"> + <caption> + Table 1: Summary of techniques used in Dynamo and their advantages. + </caption> + <tr> + <th>Problem</th> + <th>Technique</th> + <th>Advantage</th> + </tr> + <tr> + <td>Partitioning</td> + <td>Consistent Hashing</td> + <td>Incremental Scalability</td> + </tr> + <tr> + <td>High Availability for writes</td> + <td>Vector clocks with reconciliation during reads</td> + <td>Version size is decoupled from update rates.</td> + </tr> + <tr> + <td>Handling temporary failures</td> + <td>Sloppy Quorum and hinted handoff</td> + <td>Provides high availability and durability guarantee when some of the + replicas are not available.</td> + </tr> + <tr> + <td>Recovering from permanent failures</td> + <td>Anti-entropy using Merkle trees</td> + <td>Synchronizes divergent replicas in the background.</td> + </tr> + <tr> + <td>Membership and failure detection</td> + <td>Gossip-based membership protocol and failure detection.</td> + <td>Preserves symmetry and avoids having a centralized registry for storing + membership and node liveness information.</td> + </tr> +</table> + +### 4.1 System Interface + +Dynamo stores objects associated with a key through a simple interface; it +exposes two operations: get() and put(). The get(key) operation locates the +object replicas associated with the key in the storage system and returns a +single object or a list of objects with conflicting versions along with a +context. The put(key, context, object) operation determines where the replicas +of the object should be placed based on the associated key, and writes the +replicas to disk. The context encodes system metadata about the object that is +opaque to the caller and includes information such as the version of the object. +The context information is stored along with the object so that the system can +verify the validity of the context object supplied in the put request. + +> Whereas Dynamo only has the concept of keys, we added a higher level of +> organization called a "bucket." Keys are stored in buckets and buckets are the +> level at which several Riak KV properties can be configured (primarily the "N" +> value, or the replication value.) In addition to the bucket+key identifier and +> value, Riak KV will also return the associated metadata for a given object +> with each get or put. +> +> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. + +[HTTP API]: {{<baseurl>}}riak/kv/2.9.4/developing/api/http/ +[Protocol Buffers API]: {{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers/ + +Dynamo treats both the key and the object supplied by the caller as an opaque +array of bytes. It applies a MD5 hash on the key to generate a 128-bit +identifier, which is used to determine the storage nodes that are responsible +for serving the key. + +> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash +> to generate a 160 bit identifier which is then used to determine where in the +> database each datum is stored. Riak KV treats data as an opaque binary, thus +> enabling users to store virtually anything. + + +### 4.2 Partitioning Algorithm + +One of the key design requirements for Dynamo is that it must scale +incrementally. This requires a mechanism to dynamically partition the data over +the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning +scheme relies on consistent hashing to distribute the load across multiple +storage hosts. 
In consistent hashing [10], the output range of a hash function
+is treated as a fixed circular space or “ring” (i.e. the largest hash value
+wraps around to the smallest hash value). Each node in the system is assigned a
+random value within this space which represents its “position” on the ring. Each
+data item identified by a key is assigned to a node by hashing the data item’s
+key to yield its position on the ring, and then walking the ring clockwise to
+find the first node with a position larger than the item’s position. Thus, each
+node becomes responsible for the region in the ring between it and its
+predecessor node on the ring. The principal advantage of consistent hashing is
+that departure or arrival of a node only affects its immediate neighbors and
+other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data around
+> the ring to partitions responsible for storing data. The ring has a maximum
+> key space of 2^160. Each bucket+key (and its associated value) is hashed to a
+> location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Ideally, each storage
+> node will handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo uses
+a variant of consistent hashing (similar to the one used in [10, 20]): instead
+of mapping a node to a single point in the circle, each node gets assigned to
+multiple points in the ring. To this end, Dynamo uses the concept of “virtual
+nodes”. A virtual node looks like a single node in the system, but each node can
+be responsible for more than one virtual node. Effectively, when a new node is
+added to the system, it is assigned multiple positions (henceforth, “tokens”) in
+the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed
+in Section 6.
+
+> Riak KV also has the concept of virtual nodes and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+If a node becomes unavailable (due to failures or routine maintenance), the load
+handled by this node is evenly dispersed across the remaining available nodes.
+
+When a node becomes available again, or a new node is added to the system, the
+newly available node accepts a roughly equivalent amount of load from each of
+the other available nodes.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+The number of virtual nodes that a node is responsible for can be decided based
+on its capacity, accounting for heterogeneity in the physical infrastructure.
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters/#the-ring
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts. 
Each data item is replicated at N hosts, where N is a parameter
+configured “per-instance”. Each key, k, is assigned to a coordinator node
+(described in the previous section). The coordinator is in charge of the
+replication of the data items that fall within its range. In addition to locally
+storing each key within its range, the coordinator replicates these keys at the
+N-1 clockwise successor nodes in the ring. This results in a system where each
+node is responsible for the region of the ring between it and its Nth
+predecessor. In <a href="#figure-2">Figure 2</a>, node B replicates the key k at
+nodes C and D in addition to storing it locally. Node D will store the keys that
+fall in the ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
+> the concept of a bucket we covered above? In Riak KV, the replication
+> parameter, "N" (also called "n_val"), is configurable at the bucket level.
+> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
+> store three replicas of your data on three different partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called the
+preference list. The system is designed, as will be explained in Section 4.8, so
+that every node in the system can determine which nodes should be in this list
+for any particular key. To account for node failures, the preference list
+contains more than N nodes. Note that with the use of virtual nodes, it is
+possible that the first N successor positions for a particular key may be owned
+by less than N distinct physical nodes (i.e. a node may hold more than one of
+the first N positions). To address this, the preference list for a key is
+constructed by skipping positions in the ring to ensure that the list contains
+only distinct physical nodes.
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be propagated
+to all replicas asynchronously. A put() call may return to its caller before the
+update has been applied at all the replicas, which can result in scenarios where
+a subsequent get() operation may return an object that does not have the latest
+updates. If there are no failures then there is a bound on the update
+propagation times. However, under certain failure scenarios (e.g., server
+outages or network partitions), updates may not arrive at all replicas for an
+extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously and, as you would expect, this could result in a datum being
+> returned to the client that is out of date. But don't worry. We built in some
+> mechanisms to address this.
+
+There is a category of applications in Amazon’s platform that can tolerate such
+inconsistencies and can be constructed to operate under these conditions. For
+example, the shopping cart application requires that an “Add to Cart” operation
+can never be forgotten or rejected. 
If the most recent state of the cart is +unavailable, and a user makes changes to an older version of the cart, that +change is still meaningful and should be preserved. But at the same time it +shouldn’t supersede the currently unavailable state of the cart, which itself +may contain changes that should be preserved. Note that both “add to cart” and +“delete item from cart” operations are translated into put requests to Dynamo. +When a customer wants to add an item to (or remove from) a shopping cart and the +latest version is not available, the item is added to (or removed from) the +older version and the divergent versions are reconciled later. + +> Much like Dynamo was suited to the design of the shopping cart, Riak KV, and +> its tradeoffs, are appropriate for a certain set of use cases. We happen to +> feel that _most_ use cases can tolerate some level of eventual consistency. + +In order to provide this kind of guarantee, Dynamo treats the result of each +modification as a new and immutable version of the data. It allows for multiple +versions of an object to be present in the system at the same time. Most of the +time, new versions subsume the previous version(s), and the system itself can +determine the authoritative version (syntactic reconciliation). However, version +branching may happen, in the presence of failures combined with concurrent +updates, resulting in conflicting versions of an object. In these cases, the +system cannot reconcile the multiple versions of the same object and the client +must perform the reconciliation in order to collapse multiple branches of data +evolution back into one (semantic reconciliation). A typical example of a +collapse operation is “merging” different versions of a customer’s shopping +cart. Using this reconciliation mechanism, an “add to cart” operation is never +lost. However, deleted items can resurface. + +> The same holds true for Riak KV. If, by way of some failure and concurrent +> update (rare but quite possible), there come to exist multiple versions of the +> same object, Riak KV will push this decision down to the client (who are we to +> tell you which is the authoritative object?). All that said, if your +> application doesn't need this level of version control, we enable you to turn +> the usage of vector clocks on and off at the bucket level. + +It is important to understand that certain failure modes can potentially result +in the system having not just two but several versions of the same data. Updates +in the presence of network partitions and node failures can potentially result +in an object having distinct version sub-histories, which the system will need +to reconcile in the future. This requires us to design applications that +explicitly acknowledge the possibility of multiple versions of the same data (in +order to never lose any updates). + +> Ditto. + +Dynamo uses vector clocks [12] in order to capture causality between different +versions of the same object. A vector clock is effectively a list of (node, +counter) pairs. One vector clock is associated with every version of every +object. One can determine whether two versions of an object are on parallel +branches or have a causal ordering, by examine their vector clocks. If the +counters on the first object’s clock are less-than-or-equal to all of the nodes +in the second clock, then the first is an ancestor of the second and can be +forgotten. Otherwise, the two changes are considered to be in conflict and +require reconciliation. 
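+
+> To make the ancestor test concrete, here is a minimal Erlang sketch
+> that represents a vector clock as a list of `{Node, Counter}` pairs,
+> as the paper describes (illustrative only, not Riak KV's actual
+> implementation):
+>
+> ```erlang
+> %% ClockA is an ancestor of ClockB iff every entry in ClockA is
+> %% matched and dominated by an entry for the same node in ClockB.
+> descends(ClockA, ClockB) ->
+>     lists:all(fun({Node, CounterA}) ->
+>                   case lists:keyfind(Node, 1, ClockB) of
+>                       {Node, CounterB} -> CounterA =< CounterB;
+>                       false            -> false
+>                   end
+>               end, ClockA).
+> ```
+>
+> For example, `descends([{a,1}], [{a,2}])` returns `true`, so the first
+> version can be forgotten; the clocks `[{a,2},{b,1}]` and `[{a,2},{c,1}]`
+> fail the test in both directions, so those versions conflict and must
+> be reconciled.
+
+<!-- Random comment to add some padding between blockquotes -->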
+
+> As you may have already figured out, Riak KV uses vector clocks for object
+> versioning, too. Here are a whole host of resources to keep you busy for a while:
+>
+> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vector-clock)
+>
+> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/)
+> |
+> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
+>
+> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
+>
+> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)
+
+In Dynamo, when a client wishes to update an object, it must specify which
+version it is updating. This is done by passing the context it obtained from an
+earlier read operation, which contains the vector clock information. Upon
+processing a read request, if Dynamo has access to multiple branches that cannot
+be syntactically reconciled, it will return all the objects at the leaves, with
+the corresponding version information in the context. An update using this
+context is considered to have reconciled the divergent versions and the branches
+are collapsed into a single new version.
+
+**<figure id="figure-3" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure3.png">
+  <figcaption>
+    Figure 3: Version evolution of an object over time.
+  </figcaption>
+</figure>**
+
+To illustrate the use of vector clocks, let us consider the example shown in
+<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say
+Sx) that handles the write for this key increases its sequence number and uses
+it to create the data's vector clock. The system now has the object D1 and its
+associated clock [(Sx, 1)]. The client updates the object. Assume the same node
+handles this request as well. The system now also has object D2 and its
+associated clock [(Sx, 2)]. D2 descends from D1 and therefore overwrites D1;
+however, there may be replicas of D1 lingering at nodes that have not yet seen
+D2. Let us assume that the same client updates the object again and a different
+server (say Sy) handles the request. The system now has data D3 and its
+associated clock [(Sx, 2), (Sy, 1)].
+
+Next assume a different client reads D2 and then tries to update it, and another
+node (say Sz) does the write. The system now has D4 (descendant of D2) whose
+version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2 could
+determine, upon receiving D4 and its clock, that D1 and D2 are overwritten by
+the new data and can be garbage collected. A node that is aware of D3 and
+receives D4 will find that there is no causal relation between them. In other
+words, there are changes in D3 and D4 that are not reflected in each other. Both
+versions of the data must be kept and presented to a client (upon a read) for
+semantic reconciliation.
+
+Now assume some client reads both D3 and D4 (the context will reflect that both
+values were found by the read). The read's context is a summary of the clocks of
+D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client performs the
+reconciliation and node Sx coordinates the write, Sx will update its sequence
+number in the clock. The new data D5 will have the following clock: [(Sx, 3),
+(Sy, 1), (Sz, 1)].
+
+A possible issue with vector clocks is that the size of vector clocks may grow
+if many servers coordinate the writes to an object.
+In practice, this is not likely because the writes are usually handled by one
+of the top N nodes in the preference list. In case of network partitions or
+multiple server failures, write requests may be handled by nodes that are not
+in the top N nodes in the preference list, causing the size of the vector clock
+to grow. In these scenarios, it is desirable to limit the size of the vector
+clock. To this end, Dynamo employs the following clock truncation scheme: Along
+with each (node, counter) pair, Dynamo stores a timestamp that indicates the
+last time the node updated the data item. When the number of (node, counter)
+pairs in the vector clock reaches a threshold (say 10), the oldest pair is
+removed from the clock. Clearly, this truncation scheme can lead to
+inefficiencies in reconciliation as the descendant relationships cannot be
+derived accurately. However, this problem has not surfaced in production and
+therefore this issue has not been thoroughly investigated.
+
+> Riak KV does a certain amount of vector clock pruning to ensure that vector
+> clock growth is kept under control.
+
+
+### 4.5 Execution of get () and put () operations
+
+Any storage node in Dynamo is eligible to receive client get and put operations
+for any key. In this section, for the sake of simplicity, we describe how these
+operations are performed in a failure-free environment and in the subsequent
+section we describe how read and write operations are executed during failures.
+
+> Any node in the Riak KV ring can coordinate a request. The Riak KV information
+> in this section applies to a failure-free environment.
+
+Both get and put operations are invoked using Amazon’s infrastructure-specific
+request processing framework over HTTP. There are two strategies that a client
+can use to select a node: (1) route its request through a generic load balancer
+that will select a node based on load information, or (2) use a partition-aware
+client library that routes requests directly to the appropriate coordinator
+nodes. The advantage of the first approach is that the client does not have to
+link any code specific to Dynamo in its application, whereas the second strategy
+can achieve lower latency because it skips a potential forwarding step.
+
+A node handling a read or write operation is known as the coordinator.
+Typically, this is the first among the top N nodes in the preference list. If
+the requests are received through a load balancer, requests to access a key may
+be routed to any random node in the ring. In this scenario, the node that
+receives the request will not coordinate it if the node is not in the top N of
+the requested key’s preference list. Instead, that node will forward the request
+to the first among the top N nodes in the preference list.
+
+Read and write operations involve the first N healthy nodes in the preference
+list, skipping over those that are down or inaccessible. When all nodes are
+healthy, the top N nodes in a key’s preference list are accessed. When there are
+node failures or network partitions, nodes that are lower ranked in the
+preference list are accessed.
+
+To maintain consistency among its replicas, Dynamo uses a consistency protocol
+similar to those used in quorum systems. This protocol has two key configurable
+values: R and W. R is the minimum number of nodes that must participate in a
+successful read operation. W is the minimum number of nodes that must
+participate in a successful write operation. Setting R and W such that R + W > N
+yields a quorum-like system. In this model, the latency of a get (or put)
+operation is dictated by the slowest of the R (or W) replicas. For this reason,
+R and W are usually configured to be less than N, to provide better latency.
+
+> Riak KV makes use of the same values. But, thanks to our concept of buckets,
+> we made it a bit more customizable. The default R and W values are set at the
+> bucket level but can be configured at the request level if the developer deems
+> it necessary for certain data. "Quorum" as described in Dynamo is the default
+> setting in Riak KV.
+>
+> Some more resources on R and W:
+>
+> [REST API]({{<baseurl>}}riak/kv/2.9.4/developing/api/http/)
+>
+> [Writing Data]({{<baseurl>}}riak/kv/2.9.4/developing/usage/creating-objects/)
+>
+> [Reading Data]({{<baseurl>}}riak/kv/2.9.4/developing/usage/reading-objects/)
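+
+> As a hypothetical illustration of per-request values using the Ruby client
+> (a sketch that assumes your cluster and bucket are already set up; option
+> names follow the client's conventions):
+>
+>     require 'riak'
+>
+>     client = Riak::Client.new(nodes: [{host: '10.0.0.1'}])
+>     bucket = client.bucket('carts')
+>
+>     # Read with a per-request R value of 1, favoring latency over consistency
+>     cart = bucket.get('user-123', r: 1)
+>
+>     # Write requiring W of 3 responses, one of them durable (DW)
+>     cart.data = { 'items' => ['book', 'lamp'] }
+>     cart.store(w: 3, dw: 1)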
+
+Upon receiving a put() request for a key, the coordinator generates the vector
+clock for the new version and writes the new version locally. The coordinator
+then sends the new version (along with the new vector clock) to the N highest-
+ranked reachable nodes. If at least W-1 nodes respond, then the write is
+considered successful.
+
+> In Riak KV a write is considered successful when the total number of
+> responding writes equals W. This need not be a durable write, which is a
+> separate value in Riak KV labeled DW.
+
+Similarly, for a get() request, the coordinator requests all existing versions
+of data for that key from the N highest-ranked reachable nodes in the preference
+list for that key, and then waits for R responses before returning the result to
+the client. If the coordinator ends up gathering multiple versions of the data,
+it returns all the versions it deems to be causally unrelated. The divergent
+versions are then reconciled and the reconciled version superseding the current
+versions is written back.
+
+> Same for Riak KV. Reconciling divergent versions in Riak KV is called
+> [Read Repair]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication/#read-repair).
+
+
+### 4.6 Handling Failures: Hinted Handoff
+
+If Dynamo used a traditional quorum approach, it would be unavailable during
+server failures and network partitions, and would have reduced durability even
+under the simplest of failure conditions. To remedy this, it does not enforce
+strict quorum membership and instead it uses a “sloppy quorum”; all read and
+write operations are performed on the first N healthy nodes from the preference
+list, which may not always be the first N nodes encountered while walking the
+consistent hashing ring.
+
+> [Hinted handoff] is built into Riak KV's core.
+>
+> You can glimpse Riak KV's preference list (or *preflist*) calculation in
+> the [Replication] walkthrough.
+
+[Hinted handoff]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#hinted-handoff
+[Replication]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/replication/
+
+Consider the example of Dynamo configuration given in <a href="#figure-2">Figure
+2</a> with N=3. In this example, if node A is temporarily down or unreachable
+during a write operation then a replica that would normally have lived on A will
+now be sent to node D. This is done to maintain the desired availability and
+durability guarantees. The replica sent to D will have a hint in its metadata
+that suggests which node was the intended recipient of the replica (in this case
+A). Nodes that receive hinted replicas will keep them in a separate local
+database that is scanned periodically.
+Upon detecting that A has recovered, D will attempt to deliver the replica to
+A. Once the transfer succeeds, D may delete the object from its local store
+without decreasing the total number of replicas in the system.
+
+Using hinted handoff, Dynamo ensures that read and write operations do not fail
+due to temporary node or network failures. Applications that need the highest
+level of availability can set W to 1, which ensures that a write is accepted as
+long as a single node in the system has durably written the key to its local
+store. Thus, the write request is only rejected if all nodes in the system are
+unavailable. However, in practice, most Amazon services in production set a
+higher W to meet the desired level of durability. A more detailed discussion of
+configuring N, R and W follows in section 6.
+
+> As mentioned previously, Riak KV does not require that a write be durable,
+> only that a vnode responds in the affirmative. If you require a durable write
+> in the way mentioned here, use DW.
+
+It is imperative that a highly available storage system be capable of handling
+the failure of an entire data center(s). Data center failures happen due to
+power outages, cooling failures, network failures, and natural disasters. Dynamo
+is configured such that each object is replicated across multiple data centers.
+In essence, the preference list of a key is constructed such that the storage
+nodes are spread across multiple data centers. These datacenters are connected
+through high speed network links. This scheme of replicating across multiple
+datacenters allows us to handle entire data center failures without a data
+outage.
+
+> [Multi Datacenter Replication] was previously only implemented in the
+> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. Now it
+> is available in all versions from Riak KV 2.9.4 onwards.
+
+[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/2.9.4/using/reference/v3-multi-datacenter/architecture/
+[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
+
+
+### 4.7 Handling permanent failures: Replica synchronization
+
+Hinted handoff works best if the system membership churn is low and node
+failures are transient. There are scenarios under which hinted replicas become
+unavailable before they can be returned to the original replica node. To handle
+this and other threats to durability, Dynamo implements an anti-entropy (replica
+synchronization) protocol to keep the replicas synchronized.
+
+> Read repair, mentioned above, is the simplest form of anti-entropy. But it is
+> passive, not active as this section describes.
+
+To detect the inconsistencies between replicas faster and to minimize the amount
+of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a hash tree
+where leaves are hashes of the values of individual keys. Parent nodes higher in
+the tree are hashes of their respective children. The principal advantage of a
+Merkle tree is that each branch of the tree can be checked independently without
+requiring nodes to download the entire tree or the entire data set. Moreover,
+Merkle trees help in reducing the amount of data that needs to be transferred
+while checking for inconsistencies among replicas. For instance, if the hash
+values of the root of two trees are equal, then the values of the leaf nodes in
+the tree are equal and the nodes require no synchronization. If not, it implies
+that the values of some replicas are different.
+In such cases, the nodes may exchange the hash values of children and the
+process continues until it reaches the leaves of the trees, at which point the
+hosts can identify the keys that are “out of sync”. Merkle trees minimize the
+amount of data that needs to be transferred for synchronization and reduce the
+number of disk reads performed during the anti-entropy process.
+
+> Riak KV implements a Merkle-tree based Active Anti-Entropy (*AAE*).
+
+Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
+separate Merkle tree for each key range (the set of keys covered by a virtual
+node) it hosts. This allows nodes to compare whether the keys within a key range
+are up-to-date. In this scheme, two nodes exchange the root of the Merkle tree
+corresponding to the key ranges that they host in common. Subsequently, using
+the tree traversal scheme described above the nodes determine if they have any
+differences and perform the appropriate synchronization action. The disadvantage
+with this scheme is that many key ranges change when a node joins or leaves the
+system thereby requiring the tree(s) to be recalculated. This issue is
+addressed, however, by the refined partitioning scheme described in Section 6.2.
+
+
+### 4.8 Membership and Failure Detection
+
+> This section is well expressed in [Adding and Removing Nodes] and
+> [Failure Scenarios].
+
+[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency/
+
+#### 4.8.1 Ring Membership
+
+> Riak KV operators can trigger node management via the
+> [riak-admin command-line tool].
+
+[riak-admin command-line tool]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/
+
+In Amazon’s environment, node outages (due to failures and maintenance tasks)
+are often transient but may last for extended intervals. A node outage rarely
+signifies a permanent departure and therefore should not result in rebalancing
+of the partition assignment or repair of the unreachable replicas. Similarly,
+manual error could result in the unintentional startup of new Dynamo nodes. For
+these reasons, it was deemed appropriate to use an explicit mechanism to
+initiate the addition and removal of nodes from a Dynamo ring. An administrator
+uses a command line tool or a browser to connect to a Dynamo node and issue a
+membership change to join a node to a ring or remove a node from a ring. The
+node that serves the request writes the membership change and its time of issue
+to persistent store. The membership changes form a history because nodes can be
+removed and added back multiple times.
+
+> Nodes are manually added using the `riak-admin cluster join` command.
+>
+> When a node permanently departs, rebalancing is triggered using the
+> `riak-admin cluster leave` command.
+
+A gossip-based protocol propagates membership changes and maintains an
+eventually consistent view of membership. Each node contacts a peer chosen at
+random every second and the two nodes efficiently reconcile their persisted
+membership change histories.
+
+> Riak KV's ring state holds membership information, and is propagated via
+> [gossiping], including random reconciliation, defaulting to once a minute.
+
+[gossiping]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#gossiping
+
+When a node starts for the first time, it chooses its set of tokens (virtual
+nodes in the consistent hash space) and maps nodes to their respective token
+sets.
+The mapping is persisted on disk and initially contains only the local
+node and token set. The mappings stored at different Dynamo nodes are reconciled
+during the same communication exchange that reconciles the membership change
+histories. Therefore, partitioning and placement information also propagates via
+the gossip-based protocol and each storage node is aware of the token ranges
+handled by its peers. This allows each node to forward a key’s read/write
+operations to the right set of nodes directly.
+
+> These tokens are vnodes (virtual nodes) in Riak KV.
+
+
+#### 4.8.2 External Discovery
+
+The mechanism described above could temporarily result in a logically
+partitioned Dynamo ring. For example, the administrator could contact node A to
+join A to the ring, then contact node B to join B to the ring. In this scenario,
+nodes A and B would each consider itself a member of the ring, yet neither would
+be immediately aware of the other. To prevent logical partitions, some Dynamo
+nodes play the role of seeds. Seeds are nodes that are discovered via an
+external mechanism and are known to all nodes. Because all nodes eventually
+reconcile their membership with a seed, logical partitions are highly unlikely.
+Seeds can be obtained either from static configuration or from a configuration
+service. Typically seeds are fully functional nodes in the Dynamo ring.
+
+> To rectify these sorts of logical partitions, multiple Riak cluster changes
+> are configured as one batch. Any changes must first be viewed with
+> `riak-admin cluster plan`, then the changes are committed with `riak-admin
+> cluster commit`. The new ring state is gossiped.
+>
+> See _[The Node Join Process]_ for more.
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+
+
+#### 4.8.3 Failure Detection
+
+Failure detection in Dynamo is used to avoid attempts to communicate with
+unreachable peers during get() and put() operations and when transferring
+partitions and hinted replicas. For the purpose of avoiding failed attempts at
+communication, a purely local notion of failure detection is entirely
+sufficient: node A may consider node B failed if node B does not respond to node
+A’s messages (even if B is responsive to node C’s messages). In the presence of
+a steady rate of client requests generating inter-node communication in the
+Dynamo ring, a node A quickly discovers that a node B is unresponsive when B
+fails to respond to a message; Node A then uses alternate nodes to service
+requests that map to B's partitions; A periodically retries B to check for the
+latter's recovery. In the absence of client requests to drive traffic between
+two nodes, neither node really needs to know whether the other is reachable and
+responsive.
+
+Decentralized failure detection protocols use a simple gossip-style protocol
+that enables each node in the system to learn about the arrival (or departure)
+of other nodes. For detailed information on decentralized failure detectors and
+the parameters affecting their accuracy, the interested reader is referred to
+[8]. Early designs of Dynamo used a decentralized failure detector to maintain
+a globally consistent view of failure state. Later it was determined that the
+explicit node join and leave methods obviate the need for a global view of
+failure state.
+This is because nodes are notified of permanent node additions
+and removals by the explicit node join and leave methods, and temporary node
+failures are detected by the individual nodes when they fail to communicate with
+others (while forwarding requests).
+
+> Riak KV follows the same mechanism, by manually triggering permanent ring
+> state changes, and gossiping the new state.
+
+
+### 4.9 Adding/Removing Storage Nodes
+
+When a new node (say X) is added into the system, it gets assigned a number of
+tokens that are randomly scattered on the ring. For every key range that is
+assigned to node X, there may be a number of nodes (less than or equal to N)
+that are currently in charge of handling keys that fall within its token range.
+Due to the allocation of key ranges to X, some existing nodes no longer have to
+store some of their keys, and these nodes transfer those keys to X. Let us
+consider a simple bootstrapping scenario where node X is added to the ring shown
+in <a href="#figure-2">Figure 2</a> between A and B. When X is added to the
+system, it is in charge of storing keys in the ranges (F, G], (G, A] and (A, X].
+As a consequence, nodes B, C and D no longer have to store the keys in these
+respective ranges. Therefore, nodes B, C, and D will offer to, and upon
+confirmation from X, transfer the appropriate set of keys. When a node is
+removed from the system, the reallocation of keys happens in a reverse process.
+
+> Riak KV does not randomly assign vnodes, but rather iterates through the list
+> of partitions, assigning them to nodes in a round-robin style.
+
+Operational experience has shown that this approach distributes the load of key
+distribution uniformly across the storage nodes, which is important to meet the
+latency requirements and to ensure fast bootstrapping. Finally, by adding a
+confirmation round between the source and the destination, it is made sure that
+the destination node does not receive any duplicate transfers for a given key
+range.
+
+
+## 5. Implementation
+
+In Dynamo, each storage node has three main software components: request
+coordination, membership and failure detection, and a local persistence engine.
+All these components are implemented in Java.
+
+> Riak KV is implemented in Erlang. Request coordination and membership behavior
+> is defined by [riak_core] and implemented by [Riak KV].
+
+[riak_core]: http://github.com/basho/riak_core
+[Riak KV]: http://github.com/basho/riak_kv
+
+Dynamo’s local persistence component allows for different storage engines to be
+plugged in. Engines that are in use are Berkeley Database (BDB) Transactional
+Data Store, BDB Java Edition, MySQL, and an in-memory buffer with persistent
+backing store. The main reason for designing a pluggable persistence component
+is to choose the storage engine best suited for an application’s access
+patterns. For instance, BDB can handle objects typically in the order of tens of
+kilobytes whereas MySQL can handle objects of larger sizes. Applications choose
+Dynamo’s local persistence engine based on their object size distribution. The
+majority of Dynamo’s production instances use BDB Transactional Data Store.
+
+> Riak KV ships with various [backend options]. [Bitcask] is the default, but
+> [LevelDB] and the main [Memory] backend are also used heavily in production
+> (in that order). You can also use more than one backend in production via the
+> [Multi] backend configuration.
+>
+> Bitcask is a fast and reliable choice, but does have some limitations at very
+> large scales. For larger clusters, you may want to choose LevelDB (which also
+> supports [secondary indexes]). The Memory backend is an excellent choice when
+> speed is important and durability is not. It also has TTL support.
+
+[backend options]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/
+[Bitcask]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask/
+[LevelDB]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb/
+[Memory]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/memory/
+[Multi]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/multi/
+[secondary indexes]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes/
+
+The request coordination component is built on top of an event-driven messaging
+substrate where the message processing pipeline is split into multiple stages
+similar to the SEDA architecture [24]. All communications are implemented using
+Java NIO channels. The coordinator executes the read and write requests on
+behalf of clients by collecting data from one or more nodes (in the case of
+reads) or storing data at one or more nodes (for writes). Each client request
+results in the creation of a state machine on the node that received the client
+request. The state machine contains all the logic for identifying the nodes
+responsible for a key, sending the requests, waiting for responses, potentially
+doing retries, processing the replies and packaging the response to the client.
+Each state machine instance handles exactly one client request. For instance, a
+read operation implements the following state machine: (i) send read requests to
+the nodes, (ii) wait for the minimum number of required responses, (iii) if too
+few replies were received within a given time bound, fail the request, (iv)
+otherwise gather all the data versions and determine the ones to be returned and
+(v) if versioning is enabled, perform syntactic reconciliation and generate an
+opaque write context that contains the vector clock that subsumes all the
+remaining versions. For the sake of brevity, the failure handling and retry
+states are left out.
+
+> Request coordination in Riak KV uses Erlang message passing, but follows a
+> similar state machine.
+
+After the read response has been returned to the caller, the state machine waits
+for a small period of time to receive any outstanding responses. If stale
+versions were returned in any of the responses, the coordinator updates those
+nodes with the latest version. This process is called read repair because it
+repairs replicas that have missed a recent update at an opportunistic time and
+relieves the anti-entropy protocol from having to do it.
+
+> Riak KV implements [Read Repair].
+
+[Read Repair]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication/#read-repair
+
+As noted earlier, write requests are coordinated by one of the top N nodes in
+the preference list. Although it is desirable always to have the first node
+among the top N to coordinate the writes, thereby serializing all writes at a
+single location, this approach has led to uneven load distribution resulting in
+SLA violations. This is because the request load is not uniformly distributed
+across objects. To counter this, any of the top N nodes in the preference list
+is allowed to coordinate the writes. In particular, since each write usually
+follows a read operation, the coordinator for a write is chosen to be the node
+that replied fastest to the previous read operation, which is stored in the
+context information of the request.
+This optimization enables us to pick the node that has the data that was read
+by the preceding read operation, thereby increasing the chances of getting
+“read-your-writes” consistency. It also reduces variability in the performance
+of the request handling, which improves the performance at the 99.9th
+percentile.
+
+
+## 6. Experiences & Lessons Learned
+
+> Much of this section relates to benchmarks run against Dynamo. You can run
+> [Basho Bench] against your own Riak cluster to discover your own
+> optimal values.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.4/using/performance/benchmarking/
+
+Dynamo is used by several services with different configurations. These
+instances differ by their version reconciliation logic and read/write quorum
+characteristics. The following are the main patterns in which Dynamo is used:
+
+* Business logic specific reconciliation: This is a popular use case for Dynamo.
+Each data object is replicated across multiple nodes. In case of divergent
+versions, the client application performs its own reconciliation logic. The
+shopping cart service discussed earlier is a prime example of this category. Its
+business logic reconciles objects by merging different versions of a customer’s
+shopping cart.
+
+> Riak KV currently supports simple conflict resolution by way of read-repair,
+> remanding more complex reconciliation to the client. There are several tools
+> to help simplify this task, such as [Statebox].
+>
+> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative
+> Replicated Data Types)], for reconciling common data types like sets and
+> counters.
+
+[Statebox]: https://github.com/mochi/statebox_riak
+[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/2.9.4/developing/data-types/
+
+
+* Timestamp based reconciliation: This case differs from the previous one only
+in the reconciliation mechanism. In case of divergent versions, Dynamo performs
+simple timestamp based reconciliation logic of “last write wins”; i.e., the
+object with the largest physical timestamp value is chosen as the correct
+version. The service that maintains customer’s session information is a good
+example of a service that uses this mode.
+
+> Riak also supports this for high-performance cases where accuracy is less
+> important than speed.
+
+* High performance read engine: While Dynamo is built to be an “always
+writeable” data store, a few services are tuning its quorum characteristics and
+using it as a high performance read engine. Typically, these services have a
+high read request rate and only a small number of updates. In this
+configuration, typically R is set to be 1 and W to be N. For these services,
+Dynamo provides the ability to partition and replicate their data across
+multiple nodes thereby offering incremental scalability. Some of these instances
+function as the authoritative persistence cache for data stored in more heavy
+weight backing stores. Services that maintain product catalog and promotional
+items fit in this category.
+
+> Riak can be used in this manner.
+
+The main advantage of Dynamo is that its client applications can tune the values
+of N, R and W to achieve their desired levels of performance, availability and
+durability. For instance, the value of N determines the durability of each
+object. A typical value of N used by Dynamo’s users is 3.
+
+The values of W and R impact object availability, durability and consistency.
+For instance, if W is set to 1, then the system will never reject a write
+request as long as there is at least one node in the system that can
+successfully process a write request. However, low values of W and R can
+increase the risk of inconsistency as write requests are deemed successful and
+returned to the clients even if they are not processed by a majority of the
+replicas. This also introduces a vulnerability window for durability when a
+write request is successfully returned to the client even though it has been
+persisted at only a small number of nodes.
+
+Traditional wisdom holds that durability and availability go hand-in-hand.
+However, this is not necessarily true here. For instance, the vulnerability
+window for durability can be decreased by increasing W. This may increase the
+probability of rejecting requests (thereby decreasing availability) because more
+storage hosts need to be alive to process a write request.
+
+The common (N,R,W) configuration used by several instances of Dynamo is (3,2,2).
+These values are chosen to meet the necessary levels of performance, durability,
+consistency, and availability SLAs.
+
+All the measurements presented in this section were taken on a live system
+operating with a configuration of (3,2,2) and running a couple of hundred nodes
+with homogeneous hardware configurations. As mentioned earlier, each instance of
+Dynamo contains nodes that are located in multiple datacenters. These
+datacenters are typically connected through high speed network links. Recall
+that to generate a successful get (or put) response R (or W) nodes need to
+respond to the coordinator. Clearly, the network latencies between datacenters
+affect the response time and the nodes (and their datacenter locations) are
+chosen such that the applications’ target SLAs are met.
+
+> Ditto for Riak.
+
+### 6.1 Balancing Performance and Durability
+
+While Dynamo’s principal design goal is to build a highly available data store,
+performance is an equally important criterion in Amazon’s platform. As noted
+earlier, to provide a consistent customer experience, Amazon’s services set
+their performance targets at higher percentiles (such as the 99.9th or 99.99th
+percentiles). A typical SLA required of services that use Dynamo is that 99.9%
+of the read and write requests execute within 300ms.
+
+Since Dynamo is run on standard commodity hardware components that have far less
+I/O throughput than high-end enterprise servers, providing consistently high
+performance for read and write operations is a non-trivial task. The involvement
+of multiple storage nodes in read and write operations makes it even more
+challenging, since the performance of these operations is limited by the slowest
+of the R or W replicas. <a href="#figure-4">Figure 4</a> shows the average and
+99.9th percentile latencies of Dynamo’s read and write operations during a
+period of 30 days. As seen in the figure, the latencies exhibit a clear diurnal
+pattern, which is a result of the diurnal pattern in the incoming request rate
+(i.e., there is a significant difference in request rate between the daytime and
+night). Moreover, the write latencies are higher than read latencies, obviously
+because write operations always result in disk access. Also, the 99.9th
+percentile latencies are around 200 ms and are an order of magnitude higher than
+the averages.
+This is because the 99.9th percentile latencies are affected by
+several factors such as variability in request load, object sizes, and locality
+patterns.
+
+**<figure id="figure-4" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure4.png">
+  <figcaption>
+    Figure 4: Average and 99.9 percentiles of latencies for read and write
+    requests during our peak request season of December 2006. The intervals
+    between consecutive ticks in the x-axis correspond to 12 hours. Latencies
+    follow a diurnal pattern similar to the request rate and 99.9 percentile
+    latencies are an order of magnitude higher than averages.
+  </figcaption>
+</figure>**
+
+While this level of performance is acceptable for a number of services, a few
+customer-facing services required higher levels of performance. For these
+services, Dynamo provides the ability to trade off durability guarantees for
+performance. In this optimization, each storage node maintains an object buffer
+in its main memory. Each write operation is stored in the buffer and gets
+periodically written to storage by a writer thread. In this scheme, read
+operations first check if the requested key is present in the buffer. If so, the
+object is read from the buffer instead of the storage engine.
+
+> This is more similar to Riak's W value, since only DW requires a durable write
+> to respond as a success.
+
+This optimization has resulted in lowering the 99.9th percentile latency by a
+factor of 5 during peak traffic even for a very small buffer of a thousand
+objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure,
+write buffering smoothes out higher percentile latencies. Obviously, this scheme
+trades durability for performance. In this scheme, a server crash can result in
+missing writes that were queued up in the buffer. To reduce the durability risk,
+the write operation is refined to have the coordinator choose one out of the N
+replicas to perform a “durable write”. Since the coordinator waits only for W
+responses, the performance of the write operation is not affected by the
+performance of the durable write operation performed by a single replica.
+
+**<figure id="figure-5" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure5.png">
+  <figcaption>
+    Figure 5: Comparison of performance of 99.9th percentile latencies for
+    buffered vs. non-buffered writes over a period of 24 hours. The intervals
+    between consecutive ticks in the x-axis correspond to one hour.
+  </figcaption>
+</figure>**
+
+> Setting DW=1 will replicate this behavior.
+
+
+### 6.2 Ensuring Uniform Load distribution
+
+Dynamo uses consistent hashing to partition its key space across its replicas
+and to ensure uniform load distribution. A uniform key distribution can help us
+achieve uniform load distribution assuming the access distribution of keys is
+not highly skewed. In particular, Dynamo’s design assumes that even where there
+is a significant skew in the access distribution there are enough keys in the
+popular end of the distribution so that the load of handling popular keys can be
+spread across the nodes uniformly through partitioning. This section discusses
+the load imbalance seen in Dynamo and the impact of different partitioning
+strategies on load distribution.
+
+> Riak uses SHA-1-based consistent hashing for [partitioning].
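+
+> As a rough, hypothetical sketch of that idea (not Riak's exact key hashing,
+> which operates on the bucket/key pair in Erlang), mapping a key onto one of
+> Q equal-sized partitions of the 160-bit SHA-1 ring might look like:
+>
+>     require 'digest'
+>
+>     RING_SIZE  = 2**160  # size of the SHA-1 output space
+>     PARTITIONS = 64      # Q, a power of two, as in a Riak ring
+>
+>     def partition_for(bucket, key)
+>       hash = Digest::SHA1.hexdigest("#{bucket}/#{key}").to_i(16)
+>       hash / (RING_SIZE / PARTITIONS)  # index of the owning partition
+>     end
+>
+>     partition_for('artists', 'REM')  #=> an integer between 0 and 63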
+
+[partitioning]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication/#understanding-replication-by-example
+
+To study the load imbalance and its correlation with request load, the total
+number of requests received by each node was measured for a period of 24 hours,
+broken down into intervals of 30 minutes. In a given time window, a node is
+considered to be “in-balance” if the node’s request load deviates from the
+average load by a value less than a certain threshold (here 15%). Otherwise,
+the node was deemed “out-of-balance”. <a href="#figure-6">Figure 6</a> presents
+the fraction of nodes that are “out-of-balance” (henceforth, “imbalance ratio”)
+during this time period. For reference, the corresponding request load received
+by the entire system during this time period is also plotted. As seen in the
+figure, the imbalance ratio decreases with increasing load. For instance, during
+low loads the imbalance ratio is as high as 20% and during high loads it is
+close to 10%. Intuitively, this can be explained by the fact that under high
+loads, a large number of popular keys are accessed and due to uniform
+distribution of keys the load is evenly distributed. However, during low loads
+(where load is 1/8th of the measured peak load), fewer popular keys are
+accessed, resulting in a higher load imbalance.
+
+**<figure id="figure-6" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure6.png">
+  <figcaption>
+    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
+    request load is above a certain threshold from the average system load) and
+    their corresponding request load. The interval between ticks in x-axis
+    corresponds to a time period of 30 minutes.
+  </figcaption>
+</figure>**
+
+<i>This section discusses how Dynamo’s partitioning scheme has evolved over time
+and its implications on load distribution.</i>
+
+<strong>Strategy 1:</strong> T random tokens per node and partition by token
+value: This was the initial strategy deployed in production (and described in
+Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
+at random from the hash space). The tokens of all nodes are ordered according to
+their values in the hash space. Every two consecutive tokens define a range. The
+last token and the first token form a range that "wraps" around from the highest
+value to the lowest value in the hash space. Because the tokens are chosen
+randomly, the ranges vary in size. As nodes join and leave the system, the token
+set changes and consequently the ranges change. Note that the space needed to
+maintain the membership at each node increases linearly with the number of nodes
+in the system.
+
+> Riak uses equal-sized partitions with a round-robin distribution, not
+> variably-sized partitions that are randomly distributed.
+
+While using this strategy, the following problems were encountered. First, when
+a new node joins the system, it needs to “steal” its key ranges from other
+nodes. However, the nodes handing the key ranges off to the new node have to
+scan their local persistence store to retrieve the appropriate set of data
+items. Note that performing such a scan operation on a production node is tricky
+as scans are highly resource intensive operations and they need to be executed
+in the background without affecting the customer performance. This requires us
+to run the bootstrapping task at the lowest priority.
+However, this significantly slows the bootstrapping process, and during the busy
+shopping season, when the nodes are handling millions of requests a day, the
+bootstrapping has taken almost a day to complete. Second, when a node
+joins/leaves the system, the key ranges handled by many nodes change and the
+Merkle trees for the new ranges need to be recalculated, which is a non-trivial
+operation to perform on a production system. Finally, there was no easy way to
+take a snapshot of the entire key space due to the randomness in key ranges, and
+this made the process of archival complicated. In this scheme, archiving the
+entire key space requires us to retrieve the keys from each node separately,
+which is highly inefficient.
+
+The fundamental issue with this strategy is that the schemes for data
+partitioning and data placement are intertwined. For instance, in some cases, it
+is preferred to add more nodes to the system in order to handle an increase in
+request load. However, in this scenario, it is not possible to add nodes without
+affecting data partitioning. Ideally, it is desirable to use independent schemes
+for partitioning and placement. To this end, the following strategies were
+evaluated:
+
+<strong>Strategy 2:</strong> T random tokens per node and equal sized
+partitions: In this strategy, the hash space is divided into Q equally sized
+partitions/ranges and each node is assigned T random tokens. Q is usually set
+such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In
+this strategy, the tokens are only used to build the function that maps values
+in the hash space to the ordered lists of nodes and not to decide the
+partitioning. A partition is placed on the first N unique nodes that are
+encountered while walking the consistent hashing ring clockwise from the end of
+the partition. <a href="#figure-7">Figure 7</a> illustrates this strategy for
+N=3. In this example, nodes A, B, C are encountered while walking the ring from
+the end of the partition that contains key k1. The primary advantages of this
+strategy are: (i) decoupling of partitioning and partition placement, and (ii)
+enabling the possibility of changing the placement scheme at runtime.
+
+> As mentioned before, Riak uses equal-sized partitions, but not
+> random distribution.
+
+**<figure id="figure-7" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure7-small.png">
+  <figcaption>
+    Figure 7: Partitioning and placement of keys in the three strategies. A, B,
+    and C depict the three unique nodes that form the preference list for the
+    key k1 on the consistent hashing ring (N=3). The shaded area indicates the
+    key range for which nodes A, B, and C form the preference list. Dark arrows
+    indicate the token locations for various nodes.
+  </figcaption>
+</figure>**
+
+<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
+Similar to strategy 2, this strategy divides the hash space into Q equally sized
+partitions and the placement of partitions is decoupled from the partitioning
+scheme. Moreover, each node is assigned Q/S tokens, where S is the number of
+nodes in the system. When a node leaves the system, its tokens are randomly
+distributed to the remaining nodes such that these properties are preserved.
+Similarly, when a node joins the system, it "steals" tokens from nodes in the
+system in a way that preserves these properties.
+
+> Riak most closely follows strategy 3.
+>
+> See [The Node Join Process] and [Replacing a Node].
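+
+> A hypothetical sketch of the round-robin claim mentioned above (a
+> simplification of Riak's actual claim algorithm): Q equal-sized partitions
+> are dealt out to the S member nodes in order.
+>
+>     # Deal Q partition indexes to the nodes in round-robin fashion.
+>     def claim_partitions(q, nodes)
+>       (0...q).map { |i| [i, nodes[i % nodes.size]] }.to_h
+>     end
+>
+>     claim_partitions(8, %w[node1 node2 node3])
+>     #=> {0=>"node1", 1=>"node2", 2=>"node3", 3=>"node1", ...}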
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+[Replacing a Node]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/replacing-node/
+
+The efficiency of these three strategies is evaluated for a system with S=30 and
+N=3. However, comparing these different strategies in a fair manner is hard as
+different strategies have different configurations to tune their efficiency. For
+instance, the load distribution property of strategy 1 depends on the number of
+tokens (i.e., T) while strategy 3 depends on the number of partitions (i.e., Q).
+One fair way to compare these strategies is to evaluate the skew in their load
+distribution while all strategies use the same amount of space to maintain their
+membership information. For instance, in strategy 1 each node needs to maintain
+the token positions of all the nodes in the ring and in strategy 3 each node
+needs to maintain the information regarding the partitions assigned to each
+node.
+
+In our next experiment, these strategies were evaluated by varying the relevant
+parameters (T and Q). The load balancing efficiency of each strategy was
+measured for different sizes of membership information that needs to be
+maintained at each node, where load balancing efficiency is defined as the ratio
+of the average number of requests served by each node to the maximum number of
+requests served by the hottest node.
+
+The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
+figure, strategy 3 achieves the best load balancing efficiency and strategy 2
+has the worst load balancing efficiency. For a brief time, Strategy 2 served as
+an interim setup during the process of migrating Dynamo instances from using
+Strategy 1 to Strategy 3. Compared to Strategy 1, Strategy 3 achieves better
+efficiency and reduces the size of membership information maintained at each
+node by three orders of magnitude. While storage is not a major issue, the nodes
+gossip the membership information periodically and as such it is desirable to
+keep this information as compact as possible. In addition to this, strategy 3 is
+advantageous and simpler to deploy for the following reasons: (i) Faster
+bootstrapping/recovery: Since partition ranges are fixed, they can be stored in
+separate files, meaning a partition can be relocated as a unit by simply
+transferring the file (avoiding random accesses needed to locate specific
+items). This simplifies the process of bootstrapping and recovery. (ii) Ease of
+archival: Periodic archiving of the dataset is a mandatory requirement for
+most of Amazon’s storage services. Archiving the entire dataset stored by Dynamo
+is simpler in strategy 3 because the partition files can be archived separately.
+By contrast, in Strategy 1, the tokens are chosen randomly, and archiving the
+data stored in Dynamo requires retrieving the keys from individual nodes
+separately, which is usually inefficient and slow. The disadvantage of strategy
+3 is that changing the node membership requires coordination in order to
+preserve the properties required of the assignment.
+
+**<figure id="figure-8" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure8.png">
+  <figcaption>
+    Figure 8: Comparison of the load distribution efficiency of different
+    strategies for a system with 30 nodes and N=3 with equal amount of metadata
+    maintained at each node.
+    The values of the system size and number of
+    replicas are based on the typical configuration deployed for the majority
+    of our services.
+  </figcaption>
+</figure>**
+
+### 6.3 Divergent Versions: When and How Many?
+
+As noted earlier, Dynamo is designed to trade off consistency for availability.
+To understand the precise impact of different failures on consistency, detailed
+data is required on multiple factors: outage length, type of failure, component
+reliability, workload, etc. Presenting these numbers in detail is outside of the
+scope of this paper. However, this section discusses a good summary metric: the
+number of divergent versions seen by the application in a live production
+environment.
+
+> This first statement should be read carefully. It's probably more correct to
+> say that Dynamo (and Riak) provides no consistency guarantees, and allows
+> users to trade availability for durability/latency.
+
+Divergent versions of a data item arise in two scenarios. The first is when the
+system is facing failure scenarios such as node failures, data center failures,
+and network partitions. The second is when the system is handling a large number
+of concurrent writers to a single data item and multiple nodes end up
+coordinating the updates concurrently. From both a usability and efficiency
+perspective, it is preferred to keep the number of divergent versions at any
+given time as low as possible. If the versions cannot be syntactically
+reconciled based on vector clocks alone, they have to be passed to the business
+logic for semantic reconciliation. Semantic reconciliation introduces additional
+load on services, so it is desirable to minimize the need for it.
+
+In our next experiment, the number of versions returned to the shopping cart
+service was profiled for a period of 24 hours. During this period, 99.94% of
+requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
+of requests saw 3 versions and 0.00009% of requests saw 4 versions. This shows
+that divergent versions are created rarely.
+
+Experience shows that the increase in the number of divergent versions is
+caused not by failures but by the increase in the number of concurrent
+writers. The increase in the number of concurrent writes is usually triggered by
+busy robots (automated client programs) and rarely by humans. This issue is not
+discussed in detail due to the sensitive nature of the story.
+
+### 6.4 Client-driven or Server-driven Coordination
+
+As mentioned in Section 5, Dynamo has a request coordination component that uses
+a state machine to handle incoming requests. Client requests are uniformly
+assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
+coordinator for a read request. Write requests, on the other hand, will be
+coordinated by a node in the key’s current preference list. This restriction is
+due to the fact that these preferred nodes have the added responsibility of
+creating a new version stamp that causally subsumes the version that has been
+updated by the write request. Note that if Dynamo’s versioning scheme is based
+on physical timestamps, any node can coordinate a write request.
+
+> In Riak, a server-side load-balancer is an optional configuration. You
+> generally use either virtual IPs or reverse-proxies.
+>
+> See [Load Balancing] for more information.
+
+[Load Balancing]: {{<baseurl>}}riak/kv/2.9.4/configuring/load-balancing-proxy/
+
+An alternative approach to request coordination is to move the state machine to
+the client nodes. In this scheme, client applications use a library to perform
+request coordination locally. A client periodically picks a random Dynamo node
+and downloads its current view of Dynamo membership state. Using this
+information the client can determine which set of nodes form the preference list
+for any given key. Read requests can be coordinated at the client node thereby
+avoiding the extra network hop that is incurred if the request were assigned to
+a random Dynamo node by the load balancer. Writes will either be forwarded to a
+node in the key’s preference list or can be coordinated locally if Dynamo is
+using timestamps based versioning.
+
+> Many [client libraries] provide built-in node request coordination.
+>
+> For example, using the Ruby driver, you could specify three nodes like this:
+>
+>     client = Riak::Client.new(nodes: [
+>       {host: '10.0.0.1'},
+>       {host: '10.0.0.2'},
+>       {host: '10.0.0.3'}
+>     ])
+>
+> Note that the Riak clients do not coordinate with Riak's preference list, but
+> simply round-robin requests, letting the Riak cluster handle routing.
+
+[client libraries]: {{<baseurl>}}riak/kv/2.9.4/developing/client-libraries/
+
+An important advantage of the client-driven coordination approach is that a load
+balancer is no longer required to uniformly distribute client load. Fair load
+distribution is implicitly guaranteed by the near uniform assignment of keys to
+the storage nodes. Obviously, the efficiency of this scheme is dependent on how
+fresh the membership information is at the client. Currently, clients poll a
+random Dynamo node every 10 seconds for membership updates. A pull-based
+approach was chosen over a push-based one as the former scales better with a
+large number of clients and requires very little state to be maintained at
+servers regarding clients. However, in the worst case the client can be exposed
+to stale membership for a duration of 10 seconds. If the client detects that its
+membership table is stale (for instance, when some members are unreachable), it
+will immediately refresh its membership information.
+
+<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
+percentile and averages that were observed for a period of 24 hours using
+client-driven coordination compared to the server-driven approach. As seen in
+the table, the client-driven coordination approach reduces the latencies by at
+least 30 milliseconds for 99.9th percentile latencies and decreases the average
+by 3 to 4 milliseconds. The latency improvement is because the client-driven
+approach eliminates the overhead of the load balancer and the extra network hop
+that may be incurred when a request is assigned to a random node. As seen in the
+table, average latencies tend to be significantly lower than latencies at the
+99.9th percentile. This is because Dynamo’s storage engine caches and write
+buffer have good hit ratios. Moreover, since the load balancers and network
+introduce additional variability to the response time, the gain in response time
+is higher for the 99.9th percentile than the average.
+
+<table id="table-2">
+  <caption>
+    Table 2: Performance of client-driven and server-driven
+    coordination approaches.
+  </caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations were not affected significantly. To this end, the
+background tasks were integrated with an admission control mechanism. Each of
+the background tasks uses this controller to reserve runtime slices of the
+resource (e.g., the database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock-contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementation and maintenance of Dynamo. Many Amazon internal services have
+used Dynamo for the past two years and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of their requests and no
+data loss event has occurred to date.
+
+Moreover, the primary advantage of Dynamo is that it provides the necessary
+knobs, in the form of the three parameters (N, R, W), for applications to tune
+their instance based on their needs. Unlike popular commercial data stores,
+Dynamo exposes data consistency and reconciliation logic issues to the
+developers. At the outset, one may expect the application logic to become more
+complex. However, historically, Amazon’s platform is built for high availability
+and many applications are designed to handle different failure modes and
+inconsistencies that may arise.
+Hence, porting such applications to use Dynamo was a relatively
+simple task. For new applications that want to use Dynamo, some analysis is
+required during the initial stages of the development to pick the right conflict
+resolution mechanisms that meet the business case appropriately. Finally, Dynamo
+adopts a full membership model where each node is aware of the data hosted by
+its peers. To do this, each node actively gossips the full routing table with
+other nodes in the system. This model works well for a system that contains a
+couple of hundred nodes. However, scaling such a design to run with tens of
+thousands of nodes is not trivial because the overhead in maintaining the
+routing table increases with the system size. This limitation might be overcome
+by introducing hierarchical extensions to Dynamo. Also, note that this problem
+is actively addressed by O(1) DHT systems (e.g., [14]).
+
+> This is equally true for Riak. As mentioned above, consider running
+> [Basho Bench] to help discover your optimal setup. Nothing will give you
+> better numbers than real experimentation.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.4/using/performance/benchmarking/
+
+## 7. Conclusions
+
+> This paper was an overview of Riak from a Dynamo point-of-view. To get a
+> better sense of the Riak ecosystem, read our ever-expanding [documentation].
+
+[documentation]: {{<baseurl>}}
+
+This paper described Dynamo, a highly available and scalable data store, used
+for storing state of a number of core services of Amazon.com’s e-commerce
+platform. Dynamo has provided the desired levels of availability and performance
+and has been successful in handling server failures, data center failures and
+network partitions. Dynamo is incrementally scalable and allows service owners
+to scale up and down based on their current request load. Dynamo allows service
+owners to customize their storage system to meet their desired performance,
+durability and consistency SLAs by allowing them to tune the parameters N, R,
+and W.
+
+The production use of Dynamo for the past year demonstrates that decentralized
+techniques can be combined to provide a single highly-available system. Its
+success in one of the most challenging application environments shows that an
+eventually-consistent storage system can be a building block for
+highly-available applications.
+ diff --git a/content/riak/kv/2.9.4/learn/glossary.md b/content/riak/kv/2.9.4/learn/glossary.md new file mode 100644 index 0000000000..a41d92e4b1 --- /dev/null +++ b/content/riak/kv/2.9.4/learn/glossary.md @@ -0,0 +1,354 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.4/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/2.9.4/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/2.9.4/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/2.9.4/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/2.9.4/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.4/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/2.9.4/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/2.9.4/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/2.9.4/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Hinted Handoff
+
+Hinted handoff is a technique for dealing with node failure in the Riak
+cluster in which neighboring nodes temporarily take over storage
+operations for the failed node. When the failed node returns to the
+cluster, the updates received by the neighboring nodes are handed off to
+it.
+
+Hinted handoff allows Riak to ensure database availability. When a node
+fails, Riak can continue to handle requests as if the node were still
+there.
+
+* [Recovering a Failed Node][repair recover failure recovery]
+
+## Key
+
+Keys are unique object identifiers in Riak and are scoped within buckets
+and bucket types.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Lager
+
+[Lager] is an Erlang/OTP framework that
+ships as Riak's default logger.
+
+## MapReduce
+
+Riak's MapReduce gives developers the capability to perform more
+powerful queries over the data stored in their key/value store.
+
+* [Using MapReduce][usage mapreduce]
+
+## Node
+
+A node is analogous to a physical server. Nodes run a certain number of
+vnodes, each of which claims a partition in the Riak Ring key space.
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Object
+
+An object is another name for a value.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Partition
+
+Partitions are the spaces into which a Riak cluster is divided. Each
+vnode in Riak is responsible for a partition. Data is stored on a set
+number of partitions determined by the `n_val` setting, with the target
+partitions chosen statically by applying consistent hashing to an
+object's key.
+
+* [Clusters][concept clusters]
+* [Eventual Consistency][concept eventual consistency]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Quorum
+
+Quorum in Riak has two meanings:
+
+* The quantity of replicas that must respond to a read or write request
+  before it is considered successful. This is defined as a bucket
+  property or as one of the relevant parameters to a single request
+  (R,W,DW,RW).
+* A symbolic quantity for the above, `quorum`, which is equivalent to
+  `n_val` / 2 + 1 (rounded down). With the default `n_val` of 3, this
+  works out to `2`.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Replication properties][apps replication properties]
+* [Understanding Riak's Configurable Behaviors]
+
+## Sloppy Quorum
+
+During failure scenarios, in which available nodes < total nodes, sloppy
+quorum is used to ensure that Riak is still available to take writes.
+When a primary node is unavailable, another node will accept its write
+requests. When the node returns, data is transferred to the primary node
+via the [Hinted Handoff](#hinted-handoff) process.
+
+## Read Repair
+
+Read repair is an anti-entropy mechanism that Riak uses to
+optimistically update stale replicas when they reply to a read request
+with stale data.
+
+* [More about Read Repair][concept replication]
+
+## Replica
+
+Replicas are copies of data stored in Riak. The number of replicas
+required for both successful reads and writes is configurable in Riak
+and should be set based on your application's consistency and
+availability requirements.
+ +* [Eventual Consistency][concept eventual consistency] +* [Understanding Riak's Configurable Behaviors] + +## Riak Core + +Riak Core is the modular distributed systems framework that serves as +the foundation for Riak's scalable architecture. + +* [Riak Core] +* [Where To Start With Riak Core] + +## Riak KV + +Riak KV is the key/value datastore for Riak. + +* [Riak KV] + +## Riak Pipe + +Riak Pipe is the processing layer that powers Riak's MapReduce. It's +best described as "UNIX pipes for Riak." + +* [Riak Pipe] +* [Riak Pipe - the New MapReduce Power] +* [Riak Pipe - Riak's Distributed Processing Framework] + +## Riak Search + +Riak Search is a distributed, scalable, failure-tolerant, realtime, +full-text search engine integrating [Apache +Solr](https://lucene.apache.org/solr/) with Riak KV. + +* [Using Search][usage search] + +## Ring + +The Riak Ring is a 160-bit integer space. This space is equally divided +into partitions, each of which is claimed by a vnode, which themselves +reside on actual physical server nodes. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Cluster Capacity Planning][plan cluster capacity] + +## Secondary Indexing (2i) + +Secondary Indexing in Riak gives developers the ability to tag an object +stored in Riak with one or more values which can then be queried. + +* [Using Secondary Indexes][usage secondary-indexes] +* [Repairing Indexes][repair recover repairs] + +## Strong Consistency + +While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater +enable you to apply strong consistency guarantees to some or all of your +data, thus using Riak as a CP (consistent plus partition-tolerant) +rather than AP (highly available plus partition-tolerant) system. + +* [Strong Consistency Concept][concept strong consistency] +* [Using Strong Consistency][cluster ops strong consistency] + +## Value + +Riak is best described as a key/value store. In versions of Riak prior +to 2.0, all "values" are opaque BLOBs (binary large objects) identified +with a unique key. Values can be any type of data, including a string, a +JSON object, a text document, etc. Modifying values involves fetching +the value that exists in Riak and substituting it for a new value; +operations on values are thus basic CRUD operations. + +[Riak Data Types][dev data types], added in version 2.0, are an important +exception to this. While still considered values---because they are +stored in bucket type/bucket/key locations, like anything in Riak---Riak +Data Types are not BLOBs and are modified by Data Type-specific +operations. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] +* [Data Types][dev data types] + + +## Vector Clock + +Riak utilizes vector clocks (or _vclocks_) to handle version control. +Since any node in a Riak cluster is able to handle a request, and not +all nodes need to participate, data versioning is required to keep track +of a current value. When a value is stored in Riak, it is tagged with a +vector clock and establishes the initial version. When it is updated, +the client provides the vector clock of the object being modified so +that this vector clock can be extended to reflect the update. Riak can +then compare vector clocks on different versions of the object and +determine certain attributes of the data. 
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + diff --git a/content/riak/kv/2.9.4/learn/new-to-nosql.md b/content/riak/kv/2.9.4/learn/new-to-nosql.md new file mode 100644 index 0000000000..92390144c6 --- /dev/null +++ b/content/riak/kv/2.9.4/learn/new-to-nosql.md @@ -0,0 +1,17 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: 2.9.4 +#menu: +# riak_kv-2.9.4: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +--- + +**TODO: Add content (not sure where this lives in existing docs)** + diff --git a/content/riak/kv/2.9.4/learn/use-cases.md b/content/riak/kv/2.9.4/learn/use-cases.md new file mode 100644 index 0000000000..47d8436824 --- /dev/null +++ b/content/riak/kv/2.9.4/learn/use-cases.md @@ -0,0 +1,402 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/2.9.4/dev/data-modeling/ + - /riak/kv/2.9.4/dev/data-modeling/ +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/2.9.4/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/2.9.4/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/2.9.4/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. 
This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships.
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
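+As a quick sketch of this pattern (the bucket, key, payload, and local port
+here are illustrative assumptions, not part of any standard setup), an ad
+creative could be written and fetched by a campaign-scoped key over Riak's
+HTTP API:
+
+```bash
+# Store an ad creative under a campaign-based key
+curl -X PUT http://localhost:8098/buckets/ads/keys/campaign-123-banner \
+  -H "Content-Type: text/html" \
+  -d "<div class=\"ad\">Half off, today only!</div>"
+
+# Serve it later with a single key/value read
+curl http://localhost:8098/buckets/ads/keys/campaign-123-banner
+```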
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+Riak's tunable [replication properties] can be set to favor fast read
+performance. By setting R to 1, only one of N replicas will need to be returned
+to complete a read operation, yielding lower read latency than an R value equal
+to the number of replicas (i.e. R=N). This is ideal for advertising traffic,
+which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used in
+conjunction with a secondary analytics cluster that performs more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+patterns of reading and writing data to Riak are very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache Logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value.
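+
+For instance, each reading could be written to a per-device bucket under a
+timestamp key via the HTTP API. This is a sketch only; the bucket, key format,
+payload, and local port are illustrative assumptions:
+
+```bash
+# Store one reading for a hypothetical device under a timestamp key
+curl -X PUT http://localhost:8098/buckets/sensor-42/keys/2020-07-03T12:00:00Z \
+  -H "Content-Type: application/json" \
+  -d '{"temperature": 21.4, "humidity": 58}'
+```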
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye out for the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
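+
+A minimal sketch of the UUID-plus-index approach over the HTTP API (the bucket,
+key, and index name are illustrative; secondary indexes also require a backend
+that supports them, such as LevelDB):
+
+```bash
+# Store a user object under an opaque key, with the email as a secondary index
+curl -X PUT http://localhost:8098/buckets/users/keys/0a1b2c3d \
+  -H "Content-Type: application/json" \
+  -H "x-riak-index-email_bin: ada@example.com" \
+  -d '{"name": "Ada", "created_at": "2020-07-03"}'
+
+# Later, look the user up by email using the index
+curl http://localhost:8098/buckets/users/index/email_bin/ada@example.com
+```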
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
+      Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type of object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account or have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to model user data in more complex or specific ways. A
+common example would be storing data for assembling a social network timeline.
+To create a user timeline, you could use a `timeline` bucket in Riak and form
+keys on the basis of a unique user ID. You would store timeline information as
+the value, e.g. a list of status update IDs which could then be used to retrieve
+the full information from another bucket, or perhaps containing the full status
+update. If you want to store additional data, such as a timestamp, category, or
+list of properties, you can turn the list into an array of hashes containing
+this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+in the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
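+
+As a rough sketch of that read-modify-write cycle over the HTTP API (the
+bucket, key, and filename are illustrative; a production client should also
+round-trip the `X-Riak-Vclock` header it received when writing back):
+
+```bash
+# Fetch the current timeline object
+curl http://localhost:8098/buckets/timeline/keys/user-123 > timeline.json
+
+# ...append the new status update ID to timeline.json client-side...
+
+# Write the updated object back to the same key
+curl -X PUT http://localhost:8098/buckets/timeline/keys/user-123 \
+  -H "Content-Type: application/json" \
+  --data-binary @timeline.json
+```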
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client. Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the content post, though in a different bucket,
+so the bucket/key combination remains unique. Another possibility would be to
+store each comment with its own ID. Loading the full view with comments would
+require your application to call from the posts and comments buckets to
+assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
+
diff --git a/content/riak/kv/2.9.4/learn/why-riak-kv.md b/content/riak/kv/2.9.4/learn/why-riak-kv.md
new file mode 100644
index 0000000000..d56f9f911a
--- /dev/null
+++ b/content/riak/kv/2.9.4/learn/why-riak-kv.md
@@ -0,0 +1,222 @@
+---
+title: "Why Riak KV?"
+description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Why Riak KV?" + identifier: "learn_why_riak_kv" + weight: 100 + parent: "learn" +toc: true +aliases: + - /riak/2.9.4/theory/why-riak/ + - /riak/kv/2.9.4/theory/why-riak/ +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.4/developing/app-guide/replication-properties +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.4/using/performance/benchmarking +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency +[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[Datomic]: http://www.datomic.com/overview.html +[dev data types]: {{<baseurl>}}riak/kv/2.9.4/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#read-repair + + +## What is Riak? + +Riak is a distributed database designed to deliver maximum data +availability by distributing data across multiple servers. As long as +your Riak client can reach *one* Riak server, it should be able to write +data. + +Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data. + + +### Basho's goals for Riak + +Goal | Description +-------|------- +**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself are experiencing failure conditions +**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden +**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity +**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available + +### When Riak makes sense + +If your data does not fit on a single server and demands a distributed +database architecture, you should take a close look at Riak as a +potential solution to your data availability issues. Getting distributed +databases right is **very** difficult, and Riak was built to address the +problem of data availability with as few trade-offs and downsides as +possible. + +Riak's focus on availability makes it a good fit whenever downtime is +unacceptable. No one can promise 100% uptime, but Riak is designed to +survive network partitions and hardware failures that would +significantly disrupt most databases. + +A less-heralded feature of Riak is its predictable latency. Because its +fundamental operations---read, write, and delete---do not involve +complex data joins or locks, it services those requests promptly. Thanks +to this capability, Riak is often selected as a data storage backend for +data management software from a variety of paradigms, such as +[Datomic]. + +From the standpoint of the actual content of your data, Riak might also +be a good choice if your data can be modeled as one of Riak's currently +available [Data Types][dev data types]: flags, registers, counters, +sets, or maps. These Data Types enable you to take advantage of Riak's +high availability approach while simplifying application development. + +### When Riak is Less of a Good Fit + +We recommend running no fewer than 5 data servers in a cluster. +This means that Riak can be overkill for small databases. 
If you're not +already sure that you will need a distributed database, there's a good +chance that you won't need Riak. + +If explosive growth is a possibility, however, you are always highly +advised to prepare for that in advance. Scaling at Internet speeds is +sometimes compared to overhauling an airplane mid-flight. If you feel +that such a transition might be necessary in the future, then you might +want to consider Riak. + +Riak's simple data model, consisting of keys and values as its atomic +elements, means that your data must be denormalized if your system is to +be reasonably performant. For most applications this is not a serious +hurdle. But if your data simply cannot be effectively managed as keys +and values, Riak will most likely not be the best fit for you. + +Correspondingly, if your application demands a high query load by any +means other than key/value lookup---e.g. SQL-style `SELECT * FROM table` +operations---Riak will not be as efficient as other databases. If you +wish to compare Riak with other data technologies, Basho offers a tool +called [Basho Bench] to help measure its performance, so that you can +decide whether the availability and operational benefits of Riak +outweigh its disadvantages. + +## How Does a Riak Cluster Work? + +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. 
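+
+As a brief sketch (the bucket name here is illustrative, and raising `n_val`
+on a bucket that already holds data is generally not recommended), `n_val` can
+be inspected and set per bucket through the HTTP API on a local node:
+
+```bash
+# Show the current properties, including n_val, for a bucket
+curl http://localhost:8098/buckets/my_bucket/props
+
+# Raise the replication factor for that bucket to 5
+curl -X PUT http://localhost:8098/buckets/my_bucket/props \
+  -H "Content-Type: application/json" \
+  -d '{"props": {"n_val": 5}}'
+```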
+ +## When Things Go Wrong + +Riak retains fault tolerance, data integrity, and availability even in +failure conditions such as hardware failure and network partitions. Riak +has a number of means of addressing these scenarios and other bumps in +the road, like version conflicts in data. + +### Hinted Handoff + +Hinted handoff enables Riak to handle node failure. If a node goes down, +a neighboring node will take over its storage operations. When the +failed node returns, the updates received by the neighboring node are +handed back to it. This ensures that availability for writes and updates +is maintained automatically, minimizing the operational burden of +failure conditions. + +### Version Conflicts + +In any system that replicates data, conflicts can arise, for example +when two clients update the same object at the exact same time or when +not all updates have yet reached hardware that is experiencing lag. + +In Riak, replicas are [eventually consistent][concept eventual consistency], +meaning that while data is always available, not all replicas may have +the most recent update at the exact same time, causing brief +periods---generally on the order of milliseconds---of inconsistency +while all state changes are synchronized. + +Riak addresses data conflicts as follows: When you make a read request, +Riak looks up all replicas for that object. By default, Riak will return +the most recently updated version, determined by looking at the object's +vector clock. Vector clocks are metadata attached to each replica when +it is created. They are extended each time a replica is updated to keep +track of versions. You can also allow clients to resolve conflicts +themselves if that is a better fit for your use case. + +### Riak Data Types + +If you are not interested in dealing with version conflicts on the +application side, [Riak Data Types][dev data types] offer a powerful +yet easy-to-use means of storing certain types of data while allowing +Riak to handle merge conflicts. These conflicts are resolved +automatically by Riak using Data Type-specific algorithms inspired by +research into [convergent replicated data types]. + +### Read Repair + +When an outdated replica is returned as part of a read request, Riak +will automatically update the out-of-sync replica to make it consistent. +[Read repair][glossary read rep], a self-healing property of +the database, will even update a replica that returns a `not_found` in +the event that a node loses the data due to physical failure. + +### Reading and Writing Data in Failure Conditions + +In Riak, you can set an R value for reads and a W value for writes. +These values give you control over how many replicas must respond to a +request for it to succeed. + +Let's say that you have an N value of 3 (aka `n_val=3`) for a particular +key/value pair, but one of the physical nodes responsible for a replica +is down. With an `r=2` setting, only 2 replicas must return results for +read to be deemed successful. This allows Riak to provide read +availability even when nodes are down or laggy. The same applies for the +W in writes. If this value is not specified, Riak defaults to `quorum`, +according to which the majority of nodes must respond. + +There is more on [replication properties][apps replication properties] elsewhere in the +documentation. 
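+
+As a small sketch of these knobs in action (the bucket, key, and value are
+illustrative), R and W can be supplied per request as HTTP query parameters:
+
+```bash
+# Write succeeds once 2 of the 3 replicas acknowledge it
+curl -X PUT "http://localhost:8098/buckets/my_bucket/keys/my_key?w=2" \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+
+# Read succeeds once 2 replicas respond
+curl "http://localhost:8098/buckets/my_bucket/keys/my_key?r=2"
+```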
+ diff --git a/content/riak/kv/2.9.4/release-notes.md b/content/riak/kv/2.9.4/release-notes.md new file mode 100644 index 0000000000..329b5a3c1e --- /dev/null +++ b/content/riak/kv/2.9.4/release-notes.md @@ -0,0 +1,35 @@ +--- +title: "Riak KV 2.9.4 Release Notes" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Release Notes" + identifier: "index_release_notes" + weight: 101 + parent: index +toc: false +aliases: + - /riak/2.9.4/community/release-notes + - /riak/kv/2.9.4/intro-v20 + - /riak/2.9.4/intro-v20 + - /riak/kv/2.9.4/introduction +--- + +Released Jul 03, 2020. + + +## Overview + +This release replaces the Riak KV 2.9.3 release, extending the issue resolution in kv_index_tictactree to detect other files where file truncation means the CRC is not present. + +This release has a key outstanding issue when Tictac AAE is used in parallel mode. On larger clusters, this has been seen to cause significant issues, and so this feature should not be used other than in native mode. + +[Previous Release Notes](#previous-release-notes) + +## Previous Release Notes + +Please see the KV 2.9.2 release notes [here]({{<baseurl>}}riak/kv/2.9.2/release-notes/), the KV 2.9.1 release notes [here]({{<baseurl>}}riak/kv/2.9.1/release-notes/), and the KV 2.9.0p5 release notes [here]({{<baseurl>}}riak/kv/2.9.4/release-notes/). + + diff --git a/content/riak/kv/2.9.4/setup.md b/content/riak/kv/2.9.4/setup.md new file mode 100644 index 0000000000..3584f4fbab --- /dev/null +++ b/content/riak/kv/2.9.4/setup.md @@ -0,0 +1,48 @@ +--- +title: "Setup Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Setup" + identifier: "setup_index" + weight: 110 + pre: install +toc: false +aliases: +--- + +[plan index]: ../setup/planning +[install index]: ../setup/installing +[upgrade index]: ../setup/upgrading +[downgrade]: ../setup/downgrade + +## In This Section + +#### [Planning][plan index] + +Information on planning your Riak KV cluster including software & hardware recommendations. + +[Learn More >>][plan index] + +#### [Installing][install index] + +Step-by-step tutorials on installing Riak KV. + +[Learn More >>][install index] + +#### [Upgrading][upgrade index] + +Guides on upgrading your Riak KV cluster. + +[Learn More >>][upgrade index] + +#### [Downgrading][downgrade] + +A guide on downgrading your Riak KV cluster. + +[Learn More >>][downgrade] + + + diff --git a/content/riak/kv/2.9.4/setup/downgrade.md b/content/riak/kv/2.9.4/setup/downgrade.md new file mode 100644 index 0000000000..dadb785324 --- /dev/null +++ b/content/riak/kv/2.9.4/setup/downgrade.md @@ -0,0 +1,176 @@ +--- +title: "Downgrading" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Downgrading" + identifier: "downgrading" + weight: 103 + parent: "setup_index" +toc: true +aliases: + - /riak/2.9.4/ops/upgrading/rolling-downgrades/ + - /riak/kv/2.9.4/ops/upgrading/rolling-downgrades/ +--- + +[rolling upgrade]: {{<baseurl>}}riak/kv/2.9.4/setup/upgrading/cluster +[config ref]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference +[concept aae]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/ +[aae status]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#aae-status + +Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade]. 
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the upgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak search.
+7. Monitor the reindex of the data.
+8. Finalize process and restart Riak KV & Riak search.
+
+### Guidelines
+
+* Riak Control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:---|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ | | Can be opted out of using a [capability](#aae_tree_capability). |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression in LevelDB and/or enabled global expiration in LevelDB when you installed KV 2.9.4, you cannot downgrade.
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).
+
+This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}
+
+### Stop Riak KV and remove Riak search index & temporary data
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up your Riak KV `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Remove the Riak search index data and AAE data:
+
+  1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+      ```bash
+      rm -rf /var/lib/riak/yz_temp/solr-webapp
+      ```
+  2. Delete the Solr cores located in the yz directory. If you have custom solrconfig.xml files, you will need to restore the core from backup instead.
+
+      For example:
+
+      ```bash
+      rm -rf /var/lib/riak/yz/example_core1
+      rm -rf /var/lib/riak/yz/example_core2
+      ```
+
+### Prepare to Re-index Solr Cores
+
+5\. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency sensitive application, you should adjust these settings with care.
+
+```riak.conf
+anti_entropy.concurrency_limit = 8
+anti_entropy.tree.build_limit.number = 4
+anti_entropy.tree.build_limit.per_timespan = 5m
+```
+
+### Start the node and disable Yokozuna
+
+6\. Start Riak KV:
+{{% note %}}
+Search results will be inconsistent until **Step 8.1** is complete.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7\. 
Wait for Riak search to start by running the following command: + +```bash +riak-admin wait-for-service yokozuna +``` + +8\. Run `riak attach`. + + 1. Run the following snippet to prevent this node from participating in distributed Riak Search queries: + + ``` + riak_core_node_watcher:service_down(yokozuna). + ``` + + 2. Expire the Yokozuna AAE Trees: + + ``` + yz_entropy_mgr:expire_trees(). + ``` + + 3. Exit the attach session by pressing **Ctrl-G** then **q**. + +### Monitor the reindex of the data + +9\. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands. + +The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output. + +Once both riak-admin aae-status and riak-admin search aae-status show values in the **All** column, the node will have successfully rebuilt all of the indexed data. + +### Finalize process and restart Yokozuna + + +10\. If you raised the concurrency AAE currency settings in riak.conf during **Step 5**, stop the node and remove the increased AAE thresholds. + +11\. If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet: + +```erlang +riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)). +``` + +12\. Exit the attach session by pressing **Ctrl-G** then **q**. + +13\. Verify that transfers have completed: + +```bash +riak-admin transfers +``` + + diff --git a/content/riak/kv/2.9.4/setup/installing.md b/content/riak/kv/2.9.4/setup/installing.md new file mode 100644 index 0000000000..2670fed586 --- /dev/null +++ b/content/riak/kv/2.9.4/setup/installing.md @@ -0,0 +1,58 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/2.9.4/ops/building/installing + - /riak/kv/2.9.4/ops/building/installing + - /riak/2.9.4/installing/ + - /riak/kv/2.9.4/installing/ +--- + +[install aws]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/rhel-centos +[install suse]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/2.9.4/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. 
The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. + + diff --git a/content/riak/kv/2.9.4/setup/installing/amazon-web-services.md b/content/riak/kv/2.9.4/setup/installing/amazon-web-services.md new file mode 100644 index 0000000000..4ba8f09be0 --- /dev/null +++ b/content/riak/kv/2.9.4/setup/installing/amazon-web-services.md @@ -0,0 +1,150 @@ +--- +title_supertext: "Installing on" +title: "Amazon Web Services" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Amazon Web Services" + identifier: "installing_amazon_web_services" + weight: 301 + parent: "installing" +toc: true +aliases: + - /riak/2.9.4/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/kv/2.9.4/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/2.9.4/installing/amazon-web-services/ + - /riak/kv/2.9.4/installing/amazon-web-services/ +--- + + +## Launching Riak VMs via the AWS Marketplace + +{{% note title="Note" %}} +The AWS Marketplace does not always have the most recent versions of Riak available. To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section underneath. +{{% /note %}} + +In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account. + +1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account. + +2. Locate Riak in the **Databases & Caching** category or search for Riak from any page. + +3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair + + ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png) + +4. Click the **Accept Terms and Launch with 1-Click** button. + +### Security Group Settings + +Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak. + +1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM. + +2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports: + + * 22 (SSH) + * 8087 (Riak Protocol Buffers Interface) + * 8098 (Riak HTTP Interface) + +3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab). + + * Port range: 4369 + * Port range: 6000-7999 + * Port range: 8099 + +4. 
When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.
+
+    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)
+
+We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.4/using/security/).
+
+## Clustering Riak on AWS
+
+You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the ec2-user.
+
+You can find more information on connecting to an instance on the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).
+
+{{% note title="Note" %}}
+The following clustering setup will _not_ be resilient to instance restarts
+unless deployed in Amazon VPC.
+{{% /note %}}
+
+{{% note title="Note on Package Based Installation" %}}
+If installing on AWS from a package, further configuration of _riak.conf_ is necessary to set the node name and listening IP addresses before the steps below will work.
+{{% /note %}}
+
+1. On the first node, obtain the internal IP address:
+
+    ```bash
+    curl http://169.254.169.254/latest/meta-data/local-ipv4
+    ```
+
+2. For all other nodes, use the internal IP address of the first node:
+
+    ```bash
+    sudo riak-admin cluster join riak@<ip.of.first.node>
+    ```
+
+3. After all of the nodes are joined, execute the following:
+
+    ```bash
+    sudo riak-admin cluster plan
+    ```
+
+    If the plan looks good, commit it:
+
+    ```bash
+    sudo riak-admin cluster commit
+    ```
+
+    To check the status of clustering use:
+
+    ```bash
+    sudo riak-admin member_status
+    ```
+
+You now have a Riak cluster running on AWS.
+
+
+## Installing From Package
+
+#### Amazon Linux 2
+
+You can install on Amazon Linux 2 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2/riak-2.9.4-1.amzn2x86_64.rpm
+sudo yum localinstall -y riak-2.9.4-1.amzn2x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2/riak-2.9.4-1.amzn2x86_64.rpm
+sudo rpm -i riak-2.9.4-1.amzn2x86_64.rpm
+```
+
+
+#### Amazon Linux AMI (2016.09)
+
+You can install on Amazon Linux AMI 2016.09 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2016.09/riak-2.9.4-1.amzn1x86_64.rpm
+sudo yum localinstall -y riak-2.9.4-1.amzn1x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2016.09/riak-2.9.4-1.amzn1x86_64.rpm
+sudo rpm -i riak-2.9.4-1.amzn1x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/verify
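+
+If you prefer to script the firewall rules above rather than click through the console, something like the following sketch works with the [AWS CLI](https://aws.amazon.com/cli/). This is a convenience sketch, not part of the official setup: the security group ID `sg-12345678` is a placeholder for your own, and the CLI must already be configured with credentials.
+
+```bash
+SG=sg-12345678  # placeholder: substitute your Riak security group ID
+
+# public-facing ports: SSH, Riak Protocol Buffers, Riak HTTP
+for port in 22 8087 8098; do
+  aws ec2 authorize-security-group-ingress --group-id "$SG" \
+    --protocol tcp --port "$port" --cidr 0.0.0.0/0
+done
+
+# intra-cluster ports: allow the security group to talk to itself
+for range in 4369 6000-7999 8099; do
+  aws ec2 authorize-security-group-ingress --group-id "$SG" \
+    --protocol tcp --port "$range" --source-group "$SG"
+done
+```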
+
+
diff --git a/content/riak/kv/2.9.4/setup/installing/debian-ubuntu.md b/content/riak/kv/2.9.4/setup/installing/debian-ubuntu.md
new file mode 100644
index 0000000000..398da13123
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/installing/debian-ubuntu.md
@@ -0,0 +1,168 @@
+---
+title_supertext: "Installing on"
+title: "Debian and Ubuntu"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Debian & Ubuntu"
+    identifier: "installing_debian_ubuntu"
+    weight: 302
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/kv/2.9.4/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/2.9.4/installing/debian-ubuntu/
+  - /riak/kv/2.9.4/installing/debian-ubuntu/
+---
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/source/
+[security index]: {{<baseurl>}}riak/kv/2.9.4/using/security/
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/verify
+
+Riak KV can be installed on Debian or Ubuntu-based systems using a binary
+package or by compiling from source code.
+
+The following steps have been tested to work with Riak KV on:
+
+- Ubuntu 18.04
+- Ubuntu 16.04
+- Ubuntu 14.04
+- Ubuntu 12.04
+- Debian 9.2
+- Debian 8.6
+- Debian 7.6
+- Raspbian Buster
+
+> **Note on Debian 7**
+>
+> If you wish to install Riak on Debian 7, you may need to install
+[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
+later, which in turn requires upgrading your system to
+[sid](https://www.debian.org/releases/sid/). Installation instructions
+can be found
+[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
+>
+> Once sid has been installed, you can install libc6 with the following
+command:
+>
+>```bash
+apt-get -t sid install libc6 libc6-dev libc6-dbg
+```
+
+## Installing From Package
+
+If you wish to install the deb packages by hand, follow these
+instructions.
+
+### Installing on Non-LTS Ubuntu Releases
+
+Typically we only package Riak for LTS releases to keep our build and
+testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
+there are changes that affect how Riak is packaged, so we will release a
+separate package for that non-LTS release. In most other cases, however,
+if you are running a non-LTS release it is safe to follow the
+instructions for the most recent LTS release prior to yours. For Ubuntu
+12.10, for example, follow the installation instructions for Ubuntu 12.04.
+
+### PAM Library Requirement for Ubuntu
+
+One dependency that may be missing on your machine is the `libpam0g-dev`
+package used for Pluggable Authentication Module (PAM) authentication,
+associated with [Riak security][security index].
+
+To install:
+
+```bash
+sudo apt-get install libpam0g-dev
+```
+
+### Riak 64-bit Installation
+
+#### Ubuntu Bionic Beaver (18.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/bionic64/riak-2.9.4-1_amd64.deb
+sudo dpkg -i riak-2.9.4-1_amd64.deb
+```
+
+#### Ubuntu Xenial Xerus (16.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/xenial64/riak-2.9.4-1_amd64.deb
+sudo dpkg -i riak-2.9.4-1_amd64.deb
+```
+
+#### Ubuntu Trusty Tahr (14.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/trusty64/riak-2.9.4-1_amd64.deb
+sudo dpkg -i riak-2.9.4-1_amd64.deb
+```
+
+#### Ubuntu Precise Pangolin (12.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/precise64/riak-2.9.4-1_amd64.deb
+sudo dpkg -i riak-2.9.4-1_amd64.deb
+```
+
+#### Debian Stretch (9.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/9/riak-2.9.4-1_amd64.deb
+sudo dpkg -i riak-2.9.4-1_amd64.deb
+```
+
+#### Debian Jessie (8.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/8/riak-2.9.4-1_amd64.deb
+sudo dpkg -i riak-2.9.4-1_amd64.deb
+```
+
+#### Debian Wheezy (7.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/7/riak-2.9.4-1_amd64.deb
+sudo dpkg -i riak-2.9.4-1_amd64.deb
+```
+
+#### Raspbian Buster
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/raspbian/buster/riak-2.9.4-1_armhf.deb
+sudo dpkg -i riak-2.9.4-1_armhf.deb
+```
+
+
+## Installing From Source
+
+First, install Riak dependencies using apt:
+
+```bash
+sudo apt-get install build-essential libc6-dev-i386 git
+```
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+Once Erlang is installed, download and build Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/riak-2.9.4.tar.gz
+tar zxvf riak-2.9.4.tar.gz
+cd riak-2.9.4
+make rel
+```
+
+If the build was successful, a fresh build of Riak will exist in the
+`rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
diff --git a/content/riak/kv/2.9.4/setup/installing/freebsd.md b/content/riak/kv/2.9.4/setup/installing/freebsd.md
new file mode 100644
index 0000000000..ff13da89e0
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/installing/freebsd.md
@@ -0,0 +1,130 @@
+---
+title_supertext: "Installing on"
+title: "FreeBSD"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "FreeBSD"
+    identifier: "installing_freebsd"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/installing/Installing-on-FreeBSD
+  - /riak/kv/2.9.4/ops/building/installing/Installing-on-FreeBSD
+  - /riak/2.9.4/installing/freebsd/
+  - /riak/kv/2.9.4/installing/freebsd/
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/2.9.4/downloads/
+[install verify]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/verify
+
+You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code.
+
+## Installing From Binary Package
+
+Installing Riak from a binary package is the simplest method, with the fewest required dependencies, and it takes less time to complete than building from source.
+
+### Prerequisites and Dependencies
+
+Riak depends on `sudo` being installed if the Riak command-line tools are to be executed by users other than the *riak* user.
Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package.
+
+### Installation
+
+You can install the Riak binary package on FreeBSD directly from a remote URL using `pkg`. For this example, we're installing `riak-2.9.4.txz`.
+
+### For FreeBSD 11.x
+
+```bash
+sudo pkg add https://files.tiot.jp/riak/kv/2.9/2.9.4/freebsd/11.1/riak-2.9.4.txz
+```
+
+
+### For FreeBSD 10.x
+
+```bash
+sudo pkg add https://files.tiot.jp/riak/kv/2.9/2.9.4/freebsd/10.4/riak-2.9.4.txz
+```
+
+When Riak is installed, a message is displayed with information about the installation and available documentation.
+
+```
+Thank you for installing Riak.
+
+Riak has been installed in /usr/local owned by user:group riak:riak
+
+The primary directories are:
+
+    {platform_bin_dir, "/usr/local/sbin"}
+    {platform_data_dir, "/var/db/riak"}
+    {platform_etc_dir, "/usr/local/etc/riak"}
+    {platform_lib_dir, "/usr/local/lib/riak"}
+    {platform_log_dir, "/var/log/riak"}
+
+These can be configured and changed in the platform_etc_dir/app.config.
+
+Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly.
+
+Man pages are available for riak(1) and riak-admin(1)
+```
+
+## Installing From Source
+
+Installing Riak from source on FreeBSD is straightforward, but it requires more dependencies (such as Erlang) to be installed prior to building and more time than a binary package installation.
+
+That said, installing from source provides greater flexibility with respect to configuration, data root locations, and more fine-grained control over specific dependency versions.
+
+### Prerequisites and Dependencies
+
+When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build.
+
+If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding.
+
+* Erlang ([Installing Erlang][install source erlang])
+* Curl
+* Git
+* OpenSSL (version 1.0.0_7)
+* Python
+* sudo
+* flex
+
+### Installation
+
+First download the version you wish to install from the [Download Center][downloads].
+
+Next, unpack and build a release from source:
+
+```bash
+tar zxf riak-x.x.x.tar.gz
+cd riak-x.x.x
+gmake rel
+```
+
+Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories:
+
+```bash
+bin            # Riak binaries
+data           # Riak data and metadata
+erts-5.9.2     # Erlang Run-Time System
+etc            # Riak Configuration
+lib            # Third party libraries
+log            # Operational logs
+releases       # Release information
+```
+
+If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this:
+
+```bash
+gmake devrel
+```
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
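+
+As a quick smoke test after either installation method, you can start the node and ping it. This sketch assumes the default package paths from the install message above, with the binaries in `/usr/local/sbin`:
+
+```bash
+# make the riak scripts visible, then start and ping the node
+export PATH="$PATH:/usr/local/sbin"
+riak start
+riak ping   # a healthy node answers "pong"
+```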
+
+
diff --git a/content/riak/kv/2.9.4/setup/installing/mac-osx.md b/content/riak/kv/2.9.4/setup/installing/mac-osx.md
new file mode 100644
index 0000000000..1d35debbf5
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/installing/mac-osx.md
@@ -0,0 +1,118 @@
+---
+title_supertext: "Installing on"
+title: "Mac OS X"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Mac OS X"
+    identifier: "installing_macosx"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/kv/2.9.4/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/2.9.4/installing/mac-osx/
+  - /riak/kv/2.9.4/installing/mac-osx/
+---
+
+
+
+[perf open files]: {{<baseurl>}}riak/kv/2.9.4/using/performance/open-files-limit
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/verify
+
+The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball.
+
+> **`ulimit` on OS X**
+>
+> OS X gives you a very small limit on open file handles, so even with a
+backend that uses very few file handles, it's possible to run out. See
+[Open Files Limit][perf open files] for more information about changing the limit.
+
+
+## From Precompiled Tarballs
+
+To run Riak from our precompiled tarball, run these commands for the
+appropriate platform:
+
+### 64-bit
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.4/osx/10.11/riak-2.9.4-OSX-x86_64.tar.gz
+tar xzvf riak-2.9.4-OSX-x86_64.tar.gz
+```
+
+After the release is untarred, you will be able to `cd` into the `riak`
+directory and execute `bin/riak start` to start the Riak node.
+
+## Homebrew
+
+{{% note title="Warning: Homebrew not always up to date" %}}
+Homebrew's Riak recipe is community supported, and thus is not always up to
+date with the latest Riak package. Please ensure that the current recipe is
+using the latest supported code (and don't be afraid to update it if it's
+not).
+{{% /note %}}
+
+Installing Riak 2.9.4 with [Homebrew](http://brew.sh/) is easy:
+
+```bash
+brew install --devel riak
+```
+
+By default, this will place a `2.9.4` folder in
+`/usr/local/Cellar/riak`.
+
+Be aware that you will most likely see the following message after
+running `brew install`:
+
+```
+Error: The `brew link` step did not complete successfully
+The formula built, but is not symlinked into /usr/local
+
+You can try again using:
+  brew link riak
+```
+
+We do not recommend using `brew link` with Riak. Instead, we recommend
+either copying that directory to a desired location on your machine,
+aliasing the executables in the `/bin` directory, or interacting with
+the Riak installation directory via environment variables.
+
+**Note**: Homebrew will install Erlang if you don't have it already.
+
+## Installing From Source
+
+You must have Xcode tools installed from [Apple's Developer
+website](http://developer.apple.com/).
+
+{{% note title="Note on Clang" %}}
+Riak has had problems compiling with Clang in the past. As of Riak KV
+2.9.0p5 and Clang 902.0.39.1, Clang can build Riak.
+{{% /note %}}
+
+Riak requires [Erlang](http://www.erlang.org/) R16B02+.
+
+If you do not have Erlang already installed, see [Installing Erlang][install source erlang].
+
+Next, download and unpack the source distribution.
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.4/riak-2.9.4.tar.gz
+tar zxvf riak-2.9.4.tar.gz
+cd riak-2.9.4
+make rel
+```
+
+If you receive errors when building about "incompatible architecture,"
+please verify that you built Erlang with the same architecture as your
+system (Snow Leopard and higher: 64bit).
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
diff --git a/content/riak/kv/2.9.4/setup/installing/rhel-centos.md b/content/riak/kv/2.9.4/setup/installing/rhel-centos.md
new file mode 100644
index 0000000000..33cc21577c
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/installing/rhel-centos.md
@@ -0,0 +1,131 @@
+---
+title_supertext: "Installing on"
+title: "RHEL and CentOS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "RHEL & CentOS"
+    identifier: "installing_rhel_centos"
+    weight: 304
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/kv/2.9.4/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/2.9.4/installing/rhel-centos/
+  - /riak/kv/2.9.4/installing/rhel-centos/
+---
+
+
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/source
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/verify
+
+Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary
+package or by [compiling Riak from source code][install source index]. The
+following steps have been tested to work with Riak on CentOS/RHEL 6.9,
+7.5.1804, and 8.1.1911.
+
+> **Note on SELinux**
+>
+> CentOS enables SELinux by default, so you may need to disable SELinux if
+you encounter errors.
+
+## Installing From Package
+
+If you wish to install the RHEL/CentOS packages by hand, follow these
+instructions.
+
+### For CentOS 8 / RHEL 8
+
+Before installing Riak on CentOS 8/RHEL 8, we first need to satisfy some
+Erlang dependencies by installing the EPEL repository:
+
+```bash
+sudo yum install -y epel-release
+```
+
+Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/8/riak-2.9.4-1.el8.x86_64.rpm
+sudo yum localinstall -y riak-2.9.4-1.el8.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/8/riak-2.9.4-1.el8.x86_64.rpm
+sudo rpm -Uvh riak-2.9.4-1.el8.x86_64.rpm
+```
+
+### For CentOS 7 / RHEL 7
+
+You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/7/riak-2.9.4-1.el7.x86_64.rpm
+sudo yum localinstall -y riak-2.9.4-1.el7.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/7/riak-2.9.4-1.el7.x86_64.rpm
+sudo rpm -Uvh riak-2.9.4-1.el7.x86_64.rpm
+```
+
+### For CentOS 6 / RHEL 6
+
+You can install Riak on CentOS 6/RHEL 6 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/6/riak-2.9.4-1.el6.x86_64.rpm
+sudo yum localinstall -y riak-2.9.4-1.el6.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/6/riak-2.9.4-1.el6.x86_64.rpm
+sudo rpm -Uvh riak-2.9.4-1.el6.x86_64.rpm
+```
+
+## Installing From Source
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+Building from source will require the following packages:
+
+* `gcc`
+* `gcc-c++`
+* `glibc-devel`
+* `make`
+* `git`
+* `pam-devel`
+
+You can install these with yum:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
+```
+
+Now we can download and install Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.4/riak-2.9.4.tar.gz
+tar zxvf riak-2.9.4.tar.gz
+cd riak-2.9.4
+make rel
+```
+
+You will now have a fresh build of Riak in the `rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
diff --git a/content/riak/kv/2.9.4/setup/installing/smartos.md b/content/riak/kv/2.9.4/setup/installing/smartos.md
new file mode 100644
index 0000000000..e53ba72bd8
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/installing/smartos.md
@@ -0,0 +1,118 @@
+---
+title_supertext: "Installing on"
+title: "SmartOS"
+description: ""
+project: "riak_kv"
+project_version: "2.9.4"
+menu:
+  riak_kv-2.9.4:
+    name: "SmartOS"
+    identifier: "installing_smartos"
+    weight: 305
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/installing/Installing-on-SmartOS
+  - /riak/kv/2.9.4/ops/building/installing/Installing-on-SmartOS
+  - /riak/2.9.4/installing/smartos/
+  - /riak/kv/2.9.4/installing/smartos/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/verify
+
+{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
+SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user.
+
+## Open Files Limit
+
+Before proceeding with installation, you should ensure that the system's open
+files limit is at least 65536. Check the current limits to verify this:
+
+```bash
+ulimit -a
+```
+
+To temporarily increase this limit *for the life of your session*, use the following command:
+
+```bash
+ulimit -n 65536
+```
+
+To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`:
+
+```bash
+set rlim_fd_max=65536
+```
+
+## Choosing a Version
+
+SmartOS, albeit powerful, can make some easy tasks (like figuring out the "version" of SmartOS) difficult. The correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is how to determine which Riak package to use.
+
+The thing that really matters for Riak is which dataset was used to make the SmartOS VM. These datasets come from Joyent and appear like this with the `dsadm` command:
+
+```
+fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4
+f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1
+```
+
+This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use.
+ +For Joyent Cloud users who don't know what dataset was used, in the guest zone type: + +``` +cat /opt/local/etc/pkgin/repositories.conf +``` + +* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* you need to use the `1.8` download. +* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* you need to use the `1.6` download. + +## Download and Install + +Download your version of the Riak binary package for SmartOS: + +```bash +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz +``` + +Next, install the package: + +``` +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz +``` + +After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: + +```bash +svcadm -v enable -r riak +``` + +Finally, after enabling the services, check to see that they are online: + +``` +svcs -a | grep -E 'epmd|riak' +``` + +Output from the above command should resemble the following: + +``` +online 17:17:16 svc:/network/epmd:default +online 17:17:16 svc:/application/riak:default +``` + +Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed and configured Riak as service on SmartOS. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.9.4/setup/installing/solaris.md b/content/riak/kv/2.9.4/setup/installing/solaris.md new file mode 100644 index 0000000000..57ade1a095 --- /dev/null +++ b/content/riak/kv/2.9.4/setup/installing/solaris.md @@ -0,0 +1,90 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: "2.9.4" +menu: + riak_kv-2.9.4: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/2.9.4/ops/building/installing/Installing-on-Solaris + - /riak/kv/2.9.4/ops/building/installing/Installing-on-Solaris + - /riak/2.9.4/installing/solaris/ + - /riak/kv/2.9.4/installing/solaris/ +--- + + + +[install verify]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. 
Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. diff --git a/content/riak/kv/2.9.4/setup/installing/source.md b/content/riak/kv/2.9.4/setup/installing/source.md new file mode 100644 index 0000000000..718f20dc2f --- /dev/null +++ b/content/riak/kv/2.9.4/setup/installing/source.md @@ -0,0 +1,107 @@ +--- +title_supertext: "Installing" +title: "Riak KV From Source" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Installing From Source" + identifier: "installing_source" + weight: 310 + parent: "installing" +toc: true +aliases: + - /riak/2.9.4/ops/building/Installing-Riak-from-Source + - /riak/kv/2.9.4/ops/building/Installing-Riak-from-Source + - /riak/2.9.4/installing/source/ + - /riak/kv/2.9.4/installing/source/ +--- + + + +[install source erlang]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/source/erlang +[downloads]: {{<baseurl>}}riak/kv/2.9.4/downloads/ +[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/verify + +Riak should be installed from source if you are building on a platform +for which a package does not exist or if you are interested in +contributing to Riak. + +## Dependencies + +### Erlang + +To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release. + +See [Installing Erlang][install source erlang] for instructions. + +### Git + +Riak depends on source code located in multiple Git repositories. 
Install [Git](https://git-scm.com/) on the target system before attempting the build.
+
+### GCC
+
+Riak will not compile with Clang. Please make sure your default C/C++
+compiler is [GCC](https://gcc.gnu.org/).
+
+## Installation
+
+The following instructions generate a complete, self-contained build of
+Riak in `$RIAK/rel/riak` where `$RIAK` is the location of the unpacked
+or cloned source.
+
+### Installing from source package
+
+Download the Riak source package from the [Download Center][downloads] and build:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.4/riak-2.9.4.tar.gz
+tar zxvf riak-2.9.4.tar.gz
+cd riak-2.9.4
+make locked-deps
+make rel
+```
+
+### Installing from GitHub
+
+The [Riak GitHub repository](http://github.com/basho/riak) has much
+more information on building and installing Riak from source. To clone
+and build Riak from source, follow the steps below.
+
+Clone the repository using [Git](http://git-scm.com) and build:
+
+```bash
+git clone git://github.com/basho/riak.git
+cd riak
+make locked-deps
+make rel
+```
+
+## Platform-Specific Instructions
+
+For instructions about specific platforms, see:
+
+ * [Debian & Ubuntu][install debian & ubuntu#source]
+ * [FreeBSD][install freebsd#source]
+ * [Mac OS X][install mac osx#source]
+ * [RHEL & CentOS][install rhel & centos#source]
+
+If you are running Riak on a platform not in the list above and need
+some help getting it up and running, join The Riak Mailing List and
+inquire about it there. We are happy to help you get up and running with
+Riak.
+
+### Windows
+
+Riak is not currently supported on Microsoft Windows.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
diff --git a/content/riak/kv/2.9.4/setup/installing/source/erlang.md b/content/riak/kv/2.9.4/setup/installing/source/erlang.md
new file mode 100644
index 0000000000..38549afd26
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/installing/source/erlang.md
@@ -0,0 +1,568 @@
+---
+title: "Installing Erlang"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Installing Erlang"
+    identifier: "installing_source_erlang"
+    weight: 301
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/installing/erlang
+  - /riak/kv/2.9.4/ops/building/installing/erlang
+  - /riak/2.9.4/installing/source/erlang/
+  - /riak/kv/2.9.4/installing/source/erlang/
+---
+
+[install index]: {{<baseurl>}}riak/kv/2.9.4/setup/installing
+[security basics]: {{<baseurl>}}riak/kv/2.9.4/using/security/basics
+
+Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].**
+
+> **Note on Official Support**
+>
+> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package.
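+
+If you are ever unsure which Erlang build is currently on your `PATH`, one quick check is the standard `erlang:system_info/1` call; on Basho's patched build it reports `R16B02`:
+
+```bash
+# print the active OTP release and exit
+erl -noshell -eval 'io:format("~s~n", [erlang:system_info(otp_release)]), halt().'
+```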
+
+## Prerequisites
+
+#### Contents
+
+* [kerl](#kerl-prerequisites)
+* [Debian/Ubuntu](#debian-ubuntu-prerequisites)
+* [FreeBSD/Solaris](#freebsd-solaris-prerequisites)
+* [Mac OS X](#mac-os-x-prerequisites)
+* [RHEL/CentOS](#rhel-centos-prerequisites)
+
+To build and install Erlang you must have a GNU-compatible build system and these tools:
+
+**Unpacking**
+
+* [GNU gzip](http://www.gzip.org/) or a modern uncompressing utility.
+* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives.
+
+**Building**
+
+* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts.
+* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program.
+* [gcc](https://gcc.gnu.org/): for compiling C.
+* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces.
+* [OpenSSL](https://www.openssl.org/): toolkit that implements SSL and TLS protocols.
+* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java.
+
+
+## kerl Prerequisites
+
+[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems.
+
+Install kerl by running the following commands:
+
+```bash
+curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl
+chmod a+x kerl
+```
+
+If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl.
+
+Otherwise, continue with [Installing with kerl](#installing-with-kerl).
+
+### Configuring kerl on FreeBSD/Solaris
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+ --enable-kernel-poll --without-odbc"
+```
+
+Then check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf by running:
+
+```shell
+sudo pkg update
+sudo pkg install autoconf
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+### Configuring kerl on Mac OS X
+
+To compile Erlang as 64-bit on Mac OS X you need to instruct kerl to pass the correct flags to the `configure` command.
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+ --enable-kernel-poll --without-odbc --enable-darwin-64bit"
+```
+
+On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).
+
+Check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf using one of the following methods.
+
+With Homebrew:
+
+```shell
+brew install autoconf
+```
+
+Or with curl:
+
+```shell
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
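+
+If you prefer, the two `~/.kerlrc` steps above can be collapsed into one command. This is just a convenience sketch; the options are the same ones shown above for OS X (drop `--enable-darwin-64bit` on other platforms):
+
+```bash
+# write the kerl configuration in a single step
+cat > ~/.kerlrc <<'EOF'
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+ --enable-kernel-poll --without-odbc --enable-darwin-64bit"
+EOF
+```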
+
+
+
+## Debian/Ubuntu Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `apt-get` commands:
+
+```bash
+sudo apt-get update
+sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+> **Note on build output**
+>
+>These packages are not required for operation of a Riak node.
+Notes in the build output about missing support for wxWidgets can be
+safely ignored when installing Riak in a typical non-graphical server
+environment.
+
+To install packages for graphics support use the following `apt-get` command:
+
+```bash
+sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).
+
+
+
+## FreeBSD/Solaris Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `pkg` command:
+
+```bash
+sudo pkg update
+sudo pkg install gcc autoconf gmake flex
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support use the following `pkg` command:
+
+```bash
+sudo pkg install wx28-gtk2-2.8.12_4
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).
+
+
+
+## Mac OS X Prerequisites
+
+* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
+* [Homebrew](http://brew.sh/) (*optional*) - Package Manager.
+
+First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X.
+
+We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is optional; it is not required to install Erlang.
+
+Next, if you are running OS X 10.9 (Mavericks) or later, you may need to
+install [autoconf](https://www.gnu.org/software/autoconf/). To check for
+the presence of autoconf run:
+
+```bash
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf using one of the following methods.
+
+With Homebrew:
+
+```bash
+brew install autoconf
+```
+
+Or with curl:
+
+```bash
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x).
+
+## RHEL/CentOS Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `yum` command:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support use the following `yum` command:
+
+```bash
+sudo yum install wxBase.x86_64
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos).
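+
+Before moving on, it can be worth confirming that the core build tools are actually on your `PATH`. A minimal sketch, checking the tools installed by the dependency commands above:
+
+```bash
+# report any missing build prerequisites
+for tool in gcc make autoconf javac; do
+  command -v "$tool" >/dev/null || echo "missing: $tool"
+done
+```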
+ + + +## Installation + +* [Installing with kerl](#installing-with-kerl) +* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu) +* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris) +* [Installing on Mac OS X](#installing-on-mac-os-x) +* [Installing on RHEL/CentOS](#installing-on-rhel-centos) + +## Installing with kerl + +First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites). + +With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of +Erlang [from Github](https://github.com/basho/otp) using the following +command: + +```bash +./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10 +``` + +This builds the Erlang distribution and performs all of the steps +required to manually install Erlang for you. + +After Erlang is successfully built, you can install the build as follows: + +```bash +./kerl install R16B02-basho10 ~/erlang/R16B02-basho10 +. ~/erlang/R16B02-basho10/activate +``` + +The last line activates the Erlang build that was just installed into +`~/erlang/R16B02-basho10`. + +> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands. + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Debian/Ubuntu + +First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). + +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). 
+
+You can install Erlang in several ways on OS X:
+
+* [From Source](#installing-on-mac-os-x-from-source)
+* [Homebrew](#installing-on-mac-os-x-with-homebrew)
+* [MacPorts](#installing-on-mac-os-x-with-macports)
+
+## Installing on Mac OS X from Source
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz):
+
+```bash
+curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Follow the steps below to configure Erlang for your operating system.
+
+#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7)
+
+If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion
+(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang.
+
+Using LLVM:
+
+```bash
+CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+Or if you prefer GCC:
+
+```bash
+CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on Snow Leopard (OS X 10.6) and Leopard (OS X 10.5)
+
+If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an
+Intel processor:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on older versions of OS X
+
+If you're on a non-Intel processor or an older version of OS X:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll
+```
+
+After you've configured your system, `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with Homebrew
+
+To install Erlang with Homebrew, use this command:
+
+```bash
+brew install erlang
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with MacPorts
+
+Installing with MacPorts:
+
+```bash
+port install erlang +ssl
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on RHEL/CentOS
+
+First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites).
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz) using `wget`:
+
+```bash
+wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next `cd` into the unpacked directory, build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+> **Note for RHEL6/CentOS6**
+>
+> In certain versions of RHEL6 and CentOS6 the `openssl-devel` package
+ships with Elliptic Curve Cryptography partially disabled. To
+communicate this to Erlang and prevent compile- and run-time errors, the
+environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to
+Erlang's `./configure` call.
+>
+> The full `make` invocation then becomes
+>
+> ```bash
+CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
diff --git a/content/riak/kv/2.9.4/setup/installing/source/jvm.md b/content/riak/kv/2.9.4/setup/installing/source/jvm.md
new file mode 100644
index 0000000000..3129b9fa74
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/installing/source/jvm.md
@@ -0,0 +1,52 @@
+---
+title: "Installing the JVM"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Installing the JVM"
+    identifier: "installing_source_jvm"
+    weight: 302
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/installing/jvm
+  - /riak/kv/2.9.4/ops/building/installing/jvm
+  - /riak/2.9.4/ops/building/installing/Installing-the-JVM
+  - /riak/kv/2.9.4/ops/building/installing/Installing-the-JVM
+  - /riak/2.9.4/installing/source/jvm/
+  - /riak/kv/2.9.4/installing/source/jvm/
+---
+
+[usage search]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/search
+
+If you are using [Riak Search 2.0][usage search], codenamed Yokozuna,
+you will need to install **Java 1.6 or later** to run [Apache
+Solr](https://lucene.apache.org/solr/), the search platform that powers
+Riak Search.
+
+We recommend using Oracle's [JDK
+7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html).
+Installation packages can be found on the [Java SE 7 Downloads
+page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR)
+and instructions on the [documentation
+page](http://www.oracle.com/technetwork/java/javase/documentation/index.html).
+
+## Installing Solr on OS X
+
+If you're using Riak Search on Mac OS X, you may see the following
+error:
+
+```java
+java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME>
+```
+
+If you encounter this error, we recommend manually setting the hostname
+for `localhost` using
+[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html).
+
+```bash
+scutil --set HostName "localhost"
+```
+
diff --git a/content/riak/kv/2.9.4/setup/installing/suse.md b/content/riak/kv/2.9.4/setup/installing/suse.md
new file mode 100644
index 0000000000..01dda4dafb
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/installing/suse.md
@@ -0,0 +1,49 @@
+---
+title_supertext: "Installing on"
+title: "SUSE"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "SUSE"
+    identifier: "installing_suse"
+    weight: 307
+    parent: "installing"
+toc: false
+aliases:
+  - /riak/2.9.4/ops/building/installing/Installing-on-SUSE
+  - /riak/kv/2.9.4/ops/building/installing/Installing-on-SUSE
+  - /riak/2.9.4/installing/suse/
+  - /riak/kv/2.9.4/installing/suse/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.4/setup/installing/verify
+
+{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}}
+SUSE is no longer supported in Riak KV 2.9.4+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+Riak KV can be installed on OpenSUSE and SLES systems using a binary package.
The following steps have been tested to work with Riak on +the following x86/x86_64 flavors of SuSE: + +* SLES11-SP1 +* SLES11-SP2 +* SLES11-SP3 +* SLES11-SP4 +* OpenSUSE 11.2 +* OpenSUSE 11.3 +* OpenSUSE 11.4 + +## Installing with rpm + +```bash +wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm +sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + diff --git a/content/riak/kv/2.9.4/setup/installing/verify.md b/content/riak/kv/2.9.4/setup/installing/verify.md new file mode 100644 index 0000000000..79c66f18ac --- /dev/null +++ b/content/riak/kv/2.9.4/setup/installing/verify.md @@ -0,0 +1,166 @@ +--- +title: "Verifying a Riak KV Installation" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Verifying an Installation" + identifier: "installing_verify" + weight: 311 + parent: "installing" +toc: true +aliases: + - /riak/2.9.4/ops/installing/Post-Installation + - /riak/kv/2.9.4/ops/installing/Post-Installation + - /riak/2.9.4/installing/verify-install/ + - /riak/kv/2.9.4/installing/verify-install/ +--- + +[client libraries]: {{<baseurl>}}riak/kv/2.9.4/developing/client-libraries +[perf open files]: {{<baseurl>}}riak/kv/2.9.4/using/performance/open-files-limit +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/inspecting-node + +After you've installed Riak KV, we recommend checking the liveness of +each node to ensure that requests are being properly served. + +In this document, we cover ways of verifying that your Riak nodes are operating +correctly. After you've determined that your nodes are functioning and you're +ready to put Riak KV to work, be sure to check out the resources in the +**Now What?** section below. + +## Starting a Riak Node + +> **Note about source installations** +> +> To start a Riak KV node that was installed by compiling the source code, you +can add the Riak KV binary directory from the installation directory you've +chosen to your `PATH`. +> +> For example, if you compiled Riak KV from source in +the `/home/riak` directory, then you can add the binary directory +(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation. + +To start a Riak node, use the `riak start` command: + +```bash +riak start +``` + +A successful start will return no output. If there is a problem starting the +node, an error message is printed to standard error. + +To run Riak with an attached interactive Erlang console: + +```bash +riak console +``` + +A Riak node is typically started in console mode as part of debugging or +troubleshooting to gather more detailed information from the Riak startup +sequence. Note that if you start a Riak node in this manner, it is running as +a foreground process that will be exited when the console is closed. + +You can close the console by issuing this command at the Erlang prompt: + +```erlang +q(). +``` + +Once your node has started, you can initially check that it is running with +the `riak ping` command: + +```bash +riak ping +``` + +The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not. 
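+
+Since the goal is to check the liveness of every node in the cluster, a small loop can save time. A sketch, assuming SSH access to each node and hypothetical hostnames (`riak1` through `riak3`); substitute your own:
+
+```bash
+# ping each Riak node in turn; healthy nodes answer "pong"
+for host in riak1 riak2 riak3; do
+  printf '%s: ' "$host"
+  ssh "$host" riak ping
+done
+```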
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the `default` [bucket type][cluster ops bucket types]:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /types/default/props HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node!
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
+
+
diff --git a/content/riak/kv/2.9.4/setup/installing/windows-azure.md b/content/riak/kv/2.9.4/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..51c6741bc6
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/installing/windows-azure.md
@@ -0,0 +1,194 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/2.9.4/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/2.9.4/installing/windows-azure/
+  - /riak/kv/2.9.4/installing/windows-azure/
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported, and thus are not always up to
+date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Log in to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+    - Provide a "Virtual Machine Name", such as "testlinuxvm".
+    - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+    - In the "New Password" box, type a strong password.
+    - In the "Confirm Password" box, retype the password.
+    - Select the appropriate "Size" from the drop-down list.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+    - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop-down list.
+    - In the "DNS Name" box, type a valid DNS address, e.g., "testlinuxvm".
+ - In the "Storage Account" box, select "Use Automatically Generated Storage Account". + - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. + - Click the next arrow to continue. + + ![]({{<baseurl>}}images/vmconfiguration2.png) + +6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. + + ![]({{<baseurl>}}images/vmconfiguration3.png) + +7. Wait while Windows Azure prepares your virtual machine. + +### Configure Endpoints + +Once the virtual machine is created you must configure endpoints in order to remotely connect. + +1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints". + +2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows: + - Name: https + - Protocol: leave set to 'TCP' + - Public Port: 443 + - private Port: 8069 + +## Connect to CentOS VMs using PuTTY or SSH + +When the virtual machine has been provisioned and the endpoints configured you can connect to it using SSH or PuTTY. + +### Connecting Using SSH + +**For Linux & Mac Users:** + +```bash +ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180 +``` +Enter the user's password. + +**For Windows Users, use PuTTY:** + +If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html). + +1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe. + +2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. + + ![]({{<baseurl>}}images/putty.png) + +## Install Riak and configure using a shell script + +1. **On each node**, once you've connected using the steps above, execute: + +```bash +sudo su - +curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh +``` + +## Configure Riak using Riak Control + +You can either use Riak Control or the command line to add nodes to your Riak Cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line" + +1. Find the dns name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For Example: + - **dns:** basho-example.cloudapp.net + - **Deployment ID:** 7ea145743aeb4402a088da1234567890 + +2. Visit https://dns-name.cloudapp.net/admin in your browser + +3. Enter 'admin' as the username, and the "Deployment ID" as the password. + +4. Select 'Cluster' on the left. + +5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each vm, not the DNS name. For Example: + - riak@basho-centos1 + +You now have a Riak cluster on Azure + +## Configure Riak using Command Line + +If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section. + +First, SSH into the second (and subsequent nodes) and execute: + +```bash +riak-admin cluster join riak@yourhostnamehere +``` + +(Where 'yourhostnamehere' is the short name of the **first node** in your cluster) + +(NOTE: The host you choose can actually be any host that has already joined the cluster. 
The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak-admin cluster plan
+```
+
+Verify all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak-admin cluster commit
+```
+
+To check the status of clustering, use:
+
+```bash
+riak-admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
+
+
diff --git a/content/riak/kv/2.9.4/setup/planning.md b/content/riak/kv/2.9.4/setup/planning.md
new file mode 100644
index 0000000000..58d00652c6
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning.md
@@ -0,0 +1,58 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster.
+
+[Learn More >>][plan best practices]
+
+
+
+
diff --git a/content/riak/kv/2.9.4/setup/planning/backend.md b/content/riak/kv/2.9.4/setup/planning/backend.md
new file mode 100644
index 0000000000..341ae54da3
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/backend.md
@@ -0,0 +1,57 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/planning/backends/
+  - /riak/kv/2.9.4/ops/building/planning/backends/
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/multi
+[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveled
+[dev api backend]: {{<baseurl>}}riak/kv/2.9.4/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+* [Leveled][plan backend leveled]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic                      |Bitcask|LevelDB|Memory|
+:----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend                        |✓      |       |      |
+Persistent                                     |✓      |✓      |      |
+Keyspace in RAM                                |✓      |       |✓     |
+Keyspace can be greater than available RAM     |       |✓      |      |
+Keyspace loaded into RAM on startup<sup>1</sup>|✓      |       |      |
+Objects in RAM                                 |       |       |✓     |
+Object expiration                              |✓      |       |✓     |
+Secondary indexes                              |       |✓      |✓     |
+Tiered storage                                 |       |✓      |      |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces.
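+
+Whichever backend you settle on, you can confirm what a running node is actually
+using with the `riak` command-line tool. A quick sketch; `riak config effective`
+prints the merged runtime configuration on nodes using the `riak.conf` system:
+
+```bash
+# Show the storage backend a running node is configured with
+riak config effective | grep storage_backend
+```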
+
diff --git a/content/riak/kv/2.9.4/setup/planning/backend/bitcask.md b/content/riak/kv/2.9.4/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..126020d3fa
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/backend/bitcask.md
@@ -0,0 +1,1017 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/advanced/backends/bitcask/
+  - /riak/kv/2.9.4/ops/advanced/backends/bitcask/
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/2.9.4/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: (1) data that is
+  written to Bitcask doesn't need to be ordered on disk, and (2) the
+  log-structured design of Bitcask allows for minimal disk head
+  movement during writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value, and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask,
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append-only and write-once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes.
Recovery operations need to review only the last + record or two written and verify CRC data to ensure that the data is + consistent. + +* **Easy Backup** + + In most systems, backup can be very complicated. Bitcask simplifies + this process due to its append-only, write-once disk format. Any + utility that archives or copies files in disk-block order will + properly back up or copy a Bitcask database. + +## Weaknesses + +* Keys must fit in memory + + Bitcask keeps all keys in memory at all times, which means that your + system must have enough memory to contain your entire keyspace, plus + additional space for other operational components and operating- + system-resident filesystem buffer space. + +## Installing Bitcask + +Bitcask is the default storage engine for Riak. You can verify that +Bitcask is currently being used as the storage backend with the +[`riak`][use admin riak cli] command interface: + +```bash +riak config effective | grep backend +``` + +If this operation returns anything other than `bitcask`, read +the following section for instructions on switching the backend to Bitcask. + +## Enabling Bitcask + +You can set Bitcask as the storage engine using each node's +[configuration files][config reference]: + +```riakconf +storage_backend = bitcask +``` + +```appconfig +{riak_kv, [ + {storage_backend, riak_kv_bitcask_backend}, + %% Other riak_kv settings... + + ]}, +``` + +## Configuring Bitcask + +Bitcask enables you to configure a wide variety of its behaviors, from +filesystem sync strategy to merge settings and more. + +> **Note on configuration systems** +> +> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file. +> Instructions for both systems will be included below. Narrative +descriptions of the various settings will be tailored to the newer +configuration system, whereas instructions for the older system will +largely be contained in the code tabs. + +The default configuration values for Bitcask are as follows: + +```riakconf +bitcask.data_root = ./data/bitcask +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + {data_root, "/var/lib/riak/bitcask"}, + {io_mode, erlang}, + + %% Other Bitcask-specific settings + ]} +``` + +All of the other available settings listed below can be added to your +configuration files. + +### Open Timeout + +The open timeout setting specifies the maximum time Bitcask will block +on startup while attempting to create or open the Bitcask data +directory. The default is 4 seconds. + +In general, you will not need to adjust this setting. If, however, you +begin to receive log messages of the form `Failed to start bitcask +backend: ...`, you may want to consider using a longer timeout. + +Open timeout is specified using the `bitcask.sync.open_timeout` +parameter, and can be set in terms of seconds, minutes, hours, etc. +The following example sets the parameter to 10 seconds: + +```riakconf +bitcask.sync.open_timeout = 10s +``` + +```appconfig +{bitcask, [ + ..., + {open_timeout, 10} %% This value must be expressed in seconds + ... + ]} +``` + +### Sync Strategy + +Bitcask enables you to configure the durability of writes by specifying +when to synchronize data to disk, i.e. by choosing a sync strategy. The +default setting (`none`) writes data into operating system buffers that +will be written to disk when those buffers are flushed by the operating +system. 
If the system fails before those buffers are flushed, e.g. due
+to power loss, that data is lost. This possibility holds for any
+database in which values are asynchronously flushed to disk.
+
+Thus, using the default setting of `none` protects against data loss in
+the event of application failure, i.e. process death, but leaves open a
+small window in which data could be lost in the event of a complete
+system failure, e.g. hardware or OS failure.
+
+This possibility can be prevented by choosing the `o_sync` sync
+strategy, which forces the operating system to flush to stable storage
+at write time for every write. The effect of flushing each write is
+better durability, although it should be noted that write throughput
+will suffer because each write will have to wait for the write to
+complete.
+
+The following sync strategies are available:
+
+* `none` --- lets the operating system manage syncing writes (default)
+* `o_sync` --- uses the `O_SYNC` flag, which forces syncs on every write
+* Time interval --- Riak will force Bitcask to sync at specified intervals
+
+The following are possible configurations:
+
+```riakconf
+bitcask.sync.strategy = none
+bitcask.sync.strategy = o_sync
+
+bitcask.sync.strategy = interval
+bitcask.sync.interval = 65s
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {sync_strategy, none},
+    {sync_strategy, o_sync},
+    {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds
+    ...
+    ]}
+```
+
+> **Sync strategy interval limitations**
+>
+> Setting the sync interval to a value lower than or equal to
+  `riak_core.vnode_inactivity_timeout` (default: 60 seconds) will
+  prevent Riak from performing handoffs.
+>
+> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive.
+
+### Max File Size
+
+The `max_file_size` setting describes the maximum permitted size for any
+single data file in the Bitcask directory. If a write causes the current
+file to exceed this size threshold, then that file is closed and a new
+file is opened for writes. The default is 2 GB.
+
+Increasing `max_file_size` will cause Bitcask to create fewer, larger
+files that are merged less frequently, while decreasing it will cause
+Bitcask to create more numerous, smaller files that are merged more
+frequently.
+
+To give an example, if your ring size is 16, your servers could see as
+much as 32 GB of data in the bitcask directories before the first merge
+is triggered, irrespective of your working set size. You should plan
+storage accordingly and be aware that it is possible to see disk data
+sizes that are larger than the working set.
+
+The `max_file_size` setting can be specified using kilobytes, megabytes,
+etc. The following example sets the max file size to 1 GB:
+
+```riakconf
+bitcask.max_file_size = 1GB
+```
+
+```appconfig
+%% The max_file_size setting must be expressed in bytes, as in the
+%% example below
+
+{bitcask, [
+    ...,
+    {max_file_size, 16#40000000}, %% 1 GB expressed in bytes
+    ...
+    ]}
+```
+
+### Hint File CRC Check
+
+During startup, Bitcask will read from `.hint` files in order to build
+its in-memory representation of the key space, falling back to `.data`
+files if necessary. This reduces the amount of data that must be read
+from the disk during startup, thereby also reducing the time required to
+start up.
You can configure Bitcask to either disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) --- Writes are made via Erlang's built-in file API
+* `nif` --- Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif`, and is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met, at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory.
+
+The merge process is affected by all of the settings described in the
+sections below. In those sections, "dead" refers to keys that no longer
+contain the most up-to-date values, while "live" refers to keys that do
+contain the most up-to-date value and have not been deleted.
+
+### Merge Policy
+
+Bitcask enables you to select a merge policy, i.e. when during the day
+merge operations are allowed to be triggered. The valid options are:
+
+* `always` --- No restrictions on when merge operations can occur (default)
+* `never` --- Merge will never be attempted
+* `window` --- Merge operations occur during specified hours
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can select a merge policy using the `merge.policy` setting. The
+following example sets the merge policy to `never`:
+
+```riakconf
+bitcask.merge.policy = never
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {merge_window, never},
+    ...
+    ]}
+```
+
+If you opt to specify start and end hours for merge operations, you can
+do so with the `merge.window.start` and `merge.window.end`
+settings in addition to setting the merge policy to `window`.
+Each setting is an integer between 0 and 23 for hours on a 24h clock,
+with 0 meaning midnight and 23 standing for 11 pm.
+The merge window runs from the first minute of the `merge.window.start` hour
+to the last minute of the `merge.window.end` hour.
+The following example enables merging between 3:00 am and 5:59 pm:
+
+```riakconf
+bitcask.merge.policy = window
+bitcask.merge.window.start = 3
+bitcask.merge.window.end = 17
+```
+
+```appconfig
+%% In the app.config-based system, you specify the merge window using
+%% a tuple, as in the following example:
+
+{bitcask, [
+    ...,
+    {merge_window, {3, 17}},
+    ...
+    ]}
+```
+
+> **`merge_window` and the Multi backend**
+>
+> If you are using the older configuration system and using Bitcask with
+the [Multi][plan backend multi] backend, please note that if you
+wish to use a merge window, you _must_ set it in the global `bitcask`
+section of your configuration file. `merge_window` settings
+in per-backend sections are ignored.
+
+If merging has a significant impact on performance of your cluster, or
+if your cluster has quiet periods in which little storage activity
+occurs, you may want to change this setting from the default.
+
+A common way to limit the impact of merging is to create separate merge
+windows for each node in the cluster and ensure that these windows do
+not overlap. This ensures that at most one node at a time can be
+affected by merging, leaving the remaining nodes to handle requests.
+The main drawback of this approach is that merges will occur less
+frequently, leading to increased disk space usage.
+
+### Merge Triggers
+
+Merge triggers determine the conditions under which merging will be
+invoked. These conditions fall into two basic categories:
+
+* **Fragmentation** --- This describes the ratio of dead keys to total
+  keys in a file that will trigger merging. The value of this setting is
+  an integer percentage (0-100). For example, if a data file contains 6
+  dead keys and 4 live keys, a merge will be triggered by the default
+  setting (60%). Increasing this value will cause merging to occur less
+  often, whereas decreasing the value will cause merging to happen more
+  often.
+
+* **Dead Bytes** --- This setting describes how much data stored for
+  dead keys in a single file will trigger merging.
If a file meets or
+  exceeds the trigger value for dead bytes, a merge will be triggered.
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+When either of these constraints is met by any file in the directory,
+Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** --- This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in the
+  merge. The value of this setting is a percentage (0-100). For example,
+  if a data file contains 4 dead keys and 6 live keys, it will be
+  included in the merge at the default ratio (40%). Increasing the value
+  will cause fewer files to be merged, while decreasing the value will
+  cause more files to be merged.
+
+* **Dead Bytes** --- This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be included in
+  the merge. Increasing this value will cause fewer files to be merged,
+  while decreasing this value will cause more files to be merged. The
+  default is 128 MB.
+
+* **Small File** --- This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
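+    %% For reference: 209715200 bytes = 200 MB and 26214400 bytes = 25 MB,
+    %% matching the riak.conf example above.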
+ ]} +``` +> **Note on choosing threshold values** +> +> The values for the fragmentation and dead bytes thresholds _must be +equal to or less than their corresponding trigger values_. If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +Fold keys thresholds will reuse the keydir (a) if another fold was +started less than a specified time interval ago and (b) there were fewer +than a specified number of updates. Otherwise, Bitcask will wait until +all current fold keys complete and then start. The default time interval +is 0, while the default number of updates is unlimited. Both thresholds +can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. 
Each threshold can be disabled by setting the value to
+`unlimited`. The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value, or if you need to purge data for space reasons, you can
+configure object expiration, a.k.a. expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting,
+either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time:
+Bitcask will defer a merge triggered solely by key expiry for the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting, in
+terms of minutes, hours, days, etc. The following example sets the
+grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still be registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If Search queries returning
+expired keys are a problem for your use case, then we would recommend not
+using automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can assist you in making Bitcask as stable and reliable as
+possible and in minimizing latency and maximizing throughput.
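+
+Before changing any of the settings above, it can help to baseline what Bitcask is
+doing on disk. The sketch below assumes the default `app.config` data root
+(`/var/lib/riak/bitcask`); adjust the path for your install.
+
+```bash
+# Baseline Bitcask's on-disk footprint before and after tuning
+ulimit -n                                           # current open-files limit
+du -sh /var/lib/riak/bitcask                        # total space used by Bitcask
+find /var/lib/riak/bitcask -name '*.data' | wc -l   # number of data files
+```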
+ +### Tips & Tricks + + * **Bitcask depends on filesystem caches** + + Some data storage layers implement their own page/block buffer cache + in-memory, but Bitcask does not. Instead, it depends on the + filesystem's cache. Adjusting the caching characteristics of your + filesystem can impact performance. + + * **Be aware of file handle limits** + + Review the documentation on [open files limit][perf open files]. + + * **Avoid the overhead of updating file metadata (such as last access + time) on every read or write operation** + + You can achieve a substantial speed boost by adding the `noatime` + mounting option to Linux's `/etc/fstab`. This will disable the + recording of the last accessed time for all files, which results + in fewer disk head seeks. If you need last access times but you'd + like some of the benefits of this optimization, you can try + `relatime`. + + ``` + /dev/sda5 /data ext3 noatime 1 1 + /dev/sdb1 /data/inno-log ext3 noatime 1 2 + ``` + + * **Small number of frequently changed keys** + + When keys are changed frequently, fragmentation rapidly increases. + To counteract this, you should lower the fragmentation trigger and + threshold. + + * **Limited disk space** + + When disk space is limited, limiting the space occupied by dead keys + is of paramount importance. Lower the dead bytes threshold and + trigger to counteract wasted space. + + * **Purging stale entries after a fixed period** + + To automatically purge stale values, set the object expiry value to + the desired cutoff time. Keys that are not modified for a period + equal to or greater than this time interval will become + inaccessible. + + * **High number of partitions per node** + + Because each cluster has many partitions running, Bitcask will have + many [open files][perf open files]. To reduce the number of open + files, we suggest increasing the max file size so that larger files + will be written. You could also decrease the fragmentation and + dead-bytes settings and increase the small file threshold so that + merging will keep the number of open files small in number. + + * **High daytime traffic, low nighttime traffic** + + In order to cope with a high volume of writes without performance + degradation during the day, you might want to limit merging to + in non-peak periods. Setting the merge window to hours of the day + when traffic is low will help. + + * **Multi-cluster replication** + + If you are using Riak with the replication feature enabled, your clusters might experience + higher production of fragmentation and dead bytes. Additionally, + because the fullsync feature operates across entire partitions, it + will be made more efficient by accessing data as sequentially as + possible (across fewer files). Lowering both the fragmentation and + dead-bytes settings will improve performance. + +## FAQ + + * [[Why does it seem that Bitcask merging is only triggered when a + Riak node is restarted?|Developing on Riak + FAQs#why-does-it-seem-that-bitc]] + * [[If the size of key index exceeds the amount of memory, how does + Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]] + * [Bitcask Capacity Planning][plan bitcask capacity] + +## Bitcask Implementation Details + +Riak will create a Bitcask database directory for each [vnode][glossary vnode] +in a [cluster][learn clusters]. In each of those directories, at most one +database file will be open for writing at any given time. 
The file being +written to will grow until it exceeds a specified size threshold, at +which time it is closed and a new file is created for additional writes. +Once a file is closed, whether purposely or due to server exit, it is +considered immutable and will never again be opened for writing. + +The file currently open for writes is only written by appending, which +means that sequential writes do not require disk seeking, which can +dramatically speed up disk I/O. Note that this effect can be hampered if +you have `atime` enabled on your filesystem, because the disk head will +have to move to update both the data blocks _and_ the file and directory +metadata blocks. The primary speed advantage from a log-based database +stems of its ability to minimize disk head seeks. + +Deleting a value from Bitcask is a two-step process: first, a +[tombstone][usage delete objects] is recorded in the open file for writes, +which indicates that a value was marked for deletion at that time, while +references to that key are removed from the in-memory "keydir" +information; later, during a merge operation, non-active data files are +scanned, and only those values without tombstones are merged into the +active data file. This effectively removes the obsolete data and +reclaims disk space associated with it. This data management strategy +may use up a lot of space over time, since Bitcask writes new values +without touching the old ones. + +The compaction process referred to as "merging" solves this +problem. The merge process iterates over all non-active (i.e. immutable) +files in a Bitcask database and produces as output a set of data files +containing only the "live" or latest versions of each present key. + +### Bitcask Database Files + +Below are two directory listings showing what you should expect to find +on disk when using Bitcask. In this example, we use a 64-partition +[ring][learn clusters], which results in 64 separate directories, +each holding its own Bitcask database. + +```bash +ls ./data/bitcask +``` + +The result: + +``` +0 +1004782375664995756265033322492444576013453623296 +1027618338748291114361965898003636498195577569280 + +... etc ... + +9819464125817003981681007469812.9.03831329677312 +``` + +Note that when starting up the directories are created for each +[vnode][glossary vnode] partition's data. At this point, however, there are not +yet any Bitcask-specific files. + +After performing one PUT (write) into the Riak cluster running Bitcask: + +```bash +curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \ + -H "Content-Type: text/plain" \ + -d "hello" +``` + +The "N" value for this cluster is 3 (the default), so you'll see that +the three vnode partitions responsible for this data now have Bitcask +database files: + +``` +bitcask/ + +... etc ... + +|-- 1118962191081472546749696200048404186924073353216-1316787078245894 +| |-- 1316787252.bitcask.data +| |-- 1316787252.bitcask.hint +| `-- bitcask.write.lock + +... etc ... + + +|-- 1141798154164767904846628775559596109106197299200-1316787078249065 +| |-- 1316787252.bitcask.data +| |-- 1316787252.bitcask.hint +| `-- bitcask.write.lock + +... etc ... + + +|-- 116463411724806326294356135107078803128832.9.0184-1316787078254833 +| |-- 1316787252.bitcask.data +| |-- 1316787252.bitcask.hint +| `-- bitcask.write.lock + +... etc ... + +``` + +As more data is written to the cluster, more Bitcask files are created +until merges are triggered. 
+ +``` +bitcask/ +|-- 0-1317147619996589 +| |-- 1317147974.bitcask.data +| |-- 1317147974.bitcask.hint +| |-- 1317221578.bitcask.data +| |-- 1317221578.bitcask.hint +| |-- 1317221869.bitcask.data +| |-- 1317221869.bitcask.hint +| |-- 1317222847.bitcask.data +| |-- 1317222847.bitcask.hint +| |-- 1317222868.bitcask.data +| |-- 1317222868.bitcask.hint +| |-- 1317223014.bitcask.data +| `-- 1317223014.bitcask.hint +|-- 1004782375664995756265033322492444576013453623296-1317147628760580 +| |-- 1317147693.bitcask.data +| |-- 1317147693.bitcask.hint +| |-- 13172.9.05.bitcask.data +| |-- 13172.9.05.bitcask.hint +| |-- 1317222514.bitcask.data +| |-- 1317222514.bitcask.hint +| |-- 1317223035.bitcask.data +| |-- 1317223035.bitcask.hint +| |-- 1317223411.bitcask.data +| `-- 1317223411.bitcask.hint +|-- 1027618338748291114361965898003636498195577569280-1317223690337865 +|-- 1050454301831586472458898473514828420377701515264-1317223690151365 + +... etc ... + +``` + +This is normal operational behavior for Bitcask. + diff --git a/content/riak/kv/2.9.4/setup/planning/backend/leveldb.md b/content/riak/kv/2.9.4/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..a1d7e1a573 --- /dev/null +++ b/content/riak/kv/2.9.4/setup/planning/backend/leveldb.md @@ -0,0 +1,507 @@ +--- +title: "LevelDB" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "LevelDB" + identifier: "planning_backend_leveldb" + weight: 101 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.4/ops/advanced/backends/leveldb/ + - /riak/kv/2.9.4/ops/advanced/backends/leveldb/ +--- + +[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}} +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode +[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference +[perf index]: {{<baseurl>}}riak/kv/2.9.4/using/performance +[config reference#aae]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/#active-anti-entropy + +> **Note on upgrading to 2.0** +> +> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0, +and wish to keep using your old `app.config` file for configuration, +make sure to follow the steps for setting the +`total_leveldb_mem_percent` parameter in the +[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB]. + +[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application +that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an +open-source, on-disk key/value store created by Google Fellows Jeffrey +Dean and Sanjay Ghemawat. + +LevelDB is a relatively new entrant into the growing list of key/value +database libraries, but it has some very interesting qualities that we +believe make it an ideal candidate for use in Riak. LevelDB's storage +architecture is more like +[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable +model than it is like Bitcask. This design and implementation provide +the possibility of a storage engine without Bitcask's RAM limitation. + +> **Note:** Riak uses a fork of LevelDB. The code can be found +[on Github](https://github.com/basho/leveldb). + +A number of changes have been introduced in the LevelDB backend in Riak +2.0: + +* There is now only _one_ performance-related setting that Riak users + need to define---`leveldb.total_mem_percent`---as LevelDB now + dynamically sizes the file cache and block sizes based upon active + [vnodes][glossary vnode] assigned to the node. 
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model + for background compaction work on `.sst` table files. The new model + has increased throughput by at least 10% in all test scenarios. +* Delete operations now receive priority handling in compaction + selection, which means more aggressive reclaiming of disk space than + in previous versions of Riak's LevelDB backend. +* Nodes storing massive key datasets (e.g. in the billions of keys) now + receive increased throughput due to automatic management of LevelDB's + block size parameter. This parameter is slowly raised to increase the + number of files that can open simultaneously, improving random read + performance. + +## Strengths + +1. **License** +--- +The LevelDB and eLevelDB licenses are the [New BSD + License](http://www.opensource.org/licenses/bsd-license.php) and the + [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html), + respectively. We'd like to thank Google and the authors of LevelDB at + Google for choosing a completely FLOSS license so that everyone can + benefit from this innovative storage engine. +2. **Data compression** +--- +LevelDB provides two compression algorithms + to reduce storage size and increase efficient use of storage bandwidth: + * Google's [Snappy](https://code.google.com/p/snappy/) data compression + * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data + compression + + Enabling compression means more CPU usage but less disk space. Compression + is especially good for text data, including raw text, Base64, JSON, etc. + +## Weaknesses + +1. Read access can be slow when there are many levels to search +2. LevelDB may have to do a few disk seeks to satisfy a read; one disk + seek per level and, if 10% of the database fits in memory, one seek + for the last level (since all of the earlier levels should end up + cached in the OS buffer cache for most filesystems) whereas if 1% + fits in memory, LevelDB will need two seeks. + +## Installing eLevelDB + +Riak ships with eLevelDB included within the distribution, so there is +no separate installation required. However, Riak is configured to use +the Bitcask storage engine by default. To switch to eLevelDB, set the +`storage_backend` variable in [`riak.conf`][config reference] to +`leveldb`: + +```riakconf +storage_backend = leveldb +``` + +```appconfig +{riak_kv, [ + %% ... + {storage_backend, riak_kv_eleveldb_backend}, + %% ... + ]} +``` + +## Configuring eLevelDB + +eLevelDb's default behavior can be modified by adding/changing +parameters in the `eleveldb` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB. + +The configuration values that can be set in your +[`riak.conf`][config reference] for eLevelDB are as follows: + +Config | Description | Default +:------|:------------|:------- +`leveldb.data_root` | LevelDB data root | `./data/leveldb` +`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size. 
| `70` + +If you are using the older, `app.config`-based system, the equivalent to +the `leveldb.data_root` is the `data_root` setting, as in the following +example: + +```appconfig +{eleveldb, [ + {data_root, "/path/to/leveldb"}, + + %% Other eleveldb-specific settings +]} +``` + +The `leveldb.maximum_memory.percent` setting is only available in the +newer configuration system. + +### Recommended Settings + +Below are **general** configuration recommendations for Linux +distributions. Individual users may need to tailor these settings for +their application. + +#### sysctl + +For production environments, please see [System Performance Tuning][perf index] +for the recommended `/etc/sysctl.conf` settings. + +#### Block Device Scheduler + +Beginning with the 2.6 kernel, Linux gives you a choice of four I/O +[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We +recommend using the NOOP elevator. You can do this by changing the +scheduler on the Linux boot line: `elevator=noop`. + +#### ext4 Options + +The ext4 filesystem defaults include two options that increase integrity +but slow performance. Because Riak's integrity is based on multiple +nodes holding the same data, these two options can be changed to boost +LevelDB's performance. We recommend setting: `barrier`=0 and +`data`=writeback. + +#### CPU Throttling + +If CPU throttling is enabled, disabling it can boost LevelDB performance +in some cases. + +#### No Entropy + +If you are using https protocol, the 2.6 kernel is widely known for +stalling programs waiting for SSL entropy bits. If you are using https, +we recommend installing the +[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for +pseudorandom number generation. + +#### clocksource + +We recommend setting `clocksource=hpet` on your Linux kernel's `boot` +line. The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a +Google-sponsored open source project that has been incorporated into an +Erlang application and integrated into Riak for storage of key/value +information on disk. The implementation of LevelDB is similar in spirit +to the representation of a single Bigtable tablet (section 5.3). + +### How Levels Are Managed + +LevelDB is a memtable/sstable design. The set of sorted tables is +organized into a sequence of levels. Each level stores approximately ten +times as much data as the level before it. The sorted table generated +from a flush is placed in a special young level (also called level-0). +When the number of young files exceeds a certain threshold (currently +four), all of the young files are merged together with all of the +overlapping level-1 files to produce a sequence of new level-1 files (a +new level-1 file is created for every 2MB of data.) + +Files in the young level may contain overlapping keys. However files in +other levels have distinct non-overlapping key ranges. Consider level +number L where L >= 1. When the combined size of files in level-L +exceeds (10^L) MB (i.e. 
+file in level-L, and all of the overlapping files in level-(L+1), are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-L+1). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 seconds.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0, the only special case to consider is that, after
+picking the primary L0 file to compact, LevelDB will check other L0
+files to determine the degree to which they overlap. This is an attempt
+to avoid some I/O; in practice, we can expect L0 compactions to usually,
+if not always, include all L0 files.
+
+See the PickCompaction routine in
+[1](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level, since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition
+ring, which results in 64 separate directories, each with its own
+LevelDB database:
+
+```bash
+leveldb/
+|-- 0
+|   |-- 000003.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   `-- MANIFEST-000002
+|-- 1004782375664995756265033322492444576013453623296
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+
+... etc ...
+
+`-- 9819464125817003981681007469812.9.03831329677312
+    |-- 000005.log
+    |-- CURRENT
+    |-- LOCK
+    |-- LOG
+    |-- LOG.old
+    `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033322492444576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files in a
+single database directory. In Riak 1.3, the original LevelDB code was
+modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in the name of speeding up
+database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels.
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows approximate sizes (in
+megabytes) for each level: the amount of raw data stored in the level,
+the cumulative size of all levels up to the specified level, and the
+cumulative size including active anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be approximately 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above. Select the third column if you are not
+  using active anti-entropy or the fourth column if you are (i.e. if the
+  `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative column in
+  each row in the table. The first result that exceeds your fast storage
+  array capacity will provide the level number that should be used for
+  your `leveldb.tiered` setting (see the worked example below).
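+
+As a minimal sketch of that arithmetic, assume a hypothetical cluster
+with a `ring_size` of 128, 10 nodes, active anti-entropy enabled, and a
+450 GB fast array:
+
+```bash
+# vnodes per node ~= ring_size / (N - 1) = 128 / 9 ~= 14
+# Multiply by the "Cumulative with AAE" column (values in MB):
+echo $(( 14 * 23208 ))    # level 3: 324,912 MB (~325 GB), fits in 450 GB
+echo $(( 14 * 268968 ))   # level 4: 3,765,552 MB (~3.8 TB), exceeds 450 GB
+# Level 4 is the first level to exceed the fast array,
+# so this cluster would set leveldb.tiered = 4.
+```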
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
+
diff --git a/content/riak/kv/2.9.4/setup/planning/backend/leveled.md b/content/riak/kv/2.9.4/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..d61e24bc6e
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/backend/leveled.md
@@ -0,0 +1,138 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/advanced/backends/leveled/
+  - /riak/kv/2.9.4/ops/advanced/backends/leveled/
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/2.9.4/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/#active-anti-entropy
+
+[leveled](https://github.com/martinsumner/leveled)
+
+Leveled is a simple Key-Value store based on the concept of Log-Structured Merge Trees, with the following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+  - Splits the storage of value between keys/metadata and body (assuming some definition of metadata is provided);
+  - Allows for the application to define what constitutes object metadata and what constitutes the body (value-part) of the object, and to assign tags to objects to manage multiple object types with different extraction rules.
+- Stores keys/metadata in a merge tree and the full object in a journal of CDB files:
+  - Allowing for HEAD requests which have lower overheads than GET requests; and
+  - Queries which traverse keys/metadata to be supported with fewer side effects on the page cache than folds over keys/objects.
+- Support for tagging of object types and the implementation of alternative store behaviour based on type:
+  - Allows for changes to extract specific information as metadata to be returned from HEAD requests;
+  - Potentially usable for objects with special retention or merge properties.
+- Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes):
+  - Low cost specifically where there is a need to scan across keys and metadata (not values).
+- Written in Erlang as a message passing system between Actors.
+
+
+## Strengths
+
+1. leveled was developed specifically as a potential backend for Riak, with features such as:
+    * Support for secondary indexes
+    * Multiple fold types
+    * Auto expiry of objects
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and more likely to suffer from edge case issues than Bitcask or LevelDB, simply because they've been around longer and have been more thoroughly tested via usage in customer environments.
+2. Leveled works better with medium to larger sized objects. It works perfectly well with small objects, but the additional disk space overhead may render LevelDB a better choice if disk space is at a premium and all of your data will be exclusively limited to a few KB or less. This may change as Leveled matures, though.
+
+## Installing leveled
+
+Leveled is included with Riak KV 2.9.4 and beyond, so there is no need to install anything further. To switch to leveled, set the `storage_backend` variable in [`riak.conf`][config reference] to `leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_leveled_backend},
+    %% ...
+    ]}
+```
+
+## Configuring leveled
+
+Leveled's default behavior can be modified by adding/changing
+parameters in the `leveled` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | Compression method. | `native`
+`leveled.compression_point` | Compression point - the point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | Log level - set the minimum log level to be used within leveled. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
+`leveled.max_run_length` | Max journal files per compaction run. | `4`
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### No Entropy
+
+The 2.6 kernel is widely known for stalling programs while waiting for
+SSL entropy bits. If you are using the HTTPS protocol, we recommend
+installing the [HAVEGE](http://www.irisa.fr/caps/projects/hipsor/)
+package for pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's boot
+line.
+The TSC clocksource has been identified to cause issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[Leveled](https://github.com/martinsumner/leveled) is an open source project that has been developed specifically as a backend option for Riak, rather than as a generic backend.
+
diff --git a/content/riak/kv/2.9.4/setup/planning/backend/memory.md b/content/riak/kv/2.9.4/setup/planning/backend/memory.md
new file mode 100644
index 0000000000..40252f7d46
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/backend/memory.md
@@ -0,0 +1,144 @@
+---
+title: "Memory"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Memory"
+    identifier: "planning_backend_memory"
+    weight: 102
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/advanced/backends/memory/
+  - /riak/kv/2.9.4/ops/advanced/backends/memory/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/multi
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb
+
+The Memory storage backend uses in-memory tables to store all data.
+This data is never persisted to disk or to any other storage mechanism.
+The Memory storage engine is best used for testing Riak clusters or for
+storing small amounts of transient state in production systems.
+
+Internally, the Memory backend uses Erlang ETS tables to manage data.
+More information can be found in the
+[official Erlang documentation](http://www.erlang.org/doc/man/ets.html).
+
+## Enabling the Memory Backend
+
+To enable the memory backend, edit your [configuration files][config reference]
+for each Riak node and specify the Memory backend as shown in the following
+example:
+
+```riakconf
+storage_backend = memory
+```
+
+```appconfig
+{riak_kv, [
+    ...,
+    {storage_backend, riak_kv_memory_backend},
+    ...
+    ]}
+```
+
+**Note**: If you *replace* the existing specified backend by removing it
+or commenting it out, data belonging to
+the previously specified backend will still be preserved on the
+filesystem but will no longer be accessible through Riak unless the
+backend is enabled again.
+
+If you require multiple backends in your configuration, please consult
+the [Multi backend documentation][plan backend multi].
+
+## Configuring the Memory Backend
+
+The Memory backend enables you to configure two fundamental aspects of
+object storage: maximum memory usage per [vnode][glossary vnode]
+and object expiry.
+
+### Max Memory
+
+This setting specifies the maximum amount of memory consumed by the
+Memory backend. It's important to note that this setting acts on a
+*per-vnode basis*, not on a per-node or per-cluster basis. This should
+be taken into account when planning for memory usage with the Memory
+backend, as the total memory used will be max memory times the number
+of vnodes in the cluster.
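+
+As a rough sizing sketch, assume a hypothetical 64-partition ring spread
+across 5 nodes with a 2GB per-vnode limit:
+
+```bash
+# Each node hosts about 64 / 5 ~= 13 vnodes, so the worst-case memory
+# the Memory backend can consume on a single node is roughly:
+echo $(( 13 * 2 ))   # => 26 (GB per node)
+# and across the whole cluster:
+echo $(( 64 * 2 ))   # => 128 (GB cluster-wide)
+```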
+
+When the threshold value that you set has been met in a particular
+vnode, Riak will begin discarding objects, beginning with the oldest
+object and proceeding until memory usage returns below the allowable
+threshold.
+
+You can configure maximum memory using the
+`memory_backend.max_memory_per_vnode` setting. You can specify
+`max_memory_per_vnode` however you'd like, using kilobytes, megabytes,
+or even gigabytes.
+
+The following are all possible settings:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 500KB
+memory_backend.max_memory_per_vnode = 10MB
+memory_backend.max_memory_per_vnode = 2GB
+```
+
+```appconfig
+%% In the app.config-based system, the equivalent setting is max_memory,
+%% which must be expressed in megabytes:
+
+{riak_kv, [
+    %% storage_backend specifies the Erlang module defining the storage
+    %% mechanism that will be used on this node.
+
+    {storage_backend, riak_kv_memory_backend},
+    {memory_backend, [
+        ...,
+        {max_memory, 4096}, %% 4GB in megabytes
+        ...
+    ]}
+]}
+```
+
+To determine an optimal max memory setting, we recommend consulting the
+documentation on [LevelDB cache size][plan backend leveldb].
+
+### TTL
+
+The time-to-live (TTL) parameter specifies the amount of time an object
+remains in memory before it expires. The minimum time is one second.
+
+In the newer, `riak.conf`-based configuration system, you can specify
+`ttl` in seconds, minutes, hours, days, etc. The following are all
+possible settings:
+
+```riakconf
+memory_backend.ttl = 1s
+memory_backend.ttl = 10m
+memory_backend.ttl = 3h
+```
+
+```appconfig
+%% In the app.config-based system, the ttl setting must be expressed in
+%% seconds:
+
+{memory_backend, [
+    %% other settings
+    {ttl, 86400}, %% Set to 1 day
+    %% other settings
+    ]}
+```
+
+> **Dynamically Changing `ttl`**
+>
+> There is currently no way to dynamically change the `ttl` setting for a
+bucket or bucket type. The current workaround would be to define
+multiple Memory backends using the Multi backend, each with different
+`ttl` values. For more information, consult the documentation on the
+[Multi][plan backend multi] backend.
+
diff --git a/content/riak/kv/2.9.4/setup/planning/backend/multi.md b/content/riak/kv/2.9.4/setup/planning/backend/multi.md
new file mode 100644
index 0000000000..57dff03861
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/backend/multi.md
@@ -0,0 +1,227 @@
+---
+title: "Multi-backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Multi-backend"
+    identifier: "planning_backend_multi"
+    weight: 103
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/advanced/backends/multi/
+  - /riak/kv/2.9.4/ops/advanced/backends/multi/
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/memory
+[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types
+[use admin riak-admin cli]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin
+
+Riak allows you to run multiple backends within a single Riak cluster.
+Selecting the Multi backend enables you to use different storage
+backends for different [buckets][concept buckets]. Any combination of the three
+available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used.
+
+## Configuring Multiple Backends
+
+You can set up your cluster to use the Multi backend using Riak's
+[configuration files][config reference].
+
+```riakconf
+storage_backend = multi
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_multi_backend},
+    %% ...
+]},
+```
+
+Remember that you must stop and then re-start each node when you change
+storage backends or modify any other configuration.
+
+## Using Multiple Backends
+
+In Riak 2.0 and later, we recommend using multiple backends by applying
+them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process
+involves three steps:
+
+1. Creating a bucket type that enables buckets of that type to use the
+   desired backends
+2. Activating that bucket type
+3. Setting up your application to use that type
+
+Let's say that we've set up our cluster to use the Multi backend and we
+want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface.
+
+We'll call our bucket types `leveldb_backend` and `memory_backend`, but
+you can use whichever names you wish.
+
+```bash
+riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}'
+riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}'
+```
+
+Then, we must activate those bucket types so that they can be used in
+our cluster:
+
+```bash
+riak-admin bucket-type activate leveldb_backend
+riak-admin bucket-type activate memory_backend
+```
+
+Once those types have been activated, any objects stored in buckets
+bearing the type `leveldb_backend` will be stored in LevelDB, whereas
+all objects stored in buckets of the type `memory_backend` will be
+stored in the Memory backend.
+
+More information can be found in our documentation on [using bucket types][usage bucket types].
+
+## Configuring Backends Within the Multi Backend
+
+Once you've set up your cluster to use multiple backends, you can
+configure each backend on its own. All configuration options available
+for LevelDB, Bitcask, and Memory are also available to you when using
+the Multi backend.
+
+#### Using the Newer Configuration System
+
+If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by
+prefacing each configuration with `multi_backend`.
+
+Here is an example of the general form for configuring multiple
+backends:
+
+```riakconf
+multi_backend.$name.$setting_name = setting
+```
+
+If you are using, for example, the LevelDB and Bitcask backends and wish
+to set LevelDB's `bloomfilter` setting to `off` and the Bitcask
+backend's `io_mode` setting to `nif`, you would do that as follows:
+
+```riakconf
+multi_backend.leveldb.bloomfilter = off
+multi_backend.bitcask.io_mode = nif
+```
+
+#### Using the Older Configuration System
+
+If you are using the older, `app.config`-based configuration system,
+configuring multiple backends involves adding one or more
+backend-specific sections to your `riak_kv` settings (in addition to
+setting the `storage_backend` setting to `riak_kv_multi_backend`, as
+shown above).
+
+> **Note**: If you are defining multiple file-based backends of the same
+type, each of these must have a separate `data_root` directory defined.
+
+While all configuration parameters can be placed anywhere within the
+`riak_kv` section of `app.config`, in general we recommend that you
+place them in the section containing other backend-related settings to
+keep the settings organized.
+
+Below is the general form for your `app.config` file:
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        %% Here's where you set the individual multiplexed backends
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_mult/"},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_expiry_mult/"},
+            {expiry_secs, 86400},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb with a different configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"memory_mult">>, riak_kv_memory_backend, [
+            %% memory configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]}
+    ]},
+    %% ...
+]},
+```
+
+Note that in each of the subsections of the `multi_backend` setting, the
+name of each backend you wish to configure can be anything you would
+like. Directly after naming the backend, you must specify which of the
+backends corresponds to that name, i.e. `riak_kv_bitcask_backend`,
+`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have
+done that, the various configurations for each named backend can be set
+as objects in an Erlang list.
+
+## Example Configuration
+
+Imagine that you are using both Bitcask and LevelDB in your cluster, and
+you would like storage to default to Bitcask. The following
+configuration would create two backend configurations, named
+`bitcask_mult` and `leveldb_mult`, respectively, while also setting the
+data directory for each backend and specifying that `bitcask_mult` is
+the default.
+
+```riakconf
+storage_backend = multi
+
+multi_backend.bitcask_mult.storage_backend = bitcask
+multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult
+
+multi_backend.leveldb_mult.storage_backend = leveldb
+multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult
+
+multi_backend.default = bitcask_mult
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            {data_root, "/var/lib/riak/bitcask"}
+        ]},
+        {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+            {data_root, "/var/lib/riak/leveldb"}
+        ]}
+    ]}
+    %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or for the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is 50%
+or less. For example, using three backends with each set to 50% memory
+usage will inevitably lead to memory problems.
+
diff --git a/content/riak/kv/2.9.4/setup/planning/best-practices.md b/content/riak/kv/2.9.4/setup/planning/best-practices.md
new file mode 100644
index 0000000000..9511788491
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/best-practices.md
@@ -0,0 +1,142 @@
+---
+title: "Scaling and Operating Riak Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Best Practices"
+    identifier: "planning_best_practices"
+    weight: 105
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/planning/best-practices
+  - /riak/kv/2.9.4/ops/building/planning/best-practices
+---
+
+[use ref handoff]: {{<baseurl>}}riak/kv/2.9.4/using/reference/handoff
+[config mapreduce]: {{<baseurl>}}riak/kv/2.9.4/configuring/mapreduce
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#active-anti-entropy-aae
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes
+
+Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster.
+
+## Disk Capacity
+
+Filling up disks is a serious problem in Riak. In general, you should
+add capacity under the following conditions:
+
+* a disk becomes more than 80% full
+* you have fewer than 10 days of capacity remaining at current rates of
+  growth
+
+## RAID Levels
+
+Riak provides resilience through its built-in redundancy.
+
+* RAID0 can be used to increase the performance at the expense of
+  single-node reliability
+* RAID5/6 can be used to increase the reliability over RAID0 but still
+  offers higher performance than single disks
+* You should choose a RAID level (or no RAID) that you’re comfortable
+  with
+
+## Disk Leeway
+
+* Adding new nodes instantly increases the total capacity of the
+  cluster, but you should allow enough internal network capacity that
+  [handing off][use ref handoff] existing data outpaces the arrival of new
+  data.
+* Once you’ve reached a scale at which the amount of new data arriving
+  is a small fraction of the cluster's total capacity, you can add new
+  nodes when you need them. You should be aware, however, that adding
+  new nodes can actually _increase_ disk usage on existing nodes in the
+  short term as data is rebalanced within the cluster.
+* If you are certain that you are likely to run out of capacity, we
+  recommend allowing a week or two of leeway so that you have plenty of
+  time to add nodes and for [handoff][use ref handoff] to occur before the disks reach
+  capacity
+* For large volumes of storage it's usually prudent to add more capacity
+  once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+  processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you’ll have spare
+  capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be “bursty,” i.e. it tends to vary both quite
+  a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times the amount of intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+  fill up
+* the current nodes' IO/CPU activity is higher than average for an
+  extended period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node has 1 TB of storage
+  but the rest have 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node].
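+
+As a sketch of adding several nodes in one operation, the joins are
+staged together and committed as a single cluster transition (the node
+names here are hypothetical):
+
+```bash
+# Stage all planned joins at once rather than one at a time
+riak-admin cluster join riak@10.0.0.2
+riak-admin cluster join riak@10.0.0.3
+riak-admin cluster join riak@10.0.0.4
+
+# Review the resulting transfer plan, then commit it
+riak-admin cluster plan
+riak-admin cluster commit
+```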
+
+## Scaling
+
+* All large-scale systems are bound by the availability of some
+  resources
+* From a stability point of view, the best state for a busy Riak cluster
+  to maintain is the following:
+  * New network connections are limited to ensure that existing network
+    connections consume most network bandwidth
+  * CPU at < 30%
+  * Disk IO at < 90%
+* You should use HAProxy or your application servers to limit new
+  network connections to keep network and IO below 90% and CPU below
+  30%.
+
diff --git a/content/riak/kv/2.9.4/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.9.4/setup/planning/bitcask-capacity-calc.md
new file mode 100644
index 0000000000..3bff46eb0b
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/bitcask-capacity-calc.md
@@ -0,0 +1,101 @@
+---
+title: "Bitcask Capacity Calculator"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Bitcask Capacity Calculator"
+    identifier: "planning_cluster_bitcask_capacity"
+    weight: 104
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/planning/bitcask
+  - /riak/kv/2.9.4/ops/building/planning/bitcask
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask
+
+These calculators will assist you in sizing your cluster if you plan to
+use the default [Bitcask][plan backend bitcask] storage backend.
+
+This page is designed to give you a rough estimate when sizing your
+cluster. The calculations are a _best guess_, and they tend to be a bit
+on the conservative side. It's important to include a bit of headroom
+as well as room for unexpected growth so that if demand exceeds
+expectations you'll be able to add more nodes to the cluster and stay
+ahead of your requirements.
+
+<div id="node_info" class="calc_info"></div>
+<div class="calculator">
+  <ul>
+    <li>
+      <label for="n_total_keys">Total Number of Keys:</label>
+      <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input">
+      <span class="error_span" id="n_total_keys_error"></span>
+    </li>
+    <li>
+      <label for="n_bucket_size">Average Bucket Size (Bytes):</label>
+      <input id="n_bucket_size" type="text" size="7" name="n_bucket_size" value="" class="calc_input">
+      <span class="error_span" id="n_bucket_size_error"></span>
+    </li>
+    <li>
+      <label for="n_key_size">Average Key Size (Bytes):</label>
+      <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input">
+      <span class="error_span" id="n_key_size_error"></span>
+    </li>
+    <li>
+      <label for="n_record_size">Average Value Size (Bytes):</label>
+      <input id="n_record_size" type="text" size="7" name="n_record_size" value="" class="calc_input">
+      <span class="error_span" id="n_record_size_error"></span>
+    </li>
+    <li>
+      <label for="n_ram">RAM Per Node (in GB):</label>
+      <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input">
+      <span class="error_span" id="n_ram_error"></span>
+    </li>
+    <li>
+      <label for="n_nval"><i>N</i> (Number of Write Copies):</label>
+      <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input">
+      <span class="error_span" id="n_nval_error"></span>
+    </li>
+  </ul>
+</div>
+
+## Recommendations
+
+<span id="recommend"></span>
+
+## Details on Bitcask RAM Calculation
+
+With the above information in mind, the following variables will factor
+into your RAM calculation:
+
+Variable | Description
+:--------|:-----------
+Static Bitcask per-key overhead | 44.5 bytes per key
+Estimated average bucket-plus-key length | The combined number of characters your bucket + keynames will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The Actual Equation
+
+Approximate RAM Needed for Bitcask = (static bitcask per-key overhead +
+estimated average bucket+key length in bytes) * estimated total number of
+keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+The amount of RAM you would need for Bitcask is about **11.2 GB**
+(roughly 10.4 GiB) across your entire cluster.
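+
+A quick way to verify that arithmetic (using integer math in tenths of a
+byte to accommodate the 44.5-byte overhead):
+
+```bash
+# (44.5 + 30) bytes/key * 50,000,000 keys * n_val of 3
+echo $(( (445 + 300) * 50000000 * 3 / 10 ))   # => 11175000000 bytes (~11.2 GB)
+```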
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
+
diff --git a/content/riak/kv/2.9.4/setup/planning/cluster-capacity.md b/content/riak/kv/2.9.4/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..68dfa267a3
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/cluster-capacity.md
@@ -0,0 +1,235 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/planning/cluster
+  - /riak/kv/2.9.4/ops/building/planning/cluster
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/2.9.4/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.4/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users. Memory is essential for running complex MapReduce queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM without degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory. The keydir is a hash table that maps each
+concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for
+each file contained within each Bitcask backend) to a fixed-size
+structure giving the file, offset, and size of the most recently written
+entry for that bucket + key on disk.
+
+To learn about Bitcask see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.
+
+If your calculated RAM needs will exceed your hardware resources---in
+other words, if you can't afford the RAM to use Bitcask---we recommend
+that you use LevelDB.
+
+Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.
+
+### LevelDB
+
+If RAM requirements for Bitcask are prohibitive, we recommend use of
+the LevelDB backend. While LevelDB doesn't require a large amount of RAM
+to operate, supplying it with the maximum amount of memory available leads to higher performance.
+
+For more information see [LevelDB][plan backend leveldb].
+
+## Disk
+
+Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs:
+
+#### Estimated Total Objects * Average Object Size * n_val
+
+For example:
+
+* 50,000,000 objects
+* an average object size of two kilobytes (2,048 bytes)
+* the default `n_val` of 3
+
+Then you would need approximately **286 GB** of disk space in the entire
+cluster to accommodate your data.
+
+We believe that databases should be durable out of the box. When we
+built Riak, we did so in a way that you could write to disk while
+keeping response times below your users' expectations. So this
+calculation assumes that you'll be keeping the entire data set on disk.
+
+Many of the considerations taken when configuring a machine to serve a
+database apply to configuring a node for Riak as well. Mounting
+disks with `noatime` and having separate disks for your OS and Riak data
+lead to much better performance. See [Planning for a
+Riak System](../start) for more information.
+
+### Disk Space Planning and Ownership Handoff
+
+When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is that the remaining nodes use disk space more intensively; in rare cases this can fill the disk of one or more of those nodes.
+
+When making disk space planning decisions, we recommend that you:
+
+* assume that one or more nodes may be down at any time
+* monitor your disk space usage and add additional space when usage
+  exceeds 50-60% of available space.
+
+Another possibility worth considering is using Riak with a filesystem
+that allows for growth, for example
+[LVM],
+[RAID](http://en.wikipedia.org/wiki/RAID), or
+[ZFS](http://en.wikipedia.org/wiki/ZFS).
+
+## Read/Write Profile
+
+Read/write ratios, as well as the distribution of key access, should
+influence the configuration and design of your cluster. If your use case
+is write heavy, you will need less RAM for caching, and if only a
+certain portion of keys is accessed regularly, such as a [Pareto
+distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you
+won't need as much RAM available to cache those keys' values.
+
+## Number of Nodes
+
+The number of nodes (i.e. physical servers) in your Riak cluster depends
+on the number of times data is [replicated][concept replication] across the
+cluster. To ensure that the cluster is always available to respond to
+read and write requests, we recommend a "sane default" of N=3
+replicas. This requirement can be met with a 3- or 4-node
+cluster.
+
+For production deployments, however, we recommend using no fewer than 5
+nodes, as node failures in smaller clusters can compromise the
+fault-tolerance of the system. Additionally, in clusters smaller than 5
+nodes, a high percentage of the nodes (75-100% of them) will need to
+respond to each request, putting undue load on the cluster that may
+degrade performance. For more details on this recommendation, see our
+blog post on [Why Your Riak Cluster Should Have at Least Five
+Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/).
+
+## Scaling
+
+Riak can be scaled in two ways: vertically, via improved hardware, and
+horizontally, by adding more nodes. Both ways can provide performance
+and capacity benefits, but should be used in different circumstances.
+The [riak-admin cluster command][use admin riak-admin#cluster] can
+assist scaling in both directions.
+
+#### Vertical Scaling
+
+Vertical scaling, or improving the capabilities of a node/server,
+provides greater capacity to the node but does not decrease the overall
+load on existing members of the cluster. That is, the ability of the
+improved node to handle existing load is increased but the load itself
+is unchanged. Reasons to scale vertically include increasing IOPS (I/O
+Operations Per Second), increasing CPU/RAM capacity, and increasing disk
+capacity.
+
+#### Horizontal Scaling
+
+Horizontal scaling, or increasing the number of nodes in the cluster,
+reduces the responsibilities of each member node by reducing the number
+of partitions per node and providing additional endpoints for client
+connections. That is, the capacity of each individual node does not
+change but its load is decreased. Reasons to scale horizontally include
+increasing I/O concurrency, reducing the load on existing nodes, and
+increasing disk capacity.
+
+> **Note on horizontal scaling**
+>
+> When scaling horizontally, it's best to add all planned nodes at once
+with multiple `riak-admin cluster join` commands followed by
+a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster.
+
+#### Reducing Horizontal Scale
+
+If a Riak cluster is over provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command.
+
+## Ring Size/Number of Partitions
+
+Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference].
+
+The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes we recommend a larger ring size.
+
+The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes.
+
+There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses.
+We recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or
+128 should work just fine, while a 10-node cluster should work well with
+a ring size of 128 or 256 (64 is too small, while 512 is likely too
+large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency IO access can drastically decrease performance. Before
+putting your Riak cluster in production, it is recommended that you gain
+a full understanding of your environment's behavior so that you know how
+your cluster performs under load for an extended period of time. Doing
+so will help you size your cluster for future growth and lead to optimal
+performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend that you keep latency
+between nodes as low as possible, as high latency leads to
+sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the least
+latency possible using SATA drives or SSDs, for example.
+
diff --git a/content/riak/kv/2.9.4/setup/planning/future.md b/content/riak/kv/2.9.4/setup/planning/future.md
new file mode 100644
index 0000000000..9fba0d154d
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/future.md
@@ -0,0 +1,17 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+#menu:
+#  riak_kv-2.9.4:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+---
+
+**TODO: Add content**
+
diff --git a/content/riak/kv/2.9.4/setup/planning/operating-system.md b/content/riak/kv/2.9.4/setup/planning/operating-system.md
new file mode 100644
index 0000000000..c432972cf5
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/operating-system.md
@@ -0,0 +1,26 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+---
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.4/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
+
diff --git a/content/riak/kv/2.9.4/setup/planning/start.md b/content/riak/kv/2.9.4/setup/planning/start.md
new file mode 100644
index 0000000000..9bc8a52a09
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/planning/start.md
@@ -0,0 +1,58 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/planning/system-planning
+  - /riak/kv/2.9.4/ops/building/planning/system-planning
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring
+your Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have
+strengths and weaknesses, so if you are unsure of which backend you
+need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests
+across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP
+implementations. We don't recommend VRRP behavior for the VIP because
+you'll lose the benefit of spreading client query load to all nodes in a
+ring.
+ +For **reverse-proxy** configurations (HTTP interface), any one of the +following should work adequately: + +* haproxy +* squid +* varnish +* nginx +* lighttpd +* Apache + + diff --git a/content/riak/kv/2.9.4/setup/search.md b/content/riak/kv/2.9.4/setup/search.md new file mode 100644 index 0000000000..139597f9cb --- /dev/null +++ b/content/riak/kv/2.9.4/setup/search.md @@ -0,0 +1,2 @@ + + diff --git a/content/riak/kv/2.9.4/setup/upgrading.md b/content/riak/kv/2.9.4/setup/upgrading.md new file mode 100644 index 0000000000..7216e5665d --- /dev/null +++ b/content/riak/kv/2.9.4/setup/upgrading.md @@ -0,0 +1,35 @@ +--- +title: "Upgrading Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Upgrading" + identifier: "upgrading" + weight: 102 + parent: "setup_index" +toc: true +aliases: +--- + +[upgrade checklist]: ./checklist +[upgrade version]: ./version +[upgrade cluster]: ./cluster +[upgrade mdc]: ./multi-datacenter +[upgrade search]: ./search + +## In This Section + +### [Production Checklist][upgrade checklist] + +An overview of what to consider before upgrading Riak KV in a production environment. + +[Learn More >>][upgrade checklist] + +### [Upgrading to Riak KV 2.9.4][upgrade version] + +A tutorial on updating to Riak KV 2.9.4 + +[Learn More >>][upgrade version] + diff --git a/content/riak/kv/2.9.4/setup/upgrading/checklist.md b/content/riak/kv/2.9.4/setup/upgrading/checklist.md new file mode 100644 index 0000000000..c3448218d5 --- /dev/null +++ b/content/riak/kv/2.9.4/setup/upgrading/checklist.md @@ -0,0 +1,222 @@ +--- +title: "Production Checklist" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Production Checklist" + identifier: "upgrading_checklist" + weight: 100 + parent: "upgrading" +toc: true +aliases: + - /riak/2.9.4/ops/upgrading/production-checklist/ + - /riak/kv/2.9.4/ops/upgrading/production-checklist/ +--- + +[perf open files]: {{<baseurl>}}riak/kv/2.9.4/using/performance/open-files-limit +[perf index]: {{<baseurl>}}riak/kv/2.9.4/using/performance +[ntp]: http://www.ntp.org/ +[security basics]: {{<baseurl>}}riak/kv/2.9.4/using/security/basics +[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.4/configuring/load-balancing-proxy +[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference +[config backend]: {{<baseurl>}}riak/kv/2.9.4/configuring/backend +[usage search]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/search +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.4/developing/app-guide/replication-properties +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/reference/strong-consistency +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/bucket-types +[use admin commands]: {{<baseurl>}}riak/kv/2.9.4/using/admin/commands +[use admin riak control]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-control +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/inspecting-node +[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.4/using/troubleshooting/http-204 +[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin +[SANs]: http://en.wikipedia.org/wiki/Storage_area_network + +Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. 
While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using a tool such as `iperf` to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
+    * If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][concept strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state?
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak-admin`][use admin riak-admin]; see the sketch after these lists)
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median, 95th, and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs?
+* Are the client libraries in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying?
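+
+Many of the Riak metrics listed above can be spot-checked from the
+command line before you wire up a full monitoring system. A minimal
+sketch, assuming the default HTTP listener on `localhost:8098`:
+
+```bash
+# Fetch the JSON stats blob (latencies, vnode and node counters, and
+# protocol buffer connection stats) from one node's /stats endpoint
+curl -s http://localhost:8098/stats
+```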
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak-admin diag <check>`, where `check` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak-admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production
+    systems
+  - High is for problems that impact production or soon-to-be-production
+    systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those
+    issues that are likely to turn into production outages very soon.
+    On-call engineers respond to urgent requests within 30 minutes,
+    24/7.
+* Does your team know how to gather `riak-debug` results from the whole
+  cluster when opening tickets? If not, that process goes something like
+  this (a sketch appears at the end of this page):
+  - SSH into each machine, run `riak-debug`, and grab the resultant
+    `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open
+    a new High- or Urgent-priority ticket
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at
+the metrics gathered above. Based on the numbers you're seeing so far,
+configure alerting thresholds on your latencies, disk consumption, and
+memory. These are the places most likely to give you advance warning of
+trouble.
+
+When you go to increase capacity down the line, having historic metrics
+will give you very clear indicators of having resolved scaling problems,
+as well as metrics for understanding what to upgrade and when.
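+
+As a rough illustration of the `riak-debug` collection process described
+above, here is a minimal sketch. The hostnames are hypothetical, and the
+tarball name and location depend on how `riak-debug` is set up on your
+systems:
+
+```bash
+# Run riak-debug on each node over SSH, then copy the tarballs back
+for node in riak1.example.com riak2.example.com riak3.example.com; do
+  ssh "$node" 'riak-debug'
+  scp "$node:*riak-debug*.tar.gz" .   # adjust the glob/path as needed
+done
+```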
+ + diff --git a/content/riak/kv/2.9.4/setup/upgrading/cluster.md b/content/riak/kv/2.9.4/setup/upgrading/cluster.md new file mode 100644 index 0000000000..dfec824d30 --- /dev/null +++ b/content/riak/kv/2.9.4/setup/upgrading/cluster.md @@ -0,0 +1,308 @@ +--- +title: "Upgrading a Cluster" +description: "" +project: "riak_kv" +project_version: "2.9.4" +menu: + riak_kv-2.9.4: + name: "Upgrading a Cluster" + identifier: "upgrading_cluster" + weight: 102 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/2.9.4/ops/upgrading/rolling-upgrades/ + - /riak/kv/2.9.4/ops/upgrading/rolling-upgrades/ +--- + +[production checklist]: {{<baseurl>}}riak/kv/2.9.4/setup/upgrading/checklist +[use admin riak control]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-control +[use admin commands]: {{<baseurl>}}riak/kv/2.9.4/using/admin/commands +[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes +[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.4/configuring/v3-multi-datacenter +[jmx monitor]: {{<baseurl>}}riak/kv/2.9.4/using/reference/jmx +[snmp]: {{<baseurl>}}riak/kv/2.9.4/using/reference/snmp + +{{% note title="Note on upgrading Riak KV from older versions" %}} +Riak KV upgrades are tested and supported for two feature release versions. +For example, upgrades from 1.1.x to 1.3.x are tested and supported, +while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new +version of Riak KV that is more than two feature releases ahead, we +recommend first upgrading to an intermediate version. For example, in an +upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x +before upgrading to 1.4.x. + +If you run [Riak Control]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-control), you should disable it during the rolling upgrade process. +{{% /note %}} + +Riak KV nodes negotiate with each other to determine supported +operating modes. This allows clusters containing mixed-versions of Riak KV +to properly interoperate without special configuration, and simplifies +rolling upgrades. + +Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading. + +## Debian/Ubuntu + +The following example demonstrates upgrading a Riak KV node that has been +installed with the Debian/Ubuntu packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +2\. Back up the Riak KV node's `/etc` and `/data` directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Upgrade Riak KV: + +```bash +sudo dpkg -i <riak_package_name>.deb +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +5\. Verify Riak KV is running the new version: + +```bash +riak version +``` + +6\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +* `»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +* While the node was offline, other nodes may have accepted writes on its +behalf. 
This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7.
The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+    * `riak_core`
+---
+the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_repl`
+---
+See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_jmx`
+---
+See [JMX Monitoring][jmx monitor] for more information.
+    * `snmp`
+---
+See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should ensure that any custom patches contained in
+the `basho-patches` directory are examined to determine their
+application to the upgraded version. If you find that patches no longer
+apply to the upgraded version, you should remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
+
+
diff --git a/content/riak/kv/2.9.4/setup/upgrading/multi-datacenter.md b/content/riak/kv/2.9.4/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..4d833a655e
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+#menu:
+#  riak_kv-2.9.4:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+aliases:
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
+
+
diff --git a/content/riak/kv/2.9.4/setup/upgrading/search.md b/content/riak/kv/2.9.4/setup/upgrading/search.md
new file mode 100644
index 0000000000..5c8317c421
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/upgrading/search.md
@@ -0,0 +1,278 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.9.4"
+menu:
+  riak_kv-2.9.4:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.9.4/ops/advanced/upgrading-search-2
+  - /riak/kv/2.9.4/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features, i.e.
no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage, as analysis will temporarily be performed
+by both systems. Finally, Solr runs in a JVM process that requires its
+own RAM. A good start is 2 GB, but more will be required for heavier
+workloads. That said, do not make the heap too large, as that could
+cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data, as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+> Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and (especially) disk space from merge index, since its GC
+algorithm is poor at removing large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the setting is called
+   `yokozuna`; if you've moved to the new `riak.conf` format, it's
+   called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+      %% Other configs
+      {enabled, true},
+      %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps, which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+   To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+   instructions to learn how to define your XML file. Once you've created
+   the file, you can upload it to the cluster.
+
+   ```curl
+   curl -XPUT http://localhost:8098/search/schema/my_schema \
+     -H 'Content-Type: application/xml' \
+     --data-binary @my_schema.xml
+   ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+   ```curl
+   curl -XPUT http://localhost:8098/search/index/my_index \
+     -H 'Content-Type: application/json' \
+     -d '{"schema":"my_schema"}'
+   ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+   ```curl
+   curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+     -H 'Content-Type: application/json' \
+     -d '{"props":{"search_index":"my_index"}}'
+   ```
+
+   Once a bucket is associated with the new Search, all objects that are
+   written or modified in Riak will be indexed by **both** legacy and new
+   Search. However, the HTTP and client query interfaces will still
+   continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+   Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+   the following code into the shell. It clears the Search hash trees for
+   each node in the cluster.
+
+   ```erlang
+   riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+   ```
+
+   Press `Ctrl-D` to exit from the attached shell.
+
+   In the background, AAE will rebuild the hash trees and exchange them
+   with KV. These exchanges will notice that objects are missing and index
+   them in new Search.
+
+   <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+has occurred on every node.
+
+   ```bash
+   riak-admin search aae-status
+   ```
+
+   First, you must wait until all trees are rebuilt. This may take a
+   while, as each node is configured, by default, to build a maximum of
+   one tree per hour. You can determine when a tree is built by looking
+   at the `Entropy Trees` section. When a tree is not built, it will show
+   `--` under the `Built (ago)` column. Otherwise, it will list how long
+   ago the tree was built in a human-friendly format.
Here is an example
+   of trees that are not built:
+
+   ```
+   ================================ Entropy Trees ================================
+   Index                                              Built (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   --
+   319703483166135013357056057156686910549735243776   --
+   ...
+   ```
+
+   Here is an example of built trees:
+
+   ```
+   ================================ Entropy Trees ================================
+   Index                                              Built (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   12.3 hr
+   319703483166135013357056057156686910549735243776   5.3 hr
+   ...
+   ```
+
+   After all the trees are built, you then have to wait for a full
+   exchange round to occur for every partition on every node. That is,
+   the full exchange round must be **NEWER** than the time the tree was
+   built. That way you know the exchange was based on the latest tree.
+   The exchange information is found under the `Exchanges` section.
+   Under that section there are two columns: `Last (ago)` and `All
+   (ago)`. In this case, you want to wait until the `All (ago)` value is
+   newer than the value of `Built (ago)` in the `Entropy Trees` section.
+   For example, given the entropy tree output above, the following output
+   would indicate that both partitions have had a full exchange round
+   since the latest tree was built:
+
+   ```
+   ================================== Exchanges ==================================
+   Index                                              Last (ago)    All (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+   319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+   ...
+   ```
+
+   Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` is newer
+   than `5.3 hr`. Once the exchange is newer for every partition on
+   every node, you know that AAE has brought all new indexes up to date.
+
+7. Next, run the following command, which will give HTTP and PB query
+control to the new Riak Search:
+
+   ```bash
+   riak-admin search switch-to-new-search
+   ```
+
+   <div class="note">
+   <div class="title">Check Results Before Switching (Optional)</div>
+   Up until this point, all incoming queries are serviced by the legacy
+   Search system. After `switch-to-new-search` is run, all queries
+   will be handled by new Search. If you first want to verify the
+   results of new Search before switching, you can use its dedicated
+   HTTP resource at `/search/query/<index>?q=...`.
+   </div>
+
+8. Set the `search` bucket property to `false` for all legacy-indexed
+buckets. This deactivates legacy Search.
+
+   ```curl
+   curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+     -H 'Content-Type: application/json' \
+     -d '{"props":{"search": false}}'
+   ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+   ```appconfig
+   {riak_search, [
+     %% Other configs
+     {enabled, false},
+     %% Other configs
+   ]},
+   ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search, this causes
+the commit hooks to persist even when legacy Search is disabled and the
+`search` property is set to `false`.
+
+    New Search has code to expunge the legacy hooks from the raw ring,
+    but this only occurs during start-up and requires that legacy Search
+    be disabled in the configuration. Thus, the easiest way to fix things
+    is to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than in the middle of a migration.
+
+
diff --git a/content/riak/kv/2.9.4/setup/upgrading/version.md b/content/riak/kv/2.9.4/setup/upgrading/version.md
new file mode 100644
index 0000000000..f8948da3f4
--- /dev/null
+++ b/content/riak/kv/2.9.4/setup/upgrading/version.md
@@ -0,0 +1,253 @@
+---
+title: "Upgrading to Riak KV 2.9.4"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Upgrading to 2.9.4"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.4/upgrade-v20/
+  - /riak/kv/2.9.4/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.4/setup/upgrading/cluster/
+
+---
+
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.4/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.4/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/2.9.4/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.4/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.4/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.4/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/2.9.4/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 2.9.4 by following the instructions below.
+
+{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+   * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to potentially downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config.
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to override the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete (see the sketch after this list).
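+
+As a rough illustration, the verification at the end of the list above
+can be scripted per node. A minimal sketch, assuming a hypothetical
+node name of `riak@192.168.1.11`:
+
+```bash
+# Confirm the upgraded version, wait for riak_kv, then check that no
+# hinted handoff transfers remain
+riak version
+riak-admin wait-for-service riak_kv riak@192.168.1.11
+riak-admin transfers
+```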
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Transitioning to Leveled backend
+
+
+[Riak KV 2.9][release notes] introduced a new backend developed specifically for Riak: Leveled.
+
+The leveled backend is not compatible with other backends in terms of the serialised disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+
+* First transition to 2.9 with the current backend in place, minimising the time spent running mismatched versions in parallel;
+* Then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) to upgrade to Riak KV 2.9.4, but only if you plan to use Riak search.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features may be introduced that we either believe are so important that we automatically opt in users on upgrade, or for which there is no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade, data written in HLL format is unreadable. |
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade to version 2.9, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new Leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+
+## Upgrading process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories:
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+
+4\.
Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package right now, before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+   ```advanced.config
+   {riak_kv, [
+     {override_capability, [
+       {object_hash_version, [{use, legacy}] }
+     ]}
+   ]}
+   ```
+
+5.b\. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+   ```riak.conf
+   leveldb.compression.algorithm=snappy
+   ```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS 2.2.3 or earlier, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core`
+---
+the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl`
+---
+See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+   ```advanced.config
+   {riak_repl, [
+     {override_capability, [
+       {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+     ]}
+   ]}
+   ```
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) to upgrade to Riak KV 2.9.4 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf.
This data is transferred to the node when it becomes available. + +10\. Repeat the process for the remaining nodes in the cluster. + + +### Basho Patches + +After upgrading, you should ensure that any custom patches contained in the `basho-patches` directory are examined to determine their application to the upgraded version. You can find this information in the [Release Notes]. + +If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production. + +The following lists locations of the `basho-patches` directory for +each supported operating system: + +- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches` +- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches` +- FreeBSD: `/usr/local/lib/riak/lib/basho-patches` +- SmartOS: `/opt/local/lib/riak/lib/basho-patches` +- Solaris 10: `/opt/riak/lib/basho-patches` + +### Riaknostic + +It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility Riaknostic. + +Ensure that Riak KV is running on the node, and issue the following command: + +```bash +riak-admin diag +``` + +Make the recommended changes from the command output to ensure optimal node operation. + + diff --git a/content/riak/kv/2.9.4/using.md b/content/riak/kv/2.9.4/using.md new file mode 100644 index 0000000000..516398b29b --- /dev/null +++ b/content/riak/kv/2.9.4/using.md @@ -0,0 +1,75 @@ +--- +title: "Using Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Using" + identifier: "managing" + weight: 201 + pre: database +toc: true +aliases: +--- + +[use running cluster]: ../using/running-a-cluster +[use admin index]: ../using/admin/ +[cluster ops index]: ../using/cluster-operations +[repair recover index]: ../using/repair-recovery +[security index]: ../using/security +[perf index]: ../using/performance +[troubleshoot index]: ../using/troubleshooting +[use ref]: ../using/reference + +## In This Section + +#### [Running a Cluster][use running cluster] + +A guide on basic cluster setup. + +[Learn More >>][use running cluster] + +#### [Cluster Administration][use admin index] + +Tutorials and reference documentation on cluster administration commands as well as command-line tools. + +[Learn More >>][use admin index] + +#### [Cluster Operations][cluster ops index] + +Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups. + +[Learn More >>][cluster ops index] + +#### [Repair & Recovery][repair recover index] + +Contains documentation on repairing a cluster, recovering from failure, and common errors. + +[Learn More >>][repair recover index] + +#### [Security][security index] + +Information on securing your Riak KV cluster. + +[Learn More >>][security index] + +#### [Performance][perf index] + +Articles on benchmarking your Riak KV cluster and improving performance. + +[Learn More >>][perf index] + +#### [Troubleshooting][troubleshoot index] + +Guides on troubleshooting issues and current product advisories. + +[Learn More >>][troubleshoot index] + +#### [Reference][use ref] + +Articles providing background information and implementation details on topics such as logging, bucket types, and search. 
+
+[Learn More >>][use ref]
+
+
diff --git a/content/riak/kv/2.9.4/using/admin.md b/content/riak/kv/2.9.4/using/admin.md
new file mode 100644
index 0000000000..e781a89a12
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/admin.md
@@ -0,0 +1,48 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/cluster-admin
+  - /riak/kv/2.9.4/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
+
diff --git a/content/riak/kv/2.9.4/using/admin/commands.md b/content/riak/kv/2.9.4/using/admin/commands.md
new file mode 100644
index 0000000000..fbcfe72afe
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/admin/commands.md
@@ -0,0 +1,393 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/cluster-admin
+  - /riak/kv/2.9.4/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command.
This will execute the changes that
+have been staged and reviewed.
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
+interface are also available as self-standing commands. The `riak-admin
+member-status` command is now the `riak-admin cluster status` command,
+`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+|        node        |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status`
+---
+There are five possible values for status:
+  * `valid`
+---
+The node has begun participating in cluster operations
+  * `leaving`
+---
+The node is currently unloading ownership of its
+  [data partitions][concept clusters] to other nodes
+  * `exiting`
+---
+The node's ownership transfers are complete and it is
+  currently shutting down
+  * `joining`
+---
+The node is in the process of joining the cluster
+  but has not yet completed the join process
+  * `down`
+---
+The node is not currently responding
+* `avail`
+---
+There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring`
+---
+What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending`
+---
+The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant][cluster ops add remove node] node will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
+[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+The behavior of `riak-admin cluster plan` depends on the staged changes.
+
+* If a `leave` operation has been staged, `riak-admin cluster plan` will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* If a `cluster clear` operation is staged on a node that remains in the cluster, running `riak-admin cluster plan` will leave the node unaffected.
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
+
+If there is a staged change (or changes), however, you
+will see a detailed listing of what will take place upon commit, what
+the cluster will look like afterward, etc.
+
+For example, if a `cluster leave` operation is staged in a 3-node cluster, the output will look something like this:
+
+```
+=============================== Staged Changes ================================
+Action         Details(s)
+-------------------------------------------------------------------------------
+leave          'dev2@127.0.0.1'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 2 cluster transitions
+
+###############################################################################
+                         After cluster transition 1/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+leaving    32.8%      0.0%    'dev2@127.0.0.1'
+valid      34.4%     50.0%    'dev1@127.0.0.1'
+valid      32.8%     50.0%    'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+
+Transfers resulting from cluster changes: 38
+  6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1'
+  11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1'
+  5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1'
+  16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1'
+
+###############################################################################
+                         After cluster transition 2/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      50.0%      --      'dev1@127.0.0.1'
+valid      50.0%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+```
+
+Notice that there are distinct sections of the output for each of the
+transitions that the cluster will undergo, including warnings, planned
+data transfers, etc.
+
+## commit
+
+Commits the currently staged cluster changes. Staged cluster changes
+must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed.
+
+```bash
+riak-admin cluster commit
+```
+
+## clear
+
+Clears the currently staged cluster changes.
+
+```bash
+riak-admin cluster clear
+```
+
+The effect of clearing staged changes depends on what has been staged:
+
+* If a `leave` operation has been staged, `riak-admin cluster clear` will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* Clearing staged changes on a node that remains in the cluster leaves that node unaffected.
+
+## partitions
+
+Prints primary, secondary, and stopped partition indices and IDs either
+for the current node or for another node that you specify.
The following +prints that information for the current node: + +```bash +riak-admin cluster partitions +``` + +This would print the partition information for a different node in the +cluster: + +```bash +riak-admin cluster partitions --node=<node> +``` + +Partition information is contained in a table like this: + +``` +Partitions owned by 'dev1@127.0.0.1': ++---------+-------------------------------------------------+--+ +| type | index |id| ++---------+-------------------------------------------------+--+ +| primary | 0 |0 | +| primary | 91343852333181432387730302044767688728495783936 |4 | +| primary |182687704666362864775460604089535377456991567872 |8 | +| ... | .... |..| +| primary |1438665674247607560106752257205091097473808596992|63| +|secondary| -- |--| +| stopped | -- |--| ++---------+-------------------------------------------------+--+ +``` + +## partition-count + +Displays the current partition count either for the whole cluster or for +a particular node. This would display the partition count for the +cluster: + +```bash +riak-admin cluster partition-count +``` + +This would display the count for a node: + +```bash +riak-admin cluster partition-count --node=<node> +``` + +When retrieving the partition count for a node, you'll see a table like +this: + +``` ++--------------+----------+-----+ +| node |partitions| pct | ++--------------+----------+-----+ +|dev1@127.0.0.1| 22 | 34.4| ++--------------+----------+-----+ +``` + +The `partitions` column displays the number of partitions claimed by the +node, while the `pct` column displays the percentage of the ring claimed. + +## partition + +The `cluster partition` command enables you to convert partition IDs to +indexes and vice versa using the `partition id` and `partition index` +commands, respectively. Let's say that you run the `riak-admin cluster +partitions` command and see that you have a variety of partitions, one +of which has an index of +`1004782375664995756265033322492444576013453623296`. 
You can convert
+that index to an ID like this:
+
+```bash
+riak-admin cluster partition index=1004782375664995756265033322492444576013453623296
+```
+
+Conversely, if you have a partition with an ID of 20, you can retrieve
+the corresponding index:
+
+```bash
+riak-admin cluster partition id=20
+```
+
diff --git a/content/riak/kv/2.9.4/using/admin/riak-admin.md b/content/riak/kv/2.9.4/using/admin/riak-admin.md
new file mode 100644
index 0000000000..4e48a6e2b9
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/admin/riak-admin.md
@@ -0,0 +1,718 @@
+---
+title: "riak-admin Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "riak-admin CLI"
+    identifier: "cluster_admin_cli"
+    weight: 101
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/tools/riak-admin
+  - /riak/kv/2.9.4/ops/running/tools/riak-admin
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.4/using/admin/commands
+[use admin commands#join]: {{<baseurl>}}riak/kv/2.9.4/using/admin/commands/#join
+[use admin commands#leave]: {{<baseurl>}}riak/kv/2.9.4/using/admin/commands/#leave
+[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/backing-up
+[config reference#node-metadata]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/#node-metadata
+[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/changing-cluster-info
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/commit-hooks
+[config reference#ring]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/#ring
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/inspecting-node
+[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.4/using/reference/statistics-monitoring
+[downgrade]: {{<baseurl>}}riak/kv/2.9.4/setup/downgrade
+[security index]: {{<baseurl>}}riak/kv/2.9.4/using/security/
+[security managing]: {{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/bucket-types
+[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.4/using/reference/secondary-indexes
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.4/using/repair-recovery
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/strong-consistency
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/handoff
+[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#stats
+
+## `riak-admin`
+
+The `riak-admin` command performs operations unrelated to node liveness,
+including node membership, backup, and basic status reporting. The node
+must be running for most of these commands to work.
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up, respectively.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. `reip` should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs related to `reip` have been fixed in Riak 2.0. We recommend
+against using `reip` prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded JavaScript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+**Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Outputs system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10
+riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into the operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs that were triggered by AAE.
+
+* **Exchanges**
+ * The *Last* column lists when the most recent exchange between a
+   partition and one of its sibling replicas was performed.
+ * The *All* column shows how long it has been since a partition
+   exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+ * The *Built* column shows when the hash trees for a given partition
+   were created.
+
+* **Keys Repaired**
+ * The *Last* column shows the number of keys repaired during the most
+   recent key exchange.
+ * The *Mean* column shows the mean number of keys repaired during all
+   key exchanges since the last node restart.
+ * The *Max* column shows the maximum number of keys repaired during all
+   key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```bash
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing the impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
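+
+As an illustration only, the following sketch shows how a few of the
+`riak-admin security` subcommands fit together. The subcommands,
+username, bucket, and permissions here are examples drawn from the
+security guides linked above, not a complete reference:
+
+```bash
+# Turn Riak Security on (SSL must be configured first)
+riak-admin security enable
+
+# Create a user and allow password-based logins from localhost
+riak-admin security add-user riakuser password=Test1234
+riak-admin security add-source riakuser 127.0.0.1/32 password
+
+# Grant that user read and write access to a bucket
+riak-admin security grant riak_kv.get,riak_kv.put on any mybucket to riakuser
+```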
+
+## bucket-type
+
+Bucket types, introduced in Riak 2.0, are a means of managing bucket
+properties and provide a namespace in addition to buckets and keys.
+This command enables you to create and modify bucket types, check the
+status of currently available bucket types, and activate created bucket
+types.
+
+```bash
+riak-admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak-admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak-admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak-admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+        Total partitions: 64
+        Finished partitions: 44
+        Speed: 100
+        Total 2i items scanned: 0
+        Total tree objects: 0
+        Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak-admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak-admin repair-2i kill
+```
+
+## search
+
+The `search` command provides sub-commands for various administrative
+work related to the new Riak Search.
+
+```bash
+riak-admin search <command>
+```
+
+### aae-status
+
+```bash
+riak-admin search aae-status
+```
+
+Outputs active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default, trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtrees don't match for a particular key. The `Last` column is the
+number of keys repaired during the last exchange.
The `Mean` column is
+the average number of keys repaired for all exchange rounds since the
+node has started. The `Max` column is the maximum number of keys
+repaired for a given exchange round since the node has started.
+
+### switch-to-new-search
+
+{{% note title="Only For Legacy Migration" %}}
+This is only needed when migrating from legacy Riak Search to the new Search
+(Yokozuna).
+{{% /note %}}
+
+```bash
+riak-admin search switch-to-new-search
+```
+
+Switches handling of the HTTP `/solr/<index>/select` resource and
+protocol buffer query messages from legacy Riak Search to the new Search
+(Yokozuna).
+
+## services
+
+Lists available services on the node (e.g. `riak_kv`).
+
+```bash
+riak-admin services
+```
+
+## ensemble-status
+
+This command is used to provide insight into the current status of the
+consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature.
+
+```bash
+riak-admin ensemble-status
+```
+
+This command can also be used to check on the status of a specific
+consensus group in your cluster:
+
+```bash
+riak-admin ensemble-status <group id>
+```
+
+Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency].
+
+## handoff
+
+Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff].
+
+## set
+
+Enables you to change the value of one of Riak's configuration
+parameters on the fly, without needing to stop and restart the node.
+
+```bash
+riak-admin set <variable>=<value>
+```
+
+The `set` command can only be used for the following
+parameters:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `search.dist_query=off` will disable distributed query for the node
+* `search.dist_query=on` will enable distributed query for the node
+* `search.dist_query` will get the status of distributed query for the node
+
+The `search.dist_query` commands above are non-persistent. Any settings you have defined in your `riak.conf` configuration file will be used when Riak KV is restarted.
+
+## show
+
+Whereas the [`riak-admin status`][use admin riak-admin#stats] command displays all currently available statistics for your Riak
+cluster, the `show` command enables you to view only some of those
+statistics.
+
+```bash
+riak-admin show <variable>
+```
+
+## describe
+
+Provides a brief description of one of Riak's [configurable parameters][config reference].
+
+```bash
+riak-admin describe <variable>
+```
+
+If you want to know the meaning of the `nodename` parameter:
+
+```bash
+riak-admin describe nodename
+```
+
+That will produce the following output:
+
+```
+nodename:
+  Name of the Erlang node
+```
+
diff --git a/content/riak/kv/2.9.4/using/admin/riak-cli.md b/content/riak/kv/2.9.4/using/admin/riak-cli.md
new file mode 100644
index 0000000000..6677cd7970
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/admin/riak-cli.md
@@ -0,0 +1,201 @@
+---
+title: "riak Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "riak CLI"
+    identifier: "cluster_admin_riak_cli"
+    weight: 102
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/tools/riak
+  - /riak/kv/2.9.4/ops/running/tools/riak
+---
+
+[configuration file]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/
+[escript]: http://www.erlang.org/doc/man/escript.html
+[`riak-admin`]: {{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#top
+[configuration]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/
+
+## riak
+
+This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands:
+
+```bash
+Usage: riak «command»
+where «command» is one of the following:
+    { help | start | stop | restart | ping | console | attach
+      attach-direct | ertspath | chkconfig | escript | version | getpid
+      top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } |
+      config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+## help
+
+Provides a brief description of all available commands.
+
+## start
+
+Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given.
+
+```bash
+riak start
+```
+
+## stop
+
+Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak stop
+```
+
+## restart
+
+Stops and then starts the running Riak node without exiting the Erlang VM.
+Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak restart
+```
+
+## ping
+
+Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding.
+
+```bash
+riak ping
+```
+
+## console
+
+Starts the Riak node in the foreground, giving access to the Erlang shell and
+runtime messages. Prints `Node is already running - use 'riak attach' instead`
+when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice.
+
+```bash
+riak console
+```
+
+## attach
+
+Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached.
+
+```bash
+riak attach
+```
+
+## attach-direct
+
+Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO) pipe, providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**.
+
+```bash
+riak attach-direct
+```
+
+## ertspath
+
+Outputs the path of the Riak Erlang runtime environment:
+
+```bash
+riak ertspath
+```
+
+## chkconfig
+
+Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output.
+
+```bash
+riak chkconfig
+```
+
+## escript
+
+Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment:
+
+```bash
+riak escript <filename>
+```
+
+## version
+
+Outputs the Riak version identifier:
+
+```bash
+riak version
+```
+
+## getpid
+
+Outputs the process identifier for the currently-running instance of Riak:
+
+```bash
+riak getpid
+```
+
+## top
+
+The `riak top` command is the direct equivalent of `riak-admin top`:
+
+```bash
+riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N]
+```
+
+More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation.
+
+## config
+
+Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's `riak.conf` configuration file.
+
+```bash
+riak config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however, `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the `riak.conf` or `advanced.config` files.
+  The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example:
+
+  ```
+  -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args
+  ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message:
+
+  ```
+  -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args
+  ```
+
+* `effective` prints the effective configuration in the following syntax:
+
+  ```
+  parameter1 = value1
+  parameter2 = value2
+  ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error:
+
+  ```
+  Disabling cuttlefish, legacy configuration files found:
+    /etc/riak/app.config
+    /etc/riak/vm.args
+  Effective config is only visible for cuttlefish conf files.
+  ```
+
+* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following:
+
+  ```
+  Documentation for storage_backend
+  Specifies the storage engine used for Riak's key-value data
+  and secondary indexes (if supported).
+
+  Valid Values:
+    - one of: bitcask, leveldb, memory, multi, prefix_multi
+  Default Value : bitcask
+  Set Value     : bitcask
+  Internal key  : riak_kv.storage_backend
+  ```
+
+Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem.
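+
+For example, to re-run configuration generation with debugging output (a
+usage sketch of the flag described above, not additional functionality):
+
+```bash
+riak config generate -l debug
+```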
+ diff --git a/content/riak/kv/2.9.4/using/admin/riak-control.md b/content/riak/kv/2.9.4/using/admin/riak-control.md new file mode 100644 index 0000000000..0075d5b906 --- /dev/null +++ b/content/riak/kv/2.9.4/using/admin/riak-control.md @@ -0,0 +1,234 @@ +--- +title: "Riak Control" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Riak Control" + identifier: "cluster_admin_riak_control" + weight: 103 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/2.9.4/ops/advanced/riak-control + - /riak/kv/2.9.4/ops/advanced/riak-control +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference + +Riak Control is a web-based administrative console for inspecting and +manipulating Riak clusters. + +## Requirements + +Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps. + +Before getting started, you should know the address and port of the HTTP (or +HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain +this information from the configuration files as indicated here: + +```riakconf +listener.http.<name> = 127.0.0.1:8098 + +or + +listener.https.<name> = 127.0.0.1:8096 + +## *** The default listeners in the riak.conf file are +## named `internal`, so you would consult the value of +## `listener.http.internal` in your configuration. + +``` + +```appconfig + {riak_api, + [ + %% Other configs + ... if HTTP is configured ... + {http,[{"127.0.0.1",8098}]}, + ... if HTTPS is configured ... + {https,[{"127.0.0.1",8069}]}, + %% Other configs + ]}, + +%% *** This is a truncated configuration to illustrate the +%% pertinent items -- the `http` and `https` tuples within +%% the `riak_api` tuple's value list. +``` + +{{% note title="Note on SSL" %}} +We strongly recommend that you enable SSL for Riak Control. It is disabled by +default, and if you wish to enable it you must do so explicitly. More +information can be found in the document below. +{{% /note %}} + +## Enabling and Disabling Riak Control + +Riak Control is disabled by default, meaning that you should see the +following in your [configuration files][config reference]: + +```riakconf +riak_control = off +``` + +```appconfig +{riak_control, [ + %% Other configs + {enabled, false}, + %% Other configs + ]} +``` + +Enabling Riak Control is simple: + +```riakconf +riak_control = on +``` + +```appconfig +{riak_control, [ + %% Other configs + {enabled, true}, + %% Other configs + ]} +``` + +Make sure to restart the node once you have enabled Riak Control for the +change to take effect. + +After restarting the node, you should be able to access it by going +to `http://ip_address_of_listener:port/admin`. In the case of a development +cluster using the default configuration, you would access Riak Control at +<http://127.0.0.1:8098/admin></a> + +If you enabled authentication for Riak Control while performing the above +configuration, you will be unable to access Riak Control until you have enabled +and configured SSL and HTTPS. + +## Enabling SSL and HTTPS + +In order to use SSL in conjunction with Riak Control, SSL must be +enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/2.9.4/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. 
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
+
+## Authentication
+
+Riak Control provides the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+                %% Other configs
+                {auth, userlist}, %% The only other available option is "none"
+                %% Other configs
+               ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {userlist, [
+        {"bob", "bob_is_the_coolest"},
+        {"polly", "h4x0r123"},
+        {"riakrocks", "cap_theorem_4_life"}
+    ]}
+    %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to `https://ip_address_of_https_listener:https_port/admin`.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[ ![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png) ] ({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+ +Staged changes to the cluster: + +[ ![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png) ] ({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[ ![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png) ] ({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[ ![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png) ] ({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[ ![Node Management]({{<baseurl>}}images/control_node_management.png) ] ({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode). + +[ ![Ring View]({{<baseurl>}}images/control_current_ring.png) ] ({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. + diff --git a/content/riak/kv/2.9.4/using/cluster-operations.md b/content/riak/kv/2.9.4/using/cluster-operations.md new file mode 100644 index 0000000000..72268f88e1 --- /dev/null +++ b/content/riak/kv/2.9.4/using/cluster-operations.md @@ -0,0 +1,105 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak-admin handoff` interface to enable and disable handoff. 
+ +[Learn More >>][ops handoff] + + +#### [Monitoring Strong Consistency][ops strong consistency] + +Overview of the various statistics used in monitoring strong consistency. + +[Learn More >>][ops strong consistency] + + +#### [V3 Multi-Datacenter][ops v3 mdc] + +Explains how to manage V3 replication with the `riak-repl` command. + +[Learn More >>][ops v3 mdc] + + +#### [V2 Multi-Datacenter][ops v2 mdc] + +Explains how to manage V2 replication with the `riak-repl` command. + +[Learn More >>][ops v2 mdc] + diff --git a/content/riak/kv/2.9.4/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.9.4/using/cluster-operations/active-anti-entropy.md new file mode 100644 index 0000000000..ce82eed444 --- /dev/null +++ b/content/riak/kv/2.9.4/using/cluster-operations/active-anti-entropy.md @@ -0,0 +1,286 @@ +--- +title: "Managing Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Managing Active Anti-Entropy" + identifier: "cluster_operations_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +version_history: + in: "2.9.1+" +aliases: + - /riak/kv/2.9.4/ops/advanced/aae/ + - /riak/2.9.4/ops/advanced/aae/ +--- +[config search#throttledelay]: {{<baseurl>}}riak/kv/2.9.4/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{<baseurl>}}riak/kv/2.9.4/configuring/search/#search-anti-entropy-throttle + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak. + +## Enabling Active Anti-Entropy + +Whether AAE is currently enabled in a node is determined by the value of +the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/). + +In Riak versions 2.0 and later, AAE is turned on by default. + +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. 
+ +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak-admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... + +================================ Entropy Trees ================================ +Index Built (ago) +------------------------------------------------------------------------------- +0 5.7 d +22835963083295358096932575511191922182123945984 5.6 d +45671926166590716193865151022383844364247891968 5.5 d +68507889249886074290797726533575766546371837952 4.3 d +91343852333181432387730302044767688728495783936 4.8 d + +================================ Keys Repaired ================================ +Index Last Mean Max +------------------------------------------------------------------------------- +0 0 0 0 +22835963083295358096932575511191922182123945984 0 0 0 +45671926166590716193865151022383844364247891968 0 0 0 +68507889249886074290797726533575766546371837952 0 0 0 +91343852333181432387730302044767688728495783936 0 0 0 + +``` + +Each of these three tables contains information for each +[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories: + +Category | Measures | Description +:--------|:---------|:----------- +**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed + | `All` | How long it has been since a partition exchanged with all of its replicas +**Entropy Trees** | `Built` | When the hash trees for a given partition were created +**Keys Repaired** | `Last` | The number of keys repaired during all key exchanges since the last node restart + | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart + | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart + +All AAE status information obtainable using the `riak-admin aae-status` +command is stored in-memory and is reset when a node is restarted with +the exception of hash tree build information, which is persisted on disk +(because hash trees themselves are persisted on disk). + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and +off but also to fine-tune your cluster's use of AAE, e.g. how +much memory AAE processes should consume, how frequently specific +processes should be run, etc. 
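+
+As a consolidated sketch, a `riak.conf` touching the settings covered in
+the subsections below might look like the following. The parameter names
+and defaults shown are the ones documented in this section; adjust the
+values to suit your workload:
+
+```riakconf
+anti_entropy = active
+anti_entropy.data_dir = ./data/anti_entropy
+anti_entropy.trigger_interval = 15s
+anti_entropy.tree.expiry = 1w
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.tree.build_limit.number = 1
+anti_entropy.write_buffer_size = 4MB
+anti_entropy.concurrency_limit = 2
+anti_entropy.max_open_files = 20
+```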
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach a specified
+length, with the per-tier delays set via the [`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on
+that in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc.). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but they
+entail a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data corruption.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built.
You can set the frequency using the
+`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
+default is every hour (`1h`); the number of hash tree builds is
+specified by `anti_entropy.tree.build_limit.number`, for which the
+default is 1.
+
+### Write Buffer Size
+
+While you are free to choose the backend for data storage in Riak,
+background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
+write buffer used by LevelDB for hash tree generation using the
+`anti_entropy.write_buffer_size` parameter. The default is `4MB`.
+
+### Open Files and Concurrency Limits
+
+The `anti_entropy.concurrency_limit` parameter determines how many AAE
+cross-node information exchanges or hash tree builds can happen
+concurrently. The default is `2`.
+
+The `anti_entropy.max_open_files` parameter sets an open-files limit for
+AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.
+
+## AAE and Riak Search
+
+Riak's AAE subsystem works to repair object inconsistencies both for
+normal key/value objects and for data related to [Riak Search](../../../developing/usage/search). In particular, AAE acts on indexes stored in
+[Solr](http://lucene.apache.org/solr/), the search platform that drives
+Riak Search. Implementation details for AAE and Search can be found in
+the [Search Details](../../reference/search/#active-anti-entropy-aae)
+documentation.
+
+You can check on the status of Search-related AAE using the following
+command:
+
+```bash
+riak-admin search aae-status
+```
+
+The output from that command can be interpreted just like the output
+discussed in the section on [monitoring](#monitoring-aae) above.
+
diff --git a/content/riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes.md
new file mode 100644
index 0000000000..f24abbe646
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes.md
@@ -0,0 +1,195 @@
+---
+title: "Adding / Removing Nodes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Adding/Removing Nodes"
+    identifier: "cluster_operations_add_remove_nodes"
+    weight: 100
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/nodes/adding-removing
+  - /riak/kv/2.9.4/ops/running/nodes/adding-removing
+---
+
+[use running cluster]: {{<baseurl>}}riak/kv/2.9.4/using/running-a-cluster
+
+This page describes the process of adding and removing nodes to and from
+a Riak KV cluster. For information on creating a cluster, check out [Running a Cluster][use running cluster].
+
+## Start the Node
+
+Just like the initial configuration steps, this step has to be repeated
+for every node in your cluster. Before a node can join an existing
+cluster it needs to be started. Depending on your mode of installation,
+use either the init scripts installed by the Riak binary packages or
+simply the script [`riak`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-cli/):
+
+```bash
+/etc/init.d/riak start
+```
+
+or
+
+```bash
+bin/riak start
+```
+
+When the node starts, it will look for a cluster description, known as
+the **ring file**, in its data directory.
If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. This command +would stage a join to that cluster: + +```bash +bin/riak-admin cluster join riak@192.168.2.2 +``` + +If the join request is successful, you should see the following: + +``` +Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2' +``` + +If you have multiple nodes that you would like to join to an existing +cluster, repeat this process for each of them. + +## Joining Nodes to Form a Cluster + +The process of joining a cluster involves several steps, including +staging the proposed cluster nodes, reviewing the cluster plan, and +committing the changes. + +After staging each of the cluster nodes with `riak-admin cluster join` +commands, as in the section above, the next step in forming a cluster is +to review the proposed plan of changes. This can be done with the +`riak-admin cluster plan` command, which is shown in the example below. 
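+
+The plan output below assumes a five-node scenario, following the earlier
+example, in which the nodes `riak@192.168.2.3` through `riak@192.168.2.6`
+(hypothetical names) have each been staged to join `riak@192.168.2.2`:
+
+```bash
+# Run on each of the four joining nodes
+bin/riak-admin cluster join riak@192.168.2.2
+```
+
+With those joins staged, `riak-admin cluster plan` produces output like
+this: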
+
+```
+=============================== Staged Changes ================================
+Action         Nodes(s)
+-------------------------------------------------------------------------------
+join           'riak@192.168.2.3'
+join           'riak@192.168.2.4'
+join           'riak@192.168.2.5'
+join           'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+                         After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid     100.0%     20.3%    'riak@192.168.2.2'
+valid       0.0%     20.3%    'riak@192.168.2.3'
+valid       0.0%     20.3%    'riak@192.168.2.4'
+valid       0.0%     20.3%    'riak@192.168.2.5'
+valid       0.0%     18.8%    'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+  12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from a cluster in two ways. The first assumes that
+the node is being decommissioned, for example, because its added capacity
+is no longer needed or because it is being replaced with a new node. The
+second is for failure scenarios in which a node has crashed and is
+irrecoverable, and so must be removed from the cluster from another
+node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster.
+
+Similarly to joining a node, after executing `riak-admin cluster leave`
+the cluster plan must be reviewed with `riak-admin cluster plan` and
+the changes committed with `riak-admin cluster commit`.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing. Running
+`riak-admin cluster leave` without an argument simply selects the
+current node for you automatically.
+
+As with `riak-admin cluster leave`, the plan to have a node leave the
+cluster must first be reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before any changes will
+actually take place. 
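+
+As a quick reference, a complete decommissioning session (the node name
+here is illustrative) might look like this:
+
+```bash
+# Stage the departure of riak@192.168.2.3 (run from any cluster node)
+riak-admin cluster leave riak@192.168.2.3
+
+# Review the proposed ownership transition
+riak-admin cluster plan
+
+# If the plan looks correct, commit it
+riak-admin cluster commit
+```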
+ + +## Pausing a `join` or `leave` + +{{% note title="Warning" %}} +Pausing may impact cluster health and is not recommended for more than a short period of time. +{{% /note %}} + +To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0: + +```bash +riak-admin transfer-limit <node> 0 +``` + diff --git a/content/riak/kv/2.9.4/using/cluster-operations/backend.md b/content/riak/kv/2.9.4/using/cluster-operations/backend.md new file mode 100644 index 0000000000..c0c624e99f --- /dev/null +++ b/content/riak/kv/2.9.4/using/cluster-operations/backend.md @@ -0,0 +1,17 @@ +--- +draft: true +title: "Backend" +description: "" +project: "riak_kv" +project_version: 2.9.4 +#menu: +# riak_kv-2.9.4: +# name: "Backend" +# identifier: "cluster_operations_backend" +# weight: 112 +# parent: "managing_cluster_operations" +toc: true +--- + +**TODO: Add content** + diff --git a/content/riak/kv/2.9.4/using/cluster-operations/backing-up.md b/content/riak/kv/2.9.4/using/cluster-operations/backing-up.md new file mode 100644 index 0000000000..17a153790c --- /dev/null +++ b/content/riak/kv/2.9.4/using/cluster-operations/backing-up.md @@ -0,0 +1,268 @@ +--- +title: "Backing Up" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Backing Up" + identifier: "cluster_operations_backing_up" + weight: 106 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.4/ops/running/backups + - /riak/kv/2.9.4/ops/running/backups +--- + +[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters +[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/reference/strong-consistency +[concept aae]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/ +[aae read repair]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy + +Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. + +Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios. + +This page covers how to perform backups of Riak KV data. + +## Overview + +Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar. + +Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes. + +The basic process for getting a backup of Riak KV from a node is as follows: + +1. Stop Riak KV with `riak stop`. +2. Backup the appropriate data, ring, and configuration directories. +3. Start Riak KV. + +Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting. + +{{% note title="Backups and eventual consistency" %}} +Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. + +Data could exist on some nodes and not others at the exact time a backup is made. 
Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy).
+{{% /note %}}
+
+## OS-Specific Directory Locations
+
+The default Riak KV data, ring, and configuration directories for each of the supported operating systems are as follows:
+
+#### Debian and Ubuntu
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### Fedora and RHEL
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### FreeBSD
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/usr/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### OS X
+
+Data | Directory
+:----|:---------
+Bitcask | `./data/bitcask`
+LevelDB | `./data/leveldb`
+Ring | `./data/riak/ring`
+Configuration | `./etc`
+Cluster Metadata | `./data/riak/cluster_meta`
+Search | `./data/riak/yz`
+Strong consistency | `./data/ensembles`
+
+**Note**: OS X paths are relative to the directory in which the package
+was extracted.
+
+#### SmartOS
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/opt/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### Solaris
+
+Data | Directory
+:----|:---------
+Bitcask | `/opt/riak/data/bitcask`
+LevelDB | `/opt/riak/data/leveldb`
+Ring | `/opt/riak/ring`
+Configuration | `/opt/riak/etc`
+Cluster Metadata | `/opt/riak/cluster_meta`
+Search | `/opt/riak/yz`
+Strong consistency | `/opt/riak/data/ensembles`
+
+## Performing Backups
+
+{{% note title="Deprecation notice" %}}
+In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#backup) command commonly used for
+backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
+{{% /note %}}
+
+Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.
+
+Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
+installation.
+
+The following examples use `tar`:
+
+{{% note %}}
+Backups must be performed while Riak KV is stopped to prevent data loss.
+{{% /note %}}
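+
+Each of the per-backend commands below can be wrapped in a
+stop/archive/start cycle. Here is a minimal sketch, assuming the default
+Debian/Ubuntu Bitcask paths from the table above and a
+`/mnt/riak_backups` destination (adjust both for your own layout):
+
+```bash
+#!/usr/bin/env bash
+set -e
+
+# Stop the node so the data files are quiescent
+riak stop
+
+# Archive the Bitcask data, ring, and configuration directories
+tar -czf /mnt/riak_backups/riak_data_$(date +%Y%m%d_%H%M).tar.gz \
+    /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak
+
+# Bring the node back online
+riak start
+```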
+
+### Bitcask
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak
+```
+
+### LevelDB
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak
+```
+
+### Cluster Metadata
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/cluster_meta
+```
+
+### Search / Solr Data
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/yz
+```
+
+### Strong Consistency Data
+
+Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature
+can be backed up in an analogous fashion:
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/ensembles
+```
+
+## Restoring a Node
+
+The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment.
+
+If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process:
+
+1. Install Riak on the new node.
+2. Restore your old node's configuration files, data directory, and ring
+   directory.
+3. Start the node and verify proper operation with `riak ping`,
+   `riak-admin status`, and other methods you use to check node health.
+
+If the node name of a restored node (`-name` argument in `vm.args` or
+`nodename` parameter in `riak.conf`) is different from the name of the
+node that the restored backup was taken from, you will need to
+additionally:
+
+1. Mark the original instance down in the cluster using
+   [`riak-admin down <node>`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#down)
+2. Join the restored node to the cluster using
+   [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster-join)
+3. Replace the original instance with the renamed instance with
+   [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster-force-replace)
+4. Plan the changes to the cluster with `riak-admin cluster plan`
+5. Finally, commit the cluster changes with `riak-admin cluster commit`
+
+{{% note %}}
+For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/2.9.4/using/admin/).
+{{% /note %}}
+
+For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`.
+
+1. Join to any existing cluster node.
+
+    ```bash
+    riak-admin cluster join riak@riak2.example.com
+    ```
+
+2. Mark the old instance down.
+
+    ```bash
+    riak-admin down riak@riak1.example.com
+    ```
+
+3. Force-replace the original instance with the new one.
+
+    ```bash
+    riak-admin cluster force-replace \
+        riak@riak1.example.com riak@riak6.example.com
+    ```
+
+4. Display and review the cluster change plan.
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+5. Commit the changes to the cluster. 
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+Your [configuration files][config reference] should also be changed to match the new name in addition to running the commands (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change, but the hostnames are used for the node names and the hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member-status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/2.9.4/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
+
diff --git a/content/riak/kv/2.9.4/using/cluster-operations/bucket-types.md b/content/riak/kv/2.9.4/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..e302dce73e
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/cluster-operations/bucket-types.md
@@ -0,0 +1,59 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional setup step on the
+command line.
+
+## Creating a Bucket Type
+
+You can create a new bucket type without any properties and then set
+individual buckets to be indexed. 
+The step below creates and activates the bucket type:
+
+```bash
+riak-admin bucket-type create animals '{"props":{}}'
+riak-admin bucket-type activate animals
+```
+
+And this step applies the index to the `cats` bucket, which bears the
+`animals` bucket type we just created and activated:
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"famous"}}'
+```
+
+Another possibility is to set the `search_index` as a default property
+of the bucket type. This means _any_ bucket under that type will
+inherit that setting and have its values indexed.
+
+```bash
+riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}'
+riak-admin bucket-type activate animals
+```
+
diff --git a/content/riak/kv/2.9.4/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.9.4/using/cluster-operations/changing-cluster-info.md
new file mode 100644
index 0000000000..cf339f19f7
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/cluster-operations/changing-cluster-info.md
@@ -0,0 +1,455 @@
+---
+title: "Changing Cluster Information"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Changing Cluster Info"
+    identifier: "cluster_operations_change_info"
+    weight: 101
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/nodes/renaming
+  - /riak/kv/2.9.4/ops/running/nodes/renaming
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference
+
+## Change the Node Name
+
+The node name is an important setting for the Erlang VM, especially when
+you want to build a cluster of nodes, as the node name identifies both
+the Erlang application and the host name on the network. All nodes in
+the Riak cluster need these node names to communicate and coordinate
+with each other.
+
+In your configuration files, the node name defaults to `riak@127.0.0.1`.
+To change the node name, change the following line:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+Change it to something that corresponds to either the IP address or a
+resolvable host name for this particular node, like so:
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+## Change the HTTP and Protocol Buffers Binding Address
+
+By default, Riak's HTTP and Protocol Buffers services are bound to the
+local interface, i.e. 127.0.0.1, and are therefore unable to serve
+requests from the outside network. The relevant setting is in your
+[configuration files][config reference]:
+
+```riakconf
+# For HTTP
+listener.http.internal = 127.0.0.1:8098
+
+# For Protocol Buffers
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+% In the riak_api section
+
+% For HTTP
+{http, [ {"127.0.0.1", 8098 } ]},
+
+% For Protocol Buffers
+{pb, [ {"127.0.0.1", 8087} ] },
+```
+
+Either change it to use an IP address that corresponds to one of the
+server's network interfaces, or 0.0.0.0 to allow access from all
+interfaces and networks, e.g.:
+
+```riakconf
+listener.http.internal = 0.0.0.0:8098
+```
+
+```appconfig
+% In the riak_core section
+{http, [ {"0.0.0.0", 8098 } ]},
+```
+
+The same configuration should be changed for the Protocol Buffers
+interface if you intend to use it (which we recommend). Change the
+following line:
+
+```riakconf
+listener.protobuf.internal = 0.0.0.0:8087
+```
+
+```appconfig
+% In the riak_core section
+{pb, [ {"0.0.0.0", 8087} ] },
+```
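+
+After restarting the node, you can sanity-check the rebound HTTP
+interface from another host. The address and port below are the example
+values used above; substitute your own:
+
+```bash
+# A healthy node responds with HTTP 200 and the body "OK"
+curl -i http://192.168.1.10:8098/ping
+```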
+
+## Rename Single Node Clusters
+
+To rename a single-node development cluster:
+
+1. Stop the node with `riak stop`.
+
+2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to the new name.
+
+3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`.
+
+4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`.
+
+5. Start Riak on the node with `riak start`.
+
+
+## Rename Multi-Node Clusters
+
+For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node.
+
+Prior to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime.
+
+There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario.
+
+The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method.
+
+### Example Scenario
+
+For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration:
+
+* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11
+* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12
+* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13
+* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14
+* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15
+
+The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use.
+
+The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation.
+
+### Process
+
+This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section.
+
+1. [Down the node to be reconfigured](#down)
+2. [Reconfigure node to use new address](#reconfigure)
+3. [Repeat previous steps on each node](#repeat)
+
+
+<a id="down"></a>
+#### Down the Node
+
+1. Stop Riak on `node1.localdomain`:
+
+    ```bash
+    riak stop
+    ```
+
+    The output should look like this:
+
+    ```
+    Attempting to restart script through sudo -H -u riak
+    ok
+    ```
+
+2. From the `node2.localdomain` node, mark `riak@10.1.42.11` down:
+
+    ```bash
+    riak-admin down riak@10.1.42.11
+    ```
+
+    Successfully marking the node down should produce output like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: "riak@10.1.42.11" marked as down
+    ```
+
+    This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node.
+
+<a id="reconfigure"></a>
+#### Reconfigure Node to Use New Address
+
+Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps:
+
+1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example:
+
+    `riak.conf`: `nodename = riak@192.168.17.11`
+    `vm.args` : `-name riak@192.168.17.11`
+
+2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Rename Single Node Clusters](#rename-single-node-clusters).
+
+3. Rename the node's `ring` directory, the location of which is described in step 4 of [Rename Single Node Clusters](#rename-single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process.
+
+4. Start Riak on `node1.localdomain`.
+
+    ```bash
+    riak start
+    ```
+
+5. Join the node back into the cluster.
+
+    ```bash
+    riak-admin cluster join riak@10.1.42.12
+    ```
+
+    Successful staging of the join request should have output like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12'
+    ```
+
+6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`:
+
+    ```bash
+    riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11
+    ```
+
+    Successful force replacement staging output looks like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11'
+    ```
+
+7. Review the new changes with `riak-admin cluster plan`:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Example output:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    =========================== Staged Changes ============================
+    Action         Nodes(s)
+    -----------------------------------------------------------------------
+    join           'riak@192.168.17.11'
+    force-replace  'riak@10.1.42.11' with 'riak@192.168.17.11'
+    -----------------------------------------------------------------------
+
+    WARNING: All of 'riak@10.1.42.11' replicas will be lost
+
+    NOTE: Applying these changes will result in 1 cluster transition
+
+    #######################################################################
+                         After cluster transition 1/1
+    #######################################################################
+
+    ============================= Membership ==============================
+    Status     Ring    Pending    Node
+    -----------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@10.1.42.12'
+    valid      20.3%      --      'riak@10.1.42.13'
+    valid      20.3%      --      'riak@10.1.42.14'
+    valid      18.8%      --      'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    Partitions reassigned from cluster changes: 13
+      13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11'
+    ```
+
+8. Commit the new changes to the cluster with `riak-admin cluster commit`:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    Output from the command should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Cluster changes committed
+    ```
+
+9. Check that the node is participating in the cluster and functioning as expected:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    Output should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    ============================= Membership ==============================
+    Status     Ring    Pending    Node
+    -----------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@10.1.42.12'
+    valid      20.3%      --      'riak@10.1.42.13'
+    valid      20.3%      --      'riak@10.1.42.14'
+    valid      18.8%      --      'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+10. Monitor hinted handoff transfers to ensure they have finished with the `riak-admin transfers` command.
+
+11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed.
+
+{{% note title="Note" %}}
+When using the `riak-admin cluster force-replace` command, you will always get a
+warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be
+lost`. Since we didn't delete any data files and we are replacing the node
+with itself under a new name, we will not lose any replicas.
+{{% /note %}}

+<a id="repeat"></a>
+#### Repeat Previous Steps on Each Node
+
+Repeat the steps above for each of the remaining nodes in the cluster.
+
+Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster. 
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the below steps can be used to rename nodes in a cluster that is being restored from backups. The below steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (or `-name` in `vm.args`) from `riak@10.1.42.11` to your new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    down       20.3%      --      'riak@10.1.42.12'
+    down       20.3%      --      'riak@10.1.42.13'
+    down       20.3%      --      'riak@10.1.42.14'
+    down       18.8%      --      'riak@10.1.42.15'
+    -------------------------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+    ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+    ```bash
+    ================================== Claimant ===================================
+    Claimant:  'riak@192.168.17.11'
+    Status:     up
+    Ring Ready: true
+
+    ============================== Ownership Handoff ==============================
+    No pending changes.
+
+    ============================== Unreachable Nodes ==============================
+    All nodes are up and reachable
+    ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args`, as described above.
+
+2. Move aside the ring directory. As in [Rename Multi-Node Clusters](#rename-multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. 
Force replace each node with its old node name. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`. + +6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to below: + + ```bash + =============================== Staged Changes ================================ + Action Details(s) + ------------------------------------------------------------------------------- + force-replace 'riak@10.1.42.12' with 'riak@192.168.17.12' + force-replace 'riak@10.1.42.13' with 'riak@192.168.17.13' + force-replace 'riak@10.1.42.14' with 'riak@192.168.17.14' + force-replace 'riak@10.1.42.15' with 'riak@192.168.17.15' + join 'riak@192.168.17.12' + join 'riak@192.168.17.13' + join 'riak@192.168.17.14' + join 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.12' replicas will be lost + WARNING: All of 'riak@10.1.42.13' replicas will be lost + WARNING: All of 'riak@10.1.42.14' replicas will be lost + WARNING: All of 'riak@10.1.42.15' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ############################################################################### + After cluster transition 1/1 + ############################################################################### + + ================================= Membership ================================== + Status Ring Pending Node + ------------------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@192.168.17.12' + valid 20.3% -- 'riak@192.168.17.13' + valid 20.3% -- 'riak@192.168.17.14' + valid 18.8% -- 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 51 + 13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12' + 13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13' + 13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14' + 12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15' + ``` + +7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`. + +8. 
Once the cluster transition has completed, all node names should be changed and be marked as valid in `riak-admin member-status`, like below:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@192.168.17.12'
+    valid      20.3%      --      'riak@192.168.17.13'
+    valid      20.3%      --      'riak@192.168.17.14'
+    valid      18.8%      --      'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
diff --git a/content/riak/kv/2.9.4/using/cluster-operations/handoff.md b/content/riak/kv/2.9.4/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..4eb80cb2bf
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/cluster-operations/handoff.md
@@ -0,0 +1,117 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/handoff
+  - /riak/kv/2.9.4/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to change your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command requires
+that you specify both a node or nodes to be targeted by the command and
+whether you'd like to disable inbound handoff, outbound handoff, or
+both. The `disable` command works just like `enable`. This command
+would disable all forms of handoff on all nodes, to give just one
+example:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster. 
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table providing the following information
+about each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, you will see details for each ongoing
+transfer.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the [configuration reference]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#intra-cluster-handoff) linked above.
+
diff --git a/content/riak/kv/2.9.4/using/cluster-operations/inspecting-node.md b/content/riak/kv/2.9.4/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..0df2651c94
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,499 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/nodes/inspecting
+  - /riak/kv/2.9.4/ops/running/nodes/inspecting
+---
+
+When you need to inspect a Riak node to gather performance metrics or
+diagnose potential issues, a number of tools are available to help. These
+tools are either included with Riak itself or made available through the
+Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note, for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required for statistics to be generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node. 
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET +FSM Siblings are inextricably linked. These are one-minute stats. + +Stat | Description +------------------------------|--------------------------------------------------- +`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute + +### Total Stats + +Total Stats represent the total number of times a particular activity +has occurred since this node was started. + +Stat | Description +---------------------------------------|--------------------------------------------------- +`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes +`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes +`vnode_gets_total` | Total number of GETs coordinated by local vnodes +`vnode_puts_total` | Total number of PUTS coordinated by local vnodes +`read_repairs_total` | Total number of Read Repairs this node has coordinated +`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination +`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy +`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads +`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes +`vnode_index_writes_postings_total` | Total number of individual secondary index values written +`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes +`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted +`pbc_connects_total` | Total number of Protocol Buffers connections made +`precommit_fail` | Total number of pre-commit hook failures +`postcommit_fail` | Total number of post-commit hook failures +`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection +`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection +`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas +`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas +`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas +`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas + +### Timestamps + +Some of the Erlang applications that Riak is comprised of contribute +statistics to `riak-admin status`. The below timestamps record, in +Epoch time, the last time statistics for that application were +generated. + +Stat | Description +--------------------|--------------------------------------------------- +`riak_kv_stat_ts` | The last time Riak KV stats were generated. +`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated. 
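+
+Individual stats can be pulled out of the `riak-admin status` output with
+standard shell tools, since each stat is printed on its own line. For
+example (the stat name here is just an illustration):
+
+```bash
+# Check GET latency percentiles on this node
+riak-admin status | grep node_get_fsm_time
+```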
+
+### Ring
+
+General ring information is reported in `riak-admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members` | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership` | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang’s cpu_sup module,
+documentation for which can be found at [ErlDocs:
+cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1` | The average number of active processes for the last 1 minute (equivalent to the top(1) command’s load average when divided by 256)
+`cpu_avg5` | The average number of active processes for the last 5 minutes (equivalent to the top(1) command’s load average when divided by 256)
+`cpu_avg15` | The average number of active processes for the last 15 minutes (equivalent to the top(1) command’s load average when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine,
+documentation for which can be found at [ErlDocs:
+Memory](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total` | Total allocated memory (sum of processes and system)
+`memory_processes` | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system` | Total allocated memory that is not directly related to an Erlang process
+`memory_atom` | Total amount of memory currently allocated for atom storage
+`memory_atom_used` | Total amount of memory currently used for atom storage
+`memory_binary` | Total amount of memory used for binaries
+`memory_code` | Total amount of memory allocated for Erlang code
+`memory_ets` | Total memory allocated for Erlang Term Storage
+`mem_total` | Total available system memory
+`mem_allocated` | Total memory allocated for this node
+
+### Erlang VM
+
+The below statistics describe properties of the Erlang VM. 
+
+Stat | Description
+--------------------------|---------------------------------------------------
+`nodename` | The name this node uses to identify itself
+`connected_nodes` | A list of the nodes that this node is aware of at this time
+`sys_driver_version` | String representing the Erlang driver version in use by the runtime system
+`sys_global_heaps_size` | Current size of the shared global heap
+`sys_heap_type` | String representing the heap type in use (one of private, shared, hybrid)
+`sys_logical_processors` | Number of logical processors available on the system
+`sys_otp_release` | Erlang OTP release version in use on the node
+`sys_process_count` | Number of processes currently running in the Erlang VM
+`sys_smp_support` | Boolean value representing whether symmetric multi-processing (SMP) is available
+`sys_system_version` | Detailed Erlang version information
+`sys_system_architecture` | The node operating system and hardware architecture
+`sys_threads_enabled` | Boolean value representing whether threads are enabled
+`sys_thread_pool_size` | Number of threads in the asynchronous thread pool
+`sys_wordsize` | Size of Erlang term words in bytes as an integer; for example, 4 is returned on 32-bit architectures and 8 is returned on 64-bit architectures
+
+### Miscellaneous Information
+
+Miscellaneous Information provides additional details particular to this
+node.
+
+Stat | Description
+---------------------------|---------------------------------------------------
+`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as undefined if LevelDB is not being used.
+`disk` | Information about the disk, taken from Erlang's disksup module. Reported as [{"ID",KBytes_Used,Percent_Util}].
+`storage_backend` | The storage backend currently in use.
+
+### Pipeline Metrics
+
+The following metrics from riak_pipe are generated during MapReduce
+operations.
+
+Stat | Description
+--------------------------------|---------------------------------------------------
+`pipeline_active` | The number of pipelines active in the last 60 seconds
+`pipeline_create_count` | The total number of pipelines created since the node was started
+`pipeline_create_error_count` | The total number of pipeline creation errors since the node was started
+`pipeline_create_error_one` | The number of pipeline creation errors in the last 60 seconds
+`pipeline_create_one` | The number of pipelines created in the last 60 seconds
+
+### Application and Subsystem Versions
+
+The specific version of each Erlang application and subsystem which
+makes up a Riak node is present in the `riak-admin status` output. Each
+application is linked below next to its version identifier. 
+
+Stat | Description
+------------------------|---------------------------------------------------
+`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl)
+`riak_control_version` | [Riak Control](http://github.com/basho/riak_control)
+`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info)
+`riak_search_version` | [Riak Search](http://github.com/basho/riak_search)
+`merge_index_version` | [Merge Index](http://github.com/basho/merge_index)
+`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv)
+`sidejob_version` | [Sidejob](http://github.com/basho/sidejob)
+`riak_api_version` | [Riak API](http://github.com/basho/riak_api)
+`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe)
+`riak_core_version` | [Riak Core](http://github.com/basho/riak_core)
+`bitcask_version` | [Bitcask](http://github.com/basho/bitcask)
+`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats)
+`webmachine_version` | [Webmachine](http://github.com/basho/webmachine)
+`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb)
+`inets_version` | [inets](http://erlang.org/doc/apps/inets/)
+`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js)
+`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/)
+`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/)
+`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon)
+`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/)
+`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/)
+`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/)
+`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/)
+`lager_version` | [Lager](http://github.com/DeadZen/lager)
+`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush)
+`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/)
+`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/)
+`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/)
+`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/)
+
+### Riak Search Statistics
+
+The following statistics related to Riak Search message queues are
+available.
+
+Stat | Description
+-----------------------------|---------------------------------------------------
+`riak_search_vnodeq_max` | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_mean` | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_median` | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_min` | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_total` | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started
+`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem
+
+Note that under ideal operation, and with the exception of
+`riak_search_vnodes_running`, these statistics should contain low values
+(e.g., 0-10). Higher values could indicate an issue.
+
+## `riak-debug`
+
+The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes.
+
+`riak-debug` also runs `riak-admin diag`, which performs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well.
+
+{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}}
+The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion.
+{{% /note %}}
+
+## Strong Consistency Stats
+
+Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The table below lists those stats.
+
+### GET-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
+`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
+`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+
+### PUT-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
+`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
+`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+
+## `riak-admin diag`
+
+Running `riak-admin diag` by itself will perform a check of all of the
+data partitions in your cluster.
It will return a listing of partitions
+that have been checked, each of which looks something like this:
+
+```
+{1392993748081016843912887106182707253109560705024, % the partition checked
+ 'dev-rel@127.0.0.1'},                              % that partition's nodename
+```
+
+At the end of that (potentially very long) listing of checked
+partitions, it will print notices, warnings, and other pieces of
+information about issues that it has found, including date/time, message
+type, and a detailed description. Here's an example:
+
+```
+15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
+15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
+```
+
+Messages bear the following types (derived from
+[syslog](http://en.wikipedia.org/wiki/Syslog) security levels):
+
+* `debug`
+* `info`
+* `notice`
+* `warning`
+* `error`
+* `critical`
+* `alert`
+* `emergency`
+
+#### Command flags
+
+Attaching the `--help` flag will return a list of flags and commands
+that can be used with Riaknostic:
+
+```
+Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]
+
+-h, --help     Display help/usage dialogue
+-d, --level    Minimum message severity level (default: notice)
+-l, --list     Describe available diagnostic tasks
+--export       Package system info in '/export.zip'
+check_name     A specific check to run
+```
+
+Running `riak-admin diag` with the `--list` flag will return a list of
+available diagnostic checks. The following checks are available:
+
+Check | Description
+:-----|:-----------
+`disk` | Data directory permissions and atime
+`dumps` | Find crash dumps
+`memory_use` | Measure memory usage
+`nodes_connected` | Cluster node liveness
+`ring_membership` | Cluster membership validity
+`ring_preflists` | Check if the ring satisfies `n_val`
+`ring_size` | Check if the ring size is valid
+`search` | Check whether Riak Search is enabled on all nodes
+
+The `--level` flag enables you to specify the log level and thus to
+filter messages based on type. You can pass in any of the message types
+listed above (`debug`, `info`, etc.).
+
+The `--level` flag can be used when running `riak-admin diag` with or
+without specifying a diagnostic check.
+
+#### Contributing
+
+Do you have an idea that would help us improve Riaknostic? If so, fork
+the [GitHub repository](https://github.com/basho/riaknostic) and send us
+a pull request with your changes. The code is documented with
+[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
+Docs a read before you contribute.
+
+If you want to run the Riaknostic script while developing and you don't
+have it hooked up to your local Riak installation, you can invoke it
+directly like so:
+
+```bash
+./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
+```
+
+Those extra options are usually assigned by the `riak-admin` script for
+you, but here's how to set them:
+
+* `--etc` --- The location of your Riak configuration directory (usually
+  `/etc`). In the example above, configuration is in the generated
+  directory of a source checkout of Riak.
+* `--base` --- The "base" directory of Riak, usually the root of the
+  generated directory or `/usr/lib/riak` on Linux. Scan the
+  `riak-admin` script for how the `RUNNER_BASE_DIR` variable is
+  assigned on your platform.
+* `--user` --- The user/UID as which the Riak node runs.
In a source
+  checkout, it's the current user; on most systems, it's `riak`.
+
+## Related Resources
+
+* [The riak-admin configuration management tool](../../admin/riak-admin/)
+* [Riaknostic](http://riaknostic.basho.com/)
+* [HTTP API Status](../../../developing/api/http/status/)
+
diff --git a/content/riak/kv/2.9.4/using/cluster-operations/load-balancing.md b/content/riak/kv/2.9.4/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..579bbab83a
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/cluster-operations/load-balancing.md
@@ -0,0 +1,17 @@
+---
+draft: true
+title: "Load Balancing"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+#menu:
+#  riak_kv-2.9.4:
+#    name: "Load Balancing"
+#    identifier: "cluster_operations_load_balancing"
+#    weight: 111
+#    parent: "managing_cluster_operations"
+toc: true
+---
+
+**TODO: Add content (not sure where this exists in docs)**
+
diff --git a/content/riak/kv/2.9.4/using/cluster-operations/logging.md b/content/riak/kv/2.9.4/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..db2e8823bf
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/cluster-operations/logging.md
@@ -0,0 +1,43 @@
+---
+title: "Enabling and Disabling Debug Logging"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Logging"
+    identifier: "cluster_operations_logging"
+    weight: 105
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+If you'd like to enable debug logging on the current node, i.e. set the
+console log level to `debug`, you can do so without restarting the node
+by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:
+
+```erlang
+lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
+```
+
+You should replace the file location above (`/var/log/riak/console.log`)
+with your platform-specific location, e.g. `./log/console.log` for a
+source installation. This location is specified by the
+`log.console.file` configuration parameter.
+
+If you'd like to enable debug logging on _all_ nodes instead of just one
+node, you can enter the Erlang console of any running node by running
+`riak attach` and enter the following:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
+```
+
+As before, use the appropriate log file location for your cluster.
+
+At any time, you can set the log level back to `info`:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
+```
+
diff --git a/content/riak/kv/2.9.4/using/cluster-operations/replacing-node.md b/content/riak/kv/2.9.4/using/cluster-operations/replacing-node.md
new file mode 100644
index 0000000000..f396fcfec7
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/cluster-operations/replacing-node.md
@@ -0,0 +1,96 @@
+---
+title: "Replacing a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Replacing a Node"
+    identifier: "cluster_operations_replace_node"
+    weight: 102
+    parent: "managing_cluster_operations"
+toc: true
+---
+
+At some point, for various reasons, you might need to replace a node in
+your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/2.9.4/using/repair-recovery)). Here is the recommended way to go
+about replacing a node.
+
+1. 
Back up your data directory on the node in question. In this example
+scenario, we'll call the node `riak4`:
+
+    ```bash
+    sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+    ```
+
+    If you have any unforeseen issues at any point in the node
+    replacement process, you can restore the node's data from this
+    backup.
+
+2. Download and install Riak on the new node you wish to bring into the
+cluster and have it replace the `riak4` node. We'll call the new node
+`riak7` for the purpose of this example.
+
+3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-cli/#start):
+
+    ```bash
+    riak start
+    ```
+
+4. Plan the join of the new `riak7` node to an existing node already
+participating in the cluster; for example, `riak0`, with the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster) command executed on the new `riak7` node:
+
+    ```bash
+    riak-admin cluster join riak0
+    ```
+
+5. Plan the replacement of the existing `riak4` node with the new
+`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster replace riak4 riak7
+    ```
+
+    <div class="info">
+    <div class="title">Single Nodes</div>
+    If a node is started singly using default settings (as, for example,
+    you might do when you are building your first test environment), you
+    will need to remove the ring files from the data directory after you
+    edit `/etc/vm.args`. `riak-admin cluster replace` will not work as
+    the node has not been joined to a cluster.
+    </div>
+
+6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster) command executed on the new
+`riak7` node:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+7. If the changes are correct, you can commit them with the
+[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster):
+
+    ```bash
+    riak-admin cluster clear
+    ```
+
+Once you have successfully replaced the node, the old node should begin
+leaving the cluster. You can check on ring readiness after replacing the
+node with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#ringready)
+and [`riak-admin member-status`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#member-status)
+commands.
+
+{{% note title="Ring Settling" %}}
+You'll need to make sure that no other ring changes occur between the time
+you start the new node and the time the ring settles with the new IP info.
+
+The ring is considered settled when the new node reports `true` when you run
+the `riak-admin ringready` command.
+{{% /note %}}
+
diff --git a/content/riak/kv/2.9.4/using/cluster-operations/secondary-indexes.md b/content/riak/kv/2.9.4/using/cluster-operations/secondary-indexes.md
new file mode 100644
index 0000000000..4f465eafda
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/cluster-operations/secondary-indexes.md
@@ -0,0 +1,81 @@
+---
+draft: true
+title: "Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+#menu:
+#  riak_kv-2.9.4:
+#    name: "Secondary Indexes"
+#    identifier: "cluster_operations_2i"
+#    weight: 109
+#    parent: "managing_cluster_operations"
+toc: true
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. 
Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + diff --git a/content/riak/kv/2.9.4/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.9.4/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..423dc8001a --- /dev/null +++ b/content/riak/kv/2.9.4/using/cluster-operations/strong-consistency.md @@ -0,0 +1,72 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +--- + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. 
Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats. + +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly 
consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response + diff --git a/content/riak/kv/2.9.4/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/2.9.4/using/cluster-operations/tictac-active-anti-entropy.md new file mode 100644 index 0000000000..073bb8932b --- /dev/null +++ b/content/riak/kv/2.9.4/using/cluster-operations/tictac-active-anti-entropy.md @@ -0,0 +1,31 @@ +--- +title: "TicTac Active Anti-Entropy" +description: "An Active Anti-Entropy library" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "TicTac Active Anti-Entropy" + identifier: "TicTac_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/kv/2.9.4/ops/advanced/tictacaae/ + - /riak/2.9.4/ops/advanced/ticktacaae/ +--- + + + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +## TicTac AAE + +The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to full integrate the library into the KV 3.0 release. + +TicTac Active Anti-Entropy makes two changes to the way Anti-Entropy has previously worked in Riak. The first change is to the way Merkle Trees are contructed so that they are built incrementally. The second change allows the underlying Anti-entropy key store to be key-ordered while still allowing faster access to keys via their Merkle tree location or the last modified date of the object. + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and +off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements. diff --git a/content/riak/kv/2.9.4/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.9.4/using/cluster-operations/v2-multi-datacenter.md new file mode 100644 index 0000000000..bca792e49b --- /dev/null +++ b/content/riak/kv/2.9.4/using/cluster-operations/v2-multi-datacenter.md @@ -0,0 +1,266 @@ +--- +title_supertext: "V2 Multi-Datacenter" +title: "Replication Operations" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "V2 Multi-Datacenter" + identifier: "cluster_operations_v2" + weight: 115 + parent: "managing_cluster_operations" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.4/ops/mdc/v2/operations + - /riak/kv/2.9.4/ops/mdc/v2/operations +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter/) instead. +{{% /note %}} + +Riak's Multi-Datacenter Replication system is largely +controlled by the `riak-repl` command. The sections below detail the +available subcommands. 
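+
+Taken together, the subcommands below compose into a basic v2 replication
+setup. The following is a rough end-to-end sketch only; the node name,
+IPs, port, and site name are illustrative, and each command is run on the
+cluster noted in the comments:
+
+```bash
+# On the primary (source) cluster: expose a listener
+riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010
+
+# On the secondary (sink) cluster: register a site pointing at that listener
+riak-repl add-site 10.0.1.156 9010 newyork
+
+# Check connection state and manually kick off an initial fullsync
+riak-repl status
+riak-repl start-fullsync
+```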
+
+## add-listener
+
+Adds a listener (primary) to the given node, IP address, and port.
+
+```bash
+riak-repl add-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-nat-listener
+
+Adds a NAT-aware listener (primary) to the given node, IP address, port,
+NAT IP, and NAT port. If a non-NAT listener already exists with the same
+internal IP and port, it is "upgraded" to a NAT Listener.
+
+```bash
+riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010
+```
+
+## del-listener
+
+Removes and shuts down a listener (primary) on the given node, IP
+address, and port.
+
+```bash
+riak-repl del-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-site
+
+Adds a site (secondary) to the local node, connecting to the specified
+listener.
+
+```bash
+riak-repl add-site <ipaddr> <portnum> <sitename>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-site 10.0.1.156 9010 newyork
+```
+
+## del-site
+
+Removes a site (secondary) from the local node by name.
+
+```bash
+riak-repl del-site <sitename>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-site newyork
+```
+
+## status
+
+Obtains status information about replication. Reports counts on how much
+data has been transmitted, transfer rates, message queue lengths of
+clients and servers, number of fullsync operations, and connection
+status. This command only displays useful information on the leader
+node.
+
+```bash
+riak-repl status
+```
+
+## start-fullsync
+
+Manually initiates a fullsync operation with connected sites.
+
+```bash
+riak-repl start-fullsync
+```
+
+## cancel-fullsync
+
+Cancels any fullsync operations in progress. If a partition is in
+progress, synchronization will stop after that partition completes.
+During cancellation, `riak-repl status` will show `cancelled` in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+## pause-fullsync
+
+Pauses any fullsync operations in progress. If a partition is in
+progress, synchronization will pause after that partition completes.
+While paused, `riak-repl status` will show `paused` in the status
+information. Fullsync may be cancelled while paused.
+
+```bash
+riak-repl pause-fullsync
+```
+
+## resume-fullsync
+
+Resumes any fullsync operations that were paused. If a fullsync
+operation was running at the time of the pause, the next partition will
+be synchronized. If not, it will wait until the next `start-fullsync`
+command or `fullsync_interval`.
+
+```bash
+riak-repl resume-fullsync
+```
+
+## riak-repl Status Output
+
+The following definitions describe the output of the `riak-repl status`
+command. Please note that many of these statistics will only appear on
+the current leader node, and that all counts will be reset to 0 upon
+restarting Riak.
+
+### Client
+
+Field | Description
+:-----|:-----------
+`client_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.4/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a>
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected sites
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of site connections made to this node
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+
+### Server
+
+Field | Description
+:-----|:-----------
+`server_bytes_recv` | The total number of bytes the server (listener) has received
+`server_bytes_sent` | The total number of bytes the server (listener) has sent
+`server_connect_errors` | The number of listener to site connection errors
+`server_connects` | The number of times the listener connects to the client site
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.4/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a>
+
+### Elections and Objects
+
+Field | Description
+:-----|:-----------
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+
+### Other
+
+Field | Description
+:-----|:-----------
+`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>`
+`[sitename]_ips` | Defines a replication site
+`leader` | Which node is the current leader of the cluster
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the client (site) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The listeners currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>`connected` --- The IP address and port of a connected client (site)</li><li>`cluster_name` --- The name of the connected client (site)</li><li>`connecting` --- The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul>
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/2.9.4/configuring/v2-multi-datacenter/) guide for more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the server (listener) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/2.9.4/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used.
They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+## Keylist Strategy
+
+The following fields appear under both the `keylist_server` and
+`keylist_client` fields. Any differences are described in the table.
+
+Field | Description
+------|------------
+`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
+`partition_start` | The number of elapsed seconds since replication has started on a given partition
+`stage_start` | The number of elapsed seconds since replication has started on a given stage
+`get_pool_size` | The number of Riak get finite state workers available to process requests
+
diff --git a/content/riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter.md
new file mode 100644
index 0000000000..19a514aec0
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter.md
@@ -0,0 +1,422 @@
+---
+title_supertext: "V3 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "V3 Multi-Datacenter"
+    identifier: "cluster_operations_v3"
+    weight: 114
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.4/ops/mdc/v3/operations
+  - /riak/kv/2.9.4/ops/mdc/v3/operations
+---
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.4/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/2.9.4/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.4/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.4/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{<baseurl>}}riak/kv/2.9.4/using/reference/multi-datacenter/statistics
+
+This document explains how to manage replication with the `riak-repl`
+command. The behavior of some of these commands can be altered by
+setting the appropriate [configuration][config v3 mdc] values.
+
+All commands need to be run only once on a single node of a cluster for
+the changes to propagate to all other nodes. All changes will persist
+across node restarts and will automatically take effect when nodes are
+added to the cluster.
+
+## Cluster Connectivity
+
+#### clustername
+
+Set the `clustername` for all nodes in a Riak cluster.
+
+* Without a parameter, returns the current name of the cluster
+* With a parameter, names the current cluster
+
+To **set** the `clustername`:
+
+* Syntax: `riak-repl clustername <clustername>`
+* Example: `riak-repl clustername Boston`
+
+To **get** the `clustername`:
+
+* Syntax: `riak-repl clustername`
+* Example: `riak-repl clustername`
+
+#### connect
+
+The `connect` command establishes communications from a source cluster
+to a sink cluster of the same ring size. The `host:port` of the sink
+cluster is used for this. The IP and port to connect to can be found in
+the `advanced.config` of the remote cluster, under `riak_core` and
+`cluster_mgr`.
+
+The `host` can be either an IP address
+ +* Syntax: `riak-repl connect <host>:<port>` +* Example: `riak-repl connect Austin:9080` + +#### disconnect + +Disconnecting a source cluster from a sink cluster. + +You may define a `host:port` combination + +* Syntax: `riak-repl disconnect <host>:<port>` +* Example: `riak-repl disconnect 192.168.2.1:9080` + +...or use the *name* of the cluster. + +* Syntax: `riak-repl disconnect <sink_clustername>` +* Example: `riak-repl disconnect Austin` + +#### connections + +Display a list of connections between source and sink clusters. + +* Syntax: `riak-repl connections` +* Example: `riak-repl connections` + +#### clusterstats + +Displays current cluster stats using an optional `ip:port` as well as an +optional `protocol-id`. + +`protocol-id` can be one of the following: + +* `cluster_mgr` +* `rt_repl` +* `fs_repl` + +The `clusterstats` command in use: + +* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>` +* Example: `riak-repl clusterstats 192.168.2.1:9080` +* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl` + + +## Realtime Replication Commands + +#### realtime enable + +Enable realtime replication from a source cluster to sink clusters. + +This will start queuing updates for replication. The cluster will still +require an invocation of `realtime start` for replication to occur. + +* Syntax: `riak-repl realtime enable <sink_clustername>` +* Example: `riak-repl realtime enable Austin` + +#### realtime disable + +Disable realtime replication from a source cluster to sink clusters. + +* Syntax: `riak-repl realtime disable <sink_clustername>` +* Example: `riak-repl realtime disable Austin` + + +#### realtime start + +Start realtime replication connections from a source cluster to sink +clusters. See also `realtime enable` (above). + +* Syntax: `riak-repl realtime start <sink_clustername>` +* Example: `riak-repl realtime start Austin` + +#### realtime stop + +Stop realtime replication from a source cluster to sink clusters. + +* Syntax `riak-repl realtime stop <sink_clustername>` +* Example `riak-repl realtime stop Austin` + + +## Fullsync Replication Commands + +These behaviors can be altered by using the `advanced.config` +`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information. + +#### fullsync enable + +Enable fullsync replication from a source cluster to sink clusters. By +default, a fullsync will begin as soon as a connection to the remote +cluster is established. + +* Syntax: `riak-repl fullsync enable <sink_clustername>` +* Example: `riak-repl fullsync enable Austin` + +#### fullsync disable + +Disables fullsync for a cluster. + +* Syntax: `riak-repl fullsync disable <sink_clustername>` +* Example: `riak-repl fullsync disable Austin` + +#### fullsync start + +Starts a fullsync. If the application configuration +`fullsync_on_connect` is set to `false`, a fullsync needs to be started +manually. This is also used to trigger a periodic fullsync using a cron +job. While a fullsync is in progress, a `start` command is ignored and a +message is logged. + +* Syntax: `riak-repl fullsync start <sink_clustername>` +* Example: `riak-repl fullsync start Austin` + +#### fullsync stop + +Stops a fullsync. + +* Syntax: `riak-repl fullsync stop <sink_clustername>` +* Example: `riak-repl fullsync stop Austin` + +## Cascading Realtime Writes + +#### realtime cascades + +Shows the current cascading realtime setting. 
+
+* Syntax: `realtime cascades`
+* Example: `riak-repl realtime cascades`
+
+#### realtime cascades always
+
+Enable realtime cascading writes.
+
+* Syntax: `realtime cascades always`
+* Example: `riak-repl realtime cascades always`
+
+#### realtime cascades never
+
+Disable realtime cascading writes.
+
+* Syntax: `realtime cascades never`
+* Example: `riak-repl realtime cascades never`
+
+
+## NAT
+
+**Note**: See the [V3 Multi Data Center Replication With NAT][config v3 nat] for more information.
+
+#### nat-map show
+
+Show the current NAT mapping table.
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### `proxy-get enable`
+
+Enable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provide a redirection to the `<to-cluster-id>` for `proxy_get` if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+Show the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+Delete an existing redirect so that proxy_gets go to the original
+provider cluster id again.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Display this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+    ```bash
+    riak-repl show-local-cluster-id
+    ```
+
+    Possible output:
+
+    ```
+    local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+    ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster.
This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster across all nodes on that cluster for a fullsync
+to a sink cluster. This means if one has configured fullsync for two
+different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync
+workers can be in progress. Only affects nodes on the source cluster on
+which this parameter is defined via the configuration file or the
+command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node’s number of maximum connections. Only affects nodes on the
+sink cluster on which this parameter is defined via the configuration
+file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
+(Version 3) separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+    ```bash
+    riak-repl modes mode_repl12 mode_repl13
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+    ```bash
+    riak-repl modes
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and Realtime replication replicate data from source clusters to sink clusters,
+but some configurations and metadata (such as search indices and bucket properties) will
+not be replicated.
+
+Non-replication of certain configurations and metadata supports
+heterogeneous cluster configurations in Replication, but there are
+operational things you can do when you want homogeneous cluster
+configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you would change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Buckets and Bucket Type properties on the source cluster
+will _not_ be replicated from source clusters to sink clusters.
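+
+In practice this means that, for example, a bucket type must be created
+and activated separately on every cluster that should carry it. A minimal
+sketch, assuming a hypothetical type named `users`:
+
+```bash
+# Repeat on a node in the source cluster and in every sink cluster;
+# the type name and properties here are illustrative only
+riak-admin bucket-type create users '{"props":{"n_val":3}}'
+riak-admin bucket-type activate users
+```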
+ +If you want the properties for Buckets or Bucket Types +present on the source cluster to be propagated to sink clusters +you should update this data for each cluster at the same +time you would change the source cluster. + diff --git a/content/riak/kv/2.9.4/using/performance.md b/content/riak/kv/2.9.4/using/performance.md new file mode 100644 index 0000000000..0cb9950bef --- /dev/null +++ b/content/riak/kv/2.9.4/using/performance.md @@ -0,0 +1,265 @@ +--- +title: "Improving Performance" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Performance" + identifier: "managing_performance" + weight: 206 + parent: "managing" +toc: true +aliases: + - /riak/kv/2.9.4/ops/tuning/linux/ + - /riak/2.9.4/ops/tuning/linux/ +--- + +Many Unix-like operating systems and distributions are tuned for desktop +or light use out of the box and not for a production database. This +guide describes recommended system performance tunings for operators of +new and existing Riak clusters. The tunings present in this guide should +be considered as a starting point. It is important to make note of what +changes are made and when in order to measure the impact of those +changes. + +For performance and tuning recommendations specific to running Riak +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.4/using/performance/amazon-web-services). + +{{% note title="Note on other operating systems" %}} +Unless otherwise specified, the tunings recommended below are for Linux +distributions. Users implementing Riak on BSD and Solaris distributions can +use these tuning recommendations to make analogous changes in those operating +systems. +{{% /note %}} + +## Storage and File System Tuning + +### Virtual Memory + +Due to the heavily I/O-focused profile of Riak, swap usage can result in +the entire server becoming unresponsive. We recommend setting +`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much +as possible: + +```config +vm.swappiness = 0 +``` + +Ideally, you should disable swap to ensure that Riak's process pages are +not swapped. Disabling swap will allow Riak to crash in situations where +it runs out of memory. This will leave a crash dump file, named +`erl_crash.dump`, in the `/var/log/riak` directory which can be used to +determine the cause of the memory usage. + +### Transparent Huge Pages (THP) + +Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately this operation is rather OS specific. As many of our customers are running Red Hat 6, we have included instructions on how to do so underneath. If you are using a different operating system, please refer to documentation for your OS. + +In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line: + +``` +transparent_hugepage=never +``` + +For the change to become effective, a server reboot is required. + +{{% note title="Note on Kernel Tuning Tools" %}} +Some Kernel tuning tools such as ktune specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the Kernel tuning tool you are using as to how to disable THP. +{{% /note %}} + +### Mounts + +Riak makes heavy use of disk I/O for its storage operations. 
It is
+important that you mount volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
+touched when read. This flag can be set temporarily using the following
+command:
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` option can be set in `/etc/fstab` to mount
+permanently.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is completely fair queuing
+or `cfq`, which is designed for desktop use. While a good
+general-purpose scheduler, it is not designed to provide the kind of
+throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* The `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* The `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+
+To check the scheduler depth for block device `sda`, use the following
+command:
+
+```bash
+cat /sys/block/sda/queue/nr_requests
+```
+
+To increase the scheduler depth to 1024, use the following command:
+
+```bash
+echo 1024 > /sys/block/sda/queue/nr_requests
+```
+
+### Filesystem
+
+Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and
+[XFS](http://xfs.org/index.php/Main_Page) are recommended on some
+operating systems for greater reliability and recoverability.
+
+At this time, Basho can recommend using ZFS on Solaris, SmartOS, and
+OmniOS. ZFS may work well with Riak on direct Solaris clones like
+IllumOS, but we cannot yet recommend this. [ZFS on
+Linux](http://zfsonlinux.org) is still too early in its project lifetime
+to be recommendable for production use due to concerns that have been
+raised about excessive memory use. ZFS on FreeBSD is more mature than
+ZFS on Linux, but Basho has not yet performed sufficient performance and
+reliability testing to recommend using ZFS and Riak on FreeBSD.
+
+In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and
+[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on
+operating systems on which ZFS or XFS are not available or recommended.
+
+The ext4 file system defaults include two options that increase
+integrity but slow performance. Because Riak's integrity is based on
+multiple nodes holding the same data, these two options can be changed
+to boost I/O performance. We recommend setting `barrier=0` and
+`data=writeback` when using the ext4 filesystem.
+
+Similarly, the XFS file system defaults can be optimized to improve
+performance. We recommend setting `nobarrier`, `logbufs=8`,
+`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem.
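+
+For illustration, an `/etc/fstab` entry that combines the `noatime` flag
+with the ext4 options above might look like the following (the device,
+mount point, and filesystem here are hypothetical):
+
+```config
+# Hypothetical Riak data volume mounted with the recommended ext4 options
+/dev/sdb1  /var/lib/riak  ext4  defaults,noatime,barrier=0,data=writeback  0 0
+```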
+
+As with the `noatime` setting, these options should be added to
+`/etc/fstab` (as in the example above) so that they persist across
+server restarts.
+
+## Kernel and Network Tuning
+
+The following settings are minimally sufficient to improve many aspects
+of Riak usage on Linux, and should be added or updated in
+`/etc/sysctl.conf`:
+
+```config
+net.ipv4.tcp_max_syn_backlog = 40000
+net.core.somaxconn = 40000
+net.core.wmem_default = 8388608
+net.core.rmem_default = 8388608
+net.ipv4.tcp_sack = 1
+net.ipv4.tcp_window_scaling = 1
+net.ipv4.tcp_fin_timeout = 15
+net.ipv4.tcp_keepalive_intvl = 30
+net.ipv4.tcp_tw_reuse = 1
+net.ipv4.tcp_moderate_rcvbuf = 1
+```
+
+{{% note title="Note on system default" %}}
+In general, these recommended values should be compared with the system
+defaults and only changed if benchmarks or other performance metrics indicate
+that networking is the bottleneck.
+{{% /note %}}
+
+The following settings are optional, but may improve performance on a
+10Gb network:
+
+```config
+net.core.rmem_max = 134217728
+net.core.wmem_max = 134217728
+net.ipv4.tcp_mem = 134217728 134217728 134217728
+net.ipv4.tcp_rmem = 4096 277750 134217728
+net.ipv4.tcp_wmem = 4096 277750 134217728
+net.core.netdev_max_backlog = 300000
+```
+
+Certain network interfaces ship with on-board features that have been
+shown to hinder Riak network performance. These features can be disabled
+via `ethtool`.
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Change these values with care and re-benchmark after each change, as they
+affect all network operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho on nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, it is necessary to
+increase the open files limit. See [Open Files Limit]({{<baseurl>}}riak/kv/2.9.4/using/performance/open-files-limit/) for more
+details.
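+
+As a quick check, the following sketch shows how to inspect and raise
+the limit for the current shell session (the `65536` value is
+illustrative; see the linked guide for persistent, service-level
+settings):
+
+```bash
+# show the current open-files limit for this shell
+ulimit -n
+
+# raise the limit for this shell session only
+ulimit -n 65536
+```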
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.4/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/2.9.4/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/2.9.4/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/2.9.4/using/performance/open-files-limit/)
+
diff --git a/content/riak/kv/2.9.4/using/performance/amazon-web-services.md b/content/riak/kv/2.9.4/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..ff3e1cc4cc
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/performance/amazon-web-services.md
@@ -0,0 +1,244 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/tuning/aws
+  - /riak/kv/2.9.4/ops/tuning/aws
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types that encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are Disk I/O, RAM, and Network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used.
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-Optimized EC2 instances, which provide between 500 and 1,000
+megabits per second of throughput to [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available and are recommended for use with
+Provisioned IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they
+would use physical nodes to compensate for the performance variability
+caused by shared, virtualized resources. Plan on having more EC2-based
+nodes than physical server nodes when estimating cluster size with
+respect to node count.
+{{% /note %}}
+
+## Operating System
+
+### Clocks
+
+NTP is configured by default on Amazon EC2 Linux instances. Please
+refer to the [Set the Time for an
+Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html)
+section of the EC2 documentation for steps to verify that NTP is
+working properly. If NTP is not working properly, significant clock
+drift can occur.
+
+### Mounts and Scheduler
+
+On EBS volumes, the **deadline** scheduler should be used. To check the
+scheduler in use for block device `xvdf`, for example, use the following
+command:
+
+```bash
+cat /sys/block/xvdf/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/xvdf/queue/scheduler
+```
+
+More information on the disk scheduler is available in [Improving Performance](../).
+
+### Virtual Memory Subsystem
+
+EBS volumes have considerably less bandwidth than hardware disks. To
+avoid saturating EBS bandwidth and inducing IO latency spikes, it is
+recommended to tune the Linux virtual memory subsystem to flush smaller
+amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings).
+
+### Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, and back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally
+and are not affected by a wider problem like an AWS service outage. Try
+to determine the cause of the problem from the data you have collected.
+If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or re-sold under Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk.
+
+Have your collected data ready when contacting TI Tokyo Client Services. A
+Client Services Engineer (CSE) might request log files, configuration
+files, or other information.
+
+## Data Loss
+
+Many failures either do not entail data loss or have minimal loss that
+can be repaired automatically, without intervention. Outage of a single
+node does not necessarily cause data loss, as other replicas of every
+key are available elsewhere in the cluster. Once the node is detected as
+down, other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called hinted handoff).
+
+The more severe data loss scenarios usually relate to hardware failure
+(in the case of AWS, service failure or instance termination). In the
+cases where data is lost, several options are available for restoring
+the data:
+
+1. Restore from backup. A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but can be used to partially restore data from
+   lost EBS volumes. If running in a RAID configuration, rebuilding the
+   array may also be possible.
+2. Restore from Multi-Datacenter Replication. If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the
+   `riak-repl` command.
+3. Restore using intra-cluster repair.
Riak versions 1.2 and greater
+   include a "repair" feature which will restore lost partitions with
+   data from other replicas. This currently has to be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Client Services Engineer (CSE).
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from your support provider is strongly recommended.
+
+## Benchmarking
+
+Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that
+simulates application operations by constructing and communicating
+approximately-compatible data payloads with the Riak cluster directly.
+
+Benchmarking is critical to determining the appropriate EC2 instance
+types, and strongly recommended. More information is available on
+benchmarking Riak clusters with [Basho Bench](../benchmarking).
+
+Besides running Basho Bench, we also advise that you load test Riak with
+your own tests to ensure that the load imparted by MapReduce queries,
+full-text queries, and index queries is within the expected range.
+
+## Simulating Upgrades, Scaling, and Failure States
+
+In addition to simply measuring performance, it is also important to
+measure how performance degrades when the cluster is not in
+steady-state. While under a simulation of live load, the following
+states might be simulated:
+
+1. Stop one or more nodes normally and restart them after a few moments
+   (simulates a [rolling upgrade](../../../setup/upgrading/cluster)).
+2. Join two or more nodes to the cluster.
+3. Have nodes leave the cluster (after step #2).
+4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then
+   restart it.
+5. Hard-reboot a node's instance using the AWS console and then
+   restart it.
+6. Hard-stop and destroy a node's instance and build a new one from
+   backup.
+7. Via networking, e.g. firewall, partition one or more nodes from
+   the rest of the cluster and then restore the original
+   configuration.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. While the Riak node is out, other nodes may also
+be at risk if free capacity is low on the rest of the cluster, so
+monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to replace
+it.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart.
In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance; it generally performs better but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. With EBS you can also optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
+
diff --git a/content/riak/kv/2.9.4/using/performance/benchmarking.md b/content/riak/kv/2.9.4/using/performance/benchmarking.md
new file mode 100644
index 0000000000..16ac5cb5b6
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/performance/benchmarking.md
@@ -0,0 +1,649 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/benchmarking
+  - /riak/kv/2.9.4/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   ```basho_bench```. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster. Do not run the ```basho_bench``` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download ```basho_bench```
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/2.9.4/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building ```basho_bench``` from source.
Later
+  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the ```erlang-crypto``` package, which
+  is required by ```basho_bench```.
+* Install ```git``` (to check out the ```basho_bench``` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory in which to generate the results:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed ```basho_bench``` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common ```~/``` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+```/home/username/bench_results/current/```.
+
+If you built ```basho_bench``` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput --- Operations per second over the duration of the test.
+* Latency at 99th percentile, 99.9th percentile and max latency for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running ```basho_bench```, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
+
+#### Installing R on Ubuntu
+
+```bash
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed ```basho_bench``` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+```/home/username/bench_results/summary.png```.
+
+If you have built ```basho_bench``` from source, you can just use
+```make```. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the ```basho_bench/README```.
+
+## How does it work?
+
+When Basho Bench starts (`basho_bench.erl`), it reads the
+configuration (`basho_bench_config.erl`), creates a new results
+directory, and then sets up the test (`basho_bench_app.erl` and
+`basho_bench_sup.erl`).
+
+During test setup, Basho Bench creates the following:
+
+* One **stats process** (`basho_bench_stats.erl`). This process
+  receives notifications when an operation completes, plus the
+  elapsed time of the operation, and stores it in a histogram. At
+  regular intervals, the histograms are dumped to `summary.csv` as
+  well as operation-specific latency CSVs (e.g. `put_latencies.csv`
+  for the PUT operation).
+* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting
+  (`basho_bench_worker.erl`). The worker process wraps a driver
+  module, specified by the [driver](#driver)
+  configuration setting. The driver is randomly invoked using the
+  distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the
+  driver invokes operations is governed by the [mode](#mode) setting.
+
+Once these processes have been created and initialized, Basho Bench
+sends a run command to all worker processes, causing them to begin the
+test. Each worker is initialized with a common seed value for random
+number generation to ensure that the generated workload is reproducible
+at a later date.
+
+During the test, the workers repeatedly call `driver:run/4`, passing in
+the next operation to run, a keygen function, a valuegen function, and
+the last state of the driver. The worker process times the operation,
+and reports this to the stats process when the operation has completed.
+
+Finally, once the test has been run for the duration specified in the
+config file, all workers and stats processes are terminated and the
+benchmark ends. The measured latency and throughput of the test can be
+found in `./tests/current/`. Previous results are in timestamped
+directories of the form `./tests/YYYYMMDD-HHMMSS/`.
+
+## Configuration
+
+Basho Bench ships with a number of sample configuration files, available
+in the `/examples` directory.
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`driver:run/4` function with a new operation. There are two possible
+values:
+
+* `{max}` --- generate as many ops per second as possible
+* `{rate, N}` --- generate N ops per second, with exponentially
+  distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e.: as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run.
The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE, respectively.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                              [{basho_bench_driver_null,run,
+                                                [asdfput,
+                                                 #Fun<basho_bench_keygen.4.4674>,
+                                                 #Fun<basho_bench_valgen.0.1334>,
+                                                 undefined]},
+                                               {basho_bench_worker,
+                                                worker_next_op,1},
+                                               {basho_bench_worker,
+                                                max_worker_run_loop,1}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` --- Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` --- Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` --- Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` --- Directly invokes the Bitcask API
+* `basho_bench_driver_dets` --- Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` --- operation completed successfully
+* `{error, Reason, NewState}` --- operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` --- operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` --- operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` --- generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` --- the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` --- the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` --- selects an integer from a uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` --- selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time.
Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` --- the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` --- specifies an external
+  function that should return a key generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` --- takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` --- takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` --- generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` --- generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` --- generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` --- specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed
+% starting at 1000 bytes and a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+Default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+```
+
+#### riakclient_replies
+
+This value is used for R-values during a get operation, and W-values
+during a put operation.
+
+```erlang
+% Expect 1 reply.
+{riakclient_replies, 1}.
+```
+
+#### riakclient_bucket
+
+The Riak bucket to use for reading and writing values. The default is
+`<<"test">>`.
+
+```erlang
+% Use the "bench" bucket.
+{riakclient_bucket, <<"bench">>}.
+```
+
+### basho_bench_driver_riakc_pb Settings
+
+#### riakc_pb_ips
+
+A list of IP addresses to connect the workers to. A random IP will be
+chosen for each worker.
+
+The default is `{riakc_pb_ips, [{127,0,0,1}]}`
+
+```erlang
+% Connect to a cluster of 3 machines
+{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]}
+```
+
+#### riakc_pb_port
+
+The port on which to connect to the PBC interface.
+
+The default is `{riakc_pb_port, 8087}`
+
+#### riakc_pb_bucket
+
+The bucket to use for testing.
+
+The default is `{riakc_pb_bucket, <<"test">>}`
+
+### basho_bench_driver_http_raw Settings
+
+#### http_raw_ips
+
+A list of IP addresses to connect the workers to. Each worker makes
+requests to each IP in a round-robin fashion.
+
+The default is `{http_raw_ips, ["127.0.0.1"]}`
+
+```erlang
+% Connect to a cluster of machines in the 10.x network
+{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}.
+```
+
+#### http_raw_port
+
+Select the default port to connect to for the HTTP server.
+
+The default is `{http_raw_port, 8098}`.
+
+```erlang
+% Connect on port 8090
+{http_raw_port, 8090}.
+```
+
+#### http_raw_path
+
+The base path to use for accessing Riak, usually `"/riak/<bucket>"`.
+
+The default is `{http_raw_path, "/riak/test"}`.
+
+```erlang
+% Place test data in another_bucket
+{http_raw_path, "/riak/another_bucket"}.
+```
+
+#### http_raw_params
+
+Additional parameters to add to the end of the URL. This can be used
+to set the `r`/`w`/`dw`/`rw` parameters as desired.
+
+The default is `{http_raw_params, ""}`.
+
+```erlang
+% Set R=1, W=1 for testing a system with n_val set to 1
+{http_raw_params, "?r=1&w=1"}.
+```
+
+#### http_raw_disconnect_frequency
+
+How often, in seconds or number of operations, the HTTP clients
+(workers) should forcibly disconnect from the server.
+
+The default is `{http_raw_disconnect_frequency, infinity}` (which
+means that Basho Bench should never forcibly disconnect).
+
+```erlang
+% Disconnect after 60 seconds
+{http_raw_disconnect_frequency, 60}.
+
+% Disconnect after 200 operations
+{http_raw_disconnect_frequency, {ops, 200}}.
+```
+
+## Custom Driver
+
+A custom driver must expose the following callbacks.
+
+```erlang
+% Create the worker
+% ID is an integer
+new(ID) -> {ok, State} or {error, Reason}.
+
+% Run an operation
+run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}.
+```
+
+See the [existing
+drivers](https://github.com/basho/basho_bench/tree/master/src) for
+more details.
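+
+For illustration only, below is a minimal sketch of a custom driver
+that "succeeds" at every operation without talking to any real system
+(the module name is hypothetical, not part of Basho Bench itself):
+
+```erlang
+-module(basho_bench_driver_noop).
+-export([new/1, run/4]).
+
+%% Create the worker state; the worker ID is unused here
+new(_Id) ->
+    {ok, undefined}.
+
+%% Exercise the key and value generators, then report success
+run(get, KeyGen, _ValueGen, State) ->
+    _Key = KeyGen(),
+    {ok, State};
+run(put, KeyGen, ValueGen, State) ->
+    _Key = KeyGen(),
+    _Value = ValueGen(),
+    {ok, State}.
+```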
+
diff --git a/content/riak/kv/2.9.4/using/performance/erlang.md b/content/riak/kv/2.9.4/using/performance/erlang.md
new file mode 100644
index 0000000000..298e8118e1
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/performance/erlang.md
@@ -0,0 +1,368 @@
+---
+title: "Erlang VM Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Erlang VM"
+    identifier: "performance_erlang"
+    weight: 105
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/tuning/erlang
+  - /riak/kv/2.9.4/ops/tuning/erlang
+---
+
+Riak was written almost exclusively in [Erlang](http://www.erlang.org)
+and runs on an Erlang virtual machine (VM), which makes proper Erlang VM
+tuning an important part of optimizing Riak performance. The Erlang VM
+itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm).
+
+The table below lists some of the parameters that are available, showing
+both their names as used in Erlang and their names as Riak parameters.
+
+Erlang parameter | Riak parameter
+:----------------|:--------------
+[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads`
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K`
+[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit`
+[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports`
+[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online`
+[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W`
+[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size`
+[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables`
+[`+scl`](http://www.erlang.org/doc/man/erl.html#+scl) | `erlang.schedulers.compaction_of_load`
+[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval`
+[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp`
+[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing`
+[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size`
+[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime`
+[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after`
+[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump`
+[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables`
+`-name` | `nodename`
+
+{{% note title="Note on upgrading to 2.0" %}}
+In versions of Riak prior to 2.0, Erlang VM-related parameters were specified
+in a `vm.args` configuration file; in versions 2.0 and later, all
+Erlang-VM-specific parameters are set in the `riak.conf` file. If you're
+upgrading to 2.0 from an earlier version, you can still use your old `vm.args`
+if you wish. Please note, however, that if you set one or more parameters in
+both `vm.args` and in `riak.conf`, the settings in `vm.args` will override
+those in `riak.conf`.
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
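+
+For example, on a machine with 8 logical processors you could pin both
+values explicitly (illustrative values only; the defaults described
+above are usually appropriate):
+
+```riakconf
+erlang.schedulers.total = 8
+erlang.schedulers.online = 8
+```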
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` flag to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. For inter-node communication itself, the Erlang VM uses an
+unpredictable port by default; it binds to port 0, which means that the
+first available port is used. This can make it difficult to configure
+[firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively.
The
+following would set the range to ports between 3000 and 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 3000
+erlang.distribution.port_range.maximum = 5000
+```
+
+```appconfig
+%% The older, app.config-based system uses different parameter names
+%% for specifying the minimum and maximum port
+
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 3000},
+          {inet_dist_listen_max, 5000}
+          % ...
+         ]}
+```
+
+You can set the Erlang VM to use a single port by setting the minimum to
+the desired port while setting no maximum. The following would set the
+port to 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 5000
+```
+
+```appconfig
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 5000},
+          % ...
+         ]}
+```
+
+If the minimum port is unset, the Erlang VM will listen on a random
+high-numbered port.
+
+### Maximum Ports
+
+You can set the maximum number of concurrent ports/sockets used by the
+Erlang VM using the `erlang.max_ports` setting. Possible values range
+from 1024 to 134217727. The default is 65536. In `vm.args` you can use
+either `+Q` or `-env ERL_MAX_PORTS`.
+
+## Asynchronous Thread Pool
+
+If thread support is available in your Erlang VM, you can set the number
+of asynchronous threads in the Erlang VM's asynchronous thread pool
+using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0
+to 1024. If thread support is available on your OS, the default is 64.
+Below is an example setting the number of async threads to 600:
+
+```riakconf
+erlang.async_threads = 600
+```
+
+```vmargs
++A 600
+```
+
+### Stack Size
+
+In addition to the number of asynchronous threads, you can determine the
+memory allocated to each thread using the
+`erlang.async_threads.stack_size` parameter, which corresponds to the
+`+a` Erlang flag. You can specify that size in Riak using KB, MB, GB,
+etc. The valid range is 16-8192 kilowords, which translates to 64-32768
+KB on 32-bit architectures. While there is no default, we suggest a
+stack size of 16 kilowords, which translates to 64 KB. We suggest such a
+small size because the number of asynchronous threads, as determined by
+`erlang.async_threads`, might be quite large in your Erlang VM. The 64 KB
+default is enough for drivers delivered with Erlang/OTP but might not be
+large enough to accommodate drivers that use the `driver_async()`
+functionality, documented
+[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend
+setting higher values with caution, always keeping the number of
+available threads in mind.
+
+## Kernel Polling
+
+You can utilize kernel polling in your Erlang distribution if your OS
+supports it. Kernel polling can improve performance if many file
+descriptors are in use; the more file descriptors, the larger an effect
+kernel polling may have on performance. Kernel polling is enabled by
+default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`.
+This corresponds to the
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the
+Erlang VM. You can disable it by setting `erlang.K` to `off`.
+
+## Warning Messages
+
+Erlang's
+[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an
+event manager that registers error, warning, and info events from the
+Erlang runtime. By default, events from the `error_logger` are mapped as
+warnings, but you can also set messages to be mapped as errors or info
+reports using the `erlang.W` parameter (or `+W` in `vm.args`). The
+possible values are `w` (warnings), `e` (errors), or `i` (info reports).
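+
+For example, to map emulator messages to warnings (the default)
+explicitly, you could set the following (a sketch; shown for both
+configuration systems):
+
+```riakconf
+erlang.W = w
+```
+
+```vmargs
++W w
+```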
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) (Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.process_limit` (explained in the [Process Limit](#process-limit)
+section above) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can determine that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every N
+seconds, where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can determine how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
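+
+For example, to shorten the shutdown window to one second in a
+disposable test cluster (an illustrative value, not a production
+recommendation):
+
+```riakconf
+erlang.shutdown_time = 1s
+```
+
+```vmargs
+-shutdown_time 1000
+```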
+
diff --git a/content/riak/kv/2.9.4/using/performance/latency-reduction.md b/content/riak/kv/2.9.4/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..5c8d8c1915
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/performance/latency-reduction.md
@@ -0,0 +1,264 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/tuning/latency-reduction
+  - /riak/kv/2.9.4/ops/tuning/latency-reduction
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about them.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB. Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric | Explanation
+:-----------------------------|:-----------
+`node_get_fsm_objsize_mean`   | The mean object size encountered by this node in the last minute
+`node_get_fsm_objsize_median` | The median object size encountered by this node in the last minute
+`node_get_fsm_objsize_95`     | The 95th-percentile object size encountered by this node in the last minute
+`node_get_fsm_objsize_99`     | The 99th-percentile object size encountered by this node in the last minute
+`node_get_fsm_objsize_100`    | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and re-start your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes).
This increases the size of the +distributed Erlang buffer from its default of 1024 KB. Re-start your +node when configuration changes have been made. + +Large objects can also impact latency even if they're only present on +some nodes. If increased latency occurs only on N nodes, where N is your +[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes. + +If large objects are suspected, you should also audit the behavior of +siblings in your cluster, as explained in the [next section](#siblings). + +## Siblings + +In Riak, object conflicts are handled by keeping multiple versions of +the object in the cluster either until a client takes action to resolve +the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects). + +### Mitigation + +The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor +object size (or via an HTTP `GET` request to `/stats`). In the output of +`riak-admin status` in each node, you'll see the following +sibling-related statistics: + +Metric | Explanation +:------------------------------|:----------- +`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute + +Is there an upward trend in these statistics over time? Are there any +large outliers? Do these trends correspond to your observed latency +spikes? + +If you believe that sibling creation problems could be responsible for +latency issues in your cluster, you can start by checking the following: + +* If `allow_mult` is set to `true` for some or all of your buckets, be + sure that your application is correctly resolving siblings. Be sure to + read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate. + If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly. +* Application errors are a common source of problems with + siblings. Updating the same key over and over without passing a + [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. 
If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) (DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W is set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. The maximum file size setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn), while setting it higher will induce less
+frequent merges with more I/O churn. To find settings that are ideal for
+your use case, we recommend checking out our guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments.
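+
+Before turning to the specific guides below, it can help to capture a
+quick baseline of OS-level behavior under production load. The following
+is a minimal sketch, assuming the standard `sysstat` tools (or
+equivalents) are installed on your nodes:
+
+```bash
+iostat -x 5 3   # extended per-device I/O utilization and wait times, 3 samples
+vmstat 5 3      # memory pressure, swap activity, and CPU iowait
+ulimit -n       # open files limit for the current shell
+```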
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak is a heavily I/O- and network-intensive system.
+Bottlenecks on either front can lead to undue latency in your cluster.
+We recommend an active monitoring strategy to detect problems
+immediately when they arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+at your disposal, including
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat).
+
+To diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+To set an object size threshold past which a write will succeed but will
+register a warning in the logs, adjust the
+`object.size.warning_threshold` parameter. The default is `5MB`.
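+
+As a quick illustration of the `object.size.maximum` behavior described
+above, the sketch below assumes the parameter has been lowered to `1MB`
+in riak.conf and that a bucket named `test` under the `default` bucket
+type is used; the exact error text Riak returns may differ:
+
+```bash
+# Create a 2 MB file and attempt to store it; with a 1MB maximum,
+# Riak should reject the write with an error response
+dd if=/dev/zero of=/tmp/too_big bs=1M count=2
+curl -XPUT -H "Content-Type: application/octet-stream" \
+  --data-binary @/tmp/too_big \
+  http://localhost:8098/types/default/buckets/test/keys/too_big
+```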
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs an error (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
+
diff --git a/content/riak/kv/2.9.4/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.9.4/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..941c77e0a1
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,43 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+---
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.4/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
+
diff --git a/content/riak/kv/2.9.4/using/performance/open-files-limit.md b/content/riak/kv/2.9.4/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..dfddf4e3cb
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/performance/open-files-limit.md
@@ -0,0 +1,348 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/tuning/open-files-limit/
+  - /riak/kv/2.9.4/ops/tuning/open-files-limit/
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation.
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session required pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session required pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+        "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file.
+
+4\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```bash
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
diff --git a/content/riak/kv/2.9.4/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.9.4/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..e47e03e38c
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,46 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times.
First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
+
diff --git a/content/riak/kv/2.9.4/using/reference.md b/content/riak/kv/2.9.4/using/reference.md
new file mode 100644
index 0000000000..7eb0798b98
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference.md
@@ -0,0 +1,131 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
+
diff --git a/content/riak/kv/2.9.4/using/reference/architecture.md b/content/riak/kv/2.9.4/using/reference/architecture.md
new file mode 100644
index 0000000000..47814020ca
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/architecture.md
@@ -0,0 +1,17 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+#menu:
+#  riak_kv-2.9.4:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+---
+
+<!-- TODO: Content -->
+
diff --git a/content/riak/kv/2.9.4/using/reference/bucket-types.md b/content/riak/kv/2.9.4/using/reference/bucket-types.md
new file mode 100644
index 0000000000..4438f95702
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/bucket-types.md
@@ -0,0 +1,819 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+---
+
+Bucket types allow groups of buckets to share configuration details and
+for Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
+cluster to a pre-2.0 version _as long as you have not created and activated a
+bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/2.9.4/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, related to
+  [Riak data types]({{<baseurl>}}riak/kv/2.9.4/developing/data-types) and [strong consistency]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/strong-consistency), respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types/#bucket-properties-and-operations).
You cannot simply take a +bucket `my_bucket` and assign it a type the way that you would, say, +set `allow_mult` to `false` or `n_val` to `5`, because there is no +`type` parameter contained within the bucket's properties (i.e. +`props`). + +Instead, bucket types are applied to buckets _on the basis of how those +buckets are queried_. Queries involving bucket types take the following +form: + +``` +GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key> +``` + +In the older system, only bucket and key are specified in queries: + +``` +GET/PUT/DELETE /buckets/<bucket>/keys/<key> +``` + +## When to Use Bucket Types + +In many respects, bucket types are a major improvement over the older +system of bucket configuration, including the following: + +* Bucket types are more flexible because they enable you to define a + bucket configuration and then change it if you need to. +* Bucket types are more reliable because the buckets that bear a given + type only have their properties changed when the type is changed. + Previously, it was possible to change the properties of a bucket only + through client requests. +* Whereas bucket properties can only be altered by clients interacting + with Riak, bucket types are more of an operational concept. The + `riak-admin bucket-type` interface (discussed in depth below) enables + you to manage bucket configurations on the operations side, without + recourse to Riak clients. + +For these reasons, we recommend _always_ using bucket types in versions +of Riak 2.0 and later. + +## Managing Bucket Types Through the Command Line + +Bucket types are created, updated, activated, and more through the +`riak-admin bucket-type` interface. + +Below is a full list of available sub-commands: + +Command | Action | Form | +:-------|:-------|:-----| +`create` | Create or modify a bucket type before activation | `create <type> <json>` | +`activate` | Activate a bucket type | `activate <type>` | +`list` | List all currently available bucket types and their activation status | `list` | +`status` | Display the status and properties of a specific bucket type | `status <type>` | +`update` | Update a bucket type after activation | `update <type> <json>` | + +### Creating a Bucket Type + +Creating new bucket types involves using the `create <type> <json>` +command, where `<type>` is the name of the type and `<json>` is a JSON +object of the following form: + +```json +{ + "props": { + "prop1": "val1", + "prop2": "val2", + ... + } +} +``` + + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.4/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.4/developing/getting-started) section. + +If creation is successful, you should see the following output: + +``` +type_using_defaults created +``` + +{{% note %}} +The `create` command can be run multiple times prior to a bucket type being +activated. Riak will persist only those properties contained in the final call +of the command. +{{% /note %}} + +Creating bucket types that assign properties _always_ involves passing +stringified JSON to the `create` command. One way to do that is to pass +a JSON string directly. 
The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak-admin bucket-type create from_json_file "$(cat props.json)"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). Here is an example console output:
+
+```bash
+riak-admin bucket-type list
+```
+
+An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```bash
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock: 20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after a bucket is
+created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then this will
+remain true of the bucket type.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+{{% /note %}}
+
+## Buckets as Namespaces
+
+In versions of Riak prior to 2.0, all queries are made to a bucket/key
+pair, as in the following example read request:
+
+```java
+Location myKey = new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch = new FetchValue.Builder(myKey).build();
+client.execute(fetch);
+```
+
+```ruby
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```php
+$location = new Location('my_key', new Bucket('my_bucket'));
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```csharp
+var id = new RiakObjectId("my_bucket", "my_key");
+client.Get(id);
+```
+
+```javascript
+client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Object} = riakc_pb_socket:get(Pid,
+                                   <<"my_bucket">>,
+                                   <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+```
+
+With the addition of bucket types in Riak 2.0, bucket types can be used
+as _an additional namespace_ on top of buckets and keys. The same bucket
+name can be associated with completely different data if it is used in
+accordance with a different type. Thus, the following two requests will
+be made to _completely different objects_, even though the bucket and key
+names are the same:
+
+```java
+Location key1 =
+  new Location(new Namespace("type1", "my_bucket"), "my_key");
+Location key2 =
+  new Location(new Namespace("type2", "my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(key1).build();
+FetchValue fetch2 = new FetchValue.Builder(key2).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1'));
+$location2 = new Location('my_key', new Bucket('my_bucket', 'type2'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("type1", "my_bucket", "my_key");
+var id2 = new RiakObjectId("type2", "my_bucket", "my_key");
+var rslt1 = client.Get(id1);
+var rslt2 = client.Get(id2);
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'type1', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+
+client.fetchValue({
+    bucketType: 'type2', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"type1">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 {<<"type2">>, <<"my_bucket">>},
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key
+```
+
+{{% note title="Note on object location" %}}
+In Riak 2.x, _all requests_ must be made to a location specified by a bucket
+type, bucket, and key rather than to a bucket/key pair, as in previous
+versions.
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
```

+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`.
+
+In version 2.0, this is changing in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolutions in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind when using versions of Riak 2.0 and
+later any time that you create, activate, and use your own bucket types.
+It is still possible to set `allow_mult` to `false` in any given bucket
+type, but it must be done explicitly. If we wanted to set
+`allow_mult` to `false` in our `n_val_of_2` bucket type from above, we
+would need to create or modify the already existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/2.9.4/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/2.9.4/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again.
If it still does not
+    work, then there may be network partition or other issues that need
+    to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key = new Location("sensitive_user_data")
+        .setBucketType("no_siblings")
+        .setKey("user19735");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{ ... user data ... }")
+  ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` using the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket.
If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type from
+above if we didn't want to deal with siblings, vclocks, and the like,
+and we could use a `siblings_allowed` bucket type (with all of the
+default properties except `allow_mult` set to `true`). This would give
+us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
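+
+To make the isolation concrete, here is a hedged sketch using the HTTP
+API (it assumes both the `no_siblings` and `siblings_allowed` types have
+been created and activated as described above):
+
+```bash
+# Store different values at the same bucket/key under two different types
+curl -XPUT -H "Content-Type: text/plain" -d "doge" \
+  http://localhost:8098/types/no_siblings/buckets/new_memes/keys/favorite_meme
+curl -XPUT -H "Content-Type: text/plain" -d "nyan cat" \
+  http://localhost:8098/types/siblings_allowed/buckets/new_memes/keys/favorite_meme
+
+# Each read returns the value stored in that type's keyspace:
+# the first returns "doge", the second "nyan cat"
+curl http://localhost:8098/types/no_siblings/buckets/new_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/new_memes/keys/favorite_meme
+```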
+
diff --git a/content/riak/kv/2.9.4/using/reference/custom-code.md b/content/riak/kv/2.9.4/using/reference/custom-code.md
new file mode 100644
index 0000000000..d6d16d7f44
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/custom-code.md
@@ -0,0 +1,132 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/advanced/install-custom-code/
+  - /riak/kv/2.9.4/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/2.9.4/developing/usage/commit-hooks), and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/2.9.4/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. You should note that in Erlang, a file
+must have the same name as the module it contains. So if you are given a file
+named `validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+        on supported platforms
+
+Compiling the module is a straightforward process.
+
+```text
+erlc validate_json.erl
+```
+
+Next, you'll need to define a path where compiled modules can be stored
+and loaded. For our example, we'll use a temporary directory `/tmp/beams`,
+but you should choose a directory for production functions based on your
+own requirements such that they will be available where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+### Configure
+
+Take the `validate_json.beam` and copy this file to the `/tmp/beams` directory.
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+  %% ...
+  {add_paths, ["/tmp/beams/"]},
+  %% ...
+```
+
+After updating `app.config`, Riak must be restarted. In production cases, if
+you are adding configuration changes to multiple nodes, you should do so in a
+rolling fashion, taking time to ensure that the Riak key value store has fully
+initialized and become available for use.
+
+This is done with the `riak-admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure riak_kv is active before restarting the next
+node.
+{{% /note %}} + diff --git a/content/riak/kv/2.9.4/using/reference/failure-recovery.md b/content/riak/kv/2.9.4/using/reference/failure-recovery.md new file mode 100644 index 0000000000..8f86a01e5e --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/failure-recovery.md @@ -0,0 +1,81 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + diff --git a/content/riak/kv/2.9.4/using/reference/handoff.md b/content/riak/kv/2.9.4/using/reference/handoff.md new file mode 100644 index 0000000000..337de2fb7a --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/handoff.md @@ -0,0 +1,198 @@ +--- +title: "Handoff Reference" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Handoff" + identifier: "managing_ref_handoff" + weight: 101 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.4/ops/running/handoff/ + - /riak/kv/2.9.4/ops/running/handoff/ +--- + +[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/handoff + +Riak is a distributed system built with two essential goals in mind: + +* **fault tolerance**, whereby a Riak cluster can withstand node + failure, network partitions, and other events in a way that does not + disrupt normal functioning, and +* **scalability**, whereby operators can gracefully add and remove nodes + to/from a Riak cluster + +Both of these goals demand that Riak is able to either temporarily or +permanently re-assign responsibility for portions of the keyspace. That +re-assigning is referred to as **intra-cluster handoff** (or simply +**handoff** in our documentation). + +## Types of Handoff + +Intra-cluster handoff typically takes one of two forms: **hinted +handoff** and **ownership transfer**. + +Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +up the slack, so to speak, assuming responsibility for node C's +operations. When node C comes back online, responsibility will be handed +back to the original vnodes. + +Ownership transfer is different because it is meant to be permanent. +It occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +makeup of a cluster changes, e.g. when nodes are added or removed from +the cluster. In this case, responsibility for portions of the keyspace +needs to be fundamentally re-assigned. + +Both types of handoff are handled automatically by Riak. Operators do +have the option, however, of enabling and disabling handoff on +particular nodes or all nodes and of configuring key aspects of Riak's +handoff behavior. More information can be found below. + +## Configuring Handoff + +A full listing of configurable parameters can be found in our +[configuration files]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#intra-cluster-handoff) +document. The sections below provide a more narrative description of +handoff configuration. + +### SSL + +If you want to encrypt handoff behavior within a Riak cluster, you need +to provide each node with appropriate paths for an SSL certfile (and +potentially a keyfile). The configuration below would designate a +certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`: + +```riakconf +handoff.ssl.certfile = /ssl_dir/cert.pem +handoff.ssl.keyfile = /ssl_dir/key.pem +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_ssl_options, [ + {certfile, "/ssl_dir/cert.pem"}, + {keyfile, "/ssl_dir/key.pem"} + ]}, + %% Other configs +]} +``` + +### Port + +You can set the port used by Riak for handoff-related interactions using +the `handoff.port` parameter. 
The default is 8099. This would change the
+port to 9000:
+
+```riakconf
+handoff.port = 9000
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_port, 9000},
+    %% Other configs
+]}
+```
+
+### Background Manager
+
+Riak has an optional background manager that limits handoff activity in
+the name of saving resources. The manager can help prevent system
+response degradation during times of heavy load, when multiple
+background tasks may contend for the same system resources. The
+background manager is disabled by default. The following will enable it:
+
+```riakconf
+handoff.use_background_manager = on
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_use_background_manager, on},
+    %% Other configs
+]}
+```
+
+### Maximum Rejects
+
+If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search/),
+those subsystems can block handoff of primary key/value data, i.e. data
+that you interact with via normal reads and writes.
+
+The `handoff.max_rejects` setting enables you to cap the duration for
+which a [vnode]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode) can be blocked: the maximum duration is the product of
+`handoff.max_rejects` and the value of
+[`vnode_management_timer`]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#vnode_management_timer).
+Thus, if you set `handoff.max_rejects` to 10 and
+`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems
+can block K/V handoff for a maximum of 50 seconds. The default for
+`handoff.max_rejects` is 6, while the default for
+`vnode_management_timer` is `10s`. This would set `max_rejects` to 10:
+
+```riakconf
+handoff.max_rejects = 10
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_rejected_max, 10},
+    %% Other configs
+]}
+```
+
+### Transfer Limit
+
+You can adjust the number of node-to-node transfers (which includes
+handoff) using the `transfer_limit` parameter. The default is 2. Setting
+this higher will increase node-to-node transfer concurrency at the
+expense of greater resource consumption. This would set `transfer_limit`
+to 5:
+
+```riakconf
+transfer_limit = 5
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_concurrency, 5},
+    %% Other configs
+]}
+```
+
+## Enabling and Disabling Handoff
+
+Handoff can be enabled and disabled in two ways: via configuration or
+on the command line.
+
+### Enabling and Disabling via Configuration
+
+You can enable and disable both outbound and inbound handoff on a node
+using the `handoff.outbound` and `handoff.inbound` settings,
+respectively. Both are enabled by default. The following would disable
+both:
+
+```riakconf
+handoff.outbound = off
+handoff.inbound = off
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {disable_outbound_handoff, true},
+    {disable_inbound_handoff, true},
+    %% Other configs
+]}
+```
+
+### Enabling and Disabling Through the Command Line
+
+Check out the [Cluster Operations: Handoff][cluster ops handoff] documentation for steps on enabling and disabling handoff via the command line.
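+
+As a quick sketch, the command-line equivalents of the configuration
+settings above look roughly like the following; the exact flags for
+targeting individual nodes are covered in the linked page:
+
+```bash
+# Disable both inbound and outbound handoff on every node in the cluster
+riak-admin handoff disable both --all
+
+# Re-enable it cluster-wide
+riak-admin handoff enable both --all
+```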
+ diff --git a/content/riak/kv/2.9.4/using/reference/jmx.md b/content/riak/kv/2.9.4/using/reference/jmx.md new file mode 100644 index 0000000000..961307a7d8 --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/jmx.md @@ -0,0 +1,187 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.4/ops/running/monitoring/jmx + - /riak/kv/2.9.4/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td> + <td>float</td> + <td>Mean PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMedian</tt></td> + <td>float</td> + <td>Median PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePuts</tt></td> + <td>int</td> + <td>Number of PUTs in past minute</td> + </tr> + <tr> + <td><tt>NodePutsTotal</tt></td> + <td>int</td> + <td>Number of PUTs since node start</td> + </tr> + <tr> + <td><tt>PBCActive</tt></td> + <td>int</td> + <td>Number of active Protocol Buffers connections</td> + </tr> + <tr> + <td><tt>PBCConnects</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections in past minute</td> + </tr> + <tr> + <td><tt>PBCConnectsTotal</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections since node start</td> + </tr> + <tr> + <td><tt>RingCreationSize</tt></td> + <td>int</td> + <td>Number of partitions in Riak ring</td> + </tr> + <tr> + <td><tt>VnodeGets</tt></td> + <td>int</td> + <td>Number of vnode-level GETs in past minute</td> + </tr> + <tr> + <td><tt>VnodeGetsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level GETs since node start</td> + </tr> + <tr> + <td><tt>VnodePuts</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs in past minute</td> + </tr> + <tr> + <td><tt>VnodePutsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs since node start</td> + </tr> +</table> + diff --git a/content/riak/kv/2.9.4/using/reference/logging.md b/content/riak/kv/2.9.4/using/reference/logging.md new file mode 100644 index 0000000000..48be3abca1 --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/logging.md @@ -0,0 +1,316 @@ +--- +title: "Logging Reference" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Logging" + identifier: "managing_ref_logging" + weight: 100 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.4/ops/running/logging + - /riak/kv/2.9.4/ops/running/logging +--- + +[cluster ops log]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/logging + +Logging in Riak KV is handled by a Basho-produced logging framework for +[Erlang](http://www.erlang.org) called +[lager](https://github.com/basho/lager). + +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. + +## Log Directory + +Riak's log files are stored in a `/log` directory on each node. The +location of that directory differs from platform to platform. The table +below shows you where log files are stored on all supported operating +systems. + +OS | Directory +:--|:--------- +Ubuntu, Debian, CentOS, RHEL | `/var/log/riak` +Solaris, OpenSolaris | `/opt/riak/log` +Source install and Mac OS X | `./log` (where the `.` represents the root installation directory) + +## Log Files + +Below is a list of files that can be found in each node's `/log` +directory: + +File | Significance +:----|:------------ +`console.log` | Console log output +`crash.log` | Crash logs +`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. +`run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. 
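+
+For example, on a package-based Linux install you might list a node's log
+directory like this (the listing shown in the comment is illustrative;
+file names will vary with rotation):
+
+```bash
+ls /var/log/riak
+# console.log  console.log.0  crash.log  erlang.log.1  error.log  run_erl.log
+```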
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.9.4, a few of the log messages may contain newline
+characters, preventing external tools from reliably identifying the end
+of each log entry during ingestion.
+
+A known workaround is to ingest not the logs enabled by the
+`log.console` configurable parameter but rather the logs enabled by the
+`log.syslog` configurable parameter and processed by syslog, e.g. using
+the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option (see [this StackExchange topic
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474))
+or its equivalent in other syslog implementations.
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log Files
+
+In each node's `/log` directory, you will see at least one of each of
+the following:
+
+File | Contents
+:----|:--------
+`console.log` | General messages from all Riak subsystems
+`crash.log` | Catastrophic events, such as node failures, running out of disk space, etc.
+`erlang.log` | Events from the Erlang VM on which Riak runs
+`run_erl.log` | The command-line arguments used when starting Riak
+
+### Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable and
+disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+By default, Riak stores error messages in `./log/error.log`.
+You can change this using the `log.error.file` parameter.
Here is an
+example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated either when a certain size threshold is reached and/or at
+designated times.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0`
+---
+Every night at midnight
+* `$D23`
+---
+Every day at 23:00 (11 pm)
+* `$W0D20`
+---
+Every week on Sunday at 20:00 (8 pm)
+* `$M1D0`
+---
+On the first day of every month at midnight
+* `$M5D6`
+---
+On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, using any size denomination
+you wish, e.g. `KB` or `MB`. The default is 64 KB. The following would
+set that maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix to be added to
+each syslog message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages from amongst the available facilities, which are listed below.
+The default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
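+
+Putting these settings together, a minimal syslog configuration in
+`riak.conf` might look like the following. This is only a sketch: the
+level and facility parameter names shown here follow the pattern of the
+other `log.syslog.*` settings, so verify them against your
+installation's riak.conf.
+
+```riakconf
+log.syslog = on
+log.syslog.ident = riak
+log.syslog.level = info
+log.syslog.facility = daemon
+```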
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither. This is determined by the value that you give to the
+`log.console` parameter, which offers four options:
+
+* `file`
+---
+Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/2.9.4/setup/installing/source), but will differ on platform-specific installations,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console`
+---
+Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-cli/#attach-direct) command
+* `both`
+---
+Console logs will be emitted both to a file and to standard
+  output
+* `off`
+---
+Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log].
+
diff --git a/content/riak/kv/2.9.4/using/reference/multi-datacenter.md b/content/riak/kv/2.9.4/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..260d1d244e
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/multi-datacenter.md
@@ -0,0 +1,49 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling & disabling of per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][ref mdc comparison]
+
diff --git a/content/riak/kv/2.9.4/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.9.4/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..f9d43e799a
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,97 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/mdc/comparison
+  - /riak/kv/2.9.4/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/2.9.4/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/2.9.4/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destinations of replication data. Sites and listeners are
+  manually configured on each node in a cluster. This can be a burden on
+  the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to Use Version 2 Replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity arrived in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters is exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes and the sink has N nodes, there will be M
+  realtime connections.
Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
+
diff --git a/content/riak/kv/2.9.4/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/2.9.4/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..d3a06908ce
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,171 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/mdc/monitoring
+  - /riak/kv/2.9.4/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identifying and trending issues or delays in realtime replication
+is important for root cause analysis, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute.
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+---
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help you spot problems
+with realtime replication or times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The value of `rt_dirty` quantifies the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue back-ups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+the maximum size of the queue (displayed in the `realtime_queue_stats`
+section of the replication status output as `max_bytes`) can be
+increased to accommodate expected periods of high load.
+
+---
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
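+
+Before turning to canary objects, note that the statistics discussed
+above can be pulled from the HTTP endpoint and narrowed down with a JSON
+tool. This is only a sketch; verify the field names against your
+cluster's actual output, and remember to poll at most once per minute:
+
+```bash
+# Fetch replication stats and extract the fields discussed above (requires jq)
+curl -s http://127.0.0.1:8098/riak-repl/stats | \
+  jq '{rt_dirty, realtime_queue_stats}'
+```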
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+* Perform a GET for your canary object on both your source and sink
+  clusters, noting their states. The state of the object in each cluster
+  can be referred to as state `S0`, or the object's initial state.
+* PUT an update for your canary object to the source cluster, updating
+  the state of the object to the next state, `S1`.
+* Perform a GET for your canary on the sink cluster, comparing the state
+  of the object on the source cluster to the state of the object on the
+  sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat the final
+  GET-and-compare step until an SLA time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+Getting a rough estimate of how long it takes an object PUT to a source
+cluster to be replicated to a sink cluster can be done by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
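+
+A minimal sketch of the canary check described above, using the HTTP API
+(the bucket, key, hosts, and SLA threshold are all illustrative):
+
+```bash
+#!/usr/bin/env bash
+SOURCE=http://source-node:8098
+SINK=http://sink-node:8098
+KEY=buckets/canary/keys/rt-check
+SLA_SECONDS=30
+
+# Steps 1-2: write a new state (a timestamp) to the source cluster
+STATE=$(date +%s)
+curl -s -XPUT "$SOURCE/$KEY" -H "Content-Type: text/plain" -d "$STATE"
+
+# Step 3: poll the sink until the state matches or the SLA is exceeded
+for i in $(seq 1 "$SLA_SECONDS"); do
+  if [ "$(curl -s "$SINK/$KEY")" = "$STATE" ]; then
+    echo "replicated within ${i}s"; exit 0
+  fi
+  sleep 1
+done
+echo "SLA of ${SLA_SECONDS}s exceeded" >&2
+exit 1
+```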
+
+
diff --git a/content/riak/kv/2.9.4/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/2.9.4/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..13e8b699d3
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,73 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/mdc/per-bucket
+  - /riak/kv/2.9.4/ops/mdc/per-bucket
+---
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+The set of valid `repl` values changed between Riak Enterprise versions
+1.1 and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+ * `true`
+---
+Enable replication (realtime + fullsync)
+ * `false`
+---
+Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+ * `realtime`
+---
+Replication only occurs in realtime for this bucket
+ * `fullsync`
+---
+Replication only occurs during a fullsync operation
+ * `both`
+---
+Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this also applies to properties such as
+`backend`. If the bucket doesn't exist in the destination cluster, Riak
+will create it with the default backend and _not_ with the backend used
+in the source cluster.
+
diff --git a/content/riak/kv/2.9.4/using/reference/multi-datacenter/statistics.md b/content/riak/kv/2.9.4/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..ea9863fa16
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,241 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/mdc/statistics
+  - /riak/kv/2.9.4/ops/mdc/statistics
+---
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
+
+1. Many of these statistics will appear only on the current
+   leader node
+2. 
The counts for all statistics will be reset to 0 upon restarting Riak,
+   unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | The number of sink errors detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | The number of source errors detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+The following values are found under `realtime_queue_stats`.
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster.
+`fullsync_start_time` | The time the current fullsync to the specified cluster began.
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync.
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When completed, this will be the same as the total number of partitions in the ring.
+`error_exits` | If a sync failed or was aborted, the partition will be re-queued and tried again later
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
+`strategy` | The strategy that fulfills fullsync replication.
In previous versions of replication, different values could be configured. This value could be changed depending on your replication needs.
+`fullsync_worker` | The Erlang process ID of the fullsync worker.
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received by the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received by the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication sink
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node.
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The connected site (sink) name as configured. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used.
They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for
+more information.
+
+[config v2 mdc]: {{<baseurl>}}riak/kv/2.9.4/configuring/v2-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.4/configuring/v3-multi-datacenter
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+
+## Accessing Replication Web-Based Statistics
+
+These stats can be accessed via the command line with the following
+command:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats
+```
+
+A simple way to view formatted statistics is to use a command such as:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp
+```
+
diff --git a/content/riak/kv/2.9.4/using/reference/object-deletion.md b/content/riak/kv/2.9.4/using/reference/object-deletion.md
new file mode 100644
index 0000000000..bc40e62428
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/object-deletion.md
@@ -0,0 +1,124 @@
+---
+title: "Object Deletion Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Object Deletion"
+    identifier: "managing_ref_object_deletion"
+    weight: 103
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/advanced/deletion
+---
+
+[concept eventual consistency]: ../../../learn/concepts/eventual-consistency
+[concept clusters]: ../../../learn/concepts/clusters
+[glossary vnode]: ../../../learn/glossary/#vnode
+[usage delete objects]: ../../../developing/usage/deleting-objects
+[developing keylist]: ../../../developing/api/http/list-keys
+[developing mapreduce]: ../../../developing/usage/mapreduce
+[cluster mdc]: ../../cluster-operations/v3-multi-datacenter
+[config advanced]: ../../../configuring/reference/#advanced-configuration
+[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum
+[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings
+[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction
+
+In single-server, non-clustered data storage systems, object deletion
+is a trivial process.
In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however,
+object deletion is far less trivial because objects live on multiple
+[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend.
+
+## Object Deletion Example
+
+The problem of object deletion in distributed systems can be illustrated more concretely using the following example:
+
+* An object is stored on nodes A, B, and C
+* Node C suddenly goes offline due to a network failure
+* A client sends a delete request to node A, which forwards that
+  request to node B, but it cannot reach node C
+* On nodes A and B, the object is deleted
+* Node C comes back online
+* A client attempts to read the object, and the request hits node C
+* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object.
+
+The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred:
+
+1. the object was deleted on A & B but not on C
+2. the object was created on C but not on A & B
+
+To get around this problem, Riak uses *tombstones*.
+
+## Tombstones
+
+Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it.
+
+This allows Riak to understand the difference between an object that has been deleted and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*.
+
+The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**.
+
+After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to).
+
+## Configuring Object Deletion
+
+The `delete_mode` setting in a cluster's [configuration files][config advanced] will determine how long a tombstone will remain before being reaped.
+
+There are three possible settings:
+
+* `keep`
+---
+Disables tombstone removal
+* `immediate`
+---
+The tombstone is removed as soon as the request is
+  received
+* Custom time interval
+---
+How long to wait until the tombstone is
+  removed, expressed in milliseconds. The default is `3000`, i.e. to
+  wait 3 seconds
+
+In general, we recommend setting the `delete_mode` parameter to `keep`
+if you plan to delete and recreate objects under the same key. This protects against failure scenarios in which a deleted object may be resurrected.
+
+Setting `delete_mode` to `immediate` can be useful in situations in
+which an aggressive space reclamation process is necessary, such as
+when running [MapReduce jobs][developing mapreduce], but we do not recommend
+this in general.
+
+Setting `delete_mode` to a longer duration than the default can be
+useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when
+network connectivity is an issue.
+
+## Deletion from Backends
+
+When attempting to reclaim disk space, deleting data may seem like the obvious first step.
However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion, when a Riak tombstone is created, and later, when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging]; for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping; prior to garbage collection, delete operations will actually cause disk space usage to rise slightly.
+
+## Tombstones & Reporting
+
+When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:
+
+* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
+* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain on the node until a further request finds it. At that time, a new reap timer will be initiated.
+* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
+* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object had never been written. A `not_found` will be sent up to the client in this case, too.
+* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.
+
+## Client Library Examples
+
+Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
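+
+As a quick illustration of what a delete looks like over the HTTP API (a minimal sketch; the `cars` bucket and `dodge-viper` key are hypothetical), a delete is issued with the `DELETE` verb, after which an immediate re-read returns `404 Not Found` while the tombstone is in place:
+
+```curl
+# Delete the object; tombstones are written on at least W nodes
+curl -XDELETE http://localhost:8098/buckets/cars/keys/dodge-viper
+
+# An immediate read returns 404 Not Found; with delete_mode set to a
+# time interval, the tombstone itself is reaped once the timer expires
+curl -i http://localhost:8098/buckets/cars/keys/dodge-viper
+```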
+ +## Resources + +* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html) + diff --git a/content/riak/kv/2.9.4/using/reference/runtime-interaction.md b/content/riak/kv/2.9.4/using/reference/runtime-interaction.md new file mode 100644 index 0000000000..90ce722fb2 --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/runtime-interaction.md @@ -0,0 +1,81 @@ +--- +title: "Runtime Interaction Reference" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Runtime Interaction" + identifier: "managing_ref_runtime_interaction" + weight: 104 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.4/ops/advanced/runtime + - /riak/kv/2.9.4/ops/advanced/runtime +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference +[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters + +Riak's [configuration files][config reference] provide a variety of parameters that +enable you to fine-tune how Riak interacts with two important elements +of the underlying operating system: distribution ports and OS +processes/garbage collection. + +## Ports + +Distribution ports connect Riak nodes within a [cluster][concept clusters]. The +following port-related parameters are available: + +* `runtime_health.triggers.distribution_port` +--- +Whether distribution + ports with full input buffers will be counted as busy. + * Default: `on` +* `runtime_health.triggers.port` +--- +Whether ports with full input + buffers will be counted as busy. Ports can represent open files or network sockets. + * Default: `on` +* `runtime_health.thresholds.busy_ports` +--- +The threshold at which a + warning will be triggered about the number of ports that are overly + busy. Ports with full input buffers count toward this threshold. + * Default: `2` + +## Processes + +Riak will log warnings related to busy operating system processes and +garbage collection. You can specify the conditions in which warnings are +triggered using the following parameters: + +* `runtime_health.thresholds.busy_processes` +--- +The threshold at which + a warning will be triggered about the number of processes that are + overly busy. Processes with large heaps or that take a long time to + garbage collect will count toward this threshold. + * Default: `30` +* `runtime_health.triggers.process.heap_size` +--- +A process will be + marked as busy when its size exceeds this size (in bytes). + * Default: `160444000` +* `runtime_health.triggers.process.garbage_collection` +--- +A process + will be marked as busy when it exceeds this amount of time doing + garbage collection. Enabling this setting can cause performance + problems on multi-core systems. + * Default: `off` + * Example when enabled: `50ms` +* `runtime_health.triggers.process.long_schedule` +--- +A process will + become busy when it exceeds this length of time during a single + process scheduling and execution cycle. 
+ * Default: `off` + * Example when enabled: `20ms` + diff --git a/content/riak/kv/2.9.4/using/reference/search.md b/content/riak/kv/2.9.4/using/reference/search.md new file mode 100644 index 0000000000..265b6d873e --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/search.md @@ -0,0 +1,454 @@ +--- +title: "Search Reference" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Search" + identifier: "managing_ref_search" + weight: 109 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.4/dev/advanced/search + - /riak/kv/2.9.4/dev/advanced/search +--- + +[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters +[configuring search]: {{<baseurl>}}riak/kv/2.9.4/configuring/search + +> **Note on search 2.0 vs. legacy search** +> +> This document refers to Riak search 2.0 with +[Solr](http://lucene.apache.org/solr/) integration (codenamed +Yokozuna). + +The project that implements Riak search is codenamed Yokozuna. This is a +more detailed overview of the concepts and reasons behind the design of +Yokozuna, for those interested. If you're simply looking to use Riak +search, you should check out the [Using Search]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search) document. + +![Yokozuna]({{<baseurl>}}images/yokozuna.png) + +## Riak Search is Erlang + +In Erlang OTP, an "application" is a group of modules and Erlang +processes which together perform a specific task. The word application +is confusing because most people think of an application as an entire +program such as Emacs or Photoshop. But Riak Search is just a sub-system +in Riak itself. Erlang applications are often stand-alone, but Riak +Search is more like an appendage of Riak. It requires other subsystems +like Riak Core and KV, but also extends their functionality by providing +search capabilities for KV data. + +The purpose of Riak Search is to bring more sophisticated and robust +query and search support to Riak. Many people consider Lucene and +programs built on top of it, such as Solr, as the standard for +open-source search. There are many successful applications built on +Lucene/Solr, and it sets the standard for the feature set that +developers and users expect. Meanwhile, Riak has a great story as a +highly-available, distributed key/value store. Riak Search takes +advantage of the fact that Riak already knows how to do the distributed +bits, combining its feature set with that of Solr, taking advantage of +the strengths of each. + +Riak Search is a mediator between Riak and Solr. There is nothing +stopping a user from deploying these two programs separately, but this +would leave the user responsible for the glue between them. That glue +can be tricky to write. It requires dealing with monitoring, querying, +indexing, and dissemination of information. + +Unlike Solr by itself, Riak Search knows how to do all of the following: + +* Listen for changes in key/value (KV) data and to make the appropriate + changes to indexes that live in Solr. It also knows how to take a user + query on any node and convert it to a Solr distributed search, which + will correctly cover the entire index without overlap in replicas. +* Take index creation commands and disseminate that information across + the cluster. +* Communicate and monitor the Solr OS process. + +## Solr/JVM OS Process + +Every node in a Riak [cluster][concept clusters] has a corresponding operating +system (OS) process running a JVM which hosts Solr on the Jetty +application server. 
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
+
+Because of this mismatch between KV and Solr, Riak Search must act as a
+mediator between the two, meaning it must have a way to inspect a KV
+object and create a structure which Solr can ingest for indexing. In
+Solr this structure is called a **document**. This task of creating a
+Solr document from a Riak object is the job of the **extractor**. To
+perform this task, two things must be considered.
+
+**Note**: Strictly speaking, the fields created by the extractor are
+only a subset of the fields in the final document. Special fields
+needed for Yokozuna to properly query data, as well as tagging fields,
+are also created. This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a
+   Solr document?
+2. If so, how is the object's value mapped from one to the other?
+   For example, the value may be `application/json` which contains
+   nested objects. This must somehow be transformed into a flat
+   structure.
+
+The first question is answered by the _extractor mapping_. By default
+Yokozuna ships with extractors for several common data types. Below is a
+table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation
+of the extractor module. Every extractor must conform to the
+following Erlang specification:
+
+```erlang
+-spec extract(ObjectValue :: binary(), Options :: proplists:proplist()) -> fields() | {error, term()}.
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+```
+
+The value of the object is passed along with options specific to each
+extractor. Assuming the extractor correctly parses the value, it will
+return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the
+object's value verbatim and associate it with the field name `text`.
+For example, an object with the value "How much wood could a woodchuck
+chuck if a woodchuck could chuck wood?" would result in the following
+fields list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier.
+JSON can be nested arbitrarily. That is, the key of a top-level object
+can have an object as a value, and this object can have another object
+nested inside, and so on. Yokozuna's JSON extractor must have some method
+of converting this arbitrary nesting into a flat list. It does this by
+concatenating nested object fields with a separator. The default
+separator is `.`. An example should make this more clear.
+
+Below is JSON that represents a person, what city they are from and what
+cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice:
+
+* Nested objects have their field names concatenated to form a field
+  name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your
+  schema defines this field as multi-valued.
+
+The XML extractor works in a very similar fashion to the JSON extractor,
+except it also has element attributes to worry about. To see the
+document created for an object, without actually writing the object, you
+can use the extract HTTP endpoint. This will do a dry-run extraction and
+return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+     -H 'Content-Type: application/json' \
+     --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and
+types. For each document stored, every field must have a matching name
+in the schema, used to determine the field's type, which in turn
+determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempt to hide any details of the Solr
+schema: a user creates a schema for Yokozuna just as she would for Solr.
+Here is the general structure of a schema.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding
+options are declared. Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
+called `_yz_default`. This is an extremely general schema which makes
+heavy use of dynamic fields---it is intended for development and
+testing. In production, a schema should be tailored to the data being
+indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+correcting entropy (divergence) between the data stored in Riak's
+key-value backend and the indexes stored in Solr. The impetus for AAE is
+that failures come in all shapes and sizes---disk failure, dropped
+messages, network partitions, timeouts, overflowing queues, segmentation
+faults, power outages, etc. Failures range from obvious to invisible.
+Failure prevention is fraught with failure, as well. How do you prevent
+your prevention system from failing? You don't. Code for detection, not
+prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite
+expensive. To minimize the overall cost of detection, AAE makes use of
+hashtrees. Every partition has a pair of hashtrees: one for KV and
+another for Yokozuna. As data is written, the hashtrees are updated in
+real time.
+
+Each tree stores the hash of the object. Periodically a partition is
+selected and the pair of hashtrees is _exchanged_. First the root hashes
+are compared. If equal, then there is no more work to do.
You could have
+millions of keys in one partition, yet verifying that they **all** agree
+takes the same time as comparing two hashes. If they don't match, then the
+root's children are checked, and this process continues until the
+individual discrepancies are found. If either side is missing a key, or
+the hashes for a key do not match, then _repair_ is invoked on that key.
+Repair converges the KV data and its indexes, removing the entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the
+hashtrees themselves may contain some entropy. For example, what if the
+root hashes agree but a divergence exists in the actual data? Simple:
+you assume you can never fully trust the hashtrees, so periodically you
+_expire_ them. When expired, a tree is completely destroyed and then
+re-built from scratch. This requires folding all data for a partition,
+which can be expensive and take some time. For this reason, by default,
+expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
+[screencast](http://coffee.jtuple.com/video/AAE.html).
+
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a
+stream of tokens. Solr allows many different methods of analysis,
+an important fact because different field values may represent
+different types of data. For data like unique identifiers, dates, and
+categories, you want to index the value verbatim---it shouldn't be
+analyzed at all. For text like product summaries, or a blog post,
+you want to split the value into individual words so that they may be
+queried individually. You may also want to remove common words,
+lowercase words, or perform stemming. This is the process of
+_analysis_.
+
+Solr provides many different field types which analyze data in different
+ways, and custom analyzer chains may be built by stringing together XML
+in the schema file, allowing custom analysis for each field. For more
+information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via
+Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata
+   about it that should be indexed, for example storing an image with
+   location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must
+   be added _without_ modifying the object's value.
+
+See
+[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
+for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index
+entries for a given Riak Object are co-located on the same physical
+machine. To query the entire index, all partitions must be contacted.
+Adjacent partitions keep replicas of the same object. Replication allows
+the entire index to be considered by only contacting a subset of the
+partitions. The process of finding a covering set of partitions is known
+as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can
+be thought of as a unique set of nodes along with a covering set of
+partitions. Yokozuna treats the node list as physical hostnames and
+passes them to Solr's distributed search via the `shards` parameter.
+Partitions, on the other hand, are treated logically in Yokozuna. All
+partitions for a given node are stored in the same index, unlike KV,
+which uses _partition_ as a physical separation.
To properly filter out
+overlapping replicas, the partition data from the cover plan is passed to
+Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very
+expensive operation, as much computation is done symbolically, and the
+process amounts to a knapsack problem. The larger the ring, the more
+expensive. Yokozuna takes advantage of the fact that it has no physical
+partitions by computing a coverage plan asynchronously every few
+seconds, caching the plan for query use. In the case of node failure or
+ownership change, this could mean a delay between cluster state and the
+cached plan. This is, however, a good trade-off given the performance
+benefits, especially since even without caching there is a race, albeit
+one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time it takes to initiate a drain to the time the drain is completed.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees.
Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr. + +* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting. + +* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window. + +* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window. + +* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of querying latency, as measured from the time it takes to send a request to Solr to the time the response is received from Solr. + +* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window. + +* `search_index_bad_entry_count` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. + +* `search_index_bad_entry_one` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute. + +* `search_index_extract_fail_count` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak. + +* `search_index_extract_fail_one` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute. + +While most of the default values are sufficient, you may have to +increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start. diff --git a/content/riak/kv/2.9.4/using/reference/secondary-indexes.md b/content/riak/kv/2.9.4/using/reference/secondary-indexes.md new file mode 100644 index 0000000000..c1346f1122 --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/secondary-indexes.md @@ -0,0 +1,73 @@ +--- +title: "Secondary Indexes Reference" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Secondary Indexes" + identifier: "managing_ref_2i" + weight: 110 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.4/dev/advanced/2i + - /riak/kv/2.9.4/dev/advanced/2i +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.4/using/reference/strong-consistency + +> **Note: Riak Search preferred for querying** +> +> If you're interested in non-primary-key-based querying in Riak, i.e. if +you're looking to go beyond straightforward K/V operations, we now +recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. 
+
+This document provides implementation and other details for Riak's
+[secondary indexes]({{<baseurl>}}riak/kv/2.9.4/developing/usage/secondary-indexes/) \(2i) feature.
+
+## How It Works
+
+Secondary indexes use **document-based partitioning**, a system where
+indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode). This
+system is also known as a local index. Secondary indexes are a list of key/value
+pairs that are similar to HTTP headers. At write time, objects are
+tagged with index entries consisting of key/value metadata. This
+metadata can be queried to retrieve the matching keys.
+
+![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png)
+
+Indexes reside on multiple machines. Since indexes for an object are
+stored on the same partition as the object itself, query-time
+performance issues might arise. When issuing a query, the system must
+read from a "covering" set of partitions and then merge the results.
+The system looks at how many replicas of data are stored---the N value
+or `n_val`---and determines the minimum fraction of partitions that it
+must examine (1 / `n_val`) to retrieve a full set of results, also
+taking into account any offline nodes.
+
+An application can modify the indexes for an object by reading an
+object, adding or removing index entries, and then writing the object.
+Finally, an object is automatically removed from all indexes when it is
+deleted. The object's value and its indexes should be thought of as a
+single unit. There is no way to alter the indexes of an object
+independently from the value of an object, and vice versa. Indexing is
+atomic, and is updated in real time when writing an object. This means
+that an object will be present in future index queries as soon as the
+write operation completes.
+
+Riak stores 3 replicas of all objects by default, although this can be
+changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/replication-properties). The system is capable of generating a full set of results
+from one third of the system's partitions as long as it chooses the
+right set of partitions. The query is sent to each partition, the index
+data is read, and a list of keys is generated and then sent back to the
+requesting node.
+
+> **Note on 2i and strong consistency**
+>
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
+secondary index metadata to those objects, you can still perform
+strongly consistent operations on those objects but the secondary
+indexes will be ignored.
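+
+As a minimal sketch of the mechanics described above (the `users` bucket, `john_smith` key, and `twitter_bin` index are hypothetical), an object can be tagged with index metadata at write time and the index queried over HTTP:
+
+```curl
+# Tag the object with a secondary index entry at write time
+curl -XPOST http://localhost:8098/buckets/users/keys/john_smith \
+  -H 'x-riak-index-twitter_bin: jsmith123' \
+  -H 'Content-Type: application/json' \
+  -d '{"name": "John Smith"}'
+
+# Query the index to retrieve the matching keys
+curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```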
+
diff --git a/content/riak/kv/2.9.4/using/reference/snmp.md b/content/riak/kv/2.9.4/using/reference/snmp.md
new file mode 100644
index 0000000000..e4de831b06
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/snmp.md
@@ -0,0 +1,163 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.4/ops/running/monitoring/snmp
+  - /riak/kv/2.9.4/ops/running/monitoring/snmp
+---
+
+Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document covers only SNMP v2c, the last supported version. Support for SNMP was dropped after the release of Riak KV 2.2.3 Enterprise Edition. The configuration examples below are left for people analyzing legacy settings and only work with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (e.g. `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: the comma-separated IP address format shown above is correct
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                   {force_load, true}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings you can start your Riak node and will be able to snmpwalk the node to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak, you will need to reference the MIB shipped with Riak.
For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+    -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+    192.168.52.129:4000 RIAK
+```
+
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
+
diff --git a/content/riak/kv/2.9.4/using/reference/statistics-monitoring.md b/content/riak/kv/2.9.4/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..7eb8c9cb86
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/statistics-monitoring.md
@@ -0,0 +1,392 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/stats-and-monitoring
+  - /riak/kv/2.9.4/ops/running/stats-and-monitoring
+---
+
+Riak provides data related to current operating status, which includes
+statistics in the form of counters and histograms. These statistics
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/2.9.4/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered
+statistics, as well as numerous solutions for monitoring and gathering
+statistics that our customers and community report using successfully
+in Riak cluster environments. You can learn more about the specific
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/2.9.4/developing/api/http/status) documentation.
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with
+diagnostics and early warnings of potential problems, as well as help
+guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor Load
+* Available Memory
+* Available disk space
+* Used file descriptors
+* Swap Usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory activity and
+writebacks. Things like massive flushes of dirty pages or steadily
+climbing writeback volumes can indicate poor virtual memory tuning.
+More information can be found [here][sysctl_vm_txt] and in our
+documentation on [system tuning]({{<baseurl>}}riak/kv/2.9.4/using/performance/#storage-and-file-system-tuning).
+
+## Riak Metrics to Graph
+
+Riak metrics fall into several general categories:
+
+1. Throughput metrics
+2. Latency metrics
+3. Erlang resource usage metrics
+4. General Riak load/health metrics
+
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/inspecting-node) is
+not practical, you should pick a minimum relevant subset from these
+categories. Some of the most helpful metrics are discussed below.
+
+### Throughput Metrics
+
+Graphing the throughput stats relevant to your use case is often
+helpful for capacity planning and usage trend analysis. In addition,
+it helps you establish an expected baseline -- that way, you can
+investigate unexpected spikes or dips in the throughput. The
+following stats are recorded for operations that happened *during the
+last minute*.
+
+Metric | Relevance | Operations (for the last minute)
+:--------|:--------|:--------------------------------
+```node_gets``` | K/V | Reads coordinated by this node
+```node_puts``` | K/V | Writes coordinated by this node
+```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes
+```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes
+```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes
+```search_query_throughput_one``` | Search | Search queries on the node
+```search_index_throughput_one``` | Search | Documents indexed by Search
+```consistent_gets``` | Strong Consistency | Consistent reads on this node
+```consistent_puts``` | Strong Consistency | Consistent writes on this node
+```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads
+
+Note that there are no separate stats for updates to Flags or
+Registers, as these are included in ```vnode_map_update```.
+
+### Latency Metrics
+
+As with the throughput metrics, keeping an eye on average (and max)
+latency times will help detect usage patterns, and provide advance
+warning of potential problems.
+
+{{% note title="Note on FSM Time Stats" %}}
+FSM Time Stats represent the amount of time in microseconds required to
+traverse the GET or PUT Finite State Machine code, offering a picture of
+general node health. From your application's perspective, FSM Time effectively
+represents experienced latency. Mean, Median, and 95th-, 99th-, and
+100th-percentile (Max) counters are displayed. These are one-minute stats.
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute +```pbc_active``` | | Number of currently active protocol buffer connections +```pbc_connects``` | | Number of new protocol buffer connections established during the last minute +```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes) +```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0) +```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection +```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### General Riak Search Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Description +:------|:------------ +`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. +`search_index_bad_entry_one ` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute. +`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last start of Riak. +`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute. + + +## Command-line Interface + +The [`riak-admin`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/) tool provides two +interfaces for retrieving statistics and other information: `status` +and `stat`. + +### status + +Running the `riak-admin status` command will return all of the +currently available information from a running node. + +```bash +riak-admin status +``` + +This will return a list of over 300 key/value pairs, like this: + +``` +1-minute stats for 'dev1@127.0.0.1' +------------------------------------------- +connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1'] +consistent_get_objsize_100 : 0 +consistent_get_objsize_195 : 0 +... etc ... +``` + +A comprehensive list of available stats can be found in the +[Inspecting a Node]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/inspecting-node/#riak-admin-status) document. + +### stat + +The `riak-admin stat` command is related to the `riak-admin status` +command but provides a more fine-grained interface for interacting with +stats and information. Full documentation of this command can be found +in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#stat) document. + +## Statistics and Monitoring Tools + +There are many open source, self-hosted, and service-based solutions for +aggregating and analyzing statistics and log data for the purposes of +monitoring, alerting, and trend analysis on a Riak cluster. Some +solutions provide Riak-specific modules or plugins as noted. + +The following are solutions which customers and community members have +reported success with when used for monitoring the operational status of +their Riak clusters. Community and open source projects are presented +along with commercial and hosted services. 
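+
+Whichever tool you choose, most of the solutions below consume the same HTTP endpoint. As a minimal sketch (piping through `jq` to pluck out a single metric is illustrative, not required):
+
+```curl
+# Pull the full statistics document as JSON
+curl -s http://localhost:8098/stats
+
+# Extract a single metric for forwarding to a monitoring system
+curl -s http://localhost:8098/stats | jq '.node_gets'
+```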
+ +{{% note title="Note on Riak 2.x Statistics Support" %}} +Many of the below tools were either created by third-parties or Basho +engineers for general usage, and have been passed to the community for further +updates. As such, many of the below only aggregate the statistics and messages +that were output by Riak 1.4.x. + +Like all code under [Basho Labs](https://github.com/basho-labs/), the below +tools are "best effort" and have no dedicated Basho support. We both +appreciate and need your contribution to keep these tools stable and up to +date. Please open up a GitHub issue on the repository if you'd like to be a +maintainer. + +Look for banners calling out the tools we've verified that support the latest +Riak 2.x statistics! +{{% /note %}} + +### Self-Hosted Monitoring Tools + +#### Riaknostic + +[Riaknostic](http://riaknostic.basho.com) is a growing suite of +diagnostic checks that can be run against your Riak node to discover +common problems and recommend how to resolve them. These checks are +derived from the experience of the Basho Client Services Team as well as +numerous public discussions on the mailing list, IRC room, and other +online media. + +Riaknostic integrates into the `riak-admin` command via a `diag` +subcommand, and is a great first step in the process of diagnosing and +troubleshooting issues on Riak nodes. + +#### Riak Control + +[Riak Control]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +clusters. It is designed to give you quick insight into the health of +your cluster and allow for easy management of nodes. + +While Riak Control does not currently offer specific monitoring and +statistics aggregation or analysis functionality, it does offer features +which provide immediate insight into overall cluster health, node +status, and handoff operations. + +#### collectd + +[collectd](http://collectd.org) gathers statistics about the system it +is running on and stores them. The statistics are then typically graphed +to find current performance bottlenecks, predict system load, and +analyze trends. + +#### Ganglia + +[Ganglia](http://ganglia.info) is a monitoring system specifically +designed for large, high-performance groups of computers, such as +clusters and grids. Customers and community members using Riak have +reported success in using Ganglia to monitor Riak clusters. + +A [Riak Ganglia module][riak_ganglia] for collecting statistics from +the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.4/developing/api/http/status) endpoint is also available. + +#### Nagios + +{{% note %}} +**Tested and Verified Support for Riak 2.x.** +{{% /note %}} + +[Nagios](http://www.nagios.org) is a monitoring and alerting solution +that can provide information on the status of Riak cluster nodes, in +addition to various types of alerting when particular events occur. +Nagios also offers logging and reporting of events and can be used for +identifying trends and capacity planning. + +A collection of [reusable Riak-specific scripts][riak_nagios] are +available to the community for use with Nagios. + +#### OpenTSDB + +[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database +(TSDB) used to store, index, and serve metrics from various sources. It can +collect data at a large scale and graph these metrics on the fly. + +A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of +the [tcollector framework][tcollector]. 
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project
+consisting of small programs for sending data to Riemann provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have
+reported successfully using for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.4/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine-generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/2.9.4/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. These data are then available for real-time graphing,
+search, and other visualizations ideal for troubleshooting complex issues
+and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistic information which can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ + diff --git a/content/riak/kv/2.9.4/using/reference/strong-consistency.md b/content/riak/kv/2.9.4/using/reference/strong-consistency.md new file mode 100644 index 0000000000..d8b3bc35d3 --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/strong-consistency.md @@ -0,0 +1,146 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.4/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs).
+
+In an eventually consistent system, on the other hand, a read may return an out-of-date value, particularly during system or network failures. The advantage of this approach is that reads and writes can succeed even when a cluster is experiencing significant service degradation.
+
+### Example
+
+Building on the example presented in the [eventual consistency][concept eventual consistency] doc, imagine that information about who manages Manchester United is stored in Riak, in the key `manchester-manager`. In the eventual consistency example, the value associated with this key was originally `David Moyes`, meaning that it was the first successful write to that key. But then `Louis van Gaal` became Man U's manager, and a write was executed to change the value of `manchester-manager`.
+
+Now imagine that this write failed on one node in a multi-node cluster. Thus, all nodes report that the value of `manchester-manager` is `Louis van Gaal` except for one. On the errant node, the value of the `manchester-manager` key is still `David Moyes`. An eventually consistent system is one in which a get request will most likely return `Louis van Gaal` but could return the outdated value `David Moyes`.
+
+In a strongly consistent system, conversely, any successful read on `manchester-manager` will return `Louis van Gaal` and never `David Moyes`. Reads will return `Louis van Gaal` every single time until Man U gets a new manager and someone performs a successful write to `manchester-manager` to change its value.
+
+It might also be useful to imagine it a bit more abstractly. The following causal sequence would characterize a strongly consistent system:
+
+1. The value of the key `k` is set to `v`
+2. All successful reads on `k` return `v`
+3. The value of `k` is changed to `v2`
+4. All successful reads on `k` return `v2`
+5. And so forth
+
+At no point in time does this system return an out-of-date value.
+
+The following sequence could characterize an eventually consistent system:
+
+1. A write is made that sets the value of the key `k` to `v`
+2. Nearly all reads to `k` return `v`, but a small percentage return `not found`
+3. A write to `k` changes the value to `v2`
+4. Nearly all reads to `k` now return `v2`, but a small number return the outdated `v` (or even `not found`) because the newer value hasn't yet been replicated to all nodes
+
+## Making the Strong vs. Eventual Decision
+
+The first system described above may sound like the undisputed champion and the second system undesirable. However:
+
+1. Reads and writes on the first system will often be slower---if only by a few milliseconds---because the system needs to manage reads and writes more carefully. If performance is of primary concern, the first system might not be worth the sacrifice.
+2. Reads and writes on the first system may fail entirely if enough servers are unavailable. If high availability is the top priority, then the second system has a significant advantage.
+
+So when deciding whether to use strong consistency in Riak, the following question needs to be asked:
+
+#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value?
+
+If the answer is yes, then you should seriously consider using Riak in a strongly consistent way for the data that demands it, while bearing in mind that other data can still be stored in Riak in an eventually consistent way.
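+
+In practice, opting in looks like the following. A minimal sketch, assuming strong consistency has already been enabled in `riak.conf` (`strong_consistency = on`) on a cluster of at least three nodes; the bucket type name `strongly_consistent` is illustrative:
+
+```bash
+## Create a bucket type whose keys are managed by the consensus
+## subsystem, then activate it
+riak-admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'
+riak-admin bucket-type activate strongly_consistent
+```
+
+Keys stored under that bucket type get strongly consistent semantics, while buckets of other types retain Riak's default eventually consistent behavior.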
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available than eventually consistent operations because they require a **quorum** of available object replicas to succeed. Quorum is defined as N / 2 + 1, or `n_val` / 2 + 1. If N is set to 7, at least 4 object replicas must be available; if N is 3, at least 2 must be available; and so on.
+
+If there is a network partition that leaves less than a quorum of object replicas available within an ensemble, strongly consistent operations against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble are offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the [operations]({{<baseurl>}}riak/kv/2.9.4/configuring/strong-consistency/#fault-tolerance) documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/2.9.4/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/2.9.4/configuring/strong-consistency/#performance).
+
 diff --git a/content/riak/kv/2.9.4/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.9.4/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..05787fe105
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,36 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.4/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+ +[Learn More >>][v2 mdc fullsync] + diff --git a/content/riak/kv/2.9.4/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.9.4/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..423b4d6e87 --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,127 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/2.9.4/ops/mdc/v2/architecture + - /riak/kv/2.9.4/ops/mdc/v2/architecture +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.4/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e. +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime mode are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the Figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. 
The site node in the secondary cluster initiates fullsync replication with the primary node by sending a message to the listener node in the primary cluster
+3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode) in their respective clusters and compute a hash for each key's object value. The site node on the secondary cluster sends its complete list of key/hash pairs to the listener node in the primary cluster. The listener node then sequentially compares the received key/hash pairs with its own, identifying any missing objects or updates needed in the secondary cluster.
+4. The listener node streams the missing objects/updates to the secondary cluster.
+5. The secondary cluster replicates the updates within the cluster to achieve the new object values, completing the fullsync cycle.
+
+<br>
+![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png)
+<br>
+
+## Realtime Replication
+
+Riak performs the following steps during realtime replication, as illustrated in the Figure below.
+
+1. The secondary cluster establishes a TCP connection to the primary
+2. Realtime replication of a key/object is initiated when an update is sent from a client to the primary cluster
+3. The primary cluster replicates the object locally
+4. The listener node on the primary cluster streams an update to the secondary cluster
+5. The site node within the secondary cluster receives and replicates the update
+
+<br>
+![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png)
+<br>
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes in common for Multi-Datacenter Replication to work. If you are using either fullsync or realtime replication, both clusters must have the same [ring size]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters/#the-ring); if you are using fullsync replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/2.9.4/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the source and sink cluster.
+
 diff --git a/content/riak/kv/2.9.4/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.4/using/reference/v2-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..a154389b6b
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/v2-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,50 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v2_fullsync"
+    weight: 101
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/mdc/v2/scheduling-fullsync
+  - /riak/kv/2.9.4/ops/mdc/v2/scheduling-fullsync
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.4/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead.
+{{% /note %}}
+
+
+## Scheduling Fullsync Operation
+
+With the `pause` and `resume` commands it is possible to limit the fullsync operation to off-peak times. First, disable `fullsync_interval` and set `fullsync_on_connect` to `false`. Then, using cron or something similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` + diff --git a/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter.md b/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..d8d41bd3c1 --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter.md @@ -0,0 +1,48 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] + diff --git a/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..37ed43eb8f --- /dev/null +++ b/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,126 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.4/ops/mdc/v3/aae + - /riak/kv/2.9.4/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in LevelDB, so if AAE is already active, there is no additional startup delay for enabling the `aae` fullsync strategy. AAE can also be enabled for the first time on a cluster, although in this case some custom settings can help AAE trees build more quickly. See [Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later installed on source and sink clusters
+* Riak MDC Replication Version 3 enabled on source and sink clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the `advanced.config` configuration file must be set to `aae` on both source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In the event that an AAE tree is not built on both the source and sink, fullsync will default to the `keylist` fullsync strategy for that partition.
+
+## Configuration
+
+If you are using Riak version 2.0 or later, configuration is managed using the `advanced.config` files on each node. The semantics of the `advanced.config` file are similar to the formerly used `app.config` file. For more information and for a list of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To use [active anti-entropy][glossary aae] \(AAE) fullsync, you must enable AAE in both source and sink clusters. If it is not enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all of the necessary hash trees because the default **build rate** of trees is to build 1 partition per hour, per node. With a [ring size][concept clusters] of 256 and 5 nodes, that works out to roughly 2 days.
+
+Changing the rate of tree building can speed up this process, with the caveat that rebuilding a tree takes processing time from the cluster, and this should not be done without assessing the possible impact on get/put latencies for normal cluster operations. For a production cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a partition has not had its AAE tree built yet, it will default to using the `keylist` replication strategy. Instructions on these settings can be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit` settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on both source and sink clusters.
If not, the `keylist` replication strategy will be used.
+
+To enable AAE with Version 3 MDC Replication:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {fullsync_strategy, aae},
+    % ...
+    ]}
+```
+
 diff --git a/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..5a83a12fe6
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/architecture.md
@@ -0,0 +1,183 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Architecture"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Architecture"
+    identifier: "managing_ref_v3_architecture"
+    weight: 100
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.4/ops/mdc/v3/architecture
+  - /riak/kv/2.9.4/ops/mdc/v3/architecture
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters
+
+## How Version 3 Replication Works
+
+In Multi-Datacenter (MDC) Replication, a cluster can act as either the
+
+* **source cluster**, which sends replication data to one or more
+* **sink clusters**, which are generally located in datacenters in other regions or countries.
+
+Bidirectional replication can easily be established by making a cluster both a source and sink to other clusters. Riak Multi-Datacenter Replication is considered "masterless" in that all clusters participating will resolve replicated writes via the normal resolution methods available in Riak.
+
+In Multi-Datacenter Replication, there are two primary modes of operation:
+
+* **Fullsync** replication is a complete synchronization that occurs between source and sink cluster(s), which can be performed upon initial connection of a sink cluster if you wish
+* **Realtime** replication is a continual, incremental synchronization triggered by successful writing of new updates on the source cluster
+
+Fullsync and realtime replication modes are described in detail below.
+
+## Concepts
+
+### Sources
+
+A source refers to a cluster that is the primary producer of replication data. A source can also refer to any node that is part of the source cluster. Source clusters push data to sink clusters.
+
+### Sinks
+
+A sink refers to a cluster that is the primary consumer of replication data. A sink can also refer to any node that is part of the sink cluster. Sink clusters receive data from source clusters.
+
+### Cluster Manager
+
+The cluster manager is a Riak service that provides information regarding nodes and protocols supported by the sink and source clusters. This information is primarily consumed by the `riak-repl connect` command.
+
+### Fullsync Coordinator
+
+In fullsync replication, a node on the source cluster is elected to be the *fullsync coordinator*. This node is responsible for starting and stopping replication to the sink cluster. It also communicates with the sink cluster to exchange key lists and ultimately transfer data across a TCP connection. If a fullsync coordinator is terminated as the result of an error, it will automatically restart on the current node. If the node becomes unresponsive, a leader election will take place within 5 seconds to select a new node from the cluster to become the coordinator. In the event of a coordinator restart, a fullsync will have to restart.
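+
+As a concrete illustration of how these pieces fit together, the sketch below wires a source cluster to a sink. The cluster names `boston` and `newyork` and the address `192.168.1.10:9080` are illustrative; 9080 is the conventional cluster manager port:
+
+```bash
+## Name each cluster once (run on any node of each cluster)
+riak-repl clustername boston    # on the source
+riak-repl clustername newyork   # on the sink
+
+## On a source cluster node: connect to the sink's cluster manager
+riak-repl connect 192.168.1.10:9080
+
+## Enable and kick off fullsync to the named sink; the fullsync
+## coordinator election described above happens behind the scenes
+riak-repl fullsync enable newyork
+riak-repl fullsync start newyork
+```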
+ +## Fullsync Replication + +Fullsync replication scans through the list of partitions in a Riak +cluster and determines which objects in the sink cluster need to be +updated. A source partition is synchronized to a node on the sink +cluster containing the current partition. + +## Realtime Replication + +In realtime replication, a node in the source cluster will forward data +to the sink cluster. A node in the source cluster does not necessarily +connect to a node containing the same [vnode][glossary vnode] on +the sink cluster. This allows Riak to spread out realtime replication +across the entire cluster, thus improving throughput and making +replication more fault tolerant. + +### Initialization + +Before a source cluster can begin pushing realtime updates to a sink, +the following commands must be issued: + +1. `riak-repl realtime enable <sink_cluster>` + + After this command, the realtime queues (one for each Riak node) are + populated with updates to the source cluster, ready to be pushed to + the sink. + +2. `riak-repl realtime start <sink_cluster>` + + This instructs the Riak connection manager to contact the sink + cluster. + + <br /> + ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png) + <br /> + + At this point realtime replication commences. + +<ol start="3"> +<li>Nodes with queued updates establish connections to the sink cluster +and replication begins.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png) +<br /> + +### Realtime queueing and synchronization + +Once initialized, realtime replication continues to use the queues to +store data updates for synchronization. + +<ol start="4"> +<li>The client sends an object to store on the source cluster.</li> +<li>Riak writes N replicas on the source cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png) +<br /> + +<ol start="6"> +<li>The new object is stored in the realtime queue.</li> +<li>The object is copied to the sink cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png) +<br /> + +<ol start="8"> +<li>The destination node on the sink cluster writes the object to N +nodes.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png) +<br /> + +<ol start="9"> +<li>The successful write of the object to the sink cluster is +acknowledged and the object removed from the realtime queue.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png) +<br /> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size][concept clusters]; if you are using fullsync +replication, every bucket's `n_val` must be the same in both the +source and sink cluster. 
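+
+Before connecting two clusters, it can be worth confirming that these attributes match. A minimal sketch, assuming default ports and a bucket named `test` (both illustrative), with `jq` installed:
+
+```bash
+## Compare on a node in each cluster: the ring sizes must be equal
+riak-admin status | grep ring_creation_size
+
+## For fullsync, compare a bucket's n_val on each cluster via HTTP
+curl -s http://127.0.0.1:8098/buckets/test/props | jq .props.n_val
+```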
+
 diff --git a/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/cascading-writes.md
new file mode 100644
index 0000000000..0d6b288de8
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -0,0 +1,99 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Cascading Realtime Writes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Cascading Writes"
+    identifier: "managing_ref_v3_cascading_writes"
+    weight: 102
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.4/ops/mdc/v3/cascading-writes
+  - /riak/kv/2.9.4/ops/mdc/v3/cascading-writes
+---
+
+## Introduction
+
+Riak includes a feature that cascades realtime writes across multiple clusters.
+
+Cascading Realtime Writes is enabled by default on new clusters running Riak. It will need to be manually enabled on existing clusters.
+
+Cascading realtime requires the `{riak_repl, rtq_meta}` capability to function.
+
+{{% note title="Note on cascading tracking" %}}
+Cascading tracking is a simple list of where an object has been written. This works well for most common configurations. Larger installations, however, may have writes cascade to clusters to which other clusters have already written.
+{{% /note %}}
+
+
+```
+---+     +---+     +---+
| A | <-> | B | <-> | C |
+---+     +---+     +---+
  ^                   ^
  |                   |
  V                   V
+---+     +---+     +---+
| F | <-> | E | <-> | D |
+---+     +---+     +---+
```
+
+In the diagram above, a write at cluster A will begin two cascades. One goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and finally B. Each cascade will loop around to A again, sending a replication request even if the same request has already occurred from the opposite direction, creating 3 extra write requests.
+
+This can be mitigated by disabling cascading in a cluster. If cascading were disabled on cluster D, a write at A would begin two cascades. One would go through B, C, and D, the other through F, E, and D. This reduces the number of extraneous write requests to 1.
+
+A different topology can also prevent extra write requests:
+
+```
+---+                      +---+
| A |                      | E |
+---+                      +---+
 ^ ^                        ^ ^
 |  \    +---+     +---+   /  |
 |   >   | C | <-> | D |  <   |
 |  /    +---+     +---+   \  |
 V V                        V V
+---+                      +---+
| B |                      | F |
+---+                      +---+
```
+
+A write at A will cascade to C and B. B will not cascade to C because A will have already added C to the list of clusters where the write has occurred. C will then cascade to D. D then cascades to E and F. E and F see that the other was sent a write request (by D), and so they do not cascade.
+
+## Usage
+
+Riak Cascading Writes can be enabled and disabled using the `riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/v3-multi-datacenter) for more information.
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
+
 diff --git a/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..5b94f53764
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,69 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.4/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/2.9.4/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes, between fullsyncs, _or_
+* a list of pairs of the form `[{"clustername", time_in_minutes}, {"clustername", time_in_minutes}, ...]`, one pair for each sink participating in fullsync replication. Note the commas separating each pair, and `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    {fullsync_interval, 90} %% fullsync runs every 90 minutes
+    % ...
+    ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    % clusters sink_boston + sink_newyork have different intervals (in minutes)
+    {fullsync_interval, [
+        {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+        {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+    ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last completed fullsync.
+
 diff --git a/content/riak/kv/2.9.4/using/repair-recovery.md b/content/riak/kv/2.9.4/using/repair-recovery.md
new file mode 100644
index 0000000000..7d98f9568f
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/repair-recovery.md
@@ -0,0 +1,49 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
+
 diff --git a/content/riak/kv/2.9.4/using/repair-recovery/errors.md b/content/riak/kv/2.9.4/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..84c7849ad4
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/repair-recovery/errors.md
@@ -0,0 +1,363 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/recovery/errors
+  - /riak/kv/2.9.4/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect place. This is an attempt at capturing the most common recent errors that users encounter, as well as to describe the non-critical error atoms which you may find in the logs.
+
+Discovering the source of an error can take some detective work, since one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error messages may appear in. Depending upon your log configuration, some may appear more often (e.g., if you set the log to debug), while others may output to your console (e.g., if you tee'd your output or started as `riak console`).
+
+You can optionally customize your log message format via the `lager_default_formatter` field under `lager` in `app.config`. If you do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so that you can look up portions of a log message, since printing every variation would be a bit unwieldy. For example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+starts with a date (`12:34:27.999`), followed by the log severity (`[error]`), and ends with a message formatted by lager (found in the Lager table below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the Lager project, so it's good to note some of the more common message formats. In almost every case the reasons for the error are described as variables, such as `Reason` or `Mod` (the Erlang module which is generally the source of the error).
+
+Riak does not format all error messages that it receives into human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in this table, where the Erlang `Mod` value is `riak_core_capability` and the reason was an Erlang error: `no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang programming favors a "happy path/fail fast" style, one of the more common error log strings you might encounter contains `{error,{badmatch,{...`. This is Erlang's way of telling you that an unexpected value was assigned, so these errors can prefix the more descriptive parts. In this case, `{error,{badmatch,{...` prefixes the more interesting `insufficient_vnodes_available` error, which can be found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production, users new to Riak or Erlang occasionally encounter errors on initial installation. These spring from a setup Erlang does not expect, generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.4/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.4/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large of an object or that you simply don't have enough RAM. Base minimum suggested RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to Riak to explain the correct course of action. For example, the `map/reduce` `parse_input` phase will respond like this when it encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang atoms (and quite often wrapped within an `{error,{badmatch,{...` tuple, as described in the [Erlang Errors](#erlang-errors) section above). This table lays out those terse error codes and related log messages, if they exist.
+
+### Riak Core
+
+Riak Core is the underlying implementation for KV. These are errors originating from that framework, and can appear whether you use KV, Search, or any Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g.
backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check the `riak-admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections, and ensure the Erlang cookie (`vm.args` `-setcookie`) matches
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use (e.g. `{riak_kv_stat, true}`)
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possibly a corrupted ring
+`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN`, so the sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.4/using/repair-recovery/errors/#more"> Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive an `xfer_complete` status
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be Riak proper.
This is the source of most of the code, and consequently, most of the error messages.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status
+`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
+`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that the coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.4/using/repair-recovery/errors/#more"> Step 1</a>.
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/2.9.4/using/repair-recovery/errors/#more"> Step 1</a>.
+`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone
+`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value
+`{field_parsing_failed, {Field, Value}}` | `Could not parse field <Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer
+`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug
+`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend
+`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`.
+`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings
+`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe`
+`invalid_message` | | Unknown event sent to module | Ensure you're running the same versions of Riak (and specifically poolboy) across all nodes
+`{invalid_range, Args}` | | Index range query has Start > End | Fix your query
+`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result
+`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files
+`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key
+`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk. If this happens repetitively, stop/start the riak node, forcing a memory realloc
+`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N
+`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized
+`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe`
+`notfound` | | No value found | Value was deleted, or was not yet stored or replicated
+`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high
+`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value
+`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code
+`{pw_val_unsatisfied, PW, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high
+`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value
+`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high
+`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value
+`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called
+`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value
+`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false`
+`timeout` | | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.4/using/repair-recovery/errors/#more"> Step 1</a>. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production.
+`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format
+`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format
+`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value
+`too_many_results` | | Too many results are attempted to be returned | This is a protective error. Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000)
+`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin`
+`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high
+`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value
+ | `Invalid equality query <SKey>` | Equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query
+ | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query
+ | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for the given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution
+
+### Backend Errors
+
+These errors tend to stem from server-based problems. Backends are sensitive to low or corrupt disk or memory resources, native code, and configuration differences between nodes. Conversely, a network issue is unlikely to affect a backend.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config
+`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where to store bitcask data, under the `bitcask` section
+`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list
+`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config`
+`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure
+`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config`
+`not_loaded` | | Native driver not loading | Ensure your native drivers exist (.dll or .so files under lib/`project`/priv, where `project` is most likely eleveldb)
+`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket
+`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production
+
+### JavaScript
+
+These are some errors related to JavaScript pre-commit functions, MapReduce functions, or simply the management of the pool of JavaScript VMs. If you do not use JavaScript, these should not be encountered. If they are, check your configuration for high `*js_vm*` values, or treat them as an epiphenomenon of a real issue, such as low resources.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`)
+`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A given UTF-8 character was badly formatted | Only use correct UTF-8 characters for JavaScript code and arguments
+`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments
+ | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if they are invalid | Fix the bucket properties
+`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | On OS X, you may have compiled with `llvm-gcc` rather than `gcc`
+
+### MapReduce
+
+These are possible errors logged by Riak's MapReduce implementation,
+both legacy and Pipe. If you never use or call MapReduce, you should
+not run across these.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it
+`bad_mapred_inputs` | | A bad value was sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary
+`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce code placed in a Riak value must be stored before it can be executed
+`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct
+`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process or, if using legacy MR, ensure `map_cache_size` is set (both issues may require a node restart)
+`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has an invalid inputs field | Fix the MapReduce fields
+`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks.
Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite them as Erlang functions
+`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post the MapReduce request with at least one of them
+`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value
+`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long-running job hitting the MapReduce timeout; upgrade to Pipe
+`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has an invalid query field | Fix the MapReduce query
+`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set a reduce phase at all
+`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted
+ | `Phase <Fitting>: <Reason>` | A general error when something goes wrong in the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation; the most recent known gotcha is a JavaScript function that improperly deals with tombstoned objects
+ | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen
+
+## Specific messages
+
+Although you can put together many error causes with the tables above,
+here are some common yet esoteric messages with known causes and
+solutions.
+
+Message | Resolution
+:--------|:----------
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The node has been changed, either through a change of IP or of the `vm.args` `-name` value, without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files with `rm -rf /var/lib/riak/ring/*` and rejoin the cluster
+gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl setting on one node's mem backend, and another without)
+monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1 msec).
+<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info; you can add `+swt very_low` to your `vm.args`
+(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/2.9.4/using/repair-recovery/errors/#more">Step 2</a>.
+enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support, so if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core.
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this can occur when another process is already bound to the address that the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause and resolve this kind of error; check for the existence of stale `beam.smp` processes.
+exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify the HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range, as the `riak` user will not have access to such ports.
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this can indicate that the `-name` value of a previously running Riak node was changed by simply editing `vm.args` rather than properly via `riak-admin cluster replace`.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/2.9.4/using/reference/snmp) configuration.
+RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused by attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
+
+
+### More
+
+1. <a name="f1"></a>Ensure node inter-communication
+    - Check `riak-admin member-status` and ensure the cluster is valid.
+    - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected.
+    - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node.
+    - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster.
+    - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`).
+
+2. <a name="f2"></a>Run LevelDB compaction
+    1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(finding one compaction error is interesting; more than one might be a strong indication of a hardware or OS bug. A consolidated sketch of this search follows the list.)*
+    2. Stop Riak on the node: `riak stop`
+    3. Start an Erlang session (do not start Riak; we just want Erlang)
+    4. From the Erlang console, run the following command to open the LevelDB database
+
+       ```erlang
+       [application:set_env(eleveldb, Var, Val) || {Var, Val} <-
+           [{max_open_files, 2000},
+            {block_size, 1048576},
+            {cache_size, 20*1024*1024*1024},
+            {sync, false},
+            {data_root, "/var/db/riak/leveldb"}]].
+       ```
+    5. For each of the corrupted LevelDB databases (found by `find . -name "LOG" -exec grep -l 'Compaction error' {} \;`) run this command, substituting in the proper vnode number.
+
+       ```erlang
+       eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []).
+       ```
+    6. When all have finished successfully, you may restart the node: `riak start`
+    7. Check for proper operation by looking at the log files in `/var/log/riak` and the LOG files in the affected LevelDB vnodes.
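+
+If several vnodes are affected, a small shell sketch like the following (assuming the same `/var/db/riak/leveldb` data root used above) can list the vnode directories that need `eleveldb:repair` before you open the Erlang console:
+
+```bash
+cd /var/db/riak/leveldb
+# Print the directory of every vnode whose LOG mentions a compaction error
+find . -name "LOG" -exec grep -l 'Compaction error' {} \; | while read -r log; do
+    dirname "$log"
+done
+```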
+
+
diff --git a/content/riak/kv/2.9.4/using/repair-recovery/failed-node.md b/content/riak/kv/2.9.4/using/repair-recovery/failed-node.md
new file mode 100644
index 0000000000..2d470e49ae
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/repair-recovery/failed-node.md
@@ -0,0 +1,111 @@
+---
+title: "Recovering a Failed Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Recover a Failed Node"
+    identifier: "repair_recover_failed_node"
+    weight: 104
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/recovery/failed-node
+  - /riak/kv/2.9.4/ops/running/recovery/failed-node
+---
+
+## General Recovery Notes
+
+A Riak node can fail for many reasons, but a handful of checks can uncover
+the most common causes of node failure: check RAID and filesystem
+consistency, check for faulty memory, and ensure that your network
+connections are fully functioning.
+
+When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster.
+
+During the recovery process, hinted handoff will kick in and update the data on
+the recovered node with updates accepted from other nodes in the cluster. Your
+cluster may temporarily return `not found` for objects that are currently
+being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on
+these scenarios, in particular how the system behaves while the failed node is
+not part of the cluster).
+
+## Node Name Changed
+
+If you are recovering from a scenario in which node name changes are out of
+your control, you'll want to notify the cluster of its *new* name using the
+following steps:
+
+1. Stop the node you wish to rename:
+
+    ```bash
+    riak stop
+    ```
+
+2. Mark the node down from another node in the cluster:
+
+    ```bash
+    riak-admin down <previous_node_name>
+    ```
+
+3. Update the node name in Riak's configuration files:
+
+    ```riakconf
+    nodename = <updated_node_name>
+    ```
+
+    ```vmargs
+    -name <updated_node_name>
+    ```
+
+4. Delete the ring state directory (usually `/var/lib/riak/ring`).
+
+5. Start the node again:
+
+    ```bash
+    riak start
+    ```
+
+6. Ensure that the node comes up as a single instance:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    The output should look something like this:
+
+    ```
+    ========================= Membership ==========================
+    Status     Ring    Pending    Node
+    ---------------------------------------------------------------
+    valid     100.0%      --      'dev-rel@127.0.0.1'
+    ---------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+7. Join the node to the cluster:
+
+    ```bash
+    riak-admin cluster join <node_name_of_a_member_of_the_cluster>
+    ```
+
+8. Replace the old instance of the node with the new:
+
+    ```bash
+    riak-admin cluster force-replace <previous_node_name> <new_node_name>
+    ```
+
+9.
Review the changes:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Finally, commit those changes:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
diff --git a/content/riak/kv/2.9.4/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.9.4/using/repair-recovery/failure-recovery.md
new file mode 100644
index 0000000000..03abdaf36e
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/repair-recovery/failure-recovery.md
@@ -0,0 +1,132 @@
+---
+title: "Failure & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Failure & Recovery"
+    identifier: "repair_recover_failure"
+    weight: 100
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/recovery/failure-recovery
+  - /riak/kv/2.9.4/ops/running/recovery/failure-recovery
+---
+
+Riak was built to withstand---or at the very least reduce the severity
+of---many types of system failure. Nonetheless, bugs are a reality,
+hardware does break, and occasionally Riak itself will fail. Here, we'll
+list some steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+## Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, and back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally and
+are not affected by a wider problem like a virtualization or network outage.
+Try to determine the cause of the problem from the data you have collected.
+
+## Data Loss
+
+Many failures incur no data loss, or minimal loss that can be
+repaired automatically, without intervention. Outage of a single node
+does not necessarily cause data loss, as other replicas of every key are
+available elsewhere in the cluster. Once the node is detected as down,
+other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/2.9.4/learn/glossary/#hinted-handoff)).
+
+More severe data loss scenarios usually relate to hardware failure.
+If data is lost, several options are available for restoring it:
+
+1. **Restore from backup** --- A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but it can be used to partially restore data from
+   lost storage volumes. If running in a RAID configuration, rebuilding
+   the array may also be possible.
+2. **Restore from multi-cluster replication** --- If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the `riak-repl`
+   command.
+3. **Restore using intra-cluster repair** --- Riak versions 1.2 and greater
+   include a repair feature which will restore lost partitions with
+   data from other replicas. Currently, this must be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho Client Services Engineer.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho are strongly recommended.
+
+## Data Corruption
+
+Data at rest on disk can become corrupted by hardware failure or other
+events.
Generally, the Riak storage backends are designed to handle
+cases of corruption in individual files or entries within files, and can
+repair them automatically or simply ignore the corrupted parts.
+Otherwise, clusters can recover from data corruption in roughly the same
+way that they recover from data loss.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully.
+
+Replacing the node with one that has greater RAM capacity may temporarily
+alleviate the problem, but out-of-memory (OOM) issues tend to be an indication
+that the cluster is under-provisioned.
+
+## High Latency / Request Timeout
+
+High latencies and timeouts can be caused by slow disks or networks or an
+overloaded node. Check `iostat` and `vmstat` or your monitoring system to
+determine the state of resource usage. If I/O utilization is high but
+throughput is low, this may indicate that the node is responsible for
+too much data and growing the cluster may be necessary. Additional RAM
+may also improve latency because more of the active dataset will be
+cached by the operating system.
+
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/2.9.4/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+the number of siblings, causing longer disk service times and slower
+network responses.
+
+Sibling explosion can be detected by examining the `node_get_fsm_siblings`
+and `node_get_fsm_objsize` statistics from the `riak-admin status` command.
+To recover from sibling explosion, the application should be throttled and
+the resolution policy might need to be invoked manually on offending keys.
+
+A Basho CSE can assist in manually finding large values, i.e. those that
+potentially have a sibling explosion problem, in the storage backend.
+
+MapReduce requests typically involve multiple I/O operations and are
+thus the most likely to time out. From the perspective of the client
+application, the success of MapReduce requests can be improved by reducing the
+number of inputs, supplying a longer request timeout, and reducing the usage
+of secondary indexes. Heavily loaded clusters may experience more MapReduce
+timeouts simply because many other requests are being serviced as well. Adding
+nodes to the cluster can reduce MapReduce failure in the long term by
+spreading load and increasing available CPU and IOPS.
+
+
+## Cluster Recovery From Backups
+
+See [Changing Cluster Information]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+
+{{% note title="Tip" %}}
+If you are a TI Tokyo Riak support customer and require assistance or
+further advice with a cluster recovery, please file a ticket with the
+<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>.
+{{% /note %}} + diff --git a/content/riak/kv/2.9.4/using/repair-recovery/repairs.md b/content/riak/kv/2.9.4/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..a479e845c3 --- /dev/null +++ b/content/riak/kv/2.9.4/using/repair-recovery/repairs.md @@ -0,0 +1,388 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.4/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.4/ops/running/recovery/repairing-indexes + - /riak/2.9.4/ops/running/recovery/failed-node + - /riak/kv/2.9.4/ops/running/recovery/failed-node + - /riak/2.9.4/ops/running/recovery/repairing-leveldb + - /riak/kv/2.9.4/ops/running/recovery/repairing-leveldb + - /riak/2.9.4/ops/running/recovery/repairing-partitions + - /riak/kv/2.9.4/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/2.9.4/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/2.9.4/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/2.9.4/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`.
+
+Compaction error messages take the following form:
+
+```
+<timestamp> Compaction Error: Corruption: corrupted compressed block contents
+```
+
+To check whether your node has experienced such errors, you will need to run a script that searches each `LOG` file for the string `Compaction error`. Here is an example script:
+
+```bash
+find . -name "LOG" -exec grep -l 'Compaction error' {} \;
+```
+
+If any of your vnodes have experienced compaction errors, their `LOG` files will be listed in the console, with output like this:
+
+```
+./442446784738847563128068650529343492278651453440/LOG
+```
+
+{{% note %}}
+While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level.
+{{% /note %}}
+
+## Healing Corrupted LevelDBs
+
+When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not.
+
+Choose your setup below:
+
+1. [Just LevelDB](#leveldb)
+2. [LevelDB with tiered storage](#leveldb-with-tiered-storage)
+
+### LevelDB
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+3\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+4\. Then set `Options` equal to an empty list:
+
+```erlang
+Options = [].
+```
+
+5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB vnode that you found using the [`find` command above](#checking-for-compaction-errors).
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(Root, VNodeNumber) -> Path = lists:flatten(Root ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n", [Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+### LevelDB with Tiered Storage
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. Check your `riak.conf` file and make note of the following values:
+
+* `leveldb.tiered` (integer)
+* `leveldb.tiered.path.fast`
+* `leveldb.tiered.path.slow`
+
+3\.
To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+4\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+5\. Then supply the information you noted in Step 2:
+
+```erlang
+Options = [
+    {tiered_slow_level, »leveldb.tiered value«},
+    {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"},
+    {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"}
+].
+```
+
+6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB partition that you found using the [`find` command above](#checking-for-compaction-errors), each provided in double quotes.
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(Root, VNodeNumber) -> Path = lists:flatten(Root ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n", [Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+## Repairing Partitions
+
+If you have experienced a loss of object replicas in your cluster, you
+may need to perform a repair operation on one or more of your data
+[partitions]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+run in situations where partitions or whole nodes are lost due to
+corruption or hardware failure. In these cases, nodes or partitions are
+brought back online without any data, which means that the need to
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/) is enabled.
+
+You will need to run a repair if the following are both true:
+
+* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* You have both non-expiring data and keys that are not accessed
+  frequently (which means that they are not likely to be subject to
+  [read repair]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+
+You will most likely not need to run a repair operation if _any_ of the
+following is true:
+
+* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/2.9.4/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Your entire key set is accessed frequently, allowing passive read
+  repair to repair the partitions
+* Your data expires frequently
+
+In most cases, we recommend either using active anti-entropy or, if
+necessary and only when necessary, running a repair operation using the
+instructions below.
+
+### Running a Repair
+
+The Riak KV repair operation will repair objects from a node's adjacent
+partitions on the ring, consequently fixing the index. This is done as
+efficiently as possible by generating a hash range for all the buckets
+and thus avoiding a preflist calculation for each key. Only a hash of
+each key is done, its range determined from a bucket->range map, and
+then the hash is checked against the range.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make them
+mutually exclusive events. If you join or remove a node, all repairs
+across the entire cluster will be killed.
+
+### Repairing a Single Partition
+
+In the case of data loss in a single partition, only that partition can
+be repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit **Enter** again to get a console prompt.
+
+2. Execute the repair for a single partition using the below command:
+
+    ```erlang
+    riak_kv_vnode:repair(»Partition ID«).
+    ```
+
+    where `»Partition ID«` is replaced by the ID of the partition to
+    repair. For example:
+
+    ```erlang
+    riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
+    ```
+
+3. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Repairing All Partitions on a Node
+
+If a node is lost, all partitions currently owned by that node can be
+repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+2. Get a copy of the current ring:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with ring record information.
+    You can safely ignore it.
+
+3. Get a list of partitions owned by the node that needs to be repaired.
+Replace `dev1@127.0.0.1` with the name of the node to be repaired. The
+name can be found in each node's `vm.args` file, specified as the
+`-name` parameter, if you are using the older configuration system; if
+you are using the newer, `riak.conf`-based system, the name is given by
+the `nodename` parameter.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    **Note**: The above is an [Erlang list
+    comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html)
+    that loops over each `{Partition, Node}` tuple in the ring and
+    extracts only the partitions that match the given node name, as a
+    list.
+
+4.
Execute the repair on all the partitions. Executing the repairs all
+at once will cause a lot of `{shutdown, max_concurrency}` messages in
+the logs. These can be safely ignored, as it is just the transfers
+mechanism enforcing an upper limit on the number of concurrent
+transfers.
+
+    ```erlang
+    [riak_kv_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Monitoring Repairs
+
+The above repair commands can be monitored via the
+`riak-admin transfers` command.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This command can be executed from a `riak attach`
+session as shown below:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, and will
+look similar to:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Repairs on a node can also be killed remotely from another node in the
+cluster. From a `riak attach` session, the below command can be used:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
diff --git a/content/riak/kv/2.9.4/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.9.4/using/repair-recovery/rolling-replaces.md
new file mode 100644
index 0000000000..c3516bed35
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/repair-recovery/rolling-replaces.md
@@ -0,0 +1,72 @@
+---
+title: "Rolling Replaces"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Rolling Replaces"
+    identifier: "repair_recover_replace"
+    weight: 106
+    parent: "managing_repair_recover"
+toc: true
+---
+
+[upgrade]: {{<baseurl>}}riak/kv/2.9.4/setup/upgrading/cluster/
+[rolling restarts]: {{<baseurl>}}riak/kv/2.9.4/using/repair-recovery/rolling-restart/
+[add node]: {{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes
+
+Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis.
+
+The following steps should be undertaken on each Riak KV node that you wish to replace:
+
+1\. Create a free node:
+
+  a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster.
+
+  b\. Or free up a node that is currently in the cluster by having it leave:
+
+    ```bash
+    riak-admin cluster leave »nodename«
+    ```
+
+    After creating a node or leaving a node, wait for all transfers to complete:
+
+    ```bash
+    riak-admin transfers
+    ```
+
+2\. Join the free node to your cluster:
+
+```bash
+riak-admin cluster join »free_node«
+```
+
+3\. Next, replace the free node with an existing node:
+
+```bash
+riak-admin cluster replace »free_node« »nodename«
+```
+
+4\. Then review the cluster transition plan:
+
+```bash
+riak-admin cluster plan
+```
+
+5\. And commit the changes:
+
+```bash
+riak-admin cluster commit
+```
+
+6\. Wait for all transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+7\. Repeat steps 2-6 above until each node has been replaced.
+
+8\. Join the replaced node back into the cluster or decommission the additional node that was created.
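+
+Taken together, one pass of steps 2-6 might look like the following sketch. The node names here are hypothetical examples: a free node `riak@10.0.0.9` replacing the cluster member `riak@10.0.0.1`:
+
+```bash
+riak-admin cluster join riak@10.0.0.9                    # step 2: join the free node
+riak-admin cluster replace riak@10.0.0.9 riak@10.0.0.1   # step 3: stage the replacement
+riak-admin cluster plan                                  # step 4: review the transition plan
+riak-admin cluster commit                                # step 5: commit the changes
+riak-admin transfers                                     # step 6: watch until no transfers remain
+```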
+ diff --git a/content/riak/kv/2.9.4/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.9.4/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..501c8fca59 --- /dev/null +++ b/content/riak/kv/2.9.4/using/repair-recovery/rolling-restart.md @@ -0,0 +1,61 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.4/ops/running/recovery/rolling-restart + - /riak/kv/2.9.4/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/2.9.4/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. + diff --git a/content/riak/kv/2.9.4/using/repair-recovery/secondary-indexes.md b/content/riak/kv/2.9.4/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..83fd803487 --- /dev/null +++ b/content/riak/kv/2.9.4/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,139 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.4/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.4/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
+
+### Running a Repair
+
+The secondary indexes of a single partition can be repaired by executing:
+
+```bash
+riak-admin repair-2i <Partition_ID>
+```
+
+The secondary indexes of every partition can be repaired by executing the same command, without a partition ID:
+
+```bash
+riak-admin repair-2i
+```
+
+### Monitoring a Repair
+
+Repairs can be monitored using the below command:
+
+```bash
+riak-admin repair-2i status
+```
+
+### Killing a Repair
+
+In the event the secondary index repair operation needs to be halted, all repairs can be killed with:
+
+```bash
+riak-admin repair-2i kill
+```
+
+----
+
+## Repairing Search Indexes
+
+Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned.
+
+### Running a Repair
+
+If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index.
+
+This is done as efficiently as possible by generating a hash range for all the buckets and thus avoiding a preflist calculation for each key. Only a hash of each key is done, its range determined from a bucket→range map, and then the hash is checked against the range.
+
+This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly.
+
+1. From a cluster node with Riak installed, attach to the Riak console:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit enter again to get a console prompt.
+
+2. Get a copy of the current ring:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with ring record information. You can safely ignore it.
+
+3. Then run the following code to get a list of partitions owned by the node that needs repair. Replace 'dev1@127.0.0.1' with the name of the node you need to repair.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the ring and extracts only the partitions that match the given node name, as a list._
+
+4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` spam, but it's not anything to worry about. That is just the transfers mechanism enforcing an upper limit on the number of concurrent transfers.
+
+    ```erlang
+    [riak_search_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. When you're done, press `Ctrl-D` to disconnect the console. DO NOT run `q()`, which would cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service; it does not stop the code from running.
+
+### Monitoring a Repair
+
+The above repair command can be slow, so if you reattach to the console, you can run the `repair_status` function. You can use the `Partitions` variable defined above to get the status of every partition.
+
+```erlang
+[{P, riak_search_vnode:repair_status(P)} || P <- Partitions].
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node.
This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This means you'll either have to be attached to
+that node's console, or you can use the `rpc` module to make a remote
+call. Here is an example of killing all repairs targeting partitions
+on the local node:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, something akin to this:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Here is an example of executing the call remotely:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make
+them mutually exclusive events. If you join or remove a node, all
+repairs across the entire cluster will be killed.
+
diff --git a/content/riak/kv/2.9.4/using/running-a-cluster.md b/content/riak/kv/2.9.4/using/running-a-cluster.md
new file mode 100644
index 0000000000..86c09719c3
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/running-a-cluster.md
@@ -0,0 +1,336 @@
+---
+title: "Running a Cluster"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Running a Cluster"
+    identifier: "managing_running_a_cluster"
+    weight: 200
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/building/basic-cluster-setup
+  - /riak/kv/2.9.4/ops/building/basic-cluster-setup
+---
+
+Configuring a Riak cluster involves instructing each node to listen on a
+non-local interface, i.e. not `127.0.0.1`, and then joining all of the
+nodes together to participate in the cluster.
+
+Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/) located in your `rel/riak/etc` directory (if
+you compiled from source) or `/etc` (if you used a binary install of
+Riak).
+
+The commands below presume that you are running from a source install,
+but if you have installed Riak with a binary install, you can substitute
+the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin`
+with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts
+are located in the `/bin` directory of your installation.
+
+> **Note on changing the `name` value**
+>
+> If possible, you should avoid starting Riak prior to editing the name of
+a node. This setting corresponds to the `nodename` parameter in the
+`riak.conf` file if you are using the newer configuration system, and to
+the `-name` parameter in `vm.args` (as described below) if you are using
+the older configuration system. If you have already started Riak with
+the default settings, you cannot change the `-name` setting and then
+successfully restart the node.
+>
+> If you cannot restart after changing the `-name` value, you have two
+options:
+>
+> * Discard the existing ring metadata by removing the contents of the
+`ring` directory. This will require rejoining all nodes into a
+cluster again (a sketch of this follows the note below).
+>
+> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
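+
+For example, discarding the ring metadata on a package-installed node might look like the following sketch (the `/var/lib/riak/ring` path is an assumption based on the default package layout; adjust it for your platform):
+
+```bash
+riak stop
+rm -rf /var/lib/riak/ring/*   # discard ring metadata; all nodes must be rejoined afterwards
+riak start
+```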
+
+## Configure the First Node
+
+First, stop your Riak node if it is currently running:
+
+```bash
+riak stop
+```
+
+#### Select an IP address and port
+
+Let's say that the IP address for your cluster is 192.168.1.10 and that
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+interface due to performance gains), you should change your
+configuration file:
+
+```riakconf
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"127.0.0.1", 8087 },
+```
+
+becomes
+
+```riakconf
+listener.protobuf.internal = 192.168.1.10:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"192.168.1.10", 8087 },
+```
+
+{{% note title="Note on upgrading to 2.0" %}}
+If you are upgrading to Riak version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config`/`vm.args`
+configuration files or the newer `riak.conf` if you wish. If you have
+installed Riak 2.0 directly, you should use only `riak.conf`.
+
+Below, examples will be provided for both the old and new configuration
+systems. Bear in mind that you need to use either the older or the newer,
+but never both simultaneously.
+
+More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference).
+{{% /note %}}
+
+If you're using the HTTP interface, you will need to alter your
+configuration in an analogous way:
+
+```riakconf
+listener.http.internal = 127.0.0.1:8098
+```
+
+```appconfig
+%% In the riak_core section:
+
+{http, [ {"127.0.0.1", 8098 } ]},
+```
+
+becomes
+
+```riakconf
+listener.http.internal = 192.168.1.10:8098
+```
+
+```appconfig
+{http, [ {"192.168.1.10", 8098 } ]},
+```
+
+#### Name your node
+
+Every node in Riak has a name associated with it. The default name is
+`riak@127.0.0.1`. Let's say that you want to change the name to
+`riak@192.168.1.10`:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+becomes
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+> **Node Names**
+>
+> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10`
+are both acceptable node naming schemes, but using the FQDN style is
+preferred.
+>
+> Once a node has been started, in order to change the name you must
+either remove ring files from the `/data/ring` directory or
+[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.4/using/admin/riak-admin/#cluster-force-replace) the node.
+
+#### Start the node
+
+Now that your node is properly configured, you can start it:
+
+```bash
+riak start
+```
+
+If the Riak node has been previously started, you must use the
+`riak-admin cluster replace` command to change the node name and update
+the node's ring file.
+
+```bash
+riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10
+```
+
+{{% note title="Note on single nodes" %}}
+If a node is started singly using default settings, as you might do when you
+are building your first test environment, you will need to remove the ring
+files from the data directory after you edit your configuration files.
+`riak-admin cluster replace` will not work since the node has not been joined
+to a cluster.
+{{% /note %}}
+
+As with all cluster changes, you need to view the planned changes by
+running `riak-admin cluster plan` and then running `riak-admin cluster
+commit` to finalize those changes.
+
+The node is now properly set up to join other nodes for cluster
+participation. You can proceed to adding a second node to the cluster.
+
+## Add a Second Node to Your Cluster
+
+Repeat the above steps for a second host on the same network, providing
+the second node with a host/port and node name. Once the second node has
+started, use `riak-admin cluster join` to join the second node to the
+first node, thereby creating an initial Riak cluster. Let's say that
+we've named our second node `riak@192.168.1.11`. From the new node's
+`/bin` directory:
+
+```bash
+riak-admin cluster join riak@192.168.1.10
+```
+
+Output from the above should resemble:
+
+```
+Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10`
+```
+
+Next, plan and commit the changes:
+
+```bash
+riak-admin cluster plan
+riak-admin cluster commit
+```
+
+After the last command, you should see:
+
+```
+Cluster changes committed
+```
+
+If your output was similar, then the second Riak node is now part of the
+cluster and has begun syncing with the first node. Riak provides several
+ways to determine the cluster's ring status. Here are two ways to
+examine your Riak cluster's ring:
+
+1. Using the `riak-admin` command:
+
+    ```bash
+    bin/riak-admin status | grep ring_members
+    ```
+
+    With output resembling the following:
+
+    ```bash
+    ring_members : ['riak@192.168.1.10','riak@192.168.1.11']
+    ```
+
+2. Running the `riak attach` command. This will open up an Erlang shell,
+into which you can type the following command:
+
+    ```erlang
+    1> {ok, R} = riak_core_ring_manager:get_my_ring().
+
+    %% Response:
+
+    {ok,{chstate,'riak@192.168.1.10',.........
+    (riak@192.168.1.10)2> riak_core_ring:all_members(R).
+    ['riak@192.168.1.10','riak@192.168.1.11']
+    ```
+
+To join additional nodes to your cluster, repeat the above steps. You
+can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/2.9.4/using/cluster-operations/adding-removing-nodes) from a cluster.
+
+> **Ring Creation Size**
+>
+> All nodes in the cluster
+must have the same initial ring size setting in order to join and
+participate in cluster activity. This setting can be adjusted in your
+configuration file using the `ring_creation_size` parameter if you're
+using the older configuration system or `ring_size` in the new system.
+>
+> Check the value of all nodes if you receive a message like this:
+> `Failed: riak@10.0.1.156 has a different ring_creation_size`
+
+## Running Multiple Nodes on One Host
+
+If you built Riak from source code, or if you are using the Mac OS X
+pre-built package, then you can easily run multiple Riak nodes on the
+same machine. The most common scenario for doing this is to experiment
+with running a Riak cluster.
+
+**Note**: If you have installed the `.deb` or `.rpm` package, then you
+will need to download and build Riak from source to follow the
+directions below.
+
+To run multiple nodes, make copies of the `riak` directory.
+
+- If you ran `make all rel`, then this can be found in `./rel/riak`
+  under the Riak source root directory.
+- If you are running Mac OS X, then this is the directory where you
+  unzipped the `.tar.gz` file.
+
+Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`,
+`./rel/riak3`, and so on, you need to make two changes:
+
+1.
Set your handoff port and your Protocol Buffers or HTTP port
+(depending on which interface you are using) to different values on each
+node. For example:
+
+    ```riakconf
+    # For Protocol Buffers:
+    listener.protobuf.internal = 127.0.0.1:8187
+
+    # For HTTP:
+    listener.http.internal = 127.0.0.1:8198
+
+    # For either interface:
+    handoff.port = 8199
+    ```
+
+    ```appconfig
+    %% In the pb section of riak_core:
+    {"127.0.0.1", 8187 }
+
+    %% In the http section of riak_core:
+    {"127.0.0.1", 8198}
+    ```
+
+2. Change the name of each node to a unique name. Now, start the nodes,
+changing path names and nodes as appropriate:
+
+```bash
+./rel/riak1/bin/riak start
+./rel/riak2/bin/riak start
+./rel/riak3/bin/riak start
+
+# etc
+```
+
+Next, join the nodes into a cluster:
+
+```bash
+./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1
+./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1
+./rel/riak2/bin/riak-admin cluster plan
+./rel/riak2/bin/riak-admin cluster commit
+```
+
+## Multiple Clusters on One Host
+
+Using the above technique, it is possible to run multiple clusters on
+one computer. If a node hasn’t joined an existing cluster, it will
+behave just as a standalone cluster would. Running multiple clusters on one
+computer is simply a matter of having two or more distinct nodes or
+groups of clustered nodes.
+
diff --git a/content/riak/kv/2.9.4/using/security.md b/content/riak/kv/2.9.4/using/security.md
new file mode 100644
index 0000000000..2125f4054c
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/security.md
@@ -0,0 +1,196 @@
+---
+title: "Security & Firewalls"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Security"
+    identifier: "managing_security"
+    weight: 205
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/advanced/security
+  - /riak/kv/2.9.4/ops/advanced/security
+---
+
+[config reference search]: {{<baseurl>}}riak/kv/2.9.4/configuring/reference/#search
+[config search enabling]: {{<baseurl>}}riak/kv/2.9.4/configuring/search/#enabling-riak-search
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.4/configuring/v3-multi-datacenter/ssl
+[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html
+[security basics]: {{<baseurl>}}riak/kv/2.9.4/using/security/basics
+[security managing]: {{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/
+[Solr]: http://lucene.apache.org/solr/
+[usage search]: {{<baseurl>}}riak/kv/2.9.4/developing/usage/search
+
+> **Internal security**
+>
+> This document covers network-level security. For documentation on the
+authentication and authorization features introduced in Riak 2.0, see
+[Authentication and Authorization][security basics] and [Managing Security Sources][security managing].
+
+This article discusses standard configurations and port settings to use
+when providing network security for a Riak cluster. There are two
+classes of access control for Riak:
+
+* Other Riak nodes participating in the cluster
+* Clients making use of the Riak cluster
+
+The settings for both access groups are located in your cluster's
+configuration files. If you are using the newer configuration system,
+you can set a host and port for each node in that node's `riak.conf`
+file, setting `listener.protobuf` if you are using Riak's Protocol
+Buffers interface or `listener.http` if you are using HTTP (or
+`listener.https` if you are using SSL).
If you are using the older
+configuration system, adjust the settings of `pb`, `http`, or `https`,
+depending on which client interface you are using.
+
+Make note of these configurations and set up your firewall to allow
+incoming TCP access to those ports or IP address/port combinations.
+Exceptions to this are the `handoff_ip` and `handoff_port` directives.
+Those are for communication between Riak nodes only.
+
+## Inter-node Communication
+
+Riak uses the Erlang distribution mechanism for most inter-node
+communication. Riak identifies other machines in the ring using Erlang
+identifiers (`<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves
+these node identifiers to a TCP port on a given machine via the Erlang
+Port Mapper daemon (epmd) running on each cluster node.
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. For inter-node communication, Erlang uses an unpredictable
+port by default; it binds to port 0, which means the first available
+port.
+
+For ease of firewall configuration, Riak can be configured
+to instruct the Erlang interpreter to use a limited range
+of ports. For example, to restrict the range of ports that Erlang will
+use for inter-Erlang node communication to 6000-7999, add the following
+lines to the configuration file on each Riak node:
+
+```riakconf
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+```
+
+```appconfig
+{ kernel, [
+    {inet_dist_listen_min, 6000},
+    {inet_dist_listen_max, 7999}
+  ]},
+```
+
+The above lines should be added into the top-level list in `app.config`,
+at the same level as all the other applications (e.g. `riak_core`).
+Then configure your firewall to allow incoming access to TCP ports 6000
+through 7999 from whichever network(s) contain your Riak nodes.
+
+### Riak Node Ports
+
+Riak nodes in a cluster need to be able to communicate freely with one
+another on the following ports:
+
+* epmd listener: TCP:4369
+* handoff_port listener: TCP:8099
+* range of ports specified in `app.config` or `riak.conf`
+
+### Riak Client Ports
+
+Riak clients must be able to contact at least one machine in a Riak
+cluster on the following TCP ports:
+
+Protocol | Port
+:--------|:----
+<a href="../../developing/api/http">HTTP</a> | TCP port 8098
+<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087
+
+### Riak Search Ports
+
+Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs
+on each Riak node if search has been [enabled][config search enabling]. When
+Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well
+as some ephemeral ports. The well-known port is determined by the value of the
+`search.solr.jmx_port` setting in each node's [Search configuration][config reference search].
+The default is 8985.
+
+In addition to JMX ports, Solr also binds to a well-known port of its
+own, as determined by each node's `search.solr.port` setting, which is
+also located in each node's Search configuration. The default is 8093.
+
+# Riak Security Community
+
+## Riak
+
+Riak is a powerful open-source distributed database focused on scaling
+predictably and easily, while remaining highly available in the face of
+server crashes, network partitions, or other (inevitable) disasters.
+
+## Commitment
+
+Data security is an important and sensitive issue to many of our users.
+
+A real-world approach to security allows us to balance appropriate
+levels of security and related overhead while creating a fast, scalable,
+and operationally straightforward database.
+
+### Continuous Improvement
+
+Though we make every effort to thwart security vulnerabilities whenever
+possible (including through independent reviews), no system is
+completely secure. We will never claim that Riak is 100% secure (and you
+should seriously doubt anyone who claims their solution is). What we can
+promise is that we openly accept all vulnerabilities from the community.
+When appropriate, we'll publish and make every attempt to quickly
+address these concerns.
+
+### Balance
+
+More layers of security increase operational and administrative costs.
+Sometimes those costs are warranted, sometimes they are not. Our
+approach is to strike an appropriate balance between effort, cost, and
+security.
+
+For example, Riak does not have fine-grained role-based security. Though
+it can be an attractive bullet-point in a database comparison chart,
+you're usually better off finely controlling data access through your
+application or a service layer.
+
+### Notifying Basho
+
+If you discover a potential security issue, please email us at
+**security@basho.com**, and allow us 48 hours to reply.
+
+We prefer to be contacted first, rather than finding out about an issue
+from blog posts on the Internet. This allows us to open a dialogue with the security
+community on how best to handle a possible exploit without putting any
+users at risk.
+
+## Security Best Practices
+
+### Authentication and Authorization
+
+For instructions on how to apply permissions and to require client
+authentication, please see our documentation on [Riak Security][security basics].
+
+### Network Configurations
+
+Being a distributed database means that much of Riak's security springs
+from how you configure your network. We have a few recommendations for
+[Security and Firewalls][security basics].
+
+### Client Auth
+
+All of the Riak client libraries support encrypted TCP communication
+as well as authentication and authorization. For instructions on how
+to apply permissions and to require client authentication, please see
+our documentation on [Riak Security][security basics].
+
+### Multi-Datacenter Replication
+
+For those versions of Riak that support Multi Data Center (MDC)
+Replication, you can configure Riak 1.2+ to communicate over SSL, to
+seamlessly encrypt the message traffic.
+
+See also: [Multi Data Center Replication: SSL][config v3 ssl]
+
diff --git a/content/riak/kv/2.9.4/using/security/basics.md b/content/riak/kv/2.9.4/using/security/basics.md
new file mode 100644
index 0000000000..dfa7f31464
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/security/basics.md
@@ -0,0 +1,848 @@
+---
+title: "Security Basics"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Security Basics"
+    identifier: "security_basics"
+    weight: 100
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/authz
+  - /riak/kv/2.9.4/ops/running/authz
+---
+
+> **Note on Network security**
+>
+> This document covers only the 2.0 authentication and authorization
features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.4/using/security/).
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/2.9.4/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/2.9.4/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
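+
+As a rough sketch, the workflow described in this document, from user
+creation through enabling security, looks like this on the command line
+(the user, network, and permission here are illustrative only):
+
+```bash
+# Hypothetical example: create a user, give it a way to authenticate,
+# grant it a permission, then turn security on
+riak-admin security add-user alice password=rosebud98
+riak-admin security add-source alice 127.0.0.1/32 password
+riak-admin security grant riak_kv.get on any to alice
+riak-admin security enable
+```
+
+Each of these commands is covered in detail in the sections that follow.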
+
+**Note**: Currently, Riak security commands can be run only through
+the command line, using the `riak-admin security` command. In future
+versions of Riak, administrators may have the option of issuing
+those commands through the Protocol Buffers and HTTP interfaces.
+
+### Enabling Security
+
+> **Warning: Enable security with caution**
+>
+> Enabling security will change the way your client libraries and
+your applications interact with Riak.
+>
+> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment.
+
+Riak security is disabled by default. To enable it:
+
+```bash
+riak-admin security enable
+```
+
+**As per the warning above, do not enable security in production without
+taking the appropriate precautions.**
+
+All users, groups, authentication sources, and permissions can be
+configured while security is disabled, allowing you to create a
+security configuration of any level of complexity without prematurely
+impacting the service. This should be borne in mind when you are
+[managing users](#user-management) and [managing sources](#managing-sources).
+
+### Disabling Security
+
+If you disable security, you disable all of the various permissions
+checks that take place when executing operations against Riak. Users,
+groups, and other security attributes remain available for
+configuration while security is disabled, and will be applied if and
+when security is re-enabled.
+
+```bash
+riak-admin security disable
+```
+
+While security is disabled, clients will need to be reconfigured so
+that they no longer use TLS or send credentials.
+
+### Checking Security Status
+
+To check whether security is currently enabled for the cluster, use the
+`status` command:
+
+```bash
+riak-admin security status
+```
+
+This command will usually return `Enabled` or `Disabled`, but if
+security is enabled on a mixed-mode cluster (running a combination of
+Riak 2.0 and older versions) it will indicate that security is enabled
+but not yet available.
+
+## User Management
+
+Riak security enables you to control _authorization_ by creating,
+modifying, and deleting user characteristics and granting users
+selective access to Riak functionality (and also to revoke access).
+Users can be assigned one or more of the following characteristics:
+
+* `username`
+* `groups`
+* `password`
+
+You may also assign users characteristics beyond those listed
+above---e.g., listing email addresses or other information---but those
+values will carry no special significance for Riak.
+
+**Note**: The `username` is the one user characteristic that cannot be
+changed once a user has been created.
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak-admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak-admin security print-groups
+```
+
+Example output, assuming a user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups |       password       |           options            |
++----------+--------+----------------------+------------------------------+
+| riakuser |        |983e8ae1421574b8733824|              []              |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in hashed form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and a `name` of
+`lucius`, the output would look like this:
+
+```
++----------+----------------+----------------------+---------------------+
+| username |     groups     |       password       |       options       |
++----------+----------------+----------------------+---------------------+
+| riakuser |      dev       |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` or `security print-group` (singular) commands
+can be used with a name as an argument to see the same information as
+above, except for only that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak-admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group  |   type   |  bucket  |                 grants                 |
++--------+----------+----------+----------------------------------------+
+| admin  |    *     |    *     |      riak_kv.get, riak_kv.delete,      |
+|        |          |          |              riak_kv.put               |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|    *     |      *      |      riak_kv.get, riak_kv.delete,      |
+|          |             |              riak_kv.put               |
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately.
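+
+For example, if both a user and a group are named `admin` (names here
+are illustrative), the prefixed forms show the grants for each
+separately:
+
+```bash
+# Grants for the user named admin
+riak-admin security print-grants user/admin
+
+# Grants for the group named admin
+riak-admin security print-grants group/admin
+```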
+
+### Add Group
+
+For easier management of permissions across several users, it is
+possible to create groups to be assigned to those users.
+
+```bash
+riak-admin security add-group admin
+```
+
+### Add User
+
+To create a user with the username `riakuser`, we use the `add-user`
+command:
+
+```bash
+riak-admin security add-user riakuser
+```
+
+Using the command this way will create the user `riakuser` without _any_
+characteristics beyond a username, which is the only attribute that you
+must assign upon user creation.
+
+Alternatively, a password---or other attributes---can be assigned to the
+user upon creation. Here, we'll assign a password:
+
+```bash
+riak-admin security add-user riakuser password=Test1234
+```
+
+### Assigning a Password and Altering Existing User Characteristics
+
+While passwords and other characteristics can be set upon user creation,
+it often makes sense to change user characteristics after the user has
+already been created. Let's say that the user `riakuser` was created
+without a password (or created _with_ a password that we'd like to
+change). The `alter-user` command can be used to modify our `riakuser`
+user:
+
+```bash
+riak-admin security alter-user riakuser password=opensesame
+```
+
+When creating or altering a user, any number of `<option>=<value>`
+pairs can be appended to the end of the command. Any non-standard
+options will be stored and displayed via the `riak-admin security
+print-users` command.
+
+```bash
+riak-admin security alter-user riakuser name=bill age=47 fav_color=red
+```
+
+Now, the `print-users` command should return this:
+
+```
++----------+--------+----------+--------------------------------------------------+
+| username | groups | password |                     options                      |
++----------+--------+----------+--------------------------------------------------+
+| riakuser |        |          |[{"fav_color","red"},{"age","47"},{"name","bill"}]|
++----------+--------+----------+--------------------------------------------------+
+```
+
+**Note**: Usernames _cannot_ be changed using the `alter-user` command.
+For example, running `riak-admin security alter-user riakuser
+username=other-name` will instead add the
+`{"username","other-name"}` tuple to `riakuser`'s options.
+
+### Managing Groups for a User
+
+If we have a user `riakuser` and we'd like to assign her to the
+`admin` group, we assign the value `admin` to the option `groups`:
+
+```bash
+riak-admin security alter-user riakuser groups=admin
+```
+
+If we'd like to make the user `riakuser` both an `admin` and an
+`archoverlord`:
+
+```bash
+riak-admin security alter-user riakuser groups=admin,archoverlord
+```
+
+There is no way to incrementally add groups; even if `riakuser` was
+already an `admin`, it is necessary to list it again when adding the
+`archoverlord` group. Thus, to remove a group from a user, use
+`alter-user` and list all *other* groups.
+
+If the user should be removed from all groups, use `groups=` with no
+list:
+
+```bash
+riak-admin security alter-user riakuser groups=
+```
+
+### Managing Groups for Groups
+
+Groups can be added to other groups for cascading permissions.
+
+```bash
+riak-admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```bash
+riak-admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```bash
+riak-admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak-admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation |
+:----------|:----------|
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully.
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/2.9.4/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/2.9.4/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/2.9.4/configuring/search/) document. 
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak-admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak-admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Check the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticate against the given pluggable authentication module (PAM) service
+`certificate` | Authenticate using a client certificate
+
+### Example: Adding a Trusted Source
+
+Security sources can be added either to a specific user, multiple users,
+or all users (`all`).
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak-admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+|       users        |    cidr    |  source  | options  |
++--------------------+------------+----------+----------+
+|        all         |127.0.0.1/32|  trust   |    []    |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR.
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/2.9.4/setup/installing/source/erlang). 
This issue should
+not affect Erlang 17.0 and later.
+
+## Enabling SSL
+
+In order to use any authentication or authorization features, you must
+enable SSL for Riak. **SSL is disabled by default**, but you will need
+to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#client-interfaces) for the node
+as well as a [certificate configuration](#certificate-configuration).
+
+If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/2.9.4/developing/api/http) for Riak and would like to
+configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host
+and port. The following configuration would establish port 8088 on
+`localhost` as the HTTPS port:
+
+```riakconf
+listener.https.$name = 127.0.0.1:8088
+
+# By default, "internal" is used as the "name" setting
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {https, [{"127.0.0.1", 8088}]},
+    %% Other configs
+  ]}
+```
+
+## TLS Settings
+
+When using Riak security, you can choose which versions of SSL/TLS are
+allowed. By default, only TLS 1.2 is allowed, but this version can be
+disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#security) to `on` or `off`:
+
+* `tls_protocols.tlsv1`
+* `tls_protocols.tlsv1.1`
+* `tls_protocols.tlsv1.2`
+* `tls_protocols.sslv3`
+
+Three things to note:
+
+* Among the four available options, only TLS version 1.2 is enabled by
+  default
+* You can enable more than one protocol at a time
+* We strongly recommend that you do _not_ use SSL version 3 unless
+  absolutely necessary
+
+## Certificate Configuration
+
+If you are using any of the available [security sources]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#trust-based-authentication), you will need to do so
+over a secure SSL connection. In order to establish a secure connection,
+you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#security) point to the proper paths for your
+generated certs. By default, Riak assumes that all certs are stored in
+each node's `/etc` directory.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can change the location of the `/etc` directory by modifying the
+`platform_etc_dir`. More information can be found in our documentation
+on [configuring directories]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/#directories).
+ +<table class="riak-conf"> + <thead> + <tr> + <th>Type</th> + <th>Parameter</th> + <th>Default</th> + </tr> + </thead> + <tbody> + <tr> + <td><strong>Signing authority</strong></td> + <td><code>ssl.cacertfile</code></td> + <td><code>#(platform_etc_dir)/cacertfile.pem</code></td> + </tr> + <tr> + <td><strong>Cert</strong></td> + <td><code>ssl.certfile</code></td> + <td><code>#(platform_etc_dir)/cert.pem</code></td> + </tr> + <tr> + <td><strong>Key file</strong></td> + <td><code>ssl.keyfile</code></td> + <td><code>#(platform_etc_dir)/key.pem</code></td> + </tr> + </tbody> +</table> + +If you are using the older, `app.config`-based configuration system, +these paths can be set in the `ssl` subsection of the `riak_core` +section. The corresponding parameters are shown in the example below: + +```appconfig +{riak_core, [ + %% Other configs + + {ssl, [ + {certfile, "./etc/cert.pem"}, + {keyfile, "./etc/key.pem"}, + {cacertfile, "./etc/cacertfile.pem"} + ]}, + + %% Other configs +]} +``` + +## Referer Checks and Certificate Revocation Lists + +In order to provide safeguards against +[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting) +(XSS) and +[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +attacks, Riak performs [secure referer +checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those +checks make it impossible to serve data directly from Riak. To disable +those checks, set the `secure_referer_check` parameter to `off`. + +If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/2.9.4/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +default. To disable this behavior, set the `check_crl` parameter to +`off`. + diff --git a/content/riak/kv/2.9.4/using/security/best-practices.md b/content/riak/kv/2.9.4/using/security/best-practices.md new file mode 100644 index 0000000000..82de38ed49 --- /dev/null +++ b/content/riak/kv/2.9.4/using/security/best-practices.md @@ -0,0 +1,81 @@ +--- +draft: true +title: "Security Best Practices" +description: "" +project: "riak_kv" +project_version: 2.9.4 +menu: + riak_kv-2.9.4: + name: "Best Practices" + identifier: "security_best_practices" + weight: 102 + parent: "managing_security" +toc: true +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor
+regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris
+coniugis.
+
+## Troiana quoque
+
+Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe
+sanctique meum*; est. [Gente inimica
+premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret
+tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen
+ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret
+quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim
+suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis
+nimium in miserata?
+
+1. `In naribus aequos aberant`
+2. Naturae murmura te rimas suarum vulnus quod
+3. Socios leto loquor timide
+4. Ergo sub
+5. Patrias mihi consumite breve
+
+## Ruit huic movit luminibus excubias arma
+
+> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex
+vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat
+*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens
+cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec.
+
+1. Hic causam et dilecte nudae nec corpus
+2. Cor Si nive
+3. Petis equos perosa tu perterrita exitus non
+4. Per et et ire geminos parte
+5. Aqua coniunx cecidisse sonum
+
+```
+Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum
+potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et
+nec rubebant pietas, ipsa.
+```
+
diff --git a/content/riak/kv/2.9.4/using/security/managing-sources.md b/content/riak/kv/2.9.4/using/security/managing-sources.md
new file mode 100644
index 0000000000..80c4926e50
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/security/managing-sources.md
@@ -0,0 +1,270 @@
+---
+title: "Managing Security Sources"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Managing Security Sources"
+    identifier: "security_manage_sources"
+    weight: 101
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.4/ops/running/security-sources
+  - /riak/kv/2.9.4/ops/running/security-sources
+---
+
+If you're looking for more general information on Riak Security, it may
+be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/2.9.4/using/security/basics).
+
+This document provides more granular information on the four available
+authentication sources in Riak Security: trusted networks, password,
+pluggable authentication modules (PAM), and certificates. These sources
+correspond to `trust`, `password`, `pam`, and `certificate`,
+respectively, in the `riak-admin security` interface.
+
+The examples below will assume that the network in question is
+`127.0.0.1/32` and that a Riak user named `riakuser` has been
+[created]({{<baseurl>}}riak/kv/2.9.4/using/security/basics/#user-management) and that
+security has been [enabled]({{<baseurl>}}riak/kv/2.9.4/using/security/basics/#security-basics).
+
+{{% note title="Note on SSL connections" %}}
+If you use _any_ of the aforementioned security sources, even `trust`, you
+will need to do so via a secure SSL connection.
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak-admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak-admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 password +riak-admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/2.9.4/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
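+
+A minimal sketch of that copying step might look like this (the host
+name, file names, and `/etc/riak` destination are assumptions, not
+requirements):
+
+```bash
+# Copy the CA cert plus this node's cert and key into the node's
+# configuration directory; repeat for each node in the cluster
+scp cacert.pem cert.pem key.pem admin@riak-node-1:/etc/riak/
+```
+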
+The default directory for certificates is `/etc`, though you can specify +a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: + +```riakconf +ssl.certfile = /path/to/cert.pem +ssl.keyfile = /path/to/key.pem +ssl.cacertfile = /path/to/cacert.pem +``` + +In the client-side example above, the client's `CN` and Riak username +needed to match. On the server (i.e. Riak) side, the `CN` specified _on +each node_ must match the node's name as registered by Riak. You can +find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/2.9.4/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +`riak-node-1`, you would need to generate your certificate with that in +mind, as in this OpenSSL example: + +```bash +openssl req -new ... '/CN=riak-node-1' +``` + +Once certificates have been properly generated and configured on all of +the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/2.9.4/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +certificate that you generated for the user `riakuser`. + +How to use Riak clients in conjunction with OpenSSL and other +certificates varies from client library to client library. We strongly +recommend checking the documentation of your client library for further +information. + +## PAM-based Authentication + +This section assumes that you have set up a PAM service bearing the name +`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition +specifying `auth` and/or other PAM services set up to authenticate a +user named `riakuser`. As in the certificate-based authentication +example above, the user's name must be the same in both your +authentication module and in Riak Security. + +If we want the user `riakuser` to use this PAM service on `localhost`, +we need to add a `pam` security source in Riak and specify the name of +the service: + +```bash +riak-admin security add-source all 127.0.0.1/32 pam service=riak_pam +``` + +**Note**: If you do not specify a name for your PAM service, Riak will +use the default, which is `riak`. + +To verify that the source has been properly specified: + +```bash +riak-admin security print-sources +``` + +That command should output the following: + +``` ++--------------------+------------+----------+------------------------+ +| users | cidr | source | options | ++--------------------+------------+----------+------------------------+ +| riakuser |127.0.0.1/32| pam |[{"service","riak_pam"}]| ++--------------------+------------+----------+------------------------+ +``` + +You can test that setup most easily by using `curl`. A normal request to +Riak without specifying a user will return an `Unauthorized` message: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +Response: + +``` +<html><head><title>401 Unauthorized
</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></body></html>
+```
+
+If you identify yourself as `riakuser` and are successfully
+authenticated by your PAM service, you should get either `not found` or
+a Riak object if one is stored in the specified bucket type/bucket/key
+path:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## How Sources Are Applied
+
+When managing security sources---any of the sources explained
+above---you always have the option of applying a source to either a
+single user, multiple users, or all users (`all`). If specific users and
+`all` have no sources in common, this presents no difficulty. But what
+happens if one source is applied to `all` and a different source is
+applied to a specific user?
+
+The short answer is that the more specifically assigned source---i.e. to
+the user---will be considered the user's security source. We'll illustrate
+that with the following example, in which the `certificate` source is
+assigned to `all`, but the `password` source is assigned to `riakuser`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 certificate
+riak-admin security add-source riakuser 127.0.0.1/32 password
+```
+
+If we run `riak-admin security print-sources`, we'll get the following
+output:
+
+```
++--------------------+------------+-----------+----------+
+|       users        |    cidr    |  source   | options  |
++--------------------+------------+-----------+----------+
+|      riakuser      |127.0.0.1/32| password  |    []    |
+|                    |127.0.0.1/32|certificate|    []    |
+|        all         |127.0.0.1/32|certificate|    []    |
++--------------------+------------+-----------+----------+
+```
+
+As we can see, `password` is set as the security source for `riakuser`,
+whereas everyone else will authenticate using `certificate`.
+
diff --git a/content/riak/kv/2.9.4/using/security/v2-v3-ssl-ca.md b/content/riak/kv/2.9.4/using/security/v2-v3-ssl-ca.md
new file mode 100644
index 0000000000..80a436fafb
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/security/v2-v3-ssl-ca.md
@@ -0,0 +1,81 @@
+---
+draft: true
+title: "V2 / V3 SSL & CA Validation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "V2/V3 SSL & CA Validation"
+    identifier: "security_validation"
+    weight: 103
+    parent: "managing_security"
+toc: true
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**.
Et amor
+regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris
+coniugis.
+
+## Troiana quoque
+
+Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe
+sanctique meum*; est. [Gente inimica
+premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret
+tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen
+ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret
+quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim
+suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis
+nimium in miserata?
+
+1. `In naribus aequos aberant`
+2. Naturae murmura te rimas suarum vulnus quod
+3. Socios leto loquor timide
+4. Ergo sub
+5. Patrias mihi consumite breve
+
+## Ruit huic movit luminibus excubias arma
+
+> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex
+vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat
+*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens
+cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec.
+
+1. Hic causam et dilecte nudae nec corpus
+2. Cor Si nive
+3. Petis equos perosa tu perterrita exitus non
+4. Per et et ire geminos parte
+5. Aqua coniunx cecidisse sonum
+
+```
+Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum
+potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et
+nec rubebant pietas, ipsa.
+```
+
diff --git a/content/riak/kv/2.9.4/using/troubleshooting.md b/content/riak/kv/2.9.4/using/troubleshooting.md
new file mode 100644
index 0000000000..cacf2bacac
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/troubleshooting.md
@@ -0,0 +1,24 @@
+---
+title: "Troubleshooting"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "Troubleshooting"
+    identifier: "managing_troubleshooting"
+    weight: 207
+    parent: "managing"
+toc: true
+---
+
+[http 204]: ./http-204
+
+## In This Section
+
+#### [HTTP 204][http 204]
+
+About the HTTP 204 response.
+
+[Learn More >>][http 204]
+
diff --git a/content/riak/kv/2.9.4/using/troubleshooting/http-204.md b/content/riak/kv/2.9.4/using/troubleshooting/http-204.md
new file mode 100644
index 0000000000..6e9ef41fa7
--- /dev/null
+++ b/content/riak/kv/2.9.4/using/troubleshooting/http-204.md
@@ -0,0 +1,18 @@
+---
+title: "HTTP 204"
+description: ""
+project: "riak_kv"
+project_version: 2.9.4
+menu:
+  riak_kv-2.9.4:
+    name: "HTTP 204"
+    identifier: "troubleshooting_http_204"
+    weight: 101
+    parent: "managing_troubleshooting"
+toc: true
+---
+
+In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers.
+
+If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored; otherwise you will receive a `204 No Content`.
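+
+For example, storing an object with and without `returnbody=true`
+(the bucket and key names here are illustrative):
+
+```bash
+# Returns 204 No Content on success
+curl -i -XPUT -H "Content-Type: text/plain" -d "hello" \
+  http://localhost:8098/buckets/test/keys/demo
+
+# Returns 200 OK plus the stored value in the response body
+curl -i -XPUT -H "Content-Type: text/plain" -d "hello" \
+  "http://localhost:8098/buckets/test/keys/demo?returnbody=true"
+```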
+
diff --git a/content/riak/kv/2.9.7/_reference-links.md b/content/riak/kv/2.9.7/_reference-links.md
new file mode 100644
index 0000000000..26c43c3467
--- /dev/null
+++ b/content/riak/kv/2.9.7/_reference-links.md
@@ -0,0 +1,254 @@
+
+# Riak KV 2.9.7 Reference Links List
+
+
+## Common
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.7/downloads/
+[install index]: {{<baseurl>}}riak/kv/2.9.7/setup/installing
+[upgrade index]: {{<baseurl>}}riak/kv/2.9.7/upgrading
+[plan index]: {{<baseurl>}}riak/kv/2.9.7/planning
+[config index]: {{<baseurl>}}riak/kv/2.9.7/using/configuring/
+[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/
+[manage index]: {{<baseurl>}}riak/kv/2.9.7/using/managing
+[performance index]: {{<baseurl>}}riak/kv/2.9.7/using/performance
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode
+[contact basho]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+## Planning
+
+[plan index]: {{<baseurl>}}riak/kv/2.9.7/setup/planning
+[plan start]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/start
+[plan backend]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb
+[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveled
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/multi
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/cluster-capacity
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/bitcask-capacity-calc
+[plan best practices]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/best-practices
+[plan future]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/future
+
+
+## Installing
+
+[install index]: {{<baseurl>}}riak/kv/2.9.7/setup/installing
+[install aws]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/amazon-web-services
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/debian-ubuntu
+[install freebsd]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/freebsd
+[install mac osx]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/mac-osx
+[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/rhel-centos
+[install smartos]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/smartos
+[install solaris]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/solaris
+[install suse]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/suse
+[install windows azure]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/windows-azure
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source/erlang
+[install source jvm]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source/jvm
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/verify
+
+
+## Upgrading
+
+[upgrade index]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading
+[upgrade checklist]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading/checklist
+[upgrade version]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading/version
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading/cluster
+[upgrade mdc]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading/multi-datacenter
+[upgrade downgrade]: {{<baseurl>}}riak/kv/2.9.7/setup/downgrade
+
+
+## Configuring
+
+[config index]: {{<baseurl>}}riak/kv/2.9.7/configuring
+[config basic]: {{<baseurl>}}riak/kv/2.9.7/configuring/basic
+[config backend]: {{<baseurl>}}riak/kv/2.9.7/configuring/backend
+[config manage]: {{<baseurl>}}riak/kv/2.9.7/configuring/managing
+[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/
+[config strong consistency]: {{<baseurl>}}riak/kv/2.9.7/configuring/strong-consistency
+[config load balance]: {{<baseurl>}}riak/kv/2.9.7/configuring/load-balancing-proxy
+[config mapreduce]: {{<baseurl>}}riak/kv/2.9.7/configuring/mapreduce
+[config search]: {{<baseurl>}}riak/kv/2.9.7/configuring/search/
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter/ssl
+
+[config v2 mdc]: {{<baseurl>}}riak/kv/2.9.7/configuring/v2-multi-datacenter
+[config v2 nat]: {{<baseurl>}}riak/kv/2.9.7/configuring/v2-multi-datacenter/nat
+[config v2 quickstart]: {{<baseurl>}}riak/kv/2.9.7/configuring/v2-multi-datacenter/quick-start
+[config v2 ssl]: {{<baseurl>}}riak/kv/2.9.7/configuring/v2-multi-datacenter/ssl
+
+
+
+## Using
+
+[use index]: {{<baseurl>}}riak/kv/2.9.7/using/
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-admin-commands
+[use running cluster]: {{<baseurl>}}riak/kv/2.9.7/using/running-a-cluster
+
+### Reference
+
+[use ref custom code]: {{<baseurl>}}riak/kv/2.9.7/using/reference/custom-code
+[use ref handoff]: {{<baseurl>}}riak/kv/2.9.7/using/reference/handoff
+[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.7/using/reference/statistics-monitoring
+[use ref search]: {{<baseurl>}}riak/kv/2.9.7/using/reference/search
+[use ref 2i]: {{<baseurl>}}riak/kv/2.9.7/using/reference/secondary-indexes
+[use ref snmp]: {{<baseurl>}}riak/kv/2.9.7/using/reference/snmp
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency
+[use ref jmx]: {{<baseurl>}}riak/kv/2.9.7/using/reference/jmx
+[use ref obj del]: {{<baseurl>}}riak/kv/2.9.7/using/reference/object-deletion/
+[use ref v3 mdc]: {{<baseurl>}}riak/kv/2.9.7/using/reference/v3-multi-datacenter
+[use ref v2 mdc]: {{<baseurl>}}riak/kv/2.9.7/using/reference/v2-multi-datacenter
+
+### Cluster Admin
+
+[use admin index]: {{<baseurl>}}riak/kv/2.9.7/using/admin/
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.7/using/admin/commands/
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-cli/
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-control/
+
+### Cluster Operations
+
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/inspecting-node
+[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/changing-cluster-info
+[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.7/configuring/load-balancing-proxy
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/bucket-types
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/handoff
+[cluster ops log]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/logging
+[cluster ops obj del]: {{<baseurl>}}riak/kv/2.9.7/using/reference/object-deletion
+[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/backing-up
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/strong-consistency
+[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.7/using/reference/secondary-indexes
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter
+[cluster ops v2 mdc]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/v2-multi-datacenter
+
+### Repair/Recover
+
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.7/using/repair-recovery
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.7/using/repair-recovery/failure-recovery/
+
+### Security
+
+[security index]: {{<baseurl>}}riak/kv/2.9.7/using/security/
+[security basics]: {{<baseurl>}}riak/kv/2.9.7/using/security/basics
+[security managing]: {{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/
+
+### Performance
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.7/using/performance/
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.7/using/performance/benchmarking
+[perf open files]: {{<baseurl>}}riak/kv/2.9.7/using/performance/open-files-limit/
+[perf erlang]: {{<baseurl>}}riak/kv/2.9.7/using/performance/erlang
+[perf aws]: {{<baseurl>}}riak/kv/2.9.7/using/performance/amazon-web-services
+[perf latency checklist]: {{<baseurl>}}riak/kv/2.9.7/using/performance/latency-reduction
+
+### Troubleshooting
+
+[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.7/using/troubleshooting/http-204
+
+
+## Developing
+
+[dev index]: {{<baseurl>}}riak/kv/2.9.7/developing
+[dev client libraries]: {{<baseurl>}}riak/kv/2.9.7/developing/client-libraries
+[dev data model]: {{<baseurl>}}riak/kv/2.9.7/developing/data-modeling
+[dev data types]: {{<baseurl>}}riak/kv/2.9.7/developing/data-types
+[dev kv model]: {{<baseurl>}}riak/kv/2.9.7/developing/key-value-modeling
+
+### Getting Started
+
+[getting started]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started
+[getting started java]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/java
+[getting started ruby]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/ruby
+[getting started python]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/python
+[getting started php]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/php
+[getting started csharp]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/csharp
+[getting started nodejs]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/nodejs
+[getting started erlang]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/erlang
+[getting started golang]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/golang
+
+[obj model java]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/java/object-modeling
+[obj model ruby]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/ruby/object-modeling
+[obj model python]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/python/object-modeling
+[obj model csharp]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/csharp/object-modeling
+[obj model nodejs]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/nodejs/object-modeling
+[obj model erlang]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/erlang/object-modeling
+[obj model golang]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started/golang/object-modeling
+
+### Usage
+
+[usage index]: {{<baseurl>}}riak/kv/2.9.7/developing/usage
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution
+[usage content types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/content-types
+[usage create objects]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/creating-objects
+[usage custom extractors]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/custom-extractors
+[usage delete objects]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/deleting-objects
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce
+[usage search]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search
+[usage search schema]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search-schemas
+[usage search data types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/searching-data-types
+[usage 2i]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes
+[usage update objects]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/updating-objects
+
+### App Guide
+
+[apps mapreduce]: {{<baseurl>}}riak/kv/2.9.7/developing/app-guide/advanced-mapreduce
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.7/developing/app-guide/replication-properties
+[apps strong consistency]: {{<baseurl>}}riak/kv/2.9.7/developing/app-guide/strong-consistency
+
+### API
+
+[dev api backend]: {{<baseurl>}}riak/kv/2.9.7/developing/api/backend
+[dev api http]: {{<baseurl>}}riak/kv/2.9.7/developing/api/http
+[dev api http status]: {{<baseurl>}}riak/kv/2.9.7/developing/api/http/status
+[dev api pbc]: {{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers/
+
+
+## Learn
+
+[learn new nosql]: {{<baseurl>}}riak/kv/learn/new-to-nosql
+[learn use cases]: {{<baseurl>}}riak/kv/learn/use-cases
+[learn why riak]: {{<baseurl>}}riak/kv/learn/why-riak-kv
+
+[glossary]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#active-anti-entropy-aae
+[glossary read rep]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#read-repair
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode
+
+[concept aae]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets
+[concept cap neg]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/capability-negotiation
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters/
+[concept crdts]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/crdts
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency
+[concept keys objects]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/vnodes
+
+
+
+## Community
+
+[community]: {{<baseurl>}}community
+[community projects]: {{<baseurl>}}community/projects
+[reporting bugs]: {{<baseurl>}}community/reporting-bugs
+[taishi]: {{<baseurl>}}community/taishi
+
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/add-ons.md b/content/riak/kv/2.9.7/add-ons.md
new file mode 100644
index 0000000000..8dd0bb485b
--- /dev/null
+++ b/content/riak/kv/2.9.7/add-ons.md
@@ -0,0 +1,25 @@
+---
+title: "Add-ons"
+description: "Add-on technology for Riak KV"
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Add-ons"
+    identifier: "add-ons"
+    weight: 400
+    pre: tools
+toc: true
+aliases:
+---
+
+
+
+In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set.
+
+* [Riak Redis Add-on]({{<baseurl>}}riak/kv/2.9.7/add-ons/redis/)
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/add-ons/redis.md b/content/riak/kv/2.9.7/add-ons/redis.md
new file mode 100644
index 0000000000..4d4bcfb1cc
--- /dev/null
+++ b/content/riak/kv/2.9.7/add-ons/redis.md
@@ -0,0 +1,63 @@
+---
+title: "Riak Redis Add-on"
+description: "Redis Add-on for Riak KV"
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Redis Add-on"
+    identifier: "add-ons_redis"
+    weight: 101
+    parent: "add-ons"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+
+[addon redis develop]: ./developing-rra/
+[addon redis features]: ./redis-add-on-features/
+[addon redis setup]: ./set-up-rra/
+[addon redis use]: ./get-started-with-rra/
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+{{% note title="Warning: No longer actively maintained" %}}
+Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version.
+{{% /note %}}
+
+Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV.
RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate.

Whether you are looking to build out a session store, shopping cart, advertisement, or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV).

## Compatibility

RRA is supported on the following platforms:

* RHEL/CentOS 6
* RHEL/CentOS 7
* Ubuntu 12.04 LTS "Precise Pangolin"
* Ubuntu 14.04 LTS "Trusty Tahr"
* Debian 7 "Wheezy"
* Debian 8 "Jessie"

RRA is compatible with the following services:

* Riak KV Enterprise (2.1.4+)
* Riak TS Enterprise (1.4.0+)
* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster)
  * Redis Cluster and RRA's consistent hashing are at odds, which surfaces as
    errors such as MOVED, ASK, and CROSSSLOT messages from Redis; see (WIP):
    https://github.com/antirez/redis-rb-cluster

## Get Started

* [Set up RRA.][addon redis setup]
* [Use RRA with various clients.][addon redis use]
* [Develop with RRA.][addon redis develop]
* [Learn about RRA's features.][addon redis features]

diff --git a/content/riak/kv/2.9.7/add-ons/redis/developing-rra.md b/content/riak/kv/2.9.7/add-ons/redis/developing-rra.md
new file mode 100644
index 0000000000..ef040dee2a
--- /dev/null
+++ b/content/riak/kv/2.9.7/add-ons/redis/developing-rra.md
@@ -0,0 +1,330 @@
---
title: "Developing with Riak Redis Add-on"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "Develop with Redis Add-on"
    identifier: "add-ons_redis_develop"
    weight: 403
    parent: "add-ons_redis"
toc: true
commercial_offering: true
aliases:
---

[redis-clients]: http://redis.io/clients
[usage bucket types]: {{}}riak/kv/2.9.7/developing/usage/bucket-types/
[dev api http]: {{}}riak/kv/2.9.7/developing/api/http
[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/
[apps replication properties]: {{}}riak/kv/2.9.7/developing/app-guide/replication-properties
[usage commit hooks]: {{}}riak/kv/2.9.7/developing/usage/commit-hooks/
[concept causal context]: {{}}riak/kv/2.9.7/learn/concepts/causal-context
[ee]: https://www.tiot.jp/en/about-us/contact-us/

This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations.

## Overview

Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service makes Riak KV accessible as a persistent data store, with Redis as a cache, through the various Redis client libraries and the command-line interface tool `redis-cli`.

As with Riak KV, the cache proxy service almost always performs best and most
predictably when you use the basic CRUD operations -- Create, Read, Update,
Delete -- that you'd find in any key/value store. Learning these operations
is a great place to start when beginning to develop applications that use
RRA.

The set of clients (including recommendations) for Redis is listed at
[Redis clients][redis-clients]. For brevity's sake, the examples provided here are
in:

* Erlang (Eredis)
* JavaScript (node_redis)
* Python (redis-py)
* Ruby (redis-rb)
* Scala (lettuce)
* Java: see the Scala examples. The code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client.
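If you want to run, say, the Python snippets that follow, the redis-py client is typically installed with pip (an assumption about your environment; the other clients install similarly through their languages' package managers):

```bash
pip install redis
```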
## Riak KV Setup

While you can use Riak Redis Add-on with Riak KV configured so that either `last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume that `allow_mult` is set to 'true'.

The cache proxy service is tested under both configurations. However, due to the lack of support via the Redis protocol for returning multiple values for a single `GET`, effectively `last_write_wins` semantics apply.

For a deeper explanation of Riak KV's configurable behaviors, see John Daily's
blog series [part 4][config-behaviors].

### Bucket Type Setup

#### Create a Bucket Type

If your application organizes data in a way that does not include a bucket-type
and instead only uses a bucket to organize its keyspace, the `default` bucket-type
can be used by omitting the bucket-type portion of the colon-delimited
hierarchical namespaced key. Put another way, `test:food` is equivalent to
`default:test:food`, where the bucket-type is `default`, the bucket is `test`,
and the key is `food`. For the examples here, we will use `rra:test:food` to clearly
use a bucket-type.

If your application organizes data including a bucket-type, ensure that the
bucket-type is created in Riak without specifying the data type, so that values are
effectively opaque, i.e. a `string`. The following command provides an example of
creating the bucket-type `rra`:

```sh
if ! riak-admin bucket-type status rra >/dev/null 2>&1; then
    riak-admin bucket-type create rra '{"props":{}}'
    riak-admin bucket-type activate rra
fi
```

#### Set Bucket Props

The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false':

```sh
curl -XPUT -H 'Content-Type: application/json' \
    -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \
    'http://127.0.0.1:8098/types/rra/buckets/test/props'
```

For additional configuration options see [bucket properties][dev api http].

## Object/Key Operations

Riak KV organizes data into buckets, keys, and values, with
[bucket types][usage bucket types] acting as an additional namespace in Riak KV
versions 2.0 and greater. Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket.

Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or another plaintext representation that can be parsed in the client application (e.g. YAML).

While buckets are a flat namespace in Riak KV and you can name them
whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy
service, the Redis key bucket_type:bucket:key is mapped to the Riak KV path
bucket_type/bucket/key, so bucket type and bucket names should not contain
colons (`:`). When not specified, the bucket type defaults to "default".

Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets.

The same goes for naming keys: many objects can have the same key as long as they're in different buckets. There is no restriction on keys containing colons (`:`), and this practice of representing a nested namespace is common in applications using Redis.
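To make the mapping concrete, here is a small illustrative sketch (it assumes a local cache proxy listening on port 22122, the port used in the examples below):

```bash
# Each colon-delimited Redis key addresses a Riak KV bucket-type/bucket/key:
redis-cli -p 22122 set rra:test:food apple   # bucket-type "rra", bucket "test", key "food"
redis-cli -p 22122 set test:food apple       # bucket-type "default", bucket "test", key "food"
```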
Riak KV [bucket types][usage bucket types] enable you to provide common
configurations for buckets (as many buckets as you wish). This means you can
easily enable buckets to share common configurations, i.e. identical
[replication properties][apps replication properties] or
[commit hooks][usage commit hooks].

## Reading Objects

Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading through to Riak KV, which results in greater resilience through node outages and network partitions.

To request a value at a bucket/key in Riak KV, issue the following:

```erlang
{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]).
```

```javascript
var redis = require("redis"),
    client = redis.createClient(22122, "127.0.0.1");

client.get("rra:test:food", redis.print);
```

```python
import redis

r = redis.StrictRedis(host="127.0.0.1", port=22122)

r.get("rra:test:food")
```

```ruby
require "redis"

redis = Redis.new(host: "127.0.0.1", port: 22122)

redis.get("rra:test:food")
```

```scala
import com.lambdaworks.redis._

var client = RedisClient.create("redis://127.0.0.1:22122")
var connection = client.connect()

var value = connection.get("rra:test:food")
```

### Get Configuration Parameters

>**Note:** The cache proxy service read options (related to replication factor and
consistency concern) may optionally be set within nutcracker.conf. This will result in an override of the setting value at the bucket level in Riak KV.

The following configuration parameters apply to `GET` and may be set within the
RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:

|Parameter |Description |Default|
|----------------|-----------------|-------|
|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` |
|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` |
|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) |
|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) |
|`timeout` | The number of milliseconds to await a response. | `0` (server specified) |

### Sibling Resolution

As the Redis protocol does not provide a means to return multiple siblings,
the cache proxy service must provide server-side sibling resolution. At present, only last-write-wins sibling resolution is available. The result is an effective
last-write-wins configuration for access through the cache proxy service.

## Writing Objects

Writes via the cache proxy service are analogous to a Redis `SET`, with the added
benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
cache. As with HTTP PUT, `SET` semantically covers both create and update
operations.

To set a value at a bucket/key in Riak KV, issue the following:

```erlang
{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]).
```

```javascript
var redis = require("redis"),
    client = redis.createClient(22122, "127.0.0.1");

client.set("rra:test:food", "apple", redis.print);
```

```python
import redis

r = redis.StrictRedis(host="127.0.0.1", port=22122)

r.set("rra:test:food", "apple")
```

```ruby
require "redis"

redis = Redis.new(host: "127.0.0.1", port: 22122)

redis.set("rra:test:food", "apple")
```

```scala
import com.lambdaworks.redis._

var client = RedisClient.create("redis://127.0.0.1:22122")
var connection = client.connect()

connection.set("rra:test:food", "apple")
```

### Set Configuration Parameters

>**Note:** The cache proxy service write options (related to replication factor and
consistency concern) may optionally be set within nutcracker.conf, resulting
in an override of the setting value at the bucket level in Riak KV.

The following configuration parameters apply to `SET` and may be set within the
RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:

|Parameter |Description |Default|
|----------------|-----------------|-------|
|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
|`w` | How many replicas need to acknowledge the write before responding. | `2` |
|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |

### Sibling Explosion

As noted in the section "Sibling Resolution" above, Riak KV provides a line of
descent (known as the [causal context][concept causal context]) for a value stored at a key. Clients
performing write operations provide this causal context by setting the vector
clock (VClock) that they last read.

If a client does not provide the causal context, Riak KV makes no assumptions and treats the write as a new causal context, semantically equivalent to a
create. In the case that a value is already stored at the key, this would lead
to a sibling.

Since the Redis protocol does not provide a means to pass a VClock, the cache
proxy service needs to perform a read-before-write to obtain the current VClock, so the write can continue the causal context previously established and avoid
"sibling explosion".

Despite these efforts, in the event of a network partition, siblings will still
be created, as clients writing to nodes on either side of the network partition
can create divergent lines of descent. Sibling resolution remains the means
to merge these lines of descent into a coherent causal context.

## Deleting Objects

Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added
benefit of issuing the delete to Riak KV followed by a `PEXPIRE` to Redis, invalidating
cache.

To delete a value at a bucket/key in Riak KV, issue the following:

```erlang
{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]).
```

```javascript
var redis = require("redis"),
    client = redis.createClient(22122, "127.0.0.1");

client.del("rra:test:food", redis.print);
```

```python
import redis

r = redis.StrictRedis(host="127.0.0.1", port=22122)

r.delete("rra:test:food")
```

```ruby
require "redis"

redis = Redis.new(host: "127.0.0.1", port: 22122)

redis.del("rra:test:food")
```

```scala
import com.lambdaworks.redis._

var client = RedisClient.create("redis://127.0.0.1:22122")
var connection = client.connect()

connection.del("rra:test:food")
```

### Delete Configuration Parameters

The following configuration parameters apply to `DEL` and may be set within the
RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:

|Parameter |Description |Default|
|----------------|-----------------|-------|
|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
|`w` | How many replicas need to acknowledge the write before responding. | `2` |
|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |

diff --git a/content/riak/kv/2.9.7/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.9.7/add-ons/redis/redis-add-on-features.md
new file mode 100644
index 0000000000..37485cd6f4
--- /dev/null
+++ b/content/riak/kv/2.9.7/add-ons/redis/redis-add-on-features.md
@@ -0,0 +1,136 @@
---
title: "Riak Redis Add-on Features"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "Redis Add-on Features"
    identifier: "add-ons_redis_features"
    weight: 504
    parent: "add-ons_redis"
toc: true
commercial_offering: true
aliases:
---

[ee]: https://www.tiot.jp/en/about-us/contact-us/
[GET-sequence]: {{}}images/redis/GET_seq.msc.png
[SET-sequence]: {{}}images/redis/SET_seq.msc.png
[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png
[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png
[redis docs]: http://redis.io/commands
[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md

## Overview

The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware.

On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available:

* [Pre-sharding](#pre-sharding)
* [Connection Aggregation](#connection-aggregation)
* [Command Pipelining](#command-pipelining)
* [Read-through Cache](#read-through-cache)
* [Write-around Cache](#write-around-cache)
* [Commands](#commands)
* [Object Lifetime](#object-lifetime)

## Pre-sharding

Pre-sharding with consistent hashing dispatches object reads and writes based
on a configurable hash function, spreading load across multiple cache servers.
The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then
requests are routed to the Redis server that handles that portion of the key
range.

Redis with no persistence is used as the frontend cache proxy service, and
Redis as a data server holds all data in memory. The addressable memory of
the cache proxy is limited. By employing pre-sharding, the total addressable cache
memory space is extended by the number of Redis servers.

## Connection Aggregation

Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers. This reduces the total number of connections required to the Redis server for the same key.

Redis clients in various languages support specifying multiple servers, as well
as implementing multiple methods of spreading load across those servers (i.e.
round-robin load balancing or consistent hashing). Since the cache proxy service is providing consistent hashing, any Redis client method of supporting multiple
servers will suffice.
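As a rough illustration (a sketch assuming a local cache proxy on port 22122 configured with multiple Redis servers, as in the setup guide):

```bash
# Write several keys through a single proxy endpoint; consistent hashing
# spreads them across the configured Redis servers behind the proxy.
for k in a b c d e; do
  redis-cli -p 22122 set "rra:test:$k" "value-$k"
done
```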
+ +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideal due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server during a write (DELETE or PUT) to the +backend server. + +A short `CACHE_TTL`, for example "15s", reduces a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three write cache strategies, the write-around cache strategy is the least +prone to race condition, but least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV utilizing the read-through + caching strategy with a TTL set at service configuration time. + +* SET - set the value of a key to Riak KV and invalidate cache, issue a PEXPIRE + to Redis. + +* DEL - delete the value of a key to Riak KV and invalidate cache, issue a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs]. 
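A minimal `redis-cli` session tying these commands together might look like the following (illustrative only; it assumes the proxy listens on port 22122 and the `rra` bucket type from the development guide):

```bash
redis-cli -p 22122 set rra:test:food apple   # write to Riak KV, then PEXPIRE invalidates cache
redis-cli -p 22122 get rra:test:food         # cache miss: read-through from Riak KV, then cached
redis-cli -p 22122 get rra:test:food         # cache hit until the configured TTL elapses
redis-cli -p 22122 del rra:test:food         # delete from Riak KV, then PEXPIRE invalidates cache
```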
>**Important:** While the cache proxy service does support issuing DEL commands, PEXPIRE, with a small TTL, is suggested instead when the semantic intent is to remove an item from cache. With write-around, the DEL command will issue a delete to the Riak backend.

## Object Lifetime

With the combination of read-through and write-around cache strategies, the
full object lifetime for a key-value is represented by the following
sequence diagram:

![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png)

diff --git a/content/riak/kv/2.9.7/add-ons/redis/set-up-rra.md b/content/riak/kv/2.9.7/add-ons/redis/set-up-rra.md
new file mode 100644
index 0000000000..1dee72a222
--- /dev/null
+++ b/content/riak/kv/2.9.7/add-ons/redis/set-up-rra.md
@@ -0,0 +1,285 @@
---
title: "Setting Up Riak Redis Add-on"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "Set Up Redis Add-on"
    identifier: "add-ons_redis_setup"
    weight: 201
    parent: "add-ons_redis"
toc: true
commercial_offering: true
aliases:
---

[addon redis develop]: ../developing-rra/
[addon redis use]: ../using-rra
[ee]: https://www.tiot.jp/en/about-us/contact-us/
[install index]: {{}}riak/kv/2.9.7/setup/installing
[perf open files]: {{}}riak/kv/2.9.7/using/performance/open-files-limit/#changing-the-limit
[lab ansible]: https://github.com/paegun/ansible-cache-proxy

This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA.

## Prerequisites

Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index].

While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go.

This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used
to configure the cache proxy.

## In the Lab

An ansible setup for the Riak Redis Add-on (RRA) was developed to provide a
runnable example of an installation; see [ansible cache proxy][lab ansible].
The remainder of this setup guide lists the commands required to install and
configure RRA manually.

## Installing

1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files].
2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed.
3. Install Riak Redis Add-on.

### Change the open-files limit

As with Riak KV, both the total open-files limit and the per-user open-files limit
must be high enough to allow Redis and Riak Redis Add-on (RRA) to function.

For a complete guide on changing the limit in Riak KV, see
[Changing the limit][perf open files].

#### Linux

On most Linux distributions, the total limit for open files is controlled by `sysctl`.

```bash
# check the current limit, raise it, then persist the setting via /etc/sysctl.conf
sudo sysctl fs.file-max
sudo sysctl -w fs.file-max=65536
sudo sysctl -p
```

To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
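After editing `limits.conf` (sample entries are shown in the Ubuntu section below), you can sanity-check the per-user limit from a fresh login shell:

```bash
# print the per-user open-files limit for the current shell
ulimit -n
```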
#### CentOS

On CentOS systems, set a proper limit for the user you're usually logging in with
to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the
executing user.

#### Ubuntu

On Ubuntu systems, the following settings are recommended:

```config
»USERNAME« hard nofile 65536
»USERNAME« soft nofile 65536
root hard nofile 65536
root soft nofile 65536
```

>**Note:** You may need to log out of your shell and then log back in for these changes to take effect.

### Install Redis

>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on".

#### Install on Ubuntu

If you are on Ubuntu, run the following to install Redis:

```bash
# add the dotdeb repositories to your APT sources.
sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <
```

>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.

### Install Riak Redis Add-on (RRA)

>**Note:**
>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums.

If you are on CentOS, run the following to install RRA:

```bash
sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm
```

If you are on Ubuntu, run the following to install RRA:

```bash
sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb
```

## Configuring Riak Redis Add-on

To configure Riak Redis Add-on (RRA), edit the configuration file: /etc/cache_proxy/cache_proxy_22122.yml.

The RRA configuration file is in YAML format. An example configuration
file is provided in the install, and it contains all relevant configuration elements:

```config
» XML node name« :
  listen: 0.0.0.0:22122
  hash: fnv1a_64
  distribution: ketama
  auto_eject_hosts: true
  redis: true
  server_retry_timeout: 2000
  server_failure_limit: 1
  server_ttl: 1h
  servers:
    - 127.0.0.1:6379:1
  backend_type: riak
  backend_max_resend: 2
  backends:
    - 127.0.0.1:8087
```

Set the `listen` configuration value to set the RRA listen port.

To set the time-to-live (TTL) for values stored in cache, set the `server_ttl`
configuration value. Human-readable time values can be specified,
with the most likely units being `s` for seconds or `ms` for milliseconds.

Set the list of Redis servers by listing the servers, each on its own `-` entry, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional).

Set the list of Riak KV servers by listing the servers, each on its own `-` entry, under the `backends` configuration value in the format `»host«:»port«:»weight«`
(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here.

### Verify your configuration

If you are on Ubuntu, run the following to start RRA:

```bash
sudo service cache_proxy start
```

If you are on CentOS, run the following to start RRA:

```bash
sudo systemctl start cache_proxy
```

To verify RRA is running and listening on the expected port, run the
following (using the loopback interface and the default RRA port 22122
as an example):

```bash
redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS
redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on
```

Redis should respond with `SUCCESS`.
If RRA is responding with the expected output, run the following to
clean up and remove the test value:

```bash
redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on
```

If you did not get the expected output, run the following
to verify that RRA is running on the expected port:

```bash
ss -nlp | grep [n]utcracker
```

>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.

## Next Steps

Get started with some [basic usage][addon redis use] or check out more info on [setting up for development (with examples)][addon redis develop].

diff --git a/content/riak/kv/2.9.7/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.9.7/add-ons/redis/set-up-rra/deployment-models.md
new file mode 100644
index 0000000000..64bb15e2f8
--- /dev/null
+++ b/content/riak/kv/2.9.7/add-ons/redis/set-up-rra/deployment-models.md
@@ -0,0 +1,143 @@
---
title: "Riak Redis Add-on Deployment Models"
description: "Explore the various models for deploying Riak Redis Add-on"
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "Redis Add-on Deployment Models"
    identifier: "add-ons_redis_deployment"
    weight: 201
    parent: "add-ons_redis_setup"
toc: true
commercial_offering: true
aliases:
---

[Local-deployment]: {{}}images/redis/rra_deployment_local.png
[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png
[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png

## Deployment Models

### Local Cache Deployment

In a local cache deployment, the RRA and Redis are deployed to the application
server.

![Local-deployment]({{}}images/redis/rra_deployment_local.png)

Connections:

* RRA: The connections between Application Service instances and the RRA Service
  instance are local.
* Redis: The connection between the RRA Service instance and the Redis Service
  instance is local.
* Riak: The connections between Application Servers and Riak Nodes are distributed
  and bounded to equal the number of Riak nodes _multiplied_ by the number of
  Application Servers, since they are aggregated at the RRA Service instance.

Advantages:

* Cache hits are extremely fast.

Disadvantages:

* Cache writes on one application server are *not* observed on other application
  servers, so cache hit rates are likely lower unless some form of consistent
  routing to the application server exists within the solution.
* Redis competing for RAM with the application service may be problematic.

### Colocated Cache Deployment

In a colocated cache deployment, the RRA may be deployed either to the
application server (suggested) or to the Riak servers, and Redis is deployed to
the Riak servers.

In the case of deploying the RRA to the application servers, the RRA features
of reducing connections from the relatively high number of application service
instances to the fewer Redis (cache) and Riak (persistent) data service
instances allow for the greatest scale at the expense of the deployment cost
of pushing a service and its configuration.

In the case of deploying the RRA to the colocated Redis and Riak data servers,
the maximum scale for the solution is constrained by the number of network
connections from the application services, while deployment costs remain a matter
of pushing a service and its configuration. In either case, deployment should
be automated, so deployment costs are not multiplied by the number of servers.
![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png)

Connections:

* RRA: The connections between Application Service instances and the RRA Service
  instance are distributed and bounded to equal the number of Riak nodes
  _multiplied_ by the number of Application Service instances.
* Redis: The connection between the RRA Service instance and the Redis Service
  instance is local.
* Riak: The connections between RRA and Riak Nodes are distributed and bounded to
  equal the number of Riak nodes _squared_.

Advantages:

* Increases the cache hit rate, as a cache write from one application server
  will lead to a cache hit by all other application servers.

Disadvantages:

* Typically increased distance between the application service and the Redis and
  Riak services, so slightly increased latency compared to local.
* Redis competing for RAM with Riak will likely be problematic. Redis should
  be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis
  to ensure Riak is allotted sufficient RAM to serve the more important
  persistent data storage and retrieval services. See http://redis.io/topics/config
* This model may seem to provide data locality, but in the case of faults in
  either the Redis or Riak services, the fault tolerance mechanisms of RRA and
  Riak will not match exactly, as communicating the information necessary to
  support such lock-step fault tolerance would lead to greater mean latencies,
  and Riak provides superior 99th percentile latency performance in the face
  of faults.

### Distributed Cache Deployment

In a distributed cache deployment, the RRA is deployed to the application server
and Redis is deployed to standalone servers, separate from Riak cluster nodes.

![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png)

Connections:

* RRA: The connections between Application Service instances and the RRA Service
  instance are local.
* Redis: The connections between the RRA Service instance and Redis Service
  instances are distributed and bounded to equal the number of Application
  Servers _multiplied_ by the number of Redis Servers.
* Riak: The connections between RRA and Riak Nodes are distributed and bounded to
  equal the number of Riak nodes _multiplied_ by the number of Application
  Servers, since they are aggregated at the RRA Service instance.

Advantages:

* Increases the cache hit rate, as a cache write from one application server
  will lead to a cache hit by all other application servers.
* Keeps RRA near the application, reducing network connections.
* Moves Redis to distinct servers, allowing the cache more RAM and not
  constraining the RAM of either the application or persistent data services.

Disadvantages:

* Typically increased distance between the application service and the Redis and
  Riak services, so increased latency compared to local.

### Recommendation

The relative advantages and disadvantages of the Distributed Cache Deployment,
most notably the increased cache hit rate and reduced connection overhead,
should make it the standout choice for applications requiring the scale and
operational simplicity of Riak. For this reason, we recommend the Distributed
Cache Deployment.
diff --git a/content/riak/kv/2.9.7/add-ons/redis/using-rra.md b/content/riak/kv/2.9.7/add-ons/redis/using-rra.md
new file mode 100644
index 0000000000..f56c7f96c3
--- /dev/null
+++ b/content/riak/kv/2.9.7/add-ons/redis/using-rra.md
@@ -0,0 +1,246 @@
---
title: "Using Riak Redis Add-on"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "Using Redis Addon"
    identifier: "add-ons_redis_getstarted"
    weight: 302
    parent: "add-ons_redis"
toc: true
commercial_offering: true
aliases:
  - /riak/kv/2.9.7/add-ons/redis/get-started-with-rra
---

[addon redis develop]: ../developing-rra/
[addon redis setup]: ../set-up-rra/
[dev api http]: {{}}riak/kv/2.9.7/developing/api/http/
[ee]: https://www.tiot.jp/en/about-us/contact-us/

Now that you've [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client which supports `GET`, `SET` and `DEL` operations.

This page will walk you through using RRA.

## Prerequisites

We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine.

You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http].

## Run the Read-Through Test

Throughout this test example, the bucket "test" and key "foo" are used to
demonstrate how to address the hierarchical namespace support in Riak KV
through the flat Redis key. The bucket type is not specified in this example,
so it is effectively the default bucket type, named "default". For additional
information regarding the key namespace, see [develop Riak Redis Add-on (RRA)][addon redis develop].

The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are:

* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings.
* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`.
* GET the Riak object at the `test` bucket with the key `foo`.
* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.)
* Assert that the value obtained from the previous cache proxy GET is 'bar'.
First, create a file named `read_through_test.sh` with the following content:

```bash
#!/usr/bin/env bash

# set test environment
RIAK_HTTP_IP="127.0.0.1"
RIAK_HTTP_PORT="8098"
CACHE_PROXY_IP="127.0.0.1"
CACHE_PROXY_PORT="22122"
CACHE_PROXY_STATISTICS_PORT="22123"
RIAK_TEST_BUCKET="test"
KEY="foo"
VALUE="bar"

# DELETE Riak object, ensure no siblings
curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"

# PUT Riak object
curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"

# GET Riak object
RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY")

# GET Cache Proxy value
CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" get "$RIAK_TEST_BUCKET:$KEY")

# DELETE Riak object, cleanup
curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"

# Assert
if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then
    RESULT="Success"
else
    RESULT="FAIL"
fi
echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy."
```

Then, once you've created the file, run it as follows:

```bash
./read_through_test.sh 22122 8098 test
```

### Exceptions

If the test does not pass, verify that both Redis and RRA are running. You can do this by running:

```bash
ps aux | grep [r]edis
ps aux | grep [n]utcracker
```

The result should list `redis` and `nutcracker` respectively.

Also, verify that Riak KV is started and listening on the protocol buffer port specified:

```bash
sudo riak config effective | grep proto
```

If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following:

```bash
sudo service cache_proxy restart
```

If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows:

1. Stop RRA.
2. Stop Redis.
3. *Optional:* Restart Riak KV (this should only be necessary if Riak KV is not responding to protocol buffer requests).
4. Start Redis.
5. Start RRA.

```bash
sudo service cache_proxy stop
sudo service redis stop

# optional
sudo riak restart

sudo service redis start
sudo service cache_proxy start
```

## Using Riak Redis Add-on

Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA.

For objects that should not be cached, interact with Riak KV as usual: issuing GET, PUT, and DELETE commands through the Riak client.

For objects that should be cached, read from RRA: issuing GET, SET, and DEL commands through the Redis client.

### Monitoring

#### RRA

Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service's process was killed or terminated by other means.

The log file for RRA is stored by default in /var/log/cache_proxy.log . RRA is logrotate friendly, responding to the signal to reopen the log file following a rotate.

For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+ +For example, running the following command (using the loopback interface and the default statistics port as an example): + +```bash +telnet 127.0.0.1 22123 +``` + +Returns statistic results: + +```json +{ + "bdp_cache_proxy": { + "192.168.50.2:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 216, + "requests": 9, + "response_bytes": 39, + "responses": 4, + "server_connections": 1, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.3:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 0, + "requests": 0, + "response_bytes": 0, + "responses": 0, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.4:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 90, + "requests": 5, + "response_bytes": 258, + "responses": 2, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "client_connections": 0, + "client_eof": 6, + "client_err": 0, + "forward_error": 0, + "fragments": 0, + "server_ejects": 0 + }, + "curr_connections": 4, + "service": "nutcracker", + "source": "vagrant", + "timestamp": 1438301846, + "total_connections": 10, + "uptime": 7227, + "version": "0.4.0" +} +``` + +Using the above results, you should be able to determine metrics changes that would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution. + +While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions: + +* Custom - https://github.com/gfranxman/NutcrackerMonitor +* NewRelic - http://newrelic.com/plugins/schoology/245 +* Nagios - https://github.com/schoology/twemproxy_nagios + +#### Redis + +Various Redis monitoring solutions exist in the market and, like monitoring RRA, these monitoring solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone. 
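For instance, the same statistics these tools consume can be inspected by hand with the standard `info` command (assuming a Redis server on its default port 6379):

```bash
# print the "stats" section of the Redis INFO output
redis-cli -h 127.0.0.1 -p 6379 info stats
```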
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details + + + + diff --git a/content/riak/kv/2.9.7/configuring.md b/content/riak/kv/2.9.7/configuring.md new file mode 100644 index 0000000000..8f39eb23dd --- /dev/null +++ b/content/riak/kv/2.9.7/configuring.md @@ -0,0 +1,88 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +aliases: +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
[Learn More >>][config search]

#### [V3 Multi-Datacenter][config v3 mdc]

A guide on configuring Riak's V3 Multi-Datacenter Replication.

[Learn More >>][config v3 mdc]

diff --git a/content/riak/kv/2.9.7/configuring/backend.md b/content/riak/kv/2.9.7/configuring/backend.md
new file mode 100644
index 0000000000..16378a1e5f
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/backend.md
@@ -0,0 +1,647 @@
---
title: "Backend Configuration"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "Backend Configuration"
    identifier: "configuring_backend"
    weight: 110
    parent: "configuring"
toc: true
aliases:
---

[plan backend leveldb]: {{}}riak/kv/2.9.7/setup/planning/backend/leveldb
[plan backend leveled]: {{}}riak/kv/2.9.7/setup/planning/backend/leveled
[plan backend bitcask]: {{}}riak/kv/2.9.7/setup/planning/backend/bitcask
[plan backend memory]: {{}}riak/kv/2.9.7/setup/planning/backend/memory
[plan backend multi]: {{}}riak/kv/2.9.7/setup/planning/backend/multi

## LevelDB

Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend.

> **Note on upgrading to 2.0**
>
> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and
wish to use your old configuration files, i.e. `app.config` and
`vm.args`, please note that you must set the `total_leveldb_mem_percent`
setting in the `eleveldb` section of `app.config`. We recommend setting
it to `70`. If you do not set this parameter, it will default to 15,
which can lead to problems in some clusters.
Config | Description | Default
:------|:------------|:-------
`leveldb.block_cache_threshold` | This setting defines the limit past which block cache memory can no longer be released in favor of the page cache. This setting has no impact in favor of the file cache. The value is set on a per-vnode basis. | `32MB`
`leveldb.compaction.trigger.tombstone_count` | Controls when a background compaction initiates solely due to the number of delete tombstones within an individual `.sst` table file. A value of `off` disables the feature. | `1000`
`leveldb.compression` | Enabling this setting (`on`), which is the default, saves disk space. Disabling it may reduce read latency but increase overall disk activity. This option can be changed at any time, but it will not impact data on disk until the next time a file requires compaction. | `on`
`leveldb.compression.algorithm` | Selects which compression algorithm is used when `leveldb.compression` is `on`. In new riak.conf files, this is explicitly set to `lz4`; however, when this setting is not provided, `snappy` will be used for backward compatibility. When you determine that you will no longer need backward compatibility, setting this to `lz4` will cause future compactions to use the LZ4 algorithm for compression. | `lz4` in new riak.conf files; `snappy` when not provided
`leveldb.data_root` | The directory in which LevelDB will store its data. | `./data/leveldb`
`leveldb.fadvise_willneed` | Option to override LevelDB's use of `fadvise(DONTNEED)` with `fadvise(WILLNEED)` instead. `WILLNEED` can reduce disk activity on systems where physical memory exceeds the database size. | `false`
`leveldb.maximum_memory` | This parameter defines the server memory (in bytes) to assign to LevelDB. Also see `leveldb.maximum_memory.percent` to set LevelDB memory as a percentage of system total. | `80`
`leveldb.maximum_memory.percent` | This parameter defines the percentage of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes to stay within this size. The memory size can alternately be assigned as a byte count via `leveldb.maximum_memory` instead. | `70`
`leveldb.threads` | The number of worker threads performing LevelDB operations. | `71`
`leveldb.verify_checksums` | Enables or disables the verification of the data fetched from LevelDB against internal checksums. | `on`
`leveldb.verify_compaction` | Enables or disables the verification of LevelDB data during compaction. | `on`
`leveldb.block.size_steps` | Defines the number of incremental adjustments to attempt between the `block.size` value and the maximum `block.size` for an `.sst` table file. A value of zero disables the underlying dynamic `block_size` feature. | `16`
`leveldb.block.restart_interval` | Defines the key count threshold for a new key entry in the key index for a block. Most deployments should leave this parameter alone. | `16`
`leveldb.block.size` | Defines the size threshold for a block/chunk of data within one `.sst` table file. Each new block gets an index entry in the `.sst` table file's master index. | `4KB`
`leveldb.bloomfilter` | Each database `.sst` table file can include an optional "bloom filter" that is highly effective in shortcutting data queries that are destined to not find the requested key. The Bloom filter typically increases the size of an `.sst` table file by about 2%. | `on`
`leveldb.write_buffer_size_min` | Each vnode first stores new key/value data in a memory-based write buffer. This write buffer is in parallel to the recovery log mentioned in the `sync` parameter. Riak creates each vnode with a randomly sized write buffer for performance reasons. The random size is somewhere between `write_buffer_size_min` and `write_buffer_size_max`. | `30MB`
`leveldb.write_buffer_size_max` | See `leveldb.write_buffer_size_min` directly above. | `60MB`
`leveldb.limited_developer_mem` | This is a Riak-specific option that is used when a developer is testing a high number of vnodes and/or several VMs on a machine with limited physical memory. Do not use this option if making performance measurements. This option overwrites values given to `write_buffer_size_min` and `write_buffer_size_max`. | `off`
`leveldb.sync_on_write` | Whether LevelDB will flush after every write. Note: if you are familiar with fsync, this is analogous to calling fsync after every write. | `off`
`leveldb.tiered` | The level number at which LevelDB data switches from the faster to the slower array. The default of `off` disables the feature. | `off`
`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`. |
`leveldb.tiered.path.slow` | The path prefix for `.sst` files at or above the level set by `leveldb.tiered`. |
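
Taken together, a riak.conf fragment that adjusts a few of these parameters might look like the following minimal sketch (the values shown are illustrative assumptions, not tuned recommendations):

```riakconf
# Illustrative values only - tune for your own hardware and workload
leveldb.maximum_memory.percent = 70
leveldb.compression = on
leveldb.compression.algorithm = lz4
leveldb.data_root = $(platform_data_dir)/leveldb
```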
## Leveled

Configurable parameters for Riak's [leveled][plan backend leveled] storage backend.

Config | Description | Default
:------|:------------|:-------
`leveled.data_root` | A path under which leveled data files will be stored. | `$(platform_data_dir)/leveled`
`leveled.sync_strategy` | Strategy for flushing data to disk. Can be set to `riak_sync`, `sync` (if OTP > 16) or `none`. Use `none` and the OS will flush when most efficient. Use `riak_sync` or `sync` to flush after every PUT (not recommended without some hardware support, e.g. flash drives and/or flash-backed write caches). | `none`
`leveled.compression_method` | Can be `lz4` or `native` (which will use the Erlang native zlib compression) within term_to_binary. | `native`
`leveled.compression_point` | The point at which compression is applied to the Journal (the Ledger is always compressed). Use `on_receipt` or `on_compact`. `on_compact` is suitable when values are unlikely to yield much benefit from compression (compression is only attempted when compacting). | `on_receipt`
`leveled.log_level` | Can be `debug`, `info`, `warn`, `error` or `critical`. Sets the minimum log level to be used within leveled. Leveled will log many lines to allow for stats to be extracted by those using log indexers such as Splunk. | `info`
`leveled.journal_size` | The approximate size (in bytes) when a Journal file should be rolled. Normally keep this as around the size of o(100K) objects. | `1000000000`
`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. The higher the value, the more compaction runs, and the sooner space is recovered, but each run has a cost. | `24`
`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. Use a low hour of 0 and a high hour of 23 to have no compaction window (i.e. always compact regardless of time of day). | `0`
`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. If low hour > top hour then compaction will work overnight between low hour and top hour (inclusive). Timings rely on the server's view of local time. | `23`
`leveled.max_run_length` | In a single compaction run, the maximum number of consecutive files which may be compacted. | `4`
`leveled_reload_recalc` | Enable the `recalc` compaction strategy within the leveled backend in riak. | `disabled`
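
For example, a riak.conf sketch for the leveled backend might look like this (the compaction window values are illustrative assumptions, not recommendations):

```riakconf
# Illustrative values only
leveled.sync_strategy = none
leveled.compression_method = native
leveled.compaction_runs_perday = 24
leveled.compaction_low_hour = 1
leveled.compaction_top_hour = 5
```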
## Bitcask

Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend.

Config | Description | Default
:------|:------------|:-------
`bitcask.data_root` | The directory under which Bitcask will store its data. | `./data/bitcask`
`bitcask.io_mode` | Configure how Bitcask writes data to disk. If set to `erlang`, writes are made via Erlang's built-in file API; if set to `nif`, writes are made via direct calls to the POSIX C API. The `nif` mode provides higher throughput for certain workloads, but has the potential to negatively impact the Erlang VM, leading to higher worst-case latencies and possible throughput collapse. | `erlang`
`bitcask.expiry` | By default, Bitcask keeps all of your data around. If your data has limited time value, or if you need to purge data for space reasons, you can set the `expiry` option. For example, if you need to purge data automatically after 1 day, set the value to `1d`. `off` disables automatic expiration. | `off`
`bitcask.expiry.grace_time` | By default, Bitcask will trigger a merge whenever a data file contains an expired key. This may result in excessive merging under some usage patterns. To prevent this you can set the `bitcask.expiry.grace_time` option. Bitcask will defer triggering a merge solely for key expiry by the configured number of seconds. Setting this to `1h` effectively limits each cask to merging for expiry once per hour. | `0`
`bitcask.hintfile_checksums` | Whether to allow the CRC to be present at the end of hintfiles. Setting this to `allow_missing` runs Bitcask in a backwards-compatible mode in which old hint files will still be accepted without CRC signatures. | `strict`
`bitcask.fold.max_puts` | See the description for the `bitcask.fold.max_age` config directly below. | `0`
`bitcask.fold.max_age` | Fold keys thresholds will reuse the keydir if another fold was started less than `fold.max_age` ago and there were fewer than `fold.max_puts` updates. Otherwise, it will wait until all current fold keys complete and then start. Set either option to `unlimited` to disable. | `unlimited`
`bitcask.merge.thresholds.fragmentation` | Describes which ratio of dead keys to total keys in a file will cause it to be included in the merge. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 4 dead keys and 6 live keys, it will be included in the merge at the default ratio (which is 40). Increasing the value will cause fewer files to be merged, decreasing the value will cause more files to be merged. | `40`
`bitcask.merge.thresholds.dead_bytes` | Describes the minimum amount of data occupied by dead keys in a file to cause it to be included in the merge. Increasing the value will cause fewer files to be merged, whereas decreasing the value will cause more files to be merged. | `128MB`
`bitcask.merge.thresholds.small_file` | Describes the minimum size a file must have to be excluded from the merge. Files smaller than the threshold will be included. Increasing the value will cause more files to be merged, whereas decreasing the value will cause fewer files to be merged. | `10MB`
`bitcask.merge.triggers.dead_bytes` | Describes how much data stored for dead keys in a single file will trigger merging. If a file meets or exceeds the trigger value for dead bytes, merge will be triggered. Increasing the value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. When either of these constraints is met by any file in the directory, Bitcask will attempt to merge files. | `512MB`
`bitcask.merge.triggers.fragmentation` | Describes which ratio of dead keys to total keys in a file will trigger merging. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 6 dead keys and 4 live keys, then merge will be triggered at the default setting. Increasing this value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. | `60`
`bitcask.merge.window.end` | See the description of the `bitcask.merge.policy` config below. | `23`
`bitcask.merge.window.start` | See the description of the `bitcask.merge.policy` config below. | `0`
`bitcask.merge.policy` | Lets you specify when during the day merge operations are allowed to be triggered. Valid options are: `always`, meaning no restrictions; `never`, meaning that merging will never be attempted; and `window`, specifying the hours during which merging is permitted, where `bitcask.merge.window.start` and `bitcask.merge.window.end` are integers between 0 and 23. If merging has a significant impact on performance of your cluster, or your cluster has quiet periods in which little storage activity occurs, you may want to change this setting from the default. | `always`
`bitcask.merge_check_interval` | Bitcask periodically runs checks to determine whether merges are necessary. This parameter determines how often those checks take place. Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, etc. | `3m`
`bitcask.merge_check_jitter` | In order to prevent merge operations from taking place on different nodes at the same time, Riak can apply random variance to merge times, expressed as a percentage of `bitcask.merge_check_interval`. | `30%`
`bitcask.max_merge_size` | Maximum amount of data to merge in one go in the Bitcask backend. | `100GB`
`bitcask.max_file_size` | Describes the maximum permitted size for any single data file in the Bitcask directory. If a write causes the current file to exceed this size threshold, that file is closed and a new file is opened for writes. | `2GB`
`bitcask.sync.interval` | See the description of the `bitcask.sync.strategy` directly below. |
`bitcask.sync.strategy` | Changes the durability of writes by specifying when to synchronize data to disk. The default setting protects against data loss in the event of application failure (process death) but leaves open a small window in which data could be lost in the event of complete system failure (e.g. hardware, OS, or power). The default mode, `none`, writes data into operating system buffers which will be written to the disks when those buffers are flushed by the operating system. If the system fails, e.g. due to power loss or crash, that data is lost before those buffers are flushed to stable storage. This is prevented by the setting `o_sync`, which forces the operating system to flush to stable storage at every write. The effect of flushing each write is better durability, but write throughput will suffer as each write will have to wait for the write to complete. Available sync strategies: `none`, which will let the operating system manage syncing writes; `o_sync`, which uses the `O_SYNC` flag to force syncs on every write; and `interval`, which will force Bitcask to sync every `bitcask.sync.interval` seconds. | `none`
`bitcask.open_timeout` | Specifies the maximum time Bitcask will block on startup while attempting to create or open the data directory. You generally need not change this value. If for some reason the timeout is exceeded on open you'll see a log message of the form `Failed to start bitcask backend: ...`. Only then should you consider a longer timeout. | `4s`
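
As an illustration, a riak.conf sketch that confines Bitcask merges to a quiet overnight window and expires data after a week (the window and expiry values are illustrative assumptions, not recommendations):

```riakconf
# Illustrative values only
bitcask.merge.policy = window
bitcask.merge.window.start = 0
bitcask.merge.window.end = 5
bitcask.expiry = 7d
```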
## Memory Backend

Configurable parameters for Riak's [Memory][plan backend memory] backend.

Config | Description | Default
:------|:------------|:-------
`memory_backend.ttl` | Each value written will be written with this "time to live." Once that object's time is up, it will be deleted on the next read of its key. Minimum: `1s`. |
`memory_backend.max_memory_per_vnode` | The maximum amount of memory consumed per vnode by the memory storage backend. Minimum: `1MB`. |
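
A short riak.conf sketch combining both parameters (the values are illustrative assumptions):

```riakconf
# Illustrative values only
memory_backend.ttl = 1h
memory_backend.max_memory_per_vnode = 4GB
```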
## Multi Backend

Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster.

If you are using multiple backends, you can configure the backends individually by prepending the setting with `multi_backend.$name`, where `$name` is the name of the backend. `$name` can be any valid configuration word, like `customer_data`, `my_data`, `foo_bar_backend`, etc.

Below is the general form for setting multi-backend parameters:

```riakconf
multi_backend.$name.(existing_setting) = <setting>
# or
multi_backend.$name.$backend_type.(backend_specific_setting) = <setting>
```

Below is a listing of the available parameters:

Config | Description | Default
:------|:------------|:-------
`multi_backend.$name.storage_backend` | This parameter specifies the Erlang module defining the storage mechanism that will be used on this node. | `bitcask`
`multi_backend.default` | The default name of a backend when one is not specified. |
To give an example, if you have a LevelDB backend named `customer_backend` and wish to set the `data_root` parameter to `$(platform_data_dir)/leveldb_backends/customer_backend/`, you would do so as follows:

```riakconf
multi_backend.customer_backend.storage_backend = leveldb
multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend
multi_backend.customer_backend.leveldb.maximum_memory.percent = 50
```

diff --git a/content/riak/kv/2.9.7/configuring/basic.md b/content/riak/kv/2.9.7/configuring/basic.md
new file mode 100644
index 0000000000..ba732a636a
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/basic.md
@@ -0,0 +1,239 @@
---
title: "Basic Riak KV Configuration"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "Basic Configuration"
    identifier: "configuring_basic"
    weight: 100
    parent: "configuring"
toc: true
aliases:
  - /riak/2.9.7/ops/building/configuration/
  - /riak/kv/2.9.7/ops/building/configuration/
---

[config reference]: {{}}riak/kv/2.9.7/configuring/reference
[use running cluster]: {{}}riak/kv/2.9.7/using/running-a-cluster
[use admin riak-admin#member-status]: {{}}riak/kv/2.9.7/using/admin/riak-admin/#member-status
[perf erlang]: {{}}riak/kv/2.9.7/using/performance/erlang
[plan start]: {{}}riak/kv/2.9.7/setup/planning/start
[plan best practices]: {{}}riak/kv/2.9.7/setup/planning/best-practices
[cluster ops backup]: {{}}riak/kv/2.9.7/using/cluster-operations/backing-up
[cluster ops add remove node]: {{}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes
[plan backend]: {{}}riak/kv/2.9.7/setup/planning/backend
[plan backend multi]: {{}}riak/kv/2.9.7/setup/planning/backend/multi
[plan backend bitcask]: {{}}riak/kv/2.9.7/setup/planning/backend/bitcask
[usage bucket types]: {{}}riak/kv/2.9.7/developing/usage/bucket-types
[apps replication properties]: {{}}riak/kv/2.9.7/developing/app-guide/replication-properties
[concept buckets]: {{}}riak/kv/2.9.7/learn/concepts/buckets
[concept eventual consistency]: {{}}riak/kv/2.9.7/learn/concepts/eventual-consistency
[perf benchmark]: {{}}riak/kv/2.9.7/using/performance/benchmarking
[perf open files]: {{}}riak/kv/2.9.7/using/performance/open-files-limit
[perf index]: {{}}riak/kv/2.9.7/using/performance
[perf aws]: {{}}riak/kv/2.9.7/using/performance/amazon-web-services
[Cluster Capacity Planning]: {{}}riak/kv/2.9.7/setup/planning/cluster-capacity/#ring-size-number-of-partitions

This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed [Configuration Files][config reference] document before moving a cluster into production.

All configuration values discussed here are managed via the configuration file on each node, and a node must be restarted for any changes to take effect.

> **Note**
>
> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0 release, you can use either your old `app.config` configuration file or the newer `riak.conf` if you wish.
>
> If you have installed Riak KV 2.0 directly, you should use only `riak.conf`.
>
> More on configuring Riak KV can be found in the [configuration files][config reference] doc.

We advise that you make as many of the changes below as practical _before_ joining the nodes together into a cluster.
Once your configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process.

Use [`riak-admin member-status`][use admin riak-admin#member-status] to determine whether any given node is a member of a cluster.

## Erlang VM Tunings

Prior to building and starting a cluster, there are some Erlang-VM-related changes that you should make to your configuration files. If you are using the older, `vm.args`-based Erlang VM tunings, you should set the following:

```vmargs
+sfwi 500
+scl false
```

If you are using the newer, `riak.conf`-based configuration system, we recommend the following settings:

```riakconf
erlang.schedulers.force_wakeup_interval = 500
erlang.schedulers.compaction_of_load = false
```

More information can be found in [Erlang VM Tuning][perf erlang].

## Ring Size

The ring size, in Riak parlance, is the number of data partitions that comprise the cluster. This quantity impacts the scalability and performance of a cluster and, importantly, **it should be established before the cluster starts receiving data**.

If the ring size is too large for the number of servers, disk I/O will be negatively impacted by the excessive number of concurrent databases running on each server; if the ring size is too small, the servers' other resources (primarily CPU and RAM) will go underutilized.

See [Cluster Capacity Planning] for more details on choosing a ring size.

The steps involved in changing the ring size depend on whether the servers (nodes) in the cluster have already been joined together.

### Cluster joined, but no data needs to be preserved

1. Change the ring creation size parameter by uncommenting it and then setting it to the desired value, for example 64:

    ```riakconf
    ring_size = 64
    ```

    ```appconfig
    %% In the riak_core section:
    {ring_creation_size, 64}
    ```

2. Stop all nodes
3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
4. Start all nodes
5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]

### New servers, have not yet joined a cluster

1. Change the ring creation size parameter by uncommenting it and then setting it to the desired value, for example 64:

    ```riakconf
    ring_size = 64
    ```

    ```appconfig
    %% In the riak_core section:
    {ring_creation_size, 64}
    ```

2. Stop all nodes
3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]

### Verifying ring size

You can use the `riak-admin` command to verify the ring size:

```bash
riak-admin status | grep ring
```

Console output:

```
ring_members : ['riak@10.160.13.252']
ring_num_partitions : 8
ring_ownership : <<"[{'riak@10.160.13.252',8}]">>
ring_creation_size : 8
```

If `ring_num_partitions` and `ring_creation_size` do not agree, that means that the `ring_creation_size` value was changed too late and that the proper steps were not taken to start over with a new ring.

**Note**: Riak will not allow two nodes with different ring sizes to be joined into a cluster.

## Backend

Another critical decision to be made is the backend to use.
The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.7/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. + + + + diff --git a/content/riak/kv/2.9.7/configuring/global-object-expiration.md b/content/riak/kv/2.9.7/configuring/global-object-expiration.md new file mode 100644 index 0000000000..48713c3fb6 --- /dev/null +++ b/content/riak/kv/2.9.7/configuring/global-object-expiration.md @@ -0,0 +1,90 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
menu:
  riak_kv-2.9.7:
    name: "Global Object Expiration"
    identifier: "config_expiry"
    weight: 180
    parent: "configuring"
project: "riak_kv"
project_version: 2.9.7
toc: true
aliases:
---

[ttl]: https://en.wikipedia.org/wiki/Time_to_live

By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data.

Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value.

## Enabling Expiry

To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file:

```riak.conf
leveldb.expiration = on
```

{{% note %}}
Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration.
{{% /note %}}

## Setting Retention Time

The `retention_time` setting is used to specify the time until objects expire. Durations are set using a combination of an integer and a shortcut for the supported units:

- Milliseconds - `ms`
- Seconds - `s`
- Minutes - `m`
- Hours - `h`
- Days - `d`
- Weeks - `w`
- Fortnights - `f`

The following example configures objects to expire after 5 hours:

```riak.conf
leveldb.expiration = on
leveldb.expiration.retention_time = 5h
```

You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours:

```riak.conf
leveldb.expiration = on
leveldb.expiration.retention_time = 8d9h
```

## Expiry Modes

Global expiration supports two modes:

- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired.
- `normal` - individual objects are removed as part of the usual compaction process.

We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient.

The following example configures objects to expire after 1 day:

```riak.conf
leveldb.expiration = on
leveldb.expiration.retention_time = 1d
leveldb.expiration.mode = whole_file
```

## Disable Expiry

To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other two settings are ignored. For example:

```riak.conf
leveldb.expiration = off
leveldb.expiration.retention_time = 1d
leveldb.expiration.mode = whole_file
```

diff --git a/content/riak/kv/2.9.7/configuring/load-balancing-proxy.md b/content/riak/kv/2.9.7/configuring/load-balancing-proxy.md
new file mode 100644
index 0000000000..10064f41bd
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/load-balancing-proxy.md
@@ -0,0 +1,275 @@
---
title: "Load Balancing and Proxy Configuration"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "Load Balancing & Proxy"
    identifier: "configuring_load_balance"
    weight: 150
    parent: "configuring"
toc: true
aliases:
  - /riak/2.9.7/ops/advanced/configs/load-balanacing-proxy/
  - /riak/kv/2.9.7/ops/advanced/configs/load-balanacing-proxy/
---

[perf open files]: {{}}riak/kv/2.9.7/using/performance/open-files-limit

The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- or software-based, while never directly exposing Riak to public network interfaces.
Riak users have reported success in using Riak with a variety of load-balancing and proxy solutions. Common solutions include proprietary hardware-based load balancers, cloud-based load-balancing options, such as Amazon's Elastic Load Balancer, and open-source software-based projects like HAProxy and Nginx.

This guide briefly explores the commonly used open-source software-based solutions HAProxy and Nginx, and provides some configuration and operational tips gathered from community users and operations-oriented engineers at Basho.

While it is by no means an exhaustive overview of the topic, this guide should provide a starting point for choosing and implementing your own solution.

## HAProxy

[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source solution for load balancing and proxying of HTTP- and TCP-based application traffic.

Users have reported success in using HAProxy in combination with Riak in a number of configurations and scenarios. Much of the information and example configuration for this section is drawn from experiences of users in the Riak community in addition to suggestions from Basho engineering.

### Example Configuration

The following is an example starting-point configuration for HAProxy to act as a load balancer. The example cluster has 4 nodes and will be accessed by Riak clients using both the Protocol Buffers and HTTP interfaces.

> **Note on open files limits**
>
> The operating system's open files limits need to be greater than 256000 for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems.

```config
global
    log 127.0.0.1 local0
    log 127.0.0.1 local1 notice
    maxconn 256000
    chroot /var/lib/haproxy
    user haproxy
    group haproxy
    spread-checks 5
    daemon
    quiet

defaults
    log global
    option dontlognull
    option redispatch
    option allbackups
    maxconn 256000
    timeout connect 5000

backend riak_rest_backend
    mode http
    balance roundrobin
    option httpchk GET /ping
    option httplog
    server riak1 riak1.<fqdn>:8098 weight 1 maxconn 1024 check
    server riak2 riak2.<fqdn>:8098 weight 1 maxconn 1024 check
    server riak3 riak3.<fqdn>:8098 weight 1 maxconn 1024 check
    server riak4 riak4.<fqdn>:8098 weight 1 maxconn 1024 check

frontend riak_rest
    bind 127.0.0.1:8098
    # Example bind for SSL termination
    # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem
    mode http
    option contstats
    default_backend riak_rest_backend


backend riak_protocol_buffer_backend
    balance leastconn
    mode tcp
    option tcpka
    option srvtcpka
    server riak1 riak1.<fqdn>:8087 weight 1 maxconn 1024 check
    server riak2 riak2.<fqdn>:8087 weight 1 maxconn 1024 check
    server riak3 riak3.<fqdn>:8087 weight 1 maxconn 1024 check
    server riak4 riak4.<fqdn>:8087 weight 1 maxconn 1024 check


frontend riak_protocol_buffer
    bind 127.0.0.1:8087
    mode tcp
    option tcplog
    option contstats
    mode tcp
    option tcpka
    option srvtcpka
    default_backend riak_protocol_buffer_backend
```

A specific configuration detail worth noting from the example is the commented option for SSL termination. HAProxy supports SSL directly as of version 1.5. Provided that your HAProxy instance was built with OpenSSL support, you can enable it by uncommenting the example line and modifying it to suit your environment. More information is available in the [HAProxy documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl).
Also note that the above example is considered a starting point and is a work in progress based upon [this example](https://gist.github.com/1507077). You should carefully examine the configuration and change it according to your specific environment.

### Maintaining Nodes Behind HAProxy

When using HAProxy with Riak, you can instruct HAProxy to ping each node in the cluster and automatically remove nodes that do not respond.

You can also specify a round-robin configuration in HAProxy and have your application handle connection failures by retrying after a timeout, thereby reaching a functioning node upon retrying the connection attempt.

HAProxy also has a standby system you can use to remove a node from rotation while allowing existing requests to finish. You can remove nodes from HAProxy directly from the command line by interacting with the HAProxy stats socket with a utility such as [socat](http://www.dest-unreach.org/socat/):

```bash
echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
```

At this point, you can perform maintenance on the node, down the node, and so on. When you've finished working with the node and it is again available for requests, you can re-enable it:

```bash
echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
```

Consult the following HAProxy documentation resources for more information on configuring HAProxy in your environment:

* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy)
* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)

## Nginx

Some users have reported success in using the [Nginx](http://nginx.org/) HTTP server to proxy requests for Riak clusters. An example that provides access to a Riak cluster *through GET requests only* is provided here for reference.

### Example Configuration

The following is an example starting point configuration for Nginx to act as a front-end proxy to a 5-node Riak cluster.

This example forwards all GET requests to Riak nodes while rejecting all other HTTP operations.

{{% note title="Nginx version notes" %}}
This example configuration was verified on **Nginx version 1.2.3**. Please be aware that earlier versions of Nginx did not support any HTTP 1.1 semantics for upstream communication to backends. You should carefully examine this configuration and make changes appropriate to your specific environment before attempting to use it.
{{% /note %}}

Here is an example `nginx.conf` file:

```config
upstream riak_hosts {
  # server 10.0.1.10:8098;
  # server 10.0.1.11:8098;
  # server 10.0.1.12:8098;
  # server 10.0.1.13:8098;
  # server 10.0.1.14:8098;
}

server {
  listen   80;
  server_name  _;
  access_log  /var/log/nginx/riak.access.log;

  # your standard Nginx config for your site here...
  location / {
    root /var/www/nginx-default;
  }

  # Expose the /riak endpoint and allow queries for keys only
  location /riak/ {
      proxy_set_header Host $host;
      proxy_redirect off;

      client_max_body_size    10m;
      client_body_buffer_size 128k;

      proxy_connect_timeout   90;
      proxy_send_timeout      90;
      proxy_read_timeout      90;

      proxy_buffer_size          64k;  # If set to a smaller value,
                                       # nginx can complain with a
                                       # "too large headers" error
      proxy_buffers              4 64k;
      proxy_busy_buffers_size    64k;
      proxy_temp_file_write_size 64k;

      if ($request_method != GET) {
        return 405;
      }

      # Disallow any link with the MapReduce query format "bucket,tag,_"
      if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) {
        return 405;
      }

      if ($request_method = GET) {
        proxy_pass http://riak_hosts;
      }
  }
}
```

{{% note title="Note on access controls" %}}
Even when filtering and limiting requests to GETs only as done in the example, you should strongly consider additional access controls beyond what Nginx can provide directly, such as specific firewall rules to limit inbound connections to trusted sources.
{{% /note %}}

### Querying Secondary Indexes Over HTTP

When accessing Riak over HTTP and issuing Secondary Index queries, you can encounter an issue due to the default Nginx handling of HTTP header names containing underscore (`_`) characters.

By default, Nginx will issue errors for such queries, but you can instruct Nginx to handle such header names when doing Secondary Index queries over HTTP by adding the following directive to the appropriate `server` section of `nginx.conf`:

```
underscores_in_headers on;
```

diff --git a/content/riak/kv/2.9.7/configuring/managing.md b/content/riak/kv/2.9.7/configuring/managing.md
new file mode 100644
index 0000000000..86019bc8e1
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/managing.md
@@ -0,0 +1,121 @@
---
title: "Managing Your Configuration"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "Managing Configuration"
    identifier: "configuring_managing"
    weight: 130
    parent: "configuring"
toc: true
aliases:
---

[use admin riak cli]: {{}}riak/kv/2.9.7/using/admin/riak-cli
[use admin riak cli#chkconfig]: {{}}riak/kv/2.9.7/using/admin/riak-cli/#chkconfig
[config reference#search]: {{}}riak/kv/2.9.7/configuring/reference/#search

## Retrieving a Configuration Listing

At any time, you can get a snapshot of currently applied configurations through the command line. For a listing of *all* of the configs currently applied in the node:

```bash
riak config effective
```

This will output a long list of the following form:

```
anti_entropy = active
anti_entropy.bloomfilter = on
anti_entropy.concurrency_limit = 2
# and so on
```

For detailed information about a particular configuration variable, use the `config describe <variable>` command. This command will output a description of what the parameter configures, which datatype you should use to set the parameter (integer, string, enum, etc.), the default value of the parameter, the currently set value in the node, and the name of the parameter in `app.config` in older versions of Riak (if applicable).

For in-depth information about the `ring_size` variable, for example:

```bash
riak config describe ring_size
```

This will output the following:

```
Documentation for ring_size
Number of partitions in the cluster (only valid when first
creating the cluster).
Must be a power of 2, minimum 8 and maximum
1024.

   Datatype     : [integer]
   Default Value: 64
   Set Value    : undefined
   app.config   : riak_core.ring_creation_size
```

## Checking Your Configuration

The [`riak`][use admin riak cli] command line tool has a [`chkconfig`][use admin riak cli#chkconfig] command that enables you to determine whether the syntax in your configuration files is correct.

```bash
riak chkconfig
```

If your configuration files are syntactically sound, you should see the output `config is OK` followed by a listing of files that were checked. You can safely ignore this listing. If, however, something is syntactically awry, you'll see an error output that provides details about what is wrong. To give an example, the `search.solr.jmx_port` setting (in the [Search][config reference#search] section below) must be set as an integer. Imagine that we set it to something else:

```riakconf
search.solr.jmx_port = banana
```

If we run `riak chkconfig` now, we'll get an error:

```
[error] Error generating configuration in phase transform_datatypes
[error] Error transforming datatype for: search.solr.jmx_port
[error] "banana" can't be converted to an integer
```

The error message will specify which configurable parameters are syntactically unsound and attempt to provide an explanation why.

Please note that the `chkconfig` command only checks for syntax. It will _not_ be able to discern if your configuration is otherwise unsound, e.g. if your configuration will cause problems on your operating system or doesn't activate subsystems that you would like to use.

## Debugging Your Configuration

If there is a problem with your configuration but you're having trouble identifying the problem, there is a command that you can use to debug your configuration:

```bash
riak config generate -l debug
```

If there are issues with your configuration, you will see detailed output that might provide a better sense of what has gone wrong in the config generation process.

diff --git a/content/riak/kv/2.9.7/configuring/mapreduce.md b/content/riak/kv/2.9.7/configuring/mapreduce.md
new file mode 100644
index 0000000000..177ee6488a
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/mapreduce.md
@@ -0,0 +1,200 @@
---
title: "MapReduce Settings"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "MapReduce Settings"
    identifier: "configuring_mapreduce"
    weight: 170
    parent: "configuring"
toc: true
aliases:
  - /riak/2.9.7/ops/advanced/configs/mapreduce/
  - /riak/kv/2.9.7/ops/advanced/configs/mapreduce/
---

[usage mapreduce]: {{}}riak/kv/2.9.7/developing/usage/mapreduce
[config reference#appconfig]: {{}}riak/kv/2.9.7/configuring/reference/#app-config
[usage secondary-indexes]: {{}}riak/kv/2.9.7/developing/usage/secondary-indexes

## Configuring MapReduce

[MapReduce (M/R)][usage mapreduce] is always enabled, but it is configurable through the [app.config][config reference#appconfig] file, as follows, under `riak_kv`:

```erlang
{riak_kv, [
```

`mapred_name` is the URL directory used to submit M/R requests to Riak. By default `mapred`, making the command path, for example: `http://localhost:8098/mapred`

```erlang
    {mapred_name, "mapred"},
```

`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes] MapReduce inputs are queued in parallel in their own pipe (`true`), or serially through a helper process (`false` or undefined).
> **Note**: Set to `false` or leave undefined during an upgrade from 1.0.

```erlang
    {mapred_2i_pipe, true},
```

Each of these entries controls how many Javascript virtual machines are available for executing map, reduce, pre- and post-commit hook functions.

This is largely relevant only if you are writing JavaScript M/R jobs.

```erlang
    {map_js_vm_count, 8 },
    {reduce_js_vm_count, 6 },
    {hook_js_vm_count, 2 },
```

`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated to the Javascript VMs. If unset, the default is 8MB.

This is largely relevant only if you are writing JavaScript M/R jobs.

```erlang
    {js_max_vm_mem, 8},
```

`js_thread_stack` is the maximum amount of thread stack, in megabytes, allocated to the Javascript VMs. If unset, the default is 16MB.

> **Note**: This is not the same as the C thread stack.

```erlang
    {js_thread_stack, 16},
```

`js_source_dir` should point to a directory containing Javascript source files which will be loaded when Riak initializes Javascript VMs.

```erlang
    %{js_source_dir, "/tmp/js_source"},
```

## Configuration Tuning for Javascript

If you load larger JSON objects in your buckets, there is a possibility that you might encounter an error like the following:

```json
 {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"}
```

You can increase the amount of memory allocated to the Javascript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB:

```erlang
{js_thread_stack, 8}
```

becomes

```erlang
{js_thread_stack, 32},
```

In addition to increasing the amount of memory allocated to the stack, you can increase the heap size as well by increasing the `js_max_vm_mem` from the default of 8MB. If you are collecting a large amount of results in a reduce phase, you may need to increase this setting.

## Configuration for Riak 1.0

Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config:

```erlang
%% Use Riak Pipe to power MapReduce queries
{mapred_system, pipe},
```

> **Warning:**
>
> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0.

Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this:

```erlang
%% Use the legacy MapReduce system
{mapred_system, legacy},
```

## Configuration Tuning for Reduce Phases

If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases.

### Batch Size

By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs.
If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config:

```erlang
%% Run reduce functions after 100 new inputs are received
{mapred_reduce_phase_batch_size, 100},
```

You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this:

```json
{"reduce":
  {...language, etc. as usual...
   "arg":{"reduce_phase_batch_size":150}}}
```

In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form:

```erlang
{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep}
```

Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead:

```json
{"reduce":
  {...language, etc. as usual...
   "arg":{"reduce_phase_only_1":true}}}
```

Similarly, in Erlang:

```erlang
{reduce, FunSpec, [reduce_phase_only_1], Keep}
```

> **Warning:**
>
> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1.

### Pre-Reduce

If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced local to the partition that produced them, before being sent, as usual, to the final aggregate reduce.

Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config:

```erlang
%% Always pre-reduce between map and reduce phases
{mapred_always_prereduce, true}
```

Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag:

```erlang
{map, FunSpec, [do_prereduce], Keep}
```

> **Warning:**
>
> A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1.

diff --git a/content/riak/kv/2.9.7/configuring/next-gen-replication.md b/content/riak/kv/2.9.7/configuring/next-gen-replication.md
new file mode 100644
index 0000000000..8ffa638782
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/next-gen-replication.md
@@ -0,0 +1,63 @@
---
title_supertext: "Configuring:"
title: "Next Gen Replication"
description: ""
project: "riak_kv"
project_version: "2.9.7"
menu:
  riak_kv-2.9.7:
    name: "Next Gen Replication"
    identifier: "nextgen_rep"
    weight: 200
    parent: "configuring"
version_history:
  in: "2.9.1+"
toc: true
commercial_offering: true
aliases:
---

The configuration for Next Gen Replication is kept in the `riak.conf` configuration file.
## Settings

Once your configuration is set, you can verify its correctness by running the `riak` command-line tool:

```bash
riak chkconfig
```

## riak.conf Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync, does all data need to be sync'd, or should a specific bucket be sync'd (bucket), or a specific bucket type (type)? Note that in most cases sync of all data is lower overhead than sync of a subset of data - as cached AAE trees will be used.
`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For Tictac full-sync, what registered queue name on this cluster should be used for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exclusive to full-sync (i.e. a real-time replication queue may be used as well).
`ttaaefs_maxresults` | `any` (integer) | `64` | For Tictac full-sync, what is the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair.
`ttaaefs_rangeboost` | `any` (integer) | `8` | For Tictac full-sync, a multiplier on the maximum number of AAE segments to be compared per exchange. When running a range_check query the maximum number of results will be ttaaefs_maxresults * ttaaefs_rangeboost.
`ttaaefs_bucketfilter_name` | `any` (text) | `` | For Tictac bucket full-sync, which bucket should be sync'd by this node. Only ascii string bucket definitions supported (which will be converted using list_to_binary).
`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync, what is the bucket type of the bucket name. Only ascii string type bucket definitions supported (these definitions will be converted to binary using list_to_binary).
`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync, which NVAL should be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval.
`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync, which NVAL should be sync'd in the remote cluster.
`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node in the cluster with which this node will connect to for full_sync purposes. If this peer node is unavailable, then this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes.
`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster.
`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when connecting to the peer in the remote cluster. Could be http or pb (but only http currently being tested).
`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24-hour period should all the data be checked to confirm it is fully sync'd. When running a full (i.e. nval) sync this will check all the data under that nval between the clusters, and when the trees are out of alignment, will check across all data where the nval matches the specified nval.
`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24-hour period should no data be checked to confirm it is fully sync'd. Use nochecks to align the number of checks done by each node - if each node has the same number of slots, they will naturally space their checks within the period of the slot.
`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24-hour period should the last hour's data be checked to confirm it is fully sync'd.
`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24-hour period should the last 24 hours of data be checked to confirm it is fully sync'd.
`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24-hour period should a range_check be run.
`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, should each key that is repaired be logged.
`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable tictacaae. Note that disabling tictacaae will set the use of tictacaae_active only at startup - setting the environment variable at runtime will have no impact.
`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Set the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and not removed when that partition hands off (although the contents should be cleared).
`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if tictacaae is enabled, then the vnode will detect if the vnode backend has the capability to be a "native" store. If not, then parallel mode will be entered, and a parallel AAE keystore will be started. There are two potential parallel store backends - leveled_ko and leveled_so.
`tictacaae_rebuildwait` | `` | `336` | This is the number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
`tictacaae_rebuilddelay` | `` | `345600` | Once the AAE system has expired (due to the rebuild wait), the rebuild will not be triggered until the rebuild delay, which will be a random number up to the size of this delay (in seconds).
`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will skip when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans developing.
`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on the riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data; hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired each pass is defined by the tictacaae_maxresults.
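
As an illustration, a riak.conf sketch for a node participating in an all-data full-sync might look like the following (the peer address, port, and check count are illustrative assumptions, not recommendations):

```riakconf
# Illustrative sketch only - peer address, port and check count are examples
ttaaefs_scope = all
ttaaefs_localnval = 3
ttaaefs_remotenval = 3
ttaaefs_peerip = 10.0.0.1
ttaaefs_peerport = 8098
ttaaefs_peerprotocol = http
ttaaefs_allcheck = 24
```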
+ diff --git a/content/riak/kv/2.9.7/configuring/reference.md b/content/riak/kv/2.9.7/configuring/reference.md new file mode 100644 index 0000000000..488fdb1425 --- /dev/null +++ b/content/riak/kv/2.9.7/configuring/reference.md @@ -0,0 +1,2039 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.7/ops/advanced/configs/configuration-files/ + - /riak/kv/2.9.7/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. 
+ +## Node Metadata + +Every Riak node has a name and a cookie used to facilitate inter-node +communication. The following parameters enable you to customize the name +and cookie. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
distributed_cookieCookie for distributed node communication within a Riak cluster. +All nodes in the same cluster should use the same cookie or they will +not be able to communicate.riak
nodenameThe name of the Riak node.riak@127.0.0.1
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
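+
+For example, a node in a hypothetical production cluster might set these values as follows in riak.conf (the node name, cookie, and ring size here are placeholders, not recommendations):
+
+```riakconf
+# Every node in the cluster must share the same cookie.
+nodename = riak@192.168.1.10
+distributed_cookie = riak_prod_cluster
+# Power of 2, between 8 and 1024; fixed at cluster creation.
+ring_size = 128
+```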
+ +## Ring + +Configurable parameters for your cluster's [ring][concept clusters]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ring.state_dirDefault location of ringstate../data/ring
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
transfer_limitNumber of concurrent node-to-node transfers allowed.2
+ +## Storage Backend + +Riak enables you to choose from the following storage backends: + +* [Bitcask][plan backend bitcask] + - [configuration][config backend bitcask] +* [LevelDB][plan backend leveldb] + - [configuration][config backend leveldb] +* [Leveled][plan backend leveled] + - [configuration][config backend leveled] +* [Memory][plan backend memory] + - [configuration][config backend memory] +* [Multi][plan backend multi] + - [configuration][config backend multi] + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
storage_backendSpecifies the storage engine used for Riak's key-value data and +secondary indexes (if supported).

The available options are +bitcask (the default), leveldb, +memory, leveled and multi.
bitcask
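+
+For example, to run a node on LevelDB rather than the default Bitcask, you would set:
+
+```riakconf
+# One of: bitcask, leveldb, memory, leveled, multi
+storage_backend = leveldb
+```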
+ +## Directories + +The directories in which Riak stores data, logs, dependencies, +executables, and configuration files can be configured using the +parameters below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
platform_bin_dirThe directory in which the riak-admin, +riak-debug, and now-deprecated search-cmd +executables are stored../bin
platform_data_dirThe directory in which Riak stores its storage backend data, as well +as active anti-entropy data, and cluster metadata../data
platform_etc_dirThe directory in which Riak's configuration files are stored../etc
platform_lib_dirThe directory in which Riak's dependencies are housed../lib
platform_log_dirThe directory in which Riak's log files are stored, e.g. +console.log, erlang.log, and +crash.log files../log
+
+Each of these directory parameters can be used to construct values for
+other parameters by placing it within a `$(...)`. Thus,
+`platform_log_dir` becomes `$(platform_log_dir)` and so on.
+
+To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the
+`anti_entropy.data_dir` parameter. When setting that parameter, you can
+specify an absolute directory, as below:
+
+```riakconf
+anti_entropy.data_dir = /path/to/anti_entropy
+```
+
+Or you can use the value of `platform_data_dir`:
+
+```riakconf
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```
+
+## Search
+
+Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`search` | `off` | `on` or `off`
+`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
+`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
+`search.dist_query` | `on` | `on` or `off`
+`search.index.error_threshold.failure_count` | `3` | Integer
+`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
+`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
+`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
+`search.queue.batch.maximum`| `100` | Integer
+`search.queue.batch.minimum` | `1` | Integer
+`search.queue.high_watermark` | `10000` | Integer
+`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
+`search.root_dir` | `./data/yz` | Directory
+`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
+`search.solr.jmx_port` | `8985` | Integer
+`search.solr.port` | `8093` | Integer
+`search.solr.start_timeout` | `30s` | Integer with time units (e.g. 2m)
+`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`
+
+## Riak Control
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters. The configurable parameters below enable you
+to turn the Riak Control subsystem on and off and to configure console
+authorization.
ConfigDescriptionDefault
riak_controlSet to off to disable the admin panel.off
riak_control.auth.modeAuthentication mode used for access to the admin panel. Options are +off (which is the default) or userlist.off
riak_control.auth.user.$username.passwordIf Riak Control's authentication mode +(riak_control.auth.mode) is set to userlist, +this is the list of usernames and passwords for access to the admin +panel.
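+
+As an illustration, enabling Riak Control with userlist authentication might look like the sketch below; the username and password are placeholders:
+
+```riakconf
+riak_control = on
+riak_control.auth.mode = userlist
+# Replace "admin" and "pass1234" with your own credentials.
+riak_control.auth.user.admin.password = pass1234
+```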
+ +## Runtime Health + +Configurable parameters for interaction between Riak and the underlying +operating system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
runtime_health.triggers.distribution_portWhether distribution ports with full input buffers will be counted +as busy. Distribution ports connect Riak nodes within a single cluster. +on
runtime_health.triggers.portWhether ports with full input buffers will be counted as busy. +Ports can represent open files or network sockets.on
runtime_health.triggers.process.heap_sizeA process will become busy when its heap exceeds this size +(in bytes).160444000
runtime_health.triggers.process.garbage_collectionA process will become busy when it exceeds this amount of time doing +garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 +milliseconds, `5s` for 5 seconds, etc.Note: Enabling +this setting can cause performance problems on multi-core systems.off
runtime_health.triggers.process.long_scheduleA process will become busy when it exceeds this amount of time +during a single process scheduling and execution cycle. Set as an integer +plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, +etc.off
runtime_health.thresholds.busy_portsThe threshold at which a warning will be triggered about the number +of ports that are overly busy. Ports with full input buffers count +toward this threshold.2
runtime_health.thresholds.busy_processesThe threshold at which a warning will be triggered about the +number of processes that are overly busy. Processes with large heaps or +that take a long time to garbage collect will count toward this +threshold.30
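+
+For instance, a hypothetical tuning that raises the busy-port and busy-process warning thresholds on a large machine could look like this; the numbers are illustrative, not recommendations:
+
+```riakconf
+# Warn later about busy ports and processes than the defaults (2 and 30).
+runtime_health.thresholds.busy_ports = 4
+runtime_health.thresholds.busy_processes = 50
+```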
+ +## Default Bucket Properties + +When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
buckets.default.allow_multWhether or not siblings are allowed +

+Note: See +Conflict Resolution for a discussion of siblings.
false
buckets.default.basic_quorumWhether not-founds will invoke the "basic quorum" optimization. +This setting will short-circuit fetches where the majority of replicas +report that the key is not found. Only used when +notfound_ok is set to false.false
buckets.default.dwThe number of replicas which must reply to a write request +indicating that the write was committed to durable storage for the write +to be deemed successful.quorum
buckets.default.last_write_winsWhether conflicting writes resolve via timestamp.false
buckets.default.merge_strategyThe strategy used when merging objects that potentially have +conflicts. The default is 2 in Riak 2.0 for typed buckets +and 1 for non-typed buckets. This setting reduces sibling +creation through additional metadata on each sibling (also known as Dotted +Version Vectors). Setting this to 1 is the default for +Riak 1.4 and earlier, and may duplicate siblings that originated in the +same write.1
buckets.default.n_valThe number of replicas stored in **non-typed** buckets. For typed buckets, the default is 3 unless changed explicitly for that bucket type. +

+Note: See +Replication Properties +for further discussion.
3
buckets.default.notfound_okWhether not-founds will count toward a quorum of reads.true
buckets.default.postcommitA space-delimited list of functions that will be run after a value +is stored. Only Erlang functions are allowed, using the +module:function format.
buckets.default.precommitA space-delimited list of functions that will be run before a value +is stored, and that can abort the write. Only Erlang functions are +allowed, using the module:function format.
buckets.default.prThe number of primary, non-fallback replicas that must reply to a +read request.0
buckets.default.pwThe number of primary, non-fallback replicas which must reply to a +write request.0
buckets.default.rThe number of replicas which must reply to a read request.quorum
buckets.default.wThe number of replicas which must reply to a write request, +indicating that the write was received.quorum
buckets.default.rwThe number of replicas which must reply to a delete request.quorum
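+
+To make these concrete, an illustrative riak.conf fragment that enables siblings for untyped buckets and keeps quorum reads and writes would be:
+
+```riakconf
+# Siblings require conflict resolution in your application.
+buckets.default.allow_mult = true
+buckets.default.n_val = 3
+buckets.default.r = quorum
+buckets.default.w = quorum
+```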
+ +## Object Settings + +Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
object.formatControls which binary representation of a riak value is stored on +disk. Options are 0, which will use the original +erlang:term_to_binary format but has a higher space +overhead, or 1, which will tell Riak to utilize a new +format for more compact storage of small values.1
object.siblings.maximumWriting an object with more than this number of siblings will send +a failure to the client.100
object.siblings.warning_thresholdWriting an object with more than this number of siblings will +generate a warning in the logs.25
object.size.maximumWriting an object larger than this will send a failure to the +client.50MB
object.size.warning_thresholdReading or writing objects larger than this size will write a +warning in the logs.5MB
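+
+As a sketch, tightening the object safety valves well below their defaults might look like this; the thresholds are illustrative:
+
+```riakconf
+# Warn early and fail long before the 50MB default hard limit.
+object.size.warning_threshold = 1MB
+object.size.maximum = 10MB
+object.siblings.warning_threshold = 10
+object.siblings.maximum = 50
+```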
+ +## Erlang VM + +In the older configuration system, the Erlang VM in which Riak runs was +configured using a `vm.args` file. In the new, `riak.conf`-based +system, the Erlang VM can be configured using the parameters in the +table below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
erlang.async_threadsThe number of threads in the Erlang VM's asynchronous thread pool. +The valid range is 0-1024. If thread support is not available, this +parameter will have no impact; if thread support is available, the +default value is 64. This is the equivalent of the +A flag. +More information can be found here. +64 (if thread support is available)
erlang.async_threads.stack_sizeIf thread support is available in your Erlang VM, this parameter +sets the amount of memory allocated to each asynchronous thread, which +you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, +which translates to 64-32768 KB on 32-bit architectures. Although there +is no default, we suggest a stack size of 16 kilowords, which translates +to 64 KB. This small default size has been chosen because the number of +asynchronous threads, set using the erlang.async_threads +parameter explained above, might be quite large. The 64 KB default is +enough for drivers delivered with Erlang/OTP but might not be large +enough to accommodate drivers that use the driver_async() +functionality, documented here.
erlang.distribution.net_ticktimeThe net kernel is an Erlang system process that provides various +forms of network monitoring. In a Riak cluster, one of the functions of +the net kernel is to periodically check node liveness. Tick +time is the frequency with which those checks happen. This +parameter determines that frequency in seconds. If you set +this parameter to 10, for example, the tick will occur once +every 10 seconds.
erlang.distribution.port_range.minimumFor ease of firewall configuration, the Erlang distribution can be +bound to a limited range of TCP ports. If this parameter is set, and +erlang.distribution.port_range.maximum is not set, only +this port will be used. If the minimum is unset, no restriction will be +made on the port range. Instead, Erlang will listen on a random +high-numbered port. More information here and here.
erlang.distribution.port_range.maximumSee the description for +erlang.distribution.port_range.minimum directly above. +
erlang.schedulers.force_wakeup_intervalSet the scheduler forced wakeup interval. All run queues will be +scanned each time period specified (in milliseconds). While there are +sleeping schedulers in the system, one scheduler will be woken for each +non-empty run queue found. An interval of zero disables this feature, +which is the default. This feature is a workaround for lengthy executing +native code, and native code that does not properly bump reductions. +More information here.
erlang.schedulers.compaction_of_loadEnables or disables the Erlang scheduler's compaction of load. When +enabled (which is the default), load balancing will strive to establish +a load distribution that causes as many scheduler threads as possible to +be fully loaded, i.e. not to run out of scheduled work. This is +accomplished by migrating load, such as running processes, into a +smaller set of schedulers when schedulers frequently run out of work. +When disabled, the frequency at which schedulers run out of work will +not be taken into account by the load balancing logic.true (enabled)
erlang.schedulers.utilization_balancingEnables or disables the Erlang scheduler's balancing of load. By +default, scheduler utilization balancing is disabled while scheduler +compaction of load is enabled, i.e. +erlang.schedulers.compaction_of_load is set to +true. In this state, the Erlang VM will strive for a load +distribution which causes as many scheduler threads as possible to be +fully loaded, i.e. to not run out of work. When load balancing is +enabled using this setting, the system will instead attempt to balance +scheduler utilization equally between schedulers.false (disabled)
erlang.distribution_buffer_sizeFor nodes with many busy_dist_port events, Basho +recommends raising the sender-side network distribution buffer size. +32MB may not be sufficient for some workloads and is a suggested +starting point. Erlangers may know this as +zdbbl. See more +here +.32MB
erlang.process_limitRaises the default Erlang process limit256000
erlang.max_ets_tablesRaises the ETS table limit256000
erlang.crash_dumpSets the location of crash dumps./log/erl_crash.dump
erlang.fullsweep_afterA non-negative integer which indicates how many times generational +garbage collections can be done without forcing a fullsweep collection. +In low-memory systems (especially without virtual memory), setting the +value to 0 can help to conserve memory. More information here. +0
erlang.max_portsThe number of concurrent ports/sockets. The valid range is 1024 to +134217727.65536
erlang.KEnables or disables the kernel poll functionality if the emulator +supports it. If the emulator does not support kernel poll, and the +K flag is passed to the emulator, a warning is issued at +startup. Similar information here.on
erlang.schedulers.totalSets the number of scheduler threads to create and scheduler +threads to set online when erlang.smp support has been +enabled. The maximum for both values is 1024. If the Erlang runtime +system is able to determine the amount of logical processors configured +and logical processors available, schedulers.total will +default to logical processors configured, and +schedulers.online will default to the number of logical +processors available. Otherwise, the default values will be 1. +Schedulers may be omitted if schedulers.online is not and +vice versa. If schedulers.total or +schedulers.online is specified as a negative number, the +value is subtracted from the default number of logical processors +configured or logical processors available, respectively. Specifying +the value 0 for Schedulers or +SchedulersOnline resets the number of scheduler threads or +scheduler threads online respective to its default value. This option +is ignored if the emulator doesn't have SMP support enabled (see the +erlang.smp flag). More information +here. +
erlang.schedulers.onlineSee the description for erlang.schedulers.total +directly above.
erlang.WSets the mapping of warning messages for error_logger. +Messages sent to the error logger using one of the warning routines can +be mapped either to errors, warnings (w, +which is the default), or info reports (i).w
erlang.smpStarts the Erlang runtime system with SMP support enabled. This may +fail if no runtime system with SMP support is available. The +auto setting starts the Erlang runtime system with SMP +support enabled if it is available and more than one logical processor +is detected. A value of disable starts a runtime system +without SMP support. Note: The runtime system with SMP +support will not be available on all supported platforms. See also the +erlang.schedulers settings. Some native extensions (NIFs) +require use of the SMP emulator. More information here.enable
erlang.shutdown_timeLimits how long the Erlang VM spends shutting down. After the +specified duration elapses, all existing processes are killed.10s
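+
+As an example, a node that sees frequent busy_dist_port events and must keep distribution traffic inside a firewall-friendly port range might use the following sketch; the port numbers are placeholders:
+
+```riakconf
+erlang.distribution_buffer_size = 32MB
+# Placeholder range - choose ports that match your firewall policy.
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+```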
+ +## JavaScript MapReduce + +Configurable parameters for Riak's now-deprecated JavaScript +[MapReduce][usage mapreduce] system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
javascript.source_dirA directory containing the Javascript source files which will be +loaded by Riak when it initializes Javascript VMs.
javascript.maximum_stack_sizeThe maximum amount of thread stack memory to allocate to each +JavaScript virtual machine.16MB
javascript.maximum_heap_sizeThe maximum amount of memory allocated to each JavaScript virtual +machine.8MB
javascript.hook_pool_sizeThe number of JavaScript virtual machines available for executing +pre-commit hook functions.2
javascript.reduce_pool_sizeThe number of JavaScript virtual machines available for executing +reduce functions.6
javascript.map_pool_sizeThe number of JavaScript virtual machines available for executing +map functions.8
+ +## Security + +Configurable parameters for [Riak KV Security][security index]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ssl.cacertfileThe default signing authority location for HTTPS.#(platform_etc_dir)/cacertfile.pem
ssl.keyfileDefault key location for HTTPS.#(platform_etc_dir)/key.pem
ssl.certfileDefault cert location for HTTPS.#(platform_etc_dir)/cert.pem
secure_referer_checkMeasures were added to Riak 1.2 to counteract cross-site scripting +and request-forgery attacks. Some reverse proxies cannot remove the +Referer header and make serving data directly from Riak +impossible. Turning this setting to off disables this +security check.on
check_crlWhether to check the certificate +revocation list (CRL) of a client certificate. This defaults to +on but some CAs may not maintain or define a CRL, so this +can be disabled if no CRL is available.on
tls_protocols.sslv3Determine which SSL/TLS versions are allowed. By default, only TLS +1.2 is allowed, but other versions can be enabled if clients don't +support the latest TLS standard. It is strongly recommended that SSLv3 +not be enabled unless absolutely necessary. More than one protocol can +be enabled at once. The tls_protocols parameters below can +be used to turn different versions on and off.off
tls_protocols.tlsv1.2on
tls_protocols.tlsv1.1off
tls_protocols.tlsv1off
honor_cipher_orderWhether to prefer the order in which the server lists its ciphers. +When set to off, the client's preferred cipher order +dictates which cipher is chosen.on
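+
+For illustration, a conservative TLS posture that also keeps the referer check enabled could be expressed as:
+
+```riakconf
+tls_protocols.tlsv1.2 = on
+tls_protocols.tlsv1.1 = off
+tls_protocols.sslv3 = off
+secure_referer_check = on
+honor_cipher_order = on
+```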
+ +## Client Interfaces + +Configurable parameters for clients connecting to Riak either through +Riak's Protocol Buffers or HTTP API. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
protobuf.nagleTurns off Nagle's algorithm for Protocol Buffers connections. This +is equivalent to setting the TCP_NODELAY option on the +socket.off
protobuf.backlogThe maximum length to which the queue of pending connections may +grow. If set, it must be an integer greater than zero. If you +anticipate a huge number of connections being initialized +simultaneously, set this number higher.128
listener.protobuf.$nameThis is the IP address and TCP port to which the Riak Protocol +Buffers interface will bind.{"127.0.0.1",8087}
listener.http.$nameThis is the IP address and TCP port to which the Riak HTTP +interface will bind.{"127.0.0.1",8098}
listener.https.$nameThis is the IP address and TCP port to which the Riak HTTPS +interface will bind.
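+
+A sketch of binding the client listeners to a private interface, assuming riak.conf's address:port form for listener values (the address and listener name are placeholders):
+
+```riakconf
+listener.protobuf.internal = 10.0.0.5:8087
+listener.http.internal = 10.0.0.5:8098
+```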
+ +## Logging + +Configurable parameters for [lager](https://github.com/basho/lager), +Riak's logging system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
log.consoleWhere to emit the default log messages (typically at +info severity). Possible values: off, which +disables console log messages; file, which specifies that +log messages will be output to the file specified by +log.console.file; console, which outputs +messages to standard output (seen when using riak +attach-direct); or both, which outputs messages both +to the file specified in log.console.file and to standard +out.file
log.console.fileWhen log.console is set to file or +both, this parameter determines the path of the file to +which console messages will be logged../log/console.log
log.console.levelThe severity level of the console log. Possible values:
  • debug
  • info
  • warning
  • error
info
log.crashWhether to enable the crash logon
log.crash.fileIf the crash log is enabled, the file where its messages will be +written./log/crash.log
log.crash.maximum_message_sizeMaximum size of individual messages in the crash log64KB
log.crash.rotationThe schedule on which to rotate the crash log. More information here. +$D0
log.crash.rotation.keepThe number of rotated crash logs to keep. When set to +current, only the current open log file is kept. +Otherwise, an integer can be specified.5
log.crash.sizeMaximum size of the crash log before it is rotated10MB
log.error.fileThe file where error messages will be logged../log/error.log
log.error.messages_per_secondMaximum number of error_logger messages to handle per +second100
log.error.redirectWhether to redirect error_logger messages into +lageron
log.syslogWhen set to on, enables log output to syslogoff
log.syslog.facilitySets the facility level of syslog output if log.syslog is set to on. Possible values:
  • auth
  • authpriv
  • clock
  • cron
  • daemon
  • ftp
  • kern
  • lpr
  • mail
  • news
  • syslog
  • user
  • uucp
In addition to these settings, you may also select local0 through local7.
daemon
log.syslog.identIf log.syslog is set to on, this setting +determines the prefix appended to each syslog message.riak
log.syslog.levelIf log.syslog is set to on, this setting determines the log level of syslog output. Possible values:
  • alert
  • critical
  • debug
  • emergency
  • error
  • info
  • none
  • notice
  • warning
info
saslWhether to enable sasl, Erlang's +built-in error loggeroff
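+
+Putting a few of these together, an illustrative logging setup that writes to both the console file and standard out while mirroring warnings to syslog might be:
+
+```riakconf
+log.console = both
+log.console.level = info
+log.syslog = on
+log.syslog.facility = daemon
+log.syslog.level = warning
+```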
+ +## Active Anti-Entropy + +Configurable parameters for Riak's active anti-entropy subsystem. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
anti_entropyHow Riak will repair out-of-sync keys. If set to +active, out-of-sync keys will be repaired in the +background; if set to passive, out-of-sync keys are only +repaired on read; and if set to active-debug, verbose +debugging information will be output.active
anti_entropy.throttleWhether the distributed throttle for Active Anti-Entropy is +enabled.on
anti_entropy.throttle.$tier.mailbox_sizeSets the throttling tiers for Active Anti-Entropy. Each tier is a +minimum vnode mailbox size and a time-delay that the throttle should +observe at that size and above. For example, +anti_entropy.throttle.tier1.mailbox_size = 0, +anti_entropy.throttle.tier1.delay = 0ms, +anti_entropy.throttle.tier2.mailbox_size = 40, +anti_entropy.throttle.tier2.delay = 5ms, etc. If +configured, there must be a tier which includes a mailbox size of 0. +Both .mailbox_size and .delay must be set for +each tier.
anti_entropy.throttle.$tier.delaySee the description for +anti_entropy.throttle.$tier.mailbox_size above.
anti_entropy.bloomfilterBloom filters are highly effective in shortcutting data queries +that are destined to not find the requested key, though they tend to +entail a small performance cost.on
anti_entropy.max_open_files20
anti_entropy.write_buffer_sizeThe LevelDB options used by Active Anti-Entropy to generate the +LevelDB-backed on-disk hashtrees.4MB
anti_entropy.data_dirThe directory where AAE hash trees are stored../data/anti_entropy
anti_entropy.trigger_intervalThe tick determines how often the Active Anti-Entropy manager looks +for work to do (building/expiring trees, triggering exchanges, etc). +Lowering this value will speed up the rate at which all replicas are +synced across the cluster. Increasing the value is not recommended. +15s
anti_entropy.concurrency_limitLimit how many Active Anti-Entropy exchanges or builds can happen +concurrently.2
anti_entropy.tree.expiryDetermines how often hash trees are expired after being built. +Periodically expiring a hash tree ensures that the on-disk hash tree +data stays consistent with the actual K/V backend data. It also helps +Riak identify silent disk failures and bit rot. However, expiration is +not needed for normal active anti-entropy operations and should be +infrequent for performance reasons. The time is specified in +milliseconds.1w
anti_entropy.tree.build_limit.per_timespan1h
anti_entropy.tree.build_limit.numberRestrict how fast AAE can build hash trees. Building the tree for a +given partition requires a full scan over that partition's data. Once +built, trees stay built until they are expired. .number is +the number of builds; .per_timespan is the amount of time +in which that number of builds occurs.1
anti_entropy.use_background_managerWhether AAE is to use a background process to limit AAE tree +rebuilds. If set to on, this will help to prevent system +response degradation under times of heavy load from multiple background +tasks that contend for the same system resources; setting this parameter +to off can cut down on system resource usage. +off
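+
+An illustrative riak.conf fragment that keeps AAE active but limits its impact under load might read:
+
+```riakconf
+anti_entropy = active
+anti_entropy.concurrency_limit = 2
+# Coordinate tree rebuilds with other background work.
+anti_entropy.use_background_manager = on
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```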
+
+## TicTac Active Anti-Entropy
+
ConfigDescriptionDefault
tictacaae_activeChanges TicTacAAE from Passive or Active. If you want to run TicTac AAE alongside legacy AAE, set both to Active. Can be active or passive +passive
tictacaae_datarootPath under which aae datafiles will be stored (platform_data_dir)/tictac_aae
tictacaae_parallelstoreWhen running in parallel mode, which will be the default if the backend does not support native tictac aae (i.e. is not leveled), what type of parallel key store should be kept - leveled_ko (leveled and key-ordered), or leveled_so (leveled and segment ordered). When running in native mode, this setting is ignored. Acceptable values are leveled_ko or leveled_soleveled_ko
tictacaae_rebuildwaitThe minimum number of hours to wait between rebuilds.336
tictacaae_rebuilddelayThe number of seconds which represents the length of the period in which the next rebuild will be scheduled. So if all vnodes are scheduled to rebuild at the same time, they will actually rebuild randomly between 0 and this value (in seconds) after the rebuild time.345600
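+
+For example, switching TicTac AAE on with a key-ordered parallel store could look like the sketch below; the rebuild values simply restate the defaults shown above:
+
+```riakconf
+tictacaae_active = active
+# leveled_ko keeps the parallel key store key-ordered.
+tictacaae_parallelstore = leveled_ko
+tictacaae_rebuildwait = 336
+tictacaae_rebuilddelay = 345600
+```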
+
+## Intra-Cluster Handoff
+
+Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].
+
ConfigDescriptionDefault
handoff.max_rejectsThe maximum number of times that a secondary system within Riak, +such as Riak Search, can block handoff +of primary key/value data. The approximate maximum duration that a vnode +can be blocked can be determined by multiplying this setting by +vnode_management_timer. If you want to prevent handoff from +ever being blocked by a secondary system, set this parameter to +0.6
handoff.inboundWhether inbound handoff is enabled on the node. Possible values are +on or off.on
handoff.outboundWhether outbound handoff is enabled on the node. Possible values are +on or off.on
handoff.portSpecifies the TCP port that Riak uses for intra-cluster data +handoff.8099
handoff.ssl.certfileTo encrypt riak_core intra-cluster data handoff +traffic, uncomment this line and edit its path to an appropriate +certfile and keyfile.
handoff.ssl.keyfileThe keyfile paired with the certfile specified in +.certfile.
handoff.use_background_managerWhether Riak will use a background manager to limit K/V handoff. +This can help to prevent system response degradation during times of +heavy load caused by multiple background tasks that contend for the same +system resources; setting this parameter to off can cut +down on system resource usage.off
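+
+A brief sketch of pinning the handoff port and temporarily disabling outbound handoff during maintenance (illustrative):
+
+```riakconf
+handoff.port = 8099
+handoff.inbound = on
+# Stop this node from shipping partitions away until maintenance ends.
+handoff.outbound = off
+```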
+ +## Riak Data Types + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
datatypes.compression_levelWhether serialized Data Types will use compression and at what +level. When set to an integer, the parameter refers to the +aggressiveness of compression, on a scale from 0 to 9. on +is equivalent to 6, whereas off is equivalent to 0. Higher +values for compression tend to be more CPU intensive.1
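+
+For example, to trade some CPU for smaller serialized Data Types, you could raise the compression level; `6` is the level that `on` maps to:
+
+```riakconf
+datatypes.compression_level = 6
+```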
+ +## SNMP + +Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher. + +## JMX + +Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher. + +## Strong Consistency + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment. + +Riak's strong consistency feature has a variety of tunable parameters +that allow you to enable and disable strong consistency, modify the +behavior of leaders and followers, set various timeouts, and more. More +detailed information from an operations perspective can be found in our +documentation on [managing strong consistency][cluster ops strong consistency]. + +Strong consistency is disabled by default. The `strong_consistency` +parameter enables you to turn it on. This setting is available in each +node's `riak.conf` file. + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
strong_consistencyEnables the consensus subsystem used for strongly consistent Riak +operations if set to on.off
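+
+Enabling the subsystem is a one-line change in each node's riak.conf:
+
+```riakconf
+strong_consistency = on
+```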
+
+Unlike the `strong_consistency` setting, the settings listed below are
+available only in `advanced.config`, in the `riak_ensemble` section of
+that file. That section looks like this:
+
+```advancedconfig
+{riak_ensemble, [
+    {parameter1, value},
+    {parameter2, value}
+    %% Other settings
+]}
+```
+
+Further instructions on setting parameters in `advanced.config` can be
+found in the [advanced configuration](#advanced-configuration) section below.
+
+Using these settings properly demands a firm understanding of the basic
+architecture of Riak's implementation of strong consistency. We highly
+recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind
+strong consistency before changing the defaults on these parameters.
ConfigDescriptionDefault
ensemble_tickThe rate at which leaders perform their periodic duties, including +refreshing the leader lease, in milliseconds. This setting must be lower +than both the lease_duration and +follower_timeout settings (both listed below). Lower values +mean that leaders perform their duties more frequently, which can allow +for faster convergence if a leader goes offline and then returns to the +ensemble; higher values mean that leaders perform their duties less +frequently, which can reduce network overhead.500
lease_durationDetermines how long a leader lease remains valid without being +refreshed (in milliseconds). This should be set higher than the +ensemble_tick setting (listed above) so that leaders have +time to refresh their leases before they time out, and it must be set +lower than the follower_timeout setting (listed below). +ensemble_tick * 3/2
follower_timeoutDetermines how long a follower waits to hear from a leader before it +abandons the leader (in milliseconds). This must be set greater than the +lease_duration setting.lease_duration * 4
alive_tokensDetermines the number of ticks the leader will wait to hear from its +associated vnode before assuming that the vnode +is unhealthy and stepping down as leader. If the vnode does not respond +to the leader before ensemble_tick * +alive_tokens milliseconds have elapsed, the leader will +give up leadership. It may be necessary to raise this setting if your +Riak vnodes are frequently stalling out on slow backend reads/writes. If +this setting is too low, it may cause slow requests to time out earlier +than the request timeout.2
storage_delayDetermines how long the consensus subsystem delays syncing to disk +when performing certain metadata operations (in milliseconds). This +delay allows multiple operations to be coalesced into a single disk +write. We do not recommend that you change this setting.50
storage_tickDetermines how often the consensus subsystem writes data to disk +that was requested to be written asynchronously (in milliseconds). We do +not recommend that you change this setting.5000
trust_leaseDetermines whether leader leases are used to optimize reads. When +set to true, a leader with a valid lease will handle the +read directly without contacting any followers; when set to +false, the leader will always contact followers. For more +information, see our internal documentation on + +leader leases.true
peer_get_timeoutDetermines the timeout used internally for reading consistent data, +in milliseconds. This setting must be greater than the highest request +timeout used by your application.60000 (1 minute)
peer_put_timeoutDetermines the timeout, in milliseconds, used internally for writing +consistent data. This setting must be greater than the highest request +timeout used by your application.60000 (1 minute)
peer_workersThe number of concurrent workers used by the leader to service +requests. Increasing this setting may boost performance depending on the +workload.1
tree_validationDetermines whether Riak considers peer Merkle trees to be trusted +after a node restart. When validation is enabled (the default), Riak +does not trust peer trees after a restart, instead requiring the peer to +sync with a trusted majority. This is the safest option, as it protects +Riak against undetected corruption of the Merkle tree. However, this +mode reduces Riak availability since it can sometimes require more than +a simple majority of nodes to be online and reachable.true
synchronous_tree_updatesDetermines whether the metadata updates to follower Merkle trees are +handled synchronously or not. When set to true, Riak +requires two quorum round trips to occur before replying back to the +client, the first quorum request to write the actual object and the +second to write the Merkle tree data. When set to false, +Riak will respond back to the client after the first round trip, letting +the metadata update happen asynchronously.

It's important to +note that the leader always updates its local Merkle tree +before responding to the client. This setting only affects the metadata +writes sent to followers.

In principle, asynchronous updates +are unsafe. If the leader crashes before sending the metadata updates +and all followers that had acknowledged the object write somehow revert +to the object value immediately prior to a write request, a future read +could return the immediately preceding value without realizing that it +was incorrect. Given that this scenario is unlikely, this setting +defaults to false in the name of improved performance.
false
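+
+As a minimal sketch grounded in the defaults above (an ensemble_tick of 500ms, a lease_duration of tick x 3/2, and a follower_timeout of lease x 4), an explicit riak_ensemble section might be:
+
+```advancedconfig
+{riak_ensemble, [
+    %% These restate the derived defaults; keep tick < lease < follower timeout.
+    {ensemble_tick, 500},
+    {lease_duration, 750},
+    {follower_timeout, 3000}
+]}
+```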
+ + +## Miscellaneous + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
metadata_cache_sizeThis setting controls the size of the metadata cache for each vnode. +The cache can be disabled by setting it to off (this is the +default). Enabling the cache should not be necessary in disk-based +backends (i.e. LevelDB and Bitcask) but it can help performance in the +Memory backend. Note that this setting adjusts the size of the ETS table +rather than the actual data. Thus, more space may be used than the +simple size * number-of-vnodes calculation would imply. +

+Caution: This setting should not be changed without +extensive benchmarking.
off
max_concurrent_requestsThe maximum number of concurrent requests of each type (GET or PUT) +that is allowed. Setting this value to infinite disables +overload protection. The erlang.process_limit should be at +least 3 times this setting.50000
dtraceWhether DTrace is enabled. +Do not enable unless your Erlang/OTP runtime is compiled to support +DTrace, which is available in R15B01 (supported by the official source +package) and in R14B04 via a custom repository and branch.off
vnode_management_timerSets the frequency with which vnodes attempt to trigger handoff between +this node and other nodes in the cluster.10s (10 seconds)
retry_put_coordinator_failureWhen a PUT (i.e. write) request fails, Riak will retry the operation +if this setting is set to on, which is the default. Setting +it to off will speed response times on PUT requests in +general, but at the risk of potentially increasing the likelihood of +write failure.on
background_managerRiak's background manager is a subsystem that coordinates access to +shared resources from other Riak subsystems. The background manager can +help to prevent system response degradation under times of heavy load +caused by multiple background tasks.on
+
+## Advanced Configuration
+
+The `advanced.config` file takes the same format as the `app.config`
+file familiar to users of versions of Riak prior to 2.0. Here is an
+example:
+
+```advancedconfig
+[
+    {riak_core,
+        [
+            %% more riak_core configs
+            {cluster_mgr, {"127.0.0.1", 8098}}
+        ]},
+
+    {riak_repl,
+        [
+            %% more riak_repl configs
+            {data_root, "/var/db/riak/riak_repl/"}
+        ]}
+].
+```
+
+The following settings are available in the `advanced.config` file:
+
+#### `riak_repl` settings
+
+Most settings that are configurable through `advanced.config` are
+related to Riak's `riak_repl` subsystem.
ConfigDescriptionDefault
data_rootPath (relative or absolute) to the working directory for the +replication process./var/db/riak/riak_repl/
max_fssource_clusterThe hard limit of fullsync workers that will be running on the +source side of a cluster across all nodes on that cluster for a fullsync +to a sink cluster. This means that if you have configured fullsync for +two different clusters, both with a max_fssource_cluster of +5, 10 fullsync workers can be in progress. This only affects nodes on +the source cluster on which this parameter is defined, either via the +configuration file or command line.5
max_fssource_nodeThis setting limits the number of fullsync workers that will be +running on each individual node in a source cluster. This is a hard +limit for all fullsyncs enabled; additional fullsync configurations will +not increase the number of fullsync workers allowed to run on any node. +This only affects nodes on the source cluster on which this parameter is +defined, either via the configuration file or command line. +1
max_fssink_nodeThis setting limits the number of fullsync workers allowed to run on +each individual node in a sink cluster. This is a hard limit for all +fullsyncs enabled; additional fullsync configurations will not increase +the number of fullsync workers allowed to run on any node. This only +affects nodes on the source cluster on which this parameter is defined, +either via the configuration file or command line.1
fullsync_on_connectWhether to initiate a fullsync on initial connection from the sink +cluster.true
fullsync_intervalA single-integer value representing the duration to wait, in +minutes, between fullsyncs, or a list of {clustername, +time_in_minutes} pairs for each sink participating in fullsync +replication.30
rtq_max_bytesThe maximum size, in bytes, to which the realtime replication queue +can grow before new objects are dropped. Dropped objects will need to be +replicated with a fullsync.104857600
proxy_getWhether to enable Riak CS proxy_get and block +filter.disabled
rt_heartbeat_intervalA heartbeat message is sent from the source to the sink every +rt_heartbeat_interval seconds. Setting +rt_heartbeat_interval to undefined disables +the realtime heartbeat. This feature is available only in Riak KV +Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards.15
rt_heartbeat_timeoutIf a heartbeat response is not received within the time period +specified by this setting (in seconds), the source connection exits and +will be re-established. This feature is available only in Riak KV +Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards.15
realtime_connection_rebalance_max_delay_secsShould a server on the source cluster be restarted, this is +the amount of time (in seconds), before the realtime connections are +rebalanced by a change in the number of source nodes.300
fullsync_use_background_managerBy default, fullsync replication will attempt to coordinate with +other Riak subsystems that may be contending for the same resources. +This will help to prevent system response degradations during times of +heavy load from multiple background tasks. To disable background +coordination, set this parameter to `false`. This feature is available +only in Riak KV Enterprise Edition 2.0 and later as well as Riak KV 2.2.6 onwards.true
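+
+The following sketch shows a riak_repl section built from the defaults documented above; the data_root path is illustrative:
+
+```advancedconfig
+{riak_repl, [
+    {data_root, "/var/db/riak/riak_repl/"},
+    {fullsync_on_connect, true},
+    %% minutes between fullsyncs
+    {fullsync_interval, 30},
+    {max_fssource_cluster, 5},
+    {max_fssource_node, 1},
+    {max_fssink_node, 1}
+]}
+```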
+
+#### Upgrading Riak Search with `advanced.config`
+
+If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][use ref search] (codename Yokozuna), you will need to enable
+legacy Search while the upgrade is underway. You can add the following
+snippet to your `advanced.config` configuration to do so:
+
+```advancedconfig
+[
+    %% Other configs
+
+    {riak_search, [{enabled, true}]},
+    {merge_index, [
+        {data_root, "/var/lib/riak/merge_index"},
+        {buffer_rollover_size, 1048576},
+        {max_compact_segments, 20}
+    ]}
+
+    %% Other configs
+].
+```
+
+#### Other settings
+
+There are three non-`riak_repl` settings available in
+`advanced.config`.
ConfigSectionDescriptionDefault
add_pathsriak_kvIf you are installing +custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies +the paths to any compiled .beam files that you wish to use. +This is expressed as a list of absolute paths on the node's filesystem, +e.g. [ "/tmp", "/other" ].
cluster_mgrriak_coreThe cluster manager listens for connections from remote clusters on +the specified IP and port. Every node runs one cluster manager, but only +the cluster manager running on the cluster leader will service requests. +This can change as nodes enter and leave the cluster.{"127.0.0.1", 9080}
delete_moderiak_kvSpecifies how Riak behaves after objects are marked for deletion +with a tombstone. There are three possible settings: keep +disables tombstone removal altogether; immediate removes +objects' tombstones as soon as the delete request is received; and +setting delete_mode to an integer value specifies the +number of milliseconds to wait before removing tombstones. More +information can be found in Object +Deletion.3000 (3 seconds)
target_n_valriak_coreThe highest n_val that you generally intend to use. +This setting affects how partitions are distributed within the cluster, +helping to ensure that "hot spots" don't occur, i.e. that data is never +stored more than once on the same physical node. You will need to change +this setting only in rare circumstances. Assuming that +ring_size is a power of 2, the ideal value for this setting +is both (a) greater than or equal to the largest n_val for +any bucket type and (b) an even divisor of the number of partitions in +the ring, i.e. ring_size. The default is 4, +and the number of physical nodes in your cluster must be greater than +target_n_val for this setting to be effective at preventing +hot spots.4
+
+## Cluster Job Controls
+
+{{% note title="Warning" %}}
+Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
+{{% /note %}}
+
+The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
+`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`
+
diff --git a/content/riak/kv/2.9.7/configuring/search.md b/content/riak/kv/2.9.7/configuring/search.md
new file mode 100644
index 0000000000..18c1ea69b2
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/search.md
@@ -0,0 +1,278 @@
+---
+title: "Riak Search Settings"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Riak Search Settings"
+    identifier: "configuring_search"
+    weight: 160
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/advanced/configs/search/
+  - /riak/kv/2.9.7/ops/advanced/configs/search/
+---
+
+[usage search]: {{}}riak/kv/2.9.7/developing/usage/search
+[usage search schema]: {{}}riak/kv/2.9.7/developing/usage/search-schemas
+[usage search data types]: {{}}riak/kv/2.9.7/developing/usage/searching-data-types
+[usage custom extractors]: {{}}riak/kv/2.9.7/developing/usage/custom-extractors
+[cluster-ops aae throttle]: {{}}riak/kv/2.9.7/using/cluster-operations/active-anti-entropy/#throttling
+[config reference]: {{}}riak/kv/2.9.7/configuring/reference
+[config reference#search]: {{}}riak/kv/2.9.7/configuring/reference/#search
+[glossary aae]: {{}}riak/kv/2.9.7/learn/glossary/#active-anti-entropy-aae
+[security index]: {{}}riak/kv/2.9.7/using/security/
+
+[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
+[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation
+
+This page covers how to use Riak Search (with
+[Solr](http://lucene.apache.org/solr/) integration).
+
+For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].
+
+If you are looking to develop on or with Riak Search, take a look at:
+
+* [Using Search][usage search]
+* [Search Schema][usage search schema]
+* [Custom Search Extractors][usage custom extractors]
+* [Riak KV Data Types and Search][usage search data types]
+
+## Overview
+
+We'll be walking through:
+
+1. [Prerequisites](#prerequisites)
+2. [Enable Riak Search](#enabling-riak-search)
+3. [Search Configuration Settings](#search-config-settings)
+4. [Additional Solr Information](#more-on-solr)
+
+## Prerequisites
+
+Because Solr is a Java application, you will need to install **Java 7
+or later** on every node. Installation packages can be found on the [Java SE Downloads
+page][java se downloads] and instructions in the [Java SE documentation site][java se docs].
+
+
+## Enabling Riak Search
+
+Riak Search is not enabled by default, so you must enable it in every
+node's [configuration file][config reference] as follows:
+
+```riakconf
+search = on
+```
+
+
+## Search Config Settings
+
+You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation.
+
+### `search`
+
+Enable or disable search; defaults to `off`.
+
+Valid values: `on` or `off`
+
+### `search.anti_entropy.data_dir`
+
+The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`.
+
+Valid values: a directory
+
+### `search.anti_entropy.throttle`
+
+Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`.
+
+Valid values: `on` or `off`
+
+You can read more about throttling [here][cluster-ops aae throttle].
+
+### `search.anti_entropy.throttle.$tier.delay`
+
+Set the throttling tiers delay for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above.
+
+For example:
+
+```
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a solrq_queue_length of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.anti_entropy.throttle.$tier.solrq_queue_length`
+
+Set the throttling tiers for [active anti-entropy][glossary aae]; no default.
+
+Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle
+should observe at that size and above.
+
+For example:
+
+```
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a solrq_queue_length of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.dist_query`
+
+Enable this node in distributed query plans; defaults to `on`.
+
+If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long running administrative operation (e.g.
reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.
+
+This setting can also be changed via `riak-admin` by issuing one of the following commands:
+
+```
+riak-admin set search.dist_query=off
+```
+
+or
+
+```
+riak-admin set search.dist_query=on
+```
+
+Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` feature. Setting `search.dist_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up.
+
+Valid values: `on` or `off`
+
+### `search.index.error_threshold.failure_count`
+
+The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.
+
+Valid values: Integer
+
+### `search.index.error_threshold.failure_interval`
+
+The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.
+
+If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.
+
+Valid values: Milliseconds
+
+### `search.index.error_threshold.reset_interval`
+
+The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.
+
+Valid values: Milliseconds
+
+### `search.queue.batch.flush_interval`
+
+The maximum delay between notification to flush batches to Solr; defaults to `1000` (milliseconds).
+
+This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.
+
+Valid values: `ms`, `s`, `m`, or `h`
+
+### `search.queue.batch.maximum`
+
+The maximum batch size, in number of Riak objects; defaults to `500`.
+
+Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.
+
+Valid values: Integer
+
+### `search.queue.batch.minimum`
+
+The minimum batch size, in number of Riak objects; defaults to `10`.
+
+Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.
+
+Valid values: Integer
+
+### `search.queue.high_watermark`
+
+The queue high water mark; defaults to `1000`.

If the total number of queued messages in a Solrq worker instance exceeds this limit, the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem if writes into Solr start to fall behind.

Valid values: Integer

### `search.queue.high_watermark.purge_strategy`

The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.

Valid values: `purge_one`, `purge_index`, or `off`

* `purge_one` removes the oldest item on the queue from an erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
* `purge_index` removes all items associated with one random erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
* `off` disables purging

### `search.root_dir`

The root directory in which index data and configuration are stored; defaults to `./data/yz`.

Valid values: a directory

### `search.solr.jvm_options`

The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.

Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.

Valid values: Java command-line arguments

### `search.solr.jmx_port`

The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.

Valid values: Integer

**Note**: JMX ceased being a Riak feature in Riak KV 2.9.0p5. This setting is left here for reference but no longer affects anything.

### `search.solr.port`

The port number to which Solr binds (note: binds on every interface); defaults to `8093`.

Valid values: Integer

### `search.solr.start_timeout`

How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.

Values lower than 1s will be rounded up to 1s.

Valid values: Integer with time units (e.g. 2m)

## More on Solr

### Solr JVM and Ports

Riak Search runs one Solr process per node to manage its indexing and
search functionality. While the underlying project manages
index distribution, node coverage for queries, active anti-entropy
(AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.

Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports
to the outside world.

### Solr for Operators

For further information on Solr monitoring, tuning, and performance, we
recommend the following documents for getting started:

* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
* [Solr Performance Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
* [Solr Performance Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)

A wide variety of other documentation is available from the Solr OSS
community.
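
As a recap of the queue and batch settings described above, the fragment below restates the documented defaults in `riak.conf` form. It is purely illustrative (the values shown are the defaults listed in the sections above, not tuning advice):

```riakconf
search = on
search.queue.batch.minimum = 10
search.queue.batch.maximum = 500
search.queue.batch.flush_interval = 1000ms
search.queue.high_watermark = 1000
search.queue.high_watermark.purge_strategy = purge_one
```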
+ + + + diff --git a/content/riak/kv/2.9.7/configuring/strong-consistency.md b/content/riak/kv/2.9.7/configuring/strong-consistency.md new file mode 100644 index 0000000000..cb980018e0 --- /dev/null +++ b/content/riak/kv/2.9.7/configuring/strong-consistency.md @@ -0,0 +1,692 @@ +--- +title: "Implementing Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Implementing Strong Consistency" + identifier: "configuring_strong_consistency" + weight: 190 + parent: "configuring" +toc: true +--- + +[apps strong consistency]: {{}}riak/kv/2.9.7/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.9.7/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.9.7/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.9.7/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.9.7/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.9.7/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.9.7/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.9.7/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.9.7/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.9.7/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.9.7/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.9.7/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.9.7/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.9.7/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.9.7/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.9.7/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.9.7/developing/data-types +[glossary aae]: {{}}riak/kv/2.9.7/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.9.7/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.9.7/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.9.7/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.9.7/developing/client-libraries + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. + +This document provides information on configuring and monitoring a Riak +cluster's optional strong consistency subsystem. Documentation for +developers building applications using Riak's strong consistency feature +can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical +treatment can be found in [Strong Consistency][concept strong consistency]. + +## Minimum Cluster Size + +In order to use strong consistency in Riak, **your cluster must consist +of at least three nodes**. If it does not, all strongly consistent +operations will fail. 
If your cluster is smaller than three nodes, you
will need to [add more nodes][cluster ops add remove node] and make sure
that strong consistency is [enabled](#enabling-strong-consistency) on all of them.

Strongly consistent operations on a given key may also fail if a
majority of object replicas in a given ensemble are unavailable, whether
due to slowness, crashes, or network partitions. This means that you may
see strongly consistent operations fail even if the minimum cluster size
requirement has been met. More information on ensembles can be found in
[Implementation Details](#implementation-details).

While strong consistency requires at least three nodes, we have a
variety of recommendations regarding cluster size, which can be found in
[Fault Tolerance](#fault-tolerance).

## Enabling Strong Consistency

Strong consistency in Riak is disabled by default. You can enable it in
each node's [configuration files][config reference#strong-cons].

```riakconf
strong_consistency = on
```

```appconfig
%% In the older, app.config-based system, the strong consistency
%% parameter is enable_consensus:

{riak_core, [
    % ...
    {enable_consensus, true},
    % ...
    ]}
```

Remember that you must [restart your node][use admin riak cli] for
configuration changes to take effect.

For strong consistency requirements to be applied to specific keys,
those keys must be in [buckets][concept buckets] bearing a bucket type with the
`consistent` property set to `true`. More information can be found in
[Using Bucket Types][cluster ops bucket types].

If you enable strong consistency on all nodes in a cluster with fewer
than three nodes, strong consistency will be **enabled** but not yet
**active**. Strongly consistent operations are not possible in this
state. Once at least three nodes with strong consistency enabled are
detected in the cluster, the system will be activated and ready for use.
You can check on the status of the strong consistency subsystem using
the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command.

## Fault Tolerance

Strongly consistent operations in Riak are necessarily less highly
available than [eventually consistent][concept eventual consistency] operations
because strongly consistent operations can only succeed if a **quorum**
of object replicas are currently reachable. A quorum can be expressed as
N / 2 + 1 (or `n_val` / 2 + 1), meaning that 3 replicas constitute a
quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are
unavailable, for example, no strongly consistent operations on that
object can succeed.

While Riak uses N=3 by default, bear in mind that **higher values of N
will allow for more fault tolerance**. The table below shows the number
of allowable missing replicas for assorted values of N:

Replicas | Allowable missing replicas
:--------|:--------------------------
3 | 1
5 | 2
7 | 3
9 | 4
15 | 7

Thus, we recommend setting `n_val` higher than the default of 3 for
strongly consistent operations. More on `n_val` in the section below.

### n_val Recommendations

Due to the quorum requirements explained above, we recommend that you
use _at least_ N=5 for strongly consistent data. You can set the value
of N, i.e. `n_val`, for buckets
[using bucket types][cluster ops bucket types].
For example, you +can create and activate a bucket type with N set to 5 and strong +consistency enabled---we'll call the bucket type +`consistent_and_fault_tolerant`---using the following series of +[commands][use admin riak-admin]: + +```bash +riak-admin bucket-type create consistent_and_fault_tolerant \ + '{"props": {"consistent":true,"n_val":5}}' +riak-admin bucket-type activate consistent_and_fault_tolerant +``` + +If the `activate` command outputs `consistent_and_fault_tolerant has +been activated`, the bucket type is now ready to provide strong +consistency guarantees. + +#### Setting the target_n_val parameter + +The `target_n_val` parameter sets the highest `n_val` that you intend to +use in an entire cluster. The purpose of this parameter is to ensure +that so-called "hot spots" don't occur, i.e. that data is never stored +more than once on the same physical node. This can happen when: + +* `target_n_val` is greater than the number of physical nodes, or +* the `n_val` for a bucket is greater than `target_n_val`. + +A problem to be aware of if you're using strong consistency is that the +default for `target_n_val` is 4, while our suggested minimum `n_val` for +strongly consistent bucket types is 5. This means that you will need to +raise `target_n_val` if you intend to use an `n_val` over 4 for _any_ +bucket type in your cluster. If you anticipate using an `n_val` of 7 as +the largest `n_val` within your cluster, for example, you will need to +set `target_n_val` to 7. + +This setting is not contained in `riak.conf`, and must instead be set in +the `advanced.config` file. For more information, see our documentation +on [advanced configuration][config reference#advanced]. + +If you are using strong consistency in a cluster that has already been +created with a `target_n_val` that is too low (remember that the default +is too low), you will need to raise it to the desired higher value and +restart each node. + +#### Note on Bucket Properties + +The `consistent` bucket property is one of two bucket properties, +alongside [`datatype`][cluster ops bucket types], that cannot be changed once a +bucket type has been created. + +Furthermore, if `consistent` is set to `true` for a bucket type, you +cannot change the `n_val` for the bucket type once it's been created. If +you attempt to do so, you'll see the following error: + +``` +Error updating bucket : +n_val cannot be modified for existing consistent type +``` + +If you've created a bucket type with a specific `n_val` and wish to +change it, you will need to create a new bucket type with the +appropriate `n_val` and use the new bucket type instead. + +### Fault Tolerance and Cluster Size + +From the standpoint of strongly consistent operations, larger clusters +tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down. + +Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If +two nodes go down, _all_ ensembles will lose quorum and will be unable +to function. Strongly consistent operations on the entire keyspace will +fail until at least one node is brought back online. And even when that +one node is brought back online, a significant portion of the keyspace +will continue to be unavailable for strongly consistent operations. + +For the sake of contrast, imagine a 50-node cluster in which all +ensembles are N=5 (i.e. all objects are replicated to five nodes). 
In
this cluster, each node is involved in only 10% of the total ensembles;
if a single node fails, that failure will thus impact only 10% of
ensembles. In addition, because N is set to 5, that will not impact
quorum for _any_ ensemble in the cluster; two additional node failures
would need to occur for quorum to be lost for _any_ ensemble. And even
in the case of three nodes failing, it is highly unlikely that that
failure would impact the same ensembles; if it did, only those ensembles
would become unavailable, affecting only 10% of the key space, as
opposed to 100% in the example of a 3-node cluster consisting of N=3
ensembles.

These examples illustrate why we recommend higher values for N---again,
at least N=5---as well as clusters with many nodes. The 50-node cluster
example above is used only to illustrate why larger clusters are more
fault tolerant. The definition of "many" nodes will vary according to your needs.
For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].

### Offline Node Recommendations

In general, strongly consistent Riak is more sensitive to the number of
nodes in the cluster than eventually consistent Riak, due to the quorum
requirements described above. While Riak is designed to withstand a
variety of failure scenarios that make nodes in the cluster unreachable,
such as hardware or network failure, **we nonetheless recommend that you
limit the number of nodes that you intentionally down or reboot**.
Having multiple nodes leave the cluster at once can threaten quorum and
thus affect the viability of some or all strongly consistent operations,
depending on the size of the cluster.

If you're using strong consistency and you do need to reboot multiple
nodes, we recommend rebooting them very carefully. Rebooting nodes too
quickly in succession can force the cluster to lose quorum and thus be
unable to service strongly consistent operations. The best strategy is
to reboot nodes one at a time and wait for each node to rejoin existing
[ensembles][cluster ops strong consistency] before
continuing to the next node. At any point in time, the state of
currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].

## Performance

If you run into performance issues, bear in mind that the key space in a
Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of
that key space. Larger ring sizes allow more
independent consensus groups to exist in a cluster, which can provide
for more concurrency and higher throughput, and thus better performance.
The ideal ring size, however, will also depend on the number of nodes in
the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].

Adding nodes to your cluster is another means of enhancing the
performance of strongly consistent operations. Instructions on doing so
can be found in [Adding and Removing Nodes][cluster ops add remove node].

Your cluster's configuration can also affect strong consistency
performance. See the section on [configuration][config reference#strong-cons] below.

## riak-admin ensemble-status

The [`riak-admin`][use admin riak-admin] interface
used for general node/cluster management has an `ensemble-status`
command that provides insight into the current status of the consensus
subsystem undergirding strong consistency.

Running the command by itself will provide the current state of the
subsystem:

```bash
riak-admin ensemble-status
```

If strong consistency is not currently enabled, you will see `Note: The
consensus subsystem is not enabled.` in the output of the command; if
strong consistency is enabled, you will see output like this:

```
============================== Consensus System ===============================
Enabled:     true
Active:      true
Ring Ready:  true
Validation:  strong (trusted majority required)
Metadata:    best-effort replication (asynchronous)

================================== Ensembles ==================================
 Ensemble     Quorum        Nodes      Leader
-------------------------------------------------------------------------------
   root       4 / 4         4 / 4      riak@riak1
    2         3 / 3         3 / 3      riak@riak2
    3         3 / 3         3 / 3      riak@riak4
    4         3 / 3         3 / 3      riak@riak1
    5         3 / 3         3 / 3      riak@riak2
    6         3 / 3         3 / 3      riak@riak2
    7         3 / 3         3 / 3      riak@riak4
    8         3 / 3         3 / 3      riak@riak4
```

### Interpreting ensemble-status Output

The following table provides a guide to `ensemble-status` output:

Item | Meaning
:----|:-------
`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this parameter is set to `off` and you wish to enable strong consistency, see [Enabling Strong Consistency](#enabling-strong-consistency) above.
`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes.
`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change.
`Validation` | This will display `strong` if the `tree_validation` setting in `advanced.config` has been set to `true` and `weak` if set to `false`.
`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in `advanced.config`, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)`, then `synchronous_tree_updates` is set to `true`.
`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • **Ensemble** --- The ID of the ensemble
  • **Quorum** --- The number of ensemble peers that are either leading or following
  • **Nodes** --- The number of nodes currently online
  • **Leader** --- The current leader node for the ensemble

**Note**: The **root ensemble**, designated by `root` in the sample
output above, is a special ensemble that stores a list of nodes and
ensembles in the cluster.

More in-depth information on ensembles can be found in our [internal
documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).

### Inspecting Specific Ensembles

The `ensemble-status` command also enables you to directly inspect the
status of specific ensembles in a cluster. The IDs for all current
ensembles are displayed in the `Ensembles` section of the
`ensemble-status` output described above.

To inspect a specific ensemble, specify the ID:

```bash
riak-admin ensemble-status <id>
```

The following would inspect ensemble 2:

```bash
riak-admin ensemble-status 2
```

Below is sample output for a single ensemble:

```
================================= Ensemble #2 =================================
Id:           {kv,0,3}
Leader:       riak@riak2 (2)
Leader ready: true

==================================== Peers ====================================
 Peer  Status     Trusted  Epoch  Node
-------------------------------------------------------------------------------
  1    following  yes      1      riak@riak1
  2    leading    yes      1      riak@riak2
  3    following  yes      1      riak@riak3
```

The table below provides a guide to the output:

Item | Meaning
:----|:-------
`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. All ensembles are `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible.
`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.
  • **Peer** --- The ID of the peer
  • **Status** --- Whether the peer is a leader or a follower
  • **Trusted** --- Whether the peer's Merkle tree is currently considered trusted or not
  • **Epoch** --- The current consensus epoch for the peer; the epoch is incremented each time the leader changes
  • **Node** --- The node on which the peer resides

More information on leaders, peers, Merkle trees, and other details can
be found in [Implementation Details](#implementation-details) below.

## Implementation Details

Strong consistency in Riak is handled by a subsystem called
[`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
This system functions differently from other systems in Riak in a number
of ways, and many of these differences are important to bear in mind for
operators configuring their cluster's usage of strong consistency.

### Basic Operations

The first major difference is that strongly consistent Riak involves a
different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types
of atomic operations on objects:

* **Get** operations work just as they do against
  non-strongly-consistent keys, but with two crucial differences:
  1. Connecting clients are guaranteed to receive the most recently
     written value (which makes those operations CP, i.e. consistent and
     partition tolerant)
  2. Reads on strongly consistent keys *never* return siblings, hence
     there is no need to develop any sort of conflict resolution
     strategy for those keys
* **Conditional put** operations write an object only if no object
  currently exists at that key. The operation will fail if the key
  already exists; if the key was never written or has been deleted, the
  operation succeeds.
* **Conditional modify** operations are compare-and-swap (CAS)
  operations that succeed only if the value of a key has not changed
  since it was previously read.
* **Delete** operations work mostly like they do against
  non-strongly-consistent keys, with the exception that
  [tombstones][cluster ops obj del] are not harvested, which is
  the equivalent of having `delete_mode` set to `keep`.

**From the standpoint of clients connecting to Riak, there is little
difference between strongly and non-strongly consistent data**. The
operations performed on objects---reads, writes, deletes, etc.---are the
same, which means that the client API for strong consistency is
essentially the same as it is for eventually consistent operations, with
the important exception of error handling.

### Ensembles

The main actors in Riak's implementation of strong consistency are
**ensembles**, which are independent groups that watch over a portion of
a Riak cluster's key space and coordinate strongly consistent operations
across nodes. When watching over a given key space, ensembles must act
upon multiple replicas of a given object, the number of which is
specified by `n_val` (more on this in [Replication Properties][apps replication properties]).

Eventually consistent Riak can service requests even when only a single
object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it
requires that a **quorum** of object replicas be online and reachable,
where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not
available for a key, all strongly consistent operations against that key
will fail**.

More information can be found in the section on Fault Tolerance above.
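
To make the client-facing behavior described under Basic Operations above concrete, here is a minimal sketch using the HTTP API against the `consistent_and_fault_tolerant` bucket type created earlier. The host, port, bucket, and key are illustrative placeholders, not values prescribed by this guide:

```bash
# Write a value under the strongly consistent bucket type
# (hypothetical bucket "accounts" and key "alice"; assumes a
# local node listening for HTTP on port 8098).
curl -XPUT -H "Content-Type: text/plain" -d "first value" \
  http://localhost:8098/types/consistent_and_fault_tolerant/buckets/accounts/keys/alice

# Reads are guaranteed to return the most recently written value
# and never return siblings.
curl http://localhost:8098/types/consistent_and_fault_tolerant/buckets/accounts/keys/alice
```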

### Peers, Leaders, Followers, and Workers

All ensembles in strongly consistent Riak consist of agents called
**peers**. The number of peers in an ensemble is defined by the `n_val`
of that ensemble, i.e. the number of object replicas that the
ensemble watches over. Amongst the peers in the ensemble, there are two
basic actors: **leaders** and **followers**.

Leaders and followers coordinate with one another on most requests.
While leaders and followers coordinate on all writes, i.e. all puts and
deletes, you can enable leaders to respond to gets without the need to
coordinate with followers. This is known as granting a **leader lease**.
Leader leases are enabled by default, and are disabled (or re-enabled)
at the cluster level. A more in-depth account of ensemble behavior can
be found in our [internal
documentation](https://github.com/basho/riak_ensemble/tree/develop/doc).

In addition to leaders and followers, ensemble peers use lightweight
Erlang processes called **workers** to perform long-running K/V
operations, allowing peers to remain responsive to requests. The number
of workers assigned to each peer depends on your configuration.

These terms should be borne in mind in the sections on configuration
below.

### Integrity Checking

An essential part of implementing a strong consistency subsystem in a
distributed system is **integrity checking**, which is a process that
guards against data corruption and inconsistency even in the face of
network partitions and other adverse events that Riak was built to
handle gracefully.

Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency
integrity checking utilizes [Merkle
trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on
disk. All peers in an ensemble, i.e. all leaders and followers, maintain
their own Merkle trees and update those trees in the event of most
strongly consistent operations. Those updates can occur synchronously or
asynchronously from the standpoint of client operations, depending on
the configuration that you specify.

While integrity checking takes place automatically in Riak, there are
important aspects of its behavior that you can configure. See the
[Merkle Tree Settings](#merkle-tree-settings) section below for more
information on configurable parameters.

## Configuring Strong Consistency

The `riak_ensemble` subsystem provides a wide variety of tunable
parameters that you can adjust to fit the needs of your Riak cluster.
All `riak_ensemble`-specific parameters, with the exception of the
`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency),
must be set in each node's `advanced.config` file, _not_ in `riak.conf`
or `app.config`.

Information on the syntax and usage of `advanced.config` can be found in
our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full
listing of [strong-consistency-related configuration parameters][config reference#strong-cons].

Please note that the sections below require a basic understanding of the
following terms:

* ensemble
* peer
* leader
* follower
* worker
* integrity checking
* Merkle tree

For an explanation of these terms, see the [Implementation Details](#implementation-details) section
above.

### Leader Behavior

The `trust_lease` setting determines whether leader leases are used to
optimize reads.
When set to `true`, a leader with a valid lease can
handle reads directly without needing to contact any followers. When
`false`, the leader will always contact followers, which can lead to
degraded read performance. The default is `true`. We recommend leaving
leader leases enabled for performance reasons.

All leaders have periodic duties that they perform, including refreshing
the leader lease. You can determine how frequently this occurs, in
milliseconds, using the `ensemble_tick` setting. The default is 500
milliseconds. Please note that this setting must be lower than both
the `lease_duration` setting (explained below) and the
`follower_timeout` setting.

If you set `trust_lease` to `true`, you can also specify how long a
leader lease remains valid without being refreshed using the
`lease_duration` setting, which is specified in milliseconds. This
setting should be higher than `ensemble_tick` to ensure that leaders
have time to refresh their leases before they time out, and it _must_
be lower than `follower_timeout`. The
default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400,
`lease_duration` will default to 600.

### Worker Settings

You can choose how many workers are assigned to each peer using the
`peer_workers` setting. Workers are lightweight processes spawned by
leaders and followers. While increasing the number of workers will make
the strong consistency subsystem slightly more computationally
expensive, more workers can mean improved performance in some cases,
depending on the workload. The default is 1.

### Timeouts

You can establish timeouts for both reads and writes (puts and deletes)
using the `peer_get_timeout` and `peer_put_timeout` settings,
respectively. Both are expressed in milliseconds and default to 60000
(1 minute).

Longer timeouts will decrease the likelihood that read or write
operations will fail due to long computation times; shorter timeouts
entail shorter wait times for connecting clients, but at a higher risk
of failed operations under heavy load.

### Merkle Tree Settings

Leaders and followers in Riak's strong consistency system maintain
persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for
all data stored by that peer. More information can be found in the
**Integrity Checking** section above. The two sections directly below
describe Merkle-tree-related parameters.

#### Tree Validation

The `tree_validation` parameter determines whether Riak considers Merkle
trees to be trusted after peers are restarted (for whatever reason).
When enabled, i.e. when `tree_validation` is set to `true` (the
default), Riak does not trust peer trees after a restart, instead
requiring the peer to sync with a trusted quorum. While this is the
safest mode because it protects Riak against silent corruption in Merkle
trees, it carries the drawback that it can reduce Riak availability by
requiring more than a simple majority of nodes to be online and
reachable when peers restart.

If you are using ensembles with N=3, we strongly recommend setting
`tree_validation` to `false`.

#### Synchronous vs. Asynchronous Tree Updates

Merkle tree updates can happen synchronously or asynchronously. This is
determined by the `synchronous_tree_updates` parameter.
When set to
`false`, which is the default, Riak responds to the client after the
first roundtrip that updates the followers' data but before the second
roundtrip required to update the followers' Merkle trees, allowing the
Merkle tree update to happen asynchronously in the background; when set
to `true`, Riak requires two quorum roundtrips to occur before replying
back to the client, which can increase per-request latency.

Please note that this setting applies only to Merkle tree updates sent
to followers. Leaders _always_ update their local Merkle trees before
responding to the client. Asynchronous updates can be unsafe in certain
scenarios. For example, if a leader crashes before sending metadata
updates to followers _and_ all followers that had acknowledged the write
somehow revert the object value immediately prior to the write request,
a future read could hypothetically return the immediately preceding
value without realizing that the value was incorrect. Setting
`synchronous_tree_updates` to `false` does bear this possibility, but it
is highly unlikely.

## Strong Consistency and Active Anti-Entropy

Riak's [active anti-entropy][glossary aae] (AAE) feature _can_ repair strongly
consistent data. Although it is not necessary to use active anti-entropy
if you are using strong consistency, we nonetheless recommend doing so.

Without AAE, all object conflicts are repaired via read repair.
Read repair, however, cannot repair conflicts in so-called "cold data,"
i.e. data that may not be read for long periods of time. While using AAE
does entail small performance losses, not using AAE can lead to problems
with silent on-disk corruption.

## Strong Consistency and Bitcask

One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability in objects' TTL. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in
strongly consistent buckets only in situations when these occasional
irregularities are acceptable.

## Important Caveats

The following Riak features are not currently available in strongly
consistent buckets:

* [Secondary indexes][cluster ops 2i] --- If you do attach
  secondary index metadata to objects in strongly consistent buckets,
  strongly consistent operations can still proceed, but that metadata
  will be silently ignored.
* [Riak Data Types][dev data types] --- Data Types can currently be
  used only in an eventually consistent fashion.
* [Using commit hooks][usage commit hooks] --- Neither pre- nor post-commit hooks are supported in strongly consistent buckets. If you do associate a
  strongly consistent bucket with one or more commit hooks, strongly
  consistent operations can proceed as normal in that bucket, but all
  commit hooks will be silently ignored.

Furthermore, you should also be aware that strong consistency guarantees
are applied only at the level of single keys.
There is currently no
support within Riak for strongly consistent operations against multiple
keys, although it is always possible to incorporate client-side write
and read locks in applications that use strong consistency.

## Known Issues

There are a few known issues that you should be aware of when using the
latest version of strong consistency.

* **Consistent reads of never-written keys create tombstones** - A
  [tombstone][cluster ops obj del] will be written if you perform a read
  against a key that a majority of peers claims to not exist. This is
  necessary for certain corner cases in which offline or unreachable
  replicas containing partially written data need to be rolled back in
  the future.
* **Consistent keys and key listing** - In Riak, key listing
  operations, such as listing all the keys in a bucket, do not filter
  out tombstones. While this is rarely a problem for
  non-strongly-consistent keys, it does present an issue for strong
  consistency due to the tombstone issues mentioned above.
* **Secondary indexes not supported** - Strongly consistent
  operations do not support [secondary indexes][cluster ops 2i] (2i) at this time. Furthermore, any other metadata
  attached to objects, even if not related to 2i, will be silently
  ignored by Riak in strongly consistent buckets.
* **Multi-Datacenter Replication not supported** - At this time,
  consistent keys are *not* replicated across clusters using
  Multi-Datacenter Replication (MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly
  consistent data within a cluster with eventually consistent data
  between clusters is difficult to reason about from the perspective of
  applications. In a future version of Riak, we will add support for
  strongly consistent replication across multiple datacenters/clusters.
* **Client library exceptions** - Basho's official [client
  libraries][dev client libraries] convert errors returned by Riak into generic exceptions,
  with a message derived from the returned server-side error message.

diff --git a/content/riak/kv/2.9.7/configuring/v2-multi-datacenter.md b/content/riak/kv/2.9.7/configuring/v2-multi-datacenter.md
new file mode 100644
index 0000000000..2c1bdc2e64
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/v2-multi-datacenter.md
@@ -0,0 +1,160 @@
---
title_supertext: "Configuring:"
title: "V2 Multi-Datacenter Replication"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "V2 Multi-Datacenter"
    identifier: "configuring_v2"
    weight: 210
    parent: "configuring"
toc: true
commercial_offering: true
aliases:
  - /riak/2.9.7/ops/mdc/v2/configuration
  - /riak/kv/2.9.7/ops/mdc/v2/configuration
---

[config v2 ssl]: {{}}riak/kv/2.9.7/configuring/v2-multi-datacenter/ssl

{{% note title="Deprecation Warning" %}}
v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.7/configuring/v3-multi-datacenter/) instead.
{{% /note %}}

Riak's Multi-Datacenter Replication capabilities offer a
variety of configurable parameters.

## File

The configuration for replication is kept in the `riak_repl` section of
each node's `advanced.config`.
That section looks like this: + +```advancedconfig +{riak_repl, [ + {fullsync_on_connect, true}, + {fullsync_interval, 360}, + % Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + % Solaris: + % {data_root, "/opt/riak/data/riak_repl"}, + % FreeBSD/SmartOS: + % {data_root, "/var/db/riak/riak_repl"}, + {queue_size, 104857600}, + {server_max_pending, 5}, + {client_ack_frequency, 5} + ]} +``` + +## Usage + +These settings are configured using the standard Erlang config file +syntax, i.e. `{Setting, Value}`. For example, if you wished to set +`ssl_enabled` to `true`, you would insert the following line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{riak_repl, [ + % Other configs + {ssl_enabled, true}, + % Other configs + ]} +``` + +## Settings + +Once your configuration is set, you can verify its correctness by +running the following command: + +```bash +riak chkconfig +``` + +The output from this command will point you to syntactical and other +errors in your configuration files. + +A full list of configurable parameters can be found in the sections +below. + +## Fullsync Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster +`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.
**Note**: Please contact Basho support for more information.
`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.

## SSL Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).

## Queue, Object, and Batch Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
`server_max_pending` | `max` (integer) | `5` | The maximum number of objects the leader will wait to get an acknowledgment from, from the remote location, before queuing the request
`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site

## Client Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster
`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs
`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred

## Buffer Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes
`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes

## Worker Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to retrieve the actual object to send. See [2](#f2).
`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `PUT` will be performed to store the transferred object on the client site. See [3](#f3).
`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to retrieve the actual object to send. See [2](#f2).
`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `PUT` will be performed to store the transferred object on the client site. See [3](#f3).

1. SSL depth is the maximum number of non-self-issued
   intermediate certificates that may follow the peer certificate in a valid
   certificate chain. If depth is `0`, the PEER must be signed by the trusted
   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
   then PEER, CA, CA, ROOT-CA, and so on.

2. Each get worker spawns 2 processes, one for the work and
   one for the get FSM (an Erlang finite state machine implementation for `GET`
   requests). Be sure that you don't run over the maximum number of allowed
   processes in an Erlang VM (check `vm.args` for a `+P` property).

3. Each put worker spawns 2 processes, one for the work, and
   one for the put FSM (an Erlang finite state machine implementation for `PUT`
   requests). Be sure that you don't run over the maximum number of allowed
   processes in an Erlang VM (check `vm.args` for a `+P` property).

4. If the ACL is specified and not the special value `*`,
   peers presenting certificates not matching any of the patterns will not be
   allowed to connect.
   If no ACLs are configured, no checks on the common name are done, except
   as described for [Identical Local and Peer Common Names][config v2 ssl].
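
As a worked example of the fullsync settings above, here is a sketch of a `riak_repl` section that disables automatic fullsync entirely, per the table descriptions (merge these entries into your existing configuration rather than copying the block wholesale):

```advancedconfig
{riak_repl, [
    %% Never initiate a fullsync when the secondary cluster connects:
    {fullsync_on_connect, false},
    %% Never schedule periodic fullsyncs; run them manually
    %% with `riak-repl start-fullsync` when needed:
    {fullsync_interval, disabled}
  ]}
```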
+ + + + diff --git a/content/riak/kv/2.9.7/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.9.7/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..5225fb876b --- /dev/null +++ b/content/riak/kv/2.9.7/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,82 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.7/ops/mdc/v2/nat + - /riak/kv/2.9.7/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/2.9.7/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.7/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` + + + + diff --git a/content/riak/kv/2.9.7/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.9.7/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..734da0f467 --- /dev/null +++ b/content/riak/kv/2.9.7/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,371 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.7/ops/mdc/v2/quick-start + - /riak/kv/2.9.7/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.7/configuring/v3-multi-datacenter/quick-start/) instead. 
{{% /note %}}

The Riak Multi-Datacenter Replication Quick Start will walk you through
the process of configuring Riak's version 2 Replication to perform
replication between two sample Riak clusters in separate networks. This
guide will also cover bidirectional replication, which is accomplished
by setting up unidirectional replication in both directions between the
clusters.

## Prerequisites

This guide assumes that you have completed the following steps:

* [Installing Riak][install index]
* [Performing system tuning][perf index]
* [Reviewing configuration][config v2 mdc]

## Scenario

Configure Riak MDC to perform replication, given the following
3-node Riak clusters:

#### Cluster 1

Name | IP | Node name
:-----|:------------|:----------------
`node1` | `172.16.1.11` | `riak@172.16.1.11`
`node2` | `172.16.1.12` | `riak@172.16.1.12`
`node3` | `172.16.1.13` | `riak@172.16.1.13`

#### Cluster 2

Name | IP | Node name
:-----|:------------|:----------------
`node4` | `192.168.1.21` | `riak@192.168.1.21`
`node5` | `192.168.1.22` | `riak@192.168.1.22`
`node6` | `192.168.1.23` | `riak@192.168.1.23`

**Note**: The addresses used in these example clusters are contrived,
non-routable addresses. In real-world applications, however, these
addresses would need to be routable over the public Internet.

## Set Up Cluster1 → Cluster2 Replication

### Set Up the Listeners on Cluster1 (Source cluster)

On a node in Cluster1, `node1` for example, identify the nodes that will
listen for connections from replication clients, using `riak-repl
add-listener <nodename> <listen_ip> <port>` once for each listening node:

```bash
riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
```

### Set Up the Site on Cluster2 (Site cluster)

On a node in Cluster2, `node4` for example, inform the replication
clients where the Source Listeners are located with `riak-repl add-site
<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
configured in the earlier step. For `sitename` enter `Cluster1`.

```bash
riak-repl add-site 172.16.1.11 9010 Cluster1
```

**Note**: While a Listener needs to be added to each node, only a single
Site needs to be added on the Site cluster. Once connected to the Source
cluster, it will get the locations of the rest of the Listeners in the
Source cluster.

### Verify the Replication Configuration

Verify the replication configuration using `riak-repl status` on both a
Cluster1 node and a Cluster2 node. A full description of the `riak-repl
status` command's output can be found in the documentation for
`riak-repl`'s [status output][cluster ops v2 mdc#status].

On the Cluster1 node, verify that there is a `listener_<nodename>` entry
for each listening node, and that `leader` and `server_stats` are populated.
They should look similar to the following:

```
listener_riak@172.16.1.11: "172.16.1.11:9010"
listener_riak@172.16.1.12: "172.16.1.12:9010"
listener_riak@172.16.1.13: "172.16.1.13:9010"
leader: 'riak@172.16.1.11'
server_stats: [{<8051.3939.0>,
                {message_queue_len,0},
                {status,[{site,"Cluster2"},
                         {strategy,riak_repl_keylist_server},
                         {fullsync_worker,<8051.3940.0>},
                         {dropped_count,0},
                         {queue_length,0},
                         {queue_byte_size,0},
                         {state,wait_for_partition}]}}]
```

On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
`client_stats` are populated.
They should look similar to the following:

```
Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010"
leader: 'riak@192.168.1.21'
client_stats: [{<8051.3902.0>,
                {message_queue_len,0},
                {status,[{site,"Cluster1"},
                         {strategy,riak_repl_keylist_client},
                         {fullsync_worker,<8051.3909.0>},
                         {put_pool_size,5},
                         {connected,"172.16.1.11",9010},
                         {state,wait_for_fullsync}]}}]
```

### Testing Realtime Replication

That's all there is to it! When `PUT` requests are coordinated by
Cluster1, these operations will be replicated to Cluster2.

You can use the following example script to verify that `PUT` operations
sent to Cluster1 are being replicated to Cluster2:

```bash
#!/bin/bash

VALUE=`date`
CLUSTER_1_IP=172.16.1.11
CLUSTER_2_IP=192.168.1.21

curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1

CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`

if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
  echo "C1 PUT Successful"
else
  echo "C1 PUT Failed"
  exit 1
fi

CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`

if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
  echo "C1 to C2 consistent"
else
  echo "C1 to C2 inconsistent
  C1:${CHECKPUT_C1}
  C2:${CHECKREPL_C1_TO_C2}"
  exit 1
fi

exit 0
```

You will have to change some of the above variables for your own
environment, such as IP addresses or ports.

If you run this script and things are working as expected, you will get
the following output:

```
C1 PUT Successful
C1 to C2 consistent
```

## Set Up Cluster2 → Cluster1 Replication

### About Bidirectional Replication

Multi-Datacenter support can also be configured to replicate in both
directions, ensuring eventual consistency between your two datacenters.
Setting up bidirectional replication is as simple as repeating the steps
above in the other direction, i.e. from Cluster2 to Cluster1.

### Set Up the Listeners on Cluster2 (Source cluster)

On a node in Cluster2, `node4` for example, identify the nodes that will
listen for connections from replication clients, using `riak-repl
add-listener <nodename> <listen_ip> <port>` once for each listening node:

```bash
riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010
riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010
riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010
```

### Set Up the Site on Cluster1 (Site cluster)

On a node in Cluster1, `node1` for example, inform the replication
clients where the Source Listeners are located with `riak-repl add-site
<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you configured in
the earlier step. For `sitename` enter `Cluster2`.

```bash
riak-repl add-site 192.168.1.21 9010 Cluster2
```

### Verify the Replication Configuration

Verify the replication configuration using `riak-repl status` on a
Cluster1 node and a Cluster2 node. A full description of the `riak-repl
status` command's output can be found in the documentation for
`riak-repl`'s [status output][cluster ops v2 mdc#status].

On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and
`client_stats` are populated.
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show 'cancelled' in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.9.7/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..7ef9e1a51e
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,164 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.7/ops/mdc/v2/ssl
+  - /riak/kv/2.9.7/ops/mdc/v2/ssl
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{< baseurl >}}riak/kv/2.9.7/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+ % ...
+ {ssl_enabled, true},
+ {certfile, "/full/path/to/site1-cert.pem"},
+ {keyfile, "/full/path/to/site1-key.pem"},
+ {cacertdir, "/full/path/to/cacertsdir"}
+ % ...
+ ]}
+
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+ % ... 
+ {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+ % ...
+ ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+ % ...
+ {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+ % ...
+ ]}
+
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+ % ...
+ {peer_common_name_acl, "*"}
+ % ...
+ ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+ % ...
+ {ssl_depth, ...}
+ % ...
+ ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self-signed.
+
+For example:
+
+ * A depth of 0 indicates that the certificate must be signed directly
+   by a root certificate authority (CA)
+ * A depth of 1 indicates that the certificate may be signed by at most
+   1 intermediate CA, followed by a root CA
+ * A depth of 2 indicates that the certificate may be signed by at most
+   2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/configuring/v3-multi-datacenter.md b/content/riak/kv/2.9.7/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..e1b166eec0
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/v3-multi-datacenter.md
@@ -0,0 +1,161 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.7/ops/mdc/v3/configuration
+  - /riak/kv/2.9.7/ops/mdc/v3/configuration
+---
+
+[config reference#advanced]: {{< baseurl >}}riak/kv/2.9.7/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{< baseurl >}}riak/kv/2.9.7/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. 
For more information and for a list +of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced]. + +Here is a sample of the syntax: + +```advancedconfig +{riak_core, [ + %% Every *node* runs one cluster_mgr + {cluster_mgr, {"0.0.0.0", 9080 }}, + % ... +]}, +{riak_repl, [ + %% Pick the correct data_root for your platform + %% Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + %% Solaris: + %% {data_root, "/opt/riak/data/riak_repl"}, + %% FreeBSD/SmartOS: + %% {data_root, "/var/db/riak/riak_repl"}, + {max_fssource_cluster, 5}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {fullsync_on_connect, false}, + % ... +]} +``` + +## Settings + +Riak MDC configuration is set using the standard Erlang config file +syntax `{Setting, Value}`. For example, if you wished to set +`fullsync_on_connect` to `false`, you would insert this line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{fullsync_on_connect, false} +``` + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak_repl Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number. +`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. 
+`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication.
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
+`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
+`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100MB. Dropped objects will need to be replicated with a fullsync.
+`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
+`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+
+
+## riak_core Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an ssl `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+
+## Heartbeat Settings
+
+There are two realtime-replication-related settings in the `riak_repl`
+section of `advanced.config` related to the periodic "heartbeat" that is sent
+from the source to the sink cluster to verify the sink cluster's
+liveness. The `rt_heartbeat_interval` setting determines how often the
+heartbeat is sent (in seconds). If a heartbeat is sent and a response is
+not received, Riak will wait `rt_heartbeat_timeout` seconds before
+attempting to re-connect to the sink; if any data is received from the
+sink, even if it is not heartbeat data, the timer will be reset. Setting
+`rt_heartbeat_interval` to `undefined` will disable the heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to. On the other hand,
+lengthening the timeout will make Riak less sensitive to cases in which
+the connection really has been compromised. 
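+
+As an illustrative sketch, explicitly setting both heartbeat parameters
+in the `riak_repl` section might look like the following (the values
+shown are simply the defaults from the table above):
+
+```advancedconfig
+{riak_repl, [
+  % ...
+  %% send a heartbeat to the sink every 15 seconds
+  {rt_heartbeat_interval, 15},
+  %% wait up to 15 seconds for a response before re-connecting
+  {rt_heartbeat_timeout, 15}
+  % ...
+]}
+```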
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA and so on.
+
+2. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer].
+
+## Default Bucket Properties
+
+Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0.
+
+To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the advanced.config file:
+
+```advancedconfig
+{riak_repl, [
+  {override_capability, [
+    {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+  ]}
+]}
+```
+
+If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.9.7/configuring/v3-multi-datacenter/nat.md
new file mode 100644
index 0000000000..2ce52fee11
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/v3-multi-datacenter/nat.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "With NAT"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "With NAT"
+    identifier: "configuring_v3_replication_nat"
+    weight: 101
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.7/ops/mdc/v3/nat
+  - /riak/kv/2.9.7/ops/mdc/v3/nat
+---
+
+[config v3 ssl]: {{< baseurl >}}riak/kv/2.9.7/configuring/v3-multi-datacenter/ssl
+
+Riak's Version 3 Replication supports replication of data on
+networks that use static NAT.
+
+This can be used for replicating data over the internet where servers
+have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network).
+
+### Requirements
+
+In order for Replication to work on a server configured with NAT, the
+NAT addresses must be configured *statically*.
+
+## Configuration
+
+NAT rules can be configured at runtime, from the command line.
+
+* `riak-repl nat-map show`
+
+    Shows the current NAT mapping table
+
+* `riak-repl nat-map add <externalip>[:port] <internalip>`
+
+    Adds a NAT map from the external IP, with an optional port, to an
+    internal IP. The port number refers to a port that is automatically
+    mapped to the internal `cluster_mgr` port number.
+
+* `riak-repl nat-map del <externalip>[:port] <internalip>`
+
+    Deletes a specific NAT map entry.
+
+### Applying Changes at Runtime
+
+* Realtime NAT replication changes will be applied once realtime is
+  stopped and started using the following commands (see the sketch
+  after this list):
+
+  * `riak-repl realtime stop <clustername>`
+  * `riak-repl realtime start <clustername>`
+
+* Fullsync NAT replication changes will be applied on the next run of a
+  fullsync, or you can stop and start the current fullsync:
+
+  * `riak-repl fullsync stop <clustername>`
+  * `riak-repl fullsync start <clustername>`
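+
+As a sketch, applying a new NAT mapping to a running realtime
+connection might look like the following (the mapping and the
+`Cluster_B` sink name are illustrative, taken from the example below):
+
+```bash
+# register the new static NAT mapping
+riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
+
+# restart realtime replication so the change takes effect
+riak-repl realtime stop Cluster_B
+riak-repl realtime start Cluster_B
+```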
+
+
+## Example
+
+* Cluster_A is the **source** of replicated data.
+* Cluster_B and Cluster_C are the **sinks** of the replicated data.
+
+### Cluster_A Setup
+
+Cluster_A is set up with nodes using the following **internal** IP
+addresses:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.1.20` | -
+`192.168.1.21` | -
+`192.168.1.22` | -
+`192.168.1.23` | -
+`192.168.1.24` | -
+
+### Cluster_B Setup
+
+The nodes in Cluster_B are configured as follows:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.2.40` | `50.16.238.120:5555`
+`192.168.2.41` | `50.16.238.121:5555`
+`192.168.2.42` | `50.16.238.122:5555`
+`192.168.2.43` | `50.16.238.123:5555`
+`192.168.2.44` | `50.16.238.124:5555`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT port listens on `5555`.
+
+### Cluster_C Setup
+
+The nodes in Cluster_C are set up with **static NAT**, configured with the
+following IP addresses:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.3.60` | `50.16.238.200:5550`
+`192.168.3.61` | `50.16.238.200:5551`
+`192.168.3.62` | `50.16.238.200:5552`
+`192.168.3.63` | `50.16.238.200:5553`
+`192.168.3.64` | `50.16.238.200:5554`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT ports listen on `5550` through `5554`.
+
+```bash
+# on any node of Cluster_A
+riak-repl clustername Server_A
+
+# on any node of Cluster_B
+riak-repl clustername Server_B
+
+# on any node of Cluster_C
+riak-repl clustername Server_C
+
+# on 50.16.238.120 of Cluster_B
+riak-repl nat-map add 50.16.238.120:5555 192.168.2.40
+# on 50.16.238.121 of Cluster_B
+riak-repl nat-map add 50.16.238.121:5555 192.168.2.41
+# on 50.16.238.122 of Cluster_B
+riak-repl nat-map add 50.16.238.122:5555 192.168.2.42
+# on 50.16.238.123 of Cluster_B
+riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
+# on 50.16.238.124 of Cluster_B
+riak-repl nat-map add 50.16.238.124:5555 192.168.2.44
+
+# on 192.168.3.60 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5550 192.168.3.60
+# on 192.168.3.61 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5551 192.168.3.61
+# on 192.168.3.62 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5552 192.168.3.62
+# on 192.168.3.63 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5553 192.168.3.63
+# on 192.168.3.64 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5554 192.168.3.64
+
+
+# Connect replication from Cluster_A to Cluster_B:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.120:5555
+# You can connect to any node in Cluster_B with NAT-mapped IPs/ports
+# This command only needs to be run *once* for a cluster.
+
+# Connect replication from Cluster_A to Cluster_C:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.200:5550
+# You can connect to any node in Cluster_C with NAT-mapped IPs/ports
+# This command only needs to be run *once* for a cluster. 
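+
+# Optionally, confirm that the clusters are connected before enabling
+# realtime replication:
+# riak-repl connections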
+ + +# on any node from Cluster_A +riak-repl realtime enable Cluster_B +riak-repl realtime enable Cluster_C + +riak-repl realtime start Cluster_B +riak-repl realtime start Cluster_C +``` + + + + diff --git a/content/riak/kv/2.9.7/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.9.7/configuring/v3-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..92e12bac46 --- /dev/null +++ b/content/riak/kv/2.9.7/configuring/v3-multi-datacenter/quick-start.md @@ -0,0 +1,172 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Quickstart" + identifier: "configuring_v3_quickstart" + weight: 100 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.7/ops/mdc/v3/quick-start + - /riak/kv/2.9.7/ops/mdc/v3/quick-start +--- + +[perf index]: {{}}riak/kv/2.9.7/using/performance +[config v3 mdc]: {{}}riak/kv/2.9.7/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter + +This guide will walk you through the process of configuring Riak's v3 +Replication to perform replication between two sample Riak clusters on +separate networks. This guide will also cover bidirectional replication, +which is accomplished by setting up unidirectional replication in both +directions between the clusters. It is important to note that both +clusters must have the same ring size, but can have a different number +of nodes. + +## Prerequisites + +This guide assumes that you have completed the following steps: + +* Install [Riak][install index] +* Perform [System Tuning][perf index] +* Review [Configuration][config v3 mdc] + +## About v3 Replication in 1.3 and higher + +In Riak's v3 Replication from Riak KV version 1.3 onwards, the nomenclature for Source and Site +clusters has changed. To more accurately reflect the behavior of each of +the clusters, "listeners" and "sites" are now known as "sources" and +"sinks." Data transfer now originates at the "source" and replicates to +the "sink;" initiation is always from the primary (source) to the backup +(sink) data center. + +Additionally, knowledge of the state of each cluster is now managed by a +**cluster manager** process, which greatly simplifies the setup and +maintenance of Multi-Datacenter replication. 
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak Clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|-----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View your active connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink Cluster Name [Members]
+---- ------------ ---------- ---------
+Cluster2 Cluster2 <0.7985.0> ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink Cluster Name [Members]
+---- ------------ ---------- ---------
+Cluster1 Cluster1 <0.4456.0> ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2.
+Once this is done, bidirectional replication should be operating. 
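+
+### Testing Realtime Replication
+
+As a quick check, you can adapt the test script from the v2 quick start
+to verify that a `PUT` coordinated by Cluster1 is replicated to
+Cluster2. This is a minimal sketch: the IP addresses are taken from the
+scenario above, while the HTTP port `8098` and the `replCheck` bucket
+are assumptions you may need to adjust for your environment:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=10.60.67.149
+CLUSTER_2_IP=10.60.77.10
+
+# write a value to Cluster1 and read it back
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
+  echo "C1 PUT Successful"
+else
+  echo "C1 PUT Failed"
+  exit 1
+fi
+
+# read the same key from Cluster2, which should have received it
+# via realtime replication
+CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent"
+  exit 1
+fi
+
+exit 0
+```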
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.9.7/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..106689c9fd
--- /dev/null
+++ b/content/riak/kv/2.9.7/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,174 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.7/ops/mdc/v3/ssl
+  - /riak/kv/2.9.7/ops/mdc/v3/ssl
+---
+
+[config reference#advanced.config]: {{< baseurl >}}riak/kv/2.9.7/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+ % ...
+ {ssl_enabled, true},
+ {certfile, "/full/path/to/site1-cert.pem"},
+ {keyfile, "/full/path/to/site1-key.pem"},
+ {cacertdir, "/full/path/to/cacertsdir"}
+ % ...
+ ]}
+
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property. 
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+ % ...
+ {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+ % ...
+ ]}
+
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`.
+
+```advancedconfig
+{riak_core, [
+ % ...
+ {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+ % ...
+ ]}
+
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_core, [
+ % ...
+ {peer_common_name_acl, "*"}
+ % ...
+ ]}
+
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_core` section of `advanced.config`:
+
+```advancedconfig
+{riak_core, [
+ % ...
+ {ssl_depth, 3} % Sets the depth to 3
+ % ...
+ ]}
+
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. The
+intermediate certificates must not be self-signed.
+
+The following example depths illustrate this:
+
+  * a depth of `0` indicates that the certificate must be signed
+    directly by a root certificate authority (CA)
+  * a depth of `1` indicates that the certificate may be signed by at
+    most 1 intermediate CA, followed by a root CA
+  * a depth of `2` indicates that the certificate may be signed by at
+    most 2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL for *Version 3* is available in *Riak 1.4+*.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+Read how to [generate your own CA and
+keys](http://www.debian-administration.org/articles/618). Ensure that
+you remove the password protection from the keys you generate.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing.md b/content/riak/kv/2.9.7/developing.md
new file mode 100644
index 0000000000..fdacbfbfeb
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing.md
@@ -0,0 +1,79 @@
+---
+title: "Developing with Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Developing"
+    identifier: "developing"
+    weight: 300
+    pre: lambda
+toc: true
+aliases:
+---
+
+[getting started]: ../developing/getting-started
+[usage index]: ../developing/usage
+[client libraries]: ../developing/client-libraries
+[dev data types]: ../developing/data-types
+[dev data modeling]: ../developing/data-modeling
+[apps index]: ../developing/app-guide
+[dev api index]: ../developing/api
+[dev faq]: ../developing/faq
+
+## In This Section
+
+#### [Getting Started][getting started]
+
+Step-by-step guide for getting started developing with Riak KV.
+
+[Learn More >>][getting started]
+
+#### [Usage][usage index]
+
+A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types.
+
+[Learn More >>][usage index]
+
+#### [Client Libraries][client libraries]
+
+Overview of client libraries for a variety of programming languages and environments. 
+ +[Learn More >>][client libraries] + +#### [Data Types][dev data types] + +Overview and guide to working with data types in Riak KV. + +[Learn More >>][dev data types] + +#### [Data Modeling][dev data modeling] + +Information on use cases and data models that are a good fit for Riak KV. + +[Learn More >>][dev data modeling] + +#### [Application Guide][apps index] + +A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV. + +[Learn More >>][apps index] + +#### [APIs Reference][dev api index] + +Information and reference material on Riak KV APIs. + +[Learn More >>][dev api index] + +#### [FAQ][dev faq] + +Frequently asked questions when developing applications with Riak KV. + +[Learn More >>][dev faq] + + + + + + diff --git a/content/riak/kv/2.9.7/developing/api.md b/content/riak/kv/2.9.7/developing/api.md new file mode 100644 index 0000000000..78c22e1516 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api.md @@ -0,0 +1,42 @@ +--- +title: "APIs" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "APIs" + identifier: "developing_apis" + weight: 107 + parent: "developing" +toc: true +aliases: +--- + +[dev api http]: ./http +[dev api backend]: ./backend +[dev api pbc]: ./protocol-buffers/ + +## In This Section + +#### [HTTP APIs][dev api http] + +Documentation on Riak KV's HTTP API. + +[Learn More >>][dev api http] + +#### [Protocol Buffers][dev api pbc] + +Information on Riak KV's Protocol Buffer Client API + +[Learn More >>][dev api pbc] + +#### [Backend API][dev api backend] + +Overview of Riak KV's storage backend API. + +[Learn More >>][dev api backend] + + + + diff --git a/content/riak/kv/2.9.7/developing/api/backend.md b/content/riak/kv/2.9.7/developing/api/backend.md new file mode 100644 index 0000000000..6eda5d48fd --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/backend.md @@ -0,0 +1,118 @@ +--- +title: "Backend API" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Backend API" + identifier: "apis_backend" + weight: 101 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.7/dev/references/backend-api + - /riak/kv/2.9.7/dev/references/backend-api +--- + +[plan backend]: {{}}riak/kv/2.9.7/setup/planning/backend + +Riak's storage API uniformly applies to all of the +[supported backends][plan backend]. This page presents the details of +the storage backend API in the form of +[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html) +(specs). + +Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html), +an Erlang static analysis tool. We recommend copying these specs into any +custom backend modules and use them as a guide for development to +avoid errors and ensure full compatibility with Riak. + +Also included below is the function export list that can be pasted directly +into a custom storage backend module. + +```erlang +%% Riak Storage Backend API +-export([api_version/0, + start/2, + stop/1, + get/3, + put/5, + delete/4, + drop/1, + fold_buckets/4, + fold_keys/4, + fold_objects/4, + is_empty/1, + status/1, + callback/3]). + +%% =================================================================== +%% Public API +%% =================================================================== + +%% @doc Return the major version of the +%% current API and a capabilities list. +%% The current valid capabilities are async_fold +%% and indexes. 
+-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. + +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http.md b/content/riak/kv/2.9.7/developing/api/http.md new file mode 100644 index 0000000000..12d14fb73e --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http.md @@ -0,0 +1,93 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.7/dev/references/http + - /riak/kv/2.9.7/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
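+
+For example, to store and then fetch an object under the key
+`docs/readme`, the slash can be escaped as `%2F` (a hypothetical key,
+assuming default localhost settings):
+
+```curl
+# store under the key "docs/readme", escaping the slash
+curl -XPUT http://localhost:8098/buckets/test/keys/docs%2Freadme \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+
+# fetch it back with the same escaped key
+curl http://localhost:8098/buckets/test/keys/docs%2Freadme
+```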
+
+## Bucket-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/list-keys)
+
+## Object-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/delete-object)
+
+## Riak-Data-Type-related Operations
+
+Method | URL
+:------|:----
+`GET` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+
+For documentation on the HTTP API for [Riak Data Types]({{< baseurl >}}riak/kv/2.9.7/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{< baseurl >}}riak/kv/2.9.7/developing/data-types/#usage-examples)
+and subpages e.g. [sets]({{< baseurl >}}riak/kv/2.9.7/developing/data-types/sets).
+
+Advanced users may consult the technical documentation inside the Riak
+KV internal module `riak_kv_wm_crdt`. 
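+
+As an illustrative sketch of the Data Type endpoints above (assuming a
+bucket type named `counters` with the `counter` datatype has been
+created and activated, and default localhost settings):
+
+```curl
+# increment a counter stored at /types/counters/buckets/test/datatypes/hits
+curl -XPOST http://localhost:8098/types/counters/buckets/test/datatypes/hits \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 1}'
+
+# fetch the counter's current value
+curl http://localhost:8098/types/counters/buckets/test/datatypes/hits
+```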
+
+## Query-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`POST` | `/mapred` | [HTTP MapReduce]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/secondary-indexes)
+
+## Server-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/ping` | [HTTP Ping]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/list-resources)
+
+## Search-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/store-search-schema)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/http/counters.md b/content/riak/kv/2.9.7/developing/api/http/counters.md
new file mode 100644
index 0000000000..ff075ca7f7
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/http/counters.md
@@ -0,0 +1,82 @@
+---
+title: "HTTP Counters"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Counters"
+    identifier: "http_counters"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/http/counters
+  - /riak/kv/2.9.7/dev/references/http/counters
+---
+
+Riak counters are a CRDT (convergent replicated data type) that (eventually)
+converge to the correct total. You merely increment the counter with some
+integer, and any potential conflicts will be automatically resolved by Riak.
+
+## Setup
+
+Riak counters can only be used if the bucket has the `allow_mult` property
+set to `true`.
+
+```
+curl -XPUT localhost:8098/buckets/BUCKET/props \
+  -H "Content-Type: application/json" \
+  -d "{\"props\" : {\"allow_mult\": true}}"
+```
+
+If you attempt to use counters without setting the above, you'll get this
+message:
+
+```
+Counters require bucket property 'allow_mult=true'
+```
+
+## Request
+
+To increment a counter, `POST` an integer value to the `/counters`
+resource. This will increment the keyed value by the given amount:
+
+```
+POST /buckets/BUCKET/counters/KEY
+```
+
+To retrieve the current value, issue a `GET` to the same resource:
+
+```
+GET /buckets/BUCKET/counters/KEY
+```
+
+## Response
+
+The regular POST/PUT ([HTTP Store Object]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{< baseurl >}}riak/kv/2.9.7/developing/api/http/fetch-object)) responses apply here.
+
+Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata.
+
+## Example
+
+The body must be an integer (positive or negative). 
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/delete-object.md b/content/riak/kv/2.9.7/developing/api/http/delete-object.md new file mode 100644 index 0000000000..dd3704d74b --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/delete-object.md @@ -0,0 +1,79 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/delete-object + - /riak/kv/2.9.7/dev/references/http/delete-object +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> DELETE /buckets/test/keys/test2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/delete-search-index.md b/content/riak/kv/2.9.7/developing/api/http/delete-search-index.md new file mode 100644 index 0000000000..4519fa4e51 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/delete-search-index.md @@ -0,0 +1,37 @@ +--- +title: "HTTP Delete Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Delete Search Index" + identifier: "http_delete_search_index" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/delete-search-index + - /riak/kv/2.9.7/dev/references/http/delete-search-index +--- + +Deletes a Riak Search index. + +## Request + +``` +DELETE /search/index/ +``` + +## Normal Response Codes + +* `204 No Content` - The index was successfully deleted (also returned + if the index did not exist to begin with) + +## Typical Error Codes + +* `503 Service Unavailable` - The request timed out internally + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/fetch-object.md b/content/riak/kv/2.9.7/developing/api/http/fetch-object.md new file mode 100644 index 0000000000..4afa3940bb --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/fetch-object.md @@ -0,0 +1,246 @@ +--- +title: "HTTP Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Fetch Object" + identifier: "http_fetch_object" + weight: 105 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/fetch-object + - /riak/kv/2.9.7/dev/references/http/fetch-object +--- + +Reads an object from the specified bucket/key. + +## Request + +```bash +GET /types/type/buckets/bucket/keys/key +GET /buckets/bucket/keys/key +``` + +Important headers: + +* `Accept` - When `multipart/mixed` is the preferred content-type, objects with +siblings will return all siblings in single request. See [Siblings examples](#siblings-examples). See +also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1). + +Optional headers: + +* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics, +matching on the `ETag` and `Last-Modified` of the object, respectively. If the +object fails one of the tests (that is, if the ETag is equal or the object is +unmodified since the supplied timestamp), Riak will return a `304 Not Modified` +response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5). + +Optional query parameters: + +* `r` - (read quorum) how many replicas need to agree when retrieving the +object ([default is defined by the bucket]({{}}riak/kv/2.9.7/developing/api/http/set-bucket-props)) +* `pr` - how many primary replicas need to be online when doing the read +([default is defined by the bucket]({{}}riak/kv/2.9.7/developing/api/http/set-bucket-props)) +* `basic_quorum` - whether to return early in some failure cases (eg. 
when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/2.9.7/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.9.7/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.7/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. +{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT +< ETag: 6dQBm9oYA1mxRSH0e96l5W +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"foo":"bar"} +``` + +## Siblings examples + +### Manually requesting siblings + +Simple call to fetch an object that has siblings: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 102 +< +Siblings: +16vic4eU9ny46o4KPiDz1f +4v5xOg4bVwUYZdMkqf0d6I +6nr5tDTmhxnwuAFJDd2s6G +6zRSZFUJlHXZ15o9CG0BYl +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +Now request one of the siblings directly: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT +< ETag: 16vic4eU9ny46o4KPiDz1f +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/x-www-form-urlencoded +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + +### Get all siblings in one request + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: multipart/mixed +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh +< Content-Length: 766 +< + +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/x-www-form-urlencoded +Link: ; rel="up" +Etag: 16vic4eU9ny46o4KPiDz1f +Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 4v5xOg4bVwUYZdMkqf0d6I +Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6nr5tDTmhxnwuAFJDd2s6G +Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6zRSZFUJlHXZ15o9CG0BYl +Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT + +{"foo":"bar"} +--YinLMzyUR9feB17okMytgKsylvh-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/fetch-search-index.md b/content/riak/kv/2.9.7/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..92b7e8ea0a --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/fetch-search-index.md @@ -0,0 +1,51 @@ +--- +title: "HTTP Fetch Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Fetch Search Index" + identifier: "http_fetch_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/fetch-search-index + - /riak/kv/2.9.7/dev/references/http/fetch-search-index +--- + +Retrieves information about a Riak Search [index]({{}}riak/kv/2.9.7/developing/usage/search/#simple-setup). + +## Request + +``` +GET /search/index/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` - No Search index with that name is currently + available +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the index is found, Riak will output a JSON object describing the +index, including its name, the [`n_val`]({{}}riak/kv/2.9.7/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.9.7/developing/usage/search-schemas) used by the index. 
Here is an example: + +```json +{ + "name": "my_index", + "n_val": 3, + "schema": "_yz_default" +} +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.9.7/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..7cfc15f23c --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/fetch-search-schema.md @@ -0,0 +1,42 @@ +--- +title: "HTTP Fetch Search Schema" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Fetch Search Schema" + identifier: "http_fetch_search_schema" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/fetch-search-schema + - /riak/kv/2.9.7/dev/references/http/fetch-search-schema +--- + +Retrieves a Riak KV [search schema]({{}}riak/kv/2.9.7/developing/usage/search-schemas). + +## Request + +``` +GET /search/schema/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the schema is found, Riak will return the contents of the schema as +XML (all Riak Search schemas are XML). + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/get-bucket-props.md b/content/riak/kv/2.9.7/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..1da6884862 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/get-bucket-props.md @@ -0,0 +1,86 @@ +--- +title: "HTTP Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Get Bucket Properties" + identifier: "http_get_bucket_props" + weight: 100 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/get-bucket-props + - /riak/kv/2.9.7/dev/references/http/get-bucket-props +--- + +Reads the bucket or bucket type properties. + +## Request + +```bash +GET /buckets/bucket/props +``` + +Or, to read bucket properties from a bucket in a bucket type: + +```bash +GET /types/type/buckets/bucket/props +``` + +Optional query parameters (only valid for the old format): + +* `props` - whether to return the bucket properties (`true` is the default) +* `keys` - whether to return the keys stored in the bucket. (`false` is the +default). See also [HTTP List Keys]({{}}riak/kv/2.9.7/developing/api/http/list-keys). + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` + +The JSON object in the response will contain up to two entries, `"props"` and +`"keys"`, which are present or missing, according to the optional query +parameters. The default is for only `"props"` to be present. + +See [HTTP Set Bucket Properties]({{}}riak/kv/2.9.7/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.9.7/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/props +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/link-walking.md b/content/riak/kv/2.9.7/developing/api/http/link-walking.md new file mode 100644 index 0000000000..0ad6dc1daa --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/link-walking.md @@ -0,0 +1,129 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/link-walking + - /riak/kv/2.9.7/dev/references/http/link-walking +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/2.9.7/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.9.7/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/2.9.7/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/list-buckets.md b/content/riak/kv/2.9.7/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..3e9585d28f --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/list-buckets.md @@ -0,0 +1,68 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/list-buckets + - /riak/kv/2.9.7/dev/references/http/list-buckets +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. 
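+
+Buckets under a non-default [bucket type]({{}}riak/kv/2.9.7/using/cluster-operations/bucket-types) are listed via the `/types` prefix shown above. A minimal sketch, assuming a node on `localhost:8098` and a hypothetical bucket type named `mytype`:
+
+```curl
+# List buckets of the bucket type "mytype"; the response has the same
+# {"buckets":[...]} shape as the default-type example below.
+curl http://localhost:8098/types/mytype/buckets?buckets=true
+```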
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/list-keys.md b/content/riak/kv/2.9.7/developing/api/http/list-keys.md new file mode 100644 index 0000000000..d29ec94066 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/list-keys.md @@ -0,0 +1,80 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/list-keys + - /riak/kv/2.9.7/dev/references/http/list-keys +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/list-resources.md b/content/riak/kv/2.9.7/developing/api/http/list-resources.md new file mode 100644 index 0000000000..e50f41440a --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/list-resources.md @@ -0,0 +1,84 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/list-resources + - /riak/kv/2.9.7/dev/references/http/list-resources +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
+ +The standard resources are: + +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.9.7/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.9.7/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.9.7/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.9.7/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/2.9.7/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.9.7/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.9.7/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.9.7/developing/api/http/status) + +## Request + +```bash +GET / +``` + +Headers: + +* `Accept` - `application/json` or `text/html` + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Link` - all resources that are described in the response body, but in Link +form + +## Example + +Request JSON response + +```curl +$ curl -i http://localhost:8098 -H "Accept: application/json" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:18:31 GMT +Content-Type: application/json +Content-Length: 398 + +{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"} + +# Request HTML response +curl -i http://localhost:8098 -H "Accept: text/html" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:20:05 GMT +Content-Type: text/html +Content-Length: 666 + + +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/mapreduce.md b/content/riak/kv/2.9.7/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..d6d05d7c75 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/mapreduce.md @@ -0,0 +1,74 @@ +--- +title: "HTTP MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "MapReduce" + identifier: "http_mapreduce" + weight: 108 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/mapreduce + - /riak/kv/2.9.7/dev/references/http/mapreduce +--- + +[MapReduce]({{}}riak/kv/2.9.7/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will 
flow.
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.9.7/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+*This request must include an entity (body), which is the JSON form of the MapReduce query.*
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted.
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/http/ping.md b/content/riak/kv/2.9.7/developing/api/http/ping.md
new file mode 100644
index 0000000000..25ab7c084d
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/http/ping.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/http/ping
+  - /riak/kv/2.9.7/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load-balancers and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/http/reset-bucket-props.md b/content/riak/kv/2.9.7/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..3bcece8c31
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,61 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/http/reset-bucket-props
+  - /riak/kv/2.9.7/dev/references/http/reset-bucket-props
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/bucket/props HTTP/1.1
+> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue)
+< Date: Tue, 06 Nov 2012 21:56:17 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/http/search-index-info.md b/content/riak/kv/2.9.7/developing/api/http/search-index-info.md
new file mode 100644
index 0000000000..e52f2a0ce9
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/http/search-index-info.md
@@ -0,0 +1,56 @@
+---
+title: "HTTP Search Index Info"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Search Index Info"
+    identifier: "http_search_index_info"
+    weight: 114
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/http/search-index-info
+  - /riak/kv/2.9.7/dev/references/http/search-index-info
+---
+
+Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.9.7/developing/usage/search) in JSON format.
+
+## Request
+
+```
+GET /search/index
+```
+
+## Response
+
+If there are no currently available Search indexes, a `200 OK` will be
+returned but with an empty list as the response value.
+ +Below is the example output if there is one Search index, called +`test_index`, currently available: + +```json +[ + { + "n_val": 3, + "name": "test_index", + "schema": "_yz_default" + } +] +``` + +#### Normal Response Codes + +* `200 OK` + +#### Typical Error Codes + +* `404 Object Not Found` - Typically returned if Riak Search is not + currently enabled on the node +* `503 Service Unavailable` - The request timed out internally + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/search-query.md b/content/riak/kv/2.9.7/developing/api/http/search-query.md new file mode 100644 index 0000000000..b69bbf092b --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/search-query.md @@ -0,0 +1,73 @@ +--- +title: "HTTP Search Query" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Search Query" + identifier: "http_search_query" + weight: 113 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/search-query + - /riak/kv/2.9.7/dev/references/http/search-query +--- + +Performs a [Riak KV Search]({{}}riak/kv/2.9.7/developing/usage/search) query. + +## Request + +``` +GET /search/query/ +``` + +## Optional Query Parameters + +* `wt` - The [response + writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers) + to be used when returning the Search payload. The currently + available options are `json` and `xml`. The default is `xml`. +* `q` - The actual Search query itself. Examples can be found in + [Using Search]({{}}riak/kv/2.9.7/developing/usage/search). If a query is not specified, Riak will return + information about the index itself, e.g. the number of documents + indexed. + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `400 Bad Request` - Returned when, for example, a malformed query is + supplied +* `404 Object Not Found` - Returned if the Search index you are + attempting to query does not exist +* `503 Service Unavailable` - The request timed out internally + +## Response + +If a `200 OK` is returned, then the Search query has been successful. +Below is an example JSON response from querying an index that currently +has no documents associated with it: + +```json +{ + "response": { + "docs": [], + "maxScore": 0.0, + "numFound": 0, + "start": 0 + }, + "responseHeader": { + "status": 0, + "QTime": 10, + "params": { /* internal info from the query */ } + } +} +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/secondary-indexes.md b/content/riak/kv/2.9.7/developing/api/http/secondary-indexes.md new file mode 100644 index 0000000000..c5cee82138 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/secondary-indexes.md @@ -0,0 +1,95 @@ +--- +title: "HTTP Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Secondary Indexes" + identifier: "http_2i" + weight: 109 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/secondary-indexes + - /riak/kv/2.9.7/dev/references/http/secondary-indexes +--- + +[Secondary Indexes]({{}}riak/kv/2.9.7/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
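+
+Index entries are attached at write time via `X-Riak-Index-*` headers on a [store object]({{}}riak/kv/2.9.7/developing/api/http/store-object) request. A minimal sketch, assuming a local node on port 8098 and a backend that supports secondary indexes (e.g. LevelDB); the bucket, key, field, and value below are placeholders that match the query example later on this page:
+
+```curl
+# Store an object tagged with the binary index field1_bin = val1
+$ curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "Content-Type: text/plain" \
+  -H "x-riak-index-field1_bin: val1" \
+  -d 'some data'
+```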
+
+## Request
+
+### Exact Match
+
+```bash
+GET /buckets/mybucket/index/myindex_bin/value
+```
+
+### Range Query
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end
+```
+
+#### Range query with terms
+
+To see the index values matched by the range, use `return_terms=true`.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true
+```
+
+### Pagination
+
+Add the parameter `max_results` for pagination. This will limit the number of results and provide a `continuation` value for the next request.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=
+```
+
+### Streaming
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?stream=true
+```
+
+## Response
+
+Normal status codes:
+
++ `200 OK`
+
+Typical error codes:
+
++ `400 Bad Request` - if the index name or index value is invalid.
++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system.
++ `503 Service Unavailable` - if the job timed out before it could complete
+
+## Example
+
+```curl
+$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 19
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+{"keys":["mykey1"]}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/http/set-bucket-props.md b/content/riak/kv/2.9.7/developing/api/http/set-bucket-props.md
new file mode 100644
index 0000000000..c0e8639842
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/http/set-bucket-props.md
@@ -0,0 +1,116 @@
+---
+title: "HTTP Set Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Set Bucket Properties"
+    identifier: "http_set_bucket_props"
+    weight: 101
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/http/set-bucket-props
+  - /riak/kv/2.9.7/dev/references/http/set-bucket-props
+---
+
+Sets bucket properties like "n_val" and "allow_mult".
+
+## Request
+
+```bash
+PUT /buckets/bucket/props
+```
+
+Important headers:
+
+* `Content-Type` - `application/json`
+
+The body of the request should be a JSON object with a single entry "props".
+Unmodified bucket properties may be omitted.
+
+Available properties:
+
+* `n_val` (integer > 0) - the number of replicas for objects in this bucket
+* `allow_mult` (true or false) - whether to allow sibling objects to be created
+(concurrent updates)
+* `last_write_wins` (true or false) - whether to ignore object history (vector
+clock) when writing
+* `precommit` - [precommit hooks]({{}}riak/kv/2.9.7/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{}}riak/kv/2.9.7/developing/usage/commit-hooks)
+* `r, w, dw, rw` - default quorum values for operations on keys in the bucket.
+Valid values are:
+  * `"all"` - all nodes must respond
+  * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.*
+  * `"one"` - equivalent to 1
+  * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of diverse physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+{{% note title="Node Confirms" %}}
+`node_confirms` is a tunable for durability. When operating in a failure state, Riak will store replicas in fallback vnodes, and in some cases multiple fallbacks may be on the same physical node. `node_confirms` is an option that specifies how many distinct physical nodes must acknowledge a write for it to be considered successful.
+
+When Riak receives a `put`, it starts up a `riak_kv_put_fsm` (finite state machine). This prepares and then validates the options, then calls any precommit hooks, before executing a put to the local vnode in the preflist, which becomes the co-ordinating node. This then waits for the local vnode response before executing the put request remotely on the two remaining nodes in the preflist.
+
+The fsm then waits for the remote vnode responses, and as it receives responses, it adds these results and checks whether enough results have been collected to satisfy the bucket properties such as `dw` and `pw`.
+When analysing the responses, Riak will count the number of different nodes from which results have been returned. The finite state machine can now be required to wait for a minimum number of confirmations from different nodes, whilst also ensuring all other configured options are satisfied.
+
+Once all options are satisfied, the response is returned, postcommit hooks are called and the fsm finishes.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+  -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/props HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4
+OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 21
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/http/status.md b/content/riak/kv/2.9.7/developing/api/http/status.md
new file mode 100644
index 0000000000..37cca1ab37
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/http/status.md
@@ -0,0 +1,173 @@
+---
+title: "HTTP Status"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Status"
+    identifier: "http_status"
+    weight: 111
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/http/status
+  - /riak/kv/2.9.7/dev/references/http/status
+---
+
+Reports on the performance and configuration of the Riak node that received the request. You must have the `{riak_kv_stat,true}` configuration setting in `app.config` for this endpoint to be active.
+
+## Performance
+
+Repeated requests to the `/stats` endpoint do not have a negative
+performance impact as the statistics are cached internally in Riak.
+
+## Request
+
+```bash
+GET /stats
+```
+
+Important headers:
+
+* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`.
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `404 Not Found` - if `riak_kv_stat` is not enabled
+
+Important headers:
+* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks)
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /stats HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: text/plain +> +< HTTP/1.1 200 OK +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 2102 +< +{ + "vnode_gets": 0, + "vnode_puts": 0, + "read_repairs": 0, + "vnode_gets_total": 0, + "vnode_puts_total": 0, + "node_gets": 0, + "node_gets_total": 0, + "node_get_fsm_time_mean": "undefined", + "node_get_fsm_time_median": "undefined", + "node_get_fsm_time_95": "undefined", + "node_get_fsm_time_99": "undefined", + "node_get_fsm_time_100": "undefined", + "node_puts": 0, + "node_puts_total": 0, + "node_put_fsm_time_mean": "undefined", + "node_put_fsm_time_median": "undefined", + "node_put_fsm_time_95": "undefined", + "node_put_fsm_time_99": "undefined", + "node_put_fsm_time_100": "undefined", + "read_repairs_total": 0, + "cpu_nprocs": 84, + "cpu_avg1": 251, + "cpu_avg5": 174, + "cpu_avg15": 110, + "mem_total": 7946684000.0, + "mem_allocated": 4340880000.0, + "nodename": "riak@127.0.0.1", + "connected_nodes": [ + + ], + "sys_driver_version": "1.5", + "sys_global_heaps_size": 0, + "sys_heap_type": "private", + "sys_logical_processors": 2, + "sys_otp_release": "R13B04", + "sys_process_count": 189, + "sys_smp_support": true, + "sys_system_version": "Erlang R13B04 (erts-5.7.5) [[source]] [[64-bit]] [[smp:2:2]] [[rq:2]] [[async-threads:5]] [[hipe]] [[kernel-poll:true]]", + "sys_system_architecture": "i386-apple-darwin10.3.0", + "sys_threads_enabled": true, + "sys_thread_pool_size": 5, + "sys_wordsize": 8, + "ring_members": [ + "riak@127.0.0.1" + ], + "ring_num_partitions": 64, + "ring_ownership": "[{'riak@127.0.0.1',64}]", + "ring_creation_size": 64, + "storage_backend": "riak_kv_bitcask_backend", + "pbc_connects_total": 0, + "pbc_connects": 0, + "pbc_active": 0, + "riak_kv_version": "0.11.0", + "riak_core_version": "0.11.0", + "bitcask_version": "1.0.1", + "luke_version": "0.1", + "webmachine_version": "1.7.1", + "mochiweb_version": "1.7.1", + "erlang_js_version": "0.4", + "runtime_tools_version": "1.8.3", + "crypto_version": "1.6.4", + "os_mon_version": "2.9.1", + "sasl_version": "2.1.9", + "stdlib_version": "1.16.5", + "kernel_version": "2.13.5" +} +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Output Explanation + +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.9.7/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
+
+Stat | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total | Total number of ignored gossip messages since node was started
+rings_reconciled_total | Total number of ring reconciliation operations since node was started
+rings_reconciled | Number of ring reconciliation operations in the last minute
+gossip_received | Number of gossip messages received in the last minute
+rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/http/store-object.md b/content/riak/kv/2.9.7/developing/api/http/store-object.md
new file mode 100644
index 0000000000..73ca735734
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/http/store-object.md
@@ -0,0 +1,150 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/http/store-object
+  - /riak/kv/2.9.7/dev/references/http/store-object
+---
+
+Stores
an object under the specified bucket / key. Storing an object comes in +two forms, depending on whether you want to use a key of your choosing, or let +Riak assign a key to a new object. + +## Request + +```bash +POST /types/type/buckets/bucket/keys # Riak-defined key +PUT /types/type/buckets/bucket/keys/key # User-defined key +POST /buckets/bucket/keys # Riak-defined key +PUT /buckets/bucket/keys/key # User-defined key +``` + +For the sake of compatibility with older clients, `POST` is also acceptable in +the form where the key is specified. + +Important headers: + +* `Content-Type` must be set for the stored object. Set what you expect to +receive back when next requesting it. +* `X-Riak-Vclock` if the object already exists, the vector clock attached to the +object when read. +* `X-Riak-Meta-*` - any additional metadata headers that should be stored with +the object. +* `X-Riak-Index-*` - index entries under which this object should be indexed. +[Read more about Secondary Indexing]({{}}riak/kv/2.9.7/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.7/developing/api/http/link-walking) + +Optional headers (only valid on `PUT`): + +* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since` +invoke conditional request semantics, matching on the `ETag` and `Last-Modified` +of the existing object. These can be used to prevent overwriting a modified +object. If the test fails, you will receive a `412 Precondition Failed` +response. This does not prevent concurrent writes; it is possible for the +condition to evaluate to true for multiple requests if the requests occur at the +same time. + +Optional query parameters: + +* `w` (write quorum) how many replicas to write to before returning a successful +response (default is defined by the bucket level) +* `dw` (durable write quorum) how many replicas to commit to durable storage +before returning a successful response (default is defined at the bucket level) +* `pw` how many primary replicas must be online to attempt a write (default is +defined at the bucket level) +* `returnbody=[true|false]` whether to return the contents of the stored object. + +*This request must include a body (entity).* + +## Response + +Normal status codes: + +* `201 Created` (when submitting without a key) +* `200 OK` +* `204 No Content` +* `300 Multiple Choices` + +Typical error codes: + +* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N) +* `412 Precondition Failed` if one of the conditional request headers failed to +match (see above) + +Important headers: + +* `Location` a relative URL to the newly-created object (when submitting without +a key) + +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.9.7/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +may be returned if siblings existed or were created as part of the operation, +and the response can be dealt with similarly. + +## Example: Storing Without Key + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys \ + -H "Content-Type: text/plain" -d 'this is a test' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> POST /buckets/test/keys HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: text/plain +> Content-Length: 14 +> +< HTTP/1.1 201 Created +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49 +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Example: Storing With Key + +```curl +$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA== +> Content-Length: 13 +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/http/store-search-index.md b/content/riak/kv/2.9.7/developing/api/http/store-search-index.md new file mode 100644 index 0000000000..214fe5aebe --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/http/store-search-index.md @@ -0,0 +1,56 @@ +--- +title: "HTTP Store Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Store Search Index" + identifier: "http_store_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.7/dev/references/http/store-search-index + - /riak/kv/2.9.7/dev/references/http/store-search-index +--- + +Creates a new Riak Search [index]({{}}riak/kv/2.9.7/developing/usage/search/#simple-setup). + +## Request + +``` +PUT /search/index/ +``` + +## Optional Request Body + +If you run a `PUT` request to this endpoint without a request body, Riak +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.9.7/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. + +To specify a different schema, however, you must pass Riak a JSON object +as the request body in which the `schema` field specifies the name of +the schema to use. If you've [stored a schema]({{}}riak/kv/2.9.7/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +request would create an index called `my_index` that used that schema: + +```curl +curl -XPUT http://localhost:8098/search/index/my_index \ + -H "Content-Type: application/json" \ + -d '{"schema": "my_custom_schema"}' +``` + +More information can be found in [Using Search]({{}}riak/kv/2.9.7/developing/usage/search). 
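+
+As a quick check (a sketch, assuming the `my_index` example above and a node on `localhost:8098`), the new index can be read back with the [fetch search index]({{}}riak/kv/2.9.7/developing/api/http/fetch-search-index) endpoint:
+
+```curl
+# Returns a JSON description of the index (name, n_val, schema)
+curl http://localhost:8098/search/index/my_index
+```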
+
+## Normal Response Codes
+
+* `204 No Content` - The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` - The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/http/store-search-schema.md b/content/riak/kv/2.9.7/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..6899c12841
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/http/store-search-schema.md
@@ -0,0 +1,54 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/http/store-search-schema
+  - /riak/kv/2.9.7/dev/references/http/store-search-schema
+---
+
+Creates a new Riak [Search schema]({{}}riak/kv/2.9.7/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.9.7/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response
+
+* `204 No Content` - The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` - The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict` - The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..5de6a05769
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers.md
@@ -0,0 +1,189 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers
+  - /riak/kv/2.9.7/dev/references/protocol-buffers
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages.
Messages are all encoded the same way, consisting of:
+
+* 32-bit length of message code + Protocol Buffers message in network
+  order
+* 8-bit message code to identify the Protocol Buffers message
+* N bytes of Protocol Buffers-encoded message
+
+### Example
+
+```
+00 00 00 07 09 0A 01 62 12 01 6B
+|----Len---|MC|----Message-----|
+
+Len = 0x07
+Message Code (MC) = 0x09 = RpbGetReq
+RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B
+
+Decoded Message:
+bucket: "b"
+key: "k"
+```
+
+## Message Codes
+
+Code | Message |
+:----|:--------|
+0 | `RpbErrorResp` |
+1 | `RpbPingReq` |
+2 | `RpbPingResp` |
+3 | `RpbGetClientIdReq` |
+4 | `RpbGetClientIdResp` |
+5 | `RpbSetClientIdReq` |
+6 | `RpbSetClientIdResp` |
+7 | `RpbGetServerInfoReq` |
+8 | `RpbGetServerInfoResp` |
+9 | `RpbGetReq` |
+10 | `RpbGetResp` |
+11 | `RpbPutReq` |
+12 | `RpbPutResp` |
+13 | `RpbDelReq` |
+14 | `RpbDelResp` |
+15 | `RpbListBucketsReq` |
+16 | `RpbListBucketsResp` |
+17 | `RpbListKeysReq` |
+18 | `RpbListKeysResp` |
+19 | `RpbGetBucketReq` |
+20 | `RpbGetBucketResp` |
+21 | `RpbSetBucketReq` |
+22 | `RpbSetBucketResp` |
+23 | `RpbMapRedReq` |
+24 | `RpbMapRedResp` |
+25 | `RpbIndexReq` |
+26 | `RpbIndexResp` |
+27 | `RpbSearchQueryReq` |
+28 | `RpbSearchQueryResp` |
+29 | `RpbResetBucketReq` |
+30 | `RpbResetBucketResp` |
+31 | `RpbGetBucketTypeReq` |
+32 | `RpbSetBucketTypeResp` |
+40 | `RpbCSBucketReq` |
+41 | `RpbCSUpdateReq` |
+50 | `RpbCounterUpdateReq` |
+51 | `RpbCounterUpdateResp` |
+52 | `RpbCounterGetReq` |
+53 | `RpbCounterGetResp` |
+54 | `RpbYokozunaIndexGetReq` |
+55 | `RpbYokozunaIndexGetResp` |
+56 | `RpbYokozunaIndexPutReq` |
+57 | `RpbYokozunaIndexPutResp` |
+58 | `RpbYokozunaSchemaGetReq` |
+59 | `RpbYokozunaSchemaGetResp` |
+60 | `RpbYokozunaSchemaPutReq` |
+80 | `DtFetchReq` |
+81 | `DtFetchResp` |
+82 | `DtUpdateReq` |
+83 | `DtUpdateResp` |
+253 | `RpbAuthReq` |
+254 | `RpbAuthResp` |
+255 | `RpbStartTls` |
+
+{{% note title="Message Definitions" %}}
+All Protocol Buffers messages are defined in the `riak.proto` and other
+`.proto` files in the `/src` directory of the
+RiakPB project.
+{{% /note %}}
+
+### Error Response
+
+If the request does not result in an error, Riak will return one of a
+variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`,
+depending on which request message is sent.
+
+If the server experiences an error processing a request, however, it
+will return an `RpbErrorResp` message instead of the response expected
+for the given request (e.g. `RpbGetResp` is the expected response to
+`RpbGetReq`). Error messages contain an error string and an error code,
+like this:
+
+```protobuf
+message RpbErrorResp {
+    required bytes errmsg = 1;
+    required uint32 errcode = 2;
+}
+```
+
+### Values
+
+* `errmsg` - A string representation of what went wrong
+* `errcode` - A numeric code. Currently, only `RIAKC_ERR_GENERAL=1`
+  is defined.
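+
+To make the framing concrete before the operation listings below, here is a sketch of a raw ping exchange from the shell. It assumes a node on the default PBC port 8087 and a netcat build that supports `-q`, and relies only on the encoding described above (`RpbPingReq` is message code 1 and carries no Protocol Buffers payload):
+
+```bash
+# Frame = 4-byte big-endian length (code + body) then the 1-byte message code.
+# RpbPingReq has an empty body, so the entire request is 00 00 00 01 01.
+printf '\x00\x00\x00\x01\x01' | nc -q 1 127.0.0.1 8087 | xxd
+# A healthy node replies with RpbPingResp (code 2): 00 00 00 01 02
+```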
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/yz-schema-put) + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..acb723df61 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,34 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/auth-req + - /riak/kv/2.9.7/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.9.7/using/security/basics). + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..5d1cf96f23 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,82 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.9.7" +menu: + riak_kv-2.9.7: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/coverage-queries + - /riak/kv/2.9.7/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
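+
+For illustration, the sketch below shows roughly how a client might frame an
+`RpbCoverageReq` carrying only the required `bucket` field. Real clients use
+code generated from the `.proto` definitions; here the field is hand-encoded,
+and the message code (`70`) is taken from the `riak_pb` sources rather than
+from this page, so treat both as assumptions:
+
+```python
+import struct
+
+def encode_bytes_field(field_no: int, value: bytes) -> bytes:
+    # Length-delimited protobuf field: tag = (field_no << 3) | 2.
+    # A single length byte is only valid for values shorter than 128 bytes.
+    return bytes([(field_no << 3) | 2, len(value)]) + value
+
+payload = encode_bytes_field(2, b"mybucket")              # bucket = field 2
+req = struct.pack(">IB", len(payload) + 1, 70) + payload  # 70 = RpbCoverageReq (assumed)
+```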
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/delete-object.md
new file mode 100644
index 0000000000..07d90f386b
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/delete-object.md
@@ -0,0 +1,104 @@
+---
+title: "PBC Delete Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Delete Object"
+    identifier: "pbc_delete_object"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers/delete-object
+  - /riak/kv/2.9.7/dev/references/protocol-buffers/delete-object
+---
+
+Delete an object in the specified [bucket type]({{}}riak/kv/2.9.7/using/cluster-operations/bucket-types)/bucket/key location.
+
+## Request
+
+```protobuf
+message RpbDelReq {
+    required bytes bucket = 1;
+    required bytes key = 2;
+    optional uint32 rw = 3;
+    optional bytes vclock = 4;
+    optional uint32 r = 5;
+    optional uint32 w = 6;
+    optional uint32 pr = 7;
+    optional uint32 pw = 8;
+    optional uint32 dw = 9;
+    optional uint32 timeout = 10;
+    optional bool sloppy_quorum = 11;
+    optional uint32 n_val = 12;
+    optional bytes type = 13;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description |
+:---------|:------------|
+`bucket` | The name of the bucket in which the object is stored
+`key` | The key under which the object is stored
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw`, provided that the integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description |
+:---------|:------------|
+`rw` | How many replicas to delete before returning a successful response
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Used to prevent deletion of objects that have been modified since the last GET request (sent as a byte array)
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the delete request will be sent
+`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter.
+
+## Response
+
+Only the message code is returned.
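+
+The request bytes in the example below can be reproduced with a short Python
+sketch (the three fields are hand-encoded here purely for illustration; real
+clients use generated protobuf code):
+
+```python
+import struct
+
+bucket, key = b"notabucket", b"k"
+payload = (
+    bytes([0x0A, len(bucket)]) + bucket  # field 1: bucket
+    + bytes([0x12, len(key)]) + key      # field 2: key
+    + bytes([0x18, 0x01])                # field 3: rw = 1 (varint)
+)
+req = struct.pack(">IB", len(payload) + 1, 0x0D) + payload  # 13 = RpbDelReq
+assert req[:5].hex(" ") == "00 00 00 12 0d"
+```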
+ +## Example + +#### Request + +``` +Hex 00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65 + 74 12 01 6B 18 01 +Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>> + +RpbDelReq protoc decode: +bucket: "notabucket" +key: "k" +rw: 1 + +``` + +#### Response + +``` +Hex 00 00 00 01 0E +Erlang <<0,0,0,1,14>> + +RpbDelResp - only message code defined +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-counter-store.md new file mode 100644 index 0000000000..21a39c1aa5 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-counter-store.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Counter Store" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Data Type Counter Store" + identifier: "pbc_dt_counter_store" + weight: 117 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/dt-counter-store + - /riak/kv/2.9.7/dev/references/protocol-buffers/dt-counter-store +--- + +An operation to update a [counter]({{}}riak/kv/2.9.7/developing/data-types). + +## Request + +```protobuf +message CounterOp { + optional sint64 increment = 1; +} +``` + +The `increment` value specifies how much the counter will be incremented +or decremented, depending on whether the `increment` value is positive +or negative. This operation can be used to update counters that are +stored on their own in a key or [within a map]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-map-store). + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..ffeec649b6 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,131 @@ +--- +title: "PBC Data Type Fetch" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Data Type Fetch" + identifier: "pbc_dt_fetch" + weight: 114 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/dt-fetch + - /riak/kv/2.9.7/dev/references/protocol-buffers/dt-fetch +--- + +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.9.7/developing/data-types). This request results in a `DtFetchResp` +message (explained in the **Response** section below). + +## Request + +```protobuf +message DtFetchReq { + required bytes bucket = 1; + required bytes key = 2; + required bytes type = 3; + optional uint32 r = 4; + optional uint32 pr = 5; + optional bool basic_quorum = 6; + optional bool notfound_ok = 7; + optional uint32 timeout = 8; + optional bool sloppy_quorum = 9; + optional uint32 n_val = 10; + optional bool include_context = 11 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`key` | The key where the Data Type is stored +`type` | The [Using Bucket Types]({{}}riak/kv/2.9.7/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) + +#### Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. 
Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-props) for more information.
+>
+> Furthermore, you can assign an integer value to the `r` and
+> `pr` parameters, provided that the integer value is less than or equal
+> to N, _or_ a special value denoting `one` (`4294967295-1`), `quorum`
+> (`4294967295-2`), `all` (`4294967295-3`), or `default`
+> (`4294967295-4`).
+
+Parameter | Description
+:---------|:-----------
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success, setting `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the fetch request will be sent
+`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client
+
+## Response
+
+The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+
+```protobuf
+message DtFetchResp {
+    enum DataType {
+        COUNTER = 1;
+        SET = 2;
+        MAP = 3;
+    }
+
+    optional bytes context = 1;
+    required DataType type = 2;
+    optional DtValue value = 3;
+}
+```
+
+If the `include_context` option is specified, an opaque "context" value
+will be returned along with the user-readable data. When sending an
+update request, the client should send this context as well, just as one
+would send a [vclock]({{}}riak/kv/2.9.7/learn/glossary/#vector-clock) for standard KV updates.
+
+The type of the Data Type is specified in the `type` field, and must be
+one of the three possible values of the `DataType` enum (`COUNTER`,
+`SET`, or `MAP`).
+
+The current value of the Data Type is contained in the `value` field,
+which itself contains a `DtValue` message. This message will have the
+following structure:
+
+```protobuf
+message DtValue {
+    optional sint64 counter_value = 1;
+    repeated bytes set_value = 2;
+    repeated MapEntry map_value = 3;
+}
+```
+
+If the Data Type queried is a counter, it will return an integer value
+for the counter; if a set, it will return the set's current value, in
+bytes; if a map, it will return a `MapEntry` message.
`MapEntry` messages +are structured as follows: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-map-store.md new file mode 100644 index 0000000000..36747ae33a --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-map-store.md @@ -0,0 +1,77 @@ +--- +title: "PBC Data Type Map Store" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Data Type Map Store" + identifier: "pbc_dt_map_store" + weight: 119 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/dt-map-store + - /riak/kv/2.9.7/dev/references/protocol-buffers/dt-map-store +--- + +An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc. + +## Request + +Operations on maps are requested using a `MapOp` message, which has the following structure: + +```protobuf +message MapOp { + repeated MapField adds = 1; + repeated MapField removes = 2; + repeated MapUpdate updates = 3; +} +``` + +In a `MapOp` message, you can either add or remove fields (sets, counters, or maps) to or from the map or update a field or multiple fields. You can include as many field additions or removals and/or field updates as you wish. + +Adding or removing a field involves including a `MapField` message in your `MapOp` operation: + +```protobuf +message MapField { + enum MapFieldType { + COUNTER = 1; + SET = 2; + REGISTER = 3; + FLAG = 4; + MAP = 5; + } + required bytes name = 1; + required MapFieldType type = 2; +} +``` + +The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated. + +If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure: + +```protobuf +message MapUpdate { + enum FlagOp { + ENABLE = 1; + DISABLE = 2; + } + required MapField field = 1; + optional CounterOp counter_op = 2; + optional SetOp set_op = 3; + optional bytes register_op = 4; + optional FlagOp flag_op = 5; + optional MapOp map_op = 6; +} +``` + +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-set-store). + +If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). + +Updating a register does not involve sending a special message type. 
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..607fe5a95b --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,36 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/dt-set-store + - /riak/kv/2.9.7/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..f92f3c2f36 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,132 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/dt-store + - /riak/kv/2.9.7/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.9.7/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.9.7/using/cluster-operations/bucket-types). 
+
+Also required is a `DtOp` message that specifies which operation is to
+be performed, depending on whether the Data Type being updated is a
+[counter]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-map-store).
+
+```protobuf
+message DtOp {
+    optional CounterOp counter_op = 1;
+    optional SetOp set_op = 2;
+    optional MapOp map_op = 3;
+}
+```
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw`, provided that the integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.9.7/learn/glossary/#vector-clock)
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_body` | Whether to return the contents of the stored object. Defaults to `false`.
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be included in the `DtUpdateResp` sent back to the client.
+
+## Response
+
+The response to a Data Type update request is analogous to
+[`RpbPutResp`]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/store-object) for KV operations. If the
+`return_body` is set in the update request message (as explained above),
+the message will include the opaque context of the Data Type (`context`)
+and the new value of the Data Type _after_ the update has completed
+(depending on whether the Data Type is a counter, set, or map). If no
+key was specified in the update request, it will include the
+Riak-assigned key (`key`).
+
+```protobuf
+message DtUpdateResp {
+    optional bytes key = 1;
+    optional bytes context = 2;
+    optional sint64 counter_value = 3;
+    repeated bytes set_value = 4;
+    repeated MapEntry map_value = 5;
+}
+```
+
+Assuming `return_body` is set to `true`: if a counter is updated, the
+response will include an integer as the `counter_value`; if a set is
+updated, a list of binaries will be returned as the `set_value`; and if a
+map is updated, the returned `map_value` will be a `MapEntry` message.
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..a72dac4f78 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/dt-union + - /riak/kv/2.9.7/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/dt-store) message. + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..120cdcfee4 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,185 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/fetch-object + - /riak/kv/2.9.7/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success, setting `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match
+`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it
+`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+
+## Response
+
+```protobuf
+message RpbGetResp {
+    repeated RpbContent content = 1;
+    optional bytes vclock = 2;
+    optional bool unchanged = 3;
+}
+```
+
+#### Values
+
+Value | Description
+:-----|:-----------
+`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty.
+`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings
+`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true`
+
+The content entries hold the object value and any metadata.
+Below is the structure of an `RpbContent` message, which is
+included in GET/PUT responses (`RpbGetResp` (above) and
+[`RpbPutResp`]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/store-object), respectively):
+
+```protobuf
+message RpbContent {
+    required bytes value = 1;
+    optional bytes content_type = 2;
+    optional bytes charset = 3;
+    optional bytes content_encoding = 4;
+    optional bytes vtag = 5;
+    repeated RpbLink links = 6;
+    optional uint32 last_mod = 7;
+    optional uint32 last_mod_usecs = 8;
+    repeated RpbPair usermeta = 9;
+    repeated RpbPair indexes = 10;
+    optional bool deleted = 11;
+}
+```
+
+From the above, we can see that an `RpbContent` message will always
+contain the binary `value` of the object. But it could also contain any
+of the following optional parameters:
+
+* `content_type` - The content type of the object, e.g. `text/plain`
+  or `application/json`
+* `charset` - The character encoding of the object, e.g. `utf-8`
+* `content_encoding` - The content encoding of the object, e.g.
+  `gzip`
+* `vtag` - The object's [vtag]({{}}riak/kv/2.9.7/learn/glossary/#vector-clock)
+* `links` - This parameter is associated with the now-deprecated link
+  walking feature and should not be used by Riak clients
+* `last_mod` - A timestamp for when the object was last modified, in
+  [Unix time](http://en.wikipedia.org/wiki/Unix_time) seconds
+* `last_mod_usecs` - The microseconds portion of the last-modified
+  timestamp
+* `usermeta` - This field stores user-specified key/value metadata
+  pairs to be associated with the object. `RpbPair` messages used to
+  send metadata of this sort are structured like this:
+
+    ```protobuf
+    message RpbPair {
+        required bytes key = 1;
+        optional bytes value = 2;
+    }
+    ```
+  Notice that both a key and value can be stored, or just a key.
+  `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.9.7/developing/usage/secondary-indexes) to objects (in the optional
+  `indexes` field).
+* `deleted` - Whether the object has been deleted (i.e. whether a + tombstone for the object has been found under the specified key) + +{{% note title="Note on missing keys" %}} +Remember: if a key is not stored in Riak, an `RpbGetResp` response without the +`content` and `vclock` fields will be returned. This should be mapped to +whatever convention the client language uses to return not found. The Erlang +client, for example, returns the atom `{error, notfound}`. +{{% /note %}} + +## Example + +#### Request + +``` +Hex 00 00 00 07 09 0A 01 62 12 01 6B +Erlang <<0,0,0,7,9,10,1,98,18,1,107>> + +RpbGetReq protoc decode: +bucket: "b" +key: "k" +``` + +#### Response + +``` +Hex 00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44 + 6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B + 49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B + CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65 + 30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00 +Erlang <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122, + 56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64, + 224,185,6,18,31,107,206,97,96,96,96,204,96,226,82,44,172,194,91,63, + 101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>> + +RpbGetResp protoc decode: +content { + value: "v2" + vtag: "3SDlf4INKz8hNdhyImKIru" + last_mod: 1271442363 + last_mod_usecs: 105696 +} +vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000" +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-props.md new file mode 100644 index 0000000000..3cd04f94a1 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-props.md @@ -0,0 +1,114 @@ +--- +title: "PBC Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Get Bucket Properties" + identifier: "pbc_get_bucket_props" + weight: 102 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/get-bucket-props + - /riak/kv/2.9.7/dev/references/protocol-buffers/get-bucket-props +--- + +Fetch a bucket's properties. + +## Request + +```protobuf +message RpbGetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.9.7/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +the `default` bucket type will be used. 
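+
+As a minimal sketch, a request for the properties of a hypothetical bucket
+named `friends` could be framed by hand like this (message code 19 comes from
+the message code table on the main Protocol Buffers page):
+
+```python
+import struct
+
+bucket = b"friends"
+payload = bytes([0x0A, len(bucket)]) + bucket             # field 1: bucket
+req = struct.pack(">IB", len(payload) + 1, 19) + payload  # 19 = RpbGetBucketReq
+# req.hex(" ") == "00 00 00 0a 13 0a 07 66 72 69 65 6e 64 73"
+```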
+
+## Response
+
+When an `RpbGetBucketReq` message is sent to Riak, it will respond with
+an `RpbGetBucketResp` message, which returns the bucket's properties:
+
+```protobuf
+message RpbGetBucketResp {
+    required RpbBucketProps props = 1;
+}
+```
+
+The `RpbBucketProps` value itself is structured as follows:
+
+```protobuf
+message RpbBucketProps {
+    optional uint32 n_val = 1;
+    optional bool allow_mult = 2;
+    optional bool last_write_wins = 3;
+    repeated RpbCommitHook precommit = 4;
+    optional bool has_precommit = 5 [default = false];
+    repeated RpbCommitHook postcommit = 6;
+    optional bool has_postcommit = 7 [default = false];
+    optional RpbModFun chash_keyfun = 8;
+    optional RpbModFun linkfun = 9;
+    optional uint32 old_vclock = 10;
+    optional uint32 young_vclock = 11;
+    optional uint32 big_vclock = 12;
+    optional uint32 small_vclock = 13;
+    optional uint32 pr = 14;
+    optional uint32 r = 15;
+    optional uint32 w = 16;
+    optional uint32 pw = 17;
+    optional uint32 dw = 18;
+    optional uint32 rw = 19;
+    optional bool basic_quorum = 20;
+    optional bool notfound_ok = 21;
+    optional bytes backend = 22;
+    optional bool search = 23;
+    enum RpbReplMode {
+        FALSE = 0;
+        REALTIME = 1;
+        FULLSYNC = 2;
+        TRUE = 3;
+    }
+    optional RpbReplMode repl = 24;
+    optional bytes search_index = 25;
+    optional bytes datatype = 26;
+    optional bool consistent = 27;
+}
+```
+
+#### Optional Response Values
+
+Each `RpbBucketProps` message returns all of the properties associated
+with a particular bucket. Default values for bucket properties, as well
+as descriptions of all of the above properties, can be found in the
+[configuration file]({{}}riak/kv/2.9.7/configuring/reference/#default-bucket-properties) documentation.
+
+It should be noted that the value of an `RpbBucketProps` message may
+include other message types, such as `RpbModFun` (specifying
+module-function pairs for bucket properties that require them) and
+`RpbCommitHook` (specifying the module-function pair and name of a
+commit hook). Those message types are structured like this:
+
+```protobuf
+message RpbModFun {
+    required bytes module = 1;
+    required bytes function = 2;
+}
+
+message RpbCommitHook {
+    optional RpbModFun modfun = 1;
+    optional bytes name = 2;
+}
+```
+
+{{% note title="Note on `RpbReplMode`" %}}
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/).
+{{% /note %}}
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-type.md
new file mode 100644
index 0000000000..7e96e1c27a
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-type.md
@@ -0,0 +1,37 @@
+---
+title: "PBC Get Bucket Type"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Get Bucket Type"
+    identifier: "pbc_get_bucket_type"
+    weight: 112
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers/get-bucket-type
+  - /riak/kv/2.9.7/dev/references/protocol-buffers/get-bucket-type
+---
+
+Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.9.7/using/cluster-operations/bucket-types).
+
+## Request
+
+```protobuf
+message RpbGetBucketTypeReq {
+    required bytes type = 1;
+}
+```
+
+Only the name of the bucket type needs to be specified (under `type`).
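+
+Framed by hand, a request for a hypothetical bucket type named `users` would
+look like this (message code 31 comes from the message code table):
+
+```python
+import struct
+
+btype = b"users"
+payload = bytes([0x0A, len(btype)]) + btype               # field 1: type
+req = struct.pack(">IB", len(payload) + 1, 31) + payload  # 31 = RpbGetBucketTypeReq
+# req.hex(" ") == "00 00 00 08 1f 0a 05 75 73 65 72 73"
+```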
+
+## Response
+
+A bucket type's properties will be sent to the client as part of an
+[`RpbBucketProps`]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-props) message.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/get-client-id.md
new file mode 100644
index 0000000000..bcc9ccb6e0
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/get-client-id.md
@@ -0,0 +1,65 @@
+---
+title: "PBC Get Client ID"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Get Client ID"
+    identifier: "pbc_get_client_id"
+    weight: 127
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers/get-client-id
+  - /riak/kv/2.9.7/dev/references/protocol-buffers/get-client-id
+---
+
+{{% note title="Deprecation notice" %}}
+The use of client IDs in conflict resolution is now deprecated in Riak. If you
+are building or maintaining a Riak client that is intended to be compatible
+with Riak 1.4 or later, you can safely ignore client IDs.
+{{% /note %}}
+
+Get the client id used for this connection. Client ids are used for
+conflict resolution and each unique actor in the system should be
+assigned one. A client id is assigned randomly when the socket is
+connected and can be changed using [Set Client ID]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/set-client-id).
+
+## Request
+
+Just the `RpbGetClientIdReq` message code. No request message defined.
+
+## Response
+
+```protobuf
+// Get ClientId Request - no message defined, just send RpbGetClientIdReq
+// message code
+message RpbGetClientIdResp {
+    required bytes client_id = 1; // Client id in use for this connection
+}
+```
+
+## Example
+
+Request
+
+```
+Hex      00 00 00 01 03
+Erlang <<0,0,0,1,3>>
+```
+
+
+Response
+
+```
+Hex      00 00 00 07 04 0A 04 01 65 01 B5
+Erlang <<0,0,0,7,4,10,4,1,101,1,181>>
+
+RpbGetClientIdResp protoc decode:
+client_id: "\001e\001\265"
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/list-buckets.md
new file mode 100644
index 0000000000..baeb1051ee
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/list-buckets.md
@@ -0,0 +1,80 @@
+---
+title: "PBC List Buckets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "List Buckets"
+    identifier: "pbc_list_buckets"
+    weight: 100
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers/list-buckets
+  - /riak/kv/2.9.7/dev/references/protocol-buffers/list-buckets
+---
+
+List all of the bucket names available.
+
+{{% note title="Caution" %}}
+This call can be expensive for the server. Do not use in performance-sensitive
+code.
+{{% /note %}}
+
+
+## Request
+
+Only the message code is required.
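+
+Because the request is code-only, a complete round trip is easy to sketch over
+a raw socket. This assumes the conventional Protocol Buffers port (8087), and
+`MSG_WAITALL` behavior may vary by platform:
+
+```python
+import socket
+import struct
+
+def read_frame(sock):
+    (length,) = struct.unpack(">I", sock.recv(4, socket.MSG_WAITALL))
+    body = sock.recv(length, socket.MSG_WAITALL)
+    return body[0], body[1:]  # (message code, payload)
+
+sock = socket.create_connection(("127.0.0.1", 8087))
+sock.sendall(struct.pack(">IB", 1, 15))  # 15 = RpbListBucketsReq, no payload
+code, payload = read_frame(sock)         # expect code 16 = RpbListBucketsResp
+```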
+
+## Response
+
+
+```protobuf
+message RpbListBucketsResp {
+    repeated bytes buckets = 1;
+}
+```
+
+
+Values
+
+* `buckets` - Buckets on the server
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 01 0F
+Erlang <<0,0,0,1,15>>
+
+RpbListBucketsReq - only message code defined
+```
+
+
+#### Response
+
+```bash
+Hex      00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62
+         34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02
+         62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37
+Erlang <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10,
+         3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>>
+
+RpbListBucketsResp protoc decode:
+buckets: "b1"
+buckets: "b5"
+buckets: "b4"
+buckets: "b8"
+buckets: "b3"
+buckets: "b10"
+buckets: "b9"
+buckets: "b2"
+buckets: "b6"
+buckets: "b7"
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/list-keys.md
new file mode 100644
index 0000000000..2a5ebe8520
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/list-keys.md
@@ -0,0 +1,101 @@
+---
+title: "PBC List Keys"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "List Keys"
+    identifier: "pbc_list_keys"
+    weight: 101
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers/list-keys
+  - /riak/kv/2.9.7/dev/references/protocol-buffers/list-keys
+---
+
+List all of the keys in a bucket. This is a streaming call, with
+multiple response messages sent for each request.
+
+{{% note title="Not for production use" %}}
+This operation requires traversing all keys stored in the cluster and should
+not be used in production.
+{{% /note %}}
+
+## Request
+
+```protobuf
+message RpbListKeysReq {
+    required bytes bucket = 1;
+}
+```
+
+Required Parameters
+
+* `bucket` - The bucket to get keys from
+
+## Response
+
+```protobuf
+message RpbListKeysResp {
+    repeated bytes keys = 1;
+    optional bool done = 2;
+}
+```
+
+#### Values
+
+* **keys** - Batch of keys in the bucket
+* **done** - Set to `true` on the last response packet
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73
+Erlang <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>>
+
+RpbListKeysReq protoc decode:
+bucket: "listkeys"
+
+```
+
+#### Response Packet 1
+
+```bash
+Hex      00 00 00 04 12 0A 01 34
+Erlang <<0,0,0,4,18,10,1,52>>
+
+RpbListKeysResp protoc decode:
+keys: "4"
+
+```
+
+#### Response Packet 2
+
+```bash
+Hex      00 00 00 08 12 0A 02 31 30 0A 01 33
+Erlang <<0,0,0,8,18,10,2,49,48,10,1,51>>
+
+RpbListKeysResp protoc decode:
+keys: "10"
+keys: "3"
+```
+
+
+#### Response Packet 3
+
+```bash
+Hex      00 00 00 03 12 10 01
+Erlang <<0,0,0,3,18,16,1>>
+
+RpbListKeysResp protoc decode:
+done: true
+
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/mapreduce.md
new file mode 100644
index 0000000000..abc11a85e8
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/mapreduce.md
@@ -0,0 +1,153 @@
+---
+title: "PBC MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "MapReduce"
+    identifier: "pbc_mapreduce"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers/mapreduce
+  - /riak/kv/2.9.7/dev/references/protocol-buffers/mapreduce
+---
+
+Execute a MapReduce job.
+
+## Request
+
+
+```protobuf
+message RpbMapRedReq {
+    required bytes request = 1;
+    required bytes content_type = 2;
+}
+```
+
+
+Required Parameters
+
+* `request` - MapReduce job
+* `content_type` - Encoding for MapReduce job
+
+MapReduce jobs can be encoded in two different ways:
+
+* `application/json` - JSON-encoded MapReduce job
+* `application/x-erlang-binary` - Erlang external term format
+
+The JSON encoding is the same as the [REST API]({{}}riak/kv/2.9.7/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{}}riak/kv/2.9.7/developing/app-guide/advanced-mapreduce/#erlang).
+
+## Response
+
+The results of the MapReduce job are returned for each phase that
+generates a result, encoded in the same format the job was submitted in.
+Multiple response messages will be returned, followed by a final message
+at the end of the job.
+
+```protobuf
+message RpbMapRedResp {
+    optional uint32 phase = 1;
+    optional bytes response = 2;
+    optional bool done = 3;
+}
+```
+
+
+Values
+
+* `phase` - Phase number of the MapReduce job
+* `response` - Response encoded with the content_type submitted
+* `done` - Set `true` on the last response packet
+
+## Example
+
+Here is an example of submitting a JSON-encoded job that sums up a bucket
+full of JSON-encoded values.
+
+```
+{"inputs": "bucket_501653",
+ "query":
+   [{"map": {"arg": null,
+             "name": "Riak.mapValuesJson",
+             "language": "javascript",
+             "keep": false}},
+    {"reduce": {"arg": null,
+                "name": "Riak.reduceSum",
+                "language": "javascript",
+                "keep": true}}]}
+```
+
+Request
+
+```bash
+Hex      00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
+         22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
+         33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
+         6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
+         6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
+         6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
+         2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
+         61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
+         70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
+         65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
+         6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
+         69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
+         22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
+         61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
+         3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
+         69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
+Erlang <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
+         117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
+         121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
+         110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
+         109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
+         110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
+         116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
+         44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
+         32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
+         46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
+         97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
+         34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
+         112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>
+
+RpbMapRedReq protoc decode:
+request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null,
+"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}},
+ {"reduce": {"arg": null, "name": "Riak.reduceSum", "language":
+"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/ping.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..dd9d5b20a7 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/ping.md @@ -0,0 +1,46 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/ping + - /riak/kv/2.9.7/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..e11298d8c2 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,63 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/2.9.7/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/2.9.7/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
+
+## Example
+
+Request to reset the properties for the bucket `friends`:
+
+#### Request
+
+```bash
+Hex      00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
+Erlang <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>
+
+RpbResetBucketReq protoc decode:
+bucket: "friends"
+
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 01 1E
+Erlang <<0,0,0,1,30>>
+
+RpbResetBucketResp - only message code defined
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/search.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..9d0dca2798
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/search.md
@@ -0,0 +1,152 @@
+---
+title: "PBC Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Search"
+    identifier: "pbc_search"
+    weight: 109
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers/search
+  - /riak/kv/2.9.7/dev/references/protocol-buffers/search
+---
+
+Send a Search request to retrieve a list of documents, along with a few
+stats.
+
+## Request
+
+
+```protobuf
+message RpbSearchQueryReq {
+    required bytes q = 1;
+    required bytes index = 2;
+    optional uint32 rows = 3;
+    optional uint32 start = 4;
+    optional bytes sort = 5;
+    optional bytes filter = 6;
+    optional bytes df = 7;
+    optional bytes op = 8;
+    repeated bytes fl = 9;
+    optional bytes presort = 10;
+}
+```
+
+Required Parameters
+
+* `q` - The contents of the query
+* `index` - The name of the index to search
+
+Optional Parameters
+
+* `rows` - The maximum number of rows to return
+* `start` - A start offset, i.e. the number of keys to skip before
+  returning values
+* `sort` - How the search results are to be sorted
+* `filter` - Filters search with additional query scoped to inline
+  fields
+* `df` - Override the `default_field` setting in the schema file
+* `op` - `and` or `or`, to override the `default_op` operation setting
+  in the schema file
+* `fl` - The list of fields to return
+* `presort` - Presort the results by `key` or `score`
+
+
+## Response
+
+The results of a search query are returned as a repeating list of 0 or
+more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
+more key/value pairs (`RpbPair`) that match the given request
+parameters. It also returns the maximum search score and the number of
+results.
+
+
+```protobuf
+// RpbPair is a generic key/value pair datatype used for
+// other message types
+message RpbPair {
+    required bytes key = 1;
+    optional bytes value = 2;
+}
+
+message RpbSearchDoc {
+    repeated RpbPair fields = 1;
+}
+
+message RpbSearchQueryResp {
+    repeated RpbSearchDoc docs = 1;
+    optional float max_score = 2;
+    optional uint32 num_found = 3;
+}
+```
+
+Values
+
+* `docs` - A list of docs that match the search request
+* `max_score` - The top score returned
+* `num_found` - Returns the total number of values matched by this
+  search
+
+
+## Example
+
+Request
+
+Here we search for any animals that begin with the string `pig`. We only
+want the first 100 results, sorted by the `name` field.
+ +```bash +RpbSearchQueryReq protoc decode: +q: "pig*" +index: "animals" +rows: 100 +start: 0 +sort: "name" + +Hex 00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E + 69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65 +Erlang <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110, + 105,109,97,108,115,24,100,32,0,42,4,110,97, + 109,101>> +``` + +Response + +```bash +Hex 00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D + 61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65 + 12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69 + 6D 61 6C 12 06 70 69 67 65 6F 6E 18 02 +Erlang <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109, + 97,108,18,3,112,105,103,10,12,10,4,110,97, + 109,101,18,4,102,114,101,100,10,18,10,16,10, + 6,97,110,105,109,97,108,18,6,112,105,103, + 101,111,110,24,2>> + +RpbSearchQueryResp protoc decode: +docs { + fields { + key: "animal" + value: "pig" + } + fields { + key: "name" + value: "fred" + } +} +docs { + fields { + key: "animal" + value: "pigeon" + } +} +num_found: 2 +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/secondary-indexes.md new file mode 100644 index 0000000000..42d18142f8 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/secondary-indexes.md @@ -0,0 +1,125 @@ +--- +title: "PBC Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Secondary Indexes" + identifier: "pbc_secondary_indexes" + weight: 108 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/secondary-indexes + - /riak/kv/2.9.7/dev/references/protocol-buffers/secondary-indexes +--- + +Request a set of keys that match a secondary index query. + +## Request + +```protobuf +message RpbIndexReq { + enum IndexQueryType { + eq = 0; + range = 1; + } + required bytes bucket = 1; + required bytes index = 2; + required IndexQueryType qtype = 3; + optional bytes key = 4; + optional bytes range_min = 5; + optional bytes range_max = 6; + optional bool return_terms = 7; + optional bool stream = 8; + optional uint32 max_results = 9; + optional bytes continuation = 10; + optional uint32 timeout = 11; + optional bytes type = 12; + optional bytes term_regex = 13; + optional bool pagination_sort = 14; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`index` | The name of the index to be queried +`qtype` | The type of index query to be performed. 
This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`key` | The exact index value to match if `qtype` is set to `eq`
+`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
+`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
+`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
+`max_results` | If pagination is turned on, the number of results to be returned to the client
+`continuation` | An opaque value returned in a previous paginated response. Send it back with a follow-up query to retrieve the next page of results.
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.9.7/developing/usage/bucket-types).
+`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
+`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
+
+## Response
+
+The results of a Secondary Index query are returned as a repeating list
+of 0 or more keys that match the given request parameters.
+
+```protobuf
+message RpbIndexResp {
+    repeated bytes keys = 1;
+    repeated RpbPair results = 2;
+    optional bytes continuation = 3;
+    optional bool done = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`keys` | A list of keys that match the index request
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/fetch-object).
+`continuation` | Used for paginated responses
+`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
+
+## Example
+
+#### Request
+
+Here we look for any exact matches of `chicken` on an `animal_bin` index
+for a bucket named `farm`.
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/server-info.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..064023f8aa --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,62 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/server-info + - /riak/kv/2.9.7/dev/references/protocol-buffers/server-info +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..e7c19f5480 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,72 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/2.9.7/dev/references/protocol-buffers/set-bucket-props +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/2.9.7/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
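+
+For reference, here is a minimal sketch of how the same request can be
+issued from the official Erlang client
+([riakc](https://github.com/basho/riak-erlang-client)); the host, port,
+and bucket name are illustrative:
+
+```erlang
+%% A minimal sketch, assuming a node listening on 127.0.0.1:8087.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+%% set_bucket/3 sends an RpbSetBucketReq under the hood.
+ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
+```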
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..998d74dc42 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,35 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/2.9.7/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.9.7/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/get-bucket-props). + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..c67ca9a903 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,66 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/set-client-id + - /riak/kv/2.9.7/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
+
+## Example
+
+Request
+
+```
+Hex     00 00 00 07 05 0A 04 01 65 01 B6
+Erlang <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "\001e\001\266"
+
+```
+
+
+Response
+
+```
+Hex     00 00 00 01 06
+Erlang <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..23d95ac41c
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,154 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers/store-object
+  - /riak/kv/2.9.7/dev/references/protocol-buffers/store-object
+---
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/2.9.7/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.9.7/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.9.7/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/2.9.7/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/2.9.7/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+    required bytes bucket = 1;
+    optional bytes key = 2;
+    optional bytes vclock = 3;
+    required RpbContent content = 4;
+    optional uint32 w = 5;
+    optional uint32 dw = 6;
+    optional bool return_body = 7;
+    optional uint32 pw = 8;
+    optional bool if_not_modified = 9;
+    optional bool if_none_match = 10;
+    optional bool return_head = 11;
+    optional uint32 timeout = 12;
+    optional bool asis = 13;
+    optional bool sloppy_quorum = 14;
+    optional uint32 n_val = 15;
+    optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that the integer value is less than or equal
+to N, _or_ a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier RpbGetResp message. Omit if this is a new key or if you deliberately want to create a sibling. 
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`.
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_head` | Return the metadata for the now-stored object without returning the value of the object
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+
+The `if_not_modified`, `if_none_match`, and `asis` parameters are set
+only for messages sent between nodes in a Riak cluster and should not be
+set by Riak clients.
+
+#### Response
+
+```protobuf
+message RpbPutResp {
+    repeated RpbContent contents = 1;
+    optional bytes vclock = 2;
+    optional bytes key = 3;
+}
+```
+
+If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
+will contain the current object after the PUT completes, in `contents`,
+as well as the object's [causal context]({{}}riak/kv/2.9.7/learn/concepts/causal-context), in the `vclock`
+field. The `key` will be sent only if the server generated a random key
+for the object.
+
+If `return_body` is not set and no key is generated, the PUT response
+will be empty.
+
+## Example
+
+#### Request
+
+```
+Hex     00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B
+        22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01
+Erlang <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34,
+         98,97,114,34,125,40,2,56,1>>
+
+RpbPutReq protoc decode:
+bucket: "b"
+key: "k"
+content {
+  value: "{\"foo\":\"bar\"}"
+}
+w: 2
+return_body: true
+
+```
+
+#### Response
+
+```
+Hex     00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A
+        22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39
+        36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF
+        B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60
+        CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC
+        0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0
+        12 FA 20 89 2C 00
+Erlang <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125,
+         42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86,
+         106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96,
+         96,96,202,96,202,5,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33,
+         182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>>
+
+RpbPutResp protoc decode:
+contents {
+  value: "{\"foo\":\"bar\"}"
+  vtag: "1caykOD96iNAhomyeVjOYC"
+  last_mod: 1271453743
+  last_mod_usecs: 406416
+}
+vclock: "k\316a```\312`\312\005R,,\351\014\206\031L\211\214y\254\014Z!\266G\371 
\302l\315I\254rw|\240\022\372 \211,\000"
+
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-delete.md
new file mode 100644
index 0000000000..c8c2c84953
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-delete.md
@@ -0,0 +1,37 @@
+---
+title: "PBC Yokozuna Index Delete"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Yokozuna Index Delete"
+    identifier: "pbc_yz_index_delete"
+    weight: 122
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - 
/riak/2.9.7/dev/references/protocol-buffers/yz-index-delete + - /riak/kv/2.9.7/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..de07ceabcb --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,63 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/yz-index-get + - /riak/kv/2.9.7/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.7/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. + + + + diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..5d66362ceb --- /dev/null +++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,49 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.7/dev/references/protocol-buffers/yz-index-put + - /riak/kv/2.9.7/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. 
+
+```protobuf
+message RpbYokozunaIndex {
+    required bytes name = 1;
+    optional bytes schema = 2;
+    optional uint32 n_val = 3;
+}
+```
+
+Each message specifying an index must include the index's name as a
+binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.7/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/#message-codes) code with no data on success.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-schema-get.md
new file mode 100644
index 0000000000..1b69d18669
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-schema-get.md
@@ -0,0 +1,52 @@
+---
+title: "PBC Yokozuna Schema Get"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Yokozuna Schema Get"
+    identifier: "pbc_yz_schema_get"
+    weight: 123
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers/yz-schema-get
+  - /riak/kv/2.9.7/dev/references/protocol-buffers/yz-schema-get
+---
+
+Fetch a [search schema]({{}}riak/kv/2.9.7/developing/usage/search-schemas) from Riak Search.
+
+## Request
+
+In a request message, you only need to specify the name of the schema as
+a binary (under `name`):
+
+```protobuf
+message RpbYokozunaSchemaGetReq {
+    required bytes name = 1;  // Schema name
+}
+```
+
+## Response
+
+```protobuf
+message RpbYokozunaSchemaGetResp {
+    required RpbYokozunaSchema schema = 1;
+}
+```
+
+The response message will include a `RpbYokozunaSchema` structure.
+
+```protobuf
+message RpbYokozunaSchema {
+    required bytes name = 1;
+    optional bytes content = 2;
+}
+```
+
+This message includes the schema `name` and its XML `content`.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-schema-put.md
new file mode 100644
index 0000000000..9740575161
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/protocol-buffers/yz-schema-put.md
@@ -0,0 +1,45 @@
+---
+title: "PBC Yokozuna Schema Put"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Yokozuna Schema Put"
+    identifier: "pbc_yz_schema_put"
+    weight: 124
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/references/protocol-buffers/yz-schema-put
+  - /riak/kv/2.9.7/dev/references/protocol-buffers/yz-schema-put
+---
+
+Create a new Solr [search schema]({{}}riak/kv/2.9.7/developing/usage/search-schemas).
+
+## Request
+
+```protobuf
+message RpbYokozunaSchemaPutReq {
+    required RpbYokozunaSchema schema = 1;
+}
+```
+
+Each message must contain a `RpbYokozunaSchema` object structure.
+
+```protobuf
+message RpbYokozunaSchema {
+    required bytes name = 1;
+    optional bytes content = 2;
+}
+```
+
+This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.9.7/developing/usage/search-schemas) `content` as XML.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/#message-codes) code with no data on success. 
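+
+For reference, the official Erlang client
+([riakc](https://github.com/basho/riak-erlang-client)) wraps this
+request. A minimal sketch, assuming a local node on the default protocol
+buffers port and an illustrative schema file path:
+
+```erlang
+%% A minimal sketch, assuming a node listening on 127.0.0.1:8087.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+%% Read the Solr schema XML from disk (illustrative path).
+{ok, SchemaXML} = file:read_file("my_schema.xml"),
+%% create_search_schema/3 sends an RpbYokozunaSchemaPutReq under the hood.
+ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXML).
+```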
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/api/repl-hooks.md b/content/riak/kv/2.9.7/developing/api/repl-hooks.md
new file mode 100644
index 0000000000..4231b26a9a
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/api/repl-hooks.md
@@ -0,0 +1,196 @@
+---
+title_supertext: "Riak Multi-Datacenter Replication:"
+title: "Hooks API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Multi-Datacenter REPL Hooks API"
+    identifier: "apis_repl_hooks"
+    weight: 100
+    parent: "developing_apis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.7/ops/mdc/v2/hooks
+  - /riak/kv/2.9.7/ops/mdc/v2/hooks
+---
+[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl
+
+This document is a guide to developing extensions for Riak's
+Multi-Datacenter Replication feature.
+
+## Replication Hooks
+
+Riak allows applications to register replication hooks to control
+either of the following:
+
+* when extra objects need to be replicated along with the current object
+* when an object should _not_ be replicated.
+
+To register a hook, you must call the following function in an
+application-specific Erlang module, where `MyMod` is to be replaced
+with the name of your custom module:
+
+```erlang
+riak_core:register([{repl_helper, MyMod}]).
+```
+
+## Replication Hook API
+
+A replication hook must implement the following functions:
+
+### send_realtime/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook controls whether an [object][object]
+replicated in realtime should be sent. To send this object, return `ok`;
+to prevent the object from being sent, return `cancel`. You can also
+return a list of Riak objects to be replicated immediately *before* the
+current object. This is useful when you have an object that refers to
+other objects, e.g. a chunked file, and want to ensure that all of the
+dependency objects are replicated before the dependent object.
+
+### send/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook is used in fullsync replication. To send this
+[object][object],
+return `ok`; to prevent the object from being sent, return `cancel`. You
+can also return a list of Riak objects to be replicated immediately
+*before* the current object. This is useful when you have an object
+that refers to other objects, e.g. a chunked file, and want to ensure
+that all the dependency objects are replicated before the dependent
+object.
+
+### recv/1
+
+```erlang
+(riak_object) -> ok | cancel
+```
+
+When an [object][object]
+is received by the client site, this hook is run. You can use it to
+update metadata or to deny the object.
+
+## Implementing a Sample Replication Hook
+
+The following is a simple replication hook that will log when an object
+is received via replication. For more information about the functions in
+the sample, see the [Replication Hook API](#replication-hook-api) section above.
+
+Here is the relevant Erlang code:
+
+```erlang
+%% Riak Enterprise MDC replication hook sample
+
+-module(riak_replication_hook_sample).
+-export([register/0]).
+-export([recv/1, send/2, send_realtime/2]). 
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + + + + + diff --git a/content/riak/kv/2.9.7/developing/app-guide.md b/content/riak/kv/2.9.7/developing/app-guide.md new file mode 100644 index 0000000000..23f7dd7fb8 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/app-guide.md @@ -0,0 +1,435 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/2.9.7/dev/using/application-guide/ + - /riak/kv/2.9.7/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/2.9.7/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.9.7/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.9.7/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/2.9.7/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.9.7/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.9.7/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.9.7/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.9.7/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.9.7/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.9.7/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.9.7/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.9.7/developing/usage/search +[use ref search]: {{}}riak/kv/2.9.7/using/reference/search +[usage 2i]: {{}}riak/kv/2.9.7/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.9.7/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.9.7/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.9.7/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.9.7/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.9.7/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.9.7/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.9.7/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.9.7/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.7/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/2.9.7/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/2.9.7/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.7/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.7/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.7/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.7/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.7/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.7/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.9.7/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.9.7/using/reference/strong-consistency 
+[cluster ops strong consistency]: {{}}riak/kv/2.9.7/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/2.9.7/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/2.9.7/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/2.9.7/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/2.9.7/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/2.9.7/setup/installing
+[getting started]: {{}}riak/kv/2.9.7/developing/getting-started
+[usage index]: {{}}riak/kv/2.9.7/developing/usage
+[usage search schema]: {{}}riak/kv/2.9.7/developing/usage/search-schemas
+[glossary]: {{}}riak/kv/2.9.7/learn/glossary
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answers to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data** - While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects** - Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects** - Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys** - It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects. Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]** - If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so. 
But if
+  your data can be modeled as a [counter][dev data types#counters],
+  [set][dev data types#sets], or [map][dev data types#maps], you
+  should seriously consider using [Riak Data Types][dev data types],
+  which can speed application development and transfer a great deal of
+  complexity away from the application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you need to store:
+
+* **Objects that exceed 1-2MB in size** - If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies** - If your data cannot be
+  easily denormalized or if it requires that objects can be easily
+  assembled into and accessible as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommendable for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search]
+  - Getting started with Riak Search
+* [Search Details][use ref search]
+  - A detailed overview of the concepts and design
+  considerations behind Riak Search
+* [Search Schema][usage search schema]
+  - How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API** - Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon).
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you. 
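+
+To make this concrete, here is a minimal sketch of a Search query using
+the official Erlang client; the index name and query string are
+illustrative and assume an index has already been created and associated
+with a bucket:
+
+```erlang
+%% A minimal sketch, assuming a node at 127.0.0.1:8087 and an existing
+%% search index named <<"famous">>.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+%% Query the index with a Solr query string. Results is a
+%% #search_results{} record containing the matched documents, the
+%% maximum score, and the total number of matches.
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>).
+```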
+ +### When Not to Use Search + +* **When deep pagination is needed** - At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** - In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] + - A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] + - A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] + - An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** - If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** - If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** - If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** - While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
If you have data that requires
+  a modeling solution that the available types can't cover, you should
+  stick to standard K/V operations.
+* **When object size is of significant concern** - Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth. Riak
+comes equipped with a set of default MapReduce jobs that you can employ,
+or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce]
+  - A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce]
+  - A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only** - You should use MapReduce only when truly
+  necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do** - Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i]
+  - A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i]
+  - Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination** - At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead.
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes** - If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes. 
+* **If you're using Bitcask** - 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy to fit one of these types. To see if this feature
+might be a good fit for your application, we recommend checking out the
+following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers that want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable?
+
+Although Riak always performs best when storing and retrieving immutable
+data, Riak also handles mutable objects very ably using a variety of
+eventual consistency principles. 
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] + - Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] + - A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] + - How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] + - A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] + - A listing of frequently used terms in Riak's + documentation + + + + + diff --git a/content/riak/kv/2.9.7/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.9.7/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..b83b7775c0 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,802 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/2.9.7/dev/advanced/mapreduce/ + - /riak/kv/2.9.7/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/2.9.7/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.9.7/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.9.7/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.9.7/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.9.7/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.9.7/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to +run MapReduce jobs using Erlang or JavaScript. + +{{% note title="Deprecation Warning" %}} +Javascript MapReduce is deprecated and will be removed in a future version. +{{% /note %}} + + +### Why Do We Use MapReduce for Querying Riak KV? + +Key/value stores like Riak KV generally do not offer the kinds of complex +querying capabilities found in other data storage systems, such as +relational databases. MapReduce enables you to perform powerful queries +over the data stored in Riak KV but should be used with caution. + +The main goal of MapReduce is to spread the processing of a query across +many systems to take advantage of parallel processing power. This is +generally done by dividing the query into several steps, i.e. dividing +the dataset into several chunks and then running those step/chunk pairs +on separate physical hosts. Riak KV's MapReduce has an additional goal: +increasing data locality. When processing a large dataset, it's often +much more efficient to take the computation to the data than it is to +bring the data to the computation. + +"Map" and "Reduce" are phases in the query process. Map functions take +one piece of data as input and produce zero or more results as output. +If you're familiar with [mapping over a list][mapping list] +in functional programming languages, you're already familiar with the +"Map" steps in a MapReduce query. + +## MapReduce caveats + +MapReduce should generally be treated as a fallback rather than a +standard part of an application. There are often ways to model data +such that dynamic queries become single key retrievals, which are +dramatically faster and more reliable in Riak KV, and tools such as Riak +search and 2i are simpler to use and may place less strain on a +cluster. + +### R=1 + +One consequence of Riak KV's processing model is that MapReduce queries +have an effective `R` value of 1. The queries are distributed +to a representative sample of the cluster where the data is expected to +be found, and if one server lacks a copy of data it's supposed to have, +a MapReduce job will not attempt to look for it elsewhere. + +For more on the value of `R`, see our documentation on [replication properties][apps replication properties]. + +### Key lists + +Asking Riak KV to generate a list of all keys in a production environment +is generally a bad idea. It's an expensive operation. + +Attempting to constrain that operation to a bucket (e.g., +`mapred_bucket` as used below) does not help because Riak KV must still +pull all keys from storage to determine which ones are in the +specified bucket. + +If at all possible, run MapReduce against a list of known keys. + +### Code distribution + +As we'll discuss in this document, the functions invoked from Erlang +MapReduce must be available on all servers in the cluster unless +using the client library from an Erlang shell. + +### Security restrictions + +If Riak's security functionality is enabled, there are two +restrictions on MapReduce that come into play: + +* The `riak_kv.mapreduce` permission must be granted to the user (or + via the user's groups) +* Other than the module `riak_kv_mapreduce`, any Erlang modules + distributed with Riak KV will **not** be accessible to custom MapReduce + code unless made available via the `add_path` mechanism documented + in [Installing Custom Code][use ref custom code]. + +## How Riak KV's MapReduce Queries Are Specified + +MapReduce queries in Riak KV have two components: (1) a list of inputs and +(2) a list of "steps," or "phases." 
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any is included, with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it. 
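+
+A minimal Erlang sketch of such a set-union reduce function, written so
+that re-reduce is safe because deduplication is idempotent:
+
+```erlang
+%% Set union as a reduce function. lists:usort/1 sorts the list and
+%% removes duplicates, so feeding the function's output back into it
+%% (a re-reduce) returns the same result.
+fun(Values, _Arg) ->
+    lists:usort(Values)
+end.
+```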
+ +### How a Link Phase Works in Riak KV + +Link phases find links matching patterns specified in the query +definition. The patterns specify which buckets and tags links must have. + +"Following a link" means adding it to the output list of this phase. The +output of this phase is often most useful as input to a map phase or to +another reduce phase. + +## Invoking MapReduce + +To illustrate some key ideas, we'll define a simple module that +implements a map function to return the key value pairs contained in a +bucket and use it in a MapReduce query via Riak KV's HTTP API. + +Here is our example MapReduce function: + +```erlang +-module(mr_example). + +-export([get_keys/3]). + +% Returns bucket and key pairs from a map phase +get_keys(Value,_Keydata,_Arg) -> + [{riak_object:bucket(Value),riak_object:key(Value)}]. +``` + +Save this file as `mr_example.erl` and proceed to compiling the module. + +{{% note title="Note on the Erlang Compiler" %}} +You must use the Erlang compiler (`erlc`) associated with the +Riak KV installation or the version of Erlang used when compiling Riak KV from +source. +{{% /note %}} + +Compiling the module is a straightforward process: + +```bash +erlc mr_example.erl +``` + +Successful compilation will result in a new `.beam` file, `mr_example.beam`. + +Send this file to your operator, or read about [installing custom code][use ref custom code] +on your Riak KV nodes. Once your file has been installed, all that +remains is to try the custom function in a MapReduce query. For +example, let's return keys contained within a bucket named `messages` +(please pick a bucket which contains keys in your environment). + +```curl +curl -XPOST localhost:8098/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' +``` + +The result should be a JSON map of bucket and key names expressed as key/value pairs. + +{{% note %}} +Be sure to install the MapReduce function as described above on all of +the nodes in your cluster to ensure proper operation. +{{% /note %}} + + +## Phase functions + +MapReduce phase functions have the same properties, arguments, and +return values whether you write them in Javascript or Erlang. + +### Map phase functions + +Map functions take three arguments (in Erlang, arity-3 is required). +Those arguments are: + + 1. `Value`: the value found at a key. This will be a Riak object, which + in Erlang is defined and manipulated by the `riak_object` module. + In Javascript, a Riak object looks like this: + + ```javascript + { + "bucket_type" : BucketTypeAsString, + "bucket" : BucketAsString, + "key" : KeyAsString, + "vclock" : VclockAsString, + "values" : [ + { + "metadata" : { + "X-Riak-VTag":VtagAsString, + "X-Riak-Last-Modified":LastModAsString, + "Links":[...List of link objects], + // ...other metadata... + }, + "data" : ObjectData + }, + // ...other metadata/data values (siblings)... + ] + } + ``` + 2. *KeyData* : key data that was submitted with the inputs to the query or phase. + 3. *Arg* : a static argument for the entire phase that was submitted with the query. + +A map phase should produce a list of results. You will see errors if +the output of your map function is not a list. Return the empty list if +your map function chooses not to produce output. If your map phase is +followed by another map phase, the output of the function must be +compatible with the input to a map phase - a list of bucket-key pairs or +`bucket-key-keydata` triples. 
+
+#### Map function examples
+
+This map function returns the value (data) of the object being mapped:
+
+```erlang
+fun(Value, _KeyData, _Arg) ->
+    [riak_object:get_value(Value)]
+end.
+```
+
+This map function filters its inputs based on the arg and returns
+bucket-key pairs for a subsequent map phase:
+
+```erlang
+fun(Value, _KeyData, Arg) ->
+    Key = riak_object:key(Value),
+    Bucket = riak_object:bucket(Value),
+    case erlang:byte_size(Key) of
+        L when L > Arg ->
+            [{Bucket,Key}];
+        _ -> []
+    end
+end.
+```
+
+### Reduce phase functions
+
+Reduce functions take two arguments. Those arguments are:
+
+1. *ValueList*: the list of values produced by the preceding phase in the MapReduce query.
+2. *Arg*: a static argument for the entire phase that was submitted with the query.
+
+A reduce function should produce a list of values, but it must also be
+true that the function is commutative, associative, and idempotent. That
+is, if the input list `[a,b,c,d]` is valid for a given F, then all of
+the following must produce the same result:
+
+```erlang
+  F([a,b,c,d])
+  F([a,d] ++ F([c,b]))
+  F([F([a]),F([c]),F([b]),F([d])])
+```
+
+#### Reduce function examples
+
+This reduce function assumes the values in the input are numbers and
+sums them:
+
+```erlang
+fun(Values, _Arg) ->
+  [lists:foldl(fun erlang:'+'/2, 0, Values)]
+end.
+```
+
+This reduce function sorts its inputs:
+
+```erlang
+fun(Values, _Arg) ->
+  lists:sort(Values)
+end.
+```
+
+## MapReduce Examples
+
+Riak KV supports describing MapReduce queries in Erlang syntax through the
+Protocol Buffers API. This section demonstrates how to do so using the
+Erlang client.
+
+{{% note title="Distributing Erlang MapReduce Code" %}}
+Any modules and functions you use in your Erlang MapReduce calls must be
+available on all nodes in the cluster. Please read about
+[installing custom code]({{}}riak/kv/2.9.7/using/reference/custom-code).
+{{% /note %}}
+
+### Erlang Example
+
+Before running some MapReduce queries, let's create some objects to
+run them on. Unlike the first example, where we compiled
+`mr_example.erl` and distributed it across the cluster, this time
+we'll use the [Erlang client library][erlang client] and shell.
+
+```erlang
+1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087).
+2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>,
+                        term_to_binary(["eggs", "bacon"])).
+3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>,
+                         term_to_binary(["bread", "bacon"])).
+4> riakc_pb_socket:put(Client, Yours, [{w, 1}]).
+5> riakc_pb_socket:put(Client, Mine, [{w, 1}]).
+```
+
+Now that we have a client and some data, let's run a query that counts
+the occurrences of each grocery item.
+
+```erlang
+6> Count = fun(G, undefined, none) ->
+             [dict:from_list([{I, 1}
+              || I <- binary_to_term(riak_object:get_value(G))])]
+           end.
+7> Merge = fun(Gcounts, none) ->
+             [lists:foldl(fun(G, Acc) ->
+                            dict:merge(fun(_, X, Y) -> X+Y end,
+                                       G, Acc)
+                          end,
+                          dict:new(),
+                          Gcounts)]
+           end.
+8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred(
+                        Client,
+                        [{<<"groceries">>, <<"mine">>},
+                         {<<"groceries">>, <<"yours">>}],
+                        [{map, {qfun, Count}, none, false},
+                         {reduce, {qfun, Merge}, none, true}]).
+9> L = dict:to_list(R).
+```
+
+{{% note title="Riak Object Representations" %}}
+Note how the `riak_object` module is used in the MapReduce
+function but the `riakc_obj` module is used on the client.
+Riak objects are represented differently internally to the cluster than
+they are externally.
+{{% /note %}} + +Given the lists of groceries we created, the sequence of commands above +would result in L being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`. + +### Erlang Query Syntax + +`riakc_pb_socket:mapred/3` takes a client and two lists as arguments. +The first list contains bucket-key pairs. The second list contains +the phases of the query. + +`riakc_pb_socket:mapred_bucket/3` replaces the first list of +bucket-key pairs with the name of a bucket; see the warnings above +about using this in a production environment. + +#### Inputs + +The `mapred/3` input objects are given as a list of tuples in the +format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and +`Key` should be binaries, and `KeyData` can be any Erlang term. The +former form is equivalent to `{{Bucket,Key},undefined}`. + +#### Query + +The query is given as a list of map, reduce and link phases. Map and +reduce phases are each expressed as tuples in the following form: + + +```erlang +{Type, FunTerm, Arg, Keep} +``` + +`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument +(any Erlang term) to pass to each execution of the phase. `Keep` is +either `true` or `false` and determines whether results from the phase +will be included in the final value of the query. Riak KV assumes that the +final phase will return results. + +`FunTerm` is a reference to the function that the phase will execute and +takes any of the following forms: + +* `{modfun, Module, Function}` where `Module` and `Function` are atoms + that name an Erlang function in a specific module +* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous + function) +* `{jsfun,Name}` where `Name` is a binary that, when evaluated in + Javascript, points to a built-in Javascript function +* `{jsanon, Source}` where `Source` is a binary that, when evaluated in + Javascript is an anonymous function +* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains + the source for an anonymous Javascript function + +{{% note title="qfun Note" %}} +Using `qfun` in compiled applications can be a fragile +operation. Please keep the following points in mind: + +1. The module in which the function is defined must be present and +exactly the same version on both the client and Riak KV nodes. + +2. Any modules and functions used by this function (or any function in +the resulting call stack) must also be present on the Riak KV nodes. + +Errors about failures to ensure both 1 and 2 are often surprising, +usually seen as opaque missing-function or function-clause +errors. Especially in the case of differing module versions, this can be +difficult to diagnose without expecting the issue and knowing of +`Module:info/0`. + +When using the Erlang shell, anonymous MapReduce functions can be +defined and sent to Riak KV instead of deploying them to all servers in +advance, but condition #2 above still holds. +{{% /note %}} + +Link phases are expressed in the following form: + + +```erlang +{link, Bucket, Tag, Keep} +``` + + +`Bucket` is either a binary name of a bucket to match, or the atom `_`, +which matches any bucket. `Tag` is either a binary tag to match, or the +atom `_`, which matches any tag. `Keep` has the same meaning as in map +and reduce phases. + + +> There are a small group of prebuilt Erlang MapReduce functions available +with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl). 
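+
+For example, assuming the `map_object_value` and `reduce_sort` functions in
+that module (check the linked source for the functions shipped with your
+version), a query can be built entirely from prebuilt phases:
+
+```erlang
+%% Fetch each object's value, then sort the combined results.
+riakc_pb_socket:mapred(
+    Client,
+    [{<<"groceries">>, <<"mine">>},
+     {<<"groceries">>, <<"yours">>}],
+    [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+     {reduce, {modfun, riak_kv_mapreduce, reduce_sort}, none, true}]).
+```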
+ +## Bigger Data Examples + +### Loading Data + +This Erlang script will load historical stock-price data for Google +(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it. +Paste the code below into a file called `load_data.erl` inside the `dev` +directory (or download it below). + +```erlang +#!/usr/bin/env escript +%% -*- erlang -*- +main([]) -> + io:format("Requires one argument: filename with the CSV data~n"); +main([Filename]) -> + {ok, Data} = file:read_file(Filename), + Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])), + lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines). + +format_and_insert(Line) -> + JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line), + Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]), + io:format("Inserting: ~s~n", [hd(Line)]), + os:cmd(Command). +``` + +Make the script executable: + +```bash +chmod +x load_data.erl +``` + +Download the CSV file of stock data linked below and place it in the +`dev` directory where we've been working. + +* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) - Google historical stock data +* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) - Alternative script in Ruby to load the data +* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) - Erlang script to load data (as shown in snippet) + +Now load the data into Riak KV. + +```bash +./load_data.erl goog.csv +``` + + +### Map only: find the days on which the high was over $600.00 + +From the Erlang shell with the client library loaded, let's define a +function which will check each value in our `goog` bucket to see if +the stock's high for the day was above $600. + +```erlang +> HighFun = fun(O, _, LowVal) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> High = proplists:get_value(<<"High">>, Map, -1.0), +> case High > LowVal of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun +``` + +Now we'll use `mapred_bucket/3` to send that function to the cluster. + +```erlang +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]). + {ok,[{0, + [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>, + <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>, + <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>, + <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>, + <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>, + <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>, + <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>, + <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]} +``` + +#### Map only: find the days on which the close is lower than open + +This example is slightly more complicated: instead of comparing a +single field against a fixed value, we're looking for days when the +stock declined. + +```erlang +> CloseLowerFun = fun(O, _, _) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> Close = proplists:get_value(<<"Close">>, Map, -1.0), +> Open = proplists:get_value(<<"Open">>, Map, -2.0), +> case Close < Open of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun + +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]). 
+{ok,[{0, + [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>, + <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>, + <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>, + <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>, + <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>, + <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>, + <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>, + <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]} +``` + +#### Map and Reduce: find the maximum daily variance in price by month + +Here things start to get tricky. We'll use map to determine each day's +rise or fall, and our reduce phase will identify each month's largest +variance. + +```erlang +DailyMap = fun(O, _, _) -> + {struct, Map} = mochijson2:decode(riak_object:get_value(O)), + Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")), + High = proplists:get_value(<<"High">>, Map, 0.0), + Low = proplists:get_value(<<"Low">>, Map, 0.0), + Month = string:substr(Date, 1, 7), + [{Month, abs(High - Low)}] +end. + +MonthReduce = fun(List, _) -> + {Highs, _} = lists:foldl( + fun({Month, _Value}=Item, {Accum, PrevMonth}) -> + case Month of + PrevMonth -> + %% Highest value is always first in the list, so + %% skip over this one + {Accum, PrevMonth}; + _ -> + {[Item] ++ Accum, Month} + end + end, + {[], ""}, + List), + Highs + end. +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]). +{ok,[{1, + [{"2010-02",10.099999999999909}, + {"2006-02",11.420000000000016}, + {"2004-08",8.100000000000009}, + {"2008-08",14.490000000000009}, + {"2006-05",11.829999999999984}, + {"2005-10",4.539999999999964}, + {"2006-06",7.300000000000011}, + {"2008-06",9.690000000000055}, + {"2006-03",11.770000000000039}, + {"2006-12",4.880000000000052}, + {"2005-09",9.050000000000011}, + {"2008-03",15.829999999999984}, + {"2008-09",14.889999999999986}, + {"2010-04",9.149999999999977}, + {"2008-06",14.909999999999968}, + {"2008-05",13.960000000000036}, + {"2005-05",2.780000000000001}, + {"2005-07",6.680000000000007}, + {"2008-10",21.390000000000043}, + {"2009-09",4.180000000000007}, + {"2006-08",8.319999999999993}, + {"2007-08",5.990000000000009}, + {[...],...}, + {...}|...]}]} +``` + +#### A MapReduce Challenge + +Here is a scenario involving the data you already have loaded. + +MapReduce Challenge: Find the largest day for each month in terms of +dollars traded, and subsequently the largest overall day. + +*Hint*: You will need at least one each of map and reduce phases. + +## Streaming MapReduce + +Because Riak KV distributes the map phases across the cluster to increase +data locality, you can gain access to the results of those individual +computations as they finish via streaming. Streaming can be very +helpful when getting access to results from a high latency MapReduce job +that only contains map phases. Streaming of results from reduce phases +isn't as useful, but if your map phases return data (keep: true), they +will be returned to the client even if the reduce phases haven't +executed. This will let you use streaming with a reduce phase to collect +the results of the map phases while the jobs are run and then get the +result to the reduce phase at the end. + +### Streaming via the HTTP API + +You can enable streaming with MapReduce jobs submitted to the `/mapred` +resource by adding `?chunked=true` to the url. The response will be sent +using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`. 
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error

+Internal Server Error
+
+The server encountered an error while processing this request:
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, typically the information you need is buried more deeply within the stack.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
+
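+A quick check in any Erlang shell confirms the boundary behavior:
+
+```erlang
+1> string:substr("2009-06-10", 0, 7).
+** exception error: no function clause matching string:substr("2009-06-10",0,7)
+2> string:substr("2009-06-10", 1, 7).
+"2009-06"
+```
+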
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.9.7/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..33714a360d
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,72 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+Cluster metadata is a subsystem inside of Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/2.9.7/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.9.7/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.9.7/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
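+
+As a minimal sketch, assuming the `put/3`, `get/2`, and `delete/2`
+functions exported by that module (the prefix and key names here are
+purely illustrative):
+
+```erlang
+%% Values live under a {Prefix, SubPrefix} namespace plus a key.
+FullPrefix = {<<"my_app">>, <<"settings">>},
+ok = riak_core_metadata:put(FullPrefix, <<"max_connections">>, 512),
+512 = riak_core_metadata:get(FullPrefix, <<"max_connections">>),
+ok = riak_core_metadata:delete(FullPrefix, <<"max_connections">>).
+```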
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/app-guide/reference.md b/content/riak/kv/2.9.7/developing/app-guide/reference.md
new file mode 100644
index 0000000000..64a9c36464
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/app-guide/reference.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+#menu:
+#  riak_kv-2.9.7:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/app-guide/replication-properties.md b/content/riak/kv/2.9.7/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..166b4e6588
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/app-guide/replication-properties.md
@@ -0,0 +1,584 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/advanced/replication-properties
+  - /riak/kv/2.9.7/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/2.9.7/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/2.9.7/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/2.9.7/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/2.9.7/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.9.7/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.9.7/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/2.9.7/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on each individual read or write
+request, as shown in the
+[section below]({{}}riak/kv/2.9.7/developing/app-guide/replication-properties#client-level-replication-settings)
+on client-level replication settings.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.9.7/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
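+
+For example, a write from the Erlang client to a bucket of that type (the
+bucket and key names are illustrative) looks like any other write:
+
+```erlang
+%% The {Type, Bucket} tuple selects the bucket type and, with it, the
+%% n_val=5, r=3, w=3 properties defined above.
+Obj = riakc_obj:new({<<"custom_props">>, <<"my_bucket">>},
+                    <<"my_key">>,
+                    <<"this write inherits the type's properties">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```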
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/2.9.7/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | Number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.9.7/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/2.9.7/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.9.7/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the *primary
+read* (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
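+
+As with R and W, these values can be supplied per request. Here is a
+sketch from the Erlang client (bucket and key are illustrative) of a read
+that requires two primary vnodes to respond:
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"animal_facts">>,
+                                <<"chimpanzee">>,
+                                [{pr, 2}]).
+```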
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
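+Like W, DW can be supplied on a per-request basis. A sketch from the
+Erlang client, assuming an object `Obj` constructed as in the examples
+above:
+
+```erlang
+%% Require three vnodes to acknowledge the write and two of them to
+%% confirm it has reached disk before the request succeeds.
+riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
+```
+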
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/2.9.7/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.9.7/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.9.7/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
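+
+From the Erlang client, both settings can be passed as per-request
+options (a sketch; the bucket and key are illustrative):
+
+```erlang
+%% Look beyond the first vnode for the object, but give up once a
+%% quorum of vnodes has answered "not found."
+riakc_pb_socket:get(Pid, <<"nba_stats">>, <<"no_such_player">>,
+                    [{notfound_ok, false}, {basic_quorum, true}]).
+```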
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
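+
+In the Erlang client, the symbolic names are passed as atoms wherever an
+integer would go. For example, this read (bucket and key illustrative)
+waits for every replica to respond:
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"animal_facts">>,
+                                <<"chimpanzee">>,
+                                [{r, all}]).
+```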
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/2.9.7/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/2.9.7/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.9.7/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+
+(Embedded screencast: "Tuning CAP Controls in Riak" from Basho Technologies, hosted on Vimeo.)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/app-guide/strong-consistency.md b/content/riak/kv/2.9.7/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..fe4c25d700
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/app-guide/strong-consistency.md
@@ -0,0 +1,261 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/advanced/strong-consistency
+  - /riak/kv/2.9.7/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/2.9.7/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/2.9.7/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/2.9.7/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/2.9.7/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/2.9.7/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/2.9.7/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/2.9.7/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.7/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/2.9.7/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/2.9.7/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/2.9.7/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/2.9.7/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/2.9.7/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/2.9.7/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/2.9.7/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/2.9.7/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/2.9.7/developing/client-libraries
+[getting started]: {{}}riak/kv/2.9.7/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/2.9.7/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent plus partition
+tolerant) for all of the data in that bucket. You can store just some of
+your data in strongly consistent buckets or all of your data, depending
+on your use case. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability: a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable, or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
+
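+As a quick sanity check, you can exercise the new type from a client. A
+minimal sketch using the official Python client follows; the bucket and
+key names (`fruits`, `favorite`) are our own examples:
+
+```python
+import riak
+
+client = riak.RiakClient()
+bucket = client.bucket_type('strongly_consistent').bucket('fruits')
+
+# The first write to a fresh key needs no causal context:
+bucket.new('favorite', 'lime').store()
+
+# Any read issued after the successful write is guaranteed to see it:
+print(bucket.get('favorite').data)
+```
+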
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function quite differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations enable you to set a variety of
+[replication properties][apps replication properties] on each request or at the
+bucket level [using bucket types][usage bucket types], strongly consistent
+operations silently ignore these settings, which include `r`, `pr`,
+`w`, `rw`, and others. Two replication properties that _can_ be set,
+however, are `n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
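+
+If you want to confirm these settings from the application side, the
+official Python client can read a bucket type's properties back (a
+sketch, reusing the `client` from above; the exact property names mirror
+the `riak-admin` output):
+
+```python
+props = client.bucket_type('strongly_consistent').get_properties()
+
+print(props['consistent'])  # True for a strongly consistent type
+print(props['n_val'])       # fixed at type creation; 5 or more is recommended
+```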
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients, but with important advantages over vector
+clocks.
+
+While we strongly recommend attaching context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---contexts are purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to write to nonexistent keys without attaching context,
+we recommend doing this only if you are certain that the key does not
+yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
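+
+To make this concrete, here is a minimal read/modify/write sketch with
+the official Python client (bucket and key names are our own). The
+fetched object carries its causal context, and the client sends that
+context back to Riak with the update:
+
+```python
+bucket = client.bucket_type('strongly_consistent').bucket('fruits')
+
+obj = bucket.get('favorite')  # the fetched object includes its context
+obj.data = 'orange'
+obj.store()                   # the context accompanies the write
+```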
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure,
+   you should default to supplying a causal context.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates (see
+   the retry sketch after this list).
+
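+Because of point 3, applications should be prepared to retry the whole
+read/modify/write cycle, as discussed in the [known issue](#known-issue-with-client-libraries)
+section below. A minimal sketch with the official Python client
+(`update_with_retry` and its arguments are hypothetical names of ours):
+
+```python
+import riak
+
+def update_with_retry(bucket, key, modify, attempts=5):
+    for _ in range(attempts):
+        try:
+            obj = bucket.get(key)        # fresh fetch -> fresh causal context
+            obj.data = modify(obj.data)
+            obj.store()                  # may fail on a concurrent update
+            return obj
+        except riak.RiakError:
+            continue                     # retry the entire cycle, not just the put
+    raise RuntimeError('update failed after %d attempts' % attempts)
+```
+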
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much as it does in
+non-strongly-consistent buckets. One important exception to this is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, and so all writes to existing keys must include a context
+with the object.
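+
+For example, the following Python sketch (names are our own) would be
+rejected when the key already holds a value, because `bucket.new` builds
+a brand-new object with no causal context attached:
+
+```python
+bucket = client.bucket_type('strongly_consistent').bucket('fruits')
+
+# 'favorite' already exists, but this object carries no context:
+bucket.new('favorite', 'pear').store()  # fails with the errors shown below
+```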
+
+If you attempt a write to a non-empty key without including causal
+context, you will receive the following error:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+<html><head><title>412 Precondition Failed</title></head><body><h1>Precondition Failed</h1>Precondition Failed<p><hr><address>mochiweb+webmachine web server</address></body></html>
+``` + +> **Getting Started with Riak KV clients** +> +> If you are connecting to Riak using one of Basho's official +[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section. + +## Known Issue with Client Libraries + +All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no +problems, since many error conditions are normal when using Riak. + +When working with strong consistency, however, operations like +[conditional puts][config strong consistency#details] commonly +produce errors that are difficult for clients to interpret. For example, +it is expected behavior for conditional puts to fail in the case of +concurrent updates to an object. At present, the official Riak clients +will convert this failure into an exception that is no different from +other error conditions, i.e. they will not indicate any +strong-consistency-specific errors. + +The best solution to this problem at the moment is to catch these +exceptions on the application side and parse server-side error messages +to see if the error involved a conditional failure. If so, you should +set up your application to retry any updates, perhaps a specified number +of times or perhaps indefinitely, depending on the use case. + +If you do set up a retry logic of this sort, however, it is necessary +to retry the entire read/modify/put cycle, meaning that you will need +to fetch the object, modify it, and then write. If you perform a simple +put over and over again, without reading the object, the update will +continue to fail. + +A future version of Riak will address these issues by modifying the +server API to more accurately report errors specific to strongly +consistent operations. + + + + diff --git a/content/riak/kv/2.9.7/developing/app-guide/write-once.md b/content/riak/kv/2.9.7/developing/app-guide/write-once.md new file mode 100644 index 0000000000..51307cebad --- /dev/null +++ b/content/riak/kv/2.9.7/developing/app-guide/write-once.md @@ -0,0 +1,159 @@ +--- +title: "Write Once" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Write Once" + identifier: "app_guide_write_once" + weight: 102 + parent: "developing_app_guide" +toc: true +version_history: + in: "2.1.0+" +aliases: + - /riak/2.9.7/dev/advanced/write-once + - /riak/kv/2.9.7/dev/advanced/write-once +--- + +[glossary vnode]: {{}}riak/kv/2.9.7/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.9.7/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.9.7/developing/data-types +[strong consistency]: {{}}riak/kv/2.9.7/developing/app-guide/strong-consistency + +Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. + +{{% note %}} +Write-once buckets do not support Riak commit hooks. Because Riak objects are +inserted into the realtime queue using a postcommit hook, realtime replication +is unavailable for write-once buckets. 
Fullsync replication will, however, +replicate the data. +{{% /note %}} + +## Configuration + +When the new `write_once` [bucket type][bucket type] parameter is set to +`true`, buckets of type will treat all key/value entries as semantically "write +once;" once written, entries should not be modified or overwritten by the user. + +The `write_once` property is a boolean property applied to a bucket type and +may only be set at bucket creation time. Once a bucket type has been set with +this property and activated, the `write_once` property may not be modified. + +The `write_once` property is incompatible with [Riak data types][Riak data types] +and [strong consistency][strong consistency], This means that if you attempt +to create a bucket type with the `write_once` property set to `true`, any +attempt to set the `datatype` parameter or to set the `consistent` parameter +to `true` will fail. + +The `write_once` property may not be set on the default bucket type, and may +not be set on individual buckets. If you set the `lww` or `allow_mult` +parameters on a write-once bucket type, those settings will be ignored, as +sibling values are disallowed by default. + +The following example shows how to configure a bucket type with the +`write_once` property: + +```bash +riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}' +# my-bucket-type created + +riak-admin bucket-type activate my-bucket-type +# my-bucket-type has been activated + +riak-admin bucket-type status my-bucket-type +# my-bucket-type is active +... +write_once: true +... +``` + +## Quorum + +The write path used by write-once buckets supports the `w`, `pw`, and `dw` +configuration values. However, if `dw` is specified, then the value of `w` is +taken to be the maximum of the `w` and `dw` values. For example, for an `n_val` +of 3, if `dw` is set to `all`, then `w` will be `3`. + +This write additionally supports the `sloppy_quorum` property. If set to +`false`, only primary nodes will be selected for calculation of write quorum +nodes. + +## Runtime + +The write-once path circumvents the normal coordinated PUT code path, and +instead sends write requests directly to all [vnodes][glossary vnode] (or +vnode proxies) in the effective preference list for the write operation. + +In place of the `put_fsm` used in the normal path, we introduce a collection of +new intermediate worker processes (implementing `gen_server` behavior). The +role of these intermediate processes is to dispatch put requests to vnode or +vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`, +the write-once workers are long-lived for the lifecycle of the `riak_kv` +application. They are therefore stateful and store request state in a state- +local dictionary. + +The relationship between the `riak_client`, write-once workers, and vnode +proxies is illustrated in the following diagram: + +
+![Write Once]({{}}images/write_once.png) +
+ +## Client Impacts + +Since the write-once code path is optimized for writes of data that will not +be updated and therefore may potentially issue asynchronous writes, some +client features might not work as expected. For example, PUT requests asking +for the object to be returned will behave like requests that do not +request the object to be returned when they are performed against write-once +buckets. + + +## Siblings + +As mentioned, entries in write-once buckets are intended to be written only +once---users who are not abusing the semantics of the bucket type should not be +updating or over-writing entries in buckets of this type. However, it is +possible for users to misuse the API, accidentally or otherwise, which might +result in incomparable entries for the same key. + +In the case of siblings, write-once buckets will resolve the conflict by +choosing the "least" entry, where sibling ordering is based on a deterministic +SHA-1 hash of the objects. While this algorithm is repeatable and deterministic +at the database level, it will have the appearance to the user of "random write +wins." + +{{% note %}} +As mentioned in [Configuration](#configuration), write-once buckets and Riak +Data Types are incompatible because of this. +{{% /note %}} + + +## Handoff + +The write-once path supports handoff scenarios, such that if a handoff occurs +during PUTs in a write-once bucket, the values that have been written will be +handed off to the newly added Riak node. + +## Asynchronous Writes + +For backends that support asynchronous writes, the write-once path will +dispatch a write request to the backend and handle the response +asynchronously. This behavior allows the vnode to free itself for other work +instead of waiting on the write response from the backend. + +At the time of writing, the only backend that supports asynchronous writes is +LevelDB. Riak will automatically fall back to synchronous writes with all other +backends. + +{{% note title="Note on the `multi` backend" %}} +The [Multi]({{}}riak/kv/2.9.7/setup/planning/backend/multi) backend does not +support asynchronous writes. Therefore, if LevelDB is used with the Multi +backend, it will be used in synchronous mode. +{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.7/developing/client-libraries.md b/content/riak/kv/2.9.7/developing/client-libraries.md new file mode 100644 index 0000000000..58d7b2cfed --- /dev/null +++ b/content/riak/kv/2.9.7/developing/client-libraries.md @@ -0,0 +1,291 @@ +--- +title: "Client Libraries" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Client Libraries" + identifier: "developing_client_libraries" + weight: 106 + parent: "developing" +toc: true +aliases: + - /riak/2.9.7/dev/using/libraries + - /riak/kv/2.9.7/dev/using/libraries +--- + +## Basho-Supported Libraries + +Basho officially supports a number of open-source client libraries for a +variety of programming languages and environments. 
+ +Language | Source | Documentation | Download +:--------|:-------|:--------------|:-------- +Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22) | +Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client) +Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads) +C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases) +Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases) +PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) +Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client) +Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client) + +**Note**: All official clients use the integrated issue tracker on +GitHub for bug reporting. + +In addition to the official clients, Basho provides some unofficial +client libraries, listed below. There are also many client libraries and +related [community projects]({{}}community/projects/). + + +## Community Libraries + +The Riak Community is developing at a break-neck pace, and the number of +community-contributed libraries and drivers is growing right along side +it. Here is a list of projects that may suit your programming needs or +curiosities. If you know of something that needs to be added or are +developing something that you wish to see added to this list, please +fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs) +and send us a pull request. + +{{% note title="Note on community-produced libraries" %}} +All of these projects and libraries are at various stages of completeness and +may not suit your application's needs based on their level of maturity and +activity. 
+{{% /note %}} + +### Client Libraries and Frameworks + +#### C/C++ + +* [riak-cpp](https://github.com/ajtack/riak-cpp) - A C++ Riak client + library for use with C++11 compilers +* [Riak C Driver](https://github.com/fenek/riak-c-driver) - A library + to communicate with Riak using cURL and Protocol Buffers +* [Riack](https://github.com/trifork/riack) - A simple C client + library +* [Riack++](https://github.com/TriKaspar/riack_cpp) - A C++ wrapper around riack + +#### Clojure + +* [knockbox](https://github.com/reiddraper/knockbox) - An eventual consistency toolbox for Clojure +* [Welle](http://clojureriak.info) - An expressive Clojure client with batteries included +* [clj-riak](http://github.com/mmcgrana/clj-riak) - Clojure bindings + to the Riak Protocol Buffers API +* [sumo](https://github.com/reiddraper/sumo) - A Protocol + Buffer-specific client for Riak with KV, 2i, and MapReduce support +* [kria](https://github.com/bluemont/kria) - Riak 2.0 Asynchronous + (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer + API, Java 7. + +#### ColdFusion + +* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) - A Riak-backed cache extension for Railo/ColdFusion + +#### Common Lisp + +* [cl-riak (1)](https://github.com/whee/cl-riak) +* [cl-riak (2)](https://github.com/eriknomitch/cl-riak) + +#### Dart + +* [riak-dart](https://github.com/agilord/riak_dart_client) - HTTP + client for Riak written in Dart + +#### Django (Python) + +* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) - Riak-based Session Backend for Django +* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) - A Riak backend for Django + +#### Erlang + +* [Uriak Pool](https://github.com/unisontech/uriak_pool) - Erlang + connection pool library from the team at + [Unison](http://www.unison.com) +* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) - Riak + Protocol Buffer Client pool application +* [Pooly](https://github.com/aberman/pooly) - Riak Process Pool +* [riakpool](https://github.com/dweldon/riakpool) - Application for + maintaining a dynamic pool of Protocol Buffer client connections to a + Riak database +* [pooler](https://github.com/seth/pooler) - An OTP Process Pool + Application +* [krc](https://github.com/klarna/krc) - A simple wrapper around the + official Riak client for Erlang +* [riakc_pool](https://github.com/brb/riakc_pool) - A really simple + Riak client process pool based on poolboy + +#### Go + +* [riaken](https://github.com/riaken) - A fast and extendable Riak + Protocol Buffer Client +* [goriakpbc](https://github.com/tpjg/goriakpbc) - A Golang Riak + client inspired by the Ruby riak-client from Basho and riakpbc from mrb +* [riakpbc](https://github.com/mrb/riakpbc) - A Riak Protocol Buffer + client in Go +* [goriak](https://github.com/zegl/goriak) - Go language driver for Riak KV + +#### Grails + +* [Grails ORM for Riak](http://www.grails.org/plugin/riak) + +#### Griffon + +* [Riak Plugin for + Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin) + +#### Groovy + +* [spring-riak](https://github.com/jbrisbin/spring-riak) - Riak + support from Groovy and/or Java + +#### Haskell + +* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) - A fast Haskell client library from the team at MailRank. 
+ +#### Java + +* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) - Java Client Library for Riak based on the Protocol Buffers API +* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) - Asynchronous, NIO-based Protocol Buffers client for Riak +* [Riak Module for the Play + Framework](http://www.playframework.org/modules/riak-head/home) + +#### Lisp-flavored Erlang + +* [Gutenberg](https://github.com/dysinger/gutenberg/) - Riak MapReduce + examples written in LFE + +#### Node.js + +* [zukai](https://github.com/natural/zukai) - Riak ODM for Node.js + from Troy Melhase +* [riak-pb](https://github.com/CrowdProcess/riak-pb) - Riak Protocol + Buffers client for Node.js from the team at + [CrowdProcess](http://crowdprocess.com) +* [node_riak](https://github.com/mranney/node_riak) - Voxer's + production Node.js client for Riak. +* [riakpbc](https://github.com/nlf/riakpbc) - A simple Riak Protocol + Buffer client library for Node.js +* [nodiak](https://npmjs.org/package/nodiak) - Supports bulk + get/save/delete, sibling auto-resolution, MapReduce chaining, Search, + and 2i's +* [resourceful-riak](https://github.com/admazely/resourceful-riak) - A + Riak engine to the + [resourceful](https://github.com/flatiron/resourceful/) model + framework from [flatiron](https://github.com/flatiron/) +* [Connect-Riak](https://github.com/frank06/connect-riak) - Riak + session store for Connect backed by [Riak-js](http://riakjs.org/) +* [Riak-js](http://riakjs.com) - Node.js client for Riak with support + for HTTP and Protocol Buffers +* [Riakjs-model](https://github.com/dandean/riakjs-model) - a model + abstraction around riak-js +* [Node-Riak](http://github.com/orlandov/node-riak) - A wrapper around + Node's HTTP facilities for communicating with Riak +* [riak-dc](https://github.com/janearc/riak-dc) - A very thin, very small + http-based interface to Riak using promises intended to be used for small + tools like command-line applications; aims to have the "most-synchronous- + like" interface. 
+* [Nori](https://github.com/sgonyea/nori) - Experimental Riak HTTP + library for Node.js modeled after Ripple +* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) - Node-based server and database-frontend for Sproutcore +* [Chinood](https://npmjs.org/package/chinood) - Object data mapper + for Riak built on Nodiak +* [SimpleRiak](https://npmjs.org/package/simpleriak) - A very simple + Riak HTTP client + +#### OCaml + +* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) - Riak OCaml client +* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) - A Protocol + Buffers client for Riak + +#### Perl + +* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) - A Perl + interface to Riak +* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) - Non-blocking Riak adapter using anyevent +* [riak-tiny](https://github.com/tempire/riak-tiny) - Perl interface + to Riak without Moose +* [Riak::Light](https://metacpan.org/module/Riak::Light) - Fast and + lightweight Perl client for Riak (PBC only) + +#### PHP + +* [riak-client](https://github.com/php-riak/riak-client) - A Riak + 2.0-compliant PHP client with support for Protocol Buffers by [Fabio + Silva](https://github.com/FabioBatSilva) +* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) - A port of + Ripple to PHP +* [riiak](https://bitbucket.org/intel352/riiak) - A Riak PHP client + library for the [Yii Framework](http://www.yiiframework.com/) +* [riak-php](https://github.com/marksteele/riak-php) - A Riak PHP + client with support for Protocol Buffers +* [RiakBundle](https://github.com/remialvado/RiakBundle) - [Symfony](http://symfony.com) Bundle designed to ease interaction + with Riak +* [php_riak](https://github.com/TriKaspar/php_riak) - A PHP extension + written in C, Both Riak client and PHP session module + +#### Python + +* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) - Asyncio PBC Riak 2.0+ client library. 
(Based on official Basho + python client) +* [Riakasaurus](https://github.com/calston/riakasaurus) - A Riak + client library for Twisted (based on txriak) +* [RiakKit](http://shuhaowu.com/riakkit) - A small Python ORM that + sits on top of riak-python-client, similar to mongokit and couchdbkit +* [riakalchemy](https://github.com/Linux2Go/riakalchemy) - Object + mapper for Riak written in Python +* [riak_crdt](https://github.com/ericmoritz/riak_crdt) - A CRDT + (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT + API](https://github.com/ericmoritz/crdt) +* [txriak](https://launchpad.net/txriak) - A Twisted module for + communicating with Riak via the HTTP interface +* [txriakidx](https://github.com/williamsjj/txriakidx) - Riak client + for Twisted Python that implements transparent indexes + +#### Racket + +* [riak.rkt](https://github.com/shofetim/riak.rkt) - Racket API to + Riak +* [Racket Riak](https://github.com/dkvasnicka/racket-riak) - Racket + 1.3.x API to Riak + +#### Ruby + +* [Risky](https://github.com/aphyr/risky) - A lightweight Ruby ORM for + Riak +* [riak_sessions](http://github.com/igorgue/riak_sessions) - Riak-backed session storage for Rack +* [Riaktor](http://github.com/benmyles/riaktor) - Ruby client and + object mapper for Riak +* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) - DataMapper adapter for Riak +* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) - Riak + Protocol Buffer Client in Ruby +* [Devise-Ripple](http://github.com/frank06/devise-ripple) - An ORM + strategy to use Devise with Riak +* [ripple-anaf](http://github.com/bkaney/ripple-anaf) - Accepts nested + attributes support for Ripple +* [Pabst](https://github.com/sgonyea/pabst) - Cross-platform Ruby + extension for Protocol Buffers written in both Objective-C and + Objective-C++ + +#### Scala + +* [Riakka](http://github.com/timperrett/riakka) - Scala library for + talking to Riak +* [Ryu](http://github.com/softprops/ryu) - A Tornado Whirlwind Kick + Scala client for the Riak raw HTTP interface +* [Raiku](https://github.com/gideondk/Raiku) - An Akka IO- and + Sentinel-driven Riak Scala client + +#### Smalltalk + +* [Phriak](http://www.squeaksource.com/Phriak/) - A Riak client for + Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface +* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) - A Pharo Smalltalk interface to Riak. There is also a blog post + with some additional info about the client + [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html). + + + + diff --git a/content/riak/kv/2.9.7/developing/data-modeling.md b/content/riak/kv/2.9.7/developing/data-modeling.md new file mode 100644 index 0000000000..9dd65abece --- /dev/null +++ b/content/riak/kv/2.9.7/developing/data-modeling.md @@ -0,0 +1,15 @@ +--- +layout: redirect +target: "riak/kv/2.9.7/learn/use-cases/" +aliases: +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/, but -- to maintain +the git history of this (possibly malformed?) file -- we're going to start off +by using this generated redirect. 
+ + + + diff --git a/content/riak/kv/2.9.7/developing/data-types.md b/content/riak/kv/2.9.7/developing/data-types.md new file mode 100644 index 0000000000..bca2d73a71 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/data-types.md @@ -0,0 +1,279 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Data Types" + identifier: "developing_data_types" + weight: 102 + parent: "developing" +toc: true +aliases: + - /riak/2.9.7/dev/using/data-types + - /riak/kv/2.9.7/dev/using/data-types + - /riak/2.9.7/dev/data-modeling/data-types + - /riak/kv/2.9.7/dev/data-modeling/data-types +--- + +[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others +[concept crdt]: ../../learn/concepts/crdts +[ops bucket type]: ../../using/cluster-operations/bucket-types + +Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types: + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [GSets](./gsets) +- [Maps](./maps) + +Riak KV also has 1 context-free data type, that has similar usage but does not require contexts. + +- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places) + + +Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps). + +For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt]. + +## Getting Started with Riak Data Types + +The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: + +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). +2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). +3. [Activate the bucket type](#activate-bucket-type). + +### Creating a Bucket with a Riak Data Type + +First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to either `counter`, `map`, `set`, or `hll`. + +The following would create a separate bucket type for each of the four +bucket-level data types: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}' +riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}' +``` + +> **Note** +> +> The names `maps`, `sets`, `counters`, `hlls` and `gsets` are not reserved +terms. You are free to name bucket types whatever you like, with +the exception of `default`. + +### Confirm Bucket configuration + +Once you've created a bucket with a Riak data type, you can check +to make sure that the bucket property configuration associated with that +type is correct. This can be done through the `riak-admin` interface: + +```bash +riak-admin bucket-type status maps +``` + +This will return a list of bucket properties and their associated values +in the form of `property: value`. 
If our `maps` bucket type has been set +properly, we should see the following pair in our console output: + +``` +datatype: map +``` + +### Activate Bucket type + +If a bucket type has been properly constructed, it needs to be activated +to be usable in Riak. This can also be done using the `bucket-type` +command interface: + +```bash +riak-admin bucket-type activate maps +``` + +To check whether activation has been successful, simply use the same +`bucket-type status` command shown above. + +See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application. + +## Required Bucket Properties + +In order for Riak data types to work the bucket should have the following bucket properties: + +- `allow_mult = true` +- `last_write_wins = false` + +These settings are set by default and should not be changed. + +## Data Types and Context + +Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence. + +If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client). + +> **Note** +> +> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter. + +In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map): + +```java +// Using the "ahmedMap" Location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +System.out.prinntln(ctx.getValue().toString()) + +// An indecipherable string of Unicode characters should then appear +``` + +```ruby +bucket = client.bucket('users') +ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps') +ahmed_map.instance_variable_get(:@context) + +# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j" +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```python +bucket = client.bucket_type('maps').bucket('users') +ahmed_map = Map(bucket, 'ahmed_info') +ahmed_map.context + +# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Note: using a previous UpdateMap or FetchMap result +Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context)); + +// Output: +// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("context: '%s'", rslt.context.toString('base64')); +}); + +// Output: +// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo=' +``` + +```erlang +%% You cannot fetch a data type's context directly using the Erlang +%% client. This is actually quite all right, as the client automatically +%% manages contexts when making updates. 
+``` + +> **Context with the Ruby, Python, and Erlang clients** +> +> In the Ruby, Python, and Erlang clients, you will not need to manually +handle context when making data type updates. The clients will do it all +for you. The one exception amongst the official clients is the Java +client. We'll explain how to use data type contexts with the Java client +directly below. + +### Context with the Java and PHP Clients + +With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations: + +* Disabling a flag within a map +* Removing an item from a set (whether the set is on its own or within a + map) +* Removing a field from a map + +Without context, these operations simply will not succeed due to the +convergence logic driving Riak data types. The example below shows you +how to fetch a data type's context and then pass it back to Riak. More +specifically, we'll remove the `paid_account` flag from the map: + +```java +// This example uses our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate removePaidAccountField = new MapUpdate() + .removeFlag("paid_account"); +UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField) + .withContext(ctx) + .build(); +client.execute(update); +``` + + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +## Usage Examples + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [Maps](./maps) +- [GSets](./gsets) +- [Hyperloglogs](./hyperloglogs) + +The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section. + +All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish. + +## Data Types and Search + +Riak data types can be searched like any other object, but with the +added benefit that your data type is indexed as a different type by Solr, +the search platform behind Riak Search. + +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code +samples from each of our official client libraries. 
+ + + + diff --git a/content/riak/kv/2.9.7/developing/data-types/counters.md b/content/riak/kv/2.9.7/developing/data-types/counters.md new file mode 100644 index 0000000000..b6b2f01c4d --- /dev/null +++ b/content/riak/kv/2.9.7/developing/data-types/counters.md @@ -0,0 +1,635 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Counters" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Counters" + identifier: "data_types_counters" + weight: 100 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.7/dev/using/data-types/counters + - /riak/kv/2.9.7/dev/using/data-types/counters + - /riak/2.9.7/dev/data-modeling/data-types/counters + - /riak/kv/2.9.7/dev/data-modeling/data-types/counters +--- + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero. + +The examples in this section will show you how to use counters on their own. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter set to `counter`: + +```bash +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +``` + +> **Note** +> +> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status counters +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `counters` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: counter +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate counters +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status counters +``` + +After creating and activating our new `counters` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key +location that contains our counter. + +For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. 
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can +create this counter and ensure that the `counters` bucket will use our +`counters` bucket type like this: + +```java +// Using the countersBucket Namespace object from above: + +Location trafficTickets = new Location(countersBucket, "traffic_tickets"); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') + +# Alternatively, the Ruby client enables you to set a bucket type as +# being globally associated with a Riak data type. The following would +# set all counter buckets to use the counters bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters' + +# This would enable us to create our counter without specifying a bucket type +bucket = client.bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('traffic_tickets', $bucket); +``` + +```python +bucket = client.bucket_type('counters').bucket('traffic_tickets') +counter = bucket.new('traffic_tickets') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResult = cmd.Result; +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +``` + +```erlang +Counter = riakc_counter:new(). + +%% Counters in the Erlang client are opaque data structures that collect +%% operations as you mutate them. We will associate the data structure +%% with a bucket type, bucket, and key later on. +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +## Increment a Counter + +Now that our client knows which bucket/key pairing to use for our +counter, `traffic_tickets` will start out at 0 by default. If we happen +to get a ticket that afternoon, we can increment the counter: + +```java +// Using the "trafficTickets" Location from above: + +CounterUpdate cu = new CounterUpdate(1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment + +# This will increment the counter both on the application side and in +Riak +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment() + +# Updates are staged locally and have to be explicitly sent to Riak +# using the store() method. 
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 5}' +``` + +## Retrieve Counter Value + +We can retrieve the value of the counter and view how many tickets have accumulated: + +```java +// Using the "trafficTickets" Location from above: +FetchCounter fetch = new FetchCounter.Builder(trafficTickets) + .build(); +FetchCounter.Response response = client.execute(fetch); +RiakCounter counter = response.getDatatype(); +Long ticketsCount = counter.view(); +``` + +```ruby +counter.value +# Output will always be an integer +``` + +```php +$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getCounter(); + +$trafficTickets->getData(); # returns an integer +``` + +```python +counter.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, whereas the call above would return +# 6, the call below will return 0' since we started with an empty +# counter: + +counter.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any changes to the counter that have not yet been +# sent to Riak +counter.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +// response.Value has the counter value +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +client.fetchCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND", + options.bucketType, options.bucket, options.key); + } else { + logger.info("bt: %s, b: %s, k: %s, counter: %d", + options.bucketType, options.bucket, options.key, + rslt.counterValue); + } + } +); +``` + +```erlang +riakc_counter:dirty_value(Counter2). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, whereas the call above would return +%% '6', the call below will return '0' since we started with an empty +%% counter: + +riakc_counter:value(Counter2). + +%% To fetch the value stored on the server, use the call below: + +{ok, CounterX} = riakc_pb_socket:fetch_type(Pid, + {<<"counters">>, <<"counters">>}, + <<"traffic_tickets">>). +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets + +# Response: +{"type":"counter", "value": } +``` + +## Decrement a Counter + +Counters enable you to decrement values in addition to incrementing them as seen above. 
+ +For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(-1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.decrement + +# Just like incrementing, you can also decrement by more than one, e.g.: +counter.decrement(3) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(-3) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.decrement() + +# Just like incrementing, you can also decrement by more than one, e.g.: +counter.decrement(3) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var updateCmd = new UpdateCounter.Builder(-3) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .Build(); + +rslt = client.Execute(updateCmd); +response = updateCmd.Response; +// response.Value is three less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counter', + key: 'traffic_tickets', + increment: -1 +}; + +// As with incrementing, you can also decrement by more than one, e.g.: +var options = { + bucketType: 'counters', + bucket: 'counter', + key: 'traffic_tickets', + increment: -3 +}; +``` + +```erlang +Counter3 = riakc_counter:decrement(Counter2). + +%% As with incrementing, you can also decrement by more than one: + +Counter4 = riakc_counter:decrement(3, Counter3). + +%% At some point, we'll want to send our local updates to the server +%% so they get recorded and are visible to others. Extract the update +%% using the to_op/1 function, then pass it to +%% riakc_pb_socket:update_type/4,5. + +riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>}, + <<"traffic_tickets">>, + riakc_counter:to_op(Counter4)). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"decrement": 3}' +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/data-types/gsets.md b/content/riak/kv/2.9.7/developing/data-types/gsets.md new file mode 100644 index 0000000000..61cbe7f8f4 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/data-types/gsets.md @@ -0,0 +1,631 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types:GSets" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "GSets" + identifier: "data_types_gsets" + weight: 101 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.7/dev/using/data-types/gsets + - /riak/kv/2.9.7/dev/using/data-types/gsets + - /riak/2.9.7/dev/data-modeling/data-types/gsets + - /riak/kv/2.9.7/dev/data-modeling/data-types/gsets +--- + +GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps). + +GSets are collections of unique binary values (such as strings). All of the values in a gset are unique and are automatically sorted alphabetically irresepective of the order they were added. + +For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV. 
+
+Unlike regular sets, elements can only be added to a gset; modifying or removing elements is not possible.
+
+> **Known Issue**
+>
+> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `gset`:
+
+```bash
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `gsets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: gset
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate gsets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+After creating and activating our new `gsets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using gsets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a gset:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket_name>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+gset = bucket.new('2019-11-17')
+
+# or
+
+from riak.datatypes import GSet
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with gsets
+// by building an Options object or using a Builder
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, gsets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket_name>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store the list of transactions that occur for an account number on a specific date: the gset is keyed by the date `2019-11-17` in the bucket `account-12345678`, using the `gsets` bucket type created previously. (The Java and Ruby snippets below illustrate the same operations with a `cities` gset in a `travel` bucket.)
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('gsets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the gsets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'gsets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak gset.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a gset explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create an options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+GSet20191117 = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty. We can verify that it is empty at any time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+count($gset->getData());
+```
+
+```python
+len(gset) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+FetchGSet fetchGSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchGSetCommand);
+GSetResponse response = fetchGSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("gset '2019-11-17' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_gset:size(GSet20191117) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% gset that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a GSet
+
+But let's say that a pair of transactions occurred today. Let's add them to our `2019-11-17` gset:
+
+```java
+// Using our "cities" Location from above:
+
+GSetUpdate su = new GSetUpdate()
+    .add("Toronto")
+    .add("Montreal");
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+    .build();
+client.execute(update);
+```
+
+```ruby
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('transaction a')
+  ->add('transaction b')
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+gset.add('transaction a')
+gset.add('transaction b')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "transaction a", "transaction b" };
+
+var builder = new UpdateGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17")
+    .WithAdditions(adds);
+
+UpdateGSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+GSetResponse response = cmd.Response;
+Assert.Contains("transaction a", response.AsStrings.ToArray());
+Assert.Contains("transaction b", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['transaction a', 'transaction b'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+GSet20191117_1 = riakc_gset:add_element(<<"transaction a">>, GSet20191117),
+GSet20191117_2 = riakc_gset:add_element(<<"transaction b">>, GSet20191117_1).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["transaction a", "transaction b"]}'
+```
+
+## Remove from a GSet
+
+Removal from a GSet is not possible.
+
+## Retrieve a GSet
+
+Now, we can check which transactions are currently in our gset:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+# Returns the members of the gset as a Ruby Set
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+var_dump($gset->getData());
+```
+
+```python
+gset.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['transaction a', 'transaction b']), the call below would
+# return frozenset([]).
+
+gset.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions.
+gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(GSet20191117_2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(GSet20191117_2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"gsets">>, <<"account-12345678">>},
+                                        <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use standard javascript array method indexOf() on the
+// values fetched earlier via fetchSet()
+
+var gset20191117 = rslt.values;
+gset20191117.indexOf('transaction z'); // if present, index is >= 0
+gset20191117.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, GSet20191117_2 is the most "recent" gset from the
+%% standpoint of our application.
+
+riakc_gset:is_element(<<"transaction z">>, GSet20191117_2).
+riakc_gset:is_element(<<"transaction a">>, GSet20191117_2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "citiesSet" from above:
+
+int numberOfCities = citiesSet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+response.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var gset20191117Size = rslt.values.length;
+```
+
+```erlang
+riakc_gset:size(GSet20191117_2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/data-types/hyperloglogs.md b/content/riak/kv/2.9.7/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..e77a017710
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/data-types/hyperloglogs.md
@@ -0,0 +1,643 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/data-types/hyperloglogs
+  - /riak/kv/2.9.7/dev/using/data-types/hyperloglogs
+  - /riak/2.9.7/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/2.9.7/dev/data-modeling/data-types/hyperloglogs
+---
+
+The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog.
+
+For this example we'll use the `hlls` bucket type created and activated above and a bucket called `my_hlls`:
+
+```erlang
+%% Buckets are simply named binaries in the Erlang client. See the
+%% examples below for more information
+```
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location hllLocation =
+  new Location(new Namespace("<bucket_type>", "<bucket_name>"), "<key>");
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// Buckets and bucket types are simply strings in the Go client.
+
+// See the examples below for more information, or the full example at
+// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go
+
+// We will need the following imports to run the examples:
+import (
+    "fmt"
+    "os"
+    "time"
+
+    riak "github.com/basho/riak-go-client"
+    "errors"
+)
+```
+
+```csharp
+// In the C# client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```javascript
+// In the Node.js client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('<key>', '<bucket_name>', 'hlls')
+  ->build();
+```
+
+```ruby
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket_name>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-Data-Type
+# requests, which end in /keys/<key>
+```
+
+
+## Create a HyperLogLog data type
+
+To create a hyperloglog data structure, you need to specify a bucket/key pair to
+hold that hyperloglog. Here is the general syntax for doing so:
+
+```erlang
+HLL = riakc_hll:new().
+
+%% Hyperloglogs in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location hllLocation =
+  new Location(new Namespace("hlls", "hello"), "darkness");
+
+// In the Java client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// In the Go client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```csharp
+// In the C# client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```javascript
+// In the Node.js client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```php
+// Note that "hlls" is just an example HLL bucket type name used
+// in these examples
+
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('gosabres poked you.')
+  ->add('phprocks viewed your profile.')
+  ->add('phprocks started following you.')
+  ->buildBucket('<bucket_name>', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+key = "darkness"
+hll = Riak::Crdt::HyperLogLog.new(bucket, key)
+```
+
+```curl
+# You cannot create an empty hyperloglog data structure through the HTTP
+# interface.
+# Hyperloglogs can only be created when an element is added to them, as in the
+# examples below.
+```
+
+Upon creation, our hyperloglog data structure is empty:
+
+```erlang
+HLL.
+
+%% which will return:
+%% {hll,0,[]}
+```
+
+```java
+FetchHll fetch = new FetchHll.Builder(hllLocation)
+    .build();
+RiakHll hll = client.execute(fetch);
+boolean isEmpty = hll.getCardinality() == 0;
+```
+
+```python
+is_empty = hll.value == 0
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+});
+// Prints "Not Found" to logger.info.
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+// Prints "Not Found" to the console.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+
+$response->getCode() == '404';
+```
+
+```ruby
+puts hll.cardinality
+# Prints "0"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","error":"notfound"}
+```
+
+## Add elements to a HyperLogLog data type
+
+```erlang
+HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
+RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
+HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1),
+
+HLL2.
+
+%% which will return:
+%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]}
+```
+
+```java
+HllUpdate hllUpdate = new HllUpdate()
+    .add("Jokes")
+    .add("Are")
+    .addAll(Arrays.asList("Better", "Explained", "Jokes"));
+
+hllUpdate.getElementAdds();
+// Returns the set of ["Jokes", "Are", "Better", "Explained"]
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+// We will add values in the next example
+```
+
+```csharp
+// We will add values in the next example
+```
+
+```javascript
+// We will add values in the next example
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('Jokes')
+  ->add('Are')
+  ->add('Better')
+  ->add('Explained')
+  ->add('Jokes')
+  ->buildBucket('my_hlls', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+# We will add values in the next example
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["my", "old", "friend"]}'
+```
+
+However, when using a non-HTTP client, the approximate cardinality/value of our
+data structure will be 0, locally, until it's pushed to the server and then
+[fetched](#retrieve-a-hyperloglog-datatype) from the server.
+
+```erlang
+riakc_hll:value(HLL2) == 0.
+
+%% which will return:
+%% true
+
+Port = 8087,
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port),
+Key = <<"Holy Diver">>,
+BucketType = <<"hlls">>,
+Bucket = {BucketType, <<"rainbow in the dark">>},
+
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)).
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)).
+```
+
+```java
+// Using hllUpdate and hllLocation from above examples
+
+UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate)
+    .build();
+client.execute(update);
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+adds := [][]byte{
+    []byte("Jokes"),
+    []byte("Are"),
+    []byte("Better"),
+    []byte("Explained"),
+    []byte("Jokes"),
+}
+
+builder := riak.NewUpdateHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    WithAdditions(adds...).
+    Build()
+if err != nil {
+    return err
+}
+
+return cluster.Execute(cmd)
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness',
+    additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'],
+};
+
+client.updateHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```csharp
+var adds = new HashSet<string> { "Jokes", "Are", "Better", "Explained", "Jokes" };
+
+var update = new UpdateHll.Builder(adds)
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .WithReturnBody(true)
+    .Build();
+
+RiakResult rslt = client.Execute(update);
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('Jokes')
+  ->add('Are')
+  ->add('Better')
+  ->add('Explained')
+  ->add('Jokes')
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+hll.add('Jokes')
+hll.batch do |s|
+  s.add 'Are'
+  s.add 'Better'
+  s.add 'Explained'
+  s.add 'Jokes'
+end
+```
+
+## Retrieve a HyperLogLog data type
+
+Now, we can check the approximate count (i.e. the cardinality) of the elements
+added to our hyperloglog data structure:
+
+```erlang
+{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key),
+riakc_hll:value(HLL3) == 4.
+
+%% which would return:
+%% true
+
+%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the
+%% unique elements we've added to the data structure.
+```
+
+```java
+FetchHll hllFetchCmd = new FetchHll.Builder(hllLocation).build();
+RiakHll hll = client.execute(hllFetchCmd);
+hll.getCardinality();
+// Which returns 4
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = bucket.get('hll_one')
+# myhll.value == 4
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+    logger.info("Hyperloglog cardinality is: " + rslt.cardinality);
+});
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($response->getHll()->getData()));
+$this->assertEquals(4, $response->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/data-types/maps.md b/content/riak/kv/2.9.7/developing/data-types/maps.md
new file mode 100644
index 0000000000..8db95193c0
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/data-types/maps.md
@@ -0,0 +1,1885 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/data-types/maps
+  - /riak/kv/2.9.7/dev/using/data-types/maps
+  - /riak/2.9.7/dev/data-modeling/data-types/maps
+  - /riak/kv/2.9.7/dev/data-modeling/data-types/maps
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `maps` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: map
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+After creating and activating our new `maps` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key location that contains our map.
+
+The syntax for creating a map is analogous to the
+syntax for creating other data types:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location map =
+  new Location(new Namespace("<bucket_type>", "<bucket_name>"), "<key>");
+```
+
+```ruby
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+map = Riak::Crdt::Map.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type');
+```
+
+```python
+# The client detects the bucket type's datatype and automatically
+# returns the right datatype for you, in this case a Map.
+map = bucket.new(key)
+
+# This way is also acceptable:
+from riak.datatypes import Map
+map = Map(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("<bucket_type>")
+    .WithBucket("<bucket_name>")
+    .WithKey("<key>");
+```
+
+```javascript
+// Options to pass to the various map methods
+var options = {
+    bucketType: '<bucket_type>',
+    bucket: '<bucket_name>',
+    key: '<key>'
+};
+```
+
+```erlang
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket_name>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Map
+
+For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket.
+
+We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type:
+
+```java
+// In the Java client, you specify the location of data types
+// before you perform operations on them:
+
+Location ahmedMap =
+  new Location(new Namespace("maps", "customers"), "ahmed_info");
+```
+
+```ruby
+customers = client.bucket_type('maps').bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+
+# Alternatively, the Ruby client enables you to set a bucket type as being
+# globally associated with a Riak data type. The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+    .update("first_name", ru1)
+    .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Ahmed')
+  ->updateRegister('phone_number', '5551234567')
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved. The following would work as well:
+map.registers['phone_number'].assign(str(5551234567))
+
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+
+var mapOperation = new UpdateMap.MapOperation();
+
+// Ahmed's first name
+mapOperation.SetRegister("first_name", "Ahmed");
+
+// Ahmed's phone number
+mapOperation.SetRegister("phone_number", "5551234567");
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+PrintMap(response.Value);
+// Output as JSON:
+// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setRegister('first_name', new Buffer('Ahmed'));
+mapOp.setRegister('phone_number', new Buffer('5551234567'));
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map1 = riakc_map:update({<<"first_name">>, register},
+                        fun(R) -> riakc_register:set(<<"Ahmed">>, R) end,
+                        Map),
+Map2 = riakc_map:update({<<"phone_number">>, register},
+                        fun(R) -> riakc_register:set(<<"5551234567">>, R) end,
+                        Map1).
+```
+
+```curl
+# Updates can be performed all at once. The following will create two new
+# registers in the map and also set the value of those registers to the
+# desired values
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "first_name_register": "Ahmed",
+        "phone_number_register": "5551234567"
+      }
+    }'
+```
+
+If a register did not previously exist, Riak KV will create that register for you.
+
+## Flags
+
+Flags behave much like Boolean values, except that instead of `true` or
+`false` flags have the values `enable` or `disable`.
+
+Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps.
+
+To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context).
+
+### Flags Within Maps
+
+Now let's say that we add an Enterprise plan to our pricing model. We'll
+create an `enterprise_customer` flag to track whether Ahmed has signed
+up for the new plan. He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+    .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('enterprise_customer', false)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"enterprise_customer">>, flag},
+                        fun(F) -> riakc_flag:disable(F) end,
+                        Map3).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "enterprise_customer_flag": "disable"
+      },
+      "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+    }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"]
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map4).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+    .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateCounter('page_visits', $updateCounter)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "page_visits_counter": 1
+      }
+    }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles. We'll store
+that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+    .add("robots")
+    .add("opera")
+    .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+    .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('robots')
+  ->add('opera')
+  ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{"interests":["motorcycles","opera","robots"]},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "interests_set": {
+          "add_all": [
+            "robots",
+            "opera",
+            "motorcycles"
+          ]
+        }
+      }
+    }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? interest
+  end
+end
+
+# This will return three Boolean values
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+$sets = $map->getSet('interests');
+var_dump($sets->getData());
+```
+
+```python
+reloaded_map = map.reload()
+for interest in ['robots', 'opera', 'motorcycles']:
+    interest in reloaded_map.sets['interests'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+
+// All of the following return true:
+ahmedMap.Sets.GetValue("interests").Contains("robots");
+ahmedMap.Sets.GetValue("interests").Contains("opera");
+ahmedMap.Sets.GetValue("interests").Contains("motorcycles");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    assert(rslt.map.sets['interests'].indexOf('robots') !== -1);
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map6).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false
+```
+
+We learn from a recent purchasing decision that Ahmed actually doesn't
+seem to like opera. He's much more keen on indie pop. Let's change the
+`interests` set to reflect that:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+    .remove("opera")
+    .add("indie pop");
+MapUpdate mu = new MapUpdate()
+    .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  m.sets['interests'].remove('opera')
+  m.sets['interests'].add('indie pop')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('indie pop')
+  ->remove('opera');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->withContext($map->getContext())
+  ->build()
+  ->execute();
+```
+
+```python
+map.sets['interests'].discard('opera')
+map.sets['interests'].add('indie pop')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", "indie pop");
+mapOperation.RemoveFromSet("interests", "opera");
+
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+Map ahmedMap = response.Value;
+
+// This is false
+ahmedMap.Sets.GetValue("interests").Contains("opera");
+
+// These are true
+ahmedMap.Sets.GetValue("interests").Contains("indie pop");
+ahmedMap.Sets.GetValue("interests").Contains("robots");
+ahmedMap.Sets.GetValue("interests").Contains("motorcycles");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.removeFromSet('interests', 'opera');
+    mapOp.addToSet('interests', 'indie pop');
+
+    options.context = rslt.context;
+    options.op = mapOp;
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map7 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+                        Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "interests_set": {
+          "remove": "opera",
+          "add": "indie pop"
+        }
+      },
+      "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+    }
+    '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of a wide variety of
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+    .update("first_name", ru1)
+    .update("last_name", ru2)
+    .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+    .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Annika')
+  ->updateRegister('last_name', 'Weiss')
+  ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.map('annika_info') + .setRegister('first_name', 'Annika') + .setRegister('last_name', 'Weiss') + .setRegister('phone_number', '5559876543'); + +options.op = mapOp; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map12 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end, + Map11), +Map13 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"last_name">>, register}, + fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end, + Map12), +Map14 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end, + Map13). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "first_name_register": "Annika", + "last_name_register": "Weiss", + "phone_number_register": "5559876543" + } + } + } + } + ' +``` + +The value of a register in a map can be obtained without a special +method: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +String annikaFirstName = response.getDatatype() + .getMap("annika_info") + .getRegister("first_name") + .view() + .toString(); +``` + +```ruby +map.maps['annika_info'].registers['first_name'] + +# "Annika" +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getRegister('first_name'); // Annika +``` + +```python +map.reload().maps['annika_info'].registers['first_name'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Registers.GetValue("first_name"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var annikaFirstName = + rslt.map.maps['annika_info'].registers['first_name'].toString('utf8'); +}); +``` + +```erlang +riakc_map:dirty_value(Map14). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +Registers can also be removed: + +```java +// This example uses our "ahmedMap" location from above. 
Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+    .removeRegister("first_name");
+MapUpdate ahmedUpdate = new MapUpdate()
+    .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('first_name')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->removeRegister('first_name');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['first_name']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("first_name");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('first_name');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+%% Note: this example removes the phone_number register rather than
+%% first_name, matching the curl example below.
+Map15 = riakc_map:update({<<"annika_info">>, map},
+    fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+    Map14).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "annika_info_map": {
+          "remove": ["phone_number_register"]
+        }
+      },
+      "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+    }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+    .update("enterprise_plan", new FlagUpdate(false))
+    .update("family_plan", new FlagUpdate(false))
+    .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+    .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_plan', false)
+    ->updateFlag('family_plan', false)
+    ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"enterprise_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map15),
+Map17 = riakc_map:update(
+    
{<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"family_plan">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + M) end, + Map16), +Map18 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"free_plan">>, flag}, + fun(F) -> riakc_flag:enable(F) end, + M) end, + Map17). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "enterprise_plan_flag": "disable", + "family_plan_flag": "disable", + "free_plan_flag": "enable" + } + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag==" + } + ' +``` + +The value of a flag can be retrieved at any time: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +boolean enterprisePlan = response.getDatatype() + .getMap("annika_info") + .getFlag("enterprise_plan") + .view(); +``` + +```ruby +map.maps['annika_info'].flags['enterprise_plan'] + +# false +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false +``` + +```python +map.reload().maps['annika_info'].flags['enterprise_plan'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Flags["enterprise_plan"]; +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var enterprisePlan = + rslt.map.maps.annika_info.flags.enterprise_plan; +}); +``` + +```erlang +riakc_map:dirty_value(Map18). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +It's also important to track the number of purchases that Annika has +made with our company. 
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "interests_set": { + "add": "tango dancing" + } + } + } + } + } + ' +``` + +We can remove that interest in just the way that we would expect: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().remove("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .withUpdate(ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].remove('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withContext($response->getMap()->getContext()) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].discard('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing"); + +// Note: using Context from previous response +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); +client.Execute(builder.Build()); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + var annika_map = mapOp.map('annika_info'); + annika_map.removeFromSet('interests', 'tango dancing'); + + options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp, + context: rslt.context + }; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map21 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end, + M) end, + Map20). 
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "annika_info_map": {
+          "update": {
+            "interests_set": {
+              "remove": "tango dancing"
+            }
+          }
+        }
+      },
+      "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+    }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+    .update("first_purchase", new FlagUpdate(true))
+    .update("amount", new RegisterUpdate("1271"))
+    .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+    .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+    .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('first_purchase', true)
+    ->updateRegister('amount', '1271')
+    ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new 
Error(err); + } +}); +``` + +```erlang +Map22 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"purchase">>, map}, + fun(M) -> riakc_map:update( + {<<"first_purchase">>, flag}, + fun(R) -> riakc_flag:enable(R) end, + M) end, + M) end, + Map21 +). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "purchase_map": { + "update": { + "first_purchase_flag": "enable", + "amount_register": "1271", + "items_set": { + "add": "large widget" + } + } + } + } + } + } + } + ' +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/data-types/sets.md b/content/riak/kv/2.9.7/developing/data-types/sets.md new file mode 100644 index 0000000000..94626d9981 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/data-types/sets.md @@ -0,0 +1,773 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Sets" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Sets" + identifier: "data_types_sets" + weight: 101 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.7/dev/using/data-types/sets + - /riak/kv/2.9.7/dev/using/data-types/sets + - /riak/2.9.7/dev/data-modeling/data-types/sets + - /riak/kv/2.9.7/dev/data-modeling/data-types/sets +--- + +Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps). + +Sets are collections of unique binary values (such as strings). All of +the values in a set are unique. + +For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter `set`: + +```bash +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +``` + +> **Note** +> +> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status sets +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `sets` bucket type has been set properly we should see the following pair in our console output: + +``` +datatype: set +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate sets +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status sets +``` + +After creating and activating our new `sets` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair. 
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. 
The following would +# set all set buckets to use the sets bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets' + +# This would enable us to create our set without specifying a bucket +# type: +travel = client.bucket('travel') +cities_set = Riak::Crdt::Set.new(travel, 'cities') +``` + +```php +$location = new \Basho\Riak\Location('cities', 'travel', 'sets'); +``` + +```python +travel = client.bucket_type('sets').bucket('travel') + +# The client detects the bucket type's data type and automatically +# returns the right data type for you, in this case a Riak set. +cities_set = travel.new('cities') + +# You can also create a reference to a set explicitly: +from riak.datatypes import Set + +cities_set = Set(travel, 'cities') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Now we'll create a Builder object for the set with which we want to +// interact: +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); +``` + +```javascript +// Now we'll create a options object for the set with which we want to +// interact: +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +``` + +```erlang +CitiesSet = riakc_set:new(). + +%% Sets in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +# You cannot create an empty set through the HTTP interface. Sets can +# only be created when an element is added to them, as in the examples +# below. +``` + +Upon creation, our set is empty. We can verify that it is empty at any +time: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +RiakSet set = response.getDatatype(); +boolean isEmpty = set.viewAsSet().isEmpty(); +``` + +```ruby +cities_set.empty? +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($set->getData()); +``` + +```python +len(cities_set) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); + +FetchSet fetchSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchSetCommand); +SetResponse response = fetchSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("set 'cities' is not found!"); + } +}); +``` + +```erlang +riakc_set:size(CitiesSet) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% set that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a Set + +But let's say that we read a travel brochure saying that Toronto and +Montreal are nice places to go. 
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+    .remove("Montreal")
+    .add("Hamilton")
+    .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Hamilton')
+    ->add('Ottawa')
+    ->remove('Montreal')
+    ->atLocation($location)
+    ->withContext($response->getSet()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse response = cmd.Response;
+
+// using System.Linq
+var responseStrings = response.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4). 
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="} + +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}' +``` + +## Retrieve a Set + +Now, we can check on which cities are currently in our set: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); +for (BinaryValue city : binarySet) { + System.out.println(city.toStringUtf8()); +} +``` + +```ruby +cities_set.members + +# +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +var_dump($set->getData()); +``` + +```python +cities_set.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, where the call above would return +# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would +# return frozenset([]). + +cities_set.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any unsent additions or deletions. +cities_set.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +foreach (var value in setResponse.AsStrings) +{ + Console.WriteLine("Cities Set Value: {0}", value); +} + +// Output: +// Cities Set Value: Hamilton +// Cities Set Value: Ottawa +// Cities Set Value: Toronto +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function(err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("cities set values: '%s'", + rslt.values.join(', ')); +}); + +// Output: +// info: cities set values: 'Hamilton, Ottawa, Toronto' +``` + +```erlang +riakc_set:dirty_value(CitiesSet5). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, where the call above would return +%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would +%% return []. These are essentially ordsets: + +riakc_set:value(CitiesSet5). + +%% To fetch the value stored on the server, use the call below: + +{ok, SetX} = riakc_pb_socket:fetch_type(Pid, + {<<"sets">>,<<"travel">>}, + <<"cities">>). 
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="} + +# You can also fetch the value of the set without the context included: +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false + +# Response +{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]} +``` + +## Find Set Member + +Or we can see whether our set includes a specific member: + +```java +// Using our "citiesSet" from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); + +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver"))); +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa"))); +``` + +```ruby +cities_set.include? 'Vancouver' +# false + +cities_set.include? 'Ottawa' +# true +``` + +```php +in_array('Vancouver', $set->getData()); # false + +in_array('Ottawa', $set->getData()); # true +``` + +```python +'Vancouver' in cities_set +# False + +'Ottawa' in cities_set +# True +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +using System.Linq; + +bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver"); +bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa"); +``` + +```javascript +// Use standard javascript array method indexOf() + +var cities_set = result.values; +cities_set.indexOf('Vancouver'); // if present, index is >= 0 +cities_set.indexOf('Ottawa'); // if present, index is >= 0 +``` + +```erlang +%% At this point, Set5 is the most "recent" set from the standpoint +%% of our application. + +riakc_set:is_element(<<"Vancouver">>, CitiesSet5). +riakc_set:is_element(<<"Ottawa">>, CitiesSet5). +``` + +```curl +# With the HTTP interface, this can be determined from the output of +# a fetch command like the one displayed in the example above +``` + +## Size of Set + +We can also determine the size of the set: + +```java +// Using our "citiesSet" from above: + +int numberOfCities = citiesSet.size(); +``` + +```ruby +cities_set.members.length +``` + +```php +count($set->getData()); +``` + +```python +len(cities_set) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +using System.Linq; + +// Note: this enumerates the IEnumerable +setResponse.Values.Count(); +``` + +```javascript +// Use standard javascript array property length + +var cities_set_size = result.values.length; +``` + +```erlang +riakc_set:size(CitiesSet5). 
+``` + +```curl +# With the HTTP interface, this can be determined from the output of +# a fetch command like the one displayed in the example above +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/faq.md b/content/riak/kv/2.9.7/developing/faq.md new file mode 100644 index 0000000000..fdcbd6b893 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/faq.md @@ -0,0 +1,574 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Frequently Asked Questions" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Developing FAQ" + identifier: "developing_faq" + weight: 108 + parent: "developing" +toc: true +aliases: + - /riak/2.9.7/community/faqs/developing + - /riak/kv/2.9.7/community/faqs/developing +--- + +[[Basho Bench]: {{}}riak/kv/2.9.7/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/2.9.7/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/2.9.7/developing/usage +[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js +[commit hooks]: {{}}riak/kv/2.9.7/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/2.9.7/configuring/reference +[contrib.basho.com]: https://github.com/basho/riak_function_contrib +[Erlang Riak Client]: {{}}riak/kv/2.9.7/developing/client-libraries +[MapReduce]: {{}}riak/kv/2.9.7/developing/usage/mapreduce +[Memory]: {{}}riak/kv/2.9.7/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/2.9.7/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/2.9.7/learn/concepts/causal-context#vector-clocks + + +## General + + +**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?** + +**A:** + If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example: + + ```erlang + {bitcask, [ + {data_root, "data/bitcask"}, + {expiry_secs, 86400} %% Expire after a day + ]}, + ``` + + There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0. + + You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM. + +**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?** + + +**A:** + Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free. + + If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact. + +**Q: Can I list buckets or keys in production?** + + +**A:** + It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size. 
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3.
+
+  For this example, I am using small integers instead of the actual 160-bit partition index values for the sake of simplicity. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode that owns the key and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+  <table>
+  <tr><td>0-1-2</td><td>1-2-3</td><td>2-3-4</td><td>3-4-5</td></tr>
+  <tr><td>4-5-6</td><td>5-6-7</td><td>6-7-8</td><td>7-8-9</td></tr>
+  <tr><td>8-9-10</td><td>9-10-11</td><td>10-11-12</td><td>11-12-13</td></tr>
+  <tr><td>12-13-14</td><td>13-14-15</td><td>14-15-0</td><td>15-0-1</td></tr>
+  </table>
+ + Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes: + + + + + + + + + + + + + + + + +
+
+  <table>
+  <tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-2-5-8-10-13</td></tr>
+  <tr><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-4-7-10-13</td><td>0-3-5-7-10-13</td></tr>
+  <tr><td>0-3-5-8-10-13</td><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-7-10-13</td></tr>
+  <tr><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td></tr>
+  <tr><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td></tr>
+  <tr><td>0-3-6-9-12-15</td><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td></tr>
+  <tr><td>1-3-6-9-11-14</td><td>1-3-6-9-12-14</td><td>1-3-6-9-12-15</td><td>1-4-5-8-11-14</td></tr>
+  <tr><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td></tr>
+  <tr><td>1-4-7-8-11-14</td><td>1-4-7-9-11-14</td><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td></tr>
+  <tr><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td></tr>
+  <tr><td>1-4-7-10-13-15</td><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-4-7-9-12-15</td></tr>
+  <tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-6-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+  <tr><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td><td>2-5-8-9-12-15</td><td>2-5-8-10-12-15</td></tr>
+  <tr><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-13-15</td><td>2-5-8-11-14-15</td></tr>
+  </table>
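+
+  If you want to check that count rather than take it on faith, here is a quick brute-force sketch (ours, not part of the original answer) in Python:
+
+  ```python
+  # Brute-force check of the covering-set count above: with ring size 16 and
+  # n_val 3, preflist i is {i, i+1, i+2} (mod 16), and a covering set of 6
+  # vnodes must share at least one vnode with every preflist.
+  from itertools import combinations
+
+  RING_SIZE, N_VAL = 16, 3
+  preflists = [{(i + j) % RING_SIZE for j in range(N_VAL)}
+               for i in range(RING_SIZE)]
+
+  covering_sets = [s for s in combinations(range(RING_SIZE), 6)
+                   if all(pl & set(s) for pl in preflists)]
+  print(len(covering_sets))  # => 56
+  ```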
+
+  When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency.
+  A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for over half of them to respond. This results in consistent responses to GETs even when one of the vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may include only 1 member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+  Continuing with the above example, consider what happens if node C is force-removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+  In this new 4-node configuration, any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+  So, making a couple of assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+     ```
+     a - 0-1-2
+     b - 6-7-8
+     c - 10-11-12
+     ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys will vary depending on the vnodes chosen for the coverage set. Of the 56 possible covering sets:
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+    <table>
+    <tr><td>0-2-5-8-10-13</td><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-5-8-10-13</td></tr>
+    <tr><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td></tr>
+    <tr><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td></tr>
+    <tr><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td><td>1-3-6-9-11-14</td></tr>
+    <tr><td>1-4-5-8-11-14</td><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-7-8-11-14</td></tr>
+    </table>
+ + * 24 sets (42.9%) will return 2 of the 3 keys: + + + + + + + + + + + +
+
+    <table>
+    <tr><th colspan="4"><code>{a,b}</code> (7 sets)</th></tr>
+    <tr><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td><td>0-3-6-9-12-15</td><td>1-3-6-9-12-14</td></tr>
+    <tr><td>1-3-6-9-12-15</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td><td></td></tr>
+    <tr><th colspan="4"><code>{a,c}</code> (12 sets)</th></tr>
+    <tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-3-4-7-10-13</td></tr>
+    <tr><td>0-3-5-7-10-13</td><td>0-3-6-7-10-13</td><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td></tr>
+    <tr><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td><td>1-4-7-10-13-15</td><td>1-4-7-9-11-14</td></tr>
+    <tr><th colspan="4"><code>{b,c}</code> (5 sets)</th></tr>
+    <tr><td>2-5-8-10-12-15</td><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-14-15</td></tr>
+    <tr><td>2-5-8-11-13-15</td><td></td><td></td><td></td></tr>
+    </table>
+ + * 10 sets (17.8%) will return only one of the 3 keys: + + + + + + + +
+
+    <table>
+    <tr><th colspan="4"><code>{a}</code> (2 sets)</th></tr>
+    <tr><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td><td></td><td></td></tr>
+    <tr><th colspan="4"><code>{b}</code> (4 sets)</th></tr>
+    <tr><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-5-6-9-12-15</td><td>2-5-8-9-12-15</td></tr>
+    <tr><th colspan="4"><code>{c}</code> (4 sets)</th></tr>
+    <tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td></tr>
+    </table>
+ + * 2 sets (3.6%) will not return any of the 3 keys + + +
+
+    <table>
+    <tr><td>2-4-7-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+    </table>
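+
+  Extending the sketch above (again ours, not from the original answer), and assuming a key is returned whenever the covering set contains at least one vnode from that key's preflist that still holds data (i.e. a vnode outside the newly claimed set {2, 7, 12}), a few more lines reproduce the 20/24/10/2 breakdown:
+
+  ```python
+  # Classify each covering set by which of the keys a, b, c it would return,
+  # assuming vnodes 2, 7, and 12 are the empty, newly claimed replicas.
+  from collections import Counter
+  from itertools import combinations
+
+  RING_SIZE, N_VAL = 16, 3
+  preflists = [{(i + j) % RING_SIZE for j in range(N_VAL)}
+               for i in range(RING_SIZE)]
+  covering_sets = [set(s) for s in combinations(range(RING_SIZE), 6)
+                   if all(pl & set(s) for pl in preflists)]
+
+  key_preflists = {'a': {0, 1, 2}, 'b': {6, 7, 8}, 'c': {10, 11, 12}}
+  empty_vnodes = {2, 7, 12}
+
+  tally = Counter(
+      frozenset(k for k, pl in key_preflists.items()
+                if (pl - empty_vnodes) & cover)
+      for cover in covering_sets)
+
+  # Grouped by how many keys come back: 3 keys -> 20 sets, 2 keys -> 24,
+  # 1 key -> 10, no keys -> 2
+  for keys, count in sorted(tally.items(), key=lambda kv: -len(kv[0])):
+      print(sorted(keys), count)
+  ```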
+ +**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. + +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories.
+
+  ```erlang
+  ClusterNode = 'riak@127.0.0.1'.
+
+  application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+  {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+  Ring2 = setelement(2, Ring, node()).
+  riak_core_ring_manager:set_my_ring(Ring2).
+  riak_core_ring_manager:write_ringfile().
+  [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()].
+  ```
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+  There isn't a limit on object size, but we suggest you keep it to no more than 1-2 MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or, if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+  The storage per key is 40 bytes plus the key size and bucket name size.
+
+  Example:
+
+  Key size: 15 bytes.
+  Bucket Name size: 10 bytes.
+
+  Total size = 40 + 15 + 10 = **65 bytes**.
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+  It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+  The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+  Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster.
+
+  The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+
+**Q: Are Riak keys / buckets case sensitive?**
+
+
+**A:**
+  Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+  We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+  1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+  2. You will be able to upgrade Riak and your application independently of one another.
+  3. When your application or Riak need more capacity, you can scale them separately to meet your production needs.
+
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+  Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+**Q: How do I spread requests across---i.e. 
load balance---a Riak cluster?**
+
+
+**A:**
+ There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+ For further information see [System Planning].
+
+
+**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?**
+ There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node, a series of merges is kicked off and the total size of the data directory shrinks. Why does this happen?
+
+
+**A:**
+ Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows:
+
+ 1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`)
+ 2. Remove the currently active file from the list; the active file is the one being actively written
+ 3. Look up file stats for each data file; this includes percent fragmentation and number of dead bytes
+ 4. If any of the stats exceed the defined triggers, the Bitcask directory is merged
+
+ The default triggers for a Bitcask directory:
+
+ * `{frag_merge_trigger, 60}, % >= 60% fragmentation`
+ * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB`
+
+ In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory.
+
+ If Riak were never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default).
+
+**Q: When retrieving a list of siblings I am getting the same vtag multiple times.**
+ When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling?
+
+
+**A:**
+ The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata.
+
+ It is possible to get siblings with the same vtag during vector clock pruning and read/repair.
+
+ See [vector clocks] for more information.
+
+**Q: How should I structure larger data objects?**
+ I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs?
+
+
+**A:**
+ The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include:
+
+ 1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time?
+ 2. How likely are the objects to be updated at the same time by multiple processes?
+
+ If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. Generally, you will want to add links to connect the objects for easy fetching and traversal, as sketched below.
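+
+ As a concrete illustration, here is a minimal Erlang sketch of such a split using the official Erlang client's `riakc_pb_socket` and `riakc_obj` modules. The bucket names, keys, node address, and record-style values are invented for this example; adjust them to your own schema:
+
+ ```erlang
+ %% Connect to a local node (the address and port are assumptions for this sketch).
+ {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+
+ %% Store each child object under its own key...
+ Order1 = riakc_obj:new(<<"orders">>, <<"order_1">>, {order, 1, 415.98}).
+ Order2 = riakc_obj:new(<<"orders">>, <<"order_2">>, {order, 2, 359.99}).
+ ok = riakc_pb_socket:put(Pid, Order1).
+ ok = riakc_pb_socket:put(Pid, Order2).
+
+ %% ...while the parent stays small, holding only its own fields plus the
+ %% child keys that link the pieces together for later fetching.
+ Customer = riakc_obj:new(<<"customers">>, <<"customer_1">>,
+                          {customer, "Jane", [<<"order_1">>, <<"order_2">>]}).
+ ok = riakc_pb_socket:put(Pid, Customer).
+ ```
+
+ Updating a single order now rewrites one small object rather than the entire denormalized document.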
+
+**Q: Is there any way in Riak to limit access to a user or a group of users?**
+
+
+**A:**
+ Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication.
+
+ If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it.
+
+
+**Q: Is there a way to enforce a schema on data in a given bucket?**
+ Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error.
+
+
+**A:**
+ Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. That document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in JavaScript that restricts non-JSON content.
+
+**Q: How does the Erlang Riak Client manage node failures?**
+ Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down?
+
+
+**A:**
+ The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function.
+
+ * `queue_if_disconnected` (default: `false`) - requests will be queued when the connection to the server is lost.
+ * `auto_reconnect` (default: `false`) - if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`.
+
+ If both of these options are `false`, connection errors will be returned to the process making requests as `{error, Reason}` tuples.
+
+
+**Q: Is there a limiting factor for the number of buckets in a cluster?**
+
+
+**A:**
+ As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node.
+
+ More on [Bucket Properties].
+
+
+**Q: Is it possible to configure a single bucket's properties in `app.config`?**
+
+
+**A:**
+ Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state.
+
+ You can read more on `app.config` in [Configuration Files].
+
+
+**Q: Is there a simple command to delete a bucket?**
+
+
+**A:**
+ There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work:
+
+ ```curl
+ curl -X DELETE http://your-host:8098/riak/your-bucket
+ ```
+
+
+**Q: Can Riak be configured to fail an update instead of generating a conflict?**
+
+
+**A:**
+ No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do.
+
+
+**Q: How can I limit the number of keys retrieved?**
+
+
+**A:**
+ You'll need to use a [MapReduce] job for this.
+
+ You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster.
It will only reduce load on your client. + + +**Q: How is the real hash value for replicas calculated based on the preflist?** + + +**A:** + The hash is calculated first and then the next subsequent *N* partitions are chosen for the preflist. + + +**Q: Do client libraries support load balancing/round robin?** + + +**A:** + + * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred. + * The Java client is strictly round robin, but with retries built in. + * The Python client also follows round robin without retries. + * The Erlang client does not support any load balancing. + +## MapReduce + + +**Q: Does the number of keys in a bucket affect the performance of MapReduce?** + + +**A:** + Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run. + + +**Q: How do I filter out `not_found` from MapReduce results?** + If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys. + + +**A:** + There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list]. + + +**Q: Is it possible to call a reduce function at specific intervals during a map function?** + When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often. + + +**A:** + Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce. + + +**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?** + + +**A:** + Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase. + + +**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?** + + +**A:** + This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com]. + + +**Q: Why does MapReduce return a JSON object on occasion instead of an array?** + + +**A:** + `mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---is turned into a hash: + + ```erlang + list_to_binary(mochijson2:encode([{a , b}, {foo, bar}])). + <<"{\"a\":\"b\",\"foo\":\"bar\"}">> + ``` + + JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists. + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started.md b/content/riak/kv/2.9.7/developing/getting-started.md new file mode 100644 index 0000000000..394c5f1988 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started.md @@ -0,0 +1,51 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +aliases: +--- + +[install index]: {{}}riak/kv/2.9.7/setup/installing +[dev client libraries]: {{}}riak/kv/2.9.7/developing/client-libraries + +Welcome, new Riak developer! 
This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
    +
+* [Java]({{}}riak/kv/2.9.7/developing/getting-started/java)
+* [Ruby]({{}}riak/kv/2.9.7/developing/getting-started/ruby)
+* [Python]({{}}riak/kv/2.9.7/developing/getting-started/python)
+* [C Sharp]({{}}riak/kv/2.9.7/developing/getting-started/csharp)
+* [Node.js]({{}}riak/kv/2.9.7/developing/getting-started/nodejs)
+* [Erlang]({{}}riak/kv/2.9.7/developing/getting-started/erlang)
+* [PHP]({{}}riak/kv/2.9.7/developing/getting-started/php)
+* [Go]({{}}riak/kv/2.9.7/developing/getting-started/golang)
+ +### Community-supported Client Libraries + +Please see our [client libraries][dev client libraries] page for a listing of +community-supported clients. + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/csharp.md b/content/riak/kv/2.9.7/developing/getting-started/csharp.md new file mode 100644 index 0000000000..3e2a7a0451 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/csharp.md @@ -0,0 +1,86 @@ +--- +title: "Getting Started with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "C Sharp" + identifier: "getting_started_csharp" + weight: 103 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/csharp + - /riak/kv/2.9.7/dev/taste-of-riak/csharp +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.7/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. + +### Client Setup + +Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager. + +{{% note title="Configuring for a remote cluster" %}} +By default, the Riak .NET Client will add a section to your `app.config` file +for a four node local cluster. If you are using a remote cluster, open up +`app.config` and change the `hostAddress` values to point to nodes in your +remote cluster. +{{% /note %}} + +### Connecting to Riak + +Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object. + +```csharp +using System; +using RiakClient; + +namespace TasteOfRiak +{ + class Program + { + static void Main(string[] args) + { + // don't worry, we'll use this string later + const string contributors = "contributors"; + IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig"); + IRiakClient client = cluster.CreateClient(); + } + } +} +``` + +This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndpoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak. + +Let's make sure the cluster is online. Add this to your `Main` method: + +```csharp +var pingResult = client.Ping(); + +if (pingResult.IsSuccess) +{ + Console.WriteLine("pong"); +} +else +{ + Console.WriteLine("Are you sure Riak is running?"); + Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage); +} +``` + +This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes. + +We are now ready to start interacting with Riak. 
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.7/developing/getting-started/csharp/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/2.9.7/developing/getting-started/csharp/crud-operations.md
new file mode 100644
index 0000000000..0dd71b2a5e
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/csharp/crud-operations.md
@@ -0,0 +1,148 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "CRUD Operations"
+    identifier: "getting_started_csharp_crud"
+    weight: 100
+    parent: "getting_started_csharp"
+toc: true
+aliases:
+---
+
+### Creating Objects In Riak
+
+Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak.
+
+The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At its most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak.
+
+Add the `RiakClient.Models` namespace to your using directive. Your usings should look like this:
+
+```csharp
+using System;
+using System.Collections.Generic;
+using RiakClient;
+using RiakClient.Models;
+```
+
+Add the `Person` class to the `TasteOfRiak` namespace:
+
+```csharp
+public class Person
+{
+    public string EmailAddress { get; set; }
+    public string FirstName { get; set; }
+    public string LastName { get; set; }
+}
+```
+
+Now let's create some people!
+
+```csharp
+var people = new[]
+{
+    new Person {
+        EmailAddress = "bashoman@basho.com",
+        FirstName = "Basho",
+        LastName = "Man"
+    },
+    new Person {
+        EmailAddress = "johndoe@gmail.com",
+        FirstName = "John",
+        LastName = "Doe"
+    }
+};
+
+foreach (var person in people)
+{
+    var o = new RiakObject(contributors, person.EmailAddress, person);
+    var putResult = client.Put(o);
+
+    if (putResult.IsSuccess)
+    {
+        Console.WriteLine("Successfully saved {0} to bucket {1}", o.Key, o.Bucket);
+    }
+    else
+    {
+        Console.WriteLine("Are you *really* sure Riak is running?");
+        Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage);
+    }
+}
+```
+
+In this sample, we create a collection of `Person` objects and then save each `Person` to Riak.
+
+Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`.
+
+Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be an error message displaying the result code and a helpful error message.
+
+### Reading from Riak
+
+Let's find a person!
+
+```csharp
+var result = client.Get(contributors, "bashoman@basho.com");
+if (result.IsSuccess)
+{
+    bashoman = result.Value.GetObject<Person>();
+    Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors);
+}
+else
+{
+    Console.WriteLine("Something went wrong!");
+    Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage);
+}
+```
+
+We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult` which, like other RiakResults, helpfully encapsulates the communication with Riak.
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/2.9.7/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..9d8f9cbf5e --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,111 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/2.9.7/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br />`marketing_group_Inbox_2014-03-06`
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting UTC datetime in an [ISO 8601][iso_8601]
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, please refer to
+[this source code][2] for the repositories that we'll be using.
+
+[This console application][3] exercises the code that we've written.
+
+The repository pattern and `TimelineManager` help with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/csharp/querying.md b/content/riak/kv/2.9.7/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..399631297f --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/csharp/querying.md @@ -0,0 +1,214 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/querying-csharp + - /riak/kv/2.9.7/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+ +```csharp +Console.WriteLine("Creating Data"); +Customer customer = CreateCustomer(); +IEnumerable orders = CreateOrders(customer); +OrderSummary orderSummary = CreateOrderSummary(customer, orders); + +Console.WriteLine("Starting Client"); +using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = endpoint.CreateClient(); + + Console.WriteLine("Storing Data"); + + client.Put(ToRiakObject(customer)); + + foreach (Order order in orders) + { + // NB: this adds secondary index data as well + client.Put(ToRiakObject(order)); + } + + client.Put(ToRiakObject(orderSummary)); + + ... + ... + ... +} +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```csharp +Console.WriteLine("Fetching related data by shared key"); +string key = "1"; + +var result = client.Get(customersBucketName, key); +CheckResult(result); +Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result)); + +result = client.Get(orderSummariesBucketName, key); +CheckResult(result); +Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result)); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.7/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will make a note of where +secondary index data is added to our model objects. 
+
+```csharp
+private static RiakObject ToRiakObject(Order order)
+{
+    var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString());
+    var riakObject = new RiakObject(orderRiakObjectId, order);
+
+    IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName);
+    salesPersonIdIndex.Add(order.SalesPersonId.ToString());
+
+    BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName);
+    orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd"));
+
+    return riakObject;
+}
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level. Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+```csharp
+// Query for order keys where the SalesPersonId index is set to 9000
+var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName);
+RiakResult<RiakIndexResult> indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here.
+CheckResult(indexRiakResult);
+RiakIndexResult indexResult = indexRiakResult.Value;
+Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
+```
+
+Which returns:
+
+```text
+Jane's orders (key values): 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's ID; next, let's use a "binary" index. Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate` binary index for entries between `2013-10-01` and `2013-10-31`.
+
+```csharp
+// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
+riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName);
+indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here.
+CheckResult(indexRiakResult);
+indexResult = indexRiakResult.Value;
+Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
+```
+
+Which returns:
+
+```text
+October orders (key values): 1, 2
+```
+
+We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/erlang.md b/content/riak/kv/2.9.7/developing/getting-started/erlang.md new file mode 100644 index 0000000000..882fa92bd0 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/erlang.md @@ -0,0 +1,59 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/erlang + - /riak/kv/2.9.7/dev/taste-of-riak/erlang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.7/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let’s create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.7/developing/getting-started/erlang/crud-operations) + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/2.9.7/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..797daa4b80 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,172 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let’s store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}. 
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
+``` + +Next let’s clean up our mess: + +```erlang +riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>). +riakc_pb_socket:stop(Pid). +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.9.7/developing/getting-started/erlang/object-modeling.md new file mode 100644 index 0000000000..ff7ac783f8 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/erlang/object-modeling.md @@ -0,0 +1,342 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Object Modeling" + identifier: "getting_started_erlang_object" + weight: 102 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/object-modeling-erlang + - /riak/kv/2.9.7/dev/taste-of-riak/object-modeling-erlang +--- + +To get started, let's create the records that we'll be using. + +{{% note title="Code Download" %}} +You can also download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema). + +The Github version includes Erlang type specifications which have been omitted +here for brevity. +{{% /note %}} + + +```erlang +%% msgy.hrl + +-define(USER_BUCKET, <<"Users">>). +-define(MSG_BUCKET, <<"Msgs">>). +-define(TIMELINE_BUCKET, <<"Timelines">>). +-define(INBOX, "Inbox"). +-define(SENT, "Sent"). + +-record(user, {user_name, full_name, email}). + +-record(msg, {sender, recipient, created, text}). + +-record(timeline, {owner, msg_type, msgs}). +``` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.7/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. + + +Bucket | Key Pattern | Example Key +:------|:------------|:----------- +`Users` | `` | `joeuser` +`Msgs` | `_` | `joeuser_2014-03-06T02:05:13.223556Z` +`Timelines` | `__` | `joeuser_Sent_2014-03-06Z`
`marketing_group_Inbox_2014-03-06Z` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. For the +`Msgs` bucket, let's use a combination of the username and the posting +datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601) +format. This combination gives us the pattern `_`, +which produces keys like `joeuser_2014-03-05T23:20:28Z`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `_Inbox_` for groups, which will look like +`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2 MB. Objects larger than that can +hurt performance, especially if many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, let's write some modules to +act as repositories that will help us create and work with these records +in Riak: + +```erlang +%% user_repository.erl + +-module(user_repository). +-export([save_user/2, + get_user/2]). +-include("msgy.hrl"). + +save_user(ClientPid, User) -> + RUser = riakc_obj:new(?USER_BUCKET, + list_to_binary(User#user.user_name), + User), + riakc_pb_socket:put(ClientPid, RUser). + +get_user(ClientPid, UserName) -> + {ok, RUser} = riakc_pb_socket:get(ClientPid, + ?USER_BUCKET, + list_to_binary(UserName)), + binary_to_term(riakc_obj:get_value(RUser)). +``` + +
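+
+For illustration (this snippet is not part of the original chapter code), a round trip through the repository above might look like the following in the Erlang shell, assuming the repository modules are compiled and a node is listening on the protocol buffers port used earlier in this guide:
+
+```erlang
+%% Load the record definitions from msgy.hrl into the shell.
+rr("msgy.hrl").
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017).
+Joe = #user{user_name="joeuser", full_name="Joe User", email="joe.user@basho.com"}.
+user_repository:save_user(Pid, Joe).
+user_repository:get_user(Pid, "joeuser"). %% returns the #user{} record we stored
+```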
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/erlang/querying.md b/content/riak/kv/2.9.7/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..bcc32e2058 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/erlang/querying.md @@ -0,0 +1,308 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/querying-erlang + - /riak/kv/2.9.7/dev/taste-of-riak/querying-erlang +--- + + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.9.7/developing/key-value-modeling). 
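+
+As a tiny illustration of the first idea---one logical entity sharing a single identity key across several buckets---a lookup might look like this. The bucket names are placeholders and `Pid` is assumed to be a connected `riakc_pb_socket` process; the rest of this page builds out the full version:
+
+```erlang
+%% One customer ID, two buckets: fetch each facet of the entity by the same key.
+Key = <<"1">>.
+{ok, Customer} = riakc_pb_socket:get(Pid, <<"Customers">>, Key).
+{ok, Summary}  = riakc_pb_socket:get(Pid, <<"OrderSummaries">>, Key).
```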
+ +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle= Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +## Remember to replace the ip and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). + +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj). 
+ +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.7/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
+
+AddIndicesToOrder = fun(OrderKey) ->
+  {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket,
+                                    list_to_binary(integer_to_list(OrderKey))),
+
+  OrderData = binary_to_term(riakc_obj:get_value(Order)),
+  OrderMetadata = riakc_obj:get_update_metadata(Order),
+
+  MD1 = riakc_obj:set_secondary_index(OrderMetadata,
+                                      [{{binary_index, "order_date"},
+                                        [FormatDate(OrderData#order.order_date)]}]),
+
+  MD2 = riakc_obj:set_secondary_index(MD1,
+                                      [{{integer_index, "salesperson_id"},
+                                        [OrderData#order.salesperson_id]}]),
+
+  Order2 = riakc_obj:update_metadata(Order, MD2),
+  riakc_pb_socket:put(Pid, Order2)
+end.
+
+lists:foreach(AddIndicesToOrder, [1,2,3]).
+
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we
+have to add entries to the indices at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```erlang
+riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"3">>],
+                      undefined,undefined}}
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id; next, let's use a "binary" index. Let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`order_date_bin` index for entries between `20131001` and `20131031`.
+
+```erlang
+riakc_pb_socket:get_index_range(Pid, OrderBucket,
+                                {binary_index, "order_date"},
+                                <<"20131001">>, <<"20131031">>).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"2">>],
+                      undefined,undefined}}
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So, to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indices can have either Integer or Binary (String) keys.
+* You can search for specific values, or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/golang.md b/content/riak/kv/2.9.7/developing/getting-started/golang.md
new file mode 100644
index 0000000000..461ceb567b
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/golang.md
@@ -0,0 +1,82 @@
+---
+title: "Getting Started with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Go"
+    identifier: "getting_started_go"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/taste-of-riak/golang
+  - /riak/kv/2.9.7/dev/taste-of-riak/golang
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.7/using/running-a-cluster) first and ensure you have
+[a working installation of Go](http://golang.org/doc/install).
+
+## Client Setup
+
+First, install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next, download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main
+
+import (
+    "encoding/binary"
+    "encoding/json"
+    "sync"
+
+    riak "github.com/basho/riak-go-client"
+    util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+    var err error
+
+    // un-comment-out to enable debug logging
+    // riak.EnableDebugLogging = true
+
+    o := &riak.NewClientOptions{
+        RemoteAddresses: []string{util.GetRiakAddress()},
+    }
+
+    var c *riak.Client
+    c, err = riak.NewClient(o)
+    if err != nil {
+        util.ErrExit(err)
+    }
+
+    defer func() {
+        if err := c.Stop(); err != nil {
+            util.ErrExit(err)
+        }
+    }()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.7/developing/getting-started/golang/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/golang/crud-operations.md b/content/riak/kv/2.9.7/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..a6c430ef0f
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,376 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+aliases:
+---
+
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+    val1 := uint32(1)
+    val1buf := make([]byte, 4)
+    binary.LittleEndian.PutUint32(val1buf, val1)
+
+    val2 := "two"
+
+    val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+    var val3json []byte
+    val3json, err = json.Marshal(val3)
+    if err != nil {
+        util.ErrExit(err)
+    }
+
+    bucket := "test"
+
+    util.Log.Println("Creating Objects In Riak...")
+
+    objs := []*riak.Object{
+        {
+            Bucket:      bucket,
+            Key:         "one",
+            ContentType: "application/octet-stream",
+            Value:       val1buf,
+        },
+        {
+            Bucket:      bucket,
+            Key:         "two",
+            ContentType: "text/plain",
+            Value:       []byte(val2),
+        },
+        {
+            Bucket:      bucket,
+            Key:         "three",
+            ContentType: "application/json",
+            Value:       val3json,
+        },
+    }
+
+    var cmd riak.Command
+    wg := &sync.WaitGroup{}
+
+    for _, o := range objs {
+        cmd, err = riak.NewStoreValueCommandBuilder().
+            WithContent(o).
+            Build()
+        if err != nil {
+            util.ErrLog.Println(err)
+            continue
+        }
+        a := &riak.Async{
+            Command: cmd,
+            Wait:    wg,
+        }
+        if err := c.ExecuteAsync(a); err != nil {
+            util.ErrLog.Println(err)
+        }
+    }
+
+    wg.Wait()
+```
+
+In our first object, we have stored the integer 1 with the lookup key
+of `one`:
+
+```golang
+{
+    Bucket:      bucket,
+    Key:         "one",
+    ContentType: "application/octet-stream",
+    Value:       val1buf,
+}
+```
+
+For our second object, we stored a simple string value of `two` with a
+matching key:
+
+```golang
+{
+    Bucket:      bucket,
+    Key:         "two",
+    ContentType: "text/plain",
+    Value:       []byte(val2),
+}
+```
+
+Finally, the third object we stored was a bit of JSON:
+
+```golang
+{
+    Bucket:      bucket,
+    Key:         "three",
+    ContentType: "application/json",
+    Value:       val3json,
+}
+```
+
+## Reading Objects
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+Requesting the objects by key:
+
+```golang
+var cmd riak.Command
+wg := &sync.WaitGroup{}
+
+util.Log.Println("Reading Objects From Riak...")
+
+d := make(chan riak.Command, len(objs))
+
+for _, o := range objs {
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucket(bucket).
+        WithKey(o.Key).
+        Build()
+    if err != nil {
+        util.ErrLog.Println(err)
+        continue
+    }
+    a := &riak.Async{
+        Command: cmd,
+        Wait:    wg,
+        Done:    d,
+    }
+    if err := c.ExecuteAsync(a); err != nil {
+        util.ErrLog.Println(err)
+    }
+}
+
+wg.Wait()
+close(d)
+```
+
+Now we can pull the completed fetches off the channel and verify that
+each value matches what we stored:
+
+```golang
+for done := range d {
+    f := done.(*riak.FetchValueCommand)
+    /* un-comment to dump fetched object as JSON
+    if json, jerr := json.MarshalIndent(f.Response, "", "  "); err != nil {
+        util.ErrLog.Println(jerr)
+    } else {
+        util.Log.Println("fetched value: ", string(json))
+    }
+    */
+    obj := f.Response.Values[0]
+    switch obj.Key {
+    case "one":
+        if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected {
+            util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+        }
+    case "two":
+        if actual, expected := string(obj.Value), val2; actual != expected {
+            util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+        }
+    case "three":
+        // hold on to the fetched object for the update example below
+        obj3 = obj
+        val3.MyValue = 0
+        if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil {
+            util.ErrLog.Println(jerr)
+        } else {
+            if actual, expected := val3.MyValue, int(3); actual != expected {
+                util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+            }
+        }
+    default:
+        util.ErrLog.Printf("unrecognized key: %s", obj.Key)
+    }
+}
+```
+
+## Updating Objects
+
+While some data may be static, other forms of data need to be
+updated.
+
+Let’s update some values:
+
+```golang
+util.Log.Println("Updating Object Three In Riak...")
+
+val3.MyValue = 42
+obj3.Value, err = json.Marshal(val3)
+if err != nil {
+    util.ErrExit(err)
+}
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithContent(obj3).
+    WithReturnBody(true).
+    Build()
+if err != nil {
+    util.ErrLog.Println(err)
+} else {
+    if err := c.Execute(cmd); err != nil {
+        util.ErrLog.Println(err)
+    }
+}
+
+svcmd := cmd.(*riak.StoreValueCommand)
+svrsp := svcmd.Response
+obj3 = svrsp.Values[0]
+val3.MyValue = 0
+if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil {
+    util.ErrLog.Println(jerr)
+} else {
+    if actual, expected := val3.MyValue, int(42); actual != expected {
+        util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected)
+    }
+}
+util.Log.Println("updated object key: ", obj3.Key)
+util.Log.Println("updated object value: ", val3.MyValue)
+```
+
+## Deleting Objects
+
+As a last step, we'll demonstrate how to delete data. Deleting is just
+another command: we build a `DeleteValue` command with each object's
+bucket and key, then execute it.
+
+```golang
+for _, o := range objs {
+    cmd, err = riak.NewDeleteValueCommandBuilder().
+        WithBucket(o.Bucket).
+        WithKey(o.Key).
+        Build()
+    if err != nil {
+        util.ErrLog.Println(err)
+        continue
+    }
+    a := &riak.Async{
+        Command: cmd,
+        Wait:    wg,
+    }
+    if err := c.ExecuteAsync(a); err != nil {
+        util.ErrLog.Println(err)
+    }
+}
+
+wg.Wait()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+
+For example, this `struct` that represents some information about
+a book:
+
+```golang
+// The JSON tags control the field names used in the stored JSON,
+// which is why the output further below is lower-cased
+type Book struct {
+    ISBN        string `json:"isbn"`
+    Title       string `json:"title"`
+    Author      string `json:"author"`
+    Body        string `json:"body"`
+    CopiesOwned uint16 `json:"copies_owned"`
+}
+
+book := &Book{
+    ISBN:        "1111979723",
+    Title:       "Moby Dick",
+    Author:      "Herman Melville",
+    Body:        "Call me Ishmael. Some years ago...",
+    CopiesOwned: 3,
+}
+```
+
+We now have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now:
+
+```golang
+var jbook []byte
+jbook, err = json.Marshal(book)
+if err != nil {
+    util.ErrExit(err)
+}
+
+bookObj := &riak.Object{
+    Bucket:      "books",
+    Key:         book.ISBN,
+    ContentType: "application/json",
+    Value:       jbook,
+}
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithContent(bookObj).
+    WithReturnBody(false).
+    Build()
+if err != nil {
+    util.ErrLog.Println(err)
+} else {
+    if err := c.Execute(cmd); err != nil {
+        util.ErrLog.Println(err)
+    }
+}
+```
+
+If we fetch our book back and print the data:
+
+```golang
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucket("books").
+    WithKey(book.ISBN).
+    Build()
+if err != nil {
+    util.ErrExit(err)
+}
+if err := c.Execute(cmd); err != nil {
+    util.ErrLog.Println(err)
+}
+
+fcmd := cmd.(*riak.FetchValueCommand)
+bookObj = fcmd.Response.Values[0]
+util.Log.Println(string(bookObj.Value))
+```
+
+The result is:
+
+```json
+{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville",
+"body":"Call me Ishmael. Some years ago...","copies_owned":3}
+```
+
+Now, let’s delete the book:
+
+```golang
+...
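+// The sample elides the actual delete; a minimal sketch, reusing the
+// client `c`, `cmd`, `err`, and the `book` object from the examples above:
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucket("books").
+    WithKey(book.ISBN).
+    Build()
+if err != nil {
+    util.ErrExit(err)
+}
+if err := c.Execute(cmd); err != nil {
+    util.ErrLog.Println(err)
+}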
+``` + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.9.7/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..29082ab808 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,552 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/object-modeling-golang + - /riak/kv/2.9.7/dev/taste-of-riak/object-modeling-golang +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.7/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+    "encoding/json"
+    "errors"
+
+    riak "github.com/basho/riak-go-client"
+    models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+    Get(key string, notFoundOk bool) (models.Model, error)
+    Save(models.Model) (models.Model, error)
+    getBucketName() string
+    getModel() models.Model
+    getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+    client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+    return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+    client := r.getClient()
+    bucket := r.getBucketName()
+    cmd, err := riak.NewFetchValueCommandBuilder().
+        WithBucket(bucket).
+        WithKey(key).
+        WithNotFoundOk(notFoundOk).
+        Build()
+    if err != nil {
+        return nil, err
+    }
+    if err = client.Execute(cmd); err != nil {
+        return nil, err
+    }
+
+    fcmd := cmd.(*riak.FetchValueCommand)
+
+    if notFoundOk && len(fcmd.Response.Values) == 0 {
+        return nil, nil
+    }
+
+    if len(fcmd.Response.Values) > 1 {
+        // Siblings present that need resolution
+        // Here we'll just return an unexpected error
+        return nil, ErrUnexpectedSiblings
+    } else {
+        return buildModel(r.getModel(), fcmd.Response.Values[0])
+    }
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+    client := r.getClient()
+    bucket := r.getBucketName()
+    key := m.GetId()
+
+    cmd, err := riak.NewFetchValueCommandBuilder().
+        WithBucket(bucket).
+        WithKey(key).
+        WithNotFoundOk(true).
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names. +* How to choose natural keys based on how we want to partition our data. + + + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/golang/querying.md b/content/riak/kv/2.9.7/developing/getting-started/golang/querying.md new file mode 100644 index 0000000000..65ea89b868 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/golang/querying.md @@ -0,0 +1,580 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Go" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Querying" + identifier: "getting_started_go_query" + weight: 101 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/querying-golang + - /riak/kv/2.9.7/dev/taste-of-riak/querying-golang +--- + +## Go Version Setup + +For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix* OS. + +>A Quick Note on Querying and Schemas: +> +>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it. 
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
+    Build()
+if err != nil {
+    util.ErrExit(err)
+}
+cmds = append(cmds, cmd)
+
+doneChan := make(chan riak.Command)
+errored = false
+for _, cmd := range cmds {
+    a := &riak.Async{
+        Command: cmd,
+        Done:    doneChan,
+    }
+    if eerr := c.ExecuteAsync(a); eerr != nil {
+        errored = true
+        util.ErrLog.Println(eerr)
+    }
+}
+if errored {
+    util.ErrExit(errors.New("error, exiting!"))
+}
+
+for i := 0; i < len(cmds); i++ {
+    select {
+    case d := <-doneChan:
+        if fv, ok := d.(*riak.FetchValueCommand); ok {
+            obj := fv.Response.Values[0]
+            switch obj.Bucket {
+            case customersBucket:
+                util.Log.Printf("Customer 1: %v", string(obj.Value))
+            case orderSummariesBucket:
+                util.Log.Printf("OrderSummary 1: %v", string(obj.Value))
+            }
+        } else {
+            util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+        }
+    case <-time.After(5 * time.Second):
+        util.ErrExit(errors.New("fetch operations took too long"))
+    }
+}
+```
+
+Which returns our amalgamated objects:
+
+```sh
+2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]}
+2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z"}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.7/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.7/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.7/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time:

+```golang
+util.Log.Println("Adding Index Data")
+
+// fetch orders to add index data
+cmds = cmds[:0]
+
+for _, order := range orders {
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucket(ordersBucket).
+        WithKey(order.Id).
+        Build()
+    if err != nil {
+        util.ErrExit(err)
+    }
+    cmds = append(cmds, cmd)
+}
+
+errored = false
+for _, cmd := range cmds {
+    a := &riak.Async{
+        Command: cmd,
+        Done:    doneChan,
+    }
+    if eerr := c.ExecuteAsync(a); eerr != nil {
+        errored = true
+        util.ErrLog.Println(eerr)
+    }
+}
+if errored {
+    util.ErrExit(errors.New("error, exiting!"))
+}
+
+errored = false
+for i := 0; i < len(cmds); i++ {
+    select {
+    case d := <-doneChan:
+        if fv, ok := d.(*riak.FetchValueCommand); ok {
+            obj := fv.Response.Values[0]
+            switch obj.Key {
+            case "1":
+                obj.AddToIntIndex("SalespersonId_int", 9000)
+                obj.AddToIndex("OrderDate_bin", "2013-10-01")
+            case "2":
+                obj.AddToIntIndex("SalespersonId_int", 9001)
+                obj.AddToIndex("OrderDate_bin", "2013-10-15")
+            case "3":
+                obj.AddToIntIndex("SalespersonId_int", 9000)
+                obj.AddToIndex("OrderDate_bin", "2013-11-03")
+            }
+            scmd, serr := riak.NewStoreValueCommandBuilder().
+                WithContent(obj).
+                Build()
+            if serr != nil {
+                util.ErrExit(serr)
+            }
+            a := &riak.Async{
+                Command: scmd,
+                Wait:    wg,
+            }
+            if eerr := c.ExecuteAsync(a); eerr != nil {
+                errored = true
+                util.ErrLog.Println(eerr)
+            }
+        } else {
+            util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+        }
+    case <-time.After(5 * time.Second):
+        util.ErrExit(errors.New("fetch operations took too long"))
+    }
+}
+
+if errored {
+    util.ErrExit(errors.New("error, exiting!"))
+}
+
+wg.Wait()
+close(doneChan)
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`:
+
+```golang
+util.Log.Println("Index Queries")
+
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucket(ordersBucket).
+    WithIndexName("SalespersonId_int").
+    WithIndexKey("9000").
+    Build()
+if err != nil {
+    util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+    util.ErrExit(eerr)
+}
+
+qcmd := cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+    util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 Jane's Orders, key:  3
+2015/12/29 09:44:10 Jane's Orders, key:  1
+```
+
+Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id; next, let's use a *binary* index.
+
+Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`:
+
+```golang
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucket(ordersBucket).
+    WithIndexName("OrderDate_bin").
+    WithRange("2013-10-01", "2013-10-31").
+    Build()
+if err != nil {
+    util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+    util.ErrExit(eerr)
+}
+
+qcmd = cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+    util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 October's Orders, key:  1
+2015/12/29 09:44:10 October's Orders, key:  2
+```
+
+Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/java.md b/content/riak/kv/2.9.7/developing/getting-started/java.md
new file mode 100644
index 0000000000..696cb77167
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/java.md
@@ -0,0 +1,93 @@
+---
+title: "Getting Started with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Java"
+    identifier: "getting_started_java"
+    weight: 100
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/taste-of-riak/java
+  - /riak/kv/2.9.7/dev/taste-of-riak/java
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.7/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Java is required.
+
+## Client Setup
+
+To include the Riak Java client in your project, add it to your
+project's dependencies. Here is a Maven example:
+
+```xml
+<dependency>
+  <groupId>com.basho.riak</groupId>
+  <artifactId>riak-client</artifactId>
+  <version>2.1.1</version>
+</dependency>
+```
+
+Next, download the
+[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java)
+source code for this tutorial, and save it to your working directory.
+
+{{% note title="Configuring for a local cluster" %}}
+The `TasteOfRiak.java` file that you downloaded is set up to communicate with
+a 1-node Riak cluster listening on `localhost` port 10017. We recommend
+modifying the connection info directly within the `setUpCluster()` method.
+{{% /note %}}
+
+If you execute the `TasteOfRiak.java` file within your IDE, you should
+see the following:
+
+```
+Basic object created
+Location object created for quote object
+StoreValue operation created
+Client object successfully created
+Object storage operation successfully completed
+Success! The object we created and the object we fetched have the same value
+Quote object successfully deleted
+Book object created
+Moby Dick information now stored in Riak
+Book object successfully fetched
+Success! All of our tests check out
+```
+
+Since Java doesn’t have a REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting Up the Cluster
+
+The first step in using the Riak Java client is to create a cluster
+object to facilitate all interactions with Riak. You'll see this on line
+72:
+
+```java
+RiakCluster cluster = setUpCluster();
+```
+
+This calls the private `setUpCluster` method which begins on line 25.
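+
+The body of that method isn't reproduced here, but a minimal sketch
+would look something like the following (the node address and port are
+assumptions matching the note above; the method in the downloaded
+`TasteOfRiak.java` file is authoritative):
+
+```java
+private static RiakCluster setUpCluster() throws UnknownHostException {
+    // Build a RiakNode pointing at the single local node
+    RiakNode node = new RiakNode.Builder()
+            .withRemoteAddress("127.0.0.1")
+            .withRemotePort(10017)
+            .build();
+
+    // Wrap the node in a cluster object and start it
+    RiakCluster cluster = new RiakCluster.Builder(node).build();
+    cluster.start();
+
+    return cluster;
+}
+```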
+
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.7/developing/getting-started/java/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.9.7/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..bfc83d643e
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,206 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+aliases:
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on down the line:
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Iceman");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see the [Updating Objects]({{}}riak/kv/2.9.7/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.7/developing/usage/conflict-resolution/)
+documentation.
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book.
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the more simple
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+
+To update the POJO, we would use `UpdateValue` by
+extending a new `BookUpdate` class as follows:
+
+```java
+public static class BookUpdate extends UpdateValue.Update<Book> {
+    private final Book update;
+    public BookUpdate(Book update){
+        this.update = update;
+    }
+
+    @Override
+    public Book apply(Book t) {
+        if(t == null) {
+            t = new Book();
+        }
+
+        t.author = update.author;
+        t.body = update.body;
+        t.copiesOwned = update.copiesOwned;
+        t.isbn = update.isbn;
+        t.title = update.title;
+
+        return t;
+    }
+}
+```
+
+Then using the `BookUpdate` class with our `mobyDick` object:
+
+```java
+mobyDick.copiesOwned = 5;
+BookUpdate updatedBook = new BookUpdate(mobyDick);
+
+UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation)
+        .withUpdate(updatedBook).build();
+UpdateValue.Response response = client.execute(updateValue);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see the [Updating Objects]({{}}riak/kv/2.9.7/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.7/developing/usage/conflict-resolution/)
+documentation.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/java/object-modeling.md b/content/riak/kv/2.9.7/developing/getting-started/java/object-modeling.md
new file mode 100644
index 0000000000..aeb56ad8e4
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/java/object-modeling.md
@@ -0,0 +1,432 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Object Modeling"
+    identifier: "getting_started_java_object"
+    weight: 102
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/taste-of-riak/object-modeling-java
+  - /riak/kv/2.9.7/dev/taste-of-riak/object-modeling-java
+---
+
+To get started, let's create the models that we'll be using.
+
+```java
+package com.basho.msgy.Models;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class Msg {
+    public String Sender;
+    public String Recipient;
+    public String Created;
+    public String Text;
+
+    public static Msg createNew(String sender, String recipient, String text) {
+        Msg msg = new Msg();
+        msg.Sender = sender;
+        msg.Recipient = recipient;
+        msg.Text = text;
+        msg.Created = GetCurrentISO8601Timestamp();
+        return msg;
+    }
+
+    private static String GetCurrentISO8601Timestamp() {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        // Java Dates don't have microsecond resolution :(
+        // Pad out to microseconds to match other examples.
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.api.annotations.RiakBucketName;
+import com.basho.riak.client.api.annotations.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "Users";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>
`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. With the
+Java client, we can use the `@RiakKey` annotation to tell the client
+that we want to use the `UserName` member as the key. It will
+automatically use that value in the future, instead of having to pass the
+key in as another parameter when storing a value.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<username>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```java
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class MsgRepository {
+
+    static final String BUCKET_NAME = "Msgs";
+    protected RiakClient client;
+
+    public MsgRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public Msg get(String msgKey) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        FetchValue.Response response = client.execute(fetch);
+        return response.getValue(Msg.class);
+    }
+
+    public String save(Msg msg) throws Exception {
+        StoreValue store = new StoreValue.Builder(msg).build();
+        client.execute(store);
+        return generateKey(msg);
+    }
+
+    private String generateKey(Msg msg) {
+        return msg.Sender + "_" + msg.Created;
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class TimelineRepository {
+
+    static final String BUCKET_NAME = "Timelines";
+    protected RiakClient client;
+    protected MsgRepository msgRepo;
+
+    public TimelineRepository(RiakClient client) {
+        this.client = client;
+        this.msgRepo = new MsgRepository(this.client);
+    }
+
+    public void postMsg(Msg msg) throws Exception {
+        String msgKey = msgRepo.save(msg);
+
+        // Post to recipient's Inbox timeline
+        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);
+
+        // Post to sender's Sent timeline
+        
+        addToTimeline(msg, Timeline.TimelineType.Sent, msgKey);
+    }
+
+    private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception {
+        String timelineKey = generateKeyFromMsg(msg, type);
+
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        Timeline timeline = client.execute(fetch).getValue(Timeline.class);
+
+        if (timeline != null) {
+            timeline = addToExistingTimeline(timeline, msgKey);
+        } else {
+            timeline = createNewTimeline(msg, type, msgKey);
+        }
+
+        StoreValue store = new StoreValue.Builder(timeline).build();
+        client.execute(store);
+    }
+
+    public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) {
+        String owner = getOwner(msg, type);
+
+        Timeline newTimeline = new Timeline();
+        newTimeline.Owner = owner;
+        newTimeline.Type = type.toString();
+        newTimeline.Msgs.add(msgKey);
+
+        return newTimeline;
+    }
+
+    public Timeline addToExistingTimeline(Timeline timeline, String msgKey) {
+        timeline.Msgs.add(msgKey);
+        return timeline;
+    }
+
+    public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws Exception {
+        String timelineKey = generateKey(ownerUsername, type, date);
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        return client.execute(fetch).getValue(Timeline.class);
+    }
+
+    private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) {
+        String owner = getOwner(msg, type);
+        String dateString = msg.Created.substring(0, 10);
+        return generateKey(owner, type, dateString);
+    }
+
+    private String getOwner(Msg msg, Timeline.TimelineType type) {
+        if(type == Timeline.TimelineType.Inbox)
+            return msg.Recipient;
+        else
+            return msg.Sender;
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) {
+        String dateString = getIso8601DateStringFromDate(date);
+        return generateKey(ownerUsername, type, dateString);
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) {
+        return ownerUsername + "_" + type.toString() + "_" + dateString;
+    }
+
+    private String getIso8601DateStringFromDate(Date date) {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
+        df.setTimeZone(tz);
+        return df.format(date);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.User;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class UserRepository {
+    static final String BUCKET_NAME = "Users";
+    protected RiakClient client;
+
+    public UserRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public void save(User user) throws Exception {
+        StoreValue store = new StoreValue.Builder(user).build();
+        client.execute(store);
+    }
+
+    public User get(String userName) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), userName);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        return client.execute(fetch).getValue(User.class);
+    }
+}
+
+```
+
+Finally, let's test them:
+
+```java
+package com.basho.msgy;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.msgy.Models.User;
+import com.basho.msgy.Repositories.MsgRepository;
+import com.basho.msgy.Repositories.TimelineRepository;
+import com.basho.msgy.Repositories.UserRepository;
+import com.basho.riak.client.api.RiakClient;
+
+import java.util.Date;
+
+public class MsgyMain {
+
+    public static void main(String[] args) throws Exception {
+        // Set up our repositories
+        RiakClient client = RiakClient.newClient(10017, "127.0.0.1");
+
+        UserRepository userRepo = new UserRepository(client);
+        MsgRepository msgRepo = new MsgRepository(client);
+        TimelineRepository timelineRepo = new TimelineRepository(client);
+
+        // Create and save users
+        User marleen = new User("marleenmgr",
+                "Marleen Manager",
+                "marleen.manager@basho.com");
+
+        User joe = new User("joeuser",
+                "Joe User",
+                "joe.user@basho.com");
+
+        userRepo.save(marleen);
+        userRepo.save(joe);
+
+        // Create new Msg, post to timelines
+        Msg msg = Msg.createNew(marleen.UserName,
+                joe.UserName,
+                "Welcome to the company!");
+
+        timelineRepo.postMsg(msg);
+
+        // Get Joe's inbox for today, get first message
+        Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName,
+                Timeline.TimelineType.Inbox,
+                new Date());
+
+        Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0));
+
+        System.out.println("From: " + joesFirstMsg.Sender);
+        System.out.println("Msg : " + joesFirstMsg.Text);
+        System.out.println("");
+
+        client.shutdown();
+    }
+}
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
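+
+One way to handle that case is a conditional create: a hypothetical
+addition to `UserRepository` (it is not part of the sample project) that
+fetches the key first and only stores the user when nothing is there.
+Without strong consistency, two racing writers could still both succeed,
+so treat this as a best-effort guard rather than a real constraint:
+
+```java
+// Hypothetical UserRepository method; assumes the same 2.0 client API
+// used in the repositories above.
+public User create(User user) throws Exception {
+    Location key = new Location(new Namespace(BUCKET_NAME), user.UserName);
+    FetchValue.Response response = client.execute(new FetchValue.Builder(key).build());
+
+    if (!response.isNotFound()) {
+        // Someone already claimed this username
+        throw new IllegalStateException("Username already taken: " + user.UserName);
+    }
+
+    StoreValue store = new StoreValue.Builder(user).build();
+    client.execute(store);
+    return user;
+}
+```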
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/java/querying.md b/content/riak/kv/2.9.7/developing/getting-started/java/querying.md
new file mode 100644
index 0000000000..e4cd47b79c
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/java/querying.md
@@ -0,0 +1,280 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Querying"
+    identifier: "getting_started_java_query"
+    weight: 101
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/taste-of-riak/querying-java
+  - /riak/kv/2.9.7/dev/taste-of-riak/querying-java
+---
+
+## Java Version Setup
+
+For the Java version, please download the source from GitHub by either
+[cloning](https://github.com/basho/taste-of-riak) the source code
+repository or downloading the [current zip of the master
+branch](https://github.com/basho/taste-of-riak/archive/master.zip).
+The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`. You
+may import this code into your favorite editor, or just run it from the
+command line using the commands in `BuildAndRun.sh` if you are running
+on a *nix OS.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can range from
+using the same key across multiple buckets for different types of data
+to having fields in your data that are related by name. These querying
+methods will introduce you to some ways of laying out your data in Riak,
+along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled-up info about orders such as total, etc. You can find the source
+for these POJOs in `Customer.java`, `Order.java`, and
+`OrderSummaries.java`. Let's put some data into Riak so we can play
+with it.
+
+```java
+// From SipOfRiak.java
+
+private static Customer createCustomer() {
+    Customer customer = new Customer();
+    customer.CustomerId = 1;
+    customer.Name = "John Smith";
+    customer.Address = "123 Main Street";
+    customer.City = "Columbus";
+    customer.State = "Ohio";
+    customer.Zip = "43210";
+    customer.Phone = "+1-614-555-5555";
+    customer.CreatedDate = "2013-10-01 14:30:26";
+    return customer;
+}
+
+private static ArrayList<Order> createOrders() {
+    ArrayList<Order> orders = new ArrayList<Order>();
+
+    Order order1 = new Order();
+    order1.OrderId = 1;
+    order1.CustomerId = 1;
+    order1.SalespersonId = 9000;
+    order1.Items.add(
+        new Item("TCV37GIT4NJ",
+                 "USB 3.0 Coffee Warmer",
+                 15.99));
+    order1.Items.add(
+        new Item("PEG10BBF2PP",
+                 "eTablet Pro; 24GB; Grey",
+                 399.99));
+    order1.Total = 415.98;
+    order1.OrderDate = "2013-10-01 14:42:26";
+    orders.add(order1);
+
+    Order order2 = new Order();
+    order2.OrderId = 2;
+    order2.CustomerId = 1;
+    order2.SalespersonId = 9001;
+    order2.Items.add(
+        new Item("OAX19XWN0QP",
+                 "GoSlo Digital Camera",
+                 359.99));
+    order2.Total = 359.99;
+    order2.OrderDate = "2013-10-15 16:43:16";
+    orders.add(order2);
+
+    Order order3 = new Order();
+    order3.OrderId = 3;
+    order3.CustomerId = 1;
+    order3.SalespersonId = 9000;
+    order3.Items.add(
+        new Item("WYK12EPU5EZ",
+                 "Call of Battle: Goats - Gamesphere 4",
+                 69.99));
+    order3.Items.add(
+        new Item("TJB84HAA8OA",
+                 "Bricko Building Blocks",
+                 4.99));
+    order3.Total = 74.98;
+    order3.OrderDate = "2013-11-03 17:45:28";
+    orders.add(order3);
+    return orders;
+}
+
+private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
+    OrderSummary orderSummary = new OrderSummary();
+    orderSummary.CustomerId = 1;
+    for(Order order: orders)
+    {
+        orderSummary.Summaries.add(new OrderSummaryItem(order));
+    }
+    return orderSummary;
+}
+
+public static void main(String[] args) throws RiakException {
+
+    System.out.println("Creating Data");
+    Customer customer = createCustomer();
+    ArrayList<Order> orders = createOrders();
+    OrderSummary orderSummary = createOrderSummary(orders);
+
+    System.out.println("Starting Client");
+    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);
+
+    System.out.println("Creating Buckets");
+    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
+    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
+    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();
+
+    System.out.println("Storing Data");
+    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
+    for (Order order : orders) {
+        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
+    }
+    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```java
+    System.out.println("Fetching related data by shared key");
+    String key = "1";
+    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
+    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
+    System.out.format("Customer 1: %s\n", fetchedCust);
+    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```java
+    System.out.println("Adding Index Data");
+    IRiakObject riakObj = ordersBucket.fetch("1").execute();
+    riakObj.addIndex("SalespersonId", 9000);
+    riakObj.addIndex("OrderDate", "2013-10-01");
+    ordersBucket.store(riakObj).execute();
+
+    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
+    riakObj2.addIndex("SalespersonId", 9001);
+    riakObj2.addIndex("OrderDate", "2013-10-15");
+    ordersBucket.store(riakObj2).execute();
+
+    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
+    riakObj3.addIndex("SalespersonId", 9000);
+    riakObj3.addIndex("OrderDate", "2013-11-03");
+    ordersBucket.store(riakObj3).execute();
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+```java
+    // Query for orders where the SalespersonId index is set to 9000
+    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
+                                           .withValue(9000).execute();
+
+    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
+```
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries.
Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```java + // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 + List octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate")) + .from("2013-10-01").to("2013-10-31").execute(); + + System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders)); +``` + +Which returns: + +```text +October's Orders: 1, 2 +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/nodejs.md b/content/riak/kv/2.9.7/developing/getting-started/nodejs.md new file mode 100644 index 0000000000..4b764711f0 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/nodejs.md @@ -0,0 +1,104 @@ +--- +title: "Getting Started with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "NodeJS" + identifier: "getting_started_nodejs" + weight: 104 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/nodejs + - /riak/kv/2.9.7/dev/taste-of-riak/nodejs +--- + +[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js +[npm]: https://www.npmjs.com/package/basho-riak-client +[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.7/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Node.js 0.12 or later is +required. + +Code for these examples is available [here][introduction.js]. To run, follow +these directions: + +```bash +git clone git://github.com/basho/riak-nodejs-client-examples +cd riak-nodejs-client-examples +npm install +node ./app.js +``` + +### Client Setup + +Install [the Riak Node.js Client][node_js_installation] through [NPM][npm]. + +### Connecting to Riak + +Connecting to Riak with the Riak Node.js Client requires creating a new client +object and using the callback argument to know when the client is fully +initialized: + +```javascript +var Riak = require('basho-riak-client'); +var nodes = [ + 'riak-test:10017', + 'riak-test:10027', + 'riak-test:10037', + 'riak-test:10047' +]; +var client = new Riak.Client(nodes, function (err, c) { + // NB: at this point the client is fully initialized, and + // 'client' and 'c' are the same object +}); +``` + +This creates a new `Riak.Client` object which handles all the details of +tracking active nodes and also provides load balancing. The `Riak.Client` object +is used to send commands to Riak. 
When your application is completely done with +Riak communications, the following method can be used to gracefully shut the +client down and exit Node.js: + +```javascript +client.stop(function (err, rslt) { + // NB: you may wish to check err + process.exit(); +}); +``` + +Let's make sure the cluster is online with a `Ping` request: + +```javascript +var assert = require('assert'); + +client.ping(function (err, rslt) { + if (err) { + throw new Error(err); + } else { + // On success, ping returns true + assert(rslt === true); + } +}); +``` + +This is some simple code to test that a node in a Riak cluster is online - we +send a simple ping message. Even if the cluster isn't present, the Riak Node.js +Client will return a response message. In the callback it is important to check +that your activity was successful by checking the `err` variable. + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.7/developing/getting-started/nodejs/crud-operations) + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/2.9.7/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..ba0b6ecfa4 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,138 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "CRUD Operations" + identifier: "getting_started_nodejs_crud" + weight: 100 + parent: "getting_started_nodejs" +toc: true +aliases: +--- + +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + +### Creating Objects In Riak KV + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going +to want us to do productive work. Let's create some data to save in Riak. + +The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak +key/value objects. At the most basic, a `RiakObject` is responsible for +identifying your object and for translating it into a format that can be easily +saved to Riak. + +```javascript +var async = require('async'); + +var people = [ + { + emailAddress: "bashoman@basho.com", + firstName: "Basho", + lastName: "Man" + }, + { + emailAddress: "johndoe@gmail.com", + firstName: "John", + lastName: "Doe" + } +]; + +var storeFuncs = []; +people.forEach(function (person) { + // Create functions to execute in parallel to store people + storeFuncs.push(function (async_cb) { + client.storeValue({ + bucket: 'contributors', + key: person.emailAddress, + value: person + }, + function(err, rslt) { + async_cb(err, rslt); + } + ); + }); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +In this sample, we create a collection of `Person` objects and then save each +`Person` to Riak. Once again, we check the response from Riak. + +### Reading from Riak + +Let's find a person! + +```javascript +var logger = require('winston'); + +client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true }, + function (err, rslt) { + if (err) { + throw new Error(err); + } else { + var riakObj = rslt.values.shift(); + var bashoman = riakObj.value; + logger.info("I found %s in 'contributors'", bashoman.emailAddress); + } + } +); +``` + +We use `client.fetchValue` to retrieve an object from Riak. 
This returns an
+array of `RiakObject` objects, which helpfully encapsulate the
+communication with Riak.
+
+After verifying that we've been able to communicate with Riak *and* that
+we have a successful result, we use the `value` property to get the
+object, which has already been converted to a JavaScript object due to
+the use of `convertToJs: true` in the options.
+
+### Modifying Existing Data
+
+Let's say that Basho Man has decided to be known as Riak Man:
+
+```javascript
+bashoman.firstName = "Riak";
+riakObj.setValue(bashoman);
+
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Updating an object involves modifying a `RiakObject` then using
+`client.storeValue` to save the existing object.
+
+### Deleting Data
+
+```javascript
+client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Just like other operations, we check the results that have come back
+from Riak to make sure the object was successfully deleted.
+
+The Riak Node.js Client has a lot of additional functionality that makes
+it easy to build rich, complex applications with Riak. Check out the
+[documentation][nodejs_wiki] to learn more about working with the Riak
+Node.js Client and Riak.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/2.9.7/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..a49c76022c
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Object Modeling"
+    identifier: "getting_started_nodejs_object"
+    weight: 102
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/kv/2.9.7/dev/taste-of-riak/object-modeling-nodejs
+---
+
+To get started, let's create the models that we'll be using.
+
+* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
+* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
+* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+| Bucket | Key Pattern | Example Key |
+|:-------|:------------|:------------|
+| `Users` | `<user_name>` | `joeuser` |
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z` |
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`, `marketing_group_INBOX_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period, so
+that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
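+
+For example, to show a user's most recent sent messages, you could read
+back today's `Sent` timeline and slice off the newest entries. A rough
+sketch, assuming an initialized `client` and the key pattern above (the
+timeline's message-list field name is illustrative, not taken from the
+linked code):
+
+```javascript
+// Fetch today's Sent timeline for joeuser and keep the last 10 keys.
+var today = new Date().toISOString().substring(0, 10);
+var timelineKey = 'joeuser_SENT_' + today;
+
+client.fetchValue({ bucket: 'Timelines', key: timelineKey, convertToJs: true },
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        if (!rslt.isNotFound) {
+            var timeline = rslt.values.shift().value;
+            // 'msgKeys' is a hypothetical field name for the stored keys
+            var last10 = timeline.msgKeys.slice(-10);
+            console.log('Last 10 sent messages: %s', last10.join(', '));
+        }
+    });
+```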
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.9.7/developing/getting-started/nodejs/querying.md
new file mode 100644
index 0000000000..1faf331aa6
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/nodejs/querying.md
@@ -0,0 +1,146 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Querying"
+    identifier: "getting_started_nodejs_query"
+    weight: 101
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/taste-of-riak/querying-nodejs
+  - /riak/kv/2.9.7/dev/taste-of-riak/querying-nodejs
+---
+
+## Node.js Version Setup
+
+For the Node.js version, please download the source from GitHub by either
+[cloning](https://github.com/basho/taste-of-riak) the source code
+repository or downloading the [current zip of the master
+branch](https://github.com/basho/taste-of-riak/archive/master.zip).
+The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be
+sure to run `npm install` in this directory before running the code with
+`node ./app.js`.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can range from
+using the same key across multiple buckets for different types of data
+to having fields in your data that are related by name. These querying
+methods will introduce you to some ways of laying out your data in Riak,
+along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled-up info about orders such as total, etc. Let's put some data into
+Riak so we can play with it; the linked examples show the full code, and
+a short sketch follows.
+
+* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33)
+* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262)
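+
+As a minimal sketch (with made-up sample data, and assuming the
+initialized `client` from the previous chapter), storing an order and
+its summary under the same identity key looks like this:
+
+```javascript
+// Same identity key ('1'), two different buckets. The data here is a
+// trimmed-down illustration; the linked examples store the full objects.
+var order = { orderId: 1, customerId: 1, total: 415.98 };
+var orderSummary = {
+    customerId: 1,
+    summaries: [
+        { orderId: 1, total: 415.98, orderDate: '2013-10-01 14:42:26' }
+    ]
+};
+
+client.storeValue({ bucket: 'Orders', key: '1', value: order },
+    function (err, rslt) {
+        if (err) throw new Error(err);
+    });
+
+client.storeValue({ bucket: 'OrderSummaries', key: '1', value: orderSummary },
+    function (err, rslt) {
+        if (err) throw new Error(err);
+    });
+```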
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)
+
+Which returns our amalgamated objects:
+
+```bash
+info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
+info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
+2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175)
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
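+
+Inline, that first lookup with the Node.js client looks roughly like the
+sketch below. It assumes the initialized `client` from earlier chapters
+and the index entries added in the linked example; the streaming
+callback can fire more than once, so keys are collected until `done` is
+set:
+
+```javascript
+var Riak = require('basho-riak-client');
+
+var janesOrders = [];
+
+function query_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt.values.length > 0) {
+        // Collect the matching object keys from this batch
+        Array.prototype.push.apply(janesOrders,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+    if (rslt.done) {
+        console.log("Jane's Orders: %s", janesOrders.join(', '));
+    }
+}
+
+var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucket('Orders')
+    .withIndexName('SalespersonId_int')
+    .withIndexKey(9000)
+    .withCallback(query_cb)
+    .build();
+
+client.execute(cmd);
+```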
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/php.md b/content/riak/kv/2.9.7/developing/getting-started/php.md
new file mode 100644
index 0000000000..4395e90ab6
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/php.md
@@ -0,0 +1,80 @@
+---
+title: "Getting Started with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "PHP"
+    identifier: "getting_started_php"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/taste-of-riak/php
+  - /riak/kv/2.9.7/dev/taste-of-riak/php
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.7/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of PHP is required, along with [Composer](https://getcomposer.org/) to fetch the client library package.
+
+## Client Setup
+
+Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)).
+
+From the `taste-of-riak` directory, use Composer to install the Riak PHP 2.0 client.
+
+```bash
+php path/to/your/composer.phar install
+
+# If you did a global install of composer, run this instead:
+composer install
+```
+
+If you set up a local Riak cluster using the five-minute install method, change line 11 from `->onPort(8098)` to `->onPort(10018)`.
+
+Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:
+
+```text
+Reading Objects From Riak...
+Updating Objects In Riak...
+Deleting Objects From Riak...
+Working With Complex Objects...
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+Yay, success!
+
+Since we didn't use PHP's REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting up the PHP Client and connections
+
+```php
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+```
+
+This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
+Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.
+
+We are now ready to start interacting with Riak.
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.7/developing/getting-started/php/crud-operations) + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.9.7/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..6b99ffeea1 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/php/crud-operations.md @@ -0,0 +1,187 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with PHP" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "CRUD Operations" + identifier: "getting_started_php_crud" + weight: 100 + parent: "getting_started_php" +toc: true +aliases: +--- + +## Creating Objects In Riak +First, let’s create a few objects and a bucket to keep them in. + +```php +$bucket = new Riak\Bucket('testBucket'); + +$val1 = 1; +$location1 = new Riak\Location('one', $bucket); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val1) + ->atLocation($location1) + ->build(); +$storeCommand1->execute(); +``` + +In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key. + +```php +$val2 = 'two'; +$location2 = new Riak\Location('two', $bucket); + +$storeCommand2 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val2) + ->atLocation($location2) + ->build(); +$storeCommand2->execute(); +``` + +That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now. + +```php +$val3 = ['myValue' => 3]; +$location3 = new Riak\Location('three', $bucket); + +$storeCommand3 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($val3) + ->atLocation($location3) + ->build(); +$storeCommand3->execute(); +``` + +## Reading Objects From Riak +Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect. + +```php +$response1 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location1) + ->build() + ->execute(); + +$response2 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location2) + ->build() + ->execute(); + +$response3 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location3) + ->withDecodeAsAssociative() + ->build() + ->execute(); + +print_r($response1->getObject()->getData()); +print_r($response2->getObject()->getData()); +print_r($response3->getObject()->getData()); +``` + +That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html). +For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object. + +In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data. + +## Updating Objects In Riak +While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42. 
+ +```php +$object3 = $response3->getObject(); +$data3 = $object3->getData(); + +$data3['myValue'] = 42; +$object3 = $object3->setData(json_encode($data3)); + +$updateCommand = (new Command\Builder\StoreObject($riak)) + ->withObject($object3) + ->atLocation($location3) + ->build(); + +$updateCommand->execute(); +``` + +First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object. +To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object. + +## Deleting Objects From Riak +As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it. + +```php +(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute(); +``` + +### Working With Complex Objects +Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take for example, this plain old PHP object(POPO) that encapsulates some knowledge about a book. + +```php +class Book +{ + var $title; + var $author; + var $body; + var $isbn; + var $copiesOwned; +} + +$book = new Book(); +$book->isbn = '1111979723'; +$book->title = 'Moby Dick'; +$book->author = 'Herman Melville'; +$book->body = 'Call me Ishmael. Some years ago...'; +$book->copiesOwned = 3; +``` + +Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now: + +```php +$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books')); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($book) + ->atLocation($bookLocation) + ->build(); + +$storeCommand1->execute(); +``` + +Some of you may be thinking “But how does the Riak client encode/decode my object”? If we fetch the binary version of our book back and print it as a string, we shall know: + +```php +$fetchBookResponse = (new Command\Builder\FetchObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); + +print('Serialized Object:' . PHP_EOL); +print($fetchBookResponse->getBody() . PHP_EOL); +``` + +```json +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder. 
+
+Now that we’ve ruined the magic of object encoding, let’s clean up our mess:
+
+```php
+(new Command\Builder\DeleteObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute();
+```
+
+## Next Steps
+
+More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{<baseurl>}}riak/kv/2.9.7/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/php/querying.md b/content/riak/kv/2.9.7/developing/getting-started/php/querying.md
new file mode 100644
index 0000000000..0b47533a9e
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/php/querying.md
@@ -0,0 +1,408 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Querying"
+    identifier: "getting_started_php_query"
+    weight: 101
+    parent: "getting_started_php"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/taste-of-riak/querying-php
+  - /riak/kv/2.9.7/dev/taste-of-riak/querying-php
+---
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty word.
+Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can range from using the same key across multiple buckets for different types of data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+## Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled-up info about orders such as total, etc. Let's put some data into Riak so we can play with it.
+
+```php
+<?php
+
+use Basho\Riak;
+use Basho\Riak\Command;
+use Basho\Riak\Location;
+use Basho\Riak\Node;
+
+// Starting Client
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Class definitions for our models
+
+class Customer
+{
+    var $customerId;
+    var $name;
+    var $address;
+    var $city;
+    var $state;
+    var $zip;
+    var $phone;
+    var $createdDate;
+}
+
+class Order
+{
+    public function __construct()
+    {
+        $this->items = array();
+    }
+    var $orderId;
+    var $customerId;
+    var $salespersonId;
+    var $items;
+    var $total;
+    var $orderDate;
+}
+
+class Item
+{
+    public function __construct($itemId, $title, $price)
+    {
+        $this->itemId = $itemId;
+        $this->title = $title;
+        $this->price = $price;
+    }
+    var $itemId;
+    var $title;
+    var $price;
+}
+
+class OrderSummary
+{
+    public function __construct()
+    {
+        $this->summaries = array();
+    }
+    var $customerId;
+    var $summaries;
+}
+
+class OrderSummaryItem
+{
+    public function __construct(Order $order)
+    {
+        $this->orderId = $order->orderId;
+        $this->total = $order->total;
+        $this->orderDate = $order->orderDate;
+    }
+    var $orderId;
+    var $total;
+    var $orderDate;
+}
+
+
+// Creating Data
+$customer = new Customer();
+$customer->customerId = 1;
+$customer->name = 'John Smith';
+$customer->address = '123 Main Street';
+$customer->city = 'Columbus';
+$customer->state = 'Ohio';
+$customer->zip = '43210';
+$customer->phone = '+1-614-555-5555';
+$customer->createdDate = '2013-10-01 14:30:26';
+
+
+$orders = [];
+
+$order1 = new Order();
+$order1->orderId = 1;
+$order1->customerId = 1;
+$order1->salespersonId = 9000;
+$order1->items = [
+    new Item(
+        'TCV37GIT4NJ',
+        'USB 3.0 Coffee Warmer',
+        15.99
+    ),
+    new Item(
+        'PEG10BBF2PP',
+        'eTablet Pro; 24GB; Grey',
+        399.99
+    )
+];
+$order1->total = 415.98;
+$order1->orderDate = '2013-10-01 14:42:26';
+$orders[] = $order1;
+
+$order2 = new Order();
+$order2->orderId = 2;
+$order2->customerId = 1;
+$order2->salespersonId = 9001;
+$order2->items = [
+    new Item(
+        'OAX19XWN0QP',
+        'GoSlo Digital Camera',
+        359.99
+    )
+];
+$order2->total = 359.99;
+$order2->orderDate = '2013-10-15 16:43:16';
+$orders[] = $order2;
+
+$order3 = new Order();
+$order3->orderId = 3;
+$order3->customerId = 1;
+$order3->salespersonId = 9000;
+$order3->items = [
+    new Item(
+        'WYK12EPU5EZ',
+        'Call of Battle: Goats - Gamesphere 4',
+        69.99
+    ),
+    new Item(
+        'TJB84HAA8OA',
+        'Bricko Building Blocks',
+        4.99
+    )
+];
+$order3->total = 74.98;
+$order3->orderDate = '2013-11-03 17:45:28';
+$orders[] = $order3;
+
+
+$orderSummary = new OrderSummary();
+$orderSummary->customerId = 1;
+foreach ($orders as $order) {
+    $orderSummary->summaries[] = new OrderSummaryItem($order);
+}
+unset($order);
+
+
+// Creating Buckets
+$customersBucket = new Riak\Bucket('Customers');
+$ordersBucket = new Riak\Bucket('Orders');
+$orderSummariesBucket = new Riak\Bucket('OrderSummaries');
+
+// Storing Data
+$storeCustomer = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($customer)
+    ->atLocation(new Location($customer->customerId, $customersBucket))
+    ->build();
+$storeCustomer->execute();
+
+foreach ($orders as $order) {
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($order)
+        ->atLocation(new Location($order->orderId, $ordersBucket))
+        ->build();
+    $storeOrder->execute();
+}
+unset($order);
+
+$storeSummary = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($orderSummary)
+    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
+    ->build();
+$storeSummary->execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `OrderSummary` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```php
+// Fetching related data by shared key
+$fetched_customer = (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $customersBucket))
+    ->build()->execute()->getObject()->getData();
+
+$fetched_customer->orderSummary =
+    (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $orderSummariesBucket))
+    ->build()->execute()->getObject()->getData();
+
+print("Customer with OrderSummary data: \n");
+print_r($fetched_customer);
+```
+
+Which returns our amalgamated objects:
+
+```text
+Customer with OrderSummary data:
+stdClass Object
+(
+    [customerId] => 1
+    [name] => John Smith
+    [address] => 123 Main Street
+    [city] => Columbus
+    [state] => Ohio
+    [zip] => 43210
+    [phone] => +1-614-555-5555
+    [createdDate] => 2013-10-01 14:30:26
+    [orderSummary] => stdClass Object
+        (
+            [customerId] => 1
+            [summaries] => Array
+                (
+                    [0] => stdClass Object
+                        (
+                            [orderId] => 1
+                            [total] => 415.98
+                            [orderDate] => 2013-10-01 14:42:26
+                        )
+
+                    [1] => stdClass Object
+                        (
+                            [orderId] => 2
+                            [total] => 359.99
+                            [orderDate] => 2013-10-15 16:43:16
+                        )
+
+                    [2] => stdClass Object
+                        (
+                            [orderId] => 3
+                            [total] => 74.98
+                            [orderDate] => 2013-11-03 17:45:28
+                        )
+                )
+        )
+)
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```php
+// Adding Index Data
+$keys = array(1,2,3);
+foreach ($keys as $key) {
+    $orderLocation = new Location($key, $ordersBucket);
+    $orderObject = (new Command\Builder\FetchObject($riak))
+        ->atLocation($orderLocation)
+        ->build()->execute()->getObject();
+
+    $order = $orderObject->getData();
+
+    $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId);
+    $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate);
+
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->withObject($orderObject)
+        ->atLocation($orderLocation)
+        ->build();
+    $storeOrder->execute();
+}
+unset($key);
+
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's ID of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's ID; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which, as you can see, returns the matching 2i terms along with the keys.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/python.md b/content/riak/kv/2.9.7/developing/getting-started/python.md
new file mode 100644
index 0000000000..889851a462
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/python.md
@@ -0,0 +1,103 @@
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/taste-of-riak/python
+  - /riak/kv/2.9.7/dev/taste-of-riak/python
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.7/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`.
+ +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` - Header files and a static library for Python +* `libffi-dev` - Foreign function interface library +* `libssl-dev` - libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.7/developing/getting-started/python/crud-operations) + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/python/crud-operations.md b/content/riak/kv/2.9.7/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..7ffda695ef --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/python/crud-operations.md @@ -0,0 +1,150 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/python/object-modeling.md b/content/riak/kv/2.9.7/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..6dacd06c44 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/python/object-modeling.md @@ -0,0 +1,264 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/object-modeling-python + - /riak/kv/2.9.7/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>
`marketing_group_Inbox_2014-03-06` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. For the +`Msgs` bucket, let's use a combination of the username and the posting +datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601) +format. This combination gives us the pattern `_`, +which produces keys like `joeuser_2014-03-05T23:20:28`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `__` for groups, which will look like +`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2MB. Objects larger than that can hurt +performance, especially if many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, let's write some +repositories to help create and work with these objects in Riak: + +```python +class UserRepository: + BUCKET = 'Users' + + def __init__(self, client): + self.client = client + + def save(self, user): + riak_obj = self.client.bucket(self.BUCKET).get(user['user_name']) + riak_obj.data = user + return riak_obj.store() + + def get(self, user_name): + riak_obj = self.client.bucket(self.BUCKET).get(user_name) + return riak_obj.data + + +class MsgRepository: + BUCKET = 'Msgs' + + def __init__(self, client): + self.client = client + + def save(self, msg): + msgs = self.client.bucket(self.BUCKET) + key = self._generate_key(msg) + + riak_obj = msgs.get(key) + + if not riak_obj.exists: + riak_obj.data = msg + riak_obj.store(if_none_match=True) + + return riak_obj + + def get(self, key): + riak_obj = self.client.bucket(self.BUCKET).get(key) + return riak_obj.data + + def _generate_key(self, msg): + return msg['sender'] + '_' + msg['created'] + + +class TimelineRepository: + BUCKET = 'Timelines' + SENT = 'Sent' + INBOX = 'Inbox' + + def __init__(self, client): + self.client = client + self.msg_repo = MsgRepository(client) + + def post_message(self, msg): + # Save the canonical copy + saved_message = self.msg_repo.save(msg) + msg_key = saved_message.key + + # Post to sender's Sent timeline + self._add_to_timeline(msg, self.SENT, msg_key) + + # Post to recipient's Inbox timeline + self._add_to_timeline(msg, self.INBOX, msg_key) + + def get_timeline(self, owner, msg_type, date): + key = self._generate_key(owner, msg_type, date) + riak_obj = self.client.bucket(self.BUCKET).get(key) + return riak_obj.data + + def _add_to_timeline(self, msg, msg_type, msg_key): + timeline_key = self._generate_key_from_msg(msg, msg_type) + riak_obj = self.client.bucket(self.BUCKET).get(timeline_key) + + if riak_obj.exists: + riak_obj = self._add_to_existing_timeline(riak_obj, + msg_key) + else: + riak_obj = self._create_new_timeline(riak_obj, + msg, msg_type, + msg_key) + + return riak_obj.store() + + def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key): + owner = self._get_owner(msg, msg_type) + new_timeline = {'owner': owner, + 'msg_type': msg_type, + 'msgs': [msg_key]} + + riak_obj.data = new_timeline + return riak_obj + + def 
_add_to_existing_timeline(self, riak_obj, msg_key): + riak_obj.data['msgs'].append(msg_key) + return riak_obj + + def _get_owner(self, msg, msg_type): + if msg_type == self.INBOX: + return msg['recipient'] + else: + return msg['sender'] + + def _generate_key_from_msg(self, msg, msg_type): + owner = self._get_owner(msg, msg_type) + return self._generate_key(owner, msg_type, msg['created']) + + def _generate_key(self, owner, msg_type, datetimestr): + dateString = string.split(datetimestr, 'T', 1)[0] + return owner + '_' + msg_type + '_' + dateString + +``` + +Finally, let's test them: + +```python +# Setup our repositories +client = riak.RiakClient(pb_port=10017, protocol='pbc') +userRepo = UserRepository(client) +msgsRepo = MsgRepository(client) +timelineRepo = TimelineRepository(client) + +# Save users +userRepo.save(marleen) +userRepo.save(joe) + +# Post msg to timelines +timelineRepo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timelineRepo.get_timeline( + joe['user_name'], + TimelineRepository.INBOX, + datetime.utcnow().isoformat()) + +joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0]) + +print 'From: {0}\nMsg : {1}\n\n'.format( + joes_first_message['sender'], + joes_first_message['text']) + +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/python/querying.md b/content/riak/kv/2.9.7/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..f9caafda6c --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/python/querying.md @@ -0,0 +1,240 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Python" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Querying" + identifier: "getting_started_python_query" + weight: 101 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/querying-python + - /riak/kv/2.9.7/dev/taste-of-riak/querying-python +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```python
+customer = customer_bucket.get('1').data
+customer['order_summary'] = order_summary_bucket.get('1').data
+customer
+```
+
+Which returns our amalgamated objects:
+
+```python
+{
+  u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210',
+  u'created_date': u'2013-10-01 14:30:26',
+  'order_summary': {
+    u'customer_id': 1, u'summaries': [
+      {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98},
+      {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99},
+      {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98}
+    ]},
+  u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street',
+  u'customer_id': 1
+}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/ruby.md b/content/riak/kv/2.9.7/developing/getting-started/ruby.md new file mode 100644 index 0000000000..b728a8fb11 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/ruby.md @@ -0,0 +1,68 @@ +--- +title: "Getting Started with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Ruby" + identifier: "getting_started_ruby" + weight: 101 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/ruby + - /riak/kv/2.9.7/dev/taste-of-riak/ruby +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.7/using/running-a-cluster) first. To try this flavor +of Riak, a working installation of Ruby is required. + +## Client Setup + +First, install the Riak Ruby client via RubyGems. + +```bash +gem install riak-client +``` + +Start IRB, the Ruby REPL, and let’s get set up. Enter the following into +IRB: + +```ruby +require 'riak' +``` + +If you are using a single local Riak node, use the following to create a +new client instance, assuming that the node is running on `localhost` +port 8087: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087) + +# Since the Ruby Riak client uses the Protocol Buffers API by default, +# you can also just enter this: +client = Riak::Client.new(:pb_port => 8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017) + +# For the reasons explain in the snippet above, this will also work: +client = Riak::Client.new(:pb_port => 10017) +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.7/developing/getting-started/ruby/crud-operations) + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/2.9.7/developing/getting-started/ruby/crud-operations.md new file mode 100644 index 0000000000..c1e884ec5c --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/ruby/crud-operations.md @@ -0,0 +1,151 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "CRUD Operations" + identifier: "getting_started_ruby_crud" + weight: 100 + parent: "getting_started_ruby" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```ruby +my_bucket = client.bucket("test") + +val1 = 1 +obj1 = my_bucket.new('one') +obj1.data = val1 +obj1.store() +``` + +In this first example we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```ruby +val2 = "two" +obj2 = my_bucket.new('two') +obj2.data = val2 +obj2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```ruby +val3 = { myValue: 3 } +obj3 = my_bucket.new('three') +obj3.data = val3 +obj3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. 
+
+```ruby
+fetched1 = my_bucket.get('one')
+fetched2 = my_bucket.get('two')
+fetched3 = my_bucket.get('three')
+
+fetched1.data == val1
+fetched2.data == val2
+fetched3.data.to_json == val3.to_json
+```
+
+That was easy. We simply request the objects by key. In the last
+example, we converted to JSON so we can compare a string key to a symbol
+key.
+
+## Updating Objects In Riak
+
+While some data may be static, other forms of data may need to be
+updated. This is also easy to accomplish. Let’s update the value of
+myValue in the 3rd example to 42.
+
+```ruby
+fetched3.data["myValue"] = 42
+fetched3.store()
+```
+
+## Deleting Objects From Riak
+
+As a last step, we’ll demonstrate how to delete data. You’ll see that
+the delete message can be called either against the bucket or the
+object.
+
+```ruby
+my_bucket.delete('one')
+obj2.delete()
+obj3.delete()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this Ruby hash that encapsulates some knowledge about
+a book.
+
+```ruby
+book = {
+  :isbn => '1111979723',
+  :title => 'Moby Dick',
+  :author => 'Herman Melville',
+  :body => 'Call me Ishmael. Some years ago...',
+  :copies_owned => 3
+}
+```
+
+All right, so we have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now.
+
+```ruby
+books_bucket = client.bucket('books')
+new_book = books_bucket.new(book[:isbn])
+new_book.data = book
+new_book.store()
+```
+
+Some of you may be thinking, "But how does the Ruby Riak client
+encode/decode my object?" If we fetch our book back and print the raw
+data, we shall know:
+
+```ruby
+fetched_book = books_bucket.get(book[:isbn])
+puts fetched_book.raw_data
+```
+
+Raw Data:
+
+```json
+{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville",
+"body":"Call me Ishmael. Some years ago...","copies_owned":3}
+```
+
+JSON! The Ruby Riak client will serialize objects to JSON when it comes
+across structured data like hashes. For more advanced control over
+serialization you can use a library called
+[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling
+layer over the basic Riak client. Ripple falls outside the scope of
+this document but we shall visit it later.
+
+Now, let’s clean up our mess:
+
+```ruby
+new_book.delete()
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/2.9.7/developing/getting-started/ruby/object-modeling.md
new file mode 100644
index 0000000000..372f467feb
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/getting-started/ruby/object-modeling.md
@@ -0,0 +1,295 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Object Modeling"
+    identifier: "getting_started_ruby_object"
+    weight: 102
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/taste-of-riak/object-modeling-ruby
+  - /riak/kv/2.9.7/dev/taste-of-riak/object-modeling-ruby
+---
+
+To get started, let's create the models that we'll be using. Since the
+[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses
+hashes when converting to and from JSON, we'll use the library
+[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically
+coerce class properties to and from hashes. You can install this library
+with `gem install hashie`.
+
+```ruby
+# Encoding: utf-8
+
+require 'riak'
+require 'hashie'
+require 'time'
+
+class User < Hashie::Dash
+  property :user_name
+  property :full_name
+  property :email
+end
+
+class Msg < Hashie::Dash
+  property :from
+  property :to
+  property :created
+  property :text
+end
+
+class Timeline < Hashie::Dash
+  property :owner
+  property :type
+  property :msgs
+end
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>
`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def 
add_to_existing_timeline(key, msg_key) + riak_obj = @client.bucket(BUCKET).get(key) + timeline = Timeline.new(riak_obj.data) + timeline.msgs << msg_key + riak_obj.data = timeline + riak_obj + end + + def get_owner(msg, type) + type == INBOX ? msg.to : msg.from + end + + def generate_key_from_msg(msg, type) + owner = get_owner(msg, type) + generate_key(owner, type, msg.created) + end + + def generate_key(owner, type, date) + owner + '_' + type + '_' + date.utc.strftime('%F') + end +end +``` + +Finally, let's test them: + +```ruby +# Setup our repositories +client = Riak::Client.new(protocol: 'pbc', pb_port: 10017) +user_repo = UserRepository.new(client) +msgs_repo = MsgRepository.new(client) +timeline_repo = TimelineRepository.new(client) + +# Create and save users +marleen = User.new(user_name: 'marleenmgr', + full_name: 'Marleen Manager', + email: 'marleen.manager@basho.com') + +joe = User.new(user_name: 'joeuser', + full_name: 'Joe User', + email: 'joe.user@basho.com') + +user_repo.save(marleen) +user_repo.save(joe) + +# Create new Msg, post to timelines +msg = Msg.new(from: marleen.user_name, + to: joe.user_name, + created: Time.now, + text: 'Welcome to the company!') + +timeline_repo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now) +joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first) + +puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}" +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/2.9.7/developing/getting-started/ruby/querying.md b/content/riak/kv/2.9.7/developing/getting-started/ruby/querying.md new file mode 100644 index 0000000000..a90de69d29 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/getting-started/ruby/querying.md @@ -0,0 +1,256 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Querying" + identifier: "getting_started_ruby_query" + weight: 101 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/2.9.7/dev/taste-of-riak/querying-ruby + - /riak/kv/2.9.7/dev/taste-of-riak/querying-ruby +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. 
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```ruby
+shared_key = '1'
+customer = customer_bucket.get(shared_key).data
+customer[:order_summary] = order_summary_bucket.get(shared_key).data
+puts "Combined Customer and Order Summary: "
+pp customer
+```
+
+Which returns our amalgamated objects:
+
+```ruby
+# Combined Customer and Order Summary:
+{"customer_id"=>1,
+ "name"=>"John Smith",
+ "address"=>"123 Main Street",
+ "city"=>"Columbus",
+ "state"=>"Ohio",
+ "zip"=>"43210",
+ "phone"=>"+1-614-555-5555",
+ "created_date"=>"2013-10-01 14:30:26 -0400",
+ :order_summary=>
+  {"customer_id"=>1,
+   "summaries"=>
+    [{"order_id"=>1,
+      "total"=>415.98,
+      "order_date"=>"2013-10-01 14:42:26 -0400"},
+     {"order_id"=>2,
+      "total"=>359.99,
+      "order_date"=>"2013-10-15 16:43:16 -0400"},
+     {"order_id"=>3,
+      "total"=>74.98,
+      "order_date"=>"2013-11-03 17:45:28 -0500"}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+                                         .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "#Jane's Orders: "
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`.
+ +```ruby +puts "#October's Orders: " +pp order_bucket.get_index('order_date_bin', '20131001'..'20131031') +``` + +Which returns: + +```ruby +# October's Orders: +["1", "2"] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/2.9.7/developing/key-value-modeling.md b/content/riak/kv/2.9.7/developing/key-value-modeling.md new file mode 100644 index 0000000000..5dd88856e4 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/key-value-modeling.md @@ -0,0 +1,535 @@ +--- +title: "Riak KV Key/Value Modeling" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Key/Value Modeling" + identifier: "developing_kv_model" + weight: 104 + parent: "developing" +toc: true +aliases: + - /riak/2.9.7/dev/data-modeling/key-value/ + - /riak/kv/2.9.7/dev/data-modeling/key-value/ +--- + +While Riak enables you to take advantage of a wide variety of features +that can be useful in application development, such as [Search]({{}}riak/kv/2.9.7/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.9.7/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.9.7/developing/data-types/), Riak almost always performs best when you +build your application around basic CRUD operations (create, read, +update, and delete) on objects, i.e. when you use Riak as a "pure" +key/value store. + +In this tutorial, we'll suggest some strategies for naming and modeling +for key/value object interactions with Riak. If you'd like to use some +of Riak's other features, we recommend checking out the documentation +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.9.7/developing/app-guide/) for a better sense of which features you might need. + +## Advantages of Key/Value Operations + +Riak's key/value architecture enables it to be more performant than +relational databases in many scenarios because Riak doesn't need to +perform lock, join, union, or other operations when working with +objects. Instead, it interacts with objects on a one-by-one basis, using +**primary key lookups**. + +Primary key lookups store and fetch objects in Riak on the basis of +three basic locators: + +* The object's [key]({{}}riak/kv/2.9.7/learn/concepts/keys-and-objects#keys), which can be anything you + want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/2.9.7/learn/concepts/buckets) which houses the object and its key (bucket + names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/2.9.7/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.9.7/developing/app-guide/replication-properties) and other properties + +It may be useful to think of this system as analogous to a nested +key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you +would find in most programming languages. Below is an example from +[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{<baseurl>}}riak/kv/2.9.7/developing/api/http)):
+
+```
+GET/PUT/DELETE /bucket/<bucket>/keys/<key>
+```
+
+The most important benefit of sorting Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist?
+ +One way to determine this is to [list all keys]({{}}riak/kv/2.9.7/developing/api/protocol-buffers/list-keys) in the +bucket `users`. This approach, however, is _not_ recommended, because +listing all keys in a bucket is a very expensive operation that should +not be used in production. And so another strategy must be employed. + +A better possibility is to use [Riak sets]({{}}riak/kv/2.9.7/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.9.7/developing/data-types) that enable you to store lists of binaries or strings in Riak. +Unlike normal Riak objects, you can interact with Riak sets much like +you interact with sets in most programming languages, i.e. you can add +and remove elements at will. + +Going back to our user data example, instead of simply storing user +records in our `users` bucket, we could set up our application to store +each key in a set when a new record is created. We'll store this set in +the bucket `user_info_sets` (we'll keep it simple) and in the key +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.9.7/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`sets`. + +We can interact with that set on the basis of its location: + +```java +Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames"); + +// With this Location, we can construct fetch operations like this: +FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build(); +``` + +```ruby +require 'riak' + +set_bucket = client.bucket('user_info_sets') + +# We'll make this set global because we'll use it +# inside of a function later on + +$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets') +``` + +```php +$command = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->buildLocation('usernames', 'user_info_sets', 'sets') + ->build(); +``` + +```python +from riak.datatypes import Set + +bucket = client.bucket_type('sets').bucket('user_info_sets') +user_id_set = Set(bucket, 'usernames') +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.9.7/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.9.7/developing/getting-started). 
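+
+Before wiring the set into application code, it can help to do a quick
+round trip against it to confirm everything is connected. Below is a
+minimal sketch using the Python client from above; `'test_user'` is
+just a placeholder value, and we assume the set's standard `add`,
+`discard`, `store`, and `reload` methods:
+
+```python
+# Add a throwaway entry to the "usernames" set and persist it
+user_id_set.add('test_user')
+user_id_set.store()
+
+# Reload to pick up the stored state, then verify membership
+assert 'test_user' in user_id_set.reload().value
+
+# Remove the placeholder entry and persist the removal
+user_id_set.discard('test_user')
+user_id_set.store()
+```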
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+  public String username;
+  public String info;
+
+  public User(String username, String info) {
+    this.username = username;
+    this.info = info;
+  }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+  // User records themselves will be stored in the bucket "users"
+  Location userObjectLocation =
+    new Location(new Namespace("users"), user.username);
+  RiakObject userObject = new RiakObject()
+      // We'll keep it simple and store User object data as plain text
+      .setContentType("text/plain")
+      .setValue(BinaryValue.create(user.info));
+  StoreValue store = new StoreValue.Builder(userObject)
+      .withLocation(userObjectLocation)
+      .build();
+  client.execute(store);
+
+  Location userIdSet =
+    new Location(new Namespace("sets", "user_info_sets"), "usernames");
+  SetUpdate su = new SetUpdate()
+      .add(BinaryValue.create(user.username));
+  UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+      .build();
+  client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new(client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the global set from above
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+  public $user_name;
+  public $info;
+
+  public function __construct($user_name, $info)
+  {
+    $this->user_name = $user_name;
+    $this->info = $info;
+  }
+}
+
+function store_user(User $user)
+{
+  (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation($user->user_name, 'users')
+    ->buildJsonObject($user)
+    ->build()
+    ->execute();
+
+  (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->buildLocation('usernames', 'user_info_sets', 'sets')
+    ->add($user->user_name)
+    ->build()
+    ->execute();
+}
+```
+
+```python
+from riak import RiakObject
+
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    obj = RiakObject(client, client.bucket('users'), user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
+
+Now, let's say that we want to be able to pull up all user records in
+We could do so by iterating through the usernames
+stored in our set and then fetching the object corresponding to each
+username:
+
+```java
+public Set<User> fetchAllUserRecords() throws Exception {
+    // Empty builder sets for usernames and User objects
+    Set<String> userIdSet = new HashSet<String>();
+    Set<User> userSet = new HashSet<User>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSetLocation =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSetLocation).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    for (BinaryValue username : set.viewAsSet()) {
+        userIdSet.add(username.toString());
+    }
+
+    // Fetch User objects for each of the usernames stored in the set
+    for (String username : userIdSet) {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        User user = client.execute(fetch).getValue(User.class);
+        userSet.add(user);
+    }
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+  $users = [];
+
+  $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->buildLocation('usernames', 'user_info_sets', 'sets')
+    ->build()
+    ->execute();
+
+  $user_names = $response->getSet()->getData();
+  foreach($user_names as $user_name) {
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+      ->buildLocation($user_name, 'users')
+      ->build()
+      ->execute();
+
+    $users[$user_name] = $response->getObject()->getData();
+  }
+
+  return $users;
+}
+```
+
+```python
+# We'll create a generator that yields Riak objects one at a time
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can materialize that list of Riak objects later on
+list(fetch_all_user_records())
+```
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`.
+If an object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) {
+    String usernameKey = String.format("user_%s", username);
+    Location loc = new Location(new Namespace("users"), usernameKey);
+    FetchValue fetchUser = new FetchValue.Builder(loc).build();
+    FetchValue.Response res = client.execute(fetchUser);
+    User userObject = res.getValue(User.class);
+    return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+  $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('user_' . $user_name, 'users')
+    ->build()
+    ->execute();
+
+  return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good': {
+    'season X': {
+      { 'episode 1': '<title>' },
+      # ...
+ } + }, + 'bad': { + 'season Y': { + { 'episode 1': '<title>' }, + # ... + } + } +} +``` + +We can fetch the title of season 8, episode 6: + +```ruby +# For the sake of example, we'll classify season 8 as good: + +simpsons['good']['season 8']['episode 6'] + +# => "A Milhouse Divided" +``` + +If your data is best modeled as a three-layered hash, you may want to +consider using bucket types in the way shown above. + +## Resources + +More on key/value modeling in Riak can be found in [this +presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo) +by Basho evangelist [Hector Castro](https://github.com/hectcastro), with +the presentation slides available [on Speaker +Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example). + + + + diff --git a/content/riak/kv/2.9.7/developing/usage.md b/content/riak/kv/2.9.7/developing/usage.md new file mode 100644 index 0000000000..bcbdfe0318 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage.md @@ -0,0 +1,138 @@ +--- +title: "Usage Overview" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Usage" + identifier: "developing_usage" + weight: 101 + parent: "developing" +toc: true +aliases: +--- + +## In This Section + +#### [Creating Objects](./creating-objects) + +Creating and storing objects in Riak KV. + +[Learn More >>](./creating-objects) + + +#### [Reading Objects](./reading-objects) + +Reading and fetching objects in Riak KV. + +[Learn More >>](./reading-objects) + + +#### [Updating Objects](./updating-objects) + +Updating objects in Riak KV. + +[Learn More >>](./updating-objects) + + +#### [Deleting Objects](./deleting-objects) + +Deleting objects in Riak KV. + +[Learn More >>](./deleting-objects) + + +#### [Content Types](./content-types) + +Overview of content types and their usage. + +[Learn More >>](./content-types) + + +#### [Using Search](./search) + +Tutorial on using search. + +[Learn More >>](./search) + + +#### [Using MapReduce](./mapreduce) + +Guide to using MapReduce in applications. + +[Learn More >>](./mapreduce) + + +#### [Using Secondary Indexes](./secondary-indexes) + +Overview and usage details of Secondary Indexes (2i). + +[Learn More >>](./secondary-indexes) + + +#### [Bucket Types](./bucket-types) + +Describes how to use bucket properties. + +[Learn More >>](./bucket-types) + + +#### [Using Commit Hooks](./commit-hooks) + +Tutorial on pre-commit and post-commit hook functions. + +[Learn More >>](./commit-hooks) + + +#### [Creating Search Schemas](./search-schemas) + +Step-by-step guide on creating and using custom search schemas. + +[Learn More >>](./search-schemas) + + +#### [Searching with Data Types](./searching-data-types) + +Guide on using search with Data Types. + +[Learn More >>](./searching-data-types) + + +#### [Implementing a Document Store](./document-store) + +Tutorial on using Riak KV as a document store. + +[Learn More >>](./document-store) + + +#### [Custom Extractors](./custom-extractors) + +Details on creating and registering custom extractors with Riak Search. + +[Learn More >>](./custom-extractors) + + +#### [Client-side Security](./security) + +Overview of client-side security. + +[Learn More >>](./security) + + +#### [Replication](./replication) + +Documentation on replication properties and their underlying implementation. + +[Learn More >>](./replication) + + +#### [Conflict Resolution](./conflict-resolution) + +Guide to conflict resolution during object updates. 
+ +[Learn More >>](./conflict-resolution) + + + + diff --git a/content/riak/kv/2.9.7/developing/usage/bucket-types.md b/content/riak/kv/2.9.7/developing/usage/bucket-types.md new file mode 100644 index 0000000000..0b886b6963 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage/bucket-types.md @@ -0,0 +1,102 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Bucket Types" + identifier: "usage_bucket_types" + weight: 108 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.7/dev/advanced/bucket-types + - /riak/kv/2.9.7/dev/advanced/bucket-types +--- + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +## Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +`default` bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/usage/commit-hooks.md b/content/riak/kv/2.9.7/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..2374bfd380 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage/commit-hooks.md @@ -0,0 +1,244 @@ +--- +title: "Using Commit Hooks" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Using Commit Hooks" + identifier: "usage_commit_hooks" + weight: 109 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.7/dev/using/commit-hooks + - /riak/kv/2.9.7/dev/using/commit-hooks +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types + +Pre- and post-commit hooks are functions that are invoked before or +after an object has been written to Riak. 
To provide a few examples, commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and then the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/2.9.7/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions are allowed three possible return values:
+
+- A Riak object
+  - This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written.
+- `fail` - The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/2.9.7/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` - The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
+
+Errors that occur when processing Erlang pre-commit hooks will be
+reported in the `sasl-error.log` file with lines that start with
+`problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object
+(`Object`) as its input and runs a pattern-matching operation on the
+object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
+function determines that the object's size (determined by the `riak_object:get_value`
+function) is greater than 5,242,880 (5 MB in bytes), then the commit
+will return failure and the message `Object is larger than 5MB.`
+This will stop the write. If the object is not larger than 5 MB, Riak
+will return the object and allow the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty
+list, meaning that no pre-commit hooks are specified by default. Adding
+one or more pre-commit hook functions to this list, as documented above,
+will cause Riak to start evaluating those hook functions when bucket
+entries are created, updated, or deleted. Riak stops evaluating
+pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way to use
+pre-commit hooks is to validate data before it is written to Riak.
+Below is an example that uses an Erlang hook to validate a JSON object
+before it is written to Riak.
+
+Below is a sample JSON object that will be evaluated by the hook:
+
+```json
+{
+  "user_info": {
+    "name": "Mark Phillips",
+    "age": "25"
+  },
+  "session_info": {
+    "id": 3254425,
+    "items": [29, 37, 34]
+  }
+}
+```
+
+The following hook will validate the JSON object:
+
+```erlang
+validate(Object) ->
+  try
+    mochijson2:decode(riak_object:get_value(Object)),
+    Object
+  catch
+    throw:invalid_utf8 ->
+      {fail, "Invalid JSON: Illegal UTF-8 character"};
+    error:Error ->
+      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
+  end.
+```
+
+**Note**: All pre-commit hook functions are executed for each create and update operation.
+
+## Post-Commit Hooks
+
+Post-commit hooks are run after a write has completed successfully. More
+specifically, the hook function is called immediately before the calling
+process is notified of the successful write.
+
+Hook functions must accept a single argument: the object instance just
+written. The return value of the function is ignored. As with pre-commit
+hooks, deletes are considered writes, so post-commit hook functions will
+need to inspect the object's metadata for the presence of `X-Riak-Deleted`
+to determine whether a delete has occurred. As with pre-commit hooks,
+errors that occur when processing post-commit hooks will be reported in
+the `sasl-error.log` file with lines that start with `problem invoking hook`.
+
+#### Example
+
+The following post-commit hook creates a secondary index on the `email`
+field of a JSON object:
+
+```erlang
+postcommit_index_on_email(Object) ->
+  %% Determine the target bucket name
+  Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),
+
+  %% Decode the JSON body of the object
+  {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),
+
+  %% Extract the email field
+  {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),
+
+  %% Create a new object for the target bucket
+  %% NOTE: This doesn't handle the case where the
+  %%       index object already exists!
+  IndexObj = riak_object:new(
+    Bucket, Key, <<>>, %% no object contents
+    dict:from_list(
+      [
+        {<<"content-type">>, "text/plain"},
+        {<<"Links">>,
+          [
+            {
+              {riak_object:bucket(Object), riak_object:key(Object)},
+              <<"indexed">>
+            }]}
+      ]
+    )
+  ),
+
+  %% Get a riak client
+  {ok, C} = riak:local_client(),
+
+  %% Store the object
+  C:put(IndexObj).
+```
+
+### Chaining
+
+The default value of the bucket `postcommit` property is an empty list,
+meaning that no post-commit hooks are specified by default. Adding one
+or more post-commit hook functions to the list, as documented above,
+will cause Riak to start evaluating those hook functions immediately
+after data has been created, updated, or deleted. Each post-commit hook
+function runs in a separate process so it's possible for several hook
+functions, triggered by the same update, to execute in parallel.
+
+**Note**: All post-commit hook functions are executed for each create,
+update, or delete.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/conflict-resolution.md b/content/riak/kv/2.9.7/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..8f6aa8f56c
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/conflict-resolution.md
@@ -0,0 +1,681 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/conflict-resolution
+  - /riak/kv/2.9.7/dev/using/conflict-resolution
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#node) is capable of receiving requests without requiring that
+every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
+unavoidable. Often, Riak can resolve these conflicts on its own
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+
+{{% note title="Important note on terminology" %}}
+In versions of Riak prior to 2.0, vector clocks were the only causal context
+mechanism available in Riak, which changed with the introduction of dotted
+version vectors in 2.0. Please note that you may frequently find terminology in
+client library APIs, internal Basho documentation, and more that uses the term
+"vector clock" interchangeably with causal context in general. Riak's HTTP API
+still uses an `X-Riak-Vclock` header, for example, even if you are using dotted
+version vectors.
+{{% /note %}}
+
+But even when you use causal context, Riak cannot always decide which
+value is most causally recent, especially in cases involving concurrent
+updates to an object. So how does Riak behave when it can't decide on a
+single most-up-to-date value? **That is your choice**. A full listing of
+available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now,
+though, please bear in mind that we strongly recommend one of the
+following two options:
+
+1. If your data can be modeled as one of the currently available [Riak
+   Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types), we recommend using one of these types,
+   because all of them have conflict resolution _built in_, completely
+   relieving applications of the need to engage in conflict resolution.
+2. If your data cannot be modeled as one of the available Data Types,
+   we recommend allowing Riak to generate [siblings](#siblings) and designing your application to resolve
+   conflicts in a way that fits your use case. Developing your own
+   **conflict resolution strategy** can be tricky, but it has clear
+   advantages over other approaches.
+
+Because Riak allows for a mixed approach when storing and managing data,
+you can apply multiple conflict resolution strategies within a cluster.
+
+> **Note on strong consistency**
+>
+> In versions of Riak 2.0 and later, you have the option of using Riak in
+a strongly consistent fashion. This document pertains to usage of Riak
+as an _eventually_ consistent system. If you'd like to use Riak's
+strong consistency feature, please refer to the following documents:
+>
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/strong-consistency) - A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/2.9.7/configuring/strong-consistency) - A guide for operators
+> * [strong consistency][use ref strong consistency] - A more theoretical explication of strong
+  consistency
+
+## Client- and Server-side Conflict Resolution
+
+Riak's eventual consistency model is powerful because Riak is
+fundamentally non-opinionated about how data resolution takes place.
+While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+approaches to conflict resolution that are available. In Riak, you can
+mix and match conflict resolution strategies at the bucket level,
+[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets)
+to consider when reasoning about conflict resolution are the
+`allow_mult` and `last_write_wins` properties.
+
+These properties provide you with the following basic options:
+
+### Timestamp-based Resolution
+
+If the [`allow_mult`](#siblings) parameter is set to
+`false`, Riak resolves all object replica conflicts internally and does
+not return siblings to the client.
How Riak resolves those conflicts +depends on the value that you set for a different bucket property, +[`last_write_wins`]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets). If `last_write_wins` is set to `false`, +Riak will resolve all conflicts on the basis of +[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are +attached to all Riak objects as metadata. + +The problem with timestamps is that they are not a reliable resolution +mechanism in distributed systems, and they always bear the risk of data +loss. A better yet still-problematic option is to adopt a +last-write-wins strategy, described directly below. + +### Last-write-wins + +Another way to manage conflicts is to set `allow_mult` to `false`, as +with timestamp-based resolution, while also setting the +`last_write_wins` parameter to +`true`. This produces a so-called last-write-wins (LWW) strategy whereby +Riak foregoes the use of all internal conflict resolution strategies +when making writes, effectively disregarding all previous writes. + +The problem with LWW is that it will necessarily drop some writes in the +case of concurrent updates in the name of preventing sibling creation. +If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. For examples of client-side sibling resolution, see the following +client-library-specific docs: + +* [Java]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/nodejs) + +In Riak versions 2.0 and later, `allow_mult` is set to `true` by default +for any [bucket types]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) that you create. 
This means that if you wish to avoid client-side sibling resolution, you have a
few options:
+
+* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types)
+  that set `allow_mult` to `false`
+* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/2.9.7/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/2.9.7/configuring/reference#default-bucket-properties) for your
+  cluster. If you set the `buckets.default.allow_mult` parameter to
+  `false`, all bucket types that you create will have `allow_mult` set
+  to `false` by default.
+
+## Causal Context
+
+When a value is stored in Riak, it is tagged with a piece of metadata
+called a **causal context** which establishes the object's initial
+version. Causal context comes in one of two possible forms, depending
+on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context#vector-clocks) will be used.
+
+Causal context essentially enables Riak to compare the different values
+of objects stored in Riak and to determine a number of important things
+about those values:
+
+* Whether one value is a direct descendant of the other
+* Whether the values are direct descendants of a common parent
+* Whether the values are unrelated in recent heritage
+
+Using the information provided by causal context, Riak is frequently,
+though not always, able to resolve conflicts between values without
+producing siblings.
+
+Neither vector clocks nor dotted version vectors are human readable; they
+look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+If `allow_mult` is set to `true`, you should _always_ use causal context
+when updating objects, _unless you are certain that no object exists
+under that key_. Failing to use causal context with mutable data,
+especially for objects that are frequently updated, can lead to
+[sibling explosion]({{<baseurl>}}riak/kv/2.9.7/using/performance/latency-reduction#siblings), which can
+produce a variety of problems in your cluster. Fortunately, much of the
+work involved with using causal context is handled automatically by
+Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.7/developing/client-libraries). Examples can be found for each
+client library in the [Object Updates]({{<baseurl>}}riak/kv/2.9.7/developing/usage/updating-objects) document.
+
+## Siblings
+
+A **sibling** is created when Riak is unable to resolve the canonical
+version of an object being stored, i.e. when Riak is presented with
+multiple possible values for an object and can't figure out which one is
+most causally recent. The following scenarios can create sibling values
+inside of a single object:
+
+1. **Concurrent writes** - If two writes occur simultaneously from
+clients, Riak may not be able to choose a single value to store, in
+which case the object will be given a sibling. These writes could happen
+on the same node or on different nodes.
+2. **Stale causal context** - Writes from any client using a stale
+[causal context]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context).
This is a less likely scenario if a client updates
+the object by reading the object first, fetching the causal context
+currently attached to the object, and then returning that causal context
+to Riak when performing the update (fortunately, our client libraries
+handle much of this automatically). However, even if a client follows
+this protocol when performing updates, a situation may occur in which an
+update happens from a different client while the read/write cycle is
+taking place. This may cause the first client to issue the write with an
+old causal context value, resulting in a sibling being created. A client
+is "misbehaved" if it habitually updates objects with a stale context or
+with no context at all.
+3. **Missing causal context** - If an object is updated with no causal
+context attached, siblings are very likely to be created. This is an
+unlikely scenario if you're using a Basho client library, but it _can_
+happen if you are manipulating objects using a client like `curl` and
+forgetting to set the `X-Riak-Vclock` header.
+
+## Siblings in Action
+
+Let's have a more concrete look at how siblings work in Riak. First,
+we'll create a bucket type called `siblings_allowed` with `allow_mult`
+set to `true`:
+
+```bash
+riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
+riak-admin bucket-type activate siblings_allowed
+riak-admin bucket-type status siblings_allowed
+```
+
+If the type has been activated, running the `status` command should
+return `siblings_allowed is active`. Now, we'll create two objects and
+write both of them to the same key without first fetching the object
+(which obtains the causal context):
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+RiakObject obj1 = new RiakObject()
+        .withContentType("text/plain")
+        .withValue(BinaryValue.create("Ren"));
+RiakObject obj2 = new RiakObject()
+        .withContentType("text/plain")
+        .withValue(BinaryValue.create("Stimpy"));
+StoreValue store1 = new StoreValue.Builder(obj1)
+        .withLocation(bestCharacterKey)
+        .build();
+StoreValue store2 = new StoreValue.Builder(obj2)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store1);
+client.execute(store2);
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = Riak::RObject.new(bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'Ren'
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'Stimpy'
+obj2.store
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = RiakObject(client, bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.data = 'Ren'
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.data = 'Stimpy'
+obj2.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('siblings_allowed');
+obj1.setBucket('nickolodeon');
+obj1.setKey('best_character');
+obj1.setValue('Ren');
+
+var obj2 = new Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries]({{<baseurl>}}riak/kv/2.9.7/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.7/developing/getting-started) section.
+
+At this point, multiple objects have been stored in the same key without
+passing any causal context to Riak. Let's see what happens if we try to
+read the contents of the object:
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed', bucket: 'nickolodeon', key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found.
+We should get this response:
+
+```java
+com.basho.riak.client.cap.UnresolvedConflictException: Siblings found
+```
+
+```ruby
+<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]>
+```
+
+```python
+[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>]
+```
+
+```csharp
+Sibling count: 2
+    VTag: 1DSVo7VED8AC6llS8IcDE6
+    VTag: 7EiwrlFAJI5VMLK87vU4tE
+```
+
+```javascript
+info: nickolodeon/best_character has '2' siblings
+```
+
+```curl
+Siblings:
+175xDv0I3UFCfGRC7K7U9z
+6zY2mUCFPEoL834vYCDmPe
+```
+
+As you can see, reading an object with sibling values will result in
+some form of "multiple choices" response (e.g. `300 Multiple Choices` in
+HTTP). If you're using the HTTP interface and want to view all sibling
+values, you can attach an `Accept: multipart/mixed` header to your
+request:
+
+```curl
+curl -H "Accept: multipart/mixed" \
+  http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Response (without headers):
+
+```
+Ren
+--WUnzXITIPJFwucNwfdaofMkEG7H
+
+Stimpy
+--WUnzXITIPJFwucNwfdaofMkEG7H--
+```
+
+If you select the first of the two siblings and retrieve its value, you
+should see `Ren` and not `Stimpy`.
+
+### Using Causal Context
+
+Once you are presented with multiple options for a single value, you
+must determine the correct value. In an application, this can be done
+either in an automatic fashion, using a use case-specific resolver, or
+by presenting the conflicting objects to the end user. For more
+information on application-side conflict resolution, see our
+client-library-specific documentation for the following languages:
+
+* [Java]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/java)
+* [Ruby]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/ruby)
+* [Python]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/python)
+* [C#]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/csharp)
+* [Node.js]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/nodejs)
+
+We won't deal with conflict resolution in this section. Instead, we'll
+focus on how to use causal context.
+
+After having written several objects to Riak in the section above, we
+have values in our object: `Ren` and `Stimpy`. But let's say that we
+decide that `Stimpy` is the correct value based on our application's use
+case. In order to resolve the conflict, we need to do three things:
+
+1. Fetch the current object (which will return both siblings)
+2. Modify the value of the object, i.e. make the value `Stimpy`
+3. Write the object back to the `best_character` key
+
+What happens when we fetch the object first, prior to the update, is
+that the object handled by the client has a causal context attached. At
+that point, we can modify the object's value, and when we write the
+object back to Riak, _the causal context will automatically be attached
+to it_.
+Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+        bucketType: 'siblings_allowed',
+        bucket: 'nickolodeon',
+        key: 'best_character'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue('Stimpy');
+        client.storeValue({ value: riakObj, returnBody: true },
+            function (err, rslt) {
+                if (err) {
+                    throw new Error(err);
+                }
+
+                assert(rslt.values.length === 1);
+            }
+        );
+    }
+);
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible to have two clients that are
+simultaneously engaging in conflict resolution. To avoid a pathological
+divergence, you should be sure to limit the number of reconciliations and fail
+once that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/2.9.7/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant volume of updates is performed on a single
+object in a small period of time.
While updating a single object +_extremely_ frequently is not recommended, you can tune Riak's vector +clock pruning to prevent vector clocks from growing too large too +quickly. More on pruning in the [section below](#vector-clock-pruning). + +### How does `last_write_wins` affect resolution? + +On the surface, it seems like setting `allow_mult` to `false` +(the default) and `last_write_wins` to `true` would result in the same +behavior, but there is a subtle distinction. + +Even though both settings return only one value to the client, setting +`allow_mult` to `false` still uses vector clocks for resolution, whereas +if `last_write_wins` is `true`, Riak reads the timestamp to determine +the latest version. Deeper in the system, if `allow_mult` is `false`, +Riak will still allow siblings to exist when they are created (via +concurrent writes or network partitions), whereas setting +`last_write_wins` to `true` means that Riak will overwrite the value +with the one that has the later timestamp. + +When you don't care about sibling creation, setting `allow_mult` to +`false` has the least surprising behavior: you get the latest value, +but network partitions are handled gracefully. However, for cases in +which keys are rewritten often (and quickly) and the new value isn't +necessarily dependent on the old value, `last_write_wins` will provide +better performance. Some use cases where you might want to use +`last_write_wins` include caching, session storage, and insert-only +(no updates). + +{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}} +The combination of setting both the `allow_mult` and `last_write_wins` +properties to `true` leads to undefined behavior and should not be used. +{{% /note %}} + +## Vector Clock Pruning + +Riak regularly prunes vector clocks to prevent overgrowth based on four +parameters which can be set for any bucket type that you create: + +Parameter | Default value | Description +:---------|:--------------|:----------- +`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned +`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned +`young_vclock` | `20` | If a vector clock entry is younger than this value (in milliseconds), it will not be pruned +`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in milliseconds), it will be pruned + +This diagram shows how the values of these parameters dictate the vector +clock pruning process: + +![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png) + +## More Information + +Additional background information on vector clocks: + +* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock) +* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/) +* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/) +* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563) + + + + diff --git a/content/riak/kv/2.9.7/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/csharp.md new file mode 100644 index 0000000000..2c6900d40d --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/csharp.md @@ -0,0 +1,123 @@ +--- +title_supertext: "Conflict Resolution:" +title: "C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "C 
Sharp" + identifier: "usage_conflict_resolution_csharp" + weight: 103 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.7/dev/using/conflict-resolution/csharp + - /riak/kv/2.9.7/dev/using/conflict-resolution/csharp +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak .NET client][riak_dotnet_client]. + +## How the .NET Client Handles Conflict Resolution + +In the Riak .NET client, every Riak object has a `siblings` property that +provides access to a list of that object's sibling values. If there are no +siblings, that property will return an empty list. + +Here's an example of an object with siblings: + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +So what happens if the count of `obj.Siblings` is greater than 0, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `Siblings` list and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `Siblings` list and will +fetch, update and store the definitive value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Now, modify the object's value +obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain); + +// Then, store the object which has vector clock attached +var putRslt = client.Put(obj); +CheckResult(putRslt); + +obj = putRslt.Value; +// Voila, no more siblings! +Debug.Assert(obj.Siblings.Count == 0); +``` + +### Choosing a value from `Siblings` + +This example shows a basic sibling resolution strategy in which the first +sibling is chosen as the canonical value. 
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..77d396acac
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/conflict-resolution/golang
+  - /riak/kv/2.9.7/dev/using/conflict-resolution/golang
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, the result of a fetch may contain an array of
+sibling objects in its `Values` slice. If there are no siblings, that
+slice will contain a single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either: fetch, update, and store a
+canonical value; or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L125-L146)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type.
+ +[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210) + + + + diff --git a/content/riak/kv/2.9.7/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/java.md new file mode 100644 index 0000000000..a105abd5b1 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/java.md @@ -0,0 +1,276 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Java" + identifier: "usage_conflict_resolution_java" + weight: 100 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.7/dev/using/conflict-resolution/java + - /riak/kv/2.9.7/dev/using/conflict-resolution/java +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Java +client](https://github.com/basho/riak-java-client). + +## How the Java Client Handles Conflict Resolution + +The official Riak Java client provides a `ConflictResolver` interface +for handling sibling resolution. This interface requires that you +implement a `resolve` method that takes a Java `List` of objects of a +specific type that are stored in Riak and produces a single object of +that type, i.e. converts a `List<T>` to a single `T`. Once that +interface has been implemented, it can be registered as a singleton and +thereby applied to all read operations on a specific data type. Below is +an example resolver for the class `Foo`: + +```java +import com.basho.riak.client.api.cap.ConflictResolver; + +public class FooResolver implements ConflictResolver<Foo> { + @Override + public Foo resolve(List<Foo> siblings) { + // Insert your sibling resolution logic here + } +} +``` + +What happens within the `resolve` method is up to you and will always +depend on the use case at hand. You can implement a resolver that +selects a random `Foo` from the list, chooses the `Foo` with the most +recent timestamp (if you've set up the class `Foo` to have timestamps), +etc. In this tutorial we'll provide a simple example to get you started. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends" in the network. +Each user will bear the class `User`, which we'll create below. All of +the data for our application will be stored in buckets that bear the +[bucket type]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) `siblings`, and for this bucket type +`allow_mult` is set to `true`, which means that Riak will generate +siblings in certain cases---siblings that our application will need to +be equipped to resolve when they arise. + +The question that we need to ask ourselves now is this: if a given user +has sibling values, i.e. if there are multiple `friends` lists and Riak +can't decide which one is most causally recent, which list should be +deemed "correct" from the standpoint of the application? What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? 
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+competing values have `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+public class User {
+    public String username;
+    public Set<String> friends;
+
+    public User(String username, Set<String> friends) {
+        this.username = username;
+        this.friends = friends;
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution)
+in a way that is specific to the need at hand, i.e. taking a list of
+`User` objects and returning the `User` object that has the longest
+`friends` list:
+
+```java
+import java.util.List;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied on all reads that involve that object type.
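+
+Once registered, the resolver is applied whenever you fetch a value as a
+`User`. As a minimal sketch (the `siblings`/`users` bucket and the
+`bashobunny` key are the ones assumed throughout this tutorial):
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
+FetchValue fetch = new FetchValue.Builder(key).build();
+FetchValue.Response response = client.execute(fetch);
+
+// Any siblings are passed through the registered UserResolver here
+User bashobunny = response.getValue(User.class);
+```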
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.7/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // key is empty and returning the one sibling if there is only
+        // one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation.
While that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets).
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/nodejs.md
new file mode 100644
index 0000000000..de8e99862c
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/nodejs.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "NodeJS"
+    identifier: "usage_conflict_resolution_nodejs"
+    weight: 104
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/conflict-resolution/nodejs
+  - /riak/kv/2.9.7/dev/using/conflict-resolution/nodejs
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Node.js client](https://github.com/basho/riak-nodejs-client).
+
+## How the Node.js Client Handles Conflict Resolution
+
+In the Riak Node.js client, the `values` property of a fetch result may
+contain multiple sibling objects. If there are no siblings, that property
+will return an array with one value in it.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68)
+
+So what happens if the length of `rslt.values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `values` array and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `values` array and will
+fetch, update, and store the definitive value.
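+
+The general shape of that approach, as a minimal sketch (the
+`siblings`/`users` bucket naming mirrors the other client guides here; the
+full, runnable versions are the linked examples below):
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings', bucket: 'users', key: 'bashobunny'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    // Ignore the sibling values and reuse the first fetched RiakObject as
+    // a container, so that the store carries the causal context from the
+    // fetch and replaces the siblings server-side
+    var riakObj = rslt.values.shift();
+    riakObj.setValue(JSON.stringify({ username: 'bashobunny', friends: ['fred', 'barney'] }));
+    client.storeValue({
+        bucketType: 'siblings', bucket: 'users', key: 'bashobunny',
+        value: riakObj
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```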
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..40570c2a00
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,244 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/conflict-resolution/php
+  - /riak/kv/2.9.7/dev/using/conflict-resolution/php
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides what is needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+    ->build()
+    ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application?
What criteria
+should be applied in making that decision? Should the lists be merged?
+Should we pick a `User` object at random?
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+competing values have `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll deduplicate
+the `friends` array so that it behaves like a set.
+
+```php
+class User {
+    public $username;
+    public $friends;
+
+    public function __construct($username, array $friends = [])
+    {
+        $this->username = $username;
+        $this->friends = $friends;
+    }
+
+    public function __toString()
+    {
+        return json_encode([
+            'username' => $this->username,
+            'friends' => $this->friends,
+            'friends_count' => count($this->friends)
+        ]);
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```php
+$bashobunny = new User('bashobunny', ['fred', 'barney']);
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('bashobunny', 'users', 'siblings')
+    ->build()
+    ->execute();
+
+echo $response->hasSiblings(); // 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `$response->getSiblings()` array down to one member.
+In our case, we need a function that takes a Riak response object as its argument,
+applies some logic to the list of values contained in the `siblings` property
+of the object, and returns a single value.
For our example use case here, we'll
+return the sibling with the longest `friends` list:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    // If there are no siblings, simply return the lone object
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $max_key = 0;
+    foreach ($siblings as $key => $sibling) {
+        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+            $max_key = $key;
+        }
+    }
+
+    return $siblings[$max_key];
+}
+```
+
+We can then embed this function into a more general function for fetching
+objects from the `users` bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.7/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friends list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that; writing the resulting `User` object back to Riak is then
+a separate step, as described above.
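+
+The PHP guide stops short of showing that merged-list resolver, so here is
+a minimal sketch under the same assumptions as the resolver above (JSON
+data with `username`, `friends`, and `friends_count` fields, decoded by
+`getData()`):
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function merge_friends_lists_resolver(Command\Object\Response $response)
+{
+    if (!$response->hasSiblings()) {
+        $data = $response->getObject()->getData();
+        return new User($data['username'], $data['friends']);
+    }
+
+    // Union the friends lists of all siblings, dropping duplicates
+    $friends = [];
+    foreach ($response->getSiblings() as $sibling) {
+        $friends = array_unique(array_merge($friends, $sibling->getData()['friends']));
+    }
+
+    // All siblings share the same username, since it serves as the key
+    $username = $response->getSiblings()[0]->getData()['username'];
+
+    return new User($username, array_values($friends));
+}
+```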
+
+The drawback to this approach is that it's more or less inevitable that a user
+will remove a friend from their friends list, and then that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets).
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/python.md
new file mode 100644
index 0000000000..b198b4c10c
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/python.md
@@ -0,0 +1,258 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Python"
+    identifier: "usage_conflict_resolution_python"
+    weight: 102
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/conflict-resolution/python
+  - /riak/kv/2.9.7/dev/using/conflict-resolution/python
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Python
+client](https://github.com/basho/riak-python-client).
+
+## How the Python Client Handles Conflict Resolution
+
+In the official Python client, every object of the `RiakObject` class
+has a `siblings` property that provides access to a list of an object's
+sibling values. If there are no siblings, that property will return a
+list with only one item. Here's an example of an object with siblings:
+
+```python
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x106cc52d0>]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above?
The easiest way to resolve siblings automatically with
+the Python client is to create a conflict-resolving function that takes
+a list of sibling values and returns a single value. Such resolution
+functions can be registered either at the object level or the bucket
+level. A more complete explanation can be found in the section directly
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will
+be of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct?" What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the one
+that our application deems correct_. While this might not make sense in
+real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `friends` property that lists the usernames, as
+strings, of the user's friends. We will also create a `to_json` method,
+as we'll be storing each `User` object as JSON:
+
+```python
+class User(object):
+    def __init__(self, username, friends):
+        self.username = username
+        self.friends = friends
+
+    def to_json(self):
+        return vars(self)
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```python
+new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json()
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing and Registering a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = bucket.get('bashobunny')
+
+print(len(obj.siblings) > 1)
+```
+
+If we get `True`, then there are siblings. So what do we do in that
+case? The Python client allows us to write a conflict resolution hook
+function that will be triggered any time siblings are found, i.e. any
+time `len(obj.siblings) > 1`. A hook function like this needs to take a
+single `RiakObject` object as its argument, apply some sort of logic to
+the list of values contained in the `siblings` property, and ultimately
+return a list with a single "correct" value.
For our example case, we'll
+return the value with the longest `friends` list:
+
+```python
+def longest_friends_list_resolver(riak_object):
+    # We'll specify a lambda function that operates on the length of
+    # each sibling's "friends" list:
+    lm = lambda sibling: len(sibling.data['friends'])
+    # Then we'll return a list that contains only the object with the
+    # maximum value for the length of the "friends" list:
+    riak_object.siblings = [max(riak_object.siblings, key=lm), ]
+```
+
+### Registering a Conflict Resolver Function
+
+In the Python client, resolver functions can be registered at the object
+level, as in this example:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.resolver = longest_friends_list_resolver
+
+# Now, when the object is loaded from Riak, it will resolve to a single
+# value instead of multiple values when both commands are executed:
+obj.reload()
+obj.store()
+```
+
+Alternatively, resolvers can be registered at the bucket level, so that
+the resolution is applied to all objects in the bucket:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+bucket.resolver = longest_friends_list_resolver
+
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.reload()
+obj.store()
+
+# The resolver will also be applied if you perform operations using the
+# bucket object:
+
+bucket.get('bashobunny')
+bucket.get('some_other_user')
+```
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` object values and returns a single value. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including code examples
+from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.7/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists.
We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets). 
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/ruby.md
new file mode 100644
index 0000000000..4566620d77
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/conflict-resolution/ruby.md
@@ -0,0 +1,254 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Ruby"
+    identifier: "usage_conflict_resolution_ruby"
+    weight: 101
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/conflict-resolution/ruby
+  - /riak/kv/2.9.7/dev/using/conflict-resolution/ruby
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Ruby
+client](https://github.com/basho/riak-ruby-client).
+
+## How the Ruby Client Handles Conflict Resolution
+
+In the official Ruby client, every Riak object has a `siblings` property
+that provides access to a list of that object's sibling values. If there
+are no siblings, that property will return an array with only one item.
+Here's an example of an object with siblings:
+
+```ruby
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? In order to resolve siblings, you need to create a
+resolution function that takes a Riak object and reduces the `siblings`
+array down to a single value. An example is provided in the section
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will be
+of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct?" What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the one
+that our application deems correct_. While this might not make sense in
+real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` and a `friends` property that lists
+the usernames, as strings, of the user's friends.
We will also create a
+`to_json` method, as we'll be storing each `User` object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# {:username=>"riakuser127", :friends=>["captheorem238", "siblingsrule572"]}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
+```
+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username, type: 'siblings')
+  # Return the resolved sibling rather than the raw object
+  longest_friends_list_resolver(user_object)
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.7/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friends list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be merged into one
+    # flat array
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object.
The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets).
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/content-types.md b/content/riak/kv/2.9.7/developing/usage/content-types.md
new file mode 100644
index 0000000000..a5d5b8f43e
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/content-types.md
@@ -0,0 +1,192 @@
+---
+title: "Content Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Content Types"
+    identifier: "usage_content_types"
+    weight: 104
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+Riak KV is a fundamentally content-agnostic database. You can use it to
+store anything you want, from JSON to XML to HTML to binaries to images
+and beyond. It's important to note that _all_ objects stored in
+Riak need a specified content type. If you don't specify a
+content type, the behavior will vary based on your client library:
+
+```java
+// In the Java client, the response when storing an object without
+// specifying a content type will depend on what is being stored. If you
+// store a Java Map, for example, the client will automatically specify
+// that the object is "application/json"; if you store a String, the
+// client will specify "application/x-www-form-urlencoded"; POJOs are
+// stored as JSON by default, and so on.
+```
+
+```ruby
+# In the Ruby client, you must always specify a content type. If you
+# don't, you'll see the following error:
+ArgumentError: content_type is not defined!
+```
+
+```php
+# PHP will default to cURL's default content type for POST & PUT requests:
+# application/x-www-form-urlencoded
+
+# If you use the StoreObject::buildJsonObject() method when building your command,
+# it will store the item with application/json as the content-type
+```
+
+```python
+# In the Python client, the default content type is "application/json".
+# Because of this, you should always make sure to specify the content
+# type when storing other types of data.
+```
+
+```csharp
+// Using the Riak .NET Client, the response when storing an object without
+// specifying a content type will depend on what is being stored.
+// If you store a Dictionary, for example, the client will
+// automatically specify that the object is "application/json";
+// POCOs are stored as JSON by default, and so on.
+```
+
+```javascript
+// In the Node.js client, the default content type is "application/json".
+// Because of this, you should always make sure to specify the content
+// type when storing other types of data.
+```
+
+```erlang
+%% In the Erlang client, the response when storing an object without
+%% specifying a content type will depend on what is being stored. If
+%% you store a simple binary, for example, the client will automatically
+%% specify that the object is "application/octet-stream"; if you store a
+%% string, the client will specify "application/x-erlang-binary"; and so
+%% on.
+```
+
+```golang
+// In the Go client, you must always specify a content type.
+```
+
+Because content type negotiation varies so widely from client to client,
+we recommend consulting the documentation for your preferred client for
+more information.
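+
+A quick way to see which content type an object was stored with is to ask
+the HTTP API for the object's headers. As a minimal sketch (assuming a
+local node on the default port 8098 and the `quotes`/`oscar_wilde`/`genius`
+object stored in the next section):
+
+```curl
+curl -I http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# The response headers include the stored content type, e.g.:
+# Content-Type: text/plain
+```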
+
+## Specifying Content Type
+
+For all writes to Riak, you will need to specify a content type, for
+example `text/plain` or `application/json`.
+
+```java
+Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(wildeGeniusQuote)
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = Riak::RObject.new(bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.raw_data = 'I have nothing to declare but my genius'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->buildObject('I have nothing to declare but my genius!', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = RiakObject(client, bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.data = 'I have nothing to declare but my genius'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
+var obj = new RiakObject(id, "I have nothing to declare but my genius",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('I have nothing to declare but my genius');
+client.storeValue({
+    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
+                       <<"genius">>,
+                       <<"I have nothing to declare but my genius">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("I have nothing to declare but my genius"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("quotes").
+    WithBucket("oscar_wilde").
+    WithKey("genius").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "I have nothing to declare but my genius" \
+  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Please note that POST is also a valid method for writes, for the sake
+# of compatibility
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/creating-objects.md b/content/riak/kv/2.9.7/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..09ca254237
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/creating-objects.md
@@ -0,0 +1,555 @@
+---
+title: "Creating Objects in Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Creating Objects"
+    identifier: "usage_creating_objects"
+    weight: 100
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+[usage content types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/content-types
+
+Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
+requests.
Here is the basic form of writes:
+
+```
+PUT /types/<type>/buckets/<bucket>/keys/<key>
+
+# If you're using HTTP to interact with Riak, you can also use POST
+```
+
+As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/bucket-types).
+
+The object we're storing will be very simple, just a basic text snippet
+of something that Rufus might say. Let's build the object and then store
+it.
+
+```java
+String quote = "WOOF!";
+Namespace bucket = new Namespace("animals", "dogs");
+Location rufusLocation = new Location(bucket, "rufus");
+RiakObject rufusObject = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(BinaryValue.create(quote));
+StoreValue storeOp = new StoreValue.Builder(rufusObject)
+  .withLocation(rufusLocation)
+  .build();
+client.execute(storeOp);
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = Riak::RObject.new(bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->buildObject('WOOF!', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var obj = new RiakObject(id, "WOOF!", "text/plain");
+var result = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('WOOF!');
+client.storeValue({
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("WOOF!"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+Notice that we specified both a value for the object, i.e. `WOOF!`, and
+a content type, `text/plain`. See [content types][usage content types] for more information.
+
+Now you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/2.9.7/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/bucket-types).
+
+### Store an Object
+
+Your application will often have its own method of generating the keys
+for its data, e.g. on the basis of timestamps. If so, storing that data
+is easy. The basic request looks like this.
+
+```
+PUT /types/TYPE/buckets/BUCKET/keys/KEY
+
+# If you're using HTTP, POST can be used instead of PUT. The only
+# difference between POST and PUT is that you should POST in cases where
+# you want Riak to auto-generate a key.
# More on this can be found in the
+# examples below.
+```
+
+There is no need to intentionally create buckets in Riak. They pop into
+existence when keys are added to them, and disappear when all keys have
+been removed from them. If you don't specify a bucket's type, the type
+[`default`]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) will be applied.
+
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(viperKey)
+  .withOption(StoreOption.W, new Quorum(3))
+  .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [{w, 3}]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present.
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. Like a `GET` request, `300 Multiple
+Choices` may be returned if siblings existed or were created as part of
+the operation, and the response can be dealt with similarly.
+
+Normal HTTP status codes (responses will vary for client libraries):
+
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+For example, using the same object from above:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.RETURN_BODY, true)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3, returnbody: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+options.SetReturnBody(true);
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, returnBody: true, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var viper = riakObj.value;
+    logger.info("dodge viper: %s", viper.toString('utf8'));
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
+```
+
+### Store a New Object and Assign a Random Key
+
+If your application would rather leave key-generation up to Riak, issue
+a `POST` request to the bucket URL instead of a `PUT` to a bucket/key
+pair:
+
+```
+POST /types/TYPE/buckets/BUCKET/keys
+```
+
+If you don't pass Riak a `key` name after the bucket, it will know to
+create one for you.
+
+Supported headers are the same as for bucket/key write requests, though
+`X-Riak-Vclock` will never be relevant for these POST requests.
+Supported query parameters are also the same as for bucket/key PUT
+requests.
+ +Normal status codes: + +* `201 Created` + +This command will store an object in the bucket `random_user_keys`, +which bears the bucket type `users`. + +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)
+
+// Output:
+// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
+```
+
+```curl
+curl -i -XPOST \
+  -H "Content-Type: text/plain" \
+  -d "this is a test" \
+  http://localhost:8098/types/users/buckets/random_user_keys/keys
+
+# In the output, you should see a Location header that will give you the
+# location of the object in Riak, with the key at the end:
+
+Location: /buckets/random_user_keys/keys/G7FYUXtTsEdru4NP32eijMIRK3o
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/custom-extractors.md b/content/riak/kv/2.9.7/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..68d92abf9d
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/custom-extractors.md
@@ -0,0 +1,424 @@
+---
+title: "Custom Extractors"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Custom Extractors"
+    identifier: "usage_custom_extractors"
+    weight: 113
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/search/custom-extractors
+  - /riak/kv/2.9.7/dev/search/custom-extractors
+---
+
+Solr, and by extension Riak Search, has default extractors for a wide
+variety of data types, including JSON, XML, and plaintext. Riak Search
+ships with the following extractors:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+No specified type | `yz_noop_extractor`
+
+There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/usage/searching-data-types).
+
+If you're working with a data format that does not have a default Solr
+extractor, you can create your own and register it with Riak Search.
+We'll show you how to do so by way of example.
+
+## The Extractor Interface
+
+Creating a custom extractor involves creating an Erlang module that
+implements two functions:
+
+* `extract/1` - Takes the contents of the object and calls `extract/2`
+  with the same contents and an empty list
+* `extract/2` - Takes the contents of the object and returns an Erlang
+  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
+  single field name and a single value associated with that name
+
+The following extractor shows how a pure text extractor implements those
+two functions:
+
+```erlang
+-module(search_test_extractor).
+-include("yokozuna.hrl").
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+extract(Value, Opts) ->
+    FieldName = field_name(Opts),
+    [{FieldName, Value}].
+
+-spec field_name(proplist()) -> any().
+field_name(Opts) ->
+    proplists:get_value(field_name, Opts, text).
+```
+
+This extractor takes the contents of a `Value` and returns a proplist
+with a single field name (in this case `text`) and the single value.
+This function can be run in the Erlang shell. Let's run it providing the
+text `hello`:
+
+```erlang
+> c(search_test_extractor).
+%% {ok, search_test_extractor}
+
+> search_test_extractor:extract("hello").
+
+%% Console output:
+[{text, "hello"}]
+```
+
+Upon running this command, the value `hello` would be indexed in Solr
+under the fieldname `text`. If you wanted to find all objects with a
+`text` field that begins with `Fourscore`, you could use the
+Solr query `text:Fourscore*`, to give just one example.
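+
+Note also that the `Opts` proplist in `extract/2` lets a caller override the default field name. A quick sketch using the module above:
+
+```erlang
+%% Sketch: supply a custom field name via the Opts proplist
+> search_test_extractor:extract("hello", [{field_name, my_field}]).
+[{my_field, "hello"}]
+```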
+
+## An Example Custom Extractor
+
+Let's say that we're storing HTTP header packet data in Riak. Here's an
+example of such a packet:
+
+```
+GET http://www.google.com HTTP/1.1
+```
+
+We want to register the following information in Solr:
+
+Field name | Value | Extracted value in this example
+:----------|:------|:-------------------------------
+`method` | The HTTP method | `GET`
+`host` | The URL's host | `www.google.com`
+`uri` | The URI, i.e. what comes after the host | `/`
+
+The example extractor below would provide the three desired
+fields/values. It relies on the
+[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3)
+function from Erlang's standard library.
+
+```erlang
+-module(yz_httpheader_extractor).
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+%% In this example, we can ignore the Opts variable from the example
+%% above, hence the underscore:
+extract(Value, _Opts) ->
+    {ok,
+        {http_request,
+         Method,
+         {absoluteURI, http, Host, undefined, Uri},
+         _Version},
+        _Rest} = erlang:decode_packet(http, Value, []),
+    [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}].
+```
+
+This code will live in a `yz_httpheader_extractor.erl` file (as Erlang
+filenames must match the module name). Now that our extractor has
+been written, it must be compiled and registered in Riak before it can
+be used.
+
+## Registering Custom Extractors
+
+In order to use a custom extractor, you must create a compiled `.beam`
+file out of your `.erl` extractor file and then tell Riak where that
+file is located. Let's say that we have placed our
+`yz_httpheader_extractor.erl` file in the directory `/opt/beams`. First,
+we need to compile that file:
+
+```bash
+erlc yz_httpheader_extractor.erl
+```
+
+To instruct Riak where to find the resulting
+`yz_httpheader_extractor.beam` file, we'll need to add a line to an
+`advanced.config` file in the node's `/etc` directory (more information
+can be found in our documentation on [advanced configuration]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#advanced-configuration)). Here's an
+example:
+
+```advancedconfig
+[
+  %% Other configs
+  {vm_args, [
+    {"-pa /opt/beams", ""}
+  ]},
+  %% Other configs
+]
+```
+
+This will instruct the Erlang VM on which Riak runs to look for compiled
+`.beam` files in the proper directory. You should restart the node at
+this point. Once the node has been restarted, you can use the node's
+Erlang shell to register the `yz_httpheader_extractor`. First, attach to
+the shell:
+
+```bash
+riak attach
+```
+
+At this point, we need to choose a MIME type for our extractor. Let's
+call it `application/httpheader`. Once you're in the shell:
+
+```erlang
+> yz_extractor:register("application/httpheader", yz_httpheader_extractor).
+```
+
+If successful, this command will return a list of currently registered
+extractors.
It should look like this:
+
+```erlang
+[{default,yz_noop_extractor},
+ {"application/httpheader",yz_httpheader_extractor},
+ {"application/json",yz_json_extractor},
+ {"application/riak_counter",yz_dt_extractor},
+ {"application/riak_map",yz_dt_extractor},
+ {"application/riak_set",yz_dt_extractor},
+ {"application/xml",yz_xml_extractor},
+ {"text/plain",yz_text_extractor},
+ {"text/xml",yz_xml_extractor}]
+```
+
+If the `application/httpheader` extractor is part of that list, then the
+extractor has been successfully registered.
+
+## Verifying Our Custom Extractor
+
+Now that Riak Search knows how to decode and extract HTTP header packet
+data, let's store some in Riak and then query it. We'll put the example
+packet data from above in a `google_packet.bin` file. Then, we'll `PUT`
+that binary to Riak's `/search/extract` endpoint, making sure to use our
+custom MIME type:
+
+```curl
+curl -XPUT $RIAK_HOST/search/extract \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+That should return the following JSON:
+
+```json
+{
+  "method": "GET",
+  "host": "www.google.com",
+  "uri": "/"
+}
+```
+
+We can also verify this in the Erlang shell (whether in a Riak node's
+Erlang shell or otherwise):
+
+```erlang
+yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor).
+
+%% Console output:
+[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}]
+```
+
+## Indexing and Searching HTTP Header Packet Data
+
+Now that Solr knows how to extract HTTP header packet data, we need to
+create a schema that extends the [default schema]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
+to `<fields>` in the schema, which we'll name `http_header_schema` and
+store in a `http_header_schema.xml` file:
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="http_header_schema" version="1.5">
+<fields>
+  <!-- other required fields here -->
+
+  <field name="method" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="host" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/>
+</fields>
+</schema>
+```
+
+Now, we can store the schema:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("http_header_schema.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_xml = File.read('http_header_schema.xml')
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```php
+$schema_string = file_get_contents('http_header_schema.xml');
+(new \Basho\Riak\Command\Builder\StoreSchema($riak))
+  ->withName('http_header_schema')
+  ->withSchemaString($schema_string)
+  ->build()
+  ->execute();
+```
+
+```python
+schema_xml = open('http_header_schema.xml').read()
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/http_header_schema \
+  -H 'Content-Type: application/xml' \
+  --data-binary @http_header_schema.xml
+```
+
+Riak now has our schema stored and ready for use.
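+
+The same schema upload can also be issued from the Erlang client (a sketch, assuming an open `riakc_pb_socket` connection `Pid`, mirroring the `create_search_schema` call used elsewhere in these docs):
+
+```erlang
+%% Read the schema file and register it under the name used above
+{ok, SchemaData} = file:read_file("http_header_schema.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"http_header_schema">>, SchemaData).
+```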
+Let's create a search index called `header_data` that's associated with our new schema:
+
+```java
+YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
+StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreIndex($riak))
+  ->withName('header_data')
+  ->usingSchema('http_header_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/header_data \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"http_header_schema"}'
+```
+
+Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types)
+for all of the HTTP header data that we plan to store. Any bucket that
+bears this type will be associated with our `header_data` search index.
+We'll call our bucket type `http_data_store`.
+
+```bash
+riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
+riak-admin bucket-type activate http_data_store
+```
+
+Let's use the same `google_packet.bin` file that we used previously and
+store it in a bucket with the `http_data_store` bucket type, making sure
+to use our custom `application/httpheader` MIME type:
+
+```java
+Location key = new Location(new Namespace("http_data_store", "packets"), "google");
+File packetData = new File("google_packet.bin");
+byte[] packetBinary = FileUtils.readFileToByteArray(packetData);
+
+RiakObject packetObject = new RiakObject()
+        .setContentType("application/httpheader")
+        .setValue(BinaryValue.create(packetBinary));
+
+StoreValue storeOp = new StoreValue.Builder(packetObject)
+        .withLocation(key)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+packet_data = File.read('google_packet.bin')
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = Riak::RObject.new(bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.raw_data = packet_data
+obj.store
+```
+
+```php
+$object = new Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('google', 'packets', 'http_data_store')
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+packet_data = open('google_packet.bin').read()
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = RiakObject(client, bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.data = packet_data
+obj.store()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
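+
+For Erlang users, the equivalent store is a short sketch (again assuming a `riakc_pb_socket` connection `Pid`):
+
+```erlang
+%% Store the packet binary under our custom MIME type
+{ok, PacketData} = file:read_file("google_packet.bin"),
+Object = riakc_obj:new({<<"http_data_store">>, <<"packets">>},
+                       <<"google">>,
+                       PacketData,
+                       <<"application/httpheader">>),
+riakc_pb_socket:put(Pid, Object).
+```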
+
+Now that we have some header packet data stored, we can query our
+`header_data` index on whatever basis we'd like. First, let's verify
+that we'll get one result if we query for objects that have the HTTP
+method `GET`:
+
+```java
+// Using the same method from above:
+String query = "method:GET";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withQuery('method:GET')
+  ->withIndexName('header_data')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"
+
+# This should return a fairly large JSON object with a "num_found" field.
+# The value of that field should be 1.
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/deleting-objects.md b/content/riak/kv/2.9.7/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..0a098f60f1
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/deleting-objects.md
@@ -0,0 +1,157 @@
+---
+title: "Deleting Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Deleting Objects"
+    identifier: "usage_deleting_objects"
+    weight: 103
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+The delete command follows a predictable pattern and looks like this:
+
+```
+DELETE /types/TYPE/buckets/BUCKET/keys/KEY
+```
+
+The normal HTTP response codes for `DELETE` operations are `204 No
+Content` and `404 Not Found`. 404 responses are *normal*, in the sense
+that `DELETE` operations are idempotent and not finding the resource has
+the same effect as deleting it.
+
+Let's try to delete the `genius` key from the `oscar_wilde` bucket
+(which bears the type `quotes`):
+
+```java
+Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
+client.execute(delete);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\DeleteObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+string key = rslt.Value.Key;
+id = new RiakObjectId("users", "random_user_keys", key);
+var del_rslt = client.Delete(id);
+```
+
+```javascript
+// continuing from above example
+options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    key: generatedKey
+};
+client.deleteValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>)
+```
+
+```golang
+// Continuing from above example
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithKey(rsp.GeneratedKey).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+```
+
+## Client Library Examples
+
+If you are updating an object that has been deleted---or if an update
+might target a deleted object---we recommend that
+you first fetch the [causal context]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context) of the object prior to updating.
+This can be done by setting the `deletedvclock` parameter to `true` as
+part of the [fetch operation]({{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers/fetch-object). This can also be done
+with the official Riak clients for Ruby, Java, and Erlang, as in the
+examples below:
+
+```ruby
+object.delete
+deleted_object = bucket.get('key', deletedvclock: true)
+deleted_object.vclock
+```
+
+```python
+# It is not currently possible to fetch the causal context for a deleted
+# key in the Python client.
+```
+
+```java
+Location loc = new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+FetchValue fetch = new FetchValue.Builder(loc)
+        .withOption(Option.DELETED_VCLOCK, true)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+System.out.println(response.getVclock().asString());
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"bucket_type">>, <<"bucket">>},
+                                <<"key">>,
+                                [deleted_vclock]).
+
+%% In the Erlang client, the vector clock is accessible using the Obj
+%% object obtained above.
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type')
+  ->build()
+  ->execute();
+
+echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/document-store.md b/content/riak/kv/2.9.7/developing/usage/document-store.md
new file mode 100644
index 0000000000..4f0c05be40
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/document-store.md
@@ -0,0 +1,617 @@
+---
+title: "Implementing a Document Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Implementing a Document Store"
+    identifier: "usage_document_store"
+    weight: 112
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/search/document-store
+  - /riak/kv/2.9.7/dev/search/document-store
+---
+
+Although Riak wasn't explicitly created as a document store, two
+features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/)---make it possible to use Riak as a
+highly scalable document store with rich querying capabilities. In this
+tutorial, we'll build a basic implementation of a document store using
+[Riak maps]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#maps).
+
+## Basic Approach
+
+Riak Search enables you to implement a document store in Riak in a
+variety of ways. You could, for example, store and query JSON objects or
+XML and then retrieve them later via Solr queries. In this tutorial,
+however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#maps),
+index that data using Riak Search, and then run Solr queries against
+those stored objects.
+
+You can think of these Search indexes as **collections**.
Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/2.9.7/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. + +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). 
+Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @blog_post_schema.xml +``` + +With our schema uploaded, we can create an index called `blog_posts` and +associate that index with our schema: + +```java +YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema"); +StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('blog_posts') + ->usingSchema('blog_post_schema') + ->build() + ->execute(); +``` + +```python +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```csharp +var idx = new SearchIndex("blog_posts", "blog_post_schema"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: 'blog_post_schema', + indexName: 'blog_posts' +}; +client.storeIndex(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/blog_posts \ + -H 'Content-Type: application/json' \ + -d '{"schema": "blog_post_schema"}' +``` + +## How Collections will Work + +Collections are not a concept that is native to Riak but we can easily +mimic collections by thinking of a bucket type as a collection. When we +associate a bucket type with a Riak Search index, all of the objects +stored in any bucket of that bucket type will be queryable on the basis +of that one index. For this tutorial, we'll create a bucket type called +`cms` and think of that as a collection. 
We could also restrict our
+`blog_posts` index to a single bucket just as easily and think of that
+as a queryable collection, but we will not do that in this tutorial.
+
+The advantage of the bucket-type-based approach is that we could store
+blog posts from different blogs in different buckets and query them
+all at once as part of the same index. It depends on the use case at
+hand. In this tutorial, we'll only be storing posts from one blog, which
+is called "Cat Pics Quarterly" and provides in-depth theoretical
+discussions of cat pics with a certain number of Reddit upvotes. All of
+the posts in this blog will be stored in the bucket
+`cat_pics_quarterly`.
+
+First, let's create our `cms` bucket type and associate it with the
+`blog_posts` index:
+
+```bash
+riak-admin bucket-type create cms \
+  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
+riak-admin bucket-type activate cms
+```
+
+Now, any object stored in any bucket of the type `cms` will be indexed
+as part of our "collection."
+
+## Storing Blog Posts as Maps
+
+Now that we know how each element of a blog post can be translated into
+one of the Riak Data Types, we can create an interface in our
+application to serve as that translation layer. Using the method
+described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-modeling), we can construct a
+class that looks like this:
+
+```java
+import java.util.Set;
+
+public class BlogPost {
+    private String title;
+    private String author;
+    private String content;
+    private Set<String> keywords;
+    private DateTime datePosted;
+    private Boolean published;
+    private static final String bucketType = "cms";
+
+    private Location location;
+
+    private RiakClient client;
+
+    public BlogPost(RiakClient client,
+                    String bucketName,
+                    String title,
+                    String author,
+                    String content,
+                    Set<String> keywords,
+                    DateTime datePosted,
+                    Boolean published) {
+        this.client = client;
+        this.location = new Location(new Namespace(bucketType, bucketName), null);
+        this.title = title;
+        this.author = author;
+        this.content = content;
+        this.keywords = keywords;
+        this.datePosted = datePosted;
+        this.published = published;
+    }
+
+    public void store() throws Exception {
+        RegisterUpdate titleUpdate = new RegisterUpdate(title);
+        RegisterUpdate authorUpdate = new RegisterUpdate(author);
+        RegisterUpdate contentUpdate = new RegisterUpdate(content);
+        SetUpdate keywordsUpdate = new SetUpdate();
+        for (String keyword : keywords) {
+            keywordsUpdate.add(keyword);
+        }
+        RegisterUpdate dateUpdate =
+            new RegisterUpdate(datePosted.toString("YYYY-MM-DD HH:MM"));
+        FlagUpdate publishedUpdate = new FlagUpdate(published);
+        MapUpdate mapUpdate = new MapUpdate()
+                .update("title", titleUpdate)
+                .update("author", authorUpdate)
+                .update("content", contentUpdate)
+                .update("keywords", keywordsUpdate)
+                .update("date", dateUpdate)
+                .update("published", publishedUpdate);
+        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
+                .build();
+        client.execute(storeBlogPost);
+    }
+}
+```
+
+```ruby
+class BlogPost
+  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
+    bucket = client.bucket_type('cms').bucket(bucket_name)
+    map = Riak::Crdt::Map.new(bucket, nil)
+    map.batch do |m|
+      m.registers['title'] = title
+      m.registers['author'] = author
+      m.registers['content'] = content
+      keywords.each do |k|
+        m.sets['keywords'].add(k)
+      end
+      m.registers['date'] = date_posted
+      if published
+        m.flags['published'] = true
+      end
+    end
+  end
+end
+```
+
+```php
+class BlogPost {
+    private $title = '';
+    private $author = '';
+    private $content = '';
+    private $keywords = [];
+    private $datePosted = '';
+    private $published = false;
+    private $bucketType = "cms";
+
+    private $bucket = null;
+
+    private $riak = null;
+
+    public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
+    {
+        $this->riak = $riak;
+        $this->bucket = new Bucket($bucket, $this->bucketType);
+        $this->title = $title;
+        $this->author = $author;
+        $this->content = $content;
+        $this->keywords = $keywords;
+        $this->datePosted = $date;
+        $this->published = $published;
+    }
+
+    public function store()
+    {
+        $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));
+
+        foreach($this->keywords as $keyword) {
+            $setBuilder->add($keyword);
+        }
+
+        (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
+            ->updateRegister('title', $this->title)
+            ->updateRegister('author', $this->author)
+            ->updateRegister('content', $this->content)
+            ->updateRegister('date', $this->datePosted)
+            ->updateFlag('published', $this->published)
+            ->updateSet('keywords', $setBuilder)
+            ->withBucket($this->bucket)
+            ->build()
+            ->execute();
+    }
+}
+```
+
+```python
+from riak.datatypes import Map
+
+class BlogPost:
+    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
+        bucket = client.bucket_type('cms').bucket(bucket_name)
+        self.map = Map(bucket, None)
+        self.map.registers['title'].assign(title)
+        self.map.registers['author'].assign(author)
+        self.map.registers['content'].assign(content)
+        for k in keywords:
+            self.map.sets['keywords'].add(k)
+        self.map.registers['date'].assign(date_posted)
+        if published:
+            self.map.flags['published'].enable()
+        self.map.store()
+```
+
+```csharp
+/*
+ * Please see the code in the RiakClientExamples project:
+ * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
+ */
+```
+
+```javascript
+/*
+ * Please see the code in the examples repository:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
+ */
+```
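+
+No class scaffolding is needed in Erlang; the same translation layer is a single map update. Here is a hedged sketch using the Erlang client's data type API (`riakc_map`, `riakc_register`, `riakc_set`, `riakc_flag`), assuming a `riakc_pb_socket` connection `Pid`; passing `undefined` as the key asks Riak to assign one (an assumption based on the protocol buffers data type API):
+
+```erlang
+%% Sketch: build up the map locally, then ship the operation to Riak
+M0 = riakc_map:new(),
+M1 = riakc_map:update({<<"title">>, register},
+                      fun(R) -> riakc_register:set(<<"This one is so lulz!">>, R) end, M0),
+M2 = riakc_map:update({<<"author">>, register},
+                      fun(R) -> riakc_register:set(<<"Cat Stevens">>, R) end, M1),
+M3 = riakc_map:update({<<"keywords">>, set},
+                      fun(S) -> riakc_set:add_element(<<"adorbs">>, S) end, M2),
+M4 = riakc_map:update({<<"published">>, flag},
+                      fun(F) -> riakc_flag:enable(F) end, M3),
+%% undefined key: let Riak generate the key (assumption, see lead-in)
+riakc_pb_socket:update_type(Pid, {<<"cms">>, <<"cat_pics_quarterly">>},
+                            undefined, riakc_map:to_op(M4)).
+```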
+
+Now, we can store some blog posts. We'll start with just one:
+
+```java
+Set<String> keywords = new HashSet<String>();
+keywords.add("adorbs");
+keywords.add("cheshire");
+
+BlogPost post1 = new BlogPost(client, // client object
+                              "cat_pics_quarterly", // bucket
+                              "This one is so lulz!", // title
+                              "Cat Stevens", // author
+                              "Please check out these cat pics!", // content
+                              keywords, // keywords
+                              new DateTime(), // date posted
+                              true); // published
+try {
+    post1.store();
+} catch (Exception e) {
+    System.out.println(e);
+}
+```
+
+```ruby
+keywords = ['adorbs', 'cheshire']
+date = Time.now.strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost.new('cat_pics_quarterly',
+                          'This one is so lulz!',
+                          'Cat Stevens',
+                          'Please check out these cat pics!',
+                          keywords,
+                          date,
+                          true)
+```
+
+```php
+$keywords = ['adorbs', 'cheshire'];
+$date = new \DateTime('now');
+
+$post1 = new BlogPost(
+    $riak, // client object
+    'cat_pics_quarterly', // bucket
+    'This one is so lulz!', // title
+    'Cat Stevens', // author
+    'Please check out these cat pics!', // content
+    $keywords, // keywords
+    $date, // date posted
+    true // published
+);
+```
+
+```python
+import datetime
+
+keywords = ['adorbs', 'cheshire']
+date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost('cat_pics_quarterly',
+                      'This one is so lulz!',
+                      'Cat Stevens',
+                      'Please check out these cat pics!',
+                      keywords,
+                      date,
+                      True)
+```
+
+```csharp
+var keywords = new HashSet<string> { "adorbs", "cheshire" };
+
+var post = new BlogPost(
+    "This one is so lulz!",
+    "Cat Stevens",
+    "Please check out these cat pics!",
+    keywords,
+    DateTime.Now,
+    true);
+
+var repo = new BlogPostRepository(client, "cat_pics_quarterly");
+string id = repo.Save(post);
+```
+
+```javascript
+var post = new BlogPost(
+    'This one is so lulz!',
+    'Cat Stevens',
+    'Please check out these cat pics!',
+    [ 'adorbs', 'cheshire' ],
+    new Date(),
+    true
+);
+
+var repo = new BlogPostRepository(client, 'cat_pics_quarterly');
+
+repo.save(post, function (err, rslt) {
+    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
+});
+```
+
+## Querying
+
+Now that we have some blog posts stored in our "collection," we can
+start querying for whatever we'd like. Let's say that we want to find
+all blog posts with the keyword `funny` (after all, some cat pics are
+quite serious, and we may not want those).
+
+```java
+String index = "blog_posts";
+String query = "keywords_set:funny";
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'keywords_set:funny')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('keywords_set:funny')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'keywords_set:funny')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('keywords_set:funny')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny"
+```
+
+Or we can find posts that contain the word `furry`:
+
+```java
+String index = "blog_posts";
+String query = "content_register:furry";
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'content_register:furry')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('content_register:furry')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'content_register:furry')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('content_register:furry')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry"
+```
+
+Here are some more possible queries:
+
+Info | Query
+:----|:-----
+Unpublished posts | `published_flag:false`
+Titles that begin with `Loving*` | `title_register:Loving*`
+Post bodies containing the words `furry` and `jumping` | `content_register:(furry AND jumping)`
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/mapreduce.md b/content/riak/kv/2.9.7/developing/usage/mapreduce.md
new file mode 100644
index 0000000000..6c395474e2
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/mapreduce.md
@@ -0,0 +1,246 @@
+---
+title: "Using MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Using MapReduce"
+    identifier: "usage_mapreduce"
+    weight: 106
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/mapreduce
+  - /riak/kv/2.9.7/dev/using/mapreduce
+---
+
+[usage 2i]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes
+[usage search]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search
+[usage types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types
+[api http]: {{<baseurl>}}riak/kv/2.9.7/developing/api/http
+[api pb]: {{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode
+[guide mapreduce]:
{{<baseurl>}}riak/kv/2.9.7/developing/app-guide/advanced-mapreduce + +{{% note title="Use MapReduce sparingly" %}} +In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive to the extent that they can degrade performance in +production clusters operating under load. Thus, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. +{{% /note %}} + +MapReduce (M/R) is a technique for dividing data processing work across +a distributed system. It takes advantage of the parallel processing +power of distributed systems and also reduces network bandwidth, as the +algorithm is passed around to where the data lives rather than +transferring a potentially huge dataset to a client algorithm. + +You can use MapReduce for things like: filtering documents by +tags, counting words in documents, and extracting links to related data. +In Riak KV, MapReduce is one method for querying that is not strictly based +on key querying, alongside [secondary indexes][usage 2i] +and [search][usage search]. MapReduce jobs can be submitted through the +[HTTP API][api http] or the [Protocol Buffers API][api pb], although we +strongly recommend using the Protocol Buffers API for performance +reasons. + +## Features + +* Map phases execute in parallel with data locality. +* Reduce phases execute in parallel on the node where the job was + submitted. +* MapReduce queries written in Erlang. + +## When to Use MapReduce + +* When you know the set of objects over which you want to MapReduce + (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key) +* When you want to return actual objects or pieces of objects and not + just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on + non-key-based queries, but they only return lists of keys and not + whole objects. +* When you need the utmost flexibility in querying your data. MapReduce + gives you full access to your object and lets you pick it apart any + way you want. + +## When Not to Use MapReduce + +* When you want to query data over an entire bucket. MapReduce uses a + list of keys, which can place a lot of demand on the cluster. +* When you want latency to be as predictable as possible. + +## How it Works + +The MapReduce framework helps developers divide a query into steps, +divide the dataset into chunks, and then run those step/chunk pairs in +separate physical hosts. + +There are two steps in a MapReduce query: + +* **Map** - The data collection phase, which breaks up large chunks of + work into smaller ones and then takes action on each chunk. Map + phases consist of a function and a list of objects on which the map + operation will operate. +* **Reduce** - The data collation or processing phase, which combines + the results from the map step into a single output. The reduce phase + is optional. + +Riak KV MapReduce queries have two components: + +* A list of inputs +* A list of phases + +The elements of the input list are object locations as specified by +[bucket type][usage types], bucket, and key. The elements of the +phases list are chunks of information related to a map, a reduce, or a +link function. + +A MapReduce query begins when a client makes the request to Riak KV. 
The
+node that the client contacts to make the request becomes the
+*coordinating node* responsible for the MapReduce job. As described
+above, each job consists of a list of phases, where each phase is either
+a map or a reduce phase. The coordinating node uses the list of phases
+to route, to the proper [vnode][glossary vnode], both the object keys and the
+function that will operate on the objects stored under those keys.
+
+After running the map function, the results are sent back to the
+coordinating node. This node then concatenates the list and passes that
+information over to a reduce phase on the same coordinating node,
+assuming that the next phase in the list is a reduce phase.
+
+The diagram below provides an illustration of how a coordinating vnode
+orchestrates a MapReduce job.
+
+![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png)
+
+## Example
+
+In this example, we'll create four objects with the text "caremad"
+repeated a varying number of times and store those objects in the bucket
+`training` (which does not bear a [bucket type][usage types]).
+An Erlang MapReduce function will be used to count the occurrences of
+the word "caremad."
+
+### Data object input commands
+
+For the sake of simplicity, we'll use [curl](http://curl.haxx.se/)
+in conjunction with Riak KV's [HTTP API][api http] to store the objects:
+
+```curl
+curl -XPUT http://localhost:8098/buckets/training/keys/foo \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad data goes here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bar \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad caremad'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/baz \
+  -H 'Content-Type: text/plain' \
+  -d 'nothing to see here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bam \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad'
+```
+
+### MapReduce invocation
+
+Invoking a MapReduce function from a compiled Erlang program requires
+that the function be compiled and distributed to all nodes.
+
+For interactive use, however, it's not necessary to do so; instead, we
+can invoke the client library from the
+[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define
+functions to send to Riak KV on the fly.
+
+First we define the map function, which returns, for each object, a
+tuple containing the object's key and the number of occurrences of the
+text `caremad` in its value.
+
+We're going to generalize and optimize it a bit by supplying a
+compiled regular expression when we invoke MapReduce; our function
+will expect that as the third argument.
+
+```erlang
+ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of
+    {match, Matches} -> [{riak_object:key(O), length(Matches)}];
+    nomatch -> [{riak_object:key(O), 0}]
+end end.
+```
+
+Next, to call `ReFun` on all keys in the `training` bucket, we can do
+the following in the Erlang shell.
+
+{{% note title="Warning" %}}
+Do not use this in a production
+environment; listing all keys to identify those in the `training` bucket
+is a very expensive process.
+{{% /note %}}
+
+```erlang
+{ok, Re} = re:compile("caremad").
+```
+
+That will return output along the following lines, verifying that
+compilation has completed:
+
+```
+{ok,{re_pattern,0,0,
+                <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100,
+                ...>>}}
+```
+
+Then, we can create a socket link to our cluster:
+
+```erlang
+{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+                              [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
+
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/next-gen-replication.md b/content/riak/kv/2.9.7/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..062eb11fa6
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/next-gen-replication.md
@@ -0,0 +1,153 @@
+---
+title: "Next-Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.9.7"
+menu:
+  riak_kv-2.9.7:
+    name: "Next Gen Replication"
+    identifier: "learn_concepts_next_gen_replication"
+    weight: 108
+    parent: "learn_concepts"
+version_history:
+  in: "2.9.1+"
+toc: true
+aliases:
+  - /riak-docs/riak/2.9.7/dev/using/nextgenreplication
+---
+[concept TicTac aae]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/tictac-active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/replication
+
+## Next Generation Replication - How it Works
+
+### Replication Actors
+
+Each node in `riak_kv` starts three processes that manage inter-cluster replication: a tictac AAE full-sync manager, a replication queue source manager, and a replication queue sink manager. All processes are started by default (whether or not replication is enabled), but will only play an active role should replication be configured. Further details on the processes involved:
+
+* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`
+
+  * There is a single actor on each node that manages the full-sync reconciliation workload configured for that node.
+
+  * Each node is configured with the details of a peer node at a remote cluster. Each manager is responsible for controlling cluster-wide hashtree exchanges between the local node and the peer node, and for prompting any repairs required across the cluster (not just on this node). The information is exchanged between the peers, but that information represents the data across the whole cluster.
Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`.
+
+  * Each node is configured with a schedule to determine how frequently this manager will run its reconcile and repair operations.
+
+  * It is an administrator's responsibility to ensure the cluster AAE workload is distributed across nodes with sufficient diversity to ensure correct operation under failure. Work is not re-distributed between nodes in response to failure on either the local or remote cluster, so there must be other nodes already configured to share that workload to continue operation under failure conditions.
+
+  * Each node can only full-sync with one other cluster (via the one peer node). If the cluster needs to full-sync with more than one cluster, then the administrator should ensure different nodes have the different configurations necessary to achieve this.
+
+  * Scheduling of work to minimise concurrency of reconciliation operations is managed by this actor using a simple, coordination-free mechanism.
+
+  * The administrator may at run-time suspend or resume the regular running of full-sync operations on any given node via the `riak_kv_ttaaefs_manager`.
+
+* __Replication Queue Source-Side Manager__
+
+  * There is a single actor on each node that manages the queueing of replication object references to be consumed by other clusters. This actor runs a configurable number of queues, which contain pointers to data which is required to be consumed by different remote clusters.
+
+  * The general pattern is that each delta within a cluster will be published once via the `riak_kv_replrtq_src` on a node local to the discovery of the change. Each queue which is a source of updates will have multiple consumers spread across multiple sink nodes on the receiving cluster - where each sink-side node's consumers are being managed by a `riak_kv_replrtq_snk` process on that node.
+
+  * Queues may have data filtering rules to restrict what changes are distributed via that queue. The filters can restrict replication to a specific bucket, or bucket type, a bucket name prefix, or allow for any change to be published to that queue.
+
+  * __Real-time replication__ changes (i.e. PUTs that have just been co-ordinated on this node within the cluster) are sent to the `riak_kv_replrtq_src` in one of the following formats:
+    * {Bucket, Key, Clock, {tombstone, Object}};
+    * {Bucket, Key, Clock, {object, Object}};
+    * {Bucket, Key, Clock, to_fetch}.
+
+  * Real-time replicated objects are the highest priority items to be queued, and are placed on __every queue whose data filtering rules are matched__ by the object. If the priority queue has grown beyond a limited number of items (the number being defined in `riak_kv.replrtq_srcobjectlimit`), then any `{object, Object}` references are stripped and replaced with `to_fetch`. This is to help limit the memory consumed by the queue during failure conditions i.e. when a sink has stopped consuming from the source queue.
+
+  * Changes identified by __AAE full-sync replication__ processes run by the `riak_kv_ttaaefs` manager on the local node are sent to the `riak_kv_replrtq_src` as references, and queued as the second highest priority. These changes are queued only on __a single queue defined within the configuration__ of `riak_kv_ttaaefs_manager`. The changes queued are only references to the object (Bucket, Key and Clock) not the actual object.
+
+  * Changes identified by __AAE fold operations__ for administrator-initiated transition or repair operations (e.g. a fold over a bucket or key-range, or over a given range of modified dates) are sent to the `riak_kv_replrtq_src` to be queued as the lowest priority onto __a single queue defined by the administrator when initiating the AAE fold operation__. The changes queued are only references to the object (Bucket, Key and Clock), not the actual object - and are only the changes discovered through the fold running on vnodes local to this node.
+
+  * Should the local node fail, all undelivered object references will be dropped.
+
+  * Queues are bounded, with limits set separately for each priority. Items are consumed from the queue in strict priority order, so a backlog of non-real-time replication events cannot cause a backlog or failure in real-time events.
+
+  * The queues are provided using the existing `riak_core_priority_queue` module in Riak.
+
+  * The administrator may at run-time suspend or resume the publishing of data to specific queues via the `riak_kv_replrtq_src` process.
+
+* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk`
+
+  * There is a single actor on each node that manages the process of consuming from queues on the `riak_kv_replrtq_src` on remote clusters.
+
+  * The `riak_kv_replrtq_snk` can be configured to consume from multiple queues, across an open-ended number of peers. For instance, if each node on Cluster A maintains a queue named `cluster_c_full`, and each node on Cluster B maintains a queue named `cluster_c_partial`, then `riak_kv_replrtq_snk` can be configured to consume from the `cluster_c_full` queue on every node in Cluster A and from the `cluster_c_partial` queue on every node in Cluster B.
+
+  * The `riak_kv_replrtq_snk` manages a finite number of workers for consuming from remote peers. The `riak_kv_replrtq_snk` tracks the results of work in order to back off slightly from peers regularly not returning results to consume requests (in favour of those peers indicating a backlog by regularly returning results). The `riak_kv_replrtq_snk` also tracks the results of work in order to back off severely from those peers returning errors (so as not to lock too many workers consuming from unreachable nodes).
+
+  * The administrator may at run-time suspend or resume the consuming of data from specific queues or peers via the `riak_kv_replrtq_snk`.
+
+### Real-time Replication - Step by Step
+
+Previous replication implementations initiate replication through a post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm` after "enough" responses have been received from other vnodes (based on the n, w, dw and pw values for the PUT). Without enough responses, the replication hook is not fired, although the client should receive an error and retry. This process of retrying may eventually fire the hook - but it is possible for a PUT to fail, for the hook never to fire, and yet for a GET to be locally successful (due to read-repair and anti-entropy), leaving no clue that the object has not been replicated.
+
+In implementing the new replication solution, the point at which replication is triggered has been changed to the point at which the co-ordinated PUT is completed. The replication of the PUT to other clusters may therefore occur in parallel to the replication of the PUT to other nodes in the source cluster. This is the first opportunity at which sufficient information is known (e.g.
the updated vector clock), and it reduces the size of the time-window of inconsistency between the clusters, and also reduces the window of opportunity for a PUT to succeed but not have replication triggered.
+
+Replication is fired within the `riak_kv_vnode` `actual_put/8`. On condition of the vnode being a co-ordinator of the PUT, and of `riak_kv.replrtq_enablesrc` being set to enabled (true), the following work is done:
+
+- The object reference to be replicated is determined; this is the type of reference to be placed on the replication queue.
+
+  - If the object is now a tombstone, the whole object is used as the replication reference. The whole object is used due to the small size of the object, and the need to avoid race conditions with reaping activity if `delete_mode` is not `keep` - the cluster may not be able to fetch the tombstone to replicate in the future. The whole object must be kept on the queue and not be filtered by the `riak_kv_replrtq_src` to be replaced with a `to_fetch` reference.
+
+  - If the object is below the `riak_kv.replrtq_srcobjectsize` (default 200KB) then the whole object will be sent to the `riak_kv_replrtq_src`, and it will be queued as a whole object as long as the current size of the priority real-time queue does not exceed the `riak_kv.replrtq_srcobjectlimit` (default 1000). If an object is over the size limit, a `to_fetch` reference will be sent instead of the object, and if the queue is too large the `riak_kv_replrtq_src` will substitute a `to_fetch` reference before queueing.
+
+- The `{Bucket, Key, Clock, ObjectReference}` is cast to the `riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the priority queue.
+
+- The queue has a configurable absolute limit that is applied individually for each priority. The limit is configured via `riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5 minutes of traffic at 1,000 PUTs per second). When this limit is reached, new replication references are discarded on receipt rather than queued - these discarded references will eventually need to be re-replicated via full-sync.
+
+The reference now needs to be handled by the `riak_kv_replrtq_src`. The task list for this process is:
+
+- Assign a priority to the replication event depending on what prompted the replication (e.g. highest priority to real-time events received from co-ordinator vnodes).
+
+- Add the reference to the tail of __every__ matching queue, based on priority. Each queue is configured to match either `any` replication event, no real-time events (using the configuration `block_rtq`), or a subset of events (using either a bucket `type` filter or a `bucket` filter).
+
+In order to replicate the object, it must now be fetched from the queue by a sink. A sink-side cluster should have multiple consumers, on multiple nodes, consuming from each node in the source-side cluster. These workers are handed work items by the `riak_kv_replrtq_snk`, with a Riak client configured to communicate with the remote node, and the worker will initiate a `fetch` from that node.
+
+On receipt of the `fetch` request the source node should:
+
+- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in place of `{Bucket, Key}`.
+
+- The GET FSM should go directly into the `queue_fetch` state, and try to fetch the next replication reference from the given queue name via the `riak_kv_replrtq_src`.
+
+  - If the fetch from the queue returns `queue_empty`, this is relayed back to the sink-side worker, and ultimately the `riak_kv_replrtq_snk`, which may then slow down the pace at which fetch requests are sent to this node/queue combination. To reduce the volume of individual requests when queues are mainly empty, the queue is only considered empty if it has reported empty 8 times from requests 4ms apart.
+
+  - If the fetch returns an actual object, this is relayed back to the sink worker.
+
+  - If the fetch returns a replication reference with the flag `to_fetch`, the `riak_kv_get_fsm` will continue down the standard path of states starting with `prepare`, and fetch the object, which will then be returned to the sink worker.
+
+- If a successful fetch is relayed back to the sink worker, it will replicate the PUT using a local `riak_client:push/4`. The push will complete a PUT of the object on the sink cluster - using a `riak_kv_put_fsm` with appropriate options (e.g. `asis`, `disable-hooks`).
+
+  - The code within the `riak_client:push/4` follows the behaviour of the existing `riak_repl` on receipt of a replicated object.
+
+- If the fetch and push request fails, the sink worker will report this back to the `riak_kv_replrtq_snk`, which should delay further requests to that node/queue so as to avoid rapidly locking up sink workers communicating with a failing node.
+
+
+### Full-Sync Reconciliation and Repair - Step by Step
+
+The `riak_kv_ttaaefs_manager` controls the full-sync replication activity of a node. Each node is configured with a single peer with which it is to run full-sync checks and repairs, assuming that across the cluster sufficient peers to sufficient clusters have been configured to complete the overall work necessary for that cluster. Ensuring there are sufficient peer relations is an administrator responsibility; there is no re-balancing or re-scaling of this work during failure scenarios.
+
+The `riak_kv_ttaaefs_manager` is a source-side process. It will not attempt to repair any discovered discrepancies where the remote cluster is ahead of the local cluster - the job of the process is to ensure that a remote cluster is up-to-date with the changes which have occurred in the local cluster. For mutual full-sync replication, there will be a need for an equivalent configuration on the peer cluster.
+
+The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the configuration. The schedule has wants: the number of times per day that it is desired that this manager will:
+
+- Reconcile changes across the whole cluster over all time;
+
+- Skip work for a schedule slot and do nothing;
+
+- Reconcile changes that have occurred in the past hour;
+
+- Reconcile changes that have occurred in the past day.
+
+On startup, the manager looks at these wants and provides a random distribution of work across slots. The day is divided into evenly distributed slots so there is a slot for each want in the schedule. The manager will run the work for a slot at an offset from the start of the slot, based on the place this node has in the sorted list of currently active nodes. So if each node is configured with the same total number of wants, work will be synchronised to have limited overlap within the cluster.
+
+When, on a node, a scheduled piece of work comes due, the `riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work between the two clusters (using the peer configuration to reach the remote cluster).
Once the work is finished, it will schedule the next piece of work - unless the start time for the next piece of work has already passed, in which case the next work is skipped. When all the work in the schedule is complete, a new schedule is calculated from the wants.
+
+When starting an `aae_exchange` the `riak_kv_ttaaefs_manager` must pass in a repair function. This function will compare clocks from identified discrepancies, and where the source cluster is ahead of the sink, send the `{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name on `riak_kv_replrtq_src`. These queued entries will then be replicated through being fetched by the `riak_kv_replrtq_snk` workers, although this will only occur when there is no higher priority work to replicate, i.e. real-time replication events prompted by locally co-ordinated PUTs.
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/reading-objects.md b/content/riak/kv/2.9.7/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..8ddb741380
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/reading-objects.md
@@ -0,0 +1,252 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Object Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object
+`pr` | `0` | How many [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+        .withOption(FetchOption.R, new Quorum(3))
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->withParameter('r', 3)
+  ->build()
+  ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/usage/replication.md b/content/riak/kv/2.9.7/developing/usage/replication.md new file mode 100644 index 0000000000..6fe7f5e4f2 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage/replication.md @@ -0,0 +1,592 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.7/dev/advanced/replication-properties + - /riak/kv/2.9.7/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/2.9.7/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/2.9.7/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on each individual read or write
+request, as shown in the [section on client-level replication
+settings](#client-level-replication-settings) below.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.7/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.7/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level.
You can use [bucket types]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
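+
+In practice, R is often supplied on individual requests. Before the
+bucket-type example below, here is a minimal per-request sketch using the
+Python client (the same `r` keyword argument that appears in the
+client-level examples later on this page); the connection details and the
+reuse of the `n_val_equals_2` bucket from the previous section are
+illustrative assumptions:
+
+```python
+import riak
+
+# Assumed local node; adjust host/port for your cluster
+client = riak.RiakClient(pb_port=8087)
+bucket = client.bucket_type('n_val_equals_2').bucket('test_bucket')
+
+# r=1: return as soon as a single replica responds (fast, but may
+# miss the object if that replica hasn't received it yet)
+fast = bucket.get('test_key', r=1)
+
+# r=2: wait for both replicas to respond (slower, more accurate)
+safe = bucket.get('test_key', r=2)
+```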
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
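+
+W can likewise be set per request. Here is a sketch using the Python
+client's `w` keyword argument (mirroring the `w:`/`dw:` options shown in
+the Ruby example under [Client-level Replication Settings](#client-level-replication-settings)
+below); the connection details are illustrative assumptions:
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)  # assumed local node
+bucket = client.bucket('animal_facts')
+
+obj = riak.RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+
+# Report success only once 3 vnodes have acknowledged the write
+obj.store(w=3)
+```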
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak.
When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum`
+to `true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
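+
+That arithmetic is easy to verify. A small standalone snippet (not part
+of any Riak API) that reproduces the numbers in the previous paragraph:
+
+```python
+def quorum(n):
+    """Quorum size for n replicas: floor(N/2) + 1."""
+    return n // 2 + 1
+
+for n in range(3, 9):
+    print(n, quorum(n))
+# Prints: 3 2, 4 3, 5 3, 6 4, 7 4, 8 5
+```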
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ...
}}")); +StoreValue store = new StoreValue.Builder(obj) + .withLocation(michaelJordanKey) + .withOption(StoreOption.W, new Quorum(3)) + .withOption(StoreOption.DW, new Quorum(2)) + .build(); +client.execute(store); +``` + +```php +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildJsonObject('{'stats':{ ... large stats object ... }}') + ->buildLocation('john_stockton', 'nba_stats') + ->withParameter('w', 3) + ->withParameter('dw', 2) + ->build() + ->execute(); +``` + +```erlang +Obj = riakc_obj:new(<<"nba_stats">>, + <<"michael_jordan">>, + <<"{'stats':{ ... large stats object ... }}">>, + <<"application/json">>), +riakc_pb_socket:put(Pid, Obj). +``` + +```curl +curl -XPUT \ + -H "Content-Type: application/json" \ + -d '{"stats":{ ... large stats object ... }}' \ + http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 +``` + +All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/2.9.7/developing/client-libraries) enable you to +set replication properties this way. For more detailed information, +refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/2.9.7/developing/getting-started) +or to client-specific documentation: + +* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) +* [Java](http://basho.github.io/riak-java-client/2.0.0/) +* [Python](http://basho.github.io/riak-python-client/) +* [Erlang](http://basho.github.io/riak-erlang-client/) + +## Illustrative Scenarios + +In case the above explanations were a bit too abstract for your tastes, +the following table lays out a number of possible scenarios for reads +and writes in Riak and how Riak is likely to respond. Some of these +scenarios involve issues surrounding conflict resolution, vector clocks, +and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context#vector-clocks) documentation for more information. + +#### Read Scenarios + +These scenarios assume that a read request is sent to all 3 primary +vnodes responsible for an object. + +Scenario | What happens in Riak +:--------|:-------------------- +All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/2.9.7/learn/concepts/causal-context#siblings">siblings</a> +2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/2.9.7/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes +2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) + +#### Write Scenarios + +These scenarios assume that a write request is sent to all 3 primary +vnodes responsible for an object. 
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/search-schemas.md b/content/riak/kv/2.9.7/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..48f87cf7fb
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/search-schemas.md
@@ -0,0 +1,511 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/advanced/search-schema
+  - /riak/kv/2.9.7/dev/advanced/search-schema
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/), and [more]({{<baseurl>}}riak/kv/2.9.7/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value.
Sufficiently sized objects can potentially take up tremendous
+amounts of disk space, so pay special attention to those indexes.
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('cartoons.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('cartoons.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+xml_file.close()
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon Thundercats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above object into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+ +``` +name=Lion-o +age=30 +leader=true +aliases.name=León-O +aliases.desc_es=Señor de los ThunderCats +aliases.name=Starlion +aliases.desc_fr=Le jeune seigneur des Cosmocats +``` + +This means that our schema should handle `name`, `age`, `leader`, +`aliases.name` (a `dot` is a valid field character), and +`aliases.desc_*` which is a description in the given language of the +suffix (Spanish and French). + +### Required Schema Fields + +Solr schemas can be very complex, containing many types and analyzers. +Refer to the [Solr 4.7 reference +guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf) +for a complete list. You should be aware, however, that there are a few +fields that are required by Riak Search in order to properly distribute +an object across a [cluster][concept clusters]. These fields are all prefixed +with `_yz`, which stands for +[Yokozuna](https://github.com/basho/yokozuna), the original code name +for Riak Search. + +Below is a bare minimum skeleton Solr Schema. It won't do much for you +other than allow Riak Search to properly manage your stored objects. + +```xml +<?xml version="1.0" encoding="UTF-8" ?> +<schema name="schedule" version="1.5"> + <fields> + + <!-- All of these fields are required by Riak Search --> + <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/> + <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + </fields> + + <uniqueKey>_yz_id</uniqueKey> + + <types> + <!-- YZ String: Used for non-analyzed fields --> + <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" /> + </types> +</schema> +``` + +If you're missing any of the above fields, Riak Search will reject your +custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`. + +In the table below, you'll find a description of the various required +fields. You'll rarely need to use any fields other than `_yz_rt` (bucket +type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err` +can be helpful if you suspect that your extractors are failing. +Malformed JSON or XML will cause Riak Search to index a key and set +`_yz_err` to 1, allowing you to reindex with proper values later. 
+ +Field | Name | Description +:-------|:-----|:----------- +`_yz_id` | ID | Unique identifier of this Solr document +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy) +`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes +`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions +`_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them +`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to +`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to +`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to +`_yz_err` | Error Flag | indicating if this doc is the product of a failed object extraction + +### Defining Fields + +With your required fields known and the skeleton schema elements in +place, it's time to add your own fields. Since you know your object +structure, you need to map the name and type of each field (a string, +integer, boolean, etc). + +When creating fields you can either create specific fields via the +`field` element or an asterisk (`*`) wildcard field via `dynamicField`. +Any field that matches a specific field name will win, and if not, it +will attempt to match a dynamic field pattern. + +Besides a field `type`, you also must decide if a value is to be +`indexed` (usually `true`) and `stored`. When a value is `stored` that +means that you can get the value back as a result of a query, but it +also doubles the storage of the field (once in Riak, again in Solr). If +a single Riak object can have more than one copy of the same matching +field, you also must set `multiValued` to `true`. + +```xml +<?xml version="1.0" encoding="UTF-8" ?> +<schema name="schedule" version="1.0"> + <fields> + <field name="name" type="string" indexed="true" stored="true" /> + <field name="age" type="int" indexed="true" stored="false" /> + <field name="leader" type="boolean" indexed="true" stored="false" /> + <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" /> + <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" /> + <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" /> + + <!-- All of these fields are required by Riak Search --> + <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/> + <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + </fields> + + <uniqueKey>_yz_id</uniqueKey> +``` + +Next, take note of the types you used in the fields and ensure that each +of the field types are defined as a `fieldType` under the `types` +element. 
Basic types such as `string`, `boolean`, and `int` have matching
+Solr classes. There are dozens more types, including many kinds of
+numeric fields (`float`, `tdouble`, `random`), `date` fields, and even
+geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we mapped any field that ends with
+`_es` to Spanish, and `_de` to German.
+
+```xml
+  <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- Spanish -->
+    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+        <filter class="solr.SpanishLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+      </analyzer>
+    </fieldType>
+
+    <!-- German -->
+    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
+        <filter class="solr.GermanNormalizationFilterFactory"/>
+        <filter class="solr.GermanLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+      </analyzer>
+    </fieldType>
+  </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+submitted for indexing without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to the following will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+    at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+    at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+    at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+    at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+    at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+    ...
+    ...
+    ...
+```
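+
+To stay on the safe side, render timestamps into this format before
+storing them. A small sketch (standard-library Python only; nothing here
+is Riak-specific):
+
+```python
+# Produce the ISO8601 UTC string Solr expects, e.g. '1995-12-31T23:59:59Z'.
+from datetime import datetime, timezone
+
+def to_solr_datetime(dt: datetime) -> str:
+    # Normalize to UTC, then format to whole seconds with a trailing 'Z'
+    return dt.astimezone(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
+
+print(to_solr_datetime(datetime.now(timezone.utc)))
+```
+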
+
+### Uploading
+
+Once you have finalized your custom schema, save it as an `.xml` file. It can then be uploaded to Riak KV as follows:
+
+```curl
+curl -v -XPUT $RIAK_HOST/search/schema/thundercats \
+  -H 'Content-Type:application/xml' \
+  --data-binary @thundercats_schema.xml
+```
+
+
+
+## Field Properties By Use Case
+
+Sometimes it can be tricky to decide whether a value should be `stored`,
+or whether `multiValued` is allowed. This handy table from the [Solr
+documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case)
+may help you pick field properties.
+
+An entry of `true` or `false` in the table indicates that the option
+must be set to the given value for the use case to function correctly.
+If no entry is provided, the setting of that attribute has no impact on
+the case.
+
+<table class="schemausecase">
+<thead>
+<tr>
+<th>Use Case</th>
+<th><code>indexed</code></th>
+<th><code>stored</code></th>
+<th><code>multiValued</code></th>
+<th><code>omitNorms</code></th>
+<th><code>termVectors</code></th>
+<th><code>termPositions</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>search within field</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>retrieve contents</td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use as unique key</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>sort on field</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td><code>true</code>[1](#notes)</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use field boosts[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>document boosts affect searches within field</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>highlighting</td>
+<td><code>true</code>[4](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td>[2](#notes)</td>
+<td><code>true</code>[3](#notes)</td>
+</tr>
+<tr>
+<td>faceting[5](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>add multiple values, maintaining order</td>
+<td></td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>field length affects doc score</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>MoreLikeThis[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>true</code>[6](#notes)</td>
+<td></td>
+</tr>
+</tbody></table>
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/search.md b/content/riak/kv/2.9.7/developing/usage/search.md
new file mode 100644
index 0000000000..4b7ddca496
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/search.md
@@ -0,0 +1,1455 @@
+---
+title: "Using Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Searching"
+    identifier: "usage_searching"
+    weight: 105
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/search
+  - /riak/kv/2.9.7/dev/using/search
+---
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak search 2.0 is an integration of Solr (for indexing and querying)
+and Riak 
(for storage and distribution). There are a few points of
+interest that a user of Riak search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak search uses AAE is in the
+[Riak search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will assume the default schema. You can
+read more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you connect with to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Below, we'll create an
+index named `famous` with the default schema.
+
+Both schema and index creation will be covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only contain
+[ASCII](http://en.wikipedia.org/wiki/ASCII) characters from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/2.9.7/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.7/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.7/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+    .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+     -H 'Content-Type: application/json' \
+     -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type's
+`search_index` property to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create one without
+any properties and set individual buckets to be indexed. 
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +### Associating an Index via Custom Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +default bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + +Once you have created the index association, any new data will be indexed on +ingest according to your schema. + +## Riak Search Security Setup + +[Security]({{<baseurl>}}riak/kv/2.9.7/using/security/) is a new feature as of +Riak 2.0 that lets an administrator limit access to certain resources. +In the case of search, your options are to limit administration of +schemas or indexes (the `search.admin` permission) to certain users, and +to limit querying (the `search.query` permission) to any index or to a +specific index. The example below shows the various options. 
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Lion-o, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python, the content type is automatically set for you based on the
+object provided.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+    .setContentType(json)
+    .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+    .setContentType(json)
+    .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+    .setContentType(json)
+    .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+    .setContentType(json)
+    .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+    ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket))
+    ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket))
+    ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket))
+    ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]);
+
+$storeObjectBuilder->build()->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+
+cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True})
+cat.store()
+
+cat = bucket.new('cheetara', {'name_s':'Cheetara', 'age_i':28, 'leader_b': False})
+cat.store()
+
+cat = bucket.new('snarf', {'name_s':'Snarf', 'age_i':43})
+cat.store()
+
+cat = bucket.new('panthro', {'name_s':'Panthro', 'age_i':36})
+cat.store()
+```
+
+```csharp
+var lionoId = new RiakObjectId("animals", "cats", "liono");
+var lionoObj = new { name_s = "Lion-o", age_i = 30, leader_b = true };
+var lionoRiakObj = new RiakObject(lionoId, lionoObj);
+
+var cheetaraId = new RiakObjectId("animals", "cats", "cheetara");
+var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader_b = false };
+var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj);
+
+var snarfId = new RiakObjectId("animals", "cats", "snarf");
+var snarfObj = new { name_s = "Snarf", age_i = 43, leader_b = false };
+var snarfRiakObj = new RiakObject(snarfId, snarfObj);
+
+var panthroId = new RiakObjectId("animals", "cats", "panthro");
+var panthroObj = new { name_s = "Panthro", age_i = 36, leader_b = false };
+var panthroRiakObj = new RiakObject(panthroId, panthroObj);
+
+var rslts = client.Put(new[] {
+    lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj
+});
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var objs = [
+    [ 'liono', { name_s: 'Lion-o', age_i: 30, leader_b: true } ],
+    [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader_b: false } ],
+    [ 'snarf', { name_s: 'Snarf', age_i: 43, leader_b: false } ],
+    [ 'panthro', { name_s: 'Panthro', age_i: 36, leader_b: false } ],
+];
+
+var storeFuncs = [];
+objs.forEach(function (o) {
+    var storeFunc = function (async_cb) {
+        var key = o[0];
+        var value = o[1];
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('application/json');
+        riakObj.setBucketType('animals');
+        riakObj.setBucket('cats');
+        riakObj.setKey(key);
+        riakObj.setValue(value);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    };
+    storeFuncs.push(storeFunc);
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+    // NB: all objects stored and indexed...
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/2.9.7/developing/usage/custom-extractors). 
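+
+To give a feel for what an extractor produces, here is a rough Python
+sketch of the JSON-flattening behavior described below. It is purely
+illustrative; the real extractors are Erlang modules inside Riak.
+
+```python
+# Illustrative only: how a JSON extractor might flatten nested values
+# into dot-separated Solr field names (not the actual Riak/Yokozuna code).
+def flatten(value, prefix=''):
+    fields = []
+    if isinstance(value, dict):
+        for key, inner in value.items():
+            path = '{}.{}'.format(prefix, key) if prefix else key
+            fields.extend(flatten(inner, path))
+    elif isinstance(value, list):
+        # Lists become multi-valued fields under the same name
+        for inner in value:
+            fields.extend(flatten(inner, prefix))
+    else:
+        fields.append((prefix, value))
+    return fields
+
+print(flatten({"pets": {"pet": {"name_s": "Spot"}}}))
+# [('pets.pet.name_s', 'Spot')]
+```
+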
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/2.9.7/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated into a
+field insert on the Solr index document. Solr will index any field that
+it recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` is an integer, `_b` is a boolean, and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field into a dot-separated path. For
+example, if you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/2.9.7/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match. 
+ +```java +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create("famous"), "name_s:Lion*") + .build(); +cluster.execute(searchOp); +// This will display the actual results as a List of Maps: +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +// This will display the number of results: +System.out.println(results); +``` + +```ruby +results = client.search("famous", "name_s:Lion*") +p results +p results['docs'] +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('famous') + ->withQuery('name_s:Lion*') + ->build() + ->execute(); + +$response->getNumFound(); // 1 + +var_dump($response->getDocs()); +``` + +```python +results = client.fulltext_search('famous', 'name_s:Lion*') +print results +print results['docs'] +``` + +```csharp +var search = new RiakSearchRequest +{ + Query = new RiakFluentSearch("famous", "name_s") + .Search("Lion*") + .Build() +}; + +var rslt = client.Search(search); +RiakSearchResult searchResult = rslt.Value; +foreach (RiakSearchResultDocument doc in searchResult.Documents) +{ + var args = new[] { + doc.BucketType, + doc.Bucket, + doc.Key, + string.Join(", ", doc.Fields.Select(f => f.Value).ToArray()) + }; + Debug.WriteLine( + format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}", + args: args); +} +``` + +```javascript +function search_cb(err, rslt) { + if (err) { + throw new Error(err); + } + logger.info("docs:", JSON.stringify(rslt.docs)); +} + +var search = new Riak.Commands.YZ.Search.Builder() + .withIndexName('famous') + .withQuery('name_s:Lion*') + .withCallback(search_cb) + .build(); +client.execute(search); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>), +io:fwrite("~p~n", [Results]), +Docs = Results#search_results.docs, +io:fwrite("~p~n", [Docs]). + +%% Please note that this example relies on an Erlang record definition +%% for the search_result record found here: +%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl +``` + +```golang +cmd, err := riak.NewSearchCommandBuilder(). + WithIndexName("famous"). + WithQuery("name_s:Lion*"). + Build(); +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} + +sc := cmd.(*riak.SearchCommand) +if json, jerr := json.MarshalIndent(sc.Response.Docs, "", " "); jerr != nil { + return jerr +} else { + fmt.Println(string(json)) +} +``` + +```curl +curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp +``` + +The response to a query will be an object containing details about the +response, such as a query's max score and a list of documents which +match the given query. It's worth noting two things: + +* The documents returned are Search documents (a set of Solr + field/values), not a Riak value +* The HTTP response is a direct Solr response, while the drivers use + Protocol Buffers and are encoded with different field names + +This is a common HTTP `response` value: + +```json +{ + "numFound": 1, + "start": 0, + "maxScore": 1.0, + "docs": [ + { + "leader_b": true, + "age_i": 30, + "name_s": "Lion-o", + "_yz_id": "default_cats_liono_37", + "_yz_rk": "liono", + "_yz_rt": "default", + "_yz_rb": "cats" + } + ] +} +``` + +The most important field returned is `docs`, which is the list of +objects that each contain fields about matching index documents. 
The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. Whether that is the case depends on your schema. If they are not
+stored, you'll have to perform a separate Riak GET operation to retrieve
+the value, using the `_yz_rk` value as the key.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+    .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation($key, $bucket, $btype)
+    ->build()
+    ->execute()
+    ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO 30]`. If you
+wanted to find all cats 30 or older, you could use a glob as the top end
+of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`. 
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+Search searchOp = new Search.Builder(index, query).build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('leader_b:true AND age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+    .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new Command\Builder\Search\DeleteIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []),
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that bucket's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_`.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+     -H 'Content-Type: application/json' \
+     -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents is returned in non-overlapping
+sequential subsets (in other words, *pages*). 
This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy: `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create("famous"), "*:*")
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .build();
+client.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('*:*')
+    ->withMaxRows($maxRows)
+    ->withStartRow($start)
+    ->build()
+    ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results.
+
+If you want to sort by score without repeating results then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query. 
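+
+As an illustration, here is a sketch of the key-sorted approach using
+the Python client from the examples above. It assumes that the client
+passes the `sort` parameter through to Solr; verify that against your
+client version.
+
+```python
+# Sketch: stable pagination by type/bucket/key (assumes the 'sort'
+# parameter is forwarded to Solr by the Python client used above).
+ROWS_PER_PAGE = 2
+page = 1
+while True:
+    results = client.fulltext_search(
+        'famous', '*:*',
+        start=ROWS_PER_PAGE * (page - 1),
+        rows=ROWS_PER_PAGE,
+        sort='_yz_rt asc, _yz_rb asc, _yz_rk asc')
+    for doc in results['docs']:
+        print(doc['_yz_rk'])
+    if page * ROWS_PER_PAGE >= results['num_found']:
+        break
+    page += 1
+```
+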
+ +[This issue](https://github.com/basho/yokozuna/issues/355) is caused by +the way Search must minimally distribute a query across multiple Solr +nodes (called a *coverage plan*) and then filter duplicate results to +retrieve a full result set. Since this plan is frequently recalculated, +successive page queries may use a different plan, and thus calculate +alternate `score`s or filter different `_yz_id` values. We have plans to +fix this shortcoming in a future version of Riak. + +### MapReduce + +Riak Search allows for piping search results as inputs for +[MapReduce]({{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce/) jobs. This is a useful cross-section for +performing post-calculations of results or aggregations of ad-hoc +queries. The Riak Search MapReduce integration works similarly to +regular MapReduce, with the notable exception that your input is not a +bucket, but rather index and query arguments to the `yokozuna` module +and `mapred_search` function (an Erlang `module:function` pair that adds +the Riak Search hook to MapReduce). + +```json +{ + "inputs": { + "module": "yokozuna", + "function": "mapred_search", + "arg": ["famous","NOT leader_b:true"] + }, + "query": [ + { + "map": { + "language": "javascript", + "keep": false, + "source": "function(v) { return [1]; }" + } + }, + { + "reduce": { + "language": "javascript", + "keep": true, + "name": "Riak.reduceSum" + } + } + ] +} +``` + +In this example we're searching for all famous cats that are not +leaders and counting up the results using Javascript for both map and +reduce. It should return the reduced sum of `[3]`. + +```curl +curl -XPOST $RIAK_HOST/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}' +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/usage/searching-data-types.md b/content/riak/kv/2.9.7/developing/usage/searching-data-types.md new file mode 100644 index 0000000000..34e839031a --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage/searching-data-types.md @@ -0,0 +1,1687 @@ +--- +title: "Searching with Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Searching with Data Types" + identifier: "usage_search_data_types" + weight: 111 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.7/dev/search/search-data-types + - /riak/kv/2.9.7/dev/search/search-data-types +--- + +Although [Riak Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types) function differently from other +Riak objects in some respects, when you're using Search you can think of +them as normal Riak objects with special metadata attached (metadata +that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#maps) +can be indexed and have their contents searched just like other Riak +objects. + +## Data Type MIME Types + +Like all objects stored in Riak, Riak Data Types are assigned content +types. Unlike other Riak objects, this happens automatically. When you +store, say, a counter in Riak, it will automatically be assigned the +type `application/riak_counter`. 
The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types has its own default schema (with the exception of
+maps), which means that the `_yz_default` schema will automatically index
+Data Types on the basis of their assigned content type. This means that
+there is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#riak-data-types-and-search) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#maps), there are four schemas
+for embedded (aka dynamic) fields. 
Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued.
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings.
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+    .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('scores')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we store in any bucket with the
+bucket type `counters` will be indexed in our `scores` index.
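+
+If you want to confirm that the association took effect, here is a quick
+sketch with the Python client used in the examples above (verify the
+`get_properties()` call against your client version):
+
+```python
+# Sanity check: the bucket type's properties should now point at 'scores'.
+props = client.bucket_type('counters').get_properties()
+print(props.get('search_index'))  # expected: 'scores'
+```
+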
+So let's start playing with some counters. All counters will be stored
+in the bucket `people`, while the key for each counter will be the
+username of each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu = new CounterUpdate(10);
+UpdateCounter update = new UpdateCounter.Builder(christopherHitchensCounter, cu)
+    .build();
+client.execute(update);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+cu = new CounterUpdate(25);
+update = new UpdateCounter.Builder(joanRiversCounter, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10)
+    ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+    ->buildLocation('joan_rivers', 'people', 'counters')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"chris_hitchens">>,
+                            riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"joan_rivers">>,
+                            riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25.
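+
+Before querying, you can read the two counters back directly. A quick
+sketch with the Python client from above (datatype buckets return
+`Counter` objects with a `value` attribute; verify against your client
+version):
+
+```python
+# Read the counters back to confirm the increments took effect.
+bucket = client.bucket_type('counters').bucket('people')
+print(bucket.get('chris_hitchens').value)  # 10
+print(bucket.get('joan_rivers').value)     # 25
+```
+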
+Let's query to see how many counters have a value greater than 20, just
+to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('scores')
+  ->withQuery('counter:[20 TO *]')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[20 TO *]" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over 20.
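+
+For reference, the raw JSON returned by the curl query above looks
+roughly like the following (abridged; response headers and scoring
+fields are omitted, and the exact stored fields depend on the schema):
+
+```json
+{
+  "response": {
+    "numFound": 1,
+    "docs": [
+      {
+        "_yz_rk": "joan_rivers",
+        "_yz_rb": "people",
+        "_yz_rt": "counters"
+      }
+    ]
+  }
+}
+```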
+To find out which counter that is, we can dig into our results:
+
+```java
+// Using the "results" object from above:
+int numberFound = results.numResults();
+Map<String, List<String>> foundObject = results.getAllResults().get(0);
+String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
+String bucket = foundObject.get("_yz_rb").get(0); // "people"
+String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
+```
+
+```ruby
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```php
+$doc = $response->getDocs()[0];
+
+// The key
+$doc->_yz_rk; // 'joan_rivers'
+
+// The bucket
+$doc->_yz_rb; // 'people'
+
+// The bucket type
+$doc->_yz_rt; // 'counters'
+```
+
+```python
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+var doc = rslt.docs[0];
+
+var key = doc['_yz_rk'];
+var bucket = doc['_yz_rb'];
+var bucketType = doc['_yz_rt'];
+```
+
+```erlang
+Doc = lists:nth(1, Docs),
+Key = proplists:get_value(<<"_yz_rk">>, Doc),
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
+BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
+```
+
+```curl
+# Use the JSON object from above to locate bucket, key, and bucket type
+# information
+```
+
+Alternatively, we can see how many counters have values below 15:
+
+```java
+String index = "scores";
+String query = "counter:[* TO 15]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[* TO 15]')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('scores')
+  ->withQuery('counter:[* TO 15]')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[* TO 15]')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[* TO 15]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
+``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp +``` + +Or we can see how many counters have a value of 17 exactly: + +```java +// Using the same method as above, just changing the query: +String query = "counter:17"; +``` + +```ruby +results = client.search('scores', 'counter:17') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('scores') + ->withQuery('counter:17') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('scores', 'counter:17') +``` + +```csharp +var search = new RiakSearchRequest("scores", "counter:17"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('scores') + .withQuery('counter:17') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>). +``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp +``` + +## Sets Example + +Let's say that we're storing information about the hobbies of a group of +people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#sets) simply called `sets`, +like so: + +```bash +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type activate sets +``` + +Now, we'll create a Search index called `hobbies` that uses the default +schema (as in some of the examples above): + +```java +YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies"); +StoreIndex storeIndex = + new StoreIndex.Builder(hobbiesIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('hobbies', '_yz_default') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak)) + ->withName('hobbies') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('hobbies', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("hobbies", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'hobbies' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/hobbies \ + -H 'Content-Type: application/json' \ + -d '{"schema": "_yz_default"}' +``` + +Now, we can modify our `sets` bucket type to associate that bucket type +with our `hobbies` index: + +```bash +riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}' +``` + +Now, all of the sets that we store in any bucket with the bucket type +`sets` will be automatically indexed as a set. 
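+
+As with the `scores` index earlier, you can verify that the index exists
+before loading data (a sketch; the endpoint should return the index's
+name and schema):
+
+```curl
+curl "$RIAK_HOST/search/index/hobbies"
+```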
+So let's say that we store two sets for two different people describing
+their respective hobbies, in the bucket `people`:
+
+```java
+Namespace peopleBucket = new Namespace("sets", "people");
+
+Location mikeDitkaSet = new Location(peopleBucket, "ditka");
+SetUpdate su1 = new SetUpdate()
+        .add("football")
+        .add("winning");
+UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();
+
+Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
+SetUpdate su2 = new SetUpdate()
+        .add("wailing")
+        .add("rocking")
+        .add("winning");
+UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();
+
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+
+ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('football')
+  ->add('winning')
+  ->buildLocation('ditka', 'people', 'sets');
+
+$builder->build()->execute();
+
+$builder->add('wailing')
+  ->add('rocking')
+  ->add('winning')
+  ->buildLocation('dio', 'people', 'sets')
+  ->build()
+  ->execute();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('people')
+
+mike_ditka_set = Set(bucket, 'ditka')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+mike_ditka_set.store()
+
+ronnie_james_dio_set = Set(bucket, 'dio')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+ronnie_james_dio_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("ditka")
+    .WithAdditions(new[] { "football", "winning" })
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("dio")
+    .WithAdditions(new[] { "wailing", "rocking", "winning" })
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'ditka',
+            additions: ['football', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'dio',
+            additions: ['wailing', 'rocking', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+MikeDitkaSet = riakc_set:new(),
+MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet),
+MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
+RonnieJamesDioSet = riakc_set:new(),
+RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet),
+RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
+RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),
+
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"ditka">>,
+                            riakc_set:to_op(MikeDitkaSet2)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"dio">>,
+                            riakc_set:to_op(RonnieJamesDioSet3)).
+``` + +Now, we can query our `hobbies` index to see if anyone has the hobby +`football`: + +```java +// Using the same method explained above, just changing the query: +String query = "set:football"; +``` + +```ruby +results = client.search('hobbies', 'set:football') +# This should return a dict with fields like 'num_found' and 'docs' +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('hobbies') + ->withQuery('set:football') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('hobbies', 'set:football') +# This should return a dict with fields like 'num_found' and 'docs' +``` + +```csharp +var search = new RiakSearchRequest("hobbies", "set:football"); +var rslt = client.Search(search); + +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); + +var firstDoc = searchResult.Documents.First(); +Console.WriteLine("Key: {0} Bucket: {1} Type: {2}", + firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType); +``` + +```javascript +function search_cb(err, rslt) { + logger.info("sets numFound: '%d', docs: '%s'", + rslt.numFound, JSON.stringify(rslt.docs)); + + var doc = rslt.docs[0]; + var key = doc['_yz_rk']; + var bucket = doc['_yz_rb']; + var bucketType = doc['_yz_rt']; +} + +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('hobbies') + .withQuery('set:football') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>). +``` + +```curl +curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp +``` + +Let's see how many sets contain the element `football`: + +```java +// Using the same method explained above for getting search results: +int numberFound = results.numResults(); // 1 +``` + +```ruby +results['num_found'] +# 1 +``` + +```php +$response->getNumFound(); // 1 +``` + +```python +results['num_found'] +# 1 +``` + +```csharp +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); +``` + +```javascript +rslt.numFound; +// 1 +``` + +```erlang +NumberFound = Results#search_results.num_found. +%% 1 +``` + +```curl +``` + +Success! We stored two sets, only one of which contains the element +`football`. Now, let's see how many sets contain the element `winning`: + +```java +// Using the same method explained above, just changing the query: +String query = "set:winning"; + +// Again using the same method from above: +int numberFound = results.numResults(); // 2 +``` + +```ruby +results = client.search('hobbies', 'set:winning') +results['num_found'] +# 2 +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('hobbies') + ->withQuery('set:winning') + ->build() + ->execute(); + +$response->getNumFound(); // 2 +``` + +```python +results = client.fulltext_search('hobbies', 'set:winning') +results['num_found'] +# 2 +``` + +```csharp +var search = new RiakSearchRequest("hobbies", "set:winning"); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('hobbies') + .withQuery('set:winning') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>). +NumberFound = Results#search_results.num_found. +%% 2 +``` + +Just as expected, both sets we stored contain the element `winning`. 
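+
+The Erlang example above has no curl counterpart on this page, but the
+equivalent HTTP query follows the same pattern as the earlier ones:
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:winning" | json_pp
+```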
+ +## Maps Example + +This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types) +tutorial. That tutorial walks you through storing CMS-style user data in +Riak [maps]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/#maps), and we'd suggest that you +familiarize yourself with that tutorial first. More specifically, user +data is stored in the following fields in each user's map: + +* first name in a `first_name` register +* last name in a `last_name` register +* whether the user is an enterprise customer in an `enterprise_customer` + flag +* the number of times the user has visited the company page in a + `page_visits` counter +* a list of the user's interests in an `interests` set + +First, let's create and activate a bucket type simply called `maps` that +is set up to store Riak maps: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type activate maps +``` + +Now, let's create a search index called `customers` using the default +schema: + +```java +YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default"); +StoreIndex storeIndex = + new StoreIndex.Builder(customersIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('customers', '_yz_default') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('customers') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('customers', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("customers", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'customers' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>). 
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Idris"))
+        .update("last_name", new RegisterUpdate("Elba"))
+        .update("enterprise_customer", new FlagUpdate(true))
+        .update("page_visits", new CounterUpdate(10))
+        .update("interests", new SetUpdate().add("acting").add("being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Joan"))
+        .update("last_name", new RegisterUpdate("Jett"))
+        // Joan Jett is not an enterprise customer, so we don't need to
+        // explicitly disable the "enterprise_customer" flag, as all
+        // flags are disabled by default
+        .update("page_visits", new CounterUpdate(25))
+        .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+  $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Idris')
+  ->updateRegister('last_name', 'Elba')
+  ->updateFlag('enterprise_customer', true)
+  ->updateSet('interests', $setBuilder)
+  ->updateCounter('page_visits', $counterBuilder)
+  ->buildLocation('idris_elba', 'customers', 'maps')
+  ->build()
+  ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+  $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Joan')
+  ->updateRegister('last_name', 'Jett')
+  ->updateSet('interests', $setBuilder)
+  ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+  ->buildLocation('joan_jett', 'customers', 'maps')
+  ->build()
+  ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, true)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', true);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters above 15.
+Unlike the counters example above, we have to specify _which_ counter
+we're querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('page_visits_counter:[15 TO *]')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter
+above 15. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register; // 'Joan'
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we stored thus far has an `interests` set. First, let's
+see how many of our maps even _have_ sets called `interests` using a
+wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('interests_set:*')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set.
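+
+Although no curl example is shown above, the same wildcard query works
+over HTTP, following the same pattern as the earlier queries against the
+`hobbies` index:
+
+```curl
+curl "$RIAK_HOST/search/query/customers?wt=json&q=interests_set:*" | json_pp
+```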
+Now let's see how many maps have items in `interests` sets that begin
+with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('interests_set:loving*')
+  ->build()
+  ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has one item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will consist of a `name` register only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+        .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+        .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate).build();
+client.execute(addSubMap);
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('alter_ego', $mapBuilder)
+  ->buildLocation('idris_elba', 'customers', 'maps')
+  ->build()
+  ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('alter_ego', $mapBuilder)
+  ->buildLocation('joan_jett', 'customers', 'maps')
+  ->build()
+  ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var
mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + var alterEgoMap = mapOp.map('alter_ego'); + alterEgoMap.setRegister('name', 'John Luther'); + + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + throwIfErr(err); + async_cb(); + }); + }, + function (async_cb) { + var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'joan_jett' + }; + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + var alterEgoMap = mapOp.map('alter_ego'); + alterEgoMap.setRegister('name', 'Robert Plant'); + + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + throwIfErr(err); + async_cb(); + }); + } +]; + +async.parallel(funcs, function (err, rslts) { + throwIfErr(err); +}); +``` + +Querying maps within maps involves construct queries that separate the +different levels of depth with a single dot. Here's an example query for +finding maps that have a `name` register embedded within an `alter_ego` +map: + +```java +// Using the same method from above: +String query = "alter_ego_map.name_register:*"; + +// Again using the same method from above: +int numberFound = results.numResults(); // 2 +``` + +```ruby +results = client.search('customers', 'alter_ego_map.name_register:*') +results['num_found'] # 2 +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('customers') + ->withQuery('alter_ego_map.name_register:*') + ->build() + ->execute(); + +$response->getNumFound(); // 2 +``` + +```python +results = client.fulltext_search('customers', 'alter_ego_map.name_register:*') +results['num_found'] # 2 +``` + +```csharp +var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('customers') + .withQuery('alter_ego_map.name_register:*') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +Once we know how to query embedded fields like this, we can query those +just like any other. 
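+
+Over HTTP, the same embedded-field query looks like this (the dot
+separating the map levels is simply part of the Solr field name):
+
+```curl
+curl "$RIAK_HOST/search/query/customers?wt=json&q=alter_ego_map.name_register:*" | json_pp
+```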
+Let's find out which maps have an `alter_ego` sub-map that contains a
+`name` register that ends with `Plant`, and display that customer's
+first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('alter_ego_map.name_register:*Plant')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/secondary-indexes.md b/content/riak/kv/2.9.7/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..6758b619e3
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/secondary-indexes.md
@@ -0,0 +1,2030 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/using/2i
+  - /riak/kv/2.9.7/dev/using/2i
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary index values can be either binaries/strings, such as
+`sensor_1_data`, `admin_user`, or `click_event`, or integers, such as
+`99` or `141121`.
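+
+As a quick preview of the examples later on this page, tagging and
+querying with 2i over HTTP amounts to a header on write and a URL on
+read (a sketch, assuming a local node and the `default` bucket type):
+
+```curl
+# Tag an object with a string index at write time...
+curl -XPOST localhost:8098/buckets/users/keys/john_smith \
+  -H 'x-riak-index-twitter_bin: jsmith123' \
+  -H 'Content-Type: application/json' \
+  -d '{"user_data":"data"}'
+
+# ...then find it later by that tag
+curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```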
+ +[Riak Search]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/2.9.7/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
Let's say that an +application would like add a Twitter handle and an email address to this +object as secondary indexes. + +```java +Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith"); + +// In the Java client (and all clients), if you do not specify a bucket type, +// the client will use the default type. And so the following store command +// would be equivalent to the one above: +Location johnSmithKey = new Location(new Namespace("users"), "john_smith"); + +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(BinaryValue.create("{'user_data':{ ... }}")); + +obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123"); +obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com"); + +StoreValue store = new StoreValue.Builder(obj) + .withLocation(johnSmithKey) + .build(); +client.execute(store); +``` + +```ruby +bucket = client.bucket_type('default').bucket('users') +obj = Riak::RObject.new(bucket, 'john_smith') +obj.content_type = 'application/json' +obj.raw_data = '{"user_data":{ ... }}' + +# String/binary indexes must be set as an array of strings +obj.indexes['twitter_bin'] = %w{ jsmith123 } +obj.indexes['email_bin'] = %w{ jsmith@basho.com } +obj.store + +# In the Ruby client (and all clients), if you do not specify a bucket +# type, the client will use the default type. And so the following set +# of commands would be equivalent to the one above: + +bucket = client.bucket('users') +# repeat the same commands for building the object +obj.store +``` + +```php +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 'jsmith123') + ->addValueToIndex('email_bin', 'jsmith@basho.com'); + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->buildLocation('john_smith', 'users', 'default') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('default').bucket('users') +# In the Python client (and all clients), if you do not specify a bucket type, +# the client will use the default type. And so the following store command +# would be equivalent to the one above: +bucket = client.bucket('users') + +obj = RiakObject(client, bucket, 'john_smith') +obj.content_type = 'text/plain' +obj.data = '...user data...' +obj.add_index('twitter_bin', 'jsmith123') +obj.add_index('email_bin', 'jsmith@basho.com') +obj.store() +``` + +```csharp +var id = new RiakObjectId("default", "users", "john_smith"); +var obj = new RiakObject(id, "...user data...", + RiakConstants.ContentTypes.TextPlain); +obj.BinIndex("twitter").Set("jsmith123"); +obj.BinIndex("email").Set"jsmith@basho.com"); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucket('users'); +riakObj.setKey('john_smith'); +riakObj.setValue('...user data...'); +riakObj.addToIndex('twitter_bin', 'jsmith123'); +riakObj.addToIndex('email_bin', 'jsmith@basho.com'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"default">>, <<"users">>}, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +%% In the Erlang client (and all clients), if you do not specify a bucket type, +%% the client will use the default type. 
And so the following object would be +%% equivalent to the one above: + +Obj = riakc_obj:new(<<"users">>, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index( + MD1, + [{{binary_index, "twitter"}, [<<"jsmith123">>]}, + {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]), +Obj2 = riakc_obj:update_metadata(Obj, MD2), +riakc_pb_socket:put(Pid, Obj2). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + BucketType: "indexes", + Bucket: "users", + Key: "john_smith", + Value: []byte("…user data…"), +} + +obj.AddToIndex("twitter_bin", "jsmith123") +obj.AddToIndex("email_bin", "jsmith@basho.com") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ + -H 'x-riak-index-twitter_bin: jsmith123' \ + -H 'x-riak-index-email_bin: jsmith@basho.com' \ + -H 'Content-Type: application/json' \ + -d '{"userData":"data"}' +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.7/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.7/developing/getting-started) section. + +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if 
(rslt.values.length > 0) { + Array.prototype.push.apply(query_keys, + rslt.values.map(function (value) { + return value.objectKey; + })); + } +} + +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucket('users') + .withIndexName('twitter_bin') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); +``` + +```erlang +{ok, Results} = + riakc_pb_socket:get_index(Pid, + <<"users">>, %% bucket + {binary_index, "twitter"}, %% index name + <<"jsmith123">>). %% index +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("twitter_bin"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl localhost:8098/buckets/users/index/twitter_bin/jsmith123 +``` + +The response: + +```java +john_smith +``` + +```ruby +["john_smith"] +``` + +```php +['john_smith'] +``` + +```python +['john_smith'] +``` + +```csharp +john_smith +``` + +```javascript +john_smith +``` + +```erlang +{ok,{index_results_v1,[<<"john_smith">>], + undefined,undefined}}. +``` + +```golang +john_smith +``` + +```curl +{ + "keys": [ + "john_smith" + ] +} +``` + +## Examples + +To run the following examples, make sure that Riak is configured to use +an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory]. + +## Indexing Objects + +The following example indexes four different objects. Notice that we're +storing both integer and string (aka binary) fields. Field names are +automatically lowercased, some fields have multiple values, and +duplicate fields are automatically de-duplicated, as in the following +example: + +```java +Namespace peopleBucket = new Namespace("indexes", "people"); + +RiakObject larry = new RiakObject() + .setValue(BinaryValue.create("My name is Larry")); +larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1"); +larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L); +StoreValue storeLarry = new StoreValue.Builder(larry) + .withLocation(peopleBucket.setKey("larry")) + .build(); +client.execute(storeLarry); + +RiakObject moe = new RiakObject() + .setValue(BinaryValue.create("Ny name is Moe")); +moe.getIndexes().getIndex(StringBinIdex.named("Field1")).add("val2"); +moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L); +StoreValue storeMoe = new StoreValue.Builder(moe) + .withLocation(peopleBucket.setKey("moe")) + .build(); +client.execute(storeMoe); + +RiakObject curly = new RiakObject() + .setValue(BinaryValue.create("My name is Curly")); +curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3"); +curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L); +StoreValue storeCurly = new StoreValue.Builder(curly) + .withLocation(peopleBucket.setKey("curly")) + .build(); +client.execute(storeCurly); + +RiakObject veronica = new RiakObject() + .setValue(BinaryValue.create("My name is Veronica")); +veronica.getIndexes().getIndex(StringBinIndex.named("field1")) + .add("val4").add("val4"); +veronica.getIndexes().getIndex(LongIntIndex.named("field2")) + .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L); +StoreValue storeVeronica = new StoreValue.Builder(veronica) + .withLocation(peopleBucket.setKey("veronica")) + .build(); +client.execute(storeVeronica); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') + +obj1 = 
Riak::RObject.new(bucket, 'larry') +obj1.content_type = 'text/plain' +obj1.raw_data = 'My name is Larry' +obj1.indexes['field1_bin'] = %w{ val1 } +# Like binary/string indexes, integer indexes must be set as an array, +# even if you wish to add only a single index +obj1.indexes['field2_int'] = [1001] +obj1.store + +obj2 = Riak::RObject.new(bucket, 'moe') +obj2.content_type = 'text/plain' +obj2.raw_data = 'My name is Larry' +obj2.indexes['Field1_bin'] = %w{ val2 } +obj2.indexes['Field2_int'] = [1002] +obj2.store + +obj3 = Riak::RObject.new(bucket, 'curly') +obj3.content_type = 'text/plain' +obj3.raw_data = 'My name is Curly' +obj3.indexes['FIELD1_BIN'] = %w{ val3 } +obj3.indexes['FIELD2_INT'] = [1003] +obj3.store + +obj4 = Riak::RObject.new(bucket, 'veronica') +obj4.content_type = 'text/plain' +obj4.raw_data = 'My name is Veronica' +obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b } +obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006] +obj4.indexes['field2_int'] = [1004] +obj4.indexes['field2_int'] = [1004] +obj4.indexes['field2_int'] = [1004] +obj4.indexes['field2_int'] = [1007] +obj4.store +``` + +```php +$bucket = new \Basho\Riak\Bucket('people', 'indexes'); + +$object = (new \Basho\Riak\Object'My name is Larry', ['Content-type' => 'text/plain'])) + ->addValueToIndex('field1_bin', 'val1') + ->addValueToIndex('field2_int', 1001); + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->withLocation(new \Basho\Riak\Location('larry', $bucket)) + ->build() + ->execute(); + +$object = (new \Basho\Riak\Object'My name is Moe', ['Content-type' => 'text/plain'])) + ->addValueToIndex('Field1_bin', 'val2') + ->addValueToIndex('Field2_int', 1002); + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->withLocation(new \Basho\Riak\Location('moe', $bucket)) + ->build() + ->execute(); + +$object = (new \Basho\Riak\Object'My name is Curly', ['Content-type' => 'text/plain'])) + ->addValueToIndex('FIELD1_BIN', 'val3') + ->addValueToIndex('FIELD2_int', 1003); + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->withLocation(new \Basho\Riak\Location('curly', $bucket)) + ->build() + ->execute(); + +$object = (new \Basho\Riak\Object'My name is Veronica', ['Content-type' => 'text/plain'])) + ->addValueToIndex('field1_bin', 'val4') + ->addValueToIndex('field1_bin', 'val4') + ->addValueToIndex('field1_bin', 'val4a') + ->addValueToIndex('field1_bin', 'val4b') + ->addValueToIndex('field2_int', 1004) + ->addValueToIndex('field2_int', 1005) + ->addValueToIndex('field2_int', 1006) + ->addValueToIndex('field2_int', 1004) + ->addValueToIndex('field2_int', 1004) + ->addValueToIndex('field2_int', 1007); + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->withLocation(new \Basho\Riak\Location('veronica', $bucket)) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') + +obj1 = RiakObject(client, bucket, 'larry') +obj1.content_type = 'text/plain' +obj1.data = 'My name is Larry' +obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001) +obj1.store() + +obj2 = RiakObject(client, bucket, 'moe') +obj2.content_type = 'text/plain' +obj2data = 'Moe' +obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002) +obj2.store() + +obj3 = RiakObject(client, bucket, 'curly') +obj3.content_type = 'text/plain' +obj3.data = 'Curly' +obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003) +obj3.store() + +obj4 = RiakObject(client, bucket, 
'veronica') +obj4.content_type = 'text/plain' +obj4.data = 'Veronica' +obj4.add_index('field1_bin', 'val4').add_index('field1_bin', 'val4a').add_index('field1_bin', 'val4b').add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1005).add_index('field2_int', 1006).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1007) +obj4.store() +``` + +```csharp +var larryId = new RiakObjectId("indexes", "people", "larry"); +var larry = new RiakObject(larryId, "My name is Larry", + RiakConstants.ContentTypes.TextPlain); + +larry.BinIndex("field1").Set("val1"); +larry.IntIndex("field2").Set(1001); + +client.Put(larry); + +var moeId = new RiakObjectId("indexes", "people", "moe"); +var moe = new RiakObject(moeId, "My name is Moe", + RiakConstants.ContentTypes.TextPlain); + +moe.BinIndex("Field1").Set("val2"); +moe.IntIndex("Field2").Set(1002); + +client.Put(moe); + +var curlyId = new RiakObjectId("indexes", "people", "curly"); +var curly = new RiakObject(curlyId, "My name is Curly", + RiakConstants.ContentTypes.TextPlain); + +curly.BinIndex("FIELD1").Set("val3"); +curly.IntIndex("FIELD2").Set(1003); + +client.Put(curly); + +var veronicaId = new RiakObjectId("indexes", "people", "veronica"); +var veronica = new RiakObject(veronicaId, "My name is Veronica", + RiakConstants.ContentTypes.TextPlain); + +veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" }); +veronica.IntIndex("FIELD2").Set(new BigInteger[] { + 1004, 1005, 1006, 1004, 1004, 1007 +}); + +client.Put(veronica); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var storeFuncs = [ + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('larry'); + riakObj.setValue('My name is Larry'); + riakObj.addToIndex('field1_bin', 'val1'); + riakObj.addToIndex('field2_int', 1001); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('moe'); + riakObj.setValue('My name is Moe'); + riakObj.addToIndex('Field1_bin', 'val2'); + riakObj.addToIndex('Field2_int', 1002); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('curly'); + riakObj.setValue('My name is Curly'); + riakObj.addToIndex('FIELD1_BIN', 'val3'); + riakObj.addToIndex('FIELD2_INT', 1003); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('veronica'); + riakObj.setValue('My name is Veronica'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4a'); + riakObj.addToIndex('FIELD1_bin', 'val4b'); + riakObj.addToIndex('FIELD2_int', 1004); + 
+        riakObj.addToIndex('FIELD2_int', 1005);
+        riakObj.addToIndex('FIELD2_int', 1006);
+        riakObj.addToIndex('FIELD2_int', 1004);
+        riakObj.addToIndex('FIELD2_int', 1004);
+        riakObj.addToIndex('FIELD2_int', 1007);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    }
+];
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Larry = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"larry">>,
+    <<"My name is Larry">>,
+    <<"text/plain">>),
+LarryMetadata = riakc_obj:get_update_metadata(Larry),
+LarryIndexes = riakc_obj:set_secondary_index(
+    LarryMetadata,
+    [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}]
+),
+LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes).
+
+Moe = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"moe">>,
+    <<"My name is Moe">>,
+    <<"text/plain">>),
+MoeMetadata = riakc_obj:get_update_metadata(Moe),
+MoeIndexes = riakc_obj:set_secondary_index(
+    MoeMetadata,
+    [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}]
+),
+MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes).
+
+Curly = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"curly">>,
+    <<"My name is Curly">>,
+    <<"text/plain">>),
+CurlyMetadata = riakc_obj:get_update_metadata(Curly),
+CurlyIndexes = riakc_obj:set_secondary_index(
+    CurlyMetadata,
+    [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}]
+),
+CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes).
+
+Veronica = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"veronica">>,
+    <<"My name is Veronica">>,
+    <<"text/plain">>),
+VeronicaMetadata = riakc_obj:get_update_metadata(Veronica),
+%% Each index is set once, with all of its values (duplicates included)
+%% in a single list; Riak keeps each distinct value only once
+VeronicaIndexes = riakc_obj:set_secondary_index(
+    VeronicaMetadata,
+    [{{binary_index, "field1"}, [<<"val4">>, <<"val4">>, <<"val4a">>, <<"val4b">>]},
+     {{integer_index, "field2"}, [1004, 1004, 1005, 1006, 1004, 1004, 1007]}]
+),
+VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes).
+```
+
+```golang
+o1 := &riak.Object{
+    Key:   "larry",
+    Value: []byte("My name is Larry"),
+}
+o1.AddToIndex("field1_bin", "val1")
+o1.AddToIntIndex("field2_int", 1001)
+
+o2 := &riak.Object{
+    Key:   "moe",
+    Value: []byte("My name is Moe"),
+}
+o2.AddToIndex("Field1_bin", "val2")
+o2.AddToIntIndex("Field2_int", 1002)
+
+o3 := &riak.Object{
+    Key:   "curly",
+    Value: []byte("My name is Curly"),
+}
+o3.AddToIndex("FIELD1_BIN", "val3")
+o3.AddToIntIndex("FIELD2_INT", 1003)
+
+o4 := &riak.Object{
+    Key:   "veronica",
+    Value: []byte("My name is Veronica"),
+}
+o4.AddToIndex("FIELD1_bin", "val4")
+o4.AddToIndex("FIELD1_bin", "val4")
+o4.AddToIndex("FIELD1_bin", "val4a")
+o4.AddToIndex("FIELD1_bin", "val4b")
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1005)
+o4.AddToIntIndex("FIELD2_int", 1006)
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1007)
+
+objs := [...]*riak.Object{o1, o2, o3, o4}
+
+wg := &sync.WaitGroup{}
+for _, obj := range objs {
+    obj.ContentType = "text/plain"
+    obj.Charset = "utf-8"
+    obj.ContentEncoding = "utf-8"
+
+    cmd, err := riak.NewStoreValueCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("people").
+        WithContent(obj).
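+        // Build() finalizes the store command; each command is then
+        // executed asynchronously below via cluster.ExecuteAsync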
+ Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field1_bin: val1" \ + -H "x-riak-index-field2_int: 1001" \ + -d 'My name is Larry' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \ + -H "x-riak-index-Field1_bin: val2" \ + -H "x-riak-index-Field2_int: 1002" \ + -d 'My name is Moe' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \ + -H "X-RIAK-INDEX-FIELD1_BIN: val3" \ + -H "X-RIAK-INDEX-FIELD2_INT: 1003" \ + -d 'My name is Curly' + +curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \ + -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \ + -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1007" \ + -d 'My name is Veronica' +``` + +The above objects will end up having the following secondary indexes, +respectively: + +* `Larry` - Binary index `field1_bin` and integer index `field2_int` +* `Moe` - Binary index `field1_bin` and integer index `field2_int` + (note that the index names are set to lowercase by Riak) +* `Curly` - Binary index `field1_bin` and integer index `field2_int` + (note again that the index names are set to lowercase) +* `Veronica` - Binary index `field1_bin` with the values `val4`, + `val4a`, and `val4b` and integer index `field2_int` with the values + `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed) + +As these examples show, there are safeguards in Riak that both normalize +the names of indexes and prevent the accumulation of redundant indexes. + +## Invalid Field Names and Types + +The following examples demonstrate what happens when an index field is +specified with an invalid field name or type. The system responds with +`400 Bad Request` and a description of the error. + +Invalid field name: + +```java +// The Java client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_foo'] = [1001] + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter', 'jsmith123'); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_foo', 1001) + +# Result: +riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'." 
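+# Only the '_bin' (binary) and '_int' (integer) suffixes are valid 2i field types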
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
+>
+> To avoid issues such as the above, the `riak.conf` file provides an option for enabling or disabling a node's participation in 2i queries. Setting `participate_in_coverage = disabled` prevents the node in question from participating. The recommended use of this feature is to stop newly added nodes that have not yet received all of their data from participating in 2i queries and returning inconsistent results. Changing the `participate_in_coverage` setting requires a restart of Riak on that node before it takes effect. The default setting is `enabled`.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withScalarValue('val1')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
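+    // An exact-match query: returns only keys whose field1_bin index contains "val1"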
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+    .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withScalarValue(1001)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field1_bin",
+    "key": "val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4")
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2'..'val4')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withRangeValue('val2', 'val4')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2', 'val4')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withRange('val2', 'val4')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"}, %% index name
+    <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4"
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithRange("val2", "val4").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4
+```
+
+Or query an integer index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L)
+    .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002..1004)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withRangeValue(1002, 1004)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002, 1004)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withRange(1002, 1004)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"}, %% index name
+    1002, 1004 %% range query for keys between 1002 and 1004
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntRange(1002, 1004).
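+    // WithIntRange is the integer-index counterpart of WithRange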
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field2_int",
+    "start": 1002,
+    "end": 1004
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+    .withKeyAndIndex(true)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('rock', 'rocl')
+    ->withReturnTerms()
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">> %% range query for keys between "rock" and "rocl"
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+    .withMaxResults(5)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5)
+```
+
+```csharp
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+function do_query(continuation) {
+    var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+        .withBucketType('indexes')
+        .withBucket('tweets')
+        .withIndexName('hashtags_bin')
+        .withRange('ri', 'ru')
+        .withMaxResults(5)
+        .withCallback(pagination_cb);
+
+    if (continuation) {
+        binIdxCmdBuilder.withContinuation(continuation);
+    }
+
+    client.execute(binIdxCmdBuilder.build());
+}
+
+var query_keys = [];
+function pagination_cb(err, rslt) {
+    if (err) {
+        logger.error("query_cb err: '%s'", err);
+        return;
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+        query_keys = [];
+
+        if (rslt.continuation) {
+            do_query(rslt.continuation);
+        }
+    }
+
+    if (rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+do_query();
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [{max_results, 5}]
+).
+```
+
+```golang
+func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error {
+    builder := riak.NewSecondaryIndexQueryCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("tweets").
+        WithIndexName("hashtags_bin").
+        WithRange("ri", "ru").
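+        // fetch at most five results per request; the response's
+        // continuation token drives the next page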
+ WithMaxResults(5) + + if continuation != nil && len(continuation) > 0 { + builder.WithContinuation(continuation) + } + + cmd, err := builder.Build() + if err != nil { + return err + } + + if err := cluster.Execute(cmd); err != nil { + return err + } + + printIndexQueryResults(cmd) + + sciq := cmd.(*riak.SecondaryIndexQueryCommand) + if sciq.Response == nil { + return errors.New("[DevUsing2i] expected response but did not get one") + } + + rc := sciq.Response.Continuation + if rc != nil && len(rc) > 0 { + return doPaginatedQuery(cluster, sciq.Response.Continuation) + } + + return nil +} + +func queryingPagination(cluster *riak.Cluster) error { + return doPaginatedQuery(cluster, nil) +} +``` + +```curl +curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true +``` + +Here is an example JSON response (your client-specific response may differ): + +```json +{ + "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=", + "results": [ + { "rice": "349222574510710785" }, + { "rickross": "349222868095217664" }, + { "ridelife": "349221819552763905" }, + { "ripjake": "349220649341952001" }, + { "ripjake": "349220687057129473" } + ] +} +``` + +Take the continuation value from the previous result set and feed it +back into the query. + +```java +Namespace tweetsBucket = new Namespace("indexes", "tweets"); +BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru") + .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM")) + .withMaxResults(5) + .withKeyAndIndex(true) + .build(); +BinIndexQuery.Response response = client.execute(biq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('tweets') +bucket.get_index( + 'hashtags_bin', + 'ri'..'ru', + continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM', + max_results: 5, + return_terms: true +) +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('tweets', 'indexes') + ->withIndexName('hashtags') + ->withRangeValue('ri', 'ru') + ->withMaxResults(5) + ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('tweets') +bucket.get_index( + 'hashtags_bin', + 'ri', 'ru', + continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM', + max_results=5, + return_terms=True +) +``` + +```csharp +// rslt is the previous 2i fetch result +var idxId = new RiakIndexId("indexes", "tweets", "hashtags"); +var options = new RiakIndexGetOptions(); +options.SetMaxResults(5); +options.SetContinuation(rslt.Continuation); +rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options); +``` + +```javascript +// See above example +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name + {binary_index, "hashtags"}, %% index name + <<"ri">>, <<"ru">>, %% range query from "ri" to "ru" + [ + {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>}, + {max_results, 5}, + {return_terms, true} + ] +). 
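+%% The continuation is an opaque token; pass it back unchanged to fetch the next page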
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with pagination (`max_results` and
+`continuation`) and with `return_terms`.
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys.
+See the pagination example above: hash
+tags (2i keys) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages which contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index like in the previous example and pipes this into a MapReduce job
+that counts the number of records in the `people` bucket. In order to
+improve efficiency, the batch size has been increased from the default
+size of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key": "people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/security.md b/content/riak/kv/2.9.7/developing/usage/security.md
new file mode 100644
index 0000000000..33a719224e
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/security.md
@@ -0,0 +1,103 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/advanced/client-security
+  - /riak/kv/2.9.7/dev/advanced/client-security
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/2.9.7/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, aka [security sources]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients present a certificate generated by the same
+  Certificate Authority (CA) used by Riak
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified using the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis. 
This means that +you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{<baseurl>}}riak/kv/2.9.7/developing/usage) have to use username and password. The approach +that you adopt will depend on your security needs. + +This document provides a general overview of how that works. For +managing security in Riak itself, see the following documents: + +* [Authentication and Authorization]({{<baseurl>}}riak/kv/2.9.7/using/security/basics) +* [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/) + +We also provide client-library-specific guides for the following +officially supported clients: + +* [Java]({{<baseurl>}}riak/kv/2.9.7/developing/usage/security/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.7/developing/usage/security/ruby) +* [PHP]({{<baseurl>}}riak/kv/2.9.7/developing/usage/security/php) +* [Python]({{<baseurl>}}riak/kv/2.9.7/developing/usage/security/python) +* [Erlang]({{<baseurl>}}riak/kv/2.9.7/developing/usage/security/erlang) + +## Certificates, Keys, and Authorities + +If Riak security is enabled, all client operations, regardless of the +security source you choose for those clients, must be over a secure SSL +connection. If you are using a self-generated Certificate Authority +(CA), Riak and connecting clients will need to share that CA. + +To use certificate-based auth, you will need to create a Public Key +Infrastructure (PKI) based on +[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central +foundation of your PKI should be a Certificate Authority (CA), created +inside of a secure environment, that can be used to sign certificates. +In addition to a CA, your client will need to have access to a private +key shared only by the client and Riak as well as a CA-generated +certificate. + +To prevent so-called [Man-in-the-Middle +attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private +keys should never be shared beyond Riak and connecting clients. + +> **HTTP not supported** +> +> Certificate-based authentication is available only through Riak's +[Protocol Buffers]({{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{<baseurl>}}riak/kv/2.9.7/developing/api/http). + +### Default Names + +In Riak's [configuration files]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#security), the +default certificate file names are as follows: + +Cert | Filename +:----|:------- +Certificate authority (CA) | `cacertfile.pem` +Private key | `key.pem` +CA-generated cert | `cert.pem` + +These filenames will be used in the client-library-specific tutorials. + + + + diff --git a/content/riak/kv/2.9.7/developing/usage/security/erlang.md b/content/riak/kv/2.9.7/developing/usage/security/erlang.md new file mode 100644 index 0000000000..d07d22d5dc --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage/security/erlang.md @@ -0,0 +1,118 @@ +--- +title_supertext: "Client Security:" +title: "Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Erlang" + identifier: "usage_security_erlang" + weight: 103 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.7/dev/advanced/client-security/erlang + - /riak/kv/2.9.7/dev/advanced/client-security/erlang +--- + +This tutorial shows you how to set up a Riak Erlang client to +authenticate itself when connecting to Riak. 
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", ""},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
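+%% Pid can now be used for any riakc_pb_socket operation over the secured connection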
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.7/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + + + + diff --git a/content/riak/kv/2.9.7/developing/usage/security/java.md b/content/riak/kv/2.9.7/developing/usage/security/java.md new file mode 100644 index 0000000000..c57087599c --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage/security/java.md @@ -0,0 +1,121 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.7/dev/advanced/client-security/java + - /riak/kv/2.9.7/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to
+create a cluster object (we'll call it `cluster`), which will in turn be
+used to create a `client` object. The setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.core.RiakCluster;
+import com.basho.riak.client.core.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+    .withRemoteAddress("127.0.0.1")
+    .withRemotePort(8087)
+    // This will specify a username but no password or keystore:
+    .withAuth("riakuser", null, null)
+    .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+    .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+    .withRemoteAddress("127.0.0.1")
+    .withRemotePort(8087)
+    .withAuth("riakuser", "rosebud", ks)
+    .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/security/php.md b/content/riak/kv/2.9.7/developing/usage/security/php.md
new file mode 100644
index 0000000000..5a5069ed61
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/security/php.md
@@ -0,0 +1,122 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/advanced/client-security/php
+  - /riak/kv/2.9.7/dev/advanced/client-security/php
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics). 
[Certificate]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
+
+
+
diff --git a/content/riak/kv/2.9.7/developing/usage/security/python.md b/content/riak/kv/2.9.7/developing/usage/security/python.md
new file mode 100644
index 0000000000..509e6a6d98
--- /dev/null
+++ b/content/riak/kv/2.9.7/developing/usage/security/python.md
@@ -0,0 +1,176 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/advanced/client-security/python
+  - /riak/kv/2.9.7/dev/advanced/client-security/python
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+ +```python +client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds) +``` + +This client object is not currently set up to use any of the +available security sources with the exception of trust-based auth, +provided that the +[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from +which the client is connecting has been specified as trusted. More on +specifying trusted CIDRs can be found in [Trust-based +Authentication]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#trust-based-authentication). + +**Note**: The examples in the following sections specify certs on the +basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to +specifying certs by location, you can also provide OpenSSL objects +instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +information from the above, with the exception that we'll also specify a +password for the client in the `creds` object from above. We'll use the +password `rosebud` here and in the rest of the examples. + +```python +creds = SecurityCreds(username='riakuser', + cacert_file='/ssl_dir/cacertfile.pem', + password='rosebud') +``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.7/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```python +creds = SecurityCreds(username='riakuser', + cacert_file='/ssl_dir/cacertfile.pem', + cert_file='/ssl_dir/cert.pem', + pkey_file='/ssl_dir/key.pem') +``` + +## Specifying a Certificate Revocation List + +If you are using a CA-generated Certificate Revocation List (CRL), you +can specify its filepath using the `crl_file` parameter. + +```python +creds = SecurityCreds(username='riakuser', + # Using the cert information from above + crl_file='/ssl_dir/revocation.crl') +``` + +## Specifying Ciphers + +To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/2.9.7/using/security/basics/#security-ciphers), you can pass in a colon-delimited +string to the `ciphers` parameter: + +```python +creds = SecurityCreds(username='riakuser', + # Using the cert information from above + ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC') +``` + +## Using OpenSSL Objects + +Whenever you specify certs, you have the option of either passing in +file paths as strings (as in the examples above) or properly created +OpenSSL objects, e.g. objects created using the +[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If +you generate OpenSSL objects this way, you should note that they must +be specified differently when creating a `SecurityCreds` object. The +table below lists the appropriate parameter names for each method, as +well as the pyOpenSSL class to which each cert must belong if you create +OpenSSL objects.
+ +Cert | File path | OpenSSL object | Class +:----|:----------|:---------------|:----- +Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509` +Private key | `pkey_file` | `pkey` | `OpenSSL.crypto.PKey` +CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509` +CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL` + +If you specify filepaths, the appropriate certs will be loaded and +converted into the appropriate OpenSSL object. The functions used for +this are `OpenSSL.crypto.load_privatekey()` for the private key and +`OpenSSL.crypto.load_certificate()` for the cert and CA cert. + + + + diff --git a/content/riak/kv/2.9.7/developing/usage/security/ruby.md b/content/riak/kv/2.9.7/developing/usage/security/ruby.md new file mode 100644 index 0000000000..b58c22dd03 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage/security/ruby.md @@ -0,0 +1,162 @@ +--- +title_supertext: "Client Security:" +title: "Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Ruby" + identifier: "usage_security_ruby" + weight: 101 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.7/dev/advanced/client-security/ruby + - /riak/kv/2.9.7/dev/advanced/client-security/ruby +--- + +This tutorial shows you how to set up a Riak Ruby client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#pam-based-authentication)-based authentication, you +can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section. +[Password]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#password-based-authentication)-based authentication is covered +in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#certificate-based-authentication)-based authentication +is covered [further down](#certificate-based-authentication). + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Ruby Client Basics + +When connecting to Riak using a Ruby-based client, you must instantiate +an object from the `Riak::Client` class that then handles interactions +with Riak (you may have more than one client object active in an +application if you wish). All authentication-related information that +needs to be used can be passed to the object upon instantiation in an +`authentication` hash. + +If you are using Riak Security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects +to Riak on `localhost` and on port 8087, specifies `riakuser` as a +username, and points the client to a CA located at +`/ssl_dir/cacertfile.pem`.
+ +```ruby +require 'riak' + +client = Riak::Client.new( + host: '127.0.0.1', + pb_port: 8087, + authentication: { + ca_file: '/ssl_dir/cacertfile.pem', + user: 'riakuser' + } +) +``` + +This client object is currently not set up to use any of the available +security sources, except trust-based auth, provided that the CIDR from +which the client is connecting has been specified as trusted. More on +this in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#trust-based-authentication). + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +information from the example above, with the exception that we will +specify a password for the client in the `authentication` hash. We'll +use the password `rosebud` here and in the rest of the examples. + +```ruby +client = Riak::Client.new( + # Using the host and pb_port from above + authentication: { + ca_file: '/ssl_dir/cacertfile.pem', + user: 'riakuser', + password: 'rosebud' + } +) +``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.7/using/security/basics#user-management). + + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a CA (as with all security sources), a username, a +client-specific CA, a CA-generated cert, and a private key. We'll assume +that all certs are stored in `/ssl_dir`, as in the previous examples. + +```ruby +client = Riak::Client.new( + # Using the host and pb_port from above + authentication: { + ca_file: '/ssl_dir/cacertfile.pem', + user: 'riakuser', + client_ca: '/ssl_dir/client_cert.pem', + cert: '/ssl_dir/cert.pem', + key: '/ssl_dir/key.pem' + } +) +``` + +The `client_ca` must be specified if you intend to use a CA that is +different from the CA used by Riak, e.g. if you are integrating with +an existing single sign-on (SSO) system. If the client and server CA are +the same, you don't need to specify `client_ca`. The client cert and +key, however, must always be specified. + +The `client_ca`, `cert`, and `key` fields are all flexible in their +usage. You can use a string specifying a filename (as in the example +above), or you can pass in an appropriate OpenSSL object, e.g. an SSL +object created using the +[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html) +gem. If you specify filenames, those files will be loaded and +converted into the appropriate OpenSSL object. + +## Specifying a Certificate Revocation List + +If you create certificates specifying a CA-signed Certificate Revocation +List (CRL), those certs will be checked against the CRLs specified. You +can specify the location of the list in the `authentication` hash: + +```ruby +client = Riak::Client.new( + # Using the host and pb_port from above + authentication: { + ca_file: '/ssl_dir/cacertfile.pem', + user: 'riakuser', + # Using the cert paths from above + crl_file: '/ssl_dir/revocation.crl' + } +) +``` + +CRL checking can sometimes be a slow process. To disable it, you can set +`crl` to `false` in the `authentication` hash when instantiating your +client object.
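+ +Below is a minimal sketch of what disabling CRL checking looks like, reusing the connection details and cert paths from the examples above: + +```ruby +client = Riak::Client.new( + # Using the host and pb_port from above + authentication: { + ca_file: '/ssl_dir/cacertfile.pem', + user: 'riakuser', + password: 'rosebud', + # Skip CRL checking when validating certificates + crl: false + } +) +```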
+ +## Online Certificate Status Protocol + +If you create certificates with a specified Online Certificate Status +Protocol +([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)), +the OCSP endpoint will automatically be checked. If that endpoint is not +available or if checking is running slowly, you can disable OCSP +checking by setting `ocsp` to `false` in the `authentication` hash. + + + + diff --git a/content/riak/kv/2.9.7/developing/usage/updating-objects.md b/content/riak/kv/2.9.7/developing/usage/updating-objects.md new file mode 100644 index 0000000000..dd3bb88362 --- /dev/null +++ b/content/riak/kv/2.9.7/developing/usage/updating-objects.md @@ -0,0 +1,778 @@ +--- +title: "Updating Objects" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Updating Objects" + identifier: "usage_updating_objects" + weight: 102 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.7/dev/using/updates + - /riak/kv/2.9.7/dev/using/updates +--- + +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode + +## Using Causal Context + +If an object already exists under a certain key and you want to write a +new object to that key, Riak needs to know what to do, especially if +multiple writes are happening at the same time. Which of the objects +being written should be deemed correct? These kinds of scenarios can +arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency) systems. + +Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context). Causal context tracks the causal history of an object. +It is attached to _all_ Riak objects as metadata, and it is not +readable by humans. It may sound complex---and it is fairly complex +behind the scenes---but using it in your application is very simple. + +Using causal context in an update involves the following steps: + +1. Fetch the object +2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context)) +3. Write the new object to Riak + +Step 2 is the most important here. All of Basho's official Riak clients +enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context). Although a more detailed tutorial on context objects and +object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution), we'll walk you +through a basic example here. + +Let's say that the current NBA champion is the Washington Generals. +We've stored that data in Riak under the key `champion` in the bucket +`nba`, which bears the bucket type `sports`. The value of the object is +a simple text snippet that says `Washington Generals`. + +But one day the Harlem Globetrotters enter the league and dethrone the +hapless Generals (forever, as it turns out). Because we want our Riak +database to reflect this new development in the league, we want to make +a new write to the `champion` key. Let's read the object stored there +and modify the value.
+ +```java +Location currentChampion = new Location(new Namespace("sports", "nba"), "champion"); +FetchValue fetch = new FetchValue.Builder(currentChampion) + .build(); +FetchValue.Response response = client.execute(fetch); +RiakObject obj = response.getValue(RiakObject.class); +obj.setValue(BinaryValue.create("Harlem Globetrotters")); + +StoreValue store = new StoreValue.Builder(obj) + .withLocation(currentChampion) + .build(); +client.execute(store); +``` + +```ruby +bucket = client.bucket_type('sports').bucket('nba') +obj = bucket.get('champion') +obj.raw_data = 'Harlem Globetrotters' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports')); +$object = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->withLocation($location) + ->build() + ->execute() + ->getObject(); + +$object->setData('Harlem Globetrotters'); + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withLocation($location) + ->withObject($object) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('sports').bucket('nba') +obj = bucket.get('champion') +obj.data = 'Harlem Globetrotters' +obj.store() +``` + +```csharp +var id = new RiakObjectId("sports", "nba", "champion"); +var obj = new RiakObject(id, "Washington Generals", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); + +rslt = client.Get(id); +obj = rslt.Value; +obj.SetObject("Harlem Globetrotters", + RiakConstants.ContentTypes.TextPlain); +rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setValue('Washington Generals'); + +var options = { + bucketType: 'sports', bucket: 'nba', key: 'champion', + value: riakObj +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + delete options.value; + client.fetchValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var fetchedObj = rslt.values.shift(); + fetchedObj.setValue('Harlem Globetrotters'); + options.value = fetchedObj; + options.returnBody = true; + client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var updatedObj = rslt.values.shift(); + logger.info("champion: %s", updatedObj.value.toString('utf8')); + }); + }); +}); +``` + +```erlang +%% In the Erlang client, you cannot view a context object directly, but it +%% will be included in the output when you fetch an object: + +{ok, Obj} = riakc_pb_socket:get(Pid, + {<<"sports">>, <<"nba">>}, + <<"champion">>), +UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>), +{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Washington Generals"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("sports"). + WithBucket("nba"). + WithKey("champion"). + WithContent(obj). + WithReturnBody(true). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +svc := cmd.(*riak.StoreValueCommand) +rsp := svc.Response +obj = rsp.Values[0] +obj.Value = []byte("Harlem Globetrotters") + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithBucketType("sports"). + WithBucket("nba"). + WithKey("champion"). + WithContent(obj). + WithReturnBody(true).
+ Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +svc = cmd.(*riak.StoreValueCommand) +rsp = svc.Response +obj = rsp.Values[0] +fmt.Printf("champion: %v", string(obj.Value)) +``` + +```curl +# When using curl, the context object is attached to the X-Riak-Vclock header + +curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion + +# In the resulting output, the header will look something like this: + +X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= + +# When performing a write to the same key, that same header needs to +# accompany the write for Riak to be able to use the context object +``` + +In the samples above, we didn't need to actually interact with the +context object, as retaining and passing along the context object was +accomplished automatically by the client. If, however, you do need +access to an object's context, the clients enable you to fetch it from +the object: + +```java +// Using the RiakObject obj from above: + +VClock vClock = obj.getVClock(); +System.out.println(vClock.asString()); + +// The context object will look something like this: +// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```ruby +# Using the RObject obj from above: + +obj.vclock + +# The context object will look something like this: +# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```php +// Using the $object from above: + +echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```python +# Using the RiakObject obj from above: + +obj.vclock + +# The context object will look something like this: +# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```csharp +// Using the RiakObject obj from above: +var vclock = rslt.Value.VectorClock; +Console.WriteLine(Convert.ToBase64String(vclock)); + +// The output will look something like this: +// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA= +``` + +```javascript +// Using the RiakObject fetchedObj from above: +var fetchedObj = rslt.values.shift(); +logger.info("vclock: %s", fetchedObj.getVClock().toString('base64')); + +// The output will look something like this: +// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA= +``` + +```erlang +%% Using the Obj object from above: + +riakc_obj:vclock(Obj). + +%% The context object will look something like this in the Erlang shell: +%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126, +%% 6,175,157,255,57,131,41,145,49,143,149,225,240,...>> +``` + +```golang +svc := cmd.(*riak.StoreValueCommand) +rsp := svc.Response +fmt.Println(rsp.VClock) + +// Output: +// X3hNXFq3ythUqvvrG9eJEGbUyLS +``` + +## The Object Update Cycle + +If you decide that your application requires mutable data in Riak, we +recommend that you: + +* avoid high-frequency object updates to the same key (i.e. multiple + updates per second for long periods of time), as this will degrade + Riak performance; and that you +* follow a read-modify-write cycle when performing updates. + +That cycle looks something like this: + +1. **Read** the object from Riak. This step is important for updates +because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context), which +is the information that Riak uses to make decisions about which object +values are most recent (this is especially useful for objects that are +frequently updated).
This context object needs to be passed back to Riak +when you update the object. This step is handled for you by Basho's +client libraries as long as you perform a read prior to an update. In +addition, if you have chosen to allow Riak to generate +[siblings]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/#siblings) (which we recommend), you +should **resolve sibling conflicts** upon read if they exist. For more +on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution), along +with examples from our official client libraries: + * [Java]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/java) + * [Ruby]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/ruby) + * [Python]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/python) + * [C#]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/csharp) + * [Go]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution/golang) +2. **Modify the object** on the application side. +3. **Write** the new, modified object to Riak. Because you read the +object first, Riak will receive the object's causal context metadata. +Remember that this happens automatically. + +In general, you should read an object before modifying it. Think of it +as performing a `GET` prior to any `PUT` when interacting with a REST +API. + +> **Note on strong consistency** +> +> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the +[strong consistency documentation]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/strong-consistency/#strongly-consistent-writes). + +### Updating Deleted Objects + +You should use the read-modify-write cycle explained above at all times, +_even if you're updating deleted objects_. The reasons for that can be +found in our documentation on [tombstones]({{<baseurl>}}riak/kv/2.9.7/using/reference/object-deletion/#tombstones). + +There are some modifications that you may need to make if you are +updating objects that may have been deleted previously. If you are using +the Java client, an explanation and examples are given in the +[Java-specific section below](#java-client-example). If +you are using the Python or Erlang clients, causal context for deleted +objects will be handled automatically. If you are using the Ruby client, +you will need to explicitly set the `deletedvclock` parameter to `true` +when reading an object, like so: + +```ruby +bucket = client.bucket('fruits') +obj = bucket.get('banana', deletedvclock: true) +``` + +## Example Update + +In this section, we'll provide an update example for Basho's official Ruby, +PHP, Python, .NET, Node.js, Erlang and Go clients. Because updates with the official +Java client function somewhat differently, those examples can be found in the +[section below](#java-client-example). + +For our example, imagine that you are storing information about NFL head +coaches in the bucket `coaches`, which bears the bucket type +`siblings`, a type that sets `allow_mult` to `true`. The key for each object +is the name of the team, e.g. `giants`, `broncos`, etc. Each object will +consist of the name of the coach in plain text.
Here's an example of +creating and storing such an object: + +```ruby +bucket = client.bucket('coaches') +obj = bucket.get_or_new('seahawks', type: 'siblings') +obj.content_type = 'text/plain' +obj.raw_data = 'Pete Carroll' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + +if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); +} else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); +} + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('siblings').bucket('coaches') +obj = RiakObject(client, bucket, 'seahawks') +obj.content_type = 'text/plain' +obj.data = 'Pete Carroll' +obj.store() +``` + +```csharp +var id = new RiakObjectId("siblings", "coaches", "seahawks"); +var obj = new RiakObject(id, "Pete Carroll", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('siblings'); +riakObj.setBucket('coaches'); +riakObj.setKey('seahawks'); +riakObj.setValue('Pete Carroll'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } else { + logger.info('Stored Pete Carroll'); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>}, + <<"seahawks">>, + <<"Pete Carroll">>, + <<"text/plain">>). +riakc_pb_socket:put(Pid, Obj). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Pete Carroll"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey("seahawks"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +fmt.Println("Stored Pete Carroll") +``` + +Every once in a while, though, head coaches change in the NFL, which +means that our data would need to be updated. 
Below is an example +function for updating such objects: + +```ruby +def update_coach(team, new_coach) + bucket = client.bucket('coaches') + # The read phase + obj = bucket.get_or_new(team, type: 'siblings') + # The modify phase + obj.data = new_coach + # The write phase + obj.store +end + +# Example usage +update_coach('packers', 'Vince Lombardi') +``` + +```php +function update_coach($team, $coach) { + $location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); + $response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + + if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); + } else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); + } + + $response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); + + return $response->isSuccess(); +} + +echo update_coach('packers', 'Vince Lombardi'); // true +``` + +```python +def update_coach(team, new_coach): + bucket = client.bucket_type('siblings').bucket('coaches') + # The read phase + obj = bucket.get(team) + # The modify phase + obj.data = new_coach + # The write phase + obj.store() + +# Example usage +update_coach('packers', 'Vince Lombardi') +``` + +```csharp +private void UpdateCoach(string team, string newCoach) +{ + var id = new RiakObjectId("siblings", "coaches", team); + var getResult = client.Get(id); + + RiakObject obj = getResult.Value; + obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain); + client.Put(obj); +} +``` + +```javascript +function update_coach(team, newCoach) { + client.fetchValue({ + bucketType: 'siblings', bucket: 'coaches', key: team + }, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var riakObj = rslt.values.shift(); + riakObj.setValue(newCoach); + client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); + }); +} +``` + +```erlang +update_coach(team, new_coach) -> + {ok, Obj} = riakc_pb_socket:get(Pid, + {<<"siblings">>, <<"coaches">>}, + <<team>>), + ModifiedObj = riakc_obj:update_value(Obj, <<new_coach>>), + riakc_pb_socket:put(Pid, ModifiedObj). + +%% Example usage +update_coach('packers', 'Vince Lombardi') +``` + +```golang +func updateCoach(cluster *riak.Cluster, team, newCoach string) error { + var cmd riak.Command + var err error + + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey(team). + Build() + + if err != nil { + return err + } + + if err := cluster.Execute(cmd); err != nil { + return err + } + + fvc := cmd.(*riak.FetchValueCommand) + obj := fvc.Response.Values[0] + obj.Value = []byte(newCoach) + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey(team). + WithContent(obj). + Build() + + if err != nil { + return err + } + + if err := cluster.Execute(cmd); err != nil { + return err + } + + return nil +} +``` + +In the example above, you can see the three steps in action: first, the +object is read, which automatically fetches the object's causal context; +then the object is modified, i.e. the object's value is set to the name +of the new coach; and finally the object is written back to Riak. 
+ +## Object Update Anti-patterns + +The most important thing to bear in mind when updating objects is this: +you should always read an object prior to updating it _unless_ you are +certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If +you're not certain, however, then we recommend always reading the object +first. + +## Java Client Example + +As with the other official clients, object updates using the Java client +will automatically fetch the object's causal context metadata, modify +the object, and then write the modified value back to Riak. You can +update object values by creating your own `UpdateValue` operations that +extend the abstract class `Update<T>`. An `UpdateValue` operation must +have an `apply` method that returns a new `T`. In our case, the data +class that we're dealing with is `User`. First, let's create a very +basic `User` class: + +```java +public class User { + public String username; + public List<String> hobbies; + + public User(String username, List<String> hobbies) { + this.username = username; + this.hobbies = hobbies; + } +} +``` + +In the example below, we'll create an update value operation called +`UpdateUserName`: + +```java +import com.basho.riak.client.api.commands.kv.UpdateValue.Update; + +public class UpdateUserName extends Update<User> { + @Override + public User apply(User original) { + // update logic goes here + } +} +``` + +In the example above, we didn't specify any actual update logic. Let's +change that by creating an `UpdateValue` operation that changes a `User` +object's `username` property: + +```java +public class UpdateUserName extends Update<User> { + private String newUsername; + + public UpdateUserName(String newUsername) { + this.newUsername = newUsername; + } + + @Override + public User apply(User original) { + original.username = newUsername; + return original; + } +} +``` + +Now, let's put our `UpdateUserName` operation into effect. In the +example below, we'll change a `User` object's `username` from whatever +it currently is to `cliffhuxtable1986`: + +```java +import com.basho.riak.client.api.commands.kv.FetchValue; + +Location location = new Location(...); +UpdateValue updateOp = new UpdateValue.Builder(location) + .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true) + .withUpdate(new UpdateUserName("cliffhuxtable1986")) + .build(); +client.execute(updateOp); +``` + +You may notice that a fetch option was added to our `UpdateValue` +operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`. +Remember from the section above that you should always read an object +before modifying and writing it, _even if the object has been deleted_. +Setting this option to `true` ensures that the causal context is fetched +from Riak if the object has been deleted. We recommend always setting +this option to `true` when constructing `UpdateValue` operations. + +### Clobber Updates + +If you'd like to update an object by simply replacing it with an +entirely new value of the same type (unlike in the section above, where +only one property of the object was updated), the Java client provides +you with a "clobber" update that you can use to replace the existing +object with a new object of the same type rather than changing one or +more properties of the object.
Imagine that there is a `User` object +stored in the bucket `users` under the key `cliffhuxtable1986`, as in the +example above, and we simply want to replace the object with a brand new +object: + +```java +Location location = new Location(new Namespace("users"), "cliffhuxtable1986"); +User brandNewUser = new User(/* new user info */); +UpdateValue updateOp = new UpdateValue.Builder(location) + // As before, we set this option to true + .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true) + .withUpdate(Update.clobberUpdate(brandNewUser)) + .build(); +client.execute(updateOp); +``` + +### No-operation Updates in Java + +The Java client also enables you to construct **no-operation updates** +that don't actually modify the object and simply write the original +value back to Riak. What is the use of that, given that it isn't +changing the value of the object at all? No-operation updates can be +useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that +matter---with siblings, a no-operation update will fetch the object _and +its causal context_ and write the object back to Riak with the same, +fetched context. This has the effect of telling Riak that you deem this +value to be most current. Riak can then use this information in internal +sibling resolution operations. + +Below is an example: + +```java +Location loc = new Location(...); +UpdateValue updateOp = new UpdateValue.Builder(loc) + .withUpdate(Update.noopUpdate()) + .build(); +client.execute(updateOp); +``` + +The example above updates the object without explicitly fetching it +first. You could, however, use a no-operation update to _read_ an object as well if +you set `return_body` to `true` in your request: + +```java +// Using the Location object "loc" from above: +UpdateValue updateOp = new UpdateValue.Builder(loc) + .withFetchOption(Option.RETURN_BODY, true) + .withUpdate(Update.noopUpdate()) + .build(); +UpdateValue.Response response = client.execute(updateOp); +RiakObject object = response.getValue(RiakObject.class); + +// Or to continue the User example from above: +User user = response.getValue(User.class); +``` + +In general, you should use no-operation updates only on keys that you +suspect may have accumulated siblings or on keys that are frequently +updated (and thus bear the possibility of accumulating siblings). +Otherwise, you're better off performing normal reads. + + + + diff --git a/content/riak/kv/2.9.7/downloads.md b/content/riak/kv/2.9.7/downloads.md new file mode 100644 index 0000000000..94f0e7d9d8 --- /dev/null +++ b/content/riak/kv/2.9.7/downloads.md @@ -0,0 +1,27 @@ +--- +title: "Download for Riak KV 2.9.7" +description: "Download some stuff!"
+menu: + riak_kv-2.9.7: + name: "Download Riak KV" + identifier: "download_riak_kv" + weight: 101 + pre: download-alt +project: "riak_kv" +project_version: 2.9.7 +toc: false +layout: downloads +listed_projects: + - project: "riak_kv" + version: 2.9.7 + title: "Riak KV" + install_instructions_set: "setup/installing" +aliases: + - /riak/2.9.7/downloads + - /riak/kv/2.9.7/downloads +--- + + + + + diff --git a/content/riak/kv/2.9.7/index.md b/content/riak/kv/2.9.7/index.md new file mode 100644 index 0000000000..a8a5049a10 --- /dev/null +++ b/content/riak/kv/2.9.7/index.md @@ -0,0 +1,77 @@ +--- +title: "Riak KV 2.9.7" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Riak KV" + identifier: "index" + weight: 100 + pre: riak +toc: false +aliases: + - /riak/2.9.7/ +--- + +[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/ +[config index]: {{<baseurl>}}riak/kv/2.9.7/configuring +[downloads]: {{<baseurl>}}riak/kv/2.9.7/downloads/ +[install index]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/ +[plan index]: {{<baseurl>}}riak/kv/2.9.7/setup/planning +[perf open files]: {{<baseurl>}}riak/kv/2.9.7/using/performance/open-files-limit +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/debian-ubuntu +[usage search]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search +[getting started]: {{<baseurl>}}riak/kv/2.9.7/developing/getting-started +[dev client libraries]: {{<baseurl>}}riak/kv/2.9.7/developing/client-libraries + + + +Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data. + +## Supported Operating Systems + +- Amazon Linux 2016.09 (AWS) +- Amazon Linux 2 (AWS) +- CentOS 6 +- CentOS 7 +- CentOS 8 +- Debian 7.0 ("Wheezy") +- Debian 8.0 ("Jessie") +- Debian 9.0 ("Stretch") +- Red Hat Enterprise Linux 6 +- Red Hat Enterprise Linux 7 +- Red Hat Enterprise Linux 8 +- Raspbian Buster +- Ubuntu 12.04 ("Precise Pangolin") +- Ubuntu 14.04 ("Trusty Tahr") +- Ubuntu 16.04 ("Xenial Xerus") +- Ubuntu 18.04 ("Bionic Beaver") +- FreeBSD 10.4 +- FreeBSD 11.1 +- Mac OSX 10.11+ (development only) + +## Getting Started + +Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started: + +1. [Install Riak KV][install index] +2. [Plan your Riak KV setup][plan index] +3. [Configure Riak KV for your needs][config index] + +{{% note title="Developing with Riak KV" %}} +If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/2.9.7/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. +{{% /note %}} + +## Popular Docs + +1. [Open Files Limit][perf open files] +2. [Installing on Debian-Ubuntu][install debian & ubuntu] +3. [Developing with Riak KV: Searching][usage search] +4. [Developing with Riak KV: Getting Started][getting started] +5. 
[Developing with Riak KV: Client Libraries][dev client libraries] + + + + + diff --git a/content/riak/kv/2.9.7/learn.md b/content/riak/kv/2.9.7/learn.md new file mode 100644 index 0000000000..8af5a64aef --- /dev/null +++ b/content/riak/kv/2.9.7/learn.md @@ -0,0 +1,53 @@ +--- +title: "Learn About Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Learning" + identifier: "learn" + weight: 400 + pre: beaker +toc: true +aliases: +--- + +[learn why riak]: ./why-riak-kv/ +[learn use cases]: ./use-cases/ +[learn new nosql]: ./new-to-nosql/ +[glossary]: ./glossary/ +[concepts]: ./concepts/ + +## In This Section + +#### [Why Riak KV?][learn why riak] + +An overview of Riak KV and when to use it. + +[Learn More >>][learn why riak] + +#### [Use Cases][learn use cases] + +Details use cases and applications in which Riak KV excels. + +[Learn More >>][learn use cases] + + + +#### [Glossary][glossary] + +A list of terms relating to Riak used throughout the documentation. + +[Learn More >>][glossary] + +#### [Concepts][concepts] + +Provides definitions for, insight into, and high-level information about the various parts of Riak KV. + +[Learn More >>][concepts] + + + + + diff --git a/content/riak/kv/2.9.7/learn/concepts.md b/content/riak/kv/2.9.7/learn/concepts.md new file mode 100644 index 0000000000..b04bb1b0e3 --- /dev/null +++ b/content/riak/kv/2.9.7/learn/concepts.md @@ -0,0 +1,49 @@ +--- +title: "Concepts" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Concepts" + identifier: "learn_concepts" + weight: 104 + parent: "learn" +toc: true +aliases: +--- + +[concept aae]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy +[concept buckets]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets +[concept cap neg]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/capability-negotiation +[concept causal context]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context +[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency +[concept vnodes]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/vnodes +[config index]: {{<baseurl>}}riak/kv/2.9.7/configuring +[plan index]: {{<baseurl>}}riak/kv/2.9.7/setup/planning +[use index]: {{<baseurl>}}riak/kv/2.9.7/using/ + + +Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high-level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak.
+ +Learn more about: + +* [Active Anti-Entropy (AAE)][concept aae] +* [Buckets][concept buckets] +* [Capability Negotiation][concept cap neg] +* [Causal Context][concept causal context] +* [Clusters][concept clusters] +* [Convergent Replicated Data Types (CRDTs)][concept crdts] +* [Eventual Consistency][concept eventual consistency] +* [Keys and Objects][concept keys objects] +* [Replication][concept replication] +* [Virtual Nodes (vnodes)][concept vnodes] + + + + diff --git a/content/riak/kv/2.9.7/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.9.7/learn/concepts/active-anti-entropy.md new file mode 100644 index 0000000000..1cb4de47ce --- /dev/null +++ b/content/riak/kv/2.9.7/learn/concepts/active-anti-entropy.md @@ -0,0 +1,111 @@ +--- +title: "Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Active Anti-Entropy" + identifier: "learn_concepts_aae" + weight: 100 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.7/theory/concepts/aae + - /riak/kv/2.9.7/theory/concepts/aae +--- + +[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency +[config aae]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/#active-anti-entropy +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode +[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree +[usage search]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search + + +In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored +on different nodes are an expected byproduct of node failure, concurrent +client updates, physical data loss and corruption, and other events that +distributed systems are built to handle. These conflicts occur when +objects are either + +* **missing**, as when one node holds a replica of the object and + another node does not, or +* **divergent**, as when the values of an existing object differ across + nodes. + +Riak KV offers two means of resolving object conflicts: read repair and +active anti-entropy (AAE). Both of these conflict resolution mechanisms +apply both to normal key/value data in Riak and to +[search indexes][usage search]. + + +## Read Repair vs. Active Anti-Entropy + +In versions of Riak prior to 1.3, replica conflicts were healed via +[read repair][glossary read rep], which is a _passive_ +anti-entropy mechanism that heals object conflicts only when a read +request reaches Riak from a client. Under read repair, if the +[vnode][glossary vnode] coordinating the read request determines +that different nodes hold divergent values for the object, the repair +process will be set in motion. + +One advantage of using read repair alone is that it doesn't require any +kind of background process to take effect, which can cut down on CPU +resource usage. The drawback of the read repair-only approach, however, +is that the healing process can only ever reach those objects that +are read by clients. Any conflicts in objects that are not read by +clients will go undetected.
+ +The _active_ anti-entropy (AAE) subsystem was added to Riak in +versions 1.3 and later to enable conflict resolution to run as a +continuous background process, in contrast with read repair, which does +not run continuously. AAE is most useful in clusters containing so- +called "cold data" that may not be read for long periods of time, even +months or years, and is thus not reachable by read repair. + +Although AAE is enabled by default, it can be turned off if necessary. +See our documentation on [managing active anti-entropy][cluster ops aae] +for information on how to enable and disable AAE, as well as on configuring +and monitoring AAE. + +## Active Anti-Entropy and Hash Tree Exchange + +In order to compare object values between replicas without using more +resources than necessary, Riak relies on [Merkle +tree] hash exchanges between +nodes. + +Using this type of exchange enables Riak to compare a balanced tree of +Riak object hashes. Any difference at a higher level in the hierarchy +means that at least one value has changed at a lower level. AAE +recursively compares the tree, level by level, until it pinpoints exact +values with a difference between nodes. The result is that AAE is able +to run repair operations efficiently regardless of how many objects are +stored in a cluster, since it need only repair specific objects instead +of all objects. + +In contrast with related systems, Riak uses persistent, on-disk hash +trees instead of in-memory hash trees. The advantages of this approach +are twofold: + +* Riak can run AAE operations with a minimal impact on memory usage +* Riak nodes can be restarted without needing to rebuild hash trees + +In addition, hash trees are updated in real time as new writes come in, +which reduces the time that it takes to detect and repair missing or +divergent replicas. + +As an additional fallback measure, Riak periodically clears and +regenerates all hash trees from on-disk key/value data, which enables +Riak to detect silent data corruption to on-disk data arising from disk +failure, faulty hardware, and other sources. The default time period for +this regeneration is one week, but this can be adjusted in each node's +[configuration file][config aae]. 
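+ +As an illustrative sketch only (the parameter names below are taken from the default `riak.conf` shipped with Riak 2.x; consult the [configuration file][config aae] reference for the authoritative settings), keeping AAE enabled and stretching the regeneration period to two weeks might look like this: + +```riakconf +# Keep AAE running as a continuous background process (the default) +anti_entropy = active + +# Regenerate each on-disk hash tree every two weeks instead of the +# default one week +anti_entropy.tree.expiry = 2w +```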
+ + + + diff --git a/content/riak/kv/2.9.7/learn/concepts/buckets.md b/content/riak/kv/2.9.7/learn/concepts/buckets.md new file mode 100644 index 0000000000..f23115a610 --- /dev/null +++ b/content/riak/kv/2.9.7/learn/concepts/buckets.md @@ -0,0 +1,217 @@ +--- +title: "Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Buckets" + identifier: "learn_concepts_buckets" + weight: 101 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.7/theory/concepts/Buckets + - /riak/kv/2.9.7/theory/concepts/Buckets + - /riak/2.9.7/theory/concepts/buckets + - /riak/kv/2.9.7/theory/concepts/buckets +--- + +[apps cluster metadata]: {{<baseurl>}}riak/kv/2.9.7/developing/app-guide/cluster-metadata +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/bucket-types +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/strong-consistency +[concept causal context]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context +[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context/#siblings +[concept replication]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency +[config basic]: {{<baseurl>}}riak/kv/2.9.7/configuring/basic +[dev api http]: {{<baseurl>}}riak/kv/2.9.7/developing/api/http +[dev data types]: {{<baseurl>}}riak/kv/2.9.7/developing/data-types +[glossary ring]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#ring +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask +[plan backend memory]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory +[plan backend multi]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/multi +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types +[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/commit-hooks +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution +[usage replication]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/replication + + +Buckets are used to define a virtual keyspace for storing Riak objects. +They enable you to define non-default configurations over that keyspace +concerning [replication properties][concept replication] and [other +parameters][config basic]. + +In certain respects, buckets can be compared to tables in relational +databases or to folders in filesystems. From the standpoint +of performance, buckets with default configurations are essentially +"free," while non-default configurations, defined [using bucket +types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem. + +## Configuration + +Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables +you to create and modify sets of configurations and apply them to as +many buckets as you wish. With bucket types, you can configure the +following bucket-level parameters, overriding the default values if you +wish. + +#### allow_mult + +Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on +the context. See the documentation on [`allow_mult`][usage bucket types] for more +information.
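+ +As a quick illustrative sketch (the type name `siblings_enabled` is our own example, not a Riak default), a bucket type that enables siblings is created and activated from the command line like this: + +```bash +# Create a bucket type whose buckets allow sibling values +riak-admin bucket-type create siblings_enabled '{"props":{"allow_mult":true}}' + +# Activate the type so buckets can start using it +riak-admin bucket-type activate siblings_enabled +```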
+ +#### n_val + +Specifies the number of copies of each object to be stored in the +cluster. See the documentation on [replication properties][usage replication]. Default: +`3`. + +#### last_write_wins + +Indicates if an object's timestamp will be used to decide the canonical +write in the case of a conflict. See the documentation on [vector +clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default: +`false`. + +#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum + +See the documentation on [replication properties][usage replication] for more information +on all of these properties. + +#### precommit + +A list of Erlang functions to be executed before writing an object. See +our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit +hooks, i.e. an empty list. + +#### postcommit + +A list of Erlang functions to be executed after writing an object. See +our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit +hooks, i.e. an empty list. + +#### old_vclock, young_vclock, small_vclock, big_vclock + +These settings enable you to manage [vector clock pruning][concept causal context]. + +#### backend + +If you are using the [Multi][plan backend multi] backend, this property enables you to +determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using +LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_ +buckets of all types will use the assigned backend. + +#### consistent + +If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets +bearing a type, this setting must be set to `true`. The default is +`false`. More information can be found in our documentation on [using +strong consistency][cluster ops strong consistency]. + +#### datatype + +If you are using [Riak data types][dev data types], this setting +determines which data type will be used in +buckets of this bucket type. Possible values: `counter`, `set`, or +`map`. + +#### dvv_enabled + +Whether [dotted version vectors][concept causal context] +will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`. + +#### chash_keyfun, linkfun + +These settings involve features that have been deprecated. You will not +need to adjust these values. + +## Fetching Bucket Properties + +If you'd like to see how a particular bucket has been configured, you +can do so using our official client libraries or through Riak's [HTTP +API][dev api http]. The following would fetch the properties for the bucket +`animals` if that bucket had a default configuration, i.e.
the `default` +bucket type: + +```java +Namespace animalsBucket = new Namespace("animals"); +FetchBucketProperties fetchProps = + new FetchBucketProperties.Builder(animalsBucket).build(); +FetchBucketProperties.Response response = client.execute(fetchProps); +BucketProperties props = response.getProperties(); +``` + +```ruby +bucket = client.bucket('animals') +bucket.properties +``` + +```php +$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak)) + ->buildBucket('animals') + ->build() + ->execute() + ->getBucket() + ->getProperties(); +``` + +```python +bucket = client.bucket('animals') +bucket.get_properties() +``` + +```erlang +{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>). +``` + +```curl +# Assuming that Riak's HTTP API is running on "localhost" and port 8098: + +curl http://localhost:8098/types/default/buckets/animals/props +``` + +If the bucket `animals` had a different type that you had created and +activated, e.g. `my_custom_type`, you could fetch the bucket properties +like so: + +```java +Namespace customTypedBucket = new Namespace("my_custom_type", "animals"); +FetchBucketProperties fetchProps = + new FetchBucketProperties.Builder(customTypedBucket).build(); +FetchBucketProperties.Response response = client.execute(fetchProps); +BucketProperties props = response.getProperties(); +``` + +```ruby +bucket = client.bucket_type('my_custom_type').bucket('animals') +bucket.properties +``` + +```php +$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak)) + ->buildBucket('animals', 'my_custom_type') + ->build() + ->execute() + ->getBucket() + ->getProperties(); +``` + +```python +bucket = client.bucket_type('my_custom_type').bucket('animals') +bucket.get_properties() +``` + +```erlang +{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}). +``` + +```curl +curl http://localhost:8098/types/my_custom_type/buckets/animals/props +``` + + + + diff --git a/content/riak/kv/2.9.7/learn/concepts/capability-negotiation.md b/content/riak/kv/2.9.7/learn/concepts/capability-negotiation.md new file mode 100644 index 0000000000..7b6b335da3 --- /dev/null +++ b/content/riak/kv/2.9.7/learn/concepts/capability-negotiation.md @@ -0,0 +1,36 @@ +--- +title: "Capability Negotiation" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Capability Negotiation" + identifier: "learn_concepts_cap_negot" + weight: 102 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.7/theory/concepts/capability-negotiation + - /riak/kv/2.9.7/theory/concepts/capability-negotiation +--- + + +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode +[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading/cluster +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce + + +In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded. + +Rolling upgrades no longer require you to disable and then re-enable features due to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + + + + + diff --git a/content/riak/kv/2.9.7/learn/concepts/causal-context.md b/content/riak/kv/2.9.7/learn/concepts/causal-context.md new file mode 100644 index 0000000000..7a778826ba --- /dev/null +++ b/content/riak/kv/2.9.7/learn/concepts/causal-context.md @@ -0,0 +1,289 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.7/theory/concepts/context + - /riak/kv/2.9.7/theory/concepts/context +--- + + +[concept aae]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/2.9.7/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/2.9.7/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/2.9.7/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+    b. Riak creates sibling values, aka **siblings**, for the object
+
+    c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 1.4, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+ * Whether one object is a direct descendant of the other
+ * Whether the objects are direct descendants of a common parent
+ * Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects.
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks][concept causal context]. In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object but they
+can't identify which value was associated with each update.
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#siblings) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates, they
+should generally be smaller than objects that use vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence of interactions with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`. And so if you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion.
At the extreme, having an enormous object in a
+node can cause reads of that object to crash the entire node. Other
+issues include [undue latency][perf latency reduc] and
+out-of-memory errors.
+
+To prevent sibling explosion, we recommend the following:
+
+1. Use [dotted version vectors](#dotted-version-vectors)
+instead of vector clocks for causal
+context.
+2. Always update mutable objects within a read/modify/write cycle. More
+information can be found in the [Object Updates][usage updating objects] doc.
+
+## Resources
+
+* [Evaluating Dotted Version Vectors in Riak]
+* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]
+* [Dotted Version Vector Sets]
+* [A History of Time in Riak]
+
+
+
+
diff --git a/content/riak/kv/2.9.7/learn/concepts/clusters.md b/content/riak/kv/2.9.7/learn/concepts/clusters.md
new file mode 100644
index 0000000000..3087237879
--- /dev/null
+++ b/content/riak/kv/2.9.7/learn/concepts/clusters.md
@@ -0,0 +1,117 @@
+---
+title: "Clusters"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Clusters"
+    identifier: "learn_concepts_clusters"
+    weight: 103
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.7/theory/concepts/Clusters
+  - /riak/kv/2.9.7/theory/concepts/Clusters
+  - /riak/2.9.7/theory/concepts/clusters
+  - /riak/kv/2.9.7/theory/concepts/clusters
+---
+
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets
+[concept keys objects]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication
+[glossary node]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode
+[learn dynamo]: {{<baseurl>}}riak/kv/2.9.7/learn/dynamo
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/replication
+
+
+Riak's default mode of operation is to work as a cluster consisting of
+multiple [nodes][glossary node], i.e. multiple well-connected data
+hosts.
+
+Each host in the cluster runs a single instance of Riak, referred to as
+a Riak node. Each Riak node manages a set of virtual nodes, or
+[vnodes][glossary vnode], that are responsible for storing a
+separate portion of the keys stored in the cluster.
+
+In contrast to some high-availability systems, Riak nodes are _not_
+clones of one another, and they do not all participate in fulfilling
+every request. Instead, you can configure, at runtime or at request
+time, the number of nodes on which data is to be replicated, as well as
+when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed.
+
+## The Ring
+
+Much of the material in this section is covered in greater depth in our
+annotated version of the Amazon [Dynamo paper][learn dynamo]; what
+follows is a summary of how Riak implements the distribution of data
+throughout a cluster.
+
+Any client interface to Riak interacts with objects in terms of the
+[bucket][concept buckets] and [key][concept keys objects] in which a value is
+stored, as well as the [bucket type][usage bucket types] that is used
+to set the bucket's properties.
+
+Internally, Riak computes a 160-bit binary hash of each bucket/key pair
+and maps this value to a position on an ordered **ring** of all such
+values.
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes define a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![A Riak Ring]({{<baseurl>}}images/riak-data-distribution.png)
+
+When N is set to 3, the value `REM` is stored in the key `artist`. That
+key is assigned to 3 partitions out of 32 available partitions. When a
+read request is made to Riak, the ring state will be used to determine
+which partitions are responsible. From there, a variety of
+[configurable parameters][usage replication] determine how Riak
+will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates.
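+
+If you'd like to see a node's view of the gossiped ring state for yourself, you can inspect it from an attached console (`riak attach`). The following is a minimal sketch, assuming the `riak_core_ring_manager:get_my_ring/0` and `riak_core_ring:num_partitions/1` functions and a default-sized ring; the output is illustrative:
+
+```erlang
+%% Fetch this node's current (gossiped) view of the ring...
+(riak@127.0.0.1)1> {ok, Ring} = riak_core_ring_manager:get_my_ring().
+
+%% ...and ask how many partitions the ring is divided into.
+(riak@127.0.0.1)2> riak_core_ring:num_partitions(Ring).
+64
+```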
+
+
+
+
diff --git a/content/riak/kv/2.9.7/learn/concepts/crdts.md b/content/riak/kv/2.9.7/learn/concepts/crdts.md
new file mode 100644
index 0000000000..765b3c6622
--- /dev/null
+++ b/content/riak/kv/2.9.7/learn/concepts/crdts.md
@@ -0,0 +1,252 @@
+---
+title_supertext: "Concept"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Data Types"
+    identifier: "learn_concepts_data_types"
+    weight: 104
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.7/theory/concepts/crdts
+  - /riak/kv/2.9.7/theory/concepts/crdts
+---
+
+[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
+[data types converg]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/crdts/#convergence
+[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html
+[data types impl]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/crdts/#implementation
+[concept causal context dvv]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context/#dotted-version-vectors
+[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context/#siblings
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context/#vector-clocks
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/strong-consistency
+[dev data types]: {{<baseurl>}}riak/kv/2.9.7/developing/data-types
+[riak_dt]: https://github.com/basho/riak_dt
+[dev data types context]: {{<baseurl>}}riak/kv/2.9.7/developing/data-types/#data-types-and-context
+[glossary node]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution
+
+Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections:
+
+- Counters
+- Flags
+- HyperLogLogs
+- Maps
+- Registers
+- Sets
+
+The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients.
+
+Instead of the usual create, read, update, and delete (CRUD) operations
+performed on key/value pairs, data types enable you to perform
+operations such as removing a register from a map, telling a counter to
+increment itself by 5, or enabling a flag that was previously disabled.
+
+It's important to note that Riak Data Types are operations-based from the standpoint of connecting clients. Like CRDTs, the [convergence logic][data types converg] is state-based behind the scenes.
+
+Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below.
+
+For more articles on CRDTs, check out this [reading list][crdts reading list].
+
+
+## Counters
+
+Counters are a bucket-level Riak data type. They can be used either on their own, i.e. associated with a bucket/key pair, or within a map. A counter’s value can only be a positive integer, negative integer, or zero.
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, counters should not be used because uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, hyperloglogs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+    Counter: numberOfRetweets,
+    Register: username,
+    Register: tweetContent,
+    Flag: favorited?,
+    Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which includes
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+Registers can only have the binaries stored within them changed. They can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.
+
+
+## Sets
+
+Sets are collections of unique binary values, such as strings. All of
+the values in a set are unique. For example, if you attempt to add the
+element `shovel` to a set that already contains `shovel`, the operation
+will be ignored by Riak KV. Sets can be used either on their own or
+embedded in a map.
+
+Some examples of using sets:
+
+- Storing the UUIDs of a user's friends in a social network application
+- Storing items in an e-commerce shopping cart
+
+### Operations
+
+Sets are subject to four basic operations: add an element, remove an
+element, add multiple elements, and remove multiple elements.
+
+
+## Advantages and Disadvantages of Data Types
+
+[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider.
+
+One of the core purposes behind data types is to relieve developers
+using Riak KV of the burden of producing data convergence at the
+application level by absorbing a great deal of that complexity into Riak KV
+itself. Riak KV manages this complexity by building eventual consistency
+into the data types themselves instead of requiring clients to do so.
+
+You can still build applications with Riak KV that treat it as a highly
+available key/value store, and you will always have this choice. What
+Riak Data Types provide is additional flexibility and a broader choice
+palette.
+
+The trade-off that data types necessarily present is that they don't
+allow you to produce your own convergence logic. If your use case
+demands that you be able to create your own deterministic merge
+functions, then Riak Data Types might not be a good fit.
+
+
+## Implementation
+
+Conflicts between replicas are inevitable in a distributed system like
+Riak KV.
+
+For example, if a map is stored in the key `my_map`, it is always
+possible that the value of `my_map` will be different in nodes A and B.
+
+Without using data types, that conflict must be resolved using
+timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt].
+
+
+## Convergence
+
+The benefit of data types is that Riak KV knows how to resolve value
+conflicts by applying data type-specific rules.
+
+Riak KV does this by remembering the history of a value and broadcasting that
+history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct.
+
+### Example
+
+Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`, while on another node the set has only one element, `apple`.
+
+What happens when the two nodes communicate and note the divergence?
+
+In this case Riak KV would declare the set with two elements the winner.
+At that point, the node with the incorrect set would be told: "The set
+`fruits` should have elements `apple` and `orange`."
+
+In general, convergence involves the following stages:
+
+1. Check for divergence. If the data types have the same value, Riak KV
+   does nothing. But if divergence is noted...
+2. Riak KV applies data type-specific merge rules, like in the `fruits`
+   set example above, which will result in a "correct" value.
+3. 
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. + + + + diff --git a/content/riak/kv/2.9.7/learn/concepts/eventual-consistency.md b/content/riak/kv/2.9.7/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..a36f236e3b --- /dev/null +++ b/content/riak/kv/2.9.7/learn/concepts/eventual-consistency.md @@ -0,0 +1,202 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.7/theory/concepts/Eventual-Consistency + - /riak/kv/2.9.7/theory/concepts/Eventual-Consistency + - /riak/2.9.7/theory/concepts/eventual-consistency + - /riak/kv/2.9.7/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these +systems need to rely on some form of conflict-resolution mechanism. + +One of the things that makes Riak's eventual consistency model powerful +is that Riak does not dictate how data resolution takes place. While +Riak does ship with a set of defaults regarding how data is +[replicated](#replication-properties-and-request-tuning) and how +[conflicts are resolved][usage conflict resolution], you can override these +defaults if you want to employ a different strategy. + +Among those strategies, you can enable Riak to resolve object conflicts +automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or +special eventually consistent [Data Types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types/), or you can resolve those +conflicts on the application side by employing a use case-specific logic +of your choosing. More information on this can be found in our guide to +[conflict resolution][usage conflict resolution]. + +This variety of options enables you to manage Riak's eventually +consistent behavior in accordance with your application's [data model +or models]({{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/). + +## Replication Properties and Request Tuning + +In addition to providing you different means of resolving conflicts, +Riak also enables you to fine-tune **replication properties**, which +determine things like the number of nodes on which data should be stored +and the number of nodes that are required to respond to read, write, and +other requests. + +An in-depth discussion of these behaviors and how they can be +implemented on the application side can be found in our guides to +[replication properties][concept replication] and [conflict resolution][usage conflict resolution]. + +In addition to our official documentation, we also recommend checking +out the [Understanding Riak's Configurable +Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) +series from [the Basho blog](https://riak.com/blog/). + +## A Simple Example of Eventual Consistency + +Let's assume for the moment that a sports news application is storing +all of its data in Riak. One thing that the application always needs to +be able to report to users is the identity of the current manager of +Manchester United, which is stored in the key `manchester-manager` in +the bucket `premier-league-managers`. This bucket has `allow_mult` set +to `false`, which means that Riak will resolve all conflicts by itself. + +Now let's say that a node in this cluster has recently recovered from +failure and has an old copy of the key `manchester-manager` stored in +it, with the value `Alex Ferguson`. The problem is that Sir Ferguson +stepped down in 2013 and is no longer the manager. Fortunately, the +other nodes in the cluster hold the value `David Moyes`, which is +correct. + +Shortly after the recovered node comes back online, other cluster +members recognize that it is available. Then, a read request for +`manchester-manager` arrives from the application. Regardless of which +order the responses arrive to the node that is coordinating this +request, `David Moyes` will be returned as the value to the client, +because `Alex Ferguson` is recognized as an older value. + +Why is this? How does Riak make this decision? Behind the scenes, after +`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the +older value on the node that just came back online. 
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because one of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we've considered the
+possibility of a node for the `manchester-manager` key having failed.
+To be more precise, though, we've been talking about a *primary* node,
+one that when the cluster is perfectly healthy would bear
+responsibility for that key.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure.
+>
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3` the client indicated that 3 primary
+nodes must respond for the operation to be considered successful, which
+it wasn't, but there's no way to tell without performing another read
+whether the operation truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels, et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/learn/concepts/keys-and-objects.md b/content/riak/kv/2.9.7/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..70f089cce4
--- /dev/null
+++ b/content/riak/kv/2.9.7/learn/concepts/keys-and-objects.md
@@ -0,0 +1,53 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.7/theory/concepts/keys-and-values
+  - /riak/kv/2.9.7/theory/concepts/keys-and-values
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there is more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and occur when more than one actor updates an
+object, when a network partition occurs, or when a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
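+
+To make the whole-object model concrete, here is a minimal fetch-modify-store sketch using the Erlang client (`riakc`), which appears elsewhere in these docs. It assumes a local node listening for Protocol Buffers traffic on the default port 8087 and an existing object at the illustrative bucket/key `animals`/`rufus`:
+
+```erlang
+%% The bucket-key pair identifies the object as a single entity; the
+%% whole object (including its vector clock) is fetched, modified
+%% locally, and written back so the causal context travels with it.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"animals">>, <<"rufus">>),
+Obj2 = riakc_obj:update_value(Obj, <<"updated value">>),
+ok = riakc_pb_socket:put(Pid, Obj2).
+```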
+
+
+
+
diff --git a/content/riak/kv/2.9.7/learn/concepts/replication.md b/content/riak/kv/2.9.7/learn/concepts/replication.md
new file mode 100644
index 0000000000..5473164081
--- /dev/null
+++ b/content/riak/kv/2.9.7/learn/concepts/replication.md
@@ -0,0 +1,323 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.7/theory/concepts/Replication
+  - /riak/kv/2.9.7/theory/concepts/Replication
+  - /riak/2.9.7/theory/concepts/replication
+  - /riak/kv/2.9.7/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing assurance
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication][cluster ops v3 mdc] capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak chooses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
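+
+Because request-time parameters are bounded by N, it can help to see one in action. Below is a minimal sketch using the Erlang client (used elsewhere in these docs); it assumes an existing connection `Pid` and a bucket with the default `n_val` of 3:
+
+```erlang
+%% Ask for a read quorum of 3: the request succeeds only once three
+%% vnodes have replied. With n_val = 3, {r, 3} is the maximum quorum
+%% you can request; if fewer than three replicas are reachable, the
+%% read fails rather than returning early.
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"my_bucket">>, <<"my_key">>, [{r, 3}]).
+```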
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up changing, leaving replicas stranded on vnodes that are no
+longer part of the object's preflist.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy][cluster ops v3 mdc]
+
+
+## Read Repair
+
+Read repair occurs when a successful read occurs---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Or if you have [active
+anti-entropy][usage replication] enabled, your values will
+eventually replicate as a background task.
+
+For each object that fails read (or the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean?
+
+N=3 simply means that three copies of each piece of data will be stored
+in the cluster. That is, three different partitions/vnodes will receive
+copies of the data. **There are no guarantees that the three replicas
+will go to three separate physical nodes**; however, the built-in
+functions for determining where replicas go attempt to distribute the
+data evenly.
+
+As nodes are added and removed from the cluster, the ownership of
+partitions changes and may result in an uneven distribution of the data.
+On some rare occasions, Riak will also aggressively reshuffle ownership
+of the partitions to achieve a more even balance.
+
+For cases where the number of nodes is less than the N value, data will
+likely be duplicated on some nodes. For example, with N=3 and 2 nodes in
+the cluster, one node will likely have one replica, and the other node
+will have two replicas.
+
+## Understanding replication by example
+
+To better understand how data is replicated in Riak, let's take a look at
+a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically
+we'll focus on two parts of the request: routing an object to a set of
+partitions and storing an object on a partition.
+
+### Routing an object to a set of partitions
+
+ * Assume we have 3 nodes
+ * Assume we store 3 replicas per object (N=3)
+ * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8)
+
+**Note**: It is not recommended that you use such a small ring size.
+This is for demonstration purposes only.
+
+With only 8 partitions our ring will look approximately as follows
+(response from `riak_core_ring_manager:get_my_ring/0` truncated for
+clarity):
+
+```erlang
+(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
+[{0,'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
+```
+
+The node handling this request hashes the bucket/key combination:
+
+```erlang
+(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+```
+
+The DocIdx hash is a 160-bit integer:
+
+```erlang
+(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx.
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+(dev1@127.0.0.1)6> I.
+1045375627425331784151332358177649483819648417632
+```
+
+The node looks up the hashed key in the ring, which returns a list of
+_preferred_ partitions for the given key.
+
+```erlang
+(dev1@127.0.0.1)7> Preflist = riak_core_ring:preflist(DocIdx, Ring).
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0, 'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]
+```
+
+The node chooses the first N partitions from the list. The remaining
+partitions of the "preferred" list are retained as fallbacks to use if
+any of the target partitions are unavailable.
+
+```erlang
+(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist).
+{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0,'dev1@127.0.0.1'}],
+[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]}
+```
+
+The partition information returned from the ring contains a partition
+identifier and the parent node of that partition:
+
+```erlang
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}
+```
+
+The requesting node sends a message to each parent node with the object
+and partition identifier (pseudocode for clarity):
+
+```erlang
+'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232}
+'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+'dev1@127.0.0.1' ! {put, Object, 0}
+```
+
+If any of the target partitions fail, the node sends the object to one
+of the fallbacks. When the message is sent to the fallback node, the
+message references the object and original partition identifier. For
+example, if `dev2@127.0.0.1` were unavailable, the requesting node would
+then try each of the fallbacks. The fallbacks in this example are:
+
+```erlang
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}
+```
+
+The next available fallback node would be `dev3@127.0.0.1`. The
+requesting node would send a message to the fallback node with the
+object and original partition identifier:
+
+```erlang
+'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+```
+
+Note that the partition identifier in the message is the same one that
+was originally sent to `dev2@127.0.0.1`, only this time it is being sent
+to `dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node
+of that partition, it is smart enough to hold on to the object until
+`dev2@127.0.0.1` returns to the cluster.
+
+## Processing partition requests
+
+Processing requests per partition is fairly simple. Each node runs a
+single process (`riak_kv_vnode_master`) that distributes requests to
+individual partition processes (`riak_kv_vnode`). The
+`riak_kv_vnode_master` process maintains a list of partition identifiers
+and corresponding partition processes. If a process does not exist for a
+given partition identifier, a new process is spawned to manage that
+partition.
+
+The `riak_kv_vnode_master` process treats all requests the same and
+spawns partition processes as needed even when nodes receive requests
+for partitions they do not own. When a partition's parent node is
+unavailable, requests are sent to fallback nodes (handoff). The
+`riak_kv_vnode_master` process on the fallback node spawns a process to
+manage the partition even though the partition does not belong to the
+fallback node.
+
+The individual partition processes perform hometests throughout their
+lives. The hometest checks if the current node (`node/0`)
+matches the parent node of the partition as defined in the ring. If the
+process determines that the partition it is managing belongs on another
+node (the parent node), it will attempt to contact that node. If that
+parent node responds, the process will hand off any objects it has
+processed for that partition and shut down. If that parent node does not
+respond, the process will continue to manage that partition and check
+the parent node again after a delay. The hometest is also run by
+partition processes to account for changes in the ring, such as the
+addition or removal of cluster nodes.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/learn/concepts/strong-consistency.md b/content/riak/kv/2.9.7/learn/concepts/strong-consistency.md
new file mode 100644
index 0000000000..cbf99810cb
--- /dev/null
+++ b/content/riak/kv/2.9.7/learn/concepts/strong-consistency.md
@@ -0,0 +1,105 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Strong Consistency"
+    identifier: "learn_concepts_strong_consistency"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.7/theory/concepts/strong-consistency
+  - /riak/kv/2.9.7/theory/concepts/strong-consistency
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency
+
+{{% note title="Please Note:" %}}
+Riak KV's strong consistency is an experimental feature and may be removed
+from the product in the future. Strong consistency is not commercially
+supported or production-ready. Strong consistency is incompatible with
+Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB
+Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its
+usage in any production environment.
+{{% /note %}}
+
+Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+(i.e. fault) tolerance and high read and write availability.
+
+While this focus on high availability is a great fit for many data
+storage needs, there are also many use cases for which strong data
+consistency is more important than availability. Basho introduced a new
+strong consistency option in version 2.0 to address these use cases.
+In Riak, strong consistency is applied [using bucket types][usage bucket types], which
+enables developers to apply strong consistency guarantees on a per-key
+basis.
+
+Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.7/configuring/strong-consistency) looking to manage,
+configure, and monitor strong consistency.
+
+## Strong vs. Eventual Consistency
+
+If you successfully write a value to a key in a strongly consistent
+system, the next successful read of that key is guaranteed to show that
+write. A client will never see out-of-date values. The drawback is that
+some operations may fail if an insufficient number of object replicas
+are available. More on this in the section on [trade-offs](#trade-offs).
+
+In an eventually consistent system, on the other hand, a read may return
+an out-of-date value, particularly during system or network failures.
+The advantage of this approach is that reads and writes can succeed even
+when a cluster is experiencing significant service degradation.
+
+### Example
+
+Building on the example presented in the [eventual consistency][concept eventual consistency] doc,
+imagine that information about who manages Manchester United is stored
+in Riak, in the key `manchester-manager`. In the eventual consistency
+example, the value associated with this key was originally
+`David Moyes`, meaning that it was the first successful write to that
+key. But then `Louis van Gaal` became Man U's manager, and a write was
+executed to change the value of `manchester-manager`.
+
+Now imagine that this write failed on one node in a multi-node cluster.
+Thus, all nodes except one report that the value of `manchester-manager`
+is `Louis van Gaal`. On the errant node, the value of the
+`manchester-manager` key is still `David Moyes`. An eventually
+consistent system is one in which a get request will most likely return
+`Louis van Gaal` but could return the outdated value `David Moyes`.
+
+In a strongly consistent system, conversely, any successful read on
+`manchester-manager` will return `Louis van Gaal` and never `David Moyes`.
+Reads will return `Louis van Gaal` every single time until Man U gets a new
+manager and someone performs a successful write to `manchester-manager`
+to change its value.
+
+It might also be useful to imagine it a bit more abstractly. The
+following causal sequence would characterize a strongly consistent
+system:
+
+1. The value of the key `k` is set to `v`
+2. All successful reads on `k` return `v`
+3. The value of `k` is changed to `v2`
+4. All successful reads on `k` return `v2`
+5. And so forth
+
+At no point in time does this system return an out-of-date value.
+
+The following sequence could characterize an eventually consistent
+system:
+
+1. A write is made that sets the value of the key `k` to `v`
+2. Nearly all reads to `k` return `v`, but a small percentage return
+   `not found`
+3. A write to `k` changes the value to `v2`
+4. Nearly all reads to `k` now return `v2`, but a small number return
+   the outdated `v` (or even `not found`) because the newer value hasn't
+   yet been replicated to all nodes
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/learn/concepts/vnodes.md b/content/riak/kv/2.9.7/learn/concepts/vnodes.md
new file mode 100644
index 0000000000..62152c30c3
--- /dev/null
+++ b/content/riak/kv/2.9.7/learn/concepts/vnodes.md
@@ -0,0 +1,160 @@
+---
+title: "Vnodes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Vnodes"
+    identifier: "learn_concepts_vnodes"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.7/theory/concepts/vnodes
+  - /riak/kv/2.9.7/theory/concepts/vnodes
+---
+
+
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context
+[concept clusters ring]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters/#the-ring
+[concept replication]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/strong-consistency
+[glossary node]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#ring
+[plan backend]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/cluster-capacity
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-cli
+
+
+Virtual nodes, more commonly referred to as **vnodes**, are processes
+that manage partitions in the Riak [ring][glossary ring]. Each data
+partition in a Riak cluster has a vnode that **claims** that partition.
+Vnodes perform a wide variety of operations, from K/V storage to
+guaranteeing [strong consistency][concept strong consistency], if you choose to use that
+feature.
+
+## The Number of Vnodes in a Cluster
+
+The term [node][glossary node] refers to a full instance of Riak,
+be it on its own physical machine or alongside others on a single
+machine, as in a development cluster on your laptop. Each Riak node
+contains multiple vnodes. The number per node is the [ring
+size][concept clusters ring] divided by the number of nodes in the cluster.
+
+This means that in some clusters different nodes will have different
+numbers of data partitions (and hence a different number of vnodes),
+because (ring size / number of nodes) will not always produce a whole
+number. If the ring size of your cluster is 64 and you are running three
+nodes, two of your nodes will have 21 vnodes, while the third node holds
+22 vnodes.
+
+The output of the [`riak-admin member-status`][use admin riak cli]
+command shows this:
+
+```
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      34.4%      --      'dev1@127.0.0.1'
+valid      32.8%      --      'dev2@127.0.0.1'
+valid      32.8%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+```
+
+In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of
+64 partitions, while the other two nodes account for 32.8%, i.e. 21 out
+of 64 partitions. This is normal and expected behavior in Riak.
+
+We strongly recommend setting the appropriate ring size, and by
+extension the number of vnodes, prior to building a cluster. A full
+guide can be found in our [cluster planning][plan cluster capacity] documentation.
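+
+As a quick illustration of that arithmetic, here is a back-of-the-envelope
+sketch in the Erlang shell (not anything Riak itself runs) of how a ring
+size of 64 divides across 3 nodes:
+
+```erlang
+%% How 64 partitions divide across 3 nodes.
+RingSize = 64, Nodes = 3,
+Base  = RingSize div Nodes,  %% 21 vnodes on most nodes
+Extra = RingSize rem Nodes,  %% one node claims an extra partition
+[Base + 1 || _ <- lists:seq(1, Extra)] ++
+    [Base || _ <- lists:seq(1, Nodes - Extra)].
+%% => [22,21,21]
+```
+
+22/64 and 21/64 are where the 34.4% and 32.8% figures in the
+`member-status` output above come from.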
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, acting as [strong consistency
+ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1004782375664995756265033322492444576013453623296`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node.
+The report for a specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+           0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/learn/dynamo.md b/content/riak/kv/2.9.7/learn/dynamo.md
new file mode 100644
index 0000000000..71e9fcf3af
--- /dev/null
+++ b/content/riak/kv/2.9.7/learn/dynamo.md
@@ -0,0 +1,1928 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/2.9.7/theory/dynamo
+  - /riak/kv/2.9.7/theory/dynamo
+---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.
+
+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV, Cassandra
+> and Voldemort come to mind. You may also remember Dynomite (which predates
+> all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use.
+
+Categories and Subject Descriptors
+
+* D.4.2 [Operating Systems]: Storage Management;
+* D.4.5 [Operating Systems]: Reliability;
+* D.4.8 [Operating Systems]: Performance;
+
+General Terms
+
+Algorithms, Management, Measurement, Performance, Design, Reliability.
+
+## 1. Introduction
+
+Amazon runs a world-wide e-commerce platform that serves tens of millions of
+customers at peak times using tens of thousands of servers located in many data
+centers around the world. There are strict operational requirements on Amazon’s
+platform in terms of performance, reliability and efficiency, and to support
+continuous growth the platform needs to be highly scalable. Reliability is one
+of the most important requirements because even the slightest outage has
+significant financial consequences and impacts customer trust. In addition, to
+support continuous growth, the platform needs to be highly scalable.
+
+One of the lessons our organization has learned from operating Amazon’s platform
+is that the reliability and scalability of a system is dependent on how its
+application state is managed. Amazon uses a highly decentralized, loosely
+coupled, service oriented architecture consisting of hundreds of services. In
+this environment there is a particular need for storage technologies that are
+always available. For example, customers should be able to view and add items to
+their shopping cart even if disks are failing, network routes are flapping, or
+data centers are being destroyed by tornados. Therefore, the service responsible
+for managing shopping carts requires that it can always write to and read from
+its data store, and that its data needs to be available across multiple data
+centers.
+
+Dealing with failures in an infrastructure comprised of millions of components
+is our standard mode of operation; there are always a small but significant
+number of server and network components that are failing at any given time. As
+such Amazon’s software systems need to be constructed in a manner that treats
+failure handling as the normal case without impacting availability or
+performance.
+
+To meet the reliability and scaling needs, Amazon has developed a number of
+storage technologies, of which the Amazon Simple Storage Service (also available
+outside of Amazon and known as Amazon S3), is probably the best known. This
+paper presents the design and implementation of Dynamo, another highly available
+and scalable distributed data store built for Amazon’s platform. Dynamo is used
+to manage the state of services that have very high reliability requirements and
+need tight control over the tradeoffs between availability, consistency,
+cost-effectiveness and performance. Amazon’s platform has a very diverse set of
+applications with different storage requirements. A select set of applications
+requires a storage technology that is flexible enough to let application
+designers configure their data store appropriately based on these tradeoffs to
+achieve high availability and guaranteed performance in the most cost effective
+manner.
+
+There are many services on Amazon’s platform that only need primary-key access
+to a data store. For many services, such as those that provide best seller
+lists, shopping carts, customer preferences, session management, sales rank, and
+product catalog, the common pattern of using a relational database would lead to
+inefficiencies and limit scale and availability. Dynamo provides a simple
+primary-key only interface to meet the requirements of these applications.
+
+Dynamo uses a synthesis of well known techniques to achieve scalability and
+availability: Data is partitioned and replicated using consistent hashing [10],
+and consistency is facilitated by object versioning [12]. The consistency among
+replicas during updates is maintained by a quorum-like technique and a
+decentralized replica synchronization protocol. Dynamo employs a gossip based
+distributed failure detection and membership protocol. Dynamo is a completely
+decentralized system with minimal need for manual administration. Storage nodes
+can be added and removed from Dynamo without requiring any manual partitioning
+or redistribution.
+
+> Like Dynamo, Riak KV employs consistent hashing to partition and replicate
+> data around the ring. For the consistent hashing that takes place in
+> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs.
+>
+> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4
+> to read up on this in depth.
+>
+> Riak KV makes use of gossiping in the same way that Dynamo does: to
+> communicate ring state and node membership. See [Gossip Protocol] in our docs.
+>
+> And, nodes can be added to and removed from your Riak cluster as needed.
+
+[Consistent Hashing]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#consistent-hashing
+[Gossip Protocol]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#gossiping
+
+In the past year, Dynamo has been the underlying storage technology for a number
+of the core services in Amazon’s e-commerce platform. It was able to scale to
+extreme peak loads efficiently without any downtime during the busy holiday
+shopping season. For example, the service that maintains shopping cart (Shopping
+Cart Service) served tens of millions of requests that resulted in well over 3
+million checkouts in a single day and the service that manages session state
+handled hundreds of thousands of concurrently active sessions.
+
+The main contribution of this work for the research community is the evaluation
+of how different techniques can be combined to provide a single highly-available
+system. It demonstrates that an eventually-consistent storage system can be used
+in production with demanding applications. It also provides insight into the
+tuning of these techniques to meet the requirements of production systems with
+very strict performance demands.
+
+The paper is structured as follows. Section 2 presents the background and
+Section 3 presents the related work. Section 4 presents the system design and
+Section 5 describes the implementation. Section 6 details the experiences and
+insights gained by running Dynamo in production and Section 7 concludes the
+paper. There are a number of places in this paper where additional information
+may have been appropriate but where protecting Amazon’s business interests
+require us to reduce some level of detail. For this reason, the intra- and
+inter-datacenter latencies in section 6, the absolute request rates in section
+6.2 and outage lengths and workloads in section 6.3 are provided through
+aggregate measures instead of absolute details.
+
+
+## 2. Background
+
+Amazon’s e-commerce platform is composed of hundreds of services that work in
+concert to deliver functionality ranging from recommendations to order
+fulfillment to fraud detection. Each service is exposed through a well defined
+interface and is accessible over the network.
+These services are hosted in an infrastructure that consists of tens of
+thousands of servers located across many data centers world-wide. Some of
+these services are stateless (i.e., services which aggregate responses from
+other services) and some are stateful (i.e., a service that generates its
+response by executing business logic on its state stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated and diverged from
+> Dynamo's (as described in this paper).
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly-available, is efficient in its resource
+> usage, and has a simple scale out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
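+
+To make the parallel concrete, this is roughly what that primary-key
+interface looks like against Riak KV from the official Erlang client
+(`riakc`); the host, port, bucket, and key here are illustrative:
+
+```erlang
+%% Assumes a Riak node is listening for Protocol Buffers traffic on
+%% 127.0.0.1:8087 and that riakc is on the code path.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+
+%% put(): store an opaque value under bucket + key
+Obj = riakc_obj:new(<<"carts">>, <<"alice">>, <<"cart-contents">>),
+ok = riakc_pb_socket:put(Pid, Obj),
+
+%% get(): fetch it back, along with its metadata and causal context
+{ok, Fetched} = riakc_pb_socket:get(Pid, <<"carts">>, <<"alice">>).
+```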
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract.
+
+> **Riak KV Loves SLAs**
+>
+> Much like Amazon built Dynamo to guarantee their applications were always
+> available to retail shoppers, the design decisions in Riak KV were taken to
+> ensure that developers could sleep well knowing that their database would
+> always be available to serve requests.
+>
+> Many of our clients and open source users have explicit uptime agreements
+> related to their applications and services built on Riak KV. This was not an
+> accident.
+
+
+<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of
+Amazon’s platform, where dynamic web content is generated by page rendering
+components which in turn query many other services. A service can use different
+data stores to manage its state and these data stores are only accessible within
+its service boundaries. Some services act as aggregators by using several other
+services to produce a composite response. Typically, the aggregator services are
+stateless, although they use extensive caching.
+
+**<figure id="figure-1" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure1.png">
+  <figcaption>
+    Figure 1: Service-oriented architecture of Amazon’s platform.
+  </figcaption>
+</figure>**
+
+A common approach in the industry for forming a performance oriented SLA is to
+describe it using average, median and expected variance. At Amazon we have found
+that these metrics are not good enough if the goal is to build a system where
+all customers have a good experience, rather than just the majority. For example
+if extensive personalization techniques are used then customers with longer
+histories require more processing which impacts performance at the high-end of
+the distribution. An SLA stated in terms of mean or median response times will
+not address the performance of this important customer segment. To address this
+issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of
+the distribution. The choice for 99.9% over an even higher percentile has been
+made based on a cost-benefit analysis which demonstrated a significant increase
+in cost to improve performance that much. Experiences with Amazon’s production
+systems have shown that this approach provides a better overall experience
+compared to those systems that meet SLAs defined based on the mean or median.
+
+In this paper there are many references to this 99.9th percentile of
+distributions, which reflects Amazon engineers’ relentless focus on performance
+from the perspective of the customers’ experience. Many papers report on
+averages, so these are included where it makes sense for comparison purposes.
+Nevertheless, Amazon’s engineering and optimization efforts are not focused on
+averages. Several techniques, such as the load balanced selection of write
+coordinators, are purely targeted at controlling performance at the 99.9th
+percentile.
+
+Storage systems often play an important role in establishing a service’s SLA,
+especially if the business logic is relatively lightweight, as is the case for
+many Amazon services. State management then becomes the main component of a
+service’s SLA. One of the main design considerations for Dynamo is to give
+services control over their system properties, such as durability and
+consistency, and to let services make their own tradeoffs between functionality,
+performance and cost-effectiveness.
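+
+For readers who want the percentile language made concrete, here is a
+small, hypothetical helper (a sketch, not part of Dynamo or Riak KV)
+showing what "measured at the 99.9th percentile" means:
+
+```erlang
+%% Given a list of observed latencies, return the value at the Pth
+%% percentile. An SLA of "300ms at 99.9%" holds when
+%% percentile(LatenciesMs, 99.9) =< 300.
+percentile(Latencies, P) when P > 0, P =< 100 ->
+    Sorted = lists:sort(Latencies),
+    Index = max(1, round(length(Sorted) * P / 100)),
+    lists:nth(Index, Sorted).
+```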
+
+
+### 2.3 Design Considerations
+
+Data replication algorithms used in commercial systems traditionally perform
+synchronous replica coordination in order to provide a strongly consistent data
+access interface. To achieve this level of consistency, these algorithms are
+forced to tradeoff the availability of the data under certain failure scenarios.
+For instance, rather than dealing with the uncertainty of the correctness of an
+answer, the data is made unavailable until it is absolutely certain that it is
+correct. From the very early replicated database works, it is well known that
+when dealing with the possibility of network failures, strong consistency and
+high data availability cannot be achieved simultaneously [2, 11]. As such
+systems and applications need to be aware which properties can be achieved under
+which conditions.
+
+> **Riak KV's Design Considerations**
+>
+> Availability under any circumstances was something we stressed when designing
+> Riak KV, too. Most databases didn't enable developers to do this in a simple
+> way so we set out to change this.
+
+For systems prone to server and network failures, availability can be increased
+by using optimistic replication techniques, where changes are allowed to
+propagate to replicas in the background, and concurrent, disconnected work is
+tolerated. The challenge with this approach is that it can lead to conflicting
+changes which must be detected and resolved. This process of conflict resolution
+introduces two problems: when to resolve them and who resolves them. Dynamo is
+designed to be an eventually consistent data store; that is all updates reach
+all replicas eventually.
+
+> Remember Eventual Consistency? We followed Dynamo's lead here and made sure
+> that Riak KV could withstand network, server and other failures by sacrificing
+> absolute consistency and building in mechanisms to rectify object conflicts.
+
+An important design consideration is to decide when to perform the process of
+resolving update conflicts, i.e., whether conflicts should be resolved during
+reads or writes. Many traditional data stores execute conflict resolution during
+writes and keep the read complexity simple [7]. In such systems, writes may be
+rejected if the data store cannot reach all (or a majority of) the replicas at a
+given time. On the other hand, Dynamo targets the design space of an “always
+writeable” data store (i.e., a data store that is highly available for writes).
+For a number of Amazon services, rejecting customer updates could result in a
+poor customer experience. For instance, the shopping cart service must allow
+customers to add and remove items from their shopping cart even amidst network
+and server failures. This requirement forces us to push the complexity of
+conflict resolution to the reads in order to ensure that writes are never
+rejected.
+
+> Ditto!
+
+The next design choice is who performs the process of conflict resolution. This
+can be done by the data store or the application. If conflict resolution is done
+by the data store, its choices are rather limited. In such cases, the data store
+can only use simple policies, such as “last write wins” [22], to resolve
+conflicting updates. On the other hand, since the application is aware of the
+data schema it can decide on the conflict resolution method that is best suited
+for its client’s experience.
+For instance, the application that maintains customer shopping carts can
+choose to “merge” the conflicting versions and return a single unified
+shopping cart. Despite this flexibility, some application developers may not
+want to write their own conflict resolution mechanisms and choose to push it
+down to the data store, which in turn chooses a simple policy such as “last
+write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbors. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special, which
+> means there is no single point of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo-style distributed database such as Riak KV functions. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops.
+
+To reduce the additional latency introduced by multi-hop routing, some P2P
+systems (e.g., [14]) employ O(1) routing where each peer maintains enough
+routing information locally so that it can route requests (to access a data
+item) to the appropriate peer within a constant number of hops.
+
+> Riak KV's gossip protocol communicates between nodes with O(1) routing, and
+> maintains local routing information.
+
+Various storage systems, such as Oceanstore [9] and PAST [17] were built on top
+of these routing overlays. Oceanstore provides a global, transactional,
+persistent storage service that supports serialized updates on widely replicated
+data. To allow for concurrent updates while avoiding many of the problems
+inherent with wide-area locking, it uses an update model based on conflict
+resolution. Conflict resolution was introduced in [21] to reduce the number of
+transaction aborts. Oceanstore resolves conflicts by processing a series of
+updates, choosing a total order among them, and then applying them atomically in
+that order. It is built for an environment where the data is replicated on an
+untrusted infrastructure. By comparison, PAST provides a simple abstraction
+layer on top of Pastry for persistent and immutable objects. It assumes that the
+application can build the necessary storage semantics (such as mutable files) on
+top of it.
+
+### 3.2 Distributed File Systems and Databases
+
+Distributing data for performance, availability and durability has been widely
+studied in the file system and database systems community. Compared to P2P
+storage systems that only support flat namespaces, distributed file systems
+typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19]
+replicate files for high availability at the expense of consistency. Update
+conflicts are typically managed using specialized conflict resolution
+procedures. The Farsite system [1] is a distributed file system that does not
+use any centralized server like NFS. Farsite achieves high availability and
+scalability using replication. The Google File System [6] is another distributed
+file system built for hosting the state of Google’s internal applications. GFS
+uses a simple design with a single master server for hosting the entire metadata
+and where the data is split into chunks and stored in chunkservers. Bayou is a
+distributed relational database system that allows disconnected operations and
+provides eventual data consistency [21].
+
+Among these systems, Bayou, Coda and Ficus allow disconnected operations and are
+resilient to issues such as network partitions and outages. These systems differ
+on their conflict resolution procedures. For instance, Coda and Ficus perform
+system level conflict resolution and Bayou allows application level resolution.
+All of them, however, guarantee eventual consistency.
+
+Similar to these systems, Dynamo allows read and write operations to continue
+even during network partitions and resolves updated conflicts using different
+conflict resolution mechanisms. Distributed block storage systems like FAB [18]
+split large size objects into smaller blocks and stores each block in a highly
+available manner. In comparison to these systems, a key-value store is more
+suitable in this case because: (a) it is intended to store relatively small
+objects (size < 1M) and (b) key-value stores are easier to configure on a per-
+application basis. Antiquity is a wide-area distributed storage system designed
+to handle multiple server failures [23].
+It uses a secure log to preserve data integrity, replicates each log on
+multiple servers for durability, and uses Byzantine fault tolerance protocols
+to ensure data consistency. In contrast to Antiquity, Dynamo does not focus on
+the problem of data integrity and security and is built for a trusted
+environment. Bigtable is a distributed storage system for managing structured
+data. It maintains a sparse, multi-dimensional sorted map and allows
+applications to access their data using multiple attributes [2]. Compared to
+Bigtable, Dynamo targets applications that require only key/value access with
+primary focus on high availability where updates are not rejected even in the
+wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages.
+
+<table id="table-1">
+  <caption>
+    Table 1: Summary of techniques used in Dynamo and their advantages.
+  </caption>
+  <tr>
+    <th>Problem</th>
+    <th>Technique</th>
+    <th>Advantage</th>
+  </tr>
+  <tr>
+    <td>Partitioning</td>
+    <td>Consistent Hashing</td>
+    <td>Incremental Scalability</td>
+  </tr>
+  <tr>
+    <td>High Availability for writes</td>
+    <td>Vector clocks with reconciliation during reads</td>
+    <td>Version size is decoupled from update rates.</td>
+  </tr>
+  <tr>
+    <td>Handling temporary failures</td>
+    <td>Sloppy Quorum and hinted handoff</td>
+    <td>Provides high availability and durability guarantee when some of the
+    replicas are not available.</td>
+  </tr>
+  <tr>
+    <td>Recovering from permanent failures</td>
+    <td>Anti-entropy using Merkle trees</td>
+    <td>Synchronizes divergent replicas in the background.</td>
+  </tr>
+  <tr>
+    <td>Membership and failure detection</td>
+    <td>Gossip-based membership protocol and failure detection.</td>
+    <td>Preserves symmetry and avoids having a centralized registry for storing
+    membership and node liveness information.</td>
+  </tr>
+</table>
+
+### 4.1 System Interface
+
+Dynamo stores objects associated with a key through a simple interface; it
+exposes two operations: get() and put(). The get(key) operation locates the
+object replicas associated with the key in the storage system and returns a
+single object or a list of objects with conflicting versions along with a
+context. The put(key, context, object) operation determines where the replicas
+of the object should be placed based on the associated key, and writes the
+replicas to disk. The context encodes system metadata about the object that is
+opaque to the caller and includes information such as the version of the object.
+The context information is stored along with the object so that the system can
+verify the validity of the context object supplied in the put request.
+
+> Whereas Dynamo only has the concept of keys, we added a higher level of
+> organization called a "bucket." Keys are stored in buckets and buckets are the
+> level at which several Riak KV properties can be configured (primarily the "N"
+> value, or the replication value.) In addition to the bucket+key identifier and
+> value, Riak KV will also return the associated metadata for a given object
+> with each get or put.
+>
+> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API].
+
+[HTTP API]: {{<baseurl>}}riak/kv/2.9.7/developing/api/http/
+[Protocol Buffers API]: {{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers/
+
+Dynamo treats both the key and the object supplied by the caller as an opaque
+array of bytes. It applies a MD5 hash on the key to generate a 128-bit
+identifier, which is used to determine the storage nodes that are responsible
+for serving the key.
+
+> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash
+> to generate a 160-bit identifier which is then used to determine where in the
+> database each datum is stored. Riak KV treats data as an opaque binary, thus
+> enabling users to store virtually anything.
+
+
+### 4.2 Partitioning Algorithm
+
+One of the key design requirements for Dynamo is that it must scale
+incrementally. This requires a mechanism to dynamically partition the data over
+the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning
+scheme relies on consistent hashing to distribute the load across multiple
+storage hosts.
+In consistent hashing [10], the output range of a hash function is treated as a
+fixed circular space or “ring” (i.e. the largest hash value wraps around to the
+smallest hash value). Each node in the system is assigned a random value within
+this space which represents its “position” on the ring. Each data item
+identified by a key is assigned to a node by hashing the data item’s key to
+yield its position on the ring, and then walking the ring clockwise to find the
+first node with a position larger than the item’s position. Thus, each node
+becomes responsible for the region in the ring between it and its predecessor
+node on the ring. The principal advantage of consistent hashing is that
+departure or arrival of a node only affects its immediate neighbors and other
+nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data around
+> the ring to partitions responsible for storing data. The ring has a maximum
+> key space of 2^160. Each bucket+key (and its associated value) is hashed to a
+> location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Each storage node will
+> optimistically handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo uses
+a variant of consistent hashing (similar to the one used in [10, 20]): instead
+of mapping a node to a single point in the circle, each node gets assigned to
+multiple points in the ring. To this end, Dynamo uses the concept of “virtual
+nodes”. A virtual node looks like a single node in the system, but each node can
+be responsible for more than one virtual node. Effectively, when a new node is
+added to the system, it is assigned multiple positions (henceforth, “tokens”) in
+the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed
+in Section 6.
+
+> Riak KV also has the concept of virtual nodes and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+If a node becomes unavailable (due to failures or routine maintenance), the load
+handled by this node is evenly dispersed across the remaining available nodes.
+
+When a node becomes available again, or a new node is added to the system, the
+newly available node accepts a roughly equivalent amount of load from each of
+the other available nodes.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+The number of virtual nodes that a node is responsible for can be decided based
+on its capacity, accounting for heterogeneity in the physical infrastructure.
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters/#the-ring
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts.
+Each data item is replicated at N hosts, where N is a parameter configured
+“per-instance”. Each key, k, is assigned to a coordinator node (described in
+the previous section). The coordinator is in charge of the replication of the
+data items that fall within its range. In addition to locally storing each key
+within its range, the coordinator replicates these keys at the N-1 clockwise
+successor nodes in the ring. This results in a system where each node is
+responsible for the region of the ring between it and its Nth predecessor. In
+<a href="#figure-2">Figure 2</a>, node B replicates the key k at nodes C and D
+in addition to storing it locally. Node D will store the keys that fall in the
+ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
+> the concept of a bucket we covered above? In Riak KV, the replication
+> parameter, "N" (also called "n_val"), is configurable at the bucket level.
+> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
+> store three replicas of your data on three different partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called the
+preference list. The system is designed, as will be explained in Section 4.8, so
+that every node in the system can determine which nodes should be in this list
+for any particular key. To account for node failures, preference list contains
+more than N nodes. Note that with the use of virtual nodes, it is possible that
+the first N successor positions for a particular key may be owned by less than N
+distinct physical nodes (i.e. a node may hold more than one of the first N
+positions). To address this, the preference list for a key is constructed by
+skipping positions in the ring to ensure that the list contains only distinct
+physical nodes.
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be propagated
+to all replicas asynchronously. A put() call may return to its caller before the
+update has been applied at all the replicas, which can result in scenarios where
+a subsequent get() operation may return an object that does not have the latest
+updates. If there are no failures then there is a bound on the update
+propagation times. However, under certain failure scenarios (e.g., server
+outages or network partitions), updates may not arrive at all replicas for an
+extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously, which, as you would expect, could result in a datum being
+> returned to the client that is out of date. But don't worry. We built in some
+> mechanisms to address this.
+
+There is a category of applications in Amazon’s platform that can tolerate such
+inconsistencies and can be constructed to operate under these conditions. For
+example, the shopping cart application requires that an “Add to Cart” operation
+can never be forgotten or rejected.
### 4.4 Data Versioning

Dynamo provides eventual consistency, which allows for updates to be
propagated to all replicas asynchronously. A put() call may return to its
caller before the update has been applied at all the replicas, which can
result in scenarios where a subsequent get() operation may return an object
that does not have the latest updates. If there are no failures then there
is a bound on the update propagation times. However, under certain failure
scenarios (e.g., server outages or network partitions), updates may not
arrive at all replicas for an extended period of time.

> Riak KV is an "eventually consistent" database. All replication is done
> asynchronously, which, as you would expect, can result in a datum being
> returned to the client that is out of date. But don't worry. We built in
> some mechanisms to address this.

There is a category of applications in Amazon’s platform that can tolerate
such inconsistencies and can be constructed to operate under these
conditions. For example, the shopping cart application requires that an
“Add to Cart” operation can never be forgotten or rejected. If the most
recent state of the cart is unavailable, and a user makes changes to an
older version of the cart, that change is still meaningful and should be
preserved. But at the same time it shouldn’t supersede the currently
unavailable state of the cart, which itself may contain changes that should
be preserved. Note that both “add to cart” and “delete item from cart”
operations are translated into put requests to Dynamo. When a customer
wants to add an item to (or remove from) a shopping cart and the latest
version is not available, the item is added to (or removed from) the older
version and the divergent versions are reconciled later.

> Much like Dynamo was suited to the design of the shopping cart, Riak KV,
> and its tradeoffs, are appropriate for a certain set of use cases. We
> happen to feel that _most_ use cases can tolerate some level of eventual
> consistency.

In order to provide this kind of guarantee, Dynamo treats the result of
each modification as a new and immutable version of the data. It allows for
multiple versions of an object to be present in the system at the same
time. Most of the time, new versions subsume the previous version(s), and
the system itself can determine the authoritative version (syntactic
reconciliation). However, version branching may happen, in the presence of
failures combined with concurrent updates, resulting in conflicting
versions of an object. In these cases, the system cannot reconcile the
multiple versions of the same object and the client must perform the
reconciliation in order to collapse multiple branches of data evolution
back into one (semantic reconciliation). A typical example of a collapse
operation is “merging” different versions of a customer’s shopping cart.
Using this reconciliation mechanism, an “add to cart” operation is never
lost. However, deleted items can resurface.

> The same holds true for Riak KV. If, by way of some failure and
> concurrent update (rare but quite possible), there come to exist multiple
> versions of the same object, Riak KV will push this decision down to the
> client (who are we to tell you which is the authoritative object?). All
> that said, if your application doesn't need this level of version
> control, we enable you to turn the usage of vector clocks on and off at
> the bucket level.

It is important to understand that certain failure modes can potentially
result in the system having not just two but several versions of the same
data. Updates in the presence of network partitions and node failures can
potentially result in an object having distinct version sub-histories,
which the system will need to reconcile in the future. This requires us to
design applications that explicitly acknowledge the possibility of multiple
versions of the same data (in order to never lose any updates).

> Ditto.

Dynamo uses vector clocks [12] in order to capture causality between
different versions of the same object. A vector clock is effectively a list
of (node, counter) pairs. One vector clock is associated with every version
of every object. One can determine whether two versions of an object are on
parallel branches or have a causal ordering by examining their vector
clocks. If the counters on the first object’s clock are less-than-or-equal
to all of the nodes in the second clock, then the first is an ancestor of
the second and can be forgotten. Otherwise, the two changes are considered
to be in conflict and require reconciliation.
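
> Since the comparison rule is easy to get wrong, here is the "descends"
> test from the paragraph above as a small Ruby sketch (clocks as Hashes of
> node => counter; the names are ours, not Riak KV's API):
>
>     # b descends from a when every counter in a is <= its counterpart in b.
>     def descends?(a, b)
>       a.all? { |node, counter| b.fetch(node, 0) >= counter }
>     end
>
>     # Two clocks conflict when neither descends from the other.
>     def conflict?(a, b)
>       !descends?(a, b) && !descends?(b, a)
>     end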
> As you may have already figured out, Riak KV uses vector clocks for
> object versioning, too. Here are a whole host of resources to keep you
> busy for a while:
>
> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vector-clock)
>
> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/)
> |
> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
>
> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
>
> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)

In Dynamo, when a client wishes to update an object, it must specify which
version it is updating. This is done by passing the context it obtained
from an earlier read operation, which contains the vector clock
information. Upon processing a read request, if Dynamo has access to
multiple branches that cannot be syntactically reconciled, it will return
all the objects at the leaves, with the corresponding version information
in the context. An update using this context is considered to have
reconciled the divergent versions and the branches are collapsed into a
single new version.

**<figure id="figure-3" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure3.png">
  <figcaption>
    Figure 3: Version evolution of an object over time.
  </figcaption>
</figure>**

To illustrate the use of vector clocks, let us consider the example shown
in <a href="#figure-3">Figure 3</a>. A client writes a new object. The node
(say Sx) that handles the write for this key increases its sequence number
and uses it to create the data's vector clock. The system now has the
object D1 and its associated clock [(Sx, 1)]. The client updates the
object. Assume the same node handles this request as well. The system now
also has object D2 and its associated clock [(Sx, 2)]. D2 descends from D1
and therefore overwrites D1; however, there may be replicas of D1 lingering
at nodes that have not yet seen D2. Let us assume that the same client
updates the object again and a different server (say Sy) handles the
request. The system now has data D3 and its associated clock
[(Sx, 2), (Sy, 1)].

Next assume a different client reads D2 and then tries to update it, and
another node (say Sz) does the write. The system now has D4 (descendant of
D2) whose version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1
or D2 could determine, upon receiving D4 and its clock, that D1 and D2 are
overwritten by the new data and can be garbage collected. A node that is
aware of D3 and receives D4 will find that there is no causal relation
between them. In other words, there are changes in D3 and D4 that are not
reflected in each other. Both versions of the data must be kept and
presented to a client (upon a read) for semantic reconciliation.

Now assume some client reads both D3 and D4 (the context will reflect that
both values were found by the read). The read's context is a summary of the
clocks of D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client
performs the reconciliation and node Sx coordinates the write, Sx will
update its sequence number in the clock. The new data D5 will have the
following clock: [(Sx, 3), (Sy, 1), (Sz, 1)].
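
> Replaying that walkthrough with the `descends?`/`conflict?` sketch from
> above gives the same answers:
>
>     d2 = { "Sx" => 2 }
>     d3 = { "Sx" => 2, "Sy" => 1 }
>     d4 = { "Sx" => 2, "Sz" => 1 }
>     d5 = { "Sx" => 3, "Sy" => 1, "Sz" => 1 }
>
>     descends?(d2, d3)                       # => true: D3 supersedes D2
>     conflict?(d3, d4)                       # => true: keep both for the client
>     descends?(d3, d5) && descends?(d4, d5)  # => true: D5 reconciles both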
A possible issue with vector clocks is that the size of vector clocks may
grow if many servers coordinate the writes to an object. In practice, this
is not likely because the writes are usually handled by one of the top N
nodes in the preference list. In case of network partitions or multiple
server failures, write requests may be handled by nodes that are not in the
top N nodes in the preference list, causing the size of the vector clock to
grow. In these scenarios, it is desirable to limit the size of the vector
clock. To this end, Dynamo employs the following clock truncation scheme:
Along with each (node, counter) pair, Dynamo stores a timestamp that
indicates the last time the node updated the data item. When the number of
(node, counter) pairs in the vector clock reaches a threshold (say 10), the
oldest pair is removed from the clock. Clearly, this truncation scheme can
lead to inefficiencies in reconciliation as the descendant relationships
cannot be derived accurately. However, this problem has not surfaced in
production and therefore this issue has not been thoroughly investigated.

> Riak KV does a certain amount of vector clock pruning to ensure their
> growth is kept under control.


### 4.5 Execution of get() and put() operations

Any storage node in Dynamo is eligible to receive client get and put
operations for any key. In this section, for the sake of simplicity, we
describe how these operations are performed in a failure-free environment,
and in the subsequent section we describe how read and write operations are
executed during failures.

> Any node in the Riak KV ring can coordinate a request. The Riak KV
> information in this section applies to a failure-free environment.

Both get and put operations are invoked using Amazon’s
infrastructure-specific request processing framework over HTTP. There are
two strategies that a client can use to select a node: (1) route its
request through a generic load balancer that will select a node based on
load information, or (2) use a partition-aware client library that routes
requests directly to the appropriate coordinator nodes. The advantage of
the first approach is that the client does not have to link any code
specific to Dynamo in its application, whereas the second strategy can
achieve lower latency because it skips a potential forwarding step.

A node handling a read or write operation is known as the coordinator.
Typically, this is the first among the top N nodes in the preference list.
If the requests are received through a load balancer, requests to access a
key may be routed to any random node in the ring. In this scenario, the
node that receives the request will not coordinate it if the node is not in
the top N of the requested key’s preference list. Instead, that node will
forward the request to the first among the top N nodes in the preference
list.

Read and write operations involve the first N healthy nodes in the
preference list, skipping over those that are down or inaccessible. When
all nodes are healthy, the top N nodes in a key’s preference list are
accessed. When there are node failures or network partitions, nodes that
are lower ranked in the preference list are accessed.

To maintain consistency among its replicas, Dynamo uses a consistency
protocol similar to those used in quorum systems. This protocol has two key
configurable values: R and W. R is the minimum number of nodes that must
participate in a successful read operation. W is the minimum number of
nodes that must participate in a successful write operation. Setting R and
W such that R + W > N yields a quorum-like system. In this model, the
latency of a get (or put) operation is dictated by the slowest of the R (or
W) replicas. For this reason, R and W are usually configured to be less
than N, to provide better latency.
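
> The quorum arithmetic is simple enough to state in code; the point of
> R + W > N is that every read set must overlap every write set in at least
> one replica (a sketch, using the (3,2,2) style of configuration discussed
> in Section 6):
>
>     def quorum?(n:, r:, w:)
>       r + w > n
>     end
>
>     quorum?(n: 3, r: 2, w: 2)  # => true, read and write sets must overlap
>     quorum?(n: 3, r: 1, w: 1)  # => false, a read may miss the latest write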
> Riak KV makes use of the same values. But, thanks to our concept of
> buckets, we made it a bit more customizable. The default R and W values
> are set at the bucket level but can be configured at the request level if
> the developer deems it necessary for certain data. "Quorum" as described
> in Dynamo is the default setting in Riak KV.
>
> Some more resources on R and W:
>
> [REST API]({{<baseurl>}}riak/kv/2.9.7/developing/api/http/)
>
> [Writing Data]({{<baseurl>}}riak/kv/2.9.7/developing/usage/creating-objects/)
>
> [Reading Data]({{<baseurl>}}riak/kv/2.9.7/developing/usage/reading-objects/)

Upon receiving a put() request for a key, the coordinator generates the
vector clock for the new version and writes the new version locally. The
coordinator then sends the new version (along with the new vector clock) to
the N highest-ranked reachable nodes. If at least W-1 nodes respond then
the write is considered successful.

> In Riak KV a write is considered successful when the total number of
> responding writes equals W. This need not be a durable write, which is a
> separate value in Riak KV labeled DW.

Similarly, for a get() request, the coordinator requests all existing
versions of data for that key from the N highest-ranked reachable nodes in
the preference list for that key, and then waits for R responses before
returning the result to the client. If the coordinator ends up gathering
multiple versions of the data, it returns all the versions it deems to be
causally unrelated. The divergent versions are then reconciled and the
reconciled version superseding the current versions is written back.

> Same for Riak KV. Reconciling divergent versions in Riak KV is called
> [Read Repair]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication/#read-repair).


### 4.6 Handling Failures: Hinted Handoff

If Dynamo used a traditional quorum approach it would be unavailable during
server failures and network partitions, and would have reduced durability
even under the simplest of failure conditions. To remedy this it does not
enforce strict quorum membership and instead it uses a “sloppy quorum”; all
read and write operations are performed on the first N healthy nodes from
the preference list, which may not always be the first N nodes encountered
while walking the consistent hashing ring.

> [Hinted handoff] is built into Riak KV's core.
>
> You can get a glimpse of Riak KV's preference list (or *preflist*)
> calculation in the [Replication] walkthrough.

[Hinted handoff]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#hinted-handoff
[Replication]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/replication/

Consider the example of Dynamo configuration given in
<a href="#figure-2">Figure 2</a> with N=3. In this example, if node A is
temporarily down or unreachable during a write operation then a replica
that would normally have lived on A will now be sent to node D. This is
done to maintain the desired availability and durability guarantees. The
replica sent to D will have a hint in its metadata that suggests which node
was the intended recipient of the replica (in this case A). Nodes that
receive hinted replicas will keep them in a separate local database that is
scanned periodically. Upon detecting that A has recovered, D will attempt
to deliver the replica to A. Once the transfer succeeds, D may delete the
object from its local store without decreasing the total number of replicas
in the system.
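
> A toy version of that hint-and-handoff bookkeeping might look like this
> in Ruby (class and method names are ours, not Riak KV's):
>
>     class FallbackNode
>       def initialize
>         # Hinted replicas, grouped by the node that should own them.
>         @hinted = Hash.new { |h, k| h[k] = {} }
>       end
>
>       def store_hinted(intended_owner, key, value)
>         @hinted[intended_owner][key] = value
>       end
>
>       # Called by the periodic scan once `owner` is reachable again.
>       def handoff(owner, transport)
>         @hinted.delete(owner)&.each { |k, v| transport.call(owner, k, v) }
>       end
>     end
>
>     d = FallbackNode.new
>     d.store_hinted("A", "cart/123", "replica-bytes")
>     d.handoff("A", ->(node, k, _v) { puts "handing #{k} back to #{node}" })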
Using hinted handoff, Dynamo ensures that read and write operations do not
fail due to temporary node or network failures. Applications that need the
highest level of availability can set W to 1, which ensures that a write is
accepted as long as a single node in the system has durably written the key
to its local store. Thus, the write request is only rejected if all nodes
in the system are unavailable. However, in practice, most Amazon services
in production set a higher W to meet the desired level of durability. A
more detailed discussion of configuring N, R and W follows in section 6.

> As mentioned previously, Riak KV does not require that a write be
> durable, only that a vnode responds in the affirmative. If you require a
> durable write in the way mentioned here, use DW.

It is imperative that a highly available storage system be capable of
handling the failure of an entire data center(s). Data center failures
happen due to power outages, cooling failures, network failures, and
natural disasters. Dynamo is configured such that each object is replicated
across multiple data centers. In essence, the preference list of a key is
constructed such that the storage nodes are spread across multiple data
centers. These datacenters are connected through high speed network links.
This scheme of replicating across multiple datacenters allows us to handle
entire data center failures without a data outage.

> [Multi Datacenter Replication] was previously only implemented in the
> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. Now
> it is available in all versions from Riak KV 2.9.7 onwards.

[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/2.9.7/using/reference/v3-multi-datacenter/architecture/
[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/


### 4.7 Handling permanent failures: Replica synchronization

Hinted handoff works best if the system membership churn is low and node
failures are transient. There are scenarios under which hinted replicas
become unavailable before they can be returned to the original replica
node. To handle this and other threats to durability, Dynamo implements an
anti-entropy (replica synchronization) protocol to keep the replicas
synchronized.

> Read repair, mentioned above, is the simplest form of anti-entropy. But
> it is passive, not active as this section describes.

To detect the inconsistencies between replicas faster and to minimize the
amount of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is
a hash tree where leaves are hashes of the values of individual keys.
Parent nodes higher in the tree are hashes of their respective children.
The principal advantage of a Merkle tree is that each branch of the tree
can be checked independently without requiring nodes to download the entire
tree or the entire data set. Moreover, Merkle trees help in reducing the
amount of data that needs to be transferred while checking for
inconsistencies among replicas. For instance, if the hash values of the
root of two trees are equal, then the values of the leaf nodes in the tree
are equal and the nodes require no synchronization. If not, it implies that
the values of some replicas are different. In such cases, the nodes may
exchange the hash values of children and the process continues until it
reaches the leaves of the trees, at which point the hosts can identify the
keys that are “out of sync”. Merkle trees minimize the amount of data that
needs to be transferred for synchronization and reduce the number of disk
reads performed during the anti-entropy process.
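
> The tree exchange can be sketched in a few lines of Ruby. Two replicas
> build a tree over the same sorted key range, and comparison descends only
> into branches whose hashes differ (illustrative only; this is not how
> Riak KV or Dynamo lay out their trees):
>
>     require 'digest'
>
>     MNode = Struct.new(:hash, :left, :right, :keys)
>
>     # leaves: sorted [key, value_hash] pairs covering one key range.
>     def build(leaves)
>       if leaves.size == 1
>         key, vhash = leaves.first
>         return MNode.new(vhash, nil, nil, [key])
>       end
>       l = build(leaves[0...leaves.size / 2])
>       r = build(leaves[leaves.size / 2..])
>       MNode.new(Digest::SHA1.hexdigest(l.hash + r.hash), l, r, l.keys + r.keys)
>     end
>
>     # Equal hashes prune the whole branch; unequal leaves are out of sync.
>     def out_of_sync(a, b)
>       return [] if a.hash == b.hash
>       return a.keys if a.left.nil? || b.left.nil?
>       out_of_sync(a.left, b.left) + out_of_sync(a.right, b.right)
>     end
>
>     mine   = build([["k1", "h1"], ["k2", "h2"], ["k3", "h3"], ["k4", "h4"]])
>     theirs = build([["k1", "h1"], ["k2", "XX"], ["k3", "h3"], ["k4", "h4"]])
>     out_of_sync(mine, theirs)  # => ["k2"]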
> Riak KV implements a Merkle-tree based Active Anti-Entropy (*AAE*).

Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
separate Merkle tree for each key range (the set of keys covered by a
virtual node) it hosts. This allows nodes to compare whether the keys
within a key range are up-to-date. In this scheme, two nodes exchange the
root of the Merkle tree corresponding to the key ranges that they host in
common. Subsequently, using the tree traversal scheme described above the
nodes determine if they have any differences and perform the appropriate
synchronization action. The disadvantage with this scheme is that many key
ranges change when a node joins or leaves the system thereby requiring the
tree(s) to be recalculated. This issue is addressed, however, by the
refined partitioning scheme described in Section 6.2.


### 4.8 Membership and Failure Detection

> This section is well expressed in [Adding and Removing Nodes] and
> [Failure Scenarios].

[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes/
[Failure Scenarios]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency/

#### 4.8.1 Ring Membership

> Riak KV operators can trigger node management via the
> [riak-admin command-line tool].

[riak-admin command-line tool]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/

In Amazon’s environment, node outages (due to failures and maintenance
tasks) are often transient but may last for extended intervals. A node
outage rarely signifies a permanent departure and therefore should not
result in rebalancing of the partition assignment or repair of the
unreachable replicas. Similarly, manual error could result in the
unintentional startup of new Dynamo nodes. For these reasons, it was deemed
appropriate to use an explicit mechanism to initiate the addition and
removal of nodes from a Dynamo ring. An administrator uses a command line
tool or a browser to connect to a Dynamo node and issue a membership change
to join a node to a ring or remove a node from a ring. The node that serves
the request writes the membership change and its time of issue to
persistent store. The membership changes form a history because nodes can
be removed and added back multiple times.

> Nodes are manually added using the `riak-admin cluster join` command.
>
> When a node permanently departs, rebalancing is triggered using the
> `riak-admin cluster leave` command.

A gossip-based protocol propagates membership changes and maintains an
eventually consistent view of membership. Each node contacts a peer chosen
at random every second and the two nodes efficiently reconcile their
persisted membership change histories.

> Riak KV's ring state holds membership information, and is propagated via
> [gossiping], including random reconciliation, defaulting to once a
> minute.

[gossiping]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#gossiping
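
> Gossip reconciliation itself reduces to "keep the newest entry per node".
> A minimal sketch, with membership entries timestamped at issue time
> (field names are ours):
>
>     def reconcile(mine, theirs)
>       mine.merge(theirs) { |_node, a, b| a[:at] > b[:at] ? a : b }
>     end
>
>     a_view = { "node1" => { status: :joined, at: 100 },
>                "node2" => { status: :left,   at: 140 } }
>     b_view = { "node2" => { status: :joined, at: 90 },
>                "node3" => { status: :joined, at: 120 } }
>
>     reconcile(a_view, b_view)
>     # => node2 stays :left (issued later); node3 is learned from the peer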
When a node starts for the first time, it chooses its set of tokens
(virtual nodes in the consistent hash space) and maps nodes to their
respective token sets. The mapping is persisted on disk and initially
contains only the local node and token set. The mappings stored at
different Dynamo nodes are reconciled during the same communication
exchange that reconciles the membership change histories. Therefore,
partitioning and placement information also propagates via the gossip-based
protocol and each storage node is aware of the token ranges handled by its
peers. This allows each node to forward a key’s read/write operations to
the right set of nodes directly.

> These tokens are vnodes (virtual nodes) in Riak KV.


#### 4.8.2 External Discovery

The mechanism described above could temporarily result in a logically
partitioned Dynamo ring. For example, the administrator could contact node
A to join A to the ring, then contact node B to join B to the ring. In this
scenario, nodes A and B would each consider itself a member of the ring,
yet neither would be immediately aware of the other. To prevent logical
partitions, some Dynamo nodes play the role of seeds. Seeds are nodes that
are discovered via an external mechanism and are known to all nodes.
Because all nodes eventually reconcile their membership with a seed,
logical partitions are highly unlikely. Seeds can be obtained either from
static configuration or from a configuration service. Typically seeds are
fully functional nodes in the Dynamo ring.

> To rectify these sorts of logical partitions, multiple Riak cluster
> changes are configured as one batch. Any changes must first be reviewed
> with `riak-admin cluster plan`, then the changes are committed with
> `riak-admin cluster commit`. The new ring state is gossiped.
>
> See _[The Node Join Process]_ for more.

[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster


#### 4.8.3 Failure Detection

Failure detection in Dynamo is used to avoid attempts to communicate with
unreachable peers during get() and put() operations and when transferring
partitions and hinted replicas. For the purpose of avoiding failed attempts
at communication, a purely local notion of failure detection is entirely
sufficient: node A may consider node B failed if node B does not respond to
node A’s messages (even if B is responsive to node C's messages). In the
presence of a steady rate of client requests generating inter-node
communication in the Dynamo ring, a node A quickly discovers that a node B
is unresponsive when B fails to respond to a message; node A then uses
alternate nodes to service requests that map to B's partitions; A
periodically retries B to check for the latter's recovery. In the absence
of client requests to drive traffic between two nodes, neither node really
needs to know whether the other is reachable and responsive.

Decentralized failure detection protocols use a simple gossip-style
protocol that enables each node in the system to learn about the arrival
(or departure) of other nodes. For detailed information on decentralized
failure detectors and the parameters affecting their accuracy, the
interested reader is referred to [8]. Early designs of Dynamo used a
decentralized failure detector to maintain a globally consistent view of
failure state. Later it was determined that the explicit node join and
leave methods obviate the need for a global view of failure state.
This is because nodes are notified of permanent node additions and removals
by the explicit node join and leave methods, and temporary node failures
are detected by the individual nodes when they fail to communicate with
others (while forwarding requests).

> Riak KV follows the same mechanism, by manually triggering permanent ring
> state changes, and gossiping the new state.


### 4.9 Adding/Removing Storage Nodes

When a new node (say X) is added into the system, it gets assigned a number
of tokens that are randomly scattered on the ring. For every key range that
is assigned to node X, there may be a number of nodes (less than or equal
to N) that are currently in charge of handling keys that fall within its
token range. Due to the allocation of key ranges to X, some existing nodes
no longer have to store some of their keys, and these nodes transfer those
keys to X. Let us consider a simple bootstrapping scenario where node X is
added to the ring shown in <a href="#figure-2">Figure 2</a> between A and
B. When X is added to the system, it is in charge of storing keys in the
ranges (F, G], (G, A] and (A, X]. As a consequence, nodes B, C and D no
longer have to store the keys in these respective ranges. Therefore, nodes
B, C, and D will offer to, and upon confirmation from X, transfer the
appropriate set of keys. When a node is removed from the system, the
reallocation of keys happens in a reverse process.

> Riak KV does not randomly assign vnodes, but rather, iterates through the
> list of partitions, assigning them to nodes in a round-robin style.

Operational experience has shown that this approach distributes the load of
key distribution uniformly across the storage nodes, which is important to
meet the latency requirements and to ensure fast bootstrapping. Finally, by
adding a confirmation round between the source and the destination, it is
made sure that the destination node does not receive any duplicate
transfers for a given key range.


## 5. Implementation

In Dynamo, each storage node has three main software components: request
coordination, membership and failure detection, and a local persistence
engine. All these components are implemented in Java.

> Riak KV is implemented in Erlang. Request coordination and membership
> behavior is defined by [riak_core] and implemented by [Riak KV].

[riak_core]: http://github.com/basho/riak_core
[Riak KV]: http://github.com/basho/riak_kv

Dynamo’s local persistence component allows for different storage engines
to be plugged in. Engines that are in use are Berkeley Database (BDB)
Transactional Data Store, BDB Java Edition, MySQL, and an in-memory buffer
with persistent backing store. The main reason for designing a pluggable
persistence component is to choose the storage engine best suited for an
application’s access patterns. For instance, BDB can handle objects
typically in the order of tens of kilobytes whereas MySQL can handle
objects of larger sizes. Applications choose Dynamo’s local persistence
engine based on their object size distribution. The majority of Dynamo’s
production instances use BDB Transactional Data Store.

> Riak KV ships with various [backend options]. [Bitcask] is the default,
> but [LevelDB] and Main [Memory] are also used heavily in production (in
> that order). You can also use more than one backend in production via the
> [Multi] backend configuration.
>
> Bitcask is a fast and reliable choice, but does have some limitations at
> very large scales. For larger clusters, you may want to choose LevelDB
> (which also supports [secondary indexes]). The Memory backend is an
> excellent choice when speed is important and durability is not. It also
> has TTL support.
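
> Backend selection is a one-line setting in `riak.conf` (shown here in the
> Riak 2.x style; see the backend docs linked below for the authoritative
> options):
>
>     ## Local persistence engine: bitcask, leveldb, memory, or multi.
>     storage_backend = bitcask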
[backend options]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/
[Bitcask]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask/
[LevelDB]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb/
[Memory]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory/
[Multi]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/multi/
[secondary indexes]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes/

The request coordination component is built on top of an event-driven
messaging substrate where the message processing pipeline is split into
multiple stages similar to the SEDA architecture [24]. All communications
are implemented using Java NIO channels. The coordinator executes the read
and write requests on behalf of clients by collecting data from one or more
nodes (in the case of reads) or storing data at one or more nodes (for
writes). Each client request results in the creation of a state machine on
the node that received the client request. The state machine contains all
the logic for identifying the nodes responsible for a key, sending the
requests, waiting for responses, potentially doing retries, processing the
replies and packaging the response to the client. Each state machine
instance handles exactly one client request. For instance, a read operation
implements the following state machine: (i) send read requests to the
nodes, (ii) wait for the minimum number of required responses, (iii) if too
few replies were received within a given time bound, fail the request, (iv)
otherwise gather all the data versions and determine the ones to be
returned and (v) if versioning is enabled, perform syntactic reconciliation
and generate an opaque write context that contains the vector clock that
subsumes all the remaining versions. For the sake of brevity the failure
handling and retry states are left out.

> Request coordination in Riak KV uses Erlang message passing, but follows
> a similar state machine.

After the read response has been returned to the caller the state machine
waits for a small period of time to receive any outstanding responses. If
stale versions were returned in any of the responses, the coordinator
updates those nodes with the latest version. This process is called read
repair because it repairs replicas that have missed a recent update at an
opportunistic time and relieves the anti-entropy protocol from having to do
it.

> Riak KV implements [Read Repair].

[Read Repair]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication/#read-repair

As noted earlier, write requests are coordinated by one of the top N nodes
in the preference list. Although it is desirable always to have the first
node among the top N to coordinate the writes, thereby serializing all
writes at a single location, this approach has led to uneven load
distribution resulting in SLA violations. This is because the request load
is not uniformly distributed across objects. To counter this, any of the
top N nodes in the preference list is allowed to coordinate the writes. In
particular, since each write usually follows a read operation, the
coordinator for a write is chosen to be the node that replied fastest to
the previous read operation, which is stored in the context information of
the request.
This optimization enables us to pick the node that has the data that was
read by the preceding read operation, thereby increasing the chances of
getting “read-your-writes” consistency. It also reduces variability in the
performance of the request handling, which improves the performance at the
99.9 percentile.


## 6. Experiences & Lessons Learned

> Much of this section relates to benchmarks run against Dynamo. You can
> run [Basho Bench] against your own Riak cluster to discover your own
> optimal values.

[Basho Bench]: {{<baseurl>}}riak/kv/2.9.7/using/performance/benchmarking/

Dynamo is used by several services with different configurations. These
instances differ by their version reconciliation logic, and read/write
quorum characteristics. The following are the main patterns in which Dynamo
is used:

* Business logic specific reconciliation: This is a popular use case for
Dynamo. Each data object is replicated across multiple nodes. In case of
divergent versions, the client application performs its own reconciliation
logic. The shopping cart service discussed earlier is a prime example of
this category. Its business logic reconciles objects by merging different
versions of a customer’s shopping cart.

> Riak KV currently supports simple conflict resolution by way of
> read-repair, leaving more complex reconciliation to the client. There are
> several tools to help simplify this task, such as [Statebox].
>
> Riak KV supports a simple reconciliation strategy, called [CRDTs
> (Commutative Replicated Data Types)], for reconciling common data types
> like sets and counters.

[Statebox]: https://github.com/mochi/statebox_riak
[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/2.9.7/developing/data-types/


* Timestamp based reconciliation: This case differs from the previous one
only in the reconciliation mechanism. In case of divergent versions, Dynamo
performs simple timestamp based reconciliation logic of “last write wins”;
i.e., the object with the largest physical timestamp value is chosen as the
correct version. The service that maintains customer’s session information
is a good example of a service that uses this mode.

> Riak also supports this for high-performance cases where accuracy is less
> important than speed.

* High performance read engine: While Dynamo is built to be an “always
writeable” data store, a few services are tuning its quorum characteristics
and using it as a high performance read engine. Typically, these services
have a high read request rate and only a small number of updates. In this
configuration, typically R is set to be 1 and W to be N. For these
services, Dynamo provides the ability to partition and replicate their data
across multiple nodes thereby offering incremental scalability. Some of
these instances function as the authoritative persistence cache for data
stored in more heavy weight backing stores. Services that maintain product
catalog and promotional items fit in this category.

> Riak can be used in this manner.

The main advantage of Dynamo is that its client applications can tune the
values of N, R and W to achieve their desired levels of performance,
availability and durability. For instance, the value of N determines the
durability of each object. A typical value of N used by Dynamo’s users is
3.

The values of W and R impact object availability, durability and
consistency. For instance, if W is set to 1, then the system will never
reject a write request as long as there is at least one node in the system
that can successfully process a write request. However, low values of W and
R can increase the risk of inconsistency as write requests are deemed
successful and returned to the clients even if they are not processed by a
majority of the replicas. This also introduces a vulnerability window for
durability when a write request is successfully returned to the client even
though it has been persisted at only a small number of nodes.
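
> In Riak KV these knobs are plain bucket properties. As a sketch, the
> defaults for one bucket can be set through the HTTP API (endpoint shape
> per the [REST API] docs linked earlier; host, port, and bucket name are
> illustrative):
>
>     require 'net/http'
>     require 'json'
>
>     uri = URI("http://localhost:8098/buckets/carts/props")
>     req = Net::HTTP::Put.new(uri, "Content-Type" => "application/json")
>     req.body = { props: { n_val: 3, r: "quorum", w: "quorum" } }.to_json
>     Net::HTTP.start(uri.host, uri.port) { |http| http.request(req) }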
Traditional wisdom holds that durability and availability go hand-in-hand.
However, this is not necessarily true here. For instance, the vulnerability
window for durability can be decreased by increasing W. This may increase
the probability of rejecting requests (thereby decreasing availability)
because more storage hosts need to be alive to process a write request.

The common (N,R,W) configuration used by several instances of Dynamo is
(3,2,2). These values are chosen to meet the necessary levels of
performance, durability, consistency, and availability SLAs.

All the measurements presented in this section were taken on a live system
operating with a configuration of (3,2,2) and running a couple hundred
nodes with homogenous hardware configurations. As mentioned earlier, each
instance of Dynamo contains nodes that are located in multiple datacenters.
These datacenters are typically connected through high speed network links.
Recall that to generate a successful get (or put) response R (or W) nodes
need to respond to the coordinator. Clearly, the network latencies between
datacenters affect the response time and the nodes (and their datacenter
locations) are chosen such that the applications’ target SLAs are met.

> Ditto for Riak.

### 6.1 Balancing Performance and Durability

While Dynamo’s principal design goal is to build a highly available data
store, performance is an equally important criterion in Amazon’s platform.
As noted earlier, to provide a consistent customer experience, Amazon’s
services set their performance targets at higher percentiles (such as the
99.9th or 99.99th percentiles). A typical SLA required of services that use
Dynamo is that 99.9% of the read and write requests execute within 300ms.

Since Dynamo is run on standard commodity hardware components that have far
less I/O throughput than high-end enterprise servers, providing
consistently high performance for read and write operations is a
non-trivial task. The involvement of multiple storage nodes in read and
write operations makes it even more challenging, since the performance of
these operations is limited by the slowest of the R or W replicas.
<a href="#figure-4">Figure 4</a> shows the average and 99.9th percentile
latencies of Dynamo’s read and write operations during a period of 30 days.
As seen in the figure, the latencies exhibit a clear diurnal pattern, which
is a result of the diurnal pattern in the incoming request rate (i.e.,
there is a significant difference in request rate between the daytime and
night). Moreover, the write latencies are higher than read latencies
because write operations always result in disk access. Also, the 99.9th
percentile latencies are around 200 ms and are an order of magnitude higher
than the averages.
This is because the 99.9th percentile latencies are affected by several
factors such as variability in request load, object sizes, and locality
patterns.

**<figure id="figure-4" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure4.png">
  <figcaption>
    Figure 4: Average and 99.9 percentiles of latencies for read and write
    requests during our peak request season of December 2006. The intervals
    between consecutive ticks in the x-axis correspond to 12 hours. Latencies
    follow a diurnal pattern similar to the request rate and 99.9 percentile
    latencies are an order of magnitude higher than averages.
  </figcaption>
</figure>**

While this level of performance is acceptable for a number of services, a
few customer-facing services required higher levels of performance. For
these services, Dynamo provides the ability to trade off durability
guarantees for performance. In this optimization, each storage node
maintains an object buffer in its main memory. Each write operation is
stored in the buffer and gets periodically written to storage by a writer
thread. In this scheme, read operations first check if the requested key is
present in the buffer. If so, the object is read from the buffer instead of
the storage engine.

> This is more similar to Riak's W value, since only DW requires a durable
> write to respond as a success.

This optimization has resulted in lowering the 99.9th percentile latency by
a factor of 5 during peak traffic even for a very small buffer of a
thousand objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in
the figure, write buffering smooths out higher percentile latencies.
Obviously, this scheme trades durability for performance. In this scheme, a
server crash can result in missing writes that were queued up in the
buffer. To reduce the durability risk, the write operation is refined to
have the coordinator choose one out of the N replicas to perform a “durable
write”. Since the coordinator waits only for W responses, the performance
of the write operation is not affected by the performance of the durable
write operation performed by a single replica.

**<figure id="figure-5" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure5.png">
  <figcaption>
    Figure 5: Comparison of performance of 99.9th percentile latencies for
    buffered vs. non-buffered writes over a period of 24 hours. The
    intervals between consecutive ticks in the x-axis correspond to one
    hour.
  </figcaption>
</figure>**

> Setting DW=1 will replicate this behavior.


### 6.2 Ensuring Uniform Load distribution

Dynamo uses consistent hashing to partition its key space across its
replicas and to ensure uniform load distribution. A uniform key
distribution can help us achieve uniform load distribution assuming the
access distribution of keys is not highly skewed. In particular, Dynamo’s
design assumes that even where there is a significant skew in the access
distribution there are enough keys in the popular end of the distribution
so that the load of handling popular keys can be spread across the nodes
uniformly through partitioning. This section discusses the load imbalance
seen in Dynamo and the impact of different partitioning strategies on load
distribution.

> Riak follows SHA-1-based consistent hashing for [partitioning].
[partitioning]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication/#understanding-replication-by-example

To study the load imbalance and its correlation with request load, the
total number of requests received by each node was measured for a period of
24 hours, broken down into intervals of 30 minutes. In a given time window,
a node is considered to be “in-balance” if the node’s request load deviates
from the average load by less than a certain threshold (here 15%).
Otherwise, the node was deemed “out-of-balance”.
<a href="#figure-6">Figure 6</a> presents the fraction of nodes that are
“out-of-balance” (henceforth, “imbalance ratio”) during this time period.
For reference, the corresponding request load received by the entire system
during this time period is also plotted. As seen in the figure, the
imbalance ratio decreases with increasing load. For instance, during low
loads the imbalance ratio is as high as 20% and during high loads it is
close to 10%. Intuitively, this can be explained by the fact that under
high loads, a large number of popular keys are accessed and due to uniform
distribution of keys the load is evenly distributed. However, during low
loads (where load is 1/8th of the measured peak load), fewer popular keys
are accessed, resulting in a higher load imbalance.

**<figure id="figure-6" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure6.png">
  <figcaption>
    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
    request load is above a certain threshold from the average system load)
    and their corresponding request load. The interval between ticks in
    x-axis corresponds to a time period of 30 minutes.
  </figcaption>
</figure>**

<i>This section discusses how Dynamo’s partitioning scheme has evolved over
time and its implications on load distribution.</i>

<strong>Strategy 1:</strong> T random tokens per node and partition by
token value: This was the initial strategy deployed in production (and
described in Section 4.2). In this scheme, each node is assigned T tokens
(chosen uniformly at random from the hash space). The tokens of all nodes
are ordered according to their values in the hash space. Every two
consecutive tokens define a range. The last token and the first token form
a range that "wraps" around from the highest value to the lowest value in
the hash space. Because the tokens are chosen randomly, the ranges vary in
size. As nodes join and leave the system, the token set changes and
consequently the ranges change. Note that the space needed to maintain the
membership at each node increases linearly with the number of nodes in the
system.

> Riak uses equal-sized partitions with a round-robin distribution, not
> variably-sized partitions that are randomly distributed.

While using this strategy, the following problems were encountered. First,
when a new node joins the system, it needs to “steal” its key ranges from
other nodes. However, the nodes handing the key ranges off to the new node
have to scan their local persistence store to retrieve the appropriate set
of data items. Note that performing such a scan operation on a production
node is tricky as scans are highly resource intensive operations and they
need to be executed in the background without affecting the customer
performance. This requires us to run the bootstrapping task at the lowest
priority.
However, this significantly slows the bootstrapping process, and during the
busy shopping season, when the nodes are handling millions of requests a
day, the bootstrapping has taken almost a day to complete. Second, when a
node joins/leaves the system, the key ranges handled by many nodes change
and the Merkle trees for the new ranges need to be recalculated, which is a
non-trivial operation to perform on a production system. Finally, there was
no easy way to take a snapshot of the entire key space due to the
randomness in key ranges, and this made the process of archival
complicated. In this scheme, archiving the entire key space requires us to
retrieve the keys from each node separately, which is highly inefficient.

The fundamental issue with this strategy is that the schemes for data
partitioning and data placement are intertwined. For instance, in some
cases, it is preferred to add more nodes to the system in order to handle
an increase in request load. However, in this scenario, it is not possible
to add nodes without affecting data partitioning. Ideally, it is desirable
to use independent schemes for partitioning and placement. To this end, the
following strategies were evaluated:

<strong>Strategy 2:</strong> T random tokens per node and equal sized
partitions: In this strategy, the hash space is divided into Q equally
sized partitions/ranges and each node is assigned T random tokens. Q is
usually set such that Q >> N and Q >> S*T, where S is the number of nodes
in the system. In this strategy, the tokens are only used to build the
function that maps values in the hash space to the ordered lists of nodes
and not to decide the partitioning. A partition is placed on the first N
unique nodes that are encountered while walking the consistent hashing ring
clockwise from the end of the partition. <a href="#figure-7">Figure 7</a>
illustrates this strategy for N=3. In this example, nodes A, B, C are
encountered while walking the ring from the end of the partition that
contains key k1. The primary advantages of this strategy are: (i)
decoupling of partitioning and partition placement, and (ii) enabling the
possibility of changing the placement scheme at runtime.

> As mentioned before, Riak uses equal-sized partitions, but not random
> distribution.

**<figure id="figure-7" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure7-small.png">
  <figcaption>
    Figure 7: Partitioning and placement of keys in the three strategies.
    A, B, and C depict the three unique nodes that form the preference list
    for the key k1 on the consistent hashing ring (N=3). The shaded area
    indicates the key range for which nodes A, B, and C form the preference
    list. Dark arrows indicate the token locations for various nodes.
  </figcaption>
</figure>**

<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
Similar to strategy 2, this strategy divides the hash space into Q equally
sized partitions and the placement of partitions is decoupled from the
partitioning scheme. Moreover, each node is assigned Q/S tokens where S is
the number of nodes in the system. When a node leaves the system, its
tokens are randomly distributed to the remaining nodes such that these
properties are preserved. Similarly, when a node joins the system it
"steals" tokens from nodes in the system in a way that preserves these
properties.

> Riak most closely follows strategy 3.
>
> See [The Node Join Process] and [Replacing a Node].

[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
[Replacing a Node]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/replacing-node/
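
> The fixed, equal-sized layout is easy to picture in code: divide the
> 2^160 space into Q ranges and deal them out round-robin (a sketch of the
> idea, not Riak's actual claim algorithm):
>
>     def claim(q, nodes)
>       width = (2**160) / q
>       (0...q).map { |i| [i * width, nodes[i % nodes.size]] }
>     end
>
>     claim(8, %w[node1 node2 node3]).map(&:last)
>     # => ["node1", "node2", "node3", "node1", "node2", ...]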
The efficiency of these three strategies is evaluated for a system with
S=30 and N=3. However, comparing these different strategies in a fair
manner is hard as different strategies have different configurations to
tune their efficiency. For instance, the load distribution property of
strategy 1 depends on the number of tokens (i.e., T) while strategy 3
depends on the number of partitions (i.e., Q). One fair way to compare
these strategies is to evaluate the skew in their load distribution while
all strategies use the same amount of space to maintain their membership
information. For instance, in strategy 1 each node needs to maintain the
token positions of all the nodes in the ring and in strategy 3 each node
needs to maintain the information regarding the partitions assigned to each
node.

In our next experiment, these strategies were evaluated by varying the
relevant parameters (T and Q). The load balancing efficiency of each
strategy was measured for different sizes of membership information that
needs to be maintained at each node, where load balancing efficiency is
defined as the ratio of the average number of requests served by each node
to the maximum number of requests served by the hottest node.

The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
figure, strategy 3 achieves the best load balancing efficiency and strategy
2 has the worst load balancing efficiency. For a brief time, strategy 2
served as an interim setup during the process of migrating Dynamo instances
from using strategy 1 to strategy 3. Compared to strategy 1, strategy 3
achieves better efficiency and reduces the size of membership information
maintained at each node by three orders of magnitude. While storage is not
a major issue, the nodes gossip the membership information periodically and
as such it is desirable to keep this information as compact as possible. In
addition to this, strategy 3 is advantageous and simpler to deploy for the
following reasons: (i) Faster bootstrapping/recovery: Since partition
ranges are fixed, they can be stored in separate files, meaning a partition
can be relocated as a unit by simply transferring the file (avoiding random
accesses needed to locate specific items). This simplifies the process of
bootstrapping and recovery. (ii) Ease of archival: Periodic archiving of
the dataset is a mandatory requirement for most of Amazon storage services.
Archiving the entire dataset stored by Dynamo is simpler in strategy 3
because the partition files can be archived separately. By contrast, in
strategy 1, the tokens are chosen randomly and archiving the data stored in
Dynamo requires retrieving the keys from individual nodes separately, which
is usually inefficient and slow. The disadvantage of strategy 3 is that
changing the node membership requires coordination in order to preserve the
properties required of the assignment.

**<figure id="figure-8" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure8.png">
  <figcaption>
    Figure 8: Comparison of the load distribution efficiency of different
    strategies for system with 30 nodes and N=3 with equal amount of
    metadata maintained at each node.
    The values of the system size and number of replicas are based on the
    typical configuration deployed for the majority of our services.
  </figcaption>
</figure>**

### 6.3 Divergent Versions: When and How Many?

As noted earlier, Dynamo is designed to trade off consistency for
availability. To understand the precise impact of different failures on
consistency, detailed data is required on multiple factors: outage length,
type of failure, component reliability, workload etc. Presenting these
numbers in detail is outside of the scope of this paper. However, this
section discusses a good summary metric: the number of divergent versions
seen by the application in a live production environment.

> This first statement should be read carefully. It's probably more correct
> to say that Dynamo (and Riak) provides no consistency guarantees, and
> allows users to trade availability for durability/latency.

Divergent versions of a data item arise in two scenarios. The first is when
the system is facing failure scenarios such as node failures, data center
failures, and network partitions. The second is when the system is handling
a large number of concurrent writers to a single data item and multiple
nodes end up coordinating the updates concurrently. From both a usability
and efficiency perspective, it is preferred to keep the number of divergent
versions at any given time as low as possible. If the versions cannot be
syntactically reconciled based on vector clocks alone, they have to be
passed to the business logic for semantic reconciliation. Semantic
reconciliation introduces additional load on services, so it is desirable
to minimize the need for it.

In our next experiment, the number of versions returned to the shopping
cart service was profiled for a period of 24 hours. During this period,
99.94% of requests saw exactly one version; 0.00057% of requests saw 2
versions; 0.00047% of requests saw 3 versions and 0.00009% of requests saw
4 versions. This shows that divergent versions are created rarely.

Experience shows that the increase in the number of divergent versions is
driven not by failures but by an increase in the number of concurrent
writers. The increase in the number of concurrent writes is usually
triggered by busy robots (automated client programs) and rarely by humans.
This issue is not discussed in detail due to the sensitive nature of the
story.

### 6.4 Client-driven or Server-driven Coordination

As mentioned in Section 5, Dynamo has a request coordination component that
uses a state machine to handle incoming requests. Client requests are
uniformly assigned to nodes in the ring by a load balancer. Any Dynamo node
can act as a coordinator for a read request. Write requests on the other
hand will be coordinated by a node in the key’s current preference list.
This restriction is due to the fact that these preferred nodes have the
added responsibility of creating a new version stamp that causally subsumes
the version that has been updated by the write request. Note that if
Dynamo’s versioning scheme is based on physical timestamps, any node can
coordinate a write request.

> In Riak, a server-side load balancer is an optional configuration. You
> generally use either virtual IPs or reverse proxies.
>
> See [Load Balancing] for more information.
[Load Balancing]: {{<baseurl>}}riak/kv/2.9.7/configuring/load-balancing-proxy/

An alternative approach to request coordination is to move the state
machine to the client nodes. In this scheme client applications use a
library to perform request coordination locally. A client periodically
picks a random Dynamo node and downloads its current view of Dynamo
membership state. Using this information the client can determine which set
of nodes form the preference list for any given key. Read requests can be
coordinated at the client node, thereby avoiding the extra network hop that
is incurred if the request were assigned to a random Dynamo node by the
load balancer. Writes will either be forwarded to a node in the key’s
preference list or can be coordinated locally if Dynamo is using
timestamp-based versioning.

> Many [client libraries] provide built-in node request coordination.
>
> For example, using the Ruby driver, you could specify three nodes like
> this:
>
>     client = Riak::Client.new(nodes: [
>       {host: '10.0.0.1'},
>       {host: '10.0.0.2'},
>       {host: '10.0.0.3'}
>     ])
>
> Note that the Riak clients do not coordinate with Riak's preference list,
> but simply round-robin requests, letting the Riak cluster handle routing.

[client libraries]: {{<baseurl>}}riak/kv/2.9.7/developing/client-libraries/

An important advantage of the client-driven coordination approach is that a
load balancer is no longer required to uniformly distribute client load.
Fair load distribution is implicitly guaranteed by the near uniform
assignment of keys to the storage nodes. Obviously, the efficiency of this
scheme is dependent on how fresh the membership information is at the
client. Currently clients poll a random Dynamo node every 10 seconds for
membership updates. A pull-based approach was chosen over a push-based one
as the former scales better with a large number of clients and requires
very little state to be maintained at servers regarding clients. However,
in the worst case the client can be exposed to stale membership for a
duration of 10 seconds. If the client detects that its membership table is
stale (for instance, when some members are unreachable), it will
immediately refresh its membership information.

<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
percentile and averages that were observed for a period of 24 hours using
client-driven coordination compared to the server-driven approach. As seen
in the table, the client-driven coordination approach reduces the latencies
by at least 30 milliseconds for 99.9th percentile latencies and decreases
the average by 3 to 4 milliseconds. The latency improvement is because the
client-driven approach eliminates the overhead of the load balancer and the
extra network hop that may be incurred when a request is assigned to a
random node. As seen in the table, average latencies tend to be
significantly lower than latencies at the 99.9th percentile. This is
because Dynamo’s storage engine caches and write buffer have good hit
ratios. Moreover, since the load balancers and network introduce additional
variability to the response time, the gain in response time is higher for
the 99.9th percentile than the average.

<table id="table-2">
  <caption>
    Table 2: Performance of client-driven and server-driven coordination
    approaches.
+  </caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations were not significantly affected. To this end, the
+background tasks were integrated with an admission control mechanism. Each of
+the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock-contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the implementation
+and maintenance of Dynamo. Many Amazon internal services have used Dynamo for
+the past two years and it has provided significant levels of availability to its
+applications. In particular, applications have received successful responses
+(without timing out) for 99.9995% of their requests and no data loss event has
+occurred to date.
+
+Moreover, the primary advantage of Dynamo is that it provides the necessary
+knobs, in the form of the three parameters (N,R,W), for service owners to tune
+an instance to their needs. Unlike popular commercial data stores, Dynamo
+exposes data consistency and reconciliation logic issues to the developers. At
+the outset, one may expect the application logic to become more complex.
+However, historically, Amazon’s platform is built for high availability and many
+applications are designed to handle different failure modes and inconsistencies
+that may arise.
Hence, porting such applications to use Dynamo was a relatively
+simple task. For new applications that want to use Dynamo, some analysis is
+required during the initial stages of the development to pick the right conflict
+resolution mechanisms that meet the business case appropriately. Finally, Dynamo
+adopts a full membership model where each node is aware of the data hosted by
+its peers. To do this, each node actively gossips the full routing table with
+other nodes in the system. This model works well for a system that contains a
+couple of hundred nodes. However, scaling such a design to run with tens of
+thousands of nodes is not trivial because the overhead in maintaining the
+routing table increases with the system size. This limitation might be overcome
+by introducing hierarchical extensions to Dynamo. Also, note that this problem
+is actively addressed by O(1) DHT systems (e.g., [14]).
+
+> This is equally true for Riak. As mentioned above, consider running
+> [Basho Bench] to help discover your optimal setup. Nothing will give you
+> better numbers than real experimentation.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.7/using/performance/benchmarking/
+
+## 7. Conclusions
+
+> This paper was an overview of Riak from a Dynamo point-of-view. To get a
+> better sense of the Riak ecosystem, read our ever-expanding [documentation].
+
+[documentation]: {{<baseurl>}}
+
+This paper described Dynamo, a highly available and scalable data store, used
+for storing the state of a number of core services of Amazon.com’s e-commerce
+platform. Dynamo has provided the desired levels of availability and performance
+and has been successful in handling server failures, data center failures and
+network partitions. Dynamo is incrementally scalable and allows service owners
+to scale up and down based on their current request load. Dynamo allows service
+owners to customize their storage system to meet their desired performance,
+durability and consistency SLAs by allowing them to tune the parameters N, R,
+and W.
+
+The production use of Dynamo for the past year demonstrates that decentralized
+techniques can be combined to provide a single highly-available system. Its
+success in one of the most challenging application environments shows that an
+eventually-consistent storage system can be a building block for
+highly-available applications.
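+
+> In Riak, `N` corresponds to a bucket's `n_val`, while `R` and `W` can be
+> supplied per request. A minimal sketch using the Ruby client configured
+> earlier (the bucket and key names here are hypothetical):
+>
+>     bucket = client.bucket('carts')
+>     obj = bucket.get_or_new('user-123', r: 1)  # read succeeds once 1 replica replies
+>     obj.data = { 'items' => ['sku-42'] }
+>     obj.store(w: 3)                            # write waits for all 3 replicas to ack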
+ + + + diff --git a/content/riak/kv/2.9.7/learn/glossary.md b/content/riak/kv/2.9.7/learn/glossary.md new file mode 100644 index 0000000000..4293f8ce0a --- /dev/null +++ b/content/riak/kv/2.9.7/learn/glossary.md @@ -0,0 +1,358 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +aliases: +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.7/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/2.9.7/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/2.9.7/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/2.9.7/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/2.9.7/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.7/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/2.9.7/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/2.9.7/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/2.9.7/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
+ +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Hinted Handoff + +Hinted handoff is a technique for dealing with node failure in the Riak +cluster in which neighboring nodes temporarily take over storage +operations for the failed node. When the failed node returns to the +cluster, the updates received by the neighboring nodes are handed off to +it. + +Hinted handoff allows Riak to ensure database availability. When a node +fails, Riak can continue to handle requests as if the node were still +there. + +* [Recovering a Failed Node][repair recover failure recovery] + +## Key + +Keys are unique object identifiers in Riak and are scoped within buckets +and bucket types. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Lager + +[Lager] is an Erlang/OTP framework that +ships as Riak's default logger. + +## MapReduce + +Riak's MapReduce gives developers the capability to perform more +powerful queries over the data stored in their key/value data. + +* [Using MapReduce][usage mapreduce] + +## Node + +A node is analogous to a physical server. Nodes run a certain number of +vnodes, each of which claims a partition in the Riak Ring key space. + +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Object + +An object is another name for a value. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Partition + +Partitions are the spaces into which a Riak cluster is divided. Each +vnode in Riak is responsible for a partition. Data is stored on a set +number of partitions determined by the `n_val` setting, with the target +partitions chosen statically by applying consistent hashing to an +object's key. + +* [Clusters][concept clusters] +* [Eventual Consistency][concept eventual consistency] +* [Cluster Capacity Planning][plan cluster capacity] + +## Quorum + +Quorum in Riak has two meanings: + +* The quantity of replicas that must respond to a read or write request + before it is considered successful. This is defined as a bucket + property or as one of the relevant parameters to a single request + (R,W,DW,RW). +* A symbolic quantity for the above, `quorum`, which is equivalent to + `n_val` / 2 + 1. The default setting is `2`. + +* [Eventual Consistency][concept eventual consistency] +* [Replication properties][apps replication properties] +* [Understanding Riak's Configurable Behaviors] + +## Sloppy Quorum + +During failure scenarios, in which available nodes < total nodes, sloppy +quorum is used to ensure that Riak is still available to take writes. +When a primary node is unavailable, another node will accept its write +requests. When the node returns, data is transferred to the primary node +via the [Hinted Handoff](#hinted-handoff) process. + +## Read Repair + +Read repair is an anti-entropy mechanism that Riak uses to +optimistically update stale replicas when they reply to a read request +with stale data. + +* [More about Read Repair][concept replication] + +## Replica + +Replicas are copies of data stored in Riak. The number of replicas +required for both successful reads and writes is configurable in Riak +and should be set based on your application's consistency and +availability requirements. 
+
+* [Eventual Consistency][concept eventual consistency]
+* [Understanding Riak's Configurable Behaviors]
+
+## Riak Core
+
+Riak Core is the modular distributed systems framework that serves as
+the foundation for Riak's scalable architecture.
+
+* [Riak Core]
+* [Where To Start With Riak Core]
+
+## Riak KV
+
+Riak KV is the key/value datastore for Riak.
+
+* [Riak KV]
+
+## Riak Pipe
+
+Riak Pipe is the processing layer that powers Riak's MapReduce. It's
+best described as "UNIX pipes for Riak."
+
+* [Riak Pipe]
+* [Riak Pipe - the New MapReduce Power]
+* [Riak Pipe - Riak's Distributed Processing Framework]
+
+## Riak Search
+
+Riak Search is a distributed, scalable, failure-tolerant, realtime,
+full-text search engine integrating [Apache
+Solr](https://lucene.apache.org/solr/) with Riak KV.
+
+* [Using Search][usage search]
+
+## Ring
+
+The Riak Ring is a 160-bit integer space. This space is equally divided
+into partitions, each of which is claimed by a vnode, which themselves
+reside on actual physical server nodes.
+
+* [Clusters][concept clusters]
+* [Dynamo][learn dynamo]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Secondary Indexing (2i)
+
+Secondary Indexing in Riak gives developers the ability to tag an object
+stored in Riak with one or more values which can then be queried.
+
+* [Using Secondary Indexes][usage secondary-indexes]
+* [Repairing Indexes][repair recover repairs]
+
+## Strong Consistency
+
+While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater
+enable you to apply strong consistency guarantees to some or all of your
+data, thus using Riak as a CP (consistent plus partition-tolerant)
+rather than AP (highly available plus partition-tolerant) system.
+
+* [Strong Consistency Concept][concept strong consistency]
+* [Using Strong Consistency][cluster ops strong consistency]
+
+## Value
+
+Riak is best described as a key/value store. In versions of Riak prior
+to 2.0, all "values" are opaque BLOBs (binary large objects) identified
+with a unique key. Values can be any type of data, including a string, a
+JSON object, a text document, etc. Modifying a value involves fetching
+the value that exists in Riak and replacing it with a new one;
+operations on values are thus basic CRUD operations.
+
+[Riak Data Types][dev data types], added in version 2.0, are an important
+exception to this. While still considered values---because they are
+stored in bucket type/bucket/key locations, like anything in Riak---Riak
+Data Types are not BLOBs and are modified by Data Type-specific
+operations.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+* [Data Types][dev data types]
+
+
+## Vector Clock
+
+Riak utilizes vector clocks (or _vclocks_) to handle version control.
+Since any node in a Riak cluster is able to handle a request, and not
+all nodes need to participate, data versioning is required to keep track
+of a current value. When a value is stored in Riak, it is tagged with a
+vector clock, establishing its initial version. When it is updated, the
+client provides the vector clock of the object being modified so that
+this vector clock can be extended to reflect the update. Riak can then
+compare vector clocks on different versions of the object and determine
+certain attributes of the data.
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + + + + diff --git a/content/riak/kv/2.9.7/learn/new-to-nosql.md b/content/riak/kv/2.9.7/learn/new-to-nosql.md new file mode 100644 index 0000000000..02680c1aea --- /dev/null +++ b/content/riak/kv/2.9.7/learn/new-to-nosql.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: 2.9.7 +#menu: +# riak_kv-2.9.7: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +aliases: +--- + +**TODO: Add content (not sure where this lives in existing docs)** + + + + diff --git a/content/riak/kv/2.9.7/learn/use-cases.md b/content/riak/kv/2.9.7/learn/use-cases.md new file mode 100644 index 0000000000..df8385521d --- /dev/null +++ b/content/riak/kv/2.9.7/learn/use-cases.md @@ -0,0 +1,405 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/2.9.7/dev/data-modeling/ + - /riak/kv/2.9.7/dev/data-modeling/ +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/2.9.7/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/2.9.7/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/2.9.7/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. 
This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships:
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
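+
+With keys derived from campaign or company IDs as described above, serving an
+ad is a single key lookup. A minimal sketch with the Ruby client (assuming a
+configured `Riak::Client` as `client`; the bucket and key naming scheme here is
+hypothetical):
+
+```ruby
+ads    = client.bucket('ad_content')
+banner = ads.get('campaign-4711-banner-300x250')  # key derived from the campaign ID
+banner.content_type                               # => e.g. "image/png"
+banner.raw_data                                   # binary payload to serve
+```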
+
+Riak's tunable [replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster used to perform more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+patterns of reading and writing data to Riak are very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value.
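+
+A minimal sketch with the Ruby client (assuming a configured `client`; the
+device bucket and reading fields are hypothetical):
+
+```ruby
+bucket = client.bucket('sensor-1234')         # one bucket per sensor device
+key    = Time.now.utc.strftime('%Y%m%d%H%M')  # interval-based key, e.g. "202008161030"
+obj    = bucket.new(key)
+obj.content_type = 'application/json'
+obj.data = { 'temp_c' => 21.4, 'humidity' => 0.53 }
+obj.store
+```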
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors, yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye out for the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
+      Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion
+user-settings object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If your application frequently writes to the user account, or has dynamically
+growing user-related data such as bookmarks, subscriptions, or multiple
+notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to model user data in more complex or specific ways. A
+common example would be storing data for assembling a social network timeline.
+To create a user timeline, you could use a `timeline` bucket in Riak
+and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs which could then be
+used to retrieve the full information from another bucket, or perhaps containing
+the full status update. If you want to store additional data, such as a
+timestamp, category, or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+to the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client.
Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the content post, though with a different
+bucket/key combination. Another possibility would be to store each comment with
+its own ID. Loading the full view with comments would require your application
+to fetch from both the posts and comments buckets to assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Linkfluence case study">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="ideeli case study">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
+
+
+
diff --git a/content/riak/kv/2.9.7/learn/why-riak-kv.md b/content/riak/kv/2.9.7/learn/why-riak-kv.md
new file mode 100644
index 0000000000..5e32b81913
--- /dev/null
+++ b/content/riak/kv/2.9.7/learn/why-riak-kv.md
@@ -0,0 +1,225 @@
+---
+title: "Why Riak KV?"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Why Riak KV?"
+    identifier: "learn_why_riak_kv"
+    weight: 100
+    parent: "learn"
+toc: true
+aliases:
+  - /riak/2.9.7/theory/why-riak/
+  - /riak/kv/2.9.7/theory/why-riak/
+---
+
+
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.7/developing/app-guide/replication-properties
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.7/using/performance/benchmarking
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/strong-consistency
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency
+[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
+[Datomic]: http://www.datomic.com/overview.html
+[dev data types]: {{<baseurl>}}riak/kv/2.9.7/developing/data-types
+[glossary read rep]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#read-repair
+
+
+## What is Riak?
+
+Riak is a distributed database designed to deliver maximum data
+availability by distributing data across multiple servers. As long as
+your Riak client can reach *one* Riak server, it should be able to write
+data.
+
+Riak is used as an **eventually consistent** system, meaning that the
+data you want to read should remain available in most failure scenarios,
+although it may not be the most up-to-date version of that data.
+
+
+### Basho's goals for Riak
+
+Goal | Description
+-------|-------
+**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network is experiencing failure conditions
+**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden
+**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity
+**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available
+
+### When Riak makes sense
+
+If your data does not fit on a single server and demands a distributed
+database architecture, you should take a close look at Riak as a
+potential solution to your data availability issues. Getting distributed
+databases right is **very** difficult, and Riak was built to address the
+problem of data availability with as few trade-offs and downsides as
+possible.
+
+Riak's focus on availability makes it a good fit whenever downtime is
+unacceptable. No one can promise 100% uptime, but Riak is designed to
+survive network partitions and hardware failures that would
+significantly disrupt most databases.
+
+A less-heralded feature of Riak is its predictable latency. Because its
+fundamental operations---read, write, and delete---do not involve
+complex data joins or locks, it services those requests promptly. Thanks
+to this capability, Riak is often selected as a data storage backend for
+data management software from a variety of paradigms, such as
+[Datomic].
+
+From the standpoint of the actual content of your data, Riak might also
+be a good choice if your data can be modeled as one of Riak's currently
+available [Data Types][dev data types]: flags, registers, counters,
+sets, or maps. These Data Types enable you to take advantage of Riak's
+high availability approach while simplifying application development.
+
+### When Riak is Less of a Good Fit
+
+We recommend running no fewer than 5 data servers in a cluster.
+This means that Riak can be overkill for small databases.
If you're not
+already sure that you will need a distributed database, there's a good
+chance that you won't need Riak.
+
+If explosive growth is a possibility, however, you are well advised to
+prepare for it in advance. Scaling at Internet speeds is sometimes
+compared to overhauling an airplane mid-flight. If you feel that such a
+transition might be necessary in the future, then you might want to
+consider Riak.
+
+Riak's simple data model, consisting of keys and values as its atomic
+elements, means that your data must be denormalized if your system is to
+be reasonably performant. For most applications this is not a serious
+hurdle. But if your data simply cannot be effectively managed as keys
+and values, Riak will most likely not be the best fit for you.
+
+Correspondingly, if your application demands a high query load by any
+means other than key/value lookup---e.g. SQL-style `SELECT * FROM table`
+operations---Riak will not be as efficient as other databases. If you
+wish to compare Riak with other data technologies, Basho offers a tool
+called [Basho Bench] to help measure its performance, so that you can
+decide whether the availability and operational benefits of Riak
+outweigh its disadvantages.
+
+## How Does a Riak Cluster Work?
+
+A Riak cluster is a group of **nodes** that are in constant
+communication to ensure data availability and partition tolerance.
+
+### What is a Riak Node?
+
+A Riak node is not quite the same as a server, but in a production
+environment the two should be equivalent. A developer may run multiple
+nodes on a single laptop, but this would never be advisable in a real
+production cluster.
+
+Each node in a Riak cluster is equivalent, containing a complete,
+independent copy of the whole Riak package. There is no "master" node;
+no node has more responsibilities than others; and no node has special
+tasks not performed by other nodes. This uniformity provides the basis
+for Riak's fault tolerance and scalability.
+
+Each node is responsible for multiple data partitions, as discussed
+below.
+
+### Riak Automatically Re-Distributes Data When Capacity is Added
+
+When you add (or remove) machines, data is rebalanced automatically with
+no downtime. New machines claim data until ownership is equally spread
+around the cluster, with the resulting cluster status updates shared to
+every node via a gossip protocol and used to route requests. This is
+what makes it possible for any node in the cluster to receive requests.
+The end result is that developers don't need to deal with the underlying
+complexity of where data lives.
+
+### Consistent Hashing
+
+Data is distributed across nodes using consistent hashing. Consistent
+hashing ensures that data is evenly distributed around the cluster and
+makes possible the automatic redistribution of data as the cluster
+scales.
+
+### Intelligent Replication
+
+Riak's replication scheme ensures that you can still read, write, and
+update data if nodes go down. Riak allows you to set a replication
+variable, N (also known as the `n_val`), that specifies the number of
+nodes on which a value will be replicated.
+
+An `n_val` value of 3 (the default) means that each object is replicated
+3 times. When an object's key is mapped onto a given node, Riak will
+continue on and automatically replicate the data onto two more nodes.
+Because `n_val` is configurable, you could instead replicate values to,
+say, 7 nodes in a 10-node cluster or 10 nodes in a 15-node cluster.
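+
+A minimal sketch with the Ruby client (assuming a configured `client`; note
+that changing `n_val` on a bucket that already holds data is generally
+discouraged, so treat this as illustrative only):
+
+```ruby
+bucket = client.bucket('users')
+bucket.props = { 'n_val' => 5 }  # replicate each object in this bucket 5 times
+```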
+
+## When Things Go Wrong
+
+Riak retains fault tolerance, data integrity, and availability even in
+failure conditions such as hardware failure and network partitions. Riak
+has a number of means of addressing these scenarios and other bumps in
+the road, like version conflicts in data.
+
+### Hinted Handoff
+
+Hinted handoff enables Riak to handle node failure. If a node goes down,
+a neighboring node will take over its storage operations. When the
+failed node returns, the updates received by the neighboring node are
+handed back to it. This ensures that availability for writes and updates
+is maintained automatically, minimizing the operational burden of
+failure conditions.
+
+### Version Conflicts
+
+In any system that replicates data, conflicts can arise, for example
+when two clients update the same object at the exact same time or when
+not all updates have yet reached hardware that is experiencing lag.
+
+In Riak, replicas are [eventually consistent][concept eventual consistency],
+meaning that while data is always available, not all replicas may have
+the most recent update at the exact same time, causing brief
+periods---generally on the order of milliseconds---of inconsistency
+while all state changes are synchronized.
+
+Riak addresses data conflicts as follows: When you make a read request,
+Riak looks up all replicas for that object. By default, Riak will return
+the most recently updated version, determined by looking at the object's
+vector clock. Vector clocks are metadata attached to each replica when
+it is created. They are extended each time a replica is updated to keep
+track of versions. You can also allow clients to resolve conflicts
+themselves if that is a better fit for your use case.
+
+### Riak Data Types
+
+If you are not interested in dealing with version conflicts on the
+application side, [Riak Data Types][dev data types] offer a powerful
+yet easy-to-use means of storing certain types of data while allowing
+Riak to handle merge conflicts. These conflicts are resolved
+automatically by Riak using Data Type-specific algorithms inspired by
+research into [convergent replicated data types].
+
+### Read Repair
+
+When an outdated replica is returned as part of a read request, Riak
+will automatically update the out-of-sync replica to make it consistent.
+[Read repair][glossary read rep], a self-healing property of
+the database, will even update a replica that returns a `not_found` in
+the event that a node loses the data due to physical failure.
+
+### Reading and Writing Data in Failure Conditions
+
+In Riak, you can set an R value for reads and a W value for writes.
+These values give you control over how many replicas must respond to a
+request for it to succeed.
+
+Let's say that you have an N value of 3 (aka `n_val=3`) for a particular
+key/value pair, but one of the physical nodes responsible for a replica
+is down. With an `r=2` setting, only 2 replicas must return results for
+a read to be deemed successful. This allows Riak to provide read
+availability even when nodes are down or laggy. The same applies to the
+W in writes. If this value is not specified, Riak defaults to `quorum`,
+according to which the majority of nodes must respond.
+
+There is more on [replication properties][apps replication properties] elsewhere in the
+documentation.
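+
+As noted under Version Conflicts above, clients can resolve conflicts
+themselves. A minimal sketch with the Ruby client (assuming a configured
+`client`, a bucket with siblings enabled, and a hypothetical merge rule; the
+exact sibling interface varies by client version):
+
+```ruby
+obj = client.bucket('users').get('bashobunny')
+if obj.conflict?
+  # application-specific rule: keep the sibling with the longest friends list
+  winner = obj.siblings.max_by { |s| s.data['friends'].length }
+  obj.siblings = [winner]  # collapse the siblings to a single value
+  obj.store                # write it back, resolving the conflict
+end
+```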
+
+
+
+
diff --git a/content/riak/kv/2.9.7/release-notes.md b/content/riak/kv/2.9.7/release-notes.md
new file mode 100644
index 0000000000..f657093bff
--- /dev/null
+++ b/content/riak/kv/2.9.7/release-notes.md
@@ -0,0 +1,50 @@
+---
+title: "Riak KV 2.9.7 Release Notes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Release Notes"
+    identifier: "index_release_notes"
+    weight: 101
+    parent: index
+toc: false
+aliases:
+  - /riak/2.9.7/community/release-notes
+  - /riak/kv/2.9.7/intro-v20
+  - /riak/2.9.7/intro-v20
+  - /riak/kv/2.9.7/introduction
+---
+
+Released Aug 16, 2020.
+
+
+## Overview
+
+This release improves the stability of Riak when running with Tictac AAE in parallel mode:
+
+- The `aae_exchange` schedule will back off when exchanges begin to time out due to pressure in the system.
+
+- The `aae_runner` now has a size-limited queue of snapshots for handling exchange `fetch_clock` queries.
+
+- The AAE tree rebuilds now take a snapshot at the point the rebuild is de-queued for work, not at the point the rebuild is added to the queue.
+
+- The loading process will yield when applying the backlog of changes to allow for other messages to interleave (that may otherwise time out).
+
+- The AAE sub-system will listen to back-pressure signals from the `aae_keystore`, and ripple a response to slow down upstream services (and ultimately the `riak_kv_vnode`).
+
+- It is possible to accelerate and decelerate AAE repairs by setting `riak_kv` application variables at runtime (e.g. `tictacaae_exchangetick`, `tictacaae_maxresults`), and to log AAE-prompted repairs using `log_readrepair`.
+
+The system is now stable under specific load tests designed to trigger AAE failure. However, parallel mode should still not be used in production systems unless it has been subject to environment-specific load testing.
+
+[Previous Release Notes](#previous-release-notes)
+
+## Previous Release Notes
+
+Please see the KV 2.9.4 release notes [here]({{<baseurl>}}riak/kv/2.9.4/release-notes/), the KV 2.9.2 release notes [here]({{<baseurl>}}riak/kv/2.9.2/release-notes/), and the KV 2.9.1 release notes [here]({{<baseurl>}}riak/kv/2.9.1/release-notes/).
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup.md b/content/riak/kv/2.9.7/setup.md
new file mode 100644
index 0000000000..9496210ca1
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup.md
@@ -0,0 +1,51 @@
+---
+title: "Setup Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Setup"
+    identifier: "setup_index"
+    weight: 110
+    pre: install
+toc: false
+aliases:
+---
+
+[plan index]: ../setup/planning
+[install index]: ../setup/installing
+[upgrade index]: ../setup/upgrading
+[downgrade]: ../setup/downgrade
+
+## In This Section
+
+#### [Planning][plan index]
+
+Information on planning your Riak KV cluster, including software and hardware recommendations.
+
+[Learn More >>][plan index]
+
+#### [Installing][install index]
+
+Step-by-step tutorials on installing Riak KV.
+
+[Learn More >>][install index]
+
+#### [Upgrading][upgrade index]
+
+Guides on upgrading your Riak KV cluster.
+
+[Learn More >>][upgrade index]
+
+#### [Downgrading][downgrade]
+
+A guide on downgrading your Riak KV cluster.
+
+[Learn More >>][downgrade]
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/downgrade.md b/content/riak/kv/2.9.7/setup/downgrade.md
new file mode 100644
index 0000000000..52d5f6e803
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/downgrade.md
@@ -0,0 +1,179 @@
+---
+title: "Downgrading"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Downgrading"
+    identifier: "downgrading"
+    weight: 103
+    parent: "setup_index"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/upgrading/rolling-downgrades/
+  - /riak/kv/2.9.7/ops/upgrading/rolling-downgrades/
+---
+
+[rolling upgrade]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading/cluster
+[config ref]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference
+[concept aae]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/
+[aae status]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#aae-status
+
+Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade].
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove the Riak Search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak Search.
+7. Monitor the reindex of the data.
+8. Finalize the process and restart Riak KV & Riak Search.
+
+### Guidelines
+
+* Riak Control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:---|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ | | Can be opted out of using a [capability](#aae_tree_capability). |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression in LevelDB and/or enabled global expiration in LevelDB when you installed KV 2.9.7, you cannot downgrade.
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed-version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).

This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}
+
+### Stop Riak KV and remove the Riak Search index & temporary data
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+2\. Back up your Riak KV `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Remove the Riak Search index data and AAE data:
+
+  1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+      ```bash
+      rm -rf /var/lib/riak/yz_temp/solr-webapp
+      ```
+  2.
Delete the Solr cores located in the yz directory. If you have custom solrconfig.xml files, you will need to restore the core from backup instead.

  For example:

  ```bash
  rm -rf /var/lib/riak/yz/example_core1
  rm -rf /var/lib/riak/yz/example_core2
  ```

### Prepare to Re-index Solr Cores

5\. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency-sensitive application, you should adjust these settings with care.

```riak.conf
anti_entropy.concurrency_limit = 8
anti_entropy.tree.build_limit.number = 4
anti_entropy.tree.build_limit.per_timespan = 5m
```

### Start the node and disable Yokozuna

6\. Start Riak KV:
{{% note %}}
Search results will be inconsistent until **Step 8.1** is complete.
{{% /note %}}

```bash
riak start
```

7\. Wait for Riak search to start by running the following command:

```bash
riak-admin wait-for-service yokozuna
```

8\. Run `riak attach`.

  1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:

  ```
  riak_core_node_watcher:service_down(yokozuna).
  ```

  2. Expire the Yokozuna AAE Trees:

  ```
  yz_entropy_mgr:expire_trees().
  ```

  3. Exit the attach session by pressing **Ctrl-G** then **q**.

### Monitor the reindex of the data

9\. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands.

The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output.

Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.

### Finalize process and restart Yokozuna


10\. If you raised the AAE concurrency settings in riak.conf during **Step 5**, stop the node and remove the increased AAE thresholds.

11\. If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet:

```erlang
riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)).
```

12\. Exit the attach session by pressing **Ctrl-G** then **q**.

13\. 
Verify that transfers have completed: + +```bash +riak-admin transfers +``` + + + + + diff --git a/content/riak/kv/2.9.7/setup/installing.md b/content/riak/kv/2.9.7/setup/installing.md new file mode 100644 index 0000000000..274f49713f --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing.md @@ -0,0 +1,61 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/2.9.7/ops/building/installing + - /riak/kv/2.9.7/ops/building/installing + - /riak/2.9.7/installing/ + - /riak/kv/2.9.7/installing/ +--- + +[install aws]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/rhel-centos +[install suse]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. + + + + + diff --git a/content/riak/kv/2.9.7/setup/installing/amazon-web-services.md b/content/riak/kv/2.9.7/setup/installing/amazon-web-services.md new file mode 100644 index 0000000000..279e549b2b --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/amazon-web-services.md @@ -0,0 +1,153 @@ +--- +title_supertext: "Installing on" +title: "Amazon Web Services" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Amazon Web Services" + identifier: "installing_amazon_web_services" + weight: 301 + parent: "installing" +toc: true +aliases: + - /riak/2.9.7/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/kv/2.9.7/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/2.9.7/installing/amazon-web-services/ + - /riak/kv/2.9.7/installing/amazon-web-services/ +--- + + +## Launching Riak VMs via the AWS Marketplace + +{{% note title="Note" %}} +The AWS Marketplace does not always have the most recent versions of Riak available. 
To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below.
{{% /note %}}

In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account.

1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account.

2. Locate Riak in the **Databases & Caching** category or search for Riak from any page.

3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair.

    ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png)

4. Click the **Accept Terms and Launch with 1-Click** button.

### Security Group Settings

Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak.

1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM.

2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports:

    * 22 (SSH)
    * 8087 (Riak Protocol Buffers Interface)
    * 8098 (Riak HTTP Interface)

3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab).

    * Port range: 4369
    * Port range: 6000-7999
    * Port range: 8099

4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.

    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)

We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.7/using/security/).

## Clustering Riak on AWS

You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the `ec2-user`.

You can find more information on connecting to an instance on the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).

{{% note title="Note" %}}
The following clustering setup will _not_ be resilient to instance restarts
unless deployed in Amazon VPC.
{{% /note %}}

{{% note title="Note on Package Based Installation" %}}
 If installing to AWS by package, further configuration to _riak.conf_ to set the node name and listening IP addresses is necessary for the below steps to function.
{{% /note %}}

1. On the first node, obtain the internal IP address:

    ```bash
    curl http://169.254.169.254/latest/meta-data/local-ipv4
    ```

2. For all other nodes, use the internal IP address of the first node:

    ```bash
    sudo riak-admin cluster join riak@<ip.of.first.node>
    ```

3. After all of the nodes are joined, execute the following:

    ```bash
    sudo riak-admin cluster plan
    ```

    If this looks good:

    ```bash
    sudo riak-admin cluster commit
    ```

    To check the status of clustering, use:

    ```bash
    sudo riak-admin member_status
    ```

You now have a Riak cluster running on AWS.
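The join/plan/commit cycle above can also be scripted. Below is a minimal sketch that only strings together the `riak-admin` commands already shown in this section; `FIRST_NODE_IP` is a hypothetical value that you would replace with the internal IP returned by the metadata service on your first node.

```bash
#!/usr/bin/env bash
# Minimal sketch of the cluster-formation steps above.
set -euo pipefail

FIRST_NODE_IP="10.0.0.10"   # hypothetical; use the value from step 1

# Run on each node OTHER than the first:
sudo riak-admin cluster join "riak@${FIRST_NODE_IP}"

# Then, on any single node, review and commit the planned changes:
sudo riak-admin cluster plan
sudo riak-admin cluster commit

# Confirm that all members have joined:
sudo riak-admin member_status
```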
## Installing From Package

#### Amazon Linux 2 (AWS)

You can install on Amazon Linux 2 using yum, which we recommend:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2/riak-2.9.7-1.amzn2x86_64.rpm
sudo yum localinstall -y riak-2.9.7-1.amzn2x86_64.rpm
```

Or you can install the `.rpm` package manually:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2/riak-2.9.7-1.amzn2x86_64.rpm
sudo rpm -i riak-2.9.7-1.amzn2x86_64.rpm
```


#### Amazon Linux 2016.09 (AWS)

You can install on Amazon Linux 2016.09 using yum, which we recommend:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2016.09/riak-2.9.7-1.amzn1x86_64.rpm
sudo yum localinstall -y riak-2.9.7-1.amzn1x86_64.rpm
```

Or you can install the `.rpm` package manually:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2016.09/riak-2.9.7-1.amzn1x86_64.rpm
sudo rpm -i riak-2.9.7-1.amzn1x86_64.rpm
```

## Next Steps

Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].

[install verify]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/verify


diff --git a/content/riak/kv/2.9.7/setup/installing/debian-ubuntu.md b/content/riak/kv/2.9.7/setup/installing/debian-ubuntu.md new file mode 100644 index 0000000000..08a035ff97 --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/debian-ubuntu.md @@ -0,0 +1,171 @@
---
title_supertext: "Installing on"
title: "Debian and Ubuntu"
description: ""
project: "riak_kv"
project_version: 2.9.7
menu:
  riak_kv-2.9.7:
    name: "Debian & Ubuntu"
    identifier: "installing_debian_ubuntu"
    weight: 302
    parent: "installing"
toc: true
aliases:
  - /riak/2.9.7/ops/building/installing/Installing-on-Debian-and-Ubuntu
  - /riak/kv/2.9.7/ops/building/installing/Installing-on-Debian-and-Ubuntu
  - /riak/2.9.7/installing/debian-ubuntu/
  - /riak/kv/2.9.7/installing/debian-ubuntu/
---

[install source index]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source/
[security index]: {{<baseurl>}}riak/kv/2.9.7/using/security/
[install source erlang]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source/erlang
[install verify]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/verify

Riak KV can be installed on Debian- or Ubuntu-based systems using a binary
package or by compiling from source code.

The following steps have been tested to work with Riak KV on:

- Ubuntu 18.04
- Ubuntu 16.04
- Ubuntu 14.04
- Ubuntu 12.04
- Debian 9.2
- Debian 8.6
- Debian 7.6
- Raspbian Buster

> **Note on Debian 7**
>
> If you wish to install Riak on Debian 7, you may need to install
[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
later, which in turn requires upgrading your system to
[sid](https://www.debian.org/releases/sid/). Installation instructions
can be found
[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
>
> Once sid has been installed, you can install libc6 with the following
command:
>
>```bash
apt-get -t sid install libc6 libc6-dev libc6-dbg
```

## Installing From Package

If you wish to install the deb packages by hand, follow these
instructions.

### Installing on Non-LTS Ubuntu Releases

Typically we only package Riak for LTS releases to keep our build and
testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
there are changes that affect how Riak is packaged, so we will release a
separate package for that non-LTS release.
In most other cases, however,
if you are running a non-LTS release (such as Ubuntu 12.10) it is safe to
follow the instructions below for the most recent LTS release prior to yours;
for Ubuntu 12.10, for example, follow the installation instructions for
Ubuntu 12.04.

### PAM Library Requirement for Ubuntu

One dependency that may be missing on your machine is the `libpam0g-dev`
package, used for Pluggable Authentication Module (PAM) authentication,
associated with [Riak security][security index].

To install:

```bash
sudo apt-get install libpam0g-dev
```

### Riak 64-bit Installation

#### Ubuntu Bionic Beaver (18.04)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/bionic64/riak-2.9.7-1_amd64.deb
sudo dpkg -i riak-2.9.7-1_amd64.deb
```

#### Ubuntu Xenial Xerus (16.04)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/xenial64/riak-2.9.7-1_amd64.deb
sudo dpkg -i riak-2.9.7-1_amd64.deb
```

#### Ubuntu Trusty Tahr (14.04)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/trusty64/riak-2.9.7-1_amd64.deb
sudo dpkg -i riak-2.9.7-1_amd64.deb
```

#### Ubuntu Precise Pangolin (12.04)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/precise64/riak-2.9.7-1_amd64.deb
sudo dpkg -i riak-2.9.7-1_amd64.deb
```

#### Debian Stretch (9.0)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/9/riak-2.9.7-1_amd64.deb
sudo dpkg -i riak-2.9.7-1_amd64.deb
```

#### Debian Jessie (8.0)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/8/riak-2.9.7-1_amd64.deb
sudo dpkg -i riak-2.9.7-1_amd64.deb
```

#### Debian Wheezy (7.0)

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/7/riak-2.9.7-1_amd64.deb
sudo dpkg -i riak-2.9.7-1_amd64.deb
```

#### Raspbian Buster

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/raspbian/buster/riak-2.9.7-1_armhf.deb
sudo dpkg -i riak-2.9.7-1_armhf.deb
```


## Installing From Source

First, install Riak dependencies using apt:

```bash
sudo apt-get install build-essential libc6-dev-i386 git
```

Riak requires an [Erlang](http://www.erlang.org/) installation.
Instructions can be found in [Installing Erlang][install source erlang].

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/riak-2.9.7.tar.gz
tar zxvf riak-2.9.7.tar.gz
cd riak-2.9.7
make rel
```

If the build was successful, a fresh build of Riak will exist in the
`rel/riak` directory.

## Next Steps

Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
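If you manage several Ubuntu hosts, the release-specific download steps above can be wrapped in a small script. This is a sketch only: it assumes `lsb_release` is available and that the codename-to-URL mapping matches the LTS packages listed above.

```bash
#!/usr/bin/env bash
# Sketch: fetch and install the 2.9.7 package that matches the running
# Ubuntu LTS release. Codenames mirror the URLs listed above.
set -euo pipefail

BASE="https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu"
CODENAME="$(lsb_release -sc)"   # e.g. bionic, xenial, trusty, precise

case "$CODENAME" in
  bionic|xenial|trusty|precise)
    wget "${BASE}/${CODENAME}64/riak-2.9.7-1_amd64.deb"
    sudo dpkg -i riak-2.9.7-1_amd64.deb
    ;;
  *)
    echo "No 2.9.7 package listed above for '${CODENAME}'." >&2
    echo "See the note on non-LTS releases." >&2
    exit 1
    ;;
esac
```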
+ + + + + diff --git a/content/riak/kv/2.9.7/setup/installing/freebsd.md b/content/riak/kv/2.9.7/setup/installing/freebsd.md new file mode 100644 index 0000000000..6d6cd816bc --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/freebsd.md @@ -0,0 +1,133 @@ +--- +title_supertext: "Installing on" +title: "FreeBSD" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "FreeBSD" + identifier: "installing_freebsd" + weight: 303 + parent: "installing" +toc: true +aliases: + - /riak/2.9.7/ops/building/installing/Installing-on-FreeBSD + - /riak/kv/2.9.7/ops/building/installing/Installing-on-FreeBSD + - /riak/2.9.7/installing/freebsd/ + - /riak/kv/2.9.7/installing/freebsd/ +--- + + + +[install source erlang]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source/erlang +[downloads]: {{<baseurl>}}riak/kv/2.9.7/downloads/ +[install verify]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/verify + +You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. + +## Installing From Binary Package + +Installing Riak from a binary package is the simplest method with least required dependencies, and requires less time to complete than building from source. + +### Prerequisites and Dependencies + +Riak depends on `sudo` to be installed if the Riak command line tools are to be executed by users other than the *riak* user. Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package. + +### Installation + +You can install the Riak binary package on FreeBSD remotely using the +`pkg_add` remote option. For this example, we're installing `riak-2.9.7.txz`. + +### For FreeBSD 11.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.7/freebsd/11.1/riak-2.9.7.txz +``` + + +### For FreeBSD 10.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.7/freebsd/10.4/riak-2.9.7.txz +``` + +When Riak is installed, a message is displayed with information about the installation and available documentation. + +``` +Thank you for installing Riak. + +Riak has been installed in /usr/local owned by user:group riak:riak + +The primary directories are: + + {platform_bin_dir, "/usr/local/sbin"} + {platform_data_dir, "/var/db/riak"} + {platform_etc_dir, "/usr/local/etc/riak"} + {platform_lib_dir, "/usr/local/lib/riak"} + {platform_log_dir, "/var/log/riak"} + +These can be configured and changed in the platform_etc_dir/app.config. + +Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly. + +Man pages are available for riak(1) and riak-admin(1) +``` + +## Installing From Source + +Installing Riak from source on FreeBSD is a straightforward process which requires installation of more dependencies (such as Erlang) prior to building, and requires more time than a binary package installation. + +That said, installing from source provides for greater flexibility with respect to configuration, data root locations, and more fine grained control over specific dependency versions. + +### Prerequisites and Dependencies + +When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build. + +If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. 
+ +* Erlang ([Installing Erlang][install source erlang]) +* Curl +* Git +* OpenSSL (version 1.0.0_7) +* Python +* sudo +* flex + +### Installation +First download the version you wish to install from the [downloads][downloads]. + +Next, unpack and build a release from source: + +```bash +tar zxf <riak-x.x.x> +cd riak-x.x.x +gmake rel +``` + +Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories: + +```bash +bin # Riak binaries +data # Riak data and metadata +erts-5.9.2 # Erlang Run-Time System +etc # Riak Configuration +lib # Third party libraries +log # Operational logs +releases # Release information +``` + +If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this: + +```bash +gmake devrel +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.7/setup/installing/mac-osx.md b/content/riak/kv/2.9.7/setup/installing/mac-osx.md new file mode 100644 index 0000000000..8ce9a094a5 --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/mac-osx.md @@ -0,0 +1,121 @@ +--- +title_supertext: "Installing on" +title: "Mac OS X" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Mac OS X" + identifier: "installing_macosx" + weight: 303 + parent: "installing" +toc: true +aliases: + - /riak/2.9.7/ops/building/installing/Installing-on-Mac-OS-X + - /riak/kv/2.9.7/ops/building/installing/Installing-on-Mac-OS-X + - /riak/2.9.7/installing/mac-osx/ + - /riak/kv/2.9.7/installing/mac-osx/ +--- + + + +[perf open files]: {{<baseurl>}}riak/kv/2.9.7/using/performance/open-files-limit +[install source erlang]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/verify + +The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball. + +> **`ulimit` on OS X** +> +> OS X gives you a very small limit on open file handles, so even with a +backend that uses very few file handles, it's possible to run out. See +[Open Files Limit][perf open files] for more information about changing the limit. + + +## From Precompiled Tarballs + +To run Riak from our precompiled tarball, run these commands for the +appropriate platform: + +### 64-bit + +```bash +curl -O https://files.tiot.jp/riak/kv/2.9/2.9.7/osx/10.11/riak-2.9.7-OSX-x86_64.tar.gz +tar xzvf riak-2.9.7-osx-x86_64.tar.gz +``` + +After the release is untarred, you will be able to `cd` into the `riak` +directory and execute `bin/riak start` to start the Riak node. + +## Homebrew + +{{% note title="Warning: Homebrew not always up to date" %}} +Homebrew's Riak recipe is community supported, and thus is not always up to +date with the latest Riak package. Please ensure that the current recipe is +using the latest supported code (and don't be afraid to update it if it's +not). +{{% /note %}} + +Installing Riak 2.9.7 with [Homebrew](http://brew.sh/) is easy: + +```bash +brew install --devrel riak +``` + +By default, this will place a `2.9.7` folder in +`/usr/local/Cellar/riak`. 
+ +Be aware that you will most likely see the following message after +running `brew install`: + +``` +Error: The `brew link` step did not complete successfully +The formula built, but is not symlinked into /usr/local + +You can try again using: + brew link riak +``` + +We do not recommend using `brew link` with Riak. Instead, we recommend +either copying that directory to a desired location on your machine, +aliasing the executables in the `/bin` directory, or interacting with +the Riak installation directory via environment variables. + +**Note**: Homebrew will install Erlang if you don't have it already. + +## Installing From Source + +You must have Xcode tools installed from [Apple's Developer +website](http://developer.apple.com/). + +{{% note title="Note on Clang" %}} +Riak has had problems compiling with Clang in the past. As of Riak KV +2.9.0p5 and Clang 902.0.39.1, Clang can build Riak. +{{% /note %}} + +Riak requires [Erlang](http://www.erlang.org/) R16B02+. + +If you do not have Erlang already installed, see [Installing Erlang][install source erlang]. + +Next, download and unpack the source distribution. + +```bash +curl -O https://files.tiot.jp/riak/kv/2.9/2.9.7/riak-2.9.7.tar.gz +tar zxvf riak-2.9.7.tar.gz +cd riak-2.9.7 +make rel +``` + +If you receive errors when building about "incompatible architecture," +please verify that you built Erlang with the same architecture as your +system (Snow Leopard and higher: 64bit). + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.7/setup/installing/rhel-centos.md b/content/riak/kv/2.9.7/setup/installing/rhel-centos.md new file mode 100644 index 0000000000..a35eddc485 --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/rhel-centos.md @@ -0,0 +1,134 @@ +--- +title_supertext: "Installing on" +title: "RHEL and CentOS" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "RHEL & CentOS" + identifier: "installing_rhel_centos" + weight: 304 + parent: "installing" +toc: true +aliases: + - /riak/2.9.7/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/kv/2.9.7/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/2.9.7/installing/rhel-centos/ + - /riak/kv/2.9.7/installing/rhel-centos/ +--- + + + +[install source index]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source +[install source erlang]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/verify + +Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary +package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on +CentOS/RHEL 6.9, 7.5.1804 and 8.1.1911 . + +> **Note on SELinux** +> +> CentOS enables SELinux by default, so you may need to disable SELinux if +you encounter errors. + +## Installing From Package + +If you wish to install the RHEL/CentOS packages by hand, follow these +instructions. 
### For CentOS 8 / RHEL 8

Before installing Riak on CentOS 8/RHEL 8, we need to satisfy some Erlang dependencies
from EPEL first by installing the EPEL repository:

```bash
sudo yum install -y epel-release
```

Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/8/riak-2.9.7-1.el8.x86_64.rpm
sudo yum localinstall -y riak-2.9.7-1.el8.x86_64.rpm
```

Or you can install the `.rpm` package manually:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/8/riak-2.9.7-1.el8.x86_64.rpm
sudo rpm -Uvh riak-2.9.7-1.el8.x86_64.rpm
```

### For CentOS 7 / RHEL 7

You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/7/riak-2.9.7-1.el7.x86_64.rpm
sudo yum localinstall -y riak-2.9.7-1.el7.x86_64.rpm
```

Or you can install the `.rpm` package manually:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/7/riak-2.9.7-1.el7.x86_64.rpm
sudo rpm -Uvh riak-2.9.7-1.el7.x86_64.rpm
```

### For CentOS 6 / RHEL 6

You can install using yum, which we recommend:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/6/riak-2.9.7-1.el6.x86_64.rpm
sudo yum localinstall -y riak-2.9.7-1.el6.x86_64.rpm
```

Or you can install the `.rpm` package manually:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/6/riak-2.9.7-1.el6.x86_64.rpm
sudo rpm -Uvh riak-2.9.7-1.el6.x86_64.rpm
```

## Installing From Source

Riak requires an [Erlang](http://www.erlang.org/) installation.
Instructions can be found in [Installing Erlang][install source erlang].

Building from source will require the following packages:

* `gcc`
* `gcc-c++`
* `glibc-devel`
* `make`
* `git`
* `pam-devel`

You can install these with yum:

```bash
sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
```

Now we can download and install Riak:

```bash
wget https://files.tiot.jp/riak/kv/2.9/2.9.7/riak-2.9.7.tar.gz
tar zxvf riak-2.9.7.tar.gz
cd riak-2.9.7
make rel
```

You will now have a fresh build of Riak in the `rel/riak` directory.

## Next Steps

Now that Riak is installed, check out [Verifying a Riak Installation][install verify].


diff --git a/content/riak/kv/2.9.7/setup/installing/smartos.md b/content/riak/kv/2.9.7/setup/installing/smartos.md new file mode 100644 index 0000000000..af2540c47f --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/smartos.md @@ -0,0 +1,119 @@
---
title_supertext: "Installing on"
title: "SmartOS"
description: ""
project: "riak_kv"
project_version: "2.9.7"
menu:
  riak_kv-2.9.7:
    name: "SmartOS"
    identifier: "installing_smartos"
    weight: 305
    parent: "installing"
toc: true
aliases:
  - /riak/2.9.7/ops/building/installing/Installing-on-SmartOS
  - /riak/kv/2.9.7/ops/building/installing/Installing-on-SmartOS
  - /riak/2.9.7/installing/smartos/
  - /riak/kv/2.9.7/installing/smartos/
---

[install verify]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/verify

{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
{{% /note %}}

The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**.
They demonstrate installation of a Riak node on SmartOS as the root user. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open +files limit is at least 65536. Check the current limits to verify this: + +```bash +ulimit -a +``` + +To temporarily increase this limit *for the life of your session*, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`: + +```bash +set rlim_fd_max=65536 +``` + +## Choosing a Version + +SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is the way to determine which Riak package to use. + +The thing that really matters for Riak is what dataset was used to make the SmartOS VM. These datasets come from joyent and appear like this with the `dsadm` command: + +``` +fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4 +f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1 +``` + +This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use. + +For Joyent Cloud users who don't know what dataset was used, in the guest zone type: + +``` +cat /opt/local/etc/pkgin/repositories.conf +``` + +* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* you need to use the `1.8` download. +* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* you need to use the `1.6` download. + +## Download and Install + +Download your version of the Riak binary package for SmartOS: + +```bash +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz +``` + +Next, install the package: + +``` +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz +``` + +After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: + +```bash +svcadm -v enable -r riak +``` + +Finally, after enabling the services, check to see that they are online: + +``` +svcs -a | grep -E 'epmd|riak' +``` + +Output from the above command should resemble the following: + +``` +online 17:17:16 svc:/network/epmd:default +online 17:17:16 svc:/application/riak:default +``` + +Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed and configured Riak as service on SmartOS. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
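If you script SmartOS provisioning, the service checks above can be combined into a single wait-and-ping step. A rough sketch, assuming the package has been installed and `svcadm -v enable -r riak` has already been issued:

```bash
#!/usr/bin/env bash
# Sketch: wait (up to ~60s) for the epmd and riak SMF services to come
# online, then confirm that the node answers a ping.
for attempt in $(seq 1 30); do
  online=$(svcs -a | grep -E 'epmd|riak' | grep -c online)
  [ "$online" -ge 2 ] && break
  sleep 2
done

riak ping   # expect "pong" once both services report online
```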
+ diff --git a/content/riak/kv/2.9.7/setup/installing/solaris.md b/content/riak/kv/2.9.7/setup/installing/solaris.md new file mode 100644 index 0000000000..61fdbafe13 --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/solaris.md @@ -0,0 +1,91 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: "2.9.7" +menu: + riak_kv-2.9.7: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/2.9.7/ops/building/installing/Installing-on-Solaris + - /riak/kv/2.9.7/ops/building/installing/Installing-on-Solaris + - /riak/2.9.7/installing/solaris/ + - /riak/kv/2.9.7/installing/solaris/ +--- + + + +[install verify]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
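To make the `riak` and `riak-admin` scripts available in future sessions as well, the `PATH` change mentioned above can be made persistent. A minimal sketch for a Bourne-style shell, assuming the default `/opt/riak/bin` install location:

```bash
# Append the Riak binary directory to the login PATH and reload it.
echo 'export PATH=/opt/riak/bin:$PATH' >> ~/.profile
. ~/.profile

# The riak script should now resolve without a full path:
which riak
riak ping
```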
+ diff --git a/content/riak/kv/2.9.7/setup/installing/source.md b/content/riak/kv/2.9.7/setup/installing/source.md new file mode 100644 index 0000000000..fff4b58ef9 --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/source.md @@ -0,0 +1,110 @@ +--- +title_supertext: "Installing" +title: "Riak KV From Source" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Installing From Source" + identifier: "installing_source" + weight: 310 + parent: "installing" +toc: true +aliases: + - /riak/2.9.7/ops/building/Installing-Riak-from-Source + - /riak/kv/2.9.7/ops/building/Installing-Riak-from-Source + - /riak/2.9.7/installing/source/ + - /riak/kv/2.9.7/installing/source/ +--- + + + +[install source erlang]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/source/erlang +[downloads]: {{<baseurl>}}riak/kv/2.9.7/downloads/ +[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/verify + +Riak should be installed from source if you are building on a platform +for which a package does not exist or if you are interested in +contributing to Riak. + +## Dependencies + +### Erlang + +To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release. + +See [Installing Erlang][install source erlang] for instructions. + +### Git + +Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build. + +### GCC + +Riak will not compile with Clang. Please make sure your default C/C++ +compiler is [GCC](https://gcc.gnu.org/). + +## Installation + +The following instructions generate a complete, self-contained build of +Riak in `$RIAK/rel/riak` where `$RIAK` is the location of the unpacked +or cloned source. + +### Installing from source package + +Download the Riak source package from the [Download Center][downloads] and build: + +```bash +curl -O https://files.tiot.jp/riak/kv/2.9/2.9.7/riak-2.9.7.tar.gz +tar zxvf riak-2.9.7.tar.gz +cd riak-2.9.7 +make locked-deps +make rel +``` + +### Installing from GitHub + +The [Riak Github respository](http://github.com/basho/riak) has much +more information on building and installing Riak from source. To clone +and build Riak from source, follow the steps below. + +Clone the repository using [Git](http://git-scm.com) and build: + +```bash +git clone git://github.com/basho/riak.git +cd riak +make locked-deps +make rel +``` + +## Platform-Specific Instructions + +For instructions about specific platforms, see: + + * [Debian & Ubuntu][install debian & ubuntu#source] + * [FreeBSD][install freebsd#source] + * [Mac OS X][install mac osx#source] + * [RHEL & CentOS][install rhel & centos#source] + +If you are running Riak on a platform not in the list above and need +some help getting it up and running, join The Riak Mailing List and +inquire about it there. We are happy to help you get up and running with +Riak. 
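Whichever platform you build on, you can smoke-test the self-contained release before going further. A small sketch, run from the unpacked source directory and using only the `riak` commands this guide uses elsewhere:

```bash
# Start the freshly built node from the generated release, check that it
# responds, then shut it down again.
cd rel/riak
./bin/riak start
./bin/riak ping    # expect "pong"
./bin/riak stop
```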
+ +### Windows + +Riak is not currently supported on Microsoft Windows. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.7/setup/installing/source/erlang.md b/content/riak/kv/2.9.7/setup/installing/source/erlang.md new file mode 100644 index 0000000000..feee2610d0 --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/source/erlang.md @@ -0,0 +1,572 @@ +--- +title: "Installing Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Installing Erlang" + identifier: "installing_source_erlang" + weight: 301 + parent: "installing_source" +toc: true +aliases: + - /riak/2.9.7/ops/building/installing/erlang + - /riak/kv/2.9.7/ops/building/installing/erlang + - /riak/2.9.7/installing/source/erlang/ + - /riak/kv/2.9.7/installing/source/erlang/ +--- + +[install index]: {{<baseurl>}}riak/kv/2.9.7/setup/installing +[security basics]: {{<baseurl>}}riak/kv/2.9.7/using/security/basics + +Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** + +> **Note on Official Support** +> +> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package. + +## Prerequisites + +#### Contents + +* [kerl](#kerl-prerequisites) +* [Debian/Ubuntu](#debian-ubuntu-prerequisites) +* [FreeBSD/Solaris](#freebsd-solaris-prerequisites) +* [Mac OS X](#mac-os-x-prerequisites) +* [RHEL/CentOS](#rhel-centos-prerequisites) + +To build and install Erlang you must have a GNU-compatible build system and these tools: + +**Unpacking** + +* [GNU unzip](http://www.gzip.org/) or a modern uncompressing utility. +* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives. + +**Building** + +* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts. +* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program. +* [gcc](https://gcc.gnu.org/): for compiling C. +* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces. +* [OpenSSL](https://www.openssl.org/): toolkit that implements SSL and TSL protocols. +* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java. + + +## kerl Prerequisites + +[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems. + +Install kerl by running the following command: + +```bash +curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl +chmod a+x kerl +``` + +If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl. + +Otherwise, continue with [Installing with kerl](#installing-with-kerl). 
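Before moving on, you can run a quick sanity check on the `kerl` script itself by asking it what it currently knows about (the lists will be empty on a fresh install):

```bash
# Quick sanity check: kerl should run and report its known builds,
# installations, and active status.
./kerl list builds
./kerl list installations
./kerl status
```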
### Configuring kerl on FreeBSD/Solaris

Start by creating a `~/.kerlrc` file:

```bash
touch ~/.kerlrc
```

Next, add the following contents to your `~/.kerlrc` file:

```shell
KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
                        --enable-kernel-poll --without-odbc"
```

Then check for the presence of autoconf by running:

```shell
which autoconf
```
If this returns `autoconf not found`, install autoconf by running:

```shell
sudo pkg update
sudo pkg install autoconf
```

Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).


### Configuring kerl on Mac OS X

To compile Erlang as 64-bit on Mac OS X, you need to instruct kerl to pass the correct flags to the `configure` command.

Start by creating a `~/.kerlrc` file:

```bash
touch ~/.kerlrc
```

Next, add the following contents to your `~/.kerlrc` file:

```shell
KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
                        --enable-kernel-poll --without-odbc --enable-darwin-64bit"
```

On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).

Check for the presence of autoconf by running:

```shell
which autoconf
```

If this returns `autoconf not found`, install autoconf. With Homebrew:

```shell
brew install autoconf
```

Or with curl:

```shell
curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
tar zxvf autoconf-2.69.tar.gz
cd autoconf-2.69
./configure && make && sudo make install
```

Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).



## Debian/Ubuntu Prerequisites

### Dependencies

To install the required dependencies, run the following `apt-get` commands:

```bash
sudo apt-get update
sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
```

### GUI Dependencies

If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.

> **Note on build output**
>
>These packages are not required for operation of a Riak node.
Notes in the build output about missing support for wxWidgets can be
safely ignored when installing Riak in a typical non-graphical server
environment.

To install packages for graphics support, use the following `apt-get` command:

```bash
sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
```

### Next Steps

Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).



## FreeBSD/Solaris Prerequisites

### Dependencies

To install the required dependencies, run the following `pkg` command:

```bash
sudo pkg update
sudo pkg install gcc autoconf gmake flex
```

### GUI Dependencies

If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.

To install packages for graphics support, use the following `pkg` command:

```bash
sudo pkg install wx28-gtk2-2.8.12_4
```

### Next Steps

Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).



## Mac OS X Prerequisites

* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
* [Homebrew](http://brew.sh/) (*optional*) - Package Manager.
First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X.

We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is not required to install Erlang and is optional.

Next, if you are running OS X 10.9 (Mavericks) or later, you may need to
install [autoconf](https://www.gnu.org/software/autoconf/). To check for
the presence of autoconf, run:

```bash
which autoconf
```

If this returns `autoconf not found`, install autoconf. With Homebrew:

```bash
brew install autoconf
```

Or with curl:

```bash
curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
tar zxvf autoconf-2.69.tar.gz
cd autoconf-2.69
./configure && make && sudo make install
```

Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x).

## RHEL/CentOS Prerequisites

### Dependencies

To install the required dependencies, run the following `yum` command:

```bash
sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
```

### GUI Dependencies

If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.

To install packages for graphics support, use the following `yum` command:

```bash
sudo yum install wxBase.x86_64
```

### Next Steps

Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos).



## Installation

* [Installing with kerl](#installing-with-kerl)
* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu)
* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris)
* [Installing on Mac OS X](#installing-on-mac-os-x)
* [Installing on RHEL/CentOS](#installing-on-rhel-centos)

## Installing with kerl

First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites).

With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of
Erlang [from GitHub](https://github.com/basho/otp) using the following
command:

```bash
./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10
```

This builds the Erlang distribution and performs all of the steps
required to manually install Erlang for you.

After Erlang is successfully built, you can install the build as follows:

```bash
./kerl install R16B02-basho10 ~/erlang/R16B02-basho10
. ~/erlang/R16B02-basho10/activate
```

The last line activates the Erlang build that was just installed into
`~/erlang/R16B02-basho10`.

> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands.

Confirm Erlang installed to the correct location:

```bash
which erl
```

And start Erlang from your terminal with:

```bash
erl
```


## Installing on Debian/Ubuntu

First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites).

Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz).
+ +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). + +You can install Erlang in several ways on OS X: + +* [From Source](#installing-on-mac-os-x-from-source) +* [Homebrew](#installing-on-mac-os-x-with-homebrew) +* [MacPorts](#installing-on-mac-os-x-with-macports) + +## Installing on Mac OS X from Source + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Follow the steps below to configure Erlang for your operating system. + +#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7) + +If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion +(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang. 
+ +Using LLVM: + +```bash +CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll --enable-darwin-64bit +``` + +Or if you prefer GCC: + +```bash +CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \ +./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll --enable-darwin-64bit +``` + +#### Configuring Erlang on Snow Leopard (OS X 10.6) + +If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an +Intel processor: + +```bash +./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll --enable-darwin-64bit +``` + +#### Configuring Erlang on older versions of OS X + +If you're on a non-Intel processor or older version of OS X: + +```bash +./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll +``` + +After you've configured your system `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on Mac OS X with Homebrew + +To install Erlang with Homebrew, use this command: + +```bash +brew install erlang +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on Mac OS X with MacPorts + +Installing with MacPorts: + +```bash +port install erlang +ssl +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on RHEL/CentOS + +First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites). + +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +> **Note for RHEL6/CentOS6** +> +> In certain versions of RHEL6 and CentO6 the `openSSL-devel` package +ships with Elliptical Curve Cryptography partially disabled. To +communicate this to Erlang and prevent compile- and run-time errors, the +environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to +Erlang's `./configure` call. 
+> +> The full `make` invocation then becomes +> +> ```bash +CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + + + + + diff --git a/content/riak/kv/2.9.7/setup/installing/source/jvm.md b/content/riak/kv/2.9.7/setup/installing/source/jvm.md new file mode 100644 index 0000000000..5614df5164 --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/source/jvm.md @@ -0,0 +1,56 @@ +--- +title: "Installing the JVM" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Installing the JVM" + identifier: "installing_source_jvm" + weight: 302 + parent: "installing_source" +toc: true +aliases: + - /riak/2.9.7/ops/building/installing/jvm + - /riak/kv/2.9.7/ops/building/installing/jvm + - /riak/2.9.7/ops/building/installing/Installing-the-JVM + - /riak/kv/2.9.7/ops/building/installing/Installing-the-JVM + - /riak/2.9.7/installing/source/jvm/ + - /riak/kv/2.9.7/installing/source/jvm/ +--- + +[usage search]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search + +If you are using [Riak Search 2.0][usage search], codename Yokozuna, +you will need to install **Java 1.6 or later** to run [Apache +Solr](https://lucene.apache.org/solr/), the search platform that powers +Riak Search. + +We recommend using Oracle's [JDK +7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html). +Installation packages can be found on the [Java SE 7 Downloads +page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR) +and instructions on the [documentation +page](http://www.oracle.com/technetwork/java/javase/documentation/index.html). + +## Installing Solr on OS X + +If you're using Riak Search on Mac OS X, you may see the following +error: + +```java +java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME> +``` + +If you encounter this error, we recommend manually setting the hostname +for `localhost` using +[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html). + +```bash +scutil --set HostName "localhost" +``` + + + + + diff --git a/content/riak/kv/2.9.7/setup/installing/suse.md b/content/riak/kv/2.9.7/setup/installing/suse.md new file mode 100644 index 0000000000..69ac0342cb --- /dev/null +++ b/content/riak/kv/2.9.7/setup/installing/suse.md @@ -0,0 +1,52 @@ +--- +title_supertext: "Installing on" +title: "SUSE" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "SUSE" + identifier: "installing_suse" + weight: 307 + parent: "installing" +toc: false +aliases: + - /riak/2.9.7/ops/building/installing/Installing-on-SUSE + - /riak/kv/2.9.7/ops/building/installing/Installing-on-SUSE + - /riak/2.9.7/installing/suse/ + - /riak/kv/2.9.7/installing/suse/ +--- + +[install verify]: {{<baseurl>}}riak/kv/2.9.7/setup/installing/verify + +{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}} +SUSE is no longer supported in Riak KV 2.9.7+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +Riak KV can be installed on OpenSuse and SLES systems using a binary package. 
+the following x86/x86_64 flavors of SUSE:
+
+* SLES11-SP1
+* SLES11-SP2
+* SLES11-SP3
+* SLES11-SP4
+* OpenSUSE 11.2
+* OpenSUSE 11.3
+* OpenSUSE 11.4
+
+## Installing with rpm
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm
+sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/installing/verify.md b/content/riak/kv/2.9.7/setup/installing/verify.md
new file mode 100644
index 0000000000..9f5ec9c877
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/installing/verify.md
@@ -0,0 +1,169 @@
+---
+title: "Verifying a Riak KV Installation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Verifying an Installation"
+    identifier: "installing_verify"
+    weight: 311
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/installing/Post-Installation
+  - /riak/kv/2.9.7/ops/installing/Post-Installation
+  - /riak/2.9.7/installing/verify-install/
+  - /riak/kv/2.9.7/installing/verify-install/
+---
+
+[client libraries]: {{<baseurl>}}riak/kv/2.9.7/developing/client-libraries
+[perf open files]: {{<baseurl>}}riak/kv/2.9.7/using/performance/open-files-limit
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/bucket-types
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/inspecting-node
+
+After you've installed Riak KV, we recommend checking the liveness of
+each node to ensure that requests are being properly served.
+
+In this document, we cover ways of verifying that your Riak nodes are operating
+correctly. After you've determined that your nodes are functioning and you're
+ready to put Riak KV to work, be sure to check out the resources in the
+**Now what?** section below.
+
+## Starting a Riak Node
+
+> **Note about source installations**
+>
+> To start a Riak KV node that was installed by compiling the source code, you
+can add the Riak KV binary directory from the installation directory you've
+chosen to your `PATH`.
+>
+> For example, if you compiled Riak KV from source in
+the `/home/riak` directory, then you can add the binary directory
+(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation.
+
+To start a Riak node, use the `riak start` command:
+
+```bash
+riak start
+```
+
+A successful start will return no output. If there is a problem starting the
+node, an error message is printed to standard error.
+
+To run Riak with an attached interactive Erlang console:
+
+```bash
+riak console
+```
+
+A Riak node is typically started in console mode as part of debugging or
+troubleshooting to gather more detailed information from the Riak startup
+sequence. Note that if you start a Riak node in this manner, it runs as
+a foreground process that will exit when the console is closed.
+
+You can close the console by issuing this command at the Erlang prompt:
+
+```erlang
+q().
+```
+
+Once your node has started, you can initially check that it is running with
+the `riak ping` command:
+
+```bash
+riak ping
+```
+
+The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not.
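+
+For example, a healthy round trip looks like this (a minimal sketch; the
+comments describe typical output, which can vary slightly by version):
+
+```bash
+riak start   # returns silently on success
+riak ping    # prints "pong" while the node is up
+riak stop    # stops the node again; typically prints "ok"
+```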
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the `default` [bucket type][cluster ops bucket types]:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /riak/test HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node!
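+
+If you'd like to repeat the checks from this page in one pass, here is a
+minimal sketch of a wrapper script (it assumes the `riak` and `riak-admin`
+commands are on your `PATH` and the node is already started):
+
+```bash
+#!/bin/sh
+# Minimal sketch: re-run the liveness checks described on this page.
+set -e
+riak ping          # expects "pong" from a running node
+riak-admin test    # performs one read/write cycle against this node
+riak-admin diag    # runs the Riaknostic health checks
+```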
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/installing/windows-azure.md b/content/riak/kv/2.9.7/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..dce5fc29d7
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/installing/windows-azure.md
@@ -0,0 +1,197 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/2.9.7/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/2.9.7/installing/windows-azure/
+  - /riak/kv/2.9.7/installing/windows-azure/
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported, and thus are not always up
+to date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Log in to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+   - Provide a "Virtual Machine Name", such as "testlinuxvm".
+   - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+   - In the "New Password" box, type a strong password.
+   - In the "Confirm Password" box, retype the password.
+   - Select the appropriate "Size" from the drop down list.
+   - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+   - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list.
+   - In the "DNS Name" box, type a valid DNS address, e.g., "testlinuxvm".
+   - In the "Storage Account" box, select "Use Automatically Generated Storage Account".
+   - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted.
+   - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration2.png)
+
+6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration3.png)
+
+7. Wait while Windows Azure prepares your virtual machine.
+
+### Configure Endpoints
+
+Once the virtual machine is created, you must configure endpoints in order to connect to it remotely.
+
+1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints".
+
+2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow, and fill out the next form as follows:
+   - Name: https
+   - Protocol: leave set to 'TCP'
+   - Public Port: 443
+   - Private Port: 8069
+
+## Connect to CentOS VMs using PuTTY or SSH
+
+When the virtual machine has been provisioned and the endpoints configured, you can connect to it using SSH or PuTTY.
+
+### Connecting Using SSH
+
+**For Linux & Mac Users:**
+
+```bash
+ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180
+```
+
+Enter the user's password.
+
+**For Windows Users, use PuTTY:**
+
+If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html).
+
+1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe.
+
+2. Enter the SSH details as found on the node's dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port.
+
+    ![]({{<baseurl>}}images/putty.png)
+
+## Install Riak and configure using a shell script
+
+1. **On each node**, once you've connected using the steps above, execute:
+
+```bash
+sudo su -
+curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh
+```
+
+## Configure Riak using Riak Control
+
+You can either use Riak Control or the command line to add nodes to your Riak cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line".
+
+1. Find the DNS name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For example:
+   - **DNS:** basho-example.cloudapp.net
+   - **Deployment ID:** 7ea145743aeb4402a088da1234567890
+
+2. Visit https://dns-name.cloudapp.net/admin in your browser.
+
+3. Enter 'admin' as the username, and the "Deployment ID" as the password.
+
+4. Select 'Cluster' on the left.
+
+5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each VM, not the DNS name. For example:
+   - riak@basho-centos1
+
+You now have a Riak cluster on Azure.
+
+## Configure Riak using Command Line
+
+If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section.
+
+First, SSH into the second (and subsequent) nodes and execute:
+
+```bash
+riak-admin cluster join riak@yourhostnamehere
+```
+
+(Where 'yourhostnamehere' is the short name of the **first node** in your cluster.)
+
+(Note: The host you choose can actually be any host that has already joined the cluster. The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak-admin cluster plan
+```
+
+Verify all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak-admin cluster commit
+```
+
+To check the status of clustering, use:
+
+```bash
+riak-admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/planning.md b/content/riak/kv/2.9.7/setup/planning.md
new file mode 100644
index 0000000000..be8966b1ef
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/planning.md
@@ -0,0 +1,61 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster.
+
+[Learn More >>][plan best practices]
+
+
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/planning/backend.md b/content/riak/kv/2.9.7/setup/planning/backend.md
new file mode 100644
index 0000000000..30df6ce187
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/planning/backend.md
@@ -0,0 +1,60 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/building/planning/backends/
+  - /riak/kv/2.9.7/ops/building/planning/backends/
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/multi
+[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveled
+[dev api backend]: {{<baseurl>}}riak/kv/2.9.7/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+* [Leveled][plan backend leveled]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic                      |Bitcask|LevelDB|Memory|
+:----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend                        |✓      |       |      |
+Persistent                                     |✓      |✓      |      |
+Keyspace in RAM                                |✓      |       |✓     |
+Keyspace can be greater than available RAM     |       |✓      |      |
+Keyspace loaded into RAM on startup<sup>1</sup>|✓      |       |      |
+Objects in RAM                                 |       |       |✓     |
+Object expiration                              |✓      |       |✓     |
+Secondary indexes                              |       |✓      |✓     |
+Tiered storage                                 |       |✓      |      |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces.
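+
+Whichever backend you choose, you can confirm which backend a running
+node is actually using. A quick check (the same `riak config` command
+the Bitcask guide uses):
+
+```bash
+riak config effective | grep backend
+```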
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/planning/backend/bitcask.md b/content/riak/kv/2.9.7/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..ea1b11f855
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/planning/backend/bitcask.md
@@ -0,0 +1,995 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/advanced/backends/bitcask/
+  - /riak/kv/2.9.7/ops/advanced/backends/bitcask/
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/2.9.7/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: (1) data that is
+  written to Bitcask doesn't need to be ordered on disk, and (2) the
+  log-structured design of Bitcask allows for minimal disk head
+  movement during writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value, and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append only and write once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes.
Recovery operations need to review only the last + record or two written and verify CRC data to ensure that the data is + consistent. + +* **Easy Backup** + + In most systems, backup can be very complicated. Bitcask simplifies + this process due to its append-only, write-once disk format. Any + utility that archives or copies files in disk-block order will + properly back up or copy a Bitcask database. + +## Weaknesses + +* Keys must fit in memory + + Bitcask keeps all keys in memory at all times, which means that your + system must have enough memory to contain your entire keyspace, plus + additional space for other operational components and operating- + system-resident filesystem buffer space. + +## Installing Bitcask + +Bitcask is the default storage engine for Riak. You can verify that +Bitcask is currently being used as the storage backend with the +[`riak`][use admin riak cli] command interface: + +```bash +riak config effective | grep backend +``` + +If this operation returns anything other than `bitcask`, read +the following section for instructions on switching the backend to Bitcask. + +## Enabling Bitcask + +You can set Bitcask as the storage engine using each node's +[configuration files][config reference]: + +```riakconf +storage_backend = bitcask +``` + +```appconfig +{riak_kv, [ + {storage_backend, riak_kv_bitcask_backend}, + %% Other riak_kv settings... + + ]}, +``` + +## Configuring Bitcask + +Bitcask enables you to configure a wide variety of its behaviors, from +filesystem sync strategy to merge settings and more. + +> **Note on configuration systems** +> +> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file. +> Instructions for both systems will be included below. Narrative +descriptions of the various settings will be tailored to the newer +configuration system, whereas instructions for the older system will +largely be contained in the code tabs. + +The default configuration values for Bitcask are as follows: + +```riakconf +bitcask.data_root = ./data/bitcask +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + {data_root, "/var/lib/riak/bitcask"}, + {io_mode, erlang}, + + %% Other Bitcask-specific settings + ]} +``` + +All of the other available settings listed below can be added to your +configuration files. + +### Open Timeout + +The open timeout setting specifies the maximum time Bitcask will block +on startup while attempting to create or open the Bitcask data +directory. The default is 4 seconds. + +In general, you will not need to adjust this setting. If, however, you +begin to receive log messages of the form `Failed to start bitcask +backend: ...`, you may want to consider using a longer timeout. + +Open timeout is specified using the `bitcask.sync.open_timeout` +parameter, and can be set in terms of seconds, minutes, hours, etc. +The following example sets the parameter to 10 seconds: + +```riakconf +bitcask.sync.open_timeout = 10s +``` + +```appconfig +{bitcask, [ + ..., + {open_timeout, 10} %% This value must be expressed in seconds + ... + ]} +``` + +### Sync Strategy + +Bitcask enables you to configure the durability of writes by specifying +when to synchronize data to disk, i.e. by choosing a sync strategy. The +default setting (`none`) writes data into operating system buffers that +will be written to disk when those buffers are flushed by the operating +system. 
If the system fails before those buffers are flushed, e.g. due
+to power loss, that data is lost. This possibility holds for any
+database in which values are asynchronously flushed to disk.
+
+Thus, using the default setting of `none` protects against data loss in
+the event of application failure, i.e. process death, but leaves open a
+small window in which data could be lost in the event of a complete
+system failure, e.g. hardware or OS failure.
+
+This possibility can be prevented by choosing the `o_sync` sync
+strategy, which forces the operating system to flush to stable storage
+at write time for every write. The effect of flushing each write is
+better durability, although it should be noted that write throughput
+will suffer because each write will have to wait for the flush to
+complete.
+
+The following sync strategies are available:
+
+  * `none` - lets the operating system manage syncing writes
+    (default)
+  * `o_sync` - uses the `O_SYNC` flag, which forces syncs on every
+    write
+  * Time interval - Riak will force Bitcask to sync at specified
+    intervals
+
+The following are possible configurations:
+
+
+```riakconf
+bitcask.sync.strategy = none
+bitcask.sync.strategy = o_sync
+
+bitcask.sync.strategy = interval
+bitcask.sync.interval = 65s
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {sync_strategy, none},
+    {sync_strategy, o_sync},
+    {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds
+    ...
+    ]}
+```
+
+> **Sync strategy interval limitations**
+>
+> Setting the sync interval to a value lower than or equal to
+  `riak_core.vnode_inactivity_timeout` (default: 60 seconds) will
+  prevent Riak from performing handoffs.
+>
+> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive.
+
+### Max File Size
+
+The `max_file_size` setting describes the maximum permitted size for any
+single data file in the Bitcask directory. If a write causes the current
+file to exceed this size threshold, then that file is closed, and a new
+file is opened for writes. The default is 2 GB.
+
+Increasing `max_file_size` will cause Bitcask to create fewer, larger
+files that are merged less frequently, while decreasing it will cause
+Bitcask to create more numerous, smaller files that are merged more
+frequently.
+
+To give an example, if your ring size is 16, your servers could see as
+much as 32 GB of data in the bitcask directories before the first merge
+is triggered, irrespective of your working set size. You should plan
+storage accordingly and be aware that it is possible to see disk data
+sizes that are larger than the working set.
+
+The `max_file_size` setting can be specified using kilobytes, megabytes,
+etc. The following example sets the max file size to 1 GB:
+
+```riakconf
+bitcask.max_file_size = 1GB
+```
+
+```appconfig
+%% The max_file_size setting must be expressed in bytes, as in the
+%% example below
+
+{bitcask, [
+    ...,
+    {max_file_size, 16#40000000}, %% 1 GB expressed in bytes
+    ...
+    ]}
+```
+
+### Hint File CRC Check
+
+During startup, Bitcask will read from `.hint` files in order to build
+its in-memory representation of the key space, falling back to `.data`
+files if necessary. This reduces the amount of data that must be read
+from the disk during startup, thereby also reducing the time required to
+start up. You can configure Bitcask to either disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) - Writes are made via Erlang's built-in file API
+* `nif` - Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif` and is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met, at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory.
+ +The merge process is affected by all of the settings described in the +sections below. In those sections, "dead" refers to keys that no longer +contain the most up-to-date values, while "live" refers to keys that do +contain the most up-to-date value and have not been deleted. + +### Merge Policy + +Bitcask enables you to select a merge policy, i.e. when during the day +merge operations are allowed to be triggered. The valid options are: + +* `always` - No restrictions on when merge operations can occur + (default) +* `never` - Merge will never be attempted +* `window` - Merge operations occur during specified hours + +If you are using the newer, `riak.conf`-based configuration system, you +can select a merge policy using the `merge.policy` setting. The +following example sets the merge policy to `never`: + +```riakconf +bitcask.merge.policy = never +``` + +```appconfig +{bitcask, [ + ..., + {merge_window, never}, + ... + ]} +``` + +If you opt to specify start and end hours for merge operations, you can +do so with the `merge.window.start` and `merge.window.end` +settings in addition to setting the merge policy to `window`. +Each setting is an integer between 0 and 23 for hours on a 24h clock, +with 0 meaning midnight and 23 standing for 11 pm. +The merge window runs from the first minute of the `merge.window.start` hour +to the last minute of the `merge.window.end` hour. +The following example enables merging between 3 am and 4:59 pm: + +```riakconf +bitcask.merge.policy = window +bitcask.merge.window.start = 3 +bitcask.merge.window.end = 17 +``` + +```appconfig +%% In the app.config-based system, you specify the merge window using +%% a tuple, as in the following example: + +{bitcask, [ + ..., + {merge_window, {3, 17}}, + ... + ]} +``` + +> **`merge_window` and the Multi backend** +> +>If you are using the older configuration system and using Bitcask with +the [Multi][plan backend multi] backend, please note that if you +wish to use a merge window, you _must_ set it in the global `bitcask` +section of your configuration file. `merge_window` settings +in per-backend sections are ignored. + +If merging has a significant impact on performance of your cluster, or +if your cluster has quiet periods in which little storage activity +occurs, you may want to change this setting from the default. + +A common way to limit the impact of merging is to create separate merge +windows for each node in the cluster and ensure that these windows do +not overlap. This ensures that at most one node at a time can be +affected by merging, leaving the remaining nodes to handle requests. +The main drawback of this approach is that merges will occur less +frequently, leading to increased disk space usage. + +### Merge Triggers + +Merge triggers determine the conditions under which merging will be +invoked. These conditions fall into two basic categories: + +* **Fragmentation** - This describes the ratio of dead keys to total + keys in a file that will trigger merging. The value of this setting is + an integer percentage (0-100). For example, if a data file contains 6 + dead keys and 4 live keys, a merge will be triggered by the default + setting (60%). Increasing this value will cause merging to occur less + often, whereas decreasing the value will cause merging to happen more + often. + +* **Dead Bytes** - This setting describes how much data stored for + dead keys in a single file will trigger merging. If a file meets or + exceeds the trigger value for dead bytes, a merge will be triggered. 
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+  When either of these constraints is met by any file in the directory,
+  Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** - This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in the
+  merge. The value of this setting is a percentage (0-100). For
+  example, if a data file contains 4 dead keys and 6 live keys, it
+  will be included in the merge at the default ratio (40%). Increasing
+  the value will cause fewer files to be merged, while decreasing the
+  value will cause more files to be merged.
+
+* **Dead Bytes** - This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be included in
+  the merge. Increasing this value will cause fewer files to be merged,
+  while decreasing this value will cause more files to be merged. The
+  default is 128 MB.
+
+* **Small File** - This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_.
If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +Fold keys thresholds will reuse the keydir (a) if another fold was +started less than a specified time interval ago and (b) there were fewer +than a specified number of updates. Otherwise, Bitcask will wait until +all current fold keys complete and then start. The default time interval +is 0, while the default number of updates is unlimited. Both thresholds +can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. Each threshold can be disabled by setting the value to +`unlimited`. 
The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value, or if you need to purge data for space reasons, you can
+configure object expiration, aka expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting
+and either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time.
+Bitcask will defer triggering a merge solely for key expiry by the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting and
+in terms of minutes, hours, days, etc. The following example sets the
+grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still be registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If search queries returning
+expired keys is a problem for your use case, then we would recommend not
+using automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can assist you in making Bitcask as stable and reliable as
+possible and in minimizing latency and maximizing throughput.
+
+### Tips & Tricks
+
+  * **Bitcask depends on filesystem caches**
+
+    Some data storage layers implement their own page/block buffer cache
+    in-memory, but Bitcask does not. Instead, it depends on the
+    filesystem's cache. Adjusting the caching characteristics of your
+    filesystem can impact performance.
+
+  * **Be aware of file handle limits**
+
+    Review the documentation on [open files limit][perf open files].
+
+  * **Avoid the overhead of updating file metadata (such as last access
+    time) on every read or write operation**
+
+    You can achieve a substantial speed boost by adding the `noatime`
+    mounting option to Linux's `/etc/fstab`. This will disable the
+    recording of the last accessed time for all files, which results
+    in fewer disk head seeks. If you need last access times but you'd
+    like some of the benefits of this optimization, you can try
+    `relatime`.
+
+    ```
+    /dev/sda5    /data           ext3    noatime  1 1
+    /dev/sdb1    /data/inno-log  ext3    noatime  1 2
+    ```
+
+  * **Small number of frequently changed keys**
+
+    When keys are changed frequently, fragmentation rapidly increases.
+    To counteract this, you should lower the fragmentation trigger and
+    threshold.
+
+  * **Limited disk space**
+
+    When disk space is limited, limiting the space occupied by dead keys
+    is of paramount importance. Lower the dead bytes threshold and
+    trigger to counteract wasted space.
+
+  * **Purging stale entries after a fixed period**
+
+    To automatically purge stale values, set the object expiry value to
+    the desired cutoff time. Keys that are not modified for a period
+    equal to or greater than this time interval will become
+    inaccessible.
+
+  * **High number of partitions per node**
+
+    Because each cluster has many partitions running, Bitcask will have
+    many [open files][perf open files]. To reduce the number of open
+    files, we suggest increasing the max file size so that larger files
+    will be written. You could also decrease the fragmentation and
+    dead-bytes settings and increase the small file threshold so that
+    merging will keep the number of open files low.
+
+  * **High daytime traffic, low nighttime traffic**
+
+    In order to cope with a high volume of writes without performance
+    degradation during the day, you might want to limit merging to
+    non-peak periods. Setting the merge window to hours of the day
+    when traffic is low will help.
+
+  * **Multi-cluster replication**
+
+    If you are using Riak with the replication feature enabled, your clusters might experience
+    higher rates of fragmentation and dead bytes. Additionally,
+    because the fullsync feature operates across entire partitions, it
+    will be made more efficient by accessing data as sequentially as
+    possible (across fewer files). Lowering both the fragmentation and
+    dead-bytes settings will improve performance.
+
+## FAQ
+
+  * [[Why does it seem that Bitcask merging is only triggered when a
+    Riak node is restarted?|Developing on Riak
+    FAQs#why-does-it-seem-that-bitc]]
+  * [[If the size of key index exceeds the amount of memory, how does
+    Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+  * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time. The file being
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, while
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. This data management strategy
+may use up a lot of space over time, since Bitcask writes new values
+without touching the old ones.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033322.9.744576013453623296
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+9819464125817003981681007469812.9.03831329677312
+```
+
+Note that when starting up, directories are created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 116463411724806326294356135107078803128832.9.0184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered.
+ +``` +bitcask/ +|-- 0-1317147619996589 +| |-- 1317147974.bitcask.data +| |-- 1317147974.bitcask.hint +| |-- 1317221578.bitcask.data +| |-- 1317221578.bitcask.hint +| |-- 1317221869.bitcask.data +| |-- 1317221869.bitcask.hint +| |-- 1317222847.bitcask.data +| |-- 1317222847.bitcask.hint +| |-- 1317222868.bitcask.data +| |-- 1317222868.bitcask.hint +| |-- 1317223014.bitcask.data +| `-- 1317223014.bitcask.hint +|-- 1004782375664995756265033322.9.744576013453623296-1317147628760580 +| |-- 1317147693.bitcask.data +| |-- 1317147693.bitcask.hint +| |-- 13172.9.05.bitcask.data +| |-- 13172.9.05.bitcask.hint +| |-- 1317222514.bitcask.data +| |-- 1317222514.bitcask.hint +| |-- 1317223035.bitcask.data +| |-- 1317223035.bitcask.hint +| |-- 1317223411.bitcask.data +| `-- 1317223411.bitcask.hint +|-- 1027618338748291114361965898003636498195577569280-1317223690337865 +|-- 1050454301831586472458898473514828420377701515264-1317223690151365 + +... etc ... + +``` + +This is normal operational behavior for Bitcask. + + + + diff --git a/content/riak/kv/2.9.7/setup/planning/backend/leveldb.md b/content/riak/kv/2.9.7/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..181b548e54 --- /dev/null +++ b/content/riak/kv/2.9.7/setup/planning/backend/leveldb.md @@ -0,0 +1,506 @@ +--- +title: "LevelDB" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "LevelDB" + identifier: "planning_backend_leveldb" + weight: 101 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.7/ops/advanced/backends/leveldb/ + - /riak/kv/2.9.7/ops/advanced/backends/leveldb/ +--- + +[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}} +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode +[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference +[perf index]: {{<baseurl>}}riak/kv/2.9.7/using/performance +[config reference#aae]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/#active-anti-entropy + +> **Note on upgrading to 2.0** +> +> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0, +and wish to keep using your old `app.config` file for configuration, +make sure to follow the steps for setting the +`total_leveldb_mem_percent` parameter in the +[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB]. + +[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application +that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an +open-source, on-disk key/value store created by Google Fellows Jeffrey +Dean and Sanjay Ghemawat. + +LevelDB is a relatively new entrant into the growing list of key/value +database libraries, but it has some very interesting qualities that we +believe make it an ideal candidate for use in Riak. LevelDB's storage +architecture is more like +[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable +model than it is like Bitcask. This design and implementation provide +the possibility of a storage engine without Bitcask's RAM limitation. + +> **Note:** Riak uses a fork of LevelDB. The code can be found +[on Github](https://github.com/basho/leveldb). + +A number of changes have been introduced in the LevelDB backend in Riak +2.0: + +* There is now only _one_ performance-related setting that Riak users + need to define---`leveldb.total_mem_percent`---as LevelDB now + dynamically sizes the file cache and block sizes based upon active + [vnodes][glossary vnode] assigned to the node. 
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model
+  for background compaction work on `.sst` table files. The new model
+  has increased throughput by at least 10% in all test scenarios.
+* Delete operations now receive priority handling in compaction
+  selection, which means more aggressive reclaiming of disk space than
+  in previous versions of Riak's LevelDB backend.
+* Nodes storing massive key datasets (e.g. in the billions of keys) now
+  receive increased throughput due to automatic management of LevelDB's
+  block size parameter. This parameter is slowly raised to increase the
+  number of files that can open simultaneously, improving random read
+  performance.
+
+## Strengths
+
+1. **License** - The LevelDB and eLevelDB licenses are the [New BSD
+   License](http://www.opensource.org/licenses/bsd-license.php) and the
+   [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html),
+   respectively. We'd like to thank Google and the authors of LevelDB at
+   Google for choosing a completely FLOSS license so that everyone can
+   benefit from this innovative storage engine.
+2. **Data compression** - LevelDB provides two compression algorithms
+   to reduce storage size and increase efficient use of storage bandwidth:
+    * Google's [Snappy](https://code.google.com/p/snappy/) data compression
+    * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data
+      compression
+
+   Enabling compression means more CPU usage but less disk space. Compression
+   is especially good for text data, including raw text, Base64, JSON, etc.
+
+## Weaknesses
+
+1. Read access can be slow when there are many levels to search
+2. LevelDB may have to do a few disk seeks to satisfy a read: one disk
+   seek per level. If 10% of the database fits in memory, LevelDB will
+   need one seek, for the last level (since all of the earlier levels
+   should end up cached in the OS buffer cache for most filesystems),
+   whereas if 1% fits in memory, it will need two seeks.
+
+## Installing eLevelDB
+
+Riak ships with eLevelDB included within the distribution, so there is
+no separate installation required. However, Riak is configured to use
+the Bitcask storage engine by default. To switch to eLevelDB, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveldb`:
+
+```riakconf
+storage_backend = leveldb
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_eleveldb_backend},
+    %% ...
+    ]}
+```
+
+## Configuring eLevelDB
+
+eLevelDB's default behavior can be modified by adding/changing
+parameters in the `eleveldb` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for eLevelDB are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.data_root` | LevelDB data root | `./data/leveldb`
+`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size. | `70`
| `70` + +If you are using the older, `app.config`-based system, the equivalent to +the `leveldb.data_root` is the `data_root` setting, as in the following +example: + +```appconfig +{eleveldb, [ + {data_root, "/path/to/leveldb"}, + + %% Other eleveldb-specific settings +]} +``` + +The `leveldb.maximum_memory.percent` setting is only available in the +newer configuration system. + +### Recommended Settings + +Below are **general** configuration recommendations for Linux +distributions. Individual users may need to tailor these settings for +their application. + +#### sysctl + +For production environments, please see [System Performance Tuning][perf index] +for the recommended `/etc/sysctl.conf` settings. + +#### Block Device Scheduler + +Beginning with the 2.6 kernel, Linux gives you a choice of four I/O +[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We +recommend using the NOOP elevator. You can do this by changing the +scheduler on the Linux boot line: `elevator=noop`. + +#### ext4 Options + +The ext4 filesystem defaults include two options that increase integrity +but slow performance. Because Riak's integrity is based on multiple +nodes holding the same data, these two options can be changed to boost +LevelDB's performance. We recommend setting: `barrier`=0 and +`data`=writeback. + +#### CPU Throttling + +If CPU throttling is enabled, disabling it can boost LevelDB performance +in some cases. + +#### No Entropy + +If you are using https protocol, the 2.6 kernel is widely known for +stalling programs waiting for SSL entropy bits. If you are using https, +we recommend installing the +[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for +pseudorandom number generation. + +#### clocksource + +We recommend setting `clocksource=hpet` on your Linux kernel's `boot` +line. The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a +Google-sponsored open source project that has been incorporated into an +Erlang application and integrated into Riak for storage of key/value +information on disk. The implementation of LevelDB is similar in spirit +to the representation of a single Bigtable tablet (section 5.3). + +### How Levels Are Managed + +LevelDB is a memtable/sstable design. The set of sorted tables is +organized into a sequence of levels. Each level stores approximately ten +times as much data as the level before it. The sorted table generated +from a flush is placed in a special young level (also called level-0). +When the number of young files exceeds a certain threshold (currently +four), all of the young files are merged together with all of the +overlapping level-1 files to produce a sequence of new level-1 files (a +new level-1 file is created for every 2MB of data.) + +Files in the young level may contain overlapping keys. However files in +other levels have distinct non-overlapping key ranges. Consider level +number L where L >= 1. When the combined size of files in level-L +exceeds (10^L) MB (i.e. 
10MB for level-1, 100MB for level-2, ...), one
+file in level-L, and all of the overlapping files in level-(L+1) are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-L+1). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 second.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0, the only special case to consider is that after
+picking the primary L0 file to compact, it will check other L0 files to
+determine the degree to which they overlap. This is an attempt to avoid
+some I/O; in practice, we can expect L0 compactions to usually, if not
+always, be "all L0 files". 
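+
+For illustration only, the scoring rule just described can be sketched in
+a few lines of Python, using illustrative constants (four young files
+before compaction, 10x level sizing); this is a simplified model, not the
+backend's actual code:
+
+```python
+L0_FILE_TRIGGER = 4   # young (level-0) files allowed before compaction
+LEVEL1_MAX_MB = 10    # level-1 size limit; each level is 10x the previous
+
+def compaction_scores(level0_files, mb_per_level):
+    """Score each level; the level with the highest score compacts next."""
+    scores = {0: level0_files / L0_FILE_TRIGGER}  # level 0: files / desired files
+    for level, mb in mb_per_level.items():
+        scores[level] = mb / (LEVEL1_MAX_MB * 10 ** (level - 1))  # bytes ratio
+    return scores
+
+# Six young files, 8MB in level-1, 120MB in level-2:
+scores = compaction_scores(6, {1: 8, 2: 120})
+print(max(scores, key=scores.get))  # -> 0 (score 1.5 beats 0.8 and 1.2)
+```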
+
+See the `PickCompaction` routine in
+[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition ring
+which results in 64 separate directories, each with their own LevelDB
+database:
+
+```bash
+leveldb/
+|-- 0
+| |-- 000003.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| `-- MANIFEST-000002
+|-- 1004782375664995756265033322.9.744576013453623296
+| |-- 000005.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| |-- LOG.old
+| `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+| |-- 000005.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| |-- LOG.old
+| `-- MANIFEST-000004
+
+... etc ...
+
+`-- 9819464125817003981681007469812.9.03831329677312
+ |-- 000005.log
+ |-- CURRENT
+ |-- LOCK
+ |-- LOG
+ |-- LOG.old
+ `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033322.9.744576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files in
+a single database directory. In Riak 1.3, the original LevelDB code was
+modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in order to speed up
+database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels. 
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+ {tiered_slow_level, 4},
+ {tiered_fast_prefix, "/mnt/fast_raid"},
+ {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows, for each level, the
+approximate size (in megabytes) of the raw data stored in that level,
+the cumulative size of all levels up to and including that level, and
+the cumulative size including active anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps (a worked example follows the list):
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+ value of the `ring_size` configuration parameter and N is the number
+ of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+ 10 nodes, the value would be 14 (rounding down).
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+ column from the table above. Select the third column if you are not
+ using active anti-entropy or the fourth column if you are (i.e. if the
+ `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative column in
+ each row in the table. The first result that exceeds your fast storage
+ array capacity will provide the level number that should be used for
+ your `leveldb.tiered` setting.
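+
+To make these steps concrete, here is a small Python sketch of the same
+arithmetic. The function name and the 1.5TB fast-array capacity are
+hypothetical; the table values are the **Cumulative with AAE** column
+above:
+
+```python
+# Cumulative with AAE sizes (in MB) from the table above; level 6 is unlimited.
+CUMULATIVE_WITH_AAE_MB = {0: 720, 1: 5040, 2: 10920, 3: 23208,
+                          4: 268968, 5: 4993432}
+
+def pick_tiered_level(ring_size, nodes, fast_capacity_mb):
+    # Step 1: (ring size) / (N - 1), rounded down
+    vnodes_per_node = ring_size // (nodes - 1)
+    # Step 3: the first level whose cumulative size exceeds the fast
+    # array's capacity is the value to use for leveldb.tiered
+    for level in sorted(CUMULATIVE_WITH_AAE_MB):
+        if vnodes_per_node * CUMULATIVE_WITH_AAE_MB[level] > fast_capacity_mb:
+            return level
+    return 6  # level 6 is not limited, so any remaining data lands there
+
+# ring_size = 128, 10 nodes, ~1.5TB (in MB) of fast storage
+print(pick_tiered_level(128, 10, 1.5 * 1024 * 1024))  # -> 4
+```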
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/planning/backend/leveled.md b/content/riak/kv/2.9.7/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..bc62544ea3
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/planning/backend/leveled.md
@@ -0,0 +1,141 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/advanced/backends/leveled/
+  - /riak/kv/2.9.7/ops/advanced/backends/leveled/
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/2.9.7/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/#active-anti-entropy
+
+[Leveled](https://github.com/martinsumner/leveled) is a simple key-value
+store based on the concept of log-structured merge trees, with the
+following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+  - Splits the storage of the value between keys/metadata and body
+    (assuming some definition of metadata is provided), and allows the
+    application to define what constitutes object metadata and what
+    constitutes the body (value-part) of the object - and to assign tags
+    to objects to manage multiple object types with different extraction
+    rules;
+  - Stores keys/metadata in a merge tree and the full object in a journal
+    of CDB files, allowing for HEAD requests which have lower overheads
+    than GET requests; and
+  - Allows queries which traverse keys/metadata to be supported with
+    fewer side effects on the page cache than folds over keys/objects.
+- Support for tagging of object types and the implementation of
+  alternative store behaviour based on type:
+  - Allows for changes to extract specific information as metadata to be
+    returned from HEAD requests;
+  - Potentially usable for objects with special retention or merge
+    properties.
+- Support for low-cost clones without locking to provide for scanning
+  queries (e.g. secondary indexes):
+  - Low cost specifically where there is a need to scan across keys and
+    metadata (not values).
+- Written in Erlang as a message-passing system between actors.
+
+
+## Strengths
+
+1. leveled was developed specifically as a potential backend for Riak, with features such as:
+ * Support for secondary indexes
+ * Multiple fold types
+ * Auto expiry of objects
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and more likely to suffer from edge case issues than Bitcask or LevelDB, simply because they've been around longer and have been more thoroughly tested via usage in customer environments.
+2. Leveled works better with medium to larger sized objects. It works perfectly well with small objects, but the additional disk space overhead may render LevelDB a better choice if disk space is at a premium and all of your data will be exclusively limited to a few KB or less. This may change as Leveled matures, though.
+
+## Installing leveled
+
+Leveled is included with Riak KV 2.9.7 and beyond, so there is no need to install anything further. To switch to leveled, set the `storage_backend` variable in [`riak.conf`][config reference] to `leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+ %% ...
+ {storage_backend, riak_kv_leveled_backend},
+ %% ...
+ ]}
+```
+
+## Configuring leveled
+
+Leveled's default behavior can be modified by adding/changing
+parameters in the `leveled` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | Compression method. | `native`
+`leveled.compression_point` | Compression point - the point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | Log level - sets the minimum log level to be used within leveled. | `info`
+`leveled.journal_size` | The approximate size (in bytes) when a Journal file should be rolled. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
+`leveled.max_run_length` | Max journal files per compaction run. | `4`
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### No Entropy
+
+If you are using the HTTPS protocol, the 2.6 kernel is widely known for
+stalling programs waiting for SSL entropy bits. If you are using HTTPS,
+we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line. 
The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[Leveled](https://github.com/martinsumner/leveled) is an open source project that has been developed specifically as a backend option for Riak, rather than a generic backend. + + + + diff --git a/content/riak/kv/2.9.7/setup/planning/backend/memory.md b/content/riak/kv/2.9.7/setup/planning/backend/memory.md new file mode 100644 index 0000000000..4815b663e5 --- /dev/null +++ b/content/riak/kv/2.9.7/setup/planning/backend/memory.md @@ -0,0 +1,147 @@ +--- +title: "Memory" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Memory" + identifier: "planning_backend_memory" + weight: 102 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.7/ops/advanced/backends/memory/ + - /riak/kv/2.9.7/ops/advanced/backends/memory/ +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference +[plan backend multi]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/multi +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb + +The Memory storage backend uses in-memory tables to store all data. +This data is never persisted to disk or to any other storage mechanism. +The Memory storage engine is best used for testing Riak clusters or for +storing small amounts of transient state in production systems. + +Internally, the Memory backend uses Erlang Ets tables to manage data. +More information can be found in the +[official Erlang documentation](http://www.erlang.org/doc/man/ets.html). + +## Enabling the Memory Backend + +To enable the memory backend, edit your [configuration files][config reference] +for each Riak node and specify the Memory backend as shown in the following +example: + +```riakconf +storage_backend = memory +``` + +```appconfig +{riak_kv, [ + ..., + {storage_backend, riak_kv_memory_backend}, + ... + ]} +``` + +**Note**: If you *replace* the existing specified backend by removing it +or commenting it out as shown in the above example, data belonging to +the previously specified backend will still be preserved on the +filesystem but will no longer be accessible through Riak unless the +backend is enabled again. + +If you require multiple backends in your configuration, please consult +the [Multi backend documentation][plan backend multi]. + +## Configuring the Memory Backend + +The Memory backend enables you to configure two fundamental aspects of +object storage: maximum memory usage per [vnode][glossary vnode] +and object expiry. + +### Max Memory + +This setting specifies the maximum amount of memory consumed by the +Memory backend. It's important to note that this setting acts on a +*per-vnode basis*, not on a per-node or per-cluster basis. This should +be taken into account when planning for memory usage with the Memory +backend, as the total memory used will be max memory times the number +of vnodes in the cluster. 
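+
+As a rough sizing sketch (assuming, per the paragraph above, that the
+cluster-wide footprint is simply the per-vnode maximum multiplied by the
+total number of vnodes; the function name and figures are illustrative):
+
+```python
+def memory_backend_footprint_mb(ring_size, nodes, max_memory_per_vnode_mb):
+    vnodes_per_node = ring_size / nodes             # vnodes hosted per node
+    per_node = vnodes_per_node * max_memory_per_vnode_mb
+    return per_node, per_node * nodes               # per-node and cluster-wide
+
+# 64 partitions across 5 nodes with max_memory_per_vnode = 512MB
+per_node, total = memory_backend_footprint_mb(64, 5, 512)
+print(per_node, total)  # -> 6553.6 MB per node, 32768.0 MB cluster-wide
+```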
+ +When the threshold value that you set has been met in a particular +vnode, Riak will begin discarding objects, beginning with the oldest +object and proceeding until memory usage returns below the allowable +threshold. + +You can configure maximum memory using the +`memory_backend.max_memory_per_vnode` setting. You can specify +`max_memory_per_vnode` however you'd like, using kilobytes, megabytes, +or even gigabytes. + +The following are all possible settings: + +```riakconf +memory_backend.max_memory_per_vnode = 500KB +memory_backend.max_memory_per_vnode = 10MB +memory_backend.max_memory_per_vnode = 2GB +``` + +```appconfig +%% In the app.config-based system, the equivalent setting is max_memory, +%% which must be expressed in megabytes: + +{riak_kv, [ + %% storage_backend specifies the Erlang module defining the storage + %% mechanism that will be used on this node. + + {storage_backend, riak_kv_memory_backend}, + {memory_backend, [ + ..., + {max_memory, 4096}, %% 4GB in megabytes + ... + ]} +``` + +To determine an optimal max memory setting, we recommend consulting the +documentation on [LevelDB cache size][plan backend leveldb]. + +### TTL + +The time-to-live (TTL) parameter specifies the amount of time an object +remains in memory before it expires. The minimum time is one second. + +In the newer, `riak.conf`-based configuration system, you can specify +`ttl` in seconds, minutes, hours, days, etc. The following are all +possible settings: + +```riakconf +memory_backend.ttl = 1s +memory_backend.ttl = 10m +memory_backend.ttl = 3h +``` + +```appconfig +%% In the app.config-based system, the ttl setting must be expressed in +%% seconds: + +{memory_backend, [ + %% other settings + {ttl, 86400}, %% Set to 1 day + %% other settings + ]} +``` + +> **Dynamically Changing `ttl`** +> +> There is currently no way to dynamically change the `ttl` setting for a +bucket or bucket type. The current workaround would be to define +multiple Memory backends using the Multi backend, each with different +`ttl` values. For more information, consult the documentation on the +[Multi][plan backend multi] backend. + + + + diff --git a/content/riak/kv/2.9.7/setup/planning/backend/multi.md b/content/riak/kv/2.9.7/setup/planning/backend/multi.md new file mode 100644 index 0000000000..20c8d6018d --- /dev/null +++ b/content/riak/kv/2.9.7/setup/planning/backend/multi.md @@ -0,0 +1,230 @@ +--- +title: "Multi-backend" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Multi-backend" + identifier: "planning_backend_multi" + weight: 103 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.7/ops/advanced/backends/multi/ + - /riak/kv/2.9.7/ops/advanced/backends/multi/ +--- + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb +[plan backend memory]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/memory +[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types +[use admin riak-admin cli]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin + +Riak allows you to run multiple backends within a single Riak cluster. +Selecting the Multi backend enables you to use different storage +backends for different [buckets][concept buckets]. 
Any combination of the three +available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used. + +## Configuring Multiple Backends + +You can set up your cluster to use the Multi backend using Riak's +[configuration files][config reference]. + +```riakconf +storage_backend = multi +``` + +```appconfig +{riak_kv, [ + %% ... + {storage_backend, riak_kv_multi_backend}, + %% ... +]}, +``` + +Remember that you must stop and then re-start each node when you change +storage backends or modify any other configuration. + +## Using Multiple Backends + +In Riak 2.0 and later, we recommend using multiple backends by applying +them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process +involves three steps: + +1. Creating a bucket type that enables buckets of that type to use the + desired backends +2. Activating that bucket type +3. Setting up your application to use that type + +Let's say that we've set up our cluster to use the Multi backend and we +want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface. + +We'll call our bucket types `leveldb_backend` and `memory_backend`, but +you can use whichever names you wish. + +```bash +riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}' +riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}' +``` + +Then, we must activate those bucket types so that they can be used in +our cluster: + +```bash +riak-admin bucket-type activate leveldb_backend +riak-admin bucket-type activate memory_backend +``` + +Once those types have been activated, any objects stored in buckets +bearing the type `leveldb_backend` will be stored in LevelDB, whereas +all objects stored in buckets of the type `memory_backend` will be +stored in the Memory backend. + +More information can be found in our documentation on [using bucket types][usage bucket types]. + +## Configuring Multiple Backends + +Once you've set up your cluster to use multiple backends, you can +configure each backend on its own. All configuration options available +for LevelDB, Bitcask, and Memory are all available to you when using the +Multi backend. + +#### Using the Newer Configuration System + +If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by +prefacing each configuration with `multi_backend`. 
+ +Here is an example of the general form for configuring multiple +backends: + +```riakconf +multi_backend.$name.$setting_name = setting +``` + +If you are using, for example, the LevelDB and Bitcask backends and wish +to set LevelDB's `bloomfilter` setting to `off` and the Bitcask +backend's `io_mode` setting to `nif`, you would do that as follows: + +```riakconf +multi_backend.leveldb.bloomfilter = off +multi_backend.bitcask.io_mode = nif +``` + +#### Using the Older Configuration System + +If you are using the older, `app.config`-based configuration system, +configuring multiple backends involves adding one or more backend- +specific sections to your `riak_kv` settings (in addition to setting +the `storage_backend` setting to `riak_kv_multi_backend`, as shown +above). + +> **Note**: If you are defining multiple file-based backends of the same +type, each of these must have a separate `data_root` directory defined. + +While all configuration parameters can be placed anywhere within the +`riak_kv` section of `app.config`, in general we recommend that you +place them in the section containing other backend-related settings to +keep the settings organized. + +Below is the general form for your `app.config` file: + +```appconfig +{riak_kv, [ + %% ... + {multi_backend_default, <<"bitcask_mult">>}, + {multi_backend, [ + %% Here's where you set the individual multiplexed backends + {<<"bitcask_mult">>, riak_kv_bitcask_backend, [ + %% bitcask configuration + {data_root, "/var/lib/riak/bitcask_mult/"}, + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [ + %% bitcask configuration + {data_root, "/var/lib/riak/bitcask_expiry_mult/"}, + {expiry_secs, 86400}, + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [ + %% eleveldb configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [ + %% eleveldb with a different configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"memory_mult">>, riak_kv_memory_backend, [ + %% memory configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]} + ]}, + %% ... +]}, +``` + +Note that in each of the subsections of the `multi_backend` setting, the +name of each backend you wish to configure can be anything you would +like. Directly after naming the backend, you must specify which of the +backends corresponds to that name, i.e. `riak_kv_bitcask_backend`, +`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have +done that, the various configurations for each named backend can be set +as objects in an Erlang list. + +## Example Configuration + +Imagine that you are using both Bitcask and LevelDB in your cluster, and +you would like storage to default to Bitcask. The following +configuration would create two backend configurations, named +`bitcask_mult` and `leveldb_mult`, respectively, while also setting the +data directory for each backend and specifying that `bitcask_mult` is +the default. + +```riakconf +storage_backend = multi + +multi_backend.bitcask_mult.storage_backend = bitcask +multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult + +multi_backend.leveldb_mult.storage_backend = leveldb +multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult + +multi_backend.default = bitcask_mult +``` + +```appconfig +{riak_kv, [ + %% ... 
+ {multi_backend_default, <<"bitcask_mult">>},
+ {multi_backend, [
+ {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+ {data_root, "/var/lib/riak/bitcask"}
+ ]},
+ {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+ {data_root, "/var/lib/riak/leveldb"}
+ ]}
+ ]}
+ %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or for the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is at or below
+50%. For example, using three backends with each set to 50% memory
+usage will inevitably lead to memory problems.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/planning/best-practices.md b/content/riak/kv/2.9.7/setup/planning/best-practices.md
new file mode 100644
index 0000000000..855aa6e45a
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/planning/best-practices.md
@@ -0,0 +1,145 @@
+---
+title: "Scaling and Operating Riak Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Best Practices"
+    identifier: "planning_best_practices"
+    weight: 105
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/building/planning/best-practices
+  - /riak/kv/2.9.7/ops/building/planning/best-practices
+---
+
+[use ref handoff]: {{<baseurl>}}riak/kv/2.9.7/using/reference/handoff
+[config mapreduce]: {{<baseurl>}}riak/kv/2.9.7/configuring/mapreduce
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#active-anti-entropy-aae
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes
+
+Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster.
+
+## Disk Capacity
+
+Filling up disks is a serious problem in Riak. In general, you should
+add capacity under the following conditions:
+
+* a disk becomes more than 80% full
+* you have fewer than 10 days of capacity remaining at current rates of
+ growth
+
+## RAID Levels
+
+Riak provides resilience through its built-in redundancy.
+
+* RAID0 can be used to increase performance at the expense of
+ single-node reliability
+* RAID5/6 can be used to increase reliability over RAID0 while still
+ offering higher performance than single disks
+* You should choose a RAID level (or no RAID) that you're comfortable
+ with
+
+## Disk Leeway
+
+* Adding new nodes instantly increases the total capacity of the
+ cluster, but you should allow enough internal network capacity that
+ [handing off][use ref handoff] existing data outpaces the arrival of new
+ data.
+* Once you've reached a scale at which the amount of new data arriving
+ is a small fraction of the cluster's total capacity, you can add new
+ nodes when you need them. You should be aware, however, that adding
+ new nodes can actually _increase_ disk usage on existing nodes in the
+ short term as data is rebalanced within the cluster. 
+* If you are certain that you are likely to run out of capacity, we
+ recommend allowing a week or two of leeway so that you have plenty of
+ time to add nodes and for [handoff][use ref handoff] to occur before the disks reach
+ capacity
+* For large volumes of storage it's usually prudent to add more capacity
+ once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+ processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you'll have spare
+ capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be "bursty," i.e. it tends to vary both quite
+ a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+ more than 20% of maximum capacity
+* Riak generates 3-5 times the amount of intra-node traffic as inbound
+ traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+ fill up
+* the current node's IO/CPU activity is higher than average for an
+ extended period of time, especially for [MapReduce][config mapreduce]
+ operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you're confident that there is plenty of spare network and CPU
+ capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+ added in an unbalanced fashion, Riak will continue storing data
+ equally across nodes, and the node with the smallest available storage
+ space is likely to fail first. Thus, if one node uses 1 TB but the
+ rest use 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+ might be a perfectly "safe" scenario. In cases like this, the velocity
+ of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+ If writes tend to come in large batches that are unpredictably timed,
+ it can be more difficult to estimate when disks will become full,
+ which means that you should probably over-provision storage as a
+ precaution.
+* If Riak shares disks with other processes or is on the system root
+ mount point, i.e. `/`, we recommend leaving a little extra disk space
+ in addition to the estimates discussed above, as other system
+ processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+ operation
+* Don't add nodes one at a time if you're adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+ customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node]. 
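+
+As a quick sanity check on the capacity thresholds above (80% of storage
+capacity, fewer than 10 days of leeway), a small, hypothetical helper
+might look like this:
+
+```python
+def should_add_nodes(disk_total_gb, disk_used_gb, daily_growth_gb):
+    percent_full = disk_used_gb / disk_total_gb * 100
+    days_remaining = (disk_total_gb - disk_used_gb) / daily_growth_gb
+    # Either rule alone is enough to justify adding nodes
+    return percent_full > 80 or days_remaining < 10
+
+print(should_add_nodes(1000, 850, 5))   # 85% full -> True
+print(should_add_nodes(1000, 600, 50))  # only 8 days of headroom -> True
+```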
+ +## Scaling + +* All large-scale systems are bound by the availability of some + resources +* From a stability point of view, the best state for a busy Riak cluster + to maintain is the following: + * New network connections are limited to ensure that existing network + connections consume most network bandwidth + * CPU at < 30% + * Disk IO at < 90% +* You should use HAProxy or your application servers to limit new + network connections to keep network and IO below 90% and CPU below + 30%. + + + + diff --git a/content/riak/kv/2.9.7/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.9.7/setup/planning/bitcask-capacity-calc.md new file mode 100644 index 0000000000..86657297ed --- /dev/null +++ b/content/riak/kv/2.9.7/setup/planning/bitcask-capacity-calc.md @@ -0,0 +1,104 @@ +--- +title: "Bitcask Capacity Calculator" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Bitcask Capacity Calculator" + identifier: "planning_cluster_bitcask_capacity" + weight: 104 + parent: "planning" +toc: true +aliases: + - /riak/2.9.7/ops/building/planning/bitcask + - /riak/kv/2.9.7/ops/building/planning/bitcask +--- + +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask + +These calculators will assist you in sizing your cluster if you plan to +use the default [Bitcask][plan backend bitcask] storage back end. + +This page is designed to give you a rough estimate when sizing your +cluster. The calculations are a _best guess_, and they tend to be a bit +on the conservative side. It's important to include a bit of head room +as well as room for unexpected growth so that if demand exceeds +expectations you'll be able to add more nodes to the cluster and stay +ahead of your requirements. + +<div id="node_info" class="calc_info"></div> +<div class="calculator"> + <ul> + <li> + <label for="n_total_keys">Total Number of Keys:</label> + <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input"> + <span class="error_span" id="n_total_keys_error"></span> + </li> + <li> + <label for="n_bucket_size">Average Bucket Size (Bytes):</label> + <input id="n_bucket_size"type="text" size="7" name="n_bucket_size" value="" class="calc_input"> + <span class="error_span"id="n_bucket_size_error"></span> + </li> + <li> + <label for="n_key_size">Average Key Size (Bytes):</label> + <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input"> + <span class="error_span" id="n_key_size_error"></span> + </li> + <li> + <label for="n_record_size">Average Value Size (Bytes):</label> + <input id="n_record_size"type="text" size="7" name="n_record_size" value="" class="calc_input"> + <span class="error_span"id="n_record_size_error"></span> + </li> + <li> + <label for="n_ram">RAM Per Node (in GB):</label> + <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input"> + <span class="error_span" id="n_ram_error"></span> + </li> + <li> + <label for="n_nval"><i>N</i> (Number of Write Copies):</label> + <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input"> + <span class="error_span" id="n_nval_error"></span> + </li> +</ul> +</div> + +## Recommendations + +<span id="recommend"></span> + +## Details on Bitcask RAM Calculation + +With the above information in mind, the following variables will factor +into your RAM calculation: + +Variable | Description +:--------|:----------- +Static Bitcask per-key overhead | 44.5 bytes per key +Estimated average 
bucket-plus-key length | The combined number of characters your bucket + key names will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The Actual Equation
+
+Approximate RAM Needed for Bitcask = (static bitcask per-key overhead +
+estimated average bucket+key length in bytes) * estimated total number of
+keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+The amount of RAM you would need for Bitcask is approximately **10.4 GiB
+across your entire cluster** (see the worked example below).
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
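+
+The same equation worked through in Python, using the 44.5-byte static
+overhead from the table above (the function name is illustrative):
+
+```python
+STATIC_OVERHEAD_BYTES = 44.5   # per-key Bitcask overhead from the table
+
+def bitcask_ram_gib(total_keys, avg_bucket_key_bytes, n_val=3):
+    total = (STATIC_OVERHEAD_BYTES + avg_bucket_key_bytes) * total_keys * n_val
+    return total / 2**30       # convert bytes to GiB
+
+# 50 million keys, ~30-byte bucket+key names, the default n_val of 3
+print(round(bitcask_ram_gib(50_000_000, 30), 1))  # -> 10.4
+```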
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/planning/cluster-capacity.md b/content/riak/kv/2.9.7/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..965328d3a2
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/planning/cluster-capacity.md
@@ -0,0 +1,238 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/building/planning/cluster
+  - /riak/kv/2.9.7/ops/building/planning/cluster
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/2.9.7/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.7/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users. Memory is essential for running complex MapReduce queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM w/o degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory. The keydir is a hash table that maps each
+concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for
+each file contained within each Bitcask backend) to a fixed-size
+structure giving the file, offset, and size of the most recently written
+entry for that bucket + key on disk.
+
+To learn about Bitcask see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.
+
+If your calculated RAM needs will exceed your hardware resources---in other words, if you can't afford the RAM to use Bitcask---we recommend that you use LevelDB.
+
+Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.
+
+### LevelDB
+
+If RAM requirements for Bitcask are prohibitive, we recommend use of
+the LevelDB backend. While LevelDB doesn't require a large amount of RAM
+to operate, supplying it with the maximum amount of memory available leads to higher performance.
+
+For more information see [LevelDB][plan backend leveldb].
+
+## Disk
+
+Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs:
+
+#### Estimated Total Objects * Average Object Size * n_val
+
+For example:
+
+* 50,000,000 objects
+* an average object size of two kilobytes (2,048 bytes)
+* the default `n_val` of 3
+
+Then you would need approximately **286 GB** of disk space in the entire cluster to accommodate your data (see the worked example below).
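+
+Worked through in Python (using binary gigabytes, which matches the ~286
+GB figure above):
+
+```python
+def disk_space_gib(total_objects, avg_object_bytes, n_val=3):
+    return total_objects * avg_object_bytes * n_val / 2**30
+
+# 50 million objects, 2,048 bytes each, the default n_val of 3
+print(round(disk_space_gib(50_000_000, 2048)))  # -> 286
+```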
+
+We believe that databases should be durable out of the box. When we
+built Riak, we did so in a way that you could write to disk while
+keeping response times below your users' expectations. So this
+calculation assumes that you'll be keeping the entire data set on disk.
+
+Many of the considerations taken when configuring a machine to serve a
+database apply to configuring a node for Riak as well. Mounting
+disks with noatime and having separate disks for your OS and Riak data
+lead to much better performance. See [Planning for a
+Riak System](../start) for more information.
+
+### Disk Space Planning and Ownership Handoff
+
+When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is increased disk space usage on the remaining nodes; in rare cases it can fill the disk of one or more of those nodes.
+
+When making disk space planning decisions, we recommend that you:
+
+* assume that one or more nodes may be down at any time
+* monitor your disk space usage and add additional space when usage
+ exceeds 50-60% of available space.
+
+Another possibility worth considering is using Riak with a filesystem
+that allows for growth, for example
+[LVM],
+[RAID](http://en.wikipedia.org/wiki/RAID), or
+[ZFS](http://en.wikipedia.org/wiki/ZFS).
+
+## Read/Write Profile
+
+Read/write ratios, as well as the distribution of key access, should
+influence the configuration and design of your cluster. If your use case
+is write heavy, you will need less RAM for caching, and if only a
+certain portion of keys is accessed regularly, such as a [Pareto
+distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you
+won't need as much RAM available to cache those keys' values.
+
+## Number of Nodes
+
+The number of nodes (i.e. physical servers) in your Riak cluster depends
+on the number of times data is [replicated][concept replication] across the
+cluster. To ensure that the cluster is always available to respond to
+read and write requests, we recommend a "sane default" of N=3
+replicas. This requirement can be met with a 3 or 4-node
+cluster.
+
+For production deployments, however, we recommend using no fewer than 5
+nodes, as node failures in smaller clusters can compromise the
+fault-tolerance of the system. Additionally, in clusters smaller than 5
+nodes, a high percentage of the nodes (75-100% of them) will need to
+respond to each request, putting undue load on the cluster that may
+degrade performance. For more details on this recommendation, see our
+blog post on [Why Your Riak Cluster Should Have at Least Five
+Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/).
+
+## Scaling
+
+Riak can be scaled in two ways: vertically, via improved hardware, and
+horizontally, by adding more nodes. Both ways can provide performance
+and capacity benefits, but should be used in different circumstances.
+The [riak-admin cluster command][use admin riak-admin#cluster] can
+assist scaling in both directions.
+
+#### Vertical Scaling
+
+Vertical scaling, or improving the capabilities of a node/server,
+provides greater capacity to the node but does not decrease the overall
+load on existing members of the cluster. That is, the ability of the
+improved node to handle existing load is increased but the load itself
+is unchanged. Reasons to scale vertically include increasing IOPS (I/O
+Operations Per Second), increasing CPU/RAM capacity, and increasing disk
+capacity.
+
+#### Horizontal Scaling
+
+Horizontal scaling, or increasing the number of nodes in the cluster,
+reduces the responsibilities of each member node by reducing the number
+of partitions and providing additional endpoints for client connections.
+That is, the capacity of each individual node does not change but its
+load is decreased. Reasons to scale horizontally include increasing I/O
+concurrency, reducing the load on existing nodes, and increasing disk
+capacity.
+
+> **Note on horizontal scaling**
+>
+> When scaling horizontally, it's best to add all planned nodes at once
+with multiple `riak-admin cluster join` commands followed by
+a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster.
+
+#### Reducing Horizontal Scale
+
+If a Riak cluster is over-provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command.
+
+## Ring Size/Number of Partitions
+
+Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference].
+
+The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes we recommend a larger ring size.
+
+The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes.
+
+There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses. We recommend between 10 and 50 data partitions per node, as the sketch below illustrates.
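+
+Since partitions per node is simply ring size divided by node count, a
+quick check of candidate ring sizes might look like this (figures are
+illustrative):
+
+```python
+def partitions_per_node(ring_size, nodes):
+    return ring_size / nodes
+
+# Candidate ring sizes for a 10-node cluster:
+for ring_size in (64, 128, 256, 512):
+    print(ring_size, partitions_per_node(ring_size, 10))
+# 64 -> 6.4 (below the 10-partition minimum), 128 -> 12.8,
+# 256 -> 25.6, 512 -> 51.2 (just over the suggested ceiling)
+```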
+
+So if you're running a 3-node development cluster, a ring size of 64 or
+128 should work just fine, while a 10-node cluster should work well with
+a ring size of 128 or 256 (64 is too small, while 512 is likely too
+large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency IO access can drastically decrease performance. Before
+putting your Riak cluster in production, we recommend that you gain a
+full understanding of your environment's behavior so that you know how
+your cluster performs under load for an extended period of time. Doing
+so will help you size your cluster for future growth and lead to optimal
+performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend that you keep latency
+between nodes as low as possible, as high latency leads to
+sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the
+lowest latency possible using SATA drives or SSDs, for example. 
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/planning/future.md b/content/riak/kv/2.9.7/setup/planning/future.md
new file mode 100644
index 0000000000..f8ff985eab
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/planning/future.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+#menu:
+#  riak_kv-2.9.7:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/planning/operating-system.md b/content/riak/kv/2.9.7/setup/planning/operating-system.md
new file mode 100644
index 0000000000..5e8565e1db
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/planning/operating-system.md
@@ -0,0 +1,30 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+aliases:
+---
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.7/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/planning/start.md b/content/riak/kv/2.9.7/setup/planning/start.md
new file mode 100644
index 0000000000..1d307fd86e
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/planning/start.md
@@ -0,0 +1,61 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/building/planning/system-planning
+  - /riak/kv/2.9.7/ops/building/planning/system-planning
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring your
+Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have
+strengths and weaknesses, so if you are unsure of which backend you
+need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests
+across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP implementations.
+We don't recommend VRRP behavior for the VIP because
+you'll lose the benefit of spreading client query load to all nodes in the
+ring.
+
+For **reverse-proxy** configurations (HTTP interface), any one of the
+following should work adequately:
+
+* haproxy
+* squid
+* varnish
+* nginx
+* lighttpd
+* Apache
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/search.md b/content/riak/kv/2.9.7/setup/search.md
new file mode 100644
index 0000000000..d3f5a12faa
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/search.md
@@ -0,0 +1 @@
+
diff --git a/content/riak/kv/2.9.7/setup/upgrading.md b/content/riak/kv/2.9.7/setup/upgrading.md
new file mode 100644
index 0000000000..dce4bfbf39
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/upgrading.md
@@ -0,0 +1,38 @@
+---
+title: "Upgrading Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Upgrading"
+    identifier: "upgrading"
+    weight: 102
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[upgrade checklist]: ./checklist
+[upgrade version]: ./version
+[upgrade cluster]: ./cluster
+[upgrade mdc]: ./multi-datacenter
+[upgrade search]: ./search
+
+## In This Section
+
+### [Production Checklist][upgrade checklist]
+
+An overview of what to consider before upgrading Riak KV in a production environment.
+
+[Learn More >>][upgrade checklist]
+
+### [Upgrading to Riak KV 2.9.7][upgrade version]
+
+A tutorial on updating to Riak KV 2.9.7.
+
+[Learn More >>][upgrade version]
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/upgrading/checklist.md b/content/riak/kv/2.9.7/setup/upgrading/checklist.md
new file mode 100644
index 0000000000..2ab418b514
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/upgrading/checklist.md
@@ -0,0 +1,225 @@
+---
+title: "Production Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Production Checklist"
+    identifier: "upgrading_checklist"
+    weight: 100
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/upgrading/production-checklist/
+  - /riak/kv/2.9.7/ops/upgrading/production-checklist/
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/2.9.7/using/performance/open-files-limit
+[perf index]: {{<baseurl>}}riak/kv/2.9.7/using/performance
+[ntp]: http://www.ntp.org/
+[security basics]: {{<baseurl>}}riak/kv/2.9.7/using/security/basics
+[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.7/configuring/load-balancing-proxy
+[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference
+[config backend]: {{<baseurl>}}riak/kv/2.9.7/configuring/backend
+[usage search]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.7/developing/app-guide/replication-properties
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/bucket-types
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.7/using/admin/commands
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-control
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/inspecting-node
+[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.7/using/troubleshooting/http-204
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin
+[SANs]: http://en.wikipedia.org/wiki/Storage_area_network
+
+Deploying Riak KV to a live production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using `iperf` to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
+    * If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][concept strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state? (A consolidated sketch of these
+  node and ring checks appears below.)
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak-admin`][use admin riak-admin])
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median and 95th and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs?
+* Are the client libraries in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying?
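+
+The node and ring checks above lend themselves to a quick script. Below
+is a minimal sketch, assuming a hypothetical node name; the expected
+outputs are the ones listed in this checklist:
+
+```bash
+# Basic node and ring health checks, run from a Riak node.
+riak ping                                              # expect: pong
+riak-admin wait-for-service riak_kv riak@192.168.1.11  # expect: riak_kv is up
+riak-admin ringready                                   # expect: TRUE ALL nodes agree on the ring
+riak-admin member-status                               # expect: all nodes valid
+riak-admin ring-status                                 # expect: Ring Ready: true, no pending changes
+riak-admin transfers                                   # expect: No transfers active
+```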
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak-admin diag <check>`, where `<check>` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak-admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production
+    systems
+  - High is for problems that impact production or soon-to-be-production
+    systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those
+    issues that are likely to turn into production outages very soon.
+    On-call engineers respond to urgent requests within 30 minutes,
+    24/7.
+* Does your team know how to gather `riak-debug` results from the whole
+  cluster when opening tickets? If not, that process goes something like
+  this (see the sketch at the end of this page):
+  - SSH into each machine, run `riak-debug`, and grab the resultant
+    `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open
+    a new High- or Urgent-priority ticket
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at
+the metrics gathered above. Based on the numbers you're seeing so far,
+configure alerting thresholds on your latencies, disk consumption, and
+memory. These are the places most likely to give you advance warning of
+trouble.
+
+When you go to increase capacity down the line, having historic metrics
+will give you very clear indicators of having resolved scaling problems,
+as well as metrics for understanding what to upgrade and when.
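+
+To make the `riak-debug` gathering step concrete, here is a minimal
+sketch. The host names and the tarball location are hypothetical; check
+where `riak-debug` writes its archive on your systems:
+
+```bash
+# Collect riak-debug archives from every node in the cluster.
+mkdir -p debug-tarballs
+for host in riak1.example.com riak2.example.com riak3.example.com; do
+  ssh "$host" 'riak-debug'
+  scp "$host:*riak-debug*.tar.gz" debug-tarballs/
+done
+```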
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/upgrading/cluster.md b/content/riak/kv/2.9.7/setup/upgrading/cluster.md
new file mode 100644
index 0000000000..b6b38b76ed
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/upgrading/cluster.md
@@ -0,0 +1,303 @@
+---
+title: "Upgrading a Cluster"
+description: ""
+project: "riak_kv"
+project_version: "2.9.7"
+menu:
+  riak_kv-2.9.7:
+    name: "Upgrading a Cluster"
+    identifier: "upgrading_cluster"
+    weight: 102
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.9.7/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.7/ops/upgrading/rolling-upgrades/
+---
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.7/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes
+[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.7/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.7/using/reference/snmp
+
+{{% note title="Note on upgrading Riak KV from older versions" %}}
+Riak KV upgrades are tested and supported for two feature release versions.
+For example, upgrades from 1.1.x to 1.3.x are tested and supported,
+while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new
+version of Riak KV that is more than two feature releases ahead, we
+recommend first upgrading to an intermediate version. For example, in an
+upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x
+before upgrading to 1.4.x.
+
+If you run [Riak Control]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-control), you should disable it during the rolling upgrade process.
+{{% /note %}}
+
+Riak KV nodes negotiate with each other to determine supported
+operating modes. This allows clusters containing mixed versions of Riak KV
+to properly interoperate without special configuration, and simplifies
+rolling upgrades.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading.
+
+## Debian/Ubuntu
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Debian/Ubuntu packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up the Riak KV node's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo dpkg -i <riak_package_name>.deb
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`).
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`).
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`).
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7. The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+   * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+   * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+   * `riak_jmx` - See [JMX Monitoring][jmx monitor] for more information.
+   * `snmp` - See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should examine any custom patches contained in
+the `basho-patches` directory to determine whether they still apply
+to the upgraded version. If you find that patches no longer
+apply to the upgraded version, you should remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/upgrading/multi-datacenter.md b/content/riak/kv/2.9.7/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..974d52f5e6
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,24 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+#menu:
+#  riak_kv-2.9.7:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+aliases:
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/upgrading/search.md b/content/riak/kv/2.9.7/setup/upgrading/search.md
new file mode 100644
index 0000000000..a99450c9e8
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/upgrading/search.md
@@ -0,0 +1,281 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.9.7"
+menu:
+  riak_kv-2.9.7:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.9.7/ops/advanced/upgrading-search-2
+  - /riak/kv/2.9.7/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e. no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM. A good start is 2 GB but more will be required for heavier
+workloads. However, do not make the heap too large, as that could
+cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+>Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space from merge index, as its GC
+algorithm is poor at removing large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, it's called `yokozuna`.
+   If you've chosen to upgrade to the new `riak.conf` config option, it's
+   called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+      %% Other configs
+      {enabled, true},
+      %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+    To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+    instructions to learn how to define your XML file. Once you've created
+    the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+    Once a bucket is associated with the new Search, all objects that are
+    written or modified in Riak will be indexed by **both** legacy and new
+    Search. However, the HTTP and client query interfaces will still
+    continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+    Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+    the following code into the shell. It clears the Search hash trees for
+    each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+    ```
+
+    Press `Ctrl-D` to exit from the attached shell.
+
+    In the background, AAE will rebuild the hash trees and exchange them
+    with KV. These exchanges will notice objects are missing and index
+    them in new Search.
+
+    <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+has occurred on every node.
+
+    ```bash
+    riak-admin search aae-status
+    ```
+
+    First, you must wait until all trees are rebuilt. This may take a
+    while, as each node is configured, by default, to build a maximum of
+    one tree per hour. You can determine when a tree is built by looking
+    at the `Entropy Trees` section. When a tree is not built, it will show
+    `--` under the `Built (ago)` column. Otherwise, it will list how long
+    ago the tree was built in a human-friendly format. Here is an example
+    of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   --
+    319703483166135013357056057156686910549735243776   --
+    ...
+    ```
+
+    Here is an example of built trees:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.3 hr
+    319703483166135013357056057156686910549735243776   5.3 hr
+    ...
+    ```
+
+    After all the trees are built, you then have to wait for a full
+    exchange round to occur for every partition on every node. That is,
+    the full exchange round must be **NEWER** than the time the tree was
+    built. That way you know the exchange was based on the latest tree.
+    The exchange information is found under the `Exchanges` section.
+    Under that section there are two columns: `Last (ago)` and `All
+    (ago)`. Here, you want to wait until the `All (ago)` value is
+    newer than the value of `Built (ago)` in the `Entropy Trees` section.
+    For example, given the entropy tree output above, this output would
+    indicate that both partitions have had a full exchange round since the
+    latest tree was built:
+
+    ```
+    ================================== Exchanges ==================================
+    Index                                              Last (ago)    All (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+    319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+    ...
+    ```
+
+    Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` is newer
+    than `5.3 hr`. Once the exchange is newer for every partition on every
+    node, you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command, which will give HTTP and PB query
+control to the new Riak Search.
+
+    ```bash
+    riak-admin search switch-to-new-search
+    ```
+
+    <div class="note">
+    <div class="title">Check Results Before Switching (Optional)</div>
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After `switch-to-new-search` is run, all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching, you can use its dedicated
+    HTTP resource at `/search/query/<index>?q=...`.
+    </div>
+
+8. Set the `search` bucket property to `false` for all legacy-indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+      %% Other configs
+      {enabled, false},
+      %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search it causes
+the commit hooks to persist even when legacy Search is disabled and the
+`search` property is set to `false`.
+
+    New Search has code to expunge the legacy hooks from the raw ring,
+    but it only runs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than in the middle of a migration.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/setup/upgrading/version.md b/content/riak/kv/2.9.7/setup/upgrading/version.md
new file mode 100644
index 0000000000..09a7f0779f
--- /dev/null
+++ b/content/riak/kv/2.9.7/setup/upgrading/version.md
@@ -0,0 +1,252 @@
+---
+title: "Upgrading to Riak KV 2.9.7"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Upgrading to 2.9.7"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.7/upgrade-v20/
+  - /riak/kv/2.9.7/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.7/setup/upgrading/cluster/
+
+---
+
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.7/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/2.9.7/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.7/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.7/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/2.9.7/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 2.9.7 by following the instructions below.
+
+{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster (a consolidated sketch follows this list):
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+    * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to potentially downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config.
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to override the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete.
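+
+To make the general process concrete, here is a minimal sketch for a
+single node, using the Debian/Ubuntu paths shown later on this page; the
+package file name and node name are placeholders:
+
+```bash
+riak stop
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak \
+    /usr/lib/riak/lib/basho-patches
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+sudo dpkg -i »riak_package_name«.deb
+riak start
+riak version                                           # confirm the new version
+riak-admin wait-for-service riak_kv riak@192.168.1.11  # wait for riak_kv
+riak-admin transfers                                   # wait for handoffs
+```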
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Transitioning to Leveled backend
+
+
+[Riak KV 2.9][release notes] introduced a new backend specifically for Riak, Leveled:
+
+The leveled backend is not compatible with other backends in terms of the serialised disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+* First transition to 2.9 with the current backend in place, minimising the time spent running mismatched versions in parallel;
+* Then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+{{% note %}}
+You only need [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) to upgrade to Riak KV 2.9.7 if you plan to use Riak search.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features may be introduced that we either believe are so important that we automatically opt-in users on upgrade or there is no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted-in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:--- |
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade, data written in HLL format is unreadable. |
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade to version 2.9, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new Leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+
+## Upgrading process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories.
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+
+4\. Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package right now, before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+    ```advanced.config
+    {riak_kv, [
+      {override_capability, [
+        {object_hash_version, [{use, legacy}] }
+      ]}
+    ]}
+    ```
+
+5.b\. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+    ```riak.conf
+    leveldb.compression.algorithm=snappy
+    ```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS <= 2.2.3, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+    ```advanced.config
+    {riak_repl, [
+      {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+      ]}
+    ]}
+    ```
+
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.7 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`).
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf. This data is transferred to the node when it becomes available.
+
+10\. Repeat the process for the remaining nodes in the cluster.
+
+
+### Basho Patches
+
+After upgrading, you should examine any custom patches contained in the `basho-patches` directory to determine whether they still apply to the upgraded version. You can find this information in the [Release Notes].
+
+If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+### Riaknostic
+
+It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal node operation.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using.md b/content/riak/kv/2.9.7/using.md
new file mode 100644
index 0000000000..63c5209a68
--- /dev/null
+++ b/content/riak/kv/2.9.7/using.md
@@ -0,0 +1,78 @@
+---
+title: "Using Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Using"
+    identifier: "managing"
+    weight: 201
+    pre: database
+toc: true
+aliases:
+---
+
+[use running cluster]: ../using/running-a-cluster
+[use admin index]: ../using/admin/
+[cluster ops index]: ../using/cluster-operations
+[repair recover index]: ../using/repair-recovery
+[security index]: ../using/security
+[perf index]: ../using/performance
+[troubleshoot index]: ../using/troubleshooting
+[use ref]: ../using/reference
+
+## In This Section
+
+#### [Running a Cluster][use running cluster]
+
+A guide on basic cluster setup.
+
+[Learn More >>][use running cluster]
+
+#### [Cluster Administration][use admin index]
+
+Tutorials and reference documentation on cluster administration commands as well as command-line tools.
+
+[Learn More >>][use admin index]
+
+#### [Cluster Operations][cluster ops index]
+
+Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups.
+
+[Learn More >>][cluster ops index]
+
+#### [Repair & Recovery][repair recover index]
+
+Contains documentation on repairing a cluster, recovering from failure, and common errors.
+
+[Learn More >>][repair recover index]
+
+#### [Security][security index]
+
+Information on securing your Riak KV cluster.
+
+[Learn More >>][security index]
+
+#### [Performance][perf index]
+
+Articles on benchmarking your Riak KV cluster and improving performance.
+
+[Learn More >>][perf index]
+
+#### [Troubleshooting][troubleshoot index]
+
+Guides on troubleshooting issues and current product advisories.
+
+[Learn More >>][troubleshoot index]
+
+#### [Reference][use ref]
+
+Articles providing background information and implementation details on topics such as logging, bucket types, and search.
+
+[Learn More >>][use ref]
+
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/admin.md b/content/riak/kv/2.9.7/using/admin.md
new file mode 100644
index 0000000000..3c4666571e
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/admin.md
@@ -0,0 +1,51 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/cluster-admin
+  - /riak/kv/2.9.7/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/admin/commands.md b/content/riak/kv/2.9.7/using/admin/commands.md
new file mode 100644
index 0000000000..d4704fd549
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/admin/commands.md
@@ -0,0 +1,378 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/cluster-admin
+  - /riak/kv/2.9.7/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command. This will execute the changes
+that have been staged and reviewed.
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
+interface are also available as self-standing commands. The `riak-admin
+member-status` command is now the `riak-admin cluster status` command,
+`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+|        node        |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` - There are five possible values for status:
+  * `valid` - The node has begun participating in cluster operations
+  * `leaving` - The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` - The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` - The node is in the process of joining the cluster but
+    has not yet completed the join process
+  * `down` - The node is not currently responding
+* `avail` - There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` - What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` - The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant node][cluster ops add remove node] will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
+[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
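+
+To have changes to review, you must first stage them. The sketch below shows
+one complete staging cycle, using only the commands described on this page
+(the node name is illustrative, not part of a real cluster):
+
+```bash
+# Stage a join, review the resulting plan, then commit it
+riak-admin cluster join riak2@192.168.1.11
+riak-admin cluster plan
+riak-admin cluster commit
+```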
+
+If there is a staged change (or changes), you
+will see a detailed listing of what will take place upon commit, what
+the cluster will look like afterward, etc.
+
+For example, if a `cluster leave` operation is staged in a 3-node cluster, the output will look something like this:
+
+```
+=============================== Staged Changes ================================
+Action         Details(s)
+-------------------------------------------------------------------------------
+leave          'dev2@127.0.0.1'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 2 cluster transitions
+
+###############################################################################
+                         After cluster transition 1/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+leaving    32.8%      0.0%    'dev2@127.0.0.1'
+valid      34.4%     50.0%    'dev1@127.0.0.1'
+valid      32.8%     50.0%    'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+
+Transfers resulting from cluster changes: 38
+  6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1'
+  11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1'
+  5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1'
+  16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1'
+
+###############################################################################
+                         After cluster transition 2/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      50.0%      --      'dev1@127.0.0.1'
+valid      50.0%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+```
+
+Notice that there are distinct sections of the output for each of the
+transitions that the cluster will undergo, including warnings, planned
+data transfers, etc.
+
+## commit
+
+Commits the currently staged cluster changes. Staged cluster changes
+must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed.
+
+```bash
+riak-admin cluster commit
+```
+
+## clear
+
+Clears the currently staged cluster changes.
+
+```bash
+riak-admin cluster clear
+```
+
+The effect of clearing depends on the staged changes:
+
+* If a `leave` operation has been staged, `riak-admin cluster clear` will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* If an operation is staged on a node that remains in the cluster, running `riak-admin cluster clear` will leave the node unaffected.
+
+## partitions
+
+Prints primary, secondary, and stopped partition indices and IDs either
+for the current node or for another, specified node.
The following
+prints that information for the current node:
+
+```bash
+riak-admin cluster partitions
+```
+
+This would print the partition information for a different node in the
+cluster:
+
+```bash
+riak-admin cluster partitions --node=<node>
+```
+
+Partition information is contained in a table like this:
+
+```
+Partitions owned by 'dev1@127.0.0.1':
++---------+-------------------------------------------------+--+
+|  type   |                      index                      |id|
++---------+-------------------------------------------------+--+
+| primary |                        0                        |0 |
+| primary | 91343852333181432387730302044767688728495783936 |4 |
+| primary |182687704666362864775460604089535377456991567872 |8 |
+|   ...   |                      ....                       |..|
+| primary |1438665674247607560106752257205091097473808596992|63|
+|secondary|                       --                        |--|
+| stopped |                       --                        |--|
++---------+-------------------------------------------------+--+
+```
+
+## partition-count
+
+Displays the current partition count either for the whole cluster or for
+a particular node. This would display the partition count for the
+cluster:
+
+```bash
+riak-admin cluster partition-count
+```
+
+This would display the count for a node:
+
+```bash
+riak-admin cluster partition-count --node=<node>
+```
+
+When retrieving the partition count for a node, you'll see a table like
+this:
+
+```
++--------------+----------+-----+
+|     node     |partitions| pct |
++--------------+----------+-----+
+|dev1@127.0.0.1|    22    | 34.4|
++--------------+----------+-----+
+```
+
+The `partitions` column displays the number of partitions claimed by the
+node, while the `pct` column displays the percentage of the ring claimed.
+
+## partition
+
+The `cluster partition` command enables you to convert partition IDs to
+indexes and vice versa using the `partition id` and `partition index`
+commands, respectively. Let's say that you run the `riak-admin cluster
+partitions` command and see that you have a variety of partitions, one
+of which has an index of
+`1004782375664995756265033322492444576013453623296`.
You can convert
+that index to an ID like this:
+
+```bash
+riak-admin cluster partition index=1004782375664995756265033322492444576013453623296
+```
+
+Conversely, if you have a partition with an ID of 20, you can retrieve
+the corresponding index:
+
+```bash
+riak-admin cluster partition id=20
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/admin/riak-admin.md b/content/riak/kv/2.9.7/using/admin/riak-admin.md
new file mode 100644
index 0000000000..c12e964a99
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/admin/riak-admin.md
@@ -0,0 +1,721 @@
+---
+title: "riak-admin Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "riak-admin CLI"
+    identifier: "cluster_admin_cli"
+    weight: 101
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/tools/riak-admin
+  - /riak/kv/2.9.7/ops/running/tools/riak-admin
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.7/using/admin/commands
+[use admin commands#join]: {{<baseurl>}}riak/kv/2.9.7/using/admin/commands/#join
+[use admin commands#leave]: {{<baseurl>}}riak/kv/2.9.7/using/admin/commands/#leave
+[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/backing-up
+[config reference#node-metadata]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/#node-metadata
+[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/changing-cluster-info
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/commit-hooks
+[config reference#ring]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/#ring
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/inspecting-node
+[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.7/using/reference/statistics-monitoring
+[downgrade]: {{<baseurl>}}riak/kv/2.9.7/setup/downgrade
+[security index]: {{<baseurl>}}riak/kv/2.9.7/using/security/
+[security managing]: {{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/bucket-types
+[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.7/using/reference/secondary-indexes
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.7/using/repair-recovery
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/strong-consistency
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/handoff
+[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#stats
+
+## `riak-admin`
+
+The `riak-admin` command performs operations unrelated to node liveness, including
+node membership, backup, and basic status reporting. The node must be
+running for most of these commands to work.
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up, respectively.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. Reip should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs have been fixed related to reip in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded JavaScript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+**Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Output system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10 riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```bash
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
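+
+As an illustrative sketch only (the username, password, and permissions here
+are made up for the example), a minimal security setup might look like this:
+
+```bash
+# Create a user, allow password-based access from localhost,
+# grant read/write permissions, then switch security on
+riak-admin security add-user alice password=secret
+riak-admin security add-source alice 127.0.0.1/32 password
+riak-admin security grant riak_kv.get,riak_kv.put on any to alice
+riak-admin security enable
+```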
+
+## bucket-type
+
+Bucket types, introduced in Riak 2.0, are a means of managing bucket
+properties and provide an additional namespace in Riak alongside
+buckets and keys. This command enables you to create and modify bucket
+types, provide the status of currently available bucket types, and
+activate created bucket types.
+
+```bash
+riak-admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak-admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak-admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak-admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+        Total partitions: 64
+        Finished partitions: 44
+        Speed: 100
+        Total 2i items scanned: 0
+        Total tree objects: 0
+        Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak-admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak-admin repair-2i kill
+```
+
+## search
+
+The search command provides sub-commands for various administrative
+work related to the new Riak Search.
+
+```bash
+riak-admin search <command>
+```
+
+### aae-status
+
+```bash
+riak-admin search aae-status
+```
+
+Output active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtrees don't match for a particular key. The `Last` column is
+the number of keys repaired during the last exchange.
The `Mean` column is +the average number of keys repaired for all exchange rounds since the +node has started. The `Max` column is the maximum number of keys +repaired for a given exchange round since the node has started. + +### switch-to-new-search + +{{% note title="Only For Legacy Migration" %}} +This is only needed when migrating from legacy riak search to the new Search +(Yokozuna). +{{% /note %}} + +```bash +riak-admin search switch-to-new-search +``` + +Switch handling of the HTTP `/solr/<index>/select` resource and +protocol buffer query messages from legacy Riak Search to new Search +(Yokozuna). + +## services + +Lists available services on the node (e.g. `riak_kv`). + +```bash +riak-admin services +``` + +## ensemble-status + +This command is used to provide insight into the current status of the +consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature. + +```bash +riak-admin ensemble-status +``` + +This command can also be used to check on the status of a specific +consensus group in your cluster: + +```bash +riak-admin ensemble-status <group id> +``` + +Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency]. + +## handoff + +Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff]. + +## set + +Enables you to change the value of one of Riak's configuration +parameters on the fly, without needing to stop and restart the node. + +```bash +riak-admin set <variable>=<value> +``` + +The set command can only be used for the following +parameters: + +* `transfer_limit` +* `handoff.outbound` +* `handoff.inbound` +* `search.dist_query=off` will disable distributed query for the node +* `search.dist_query=on` will enable distributed query for the node +* `search.dist_query` will get the status of distributed query for the node + +The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted. + + +## show + +Whereas the [`riak-admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak +cluster, the `show` command enables you to view only some of those +statistics. + +```bash +riak-admin show <variable> +``` + +## describe + +Provides a brief description of one of Riak's [configurable parameters][config reference]. 
+
+```bash
+riak-admin describe <variable>
+```
+
+If you want to know the meaning of the `nodename` parameter:
+
+```bash
+riak-admin describe nodename
+```
+
+That will produce the following output:
+
+```
+nodename:
+  Name of the Erlang node
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/admin/riak-cli.md b/content/riak/kv/2.9.7/using/admin/riak-cli.md
new file mode 100644
index 0000000000..dae07c4df9
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/admin/riak-cli.md
@@ -0,0 +1,204 @@
+---
+title: "riak Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "riak CLI"
+    identifier: "cluster_admin_riak_cli"
+    weight: 102
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/tools/riak
+  - /riak/kv/2.9.7/ops/running/tools/riak
+---
+
+[configuration file]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/
+[escript]: http://www.erlang.org/doc/man/escript.html
+[`riak-admin`]: {{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#top
+[configuration]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/
+
+## riak
+
+This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands:
+
+```bash
+Usage: riak «command»
+where «command» is one of the following:
+    { help | start | stop | restart | ping | console | attach
+      attach-direct | ertspath | chkconfig | escript | version | getpid
+      top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } |
+      config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+## help
+
+Provides a brief description of all available commands.
+
+## start
+
+Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given.
+
+```bash
+riak start
+```
+
+## stop
+
+Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak stop
+```
+
+## restart
+
+Stops and then starts the running Riak node without exiting the Erlang VM.
+Prints `ok` when successful, or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak restart
+```
+
+## ping
+
+Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding.
+
+```bash
+riak ping
+```
+
+## console
+
+Starts the Riak node in the foreground, giving access to the Erlang shell and
+runtime messages. Prints `Node is already running - use 'riak attach' instead`
+when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice.
+
+```bash
+riak console
+```
+
+## attach
+
+Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached.
+
+```bash
+riak attach
+```
+
+## attach-direct
+
+Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**.
+
+```bash
+riak attach-direct
+```
+
+## ertspath
+
+Outputs the path of the Riak Erlang runtime environment:
+
+```bash
+riak ertspath
+```
+
+## chkconfig
+
+Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output.
+
+```bash
+riak chkconfig
+```
+
+## escript
+
+Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment:
+
+```bash
+riak escript <filename>
+```
+
+## version
+
+Outputs the Riak version identifier:
+
+```bash
+riak version
+```
+
+## getpid
+
+Outputs the process identifier for the currently-running instance of Riak:
+
+```bash
+riak getpid
+```
+
+## top
+
+The `riak top` command is the direct equivalent of `riak-admin top`:
+
+```bash
+riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N]
+```
+
+More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation.
+
+## config
+
+Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's `riak.conf` configuration.
+
+```bash
+riak config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however, `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the `riak.conf` or `advanced.config` files.
+  The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example:
+
+    ```
+    -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args
+    ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message:
+
+    ```
+    -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args
+    ```
+
+* `effective` prints the effective configuration in the following syntax:
+
+    ```
+    parameter1 = value1
+    parameter2 = value2
+    ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error:
+
+    ```
+    Disabling cuttlefish, legacy configuration files found:
+      /etc/riak/app.config
+      /etc/riak/vm.args
+    Effective config is only visible for cuttlefish conf files.
+    ```
+
+* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following:
+
+    ```
+    Documentation for storage_backend
+    Specifies the storage engine used for Riak's key-value data
+    and secondary indexes (if supported).
+
+    Valid Values:
+      - one of: bitcask, leveldb, memory, multi, prefix_multi
+    Default Value : bitcask
+    Set Value     : bitcask
+    Internal key  : riak_kv.storage_backend
+    ```
+
+Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem.
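+
+For example, to re-run configuration generation with debug output, using only
+the flags described above:
+
+```bash
+riak config generate -l debug
+```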
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/admin/riak-control.md b/content/riak/kv/2.9.7/using/admin/riak-control.md
new file mode 100644
index 0000000000..b697dd617b
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/admin/riak-control.md
@@ -0,0 +1,237 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/advanced/riak-control
+  - /riak/kv/2.9.7/ops/advanced/riak-control
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+or
+
+listener.https.<name> = 127.0.0.1:8096
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+
+```
+
+```appconfig
+ {riak_api,
+  [
+    %% Other configs
+    ... if HTTP is configured ...
+    {http,[{"127.0.0.1",8098}]},
+    ... if HTTPS is configured ...
+    {https,[{"127.0.0.1",8069}]},
+    %% Other configs
+  ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found in the document below.
+{{% /note %}}
+
+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, false},
+    %% Other configs
+  ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, true},
+    %% Other configs
+  ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>.
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/2.9.7/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
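+
+As a minimal sketch, enabling an HTTPS listener in `riak.conf` involves
+settings along these lines (the certificate paths here are illustrative; see
+the security documentation linked above for the full procedure):
+
+```riakconf
+listener.https.internal = 127.0.0.1:8069
+ssl.certfile = /etc/riak/cert.pem
+ssl.keyfile = /etc/riak/key.pem
+```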
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
+
+## Authentication
+
+Riak Control provides you the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {auth, userlist}, %% The only other available option is "none"
+    %% Other configs
+  ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {userlist, [
+        {"bob", "bob_is_the_coolest"},
+        {"polly", "h4x0r123"},
+        {"riakrocks", "cap_theorem_4_life"}
+    ]}
+    %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to `https://ip_address_of_https_listener:https_port/admin`.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[ ![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png) ] ({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+ +Staged changes to the cluster: + +[ ![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png) ] ({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[ ![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png) ] ({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[ ![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png) ] ({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[ ![Node Management]({{<baseurl>}}images/control_node_management.png) ] ({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode). + +[ ![Ring View]({{<baseurl>}}images/control_current_ring.png) ] ({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. + + + + diff --git a/content/riak/kv/2.9.7/using/cluster-operations.md b/content/riak/kv/2.9.7/using/cluster-operations.md new file mode 100644 index 0000000000..97a4fac8b1 --- /dev/null +++ b/content/riak/kv/2.9.7/using/cluster-operations.md @@ -0,0 +1,109 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +aliases: +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak-admin handoff` interface to enable and disable handoff. 
+
+[Learn More >>][ops handoff]
+
+
+#### [Monitoring Strong Consistency][ops strong consistency]
+
+Overview of the various statistics used in monitoring strong consistency.
+
+[Learn More >>][ops strong consistency]
+
+
+#### [V3 Multi-Datacenter][ops v3 mdc]
+
+Explains how to manage V3 replication with the `riak-repl` command.
+
+[Learn More >>][ops v3 mdc]
+
+
+#### [V2 Multi-Datacenter][ops v2 mdc]
+
+Explains how to manage V2 replication with the `riak-repl` command.
+
+[Learn More >>][ops v2 mdc]
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.9.7/using/cluster-operations/active-anti-entropy.md
new file mode 100644
index 0000000000..ef5bebae1f
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/cluster-operations/active-anti-entropy.md
@@ -0,0 +1,289 @@
+---
+title: "Managing Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Managing Active Anti-Entropy"
+    identifier: "cluster_operations_aae"
+    weight: 111
+    parent: "managing_cluster_operations"
+toc: true
+version_history:
+  in: "2.9.1+"
+aliases:
+  - /riak/kv/2.9.7/ops/advanced/aae/
+  - /riak/2.9.7/ops/advanced/aae/
+---
+[config search#throttledelay]: {{<baseurl>}}riak/kv/2.9.7/configuring/search/#search-anti-entropy-throttle-tier-delay
+[config search#throttle]: {{<baseurl>}}riak/kv/2.9.7/configuring/search/#search-anti-entropy-throttle
+
+Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) (AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning.
+
+In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak.
+
+## Enabling Active Anti-Entropy
+
+Whether AAE is currently enabled in a node is determined by the value of
+the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/).
+
+In Riak versions 2.0 and later, AAE is turned on by default.
+
+```riakconf
+anti_entropy = active
+```
+
+```appconfig
+{riak_kv, [
+
+    {anti_entropy, {on, []}},
+
+    %% More riak_kv settings...
+]}
+```
+
+For monitoring purposes, you can also activate AAE debugging, which
+provides verbose debugging message output:
+
+```riakconf
+anti_entropy = active-debug
+```
+
+```appconfig
+{riak_kv, [
+
+    %% With debugging
+    {anti_entropy, {on, [debug]}},
+
+    %% More riak_kv settings...
+]}
+```
+
+Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect.
+
+## Disabling Active Anti-Entropy
+
+Alternatively, AAE can be switched off if you would like to repair
+object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone:
+
+```riakconf
+anti_entropy = passive
+```
+
+```appconfig
+{riak_kv, [
+
+    %% AAE turned off
+    {anti_entropy, {off, []}},
+
+    %% More riak_kv settings...
+]}
+```
+
+If you would like to reclaim the disk space used by AAE operations, you
+must manually delete the directory in which AAE-related data is stored
+in each node.
+ +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak-admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... + +================================ Entropy Trees ================================ +Index Built (ago) +------------------------------------------------------------------------------- +0 5.7 d +22835963083295358096932575511191922182123945984 5.6 d +45671926166590716193865151022383844364247891968 5.5 d +68507889249886074290797726533575766546371837952 4.3 d +91343852333181432387730302044767688728495783936 4.8 d + +================================ Keys Repaired ================================ +Index Last Mean Max +------------------------------------------------------------------------------- +0 0 0 0 +22835963083295358096932575511191922182123945984 0 0 0 +45671926166590716193865151022383844364247891968 0 0 0 +68507889249886074290797726533575766546371837952 0 0 0 +91343852333181432387730302044767688728495783936 0 0 0 + +``` + +Each of these three tables contains information for each +[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories: + +Category | Measures | Description +:--------|:---------|:----------- +**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed + | `All` | How long it has been since a partition exchanged with all of its replicas +**Entropy Trees** | `Built` | When the hash trees for a given partition were created +**Keys Repaired** | `Last` | The number of keys repaired during all key exchanges since the last node restart + | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart + | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart + +All AAE status information obtainable using the `riak-admin aae-status` +command is stored in-memory and is reset when a node is restarted with +the exception of hash tree build information, which is persisted on disk +(because hash trees themselves are persisted on disk). + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and +off but also to fine-tune your cluster's use of AAE, e.g. how +much memory AAE processes should consume, how frequently specific +processes should be run, etc. 
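+
+As a quick reference, the sketch below collects the parameters discussed in
+the subsections that follow, shown at their default values (the data
+directory path is illustrative):
+
+```riakconf
+anti_entropy = active
+anti_entropy.data_dir = ./data/anti_entropy
+anti_entropy.trigger_interval = 15s
+anti_entropy.tree.expiry = 1w
+anti_entropy.concurrency_limit = 2
+anti_entropy.max_open_files = 20
+anti_entropy.write_buffer_size = 4MB
+```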
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach the length
+specified by the [`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on
+that in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but
+entails a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data corruption.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built.
+
+You can set the frequency using the
+`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
+default is every hour (`1h`); the number of hash tree builds is
+specified by `anti_entropy.tree.build_limit.number`, for which the
+default is 1.
+
+### Write Buffer Size
+
+While you are free to choose the backend for data storage in Riak,
+background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
+write buffer used by LevelDB for hash tree generation using the
+`anti_entropy.write_buffer_size` parameter. The default is `4MB`.
+
+### Open Files and Concurrency Limits
+
+The `anti_entropy.concurrency_limit` parameter determines how many AAE
+cross-node information exchanges or hash tree builds can happen
+concurrently. The default is `2`.
+
+The `anti_entropy.max_open_files` parameter sets an open-files limit for
+AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.
+
+## AAE and Riak Search
+
+Riak's AAE subsystem works to repair object inconsistencies both for
+normal key/value objects and for data related to [Riak Search](../../../developing/usage/search). In particular, AAE acts on indexes stored in
+[Solr](http://lucene.apache.org/solr/), the search platform that drives
+Riak Search. Implementation details for AAE and Search can be found in
+the [Search Details](../../reference/search/#active-anti-entropy-aae)
+documentation.
+
+You can check on the status of Search-related AAE using the following
+command:
+
+```bash
+riak-admin search aae-status
+```
+
+The output from that command can be interpreted just like the output
+discussed in the section on [monitoring](#monitoring-aae) above.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes.md
new file mode 100644
index 0000000000..6531e138eb
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes.md
@@ -0,0 +1,198 @@
+---
+title: "Adding / Removing Nodes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Adding/Removing Nodes"
+    identifier: "cluster_operations_add_remove_nodes"
+    weight: 100
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/nodes/adding-removing
+  - /riak/kv/2.9.7/ops/running/nodes/adding-removing
+---
+
+[use running cluster]: {{<baseurl>}}riak/kv/2.9.7/using/running-a-cluster
+
+This page describes the process of adding and removing nodes to and from
+a Riak KV cluster. For information on creating a cluster, check out [Running a Cluster][use running cluster].
+
+## Start the Node
+
+Just like the initial configuration steps, this step has to be repeated
+for every node in your cluster. Before a node can join an existing
+cluster it needs to be started. Depending on your mode of installation,
+use either the init scripts installed by the Riak binary packages or
+simply the script [`riak`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-cli/):
+
+```bash
+/etc/init.d/riak start
+```
+
+or
+
+```bash
+bin/riak start
+```
+
+When the node starts, it will look for a cluster description, known as
+the **ring file**, in its data directory. 
If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. This command +would stage a join to that cluster: + +```bash +bin/riak-admin cluster join riak@192.168.2.2 +``` + +If the join request is successful, you should see the following: + +``` +Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2' +``` + +If you have multiple nodes that you would like to join to an existing +cluster, repeat this process for each of them. + +## Joining Nodes to Form a Cluster + +The process of joining a cluster involves several steps, including +staging the proposed cluster nodes, reviewing the cluster plan, and +committing the changes. + +After staging each of the cluster nodes with `riak-admin cluster join` +commands, as in the section above, the next step in forming a cluster is +to review the proposed plan of changes. This can be done with the +`riak-admin cluster plan` command, which is shown in the example below. 
+
+```
+=============================== Staged Changes ================================
+Action         Nodes(s)
+-------------------------------------------------------------------------------
+join           'riak@192.168.2.3'
+join           'riak@192.168.2.4'
+join           'riak@192.168.2.5'
+join           'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+                         After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid     100.0%     20.3%    'riak@192.168.2.2'
+valid       0.0%     20.3%    'riak@192.168.2.3'
+valid       0.0%     20.3%    'riak@192.168.2.4'
+valid       0.0%     20.3%    'riak@192.168.2.5'
+valid       0.0%     18.8%    'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+  12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from the cluster in two ways. The first assumes
+that a node is decommissioned, for example, because its added capacity
+is not needed anymore or because it's explicitly replaced with a new
+one. The second is relevant for failure scenarios in which a node has
+crashed and is irrecoverable, and thus must be removed from the cluster
+by another node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster.
+
+As with joining a node, after executing `riak-admin cluster leave`
+the cluster plan must be reviewed with `riak-admin cluster plan` and
+the changes committed with `riak-admin cluster commit`.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing; running
+`riak-admin cluster leave` simply selects the current node as the
+target for you automatically.
+
+As with `riak-admin cluster leave`, the plan to have a node leave the
+cluster must first be reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before any changes will
+actually take place.
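+
+For example, a typical decommissioning session, run on the node that is
+leaving, looks like this:
+
+```bash
+riak-admin cluster leave
+riak-admin cluster plan    # review the proposed transition
+riak-admin cluster commit  # apply it
+```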
+ + +## Pausing a `join` or `leave` + +{{% note title="Warning" %}} +Pausing may impact cluster health and is not recommended for more than a short period of time. +{{% /note %}} + +To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0: + +```bash +riak-admin transfer-limit <node> 0 +``` + + + + diff --git a/content/riak/kv/2.9.7/using/cluster-operations/backend.md b/content/riak/kv/2.9.7/using/cluster-operations/backend.md new file mode 100644 index 0000000000..57b71f9205 --- /dev/null +++ b/content/riak/kv/2.9.7/using/cluster-operations/backend.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "Backend" +description: "" +project: "riak_kv" +project_version: 2.9.7 +#menu: +# riak_kv-2.9.7: +# name: "Backend" +# identifier: "cluster_operations_backend" +# weight: 112 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +**TODO: Add content** + + + + diff --git a/content/riak/kv/2.9.7/using/cluster-operations/backing-up.md b/content/riak/kv/2.9.7/using/cluster-operations/backing-up.md new file mode 100644 index 0000000000..1effc8cedf --- /dev/null +++ b/content/riak/kv/2.9.7/using/cluster-operations/backing-up.md @@ -0,0 +1,271 @@ +--- +title: "Backing Up" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Backing Up" + identifier: "cluster_operations_backing_up" + weight: 106 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.7/ops/running/backups + - /riak/kv/2.9.7/ops/running/backups +--- + +[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters +[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency +[concept aae]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/ +[aae read repair]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy + +Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. + +Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios. + +This page covers how to perform backups of Riak KV data. + +## Overview + +Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar. + +Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes. + +The basic process for getting a backup of Riak KV from a node is as follows: + +1. Stop Riak KV with `riak stop`. +2. Backup the appropriate data, ring, and configuration directories. +3. Start Riak KV. + +Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting. + +{{% note title="Backups and eventual consistency" %}} +Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. + +Data could exist on some nodes and not others at the exact time a backup is made. 
Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy).
+{{% /note %}}
+
+## OS-Specific Directory Locations
+
+The default Riak KV data, ring, and configuration directories for each of the supported operating systems are as follows:
+
+#### Debian and Ubuntu
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### Fedora and RHEL
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### FreeBSD
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/usr/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### OS X
+
+Data | Directory
+:----|:---------
+Bitcask | `./data/bitcask`
+LevelDB | `./data/leveldb`
+Ring | `./data/riak/ring`
+Configuration | `./etc`
+Cluster Metadata | `./data/riak/cluster_meta`
+Search | `./data/riak/yz`
+Strong consistency | `./data/ensembles`
+
+**Note**: OS X paths are relative to the directory in which the package
+was extracted.
+
+#### SmartOS
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/opt/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### Solaris
+
+Data | Directory
+:----|:---------
+Bitcask | `/opt/riak/data/bitcask`
+LevelDB | `/opt/riak/data/leveldb`
+Ring | `/opt/riak/ring`
+Configuration | `/opt/riak/etc`
+Cluster Metadata | `/opt/riak/cluster_meta`
+Search | `/opt/riak/yz`
+Strong consistency | `/opt/riak/data/ensembles`
+
+## Performing Backups
+
+{{% note title="Deprecation notice" %}}
+In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#backup) command commonly used for
+backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
+{{% /note %}}
+
+Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.
+
+Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
+installation.
+
+The following examples use `tar`:
+
+{{% note %}}
+Backups must be performed while Riak KV is stopped to prevent data loss. 
+{{% /note %}} + +### Bitcask + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak +``` + +### LevelDB + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak +``` + +### Cluster Metadata + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/cluster_meta +``` + +### Search / Solr Data + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/yz +``` + +### Strong Consistency Data + +Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature +can be stored in an analogous fashion: + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/ensembles +``` + +## Restoring a Node + +The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment. + +If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process: + +1. Install Riak on the new node. +2. Restore your old node's configuration files, data directory, and ring + directory. +3. Start the node and verify proper operation with `riak ping`, + `riak-admin status`, and other methods you use to check node health. + +If the node name of a restored node (`-name` argument in `vm.args` or +`nodename` parameter in `riak.conf`) is different than the name of the +node that the restored backup was taken from, you will need to +additionally: + +1. Mark the original instance down in the cluster using + [`riak-admin down <node>`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#down) +2. Join the restored node to the cluster using + [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster-join) +3. Replace the original instance with the renamed instance with + [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster-force-replace) +4. Plan the changes to the cluster with `riak-admin cluster plan` +5. Finally, commit the cluster changes with `riak-admin cluster commit` + +{{% note %}} +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/2.9.7/using/admin/). +{{% /note %}} + +For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. + +1. Join to any existing cluster node. + + ```bash + riak-admin cluster join riak@riak2.example.com + ``` + +2. Mark the old instance down. + + ```bash + riak-admin down riak@riak1.example.com + ``` + +3. Force-replace the original instance with the new one. + + ```bash + riak-admin cluster force-replace \ + riak@riak1.example.com riak@riak6.example.com + ``` + +4. Display and review the cluster change plan. + + ```bash + riak-admin cluster plan + ``` + +5. Commit the changes to the cluster. 
+
+   ```bash
+   riak-admin cluster commit
+   ```
+
+In addition to running the commands above, your [configuration files][config reference] should also be changed to match the new name (the `-name` setting in `vm.args` under the older config system, or the `nodename` setting in `riak.conf` under the newer one).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change, but the hostnames are used for the node names and the hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member-status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/2.9.7/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/cluster-operations/bucket-types.md b/content/riak/kv/2.9.7/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..61e580c2ad
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/cluster-operations/bucket-types.md
@@ -0,0 +1,63 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional setup step on the
+command line.
+
+## Creating a Bucket Type
+
+When creating a new bucket type, you can create a bucket type without
+any properties and set individual buckets to be indexed. 
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + + + + diff --git a/content/riak/kv/2.9.7/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.9.7/using/cluster-operations/changing-cluster-info.md new file mode 100644 index 0000000000..226b913759 --- /dev/null +++ b/content/riak/kv/2.9.7/using/cluster-operations/changing-cluster-info.md @@ -0,0 +1,458 @@ +--- +title: "Changing Cluster Information" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Changing Cluster Info" + identifier: "cluster_operations_change_info" + weight: 101 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.7/ops/running/nodes/renaming + - /riak/kv/2.9.7/ops/running/nodes/renaming +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference + +## Change the Node Name + +The node name is an important setting for the Erlang VM, especially when +you want to build a cluster of nodes, as the node name identifies both +the Erlang application and the host name on the network. All nodes in +the Riak cluster need these node names to communicate and coordinate +with each other. + +In your configuration files, the node name defaults to `riak@127.0.0.1`. +To change the node name, change the following line: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +Change it to something that corresponds to either the IP address or a +resolvable host name for this particular node, like so: + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +## Change the HTTP and Protocol Buffers binding address + +By default, Riak's HTTP and Protocol Buffers services are bound to the +local interface, i.e. 127.0.0.1, and are therefore unable to serve +requests from the outside network. The relevant setting is in your +[configuration files][config reference]: + +```riakconf +# For HTTP +listener.http.internal = 127.0.0.1:8098 + +# For Protocol Buffers +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +% In the riak_api section + +% For HTTP +{http, [ {"127.0.0.1", 8098 } ]}, + +% For Protocol Buffers +{pb, [ {"127.0.0.1", 8087} ] }, +``` + +Either change it to use an IP address that corresponds to one of the +server's network interfaces, or 0.0.0.0 to allow access from all +interfaces and networks, e.g.: + +```riakconf +listener.http.internal = 0.0.0.0:8098 +``` + +```appconfig +% In the riak_core section +{http, [ {"0.0.0.0", 8098 } ]}, +``` + +The same configuration should be changed for the Protocol Buffers +interface if you intend on using it (which we recommend). 
Change the +following line: + +```riakconf +listener.protobuf.internal = 0.0.0.0:8087 +``` + +```appconfig +% In the riak_core section +{pb, [ {"0.0.0.0", 8087} ] }, +``` + +## Rename Single Node Clusters + +To rename a single-node development cluster: + +1. Stop the node with `riak stop`. + +2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args` to the new name. + +3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`. + +4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`. + +5. Start Riak on the node with `riak start`. + + +## Rename Multi-Node Clusters + +For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. + +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. + +There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. + +The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method. + +### Example Scenario + +For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration: + +* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11 +* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12 +* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13 +* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14 +* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15 + +The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use. + +The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation. + +### Process + +This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section. + +1. [Down the node to be reconfigured](#down) +2. [Reconfigure node to use new address](#reconfigure) +3. [Repeat previous steps on each node](#repeat) + + +<a id="down"></a> +#### Down the Node + +1. Stop Riak on `node1.localdomain`: + + ```bash + riak stop + ``` + + The output should look like this: + + ``` + Attempting to restart script through sudo -H -u riak + ok + ``` + +2. 
From the `node2.localdomain` node, mark `riak@10.1.42.11` down: + + ```bash + riak-admin down riak@10.1.42.11 + ``` + + Successfully marking the node down should produce output like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: "riak@10.1.42.11" marked as down + ``` + + This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node. + +<a id="reconfigure"></a> +#### Reconfigure Node to Use New Address + +Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps: + +1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example: + + `riak.conf`: `nodename = riak@192.168.17.11` + `vm.args` : `-name riak@192.168.17.11` + +2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Single Node Clusters](#single-node-clusters). + +3. Rename the node's `ring` directory, the location of which is described in step 4 of [Single Node Clusters](#single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process. + +4. Start Riak on `node1.localdomain`. + + ```bash + riak start + ``` + +5. Join the node back into the cluster. + + ```bash + riak-admin cluster join riak@10.1.42.12 + ``` + + Successful staging of the join request should have output like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12' + ``` + +6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`: + + ```bash + riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11 + ``` + + Successful force replacement staging output looks like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11' + ``` + +7. 
Review the new changes with `riak-admin cluster plan`:

+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Example output:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    =========================== Staged Changes ============================
+    Action         Nodes(s)
+    -----------------------------------------------------------------------
+    join           'riak@192.168.17.11'
+    force-replace  'riak@10.1.42.11' with 'riak@192.168.17.11'
+    -----------------------------------------------------------------------
+
+    WARNING: All of 'riak@10.1.42.11' replicas will be lost
+
+    NOTE: Applying these changes will result in 1 cluster transition
+
+    #######################################################################
+                             After cluster transition 1/1
+    #######################################################################
+
+    ============================= Membership ==============================
+    Status     Ring    Pending    Node
+    -----------------------------------------------------------------------
+    valid     20.3%      --      'riak@192.168.17.11'
+    valid     20.3%      --      'riak@10.1.42.12'
+    valid     20.3%      --      'riak@10.1.42.13'
+    valid     20.3%      --      'riak@10.1.42.14'
+    valid     18.8%      --      'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    Partitions reassigned from cluster changes: 13
+    13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11'
+    ```
+
+8. Commit the new changes to the cluster with `riak-admin cluster commit`:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    Output from the command should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Cluster changes committed
+    ```
+
+9. Check that the node is participating in the cluster and functioning as expected:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    Output should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    ============================= Membership ==============================
+    Status     Ring    Pending    Node
+    -----------------------------------------------------------------------
+    valid     20.3%      --      'riak@192.168.17.11'
+    valid     20.3%      --      'riak@10.1.42.12'
+    valid     20.3%      --      'riak@10.1.42.13'
+    valid     20.3%      --      'riak@10.1.42.14'
+    valid     18.8%      --      'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+10. Monitor hinted handoff transfers to ensure they have finished with the `riak-admin transfers` command.
+
+11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed.
+
+{{% note title="Note" %}}
+When using the `riak-admin cluster force-replace` command, you will always get a
+warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be
+lost`. Since we didn't delete any data files and we are replacing the node
+with itself under a new name, we will not lose any replicas.
+{{% /note %}}
+
+<a id="repeat"></a>
+#### Repeat previous steps on each node
+
+Repeat the steps above for each of the remaining nodes in the cluster.
+
+Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster. 
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the below steps can be used to rename nodes in a cluster that is being restored from backups. The below steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (or `-name` in `vm.args`) from `riak@10.1.42.11` to the new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    down       20.3%      --      'riak@10.1.42.12'
+    down       20.3%      --      'riak@10.1.42.13'
+    down       20.3%      --      'riak@10.1.42.14'
+    down       18.8%      --      'riak@10.1.42.15'
+    -------------------------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+
+    ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+    ```bash
+    ================================== Claimant ===================================
+    Claimant:  'riak@192.168.17.11'
+    Status:     up
+    Ring Ready: true
+
+    ============================== Ownership Handoff ==============================
+    No pending changes.
+
+    ============================== Unreachable Nodes ==============================
+    All nodes are up and reachable
+    ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args`, as described above.
+
+2. Move aside the ring directory. As in [Multi-Node Clusters](#multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. 
Force replace each node with its old node name. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`. + +6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to below: + + ```bash + =============================== Staged Changes ================================ + Action Details(s) + ------------------------------------------------------------------------------- + force-replace 'riak@10.1.42.12' with 'riak@192.168.17.12' + force-replace 'riak@10.1.42.13' with 'riak@192.168.17.13' + force-replace 'riak@10.1.42.14' with 'riak@192.168.17.14' + force-replace 'riak@10.1.42.15' with 'riak@192.168.17.15' + join 'riak@192.168.17.12' + join 'riak@192.168.17.13' + join 'riak@192.168.17.14' + join 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.12' replicas will be lost + WARNING: All of 'riak@10.1.42.13' replicas will be lost + WARNING: All of 'riak@10.1.42.14' replicas will be lost + WARNING: All of 'riak@10.1.42.15' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ############################################################################### + After cluster transition 1/1 + ############################################################################### + + ================================= Membership ================================== + Status Ring Pending Node + ------------------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@192.168.17.12' + valid 20.3% -- 'riak@192.168.17.13' + valid 20.3% -- 'riak@192.168.17.14' + valid 18.8% -- 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 51 + 13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12' + 13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13' + 13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14' + 12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15' + ``` + +7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`. + +8. 
Once the cluster transition has completed, all node names should have changed and should be marked as valid in `riak-admin member-status`, as below:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid      20.3%      --      'riak@192.168.17.11'
+    valid      20.3%      --      'riak@192.168.17.12'
+    valid      20.3%      --      'riak@192.168.17.13'
+    valid      20.3%      --      'riak@192.168.17.14'
+    valid      18.8%      --      'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    ```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/cluster-operations/handoff.md b/content/riak/kv/2.9.7/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..aa9e4bfb28
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/cluster-operations/handoff.md
@@ -0,0 +1,120 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/handoff
+  - /riak/kv/2.9.7/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to change your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command
+requires that you specify both a node (or nodes) to be targeted by the
+command and whether you'd like to disable inbound handoff, outbound
+handoff, or both. The `disable` command works just like `enable`. This
+command would disable all forms of handoff on all nodes, to give just
+one example:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster. 
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table providing the following information
+about each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, it will display details about each
+transfer currently in progress.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the sections above.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/cluster-operations/inspecting-node.md b/content/riak/kv/2.9.7/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..680ad50b45
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,496 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/nodes/inspecting
+  - /riak/kv/2.9.7/ops/running/nodes/inspecting
+---
+
+When you need to inspect a Riak node to gather performance metrics or
+investigate potential issues, a number of tools are available to help,
+either included with Riak itself or made available through the
+Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note that for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required before statistics are generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node. 
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET +FSM Siblings are inextricably linked. These are one-minute stats. + +Stat | Description +------------------------------|--------------------------------------------------- +`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute + +### Total Stats + +Total Stats represent the total number of times a particular activity +has occurred since this node was started. + +Stat | Description +---------------------------------------|--------------------------------------------------- +`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes +`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes +`vnode_gets_total` | Total number of GETs coordinated by local vnodes +`vnode_puts_total` | Total number of PUTS coordinated by local vnodes +`read_repairs_total` | Total number of Read Repairs this node has coordinated +`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination +`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy +`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads +`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes +`vnode_index_writes_postings_total` | Total number of individual secondary index values written +`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes +`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted +`pbc_connects_total` | Total number of Protocol Buffers connections made +`precommit_fail` | Total number of pre-commit hook failures +`postcommit_fail` | Total number of post-commit hook failures +`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection +`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection +`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas +`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas +`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas +`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas + +### Timestamps + +Some of the Erlang applications that Riak is comprised of contribute +statistics to `riak-admin status`. The below timestamps record, in +Epoch time, the last time statistics for that application were +generated. + +Stat | Description +--------------------|--------------------------------------------------- +`riak_kv_stat_ts` | The last time Riak KV stats were generated. +`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated. 
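+
+Because `riak-admin status` prints one `name : value` pair per line,
+any single statistic can be pulled out with ordinary shell tools. A
+small sketch, assuming a running node:
+
+```bash
+riak-admin status | grep riak_kv_stat_ts
+```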
+
+### Ring
+
+General ring information is reported in `riak-admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members` | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership` | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang's `cpu_sup` module,
+documented at [ErlDocs: cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1` | The average number of active processes for the last 1 minute (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg5` | The average number of active processes for the last 5 minutes (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg15` | The average number of active processes for the last 15 minutes (equivalent to the top(1) command's load average when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine,
+documented at [ErlDocs: Memory](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total` | Total allocated memory (sum of processes and system)
+`memory_processes` | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system` | Total allocated memory that is not directly related to an Erlang process
+`memory_atom` | Total amount of memory currently allocated for atom storage
+`memory_atom_used` | Total amount of memory currently used for atom storage
+`memory_binary` | Total amount of memory used for binaries
+`memory_code` | Total amount of memory allocated for Erlang code
+`memory_ets` | Total memory allocated for Erlang Term Storage
+`mem_total` | Total available system memory
+`mem_allocated` | Total memory allocated for this node
+
+### Erlang VM
+
+The statistics below describe properties of the Erlang VM.
+
+Stat | Description
+--------------------------|---------------------------------------------------
+`nodename` | The name this node uses to identify itself
+`connected_nodes` | A list of the nodes that this node is aware of at this time
+`sys_driver_version` | String representing the Erlang driver version in use by the runtime system
+`sys_global_heaps_size` | Current size of the shared global heap
+`sys_heap_type` | String representing the heap type in use (one of private, shared, hybrid)
+`sys_logical_processors` | Number of logical processors available on the system
+`sys_otp_release` | Erlang OTP release version in use on the node
+`sys_process_count` | Number of processes currently running in the Erlang VM
+`sys_smp_support` | Boolean value representing whether symmetric multi-processing (SMP) is available
+`sys_system_version` | Detailed Erlang version information
+`sys_system_architecture` | The node operating system and hardware architecture
+`sys_threads_enabled` | Boolean value representing whether threads are enabled
+`sys_thread_pool_size` | Number of threads in the asynchronous thread pool
+`sys_wordsize` | Size of Erlang term words in bytes as an integer; for example, 4 is returned on 32-bit architectures and 8 on 64-bit architectures
+
+### Miscellaneous Information
+
+Miscellaneous Information provides additional details particular to this
+node.
+
+Stat | Description
+---------------------------|---------------------------------------------------
+`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as undefined if LevelDB is not being used.
+`disk` | Information about the disk, taken from Erlang's disksup module. Reported as [{"ID",KBytes_Used,Percent_Util}].
+`storage_backend` | The storage backend currently in use.
+
+### Pipeline Metrics
+
+The following metrics from `riak_pipe` are generated during MapReduce
+operations.
+
+Stat | Description
+--------------------------------|---------------------------------------------------
+`pipeline_active` | The number of pipelines active in the last 60 seconds
+`pipeline_create_count` | The total number of pipelines created since the node was started
+`pipeline_create_error_count` | The total number of pipeline creation errors since the node was started
+`pipeline_create_error_one` | The number of pipeline creation errors in the last 60 seconds
+`pipeline_create_one` | The number of pipelines created in the last 60 seconds
+
+### Application and Subsystem Versions
+
+The specific version of each Erlang application and subsystem which
+makes up a Riak node is present in the `riak-admin status` output. Each
+application is linked below next to its version identifier.
+
+Stat | Description
+------------------------|---------------------------------------------------
+`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl)
+`riak_control_version` | [Riak Control](http://github.com/basho/riak_control)
+`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info)
+`riak_search_version` | [Riak Search](http://github.com/basho/riak_search)
+`merge_index_version` | [Merge Index](http://github.com/basho/merge_index)
+`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv)
+`sidejob_version` | [Sidejob](http://github.com/basho/sidejob)
+`riak_api_version` | [Riak API](http://github.com/basho/riak_api)
+`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe)
+`riak_core_version` | [Riak Core](http://github.com/basho/riak_core)
+`bitcask_version` | [Bitcask](http://github.com/basho/bitcask)
+`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats)
+`webmachine_version` | [Webmachine](http://github.com/basho/webmachine)
+`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb)
+`inets_version` | [inets](http://erlang.org/doc/apps/inets/)
+`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js)
+`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/)
+`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/)
+`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon)
+`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/)
+`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/)
+`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/)
+`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/)
+`lager_version` | [Lager](http://github.com/DeadZen/lager)
+`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush)
+`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/)
+`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/)
+`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/)
+`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/)
+
+### Riak Search Statistics
+
+The following statistics related to Riak Search message queues are
+available.
+
+Stat | Description
+-----------------------------|---------------------------------------------------
+`riak_search_vnodeq_max` | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_mean` | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_median` | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_min` | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_total` | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started
+`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem
+
+Note that under ideal operation, and with the exception of
+`riak_search_vnodes_running`, these statistics should hold low values
+(e.g., 0-10). Higher values may indicate an issue.
+
+## `riak-debug`
+
+The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes.
+
+`riak-debug` also runs `riak-admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well.
+
+{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}}
+The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion.
+{{% /note %}}
+
+## Strong Consistency Stats
+
+Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The tables below list those stats.
+
+### GET-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
+`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
+`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+
+### PUT-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
+`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
+`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+
+## `riak-admin diag`
+
+Running `riak-admin diag` by itself will perform a check of all of the
+data partitions in your cluster. It will return a listing of partitions
+that have been checked, each of which looks something like this:
+
+```
+{1392993748081016843912887106182707253109560705024, % the partition checked
+ 'dev-rel@127.0.0.1'}, % that partition's nodename
+```
+
+At the end of that (potentially very long) listing of checked
+partitions, it will print notices, warnings, and other pieces of
+information about issues that it has found, including date/time, message
+type, and a detailed description. Here's an example:
+
+```
+15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
+15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
+```
+
+Messages fall into the following types (derived from
+[syslog](http://en.wikipedia.org/wiki/Syslog) severity levels):
+
+* `debug`
+* `info`
+* `notice`
+* `warning`
+* `error`
+* `critical`
+* `alert`
+* `emergency`
+
+#### Command flags
+
+Passing the `--help` flag will return a list of flags and commands
+that can be used with Riaknostic:
+
+```
+Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]
+
+-h, --help Display help/usage dialogue
+-d, --level Minimum message severity level (default: notice)
+-l, --list Describe available diagnostic tasks
+--export Package system info in '/export.zip'
+check_name A specific check to run
+```
+
+Running `riak-admin diag` with the `--list` flag will return a list of
+available diagnostic checks. The following checks are available:
+
+Check | Description
+:-----|:-----------
+`disk` | Data directory permissions and atime
+`dumps` | Find crash dumps
+`memory_use` | Measure memory usage
+`nodes_connected` | Cluster node liveness
+`ring_membership` | Cluster membership validity
+`ring_preflists` | Check if the ring satisfies `n_val`
+`ring_size` | Check if the ring size is valid
+`search` | Check whether Riak Search is enabled on all nodes
+
+The `--level` flag enables you to specify the log level and thus to
+filter messages based on type. You can pass in any of the message types
+listed above (`debug`, `info`, etc.).
+
+The `--level` flag can be used when running `riak-admin diag` with or
+without specifying a diagnostic check.
+
+#### Contributing
+
+Do you have an idea that would help us improve Riaknostic? If so, fork
+the [GitHub repository](https://github.com/basho/riaknostic) and send us
+a pull request with your changes. The code is documented with
+[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
+Docs a read before you contribute.
+
+If you want to run the Riaknostic script while developing and you don't
+have it hooked up to your local Riak installation, you can invoke it
+directly like so:
+
+```bash
+./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
+```
+
+Those extra options are usually assigned by the `riak-admin` script for
+you, but here's how to set them:
+
+* `--etc` - The location of your Riak configuration directory (usually
+  `/etc`). In the example above, configuration is in the generated
+  directory of a source checkout of Riak.
+* `--base` - The "base" directory of Riak, usually the root of the
+  generated directory or `/usr/lib/riak` on Linux. Scan the
+  `riak-admin` script for how the `RUNNER_BASE_DIR` variable is
+  assigned on your platform.
+* `--user` - The user/UID as which the Riak node runs. In a source
+  checkout, it's the current user; on most systems, it's `riak`.
+
+## Related Resources
+
+* [The riak-admin configuration management tool](../../admin/riak-admin/)
+* [Riaknostic](http://riaknostic.basho.com/)
+* [HTTP API Status](../../../developing/api/http/status/)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/cluster-operations/load-balancing.md b/content/riak/kv/2.9.7/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..11bd9ad0eb
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/cluster-operations/load-balancing.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Load Balancing"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+#menu:
+#  riak_kv-2.9.7:
+#    name: "Load Balancing"
+#    identifier: "cluster_operations_load_balancing"
+#    weight: 111
+#    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+**TODO: Add content (not sure where this exists in docs)**
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/cluster-operations/logging.md b/content/riak/kv/2.9.7/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..fda45b8a07
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/cluster-operations/logging.md
@@ -0,0 +1,47 @@
+---
+title: "Enabling and Disabling Debug Logging"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Logging"
+    identifier: "cluster_operations_logging"
+    weight: 105
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+If you'd like to enable debug logging on the current node, i.e. set the
+console log level to `debug`, you can do so without restarting the node
+by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:
+
+```erlang
+lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
+```
+
+You should replace the file location above (`/var/log/riak/console.log`)
+with your platform-specific location, e.g. `./log/console.log` for a
+source installation. This location is specified by the
+`log.console.file` configuration parameter.
+
+If you'd like to enable debug logging on _all_ nodes instead of just one
+node, you can enter the Erlang console of any running node with `riak
+attach` and enter the following:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
+```
+
+As before, use the appropriate log file location for your cluster.
+
+At any time, you can set the log level back to `info`:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/cluster-operations/replacing-node.md b/content/riak/kv/2.9.7/using/cluster-operations/replacing-node.md
new file mode 100644
index 0000000000..f99012109c
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/cluster-operations/replacing-node.md
@@ -0,0 +1,100 @@
+---
+title: "Replacing a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Replacing a Node"
+    identifier: "cluster_operations_replace_node"
+    weight: 102
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+At some point, for various reasons, you might need to replace a node in
+your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/2.9.7/using/repair-recovery)).
+Here is the recommended way to go about replacing a node.
+
+1. Back up your data directory on the node in question. In this example
+scenario, we'll call the node `riak4`:
+
+    ```bash
+    sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+    ```
+
+    If you have any unforeseen issues at any point in the node
+    replacement process, you can restore the node's data from this
+    backup.
+
+2. Download and install Riak on the new node you wish to bring into the
+cluster and have it replace the `riak4` node. We'll call the new node
+`riak7` for the purpose of this example.
+
+3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-cli/#start):
+
+    ```bash
+    riak start
+    ```
+
+4. Plan the join of the new `riak7` node to an existing node already
+participating in the cluster (for example, `riak0`) by running the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster) command on the new `riak7` node:
+
+    ```bash
+    riak-admin cluster join riak0
+    ```
+
+5. Plan the replacement of the existing `riak4` node with the new
+`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster replace riak4 riak7
+    ```
+
+    <div class=info>
+    <div class=title>Single Nodes</div>
+    If a node is started on its own using default settings (as, for example,
+    you might do when you are building your first test environment), you
+    will need to remove the ring files from the data directory after you
+    edit `/etc/vm.args`. `riak-admin cluster replace` will not work, as
+    the node has not been joined to a cluster.
+    </div>
+
+6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster) command executed on the new
+`riak7` node:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+7. If the changes are correct, you can commit them with the
+[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster):
+
+    ```bash
+    riak-admin cluster clear
+    ```
+
+Once you have successfully replaced the node, the old node (`riak4`)
+should begin leaving the cluster. You can check on ring readiness after
+replacing the node with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#ringready)
+and [`riak-admin member-status`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#member-status)
+commands.
+
+{{% note title="Ring Settling" %}}
+You'll need to make sure that no other ring changes occur between the time
+when you start the new node and the time the ring settles with the new IP
+info.
+
+The ring is considered settled when the new node reports `true` when you run
+the `riak-admin ringready` command.
+{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.7/using/cluster-operations/secondary-indexes.md b/content/riak/kv/2.9.7/using/cluster-operations/secondary-indexes.md new file mode 100644 index 0000000000..eca9085e09 --- /dev/null +++ b/content/riak/kv/2.9.7/using/cluster-operations/secondary-indexes.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.7 +#menu: +# riak_kv-2.9.7: +# name: "Secondary Indexes" +# identifier: "cluster_operations_2i" +# weight: 109 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + + diff --git a/content/riak/kv/2.9.7/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.9.7/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..b0375d75e2 --- /dev/null +++ b/content/riak/kv/2.9.7/using/cluster-operations/strong-consistency.md @@ -0,0 +1,76 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +aliases: +--- + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats. 
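+
+For example, to see only these stats on a running node, you can filter
+the full status listing by their shared prefix. A quick sketch, assuming
+the plain-text `name : value` output of `riak-admin status`:
+
+```bash
+# Show only the strong consistency stats
+riak-admin status | grep consistent
+```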
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response + + + + diff --git a/content/riak/kv/2.9.7/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/2.9.7/using/cluster-operations/tictac-active-anti-entropy.md new file mode 100644 index 0000000000..648babfc57 --- /dev/null +++ b/content/riak/kv/2.9.7/using/cluster-operations/tictac-active-anti-entropy.md @@ -0,0 +1,34 @@ +--- +title: "TicTac Active Anti-Entropy" +description: "An Active Anti-Entropy library" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "TicTac Active Anti-Entropy" + 
identifier: "TicTac_aae"
+    weight: 111
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/kv/2.9.7/ops/advanced/tictacaae/
+  - /riak/2.9.7/ops/advanced/ticktacaae/
+---
+
+
+Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) (AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning.
+
+## TicTac AAE
+
+The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to fully integrate the library into the KV 3.0 release.
+
+TicTac Active Anti-Entropy makes two changes to the way anti-entropy has previously worked in Riak. The first change is to the way Merkle trees are constructed, so that they are built incrementally. The second change allows the underlying anti-entropy key store to be key-ordered while still allowing faster access to keys via their Merkle tree location or the last modified date of the object.
+
+## Configuring AAE
+
+Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and
+off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements.
+
+
+
diff --git a/content/riak/kv/2.9.7/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.9.7/using/cluster-operations/v2-multi-datacenter.md
new file mode 100644
index 0000000000..39688e9324
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/cluster-operations/v2-multi-datacenter.md
@@ -0,0 +1,263 @@
+---
+title_supertext: "V2 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "V2 Multi-Datacenter"
+    identifier: "cluster_operations_v2"
+    weight: 115
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.7/ops/mdc/v2/operations
+  - /riak/kv/2.9.7/ops/mdc/v2/operations
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication system is largely
+controlled by the `riak-repl` command. The sections below detail the
+available subcommands.
+
+## add-listener
+
+Adds a listener (primary) to the given node, IP address, and port.
+
+```bash
+riak-repl add-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-nat-listener
+
+Adds a NAT-aware listener (primary) to the given node, IP address, port,
+NAT IP, and NAT port. If a non-NAT listener already exists with the same
+internal IP and port, it is "upgraded" to a NAT listener.
+
+```bash
+riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010
+```
+
+## del-listener
+
+Removes and shuts down a listener (primary) on the given node, IP
+address, and port.
+
+```bash
+riak-repl del-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-site
+
+Adds a site (secondary) to the local node, connecting to the specified
+listener.
+ +```bash +riak-repl add-site <ipaddr> <portnum> <sitename> +``` + +Below is an example usage: + +```bash +riak-repl add-site 10.0.1.156 9010 newyork +``` + +## del-site + +Removes a site (secondary) from the local node by name. + +```bash +riak-repl del-site <sitename> +``` + +Below is an example usage: + +```bash +riak-repl del-site newyork +``` + +## status + +Obtains status information about replication. Reports counts on how much +data has been transmitted, transfer rates, message queue lengths of +clients and servers, number of fullsync operations, and connection +status. This command only displays useful information on the leader +node. + +```bash +riak-repl status +``` + +## start-fullsync + +Manually initiates a fullsync operation with connected sites. + +```bash +riak-repl start-fullsync +``` + +## cancel-fullsync + +Cancels any fullsync operations in progress. If a partition is in +progress, synchronization will stop after that partition completes. +During cancellation, `riak-repl status` will show `cancelled` in the +status. + +```bash +riak-repl cancel-fullsync +``` + +## pause-fullsync + +Pauses any fullsync operations in progress. If a partition is in +progress, synchronization will pause after that partition completes. +While paused, `riak-repl status` will show `paused` in the status +information. Fullsync may be cancelled while paused. + +```bash +riak-repl pause-fullsync +``` + +## resume-fullsync + +Resumes any fullsync operations that were paused. If a fullsync +operation was running at the time of the pause, the next partition will +be synchronized. If not, it will wait until the next `start-fullsync` +command or `fullsync_interval`. + +```bash +riak-repl resume-fullsync +``` + +## riak-repl Status Output + +The following definitions describe the output of the `riak-repl status` +command. Please note that many of these statistics will only appear on +the current leader node, and that all counts will be reset to 0 upon +restarting Riak. + +### Client + +Field | Description +:-----|:----------- +`client_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.7/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a> +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected sites +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of site connections made to this node +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. 
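+
+Because the `client_rx_kbps` and `client_tx_kbps` snapshots are taken
+once a minute, polling the status output at the same cadence gives a
+rough picture of realtime replication throughput. A minimal sketch (run
+it on the current leader node, since that is where most of these stats
+are reported):
+
+```bash
+# Refresh the full replication status every 60 seconds
+watch -n 60 riak-repl status
+```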
+
+### Server
+
+Field | Description
+:-----|:-----------
+`server_bytes_recv` | The total number of bytes the server (listener) has received
+`server_bytes_sent` | The total number of bytes the server (listener) has sent
+`server_connect_errors` | The number of listener to site connection errors
+`server_connects` | The number of times the listener connects to the client site
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.7/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a>
+
+### Elections and Objects
+
+Field | Description
+:-----|:-----------
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+
+### Other
+
+Field | Description
+:-----|:-----------
+`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>`
+`[sitename]_ips` | Defines a replication site
+`leader` | Which node is the current leader of the cluster
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the client (site) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The listeners currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>`connected` - The IP address and port of a connected client (site)</li><li>`cluster_name` - The name of the connected client (site)</li><li>`connecting` - The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | State shows what the current replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul>
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/2.9.7/configuring/v2-multi-datacenter/) guide for more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the server (listener) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/2.9.7/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+## Keylist Strategy
+
+The following fields appear under both the `keylist_server` and
+`keylist_client` fields. Any differences between the two are noted in
+the table.
+
+Field | Description
+------|------------
+`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
+`partition_start` | The number of elapsed seconds since replication has started on a given partition
+`stage_start` | The number of elapsed seconds since replication has started on a given stage
+`get_pool_size` | The number of Riak GET finite state machine workers available to process requests
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter.md
new file mode 100644
index 0000000000..9aae4f5ff3
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter.md
@@ -0,0 +1,425 @@
+---
+title_supertext: "V3 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "V3 Multi-Datacenter"
+    identifier: "cluster_operations_v3"
+    weight: 114
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.7/ops/mdc/v3/operations
+  - /riak/kv/2.9.7/ops/mdc/v3/operations
+---
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{<baseurl>}}riak/kv/2.9.7/using/reference/multi-datacenter/statistics
+
+This document explains how to manage replication with the `riak-repl`
+command. The behavior of some of these commands can be altered by
+setting the appropriate [configuration][config v3 mdc] values.
+
+All commands need to be run only once on a single node of a cluster for
+the changes to propagate to all other nodes. All changes will persist
+across node restarts and will automatically take effect when nodes are
+added to the cluster.
+
+## Cluster Connectivity
+
+#### clustername
+
+Set the `clustername` for all nodes in a Riak cluster.
+
+* Without a parameter, returns the current name of the cluster
+* With a parameter, names the current cluster
+
+To **set** the `clustername`:
+
+* Syntax: `riak-repl clustername <clustername>`
+* Example: `riak-repl clustername Boston`
+
+To **get** the `clustername`:
+
+* Syntax: `riak-repl clustername`
+* Example: `riak-repl clustername`
+
+#### connect
+
+The `connect` command establishes communications from a source cluster
+to a sink cluster of the same ring size. The `host:port` of the sink
+cluster is used for this. The IP and port to connect to can be found in
+the `advanced.config` of the remote cluster, under `riak_core` and
+`cluster_mgr`.
+
+The `host` can be either an IP address
+
+* Syntax: `riak-repl connect <ip>:<port>`
+* Example: `riak-repl connect 192.168.2.1:9080`
+
+...or a hostname that will resolve to an IP address.
+
+* Syntax: `riak-repl connect <host>:<port>`
+* Example: `riak-repl connect Austin:9080`
+
+#### disconnect
+
+Disconnects a source cluster from a sink cluster.
+
+You may define a `host:port` combination
+
+* Syntax: `riak-repl disconnect <host>:<port>`
+* Example: `riak-repl disconnect 192.168.2.1:9080`
+
+...or use the *name* of the cluster.
+
+* Syntax: `riak-repl disconnect <sink_clustername>`
+* Example: `riak-repl disconnect Austin`
+
+#### connections
+
+Display a list of connections between source and sink clusters.
+ +* Syntax: `riak-repl connections` +* Example: `riak-repl connections` + +#### clusterstats + +Displays current cluster stats using an optional `ip:port` as well as an +optional `protocol-id`. + +`protocol-id` can be one of the following: + +* `cluster_mgr` +* `rt_repl` +* `fs_repl` + +The `clusterstats` command in use: + +* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>` +* Example: `riak-repl clusterstats 192.168.2.1:9080` +* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl` + + +## Realtime Replication Commands + +#### realtime enable + +Enable realtime replication from a source cluster to sink clusters. + +This will start queuing updates for replication. The cluster will still +require an invocation of `realtime start` for replication to occur. + +* Syntax: `riak-repl realtime enable <sink_clustername>` +* Example: `riak-repl realtime enable Austin` + +#### realtime disable + +Disable realtime replication from a source cluster to sink clusters. + +* Syntax: `riak-repl realtime disable <sink_clustername>` +* Example: `riak-repl realtime disable Austin` + + +#### realtime start + +Start realtime replication connections from a source cluster to sink +clusters. See also `realtime enable` (above). + +* Syntax: `riak-repl realtime start <sink_clustername>` +* Example: `riak-repl realtime start Austin` + +#### realtime stop + +Stop realtime replication from a source cluster to sink clusters. + +* Syntax `riak-repl realtime stop <sink_clustername>` +* Example `riak-repl realtime stop Austin` + + +## Fullsync Replication Commands + +These behaviors can be altered by using the `advanced.config` +`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information. + +#### fullsync enable + +Enable fullsync replication from a source cluster to sink clusters. By +default, a fullsync will begin as soon as a connection to the remote +cluster is established. + +* Syntax: `riak-repl fullsync enable <sink_clustername>` +* Example: `riak-repl fullsync enable Austin` + +#### fullsync disable + +Disables fullsync for a cluster. + +* Syntax: `riak-repl fullsync disable <sink_clustername>` +* Example: `riak-repl fullsync disable Austin` + +#### fullsync start + +Starts a fullsync. If the application configuration +`fullsync_on_connect` is set to `false`, a fullsync needs to be started +manually. This is also used to trigger a periodic fullsync using a cron +job. While a fullsync is in progress, a `start` command is ignored and a +message is logged. + +* Syntax: `riak-repl fullsync start <sink_clustername>` +* Example: `riak-repl fullsync start Austin` + +#### fullsync stop + +Stops a fullsync. + +* Syntax: `riak-repl fullsync stop <sink_clustername>` +* Example: `riak-repl fullsync stop Austin` + +## Cascading Realtime Writes + +#### realtime cascades + +Shows the current cascading realtime setting. + +* Syntax: `realtime cascades` +* Example: `riak-repl realtime cascades` + +#### realtime cascades always + +Enable realtime cascading writes. + +* Syntax: `realtime cascades always` +* Example: `riak-repl realtime cascades always` + +#### realtime cascades never + +Disable realtime cascading writes. + +* Syntax: `realtime cascades never` +* Example: `riak-repl realtime cascades never` + + +## NAT + +**Note**: See the [V3 Multi Data Center Replication With NAT][config v3 nat] for more information. + +#### nat-map show + +Show the current NAT mapping table. 
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### proxy-get enable
+
+Enable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provides a redirect to the `<to-cluster-id>` for `proxy_get` if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+
+Show the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+
+Delete an existing redirect so that `proxy_get` requests go to the
+original provider cluster id again.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Display this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+  ```bash
+  riak-repl show-local-cluster-id
+  ```
+
+  Possible output:
+
+  ```
+  local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+  ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster. This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster across all nodes on that cluster for a fullsync
+to a sink cluster. This means that if fullsync is configured for two
+different clusters, both with a `max_fssource_cluster` of 5, then 10
+fullsync workers can be in progress. This parameter only affects nodes
+on the source cluster on which it is defined via the configuration file
+or the command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node's number of maximum connections. This parameter only affects
+nodes on the sink cluster on which it is defined via the configuration
+file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) and `mode_repl13`
+(Version 3), separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+  ```bash
+  riak-repl modes mode_repl12 mode_repl13
+  ```
+
+  Possible output:
+
+  ```
+  Current replication modes: [mode_repl12,mode_repl13]
+  ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+  ```bash
+  riak-repl modes
+  ```
+
+  Possible output:
+
+  ```
+  Current replication modes: [mode_repl12,mode_repl13]
+  ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and realtime replication replicate data from source clusters to sink clusters,
+but some configurations and metadata (such as search indices and bucket properties) will
+not be replicated.
+
+Not replicating certain configurations and metadata supports
+heterogeneous cluster configurations in Replication, but there are
+operational steps you can take when you want homogeneous cluster
+configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you would change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Buckets and Bucket Type properties on the source cluster
+will _not_ be replicated from source clusters to sink clusters.
+
+If you want the properties for Buckets or Bucket Types
+present on the source cluster to be propagated to sink clusters,
+you should update this data for each cluster at the same
+time you would change the source cluster.
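+
+For example, to keep bucket type properties homogeneous, the same change
+can be applied to every cluster by hand. A sketch, where the `mytype`
+bucket type and the `allow_mult` change are purely illustrative:
+
+```bash
+# Run the same update on the source cluster and then on each sink
+# cluster; replication will not propagate it for you
+riak-admin bucket-type update mytype '{"props":{"allow_mult":false}}'
+```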
+ + + + diff --git a/content/riak/kv/2.9.7/using/performance.md b/content/riak/kv/2.9.7/using/performance.md new file mode 100644 index 0000000000..a6db246000 --- /dev/null +++ b/content/riak/kv/2.9.7/using/performance.md @@ -0,0 +1,268 @@ +--- +title: "Improving Performance" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Performance" + identifier: "managing_performance" + weight: 206 + parent: "managing" +toc: true +aliases: + - /riak/kv/2.9.7/ops/tuning/linux/ + - /riak/2.9.7/ops/tuning/linux/ +--- + +Many Unix-like operating systems and distributions are tuned for desktop +or light use out of the box and not for a production database. This +guide describes recommended system performance tunings for operators of +new and existing Riak clusters. The tunings present in this guide should +be considered as a starting point. It is important to make note of what +changes are made and when in order to measure the impact of those +changes. + +For performance and tuning recommendations specific to running Riak +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.7/using/performance/amazon-web-services). + +{{% note title="Note on other operating systems" %}} +Unless otherwise specified, the tunings recommended below are for Linux +distributions. Users implementing Riak on BSD and Solaris distributions can +use these tuning recommendations to make analogous changes in those operating +systems. +{{% /note %}} + +## Storage and File System Tuning + +### Virtual Memory + +Due to the heavily I/O-focused profile of Riak, swap usage can result in +the entire server becoming unresponsive. We recommend setting +`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much +as possible: + +```config +vm.swappiness = 0 +``` + +Ideally, you should disable swap to ensure that Riak's process pages are +not swapped. Disabling swap will allow Riak to crash in situations where +it runs out of memory. This will leave a crash dump file, named +`erl_crash.dump`, in the `/var/log/riak` directory which can be used to +determine the cause of the memory usage. + +### Transparent Huge Pages (THP) + +Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately this operation is rather OS specific. As many of our customers are running Red Hat 6, we have included instructions on how to do so underneath. If you are using a different operating system, please refer to documentation for your OS. + +In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line: + +``` +transparent_hugepage=never +``` + +For the change to become effective, a server reboot is required. + +{{% note title="Note on Kernel Tuning Tools" %}} +Some Kernel tuning tools such as ktune specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the Kernel tuning tool you are using as to how to disable THP. +{{% /note %}} + +### Mounts + +Riak makes heavy use of disk I/O for its storage operations. 
It is
+important that you mount volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
+touched when read. This flag can be set temporarily using the following
+command:
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` option can be set in `/etc/fstab` to make
+the mount permanent.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is completely fair
+queuing, or `cfq`, which is designed for desktop use. While it is a
+good general-purpose scheduler, it is not designed to provide the kind
+of throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* The `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* The `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+
+To check the scheduler depth for block device `sda`, use the following
+command:
+
+```bash
+cat /sys/block/sda/queue/nr_requests
+```
+
+To increase the scheduler depth to 1024, use the following command:
+
+```bash
+echo 1024 > /sys/block/sda/queue/nr_requests
+```
+
+### Filesystem
+
+Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and
+[XFS](http://xfs.org/index.php/Main_Page) are recommended on some
+operating systems for greater reliability and recoverability.
+
+At this time, Basho can recommend using ZFS on Solaris, SmartOS, and
+OmniOS. ZFS may work well with Riak on direct Solaris clones like
+IllumOS, but we cannot yet recommend this. [ZFS on
+Linux](http://zfsonlinux.org) is still too early in its project lifetime
+to be recommendable for production use due to concerns that have been
+raised about excessive memory use. ZFS on FreeBSD is more mature than
+ZFS on Linux, but Basho has not yet performed sufficient performance and
+reliability testing to recommend using ZFS and Riak on FreeBSD.
+
+In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and
+[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on
+operating systems on which ZFS or XFS are not available or recommended.
+
+The ext4 file system defaults include two options that increase
+integrity but slow performance. Because Riak's integrity is based on
+multiple nodes holding the same data, these two options can be changed
+to boost I/O performance. We recommend setting `barrier=0` and
+`data=writeback` when using the ext4 filesystem.
+
+Similarly, the XFS file system defaults can be optimized to improve
+performance. We recommend setting `nobarrier`, `logbufs=8`,
+`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem.
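+
+For example, the following sketch persists the recommended ext4 options
+for a Riak data volume; the device name and mount point are
+placeholders, so substitute your own:
+
+```bash
+# Append an fstab entry with the recommended ext4 mount options, then
+# remount so the options take effect without a reboot.
+echo '/dev/sdb1 /var/lib/riak ext4 noatime,barrier=0,data=writeback 0 0' \
+  | sudo tee -a /etc/fstab
+sudo mount -o remount /var/lib/riak
+```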
+
+As shown above, these options, like `noatime`, should be added to
+`/etc/fstab` so that they persist across server restarts.
+
+## Kernel and Network Tuning
+
+The following settings are minimally sufficient to improve many aspects
+of Riak usage on Linux, and should be added or updated in
+`/etc/sysctl.conf`:
+
+```config
+net.ipv4.tcp_max_syn_backlog = 40000
+net.core.somaxconn = 40000
+net.core.wmem_default = 8388608
+net.core.rmem_default = 8388608
+net.ipv4.tcp_sack = 1
+net.ipv4.tcp_window_scaling = 1
+net.ipv4.tcp_fin_timeout = 15
+net.ipv4.tcp_keepalive_intvl = 30
+net.ipv4.tcp_tw_reuse = 1
+net.ipv4.tcp_moderate_rcvbuf = 1
+```
+
+{{% note title="Note on system default" %}}
+In general, these recommended values should be compared with the system
+defaults and only changed if benchmarks or other performance metrics indicate
+that networking is the bottleneck.
+{{% /note %}}
+
+The following settings are optional, but may improve performance on a
+10Gb network:
+
+```config
+net.core.rmem_max = 134217728
+net.core.wmem_max = 134217728
+net.ipv4.tcp_mem = 134217728 134217728 134217728
+net.ipv4.tcp_rmem = 4096 277750 134217728
+net.ipv4.tcp_wmem = 4096 277750 134217728
+net.core.netdev_max_backlog = 300000
+```
+
+Certain network interfaces ship with on-board features that have been
+shown to hinder Riak network performance. These features can be disabled
+via `ethtool`.
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Changing these values affects all network operations, so benchmark
+before and after any change rather than tuning blindly.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho in nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, increasing the open
+files limit is necessary. See [Open Files Limit]({{<baseurl>}}riak/kv/2.9.7/using/performance/open-files-limit/) for more
+details.
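+
+A quick sketch of checking and raising the limit (the `riak` user name
+and the value `65536` are illustrative; see the linked guide for
+platform-specific details):
+
+```bash
+# Check the current limit for the shell's user
+ulimit -n
+
+# Raise the limit for the riak user via /etc/security/limits.conf;
+# assumes no conflicting entries already exist in the file.
+cat <<'EOF' | sudo tee -a /etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 65536
+EOF
+```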
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.7/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/2.9.7/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/2.9.7/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/2.9.7/using/performance/open-files-limit/)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/performance/amazon-web-services.md b/content/riak/kv/2.9.7/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..45208c2f94
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/performance/amazon-web-services.md
@@ -0,0 +1,247 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/tuning/aws
+  - /riak/kv/2.9.7/ops/tuning/aws
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types which encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are Disk I/O, RAM, and Network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used.
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-Optimized EC2 instances, which provide between 500 Megabits per
+second and 1,000 Megabits per second of throughput with [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available, and recommended for use with
+Provisioned IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they
+would physical servers to compensate for the performance variability
+caused by shared, virtualized resources. Plan to have more EC2
+instance-based nodes than physical server nodes when estimating cluster
+size with respect to node count.
+{{% /note %}} + +## Operating System + +### Clocks + +NTP is configured by default on Amazon EC2 Linux instances. Please +refer to the [Set the Time for an +Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html) +section of the EC2 documentation for steps on verifying if NTP is +working properly. If NTP is not working properly, significant clock +drift can occur. + +### Mounts and Scheduler + +On EBS volumes, the **deadline** scheduler should be used. To check the +scheduler in use for block device xvdf, for example, use the following +command: + +```bash +cat /sys/block/xvdf/queue/scheduler +``` + +To set the scheduler to deadline, use the following command: + +```bash +echo deadline > /sys/block/xvdf/queue/scheduler +``` + +More information on the disk scheduler is available in [Improving Performance](../). + +### Virtual Memory Subsystem + +EBS volumes have considerably less bandwidth than hardware disks. To +avoid saturating EBS bandwidth and inducing IO latency spikes, it is +recommended to tune the Linux virtual memory subsystem to flush smaller +amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings). + +### Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, back up log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally +and are not affected by a wider problem like an AWS service outage. Try +to determine the cause of the problem from the data you have collected. +If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or re-sold under Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk. + +Have your collected data ready when contacting TI Tokyo Client Services. A +Client Services Engineer (CSE) might request log files, configuration +files, or other information. + +## Data Loss + +Many failures either do not entail data loss or have minimal loss that +can be repaired automatically, without intervention. Outage of a single +node does not necessarily cause data loss, as other replicas of every +key are available elsewhere in the cluster. Once the node is detected as +down, other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called hinted handoff). + +The more severe data loss scenarios usually relate to hardware failure +(in the case of AWS, service failure or instance termination). In the +cases where data is lost, several options are available for restoring +the data: + +1. Restore from backup. A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but can be used to partially restore data from + lost EBS volumes. If running in a RAID configuration, rebuilding the + array may also be possible. +2. Restore from Multi-Datacenter Replication. If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the + `riak-repl` command. +3. Restore using intra-cluster repair. 
Riak versions 1.2 and greater
+   include a "repair" feature which will restore lost partitions with
+   data from other replicas. This currently has to be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho CSE.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho is strongly recommended.
+
+## Benchmarking
+
+Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that
+simulates application operations by constructing and communicating
+approximately-compatible data payloads with the Riak cluster directly.
+
+Benchmarking is critical to determining the appropriate EC2 instance
+types, and strongly recommended. More information is available on
+benchmarking Riak clusters with [Basho Bench](../benchmarking).
+
+Besides running Basho Bench, we also advise that you load test Riak with
+your own tests to ensure that the load imparted by MapReduce queries,
+full-text queries, and index queries is within the expected range.
+
+## Simulating Upgrades, Scaling, and Failure States
+
+In addition to simply measuring performance, it is also important to
+measure how performance degrades when the cluster is not in a
+steady state. While under a simulation of live load, the following
+states might be simulated:
+
+1. Stop one or more nodes normally and restart them after a few moments
+   (simulates a [rolling upgrade](../../../setup/upgrading/cluster)).
+2. Join two or more nodes to the cluster.
+3. Leave nodes from the cluster (after step #2).
+4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then
+   restart it.
+5. Hard-reboot a node's instance using the AWS console and then
+   restart it.
+6. Hard-stop and destroy a node's instance and build a new one from
+   backup.
+7. Via networking, e.g. firewall, partition one or more nodes from
+   the rest of the cluster and then restore the original
+   configuration.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. While the Riak node is down, other nodes may also
+be at risk if free capacity is low on the rest of the cluster, so
+monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to update
+the node name.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart.
In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance; it generally performs better but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. Along with EBS you can optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/performance/benchmarking.md b/content/riak/kv/2.9.7/using/performance/benchmarking.md
new file mode 100644
index 0000000000..9aced3dd09
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/performance/benchmarking.md
@@ -0,0 +1,602 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/building/benchmarking
+  - /riak/kv/2.9.7/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   ```basho_bench```. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster. Do not run the ```basho_bench``` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download ```basho_bench```
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/2.9.7/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building ```basho_bench``` from source.
Later
+  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the ```erlang-crypto``` package, which
+  is required by ```basho_bench```.
+* Install ```git``` (to check out the ```basho_bench``` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory to generate the results into:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed ```basho_bench``` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common ```~/``` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+```/home/username/bench_results/current/```.
+
+If you built ```basho_bench``` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput - Operations per second over the duration of the test.
+* Latency at 99th percentile, 99.9th percentile, and max latency for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running basho_bench, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
+
+#### Installing R on Ubuntu
+
+```
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed ```basho_bench``` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+```/home/username/bench_results/summary.png```.
+
+If you have built ```basho_bench``` from source, you can just use
+```make```. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the ```basho_bench/README```.
+
+## How does it work?
+
+When Basho Bench starts (`basho_bench.erl`), it reads the
+configuration (`basho_bench_config.erl`), creates a new results
+directory, and then sets up the test (`basho_bench_app.erl` and
+`basho_bench_sup.erl`).
+
+During test setup, Basho Bench creates the following:
+
+* One **stats process** (`basho_bench_stats.erl`). This process
+  receives notifications when an operation completes, plus the
+  elapsed time of the operation, and stores it in a histogram. At
+  regular intervals, the histograms are dumped to `summary.csv` as
+  well as operation-specific latency CSVs (e.g. `put_latencies.csv`
+  for the PUT operation).
+* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting
+  (`basho_bench_worker.erl`). The worker process wraps a driver
+  module, specified by the [driver](#driver)
+  configuration setting. The driver is randomly invoked using the
+  distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the
+  driver invokes operations is governed by the [mode](#mode) setting.
+
+Once these processes have been created and initialized, Basho Bench
+sends a run command to all worker processes, causing them to begin the
+test. Each worker is initialized with a common seed value for random
+number generation to ensure that the generated workload is reproducible
+at a later date.
+
+During the test, the workers repeatedly call `driver:run/4`, passing in
+the next operation to run, a keygen function, a valuegen function, and
+the last state of the driver. The worker process times the operation,
+and reports this to the stats process when the operation has completed.
+
+Finally, once the test has been run for the duration specified in the
+config file, all workers and stats processes are terminated and the
+benchmark ends. The measured latency and throughput of the test can be
+found in `./tests/current/`. Previous results are in timestamped
+directories of the form `./tests/YYYYMMDD-HHMMSS/`.
+
+## Configuration
+
+Basho Bench ships with a number of sample configuration files, available
+in the `/examples` directory.
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`{driver:run/4}` function with a new operation. There are two possible
+values:
+
+* `{max}` - generate as many ops per second as possible
+* `{rate, N}` - generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e.: as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                              [{{{basho_bench_driver_null,run,
+                                                [asdfput,
+                                                 #Fun<basho_bench_keygen.4.4674>,
+                                                 #Fun<basho_bench_valgen.0.1334>,
+                                                 undefined]}}},
+                                               {{{basho_bench_worker,
+                                                 worker_next_op,1}}},
+                                               {{{basho_bench_worker,
+                                                 max_worker_run_loop,1}}}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` - Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` - Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` - Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` - Directly invokes the Bitcask API
+* `basho_bench_driver_dets` - Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` - operation completed successfully
+* `{error, Reason, NewState}` - operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` - operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` - operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` - generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` - the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` - the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` - selects an integer from uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` - selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` - the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a key generator function.
The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` - takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` - takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` - generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` - generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` - generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed
+% starting at 1000 bytes and a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+The default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+```
+
+#### riakclient_replies
+
+This value is used for R-values during a get operation, and W-values
+during a put operation.
+
+```erlang
+% Expect 1 reply.
+{riakclient_replies, 1}.
+```
+
+#### riakclient_bucket
+
+The Riak bucket to use for reading and writing values. The default is
+`<<"test">>`.
+
+```erlang
+% Use the "bench" bucket.
+{riakclient_bucket, <<"bench">>}.
+```
+
+### basho_bench_driver_riakc_pb Settings
+
+#### riakc_pb_ips
+
+A list of IP addresses to connect the workers to. A random IP will be
+chosen for each worker.
+
+The default is `{riakc_pb_ips, [{127,0,0,1}]}`
+
+```erlang
+% Connect to a cluster of 3 machines
+{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]}
+```
+
+#### riakc_pb_port
+
+The port on which to connect to the PBC interface.
+
+The default is `{riakc_pb_port, 8087}`
+
+#### riakc_pb_bucket
+
+The bucket to use for testing.
+
+The default is `{riakc_pb_bucket, <<"test">>}`
+
+### basho_bench_driver_http_raw Settings
+
+#### http_raw_ips
+
+A list of IP addresses to connect the workers to. Each worker makes
+requests to each IP in a round-robin fashion.
+
+The default is `{http_raw_ips, ["127.0.0.1"]}`
+
+```erlang
+% Connect to a cluster of machines in the 10.x network
+{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}.
+```
+
+#### http_raw_port
+
+Select the default port to connect to for the HTTP server.
+
+The default is `{http_raw_port, 8098}`.
+
+```erlang
+% Connect on port 8090
+{http_raw_port, 8090}.
+```
+
+#### http_raw_path
+
+The base path to use for accessing Riak, usually `"/riak/<bucket>"`.
+
+The default is `{http_raw_path, "/riak/test"}`.
+
+```erlang
+% Place test data in another_bucket
+{http_raw_path, "/riak/another_bucket"}.
+```
+
+#### http_raw_params
+
+Additional parameters to add to the end of the URL. This can be used
+to set the `r`/`w`/`dw`/`rw` parameters as desired.
+
+The default is `{http_raw_params, ""}`.
+
+```erlang
+% Set R=1, W=1 for testing a system with n_val set to 1
+{http_raw_params, "?r=1&w=1"}.
+```
+
+#### http_raw_disconnect_frequency
+
+How often, in seconds or number of operations, the HTTP clients
+(workers) should forcibly disconnect from the server.
+
+The default is `{http_raw_disconnect_frequency, infinity}` (which
+means that Basho Bench should never forcibly disconnect).
+
+```erlang
+% Disconnect after 60 seconds
+{http_raw_disconnect_frequency, 60}.
+
+% Disconnect after 200 operations
+{http_raw_disconnect_frequency, {ops, 200}}.
+```
+
+## Custom Driver
+
+A custom driver must expose the following callbacks.
+
+```erlang
+% Create the worker
+% ID is an integer
+new(ID) -> {ok, State} or {error, Reason}.
+
+% Run an operation
+run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}.
+```
+
+See the [existing
+drivers](https://github.com/basho/basho_bench/tree/master/src) for
+more details.
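+
+Putting the settings above together, here is a sketch of an end-to-end
+run against a local node over Protocol Buffers. The paths and values
+are illustrative only; the settings themselves are the ones documented
+in this guide.
+
+```bash
+# Write a small config using the settings documented above, then run
+# basho_bench against a local Riak node.
+cat > /tmp/riakc_pb.config <<'EOF'
+{mode, max}.
+{duration, 5}.
+{concurrent, 3}.
+{driver, basho_bench_driver_riakc_pb}.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+{value_generator, {fixed_bin, 512}}.
+{operations, [{get, 4}, {put, 4}, {delete, 1}]}.
+{riakc_pb_ips, [{127,0,0,1}]}.
+{riakc_pb_port, 8087}.
+{riakc_pb_bucket, <<"test">>}.
+EOF
+
+basho_bench --results-dir /tmp/bench_results /tmp/riakc_pb.config
+```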
+ + + + diff --git a/content/riak/kv/2.9.7/using/performance/erlang.md b/content/riak/kv/2.9.7/using/performance/erlang.md new file mode 100644 index 0000000000..c62f609321 --- /dev/null +++ b/content/riak/kv/2.9.7/using/performance/erlang.md @@ -0,0 +1,371 @@ +--- +title: "Erlang VM Tuning" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Erlang VM" + identifier: "performance_erlang" + weight: 105 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.9.7/ops/tuning/erlang + - /riak/kv/2.9.7/ops/tuning/erlang +--- + +Riak was written almost exclusively in [Erlang](http://www.erlang.org) +and runs on an Erlang virtual machine (VM), which makes proper Erlang VM +tuning an important part of optimizing Riak performance. The Erlang VM +itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm). + +The table below lists some of the parameters that are available, showing +both their names as used in Erlang and their names as Riak parameters. + +Erlang parameter | Riak parameter +:----------------|:-------------- +[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads` +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K` +[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit` +[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports` +[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online` +[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W` +[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size` +[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables` +[`+scl`](http://www.erlang.org/doc/main/erl.html#+scl) | `erlang.schedulers.compaction_of_load` +[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval` +[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp` +[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing` +[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size` +[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime` +[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after` +[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump` +[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables` +`-name` | `nodename` + +{{% note title="Note on upgrading to 2.0" %}} +In versions of Riak prior to 2.0, Erlang VM-related parameters were specified +in a `vm.args` configuration file; in versions 2.0 and later, all +Erlang-VM-specific parameters are set in the `riak.conf` file. If you're +upgrading to 2.0 from an earlier version, you can still use your old `vm.args` +if you wish. Please note, however, that if you set one or more parameters in +both `vm.args` and in `riak.conf`, the settings in `vm.args` will override +those in `riak.conf`. 
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
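+
+For example, on a hypothetical 16-core machine you might pin the
+scheduler counts explicitly, leaving some logical processors for the
+operating system. The file path and values below are illustrative:
+
+```bash
+# Append explicit scheduler counts to riak.conf; assumes these keys are
+# not already set elsewhere in the file. Restart the node afterwards.
+sudo tee -a /etc/riak/riak.conf <<'EOF'
+erlang.schedulers.total = 16
+erlang.schedulers.online = 14
+EOF
+```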
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` flag to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. epmd uses an unpredictable port for inter-node communication
+by default, binding to port 0, which means that it uses the first
+available port. This can make it difficult to configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively.
The +following would set the range to ports between 3000 and 5000: + +```riakconf +erlang.distribution.port_range.minimum = 3000 +erlang.distribution.port_range.maximum = 5000 +``` + +```appconfig +%% The older, app.config-based system uses different parameter names +%% for specifying the minimum and maximum port + +{kernel, [ + % ... + {inet_dist_listen_min, 3000}, + {inet_dist_listen_max, 5000} + % ... + ]} +``` + +You can set the Erlang VM to use a single port by setting the minimum to +the desired port while setting no maximum. The following would set the +port to 5000: + +```riakconf +erlang.distribution.port_range.minimum = 5000 +``` + +```appconfig +{kernel, [ + % ... + {inet_dist_listen_min, 5000}, + % ... + ]} +``` + +If the minimum port is unset, the Erlang VM will listen on a random +high-numbered port. + +### Maximum Ports + +You can set the maximum number of concurrent ports/sockets used by the +Erlang VM using the `erlang.max_ports` setting. Possible values range +from 1024 to 134217727. The default is 65536. In `vm.args` you can use +either `+Q` or `-env ERL_MAX_PORTS`. + +## Asynchronous Thread Pool + +If thread support is available in your Erlang VM, you can set the number +of asynchronous threads in the Erlang VM's asynchronous thread pool +using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0 +to 1024. If thread support is available on your OS, the default is 64. +Below is an example setting the number of async threads to 600: + +```riakconf +erlang.async_threads = 600 +``` + +```vmargs ++A 600 +``` + +### Stack Size + +In addition to the number of asynchronous threads, you can determine the +memory allocated to each thread using the +`erlang.async_threads.stack_size` parameter, which corresponds to the +`+a` Erlang flag. You can determine that size in Riak using KB, MB, GB, +etc. The valid range is 16-8192 kilowords, which translates to 64-32768 +KB on 32-bit architectures. While there is no default, we suggest a +stack size of 16 kilowords, which translates to 64 KB. We suggest such a +small size because the number of asynchronous threads, as determined by +`erlang.async_threads` might be quite large in your Erlang VM. The 64 KB +default is enough for drivers delivered with Erlang/OTP but might not be +large enough to accommodate drivers that use the `driver_async()` +functionality, documented +[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend +setting higher values with caution, always keeping the number of +available threads in mind. + +## Kernel Polling + +You can utilize kernel polling in your Erlang distribution if your OS +supports it. Kernel polling can improve performance if many file +descriptors are in use; the more file descriptors, the larger an effect +kernel polling may have on performance. Kernel polling is enabled by +default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`. +This corresponds to the +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the +Erlang VM. You can disable it by setting `erlang.K` to `off`. + +## Warning Messages + +Erlang's +[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an +event manager that registers error, warning, and info events from the +Erlang runtime. By default, events from the `error_logger` are mapped as +warnings, but you can also set messages to be mapped as errors or info +reports using the `erlang.W` parameter (or `+W` in `vm.args`). The +possible values are `w` (warnings), `errors`, or `i` (info reports). 
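+
+A small sketch of the `erlang.W` setting described above, assuming the
+value syntax follows the other `erlang.*` parameters and that the key
+is not already set elsewhere in the file:
+
+```bash
+# Map error_logger events to info reports instead of warnings, then
+# restart the node for the change to take effect.
+sudo tee -a /etc/riak/riak.conf <<'EOF'
+erlang.W = i
+EOF
+```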
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) (Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.process_limit` (explained in the section [above](#process-limit)) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can determine that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every
+N seconds, where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can determine how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
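+
+As a closing sketch, a disposable test cluster might make the default
+liveness check explicit and shorten shutdown. The file path and values
+are illustrative, and the sketch assumes these keys are not already set
+elsewhere in the file:
+
+```bash
+# Append VM timing settings to riak.conf on a test node, then restart
+# the node for the changes to take effect.
+sudo tee -a /etc/riak/riak.conf <<'EOF'
+erlang.distribution.net_ticktime = 60
+erlang.shutdown_time = 5s
+EOF
+```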
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/performance/latency-reduction.md b/content/riak/kv/2.9.7/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..e20fc0a1a3
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/performance/latency-reduction.md
@@ -0,0 +1,267 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/tuning/latency-reduction
+  - /riak/kv/2.9.7/ops/tuning/latency-reduction
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about them.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB. Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric | Explanation
+:-----------------------------|:-----------
+`node_get_fsm_objsize_mean` | The mean object size encountered by this node in the last minute
+`node_get_fsm_objsize_median` | The median object size encountered by this node in the last minute
+`node_get_fsm_objsize_95` | The 95th-percentile object size encountered by this node in the last minute
+`node_get_fsm_objsize_99` | The 99th-percentile object size encountered by this node in the last minute
+`node_get_fsm_objsize_100` | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and re-start your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes).
This increases the size of the +distributed Erlang buffer from its default of 1024 KB. Re-start your +node when configuration changes have been made. + +Large objects can also impact latency even if they're only present on +some nodes. If increased latency occurs only on N nodes, where N is your +[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes. + +If large objects are suspected, you should also audit the behavior of +siblings in your cluster, as explained in the [next section](#siblings). + +## Siblings + +In Riak, object conflicts are handled by keeping multiple versions of +the object in the cluster either until a client takes action to resolve +the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects). + +### Mitigation + +The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor +object size (or via an HTTP `GET` request to `/stats`). In the output of +`riak-admin status` in each node, you'll see the following +sibling-related statistics: + +Metric | Explanation +:------------------------------|:----------- +`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute + +Is there an upward trend in these statistics over time? Are there any +large outliers? Do these trends correspond to your observed latency +spikes? + +If you believe that sibling creation problems could be responsible for +latency issues in your cluster, you can start by checking the following: + +* If `allow_mult` is set to `true` for some or all of your buckets, be + sure that your application is correctly resolving siblings. Be sure to + read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate. + If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly. +* Application errors are a common source of problems with + siblings. Updating the same key over and over without passing a + [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. 
If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) \(DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W is set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. The maximum file size setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn), while setting it higher will induce less
+frequent merges with more I/O churn. To find settings that are ideal for
+your use case, we recommend checking out our guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments.
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak is a heavily I/O- and network-intensive system.
+Bottlenecks on either front can lead to undue latency in your cluster.
+We recommend an active monitoring strategy to detect problems
+immediately when they arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+at your disposal, including
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat).
+
+To diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+To set an object size threshold past which a write will succeed but
+register a warning in the logs, adjust the
+`object.size.warning_threshold` parameter. The default is `5MB`.
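+
+As a minimal sketch in `riak.conf`, using both parameters named above
+(the values here are illustrative, not recommendations):
+
+```riakconf
+object.size.maximum = 1MB
+object.size.warning_threshold = 200KB
+```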
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs an error (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.9.7/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..0cc8b22aa7
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,47 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.7/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/performance/open-files-limit.md b/content/riak/kv/2.9.7/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..10bd2157a9
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/performance/open-files-limit.md
@@ -0,0 +1,351 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/tuning/open-files-limit/
+  - /riak/kv/2.9.7/ops/tuning/open-files-limit/
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation.
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session required pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session required pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that the open files limit has been raised to 200000.
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that the open files limit has been raised to 200000.
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 200000
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file.
+
+4\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```/etc/launchd.conf
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.9.7/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..26324c8e40
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,50 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times.
First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference.md b/content/riak/kv/2.9.7/using/reference.md
new file mode 100644
index 0000000000..403a3f6eca
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference.md
@@ -0,0 +1,135 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering them.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/architecture.md b/content/riak/kv/2.9.7/using/reference/architecture.md
new file mode 100644
index 0000000000..4668cf32e8
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/architecture.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+#menu:
+#  riak_kv-2.9.7:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+<!-- TODO: Content -->
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/bucket-types.md b/content/riak/kv/2.9.7/using/reference/bucket-types.md
new file mode 100644
index 0000000000..072823579e
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/bucket-types.md
@@ -0,0 +1,823 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+Bucket types allow groups of buckets to share configuration details and
+enable Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
+cluster to a pre-2.0 version _as long as you have not created and activated a
+bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/2.9.7/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, related to
+  [Riak data types]({{<baseurl>}}riak/kv/2.9.7/developing/data-types), and [strong consistency]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/strong-consistency) respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types/#bucket-properties-and-operations).
You cannot simply take a
+bucket `my_bucket` and assign it a type the way that you would, say,
+set `allow_mult` to `false` or `n_val` to `5`, because there is no
+`type` parameter contained within the bucket's properties (i.e.
+`props`).
+
+Instead, bucket types are applied to buckets _on the basis of how those
+buckets are queried_. Queries involving bucket types take the following
+form:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+In the older system, only bucket and key are specified in queries:
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+## When to Use Bucket Types
+
+Bucket types are a major improvement over the older system of bucket
+configuration in many respects, including the following:
+
+* Bucket types are more flexible because they enable you to define a
+  bucket configuration and then change it if you need to.
+* Bucket types are more reliable because the buckets that bear a given
+  type only have their properties changed when the type is changed.
+  Previously, it was possible to change the properties of a bucket only
+  through client requests.
+* Whereas bucket properties can only be altered by clients interacting
+  with Riak, bucket types are more of an operational concept. The
+  `riak-admin bucket-type` interface (discussed in depth below) enables
+  you to manage bucket configurations on the operations side, without
+  recourse to Riak clients.
+
+For these reasons, we recommend _always_ using bucket types in versions
+of Riak 2.0 and later.
+
+## Managing Bucket Types Through the Command Line
+
+Bucket types are created, updated, activated, and more through the
+`riak-admin bucket-type` interface.
+
+Below is a full list of available sub-commands:
+
+Command | Action | Form |
+:-------|:-------|:-----|
+`create` | Create or modify a bucket type before activation | `create <type> <json>` |
+`activate` | Activate a bucket type | `activate <type>` |
+`list` | List all currently available bucket types and their activation status | `list` |
+`status` | Display the status and properties of a specific bucket type | `status <type>` |
+`update` | Update a bucket type after activation | `update <type> <json>` |
+
+### Creating a Bucket Type
+
+Creating new bucket types involves using the `create <type> <json>`
+command, where `<type>` is the name of the type and `<json>` is a JSON
+object of the following form:
+
+```json
+{
+  "props": {
+    "prop1": "val1",
+    "prop2": "val2",
+    ...
+  }
+}
+```
+
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.7/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.7/developing/getting-started) section.
+
+If creation is successful (in this example, for a type named
+`type_using_defaults`), you should see the following output:
+
+```
+type_using_defaults created
+```
+
+{{% note %}}
+The `create` command can be run multiple times prior to a bucket type being
+activated. Riak will persist only those properties contained in the final call
+of the command.
+{{% /note %}}
+
+Creating bucket types that assign properties _always_ involves passing
+stringified JSON to the `create` command. One way to do that is to pass
+a JSON string directly.
The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak-admin bucket-type create from_json_file "$(cat props.json)"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```bash
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock: 20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after a bucket
+type is created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then those
+properties are fixed for the life of the bucket type.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+{{% /note %}}
+
+## Buckets as Namespaces
+
+In versions of Riak prior to 2.0, all queries are made to a bucket/key
+pair, as in the following example read request:
+
+```java
+Location myKey = new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch = new FetchValue.Builder(myKey).build();
+client.execute(fetch);
+```
+
+```ruby
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```php
+$location = new Location('my_key', new Bucket('my_bucket'));
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```csharp
+var id = new RiakObjectId("my_bucket", "my_key");
+client.Get(id);
+```
+
+```javascript
+client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Object} = riakc_pb_socket:get(Pid,
+                                   <<"my_bucket">>,
+                                   <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+```
+
+With the addition of bucket types in Riak 2.0, bucket types can be used
+as _an additional namespace_ on top of buckets and keys. The same bucket
+name can be associated with completely different data if it is used in
+accordance with a different type. Thus, the following two requests will
+be made to _completely different objects_, even though the bucket and key
+names are the same:
+
+```java
+Location key1 =
+  new Location(new Namespace("type1", "my_bucket"), "my_key");
+Location key2 =
+  new Location(new Namespace("type2", "my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(key1).build();
+FetchValue fetch2 = new FetchValue.Builder(key2).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type2'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("type1", "my_bucket", "my_key");
+var id2 = new RiakObjectId("type2", "my_bucket", "my_key");
+var rslt1 = client.Get(id1);
+var rslt2 = client.Get(id2);
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'type1', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+
+client.fetchValue({
+    bucketType: 'type2', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"type1">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 {<<"type2">>, <<"my_bucket">>},
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key
+```
+
+{{% note title="Note on object location" %}}
+In Riak 2.x, _all requests_ must be made to a location specified by a bucket
+type, bucket, and key rather than to a bucket/key pair, as in previous
+versions.
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
+```
+
+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`.
+
+In version 2.0, this is changing in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolutions in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind when using versions of Riak 2.0 and
+later any time that you create, activate, and use your own bucket types.
+It is still possible to set `allow_mult` to `false` in any given bucket
+type, but it must be done explicitly. If we wanted to set
+`allow_mult` to `false` in our `n_val_of_2` bucket type from above, we
+would need to create or modify the already existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/2.9.7/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/2.9.7/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again.
If it still does not
+    work, then there may be a network partition or other issues that need
+    to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key =
+  new Location(new Namespace("no_siblings", "sensitive_user_data"), "user19735");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(key)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{ ... user data ... }")
+  ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` with the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket.
If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(allYourBaseKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type from
+above if we didn't want to deal with siblings, vclocks, and the like,
+and we could use a `siblings_allowed` bucket type (with all of the
+default properties except `allow_mult` set to `true`). This would give
+us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
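+
+As a sketch, the `siblings_allowed` type described above could be created
+and activated like any other type (the property is shown explicitly for
+clarity, even though `allow_mult` already defaults to `true` for new
+types):
+
+```bash
+riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
+riak-admin bucket-type activate siblings_allowed
+```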
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/custom-code.md b/content/riak/kv/2.9.7/using/reference/custom-code.md
new file mode 100644
index 0000000000..f96de17671
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/custom-code.md
@@ -0,0 +1,135 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/advanced/install-custom-code/
+  - /riak/kv/2.9.7/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/2.9.7/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/2.9.7/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. Note that in Erlang, a file must
+have the same name as the module it contains. So if you are given a file
+named `validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+         on supported platforms
+
+Compiling the module is a straightforward process:
+
+```text
+erlc validate_json.erl
+```
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+Next, you'll need to define a path in which compiled modules can be stored
+and from which they can be loaded. For our example, we'll use a temporary
+directory, `/tmp/beams`, but you should choose a directory for production
+functions based on your own requirements such that they will be available
+where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+### Configure
+
+Copy the compiled `validate_json.beam` file into the `/tmp/beams` directory:
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again, in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+  %% ...
+  {add_paths, ["/tmp/beams/"]},
+  %% ...
+```
+
+After updating `app.config`, Riak must be restarted. In production cases,
+you should ensure that configuration changes applied to multiple nodes are
+rolled out in a rolling fashion, taking time to ensure that the Riak key
+value store has fully initialized and become available for use.
+
+This is done with the `riak-admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure riak_kv is active before restarting the next
+node.
+{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.7/using/reference/failure-recovery.md b/content/riak/kv/2.9.7/using/reference/failure-recovery.md new file mode 100644 index 0000000000..403e7f3a9d --- /dev/null +++ b/content/riak/kv/2.9.7/using/reference/failure-recovery.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + + diff --git a/content/riak/kv/2.9.7/using/reference/handoff.md b/content/riak/kv/2.9.7/using/reference/handoff.md new file mode 100644 index 0000000000..f4a7356206 --- /dev/null +++ b/content/riak/kv/2.9.7/using/reference/handoff.md @@ -0,0 +1,201 @@ +--- +title: "Handoff Reference" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Handoff" + identifier: "managing_ref_handoff" + weight: 101 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.7/ops/running/handoff/ + - /riak/kv/2.9.7/ops/running/handoff/ +--- + +[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/handoff + +Riak is a distributed system built with two essential goals in mind: + +* **fault tolerance**, whereby a Riak cluster can withstand node + failure, network partitions, and other events in a way that does not + disrupt normal functioning, and +* **scalability**, whereby operators can gracefully add and remove nodes + to/from a Riak cluster + +Both of these goals demand that Riak is able to either temporarily or +permanently re-assign responsibility for portions of the keyspace. That +re-assigning is referred to as **intra-cluster handoff** (or simply +**handoff** in our documentation). + +## Types of Handoff + +Intra-cluster handoff typically takes one of two forms: **hinted +handoff** and **ownership transfer**. + +Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +up the slack, so to speak, assuming responsibility for node C's +operations. When node C comes back online, responsibility will be handed +back to the original vnodes. + +Ownership transfer is different because it is meant to be permanent. +It occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +makeup of a cluster changes, e.g. when nodes are added or removed from +the cluster. In this case, responsibility for portions of the keyspace +needs to be fundamentally re-assigned. + +Both types of handoff are handled automatically by Riak. Operators do +have the option, however, of enabling and disabling handoff on +particular nodes or all nodes and of configuring key aspects of Riak's +handoff behavior. More information can be found below. + +## Configuring Handoff + +A full listing of configurable parameters can be found in our +[configuration files]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#intra-cluster-handoff) +document. The sections below provide a more narrative description of +handoff configuration. + +### SSL + +If you want to encrypt handoff behavior within a Riak cluster, you need +to provide each node with appropriate paths for an SSL certfile (and +potentially a keyfile). The configuration below would designate a +certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`: + +```riakconf +handoff.ssl.certfile = /ssl_dir/cert.pem +handoff.ssl.keyfile = /ssl_dir/key.pem +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_ssl_options, [ + {certfile, "/ssl_dir/cert.pem"}, + {keyfile, "/ssl_dir/key.pem"} + ]}, + %% Other configs +]} +``` + +### Port + +You can set the port used by Riak for handoff-related interactions using +the `handoff.port` parameter. 
The default is 8099. This would change the
+port to 9000:
+
+```riakconf
+handoff.port = 9000
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_port, 9000},
+    %% Other configs
+]}
+```
+
+### Background Manager
+
+Riak has an optional background manager that limits handoff activity in
+the name of saving resources. The manager can help prevent system
+response degradation during times of heavy load, when multiple
+background tasks may contend for the same system resources. The
+background manager is disabled by default. The following will enable it:
+
+```riakconf
+handoff.use_background_manager = on
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_use_background_manager, on},
+    %% Other configs
+]}
+```
+
+### Maximum Rejects
+
+If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search/),
+those subsystems can block handoff of primary key/value data, i.e. data
+that you interact with via normal reads and writes.
+
+The `handoff.max_rejects` setting caps the duration for which a
+[vnode]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode) can be blocked: the maximum duration is the value of
+`handoff.max_rejects` multiplied by the value of
+[`vnode_management_timer`]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#vnode_management_timer).
+Thus, if you set `handoff.max_rejects` to 10 and
+`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems
+can block K/V handoff for a maximum of 50 seconds. The default for
+`handoff.max_rejects` is 6, while the default for
+`vnode_management_timer` is `10s`. This would set `handoff.max_rejects`
+to 10:
+
+```riakconf
+handoff.max_rejects = 10
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_rejected_max, 10},
+    %% Other configs
+]}
+```
+
+### Transfer Limit
+
+You can adjust the number of node-to-node transfers (which includes
+handoff) using the `transfer_limit` parameter. The default is 2. Setting
+this higher will increase node-to-node transfer concurrency at the
+expense of greater resource consumption. This would set `transfer_limit`
+to 5:
+
+```riakconf
+transfer_limit = 5
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_concurrency, 5},
+    %% Other configs
+]}
+```
+
+## Enabling and Disabling Handoff
+
+Handoff can be enabled and disabled in two ways: via configuration or
+on the command line.
+
+### Enabling and Disabling via Configuration
+
+You can enable and disable both outbound and inbound handoff on a node
+using the `handoff.outbound` and `handoff.inbound` settings,
+respectively. Both are enabled by default. The following would disable
+both:
+
+```riakconf
+handoff.outbound = off
+handoff.inbound = off
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {disable_outbound_handoff, true},
+    {disable_inbound_handoff, true},
+    %% Other configs
+]}
+```
+
+### Enabling and Disabling Through the Command Line
+
+Check out [Cluster Operations: Handoff][cluster ops handoff] for steps on
+enabling and disabling handoff via the command line.
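+
+As a quick sketch of the command-line route (the `riak-admin handoff`
+subcommands shown here are available in recent Riak KV releases; consult the
+page linked above for the exact options supported by your version), a common
+maintenance pattern looks like this:
+
+```bash
+# Stop new inbound and outbound handoff activity on this node
+riak-admin handoff disable both
+
+# ... perform maintenance ...
+
+# Re-enable handoff, then confirm that transfers are flowing again
+riak-admin handoff enable both
+riak-admin transfers
+```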
+ + + + diff --git a/content/riak/kv/2.9.7/using/reference/jmx.md b/content/riak/kv/2.9.7/using/reference/jmx.md new file mode 100644 index 0000000000..20955c7bec --- /dev/null +++ b/content/riak/kv/2.9.7/using/reference/jmx.md @@ -0,0 +1,190 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.7/ops/running/monitoring/jmx + - /riak/kv/2.9.7/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td>
+    <td>float</td>
+    <td>Mean PUT time (microseconds)</td>
+  </tr>
+  <tr>
+    <td><tt>NodePutFsmTimeMedian</tt></td>
+    <td>float</td>
+    <td>Median PUT time (microseconds)</td>
+  </tr>
+  <tr>
+    <td><tt>NodePuts</tt></td>
+    <td>int</td>
+    <td>Number of PUTs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>NodePutsTotal</tt></td>
+    <td>int</td>
+    <td>Number of PUTs since node start</td>
+  </tr>
+  <tr>
+    <td><tt>PBCActive</tt></td>
+    <td>int</td>
+    <td>Number of active Protocol Buffers connections</td>
+  </tr>
+  <tr>
+    <td><tt>PBCConnects</tt></td>
+    <td>int</td>
+    <td>Number of Protocol Buffers connections in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>PBCConnectsTotal</tt></td>
+    <td>int</td>
+    <td>Number of Protocol Buffers connections since node start</td>
+  </tr>
+  <tr>
+    <td><tt>RingCreationSize</tt></td>
+    <td>int</td>
+    <td>Number of partitions in Riak ring</td>
+  </tr>
+  <tr>
+    <td><tt>VnodeGets</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level GETs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>VnodeGetsTotal</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level GETs since node start</td>
+  </tr>
+  <tr>
+    <td><tt>VnodePuts</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level PUTs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>VnodePutsTotal</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level PUTs since node start</td>
+  </tr>
+</table>
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/logging.md b/content/riak/kv/2.9.7/using/reference/logging.md
new file mode 100644
index 0000000000..4a0ed1c69f
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/logging.md
@@ -0,0 +1,301 @@
+---
+title: "Logging Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Logging"
+    identifier: "managing_ref_logging"
+    weight: 100
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/logging
+  - /riak/kv/2.9.7/ops/running/logging
+---
+
+[cluster ops log]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/logging
+
+Logging in Riak KV is handled by a Basho-produced logging framework for
+[Erlang](http://www.erlang.org) called
+[lager](https://github.com/basho/lager).
+
+lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document.
+
+## Log Directory
+
+Riak's log files are stored in a `/log` directory on each node. The
+location of that directory differs from platform to platform. The table
+below shows you where log files are stored on all supported operating
+systems.
+
+OS | Directory
+:--|:---------
+Ubuntu, Debian, CentOS, RHEL | `/var/log/riak`
+Solaris, OpenSolaris | `/opt/riak/log`
+Source install and Mac OS X | `./log` (where the `.` represents the root installation directory)
+
+## Log Files
+
+Below is a list of files that can be found in each node's `/log`
+directory:
+
+File | Significance
+:----|:------------
+`console.log` | General messages from all Riak subsystems
+`crash.log` | Catastrophic events, such as node failures, running out of disk space, etc.
+`erlang.log` | Events emitted by the [Erlang VM](../../performance/erlang) on which Riak runs
+`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak
+`run_erl.log` | The log file for an Erlang process called `run_erl`, including the command-line arguments used when starting Riak. This file can typically be ignored.
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.9.7, a few of the log messages may contain newline
+characters, preventing reliable identification of the end of each log
+message when ingesting log files with external tools.
+
+A known workaround is to ingest not the logs enabled by the
+`log.console` configurable parameter but rather the logs enabled by the
+`log.syslog` configurable parameter and processed by syslog, e.g. by
+using the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option of syslog implementations - or its equivalent (see, for example,
+[this StackExchange topic
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474)).
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable and
+disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+By default, Riak stores error messages in `./log/error.log`.
+You can change this using the `log.error.file` parameter.
Here is an
+example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated either when a certain size threshold is reached and/or at
+designated times.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` - Every night at midnight
+* `$D23` - Every day at 23:00 (11 pm)
+* `$W0D20` - Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` - On the first day of every month at midnight
+* `$M5D6` - On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, with any size denomination
+you wish, e.g. `KB` or `MB`. The default is 64 KB. The following would
+set the maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix to be prepended to
+each syslog message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages from amongst the available levels, which are listed below. The
+default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither.
This is determined by the value that you give to the
+`log.console` parameter, which gives you one of four options:
+
+* `file` - Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/2.9.7/setup/installing/source), but will differ on platform-specific installations,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` - Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-cli/#attach-direct) command
+* `both` - Console logs will be emitted both to a file and to standard
+  output
+* `off` - Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log].
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/multi-datacenter.md b/content/riak/kv/2.9.7/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..d6025f6bf6
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/multi-datacenter.md
@@ -0,0 +1,53 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling & disabling of per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][ref mdc comparison]
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.9.7/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..8efa987a4f
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,100 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/mdc/comparison
+  - /riak/kv/2.9.7/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/2.9.7/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/2.9.7/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destinations of replication data. Sites and listeners are
+  manually configured on each node in a cluster. This can be a burden to
+  the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity was achieved in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters are exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes and the sink has N nodes, there will be M
+  realtime connections.
Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/2.9.7/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..36f606560a
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,170 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/mdc/monitoring
+  - /riak/kv/2.9.7/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identifying and trending issues or delays in realtime replication
+is important for determining a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute.
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The size of `rt_dirty` can quantify the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue backups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+the maximum size of the queue (displayed in the `realtime_queue_stats`
+section of the replication status output as `max_bytes`) can be
+increased to accommodate the expected load.
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
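+
+As a rough sketch of the statistics-based approach (this assumes the HTTP
+interface is listening on `localhost:8098`; the exact nesting of keys in the
+JSON may vary by Riak version, so adjust the key paths to match your
+`riak-repl status` output):
+
+```python
+import json
+from urllib.request import urlopen
+
+# Fetch the replication statistics from the HTTP endpoint
+raw = urlopen('http://localhost:8098/riak-repl/stats').read()
+stats = json.loads(raw.decode('utf-8'))
+
+# rt_dirty is non-zero when realtime replication errors have occurred;
+# the guidance above is to run a fullsync whenever that happens
+if stats.get('rt_dirty', 0) > 0:
+    print('ALERT: rt_dirty = %s; schedule a fullsync' % stats['rt_dirty'])
+
+# Trend the realtime queue: a growing 'pending' count or any 'drops'
+# warrant attention
+queue = stats.get('realtime_queue_stats', {})
+print('pending=%s drops=%s' % (queue.get('pending'), queue.get('drops')))
+```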
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+* Perform a GET for your canary object on both your source and sink
+  clusters, noting their states. The state of the object in each cluster
+  can be referred to as state `S0`, or the object's initial state.
+* PUT an update for your canary object to the source cluster, updating
+  the state of the object to the next state, `S1`.
+* Perform a GET for your canary on the sink cluster, comparing the state
+  of the object on the source cluster to the state of the object on the
+  sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+A rough estimate of how long it takes an object PUT to a source
+cluster to be replicated to a sink cluster can be obtained by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
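+
+A minimal sketch of the canary check in Python (this assumes the official
+Riak Python client, one client connected to a source node and one to a sink
+node, and a hypothetical `canary` bucket and key; the host names and the SLA
+threshold are illustrative, not recommendations):
+
+```python
+import time
+import riak
+
+source = riak.RiakClient(nodes=[{'host': 'source.example.com'}])
+sink = riak.RiakClient(nodes=[{'host': 'sink.example.com'}])
+
+SLA_SECONDS = 30
+
+# Step 2 of the general process: PUT the next canary state to the source
+state = str(time.time())
+obj = source.bucket('canary').new('rt_canary', data=state)
+obj.store()
+
+# Step 3: poll the sink until the state matches or the SLA is exceeded
+deadline = time.time() + SLA_SECONDS
+while time.time() < deadline:
+    if sink.bucket('canary').get('rt_canary').data == state:
+        print('replicated within SLA')
+        break
+    time.sleep(1)
+else:
+    print('ALERT: replication exceeded %d second SLA' % SLA_SECONDS)
+```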
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/2.9.7/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..64122d1eef
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,66 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/mdc/per-bucket
+  - /riak/kv/2.9.7/ops/mdc/per-bucket
+---
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+The set of valid `repl` values changed between Riak Enterprise versions
+1.1 and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+ * `true` - Enable replication (realtime + fullsync)
+ * `false` - Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+ * `realtime` - Replication only occurs in realtime for this bucket
+ * `fullsync` - Replication only occurs during a fullsync operation
+ * `both` - Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this goes for properties such as `backend`.
+If the bucket doesn't exist in the destination cluster, Riak will create
+it with the default backend and _not_ with the backend used in the
+source cluster.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/multi-datacenter/statistics.md b/content/riak/kv/2.9.7/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..4a3a8e77bf
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,244 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/mdc/statistics
+  - /riak/kv/2.9.7/ops/mdc/statistics
+---
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
+
+1. Many of these statistics will appear only on the current
+   leader node
+2.
The counts for all statistics will be reset to 0 upon restarting
+   Riak unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | A sink error has been detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | A source error has been detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+These values are under `realtime_queue_stats`.
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster.
+`fullsync_start_time` | The time the current fullsync to the specified cluster began.
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync.
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When completed, this will equal the total number of partitions in the ring.
+`error_exits` | If a sync failed or was aborted, the partition will be re-queued and tried again later
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
+`strategy` | The strategy that fulfills fullsync replication.
In previous versions of replication, different values could be configured. This value could be changed depending on your replication needs.
+`fullsync_worker` | The Erlang process ID of the fullsync worker.
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received to the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received to the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication sink
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node.
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | Shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The name of the connected site (sink), as configured. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | Shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for
+more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more
+
+
+## Accessing Replication Web-Based Statistics
+
+These stats are also exposed over HTTP and can be fetched from the
+command line:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats
+```
+
+A simple way to view formatted statistics is to pipe the output through
+a JSON pretty-printer:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/object-deletion.md b/content/riak/kv/2.9.7/using/reference/object-deletion.md
new file mode 100644
index 0000000000..22dfe9fed6
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/object-deletion.md
@@ -0,0 +1,121 @@
+---
+title: "Object Deletion Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Object Deletion"
+    identifier: "managing_ref_object_deletion"
+    weight: 103
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/advanced/deletion
+---
+
+[concept eventual consistency]: ../../../learn/concepts/eventual-consistency
+[concept clusters]: ../../../learn/concepts/clusters
+[glossary vnode]: ../../../learn/glossary/#vnode
+[usage delete objects]: ../../../developing/usage/deleting-objects
+[developing keylist]: ../../../developing/api/http/list-keys
+[developing mapreduce]: ../../../developing/usage/mapreduce
+[cluster mdc]: ../../cluster-operations/v3-multi-datacenter
+[config advanced]: ../../../configuring/reference/#advanced-configuration
+[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum
+[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings
+[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction
+
+In single-server, non-clustered data storage systems, object deletion
+is a trivial process.
+In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however,
+object deletion is far less trivial because objects live on multiple
+[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend.
+
+## Object Deletion Example
+
+The problem of object deletion in distributed systems can be illustrated more concretely using the following example:
+
+* An object is stored on nodes A, B, and C
+* Node C suddenly goes offline due to a network failure
+* A client sends a delete request to node A, which forwards that
+  request to node B, but it cannot reach node C
+* On nodes A and B, the object is deleted
+* Node C comes back online
+* A client attempts to read the object, and the request hits node C
+* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object.
+
+The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred:
+
+1. the object was deleted on A & B but not on C
+2. the object was created on C but not on A & B
+
+To get around this problem, Riak uses *tombstones*.
+
+## Tombstones
+
+Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it.
+
+This allows Riak to understand the difference between an object that has been deleted, and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*.
+
+The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**.
+
+After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to).
+
+## Configuring Object Deletion
+
+The `delete_mode` setting in a cluster's [configuration files][config advanced] determines how long a tombstone will remain before being reaped.
+
+There are three possible settings:
+
+* `keep` - Disables tombstone removal
+* `immediate` - The tombstone is removed as soon as the request is
+  received
+* Custom time interval - How long to wait until the tombstone is
+  removed, expressed in milliseconds. The default is `3000`, i.e. to
+  wait 3 seconds
+
+In general, we recommend setting the `delete_mode` parameter to `keep`
+if you plan to delete and recreate objects under the same key. This protects against failure scenarios in which a deleted object may be resurrected.
+
+Setting `delete_mode` to `immediate` can be useful in situations in
+which an aggressive space reclamation process is necessary, such as
+when running [MapReduce jobs][developing mapreduce], but we do not recommend
+this in general.
+
+Setting `delete_mode` to a longer time duration than the default can be
+useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when
+network connectivity is an issue.
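+
+For illustration, here is a minimal `advanced.config` sketch for this
+setting. The `riak_kv` section and `delete_mode` key come from the
+configuration reference linked above; the value shown is simply the
+three-second default written out explicitly:
+
+```erlang
+%% advanced.config -- delete_mode accepts the atoms keep or immediate,
+%% or an integer interval in milliseconds (3000 is the default)
+[
+ {riak_kv, [
+     {delete_mode, 3000}
+ ]}
+].
+```
+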
+## Deletion from Backends
+
+When attempting to reclaim disk space, deleting data may seem like the obvious first step. However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion when a Riak tombstone is created, and later when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging], for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping, and prior to garbage collection delete operations will actually cause disk space usage to rise slightly.
+
+## Tombstones & Reporting
+
+When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:
+
+* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
+* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain on the node until a further request finds it. At that time, a new reap timer will be initiated.
+* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
+* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too.
+* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.
+
+## Client Library Examples
+
+Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
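+
+As a quick illustration over the HTTP API, a delete is a plain `DELETE`
+request (the bucket and key names here are hypothetical):
+
+```curl
+curl -XDELETE http://localhost:8098/buckets/users/keys/john
+```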
+
+## Resources
+
+* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/runtime-interaction.md b/content/riak/kv/2.9.7/using/reference/runtime-interaction.md
new file mode 100644
index 0000000000..7d769fb65d
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/runtime-interaction.md
@@ -0,0 +1,70 @@
+---
+title: "Runtime Interaction Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Runtime Interaction"
+    identifier: "managing_ref_runtime_interaction"
+    weight: 104
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/advanced/runtime
+  - /riak/kv/2.9.7/ops/advanced/runtime
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters
+
+Riak's [configuration files][config reference] provide a variety of parameters that
+enable you to fine-tune how Riak interacts with two important elements
+of the underlying operating system: distribution ports and OS
+processes/garbage collection.
+
+## Ports
+
+Distribution ports connect Riak nodes within a [cluster][concept clusters]. The
+following port-related parameters are available:
+
+* `runtime_health.triggers.distribution_port` - Whether distribution
+  ports with full input buffers will be counted as busy.
+  * Default: `on`
+* `runtime_health.triggers.port` - Whether ports with full input
+  buffers will be counted as busy. Ports can represent open files or network sockets.
+  * Default: `on`
+* `runtime_health.thresholds.busy_ports` - The threshold at which a
+  warning will be triggered about the number of ports that are overly
+  busy. Ports with full input buffers count toward this threshold.
+  * Default: `2`
+
+## Processes
+
+Riak will log warnings related to busy operating system processes and
+garbage collection. You can specify the conditions in which warnings are
+triggered using the following parameters; a sample configuration follows
+the list:
+
+* `runtime_health.thresholds.busy_processes` - The threshold at which
+  a warning will be triggered about the number of processes that are
+  overly busy. Processes with large heaps or that take a long time to
+  garbage collect will count toward this threshold.
+  * Default: `30`
+* `runtime_health.triggers.process.heap_size` - A process will be
+  marked as busy when its heap exceeds this size (in bytes).
+  * Default: `160444000`
+* `runtime_health.triggers.process.garbage_collection` - A process
+  will be marked as busy when it exceeds this amount of time doing
+  garbage collection. Enabling this setting can cause performance
+  problems on multi-core systems.
+  * Default: `off`
+  * Example when enabled: `50ms`
+* `runtime_health.triggers.process.long_schedule` - A process will
+  become busy when it exceeds this length of time during a single
+  process scheduling and execution cycle.
+  * Default: `off`
+  * Example when enabled: `20ms`
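+
+Putting the parameters above together, a `riak.conf` sketch might look
+like this. The keys and values are taken from the list above; the two
+time-based triggers default to `off` and are shown with the example
+values purely for illustration:
+
+```riakconf
+## Port checks (defaults)
+runtime_health.triggers.distribution_port = on
+runtime_health.triggers.port = on
+runtime_health.thresholds.busy_ports = 2
+
+## Process checks (defaults, except the two time-based triggers)
+runtime_health.thresholds.busy_processes = 30
+runtime_health.triggers.process.heap_size = 160444000
+runtime_health.triggers.process.garbage_collection = 50ms
+runtime_health.triggers.process.long_schedule = 20ms
+```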
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/search.md b/content/riak/kv/2.9.7/using/reference/search.md
new file mode 100644
index 0000000000..3aefc3a797
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/search.md
@@ -0,0 +1,457 @@
+---
+title: "Search Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Search"
+    identifier: "managing_ref_search"
+    weight: 109
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.7/dev/advanced/search
+  - /riak/kv/2.9.7/dev/advanced/search
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters
+[configuring search]: {{<baseurl>}}riak/kv/2.9.7/configuring/search
+
+> **Note on search 2.0 vs. legacy search**
+>
+> This document refers to Riak search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+The project that implements Riak search is codenamed Yokozuna. This is a
+more detailed overview of the concepts and reasons behind the design of
+Yokozuna, for those interested. If you're simply looking to use Riak
+search, you should check out the [Using Search]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search) document.
+
+![Yokozuna]({{<baseurl>}}images/yokozuna.png)
+
+## Riak Search is Erlang
+
+In Erlang OTP, an "application" is a group of modules and Erlang
+processes which together perform a specific task. The word application
+is confusing because most people think of an application as an entire
+program such as Emacs or Photoshop. But Riak Search is just a sub-system
+in Riak itself. Erlang applications are often stand-alone, but Riak
+Search is more like an appendage of Riak. It requires other subsystems
+like Riak Core and KV, but also extends their functionality by providing
+search capabilities for KV data.
+
+The purpose of Riak Search is to bring more sophisticated and robust
+query and search support to Riak. Many people consider Lucene and
+programs built on top of it, such as Solr, as the standard for
+open-source search. There are many successful applications built on
+Lucene/Solr, and it sets the standard for the feature set that
+developers and users expect. Meanwhile, Riak has a great story as a
+highly-available, distributed key/value store. Riak Search takes
+advantage of the fact that Riak already knows how to do the distributed
+bits, combining its feature set with that of Solr, taking advantage of
+the strengths of each.
+
+Riak Search is a mediator between Riak and Solr. There is nothing
+stopping a user from deploying these two programs separately, but this
+would leave the user responsible for the glue between them. That glue
+can be tricky to write. It requires dealing with monitoring, querying,
+indexing, and dissemination of information.
+
+Unlike Solr by itself, Riak Search knows how to do all of the following:
+
+* Listen for changes in key/value (KV) data and make the appropriate
+  changes to indexes that live in Solr. It also knows how to take a user
+  query on any node and convert it to a Solr distributed search, which
+  will correctly cover the entire index without overlap in replicas.
+* Take index creation commands and disseminate that information across
+  the cluster.
+* Communicate with and monitor the Solr OS process.
+
+## Solr/JVM OS Process
+
+Every node in a Riak [cluster][concept clusters] has a corresponding operating
+system (OS) process running a JVM which hosts Solr on the Jetty
+application server. 
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
+
+Because of this mismatch between KV and Solr, Riak Search must act as a
+mediator between the two, meaning it must have a way to inspect a KV
+object and create a structure which Solr can ingest for indexing. In
+Solr this structure is called a **document**. This task of creating a
+Solr document from a Riak object is the job of the **extractor**. To
+perform this task two things must be considered.
+
+**Note**: This is a slight simplification: the fields created by the
+extractor are only a subset of the fields in the final document. Special
+fields needed for Yokozuna to properly query data, as well as tagging
+fields, are also created. This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a
+   Solr document?
+2. If so, how is the object's value mapped from one to the other?
+   For example, the value may be `application/json` which contains
+   nested objects. This must somehow be transformed into a flat
+   structure.
+
+The first question is answered by the _extractor mapping_. By default
+Yokozuna ships with extractors for several common data types. Below is a
+table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation
+of the extractor module. Every extractor must conform to the
+following Erlang specification:
+
+```erlang
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+
+-spec extract(ObjectValue :: binary(), Options :: proplists:proplist()) ->
+          fields() | {error, term()}.
+```
+
+The value of the object is passed along with options specific to each
+extractor. Assuming the extractor correctly parses the value, it will
+return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the
+object's value verbatim and associate it with the field name `text`.
+For example, an object with the value "How much wood could a woodchuck
+chuck if a woodchuck could chuck wood?" would result in the following
+fields list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier.
+JSON can be nested arbitrarily. That is, the key of a top-level object
+can have an object as a value, and this object can have another object
+nested inside, and so on. Yokozuna's JSON extractor must have some method
+of converting this arbitrary nesting into a flat list. It does this by
+concatenating nested object fields with a separator. The default
+separator is `.`. An example should make this clearer.
+
+Below is JSON that represents a person, what city they are from, and what
+cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice; a sketch of a custom extractor follows this
+list.
+
+* Nested objects have their field names concatenated to form a field
+  name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your
+  schema defines this field as multi-valued.
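+
+To make the contract concrete, here is a sketch of a custom extractor
+conforming to the specification above. The module name and the simple
+"key=value" wire format are invented for illustration; only the shape of
+`extract/2` comes from the spec:
+
+```erlang
+-module(my_kv_extractor).
+-export([extract/1, extract/2]).
+
+%% Turn a value like <<"city=Baltimore,name=ryan">> into
+%% [{<<"city">>,<<"Baltimore">>}, {<<"name">>,<<"ryan">>}]
+extract(Value) ->
+    extract(Value, []).
+
+extract(Value, _Opts) ->
+    Pairs = binary:split(Value, <<",">>, [global]),
+    [begin
+         [Name, Val] = binary:split(Pair, <<"=">>),
+         {Name, Val}
+     end || Pair <- Pairs].
+```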
+
+The XML extractor works in a very similar fashion to the JSON extractor,
+except it also has element attributes to worry about. To see the
+document created for an object, without actually writing the object, you
+can use the extract HTTP endpoint. This will do a dry-run extraction and
+return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+     -H 'Content-Type: application/json' \
+     --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and
+types. For each document stored, every field must have a matching name
+in the schema, used to determine the field's type, which in turn
+determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempt to hide any details of the Solr
+schema: a user creates a schema for Yokozuna just as she would for Solr.
+Here is the general structure of a schema.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding
+options are declared. Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
+called `_yz_default`. This is an extremely general schema which makes
+heavy use of dynamic fields---it is intended for development and
+testing. In production, a schema should be tailored to the data being
+indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+correcting entropy (divergence) between the data stored in Riak's
+key-value backend and the indexes stored in Solr. The impetus for AAE is
+that failures come in all shapes and sizes---disk failure, dropped
+messages, network partitions, timeouts, overflowing queues, segmentation
+faults, power outages, etc. Failures range from obvious to invisible.
+Failure prevention is fraught with failure, as well. How do you prevent
+your prevention system from failing? You don't. Code for detection, not
+prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite
+expensive. To minimize the overall cost of detection, AAE makes use of
+hashtrees. Every partition has a pair of hashtrees; one for KV and
+another for Yokozuna. As data is written, the hashtrees are updated in
+real-time.
+
+Each tree stores hashes of the objects. Periodically a partition is
+selected and the pair of hashtrees is _exchanged_. First the root hashes
+are compared. If equal, then there is no more work to do.
+You could have millions of keys in one partition, and verifying they
+**all** agree takes the same time as comparing two hashes. If they don't
+match, then the root's children are checked, and this process continues
+until the individual discrepancies are found. If either side is missing
+a key, or the hashes for a key do not match, then _repair_ is invoked on
+that key. Repair converges the KV data and its indexes, removing the
+entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the
+hashtrees themselves may contain some entropy. For example, what if the
+root hashes agree but a divergence exists in the actual data? Simple:
+you assume you can never fully trust the hashtrees, so periodically you
+_expire_ them. When expired, a tree is completely destroyed and then
+re-built from scratch. This requires folding all data for a partition,
+which can be expensive and take some time. For this reason, by default,
+expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
+[screencast](http://coffee.jtuple.com/video/AAE.html).
+
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a
+stream of tokens. Solr allows many different methods of analysis,
+an important fact because different field values may represent
+different types of data. For data like unique identifiers, dates, and
+categories you want to index the value verbatim---it shouldn't be
+analyzed at all. For text like product summaries, or a blog post,
+you want to split the value into individual words so that they may be
+queried individually. You may also want to remove common words,
+lowercase words, or perform stemming. This is the process of
+_analysis_.
+
+Solr provides many different field types which analyze data in different
+ways, and custom analyzer chains may be built by stringing together XML
+in the schema file, allowing custom analysis for each field. For more
+information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via
+Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata
+   about it that should be indexed, for example storing an image with
+   location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must
+   be added _without_ modifying the object's value.
+
+See
+[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
+for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index
+entries for a given Riak Object are co-located on the same physical
+machine. To query the entire index, all partitions must be contacted.
+Adjacent partitions keep replicas of the same object. Replication allows
+the entire index to be considered by contacting only a subset of the
+partitions. The process of finding a covering set of partitions is known
+as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can
+be thought of as a unique set of nodes along with a covering set of
+partitions. Yokozuna treats the node list as physical hostnames and
+passes them to Solr's distributed search via the `shards` parameter.
+Partitions, on the other hand, are treated logically in Yokozuna. All
+partitions for a given node are stored in the same index, unlike KV,
+which uses _partition_ as a physical separation.
+To properly filter out overlapping replicas, the partition data from the
+cover plan is passed to Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very
+expensive operation as much computation is done symbolically, and the
+process amounts to a knapsack problem. The larger the ring, the more
+expensive. Yokozuna takes advantage of the fact that it has no physical
+partitions by computing a coverage plan asynchronously every few
+seconds, caching the plan for query use. In the case of node failure or
+ownership change this could mean a delay between cluster state and the
+cached plan. This is, however, a good trade-off given the performance
+benefits, especially since even without caching there is a race, albeit
+one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time it takes to initiate a drain to the time the drain is completed.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees. 
Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr. + +* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting. + +* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window. + +* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window. + +* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of querying latency, as measured from the time it takes to send a request to Solr to the time the response is received from Solr. + +* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window. + +* `search_index_bad_entry_count` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. + +* `search_index_bad_entry_one` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute. + +* `search_index_extract_fail_count` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak. + +* `search_index_extract_fail_one` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute. + +While most of the default values are sufficient, you may have to +increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start. + + + diff --git a/content/riak/kv/2.9.7/using/reference/secondary-indexes.md b/content/riak/kv/2.9.7/using/reference/secondary-indexes.md new file mode 100644 index 0000000000..6360cfa31a --- /dev/null +++ b/content/riak/kv/2.9.7/using/reference/secondary-indexes.md @@ -0,0 +1,76 @@ +--- +title: "Secondary Indexes Reference" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Secondary Indexes" + identifier: "managing_ref_2i" + weight: 110 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.7/dev/advanced/2i + - /riak/kv/2.9.7/dev/advanced/2i +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency + +> **Note: Riak Search preferred for querying** +> +> If you're interested in non-primary-key-based querying in Riak, i.e. if +you're looking to go beyond straightforward K/V operations, we now +recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. 
+ +This document provides implementation and other details for Riak's +[secondary indexes]({{<baseurl>}}riak/kv/2.9.7/developing/usage/secondary-indexes/) \(2i) feature. + +## How It Works + +Secondary indexes use **document-based partitioning**, a system where +indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode). This +system is also a local index. Secondary indexes are a list of key/value +pairs that are similar to HTTP headers. At write time, objects are +tagged with index entries consisting of key/value metadata. This +metadata can be queried to retrieve the matching keys. + +![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png) + +Indexes reside on multiple machines. Since indexes for an object are +stored on the same partition as the object itself, query-time +performance issues might arise. When issuing a query, the system must +read from a "covering" set of partitions and then merge the results. +The system looks at how many replicas of data are stored---the N value +or `n_val`---and determines the minimum number of partitions that it +must examine (1 / `n_val`) to retrieve a full set of results, also +taking into account any offline nodes. + +An application can modify the indexes for an object by reading an +object, adding or removing index entries, and then writing the object. +Finally, an object is automatically removed from all indexes when it is +deleted. The object's value and its indexes should be thought of as a +single unit. There is no way to alter the indexes of an object +independently from the value of an object, and vice versa. Indexing is +atomic, and is updated in real time when writing an object. This means +that an object will be present in future index queries as soon as the +write operation completes. + +Riak stores 3 replicas of all objects by default, although this can be +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/replication-properties). The system is capable of generating a full set of results +from one third of the system’s partitions as long as it chooses the +right set of partitions. The query is sent to each partition, the index +data is read, and a list of keys is generated and then sent back to the +requesting node. + +> **Note on 2i and strong consistency** +> +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. 
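+
+As a quick illustration of the mechanics described above, here is a
+hedged HTTP API sketch; the bucket, key, and index names are
+hypothetical:
+
+```curl
+# Tag an object with a binary secondary index at write time
+curl -XPUT http://localhost:8098/buckets/users/keys/john \
+  -H 'Content-Type: text/plain' \
+  -H 'x-riak-index-email_bin: john@example.com' \
+  -d 'user data'
+
+# Later, retrieve all keys whose index matches a given value
+curl http://localhost:8098/buckets/users/index/email_bin/john@example.com
+```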
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/snmp.md b/content/riak/kv/2.9.7/using/reference/snmp.md
new file mode 100644
index 0000000000..af6fb5bc5f
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/snmp.md
@@ -0,0 +1,166 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.7/ops/running/monitoring/snmp
+  - /riak/kv/2.9.7/ops/running/monitoring/snmp
+---
+
+Riak Enterprise provided a built-in SNMP server that allows an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document covers only SNMP v2c, the last supported version. After the release of Riak KV 2.2.3 Enterprise Edition, support for SNMP was dropped. The configuration examples below are left for people analyzing legacy settings and only work with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (e.g. `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: The commas in the IP are in the correct format
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                   {force_load, true}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings, you can start your Riak node and snmpwalk the node to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak, you will need to reference the MIB shipped with Riak. For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+    -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+    192.168.52.129:4000 RIAK
+```
+
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/statistics-monitoring.md b/content/riak/kv/2.9.7/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..4036e96c5a
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/statistics-monitoring.md
@@ -0,0 +1,395 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/stats-and-monitoring
+  - /riak/kv/2.9.7/ops/running/stats-and-monitoring
+---
+
+Riak provides data related to current operating status, which includes
+statistics in the form of counters and histograms. These statistics
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/2.9.7/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered
+statistics, as well as numerous solutions for monitoring and gathering
+statistics that our customers and community report using successfully
+in Riak cluster environments. You can learn more about the specific
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/2.9.7/developing/api/http/status) documentation.
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with
+diagnostics and early warnings of potential problems, as well as help
+guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor Load
+* Available Memory
+* Available disk space
+* Used file descriptors
+* Swap Usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory activity and
+writebacks. Things like massive flushes of dirty pages or steadily
+climbing writeback volumes can indicate poor virtual memory tuning.
+More information can be found [here][sysctl_vm_txt] and in our
+documentation on [system tuning]({{<baseurl>}}riak/kv/2.9.7/using/performance/#storage-and-file-system-tuning).
+
+## Riak Metrics to Graph
+
+Riak metrics fall into several general categories:
+
+1. Throughput metrics
+2. Latency metrics
+3. Erlang resource usage metrics
+4. General Riak load/health metrics
+
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/inspecting-node) is
+not practical, you should pick a minimum relevant subset from these
+categories. Some of the most helpful metrics are discussed below.
+
+### Throughput Metrics
+
+Graphing the throughput stats relevant to your use case is often
+helpful for capacity planning and usage trend analysis. In addition,
+it helps you establish an expected baseline -- that way, you can
+investigate unexpected spikes or dips in the throughput. The
+following stats are recorded for operations that happened *during the
+last minute*.
+
+Metric | Relevance | Operations (for the last minute)
+:--------|:--------|:--------------------------------
+```node_gets``` | K/V | Reads coordinated by this node
+```node_puts``` | K/V | Writes coordinated by this node
+```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes
+```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes
+```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes
+```search_query_throughput_one``` | Search | Search queries on the node
+```search_index_throughput_one``` | Search | Documents indexed by Search
+```consistent_gets``` | Strong Consistency | Consistent reads on this node
+```consistent_puts``` | Strong Consistency | Consistent writes on this node
+```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads
+
+Note that there are no separate stats for updates to Flags or
+Registers, as these are included in ```vnode_map_update```.
+
+### Latency Metrics
+
+As with the throughput metrics, keeping an eye on average (and max)
+latency times will help detect usage patterns, and provide advance
+warning of potential problems.
+
+{{% note title="Note on FSM Time Stats" %}}
+FSM Time Stats represent the amount of time in microseconds required to
+traverse the GET or PUT Finite State Machine code, offering a picture of
+general node health. From your application's perspective, FSM Time effectively
+represents experienced latency. Mean, Median, and 95th-, 99th-, and
+100th-percentile (Max) counters are displayed. These are one-minute stats.
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute
+```pbc_active``` | | Number of currently active protocol buffer connections
+```pbc_connects``` | | Number of new protocol buffer connections established during the last minute
+```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes)
+```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0)
+```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection
+```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection
+
+### General Riak Search Load/Health Metrics
+
+These various stats give a picture of the general level of activity or
+load on the Riak node at any given moment.
+
+Metric | Description
+:------|:------------
+`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+`search_index_bad_entry_one` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute.
+`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last start of Riak.
+`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute.
+
+
+## Command-line Interface
+
+The [`riak-admin`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/) tool provides two
+interfaces for retrieving statistics and other information: `status`
+and `stat`.
+
+### status
+
+Running the `riak-admin status` command will return all of the
+currently available information from a running node.
+
+```bash
+riak-admin status
+```
+
+This will return a list of over 300 key/value pairs, like this:
+
+```
+1-minute stats for 'dev1@127.0.0.1'
+-------------------------------------------
+connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1']
+consistent_get_objsize_100 : 0
+consistent_get_objsize_195 : 0
+... etc ...
+```
+
+A comprehensive list of available stats can be found in the
+[Inspecting a Node]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+
+### stat
+
+The `riak-admin stat` command is related to the `riak-admin status`
+command but provides a more fine-grained interface for interacting with
+stats and information. Full documentation of this command can be found
+in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#stat) document.
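+
+As a quick example, `riak-admin stat show` can fetch a single statistic
+by its dotted path (the path below is illustrative; available paths vary
+by Riak version):
+
+```bash
+riak-admin stat show riak.riak_kv.node.gets
+```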
+
+{{% note title="Note on Riak 2.x Statistics Support" %}}
+Many of the tools below were created either by third parties or by Basho
+engineers for general usage, and have been passed to the community for further
+updates. As such, many of them only aggregate the statistics and messages
+that were output by Riak 1.4.x.
+
+Like all code under [Basho Labs](https://github.com/basho-labs/), the tools
+below are "best effort" and have no dedicated Basho support. We both
+appreciate and need your contribution to keep these tools stable and up to
+date. Please open up a GitHub issue on the repository if you'd like to be a
+maintainer.
+
+Look for banners calling out the tools we've verified support the latest
+Riak 2.x statistics!
+{{% /note %}}
+
+### Self-Hosted Monitoring Tools
+
+#### Riaknostic
+
+[Riaknostic](http://riaknostic.basho.com) is a growing suite of
+diagnostic checks that can be run against your Riak node to discover
+common problems and recommend how to resolve them. These checks are
+derived from the experience of the Basho Client Services Team as well as
+numerous public discussions on the mailing list, IRC room, and other
+online media.
+
+Riaknostic integrates into the `riak-admin` command via a `diag`
+subcommand, and is a great first step in the process of diagnosing and
+troubleshooting issues on Riak nodes.
+
+#### Riak Control
+
+[Riak Control]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-control/) is Basho's REST-driven user interface for managing Riak
+clusters. It is designed to give you quick insight into the health of
+your cluster and allow for easy management of nodes.
+
+While Riak Control does not currently offer specific monitoring and
+statistics aggregation or analysis functionality, it does offer features
+which provide immediate insight into overall cluster health, node
+status, and handoff operations.
+
+#### collectd
+
+[collectd](http://collectd.org) gathers statistics about the system it
+is running on and stores them. The statistics are then typically graphed
+to find current performance bottlenecks, predict system load, and
+analyze trends.
+
+#### Ganglia
+
+[Ganglia](http://ganglia.info) is a monitoring system specifically
+designed for large, high-performance groups of computers, such as
+clusters and grids. Customers and community members using Riak have
+reported success in using Ganglia to monitor Riak clusters.
+
+A [Riak Ganglia module][riak_ganglia] for collecting statistics from
+the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.7/developing/api/http/status) endpoint is also available.
+
+#### Nagios
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x.**
+{{% /note %}}
+
+[Nagios](http://www.nagios.org) is a monitoring and alerting solution
+that can provide information on the status of Riak cluster nodes, in
+addition to various types of alerting when particular events occur.
+Nagios also offers logging and reporting of events and can be used for
+identifying trends and capacity planning.
+
+A collection of [reusable Riak-specific scripts][riak_nagios] is
+available to the community for use with Nagios.
+
+#### OpenTSDB
+
+[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database
+(TSDB) used to store, index, and serve metrics from various sources. It can
+collect data at a large scale and graph these metrics on the fly.
+
+A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of
+the [tcollector framework][tcollector].
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project
+consisting of small programs for sending data to Riemann provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have
+reported successfully using for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.7/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine-generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/2.9.7/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. This data is then available for real-time graphing,
+search, and other visualizations ideal for troubleshooting complex
+issues and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistic information which can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ + + + + diff --git a/content/riak/kv/2.9.7/using/reference/strong-consistency.md b/content/riak/kv/2.9.7/using/reference/strong-consistency.md new file mode 100644 index 0000000000..1251acbda4 --- /dev/null +++ b/content/riak/kv/2.9.7/using/reference/strong-consistency.md @@ -0,0 +1,150 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +aliases: +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.7/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + +## Making the Strong vs. Eventual Decision + +The first system described above may sound like the undisputed champion, +and the second system undesirable. However: + +1. Reads and writes on the first system will often be slower---if only + by a few milliseconds---because the system needs to manage reads and + writes more carefully. If performance is of primary concern, the + first system might not be worth the sacrifice. +2. Reads and writes on the first system may fail entirely if enough + servers are unavailable. If high availability is the top priority, + then the second system has a significant advantage. + +So when deciding whether to use strong consistency in Riak, the +following question needs to be asked: + +#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value? + +If the answer is yes, then you should seriously consider using Riak in a +strongly consistent way for the data that demands it, while bearing in +mind that other data can still be stored in Riak in an eventually +consistent way. 
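+
+For concreteness, here is a minimal sketch of how strong consistency is
+applied in practice (this assumes strong consistency has already been
+enabled in `riak.conf` via `strong_consistency = on`; the bucket type
+name `strongly_consistent` is arbitrary):
+
+```bash
+# Create a bucket type whose keys get strongly consistent semantics
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+
+# Activate the type so clients can read and write through it
+riak-admin bucket-type activate strongly_consistent
+```
+
+Keys stored under this bucket type receive the guarantees described
+above, while keys under ordinary bucket types remain eventually
+consistent.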
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable
+trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available
+than eventually consistent operations because they require a **quorum**
+of available object replicas to succeed. Quorum is defined as N / 2 + 1
+(using integer division), or `n_val` / 2 + 1. If N is set to 7, at least
+4 object replicas must be available; if N is set to 3, at least 2 must
+be available; and so on.
+
+If there is a network partition that leaves less than a quorum of object
+replicas available within an ensemble, strongly consistent operations
+against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault
+tolerance. Consistent operations can still succeed when a minority of
+replicas in each ensemble are offline, faulty, or unreachable. In
+other words, **strongly consistent operations will succeed as long as
+quorum is maintained**. A fuller discussion can be found in the
+[operations]({{<baseurl>}}riak/kv/2.9.7/configuring/strong-consistency/#fault-tolerance)
+documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/2.9.7/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+which can entail a performance hit of varying proportions, depending on
+a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/2.9.7/configuring/strong-consistency/#performance).
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.9.7/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..94f0d53336
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,40 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.7/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+ +[Learn More >>][v2 mdc fullsync] + + + + diff --git a/content/riak/kv/2.9.7/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.9.7/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..b94990e728 --- /dev/null +++ b/content/riak/kv/2.9.7/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,130 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/2.9.7/ops/mdc/v2/architecture + - /riak/kv/2.9.7/ops/mdc/v2/architecture +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.7/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e. +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime mode are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the Figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. 
The site node in the secondary cluster initiates fullsync replication
+   with the primary node by sending a message to the listener node in
+   the primary cluster
+3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode) in their respective clusters and compute a hash for
+   each key's object value. The site node on the secondary cluster sends
+   its complete list of key/hash pairs to the listener node in the
+   primary cluster. The listener node then sequentially compares the
+   received key/hash pairs with its own, identifying any objects that
+   are missing or need updating in the secondary cluster.
+4. The listener node streams the missing objects/updates to the
+   secondary cluster.
+5. The secondary cluster replicates the updates within the cluster to
+   achieve the new object values, completing the fullsync cycle
+
+<br>
+![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png)
+<br>
+
+## Realtime Replication
+
+Riak performs the following steps during realtime
+replication, as illustrated in the Figure below.
+
+1. The secondary cluster establishes a TCP connection to the primary
+2. Realtime replication of a key/object is initiated when an update is
+   sent from a client to the primary cluster
+3. The primary cluster replicates the object locally
+4. The listener node on the primary cluster streams an update to the
+   secondary cluster
+5. The site node within the secondary cluster receives and replicates
+   the update
+
+<br>
+![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png)
+<br>
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/2.9.7/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+source and sink cluster.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.7/using/reference/v2-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..cbf2b406a5
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/v2-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,53 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v2_fullsync"
+    weight: 101
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/mdc/v2/scheduling-fullsync
+  - /riak/kv/2.9.7/ops/mdc/v2/scheduling-fullsync
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.7/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead.
+{{% /note %}}
+
+
+## Scheduling Fullsync Operation
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` + + + + diff --git a/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter.md b/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..1a9bfbbdf1 --- /dev/null +++ b/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter.md @@ -0,0 +1,52 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +aliases: +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] + + + + diff --git a/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..fd98fafd29 --- /dev/null +++ b/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,129 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.7/ops/mdc/v3/aae + - /riak/kv/2.9.7/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak
+Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can now take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a
+technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in
+LevelDB, so if AAE is already active, there is no additional startup
+delay for enabling the `aae` fullsync strategy. AAE can also be enabled
+for the first time on a cluster, although some custom settings can
+enhance performance in this case to help AAE trees be built more
+quickly. See [Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later installed on source and sink
+  clusters
+* Riak MDC Replication Version 3 enabled on source and sink
+  clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the
+  `advanced.config` configuration file must be set to `aae` on both
+  source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In
+  the event that an AAE tree is not built on both the source and sink,
+  fullsync will default to the `keylist` fullsync strategy for that
+  partition.
+
+## Configuration
+
+If you are using Riak version 2.0, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To enable [active anti-entropy][glossary aae] \(AAE) in Riak, you must enable it on both source and sink clusters. If it is not
+enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all
+of the necessary hash trees because the default **build rate** of trees
+is to build 1 partition per hour, per node. With a
+[ring size][concept clusters] of 256 and 5 nodes, that is roughly 51
+partitions per node, or about 2 days.
+
+Changing the rate of tree building can speed up this process, with the
+caveat that rebuilding a tree takes processing time from the cluster,
+and this should not be done without assessing the possible impact on
+get/put latencies for normal cluster operations. For a production
+cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a
+partition has not had its AAE tree built yet, it will default to using
+the `keylist` replication strategy. Instructions on these settings can
+be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting
+the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit`
+settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on
+
If not, the `keylist` replication +strategy will be used. + +To enable AAE w/ Version 3 MDC Replication: + +```advancedconfig +{riak_repl, [ + % ... + {fullsync_strategy, aae}, + % ... + ]} +``` + + + + diff --git a/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/architecture.md new file mode 100644 index 0000000000..d88f79119b --- /dev/null +++ b/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/architecture.md @@ -0,0 +1,186 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Architecture" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Architecture" + identifier: "managing_ref_v3_architecture" + weight: 100 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.7/ops/mdc/v3/architecture + - /riak/kv/2.9.7/ops/mdc/v3/architecture +--- + +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#vnode +[concept clusters]: {{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters + +## How Version 3 Replication Works + +In Multi-Datacenter (MDC) Replication, a cluster can act as either the + +* **source cluster**, which sends replication data to one or +* **sink clusters**, which are generally located in datacenters in other + regions or countries. + +Bidirectional replication can easily be established by making a cluster +both a source and sink to other clusters. Riak +Multi-Datacenter Replication is considered "masterless" in that all +clusters participating will resolve replicated writes via the normal +resolution methods available in Riak. + +In Multi-Datacenter Replication, there are two primary modes of +operation: + +* **Fullsync** replication is a complete synchronization that occurs + between source and sink cluster(s), which can be performed upon + initial connection of a sink cluster if you wish +* **Realtime** replication is a continual, incremental synchronization + triggered by successful writing of new updates on the source cluster + +Fullsync and realtime replication modes are described in detail below. + +## Concepts + +### Sources + +A source refers to a cluster that is the primary producer of replication +data. A source can also refer to any node that is part of the source +cluster. Source clusters push data to sink clusters. + +### Sinks + +A sink refers to a cluster that is the primary consumer of replication +data. A sink can also refer to any node that is part of the sink +cluster. Sink clusters receive data from source clusters. + +### Cluster Manager + +The cluster manager is a Riak service that provides +information regarding nodes and protocols supported by the sink and +source clusters. This information is primarily consumed by the +`riak-repl connect` command. + +### Fullsync Coordinator + +In fullsync replication, a node on the source cluster is elected to be +the *fullsync coordinator*. This node is responsible for starting and +stopping replication to the sink cluster. It also communicates with the +sink cluster to exchange key lists and ultimately transfer data across a +TCP connection. If a fullsync coordinator is terminated as the result of +an error, it will automatically restart on the current node. If the node +becomes unresponsive, a leader election will take place within 5 seconds +to select a new node from the cluster to become the coordinator. In the +event of a coordinator restart, a fullsync will have to restart. 
+ +## Fullsync Replication + +Fullsync replication scans through the list of partitions in a Riak +cluster and determines which objects in the sink cluster need to be +updated. A source partition is synchronized to a node on the sink +cluster containing the current partition. + +## Realtime Replication + +In realtime replication, a node in the source cluster will forward data +to the sink cluster. A node in the source cluster does not necessarily +connect to a node containing the same [vnode][glossary vnode] on +the sink cluster. This allows Riak to spread out realtime replication +across the entire cluster, thus improving throughput and making +replication more fault tolerant. + +### Initialization + +Before a source cluster can begin pushing realtime updates to a sink, +the following commands must be issued: + +1. `riak-repl realtime enable <sink_cluster>` + + After this command, the realtime queues (one for each Riak node) are + populated with updates to the source cluster, ready to be pushed to + the sink. + +2. `riak-repl realtime start <sink_cluster>` + + This instructs the Riak connection manager to contact the sink + cluster. + + <br /> + ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png) + <br /> + + At this point realtime replication commences. + +<ol start="3"> +<li>Nodes with queued updates establish connections to the sink cluster +and replication begins.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png) +<br /> + +### Realtime queueing and synchronization + +Once initialized, realtime replication continues to use the queues to +store data updates for synchronization. + +<ol start="4"> +<li>The client sends an object to store on the source cluster.</li> +<li>Riak writes N replicas on the source cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png) +<br /> + +<ol start="6"> +<li>The new object is stored in the realtime queue.</li> +<li>The object is copied to the sink cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png) +<br /> + +<ol start="8"> +<li>The destination node on the sink cluster writes the object to N +nodes.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png) +<br /> + +<ol start="9"> +<li>The successful write of the object to the sink cluster is +acknowledged and the object removed from the realtime queue.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png) +<br /> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size][concept clusters]; if you are using fullsync +replication, every bucket's `n_val` must be the same in both the +source and sink cluster. 
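+
+A quick way to confirm that two clusters satisfy the ring size
+restriction is to compare the `ring_num_partitions` statistic reported
+by each cluster (a sketch; run on one node per cluster):
+
+```bash
+# Both clusters must report the same value for replication to work
+riak-admin status | grep ring_num_partitions
+```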
+ + +<script type="text/javascript"> +document.write(unescape("%3Cscript src='//munchkin.marketo.net/munchkin.js' type='text/javascript'%3E%3C/script%3E")); +</script> +<script>Munchkin.init('721-DGT-611');</script> + + + + diff --git a/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/cascading-writes.md new file mode 100644 index 0000000000..c4f8389b4a --- /dev/null +++ b/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/cascading-writes.md @@ -0,0 +1,102 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Cascading Realtime Writes" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Cascading Writes" + identifier: "managing_ref_v3_cascading_writes" + weight: 102 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.7/ops/mdc/v3/cascading-writes + - /riak/kv/2.9.7/ops/mdc/v3/cascading-writes +--- + +## Introduction + +Riak includes a feature that cascades realtime writes across +multiple clusters. + +Cascading Realtime Writes is enabled by default on new clusters running +Riak. It will need to be manually enabled on existing clusters. + +Cascading realtime requires the `{riak_repl, rtq_meta}` capability to +function. + +{{% note title="Note on cascading tracking" %}} +Cascading tracking is a simple list of where an object has been written. This +works well for most common configurations. Larger installations, however, may +have writes cascade to clusters to which other clusters have already written. +{{% /note %}} + + +``` ++---+ +---+ +---+ +| A | <-> | B | <-> | C | ++---+ +---+ +---+ + ^ ^ + | | + V V ++---+ +---+ +---+ +| F | <-> | E | <-> | D | ++---+ +---+ +---+ +``` + +In the diagram above, a write at cluster A will begin two cascades. One +goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and +finally B. Each cascade will loop around to A again, sending a +replication request even if the same request has already occurred from +the opposite direction, creating 3 extra write requests. + +This can be mitigated by disabling cascading in a cluster. If cascading +were disabled on cluster D, a write at A would begin two cascades. One +would go through B, C, and D, the other through F, E, and D. This +reduces the number of extraneous write requests to 1. + +A different topology can also prevent extra write requests: + +``` ++---+ +---+ +| A | | E | ++---+ +---+ + ^ ^ ^ ^ + | \ +---+ +---+ / | + | > | C | <-> | D | < | + | / +---+ +---+ \ | + V V V V ++---+ +---+ +| B | | F | ++---+ +---+ +``` + +A write at A will cascade to C and B. B will not cascade to C because +A will have already added C to the list of clusters where the write has +occurred. C will then cascade to D. D then cascades to E and F. E and F +see that the other was sent a write request (by D), and so they do not +cascade. + +## Usage + +Riak Cascading Writes can be enabled and disabled using the +`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/v3-multi-datacenter) for more information. 
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..215f0ad5b7
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,72 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.7/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/2.9.7/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak-repl`
+section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes,
+  between fullsyncs, _or_
+* a list of `{"clustername", time_in_minutes}` pairs, one pair for each
+  sink participating in fullsync replication. Note the commas separating
+  each pair, and the `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {data_root, "/configured/repl/data/root"},
+             {fullsync_interval, 90} %% fullsync runs every 90 minutes
+             % ...
+            ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {data_root, "/configured/repl/data/root"},
+             % clusters sink_boston and sink_newyork have different intervals (in minutes)
+             {fullsync_interval, [
+                 {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+                 {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+            ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been
+  completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the
+  specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last
+  completed fullsync.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/repair-recovery.md b/content/riak/kv/2.9.7/using/repair-recovery.md
new file mode 100644
index 0000000000..4e64398eb2
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/repair-recovery.md
@@ -0,0 +1,53 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details the most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/repair-recovery/errors.md b/content/riak/kv/2.9.7/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..ec70ad0d95
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/repair-recovery/errors.md
@@ -0,0 +1,366 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/recovery/errors
+  - /riak/kv/2.9.7/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may
+encounter -- screws fall out all of the time, the world is an imperfect
+place. This is an attempt at capturing the most common recent errors
+that users encounter, as well as at describing the non-critical error
+atoms that you may find in the logs.
+
+Discovering the source of an error can take some detective work, since
+one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error
+messages may appear in. Depending upon your log configuration some may
+appear more often (e.g., if you set the log level to debug), while others
+may output to your console (e.g., if you tee'd your output or started
+Riak with `riak console`).
+
+You can optionally customize your log message format via the
+`lager_default_formatter` field under `lager` in `app.config`. If you
+do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so that you can look up portions of a
+log message, since printing every variation would be a bit unwieldy. For
+example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+Starts with a date (`12:34:27.999`), followed by the log severity
+(`[error]`), with a message formatted by lager (found in the Lager table
+below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the Lager project, so it's good to note
+some of the more common message formats. In almost every case the
+reasons for the error are described as variables, such as `Reason` or
+`Mod` (meaning the Erlang module which is generally the source of the
+error).
+
+Riak does not format all error messages that it receives into
+human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds to the first message in
+this table, where the Erlang `Mod` value is `riak_core_capability` and
+the reason was an Erlang error: `no function clause matching
+orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang programming favors a "happy path/fail fast" style, one
+of the more common error log strings you might encounter contains
+`{error,{badmatch,{...`. This is Erlang's way of telling you that an
+unexpected value was assigned, so these errors can prefix the more
+descriptive parts. In this case, `{error,{badmatch,{...` prefixes the
+more interesting `insufficient_vnodes_available` error, which can be
+found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or `epmd` may think Riak is running; check for and kill stray `epmd` processes
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.7/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.7/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | An attempt was made to write a file or directory to a read-only filesystem | Keep all Riak directories on read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating an object that is too large or that you simply don't have enough RAM. Base minimum suggested RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang
+atoms (and quite often wrapped within an `{error,{badmatch,{...` tuple,
+as described in the [Erlang Errors](#erlang-errors) section
+above). This table lays out those terse error codes and related log
+messages, if they exist.
+
+### Riak Core
+
+Riak Core is the underlying implementation for KV. These are errors
+originating from that framework, and can appear whether you use KV,
+Search, or any Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g. 
backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check `riak-admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections, and ensure all nodes share the same Erlang cookie (`vm.args` `-setcookie`)
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use, e.g. `{riak_kv_stat, true}`
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possible 
corrupted ring
+`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN` and sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.7/using/repair-recovery/errors/#more"> Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive an `xfer_complete` status
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be
+Riak proper. 
+Riak proper. This is the source of most of the code, and consequently,
+most of the error messages.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status
+`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
+`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that the coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.7/using/repair-recovery/errors/#more"> Step 1</a>.
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/2.9.7/using/repair-recovery/errors/#more"> Step 1</a>.
+`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone
+`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value
+`{field_parsing_failed, {Field, Value}}` | `Could not parse field
+<Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example, a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer
+`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug
+`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend
+`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`.
+`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings
+`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe`
+`invalid_message` | | Unknown event sent to module | Ensure you're running the same version of Riak (and specifically poolboy) across all nodes
+`{invalid_range, Args}` | | Index range query has Start > End | Fix your query
+`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result
+`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files
+`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key
+`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk. If this happens repeatedly, stop/start the riak node, forcing a memory realloc
+`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N
+`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized
+`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe`
+`notfound` | | No value found | Value was deleted, or was not yet stored or replicated
+`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high
+`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value
+`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code
+`{pw_val_unsatisfied, PW, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high
+`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value
+`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high
+`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value
+`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called
+`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value
+`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false`
+`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.7/using/repair-recovery/errors/#more"> Step 1</a>. Or check that you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production.
+`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format
+`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format
+`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value
+`too_many_results` | | Too many results are attempted to be returned | This is a protective error. Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000)
+`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin`
+`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high
+`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value
+ | `Invalid equality query <SKey>` | Equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query
+ | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query
+ | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution
+
+### Backend Errors
+
+These errors tend to stem from server-based problems. Backends are
+sensitive to low or corrupt disk or memory resources, native code, and
+configuration differences between nodes. Conversely, a network issue is
+unlikely to affect a backend.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config
+`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where to store bitcask data, under the `bitcask` section
+`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list
+`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config`
+`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure
+`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config`
+`not_loaded` | | Native driver not loading | Ensure your native drivers exist (.dll or .so files under lib/`project`/priv, where `project` is most likely eleveldb)
+`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket
+`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production
+
+### JavaScript
+
+These are some errors related to JavaScript pre-commit functions,
+MapReduce functions, or simply the management of the pool of JavaScript
+VMs. If you do not use JavaScript, these should not be encountered. If
+they are, check your configuration for overly high `*js_vm*` values, or
+treat them as a symptom of a deeper issue, such as low resources.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase the number of JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`)
+`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A UTF-8 character was given in a bad format | Only use correct UTF-8 characters for JavaScript code and arguments
+`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments
+ | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties
+`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | On OS X you may have compiled with `llvm-gcc` rather than `gcc`.
+
+### MapReduce
+
+These are possible errors logged by Riak's MapReduce implementation,
+both legacy as well as Pipe. If you never use or call MapReduce, you
+should not run across these.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it
+`bad_mapred_inputs` | | A bad value sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary
+`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce query code placed in a Riak value must be stored before it can be executed
+`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct
+`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (both issues may require a node restart)
+`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has invalid input field | Fix MapReduce fields
+`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks.
Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite as Erlang functions +`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post MapReduce request with at least one +`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value +`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly +`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long running job hitting MapReduce timeout, upgrade to Pipe +`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has invalid query field | Fix MapReduce query +`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set reduce phase at all +`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted + | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation, most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects + | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen +  +## Specific messages + +Although you can put together many error causes with the tables above, +here are some common yet esoteric messages with known causes and +solutions. + + Message | Resolution +:--------|:---------- +gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The Node has been changed, either through change of IP or `vm.args` `-name` without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files `rm -rf /var/lib/riak/ring/*` and rejoin to the cluster +gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) +monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). +<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/2.9.7/using/repair-recovery/errors/#more">Step 2</a>. +enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. 
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of error; check for the existence of stale `beam.smp` processes.
+exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range, as the `riak` user will not have access to such ports.
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/2.9.7/using/reference/snmp) configuration.
+RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ... | This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
+
+
+### More
+
+1. <a name="f1"></a>Ensure node inter-communication
+    - Check `riak-admin member-status` and ensure the cluster is valid.
+    - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected.
+    - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node.
+    - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster.
+    - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`).
+
+2. <a name="f2"></a>Run LevelDB compaction
+    1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting, more than one might be a strong indication of a hardware or OS bug)*
+    2. Stop Riak on the node: `riak stop`
+    3. Start an Erlang session (do not start riak, we just want Erlang)
+    4. From the Erlang console, perform the following command to open the LevelDB database:
+
+       ```erlang
+       [application:set_env(eleveldb, Var, Val) || {Var, Val} <-
+           [{max_open_files, 2000},
+            {block_size, 1048576},
+            {cache_size, 20*1024*1024*1024},
+            {sync, false},
+            {data_root, "/var/db/riak/leveldb"}]].
+       ```
+    5. For each of the corrupted LevelDB databases (found by the `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` command above), run this command, substituting in the proper vnode number:
+
+       ```erlang
+       eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []).
+       ```
+    6. When all have finished successfully, you may restart the node: `riak start`
+    7. Check for proper operation by looking at the log files in /var/log/riak and in the LOG files in the affected LevelDB vnodes (a quick check is sketched below).
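+
+As a quick sanity check after the node is back up, you can re-run the compaction-error search and watch the logs. This is a minimal sketch only; it assumes the default LevelDB data directory (`/var/lib/riak/leveldb`) and log directory (`/var/log/riak`), which may differ on your system:
+
+```bash
+# Re-check every LevelDB LOG file for compaction errors (no output means none found)
+find /var/lib/riak/leveldb -name "LOG" -exec grep -l 'Compaction error' {} \;
+
+# Watch the node's console log for new errors while it warms back up
+tail -f /var/log/riak/console.log | grep -i error
+```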
+ + + + diff --git a/content/riak/kv/2.9.7/using/repair-recovery/failed-node.md b/content/riak/kv/2.9.7/using/repair-recovery/failed-node.md new file mode 100644 index 0000000000..1c53a6cec3 --- /dev/null +++ b/content/riak/kv/2.9.7/using/repair-recovery/failed-node.md @@ -0,0 +1,114 @@ +--- +title: "Recovering a Failed Node" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Recover a Failed Node" + identifier: "repair_recover_failed_node" + weight: 104 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.7/ops/running/recovery/failed-node + - /riak/kv/2.9.7/ops/running/recovery/failed-node +--- + +## General Recovery Notes + +A Riak node can fail for many reasons, but a handful of checks enable you to +uncover some of the most common problems that can lead to node failure, +such as checking for RAID and filesystem consistency or faulty memory and +ensuring that your network connections are fully functioning. + +When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster. + +During the recovery process, hinted handoff will kick in and update the data on +the recovered node with updates accepted from other nodes in the cluster. Your +cluster may temporarily return `not found` for objects that are currently +being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on +these scenarios, in particular how the system behaves while the failed node is +not part of the cluster). + +## Node Name Changed + +If you are recovering from a scenario in which node name changes are out of +your control, you'll want to notify the cluster of its *new* name using the +following steps: + +1. Stop the node you wish to rename: + + ```bash + riak stop + ``` + + +2. Mark the node down from another node in the cluster: + + ```bash + riak-admin down <previous_node_name> + ``` + +3. Update the node name in Riak's configuration files: + + ```riakconf + nodename = <updated_node_name> + ``` + + ```vmargs + -name <updated_node_name> + ``` + +4. Delete the ring state directory (usually `/var/lib/riak/ring`). + +5. Start the node again: + + ```bash + riak start + ``` + +6. Ensure that the node comes up as a single instance: + + ```bash + riak-admin member-status + ``` + + The output should look something like this: + + ``` + ========================= Membership ========================== +Status Ring Pending Node +--------------------------------------------------------------- +valid 100.0% -- 'dev-rel@127.0.0.1' +--------------------------------------------------------------- +Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +7. Join the node to the cluster: + + ```bash + riak-admin cluster join <node_name_of_a_member_of_the_cluster> + ``` + +8. Replace the old instance of the node with the new: + + ```bash + riak-admin cluster force-replace <previous_node_name> <new_node_name> + ``` + +9. 
Review the changes: + + ```bash + riak-admin cluster plan + ``` + + Finally, commit those changes: + + ```bash + riak-admin cluster commit + ``` + + + + diff --git a/content/riak/kv/2.9.7/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.9.7/using/repair-recovery/failure-recovery.md new file mode 100644 index 0000000000..8b99a23092 --- /dev/null +++ b/content/riak/kv/2.9.7/using/repair-recovery/failure-recovery.md @@ -0,0 +1,129 @@ +--- +title: "Failure & Recovery" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Failure & Recovery" + identifier: "repair_recover_failure" + weight: 100 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.7/ops/running/recovery/failure-recovery + - /riak/kv/2.9.7/ops/running/recovery/failure-recovery +--- + +Riak was built to withstand---or at the very least reduce the severity +of---many types of system failure. Nonetheless, bugs are a reality, +hardware does break, and occasionally Riak itself will fail. Here, we'll +list some steps that can be taken to minimize the harm caused by a general +cluster failure. + +## Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, backup log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally and +are not affected by a wider problem like a virtualization or network outage. +Try to determine the cause of the problem from the data you have collected. + +## Data Loss + +Many failures incur no data loss or minimal loss that can be +repaired automatically, without intervention. Outage of a single node +does not necessarily cause data loss, as other replicas of every key are +available elsewhere in the cluster. Once the node is detected as down, +other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/2.9.7/learn/glossary/#hinted-handoff)). + +More severe data loss scenarios usually relate to hardware failure. +If data is lost, several options are available for restoring it. + +1. **Restore from backup** - A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but it can be used to partially restore data from + lost storage volumes. If running in a RAID configuration, rebuilding + the array may also be possible. +2. **Restore from multi-cluster replication** - If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the `riak-repl` + command. +3. **Restore using intra-cluster repair** - Riak versions 1.2 and greater + include a repair feature which will restore lost partitions with + data from other replicas. Currently, this must be invoked manually + using the Riak console and should be performed with guidance from a + Basho Client Services Engineer. + +Once data has been restored, normal operations should continue. If +multiple nodes completely lose their data, consultation and assistance +from Basho are strongly recommended. + +## Data Corruption + +Data at rest on disk can become corrupted by hardware failure or other +events. 
Generally, the Riak storage backends are designed to handle
+cases of corruption in individual files or entries within files, and can
+repair them automatically or simply ignore the corrupted parts.
+Otherwise, clusters can recover from data corruption in roughly the same
+way that they recover from data loss.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully.
+
+Replacing the node with one that has greater RAM capacity may temporarily
+alleviate the problem, but out-of-memory (OOM) issues tend to be an indication
+that the cluster is under-provisioned.
+
+## High Latency / Request Timeout
+
+High latencies and timeouts can be caused by slow disks or networks or an
+overloaded node. Check `iostat` and `vmstat` or your monitoring system to
+determine the state of resource usage. If I/O utilization is high but
+throughput is low, this may indicate that the node is responsible for
+too much data and growing the cluster may be necessary. Additional RAM
+may also improve latency because more of the active dataset will be
+cached by the operating system.
+
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/2.9.7/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+the number of siblings, causing longer disk service times and slower
+network responses.
+
+Sibling explosion can be detected by examining the `node_get_fsm_siblings`
+and `node_get_fsm_objsize` statistics from the `riak-admin status` command.
+To recover from sibling explosion, the application should be throttled and
+the resolution policy might need to be invoked manually on offending keys.
+
+A Basho CSE can assist in manually finding large values, i.e. those that
+potentially have a sibling explosion problem, in the storage backend.
+
+MapReduce requests typically involve multiple I/O operations and are
+thus the most likely to time out. From the perspective of the client
+application, the success of MapReduce requests can be improved by reducing the
+number of inputs, supplying a longer request timeout, and reducing the usage
+of secondary indexes. Heavily loaded clusters may experience more MapReduce
+timeouts simply because many other requests are being serviced as well. Adding
+nodes to the cluster can reduce MapReduce failure in the long term by
+spreading load and increasing available CPU and IOPS.
+
+
+## Cluster Recovery From Backups
+
+See [Changing Cluster Information]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+
+{{% note title="Tip" %}}
+If you are a TI Tokyo Riak support customer and require assistance or
+further advice with a cluster recovery, please file a ticket with the
+<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>.
+{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.7/using/repair-recovery/repairs.md b/content/riak/kv/2.9.7/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..c0f4729194 --- /dev/null +++ b/content/riak/kv/2.9.7/using/repair-recovery/repairs.md @@ -0,0 +1,391 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.7/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.7/ops/running/recovery/repairing-indexes + - /riak/2.9.7/ops/running/recovery/failed-node + - /riak/kv/2.9.7/ops/running/recovery/failed-node + - /riak/2.9.7/ops/running/recovery/repairing-leveldb + - /riak/kv/2.9.7/ops/running/recovery/repairing-leveldb + - /riak/2.9.7/ops/running/recovery/repairing-partitions + - /riak/kv/2.9.7/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/2.9.7/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/2.9.7/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/2.9.7/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`.
+
+Compaction error messages take the following form:
+
+```
+<timestamp> Compaction Error: Corruption: corrupted compressed block contents
+```
+
+To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction error` in each `LOG` file. Here is an example script:
+
+```bash
+find . -name "LOG" -exec grep -l 'Compaction error' {} \;
+```
+
+If there are compaction errors in any of your vnodes, those will be listed in the console. If any vnode has experienced such errors, you would see output like this:
+
+```
+./442446784738847563128068650529343492278651453440/LOG
+```
+
+
+{{% note %}}
+While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level.
+{{% /note %}}
+
+
+## Healing Corrupted LevelDBs
+
+When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not.
+
+Choose your setup below:
+
+1. [Just LevelDB](#leveldb)
+2. [LevelDB with tiered storage](#leveldb-with-tiered-storage)
+
+
+### LevelDB
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+3\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+4\. Then set `Options` equal to an empty list:
+
+```erlang
+Options = [].
+```
+
+5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+VNodeList should be a list of each corrupted LevelDB that you found using the [`find` command above](#checking-for-compaction-errors).
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the VNode IDs that you listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+%% Repair each corrupted vnode in VNodeList
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+### LevelDB with Tiered Storage
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. Check your riak.conf file and make note of the following values (a quick way to grab them is sketched after this list):
+
+* leveldb.tiered (integer)
+* leveldb.tiered.path.fast
+* leveldb.tiered.path.slow
+
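+To pull those three values out quickly, you can grep them from the config file. This is a sketch only; it assumes `riak.conf` lives at `/etc/riak/riak.conf`, which may differ on your install:
+
+```bash
+# Print the tiered-storage settings noted in step 2
+grep -E '^leveldb\.tiered' /etc/riak/riak.conf
+```
+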
+3\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+4\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+5\. Then supply the information you noted in Step 2:
+
+```erlang
+Options = [
+  {tiered_slow_level, »leveldb.tiered value«},
+  {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"},
+  {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"}
+].
+```
+
+6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+VNodeList should be a list of each corrupted LevelDB partition that you found using the [`find` command above](#checking-for-compaction-errors), provided in double quotes.
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the VNode IDs that you listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+%% Repair each corrupted vnode in VNodeList
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+
+## Repairing Partitions
+
+If you have experienced a loss of object replicas in your cluster, you
+may need to perform a repair operation on one or more of your data
+[partitions]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+run in situations where partitions or whole nodes are lost due to
+corruption or hardware failure. In these cases, nodes or partitions are
+brought back online without any data, which means that the need to
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/) is enabled.
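+
+One quick way to see whether AAE has actually been doing work on a node is the `riak-admin aae-status` command; the exact output varies by version, so treat this as a sketch:
+
+```bash
+# Partitions whose trees never show a build/exchange time suggest that
+# AAE is disabled or has not yet covered those partitions
+riak-admin aae-status
+```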
+
+You will need to run a repair if the following are both true:
+
+* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* You have both non-expiring data and keys that are not accessed
+  frequently (which means that they are not likely to be subject to
+  [read repair]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+
+You will most likely not need to run a repair operation if _any_ of the
+following is true:
+
+* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/2.9.7/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Your entire key set is accessed frequently, allowing passive read
+  repair to repair the partitions
+* Your data expires frequently
+
+In most cases, we recommend either using active anti-entropy or, if
+necessary and only when necessary, running a repair operation using the
+instructions below.
+
+### Running a Repair
+
+The Riak KV repair operation will repair objects from a node's adjacent
+partitions on the ring, consequently fixing the index. This is done as
+efficiently as possible by generating a hash range for all the buckets
+and thus avoiding a preflist calculation for each key. Only a hash of
+each key is done, its range determined from a bucket->range map, and
+then the hash is checked against the range.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make them
+mutually exclusive events. If you join or remove a node, all repairs
+across the entire cluster will be killed.
+
+### Repairing a Single Partition
+
+In the case of data loss in a single partition, only that partition can
+be repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit **Enter** again to get a console prompt.
+
+2. Execute the repair for a single partition using the below command:
+
+    ```erlang
+    riak_kv_vnode:repair(»Partition ID«).
+    ```
+
+    where `»Partition ID«` is replaced by the ID of the partition to
+    repair. For example:
+
+    ```erlang
+    riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
+    ```
+
+3. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Repairing All Partitions on a Node
+
+If a node is lost, all partitions currently owned by that node can be
+repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+2. Get a copy of the current Ring:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with ring record information.
+    You can safely ignore it.
+
+3. Get a list of partitions owned by the node that needs to be repaired.
+Replace `dev1@127.0.0.1` with the name of the node to be repaired. The
+name can be found in each node's `vm.args` file, specified as the
+`-name` parameter, if you are using the older configuration system; if
+you are using the newer, `riak.conf`-based system, the name is given by
+the `nodename` parameter.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    **Note**: The above is an [Erlang list
+    comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html)
+    that loops over each `{Partition, Node}` tuple in the ring and
+    extracts only the partitions that match the given node name, as a
+    list.
+
+
+4.
Execute the repair on all the partitions. Executing the repairs all
+at once will cause a lot of `{shutdown, max_concurrency}` messages in
+the logs. These can be safely ignored, as it is just the transfers
+mechanism enforcing an upper limit on the number of concurrent
+transfers.
+
+    ```erlang
+    [riak_kv_vnode:repair(P) || P <- Partitions].
+    ```
+5. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Monitoring Repairs
+
+The above repair commands can be monitored via the `riak-admin
+transfers` command.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This command can be executed from a `riak attach`
+session like below:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, and will
+look similar to:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Repairs on a node can also be killed remotely from another node in the
+cluster. From a `riak attach` session the below command can be used:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.9.7/using/repair-recovery/rolling-replaces.md
new file mode 100644
index 0000000000..9edebec829
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/repair-recovery/rolling-replaces.md
@@ -0,0 +1,76 @@
+---
+title: "Rolling Replaces"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Rolling Replaces"
+    identifier: "repair_recover_replace"
+    weight: 106
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+---
+
+[upgrade]: {{<baseurl>}}riak/kv/2.9.7/setup/upgrading/cluster/
+[rolling restarts]: {{<baseurl>}}riak/kv/2.9.7/using/repair-recovery/rolling-restart/
+[add node]: {{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes
+
+Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis.
+
+The following steps should be undertaken on each Riak KV node that you wish to replace:
+
+1\. Create a free node:
+
+  a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster.
+
+  b\. Or have a node that is currently in the cluster leave:
+
+    ```bash
+    riak-admin cluster leave »nodename«
+    ```
+
+    After creating a node or leaving a node, wait for all transfers to complete:
+
+    ```bash
+    riak-admin transfers
+    ```
+
+2\. Join the free node to your cluster:
+
+```bash
+riak-admin cluster join »free_node«
+```
+
+3\. Next, replace the free node with an existing node:
+
+```bash
+riak-admin cluster replace »free_node« »nodename«
+```
+
+4\. Then review the cluster transition plan:
+
+```bash
+riak-admin cluster plan
+```
+
+5\. And commit the changes:
+
+```bash
+riak-admin cluster commit
+```
+
+6\. Wait for all transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+7\. Repeat steps 2-6 above until each node has been replaced (a consolidated sketch of one iteration appears below).
+
+8\. Join the replaced node back into the cluster or decommission the additional node that was created.
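+
+For reference, here is what one full pass of steps 2-6 can look like as a shell session. This is a sketch only: `»free_node«` and `»nodename«` are placeholders for your actual node names, and the polling loop mirrors the one shown on the Rolling Restarts page:
+
+```bash
+# One rolling-replace iteration
+riak-admin cluster join »free_node«
+riak-admin cluster replace »free_node« »nodename«
+riak-admin cluster plan
+riak-admin cluster commit
+
+# Poll until all handoff finishes before moving on to the next node
+while ! riak-admin transfers | grep -iqF 'No transfers active'
+do
+  echo 'Transfers in progress'
+  sleep 5
+done
+```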
+ + + + diff --git a/content/riak/kv/2.9.7/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.9.7/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..502e6fb6a1 --- /dev/null +++ b/content/riak/kv/2.9.7/using/repair-recovery/rolling-restart.md @@ -0,0 +1,64 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.7/ops/running/recovery/rolling-restart + - /riak/kv/2.9.7/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/2.9.7/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. + + + + diff --git a/content/riak/kv/2.9.7/using/repair-recovery/secondary-indexes.md b/content/riak/kv/2.9.7/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..745ef37e53 --- /dev/null +++ b/content/riak/kv/2.9.7/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,142 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.7/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.7/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
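+
+If you want these repairs to run regularly during a quiet window, one option is a scheduled job. This is a minimal sketch; it assumes a 03:00 low-traffic window, a user permitted to invoke `riak-admin`, and paths that match your install:
+
+```bash
+# Example crontab entry (edit with `crontab -e`):
+# run a full secondary-index repair at 03:00 every Sunday
+0 3 * * 0 /usr/sbin/riak-admin repair-2i >> /var/log/riak/repair-2i.log 2>&1
+```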
+ +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i <Partition_ID> +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +---- + +## Repairing Search Indexes + +Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned. + +### Running a Repair + +If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index. + +This is done as efficiently as possible by generating a hash range for all the buckets and thus avoiding a preflist calculation for each key. Only a hash of each key is done, its range determined from a bucket→range map, and then the hash is checked against the range. + +This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly. + +1. From a cluster node with Riak installed, attach to the Riak console: + + ```bash + riak attach + ``` + + You may have to hit enter again to get a console prompt. + +2. Get a list of partitions owned by the node that needs repair: + + ```erlang + {ok, Ring} = riak_core_ring_manager:get_my_ring(). + ``` + + You will get a lot of output with Ring record information. You can safely ignore it. + +3. Then run the following code to get a list of partitions. Replace 'dev1@127.0.0.1' with the name of the node you need to repair. + + ```erlang + Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)]. + ``` + + _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html), that loops over each `{Partition, Node}` tuple in the Ring, and extracts only the partitions that match the given node name, as a list._ + +4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` spam but it's not anything to worry about. That is just the transfers mechanism enforcing an upper limit on the number of concurrent transactions. + + ```erlang + [riak_search_vnode:repair(P) || P <- Partitions]. + ``` + +5. When you're done, press `Ctrl-D` to disconnect the console. DO NOT RUN q() which will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service, it does not stop the code from running. + + +### Monitoring a Repair + +The above Repair command can be slow, so if you reattach to the console, you can run the repair_status function. You can use the `Partitions` variable defined above to get the status of every partition. + +```erlang +[{P, riak_search_vnode:repair_status(P)} || P <- Partitions]. +``` + +When you're done, press `Ctrl-D` to disconnect the console. + +### Killing a Repair + +Currently there is no easy way to kill an individual repair. The only +option is to kill all repairs targeting a given node. 
This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This means you'll either have to be attached to
+that node's console or you can use the `rpc` module to make a remote
+call. Here is an example of killing all repairs targeting partitions
+on the local node.
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, something akin to this:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Here is an example of executing the call remotely.
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make
+them mutually exclusive events. If you join or remove a node, all
+repairs across the entire cluster will be killed.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/running-a-cluster.md b/content/riak/kv/2.9.7/using/running-a-cluster.md
new file mode 100644
index 0000000000..f19813272d
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/running-a-cluster.md
@@ -0,0 +1,339 @@
+---
+title: "Running a Cluster"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Running a Cluster"
+    identifier: "managing_running_a_cluster"
+    weight: 200
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/building/basic-cluster-setup
+  - /riak/kv/2.9.7/ops/building/basic-cluster-setup
+---
+
+Configuring a Riak cluster involves instructing each node to listen on a
+non-local interface, i.e. not `127.0.0.1`, and then joining all of the
+nodes together to participate in the cluster.
+
+Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/) located in your `rel/riak/etc` directory (if
+you compiled from source) or `/etc` (if you used a binary install of
+Riak).
+
+The commands below presume that you are running from a source install,
+but if you have installed Riak with a binary install, you can substitute
+the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin`
+with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts
+are located in the `/bin` directory of your installation.
+
+> **Note on changing the `name` value**
+>
+> If possible, you should avoid starting Riak prior to editing the name of
+a node. This setting corresponds to the `nodename` parameter in the
+`riak.conf` file if you are using the newer configuration system, and to
+the `-name` parameter in `vm.args` (as described below) if you are using
+the older configuration system. If you have already started Riak with
+the default settings, you cannot change the `-name` setting and then
+successfully restart the node.
+>
+> If you cannot restart after changing the `-name` value, you have two
+options:
+>
+> * Discard the existing ring metadata by removing the contents of the
+`ring` directory (sketched below). This will require rejoining all nodes into a
+cluster again.
+>
+> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
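+
+If you do end up discarding the ring metadata, the sequence is short. This is a sketch; it assumes the default ring directory `/var/lib/riak/ring`, which may differ on your install:
+
+```bash
+riak stop
+# Remove the stale ring state; the node forgets its cluster membership
+rm -rf /var/lib/riak/ring/*
+riak start
+```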
+
+## Configure the First Node
+
+First, stop your Riak node if it is currently running:
+
+```bash
+riak stop
+```
+
+#### Select an IP address and port
+
+Let's say that the IP address for your cluster is 192.168.1.10 and that
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+interface due to performance gains), you should change your
+configuration file:
+
+```riakconf
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"127.0.0.1", 8087 },
+```
+
+becomes
+
+```riakconf
+listener.protobuf.internal = 192.168.1.10:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"192.168.1.10", 8087 },
+```
+
+{{% note title="Note on upgrading to 2.0" %}}
+If you are upgrading to Riak version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config`/`vm.args`
+configuration files or the newer `riak.conf` if you wish. If you have
+installed Riak 2.0 directly, you should use only `riak.conf`.
+
+Below, examples will be provided for both the old and new configuration
+systems. Bear in mind that you need to use either the older or the newer
+but never both simultaneously.
+
+More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference).
+{{% /note %}}
+
+If you're using the HTTP interface, you will need to alter your
+configuration in an analogous way:
+
+```riakconf
+listener.http.internal = 127.0.0.1:8098
+```
+
+```appconfig
+%% In the riak_core section:
+
+{http, [ {"127.0.0.1", 8098 } ]},
+```
+
+becomes
+
+```riakconf
+listener.http.internal = 192.168.1.10:8098
+```
+
+```appconfig
+{http, [ {"192.168.1.10", 8098 } ]},
+```
+
+#### Name your node
+
+Every node in Riak has a name associated with it. The default name is
+`riak@127.0.0.1`. Let's say that you want to change the name to
+`riak@192.168.1.10`:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+becomes
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+> **Node Names**
+>
+> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10`
+are both acceptable node naming schemes, but using the FQDN style is
+preferred.
+>
+> Once a node has been started, in order to change the name you must
+either remove ring files from the `/data/ring` directory or
+[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.7/using/admin/riak-admin/#cluster-force-replace) the node.
+
+#### Start the node
+
+Now that your node is properly configured, you can start it:
+
+```bash
+riak start
+```
+
+If the Riak node has been previously started, you must use the
+`riak-admin cluster replace` command to change the node name and update
+the node's ring file.
+
+```bash
+riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10
+```
+
+{{% note title="Note on single nodes" %}}
+If a node is started singly using default settings, as you might do when you
+are building your first test environment, you will need to remove the ring
+files from the data directory after you edit your configuration files.
+`riak-admin cluster replace` will not work since the node has not been joined
+to a cluster.
+{{% /note %}}
+
+As with all cluster changes, you need to view the planned changes by
+running `riak-admin cluster plan` and then running `riak-admin cluster
+commit` to finalize those changes.
+
+The node is now properly set up to join other nodes for cluster
+participation. You can proceed to adding a second node to the cluster.
+
+## Add a Second Node to Your Cluster
+
+Repeat the above steps for a second host on the same network, providing
+the second node with a host/port and node name. Once the second node has
+started, use `riak-admin cluster join` to join the second node to the
+first node, thereby creating an initial Riak cluster. Let's say that
+we've named our second node `riak@192.168.1.11`. From the new node's
+`/bin` directory:
+
+```bash
+riak-admin cluster join riak@192.168.1.10
+```
+
+Output from the above should resemble:
+
+```
+Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10`
+```
+
+Next, plan and commit the changes:
+
+```bash
+riak-admin cluster plan
+riak-admin cluster commit
+```
+
+After the last command, you should see:
+
+```
+Cluster changes committed
+```
+
+If your output was similar, then the second Riak node is now part of the
+cluster and has begun syncing with the first node. Riak provides several
+ways to determine the cluster's ring status. Here are two ways to
+examine your Riak cluster's ring:
+
+1. Using the `riak-admin` command:
+
+    ```bash
+    bin/riak-admin status | grep ring_members
+    ```
+
+    With output resembling the following:
+
+    ```bash
+    ring_members : ['riak@192.168.1.10','riak@192.168.1.11']
+    ```
+
+2. Running the `riak attach` command. This will open up an Erlang shell,
+into which you can type the following command:
+
+    ```erlang
+    1> {ok, R} = riak_core_ring_manager:get_my_ring().
+
+    %% Response:
+
+    {ok,{chstate,'riak@192.168.1.10',.........
+    (riak@192.168.1.10)2> riak_core_ring:all_members(R).
+    ['riak@192.168.1.10','riak@192.168.1.11']
+    ```
+
+To join additional nodes to your cluster, repeat the above steps. You
+can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/2.9.7/using/cluster-operations/adding-removing-nodes) from a cluster.
+
+> **Ring Creation Size**
+>
+> All nodes in the cluster
+must have the same initial ring size setting in order to join and
+participate in cluster activity. This setting can be adjusted in your
+configuration file using the `ring_creation_size` parameter if you're
+using the older configuration system or `ring_size` in the new system.
+>
+> Check the value of all nodes if you receive a message like this:
+> `Failed: riak@10.0.1.156 has a different ring_creation_size`
+
+## Running Multiple Nodes on One Host
+
+If you built Riak from source code, or if you are using the Mac OS X
+pre-built package, then you can easily run multiple Riak nodes on the
+same machine. The most common scenario for doing this is to experiment
+with running a Riak cluster.
+
+**Note**: If you have installed the `.deb` or `.rpm` package, then you
+will need to download and build Riak from source to follow the
+directions below.
+
+To run multiple nodes, make copies of the `riak` directory.
+
+- If you ran `make all rel`, then this can be found in `./rel/riak`
+  under the Riak source root directory.
+- If you are running Mac OS X, then this is the directory where you
+  unzipped the `.tar.gz` file.
+
+Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`,
+`./rel/riak3`, and so on, you need to make two changes:
+
+1. 
Set your handoff port and your Protocol Buffers or HTTP port
+(depending on which interface you are using) to different values on each
+node. For example:
+
+    ```riakconf
+    # For Protocol Buffers:
+    listener.protobuf.internal = 127.0.0.1:8187
+
+    # For HTTP:
+    listener.http.internal = 127.0.0.1:8198
+
+    # For either interface:
+    handoff.port = 8199
+    ```
+
+    ```appconfig
+    %% In the pb section of riak_core:
+    {"127.0.0.1", 8187 }
+
+    %% In the http section of riak_core:
+    {"127.0.0.1", 8198}
+    ```
+
+2. Change the name of each node to a unique name. Now, start the nodes,
+changing path names and nodes as appropriate:
+
+```bash
+./rel/riak1/bin/riak start
+./rel/riak2/bin/riak start
+./rel/riak3/bin/riak start
+
+# etc
+```
+
+Next, join the nodes into a cluster:
+
+```bash
+./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1
+./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1
+./rel/riak2/bin/riak-admin cluster plan
+./rel/riak2/bin/riak-admin cluster commit
+```
+
+## Multiple Clusters on One Host
+
+Using the above technique, it is possible to run multiple clusters on
+one computer. If a node hasn't joined an existing cluster, it will
+behave just as a standalone, single-node cluster would. Running multiple
+clusters on one computer is simply a matter of having two or more
+distinct nodes or groups of clustered nodes.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/security.md b/content/riak/kv/2.9.7/using/security.md
new file mode 100644
index 0000000000..367d1653cf
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/security.md
@@ -0,0 +1,199 @@
+---
+title: "Security & Firewalls"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Security"
+    identifier: "managing_security"
+    weight: 205
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/advanced/security
+  - /riak/kv/2.9.7/ops/advanced/security
+---
+
+[config reference search]: {{<baseurl>}}riak/kv/2.9.7/configuring/reference/#search
+[config search enabling]: {{<baseurl>}}riak/kv/2.9.7/configuring/search/#enabling-riak-search
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.7/configuring/v3-multi-datacenter/ssl
+[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html
+[security basics]: {{<baseurl>}}riak/kv/2.9.7/using/security/basics
+[security managing]: {{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/
+[Solr]: http://lucene.apache.org/solr/
+[usage search]: {{<baseurl>}}riak/kv/2.9.7/developing/usage/search
+
+> **Internal security**
+>
+> This document covers network-level security. For documentation on the
+authentication and authorization features introduced in Riak 2.0, see
+[Authentication and Authorization][security basics] and [Managing Security Sources][security managing].
+
+This article discusses standard configurations and port settings to use
+when providing network security for a Riak cluster. There are two
+classes of access control for Riak:
+
+* Other Riak nodes participating in the cluster
+* Clients making use of the Riak cluster
+
+The settings for both access groups are located in your cluster's
+configuration settings. If you are using the newer configuration system,
+you can set a host and port for each node in that node's `riak.conf`
+file, setting `listener.protobuf` if you are using Riak's Protocol
+Buffers interface or `listener.http` if you are using HTTP (or
+`listener.https` if you are using SSL). 
If you are using the older
+configuration system, adjust the settings of `pb`, `http`, or `https`,
+depending on which client interface you are using.
+
+Make note of these configurations and set up your firewall to allow
+incoming TCP access to those ports or IP address/port combinations.
+Exceptions to this are the `handoff_ip` and `handoff_port` directives.
+Those are for communication between Riak nodes only.
+
+## Inter-node Communication
+
+Riak uses the Erlang distribution mechanism for most inter-node
+communication. Riak identifies other machines in the ring using Erlang
+identifiers (`<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves
+these node identifiers to a TCP port on a given machine via the Erlang
+Port Mapper daemon (epmd) running on each cluster node.
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. For inter-node communication, Erlang uses an unpredictable
+port by default; it binds to port 0, which means the first available
+port.
+
+For ease of firewall configuration, Riak can be configured
+to instruct the Erlang interpreter to use a limited range
+of ports. For example, to restrict the range of ports that Erlang will
+use for inter-Erlang node communication to 6000-7999, add the following
+lines to the configuration file on each Riak node:
+
+```riakconf
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+```
+
+```appconfig
+{ kernel, [
+            {inet_dist_listen_min, 6000},
+            {inet_dist_listen_max, 7999}
+          ]},
+```
+
+The above lines should be added into the top level list in app.config,
+at the same level as all the other applications (e.g. `riak_core`).
+Then configure your firewall to allow incoming access to TCP ports 6000
+through 7999 from whichever network(s) contain your Riak nodes.
+
+### Riak Node Ports
+
+Riak nodes in a cluster need to be able to communicate freely with one
+another on the following ports:
+
+* epmd listener: TCP:4369
+* handoff_port listener: TCP:8099
+* range of ports specified in `app.config` or `riak.conf`
+
+### Riak Client Ports
+
+Riak clients must be able to contact at least one machine in a Riak
+cluster on the following TCP ports:
+
+Protocol | Port
+:--------|:----
+<a href="../../developing/api/http">HTTP</a> | TCP port 8098
+<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087
+
+### Riak Search Ports
+
+Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs
+on each Riak node if search has been [enabled][config search enabling]. When
+Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well
+as some ephemeral ports. The well-known port is determined by the value of the
+`search.solr.jmx_port` in each node's [Search configuration][config reference search].
+The default is 8985.
+
+In addition to JMX ports, Solr also binds to a well-known port of its
+own, as determined by each node's `search.solr.port` setting, which is
+also located in each node's Search configuration. The default is 8093.
+
+# Riak Security Community
+
+## Riak
+
+Riak is a powerful open-source distributed database focused on scaling
+predictably and easily, while remaining highly available in the face of
+server crashes, network partitions or other (inevitable) disasters.
+
+## Commitment
+
+Data security is an important and sensitive issue to many of our users. 
+A real-world approach to security allows us to balance appropriate
+levels of security and related overhead while creating a fast, scalable,
+and operationally straightforward database.
+
+### Continuous Improvement
+
+Though we make every effort to thwart security vulnerabilities whenever
+possible (including through independent reviews), no system is
+completely secure. We will never claim that Riak is 100% secure (and you
+should seriously doubt anyone who claims their solution is). What we can
+promise is that we openly accept all vulnerabilities from the community.
+When appropriate, we'll publish and make every attempt to quickly
+address these concerns.
+
+### Balance
+
+More layers of security increase operational and administrative costs.
+Sometimes those costs are warranted, sometimes they are not. Our
+approach is to strike an appropriate balance between effort, cost, and
+security.
+
+For example, Riak does not have fine-grained role-based security. Though
+it can be an attractive bullet-point in a database comparison chart,
+you're usually better off finely controlling data access through your
+application or a service layer.
+
+### Notifying Basho
+
+If you discover a potential security issue, please email us at
+**security@basho.com**, and allow us 48 hours to reply.
+
+We prefer to be contacted first, rather than searching for blog posts
+over the Internet. This allows us to open a dialogue with the security
+community on how best to handle a possible exploit without putting any
+users at risk.
+
+## Security Best Practices
+
+### Authentication and Authorization
+
+For instructions on how to apply permissions and to require client
+authentication, please see our documentation on [Riak Security][security basics].
+
+### Network Configurations
+
+Being a distributed database means that much of Riak's security springs
+from how you configure your network. We have a few recommendations for
+[Security and Firewalls][security basics].
+
+### Client Auth
+
+All of the Riak client libraries support encrypted TCP communication
+as well as authentication and authorization. For instructions on how
+to apply permissions and to require client authentication, please see
+our documentation on [Riak Security][security basics].
+
+### Multi-Datacenter Replication
+
+For those versions of Riak that support Multi Data Center (MDC)
+Replication, you can configure Riak 1.2+ to communicate over SSL to
+seamlessly encrypt the message traffic.
+
+See also: [Multi Data Center Replication: SSL][config v3 ssl]
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/security/basics.md b/content/riak/kv/2.9.7/using/security/basics.md
new file mode 100644
index 0000000000..7855b2a105
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/security/basics.md
@@ -0,0 +1,851 @@
+---
+title: "Security Basics"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "Security Basics"
+    identifier: "security_basics"
+    weight: 100
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/2.9.7/ops/running/authz
+  - /riak/kv/2.9.7/ops/running/authz
+---
+
+> **Note on Network security**
+>
+> This document covers only the 2.0 authentication and authorization
+features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.7/using/security/). 
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/2.9.7/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/2.9.7/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
+ +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak-admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak-admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak-admin security disable +``` + +While security is disabled, clients will need to be reconfigured to no +longer require TLS and send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak-admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics and granting users +selective access to Riak functionality (and also to revoke access). +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created. 
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak-admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak-admin security print-groups
+```
+
+Example output, assuming user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups |       password       |           options            |
++----------+--------+----------------------+------------------------------+
+| riakuser |        |983e8ae1421574b8733824|              []              |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in encrypted form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and a `name` of
+`lucius`, the output would look like this:
+
+```bash
++----------+----------------+----------------------+---------------------+
+| username |     groups     |       password       |       options       |
++----------+----------------+----------------------+---------------------+
+| riakuser |      dev       |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` or `security print-group` (singular) commands
+can be used with a name as an argument to see the same information as
+above, but for only that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak-admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```bash
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group  |   type   |  bucket  |                 grants                 |
++--------+----------+----------+----------------------------------------+
+| admin  |    *     |    *     |      riak_kv.get, riak_kv.delete,      |
+|        |          |          |              riak_kv.put               |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|    *     |      *      |      riak_kv.get, riak_kv.delete,      |
+|          |             |              riak_kv.put               |
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately. 
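+
+For example, assuming both a user and a group named `admin` exist, the
+prefixed forms would be:
+
+```bash
+# Show grants for the user named admin only
+riak-admin security print-grants user/admin
+
+# Show grants for the group named admin only
+riak-admin security print-grants group/admin
+```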
+
+### Add Group
+
+For easier management of permissions across several users, it is
+possible to create groups to be assigned to those users.
+
+```bash
+riak-admin security add-group admin
+```
+
+### Add User
+
+To create a user with the username `riakuser`, we use the `add-user`
+command:
+
+```bash
+riak-admin security add-user riakuser
+```
+
+Using the command this way will create the user `riakuser` without _any_
+characteristics beyond a username, which is the only attribute that you
+must assign upon user creation.
+
+Alternatively, a password---or other attributes---can be assigned to the
+user upon creation. Here, we'll assign a password:
+
+```bash
+riak-admin security add-user riakuser password=Test1234
+```
+
+### Assigning a Password and Altering Existing User Characteristics
+
+While passwords and other characteristics can be set upon user creation,
+it often makes sense to change user characteristics after the user has
+already been created. Let's say that the user `riakuser` was created
+without a password (or created _with_ a password that we'd like to
+change). The `alter-user` command can be used to modify our `riakuser`
+user:
+
+```bash
+riak-admin security alter-user riakuser password=opensesame
+```
+
+When creating or altering a user, any number of `<option>=<value>`
+pairs can be appended to the end of the command. Any non-standard
+options will be stored and displayed via the `riak-admin security
+print-users` command.
+
+```bash
+riak-admin security alter-user riakuser name=bill age=47 fav_color=red
+```
+
+Now, the `print-users` command should return this:
+
+```
++----------+--------+----------+--------------------------------------------------+
+| username | groups | password |                     options                      |
++----------+--------+----------+--------------------------------------------------+
+| riakuser |        |          |[{"fav_color","red"},{"age","47"},{"name","bill"}]|
++----------+--------+----------+--------------------------------------------------+
+```
+
+**Note**: Usernames _cannot_ be changed using the `alter-user` command.
+For example, running `riak-admin security alter-user riakuser
+username=other-name` will instead add the
+`{"username","other-name"}` tuple to `riakuser`'s options.
+
+### Managing Groups for a User
+
+If we have a user `riakuser` and we'd like to assign her to the
+`admin` group, we assign the value `admin` to the option `groups`:
+
+```bash
+riak-admin security alter-user riakuser groups=admin
+```
+
+If we'd like to make the user `riakuser` both an `admin` and an
+`archoverlord`:
+
+```bash
+riak-admin security alter-user riakuser groups=admin,archoverlord
+```
+
+There is no way to incrementally add groups; even if `riakuser` was
+already an `admin`, it is necessary to list it again when adding the
+`archoverlord` group. Thus, to remove a group from a user, use
+`alter-user` and list all *other* groups.
+
+If the user should be removed from all groups, use `groups=` with no
+list:
+
+```bash
+riak-admin security alter-user riakuser groups=
+```
+
+### Managing Groups for Groups
+
+Groups can be added to other groups for cascading permissions. 
+
+```bash
+riak-admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```
+riak-admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```
+riak-admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak-admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation
+:----------|:----------
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully. 
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/2.9.7/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/2.9.7/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/2.9.7/configuring/search/) document. 
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak-admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak-admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Check the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticate against the given pluggable authentication module (PAM) service
+`certificate` | Authenticate using a client certificate
+
+### Example: Adding a Trusted Source
+
+Security sources can be added either to a specific user, multiple users,
+or all users (`all`).
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak-admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+|       users        |    cidr    |  source  | options  |
++--------------------+------------+----------+----------+
+|        all         |127.0.0.1/32|  trust   |    []    |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR. 
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/2.9.7/setup/installing/source/erlang). 
This issue should
+not affect Erlang 17.0 and later.
+
+## Enabling SSL
+
+In order to use any authentication or authorization features, you must
+enable SSL for Riak. **SSL is disabled by default**, but you will need
+to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#client-interfaces) for the node
+as well as a [certificate configuration](#certificate-configuration).
+
+If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/2.9.7/developing/api/http) for Riak and would like to
+configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host
+and port. The following configuration would establish port 8088 on
+`localhost` as the HTTPS port:
+
+```riakconf
+listener.https.$name = 127.0.0.1:8088
+
+# By default, "internal" is used as the "name" setting
+```
+
+```appconfig
+{riak_core, [
+             %% Other configs
+             {https, [{"127.0.0.1", 8088}]},
+             %% Other configs
+            ]}
+```
+
+## TLS Settings
+
+When using Riak security, you can choose which versions of SSL/TLS are
+allowed. By default, only TLS 1.2 is allowed, but this version can be
+disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#security) to `on` or `off`:
+
+* `tls_protocols.tlsv1`
+* `tls_protocols.tlsv1.1`
+* `tls_protocols.tlsv1.2`
+* `tls_protocols.sslv3`
+
+Three things to note:
+
+* Among the four available options, only TLS version 1.2 is enabled by
+  default
+* You can enable more than one protocol at a time
+* We strongly recommend that you do _not_ use SSL version 3 unless
+  absolutely necessary
+
+## Certificate Configuration
+
+If you are using any of the available [security sources]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#trust-based-authentication), you will need to do so
+over a secure SSL connection. In order to establish a secure connection,
+you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#security) point to the proper paths for your
+generated certs. By default, Riak assumes that all certs are stored in
+each node's `/etc` directory.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can change the location of the `/etc` directory by modifying the
+`platform_etc_dir`. More information can be found in our documentation
+on [configuring directories]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/#directories). 
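+
+For example, a minimal `riak.conf` override might look like this (the
+path shown is purely illustrative):
+
+```riakconf
+# Illustrative only: point platform_etc_dir at a custom directory
+platform_etc_dir = /opt/riak/etc
+```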
+ +<table class="riak-conf"> + <thead> + <tr> + <th>Type</th> + <th>Parameter</th> + <th>Default</th> + </tr> + </thead> + <tbody> + <tr> + <td><strong>Signing authority</strong></td> + <td><code>ssl.cacertfile</code></td> + <td><code>#(platform_etc_dir)/cacertfile.pem</code></td> + </tr> + <tr> + <td><strong>Cert</strong></td> + <td><code>ssl.certfile</code></td> + <td><code>#(platform_etc_dir)/cert.pem</code></td> + </tr> + <tr> + <td><strong>Key file</strong></td> + <td><code>ssl.keyfile</code></td> + <td><code>#(platform_etc_dir)/key.pem</code></td> + </tr> + </tbody> +</table> + +If you are using the older, `app.config`-based configuration system, +these paths can be set in the `ssl` subsection of the `riak_core` +section. The corresponding parameters are shown in the example below: + +```appconfig +{riak_core, [ + %% Other configs + + {ssl, [ + {certfile, "./etc/cert.pem"}, + {keyfile, "./etc/key.pem"}, + {cacertfile, "./etc/cacertfile.pem"} + ]}, + + %% Other configs +]} +``` + +## Referer Checks and Certificate Revocation Lists + +In order to provide safeguards against +[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting) +(XSS) and +[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +attacks, Riak performs [secure referer +checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those +checks make it impossible to serve data directly from Riak. To disable +those checks, set the `secure_referer_check` parameter to `off`. + +If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/2.9.7/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +default. To disable this behavior, set the `check_crl` parameter to +`off`. + + + + diff --git a/content/riak/kv/2.9.7/using/security/best-practices.md b/content/riak/kv/2.9.7/using/security/best-practices.md new file mode 100644 index 0000000000..b5451dacee --- /dev/null +++ b/content/riak/kv/2.9.7/using/security/best-practices.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Security Best Practices" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Best Practices" + identifier: "security_best_practices" + weight: 102 + parent: "managing_security" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/2.9.7/using/security/managing-sources.md b/content/riak/kv/2.9.7/using/security/managing-sources.md new file mode 100644 index 0000000000..2b99c7a064 --- /dev/null +++ b/content/riak/kv/2.9.7/using/security/managing-sources.md @@ -0,0 +1,273 @@ +--- +title: "Managing Security Sources" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Managing Security Sources" + identifier: "security_manage_sources" + weight: 101 + parent: "managing_security" +toc: true +aliases: + - /riak/2.9.7/ops/running/security-sources + - /riak/kv/2.9.7/ops/running/security-sources +--- + +If you're looking for more general information on Riak Security, it may +be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/2.9.7/using/security/basics). + +This document provides more granular information on the four available +authentication sources in Riak Security: trusted networks, password, +pluggable authentication modules (PAM), and certificates. These sources +correspond to `trust`, `password`, `pam`, and `certificate`, +respectively, in the `riak-admin security` interface. + +The examples below will assume that the network in question is +`127.0.0.1/32` and that a Riak user named `riakuser` has been +[created]({{<baseurl>}}riak/kv/2.9.7/using/security/basics/#user-management) and that +security has been [enabled]({{<baseurl>}}riak/kv/2.9.7/using/security/basics/#the-basics). + +{{% note title="Note on SSL connections" %}} +If you use _any_ of the aforementioned security sources, even `trust`, you +will need to do so via a secure SSL connection. 
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak-admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak-admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 password +riak-admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/2.9.7/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
+The default directory for certificates is `/etc`, though you can specify
+a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/), either by uncommenting the relevant lines to accept the default paths or by setting the paths yourself:
+
+```riakconf
+ssl.certfile = /path/to/cert.pem
+ssl.keyfile = /path/to/key.pem
+ssl.cacertfile = /path/to/cacert.pem
+```
+
+In the client-side example above, the client's `CN` and Riak username
+needed to match. On the server (i.e. Riak) side, the `CN` specified _on
+each node_ must match the node's name as registered by Riak. You can
+find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/2.9.7/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is
+`riak-node-1`, you would need to generate your certificate with that in
+mind, as in this OpenSSL example:
+
+```bash
+openssl req -new ... '/CN=riak-node-1'
+```
+
+Once certificates have been properly generated and configured on all of
+the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/2.9.7/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client
+certificate that you generated for the user `riakuser`.
+
+How to use Riak clients in conjunction with OpenSSL and other
+certificates varies from client library to client library. We strongly
+recommend checking the documentation of your client library for further
+information.
+
+## PAM-based Authentication
+
+This section assumes that you have set up a PAM service bearing the name
+`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition
+specifying `auth` and/or other PAM services set up to authenticate a
+user named `riakuser`. As in the certificate-based authentication
+example above, the user's name must be the same in both your
+authentication module and in Riak Security.
+
+If we want the user `riakuser` to use this PAM service on `localhost`,
+we need to add a `pam` security source in Riak and specify the name of
+the service:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 pam service=riak_pam
+```
+
+**Note**: If you do not specify a name for your PAM service, Riak will
+use the default, which is `riak`.
+
+To verify that the source has been properly specified:
+
+```bash
+riak-admin security print-sources
+```
+
+That command should output the following:
+
+```
++--------------------+------------+----------+------------------------+
+|       users        |    cidr    |  source  |        options         |
++--------------------+------------+----------+------------------------+
+|      riakuser      |127.0.0.1/32|   pam    |[{"service","riak_pam"}]|
++--------------------+------------+----------+------------------------+
+```
+
+You can test that setup most easily by using `curl`. A request that
+supplies the username but not the user's password will return an
+`Unauthorized` message:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Response:
+
+```
+<html><head><title>401 Unauthorized</title></head>
+<body><h1>Unauthorized</h1>Unauthorized<p><hr>
+<address>mochiweb+webmachine web server</address>
+</body></html>
+```
+
+If you identify yourself as `riakuser` and are successfully
+authenticated by your PAM service, you should get either `not found` or
+a Riak object if one is stored in the specified bucket type/bucket/key
+path:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## How Sources Are Applied
+
+When managing security sources---any of the sources explained
+above---you always have the option of applying a source to either a
+single user, multiple users, or all users (`all`). If specific users and
+`all` have no sources in common, this presents no difficulty. But what
+happens if one source is applied to `all` and a different source is
+applied to a specific user?
+
+The short answer is that the more specifically assigned source---i.e. to
+the user---will be considered the user's security source. We'll illustrate
+that with the following example, in which the `certificate` source is
+assigned to `all`, but the `password` source is assigned to `riakuser`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 certificate
+riak-admin security add-source riakuser 127.0.0.1/32 password
+```
+
+If we run `riak-admin security print-sources`, we'll get the following
+output:
+
+```
++--------------------+------------+-----------+----------+
+|       users        |    cidr    |  source   | options  |
++--------------------+------------+-----------+----------+
+|      riakuser      |127.0.0.1/32| password  |    []    |
+|                    |127.0.0.1/32|certificate|    []    |
+|        all         |127.0.0.1/32|certificate|    []    |
++--------------------+------------+-----------+----------+
+```
+
+As we can see, `password` is set as the security source for `riakuser`,
+whereas everyone else will authenticate using `certificate`.
+
+
+
+
diff --git a/content/riak/kv/2.9.7/using/security/v2-v3-ssl-ca.md b/content/riak/kv/2.9.7/using/security/v2-v3-ssl-ca.md
new file mode 100644
index 0000000000..6f7d4658f1
--- /dev/null
+++ b/content/riak/kv/2.9.7/using/security/v2-v3-ssl-ca.md
@@ -0,0 +1,85 @@
+---
+draft: true
+title: "V2 / V3 SSL & CA Validation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.7
+menu:
+  riak_kv-2.9.7:
+    name: "V2/V3 SSL & CA Validation"
+    identifier: "security_validation"
+    weight: 103
+    parent: "managing_security"
+toc: true
+aliases:
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. 
Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/2.9.7/using/troubleshooting.md b/content/riak/kv/2.9.7/using/troubleshooting.md new file mode 100644 index 0000000000..db23cccacb --- /dev/null +++ b/content/riak/kv/2.9.7/using/troubleshooting.md @@ -0,0 +1,28 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +aliases: +--- + +[http 204]: ./http-204 + +## In This Section + +#### [HTTP 204][http 204] + +About the HTTP 204 response. + +[Learn More >>][http 204] + + + + diff --git a/content/riak/kv/2.9.7/using/troubleshooting/http-204.md b/content/riak/kv/2.9.7/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..12e30b7fda --- /dev/null +++ b/content/riak/kv/2.9.7/using/troubleshooting/http-204.md @@ -0,0 +1,22 @@ +--- +title: "HTTP 204" +description: "" +project: "riak_kv" +project_version: 2.9.7 +menu: + riak_kv-2.9.7: + name: "HTTP 204" + identifier: "troubleshooting_http_204" + weight: 101 + parent: "managing_troubleshooting" +toc: true +aliases: +--- + +In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers. + +If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored, otherwise you will receive a `204 No Content`. 
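+
+As a quick way to see both responses (a sketch, assuming a local node listening on the default HTTP port 8098 and a plain `test` bucket), compare the status lines returned by the two requests below:
+
+```bash
+# 204 No Content: the PUT succeeds, but no body is returned
+curl -i -XPUT -H "Content-Type: text/plain" -d "bar" \
+  "http://127.0.0.1:8098/buckets/test/keys/foo"
+
+# 200 OK: returnbody=true echoes the stored value back in the response body
+curl -i -XPUT -H "Content-Type: text/plain" -d "bar" \
+  "http://127.0.0.1:8098/buckets/test/keys/foo?returnbody=true"
+```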
+ + + + diff --git a/content/riak/kv/2.9.8/_reference-links.md b/content/riak/kv/2.9.8/_reference-links.md new file mode 100644 index 0000000000..956f6fb62a --- /dev/null +++ b/content/riak/kv/2.9.8/_reference-links.md @@ -0,0 +1,254 @@ + +# Riak KV 2.9.8 Reference Links List + + +## Common + +[downloads]: {{}}riak/kv/2.9.8/downloads/ +[install index]: {{}}riak/kv/2.9.8/setup/installing +[upgrade index]: {{}}riak/kv/2.9.8/upgrading +[plan index]: {{}}riak/kv/2.9.8/planning +[config index]: {{}}riak/kv/2.9.8/using/configuring/ +[config reference]: {{}}riak/kv/2.9.8/configuring/reference/ +[manage index]: {{}}riak/kv/2.9.8/using/managing +[performance index]: {{}}riak/kv/2.9.8/using/performance +[glossary vnode]: {{}}riak/kv/2.9.8/learn/glossary/#vnode +[contact basho]: https://www.tiot.jp/en/about-us/contact-us/ + + +## Planning + +[plan index]: {{}}riak/kv/2.9.8/setup/planning +[plan start]: {{}}riak/kv/2.9.8/setup/planning/start +[plan backend]: {{}}riak/kv/2.9.8/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.9.8/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.9.8/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/2.9.8/setup/planning/backend/leveled +[plan backend memory]: {{}}riak/kv/2.9.8/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.9.8/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/2.9.8/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.9.8/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.9.8/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.9.8/setup/planning/future + + +## Installing + +[install index]: {{}}riak/kv/2.9.8/setup/installing +[install aws]: {{}}riak/kv/2.9.8/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.9.8/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.9.8/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.9.8/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.9.8/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.9.8/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.9.8/setup/installing/solaris +[install suse]: {{}}riak/kv/2.9.8/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.9.8/setup/installing/windows-azure + +[install source index]: {{}}riak/kv/2.9.8/setup/installing/source +[install source erlang]: {{}}riak/kv/2.9.8/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.9.8/setup/installing/source/jvm + +[install verify]: {{}}riak/kv/2.9.8/setup/installing/verify + + +## Upgrading + +[upgrade index]: {{}}riak/kv/2.9.8/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.9.8/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.9.8/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.9.8/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.9.8/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.9.8/setup/downgrade + + +## Configuring + +[config index]: {{}}riak/kv/2.9.8/configuring +[config basic]: {{}}riak/kv/2.9.8/configuring/basic +[config backend]: {{}}riak/kv/2.9.8/configuring/backend +[config manage]: {{}}riak/kv/2.9.8/configuring/managing +[config reference]: {{}}riak/kv/2.9.8/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.9.8/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.9.8/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.9.8/configuring/mapreduce +[config search]: 
{{}}riak/kv/2.9.8/configuring/search/ + +[config v3 mdc]: {{}}riak/kv/2.9.8/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.9.8/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.9.8/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.9.8/configuring/v3-multi-datacenter/ssl + +[config v2 mdc]: {{}}riak/kv/2.9.8/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.9.8/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.9.8/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.9.8/configuring/v2-multi-datacenter/ssl + + + +## Using + +[use index]: {{}}riak/kv/2.9.8/using/ +[use admin commands]: {{}}riak/kv/2.9.8/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.9.8/using/running-a-cluster + +### Reference + +[use ref custom code]: {{}}riak/kv/2.9.8/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.9.8/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.9.8/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.9.8/using/reference/search +[use ref 2i]: {{}}riak/kv/2.9.8/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.9.8/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.9.8/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.9.8/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.9.8/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.9.8/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.9.8/using/reference/v2-multi-datacenter + +### Cluster Admin + +[use admin index]: {{}}riak/kv/2.9.8/using/admin/ +[use admin commands]: {{}}riak/kv/2.9.8/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.9.8/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.9.8/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.9.8/using/admin/riak-control/ + +### Cluster Operations + +[cluster ops add remove node]: {{}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.9.8/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.9.8/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.9.8/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.9.8/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.9.8/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.9.8/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.9.8/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.9.8/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.9.8/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.9.8/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.9.8/using/cluster-operations/v2-multi-datacenter + +### Repair/Recover + +[repair recover index]: {{}}riak/kv/2.9.8/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.9.8/using/repair-recovery/failure-recovery/ + +### Security + +[security index]: {{}}riak/kv/2.9.8/using/security/ +[security basics]: {{}}riak/kv/2.9.8/using/security/basics +[security managing]: {{}}riak/kv/2.9.8/using/security/managing-sources/ + +### Performance + +[perf index]: 
{{}}riak/kv/2.9.8/using/performance/ +[perf benchmark]: {{}}riak/kv/2.9.8/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.9.8/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.9.8/using/performance/erlang +[perf aws]: {{}}riak/kv/2.9.8/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.9.8/using/performance/latency-reduction + +### Troubleshooting + +[troubleshoot http]: {{}}riak/kv/2.9.8/using/troubleshooting/http-204 + + +## Developing + +[dev index]: {{}}riak/kv/2.9.8/developing +[dev client libraries]: {{}}riak/kv/2.9.8/developing/client-libraries +[dev data model]: {{}}riak/kv/2.9.8/developing/data-modeling +[dev data types]: {{}}riak/kv/2.9.8/developing/data-types +[dev kv model]: {{}}riak/kv/2.9.8/developing/key-value-modeling + +### Getting Started + +[getting started]: {{}}riak/kv/2.9.8/developing/getting-started +[getting started java]: {{}}riak/kv/2.9.8/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.9.8/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.9.8/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.9.8/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.9.8/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.9.8/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.9.8/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.9.8/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.9.8/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.8/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.8/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.8/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.8/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.8/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.8/developing/getting-started/golang/object-modeling + +### Usage + +[usage index]: {{}}riak/kv/2.9.8/developing/usage +[usage bucket types]: {{}}riak/kv/2.9.8/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.9.8/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.9.8/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.9.8/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.9.8/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.9.8/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.9.8/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.9.8/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.9.8/developing/usage/search +[usage search schema]: {{}}riak/kv/2.9.8/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.9.8/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.9.8/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.9.8/developing/usage/updating-objects + +### App Guide + +[apps mapreduce]: {{}}riak/kv/2.9.8/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.9.8/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.9.8/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/2.9.8/developing/api/backend +[dev api http]: 
{{}}riak/kv/2.9.8/developing/api/http +[dev api http status]: {{}}riak/kv/2.9.8/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.9.8/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/2.9.8/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.9.8/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.9.8/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.9.8/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.9.8/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.9.8/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.9.8/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.9.8/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.9.8/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.9.8/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.9.8/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.9.8/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.9.8/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.9.8/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + + + + + + diff --git a/content/riak/kv/2.9.8/add-ons.md b/content/riak/kv/2.9.8/add-ons.md new file mode 100644 index 0000000000..3f9ca11c72 --- /dev/null +++ b/content/riak/kv/2.9.8/add-ons.md @@ -0,0 +1,25 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +aliases: +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. + +* [Riak Redis Add-on]({{}}riak/kv/2.9.8/add-ons/redis/) + + + + + diff --git a/content/riak/kv/2.9.8/add-ons/redis.md b/content/riak/kv/2.9.8/add-ons/redis.md new file mode 100644 index 0000000000..852ea3d78e --- /dev/null +++ b/content/riak/kv/2.9.8/add-ons/redis.md @@ -0,0 +1,63 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +aliases: +--- + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. +{{% /note %}} + +Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV. 
+ +RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate. + +Whether you are looking to build out a session, shopping cart, advertisement or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV). + +## Compatibility + +RRA is supported on the following platforms: + +* RHEL/CentOS 6 +* RHEL/CentOS 7 +* Ubuntu 12.04 LTS "Precise Pangolin" +* Ubuntu 14.04 LTS "Trusty Tahr" +* Debian 7 "Wheezy" +* Debian 8 "Jessie" + +RRA is compatible with the following services: + +* Riak KV Enterprise (2.1.4+) +* Riak TS Enterprise (1.4.0+) +* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster) + * Redis Cluster and RRA's consistent hash are at odds, which surface as errors + such as MOVED, ASK, and CROSSSLOT messages from Redis, see (WIP): + https://github.com/antirez/redis-rb-cluster + +## Get Started + +* [Set up RRA.][addon redis setup] +* [Use RRA with various clients.][addon redis use] +* [Develop with RRA.][addon redis develop] +* [Learn about RRA's features.][addon redis features] + + + + diff --git a/content/riak/kv/2.9.8/add-ons/redis/developing-rra.md b/content/riak/kv/2.9.8/add-ons/redis/developing-rra.md new file mode 100644 index 0000000000..1a6f37ae19 --- /dev/null +++ b/content/riak/kv/2.9.8/add-ons/redis/developing-rra.md @@ -0,0 +1,330 @@ +--- +title: "Developing with Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Develop with Redis Add-on" + identifier: "add-ons_redis_develop" + weight: 403 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: +--- + +[redis-clients]: http://redis.io/clients +[usage bucket types]: {{}}riak/kv/2.9.8/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.9.8/developing/api/http +[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ +[apps replication properties]: {{}}riak/kv/2.9.8/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.9.8/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.9.8/learn/concepts/causal-context +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. + +## Overview + +Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides access to Riak KV as a persistent data store, with Redis as a cache, through the various Redis client libraries and the command-line interface tool `redis-cli`. + +As with Riak KV, the cache proxy service almost always performs best and most +predictably when you use the basic CRUD operations -- Create, Read, Update, +Delete -- that you'd find in any key/value store. Learning these operations +is a great place to start when beginning to develop applications that use +RRA. + +The set of clients (including recommendations) for Redis are listed at +[Redis clients][redis-clients]. For brevity's sake, examples provided here are +in: + +* Erlang (Eredis) +* Javascript (node_redis) +* Python (redis-py) +* Ruby (redis-rb) +* Scala (lettuce) +* Java (see the Scala examples; the code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client)
+ +## Riak KV Setup + +While you can use Riak Redis Add-on with Riak KV configured so either `last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'. + +The cache proxy service is tested under both configurations. However, due to lack of support via the Redis protocol for returning multiple values for a single `GET`, effectively `last_write_wins` semantics apply. + +For a deeper explanation of Riak KV's configurable behaviors, see John Daily's +blog series [part 4][config-behaviors]. + +### Bucket Type Setup + +#### Create a Bucket Type + +If your application organizes data without bucket types and instead uses only +buckets to organize its keyspace, the `default` bucket-type +can be used by omitting the bucket-type portion of the colon-delimited +hierarchical namespaced key. In other words, `test:food` is equivalent to +`default:test:food` where the bucket-type is `default`, the bucket is `test`, +and the key is `food`. For examples here, we will use `rra:test:food` to clearly +use a bucket-type. + +If your application organizes data including a bucket-type, ensure that the +bucket-type is created in Riak without specifying the data type, so values are effectively +opaque, i.e. a `string`. The following command provides an example of +creating the bucket-type `rra`: + +```sh +if ! riak-admin bucket-type status rra >/dev/null 2>&1; then + riak-admin bucket-type create rra '{"props":{}}' + riak-admin bucket-type activate rra +fi +``` + +#### Set Bucket Props + +The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false': + +```sh +curl -XPUT -H 'Content-Type: application/json' \ + -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \ + 'http://127.0.0.1:8098/types/rra/buckets/test/props' +``` + +For additional configuration options see [bucket properties][dev api http]. + +## Object/Key Operations + +Riak KV organizes data into buckets, keys, and values, with +[bucket types][usage bucket types] acting as an additional namespace in Riak KV +versions 2.0 and greater. Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket. + +Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or other plaintext representations that can be parsed in the client application (e.g. YAML). + +While buckets are a flat namespace in Riak KV and you can name them +whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy +service, Redis bucket_type:bucket:key is mapped to Riak KV +bucket_type/bucket/key, so bucket type and bucket names should not contain +colon (`:`). When not specified, bucket type defaults to "default". + +Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets. + +The same goes for naming keys: many objects can have the same key as long as they're in different buckets. There is no restriction on keys containing colons (`:`), and this practice of representing a nested namespace is common in applications using Redis.
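+
+To make the mapping concrete, the following sketch (assuming the default RRA listen port 22122 and the Riak KV HTTP port 8098 used elsewhere on these pages) shows the same object addressed through the cache proxy and directly in Riak KV:
+
+```bash
+# through the cache proxy: bucket type "rra", bucket "test", key "food"
+redis-cli -h 127.0.0.1 -p 22122 get rra:test:food
+
+# the same object addressed directly via the Riak KV HTTP API
+curl "http://127.0.0.1:8098/types/rra/buckets/test/keys/food"
+```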
+ +Riak KV [bucket types][usage bucket types] enable you to provide common +configurations for buckets (as many buckets as you wish). This means you can +easily enable buckets to share common configurations, i.e. identical +[replication properties][apps replication properties] or +[commit hooks][usage commit hooks]. + + +## Reading Objects + +Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading-through to Riak KV which results in greater resilience through node outages and network partitions. + +To request a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.get("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.get("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.get("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +var value = connection.get("rra:test:food") +``` + +### Get Configuration Parameters + +>**Note:** The cache proxy service read option (related to replication factor and +consistency concern) may optionally be set within the nutcracker.conf. This will result in an override of the setting value at the bucket-level in Riak KV. + +The following configuration parameters apply to `GET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` | +|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` | +|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | +|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) | +|`timeout` | The number of milliseconds to await a response. | `0` (server specified) | + + +### Sibling Resolution + +As the Redis protocol does not provide a means to return multiple siblings, +the cache proxy service must provide server-side sibling resolution. At present, only last-write-wins sibling resolution is available. The result is an effective +last-write-wins configuration for access through the cache proxy service. + + +## Writing Objects + +Writes via the cache proxy service are analogous to a Redis `SET`, with the added +benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating +cache. As with HTTP PUT, `SET` semantically covers both create and update +operations. + +To set a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.set("rra:test:food", "apple", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.set("rra:test:food", "apple") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.set("rra:test:food", "apple") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +connection.set("rra:test:food", "apple") +``` + +### Set Configuration Parameters + +>**Note:** The cache proxy service write option (related to replication factor and +consistency concern) may optionally be set within the nutcracker.conf, resulting +in an override of the setting value at the bucket-level in Riak KV. + +The following configuration parameters apply to `SET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | + + +### Sibling Explosion + +As noted in the section "Sibling Resolution" above, Riak KV provides for a line of +descent (known as the [causal context][concept causal context]) for a value stored at a key. Clients +performing write operations provide this causal context by setting the vector +clock (VClock) that they last read. + +If a client does not provide the causal context, Riak KV makes no assumptions and treats the write as a new causal context, semantically equivalent to a +create. In the case that a value is already stored at the key, this would lead +to a sibling. + +Since the Redis protocol does not provide a means to pass a VClock, the cache +proxy service needs to perform a read-before-write to obtain the current VClock so the write can continue the causal context previously established and avoid +"sibling explosion". + +Despite these efforts, in the event of a network partition, siblings will still +be created as clients writing to nodes on either side of the network partition +can create divergent lines of descent. Sibling resolution remains the means +to merge these lines of descent into a coherent causal context. + +## Deleting Objects + +Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added +benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating +cache. + +To delete a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.del("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +# redis-py exposes the Redis DEL command as delete(), +# since del is a reserved word in Python +r.delete("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.del("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +connection.del("rra:test:food") +``` + +### Delete Configuration Parameters + +The following configuration parameters apply to `DEL` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | + + + + diff --git a/content/riak/kv/2.9.8/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.9.8/add-ons/redis/redis-add-on-features.md new file mode 100644 index 0000000000..6035d872ef --- /dev/null +++ b/content/riak/kv/2.9.8/add-ons/redis/redis-add-on-features.md @@ -0,0 +1,136 @@ +--- +title: "Riak Redis Add-on Features" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Redis Add-on Features" + identifier: "add-ons_redis_features" + weight: 504 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: +--- + +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png +[redis docs]: http://redis.io/commands +[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md + +## Overview + +The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware. + +On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available: + +* [Pre-sharding](#pre-sharding) +* [Connection Aggregation](#connection-aggregation) +* [Command Pipelining](#command-pipelining) +* [Read-through Cache](#read-through-cache) +* [Write-around Cache](#write-around-cache) +* [Commands](#commands) +* [Object Lifetime](#object-lifetime) + +## Pre-sharding + +Pre-sharding with consistent hashing dispatches object reads and writes based +on a configurable hash function, spreading load across multiple cache servers. +The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then +requests are routed to the Redis server that handles that portion of the key +range. + +Because Redis is used as the frontend caching layer with no persistence and +holds all data in memory, the addressable cache memory of a single Redis +server is limited. By employing pre-sharding, the total addressable cache +memory space is extended across the number of Redis servers. + +## Connection Aggregation + +Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers. This reduces the total required connections to the Redis server for the same key. + +Redis clients in various languages support specifying multiple servers, as well +as implementing multiple methods of spreading load across those servers (e.g. +round-robin load balancing or consistent hashing). Since the cache proxy service is providing consistent hashing, any Redis client method of supporting multiple +servers will suffice.
+ +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideally placed for it due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server after a write (DELETE or PUT) is +issued directly to that server. + +A short `CACHE_TTL`, for example "15s", still removes a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three write cache strategies, the write-around cache strategy is the least +prone to race conditions, but least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV utilizing the read-through + caching strategy with a TTL set at service configuration time. + +* SET - set the value of a key in Riak KV and invalidate the cache by issuing a PEXPIRE + to Redis. + +* DEL - delete the value of a key from Riak KV and invalidate the cache by issuing a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs].
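+
+For instance (a sketch, assuming RRA is listening on its default port 22122), the three fully supported commands can be exercised with `redis-cli` like any other Redis endpoint:
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 set rra:test:food apple   # write to Riak KV, then PEXPIRE to Redis
+redis-cli -h 127.0.0.1 -p 22122 get rra:test:food         # read-through: Redis first, then Riak KV
+redis-cli -h 127.0.0.1 -p 22122 del rra:test:food         # delete from Riak KV, then PEXPIRE to Redis
+```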
+ +>**Important:** While the cache proxy service does support issuing DEL commands, PEXPIRE, with a small TTL, is suggested instead when the semantic intent is to remove an item from cache. With write-around, the DEL command will issue a delete to the Riak backend. + +## Object Lifetime + +With the combination of read-through and write-around cache strategies, the +full object lifetime for a key-value is represented by the following +sequence diagram: + +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) + + + + diff --git a/content/riak/kv/2.9.8/add-ons/redis/set-up-rra.md b/content/riak/kv/2.9.8/add-ons/redis/set-up-rra.md new file mode 100644 index 0000000000..8cf2b7ed2e --- /dev/null +++ b/content/riak/kv/2.9.8/add-ons/redis/set-up-rra.md @@ -0,0 +1,285 @@ +--- +title: "Setting Up Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Set Up Redis Add-on" + identifier: "add-ons_redis_setup" + weight: 201 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: +--- + +[addon redis develop]: ../developing-rra/ +[addon redis use]: ../using-rra +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[install index]: {{}}riak/kv/2.9.8/setup/installing +[perf open files]: {{}}riak/kv/2.9.8/using/performance/open-files-limit/#changing-the-limit +[lab ansible]: https://github.com/paegun/ansible-cache-proxy + +This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. + +## Prerequisites + +Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index]. + +While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go. + +This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used +to configure the cache proxy. + +## In the Lab + +An Ansible setup for Riak Redis Add-on (RRA) was developed to provide a +runnable example of an installation; see [ansible cache proxy][lab ansible]. +The remainder of this setup guide lists the commands required to install and +configure RRA manually. + +## Installing + +1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files]. +2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed. +3. Install Riak Redis Add-on. + +### Change the open-files limit + +As with Riak KV, both the total open-files limit and the per-user open-files limit +must be high enough to allow Redis and Riak Redis Add-on (RRA) to function. + +For a complete guide on changing the limit in Riak KV, see +[Changing the limit][perf open files]. + +#### Linux + +On most Linux distributions, the total limit for open files is controlled by `sysctl`. + +```bash +# check the current total limit +sudo sysctl fs.file-max +# raise it; add fs.file-max=65536 to /etc/sysctl.conf to persist the change +sudo sysctl -w fs.file-max=65536 +sudo sysctl -p +``` + +To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
+ +#### CentOS + +On CentOS systems, set a proper limit for the user you're usually logging in with +to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the +executing user. + +#### Ubuntu + +On Ubuntu systems, the following settings are recommended: + +```config +»USERNAME« hard nofile 65536 +»USERNAME« soft nofile 65536 +root hard nofile 65536 +root soft nofile 65536 +``` + +>**Note:** You may need to log out of your shell and then log back in for these changes to take effect. + + +### Install Redis + +>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on". + +#### Install on Ubuntu + +If you are on Ubuntu, run the following to install Redis: + +```bash +# add the dotdeb repositories to your APT sources. +sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <**Notes:** ss is used here to support a minimal installed system, but netstat may be used as well. + +### Install Riak Redis Add-on (RRA) + +>**Note:** +>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums. + +If you are on CentOS, run the following to install RRA: + +```bash +sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm +``` + +If you are on Ubuntu, run the following to install RRA: + +```bash +sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb +``` + +## Configuring Riak Redis Add-on + +To configure Riak Redis Add-on (RRA), edit the configuration file `/etc/cache_proxy/cache_proxy_22122.yml`. + +The RRA configuration file is in YAML format. An example configuration +file is provided in the install, and it contains all relevant configuration elements: + +```config +» XML node name« : + listen: 0.0.0.0:22122 + hash: fnv1a_64 + distribution: ketama + auto_eject_hosts: true + redis: true + server_retry_timeout: 2000 + server_failure_limit: 1 + server_ttl: 1h + servers: + - 127.0.0.1:6379:1 + backend_type: riak + backend_max_resend: 2 + backends: + - 127.0.0.1:8087 +``` + +Set the `listen` configuration value to set the RRA listen port. + +To set the time-to-live (TTL) for values stored in cache, set the `server_ttl` +configuration value. Human-readable time values can be specified, +with the most likely units being `s` for seconds or `ms` for milliseconds. + +Set the list of Redis servers by listing them, one per line prefixed with `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional). + +Set the list of Riak KV servers by listing them, one per line prefixed with `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«` +(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here. + +### Verify your configuration + +If you are on Ubuntu, run the following to start RRA: + +```bash +sudo service cache_proxy start +``` + +If you are on CentOS, run the following to start RRA and ensure it is enabled +to start on boot: + +```bash +sudo systemctl start cache_proxy +sudo systemctl enable cache_proxy +``` + +To verify RRA is running and listening on the expected port, run the +following (using the loopback interface and the default RRA port 22122 +as an example): + +```bash +redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS +redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on +``` + +Redis should respond with `SUCCESS`.
+ +If RRA is responding with the expected output, run the following to +clean up and remove the test value: + +```bash +redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on +``` + +If you did not get the expected output, run the following +to verify that RRA is running on the expected port: + +```bash +ss -nlp |grep [n]utcracker +``` + +>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well. + +## Next Steps + +Get started with some [basic usage][addon redis use] or check out more info on [setting up for development (with examples)][addon redis develop]. + + + + diff --git a/content/riak/kv/2.9.8/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.9.8/add-ons/redis/set-up-rra/deployment-models.md new file mode 100644 index 0000000000..f531c69c36 --- /dev/null +++ b/content/riak/kv/2.9.8/add-ons/redis/set-up-rra/deployment-models.md @@ -0,0 +1,143 @@ +--- +title: "Riak Redis Add-on Deployment Models" +description: "Explore the various models for deploying Riak Redis Add-on" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Redis Add-on Deployment Models" + identifier: "add-ons_redis_deployment" + weight: 201 + parent: "add-ons_redis_setup" +toc: true +commercial_offering: true +aliases: +--- + +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png + +## Deployment Models + +### Local Cache Deployment + +In a local cache deployment, the RRA and Redis are deployed to the application +server. + +![Local-deployment]({{}}images/redis/rra_deployment_local.png) + +Connections: + +* RRA: The connections between Application Service instances and the RRA Service + instance are local. +* Redis: The connection between the RRA Service instance and Redis Service + instance is local. +* Riak: The connections between Application Servers and Riak Nodes are distributed + and bounded to equal the number of Riak nodes _multiplied_ by the number of + Application Servers since they are aggregated at the RRA Service instance. + +Advantages: + +* Cache hits are extremely fast + +Disadvantages: + +* Cache writes on one application server are *not* observed on other application + servers, so cache hit rates are likely lower unless some form of consistent + routing to the application server exists within the solution. +* Redis competing for RAM with the application service may be problematic + +### Colocated Cache Deployment + +In a colocated cache deployment, the RRA may be deployed either to the +application server (suggested) or to the Riak servers and Redis is deployed to +the Riak servers. + +In the case of deploying the RRA to the application servers, the RRA features +of reducing connections from the relatively high number of application service +instances to the fewer Redis (cache) and Riak (persistent) data service +instances allow for the greatest scale at the expense of the deployment cost +of pushing a service and its configuration. + +In the case of deploying the RRA to the colocated Redis and Riak data servers, +the maximum scale for the solution is constrained by the number of network +connections from the application services while deployment costs remain a matter +of pushing a service and its configuration. In either case, deployment should +be automated, so costs are not multiplied by the number of servers.
+ +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) + +Connections: + +* RRA: The connections between Application Service instances and the RRA Service + instance are distributed and bounded to equal the number of Riak nodes + _multiplied_ by the number of Application Service instances. +* Redis: The connection between the RRA Service instance and Redis Service + instance is local. +* Riak: The connections between RRA and Riak Nodes are distributed and bounded to + equal the number of Riak nodes _squared_. + +Advantages: + +* Increases the cache hit rate as a cache write from one application server + will lead to a cache hit by all other application servers. + +Disadvantages: + +* Typically increased distance between the application service and Redis and + Riak services, so slightly increased latency compared to local. +* Redis competing for RAM with Riak will likely be problematic. Redis should + be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis + to ensure Riak is allotted sufficient RAM to serve the more important + persistent data storage and retrieval services. See http://redis.io/topics/config +* This model may seem to provide data locality, but in the case of faults in + either Redis or Riak services, the fault tolerance mechanisms of RRA and + Riak will not match exactly as communicating the necessary information to + support such a lock-step fault tolerance would lead to greater mean latencies + and Riak provides superior 99th percentile latency performance in the face + of faults. + + +### Distributed Cache Deployment + +In a distributed cache deployment, the RRA is deployed to the application server +and Redis is deployed to standalone servers, separate from Riak cluster nodes. + +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) + +Connections: + +* RRA: The connections between Application Service instances and the RRA Service + instance are local. +* Redis: The connections between the RRA Service instance and Redis Service + instances are distributed and bounded to equal the number of Application + Servers _multiplied_ by the number of Redis Servers. +* Riak: The connections between RRA and Riak Nodes are distributed and bounded to + equal the number of Riak nodes _multiplied_ by the number of Application + Servers since they are aggregated at the RRA Service instance. + +Advantages: + +* Increases the cache hit rate as a cache write from one application server + will lead to a cache hit by all other application servers. +* Keeps RRA near the application, reducing network connections. +* Moves Redis to distinct servers, allowing the cache more RAM and not + constraining the RAM of either application or persistent data services. + +Disadvantages: + +* Typically increased distance between the application service and Redis and + Riak services, so increased latency compared to local. + +### Recommendation + +The relative advantages and disadvantages of the Distributed Cache Deployment, +most notably the increased cache hit rate and reduced connection overhead, +should make it the standout choice for applications requiring the scale and +operational simplicity of Riak. For this reason, we recommend the Distributed +Cache Deployment.
+ + + + diff --git a/content/riak/kv/2.9.8/add-ons/redis/using-rra.md b/content/riak/kv/2.9.8/add-ons/redis/using-rra.md new file mode 100644 index 0000000000..416e346981 --- /dev/null +++ b/content/riak/kv/2.9.8/add-ons/redis/using-rra.md @@ -0,0 +1,246 @@ +--- +title: "Using Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Using Redis Addon" + identifier: "add-ons_redis_getstarted" + weight: 302 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: + - /riak/kv/2.9.8/add-ons/redis/get-started-with-rra +--- + +[addon redis develop]: ../developing-rra/ +[addon redis setup]: ../set-up-rra/ +[dev api http]: {{}}riak/kv/2.9.8/developing/api/http/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + + +Now that you’ve [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client that supports `GET`, `SET`, and `DEL` operations. + +This page will walk you through using RRA. + +## Prerequisites + +We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine. + +You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http]. + +## Run the Read-Through Test + +Throughout this test example, the bucket "test" and key "foo" are used to +demonstrate how to address the hierarchical namespace support in Riak KV +through the flat Redis key. The bucket type is not specified in this example, +so it is effectively the default bucket type, named "default". For additional +information regarding key namespace, see [develop Riak Redis Add-on (RRA)][addon redis develop]. + +The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are: + +* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings. +* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`. +* GET the Riak object at the `test` bucket with the key `foo`. +* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.) +* Assert that the value obtained from the previous cache proxy GET is 'bar'.
+ +First, create a file named `read_through_test.sh` with the following content: + +```bash +#!/usr/bin/env bash + +# set test environment +RIAK_HTTP_IP="127.0.0.1" +RIAK_HTTP_PORT="8098" +CACHE_PROXY_IP="127.0.0.1" +CACHE_PROXY_PORT="22122" +CACHE_PROXY_STATISTICS_PORT="22123" +RIAK_TEST_BUCKET="test" +KEY="foo" +VALUE="bar" + +# DELETE Riak object, ensure no siblings +curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# PUT Riak object +curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# GET Riak object +RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY") + +# GET Cache Proxy value +CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" get "$RIAK_TEST_BUCKET:$KEY") + +# DELETE Riak object, cleanup +curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# Assert +if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then + RESULT="Success" +else + RESULT="FAIL" +fi +echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy." +``` + +Then, once you've created the file, make it executable and run it (the script reads its settings from the variables at the top, so no arguments are needed): + +```bash +chmod +x read_through_test.sh +./read_through_test.sh +``` + +### Exceptions + +If the test does not pass, verify that both Redis and RRA are running. You can do this by running: + +```bash +ps aux |grep [r]edis +ps aux |grep [n]utcracker +``` + +The result should list `redis` and `nutcracker` respectively. + +Also, verify that Riak KV is started and listening on the protocol buffer port specified: + +```bash +sudo riak config effective |grep proto +``` + +If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following: + +```bash +sudo service cache_proxy restart +``` + +If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows: + +1. Stop RRA. +2. Stop Redis. +3. *Optional* Restart Riak KV (This should only be necessary if Riak KV is not responding to protocol buffer requests.) +4. Start Redis. +5. Start RRA. + +```bash +sudo service cache_proxy stop +sudo service redis stop + +# optional +sudo riak restart + +sudo service redis start +sudo service cache_proxy start +``` + +## Using Riak Redis Add-on + +Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA. + +For objects that should not be cached, interact with Riak KV as usual: issuing GET, PUT, and DELETE commands through the Riak client. + +For objects that should be cached, read from RRA: issuing GET, SET, and DEL commands through the Redis client. + +### Monitoring + +#### RRA + +Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service’s process was killed or terminated by other means. + +The log file for RRA is stored by default in `/var/log/cache_proxy.log`. RRA is logrotate-friendly, responding to the signal to reopen the log file following a rotate. + +For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+ +For example, running the following command (using the loopback interface and the default statistics port as an example): + +```bash +telnet 127.0.0.1 22123 +``` + +Returns statistic results: + +```json +{ + "bdp_cache_proxy": { + "192.168.50.2:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 216, + "requests": 9, + "response_bytes": 39, + "responses": 4, + "server_connections": 1, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.3:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 0, + "requests": 0, + "response_bytes": 0, + "responses": 0, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.4:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 90, + "requests": 5, + "response_bytes": 258, + "responses": 2, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "client_connections": 0, + "client_eof": 6, + "client_err": 0, + "forward_error": 0, + "fragments": 0, + "server_ejects": 0 + }, + "curr_connections": 4, + "service": "nutcracker", + "source": "vagrant", + "timestamp": 1438301846, + "total_connections": 10, + "uptime": 7227, + "version": "0.4.0" +} +``` + +Using the above results, you should be able to determine metrics changes that would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution. + +While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions: + +* Custom - https://github.com/gfranxman/NutcrackerMonitor +* NewRelic - http://newrelic.com/plugins/schoology/245 +* Nagios - https://github.com/schoology/twemproxy_nagios + +#### Redis + +Various Redis monitoring solutions exist in the market and, like monitoring RRA, these monitoring solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone. 
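+
+If you'd like to inspect these Redis statistics directly rather than through a monitoring plugin, a minimal sketch (assuming a backend Redis server on the default port 6379 of the local machine) is:
+
+```bash
+# Fetch the full statistics listing; this is the same "info" command
+# that the monitoring solutions listed below call under the hood
+redis-cli -h 127.0.0.1 -p 6379 info
+
+# Or limit the output to a single section, such as memory usage
+redis-cli -h 127.0.0.1 -p 6379 info memory
+```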
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details + + + + diff --git a/content/riak/kv/2.9.8/configuring.md b/content/riak/kv/2.9.8/configuring.md new file mode 100644 index 0000000000..e08269a2f3 --- /dev/null +++ b/content/riak/kv/2.9.8/configuring.md @@ -0,0 +1,88 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +aliases: +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+ +[Learn More >>][config search] + +#### [V3 Multi-Datacenter][config v3 mdc] + +A guide on configuring Riak's V3 Multi-Datacenter Replication + +[Learn More >>][config v3 mdc] + + + + + + diff --git a/content/riak/kv/2.9.8/configuring/backend.md b/content/riak/kv/2.9.8/configuring/backend.md new file mode 100644 index 0000000000..0084e521cc --- /dev/null +++ b/content/riak/kv/2.9.8/configuring/backend.md @@ -0,0 +1,647 @@ +--- +title: "Backend Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Backend Configuration" + identifier: "configuring_backend" + weight: 110 + parent: "configuring" +toc: true +aliases: +--- + +[plan backend leveldb]: {{}}riak/kv/2.9.8/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/2.9.8/setup/planning/backend/leveled +[plan backend bitcask]: {{}}riak/kv/2.9.8/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.8/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.9.8/setup/planning/backend/multi + +## LevelDB + +Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend. + +> **Note on upgrading to 2.0** +> +> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and +wish to use your old configuration files, i.e. `app.config` and +`vm.args`, please note that you must set the `total_leveldb_mem_percent` +setting in the `eleveldb` section of `app.config`. We recommend setting +it to `70`. If you do not set this parameter, it will default to 15, +which can lead to problems in some clusters. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
leveldb.block_cache_thresholdThis setting defines the limit past which block cache memory can no +longer be released in favor of the page cache. This setting has no +impact in favor of file cache. The value is set on a per-vnode basis. +32MB
leveldb.compaction.trigger.tombstone_countControls when a background compaction initiates solely due to the +number of delete tombstones within an individual .sst table +file. A value of off disables the feature.1000
leveldb.compressionEnabling this setting (on), which is the default, +saves disk space. Disabling it may reduce read latency but increase +overall disk activity. This option can be changed at any time, but it +will not impact data on disk until the next time a file requires +compaction.on
leveldb.compression.algorithmThis setting is used to select which compression algorithm + is selected when leveldb.compression is on. + In new riak.conf files, this is explicitly set to + lz4; however when this setting is not provided, + snappy will be used for backward-compatibility. +

+ When you determine that you will no longer need backward-compatibility, + setting this to lz4 will cause future compactions + to use the LZ4 algorithm for compression.
lz4 in new riak.conf files

+ snappy when not provided +
leveldb.data_rootThe directory in which LevelDB will store its data../data/leveldb
leveldb.fadvise_willneedOption to override LevelDB's use of fadvise(DONTNEED) +with fadvise(WILLNEED) instead. WILLNEED can +reduce disk activity on systems where physical memory exceeds the +database size.false
leveldb.maximum_memoryThis parameter defines the server memory (in bytes) to assign to +LevelDB. Also see leveldb.maximum_memory.percent to set +LevelDB memory as a percentage of system total.80
leveldb.maximum_memory.percentThis parameter defines the percentage of total server memory to +assign to LevelDB. LevelDB will dynamically adjust its internal cache +sizes to stay within this size. The memory size can alternately be +assigned as a byte count via leveldb.maximum_memory +instead.70
leveldb.threadsThe number of worker threads performing LevelDB operations.71
leveldb.verify_checksumsEnables or disables the verification of the data fetched from +LevelDB against internal checksums.on
leveldb.verify_compactionEnables or disables the verification of LevelDB data during +compaction.on
leveldb.block.size_stepsDefines the number of incremental adjustments to attempt between the +block.size value and the maximum block.size +for an .sst table file. A value of zero disables the +underlying dynamic block_size feature.16
leveldb.block.restart_intervalDefines the key count threshold for a new key entry in the key +index for a block. Most deployments should leave this parameter alone. +16
leveldb.block.sizeDefines the size threshold for a block/chunk of data within one +.sst table file. Each new block gets an index entry in the +.sst table file's master index.4KB
leveldb.bloomfilterEach database .sst table file can include an optional +"bloom filter" that is highly effective in shortcutting data queries +that are destined to not find the requested key. The Bloom filter +typically increases the size of an .sst table file by about +2%.on
leveldb.write_buffer_size_minEach vnode first stores new key/value data in a memory-based write +buffer. This write buffer is in parallel to the recovery log mentioned +in the sync parameter. Riak creates each vnode with a +randomly sized write buffer for performance reasons. The random size is +somewhere between write_buffer_size_min and +write_buffer_size_max.30MB
leveldb.write_buffer_size_maxSee leveldb.write_buffer_size_min directly above.60MB
leveldb.limited_developer_memThis is a Riak-specific option that is used when a developer is +testing a high number of vnodes and/or several VMs on a machine with +limited physical memory. Do not use this option if making +performance measurements. This option overwrites values given to +write_buffer_size_min and +write_buffer_size_max.off
leveldb.sync_on_writeWhether LevelDB will flush after every write.

+Note: If you are familiar with fsync, this is analogous +to calling fsync after every write.
off
leveldb.tieredThe level number at which LevelDB data switches from the faster to +the slower array. The default of off disables the +feature.off
leveldb.tiered.path.fastThe path prefix for .sst files below the level set by +leveldb.tiered.
leveldb.tiered.path.slowThe path prefix for .sst files below the level set by +leveldb.tiered.
+ +## Leveled + +Configurable Parameters for Riak's [leveled][plan backend leveled] storage backend + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +## Bitcask + +Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend. + +
ConfigDescriptionDefault
leveled.data_rootA path under which leveled data files will be stored.$(platform_data_dir)/leveled +
leveled.sync_strategy +Strategy for flushing data to disk - Can be set to riak_sync, sync (if OTP > 16) or none. Use none, and the OS will flush when most efficient. Use riak_sync or sync to flush after every PUT (not recommended wihtout some hardware support e.g. flash drives and/or +Flash-backed Write Caches)none
leveled.compression_methodCan be lz4 or native (which will use the Erlang native zlib compression) within term_to_binarynative
leveled.compression_pointThe point at which compression is applied to the Journal (the Ledger is always compressed). Use on_receipt or on_compact. on_compact is suitable +when values are unlikely to yield much benefit from compression(compression is only attempted when compacting)on_receipt
leveled.log_levelCan be debug, info, warn, error or critical. Set the minimum log level to be used within leveled. Leveled will log many lines to allow for stats to be etracted by those using log indexers such as Splunkinfo
leveled.journal_size The approximate size (in bytes) when a Journal file should be rolled. Normally keep this as around the size of o(100K) objects.1000000000
leveled.compaction_runs_perdayThe number of journal compactions per vnode per day, The higher the value, the more compaction runs, and the sooner space is recovered. But each run has a cost.24
leveled.compaction_low_hourThe hour of the day in which journal compaction can start. Use Low hour of 0 and High hour of 23 to have no compaction window (i.e. always compactregardless of time of day)0
leveled.compaction_top_hourThe hour of the day, after which journal compaction should stop. If low hour > top hour then, compaction will work overnight between low hour and top hour (inclusive). Timings rely on server's view of local time23
leveled.max_run_lengthIn a single compaction run, what is the maximum number of consecutive files which may be compacted.4
leveled_reload_recalcEnable the `recalc` compaction strategy within the leveled backend in riak.disabled
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
bitcask.data_rootThe directory under which Bitcask will store its data../data/bitcask
bitcask.io_modeConfigure how Bitcask writes data to disk. If set to +erlang, writes are made via Erlang's built-in file API; if +set to nif, writes are made via direct calls to the POSIX C +API. The nif mode provides higher throughput for certain +workloads, but has the potential to negatively impact the Erlang VM, +leading to higher worst-case latencies and possible throughput collapse +erlang
bitcask.expiryBy default, Bitcask keeps all of your data around. If your data has +limited time value, or if you need to purge data for space reasons, you +can set the expiry option. For example, if you need to +purge data automatically after 1 day, set the value to 1d. +off disables automatic expirationoff
bitcask.expiry.grace_timeBy default, Bitcask will trigger a merge whenever a data file +contains an expired key. This may result in excessive merging under some +usage patterns. To prevent this you can set the +bitcask.expiry.grace_time option. Bitcask will defer +triggering a merge solely for key expiry by the configured number of +seconds. Setting this to 1h effectively limits each cask to +merging for expiry once per hour.0
bitcask.hintfile_checksumsWhether to allow the CRC to be present at the end of hintfiles. +Setting this to allow_missing runs Bitcask in a +backwards-compatible mode in which old hint files will still be accepted +without CRC signatures.strict
bitcask.fold.max_putsSee the description for the bitcask.fold.max_age +config directly below.0
bitcask.fold.max_ageFold keys thresholds will reuse the keydir if another fold was +started less than fold.max_age ago and there were fewer +than fold.max_puts updates. Otherwise, it will wait until +all current fold keys complete and then start. Set either option to +unlimited to disable.unlimited
bitcask.merge.thresholds.fragmentationDescribes which ratio of dead keys to total keys in a file will +cause it to be included in the merge. The value of this setting is a +percentage from 0 to 100. For example, if a data file contains 4 dead +keys and 6 live keys, it will be included in the merge at the default +ratio (which is 40). Increasing the value will cause fewer files to be +merged, decreasing the value will cause more files to be merged.40
bitcask.merge.thresholds.dead_bytesDescribes the minimum amount of data occupied by dead keys in a file +to cause it to be included in the merge. Increasing the value will cause +fewer files to be merged, whereas decreasing the value will cause more +files to be merged.128MB
bitcask.merge.thresholds.small_fileDescribes the minimum size a file must have to be excluded from the +merge. Files smaller than the threshold will be included. Increasing +the value will cause more files to be merged, whereas decreasing the +value will cause fewer files to be merged.10MB
bitcask.merge.triggers.dead_bytesDescribes how much data stored for dead keys in a single file will +trigger merging. If a file meets or exceeds the trigger value for dead +bytes, merge will be triggered. Increasing the value will cause merging +to occur less often, whereas decreasing the value will cause merging to +happen more often. When either of these constraints are met by any file +in the directory, Bitcask will attempt to merge files.512MB
bitcask.merge.triggers.fragmentationDescribes which ratio of dead keys to total keys in a file will +trigger merging. The value of this setting is a percentage from 0 to +100. For example, if a data file contains 6 dead keys and 4 live keys, +then merge will be triggered at the default setting. Increasing this +value will cause merging to occur less often, whereas decreasing the +value will cause merging to happen more often.60
bitcask.merge.window.endSee the description of the bitcask.merge.policy config +below.23
bitcask.merge.window.startSee the description of the bitcask.merge.policy config +below.0
bitcask.merge.policyLets you specify when during the day merge operations are allowed to +be triggered. Valid options are: always, meaning no +restrictions; never, meaning that merging will never be +attempted; and window, specifying the hours during which +merging is permitted, where bitcask.merge.window.start and +bitcask.merge.window.end are integers between 0 and 23. If +merging has a significant impact on performance of your cluster, or your +cluster has quiet periods in which little storage activity occurs, you +may want to change this setting from the default.always
bitcask.merge_check_intervalBitcask periodically runs checks to determine whether merges are +necessary. This parameter determines how often those checks take place. +Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, +etc.3m
bitcask.merge_check_jitterIn order to prevent merge operations from taking place on different +nodes at the same time, Riak can apply random variance to merge times, +expressed as a percentage of bitcask.merge_check_interval. +30%
bitcask.max_merge_sizeMaximum amount of data to merge in one go in the Bitcask backend. +100GB
bitcask.max_file_sizeDescribes the maximum permitted size for any single data file in the +Bitcask directory. If a write causes the current file to exceed this +size threshold then that file is closed, and a new file is opened for +writes.2GB
bitcask.sync.intervalSee the description of the bitcask.sync.strategy +directly below.
bitcask.sync.strategyChanges the durability of writes by specifying when to synchronize +data to disk. The default setting protects against data loss in the +event of application failure (process death) but leaves open a small +window in which data could be lost in the event of complete system +failure (e.g. hardware, OS, or power). The default mode, +none, writes data into operating system buffers which will +be written to the disks when those buffers are flushed by the operating +system. If the system fails, e.g. due power loss or crash, that data is +lost before those buffers are flushed to stable storage. This is +prevented by the setting o_sync, which forces the operating +system to flush to stable storage at every write. The effect of flushing +each write is better durability, however write throughput will suffer as +each write will have to wait for the write to complete. Available sync +strategies: none, which will let the operating system +manage syncing writes; o_sync, which will uses the +O_SYNC flag to force syncs on every write; and +interval, by which will force Bitcask to sync every +bitcask.sync.interval seconds.none
bitcask.open_timeoutSpecifies the maximum time Bitcask will block on startup while +attempting to create or open the data directory. You generally need not +change this value. If for some reason the timeout is exceeded on open +you'll see a log message of the form Failed to start bitcask +backend: .... . Only then should you consider a longer timeout. +4s
+ +## Memory Backend + +Configurable parameters for Riak's [Memory][plan backend memory] backend. + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
memory_backend.ttlEach value written will be written with this "time to live." Once +that object's time is up, it will be deleted on the next read of its +key. Minimum: 1s.
memory_backend.max_memory_per_vnodeThe maximum amount of memory consumed per vnode by the memory +storage backend. Minimum: 1MB.
+ +## Multi Backend + +Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster. + +If you are using multiple backends, you can configure the backends +individually by prepending the setting with `multi_backend.$name`, where +`$name` is the name of the backend. `$name` can be any valid +configuration word, like `customer_data`, `my_data`, `foo_bar_backend`, +etc. + +Below is the general form for setting multi-backend parameters: + +```riakconf +multi_backend.$name.(existing_setting) = +# or +multi_backend.$name.$backend_type.(backend_specific_setting) = +``` + +Below is a listing of the available parameters: + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
multi_backend.$name.storage_backendThis parameter specifies the Erlang module defining the storage +mechanism that will be used on this node.bitcask
multi_backend.defaultThe default name of a backend when one is not specified.
+
+To give an example, if you have a LevelDB backend named
+`customer_backend` and wish to set the `data_root` parameter to
+`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would
+do so as follows:
+
+```riakconf
+multi_backend.customer_backend.storage_backend = leveldb
+multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend
+multi_backend.customer_backend.leveldb.maximum_memory.percent = 50
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/configuring/basic.md b/content/riak/kv/2.9.8/configuring/basic.md
new file mode 100644
index 0000000000..380fded497
--- /dev/null
+++ b/content/riak/kv/2.9.8/configuring/basic.md
@@ -0,0 +1,239 @@
+---
+title: "Basic Riak KV Configuration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Basic Configuration"
+    identifier: "configuring_basic"
+    weight: 100
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/configuration/
+  - /riak/kv/2.9.8/ops/building/configuration/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+[use running cluster]: {{<baseurl>}}riak/kv/2.9.8/using/running-a-cluster
+[use admin riak-admin#member-status]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#member-status
+[perf erlang]: {{<baseurl>}}riak/kv/2.9.8/using/performance/erlang
+[plan start]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/start
+[plan best practices]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/best-practices
+[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/backing-up
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes
+[plan backend]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/multi
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.8/developing/app-guide/replication-properties
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.8/using/performance/benchmarking
+[perf open files]: {{<baseurl>}}riak/kv/2.9.8/using/performance/open-files-limit
+[perf index]: {{<baseurl>}}riak/kv/2.9.8/using/performance
+[perf aws]: {{<baseurl>}}riak/kv/2.9.8/using/performance/amazon-web-services
+[Cluster Capacity Planning]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/cluster-capacity/#ring-size-number-of-partitions
+
+This document covers the parameters that are commonly adjusted when
+setting up a new cluster. We recommend that you also review the detailed
+[Configuration Files][config reference] document before moving a cluster into
+production.
+
+All configuration values discussed here are managed via the
+configuration file on each node, and a node must be restarted for any
+changes to take effect.
+
+> **Note**
+>
+> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config` configuration file or
+the newer `riak.conf` if you wish.
+>
+> If you have installed Riak KV 2.0 directly, you should use only
+`riak.conf`.
+>
+> More on configuring Riak KV can be found in the [configuration files][config reference]
+doc.
+
+We advise that you make as many of the changes below as practical
+_before_ joining the nodes together into a cluster. Once your
+configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process.
+
+Use [`riak-admin member-status`][use admin riak-admin#member-status]
+to determine whether any given node is a member of a cluster.
+
+## Erlang VM Tunings
+
+Prior to building and starting a cluster, there are some
+Erlang-VM-related changes that you should make to your configuration
+files. If you are using the older, `vm.args`-based Erlang VM tunings,
+you should set the following:
+
+```vmargs
++sfwi 500
++scl false
+```
+
+If you are using the newer, `riak.conf`-based configuration system, we
+recommend the following settings:
+
+```riakconf
+erlang.schedulers.force_wakeup_interval = 500
+erlang.schedulers.compaction_of_load = false
+```
+
+More information can be found in [Erlang VM Tuning][perf erlang].
+
+## Ring Size
+
+The ring size, in Riak parlance, is the number of data partitions that
+comprise the cluster. This quantity impacts the scalability and
+performance of a cluster and, importantly, **it should be established
+before the cluster starts receiving data**.
+
+If the ring size is too large for the number of servers, disk I/O will
+be negatively impacted by the excessive number of concurrent databases
+running on each server; if the ring size is too small, the servers' other
+resources (primarily CPU and RAM) will go underutilized.
+
+See [Cluster Capacity Planning] for more details on choosing a ring size.
+
+The steps involved in changing the ring size depend on whether the
+servers (nodes) in the cluster have already been joined together.
+
+### Cluster joined, but no data needs to be preserved
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
+4. Start all nodes
+5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### New servers, have not yet joined a cluster
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for
+the location of this file)
+4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### Verifying ring size
+
+You can use the `riak-admin` command to verify the ring size:
+
+```bash
+riak-admin status | grep ring
+```
+
+Console output:
+
+```
+ring_members : ['riak@10.160.13.252']
+ring_num_partitions : 8
+ring_ownership : <<"[{'riak@10.160.13.252',8}]">>
+ring_creation_size : 8
+```
+
+If `ring_num_partitions` and `ring_creation_size` do not agree, that
+means that the `ring_creation_size` value was changed too late and that
+the proper steps were not taken to start over with a new ring.
+
+**Note**: Riak will not allow two nodes with different ring sizes to be
+joined into a cluster.
+
+## Backend
+
+Another critical decision to be made is the backend to use.
The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.8/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. + + + + diff --git a/content/riak/kv/2.9.8/configuring/global-object-expiration.md b/content/riak/kv/2.9.8/configuring/global-object-expiration.md new file mode 100644 index 0000000000..f9b83445fc --- /dev/null +++ b/content/riak/kv/2.9.8/configuring/global-object-expiration.md @@ -0,0 +1,90 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
+menu:
+  riak_kv-2.9.8:
+    name: "Global Object Expiration"
+    identifier: "config_expiry"
+    weight: 180
+    parent: "configuring"
+project: "riak_kv"
+project_version: 2.9.8
+toc: true
+aliases:
+---
+
+[ttl]: https://en.wikipedia.org/wiki/Time_to_live
+
+By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data.
+
+Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value.
+
+## Enabling Expiry
+
+To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file:
+
+```riak.conf
+leveldb.expiration = on
+```
+
+{{% note %}}
+Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration.
+{{% /note %}}
+
+## Setting Retention Time
+
+The `retention_time` setting is used to specify the time until objects expire.
+Durations are set using a combination of an integer and a shortcut for the supported units:
+
+- Milliseconds - `ms`
+- Seconds - `s`
+- Minutes - `m`
+- Hours - `h`
+- Days - `d`
+- Weeks - `w`
+- Fortnight - `f`
+
+The following example configures objects to expire after 5 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 5h
+```
+
+You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 8d9h
+```
+
+## Expiry Modes
+
+Global expiration supports two modes:
+
+- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired.
+- `normal` - individual objects are removed as part of the usual compaction process.
+
+We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient.
+
+The following example configures objects to expire after 1 day:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+## Disable Expiry
+
+To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other two settings are ignored. For example:
+
+```riak.conf
+leveldb.expiration = off
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/configuring/load-balancing-proxy.md b/content/riak/kv/2.9.8/configuring/load-balancing-proxy.md
new file mode 100644
index 0000000000..858dc575c9
--- /dev/null
+++ b/content/riak/kv/2.9.8/configuring/load-balancing-proxy.md
@@ -0,0 +1,275 @@
+---
+title: "Load Balancing and Proxy Configuration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Load Balancing & Proxy"
+    identifier: "configuring_load_balance"
+    weight: 150
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/advanced/configs/load-balanacing-proxy/
+  - /riak/kv/2.9.8/ops/advanced/configs/load-balanacing-proxy/
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/2.9.8/using/performance/open-files-limit
+
+The recommended best practice for operating Riak in production is to
+place Riak behind a load-balancing or proxy solution, either hardware-
+or software-based, while never directly exposing Riak to public network
+interfaces.
+ +Riak users have reported success in using Riak with a variety of load- +balancing and proxy solutions. Common solutions include proprietary +hardware-based load balancers, cloud-based load balancing options, such +as Amazon's Elastic Load Balancer, and open-source software based +projects like HAProxy and Nginx. + +This guide briefly explores the commonly used open-source software-based +solutions HAProxy and Nginx, and provides some configuration and +operational tips gathered from community users and operations oriented +engineers at Basho. + +While it is by no means an exhaustive overview of the topic, this guide +should provide a starting point for choosing and implementing your own +solution. + +## HAProxy + +[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source +solution for load balancing and proxying of HTTP- and TCP-based +application traffic. + +Users have reported success in using HAProxy in combination with Riak in +a number of configurations and scenarios. Much of the information and +example configuration for this section is drawn from experiences of +users in the Riak community in addition to suggestions from Basho +engineering. + +### Example Configuration + +The following is an example starting-point configuration for HAProxy to +act as a load balancer. The example cluster has 4 nodes and will be +accessed by Riak clients using both the Protocol Buffers and HTTP +interfaces. + +> **Note on open files limits** +> +> The operating system's open files limits need to be greater than 256000 +for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems. + +```config +global + log 127.0.0.1 local0 + log 127.0.0.1 local1 notice + maxconn 256000 + chroot /var/lib/haproxy + user haproxy + group haproxy + spread-checks 5 + daemon + quiet + +defaults + log global + option dontlognull + option redispatch + option allbackups + maxconn 256000 + timeout connect 5000 + +backend riak_rest_backend + mode http + balance roundrobin + option httpchk GET /ping + option httplog + server riak1 riak1.:8098 weight 1 maxconn 1024 check + server riak2 riak2.:8098 weight 1 maxconn 1024 check + server riak3 riak3.:8098 weight 1 maxconn 1024 check + server riak4 riak4.:8098 weight 1 maxconn 1024 check + +frontend riak_rest + bind 127.0.0.1:8098 + # Example bind for SSL termination + # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem + mode http + option contstats + default_backend riak_rest_backend + + +backend riak_protocol_buffer_backend + balance leastconn + mode tcp + option tcpka + option srvtcpka + server riak1 riak1.:8087 weight 1 maxconn 1024 check + server riak2 riak2.:8087 weight 1 maxconn 1024 check + server riak3 riak3.:8087 weight 1 maxconn 1024 check + server riak4 riak4.:8087 weight 1 maxconn 1024 check + + +frontend riak_protocol_buffer + bind 127.0.0.1:8087 + mode tcp + option tcplog + option contstats + mode tcp + option tcpka + option srvtcpka + default_backend riak_protocol_buffer_backend +``` + +A specific configuration detail worth noting from the example is the +commented option for SSL termination. HAProxy supports SSL directly as +of version 1.5. Provided that your HAProxy instance was built with +OpenSSL support, you can enable it by uncommenting the example line and +modifying it to suit your environment. More information is available in +the [HAProxy +documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl). 
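+
+For example, a hypothetical SSL-terminating frontend (reusing the certificate path from the commented line above, which you would replace with your own) might look like this:
+
+```config
+frontend riak_rest_ssl
+    # Terminate SSL here and forward plain HTTP to the Riak nodes;
+    # requires HAProxy 1.5+ built with OpenSSL support
+    bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem
+    mode http
+    option contstats
+    default_backend riak_rest_backend
+```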
+
+Also note that the complete example above is considered a starting point and is a
+work in progress based upon [this
+example](https://gist.github.com/1507077). You should carefully examine
+the configuration and change it according to your specific environment.
+
+### Maintaining Nodes Behind HAProxy
+
+When using HAProxy with Riak, you can instruct HAProxy to ping each node
+in the cluster and automatically remove nodes that do not respond.
+
+You can also specify a round-robin configuration in HAProxy and have
+your application handle connection failures by retrying after a timeout,
+thereby reaching a functioning node upon retrying the connection
+attempt.
+
+HAProxy also has a standby system you can use to remove a node from
+rotation while allowing existing requests to finish. You can remove
+nodes from HAProxy directly from the command line by interacting with
+the HAProxy stats socket with a utility such as
+[socat](http://www.dest-unreach.org/socat/):
+
+```bash
+echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+At this point, you can perform maintenance on the node, down the node,
+and so on. When you've finished working with the node and it is again
+available for requests, you can re-enable it:
+
+```bash
+echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+Consult the following HAProxy documentation resources for more
+information on configuring HAProxy in your environment:
+
+* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy)
+* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)
+
+## Nginx
+
+Some users have reported success in using the [Nginx](http://nginx.org/)
+HTTP server to proxy requests for Riak clusters. An example that
+provides access to a Riak cluster *through GET requests only* is
+provided here for reference.
+
+### Example Configuration
+
+The following is an example starting point configuration for Nginx to
+act as a front-end proxy to a 5-node Riak cluster.
+
+This example forwards all GET requests to Riak nodes while rejecting all
+other HTTP operations.
+
+{{% note title="Nginx version notes" %}}
+This example configuration was verified on **Nginx version 1.2.3**. Please be
+aware that earlier versions of Nginx did not support any HTTP 1.1 semantics
+for upstream communication to backends. You should carefully examine this
+configuration and make changes appropriate to your specific environment before
+attempting to use it.
+{{% /note %}}
+
+Here is an example `nginx.conf` file:
+
+```config
+upstream riak_hosts {
+  # server 10.0.1.10:8098;
+  # server 10.0.1.11:8098;
+  # server 10.0.1.12:8098;
+  # server 10.0.1.13:8098;
+  # server 10.0.1.14:8098;
+}
+
+server {
+  listen 80;
+  server_name _;
+  access_log /var/log/nginx/riak.access.log;
+
+  # your standard Nginx config for your site here...
+ location / { + root /var/www/nginx-default; + } + + # Expose the /riak endpoint and allow queries for keys only + location /riak/ { + proxy_set_header Host $host; + proxy_redirect off; + + client_max_body_size 10m; + client_body_buffer_size 128k; + + proxy_connect_timeout 90; + proxy_send_timeout 90; + proxy_read_timeout 90; + + proxy_buffer_size 64k; # If set to a smaller value, + # nginx can complain with an + # "too large headers" error + proxy_buffers 4 64k; + proxy_busy_buffers_size 64k; + proxy_temp_file_write_size 64k; + + if ($request_method != GET) { + return 405; + } + + # Disallow any link with the MapReduce query format "bucket,tag,_" + if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) { + return 405; + } + + if ($request_method = GET) { + proxy_pass http://riak_hosts; + } + } +} +``` + +{{% note title="Note on access controls" %}} +Even when filtering and limiting requests to GETs only as done in the example, +you should strongly consider additional access controls beyond what Nginx can +provide directly, such as specific firewall rules to limit inbound connections +to trusted sources. +{{% /note %}} + +### Querying Secondary Indexes Over HTTP + +When accessing Riak over HTTP and issuing Secondary Index queries, you +can encounter an issue due to the default Nginx handling of HTTP header +names containing underscore (`_`) characters. + +By default, Nginx will issue errors for such queries, but you can +instruct Nginx to handle such header names when doing Secondary Index +queries over HTTP by adding the following directive to the appropriate +`server` section of `nginx.conf`: + +``` +underscores_in_headers on; +``` + + + + diff --git a/content/riak/kv/2.9.8/configuring/managing.md b/content/riak/kv/2.9.8/configuring/managing.md new file mode 100644 index 0000000000..bbd1795217 --- /dev/null +++ b/content/riak/kv/2.9.8/configuring/managing.md @@ -0,0 +1,121 @@ +--- +title: "Managing Your Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Managing Configuration" + identifier: "configuring_managing" + weight: 130 + parent: "configuring" +toc: true +aliases: +--- + +[use admin riak cli]: {{}}riak/kv/2.9.8/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.9.8/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.9.8/configuring/reference/#search + +## Retrieving a Configuration Listing + +At any time, you can get a snapshot of currently applied configurations +through the command line. For a listing of *all* of the configs +currently applied in the node: + +```bash +riak config effective +``` + +This will output a long list of the following form: + +``` +anti_entropy = active +anti_entropy.bloomfilter = on +anti_entropy.concurrency_limit = 2 +# and so on +``` + +For detailed information about a particular configuration variable, use +the `config describe ` command. This command will output a +description of what the parameter configures, which datatype you should +use to set the parameter (integer, string, enum, etc.), the default +value of the parameter, the currently set value in the node, and the +name of the parameter in `app.config` in older versions of Riak (if +applicable). + +For in-depth information about the `ring_size` variable, for example: + +```bash +riak config describe ring_size +``` + +This will output the following: + +``` +Documentation for ring_size +Number of partitions in the cluster (only valid when first +creating the cluster). 
Must be a power of 2, minimum 8 and maximum +1024. + + Datatype : [integer] + Default Value: 64 + Set Value : undefined + app.config : riak_core.ring_creation_size +``` + +## Checking Your Configuration + +The [`riak`][use admin riak cli] command line tool has a +[`chkconfig`][use admin riak cli#chkconfig] command that enables you to +determine whether the syntax in your configuration files is correct. + +```bash +riak chkconfig +``` + +If your configuration files are syntactically sound, you should see the +output `config is OK` followed by a listing of files that were checked. +You can safely ignore this listing. If, however, something is +syntactically awry, you'll see an error output that provides details +about what is wrong. To give an example, the `search.solr.jmx_port` +setting (in the [Search][config reference#search] section below) +must be set as an integer. Imagine that we set it to something else: + +```riakconf +search.solr.jmx_port = banana +``` + +If we run `riak chkconfig` now, we'll get an error: + +``` +[error] Error generating configuration in phase transform_datatypes +[error] Error transforming datatype for: search.solr.jmx_port +[error] "banana" can't be converted to an integer +``` + +The error message will specify which configurable parameters are +syntactically unsound and attempt to provide an explanation why. + +Please note that the `chkconfig` command only checks for syntax. It will +_not_ be able to discern if your configuration is otherwise unsound, +e.g. if your configuration will cause problems on your operating system +or doesn't activate subsystems that you would like to use. + +## Debugging Your Configuration + +If there is a problem with your configuration but you're having trouble +identifying the problem, there is a command that you can use to debug +your configuration: + +```bash +riak config generate -l debug +``` + +If there are issues with your configuration, you will see detailed +output that might provide a better sense of what has gone wrong in the +config generation process. + + + + diff --git a/content/riak/kv/2.9.8/configuring/mapreduce.md b/content/riak/kv/2.9.8/configuring/mapreduce.md new file mode 100644 index 0000000000..89e25eb9bf --- /dev/null +++ b/content/riak/kv/2.9.8/configuring/mapreduce.md @@ -0,0 +1,200 @@ +--- +title: "MapReduce Settings" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "MapReduce Settings" + identifier: "configuring_mapreduce" + weight: 170 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.8/ops/advanced/configs/mapreduce/ + - /riak/kv/2.9.8/ops/advanced/configs/mapreduce/ +--- + +[usage mapreduce]: {{}}riak/kv/2.9.8/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.9.8/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.9.8/developing/usage/secondary-indexes + +## Configuring MapReduce + +[MapReduce (M/R)][usage mapreduce] is always enabled, but configurable +through the [app.config][config reference#appconfig] file as +follows under `riak_kv` + +```erlang +{riak_kv, [ +``` + +`mapred_name` is the URL directory used to submit M/R requests to Riak. +By default `mapred`, making the command path, for example: +`http://localhost:8098/mapred` + +```erlang + {mapred_name, "mapred"}, +``` + +`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes] +MapReduce inputs are queued in parallel in their own pipe (`true`), or +serially through a helper process (`false` or undefined). 
+ +> **Note**: Set to `false` or leave undefined during an upgrade from 1.0. + +```erlang + {mapred_2i_pipe, true}, +``` + +Each of these entries control how many Javascript virtual machines are +available for executing map, reduce, pre- and post-commit hook +functions. + +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {map_js_vm_count, 8 }, + {reduce_js_vm_count, 6 }, + {hook_js_vm_count, 2 }, +``` + +`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated +to the Javascript VMs. If unset, the default is 8MB. + +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {js_max_vm_mem, 8}, +``` + +`js_thread_stack` is the maximum amount of thread stack, in megabytes, +allocated to the Javascript VMs. If unset, the default is 16MB. + +> **Note**: This is not the same as the C thread stack. + +```erlang + {js_thread_stack, 16}, +``` + +`js_source_dir` should point to a directory containing Javascript source +files which will be loaded when Riak initializes Javascript VMs. + +```erlang + %{js_source_dir, "/tmp/js_source"}, +``` + + + +## Configuration Tuning for Javascript + +If you load larger JSON objects in your buckets there is a possibility you might encounter an error like the following: + +```json + {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"} +``` + + +You can increase the amount of memory allocated to the Javascript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB: + +```erlang +{js_thread_stack, 8} +``` + +becomes + +```erlang +{js_thread_stack, 32}, +``` + +In addition to increasing the amount of memory allocated to the stack you can increase the heap size as well by increasing the `js_max_vm_mem` from the default of 8MB. If you are collecting a large amount of results in a reduce phase you may need to increase this setting. + +## Configuration for Riak 1.0 + +Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config: + +```erlang +%% Use Riak Pipe to power MapReduce queries +{mapred_system, pipe}, +``` + +> **Warning:** +> +> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0. + +Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this: + +```erlang +%% Use the legacy MapReduce system +{mapred_system, legacy}, +``` + +## Configuration Tuning for Reduce Phases + +If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases. + +### Batch Size + +By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs. 
If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config: + +```erlang +%% Run reduce functions after 100 new inputs are received +{mapred_reduce_phase_batch_size, 100}, +``` + +You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_batch_size":150}}} +``` + +In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form: + +```erlang +{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep} +``` + +Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_only_1":true}}} +``` + +Similarly, in Erlang: + +```erlang +{reduce, FunSpec, [reduce_phase_only_1], Keep} +``` + +> **Warning:** +> +> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1. + +### Pre-Reduce + +If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced local to the partition that produced them, before being sent, as usual, to the final aggregate reduce. + +Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config: + +```erlang +%% Always pre-reduce between map and reduce phases +{mapred_always_prereduce, true} +``` + +Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag: + +```erlang +{map, FunSpec, [do_prereduce], Keep} +``` + +> **Warning:** +> +>A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1. + + + + diff --git a/content/riak/kv/2.9.8/configuring/next-gen-replication.md b/content/riak/kv/2.9.8/configuring/next-gen-replication.md new file mode 100644 index 0000000000..ca271c1c08 --- /dev/null +++ b/content/riak/kv/2.9.8/configuring/next-gen-replication.md @@ -0,0 +1,63 @@ +--- +tile_supertext: "Configuring:" +title: "Next Gen Replication" +description: "" +project: "riak_kv" +project_version: "2.9.8" +menu: + riak_kv-2.9.8: + name: "Next Gen Replication" + identifier: "nextgen_rep" + weight: 200 + parent: "configuring" +version_history: + in: "2.9.1+" +toc: true +commercial_offering: true +aliases: +--- + +The configuration for Next Gen Replication is kept in + the `riak.conf` configuration file. 
+ +## Settings + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak.conf Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync does all data need to be sync'd, or should a specific bucket be sync'd (bucket), or a specific bucket type (type).Note that in most cases sync of all data is lower overhead than sync of a subset of data - as cached AAE trees will be used. +`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For tictac full-sync what registered queue name on this cluster should be use for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exlusive to full-sync (i.e. a real-time replication queue may be used as well). +`ttaaefs_maxresults` | `any` (integer) | `64` | or tictac full-sync what is the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair. +`ttaaefs_rangeboost` | `any` (integer) | `8` | For tictac full-sync what is the maximum number of AAE segments to be compared per exchange. When running a range_check query this will be the ttaaefs_max results * ttaaefs_rangeboost. +`ttaaefs_bucketfilter_name` | `any`, (text)| `` | For Tictac bucket full-sync which bucket should be sync'd by this node. Only ascii string bucket definitions supported (which will be converted using list_to_binary). +`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync what is the bucket type of the bucket name. Only ascii string type bucket definitions supported (these definitions will be converted to binary using list_to_binary) +`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync which NVAL should be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval. +`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync which NVAL should be sync'd in the remote cluster. +`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node in the cluster with which this node will connect to for full_sync purposes. If this peer node is unavailable, then this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes. +`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster. +`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when conecting to the peer in the remote cluster. Could be http or pb (but only http currently being tested). +`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24hour period should all the data be checked to confirm it is fully sync'd. When running a full (i.e. nval) sync this will check all the data under that nval between the clusters, and when the trees are out of alignment, will check across all data where the nval matches the specified nval. +`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24hour period should no data be checked to confirm it is fully sync'd. Use nochecks to align the number of checks done by each node - if each node has the same number of slots, they will naurally space their checks within the period of the slot. 
`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24-hour period the last hour's data should be checked to confirm it is fully sync'd.
`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24-hour period the last 24 hours of data should be checked to confirm it is fully sync'd.
`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24-hour period a range_check should be run.
`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, whether each repaired key should be logged.
`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable Tictac AAE. Note that disabling Tictac AAE will set the use of tictacaae_active only at startup - setting the environment variable at runtime will have no impact.
`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Set the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and not removed when that partition hands off (although the contents should be cleared).
`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if Tictac AAE is enabled, the vnode will detect if the vnode backend has the capability to be a "native" store. If not, then parallel mode will be entered, and a parallel AAE keystore will be started. There are two potential parallel store backends - leveled_ko and leveled_so.
`tictacaae_rebuildwait` | `` | `336` | The number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
`tictacaae_rebuilddelay` | `` | `345600` | Once a rebuild is due (after the rebuild wait), it will be triggered after a further random delay of up to this many seconds.
`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will be skipped when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans developing.
`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data; hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired each pass is defined by tictacaae_maxresults.
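Pulling a few of the TicTac AAE settings from the table above into riak.conf form (the values shown are the documented defaults, except `tictacaae_active`, which is switched on here for illustration):

```riakconf
tictacaae_active = active
tictacaae_parallelstore = leveled_so
tictacaae_rebuildwait = 336
tictacaae_rebuilddelay = 345600
tictacaae_storeheads = disabled
```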
+ diff --git a/content/riak/kv/2.9.8/configuring/reference.md b/content/riak/kv/2.9.8/configuring/reference.md new file mode 100644 index 0000000000..68dd2ea400 --- /dev/null +++ b/content/riak/kv/2.9.8/configuring/reference.md @@ -0,0 +1,2039 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.8/ops/advanced/configs/configuration-files/ + - /riak/kv/2.9.8/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. 
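As a quick orientation to the riak.conf format used throughout this reference, settings are flat `key = value` pairs. The values below are simply the defaults documented in the tables that follow:

```riakconf
nodename = riak@127.0.0.1
ring_size = 64
storage_backend = bitcask
```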
+ +## Node Metadata + +Every Riak node has a name and a cookie used to facilitate inter-node +communication. The following parameters enable you to customize the name +and cookie. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
distributed_cookieCookie for distributed node communication within a Riak cluster. +All nodes in the same cluster should use the same cookie or they will +not be able to communicate.riak
nodenameThe name of the Riak node.riak@127.0.0.1
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
+ +## Ring + +Configurable parameters for your cluster's [ring][concept clusters]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ring.state_dirDefault location of ringstate../data/ring
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
transfer_limitNumber of concurrent node-to-node transfers allowed.2
+ +## Storage Backend + +Riak enables you to choose from the following storage backends: + +* [Bitcask][plan backend bitcask] + - [configuration][config backend bitcask] +* [LevelDB][plan backend leveldb] + - [configuration][config backend leveldb] +* [Leveled][plan backend leveled] + - [configuration][config backend leveled] +* [Memory][plan backend memory] + - [configuration][config backend memory] +* [Multi][plan backend multi] + - [configuration][config backend multi] + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
storage_backendSpecifies the storage engine used for Riak's key-value data and +secondary indexes (if supported).

The available options are +bitcask (the default), leveldb, +memory, leveled and multi.
bitcask
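For example, to select a backend other than the default, a riak.conf sketch (choose the backend based on the planning pages linked above):

```riakconf
storage_backend = leveldb
```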
+ +## Directories + +The directories in which Riak stores data, logs, dependencies, +executables, and configuration files can be configured using the +parameters below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
platform_bin_dirThe directory in which the riak-admin, +riak-debug, and now-deprecated search-cmd +executables are stored../bin
platform_data_dirThe directory in which Riak stores its storage backend data, as well +as active anti-entropy data, and cluster metadata../data
platform_etc_dirThe directory in which Riak's configuration files are stored../etc
platform_lib_dirThe directory in which Riak's dependencies are housed../lib
platform_log_dirThe directory in which Riak's log files are stored, e.g. +console.log, erlang.log, and +crash.log files../log
Each of these directory parameters can be used to construct values for other parameters by placing it within a `$(...)`. Thus, `platform_log_dir` becomes `$(platform_log_dir)` and so on.

To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the `anti_entropy.data_dir` parameter. When setting that parameter, you can specify an absolute directory, as below:

```riakconf
anti_entropy.data_dir = /path/to/anti_entropy
```

Or you can use the value of `platform_data_dir`:

```riakconf
anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
```

## Search

Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].

Field | Default | Valid values |
:-----|:--------|:-------------|
`search` | `off` | `on` or `off`
`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
`search.dist_query` | `on` | `on` or `off`
`search.index.error_threshold.failure_count` | `3` | Integer
`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
`search.queue.batch.maximum` | `100` | Integer
`search.queue.batch.minimum` | `1` | Integer
`search.queue.high_watermark` | `10000` | Integer
`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
`search.root_dir` | `./data/yz` | Directory
`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
`search.solr.jmx_port` | `8985` | Integer
`search.solr.port` | `8093` | Integer
`search.solr.start_timeout` | `30s` | Integer with time units (e.g. 2m)
`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`

## Riak Control

Riak Control is a web-based administrative console for inspecting and manipulating Riak clusters. The configurable parameters below enable you to turn the Riak Control subsystem on and off and to configure console authorization.
ConfigDescriptionDefault
riak_controlSet to off to disable the admin panel.off
riak_control.auth.modeAuthentication mode used for access to the admin panel. Options are +off (which is the default) or userlist.off
riak_control.auth.user.$username.passwordIf Riak Control's authentication mode +(riak_control.auth.mode) is set to userlist, +this is the list of usernames and passwords for access to the admin +panel.
+ +## Runtime Health + +Configurable parameters for interaction between Riak and the underlying +operating system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
runtime_health.triggers.distribution_portWhether distribution ports with full input buffers will be counted +as busy. Distribution ports connect Riak nodes within a single cluster. +on
runtime_health.triggers.portWhether ports with full input buffers will be counted as busy. +Ports can represent open files or network sockets.on
runtime_health.triggers.process.heap_sizeA process will become busy when its heap exceeds this size +(in bytes).160444000
runtime_health.triggers.process.garbage_collectionA process will become busy when it exceeds this amount of time doing +garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 +milliseconds, `5s` for 5 seconds, etc.Note: Enabling +this setting can cause performance problems on multi-core systems.off
runtime_health.triggers.process.long_scheduleA process will become busy when it exceeds this amount of time +during a single process scheduling and execution cycle. Set as an integer +plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, +etc.off
runtime_health.thresholds.busy_portsThe threshold at which a warning will be triggered about the number +of ports that are overly busy. Ports with full input buffers count +toward this threshold.2
runtime_health.thresholds.busy_processesThe threshold at which a warning will be triggered about the number of processes that are overly busy. Processes with large heaps or that take a long time to garbage collect will count toward this threshold.30
+ +## Default Bucket Properties + +When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
buckets.default.allow_multWhether or not siblings are allowed +

+Note: See +Conflict Resolution for a discussion of siblings.
false
buckets.default.basic_quorumWhether not-founds will invoke the "basic quorum" optimization. +This setting will short-circuit fetches where the majority of replicas +report that the key is not found. Only used when +notfound_ok is set to false.false
buckets.default.dwThe number of replicas which must reply to a write request +indicating that the write was committed to durable storage for the write +to be deemed successful.quorum
buckets.default.last_write_winsWhether conflicting writes resolve via timestamp.false
buckets.default.merge_strategyThe strategy used when merging objects that potentially have +conflicts. The default is 2 in Riak 2.0 for typed buckets +and 1 for non-typed buckets. This setting reduces sibling +creation through additional metadata on each sibling (also known as Dotted +Version Vectors). Setting this to 1 is the default for +Riak 1.4 and earlier, and may duplicate siblings that originated in the +same write.1
buckets.default.n_valThe number of replicas stored in **non-typed** buckets. For typed buckets, the default is 3 unless changed explicitly for that bucket type. +

+Note: See +Replication Properties +for further discussion.
3
buckets.default.notfound_okWhether not-founds will count toward a quorum of reads.true
buckets.default.postcommitA space-delimited list of functions that will be run after a value +is stored. Only Erlang functions are allowed, using the +module:function format.
buckets.default.precommitA space-delimited list of functions that will be run before a value +is stored, and that can abort the write. Only Erlang functions are +allowed, using the module:function format.
buckets.default.prThe number of primary, non-fallback replicas that must reply to a +read request.0
buckets.default.pwThe number of primary, non-fallback replicas which must reply to a +write request.0
buckets.default.rThe number of replicas which must reply to a read request.quorum
buckets.default.wThe number of replicas which must reply to a write request, +indicating that the write was received.quorum
buckets.default.rwThe number of replicas which must reply to a delete request.quorum
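As a sketch, these defaults written out explicitly in riak.conf would look like the following (the values here simply restate the documented defaults):

```riakconf
buckets.default.allow_mult = false
buckets.default.n_val = 3
buckets.default.r = quorum
buckets.default.w = quorum
```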
+ +## Object Settings + +Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
object.formatControls which binary representation of a riak value is stored on +disk. Options are 0, which will use the original +erlang:term_to_binary format but has a higher space +overhead, or 1, which will tell Riak to utilize a new +format for more compact storage of small values.1
object.siblings.maximumWriting an object with more than this number of siblings will send +a failure to the client.100
object.siblings.warning_thresholdWriting an object with more than this number of siblings will +generate a warning in the logs.25
object.size.maximumWriting an object larger than this will send a failure to the +client.50MB
object.size.warning_thresholdReading or writing objects larger than this size will write a +warning in the logs.5MB
+ +## Erlang VM + +In the older configuration system, the Erlang VM in which Riak runs was +configured using a `vm.args` file. In the new, `riak.conf`-based +system, the Erlang VM can be configured using the parameters in the +table below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
erlang.async_threadsThe number of threads in the Erlang VM's asynchronous thread pool. +The valid range is 0-1024. If thread support is not available, this +parameter will have no impact; if thread support is available, the +default value is 64. This is the equivalent of the +A flag. +More information can be found here. +64 (if thread support is available)
erlang.async_threads.stack_sizeIf thread support is available in your Erlang VM, this parameter +sets the amount of memory allocated to each asynchronous thread, which +you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, +which translates to 64-32768 KB on 32-bit architectures. Although there +is no default, we suggest a stack size of 16 kilowords, which translates +to 64 KB. This small default size has been chosen because the number of +asynchronous threads, set using the erlang.async_threads +parameter explained above, might be quite large. The 64 KB default is +enough for drivers delivered with Erlang/OTP but might not be large +enough to accommodate drivers that use the driver_async() +functionality, documented here.
erlang.distribution.net_ticktimeThe net kernel is an Erlang system process that provides various +forms of network monitoring. In a Riak cluster, one of the functions of +the net kernel is to periodically check node liveness. Tick +time is the frequency with which those checks happen. This +parameter determines that frequency for every N. If you set +this parameter to 10, for example, the tick will occur once +every 10 seconds.
erlang.distribution.port_range.minimumFor ease of firewall configuration, the Erlang distribution can be +bound to a limited range of TCP ports. If this parameter is set, and +erlang.distribution.port_range.maximum is not set, only +this port will be used. If the minimum is unset, no restriction will be +made on the port range. Instead, Erlang will listen on a random +high-numbered port. More information here and here.
erlang.distribution.port_range.maximumSee the description for +erlang.distribution.port_range.minimum directly above. +
erlang.schedulers.force_wakeup_intervalSet the scheduler forced wakeup interval. All run queues will be +scanned each time period specified (in milliseconds). While there are +sleeping schedulers in the system, one scheduler will be woken for each +non-empty run queue found. An interval of zero disables this feature, +which is the default. This feature is a workaround for lengthy executing +native code, and native code that does not properly bump reductions. +More information here.
erlang.schedulers.compaction_of_loadEnables or disables the Erlang scheduler's compaction of load. When +enabled (which is the default), load balancing will strive to establish +a load distribution that causes as many scheduler threads as possible to +be fully loaded, i.e. not to run out of scheduled work. This is +accomplished by migrating load, such as running processes, into a +smaller set of schedulers when schedulers frequently run out of work. +When disabled, the frequency at which schedulers run out of work will +not be taken into account by the load balancing logic.true (enabled)
erlang.schedulers.utilization_balancingEnables or disables the Erlang scheduler's balancing of load. By default, scheduler utilization balancing is disabled while scheduler compaction of load is enabled, i.e. erlang.schedulers.compaction_of_load is set to true. In this state, the Erlang VM will strive for a load distribution which causes as many scheduler threads as possible to be fully loaded, i.e. to not run out of work. When load balancing is enabled using this setting, the system will instead attempt to balance scheduler utilization equally between schedulers.false (disabled)
erlang.distribution_buffer_sizeFor nodes with many busy_dist_port events, Basho +recommends raising the sender-side network distribution buffer size. +32MB may not be sufficient for some workloads and is a suggested +starting point. Erlangers may know this as +zdbbl. See more +here +.32MB
erlang.process_limitRaises the default Erlang process limit256000
erlang.max_ets_tablesRaises the ETS table limit256000
erlang.crash_dumpSets the location of crash dumps./log/erl_crash.dump
erlang.fullsweep_afterA non-negative integer which indicates how many times generational +garbage collections can be done without forcing a fullsweep collection. +In low-memory systems (especially without virtual memory), setting the +value to 0 can help to conserve memory. More information here. +0
erlang.max_portsThe number of concurrent ports/sockets. The valid range is 1024 to +134217727.65536
erlang.KEnables or disables the kernel poll functionality if the emulator +supports it. If the emulator does not support kernel poll, and the +K flag is passed to the emulator, a warning is issued at +startup. Similar information here.on
erlang.schedulers.totalSets the number of scheduler threads to create and scheduler +threads to set online when erlang.smp support has been +enabled. The maximum for both values is 1024. If the Erlang runtime +system is able to determine the amount of logical processors configured +and logical processors available, schedulers.total will +default to logical processors configured, and +schedulers.online will default to the number of logical +processors available. Otherwise, the default values will be 1. +Schedulers may be omitted if schedulers.online is not and +vice versa. If schedulers.total or +schedulers.online is specified as a negative number, the +value is subtracted from the default number of logical processors +configured or logical processors available, respectively. Specifying +the value 0 for Schedulers or +SchedulersOnline resets the number of scheduler threads or +scheduler threads online respective to its default value. This option +is ignored if the emulator doesn't have SMP support enabled (see the +erlang.smp flag). More information +here. +
erlang.schedulers.onlineSee the description for erlang.schedulers.total +directly above.
erlang.WSets the mapping of warning messages for error_logger. +Messages sent to the error logger using one of the warning routines can +be mapped either to errors, warnings (w, +which is the default), or info reports (i).w
erlang.smpStarts the Erlang runtime system with SMP support enabled. This may +fail if no runtime system with SMP support is available. The +auto setting starts the Erlang runtime system with SMP +support enabled if it is available and more than one logical processor +is detected. A value of disable starts a runtime system +without SMP support. Note: The runtime system with SMP +support will not be available on all supported platforms. See also the +erlang.schedulers settings. Some native extensions (NIFs) +require use of the SMP emulator. More information here.enable
erlang.shutdown_timeLimits how long the Erlang VM spends shutting down. After the +specified duration elapses, all existing processes are killed.10s
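For instance, a node that logs frequent busy_dist_port events might raise the distribution buffer as described above; a riak.conf sketch using the suggested starting point:

```riakconf
erlang.distribution_buffer_size = 32MB
erlang.crash_dump = ./log/erl_crash.dump
```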
+ +## JavaScript MapReduce + +Configurable parameters for Riak's now-deprecated JavaScript +[MapReduce][usage mapreduce] system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
javascript.source_dirA directory containing the Javascript source files which will be +loaded by Riak when it initializes Javascript VMs.
javascript.maximum_stack_sizeThe maximum amount of thread stack memory to allocate to each +JavaScript virtual machine.16MB
javascript.maximum_heap_sizeThe maximum amount of memory allocated to each JavaScript virtual +machine.8MB
javascript.hook_pool_sizeThe number of JavaScript virtual machines available for executing +pre-commit hook functions.2
javascript.reduce_pool_sizeThe number of JavaScript virtual machines available for executing +reduce functions.6
javascript.map_pool_sizeThe number of JavaScript virtual machines available for executing +map functions.8
+ +## Security + +Configurable parameters for [Riak KV Security][security index]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ssl.cacertfileThe default signing authority location for HTTPS.$(platform_etc_dir)/cacertfile.pem
ssl.keyfileDefault key location for HTTPS.$(platform_etc_dir)/key.pem
ssl.certfileDefault cert location for HTTPS.$(platform_etc_dir)/cert.pem
secure_referer_checkMeasures were added to Riak 1.2 to counteract cross-site scripting +and request-forgery attacks. Some reverse proxies cannot remove the +Referer header and make serving data directly from Riak +impossible. Turning this setting to off disables this +security check.on
check_crlWhether to check the certificate +revocation list (CRL) of a client certificate. This defaults to +on but some CAs may not maintain or define a CRL, so this +can be disabled if no CRL is available.on
tls_protocols.sslv3Determine which SSL/TLS versions are allowed. By default, only TLS +1.2 is allowed, but other versions can be enabled if clients don't +support the latest TLS standard. It is strongly recommended that SSLv3 +not be enabled unless absolutely necessary. More than one protocol can +be enabled at once. The tls_protocols parameters below can +be used to turn different versions on and off.off
tls_protocols.tlsv1.2on
tls_protocols.tlsv1.1off
tls_protocols.tlsv1off
honor_cipher_orderWhether to prefer the order in which the server lists its ciphers. +When set to off, the client's preferred cipher order +dictates which cipher is chosen.on
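A riak.conf sketch restating the TLS defaults above explicitly, which can be a useful starting point when auditing a node's transport security (illustrative, not a hardening guide):

```riakconf
tls_protocols.sslv3 = off
tls_protocols.tlsv1 = off
tls_protocols.tlsv1.1 = off
tls_protocols.tlsv1.2 = on
check_crl = on
honor_cipher_order = on
```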
+ +## Client Interfaces + +Configurable parameters for clients connecting to Riak either through +Riak's Protocol Buffers or HTTP API. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
protobuf.nagleTurns off Nagle's algorithm for Protocol Buffers connections. This +is equivalent to setting the TCP_NODELAY option on the +socket.off
protobuf.backlogThe maximum length to which the queue of pending connections may +grow. If set, it must be an integer greater than zero. If you +anticipate a huge number of connections being initialized +simultaneously, set this number higher.128
listener.protobuf.$nameThis is the IP address and TCP port to which the Riak Protocol +Buffers interface will bind.{"127.0.0.1",8087}
listener.http.$nameThis is the IP address and TCP port to which the Riak HTTP +interface will bind.{"127.0.0.1",8098}
listener.https.$nameThis is the IP address and TCP port to which the Riak HTTPS +interface will bind.
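In riak.conf form, listeners are named. Assuming the conventional listener name `internal` and the host:port syntax used by the shipped configuration file, a sketch looks like:

```riakconf
listener.protobuf.internal = 127.0.0.1:8087
listener.http.internal = 127.0.0.1:8098
```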
+ +## Logging + +Configurable parameters for [lager](https://github.com/basho/lager), +Riak's logging system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
log.consoleWhere to emit the default log messages (typically at +info severity). Possible values: off, which +disables console log messages; file, which specifies that +log messages will be output to the file specified by +log.console.file; console, which outputs +messages to standard output (seen when using riak +attach-direct); or both, which outputs messages both +to the file specified in log.console.file and to standard +out.file
log.console.fileWhen log.console is set to file or +both, this parameter determines the path of the file to +which console messages will be logged../log/console.log
log.console.levelThe severity level of the console log. Possible values: debug, info, warning, error.info
log.crashWhether to enable the crash logon
log.crash.fileIf the crash log is enabled, the file where its messages will be +written./log/crash.log
log.crash.maximum_message_sizeMaximum size of individual messages in the crash log64KB
log.crash.rotationThe schedule on which to rotate the crash log. More information here. +$D0
log.crash.rotation.keepThe number of rotated crash logs to keep. When set to +current, only the current open log file is kept. +Otherwise, an integer can be specified.5
log.crash.sizeMaximum size of the crash log before it is rotated10MB
log.error.fileThe file where error messages will be logged../log/error.log
log.error.messages_per_secondMaximum number of error_logger messages to handle per +second100
log.error.redirectWhether to redirect error_logger messages into +lageron
log.syslogWhen set to on, enables log output to syslogoff
log.syslog.facilitySets the facility level of syslog output if log.syslog is set to on. Possible values: auth, authpriv, clock, cron, daemon, ftp, kern, lpr, mail, news, syslog, user, uucp. In addition to these settings, you may also select local0 through local7.daemon
log.syslog.identIf log.syslog is set to on, this setting +determines the prefix appended to each syslog message.riak
log.syslog.levelIf log.syslog is set to on, this setting determines the log level of syslog output. Possible values: alert, critical, debug, emergency, error, info, none, notice, warning.info
saslWhether to enable sasl, Erlang's +built-in error loggeroff
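A riak.conf sketch of a common logging arrangement (these mirror the defaults documented above):

```riakconf
log.console = file
log.console.level = info
log.crash = on
log.crash.rotation.keep = 5
```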
+ +## Active Anti-Entropy + +Configurable parameters for Riak's active anti-entropy subsystem. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
anti_entropyHow Riak will repair out-of-sync keys. If set to +active, out-of-sync keys will be repaired in the +background; if set to passive, out-of-sync keys are only +repaired on read; and if set to active-debug, verbose +debugging information will be output.active
anti_entropy.throttleWhether the distributed throttle for Active Anti-Entropy is enabled.on
anti_entropy.throttle.$tier.mailbox_sizeSets the throttling tiers for Active Anti-Entropy. Each tier is a minimum vnode mailbox size and a time-delay that the throttle should observe at that size and above. For example, anti_entropy.throttle.tier1.mailbox_size = 0, anti_entropy.throttle.tier1.delay = 0ms, anti_entropy.throttle.tier2.mailbox_size = 40, anti_entropy.throttle.tier2.delay = 5ms, etc. If configured, there must be a tier which includes a mailbox size of 0. Both .mailbox_size and .delay must be set for each tier.
anti_entropy.throttle.$tier.delaySee the description for anti_entropy.throttle.$tier.mailbox_size above.
anti_entropy.bloomfilterBloom filters are highly effective in shortcutting data queries +that are destined to not find the requested key, though they tend to +entail a small performance cost.on
anti_entropy.max_open_files20
anti_entropy.write_buffer_sizeThe LevelDB options used by Active Anti-Entropy to generate the +LevelDB-backed on-disk hashtrees.4MB
anti_entropy.data_dirThe directory where AAE hash trees are stored../data/anti_entropy
anti_entropy.trigger_intervalThe tick determines how often the Active Anti-Entropy manager looks +for work to do (building/expiring trees, triggering exchanges, etc). +Lowering this value will speed up the rate at which all replicas are +synced across the cluster. Increasing the value is not recommended. +15s
anti_entropy.concurrency_limitLimit how many Active Anti-Entropy exchanges or builds can happen +concurrently.2
anti_entropy.tree.expiryDetermines how often hash trees are expired after being built. +Periodically expiring a hash tree ensures that the on-disk hash tree +data stays consistent with the actual K/V backend data. It also helps +Riak identify silent disk failures and bit rot. However, expiration is +not needed for normal active anti-entropy operations and should be +infrequent for performance reasons. The time is specified in +milliseconds.1w
anti_entropy.tree.build_limit.per_timespan1h
anti_entropy.tree.build_limit.numberRestrict how fast AAE can build hash trees. Building the tree for a +given partition requires a full scan over that partition's data. Once +built, trees stay built until they are expired. .number is +the number of builds; .per_timespan is the amount of time +in which that number of builds occurs.1
anti_entropy.use_background_managerWhether AAE is to use a background process to limit AAE tree +rebuilds. If set to on, this will help to prevent system +response degradation under times of heavy load from multiple background +tasks that contend for the same system resources; setting this parameter +to off can cut down on system resource usage. +off
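For example, enabling AAE with its documented defaults spelled out in riak.conf (a sketch):

```riakconf
anti_entropy = active
anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
anti_entropy.concurrency_limit = 2
anti_entropy.tree.expiry = 1w
```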
## TicTac Active Anti-Entropy

ConfigDescriptionDefault
tictacaae_activeSwitches TicTac AAE between passive and active. If you want to run TicTac AAE alongside legacy AAE, set both to active. Can be active or passive.passive
tictacaae_datarootPath under which AAE data files will be stored.(platform_data_dir)/tictac_aae
tictacaae_parallelstoreWhen running in parallel mode (the default if the backend does not support native TicTac AAE, i.e. is not leveled), the type of parallel key store to keep: leveled_ko (leveled and key-ordered) or leveled_so (leveled and segment-ordered). When running in native mode, this setting is ignored.leveled_ko
tictacaae_rebuildwaitThe minimum number of hours to wait between rebuilds.336
tictacaae_rebuilddelayThe number of seconds over which the next rebuild will be scheduled. If all vnodes are scheduled to rebuild at the same time, they will actually rebuild at a random point between 0 and this value (in seconds) after the rebuild time.345600

## Intra-Cluster Handoff

Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].
ConfigDescriptionDefault
handoff.max_rejectsThe maximum number of times that a secondary system within Riak, +such as Riak Search, can block handoff +of primary key/value data. The approximate maximum duration that a vnode +can be blocked can be determined by multiplying this setting by +vnode_management_timer. If you want to prevent handoff from +ever being blocked by a secondary system, set this parameter to +0.6
handoff.inboundWhether inbound handoff is enabled on the node. Possible values are +on or off.on
handoff.outboundWhether outbound handoff is enabled on the node. Possible values are +on or off.on
handoff.portSpecifies the TCP port that Riak uses for intra-cluster data +handoff.8099
handoff.ssl.certfileTo encrypt riak_core intra-cluster data handoff +traffic, uncomment this line and edit its path to an appropriate +certfile and keyfile.
handoff.ssl.keyfileThe keyfile paired with the certfile specified in +.certfile.
handoff.use_background_managerWhether Riak will use a background manager to limit K/V handoff. +This can help to prevent system response degradation during times of +heavy load caused by multiple background tasks that contend for the same +system resources; setting this parameter to off can cut +down on system resource usage.off
+ +## Riak Data Types + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
datatypes.compression_levelWhether serialized Data Types will use compression and at what +level. When set to an integer, the parameter refers to the +aggressiveness of compression, on a scale from 0 to 9. on +is equivalent to 6, whereas off is equivalent to 0. Higher +values for compression tend to be more CPU intensive.1
+ +## SNMP + +Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher. + +## JMX + +Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher. + +## Strong Consistency + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment. + +Riak's strong consistency feature has a variety of tunable parameters +that allow you to enable and disable strong consistency, modify the +behavior of leaders and followers, set various timeouts, and more. More +detailed information from an operations perspective can be found in our +documentation on [managing strong consistency][cluster ops strong consistency]. + +Strong consistency is disabled by default. The `strong_consistency` +parameter enables you to turn it on. This setting is available in each +node's `riak.conf` file. + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
strong_consistencyEnables the consensus subsystem used for strongly consistent Riak +operations if set to on.off
+ +Unlike the `strong_consistency` setting, the settings listed below are +available only in `advanced.config`, in the `riak_ensemble` section of +that file. That section looks like this: + +```advancedconfig +{riak_ensemble, [ + {parameter1, value}, + {parameter2, value}, + %% Other setting + ]} +``` + +Further instructions on setting parameters in `advanced.config` can be +found in the [advanced configuration](#advanced-configuration) section below. + +Using these settings properly demands a firm understanding of the basic +architecture of Riak's implementation of strong consistency. We highly +recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind +strong consistency before changing the defaults on these parameters. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ensemble_tickThe rate at which leaders perform their periodic duties, including +refreshing the leader lease, in milliseconds. This setting must be lower +than both the lease_duration and +follower_timeout settings (both listed below). Lower values +mean that leaders perform their duties more frequently, which can allow +for faster convergence if a leader goes offline and then returns to the +ensemble; higher values mean that leaders perform their duties less +frequently, which can reduce network overhead.500
lease_durationDetermines how long a leader lease remains valid without being +refreshed (in milliseconds). This should be set higher than the +ensemble_tick setting (listed above) so that leaders have +time to refresh their leases before they time out, and it must be set +lower than the follower_timeout setting (listed below). +ensemble_tick * 3/2
follower_timeoutDetermines how long a follower waits to hear from a leader before it +abandons the leader (in milliseconds). This must be set greater than the +lease_duration setting.lease_duration * 4
alive_tokensDetermines the number of ticks the leader will wait to hear from its +associated vnode before assuming that the vnode +is unhealthy and stepping down as leader. If the vnode does not respond +to the leader before ensemble_tick * +alive_tokens milliseconds have elapsed, the leader will +give up leadership. It may be necessary to raise this setting if your +Riak vnodes are frequently stalling out on slow backend reads/writes. If +this setting is too low, it may cause slow requests to time out earlier +than the request timeout.2
storage_delayDetermines how long the consensus subsystem delays syncing to disk +when performing certain metadata operations (in milliseconds). This +delay allows multiple operations to be coalesced into a single disk +write. We do not recommend that you change this setting.50
storage_tickDetermines how often the consensus subsystem writes data to disk +that was requested to be written asynchronously (in milliseconds). We do +not recommend that you change this setting.5000
trust_leaseDetermines whether leader leases are used to optimize reads. When +set to true, a leader with a valid lease will handle the +read directly without contacting any followers; when set to +false, the leader will always contact followers. For more +information, see our internal documentation on + +leader leases.true
peer_get_timeoutDetermines the timeout used internally for reading consistent data, +in milliseconds. This setting must be greater than the highest request +timeout used by your application.60000 (1 minute)
peer_put_timeoutDetermines the timeout, in milliseconds, used internally for writing +consistent data. This setting must be greater than the highest request +timeout used by your application.60000 (1 minute)
peer_workersThe number of concurrent workers used by the leader to service +requests. Increasing this setting may boost performance depending on the +workload.1
tree_validationDetermines whether Riak considers peer Merkle trees to be trusted +after a node restart. When validation is enabled (the default), Riak +does not trust peer trees after a restart, instead requiring the peer to +sync with a trusted majority. This is the safest option, as it protects +Riak against undetected corruption of the Merkle tree. However, this +mode reduces Riak availability since it can sometimes require more than +a simple majority of nodes to be online and reachable.true
synchronous_tree_updatesDetermines whether the metadata updates to follower Merkle trees are +handled synchronously or not. When set to true, Riak +requires two quorum round trips to occur before replying back to the +client, the first quorum request to write the actual object and the +second to write the Merkle tree data. When set to false, +Riak will respond back to the client after the first round trip, letting +the metadata update happen asynchronously.

It's important to +note that the leader always updates its local Merkle tree +before responding to the client. This setting only affects the metadata +writes sent to followers.

In principle, asynchronous updates +are unsafe. If the leader crashes before sending the metadata updates +and all followers that had acknowledged the object write somehow revert +to the object value immediately prior to a write request, a future read +could return the immediately preceding value without realizing that it +was incorrect. Given that this scenario is unlikely, this setting +defaults to false in the name of improved performance.
false
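To make the advanced.config shape concrete, here is a sketch that adjusts two of the parameters above (the values are illustrative; read the strong-consistency documentation linked earlier before changing any of these):

```advancedconfig
{riak_ensemble, [
    {ensemble_tick, 500},
    {peer_workers, 2}
]}
```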
+ + +## Miscellaneous + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
metadata_cache_sizeThis setting controls the size of the metadata cache for each vnode. +The cache can be disabled by setting it to off (this is the +default). Enabling the cache should not be necessary in disk-based +backends (i.e. LevelDB and Bitcask) but it can help performance in the +Memory backend. Note that this setting adjusts the size of the ETS table +rather than the actual data. Thus, more space may be used than the +simple size * number-of-vnodes calculation would imply. +

+Caution: This setting should not be changed without +extensive benchmarking.
off
max_concurrent_requestsThe maximum number of concurrent requests of each type (GET or PUT) +that is allowed. Setting this value to infinite disables +overload protection. The erlang.process_limit should be at +least 3 times this setting.50000
dtraceWhether DTrace is enabled. +Do not enable unless your Erlang/OTP runtime is compiled to support +DTrace, which is available in R15B01 (supported by the official source +package) and in R14B04 via a custom repository and branch.off
vnode_management_timerSets the frequency with which vnodes attempt to trigger handoff between +this node and other nodes in the cluster.10s (10 seconds)
retry_put_coordinator_failureWhen a PUT (i.e. write) request fails, Riak will retry the operation +if this setting is set to on, which is the default. Setting +it to off will speed response times on PUT requests in +general, but at the risk of potentially increasing the likelihood of +write failure.on
background_managerRiak's background manager is a subsystem that coordinates access to +shared resources from other Riak subsystems. The background manager can +help to prevent system response degradation under times of heavy load +caused by multiple background tasks.on
+ +## Advanced Configuration + +The `advanced.config` file takes the same format as the `app.config` +file familiar to users of versions of Riak prior to 2.0. Here is an +example: + +```advancedconfig +[ + {riak_core, + [ + {cluster_mgr, {"127.0.0.1", 8098 } }, + %% more riak_core configs + ]}, + + {riak_repl, + [ + {data_root, "/var/db/riak/riak_repl/"}, + %% more riak_repl configs + ] + } +]. +``` + +The following settings are available in the `advanced.config` file: + +#### `riak_repl` settings + +Most settings that are configurable through `advanced.config` are +related to Riak's `riak_repl` subsystem. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
data_rootPath (relative or absolute) to the working directory for the +replication process./var/db/riak/riak_repl/
max_fssource_clusterThe hard limit of fullsync workers that will be running on the +source side of a cluster across all nodes on that cluster for a fullsync +to a sink cluster. This means that if you have configured fullsync for +two different clusters, both with a max_fssource_cluster of +5, 10 fullsync workers can be in progress. This only affects nodes on +the source cluster on which this parameter is defined, either via the +configuration file or command line.5
max_fssource_nodeThis setting limits the number of fullsync workers that will be +running on each individual node in a source cluster. This is a hard +limit for all fullsyncs enabled; additional fullsync configurations will +not increase the number of fullsync workers allowed to run on any node. +This only affects nodes on the source cluster on which this parameter is +defined, either via the configuration file or command line. +1
max_fssink_nodeThis setting limits the number of fullsync workers allowed to run on +each individual node in a sink cluster. This is a hard limit for all +fullsyncs enabled; additional fullsync configurations will not increase +the number of fullsync workers allowed to run on any node. This only +affects nodes on the source cluster on which this parameter is defined, +either via the configuration file or command line.1
fullsync_on_connectWhether to initiate a fullsync on initial connection from the sink +cluster.true
fullsync_intervalA single-integer value representing the duration to wait, in +minutes, between fullsyncs, or a list of {clustername, +time_in_minutes} pairs for each sink participating in fullsync +replication.30
rtq_max_bytesThe maximum size, in bytes, to which the realtime replication queue +can grow before new objects are dropped. Dropped objects will need to be +replicated with a fullsync.104857600
proxy_getWhether to enable Riak CS proxy_get and block +filter.disabled
rt_heartbeat_intervalA heartbeat message is sent from the source to the sink every +rt_heartbeat_interval seconds. Setting +rt_heartbeat_interval to undefined disables +the realtime heartbeat. This feature is available only in Riak KV +Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards.15
rt_heartbeat_timeoutIf a heartbeat response is not received within the time period +specified by this setting (in seconds), the source connection exits and +will be re-established. This feature is available only in Riak KV +Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards.15
realtime_connection_rebalance_max_delay_secsShould a server on the source cluster be restarted, this is +the amount of time (in seconds), before the realtime connections are +rebalanced by a change in the number of source nodes.300
fullsync_use_background_managerBy default, fullsync replication will attempt to coordinate with +other Riak subsystems that may be contending for the same resources. +This will help to prevent system response degradations during times of +heavy load from multiple background tasks. To disable background +coordination, set this parameter to `false`. This feature is available +only in Riak KV Enterprise Edition 2.0 and later as well as Riak KV 2.2.6 onwards.true
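A riak_repl stanza in advanced.config pulling together several of the settings above (the values shown are the documented defaults):

```advancedconfig
{riak_repl, [
    {data_root, "/var/db/riak/riak_repl/"},
    {fullsync_interval, 30},
    {max_fssource_cluster, 5},
    {rtq_max_bytes, 104857600}
]}
```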
+ +#### Upgrading Riak Search with `advanced.config` + +If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][use ref search]\(codename Yokozuna), you will need to enable +legacy Search while the upgrade is underway. You can add the following +snippet to your `advanced.config` configuration to do so: + +```advancedconfig +[ + %% Other configs + + {riak_search, [ {enabled, true} ]}, + {merge_index, [ + {data_root, "/var/lib/riak/merge_index"}, + {buffer_rollover_size, 1048576}, + {max_compact_segments, 20} + ]}, + + %% Other configs +]. +``` + +#### Other settings + +There are three non-`riak_repl` settings available in +`advanced.config`. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigSectionDescriptionDefault
add_pathsriak_kvIf you are installing +custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies +the paths to any compiled .beam files that you wish to use. +This is expressed as a list of absolute paths on the node's filesystem, +e.g. [ "/tmp", "/other" ].
cluster_mgrriak_coreThe cluster manager listens for connections from remote clusters on +the specified IP and port. Every node runs one cluster manager, but only +the cluster manager running on the cluster leader will service requests. +This can change as nodes enter and leave the cluster.{"127.0.0.1", 9080}
delete_moderiak_kvSpecifies how Riak behaves after objects are marked for deletion +with a tombstone. There are three possible settings: keep +disables tombstone removal altogether; immediate removes +objects' tombstones as soon as the delete request is received; and +setting delete_mode to an integer value specifies the +number of milliseconds to wait before removing tombstones. More +information can be found in Object +Deletion.3000 (3 seconds)
target_n_valriak_coreThe highest n_val that you generally intend to use. +This setting affects how partitions are distributed within the cluster, +helping to ensure that "hot spots" don't occur, i.e. that data is never +stored more than once on the same physical node. You will need to change +this setting only in rare circumstances. Assuming that +ring_size is a power of 2, the ideal value for this setting +is both (a) greater than or equal to the largest n_val for +any bucket type and (b) an even divisor of the number of partitions in +the ring, i.e. ring_size. The default is 4, +and the number of physical nodes in your cluster must be greater than +target_n_val for this setting to be effective at preventing +hot spots.4
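For example, a cluster that must retain tombstones indefinitely could set delete_mode in the riak_kv section (a sketch; immediate and an integer delay in milliseconds are the other options described above):

```advancedconfig
{riak_kv, [
    {delete_mode, keep}
]}
```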
## Cluster Job Controls

{{% note title="Warning" %}}
Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
{{% /note %}}

The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.

Field | Default | Valid values |
:-----|:--------|:-------------|
`cluster.job.riak_kv.list_buckets` | `enabled` | `enabled` or `disabled`
`cluster.job.riak_kv.stream_list_buckets` | `enabled` | `enabled` or `disabled`
`cluster.job.riak_kv.list_keys` | `enabled` | `enabled` or `disabled`
`cluster.job.riak_kv.stream_list_keys` | `enabled` | `enabled` or `disabled`
`cluster.job.riak_kv.map_reduce` | `enabled` | `enabled` or `disabled`
`cluster.job.riak_kv.map_reduce_js` | `enabled` | `enabled` or `disabled`
`cluster.job.riak_kv.secondary_index` | `enabled` | `enabled` or `disabled`
`cluster.job.riak_search.query` | `enabled` | `enabled` or `disabled`
`cluster.job.yokozuna.query` | `enabled` | `enabled` or `disabled`

diff --git a/content/riak/kv/2.9.8/configuring/search.md b/content/riak/kv/2.9.8/configuring/search.md
new file mode 100644
index 0000000000..c3a20ecbdb
--- /dev/null
+++ b/content/riak/kv/2.9.8/configuring/search.md
@@ -0,0 +1,278 @@
---
title: "Riak Search Settings"
description: ""
project: "riak_kv"
project_version: 2.9.8
menu:
  riak_kv-2.9.8:
    name: "Riak Search Settings"
    identifier: "configuring_search"
    weight: 160
    parent: "configuring"
toc: true
aliases:
  - /riak/2.9.8/ops/advanced/configs/search/
  - /riak/kv/2.9.8/ops/advanced/configs/search/
---

[usage search]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search
[usage search schema]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search-schemas
[usage search data types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/searching-data-types
[usage custom extractors]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/custom-extractors
[cluster-ops aae throttle]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/active-anti-entropy/#throttling
[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
[config reference#search]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/#search
[glossary aae]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#active-anti-entropy-aae
[security index]: {{<baseurl>}}riak/kv/2.9.8/using/security/

[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation

This page covers how to use Riak Search (with [Solr](http://lucene.apache.org/solr/) integration).

For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].

If you are looking to develop on or with Riak Search, take a look at:

* [Using Search][usage search]
* [Search Schema][usage search schema]
* [Custom Search Extractors][usage custom extractors]
* [Riak KV Data Types and Search][usage search data types]

## Overview

We'll be walking through:

1. [Prerequisites](#prerequisites)
2. [Enabling Riak Search](#enabling-riak-search)
3. [Search Configuration Settings](#search-config-settings)
4. [Additional Solr Information](#more-on-solr)

## Prerequisites

Because Solr is a Java application, you will need to install **Java 7 or later** on every node. Installation packages can be found on the [Java SE Downloads page][java se downloads] and instructions in the [Java SE documentation site][java se docs].
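One quick way to confirm the installed JVM on each node, assuming a typical shell environment:

```bash
java -version
```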
+
+
+## Enabling Riak Search
+
+Riak Search is not enabled by default, so you must enable it in every
+node's [configuration file][config reference] as follows:
+
+```riakconf
+search = on
+```
+
+
+## Search Config Settings
+
+You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation.
+
+### `search`
+
+Enable or disable search; defaults to `off`.
+
+Valid values: `on` or `off`
+
+### `search.anti_entropy.data_dir`
+
+The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`.
+
+Valid values: a directory
+
+### `search.anti_entropy.throttle`
+
+Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`.
+
+Valid values: `on` or `off`
+
+You can read more about throttling [here][cluster-ops aae throttle].
+
+### `search.anti_entropy.throttle.$tier.delay`
+
+Sets the delay for a given [active anti-entropy][glossary aae] throttling tier; no default.
+
+Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above.
+
+For example:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+The example above will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a queue length of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.anti_entropy.throttle.$tier.solrq_queue_length`
+
+Sets the minimum Solrq queue size for a given [active anti-entropy][glossary aae] throttling tier; no default.
+
+Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle should observe at that size and above.
+
+For example:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+The example above will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a queue length of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.dist_query`
+
+Enable this node in distributed query plans; defaults to `on`.
+
+If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long-running administrative operation (e.g. reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.
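+
+For example, a node can be excluded from query plans at startup with the following riak.conf line (a sketch; the default is `on`):
+
+```riakconf
+search.dist_query = off
+```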
+
+This setting can also be changed via `riak-admin` by issuing one of the following commands:
+
+```bash
+riak-admin set search.dist_query=off
+```
+
+or
+
+```bash
+riak-admin set search.dist_query=on
+```
+
+Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` feature. Setting `search.dist_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up.
+
+Valid values: `on` or `off`
+
+### `search.index.error_threshold.failure_count`
+
+The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.
+
+Valid values: Integer
+
+### `search.index.error_threshold.failure_interval`
+
+The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.
+
+If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.
+
+Valid values: Milliseconds
+
+### `search.index.error_threshold.reset_interval`
+
+The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.
+
+Valid values: Milliseconds
+
+### `search.queue.batch.flush_interval`
+
+The maximum delay between notifications to flush batches to Solr; defaults to `1000` (milliseconds).
+
+This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.
+
+Valid values: Integer with time units (`ms`, `s`, `m`, or `h`)
+
+### `search.queue.batch.maximum`
+
+The maximum batch size, in number of Riak objects; defaults to `500`.
+
+Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.
+
+Valid values: Integer
+
+### `search.queue.batch.minimum`
+
+The minimum batch size, in number of Riak objects; defaults to `10`.
+
+Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.
+
+Valid values: Integer
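+
+As an illustrative riak.conf sketch, the three batch settings above work together like this (the values are examples, not recommendations):
+
+```riakconf
+# Flush under-sized batches at least once per second
+search.queue.batch.flush_interval = 1s
+# Never deliver more than 500 objects to Solr in a single request
+search.queue.batch.maximum = 500
+# Batches of 10 or more objects are flushed without waiting for the interval
+search.queue.batch.minimum = 10
+```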
+
+### `search.queue.high_watermark`
+
+The queue high water mark; defaults to `1000`.
+
+If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem, if writes into Solr start to fall behind.
+
+Valid values: Integer
+
+### `search.queue.high_watermark.purge_strategy`
+
+The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.
+
+Valid values: `purge_one`, `purge_index`, or `off`
+
+* `purge_one` removes the oldest item on the queue from an erroring index (one whose fuse is blown, in the language of the code) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `purge_index` removes all items associated with one random erroring index (again, one whose fuse is blown) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `off` disables purging
+
+### `search.root_dir`
+
+The root directory in which index data and configuration is stored; defaults to `./data/yz`.
+
+Valid values: a directory
+
+### `search.solr.jvm_options`
+
+The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.
+
+Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.
+
+Valid values: Java command-line arguments
+
+### `search.solr.jmx_port`
+
+The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.
+
+Valid values: Integer
+
+**Note**: JMX ceased being a Riak feature in Riak KV 2.9.0p5. This setting is left here for reference but no longer has any effect.
+
+### `search.solr.port`
+
+The port number to which Solr binds (note: binds on every interface); defaults to `8093`.
+
+Valid values: Integer
+
+### `search.solr.start_timeout`
+
+How long Riak KV will wait for Solr to start (the start is attempted twice before shutdown); defaults to `30s`.
+
+Values lower than 1s will be rounded up to 1s.
+
+Valid values: Integer with time units (e.g. 2m)
+
+
+## More on Solr
+### Solr JVM and Ports
+
+Riak Search runs one Solr process per node to manage its indexing and search functionality. Riak Search manages index distribution, node coverage for queries, active anti-entropy (AAE), and the Solr JVM process for you, but you should still provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.
+
+Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports to the outside world.
+
+### Solr for Operators
+
+For further information on Solr monitoring, tuning, and performance, we recommend the following documents for getting started:
+
+* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
+* [Solr Performance Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
+* [Solr Performance Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
+* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)
+
+A wide variety of other documentation is available from the Solr OSS
+community.
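+
+To connect the RAM guidance above back to configuration: the Solr heap is sized via `search.solr.jvm_options`. A sketch that doubles the default 1 GB heap (appropriate sizing depends entirely on your indexes and workload):
+
+```riakconf
+search.solr.jvm_options = -d64 -Xms2g -Xmx2g -XX:+UseStringCache -XX:+UseCompressedOops
+```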
+ + + + diff --git a/content/riak/kv/2.9.8/configuring/strong-consistency.md b/content/riak/kv/2.9.8/configuring/strong-consistency.md new file mode 100644 index 0000000000..45e9e827cc --- /dev/null +++ b/content/riak/kv/2.9.8/configuring/strong-consistency.md @@ -0,0 +1,702 @@ +--- +title: "Implementing Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Implementing Strong Consistency" + identifier: "configuring_strong_consistency" + weight: 190 + parent: "configuring" +toc: true +--- + +[apps strong consistency]: {{}}riak/kv/2.9.8/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.9.8/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.9.8/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.9.8/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.9.8/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.9.8/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.9.8/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.9.8/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.9.8/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.9.8/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.9.8/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.9.8/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.9.8/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.9.8/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.9.8/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.9.8/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.9.8/developing/data-types +[glossary aae]: {{}}riak/kv/2.9.8/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.9.8/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.9.8/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.9.8/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.9.8/developing/client-libraries + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. + +This document provides information on configuring and monitoring a Riak +cluster's optional strong consistency subsystem. Documentation for +developers building applications using Riak's strong consistency feature +can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical +treatment can be found in [Strong Consistency][concept strong consistency]. + +## Minimum Cluster Size + +In order to use strong consistency in Riak, **your cluster must consist +of at least three nodes**. If it does not, all strongly consistent +operations will fail. 
If your cluster is smaller than three nodes, you
+will need to [add more nodes][cluster ops add remove node] and make sure
+that strong consistency is [enabled](#enabling-strong-consistency) on all of them.
+
+Strongly consistent operations on a given key may also fail if a
+majority of object replicas in a given ensemble are unavailable, whether
+due to slowness, crashes, or network partitions. This means that you may
+see strongly consistent operations fail even if the minimum cluster size
+requirement has been met. More information on ensembles can be found in
+[Implementation Details](#implementation-details).
+
+While strong consistency requires at least three nodes, we have a
+variety of recommendations regarding cluster size, which can be found in
+[Fault Tolerance](#fault-tolerance).
+
+## Enabling Strong Consistency
+
+Strong consistency in Riak is disabled by default. You can enable it in
+each node's [configuration files][config reference#strong-cons].
+
+```riakconf
+strong_consistency = on
+```
+
+```appconfig
+%% In the older, app.config-based system, the strong consistency
+%% parameter is enable_consensus:
+
+{riak_core, [
+    % ...
+    {enable_consensus, true},
+    % ...
+    ]}
+```
+
+Remember that you must [restart your node][use admin riak cli] for
+configuration changes to take effect.
+
+For strong consistency requirements to be applied to specific keys,
+those keys must be in [buckets][concept buckets] bearing a bucket type with the
+`consistent` property set to `true`. More information can be found in
+[Using Bucket Types][cluster ops bucket types].
+
+If you enable strong consistency on all nodes in a cluster with fewer
+than three nodes, strong consistency will be **enabled** but not yet
+**active**. Strongly consistent operations are not possible in this
+state. Once at least three nodes with strong consistency enabled are
+detected in the cluster, the system will be activated and ready for use.
+You can check on the status of the strong consistency subsystem using
+the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command.
+
+## Fault Tolerance
+
+Strongly consistent operations in Riak are necessarily less highly
+available than [eventually consistent][concept eventual consistency] operations
+because strongly consistent operations can only succeed if a **quorum**
+of object replicas are currently reachable. A quorum can be expressed as
+N / 2 + 1 (or `n_val` / 2 + 1), meaning that 3 replicas constitute a
+quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are
+unavailable, for example, no strongly consistent operations on that
+object can succeed.
+
+While Riak uses N=3 by default, bear in mind that **higher values of N
+will allow for more fault tolerance**. The table below shows the number
+of allowable missing replicas for assorted values of N:
+
+Replicas | Allowable missing replicas
+:--------|:--------------------------
+3 | 1
+5 | 2
+7 | 3
+9 | 4
+15 | 7
+
+Thus, we recommend setting `n_val` higher than the default of 3 for
+strongly consistent operations. More on `n_val` in the section below.
+
+### n_val Recommendations
+
+Due to the quorum requirements explained above, we recommend that you
+use _at least_ N=5 for strongly consistent data. You can set the value
+of N, i.e. `n_val`, for buckets
+[using bucket types][cluster ops bucket types].
For example, you +can create and activate a bucket type with N set to 5 and strong +consistency enabled---we'll call the bucket type +`consistent_and_fault_tolerant`---using the following series of +[commands][use admin riak-admin]: + +```bash +riak-admin bucket-type create consistent_and_fault_tolerant \ + '{"props": {"consistent":true,"n_val":5}}' +riak-admin bucket-type activate consistent_and_fault_tolerant +``` + +If the `activate` command outputs `consistent_and_fault_tolerant has +been activated`, the bucket type is now ready to provide strong +consistency guarantees. + +#### Setting the target_n_val parameter + +The `target_n_val` parameter sets the highest `n_val` that you intend to +use in an entire cluster. The purpose of this parameter is to ensure +that so-called "hot spots" don't occur, i.e. that data is never stored +more than once on the same physical node. This can happen when: + +* `target_n_val` is greater than the number of physical nodes, or +* the `n_val` for a bucket is greater than `target_n_val`. + +A problem to be aware of if you're using strong consistency is that the +default for `target_n_val` is 4, while our suggested minimum `n_val` for +strongly consistent bucket types is 5. This means that you will need to +raise `target_n_val` if you intend to use an `n_val` over 4 for _any_ +bucket type in your cluster. If you anticipate using an `n_val` of 7 as +the largest `n_val` within your cluster, for example, you will need to +set `target_n_val` to 7. + +This setting is not contained in `riak.conf`, and must instead be set in +the `advanced.config` file. For more information, see our documentation +on [advanced configuration][config reference#advanced]. + +If you are using strong consistency in a cluster that has already been +created with a `target_n_val` that is too low (remember that the default +is too low), you will need to raise it to the desired higher value and +restart each node. + +#### Note on Bucket Properties + +The `consistent` bucket property is one of two bucket properties, +alongside [`datatype`][cluster ops bucket types], that cannot be changed once a +bucket type has been created. + +Furthermore, if `consistent` is set to `true` for a bucket type, you +cannot change the `n_val` for the bucket type once it's been created. If +you attempt to do so, you'll see the following error: + +``` +Error updating bucket : +n_val cannot be modified for existing consistent type +``` + +If you've created a bucket type with a specific `n_val` and wish to +change it, you will need to create a new bucket type with the +appropriate `n_val` and use the new bucket type instead. + +### Fault Tolerance and Cluster Size + +From the standpoint of strongly consistent operations, larger clusters +tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down. + +Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If +two nodes go down, _all_ ensembles will lose quorum and will be unable +to function. Strongly consistent operations on the entire keyspace will +fail until at least one node is brought back online. And even when that +one node is brought back online, a significant portion of the keyspace +will continue to be unavailable for strongly consistent operations. + +For the sake of contrast, imagine a 50-node cluster in which all +ensembles are N=5 (i.e. all objects are replicated to five nodes). 
In
+this cluster, each node is involved in only 10% of the total ensembles;
+if a single node fails, that failure will thus impact only 10% of
+ensembles. In addition, because N is set to 5, that failure will not
+impact quorum for _any_ ensemble in the cluster; two additional node
+failures would need to occur for quorum to be lost for _any_ ensemble.
+And even in the case of three nodes failing, it is highly unlikely that
+all three failures would impact the same ensembles; if they did, only
+those ensembles would become unavailable, affecting at most 10% of the
+key space, as opposed to 100% in the example of a 3-node cluster
+consisting of N=3 ensembles.
+
+These examples illustrate why we recommend higher values for N---again,
+at least N=5---as well as clusters with many nodes. The 50-node cluster
+example above is used only to illustrate why larger clusters are more
+fault tolerant. The definition of "many" nodes will vary according to your needs.
+For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].
+
+### Offline Node Recommendations
+
+In general, strongly consistent Riak is more sensitive to the number of
+nodes in the cluster than eventually consistent Riak, due to the quorum
+requirements described above. While Riak is designed to withstand a
+variety of failure scenarios that make nodes in the cluster unreachable,
+such as hardware or network failure, **we nonetheless recommend that you
+limit the number of nodes that you intentionally take down or reboot**.
+Having multiple nodes leave the cluster at once can threaten quorum and
+thus affect the viability of some or all strongly consistent operations,
+depending on the size of the cluster.
+
+If you're using strong consistency and you do need to reboot multiple
+nodes, we recommend rebooting them very carefully. Rebooting nodes too
+quickly in succession can force the cluster to lose quorum and thus be
+unable to service strongly consistent operations. The best strategy is
+to reboot nodes one at a time and wait for each node to rejoin existing
+[ensembles][cluster ops strong consistency] before
+continuing to the next node. At any point in time, the state of
+currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].
+
+## Performance
+
+If you run into performance issues, bear in mind that the key space in a
+Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of
+that key space. Larger ring sizes allow more
+independent consensus groups to exist in a cluster, which can provide
+for more concurrency and higher throughput, and thus better performance.
+The ideal ring size, however, will also depend on the number of nodes in
+the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].
+
+Adding nodes to your cluster is another means of enhancing the
+performance of strongly consistent operations. Instructions on doing so
+can be found in [Adding and Removing Nodes][cluster ops add remove node].
+
+Your cluster's configuration can also affect strong consistency
+performance. See [Configuring Strong Consistency](#configuring-strong-consistency) below.
+
+## riak-admin ensemble-status
+
+The [`riak-admin`][use admin riak-admin] interface
+used for general node/cluster management has an `ensemble-status`
+command that provides insight into the current status of the consensus
+subsystem undergirding strong consistency.
+
+Running the command by itself will provide the current state of the
+subsystem:
+
+```bash
+riak-admin ensemble-status
+```
+
+If strong consistency is not currently enabled, you will see `Note: The
+consensus subsystem is not enabled.` in the output of the command; if
+strong consistency is enabled, you will see output like this:
+
+```
+============================== Consensus System ===============================
+Enabled:     true
+Active:      true
+Ring Ready:  true
+Validation:  strong (trusted majority required)
+Metadata:    best-effort replication (asynchronous)
+
+================================== Ensembles ==================================
+ Ensemble     Quorum        Nodes      Leader
+-------------------------------------------------------------------------------
+   root       4 / 4         4 / 4      riak@riak1
+    2         3 / 3         3 / 3      riak@riak2
+    3         3 / 3         3 / 3      riak@riak4
+    4         3 / 3         3 / 3      riak@riak1
+    5         3 / 3         3 / 3      riak@riak2
+    6         3 / 3         3 / 3      riak@riak2
+    7         3 / 3         3 / 3      riak@riak4
+    8         3 / 3         3 / 3      riak@riak4
+```
+
+### Interpreting ensemble-status Output
+
+The following table provides a guide to `ensemble-status` output:
+
+Item | Meaning
+:----|:-------
+`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If the parameter is set to `off` and you wish to enable strong consistency, see [Enabling Strong Consistency](#enabling-strong-consistency) above.
+`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes.
+`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change.
+`Validation` | This will display `strong` if the `tree_validation` setting in `advanced.config` has been set to `true` and `weak` if set to `false`.
+`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in `advanced.config`, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)`, then `synchronous_tree_updates` is set to `true`.
+`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
+  • `Ensemble` --- The ID of the ensemble
+  • `Quorum` --- The number of ensemble peers that are either leading or following
+  • `Nodes` --- The number of nodes currently online
+  • `Leader` --- The current leader node for the ensemble
+
+**Note**: The **root ensemble**, designated by `root` in the sample
+output above, is a special ensemble that stores a list of nodes and
+ensembles in the cluster.
+
+More in-depth information on ensembles can be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+
+### Inspecting Specific Ensembles
+
+The `ensemble-status` command also enables you to directly inspect the
+status of specific ensembles in a cluster. The IDs for all current
+ensembles are displayed in the `Ensembles` section of the
+`ensemble-status` output described above.
+
+To inspect a specific ensemble, specify the ID:
+
+```bash
+riak-admin ensemble-status <id>
+```
+
+The following would inspect ensemble 2:
+
+```bash
+riak-admin ensemble-status 2
+```
+
+Below is sample output for a single ensemble:
+
+```
+================================= Ensemble #2 =================================
+Id:           {kv,0,3}
+Leader:       riak@riak2 (2)
+Leader ready: true
+
+==================================== Peers ====================================
+ Peer  Status     Trusted          Epoch         Node
+-------------------------------------------------------------------------------
+  1    following    yes              1           riak@riak1
+  2    leading      yes              1           riak@riak2
+  3    following    yes              1           riak@riak3
+```
+
+The table below provides a guide to the output:
+
+Item | Meaning
+:----|:-------
+`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. All ensembles are `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible.
+`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
+`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
+`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.
+  • `Peer` --- The ID of the peer
+  • `Status` --- Whether the peer is a leader or a follower
+  • `Trusted` --- Whether the peer's Merkle tree is currently considered trusted or not
+  • `Epoch` --- The current consensus epoch for the peer. The epoch is incremented each time the leader changes.
+  • `Node` --- The node on which the peer resides.
+
+More information on leaders, peers, Merkle trees, and other details can
+be found in [Implementation Details](#implementation-details) below.
+
+## Implementation Details
+
+Strong consistency in Riak is handled by a subsystem called
+[`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+This system functions differently from other systems in Riak in a number
+of ways, and many of these differences are important to bear in mind for
+operators configuring their cluster's usage of strong consistency.
+
+### Basic Operations
+
+The first major difference is that strongly consistent Riak involves a
+different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types
+of atomic operations on objects:
+
+* **Get** operations work just as they do against
+  non-strongly-consistent keys, but with two crucial differences:
+  1. Reads are guaranteed to return the most recently
+     written value (which makes those operations CP, i.e. consistent and
+     partition tolerant)
+  2. Reads on strongly consistent keys *never* return siblings, hence
+     there is no need to develop any sort of conflict resolution
+     strategy for those keys
+* **Conditional put** operations write an object only if no object
+  currently exists at that key. The operation will fail if the key
+  already exists; if the key was never written or has been deleted, the
+  operation succeeds.
+* **Conditional modify** operations are compare-and-swap (CAS)
+  operations that succeed only if the value of a key has not changed
+  since it was previously read.
+* **Delete** operations work mostly like they do against
+  non-strongly-consistent keys, with the exception that
+  [tombstones][cluster ops obj del] are not harvested, which is
+  the equivalent of having `delete_mode` set to `keep`.
+
+**From the standpoint of clients connecting to Riak, there is little
+difference between strongly and non-strongly consistent data**. The
+operations performed on objects---reads, writes, deletes, etc.---are the
+same, which means that the client API for strong consistency is
+essentially the same as it is for eventually consistent operations, with
+the important exception of error handling. A brief sketch of the
+conditional operations appears at the end of this section.
+
+### Ensembles
+
+The main actors in Riak's implementation of strong consistency are
+**ensembles**, which are independent groups that watch over a portion of
+a Riak cluster's key space and coordinate strongly consistent operations
+across nodes. When watching over a given key space, ensembles must act
+upon multiple replicas of a given object, the number of which is
+specified by `n_val` (more on this in [Replication Properties][apps replication properties]).
+
+Eventually consistent Riak can service requests even when only a single
+object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it
+requires that a **quorum** of object replicas be online and reachable,
+where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not
+available for a key, all strongly consistent operations against that key
+will fail**.
+
+More information can be found in the section on Fault Tolerance above.
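+
+As promised above, here is an illustrative sketch of the conditional operations over the HTTP API. The bucket type name `strict`, the bucket and key names, and the placeholder context value are hypothetical; the type is assumed to have been created with `'{"props": {"consistent": true}}'`:
+
+```bash
+# Conditional put: succeeds only if no object exists at this key yet
+curl -X PUT -H "Content-Type: text/plain" -d "v1" \
+  http://localhost:8098/types/strict/buckets/demo/keys/k1
+
+# Fetch the object, noting its causal context (the X-Riak-Vclock header)
+curl -i http://localhost:8098/types/strict/buckets/demo/keys/k1
+
+# Conditional modify: include the fetched context; the write fails if
+# the value has changed since the read
+curl -X PUT -H "Content-Type: text/plain" \
+  -H "X-Riak-Vclock: <context-from-previous-read>" \
+  -d "v2" http://localhost:8098/types/strict/buckets/demo/keys/k1
+```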
+
+### Peers, Leaders, Followers, and Workers
+
+All ensembles in strongly consistent Riak consist of agents called
+**peers**. The number of peers in an ensemble is defined by the `n_val`
+of that ensemble, i.e. the number of object replicas that the
+ensemble watches over. Amongst the peers in the ensemble, there are two
+basic actors: **leaders** and **followers**.
+
+Leaders and followers coordinate with one another on most requests.
+While leaders and followers coordinate on all writes, i.e. all puts and
+deletes, you can enable leaders to respond to gets without the need to
+coordinate with followers. This is known as granting a **leader lease**.
+Leader leases are enabled by default, and are disabled (or re-enabled)
+at the cluster level. A more in-depth account of ensemble behavior can
+be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/tree/develop/doc).
+
+In addition to leaders and followers, ensemble peers use lightweight
+Erlang processes called **workers** to perform long-running K/V
+operations, allowing peers to remain responsive to requests. The number
+of workers assigned to each peer depends on your configuration.
+
+These terms should be borne in mind in the sections on configuration
+below.
+
+### Integrity Checking
+
+An essential part of implementing a strong consistency subsystem in a
+distributed system is **integrity checking**, which is a process that
+guards against data corruption and inconsistency even in the face of
+network partitions and other adverse events that Riak was built to
+handle gracefully.
+
+Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency
+integrity checking utilizes [Merkle
+trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on
+disk. All peers in an ensemble, i.e. all leaders and followers, maintain
+their own Merkle trees and update those trees in the event of most
+strongly consistent operations. Those updates can occur synchronously or
+asynchronously from the standpoint of client operations, depending on
+the configuration that you specify.
+
+While integrity checking takes place automatically in Riak, there are
+important aspects of its behavior that you can configure. See the
+[Merkle Tree Settings](#merkle-tree-settings) section below for more
+information on configurable parameters.
+
+## Configuring Strong Consistency
+
+The `riak_ensemble` subsystem provides a wide variety of tunable
+parameters that you can adjust to fit the needs of your Riak cluster.
+All `riak_ensemble`-specific parameters, with the exception of the
+`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency),
+must be set in each node's `advanced.config` file, _not_ in `riak.conf`
+or `app.config`.
+
+Information on the syntax and usage of `advanced.config` can be found in
+our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full
+listing of [strong-consistency-related configuration parameters][config reference#strong-cons].
+
+Please note that the sections below require a basic understanding of the
+following terms:
+
+* ensemble
+* peer
+* leader
+* follower
+* worker
+* integrity checking
+* Merkle tree
+
+For an explanation of these terms, see the [Implementation Details](#implementation-details) section
+above.
+
+### Leader Behavior
+
+The `trust_lease` setting determines whether leader leases are used to
+optimize reads.
When set to `true`, a leader with a valid lease can
+handle reads directly without needing to contact any followers. When
+`false`, the leader will always contact followers, which can lead to
+degraded read performance. The default is `true`. We recommend leaving
+leader leases enabled for performance reasons.
+
+All leaders have periodic duties that they perform, including refreshing
+the leader lease. You can determine how frequently this occurs, in
+milliseconds, using the `ensemble_tick` setting. The default is 500
+milliseconds. Please note that this setting must be lower than both
+the `lease_duration` setting (explained below) and the
+`follower_timeout` setting.
+
+If you set `trust_lease` to `true`, you can also specify how long a
+leader lease remains valid without being refreshed using the
+`lease_duration` setting, which is specified in milliseconds. This
+setting should be higher than `ensemble_tick` to ensure that leaders
+have time to refresh their leases before they time out, and it _must_
+be lower than `follower_timeout`. The
+default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400,
+`lease_duration` will default to 600.
+
+### Worker Settings
+
+You can choose how many workers are assigned to each peer using the
+`peer_workers` setting. Workers are lightweight processes spawned by
+leaders and followers. While increasing the number of workers will make
+the strong consistency subsystem slightly more computationally
+expensive, more workers can mean improved performance in some cases,
+depending on the workload. The default is 1.
+
+### Timeouts
+
+You can establish timeouts for both reads and writes (puts and deletes)
+using the `peer_get_timeout` and `peer_put_timeout` settings,
+respectively. Both are expressed in milliseconds and default to 60000
+(1 minute).
+
+Longer timeouts will decrease the likelihood that read or write
+operations will fail due to long computation times; shorter timeouts
+entail shorter wait times for connecting clients, but at a higher risk
+of failed operations under heavy load.
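+
+Pulling the leader, worker, and timeout settings above together, a hypothetical `advanced.config` stanza might look like the following. The `riak_ensemble` section name and all values are illustrative assumptions, not tuning advice:
+
+```advancedconfig
+{riak_ensemble, [
+    %% Leader leases enabled (the default) let leaders serve reads
+    %% without contacting followers
+    {trust_lease, true},
+    %% Periodic leader duties, in ms; keep below lease_duration and
+    %% follower_timeout
+    {ensemble_tick, 500},
+    %% How long a lease remains valid without a refresh, in ms
+    {lease_duration, 750},
+    %% Lightweight worker processes per peer
+    {peer_workers, 2},
+    %% Read and write timeouts, in ms
+    {peer_get_timeout, 60000},
+    {peer_put_timeout, 60000}
+  ]}
+```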
+
+### Merkle Tree Settings
+
+Leaders and followers in Riak's strong consistency system maintain
+persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for
+all data stored by that peer. More information can be found in the
+**Integrity Checking** section above. The two sections directly below
+describe Merkle-tree-related parameters.
+
+#### Tree Validation
+
+The `tree_validation` parameter determines whether Riak considers Merkle
+trees to be trusted after peers are restarted (for whatever reason).
+When enabled, i.e. when `tree_validation` is set to `true` (the
+default), Riak does not trust peer trees after a restart, instead
+requiring the peer to sync with a trusted quorum. While this is the
+safest mode because it protects Riak against silent corruption in Merkle
+trees, it carries the drawback that it can reduce Riak availability by
+requiring more than a simple majority of nodes to be online and
+reachable when peers restart.
+
+If you are using ensembles with N=3, we strongly recommend setting
+`tree_validation` to `false`.
+
+#### Synchronous vs. Asynchronous Tree Updates
+
+Merkle tree updates can happen synchronously or asynchronously. This is
+determined by the `synchronous_tree_updates` parameter. When set to
+`false`, which is the default, Riak responds to the client after the
+first roundtrip that updates the followers' data but before the second
+roundtrip required to update the followers' Merkle trees, allowing the
+Merkle tree update to happen asynchronously in the background; when set
+to `true`, Riak requires two quorum roundtrips to occur before replying
+back to the client, which can increase per-request latency.
+
+Please note that this setting applies only to Merkle tree updates sent
+to followers. Leaders _always_ update their local Merkle trees before
+responding to the client. Asynchronous updates can be unsafe in certain
+scenarios. For example, if a leader crashes before sending metadata
+updates to followers _and_ all followers that had acknowledged the write
+somehow revert the object value immediately prior to the write request,
+a future read could hypothetically return the immediately preceding
+value without realizing that the value was incorrect. Setting
+`synchronous_tree_updates` to `false` does bear this possibility, but it
+is highly unlikely.
+
+## Strong Consistency and Active Anti-Entropy
+
+Riak's [active anti-entropy][glossary aae] \(AAE) feature _can_ repair strongly
+consistent data. Although it is not necessary to use active anti-entropy
+if you are using strong consistency, we nonetheless recommend doing so.
+
+Without AAE, all object conflicts are repaired via read repair.
+Read repair, however, cannot repair conflicts in so-called "cold data,"
+i.e. data that may not be read for long periods of time. While using AAE
+does entail small performance losses, not using AAE can lead to problems
+with silent on-disk corruption.
+
+## Strong Consistency and Bitcask
+
+One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability in objects' TTL. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in
+strongly consistent buckets only in situations where these occasional
+irregularities are acceptable.
+
+## Important Caveats
+
+The following Riak features are not currently available in strongly
+consistent buckets:
+
+* [Secondary indexes][cluster ops 2i] --- If you do attach
+  secondary index metadata to objects in strongly consistent buckets,
+  strongly consistent operations can still proceed, but that metadata
+  will be silently ignored.
+* [Riak Data Types][dev data types] --- Data Types can currently be
+  used only in an eventually consistent fashion.
+* [Using commit hooks][usage commit hooks] --- Neither pre- nor post-commit hooks are supported in strongly consistent buckets. If you do associate a
+  strongly consistent bucket with one or more commit hooks, strongly
+  consistent operations can proceed as normal in that bucket, but all
+  commit hooks will be silently ignored.
+
+Furthermore, you should also be aware that strong consistency guarantees
+are applied only at the level of single keys.
There is currently no
+support within Riak for strongly consistent operations against multiple
+keys, although it is always possible to incorporate client-side write
+and read locks in applications that use strong consistency.
+
+## Known Issues
+
+There are a few known issues that you should be aware of when using the
+latest version of strong consistency.
+
+* **Consistent reads of never-written keys create tombstones** --- A
+  [tombstone][cluster ops obj del] will be written if you perform a read
+  against a key that a majority of peers claims to not exist. This is
+  necessary for certain corner cases in which offline or unreachable
+  replicas containing partially written data need to be rolled back in
+  the future.
+* **Consistent keys and key listing** --- In Riak, key listing
+  operations, such as listing all the keys in a bucket, do not filter
+  out tombstones. While this is rarely a problem for
+  non-strongly-consistent keys, it does present an issue for strong
+  consistency due to the tombstone issues mentioned above.
+* **Secondary indexes not supported** --- Strongly consistent
+  operations do not support [secondary indexes][cluster ops 2i] \(2i) at this time. Furthermore, any other metadata
+  attached to objects, even if not related to 2i, will be silently
+  ignored by Riak in strongly consistent buckets.
+* **Multi-Datacenter Replication not supported** --- At this time,
+  consistent keys are *not* replicated across clusters using
+  Multi-Datacenter Replication \(MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly
+  consistent data within a cluster with eventually consistent data
+  between clusters is difficult to reason about from the perspective of
+  applications. In a future version of Riak, we will add support for
+  strongly consistent replication across multiple datacenters/clusters.
+* **Client library exceptions** --- Basho's official [client
+  libraries][dev client libraries] convert errors returned by Riak into generic exceptions,
+  with a message derived from the returned server-side error message.
+
+
diff --git a/content/riak/kv/2.9.8/configuring/v2-multi-datacenter.md b/content/riak/kv/2.9.8/configuring/v2-multi-datacenter.md
new file mode 100644
index 0000000000..c2f3d28b6d
--- /dev/null
+++ b/content/riak/kv/2.9.8/configuring/v2-multi-datacenter.md
@@ -0,0 +1,160 @@
+---
+title_supertext: "Configuring:"
+title: "V2 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "V2 Multi-Datacenter"
+    identifier: "configuring_v2"
+    weight: 210
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.8/ops/mdc/v2/configuration
+  - /riak/kv/2.9.8/ops/mdc/v2/configuration
+---
+
+[config v2 ssl]: {{}}riak/kv/2.9.8/configuring/v2-multi-datacenter/ssl
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.8/configuring/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication capabilities offer a
+variety of configurable parameters.
+
+## File
+
+The configuration for replication is kept in the `riak_repl` section of
+each node's `advanced.config`.
That section looks like this:
+
+```advancedconfig
+{riak_repl, [
+    {fullsync_on_connect, true},
+    {fullsync_interval, 360},
+    % Debian/Centos/RHEL:
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    % Solaris:
+    % {data_root, "/opt/riak/data/riak_repl"},
+    % FreeBSD/SmartOS:
+    % {data_root, "/var/db/riak/riak_repl"},
+    {queue_size, 104857600},
+    {server_max_pending, 5},
+    {client_ack_frequency, 5}
+  ]}
+```
+
+## Usage
+
+These settings are configured using the standard Erlang config file
+syntax, i.e. `{Setting, Value}`. For example, if you wished to set
+`ssl_enabled` to `true`, you would insert the following line into the
+`riak_repl` section (appending a comma if you have more settings to
+follow):
+
+```advancedconfig
+{riak_repl, [
+    % Other configs
+    {ssl_enabled, true},
+    % Other configs
+  ]}
+```
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by
+running the following command:
+
+```bash
+riak chkconfig
+```
+
+The output from this command will point you to syntactical and other
+errors in your configuration files.
+
+A full list of configurable parameters can be found in the sections
+below.
+
+## Fullsync Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster
+`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication. **Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).
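+
+As a sketch, an SSL-enabled `riak_repl` section combining the settings above might look like this; the file paths and ACL pattern are hypothetical examples:
+
+```advancedconfig
+{riak_repl, [
+    {ssl_enabled, true},
+    %% Hypothetical certificate locations
+    {certfile, "/etc/riak/repl-cert.pem"},
+    {keyfile, "/etc/riak/repl-key.pem"},
+    {cacertdir, "/etc/riak/cacerts"},
+    %% Only accept peers whose certificate common name matches this pattern
+    {peer_common_name_acl, ["*.example.com"]}
+  ]}
+```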
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects for which the leader will await acknowledgment from the remote location before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site
+
+## Client Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster
+`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs
+`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred
+
+## Buffer Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes
+`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes
+
+## Worker Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2).
+`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Put workers perform the `PUT` operations that write replicated objects on the client site. See [3](#f3).
+`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2).
+`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Put workers perform the `PUT` operations that write replicated objects on the client site. See [3](#f3).
+
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA and so on.
+
+2. Each get worker spawns 2 processes, one for the work and
+   one for the get FSM (an Erlang finite state machine implementation for `GET`
+   requests). Be sure that you don't run over the maximum number of allowed
+   processes in an Erlang VM (check `vm.args` for a `+P` property).
+
+3. Each put worker spawns 2 processes, one for the work, and
+   one for the put FSM (an Erlang finite state machine implementation for `PUT`
+   requests). Be sure that you don't run over the maximum number of allowed
+   processes in an Erlang VM (check `vm.args` for a `+P` property).
+
+4. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v2 ssl].
+ + + + diff --git a/content/riak/kv/2.9.8/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.9.8/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..1e5792be27 --- /dev/null +++ b/content/riak/kv/2.9.8/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,82 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.8/ops/mdc/v2/nat + - /riak/kv/2.9.8/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/2.9.8/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.8/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` + + + + diff --git a/content/riak/kv/2.9.8/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.9.8/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..f81eff3e82 --- /dev/null +++ b/content/riak/kv/2.9.8/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,371 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.8/ops/mdc/v2/quick-start + - /riak/kv/2.9.8/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.8/configuring/v3-multi-datacenter/quick-start/) instead. 
+{{% /note %}}
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through
+the process of configuring Riak's version 2 Replication to perform
+replication between two sample Riak clusters in separate networks. This
+guide will also cover bidirectional replication, which is accomplished
+by setting up unidirectional replication in both directions between the
+clusters.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [System Performance Tuning][perf index]
+* [Reviewing configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there is a `listener_<nodename>` entry for
+each listening node, and that `leader` and `server_stats` are populated.
+They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following:
+
+```
+Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010"
+leader: 'riak@192.168.1.21'
+client_stats: [{<8051.3902.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster1"},
+                         {strategy,riak_repl_keylist_client},
+                         {fullsync_worker,<8051.3909.0>},
+                         {put_pool_size,5},
+                         {connected,"172.16.1.11",9010},
+                         {state,wait_for_fullsync}]}}]
+```
+
+### Testing Realtime Replication
+
+That's all there is to it! When `PUT` requests are coordinated by
+Cluster1, these operations will be replicated to Cluster2.
+
+You can use the following example script to verify that `PUT` operations
+sent to Cluster1 are being replicated to Cluster2:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=172.16.1.11
+CLUSTER_2_IP=192.168.1.21
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
+  echo "C1 PUT Successful"
+else
+  echo "C1 PUT Failed"
+  exit 1
+fi
+
+CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent
+  C1:${CHECKPUT_C1}
+  C2:${CHECKREPL_C1_TO_C2}"
+  exit 1
+fi
+
+exit 0
+```
+
+You will have to change some of the above variables for your own
+environment, such as IP addresses or ports.
+
+If you run this script and things are working as expected, you will get
+the following output:
+
+```
+C1 PUT Successful
+C1 to C2 consistent
+```
+
+## Set Up Cluster2 → Cluster1 Replication
+
+### About Bidirectional Replication
+
+Multi-Datacenter support can also be configured to replicate in both
+directions, ensuring eventual consistency between your two datacenters.
+Setting up bidirectional replication is as simple as repeating the steps
+above in the other direction, i.e. from Cluster2 to Cluster1.
+
+### Set Up the Listeners on Cluster2 (Source cluster)
+
+On a node in Cluster2, `node4` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010
+riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010
+riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010
+```
+
+### Set Up the Site on Cluster1 (Site cluster)
+
+On a node in Cluster1, `node1` for example, inform the replication
+clients where the Source Listeners are with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you configured in
+the earlier step. For `sitename` enter **Cluster2**.
+
+```bash
+riak-repl add-site 192.168.1.21 9010 Cluster2
+```
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show 'cancelled' in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.9.8/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..de888c7faf
--- /dev/null
+++ b/content/riak/kv/2.9.8/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,164 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.8/ops/mdc/v2/ssl
+  - /riak/kv/2.9.8/ops/mdc/v2/ssl
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.8/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+ * Encryption of replication data
+ * SSL certificate chain validation
+ * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+]}
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_depth, ...}
+    % ...
+]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and the root CA. By definition, intermediate certificates cannot
+be self-signed.
+
+For example:
+
+ * A depth of 0 indicates that the certificate must be signed directly
+   by a root certificate authority (CA)
+ * A depth of 1 indicates that the certificate may be signed by at most
+   one intermediate CA, followed by a root CA
+ * A depth of 2 indicates that the certificate may be signed by at most
+   two intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/configuring/v3-multi-datacenter.md b/content/riak/kv/2.9.8/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..4828f5c035
--- /dev/null
+++ b/content/riak/kv/2.9.8/configuring/v3-multi-datacenter.md
@@ -0,0 +1,161 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.8/ops/mdc/v3/configuration
+  - /riak/kv/2.9.8/ops/mdc/v3/configuration
+---
+
+[config reference#advanced]: {{}}riak/kv/2.9.8/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{}}riak/kv/2.9.8/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file.
For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+Here is a sample of the syntax:
+
+```advancedconfig
+{riak_core, [
+    %% Every *node* runs one cluster_mgr
+    {cluster_mgr, {"0.0.0.0", 9080}},
+    % ...
+]},
+{riak_repl, [
+    %% Pick the correct data_root for your platform
+    %% Debian/Centos/RHEL:
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    %% Solaris:
+    %% {data_root, "/opt/riak/data/riak_repl"},
+    %% FreeBSD/SmartOS:
+    %% {data_root, "/var/db/riak/riak_repl"},
+    {max_fssource_cluster, 5},
+    {max_fssource_node, 2},
+    {max_fssink_node, 2},
+    {fullsync_on_connect, false},
+    % ...
+]}
+```
+
+## Settings
+
+Riak MDC configuration is set using the standard Erlang config file
+syntax `{Setting, Value}`. For example, if you wished to set
+`fullsync_on_connect` to `false`, you would insert this line into the
+`riak_repl` section (appending a comma if you have more settings to
+follow):
+
+```advancedconfig
+{fullsync_on_connect, false}
+```
+
+Once your configuration is set, you can verify its correctness by
+running the `riak` command-line tool:
+
+```bash
+riak chkconfig
+```
+
+## riak_repl Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number.
+`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line.
+`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line.
+`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink node's maximum number of connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line.
+`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on the initial connection from the secondary cluster
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication.
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
+`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
+`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100MB. Dropped objects will need to be replicated with a fullsync.
+`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
+`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+
+
+## riak_core Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+
+## Heartbeat Settings
+
+There are two realtime-replication-related settings in the `riak_repl`
+section of `advanced.config` related to the periodic "heartbeat" that is sent
+from the source to the sink cluster to verify the sink cluster's
+liveness. The `rt_heartbeat_interval` setting determines how often the
+heartbeat is sent (in seconds). If a heartbeat is sent and a response is
+not received, Riak will wait `rt_heartbeat_timeout` seconds before
+attempting to re-connect to the sink; if any data is received from the
+sink, even if it is not heartbeat data, the timer will be reset. Setting
+`rt_heartbeat_interval` to `undefined` will disable the heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to.
On the other hand,
+lengthening the timeout will make Riak less sensitive to cases in which
+the connection really has been compromised.
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA and so on.
+
+2. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer].
+
+## Default Bucket Properties
+
+Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0.
+
+To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the advanced.config file:
+
+```advancedconfig
+{riak_repl, [
+    {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}]}
+    ]}
+]}
+```
+
+If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.9.8/configuring/v3-multi-datacenter/nat.md
new file mode 100644
index 0000000000..b6b5192054
--- /dev/null
+++ b/content/riak/kv/2.9.8/configuring/v3-multi-datacenter/nat.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "With NAT"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "With NAT"
+    identifier: "configuring_v3_replication_nat"
+    weight: 101
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.8/ops/mdc/v3/nat
+  - /riak/kv/2.9.8/ops/mdc/v3/nat
+---
+
+[config v3 ssl]: {{}}riak/kv/2.9.8/configuring/v3-multi-datacenter/ssl
+
+Riak's Version 3 Replication supports replication of data on
+networks that use static NAT.
+
+This can be used for replicating data over the internet where servers
+have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network).
+
+### Requirements
+
+In order for Replication to work on a server configured with NAT, the
+NAT addresses must be configured *statically*.
+
+## Configuration
+
+NAT rules can be configured at runtime, from the command line.
+
+* `riak-repl nat-map show`
+
+    Shows the current NAT mapping table
+
+* `riak-repl nat-map add <external_ip>[:port] <internal_ip>`
+
+    Adds a NAT map from the external IP, with an optional port, to an
+    internal IP. The port number refers to a port that is automatically
+    mapped to the internal `cluster_mgr` port number.
+
+* `riak-repl nat-map del <external_ip>[:port] <internal_ip>`
+
+    Deletes a specific NAT map entry.
+
+### Applying Changes at Runtime
+
+* Realtime NAT replication changes will be applied once realtime is
+  stopped and started using the following commands:
+
+    * `riak-repl realtime stop <clustername>`
+    * `riak-repl realtime start <clustername>`
+
+* Fullsync NAT replication changes will be applied on the next run of a
+  fullsync, or you can stop and start the current fullsync.
+
+    * `riak-repl fullsync stop <clustername>`
+    * `riak-repl fullsync start <clustername>`
+
+
+## Example
+
+* Cluster_A is the **source** of replicated data.
+* Cluster_B and Cluster_C are the **sinks** of the replicated data.
+
+### Cluster_A Setup
+
+Cluster_A is set up with nodes using the following **internal** IP
+addresses:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.1.20` | -
+`192.168.1.21` | -
+`192.168.1.22` | -
+`192.168.1.23` | -
+`192.168.1.24` | -
+
+### Cluster_B Setup
+
+The nodes in Cluster_B are configured as follows:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.2.40` | `50.16.238.120:5555`
+`192.168.2.41` | `50.16.238.121:5555`
+`192.168.2.42` | `50.16.238.122:5555`
+`192.168.2.43` | `50.16.238.123:5555`
+`192.168.2.44` | `50.16.238.124:5555`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT port listens on `5555`.
+
+### Cluster_C Setup
+
+The nodes in Cluster_C are set up with **static NAT**, configured with the
+following IP addresses:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.3.60` | `50.16.238.200:5550`
+`192.168.3.61` | `50.16.238.200:5551`
+`192.168.3.62` | `50.16.238.200:5552`
+`192.168.3.63` | `50.16.238.200:5553`
+`192.168.3.64` | `50.16.238.200:5554`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT ports listen on `5550` through `5554`.
+
+```bash
+# on any node of Cluster_A
+riak-repl clustername Server_A
+
+# on any node of Cluster_B
+riak-repl clustername Server_B
+
+# on any node of Cluster_C
+riak-repl clustername Server_C
+
+# on 50.16.238.120 of Cluster_B
+riak-repl nat-map add 50.16.238.120:5555 192.168.2.40
+# on 50.16.238.121 of Cluster_B
+riak-repl nat-map add 50.16.238.121:5555 192.168.2.41
+# on 50.16.238.122 of Cluster_B
+riak-repl nat-map add 50.16.238.122:5555 192.168.2.42
+# on 50.16.238.123 of Cluster_B
+riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
+# on 50.16.238.124 of Cluster_B
+riak-repl nat-map add 50.16.238.124:5555 192.168.2.44
+
+# on 192.168.3.60 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5550 192.168.3.60
+# on 192.168.3.61 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5551 192.168.3.61
+# on 192.168.3.62 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5552 192.168.3.62
+# on 192.168.3.63 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5553 192.168.3.63
+# on 192.168.3.64 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5554 192.168.3.64
+
+
+# Connect replication from Cluster_A to Cluster_B:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.120:5555
+# You can connect to any node in Cluster_B with NAT-mapped IPs/ports
+# This command only needs to be run *once* for a cluster.
+
+# Connect replication from Cluster_A to Cluster_C:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.200:5550
+# You can connect to any node in Cluster_C with NAT-mapped IPs/ports
+# This command only needs to be run *once* for a cluster.
+ + +# on any node from Cluster_A +riak-repl realtime enable Cluster_B +riak-repl realtime enable Cluster_C + +riak-repl realtime start Cluster_B +riak-repl realtime start Cluster_C +``` + + + + diff --git a/content/riak/kv/2.9.8/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.9.8/configuring/v3-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..19449162f8 --- /dev/null +++ b/content/riak/kv/2.9.8/configuring/v3-multi-datacenter/quick-start.md @@ -0,0 +1,172 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Quickstart" + identifier: "configuring_v3_quickstart" + weight: 100 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.8/ops/mdc/v3/quick-start + - /riak/kv/2.9.8/ops/mdc/v3/quick-start +--- + +[perf index]: {{}}riak/kv/2.9.8/using/performance +[config v3 mdc]: {{}}riak/kv/2.9.8/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter + +This guide will walk you through the process of configuring Riak's v3 +Replication to perform replication between two sample Riak clusters on +separate networks. This guide will also cover bidirectional replication, +which is accomplished by setting up unidirectional replication in both +directions between the clusters. It is important to note that both +clusters must have the same ring size, but can have a different number +of nodes. + +## Prerequisites + +This guide assumes that you have completed the following steps: + +* Install [Riak][install index] +* Perform [System Tuning][perf index] +* Review [Configuration][config v3 mdc] + +## About v3 Replication in 1.3 and higher + +In Riak's v3 Replication from Riak KV version 1.3 onwards, the nomenclature for Source and Site +clusters has changed. To more accurately reflect the behavior of each of +the clusters, "listeners" and "sites" are now known as "sources" and +"sinks." Data transfer now originates at the "source" and replicates to +the "sink;" initiation is always from the primary (source) to the backup +(sink) data center. + +Additionally, knowledge of the state of each cluster is now managed by a +**cluster manager** process, which greatly simplifies the setup and +maintenance of Multi-Datacenter replication. 
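+
+Each node's cluster manager listens on the address and port configured
+under `cluster_mgr` in the `riak_core` section of `advanced.config`. A
+minimal sketch, assuming the default port of `9080` (see the
+[Configuration][config v3 mdc] page for the full list of settings):
+
+```advancedconfig
+{riak_core, [
+    %% The cluster manager; v3 replication will not run without it
+    {cluster_mgr, {"0.0.0.0", 9080}}
+]}
+```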
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|:-----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View Your Active Connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name         [Members]
+----             ------------         ----------  ---------
+Cluster2         Cluster2             <0.7985.0>  ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name         [Members]
+----             ------------         ----------  ---------
+Cluster1         Cluster1             <0.4456.0>  ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2.
+Once this is done, bidirectional replication should be operating.
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
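+
+As a final check, you can verify that realtime replication is working by
+writing an object to Cluster1 over HTTP and reading it back from
+Cluster2, in the same style as the v2 quickstart's test script. This is
+a minimal sketch, assuming the default HTTP port of `8098`, the example
+IPs from the scenario above, and a scratch `replCheck` bucket; adjust
+these values for your environment:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=10.60.67.149
+CLUSTER_2_IP=10.60.77.10
+
+# Write an object to the source cluster
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+# Read the same key back from the sink cluster
+CHECKREPL=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent"
+  exit 1
+fi
+```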
+
+
+
+
diff --git a/content/riak/kv/2.9.8/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.9.8/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..b1373044c8
--- /dev/null
+++ b/content/riak/kv/2.9.8/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,174 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.8/ops/mdc/v3/ssl
+  - /riak/kv/2.9.8/ops/mdc/v3/ssl
+---
+
+[config reference#advanced.config]: {{}}riak/kv/2.9.8/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+ * Encryption of replication data
+ * SSL certificate chain validation
+ * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+]}
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}} + +### Examples + +The following example will only allow connections from peer certificate +names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`: + +```advancedconfig +{riak_core, [ + % ... + {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]} + % ... + ]} + +``` + +The following example will allow connections from peer certificate names +like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a +peer certificate name like `db.backup.bashosamplecorp.com`. + +```advancedconfig +{riak_core, [ + % ... + {peer_common_name_acl, ["*.bashosamplecorp.com"]} + % ... + ]} + +``` + +This example will match any peer certificate name (and is the default): + +```advancedconfig +{riak_core, [ + % ... + {peer_common_name_acl, "*"} + % ... + ]} + +``` + +## SSL CA Validation + +You can adjust the way CA certificates are validated by adding the +following to the `riak_repl` section of `advanced.config`: + +```advancedconfig +{riak_core, [ + % ... + {ssl_depth, 3} % Sets the depth to 3 + % ... + ]} + +``` + +**Note**: `ssl_depth` takes an integer parameter. + +The depth specifies the maximum number of intermediate certificates that +may follow the peer certificate in a valid certification path. The +intermediate certificates must not be self signed. + +The following example depths illustrate this: + + * a depth of `0` indicates that the certificate must be signed + directly by a root certificate authority (CA) + * a depth of `1` indicates that the certificate may be signed by at + most 1 intermediate CA's, followed by a root CA + * a depth of `2` indicates that the certificate may be signed by at + most 2 intermediate CA's, followed by a root CA + +## Compatibility + +Replication SSL for *Version 3* is available in *Riak 1.4+*. + +If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or +1.1 node, the connection will be denied and an error will be logged. + +### Self-Signed Certificates + +Read how to [generate your own CA and +keys](http://www.debian-administration.org/articles/618). Ensure that +you remove the password protection from the keys you generate. + + + + diff --git a/content/riak/kv/2.9.8/developing.md b/content/riak/kv/2.9.8/developing.md new file mode 100644 index 0000000000..f1d5f1f49b --- /dev/null +++ b/content/riak/kv/2.9.8/developing.md @@ -0,0 +1,79 @@ +--- +title: "Developing with Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Developing" + identifier: "developing" + weight: 300 + pre: lambda +toc: true +aliases: +--- + +[getting started]: ../developing/getting-started +[usage index]: ../developing/usage +[client libraries]: ../developing/client-libraries +[dev data types]: ../developing/data-types +[dev data modeling]: ../developing/data-modeling +[apps index]: ../developing/app-guide +[dev api index]: ../developing/api +[dev faq]: ../developing/faq + +## In This Section + +#### [Getting Started][getting started] + +Step-by-step guide for getting started developing with Riak KV. + +[Learn More >>][getting started] + +#### [Usage][usage index] + +A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types. + +[Learn More >>][usage index] + +#### [Client Libraries][client libraries] + +Overview of client libraries for a variety of programming languages and environments. 
+ +[Learn More >>][client libraries] + +#### [Data Types][dev data types] + +Overview and guide to working with data types in Riak KV. + +[Learn More >>][dev data types] + +#### [Data Modeling][dev data modeling] + +Information on use cases and data models that are a good fit for Riak KV. + +[Learn More >>][dev data modeling] + +#### [Application Guide][apps index] + +A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV. + +[Learn More >>][apps index] + +#### [APIs Reference][dev api index] + +Information and reference material on Riak KV APIs. + +[Learn More >>][dev api index] + +#### [FAQ][dev faq] + +Frequently asked questions when developing applications with Riak KV. + +[Learn More >>][dev faq] + + + + + + diff --git a/content/riak/kv/2.9.8/developing/api.md b/content/riak/kv/2.9.8/developing/api.md new file mode 100644 index 0000000000..5311a27f14 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api.md @@ -0,0 +1,42 @@ +--- +title: "APIs" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "APIs" + identifier: "developing_apis" + weight: 107 + parent: "developing" +toc: true +aliases: +--- + +[dev api http]: ./http +[dev api backend]: ./backend +[dev api pbc]: ./protocol-buffers/ + +## In This Section + +#### [HTTP APIs][dev api http] + +Documentation on Riak KV's HTTP API. + +[Learn More >>][dev api http] + +#### [Protocol Buffers][dev api pbc] + +Information on Riak KV's Protocol Buffer Client API + +[Learn More >>][dev api pbc] + +#### [Backend API][dev api backend] + +Overview of Riak KV's storage backend API. + +[Learn More >>][dev api backend] + + + + diff --git a/content/riak/kv/2.9.8/developing/api/backend.md b/content/riak/kv/2.9.8/developing/api/backend.md new file mode 100644 index 0000000000..ac64d75d18 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/backend.md @@ -0,0 +1,118 @@ +--- +title: "Backend API" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Backend API" + identifier: "apis_backend" + weight: 101 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.8/dev/references/backend-api + - /riak/kv/2.9.8/dev/references/backend-api +--- + +[plan backend]: {{}}riak/kv/2.9.8/setup/planning/backend + +Riak's storage API uniformly applies to all of the +[supported backends][plan backend]. This page presents the details of +the storage backend API in the form of +[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html) +(specs). + +Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html), +an Erlang static analysis tool. We recommend copying these specs into any +custom backend modules and use them as a guide for development to +avoid errors and ensure full compatibility with Riak. + +Also included below is the function export list that can be pasted directly +into a custom storage backend module. + +```erlang +%% Riak Storage Backend API +-export([api_version/0, + start/2, + stop/1, + get/3, + put/5, + delete/4, + drop/1, + fold_buckets/4, + fold_keys/4, + fold_objects/4, + is_empty/1, + status/1, + callback/3]). + +%% =================================================================== +%% Public API +%% =================================================================== + +%% @doc Return the major version of the +%% current API and a capabilities list. +%% The current valid capabilities are async_fold +%% and indexes. 
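+%% For example, a hypothetical backend that supports both
+%% capabilities might return {1, [async_fold, indexes]}.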
+-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. + +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http.md b/content/riak/kv/2.9.8/developing/api/http.md new file mode 100644 index 0000000000..743f2d7ea0 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http.md @@ -0,0 +1,93 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.8/dev/references/http + - /riak/kv/2.9.8/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
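+
+For example, to fetch an object whose key contains a slash, such as
+a hypothetical key `docs/readme` in a bucket named `test`, escape the
+slash as `%2F` in the URL:
+
+```curl
+curl http://127.0.0.1:8098/buckets/test/keys/docs%2Freadme
+```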
+
+## Bucket-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.9.8/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.9.8/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.8/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.9.8/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.9.8/developing/api/http/list-keys)
+
+## Object-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/2.9.8/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{}}riak/kv/2.9.8/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.9.8/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/2.9.8/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/2.9.8/developing/api/http/delete-object)
+
+## Riak-Data-Type-related Operations
+
+Method | URL
+:------|:----
+`GET` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.9.8/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/2.9.8/developing/data-types/#usage-examples)
+and subpages e.g. [sets]({{}}riak/kv/2.9.8/developing/data-types/sets).
+
+Advanced users may consult the technical documentation inside the Riak
+KV internal module `riak_kv_wm_crdt`.
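+
+For example, a counter stored under a bucket type whose `datatype` is
+`counter` can be incremented and read with plain `curl`. This is a
+sketch only; the `counters` bucket type and the bucket/key names here
+are illustrative:
+
+```curl
+# Increment the counter by 5
+curl -XPOST http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_key \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+
+# Fetch the current value
+curl http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_key
+```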
+
+## Query-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.9.8/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<index_value>` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.8/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index_name>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.8/developing/api/http/secondary-indexes)
+
+## Server-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.9.8/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.9.8/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.9.8/developing/api/http/list-resources)
+
+## Search-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{}}riak/kv/2.9.8/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.9.8/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{}}riak/kv/2.9.8/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{}}riak/kv/2.9.8/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{}}riak/kv/2.9.8/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{}}riak/kv/2.9.8/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{}}riak/kv/2.9.8/developing/api/http/store-search-schema)
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/http/counters.md b/content/riak/kv/2.9.8/developing/api/http/counters.md
new file mode 100644
index 0000000000..2a0920e83f
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/http/counters.md
@@ -0,0 +1,82 @@
+---
+title: "HTTP Counters"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Counters"
+    identifier: "http_counters"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/http/counters
+  - /riak/kv/2.9.8/dev/references/http/counters
+---
+
+Riak counters are a CRDT (convergent replicated data type) that (eventually)
+converge to the correct total. You merely increment the counter with some
+integer, and any potential conflicts will be automatically resolved by Riak.
+
+## Setup
+
+Riak counters can only be used if the bucket has the `allow_mult` property
+set to `true`.
+
+```
+curl -XPUT localhost:8098/buckets/BUCKET/props \
+  -H "Content-Type: application/json" \
+  -d "{\"props\" : {\"allow_mult\": true}}"
+```
+
+If you attempt to use counters without setting the above, you'll get this
+message:
+
+```
+Counters require bucket property 'allow_mult=true'
+```
+
+## Request
+
+To insert a value, just POST an integer using the `/counters` resource. This
+will increment the keyed value by the given amount.
+
+```
+POST /buckets/BUCKET/counters/KEY
+```
+
+To receive the current value, issue a GET against `/counters`:
+
+```
+GET /buckets/BUCKET/counters/KEY
+```
+
+## Response
+
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.9.8/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.9.8/developing/api/http/fetch-object)) responses apply here.
+
+Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata.
+
+## Example
+
+The body must be an integer (positive or negative).
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/delete-object.md b/content/riak/kv/2.9.8/developing/api/http/delete-object.md new file mode 100644 index 0000000000..b48d194d40 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/delete-object.md @@ -0,0 +1,79 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/delete-object + - /riak/kv/2.9.8/dev/references/http/delete-object +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> DELETE /buckets/test/keys/test2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/delete-search-index.md b/content/riak/kv/2.9.8/developing/api/http/delete-search-index.md new file mode 100644 index 0000000000..90a977e91d --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/delete-search-index.md @@ -0,0 +1,38 @@ +--- +title: "HTTP Delete Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Delete Search Index" + identifier: "http_delete_search_index" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/delete-search-index + - /riak/kv/2.9.8/dev/references/http/delete-search-index +--- + +Deletes a Riak Search index. + +## Request + +``` +DELETE /search/index/ +``` + +## Normal Response Codes + +* `204 No Content` - The index was successfully deleted (also returned + if the index did not exist to begin with) + +## Typical Error Codes + +* `503 Service Unavailable` - The request timed out internally + + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/fetch-object.md b/content/riak/kv/2.9.8/developing/api/http/fetch-object.md new file mode 100644 index 0000000000..1acb90ce10 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/fetch-object.md @@ -0,0 +1,246 @@ +--- +title: "HTTP Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Fetch Object" + identifier: "http_fetch_object" + weight: 105 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/fetch-object + - /riak/kv/2.9.8/dev/references/http/fetch-object +--- + +Reads an object from the specified bucket/key. + +## Request + +```bash +GET /types/type/buckets/bucket/keys/key +GET /buckets/bucket/keys/key +``` + +Important headers: + +* `Accept` - When `multipart/mixed` is the preferred content-type, objects with +siblings will return all siblings in single request. See [Siblings examples](#siblings-examples). See +also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1). + +Optional headers: + +* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics, +matching on the `ETag` and `Last-Modified` of the object, respectively. If the +object fails one of the tests (that is, if the ETag is equal or the object is +unmodified since the supplied timestamp), Riak will return a `304 Not Modified` +response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5). + +Optional query parameters: + +* `r` - (read quorum) how many replicas need to agree when retrieving the +object ([default is defined by the bucket]({{}}riak/kv/2.9.8/developing/api/http/set-bucket-props)) +* `pr` - how many primary replicas need to be online when doing the read +([default is defined by the bucket]({{}}riak/kv/2.9.8/developing/api/http/set-bucket-props)) +* `basic_quorum` - whether to return early in some failure cases (eg. 
when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/2.9.8/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/2.9.8/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.8/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. +{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT +< ETag: 6dQBm9oYA1mxRSH0e96l5W +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"foo":"bar"} +``` + +## Siblings examples + +### Manually requesting siblings + +Simple call to fetch an object that has siblings: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 102 +< +Siblings: +16vic4eU9ny46o4KPiDz1f +4v5xOg4bVwUYZdMkqf0d6I +6nr5tDTmhxnwuAFJDd2s6G +6zRSZFUJlHXZ15o9CG0BYl +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +Now request one of the siblings directly: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT +< ETag: 16vic4eU9ny46o4KPiDz1f +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/x-www-form-urlencoded +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + +### Get all siblings in one request + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: multipart/mixed +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh +< Content-Length: 766 +< + +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/x-www-form-urlencoded +Link: ; rel="up" +Etag: 16vic4eU9ny46o4KPiDz1f +Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 4v5xOg4bVwUYZdMkqf0d6I +Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6nr5tDTmhxnwuAFJDd2s6G +Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6zRSZFUJlHXZ15o9CG0BYl +Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT + +{"foo":"bar"} +--YinLMzyUR9feB17okMytgKsylvh-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/fetch-search-index.md b/content/riak/kv/2.9.8/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..1d382aa8d1 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/fetch-search-index.md @@ -0,0 +1,52 @@ +--- +title: "HTTP Fetch Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Fetch Search Index" + identifier: "http_fetch_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/fetch-search-index + - /riak/kv/2.9.8/dev/references/http/fetch-search-index +--- + +Retrieves information about a Riak Search [index]({{}}riak/kv/2.9.8/developing/usage/search/#simple-setup). + +## Request + +``` +GET /search/index/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` - No Search index with that name is currently + available +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the index is found, Riak will output a JSON object describing the +index, including its name, the [`n_val`]({{}}riak/kv/2.9.8/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/2.9.8/developing/usage/search-schemas) used by the index. 
Here is an example: + +```json +{ + "name": "my_index", + "n_val": 3, + "schema": "_yz_default" +} +``` + + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.9.8/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..7cf0921c7d --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/fetch-search-schema.md @@ -0,0 +1,42 @@ +--- +title: "HTTP Fetch Search Schema" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Fetch Search Schema" + identifier: "http_fetch_search_schema" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/fetch-search-schema + - /riak/kv/2.9.8/dev/references/http/fetch-search-schema +--- + +Retrieves a Riak KV [search schema]({{}}riak/kv/2.9.8/developing/usage/search-schemas). + +## Request + +``` +GET /search/schema/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the schema is found, Riak will return the contents of the schema as +XML (all Riak Search schemas are XML). + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/get-bucket-props.md b/content/riak/kv/2.9.8/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..3969002596 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/get-bucket-props.md @@ -0,0 +1,86 @@ +--- +title: "HTTP Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Get Bucket Properties" + identifier: "http_get_bucket_props" + weight: 100 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/get-bucket-props + - /riak/kv/2.9.8/dev/references/http/get-bucket-props +--- + +Reads the bucket or bucket type properties. + +## Request + +```bash +GET /buckets/bucket/props +``` + +Or, to read bucket properties from a bucket in a bucket type: + +```bash +GET /types/type/buckets/bucket/props +``` + +Optional query parameters (only valid for the old format): + +* `props` - whether to return the bucket properties (`true` is the default) +* `keys` - whether to return the keys stored in the bucket. (`false` is the +default). See also [HTTP List Keys]({{}}riak/kv/2.9.8/developing/api/http/list-keys). + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` + +The JSON object in the response will contain up to two entries, `"props"` and +`"keys"`, which are present or missing, according to the optional query +parameters. The default is for only `"props"` to be present. + +See [HTTP Set Bucket Properties]({{}}riak/kv/2.9.8/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.9.8/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/props +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/link-walking.md b/content/riak/kv/2.9.8/developing/api/http/link-walking.md new file mode 100644 index 0000000000..6c4f709b6d --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/link-walking.md @@ -0,0 +1,129 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/link-walking + - /riak/kv/2.9.8/dev/references/http/link-walking +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/2.9.8/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/2.9.8/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/2.9.8/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/list-buckets.md b/content/riak/kv/2.9.8/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..265a663d48 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/list-buckets.md @@ -0,0 +1,68 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/list-buckets + - /riak/kv/2.9.8/dev/references/http/list-buckets +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. 
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/list-keys.md b/content/riak/kv/2.9.8/developing/api/http/list-keys.md new file mode 100644 index 0000000000..8fbe39b4a9 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/list-keys.md @@ -0,0 +1,80 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/list-keys + - /riak/kv/2.9.8/dev/references/http/list-keys +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/list-resources.md b/content/riak/kv/2.9.8/developing/api/http/list-resources.md new file mode 100644 index 0000000000..7c00a0af64 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/list-resources.md @@ -0,0 +1,84 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/list-resources + - /riak/kv/2.9.8/dev/references/http/list-resources +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
+
+The standard resources are:
+
+* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/2.9.8/developing/api/http/#bucket-operations)
+* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/2.9.8/developing/api/http/secondary-indexes)
+* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/2.9.8/developing/api/http/link-walking)
+* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/2.9.8/developing/api/http/mapreduce)
+* `riak_kv_wm_object` - [Object/Key Operations]({{}}riak/kv/2.9.8/developing/api/http/#object-key-operations)
+* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/2.9.8/developing/api/http/ping)
+* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/2.9.8/developing/api/http/set-bucket-props)
+* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/2.9.8/developing/api/http/status)
+
+## Request
+
+```bash
+GET /
+```
+
+Headers:
+
+* `Accept` - `application/json` or `text/html`
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+Important headers:
+
+* `Link` - all resources that are described in the response body, but in Link
+form
+
+## Example
+
+Request JSON response
+
+```curl
+$ curl -i http://localhost:8098 -H "Accept: application/json"
+HTTP/1.1 200 OK
+Vary: Accept
+Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact)
+Link: </buckets>; rel="riak_kv_wm_buckets",</riak>; rel="riak_kv_wm_buckets",</buckets>; rel="riak_kv_wm_counter",</buckets>; rel="riak_kv_wm_index",</buckets>; rel="riak_kv_wm_keylist",</buckets>; rel="riak_kv_wm_link_walker",</riak>; rel="riak_kv_wm_link_walker",</mapred>; rel="riak_kv_wm_mapred",</buckets>; rel="riak_kv_wm_object",</riak>; rel="riak_kv_wm_object",</ping>; rel="riak_kv_wm_ping",</buckets>; rel="riak_kv_wm_props",</stats>; rel="riak_kv_wm_stats"
+Date: Wed, 27 Nov 2013 20:18:31 GMT
+Content-Type: application/json
+Content-Length: 398
+
+{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"}
+
+# Request HTML response
+curl -i http://localhost:8098 -H "Accept: text/html"
+HTTP/1.1 200 OK
+Vary: Accept
+Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact)
+Link: </buckets>; rel="riak_kv_wm_buckets",</riak>; rel="riak_kv_wm_buckets",</buckets>; rel="riak_kv_wm_counter",</buckets>; rel="riak_kv_wm_index",</buckets>; rel="riak_kv_wm_keylist",</buckets>; rel="riak_kv_wm_link_walker",</riak>; rel="riak_kv_wm_link_walker",</mapred>; rel="riak_kv_wm_mapred",</buckets>; rel="riak_kv_wm_object",</riak>; rel="riak_kv_wm_object",</ping>; rel="riak_kv_wm_ping",</buckets>; rel="riak_kv_wm_props",</stats>; rel="riak_kv_wm_stats"
+Date: Wed, 27 Nov 2013 20:20:05 GMT
+Content-Type: text/html
+Content-Length: 666
+
+
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/http/mapreduce.md b/content/riak/kv/2.9.8/developing/api/http/mapreduce.md
new file mode 100644
index 0000000000..926e4691c9
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/http/mapreduce.md
@@ -0,0 +1,74 @@
+---
+title: "HTTP MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "MapReduce"
+    identifier: "http_mapreduce"
+    weight: 108
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/http/mapreduce
+  - /riak/kv/2.9.8/dev/references/http/mapreduce
+---
+
+[MapReduce]({{}}riak/kv/2.9.8/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will
flow.
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/2.9.8/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+*This request must include an entity (body), which is the JSON form of the MapReduce query.*
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted.
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/http/ping.md b/content/riak/kv/2.9.8/developing/api/http/ping.md
new file mode 100644
index 0000000000..ef67cfbbac
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/http/ping.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/http/ping
+  - /riak/kv/2.9.8/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load balancers, and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/http/reset-bucket-props.md b/content/riak/kv/2.9.8/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..c95c4233d7
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,61 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/http/reset-bucket-props
+  - /riak/kv/2.9.8/dev/references/http/reset-bucket-props
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/bucket/props HTTP/1.1
+> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue)
+< Date: Tue, 06 Nov 2012 21:56:17 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/http/search-index-info.md b/content/riak/kv/2.9.8/developing/api/http/search-index-info.md
new file mode 100644
index 0000000000..ed85de3644
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/http/search-index-info.md
@@ -0,0 +1,56 @@
+---
+title: "HTTP Search Index Info"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Search Index Info"
+    identifier: "http_search_index_info"
+    weight: 114
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/http/search-index-info
+  - /riak/kv/2.9.8/dev/references/http/search-index-info
+---
+
+Retrieves information about all currently available [Search indexes]({{}}riak/kv/2.9.8/developing/usage/search) in JSON format.
+
+## Request
+
+```
+GET /search/index
+```
+
+## Response
+
+If there are no currently available Search indexes, a `200 OK` will be
+returned but with an empty list as the response value.
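+
+You can query the endpoint directly with curl; for example, against a local
+node on the default HTTP port used in the other examples in these docs:
+
+```curl
+curl http://localhost:8098/search/index
+```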
+ +Below is the example output if there is one Search index, called +`test_index`, currently available: + +```json +[ + { + "n_val": 3, + "name": "test_index", + "schema": "_yz_default" + } +] +``` + +#### Normal Response Codes + +* `200 OK` + +#### Typical Error Codes + +* `404 Object Not Found` - Typically returned if Riak Search is not + currently enabled on the node +* `503 Service Unavailable` - The request timed out internally + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/search-query.md b/content/riak/kv/2.9.8/developing/api/http/search-query.md new file mode 100644 index 0000000000..ada622ad17 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/search-query.md @@ -0,0 +1,73 @@ +--- +title: "HTTP Search Query" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Search Query" + identifier: "http_search_query" + weight: 113 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/search-query + - /riak/kv/2.9.8/dev/references/http/search-query +--- + +Performs a [Riak KV Search]({{}}riak/kv/2.9.8/developing/usage/search) query. + +## Request + +``` +GET /search/query/ +``` + +## Optional Query Parameters + +* `wt` - The [response + writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers) + to be used when returning the Search payload. The currently + available options are `json` and `xml`. The default is `xml`. +* `q` - The actual Search query itself. Examples can be found in + [Using Search]({{}}riak/kv/2.9.8/developing/usage/search). If a query is not specified, Riak will return + information about the index itself, e.g. the number of documents + indexed. + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `400 Bad Request` - Returned when, for example, a malformed query is + supplied +* `404 Object Not Found` - Returned if the Search index you are + attempting to query does not exist +* `503 Service Unavailable` - The request timed out internally + +## Response + +If a `200 OK` is returned, then the Search query has been successful. +Below is an example JSON response from querying an index that currently +has no documents associated with it: + +```json +{ + "response": { + "docs": [], + "maxScore": 0.0, + "numFound": 0, + "start": 0 + }, + "responseHeader": { + "status": 0, + "QTime": 10, + "params": { /* internal info from the query */ } + } +} +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/secondary-indexes.md b/content/riak/kv/2.9.8/developing/api/http/secondary-indexes.md new file mode 100644 index 0000000000..718d3947d2 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/secondary-indexes.md @@ -0,0 +1,95 @@ +--- +title: "HTTP Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Secondary Indexes" + identifier: "http_2i" + weight: 109 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/secondary-indexes + - /riak/kv/2.9.8/dev/references/http/secondary-indexes +--- + +[Secondary Indexes]({{}}riak/kv/2.9.8/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
+
+## Request
+
+### Exact Match
+
+```bash
+GET /buckets/mybucket/index/myindex_bin/value
+```
+
+### Range Query
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end
+```
+
+#### Range query with terms
+
+To see the index values matched by the range, use `return_terms=true`.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true
+```
+
+### Pagination
+
+Add the `max_results` parameter for pagination. This limits the number of results returned and provides a `continuation` value to use in the next request.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=
+```
+
+### Streaming
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?stream=true
+```
+
+## Response
+
+Normal status codes:
+
++ `200 OK`
+
+Typical error codes:
+
++ `400 Bad Request` - if the index name or index value is invalid.
++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system.
++ `503 Service Unavailable` - if the job timed out before it could complete
+
+## Example
+
+```curl
+$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 19
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+{"keys":["mykey1"]}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/http/set-bucket-props.md b/content/riak/kv/2.9.8/developing/api/http/set-bucket-props.md
new file mode 100644
index 0000000000..22fe477445
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/http/set-bucket-props.md
@@ -0,0 +1,116 @@
+---
+title: "HTTP Set Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Set Bucket Properties"
+    identifier: "http_set_bucket_props"
+    weight: 101
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/http/set-bucket-props
+  - /riak/kv/2.9.8/dev/references/http/set-bucket-props
+---
+
+Sets bucket properties like `n_val` and `allow_mult`.
+
+## Request
+
+```bash
+PUT /buckets/bucket/props
+```
+
+Important headers:
+
+* `Content-Type` - `application/json`
+
+The body of the request should be a JSON object with a single entry "props".
+Unmodified bucket properties may be omitted.
+
+Available properties:
+
+* `n_val` (integer > 0) - the number of replicas for objects in this bucket
+* `allow_mult` (true or false) - whether to allow sibling objects to be created
+(concurrent updates)
+* `last_write_wins` (true or false) - whether to ignore object history (vector
+clock) when writing
+* `precommit` - [precommit hooks]({{}}riak/kv/2.9.8/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{}}riak/kv/2.9.8/developing/usage/commit-hooks)
+* `r, w, dw, rw` - default quorum values for operations on keys in the bucket.
+Valid values are:
+  * `"all"` - all nodes must respond
+  * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.*
+  * `"one"` - equivalent to 1
+  * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of diverse physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+{{% note title="Node Confirms" %}}
+`node_confirms` is a tunable for durability. When operating in a failure state, Riak will store replicas in fallback vnodes, and in some cases multiple fallbacks may be on the same physical node. `node_confirms` is an option that specifies how many distinct physical nodes must acknowledge a write for it to be considered successful.
+
+When Riak receives a 'put', it starts up a riak_kv_put_fsm (finite state machine). This prepares and then validates the options, then calls any precommit hooks, before executing a put to the local vnode in the preflist, which becomes the co-ordinating node. This then waits for the local vnode response before executing the put request remotely on the two remaining nodes in the preflist.
+
+The FSM then waits for the remote vnode responses, and as it receives responses, it adds these results and checks whether enough results have been collected to satisfy the bucket properties such as 'dw' and 'pw'.
+When analysing the responses, Riak will count the number of different nodes from which results have been returned. The finite state machine can now be required to wait for a minimum number of confirmations from different nodes, whilst also ensuring all other configured options are satisfied.
+
+Once all options are satisfied, the response is returned, postcommit hooks are called, and the FSM finishes.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+  -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/props HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4
+OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 21
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/http/status.md b/content/riak/kv/2.9.8/developing/api/http/status.md
new file mode 100644
index 0000000000..95fc8a3ecb
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/http/status.md
@@ -0,0 +1,173 @@
+---
+title: "HTTP Status"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Status"
+    identifier: "http_status"
+    weight: 111
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/http/status
+  - /riak/kv/2.9.8/dev/references/http/status
+---
+
+Reports on the performance and configuration of the Riak node that received the request. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active.
+
+## Performance
+
+Repeated requests to the `/stats` endpoint do not have a negative
+performance impact as the statistics are cached internally in Riak.
+
+## Request
+
+```bash
+GET /stats
+```
+
+Important headers:
+
+* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`.
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `404 Not Found` - if `riak_kv_stat` is not enabled
+
+Important headers:
+* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks)
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /stats HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: text/plain +> +< HTTP/1.1 200 OK +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 2102 +< +{ + "vnode_gets": 0, + "vnode_puts": 0, + "read_repairs": 0, + "vnode_gets_total": 0, + "vnode_puts_total": 0, + "node_gets": 0, + "node_gets_total": 0, + "node_get_fsm_time_mean": "undefined", + "node_get_fsm_time_median": "undefined", + "node_get_fsm_time_95": "undefined", + "node_get_fsm_time_99": "undefined", + "node_get_fsm_time_100": "undefined", + "node_puts": 0, + "node_puts_total": 0, + "node_put_fsm_time_mean": "undefined", + "node_put_fsm_time_median": "undefined", + "node_put_fsm_time_95": "undefined", + "node_put_fsm_time_99": "undefined", + "node_put_fsm_time_100": "undefined", + "read_repairs_total": 0, + "cpu_nprocs": 84, + "cpu_avg1": 251, + "cpu_avg5": 174, + "cpu_avg15": 110, + "mem_total": 7946684000.0, + "mem_allocated": 4340880000.0, + "nodename": "riak@127.0.0.1", + "connected_nodes": [ + + ], + "sys_driver_version": "1.5", + "sys_global_heaps_size": 0, + "sys_heap_type": "private", + "sys_logical_processors": 2, + "sys_otp_release": "R13B04", + "sys_process_count": 189, + "sys_smp_support": true, + "sys_system_version": "Erlang R13B04 (erts-5.7.5) [[source]] [[64-bit]] [[smp:2:2]] [[rq:2]] [[async-threads:5]] [[hipe]] [[kernel-poll:true]]", + "sys_system_architecture": "i386-apple-darwin10.3.0", + "sys_threads_enabled": true, + "sys_thread_pool_size": 5, + "sys_wordsize": 8, + "ring_members": [ + "riak@127.0.0.1" + ], + "ring_num_partitions": 64, + "ring_ownership": "[{'riak@127.0.0.1',64}]", + "ring_creation_size": 64, + "storage_backend": "riak_kv_bitcask_backend", + "pbc_connects_total": 0, + "pbc_connects": 0, + "pbc_active": 0, + "riak_kv_version": "0.11.0", + "riak_core_version": "0.11.0", + "bitcask_version": "1.0.1", + "luke_version": "0.1", + "webmachine_version": "1.7.1", + "mochiweb_version": "1.7.1", + "erlang_js_version": "0.4", + "runtime_tools_version": "1.8.3", + "crypto_version": "1.6.4", + "os_mon_version": "2.9.1", + "sasl_version": "2.1.9", + "stdlib_version": "1.16.5", + "kernel_version": "2.13.5" +} +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Output Explanation + +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/2.9.8/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
+
+Stat                          | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts             | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total          | Total number of ignored gossip messages since node was started
+rings_reconciled_total        | Total number of ring reconciliation operations since node was started
+rings_reconciled              | Number of ring reconciliation operations in the last minute
+gossip_received               | Number of gossip messages received in the last minute
+rejected_handoffs             | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts              | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total  | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min            | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max            | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean           | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last           | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min           | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max           | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean          | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last          | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running        | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min            | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median         | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean           | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max            | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total          | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running      | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min          | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median       | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean         | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max          | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total        | Total queue size of all local Riak Pipe virtual nodes in the last minute
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/http/store-object.md b/content/riak/kv/2.9.8/developing/api/http/store-object.md
new file mode 100644
index 0000000000..fa9473e8cc
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/http/store-object.md
@@ -0,0 +1,150 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/http/store-object
+  - /riak/kv/2.9.8/dev/references/http/store-object
+---
+
+Stores
an object under the specified bucket/key. Storing an object comes in
+two forms, depending on whether you want to use a key of your choosing, or let
+Riak assign a key to a new object.
+
+## Request
+
+```bash
+POST /types/type/buckets/bucket/keys       # Riak-defined key
+PUT /types/type/buckets/bucket/keys/key    # User-defined key
+POST /buckets/bucket/keys                  # Riak-defined key
+PUT /buckets/bucket/keys/key               # User-defined key
+```
+
+For the sake of compatibility with older clients, `POST` is also acceptable in
+the form where the key is specified.
+
+Important headers:
+
+* `Content-Type` must be set for the stored object. Set what you expect to
+receive back when next requesting it.
+* `X-Riak-Vclock` - if the object already exists, the vector clock attached to the
+object when it was read.
+* `X-Riak-Meta-*` - any additional metadata headers that should be stored with
+the object.
+* `X-Riak-Index-*` - index entries under which this object should be indexed.
+[Read more about Secondary Indexing]({{}}riak/kv/2.9.8/developing/api/http/secondary-indexes)
+* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/2.9.8/developing/api/http/link-walking)
+
+Optional headers (only valid on `PUT`):
+
+* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since`
+invoke conditional request semantics, matching on the `ETag` and `Last-Modified`
+of the existing object. These can be used to prevent overwriting a modified
+object. If the test fails, you will receive a `412 Precondition Failed`
+response. This does not prevent concurrent writes; it is possible for the
+condition to evaluate to true for multiple requests if the requests occur at the
+same time.
+
+Optional query parameters:
+
+* `w` (write quorum) - how many replicas to write to before returning a successful
+response (default is defined at the bucket level)
+* `dw` (durable write quorum) - how many replicas to commit to durable storage
+before returning a successful response (default is defined at the bucket level)
+* `pw` - how many primary replicas must be online to attempt a write (default is
+defined at the bucket level)
+* `returnbody=[true|false]` - whether to return the contents of the stored object.
+
+*This request must include a body (entity).*
+
+## Response
+
+Normal status codes:
+
+* `201 Created` (when submitting without a key)
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+Typical error codes:
+
+* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N)
+* `412 Precondition Failed` - if one of the conditional request headers failed to
+match (see above)
+
+Important headers:
+
+* `Location` - a relative URL to the newly-created object (when submitting without
+a key)
+
+If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/2.9.8/developing/api/http/fetch-object) may be present. As when fetching the object, `300 Multiple Choices`
+may be returned if siblings existed or were created as part of the operation,
+and the response can be dealt with similarly.
+
+## Example: Storing Without Key
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys \
+  -H "Content-Type: text/plain" -d 'this is a test'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> POST /buckets/test/keys HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: text/plain +> Content-Length: 14 +> +< HTTP/1.1 201 Created +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49 +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Example: Storing With Key + +```curl +$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA== +> Content-Length: 13 +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/http/store-search-index.md b/content/riak/kv/2.9.8/developing/api/http/store-search-index.md new file mode 100644 index 0000000000..0c42435a0e --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/http/store-search-index.md @@ -0,0 +1,57 @@ +--- +title: "HTTP Store Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Store Search Index" + identifier: "http_store_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.8/dev/references/http/store-search-index + - /riak/kv/2.9.8/dev/references/http/store-search-index +--- + +Creates a new Riak Search [index]({{}}riak/kv/2.9.8/developing/usage/search/#simple-setup). + +## Request + +``` +PUT /search/index/ +``` + +## Optional Request Body + +If you run a `PUT` request to this endpoint without a request body, Riak +will create a new Search index that uses the [default Search schema]({{}}riak/kv/2.9.8/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. + +To specify a different schema, however, you must pass Riak a JSON object +as the request body in which the `schema` field specifies the name of +the schema to use. If you've [stored a schema]({{}}riak/kv/2.9.8/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +request would create an index called `my_index` that used that schema: + +```curl +curl -XPUT http://localhost:8098/search/index/my_index \ + -H "Content-Type: application/json" \ + -d '{"schema": "my_custom_schema"}' +``` + +More information can be found in [Using Search]({{}}riak/kv/2.9.8/developing/usage/search). 
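+
+For the default-schema case described above, the same `PUT` can simply be
+issued without a request body (the index name `my_index` here is illustrative):
+
+```curl
+curl -XPUT http://localhost:8098/search/index/my_index
+```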
+
+## Normal Response Codes
+
+* `204 No Content` - The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` - The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/http/store-search-schema.md b/content/riak/kv/2.9.8/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..75d22e5b79
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/http/store-search-schema.md
@@ -0,0 +1,55 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/http/store-search-schema
+  - /riak/kv/2.9.8/dev/references/http/store-search-schema
+---
+
+Creates a new Riak [Search schema]({{}}riak/kv/2.9.8/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/2.9.8/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response
+
+* `204 No Content` - The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` - The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict` - The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..ddcab4118c
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers.md
@@ -0,0 +1,189 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/protocol-buffers
+  - /riak/kv/2.9.8/dev/references/protocol-buffers
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages.
Messages are all encoded the same way, consisting of: + +* 32-bit length of message code + Protocol Buffers message in network + order +* 8-bit message code to identify the Protocol Buffers message +* N bytes of Protocol Buffers-encoded message + +### Example + +``` +00 00 00 07 09 0A 01 62 12 01 6B +|----Len---|MC|----Message-----| + +Len = 0x07 +Message Code (MC) = 0x09 = RpbGetReq +RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B + +Decoded Message: +bucket: "b" +key: "k" +``` + +## Message Codes + +Code | Message | +:----|:--------| +0 | `RpbErrorResp` | +1 | `RpbPingReq` | +2 | `RpbPingResp` | +3 | `RpbGetClientIdReq` | +4 | `RpbGetClientIdResp` | +5 | `RpbSetClientIdReq` | +6 | `RpbSetClientIdResp` | +7 | `RpbGetServerInfoReq` | +8 | `RpbGetServerInfoResp` | +9 | `RpbGetReq` | +10 | `RpbGetResp` | +11 | `RpbPutReq` | +12 | `RpbPutResp` | +13 | `RpbDelReq` | +14 | `RpbDelResp` | +15 | `RpbListBucketsReq` | +16 | `RpbListBucketsResp` | +17 | `RpbListKeysReq` | +18 | `RpbListKeysResp` | +19 | `RpbGetBucketReq` | +20 | `RpbGetBucketResp` | +21 | `RpbSetBucketReq` | +22 | `RpbSetBucketResp` | +23 | `RpbMapRedReq` | +24 | `RpbMapRedResp` | +25 | `RpbIndexReq` | +26 | `RpbIndexResp` | +27 | `RpbSearchQueryReq` | +28 | `RpbSearchQueryResp` | +29 | `RpbResetBucketReq` | +30 | `RpbResetBucketResp` | +31 | `RpbGetBucketTypeReq` | +32 | `RpbSetBucketTypeResp` | +40 | `RpbCSBucketReq` | +41 | `RpbCSUpdateReq` | +50 | `RpbCounterUpdateReq` | +51 | `RpbCounterUpdateResp` | +52 | `RpbCounterGetReq` | +53 | `RpbCounterGetResp` | +54 | `RpbYokozunaIndexGetReq` | +55 | `RpbYokozunaIndexGetResp` | +56 | `RpbYokozunaIndexPutReq` | +57 | `RpbYokozunaIndexPutResp` | +58 | `RpbYokozunaSchemaGetReq` | +59 | `RpbYokozunaSchemaGetResp` | +60 | `RpbYokozunaSchemaPutReq` | +80 | `DtFetchReq` | +81 | `DtFetchResp` | +82 | `DtUpdateReq` | +83 | `DtUpdateResp` | +253 | `RpbAuthReq` | +254 | `RpbAuthResp` | +255 | `RpbStartTls` | + +{{% note title="Message Definitions" %}} +All Protocol Buffers messages are defined in the `riak.proto` and other +`.proto` files in the `/src` directory of the +RiakPB project. +{{% /note %}} + +### Error Response + +If the request does not result in an error, Riak will return one of a +variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`, +depending on which request message is sent. + +If the server experiences an error processing a request, however, it +will return an `RpbErrorResp` message instead of the response expected +for the given request (e.g. `RpbGetResp` is the expected response to +`RpbGetReq`). Error messages contain an error string and an error code, +like this: + +```protobuf +message RpbErrorResp { + required bytes errmsg = 1; + required uint32 errcode = 2; +} +``` + +### Values + +* `errmsg` - A string representation of what went wrong +* `errcode` - A numeric code. Currently, only `RIAKC_ERR_GENERAL=1` + is defined.
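+ +As a worked illustration of the framing described above, here is a minimal, hedged sketch in Python that encodes and decodes this envelope and pings a node over a raw socket. The helper names and the short `recv` reads are illustrative assumptions; only the wire format itself comes from this document: + +```python +# Sketch: frame/unframe PBC messages per the encoding described above. +import socket +import struct + +RPB_PING_REQ = 1   # message codes from the table above +RPB_PING_RESP = 2 + +def encode_frame(msg_code, payload=b""): +    # 32-bit big-endian length counts the 1-byte message code plus payload +    return struct.pack("!IB", len(payload) + 1, msg_code) + payload + +def decode_frame(sock): +    length = struct.unpack("!I", sock.recv(4))[0] +    body = sock.recv(length) +    return body[0], body[1:]   # (message code, protobuf-encoded payload) + +with socket.create_connection(("127.0.0.1", 8087)) as s: +    s.sendall(encode_frame(RPB_PING_REQ)) +    code, _ = decode_frame(s) +    assert code == RPB_PING_RESP +```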
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/yz-schema-put) + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..52802ae06e --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,34 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/auth-req + - /riak/kv/2.9.8/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.9.8/using/security/basics). + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..c9e1abe6eb --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,82 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.9.8" +menu: + riak_kv-2.9.8: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/coverage-queries + - /riak/kv/2.9.8/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
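+ +To make the intended client flow concrete, here is a small, hedged sketch of consuming such a plan. The `CoverageEntry` dataclass is a hypothetical stand-in for a decoded `RpbCoverageEntry`, and the query function is passed in rather than implemented: + +```python +# Sketch: fan a 2i query out across the slices of a coverage plan. +from dataclasses import dataclass +from typing import Callable, Iterable, List + +@dataclass +class CoverageEntry:  # hypothetical mirror of RpbCoverageEntry +    ip: str +    port: int +    cover_context: bytes + +def query_all_slices(entries: Iterable[CoverageEntry], +                     run_2i_query: Callable[..., List[bytes]]) -> List[bytes]: +    # One secondary index query per slice, each carrying the slice's +    # opaque cover_context so the server scopes it to that slice. +    results: List[bytes] = [] +    for entry in entries: +        results.extend(run_2i_query(entry.ip, entry.port, +                                    cover_context=entry.cover_context)) +    return results +```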
+ + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/delete-object.md new file mode 100644 index 0000000000..8e7983f908 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/delete-object.md @@ -0,0 +1,104 @@ +--- +title: "PBC Delete Object" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Delete Object" + identifier: "pbc_delete_object" + weight: 107 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/delete-object + - /riak/kv/2.9.8/dev/references/protocol-buffers/delete-object +--- + +Delete an object in the specified [bucket type]({{}}riak/kv/2.9.8/using/cluster-operations/bucket-types)/bucket/key location. + +## Request + +```protobuf +message RpbDelReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 rw = 3; + optional bytes vclock = 4; + optional uint32 r = 5; + optional uint32 w = 6; + optional uint32 pr = 7; + optional uint32 pw = 8; + optional uint32 dw = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + +#### Required Parameters + +Parameter | Description | +:---------|:------------| +`bucket` | The name of the bucket in which the object is stored +`key` | The key under which the object is stored + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw` parameters, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description | +:---------|:------------| +`rw` | How many replicas to delete before returning a successful response +`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Used to prevent deletion of objects that have been modified since the last GET request (sent as a byte array) +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes to which the delete request will be sent +`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter. + +## Response + +Only the message code is returned.
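+ +For comparison with the wire-level example below, here is a hedged sketch of the same delete issued through the official Python client; the method name and quorum keyword arguments are assumptions drawn from that client's API rather than from the message definition above: + +```python +# Sketch: delete an object at bucket/key over PBC with the Python client. +from riak import RiakClient + +client = RiakClient(protocol='pbc', nodes=[{'host': '127.0.0.1', 'pb_port': 8087}]) + +# The rw keyword maps onto the rw field of RpbDelReq. +client.bucket('notabucket').delete('k', rw=1) +```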
+ +## Example + +#### Request + +``` +Hex 00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65 + 74 12 01 6B 18 01 +Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>> + +RpbDelReq protoc decode: +bucket: "notabucket" +key: "k" +rw: 1 + +``` + +#### Response + +``` +Hex 00 00 00 01 0E +Erlang <<0,0,0,1,14>> + +RpbDelResp - only message code defined +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-counter-store.md new file mode 100644 index 0000000000..bf65bfc32d --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-counter-store.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Counter Store" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Data Type Counter Store" + identifier: "pbc_dt_counter_store" + weight: 117 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/dt-counter-store + - /riak/kv/2.9.8/dev/references/protocol-buffers/dt-counter-store +--- + +An operation to update a [counter]({{}}riak/kv/2.9.8/developing/data-types). + +## Request + +```protobuf +message CounterOp { + optional sint64 increment = 1; +} +``` + +The `increment` value specifies how much the counter will be incremented +or decremented, depending on whether the `increment` value is positive +or negative. This operation can be used to update counters that are +stored on their own in a key or [within a map]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-map-store). + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..f528fc3e3c --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,131 @@ +--- +title: "PBC Data Type Fetch" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Data Type Fetch" + identifier: "pbc_dt_fetch" + weight: 114 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/dt-fetch + - /riak/kv/2.9.8/dev/references/protocol-buffers/dt-fetch +--- + +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.9.8/developing/data-types). This request results in a `DtFetchResp` +message (explained in the **Response** section below). + +## Request + +```protobuf +message DtFetchReq { + required bytes bucket = 1; + required bytes key = 2; + required bytes type = 3; + optional uint32 r = 4; + optional uint32 pr = 5; + optional bool basic_quorum = 6; + optional bool notfound_ok = 7; + optional uint32 timeout = 8; + optional bool sloppy_quorum = 9; + optional uint32 n_val = 10; + optional bool include_context = 11 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`key` | The key where the Data Type is stored +`type` | The [bucket type]({{}}riak/kv/2.9.8/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) + +#### Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis.
Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or equal +to N, _or_ a special value denoting `one` +(`4294967295-1`), `quorum` +(`4294967295-2`), `all` +(`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description +:---------|:----------- +`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object +`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object +`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error +`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes to which the fetch request will be sent +`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client + +## Response + +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. + +```protobuf +message DtFetchResp { + enum DataType { + COUNTER = 1; + SET = 2; + MAP = 3; + } + + optional bytes context = 1; + required DataType type = 2; + optional DtValue value = 3; +} +``` + +If the `include_context` option is specified, an opaque "context" value +will be returned along with the user-readable data. When sending an +update request, the client should send this context as well, just as one +would send a [vclock]({{}}riak/kv/2.9.8/learn/glossary/#vector-clock) for standard KV updates. + +The type of the Data Type is specified in the `type` field, and must be +one of the three possible values of the `DataType` enum (`COUNTER`, +`SET`, or `MAP`). + +The current value of the Data Type is contained in the `value` field, +which itself contains a `DtValue` message. This message will have the +following structure: + +```protobuf +message DtValue { + optional sint64 counter_value = 1; + repeated bytes set_value = 2; + repeated MapEntry map_value = 3; +} +``` + +If the Data Type queried is a counter, it will return an integer value +for the counter; if a set, it will return the set's current value, in +bytes; if a map, it will return a `MapEntry` message.
`MapEntry` messages +are structured as follows: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-map-store.md new file mode 100644 index 0000000000..003172d04d --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-map-store.md @@ -0,0 +1,77 @@ +--- +title: "PBC Data Type Map Store" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Data Type Map Store" + identifier: "pbc_dt_map_store" + weight: 119 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/dt-map-store + - /riak/kv/2.9.8/dev/references/protocol-buffers/dt-map-store +--- + +An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc. + +## Request + +Operations on maps are requested using a `MapOp` message, which has the following structure: + +```protobuf +message MapOp { + repeated MapField adds = 1; + repeated MapField removes = 2; + repeated MapUpdate updates = 3; +} +``` + +In a `MapOp` message, you can add fields (counters, sets, registers, flags, or maps) to the map, remove fields from it, or update one or more existing fields. You can include as many field additions, removals, and updates as you wish. + +Adding or removing a field involves including a `MapField` message in your `MapOp` operation: + +```protobuf +message MapField { + enum MapFieldType { + COUNTER = 1; + SET = 2; + REGISTER = 3; + FLAG = 4; + MAP = 5; + } + required bytes name = 1; + required MapFieldType type = 2; +} +``` + +The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated. + +If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure: + +```protobuf +message MapUpdate { + enum FlagOp { + ENABLE = 1; + DISABLE = 2; + } + required MapField field = 1; + optional CounterOp counter_op = 2; + optional SetOp set_op = 3; + optional bytes register_op = 4; + optional FlagOp flag_op = 5; + optional MapOp map_op = 6; +} +``` + +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-set-store). + +If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). + +Updating a register does not involve sending a special message type.
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..40c5115b42 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,36 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/dt-set-store + - /riak/kv/2.9.8/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..0b1ac2c4cb --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,132 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/dt-store + - /riak/kv/2.9.8/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.9.8/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.9.8/using/cluster-operations/bucket-types). 
+ +Also required is a `DtOp` message that specifies which operation is to +be performed, depending on whether the Data Type being updated is a +[counter]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-map-store). + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw` parameters, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description +:---------|:----------- +`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.9.8/learn/glossary/#vector-clock) +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_body` | Whether to return the contents of the stored object. Defaults to `false`. +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored +`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be returned to the client when the `DtUpdateResp` is sent to the client. + +## Response + +The response to a Data Type update request is analogous to +[`RpbPutResp`]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/store-object) for KV operations. If the +`return_body` is set in the update request message (as explained above), +the message will include the opaque context of the Data Type (`context`) +and the new value of the Data Type _after_ the update has completed +(depending on whether the Data Type is a counter, set, or map). If no +key was specified in the update request, it will include the +Riak-assigned key (`key`). + +```protobuf +message DtUpdateResp { + optional bytes key = 1; + optional bytes context = 2; + optional sint64 counter_value = 3; + repeated bytes set_value = 4; + repeated MapEntry map_value = 5; +} +``` + +Assuming `return_body` is set to `true`: if a counter is updated, the +response will include an integer as the `counter_value`; if a set is +updated, a list of binaries will be returned as the `set_value`; and if a +map is updated, the returned `map_value` will be a `MapEntry` message.
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..9d1ffc757f --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/dt-union + - /riak/kv/2.9.8/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/dt-store) message. + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..51fb6e3fba --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,185 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/fetch-object + - /riak/kv/2.9.8/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error +`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R +`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match +`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it +`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable + +## Response + +```protobuf +message RpbGetResp { + repeated RpbContent content = 1; + optional bytes vclock = 2; + optional bool unchanged = 3; +} +``` + +#### Values + +Value | Description +:-----|:----------- +`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty. +`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings +`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true` + +The content entries hold the object value and any metadata. +Below is the structure of an `RpbContent` message, which is +included in GET/PUT responses (`RpbGetResp` (above) and +[`RpbPutResp`]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/store-object), respectively): + +```protobuf +message RpbContent { + required bytes value = 1; + optional bytes content_type = 2; + optional bytes charset = 3; + optional bytes content_encoding = 4; + optional bytes vtag = 5; + repeated RpbLink links = 6; + optional uint32 last_mod = 7; + optional uint32 last_mod_usecs = 8; + repeated RpbPair usermeta = 9; + repeated RpbPair indexes = 10; + optional bool deleted = 11; +} +``` + +From the above, we can see that an `RpbContent` message will always +contain the binary `value` of the object. But it could also contain any +of the following optional parameters: + +* `content_type` - The content type of the object, e.g. `text/plain` + or `application/json` +* `charset` - The character encoding of the object, e.g. `utf-8` +* `content_encoding` - The content encoding of the object, e.g. + `gzip` +* `vtag` - The object's [vtag]({{}}riak/kv/2.9.8/learn/glossary/#vector-clock) +* `links` - This parameter is associated with the now-deprecated link + walking feature and should not be used by Riak clients +* `last_mod` - A timestamp for when the object was last modified, in + [ISO 8601 time](http://en.wikipedia.org/wiki/ISO_8601) +* `last_mod_usecs` - A timestamp for when the object was last modified, + in [Unix time](http://en.wikipedia.org/wiki/Unix_time) +* `usermeta` - This field stores user-specified key/value metadata + pairs to be associated with the object. `RpbPair` messages used to + send metadata of this sort are structured like this: + + ```protobuf + message RpbPair { + required bytes key = 1; + optional bytes value = 2; + } + ``` + Notice that both a key and a value can be stored, or just a key. + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.9.8/developing/usage/secondary-indexes) to objects (in the optional + `indexes` field).
+* `deleted` - Whether the object has been deleted (i.e. whether a + tombstone for the object has been found under the specified key) + +{{% note title="Note on missing keys" %}} +Remember: if a key is not stored in Riak, an `RpbGetResp` response without the +`content` and `vclock` fields will be returned. This should be mapped to +whatever convention the client language uses to return not found. The Erlang +client, for example, returns the atom `{error, notfound}`. +{{% /note %}} + +## Example + +#### Request + +``` +Hex 00 00 00 07 09 0A 01 62 12 01 6B +Erlang <<0,0,0,7,9,10,1,98,18,1,107>> + +RpbGetReq protoc decode: +bucket: "b" +key: "k" +``` + +#### Response + +``` +Hex 00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44 + 6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B + 49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B + CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65 + 30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00 +Erlang <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122, + 56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64, + 224,185,6,18,31,107,206,97,96,96,96,204,96,226,82,44,172,194,91,63, + 101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>> + +RpbGetResp protoc decode: +content { + value: "v2" + vtag: "3SDlf4INKz8hNdhyImKIru" + last_mod: 1271442363 + last_mod_usecs: 105696 +} +vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000" +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-props.md new file mode 100644 index 0000000000..d27a0702a3 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-props.md @@ -0,0 +1,114 @@ +--- +title: "PBC Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Get Bucket Properties" + identifier: "pbc_get_bucket_props" + weight: 102 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/get-bucket-props + - /riak/kv/2.9.8/dev/references/protocol-buffers/get-bucket-props +--- + +Fetch a bucket's properties. + +## Request + +```protobuf +message RpbGetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.9.8/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +the `default` bucket type will be used. 
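+ +As a client-level counterpart to this request, here is a hedged sketch using the official Python client; `get_properties` is an assumption based on that client's bucket API, and the returned dictionary's exact keys may vary by client version: + +```python +# Sketch: fetch bucket properties over PBC with the Python client. +from riak import RiakClient + +client = RiakClient(protocol='pbc', nodes=[{'host': '127.0.0.1', 'pb_port': 8087}]) + +# Equivalent to RpbGetBucketReq with bucket="mybucket" (default bucket type) +props = client.bucket('mybucket').get_properties() +print(props.get('n_val'), props.get('allow_mult')) +```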
+ +## Response + +When an `RpbGetBucketReq` message is sent to Riak, it will respond with +an `RpbGetBucketResp` message, which returns the bucket's properties: + +```protobuf +message RpbGetBucketResp { + required RpbBucketProps props = 1; +} +``` + +The `RpbBucketProps` value itself is structured as follows: + +```protobuf +message RpbBucketProps { + optional uint32 n_val = 1; + optional bool allow_mult = 2; + optional bool last_write_wins = 3; + repeated RpbCommitHook precommit = 4; + optional bool has_precommit = 5 [default = false]; + repeated RpbCommitHook postcommit = 6; + optional bool has_postcommit = 7 [default = false]; + optional RpbModFun chash_keyfun = 8; + optional RpbModFun linkfun = 9; + optional uint32 old_vclock = 10; + optional uint32 young_vclock = 11; + optional uint32 big_vclock = 12; + optional uint32 small_vclock = 13; + optional uint32 pr = 14; + optional uint32 r = 15; + optional uint32 w = 16; + optional uint32 pw = 17; + optional uint32 dw = 18; + optional uint32 rw = 19; + optional bool basic_quorum = 20; + optional bool notfound_ok = 21; + optional bytes backend = 22; + optional bool search = 23; + enum RpbReplMode { + FALSE = 0; + REALTIME = 1; + FULLSYNC = 2; + TRUE = 3; + } + optional RpbReplMode repl = 24; + optional bytes search_index = 25; + optional bytes datatype = 26; + optional bool consistent = 27; +} +``` + +#### Optional Response Values + +Each `RpbBucketProps` message returns all of the properties associated +with a particular bucket. Default values for bucket properties, as well +as descriptions of all of the above properties, can be found in the +[configuration file]({{}}riak/kv/2.9.8/configuring/reference/#default-bucket-properties) documentation. + +It should be noted that the value of an `RpbBucketProps` message may +include other message types, such as `RpbModFun` (specifying +module-function pairs for bucket properties that require them) and +`RpbCommitHook` (specifying the module-function pair and name of a +commit hook). Those message types are structured like this: + +```protobuf +message RpbModFun { + required bytes module = 1; + required bytes function = 2; +} + +message RpbCommitHook { + optional RpbModFun modfun = 1; + optional bytes name = 2; +} +``` + +{{% note title="Note on `RpbReplMode`" %}} +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) +{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-type.md new file mode 100644 index 0000000000..b3cb53cfc7 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-type.md @@ -0,0 +1,37 @@ +--- +title: "PBC Get Bucket Type" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Get Bucket Type" + identifier: "pbc_get_bucket_type" + weight: 112 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/get-bucket-type + - /riak/kv/2.9.8/dev/references/protocol-buffers/get-bucket-type +--- + +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.9.8/using/cluster-operations/bucket-types). + +## Request + +```protobuf +message RpbGetBucketTypeReq { + required bytes type = 1; +} +``` + +Only the name of the bucket type needs to be specified (under `type`).
+ +## Response + +A bucket type's properties will be sent to the client as part of an +[`RpbBucketProps`]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-props) message. + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/get-client-id.md new file mode 100644 index 0000000000..32bb6427fc --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/get-client-id.md @@ -0,0 +1,65 @@ +--- +title: "PBC Get Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Get Client ID" + identifier: "pbc_get_client_id" + weight: 127 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/get-client-id + - /riak/kv/2.9.8/dev/references/protocol-buffers/get-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Get the client id used for this connection. Client ids are used for +conflict resolution and each unique actor in the system should be +assigned one. A client id is assigned randomly when the socket is +connected and can be changed using [Set Client ID]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/set-client-id). + +## Request + +Just the `RpbGetClientIdReq` message code. No request message defined. + +## Response + +```protobuf +// Get ClientId Request - no message defined, just send RpbGetClientIdReq +// message code +message RpbGetClientIdResp { + required bytes client_id = 1; // Client id in use for this connection +} +``` + +## Example + +Request + +``` +Hex 00 00 00 01 03 +Erlang <<0,0,0,1,3>> +``` + + +Response + +``` +Hex 00 00 00 07 04 0A 04 01 65 01 B5 +Erlang <<0,0,0,7,4,10,4,1,101,1,181>> + +RpbGetClientIdResp protoc decode: +client_id: "001e001265" +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..000ae2d398 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,80 @@ +--- +title: "PBC List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "List Buckets" + identifier: "pbc_list_buckets" + weight: 100 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/list-buckets + - /riak/kv/2.9.8/dev/references/protocol-buffers/list-buckets +--- + +List all of the bucket names available. + +{{% note title="Caution" %}} +This call can be expensive for the server. Do not use in performance-sensitive +code. +{{% /note %}} + + +## Request + +Only the message code is required.
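+ +Because the request is a bare message code, it is straightforward to issue at the wire level. The following hedged sketch uses the framing and message codes described earlier in this document (15 for `RpbListBucketsReq`, 16 for `RpbListBucketsResp`); the short `recv` reads are a simplification: + +```python +# Sketch: send a bare RpbListBucketsReq and read the framed response. +import socket +import struct + +RPB_LIST_BUCKETS_REQ = 15 +RPB_LIST_BUCKETS_RESP = 16 + +with socket.create_connection(("127.0.0.1", 8087)) as s: +    # Frame: 4-byte big-endian length (here just the 1-byte code), then code. +    s.sendall(struct.pack("!IB", 1, RPB_LIST_BUCKETS_REQ)) +    length = struct.unpack("!I", s.recv(4))[0] +    body = s.recv(length) +    assert body[0] == RPB_LIST_BUCKETS_RESP +    # body[1:] is a Protocol Buffers-encoded RpbListBucketsResp +```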
+ +## Response + + +```protobuf +message RpbListBucketsResp { + repeated bytes buckets = 1; +} +``` + + +Values + +* `buckets` - Buckets on the server + +## Example + +#### Request + +```bash +Hex 00 00 00 01 0F +Erlang <<0,0,0,1,15>> + +RpbListBucketsReq - only message code defined +``` + + +#### Response + +```bash +Hex 00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62 + 34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02 + 62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37 +Erlang <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10, + 3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>> + +RpbListBucketsResp protoc decode: +buckets: "b1" +buckets: "b5" +buckets: "b4" +buckets: "b8" +buckets: "b3" +buckets: "b10" +buckets: "b9" +buckets: "b2" +buckets: "b6" +buckets: "b7" +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/list-keys.md new file mode 100644 index 0000000000..fdc1c6796d --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/list-keys.md @@ -0,0 +1,101 @@ +--- +title: "PBC List Keys" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "List Keys" + identifier: "pbc_list_keys" + weight: 101 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/list-keys + - /riak/kv/2.9.8/dev/references/protocol-buffers/list-keys +--- + +List all of the keys in a bucket. This is a streaming call, with +multiple response messages sent for each request. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```protobuf +message RpbListKeysReq { + required bytes bucket = 1; +} +``` + +Required Parameters + +* `bucket` - bucket to get keys from + +## Response + +```protobuf +message RpbListKeysResp { + repeated bytes keys = 1; + optional bool done = 2; +} +``` + +#### Values + +* **keys** - batch of keys in the bucket. +* **done** - set true on the last response packet + +## Example + +#### Request + +```bash +Hex 00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73 +Erlang <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>> + +RpbListKeysReq protoc decode: +bucket: "listkeys" + +``` + +#### Response Packet 1 + +```bash +Hex 00 00 00 04 12 0A 01 34 +Erlang <<0,0,0,4,18,10,1,52>> + +RpbListKeysResp protoc decode: +keys: "4" + +``` + +#### Response Packet 2 + +```bash +Hex 00 00 00 08 12 0A 02 31 30 0A 01 33 +Erlang <<0,0,0,8,18,10,2,49,48,10,1,51>> + +RpbListKeysResp protoc decode: +keys: "10" +keys: "3" +``` + + +#### Response Packet 3 + +```bash +Hex 00 00 00 03 12 10 01 +Erlang <<0,0,0,3,18,16,1>> + +RpbListKeysResp protoc decode: +done: true + +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/mapreduce.md new file mode 100644 index 0000000000..2691184e1f --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/mapreduce.md @@ -0,0 +1,153 @@ +--- +title: "PBC MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "MapReduce" + identifier: "pbc_mapreduce" + weight: 107 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/mapreduce + - /riak/kv/2.9.8/dev/references/protocol-buffers/mapreduce +--- + +Execute a MapReduce job.
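+ +Before the wire-level details below, here is a hedged, client-level sketch of running such a job with the official Python client; the fluent phase-building API and the way named JavaScript functions are referenced are assumptions based on that client's MapReduce support: + +```python +# Sketch: a JavaScript map/reduce job over a bucket, via the Python client. +from riak import RiakClient +from riak.mapreduce import RiakMapReduce + +client = RiakClient(protocol='pbc', nodes=[{'host': '127.0.0.1', 'pb_port': 8087}]) + +mr = RiakMapReduce(client).add('bucket_501653') +mr.map('Riak.mapValuesJson')                    # built-in JS map function +mr.reduce('Riak.reduceSum', options={'keep': True}) +print(mr.run()) +```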
+ +## Request + + +```protobuf +message RpbMapRedReq { + required bytes request = 1; + required bytes content_type = 2; +} +``` + + +Required Parameters + +* `request` - MapReduce job +* `content_type` - Encoding for MapReduce job + +MapReduce jobs can be encoded in two different ways: + +* `application/json` - JSON-encoded MapReduce job +* `application/x-erlang-binary` - Erlang external term format + +The JSON encoding is the same as the [REST API]({{}}riak/kv/2.9.8/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{}}riak/kv/2.9.8/developing/app-guide/advanced-mapreduce/#erlang). + +## Response + +The results of the MapReduce job are returned for each phase that +generates a result, encoded in the same format the job was submitted in. +Multiple response messages will be returned followed by a final message +at the end of the job. + +```protobuf +message RpbMapRedResp { + optional uint32 phase = 1; + optional bytes response = 2; + optional bool done = 3; +} +``` + + +Values + +* `phase` - Phase number of the MapReduce job +* `response` - Response encoded with the content_type submitted +* `done` - Set `true` on the last response packet + +## Example + +Here is an example of submitting a JSON-encoded job to sum up a bucket full of +JSON-encoded values. + +``` +{"inputs": "bucket_501653", + "query": + [{"map": {"arg": null, + "name": "Riak.mapValuesJson", + "language": "javascript", + "keep": false}}, + {"reduce": {"arg": null, + "name": "Riak.reduceSum", + "language": "javascript", + "keep": true}}]} +``` + +Request + +```bash +Hex 00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73 + 22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35 + 33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22 + 6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75 + 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61 + 6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22 + 2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A + 61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 + 70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72 + 65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20 + 6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 + 69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20 + 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76 + 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22 + 3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C + 69 63 61 74 69 6F 6E 2F 6A 73 6F 6E +Erlang <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98, + 117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114, + 121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32, + 110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46, + 109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97, + 110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112, + 116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125, + 44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58, + 32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107, + 46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117, + 97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32, + 34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97, + 112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>> + +RpbMapRedReq protoc decode: +request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null, +"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}}, + {"reduce": {"arg": null, "name": "Riak.reduceSum", "language": 
+"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/ping.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..ada160e646 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/ping.md @@ -0,0 +1,46 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/ping + - /riak/kv/2.9.8/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..0c75d0e338 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,63 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/2.9.8/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/2.9.8/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
+ +## Example + +Request to reset the properties for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73 +Erlang <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>> + +RpbResetBucketReq protoc decode: +bucket: "friends" + +``` + +#### Response + +```bash +Hex 00 00 00 01 1E +Erlang <<0,0,0,1,30>> + +RpbResetBucketResp - only message code defined +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/search.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/search.md new file mode 100644 index 0000000000..f769b94b9e --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/search.md @@ -0,0 +1,152 @@ +--- +title: "PBC Search" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Search" + identifier: "pbc_search" + weight: 109 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/search + - /riak/kv/2.9.8/dev/references/protocol-buffers/search +--- + +Send a Search request to retrieve a list of documents, along with a few +stats. + +## Request + + +```protobuf +message RpbSearchQueryReq { + required bytes q = 1; + required bytes index = 2; + optional uint32 rows = 3; + optional uint32 start = 4; + optional bytes sort = 5; + optional bytes filter = 6; + optional bytes df = 7; + optional bytes op = 8; + repeated bytes fl = 9; + optional bytes presort = 10; +} +``` + +Required Parameters + +* `q` - The contents of the query +* `index` - The name of the index to search + +Optional Parameters + +* `rows` - The maximum number of rows to return +* `start` - A start offset, i.e. the number of keys to skip before + returning values +* `sort` - How the search results are to be sorted +* `filter` - Filters search with additional query scoped to inline + fields +* `df` - Override the `default_field` setting in the schema file +* `op` - `and` or `or`, to override the `default_op` operation setting + in the schema file +* `fl` - Return the fields limit +* `presort` - Presort. The options are `key` or `score` + + +## Response + +The results of a search query are returned as a repeating list of 0 or +more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or +more key/value pairs (`RpbPair`) that match the given request +parameters. It also returns the maximum search score and the number of +results. + + +```protobuf +// RpbPair is a generic key/value pair datatype used for +// other message types +message RpbPair { + required bytes key = 1; + optional bytes value = 2; +} + +message RpbSearchDoc { + repeated RpbPair fields = 1; +} + +message RpbSearchQueryResp { + repeated RpbSearchDoc docs = 1; + optional float max_score = 2; + optional uint32 num_found = 3; +} +``` + +Values + +* `docs` - A list of docs that match the search request +* `max_score` - The top score returned +* `num_found` - Returns the total number of values matched by this + search + + +## Example + +Request + +Here we search for any animals that begin with the string `pig`. We only +want the first 100, and sort the values by a `name` field.
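+ +Before the wire-level encoding, the same query through the official Python client might look like the following hedged sketch; `fulltext_search` and its keyword arguments are assumptions that mirror the request fields above: + +```python +# Sketch: search the "animals" index for pig*, first 100 rows, sorted by name. +from riak import RiakClient + +client = RiakClient(protocol='pbc', nodes=[{'host': '127.0.0.1', 'pb_port': 8087}]) +results = client.fulltext_search('animals', 'pig*', rows=100, start=0, sort='name') +print(results['num_found'], results['docs']) +```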
+ +```bash +RpbSearchQueryReq protoc decode: +q: "pig*" +index: "animals" +rows: 100 +start: 0 +sort: "name" + +Hex 00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E + 69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65 +Erlang <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110, + 105,109,97,108,115,24,100,32,0,42,4,110,97, + 109,101>> +``` + +Response + +```bash +Hex 00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D + 61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65 + 12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69 + 6D 61 6C 12 06 70 69 67 65 6F 6E 18 02 +Erlang <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109, + 97,108,18,3,112,105,103,10,12,10,4,110,97, + 109,101,18,4,102,114,101,100,10,18,10,16,10, + 6,97,110,105,109,97,108,18,6,112,105,103, + 101,111,110,24,2>> + +RpbSearchQueryResp protoc decode: +docs { + fields { + key: "animal" + value: "pig" + } + fields { + key: "name" + value: "fred" + } +} +docs { + fields { + key: "animal" + value: "pigeon" + } +} +num_found: 2 +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/secondary-indexes.md new file mode 100644 index 0000000000..1881de0ceb --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/secondary-indexes.md @@ -0,0 +1,125 @@ +--- +title: "PBC Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Secondary Indexes" + identifier: "pbc_secondary_indexes" + weight: 108 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/secondary-indexes + - /riak/kv/2.9.8/dev/references/protocol-buffers/secondary-indexes +--- + +Request a set of keys that match a secondary index query. + +## Request + +```protobuf +message RpbIndexReq { + enum IndexQueryType { + eq = 0; + range = 1; + } + required bytes bucket = 1; + required bytes index = 2; + required IndexQueryType qtype = 3; + optional bytes key = 4; + optional bytes range_min = 5; + optional bytes range_max = 6; + optional bool return_terms = 7; + optional bool stream = 8; + optional uint32 max_results = 9; + optional bytes continuation = 10; + optional uint32 timeout = 11; + optional bytes type = 12; + optional bytes term_regex = 13; + optional bool pagination_sort = 14; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`index` | The name of the index to be queried +`qtype` | The type of index query to be performed. 
This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key`, or `range` for a range query
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`key` | The exact index value to match if `qtype` is set to `eq`
+`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
+`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
+`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
+`max_results` | If pagination is turned on, the number of results to be returned to the client
+`continuation` | The opaque, server-provided value included in a previous paginated response; pass it back to retrieve the next page of results
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.9.8/developing/usage/bucket-types).
+`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
+`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
+
+## Response
+
+The results of a Secondary Index query are returned as a repeating list
+of 0 or more keys that match the given request parameters.
+
+```protobuf
+message RpbIndexResp {
+  repeated bytes keys = 1;
+  repeated RpbPair results = 2;
+  optional bytes continuation = 3;
+  optional bool done = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`keys` | A list of keys that match the index request
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/fetch-object).
+`continuation` | Used for paginated responses
+`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
+
+## Example
+
+#### Request
+
+Here we look for any exact matches of `chicken` on an `animal_bin` index
+for a bucket named `farm`.
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/server-info.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..956823514f --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,62 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/server-info + - /riak/kv/2.9.8/dev/references/protocol-buffers/server-info +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..a8bda9c539 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,72 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/2.9.8/dev/references/protocol-buffers/set-bucket-props +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/2.9.8/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
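+
+For comparison, here is a minimal sketch using the official
+[Erlang client](https://github.com/basho/riak-erlang-client), which
+builds the `RpbSetBucketReq`/`RpbBucketProps` messages for you; the
+host, port, and bucket name are assumptions for the example.
+
+```erlang
+%% Sketch: set allow_mult=true on the bucket <<"friends">> over PBC.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+%% set_bucket/3 encodes the property list into an RpbBucketProps message.
+ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
+```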
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..2344a521aa --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,35 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/2.9.8/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.9.8/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/get-bucket-props). + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..ade477bfff --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,66 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/set-client-id + - /riak/kv/2.9.8/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
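+
+Because this request is so small, it also makes a convenient example of
+how PBC messages are framed on the wire: a 4-byte big-endian length
+(message code plus encoded body), the 1-byte message code, then the
+Protocol Buffers payload. The sketch below hand-frames the request with
+`gen_tcp`, assuming a node listening on 127.0.0.1:8087; the client ID
+bytes are taken from the example that follows.
+
+```erlang
+%% Sketch: hand-frame RpbSetClientIdReq (message code 5) over raw TCP.
+{ok, Sock} = gen_tcp:connect("127.0.0.1", 8087, [binary, {active, false}]),
+ClientId = <<1,101,1,182>>,
+%% Protobuf body: field 1, wire type 2 (bytes) => tag 16#0A, length, bytes.
+Body = <<16#0A, (byte_size(ClientId)):8, ClientId/binary>>,
+%% Frame: the length covers the message code plus the body.
+ok = gen_tcp:send(Sock, <<(1 + byte_size(Body)):32, 5, Body/binary>>),
+%% RpbSetClientIdResp is just message code 6, framed as <<0,0,0,1,6>>.
+{ok, <<0,0,0,1,6>>} = gen_tcp:recv(Sock, 0).
+```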
+
+## Example
+
+Request
+
+```
+Hex      00 00 00 07 05 0A 04 01 65 01 B6
+Erlang   <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "001e001266"
+
+```
+
+
+Response
+
+```
+Hex      00 00 00 01 06
+Erlang   <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..0a8901d152
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,154 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/references/protocol-buffers/store-object
+  - /riak/kv/2.9.8/dev/references/protocol-buffers/store-object
+---
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/2.9.8/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.9.8/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.9.8/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/2.9.8/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/2.9.8/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+  required bytes bucket = 1;
+  optional bytes key = 2;
+  optional bytes vclock = 3;
+  required RpbContent content = 4;
+  optional uint32 w = 5;
+  optional uint32 dw = 6;
+  optional bool return_body = 7;
+  optional uint32 pw = 8;
+  optional bool if_not_modified = 9;
+  optional bool if_none_match = 10;
+  optional bool return_head = 11;
+  optional uint32 timeout = 12;
+  optional bool asis = 13;
+  optional bool sloppy_quorum = 14;
+  optional uint32 n_val = 15;
+  optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, and
+`pw` parameters, provided that the integer value is less than or equal
+to N, _or_ a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier RpbGetResp message. Omit if this is a new key or if you deliberately want to create a sibling.
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`. +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_head` | Return the metadata for the now-stored object without returning the value of the object +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored + +The `if_not_modified`, `if_none_match`, and `asis` parameters are set +only for messages sent between nodes in a Riak cluster and should not be +set by Riak clients. + +#### Response + +```bash +message RpbPutResp { + repeated RpbContent contents = 1; + optional bytes vclock = 2; + optional bytes key = 3; +} +``` + +If `return_body` is set to `true` on the PUT request, the `RpbPutResp` +will contain the current object after the PUT completes, in `contents`, +as well as the object's [causal context]({{}}riak/kv/2.9.8/learn/concepts/causal-context), in the `vclock` +field. The `key` will be sent only if the server generated a random key +for the object. + +If `return_body` is not set and no key is generated, the PUT response +will be empty. + +## Example + +#### Request + +``` +Hex 00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B + 22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01 +Erlang <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34, + 98,97,114,34,125,40,2,56,1>> + +RpbPutReq protoc decode: +bucket: "b" +key: "k" +content { + value: "{"foo":"bar"}" +} +w: 2 +return_body: true + +``` + +#### Response + +``` +Hex 00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A + 22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39 + 36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF + B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60 + CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC + 0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0 + 12 FA 20 89 2C 00 +Erlang <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125, + 42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86, + 106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96, + 96,96,202,96,226,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33, + 182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>> + +RpbPutResp protoc decode: +contents { + value: "{"foo":"bar"}" + vtag: "1caykOD96iNAhomyeVjOYC" + last_mod: 1271453743 + last_mod_usecs: 406416 +} +vclock: "k316a```312`312005R,,351014206031L211214y254014Z!266G371 +302l315I254rw|240022372 211,000" + +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-delete.md new file mode 100644 index 0000000000..50235bf433 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-delete.md @@ -0,0 +1,37 @@ +--- +title: "PBC Yokozuna Index Delete" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Yokozuna Index Delete" + identifier: "pbc_yz_index_delete" + weight: 122 + parent: "apis_pbc" +toc: true +aliases: + - 
/riak/2.9.8/dev/references/protocol-buffers/yz-index-delete + - /riak/kv/2.9.8/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..28ffdfe287 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,63 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/yz-index-get + - /riak/kv/2.9.8/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.8/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..5d11606d79 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,49 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/yz-index-put + - /riak/kv/2.9.8/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. 
+ +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.8/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-schema-get.md new file mode 100644 index 0000000000..93eaecca42 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-schema-get.md @@ -0,0 +1,52 @@ +--- +title: "PBC Yokozuna Schema Get" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Yokozuna Schema Get" + identifier: "pbc_yz_schema_get" + weight: 123 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/yz-schema-get + - /riak/kv/2.9.8/dev/references/protocol-buffers/yz-schema-get +--- + +Fetch a [search schema]({{}}riak/kv/2.9.8/developing/usage/search-schemas) from Riak Search. + +## Request + +In a request message, you only need to specify the name of the schema as +a binary (under `name`); + +```protobuf +message RpbYokozunaSchemaGetReq { + required bytes name = 1; // Schema name +} +``` + +## Response + +```protobuf +message RpbYokozunaSchemaGetResp { + required RpbYokozunaSchema schema = 1; +} +``` + +The response message will include a `RpbYokozunaSchema` structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message includes the schema `name` and its xml `content`. + + + + diff --git a/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-schema-put.md new file mode 100644 index 0000000000..a18408d444 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/protocol-buffers/yz-schema-put.md @@ -0,0 +1,45 @@ +--- +title: "PBC Yokozuna Schema Put" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Yokozuna Schema Put" + identifier: "pbc_yz_schema_put" + weight: 124 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.8/dev/references/protocol-buffers/yz-schema-put + - /riak/kv/2.9.8/dev/references/protocol-buffers/yz-schema-put +--- + +Create a new Solr [search schema]({{}}riak/kv/2.9.8/developing/usage/search-schemas). + +## Request + +```protobuf +message RpbYokozunaSchemaPutReq { + required RpbYokozunaSchema schema = 1; +} +``` + +Each message must contain a `RpbYokozunaSchema` object structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.9.8/developing/usage/search-schemas) `content` as XML. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/#message-codes) code with no data on success. 
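+
+For reference, here is a minimal sketch of the schema-then-index
+workflow using the official
+[Erlang client](https://github.com/basho/riak-erlang-client); the
+schema name, XML file, and index name are assumptions for the example.
+
+```erlang
+%% Sketch: upload a schema, then create an index that uses it.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, SchemaXml} = file:read_file("my_schema.xml"),
+%% Sends RpbYokozunaSchemaPutReq with the schema name and XML content.
+ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXml),
+%% Sends RpbYokozunaIndexPutReq, binding the new index to the schema.
+ok = riakc_pb_socket:create_search_index(Pid, <<"my_index">>, <<"my_schema">>, []).
+```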
+ + + + diff --git a/content/riak/kv/2.9.8/developing/api/repl-hooks.md b/content/riak/kv/2.9.8/developing/api/repl-hooks.md new file mode 100644 index 0000000000..fa0068469a --- /dev/null +++ b/content/riak/kv/2.9.8/developing/api/repl-hooks.md @@ -0,0 +1,196 @@ +--- +title_supertext: "Riak Multi-Datacenter Replication:" +title: "Hooks API" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Multi-Datacenter REPL Hooks API" + identifier: "apis_repl_hooks" + weight: 100 + parent: "developing_apis" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.8/ops/mdc/v2/hooks + - /riak/kv/2.9.8/ops/mdc/v2/hooks +--- +[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl + +This document is a guide to developing extensions for Riak's +Multi-Datacenter Replication feature. + +## Replication Hooks + +Riak allows applications to register replication hooks to control +either of the following: + +* when extra objects need to be replicated along with the current object +* when an object should _not_ be replicated. + +To register a hook, you must call the following function in an +application-specific Erlang module, where `MyMod` is to be replaced +with the name of your custom module: + +```erlang +riak_core:register([{repl_helper, MyMod}]). +``` + +## Replication Hook API + +A replication hook must implement the following functions: + +### send_realtime/2 + +```erlang +(riak_object, RiakClient) -> ok | cancel | [riak_object] +``` + +This hook controls whether an [object][object] +replicated in realtime should be sent. To send this object, return `ok`; +to prevent the object from being sent, return `cancel`. You can also +return a list of Riak objects to be replicated immediately *before* the +current object. This is useful when you have an object that refers to +other objects, e.g. a chunked file, and want to ensure that all of the +dependency objects are replicated before the dependent object. + +### send/2 + +```erlang +(riak_object, RiakClient) -> ok | cancel | [riak_object] +``` + +This hook is used in fullsync replication. To send this +[object][object], +return `ok`; to prevent the object from being sent, return `cancel`. You +can also return a list of Riak objects to be replicated immediately +*before* the current object. This is useful for when you have an object +that refers to other objects, e.g. a chunked file, and want ensure that +all the dependency objects are replicated before the dependent object. + +### recv/1 + +```erlang +(riak_object) -> ok | cancel +``` + +When an [object][object] +is received by the client site, this hook is run. You can use it to +update metadata or to deny the object. + +## Implementing a Sample Replication Hook + +The following is a simple replication hook that will log when an object +is received via replication. For more information about the functions in +the sample, see the [Replication Hook API](#replication-hook-api) section below. + +Here is the relevant Erlang code: + +```erlang +%% Riak Enterprise MDC replication hook sample + +-module(riak_replication_hook_sample). +-export([register/0]). +-export([recv/1, send/2, send_realtime/2]). 
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + + + + + diff --git a/content/riak/kv/2.9.8/developing/app-guide.md b/content/riak/kv/2.9.8/developing/app-guide.md new file mode 100644 index 0000000000..7dfd2b1164 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/app-guide.md @@ -0,0 +1,420 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/2.9.8/dev/using/application-guide/ + - /riak/kv/2.9.8/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/2.9.8/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.9.8/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.9.8/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/2.9.8/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.9.8/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.9.8/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.9.8/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.9.8/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.9.8/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.9.8/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.9.8/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.9.8/developing/usage/search +[use ref search]: {{}}riak/kv/2.9.8/using/reference/search +[usage 2i]: {{}}riak/kv/2.9.8/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.9.8/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.9.8/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.9.8/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.9.8/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.9.8/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.9.8/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.9.8/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.9.8/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.8/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/2.9.8/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/2.9.8/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.8/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.8/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.8/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.8/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.8/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.8/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.9.8/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.9.8/using/reference/strong-consistency 
+[cluster ops strong consistency]: {{}}riak/kv/2.9.8/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/2.9.8/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/2.9.8/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/2.9.8/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/2.9.8/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/2.9.8/setup/installing
+[getting started]: {{}}riak/kv/2.9.8/developing/getting-started
+[usage index]: {{}}riak/kv/2.9.8/developing/usage
+[usage search schema]: {{}}riak/kv/2.9.8/developing/usage/search-schemas
+[glossary]: {{}}riak/kv/2.9.8/learn/glossary
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answers to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data** - While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects** - Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects** - Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys** - It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects. Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]** - If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so. But if
+  your data can be modeled as a [counter][dev data types#counters],
+  [set][dev data types#sets], or [map][dev data types#maps], you
+  should seriously consider using [Riak Data Types][dev data types],
+  which can speed application development and transfer a great deal of
+  complexity away from the application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you use it to store:
+
+* **Objects that exceed 1-2MB in size** - If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies** - If your data cannot be
+  easily denormalized, or if it requires that objects be easily
+  assembled into and accessed as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommended for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search] - Getting started with Riak Search
+* [Search Details][use ref search] - A detailed overview of the concepts and design
+  considerations behind Riak Search
+* [Search Schema][usage search schema] - How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API** - Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon).
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you.
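+
+As a minimal sketch of that "write it like Riak, query it like Solr"
+workflow using the official Erlang client, assuming an index named
+`animals` has already been created and associated with the bucket, and
+with the key, value, and query invented for the example:
+
+```erlang
+%% Store a JSON object as usual...
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+Obj = riakc_obj:new(<<"pets">>, <<"rufus">>,
+                    <<"{\"name_s\":\"Rufus\"}">>, "application/json"),
+ok = riakc_pb_socket:put(Pid, Obj),
+%% ...then query it later with Solr syntax against the index.
+{ok, Results} = riakc_pb_socket:search(Pid, <<"animals">>, <<"name_s:Ruf*">>).
+```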
+ +### When Not to Use Search + +* **When deep pagination is needed** - At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** - In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] - A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] - A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] - An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** - If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** - If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** - If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** - While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
If you have data that requires
+  a modeling solution that the existing Data Types can't cover, you should stick to
+  standard K/V operations.
+* **When object size is of significant concern** - Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth. Riak
+comes equipped with a set of default MapReduce jobs that you can employ,
+or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce] - A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce] - A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only** - You should use MapReduce only when truly
+  necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do** - Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i] - A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i] - Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination** - At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead.
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes** - If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes.
+* **If you're using Bitcask** - 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to reshape your
+data modeling strategy around the available Data Types. To see if this
+feature might be a good fit for your application, we recommend checking
+out the following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers who want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable?
+
+Although Riak always performs best when storing and retrieving immutable
+data, Riak also handles mutable objects very ably using a variety of
+eventual consistency principles.
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] - Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] - A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] - How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] - A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] - A listing of frequently used terms in Riak's + documentation + + + + + diff --git a/content/riak/kv/2.9.8/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.9.8/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..78e3d8b118 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,802 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/2.9.8/dev/advanced/mapreduce/ + - /riak/kv/2.9.8/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/2.9.8/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.9.8/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.9.8/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.9.8/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.9.8/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.9.8/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to +run MapReduce jobs using Erlang or JavaScript. + +{{% note title="Deprecation Warning" %}} +Javascript MapReduce is deprecated and will be removed in a future version. +{{% /note %}} + + +### Why Do We Use MapReduce for Querying Riak KV? + +Key/value stores like Riak KV generally do not offer the kinds of complex +querying capabilities found in other data storage systems, such as +relational databases. MapReduce enables you to perform powerful queries +over the data stored in Riak KV but should be used with caution. + +The main goal of MapReduce is to spread the processing of a query across +many systems to take advantage of parallel processing power. This is +generally done by dividing the query into several steps, i.e. dividing +the dataset into several chunks and then running those step/chunk pairs +on separate physical hosts. Riak KV's MapReduce has an additional goal: +increasing data locality. When processing a large dataset, it's often +much more efficient to take the computation to the data than it is to +bring the data to the computation. + +"Map" and "Reduce" are phases in the query process. Map functions take +one piece of data as input and produce zero or more results as output. +If you're familiar with [mapping over a list][mapping list] +in functional programming languages, you're already familiar with the +"Map" steps in a MapReduce query. + +## MapReduce caveats + +MapReduce should generally be treated as a fallback rather than a +standard part of an application. There are often ways to model data +such that dynamic queries become single key retrievals, which are +dramatically faster and more reliable in Riak KV, and tools such as Riak +search and 2i are simpler to use and may place less strain on a +cluster. + +### R=1 + +One consequence of Riak KV's processing model is that MapReduce queries +have an effective `R` value of 1. The queries are distributed +to a representative sample of the cluster where the data is expected to +be found, and if one server lacks a copy of data it's supposed to have, +a MapReduce job will not attempt to look for it elsewhere. + +For more on the value of `R`, see our documentation on [replication properties][apps replication properties]. + +### Key lists + +Asking Riak KV to generate a list of all keys in a production environment +is generally a bad idea. It's an expensive operation. + +Attempting to constrain that operation to a bucket (e.g., +`mapred_bucket` as used below) does not help because Riak KV must still +pull all keys from storage to determine which ones are in the +specified bucket. + +If at all possible, run MapReduce against a list of known keys. + +### Code distribution + +As we'll discuss in this document, the functions invoked from Erlang +MapReduce must be available on all servers in the cluster unless +using the client library from an Erlang shell. + +### Security restrictions + +If Riak's security functionality is enabled, there are two +restrictions on MapReduce that come into play: + +* The `riak_kv.mapreduce` permission must be granted to the user (or + via the user's groups) +* Other than the module `riak_kv_mapreduce`, any Erlang modules + distributed with Riak KV will **not** be accessible to custom MapReduce + code unless made available via the `add_path` mechanism documented + in [Installing Custom Code][use ref custom code]. + +## How Riak KV's MapReduce Queries Are Specified + +MapReduce queries in Riak KV have two components: (1) a list of inputs and +(2) a list of "steps," or "phases." 
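+
+To make this concrete, here is a minimal sketch of such a query issued
+through the official Erlang client, using two of the stock functions
+shipped in the `riak_kv_mapreduce` module; the bucket and keys are
+assumptions for the example.
+
+```erlang
+%% Sketch: two inputs flow through a map phase and then a reduce phase.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+Inputs = [{<<"mybucket">>, <<"key1">>}, {<<"mybucket">>, <<"key2">>}],
+Query = [%% Map: extract each object's value; keep=false hides this
+         %% phase's output from the final result.
+         {map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+         %% Reduce: count what reached this phase; keep=true returns it.
+         {reduce, {modfun, riak_kv_mapreduce, reduce_count_inputs}, none, true}],
+{ok, Results} = riakc_pb_socket:mapred(Pid, Inputs, Query).
+```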
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any is included, with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it.
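+
+To make re-reduce safety concrete, here is a minimal sketch of a
+set-union reduce function that can be re-applied any number of times,
+because `lists:usort/1` is idempotent over its own output:
+
+```erlang
+%% Sorting and de-duplicating a list that already contains earlier
+%% outputs of this function leaves the result unchanged, so batching
+%% and re-reduce order do not matter.
+fun(Values, _Arg) ->
+    lists:usort(Values)
+end.
+```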
+ +### How a Link Phase Works in Riak KV + +Link phases find links matching patterns specified in the query +definition. The patterns specify which buckets and tags links must have. + +"Following a link" means adding it to the output list of this phase. The +output of this phase is often most useful as input to a map phase or to +another reduce phase. + +## Invoking MapReduce + +To illustrate some key ideas, we'll define a simple module that +implements a map function to return the key value pairs contained in a +bucket and use it in a MapReduce query via Riak KV's HTTP API. + +Here is our example MapReduce function: + +```erlang +-module(mr_example). + +-export([get_keys/3]). + +% Returns bucket and key pairs from a map phase +get_keys(Value,_Keydata,_Arg) -> + [{riak_object:bucket(Value),riak_object:key(Value)}]. +``` + +Save this file as `mr_example.erl` and proceed to compiling the module. + +{{% note title="Note on the Erlang Compiler" %}} +You must use the Erlang compiler (`erlc`) associated with the +Riak KV installation or the version of Erlang used when compiling Riak KV from +source. +{{% /note %}} + +Compiling the module is a straightforward process: + +```bash +erlc mr_example.erl +``` + +Successful compilation will result in a new `.beam` file, `mr_example.beam`. + +Send this file to your operator, or read about [installing custom code][use ref custom code] +on your Riak KV nodes. Once your file has been installed, all that +remains is to try the custom function in a MapReduce query. For +example, let's return keys contained within a bucket named `messages` +(please pick a bucket which contains keys in your environment). + +```curl +curl -XPOST localhost:8098/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' +``` + +The result should be a JSON map of bucket and key names expressed as key/value pairs. + +{{% note %}} +Be sure to install the MapReduce function as described above on all of +the nodes in your cluster to ensure proper operation. +{{% /note %}} + + +## Phase functions + +MapReduce phase functions have the same properties, arguments, and +return values whether you write them in Javascript or Erlang. + +### Map phase functions + +Map functions take three arguments (in Erlang, arity-3 is required). +Those arguments are: + + 1. `Value`: the value found at a key. This will be a Riak object, which + in Erlang is defined and manipulated by the `riak_object` module. + In Javascript, a Riak object looks like this: + + ```javascript + { + "bucket_type" : BucketTypeAsString, + "bucket" : BucketAsString, + "key" : KeyAsString, + "vclock" : VclockAsString, + "values" : [ + { + "metadata" : { + "X-Riak-VTag":VtagAsString, + "X-Riak-Last-Modified":LastModAsString, + "Links":[...List of link objects], + // ...other metadata... + }, + "data" : ObjectData + }, + // ...other metadata/data values (siblings)... + ] + } + ``` + 2. *KeyData* : key data that was submitted with the inputs to the query or phase. + 3. *Arg* : a static argument for the entire phase that was submitted with the query. + +A map phase should produce a list of results. You will see errors if +the output of your map function is not a list. Return the empty list if +your map function chooses not to produce output. If your map phase is +followed by another map phase, the output of the function must be +compatible with the input to a map phase - a list of bucket-key pairs or +`bucket-key-keydata` triples. 
+
+#### Map function examples
+
+This map function returns the value (data) of the object being mapped:
+
+```erlang
+fun(Value, _KeyData, _Arg) ->
+    [riak_object:get_value(Value)]
+end.
+```
+
+This map function filters its inputs based on the arg and returns bucket-key pairs for a subsequent map phase:
+
+```erlang
+fun(Value, _KeyData, Arg) ->
+    Key = riak_object:key(Value),
+    Bucket = riak_object:bucket(Value),
+    case erlang:byte_size(Key) of
+        L when L > Arg ->
+            [{Bucket,Key}];
+        _ -> []
+    end
+end.
+```
+
+### Reduce phase functions
+
+Reduce functions take two arguments. Those arguments are:
+
+1. *ValueList*: the list of values produced by the preceding phase in the MapReduce query.
+2. *Arg*: a static argument for the entire phase that was submitted with the query.
+
+A reduce function should produce a list of values, but it must also be
+true that the function is commutative, associative, and idempotent. That
+is, if the input list `[a,b,c,d]` is valid for a given F, then all of
+the following must produce the same result:
+
+```erlang
+  F([a,b,c,d])
+  F([a,d] ++ F([c,b]))
+  F([F([a]),F([c]),F([b]),F([d])])
+```
+
+#### Reduce function examples
+
+This reduce function assumes the values in the input are numbers and
+sums them:
+
+```erlang
+fun(Values, _Arg) ->
+  [lists:foldl(fun erlang:'+'/2, 0, Values)]
+end.
+```
+
+This reduce function sorts its inputs:
+
+```erlang
+fun(Values, _Arg) ->
+  lists:sort(Values)
+end.
+```
+
+## MapReduce Examples
+
+Riak KV supports describing MapReduce queries in Erlang syntax through the
+Protocol Buffers API. This section demonstrates how to do so using the
+Erlang client.
+
+{{% note title="Distributing Erlang MapReduce Code" %}}
+Any modules and functions you use in your Erlang MapReduce calls must be
+available on all nodes in the cluster. Please read about
+[installing custom code]({{}}riak/kv/2.9.8/using/reference/custom-code).
+{{% /note %}}
+
+### Erlang Example
+
+Before running some MapReduce queries, let's create some objects to
+run them on. Unlike the first example when we compiled
+`mr_example.erl` and distributed it across the cluster, this time
+we'll use the [Erlang client library][erlang client] and shell.
+
+```erlang
+1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087).
+2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>,
+                        term_to_binary(["eggs", "bacon"])).
+3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>,
+                         term_to_binary(["bread", "bacon"])).
+4> riakc_pb_socket:put(Client, Yours, [{w, 1}]).
+5> riakc_pb_socket:put(Client, Mine, [{w, 1}]).
+```
+
+Now that we have a client and some data, let's run a query that counts
+how many times each grocery item occurs.
+
+```erlang
+6> Count = fun(G, undefined, none) ->
+       [dict:from_list([{I, 1}
+           || I <- binary_to_term(riak_object:get_value(G))])]
+   end.
+7> Merge = fun(Gcounts, none) ->
+       [lists:foldl(fun(G, Acc) ->
+                        dict:merge(fun(_, X, Y) -> X+Y end,
+                                   G, Acc)
+                    end,
+                    dict:new(),
+                    Gcounts)]
+   end.
+8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred(
+       Client,
+       [{<<"groceries">>, <<"mine">>},
+        {<<"groceries">>, <<"yours">>}],
+       [{map, {qfun, Count}, none, false},
+        {reduce, {qfun, Merge}, none, true}]).
+9> L = dict:to_list(R).
+```
+
+{{% note title="Riak Object Representations" %}}
+Note how the `riak_object` module is used in the MapReduce
+function but the `riakc_obj` module is used on the client.
+Riak objects are represented differently internally to the cluster than
+they are externally.
+{{% /note %}}
+
+Given the lists of groceries we created, the sequence of commands above
+would result in L being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`.
+
+### Erlang Query Syntax
+
+`riakc_pb_socket:mapred/3` takes a client and two lists as arguments.
+The first list contains bucket-key pairs. The second list contains
+the phases of the query.
+
+`riakc_pb_socket:mapred_bucket/3` replaces the first list of
+bucket-key pairs with the name of a bucket; see the warnings above
+about using this in a production environment.
+
+#### Inputs
+
+The `mapred/3` input objects are given as a list of tuples in the
+format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and
+`Key` should be binaries, and `KeyData` can be any Erlang term. The
+former form is equivalent to `{{Bucket,Key},undefined}`.
+
+#### Query
+
+The query is given as a list of map, reduce, and link phases. Map and
+reduce phases are each expressed as tuples in the following form:
+
+```erlang
+{Type, FunTerm, Arg, Keep}
+```
+
+`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument
+(any Erlang term) to pass to each execution of the phase. `Keep` is
+either `true` or `false` and determines whether results from the phase
+will be included in the final value of the query. Riak KV assumes that the
+final phase will return results.
+
+`FunTerm` is a reference to the function that the phase will execute and
+takes any of the following forms:
+
+* `{modfun, Module, Function}` where `Module` and `Function` are atoms
+  that name an Erlang function in a specific module
+* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous
+  function)
+* `{jsfun,Name}` where `Name` is a binary that, when evaluated in
+  Javascript, points to a built-in Javascript function
+* `{jsanon, Source}` where `Source` is a binary that, when evaluated in
+  Javascript, is an anonymous function
+* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains
+  the source for an anonymous Javascript function
+
+{{% note title="qfun Note" %}}
+Using `qfun` in compiled applications can be a fragile
+operation. Please keep the following points in mind:
+
+1. The module in which the function is defined must be present and
+exactly the same version on both the client and Riak KV nodes.
+
+2. Any modules and functions used by this function (or any function in
+the resulting call stack) must also be present on the Riak KV nodes.
+
+Errors about failures to ensure both 1 and 2 are often surprising,
+usually seen as opaque missing-function or function-clause
+errors. Especially in the case of differing module versions, this can be
+difficult to diagnose without expecting the issue and knowing of
+`Module:info/0`.
+
+When using the Erlang shell, anonymous MapReduce functions can be
+defined and sent to Riak KV instead of deploying them to all servers in
+advance, but condition #2 above still holds.
+{{% /note %}}
+
+Link phases are expressed in the following form:
+
+```erlang
+{link, Bucket, Tag, Keep}
+```
+
+`Bucket` is either a binary name of a bucket to match, or the atom `_`,
+which matches any bucket. `Tag` is either a binary tag to match, or the
+atom `_`, which matches any tag. `Keep` has the same meaning as in map
+and reduce phases.
+
+> There is a small group of prebuilt Erlang MapReduce functions available
+with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl).
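+
+As a quick sketch of combining those prebuilt phases (assuming the
+connected `Client` and the `groceries` data from the example above):
+
+```erlang
+%% Map each object to its value, then reduce to a sorted set union;
+%% modfun phases need no custom code deployed to the cluster.
+riakc_pb_socket:mapred(
+    Client,
+    [{<<"groceries">>, <<"mine">>}, {<<"groceries">>, <<"yours">>}],
+    [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+     {reduce, {modfun, riak_kv_mapreduce, reduce_set_union}, none, true}]).
+```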
+ +## Bigger Data Examples + +### Loading Data + +This Erlang script will load historical stock-price data for Google +(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it. +Paste the code below into a file called `load_data.erl` inside the `dev` +directory (or download it below). + +```erlang +#!/usr/bin/env escript +%% -*- erlang -*- +main([]) -> + io:format("Requires one argument: filename with the CSV data~n"); +main([Filename]) -> + {ok, Data} = file:read_file(Filename), + Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])), + lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines). + +format_and_insert(Line) -> + JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line), + Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]), + io:format("Inserting: ~s~n", [hd(Line)]), + os:cmd(Command). +``` + +Make the script executable: + +```bash +chmod +x load_data.erl +``` + +Download the CSV file of stock data linked below and place it in the +`dev` directory where we've been working. + +* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) - Google historical stock data +* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) - Alternative script in Ruby to load the data +* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) - Erlang script to load data (as shown in snippet) + +Now load the data into Riak KV. + +```bash +./load_data.erl goog.csv +``` + + +### Map only: find the days on which the high was over $600.00 + +From the Erlang shell with the client library loaded, let's define a +function which will check each value in our `goog` bucket to see if +the stock's high for the day was above $600. + +```erlang +> HighFun = fun(O, _, LowVal) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> High = proplists:get_value(<<"High">>, Map, -1.0), +> case High > LowVal of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun +``` + +Now we'll use `mapred_bucket/3` to send that function to the cluster. + +```erlang +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]). + {ok,[{0, + [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>, + <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>, + <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>, + <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>, + <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>, + <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>, + <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>, + <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]} +``` + +#### Map only: find the days on which the close is lower than open + +This example is slightly more complicated: instead of comparing a +single field against a fixed value, we're looking for days when the +stock declined. + +```erlang +> CloseLowerFun = fun(O, _, _) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> Close = proplists:get_value(<<"Close">>, Map, -1.0), +> Open = proplists:get_value(<<"Open">>, Map, -2.0), +> case Close < Open of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun + +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]). 
+{ok,[{0, + [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>, + <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>, + <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>, + <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>, + <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>, + <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>, + <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>, + <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]} +``` + +#### Map and Reduce: find the maximum daily variance in price by month + +Here things start to get tricky. We'll use map to determine each day's +rise or fall, and our reduce phase will identify each month's largest +variance. + +```erlang +DailyMap = fun(O, _, _) -> + {struct, Map} = mochijson2:decode(riak_object:get_value(O)), + Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")), + High = proplists:get_value(<<"High">>, Map, 0.0), + Low = proplists:get_value(<<"Low">>, Map, 0.0), + Month = string:substr(Date, 1, 7), + [{Month, abs(High - Low)}] +end. + +MonthReduce = fun(List, _) -> + {Highs, _} = lists:foldl( + fun({Month, _Value}=Item, {Accum, PrevMonth}) -> + case Month of + PrevMonth -> + %% Highest value is always first in the list, so + %% skip over this one + {Accum, PrevMonth}; + _ -> + {[Item] ++ Accum, Month} + end + end, + {[], ""}, + List), + Highs + end. +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]). +{ok,[{1, + [{"2010-02",10.099999999999909}, + {"2006-02",11.420000000000016}, + {"2004-08",8.100000000000009}, + {"2008-08",14.490000000000009}, + {"2006-05",11.829999999999984}, + {"2005-10",4.539999999999964}, + {"2006-06",7.300000000000011}, + {"2008-06",9.690000000000055}, + {"2006-03",11.770000000000039}, + {"2006-12",4.880000000000052}, + {"2005-09",9.050000000000011}, + {"2008-03",15.829999999999984}, + {"2008-09",14.889999999999986}, + {"2010-04",9.149999999999977}, + {"2008-06",14.909999999999968}, + {"2008-05",13.960000000000036}, + {"2005-05",2.780000000000001}, + {"2005-07",6.680000000000007}, + {"2008-10",21.390000000000043}, + {"2009-09",4.180000000000007}, + {"2006-08",8.319999999999993}, + {"2007-08",5.990000000000009}, + {[...],...}, + {...}|...]}]} +``` + +#### A MapReduce Challenge + +Here is a scenario involving the data you already have loaded. + +MapReduce Challenge: Find the largest day for each month in terms of +dollars traded, and subsequently the largest overall day. + +*Hint*: You will need at least one each of map and reduce phases. + +## Streaming MapReduce + +Because Riak KV distributes the map phases across the cluster to increase +data locality, you can gain access to the results of those individual +computations as they finish via streaming. Streaming can be very +helpful when getting access to results from a high latency MapReduce job +that only contains map phases. Streaming of results from reduce phases +isn't as useful, but if your map phases return data (keep: true), they +will be returned to the client even if the reduce phases haven't +executed. This will let you use streaming with a reduce phase to collect +the results of the map phases while the jobs are run and then get the +result to the reduce phase at the end. + +### Streaming via the HTTP API + +You can enable streaming with MapReduce jobs submitted to the `/mapred` +resource by adding `?chunked=true` to the url. The response will be sent +using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`. 
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error
+
+Internal Server Error
+
+The server encountered an error while processing this request:
+
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, typically the information you need is buried more deeply within the stack.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
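+
+For reference, the corrected call indexes from 1 (a brief shell
+sketch):
+
+```erlang
+> string:substr("2009-06-10", 1, 7).
+"2009-06"
+```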
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.9.8/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..b0bdcdcdce
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,72 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+Cluster metadata is a subsystem inside Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/2.9.8/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.9.8/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.9.8/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
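+
+As a minimal, hypothetical sketch of that interface (run from an
+attached Erlang shell on a Riak node; the prefix and key names below
+are examples only):
+
+```erlang
+%% All metadata lives under a {Prefix, SubPrefix} pair.
+FullPrefix = {<<"myapp">>, <<"settings">>},
+
+%% put/3 writes locally and broadcasts the change asynchronously.
+ok = riak_core_metadata:put(FullPrefix, <<"feature_x">>, enabled),
+
+%% get/2 reads from the local node's copy, without a network round trip.
+enabled = riak_core_metadata:get(FullPrefix, <<"feature_x">>),
+
+%% to_list/1 enumerates the key/value pairs stored under the prefix.
+Pairs = riak_core_metadata:to_list(FullPrefix).
+```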
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/app-guide/reference.md b/content/riak/kv/2.9.8/developing/app-guide/reference.md
new file mode 100644
index 0000000000..941577d3bd
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/app-guide/reference.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+#menu:
+#  riak_kv-2.9.8:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/app-guide/replication-properties.md b/content/riak/kv/2.9.8/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..a38a6ad43a
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/app-guide/replication-properties.md
@@ -0,0 +1,584 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/advanced/replication-properties
+  - /riak/kv/2.9.8/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/2.9.8/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/2.9.8/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/2.9.8/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/2.9.8/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.9.8/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.9.8/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/2.9.8/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, at the time
+of each read or write. Examples are shown in the
+[section on client-level replication settings]({{}}riak/kv/2.9.8/developing/app-guide/replication-properties#client-level-replication-settings)
+below.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.9.8/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
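+
+To sanity-check the properties attached to the new type, one option is
+the Erlang client's `get_bucket_type/2` (a sketch; assumes an open
+connection `Pid`):
+
+```erlang
+%% Fetch the type's properties and confirm the replication factor.
+{ok, Props} = riakc_pb_socket:get_bucket_type(Pid, <<"custom_props">>),
+5 = proplists:get_value(n_val, Props).
+```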
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/2.9.8/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.9.8/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/2.9.8/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_object:new({<<"w_equals_3">>, <<"animal_facts">>},
+                       <<"giraffe">>,
+                       <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.9.8/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/2.9.8/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.9.8/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.9.8/setup/planning/backend/multi).
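+
+Like the other parameters, DW can also be supplied per request; a brief
+sketch with the Erlang client (assuming a connection `Pid` and an
+object `Obj` built as in the examples above):
+
+```erlang
+%% Require two vnodes to confirm the write has reached disk.
+riakc_pb_socket:put(Pid, Obj, [{dw, 2}]).
+```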
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
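+
+With the Erlang client, for example, a symbolic value can be passed
+anywhere an integer is accepted (a sketch; assumes a connection `Pid`):
+
+```erlang
+%% Wait for every replica to respond before returning this read.
+riakc_pb_socket:get(Pid, <<"animal_facts">>, <<"chimpanzee">>,
+                    [{r, all}]).
+```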
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/2.9.8/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/2.9.8/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.9.8/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+
+<em>Tuning CAP Controls in Riak from Basho Technologies on Vimeo.</em>
+
+
diff --git a/content/riak/kv/2.9.8/developing/app-guide/strong-consistency.md b/content/riak/kv/2.9.8/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..37d3688f1d
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/app-guide/strong-consistency.md
@@ -0,0 +1,261 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/advanced/strong-consistency
+  - /riak/kv/2.9.8/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/2.9.8/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/2.9.8/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/2.9.8/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/2.9.8/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/2.9.8/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/2.9.8/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/2.9.8/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.8/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/2.9.8/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/2.9.8/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/2.9.8/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/2.9.8/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/2.9.8/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/2.9.8/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/2.9.8/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/2.9.8/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/2.9.8/developing/client-libraries
+[getting started]: {{}}riak/kv/2.9.8/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/2.9.8/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types, and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent and partition
+tolerant) for all of the data in that bucket. You can store just some of
+your data in strongly consistent buckets or all of your data, depending
+on your use case. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability because a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable, or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
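+
+For quick reference, enabling the subsystem amounts to turning on a
+single setting in `riak.conf` on every node. This is a sketch assuming a
+default package install (at least three nodes are required); see the
+configuration docs linked above for full details:
+
+```bash
+# Append the setting and restart each node:
+echo "strong_consistency = on" | sudo tee -a /etc/riak/riak.conf
+sudo riak restart
+```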
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
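+
+For example, a write to a bucket of this type and a subsequent read can
+be sketched with the HTTP API as follows (the bucket and key names are
+hypothetical):
+
+```curl
+# Write an object to a bucket bearing the strongly_consistent type:
+curl -XPUT http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice \
+  -H "Content-Type: application/json" \
+  -d '{"balance": 100}'
+
+# Any read issued after that write succeeds will return the new value:
+curl http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice
+```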
+
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function very differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations enable you to set a variety of
+[replication properties][apps replication properties] either on each request or at the
+bucket level [using bucket types][usage bucket types], those settings are quietly ignored
+for strongly consistent operations. These settings include `r`, `pr`,
+`w`, `rw`, and others. Two replication properties that _can_ be set,
+however, are `n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
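+
+Following the pattern shown above, both properties can be set when the
+type is created (the type name here is only an example):
+
+```bash
+riak-admin bucket-type create consistent_n5 \
+    '{"props":{"consistent":true,"n_val":5}}'
+riak-admin bucket-type activate consistent_n5
+```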
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later, there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients but offer important advantages.
+
+While we strongly recommend attaching causal context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---doing so is purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. While it is
+possible to write to non-existent keys without attaching a context,
+we recommend doing so only if you are certain that the key does not
+yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
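+
+As a rough HTTP-level sketch of what "attaching a context" means: a read
+returns an `X-Riak-Vclock` header, and that value is passed back with
+the subsequent write (names below are placeholders; see the link above
+for complete, client-level examples):
+
+```curl
+# 1. Fetch the object and note the X-Riak-Vclock response header:
+curl -i http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice
+
+# 2. Write the modified value back, echoing that vclock:
+curl -XPUT http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice \
+  -H "Content-Type: application/json" \
+  -H "X-Riak-Vclock: <vclock-from-step-1>" \
+  -d '{"balance": 50}'
+```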
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure, you should default to supplying a causal context.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates. A retry
+   sketch is shown below.
+
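+Because such failures are expected behavior, applications should be
+prepared to retry the full read/modify/write cycle. The following is a
+minimal shell sketch of that loop, reusing the hypothetical bucket and
+key from above; real applications would implement this in their client
+language, with backoff and a retry cap:
+
+```bash
+for attempt in 1 2 3 4 5; do
+  # Re-fetch the object to obtain the current value and vclock...
+  vclock=$(curl -si http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice \
+    | grep -i '^x-riak-vclock:' | awk '{print $2}' | tr -d '\r')
+  # ...then retry the write with that vclock attached.
+  status=$(curl -s -o /dev/null -w "%{http_code}" -XPUT \
+    http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice \
+    -H "Content-Type: application/json" \
+    -H "X-Riak-Vclock: $vclock" \
+    -d '{"balance": 50}')
+  # A 2xx status means the write was accepted; otherwise, loop and retry.
+  case "$status" in 2*) break ;; esac
+done
+```
+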
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+non-strongly-consistent buckets. One important exception to this is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, and so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a non-empty key without including causal
+context, you will receive one of the following errors, depending on your client:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+<html><head><title>412 Precondition Failed</title></head><body><h1>Precondition Failed</h1>Precondition Failed<p><hr><address>mochiweb+webmachine web server</address></body></html>
+```
+
+> **Getting Started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries][dev client libraries], you can find more information about
+getting started with your client in our
+[Developing with Riak KV: Getting Started][getting started] section.
+
+## Known Issue with Client Libraries
+
+All of Basho's official [client libraries][dev client libraries] currently
+convert errors returned by Riak into generic exceptions, with a message
+derived from the error message returned by Riak. In many cases this presents no
+problems, since many error conditions are normal when using Riak.
+
+When working with strong consistency, however, operations like
+[conditional puts][config strong consistency#details] commonly
+produce errors that are difficult for clients to interpret. For example,
+it is expected behavior for conditional puts to fail in the case of
+concurrent updates to an object. At present, the official Riak clients
+will convert this failure into an exception that is no different from
+other error conditions, i.e. they will not indicate any
+strong-consistency-specific errors.
+
+The best solution to this problem at the moment is to catch these
+exceptions on the application side and parse server-side error messages
+to see if the error involved a conditional failure. If so, you should
+set up your application to retry any updates, perhaps a specified number
+of times or perhaps indefinitely, depending on the use case.
+
+If you do set up retry logic of this sort, however, it is necessary
+to retry the entire read/modify/put cycle, meaning that you will need
+to fetch the object, modify it, and then write. If you perform a simple
+put over and over again, without reading the object, the update will
+continue to fail.
+
+A future version of Riak will address these issues by modifying the
+server API to more accurately report errors specific to strongly
+consistent operations.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/app-guide/write-once.md b/content/riak/kv/2.9.8/developing/app-guide/write-once.md
new file mode 100644
index 0000000000..f211431a80
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/app-guide/write-once.md
@@ -0,0 +1,159 @@
+---
+title: "Write Once"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Write Once"
+    identifier: "app_guide_write_once"
+    weight: 102
+    parent: "developing_app_guide"
+toc: true
+version_history:
+  in: "2.1.0+"
+aliases:
+  - /riak/2.9.8/dev/advanced/write-once
+  - /riak/kv/2.9.8/dev/advanced/write-once
+---
+
+[glossary vnode]: {{}}riak/kv/2.9.8/learn/glossary/#vnode
+[bucket type]: {{}}riak/kv/2.9.8/developing/usage/bucket-types
+[Riak data types]: {{}}riak/kv/2.9.8/developing/data-types
+[strong consistency]: {{}}riak/kv/2.9.8/developing/app-guide/strong-consistency
+
+Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution.
+
+{{% note %}}
+Write-once buckets do not support Riak commit hooks. Because Riak objects are
+inserted into the realtime queue using a postcommit hook, realtime replication
+is unavailable for write-once buckets.
+Fullsync replication will, however, replicate the data.
+{{% /note %}}
+
+## Configuration
+
+When the new `write_once` [bucket type][bucket type] parameter is set to
+`true`, buckets of this type will treat all key/value entries as semantically
+"write once": once written, entries should not be modified or overwritten by the user.
+
+The `write_once` property is a boolean property applied to a bucket type and
+may only be set at bucket creation time. Once a bucket type has been set with
+this property and activated, the `write_once` property may not be modified.
+
+The `write_once` property is incompatible with [Riak data types][Riak data types]
+and [strong consistency][strong consistency]. This means that if you attempt
+to create a bucket type with the `write_once` property set to `true`, any
+attempt to set the `datatype` parameter or to set the `consistent` parameter
+to `true` will fail.
+
+The `write_once` property may not be set on the default bucket type, and may
+not be set on individual buckets. If you set the `lww` or `allow_mult`
+parameters on a write-once bucket type, those settings will be ignored, as
+sibling values are disallowed by default.
+
+The following example shows how to configure a bucket type with the
+`write_once` property:
+
+```bash
+riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}'
+# my-bucket-type created
+
+riak-admin bucket-type activate my-bucket-type
+# my-bucket-type has been activated
+
+riak-admin bucket-type status my-bucket-type
+# my-bucket-type is active
+...
+write_once: true
+...
+```
+
+## Quorum
+
+The write path used by write-once buckets supports the `w`, `pw`, and `dw`
+configuration values. However, if `dw` is specified, then the value of `w` is
+taken to be the maximum of the `w` and `dw` values. For example, for an `n_val`
+of 3, if `dw` is set to `all`, then `w` will be `3`.
+
+The write-once path additionally supports the `sloppy_quorum` property. If set
+to `false`, only primary nodes will be selected for calculation of write quorum
+nodes.
+
+## Runtime
+
+The write-once path circumvents the normal coordinated PUT code path, and
+instead sends write requests directly to all [vnodes][glossary vnode] (or
+vnode proxies) in the effective preference list for the write operation.
+
+In place of the `put_fsm` used in the normal path, we introduce a collection of
+new intermediate worker processes (implementing `gen_server` behavior). The
+role of these intermediate processes is to dispatch put requests to vnode or
+vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`,
+the write-once workers are long-lived for the lifecycle of the `riak_kv`
+application. They are therefore stateful and store request state in a
+state-local dictionary.
+
+The relationship between the `riak_client`, write-once workers, and vnode
+proxies is illustrated in the following diagram:
+
+![Write Once]({{}}images/write_once.png) +
+ +## Client Impacts + +Since the write-once code path is optimized for writes of data that will not +be updated and therefore may potentially issue asynchronous writes, some +client features might not work as expected. For example, PUT requests asking +for the object to be returned will behave like requests that do not +request the object to be returned when they are performed against write-once +buckets. + + +## Siblings + +As mentioned, entries in write-once buckets are intended to be written only +once---users who are not abusing the semantics of the bucket type should not be +updating or over-writing entries in buckets of this type. However, it is +possible for users to misuse the API, accidentally or otherwise, which might +result in incomparable entries for the same key. + +In the case of siblings, write-once buckets will resolve the conflict by +choosing the "least" entry, where sibling ordering is based on a deterministic +SHA-1 hash of the objects. While this algorithm is repeatable and deterministic +at the database level, it will have the appearance to the user of "random write +wins." + +{{% note %}} +As mentioned in [Configuration](#configuration), write-once buckets and Riak +Data Types are incompatible because of this. +{{% /note %}} + + +## Handoff + +The write-once path supports handoff scenarios, such that if a handoff occurs +during PUTs in a write-once bucket, the values that have been written will be +handed off to the newly added Riak node. + +## Asynchronous Writes + +For backends that support asynchronous writes, the write-once path will +dispatch a write request to the backend and handle the response +asynchronously. This behavior allows the vnode to free itself for other work +instead of waiting on the write response from the backend. + +At the time of writing, the only backend that supports asynchronous writes is +LevelDB. Riak will automatically fall back to synchronous writes with all other +backends. + +{{% note title="Note on the `multi` backend" %}} +The [Multi]({{}}riak/kv/2.9.8/setup/planning/backend/multi) backend does not +support asynchronous writes. Therefore, if LevelDB is used with the Multi +backend, it will be used in synchronous mode. +{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.8/developing/client-libraries.md b/content/riak/kv/2.9.8/developing/client-libraries.md new file mode 100644 index 0000000000..a63a5c16b8 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/client-libraries.md @@ -0,0 +1,294 @@ +--- +title: "Client Libraries" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Client Libraries" + identifier: "developing_client_libraries" + weight: 106 + parent: "developing" +toc: true +aliases: + - /riak/2.9.8/dev/using/libraries + - /riak/kv/2.9.8/dev/using/libraries +--- + +## Basho-Supported Libraries + +Basho officially supports a number of open-source client libraries for a +variety of programming languages and environments. 
+ +Language | Source | Documentation | Download +:--------|:-------|:--------------|:-------- +Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22) | +Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client) +Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads) +C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases) +Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases) +PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) +Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client) +Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client) + +**Note**: All official clients use the integrated issue tracker on +GitHub for bug reporting. + +In addition to the official clients, Basho provides some unofficial +client libraries, listed below. There are also many client libraries and +related [community projects]({{}}community/projects/). + + +## Community Libraries + +The Riak Community is developing at a break-neck pace, and the number of +community-contributed libraries and drivers is growing right along side +it. Here is a list of projects that may suit your programming needs or +curiosities. If you know of something that needs to be added or are +developing something that you wish to see added to this list, please +fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs) +and send us a pull request. + +{{% note title="Note on community-produced libraries" %}} +All of these projects and libraries are at various stages of completeness and +may not suit your application's needs based on their level of maturity and +activity. 
+{{% /note %}}
+
+### Client Libraries and Frameworks
+
+#### C/C++
+
+* [riak-cpp](https://github.com/ajtack/riak-cpp) - A C++ Riak client
+  library for use with C++11 compilers
+* [Riak C Driver](https://github.com/fenek/riak-c-driver) - A library
+  to communicate with Riak using cURL and Protocol Buffers
+* [Riack](https://github.com/trifork/riack) - A simple C client
+  library
+* [Riack++](https://github.com/TriKaspar/riack_cpp) - A C++ wrapper
+  around riack
+
+#### Clojure
+
+* [knockbox](https://github.com/reiddraper/knockbox) - An eventual
+  consistency toolbox for Clojure
+* [Welle](http://clojureriak.info) - An expressive Clojure client with
+  batteries included
+* [clj-riak](http://github.com/mmcgrana/clj-riak) - Clojure bindings
+  to the Riak Protocol Buffers API
+* [sumo](https://github.com/reiddraper/sumo) - A Protocol
+  Buffer-specific client for Riak with KV, 2i, and MapReduce support
+* [kria](https://github.com/bluemont/kria) - Riak 2.0 Asynchronous
+  (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer
+  API, Java 7.
+
+#### ColdFusion
+
+* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) - A Riak-backed cache extension for Railo/ColdFusion
+
+#### Common Lisp
+
+* [cl-riak (1)](https://github.com/whee/cl-riak)
+* [cl-riak (2)](https://github.com/eriknomitch/cl-riak)
+
+#### Dart
+
+* [riak-dart](https://github.com/agilord/riak_dart_client) - HTTP
+  client for Riak written in Dart
+
+#### Django (Python)
+
+* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) - Riak-based Session Backend for Django
+* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) - A Riak backend for Django
+
+#### Erlang
+
+* [Uriak Pool](https://github.com/unisontech/uriak_pool) - Erlang
+  connection pool library from the team at
+  [Unison](http://www.unison.com)
+* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) - Riak
+  Protocol Buffer Client pool application
+* [Pooly](https://github.com/aberman/pooly) - Riak Process Pool
+* [riakpool](https://github.com/dweldon/riakpool) - Application for
+  maintaining a dynamic pool of Protocol Buffer client connections to a
+  Riak database
+* [pooler](https://github.com/seth/pooler) - An OTP Process Pool
+  Application
+* [krc](https://github.com/klarna/krc) - A simple wrapper around the
+  official Riak client for Erlang
+* [riakc_pool](https://github.com/brb/riakc_pool) - A really simple
+  Riak client process pool based on poolboy
+
+#### Go
+
+* [riaken](https://github.com/riaken) - A fast and extendable Riak
+  Protocol Buffer Client
+* [goriakpbc](https://github.com/tpjg/goriakpbc) - A Golang Riak
+  client inspired by the Ruby riak-client from Basho and riakpbc from mrb
+* [riakpbc](https://github.com/mrb/riakpbc) - A Riak Protocol Buffer
+  client in Go
+* [goriak](https://github.com/zegl/goriak) - Go language driver for Riak KV
+
+#### Grails
+
+* [Grails ORM for Riak](http://www.grails.org/plugin/riak)
+
+#### Griffon
+
+* [Riak Plugin for
+  Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin)
+
+#### Groovy
+
+* [spring-riak](https://github.com/jbrisbin/spring-riak) - Riak
+  support from Groovy and/or Java
+
+#### Haskell
+
+* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) - A fast Haskell client library from the team at MailRank.
+
+#### Java
+
+* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) - Java Client Library for Riak based on the Protocol Buffers API
+* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) - Asynchronous, NIO-based Protocol Buffers client for Riak
+* [Riak Module for the Play
+  Framework](http://www.playframework.org/modules/riak-head/home)
+
+#### Lisp-flavored Erlang
+
+* [Gutenberg](https://github.com/dysinger/gutenberg/) - Riak MapReduce
+  examples written in LFE
+
+#### Node.js
+
+* [zukai](https://github.com/natural/zukai) - Riak ODM for Node.js
+  from Troy Melhase
+* [riak-pb](https://github.com/CrowdProcess/riak-pb) - Riak Protocol
+  Buffers client for Node.js from the team at
+  [CrowdProcess](http://crowdprocess.com)
+* [node_riak](https://github.com/mranney/node_riak) - Voxer's
+  production Node.js client for Riak.
+* [riakpbc](https://github.com/nlf/riakpbc) - A simple Riak Protocol
+  Buffer client library for Node.js
+* [nodiak](https://npmjs.org/package/nodiak) - Supports bulk
+  get/save/delete, sibling auto-resolution, MapReduce chaining, Search,
+  and 2i's
+* [resourceful-riak](https://github.com/admazely/resourceful-riak) - A
+  Riak engine to the
+  [resourceful](https://github.com/flatiron/resourceful/) model
+  framework from [flatiron](https://github.com/flatiron/)
+* [Connect-Riak](https://github.com/frank06/connect-riak) - Riak
+  session store for Connect backed by [Riak-js](http://riakjs.org/)
+* [Riak-js](http://riakjs.com) - Node.js client for Riak with support
+  for HTTP and Protocol Buffers
+* [Riakjs-model](https://github.com/dandean/riakjs-model) - a model
+  abstraction around riak-js
+* [Node-Riak](http://github.com/orlandov/node-riak) - A wrapper around
+  Node's HTTP facilities for communicating with Riak
+* [riak-dc](https://github.com/janearc/riak-dc) - A very thin, very small
+  http-based interface to Riak using promises intended to be used for small
+  tools like command-line applications; aims to have the
+  "most-synchronous-like" interface.
+* [Nori](https://github.com/sgonyea/nori) - Experimental Riak HTTP
+  library for Node.js modeled after Ripple
+* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) - Node-based server and database-frontend for Sproutcore
+* [Chinood](https://npmjs.org/package/chinood) - Object data mapper
+  for Riak built on Nodiak
+* [SimpleRiak](https://npmjs.org/package/simpleriak) - A very simple
+  Riak HTTP client
+
+#### OCaml
+
+* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) - Riak OCaml client
+* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) - A Protocol
+  Buffers client for Riak
+
+#### Perl
+
+* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) - A Perl
+  interface to Riak
+* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) - Non-blocking Riak adapter using anyevent
+* [riak-tiny](https://github.com/tempire/riak-tiny) - Perl interface
+  to Riak without Moose
+* [Riak::Light](https://metacpan.org/module/Riak::Light) - Fast and
+  lightweight Perl client for Riak (PBC only)
+
+#### PHP
+
+* [riak-client](https://github.com/php-riak/riak-client) - A Riak
+  2.0-compliant PHP client with support for Protocol Buffers by [Fabio
+  Silva](https://github.com/FabioBatSilva)
+* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) - A port of
+  Ripple to PHP
+* [riiak](https://bitbucket.org/intel352/riiak) - A Riak PHP client
+  library for the [Yii Framework](http://www.yiiframework.com/)
+* [riak-php](https://github.com/marksteele/riak-php) - A Riak PHP
+  client with support for Protocol Buffers
+* [RiakBundle](https://github.com/remialvado/RiakBundle) - [Symfony](http://symfony.com) Bundle designed to ease interaction
+  with Riak
+* [php_riak](https://github.com/TriKaspar/php_riak) - A PHP extension
+  written in C, both Riak client and PHP session module
+
+#### Python
+
+* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) - Asyncio PBC Riak 2.0+ client library.
+  (Based on the official Basho Python client)
+* [Riakasaurus](https://github.com/calston/riakasaurus) - A Riak
+  client library for Twisted (based on txriak)
+* [RiakKit](http://shuhaowu.com/riakkit) - A small Python ORM that
+  sits on top of riak-python-client, similar to mongokit and couchdbkit
+* [riakalchemy](https://github.com/Linux2Go/riakalchemy) - Object
+  mapper for Riak written in Python
+* [riak_crdt](https://github.com/ericmoritz/riak_crdt) - A CRDT
+  (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT
+  API](https://github.com/ericmoritz/crdt)
+* [txriak](https://launchpad.net/txriak) - A Twisted module for
+  communicating with Riak via the HTTP interface
+* [txriakidx](https://github.com/williamsjj/txriakidx) - Riak client
+  for Twisted Python that implements transparent indexes
+
+#### Racket
+
+* [riak.rkt](https://github.com/shofetim/riak.rkt) - Racket API to
+  Riak
+* [Racket Riak](https://github.com/dkvasnicka/racket-riak) - Racket
+  1.3.x API to Riak
+
+#### Ruby
+
+* [Risky](https://github.com/aphyr/risky) - A lightweight Ruby ORM for
+  Riak
+* [riak_sessions](http://github.com/igorgue/riak_sessions) - Riak-backed session storage for Rack
+* [Riaktor](http://github.com/benmyles/riaktor) - Ruby client and
+  object mapper for Riak
+* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) - DataMapper adapter for Riak
+* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) - Riak
+  Protocol Buffer Client in Ruby
+* [Devise-Ripple](http://github.com/frank06/devise-ripple) - An ORM
+  strategy to use Devise with Riak
+* [ripple-anaf](http://github.com/bkaney/ripple-anaf) - Accepts nested
+  attributes support for Ripple
+* [Pabst](https://github.com/sgonyea/pabst) - Cross-platform Ruby
+  extension for Protocol Buffers written in both Objective-C and
+  Objective-C++
+
+#### Scala
+
+* [Riakka](http://github.com/timperrett/riakka) - Scala library for
+  talking to Riak
+* [Ryu](http://github.com/softprops/ryu) - A Tornado Whirlwind Kick
+  Scala client for the Riak raw HTTP interface
+* [Raiku](https://github.com/gideondk/Raiku) - An Akka IO- and
+  Sentinel-driven Riak Scala client
+
+#### Smalltalk
+
+* [Phriak](http://www.squeaksource.com/Phriak/) - A Riak client for
+  Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface
+* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) - A Pharo Smalltalk interface to Riak. There is also a blog post
+  with some additional info about the client
+  [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html).
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/data-modeling.md b/content/riak/kv/2.9.8/developing/data-modeling.md
new file mode 100644
index 0000000000..69204ed822
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/data-modeling.md
@@ -0,0 +1,15 @@
+---
+layout: redirect
+target: "riak/kv/2.9.8/learn/use-cases/"
+aliases:
+---
+
+This page exists solely to redirect from the generated URL to the above `target`.
+
+We prefer to store these redirects as .html files in static/, but -- to maintain
+the git history of this (possibly malformed?) file -- we're going to start off
+by using this generated redirect.
+ + + + diff --git a/content/riak/kv/2.9.8/developing/data-types.md b/content/riak/kv/2.9.8/developing/data-types.md new file mode 100644 index 0000000000..cef6470e3c --- /dev/null +++ b/content/riak/kv/2.9.8/developing/data-types.md @@ -0,0 +1,279 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Data Types" + identifier: "developing_data_types" + weight: 102 + parent: "developing" +toc: true +aliases: + - /riak/2.9.8/dev/using/data-types + - /riak/kv/2.9.8/dev/using/data-types + - /riak/2.9.8/dev/data-modeling/data-types + - /riak/kv/2.9.8/dev/data-modeling/data-types +--- + +[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others +[concept crdt]: ../../learn/concepts/crdts +[ops bucket type]: ../../using/cluster-operations/bucket-types + +Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types: + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [GSets](./gsets) +- [Maps](./maps) + +Riak KV also has 1 context-free data type, that has similar usage but does not require contexts. + +- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places) + + +Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps). + +For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt]. + +## Getting Started with Riak Data Types + +The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: + +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). +2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). +3. [Activate the bucket type](#activate-bucket-type). + +### Creating a Bucket with a Riak Data Type + +First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to either `counter`, `map`, `set`, or `hll`. + +The following would create a separate bucket type for each of the four +bucket-level data types: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}' +riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}' +``` + +> **Note** +> +> The names `maps`, `sets`, `counters`, `hlls` and `gsets` are not reserved +terms. You are free to name bucket types whatever you like, with +the exception of `default`. + +### Confirm Bucket configuration + +Once you've created a bucket with a Riak data type, you can check +to make sure that the bucket property configuration associated with that +type is correct. This can be done through the `riak-admin` interface: + +```bash +riak-admin bucket-type status maps +``` + +This will return a list of bucket properties and their associated values +in the form of `property: value`. 
If our `maps` bucket type has been set +properly, we should see the following pair in our console output: + +``` +datatype: map +``` + +### Activate Bucket type + +If a bucket type has been properly constructed, it needs to be activated +to be usable in Riak. This can also be done using the `bucket-type` +command interface: + +```bash +riak-admin bucket-type activate maps +``` + +To check whether activation has been successful, simply use the same +`bucket-type status` command shown above. + +See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application. + +## Required Bucket Properties + +In order for Riak data types to work the bucket should have the following bucket properties: + +- `allow_mult = true` +- `last_write_wins = false` + +These settings are set by default and should not be changed. + +## Data Types and Context + +Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence. + +If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client). + +> **Note** +> +> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter. + +In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map): + +```java +// Using the "ahmedMap" Location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +System.out.prinntln(ctx.getValue().toString()) + +// An indecipherable string of Unicode characters should then appear +``` + +```ruby +bucket = client.bucket('users') +ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps') +ahmed_map.instance_variable_get(:@context) + +# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j" +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```python +bucket = client.bucket_type('maps').bucket('users') +ahmed_map = Map(bucket, 'ahmed_info') +ahmed_map.context + +# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Note: using a previous UpdateMap or FetchMap result +Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context)); + +// Output: +// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("context: '%s'", rslt.context.toString('base64')); +}); + +// Output: +// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo=' +``` + +```erlang +%% You cannot fetch a data type's context directly using the Erlang +%% client. This is actually quite all right, as the client automatically +%% manages contexts when making updates. 
+``` + +> **Context with the Ruby, Python, and Erlang clients** +> +> In the Ruby, Python, and Erlang clients, you will not need to manually +handle context when making data type updates. The clients will do it all +for you. The one exception amongst the official clients is the Java +client. We'll explain how to use data type contexts with the Java client +directly below. + +### Context with the Java and PHP Clients + +With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations: + +* Disabling a flag within a map +* Removing an item from a set (whether the set is on its own or within a + map) +* Removing a field from a map + +Without context, these operations simply will not succeed due to the +convergence logic driving Riak data types. The example below shows you +how to fetch a data type's context and then pass it back to Riak. More +specifically, we'll remove the `paid_account` flag from the map: + +```java +// This example uses our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate removePaidAccountField = new MapUpdate() + .removeFlag("paid_account"); +UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField) + .withContext(ctx) + .build(); +client.execute(update); +``` + + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +## Usage Examples + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [Maps](./maps) +- [GSets](./gsets) +- [Hyperloglogs](./hyperloglogs) + +The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section. + +All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish. + +## Data Types and Search + +Riak data types can be searched like any other object, but with the +added benefit that your data type is indexed as a different type by Solr, +the search platform behind Riak Search. + +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code +samples from each of our official client libraries. 
+ + + + diff --git a/content/riak/kv/2.9.8/developing/data-types/counters.md b/content/riak/kv/2.9.8/developing/data-types/counters.md new file mode 100644 index 0000000000..4104bd20b6 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/data-types/counters.md @@ -0,0 +1,635 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Counters" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Counters" + identifier: "data_types_counters" + weight: 100 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.8/dev/using/data-types/counters + - /riak/kv/2.9.8/dev/using/data-types/counters + - /riak/2.9.8/dev/data-modeling/data-types/counters + - /riak/kv/2.9.8/dev/data-modeling/data-types/counters +--- + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero. + +The examples in this section will show you how to use counters on their own. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter set to `counter`: + +```bash +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +``` + +> **Note** +> +> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status counters +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `counters` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: counter +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate counters +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status counters +``` + +After creating and activating our new `counters` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key +location that contains our counter. + +For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. 
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can +create this counter and ensure that the `counters` bucket will use our +`counters` bucket type like this: + +```java +// Using the countersBucket Namespace object from above: + +Location trafficTickets = new Location(countersBucket, "traffic_tickets"); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') + +# Alternatively, the Ruby client enables you to set a bucket type as +# being globally associated with a Riak data type. The following would +# set all counter buckets to use the counters bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters' + +# This would enable us to create our counter without specifying a bucket type +bucket = client.bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('traffic_tickets', $bucket); +``` + +```python +bucket = client.bucket_type('counters').bucket('traffic_tickets') +counter = bucket.new('traffic_tickets') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResult = cmd.Result; +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +``` + +```erlang +Counter = riakc_counter:new(). + +%% Counters in the Erlang client are opaque data structures that collect +%% operations as you mutate them. We will associate the data structure +%% with a bucket type, bucket, and key later on. +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +## Increment a Counter + +Now that our client knows which bucket/key pairing to use for our +counter, `traffic_tickets` will start out at 0 by default. If we happen +to get a ticket that afternoon, we can increment the counter: + +```java +// Using the "trafficTickets" Location from above: + +CounterUpdate cu = new CounterUpdate(1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment + +# This will increment the counter both on the application side and in +Riak +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment() + +# Updates are staged locally and have to be explicitly sent to Riak +# using the store() method. 
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 5}' +``` + +## Retrieve Counter Value + +We can retrieve the value of the counter and view how many tickets have accumulated: + +```java +// Using the "trafficTickets" Location from above: +FetchCounter fetch = new FetchCounter.Builder(trafficTickets) + .build(); +FetchCounter.Response response = client.execute(fetch); +RiakCounter counter = response.getDatatype(); +Long ticketsCount = counter.view(); +``` + +```ruby +counter.value +# Output will always be an integer +``` + +```php +$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getCounter(); + +$trafficTickets->getData(); # returns an integer +``` + +```python +counter.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, whereas the call above would return +# 6, the call below will return 0' since we started with an empty +# counter: + +counter.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any changes to the counter that have not yet been +# sent to Riak +counter.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +// response.Value has the counter value +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +client.fetchCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND", + options.bucketType, options.bucket, options.key); + } else { + logger.info("bt: %s, b: %s, k: %s, counter: %d", + options.bucketType, options.bucket, options.key, + rslt.counterValue); + } + } +); +``` + +```erlang +riakc_counter:dirty_value(Counter2). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, whereas the call above would return +%% '6', the call below will return '0' since we started with an empty +%% counter: + +riakc_counter:value(Counter2). + +%% To fetch the value stored on the server, use the call below: + +{ok, CounterX} = riakc_pb_socket:fetch_type(Pid, + {<<"counters">>, <<"counters">>}, + <<"traffic_tickets">>). +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets + +# Response: +{"type":"counter", "value": } +``` + +## Decrement a Counter + +Counters enable you to decrement values in addition to incrementing them as seen above. 
+
+For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record:
+
+```java
+// Using the "trafficTickets" Location from above:
+CounterUpdate cu = new CounterUpdate(-1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.decrement
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(-3)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+counter.decrement()
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var updateCmd = new UpdateCounter.Builder(-3)
+    .WithBucketType("counters")
+    .WithBucket("counters")
+    .WithKey("traffic_tickets")
+    .Build();
+
+rslt = client.Execute(updateCmd);
+response = updateCmd.Response;
+// response.Value is three less than before
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -1
+};
+client.updateCounter(options,
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+
+// As with incrementing, you can also decrement by more than one, e.g.:
+options.increment = -3;
+```
+
+```erlang
+Counter3 = riakc_counter:decrement(Counter2).
+
+%% As with incrementing, you can also decrement by more than one:
+
+Counter4 = riakc_counter:decrement(3, Counter3).
+
+%% At some point, we'll want to send our local updates to the server
+%% so they get recorded and are visible to others. Extract the update
+%% using the to_op/1 function, then pass it to
+%% riakc_pb_socket:update_type/4,5.
+
+riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>},
+                            <<"traffic_tickets">>,
+                            riakc_counter:to_op(Counter4)).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"decrement": 3}'
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/data-types/gsets.md b/content/riak/kv/2.9.8/developing/data-types/gsets.md
new file mode 100644
index 0000000000..c60379508d
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/data-types/gsets.md
@@ -0,0 +1,631 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: GSets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "GSets"
+    identifier: "data_types_gsets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/data-types/gsets
+  - /riak/kv/2.9.8/dev/using/data-types/gsets
+  - /riak/2.9.8/dev/data-modeling/data-types/gsets
+  - /riak/kv/2.9.8/dev/data-modeling/data-types/gsets
+---
+
+GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps).
+
+GSets are collections of unique binary values (such as strings). All of the values in a gset are unique and are automatically sorted alphabetically irrespective of the order they were added.
+
+For example, if you attempt to add the element `shovel` to a gset that already contains `shovel`, the operation will be ignored by Riak KV.
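+
+To make this concrete, here is a minimal sketch using the Python client calls shown later on this page (the bucket and key names are illustrative, and we assume the `gsets` bucket type from the setup section below):
+
+```python
+# Adding the same element twice leaves the gset unchanged.
+bucket = client.bucket_type('gsets').bucket('tools')
+gset = bucket.new('shed')
+
+gset.add('shovel')
+gset.add('shovel')  # duplicate add: ignored by Riak KV
+gset.add('rake')
+gset.store()
+
+gset.reload().value
+# frozenset(['rake', 'shovel']) - only the unique elements remain
+```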
+
+Unlike regular sets, elements can only be added to a gset; existing elements cannot be modified or removed.
+
+> **Known Issue**
+>
+> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retry creating your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `gset`:
+
+```bash
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `gsets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: gset
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate gsets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+After creating and activating our new `gsets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using gsets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a gset:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+gset = bucket.new('2019-11-17')
+
+# or
+
+from riak.datatypes import GSet
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with gsets
+// by building an Options object or using a Builder
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, gsets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store the list of transactions that occur for an account number on a specific date.
+Let's create a Riak gset stored in the key `2019-11-17` in the bucket `account-12345678` using the `gsets` bucket type created previously (the Java and Ruby examples below illustrate the same pattern with a `cities` gset in a `travel` bucket):
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('gsets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak set.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a set explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create a options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+Gset20191117 = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our set is empty. We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+count($gset->getData());
+```
+
+```python
+len(gset) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+FetchGSet fetchGSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchGSetCommand);
+GSetResponse response = fetchGSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("gset '2019-11-17' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_gset:size(Gset20191117) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% gset that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a GSet
+
+But let's say that a pair of transactions occurred today. Let's add them to our `2019-11-17` set:
+
+```java
+// Using our "cities" Location from above:
+
+GSetUpdate su = new GSetUpdate()
+    .add("Toronto")
+    .add("Montreal");
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+    .build();
+client.execute(update);
+```
+
+```ruby
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('transaction a')
+  ->add('transaction b')
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+gset.add('transaction a')
+gset.add('transaction b')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "transaction a", "transaction b" };
+
+var builder = new UpdateGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17")
+    .WithAdditions(adds);
+
+UpdateGSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+GSetResponse response = cmd.Response;
+Assert.Contains("transaction a", response.AsStrings.ToArray());
+Assert.Contains("transaction b", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['transaction a', 'transaction b'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
```

```erlang
+Gset20191117_1 = riakc_gset:add_element(<<"transaction a">>, Gset20191117),
+Gset20191117_2 = riakc_gset:add_element(<<"transaction b">>, Gset20191117_1).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["transaction a", "transaction b"]}'
+```
+
+## Remove from a GSet
+
+Removal from a GSet is not possible.
+
+## Retrieve a GSet
+
+Now, we can check which transactions are currently in our gset:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+    System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+# => #<Set: {"Montreal", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+var_dump($gset->getData());
+```
+
+```python
+gset.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['transaction a', 'transaction b']), the call below would
+# return frozenset([]).
+
+gset.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions.
+gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(Gset20191117_2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(Gset20191117_2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"gsets">>,<<"account-12345678">>},
+                                        <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var gset20191117 = rslt.values;
+gset20191117.indexOf('transaction z'); // if present, index is >= 0
+gset20191117.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, Gset20191117_2 is the most "recent" gset from the
+%% standpoint of our application.
+
+riakc_gset:is_element(<<"transaction z">>, Gset20191117_2).
+riakc_gset:is_element(<<"transaction a">>, Gset20191117_2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+int numberOfCities = response.getDatatype().view().size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable<string>
+response.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var gset20191117Size = rslt.values.length;
+```
+
+```erlang
+riakc_gset:size(Gset20191117_2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/data-types/hyperloglogs.md b/content/riak/kv/2.9.8/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..449c4ffb7d
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/data-types/hyperloglogs.md
@@ -0,0 +1,643 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/data-types/hyperloglogs
+  - /riak/kv/2.9.8/dev/using/data-types/hyperloglogs
+  - /riak/2.9.8/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/2.9.8/dev/data-modeling/data-types/hyperloglogs
+---
+
+The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog.
+
+For this example we'll use the `hlls` bucket type created and activated above and a bucket called `my_hlls`:
+
+```erlang
+%% Buckets are simply named binaries in the Erlang client. See the
+%% examples below for more information
+```
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location hllLocation =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// Buckets and bucket types are simply strings in the Go client.
+
+// See the examples below for more information, or the full example at
+// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go
+
+// We will need the following imports to run the examples:
+import (
+    "errors"
+    "fmt"
+    "os"
+    "time"
+
+    riak "github.com/basho/riak-go-client"
+)
+```
+
+```csharp
+// In the C# client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```javascript
+// In the Node.js client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('<key>', '<bucket>', 'hlls')
+  ->build();
+```
+
+```ruby
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-Data-Type
+# requests, which end in /keys/<key>
+```
+
+
+## Create a HyperLogLog data type
+
+To create a hyperloglog data structure, you need to specify a bucket/key pair to
+hold that hyperloglog. Here is the general syntax for doing so:
+
+```erlang
+HLL = riakc_hll:new().
+
+%% Hyperloglogs in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location hllLocation =
+  new Location(new Namespace("hlls", "hello"), "darkness");
+
+// In the Java client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// In the Go client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```csharp
+// In the C# client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```javascript
+// In the Node.js client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+``` + +```php +// Note that "hlls" is just an example HLL bucket type name used +// in these examples + +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('gosabres poked you.') + ->add('phprocks viewed your profile.') + ->add('phprocks started following you.') + ->buildBucket('', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +key = "darkness" +hll = Riak::Crdt::HyperLogLog.new(bucket, key) +``` + +```curl +# You cannot create an empty hyperloglog data structure through the HTTP +# interface. +# Hyperloglogs can only be created when an element is added to them, as in the +# examples below. +``` + +Upon creation, our hyperloglog data structure is empty: + +```erlang +HLL. + +%% which will return: +%% {hll,0,[]} +``` + +```java +FetchHll fetch = new FetchHll.Builder(hllLocation) + .build(); +RiakHll hll = client.execute(fetch); +boolean isEmpty = hll.getCardinality() == 0; +``` + +```python +is_empty = hll.value == 0 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } +}); +// Prints "Not Found" to logger.info. +``` + +```csharp + var fetch = new FetchHll.Builder() + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .Build(); + +RiakResult rslt = client.Execute(fetch); +HllResponse response = fetch.Response; +if (response.NotFound) +{ + Console.WriteLine("Not Found"); +} +// Prints "Not Found" to the console. +``` + +```php +$command = (new Command\Builder\FetchHll($riak_client)) + ->buildLocation('darkness', 'hello', 'hlls') + ->build(); + +$response = $command->execute(); + +$response->getCode() == '404'; +``` + +```ruby +puts hll.cardinality +# Prints "0" +``` + +```curl +curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness + +# Response +{"type":"hll","error":"notfound"} +``` + +## Add elements to a HyperLogLog data type + +```erlang +HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1), + +HLL2. 
+
+%% which will return:
+%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]}
+```
+
+```java
+HllUpdate hllUpdate = new HllUpdate()
+    .add("Jokes")
+    .add("Are")
+    .addAll(Arrays.asList("Better", "Explained", "Jokes"));
+
+hllUpdate.getElementAdds();
+// Returns the set of ["Jokes", "Are", "Better", "Explained"]
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+// We will add values in the next example
+```
+
+```csharp
+// We will add values in the next example
+```
+
+```javascript
+// We will add values in the next example
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('Jokes')
+  ->add('Are')
+  ->add('Better')
+  ->add('Explained')
+  ->add('Jokes')
+  ->buildBucket('my_hlls', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["my", "old", "friend"]}'
+```
+
+However, when using a non-HTTP client, the approximate cardinality/value of our
+data structure will remain 0 locally until it's pushed to the server and then
+[fetched](#retrieve-a-hyperloglog-datatype) from the server.
+
+```erlang
+riakc_hll:value(HLL2) == 0.
+
+%% which will return:
+%% true
+
+Port = 8087,
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port),
+Key = <<"Holy Diver">>,
+BucketType = <<"hlls">>,
+Bucket = {BucketType, <<"rainbow in the dark">>},
+
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)).
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)).
+```
+
+```java
+// Using hllUpdate and hllLocation from above examples
+
+UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate)
+    .build();
+client.execute(update);
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+adds := [][]byte{
+    []byte("Jokes"),
+    []byte("Are"),
+    []byte("Better"),
+    []byte("Explained"),
+    []byte("Jokes"),
+}
+
+builder := riak.NewUpdateHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    WithAdditions(adds...).
+    Build()
+if err != nil {
+    return err
+}
+
+return cluster.Execute(cmd)
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness',
+    additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'],
+};
+
+client.updateHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```csharp
+var adds = new HashSet<string> { "Jokes", "Are", "Better", "Explained", "Jokes" };
+
+var update = new UpdateHll.Builder(adds)
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .WithReturnBody(true)
+    .Build();
+
+RiakResult rslt = client.Execute(update);
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+  ->add('Jokes')
+  ->add('Are')
+  ->add('Better')
+  ->add('Explained')
+  ->add('Jokes')
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+hll.add('Jokes')
+hll.batch do |s|
+  s.add 'Are'
+  s.add 'Better'
+  s.add 'Explained'
+  s.add 'Jokes'
+end
+```
+
+## Retrieve a HyperLogLog data type
+
+Now, we can check the approximate count (a.k.a. the cardinality) of the elements
+added to our hyperloglog data structure:
+
+```erlang
+{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key),
+riakc_hll:value(HLL3) == 4.
+
+%% which would return:
+%% true
+
+%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the
+%% unique elements we've added to the data structure.
+```
+
+```java
+FetchHll hllFetchCmd = new FetchHll.Builder(hllLocation).build();
+RiakHll hll = client.execute(hllFetchCmd);
+hll.getCardinality();
+// Which returns 4
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = bucket.get('hll_one')
+# myhll.value == 4
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+    logger.info("Hyperloglog cardinality is: " + rslt.cardinality);
+});
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+  ->buildLocation('darkness', 'hello', 'hlls')
+  ->build();
+
+$response = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($response->getHll()->getData()));
+$this->assertEquals(4, $response->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
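+
+Because a hyperloglog stores a small, fixed-size probabilistic sketch rather than the elements themselves, the value it reports is an estimate, not an exact count, and the estimate stays within a small relative error even for very large streams. Below is a rough sketch of this behavior using the Python client calls from the examples above (the key name is illustrative, and the exact number printed will vary slightly from run to run):
+
+```python
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+big_hll = bucket.new('many_elements')
+
+# Add a large number of distinct elements locally, then push to Riak.
+for i in range(10000):
+    big_hll.add('element-%d' % i)
+big_hll.store()
+
+# The stored cardinality will be close to, but not necessarily exactly,
+# 10000; a small relative error is the price paid for constant storage.
+print(big_hll.value)
+```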
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/data-types/maps.md b/content/riak/kv/2.9.8/developing/data-types/maps.md
new file mode 100644
index 0000000000..b8d5b09fed
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/data-types/maps.md
@@ -0,0 +1,1885 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/data-types/maps
+  - /riak/kv/2.9.8/dev/using/data-types/maps
+  - /riak/2.9.8/dev/data-modeling/data-types/maps
+  - /riak/kv/2.9.8/dev/data-modeling/data-types/maps
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `maps` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: map
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+After creating and activating our new `maps` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key location that contains our map.
+
+The syntax for creating a map is analogous to the
+syntax for creating other data types:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location map =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+map = Riak::Crdt::Map.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type');
+```
+
+```python
+# The client detects the bucket type's datatype and automatically
+# returns the right datatype for you, in this case a Map.
+map = bucket.new(key)
+
+# This way is also acceptable:
+from riak.datatypes import Map
+map = Map(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("<bucket_type>")
+    .WithBucket("<bucket>")
+    .WithKey("<key>");
+```
+
+```javascript
+// Options to pass to the various map methods
+var options = {
+    bucketType: '<bucket_type>',
+    bucket: '<bucket>',
+    key: '<key>'
+};
+```
+
+```erlang
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Map
+
+For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket.
+
+We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type:
+
+```java
+// In the Java client, you specify the location of data types
+// before you perform operations on them:
+
+Location ahmedMap =
+  new Location(new Namespace("maps", "customers"), "ahmed_info");
+```
+
+```ruby
+customers = client.bucket_type('maps').bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+
+# Alternatively, the Ruby client enables you to set a bucket type as being
+# globally associated with a Riak data type. The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+    .update("first_name", ru1)
+    .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Ahmed')
+  ->updateRegister('phone_number', '5551234567')
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved.
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+    .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('enterprise_customer', false)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//      "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//      "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"enterprise_customer">>, flag},
+                        fun(F) -> riakc_flag:disable(F) end,
+                        Map3).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "enterprise_customer_flag": "disable"
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+  }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"];
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map4).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
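+
+If Ahmed signs up for the Enterprise plan later on, the flag can be flipped the same way. Here is a short sketch with the Python client, assuming that `enable()` mirrors the `disable()` call shown above:
+
+```python
+# Fetch the current state first, as recommended above for flag updates,
+# then enable the flag and write it back.
+map.reload()
+map.flags['enterprise_customer'].enable()
+map.store()
+```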
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+    .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateCounter('page_visits', $updateCounter)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//      "Sets":{},
+//      "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//      "Flags":{"enterprise_customer":false},
+//      "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "page_visits_counter": 1
+    }
+  }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
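+
+As a quick check, the new counter can be read back like any other value embedded in the map. A sketch with the Python client, reusing the `map` object from the examples above:
+
+```python
+# Reload to pick up the server-side state, then read the counter.
+map.reload()
+map.counters['page_visits'].value  # 1
+```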
We'll store +that information in a set inside of our map: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate() + .add("robots") + .add("opera") + .add("motorcycles"); +MapUpdate mu = new MapUpdate() + .update("interests", su); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.batch do |m| + %{ robots opera motorcycles }.each do |interest| + m.sets['interests'].add(interest) + end +end +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('robots') + ->add('opera') + ->add('motorcycles'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +for interest in ['robots', 'opera', 'motorcycles']: + map.sets['interests'].add(interest) +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var interestsAdds = new[] { "robots", "opera", "motorcycles" }; + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.AddToSet("interests", interestsAdds); + +builder.WithMapOperation(mapOperation); +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; + +// Map: {"Counters":{"page_visits":3}, + "Sets":{"interests":["motorcycles","opera","robots"]}, + "Registers":{"first_name":"Ahmed","phone_number":"5551234567"}, + "Flags":{"enterprise_customer":false}, + "Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.addToSet('interests', 'robots'); +mapOp.addToSet('interests', 'opera'); +mapOp.addToSet('interests', 'motorcycles'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map4 = riakc_map:update({<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3), +Map5 = riakc_map:update({<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"opera">>, S) end, + Map4), +Map6 = riakc_map:update({<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end, + Map4). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "interests_set": { + "add_all": [ + "robots", + "opera", + "motorcycles" + ] + } + } + }' +``` + +We can then verify that the `interests` set includes these three +interests: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +RiakMap map = response.getDatatype(); +RiakSet interestSet = map.getSet("interests"); +Set interests = interestSet.view(); +System.out.println(interests.contains(BinaryValue.create("robots"))); + +// Checking for "opera" and "motorcycles" works the same way +``` + +```ruby +map.batch do |m| + %w{ robots opera motorcycles }.each do |interest| + m.sets['interests'].include? 
interest + end +end + +# This will return three Boolean values +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$sets = $map->getSet('interests'); +var_dump($sets->getData()); +``` + +```python +reloaded_map = map.reload() +for interest in ['robots', 'opera', 'motorcycles']: + interest in reloaded_map.sets['interests'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +Map ahmedMap = response.Value; + +// All of the following return true: +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("opera"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + assert(rslt.map.sets['interests'].indexOf('robots') !== -1); +}); +``` + +```erlang +riakc_map:dirty_value(Map6). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false +``` + +We learn from a recent purchasing decision that Ahmed actually doesn't +seem to like opera. He's much more keen on indie pop. Let's change the +`interests` set to reflect that: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate() + .remove("opera") + .add("indie pop"); +MapUpdate mu = new MapUpdate() + .update("interests", su); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.batch do |m| + m.sets['interests'].remove('opera') + m.sets['interests'].add('indie pop') +end +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('indie pop') + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +```python +map.sets['interests'].discard('opera') +map.sets['interests'].add('indie pop') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.AddToSet("interests", "indie pop"); +mapOperation.RemoveFromSet("interests", "opera"); + +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); + +MapResponse response = cmd.Response; +Map ahmedMap = response.Value; + +// This is false +ahmedMap.Sets.GetValue("interests").Contains("opera"); + +// These are true +ahmedMap.Sets.GetValue("interests").Contains("indie pop"); +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + mapOp.removeFromSet('interests', 'opera'); + mapOp.addToSet('interests', 'indie pop'); + + options.context = rslt.context; + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map7 = 
riakc_map:update({<<"interests">>, set},
+    fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+    fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+    Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of a wide variety of
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("first_name", ru1)
+        .update("last_name", ru2)
+        .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Annika')
+    ->updateRegister('last_name', 'Weiss')
+    ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+  bucketType: 'maps',
+  bucket: 'customers',
+  key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.map('annika_info')
+  .setRegister('first_name', 'Annika')
+  .setRegister('last_name', 'Weiss')
+  .setRegister('phone_number', '5559876543');
+
+options.op = mapOp;
+
+client.updateMap(options, function (err, rslt) {
+  if (err) {
+    throw new Error(err);
+  }
+});
+```
+
+```erlang
+Map12 = riakc_map:update(
+  {<<"annika_info">>, map},
+  fun(M) -> riakc_map:update(
+    {<<"first_name">>, register},
+    fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end,
+  Map8),
+Map13 = riakc_map:update(
+  {<<"annika_info">>, map},
+  fun(M) -> riakc_map:update(
+    {<<"last_name">>, register},
+    fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end,
+  Map12),
+Map14 = riakc_map:update(
+  {<<"annika_info">>, map},
+  fun(M) -> riakc_map:update(
+    {<<"phone_number">>, register},
+    fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end,
+  Map13).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "first_name_register": "Annika",
+          "last_name_register": "Weiss",
+          "phone_number_register": "5559876543"
+        }
+      }
+    }
+  }
+  '
+```
+
+The value of a register in a map can be obtained without a special
+method:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+String annikaFirstName = response.getDatatype()
+        .getMap("annika_info")
+        .getRegister("first_name")
+        .view()
+        .toString();
+```
+
+```ruby
+map.maps['annika_info'].registers['first_name']
+
+# "Annika"
+```
+
+```php
+# with param 'returnbody' = 'true', we can fetch the map from our last response
+$map = $response->getMap();
+
+echo $map->getMap('annika_info')->getRegister('first_name'); // Annika
+```
+
+```python
+map.reload().maps['annika_info'].registers['first_name'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+ahmedMap = response.Value;
+ahmedMap.Maps["annika_info"].Registers.GetValue("first_name");
+```
+
+```javascript
+var options = {
+  bucketType: 'maps',
+  bucket: 'customers',
+  key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+  if (err) {
+    throw new Error(err);
+  }
+
+  var annikaFirstName =
+    rslt.map.maps['annika_info'].registers['first_name'].toString('utf8');
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map14).
+```
+
+```curl
+# Specific values for fields inside of maps (or maps within maps, for that
+# matter), cannot be obtained directly through the HTTP interface.
+```
+
+Registers can also be removed. Note that while most of the clients below
+remove the `first_name` register, the Erlang and HTTP examples remove
+`phone_number` instead:
+
+```java
+// This example uses our "ahmedMap" location from above. 
Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .removeRegister("first_name");
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('first_name')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->removeRegister('first_name');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['first_name']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("first_name");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+  bucketType: 'maps',
+  bucket: 'customers',
+  key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+  if (err) {
+    throw new Error(err);
+  }
+
+  var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+  mapOp.map('annika_info').removeRegister('first_name');
+
+  var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp,
+    context: rslt.context,
+  };
+
+  client.updateMap(options, function (err, rslt) {
+    if (err) {
+      throw new Error(err);
+    }
+  });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+    fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+    Map14).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "remove": ["phone_number_register"]
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+  }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("enterprise_plan", new FlagUpdate(false))
+        .update("family_plan", new FlagUpdate(false))
+        .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_plan', false)
+    ->updateFlag('family_plan', false)
+    ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+  bucketType: 'maps',
+  bucket: 'customers',
+  key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+  if (err) {
+    throw new Error(err);
+  }
+
+  var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+  var annika_map = mapOp.map('annika_info');
+  annika_map.setFlag('enterprise_plan', false);
+  annika_map.setFlag('family_plan', false);
+  annika_map.setFlag('free_plan', true);
+
+  var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp,
+    context: rslt.context,
+  };
+
+  client.updateMap(options, function (err, rslt) {
+    if (err) {
+      throw new Error(err);
+    }
+  });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+  {<<"annika_info">>, map},
+  fun(M) -> riakc_map:update(
+    {<<"enterprise_plan">>, flag},
+    fun(F) -> riakc_flag:disable(F) end,
+    M) end,
+  Map15),
+Map17 = riakc_map:update(
{<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"family_plan">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + M) end, + Map16), +Map18 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"free_plan">>, flag}, + fun(F) -> riakc_flag:enable(F) end, + M) end, + Map17). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "enterprise_plan_flag": "disable", + "family_plan_flag": "disable", + "free_plan_flag": "enable" + } + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag==" + } + ' +``` + +The value of a flag can be retrieved at any time: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +boolean enterprisePlan = response.getDatatype() + .getMap("annika_info") + .getFlag("enterprise_plan") + .view(); +``` + +```ruby +map.maps['annika_info'].flags['enterprise_plan'] + +# false +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false +``` + +```python +map.reload().maps['annika_info'].flags['enterprise_plan'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Flags["enterprise_plan"]; +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var enterprisePlan = + rslt.map.maps.annika_info.flags.enterprise_plan; +}); +``` + +```erlang +riakc_map:dirty_value(Map18). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +It's also important to track the number of purchases that Annika has +made with our company. 
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "add": "tango dancing"
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+We can remove that interest in just the way that we would expect:
+
+```java
+// Using our "ahmedMap" location from above. As with other removals,
+// we first fetch the map's opaque context:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+
+SetUpdate su = new SetUpdate().remove("tango dancing");
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("interests", su);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].sets['interests'].remove('tango dancing')
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('tango dancing');
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($response->getMap()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].sets['interests'].discard('tango dancing')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing");
+
+// Note: using Context from previous response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+client.Execute(builder.Build());
+```
+
+```javascript
+var options = {
+  bucketType: 'maps',
+  bucket: 'customers',
+  key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+  if (err) {
+    throw new Error(err);
+  }
+
+  var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+  var annika_map = mapOp.map('annika_info');
+  annika_map.removeFromSet('interests', 'tango dancing');
+
+  options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp,
+    context: rslt.context
+  };
+
+  client.updateMap(options, function (err, rslt) {
+    if (err) {
+      throw new Error(err);
+    }
+  });
+});
+```
+
+```erlang
+Map21 = riakc_map:update(
+  {<<"annika_info">>, map},
+  fun(M) -> riakc_map:update(
+    {<<"interests">>, set},
+    fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end,
+    M) end,
+  Map20).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "remove": "tango dancing"
+          }
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+  }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('first_purchase', true)
+    ->updateRegister('amount', '1271')
+    ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+  bucketType: 'maps',
+  bucket: 'customers',
+  key: 'ahmed_info',
+  op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+  if (err) {
+    throw new 
Error(err);
+  }
+});
+```
+
+```erlang
+Map22 = riakc_map:update(
+  {<<"annika_info">>, map},
+  fun(M) -> riakc_map:update(
+    {<<"purchase">>, map},
+    fun(PM) -> riakc_map:update(
+      {<<"first_purchase">>, flag},
+      fun(R) -> riakc_flag:enable(R) end,
+      PM) end,
+    M) end,
+  Map21
+).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "purchase_map": {
+            "update": {
+              "first_purchase_flag": "enable",
+              "amount_register": "1271",
+              "items_set": {
+                "add": "large widget"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/data-types/sets.md b/content/riak/kv/2.9.8/developing/data-types/sets.md
new file mode 100644
index 0000000000..a1847c7964
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/data-types/sets.md
@@ -0,0 +1,773 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Sets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Sets"
+    identifier: "data_types_sets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/data-types/sets
+  - /riak/kv/2.9.8/dev/using/data-types/sets
+  - /riak/2.9.8/dev/data-modeling/data-types/sets
+  - /riak/kv/2.9.8/dev/data-modeling/data-types/sets
+---
+
+Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps).
+
+Sets are collections of unique binary values (such as strings); each value can appear in a given set only once.
+
+For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `set`:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+```
+
+> **Note**
+>
+> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket properties associated with that type are correct:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `sets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: set
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate sets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+After creating and activating our new `sets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket_name>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+  bucketType: 'sets',
+  bucket: 'travel',
+  key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. 
The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('cities', new \Basho\Riak\Bucket('travel', 'sets'));
+```
+
+```python
+travel = client.bucket_type('sets').bucket('travel')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak set.
+cities_set = travel.new('cities')
+
+# You can also create a reference to a set explicitly:
+from riak.datatypes import Set
+
+cities_set = Set(travel, 'cities')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the set with which we want to
+// interact:
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+```
+
+```javascript
+// Now we'll create an options object for the set with which we want to
+// interact:
+var options = {
+  bucketType: 'sets',
+  bucket: 'travel',
+  key: 'cities'
+};
+```
+
+```erlang
+CitiesSet = riakc_set:new().
+
+%% Sets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty set through the HTTP interface. Sets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our set is empty. We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+count($set->getData());
+```
+
+```python
+len(cities_set) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+FetchSet fetchSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchSetCommand);
+SetResponse response = fetchSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+  bucketType: 'sets',
+  bucket: 'travel',
+  key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+  if (err) {
+    throw new Error(err);
+  }
+
+  if (rslt.notFound) {
+    logger.info("set 'cities' is not found!");
+  }
+});
+```
+
+```erlang
+riakc_set:size(CitiesSet) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% set that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a Set
+
+But let's say that we read a travel brochure saying that Toronto and
+Montreal are nice places to go. 
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+        .remove("Montreal")
+        .add("Hamilton")
+        .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Hamilton')
+    ->add('Ottawa')
+    ->remove('Montreal')
+    ->atLocation($location)
+    ->withContext($response->getSet()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse response = cmd.Response;
+
+// using System.Linq
+var responseStrings = response.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+  bucketType: 'sets',
+  bucket: 'travel',
+  key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+  if (err) {
+    throw new Error(err);
+  }
+
+  // NB: clone package https://www.npmjs.com/package/clone
+  var update_opts = clone(options);
+  update_opts.context = rslt.context;
+  update_opts.additions = ['Hamilton', 'Ottawa'];
+  update_opts.removals = ['Montreal'];
+
+  client.updateSet(update_opts, function (err, rslt) {
+    if (err) {
+      throw new Error(err);
+    }
+  });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4).
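+
+%% As above, del_element/2 and add_element/2 only accumulate operations
+%% locally; nothing reaches Riak until the set is written back, e.g.
+%% with riakc_pb_socket:update_type/4.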
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}
+
+curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \
+  -H "Content-Type: application/json" \
+  -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}'
+```
+
+## Retrieve a Set
+
+Now, we can check on which cities are currently in our set:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+#<Set: {"Hamilton", "Ottawa", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+var_dump($set->getData());
+```
+
+```python
+cities_set.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would
+# return frozenset([]).
+
+cities_set.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions or deletions.
+cities_set.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in setResponse.AsStrings)
+{
+    Console.WriteLine("Cities Set Value: {0}", value);
+}
+
+// Output:
+// Cities Set Value: Hamilton
+// Cities Set Value: Ottawa
+// Cities Set Value: Toronto
+```
+
+```javascript
+var options = {
+  bucketType: 'sets',
+  bucket: 'travel',
+  key: 'cities'
+};
+client.fetchSet(options, function(err, rslt) {
+  if (err) {
+    throw new Error(err);
+  }
+
+  logger.info("cities set values: '%s'",
+    rslt.values.join(', '));
+});
+
+// Output:
+// info: cities set values: 'Hamilton, Ottawa, Toronto'
+```
+
+```erlang
+riakc_set:dirty_value(CitiesSet5).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_set:value(CitiesSet5).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"sets">>,<<"travel">>},
+                                        <<"cities">>).
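+
+%% riakc_set:value(SetX) then returns the members as stored on the
+%% server.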
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="}
+
+# You can also fetch the value of the set without the context included:
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false
+
+# Response
+{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]}
+```
+
+## Find Set Member
+
+Or we can see whether our set includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('Vancouver', $set->getData()); # false
+
+in_array('Ottawa', $set->getData()); # true
+```
+
+```python
+'Vancouver' in cities_set
+# False
+
+'Ottawa' in cities_set
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver");
+bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa");
+```
+
+```javascript
+// Use the standard JavaScript array method indexOf() on rslt.values
+// from a fetchSet callback:
+
+var cities_set = rslt.values;
+cities_set.indexOf('Vancouver'); // if present, index is >= 0
+cities_set.indexOf('Ottawa'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, CitiesSet5 is the most "recent" set from the
+%% standpoint of our application.
+
+riakc_set:is_element(<<"Vancouver">>, CitiesSet5).
+riakc_set:is_element(<<"Ottawa">>, CitiesSet5).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of Set
+
+We can also determine the size of the set:
+
+```java
+// Using our "binarySet" from the fetch above:
+
+int numberOfCities = binarySet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($set->getData());
+```
+
+```python
+len(cities_set)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+setResponse.Values.Count();
+```
+
+```javascript
+// Use the standard JavaScript array property length on rslt.values
+// from a fetchSet callback:
+
+var cities_set_size = rslt.values.length;
+```
+
+```erlang
+riakc_set:size(CitiesSet5).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/faq.md b/content/riak/kv/2.9.8/developing/faq.md
new file mode 100644
index 0000000000..5a01678306
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/faq.md
@@ -0,0 +1,592 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Frequently Asked Questions"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Developing FAQ"
+    identifier: "developing_faq"
+    weight: 108
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.9.8/community/faqs/developing
+  - /riak/kv/2.9.8/community/faqs/developing
+---
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.8/using/performance/benchmarking
+[Bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask
+[Bucket Properties]: {{<baseurl>}}riak/kv/2.9.8/developing/usage
+[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/commit-hooks
+[Configuration Files]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{<baseurl>}}riak/kv/2.9.8/developing/client-libraries
+[MapReduce]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce
+[Memory]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/memory
+[Riak CS]: {{<baseurl>}}riak/cs/2.1.1
+[System Planning]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context#vector-clocks
+
+
+## General
+
+
+**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?**
+
+**A:**
+  If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example:
+
+  ```erlang
+  {bitcask, [
+      {data_root, "data/bitcask"},
+      {expiry_secs, 86400} %% Expire after a day
+  ]},
+  ```
+
+  There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0.
+
+  You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM.
+
+
+**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?**
+
+
+**A:**
+  Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free.
+
+  If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact.
+
+
+**Q: Can I list buckets or keys in production?**
+
+
+**A:**
+  It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size.
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3.
+
+  For this example, I am using simple small integers instead of the actual 160-bit partition index values for the sake of simplicity. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+  <table>
+    <tr><td>0-1-2</td><td>1-2-3</td><td>2-3-4</td><td>3-4-5</td></tr>
+    <tr><td>4-5-6</td><td>5-6-7</td><td>6-7-8</td><td>7-8-9</td></tr>
+    <tr><td>8-9-10</td><td>9-10-11</td><td>10-11-12</td><td>11-12-13</td></tr>
+    <tr><td>12-13-14</td><td>13-14-15</td><td>14-15-0</td><td>15-0-1</td></tr>
+  </table>
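+
+  To make the preflist construction concrete, the following is a small
+  illustrative Python sketch (a model of this example, not Riak source
+  code) that derives the same 16 preflists from the ring parameters:
+
+  ```python
+  # Each of the 16 partitions anchors a preflist of n_val consecutive
+  # vnodes on the ring (wrapping around at the end).
+  RING_SIZE = 16
+  N_VAL = 3
+
+  preflists = [tuple((start + i) % RING_SIZE for i in range(N_VAL))
+               for start in range(RING_SIZE)]
+
+  for preflist in preflists:
+      print("-".join(str(vnode) for vnode in preflist))
+  # 0-1-2, 1-2-3, ... 14-15-0, 15-0-1 -- the 16 preflists shown above
+  ```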
+ + Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes: + + + + + + + + + + + + + + + + +
+  <table>
+    <tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-2-5-8-10-13</td></tr>
+    <tr><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-4-7-10-13</td><td>0-3-5-7-10-13</td></tr>
+    <tr><td>0-3-5-8-10-13</td><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-7-10-13</td></tr>
+    <tr><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td></tr>
+    <tr><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td></tr>
+    <tr><td>0-3-6-9-12-15</td><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td></tr>
+    <tr><td>1-3-6-9-11-14</td><td>1-3-6-9-12-14</td><td>1-3-6-9-12-15</td><td>1-4-5-8-11-14</td></tr>
+    <tr><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td></tr>
+    <tr><td>1-4-7-8-11-14</td><td>1-4-7-9-11-14</td><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td></tr>
+    <tr><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td></tr>
+    <tr><td>1-4-7-10-13-15</td><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-4-7-9-12-15</td></tr>
+    <tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-6-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+    <tr><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td><td>2-5-8-9-12-15</td><td>2-5-8-10-12-15</td></tr>
+    <tr><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-13-15</td><td>2-5-8-11-14-15</td></tr>
+  </table>
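+
+  The count of 56 can be checked by brute force. This is an illustrative
+  Python sketch (a model of this example, not Riak code) that enumerates
+  every 6-vnode subset and keeps those that intersect all 16 preflists:
+
+  ```python
+  from itertools import combinations
+
+  RING_SIZE, N_VAL = 16, 3
+  COVER_SIZE = 6  # ceil(16 / 3)
+
+  preflists = [set((start + i) % RING_SIZE for i in range(N_VAL))
+               for start in range(RING_SIZE)]
+
+  covering_sets = [set(c) for c in combinations(range(RING_SIZE), COVER_SIZE)
+                   if all(pl & set(c) for pl in preflists)]
+
+  print(len(covering_sets))  # 56
+  ```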
+
+  When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency.
+  A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for over half of them to respond. This results in consistent responses to GET even when one of the vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may include only 1 member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+  Continuing with the above example, consider what happens if node C is force removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+  In this new 4-node configuration, any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read-repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+
+  So, making a couple of assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+      ```
+      a - 0-1-2
+      b - 6-7-8
+      c - 10-11-12
+      ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys will vary depending on the nodes chosen for the coverage set. Of the 56 possible covering sets ...
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+  <table>
+    <tr><td>0-2-5-8-10-13</td><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-5-8-10-13</td></tr>
+    <tr><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td></tr>
+    <tr><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td></tr>
+    <tr><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td><td>1-3-6-9-11-14</td></tr>
+    <tr><td>1-4-5-8-11-14</td><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-7-8-11-14</td></tr>
+  </table>
+ + * 24 sets (42.9%) will return 2 of the 3 keys: + + + + + + + + + + + +
+  <table>
+    <tr><th colspan="4"><code>{a,b}</code> (7 sets)</th></tr>
+    <tr><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td><td>0-3-6-9-12-15</td><td>1-3-6-9-12-14</td></tr>
+    <tr><td>1-3-6-9-12-15</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td><td></td></tr>
+    <tr><th colspan="4"><code>{a,c}</code> (12 sets)</th></tr>
+    <tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-3-4-7-10-13</td></tr>
+    <tr><td>0-3-5-7-10-13</td><td>0-3-6-7-10-13</td><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td></tr>
+    <tr><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td><td>1-4-7-10-13-15</td><td>1-4-7-9-11-14</td></tr>
+    <tr><th colspan="4"><code>{b,c}</code> (5 sets)</th></tr>
+    <tr><td>2-5-8-10-12-15</td><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-14-15</td></tr>
+    <tr><td>2-5-8-11-13-15</td><td></td><td></td><td></td></tr>
+  </table>
+ + * 10 sets (17.8%) will return only one of the 3 keys: + + + + + + + +
+  <table>
+    <tr><th colspan="4"><code>{a}</code> (2 sets)</th></tr>
+    <tr><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td><td></td><td></td></tr>
+    <tr><th colspan="4"><code>{b}</code> (4 sets)</th></tr>
+    <tr><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-5-6-9-12-15</td><td>2-5-8-9-12-15</td></tr>
+    <tr><th colspan="4"><code>{c}</code> (4 sets)</th></tr>
+    <tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td></tr>
+  </table>
+ + * 2 sets (3.6%) will not return any of the 3 keys + + +
+  <table>
+    <tr><td>2-4-7-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+  </table>
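+
+  These tallies can be reproduced with one more illustrative Python
+  sketch (again a model of this example, not Riak code): mark vnodes 2,
+  7, and 12 as empty, and for each covering set record which keys still
+  have a populated preflist member in the set:
+
+  ```python
+  from collections import Counter
+  from itertools import combinations
+
+  RING_SIZE, N_VAL, COVER_SIZE = 16, 3, 6
+  preflists = [set((s + i) % RING_SIZE for i in range(N_VAL))
+               for s in range(RING_SIZE)]
+  covering_sets = [set(c) for c in combinations(range(RING_SIZE), COVER_SIZE)
+                   if all(pl & set(c) for pl in preflists)]
+
+  empty_vnodes = {2, 7, 12}  # the unpopulated primaries from above
+  key_preflists = {'a': {0, 1, 2}, 'b': {6, 7, 8}, 'c': {10, 11, 12}}
+
+  tallies = Counter()
+  for cover in covering_sets:
+      # A key is returned only if the cover consults at least one
+      # non-empty member of that key's preflist.
+      returned = frozenset(k for k, pl in key_preflists.items()
+                           if (pl & cover) - empty_vnodes)
+      tallies[returned] += 1
+
+  for keys, count in sorted(tallies.items(), key=lambda kv: -len(kv[0])):
+      print(sorted(keys), count)
+  # {a,b,c}: 20 sets; {a,b}: 7; {a,c}: 12; {b,c}: 5;
+  # {a}: 2; {b}: 4; {c}: 4; no keys at all: 2
+  ```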
+ +**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. + +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories.
+
+ ```erlang
+ ClusterNode = 'riak@127.0.0.1'.
+
+ application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+ {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+ Ring2 = setelement(2, Ring, node()).
+ riak_core_ring_manager:set_my_ring(Ring2).
+ riak_core_ring_manager:write_ringfile().
+ [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()].
+ ```
+
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+ There isn't a limit on object size, but we suggest you keep it to no more than 1-2MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+ The storage per key is 40 bytes plus the key size and bucket name size.
+
+ Example:
+
+ Key size: 15 bytes.
+ Bucket Name size: 10 bytes.
+
+ Total size = 40 + 15 + 10 = **65 bytes**.
+
+
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+ It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+ The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+ Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster.
+
+ The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+
+**Q: Are Riak keys and buckets case sensitive?**
+
+
+**A:**
+ Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+ We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+ 1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+ 2. You will be able to upgrade Riak and your application independently of one another.
+ 3. When your application or Riak needs more capacity, you can scale them separately to meet your production needs.
+
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+ Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+
+**Q: How do I spread requests across---i.e. load balance---a Riak cluster?**
+
+
+**A:**
+ There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+ For further information see [System Planning].
+
+
+
+**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?**
+ There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node, a series of merges is kicked off and the total size of the data directory shrinks. Why does this happen?
+
+
+**A:**
+ Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows:
+
+ 1. List all of the data files in the Bitcask directory; note that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`)
+ 2. Remove the currently active file from the list; the active file is the one being actively written
+ 3. Look up file stats for each data file; these include percent fragmentation and number of dead bytes
+ 4. If any of the stats exceed the defined triggers, the Bitcask directory is merged
+
+ The default triggers for a Bitcask directory are:
+
+ * `{frag_merge_trigger, 60}, % >= 60% fragmentation`
+ * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB`
+
+ In the described scenario, merging has not occurred because none of the data files have tripped the merge triggers. After restarting the node, however, the previously active file is included in the merge trigger analysis and triggers a merge on the Bitcask directory.
+
+ If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default).
+
+
+**Q: Why do I see the same vtag multiple times when retrieving a list of siblings?**
+ When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling?
+
+
+**A:**
+ The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata.
+
+ It is possible to get siblings with the same vtag during vector clock pruning and read repair.
+
+ See [vector clocks] for more information.
+
+
+
+**Q: How should I structure larger data objects?**
+ I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs?
+
+
+**A:**
+ The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include:
+
+ 1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time?
+ 2. How likely are the objects to be updated at the same time by multiple processes?
+
+ If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. Generally, you will want to add links to connect the objects for easy fetching and traversal.
+
+
+**Q: Is there any way in Riak to limit access to a user or a group of users?**
+
+
+**A:**
+ Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication.
+
+ If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it.
+
+
+**Q: Is there a way to enforce a schema on data in a given bucket?**
+ Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error.
+
+
+**A:**
+ Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. That document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in JavaScript that restricts non-JSON content.
+
+
+**Q: How does the Erlang Riak Client manage node failures?**
+ Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down?
+
+
+**A:**
+ The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function.
+
+ * `queue_if_disconnected` (default: `false`) - requests will be queued when the connection to the server is lost.
+ * `auto_reconnect` (default: `false`) - if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`.
+
+ If both options are `false`, connection errors will be returned to the process making requests as `{error, Reason}` tuples.
+
+
+**Q: Is there a limiting factor for the number of buckets in a cluster?**
+
+
+**A:**
+ As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node.
+
+ More on [Bucket Properties].
+
+
+**Q: Is it possible to configure a single bucket's properties in `app.config`?**
+
+
+**A:**
+ Not for a specific bucket, only the defaults. However, you should only need to change a bucket's properties once, since after that the settings will be reflected in the ring state.
+
+ You can read more on `app.config` in [Configuration Files].
+
+
+**Q: Is there a simple command to delete a bucket?**
+
+
+**A:**
+ There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work:
+
+ ```curl
+ curl -X DELETE http://your-host:8098/riak/your-bucket
+ ```
+
+
+**Q: Can Riak be configured to fail an update instead of generating a conflict?**
+
+
+**A:**
+ No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do.
+
+
+**Q: How can I limit the number of keys retrieved?**
+
+
+**A:**
+ You'll need to use a [MapReduce] job for this.
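+
+ As a sketch, one approach (our own combination, not a stock recipe) is to pair the `reduce_identity` function shown earlier with a small custom JavaScript reduce phase that truncates its input; reduce phases may run more than once, but truncation tolerates being re-applied:
+
+ ```json
+ {
+   "inputs": "test",
+   "query": [
+     {
+       "reduce": {
+         "language": "erlang",
+         "module": "riak_kv_mapreduce",
+         "function": "reduce_identity"
+       }
+     },
+     {
+       "reduce": {
+         "language": "javascript",
+         "source": "function(values) { return values.slice(0, 10); }"
+       }
+     }
+   ]
+ }
+ ```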
+ + You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster. It will only reduce load on your client. + + +**Q: How is the real hash value for replicas calculated based on the preflist?** + + +**A:** + The hash is calculated first and then the next subsequent *N* partitions are chosen for the preflist. + + +**Q: Do client libraries support load balancing/round robin?** + + +**A:** + + * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred. + * The Java client is strictly round robin, but with retries built in. + * The Python client also follows round robin without retries. + * The Erlang client does not support any load balancing. + +## MapReduce + + +**Q: Does the number of keys in a bucket affect the performance of MapReduce?** + + +**A:** + Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run. + + +**Q: How do I filter out `not_found` from MapReduce results?** + If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys. + + +**A:** + There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list]. + + +**Q: Is it possible to call a reduce function at specific intervals during a map function?** + When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often. + + +**A:** + Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce. + + +**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?** + + +**A:** + Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase. + + +**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?** + + +**A:** + This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com]. + + +**Q: Why does MapReduce return a JSON object on occasion instead of an array?** + + +**A:** + `mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---is turned into a hash: + + ```erlang + list_to_binary(mochijson2:encode([{a , b}, {foo, bar}])). + <<"{\"a\":\"b\",\"foo\":\"bar\"}">> + ``` + + JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists. 
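+
+ A sketch of the workaround (the console output shown is our expectation of what `mochijson2` would produce, since it encodes atoms as JSON strings):
+
+ ```erlang
+ %% Length-2 lists do not match the proplist heuristic, so the result
+ %% stays a JSON array of arrays:
+ list_to_binary(mochijson2:encode([[a, b], [foo, bar]])).
+ <<"[[\"a\",\"b\"],[\"foo\",\"bar\"]]">>
+ ```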
+ + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started.md b/content/riak/kv/2.9.8/developing/getting-started.md new file mode 100644 index 0000000000..208371a745 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started.md @@ -0,0 +1,51 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +aliases: +--- + +[install index]: {{}}riak/kv/2.9.8/setup/installing +[dev client libraries]: {{}}riak/kv/2.9.8/developing/client-libraries + +Welcome, new Riak developer! This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
+* [Java]({{}}riak/kv/2.9.8/developing/getting-started/java)
+* [Ruby]({{}}riak/kv/2.9.8/developing/getting-started/ruby)
+* [Python]({{}}riak/kv/2.9.8/developing/getting-started/python)
+* [C Sharp]({{}}riak/kv/2.9.8/developing/getting-started/csharp)
+* [Node.js]({{}}riak/kv/2.9.8/developing/getting-started/nodejs)
+* [Erlang]({{}}riak/kv/2.9.8/developing/getting-started/erlang)
+* [PHP]({{}}riak/kv/2.9.8/developing/getting-started/php)
+* [Go]({{}}riak/kv/2.9.8/developing/getting-started/golang)
+ +### Community-supported Client Libraries + +Please see our [client libraries][dev client libraries] page for a listing of +community-supported clients. + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/csharp.md b/content/riak/kv/2.9.8/developing/getting-started/csharp.md new file mode 100644 index 0000000000..ba708e352b --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/csharp.md @@ -0,0 +1,86 @@ +--- +title: "Getting Started with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "C Sharp" + identifier: "getting_started_csharp" + weight: 103 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/csharp + - /riak/kv/2.9.8/dev/taste-of-riak/csharp +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.8/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. + +### Client Setup + +Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager. + +{{% note title="Configuring for a remote cluster" %}} +By default, the Riak .NET Client will add a section to your `app.config` file +for a four node local cluster. If you are using a remote cluster, open up +`app.config` and change the `hostAddress` values to point to nodes in your +remote cluster. +{{% /note %}} + +### Connecting to Riak + +Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object. + +```csharp +using System; +using RiakClient; + +namespace TasteOfRiak +{ + class Program + { + static void Main(string[] args) + { + // don't worry, we'll use this string later + const string contributors = "contributors"; + IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig"); + IRiakClient client = cluster.CreateClient(); + } + } +} +``` + +This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndpoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak. + +Let's make sure the cluster is online. Add this to your `Main` method: + +```csharp +var pingResult = client.Ping(); + +if (pingResult.IsSuccess) +{ + Console.WriteLine("pong"); +} +else +{ + Console.WriteLine("Are you sure Riak is running?"); + Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage); +} +``` + +This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes. + +We are now ready to start interacting with Riak. 
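+
+Since the endpoint implements `IDisposable`, one tidy pattern (a sketch assuming the same `riakConfig` section as above) is to wrap it in a `using` block so it is disposed automatically:
+
+```csharp
+using (IRiakEndpoint endpoint = RiakCluster.FromConfig("riakConfig"))
+{
+    IRiakClient client = endpoint.CreateClient();
+    var pingResult = client.Ping();
+    Console.WriteLine(pingResult.IsSuccess ? "pong" : "no pong");
+} // endpoint is disposed here, releasing its connections
+```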
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.8/developing/getting-started/csharp/crud-operations) + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/2.9.8/developing/getting-started/csharp/crud-operations.md new file mode 100644 index 0000000000..d7724ddaf9 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/csharp/crud-operations.md @@ -0,0 +1,148 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "CRUD Operations" + identifier: "getting_started_csharp_crud" + weight: 100 + parent: "getting_started_csharp" +toc: true +aliases: +--- + +### Creating Objects In Riak + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak. + +The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At the most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak. + +Add the `RiakClient.Models` namespace to your using directive. Your usings should look like this: + +```csharp +using System; +using System.Collections.Generic; +using RiakClient; +using RiakClient.Models; +``` + +Add the `Person` class to the `TasteOfRiak` namespace: + +```csharp +public class Person +{ + public string EmailAddress { get; set; } + public string FirstName { get; set; } + public string LastName { get; set; } +} +``` + +Now let's create some people! + +```csharp +var people = new[] +{ + new Person { + EmailAddress = "bashoman@basho.com", + FirstName = "Basho", + LastName = "Man" + }, + new Person { + EmailAddress = "johndoe@gmail.com", + FirstName = "John", + LastName = "Doe" + } +}; + +foreach (var person in people) +{ + var o = new RiakObject(contributors, person.EmailAddress, person); + var putResult = client.Put(o); + + if (putResult.IsSuccess) + { + Console.WriteLine("Successfully saved {1} to bucket {0}", o.Key, o.Bucket); + } + else + { + Console.WriteLine("Are you *really* sure Riak is running?"); + Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage); + } +} +``` + +In this sample, we create a collection of `Person` objects and then save each `Person` to Riak. + +Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`. + +Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be an error message displaying the result code and a helpful error message. + +### Reading from Riak + +Let's find a person! + +```csharp +var result = client.Get(contributors, "bashoman@basho.com"); +if (result.IsSuccess) +{ + bashoman = result.Value.GetObject(); + Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage); +} +``` + +We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult` which, like other RiakResults, helpfully encapsulates the communication with Riak. 
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/2.9.8/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..9d67f1a841 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,111 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/2.9.8/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13`
+| `Timelines` | `<username>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br> `marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting UTC datetime in an [ISO 8601][iso_8601]
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period, so
+that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, please refer to
+[this source code][2] for the repositories that we'll be using.
+
+[This console application][3] exercises the code that we've written.
+
+The repository pattern and `TimelineManager` help with a few things:
+
+ - Seeing if an object exists before creating a new one
+ - Keeping our buckets and key names consistent
+ - Providing a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
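+
+To make those key patterns concrete, here is a minimal sketch (hypothetical helpers, not part of the sample repositories) of how such natural keys might be composed:
+
+```csharp
+// Hypothetical helpers illustrating the natural key patterns above.
+static string MsgKey(string userName, DateTime createdUtc)
+{
+    // <username>_<datetime>, e.g. "joeuser_2014-03-06T02:05:13"
+    return string.Format("{0}_{1:yyyy-MM-ddTHH:mm:ss}", userName, createdUtc);
+}
+
+static string TimelineKey(string owner, string type, DateTime dateUtc)
+{
+    // <owner>_<type>_<date>, e.g. "joeuser_Sent_2014-03-06"
+    return string.Format("{0}_{1}_{2:yyyy-MM-dd}", owner, type, dateUtc);
+}
+```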
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/csharp/querying.md b/content/riak/kv/2.9.8/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..4ba3e168df --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/csharp/querying.md @@ -0,0 +1,214 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/querying-csharp + - /riak/kv/2.9.8/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+ +```csharp +Console.WriteLine("Creating Data"); +Customer customer = CreateCustomer(); +IEnumerable orders = CreateOrders(customer); +OrderSummary orderSummary = CreateOrderSummary(customer, orders); + +Console.WriteLine("Starting Client"); +using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = endpoint.CreateClient(); + + Console.WriteLine("Storing Data"); + + client.Put(ToRiakObject(customer)); + + foreach (Order order in orders) + { + // NB: this adds secondary index data as well + client.Put(ToRiakObject(order)); + } + + client.Put(ToRiakObject(orderSummary)); + + ... + ... + ... +} +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```csharp +Console.WriteLine("Fetching related data by shared key"); +string key = "1"; + +var result = client.Get(customersBucketName, key); +CheckResult(result); +Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result)); + +result = client.Get(orderSummariesBucketName, key); +CheckResult(result); +Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result)); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.8/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will make a note of where +secondary index data is added to our model objects. 
+
+```csharp
+private static RiakObject ToRiakObject(Order order)
+{
+    var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString());
+    var riakObject = new RiakObject(orderRiakObjectId, order);
+
+    IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName);
+    salesPersonIdIndex.Add(order.SalesPersonId.ToString());
+
+    BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName);
+    orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd"));
+
+    return riakObject;
+}
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders: we'll look up the orders
+by searching the `SalespersonId` integer index for Jane's id of `9000`.
+
+```csharp
+// Query for order keys where the SalesPersonId index is set to 9000
+var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName);
+RiakResult<RiakIndexResult> indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here.
+CheckResult(indexRiakResult);
+RiakIndexResult indexResult = indexRiakResult.Value;
+Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
+```
+
+Which returns:
+
+```text
+Jane's orders (key values): 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id; next let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+```csharp
+// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
+riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName);
+indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here.
+CheckResult(indexRiakResult);
+indexResult = indexRiakResult.Value;
+Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
+```
+
+Which returns:
+
+```text
+October orders (key values): 1, 2
+```
+
+We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/erlang.md b/content/riak/kv/2.9.8/developing/getting-started/erlang.md new file mode 100644 index 0000000000..f1bfab51c2 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/erlang.md @@ -0,0 +1,59 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/erlang + - /riak/kv/2.9.8/dev/taste-of-riak/erlang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.8/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let’s create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.8/developing/getting-started/erlang/crud-operations) + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/2.9.8/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..a1e5e6be8d --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,172 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let’s store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}. 
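+%% Non-binary values such as this tuple are serialized by the client with
+%% term_to_binary/1, so reads will need binary_to_term/1 (shown below).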
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
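+%% binary_to_term/1 reverses the client's term_to_binary/1 encoding,
+%% printing the original #book{} record.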
+```
+
+Next let’s clean up our mess:
+
+```erlang
+riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>).
+riakc_pb_socket:stop(Pid).
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.9.8/developing/getting-started/erlang/object-modeling.md
new file mode 100644
index 0000000000..72c70f48f5
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/erlang/object-modeling.md
@@ -0,0 +1,342 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Erlang"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Object Modeling"
+    identifier: "getting_started_erlang_object"
+    weight: 102
+    parent: "getting_started_erlang"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/object-modeling-erlang
+  - /riak/kv/2.9.8/dev/taste-of-riak/object-modeling-erlang
+---
+
+To get started, let's create the records that we'll be using.
+
+{{% note title="Code Download" %}}
+You can also download the code for this chapter at
+[Github](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema).
+
+The Github version includes Erlang type specifications which have been omitted
+here for brevity.
+{{% /note %}}
+
+
+```erlang
+%% msgy.hrl
+
+-define(USER_BUCKET, <<"Users">>).
+-define(MSG_BUCKET, <<"Msgs">>).
+-define(TIMELINE_BUCKET, <<"Timelines">>).
+-define(INBOX, "Inbox").
+-define(SENT, "Sent").
+
+-record(user, {user_name, full_name, email}).
+
+-record(msg, {sender, recipient, created, text}).
+
+-record(timeline, {owner, msg_type, msgs}).
+```
+
+We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.8/developing/usage/bucket-types) here, so we don't need to specify one.
+
+To use these records to store data, we will first have to create a user
+record. Then, when a user creates a message, we will append that message
+to one or more timelines. If it's a private message, we'll append it to
+the Recipient's `Inbox` timeline and to the User's own `Sent` timeline.
+If it's a group message, we'll append it to the Group's timeline, as
+well as to the User's `Sent` timeline.
+
+#### Buckets and keys revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<username>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br> `marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period, so
+that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```erlang
+%% user_repository.erl
+
+-module(user_repository).
+-export([save_user/2,
+         get_user/2]).
+-include("msgy.hrl").
+
+save_user(ClientPid, User) ->
+    RUser = riakc_obj:new(?USER_BUCKET,
+                          list_to_binary(User#user.user_name),
+                          User),
+    riakc_pb_socket:put(ClientPid, RUser).
+
+get_user(ClientPid, UserName) ->
+    {ok, RUser} = riakc_pb_socket:get(ClientPid,
+                                      ?USER_BUCKET,
+                                      list_to_binary(UserName)),
+    binary_to_term(riakc_obj:get_value(RUser)).
+```
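+
+A quick usage sketch for the module above (a hypothetical console session; it assumes the record definitions have been loaded into the shell, e.g. via `rr("msgy.hrl")`):
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017).
+Joe = #user{user_name="joeuser", full_name="Joe User", email="joe@example.com"}.
+user_repository:save_user(Pid, Joe).
+Joe =:= user_repository:get_user(Pid, "joeuser"). %% true
+```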
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
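+
+%% main/0 wires the repositories together: it saves two users, posts a
+%% message from one to the other, then reads the first message back out
+%% of the recipient's inbox timeline.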
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/erlang/querying.md b/content/riak/kv/2.9.8/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..2ae871882f --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/erlang/querying.md @@ -0,0 +1,308 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/querying-erlang + - /riak/kv/2.9.8/dev/taste-of-riak/querying-erlang +--- + + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.9.8/developing/key-value-modeling). 
+ +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle= Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +## Remember to replace the ip and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). + +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj). 
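+
+%% Riak keys are binaries, which is why the integer ids above and below
+%% are run through integer_to_list/1 and then list_to_binary/1.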
+
+StoreOrder = fun(Order) ->
+  OrderObj = riakc_obj:new(OrderBucket,
+                           list_to_binary(
+                             integer_to_list(
+                               Order#order.order_id)),
+                           Order),
+  riakc_pb_socket:put(Pid, OrderObj)
+end.
+
+lists:foreach(StoreOrder, Orders).
+
+
+OrderSummaryObj = riakc_obj:new(OrderSummariesBucket,
+                                list_to_binary(
+                                  integer_to_list(
+                                    OrderSummary#order_summary.customer_id)),
+                                OrderSummary).
+
+riakc_pb_socket:put(Pid, OrderSummaryObj).
+
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```erlang
+{ok, FetchedCustomer} = riakc_pb_socket:get(Pid,
+                                            CustomerBucket,
+                                            <<"1">>).
+{ok, FetchedSummary} = riakc_pb_socket:get(Pid,
+                                           OrderSummariesBucket,
+                                           <<"1">>).
+rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)),
+    binary_to_term(riakc_obj:get_value(FetchedSummary))}).
+```
+
+Which returns our amalgamated objects:
+
+```erlang
+{#customer{customer_id = 1,name = "John Smith",
+           address = "123 Main Street",city = "Columbus",
+           state = "Ohio",zip = "43210",phone = "+1-614-555-5555",
+           created_date = {{2013,10,1},{14,30,26}}},
+ #order_summary{customer_id = 1,
+                summaries = [#order_summary_entry{order_id = 1,
+                                                  total = 415.98,
+                                                  order_date = {{2013,10,1},{14,42,26}}},
+                             #order_summary_entry{order_id = 2,total = 359.99,
+                                                  order_date = {{2013,10,15},{16,43,16}}},
+                             #order_summary_entry{order_id = 3,total = 74.98,
+                                                  order_date = {{2013,11,3},{17,45,28}}}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```erlang
+FormatDate = fun(DateTime) ->
+  {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime,
+  lists:concat([Year,Month,Day,Hour,Min,Sec])
+end. 
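+
+%% Note: lists:concat/1 does not zero-pad, so 2013-10-01 becomes
+%% "2013101..." rather than "20131001...". The range query below still
+%% works for this sample data, but a fixed-width variant (a sketch, not
+%% part of the original example) would be:
+%%
+%%   FormatDate = fun({{Y,M,D},{H,Mi,S}}) ->
+%%     lists:flatten(io_lib:format("~4..0B~2..0B~2..0B~2..0B~2..0B~2..0B",
+%%                                 [Y,M,D,H,Mi,S]))
+%%   end.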
+
+AddIndicesToOrder = fun(OrderKey) ->
+  {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket,
+                                    list_to_binary(integer_to_list(OrderKey))),
+
+  OrderData = binary_to_term(riakc_obj:get_value(Order)),
+  OrderMetadata = riakc_obj:get_update_metadata(Order),
+
+  MD1 = riakc_obj:set_secondary_index(OrderMetadata,
+                                      [{{binary_index, "order_date"},
+                                        [FormatDate(OrderData#order.order_date)]}]),
+
+  MD2 = riakc_obj:set_secondary_index(MD1,
+                                      [{{integer_index, "salesperson_id"},
+                                        [OrderData#order.salesperson_id]}]),
+
+  Order2 = riakc_obj:update_metadata(Order,MD2),
+  riakc_pb_socket:put(Pid,Order2)
+end.
+
+lists:foreach(AddIndicesToOrder, [1,2,3]).
+
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we
+have to add entries to the indices at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```erlang
+riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"3">>],
+                      undefined,undefined}}
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id; next, let's use a "binary" index. Let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`order_date_bin` index for entries between `20131001` and `20131031`.
+
+```erlang
+riakc_pb_socket:get_index_range(Pid, OrderBucket,
+                                {binary_index, "order_date"},
+                                <<"20131001">>, <<"20131031">>).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"2">>],
+                      undefined,undefined}}
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So, to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indices can have either Integer or Binary(String) keys.
+* You can search for specific values, or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/golang.md b/content/riak/kv/2.9.8/developing/getting-started/golang.md
new file mode 100644
index 0000000000..e931c27ad4
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/golang.md
@@ -0,0 +1,82 @@
+---
+title: "Getting Started with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Go"
+    identifier: "getting_started_go"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/golang
+  - /riak/kv/2.9.8/dev/taste-of-riak/golang
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.8/using/running-a-cluster) first and ensure you have
+[a working installation of Go](http://golang.org/doc/install). 
+
+## Client Setup
+
+First install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"sync"
+
+	riak "github.com/basho/riak-go-client"
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+	var err error
+
+	// un-comment-out to enable debug logging
+	// riak.EnableDebugLogging = true
+
+	o := &riak.NewClientOptions{
+		RemoteAddresses: []string{util.GetRiakAddress()},
+	}
+
+	var c *riak.Client
+	c, err = riak.NewClient(o)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	defer func() {
+		if err := c.Stop(); err != nil {
+			util.ErrExit(err)
+		}
+	}()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{<baseurl>}}riak/kv/2.9.8/developing/getting-started/golang/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/golang/crud-operations.md b/content/riak/kv/2.9.8/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..122c15adb2
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,376 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+aliases:
+---
+
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+	val1 := uint32(1)
+	val1buf := make([]byte, 4)
+	binary.LittleEndian.PutUint32(val1buf, val1)
+
+	val2 := "two"
+
+	val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+	var val3json []byte
+	val3json, err = json.Marshal(val3)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	bucket := "test"
+
+	util.Log.Println("Creating Objects In Riak...")
+
+	objs := []*riak.Object{
+		{
+			Bucket:      bucket,
+			Key:         "one",
+			ContentType: "application/octet-stream",
+			Value:       val1buf,
+		},
+		{
+			Bucket:      bucket,
+			Key:         "two",
+			ContentType: "text/plain",
+			Value:       []byte(val2),
+		},
+		{
+			Bucket:      bucket,
+			Key:         "three",
+			ContentType: "application/json",
+			Value:       val3json,
+		},
+	}
+
+	var cmd riak.Command
+	wg := &sync.WaitGroup{}
+
+	for _, o := range objs {
+		cmd, err = riak.NewStoreValueCommandBuilder().
+			WithContent(o). 
+			Build()
+		if err != nil {
+			util.ErrLog.Println(err)
+			continue
+		}
+		a := &riak.Async{
+			Command: cmd,
+			Wait:    wg,
+		}
+		if err := c.ExecuteAsync(a); err != nil {
+			util.ErrLog.Println(err)
+		}
+	}
+
+	wg.Wait()
+```
+
+In our first object, we have stored the integer 1 with the lookup key
+of `one`:
+
+```golang
+{
+	Bucket:      bucket,
+	Key:         "one",
+	ContentType: "application/octet-stream",
+	Value:       val1buf,
+}
+```
+
+For our second object, we stored a simple string value of `two` with a
+matching key:
+
+```golang
+{
+	Bucket:      bucket,
+	Key:         "two",
+	ContentType: "text/plain",
+	Value:       []byte(val2),
+}
+```
+
+Finally, the third object we stored was a bit of JSON:
+
+```golang
+{
+	Bucket:      bucket,
+	Key:         "three",
+	ContentType: "application/json",
+	Value:       val3json,
+}
+```
+
+## Reading Objects
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+Requesting the objects by key:
+
+```golang
+util.Log.Println("Reading Objects From Riak...")
+
+d := make(chan riak.Command, len(objs))
+
+for _, o := range objs {
+	cmd, err = riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(o.Key).
+		Build()
+	if err != nil {
+		util.ErrLog.Println(err)
+		continue
+	}
+	a := &riak.Async{
+		Command: cmd,
+		Wait:    wg,
+		Done:    d,
+	}
+	if err := c.ExecuteAsync(a); err != nil {
+		util.ErrLog.Println(err)
+	}
+}
+
+wg.Wait()
+close(d)
+```
+
+Now we can drain the channel of completed commands and verify that each
+fetched value matches the value we originally stored:
+
+```golang
+var obj3 *riak.Object // kept for the update example below
+
+for done := range d {
+	f := done.(*riak.FetchValueCommand)
+	/* un-comment to dump fetched object as JSON
+	if json, jerr := json.MarshalIndent(f.Response, "", "  "); jerr != nil {
+		util.ErrLog.Println(jerr)
+	} else {
+		util.Log.Println("fetched value: ", string(json))
+	}
+	*/
+	obj := f.Response.Values[0]
+	switch obj.Key {
+	case "one":
+		if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected {
+			util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+		}
+	case "two":
+		if actual, expected := string(obj.Value), val2; actual != expected {
+			util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+		}
+	case "three":
+		obj3 = obj
+		val3.MyValue = 0
+		if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil {
+			util.ErrLog.Println(jerr)
+		} else {
+			if actual, expected := val3.MyValue, int(3); actual != expected {
+				util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+			}
+		}
+	default:
+		util.ErrLog.Printf("unrecognized key: %s", obj.Key)
+	}
+}
+```
+
+## Updating Objects
+
+While some data may be static, other forms of data need to be
+updated.
+
+Let’s update some values:
+
+```golang
+util.Log.Println("Updating Object Three In Riak...")
+
+val3.MyValue = 42
+obj3.Value, err = json.Marshal(val3)
+if err != nil {
+	util.ErrExit(err)
+}
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+	WithContent(obj3).
+	WithReturnBody(true). 
+	Build()
+if err != nil {
+	util.ErrLog.Println(err)
+} else {
+	if err := c.Execute(cmd); err != nil {
+		util.ErrLog.Println(err)
+	}
+}
+
+svcmd := cmd.(*riak.StoreValueCommand)
+svrsp := svcmd.Response
+obj3 = svrsp.Values[0]
+val3.MyValue = 0
+if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil {
+	util.ErrLog.Println(jerr)
+} else {
+	if actual, expected := val3.MyValue, int(42); actual != expected {
+		util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected)
+	}
+}
+util.Log.Println("updated object key: ", obj3.Key)
+util.Log.Println("updated object value: ", val3.MyValue)
+```
+
+## Deleting Objects
+
+As a last step, we’ll demonstrate how to delete data. We build a
+`DeleteValue` command with the bucket and key of each object we want to
+remove, and execute it as before:
+
+```golang
+for _, o := range objs {
+	cmd, err = riak.NewDeleteValueCommandBuilder().
+		WithBucket(o.Bucket).
+		WithKey(o.Key).
+		Build()
+	if err != nil {
+		util.ErrLog.Println(err)
+		continue
+	}
+	a := &riak.Async{
+		Command: cmd,
+		Wait:    wg,
+	}
+	if err := c.ExecuteAsync(a); err != nil {
+		util.ErrLog.Println(err)
+	}
+}
+
+wg.Wait()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+
+For example, this `struct` that represents some information about
+a book:
+
+```golang
+type Book struct {
+	ISBN        string
+	Title       string
+	Author      string
+	Body        string
+	CopiesOwned uint16
+}
+
+book := &Book{
+	ISBN:        "1111979723",
+	Title:       "Moby Dick",
+	Author:      "Herman Melville",
+	Body:        "Call me Ishmael. Some years ago...",
+	CopiesOwned: 3,
+}
+```
+
+We now have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now:
+
+```golang
+var jbook []byte
+jbook, err = json.Marshal(book)
+if err != nil {
+	util.ErrExit(err)
+}
+
+bookObj := &riak.Object{
+	Bucket:      "books",
+	Key:         book.ISBN,
+	ContentType: "application/json",
+	Value:       jbook,
+}
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+	WithContent(bookObj).
+	WithReturnBody(false).
+	Build()
+if err != nil {
+	util.ErrLog.Println(err)
+} else {
+	if err := c.Execute(cmd); err != nil {
+		util.ErrLog.Println(err)
+	}
+}
+```
+
+If we fetch our book back and print the data:
+
+```golang
+cmd, err = riak.NewFetchValueCommandBuilder().
+	WithBucket("books").
+	WithKey(book.ISBN).
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+if err := c.Execute(cmd); err != nil {
+	util.ErrLog.Println(err)
+}
+
+fcmd := cmd.(*riak.FetchValueCommand)
+bookObj = fcmd.Response.Values[0]
+util.Log.Println(string(bookObj.Value))
+```
+
+The result is:
+
+```json
+{"ISBN":"1111979723","Title":"Moby Dick","Author":"Herman Melville",
+"Body":"Call me Ishmael. Some years ago...","CopiesOwned":3}
+```
+
+Now, let’s delete the book:
+
+```golang
+... 
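+// The documentation elides the delete code here; a minimal sketch,
+// reusing the client `c` and the `book` value from above, would be:
+//
+// cmd, err = riak.NewDeleteValueCommandBuilder().
+//     WithBucket("books").
+//     WithKey(book.ISBN).
+//     Build()
+// if err != nil {
+//     util.ErrExit(err)
+// }
+// if err := c.Execute(cmd); err != nil {
+//     util.ErrLog.Println(err)
+// }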
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.9.8/developing/getting-started/golang/object-modeling.md
new file mode 100644
index 0000000000..6d87e750f8
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/golang/object-modeling.md
@@ -0,0 +1,552 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Object Modeling"
+    identifier: "getting_started_go_object"
+    weight: 102
+    parent: "getting_started_go"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/object-modeling-golang
+  - /riak/kv/2.9.8/dev/taste-of-riak/object-modeling-golang
+---
+
+{{% note title="Code Download" %}}
+You can download the code for this chapter at
+[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models).
+{{% /note %}}
+
+To get started, let's create the models that we'll be using:
+
+```model.go
+package models
+
+type Model interface {
+	GetId() string
+	SetId(id string)
+}
+
+type modelImpl struct {
+	id string
+}
+
+func (m *modelImpl) SetId(id string) {
+	m.id = id
+}
+```
+
+Our user model:
+
+```user.go
+package models
+
+type User struct {
+	modelImpl
+	UserName string
+	FullName string
+	Email    string
+}
+
+func NewUser(userName, fullName, email string) *User {
+	u := &User{
+		UserName: userName,
+		FullName: fullName,
+		Email:    email,
+	}
+	u.SetId(userName)
+	return u
+}
+
+func (u *User) GetId() string {
+	return u.UserName
+}
+```
+
+And our message model:
+
+```msg.go
+package models
+
+import (
+	"fmt"
+	"time"
+
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+type Msg struct {
+	modelImpl
+	Sender    string
+	Recipient string
+	Text      string
+	Created   time.Time
+}
+
+func NewMsg(sender, recipient, text string) *Msg {
+	m := &Msg{
+		Sender:    sender,
+		Recipient: recipient,
+		Text:      text,
+		Created:   time.Now(),
+	}
+	m.SetId(m.GetId())
+	return m
+}
+
+func (m *Msg) GetId() string {
+	return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created))
+}
+```
+
+Our timeline model:
+
+```timeline.go
+package models
+
+type Timeline struct {
+	modelImpl
+	MsgKeys []string
+}
+
+type TimelineType byte
+
+const (
+	TimelineType_INBOX TimelineType = iota
+	TimelineType_SENT
+)
+
+func NewTimeline(id string) *Timeline {
+	t := &Timeline{}
+	t.id = id
+	return t
+}
+
+func (t *Timeline) AddMsg(msgKey string) {
+	t.MsgKeys = append(t.MsgKeys, msgKey)
+}
+
+func (t *Timeline) GetId() string {
+	return t.id
+}
+```
+
+We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) here, so we don't need to specify one.
+
+To use these records to store data, we will first have to create a user
+record. Then, when a user creates a message, we will append that message
+to one or more timelines. If it's a private message, we'll append it to
+the Recipient's `Inbox` timeline and to the User's own `Sent` timeline.
+If it's a group message, we'll append it to the Group's timeline, as
+well as to the User's `Sent` timeline.
+
+#### Buckets and keys revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br /> 
`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+	"encoding/json"
+	"errors"
+
+	riak "github.com/basho/riak-go-client"
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+	Get(key string, notFoundOk bool) (models.Model, error)
+	Save(models.Model) (models.Model, error)
+	getBucketName() string
+	getModel() models.Model
+	getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+	client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+	return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(notFoundOk).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	fcmd := cmd.(*riak.FetchValueCommand)
+
+	if notFoundOk && len(fcmd.Response.Values) == 0 {
+		return nil, nil
+	}
+
+	if len(fcmd.Response.Values) > 1 {
+		// Siblings present that need resolution
+		// Here we'll just return an unexpected error
+		return nil, ErrUnexpectedSiblings
+	} else {
+		return buildModel(r.getModel(), fcmd.Response.Values[0])
+	}
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	key := m.GetId()
+
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(true). 
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
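+
+As a quick aside, here is a minimal sketch of how this repository can be
+exercised on its own (the node address below is an assumption; adjust it to
+match your cluster). The full, runnable example appears at the end of this
+chapter:
+
+```golang
+package main
+
+import (
+	"fmt"
+
+	riak "github.com/basho/riak-go-client"
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+	repos "github.com/basho/taste-of-riak/go/ch03/repositories"
+)
+
+func main() {
+	// Assumes a Riak node listening on 127.0.0.1:8087
+	c, err := riak.NewClient(&riak.NewClientOptions{
+		RemoteAddresses: []string{"127.0.0.1:8087"},
+	})
+	if err != nil {
+		panic(err)
+	}
+	defer func() {
+		if err := c.Stop(); err != nil {
+			panic(err)
+		}
+	}()
+
+	userRepo := repos.NewUserRepository(c)
+
+	// Save a user, then read it back by its natural key
+	if _, err := userRepo.Save(models.NewUser("joeuser", "Joe User", "joe.user@basho.com")); err != nil {
+		panic(err)
+	}
+	m, err := userRepo.Get("joeuser", false)
+	if err != nil {
+		panic(err)
+	}
+	u := m.(*models.User)
+	fmt.Println(u.FullName) // "Joe User"
+}
+```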
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names.
+* How to choose natural keys based on how we want to partition our data.
+
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/golang/querying.md b/content/riak/kv/2.9.8/developing/getting-started/golang/querying.md
new file mode 100644
index 0000000000..698fb4bb49
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/golang/querying.md
@@ -0,0 +1,580 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Querying"
+    identifier: "getting_started_go_query"
+    weight: 101
+    parent: "getting_started_go"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/querying-golang
+  - /riak/kv/2.9.8/dev/taste-of-riak/querying-golang
+---
+
+## Go Version Setup
+
+For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix OS.
+
+>A Quick Note on Querying and Schemas:
+>
+>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, or as involved as having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+### Denormalization
+
+If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+### Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it. 
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+cmds = append(cmds, cmd)
+
+doneChan := make(chan riak.Command)
+errored = false
+for _, cmd := range cmds {
+	a := &riak.Async{
+		Command: cmd,
+		Done:    doneChan,
+	}
+	if eerr := c.ExecuteAsync(a); eerr != nil {
+		errored = true
+		util.ErrLog.Println(eerr)
+	}
+}
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+for i := 0; i < len(cmds); i++ {
+	select {
+	case d := <-doneChan:
+		if fv, ok := d.(*riak.FetchValueCommand); ok {
+			obj := fv.Response.Values[0]
+			switch obj.Bucket {
+			case customersBucket:
+				util.Log.Printf("Customer 1: %v", string(obj.Value))
+			case orderSummariesBucket:
+				util.Log.Printf("OrderSummary 1: %v", string(obj.Value))
+			}
+		} else {
+			util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+		}
+	case <-time.After(5 * time.Second):
+		util.ErrExit(errors.New("fetch operations took too long"))
+	}
+}
+```
+
+Which returns our amalgamated objects:
+
+```sh
+2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]}
+2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z"}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time:
+
+```golang
+util.Log.Println("Adding Index Data")
+
+// fetch orders to add index data
+cmds = cmds[:0]
+
+for _, order := range orders {
+	cmd, err = riak.NewFetchValueCommandBuilder().
+		WithBucket(ordersBucket).
+		WithKey(order.Id). 
+		Build()
+	if err != nil {
+		util.ErrExit(err)
+	}
+	cmds = append(cmds, cmd)
+}
+
+errored = false
+for _, cmd := range cmds {
+	a := &riak.Async{
+		Command: cmd,
+		Done:    doneChan,
+	}
+	if eerr := c.ExecuteAsync(a); eerr != nil {
+		errored = true
+		util.ErrLog.Println(eerr)
+	}
+}
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+errored = false
+for i := 0; i < len(cmds); i++ {
+	select {
+	case d := <-doneChan:
+		if fv, ok := d.(*riak.FetchValueCommand); ok {
+			obj := fv.Response.Values[0]
+			switch obj.Key {
+			case "1":
+				obj.AddToIntIndex("SalespersonId_int", 9000)
+				obj.AddToIndex("OrderDate_bin", "2013-10-01")
+			case "2":
+				obj.AddToIntIndex("SalespersonId_int", 9001)
+				obj.AddToIndex("OrderDate_bin", "2013-10-15")
+			case "3":
+				obj.AddToIntIndex("SalespersonId_int", 9000)
+				obj.AddToIndex("OrderDate_bin", "2013-11-03")
+			}
+			scmd, serr := riak.NewStoreValueCommandBuilder().
+				WithContent(obj).
+				Build()
+			if serr != nil {
+				util.ErrExit(serr)
+			}
+			a := &riak.Async{
+				Command: scmd,
+				Wait:    wg,
+			}
+			if eerr := c.ExecuteAsync(a); eerr != nil {
+				errored = true
+				util.ErrLog.Println(eerr)
+			}
+		} else {
+			util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+		}
+	case <-time.After(5 * time.Second):
+		util.ErrExit(errors.New("fetch operations took too long"))
+	}
+}
+
+if errored {
+	util.ErrExit(errors.New("error, exiting!"))
+}
+
+wg.Wait()
+close(doneChan)
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`:
+
+```golang
+util.Log.Println("Index Queries")
+
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+	WithBucket(ordersBucket).
+	WithIndexName("SalespersonId_int").
+	WithIndexKey("9000").
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+	util.ErrExit(eerr)
+}
+
+qcmd := cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+	util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 Jane's Orders, key: 3
+2015/12/29 09:44:10 Jane's Orders, key: 1
+```
+
+Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id; next, let's use a *binary* index.
+
+Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`:
+
+```golang
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+	WithBucket(ordersBucket).
+	WithIndexName("OrderDate_bin").
+	WithRange("2013-10-01", "2013-10-31").
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+	util.ErrExit(eerr)
+}
+
+qcmd = cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+	util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 October's Orders, key: 1
+2015/12/29 09:44:10 October's Orders, key: 2
+```
+
+Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/java.md b/content/riak/kv/2.9.8/developing/getting-started/java.md
new file mode 100644
index 0000000000..385a37fec1
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/java.md
@@ -0,0 +1,93 @@
+---
+title: "Getting Started with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Java"
+    identifier: "getting_started_java"
+    weight: 100
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/java
+  - /riak/kv/2.9.8/dev/taste-of-riak/java
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.8/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Java is required.
+
+## Client Setup
+
+To include the Riak Java client in your project, add it to your
+project's dependencies. Here is a Maven example:
+
+```xml
+<dependency>
+  <groupId>com.basho.riak</groupId>
+  <artifactId>riak-client</artifactId>
+  <version>2.1.1</version>
+</dependency>
+```
+
+Next, download
+[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java)
+source code for this tutorial, and save it to your working directory.
+
+{{% note title="Configuring for a local cluster" %}}
+The `TasteOfRiak.java` file that you downloaded is set up to communicate with
+a 1-node Riak cluster listening on `localhost` port 10017. We recommend
+modifying the connection info directly within the `setUpCluster()` method.
+{{% /note %}}
+
+If you execute the `TasteOfRiak.java` file within your IDE, you should
+see the following:
+
+```
+Basic object created
+Location object created for quote object
+StoreValue operation created
+Client object successfully created
+Object storage operation successfully completed
+Success! The object we created and the object we fetched have the same value
+Quote object successfully deleted
+Book object created
+Moby Dick information now stored in Riak
+Book object successfully fetched
+Success! All of our tests check out
+```
+
+Since Java doesn’t have a REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting Up the Cluster
+
+The first step in using the Riak Java client is to create a cluster
+object to facilitate all interactions with Riak. You'll see this on line
+72:
+
+```java
+RiakCluster cluster = setUpCluster();
+```
+
+This calls the private `setUpCluster` method which begins on line 25. 
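+
+For reference, a minimal sketch of what such a method can look like with the
+2.x Java client (the actual code in `TasteOfRiak.java` may differ in detail;
+the host and port are the local defaults mentioned above):
+
+```java
+// RiakNode and RiakCluster live in com.basho.riak.client.core
+private static RiakCluster setUpCluster() throws UnknownHostException {
+    // Assumes a single local node listening on port 10017
+    RiakNode node = new RiakNode.Builder()
+            .withRemoteAddress("127.0.0.1")
+            .withRemotePort(10017)
+            .build();
+    RiakCluster cluster = new RiakCluster.Builder(node).build();
+    cluster.start();
+    return cluster;
+}
+```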
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{<baseurl>}}riak/kv/2.9.8/developing/getting-started/java/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.9.8/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..d7190fc26b
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,206 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+aliases:
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on down the line.
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Iceman");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{<baseurl>}}riak/kv/2.9.8/developing/usage/updating-objects/)
+and [Conflict Resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/)
+documentation. 
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book.
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the more simple
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed up above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO. 
+
+To update the POJO, we would use `UpdateValue` by
+extending a new `BookUpdate` class as follows:
+
+```java
+public static class BookUpdate extends UpdateValue.Update<Book> {
+    private final Book update;
+    public BookUpdate(Book update){
+        this.update = update;
+    }
+
+    @Override
+    public Book apply(Book t) {
+        if(t == null) {
+            t = new Book();
+        }
+
+        t.author = update.author;
+        t.body = update.body;
+        t.copiesOwned = update.copiesOwned;
+        t.isbn = update.isbn;
+        t.title = update.title;
+
+        return t;
+    }
+}
+```
+
+Then using the `BookUpdate` class with our `mobyDick` object:
+
+```java
+mobyDick.copiesOwned = 5;
+BookUpdate updatedBook = new BookUpdate(mobyDick);
+
+UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation)
+        .withUpdate(updatedBook).build();
+UpdateValue.Response response = client.execute(updateValue);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{<baseurl>}}riak/kv/2.9.8/developing/usage/updating-objects/)
+and [Conflict Resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/)
+documentation.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/java/object-modeling.md b/content/riak/kv/2.9.8/developing/getting-started/java/object-modeling.md
new file mode 100644
index 0000000000..0ef51bf282
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/java/object-modeling.md
@@ -0,0 +1,432 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Object Modeling"
+    identifier: "getting_started_java_object"
+    weight: 102
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/object-modeling-java
+  - /riak/kv/2.9.8/dev/taste-of-riak/object-modeling-java
+---
+
+To get started, let's create the models that we'll be using.
+
+```java
+package com.basho.msgy.Models;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class Msg {
+    public String Sender;
+    public String Recipient;
+    public String Created;
+    public String Text;
+
+    public static Msg createNew(String sender, String recipient, String text) {
+        Msg msg = new Msg();
+        msg.Sender = sender;
+        msg.Recipient = recipient;
+        msg.Text = text;
+        msg.Created = GetCurrentISO8601Timestamp();
+        return msg;
+    }
+
+    private static String GetCurrentISO8601Timestamp() {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        // Java Dates don't have microsecond resolution :(
+        // Pad out to microseconds to match other examples. 
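+        // (SimpleDateFormat is not thread-safe; on Java 8+,
+        // java.time.format.DateTimeFormatter is a safer alternative,
+        // but we keep the original API here.)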
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.api.annotations.RiakBucketName;
+import com.basho.riak.client.api.annotations.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "Users";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br /> 
`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. With the
+Java client, we can use the `@RiakKey` annotation to tell the client
+that we want to use the `UserName` member as the key. It will
+automatically use that value in the future, instead of having to pass the
+key in as another parameter when storing a value.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<username>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```java
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class MsgRepository {
+
+    static final String BUCKET_NAME = "Msgs";
+    protected RiakClient client;
+
+    public MsgRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public Msg get(String msgKey) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        FetchValue.Response response = client.execute(fetch);
+        return response.getValue(Msg.class);
+    }
+
+    public String save(Msg msg) throws Exception {
+        // Msg has no key annotations, so compute the key and store at
+        // an explicit Location.
+        String msgKey = generateKey(msg);
+        Location loc = new Location(new Namespace(BUCKET_NAME), msgKey);
+        StoreValue store = new StoreValue.Builder(msg).withLocation(loc).build();
+        client.execute(store);
+        return msgKey;
+    }
+
+    private String generateKey(Msg msg) {
+        return msg.Sender + "_" + msg.Created;
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class TimelineRepository {
+
+    static final String BUCKET_NAME = "Timelines";
+    protected RiakClient client;
+    protected MsgRepository msgRepo;
+
+    public TimelineRepository(RiakClient client) {
+        this.client = client;
+        this.msgRepo = new MsgRepository(this.client);
+    }
+
+    public void postMsg(Msg msg) throws Exception {
+        String msgKey = msgRepo.save(msg);
+
+        // Post to recipient's Inbox timeline
+        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);
+
+        // Post to sender's Sent timeline
+        addToTimeline(msg, Timeline.TimelineType.Sent, msgKey);
+    }
+
+    private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception {
+        String timelineKey = generateKeyFromMsg(msg, type);
+
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        Timeline timeline = client.execute(fetch).getValue(Timeline.class);
+
+        if (timeline != null) {
+            timeline = addToExistingTimeline(timeline, msgKey);
+        } else {
+            timeline = createNewTimeline(msg, type, msgKey);
+        }
+
+        StoreValue store = new StoreValue.Builder(timeline).withLocation(loc).build();
+        client.execute(store);
+    }
+
+    public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) {
+        String owner = getOwner(msg, type);
+
+        Timeline newTimeline = new Timeline();
+        newTimeline.Owner = owner;
+        newTimeline.Type = type.toString();
+        newTimeline.Msgs.add(msgKey);
+
+        return newTimeline;
+    }
+
+    public Timeline addToExistingTimeline(Timeline timeline, String msgKey) {
+        timeline.Msgs.add(msgKey);
+        return timeline;
+    }
+
+    public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws Exception {
+        String timelineKey = generateKey(ownerUsername, type, date);
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        return client.execute(fetch).getValue(Timeline.class);
+    }
+
+    private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) {
+        String owner = getOwner(msg, type);
+        String dateString = msg.Created.substring(0, 10);
+        return generateKey(owner, type, dateString);
+    }
+
+    private String getOwner(Msg msg, Timeline.TimelineType type) {
+        if(type == Timeline.TimelineType.Inbox)
+            return msg.Recipient;
+        else
+            return msg.Sender;
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) {
+        String dateString = getIso8601DateStringFromDate(date);
+        return generateKey(ownerUsername, type, dateString);
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) {
+        return ownerUsername + "_" + type.toString() + "_" + dateString;
+    }
+
+    private String getIso8601DateStringFromDate(Date date) {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
+        df.setTimeZone(tz);
+        return df.format(date);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.User;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class UserRepository {
+    static final String BUCKET_NAME = "Users";
+    protected RiakClient client;
+
+    public UserRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public void save(User user) throws Exception {
+        // The @RiakBucketName and @RiakKey annotations on User tell the
+        // client where to store the object, so no Location is needed.
+        StoreValue store = new StoreValue.Builder(user).build();
+        client.execute(store);
+    }
+
+    public User get(String userName) throws Exception {
+        Location loc = new Location(new Namespace(BUCKET_NAME), userName);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        return client.execute(fetch).getValue(User.class);
+    }
+}
+
+```
+
+Finally, let's test them:
+
+```java
+package com.basho.msgy;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.msgy.Models.User;
+import com.basho.msgy.Repositories.MsgRepository;
+import com.basho.msgy.Repositories.TimelineRepository;
+import com.basho.msgy.Repositories.UserRepository;
+import com.basho.riak.client.api.RiakClient;
+
+import java.util.Date;
+
+public class MsgyMain {
+
+    public static void main(String[] args) throws Exception {
+        // Set up our repositories
+        RiakClient client = RiakClient.newClient(10017, "127.0.0.1");
+
+        UserRepository userRepo = new UserRepository(client);
+        MsgRepository msgRepo = new MsgRepository(client);
+        TimelineRepository timelineRepo = new TimelineRepository(client);
+
+        // Create and save users
+        User marleen = new User("marleenmgr",
+                "Marleen Manager",
+                "marleen.manager@basho.com");
+
+        User joe = new User("joeuser",
+                "Joe User",
+                "joe.user@basho.com");
+
+        userRepo.save(marleen);
+        userRepo.save(joe);
+
+        // Create new Msg, post to timelines
+        Msg msg = Msg.createNew(marleen.UserName,
+                joe.UserName,
+                "Welcome to the company!");
+
+        timelineRepo.postMsg(msg);
+
+        // Get Joe's inbox for today, get first message
+        Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName,
+                Timeline.TimelineType.Inbox,
+                new Date());
+
+        Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0));
+
+        System.out.println("From: " + joesFirstMsg.Sender);
+        System.out.println("Msg : " + joesFirstMsg.Text);
+        System.out.println("");
+
+        client.shutdown();
+    }
+}
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/java/querying.md b/content/riak/kv/2.9.8/developing/getting-started/java/querying.md
new file mode 100644
index 0000000000..60738a2e25
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/java/querying.md
@@ -0,0 +1,280 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Querying"
+    identifier: "getting_started_java_query"
+    weight: 101
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/querying-java
+  - /riak/kv/2.9.8/dev/taste-of-riak/querying-java
+---
+
+## Java Version Setup
+
+For the Java version, please download the source from GitHub by either
+[cloning](https://github.com/basho/taste-of-riak) the source code
+repository or downloading the [current zip of the master
+branch](https://github.com/basho/taste-of-riak/archive/master.zip).
+The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`.
+You may import this code into your favorite editor, or just run it from the
+command line using the commands in `BuildAndRun.sh` if you are running
+on a *nix OS.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can be as simple
+as using the same key across multiple buckets for different types of
+data, to having fields in your data that are related by name. These
+querying methods will introduce you to some ways of laying out your data
+in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled up info about orders such as total, etc. You can find the source
+for these POJOs in `Customer.java`, `Order.java` and
+`OrderSummaries.java`. Let's put some data into Riak so we can play
+with it.
+
+```java
+// From SipOfRiak.java
+
+private static Customer createCustomer() {
+    Customer customer = new Customer();
+    customer.CustomerId = 1;
+    customer.Name = "John Smith";
+    customer.Address = "123 Main Street";
+    customer.City = "Columbus";
+    customer.State = "Ohio";
+    customer.Zip = "43210";
+    customer.Phone = "+1-614-555-5555";
+    customer.CreatedDate = "2013-10-01 14:30:26";
+    return customer;
+}
+
+private static ArrayList<Order> createOrders() {
+    ArrayList<Order> orders = new ArrayList<Order>();
+
+    Order order1 = new Order();
+    order1.OrderId = 1;
+    order1.CustomerId = 1;
+    order1.SalespersonId = 9000;
+    order1.Items.add(
+        new Item("TCV37GIT4NJ",
+                 "USB 3.0 Coffee Warmer",
+                 15.99));
+    order1.Items.add(
+        new Item("PEG10BBF2PP",
+                 "eTablet Pro; 24GB; Grey",
+                 399.99));
+    order1.Total = 415.98;
+    order1.OrderDate = "2013-10-01 14:42:26";
+    orders.add(order1);
+
+    Order order2 = new Order();
+    order2.OrderId = 2;
+    order2.CustomerId = 1;
+    order2.SalespersonId = 9001;
+    order2.Items.add(
+        new Item("OAX19XWN0QP",
+                 "GoSlo Digital Camera",
+                 359.99));
+    order2.Total = 359.99;
+    order2.OrderDate = "2013-10-15 16:43:16";
+    orders.add(order2);
+
+    Order order3 = new Order();
+    order3.OrderId = 3;
+    order3.CustomerId = 1;
+    order3.SalespersonId = 9000;
+    order3.Items.add(
+        new Item("WYK12EPU5EZ",
+                 "Call of Battle = Goats - Gamesphere 4",
+                 69.99));
+    order3.Items.add(
+        new Item("TJB84HAA8OA",
+                 "Bricko Building Blocks",
+                 4.99));
+    order3.Total = 74.98;
+    order3.OrderDate = "2013-11-03 17:45:28";
+    orders.add(order3);
+    return orders;
+}
+
+private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
+    OrderSummary orderSummary = new OrderSummary();
+    orderSummary.CustomerId = 1;
+    for(Order order: orders)
+    {
+        orderSummary.Summaries.add(new OrderSummaryItem(order));
+    }
+    return orderSummary;
+}
+
+public static void main(String[] args) throws RiakException {
+
+    System.out.println("Creating Data");
+    Customer customer = createCustomer();
+    ArrayList<Order> orders = createOrders();
+    OrderSummary orderSummary = createOrderSummary(orders);
+
+    System.out.println("Starting Client");
+    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);
+
+    System.out.println("Creating Buckets");
+    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
+    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
+    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();
+
+    System.out.println("Storing Data");
+    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
+    for (Order order : orders) {
+        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
+    }
+    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```java
+    System.out.println("Fetching related data by shared key");
+    String key = "1";
+    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
+    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
+    System.out.format("Customer 1: %s\n", fetchedCust);
+    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.8/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```java
+    System.out.println("Adding Index Data");
+    IRiakObject riakObj = ordersBucket.fetch("1").execute();
+    riakObj.addIndex("SalespersonId", 9000);
+    riakObj.addIndex("OrderDate", "2013-10-01");
+    ordersBucket.store(riakObj).execute();
+
+    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
+    riakObj2.addIndex("SalespersonId", 9001);
+    riakObj2.addIndex("OrderDate", "2013-10-15");
+    ordersBucket.store(riakObj2).execute();
+
+    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
+    riakObj3.addIndex("SalespersonId", 9000);
+    riakObj3.addIndex("OrderDate", "2013-11-03");
+    ordersBucket.store(riakObj3).execute();
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+```java
+    // Query for orders where the SalespersonId index is set to 9000
+    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
+                                           .withValue(9000).execute();
+
+    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
+```
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries.
+Let's search the `OrderDate` binary index for entries between
+`2013-10-01` and `2013-10-31`.
+
+```java
+    // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
+    List<String> octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate"))
+                                             .from("2013-10-01").to("2013-10-31").execute();
+
+    System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders));
+```
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/nodejs.md b/content/riak/kv/2.9.8/developing/getting-started/nodejs.md
new file mode 100644
index 0000000000..f4c7235be6
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/nodejs.md
@@ -0,0 +1,104 @@
+---
+title: "Getting Started with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "NodeJS"
+    identifier: "getting_started_nodejs"
+    weight: 104
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/nodejs
+  - /riak/kv/2.9.8/dev/taste-of-riak/nodejs
+---
+
+[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js
+[npm]: https://www.npmjs.com/package/basho-riak-client
+[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation
+[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.8/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Node.js 0.12 or later is
+required.
+
+Code for these examples is available [here][introduction.js]. To run, follow
+these directions:
+
+```bash
+git clone git://github.com/basho/riak-nodejs-client-examples
+cd riak-nodejs-client-examples
+npm install
+node ./app.js
+```
+
+### Client Setup
+
+Install [the Riak Node.js Client][node_js_installation] through [NPM][npm].
+
+### Connecting to Riak
+
+Connecting to Riak with the Riak Node.js Client requires creating a new client
+object and using the callback argument to know when the client is fully
+initialized:
+
+```javascript
+var Riak = require('basho-riak-client');
+var nodes = [
+    'riak-test:10017',
+    'riak-test:10027',
+    'riak-test:10037',
+    'riak-test:10047'
+];
+var client = new Riak.Client(nodes, function (err, c) {
+    // NB: at this point the client is fully initialized, and
+    // 'client' and 'c' are the same object
+});
+```
+
+This creates a new `Riak.Client` object which handles all the details of
+tracking active nodes and also provides load balancing. The `Riak.Client` object
+is used to send commands to Riak.
When your application is completely done with +Riak communications, the following method can be used to gracefully shut the +client down and exit Node.js: + +```javascript +client.stop(function (err, rslt) { + // NB: you may wish to check err + process.exit(); +}); +``` + +Let's make sure the cluster is online with a `Ping` request: + +```javascript +var assert = require('assert'); + +client.ping(function (err, rslt) { + if (err) { + throw new Error(err); + } else { + // On success, ping returns true + assert(rslt === true); + } +}); +``` + +This is some simple code to test that a node in a Riak cluster is online - we +send a simple ping message. Even if the cluster isn't present, the Riak Node.js +Client will return a response message. In the callback it is important to check +that your activity was successful by checking the `err` variable. + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.8/developing/getting-started/nodejs/crud-operations) + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/2.9.8/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..8086ad5035 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,138 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "CRUD Operations" + identifier: "getting_started_nodejs_crud" + weight: 100 + parent: "getting_started_nodejs" +toc: true +aliases: +--- + +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + +### Creating Objects In Riak KV + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going +to want us to do productive work. Let's create some data to save in Riak. + +The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak +key/value objects. At the most basic, a `RiakObject` is responsible for +identifying your object and for translating it into a format that can be easily +saved to Riak. + +```javascript +var async = require('async'); + +var people = [ + { + emailAddress: "bashoman@basho.com", + firstName: "Basho", + lastName: "Man" + }, + { + emailAddress: "johndoe@gmail.com", + firstName: "John", + lastName: "Doe" + } +]; + +var storeFuncs = []; +people.forEach(function (person) { + // Create functions to execute in parallel to store people + storeFuncs.push(function (async_cb) { + client.storeValue({ + bucket: 'contributors', + key: person.emailAddress, + value: person + }, + function(err, rslt) { + async_cb(err, rslt); + } + ); + }); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +In this sample, we create a collection of `Person` objects and then save each +`Person` to Riak. Once again, we check the response from Riak. + +### Reading from Riak + +Let's find a person! + +```javascript +var logger = require('winston'); + +client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true }, + function (err, rslt) { + if (err) { + throw new Error(err); + } else { + var riakObj = rslt.values.shift(); + var bashoman = riakObj.value; + logger.info("I found %s in 'contributors'", bashoman.emailAddress); + } + } +); +``` + +We use `client.fetchValue` to retrieve an object from Riak. 
This returns an
+array of `RiakObject` objects, which helpfully encapsulate the communication
+with Riak.
+
+After verifying that we've been able to communicate with Riak *and* that we have
+a successful result, we use the `value` property to get the object, which has
+already been converted to a JavaScript object due to the use of `convertToJs:
+true` in the options.
+
+### Modifying Existing Data
+
+Let's say that Basho Man has decided to be known as Riak Man:
+
+```javascript
+bashoman.firstName = "Riak";
+riakObj.setValue(bashoman);
+
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Updating an object involves modifying a `RiakObject` then using
+`client.storeValue` to save the existing object.
+
+### Deleting Data
+
+```javascript
+client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Just like other operations, we check the results that have come back from Riak
+to make sure the object was successfully deleted.
+
+The Riak Node.js Client has a lot of additional functionality that makes it easy
+to build rich, complex applications with Riak. Check out the
+[documentation][nodejs_wiki] to learn more about working with the Riak Node.js
+Client and Riak.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/2.9.8/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..a24e7a3836
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Object Modeling"
+    identifier: "getting_started_nodejs_object"
+    weight: 102
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/kv/2.9.8/dev/taste-of-riak/object-modeling-nodejs
+---
+
+To get started, let's create the models that we'll be using.
+
+* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
+* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
+* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+| Bucket | Key Pattern | Example Key |
+|:-------|:------------|:------------|
+| `Users` | `<user_name>` | `joeuser` |
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z` |
+| `Timelines` | `<username>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br />
`marketing_group_INBOX_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<username>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
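+
+As a concrete illustration of the key patterns above, here is a minimal,
+self-contained sketch of how an application might compute these natural keys.
+The helper functions are our own illustration; they are not part of the Riak
+Node.js Client or of the linked taste-of-riak examples:
+
+```javascript
+// Natural-key helpers matching the patterns described above.
+// These helpers are illustrative only, not part of the client library.
+function userKey(userName) {
+    // Users bucket: <user_name>
+    return userName;
+}
+
+function msgKey(sender, createdDate) {
+    // Msgs bucket: <username>_<datetime>, e.g. joeuser_2014-03-06T02:05:13.556Z
+    return sender + '_' + createdDate.toISOString();
+}
+
+function timelineKey(owner, type, date) {
+    // Timelines bucket: <owner>_<type>_<date>, e.g. joeuser_SENT_2014-03-06
+    return owner + '_' + type + '_' + date.toISOString().slice(0, 10);
+}
+
+var now = new Date();
+console.log(userKey('joeuser'));
+console.log(msgKey('joeuser', now));
+console.log(timelineKey('joeuser', 'SENT', now));
+```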
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.9.8/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..08b84a337f --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/nodejs/querying.md @@ -0,0 +1,146 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/querying-nodejs + - /riak/kv/2.9.8/dev/taste-of-riak/querying-nodejs +--- + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. + +* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33) +* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262) + +While individual Customer and Order objects don't change much (or +shouldn't change), the "Order Summary" object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. 
If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)
+
+Which returns our amalgamated objects:
+
+```bash
+info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
+info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.8/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
+2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175)
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/php.md b/content/riak/kv/2.9.8/developing/getting-started/php.md
new file mode 100644
index 0000000000..ec7ca73c8c
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/php.md
@@ -0,0 +1,80 @@
+---
+title: "Getting Started with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "PHP"
+    identifier: "getting_started_php"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/php
+  - /riak/kv/2.9.8/dev/taste-of-riak/php
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.8/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is needed to fetch the client library package.
+
+## Client Setup
+
+Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)).
+
+From the `taste-of-riak` directory, use Composer to install the Riak PHP 2.0 Client.
+
+```bash
+php path/to/your/composer.phar install
+
+# If you did a global install of composer, run this instead:
+composer install
+```
+
+If you set up a local Riak cluster using the [[five minute install]] method, change line 11 from `->onPort(8098)` to `->onPort(10018)`.
+
+Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:
+
+```text
+Reading Objects From Riak...
+Updating Objects In Riak...
+Deleting Objects From Riak...
+Working With Complex Objects...
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+Yay, success!
+
+Since we didn't use PHP's REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting up the PHP Client and connections
+
+```php
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+```
+
+This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
+Once we call `build()` on the builder, it will return a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.
+
+We are now ready to start interacting with Riak.
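+
+If you are running more than one node (for example, a local devrel-style
+cluster), you can build one `Node` object per host/port pair and hand all of
+them to the `Riak` constructor, which accepts an array of nodes. A minimal
+sketch; the ports below are our own assumption based on a typical devrel
+layout, so adjust them to match your cluster:
+
+```php
+// Illustrative only: two local nodes on assumed devrel ports.
+$node1 = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(10018)
+    ->build();
+
+$node2 = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(10028)
+    ->build();
+
+// The Riak constructor takes an array of Node objects.
+$riak = new Riak([$node1, $node2]);
+```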
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.8/developing/getting-started/php/crud-operations) + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.9.8/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..61cedb9dff --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/php/crud-operations.md @@ -0,0 +1,187 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with PHP" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "CRUD Operations" + identifier: "getting_started_php_crud" + weight: 100 + parent: "getting_started_php" +toc: true +aliases: +--- + +## Creating Objects In Riak +First, let’s create a few objects and a bucket to keep them in. + +```php +$bucket = new Riak\Bucket('testBucket'); + +$val1 = 1; +$location1 = new Riak\Location('one', $bucket); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val1) + ->atLocation($location1) + ->build(); +$storeCommand1->execute(); +``` + +In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key. + +```php +$val2 = 'two'; +$location2 = new Riak\Location('two', $bucket); + +$storeCommand2 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val2) + ->atLocation($location2) + ->build(); +$storeCommand2->execute(); +``` + +That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now. + +```php +$val3 = ['myValue' => 3]; +$location3 = new Riak\Location('three', $bucket); + +$storeCommand3 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($val3) + ->atLocation($location3) + ->build(); +$storeCommand3->execute(); +``` + +## Reading Objects From Riak +Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect. + +```php +$response1 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location1) + ->build() + ->execute(); + +$response2 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location2) + ->build() + ->execute(); + +$response3 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location3) + ->withDecodeAsAssociative() + ->build() + ->execute(); + +print_r($response1->getObject()->getData()); +print_r($response2->getObject()->getData()); +print_r($response3->getObject()->getData()); +``` + +That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html). +For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object. + +In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data. + +## Updating Objects In Riak +While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42. 
+ +```php +$object3 = $response3->getObject(); +$data3 = $object3->getData(); + +$data3['myValue'] = 42; +$object3 = $object3->setData(json_encode($data3)); + +$updateCommand = (new Command\Builder\StoreObject($riak)) + ->withObject($object3) + ->atLocation($location3) + ->build(); + +$updateCommand->execute(); +``` + +First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object. +To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object. + +## Deleting Objects From Riak +As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it. + +```php +(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute(); +``` + +### Working With Complex Objects +Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take for example, this plain old PHP object(POPO) that encapsulates some knowledge about a book. + +```php +class Book +{ + var $title; + var $author; + var $body; + var $isbn; + var $copiesOwned; +} + +$book = new Book(); +$book->isbn = '1111979723'; +$book->title = 'Moby Dick'; +$book->author = 'Herman Melville'; +$book->body = 'Call me Ishmael. Some years ago...'; +$book->copiesOwned = 3; +``` + +Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now: + +```php +$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books')); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($book) + ->atLocation($bookLocation) + ->build(); + +$storeCommand1->execute(); +``` + +Some of you may be thinking “But how does the Riak client encode/decode my object”? If we fetch the binary version of our book back and print it as a string, we shall know: + +```php +$fetchBookResponse = (new Command\Builder\FetchObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); + +print('Serialized Object:' . PHP_EOL); +print($fetchBookResponse->getBody() . PHP_EOL); +``` + +```json +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder. 
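+
+If you would rather get the book back as a PHP data structure instead of a raw
+JSON string, the same fetch can decode it for you. Here is a minimal sketch
+that reuses the builder methods shown earlier in this chapter (our own
+illustration, not part of the original example script):
+
+```php
+$fetchedBook = (new Command\Builder\FetchObject($riak))
+    ->atLocation($bookLocation)
+    ->withDecodeAsAssociative() // decode the stored JSON into an associative array
+    ->build()
+    ->execute()
+    ->getObject()
+    ->getData();
+
+// Prints "Moby Dick"
+print($fetchedBook['title'] . PHP_EOL);
+```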
+
+Now that we’ve ruined the magic of object encoding, let’s clean up our mess:
+
+```php
+(new Command\Builder\DeleteObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute();
+```
+
+## Next Steps
+
+More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.9.8/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/php/querying.md b/content/riak/kv/2.9.8/developing/getting-started/php/querying.md
new file mode 100644
index 0000000000..c627c8d723
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/php/querying.md
@@ -0,0 +1,408 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Querying"
+    identifier: "getting_started_php_query"
+    weight: 101
+    parent: "getting_started_php"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/querying-php
+  - /riak/kv/2.9.8/dev/taste-of-riak/querying-php
+---
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+## Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it.
+
+```php
+<?php
+
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Command;
+use Basho\Riak\Location;
+use Basho\Riak\Node;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Class definitions for our models
+
+class Customer
+{
+    var $customerId;
+    var $name;
+    var $address;
+    var $city;
+    var $state;
+    var $zip;
+    var $phone;
+    var $createdDate;
+}
+
+class Order
+{
+    public function __construct()
+    {
+        $this->items = array();
+    }
+    var $orderId;
+    var $customerId;
+    var $salespersonId;
+    var $items;
+    var $total;
+    var $orderDate;
+}
+
+class Item
+{
+    public function __construct($itemId, $title, $price)
+    {
+        $this->itemId = $itemId;
+        $this->title = $title;
+        $this->price = $price;
+    }
+    var $itemId;
+    var $title;
+    var $price;
+}
+
+class OrderSummary
+{
+    public function __construct()
+    {
+        $this->summaries = array();
+    }
+    var $customerId;
+    var $summaries;
+}
+
+class OrderSummaryItem
+{
+    public function __construct(Order $order)
+    {
+        $this->orderId = $order->orderId;
+        $this->total = $order->total;
+        $this->orderDate = $order->orderDate;
+    }
+    var $orderId;
+    var $total;
+    var $orderDate;
+}
+
+
+// Creating Data
+$customer = new Customer();
+$customer->customerId = 1;
+$customer->name = 'John Smith';
+$customer->address = '123 Main Street';
+$customer->city = 'Columbus';
+$customer->state = 'Ohio';
+$customer->zip = '43210';
+$customer->phone = '+1-614-555-5555';
+$customer->createdDate = '2013-10-01 14:30:26';
+
+
+$orders = [];
+
+$order1 = new Order();
+$order1->orderId = 1;
+$order1->customerId = 1;
+$order1->salespersonId = 9000;
+$order1->items = [
+    new Item(
+        'TCV37GIT4NJ',
+        'USB 3.0 Coffee Warmer',
+        15.99
+    ),
+    new Item(
+        'PEG10BBF2PP',
+        'eTablet Pro; 24GB; Grey',
+        399.99
+    )
+];
+$order1->total = 415.98;
+$order1->orderDate = '2013-10-01 14:42:26';
+$orders[] = $order1;
+
+$order2 = new Order();
+$order2->orderId = 2;
+$order2->customerId = 1;
+$order2->salespersonId = 9001;
+$order2->items = [
+    new Item(
+        'OAX19XWN0QP',
+        'GoSlo Digital Camera',
+        359.99
+    )
+];
+$order2->total = 359.99;
+$order2->orderDate = '2013-10-15 16:43:16';
+$orders[] = $order2;
+
+$order3 = new Order();
+$order3->orderId = 3;
+$order3->customerId = 1;
+$order3->salespersonId = 9000;
+$order3->items = [
+    new Item(
+        'WYK12EPU5EZ',
+        'Call of Battle = Goats - Gamesphere 4',
+        69.99
+    ),
+    new Item(
+        'TJB84HAA8OA',
+        'Bricko Building Blocks',
+        4.99
+    )
+];
+$order3->total = 74.98;
+$order3->orderDate = '2013-11-03 17:45:28';
+$orders[] = $order3;
+
+
+$orderSummary = new OrderSummary();
+$orderSummary->customerId = 1;
+foreach ($orders as $order) {
+    $orderSummary->summaries[] = new OrderSummaryItem($order);
+}
+unset($order);
+
+
+
+// Starting Client
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Creating Buckets
+$customersBucket = new Riak\Bucket('Customers');
+$ordersBucket = new Riak\Bucket('Orders');
+$orderSummariesBucket = new Riak\Bucket('OrderSummaries');
+
+// Storing Data
+$storeCustomer = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($customer)
+    ->atLocation(new Location($customer->customerId, $customersBucket))
+    ->build();
+$storeCustomer->execute();
+
+foreach ($orders as $order) {
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($order)
+        ->atLocation(new Location($order->orderId, $ordersBucket))
+        ->build();
+    $storeOrder->execute();
+}
+unset($order);
+
+$storeSummary = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($orderSummary)
+    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
+    
->build(); +$storeSummary->execute(); +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```php +// Fetching related data by shared key +$fetched_customer = (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $customersBucket)) + ->build()->execute()->getObject()->getData(); + +$fetched_customer->orderSummary = + (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $orderSummariesBucket)) + ->build()->execute()->getObject()->getData(); + +print("Customer with OrderSummary data: \n"); +print_r($fetched_customer); +``` + +Which returns our amalgamated objects: + +```text +Customer with OrderSummary data: +stdClass Object +( + [customerId] => 1 + [name] => John Smith + [address] => 123 Main Street + [city] => Columbus + [state] => Ohio + [zip] => 43210 + [phone] => +1-614-555-5555 + [createdDate] => 2013-10-01 14:30:26 + [orderSummary] => stdClass Object + ( + [customerId] => 1 + [summaries] => Array + ( + [0] => stdClass Object + ( + [orderId] => 1 + [total] => 415.98 + [orderDate] => 2013-10-01 14:42:26 + ) + + [1] => stdClass Object + ( + [orderId] => 2 + [total] => 359.99 + [orderDate] => 2013-10-15 16:43:16 + ) + + [2] => stdClass Object + ( + [orderId] => 3 + [total] => 74.98 + [orderDate] => 2013-11-03 17:45:28 + ) + ) + ) +) +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.8/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```php +// Adding Index Data +$keys = array(1,2,3); +foreach ($keys as $key) { + $orderLocation = new Location($key, $ordersBucket); + $orderObject = (new Command\Builder\FetchObject($riak)) + ->atLocation($orderLocation) + ->build()->execute()->getObject(); + + $order = $orderObject->getData(); + + $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId); + $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate); + + $storeOrder = (new Command\Builder\StoreObject($riak)) + ->withObject($orderObject) + ->atLocation($orderLocation) + ->build(); + $storeOrder->execute(); +} +unset($key); + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. 
+
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id. Next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+?>
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which, as you can see, returns the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/getting-started/python.md b/content/riak/kv/2.9.8/developing/getting-started/python.md
new file mode 100644
index 0000000000..3382a35e85
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/getting-started/python.md
@@ -0,0 +1,103 @@
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/taste-of-riak/python
+  - /riak/kv/2.9.8/dev/taste-of-riak/python
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.8/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`.
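+
+Before moving on, you can sanity-check which interpreter and version
+you're on from the Python prompt (a quick sketch; the output depends on
+your system):
+
+```python
+import sys
+
+# The examples in this guide assume a 2.7-series interpreter
+print(sys.version)
+```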
+ +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` - Header files and a static library for Python +* `libffi-dev` - Foreign function interface library +* `libssl-dev` - libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.8/developing/getting-started/python/crud-operations) + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/python/crud-operations.md b/content/riak/kv/2.9.8/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..0b4af529d5 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/python/crud-operations.md @@ -0,0 +1,150 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/python/object-modeling.md b/content/riak/kv/2.9.8/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..1fec760d08 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/python/object-modeling.md @@ -0,0 +1,264 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/object-modeling-python + - /riak/kv/2.9.8/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br />`marketing_group_Inbox_2014-03-06` |
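+
+To make these patterns concrete, here is a small sketch of how an
+application might construct such keys (the helper names are
+illustrative, not part of the client library):
+
+```python
+from datetime import datetime
+
+def msg_key(user_name, created=None):
+    # <user_name>_<datetime>, e.g. joeuser_2014-03-06T02:05:13.223556
+    created = created or datetime.utcnow()
+    return user_name + '_' + created.isoformat()
+
+def timeline_key(owner, msg_type, day):
+    # <owner>_<type>_<date>, e.g. joeuser_Sent_2014-03-06
+    return '_'.join([owner, msg_type, day.strftime('%Y-%m-%d')])
+```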
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_<type>_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```python
+class UserRepository:
+    BUCKET = 'Users'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, user):
+        riak_obj = self.client.bucket(self.BUCKET).get(user['user_name'])
+        riak_obj.data = user
+        return riak_obj.store()
+
+    def get(self, user_name):
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        return riak_obj.data
+
+
+class MsgRepository:
+    BUCKET = 'Msgs'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, msg):
+        msgs = self.client.bucket(self.BUCKET)
+        key = self._generate_key(msg)
+
+        riak_obj = msgs.get(key)
+
+        if not riak_obj.exists:
+            riak_obj.data = msg
+            riak_obj.store(if_none_match=True)
+
+        return riak_obj
+
+    def get(self, key):
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _generate_key(self, msg):
+        return msg['sender'] + '_' + msg['created']
+
+
+class TimelineRepository:
+    BUCKET = 'Timelines'
+    SENT = 'Sent'
+    INBOX = 'Inbox'
+
+    def __init__(self, client):
+        self.client = client
+        self.msg_repo = MsgRepository(client)
+
+    def post_message(self, msg):
+        # Save the canonical copy
+        saved_message = self.msg_repo.save(msg)
+        msg_key = saved_message.key
+
+        # Post to sender's Sent timeline
+        self._add_to_timeline(msg, self.SENT, msg_key)
+
+        # Post to recipient's Inbox timeline
+        self._add_to_timeline(msg, self.INBOX, msg_key)
+
+    def get_timeline(self, owner, msg_type, date):
+        key = self._generate_key(owner, msg_type, date)
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _add_to_timeline(self, msg, msg_type, msg_key):
+        timeline_key = self._generate_key_from_msg(msg, msg_type)
+        riak_obj = self.client.bucket(self.BUCKET).get(timeline_key)
+
+        if riak_obj.exists:
+            riak_obj = self._add_to_existing_timeline(riak_obj,
+                                                      msg_key)
+        else:
+            riak_obj = self._create_new_timeline(riak_obj,
+                                                 msg, msg_type,
+                                                 msg_key)
+
+        return riak_obj.store()
+
+    def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key):
+        owner = self._get_owner(msg, msg_type)
+        new_timeline = {'owner': owner,
+                        'msg_type': msg_type,
+                        'msgs': [msg_key]}
+
+        riak_obj.data = new_timeline
+        return riak_obj
+
+    def 
_add_to_existing_timeline(self, riak_obj, msg_key): + riak_obj.data['msgs'].append(msg_key) + return riak_obj + + def _get_owner(self, msg, msg_type): + if msg_type == self.INBOX: + return msg['recipient'] + else: + return msg['sender'] + + def _generate_key_from_msg(self, msg, msg_type): + owner = self._get_owner(msg, msg_type) + return self._generate_key(owner, msg_type, msg['created']) + + def _generate_key(self, owner, msg_type, datetimestr): + dateString = string.split(datetimestr, 'T', 1)[0] + return owner + '_' + msg_type + '_' + dateString + +``` + +Finally, let's test them: + +```python +# Setup our repositories +client = riak.RiakClient(pb_port=10017, protocol='pbc') +userRepo = UserRepository(client) +msgsRepo = MsgRepository(client) +timelineRepo = TimelineRepository(client) + +# Save users +userRepo.save(marleen) +userRepo.save(joe) + +# Post msg to timelines +timelineRepo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timelineRepo.get_timeline( + joe['user_name'], + TimelineRepository.INBOX, + datetime.utcnow().isoformat()) + +joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0]) + +print 'From: {0}\nMsg : {1}\n\n'.format( + joes_first_message['sender'], + joes_first_message['text']) + +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/python/querying.md b/content/riak/kv/2.9.8/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..64f939cc3e --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/python/querying.md @@ -0,0 +1,240 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Python" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Querying" + identifier: "getting_started_python_query" + weight: 101 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/querying-python + - /riak/kv/2.9.8/dev/taste-of-riak/querying-python +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index for all customer orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```python +customer = customer_bucket.get('1').data +customer['order_summary'] = order_summary_bucket.get('1').data +customer +``` + +Which returns our amalgamated objects: + +```python +{ + u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210', + u'created_date': u'2013-10-01 14:30:26', + 'order_summary': { + u'customer_id': 1, u'summaries': [ + {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98}, + {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99}, + {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98} + ]}, + u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street', + u'customer_id': 1 +} +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.8/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```python +for i in range(1, 4): + order = order_bucket.get(str(i)) + # Initialize our secondary indices + order.add_index('salesperson_id_int', order.data['salesperson_id']) + order.add_index('order_date_bin', order.data['order_date']) + order.store() +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. +Now let's find all of Jane Appleseed's processed orders, we'll lookup the orders by searching the `saleperson_id_int` index for Jane's id of `9000`. + +```python +janes_orders = order_bucket.get_index("salesperson_id_int", 9000) +janes_orders.results +``` + +Which returns: + +```text +['1', '3'] +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id, next let's use a "binary" index. +Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`. + +```python +october_orders = order_bucket.get_index("order_date_bin", + "2013-10-01", "2013-10-31") +october_orders.results +``` + +Which returns: + +```text +['1', '2'] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/ruby.md b/content/riak/kv/2.9.8/developing/getting-started/ruby.md new file mode 100644 index 0000000000..30ea15060c --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/ruby.md @@ -0,0 +1,68 @@ +--- +title: "Getting Started with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Ruby" + identifier: "getting_started_ruby" + weight: 101 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/ruby + - /riak/kv/2.9.8/dev/taste-of-riak/ruby +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.8/using/running-a-cluster) first. To try this flavor +of Riak, a working installation of Ruby is required. + +## Client Setup + +First, install the Riak Ruby client via RubyGems. + +```bash +gem install riak-client +``` + +Start IRB, the Ruby REPL, and let’s get set up. Enter the following into +IRB: + +```ruby +require 'riak' +``` + +If you are using a single local Riak node, use the following to create a +new client instance, assuming that the node is running on `localhost` +port 8087: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087) + +# Since the Ruby Riak client uses the Protocol Buffers API by default, +# you can also just enter this: +client = Riak::Client.new(:pb_port => 8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017) + +# For the reasons explain in the snippet above, this will also work: +client = Riak::Client.new(:pb_port => 10017) +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.8/developing/getting-started/ruby/crud-operations) + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/2.9.8/developing/getting-started/ruby/crud-operations.md new file mode 100644 index 0000000000..fbc075de18 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/ruby/crud-operations.md @@ -0,0 +1,151 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "CRUD Operations" + identifier: "getting_started_ruby_crud" + weight: 100 + parent: "getting_started_ruby" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```ruby +my_bucket = client.bucket("test") + +val1 = 1 +obj1 = my_bucket.new('one') +obj1.data = val1 +obj1.store() +``` + +In this first example we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```ruby +val2 = "two" +obj2 = my_bucket.new('two') +obj2.data = val2 +obj2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```ruby +val3 = { myValue: 3 } +obj3 = my_bucket.new('three') +obj3.data = val3 +obj3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. 
+ +```ruby +fetched1 = my_bucket.get('one') +fetched2 = my_bucket.get('two') +fetched3 = my_bucket.get('three') + +fetched1.data == val1 +fetched2.data == val2 +fetched3.data.to_json == val3.to_json +``` + +That was easy. we simply request the objects by key. in the last +example, we converted to JSON so we can compare a string key to a symbol +key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to 42. + +```ruby +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called either against the bucket or the +object. + +```ruby +my_bucket.delete('one') +obj2.delete() +obj3.delete() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this Ruby hash that encapsulates some knowledge about +a book. + +```ruby +book = { + :isbn => '1111979723', + :title => 'Moby Dick', + :author => 'Herman Melville', + :body => 'Call me Ishmael. Some years ago...', + :copies_owned => 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now. + +```ruby +books_bucket = client.bucket('books') +new_book = books_bucket.new(book[:isbn]) +new_book.data = book +new_book.store() +``` + +Some of you may be thinking, "But how does the Ruby Riak client +encode/decode my object?" If we fetch our book back and print the raw +data, we shall know: + +```ruby +fetched_book = books_bucket.get(book[:isbn]) +puts fetched_book.raw_data +``` + +Raw Data: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +JSON! The Ruby Riak client will serialize objects to JSON when it comes +across structured data like hashes. For more advanced control over +serialization you can use a library called +[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling +layer over the basic riak client. Ripple falls outside the scope of +this document but we shall visit it later. + +Now, let’s clean up our mess: + +```ruby +new_book.delete() +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/2.9.8/developing/getting-started/ruby/object-modeling.md new file mode 100644 index 0000000000..afec681183 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/ruby/object-modeling.md @@ -0,0 +1,295 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Object Modeling" + identifier: "getting_started_ruby_object" + weight: 102 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/object-modeling-ruby + - /riak/kv/2.9.8/dev/taste-of-riak/object-modeling-ruby +--- + +To get started, let's create the models that we'll be using. Since the +[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses +hashes when converting to and from JSON, we'll use the library +[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically +coerce class properties to and from hashes. 
You can install this library
+with `gem install hashie`.
+
+```ruby
+# Encoding: utf-8
+
+require 'riak'
+require 'hashie'
+require 'time'
+
+class User < Hashie::Dash
+  property :user_name
+  property :full_name
+  property :email
+end
+
+class Msg < Hashie::Dash
+  property :from
+  property :to
+  property :created
+  property :text
+end
+
+class Timeline < Hashie::Dash
+  property :owner
+  property :type
+  property :msgs
+end
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br />`marketing_group_Inbox_2014-03-06Z` |
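+
+One property worth noting before we pick keys: for a fixed format, ISO
+8601 timestamps sort lexicographically in chronological order, which is
+what makes these natural keys friendly to range scans later. A quick
+sketch, in Python purely for illustration:
+
+```python
+# For keys sharing a prefix and timestamp format, string order == time order
+keys = [
+    'joeuser_2014-03-05T23:20:28Z',
+    'joeuser_2014-03-06T02:05:13.223556Z',
+]
+assert sorted(keys) == keys
+```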
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def 
add_to_existing_timeline(key, msg_key) + riak_obj = @client.bucket(BUCKET).get(key) + timeline = Timeline.new(riak_obj.data) + timeline.msgs << msg_key + riak_obj.data = timeline + riak_obj + end + + def get_owner(msg, type) + type == INBOX ? msg.to : msg.from + end + + def generate_key_from_msg(msg, type) + owner = get_owner(msg, type) + generate_key(owner, type, msg.created) + end + + def generate_key(owner, type, date) + owner + '_' + type + '_' + date.utc.strftime('%F') + end +end +``` + +Finally, let's test them: + +```ruby +# Setup our repositories +client = Riak::Client.new(protocol: 'pbc', pb_port: 10017) +user_repo = UserRepository.new(client) +msgs_repo = MsgRepository.new(client) +timeline_repo = TimelineRepository.new(client) + +# Create and save users +marleen = User.new(user_name: 'marleenmgr', + full_name: 'Marleen Manager', + email: 'marleen.manager@basho.com') + +joe = User.new(user_name: 'joeuser', + full_name: 'Joe User', + email: 'joe.user@basho.com') + +user_repo.save(marleen) +user_repo.save(joe) + +# Create new Msg, post to timelines +msg = Msg.new(from: marleen.user_name, + to: joe.user_name, + created: Time.now, + text: 'Welcome to the company!') + +timeline_repo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now) +joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first) + +puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}" +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/2.9.8/developing/getting-started/ruby/querying.md b/content/riak/kv/2.9.8/developing/getting-started/ruby/querying.md new file mode 100644 index 0000000000..b9b763d078 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/getting-started/ruby/querying.md @@ -0,0 +1,256 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Querying" + identifier: "getting_started_ruby_query" + weight: 101 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/2.9.8/dev/taste-of-riak/querying-ruby + - /riak/kv/2.9.8/dev/taste-of-riak/querying-ruby +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. 
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```ruby +shared_key = '1' +customer = customer_bucket.get(shared_key).data +customer[:order_summary] = order_summary_bucket.get(shared_key).data +puts "Combined Customer and Order Summary: " +pp customer +``` + +Which returns our amalgamated objects: + +```ruby +# Combined Customer and Order Summary: +{"customer_id"=>1, + "name"=>"John Smith", + "address"=>"123 Main Street", + "city"=>"Columbus", + "state"=>"Ohio", + "zip"=>"43210", + "phone"=>"+1-614-555-5555", + "created_date"=>"2013-10-01 14:30:26 -0400", + :order_summary=> + {"customer_id"=>1, + "summaries"=> + [{"order_id"=>1, + "total"=>415.98, + "order_date"=>"2013-10-01 14:42:26 -0400"}, + {"order_id"=>2, + "total"=>359.99, + "order_date"=>"2013-10-15 16:43:16 -0400"}, + {"order_id"=>3, + "total"=>74.98, + "order_date"=>"2013-11-03 17:45:28 -0500"}]}} +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.8/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.8/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.8/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.8/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```ruby +(1..3).each do |i| + order = order_bucket.get(i.to_s) + # Initialize our secondary indices + order.indexes['salesperson_id_int'] = [] + order.indexes['order_date_bin'] = [] + + order.indexes['salesperson_id_int'] << order.data['salesperson_id'] + order.indexes['order_date_bin'] << Time.parse(order.data['order_date']) + .strftime('%Y%m%d') + order.store +end +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. +Now let's find all of Jane Appleseed's processed orders, we'll lookup the orders by searching the `saleperson_id_int` index for Jane's id of `9000`. + +```ruby +puts "#Jane's Orders: " +pp order_bucket.get_index('salesperson_id_int', 9000) +``` + +Which returns: + +```ruby +# Jane's Orders: +["1", "3"] +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id, next let's use a "binary" index. +Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`. 
+ +```ruby +puts "#October's Orders: " +pp order_bucket.get_index('order_date_bin', '20131001'..'20131031') +``` + +Which returns: + +```ruby +# October's Orders: +["1", "2"] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/2.9.8/developing/key-value-modeling.md b/content/riak/kv/2.9.8/developing/key-value-modeling.md new file mode 100644 index 0000000000..737815188f --- /dev/null +++ b/content/riak/kv/2.9.8/developing/key-value-modeling.md @@ -0,0 +1,535 @@ +--- +title: "Riak KV Key/Value Modeling" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Key/Value Modeling" + identifier: "developing_kv_model" + weight: 104 + parent: "developing" +toc: true +aliases: + - /riak/2.9.8/dev/data-modeling/key-value/ + - /riak/kv/2.9.8/dev/data-modeling/key-value/ +--- + +While Riak enables you to take advantage of a wide variety of features +that can be useful in application development, such as [Search]({{}}riak/kv/2.9.8/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.9.8/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.9.8/developing/data-types/), Riak almost always performs best when you +build your application around basic CRUD operations (create, read, +update, and delete) on objects, i.e. when you use Riak as a "pure" +key/value store. + +In this tutorial, we'll suggest some strategies for naming and modeling +for key/value object interactions with Riak. If you'd like to use some +of Riak's other features, we recommend checking out the documentation +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.9.8/developing/app-guide/) for a better sense of which features you might need. + +## Advantages of Key/Value Operations + +Riak's key/value architecture enables it to be more performant than +relational databases in many scenarios because Riak doesn't need to +perform lock, join, union, or other operations when working with +objects. Instead, it interacts with objects on a one-by-one basis, using +**primary key lookups**. + +Primary key lookups store and fetch objects in Riak on the basis of +three basic locators: + +* The object's [key]({{}}riak/kv/2.9.8/learn/concepts/keys-and-objects#keys), which can be anything you + want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/2.9.8/learn/concepts/buckets) which houses the object and its key (bucket + names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/2.9.8/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.9.8/developing/app-guide/replication-properties) and other properties + +It may be useful to think of this system as analogous to a nested +key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you +would find in most programming languages. Below is an example from +[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{<baseurl>}}riak/kv/2.9.8/developing/api/http)):
+
+```
+GET/PUT/DELETE /bucket/<bucket>/keys/<key>
+```
+
+The most important benefit of storing Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist?
+ +One way to determine this is to [list all keys]({{}}riak/kv/2.9.8/developing/api/protocol-buffers/list-keys) in the +bucket `users`. This approach, however, is _not_ recommended, because +listing all keys in a bucket is a very expensive operation that should +not be used in production. And so another strategy must be employed. + +A better possibility is to use [Riak sets]({{}}riak/kv/2.9.8/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.9.8/developing/data-types) that enable you to store lists of binaries or strings in Riak. +Unlike normal Riak objects, you can interact with Riak sets much like +you interact with sets in most programming languages, i.e. you can add +and remove elements at will. + +Going back to our user data example, instead of simply storing user +records in our `users` bucket, we could set up our application to store +each key in a set when a new record is created. We'll store this set in +the bucket `user_info_sets` (we'll keep it simple) and in the key +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.9.8/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`sets`. + +We can interact with that set on the basis of its location: + +```java +Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames"); + +// With this Location, we can construct fetch operations like this: +FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build(); +``` + +```ruby +require 'riak' + +set_bucket = client.bucket('user_info_sets') + +# We'll make this set global because we'll use it +# inside of a function later on + +$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets') +``` + +```php +$command = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->buildLocation('usernames', 'user_info_sets', 'sets') + ->build(); +``` + +```python +from riak.datatypes import Set + +bucket = client.bucket_type('sets').bucket('user_info_sets') +user_id_set = Set(bucket, 'usernames') +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.9.8/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.9.8/developing/getting-started). 
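+
+As a quick usage sketch (Python client, continuing from the snippet
+above), you can read the set's current members back at any time:
+
+```python
+# reload() pulls the set's current state from Riak; value is a frozenset
+user_id_set.reload()
+print(list(user_id_set.value))  # e.g. ['joeuser', 'marleenmgr'] once populated
+```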
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+  public String username;
+  public String info;
+
+  public User(String username, String info) {
+    this.username = username;
+    this.info = info;
+  }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+  // User records themselves will be stored in the bucket "users"
+  Location userObjectLocation =
+    new Location(new Namespace("users"), user.username);
+  RiakObject userObject = new RiakObject()
+      // We'll keep it simple and store User object data as plain text
+      .setContentType("text/plain")
+      .setValue(BinaryValue.create(user.info));
+  StoreValue store = new StoreValue.Builder(userObject)
+      .withLocation(userObjectLocation)
+      .build();
+  client.execute(store);
+
+  Location userIdSet =
+    new Location(new Namespace("sets", "user_info_sets"), "usernames");
+  SetUpdate su = new SetUpdate()
+      .add(BinaryValue.create(user.username));
+  UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+      .build();
+  client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new(client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the set
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+    public $user_name;
+    public $info;
+
+    public function __construct($user_name, $info)
+    {
+        $this->user_name = $user_name;
+        $this->info = $info;
+    }
+}
+
+function store_user(User $user)
+{
+    global $riak; // use the client object created earlier
+
+    (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->buildLocation($user->user_name, 'users')
+        ->buildJsonObject($user)
+        ->build()
+        ->execute();
+
+    (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->add($user->user_name)
+        ->build()
+        ->execute();
+}
+```
+
+```python
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    bucket = client.bucket('users')
+    obj = RiakObject(client, bucket, user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
+
+Now, let's say that we want to be able to pull up all user records in
+the bucket at once.
We could do so by iterating through the usernames
+stored in our set and then fetching the object corresponding to each
+username:
+
+```java
+public Set<User> fetchAllUserRecords() throws Exception {
+    // Empty sets for usernames and User objects
+    Set<String> userIdSet = new HashSet<>();
+    Set<User> userSet = new HashSet<>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSetLocation =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSetLocation).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    for (BinaryValue username : set.viewAsSet()) {
+        userIdSet.add(username.toString());
+    }
+
+    // Fetch User objects for each of the usernames stored in the set
+    for (String username : userIdSet) {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        User user = client.execute(fetch).getValue(User.class);
+        userSet.add(user);
+    }
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+    global $riak; // use the client object created earlier
+
+    $users = [];
+
+    $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->build()
+        ->execute();
+
+    $user_names = $response->getSet()->getData();
+    foreach($user_names as $user_name) {
+        $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+            ->buildLocation($user_name, 'users')
+            ->build()
+            ->execute();
+
+        $users[$user_name] = $response->getObject()->getData();
+    }
+
+    return $users;
+}
+```
+
+```python
+# We'll create a generator object that will yield a list of Riak objects
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can retrieve that list of Riak objects later on
+list(fetch_all_user_records())
+```
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`.
If an
+object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) throws Exception {
+    String usernameKey = String.format("user_%s", username);
+    Location loc = new Location(new Namespace("users"), usernameKey);
+    FetchValue fetchUser = new FetchValue.Builder(loc).build();
+    FetchValue.Response res = client.execute(fetchUser);
+    User userObject = res.getValue(User.class);
+    return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+    global $riak; // use the client object created earlier
+
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->buildLocation('user_' . $user_name, 'users')
+        ->build()
+        ->execute();
+
+    return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /bucket/<season>/keys/<episode>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<good-or-bad>/buckets/<season>/keys/<episode>
+```
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good' => {
+    'season X' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  },
+  'bad' => {
+    'season Y' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  }
+}
+```
+
+We can fetch the title of season 8, episode 6:
+
+```ruby
+# For the sake of example, we'll classify season 8 as good:
+
+simpsons['good']['season 8']['episode 6']
+
+# => "A Milhouse Divided"
+```
+
+If your data is best modeled as a three-layered hash, you may want to
+consider using bucket types in the way shown above.
+
+## Resources
+
+More on key/value modeling in Riak can be found in [this
+presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo)
+by Basho evangelist [Hector Castro](https://github.com/hectcastro), with
+the presentation slides available [on Speaker
+Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example).
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage.md b/content/riak/kv/2.9.8/developing/usage.md
new file mode 100644
index 0000000000..283f2f6e12
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage.md
@@ -0,0 +1,138 @@
+---
+title: "Usage Overview"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Usage"
+    identifier: "developing_usage"
+    weight: 101
+    parent: "developing"
+toc: true
+aliases:
+---
+
+## In This Section
+
+#### [Creating Objects](./creating-objects)
+
+Creating and storing objects in Riak KV.
+
+[Learn More >>](./creating-objects)
+
+
+#### [Reading Objects](./reading-objects)
+
+Reading and fetching objects in Riak KV.
+
+[Learn More >>](./reading-objects)
+
+
+#### [Updating Objects](./updating-objects)
+
+Updating objects in Riak KV.
+
+[Learn More >>](./updating-objects)
+
+
+#### [Deleting Objects](./deleting-objects)
+
+Deleting objects in Riak KV.
+
+[Learn More >>](./deleting-objects)
+
+
+#### [Content Types](./content-types)
+
+Overview of content types and their usage.
+
+[Learn More >>](./content-types)
+
+
+#### [Using Search](./search)
+
+Tutorial on using search.
+
+[Learn More >>](./search)
+
+
+#### [Using MapReduce](./mapreduce)
+
+Guide to using MapReduce in applications.
+
+[Learn More >>](./mapreduce)
+
+
+#### [Using Secondary Indexes](./secondary-indexes)
+
+Overview and usage details of Secondary Indexes (2i).
+
+[Learn More >>](./secondary-indexes)
+
+
+#### [Bucket Types](./bucket-types)
+
+Describes how to use bucket properties.
+
+[Learn More >>](./bucket-types)
+
+
+#### [Using Commit Hooks](./commit-hooks)
+
+Tutorial on pre-commit and post-commit hook functions.
+
+[Learn More >>](./commit-hooks)
+
+
+#### [Creating Search Schemas](./search-schemas)
+
+Step-by-step guide on creating and using custom search schemas.
+
+[Learn More >>](./search-schemas)
+
+
+#### [Searching with Data Types](./searching-data-types)
+
+Guide on using search with Data Types.
+
+[Learn More >>](./searching-data-types)
+
+
+#### [Implementing a Document Store](./document-store)
+
+Tutorial on using Riak KV as a document store.
+
+[Learn More >>](./document-store)
+
+
+#### [Custom Extractors](./custom-extractors)
+
+Details on creating and registering custom extractors with Riak Search.
+
+[Learn More >>](./custom-extractors)
+
+
+#### [Client-side Security](./security)
+
+Overview of client-side security.
+
+[Learn More >>](./security)
+
+
+#### [Replication](./replication)
+
+Documentation on replication properties and their underlying implementation.
+
+[Learn More >>](./replication)
+
+
+#### [Conflict Resolution](./conflict-resolution)
+
+Guide to conflict resolution during object updates.
+ +[Learn More >>](./conflict-resolution) + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/bucket-types.md b/content/riak/kv/2.9.8/developing/usage/bucket-types.md new file mode 100644 index 0000000000..ed579bc5c3 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/bucket-types.md @@ -0,0 +1,102 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Bucket Types" + identifier: "usage_bucket_types" + weight: 108 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.8/dev/advanced/bucket-types + - /riak/kv/2.9.8/dev/advanced/bucket-types +--- + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +## Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +`default` bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/commit-hooks.md b/content/riak/kv/2.9.8/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..5856d2c5e9 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/commit-hooks.md @@ -0,0 +1,243 @@ +--- +title: "Using Commit Hooks" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Using Commit Hooks" + identifier: "usage_commit_hooks" + weight: 109 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.8/dev/using/commit-hooks + - /riak/kv/2.9.8/dev/using/commit-hooks +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types + +Pre- and post-commit hooks are functions that are invoked before or +after an object has been written to Riak. 
To provide a few examples,
+commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and then the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/2.9.8/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions are allowed three possible return values:
+
+- A Riak object - This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written.
+- `fail` - The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/2.9.8/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` - The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
+
+Errors that occur when processing Erlang pre-commit hooks will be
+reported in the `sasl-error.log` file with lines that start with
+`problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object
+(`Object`) as its input and runs a pattern-matching operation on the
+object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
+function determines that the object's size (determined by the `riak_object:get_value`
+function) is greater than 5,242,880 (5 MB in bytes), then the commit
+will return failure along with the message `Object is larger than 5MB.`
+This will stop the write. If the object is not larger than 5 MB, Riak
+will return the object and allow the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty
+list, meaning that no pre-commit hooks are specified by default. Adding
+one or more pre-commit hook functions to this list, as documented above,
+will cause Riak to start evaluating those hook functions when bucket
+entries are created, updated, or deleted. Riak stops evaluating
+pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way to use
+pre-commit hooks is to validate data before it is written to Riak.
+Below is an example that uses Erlang to validate a JSON object
+before it is written to Riak.
+
+Below is a sample JSON object that will be evaluated by the hook:
+
+```json
+{
+  "user_info": {
+    "name": "Mark Phillips",
+    "age": "25"
+  },
+  "session_info": {
+    "id": 3254425,
+    "items": [29, 37, 34]
+  }
+}
+```
+
+The following hook will validate the JSON object:
+
+```erlang
+validate(Object) ->
+  try
+    mochijson2:decode(riak_object:get_value(Object)),
+    Object
+  catch
+    throw:invalid_utf8 ->
+      {fail, "Invalid JSON: Illegal UTF-8 character"};
+    error:Error ->
+      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
+  end.
+```
+
+**Note**: All pre-commit hook functions are executed for each create and update operation.
+
+## Post-Commit Hooks
+
+Post-commit hooks are run after a write has completed successfully. More
+specifically, the hook function is called immediately before the calling
+process is notified of the successful write.
+
+Hook functions must accept a single argument: the object instance just
+written. The return value of the function is ignored. As with pre-commit
+hooks, deletes are considered writes, so post-commit hook functions will
+need to inspect the object's metadata for the presence of `X-Riak-Deleted`
+to determine whether a delete has occurred. As with pre-commit hooks,
+errors that occur when processing post-commit hooks will be reported in
+the `sasl-error.log` file with lines that start with `problem invoking hook`.
+
+#### Example
+
+The following post-commit hook creates a secondary index on the `email`
+field of a JSON object:
+
+```erlang
+postcommit_index_on_email(Object) ->
+    %% Determine the target bucket name
+    Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),
+
+    %% Decode the JSON body of the object
+    {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),
+
+    %% Extract the email field
+    {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),
+
+    %% Create a new object for the target bucket
+    %% NOTE: This doesn't handle the case where the
+    %% index object already exists!
+    IndexObj = riak_object:new(
+        Bucket, Key, <<>>, %% no object contents
+        dict:from_list(
+            [
+                {<<"content-type">>, "text/plain"},
+                {<<"Links">>,
+                    [
+                        {
+                            {riak_object:bucket(Object), riak_object:key(Object)},
+                            <<"indexed">>
+                        }]}
+            ]
+        )
+    ),
+
+    %% Get a riak client
+    {ok, C} = riak:local_client(),
+
+    %% Store the object
+    C:put(IndexObj).
+```
+
+
+### Chaining
+
+The default value of the bucket type's `postcommit` property is an empty list,
+meaning that no post-commit hooks are specified by default. Adding one
+or more post-commit hook functions to the list, as documented above,
+will cause Riak to start evaluating those hook functions immediately
+after data has been created, updated, or deleted. Each post-commit hook
+function runs in a separate process so it's possible for several hook
+functions, triggered by the same update, to execute in parallel.
+
+**Note**: All post-commit hook functions are executed for each create,
+update, or delete.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/conflict-resolution.md b/content/riak/kv/2.9.8/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..ddc08c3269
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/conflict-resolution.md
@@ -0,0 +1,681 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/conflict-resolution
+  - /riak/kv/2.9.8/dev/using/conflict-resolution
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#node) is capable of receiving requests without requiring that
+every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
+unavoidable. Often, Riak can resolve these conflicts on its own
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+
+{{% note title="Important note on terminology" %}}
+In versions of Riak prior to 2.0, vector clocks were the only causal context
+mechanism available in Riak, which changed with the introduction of dotted
+version vectors in 2.0. Please note that you may frequently find terminology in
+client library APIs, internal Basho documentation, and more that uses the term
+"vector clock" interchangeably with causal context in general. Riak's HTTP API
+still uses an `X-Riak-Vclock` header, for example, even if you are using dotted
+version vectors.
+{{% /note %}}
+
+But even when you use causal context, Riak cannot always decide which
+value is most causally recent, especially in cases involving concurrent
+updates to an object. So how does Riak behave when it can't decide on a
+single most-up-to-date value? **That is your choice**. A full listing of
+available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now,
+though, please bear in mind that we strongly recommend one of the
+following two options:
+
+1. If your data can be modeled as one of the currently available [Riak
+   Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types), we recommend using one of these types,
+   because all of them have conflict resolution _built in_, completely
+   relieving applications of the need to engage in conflict resolution.
+2. If your data cannot be modeled as one of the available Data Types,
+   we recommend allowing Riak to generate [siblings](#siblings) and to design your application to resolve
+   conflicts in a way that fits your use case. Developing your own
+   **conflict resolution strategy** can be tricky, but it has clear
+   advantages over other approaches.
+
+Because Riak allows for a mixed approach when storing and managing data,
+you can apply multiple conflict resolution strategies within a cluster.
+
+> **Note on strong consistency**
+>
+> In versions of Riak 2.0 and later, you have the option of using Riak in
+a strongly consistent fashion. This document pertains to usage of Riak
+as an _eventually_ consistent system. If you'd like to use Riak's
+strong consistency feature, please refer to the following documents:
+>
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/strong-consistency) - A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/2.9.8/configuring/strong-consistency) - A guide for operators
+> * [strong consistency][use ref strong consistency] - A more theoretical explication of strong
+  consistency
+
+## Client- and Server-side Conflict Resolution
+
+Riak's eventual consistency model is powerful because Riak is
+fundamentally non-opinionated about how data resolution takes place.
+While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+approaches to conflict resolution that are available. In Riak, you can
+mix and match conflict resolution strategies at the bucket level,
+[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets)
+to consider when reasoning about conflict resolution are the
+`allow_mult` and `last_write_wins` properties.
+
+These properties provide you with the following basic options:
+
+### Timestamp-based Resolution
+
+If the [`allow_mult`](#siblings) parameter is set to
+`false`, Riak resolves all object replica conflicts internally and does
+not return siblings to the client.
How Riak resolves those conflicts +depends on the value that you set for a different bucket property, +[`last_write_wins`]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets). If `last_write_wins` is set to `false`, +Riak will resolve all conflicts on the basis of +[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are +attached to all Riak objects as metadata. + +The problem with timestamps is that they are not a reliable resolution +mechanism in distributed systems, and they always bear the risk of data +loss. A better yet still-problematic option is to adopt a +last-write-wins strategy, described directly below. + +### Last-write-wins + +Another way to manage conflicts is to set `allow_mult` to `false`, as +with timestamp-based resolution, while also setting the +`last_write_wins` parameter to +`true`. This produces a so-called last-write-wins (LWW) strategy whereby +Riak foregoes the use of all internal conflict resolution strategies +when making writes, effectively disregarding all previous writes. + +The problem with LWW is that it will necessarily drop some writes in the +case of concurrent updates in the name of preventing sibling creation. +If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. For examples of client-side sibling resolution, see the following +client-library-specific docs: + +* [Java]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/nodejs) + +In Riak versions 2.0 and later, `allow_mult` is set to `true` by default +for any [bucket types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) that you create. 
This means +that if you wish to avoid client-side sibling resolution, you have a few +options: + +* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) + that set `allow_mult` to `false` +* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/2.9.8/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/2.9.8/configuring/reference#default-bucket-properties) for your + cluster. If you set the `buckets.default.allow_mult` parameter to + `false`, all bucket types that you create will have `allow_mult` set + to `false` by default. + +## Causal Context + +When a value is stored in Riak, it is tagged with a piece of metadata +called a **causal context** which establishes the object's initial +version. Causal context comes in one of two possible forms, depending +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context#vector-clocks) will be used. + +Causal context essentially enables Riak to compare the different values +of objects stored in Riak and to determine a number of important things +about those values: + + * Whether one value is a direct descendant of the other + * Whether the values are direct descendants of a common parent + * Whether the values are unrelated in recent heritage + +Using the information provided by causal context, Riak is frequently, +though not always, able to resolve conflicts between values without +producing siblings. + +Both vector clocks and dotted version vectors are non human readable and +look something like this: + +``` +a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA= +``` + +If `allow_mult` is set to `true`, you should _always_ use causal context +when updating objects, _unless you are certain that no object exists +under that key_. Failing to use causal context with mutable data, +especially for objects that are frequently updated, can lead to +[sibling explosion]({{<baseurl>}}riak/kv/2.9.8/using/performance/latency-reduction#siblings), which can +produce a variety of problems in your cluster. Fortunately, much of the +work involved with using causal context is handled automatically by +Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.8/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{<baseurl>}}riak/kv/2.9.8/developing/usage/updating-objects) document. + +## Siblings + +A **sibling** is created when Riak is unable to resolve the canonical +version of an object being stored, i.e. when Riak is presented with +multiple possible values for an object and can't figure out which one is +most causally recent. The following scenarios can create sibling values +inside of a single object: + +1. **Concurrent writes** - If two writes occur simultaneously from +clients, Riak may not be able to choose a single value to store, in +which case the object will be given a sibling. These writes could happen +on the same node or on different nodes. +2. **Stale causal context** - Writes from any client using a stale +[causal context]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context). 
This is a less likely scenario if a client updates
+the object by reading the object first, fetching the causal context
+currently attached to the object, and then returning that causal context
+to Riak when performing the update (fortunately, our client libraries
+handle much of this automatically). However, even if a client follows
+this protocol when performing updates, a situation may occur in which an
+update happens from a different client while the read/write cycle is
+taking place. This may cause the first client to issue the write with an
+old causal context value and for a sibling to be created. A client is
+"misbehaved" if it habitually updates objects with a stale context or
+with no context at all.
+3. **Missing causal context** - If an object is updated with no causal
+context attached, siblings are very likely to be created. This is an
+unlikely scenario if you're using a Basho client library, but it _can_
+happen if you are manipulating objects using a client like `curl` and
+forgetting to set the `X-Riak-Vclock` header.
+
+## Siblings in Action
+
+Let's have a more concrete look at how siblings work in Riak. First,
+we'll create a bucket type called `siblings_allowed` with `allow_mult`
+set to `true`:
+
+```bash
+riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
+riak-admin bucket-type activate siblings_allowed
+riak-admin bucket-type status siblings_allowed
+```
+
+If the type has been activated, running the `status` command should
+return `siblings_allowed is active`. Now, we'll create two objects and
+write both of them to the same key without first fetching the object
+(which obtains the causal context):
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+RiakObject obj1 = new RiakObject()
+        .withContentType("text/plain")
+        .withValue(BinaryValue.create("Ren"));
+RiakObject obj2 = new RiakObject()
+        .withContentType("text/plain")
+        .withValue(BinaryValue.create("Stimpy"));
+StoreValue store1 = new StoreValue.Builder(obj1)
+        .withLocation(bestCharacterKey)
+        .build();
+StoreValue store2 = new StoreValue.Builder(obj2)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store1);
+client.execute(store2);
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = Riak::RObject.new(bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'Ren'
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'Stimpy'
+obj2.store
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = RiakObject(client, bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.data = 'Ren'
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.data = 'Stimpy'
+obj2.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('siblings_allowed');
+obj1.setBucket('nickolodeon');
+obj1.setKey('best_character');
+obj1.setValue('Ren');
+
+var obj2 = new Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries]({{<baseurl>}}riak/kv/2.9.8/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.8/developing/getting-started) section.
+
+At this point, multiple objects have been stored in the same key without
+passing any causal context to Riak. Let's see what happens if we try to
+read the contents of the object:
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed',
+    bucket: 'nickolodeon',
+    key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found.
We should get this response: + +```java +com.basho.riak.client.cap.UnresolvedConflictException: Siblings found +``` + +```ruby +<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]> +``` + +```python +[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>] +``` + +```csharp +Sibling count: 2 + VTag: 1DSVo7VED8AC6llS8IcDE6 + VTag: 7EiwrlFAJI5VMLK87vU4tE +``` + +```javascript +info: nickolodeon/best_character has '2' siblings +``` + +```curl +Siblings: +175xDv0I3UFCfGRC7K7U9z +6zY2mUCFPEoL834vYCDmPe +``` + +As you can see, reading an object with sibling values will result in +some form of "multiple choices" response (e.g. `300 Multiple Choices` in +HTTP). If you're using the HTTP interface and want to view all sibling +values, you can attach an `Accept: multipart/mixed` header to your +request: + +```curl +curl -H "Accept: multipart/mixed" \ + http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character +``` + +Response (without headers): + +``` +ren +--WUnzXITIPJFwucNwfdaofMkEG7H + +stimpy +--WUnzXITIPJFwucNwfdaofMkEG7H-- +``` + +If you select the first of the two siblings and retrieve its value, you +should see `Ren` and not `Stimpy`. + +### Using Causal Context + +Once you are presented with multiple options for a single value, you +must determine the correct value. In an application, this can be done +either in an automatic fashion, using a use case-specific resolver, or +by presenting the conflicting objects to the end user. For more +information on application-side conflict resolution, see our +client-library-specific documentation for the following languages: + +* [Java]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/nodejs) + +We won't deal with conflict resolution in this section. Instead, we'll +focus on how to use causal context. + +After having written several objects to Riak in the section above, we +have values in our object: `Ren` and `Stimpy`. But let's say that we +decide that `Stimpy` is the correct value based on our application's use +case. In order to resolve the conflict, we need to do three things: + +1. Fetch the current object (which will return both siblings) +2. Modify the value of the object, i.e. make the value `Stimpy` +3. Write the object back to the `best_character` key + +What happens when we fetch the object first, prior to the update, is +that the object handled by the client has a causal context attached. At +that point, we can modify the object's value, and when we write the +object back to Riak, _the causal context will automatically be attached +to it_. 
Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+        bucketType: 'siblings_allowed',
+        bucket: 'nickolodeon',
+        key: 'best_character'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue('Stimpy');
+        client.storeValue({ value: riakObj, returnBody: true },
+            function (err, rslt) {
+                if (err) {
+                    throw new Error(err);
+                }
+
+                assert(rslt.values.length === 1);
+            }
+        );
+    }
+);
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible to have two clients that are
+simultaneously engaging in conflict resolution. To avoid a pathological
+divergence, you should be sure to limit the number of reconciliations and fail
+once that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/2.9.8/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant volume of updates are performed on a single
+object in a small period of time.
While updating a single object +_extremely_ frequently is not recommended, you can tune Riak's vector +clock pruning to prevent vector clocks from growing too large too +quickly. More on pruning in the [section below](#vector-clock-pruning). + +### How does `last_write_wins` affect resolution? + +On the surface, it seems like setting `allow_mult` to `false` +(the default) and `last_write_wins` to `true` would result in the same +behavior, but there is a subtle distinction. + +Even though both settings return only one value to the client, setting +`allow_mult` to `false` still uses vector clocks for resolution, whereas +if `last_write_wins` is `true`, Riak reads the timestamp to determine +the latest version. Deeper in the system, if `allow_mult` is `false`, +Riak will still allow siblings to exist when they are created (via +concurrent writes or network partitions), whereas setting +`last_write_wins` to `true` means that Riak will overwrite the value +with the one that has the later timestamp. + +When you don't care about sibling creation, setting `allow_mult` to +`false` has the least surprising behavior: you get the latest value, +but network partitions are handled gracefully. However, for cases in +which keys are rewritten often (and quickly) and the new value isn't +necessarily dependent on the old value, `last_write_wins` will provide +better performance. Some use cases where you might want to use +`last_write_wins` include caching, session storage, and insert-only +(no updates). + +{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}} +The combination of setting both the `allow_mult` and `last_write_wins` +properties to `true` leads to undefined behavior and should not be used. +{{% /note %}} + +## Vector Clock Pruning + +Riak regularly prunes vector clocks to prevent overgrowth based on four +parameters which can be set for any bucket type that you create: + +Parameter | Default value | Description +:---------|:--------------|:----------- +`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned +`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned +`young_vclock` | `20` | If a vector clock entry is younger than this value (in milliseconds), it will not be pruned +`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in milliseconds), it will be pruned + +This diagram shows how the values of these parameters dictate the vector +clock pruning process: + +![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png) + +## More Information + +Additional background information on vector clocks: + +* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock) +* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/) +* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/) +* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563) + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/csharp.md new file mode 100644 index 0000000000..b6d427c3db --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/csharp.md @@ -0,0 +1,123 @@ +--- +title_supertext: "Conflict Resolution:" +title: "C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "C 
Sharp" + identifier: "usage_conflict_resolution_csharp" + weight: 103 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.8/dev/using/conflict-resolution/csharp + - /riak/kv/2.9.8/dev/using/conflict-resolution/csharp +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak .NET client][riak_dotnet_client]. + +## How the .NET Client Handles Conflict Resolution + +In the Riak .NET client, every Riak object has a `siblings` property that +provides access to a list of that object's sibling values. If there are no +siblings, that property will return an empty list. + +Here's an example of an object with siblings: + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +So what happens if the count of `obj.Siblings` is greater than 0, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `Siblings` list and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `Siblings` list and will +fetch, update and store the definitive value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Now, modify the object's value +obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain); + +// Then, store the object which has vector clock attached +var putRslt = client.Put(obj); +CheckResult(putRslt); + +obj = putRslt.Value; +// Voila, no more siblings! +Debug.Assert(obj.Siblings.Count == 0); +``` + +### Choosing a value from `Siblings` + +This example shows a basic sibling resolution strategy in which the first +sibling is chosen as the canonical value. 
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..2a959e95bc
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/conflict-resolution/golang
+  - /riak/kv/2.9.8/dev/using/conflict-resolution/golang
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, it is possible that the result of a fetch will return an array
+of sibling objects. If there are no siblings, the `Values` slice will
+contain a single object.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either: fetch, update, and store a
+canonical value; or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L125-L146)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type.
+
+[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210)
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/java.md
new file mode 100644
index 0000000000..637eced411
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/java.md
@@ -0,0 +1,276 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Java"
+    identifier: "usage_conflict_resolution_java"
+    weight: 100
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/conflict-resolution/java
+  - /riak/kv/2.9.8/dev/using/conflict-resolution/java
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Java
+client](https://github.com/basho/riak-java-client).
+
+## How the Java Client Handles Conflict Resolution
+
+The official Riak Java client provides a `ConflictResolver` interface
+for handling sibling resolution. This interface requires that you
+implement a `resolve` method that takes a Java `List` of objects of a
+specific type that are stored in Riak and produces a single object of
+that type, i.e. converts a `List<T>` to a single `T`. Once that
+interface has been implemented, it can be registered as a singleton and
+thereby applied to all read operations on a specific data type. Below is
+an example resolver for the class `Foo`:
+
+```java
+import java.util.List;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class FooResolver implements ConflictResolver<Foo> {
+    @Override
+    public Foo resolve(List<Foo> siblings) {
+        // Insert your sibling resolution logic here
+    }
+}
+```
+
+What happens within the `resolve` method is up to you and will always
+depend on the use case at hand. You can implement a resolver that
+selects a random `Foo` from the list, chooses the `Foo` with the most
+recent timestamp (if you've set up the class `Foo` to have timestamps),
+etc. In this tutorial we'll provide a simple example to get you started.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? What criteria
+should be applied in making that decision? Should the lists be merged?
+Should we pick a `User` object at random?
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where one
+possible value has `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+import java.util.Set;
+
+public class User {
+    public String username;
+    public Set<String> friends;
+
+    public User(String username, Set<String> friends) {
+        this.username = username;
+        this.friends = friends;
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
+need to implement that interface in a way that is specific to the need
+at hand, i.e. taking a list of `User` objects and returning the `User`
+object that has the longest `friends` list:
+
+```java
+import java.util.List;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied on all reads that involve that object type.
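+
+Once `UserResolver` is registered, a read can hand back a single resolved
+`User` directly. As a minimal sketch (it assumes a configured `RiakClient`
+named `client`; the bucket type, bucket, and key names are illustrative):
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
+FetchValue fetch = new FetchValue.Builder(key).build();
+FetchValue.Response response = client.execute(fetch);
+
+// getValue() applies the registered UserResolver to any siblings,
+// so a single resolved User comes back
+User bashobunny = response.getValue(User.class);
+```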
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.8/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // key is empty and returning the one sibling if there is only
+        // one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation.
+While that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets).
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/nodejs.md
new file mode 100644
index 0000000000..655de4e189
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/nodejs.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "NodeJS"
+    identifier: "usage_conflict_resolution_nodejs"
+    weight: 104
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/conflict-resolution/nodejs
+  - /riak/kv/2.9.8/dev/using/conflict-resolution/nodejs
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Node.js client](https://github.com/basho/riak-nodejs-client).
+
+## How the Node.js Client Handles Conflict Resolution
+
+In the Riak Node.js client, the result of a fetch may contain an array of
+sibling objects in its `values` property. If there are no siblings, that
+property will contain a single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68)
+
+So what happens if the length of `rslt.values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update and store a
+canonical value, or choose a sibling from the `values` array and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `values` array and will
+fetch, update and store the definitive value.
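+
+As a rough, hedged sketch of that pattern (it assumes `client.fetchValue`,
+and that re-storing a fetched object carries the causal context from the
+fetch; see the linked example for the canonical version):
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings', bucket: 'users', key: 'bashobunny'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    // Reuse one of the fetched sibling objects so that the store
+    // carries the vector clock from the fetch (assumed behavior)
+    var riakObj = rslt.values.shift();
+    riakObj.setValue('canonical value');
+    client.storeValue({
+        bucketType: 'siblings', bucket: 'users', key: 'bashobunny',
+        value: riakObj
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```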
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..f809f042c0
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,244 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/conflict-resolution/php
+  - /riak/kv/2.9.8/dev/using/conflict-resolution/php
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides what is needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+    ->build()
+    ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application?
What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? + +This decision will always be yours to make. Here, though, we'll keep it +simple and say that the following criterion will hold: if conflicting +lists exist, _the longer list will be the one that our application deems +correct_. So if the user `user1234` has a sibling conflict where one +possible value has `friends` lists with 100, 75, and 10 friends, +respectively, the list of 100 friends will win out. While this might +not make sense in real-world applications, it's a good jumping-off +point. We'll explore the drawbacks of this approach, as well as a better +alternative, in this document as well. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` as well as a `friends` property that +lists the usernames, as strings, of the user's friends. We'll use a +`Set` for the `friends` property to avoid duplicates. + +```php +class User { + public $username; + public $friends; + + public function __construct($username, array $friends = []) + { + $this->username = $username; + $this->friends = $friends; + } + + public function __toString() + { + return json_encode([ + 'username' => $this->username, + 'friends' => $this->friends, + 'friends_count' => count($this->friends) + ]); + } +} +``` + +Here's an example of instantiating a new `User` object: + +```php +$bashobunny = new User('bashobunny', ['fred', 'barney']); +``` + +### Implementing a Conflict Resolution Function + +Let's say that we've stored a bunch of `User` objects in Riak and that a +few concurrent writes have led to siblings. How is our application going +to deal with that? First, let's say that there's a `User` object stored +in the bucket `users` (which is of the bucket type `siblings`, as +explained above) under the key `bashobunny`. We can fetch the object +that is stored there and see if it has siblings: + +```php +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('bashobunny', 'users', 'siblings') + ->build() + ->execute(); + +echo $response->hasSiblings(); // 1 +``` + +If we get `true`, then there are siblings. So what do we do in that +case? At this point, we need to write a function that resolves the list +of siblings, i.e. reduces the `$response->getSiblings()` array down to one member. +In our case, we need a function that takes a Riak response object as its argument, +applies some logic to the list of values contained in the `siblings` property +of the object, and returns a single value. 
For our example use case here, we'll
+return the sibling with the longest `friends` list:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    if (!$response->hasSiblings()) {
+        // No conflict to resolve; return the lone object
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $max_key = 0;
+    foreach ($siblings as $key => $sibling) {
+        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+            $max_key = $key;
+        }
+    }
+
+    return $siblings[$max_key];
+}
+```
+
+We can then embed this function into a more general function for fetching
+objects from the `users` bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.8/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+`friends` list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object.
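+
+As a hedged sketch of one way to do that (it reuses the `use` statements and
+response API from above, plus the `StoreObject::buildJsonObject()` builder;
+the function name is illustrative):
+
+```php
+function merge_friends_list_resolver(Command\Object\Response $response, $username, Riak $riak)
+{
+    // Collect the friends from every sibling into one de-duplicated list
+    $friends = [];
+    foreach ($response->getSiblings() as $sibling) {
+        $friends = array_merge($friends, $sibling->getData()['friends']);
+    }
+    $friends = array_values(array_unique($friends));
+
+    $merged = [
+        'username' => $username,
+        'friends' => $friends,
+        'friends_count' => count($friends),
+    ];
+
+    // Store the merged value back. Note that a production resolver should
+    // also carry the causal context (vector clock) from the fetch so that
+    // this store replaces the siblings rather than adding another one.
+    (new Command\Builder\StoreObject($riak))
+        ->buildLocation($username, 'users', 'siblings')
+        ->buildJsonObject($merged)
+        ->build()
+        ->execute();
+
+    return $merged;
+}
+```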
+
+The drawback to this approach is that it's more or less inevitable that a user
+will remove a friend from their friends list, and then that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets).
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/python.md
new file mode 100644
index 0000000000..5ff60f69c7
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/python.md
@@ -0,0 +1,258 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Python"
+    identifier: "usage_conflict_resolution_python"
+    weight: 102
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/conflict-resolution/python
+  - /riak/kv/2.9.8/dev/using/conflict-resolution/python
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Python
+client](https://github.com/basho/riak-python-client).
+
+## How the Python Client Handles Conflict Resolution
+
+In the official Python client, every object of the `RiakObject` class
+has a `siblings` property that provides access to a list of an object's
+sibling values. If there are no siblings, that property will return a
+list with only one item. Here's an example of an object with siblings:
+
+```python
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x106cc5510>]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? The easiest way to resolve siblings automatically with
+the Python client is to create a conflict-resolving function that takes
+a list of sibling values and returns a single value. Such resolution
+functions can be registered either at the object level or the bucket
+level. A more complete explanation can be found in the section directly
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will
+be of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `friends` property that lists the usernames, as
+strings, of the user's friends. We will also create a `to_json` method,
+as we'll be storing each `User` object as JSON:
+
+```python
+class User(object):
+    def __init__(self, username, friends):
+        self.username = username
+        self.friends = friends
+
+    def to_json(self):
+        return vars(self)
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```python
+new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json()
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing and Registering a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = bucket.get('bashobunny')
+
+print len(obj.siblings) > 1
+```
+
+If we get `True`, then there are siblings. So what do we do in that
+case? The Python client allows us to write a conflict resolution hook
+function that will be triggered any time siblings are found, i.e. any
+time `len(obj.siblings) > 1`. A hook function like this needs to take a
+single `RiakObject` object as its argument, apply some sort of logic to
+the list of values contained in the `siblings` property, and ultimately
+return a list with a single "correct" value. For our example case, we'll
+return the value with the longest `friends` list:
+
+```python
+def longest_friends_list_resolver(riak_object):
+    # We'll specify a lambda function that operates on the length of
+    # each sibling's "friends" list:
+    lm = lambda sibling: len(sibling.data['friends'])
+    # Then we'll return a list that contains only the object with the
+    # maximum value for the length of the "friends" list:
+    riak_object.siblings = [max(riak_object.siblings, key=lm), ]
+```
+
+### Registering a Conflict Resolver Function
+
+In the Python client, resolver functions can be registered at the object
+level, as in this example:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.resolver = longest_friends_list_resolver
+
+# Now, when the object is loaded from Riak, it will resolve to a single
+# value instead of multiple values when both commands are executed:
+obj.reload()
+obj.store()
+```
+
+Alternatively, resolvers can be registered at the bucket level, so that
+the resolution is applied to all objects in the bucket:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+bucket.resolver = longest_friends_list_resolver
+
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.reload()
+obj.store()
+
+# The resolver will also be applied if you perform operations using the
+# bucket object:
+
+bucket.get('bashobunny')
+bucket.get('some_other_user')
+```
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` object values and returns a single value. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including code examples
+from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.8/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friend
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```python
+def longest_friends_list_resolver(riak_object):
+    # We start with an empty set
+    friends_list = set()
+
+    # Then we add all the friends from all siblings to the set
+    for user in riak_object.siblings:
+        friends_list.update(user.data['friends'])
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list.
+    username = riak_object.siblings[0].data['username']
+    new_user = User(username, list(friends_list))
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json()
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets).
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/ruby.md
new file mode 100644
index 0000000000..57da25b675
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/conflict-resolution/ruby.md
@@ -0,0 +1,254 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Ruby"
+    identifier: "usage_conflict_resolution_ruby"
+    weight: 101
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/conflict-resolution/ruby
+  - /riak/kv/2.9.8/dev/using/conflict-resolution/ruby
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Ruby
+client](https://github.com/basho/riak-ruby-client).
+
+## How the Ruby Client Handles Conflict Resolution
+
+In the official Ruby client, every Riak object has a `siblings` property
+that provides access to a list of that object's sibling values. If there
+are no siblings, that property will return an array with only one item.
+Here's an example of an object with siblings:
+
+```ruby
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? In order to resolve siblings, you need to create a
+resolution function that takes a Riak object and reduces the `siblings`
+array down to a single value. An example is provided in the section
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will be
+of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` and a `friends` property that lists
+the usernames, as strings, of the user's friends. We will also create a
+`to_json` method, as we'll be storing each `User` object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
+```
+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username, type: 'siblings')
+  longest_friends_list_resolver(user_object)
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.8/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+`friends` list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be merged into one
+    # array
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets). + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/content-types.md b/content/riak/kv/2.9.8/developing/usage/content-types.md new file mode 100644 index 0000000000..a955dfca50 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/content-types.md @@ -0,0 +1,192 @@ +--- +title: "Content Types" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Content Types" + identifier: "usage_content_types" + weight: 104 + parent: "developing_usage" +toc: true +aliases: +--- + +Riak KV is a fundamentally content-agnostic database. You can use it to +store anything you want, from JSON to XML to HTML to binaries to images +and beyond. It's important to note that _all_ objects stored in +Riak need a specified content type. If you don't specify a +content type, the reaction will vary based on your client library: + +```java +// In the Java client, the response when storing an object without +// specifying a content type will depend on what is being stored. If you +// store a Java Map, for example, the client will automatically specify +// that the object is "application/json"; if you store a String, the +// client will specify "application/x-www-form-urlencoded"; POJOs are +// stored as JSON by default, and so on. +``` + +```ruby +# In the Ruby client, you must always specify a content type. If you +# you don't, you'll see the following error: +ArgumentError: content_type is not defined! +``` + +```php +# PHP will default to cURLs default content-type for POST & PUT requests: +# application/x-www-form-urlencoded + +# If you use the StoreObject::buildJsonObject() method when building your command, +# it will store the item with application/json as the content-type +``` + +```python +# In the Python client, the default content type is "application/json". +# Because of this, you should always make sure to specify the content +# type when storing other types of data. +``` + +```csharp +// Using the Riak .NET Client, the response when storing an object without +// specifying a content type will depend on what is being stored. +// If you store a Dictionary, for example, the client will +// automatically specify that the object is "application/json"; +// POCOs are stored as JSON by default, and so on. +``` + +```javascript +// In the Node.js client, the default content type is "application/json". +// Because of this, you should always make sure to specify the content +// type when storing other types of data. +``` + +```erlang +%% In the Erlang client, the response when storing an object without +%% specify8ing a content type will depend on what is being stored. If +%% you store a simple binary, for example, the client will automatically +%% specify that the object is "application/octet-stream"; if you store a +%% string, the client will specify "application/x-erlang-binary"; and so +%% on. +``` + +```golang +// In the Go client, you must always specify a content type. +``` + +Because content type negotiation varies so widely from client to client, +we recommend consulting the documentation for your preferred client for +more information. 
+
+## Specifying Content Type
+
+For all writes to Riak, you will need to specify a content type, for
+example `text/plain` or `application/json`.
+
+```java
+Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(wildeGeniusQuote)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = Riak::RObject.new(bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.raw_data = 'I have nothing to declare but my genius'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('genius', 'oscar_wilde', 'quotes')
+    ->buildObject('I have nothing to declare but my genius!', 'text/plain')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = RiakObject(client, bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.data = 'I have nothing to declare but my genius'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
+var obj = new RiakObject(id, "I have nothing to declare but my genius",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('I have nothing to declare but my genius');
+client.storeValue({
+    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
+                       <<"genius">>,
+                       <<"I have nothing to declare but my genius">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("I have nothing to declare but my genius"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("quotes").
+    WithBucket("oscar_wilde").
+    WithKey("genius").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "I have nothing to declare but my genius" \
+  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Please note that POST is also a valid method for writes, for the sake
+# of compatibility
+```
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/creating-objects.md b/content/riak/kv/2.9.8/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..974737576b
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/creating-objects.md
@@ -0,0 +1,555 @@
+---
+title: "Creating Objects in Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Creating Objects"
+    identifier: "usage_creating_objects"
+    weight: 100
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+[usage content types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/content-types
+
+Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
+requests. Here is the basic form of writes:
+
+```
+PUT /types/<type>/buckets/<bucket>/keys/<key>
+
+# If you're using HTTP to interact with Riak, you can also use POST
+```
+
+As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/bucket-types).
+
+The object we're storing will be very simple, just a basic text snippet
+of something that Rufus might say. Let's build the object and then store
+it.
+
+```java
+String quote = "WOOF!";
+Namespace bucket = new Namespace("animals", "dogs");
+Location rufusLocation = new Location(bucket, "rufus");
+RiakObject rufusObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create(quote));
+StoreValue storeOp = new StoreValue.Builder(rufusObject)
+        .withLocation(rufusLocation)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = Riak::RObject.new(bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('rufus', 'dogs', 'animals')
+    ->buildObject('WOOF!', 'text/plain')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var obj = new RiakObject(id, "WOOF!", "text/plain");
+var result = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('WOOF!');
+client.storeValue({
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("WOOF!"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+Notice that we specified both a value for the object, i.e. `WOOF!`, and
+a content type, `text/plain`. See [content types][usage content types] for more information.
+
+Now, you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/2.9.8/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/bucket-types).
+
+### Store an Object
+
+Your application will often have its own method of generating the keys
+for its data, e.g. on the basis of timestamps. If so, storing that data
+is easy. The basic request looks like this.
+
+```
+PUT /types/TYPE/buckets/BUCKET/keys/KEY
+
+# If you're using HTTP, POST can be used instead of PUT. The only
+# difference between POST and PUT is that you should POST in cases where
+# you want Riak to auto-generate a key. More on this can be found in the
More on this can be found in the +# examples below. +``` + +There is no need to intentionally create buckets in Riak. They pop into +existence when keys are added to them, and disappear when all keys have +been removed from them. If you don't specify a bucket's type, the type +[`default`]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) will be applied. + +#### Write Parameters + +Write requests support the following parameters: + +Parameter | Default | Description +:---------|:--------|:----------- +`w` | `quorum` | How many replicas to write to before returning a successful response +`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful +`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response +`returnbody` | `false` | Whether to return the contents of the stored object + +Here is an example of storing an object (another brief text snippet) +under the key `viper` in the bucket `dodge`, which bears the type +`cars`, with `w` set to `3`: + +```java +Location viperKey = new Location(new Namespace("cars", "dodge"), "viper"); +BinaryValue text = BinaryValue.create("vroom"); +RiakObject obj = new RiakObject() + .setContentType("text/plain") + .setValue(text); +StoreValue store = new StoreValue.Builder(myKey, obj) + .withOption(StoreOption.W, new Quorum(3)) + .build(); +client.execute(store); +``` + +```ruby +bucket = client.bucket_type('cars').bucket('dodge') +obj = Riak::RObject.new(bucket, 'viper') +obj.content_type = 'text/plain' +obj.raw_data = 'vroom' +obj.store(w: 3) +``` + +```php +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildLocation('viper', 'dodge', 'cars') + ->buildObject('vroom', 'text/plain') + ->withParameter('w', 3) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('cars').bucket('dodge') +obj = RiakObject(client, bucket, 'viper') +obj.content_type = 'text/plain' +obj.data = 'vroom' +obj.store(w=3) +``` + +```csharp +var id = new RiakObjectId("cars", "dodge", "viper"); +var obj = new RiakObject(id, "vroom", "text/plain"); +var options = new RiakPutOptions(); +options.SetW(new Quorum(3)); +var result = client.Put(obj, options); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setValue('vroom'); + +var options = { + bucketType: 'cars', bucket: 'dodge', key: 'viper', + w: 3, value: riakObj +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Object = riakc_obj:new({<<"cars">>, <<"dodge">>}, + <<"viper">>, + <<"vroom">>, + <<"text/plain">>, + [{w, 3}]). +riakc_pb_socket:put(Pid, Object). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("vroom"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("cars"). + WithBucket("dodge"). + WithKey("viper"). + WithW(3). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} +``` + +```curl +curl -XPUT \ + -H "Content-Type: text/plain" \ + -d "vroom" \ + http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3 +``` + +Again, the above will only work if the `cars` bucket type has been created and activated. + +#### Return Body + +If `returnbody` is set to `true`, any of the response headers expected +from a read request may be present. 
Like a `GET` request, `300 Multiple
Choices` may be returned if siblings existed or were created as part of
the operation, and the response can be dealt with similarly.

Normal HTTP status codes (responses will vary for client libraries):

* `200 OK`
* `204 No Content`
* `300 Multiple Choices`

For example, using the same object from above:

```java
Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
BinaryValue text = BinaryValue.create("vroom");
RiakObject obj = new RiakObject()
        .setContentType("text/plain")
        .setValue(text);
StoreValue store = new StoreValue.Builder(obj)
        .withLocation(viperKey)
        .withOption(StoreOption.W, new Quorum(3))
        .withOption(StoreOption.RETURN_BODY, true)
        .build();
client.execute(store);
```

```ruby
bucket = client.bucket_type('cars').bucket('dodge')
obj = Riak::RObject.new(bucket, 'viper')
obj.content_type = 'text/plain'
obj.raw_data = 'vroom'
obj.store(w: 3, returnbody: true)
```

```php
(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('viper', 'dodge', 'cars')
  ->buildObject('vroom', 'text/plain')
  ->withParameter('w', 3)
  ->withParameter('returnbody', 'true')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('cars').bucket('dodge')
obj = RiakObject(client, bucket, 'viper')
obj.content_type = 'text/plain'
obj.data = 'vroom'
obj.store(w=3, return_body=True)
```

```csharp
var id = new RiakObjectId("cars", "dodge", "viper");
var obj = new RiakObject(id, "vroom", "text/plain");
var options = new RiakPutOptions();
options.SetW(new Quorum(3));
options.SetReturnBody(true);
var result = client.Put(obj, options);
```

```javascript
var riakObj = new Riak.Commands.KV.RiakObject();
riakObj.setContentType('text/plain');
riakObj.setValue('vroom');

var options = {
    bucketType: 'cars', bucket: 'dodge', key: 'viper',
    w: 3, returnBody: true, value: riakObj
};
client.storeValue(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
    var riakObj = rslt.values.shift();
    var viper = riakObj.value;
    logger.info("dodge viper: %s", viper.toString('utf8'));
});
```

```erlang
Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
                       <<"viper">>,
                       <<"vroom">>,
                       <<"text/plain">>).
riakc_pb_socket:put(Pid, Object, [return_body]).
```

```golang
obj := &riak.Object{
    ContentType:     "text/plain",
    Charset:         "utf-8",
    ContentEncoding: "utf-8",
    Value:           []byte("vroom"),
}

cmd, err := riak.NewStoreValueCommandBuilder().
    WithBucketType("cars").
    WithBucket("dodge").
    WithKey("viper").
    WithW(3).
    WithContent(obj).
    WithReturnBody(true).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}
```

```curl
curl -XPUT \
  -H "Content-Type: text/plain" \
  -d "vroom" \
  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
```

### Store a New Object and Assign a Random Key

If your application would rather leave key generation up to Riak, issue
a `POST` request to the bucket URL instead of a `PUT` to a bucket/key
pair:

```
POST /types/TYPE/buckets/BUCKET/keys
```

If you don't pass Riak a `key` name after the bucket, it will know to
create one for you.

Supported headers are the same as for bucket/key write requests, though
`X-Riak-Vclock` will never be relevant for these POST requests.
Supported query parameters are also the same as for bucket/key PUT
requests.
+ +Normal status codes: + +* `201 Created` + +This command will store an object in the bucket `random_user_keys`, +which bears the bucket type `users`. + +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
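    // Note that no WithKey() is chained here: leaving the key out tells
    // Riak to generate one, which is read back below as rsp.GeneratedKey.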
+ Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +svc := cmd.(*riak.StoreValueCommand) +rsp := svc.Response +fmt.Printf("Generated key: %v\n", rsp.GeneratedKey) + +// Output: +// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N +``` + +```curl +curl -i -XPOST \ + -H "Content-Type: text/plain" \ + -d "this is a test" \ + http://localhost:8098/types/users/buckets/random_user_keys/keys + +# In the output, you should see a Location header that will give you the +# location of the object in Riak, with the key at the end: + +Location: /buckets/test/keys/G7FYUXtTsEdru4NP32eijMIRK3o +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/custom-extractors.md b/content/riak/kv/2.9.8/developing/usage/custom-extractors.md new file mode 100644 index 0000000000..425dfd4872 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/custom-extractors.md @@ -0,0 +1,424 @@ +--- +title: "Custom Extractors" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Custom Extractors" + identifier: "usage_custom_extractors" + weight: 113 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.8/dev/search/custom-extractors + - /riak/kv/2.9.8/dev/search/custom-extractors +--- + +Solr, and by extension Riak Search, has default extractors for a wide +variety of data types, including JSON, XML, and plaintext. Riak Search +ships with the following extractors: + +Content Type | Erlang Module +:------------|:------------- +`application/json` | `yz_json_extractor` +`application/xml` | `yz_xml_extractor` +`text/plain` | `yz_text_extractor` +`text/xml` | `yz_xml_extractor` +No specified type | `yz_noop_extractor` + +There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/searching-data-types). + +If you're working with a data format that does not have a default Solr +extractor, you can create your own and register it with Riak Search. +We'll show you how to do so by way of example. + +## The Extractor Interface + +Creating a custom extract involves creating an Erlang interface that +implements two functions: + +* `extract/1` - Takes the contents of the object and calls `extract/2` + with the same contents and an empty list +* `extract/2` - Takes the contents of the object and returns an Erlang + [proplist](http://www.erlang.org/doc/man/proplists.html) with a + single field name and a single value associated with that name + +The following extractor shows how a pure text extractor implements those +two functions: + +```erlang +-module(search_test_extractor). +-include("yokozuna.hrl"). +-compile(export_all). + +extract(Value) -> + extract(Value, []). + +extract(Value, Opts) -> + FieldName = field_name(Opts), + [{FieldName, Value}]. + +-spec field_name(proplist()) -> any(). +field_name(Opts) -> + proplists:get_value(field_name, Opts, text). +``` + +This extractor takes the contents of a `Value` and returns a proplist +with a single field name (in this case `text`) and the single value. +This function can be run in the Erlang shell. Let's run it providing the +text `hello`: + +```erlang +> c(search_test_extractor). +%% {ok, search_test_extractor} + +> search_test_extractor:extract("hello"). + +%% Console output: +[{text, "hello"}] +``` + +Upon running this command, the value `hello` would be indexed in Solr +under the fieldname `text`. 
If you wanted to find all objects with a +`text` field that begins with `Fourscore`, you could use the +Solr query `text:Fourscore*`, to give just one example. + +## An Example Custom Extractor + +Let's say that we're storing HTTP header packet data in Riak. Here's an +example of such a packet: + +``` +GET http://www.google.com HTTP/1.1 +``` + +We want to register the following information in Solr: + +Field name | Value | Extracted value in this example +:----------|:------|:------------------------------- +`method` | The HTTP method | `GET` +`host` | The URL's host | `www.google.com` +`uri` | The URI, i.e. what comes after the host | `/` + +The example extractor below would provide the three desired +fields/values. It relies on the +[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3) +function from Erlang's standard library. + +```erlang +-module(yz_httpheader_extractor). +-compile(export_all). + +extract(Value) -> + extract(Value, []). + +%% In this example, we can ignore the Opts variable from the example +%% above, hence the underscore: +extract(Value, _Opts) -> + {ok, + {http_request, + Method, + {absoluteURI, http, Host, undefined, Uri}, + _Version}, + _Rest} = erlang:decode_packet(http, Value, []), + [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}]. +``` + +This file will be stored in a `yz_httpheader_extractor.erl` file (as +Erlang filenames must match the module name). Now that our extractor has +been written, it must be compiled and registered in Riak before it can +be used. + +## Registering Custom Extractors + +In order to use a custom extractor, you must create a compiled `.beam` +file out of your `.erl` extractor file and then tell Riak where that +file is located. Let's say that we have created a +`search_test_extractor.erl` file in the directory `/opt/beams`. First, +we need to compile that file: + +```bash +erlc search_test_extractor.erl +``` + +To instruct Riak where to find the resulting +`search_test_extractor.beam` file, we'll need to add a line to an +`advanced.config` file in the node's `/etc` directory (more information +can be found in our documentation on [advanced]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#advanced-configuration)). Here's an +example: + +```advancedconfig +[ + %% Other configs + {vm_args, [ + {"-pa /opt/beams", ""} + ]}, + %% Other configs +] +``` + +This will instruct the Erlang VM on which Riak runs to look for compiled +`.beam` files in the proper directory. You should re-start the node at +this point. Once the node has been re-started, you can use the node's +Erlang shell to register the `yz_httpheader_extractor`. First, attach to +the shell: + +```bash +riak attach +``` + +At this point, we need to choose a MIME type for our extractor. Let's +call it `application/httpheader`. Once you're in the shell: + +```erlang +> yz_extractor:register("application/httpheader", yz_httpheader_extractor). +``` + +If successful, this command will return a list of currently registered +extractors. 
It should look like this: + +```erlang +[{default,yz_noop_extractor}, + {"application/httpheader",yz_httpheader_extractor}, + {"application/json",yz_json_extractor}, + {"application/riak_counter",yz_dt_extractor}, + {"application/riak_map",yz_dt_extractor}, + {"application/riak_set",yz_dt_extractor}, + {"application/xml",yz_xml_extractor}, + {"text/plain",yz_text_extractor}, + {"text/xml",yz_xml_extractor}] +``` + +If the `application/httpheader` extractor is part of that list, then the +extractor has been successfully registered. + +## Verifying Our Custom Extractor + +Now that Riak Search knows how to decode and extract HTTP header packet +data, let's store some in Riak and then query it. We'll put the example +packet data from above in a `google_packet.bin` file. Then, we'll `PUT` +that binary to Riak's `/search/extract` endpoint: + +```curl +curl -XPUT $RIAK_HOST/search/extract \ + -H 'Content-Type: application/httpheader' \ # Note that we used our custom MIME type + --data-binary @google_packet.bin +``` + +That should return the following JSON: + +```json +{ + "method": "GET", + "host": "www.google.com", + "uri": "/" +} +``` + +We can also verify this in the Erlang shell (whether in a Riak node's +Erlang shell or otherwise): + +```erlang +yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor). + +%% Console output: +[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}] +``` + +## Indexing and Searching HTTP Header Packet Data + +Now that Solr knows how to extract HTTP header packet data, we need to +create a schema that extends the [default schema]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +to `<fields>` in the schema, which we'll name `http_header_schema` and +store in a `http_header_schema.xml` file: + +```xml +<?xml version="1.0" encoding="UTF-8" ?> +<schema name="http_header_schema" version="1.5"> +<fields> + <!-- other required fields here --> + + <field name="method" type="string" indexed="true" stored="true" multiValued="false"/> + <field name="host" type="string" indexed="true" stored="true" multiValued="false"/> + <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/> +</fields> +``` + +Now, we can store the schema: + +```java +import org.apache.commons.io.FileUtils + +File xml = new File("http_header_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_xml = File.read('http_header_schema.xml') +client.create_search_schema('http_header_schema', schema_xml) +``` + +```php +$schema_string = file_get_contents('http_header_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('http_header_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +import io + +schema_xml = open('http_header_schema.xml').read() +client.create_search_schema('http_header_schema', schema_xml) +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/http_header_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @http_header_schema.xml +``` + +Riak now has our schema stored and ready for use. 
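If you want to double-check that the upload worked, most clients can read a schema back out of Riak. Here is a quick sketch using the Python client's `get_search_schema`, assuming the `client` object from the examples above and a recent version of the official Python client:

```python
# Fetch the stored schema and confirm its content round-tripped intact
schema = client.get_search_schema('http_header_schema')
print(schema['name'])     # http_header_schema
print(schema['content'])  # the XML uploaded above
```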
Let's create a search
index called `header_data` that's associated with our new schema:

```java
YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
        .build();
client.execute(storeIndex);
```

```ruby
client.create_search_index('header_data', 'http_header_schema')
```

```php
(new \Basho\Riak\Command\Builder\StoreIndex($riak))
  ->withName('header_data')
  ->usingSchema('http_header_schema')
  ->build()
  ->execute();
```

```python
client.create_search_index('header_data', 'http_header_schema')
```

```curl
curl -XPUT $RIAK_HOST/search/index/header_data \
  -H 'Content-Type: application/json' \
  -d '{"schema":"http_header_schema"}'
```

Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types)
for all of the HTTP header data that we plan to store. Any bucket that
bears this type will be associated with our `header_data` search index.
We'll call our bucket type `http_data_store`.

```bash
riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
riak-admin bucket-type activate http_data_store
```

Let's use the same `google_packet.bin` file that we used previously and
store it in a bucket with the `http_data_store` bucket type, making sure
to use our custom `application/httpheader` MIME type:

```java
Location key = new Location(new Namespace("http_data_store", "packets"), "google");
File packetData = new File("google_packet.bin");
byte[] packetBinary = FileUtils.readFileToByteArray(packetData);

RiakObject packetObject = new RiakObject()
        .setContentType("application/httpheader")
        .setValue(BinaryValue.create(packetBinary));

StoreValue storeOp = new StoreValue.Builder(packetObject)
        .withLocation(key)
        .build();
client.execute(storeOp);
```

```ruby
packet_data = File.read('google_packet.bin')
bucket = client.bucket_type('http_data_store').bucket('packets')
obj = Riak::RObject.new(bucket, 'google')
obj.content_type = 'application/httpheader'
obj.raw_data = packet_data
obj.store
```

```php
$object = new \Basho\Riak\Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('google', 'packets', 'http_data_store')
  ->withObject($object)
  ->build()
  ->execute();
```

```python
packet_data = open('google_packet.bin').read()
bucket = client.bucket_type('http_data_store').bucket('packets')
obj = RiakObject(client, bucket, 'google')
obj.content_type = 'application/httpheader'
obj.data = packet_data
obj.store()
```

```curl
curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
  -H 'Content-Type: application/httpheader' \
  --data-binary @google_packet.bin
```

Now that we have some header packet data stored, we can query our
`header_data` index on whatever basis we'd like.
First, let's verify
that we'll get one result if we query for objects that have the HTTP
method `GET`:

```java
// Using the same method from above:
String query = "method:GET";

// Again using the same method from above:
int numberFound = results.numResults(); // 1
```

```ruby
results = client.search('header_data', 'method:GET')
results['num_found'] # 1
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withQuery('method:GET')
  ->withIndexName('header_data')
  ->build()
  ->execute();

$response->getNumFound(); // 1
```

```python
results = client.fulltext_search('header_data', 'method:GET')
results['num_found'] # 1
```

```curl
curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"

# This should return a fairly large JSON object with a "num_found" field.
# The value of that field should be 1.
```




diff --git a/content/riak/kv/2.9.8/developing/usage/deleting-objects.md b/content/riak/kv/2.9.8/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..4a4a41c7f9
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/deleting-objects.md
@@ -0,0 +1,157 @@
---
title: "Deleting Objects"
description: ""
project: "riak_kv"
project_version: 2.9.8
menu:
  riak_kv-2.9.8:
    name: "Deleting Objects"
    identifier: "usage_deleting_objects"
    weight: 103
    parent: "developing_usage"
toc: true
aliases:
---

The delete command follows a predictable pattern and looks like this:

```
DELETE /types/TYPE/buckets/BUCKET/keys/KEY
```

The normal HTTP response codes for `DELETE` operations are `204 No
Content` and `404 Not Found`. 404 responses are *normal*, in the sense
that `DELETE` operations are idempotent and not finding the resource has
the same effect as deleting it.

Let's try to delete the `genius` key from the `oscar_wilde` bucket
(which bears the type `quotes`):

```java
Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
client.execute(delete);
```

```ruby
bucket = client.bucket_type('quotes').bucket('oscar_wilde')
bucket.delete('genius')
```

```php
(new \Basho\Riak\Command\Builder\DeleteObject($riak))
  ->buildLocation('genius', 'oscar_wilde', 'quotes')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('quotes').bucket('oscar_wilde')
bucket.delete('genius')
```

```csharp
var id = new RiakObjectId("users", "random_user_keys", null);
var obj = new RiakObject(id, @"{'user':'data'}",
    RiakConstants.ContentTypes.ApplicationJson);
var rslt = client.Put(obj);
string key = rslt.Value.Key;
id = new RiakObjectId("users", "random_user_keys", key);
var del_rslt = client.Delete(id);
```

```javascript
// continuing from above example
options = {
    bucketType: 'users', bucket: 'random_user_keys',
    key: generatedKey
};
client.deleteValue(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>).
```

```golang
// Continuing from above example
cmd, err = riak.NewDeleteValueCommandBuilder().
    WithBucketType("users").
    WithBucket("random_user_keys").
    WithKey(rsp.GeneratedKey).
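    // rsp.GeneratedKey is the key Riak assigned during the store above,
    // so this command deletes the object we just wrote.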
+ Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} +``` + +```curl +curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius +``` + +## Client Library Examples + +If you are updating an object that has been deleted---or if an update +might target a deleted object---we recommend that +you first fetch the [causal context]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context) of the object prior to updating. +This can be done by setting the `deletedvclock` parameter to `true` as +part of the [fetch operation]({{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers/fetch-object). This can also be done +with the official Riak clients for Ruby, Java, and Erlang, as in the +example below: + + +```ruby +object.delete +deleted_object = bucket.get('bucket', 'key', deletedvclock: true) +deleted_object.vclock +``` + +```python +# It is not currently possible to fetch the causal context for a deleted +# key in the Python client. +``` + +```java +Location loc = new Location("<bucket>") + .setBucketType("<bucket_type>") + .setKey("<key>"); +FetchValue fetch = new FetchValue.Builder(loc) + .withOption(Option.DELETED_VCLOCK, true) + .build(); +FetchValue.Response response = client.execute(fetch); +System.out.println(response.getVclock().asString()); +``` + +```erlang +{ok, Obj} = riakc_pb_socket:get(Pid, + {<<"bucket_type">>, <<"bucket">>}, + <<"key">>, + [{deleted_vclock}]). + +%% In the Erlang client, the vector clock is accessible using the Obj +%% object obtained above. +``` + +```php +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type') + ->build() + ->execute(); + +echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/document-store.md b/content/riak/kv/2.9.8/developing/usage/document-store.md new file mode 100644 index 0000000000..5c98d46042 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/document-store.md @@ -0,0 +1,617 @@ +--- +title: "Implementing a Document Store" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Implementing a Document Store" + identifier: "usage_document_store" + weight: 112 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.8/dev/search/document-store + - /riak/kv/2.9.8/dev/search/document-store +--- + +Although Riak wasn't explicitly created as a document store, two +features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/)---make it possible to use Riak as a +highly scalable document store with rich querying capabilities. In this +tutorial, we'll build a basic implementation of a document store using +[Riak maps]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#maps). + +## Basic Approach + +Riak Search enables you to implement a document store in Riak in a +variety of ways. You could, for example, store and query JSON objects or +XML and then retrieve them later via Solr queries. In this tutorial, +however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#maps), +index that data using Riak Search, and then run Solr queries against +those stored objects. + +You can think of these Search indexes as **collections**. 
Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/2.9.8/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. + +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). 
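Before uploading, it can be worth a quick sanity check that the finished schema is well-formed XML. Here is a minimal sketch using Python's standard library, assuming the schema has been saved under the file name `blog_post_schema.xml` used below:

```python
# Parse the schema file and list the field names it defines;
# a parse error here means the upload below would fail anyway
import xml.etree.ElementTree as ET

tree = ET.parse('blog_post_schema.xml')
fields = [f.get('name') for f in tree.iter('field')]
print(fields)  # should include title_register, keywords_set, published_flag, ...
```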
+Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @blog_post_schema.xml +``` + +With our schema uploaded, we can create an index called `blog_posts` and +associate that index with our schema: + +```java +YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema"); +StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('blog_posts') + ->usingSchema('blog_post_schema') + ->build() + ->execute(); +``` + +```python +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```csharp +var idx = new SearchIndex("blog_posts", "blog_post_schema"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: 'blog_post_schema', + indexName: 'blog_posts' +}; +client.storeIndex(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/blog_posts \ + -H 'Content-Type: application/json' \ + -d '{"schema": "blog_post_schema"}' +``` + +## How Collections will Work + +Collections are not a concept that is native to Riak but we can easily +mimic collections by thinking of a bucket type as a collection. When we +associate a bucket type with a Riak Search index, all of the objects +stored in any bucket of that bucket type will be queryable on the basis +of that one index. For this tutorial, we'll create a bucket type called +`cms` and think of that as a collection. 
We could also restrict our
`blog_posts` index to a single bucket just as easily and think of that
as a queryable collection, but we will not do that in this tutorial.

The advantage of the bucket-type-based approach is that we could store
blog posts from different blogs in different buckets and query them
all at once as part of the same index. It depends on the use case at
hand. In this tutorial, we'll only be storing posts from one blog, which
is called "Cat Pics Quarterly" and provides in-depth theoretical
discussions of cat pics with a certain number of Reddit upvotes. All of
the posts in this blog will be stored in the bucket
`cat_pics_quarterly`.

First, let's create our `cms` bucket type and associate it with the
`blog_posts` index:

```bash
riak-admin bucket-type create cms \
  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
riak-admin bucket-type activate cms
```

Now, any object stored in any bucket of the type `cms` will be indexed
as part of our "collection."

## Storing Blog Posts as Maps

Now that we know how each element of a blog post can be translated into
one of the Riak Data Types, we can create an interface in our
application to serve as that translation layer. Using the method
described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-modeling), we can construct a
class that looks like this:

```java
import java.util.Set;

import org.joda.time.DateTime;

import com.basho.riak.client.api.RiakClient;
import com.basho.riak.client.api.commands.datatypes.FlagUpdate;
import com.basho.riak.client.api.commands.datatypes.MapUpdate;
import com.basho.riak.client.api.commands.datatypes.RegisterUpdate;
import com.basho.riak.client.api.commands.datatypes.SetUpdate;
import com.basho.riak.client.api.commands.datatypes.UpdateMap;
import com.basho.riak.client.core.query.Location;
import com.basho.riak.client.core.query.Namespace;

public class BlogPost {
    private String title;
    private String author;
    private String content;
    private Set<String> keywords;
    private DateTime datePosted;
    private Boolean published;
    private static final String bucketType = "cms";

    private Location location;

    private RiakClient client;

    public BlogPost(RiakClient client,
                    String bucketName,
                    String title,
                    String author,
                    String content,
                    Set<String> keywords,
                    DateTime datePosted,
                    Boolean published) {
        this.client = client;
        this.location = new Location(new Namespace(bucketType, bucketName), null);
        this.title = title;
        this.author = author;
        this.content = content;
        this.keywords = keywords;
        this.datePosted = datePosted;
        this.published = published;
    }

    public void store() throws Exception {
        RegisterUpdate titleUpdate = new RegisterUpdate(title);
        RegisterUpdate authorUpdate = new RegisterUpdate(author);
        RegisterUpdate contentUpdate = new RegisterUpdate(content);
        SetUpdate keywordsUpdate = new SetUpdate();
        for (String keyword : keywords) {
            keywordsUpdate.add(keyword);
        }
        RegisterUpdate dateUpdate =
            new RegisterUpdate(datePosted.toString("YYYY-MM-DD HH:MM"));
        FlagUpdate publishedUpdate = new FlagUpdate(published);
        MapUpdate mapUpdate = new MapUpdate()
            .update("title", titleUpdate)
            .update("author", authorUpdate)
            .update("content", contentUpdate)
            .update("keywords", keywordsUpdate)
            .update("date", dateUpdate)
            .update("published", publishedUpdate);
        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
            .build();
        client.execute(storeBlogPost);
    }
}
```

```ruby
class BlogPost
  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
    bucket = client.bucket_type('cms').bucket(bucket_name)
    map = Riak::Crdt::Map.new(bucket, nil)
    map.batch do |m|
      m.registers['title'] = title
      m.registers['author'] = author
      m.registers['content'] = content
      keywords.each do |k|
        m.sets['keywords'].add(k)
      end
      m.registers['date'] =
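        # the date is stored as a pre-formatted string register
        # (see the strftime call in the usage example below)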
date_posted + if published + m.flags['published'] = true + end + end +end +``` + +```php +class BlogPost { + private $title = ''; + private $author = ''; + private $content = ''; + private $keywords = []; + private $datePosted = ''; + private $published = false; + private $bucketType = "cms"; + + private $bucket = null; + + private $riak = null; + + public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published) + { + this->riak = $riak; + this->bucket = new Bucket($bucket, $this->bucketType); + this->title = $title; + this->author = $author; + this->content = $content; + this->keywords = $keywords; + this->datePosted = $date; + this->published = $published; + } + + public function store() + { + $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak)); + + foreach($this->keywords as $keyword) { + $setBuilder->add($keyword); + } + + (new \Basho\Riak\Command\Builder\UpdateMap($this->riak)) + ->updateRegister('title', $this->title) + ->updateRegister('author', $this->author) + ->updateRegister('content', $this->content) + ->updateRegister('date', $this->date) + ->updateFlag('published', $this->published) + ->updateSet('keywords', $setBuilder) + ->withBucket($this->bucket) + ->build() + ->execute(); + } +} +``` + +```python +from riak.datatypes import Map + +class BlogPost: + def __init__(bucket_name, title, author, content, keywords, date_posted, published): + bucket = client.bucket_type('cms').bucket(bucket_name) + map = Map(bucket, None) + self.map.registers['title'].assign(title) + self.map.registers['author'].assign(author) + self.map.registers['content'].assign(content) + for k in keywords: + self.map.sets['keywords'].add(k) + self.map.registers['date'] = date_posted + if published: + self.map.flags['published'].enable() + self.map.store() +``` + +```csharp +/* + * Please see the code in the RiakClientExamples project: + * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search + */ +``` + +```javascript +/* + * Please see the code in the examples repository: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/ + */ +``` + +Now, we can store some blog posts. 
We'll start with just one: + +```java +Set<String> keywords = new HashSet<String>(); +keywords.add("adorbs"); +keywords.add("cheshire"); + +BlogPost post1 = new BlogPost(client, // client object + "cat_pics_quarterly", // bucket + "This one is so lulz!", // title + "Cat Stevens", // author + "Please check out these cat pics!", // content + keywords, // keywords + new DateTime(), // date posted + true); // published +try { + post1.store(); +} catch (Exception e) { + System.out.println(e); +} +``` + +```ruby +keywords = ['adorbs', 'cheshire'] +date = Time.now.strftime('%Y-%m-%d %H:%M') +blog_post1 = BlogPost.new('cat_pics_quarterly', + 'This one is so lulz!', + 'Cat Stevens', + 'Please check out these cat pics!', + keywords, + date, + true) +``` + +```php +$keywords = ['adorbs', 'cheshire']; +$date = new \DateTime('now'); + +$post1 = new BlogPost( + $riak, // client object + 'cat_pics_quarterly', // bucket + 'This one is so lulz!', // title + 'Cat Stevens', // author + 'Please check out these cat pics!', // content + $keywords, // keywords + $date, // date posted + true // published +); +``` + +```python +import datetime + +keywords = ['adorbs', 'cheshire'] +date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') +blog_post1 = BlogPost('cat_pics_quarterly', + 'This one is so lulz!', + 'Cat Stevens', + 'Please check out these cat pics!', + keywords, + date, + true) +``` + +```csharp +var keywords = new HashSet<string> { "adorbs", "cheshire" }; + +var post = new BlogPost( + "This one is so lulz!", + "Cat Stevens", + "Please check out these cat pics!", + keywords, + DateTime.Now, + true); + +var repo = new BlogPostRepository(client, "cat_pics_quarterly"); +string id = repo.Save(post); +``` + +```javascript +var post = new BlogPost( + 'This one is so lulz!', + 'Cat Stevens', + 'Please check out these cat pics!', + [ 'adorbs', 'cheshire' ], + new Date(), + true +); + +var repo = new BlogPostRepository(client, 'cat_pics_quarterly'); + +repo.save(post, function (err, rslt) { + logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model)); +}); +``` + +## Querying + +Now that we have some blog posts stored in our "collection," we can +start querying for whatever we'd like. Let's say that we want to find +all blog posts with the keyword `funny` (after all, some cat pics are +quite serious, and we may not want those). 
+ +```java +String index = "blog_posts"; +String query = "keywords_set:funny"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'keywords_set:funny') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('keywords_set:funny') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'keywords_set:funny') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('keywords_set:funny') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny" +``` + +Or we can find posts that contain the word `furry`: + +```java +String index = "blog_posts"; +String query = "content_register:furry"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'content_register:furry') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('content_register:furry') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'content_register:furry') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('content_register:furry') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry" +``` + +Here are some more possible queries: + +Info | Query +:----|:----- +Unpublished posts | `published_flag:false` +Titles that begin with `Loving*` | `title_register:Loving*` +Post bodies containing the words `furry` and `jumping` | `content_register:[furry AND jumping]` + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/mapreduce.md b/content/riak/kv/2.9.8/developing/usage/mapreduce.md new file mode 100644 index 0000000000..e87224b47f --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/mapreduce.md @@ -0,0 +1,246 @@ +--- +title: "Using MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Using MapReduce" + identifier: "usage_mapreduce" + weight: 106 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.8/dev/using/mapreduce + - /riak/kv/2.9.8/dev/using/mapreduce +--- + +[usage 2i]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/secondary-indexes +[usage search]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search +[usage types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types +[api http]: {{<baseurl>}}riak/kv/2.9.8/developing/api/http +[api pb]: {{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode +[guide mapreduce]: 
{{<baseurl>}}riak/kv/2.9.8/developing/app-guide/advanced-mapreduce + +{{% note title="Use MapReduce sparingly" %}} +In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive to the extent that they can degrade performance in +production clusters operating under load. Thus, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. +{{% /note %}} + +MapReduce (M/R) is a technique for dividing data processing work across +a distributed system. It takes advantage of the parallel processing +power of distributed systems and also reduces network bandwidth, as the +algorithm is passed around to where the data lives rather than +transferring a potentially huge dataset to a client algorithm. + +You can use MapReduce for things like: filtering documents by +tags, counting words in documents, and extracting links to related data. +In Riak KV, MapReduce is one method for querying that is not strictly based +on key querying, alongside [secondary indexes][usage 2i] +and [search][usage search]. MapReduce jobs can be submitted through the +[HTTP API][api http] or the [Protocol Buffers API][api pb], although we +strongly recommend using the Protocol Buffers API for performance +reasons. + +## Features + +* Map phases execute in parallel with data locality. +* Reduce phases execute in parallel on the node where the job was + submitted. +* MapReduce queries written in Erlang. + +## When to Use MapReduce + +* When you know the set of objects over which you want to MapReduce + (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key) +* When you want to return actual objects or pieces of objects and not + just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on + non-key-based queries, but they only return lists of keys and not + whole objects. +* When you need the utmost flexibility in querying your data. MapReduce + gives you full access to your object and lets you pick it apart any + way you want. + +## When Not to Use MapReduce + +* When you want to query data over an entire bucket. MapReduce uses a + list of keys, which can place a lot of demand on the cluster. +* When you want latency to be as predictable as possible. + +## How it Works + +The MapReduce framework helps developers divide a query into steps, +divide the dataset into chunks, and then run those step/chunk pairs in +separate physical hosts. + +There are two steps in a MapReduce query: + +* **Map** - The data collection phase, which breaks up large chunks of + work into smaller ones and then takes action on each chunk. Map + phases consist of a function and a list of objects on which the map + operation will operate. +* **Reduce** - The data collation or processing phase, which combines + the results from the map step into a single output. The reduce phase + is optional. + +Riak KV MapReduce queries have two components: + +* A list of inputs +* A list of phases + +The elements of the input list are object locations as specified by +[bucket type][usage types], bucket, and key. The elements of the +phases list are chunks of information related to a map, a reduce, or a +link function. + +A MapReduce query begins when a client makes the request to Riak KV. 
The +node that the client contacts to make the request becomes the +*coordinating node* responsible for the MapReduce job. As described +above, each job consists of a list of phases, where each phase is either +a map or a reduce phase. The coordinating node uses the list of phases +to route the object keys and the function that will operate over the +objects stored in those keys and instruct the proper [vnode][glossary vnode] to +run that function over the right objects. + +After running the map function, the results are sent back to the +coordinating node. This node then concatenates the list and passes that +information over to a reduce phase on the same coordinating node, +assuming that the next phase in the list is a reduce phase. + +The diagram below provides an illustration of how a coordinating vnode +orchestrates a MapReduce job. + +![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png) + +## Example + +In this example, we'll create four objects with the text "caremad" +repeated a varying number of times and store those objects in the bucket +`training` (which does not bear a [bucket type][usage types]). +An Erlang MapReduce function will be used to count the occurrences of +the word "caremad." + +### Data object input commands + +For the sake of simplicity, we'll use [curl](http://curl.haxx.se/) +in conjunction with Riak KV's [HTTP API][api http] to store the objects: + +```curl +curl -XPUT http://localhost:8098/buckets/training/keys/foo \ + -H 'Content-Type: text/plain' \ + -d 'caremad data goes here' + +curl -XPUT http://localhost:8098/buckets/training/keys/bar \ + -H 'Content-Type: text/plain' \ + -d 'caremad caremad caremad caremad' + +curl -XPUT http://localhost:8098/buckets/training/keys/baz \ + -H 'Content-Type: text/plain' \ + -d 'nothing to see here' + +curl -XPUT http://localhost:8098/buckets/training/keys/bam \ + -H 'Content-Type: text/plain' \ + -d 'caremad caremad caremad' +``` + +### MapReduce invocation + +To invoke a MapReduce function from a compiled Erlang program requires +that the function be compiled and distributed to all nodes. + +For interactive use, however, it's not necessary to do so; instead, we +can invoke the client library from the +[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define +functions to send to Riak KV on the fly. + +First we defined the map function, which specifies that we want to get +the key for each object in the bucket `training` that contains the text +`caremad`. + +We're going to generalize and optimize it a bit by supplying a +compiled regular expression when we invoke MapReduce; our function +will expect that as the third argument. + +```erlang +ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of + {match, Matches} -> [{riak_object:key(O), length(Matches)}]; + nomatch -> [{riak_object:key(O), 0}] +end end. +``` + +Next, to call `ReFun` on all keys in the `training` bucket, we can do +the following in the Erlang shell. + +{{% note title="Warning" %}} +Do not use this in a production +environment; listing all keys to identify those in the `training` bucket +is a very expensive process. +{{% /note %}} + +```erlang +{ok, Re} = re:compile("caremad"). +``` + +That will return output along the following lines, verifying that +compilation has completed: + +``` +{ok,{re_pattern,0,0, + <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100, + ...>>}} +``` + +Then, we can create a socket link to our cluster: + +```erlang +{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087). 
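%% 127.0.0.1:8087 is the default Protocol Buffers listener for a local node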

%% This should return a process ID:
%% {ok,<0.34.0>}
```

Then we can run the compiled MapReduce job on the `training` bucket:

```erlang
riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
    [{map, {qfun, ReFun}, Re, true}]).
```

If your bucket is part of a bucket type, you would use the following:

```erlang
B = {<<"my_bucket_type">>, <<"training">>},
Args = [{map, {qfun, ReFun}, Re, true}],
riakc_pb_socket:mapred_bucket(Riak, B, Args).
```

That will return a list of tuples. The first element in each tuple is
the key for each object in the bucket, while the second element displays
the number of instances of the word "caremad" in the object:

```
{ok,[{0,
      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
```

### Recap

In this tutorial, we ran an Erlang MapReduce function against a total of
four objects in the `training` bucket. This job took each key/value
object in the bucket and searched the text for the word "caremad,"
counting the number of instances of the word.

## Advanced MapReduce Queries

For more detailed information on MapReduce queries in Riak KV, we recommend
checking out our [Advanced MapReduce][guide mapreduce] guide.




diff --git a/content/riak/kv/2.9.8/developing/usage/next-gen-replication.md b/content/riak/kv/2.9.8/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..10ed39d23d
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/next-gen-replication.md
@@ -0,0 +1,153 @@
---
title: "Next-Gen Replication"
description: ""
project: "riak_kv"
project_version: "2.9.8"
menu:
  riak_kv-2.9.8:
    name: "Next Gen Replication"
    identifier: "learn_concepts_next_gen_replication"
    weight: 108
    parent: "learn_concepts"
version_history:
  in: "2.9.1+"
toc: true
aliases:
  - /riak-docs/riak/2.9.8/dev/using/nextgenreplication
---
[concept TicTac aae]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/tictac-active-anti-entropy
[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context/#vector-clocks
[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters
[concept vnodes]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/vnodes
[glossary node]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#node
[glossary ring]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#ring
[usage replication]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/replication

## Next Generation Replication - How it Works

### Replication Actors

Each node in `riak_kv` starts three processes that manage the inter-cluster replication: a tictac AAE full-sync manager, a replication queue source manager, and a replication queue sink manager. All processes are started by default (whether or not replication is enabled), but will only play an active role should replication be configured. Further details on the processes involved:

* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`

  * There is a single actor on each node that manages the full-sync reconciliation workload configured for that node.

  * Each node is configured with the details of a peer node at a remote cluster. Each manager is responsible for controlling cluster-wide hashtree exchanges between the local node and the peer node, and for prompting any repairs required across the cluster (not just on this node). The information is exchanged between the peers, but that information represents the data across the whole cluster.
Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`.
+
+  * Each node is configured with a schedule to determine how frequently this manager will run its reconcile and repair operations.
+
+  * It is an administrator's responsibility to ensure the cluster AAE workload is distributed across nodes with sufficient diversity to ensure correct operation under failure. Work is not re-distributed between nodes in response to failure on either the local or remote cluster, so there must be other nodes already configured to share that workload to continue operation under failure conditions.
+
+  * Each node can only full-sync with one other cluster (via the one peer node). If the cluster needs to full-sync with more than one cluster, then the administrator should ensure different nodes have the different configurations necessary to achieve this.
+
+  * Scheduling of work to minimise concurrency of reconciliation operations is managed by this actor using a simple, coordination-free mechanism.
+
+  * The administrator may at run-time suspend or resume the regular running of full-sync operations on any given node via the `riak_kv_ttaaefs_manager`.
+
+* __Replication Queue Source-Side Manager__
+
+  * There is a single actor on each node that manages the queueing of replication object references to be consumed by other clusters. This actor runs a configurable number of queues, which contain pointers to data which is required to be consumed by different remote clusters.
+
+  * The general pattern is that each delta within a cluster will be published once via the `riak_kv_replrtq_src` on a node local to the discovery of the change. Each queue which is a source of updates will have multiple consumers spread across multiple sink nodes on the receiving cluster - where each sink-side node's consumers are being managed by a `riak_kv_replrtq_snk` process on that node.
+
+  * Queues may have data filtering rules to restrict what changes are distributed via that queue. The filters can restrict replication to a specific bucket or bucket type, or to a bucket-name prefix, or can allow any change to be published to that queue.
+
+  * __Real-time replication__ changes (i.e. PUTs that have just been co-ordinated on this node within the cluster), are sent to the `riak_kv_replrtq_src` in one of the following formats:
+    * {Bucket, Key, Clock, {tombstone, Object}};
+    * {Bucket, Key, Clock, {object, Object}};
+    * {Bucket, Key, Clock, to_fetch}.
+
+  * Real-time replicated objects are the highest priority items to be queued, and are placed on __every queue whose data filtering rules are matched__ by the object. If the priority queue has grown beyond a limited number of items (the number being defined in `riak_kv.replrtq_srcobjectlimit`), then any {object, Object} reference is stripped and replaced with `to_fetch`. This is to help limit the memory consumed by the queue during failure conditions, i.e. when a sink has stopped consuming from the source queue.
+
+  * Changes identified by __AAE full-sync replication__ processes run by the `riak_kv_ttaaefs` manager on the local node are sent to the `riak_kv_replrtq_src` as references, and queued as the second highest priority. These changes are queued only on __a single queue defined within the configuration__ of `riak_kv_ttaaefs_manager`. The changes queued are only references to the object (Bucket, Key and Clock), not the actual object.
+
+  * Changes identified by __AAE fold operations__ for administrator-initiated transition or repair operations (e.g. fold over a bucket or key-range, or for a given range of modified dates), are sent to the `riak_kv_replrtq_src` to be queued as the lowest priority onto __a single queue defined by the administrator when initiating the AAE fold operation__. The changes queued are only references to the object (Bucket, Key and Clock), not the actual object - and are only the changes discovered through the fold running on vnodes local to this node.
+
+  * Should the local node fail, all undelivered object references will be dropped.
+
+  * Queues are bounded, with limits set separately for each priority. Items are consumed from the queue in strict priority order, so a backlog of non-real-time replication events cannot cause a backlog or failure in real-time events.
+
+  * The queues are provided using the existing `riak_core_priority_queue` module in Riak.
+
+  * The administrator may at run-time suspend or resume the publishing of data to specific queues via the `riak_kv_replrtq_src` process.
+
+* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk`
+
+  * There is a single actor on each node that manages the process of consuming from queues on the `riak_kv_replrtq_src` on remote clusters.
+
+  * The `riak_kv_replrtq_snk` can be configured to consume from multiple queues, across an open-ended number of peers. For instance, if each node on Cluster A maintains a queue named `cluster_c_full`, and each node on Cluster B maintains a queue named `cluster_c_partial` - then `riak_kv_replrtq_snk` can be configured to consume from the `cluster_c_full` from every node in Cluster A and from `cluster_c_partial` from every node in Cluster B.
+
+  * The `riak_kv_replrtq_snk` manages a finite number of workers for consuming from remote peers. The `riak_kv_replrtq_snk` tracks the results of work in order to back-off slightly from peers regularly not returning results to consume requests (in favour of those peers indicating a backlog by regularly returning results). The `riak_kv_replrtq_snk` also tracks the results of work in order to back-off severely from those peers returning errors (so as not to lock too many workers consuming from unreachable nodes).
+
+  * The administrator may at run-time suspend or resume the consuming of data from specific queues or peers via the `riak_kv_replrtq_snk`.
+
+### Real-time Replication - Step by Step
+
+Previous replication implementations initiate replication through a post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm` after "enough" responses have been received from other vnodes (based on n, w, dw and pw values for the PUT). Without enough responses, the replication hook is not fired, although the client should receive an error and retry. This process of retrying may eventually fire the hook - although it is possible for a PUT to fail, for the hook not to fire, and yet for a GET to succeed locally (due to read-repair and anti-entropy), leaving no clue that the object has not been replicated.
+
+In implementing the new replication solution, the point of firing off replication has been changed to the point at which the co-ordinated PUT is completed. So the replication of the PUT to the remote clusters may occur in parallel with the replication of the PUT to other nodes in the source cluster. This is the first opportunity where sufficient information is known (e.g.
the updated vector clock), and reduces the size of the time-window of inconsistency between the clusters, and also reduces the window of opportunity for a PUT to succeed but not have replication triggered.
+
+Replication is fired within the `riak_kv_vnode` `actual_put/8`. On condition of the vnode being a co-ordinator of the put, and of `riak_kv.replrtq_enablesrc` being set to enabled (true), the following work is done:
+
+- The object reference to be replicated is determined; this is the type of reference to be placed on the replication queue.
+
+  - If the object is now a tombstone, the whole object is used as the replication reference. The whole object is used due to the small size of the object, and the need to avoid race conditions with reaping activity if `delete_mode` is not `keep` - the cluster may not be able to fetch the tombstone to replicate in the future. The whole object must be kept on the queue and not be filtered by the `riak_kv_replrtq_src` to be replaced with a `to_fetch` reference.
+
+  - If the object is below the `riak_kv.replrtq_srcobjectsize` (default 200KB) then the whole object will be sent to the `riak_kv_replrtq_src`, and it will be queued as a whole object as long as the current size of the priority real-time queue does not exceed the `riak_kv.replrtq_srcobjectlimit` (default 1000). If an object is over the size limit, a `to_fetch` reference will be sent instead of the object, and if the queue is too large, the `riak_kv_replrtq_src` will substitute a `to_fetch` reference before queueing.
+
+- The `{Bucket, Key, Clock, ObjectReference}` is cast to the `riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the priority queue.
+
+- The queue has a configurable absolute limit that is applied individually for each priority. The limit is configured via `riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5 minutes of traffic at 1,000 PUTs per second). When this limit is reached, new replication references are discarded on receipt rather than queued - these discarded references will need to eventually be re-replicated via full-sync.
+
+The reference now needs to be handled by the `riak_kv_replrtq_src`. The task list for this process is:
+
+- Assign a priority to the replication event depending on what prompted the replication (e.g. highest priority to real-time events received from co-ordinator vnodes).
+
+- Add the reference to the tail of __every__ matching queue, based on priority. Each queue is configured to either match `any` replication event, no real-time events (using the configuration `block_rtq`), or a subset of events (using either a bucket `type` filter or a `bucket` filter).
+
+In order to replicate the object, it must now be fetched from the queue by a sink. A sink-side cluster should have multiple consumers, on multiple nodes, consuming from each node in the source-side cluster. These workers are handed work items by the `riak_kv_replrtq_snk`, with a Riak client configured to communicate to the remote node, and the worker will initiate a `fetch` from that node.
+
+On receipt of the `fetch` request the source node should:
+
+- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in place of `{Bucket, Key}`.
+
+- The GET FSM should go directly into the `queue_fetch` state, and try to fetch the next replication reference from the given queue name via the `riak_kv_replrtq_src`.
+
+  - If the fetch from the queue returns `queue_empty`, this is relayed back to the sink-side worker, and ultimately to the `riak_kv_replrtq_snk`, which may then slow down the pace at which fetch requests are sent to this node/queue combination. To reduce the volume of individual requests when queues are mainly empty, the queue is only considered empty if it has reported empty 8 times from requests 4ms apart.
+
+  - If the fetch returns an actual object, this is relayed back to the sink worker.
+
+  - If the fetch returns a replication reference with the flag `to_fetch`, the `riak_kv_get_fsm` will continue down the standard path of states starting with `prepare`, and fetch the object, which will then be returned to the sink worker.
+
+- If a successful fetch is relayed back to the sink worker, it will replicate the PUT using a local `riak_client:push/4`. The push will complete a PUT of the object on the sink cluster - using a `riak_kv_put_fsm` with appropriate options (e.g. `asis`, `disable-hooks`).
+
+  - The code within the `riak_client:push/4` follows the behaviour of the existing `riak_repl` on receipt of a replicated object.
+
+- If the fetch and push request fails, the sink worker will report this back to the `riak_kv_replrtq_snk`, which should delay further requests to that node/queue so as to avoid rapidly tying up sink workers communicating with a failing node.
+
+
+### Full-Sync Reconciliation and Repair - Step by Step
+
+The `riak_kv_ttaaefs_manager` controls the full-sync replication activity of a node. Each node is configured with a single peer with which it is to run full-sync checks and repairs, assuming that across the cluster sufficient peers to sufficient clusters have been configured to complete the overall work necessary for that cluster. Ensuring there are sufficient peer relations is an administrator's responsibility; there is no re-balancing or re-scaling during failure scenarios.
+
+The `riak_kv_ttaaefs_manager` is a source-side process. It will not attempt to repair any discovered discrepancies where the remote cluster is ahead of the local cluster - the job of the process is to ensure that a remote cluster is up-to-date with the changes which have occurred in the local cluster. For mutual full-sync replication, there will be a need for an equivalent configuration on the peer cluster.
+
+The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the configuration. The schedule has *wants*: the number of times per day that it is desired that this manager will:
+
+- Reconcile changes across the whole cluster over all time;
+
+- Skip work for a schedule slot and do nothing;
+
+- Reconcile changes that have occurred in the past hour;
+
+- Reconcile changes that have occurred in the past day.
+
+On startup, the manager looks at these wants and provides a random distribution of work across slots. The day is divided into slots evenly distributed so there is a slot for each want in the schedule. It will run work for the slot at an offset from the start of the slot, based on the place this node has in the sorted list of currently active nodes. So if each node is configured with the same total number of wants, work will be synchronised to have limited overlapping work within the cluster.
+
+When, on a node, a scheduled piece of work comes due, the `riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work between the two clusters (using the peer configuration to reach the remote cluster).
Once the work is finished, it will schedule the next piece of work - unless the start time for the next piece of work has already passed, in which case the next work is skipped. When all the work in the schedule is complete, a new schedule is calculated from the wants.
+
+When starting an `aae_exchange` the `riak_kv_ttaaefs_manager` must pass in a repair function. This function will compare clocks from identified discrepancies, and where the source cluster is ahead of the sink, send the `{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name on `riak_kv_replrtq_src`. These queued entries will then be replicated through being fetched by the `riak_kv_replrtq_snk` workers, although this will only occur when there is no higher-priority work to replicate, i.e. real-time replication events prompted by locally co-ordinated PUTs.
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/reading-objects.md b/content/riak/kv/2.9.8/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..25de3ff99a
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/reading-objects.md
@@ -0,0 +1,252 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Object Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+  assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object
+`pr` | `0` | How many primary [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true` and the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+    .withOption(FetchOption.R, new Quorum(3))
+    .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/replication.md b/content/riak/kv/2.9.8/developing/usage/replication.md new file mode 100644 index 0000000000..21159b8eb2 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/replication.md @@ -0,0 +1,592 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.8/dev/advanced/replication-properties + - /riak/kv/2.9.8/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/2.9.8/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/2.9.8/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties at the time of the read or write,
+on a per-request basis. Examples are given in the
+[client-level settings section below](#client-level-replication-settings).
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props` those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.8/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.8/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level.
You can use [bucket types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
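+
+For instance, a read can ask for acknowledgment from just a single replica on a per-request basis. Below is a minimal sketch using the official Python client (the default client constructor and the `animal_facts`/`chimpanzee` names are illustrative assumptions, anticipating the examples that follow):
+
+```python
+from riak import RiakClient
+
+# Assumes a local node listening on the default ports.
+client = RiakClient()
+
+# Per-request R: this read is deemed successful as soon as a single
+# replica responds, regardless of the bucket's default R value.
+bucket = client.bucket('animal_facts')
+obj = bucket.get('chimpanzee', r=1)
+print(obj.data)
+```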
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+    .setContentType("text/plain")
+    .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(storyKey)
+    .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak.
When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum`
+to `true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
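+
+To make that arithmetic concrete, here is a small illustrative sketch of
+the quorum calculation in plain Python (not part of the Riak client API):
+
+```python
+def quorum(n):
+    # floor(N/2) + 1, as described above
+    return n // 2 + 1
+
+# Matches the examples above: 5 -> 3, 6 -> 4, 7 -> 4, 8 -> 5
+for n in (5, 6, 7, 8):
+    print(n, quorum(n))
+```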
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+    .withOption(FetchOption.R, new Quorum(2))
+    .withOption(FetchOption.NOTFOUND_OK, true)
+    .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+    .setContentType("application/json")
+    .setValue(BinaryValue.create("{'stats':{ ... large stats object ...
}}")); +StoreValue store = new StoreValue.Builder(obj) + .withLocation(michaelJordanKey) + .withOption(StoreOption.W, new Quorum(3)) + .withOption(StoreOption.DW, new Quorum(2)) + .build(); +client.execute(store); +``` + +```php +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildJsonObject('{'stats':{ ... large stats object ... }}') + ->buildLocation('john_stockton', 'nba_stats') + ->withParameter('w', 3) + ->withParameter('dw', 2) + ->build() + ->execute(); +``` + +```erlang +Obj = riakc_obj:new(<<"nba_stats">>, + <<"michael_jordan">>, + <<"{'stats':{ ... large stats object ... }}">>, + <<"application/json">>), +riakc_pb_socket:put(Pid, Obj). +``` + +```curl +curl -XPUT \ + -H "Content-Type: application/json" \ + -d '{"stats":{ ... large stats object ... }}' \ + http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 +``` + +All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/2.9.8/developing/client-libraries) enable you to +set replication properties this way. For more detailed information, +refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/2.9.8/developing/getting-started) +or to client-specific documentation: + +* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) +* [Java](http://basho.github.io/riak-java-client/2.0.0/) +* [Python](http://basho.github.io/riak-python-client/) +* [Erlang](http://basho.github.io/riak-erlang-client/) + +## Illustrative Scenarios + +In case the above explanations were a bit too abstract for your tastes, +the following table lays out a number of possible scenarios for reads +and writes in Riak and how Riak is likely to respond. Some of these +scenarios involve issues surrounding conflict resolution, vector clocks, +and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context#vector-clocks) documentation for more information. + +#### Read Scenarios + +These scenarios assume that a read request is sent to all 3 primary +vnodes responsible for an object. + +Scenario | What happens in Riak +:--------|:-------------------- +All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/2.9.8/learn/concepts/causal-context#siblings">siblings</a> +2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/2.9.8/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes +2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) + +#### Write Scenarios + +These scenarios assume that a write request is sent to all 3 primary +vnodes responsible for an object. 
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/search-schemas.md b/content/riak/kv/2.9.8/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..b39036f2c4
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/search-schemas.md
@@ -0,0 +1,511 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/advanced/search-schema
+  - /riak/kv/2.9.8/dev/advanced/search-schema
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/), and [more]({{<baseurl>}}riak/kv/2.9.8/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value.
Sufficiently large objects can potentially take up tremendous
+amounts of disk space, so pay special attention to those indexes.
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('cartoons.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('cartoons.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+xml_file.close()
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon Thundercats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above objects into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+
+```
+name=Lion-o
+age=30
+leader=true
+aliases.name=León-O
+aliases.desc_es=Señor de los ThunderCats
+aliases.name=Starlion
+aliases.desc_fr=Le jeune seigneur des Cosmocats
+```
+
+This means that our schema should handle `name`, `age`, `leader`,
+`aliases.name` (a `dot` is a valid field character), and
+`aliases.desc_*`, where the suffix indicates the language of the
+description (here Spanish and French).
+
+### Required Schema Fields
+
+Solr schemas can be very complex, containing many types and analyzers.
+Refer to the [Solr 4.7 reference
+guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf)
+for a complete list. You should be aware, however, that there are a few
+fields that are required by Riak Search in order to properly distribute
+an object across a [cluster][concept clusters]. These fields are all prefixed
+with `_yz`, which stands for
+[Yokozuna](https://github.com/basho/yokozuna), the original code name
+for Riak Search.
+
+Below is a bare-minimum skeleton Solr schema. It won't do much for you
+other than allow Riak Search to properly manage your stored objects.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id"   type="_yz_str" indexed="true" stored="true"  multiValued="false" required="true"/>
+   <field name="_yz_ed"   type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn"   type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn"  type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk"   type="_yz_str" indexed="true" stored="true"  multiValued="false"/>
+   <field name="_yz_rt"   type="_yz_str" indexed="true" stored="true"  multiValued="false"/>
+   <field name="_yz_rb"   type="_yz_str" indexed="true" stored="true"  multiValued="false"/>
+   <field name="_yz_err"  type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+ </types>
+</schema>
+```
+
+If you're missing any of the above fields, Riak Search will reject your
+custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`.
+
+In the table below, you'll find a description of the various required
+fields. You'll rarely need to use any fields other than `_yz_rt` (bucket
+type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err`
+can be helpful if you suspect that your extractors are failing.
+Malformed JSON or XML will cause Riak Search to index a key and set
+`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-------|:-----|:-----------
+`_yz_id` | ID | Unique identifier of this Solr document
+`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy)
+`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag`| VTag | If the object has siblings, the vtag differentiates them
+`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Indicates whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you need to map the name and type of each field (string,
+integer, boolean, etc.).
+
+When creating fields you can either create specific fields via the
+`field` element or an asterisk (`*`) wildcard field via `dynamicField`.
+A value that matches a specific field name will use that field;
+otherwise, Solr will attempt to match it against a dynamic field
+pattern.
+
+Besides a field `type`, you must also decide whether a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored`, you
+can get the value back as a result of a query, but this also doubles the
+storage of the field (once in Riak, again in Solr). If a single Riak
+object can have more than one copy of the same matching field, you must
+also set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+  <fields>
+    <field name="name" type="string" indexed="true" stored="true" />
+    <field name="age" type="int" indexed="true" stored="false" />
+    <field name="leader" type="boolean" indexed="true" stored="false" />
+    <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+    <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+    <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" />
+
+    <!-- All of these fields are required by Riak Search -->
+    <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+    <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+    <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+    <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+    <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+    <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+    <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+    <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+    <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+  </fields>
+
+  <uniqueKey>_yz_id</uniqueKey>
+```
+
+Next, take note of the types you used in the fields and ensure that each
+of the field types is defined as a `fieldType` under the `types`
+element.
Basic types such as `string`, `boolean`, and `int` have matching
+Solr classes. There are dozens more types, including several numeric
+kinds (`float`, `tdouble`), `random` sort fields, `date` fields, and
+even geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we mapped any field that ends with
+`*_es` to Spanish, and `*_de` to German.
+
+```xml
+  <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- Spanish -->
+    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+        <filter class="solr.SpanishLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+      </analyzer>
+    </fieldType>
+
+    <!-- German -->
+    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
+        <filter class="solr.GermanNormalizationFilterFactory"/>
+        <filter class="solr.GermanLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+      </analyzer>
+    </fieldType>
+  </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+provided for indexing without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO 8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to this will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+        at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+        at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+        at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+        at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+        at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+        ...
+        ...
+        ...
+```
+
+### Uploading
+
+Once you have finalized your custom schema and saved it as an `.xml`
+file, you can upload it to Riak KV as follows:
+
+```curl
+curl -v -XPUT $RIAK_HOST/search/schema/thundercats \
+  -H 'Content-Type:application/xml' \
+  --data-binary @thundercats_schema.xml
+```
+
+
+
+## Field Properties By Use Case
+
+Sometimes it can be tricky to decide whether a value should be `stored`
+or whether `multiValued` is allowed. This handy table from the [Solr
+documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case)
+may help you pick field properties.
+
+An entry of `true` or `false` in the table indicates that the option
+must be set to the given value for the use case to function correctly.
+If no entry is provided, the setting of that attribute has no impact on
+the case.
+
+<table class="schemausecase">
+<thead>
+<tr>
+<th>Use Case</th>
+<th><code>indexed</code></th>
+<th><code>stored</code></th>
+<th><code>multiValued</code></th>
+<th><code>omitNorms</code></th>
+<th><code>termVectors</code></th>
+<th><code>termPositions</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>search within field</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>retrieve contents</td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use as unique key</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>sort on field</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td><code>true</code>[1](#notes)</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use field boosts[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>document boosts affect searches within field</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>highlighting</td>
+<td><code>true</code>[4](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td>[2](#notes)</td>
+<td><code>true</code>[3](#notes)</td>
+</tr>
+<tr>
+<td>faceting[5](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>add multiple values, maintaining order</td>
+<td></td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>field length affects doc score</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>MoreLikeThis[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>true</code>[6](#notes)</td>
+<td></td>
+</tr>
+</tbody></table>
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/search.md b/content/riak/kv/2.9.8/developing/usage/search.md
new file mode 100644
index 0000000000..3136273774
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/search.md
@@ -0,0 +1,1455 @@
+---
+title: "Using Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Searching"
+    identifier: "usage_searching"
+    weight: 105
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/search
+  - /riak/kv/2.9.8/dev/using/search
+---
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak search 2.0 is an integration of Solr (for indexing and querying)
+and Riak
(for storage and distribution). There are a few points of
+interest that a user of Riak search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak search uses AAE is in the
+[Riak search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you connect with to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Here we'll create an index
+named `famous` with the default schema.
+
+Both schema and index creation will be covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/2.9.8/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
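+%% A 4-arity version, shown further below, also accepts a schema name
+%% and a list of options.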
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.8/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.8/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+    .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type
+property `search_index` to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create a bucket type without
+any properties and set individual buckets to be indexed.
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +### Associating an Index via Custom Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +default bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + +Once you have created the index association, any new data will be indexed on +ingest according to your schema. + +## Riak Search Security Setup + +[Security]({{<baseurl>}}riak/kv/2.9.8/using/security/) is a new feature as of +Riak 2.0 that lets an administrator limit access to certain resources. +In the case of search, your options are to limit administration of +schemas or indexes (the `search.admin` permission) to certain users, and +to limit querying (the `search.query` permission) to any index or to a +specific index. The example below shows the various options. 
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Lion-o, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python the content type is automatically set for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+    ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket))
+    ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket))
+    ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket))
+    ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]);
+
+$storeObjectBuilder->build()->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+
+cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True})
+cat.store()
+
+cat = bucket.new('cheetara', {'name_s': 'Cheetara', 'age_i': 28, 'leader_b': False})
+cat.store()
+
+cat = bucket.new('snarf', {'name_s': 'Snarf', 'age_i': 43})
+cat.store()
+
+cat = bucket.new('panthro', {'name_s': 'Panthro', 'age_i': 36})
+cat.store()
+```
+
+```csharp
+var lionoId = new RiakObjectId("animals", "cats", "liono");
+var lionoObj = new { name_s = "Lion-o", age_i = 30, leader_b = true };
+var lionoRiakObj = new RiakObject(lionoId, lionoObj);
+
+var cheetaraId = new RiakObjectId("animals", "cats", "cheetara");
+var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader_b = false };
+var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj);
+
+var snarfId = new RiakObjectId("animals", "cats", "snarf");
+var snarfObj = new { name_s = "Snarf", age_i = 43, leader_b = false };
+var snarfRiakObj = new RiakObject(snarfId, snarfObj);
+
+var panthroId = new RiakObjectId("animals", "cats", "panthro");
+var panthroObj = new { name_s = "Panthro", age_i = 36, leader_b = false };
+var panthroRiakObj = new RiakObject(panthroId, panthroObj);
+
+var rslts = client.Put(new[] {
+    lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj
+});
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var objs = [
+    [ 'liono', { name_s: 'Lion-o', age_i: 30, leader_b: true } ],
+    [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader_b: false } ],
+    [ 'snarf', { name_s: 'Snarf', age_i: 43, leader_b: false } ],
+    [ 'panthro', { name_s: 'Panthro', age_i: 36, leader_b: false } ],
+];
+
+var storeFuncs = [];
+objs.forEach(function (o) {
+    var storeFunc = function (async_cb) {
+        var key = o[0];
+        var value = o[1];
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('application/json');
+        riakObj.setBucketType('animals');
+        riakObj.setBucket('cats');
+        riakObj.setKey(key);
+        riakObj.setValue(value);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    };
+    storeFuncs.push(storeFunc);
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+    // NB: all objects stored and indexed...
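+    // (they will show up in queries after the ~1 second Solr
+    // soft-commit delay noted above)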
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/2.9.8/developing/usage/custom-extractors). 
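+
+To make that concrete: conceptually, an extractor turns a stored value
+into the flat list of field/value pairs shown earlier. The real
+extractors are Erlang modules inside Riak, but here is a rough,
+hypothetical Python sketch of the JSON extractor's flattening rule
+(dots for nested objects, repeated fields for arrays):
+
+```python
+def flatten(value, prefix=''):
+    """Yield (field, value) pairs, joining nested keys with dots."""
+    if isinstance(value, dict):
+        for key, child in value.items():
+            yield from flatten(child, prefix + '.' + key if prefix else key)
+    elif isinstance(value, list):
+        # Arrays simply repeat the field name for each element
+        for child in value:
+            yield from flatten(child, prefix)
+    else:
+        yield (prefix, value)
+
+lion_o = {"name": "Lion-o", "age": 30, "leader": True,
+          "aliases": [{"name": "León-O"}, {"name": "Starlion"}]}
+print(list(flatten(lion_o)))
+# [('name', 'Lion-o'), ('age', 30), ('leader', True),
+#  ('aliases.name', 'León-O'), ('aliases.name', 'Starlion')]
+```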
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/2.9.8/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated into a
+Solr index document field. Solr will index any field that it
+recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` is an integer, `_b` is a boolean and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/2.9.8/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match.
+
+```java
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create("famous"), "name_s:Lion*")
+    .build();
+cluster.execute(searchOp);
+// This will display the actual results as a List of Maps:
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+// This will display the number of results:
+System.out.println(results.size());
+```
+
+```ruby
+results = client.search("famous", "name_s:Lion*")
+p results
+p results['docs']
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('name_s:Lion*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+
+var_dump($response->getDocs());
+```
+
+```python
+results = client.fulltext_search('famous', 'name_s:Lion*')
+print results
+print results['docs']
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "name_s")
+        .Search("Lion*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+foreach (RiakSearchResultDocument doc in searchResult.Documents)
+{
+    var args = new[] {
+        doc.BucketType,
+        doc.Bucket,
+        doc.Key,
+        string.Join(", ", doc.Fields.Select(f => f.Value).ToArray())
+    };
+    Debug.WriteLine(
+        format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}",
+        args: args);
+}
+```
+
+```javascript
+function search_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("docs:", JSON.stringify(rslt.docs));
+}
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('name_s:Lion*')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:fwrite("~p~n", [Results]),
+Docs = Results#search_results.docs,
+io:fwrite("~p~n", [Docs]).
+
+%% Please note that this example relies on an Erlang record definition
+%% for the search_result record found here:
+%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("name_s:Lion*").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+sc := cmd.(*riak.SearchCommand)
+if json, jerr := json.MarshalIndent(sc.Response.Docs, "", "  "); jerr != nil {
+    return jerr
+} else {
+    fmt.Println(string(json))
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp
+```
+
+The response to a query will be an object containing details about the
+response, such as a query's max score and a list of documents which
+match the given query. It's worth noting two things:
+
+* The documents returned are Search documents (a set of Solr
+  field/values), not a Riak value
+* The HTTP response is a direct Solr response, while the drivers use
+  Protocol Buffers and are encoded with different field names
+
+This is a typical HTTP `response` value:
+
+```json
+{
+  "numFound": 1,
+  "start": 0,
+  "maxScore": 1.0,
+  "docs": [
+    {
+      "leader_b": true,
+      "age_i": 30,
+      "name_s": "Lion-o",
+      "_yz_id": "default_cats_liono_37",
+      "_yz_rk": "liono",
+      "_yz_rt": "default",
+      "_yz_rb": "cats"
+    }
+  ]
+}
+```
+
+The most important field returned is `docs`, which is the list of
+objects that each contain fields about matching index documents.
The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+    .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation($key, $bucket, $btype)
+    ->build()
+    ->execute()
+    ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO 30]`. If you
+wanted to find all cats 30 or older, you could include a glob as the top
+end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('leader_b:true AND age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+    .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new Command\Builder\Search\DeleteIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []),
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that index's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_`.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents are returned in non-overlapping
+sequential subsets (in other words, *pages*).
This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, where `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create("famous"), "*:*")
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .build();
+client.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('*:*')
+    ->withMaxRows($maxRows)
+    ->withStartRow($start)
+    ->build()
+    ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results.
+
+If you want to sort by score without repeating results then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query.
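+
+One way to get that number is to run the query twice: once just to read
+`numFound`, and again with `rows` set to that value. A minimal Python
+sketch of this two-pass approach, assuming the client from the examples
+above and a result set small enough to fetch in one response (`rows=0`
+is the standard Solr idiom for "count only"):
+
+```python
+# Pass 1: ask for zero rows; we only want the total match count.
+probe = client.fulltext_search('famous', '*:*', rows=0)
+total = probe['num_found']
+
+# Pass 2: fetch every matching row in a single request, so a
+# score-sorted result set cannot repeat documents across pages.
+results = client.fulltext_search('famous', '*:*', rows=total)
+for doc in results['docs']:
+    print(doc['_yz_rk'])
+```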
+ +[This issue](https://github.com/basho/yokozuna/issues/355) is caused by +the way Search must minimally distribute a query across multiple Solr +nodes (called a *coverage plan*) and then filter duplicate results to +retrieve a full result set. Since this plan is frequently recalculated, +successive page queries may use a different plan, and thus calculate +alternate `score`s or filter different `_yz_id` values. We have plans to +fix this shortcoming in a future version of Riak. + +### MapReduce + +Riak Search allows for piping search results as inputs for +[MapReduce]({{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce/) jobs. This is a useful cross-section for +performing post-calculations of results or aggregations of ad-hoc +queries. The Riak Search MapReduce integration works similarly to +regular MapReduce, with the notable exception that your input is not a +bucket, but rather index and query arguments to the `yokozuna` module +and `mapred_search` function (an Erlang `module:function` pair that adds +the Riak Search hook to MapReduce). + +```json +{ + "inputs": { + "module": "yokozuna", + "function": "mapred_search", + "arg": ["famous","NOT leader_b:true"] + }, + "query": [ + { + "map": { + "language": "javascript", + "keep": false, + "source": "function(v) { return [1]; }" + } + }, + { + "reduce": { + "language": "javascript", + "keep": true, + "name": "Riak.reduceSum" + } + } + ] +} +``` + +In this example we're searching for all famous cats that are not +leaders and counting up the results using Javascript for both map and +reduce. It should return the reduced sum of `[3]`. + +```curl +curl -XPOST $RIAK_HOST/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}' +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/searching-data-types.md b/content/riak/kv/2.9.8/developing/usage/searching-data-types.md new file mode 100644 index 0000000000..d92247419b --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/searching-data-types.md @@ -0,0 +1,1687 @@ +--- +title: "Searching with Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Searching with Data Types" + identifier: "usage_search_data_types" + weight: 111 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.8/dev/search/search-data-types + - /riak/kv/2.9.8/dev/search/search-data-types +--- + +Although [Riak Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types) function differently from other +Riak objects in some respects, when you're using Search you can think of +them as normal Riak objects with special metadata attached (metadata +that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#maps) +can be indexed and have their contents searched just like other Riak +objects. + +## Data Type MIME Types + +Like all objects stored in Riak, Riak Data Types are assigned content +types. Unlike other Riak objects, this happens automatically. When you +store, say, a counter in Riak, it will automatically be assigned the +type `application/riak_counter`. 
The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types has its own default schema, with the exception of
+maps, which means that the `_yz_default` schema will automatically index
+Data Types on the basis of their assigned content type. This means that
+there is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#riak-data-types-and-search) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#maps), there are four schemas
+for embedded, aka dynamic, fields.
Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued.
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings.
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+    .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('scores')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index.
So let's
+start playing with some counters. All counters will be stored in the
+bucket `people`, while the key for each counter will be the username of
+each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu1 = new CounterUpdate(10);
+UpdateCounter update1 = new UpdateCounter.Builder(christopherHitchensCounter, cu1)
+        .build();
+client.execute(update1);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+CounterUpdate cu2 = new CounterUpdate(25);
+UpdateCounter update2 = new UpdateCounter.Builder(joanRiversCounter, cu2)
+        .build();
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(10)
+  ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+  ->buildLocation('joan_rivers', 'people', 'counters')
+  ->build()
+  ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"chris_hitchens">>,
+                            riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"joan_rivers">>,
+                            riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25.
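+
+If you'd like to read those values back directly (outside of Search), here
+is a brief, hedged sketch using the Python client from the examples above;
+fetching a key from a Data Type bucket is assumed to return the Data Type
+itself:
+
+```python
+# A minimal check, assuming the 'counters' bucket type and 'people' bucket
+# from above.
+bucket = client.bucket_type('counters').bucket('people')
+bucket.get('chris_hitchens').value  # 10
+bucket.get('joan_rivers').value     # 25
+```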
Let's query to see how many counters have a value greater
+than 20, just to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('scores')
+  ->withQuery('counter:[20 TO *]')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[20 TO *]" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over 20.
+To find out which counter that is, we can dig into our results:
+
+```java
+// Using the "results" object from above:
+int numberFound = results.numResults();
+Map<String, List<String>> foundObject = results.getAllResults().get(0);
+String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
+String bucket = foundObject.get("_yz_rb").get(0); // "people"
+String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
+```
+
+```ruby
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```php
+$doc = $response->getDocs()[0];
+
+# The key
+$doc['_yz_rk']; # 'joan_rivers'
+
+# The bucket
+$doc['_yz_rb']; # 'people'
+
+# The bucket type
+$doc['_yz_rt']; # 'counters'
+```
+
+```python
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+var doc = rslt.docs[0];
+
+var key = doc['_yz_rk'];
+var bucket = doc['_yz_rb'];
+var bucketType = doc['_yz_rt'];
+```
+
+```erlang
+Docs = Results#search_results.docs,
+Doc = lists:nth(1, Docs),
+Key = proplists:get_value(<<"_yz_rk">>, Doc),
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
+BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
+```
+
+```curl
+# Use the JSON object from above to locate bucket, key, and bucket type
+# information
+```
+
+Alternatively, we can see how many counters have values below 15:
+
+```java
+String index = "scores";
+String query = "counter:[* TO 15]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[* TO 15]')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('scores')
+  ->withQuery('counter:[* TO 15]')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[* TO 15]')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[* TO 15]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
+``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp +``` + +Or we can see how many counters have a value of 17 exactly: + +```java +// Using the same method as above, just changing the query: +String query = "counter:17"; +``` + +```ruby +results = client.search('scores', 'counter:17') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('scores') + ->withQuery('counter:17') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('scores', 'counter:17') +``` + +```csharp +var search = new RiakSearchRequest("scores", "counter:17"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('scores') + .withQuery('counter:17') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>). +``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp +``` + +## Sets Example + +Let's say that we're storing information about the hobbies of a group of +people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#sets) simply called `sets`, +like so: + +```bash +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type activate sets +``` + +Now, we'll create a Search index called `hobbies` that uses the default +schema (as in some of the examples above): + +```java +YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies"); +StoreIndex storeIndex = + new StoreIndex.Builder(hobbiesIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('hobbies', '_yz_default') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak)) + ->withName('hobbies') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('hobbies', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("hobbies", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'hobbies' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/hobbies \ + -H 'Content-Type: application/json' \ + -d '{"schema": "_yz_default"}' +``` + +Now, we can modify our `sets` bucket type to associate that bucket type +with our `hobbies` index: + +```bash +riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}' +``` + +Now, all of the sets that we store in any bucket with the bucket type +`sets` will be automatically indexed as a set. 
So let's say that we +store three sets for two different people describing their respective +hobbies, in the bucket `people`: + +```java +Namespace peopleBucket = new Namespace("sets", "people"); + +Location mikeDitkaSet = new Location(peopleBucket, "ditka"); +SetUpdate su1 = new SetUpdate() + .add("football") + .add("winning"); +UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build(); + +Location ronnieJamesDioSet = new Location(peopleBucket, "dio"); +SetUpdate su2 = new SetUpdate() + .add("wailing") + .add("rocking") + .add("winning"); +UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build(); + +client.execute(update1); +client.execute(update2); +``` + +```ruby +bucket = client.bucket('people') + +mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets') +mike_ditka_set.add('football') +mike_ditka_set.add('winning') + +ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets') +ronnie_james_dio_set.add('wailing') +ronnie_james_dio_set.add('rocking') +ronnie_james_dio_set.add('winning') +``` + +```php +$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('football') + ->add('winning') + ->buildLocation('ditka', 'people', 'counters'); + +$builder->build->execute(); + +$builder->add('wailing') + ->add('rocking') + ->add('winning') + ->buildLocation('dio', 'people', 'counters'); + ->build() + ->execute(); +``` + +```python +from riak.datatypes import Set + +bucket = client.bucket_type('sets').bucket('people') + +mike_ditka_set = Set(bucket, 'ditka') +mike_ditka_set.add('football') +mike_ditka_set.add('winning') +mike_ditka_set.store() + +ronnie_james_dio_set = Set(bucket, 'dio') +ronnie_james_dio_set.add('wailing') +ronnie_james_dio_set.add('rocking') +ronnie_james_dio_set.add('winning') +ronnie_james_dio_set.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs + +var cmd = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("people") + .WithKey("ditka") + .WithAdditions(new[] { "football", "winning" }) + .Build(); +RiakResult rslt = client.Execute(cmd); + +cmd = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("people") + .WithKey("dio") + .WithAdditions(new[] { "wailing", "rocking", "winning" }) + .Build(); +rslt = client.Execute(cmd); +``` + +```javascript +var funcs = [ + function (async_cb) { + var options = { + bucketType: 'sets', + bucket: 'people', + key: 'ditka', + additions: ['football', 'winning'] + }; + + client.updateSet(options, function (err, rslt) { + throwIfErr(err); + async_cb(); + }); + }, + function (async_cb) { + var options = { + bucketType: 'sets', + bucket: 'people', + key: 'dio', + additions: ['wailing', 'rocking', 'winning'] + }; + + client.updateSet(options, function (err, rslt) { + throwIfErr(err); + async_cb(); + }); + } +]; + +async.parallel(funcs, function (err, rslts) { + throwIfErr(err); +}); +``` + +```erlang +MikeDitkaSet = riakc_set:new(), +riakc_set:add_element(<<"football">>, MikeDitkaSet), +riakc_set:add_element(<<"winning">>, MikeDitkaSet), +RonnieJamesDioSet = riakc_set:new(), +riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet), +riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet), +riakc_set:add_element(<<"winning">>, RonnieJamesDioSet), + +riakc_pb_socket:update_type(Pid, + {<<"sets">>, <<"people">>}, + <<"ditka">>, + riakc_set:to_op(MikeDitkaSet)), +riakc_pb_socket:update_type(Pid, + {<<"sets">>, <<"people">>}, + <<"dio">>, + riakc_set:to_op(RonnieJamesDioSet)). 
+``` + +Now, we can query our `hobbies` index to see if anyone has the hobby +`football`: + +```java +// Using the same method explained above, just changing the query: +String query = "set:football"; +``` + +```ruby +results = client.search('hobbies', 'set:football') +# This should return a dict with fields like 'num_found' and 'docs' +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('hobbies') + ->withQuery('set:football') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('hobbies', 'set:football') +# This should return a dict with fields like 'num_found' and 'docs' +``` + +```csharp +var search = new RiakSearchRequest("hobbies", "set:football"); +var rslt = client.Search(search); + +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); + +var firstDoc = searchResult.Documents.First(); +Console.WriteLine("Key: {0} Bucket: {1} Type: {2}", + firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType); +``` + +```javascript +function search_cb(err, rslt) { + logger.info("sets numFound: '%d', docs: '%s'", + rslt.numFound, JSON.stringify(rslt.docs)); + + var doc = rslt.docs[0]; + var key = doc['_yz_rk']; + var bucket = doc['_yz_rb']; + var bucketType = doc['_yz_rt']; +} + +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('hobbies') + .withQuery('set:football') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>). +``` + +```curl +curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp +``` + +Let's see how many sets contain the element `football`: + +```java +// Using the same method explained above for getting search results: +int numberFound = results.numResults(); // 1 +``` + +```ruby +results['num_found'] +# 1 +``` + +```php +$response->getNumFound(); // 1 +``` + +```python +results['num_found'] +# 1 +``` + +```csharp +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); +``` + +```javascript +rslt.numFound; +// 1 +``` + +```erlang +NumberFound = Results#search_results.num_found. +%% 1 +``` + +```curl +``` + +Success! We stored two sets, only one of which contains the element +`football`. Now, let's see how many sets contain the element `winning`: + +```java +// Using the same method explained above, just changing the query: +String query = "set:winning"; + +// Again using the same method from above: +int numberFound = results.numResults(); // 2 +``` + +```ruby +results = client.search('hobbies', 'set:winning') +results['num_found'] +# 2 +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('hobbies') + ->withQuery('set:winning') + ->build() + ->execute(); + +$response->getNumFound(); // 2 +``` + +```python +results = client.fulltext_search('hobbies', 'set:winning') +results['num_found'] +# 2 +``` + +```csharp +var search = new RiakSearchRequest("hobbies", "set:winning"); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('hobbies') + .withQuery('set:winning') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>). +NumberFound = Results#search_results.num_found. +%% 2 +``` + +Just as expected, both sets we stored contain the element `winning`. 
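+
+The syntax table in the sets section above also allows combined terms. As a
+quick follow-up sketch, here is that kind of query using the same Python
+client calls shown above (only Mike Ditka's set should contain both
+elements):
+
+```python
+# A hedged sketch, assuming the 'hobbies' index and the two sets stored above.
+results = client.fulltext_search('hobbies', 'set:football AND set:winning')
+results['num_found']
+# 1
+```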
+ +## Maps Example + +This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types) +tutorial. That tutorial walks you through storing CMS-style user data in +Riak [maps]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/#maps), and we'd suggest that you +familiarize yourself with that tutorial first. More specifically, user +data is stored in the following fields in each user's map: + +* first name in a `first_name` register +* last name in a `last_name` register +* whether the user is an enterprise customer in an `enterprise_customer` + flag +* the number of times the user has visited the company page in a + `page_visits` counter +* a list of the user's interests in an `interests` set + +First, let's create and activate a bucket type simply called `maps` that +is set up to store Riak maps: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type activate maps +``` + +Now, let's create a search index called `customers` using the default +schema: + +```java +YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default"); +StoreIndex storeIndex = + new StoreIndex.Builder(customersIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('customers', '_yz_default') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('customers') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('customers', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("customers", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'customers' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>). 
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Idris"))
+        .update("last_name", new RegisterUpdate("Elba"))
+        .update("enterprise_customer", new FlagUpdate(false))
+        .update("page_visits", new CounterUpdate(10))
+        .update("interests", new SetUpdate().add("acting", "being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Joan"))
+        .update("last_name", new RegisterUpdate("Jett"))
+        // Joan Jett is not an enterprise customer, so we don't need to
+        // explicitly disable the "enterprise_customer" flag, as all
+        // flags are disabled by default
+        .update("page_visits", new CounterUpdate(25))
+        .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Idris')
+  ->updateRegister('last_name', 'Elba')
+  ->updateFlag('enterprise_customer', true)
+  ->updateSet('interests', $setBuilder)
+  ->updateCounter('page_visits', $counterBuilder)
+  ->buildLocation('idris_elba', 'customers', 'maps')
+  ->build()
+  ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Joan')
+  ->updateRegister('last_name', 'Jett')
+  ->updateSet('interests', $setBuilder)
+  ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+  ->buildLocation('joan_jett', 'customers', 'maps')
+  ->build()
+  ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, false)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters above 15.
Unlike the
+counters example above, we have to specify _which_ counter we're
+querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('page_visits_counter:[15 TO *]')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter
+above 15. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register; // 'Joan'
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we stored thus far has an `interests` set. First, let's
+see how many of our maps even _have_ sets called `interests` using a
+wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('interests_set:*')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set.
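+
+Exact matches work on embedded sets as well. Here is a brief, hedged sketch
+(assuming the Python client and the two maps stored above) that queries for
+maps whose `interests` set contains `acting`:
+
+```python
+# A minimal sketch, assuming the 'customers' index and the maps from above.
+results = client.fulltext_search('customers', 'interests_set:acting')
+results['num_found']
+# 1
+results['docs'][0]['first_name_register']
+# u'Idris'
+```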
Now let's see how
+many maps have items in `interests` sets that begin with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('interests_set:loving*')
+  ->build()
+  ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has one item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will have a first name only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+        .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+        .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate)
+        .build();
+client.execute(addSubMap);
+// Similar code can be used to store Joan Jett's alter ego
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('alter_ego', $mapBuilder)
+  ->buildLocation('idris_elba', 'customers', 'maps')
+  ->build()
+  ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('alter_ego', $mapBuilder)
+  ->buildLocation('joan_jett', 'customers', 'maps')
+  ->build()
+  ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'John Luther');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'Robert Plant');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+Querying maps within maps involves constructing queries that separate the
+different levels of depth with a single dot. Here's an example query for
+finding maps that have a `name` register embedded within an `alter_ego`
+map:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('alter_ego_map.name_register:*')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Once we know how to query embedded fields like this, we can query those
+just like any other.
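+
+The same dot-separated convention should extend to deeper nesting. As a
+purely hypothetical sketch (assuming the Python client from above, and a
+`details` map that we have *not* actually stored inside `alter_ego`), a
+register two levels down would be addressed like this:
+
+```python
+# Hypothetical: if 'alter_ego' contained an embedded 'details' map with an
+# 'origin' register, each level of depth would be chained with a dot.
+results = client.fulltext_search('customers',
+                                 'alter_ego_map.details_map.origin_register:*')
+```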
Let's find out which maps have an `alter_ego`
+sub-map that contains a `name` register that ends with `Plant`, and
+display that customer's first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('alter_ego_map.name_register:*Plant')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/secondary-indexes.md b/content/riak/kv/2.9.8/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..a728e5a8ba
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/secondary-indexes.md
@@ -0,0 +1,2030 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/2i
+  - /riak/kv/2.9.8/dev/using/2i
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary indexes can be either strings (aka binaries), such as
+`sensor_1_data`, `admin_user`, or `click_event`, or integers, such as
+`99` or `141121`.
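+
+To make that idea concrete before the client-by-client examples below, here
+is a minimal, hedged sketch using the Python client conventions that appear
+later in this document (the bucket and key names are made up for
+illustration):
+
+```python
+import riak
+
+client = riak.RiakClient()
+bucket = client.bucket('events')  # assumes the default bucket type
+
+# Tag the object at write time with a queryable secondary index value
+obj = riak.RiakObject(client, bucket, 'click_0001')
+obj.content_type = 'text/plain'
+obj.data = 'click event payload'
+obj.add_index('user_bin', 'admin_user')
+obj.store()
+
+# Later: find every key in the bucket that carries the tag
+matches = bucket.get_index('user_bin', 'admin_user').results
+```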
+ +[Riak Search]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/2.9.8/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
Let's say that an +application would like add a Twitter handle and an email address to this +object as secondary indexes. + +```java +Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith"); + +// In the Java client (and all clients), if you do not specify a bucket type, +// the client will use the default type. And so the following store command +// would be equivalent to the one above: +Location johnSmithKey = new Location(new Namespace("users"), "john_smith"); + +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(BinaryValue.create("{'user_data':{ ... }}")); + +obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123"); +obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com"); + +StoreValue store = new StoreValue.Builder(obj) + .withLocation(johnSmithKey) + .build(); +client.execute(store); +``` + +```ruby +bucket = client.bucket_type('default').bucket('users') +obj = Riak::RObject.new(bucket, 'john_smith') +obj.content_type = 'application/json' +obj.raw_data = '{"user_data":{ ... }}' + +# String/binary indexes must be set as an array of strings +obj.indexes['twitter_bin'] = %w{ jsmith123 } +obj.indexes['email_bin'] = %w{ jsmith@basho.com } +obj.store + +# In the Ruby client (and all clients), if you do not specify a bucket +# type, the client will use the default type. And so the following set +# of commands would be equivalent to the one above: + +bucket = client.bucket('users') +# repeat the same commands for building the object +obj.store +``` + +```php +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 'jsmith123') + ->addValueToIndex('email_bin', 'jsmith@basho.com'); + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->buildLocation('john_smith', 'users', 'default') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('default').bucket('users') +# In the Python client (and all clients), if you do not specify a bucket type, +# the client will use the default type. And so the following store command +# would be equivalent to the one above: +bucket = client.bucket('users') + +obj = RiakObject(client, bucket, 'john_smith') +obj.content_type = 'text/plain' +obj.data = '...user data...' +obj.add_index('twitter_bin', 'jsmith123') +obj.add_index('email_bin', 'jsmith@basho.com') +obj.store() +``` + +```csharp +var id = new RiakObjectId("default", "users", "john_smith"); +var obj = new RiakObject(id, "...user data...", + RiakConstants.ContentTypes.TextPlain); +obj.BinIndex("twitter").Set("jsmith123"); +obj.BinIndex("email").Set"jsmith@basho.com"); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucket('users'); +riakObj.setKey('john_smith'); +riakObj.setValue('...user data...'); +riakObj.addToIndex('twitter_bin', 'jsmith123'); +riakObj.addToIndex('email_bin', 'jsmith@basho.com'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"default">>, <<"users">>}, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +%% In the Erlang client (and all clients), if you do not specify a bucket type, +%% the client will use the default type. 
And so the following object would be +%% equivalent to the one above: + +Obj = riakc_obj:new(<<"users">>, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index( + MD1, + [{{binary_index, "twitter"}, [<<"jsmith123">>]}, + {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]), +Obj2 = riakc_obj:update_metadata(Obj, MD2), +riakc_pb_socket:put(Pid, Obj2). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + BucketType: "indexes", + Bucket: "users", + Key: "john_smith", + Value: []byte("…user data…"), +} + +obj.AddToIndex("twitter_bin", "jsmith123") +obj.AddToIndex("email_bin", "jsmith@basho.com") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ + -H 'x-riak-index-twitter_bin: jsmith123' \ + -H 'x-riak-index-email_bin: jsmith@basho.com' \ + -H 'Content-Type: application/json' \ + -d '{"userData":"data"}' +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.8/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.8/developing/getting-started) section. + +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if 
(rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucket('users')
+    .withIndexName('twitter_bin')
+    .withIndexKey('jsmith123')
+    .withCallback(query_cb)
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+{ok, Results} =
+    riakc_pb_socket:get_index(Pid,
+                              <<"users">>, %% bucket
+                              {binary_index, "twitter"}, %% index name
+                              <<"jsmith123">>). %% index
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("users").
+    WithIndexName("twitter_bin").
+    WithIndexKey("jsmith123").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```
+
+The response:
+
+```java
+john_smith
+```
+
+```ruby
+["john_smith"]
+```
+
+```php
+['john_smith']
+```
+
+```python
+['john_smith']
+```
+
+```csharp
+john_smith
+```
+
+```javascript
+john_smith
+```
+
+```erlang
+{ok,{index_results_v1,[<<"john_smith">>],
+                      undefined,undefined}}.
+```
+
+```golang
+john_smith
+```
+
+```curl
+{
+  "keys": [
+    "john_smith"
+  ]
+}
+```
+
+## Examples
+
+To run the following examples, make sure that Riak is configured to use
+an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].
+
+## Indexing Objects
+
+The following example indexes four different objects. Notice that we're
+storing both integer and string (aka binary) fields. Field names are
+automatically lowercased, some fields have multiple values, and
+duplicate fields are automatically de-duplicated, as in the following
+example:
+
+```java
+Namespace peopleBucket = new Namespace("indexes", "people");
+
+RiakObject larry = new RiakObject()
+        .setValue(BinaryValue.create("My name is Larry"));
+larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
+larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
+StoreValue storeLarry = new StoreValue.Builder(larry)
+        .withLocation(new Location(peopleBucket, "larry"))
+        .build();
+client.execute(storeLarry);
+
+RiakObject moe = new RiakObject()
+        .setValue(BinaryValue.create("My name is Moe"));
+moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
+moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
+StoreValue storeMoe = new StoreValue.Builder(moe)
+        .withLocation(new Location(peopleBucket, "moe"))
+        .build();
+client.execute(storeMoe);
+
+RiakObject curly = new RiakObject()
+        .setValue(BinaryValue.create("My name is Curly"));
+curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
+curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
+StoreValue storeCurly = new StoreValue.Builder(curly)
+        .withLocation(new Location(peopleBucket, "curly"))
+        .build();
+client.execute(storeCurly);
+
+RiakObject veronica = new RiakObject()
+        .setValue(BinaryValue.create("My name is Veronica"));
+veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
+        .add("val4").add("val4");
+veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
+        .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
+StoreValue storeVeronica = new StoreValue.Builder(veronica)
+        .withLocation(new Location(peopleBucket, "veronica"))
+        .build();
+client.execute(storeVeronica);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = Riak::RObject.new(bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'My name is Larry'
+obj1.indexes['field1_bin'] = %w{ val1 }
+# Like binary/string indexes, integer indexes must be set as an array,
+# even if you wish to add only a single index
+obj1.indexes['field2_int'] = [1001]
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'My name is Moe'
+obj2.indexes['Field1_bin'] = %w{ val2 }
+obj2.indexes['Field2_int'] = [1002]
+obj2.store
+
+obj3 = Riak::RObject.new(bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.raw_data = 'My name is Curly'
+obj3.indexes['FIELD1_BIN'] = %w{ val3 }
+obj3.indexes['FIELD2_INT'] = [1003]
+obj3.store
+
+obj4 = Riak::RObject.new(bucket, 'veronica')
+obj4.content_type = 'text/plain'
+obj4.raw_data = 'My name is Veronica'
+obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
+obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1007]
+obj4.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('people', 'indexes');
+
+$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
+  ->addValueToIndex('field1_bin', 'val1')
+  ->addValueToIndex('field2_int', 1001);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withObject($object)
+  ->withLocation(new \Basho\Riak\Location('larry', $bucket))
+  ->build()
+  ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
+  ->addValueToIndex('Field1_bin', 'val2')
+  ->addValueToIndex('Field2_int', 1002);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withObject($object)
+  ->withLocation(new \Basho\Riak\Location('moe', $bucket))
+  ->build()
+  ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
+  ->addValueToIndex('FIELD1_BIN', 'val3')
+  ->addValueToIndex('FIELD2_int', 1003);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withObject($object)
+  ->withLocation(new \Basho\Riak\Location('curly', $bucket))
+  ->build()
+  ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
+  ->addValueToIndex('field1_bin', 'val4')
+  ->addValueToIndex('field1_bin', 'val4')
+  ->addValueToIndex('field1_bin', 'val4a')
+  ->addValueToIndex('field1_bin', 'val4b')
+  ->addValueToIndex('field2_int', 1004)
+  ->addValueToIndex('field2_int', 1005)
+  ->addValueToIndex('field2_int', 1006)
+  ->addValueToIndex('field2_int', 1004)
+  ->addValueToIndex('field2_int', 1004)
+  ->addValueToIndex('field2_int', 1007);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withObject($object)
+  ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = RiakObject(client, bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.data = 'My name is Larry'
+obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.data = 'My name is Moe'
+obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
+obj2.store()
+
+obj3 = RiakObject(client, bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.data = 'My name is Curly'
+obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
+obj3.store()
+
+obj4 = RiakObject(client, bucket,
'veronica')
+obj4.content_type = 'text/plain'
+obj4.data = 'My name is Veronica'
+obj4.add_index('field1_bin', 'val4')
+obj4.add_index('field1_bin', 'val4a')
+obj4.add_index('field1_bin', 'val4b')
+obj4.add_index('field2_int', 1004)
+obj4.add_index('field2_int', 1004)
+obj4.add_index('field2_int', 1005)
+obj4.add_index('field2_int', 1006)
+obj4.add_index('field2_int', 1004)
+obj4.add_index('field2_int', 1004)
+obj4.add_index('field2_int', 1004)
+obj4.add_index('field2_int', 1007)
+obj4.store()
+```
+
+```csharp
+var larryId = new RiakObjectId("indexes", "people", "larry");
+var larry = new RiakObject(larryId, "My name is Larry",
+    RiakConstants.ContentTypes.TextPlain);
+
+larry.BinIndex("field1").Set("val1");
+larry.IntIndex("field2").Set(1001);
+
+client.Put(larry);
+
+var moeId = new RiakObjectId("indexes", "people", "moe");
+var moe = new RiakObject(moeId, "My name is Moe",
+    RiakConstants.ContentTypes.TextPlain);
+
+moe.BinIndex("Field1").Set("val2");
+moe.IntIndex("Field2").Set(1002);
+
+client.Put(moe);
+
+var curlyId = new RiakObjectId("indexes", "people", "curly");
+var curly = new RiakObject(curlyId, "My name is Curly",
+    RiakConstants.ContentTypes.TextPlain);
+
+curly.BinIndex("FIELD1").Set("val3");
+curly.IntIndex("FIELD2").Set(1003);
+
+client.Put(curly);
+
+var veronicaId = new RiakObjectId("indexes", "people", "veronica");
+var veronica = new RiakObject(veronicaId, "My name is Veronica",
+    RiakConstants.ContentTypes.TextPlain);
+
+veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" });
+veronica.IntIndex("FIELD2").Set(new BigInteger[] {
+    1004, 1005, 1006, 1004, 1004, 1007
+});
+
+client.Put(veronica);
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var storeFuncs = [
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('larry');
+        riakObj.setValue('My name is Larry');
+        riakObj.addToIndex('field1_bin', 'val1');
+        riakObj.addToIndex('field2_int', 1001);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('moe');
+        riakObj.setValue('My name is Moe');
+        riakObj.addToIndex('Field1_bin', 'val2');
+        riakObj.addToIndex('Field2_int', 1002);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('curly');
+        riakObj.setValue('My name is Curly');
+        riakObj.addToIndex('FIELD1_BIN', 'val3');
+        riakObj.addToIndex('FIELD2_INT', 1003);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('veronica');
+        riakObj.setValue('My name is Veronica');
+        riakObj.addToIndex('FIELD1_bin', 'val4');
+        riakObj.addToIndex('FIELD1_bin', 'val4');
+        riakObj.addToIndex('FIELD1_bin', 'val4a');
+        riakObj.addToIndex('FIELD1_bin', 'val4b');
+        riakObj.addToIndex('FIELD2_int', 1004);
+        
riakObj.addToIndex('FIELD2_int', 1005); + riakObj.addToIndex('FIELD2_int', 1006); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1007); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + } +]; +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Larry = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"My name is Larry">>, + <<"text/plain">>), +LarryMetadata = riakc_obj:get_update_metadata(Larry), +LarryIndexes = riakc_obj:set_secondary_index( + LarryMetadata, + [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}] +), +LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes). + +Moe = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"moe">>, + <<"My name is Moe">>, + <<"text/plain">>), +MoeMetadata = riakc_obj:get_update_metadata(Moe), +MoeIndexes = riakc_obj:set_secondary_index( + MoeMetadata, + [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}] +), +MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes). + +Curly = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"curly">>, + <<"My name is Curly">>, + <<"text/plain">>), +CurlyMetadata = riakc_obj:get_update_metadata(Curly), +CurlyIndexes = riakc_obj:set_secondary_index( + CurlyMetadata, + [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}] +), +CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes). + +Veronica = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"veronica">>, + <<"My name is Veronica">>, + <<"text/plain">>), +VeronicaMetadata = riakc_obj:get_update_metadata(Veronica), +VeronicaIndexes = riakc_obj:set_secondary_index( + VeronicaMetadata, + [{{binary_index, "field1"}, [<<"val4">>]}, {{binary_index, "field1"}, [<<"val4">>]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1005]}, {{integer_index, "field2"}, [1006]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1007]}] +), +VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes). +``` + +```golang +o1 := &riak.Object{ + Key: "larry", + Value: []byte("My name is Larry"), +} +o1.AddToIndex("field1_bin", "val1") +o1.AddToIntIndex("field2_int", 1001) + +o2 := &riak.Object{ + Key: "moe", + Value: []byte("My name is Moe"), +} +o2.AddToIndex("Field1_bin", "val2") +o2.AddToIntIndex("Field2_int", 1002) + +o3 := &riak.Object{ + Key: "curly", + Value: []byte("My name is Curly"), +} +o3.AddToIndex("FIELD1_BIN", "val3") +o3.AddToIntIndex("FIELD2_INT", 1003) + +o4 := &riak.Object{ + Key: "veronica", + Value: []byte("My name is Veronica"), +} +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4a") +o4.AddToIndex("FIELD1_bin", "val4b") +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1005) +o4.AddToIntIndex("FIELD2_int", 1006) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1007) + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "text/plain" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithContent(obj). 
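+        // Bucket type and bucket are set once on the builder below; each
+        // object's Key field (set above) supplies the key it is stored under.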
+ Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field1_bin: val1" \ + -H "x-riak-index-field2_int: 1001" \ + -d 'My name is Larry' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \ + -H "x-riak-index-Field1_bin: val2" \ + -H "x-riak-index-Field2_int: 1002" \ + -d 'My name is Moe' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \ + -H "X-RIAK-INDEX-FIELD1_BIN: val3" \ + -H "X-RIAK-INDEX-FIELD2_INT: 1003" \ + -d 'My name is Curly' + +curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \ + -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \ + -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1007" \ + -d 'My name is Veronica' +``` + +The above objects will end up having the following secondary indexes, +respectively: + +* `Larry` - Binary index `field1_bin` and integer index `field2_int` +* `Moe` - Binary index `field1_bin` and integer index `field2_int` + (note that the index names are set to lowercase by Riak) +* `Curly` - Binary index `field1_bin` and integer index `field2_int` + (note again that the index names are set to lowercase) +* `Veronica` - Binary index `field1_bin` with the values `val4`, + `val4a`, and `val4b` and integer index `field2_int` with the values + `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed) + +As these examples show, there are safeguards in Riak that both normalize +the names of indexes and prevent the accumulation of redundant indexes. + +## Invalid Field Names and Types + +The following examples demonstrate what happens when an index field is +specified with an invalid field name or type. The system responds with +`400 Bad Request` and a description of the error. + +Invalid field name: + +```java +// The Java client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_foo'] = [1001] + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter', 'jsmith123'); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_foo', 1001) + +# Result: +riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'." 
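+# Only the '_bin' (string) and '_int' (integer) suffixes are valid 2i
+# field types.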
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
+>
+> To avoid such issues, the `riak.conf` file provides an option for enabling or disabling a node's participation in 2i queries. Setting `participate_in_coverage = disabled` prevents the node in question from participating. This is recommended for newly added nodes that have not yet received all of their data, since their participation in 2i queries could produce inconsistent results. Changing the `participate_in_coverage` setting requires a restart of Riak on that node before it takes effect. The default setting is `enabled`.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withScalarValue('val1')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
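+    // Passing a string to WithIndexKey targets the binary (_bin) index;
+    // integer lookups use WithIntIndexKey instead (see the next example).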
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withScalarValue(1001)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field1_bin",
+    "key": "val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index...
+ +```java +Namespace myBucket = new Namespace("indexes", "people"); +BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4") + .build(); +BinIndexQuery.Response response = client.execute(biq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field1_bin', 'val2'..'val4') +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('people', 'indexes') + ->withIndexName('field1_bin') + ->withRangeValue('val2', 'val4') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field1_bin', 'val2', 'val4') +``` + +```csharp +var riakIndexId = new RiakIndexId("indexes", "people", "field1"); +var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4"); +var indexResult = indexRiakResult.Value; +``` + +```javascript +var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field1_bin') + .withRange('val2', 'val4') + .withCallback(query_cb) + .build(); +client.execute(binIdxCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"people">>}, %% bucket type and bucket name + {binary_index, "field1"}, %% index name + <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4" +). +``` + +```golang +c1, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithIndexName("field1_bin"). + WithRange("val2", "val4"). + Build() +if err != nil { + return err +} +``` + +```curl +curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4 +``` + +Or query an integer index... + +```java +Namespace myBucket = new Namespace("indexes", "people"); +IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L) + .build(); +IntIndexQuery.Response response = client.execute(iiq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field2_int', 1002..1004) +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('people', 'indexes') + ->withIndexName('field2_int') + ->withRangeValue(1002, 1004) + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field2_int', 1002, 1004) +``` + +```csharp +var riakIndexId = new RiakIndexId("indexes", "people", "field2"); +var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004); +var indexResult = indexRiakResult.Value; +``` + +```javascript +var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_int') + .withRange(1002, 1004) + .withCallback(query_cb) + .build(); +client.execute(intIdxCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"people">>}, %% bucket type and bucket name + {integer_index, "field2"}, %% index name + 1002, 1004 %% range query for keys between "val2" and "val4" +). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithIndexName("field2_int"). + WithIntRange(1002, 1004). 
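+    // WithIntRange targets the integer (_int) index; string ranges use
+    // WithRange against a _bin index (see the binary example above).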
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field2_int",
+    "start": "1002",
+    "end": "1004"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('rock', 'rocl')
+    ->withReturnTerms()
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">>, %% range query for keys between "rock" and "rocl"
+    [{return_terms, true}] %% also return the matched index values
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withMaxResults(5)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5)
+```
+
+```csharp
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+function do_query(continuation) {
+    var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+        .withBucketType('indexes')
+        .withBucket('tweets')
+        .withIndexName('hashtags_bin')
+        .withRange('ri', 'ru')
+        .withMaxResults(5)
+        .withCallback(pagination_cb);
+
+    if (continuation) {
+        binIdxCmdBuilder.withContinuation(continuation);
+    }
+
+    client.execute(binIdxCmdBuilder.build());
+}
+
+var query_keys = [];
+function pagination_cb(err, rslt) {
+    if (err) {
+        logger.error("query_cb err: '%s'", err);
+        return;
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+        query_keys = [];
+
+        if (rslt.continuation) {
+            do_query(rslt.continuation);
+        }
+    }
+
+    if (rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+do_query();
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [{max_results, 5}]
+).
+```
+
+```golang
+func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error {
+    builder := riak.NewSecondaryIndexQueryCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("tweets").
+        WithIndexName("hashtags_bin").
+        WithRange("ri", "ru").
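+        // Request pages of at most five keys; a continuation from a previous
+        // page, when present, is applied to the builder below.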
+        WithMaxResults(5)
+
+    if continuation != nil && len(continuation) > 0 {
+        builder.WithContinuation(continuation)
+    }
+
+    cmd, err := builder.Build()
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    printIndexQueryResults(cmd)
+
+    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
+    if sciq.Response == nil {
+        return errors.New("[DevUsing2i] expected response but did not get one")
+    }
+
+    rc := sciq.Response.Continuation
+    if rc != nil && len(rc) > 0 {
+        return doPaginatedQuery(cluster, sciq.Response.Continuation)
+    }
+
+    return nil
+}
+
+func queryingPagination(cluster *riak.Cluster) error {
+    return doPaginatedQuery(cluster, nil)
+}
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true"
+```
+
+Here is an example JSON response (your client-specific response may differ):
+
+```json
+{
+  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
+  "results": [
+    { "rice": "349222574510710785" },
+    { "rickross": "349222868095217664" },
+    { "ridelife": "349221819552763905" },
+    { "ripjake": "349220649341952001" },
+    { "ripjake": "349220687057129473" }
+  ]
+}
+```
+
+Take the continuation value from the previous result set and feed it
+back into the query.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
+        .withMaxResults(5)
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+  'hashtags_bin',
+  'ri'..'ru',
+  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+  max_results: 5,
+  return_terms: true
+)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+    'hashtags_bin',
+    'ri', 'ru',
+    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+    max_results=5,
+    return_terms=True
+)
+```
+
+```csharp
+// rslt is the previous 2i fetch result
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+// See above example
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [
+        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
+        {max_results, 5},
+        {return_terms, true}
+    ]
+).
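+%% The continuation is an opaque token generated by Riak; treat it as a
+%% black box and pass it back unchanged to retrieve the next page.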
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with `pagination` and `return_terms`.
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys. 
See the pagination example above: hash
+tags (2i keys) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages which contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index like in the previous example and pipes this into a MapReduce that
+counts the number of records in the `people` bucket. In order to
+improve efficiency, the batch size has been increased from the default
+size of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key": "people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/security.md b/content/riak/kv/2.9.8/developing/usage/security.md
new file mode 100644
index 0000000000..1ffd25be46
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/security.md
@@ -0,0 +1,103 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/advanced/client-security
+  - /riak/kv/2.9.8/dev/advanced/client-security
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/2.9.8/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, aka [security sources]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients present a CA-generated certificate
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified using the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis. 
This means that +you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{<baseurl>}}riak/kv/2.9.8/developing/usage) have to use username and password. The approach +that you adopt will depend on your security needs. + +This document provides a general overview of how that works. For +managing security in Riak itself, see the following documents: + +* [Authentication and Authorization]({{<baseurl>}}riak/kv/2.9.8/using/security/basics) +* [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/) + +We also provide client-library-specific guides for the following +officially supported clients: + +* [Java]({{<baseurl>}}riak/kv/2.9.8/developing/usage/security/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.8/developing/usage/security/ruby) +* [PHP]({{<baseurl>}}riak/kv/2.9.8/developing/usage/security/php) +* [Python]({{<baseurl>}}riak/kv/2.9.8/developing/usage/security/python) +* [Erlang]({{<baseurl>}}riak/kv/2.9.8/developing/usage/security/erlang) + +## Certificates, Keys, and Authorities + +If Riak security is enabled, all client operations, regardless of the +security source you choose for those clients, must be over a secure SSL +connection. If you are using a self-generated Certificate Authority +(CA), Riak and connecting clients will need to share that CA. + +To use certificate-based auth, you will need to create a Public Key +Infrastructure (PKI) based on +[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central +foundation of your PKI should be a Certificate Authority (CA), created +inside of a secure environment, that can be used to sign certificates. +In addition to a CA, your client will need to have access to a private +key shared only by the client and Riak as well as a CA-generated +certificate. + +To prevent so-called [Man-in-the-Middle +attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private +keys should never be shared beyond Riak and connecting clients. + +> **HTTP not supported** +> +> Certificate-based authentication is available only through Riak's +[Protocol Buffers]({{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{<baseurl>}}riak/kv/2.9.8/developing/api/http). + +### Default Names + +In Riak's [configuration files]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#security), the +default certificate file names are as follows: + +Cert | Filename +:----|:------- +Certificate authority (CA) | `cacertfile.pem` +Private key | `key.pem` +CA-generated cert | `cert.pem` + +These filenames will be used in the client-library-specific tutorials. + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/security/erlang.md b/content/riak/kv/2.9.8/developing/usage/security/erlang.md new file mode 100644 index 0000000000..01dfe41c21 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/security/erlang.md @@ -0,0 +1,118 @@ +--- +title_supertext: "Client Security:" +title: "Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Erlang" + identifier: "usage_security_erlang" + weight: 103 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.8/dev/advanced/client-security/erlang + - /riak/kv/2.9.8/dev/advanced/client-security/erlang +--- + +This tutorial shows you how to set up a Riak Erlang client to +authenticate itself when connecting to Riak. 
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+                   {credentials, "riakuser", ""},
+                   {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+                  ],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+```
+
+Please note that you do not need to specify a password if you are not
+using password-based authentication. If you are using a different
+security source, Riak will ignore the password. You can enter an empty
+string (as in the example above) or anything you'd like.
+
+This client is not currently set up to use any of the available security
+sources, with the exception of trust-based authentication, provided that
+the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which the client is connecting has been specified as trusted. More
+on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `SecurityOptions` list from
+above. We'll use the password `rosebud` here and in the rest of the
+examples.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+                   {credentials, "riakuser", "rosebud"},
+                   {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+                  ],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
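+%% With the default options the connection (and authentication) happens
+%% during start/3, so rejected credentials yield an {error, Reason} tuple
+%% rather than {ok, Pid}.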
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.8/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + + + + diff --git a/content/riak/kv/2.9.8/developing/usage/security/java.md b/content/riak/kv/2.9.8/developing/usage/security/java.md new file mode 100644 index 0000000000..bd14178e88 --- /dev/null +++ b/content/riak/kv/2.9.8/developing/usage/security/java.md @@ -0,0 +1,121 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.8/dev/advanced/client-security/java + - /riak/kv/2.9.8/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to
+create a cluster object (we'll call it `cluster`), which will in turn be
+used to create a `client` object. The setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.RiakCluster;
+import com.basho.riak.client.api.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        // This will specify a username but no password or keystore:
+        .withAuth("riakuser", null, null)
+        .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+        .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        .withAuth("riakuser", "rosebud", ks)
+        .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/security/php.md b/content/riak/kv/2.9.8/developing/usage/security/php.md
new file mode 100644
index 0000000000..00a1aeb919
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/security/php.md
@@ -0,0 +1,122 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/advanced/client-security/php
+  - /riak/kv/2.9.8/dev/advanced/client-security/php
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics). 
[Certificate]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/security/python.md b/content/riak/kv/2.9.8/developing/usage/security/python.md
new file mode 100644
index 0000000000..39d8a66896
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/security/python.md
@@ -0,0 +1,176 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/advanced/client-security/python
+  - /riak/kv/2.9.8/dev/advanced/client-security/python
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `creds` object from above. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.8/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a general CA (as with all security sources), a username, a
+CA-generated cert, and a private key. We'll assume that all certs are
+stored in `/ssl_dir`, as in the previous examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      cert_file='/ssl_dir/cert.pem',
+                      pkey_file='/ssl_dir/key.pem')
+```
+
+## Specifying a Certificate Revocation List
+
+If you are using a CA-generated Certificate Revocation List (CRL), you
+can specify its filepath using the `crl_file` parameter.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      crl_file='/ssl_dir/revocation.crl')
+```
+
+## Specifying Ciphers
+
+To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/2.9.8/using/security/basics/#security-ciphers), you can pass in a colon-delimited
+string to the `ciphers` parameter:
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC')
+```
+
+## Using OpenSSL Objects
+
+Whenever you specify certs, you have the option of either passing in
+file paths as strings (as in the examples above) or properly created
+OpenSSL objects, e.g. objects created using the
+[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If
+you generate OpenSSL objects this way, you should note that they must
+be specified differently when creating a `SecurityCreds` object. The
+table below lists the appropriate parameter names for each method, as
+well as the pyOpenSSL class to which each cert must belong if you create
+OpenSSL objects.
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `pkey_file` | `pkey` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify file paths, the certs will be loaded and converted into
+the appropriate OpenSSL objects. The functions used for this are
+`OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/security/ruby.md b/content/riak/kv/2.9.8/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..6d760dd697
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/security/ruby.md
@@ -0,0 +1,162 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/advanced/client-security/ruby
+  - /riak/kv/2.9.8/dev/advanced/client-security/ruby
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we will
+specify a password for the client in the `authentication` hash. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.8/using/security/basics#user-management).
+
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/ssl_dir/client_cert.pem',
+    cert: '/ssl_dir/cert.pem',
+    key: '/ssl_dir/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL objects.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object.
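+
+For example, a minimal sketch of a client with CRL checking disabled,
+reusing the connection details from the examples above, might look like
+this:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud',
+    # Skip CRL checking entirely
+    crl: false
+  }
+)
+```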
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)),
+the OCSP endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/developing/usage/updating-objects.md b/content/riak/kv/2.9.8/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..00a6b917a7
--- /dev/null
+++ b/content/riak/kv/2.9.8/developing/usage/updating-objects.md
@@ -0,0 +1,778 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.8/dev/using/updates
+  - /riak/kv/2.9.8/dev/using/updates
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context). These context objects track the causal history of Riak objects.
+They are attached to _all_ Riak objects as metadata, and they are not
+readable by humans. They may sound complex---and they are fairly complex
+behind the scenes---but using them in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->atLocation($location)
+    ->withObject($object)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but it
+%% will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+obj = rsp.Values[0]
+obj.Value = []byte("Harlem Globetrotters")
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc = cmd.(*riak.StoreValueCommand)
+rsp = svc.Response
+obj = rsp.Values[0]
+fmt.Printf("champion: %v", string(obj.Value))
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+```
+
+In the samples above, we didn't need to actually interact with the
+context object, as retaining and passing along the context object was
+accomplished automatically by the client. If, however, you do need
+access to an object's context, the clients enable you to fetch it from
+the object:
+
+```java
+// Using the RiakObject obj from above:
+
+Vclock vClock = obj.getVclock();
+System.out.println(vClock.asString());
+
+// The context object will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```ruby
+# Using the RObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```php
+// Using the $object from above:
+
+echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```python
+# Using the RiakObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```csharp
+// Using the RiakObject obj from above:
+var vclock = rslt.Value.VectorClock;
+Console.WriteLine(Convert.ToBase64String(vclock));
+
+// The output will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```javascript
+// Using the RiakObject fetchedObj from above:
+var fetchedObj = rslt.values.shift();
+logger.info("vclock: %s", fetchedObj.getVClock().toString('base64'));
+
+// The output will look something like this:
+// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA=
+```
+
+```erlang
+%% Using the Obj object from above:
+
+riakc_obj:vclock(Obj).
+
+%% The context object will look something like this in the Erlang shell:
+%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126,
+%%   6,175,157,255,57,131,41,145,49,143,149,225,240,...>>
+```
+
+```golang
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Println(rsp.VClock)
+
+// Output:
+// X3hNXFq3ythUqvvrG9eJEGbUyLS
+```
+
+## The Object Update Cycle
+
+If you decide that your application requires mutable data in Riak, we
+recommend that you:
+
+* avoid high-frequency object updates to the same key (i.e. multiple
+  updates per second for long periods of time), as this will degrade
+  Riak performance; and that you
+* follow a read-modify-write cycle when performing updates.
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated).
+This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+  * [Java]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/java)
+  * [Ruby]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/ruby)
+  * [Python]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/python)
+  * [C#]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/csharp)
+  * [Go]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+[strong consistency documentation]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/2.9.8/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official Ruby,
+PHP, Python, .NET, Node.js, Erlang, and Go clients. Because updates with the
+official Java client function somewhat differently, those examples can be
+found in the [section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which will bear the bucket type
+`siblings`, which sets `allow_mult` to `true`. The key for each object
+is the name of the team, e.g. `giants`, `broncos`, etc. Each object will
+consist of the name of the coach in plain text.
Here's an example of +creating and storing such an object: + +```ruby +bucket = client.bucket('coaches') +obj = bucket.get_or_new('seahawks', type: 'siblings') +obj.content_type = 'text/plain' +obj.raw_data = 'Pete Carroll' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + +if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); +} else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); +} + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('siblings').bucket('coaches') +obj = RiakObject(client, bucket, 'seahawks') +obj.content_type = 'text/plain' +obj.data = 'Pete Carroll' +obj.store() +``` + +```csharp +var id = new RiakObjectId("siblings", "coaches", "seahawks"); +var obj = new RiakObject(id, "Pete Carroll", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('siblings'); +riakObj.setBucket('coaches'); +riakObj.setKey('seahawks'); +riakObj.setValue('Pete Carroll'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } else { + logger.info('Stored Pete Carroll'); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>}, + <<"seahawks">>, + <<"Pete Carroll">>, + <<"text/plain">>). +riakc_pb_socket:put(Pid, Obj). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Pete Carroll"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey("seahawks"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +fmt.Println("Stored Pete Carroll") +``` + +Every once in a while, though, head coaches change in the NFL, which +means that our data would need to be updated. 
+Below is an example function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.raw_data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($team, $coach) {
+    global $riak; // assumes the $riak client object from the examples above
+
+    // The read phase
+    $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    // The modify phase
+    if ($response->isSuccess()) {
+        $object = $response->getObject();
+        $object->setData($coach);
+    } else {
+        $object = new \Basho\Riak\Object($coach, 'text/plain');
+    }
+
+    // The write phase
+    $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->withObject($object)
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    return $response->isSuccess();
+}
+
+echo update_coach('packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Pid, Team, NewCoach) ->
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage
+update_coach(Pid, <<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak.
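+
+The same read/modify/write cycle can also be sketched over the HTTP API.
+As noted earlier, the causal context travels in the `X-Riak-Vclock`
+header, so a shell version of the update must capture that header on the
+read and send it back with the write. The sketch below assumes a local
+node listening on port 8098 and uses a hypothetical replacement coach:
+
+```curl
+# Read the current value and capture the causal context header
+VCLOCK=$(curl -si http://localhost:8098/types/siblings/buckets/coaches/keys/seahawks |
+  grep -i '^x-riak-vclock' | awk '{print $2}' | tr -d '\r')
+
+# Write the modified value back, passing the context along
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -H "X-Riak-Vclock: $VCLOCK" \
+  -d "Jim Mora" \
+  http://localhost:8098/types/siblings/buckets/coaches/keys/seahawks
+```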
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If
+you're not certain, however, then we recommend always reading the object
+first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+    public String username;
+    public List<String> hobbies;
+
+    public User(String username, List<String> hobbies) {
+        this.username = username;
+        this.hobbies = hobbies;
+    }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` field:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that you can use to replace the existing
+object with a new object of the same type rather than changing one or
+more properties of the object.
+Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above would update the object without fetching it. You
+could, however, use a no-operation update to _read_ an object as well if
+you set `return_body` to `true` in your request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withFetchOption(Option.RETURN_BODY, true)
+        .withUpdate(Update.noopUpdate())
+        .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/downloads.md b/content/riak/kv/2.9.8/downloads.md
new file mode 100644
index 0000000000..9ff8576902
--- /dev/null
+++ b/content/riak/kv/2.9.8/downloads.md
@@ -0,0 +1,27 @@
+---
+title: "Download for Riak KV 2.9.8"
+description: "Download some stuff!"
+menu:
+  riak_kv-2.9.8:
+    name: "Download Riak KV"
+    identifier: "download_riak_kv"
+    weight: 101
+    pre: download-alt
+project: "riak_kv"
+project_version: 2.9.8
+toc: false
+layout: downloads
+listed_projects:
+  - project: "riak_kv"
+    version: 2.9.8
+    title: "Riak KV"
+    install_instructions_set: "setup/installing"
+aliases:
+  - /riak/2.9.8/downloads
+  - /riak/kv/2.9.8/downloads
+---
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/index.md b/content/riak/kv/2.9.8/index.md
new file mode 100644
index 0000000000..644c44c01a
--- /dev/null
+++ b/content/riak/kv/2.9.8/index.md
@@ -0,0 +1,77 @@
+---
+title: "Riak KV 2.9.8"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Riak KV"
+    identifier: "index"
+    weight: 100
+    pre: riak
+toc: false
+aliases:
+  - /riak/2.9.8/
+---
+
+[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/
+[config index]: {{<baseurl>}}riak/kv/2.9.8/configuring
+[downloads]: {{<baseurl>}}riak/kv/2.9.8/downloads/
+[install index]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/
+[plan index]: {{<baseurl>}}riak/kv/2.9.8/setup/planning
+[perf open files]: {{<baseurl>}}riak/kv/2.9.8/using/performance/open-files-limit
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/debian-ubuntu
+[usage search]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search
+[getting started]: {{<baseurl>}}riak/kv/2.9.8/developing/getting-started
+[dev client libraries]: {{<baseurl>}}riak/kv/2.9.8/developing/client-libraries
+
+
+
+Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data.
+
+## Supported Operating Systems
+
+- Amazon Linux 2016.09 (AWS)
+- Amazon Linux 2 (AWS)
+- CentOS 6
+- CentOS 7
+- CentOS 8
+- Debian 7.0 ("Wheezy")
+- Debian 8.0 ("Jessie")
+- Debian 9.0 ("Stretch")
+- Red Hat Enterprise Linux 6
+- Red Hat Enterprise Linux 7
+- Red Hat Enterprise Linux 8
+- Raspbian Buster
+- Ubuntu 12.04 ("Precise Pangolin")
+- Ubuntu 14.04 ("Trusty Tahr")
+- Ubuntu 16.04 ("Xenial Xerus")
+- Ubuntu 18.04 ("Bionic Beaver")
+- FreeBSD 10.4
+- FreeBSD 11.1
+- Mac OSX 10.11+ (development only)
+
+## Getting Started
+
+Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started:
+
+1. [Install Riak KV][install index]
+2. [Plan your Riak KV setup][plan index]
+3. [Configure Riak KV for your needs][config index]
+
+{{% note title="Developing with Riak KV" %}}
+If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/2.9.8/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more.
+{{% /note %}}
+
+## Popular Docs
+
+1. [Open Files Limit][perf open files]
+2. [Installing on Debian-Ubuntu][install debian & ubuntu]
+3. [Developing with Riak KV: Searching][usage search]
+4. [Developing with Riak KV: Getting Started][getting started]
+5. [Developing with Riak KV: Client Libraries][dev client libraries]
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/learn.md b/content/riak/kv/2.9.8/learn.md
new file mode 100644
index 0000000000..8c22a178ea
--- /dev/null
+++ b/content/riak/kv/2.9.8/learn.md
@@ -0,0 +1,53 @@
+---
+title: "Learn About Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Learning"
+    identifier: "learn"
+    weight: 400
+    pre: beaker
+toc: true
+aliases:
+---
+
+[learn why riak]: ./why-riak-kv/
+[learn use cases]: ./use-cases/
+[learn new nosql]: ./new-to-nosql/
+[glossary]: ./glossary/
+[concepts]: ./concepts/
+
+## In This Section
+
+#### [Why Riak KV?][learn why riak]
+
+An overview of Riak KV and when to use it.
+
+[Learn More >>][learn why riak]
+
+#### [Use Cases][learn use cases]
+
+Details use cases and applications in which Riak KV excels.
+
+[Learn More >>][learn use cases]
+
+
+
+#### [Glossary][glossary]
+
+A list of terms relating to Riak used throughout the documentation.
+
+[Learn More >>][glossary]
+
+#### [Concepts][concepts]
+
+Provides definitions for, insight into, and high-level information about the various parts of Riak KV.
+
+[Learn More >>][concepts]
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/learn/concepts.md b/content/riak/kv/2.9.8/learn/concepts.md
new file mode 100644
index 0000000000..04a649414a
--- /dev/null
+++ b/content/riak/kv/2.9.8/learn/concepts.md
@@ -0,0 +1,49 @@
+---
+title: "Concepts"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Concepts"
+    identifier: "learn_concepts"
+    weight: 104
+    parent: "learn"
+toc: true
+aliases:
+---
+
+[concept aae]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets
+[concept cap neg]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/capability-negotiation
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters
+[concept crdts]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/crdts
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency
+[concept keys objects]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/reference/strong-consistency
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/vnodes
+[config index]: {{<baseurl>}}riak/kv/2.9.8/configuring
+[plan index]: {{<baseurl>}}riak/kv/2.9.8/setup/planning
+[use index]: {{<baseurl>}}riak/kv/2.9.8/using/
+
+
+Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high-level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak.
+
+Learn more about:
+
+* [Active Anti-Entropy (AAE)][concept aae]
+* [Buckets][concept buckets]
+* [Capability Negotiation][concept cap neg]
+* [Causal Context][concept causal context]
+* [Clusters][concept clusters]
+* [Convergent Replicated Data Types (CRDTs)][concept crdts]
+* [Eventual Consistency][concept eventual consistency]
+* [Keys and Objects][concept keys objects]
+* [Replication][concept replication]
+* [Virtual Nodes (vnodes)][concept vnodes]
+
+
+
+
diff --git a/content/riak/kv/2.9.8/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.9.8/learn/concepts/active-anti-entropy.md
new file mode 100644
index 0000000000..f30f761805
--- /dev/null
+++ b/content/riak/kv/2.9.8/learn/concepts/active-anti-entropy.md
@@ -0,0 +1,111 @@
+---
+title: "Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Active Anti-Entropy"
+    identifier: "learn_concepts_aae"
+    weight: 100
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.8/theory/concepts/aae
+  - /riak/kv/2.9.8/theory/concepts/aae
+---
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter
+[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/active-anti-entropy
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency
+[config aae]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/#active-anti-entropy
+[glossary read rep]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#read-repair
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode
+[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree
+[usage search]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search
+
+
+In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored
+on different nodes are an expected byproduct of node failure, concurrent
+client updates, physical data loss and corruption, and other events that
+distributed systems are built to handle. These conflicts occur when
+objects are either
+
+* **missing**, as when one node holds a replica of the object and
+  another node does not, or
+* **divergent**, as when the values of an existing object differ across
+  nodes.
+
+Riak KV offers two means of resolving object conflicts: read repair and
+active anti-entropy (AAE). Both of these conflict resolution mechanisms
+apply to normal key/value data in Riak as well as to
+[search indexes][usage search].
+
+
+## Read Repair vs. Active Anti-Entropy
+
+In versions of Riak prior to 1.3, replica conflicts were healed via
+[read repair][glossary read rep], which is a _passive_
+anti-entropy mechanism that heals object conflicts only when a read
+request reaches Riak from a client. Under read repair, if the
+[vnode][glossary vnode] coordinating the read request determines
+that different nodes hold divergent values for the object, the repair
+process will be set in motion.
+
+One advantage of using read repair alone is that it doesn't require any
+kind of background process to take effect, which can cut down on CPU
+resource usage. The drawback of the read repair-only approach, however,
+is that the healing process can only ever reach those objects that
+are read by clients. Any conflicts in objects that are not read by
+clients will go undetected.
+
+The _active_ anti-entropy (AAE) subsystem was added to Riak in
+versions 1.3 and later to enable conflict resolution to run as a
+continuous background process, in contrast with read repair, which does
+not run continuously. AAE is most useful in clusters containing
+so-called "cold data" that may not be read for long periods of time,
+even months or years, and is thus not reachable by read repair.
+
+Although AAE is enabled by default, it can be turned off if necessary.
+See our documentation on [managing active anti-entropy][cluster ops aae]
+for information on how to enable and disable AAE, as well as on configuring
+and monitoring AAE.
+
+## Active Anti-Entropy and Hash Tree Exchange
+
+In order to compare object values between replicas without using more
+resources than necessary, Riak relies on [Merkle
+tree] hash exchanges between
+nodes.
+
+Using this type of exchange enables Riak to compare a balanced tree of
+Riak object hashes. Any difference at a higher level in the hierarchy
+means that at least one value has changed at a lower level. AAE
+recursively compares the tree, level by level, until it pinpoints exact
+values with a difference between nodes. The result is that AAE is able
+to run repair operations efficiently regardless of how many objects are
+stored in a cluster, since it need only repair specific objects instead
+of all objects.
+
+In contrast with related systems, Riak uses persistent, on-disk hash
+trees instead of in-memory hash trees. The advantages of this approach
+are twofold:
+
+* Riak can run AAE operations with a minimal impact on memory usage
+* Riak nodes can be restarted without needing to rebuild hash trees
+
+In addition, hash trees are updated in real time as new writes come in,
+which reduces the time that it takes to detect and repair missing or
+divergent replicas.
+
+As an additional fallback measure, Riak periodically clears and
+regenerates all hash trees from on-disk key/value data, which enables
+Riak to detect silent data corruption to on-disk data arising from disk
+failure, faulty hardware, and other sources. The default time period for
+this regeneration is one week, but this can be adjusted in each node's
+[configuration file][config aae].
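+
+To make the exchange mechanics concrete, here is a toy sketch (not
+Riak's internal implementation or API) of how comparing two hash trees
+narrows repair work down to specific keys. It assumes two trees of
+identical shape, where every node carries a hash of everything beneath
+it:
+
+```erlang
+%% Toy illustration only. A tree node is either
+%% {leaf, Key, Hash} or {inner, Hash, Children}.
+
+%% Equal hashes mean the entire subtree matches, so it is skipped.
+diff({inner, Hash, _}, {inner, Hash, _}) ->
+    [];
+%% Differing subtree hashes: recurse into the children only.
+diff({inner, _, Children1}, {inner, _, Children2}) ->
+    lists:append([diff(C1, C2) ||
+                     {C1, C2} <- lists:zip(Children1, Children2)]);
+%% Matching leaf hashes: the replicas of this key agree.
+diff({leaf, _, Hash}, {leaf, _, Hash}) ->
+    [];
+%% Divergent leaf: this key needs repair.
+diff({leaf, Key, _}, {leaf, Key, _}) ->
+    [Key].
+```
+
+Only the keys returned by a comparison like this need to be exchanged
+and repaired, which is why the cost of an AAE exchange tracks the number
+of divergent objects rather than the total number of objects stored.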
+
+
+
+
diff --git a/content/riak/kv/2.9.8/learn/concepts/buckets.md b/content/riak/kv/2.9.8/learn/concepts/buckets.md
new file mode 100644
index 0000000000..f1de993586
--- /dev/null
+++ b/content/riak/kv/2.9.8/learn/concepts/buckets.md
@@ -0,0 +1,217 @@
+---
+title: "Buckets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Buckets"
+    identifier: "learn_concepts_buckets"
+    weight: 101
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.8/theory/concepts/Buckets
+  - /riak/kv/2.9.8/theory/concepts/Buckets
+  - /riak/2.9.8/theory/concepts/buckets
+  - /riak/kv/2.9.8/theory/concepts/buckets
+---
+
+[apps cluster metadata]: {{<baseurl>}}riak/kv/2.9.8/developing/app-guide/cluster-metadata
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/bucket-types
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/strong-consistency
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context
+[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context/#siblings
+[concept replication]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/reference/strong-consistency
+[config basic]: {{<baseurl>}}riak/kv/2.9.8/configuring/basic
+[dev api http]: {{<baseurl>}}riak/kv/2.9.8/developing/api/http
+[dev data types]: {{<baseurl>}}riak/kv/2.9.8/developing/data-types
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#ring
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/multi
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/replication
+
+
+Buckets are used to define a virtual keyspace for storing Riak objects.
+They enable you to define non-default configurations over that keyspace
+concerning [replication properties][concept replication] and [other
+parameters][config basic].
+
+In certain respects, buckets can be compared to tables in relational
+databases or folders in filesystems. From the standpoint
+of performance, buckets with default configurations are essentially
+"free," while non-default configurations, defined [using bucket
+types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.
+
+## Configuration
+
+Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables
+you to create and modify sets of configurations and apply them to as
+many buckets as you wish. With bucket types, you can configure the
+following bucket-level parameters, overriding the default values if you
+wish.
+
+#### allow_mult
+
+Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on
+the context. See the documentation on [`allow_mult`][usage bucket types] for more
+information.
+
+#### n_val
+
+Specifies the number of copies of each object to be stored in the
+cluster. See the documentation on [replication properties][usage replication]. Default:
+`3`.
+
+#### last_write_wins
+
+Indicates if an object's timestamp will be used to decide the canonical
+write in the case of a conflict. See the documentation on [vector
+clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default:
+`false`.
+
+#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum
+
+See the documentation on [replication properties][usage replication] for more information
+on all of these properties.
+
+#### precommit
+
+A list of Erlang functions to be executed before writing an object. See
+our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit
+hooks, i.e. an empty list.
+
+#### postcommit
+
+A list of Erlang functions to be executed after writing an object. See
+our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit
+hooks, i.e. an empty list.
+
+#### old_vclock, young_vclock, small_vclock, big_vclock
+
+These settings enable you to manage [vector clock pruning][concept causal context].
+
+#### backend
+
+If you are using the [Multi][plan backend multi] backend, this property enables you to
+determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using
+LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_
+buckets of all types will use the assigned backend.
+
+#### consistent
+
+If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets
+bearing a type, this setting must be set to `true`. The default is
+`false`. More information can be found in our documentation on [using
+strong consistency][cluster ops strong consistency].
+
+#### datatype
+
+If you are using [Riak data types][dev data types], this setting
+determines which data type will be used in
+buckets of this bucket type. Possible values: `counter`, `set`, or
+`map`.
+
+#### dvv_enabled
+
+Whether [dotted version vectors][concept causal context]
+will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.
+
+#### chash_keyfun, linkfun
+
+These settings involve features that have been deprecated. You will not
+need to adjust these values.
+
+## Fetching Bucket Properties
+
+If you'd like to see how a particular bucket has been configured, you
+can do so using our official client libraries or through Riak's [HTTP
+API][dev api http]. The following would fetch the properties for the bucket
+`animals` if that bucket had a default configuration, i.e. the `default`
the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak's HTTP API is running on "localhost" and port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals', 'my_custom_type')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/learn/concepts/capability-negotiation.md b/content/riak/kv/2.9.8/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..d489291029
--- /dev/null
+++ b/content/riak/kv/2.9.8/learn/concepts/capability-negotiation.md
@@ -0,0 +1,36 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.8/theory/concepts/capability-negotiation
+  - /riak/kv/2.9.8/theory/concepts/capability-negotiation
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.8/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce
+
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
+
+Rolling upgrades no longer require you to disable and then re-enable features due to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + + + + + diff --git a/content/riak/kv/2.9.8/learn/concepts/causal-context.md b/content/riak/kv/2.9.8/learn/concepts/causal-context.md new file mode 100644 index 0000000000..198d2d5afd --- /dev/null +++ b/content/riak/kv/2.9.8/learn/concepts/causal-context.md @@ -0,0 +1,289 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.8/theory/concepts/context + - /riak/kv/2.9.8/theory/concepts/context +--- + + +[concept aae]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/2.9.8/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/2.9.8/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/2.9.8/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+    b. Riak creates sibling values, aka **siblings**, for the object
+
+    c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by your configuration settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 1.4, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+ * Whether one object is a direct descendant of the other
+ * Whether the objects are direct descendants of a common parent
+ * Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects.
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks][concept causal context]. In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object, but they
+can't identify which value was associated with each update.
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#sibling-explosion) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates,
+objects that use DVVs should generally be smaller than objects that use
+vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence of interactions with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`. And so if you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion.
At the extreme, having an enormous object in a +node can cause reads of that object to crash the entire node. Other +issues include [undue latency][perf latency reduc] and +out-of-memory errors. + +To prevent sibling explosion, we recommend the following: + +1. Use [dotted version vectors](#dotted-version-vectors) +instead of vector clocks for causal +context. +2. Always update mutable objects within a read/modify/write cycle. More +information can be found in the [Object Updates][usage updating objects] doc. + +## Resources + +* [Evaluating Dotted Version Vectors in Riak] +* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study] +* [Dotted Version Vector Sets] +* [A History of Time in Riak] + + + + diff --git a/content/riak/kv/2.9.8/learn/concepts/clusters.md b/content/riak/kv/2.9.8/learn/concepts/clusters.md new file mode 100644 index 0000000000..ec8e06ef12 --- /dev/null +++ b/content/riak/kv/2.9.8/learn/concepts/clusters.md @@ -0,0 +1,117 @@ +--- +title: "Clusters" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Clusters" + identifier: "learn_concepts_clusters" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.8/theory/concepts/Clusters + - /riak/kv/2.9.8/theory/concepts/Clusters + - /riak/2.9.8/theory/concepts/clusters + - /riak/kv/2.9.8/theory/concepts/clusters +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.8/learn/dynamo +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution +[usage replication]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/replication + + +Riak's default mode of operation is to work as a cluster consisting of +multiple [nodes][glossary node], i.e. multiple well-connected data +hosts. + +Each host in the cluster runs a single instance of Riak, referred to as +a Riak node. Each Riak node manages a set of virtual nodes, or +[vnodes][glossary vnode], that are responsible for storing a +separate portion of the keys stored in the cluster. + +In contrast to some high-availability systems, Riak nodes are _not_ +clones of one another, and they do not all participate in fulfilling +every request. Instead, you can configure, at runtime or at request +time, the number of nodes on which data is to be replicated, as well as +when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed. + +## The Ring + +Though much of this section is discussed in our annotated discussion of +the Amazon [Dynamo paper][learn dynamo], it nonetheless provides a summary of +how Riak implements the distribution of data throughout a cluster. + +Any client interface to Riak interacts with objects in terms of the +[bucket][concept buckets] and [key][concept keys objects] in which a value is +stored, as well as the [bucket type][usage bucket types] that is used +to set the bucket's properties. + +Internally, Riak computes a 160-bit binary hash of each bucket/key pair +and maps this value to a position on an ordered **ring** of all such +values. 
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes define a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![Riak Data Distribution]({{<baseurl>}}images/riak-data-distribution.png)
+
+When N is set to 3, the value `REM` is stored in the key `artist`. That
+key is assigned to 3 partitions out of 32 available partitions. When a
+read request is made to Riak, the ring state will be used to determine
+which partitions are responsible. From there, a variety of
+[configurable parameters][usage replication] determine how Riak
+will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates.
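+
+The partition arithmetic described above can be sketched in a few lines of
+Python. This is a toy model rather than Riak's actual claim algorithm: it
+hashes a bucket/key pair into a 160-bit keyspace (plain SHA-1 stands in for
+`riak_core`'s consistent hash) and looks up which node claims the
+containing partition:
+
+```python
+import hashlib
+
+RING_SIZE = 16                # partitions; the 16-partition example above
+NODES = ['node1', 'node2']    # 2 nodes, so each ends up with 8 vnodes
+
+# Round-robin claim: a simplification of Riak's real claim algorithm
+ring = {p: NODES[p % len(NODES)] for p in range(RING_SIZE)}
+
+def partition_for(bucket: bytes, key: bytes) -> int:
+    # Hash the bucket/key pair onto the 160-bit ring
+    idx = int(hashlib.sha1(bucket + key).hexdigest(), 16)
+    return idx * RING_SIZE // 2 ** 160
+
+p = partition_for(b'artist', b'REM')
+print(p, ring[p])             # partition index and the node that claims it
+```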
+ + + + diff --git a/content/riak/kv/2.9.8/learn/concepts/crdts.md b/content/riak/kv/2.9.8/learn/concepts/crdts.md new file mode 100644 index 0000000000..306ae27e36 --- /dev/null +++ b/content/riak/kv/2.9.8/learn/concepts/crdts.md @@ -0,0 +1,252 @@ +--- +title_supertext: "Concept" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Data Types" + identifier: "learn_concepts_data_types" + weight: 104 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.8/theory/concepts/crdts + - /riak/kv/2.9.8/theory/concepts/crdts +--- + +[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[data types converg]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/crdts/#convergence +[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html +[data types impl]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/strong-consistency +[dev data types]: {{<baseurl>}}riak/kv/2.9.8/developing/data-types +[riak_dt]: https://github.com/basho/riak_dt +[dev data types context]: {{<baseurl>}}riak/kv/2.9.8/developing/data-types/#data-types-and-context +[glossary node]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution + +Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections: + +- Counters +- Flags +- HyperLogLogs +- Maps +- Registers +- Sets + +The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients. + +Instead of the usual create, read, update, and delete (CRUD) operations +performed on key/value pairs, data types enable you to perform +operations such as removing a register from a map, telling a counter to +increment itself by 5, or enabling a flag that was previously disabled. + +It's important to note that Riak Data Types are operations-based from the standpoint of connecting clients. Like CRDTs, the [convergence logic][data types converg] is state-based behind the scenes. + +Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below. + +For more articles on CRDTs, check out this [reading list][crdts reading list]. + + +## Counters + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used within a map. A counter’s value can only be a positive integer, negative integer, or zero. 
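+
+As a quick, hedged sketch with the official Python client, assuming a
+hypothetical `counters` bucket type created with `{"datatype":"counter"}`
+and activated:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(protocol='pbc', pb_port=8087)
+
+# Assumes: riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+#          riak-admin bucket-type activate counters
+bucket = client.bucket_type('counters').bucket('player_scores')
+
+counter = bucket.new('high_score')      # a riak.datatypes.Counter
+counter.increment(5)
+counter.decrement(1)
+counter.store()
+
+print(bucket.get('high_score').value)   # 4 on a previously empty key
+```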
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, counters should not be used, because uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, hyperloglogs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+    Counter: numberOfRetweets,
+    Register: username,
+    Register: tweetContent,
+    Flag: favorited?,
+    Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which includes
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+Registers support only one operation: setting the binary value stored
+within them. They can be added to and removed from maps, but those
+operations take place on the map in which the register is nested, and
+not on the register itself.
+
+
+## Sets
+
+Sets are collections of binary values, such as strings. All of
+the values in a set are unique. For example, if you attempt to add the
+element `shovel` to a set that already contains `shovel`, the operation
+will be ignored by Riak KV. Sets can be used either on their own or
+embedded in a map.
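+
+A similar hedged sketch for sets with the Python client, assuming a
+hypothetical `sets` bucket type created with `{"datatype":"set"}` and
+activated. Note that removing an element requires the causal context from
+a prior fetch:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(protocol='pbc', pb_port=8087)
+bucket = client.bucket_type('sets').bucket('travel')
+
+cities = bucket.new('cities')
+cities.add('Toronto')
+cities.add('Montreal')
+cities.store()
+
+cities = bucket.get('cities')      # fetch first: removals need the context
+cities.discard('Toronto')
+cities.store()
+
+print(bucket.get('cities').value)  # frozenset({'Montreal'})
+```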
+
+Some examples of using sets:
+
+- Storing the UUIDs of a user's friends in a social network application
+- Storing items in an e-commerce shopping cart
+
+### Operations
+
+Sets are subject to four basic operations: add an element, remove an
+element, add multiple elements, and remove multiple elements.
+
+
+## Advantages and Disadvantages of Data Types
+
+[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider.
+
+One of the core purposes behind data types is to relieve developers
+using Riak KV of the burden of producing data convergence at the
+application level by absorbing a great deal of that complexity into Riak KV
+itself. Riak KV manages this complexity by building eventual consistency
+into the data types themselves instead of requiring clients to do so.
+
+You can still build applications with Riak KV that treat it as a highly
+available key/value store, and you will always have this choice. What
+Riak Data Types provide is additional flexibility and a broader choice
+palette.
+
+The trade-off that data types necessarily present is that they don't
+allow you to produce your own convergence logic. If your use case
+demands that you be able to create your own deterministic merge
+functions, then Riak Data Types might not be a good fit.
+
+
+## Implementation
+
+Conflicts between replicas are inevitable in a distributed system like
+Riak KV.
+
+For example, if a map is stored in the key `my_map`, it is always
+possible that the value of `my_map` will be different in nodes A and B.
+
+Without using data types, that conflict must be resolved using
+timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt].
+
+
+## Convergence
+
+The benefit of data types is that Riak KV knows how to resolve value
+conflicts by applying data type-specific rules.
+
+Riak KV does this by remembering the history of a value and broadcasting that
+history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct.
+
+### Example
+
+Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`, while on another node the set has only one element, `apple`.
+
+What happens when the two nodes communicate and note the divergence?
+
+In this case Riak KV would declare the set with two elements the winner.
+At that point, the node with the incorrect set would be told: "The set
+`fruits` should have elements `apple` and `orange`."
+
+In general, convergence involves the following stages:
+
+1. Check for divergence. If the data types have the same value, Riak KV
+   does nothing. But if divergence is noted...
+2. Riak KV applies data type-specific merge rules, like in the `fruits`
+   set example above, which will result in a "correct" value.
+3.
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. + + + + diff --git a/content/riak/kv/2.9.8/learn/concepts/eventual-consistency.md b/content/riak/kv/2.9.8/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..6b27f7d956 --- /dev/null +++ b/content/riak/kv/2.9.8/learn/concepts/eventual-consistency.md @@ -0,0 +1,202 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.8/theory/concepts/Eventual-Consistency + - /riak/kv/2.9.8/theory/concepts/Eventual-Consistency + - /riak/2.9.8/theory/concepts/eventual-consistency + - /riak/kv/2.9.8/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these
+systems need to rely on some form of conflict-resolution mechanism.
+
+One of the things that makes Riak's eventual consistency model powerful
+is that Riak does not dictate how data resolution takes place. While
+Riak does ship with a set of defaults regarding how data is
+[replicated](#replication-properties-and-request-tuning) and how
+[conflicts are resolved][usage conflict resolution], you can override these
+defaults if you want to employ a different strategy.
+
+Among those strategies, you can enable Riak to resolve object conflicts
+automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or
+special eventually consistent [Data Types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types/), or you can resolve those
+conflicts on the application side by employing use case-specific logic
+of your choosing. More information on this can be found in our guide to
+[conflict resolution][usage conflict resolution].
+
+This variety of options enables you to manage Riak's eventually
+consistent behavior in accordance with your application's [data model
+or models]({{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/).
+
+## Replication Properties and Request Tuning
+
+In addition to providing you with different means of resolving conflicts,
+Riak also enables you to fine-tune **replication properties**, which
+determine things like the number of nodes on which data should be stored
+and the number of nodes that are required to respond to read, write, and
+other requests.
+
+An in-depth discussion of these behaviors and how they can be
+implemented on the application side can be found in our guides to
+[replication properties][concept replication] and [conflict resolution][usage conflict resolution].
+
+In addition to our official documentation, we also recommend checking
+out the [Understanding Riak's Configurable
+Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+series from [the Basho blog](https://riak.com/blog/).
+
+## A Simple Example of Eventual Consistency
+
+Let's assume for the moment that a sports news application is storing
+all of its data in Riak. One thing that the application always needs to
+be able to report to users is the identity of the current manager of
+Manchester United, which is stored in the key `manchester-manager` in
+the bucket `premier-league-managers`. This bucket has `allow_mult` set
+to `false`, which means that Riak will resolve all conflicts by itself.
+
+Now let's say that a node in this cluster has recently recovered from
+failure and has an old copy of the key `manchester-manager` stored in
+it, with the value `Alex Ferguson`. The problem is that Sir Ferguson
+stepped down in 2013 and is no longer the manager. Fortunately, the
+other nodes in the cluster hold the value `David Moyes`, which is
+correct.
+
+Shortly after the recovered node comes back online, other cluster
+members recognize that it is available. Then, a read request for
+`manchester-manager` arrives from the application. Regardless of the
+order in which the responses arrive at the node that is coordinating this
+request, `David Moyes` will be returned as the value to the client,
+because `Alex Ferguson` is recognized as an older value.
+
+Why is this? How does Riak make this decision? Behind the scenes, after
+`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the
+older value on the node that just came back online.
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because 1 of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we've considered the
+possibility of a node responsible for the `manchester-manager` key
+having failed. To be more precise, we've been talking about a *primary*
+node, one that would bear responsibility for that key when the cluster
+is perfectly healthy.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure.
+
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3`, the client indicated that 3 primary
+nodes must respond for the operation to be considered successful. The
+operation was not successful in that sense, but there is no way to tell
+without performing another read whether the write truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels, et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
+
+
+
+
diff --git a/content/riak/kv/2.9.8/learn/concepts/keys-and-objects.md b/content/riak/kv/2.9.8/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..fba165087e
--- /dev/null
+++ b/content/riak/kv/2.9.8/learn/concepts/keys-and-objects.md
@@ -0,0 +1,53 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.8/theory/concepts/keys-and-values
+  - /riak/kv/2.9.8/theory/concepts/keys-and-values
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there are more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and occur when more than one actor
+updates an object, a network partition occurs, or a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
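+
+To illustrate the whole-object model from the client side, here is a hedged
+sketch using the official Python client; every fetch returns the entire
+object, and an update means writing the whole value back:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(protocol='pbc', pb_port=8087)
+bucket = client.bucket('animals')       # the bucket-key pair names the object
+
+obj = bucket.new('polly', data={'species': 'parrot'})  # stored as JSON
+obj.store()
+
+fetched = bucket.get('polly')           # no partial fetches: whole object
+fetched.data['name'] = 'Polly'          # modify locally...
+fetched.store()                         # ...and store the entire value again
+
+if len(fetched.siblings) > 1:           # more than one metadata-value pair
+    print('siblings present; the application must resolve them')
+```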
+
+
+
+
diff --git a/content/riak/kv/2.9.8/learn/concepts/replication.md b/content/riak/kv/2.9.8/learn/concepts/replication.md
new file mode 100644
index 0000000000..a7e3c8b8e1
--- /dev/null
+++ b/content/riak/kv/2.9.8/learn/concepts/replication.md
@@ -0,0 +1,323 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.8/theory/concepts/Replication
+  - /riak/kv/2.9.8/theory/concepts/Replication
+  - /riak/2.9.8/theory/concepts/replication
+  - /riak/kv/2.9.8/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing assurance
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication][cluster ops v3 mdc] capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak chooses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
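+
+To see the relationship between N and the per-request parameters from the
+client side, here is a hedged sketch with the official Python client
+(keyword arguments follow that client's conventions; the bucket name is
+illustrative):
+
+```python
+from riak import RiakClient
+
+client = RiakClient(protocol='pbc', pb_port=8087)
+bucket = client.bucket('important_data')   # default type, so n_val is 3
+
+obj = bucket.new('entry1', data={'msg': 'hello'})
+obj.store(w=3, dw=2)     # wait for 3 write acks, 2 of them durable
+
+fast = bucket.get('entry1', r=1)   # low-latency read: one reply suffices
+safe = bucket.get('entry1', r=3)   # fails if fewer than 3 replicas respond
+```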
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up changing, leaving replicas stored under the old preflist
+stranded on vnodes that are no longer responsible for the object.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/2.9.8/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/active-anti-entropy)
+
+
+## Read Repair
+
+Read repair occurs when a successful read occurs---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Or if you have [active
+anti-entropy][concept aae] enabled, your values will
+eventually replicate as a background task.
+
+For each object that fails read (or the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean?
+
+N=3 simply means that three copies of each piece of data will be stored
+in the cluster. That is, three different partitions/vnodes will receive
+copies of the data. **There are no guarantees that the three replicas
+will go to three separate physical nodes**; however, the built-in
+functions for determining where replicas go attempt to distribute the
+data evenly.
+
+As nodes are added and removed from the cluster, the ownership of
+partitions changes and may result in an uneven distribution of the data.
+On some rare occasions, Riak will also aggressively reshuffle ownership
+of the partitions to achieve a more even balance.
+
+For cases where the number of nodes is less than the N value, data will
+likely be duplicated on some nodes. For example, with N=3 and 2 nodes in
+the cluster, one node will likely have one replica, and the other node
+will have two replicas.
+
+## Understanding replication by example
+
+To better understand how data is replicated in Riak, let's take a look at
+a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically
+we'll focus on two parts of the request: routing an object to a set of
+partitions and storing an object on a partition.
+
+### Routing an object to a set of partitions
+
+ * Assume we have 3 nodes
+ * Assume we store 3 replicas per object (N=3)
+ * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8)
+
+**Note**: It is not recommended that you use such a small ring size.
+This is for demonstration purposes only.
+
+With only 8 partitions our ring will look approximately as follows
+(response from `riak_core_ring_manager:get_my_ring/0` truncated for
+clarity):
+
+```erlang
+(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
+[{0,'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
+```
+
+The node handling this request hashes the bucket/key combination:
+
+```erlang
+(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+```
+
+The DocIdx hash is a 160-bit integer:
+
+```erlang
+(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx.
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+(dev1@127.0.0.1)6> I.
+1045375627425331784151332358177649483819648417632
+```
+
+The node looks up the hashed key in the ring, which returns a list of
+_preferred_ partitions for the given key.
+
+```erlang
+(dev1@127.0.0.1)7> Preflist = riak_core_ring:preflist(DocIdx, Ring).
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0, 'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]
+```
+
+The node chooses the first N partitions from the list. The remaining
+partitions of the "preferred" list are retained as fallbacks to use if
+any of the target partitions are unavailable.
+
+```erlang
+(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist).
+{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0,'dev1@127.0.0.1'}],
+[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]}
+```
+
+The partition information returned from the ring contains a partition
+identifier and the parent node of that partition:
+
+```erlang
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}
+```
+
+The requesting node sends a message to each parent node with the object
+and partition identifier (pseudocode for clarity):
+
+```erlang
+'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232}
+'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+'dev1@127.0.0.1' ! {put, Object, 0}
+```
+
+If any of the target partitions fail, the node sends the object to one
+of the fallbacks. When the message is sent to the fallback node, the
+message references the object and original partition identifier. For
+example, if `dev2@127.0.0.1` were unavailable, the requesting node would
+then try each of the fallbacks. The fallbacks in this example are:
+
+```erlang
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}
+```
+
+The next available fallback node would be `dev3@127.0.0.1`. The
+requesting node would send a message to the fallback node with the
+object and original partition identifier:
+
+```erlang
+'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+```
+
+Note that the partition identifier in the message is the same one that
+was originally sent to `dev2@127.0.0.1`, only this time it is being sent
+to `dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node
+of that partition, it is smart enough to hold on to the object until
+`dev2@127.0.0.1` returns to the cluster.
+
+## Processing partition requests
+
+Processing requests per partition is fairly simple. Each node runs a
+single process (`riak_kv_vnode_master`) that distributes requests to
+individual partition processes (`riak_kv_vnode`). The
+`riak_kv_vnode_master` process maintains a list of partition identifiers
+and corresponding partition processes. If a process does not exist for a
+given partition identifier, a new process is spawned to manage that
+partition.
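+
+If you'd like to see these partition processes on a live node, one
+option is the `riak-admin vnode-status` command, which reports on each
+vnode currently active on the node where it is run (output will vary
+from cluster to cluster):
+
+```bash
+# Report on every vnode active on the local node, including any
+# fallback vnodes spawned for partitions the node does not own.
+riak-admin vnode-status
+```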
+ +The `riak_kv_vnode_master` process treats all requests the same and +spawns partition processes as needed even when nodes receive requests +for partitions they do not own. When a partition's parent node is +unavailable, requests are sent to fallback nodes (handoff). The +`riak_kv_vnode_master` process on the fallback node spawns a process to +manage the partition even though the partition does not belong to the +fallback node. + +The individual partition processes perform hometests throughout the life +of the process. The hometest checks if the current node (`node/0`) +matches the parent node of the partition as defined in the ring. If the +process determines that the partition it is managing belongs on another +node (the parent node), it will attempt to contact that node. If that +parent node responds, the process will hand off any objects it has +processed for that partition and shut down. If that parent node does not +respond, the process will continue to manage that partition and check +the parent node again after a delay. The hometest is also run by +partition processes to account for changes in the ring, such as the +addition or removal of nodes to the cluster. + + + + + diff --git a/content/riak/kv/2.9.8/learn/concepts/strong-consistency.md b/content/riak/kv/2.9.8/learn/concepts/strong-consistency.md new file mode 100644 index 0000000000..bb86ae4935 --- /dev/null +++ b/content/riak/kv/2.9.8/learn/concepts/strong-consistency.md @@ -0,0 +1,105 @@ +--- +title: "Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Strong Consistency" + identifier: "learn_concepts_strong_consistency" + weight: 109 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.8/theory/concepts/strong-consistency + - /riak/kv/2.9.8/theory/concepts/strong-consistency +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.8/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. 
Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. 
Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + + + + diff --git a/content/riak/kv/2.9.8/learn/concepts/vnodes.md b/content/riak/kv/2.9.8/learn/concepts/vnodes.md new file mode 100644 index 0000000000..472a6928db --- /dev/null +++ b/content/riak/kv/2.9.8/learn/concepts/vnodes.md @@ -0,0 +1,160 @@ +--- +title: "Vnodes" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Vnodes" + identifier: "learn_concepts_vnodes" + weight: 109 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.8/theory/concepts/vnodes + - /riak/kv/2.9.8/theory/concepts/vnodes +--- + + +[concept causal context]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context +[concept clusters ring]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters/#the-ring +[concept replication]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/strong-consistency +[glossary node]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#node +[glossary ring]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#ring +[plan backend]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/cluster-capacity +[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-cli + + +Virtual nodes, more commonly referred to as **vnodes**, are processes +that manage partitions in the Riak [ring][glossary ring]. Each data +partition in a Riak cluster has a vnode that **claims** that partition. +Vnodes perform a wide variety of operations, from K/V storage operations +to guaranteeing [strong consistency][concept strong consistency] if you choose to use that +feature. + +## The Number of Vnodes in a Cluster + +The term [node][glossary node] refers to a full instance of Riak, +be it on its own physical machine or alongside others on a single +machine, as in a development cluster on your laptop. Each Riak node +contains multiple vnodes. The number per node is the [ring +size][concept clusters ring] divided by the number of nodes in the cluster. + +This means that in some clusters different nodes will have different +numbers of data partitions (and hence a different number of vnodes), +because (ring size / number of nodes) will not produce an even integer. +If the ring size of your cluster is 64 and you are running three nodes, +two of your nodes will have 21 vnodes, while the third node holds 22 +vnodes. + +The output of the [`riak-admin member-status`][use admin riak cli] +command shows this: + +``` +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +valid 34.4% -- 'dev1@127.0.0.1' +valid 32.8% -- 'dev2@127.0.0.1' +valid 32.8% -- 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 +``` + +In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of +64 partitions, while the other two nodes account for 32.8%, i.e. 21 out +of 64 partitions. This is normal and expected behavior in Riak. + +We strongly recommend setting the appropriate ring size, and by +extension the number of vnodes, prior to building a cluster. A full +guide can be found in our [cluster planning][plan cluster capacity] documentation. 
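+
+If you want to verify this math on your own cluster, recent versions of
+Riak KV can report partition ownership directly. A quick sketch,
+assuming a local development node named `dev1@127.0.0.1`:
+
+```bash
+# Show the ring size, i.e. the total number of partitions in the cluster
+riak-admin cluster partition-count
+
+# Show how many partitions---and thus vnodes---a single node claims
+riak-admin cluster partition-count --node dev1@127.0.0.1
+
+# List the individual partitions claimed by that node
+riak-admin cluster partitions --node dev1@127.0.0.1
+```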
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, participating in [strong consistency
+ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1278813932664540053428224228626747642198940975104`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node. The report for a specific
+vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+          0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/learn/dynamo.md b/content/riak/kv/2.9.8/learn/dynamo.md
new file mode 100644
index 0000000000..9b28b4c94f
--- /dev/null
+++ b/content/riak/kv/2.9.8/learn/dynamo.md
@@ -0,0 +1,1928 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/2.9.8/theory/dynamo
+  - /riak/kv/2.9.8/theory/dynamo
+---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.
+
+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV, Cassandra
+> and Voldemort come to mind. You may also remember Dynomite (which predates
+> all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use.
+ +Categories and Subject Descriptors + +* D.4.2 [Operating Systems]: Storage Management; +* D.4.5 [Operating Systems]: Reliability; +* D.4.2 [Operating Systems]: Performance; + +General Terms + +Algorithms, Management, Measurement, Performance, Design, Reliability. + +## 1. Introduction + +Amazon runs a world-wide e-commerce platform that serves tens of millions +customers at peak times using tens of thousands of servers located in many data +centers around the world. There are strict operational requirements on Amazon’s +platform in terms of performance, reliability and efficiency, and to support +continuous growth the platform needs to be highly scalable. Reliability is one +of the most important requirements because even the slightest outage has +significant financial consequences and impacts customer trust. In addition, to +support continuous growth, the platform needs to be highly scalable. + +One of the lessons our organization has learned from operating Amazon’s platform +is that the reliability and scalability of a system is dependent on how its +application state is managed. Amazon uses a highly decentralized, loosely +coupled, service oriented architecture consisting of hundreds of services. In +this environment there is a particular need for storage technologies that are +always available. For example, customers should be able to view and add items to +their shopping cart even if disks are failing, network routes are flapping, or +data centers are being destroyed by tornados. Therefore, the service responsible +for managing shopping carts requires that it can always write to and read from +its data store, and that its data needs to be available across multiple data +centers. + +Dealing with failures in an infrastructure comprised of millions of components +is our standard mode of operation; there are always a small but significant +number of server and network components that are failing at any given time. As +such Amazon’s software systems need to be constructed in a manner that treats +failure handling as the normal case without impacting availability or +performance. + +To meet the reliability and scaling needs, Amazon has developed a number of +storage technologies, of which the Amazon Simple Storage Service (also available +outside of Amazon and known as Amazon S3), is probably the best known. This +paper presents the design and implementation of Dynamo, another highly available +and scalable distributed data store built for Amazon’s platform. Dynamo is used +to manage the state of services that have very high reliability requirements and +need tight control over the tradeoffs between availability, consistency, cost- +effectiveness and performance. Amazon’s platform has a very diverse set of +applications with different storage requirements. A select set of applications +requires a storage technology that is flexible enough to let application +designers configure their data store appropriately based on these tradeoffs to +achieve high availability and guaranteed performance in the most cost effective +manner. + +There are many services on Amazon’s platform that only need primary-key access +to a data store. For many services, such as those that provide best seller +lists, shopping carts, customer preferences, session management, sales rank, and +product catalog, the common pattern of using a relational database would lead to +inefficiencies and limit scale and availability. Dynamo provides a simple +primary-key only interface to meet the requirements of these applications. 
+ +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. It also provides insight into the +tuning of these techniques to meet the requirements of production systems with +very strict performance demands. + +The paper is structured as follows. Section 2 presents the background and +Section 3 presents the related work. Section 4 presents the system design and +Section 5 describes the implementation. Section 6 details the experiences and +insights gained by running Dynamo in production and Section 7 concludes the +paper. There are a number of places in this paper where additional information +may have been appropriate but where protecting Amazon’s business interests +require us to reduce some level of detail. For this reason, the intra- and +inter-datacenter latencies in section 6, the absolute request rates in section +6.2 and outage lengths and workloads in section 6.3 are provided through +aggregate measures instead of absolute details. + + +## 2. Background + +Amazon’s e-commerce platform is composed of hundreds of services that work in +concert to deliver functionality ranging from recommendations to order +fulfillment to fraud detection. Each service is exposed through a well defined +interface and is accessible over the network. 
These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated and diverged from
+> Dynamo's (as described in this paper).
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly-available, is efficient in its resource uses,
+> and has a simple scale out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract.
+ +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. 
+ + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. On the other hand, since the application is aware of the +data schema it can decide on the conflict resolution method that is best suited +for its client’s experience. 
For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbor. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops.
+ +To reduce the additional latency introduced by multi-hop routing, some P2P +systems (e.g., [14]) employ O(1) routing where each peer maintains enough +routing information locally so that it can route requests (to access a data +item) to the appropriate peer within a constant number of hops. + +> Riak KV's gossip protocol communicates between nodes with O(1) routing, and +> maintains local routing information. + +Various storage systems, such as Oceanstore [9] and PAST [17] were built on top +of these routing overlays. Oceanstore provides a global, transactional, +persistent storage service that supports serialized updates on widely replicated +data. To allow for concurrent updates while avoiding many of the problems +inherent with wide-area locking, it uses an update model based on conflict +resolution. Conflict resolution was introduced in [21] to reduce the number of +transaction aborts. Oceanstore resolves conflicts by processing a series of +updates, choosing a total order among them, and then applying them atomically in +that order. It is built for an environment where the data is replicated on an +untrusted infrastructure. By comparison, PAST provides a simple abstraction +layer on top of Pastry for persistent and immutable objects. It assumes that the +application can build the necessary storage semantics (such as mutable files) on +top of it. + +### 3.2 Distributed File Systems and Databases + +Distributing data for performance, availability and durability has been widely +studied in the file system and database systems community. Compared to P2P +storage systems that only support flat namespaces, distributed file systems +typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19] +replicate files for high availability at the expense of consistency. Update +conflicts are typically managed using specialized conflict resolution +procedures. The Farsite system [1] is a distributed file system that does not +use any centralized server like NFS. Farsite achieves high availability and +scalability using replication. The Google File System [6] is another distributed +file system built for hosting the state of Google’s internal applications. GFS +uses a simple design with a single master server for hosting the entire metadata +and where the data is split into chunks and stored in chunkservers. Bayou is a +distributed relational database system that allows disconnected operations and +provides eventual data consistency [21]. + +Among these systems, Bayou, Coda and Ficus allow disconnected operations and are +resilient to issues such as network partitions and outages. These systems differ +on their conflict resolution procedures. For instance, Coda and Ficus perform +system level conflict resolution and Bayou allows application level resolution. +All of them, however, guarantee eventual consistency. + +Similar to these systems, Dynamo allows read and write operations to continue +even during network partitions and resolves updated conflicts using different +conflict resolution mechanisms. Distributed block storage systems like FAB [18] +split large size objects into smaller blocks and stores each block in a highly +available manner. In comparison to these systems, a key-value store is more +suitable in this case because: (a) it is intended to store relatively small +objects (size < 1M) and (b) key-value stores are easier to configure on a per- +application basis. Antiquity is a wide-area distributed storage system designed +to handle multiple server failures [23]. 
It uses a secure log to preserve data
+integrity, replicates each log on multiple servers for durability, and uses
+Byzantine fault tolerance protocols to ensure data consistency. In contrast to
+Antiquity, Dynamo does not focus on the problem of data integrity and security
+and is built for a trusted environment. Bigtable is a distributed storage system
+for managing structured data. It maintains a sparse, multi-dimensional sorted
+map and allows applications to access their data using multiple attributes [2].
+Compared to Bigtable, Dynamo targets applications that require only key/value
+access with primary focus on high availability where updates are not rejected
+even in the wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages.
+ +<table id="table-1"> + <caption> + Table 1: Summary of techniques used in Dynamo and their advantages. + </caption> + <tr> + <th>Problem</th> + <th>Technique</th> + <th>Advantage</th> + </tr> + <tr> + <td>Partitioning</td> + <td>Consistent Hashing</td> + <td>Incremental Scalability</td> + </tr> + <tr> + <td>High Availability for writes</td> + <td>Vector clocks with reconciliation during reads</td> + <td>Version size is decoupled from update rates.</td> + </tr> + <tr> + <td>Handling temporary failures</td> + <td>Sloppy Quorum and hinted handoff</td> + <td>Provides high availability and durability guarantee when some of the + replicas are not available.</td> + </tr> + <tr> + <td>Recovering from permanent failures</td> + <td>Anti-entropy using Merkle trees</td> + <td>Synchronizes divergent replicas in the background.</td> + </tr> + <tr> + <td>Membership and failure detection</td> + <td>Gossip-based membership protocol and failure detection.</td> + <td>Preserves symmetry and avoids having a centralized registry for storing + membership and node liveness information.</td> + </tr> +</table> + +### 4.1 System Interface + +Dynamo stores objects associated with a key through a simple interface; it +exposes two operations: get() and put(). The get(key) operation locates the +object replicas associated with the key in the storage system and returns a +single object or a list of objects with conflicting versions along with a +context. The put(key, context, object) operation determines where the replicas +of the object should be placed based on the associated key, and writes the +replicas to disk. The context encodes system metadata about the object that is +opaque to the caller and includes information such as the version of the object. +The context information is stored along with the object so that the system can +verify the validity of the context object supplied in the put request. + +> Whereas Dynamo only has the concept of keys, we added a higher level of +> organization called a "bucket." Keys are stored in buckets and buckets are the +> level at which several Riak KV properties can be configured (primarily the "N" +> value, or the replication value.) In addition to the bucket+key identifier and +> value, Riak KV will also return the associated metadata for a given object +> with each get or put. +> +> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. + +[HTTP API]: {{<baseurl>}}riak/kv/2.9.8/developing/api/http/ +[Protocol Buffers API]: {{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers/ + +Dynamo treats both the key and the object supplied by the caller as an opaque +array of bytes. It applies a MD5 hash on the key to generate a 128-bit +identifier, which is used to determine the storage nodes that are responsible +for serving the key. + +> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash +> to generate a 160 bit identifier which is then used to determine where in the +> database each datum is stored. Riak KV treats data as an opaque binary, thus +> enabling users to store virtually anything. + + +### 4.2 Partitioning Algorithm + +One of the key design requirements for Dynamo is that it must scale +incrementally. This requires a mechanism to dynamically partition the data over +the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning +scheme relies on consistent hashing to distribute the load across multiple +storage hosts. 
In consistent hashing [10], the output range of a hash function +is treated as a fixed circular space or “ring” (i.e. the largest hash value +wraps around to the smallest hash value). Each node in the system is assigned a +random value within this space which represents its “position” on the ring. Each +data item identified by a key is assigned to a node by hashing the data item’s +key to yield its position on the ring, and then walking the ring clockwise to +find the first node with a position larger than the item’s position. Thus, each +node becomes responsible for the region in the ring between it and its +predecessor node on the ring. The principle advantage of consistent hashing is +that departure or arrival of a node only affects its immediate neighbors and +other nodes remain unaffected. + +> **Partitioning in Riak KV** +> +> As mentioned above, Riak KV uses consistent hashing to distribute data around +> ring to partitions responsible for storing data. The ring has a maximum key +> space of 2^160. Each bucket+key (and its associated value) is hashed to a +> location on the ring. +> +> Riak KV also breaks the ring into a set number of partitions. This number is +> configured when a cluster is first built. Each node will be responsible for +> storing the data hashed to a set number of partitions. Each storage node will +> optimistically handle an equal number of partitions. + +The basic consistent hashing algorithm presents some challenges. First, the +random position assignment of each node on the ring leads to non-uniform data +and load distribution. Second, the basic algorithm is oblivious to the +heterogeneity in the performance of nodes. To address these issues, Dynamo uses +a variant of consistent hashing (similar to the one used in [10, 20]): instead +of mapping a node to a single point in the circle, each node gets assigned to +multiple points in the ring. To this end, Dynamo uses the concept of “virtual +nodes”. A virtual node looks like a single node in the system, but each node can +be responsible for more than one virtual node. Effectively, when a new node is +added to the system, it is assigned multiple positions (henceforth, “tokens”) in +the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed +in Section 6. + +> Riak KV also has the concept of virtual nodes and they are used to the same +> end as they are in Dynamo. Physical storage nodes are responsible for +> partitions, and each partition a vnode. + +Using virtual nodes has the following advantages: + +If a node becomes unavailable (due to failures or routine maintenance), the load +handled by this node is evenly dispersed across the remaining available nodes. + +When a node becomes available again, or a new node is added to the system, the +newly available node accepts a roughly equivalent amount of load from each of +the other available nodes. + +> All of these properties for vnodes in Dynamo hold true for Riak KV, too. + +The number of virtual nodes that a node is responsible can decided based on its +capacity, accounting for heterogeneity in the physical infrastructure. + +> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring]. + +[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters/ +[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters/#the-ring + +### 4.3 Replication + +To achieve high availability and durability, Dynamo replicates its data on +multiple hosts. 
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts. Each data item is replicated at N hosts, where N is a parameter
+configured “per-instance”. Each key, k, is assigned to a coordinator node
+(described in the previous section). The coordinator is in charge of the
+replication of the data items that fall within its range. In addition to locally
+storing each key within its range, the coordinator replicates these keys at the
+N-1 clockwise successor nodes in the ring. This results in a system where each
+node is responsible for the region of the ring between it and its Nth
+predecessor. In <a href="#figure-2">Figure 2</a>, node B replicates the key k at
+nodes C and D in addition to storing it locally. Node D will store the keys that
+fall in the ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
+> the concept of a bucket we covered above? In Riak KV, the replication
+> parameter, "N" (also called "n_val"), is configurable at the bucket level.
+> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
+> store three replicas of your data on three different partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called the
+preference list. The system is designed, as will be explained in Section 4.8, so
+that every node in the system can determine which nodes should be in this list
+for any particular key. To account for node failures, the preference list
+contains more than N nodes. Note that with the use of virtual nodes, it is
+possible that the first N successor positions for a particular key may be owned
+by less than N distinct physical nodes (i.e. a node may hold more than one of
+the first N positions). To address this, the preference list for a key is
+constructed by skipping positions in the ring to ensure that the list contains
+only distinct physical nodes.
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be propagated
+to all replicas asynchronously. A put() call may return to its caller before the
+update has been applied at all the replicas, which can result in scenarios where
+a subsequent get() operation may return an object that does not have the latest
+updates. If there are no failures then there is a bound on the update
+propagation times. However, under certain failure scenarios (e.g., server
+outages or network partitions), updates may not arrive at all replicas for an
+extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously, which, as you would expect, could result in a datum being
+> returned to the client that is out of date. But don't worry. We built in some
+> mechanisms to address this.
+
+There is a category of applications in Amazon’s platform that can tolerate such
+inconsistencies and can be constructed to operate under these conditions. For
+example, the shopping cart application requires that an “Add to Cart” operation
+can never be forgotten or rejected.
+If the most recent state of the cart is unavailable, and a user makes changes to
+an older version of the cart, that change is still meaningful and should be
+preserved. But at the same time it shouldn’t supersede the currently unavailable
+state of the cart, which itself may contain changes that should be preserved.
+Note that both “add to cart” and “delete item from cart” operations are
+translated into put requests to Dynamo. When a customer wants to add an item to
+(or remove from) a shopping cart and the latest version is not available, the
+item is added to (or removed from) the older version and the divergent versions
+are reconciled later.
+
+> Much like Dynamo was suited to the design of the shopping cart, Riak KV, and
+> its tradeoffs, are appropriate for a certain set of use cases. We happen to
+> feel that _most_ use cases can tolerate some level of eventual consistency.
+
+In order to provide this kind of guarantee, Dynamo treats the result of each
+modification as a new and immutable version of the data. It allows for multiple
+versions of an object to be present in the system at the same time. Most of the
+time, new versions subsume the previous version(s), and the system itself can
+determine the authoritative version (syntactic reconciliation). However, version
+branching may happen, in the presence of failures combined with concurrent
+updates, resulting in conflicting versions of an object. In these cases, the
+system cannot reconcile the multiple versions of the same object and the client
+must perform the reconciliation in order to collapse multiple branches of data
+evolution back into one (semantic reconciliation). A typical example of a
+collapse operation is “merging” different versions of a customer’s shopping
+cart. Using this reconciliation mechanism, an “add to cart” operation is never
+lost. However, deleted items can resurface.
+
+> The same holds true for Riak KV. If, by way of some failure and concurrent
+> update (rare but quite possible), there come to exist multiple versions of the
+> same object, Riak KV will push this decision down to the client (who are we to
+> tell you which is the authoritative object?). All that said, if your
+> application doesn't need this level of version control, we enable you to turn
+> the usage of vector clocks on and off at the bucket level.
+
+It is important to understand that certain failure modes can potentially result
+in the system having not just two but several versions of the same data. Updates
+in the presence of network partitions and node failures can potentially result
+in an object having distinct version sub-histories, which the system will need
+to reconcile in the future. This requires us to design applications that
+explicitly acknowledge the possibility of multiple versions of the same data (in
+order to never lose any updates).
+
+> Ditto.
+
+Dynamo uses vector clocks [12] in order to capture causality between different
+versions of the same object. A vector clock is effectively a list of (node,
+counter) pairs. One vector clock is associated with every version of every
+object. One can determine whether two versions of an object are on parallel
+branches or have a causal ordering by examining their vector clocks. If the
+counters on the first object’s clock are less-than-or-equal to all of the nodes
+in the second clock, then the first is an ancestor of the second and can be
+forgotten. Otherwise, the two changes are considered to be in conflict and
+require reconciliation.
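+
+> To make that comparison rule concrete, here is a minimal Ruby sketch of the
+> "descends" check described above (hypothetical helper code, not Riak KV's
+> implementation), modeling a clock as a Hash of node => counter:
+>
+>     def descends?(a, b)
+>       # b is an ancestor of a if every counter in b is <= a's counter
+>       b.all? { |node, counter| a.fetch(node, 0) >= counter }
+>     end
+>
+>     d3 = { sx: 2, sy: 1 }
+>     d4 = { sx: 2, sz: 1 }
+>     descends?(d4, d3) # => false
+>     descends?(d3, d4) # => false -- neither descends, so the versions conflict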
+ +> As you may have already figured out, Riak KV uses vector clocks for object +> versioning, too. Here are a whole host of resources to keep you busy for a while: +> +> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vector-clock) +> +> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) +> | +> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/) +> +> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/) +> +> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock) + +In Dynamo, when a client wishes to update an object, it must specify which +version it is updating. This is done by passing the context it obtained from an +earlier read operation, which contains the vector clock information. Upon +processing a read request, if Dynamo has access to multiple branches that cannot +be syntactically reconciled, it will return all the objects at the leaves, with +the corresponding version information in the context. An update using this +context is considered to have reconciled the divergent versions and the branches +are collapsed into a single new version. + +**<figure id="figure-3" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure3.png"> + <figcaption> + Figure 3: Version evolution of an object over time. + </figcaption> +</figure>** + +To illustrate the use of vector clocks, let us consider the example shown in +<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say +Sx) that handles the write for this key increases its sequence number and uses +it to create the data's vector clock. The system now has the object D1 and its +associated clock [(Sx, 1)]. The client updates the object. Assume the same node +handles this request as well. The system now also has object D2 and its +associated clock [(Sx, 2)]. D2 descends from D1 and therefore over-writes D1, +however there may be replicas of D1 lingering at nodes that have not yet seen +D2. Let us assume that the same client updates the object again and a different +server (say Sy) handles the request. The system now has data D3 and its +associated clock [(Sx, 2), (Sy, 1)]. + +Next assume a different client reads D2 and then tries to update it, and another +node (say Sz) does the write. The system now has D4 (descendant of D2) whose +version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2 could +determine, upon receiving D4 and its clock, that D1 and D2 are overwritten by +the new data and can be garbage collected. A node that is aware of D3 and +receives D4 will find that there is no causal relation between them. In other +words, there are changes in D3 and D4 that are not reflected in each other. Both +versions of the data must be kept and presented to a client (upon a read) for +semantic reconciliation. + +Now assume some client reads both D3 and D4 (the context will reflect that both +values were found by the read). The read's context is a summary of the clocks of +D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client performs the +reconciliation and node Sx coordinates the write, Sx will update its sequence +number in the clock. The new data D5 will have the following clock: [(Sx, 3), +(Sy, 1), (Sz, 1)]. + +A possible issue with vector clocks is that the size of vector clocks may grow +if many servers coordinate the writes to an object. 
In practice, this is not +likely because the writes are usually handled by one of the top N nodes in the +preference list. In case of network partitions or multiple server failures, +write requests may be handled by nodes that are not in the top N nodes in the +preference list causing the size of vector clock to grow. In these scenarios, it +is desirable to limit the size of vector clock. To this end, Dynamo employs the +following clock truncation scheme: Along with each (node, counter) pair, Dynamo +stores a timestamp that indicates the last time the node updated the data item. +When the number of (node, counter) pairs in the vector clock reaches a threshold +(say 10), the oldest pair is removed from the clock. Clearly, this truncation +scheme can lead to inefficiencies in reconciliation as the descendant +relationships cannot be derived accurately. However, this problem has not +surfaced in production and therefore this issue has not been thoroughly +investigated. + +> Riak KV does a certain amount of vector clock pruning to ensure their growth +> is kept under control. + + +### 4.5 Execution of get () and put () operations + +Any storage node in Dynamo is eligible to receive client get and put operations +for any key. In this section, for sake of simplicity, we describe how these +operations are performed in a failure-free environment and in the subsequent +section we describe how read and write operations are executed during failures. + +> Any node in the Riak KV ring can coordinate a request. The Riak KV information +> in this section applies to a failure-free environment. + +Both get and put operations are invoked using Amazon’s infrastructure-specific +request processing framework over HTTP. There are two strategies that a client +can use to select a node: (1) route its request through a generic load balancer +that will select a node based on load information, or (2) use a partition-aware +client library that routes requests directly to the appropriate coordinator +nodes. The advantage of the first approach is that the client does not have to +link any code specific to Dynamo in its application, whereas the second strategy +can achieve lower latency because it skips a potential forwarding step. + +A node handling a read or write operation is known as the coordinator. +Typically, this is the first among the top N nodes in the preference list. If +the requests are received through a load balancer, requests to access a key may +be routed to any random node in the ring. In this scenario, the node that +receives the request will not coordinate it if the node is not in the top N of +the requested key’s preference list. Instead, that node will forward the request +to the first among the top N nodes in the preference list. + +Read and write operations involve the first N healthy nodes in the preference +list, skipping over those that are down or inaccessible. When all nodes are +healthy, the top N nodes in a key’s preference list are accessed. When there are +node failures or network partitions, nodes that are lower ranked in the +preference list are accessed. + +To maintain consistency among its replicas, Dynamo uses a consistency protocol +similar to those used in quorum systems. This protocol has two key configurable +values: R and W. R is the minimum number of nodes that must participate in a +successful read operation. W is the minimum number of nodes that must +participate in a successful write operation. Setting R and W such that R + W > N +yields a quorum-like system. 
+In this model, the latency of a get (or put) operation is dictated by the
+slowest of the R (or W) replicas. For this reason, R and W are usually
+configured to be less than N, to provide better latency.
+
+> Riak KV makes use of the same values. But, thanks to our concept of buckets,
+> we made it a bit more customizable. The default R and W values are set at the
+> bucket level but can be configured at the request level if the developer deems
+> it necessary for certain data. "Quorum" as described in Dynamo is the default
+> setting in Riak KV.
+>
+> Some more resources on R and W:
+>
+> [REST API]({{<baseurl>}}riak/kv/2.9.8/developing/api/http/)
+>
+> [Writing Data]({{<baseurl>}}riak/kv/2.9.8/developing/usage/creating-objects/)
+>
+> [Reading Data]({{<baseurl>}}riak/kv/2.9.8/developing/usage/reading-objects/)
+
+Upon receiving a put() request for a key, the coordinator generates the vector
+clock for the new version and writes the new version locally. The coordinator
+then sends the new version (along with the new vector clock) to the N highest-
+ranked reachable nodes. If at least W-1 nodes respond then the write is
+considered successful.
+
+> In Riak KV a write is considered successful when the total number of
+> responding writes equals W. This need not be a durable write, which is a
+> separate value in Riak KV labeled DW.
+
+Similarly, for a get() request, the coordinator requests all existing versions
+of data for that key from the N highest-ranked reachable nodes in the preference
+list for that key, and then waits for R responses before returning the result to
+the client. If the coordinator ends up gathering multiple versions of the data,
+it returns all the versions it deems to be causally unrelated. The divergent
+versions are then reconciled and the reconciled version superseding the current
+versions is written back.
+
+> Same for Riak KV. Reconciling divergent versions in Riak KV is called
+> [Read Repair]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication/#read-repair).
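+>
+> A quick sketch of the quorum arithmetic above (plain Ruby, illustrative
+> only): choosing R + W > N guarantees that read and write sets overlap in at
+> least one replica.
+>
+>     def quorum_overlap?(n, r, w)
+>       r + w > n
+>     end
+>
+>     quorum_overlap?(3, 2, 2) # => true  -- the "quorum" default described above
+>     quorum_overlap?(3, 1, 1) # => false -- faster, but a read may miss the
+>                              #    latest write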
+
+
+### 4.6 Handling Failures: Hinted Handoff
+
+If Dynamo used a traditional quorum approach it would be unavailable during
+server failures and network partitions, and would have reduced durability even
+under the simplest of failure conditions. To remedy this it does not enforce
+strict quorum membership and instead it uses a “sloppy quorum”; all read and
+write operations are performed on the first N healthy nodes from the preference
+list, which may not always be the first N nodes encountered while walking the
+consistent hashing ring.
+
+> [Hinted handoff] is built into Riak KV's core.
+>
+> You can get a glimpse of Riak KV's preference list (or *preflist*) calculation
+> in the [Replication] walkthrough.
+
+[Hinted handoff]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#hinted-handoff
+[Replication]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/replication/
+
+Consider the example of Dynamo configuration given in <a href="#figure-2">Figure
+2</a> with N=3. In this example, if node A is temporarily down or unreachable
+during a write operation then a replica that would normally have lived on A will
+now be sent to node D. This is done to maintain the desired availability and
+durability guarantees. The replica sent to D will have a hint in its metadata
+that suggests which node was the intended recipient of the replica (in this case
+A). Nodes that receive hinted replicas will keep them in a separate local
+database that is scanned periodically. Upon detecting that A has recovered, D
+will attempt to deliver the replica to A. Once the transfer succeeds, D may
+delete the object from its local store without decreasing the total number of
+replicas in the system.
+
+Using hinted handoff, Dynamo ensures that read and write operations do not fail
+due to temporary node or network failures. Applications that need the highest
+level of availability can set W to 1, which ensures that a write is accepted as
+long as a single node in the system has durably written the key to its local
+store. Thus, the write request is only rejected if all nodes in the system are
+unavailable. However, in practice, most Amazon services in production set a
+higher W to meet the desired level of durability. A more detailed discussion of
+configuring N, R and W follows in section 6.
+
+> As mentioned previously, Riak KV does not require that a write be durable,
+> only that a vnode responds in the affirmative. If you require a durable write
+> in the way mentioned here, use DW.
+
+It is imperative that a highly available storage system be capable of handling
+the failure of entire data centers. Data center failures happen due to power
+outages, cooling failures, network failures, and natural disasters. Dynamo is
+configured such that each object is replicated across multiple data centers. In
+essence, the preference list of a key is constructed such that the storage nodes
+are spread across multiple data centers. These datacenters are connected through
+high speed network links. This scheme of replicating across multiple datacenters
+allows us to handle entire data center failures without a data outage.
+
+> [Multi Datacenter Replication] was previously only implemented in the
+> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. Now it
+> is available in all versions from Riak KV 2.9.8 onwards.
+
+[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/2.9.8/using/reference/v3-multi-datacenter/architecture/
+[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
+
+
+### 4.7 Handling permanent failures: Replica synchronization
+
+Hinted handoff works best if the system membership churn is low and node
+failures are transient. There are scenarios under which hinted replicas become
+unavailable before they can be returned to the original replica node. To handle
+this and other threats to durability, Dynamo implements an anti-entropy (replica
+synchronization) protocol to keep the replicas synchronized.
+
+> Read repair, mentioned above, is the simplest form of anti-entropy. But it is
+> passive, not active as this section describes.
+
+To detect the inconsistencies between replicas faster and to minimize the amount
+of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a hash tree
+where leaves are hashes of the values of individual keys. Parent nodes higher in
+the tree are hashes of their respective children. The principal advantage of a
+Merkle tree is that each branch of the tree can be checked independently without
+requiring nodes to download the entire tree or the entire data set. Moreover,
+Merkle trees help in reducing the amount of data that needs to be transferred
+while checking for inconsistencies among replicas. For instance, if the hash
+values of the root of two trees are equal, then the values of the leaf nodes in
+the tree are equal and the nodes require no synchronization. If not, it implies
+that the values of some replicas are different. In such cases, the nodes may
+exchange the hash values of children and the process continues until it reaches
+the leaves of the trees, at which point the hosts can identify the keys that are
+“out of sync”. Merkle trees minimize the amount of data that needs to be
+transferred for synchronization and reduce the number of disk reads performed
+during the anti-entropy process.
+
+> Riak KV implements a Merkle-tree-based Active Anti-Entropy (*AAE*).
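+>
+> As a sketch of the root-comparison idea (a toy model, not the AAE
+> implementation), a Merkle root can be computed by repeatedly hashing pairs
+> of child hashes; equal roots mean the replicas agree on the whole range:
+>
+>     require 'digest/sha1'
+>
+>     def merkle_root(hashes)
+>       return hashes.first if hashes.size == 1
+>       parents = hashes.each_slice(2).map { |pair| Digest::SHA1.hexdigest(pair.join) }
+>       merkle_root(parents)
+>     end
+>
+>     leaves = %w[k1=v1 k2=v2 k3=v3 k4=v4].map { |kv| Digest::SHA1.hexdigest(kv) }
+>     merkle_root(leaves) # differing roots trigger a walk down the tree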
+
+Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
+separate Merkle tree for each key range (the set of keys covered by a virtual
+node) it hosts. This allows nodes to compare whether the keys within a key range
+are up-to-date. In this scheme, two nodes exchange the root of the Merkle tree
+corresponding to the key ranges that they host in common. Subsequently, using
+the tree traversal scheme described above, the nodes determine if they have any
+differences and perform the appropriate synchronization action. The disadvantage
+with this scheme is that many key ranges change when a node joins or leaves the
+system, thereby requiring the tree(s) to be recalculated. This issue is
+addressed, however, by the refined partitioning scheme described in Section 6.2.
+
+
+### 4.8 Membership and Failure Detection
+
+> This section is well expressed in [Adding and Removing Nodes] and
+> [Failure Scenarios].
+
+[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency/
+
+#### 4.8.1 Ring Membership
+
+> Riak KV operators can trigger node management via the
+> [riak-admin command-line tool].
+
+[riak-admin command-line tool]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/
+
+In Amazon’s environment node outages (due to failures and maintenance tasks) are
+often transient but may last for extended intervals. A node outage rarely
+signifies a permanent departure and therefore should not result in rebalancing
+of the partition assignment or repair of the unreachable replicas. Similarly,
+manual error could result in the unintentional startup of new Dynamo nodes. For
+these reasons, it was deemed appropriate to use an explicit mechanism to
+initiate the addition and removal of nodes from a Dynamo ring. An administrator
+uses a command line tool or a browser to connect to a Dynamo node and issue a
+membership change to join a node to a ring or remove a node from a ring. The
+node that serves the request writes the membership change and its time of issue
+to persistent store. The membership changes form a history because nodes can be
+removed and added back multiple times.
+
+> Nodes are manually added using the `riak-admin cluster join` command.
+>
+> When a node permanently departs, rebalancing is triggered using the
+> `riak-admin cluster leave` command.
+
+A gossip-based protocol propagates membership changes and maintains an
+eventually consistent view of membership. Each node contacts a peer chosen at
+random every second and the two nodes efficiently reconcile their persisted
+membership change histories.
+
+> Riak KV's ring state holds membership information, and is propagated via
+> [gossiping], including random reconciliation, defaulting to once a minute.
+
+[gossiping]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#gossiping
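+
+> As a toy illustration of that reconciliation step (not Riak KV's actual
+> ring-state code), two membership views can be merged by keeping the most
+> recent entry for each node:
+>
+>     # each view maps node => [status, logical_timestamp]
+>     def merge_views(a, b)
+>       a.merge(b) { |_node, ours, theirs| ours[1] >= theirs[1] ? ours : theirs }
+>     end
+>
+>     view_a = { 'node1' => [:joined, 3], 'node2' => [:left,   5] }
+>     view_b = { 'node2' => [:joined, 6], 'node3' => [:joined, 1] }
+>     merge_views(view_a, view_b)
+>     # => {"node1"=>[:joined, 3], "node2"=>[:joined, 6], "node3"=>[:joined, 1]}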
+
+When a node starts for the first time, it chooses its set of tokens (virtual
+nodes in the consistent hash space) and maps nodes to their respective token
+sets. The mapping is persisted on disk and initially contains only the local
+node and token set. The mappings stored at different Dynamo nodes are reconciled
+during the same communication exchange that reconciles the membership change
+histories. Therefore, partitioning and placement information also propagates via
+the gossip-based protocol and each storage node is aware of the token ranges
+handled by its peers. This allows each node to forward a key’s read/write
+operations to the right set of nodes directly.
+
+> These tokens are vnodes (virtual nodes) in Riak KV.
+
+
+#### 4.8.2 External Discovery
+
+The mechanism described above could temporarily result in a logically
+partitioned Dynamo ring. For example, the administrator could contact node A to
+join A to the ring, then contact node B to join B to the ring. In this scenario,
+nodes A and B would each consider itself a member of the ring, yet neither would
+be immediately aware of the other. To prevent logical partitions, some Dynamo
+nodes play the role of seeds. Seeds are nodes that are discovered via an
+external mechanism and are known to all nodes. Because all nodes eventually
+reconcile their membership with a seed, logical partitions are highly unlikely.
+Seeds can be obtained either from static configuration or from a configuration
+service. Typically seeds are fully functional nodes in the Dynamo ring.
+
+> To rectify these sorts of logical partitions, multiple Riak cluster changes
+> are configured as one batch. Any changes must first be viewed with `riak-admin
+> cluster plan`, then the changes are committed with `riak-admin cluster
+> commit`. The new ring state is gossiped.
+>
+> See _[The Node Join Process]_ for more.
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+
+
+#### 4.8.3 Failure Detection
+
+Failure detection in Dynamo is used to avoid attempts to communicate with
+unreachable peers during get() and put() operations and when transferring
+partitions and hinted replicas. For the purpose of avoiding failed attempts at
+communication, a purely local notion of failure detection is entirely
+sufficient: node A may consider node B failed if node B does not respond to node
+A’s messages (even if B is responsive to node C’s messages). In the presence of
+a steady rate of client requests generating inter-node communication in the
+Dynamo ring, a node A quickly discovers that a node B is unresponsive when B
+fails to respond to a message; node A then uses alternate nodes to service
+requests that map to B's partitions; A periodically retries B to check for the
+latter's recovery. In the absence of client requests to drive traffic between
+two nodes, neither node really needs to know whether the other is reachable and
+responsive.
+
+Decentralized failure detection protocols use a simple gossip-style protocol
+that enables each node in the system to learn about the arrival (or departure)
+of other nodes. For detailed information on decentralized failure detectors and
+the parameters affecting their accuracy, the interested reader is referred to
+[8]. Early designs of Dynamo used a decentralized failure detector to maintain a
+globally consistent view of failure state. Later it was determined that the
+explicit node join and leave methods obviate the need for a global view of
+failure state.
+This is because nodes are notified of permanent node additions and removals by
+the explicit node join and leave methods, and temporary node failures are
+detected by the individual nodes when they fail to communicate with others
+(while forwarding requests).
+
+> Riak KV follows the same mechanism, by manually triggering permanent ring
+> state changes, and gossiping the new state.
+
+
+### 4.9 Adding/Removing Storage Nodes
+
+When a new node (say X) is added into the system, it gets assigned a number of
+tokens that are randomly scattered on the ring. For every key range that is
+assigned to node X, there may be a number of nodes (less than or equal to N)
+that are currently in charge of handling keys that fall within its token range.
+Due to the allocation of key ranges to X, some existing nodes no longer have to
+store some of their keys, and these nodes transfer those keys to X. Let us
+consider a simple bootstrapping scenario where node X is added to the ring shown
+in <a href="#figure-2">Figure 2</a> between A and B. When X is added to the
+system, it is in charge of storing keys in the ranges (F, G], (G, A] and (A, X].
+As a consequence, nodes B, C and D no longer have to store the keys in these
+respective ranges. Therefore, nodes B, C, and D will offer to, and upon
+confirmation from X, transfer the appropriate set of keys. When a node is
+removed from the system, the reallocation of keys happens in a reverse process.
+
+> Riak KV does not randomly assign vnodes, but rather, iterates through the list
+> of partitions, assigning them to nodes in a round-robin style.
+
+Operational experience has shown that this approach distributes the load of key
+distribution uniformly across the storage nodes, which is important to meet the
+latency requirements and to ensure fast bootstrapping. Finally, by adding a
+confirmation round between the source and the destination, it is made sure that
+the destination node does not receive any duplicate transfers for a given key
+range.
+
+
+## 5. Implementation
+
+In Dynamo, each storage node has three main software components: request
+coordination, membership and failure detection, and a local persistence engine.
+All these components are implemented in Java.
+
+> Riak KV is implemented in Erlang. Request coordination and membership behavior
+> is defined by [riak_core] and implemented by [Riak KV].
+
+[riak_core]: http://github.com/basho/riak_core
+[Riak KV]: http://github.com/basho/riak_kv
+
+Dynamo’s local persistence component allows for different storage engines to be
+plugged in. Engines that are in use are Berkeley Database (BDB) Transactional
+Data Store, BDB Java Edition, MySQL, and an in-memory buffer with persistent
+backing store. The main reason for designing a pluggable persistence component
+is to choose the storage engine best suited for an application’s access
+patterns. For instance, BDB can handle objects typically in the order of tens of
+kilobytes whereas MySQL can handle objects of larger sizes. Applications choose
+Dynamo’s local persistence engine based on their object size distribution. The
+majority of Dynamo’s production instances use BDB Transactional Data Store.
+
+> Riak KV ships with various [backend options]. [Bitcask] is the default, but
+> [LevelDB] and Main [Memory] are also used heavily in production (in that
+> order). You can also use more than one backend in production via the
+> [Multi]({{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/multi/) backend
+> configuration.
+>
+> Bitcask is a fast and reliable choice, but does have some limitations at very
+> large scales.
For larger clusters, you may want to choose LevelDB (which also +> supports [secondary indexes]). The Memory backend is an excellent choice when +> speed is important and durability is not. It also has TTL support. + +[backend options]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/ +[Bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask/ +[LevelDB]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb/ +[Memory]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/memory/ +[secondary indexes]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/secondary-indexes/ + +The request coordination component is built on top of an event-driven messaging +substrate where the message processing pipeline is split into multiple stages +similar to the SEDA architecture [24]. All communications are implemented using +Java NIO channels. The coordinator executes the read and write requests on +behalf of clients by collecting data from one or more nodes (in the case of +reads) or storing data at one or more nodes (for writes). Each client request +results in the creation of a state machine on the node that received the client +request. The state machine contains all the logic for identifying the nodes +responsible for a key, sending the requests, waiting for responses, potentially +doing retries, processing the replies and packaging the response to the client. +Each state machine instance handles exactly one client request. For instance, a +read operation implements the following state machine: (i) send read requests to +the nodes, (ii) wait for minimum number of required responses, (iii) if too few +replies were received within a given time bound, fail the request, (iv) +otherwise gather all the data versions and determine the ones to be returned and +(v) if versioning is enabled, perform syntactic reconciliation and generate an +opaque write context that contains the vector clock that subsumes all the +remaining versions. For the sake of brevity the failure handling and retry +states are left out. + +> Request coordination in Riak KV uses Erlang message passing, but follows a +> similar state machine. + +After the read response has been returned to the caller the state machine waits +for a small period of time to receive any outstanding responses. If stale +versions were returned in any of the responses, the coordinator updates those +nodes with the latest version. This process is called read repair because it +repairs replicas that have missed a recent update at an opportunistic time and +relieves the anti-entropy protocol from having to do it. + +> Riak KV implements [Read Repair]. + +[Read Repair]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication/#read-repair + +As noted earlier, write requests are coordinated by one of the top N nodes in +the preference list. Although it is desirable always to have the first node +among the top N to coordinate the writes thereby serializing all writes at a +single location, this approach has led to uneven load distribution resulting in +SLA violations. This is because the request load is not uniformly distributed +across objects. To counter this, any of the top N nodes in the preference list +is allowed to coordinate the writes. In particular, since each write usually +follows a read operation, the coordinator for a write is chosen to be the node +that replied fastest to the previous read operation which is stored in the +context information of the request. 
This optimization enables us to pick the +node that has the data that was read by the preceding read operation thereby +increasing the chances of getting “read-your-writes” consistency. It also +reduces variability in the performance of the request handling which improves +the performance at the 99.9 percentile. + + +## 6. Experiences & Lessons Learned + +> Much of this section relates to benchmarks run against Dynamo. You can run +> [Basho Bench] against your own Riak cluster to discover your own +> optimal values. + +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.8/using/performance/benchmarking/ + +Dynamo is used by several services with different configurations. These +instances differ by their version reconciliation logic, and read/write quorum +characteristics. The following are the main patterns in which Dynamo is used: + +* Business logic specific reconciliation: This is a popular use case for Dynamo. +Each data object is replicated across multiple nodes. In case of divergent +versions, the client application performs its own reconciliation logic. The +shopping cart service discussed earlier is a prime example of this category. Its +business logic reconciles objects by merging different versions of a customer’s +shopping cart. + +> Riak KV currently supports simple conflict resolution by way of read-repair, +> remanding more complex reconciliation to the client. There are several tools +> to help simplify this task, such as [Statebox]. +> +> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative +> Replicated Data Types)], for reconciling common data types like sets and +> counters. + +[Statebox]: https://github.com/mochi/statebox_riak +[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/2.9.8/developing/data-types/ + + +* Timestamp based reconciliation: This case differs from the previous one only +in the reconciliation mechanism. In case of divergent versions, Dynamo performs +simple timestamp based reconciliation logic of “last write wins”; i.e., the +object with the largest physical timestamp value is chosen as the correct +version. The service that maintains customer’s session information is a good +example of a service that uses this mode. + +> Riak also supports this for high-performance cases where accuracy is less +> important than speed. + +* High performance read engine: While Dynamo is built to be an “always +writeable” data store, a few services are tuning its quorum characteristics and +using it as a high performance read engine. Typically, these services have a +high read request rate and only a small number of updates. In this +configuration, typically R is set to be 1 and W to be N. For these services, +Dynamo provides the ability to partition and replicate their data across +multiple nodes thereby offering incremental scalability. Some of these instances +function as the authoritative persistence cache for data stored in more heavy +weight backing stores. Services that maintain product catalog and promotional +items fit in this category. + +> Riak can be used in this manner. + +The main advantage of Dynamo is that its client applications can tune the values +of N, R and W to achieve their desired levels of performance, availability and +durability. For instance, the value of N determines the durability of each +object. A typical value of N used by Dynamo’s users is 3. + +The values of W and R impact object availability, durability and consistency. 
+
+For instance, if W is set to 1, then the system will never reject a write
+request as long as there is at least one node in the system that can
+successfully process a write request. However, low values of W and R can
+increase the risk of inconsistency as write requests are deemed successful and
+returned to the clients even if they are not processed by a majority of the
+replicas. This also introduces a vulnerability window for durability when a
+write request is successfully returned to the client even though it has been
+persisted at only a small number of nodes.
+
+Traditional wisdom holds that durability and availability go hand-in-hand.
+However, this is not necessarily true here. For instance, the vulnerability
+window for durability can be decreased by increasing W. This may increase the
+probability of rejecting requests (thereby decreasing availability) because more
+storage hosts need to be alive to process a write request.
+
+The common (N,R,W) configuration used by several instances of Dynamo is (3,2,2).
+These values are chosen to meet the necessary levels of performance, durability,
+consistency, and availability SLAs.
+
+All the measurements presented in this section were taken on a live system
+operating with a configuration of (3,2,2) and running a couple hundred nodes
+with homogeneous hardware configurations. As mentioned earlier, each instance of
+Dynamo contains nodes that are located in multiple datacenters. These
+datacenters are typically connected through high speed network links. Recall
+that to generate a successful get (or put) response R (or W) nodes need to
+respond to the coordinator. Clearly, the network latencies between datacenters
+affect the response time and the nodes (and their datacenter locations) are
+chosen such that the applications’ target SLAs are met.
+
+> Ditto for Riak.
+
+### 6.1 Balancing Performance and Durability
+
+While Dynamo’s principal design goal is to build a highly available data store,
+performance is an equally important criterion in Amazon’s platform. As noted
+earlier, to provide a consistent customer experience, Amazon’s services set
+their performance targets at higher percentiles (such as the 99.9th or 99.99th
+percentiles). A typical SLA required of services that use Dynamo is that 99.9%
+of the read and write requests execute within 300ms.
+
+Since Dynamo is run on standard commodity hardware components that have far less
+I/O throughput than high-end enterprise servers, providing consistently high
+performance for read and write operations is a non-trivial task. The involvement
+of multiple storage nodes in read and write operations makes it even more
+challenging, since the performance of these operations is limited by the slowest
+of the R or W replicas. <a href="#figure-4">Figure 4</a> shows the average and
+99.9th percentile latencies of Dynamo’s read and write operations during a
+period of 30 days. As seen in the figure, the latencies exhibit a clear diurnal
+pattern which is a result of the diurnal pattern in the incoming request rate
+(i.e., there is a significant difference in request rate between the daytime and
+night). Moreover, the write latencies are obviously higher than read latencies
+because write operations always result in disk access. Also, the 99.9th
+percentile latencies are around 200 ms and are an order of magnitude higher than
+the averages.
This is because the 99.9th percentile latencies are affected by +several factors such as variability in request load, object sizes, and locality +patterns. + +**<figure id="figure-4" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure4.png"> + <figcaption> + Figure 4: Average and 99.9 percentiles of latencies for read and write + requests during our peak request season of December 2006. The intervals + between consecutive ticks in the x-axis correspond to 12 hours. Latencies + follow a diurnal pattern similar to the request rate and 99.9 percentile + latencies are an order of magnitude higher than averages. + </figcaption> +</figure>** + +While this level of performance is acceptable for a number of services, a few +customer-facing services required higher levels of performance. For these +services, Dynamo provides the ability to trade-off durability guarantees for +performance. In the optimization each storage node maintains an object buffer in +its main memory. Each write operation is stored in the buffer and gets +periodically written to storage by a writer thread. In this scheme, read +operations first check if the requested key is present in the buffer. If so, the +object is read from the buffer instead of the storage engine. + +> This is more similar to Riak's W value, since only DW requires a durable write +> to respond as a success. + +This optimization has resulted in lowering the 99.9th percentile latency by a +factor of 5 during peak traffic even for a very small buffer of a thousand +objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure, +write buffering smoothes out higher percentile latencies. Obviously, this scheme +trades durability for performance. In this scheme, a server crash can result in +missing writes that were queued up in the buffer. To reduce the durability risk, +the write operation is refined to have the coordinator choose one out of the N +replicas to perform a “durable write”. Since the coordinator waits only for W +responses, the performance of the write operation is not affected by the +performance of the durable write operation performed by a single replica. + +**<figure id="figure-5" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure5.png"> + <figcaption> + Figure 5: Comparison of performance of 99.9th percentile latencies for + buffered vs. non-buffered writes over a period of 24 hours. The intervals + between consecutive ticks in the x-axis correspond to one hour. + </figcaption> +</figure>** + +> Setting DW=1 will replicate this behavior. + + +### 6.2 Ensuring Uniform Load distribution + +Dynamo uses consistent hashing to partition its key space across its replicas +and to ensure uniform load distribution. A uniform key distribution can help us +achieve uniform load distribution assuming the access distribution of keys is +not highly skewed. In particular, Dynamo’s design assumes that even where there +is a significant skew in the access distribution there are enough keys in the +popular end of the distribution so that the load of handling popular keys can be +spread across the nodes uniformly through partitioning. This section discusses +the load imbalance seen in Dynamo and the impact of different partitioning +strategies on load distribution. + +> Riak follows a SHA1 based consistent hashing for [partitioning]. 
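+>
+> As a rough sketch (illustrative only, not riak_core's ring logic), mapping a
+> bucket/key pair to one of a fixed number of equal-sized partitions looks
+> like this:
+>
+>     require 'digest/sha1'
+>
+>     RING_SIZE      = 2**160  # SHA1 output space
+>     NUM_PARTITIONS = 64      # fixed when the cluster is built
+>
+>     def partition_index(bucket, key)
+>       hash = Digest::SHA1.hexdigest(bucket + key).to_i(16)
+>       hash / (RING_SIZE / NUM_PARTITIONS)
+>     end
+>
+>     partition_index('users', 'alice') # => an index in 0...64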
+
+[partitioning]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication/#understanding-replication-by-example
+
+To study the load imbalance and its correlation with request load, the total
+number of requests received by each node was measured for a period of 24 hours,
+broken down into intervals of 30 minutes. In a given time window, a node is
+considered to be “in-balance” if the node’s request load deviates from the
+average load by less than a certain threshold (here 15%). Otherwise the node was
+deemed “out-of-balance”. <a href="#figure-6">Figure 6</a> presents the fraction
+of nodes that are “out-of-balance” (henceforth, “imbalance ratio”) during this
+time period. For reference, the corresponding request load received by the
+entire system during this time period is also plotted. As seen in the figure,
+the imbalance ratio decreases with increasing load. For instance, during low
+loads the imbalance ratio is as high as 20% and during high loads it is close to
+10%. Intuitively, this can be explained by the fact that under high loads, a
+large number of popular keys are accessed and due to uniform distribution of
+keys the load is evenly distributed. However, during low loads (where load is
+1/8th of the measured peak load), fewer popular keys are accessed, resulting in
+a higher load imbalance.
+
+**<figure id="figure-6" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure6.png">
+  <figcaption>
+    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
+    request load is above a certain threshold from the average system load) and
+    their corresponding request load. The interval between ticks in x-axis
+    corresponds to a time period of 30 minutes.
+  </figcaption>
+</figure>**
+
+<i>This section discusses how Dynamo’s partitioning scheme has evolved over time
+and its implications on load distribution.</i>
+
+<strong>Strategy 1:</strong> T random tokens per node and partition by token
+value: This was the initial strategy deployed in production (and described in
+Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
+at random from the hash space). The tokens of all nodes are ordered according to
+their values in the hash space. Every two consecutive tokens define a range. The
+last token and the first token form a range that "wraps" around from the highest
+value to the lowest value in the hash space. Because the tokens are chosen
+randomly, the ranges vary in size. As nodes join and leave the system, the token
+set changes and consequently the ranges change. Note that the space needed to
+maintain the membership at each node increases linearly with the number of nodes
+in the system.
+
+> Riak uses equal-sized partitions with a round-robin distribution, not
+> variably-sized partitions that are randomly distributed.
+
+While using this strategy, the following problems were encountered. First, when
+a new node joins the system, it needs to “steal” its key ranges from other
+nodes. However, the nodes handing the key ranges off to the new node have to
+scan their local persistence store to retrieve the appropriate set of data
+items. Note that performing such a scan operation on a production node is tricky
+as scans are highly resource intensive operations and they need to be executed
+in the background without affecting the customer performance. This requires us
+to run the bootstrapping task at the lowest priority.
+However, this significantly slows the bootstrapping process, and during the busy
+shopping season, when the nodes are handling millions of requests a day, the
+bootstrapping has taken almost a day to complete. Second, when a node
+joins/leaves the system, the key ranges handled by many nodes change and the
+Merkle trees for the new ranges need to be recalculated, which is a non-trivial
+operation to perform on a production system. Finally, there was no easy way to
+take a snapshot of the entire key space due to the randomness in key ranges, and
+this made the process of archival complicated. In this scheme, archiving the
+entire key space requires us to retrieve the keys from each node separately,
+which is highly inefficient.
+
+The fundamental issue with this strategy is that the schemes for data
+partitioning and data placement are intertwined. For instance, in some cases, it
+is preferred to add more nodes to the system in order to handle an increase in
+request load. However, in this scenario, it is not possible to add nodes without
+affecting data partitioning. Ideally, it is desirable to use independent schemes
+for partitioning and placement. To this end, the following strategies were
+evaluated:
+
+<strong>Strategy 2:</strong> T random tokens per node and equal sized
+partitions: In this strategy, the hash space is divided into Q equally sized
+partitions/ranges and each node is assigned T random tokens. Q is usually set
+such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In
+this strategy, the tokens are only used to build the function that maps values
+in the hash space to the ordered lists of nodes and not to decide the
+partitioning. A partition is placed on the first N unique nodes that are
+encountered while walking the consistent hashing ring clockwise from the end of
+the partition. <a href="#figure-7">Figure 7</a> illustrates this strategy for
+N=3. In this example, nodes A, B, C are encountered while walking the ring from
+the end of the partition that contains key k1. The primary advantages of this
+strategy are: (i) decoupling of partitioning and partition placement, and (ii)
+enabling the possibility of changing the placement scheme at runtime.
+
+> As mentioned before, Riak uses equal-sized partitions, but not
+> random distribution.
+
+**<figure id="figure-7" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure7-small.png">
+  <figcaption>
+    Figure 7: Partitioning and placement of keys in the three strategies. A, B,
+    and C depict the three unique nodes that form the preference list for the
+    key k1 on the consistent hashing ring (N=3). The shaded area indicates the
+    key range for which nodes A, B, and C form the preference list. Dark arrows
+    indicate the token locations for various nodes.
+  </figcaption>
+</figure>**
+
+<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
+Similar to strategy 2, this strategy divides the hash space into Q equally sized
+partitions and the placement of partitions is decoupled from the partitioning
+scheme. Moreover, each node is assigned Q/S tokens where S is the number of
+nodes in the system. When a node leaves the system, its tokens are randomly
+distributed to the remaining nodes such that these properties are preserved.
+Similarly, when a node joins the system it "steals" tokens from nodes in the
+system in a way that preserves these properties.
+
+> Riak most closely follows strategy 3.
+>
+> See [The Node Join Process] and [Replacing a Node].
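+>
+> A toy version of the claim step that Riak's round-robin style implies
+> (illustrative only): iterate through the partitions and deal them out to
+> nodes in order, so each node ends up with roughly Q/S partitions.
+>
+>     def assign_partitions(num_partitions, nodes)
+>       (0...num_partitions).map { |i| [i, nodes[i % nodes.size]] }.to_h
+>     end
+>
+>     assign_partitions(8, %w[node1 node2 node3])
+>     # => {0=>"node1", 1=>"node2", 2=>"node3", 3=>"node1", ...}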
+ +[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster +[Replacing a Node]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/replacing-node/ + +The efficiency of these three strategies is evaluated for a system with S=30 and +N=3. However, comparing these different strategies in a fair manner is hard as +different strategies have different configurations to tune their efficiency. For +instance, the load distribution property of strategy 1 depends on the number of +tokens (i.e., T) while strategy 3 depends on the number of partitions (i.e., Q). +One fair way to compare these strategies is to evaluate the skew in their load +distribution while all strategies use the same amount of space to maintain their +membership information. For instance, in strategy 1 each node needs to maintain +the token positions of all the nodes in the ring and in strategy 3 each node +needs to maintain the information regarding the partitions assigned to each +node. + +In our next experiment, these strategies were evaluated by varying the relevant +parameters (T and Q). The load balancing efficiency of each strategy was +measured for different sizes of membership information that needs to be +maintained at each node, where Load balancing efficiency is defined as the ratio +of average number of requests served by each node to the maximum number of +requests served by the hottest node. + +The results are given in <a href="#figure-8">Figure 8</a>. As seen in the +figure, strategy 3 achieves the best load balancing efficiency and strategy 2 +has the worst load balancing efficiency. For a brief time, Strategy 2 served as +an interim setup during the process of migrating Dynamo instances from using +Strategy 1 to Strategy 3. Compared to Strategy 1, Strategy 3 achieves better +efficiency and reduces the size of membership information maintained at each +node by three orders of magnitude. While storage is not a major issue the nodes +gossip the membership information periodically and as such it is desirable to +keep this information as compact as possible. In addition to this, strategy 3 is +advantageous and simpler to deploy for the following reasons: (i) Faster +bootstrapping/recovery: Since partition ranges are fixed, they can be stored in +separate files, meaning a partition can be relocated as a unit by simply +transferring the file (avoiding random accesses needed to locate specific +items). This simplifies the process of bootstrapping and recovery. (ii) Ease of +archival: Periodical archiving of the dataset is a mandatory requirement for +most of Amazon storage services. Archiving the entire dataset stored by Dynamo +is simpler in strategy 3 because the partition files can be archived separately. +By contrast, in Strategy 1, the tokens are chosen randomly and, archiving the +data stored in Dynamo requires retrieving the keys from individual nodes +separately and is usually inefficient and slow. The disadvantage of strategy 3 +is that changing the node membership requires coordination in order to preserve +the properties required of the assignment. + +**<figure id="figure-8" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure8.png"> + <figcaption> + Figure 8: Comparison of the load distribution efficiency of different + strategies for system with 30 nodes and N=3 with equal amount of metadata + maintained at each node. 
+    The values of the system size and number of replicas are based on the
+    typical configuration deployed for the majority of our services.
+  </figcaption>
+</figure>**
+
+### 6.3 Divergent Versions: When and How Many?
+
+As noted earlier, Dynamo is designed to trade off consistency for availability.
+To understand the precise impact of different failures on consistency, detailed
+data is required on multiple factors: outage length, type of failure, component
+reliability, workload etc. Presenting these numbers in detail is outside of the
+scope of this paper. However, this section discusses a good summary metric: the
+number of divergent versions seen by the application in a live production
+environment.
+
+> This first statement should be read carefully. It's probably more correct to
+> say that Dynamo (and Riak) provides no consistency guarantees, and allows
+> users to trade availability for durability/latency.
+
+Divergent versions of a data item arise in two scenarios. The first is when the
+system is facing failure scenarios such as node failures, data center failures,
+and network partitions. The second is when the system is handling a large number
+of concurrent writers to a single data item and multiple nodes end up
+coordinating the updates concurrently. From both a usability and efficiency
+perspective, it is preferred to keep the number of divergent versions at any
+given time as low as possible. If the versions cannot be syntactically
+reconciled based on vector clocks alone, they have to be passed to the business
+logic for semantic reconciliation. Semantic reconciliation introduces additional
+load on services, so it is desirable to minimize the need for it.
+
+In our next experiment, the number of versions returned to the shopping cart
+service was profiled for a period of 24 hours. During this period, 99.94% of
+requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
+of requests saw 3 versions and 0.00009% of requests saw 4 versions. This shows
+that divergent versions are created rarely.
+
+Experience shows that the increase in the number of divergent versions is caused
+not by failures but by an increase in the number of concurrent writers. The
+increase in the number of concurrent writes is usually triggered by busy robots
+(automated client programs) and rarely by humans. This issue is not discussed in
+detail due to the sensitive nature of the story.
+
+### 6.4 Client-driven or Server-driven Coordination
+
+As mentioned in Section 5, Dynamo has a request coordination component that uses
+a state machine to handle incoming requests. Client requests are uniformly
+assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
+coordinator for a read request. Write requests on the other hand will be
+coordinated by a node in the key’s current preference list. This restriction is
+due to the fact that these preferred nodes have the added responsibility of
+creating a new version stamp that causally subsumes the version that has been
+updated by the write request. Note that if Dynamo’s versioning scheme is based
+on physical timestamps, any node can coordinate a write request.
+
+> In Riak, a server-side load-balancer is an optional configuration. You
+> generally use either virtual IPs or reverse-proxies.
+>
+> See [Load Balancing] for more information.
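+>
+> For intuition, a preference list can be sketched as a clockwise walk from
+> the key's position that keeps the first N distinct nodes (a toy model, not
+> riak_core's preflist implementation):
+>
+>     def preference_list(ring, start_index, n)
+>       ring.rotate(start_index).uniq.first(n)
+>     end
+>
+>     ring = %w[node1 node2 node3 node1 node2 node3 node1 node2]
+>     preference_list(ring, 5, 3) # => ["node3", "node1", "node2"]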
+ +[Load Balancing]: {{<baseurl>}}riak/kv/2.9.8/configuring/load-balancing-proxy/ + +An alternative approach to request coordination is to move the state machine to +the client nodes. In this scheme client applications use a library to perform +request coordination locally. A client periodically picks a random Dynamo node +and downloads its current view of Dynamo membership state. Using this +information the client can determine which set of nodes form the preference list +for any given key. Read requests can be coordinated at the client node thereby +avoiding the extra network hop that is incurred if the request were assigned to +a random Dynamo node by the load balancer. Writes will either be forwarded to a +node in the key’s preference list or can be coordinated locally if Dynamo is +using timestamps based versioning. + +> Many [client libraries] provide built-in node request coordination. +> +> For example, using the Ruby driver, you could specify three nodes like this: +> +> client = Riak::Client.new(nodes: [ +> {host: '10.0.0.1'}, +> {host: '10.0.0.2'}, +> {host: '10.0.0.3'} +> ]) +> +> Note that the Riak clients do not coordinate with Riak's preference list, but +> simply round-robin requests, letting the Riak cluster handle routing. + +[client libraries]: {{<baseurl>}}riak/kv/2.9.8/developing/client-libraries/ + +An important advantage of the client-driven coordination approach is that a load +balancer is no longer required to uniformly distribute client load. Fair load +distribution is implicitly guaranteed by the near uniform assignment of keys to +the storage nodes. Obviously, the efficiency of this scheme is dependent on how +fresh the membership information is at the client. Currently clients poll a +random Dynamo node every 10 seconds for membership updates. A pull based +approach was chosen over a push based one as the former scales better with large +number of clients and requires very little state to be maintained at servers +regarding clients. However, in the worst case the client can be exposed to stale +membership for duration of 10 seconds. In case, if the client detects its +membership table is stale (for instance, when some members are unreachable), it +will immediately refresh its membership information. + +<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th +percentile and averages that were observed for a period of 24 hours using +client-driven coordination compared to the server-driven approach. As seen in +the table, the client-driven coordination approach reduces the latencies by at +least 30 milliseconds for 99.9th percentile latencies and decreases the average +by 3 to 4 milliseconds. The latency improvement is because the client-driven +approach eliminates the overhead of the load balancer and the extra network hop +that may be incurred when a request is assigned to a random node. As seen in the +table, average latencies tend to be significantly lower than latencies at the +99.9th percentile. This is because Dynamo’s storage engine caches and write +buffer have good hit ratios. Moreover, since the load balancers and network +introduce additional variability to the response time, the gain in response time +is higher for the 99.9th percentile than the average. + +<table id="table-2"> + <caption> + Table 2: Performance of client-driven and server-driven + coordination approaches. 
+ </caption> + <tr> + <th></th> + <th>99.9th percentile read latency (ms)</th> + <th>99.9th percentile write latency (ms)</th> + <th>Average read latency (ms)</th> + <th>Average write latency (ms)</th> + </tr> + <tr> + <th>Server-driven</th> + <td>68.9</td> + <td>68.5</td> + <td>3.9</td> + <td>4.02</td> + </tr> + <tr> + <th>Client-driven</th> + <td>30.4</td> + <td>30.4</td> + <td>1.55</td> + <td>1.9</td> + </tr> +</table> + +### 6.5 Balancing background vs. foreground tasks + +Each node performs different kinds of background tasks for replica +synchronization and data handoff (either due to hinting or adding/removing +nodes) in addition to its normal foreground put/get operations. In early +production settings, these background tasks triggered the problem of resource +contention and affected the performance of the regular put and get operations. +Hence, it became necessary to ensure that background tasks ran only when the +regular critical operations are not affected significantly. To this end, the +background tasks were integrated with an admission control mechanism. Each of +the background tasks uses this controller to reserve runtime slices of the +resource (e.g. database), shared across all background tasks. A feedback +mechanism based on the monitored performance of the foreground tasks is employed +to change the number of slices that are available to the background tasks. + +> Riak does this, too. For example, hinted handoff runs in the background at a +> low level, so as not to overwhelm a cluster when nodes are added/removed. + +The admission controller constantly monitors the behavior of resource accesses +while executing a "foreground" put/get operation. Monitored aspects include +latencies for disk operations, failed database accesses due to lock-contention +and transaction timeouts, and request queue wait times. This information is used +to check whether the percentiles of latencies (or failures) in a given trailing +time window are close to a desired threshold. For example, the background +controller checks to see how close the 99th percentile database read latency +(over the last 60 seconds) is to a preset threshold (say 50ms). The controller +uses such comparisons to assess the resource availability for the foreground +operations. Subsequently, it decides on how many time slices will be available +to background tasks, thereby using the feedback loop to limit the intrusiveness +of the background activities. Note that a similar problem of managing background +tasks has been studied in [4]. + +### 6.6 Discussion + +This section summarizes some of the experiences gained during the process of +implementation and maintenance of Dynamo. Many Amazon internal services have +used Dynamo for the past two years and it has provided significant levels of +availability to its applications. In particular, applications have received +successful responses (without timing out) for 99.9995% of its requests and no +data loss event has occurred to date. + +Moreover, the primary advantage of Dynamo is that it provides the necessary +knobs using the three parameters of (N,R,W) to tune their instance based on +their needs.. Unlike popular commercial data stores, Dynamo exposes data +consistency and reconciliation logic issues to the developers. At the outset, +one may expect the application logic to become more complex. However, +historically, Amazon’s platform is built for high availability and many +applications are designed to handle different failure modes and inconsistencies +that may arise. 
Hence, porting such applications to use Dynamo was a relatively +simple task. For new applications that want to use Dynamo, some analysis is +required during the initial stages of the development to pick the right conflict +resolution mechanisms that meet the business case appropriately. Finally, Dynamo +adopts a full membership model where each node is aware of the data hosted by +its peers. To do this, each node actively gossips the full routing table with +other nodes in the system. This model works well for a system that contains +couple of hundreds of nodes. However, scaling such a design to run with tens of +thousands of nodes is not trivial because the overhead in maintaining the +routing table increases with the system size. This limitation might be overcome +by introducing hierarchical extensions to Dynamo. Also, note that this problem +is actively addressed by O(1) DHT systems(e.g., [14]). + +> This is equally true for Riak. As mentioned above, consider running +> [Basho Bench] to help discover your optimal setup. Nothing will give you +> better numbers than real experimentation. + +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.8/using/performance/benchmarking/ + +## 7. Conclusions + +> This paper was an overview of Riak from a Dynamo point-of-view. To get a +> better sense of the Riak ecosystem, read our ever-expanding [documentation]. + +[documentation]: {{<baseurl>}} + +This paper described Dynamo, a highly available and scalable data store, used +for storing state of a number of core services of Amazon.com’s e-commerce +platform. Dynamo has provided the desired levels of availability and performance +and has been successful in handling server failures, data center failures and +network partitions. Dynamo is incrementally scalable and allows service owners +to scale up and down based on their current request load. Dynamo allows service +owners to customize their storage system to meet their desired performance, +durability and consistency SLAs by allowing them to tune the parameters N, R, +and W. + +The production use of Dynamo for the past year demonstrates that decentralized +techniques can be combined to provide a single highly-available system. Its +success in one of the most challenging application environments shows that an +eventual-consistent storage system can be a building block for highly-available +applications. 
+ + + + diff --git a/content/riak/kv/2.9.8/learn/glossary.md b/content/riak/kv/2.9.8/learn/glossary.md new file mode 100644 index 0000000000..5985258674 --- /dev/null +++ b/content/riak/kv/2.9.8/learn/glossary.md @@ -0,0 +1,358 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +aliases: +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.8/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/2.9.8/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/2.9.8/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/2.9.8/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/2.9.8/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.8/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/2.9.8/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/2.9.8/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/2.9.8/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
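+
+To inspect the membership and ring state that a node has learned via gossip,
+you can ask any node directly. A quick sketch using the standard admin tooling:
+
+```bash
+riak-admin member-status   # membership list and ring ownership percentages
+riak-admin ring-status     # ring readiness and any unreachable nodes
+```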
+ +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Hinted Handoff + +Hinted handoff is a technique for dealing with node failure in the Riak +cluster in which neighboring nodes temporarily take over storage +operations for the failed node. When the failed node returns to the +cluster, the updates received by the neighboring nodes are handed off to +it. + +Hinted handoff allows Riak to ensure database availability. When a node +fails, Riak can continue to handle requests as if the node were still +there. + +* [Recovering a Failed Node][repair recover failure recovery] + +## Key + +Keys are unique object identifiers in Riak and are scoped within buckets +and bucket types. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Lager + +[Lager] is an Erlang/OTP framework that +ships as Riak's default logger. + +## MapReduce + +Riak's MapReduce gives developers the capability to perform more +powerful queries over the data stored in their key/value data. + +* [Using MapReduce][usage mapreduce] + +## Node + +A node is analogous to a physical server. Nodes run a certain number of +vnodes, each of which claims a partition in the Riak Ring key space. + +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Object + +An object is another name for a value. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Partition + +Partitions are the spaces into which a Riak cluster is divided. Each +vnode in Riak is responsible for a partition. Data is stored on a set +number of partitions determined by the `n_val` setting, with the target +partitions chosen statically by applying consistent hashing to an +object's key. + +* [Clusters][concept clusters] +* [Eventual Consistency][concept eventual consistency] +* [Cluster Capacity Planning][plan cluster capacity] + +## Quorum + +Quorum in Riak has two meanings: + +* The quantity of replicas that must respond to a read or write request + before it is considered successful. This is defined as a bucket + property or as one of the relevant parameters to a single request + (R,W,DW,RW). +* A symbolic quantity for the above, `quorum`, which is equivalent to + `n_val` / 2 + 1. The default setting is `2`. + +* [Eventual Consistency][concept eventual consistency] +* [Replication properties][apps replication properties] +* [Understanding Riak's Configurable Behaviors] + +## Sloppy Quorum + +During failure scenarios, in which available nodes < total nodes, sloppy +quorum is used to ensure that Riak is still available to take writes. +When a primary node is unavailable, another node will accept its write +requests. When the node returns, data is transferred to the primary node +via the [Hinted Handoff](#hinted-handoff) process. + +## Read Repair + +Read repair is an anti-entropy mechanism that Riak uses to +optimistically update stale replicas when they reply to a read request +with stale data. + +* [More about Read Repair][concept replication] + +## Replica + +Replicas are copies of data stored in Riak. The number of replicas +required for both successful reads and writes is configurable in Riak +and should be set based on your application's consistency and +availability requirements. 
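+
+As an illustration, a bucket's replica count can be inspected or changed over
+the HTTP interface (the bucket name and host here are placeholders):
+
+```bash
+# Read the current properties, including n_val
+curl http://localhost:8098/buckets/my_bucket/props
+
+# Raise the replica count for this bucket to 5
+curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"n_val":5}}'
+```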
+ +* [Eventual Consistency][concept eventual consistency] +* [Understanding Riak's Configurable Behaviors] + +## Riak Core + +Riak Core is the modular distributed systems framework that serves as +the foundation for Riak's scalable architecture. + +* [Riak Core] +* [Where To Start With Riak Core] + +## Riak KV + +Riak KV is the key/value datastore for Riak. + +* [Riak KV] + +## Riak Pipe + +Riak Pipe is the processing layer that powers Riak's MapReduce. It's +best described as "UNIX pipes for Riak." + +* [Riak Pipe] +* [Riak Pipe - the New MapReduce Power] +* [Riak Pipe - Riak's Distributed Processing Framework] + +## Riak Search + +Riak Search is a distributed, scalable, failure-tolerant, realtime, +full-text search engine integrating [Apache +Solr](https://lucene.apache.org/solr/) with Riak KV. + +* [Using Search][usage search] + +## Ring + +The Riak Ring is a 160-bit integer space. This space is equally divided +into partitions, each of which is claimed by a vnode, which themselves +reside on actual physical server nodes. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Cluster Capacity Planning][plan cluster capacity] + +## Secondary Indexing (2i) + +Secondary Indexing in Riak gives developers the ability to tag an object +stored in Riak with one or more values which can then be queried. + +* [Using Secondary Indexes][usage secondary-indexes] +* [Repairing Indexes][repair recover repairs] + +## Strong Consistency + +While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater +enable you to apply strong consistency guarantees to some or all of your +data, thus using Riak as a CP (consistent plus partition-tolerant) +rather than AP (highly available plus partition-tolerant) system. + +* [Strong Consistency Concept][concept strong consistency] +* [Using Strong Consistency][cluster ops strong consistency] + +## Value + +Riak is best described as a key/value store. In versions of Riak prior +to 2.0, all "values" are opaque BLOBs (binary large objects) identified +with a unique key. Values can be any type of data, including a string, a +JSON object, a text document, etc. Modifying values involves fetching +the value that exists in Riak and substituting it for a new value; +operations on values are thus basic CRUD operations. + +[Riak Data Types][dev data types], added in version 2.0, are an important +exception to this. While still considered values---because they are +stored in bucket type/bucket/key locations, like anything in Riak---Riak +Data Types are not BLOBs and are modified by Data Type-specific +operations. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] +* [Data Types][dev data types] + + +## Vector Clock + +Riak utilizes vector clocks (or _vclocks_) to handle version control. +Since any node in a Riak cluster is able to handle a request, and not +all nodes need to participate, data versioning is required to keep track +of a current value. When a value is stored in Riak, it is tagged with a +vector clock and establishes the initial version. When it is updated, +the client provides the vector clock of the object being modified so +that this vector clock can be extended to reflect the update. Riak can +then compare vector clocks on different versions of the object and +determine certain attributes of the data. 
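+
+In practice this means reading an object, capturing its vector clock, and
+supplying that clock with the update. A sketch over the HTTP API (bucket, key,
+value, and the clock shown are illustrative):
+
+```bash
+# The response headers include the object's vector clock
+curl -i http://localhost:8098/buckets/my_bucket/keys/my_key
+# ... X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpk...
+
+# Pass the clock back so Riak knows which version this update descends from
+curl -XPUT http://localhost:8098/buckets/my_bucket/keys/my_key \
+  -H "Content-Type: text/plain" \
+  -H "X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpk..." \
+  -d 'updated value'
+```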
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + + + + diff --git a/content/riak/kv/2.9.8/learn/new-to-nosql.md b/content/riak/kv/2.9.8/learn/new-to-nosql.md new file mode 100644 index 0000000000..6762a7525a --- /dev/null +++ b/content/riak/kv/2.9.8/learn/new-to-nosql.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: 2.9.8 +#menu: +# riak_kv-2.9.8: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +aliases: +--- + +**TODO: Add content (not sure where this lives in existing docs)** + + + + diff --git a/content/riak/kv/2.9.8/learn/use-cases.md b/content/riak/kv/2.9.8/learn/use-cases.md new file mode 100644 index 0000000000..107df71492 --- /dev/null +++ b/content/riak/kv/2.9.8/learn/use-cases.md @@ -0,0 +1,405 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/2.9.8/dev/data-modeling/ + - /riak/kv/2.9.8/dev/data-modeling/ +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/2.9.8/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/2.9.8/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/2.9.8/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. 
This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships.
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+
+Riak's tunable [replication properties] can be set to favor fast read
+performance. By setting R to 1, only one of N replicas will need to be returned
+to complete a read operation, yielding lower read latency than an R value equal
+to the number of replicas (i.e. R=N). This is ideal for advertising traffic,
+which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster used to perform more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or use Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+patterns of reading and writing data to Riak are very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value.
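+
+As a concrete sketch (the device ID, timestamp, and reading are illustrative
+values), each update could be written over Riak's HTTP API like this:
+
+```bash
+# One bucket per device; the interval timestamp serves as the key
+curl -XPUT http://localhost:8098/buckets/sensor-1f4a/keys/2020-12-06T10:05:00Z \
+  -H "Content-Type: application/json" \
+  -d '{"temperature": 21.4, "humidity": 40.2}'
+```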
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye on the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
+      Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type of object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account or have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of modeling with
+user data. A common example would be storing data for assembling a social
+network timeline. To create a user timeline, you could use a `timeline` bucket
+in Riak and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs that could then be
+used to retrieve the full information from another bucket, or perhaps the full
+status updates themselves. If you want to store additional data, such as a
+timestamp, category, or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+to the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client.
Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the content post, though with a different
+bucket/key combination. Another possibility would be to store each comment with
+its own ID. Loading the full view with comments would require your application
+to call from the posts and comments buckets to assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/learn/why-riak-kv.md b/content/riak/kv/2.9.8/learn/why-riak-kv.md
new file mode 100644
index 0000000000..c6754da227
--- /dev/null
+++ b/content/riak/kv/2.9.8/learn/why-riak-kv.md
@@ -0,0 +1,225 @@
+---
+title: "Why Riak KV?"
+description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Why Riak KV?" + identifier: "learn_why_riak_kv" + weight: 100 + parent: "learn" +toc: true +aliases: + - /riak/2.9.8/theory/why-riak/ + - /riak/kv/2.9.8/theory/why-riak/ +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.8/developing/app-guide/replication-properties +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.8/using/performance/benchmarking +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency +[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[Datomic]: http://www.datomic.com/overview.html +[dev data types]: {{<baseurl>}}riak/kv/2.9.8/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#read-repair + + +## What is Riak? + +Riak is a distributed database designed to deliver maximum data +availability by distributing data across multiple servers. As long as +your Riak client can reach *one* Riak server, it should be able to write +data. + +Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data. + + +### Basho's goals for Riak + +Goal | Description +-------|------- +**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself are experiencing failure conditions +**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden +**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity +**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available + +### When Riak makes sense + +If your data does not fit on a single server and demands a distributed +database architecture, you should take a close look at Riak as a +potential solution to your data availability issues. Getting distributed +databases right is **very** difficult, and Riak was built to address the +problem of data availability with as few trade-offs and downsides as +possible. + +Riak's focus on availability makes it a good fit whenever downtime is +unacceptable. No one can promise 100% uptime, but Riak is designed to +survive network partitions and hardware failures that would +significantly disrupt most databases. + +A less-heralded feature of Riak is its predictable latency. Because its +fundamental operations---read, write, and delete---do not involve +complex data joins or locks, it services those requests promptly. Thanks +to this capability, Riak is often selected as a data storage backend for +data management software from a variety of paradigms, such as +[Datomic]. + +From the standpoint of the actual content of your data, Riak might also +be a good choice if your data can be modeled as one of Riak's currently +available [Data Types][dev data types]: flags, registers, counters, +sets, or maps. These Data Types enable you to take advantage of Riak's +high availability approach while simplifying application development. + +### When Riak is Less of a Good Fit + +We recommend running no fewer than 5 data servers in a cluster. +This means that Riak can be overkill for small databases. 
If you're not +already sure that you will need a distributed database, there's a good +chance that you won't need Riak. + +If explosive growth is a possibility, however, you are always highly +advised to prepare for that in advance. Scaling at Internet speeds is +sometimes compared to overhauling an airplane mid-flight. If you feel +that such a transition might be necessary in the future, then you might +want to consider Riak. + +Riak's simple data model, consisting of keys and values as its atomic +elements, means that your data must be denormalized if your system is to +be reasonably performant. For most applications this is not a serious +hurdle. But if your data simply cannot be effectively managed as keys +and values, Riak will most likely not be the best fit for you. + +Correspondingly, if your application demands a high query load by any +means other than key/value lookup---e.g. SQL-style `SELECT * FROM table` +operations---Riak will not be as efficient as other databases. If you +wish to compare Riak with other data technologies, Basho offers a tool +called [Basho Bench] to help measure its performance, so that you can +decide whether the availability and operational benefits of Riak +outweigh its disadvantages. + +## How Does a Riak Cluster Work? + +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. 
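+
+As a sketch, a replication factor other than the default can be applied through
+a bucket type (the type name and value here are illustrative):
+
+```bash
+riak-admin bucket-type create n5_type '{"props":{"n_val":5}}'
+riak-admin bucket-type activate n5_type
+```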
+ +## When Things Go Wrong + +Riak retains fault tolerance, data integrity, and availability even in +failure conditions such as hardware failure and network partitions. Riak +has a number of means of addressing these scenarios and other bumps in +the road, like version conflicts in data. + +### Hinted Handoff + +Hinted handoff enables Riak to handle node failure. If a node goes down, +a neighboring node will take over its storage operations. When the +failed node returns, the updates received by the neighboring node are +handed back to it. This ensures that availability for writes and updates +is maintained automatically, minimizing the operational burden of +failure conditions. + +### Version Conflicts + +In any system that replicates data, conflicts can arise, for example +when two clients update the same object at the exact same time or when +not all updates have yet reached hardware that is experiencing lag. + +In Riak, replicas are [eventually consistent][concept eventual consistency], +meaning that while data is always available, not all replicas may have +the most recent update at the exact same time, causing brief +periods---generally on the order of milliseconds---of inconsistency +while all state changes are synchronized. + +Riak addresses data conflicts as follows: When you make a read request, +Riak looks up all replicas for that object. By default, Riak will return +the most recently updated version, determined by looking at the object's +vector clock. Vector clocks are metadata attached to each replica when +it is created. They are extended each time a replica is updated to keep +track of versions. You can also allow clients to resolve conflicts +themselves if that is a better fit for your use case. + +### Riak Data Types + +If you are not interested in dealing with version conflicts on the +application side, [Riak Data Types][dev data types] offer a powerful +yet easy-to-use means of storing certain types of data while allowing +Riak to handle merge conflicts. These conflicts are resolved +automatically by Riak using Data Type-specific algorithms inspired by +research into [convergent replicated data types]. + +### Read Repair + +When an outdated replica is returned as part of a read request, Riak +will automatically update the out-of-sync replica to make it consistent. +[Read repair][glossary read rep], a self-healing property of +the database, will even update a replica that returns a `not_found` in +the event that a node loses the data due to physical failure. + +### Reading and Writing Data in Failure Conditions + +In Riak, you can set an R value for reads and a W value for writes. +These values give you control over how many replicas must respond to a +request for it to succeed. + +Let's say that you have an N value of 3 (aka `n_val=3`) for a particular +key/value pair, but one of the physical nodes responsible for a replica +is down. With an `r=2` setting, only 2 replicas must return results for +read to be deemed successful. This allows Riak to provide read +availability even when nodes are down or laggy. The same applies for the +W in writes. If this value is not specified, Riak defaults to `quorum`, +according to which the majority of nodes must respond. + +There is more on [replication properties][apps replication properties] elsewhere in the +documentation. 
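+
+As a quick sketch, R and W can also be supplied per request over the HTTP API
+(the bucket, key, and values here are illustrative):
+
+```bash
+# Succeed as soon as two replicas answer the read
+curl "http://localhost:8098/buckets/my_bucket/keys/my_key?r=2"
+
+# Succeed as soon as two replicas acknowledge the write
+curl -XPUT "http://localhost:8098/buckets/my_bucket/keys/my_key?w=2" \
+  -H "Content-Type: text/plain" \
+  -d 'some value'
+```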
+
+
+
+
diff --git a/content/riak/kv/2.9.8/release-notes.md b/content/riak/kv/2.9.8/release-notes.md
new file mode 100644
index 0000000000..52b88943ef
--- /dev/null
+++ b/content/riak/kv/2.9.8/release-notes.md
@@ -0,0 +1,52 @@
+---
+title: "Riak KV 2.9.8 Release Notes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Release Notes"
+    identifier: "index_release_notes"
+    weight: 101
+    parent: index
+toc: false
+aliases:
+  - /riak/2.9.8/community/release-notes
+  - /riak/kv/2.9.8/intro-v20
+  - /riak/2.9.8/intro-v20
+  - /riak/kv/2.9.8/introduction
+---
+
+Released Dec 06, 2020.
+
+
+## Overview
+
+This release improves the performance and stability of the leveled backend and of AAE folds. These performance improvements are based on feedback from deployments with > 1bn keys per cluster.
+
+The particular improvements are:
+
+- In leveled, caching of individual file scores, so that not all files need to be scored on each journal compaction run.
+
+- In leveled, a change to the default journal compaction scoring percentages to make longer runs more likely (i.e. achieve more compaction per scoring run).
+
+- In leveled, a change to the caching of the SST file block-index in the ledger, which makes repeated folds with a last-modified date range an order of magnitude faster and more computationally efficient.
+
+- In leveled, a fix to prevent very long list-buckets queries when buckets have just been deleted (by erasing all keys).
+
+- In kv_index_tictactree, improved logging and exchange controls to make exchanges easier to monitor and less likely to prompt unnecessary work.
+
+- In kv_index_tictactree, a change to speed up the necessary rebuilds of AAE tree-caches following a node crash, by only testing journal presence in scheduled rebuilds.
+
+- In riak_kv_ttaaefs_manager, some essential fixes to prevent excessive CPU load when comparing large volumes of keys and clocks, caused by a failure to decode clocks correctly before passing them to the exchange.
+
+Further significant improvements have been made to Tictac AAE full-sync to greatly improve the efficiency of operation when there exist relatively large deltas between relatively large clusters (in terms of key counts). Those changes, which introduce the use of 'day_check', 'hour_check' and 'range_check' options to nval-based full-sync, will be available in a future 3.0.2 release of Riak. For those wishing to use Tictac AAE full-sync at a non-trivial scale, moving straight to 3.0.2 when it is available is recommended.
+
+## Previous Release Notes
+
+Please see the KV 2.9.7 release notes [here]({{<baseurl>}}riak/kv/2.9.7/release-notes/).
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup.md b/content/riak/kv/2.9.8/setup.md
new file mode 100644
index 0000000000..0768926187
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup.md
@@ -0,0 +1,51 @@
+---
+title: "Setup Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Setup"
+    identifier: "setup_index"
+    weight: 110
+    pre: install
+toc: false
+aliases:
+---
+
+[plan index]: ../setup/planning
+[install index]: ../setup/installing
+[upgrade index]: ../setup/upgrading
+[downgrade]: ../setup/downgrade
+
+## In This Section
+
+#### [Planning][plan index]
+
+Information on planning your Riak KV cluster, including software & hardware recommendations.
+
+[Learn More >>][plan index]
+
+#### [Installing][install index]
+
+Step-by-step tutorials on installing Riak KV.
+
+[Learn More >>][install index]
+
+#### [Upgrading][upgrade index]
+
+Guides on upgrading your Riak KV cluster.
+
+[Learn More >>][upgrade index]
+
+#### [Downgrading][downgrade]
+
+A guide on downgrading your Riak KV cluster.
+
+[Learn More >>][downgrade]
+
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/downgrade.md b/content/riak/kv/2.9.8/setup/downgrade.md
new file mode 100644
index 0000000000..945f3be19a
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/downgrade.md
@@ -0,0 +1,179 @@
+---
+title: "Downgrading"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Downgrading"
+    identifier: "downgrading"
+    weight: 103
+    parent: "setup_index"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/upgrading/rolling-downgrades/
+  - /riak/kv/2.9.8/ops/upgrading/rolling-downgrades/
+---
+
+[rolling upgrade]: {{<baseurl>}}riak/kv/2.9.8/setup/upgrading/cluster
+[config ref]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+[concept aae]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/
+[aae status]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#aae-status
+
+Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade].
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps relate to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove the Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak search.
+7. Monitor the reindex of the data.
+8. Finalize the process and restart Riak KV & Riak search.
+
+### Guidelines
+
+* Riak Control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:---|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ | | Can be opted out of using a [capability](#aae_tree_capability). |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression and/or global expiration in LevelDB when you installed KV 2.9.8, you cannot downgrade.
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).
+
+This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}
+
+### Stop Riak KV and remove the Riak search index & temporary data
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up your Riak KV `etc` and `data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Remove the Riak search index data and AAE data:
+
+    1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+    ```bash
+    rm -rf /var/lib/riak/yz_temp/solr-webapp
+    ```
+    2. Delete the Solr cores located in the yz directory. If you have custom `solrconfig.xml` files, you will need to restore the cores from backup instead.
+
+    For example:
+
+    ```bash
+    rm -rf /var/lib/riak/yz/example_core1
+    rm -rf /var/lib/riak/yz/example_core2
+    ```
+
+### Prepare to Re-index Solr Cores
+
+5\. (**Optional**) You can increase the AAE operation concurrency and the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency-sensitive application, you should adjust these settings with care.
+
+```riak.conf
+anti_entropy.concurrency_limit = 8
+anti_entropy.tree.build_limit.number = 4
+anti_entropy.tree.build_limit.per_timespan = 5m
+```
+
+### Start the node and disable Yokozuna
+
+6\. Start Riak KV:
+
+{{% note %}}
+Search results will be inconsistent until **Step 8.1** is complete.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7\. Wait for Riak search to start by running the following command:
+
+```bash
+riak-admin wait-for-service yokozuna
+```
+
+8\. Run `riak attach`.
+
+    1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:
+
+    ```
+    riak_core_node_watcher:service_down(yokozuna).
+    ```
+
+    2. Expire the Yokozuna AAE trees:
+
+    ```
+    yz_entropy_mgr:expire_trees().
+    ```
+
+    3. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+### Monitor the reindex of the data
+
+9\. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands.
+
+The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output.
+
+Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.
+
+### Finalize process and restart Yokozuna
+
+10\. If you raised the AAE concurrency settings in `riak.conf` during **Step 5**, stop the node and remove the increased AAE thresholds.
+
+11\. If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet:
+
+```erlang
+riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)).
+```
+
+12\. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+13\.
Verify that transfers have completed: + +```bash +riak-admin transfers +``` + + + + + diff --git a/content/riak/kv/2.9.8/setup/installing.md b/content/riak/kv/2.9.8/setup/installing.md new file mode 100644 index 0000000000..96b4f98973 --- /dev/null +++ b/content/riak/kv/2.9.8/setup/installing.md @@ -0,0 +1,61 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/2.9.8/ops/building/installing + - /riak/kv/2.9.8/ops/building/installing + - /riak/2.9.8/installing/ + - /riak/kv/2.9.8/installing/ +--- + +[install aws]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/rhel-centos +[install suse]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/2.9.8/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. + + + + + diff --git a/content/riak/kv/2.9.8/setup/installing/amazon-web-services.md b/content/riak/kv/2.9.8/setup/installing/amazon-web-services.md new file mode 100644 index 0000000000..71e672f50b --- /dev/null +++ b/content/riak/kv/2.9.8/setup/installing/amazon-web-services.md @@ -0,0 +1,153 @@ +--- +title_supertext: "Installing on" +title: "Amazon Web Services" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Amazon Web Services" + identifier: "installing_amazon_web_services" + weight: 301 + parent: "installing" +toc: true +aliases: + - /riak/2.9.8/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/kv/2.9.8/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/2.9.8/installing/amazon-web-services/ + - /riak/kv/2.9.8/installing/amazon-web-services/ +--- + + +## Launching Riak VMs via the AWS Marketplace + +{{% note title="Note" %}} +The AWS Marketplace does not always have the most recent versions of Riak available. 
To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below.
+{{% /note %}}
+
+In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account.
+
+1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account.
+
+2. Locate Riak in the **Databases & Caching** category or search for Riak from any page.
+
+3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair.
+
+    ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png)
+
+4. Click the **Accept Terms and Launch with 1-Click** button.
+
+### Security Group Settings
+
+Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak.
+
+1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM.
+
+2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports:
+
+    * 22 (SSH)
+    * 8087 (Riak Protocol Buffers Interface)
+    * 8098 (Riak HTTP Interface)
+
+3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab).
+
+    * Port range: 4369
+    * Port range: 6000-7999
+    * Port range: 8099
+
+4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.
+
+    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)
+
+We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.8/using/security/).
+
+## Clustering Riak on AWS
+
+You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the `ec2-user`.
+
+You can find more information on connecting to an instance in the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).
+
+{{% note title="Note" %}}
+The following clustering setup will _not_ be resilient to instance restarts
+unless deployed in Amazon VPC.
+{{% /note %}}
+
+{{% note title="Note on Package Based Installation" %}}
+If installing to AWS by package, further configuration of _riak.conf_ to set the node name and listening IP addresses is necessary for the steps below to function.
+{{% /note %}}
+
+1. On the first node, obtain the internal IP address:
+
+    ```bash
+    curl http://169.254.169.254/latest/meta-data/local-ipv4
+    ```
+
+2. On all other nodes, use the internal IP address of the first node:
+
+    ```bash
+    sudo riak-admin cluster join riak@<ip.of.first.node>
+    ```
+
+3. After all of the nodes are joined, execute the following:
+
+    ```bash
+    sudo riak-admin cluster plan
+    ```
+
+    If the plan looks good:
+
+    ```bash
+    sudo riak-admin cluster commit
+    ```
+
+    To check the status of clustering, use:
+
+    ```bash
+    sudo riak-admin member_status
+    ```
+
+You now have a Riak cluster running on AWS.
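+
+If you are provisioning several instances at once, the join step lends itself to a small script. A minimal sketch (assumptions: it runs on each node except the first, and `FIRST_NODE_IP` is a placeholder you fill in with the internal IP obtained from the metadata query above):
+
+```bash
+#!/usr/bin/env bash
+FIRST_NODE_IP="10.0.0.101"   # placeholder: internal IP of the first node
+
+# Block until the local node's riak_kv service is ready for cluster commands.
+sudo riak-admin wait-for-service riak_kv
+
+# Stage the join; `riak-admin cluster plan` and `commit` still run once afterwards.
+sudo riak-admin cluster join "riak@${FIRST_NODE_IP}"
+```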
+
+
+## Installing From Package
+
+#### Amazon Linux 2
+
+You can install Riak on Amazon Linux 2 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2/riak-2.9.8-1.amzn2x86_64.rpm
+sudo yum localinstall -y riak-2.9.8-1.amzn2x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2/riak-2.9.8-1.amzn2x86_64.rpm
+sudo rpm -i riak-2.9.8-1.amzn2x86_64.rpm
+```
+
+
+#### Amazon Linux AMI (2016.09)
+
+You can install Riak on Amazon Linux AMI 2016.09 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2016.09/riak-2.9.8-1.amzn1x86_64.rpm
+sudo yum localinstall -y riak-2.9.8-1.amzn1x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2016.09/riak-2.9.8-1.amzn1x86_64.rpm
+sudo rpm -i riak-2.9.8-1.amzn1x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/verify
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/installing/debian-ubuntu.md b/content/riak/kv/2.9.8/setup/installing/debian-ubuntu.md
new file mode 100644
index 0000000000..1e8e10d382
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/installing/debian-ubuntu.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "Installing on"
+title: "Debian and Ubuntu"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Debian & Ubuntu"
+    identifier: "installing_debian_ubuntu"
+    weight: 302
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/kv/2.9.8/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/2.9.8/installing/debian-ubuntu/
+  - /riak/kv/2.9.8/installing/debian-ubuntu/
+---
+
+[install source index]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/source/
+[security index]: {{<baseurl>}}riak/kv/2.9.8/using/security/
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/verify
+
+Riak KV can be installed on Debian- or Ubuntu-based systems using a binary package or by compiling from source code.
+
+The following steps have been tested to work with Riak KV on:
+
+- Ubuntu 18.04
+- Ubuntu 16.04
+- Ubuntu 14.04
+- Ubuntu 12.04
+- Debian 9.2
+- Debian 8.6
+- Debian 7.6
+- Raspbian Buster
+
+> **Note on Debian 7**
+>
+> If you wish to install Riak on Debian 7, you may need to install [libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or later, which in turn requires upgrading your system to [sid](https://www.debian.org/releases/sid/). Installation instructions can be found [here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
+>
+> Once sid has been installed, you can install libc6 with the following command:
+>
+>```bash
+apt-get -t sid install libc6 libc6-dev libc6-dbg
+```
+
+## Installing From Package
+
+If you wish to install the deb packages by hand, follow these instructions.
+
+### Installing on Non-LTS Ubuntu Releases
+
+Typically we only package Riak for LTS releases to keep our build and testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty), there are changes that affect how Riak is packaged, so we will release a separate package for that non-LTS release.
In most other cases, however, if you are running a non-LTS release (such as 12.10), it is safe to follow the instructions below for the most recent LTS release prior to yours. For example, on Ubuntu 12.10 you would follow the installation instructions for Ubuntu 12.04.
+
+### PAM Library Requirement for Ubuntu
+
+One dependency that may be missing on your machine is the `libpam0g-dev` package, used for Pluggable Authentication Module (PAM) authentication, which is associated with [Riak security][security index].
+
+To install:
+
+```bash
+sudo apt-get install libpam0g-dev
+```
+
+### Riak 64-bit Installation
+
+#### Ubuntu Bionic Beaver (18.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/bionic64/riak-2.9.8-1_amd64.deb
+sudo dpkg -i riak-2.9.8-1_amd64.deb
+```
+
+#### Ubuntu Xenial Xerus (16.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/xenial64/riak-2.9.8-1_amd64.deb
+sudo dpkg -i riak-2.9.8-1_amd64.deb
+```
+
+#### Ubuntu Trusty Tahr (14.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/trusty64/riak-2.9.8-1_amd64.deb
+sudo dpkg -i riak-2.9.8-1_amd64.deb
+```
+
+#### Ubuntu Precise Pangolin (12.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/precise64/riak-2.9.8-1_amd64.deb
+sudo dpkg -i riak-2.9.8-1_amd64.deb
+```
+
+#### Debian Stretch (9.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/9/riak-2.9.8-1_amd64.deb
+sudo dpkg -i riak-2.9.8-1_amd64.deb
+```
+
+#### Debian Jessie (8.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/8/riak-2.9.8-1_amd64.deb
+sudo dpkg -i riak-2.9.8-1_amd64.deb
+```
+
+#### Debian Wheezy (7.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/7/riak-2.9.8-1_amd64.deb
+sudo dpkg -i riak-2.9.8-1_amd64.deb
+```
+
+#### Raspbian Buster
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/raspbian/buster/riak-2.9.8-1_armhf.deb
+sudo dpkg -i riak-2.9.8-1_armhf.deb
+```
+
+
+## Installing From Source
+
+First, install Riak's dependencies using apt:
+
+```bash
+sudo apt-get install build-essential libc6-dev-i386 git
+```
+
+Riak requires an [Erlang](http://www.erlang.org/) installation. Instructions can be found in [Installing Erlang][install source erlang].
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/riak-2.9.8.tar.gz
+tar zxvf riak-2.9.8.tar.gz
+cd riak-2.9.8
+make rel
+```
+
+If the build was successful, a fresh build of Riak will exist in the `rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
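+
+If you built from source as above, a quick smoke test of the fresh release is also worthwhile first (a sketch; paths are relative to the build directory produced by `make rel`):
+
+```bash
+cd rel/riak
+bin/riak start   # a successful start returns no output
+bin/riak ping    # expect "pong"
+bin/riak stop
+```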
+ + + + + diff --git a/content/riak/kv/2.9.8/setup/installing/freebsd.md b/content/riak/kv/2.9.8/setup/installing/freebsd.md new file mode 100644 index 0000000000..be459431bd --- /dev/null +++ b/content/riak/kv/2.9.8/setup/installing/freebsd.md @@ -0,0 +1,133 @@ +--- +title_supertext: "Installing on" +title: "FreeBSD" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "FreeBSD" + identifier: "installing_freebsd" + weight: 303 + parent: "installing" +toc: true +aliases: + - /riak/2.9.8/ops/building/installing/Installing-on-FreeBSD + - /riak/kv/2.9.8/ops/building/installing/Installing-on-FreeBSD + - /riak/2.9.8/installing/freebsd/ + - /riak/kv/2.9.8/installing/freebsd/ +--- + + + +[install source erlang]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/source/erlang +[downloads]: {{<baseurl>}}riak/kv/2.9.8/downloads/ +[install verify]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/verify + +You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. + +## Installing From Binary Package + +Installing Riak from a binary package is the simplest method with least required dependencies, and requires less time to complete than building from source. + +### Prerequisites and Dependencies + +Riak depends on `sudo` to be installed if the Riak command line tools are to be executed by users other than the *riak* user. Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package. + +### Installation + +You can install the Riak binary package on FreeBSD remotely using the +`pkg_add` remote option. For this example, we're installing `riak-2.9.8.txz`. + +### For FreeBSD 11.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.8/freebsd/11.1/riak-2.9.8.txz +``` + + +### For FreeBSD 10.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.8/freebsd/10.4/riak-2.9.8.txz +``` + +When Riak is installed, a message is displayed with information about the installation and available documentation. + +``` +Thank you for installing Riak. + +Riak has been installed in /usr/local owned by user:group riak:riak + +The primary directories are: + + {platform_bin_dir, "/usr/local/sbin"} + {platform_data_dir, "/var/db/riak"} + {platform_etc_dir, "/usr/local/etc/riak"} + {platform_lib_dir, "/usr/local/lib/riak"} + {platform_log_dir, "/var/log/riak"} + +These can be configured and changed in the platform_etc_dir/app.config. + +Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly. + +Man pages are available for riak(1) and riak-admin(1) +``` + +## Installing From Source + +Installing Riak from source on FreeBSD is a straightforward process which requires installation of more dependencies (such as Erlang) prior to building, and requires more time than a binary package installation. + +That said, installing from source provides for greater flexibility with respect to configuration, data root locations, and more fine grained control over specific dependency versions. + +### Prerequisites and Dependencies + +When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build. + +If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. 
+
+* Erlang ([Installing Erlang][install source erlang])
+* Curl
+* Git
+* OpenSSL (version 1.0.0_7)
+* Python
+* sudo
+* flex
+
+### Installation
+
+First, download the version you wish to install from the [Download Center][downloads].
+
+Next, unpack and build a release from source:
+
+```bash
+tar zxf <riak-x.x.x>
+cd riak-x.x.x
+gmake rel
+```
+
+Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories:
+
+```bash
+bin           # Riak binaries
+data          # Riak data and metadata
+erts-5.9.2    # Erlang Run-Time System
+etc           # Riak configuration
+lib           # Third-party libraries
+log           # Operational logs
+releases      # Release information
+```
+
+If you'd prefer to build a development environment consisting of 4 nodes that can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this:
+
+```bash
+gmake devrel
+```
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/installing/mac-osx.md b/content/riak/kv/2.9.8/setup/installing/mac-osx.md
new file mode 100644
index 0000000000..8b1fbb469c
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/installing/mac-osx.md
@@ -0,0 +1,121 @@
+---
+title_supertext: "Installing on"
+title: "Mac OS X"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Mac OS X"
+    identifier: "installing_macosx"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/kv/2.9.8/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/2.9.8/installing/mac-osx/
+  - /riak/kv/2.9.8/installing/mac-osx/
+---
+
+
+
+[perf open files]: {{<baseurl>}}riak/kv/2.9.8/using/performance/open-files-limit
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/verify
+
+The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball.
+
+> **`ulimit` on OS X**
+>
+> OS X gives you a very small limit on open file handles, so even with a backend that uses very few file handles, it's possible to run out. See [Open Files Limit][perf open files] for more information about changing the limit.
+
+
+## From Precompiled Tarballs
+
+To run Riak from our precompiled tarball, run these commands for the appropriate platform:
+
+### 64-bit
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.8/osx/10.11/riak-2.9.8-OSX-x86_64.tar.gz
+tar xzvf riak-2.9.8-OSX-x86_64.tar.gz
+```
+
+After the release is untarred, you will be able to `cd` into the `riak` directory and execute `bin/riak start` to start the Riak node.
+
+## Homebrew
+
+{{% note title="Warning: Homebrew not always up to date" %}}
+Homebrew's Riak recipe is community supported, and thus is not always up to date with the latest Riak package. Please ensure that the current recipe is using the latest supported code (and don't be afraid to update it if it's not).
+{{% /note %}}
+
+Installing Riak 2.9.8 with [Homebrew](http://brew.sh/) is easy:
+
+```bash
+brew install --devel riak
+```
+
+By default, this will place a `2.9.8` folder in `/usr/local/Cellar/riak`.
+ +Be aware that you will most likely see the following message after +running `brew install`: + +``` +Error: The `brew link` step did not complete successfully +The formula built, but is not symlinked into /usr/local + +You can try again using: + brew link riak +``` + +We do not recommend using `brew link` with Riak. Instead, we recommend +either copying that directory to a desired location on your machine, +aliasing the executables in the `/bin` directory, or interacting with +the Riak installation directory via environment variables. + +**Note**: Homebrew will install Erlang if you don't have it already. + +## Installing From Source + +You must have Xcode tools installed from [Apple's Developer +website](http://developer.apple.com/). + +{{% note title="Note on Clang" %}} +Riak has had problems compiling with Clang in the past. As of Riak KV +2.9.0p5 and Clang 902.0.39.1, Clang can build Riak. +{{% /note %}} + +Riak requires [Erlang](http://www.erlang.org/) R16B02+. + +If you do not have Erlang already installed, see [Installing Erlang][install source erlang]. + +Next, download and unpack the source distribution. + +```bash +curl -O https://files.tiot.jp/riak/kv/2.9/2.9.8/riak-2.9.8.tar.gz +tar zxvf riak-2.9.8.tar.gz +cd riak-2.9.8 +make rel +``` + +If you receive errors when building about "incompatible architecture," +please verify that you built Erlang with the same architecture as your +system (Snow Leopard and higher: 64bit). + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.8/setup/installing/rhel-centos.md b/content/riak/kv/2.9.8/setup/installing/rhel-centos.md new file mode 100644 index 0000000000..d15f64f5ce --- /dev/null +++ b/content/riak/kv/2.9.8/setup/installing/rhel-centos.md @@ -0,0 +1,134 @@ +--- +title_supertext: "Installing on" +title: "RHEL and CentOS" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "RHEL & CentOS" + identifier: "installing_rhel_centos" + weight: 304 + parent: "installing" +toc: true +aliases: + - /riak/2.9.8/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/kv/2.9.8/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/2.9.8/installing/rhel-centos/ + - /riak/kv/2.9.8/installing/rhel-centos/ +--- + + + +[install source index]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/source +[install source erlang]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/verify + +Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary +package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on +CentOS/RHEL 6.9, 7.5.1804 and 8.1.1911 . + +> **Note on SELinux** +> +> CentOS enables SELinux by default, so you may need to disable SELinux if +you encounter errors. + +## Installing From Package + +If you wish to install the RHEL/CentOS packages by hand, follow these +instructions. 
+
+### For CentOS 8 / RHEL 8
+
+Before installing Riak on CentOS 8/RHEL 8, we first need to satisfy some Erlang dependencies by installing the EPEL repository:
+
+```bash
+sudo yum install -y epel-release
+```
+
+Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/8/riak-2.9.8-1.el8.x86_64.rpm
+sudo yum localinstall -y riak-2.9.8-1.el8.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/8/riak-2.9.8-1.el8.x86_64.rpm
+sudo rpm -Uvh riak-2.9.8-1.el8.x86_64.rpm
+```
+
+### For CentOS 7 / RHEL 7
+
+You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/7/riak-2.9.8-1.el7.x86_64.rpm
+sudo yum localinstall -y riak-2.9.8-1.el7.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/7/riak-2.9.8-1.el7.x86_64.rpm
+sudo rpm -Uvh riak-2.9.8-1.el7.x86_64.rpm
+```
+
+### For CentOS 6 / RHEL 6
+
+You can install Riak on CentOS 6/RHEL 6 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/6/riak-2.9.8-1.el6.x86_64.rpm
+sudo yum localinstall -y riak-2.9.8-1.el6.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/6/riak-2.9.8-1.el6.x86_64.rpm
+sudo rpm -Uvh riak-2.9.8-1.el6.x86_64.rpm
+```
+
+## Installing From Source
+
+Riak requires an [Erlang](http://www.erlang.org/) installation. Instructions can be found in [Installing Erlang][install source erlang].
+
+Building from source will require the following packages:
+
+* `gcc`
+* `gcc-c++`
+* `glibc-devel`
+* `make`
+* `git`
+* `pam-devel`
+
+You can install these with yum:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
+```
+
+Now we can download and install Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.9/2.9.8/riak-2.9.8.tar.gz
+tar zxvf riak-2.9.8.tar.gz
+cd riak-2.9.8
+make rel
+```
+
+You will now have a fresh build of Riak in the `rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/installing/smartos.md b/content/riak/kv/2.9.8/setup/installing/smartos.md
new file mode 100644
index 0000000000..97f92b0a74
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/installing/smartos.md
@@ -0,0 +1,119 @@
+---
+title_supertext: "Installing on"
+title: "SmartOS"
+description: ""
+project: "riak_kv"
+project_version: "2.9.8"
+menu:
+  riak_kv-2.9.8:
+    name: "SmartOS"
+    identifier: "installing_smartos"
+    weight: 305
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/installing/Installing-on-SmartOS
+  - /riak/kv/2.9.8/ops/building/installing/Installing-on-SmartOS
+  - /riak/2.9.8/installing/smartos/
+  - /riak/kv/2.9.8/installing/smartos/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/verify
+
+{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
+SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**.
They demonstrate installation of a Riak node on SmartOS as the root user. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open +files limit is at least 65536. Check the current limits to verify this: + +```bash +ulimit -a +``` + +To temporarily increase this limit *for the life of your session*, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`: + +```bash +set rlim_fd_max=65536 +``` + +## Choosing a Version + +SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is the way to determine which Riak package to use. + +The thing that really matters for Riak is what dataset was used to make the SmartOS VM. These datasets come from joyent and appear like this with the `dsadm` command: + +``` +fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4 +f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1 +``` + +This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use. + +For Joyent Cloud users who don't know what dataset was used, in the guest zone type: + +``` +cat /opt/local/etc/pkgin/repositories.conf +``` + +* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* you need to use the `1.8` download. +* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* you need to use the `1.6` download. + +## Download and Install + +Download your version of the Riak binary package for SmartOS: + +```bash +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz +``` + +Next, install the package: + +``` +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz +``` + +After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: + +```bash +svcadm -v enable -r riak +``` + +Finally, after enabling the services, check to see that they are online: + +``` +svcs -a | grep -E 'epmd|riak' +``` + +Output from the above command should resemble the following: + +``` +online 17:17:16 svc:/network/epmd:default +online 17:17:16 svc:/application/riak:default +``` + +Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed and configured Riak as service on SmartOS. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
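+
+Before relying on the node across reboots, it can be worth double-checking the persistent settings and service state from the steps above (a sketch):
+
+```bash
+# Confirm the descriptor limit was persisted to /etc/system.
+grep rlim_fd_max /etc/system   # expect: set rlim_fd_max=65536
+
+# Confirm the limit applies in a fresh session.
+ulimit -n                      # expect: 65536
+
+# Confirm both services are still online.
+svcs -a | grep -E 'epmd|riak'
+```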
+ diff --git a/content/riak/kv/2.9.8/setup/installing/solaris.md b/content/riak/kv/2.9.8/setup/installing/solaris.md new file mode 100644 index 0000000000..26403fa6fb --- /dev/null +++ b/content/riak/kv/2.9.8/setup/installing/solaris.md @@ -0,0 +1,91 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: "2.9.8" +menu: + riak_kv-2.9.8: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/2.9.8/ops/building/installing/Installing-on-Solaris + - /riak/kv/2.9.8/ops/building/installing/Installing-on-Solaris + - /riak/2.9.8/installing/solaris/ + - /riak/kv/2.9.8/installing/solaris/ +--- + + + +[install verify]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
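+
+To make the `/opt/riak/bin` `PATH` change suggested above persist for the relevant user, a minimal sketch (assuming a Bourne-style login shell that reads `~/.profile`):
+
+```bash
+# Append the Riak bin directory to PATH and reload the profile.
+echo 'export PATH=$PATH:/opt/riak/bin' >> ~/.profile
+. ~/.profile
+
+which riak   # should resolve to /opt/riak/bin/riak
+riak ping
+```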
+
diff --git a/content/riak/kv/2.9.8/setup/installing/source.md b/content/riak/kv/2.9.8/setup/installing/source.md
new file mode 100644
index 0000000000..b6f9867ab4
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/installing/source.md
@@ -0,0 +1,110 @@
+---
+title_supertext: "Installing"
+title: "Riak KV From Source"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Installing From Source"
+    identifier: "installing_source"
+    weight: 310
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/Installing-Riak-from-Source
+  - /riak/kv/2.9.8/ops/building/Installing-Riak-from-Source
+  - /riak/2.9.8/installing/source/
+  - /riak/kv/2.9.8/installing/source/
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/2.9.8/downloads/
+[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/debian-ubuntu/#installing-from-source
+[install freebsd#source]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/freebsd/#installing-from-source
+[install mac osx#source]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/mac-osx/#installing-from-source
+[install rhel & centos#source]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/rhel-centos/#installing-from-source
+[install verify]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/verify
+
+Riak should be installed from source if you are building on a platform for which a package does not exist or if you are interested in contributing to Riak.
+
+## Dependencies
+
+### Erlang
+
+To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release.
+
+See [Installing Erlang][install source erlang] for instructions.
+
+### Git
+
+Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build.
+
+### GCC
+
+Riak will not compile with Clang. Please make sure your default C/C++ compiler is [GCC](https://gcc.gnu.org/).
+
+## Installation
+
+The following instructions generate a complete, self-contained build of Riak in `$RIAK/rel/riak`, where `$RIAK` is the location of the unpacked or cloned source.
+
+### Installing from source package
+
+Download the Riak source package from the [Download Center][downloads] and build:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/2.9/2.9.8/riak-2.9.8.tar.gz
+tar zxvf riak-2.9.8.tar.gz
+cd riak-2.9.8
+make locked-deps
+make rel
+```
+
+### Installing from GitHub
+
+The [Riak GitHub repository](http://github.com/basho/riak) has much more information on building and installing Riak from source. To clone and build Riak from source, follow the steps below.
+
+Clone the repository using [Git](http://git-scm.com) and build:
+
+```bash
+git clone git://github.com/basho/riak.git
+cd riak
+make locked-deps
+make rel
+```
+
+## Platform-Specific Instructions
+
+For instructions about specific platforms, see:
+
+  * [Debian & Ubuntu][install debian & ubuntu#source]
+  * [FreeBSD][install freebsd#source]
+  * [Mac OS X][install mac osx#source]
+  * [RHEL & CentOS][install rhel & centos#source]
+
+If you are running Riak on a platform not in the list above and need some help getting it up and running, join The Riak Mailing List and inquire about it there. We are happy to help you get up and running with Riak.
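+
+Whichever route you took above (source package or GitHub clone), a quick way to confirm that the self-contained build landed where expected (a sketch; `$RIAK` is the unpacked or cloned source directory, as defined above):
+
+```bash
+export RIAK="$PWD"               # run from the top of the source tree
+ls "$RIAK/rel/riak/bin"          # riak, riak-admin, and related scripts
+
+"$RIAK/rel/riak/bin/riak" start
+"$RIAK/rel/riak/bin/riak" ping   # expect "pong"
+```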
+
+### Windows
+
+Riak is not currently supported on Microsoft Windows.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/installing/source/erlang.md b/content/riak/kv/2.9.8/setup/installing/source/erlang.md
new file mode 100644
index 0000000000..eab445cc2e
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/installing/source/erlang.md
@@ -0,0 +1,571 @@
+---
+title: "Installing Erlang"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Installing Erlang"
+    identifier: "installing_source_erlang"
+    weight: 301
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/installing/erlang
+  - /riak/kv/2.9.8/ops/building/installing/erlang
+  - /riak/2.9.8/installing/source/erlang/
+  - /riak/kv/2.9.8/installing/source/erlang/
+---
+
+[install index]: {{<baseurl>}}riak/kv/2.9.8/setup/installing
+[security basics]: {{<baseurl>}}riak/kv/2.9.8/using/security/basics
+
+Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].**
+
+> **Note on Official Support**
+>
+> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package.
+
+## Prerequisites
+
+#### Contents
+
+* [kerl](#kerl-prerequisites)
+* [Debian/Ubuntu](#debian-ubuntu-prerequisites)
+* [FreeBSD/Solaris](#freebsd-solaris-prerequisites)
+* [Mac OS X](#mac-os-x-prerequisites)
+* [RHEL/CentOS](#rhel-centos-prerequisites)
+
+To build and install Erlang, you must have a GNU-compatible build system and these tools:
+
+**Unpacking**
+
+* [GNU unzip](http://www.gzip.org/) or a modern uncompressing utility.
+* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives.
+
+**Building**
+
+* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts.
+* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program.
+* [gcc](https://gcc.gnu.org/): for compiling C.
+* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces.
+* [OpenSSL](https://www.openssl.org/): toolkit that implements the SSL and TLS protocols.
+* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java.
+
+
+## kerl Prerequisites
+
+[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems.
+
+Install kerl by running the following commands:
+
+```bash
+curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl
+chmod a+x kerl
+```
+
+If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl.
+
+Otherwise, continue with [Installing with kerl](#installing-with-kerl).
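+
+Optionally, you can move kerl onto your `PATH` and ask it what it can build (a sketch; the `/usr/local/bin` location is a common choice, not a requirement):
+
+```bash
+sudo mv kerl /usr/local/bin/
+
+kerl list releases        # Erlang/OTP releases kerl knows how to build
+kerl list installations   # builds already installed via kerl
+```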
+
+### Configuring kerl on FreeBSD/Solaris
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next, add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+                        --enable-kernel-poll --without-odbc"
+```
+
+Then check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+If this returns `autoconf not found`, install autoconf by running:
+
+```shell
+sudo pkg update
+sudo pkg install autoconf
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+### Configuring kerl on Mac OS X
+
+To compile Erlang as 64-bit on Mac OS X, you need to instruct kerl to pass the correct flags to the `configure` command.
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next, add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+                        --enable-kernel-poll --without-odbc --enable-darwin-64bit"
+```
+
+On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).
+
+Check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf.
+
+With Homebrew:
+
+```shell
+brew install autoconf
+```
+
+Or with curl:
+
+```shell
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+
+## Debian/Ubuntu Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `apt-get` commands:
+
+```bash
+sudo apt-get update
+sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+> **Note on build output**
+>
+> These packages are not required for operation of a Riak node. Notes in the build output about missing support for wxWidgets can be safely ignored when installing Riak in a typical non-graphical server environment.
+
+To install packages for graphics support, use the following `apt-get` command:
+
+```bash
+sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).
+
+
+
+## FreeBSD/Solaris Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `pkg` commands:
+
+```bash
+sudo pkg update
+sudo pkg install gcc autoconf gmake flex
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support, use the following `pkg` command:
+
+```bash
+sudo pkg install wx28-gtk2-2.8.12_4
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).
+
+
+
+## Mac OS X Prerequisites
+
+* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
+* [Homebrew](http://brew.sh/) (*optional*) - Package Manager.
+
+First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X.
+
+We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is not required to install Erlang and is optional.
+
+Next, if you are running OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/). To check for the presence of autoconf, run:
+
+```bash
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf.
+
+With Homebrew:
+
+```bash
+brew install autoconf
+```
+
+Or with curl:
+
+```bash
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x).
+
+## RHEL/CentOS Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `yum` command:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support, use the following `yum` command:
+
+```bash
+sudo yum install wxBase.x86_64
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos).
+
+
+
+## Installation
+
+* [Installing with kerl](#installing-with-kerl)
+* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu)
+* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris)
+* [Installing on Mac OS X](#installing-on-mac-os-x)
+* [Installing on RHEL/CentOS](#installing-on-rhel-centos)
+
+## Installing with kerl
+
+First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites).
+
+With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of Erlang [from GitHub](https://github.com/basho/otp) using the following command:
+
+```bash
+./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10
+```
+
+This builds the Erlang distribution and performs all of the steps required to manually install Erlang for you.
+
+After Erlang is successfully built, you can install the build as follows:
+
+```bash
+./kerl install R16B02-basho10 ~/erlang/R16B02-basho10
+. ~/erlang/R16B02-basho10/activate
+```
+
+The last line activates the Erlang build that was just installed into `~/erlang/R16B02-basho10`.
+
+> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands.
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
+## Installing on Debian/Ubuntu
+
+First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites).
+
+Next, download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz).
+ +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). + +You can install Erlang in several ways on OS X: + +* [From Source](#installing-on-mac-os-x-from-source) +* [Homebrew](#installing-on-mac-os-x-with-homebrew) +* [MacPorts](#installing-on-mac-os-x-with-macports) + +## Installing on Mac OS X from Source + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Follow the steps below to configure Erlang for your operating system. + +#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7) + +If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion +(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang. 
+
+Using LLVM:
+
+```bash
+CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+Or if you prefer GCC:
+
+```bash
+CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5)
+
+If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an Intel processor:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on older versions of OS X
+
+If you're on a non-Intel processor or an older version of OS X:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll
+```
+
+After you've configured your system, `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with Homebrew
+
+To install Erlang with Homebrew, use this command:
+
+```bash
+brew install erlang
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with MacPorts
+
+Installing with MacPorts:
+
+```bash
+port install erlang +ssl
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on RHEL/CentOS
+
+First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites).
+
+Using `wget`:
+
+```bash
+wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next, `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+> **Note for RHEL 6/CentOS 6**
+>
+> In certain versions of RHEL 6 and CentOS 6, the `openssl-devel` package ships with Elliptic Curve Cryptography partially disabled. To communicate this to Erlang and prevent compile- and run-time errors, the environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to Erlang's `./configure` call.
+>
+> The full `make` invocation then becomes:
+>
+> ```bash
+> CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install
+> ```
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/installing/source/jvm.md b/content/riak/kv/2.9.8/setup/installing/source/jvm.md
new file mode 100644
index 0000000000..11af345838
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/installing/source/jvm.md
@@ -0,0 +1,55 @@
+---
+title: "Installing the JVM"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Installing the JVM"
+    identifier: "installing_source_jvm"
+    weight: 302
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/installing/jvm
+  - /riak/kv/2.9.8/ops/building/installing/jvm
+  - /riak/2.9.8/ops/building/installing/Installing-the-JVM
+  - /riak/kv/2.9.8/ops/building/installing/Installing-the-JVM
+  - /riak/2.9.8/installing/source/jvm/
+  - /riak/kv/2.9.8/installing/source/jvm/
+---
+
+[usage search]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search
+
+If you are using [Riak Search 2.0][usage search], codename Yokozuna,
+you will need to install **Java 1.6 or later** to run [Apache
+Solr](https://lucene.apache.org/solr/), the search platform that powers
+Riak Search.
+
+We recommend using Oracle's [JDK
+7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html).
+Installation packages can be found on the [Java SE 7 Downloads
+page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR)
+and instructions on the [documentation
+page](http://www.oracle.com/technetwork/java/javase/documentation/index.html).
+
+## Installing Solr on OS X
+
+If you're using Riak Search on Mac OS X, you may see the following
+error:
+
+```java
+java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME>
+```
+
+If you encounter this error, we recommend manually setting the hostname
+for `localhost` using
+[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html):
+
+```bash
+scutil --set HostName "localhost"
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/installing/suse.md b/content/riak/kv/2.9.8/setup/installing/suse.md
new file mode 100644
index 0000000000..245a5be54d
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/installing/suse.md
@@ -0,0 +1,52 @@
+---
+title_supertext: "Installing on"
+title: "SUSE"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "SUSE"
+    identifier: "installing_suse"
+    weight: 307
+    parent: "installing"
+toc: false
+aliases:
+  - /riak/2.9.8/ops/building/installing/Installing-on-SUSE
+  - /riak/kv/2.9.8/ops/building/installing/Installing-on-SUSE
+  - /riak/2.9.8/installing/suse/
+  - /riak/kv/2.9.8/installing/suse/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/2.9.8/setup/installing/verify
+
+{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}}
+SUSE is no longer supported in Riak KV 2.9.8+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+Riak KV can be installed on openSUSE and SLES systems using a binary package. 
The steps below have been tested with Riak on
+the following x86/x86_64 flavors of SuSE:
+
+* SLES11-SP1
+* SLES11-SP2
+* SLES11-SP3
+* SLES11-SP4
+* OpenSUSE 11.2
+* OpenSUSE 11.3
+* OpenSUSE 11.4
+
+## Installing with rpm
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm
+sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/installing/verify.md b/content/riak/kv/2.9.8/setup/installing/verify.md
new file mode 100644
index 0000000000..eff58eddd4
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/installing/verify.md
@@ -0,0 +1,169 @@
+---
+title: "Verifying a Riak KV Installation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Verifying an Installation"
+    identifier: "installing_verify"
+    weight: 311
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/installing/Post-Installation
+  - /riak/kv/2.9.8/ops/installing/Post-Installation
+  - /riak/2.9.8/installing/verify-install/
+  - /riak/kv/2.9.8/installing/verify-install/
+---
+
+[client libraries]: {{<baseurl>}}riak/kv/2.9.8/developing/client-libraries
+[perf open files]: {{<baseurl>}}riak/kv/2.9.8/using/performance/open-files-limit
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/bucket-types
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/inspecting-node
+
+After you've installed Riak KV, we recommend checking the liveness of
+each node to ensure that requests are being properly served.
+
+In this document, we cover ways of verifying that your Riak nodes are operating
+correctly. After you've determined that your nodes are functioning and you're
+ready to put Riak KV to work, be sure to check out the resources in the
+**Now What?** section below.
+
+## Starting a Riak Node
+
+> **Note about source installations**
+>
+> To start a Riak KV node that was installed by compiling the source code, you
+can add the Riak KV binary directory from the installation directory you've
+chosen to your `PATH`.
+>
+> For example, if you compiled Riak KV from source in
+the `/home/riak` directory, then you can add the binary directory
+(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation.
+
+To start a Riak node, use the `riak start` command:
+
+```bash
+riak start
+```
+
+A successful start will return no output. If there is a problem starting the
+node, an error message is printed to standard error.
+
+To run Riak with an attached interactive Erlang console:
+
+```bash
+riak console
+```
+
+A Riak node is typically started in console mode as part of debugging or
+troubleshooting to gather more detailed information from the Riak startup
+sequence. Note that if you start a Riak node in this manner, it runs as
+a foreground process that will exit when the console is closed.
+
+You can close the console by issuing this command at the Erlang prompt:
+
+```erlang
+q().
+```
+
+Once your node has started, you can check that it is running with
+the `riak ping` command:
+
+```bash
+riak ping
+```
+
+The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not.
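+
+If you are bringing up several nodes at once, a small shell loop can
+check them all in one pass. This is only a sketch; it assumes SSH
+access to each host, and the hostnames below are placeholders for your
+own:
+
+```bash
+# Hypothetical hostnames; substitute your own node addresses
+for host in riak1.example.com riak2.example.com riak3.example.com; do
+  printf '%s: ' "$host"
+  ssh "$host" riak ping
+done
+```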
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the [bucket type][cluster ops bucket types] `default`:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+*   Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /types/default/props HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node!
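+
+Before moving on, you may also want to round-trip a test object over
+HTTP. A minimal sketch against a local node, using a hypothetical
+bucket and key:
+
+```bash
+# Store a small text value under bucket "test", key "hello"
+curl -XPUT http://127.0.0.1:8098/types/default/buckets/test/keys/hello \
+  -H "Content-Type: text/plain" \
+  -d "hello world"
+
+# Read the value back
+curl http://127.0.0.1:8098/types/default/buckets/test/keys/hello
+```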
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/installing/windows-azure.md b/content/riak/kv/2.9.8/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..0528a19f33
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/installing/windows-azure.md
@@ -0,0 +1,197 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/2.9.8/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/2.9.8/installing/windows-azure/
+  - /riak/kv/2.9.8/installing/windows-azure/
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported, and thus are not always up to
+date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Log in to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+   - Provide a "Virtual Machine Name", such as "testlinuxvm".
+   - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+   - In the "New Password" box, type a strong password.
+   - In the "Confirm Password" box, retype the password.
+   - Select the appropriate "Size" from the drop-down list.
+   - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+   - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop-down list. 
+   - In the "DNS Name" box, type a valid DNS address, e.g., "testlinuxvm".
+   - In the "Storage Account" box, select "Use Automatically Generated Storage Account".
+   - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted.
+   - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration2.png)
+
+6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration3.png)
+
+7. Wait while Windows Azure prepares your virtual machine.
+
+### Configure Endpoints
+
+Once the virtual machine is created, you must configure endpoints in order to connect remotely.
+
+1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints".
+
+2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows:
+   - Name: https
+   - Protocol: leave set to 'TCP'
+   - Public Port: 443
+   - Private Port: 8069
+
+## Connect to CentOS VMs using PuTTY or SSH
+
+When the virtual machine has been provisioned and the endpoints configured, you can connect to it using SSH or PuTTY.
+
+### Connecting Using SSH
+
+**For Linux & Mac Users:**
+
+```bash
+ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180
+```
+
+Enter the user's password.
+
+**For Windows Users, use PuTTY:**
+
+If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html).
+
+1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe.
+
+2. Enter the SSH details as found on the node's dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port.
+
+    ![]({{<baseurl>}}images/putty.png)
+
+## Install Riak and configure using a shell script
+
+1. **On each node**, once you've connected using the steps above, execute:
+
+```bash
+sudo su -
+curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh
+```
+
+## Configure Riak using Riak Control
+
+You can either use Riak Control or the command line to add nodes to your Riak cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line".
+
+1. Find the DNS name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For example:
+   - **DNS:** basho-example.cloudapp.net
+   - **Deployment ID:** 7ea145743aeb4402a088da1234567890
+
+2. Visit https://dns-name.cloudapp.net/admin in your browser.
+
+3. Enter 'admin' as the username, and the "Deployment ID" as the password.
+
+4. Select 'Cluster' on the left.
+
+5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each VM, not the DNS name. For example:
+   - riak@basho-centos1
+
+You now have a Riak cluster on Azure.
+
+## Configure Riak using Command Line
+
+If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section. 
+
+First, SSH into the second (and subsequent) nodes and execute:
+
+```bash
+riak-admin cluster join riak@yourhostnamehere
+```
+
+(Where 'yourhostnamehere' is the short name of the **first node** in your cluster.)
+
+(NOTE: The host you choose can actually be any host that has already joined the cluster. The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak-admin cluster plan
+```
+
+Verify that all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak-admin cluster commit
+```
+
+To check the status of clustering, use:
+
+```bash
+riak-admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/planning.md b/content/riak/kv/2.9.8/setup/planning.md
new file mode 100644
index 0000000000..70ff1f4598
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/planning.md
@@ -0,0 +1,61 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster. 
+
+[Learn More >>][plan best practices]
+
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/planning/backend.md b/content/riak/kv/2.9.8/setup/planning/backend.md
new file mode 100644
index 0000000000..99c5793959
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/planning/backend.md
@@ -0,0 +1,60 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/planning/backends/
+  - /riak/kv/2.9.8/ops/building/planning/backends/
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/multi
+[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveled
+[dev api backend]: {{<baseurl>}}riak/kv/2.9.8/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+* [Leveled][plan backend leveled]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic |Bitcask|LevelDB|Memory|
+:----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend |✓ | | |
+Persistent |✓ |✓ | |
+Keyspace in RAM |✓ | |✓ |
+Keyspace can be greater than available RAM | |✓ | |
+Keyspace loaded into RAM on startup<sup>1</sup>|✓ | | |
+Objects in RAM | | |✓ |
+Object expiration |✓ | |✓ |
+Secondary indexes | |✓ |✓ |
+Tiered storage | |✓ | |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces. 
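+
+Whichever backend you choose, it is selected with a single
+`storage_backend` setting in each node's `riak.conf`. For example, to
+switch a node from the default Bitcask to LevelDB:
+
+```riakconf
+storage_backend = leveldb
+```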
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/planning/backend/bitcask.md b/content/riak/kv/2.9.8/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..eb5bef6785
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/planning/backend/bitcask.md
@@ -0,0 +1,994 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/advanced/backends/bitcask/
+  - /riak/kv/2.9.8/ops/advanced/backends/bitcask/
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/2.9.8/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: (1) data written to
+  Bitcask doesn't need to be ordered on disk, and (2) the
+  log-structured design of Bitcask allows for minimal disk head
+  movement during writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value, and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append-only and write-once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes. 
Recovery operations need to review only the last + record or two written and verify CRC data to ensure that the data is + consistent. + +* **Easy Backup** + + In most systems, backup can be very complicated. Bitcask simplifies + this process due to its append-only, write-once disk format. Any + utility that archives or copies files in disk-block order will + properly back up or copy a Bitcask database. + +## Weaknesses + +* Keys must fit in memory + + Bitcask keeps all keys in memory at all times, which means that your + system must have enough memory to contain your entire keyspace, plus + additional space for other operational components and operating- + system-resident filesystem buffer space. + +## Installing Bitcask + +Bitcask is the default storage engine for Riak. You can verify that +Bitcask is currently being used as the storage backend with the +[`riak`][use admin riak cli] command interface: + +```bash +riak config effective | grep backend +``` + +If this operation returns anything other than `bitcask`, read +the following section for instructions on switching the backend to Bitcask. + +## Enabling Bitcask + +You can set Bitcask as the storage engine using each node's +[configuration files][config reference]: + +```riakconf +storage_backend = bitcask +``` + +```appconfig +{riak_kv, [ + {storage_backend, riak_kv_bitcask_backend}, + %% Other riak_kv settings... + + ]}, +``` + +## Configuring Bitcask + +Bitcask enables you to configure a wide variety of its behaviors, from +filesystem sync strategy to merge settings and more. + +> **Note on configuration systems** +> +> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file. +> Instructions for both systems will be included below. Narrative +descriptions of the various settings will be tailored to the newer +configuration system, whereas instructions for the older system will +largely be contained in the code tabs. + +The default configuration values for Bitcask are as follows: + +```riakconf +bitcask.data_root = ./data/bitcask +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + {data_root, "/var/lib/riak/bitcask"}, + {io_mode, erlang}, + + %% Other Bitcask-specific settings + ]} +``` + +All of the other available settings listed below can be added to your +configuration files. + +### Open Timeout + +The open timeout setting specifies the maximum time Bitcask will block +on startup while attempting to create or open the Bitcask data +directory. The default is 4 seconds. + +In general, you will not need to adjust this setting. If, however, you +begin to receive log messages of the form `Failed to start bitcask +backend: ...`, you may want to consider using a longer timeout. + +Open timeout is specified using the `bitcask.sync.open_timeout` +parameter, and can be set in terms of seconds, minutes, hours, etc. +The following example sets the parameter to 10 seconds: + +```riakconf +bitcask.sync.open_timeout = 10s +``` + +```appconfig +{bitcask, [ + ..., + {open_timeout, 10} %% This value must be expressed in seconds + ... + ]} +``` + +### Sync Strategy + +Bitcask enables you to configure the durability of writes by specifying +when to synchronize data to disk, i.e. by choosing a sync strategy. The +default setting (`none`) writes data into operating system buffers that +will be written to disk when those buffers are flushed by the operating +system. 
If the system fails before those buffers are flushed, e.g. due
+to power loss, that data is lost. This possibility holds for any
+database in which values are asynchronously flushed to disk.
+
+Thus, using the default setting of `none` protects against data loss in
+the event of application failure, i.e. process death, but leaves open a
+small window in which data could be lost in the event of a complete
+system failure, e.g. hardware or OS failure.
+
+This possibility can be prevented by choosing the `o_sync` sync
+strategy, which forces the operating system to flush to stable storage
+at write time for every write. The effect of flushing each write is
+better durability, although it should be noted that write throughput
+will suffer because each write will have to wait for the write to
+complete.
+
+The following sync strategies are available:
+
+  * `none` - lets the operating system manage syncing writes
+    (default)
+  * `o_sync` - uses the `O_SYNC` flag, which forces syncs on every
+    write
+  * Time interval - Riak will force Bitcask to sync at specified
+    intervals
+
+The following are possible configurations:
+
+
+```riakconf
+bitcask.sync.strategy = none
+bitcask.sync.strategy = o_sync
+
+bitcask.sync.strategy = interval
+bitcask.sync.interval = 65s
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {sync_strategy, none},
+    {sync_strategy, o_sync},
+    {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds
+    ...
+    ]}
+```
+
+> **Sync strategy interval limitations**
+>
+> Setting the sync interval to a value lower than or equal to
+`riak_core.vnode_inactivity_timeout` (default: 60 seconds) will
+prevent Riak from performing handoffs.
+>
+> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive.
+
+### Max File Size
+
+The `max_file_size` setting describes the maximum permitted size for any
+single data file in the Bitcask directory. If a write causes the current
+file to exceed this size threshold, then that file is closed, and a new
+file is opened for writes. The default is 2 GB.
+
+Increasing `max_file_size` will cause Bitcask to create fewer, larger
+files that are merged less frequently, while decreasing it will cause
+Bitcask to create more numerous, smaller files that are merged more
+frequently.
+
+To give an example, if your ring size is 16, your servers could see as
+much as 32 GB of data in the bitcask directories before the first merge
+is triggered, irrespective of your working set size. You should plan
+storage accordingly and be aware that it is possible to see disk data
+sizes that are larger than the working set.
+
+The `max_file_size` setting can be specified using kilobytes, megabytes,
+etc. The following example sets the max file size to 1 GB:
+
+```riakconf
+bitcask.max_file_size = 1GB
+```
+
+```appconfig
+%% The max_file_size setting must be expressed in bytes, as in the
+%% example below
+
+{bitcask, [
+    ...,
+    {max_file_size, 16#40000000}, %% 1 GB expressed in bytes
+    ...
+    ]}
+```
+
+### Hint File CRC Check
+
+During startup, Bitcask will read from `.hint` files in order to build
+its in-memory representation of the key space, falling back to `.data`
+files if necessary. This reduces the amount of data that must be read
+from the disk during startup, thereby also reducing the time required to
+start up. 
You can configure Bitcask to either disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) - Writes are made via Erlang's built-in file API
+* `nif` - Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif` and is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met, at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory. 
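+
+To get a sense of this overhead on a running node, you can inspect the
+per-vnode Bitcask directories directly. A quick sketch, assuming the
+`/var/lib/riak/bitcask` data root shown in the configuration examples
+above:
+
+```bash
+# Report the on-disk size of each vnode's Bitcask directory
+du -sh /var/lib/riak/bitcask/*
+```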
+ +The merge process is affected by all of the settings described in the +sections below. In those sections, "dead" refers to keys that no longer +contain the most up-to-date values, while "live" refers to keys that do +contain the most up-to-date value and have not been deleted. + +### Merge Policy + +Bitcask enables you to select a merge policy, i.e. when during the day +merge operations are allowed to be triggered. The valid options are: + +* `always` - No restrictions on when merge operations can occur + (default) +* `never` - Merge will never be attempted +* `window` - Merge operations occur during specified hours + +If you are using the newer, `riak.conf`-based configuration system, you +can select a merge policy using the `merge.policy` setting. The +following example sets the merge policy to `never`: + +```riakconf +bitcask.merge.policy = never +``` + +```appconfig +{bitcask, [ + ..., + {merge_window, never}, + ... + ]} +``` + +If you opt to specify start and end hours for merge operations, you can +do so with the `merge.window.start` and `merge.window.end` +settings in addition to setting the merge policy to `window`. +Each setting is an integer between 0 and 23 for hours on a 24h clock, +with 0 meaning midnight and 23 standing for 11 pm. +The merge window runs from the first minute of the `merge.window.start` hour +to the last minute of the `merge.window.end` hour. +The following example enables merging between 3 am and 4:59 pm: + +```riakconf +bitcask.merge.policy = window +bitcask.merge.window.start = 3 +bitcask.merge.window.end = 17 +``` + +```appconfig +%% In the app.config-based system, you specify the merge window using +%% a tuple, as in the following example: + +{bitcask, [ + ..., + {merge_window, {3, 17}}, + ... + ]} +``` + +> **`merge_window` and the Multi backend** +> +>If you are using the older configuration system and using Bitcask with +the [Multi][plan backend multi] backend, please note that if you +wish to use a merge window, you _must_ set it in the global `bitcask` +section of your configuration file. `merge_window` settings +in per-backend sections are ignored. + +If merging has a significant impact on performance of your cluster, or +if your cluster has quiet periods in which little storage activity +occurs, you may want to change this setting from the default. + +A common way to limit the impact of merging is to create separate merge +windows for each node in the cluster and ensure that these windows do +not overlap. This ensures that at most one node at a time can be +affected by merging, leaving the remaining nodes to handle requests. +The main drawback of this approach is that merges will occur less +frequently, leading to increased disk space usage. + +### Merge Triggers + +Merge triggers determine the conditions under which merging will be +invoked. These conditions fall into two basic categories: + +* **Fragmentation** - This describes the ratio of dead keys to total + keys in a file that will trigger merging. The value of this setting is + an integer percentage (0-100). For example, if a data file contains 6 + dead keys and 4 live keys, a merge will be triggered by the default + setting (60%). Increasing this value will cause merging to occur less + often, whereas decreasing the value will cause merging to happen more + often. + +* **Dead Bytes** - This setting describes how much data stored for + dead keys in a single file will trigger merging. If a file meets or + exceeds the trigger value for dead bytes, a merge will be triggered. 
+
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+  When either of these constraints is met by any file in the directory,
+  Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** - This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in the
+  merge. The value of this setting is a percentage (0-100). For
+  example, if a data file contains 4 dead keys and 6 live keys, it
+  will be included in the merge at the default ratio (40%). Increasing
+  the value will cause fewer files to be merged, while decreasing the
+  value will cause more files to be merged.
+
+* **Dead Bytes** - This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be included in
+  the merge. Increasing this value will cause fewer files to be
+  merged, while decreasing this value will cause more files to be
+  merged. The default is 128 MB.
+
+* **Small File** - This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_. 
If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +Fold keys thresholds will reuse the keydir (a) if another fold was +started less than a specified time interval ago and (b) there were fewer +than a specified number of updates. Otherwise, Bitcask will wait until +all current fold keys complete and then start. The default time interval +is 0, while the default number of updates is unlimited. Both thresholds +can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. Each threshold can be disabled by setting the value to +`unlimited`. 
The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value, or if you need to purge data for space reasons, you can
+configure object expiration, aka expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting
+and either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time:
+Bitcask will defer a merge triggered solely by key expiry for the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting and
+in terms of minutes, hours, days, etc. The following example sets the
+grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If search queries returning
+expired keys is a problem for your use case, then we would recommend not
+using automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can assist you in making Bitcask as stable and reliable as
+possible and in minimizing latency and maximizing throughput. 
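+
+Before changing anything, it can help to confirm the settings a node is
+actually running with. The same `riak config effective` command used
+above to check the backend works here as well:
+
+```bash
+riak config effective | grep bitcask
+```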
+
+### Tips & Tricks
+
+  * **Bitcask depends on filesystem caches**
+
+    Some data storage layers implement their own page/block buffer cache
+    in-memory, but Bitcask does not. Instead, it depends on the
+    filesystem's cache. Adjusting the caching characteristics of your
+    filesystem can impact performance.
+
+  * **Be aware of file handle limits**
+
+    Review the documentation on [open files limit][perf open files].
+
+  * **Avoid the overhead of updating file metadata (such as last access
+    time) on every read or write operation**
+
+    You can achieve a substantial speed boost by adding the `noatime`
+    mounting option to Linux's `/etc/fstab`. This will disable the
+    recording of the last accessed time for all files, which results
+    in fewer disk head seeks. If you need last access times but you'd
+    like some of the benefits of this optimization, you can try
+    `relatime`.
+
+    ```
+    /dev/sda5    /data           ext3    noatime  1 1
+    /dev/sdb1    /data/inno-log  ext3    noatime  1 2
+    ```
+
+  * **Small number of frequently changed keys**
+
+    When keys are changed frequently, fragmentation rapidly increases.
+    To counteract this, you should lower the fragmentation trigger and
+    threshold.
+
+  * **Limited disk space**
+
+    When disk space is limited, limiting the space occupied by dead keys
+    is of paramount importance. Lower the dead bytes threshold and
+    trigger to counteract wasted space.
+
+  * **Purging stale entries after a fixed period**
+
+    To automatically purge stale values, set the object expiry value to
+    the desired cutoff time. Keys that are not modified for a period
+    equal to or greater than this time interval will become
+    inaccessible.
+
+  * **High number of partitions per node**
+
+    Because each node has many partitions running, Bitcask will have
+    many [open files][perf open files]. To reduce the number of open
+    files, we suggest increasing the max file size so that larger files
+    will be written. You could also decrease the fragmentation and
+    dead-bytes settings and increase the small file threshold so that
+    merging will keep the number of open files small.
+
+  * **High daytime traffic, low nighttime traffic**
+
+    In order to cope with a high volume of writes without performance
+    degradation during the day, you might want to limit merging to
+    non-peak periods. Setting the merge window to hours of the day
+    when traffic is low will help.
+
+  * **Multi-cluster replication**
+
+    If you are using Riak with the replication feature enabled, your clusters might experience
+    higher rates of fragmentation and dead bytes. Additionally,
+    because the fullsync feature operates across entire partitions, it
+    will be made more efficient by accessing data as sequentially as
+    possible (across fewer files). Lowering both the fragmentation and
+    dead-bytes settings will improve performance.
+
+## FAQ
+
+  * [[Why does it seem that Bitcask merging is only triggered when a
+    Riak node is restarted?|Developing on Riak
+    FAQs#why-does-it-seem-that-bitc]]
+  * [[If the size of key index exceeds the amount of memory, how does
+    Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+  * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time. 
The file being
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, while
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. This data management strategy
+may use up a lot of space over time, since Bitcask writes new values
+without touching the old ones.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033322.9.844576013453623296
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+9819464125817003981681007469812.9.03831329677312
+```
+
+Note that at startup, directories are created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 116463411724806326294356135107078803128832.9.0184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered. 
+ +``` +bitcask/ +|-- 0-1317147619996589 +| |-- 1317147974.bitcask.data +| |-- 1317147974.bitcask.hint +| |-- 1317221578.bitcask.data +| |-- 1317221578.bitcask.hint +| |-- 1317221869.bitcask.data +| |-- 1317221869.bitcask.hint +| |-- 1317222847.bitcask.data +| |-- 1317222847.bitcask.hint +| |-- 1317222868.bitcask.data +| |-- 1317222868.bitcask.hint +| |-- 1317223014.bitcask.data +| `-- 1317223014.bitcask.hint +|-- 1004782375664995756265033322.9.844576013453623296-1317147628760580 +| |-- 1317147693.bitcask.data +| |-- 1317147693.bitcask.hint +| |-- 13172.9.05.bitcask.data +| |-- 13172.9.05.bitcask.hint +| |-- 1317222514.bitcask.data +| |-- 1317222514.bitcask.hint +| |-- 1317223035.bitcask.data +| |-- 1317223035.bitcask.hint +| |-- 1317223411.bitcask.data +| `-- 1317223411.bitcask.hint +|-- 1027618338748291114361965898003636498195577569280-1317223690337865 +|-- 1050454301831586472458898473514828420377701515264-1317223690151365 + +... etc ... + +``` + +This is normal operational behavior for Bitcask. + + + + diff --git a/content/riak/kv/2.9.8/setup/planning/backend/leveldb.md b/content/riak/kv/2.9.8/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..75f810117d --- /dev/null +++ b/content/riak/kv/2.9.8/setup/planning/backend/leveldb.md @@ -0,0 +1,506 @@ +--- +title: "LevelDB" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "LevelDB" + identifier: "planning_backend_leveldb" + weight: 101 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.8/ops/advanced/backends/leveldb/ + - /riak/kv/2.9.8/ops/advanced/backends/leveldb/ +--- + +[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}} +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode +[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference +[perf index]: {{<baseurl>}}riak/kv/2.9.8/using/performance +[config reference#aae]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/#active-anti-entropy + +> **Note on upgrading to 2.0** +> +> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0, +and wish to keep using your old `app.config` file for configuration, +make sure to follow the steps for setting the +`total_leveldb_mem_percent` parameter in the +[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB]. + +[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application +that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an +open-source, on-disk key/value store created by Google Fellows Jeffrey +Dean and Sanjay Ghemawat. + +LevelDB is a relatively new entrant into the growing list of key/value +database libraries, but it has some very interesting qualities that we +believe make it an ideal candidate for use in Riak. LevelDB's storage +architecture is more like +[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable +model than it is like Bitcask. This design and implementation provide +the possibility of a storage engine without Bitcask's RAM limitation. + +> **Note:** Riak uses a fork of LevelDB. The code can be found +[on Github](https://github.com/basho/leveldb). + +A number of changes have been introduced in the LevelDB backend in Riak +2.0: + +* There is now only _one_ performance-related setting that Riak users + need to define---`leveldb.total_mem_percent`---as LevelDB now + dynamically sizes the file cache and block sizes based upon active + [vnodes][glossary vnode] assigned to the node. 
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model
+  for background compaction work on `.sst` table files. The new model
+  has increased throughput by at least 10% in all test scenarios.
+* Delete operations now receive priority handling in compaction
+  selection, which means more aggressive reclaiming of disk space than
+  in previous versions of Riak's LevelDB backend.
+* Nodes storing massive key datasets (e.g. in the billions of keys) now
+  receive increased throughput due to automatic management of LevelDB's
+  block size parameter. This parameter is slowly raised to increase the
+  number of files that can be open simultaneously, improving random read
+  performance.
+
+## Strengths
+
+1. **License** - The LevelDB and eLevelDB licenses are the [New BSD
+   License](http://www.opensource.org/licenses/bsd-license.php) and the
+   [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html),
+   respectively. We'd like to thank Google and the authors of LevelDB at
+   Google for choosing a completely FLOSS license so that everyone can
+   benefit from this innovative storage engine.
+2. **Data compression** - LevelDB provides two compression algorithms
+   to reduce storage size and increase efficient use of storage bandwidth:
+   * Google's [Snappy](https://code.google.com/p/snappy/) data compression
+   * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data
+     compression
+
+   Enabling compression means more CPU usage but less disk space. Compression
+   is especially good for text data, including raw text, Base64, JSON, etc.
+
+## Weaknesses
+
+1. Read access can be slow when there are many levels to search.
+2. LevelDB may have to do a few disk seeks to satisfy a read: one disk
+   seek per level, plus one seek for the last level if 10% of the
+   database fits in memory (all of the earlier levels should end up
+   cached in the OS buffer cache for most filesystems). If only 1% fits
+   in memory, LevelDB will need two seeks.
+
+## Installing eLevelDB
+
+Riak ships with eLevelDB included within the distribution, so there is
+no separate installation required. However, Riak is configured to use
+the Bitcask storage engine by default. To switch to eLevelDB, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveldb`:
+
+```riakconf
+storage_backend = leveldb
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_eleveldb_backend},
+    %% ...
+]}
+```
+
+## Configuring eLevelDB
+
+eLevelDB's default behavior can be modified by adding/changing
+parameters in the `eleveldb` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for eLevelDB are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.data_root` | LevelDB data root | `./data/leveldb`
+`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size.
| `70`
+
+If you are using the older, `app.config`-based system, the equivalent to
+the `leveldb.data_root` is the `data_root` setting, as in the following
+example:
+
+```appconfig
+{eleveldb, [
+    {data_root, "/path/to/leveldb"},
+
+    %% Other eleveldb-specific settings
+]}
+```
+
+The `leveldb.maximum_memory.percent` setting is only available in the
+newer configuration system.
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### ext4 Options
+
+The ext4 filesystem defaults include two options that increase integrity
+but slow performance. Because Riak's integrity is based on multiple
+nodes holding the same data, these two options can be changed to boost
+LevelDB's performance. We recommend setting `barrier=0` and
+`data=writeback`.
+
+#### CPU Throttling
+
+If CPU throttling is enabled, disabling it can boost LevelDB performance
+in some cases.
+
+#### No Entropy
+
+If you are using the https protocol, the 2.6 kernel is widely known for
+stalling programs waiting for SSL entropy bits. If you are using https,
+we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line. The TSC clocksource has been identified to cause issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a
+Google-sponsored open source project that has been incorporated into an
+Erlang application and integrated into Riak for storage of key/value
+information on disk. The implementation of LevelDB is similar in spirit
+to the representation of a single Bigtable tablet (section 5.3 of the
+Bigtable paper).
+
+### How Levels Are Managed
+
+LevelDB is a memtable/sstable design. The set of sorted tables is
+organized into a sequence of levels. Each level stores approximately ten
+times as much data as the level before it. The sorted table generated
+from a flush is placed in a special young level (also called level-0).
+When the number of young files exceeds a certain threshold (currently
+four), all of the young files are merged together with all of the
+overlapping level-1 files to produce a sequence of new level-1 files (a
+new level-1 file is created for every 2MB of data).
+
+Files in the young level may contain overlapping keys. However, files in
+other levels have distinct non-overlapping key ranges. Consider level
+number L where L >= 1. When the combined size of files in level-L
+exceeds (10^L) MB (i.e.
10MB for level-1, 100MB for level-2, ...), one
+file in level-L and all of the overlapping files in level-(L+1) are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-(L+1)). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 second.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0 the only special case to consider is that after
+picking the primary L0 file to compact, it will check other L0 files to
+determine the degree to which they overlap. Because this is an attempt
+to avoid some I/O, we can expect L0 compactions to usually if not always
+be "all L0 files".
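+
+As a worked example of the scoring rule above: if level-1 holds 25MB
+against its 10MB limit, its score is 25/10 = 2.5, while a level-0 with
+six files against the four-file threshold scores 6/4 = 1.5, so level-1
+would be chosen for compaction first.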
+
+See the `PickCompaction` routine in
+[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level, since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition
+ring, which results in 64 separate directories, each with its own
+LevelDB database:
+
+```
+leveldb/
+|-- 0
+| |-- 000003.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| `-- MANIFEST-000002
+|-- 1004782375664995756265033322492444576013453623296
+| |-- 000005.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| |-- LOG.old
+| `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+| |-- 000005.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| |-- LOG.old
+| `-- MANIFEST-000004
+
+... etc ...
+
+`-- 981946412581700398168100746981252653831329677312
+ |-- 000005.log
+ |-- CURRENT
+ |-- LOCK
+ |-- LOG
+ |-- LOG.old
+ `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033322492444576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files in a
+single database directory. In Riak 1.3, the original LevelDB code was
+modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in the name of speeding up
+database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write-intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels.
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows approximate sizes (in
+megabytes) for each of the following: the amount of raw data stored in
+the level, the cumulative size of all levels up to the specified level,
+and the cumulative size including active anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above.
Select the third column if you are not
+  using active anti-entropy or the fourth column if you are (i.e. if the
+  `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative column in
+  each row in the table. The first result that exceeds your fast storage
+  array capacity will provide the level number that should be used for
+  your `leveldb.tiered` setting.
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/planning/backend/leveled.md b/content/riak/kv/2.9.8/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..02d8a4abed
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/planning/backend/leveled.md
@@ -0,0 +1,141 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/advanced/backends/leveled/
+  - /riak/kv/2.9.8/ops/advanced/backends/leveled/
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/2.9.8/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/#active-anti-entropy
+
+[Leveled](https://github.com/martinsumner/leveled) is a simple Key-Value
+store based on the concept of Log-Structured Merge Trees, with the
+following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+  - Splits the storage of the value between keys/metadata and body
+    (assuming some definition of metadata is provided);
+  - Allows for the application to define what constitutes object
+    metadata and what constitutes the body (value-part) of the object,
+    and to assign tags to objects to manage multiple object types with
+    different extraction rules.
+- Stores keys/metadata in a merge tree and the full object in a journal
+  of CDB files:
+  - Allowing for HEAD requests which have lower overheads than GET
+    requests; and
+  - Queries which traverse keys/metadata to be supported with fewer
+    side effects on the page cache than folds over keys/objects.
+- Support for tagging of object types and the implementation of
+  alternative store behaviour based on type:
+  - Allows for changes to extract specific information as metadata to
+    be returned from HEAD requests;
+  - Potentially usable for objects with special retention or merge
+    properties.
+- Support for low-cost clones without locking to provide for scanning
+  queries (e.g. secondary indexes):
+  - Low cost specifically where there is a need to scan across keys and
+    metadata (not values).
+- Written in Erlang as a message passing system between Actors.
+
+
+## Strengths
+
+1. Leveled was developed specifically as a potential backend for Riak,
+   with features such as:
+   * Support for secondary indexes
+   * Multiple fold types
+   * Auto expiry of objects
+
+   Leveled also supports object compression. Enabling compression means
+   more CPU usage but less disk space.
Compression
+   is especially good for text data, including raw text, Base64, JSON,
+   etc.
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning
+   queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and more likely to
+   suffer from edge case issues than Bitcask or LevelDB, simply because
+   they've been around longer and have been more thoroughly tested via
+   usage in customer environments.
+2. Leveled works better with medium to larger sized objects. It works
+   perfectly well with small objects, but the additional disk space
+   overhead may render LevelDB a better choice if disk space is at a
+   premium and all of your data will be exclusively limited to a few KB
+   or less. This may change as Leveled matures, though.
+
+## Installing leveled
+
+Leveled is included with Riak KV 2.9.8 and beyond, so there is no need
+to install anything further. To switch to leveled, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_leveled_backend},
+    %% ...
+]}
+```
+
+## Configuring leveled
+
+Leveled's default behavior can be modified by adding/changing
+parameters in the `leveled` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | Compression Method. | `native`
+`leveled.compression_point` | Compression Point - The point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | Log Level - Set the minimum log level to be used within leveled. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
+`leveled.max_run_length` | Max Journal Files Per Compaction Run. | `4`
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### No Entropy
+
+If you are using the https protocol, the 2.6 kernel is widely known for
+stalling programs waiting for SSL entropy bits. If you are using https,
+we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line.
The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[Leveled](https://github.com/martinsumner/leveled) is an open source project that has been developed specifically as a backend option for Riak, rather than a generic backend. + + + + diff --git a/content/riak/kv/2.9.8/setup/planning/backend/memory.md b/content/riak/kv/2.9.8/setup/planning/backend/memory.md new file mode 100644 index 0000000000..21e6740a8b --- /dev/null +++ b/content/riak/kv/2.9.8/setup/planning/backend/memory.md @@ -0,0 +1,147 @@ +--- +title: "Memory" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Memory" + identifier: "planning_backend_memory" + weight: 102 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.8/ops/advanced/backends/memory/ + - /riak/kv/2.9.8/ops/advanced/backends/memory/ +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference +[plan backend multi]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/multi +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb + +The Memory storage backend uses in-memory tables to store all data. +This data is never persisted to disk or to any other storage mechanism. +The Memory storage engine is best used for testing Riak clusters or for +storing small amounts of transient state in production systems. + +Internally, the Memory backend uses Erlang Ets tables to manage data. +More information can be found in the +[official Erlang documentation](http://www.erlang.org/doc/man/ets.html). + +## Enabling the Memory Backend + +To enable the memory backend, edit your [configuration files][config reference] +for each Riak node and specify the Memory backend as shown in the following +example: + +```riakconf +storage_backend = memory +``` + +```appconfig +{riak_kv, [ + ..., + {storage_backend, riak_kv_memory_backend}, + ... + ]} +``` + +**Note**: If you *replace* the existing specified backend by removing it +or commenting it out as shown in the above example, data belonging to +the previously specified backend will still be preserved on the +filesystem but will no longer be accessible through Riak unless the +backend is enabled again. + +If you require multiple backends in your configuration, please consult +the [Multi backend documentation][plan backend multi]. + +## Configuring the Memory Backend + +The Memory backend enables you to configure two fundamental aspects of +object storage: maximum memory usage per [vnode][glossary vnode] +and object expiry. + +### Max Memory + +This setting specifies the maximum amount of memory consumed by the +Memory backend. It's important to note that this setting acts on a +*per-vnode basis*, not on a per-node or per-cluster basis. This should +be taken into account when planning for memory usage with the Memory +backend, as the total memory used will be max memory times the number +of vnodes in the cluster. 
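+
+As a worked example, assuming a hypothetical cluster with a ring size of
+64 spread evenly across 4 nodes (so roughly 16 vnodes per node), setting
+the per-vnode maximum to 512MB would allow the Memory backend to consume
+up to about 8GB on each node, and more than that if a node failure
+causes surviving nodes to take over additional vnodes.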
+ +When the threshold value that you set has been met in a particular +vnode, Riak will begin discarding objects, beginning with the oldest +object and proceeding until memory usage returns below the allowable +threshold. + +You can configure maximum memory using the +`memory_backend.max_memory_per_vnode` setting. You can specify +`max_memory_per_vnode` however you'd like, using kilobytes, megabytes, +or even gigabytes. + +The following are all possible settings: + +```riakconf +memory_backend.max_memory_per_vnode = 500KB +memory_backend.max_memory_per_vnode = 10MB +memory_backend.max_memory_per_vnode = 2GB +``` + +```appconfig +%% In the app.config-based system, the equivalent setting is max_memory, +%% which must be expressed in megabytes: + +{riak_kv, [ + %% storage_backend specifies the Erlang module defining the storage + %% mechanism that will be used on this node. + + {storage_backend, riak_kv_memory_backend}, + {memory_backend, [ + ..., + {max_memory, 4096}, %% 4GB in megabytes + ... + ]} +``` + +To determine an optimal max memory setting, we recommend consulting the +documentation on [LevelDB cache size][plan backend leveldb]. + +### TTL + +The time-to-live (TTL) parameter specifies the amount of time an object +remains in memory before it expires. The minimum time is one second. + +In the newer, `riak.conf`-based configuration system, you can specify +`ttl` in seconds, minutes, hours, days, etc. The following are all +possible settings: + +```riakconf +memory_backend.ttl = 1s +memory_backend.ttl = 10m +memory_backend.ttl = 3h +``` + +```appconfig +%% In the app.config-based system, the ttl setting must be expressed in +%% seconds: + +{memory_backend, [ + %% other settings + {ttl, 86400}, %% Set to 1 day + %% other settings + ]} +``` + +> **Dynamically Changing `ttl`** +> +> There is currently no way to dynamically change the `ttl` setting for a +bucket or bucket type. The current workaround would be to define +multiple Memory backends using the Multi backend, each with different +`ttl` values. For more information, consult the documentation on the +[Multi][plan backend multi] backend. + + + + diff --git a/content/riak/kv/2.9.8/setup/planning/backend/multi.md b/content/riak/kv/2.9.8/setup/planning/backend/multi.md new file mode 100644 index 0000000000..92f6a891a1 --- /dev/null +++ b/content/riak/kv/2.9.8/setup/planning/backend/multi.md @@ -0,0 +1,230 @@ +--- +title: "Multi-backend" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Multi-backend" + identifier: "planning_backend_multi" + weight: 103 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.8/ops/advanced/backends/multi/ + - /riak/kv/2.9.8/ops/advanced/backends/multi/ +--- + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb +[plan backend memory]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/memory +[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types +[use admin riak-admin cli]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin + +Riak allows you to run multiple backends within a single Riak cluster. +Selecting the Multi backend enables you to use different storage +backends for different [buckets][concept buckets]. 
Any combination of the three
+available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used.
+
+## Configuring Multiple Backends
+
+You can set up your cluster to use the Multi backend using Riak's
+[configuration files][config reference].
+
+```riakconf
+storage_backend = multi
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_multi_backend},
+    %% ...
+]},
+```
+
+Remember that you must stop and then re-start each node when you change
+storage backends or modify any other configuration.
+
+## Using Multiple Backends
+
+In Riak 2.0 and later, we recommend using multiple backends by applying
+them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process
+involves three steps:
+
+1. Creating a bucket type that enables buckets of that type to use the
+   desired backends
+2. Activating that bucket type
+3. Setting up your application to use that type
+
+Let's say that we've set up our cluster to use the Multi backend and we
+want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface.
+
+We'll call our bucket types `leveldb_backend` and `memory_backend`, but
+you can use whichever names you wish.
+
+```bash
+riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}'
+riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}'
+```
+
+Then, we must activate those bucket types so that they can be used in
+our cluster:
+
+```bash
+riak-admin bucket-type activate leveldb_backend
+riak-admin bucket-type activate memory_backend
+```
+
+Once those types have been activated, any objects stored in buckets
+bearing the type `leveldb_backend` will be stored in LevelDB, whereas
+all objects stored in buckets of the type `memory_backend` will be
+stored in the Memory backend.
+
+More information can be found in our documentation on [using bucket types][usage bucket types].
+
+## Configuring Multiple Backends
+
+Once you've set up your cluster to use multiple backends, you can
+configure each backend on its own. All configuration options available
+for LevelDB, Bitcask, and Memory are available to you when using the
+Multi backend.
+
+#### Using the Newer Configuration System
+
+If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by
+prefacing each configuration with `multi_backend`.
+
+Here is an example of the general form for configuring multiple
+backends:
+
+```riakconf
+multi_backend.$name.$setting_name = setting
+```
+
+If you are using, for example, the LevelDB and Bitcask backends and wish
+to set LevelDB's `bloomfilter` setting to `off` and the Bitcask
+backend's `io_mode` setting to `nif`, you would do that as follows:
+
+```riakconf
+multi_backend.leveldb.bloomfilter = off
+multi_backend.bitcask.io_mode = nif
+```
+
+#### Using the Older Configuration System
+
+If you are using the older, `app.config`-based configuration system,
+configuring multiple backends involves adding one or more
+backend-specific sections to your `riak_kv` settings (in addition to
+setting the `storage_backend` setting to `riak_kv_multi_backend`, as
+shown above).
+
+> **Note**: If you are defining multiple file-based backends of the same
+type, each of these must have a separate `data_root` directory defined.
+
+While all configuration parameters can be placed anywhere within the
+`riak_kv` section of `app.config`, in general we recommend that you
+place them in the section containing other backend-related settings to
+keep the settings organized.
+
+Below is the general form for your `app.config` file:
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        %% Here's where you set the individual multiplexed backends
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_mult/"},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_expiry_mult/"},
+            {expiry_secs, 86400},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb with a different configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"memory_mult">>, riak_kv_memory_backend, [
+            %% memory configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]}
+    ]},
+    %% ...
+]},
+```
+
+Note that in each of the subsections of the `multi_backend` setting, the
+name of each backend you wish to configure can be anything you would
+like. Directly after naming the backend, you must specify which of the
+backends corresponds to that name, i.e. `riak_kv_bitcask_backend`,
+`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have
+done that, the various configurations for each named backend can be set
+as objects in an Erlang list.
+
+## Example Configuration
+
+Imagine that you are using both Bitcask and LevelDB in your cluster, and
+you would like storage to default to Bitcask. The following
+configuration would create two backend configurations, named
+`bitcask_mult` and `leveldb_mult`, respectively, while also setting the
+data directory for each backend and specifying that `bitcask_mult` is
+the default.
+
+```riakconf
+storage_backend = multi
+
+multi_backend.bitcask_mult.storage_backend = bitcask
+multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult
+
+multi_backend.leveldb_mult.storage_backend = leveldb
+multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult
+
+multi_backend.default = bitcask_mult
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            {data_root, "/var/lib/riak/bitcask"}
+        ]},
+        {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+            {data_root, "/var/lib/riak/leveldb"}
+        ]}
+    ]}
+    %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or for the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is at 50%
+or less. For example, using three backends with each set to 50% memory
+usage will inevitably lead to memory problems.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/planning/best-practices.md b/content/riak/kv/2.9.8/setup/planning/best-practices.md
new file mode 100644
index 0000000000..5a894ec667
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/planning/best-practices.md
@@ -0,0 +1,145 @@
+---
+title: "Scaling and Operating Riak Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Best Practices"
+    identifier: "planning_best_practices"
+    weight: 105
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/planning/best-practices
+  - /riak/kv/2.9.8/ops/building/planning/best-practices
+---
+
+[use ref handoff]: {{<baseurl>}}riak/kv/2.9.8/using/reference/handoff
+[config mapreduce]: {{<baseurl>}}riak/kv/2.9.8/configuring/mapreduce
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#active-anti-entropy-aae
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes
+
+Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster.
+
+## Disk Capacity
+
+Filling up disks is a serious problem in Riak. In general, you should
+add capacity under the following conditions:
+
+* a disk becomes more than 80% full
+* you have fewer than 10 days of capacity remaining at current rates of
+  growth
+
+## RAID Levels
+
+Riak provides resilience through its built-in redundancy.
+
+* RAID0 can be used to increase the performance at the expense of
+  single-node reliability
+* RAID5/6 can be used to increase the reliability over RAID0 but still
+  offers higher performance than single disks
+* You should choose a RAID level (or no RAID) that you’re comfortable
+  with
+
+## Disk Leeway
+
+* Adding new nodes instantly increases the total capacity of the
+  cluster, but you should allow enough internal network capacity that
+  [handing off][use ref handoff] existing data outpaces the arrival of new
+  data.
+* Once you’ve reached a scale at which the amount of new data arriving
+  is a small fraction of the cluster's total capacity, you can add new
+  nodes when you need them. You should be aware, however, that adding
+  new nodes can actually _increase_ disk usage on existing nodes in the
+  short term as data is rebalanced within the cluster.
+* If you are certain that you are likely to run out of capacity, we
+  recommend allowing a week or two of leeway so that you have plenty of
+  time to add nodes and for [handoff][use ref handoff] to occur before the disks reach
+  capacity
+* For large volumes of storage it's usually prudent to add more capacity
+  once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+  processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you’ll have spare
+  capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be “bursty,” i.e. it tends to vary both quite
+  a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times the amount of intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+  fill up
+* the current node's IO/CPU activity is higher than average for an
+  extended period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node uses 1 TB but the
+  rest use 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic, as shown in the sketch below
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node].
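+
+As a sketch, here is what a two-node expansion might look like from the
+command line. The node names are hypothetical, and the transfer limit
+shown is illustrative rather than a recommendation:
+
+```bash
+# Stage both joins in a single plan so the ring is rebalanced only once
+riak-admin cluster join riak@node4.example.com
+riak-admin cluster join riak@node5.example.com
+
+# Review the planned ownership changes, then commit them together
+riak-admin cluster plan
+riak-admin cluster commit
+
+# Cap the number of concurrent handoff transfers per node so that
+# live customer traffic keeps priority during the rebalance
+riak-admin transfer-limit 2
+```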
+ +## Scaling + +* All large-scale systems are bound by the availability of some + resources +* From a stability point of view, the best state for a busy Riak cluster + to maintain is the following: + * New network connections are limited to ensure that existing network + connections consume most network bandwidth + * CPU at < 30% + * Disk IO at < 90% +* You should use HAProxy or your application servers to limit new + network connections to keep network and IO below 90% and CPU below + 30%. + + + + diff --git a/content/riak/kv/2.9.8/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.9.8/setup/planning/bitcask-capacity-calc.md new file mode 100644 index 0000000000..726de5c683 --- /dev/null +++ b/content/riak/kv/2.9.8/setup/planning/bitcask-capacity-calc.md @@ -0,0 +1,104 @@ +--- +title: "Bitcask Capacity Calculator" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Bitcask Capacity Calculator" + identifier: "planning_cluster_bitcask_capacity" + weight: 104 + parent: "planning" +toc: true +aliases: + - /riak/2.9.8/ops/building/planning/bitcask + - /riak/kv/2.9.8/ops/building/planning/bitcask +--- + +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask + +These calculators will assist you in sizing your cluster if you plan to +use the default [Bitcask][plan backend bitcask] storage back end. + +This page is designed to give you a rough estimate when sizing your +cluster. The calculations are a _best guess_, and they tend to be a bit +on the conservative side. It's important to include a bit of head room +as well as room for unexpected growth so that if demand exceeds +expectations you'll be able to add more nodes to the cluster and stay +ahead of your requirements. + +<div id="node_info" class="calc_info"></div> +<div class="calculator"> + <ul> + <li> + <label for="n_total_keys">Total Number of Keys:</label> + <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input"> + <span class="error_span" id="n_total_keys_error"></span> + </li> + <li> + <label for="n_bucket_size">Average Bucket Size (Bytes):</label> + <input id="n_bucket_size"type="text" size="7" name="n_bucket_size" value="" class="calc_input"> + <span class="error_span"id="n_bucket_size_error"></span> + </li> + <li> + <label for="n_key_size">Average Key Size (Bytes):</label> + <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input"> + <span class="error_span" id="n_key_size_error"></span> + </li> + <li> + <label for="n_record_size">Average Value Size (Bytes):</label> + <input id="n_record_size"type="text" size="7" name="n_record_size" value="" class="calc_input"> + <span class="error_span"id="n_record_size_error"></span> + </li> + <li> + <label for="n_ram">RAM Per Node (in GB):</label> + <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input"> + <span class="error_span" id="n_ram_error"></span> + </li> + <li> + <label for="n_nval"><i>N</i> (Number of Write Copies):</label> + <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input"> + <span class="error_span" id="n_nval_error"></span> + </li> +</ul> +</div> + +## Recommendations + +<span id="recommend"></span> + +## Details on Bitcask RAM Calculation + +With the above information in mind, the following variables will factor +into your RAM calculation: + +Variable | Description +:--------|:----------- +Static Bitcask per-key overhead | 44.5 bytes per key +Estimated average 
bucket-plus-key length | The combined number of characters your bucket + key names will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The actual equation
+
+Approximate RAM Needed for Bitcask = (static bitcask per-key overhead +
+estimated average bucket+key length in bytes) * estimated total number of
+keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+With these numbers, (44.5 + 30) * 50,000,000 * 3 comes to roughly 11.2
+billion bytes, so the amount of RAM you would need for Bitcask is about
+**10.4 GB across your entire cluster.**
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/planning/cluster-capacity.md b/content/riak/kv/2.9.8/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..2b04ea3c3b
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/planning/cluster-capacity.md
@@ -0,0 +1,238 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/planning/cluster
+  - /riak/kv/2.9.8/ops/building/planning/cluster
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/2.9.8/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.8/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users. Memory is essential for running complex MapReduce queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM without
+  degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory.
The keydir is a hash table that maps each
+concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for
+each file contained within each Bitcask backend) to a fixed-size
+structure giving the file, offset, and size of the most recently written
+entry for that bucket + key on disk.
+
+To learn about Bitcask see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.
+
+If your calculated RAM needs will exceed your hardware resources (in
+other words, if you can't afford the RAM needed to use Bitcask), we
+recommend that you use LevelDB.
+
+Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.
+
+### LevelDB
+
+If RAM requirements for Bitcask are prohibitive, we recommend use of
+the LevelDB backend. While LevelDB doesn't require a large amount of RAM
+to operate, supplying it with the maximum amount of memory available leads to higher performance.
+
+For more information see [LevelDB][plan backend leveldb].
+
+## Disk
+
+Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs:
+
+#### Estimated Total Objects * Average Object Size * n_val
+
+For example:
+
+* 50,000,000 objects
+* an average object size of two kilobytes (2,048 bytes)
+* the default `n_val` of 3
+
+Then you would need approximately **286 GB** of disk space in the entire cluster to accommodate your data.
+
+We believe that databases should be durable out of the box. When we
+built Riak, we did so in a way that you could write to disk while
+keeping response times below your users' expectations. So this
+calculation assumes that you'll be keeping the entire data set on disk.
+
+Many of the considerations taken when configuring a machine to serve a
+database apply to configuring a node for Riak as well. Mounting
+disks with `noatime` and having separate disks for your OS and Riak data
+lead to much better performance. See [Planning for a
+Riak System](../start) for more information.
+
+### Disk Space Planning and Ownership Handoff
+
+When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is more intensive disk space usage on the remaining nodes; in rare cases, this can fill the disk of one or more of those nodes.
+
+When making disk space planning decisions, we recommend that you:
+
+* assume that one or more nodes may be down at any time
+* monitor your disk space usage and add additional space when usage
+  exceeds 50-60% of available space.
+
+Another possibility worth considering is using Riak with a filesystem
+that allows for growth, for example
+[LVM],
+[RAID](http://en.wikipedia.org/wiki/RAID), or
+[ZFS](http://en.wikipedia.org/wiki/ZFS).
+
+## Read/Write Profile
+
+Read/write ratios, as well as the distribution of key access, should
+influence the configuration and design of your cluster. If your use case
+is write heavy, you will need less RAM for caching, and if only a
+certain portion of keys is accessed regularly, such as a [Pareto
+distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you
+won't need as much RAM available to cache those keys' values.
+
+## Number of Nodes
+
+The number of nodes (i.e.
physical servers) in your Riak Cluster depends +on the number of times data is [replicated][concept replication] across the +cluster. To ensure that the cluster is always available to respond to +read and write requests, we recommend a "sane default" of N=3 +replicas. This requirement can be met with a 3 or 4-node +cluster. + +For production deployments, however, we recommend using no fewer than 5 +nodes, as node failures in smaller clusters can compromise the +fault-tolerance of the system. Additionally, in clusters smaller than 5 +nodes, a high percentage of the nodes (75-100% of them) will need to +respond to each request, putting undue load on the cluster that may +degrade performance. For more details on this recommendation, see our +blog post on [Why Your Riak Cluster Should Have at Least Five +Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/). + +## Scaling + +Riak can be scaled in two ways: vertically, via improved hardware, and +horizontally, by adding more nodes. Both ways can provide performance +and capacity benefits, but should be used in different circumstances. +The [riak-admin cluster command][use admin riak-admin#cluster] can +assist scaling in both directions. + +#### Vertical Scaling + +Vertical scaling, or improving the capabilities of a node/server, +provides greater capacity to the node but does not decrease the overall +load on existing members of the cluster. That is, the ability of the +improved node to handle existing load is increased but the load itself +is unchanged. Reasons to scale vertically include increasing IOPS (I/O +Operations Per Second), increasing CPU/RAM capacity, and increasing disk +capacity. + +#### Horizontal Scaling + +Horizontal scaling, or increasing the number of nodes in the cluster, +reduces the responsibilities of each member node by reducing the number +of partitions and providing additional endpoints for client connections. +That is, the capacity of each individual node does not change but its +load is decreased. Reasons to scale horizontally include increasing I/O +concurrency, reducing the load on existing nodes, and increasing disk +capacity. + +> **Note on horizontal scaling** +> +> When scaling horizontally, it's best to add all planned nodes at once +with multiple `riak-admin cluster join` commands followed by +a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster. + +#### Reducing Horizontal Scale + +If a Riak cluster is over provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command. + +## Ring Size/Number of Partitions + +Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference]. + +The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes we recommend a larger ring size. + +The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes. + +There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses. 
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine, while a 10-node cluster should work well with a ring size of 128 or 256 (64 is too small, and 512 is likely too large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4 | 64, 128
+5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the [Riak mailing list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com) for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will compensate for network partitions, they do cause increased load on the system. In addition, running in a virtualized environment that lacks low-latency I/O access can drastically decrease performance. Before putting your Riak cluster in production, it is recommended that you gain a full understanding of your environment's behavior so that you know how your cluster performs under load for an extended period of time. Doing so will help you size your cluster for future growth and lead to optimal performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide reliable access to data. A Riak cluster can be deployed in many different network environments. We recommend keeping the latency between nodes as low as possible, as high latency leads to sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O available to it, especially in the case of write-heavy workloads. Riak functions much like any other database, and the design of your disk access should take this into account. Because Riak is clustered and your data is stored on multiple physical nodes, consider forgoing a traditional RAID setup for redundancy and focus on providing the lowest latency possible, using SATA drives or SSDs, for example.
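+
+If you are unsure how much I/O headroom a node actually has, a simple baseline observation with standard tools can help. Here is a minimal sketch using `iostat` from the sysstat package (the device name is a placeholder for your Riak data volume):
+
+```bash
+# Watch extended device statistics every 5 seconds while the node is under
+# representative load; pay attention to r/s and w/s (IOPS) and utilization.
+iostat -dxm 5 sda
+```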
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/planning/future.md b/content/riak/kv/2.9.8/setup/planning/future.md
new file mode 100644
index 0000000000..67beea8fe6
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/planning/future.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+#menu:
+#  riak_kv-2.9.8:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/planning/operating-system.md b/content/riak/kv/2.9.8/setup/planning/operating-system.md
new file mode 100644
index 0000000000..a4efea5824
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/planning/operating-system.md
@@ -0,0 +1,30 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+aliases:
+---
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.8/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/planning/start.md b/content/riak/kv/2.9.8/setup/planning/start.md
new file mode 100644
index 0000000000..c5d5ef9b53
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/planning/start.md
@@ -0,0 +1,61 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/planning/system-planning
+  - /riak/kv/2.9.8/ops/building/planning/system-planning
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring your Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have strengths and weaknesses, so if you are unsure of which backend you need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP implementations. We don't recommend VRRP behavior for the VIP, because you'll lose the benefit of spreading client query load across all nodes in the ring.
+
+For **reverse-proxy** configurations (HTTP interface), any one of the following should work adequately:
+
+* haproxy
+* squid
+* varnish
+* nginx
+* lighttpd
+* Apache
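+
+As a sketch of the reverse-proxy approach, a minimal HAProxy configuration for the HTTP interface might look like the following (the addresses, names, and health-check choice are illustrative assumptions, not a tuned production setup):
+
+```
+frontend riak_http
+    bind *:8098
+    mode http
+    default_backend riak_nodes
+
+backend riak_nodes
+    mode http
+    balance roundrobin
+    option httpchk GET /ping
+    server riak1 10.0.1.1:8098 check
+    server riak2 10.0.1.2:8098 check
+    server riak3 10.0.1.3:8098 check
+```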
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/search.md b/content/riak/kv/2.9.8/setup/search.md
new file mode 100644
index 0000000000..b26bcf4245
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/search.md
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/upgrading.md b/content/riak/kv/2.9.8/setup/upgrading.md
new file mode 100644
index 0000000000..68a5318251
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/upgrading.md
@@ -0,0 +1,38 @@
+---
+title: "Upgrading Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Upgrading"
+    identifier: "upgrading"
+    weight: 102
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[upgrade checklist]: ./checklist
+[upgrade version]: ./version
+[upgrade cluster]: ./cluster
+[upgrade mdc]: ./multi-datacenter
+[upgrade search]: ./search
+
+## In This Section
+
+### [Production Checklist][upgrade checklist]
+
+An overview of what to consider before upgrading Riak KV in a production environment.
+
+[Learn More >>][upgrade checklist]
+
+### [Upgrading to Riak KV 2.9.8][upgrade version]
+
+A tutorial on updating to Riak KV 2.9.8.
+
+[Learn More >>][upgrade version]
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/upgrading/checklist.md b/content/riak/kv/2.9.8/setup/upgrading/checklist.md
new file mode 100644
index 0000000000..415bf79955
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/upgrading/checklist.md
@@ -0,0 +1,225 @@
+---
+title: "Production Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Production Checklist"
+    identifier: "upgrading_checklist"
+    weight: 100
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/upgrading/production-checklist/
+  - /riak/kv/2.9.8/ops/upgrading/production-checklist/
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/2.9.8/using/performance/open-files-limit
+[perf index]: {{<baseurl>}}riak/kv/2.9.8/using/performance
+[ntp]: http://www.ntp.org/
+[security basics]: {{<baseurl>}}riak/kv/2.9.8/using/security/basics
+[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.8/configuring/load-balancing-proxy
+[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+[config backend]: {{<baseurl>}}riak/kv/2.9.8/configuring/backend
+[config basic]: {{<baseurl>}}riak/kv/2.9.8/configuring/basic
+[config strong consistency]: {{<baseurl>}}riak/kv/2.9.8/configuring/strong-consistency
+[usage search]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency
+[apps replication properties]: {{<baseurl>}}riak/kv/2.9.8/developing/app-guide/replication-properties
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/reference/strong-consistency
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/bucket-types
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.8/using/admin/commands
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-control
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/inspecting-node
+[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.8/using/troubleshooting/http-204
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin
+[develop client libraries]: {{<baseurl>}}riak/kv/2.9.8/developing/client-libraries
+[SANs]: http://en.wikipedia.org/wiki/Storage_area_network
+
+Deploying Riak KV to a real-time production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using iperf to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config basic] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
+    * If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][config strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state?
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak-admin`][use admin riak-admin])
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median and 95th and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs? (A sketch of a
+  benchmark configuration follows this list.)
+* Are the [client libraries][develop client libraries] in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying?
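+
+As a starting point for the benchmarking item above, here is a minimal Basho Bench configuration sketch (the endpoint IP, key space, object size, and operation mix are illustrative assumptions, not recommendations):
+
+```erlang
+%% sample.config -- drive a 1:1 get/update mix against the PB interface
+{mode, max}.
+{duration, 10}.        %% minutes
+{concurrent, 5}.       %% worker processes
+{driver, basho_bench_driver_riakc_pb}.
+{key_generator, {int_to_bin_bigendian, {uniform_int, 10000}}}.
+{value_generator, {fixed_bin, 2048}}.
+{riakc_pb_ips, [{127,0,0,1}]}.
+{riakc_pb_replies, 1}.
+{operations, [{get, 1}, {update, 1}]}.
+```
+
+Results land in a `tests/` directory by default, which you can compare against the latency and throughput expectations gathered above.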
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak-admin diag <check>`, where `<check>` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak-admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production
+    systems
+  - High is for problems that impact production or soon-to-be-production
+    systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those
+    issues that are likely to turn into production outages very soon.
+    On-call engineers respond to urgent requests within 30 minutes, 24/7.
+* Does your team know how to gather `riak-debug` results from the whole
+  cluster when opening tickets? If not, that process goes something like
+  this:
+  - SSH into each machine, run `riak-debug`, and grab the resultant
+    `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open
+    a new High- or Urgent-priority ticket
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at
+the metrics gathered above. Based on the numbers you're seeing so far,
+configure alerting thresholds on your latencies, disk consumption, and
+memory. These are the places most likely to give you advance warning of
+trouble.
+
+When you go to increase capacity down the line, having historic metrics
+will give you very clear indicators of having resolved scaling problems,
+as well as metrics for understanding what to upgrade and when.
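+
+One lightweight way to begin collecting those historic metrics, assuming the default HTTP listener on port 8098, is to periodically archive the `/stats` endpoint (a sketch; a production deployment would feed these numbers into your monitoring system instead):
+
+```bash
+# Capture a timestamped snapshot of Riak's /stats JSON once a minute.
+while true; do
+  curl -s http://localhost:8098/stats > "riak-stats-$(date +%Y%m%dT%H%M%S).json"
+  sleep 60
+done
+```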
+ + + + + diff --git a/content/riak/kv/2.9.8/setup/upgrading/cluster.md b/content/riak/kv/2.9.8/setup/upgrading/cluster.md new file mode 100644 index 0000000000..87ed2c3cfd --- /dev/null +++ b/content/riak/kv/2.9.8/setup/upgrading/cluster.md @@ -0,0 +1,303 @@ +--- +title: "Upgrading a Cluster" +description: "" +project: "riak_kv" +project_version: "2.9.8" +menu: + riak_kv-2.9.8: + name: "Upgrading a Cluster" + identifier: "upgrading_cluster" + weight: 102 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/2.9.8/ops/upgrading/rolling-upgrades/ + - /riak/kv/2.9.8/ops/upgrading/rolling-upgrades/ +--- + +[production checklist]: {{<baseurl>}}riak/kv/2.9.8/setup/upgrading/checklist +[use admin riak control]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-control +[use admin commands]: {{<baseurl>}}riak/kv/2.9.8/using/admin/commands +[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/secondary-indexes +[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.8/configuring/v3-multi-datacenter +[jmx monitor]: {{<baseurl>}}riak/kv/2.9.8/using/reference/jmx +[snmp]: {{<baseurl>}}riak/kv/2.9.8/using/reference/snmp + +{{% note title="Note on upgrading Riak KV from older versions" %}} +Riak KV upgrades are tested and supported for two feature release versions. +For example, upgrades from 1.1.x to 1.3.x are tested and supported, +while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new +version of Riak KV that is more than two feature releases ahead, we +recommend first upgrading to an intermediate version. For example, in an +upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x +before upgrading to 1.4.x. + +If you run [Riak Control]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-control), you should disable it during the rolling upgrade process. +{{% /note %}} + +Riak KV nodes negotiate with each other to determine supported +operating modes. This allows clusters containing mixed-versions of Riak KV +to properly interoperate without special configuration, and simplifies +rolling upgrades. + +Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading. + +## Debian/Ubuntu + +The following example demonstrates upgrading a Riak KV node that has been +installed with the Debian/Ubuntu packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +2\. Back up the Riak KV node's `/etc` and `/data` directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Upgrade Riak KV: + +```bash +sudo dpkg -i <riak_package_name>.deb +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +5\. Verify Riak KV is running the new version: + +```bash +riak version +``` + +6\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +* `»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +* While the node was offline, other nodes may have accepted writes on its +behalf. 
This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7. The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+  * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_jmx` - See [JMX Monitoring][jmx monitor] for more information.
+  * `snmp` - See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should examine any custom patches contained in the
+`basho-patches` directory to determine whether they still apply to the
+upgraded version. If patches no longer apply, remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following lists the locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/upgrading/multi-datacenter.md b/content/riak/kv/2.9.8/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..85f0c1f009
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,24 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+#menu:
+#  riak_kv-2.9.8:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+aliases:
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/upgrading/search.md b/content/riak/kv/2.9.8/setup/upgrading/search.md
new file mode 100644
index 0000000000..38c7a77803
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/upgrading/search.md
@@ -0,0 +1,281 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.9.8"
+menu:
+  riak_kv-2.9.8:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.9.8/ops/advanced/upgrading-search-2
+  - /riak/kv/2.9.8/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features, i.e.
no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage, as analysis will temporarily be performed
+by both systems. Finally, Solr runs in a JVM process requiring its own
+RAM. A good start is 2 GB, but more will be required for heavier
+workloads. At the same time, do not make the heap too large, as that could
+cause lengthy garbage-collection pauses.
+
+As the new Search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during the upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index, and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data, as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+> Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it would require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space, since merge index's GC
+algorithm is bad at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the option is called
+   `yokozuna`. If you've moved to the new `riak.conf` configuration file,
+   it's called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+      %% Other configs
+      {enabled, true},
+      %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps, which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable schema
+in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+    To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+    instructions to learn how to define your XML file. Once you've created
+    the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+    Once a bucket is associated with the new Search, all objects that are
+    written or modified in Riak will be indexed by **both** legacy and new
+    Search. However, the HTTP and client query interfaces will still
+    continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+    Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+    the following code into the shell. It clears the Search hash trees for
+    each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+    ```
+
+    Press `Ctrl-D` to exit from the attached shell.
+
+    In the background, AAE will rebuild the hash trees and exchange them
+    with KV. These exchanges will notice that objects are missing and index
+    them in new Search.
+
+    <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+has occurred on every node.
+
+    ```bash
+    riak-admin search aae-status
+    ```
+
+    First, you must wait until all trees are rebuilt. This may take a
+    while, as each node is configured, by default, to build a maximum of
+    one tree per hour. You can determine when a tree is built by looking
+    at the `Entropy Trees` section. When a tree is not built, it will show
+    `--` under the `Built (ago)` column. Otherwise, it will list how long
+    ago the tree was built in a human-friendly format.
Here is an example
    of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792  --
+    319703483166135013357056057156686910549735243776  --
+    ...
+    ```
+
+    Here is an example of built trees:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792  12.3 hr
+    319703483166135013357056057156686910549735243776  5.3 hr
+    ...
+    ```
+
+    After all the trees are built, you then have to wait for a full
+    exchange round to occur for every partition on every node. That is,
+    the full exchange round must be **NEWER** than the time the tree was
+    built. That way you know the exchange was based on the latest tree.
+    The exchange information is found under the `Exchanges` section.
+    Under that section there are two columns: `Last (ago)` and `All
+    (ago)`. You want to wait until the value in the `All (ago)` column is
+    newer than the value of `Built (ago)` in the `Entropy Trees` section.
+    For example, given the entropy tree output above, this output would
+    indicate that both partitions have had a full exchange round since the
+    latest tree was built:
+
+    ```
+    ================================== Exchanges ==================================
+    Index                                              Last (ago)    All (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792  12.1 hr       12.1 hr
+    319703483166135013357056057156686910549735243776  5.1 hr        5.2 hr
+    ...
+    ```
+
+    Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` is newer
+    than `5.3 hr`. Once the exchange is newer for every partition on every
+    node, you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command to give HTTP and PB query control to
+the new Riak Search.
+
+    ```bash
+    riak-admin search switch-to-new-search
+    ```
+
+    <div class="note">
+    <div class="title">Check Results Before Switching (Optional)</div>
+    Up until this point, all incoming queries are serviced by the legacy
+    Search system. After `switch-to-new-search` is run, all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching, you can use its dedicated
+    HTTP resource at `/search/query/<index>?q=...`.
+    </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+      %% Other configs
+      {enabled, false},
+      %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search, it causes
+the commit hooks to persist even when legacy Search is disabled and the
+`search` property is set to `false`.
+
+    New Search has code to expunge the legacy hooks from the raw ring, but
+    it only occurs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than during the middle of a migration.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/setup/upgrading/version.md b/content/riak/kv/2.9.8/setup/upgrading/version.md
new file mode 100644
index 0000000000..3d5d5e046d
--- /dev/null
+++ b/content/riak/kv/2.9.8/setup/upgrading/version.md
@@ -0,0 +1,252 @@
+---
+title: "Upgrading to Riak KV 2.9.8"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Upgrading to 2.9.8"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.8/upgrade-v20/
+  - /riak/kv/2.9.8/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.8/setup/upgrading/cluster/
+
+---
+
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.8/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.8/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/2.9.8/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.8/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.8/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.8/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/2.9.8/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 2.9.8 by following the instructions below.
+
+{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+    * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to be able to downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config.
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to override the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete.
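+
+For orientation, the per-node loop above condenses to roughly the following on Debian/Ubuntu (the package name is a placeholder; the detailed steps below cover RHEL/CentOS paths and the EE-to-OSS caveats):
+
+```bash
+riak stop
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+sudo dpkg -i »riak_package_name«.deb
+riak start
+riak version                                       # confirm the new version
+riak-admin wait-for-service riak_kv »target_node«
+riak-admin transfers                               # wait until no transfers are active
+```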
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Transitioning to the Leveled Backend
+
+
+[Riak KV 2.9][release notes] introduced leveled, a new backend written specifically for Riak:
+
+The leveled backend is not compatible with other backends in terms of the serialised disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+* First transition to 2.9 with the current backend in place, minimising the time spent running mismatched versions in parallel;
+* Then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+{{% note %}}
+You only need [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) to upgrade to Riak KV 2.9.8 if you plan to use Riak search.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features may be introduced that we either believe are so important that we automatically opt in users on upgrade, or that cannot provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk but are considered important enough for users to be automatically opted in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:--- |
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade, data written in HLL format is unreadable. |
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade to version 2.9, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+
+## Upgrading process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories.
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+
+4\. Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package now, before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+    ```advanced.config
+    {riak_kv, [
+      {override_capability, [
+        {object_hash_version, [{use, legacy}] }
+      ]}
+    ]}
+    ```
+
+5.b\. (**Optional**) If you would like to keep your LevelDB compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+    ```riak.conf
+    leveldb.compression.algorithm=snappy
+    ```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS 2.2.3 or earlier, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+    ```advanced.config
+    {riak_repl, [
+      {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+      ]}
+    ]}
+    ```
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.8 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf.
This data is transferred to the node when it becomes available. + +10\. Repeat the process for the remaining nodes in the cluster. + + +### Basho Patches + +After upgrading, you should ensure that any custom patches contained in the `basho-patches` directory are examined to determine their application to the upgraded version. You can find this information in the [Release Notes]. + +If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production. + +The following lists locations of the `basho-patches` directory for +each supported operating system: + +- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches` +- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches` +- FreeBSD: `/usr/local/lib/riak/lib/basho-patches` +- SmartOS: `/opt/local/lib/riak/lib/basho-patches` +- Solaris 10: `/opt/riak/lib/basho-patches` + +### Riaknostic + +It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility Riaknostic. + +Ensure that Riak KV is running on the node, and issue the following command: + +```bash +riak-admin diag +``` + +Make the recommended changes from the command output to ensure optimal node operation. + + + + + diff --git a/content/riak/kv/2.9.8/using.md b/content/riak/kv/2.9.8/using.md new file mode 100644 index 0000000000..f64622ea96 --- /dev/null +++ b/content/riak/kv/2.9.8/using.md @@ -0,0 +1,78 @@ +--- +title: "Using Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Using" + identifier: "managing" + weight: 201 + pre: database +toc: true +aliases: +--- + +[use running cluster]: ../using/running-a-cluster +[use admin index]: ../using/admin/ +[cluster ops index]: ../using/cluster-operations +[repair recover index]: ../using/repair-recovery +[security index]: ../using/security +[perf index]: ../using/performance +[troubleshoot index]: ../using/troubleshooting +[use ref]: ../using/reference + +## In This Section + +#### [Running a Cluster][use running cluster] + +A guide on basic cluster setup. + +[Learn More >>][use running cluster] + +#### [Cluster Administration][use admin index] + +Tutorials and reference documentation on cluster administration commands as well as command-line tools. + +[Learn More >>][use admin index] + +#### [Cluster Operations][cluster ops index] + +Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups. + +[Learn More >>][cluster ops index] + +#### [Repair & Recovery][repair recover index] + +Contains documentation on repairing a cluster, recovering from failure, and common errors. + +[Learn More >>][repair recover index] + +#### [Security][security index] + +Information on securing your Riak KV cluster. + +[Learn More >>][security index] + +#### [Performance][perf index] + +Articles on benchmarking your Riak KV cluster and improving performance. + +[Learn More >>][perf index] + +#### [Troubleshooting][troubleshoot index] + +Guides on troubleshooting issues and current product advisories. + +[Learn More >>][troubleshoot index] + +#### [Reference][use ref] + +Articles providing background information and implementation details on topics such as logging, bucket types, and search. 
+
+[Learn More >>][use ref]
+
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/admin.md b/content/riak/kv/2.9.8/using/admin.md
new file mode 100644
index 0000000000..33eb87b1f4
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/admin.md
@@ -0,0 +1,51 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/cluster-admin
+  - /riak/kv/2.9.8/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/admin/commands.md b/content/riak/kv/2.9.8/using/admin/commands.md
new file mode 100644
index 0000000000..91df7fe05f
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/admin/commands.md
@@ -0,0 +1,378 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/cluster-admin
+  - /riak/kv/2.9.8/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, or adding some nodes while
+removing others.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command. This will execute the changes that
+have been staged and reviewed.
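+
+Concretely, a single staged change looks like this (the nodename is an example):
+
+```bash
+riak-admin cluster join riak1@127.0.0.1   # stage the change
+riak-admin cluster plan                   # review what will happen
+riak-admin cluster commit                 # execute the staged change
+```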
+This will execute the changes that
+have been staged and reviewed.
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
+interface are also available as self-standing commands. The `riak-admin
+member-status` command is now the `riak-admin cluster status` command,
+`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+| node               |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` - There are five possible values for status:
+  * `valid` - The node has begun participating in cluster operations
+  * `leaving` - The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` - The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` - The node is in the process of joining the cluster but
+    has not yet completed the join process
+  * `down` - The node is not currently responding
+* `avail` - There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` - What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` - The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant node][cluster ops add remove node] will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
+[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
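+
+As a quick end-to-end illustration, here is what a typical staging cycle
+might look like when adding a single node (the nodename below is
+illustrative):
+
+```bash
+# Stage a join to an existing cluster member
+riak-admin cluster join riak1@127.0.0.1
+
+# Review the staged changes and the resulting ring transitions
+riak-admin cluster plan
+
+# Apply the plan once it looks correct, or discard it with `cluster clear`
+riak-admin cluster commit
+```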
+
+If there are staged changes, you
+will see a detailed listing of what will take place upon commit, what
+the cluster will look like afterward, etc.
+
+For example, if a `cluster leave` operation is staged in a 3-node cluster, the output will look something like this:
+
+```
+=============================== Staged Changes ================================
+Action         Details(s)
+-------------------------------------------------------------------------------
+leave          'dev2@127.0.0.1'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 2 cluster transitions
+
+###############################################################################
+                         After cluster transition 1/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+leaving    32.8%      0.0%    'dev2@127.0.0.1'
+valid      34.4%     50.0%    'dev1@127.0.0.1'
+valid      32.8%     50.0%    'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+
+Transfers resulting from cluster changes: 38
+ 6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1'
+11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1'
+ 5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1'
+16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1'
+
+###############################################################################
+                         After cluster transition 2/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      50.0%      --      'dev1@127.0.0.1'
+valid      50.0%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+```
+
+Notice that there are distinct sections of the output for each of the
+transitions that the cluster will undergo, including warnings, planned
+data transfers, etc.
+
+## commit
+
+Commits the currently staged cluster changes. Staged cluster changes
+must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed.
+
+```bash
+riak-admin cluster commit
+```
+
+## clear
+
+Clears the currently staged cluster changes.
+
+```bash
+riak-admin cluster clear
+```
+
+The effect of `riak-admin cluster clear` depends on what has been staged:
+
+* If a `leave` operation has been staged, `riak-admin cluster clear` will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* Running `riak-admin cluster clear` on a node that remains in the cluster leaves that node otherwise unaffected.
+
+## partitions
+
+Prints primary, secondary, and stopped partition indices and IDs either
+for the current node or for another specified node.
+The following
+prints that information for the current node:
+
+```bash
+riak-admin cluster partitions
+```
+
+This would print the partition information for a different node in the
+cluster:
+
+```bash
+riak-admin cluster partitions --node=<node>
+```
+
+Partition information is contained in a table like this:
+
+```
+Partitions owned by 'dev1@127.0.0.1':
++---------+-------------------------------------------------+--+
+|  type   |                      index                      |id|
++---------+-------------------------------------------------+--+
+| primary |                        0                        |0 |
+| primary | 91343852333181432387730302044767688728495783936 |4 |
+| primary |182687704666362864775460604089535377456991567872 |8 |
+|   ...   |                      ....                       |..|
+| primary |1438665674247607560106752257205091097473808596992|63|
+|secondary|                       --                        |--|
+| stopped |                       --                        |--|
++---------+-------------------------------------------------+--+
+```
+
+## partition-count
+
+Displays the current partition count either for the whole cluster or for
+a particular node. This would display the partition count for the
+cluster:
+
+```bash
+riak-admin cluster partition-count
+```
+
+This would display the count for a node:
+
+```bash
+riak-admin cluster partition-count --node=<node>
+```
+
+When retrieving the partition count for a node, you'll see a table like
+this:
+
+```
++--------------+----------+-----+
+|     node     |partitions| pct |
++--------------+----------+-----+
+|dev1@127.0.0.1|    22    | 34.4|
++--------------+----------+-----+
+```
+
+The `partitions` column displays the number of partitions claimed by the
+node, while the `pct` column displays the percentage of the ring claimed.
+
+## partition
+
+The `cluster partition` command enables you to convert partition IDs to
+indexes and vice versa using the `partition id` and `partition index`
+commands, respectively. Let's say that you run the `riak-admin cluster
+partitions` command and see that you have a variety of partitions, one
+of which has an index of
+`1004782375664995756265033322492444576013453623296`.
+You can convert
+that index to an ID like this:
+
+```bash
+riak-admin cluster partition index=1004782375664995756265033322492444576013453623296
+```
+
+Conversely, if you have a partition with an ID of 20, you can retrieve
+the corresponding index:
+
+```bash
+riak-admin cluster partition id=20
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/admin/riak-admin.md b/content/riak/kv/2.9.8/using/admin/riak-admin.md
new file mode 100644
index 0000000000..150fa9c59b
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/admin/riak-admin.md
@@ -0,0 +1,721 @@
+---
+title: "riak-admin Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "riak-admin CLI"
+    identifier: "cluster_admin_cli"
+    weight: 101
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/tools/riak-admin
+  - /riak/kv/2.9.8/ops/running/tools/riak-admin
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.8/using/admin/commands
+[use admin commands#join]: {{<baseurl>}}riak/kv/2.9.8/using/admin/commands/#join
+[use admin commands#leave]: {{<baseurl>}}riak/kv/2.9.8/using/admin/commands/#leave
+[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/backing-up
+[config reference#node-metadata]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/#node-metadata
+[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/changing-cluster-info
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/commit-hooks
+[config reference#ring]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/#ring
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/inspecting-node
+[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.8/using/reference/statistics-monitoring
+[downgrade]: {{<baseurl>}}riak/kv/2.9.8/setup/downgrade
+[security index]: {{<baseurl>}}riak/kv/2.9.8/using/security/
+[security managing]: {{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/bucket-types
+[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.8/using/reference/secondary-indexes
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.8/using/repair-recovery
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/strong-consistency
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/handoff
+[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#stats
+
+## `riak-admin`
+
+The `riak-admin` command performs operations unrelated to node liveness, including
+node membership, backup, and basic status reporting. The node must be
+running for most of these commands to work.
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. Reip should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs have been fixed related to reip in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded JavaScript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+**Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Outputs system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10
+riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into the operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
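+
+Because these statistics are maintained per node, checking AAE health
+across a cluster means running the command on every node. One way to do
+that is a simple loop over your hosts; the hostnames below are
+illustrative:
+
+```bash
+# Collect AAE status from each node in turn (replace with your hosts)
+for host in riak1.example.com riak2.example.com riak3.example.com; do
+  echo "== ${host} =="
+  ssh "${host}" riak-admin aae-status
+done
+```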
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
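+
+As a brief sketch of what an initial setup might look like — the
+username, password, and bucket name below are illustrative, and the
+guides above remain the authoritative reference:
+
+```bash
+# Turn security on (TLS must already be configured on the node)
+riak-admin security enable
+
+# Create a user and allow password authentication from localhost
+riak-admin security add-user riakuser password=rosebud
+riak-admin security add-source riakuser 127.0.0.1/32 password
+
+# Grant the user read/write access to a bucket in the default bucket type
+riak-admin security grant riak_kv.get,riak_kv.put on default mybucket to riakuser
+```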
+
+## bucket-type
+
+Bucket types, introduced in Riak 2.0, are a means of managing bucket
+properties and provide an additional namespace in Riak alongside
+buckets and keys. This command enables you to create and modify bucket
+types, provide the status of currently available bucket types, and
+activate created bucket types.
+
+```bash
+riak-admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak-admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak-admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak-admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+        Total partitions: 64
+        Finished partitions: 44
+        Speed: 100
+        Total 2i items scanned: 0
+        Total tree objects: 0
+        Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak-admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak-admin repair-2i kill
+```
+
+## search
+
+The `search` command provides sub-commands for various administrative
+work related to the new Riak Search.
+
+```bash
+riak-admin search <command>
+```
+
+### aae-status
+
+```bash
+riak-admin search aae-status
+```
+
+Output active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default, trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtree don't match for a particular key. The `Last` column is the
+number of keys repaired during the last exchange.
The `Mean` column is +the average number of keys repaired for all exchange rounds since the +node has started. The `Max` column is the maximum number of keys +repaired for a given exchange round since the node has started. + +### switch-to-new-search + +{{% note title="Only For Legacy Migration" %}} +This is only needed when migrating from legacy riak search to the new Search +(Yokozuna). +{{% /note %}} + +```bash +riak-admin search switch-to-new-search +``` + +Switch handling of the HTTP `/solr/<index>/select` resource and +protocol buffer query messages from legacy Riak Search to new Search +(Yokozuna). + +## services + +Lists available services on the node (e.g. `riak_kv`). + +```bash +riak-admin services +``` + +## ensemble-status + +This command is used to provide insight into the current status of the +consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature. + +```bash +riak-admin ensemble-status +``` + +This command can also be used to check on the status of a specific +consensus group in your cluster: + +```bash +riak-admin ensemble-status <group id> +``` + +Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency]. + +## handoff + +Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff]. + +## set + +Enables you to change the value of one of Riak's configuration +parameters on the fly, without needing to stop and restart the node. + +```bash +riak-admin set <variable>=<value> +``` + +The set command can only be used for the following +parameters: + +* `transfer_limit` +* `handoff.outbound` +* `handoff.inbound` +* `search.dist_query=off` will disable distributed query for the node +* `search.dist_query=on` will enable distributed query for the node +* `search.dist_query` will get the status of distributed query for the node + +The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted. + + +## show + +Whereas the [`riak-admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak +cluster, the `show` command enables you to view only some of those +statistics. + +```bash +riak-admin show <variable> +``` + +## describe + +Provides a brief description of one of Riak's [configurable parameters][config reference]. 
+ +```bash +riak-admin describe <variable> +``` + +If you want to know the meaning of the `nodename` parameter: + +```bash +riak-admin describe nodename +``` + +That will produce the following output: + +``` +nodename: + Name of the Erlang node +``` + + + + diff --git a/content/riak/kv/2.9.8/using/admin/riak-cli.md b/content/riak/kv/2.9.8/using/admin/riak-cli.md new file mode 100644 index 0000000000..dceddd8252 --- /dev/null +++ b/content/riak/kv/2.9.8/using/admin/riak-cli.md @@ -0,0 +1,204 @@ +--- +title: "riak Command Line Interface" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "riak CLI" + identifier: "cluster_admin_riak_cli" + weight: 102 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/2.9.8/ops/running/tools/riak + - /riak/kv/2.9.8/ops/running/tools/riak +--- + +[configuration file]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/ +[escript]: http://www.erlang.org/doc/man/escript.html +[`riak-admin`]: {{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#top +[configuration]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/ + +## riak + +This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands: + +```bash +Usage: riak «command» +where «command» is one of the following: + { help | start | stop | restart | ping | console | attach + attach-direct | ertspath | chkconfig | escript | version | getpid + top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } | + config { generate | effective | describe VARIABLE } [-l debug] +``` + +## help + +Provides a brief description of all available commands. + +## start + +Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given. + +```bash +riak start +``` + +## stop + +Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding. + +```bash +riak stop +``` + +## restart + +Stops and then starts the running Riak node without exiting the Erlang VM. +Prints `ok` when successful, `Node <nodename> not responding to pings.` when the node is already stopped or not responding. + +```bash +riak restart +``` + +## ping + +Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding. + +```bash +riak ping +``` + +## console + +Starts the Riak node in the foreground, giving access to the Erlang shell and +runtime messages. Prints `Node is already running - use 'riak attach' instead` +when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice. + +```bash +riak console +``` + +## attach + +Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. + +```bash +riak attach +``` + +## attach-direct + +Attaches to the console of a Riak running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**. 
+ +```bash +riak attach-direct +``` + +## ertspath + +Outputs the path of the Riak Erlang runtime environment: + +```bash +riak ertspath +``` + +## chkconfig + +Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output. + +```bash +riak chkconfig +``` + +## escript + +Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment: + +```bash +riak escript <filename> +``` + +## version + +Outputs the Riak version identifier: + +```bash +riak version +``` + +## getpid + +Outputs the process identifier for the currently-running instance of Riak: + +```bash +riak getpid +``` + +## top + +The `riak top` command is the direct equivalent of `riak-admin top`: + +```bash +riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } +``` + +More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation. + +## config + +Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration. + +```bash +riak config { generate | effective | describe VARIABLE } [-l debug] +``` + +* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files. + The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example: + + ``` + -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args + ``` + + If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message: + + ``` + -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args + ``` + +* `effective` prints the effective configuration in the following syntax: + + ``` + parameter1 = value1 + parameter2 = value2 + ``` + + If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error: + + ``` + Disabling cuttlefish, legacy configuration files found: + /etc/riak/app.config + /etc/riak/vm.args + Effective config is only visible for cuttlefish conf files. + ``` + +* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following: + + ``` + Documentation for storage_backend + Specifies the storage engine used for Riak's key-value data + and secondary indexes (if supported). + + Valid Values: + - one of: bitcask, leveldb, memory, multi, prefix_multi + Default Value : bitcask + Set Value : bitcask + Internal key : riak_kv.storage_backend + ``` + +Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem. 
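+
+Putting several of these commands together, a quick sanity check after
+editing riak.conf might look like the following sketch (the grep pattern
+is just an example):
+
+```bash
+# Validate the syntax of the edited configuration file
+riak chkconfig
+
+# Re-run generation to surface any cuttlefish errors before a restart
+riak config generate
+
+# Confirm that the setting you changed is actually in effect
+riak config effective | grep storage_backend
+```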
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/admin/riak-control.md b/content/riak/kv/2.9.8/using/admin/riak-control.md
new file mode 100644
index 0000000000..e1cfb4f9af
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/admin/riak-control.md
@@ -0,0 +1,237 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/advanced/riak-control
+  - /riak/kv/2.9.8/ops/advanced/riak-control
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+or
+
+listener.https.<name> = 127.0.0.1:8096
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+
+```
+
+```appconfig
+ {riak_api,
+  [
+    %% Other configs
+    ... if HTTP is configured ...
+    {http,[{"127.0.0.1",8098}]},
+    ... if HTTPS is configured ...
+    {https,[{"127.0.0.1",8069}]},
+    %% Other configs
+  ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found in the document below.
+{{% /note %}}
+
+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, false},
+    %% Other configs
+  ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, true},
+    %% Other configs
+  ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>.
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/2.9.8/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
+
+## Authentication
+
+Riak Control provides you with the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {auth, userlist}, %% The only other available option is "none"
+    %% Other configs
+  ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {userlist, [
+        {"bob", "bob_is_the_coolest"},
+        {"polly", "h4x0r123"},
+        {"riakrocks", "cap_theorem_4_life"}
+    ]}
+    %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to `https://ip_address_of_https_listener:https_port/admin`.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[ ![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png) ] ({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+ +Staged changes to the cluster: + +[ ![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png) ] ({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[ ![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png) ] ({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[ ![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png) ] ({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[ ![Node Management]({{<baseurl>}}images/control_node_management.png) ] ({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode). + +[ ![Ring View]({{<baseurl>}}images/control_current_ring.png) ] ({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. + + + + diff --git a/content/riak/kv/2.9.8/using/cluster-operations.md b/content/riak/kv/2.9.8/using/cluster-operations.md new file mode 100644 index 0000000000..d9eb2d49f2 --- /dev/null +++ b/content/riak/kv/2.9.8/using/cluster-operations.md @@ -0,0 +1,109 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +aliases: +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak-admin handoff` interface to enable and disable handoff. 
+ +[Learn More >>][ops handoff] + + +#### [Monitoring Strong Consistency][ops strong consistency] + +Overview of the various statistics used in monitoring strong consistency. + +[Learn More >>][ops strong consistency] + + +#### [V3 Multi-Datacenter][ops v3 mdc] + +Explains how to manage V3 replication with the `riak-repl` command. + +[Learn More >>][ops v3 mdc] + + +#### [V2 Multi-Datacenter][ops v2 mdc] + +Explains how to manage V2 replication with the `riak-repl` command. + +[Learn More >>][ops v2 mdc] + + + + diff --git a/content/riak/kv/2.9.8/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.9.8/using/cluster-operations/active-anti-entropy.md new file mode 100644 index 0000000000..ad8645fd45 --- /dev/null +++ b/content/riak/kv/2.9.8/using/cluster-operations/active-anti-entropy.md @@ -0,0 +1,289 @@ +--- +title: "Managing Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Managing Active Anti-Entropy" + identifier: "cluster_operations_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +version_history: + in: "2.9.1+" +aliases: + - /riak/kv/2.9.8/ops/advanced/aae/ + - /riak/2.9.8/ops/advanced/aae/ +--- +[config search#throttledelay]: {{<baseurl>}}riak/kv/2.9.8/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{<baseurl>}}riak/kv/2.9.8/configuring/search/#search-anti-entropy-throttle + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak. + +## Enabling Active Anti-Entropy + +Whether AAE is currently enabled in a node is determined by the value of +the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/). + +In Riak versions 2.0 and later, AAE is turned on by default. + +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. 
+ +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak-admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... + +================================ Entropy Trees ================================ +Index Built (ago) +------------------------------------------------------------------------------- +0 5.7 d +22835963083295358096932575511191922182123945984 5.6 d +45671926166590716193865151022383844364247891968 5.5 d +68507889249886074290797726533575766546371837952 4.3 d +91343852333181432387730302044767688728495783936 4.8 d + +================================ Keys Repaired ================================ +Index Last Mean Max +------------------------------------------------------------------------------- +0 0 0 0 +22835963083295358096932575511191922182123945984 0 0 0 +45671926166590716193865151022383844364247891968 0 0 0 +68507889249886074290797726533575766546371837952 0 0 0 +91343852333181432387730302044767688728495783936 0 0 0 + +``` + +Each of these three tables contains information for each +[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories: + +Category | Measures | Description +:--------|:---------|:----------- +**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed + | `All` | How long it has been since a partition exchanged with all of its replicas +**Entropy Trees** | `Built` | When the hash trees for a given partition were created +**Keys Repaired** | `Last` | The number of keys repaired during all key exchanges since the last node restart + | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart + | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart + +All AAE status information obtainable using the `riak-admin aae-status` +command is stored in-memory and is reset when a node is restarted with +the exception of hash tree build information, which is persisted on disk +(because hash trees themselves are persisted on disk). + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and +off but also to fine-tune your cluster's use of AAE, e.g. how +much memory AAE processes should consume, how frequently specific +processes should be run, etc. 
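+
+Before tuning, it can be helpful to see which AAE-related settings are
+currently in effect on a node. Assuming the cuttlefish-style riak.conf
+system, one way to list them is:
+
+```bash
+# Show every effective anti-entropy setting on this node
+riak config effective | grep anti_entropy
+```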
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach the length
+specified by the [`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on
+that in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but
+entails a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data corruption.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built. 
You can set the frequency using the
+`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
+default is every hour (`1h`); the number of hash tree builds is
+specified by `anti_entropy.tree.build_limit.number`, for which the
+default is 1.
+
+### Write Buffer Size
+
+While you are free to choose the backend for data storage in Riak,
+background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
+write buffer used by LevelDB for hash tree generation using the
+`anti_entropy.write_buffer_size` parameter. The default is `4MB`.
+
+### Open Files and Concurrency Limits
+
+The `anti_entropy.concurrency_limit` parameter determines how many AAE
+cross-node information exchanges or hash tree builds can happen
+concurrently. The default is `2`.
+
+The `anti_entropy.max_open_files` parameter sets an open-files limit for
+AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.
+
+## AAE and Riak Search
+
+Riak's AAE subsystem works to repair object inconsistencies both for
+normal key/value objects and for data related to [Riak Search](../../../developing/usage/search). In particular, AAE acts on indexes stored in
+[Solr](http://lucene.apache.org/solr/), the search platform that drives
+Riak Search. Implementation details for AAE and Search can be found in
+the [Search Details](../../reference/search/#active-anti-entropy-aae)
+documentation.
+
+You can check on the status of Search-related AAE using the following
+command:
+
+```bash
+riak-admin search aae-status
+```
+
+The output from that command can be interpreted just like the output
+discussed in the section on [monitoring](#monitoring-aae) above.
+
+
+
diff --git a/content/riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes.md
new file mode 100644
index 0000000000..59ce14cb3b
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes.md
@@ -0,0 +1,198 @@
+---
+title: "Adding / Removing Nodes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Adding/Removing Nodes"
+    identifier: "cluster_operations_add_remove_nodes"
+    weight: 100
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/nodes/adding-removing
+  - /riak/kv/2.9.8/ops/running/nodes/adding-removing
+---
+
+[use running cluster]: {{<baseurl>}}riak/kv/2.9.8/using/running-a-cluster
+
+This page describes the process of adding and removing nodes to and from
+a Riak KV cluster. For information on creating a cluster, check out [Running a Cluster][use running cluster].
+
+## Start the Node
+
+Just like the initial configuration steps, this step has to be repeated
+for every node in your cluster. Before a node can join an existing
+cluster, it needs to be started. Depending on your mode of installation,
+use either the init scripts installed by the Riak binary packages or
+simply the script [`riak`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-cli/):
+
+```bash
+/etc/init.d/riak start
+```
+
+or
+
+```bash
+bin/riak start
+```
+
+When the node starts, it will look for a cluster description, known as
+the **ring file**, in its data directory. 
If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. This command +would stage a join to that cluster: + +```bash +bin/riak-admin cluster join riak@192.168.2.2 +``` + +If the join request is successful, you should see the following: + +``` +Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2' +``` + +If you have multiple nodes that you would like to join to an existing +cluster, repeat this process for each of them. + +## Joining Nodes to Form a Cluster + +The process of joining a cluster involves several steps, including +staging the proposed cluster nodes, reviewing the cluster plan, and +committing the changes. + +After staging each of the cluster nodes with `riak-admin cluster join` +commands, as in the section above, the next step in forming a cluster is +to review the proposed plan of changes. This can be done with the +`riak-admin cluster plan` command, which is shown in the example below. 
+
+```
+=============================== Staged Changes ================================
+Action         Nodes(s)
+-------------------------------------------------------------------------------
+join           'riak@192.168.2.3'
+join           'riak@192.168.2.4'
+join           'riak@192.168.2.5'
+join           'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+                         After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid     100.0%     20.3%    'riak@192.168.2.2'
+valid       0.0%     20.3%    'riak@192.168.2.3'
+valid       0.0%     20.3%    'riak@192.168.2.4'
+valid       0.0%     20.3%    'riak@192.168.2.5'
+valid       0.0%     18.8%    'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+  12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from the cluster in two ways. One assumes that a
+node is decommissioned, for example, because its added capacity is not
+needed anymore or because it's explicitly replaced with a new one. The
+second is relevant for failure scenarios in which a node has crashed and
+is irrecoverable and thus must be removed from the cluster from another
+node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster.
+
+Similarly to joining a node, after executing `riak-admin cluster leave`
+the cluster plan must be reviewed with `riak-admin cluster plan` and
+the changes committed with `riak-admin cluster commit`.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing; running
+`riak-admin cluster leave` with no argument simply selects the current
+node for you automatically.
+
+As with `riak-admin cluster leave`, the plan to have a node leave the
+cluster must first be reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before any changes will
+actually take place. 
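+
+Putting these pieces together, a typical decommissioning session, run on the
+node being removed, might look like the following sketch (output omitted):
+
+```bash
+# Stage this node's departure from the cluster
+riak-admin cluster leave
+
+# Review the resulting ownership transfers before applying them
+riak-admin cluster plan
+
+# Apply the staged change; partitions will hand off before the node exits
+riak-admin cluster commit
+```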
+ + +## Pausing a `join` or `leave` + +{{% note title="Warning" %}} +Pausing may impact cluster health and is not recommended for more than a short period of time. +{{% /note %}} + +To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0: + +```bash +riak-admin transfer-limit <node> 0 +``` + + + + diff --git a/content/riak/kv/2.9.8/using/cluster-operations/backend.md b/content/riak/kv/2.9.8/using/cluster-operations/backend.md new file mode 100644 index 0000000000..0a946485db --- /dev/null +++ b/content/riak/kv/2.9.8/using/cluster-operations/backend.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "Backend" +description: "" +project: "riak_kv" +project_version: 2.9.8 +#menu: +# riak_kv-2.9.8: +# name: "Backend" +# identifier: "cluster_operations_backend" +# weight: 112 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +**TODO: Add content** + + + + diff --git a/content/riak/kv/2.9.8/using/cluster-operations/backing-up.md b/content/riak/kv/2.9.8/using/cluster-operations/backing-up.md new file mode 100644 index 0000000000..08b298631c --- /dev/null +++ b/content/riak/kv/2.9.8/using/cluster-operations/backing-up.md @@ -0,0 +1,271 @@ +--- +title: "Backing Up" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Backing Up" + identifier: "cluster_operations_backing_up" + weight: 106 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.8/ops/running/backups + - /riak/kv/2.9.8/ops/running/backups +--- + +[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters +[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/reference/strong-consistency +[concept aae]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/ +[aae read repair]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy + +Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. + +Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios. + +This page covers how to perform backups of Riak KV data. + +## Overview + +Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar. + +Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes. + +The basic process for getting a backup of Riak KV from a node is as follows: + +1. Stop Riak KV with `riak stop`. +2. Backup the appropriate data, ring, and configuration directories. +3. Start Riak KV. + +Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting. + +{{% note title="Backups and eventual consistency" %}} +Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. + +Data could exist on some nodes and not others at the exact time a backup is made. 
Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy).
+{{% /note %}}
+
+## OS-Specific Directory Locations
+
+The default Riak KV data, ring, and configuration directories for each of the supported operating systems are as follows:
+
+#### Debian and Ubuntu
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### Fedora and RHEL
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### FreeBSD
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/usr/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### OS X
+
+Data | Directory
+:----|:---------
+Bitcask | `./data/bitcask`
+LevelDB | `./data/leveldb`
+Ring | `./data/riak/ring`
+Configuration | `./etc`
+Cluster Metadata | `./data/riak/cluster_meta`
+Search | `./data/riak/yz`
+Strong consistency | `./data/ensembles`
+
+**Note**: OS X paths are relative to the directory in which the package
+was extracted.
+
+#### SmartOS
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/opt/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### Solaris
+
+Data | Directory
+:----|:---------
+Bitcask | `/opt/riak/data/bitcask`
+LevelDB | `/opt/riak/data/leveldb`
+Ring | `/opt/riak/ring`
+Configuration | `/opt/riak/etc`
+Cluster Metadata | `/opt/riak/cluster_meta`
+Search | `/opt/riak/yz`
+Strong consistency | `/opt/riak/data/ensembles`
+
+## Performing Backups
+
+{{% note title="Deprecation notice" %}}
+In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#backup) command commonly used for
+backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
+{{% /note %}}
+
+Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.
+
+Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
+installation.
+
+The following examples use `tar`:
+
+{{% note %}}
+Backups must be performed while Riak KV is stopped to prevent data loss. 
+{{% /note %}} + +### Bitcask + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak +``` + +### LevelDB + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak +``` + +### Cluster Metadata + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/cluster_meta +``` + +### Search / Solr Data + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/yz +``` + +### Strong Consistency Data + +Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature +can be stored in an analogous fashion: + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/ensembles +``` + +## Restoring a Node + +The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment. + +If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process: + +1. Install Riak on the new node. +2. Restore your old node's configuration files, data directory, and ring + directory. +3. Start the node and verify proper operation with `riak ping`, + `riak-admin status`, and other methods you use to check node health. + +If the node name of a restored node (`-name` argument in `vm.args` or +`nodename` parameter in `riak.conf`) is different than the name of the +node that the restored backup was taken from, you will need to +additionally: + +1. Mark the original instance down in the cluster using + [`riak-admin down <node>`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#down) +2. Join the restored node to the cluster using + [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster-join) +3. Replace the original instance with the renamed instance with + [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster-force-replace) +4. Plan the changes to the cluster with `riak-admin cluster plan` +5. Finally, commit the cluster changes with `riak-admin cluster commit` + +{{% note %}} +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/2.9.8/using/admin/). +{{% /note %}} + +For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. + +1. Join to any existing cluster node. + + ```bash + riak-admin cluster join riak@riak2.example.com + ``` + +2. Mark the old instance down. + + ```bash + riak-admin down riak@riak1.example.com + ``` + +3. Force-replace the original instance with the new one. + + ```bash + riak-admin cluster force-replace \ + riak@riak1.example.com riak@riak6.example.com + ``` + +4. Display and review the cluster change plan. + + ```bash + riak-admin cluster plan + ``` + +5. Commit the changes to the cluster. 
+
+   ```bash
+   riak-admin cluster commit
+   ```
+
+Your [configuration files][config reference] should also be changed to match the new name in addition to running the commands (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change, but the hostnames are used for the node names and the hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member-status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/2.9.8/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
+
+
+
diff --git a/content/riak/kv/2.9.8/using/cluster-operations/bucket-types.md b/content/riak/kv/2.9.8/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..eb6962145d
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/cluster-operations/bucket-types.md
@@ -0,0 +1,63 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional setup step on the
+command line.
+
+## Creating a Bucket Type
+
+You can create a new bucket type without
+any properties and then set individual buckets to be indexed. 
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + + + + diff --git a/content/riak/kv/2.9.8/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.9.8/using/cluster-operations/changing-cluster-info.md new file mode 100644 index 0000000000..a2b0c195ad --- /dev/null +++ b/content/riak/kv/2.9.8/using/cluster-operations/changing-cluster-info.md @@ -0,0 +1,458 @@ +--- +title: "Changing Cluster Information" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Changing Cluster Info" + identifier: "cluster_operations_change_info" + weight: 101 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.8/ops/running/nodes/renaming + - /riak/kv/2.9.8/ops/running/nodes/renaming +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference + +## Change the Node Name + +The node name is an important setting for the Erlang VM, especially when +you want to build a cluster of nodes, as the node name identifies both +the Erlang application and the host name on the network. All nodes in +the Riak cluster need these node names to communicate and coordinate +with each other. + +In your configuration files, the node name defaults to `riak@127.0.0.1`. +To change the node name, change the following line: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +Change it to something that corresponds to either the IP address or a +resolvable host name for this particular node, like so: + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +## Change the HTTP and Protocol Buffers binding address + +By default, Riak's HTTP and Protocol Buffers services are bound to the +local interface, i.e. 127.0.0.1, and are therefore unable to serve +requests from the outside network. The relevant setting is in your +[configuration files][config reference]: + +```riakconf +# For HTTP +listener.http.internal = 127.0.0.1:8098 + +# For Protocol Buffers +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +% In the riak_api section + +% For HTTP +{http, [ {"127.0.0.1", 8098 } ]}, + +% For Protocol Buffers +{pb, [ {"127.0.0.1", 8087} ] }, +``` + +Either change it to use an IP address that corresponds to one of the +server's network interfaces, or 0.0.0.0 to allow access from all +interfaces and networks, e.g.: + +```riakconf +listener.http.internal = 0.0.0.0:8098 +``` + +```appconfig +% In the riak_core section +{http, [ {"0.0.0.0", 8098 } ]}, +``` + +The same configuration should be changed for the Protocol Buffers +interface if you intend on using it (which we recommend). 
Change the +following line: + +```riakconf +listener.protobuf.internal = 0.0.0.0:8087 +``` + +```appconfig +% In the riak_core section +{pb, [ {"0.0.0.0", 8087} ] }, +``` + +## Rename Single Node Clusters + +To rename a single-node development cluster: + +1. Stop the node with `riak stop`. + +2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args` to the new name. + +3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`. + +4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`. + +5. Start Riak on the node with `riak start`. + + +## Rename Multi-Node Clusters + +For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. + +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. + +There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. + +The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method. + +### Example Scenario + +For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration: + +* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11 +* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12 +* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13 +* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14 +* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15 + +The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use. + +The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation. + +### Process + +This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section. + +1. [Down the node to be reconfigured](#down) +2. [Reconfigure node to use new address](#reconfigure) +3. [Repeat previous steps on each node](#repeat) + + +<a id="down"></a> +#### Down the Node + +1. Stop Riak on `node1.localdomain`: + + ```bash + riak stop + ``` + + The output should look like this: + + ``` + Attempting to restart script through sudo -H -u riak + ok + ``` + +2. 
From the `node2.localdomain` node, mark `riak@10.1.42.11` down:
+
+   ```bash
+   riak-admin down riak@10.1.42.11
+   ```
+
+   Successfully marking the node down should produce output like this:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   Success: "riak@10.1.42.11" marked as down
+   ```
+
+   This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node.
+
+<a id="reconfigure"></a>
+#### Reconfigure Node to Use New Address
+
+Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps:
+
+1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example:
+
+   `riak.conf`: `nodename = riak@192.168.17.11`
+   `vm.args`: `-name riak@192.168.17.11`
+
+2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Rename Single Node Clusters](#rename-single-node-clusters).
+
+3. Rename the node's `ring` directory, the location of which is described in step 4 of [Rename Single Node Clusters](#rename-single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process.
+
+4. Start Riak on `node1.localdomain`.
+
+   ```bash
+   riak start
+   ```
+
+5. Join the node back into the cluster.
+
+   ```bash
+   riak-admin cluster join riak@10.1.42.12
+   ```
+
+   Successful staging of the join request should have output like this:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12'
+   ```
+
+6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`:
+
+   ```bash
+   riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11
+   ```
+
+   Successful force replacement staging output looks like this:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11'
+   ```
+
+7. 
Review the new changes with `riak-admin cluster plan`:
+
+   ```bash
+   riak-admin cluster plan
+   ```
+
+   Example output:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   =========================== Staged Changes ============================
+   Action         Nodes(s)
+   -----------------------------------------------------------------------
+   join           'riak@192.168.17.11'
+   force-replace  'riak@10.1.42.11' with 'riak@192.168.17.11'
+   -----------------------------------------------------------------------
+
+   WARNING: All of 'riak@10.1.42.11' replicas will be lost
+
+   NOTE: Applying these changes will result in 1 cluster transition
+
+   #######################################################################
+                         After cluster transition 1/1
+   #######################################################################
+
+   ============================= Membership ==============================
+   Status     Ring    Pending    Node
+   -----------------------------------------------------------------------
+   valid     20.3%      --      'riak@192.168.17.11'
+   valid     20.3%      --      'riak@10.1.42.12'
+   valid     20.3%      --      'riak@10.1.42.13'
+   valid     20.3%      --      'riak@10.1.42.14'
+   valid     18.8%      --      'riak@10.1.42.15'
+   -----------------------------------------------------------------------
+   Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+   Partitions reassigned from cluster changes: 13
+     13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11'
+   ```
+
+8. Commit the new changes to the cluster with `riak-admin cluster commit`:
+
+   ```bash
+   riak-admin cluster commit
+   ```
+
+   Output from the command should resemble this example:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   Cluster changes committed
+   ```
+
+9. Check that the node is participating in the cluster and functioning as expected:
+
+   ```bash
+   riak-admin member-status
+   ```
+
+   Output should resemble this example:
+
+   ```bash
+   Attempting to restart script through sudo -H -u riak
+   ============================= Membership ==============================
+   Status     Ring    Pending    Node
+   -----------------------------------------------------------------------
+   valid     20.3%      --      'riak@192.168.17.11'
+   valid     20.3%      --      'riak@10.1.42.12'
+   valid     20.3%      --      'riak@10.1.42.13'
+   valid     20.3%      --      'riak@10.1.42.14'
+   valid     18.8%      --      'riak@10.1.42.15'
+   -----------------------------------------------------------------------
+   Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+   ```
+
+10. Monitor hinted handoff transfers with the `riak-admin transfers` command to ensure that they have finished.
+
+11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed.
+
+{{% note title="Note" %}}
+When using the `riak-admin cluster force-replace` command, you will always get a
+warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be
+lost`. Since we didn't delete any data files and we are replacing the node
+with itself under a new name, we will not lose any replicas.
+{{% /note %}}
+
+<a id="repeat"></a>
+#### Repeat previous steps on each node
+
+Repeat the steps above for each of the remaining nodes in the cluster.
+
+Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster. 
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the below steps can be used to rename nodes in a cluster that is being restored from backups. The below steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (or `-name` in `vm.args`) from `riak@10.1.42.11` to the new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+   ```bash
+   ================================= Membership ==================================
+   Status     Ring    Pending    Node
+   -------------------------------------------------------------------------------
+   valid     20.3%      --      'riak@192.168.17.11'
+   down      20.3%      --      'riak@10.1.42.12'
+   down      20.3%      --      'riak@10.1.42.13'
+   down      20.3%      --      'riak@10.1.42.14'
+   down      18.8%      --      'riak@10.1.42.15'
+   -------------------------------------------------------------------------------
+   Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+
+   ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+   ```bash
+   ================================== Claimant ===================================
+   Claimant:  'riak@192.168.17.11'
+   Status:     up
+   Ring Ready: true
+
+   ============================== Ownership Handoff ==============================
+   No pending changes.
+
+   ============================== Unreachable Nodes ==============================
+   All nodes are up and reachable
+   ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args`, as described above.
+
+2. Move aside the ring directory. As in [Rename Multi-Node Clusters](#rename-multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. 
Force-replace each old node name with its new one. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`.
+
+6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to below:
+
+   ```bash
+   =============================== Staged Changes ================================
+   Action         Details(s)
+   -------------------------------------------------------------------------------
+   force-replace  'riak@10.1.42.12' with 'riak@192.168.17.12'
+   force-replace  'riak@10.1.42.13' with 'riak@192.168.17.13'
+   force-replace  'riak@10.1.42.14' with 'riak@192.168.17.14'
+   force-replace  'riak@10.1.42.15' with 'riak@192.168.17.15'
+   join           'riak@192.168.17.12'
+   join           'riak@192.168.17.13'
+   join           'riak@192.168.17.14'
+   join           'riak@192.168.17.15'
+   -------------------------------------------------------------------------------
+
+   WARNING: All of 'riak@10.1.42.12' replicas will be lost
+   WARNING: All of 'riak@10.1.42.13' replicas will be lost
+   WARNING: All of 'riak@10.1.42.14' replicas will be lost
+   WARNING: All of 'riak@10.1.42.15' replicas will be lost
+
+   NOTE: Applying these changes will result in 1 cluster transition
+
+   ###############################################################################
+                            After cluster transition 1/1
+   ###############################################################################
+
+   ================================= Membership ==================================
+   Status     Ring    Pending    Node
+   -------------------------------------------------------------------------------
+   valid     20.3%      --      'riak@192.168.17.11'
+   valid     20.3%      --      'riak@192.168.17.12'
+   valid     20.3%      --      'riak@192.168.17.13'
+   valid     20.3%      --      'riak@192.168.17.14'
+   valid     18.8%      --      'riak@192.168.17.15'
+   -------------------------------------------------------------------------------
+   Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+   Partitions reassigned from cluster changes: 51
+     13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12'
+     13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13'
+     13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14'
+     12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15'
+   ```
+
+7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`.
+
+8. 
Once the cluster transition has completed, all node names should be changed and be marked as valid in `riak-admin member-status` like below:
+
+   ```bash
+   ================================= Membership ==================================
+   Status     Ring    Pending    Node
+   -------------------------------------------------------------------------------
+   valid     20.3%      --      'riak@192.168.17.11'
+   valid     20.3%      --      'riak@192.168.17.12'
+   valid     20.3%      --      'riak@192.168.17.13'
+   valid     20.3%      --      'riak@192.168.17.14'
+   valid     18.8%      --      'riak@192.168.17.15'
+   -------------------------------------------------------------------------------
+   Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+   ```
+
+
+
diff --git a/content/riak/kv/2.9.8/using/cluster-operations/handoff.md b/content/riak/kv/2.9.8/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..f8c6effd03
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/cluster-operations/handoff.md
@@ -0,0 +1,120 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/handoff
+  - /riak/kv/2.9.8/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to change your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command requires that
+you specify both a node or nodes to be targeted by the command and
+whether you'd like to disable inbound handoff, outbound handoff, or
+both. The `disable` command works just like `enable`. This command
+would disable all forms of handoff on all nodes, to give just one
+example:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster. 
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table that will provide the following information
+about each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, you will see something like this:
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the sections above.
+
+
+
diff --git a/content/riak/kv/2.9.8/using/cluster-operations/inspecting-node.md b/content/riak/kv/2.9.8/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..332adb9766
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,496 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/nodes/inspecting
+  - /riak/kv/2.9.8/ops/running/nodes/inspecting
+---
+
+When you need to inspect a Riak node to gather performance metrics or
+investigate potential issues, a number of tools are available to help;
+they are either included with Riak itself or made available through the
+Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note, for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required for statistics to be generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node. 
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET
+FSM Siblings are inextricably linked. These are one-minute stats.
+
+Stat | Description
+------------------------------|---------------------------------------------------
+`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute
+
+### Total Stats
+
+Total Stats represent the total number of times a particular activity
+has occurred since this node was started.
+
+Stat | Description
+---------------------------------------|---------------------------------------------------
+`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes
+`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes
+`vnode_gets_total` | Total number of GETs coordinated by local vnodes
+`vnode_puts_total` | Total number of PUTs coordinated by local vnodes
+`read_repairs_total` | Total number of read repairs this node has coordinated
+`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination
+`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy
+`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads
+`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes
+`vnode_index_writes_postings_total` | Total number of individual secondary index values written
+`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes
+`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted
+`pbc_connects_total` | Total number of Protocol Buffers connections made
+`precommit_fail` | Total number of pre-commit hook failures
+`postcommit_fail` | Total number of post-commit hook failures
+`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection
+`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection
+`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas
+`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas
+`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas
+`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas
+
+### Timestamps
+
+Some of the Erlang applications that make up Riak contribute
+statistics to `riak-admin status`. The timestamps below record, in
+Unix epoch time, the last time statistics for that application were
+generated.
+
+Stat | Description
+--------------------|---------------------------------------------------
+`riak_kv_stat_ts` | The last time Riak KV stats were generated.
+`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated. 
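+
+As a quick freshness check, you can compare these timestamps against the
+current time. The following is a minimal sketch, assuming `riak-admin` is on
+the `PATH` and that `riak-admin status` prints each stat as a `name : value`
+pair:
+
+```bash
+# Compare the riak_kv stats timestamp with the current epoch time.
+# A large gap suggests that stat generation has stalled.
+kv_ts=$(riak-admin status | awk -F' : ' '/^riak_kv_stat_ts/ {print $2}')
+now=$(date +%s)
+echo "riak_kv stats were last generated $((now - kv_ts)) seconds ago"
+```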
+
+### Ring
+
+General ring information is reported in `riak-admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members` | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership` | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang's `cpu_sup` module;
+its documentation can be found at [ErlDocs:
+cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1` | The average number of active processes for the last 1 minute (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg5` | The average number of active processes for the last 5 minutes (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg15` | The average number of active processes for the last 15 minutes (equivalent to the top(1) command's load average when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine;
+its documentation can be found at [ErlDocs:
+Memory](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total` | Total allocated memory (sum of processes and system)
+`memory_processes` | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system` | Total allocated memory that is not directly related to an Erlang process
+`memory_atom` | Total amount of memory currently allocated for atom storage
+`memory_atom_used` | Total amount of memory currently used for atom storage
+`memory_binary` | Total amount of memory used for binaries
+`memory_code` | Total amount of memory allocated for Erlang code
+`memory_ets` | Total memory allocated for Erlang Term Storage
+`mem_total` | Total available system memory
+`mem_allocated` | Total memory allocated for this node
+
+### Erlang VM
+
+The statistics below describe properties of the Erlang VM. 
+
+Stat | Description
+--------------------------|---------------------------------------------------
+`nodename` | The name this node uses to identify itself
+`connected_nodes` | A list of the nodes that this node is aware of at this time
+`sys_driver_version` | String representing the Erlang driver version in use by the runtime system
+`sys_global_heaps_size` | Current size of the shared global heap
+`sys_heap_type` | String representing the heap type in use (one of private, shared, hybrid)
+`sys_logical_processors` | Number of logical processors available on the system
+`sys_otp_release` | Erlang OTP release version in use on the node
+`sys_process_count` | Number of processes currently running in the Erlang VM
+`sys_smp_support` | Boolean value representing whether symmetric multi-processing (SMP) is available
+`sys_system_version` | Detailed Erlang version information
+`sys_system_architecture` | The node operating system and hardware architecture
+`sys_threads_enabled` | Boolean value representing whether threads are enabled
+`sys_thread_pool_size` | Number of threads in the asynchronous thread pool
+`sys_wordsize` | Size of Erlang term words in bytes as an integer; for example, 4 is returned on 32-bit architectures and 8 on 64-bit architectures
+
+### Miscellaneous Information
+
+Miscellaneous Information provides additional details particular to this
+node.
+
+Stat | Description
+---------------------------|---------------------------------------------------
+`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as undefined if LevelDB is not being used.
+`disk` | Information about the disk, taken from Erlang's `disksup` module. Reported as [{"ID",KBytes_Used,Percent_Util}].
+`storage_backend` | The storage backend currently in use.
+
+### Pipeline Metrics
+
+The following metrics from `riak_pipe` are generated during MapReduce
+operations.
+
+Stat | Description
+--------------------------------|---------------------------------------------------
+`pipeline_active` | The number of pipelines active in the last 60 seconds
+`pipeline_create_count` | The total number of pipelines created since the node was started
+`pipeline_create_error_count` | The total number of pipeline creation errors since the node was started
+`pipeline_create_error_one` | The number of pipeline creation errors in the last 60 seconds
+`pipeline_create_one` | The number of pipelines created in the last 60 seconds
+
+### Application and Subsystem Versions
+
+The specific version of each Erlang application and subsystem that
+makes up a Riak node is present in the `riak-admin status` output. Each
+application is linked below next to its version identifier. 
+
+Stat | Description
+------------------------|---------------------------------------------------
+`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl)
+`riak_control_version` | [Riak Control](http://github.com/basho/riak_control)
+`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info)
+`riak_search_version` | [Riak Search](http://github.com/basho/riak_search)
+`merge_index_version` | [Merge Index](http://github.com/basho/merge_index)
+`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv)
+`sidejob_version` | [Sidejob](http://github.com/basho/sidejob)
+`riak_api_version` | [Riak API](http://github.com/basho/riak_api)
+`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe)
+`riak_core_version` | [Riak Core](http://github.com/basho/riak_core)
+`bitcask_version` | [Bitcask](http://github.com/basho/bitcask)
+`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats)
+`webmachine_version` | [Webmachine](http://github.com/basho/webmachine)
+`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb)
+`inets_version` | [inets](http://erlang.org/doc/apps/inets/)
+`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js)
+`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/)
+`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/)
+`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon)
+`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/)
+`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/)
+`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/)
+`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/)
+`lager_version` | [Lager](http://github.com/DeadZen/lager)
+`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush)
+`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/)
+`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/)
+`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/)
+`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/)
+
+### Riak Search Statistics
+
+The following statistics related to Riak Search message queues are
+available. 
+
+Stat | Description
+-----------------------------|---------------------------------------------------
+`riak_search_vnodeq_max` | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_mean` | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_median` | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_min` | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_total` | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started
+`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem
+
+Note that, under ideal operation and with the exception of
+`riak_search_vnodes_running`, these statistics should contain low values
+(e.g., 0-10). Higher values could indicate an issue.
+
+## `riak-debug`
+
+The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes.
+
+`riak-debug` also runs `riak-admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well.
+
+{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}}
+The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion.
+{{% /note %}}
+
+## Strong Consistency Stats
+
+Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The table below lists those stats. 
+
+### GET-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
+`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
+`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+
+### PUT-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
+`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
+`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+
+## `riak-admin diag`
+
+Running `riak-admin diag` by itself will perform a check of all of the
+data partitions in your cluster. 
It will return a listing of partitions
+that have been checked, each of which looks something like this:
+
+```
+{1392.9.848081016843912887106182707253109560705024, % the partition checked
+ 'dev-rel@127.0.0.1'}, % that partition's nodename
+```
+
+At the end of that (potentially very long) listing of checked
+partitions, it will print notices, warnings, and other pieces of
+information about issues that it has found, including date/time, message
+type, and a detailed description. Here's an example:
+
+```
+15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
+15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
+```
+
+Messages bear the following types (derived from
+[syslog](http://en.wikipedia.org/wiki/Syslog) severity levels):
+
+* `debug`
+* `info`
+* `notice`
+* `warning`
+* `error`
+* `critical`
+* `alert`
+* `emergency`
+
+#### Command flags
+
+Attaching the `--help` flag will return a list of flags and commands
+that can be used with Riaknostic:
+
+```
+Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]
+
+-h, --help Display help/usage dialogue
+-d, --level Minimum message severity level (default: notice)
+-l, --list Describe available diagnostic tasks
+--export Package system info in '/export.zip'
+check_name A specific check to run
+```
+
+Running `riak-admin diag` with the `--list` flag will return a list of
+available diagnostic checks. The following checks are available:
+
+Check | Description
+:-----|:-----------
+`disk` | Data directory permissions and atime
+`dumps` | Find crash dumps
+`memory_use` | Measure memory usage
+`nodes_connected` | Cluster node liveness
+`ring_membership` | Cluster membership validity
+`ring_preflists` | Check if the ring satisfies `n_val`
+`ring_size` | Check if the ring size is valid
+`search` | Check whether Riak Search is enabled on all nodes
+
+The `--level` flag enables you to specify the log level and thus to
+filter messages based on type. You can pass in any of the message types
+listed above (`debug`, `info`, etc.).
+
+The `--level` flag can be used when running `riak-admin diag` with or
+without specifying a diagnostic check.
+
+#### Contributing
+
+Do you have an idea that would help us improve Riaknostic? If so, fork
+the [GitHub repository](https://github.com/basho/riaknostic) and send us
+a pull request with your changes. The code is documented with
+[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
+Docs a read before you contribute.
+
+If you want to run the Riaknostic script while developing and you don't
+have it hooked up to your local Riak installation, you can invoke it
+directly like so:
+
+```bash
+./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
+```
+
+Those extra options are usually assigned by the `riak-admin` script for
+you, but here's how to set them:
+
+* `--etc` - The location of your Riak configuration directory (usually
+  `/etc`). In the example above, configuration is in the generated
+  directory of a source checkout of Riak.
+* `--base` - The "base" directory of Riak, usually the root of the
+  generated directory or `/usr/lib/riak` on Linux. Scan the
+  `riak-admin` script for how the `RUNNER_BASE_DIR` variable is
+  assigned on your platform.
+* `--user` - The user/UID that the Riak node runs as. 
In a source
+  checkout, it's the current user; on most systems, it's `riak`.
+
+## Related Resources
+
+* [The riak-admin configuration management tool](../../admin/riak-admin/)
+* [Riaknostic](http://riaknostic.basho.com/)
+* [HTTP API Status](../../../developing/api/http/status/)
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/cluster-operations/load-balancing.md b/content/riak/kv/2.9.8/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..4505315828
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/cluster-operations/load-balancing.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Load Balancing"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+#menu:
+#  riak_kv-2.9.8:
+#    name: "Load Balancing"
+#    identifier: "cluster_operations_load_balancing"
+#    weight: 111
+#    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+**TODO: Add content (not sure where this exists in docs)**
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/cluster-operations/logging.md b/content/riak/kv/2.9.8/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..ad33f51f99
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/cluster-operations/logging.md
@@ -0,0 +1,47 @@
+---
+title: "Enabling and Disabling Debug Logging"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Logging"
+    identifier: "cluster_operations_logging"
+    weight: 105
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+If you'd like to enable debug logging on the current node, i.e. set the
+console log level to `debug`, you can do so without restarting the node
+by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:
+
+```erlang
+lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
+```
+
+You should replace the file location above (`/var/log/riak/console.log`)
+with your platform-specific location, e.g. `./log/console.log` for a
+source installation. This location is specified by the
+`log.console.file` configuration parameter.
+
+If you'd like to enable debug logging on _all_ nodes instead of just one
+node, you can enter the Erlang console of any running node with `riak
+attach` and enter the following:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
+```
+
+As before, use the appropriate log file location for your cluster.
+
+At any time, you can set the log level back to `info`:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/cluster-operations/replacing-node.md b/content/riak/kv/2.9.8/using/cluster-operations/replacing-node.md
new file mode 100644
index 0000000000..3117e10fcd
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/cluster-operations/replacing-node.md
@@ -0,0 +1,100 @@
+---
+title: "Replacing a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Replacing a Node"
+    identifier: "cluster_operations_replace_node"
+    weight: 102
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+At some point, for various reasons, you might need to replace a node in
+your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/2.9.8/using/repair-recovery)). 
Here is the recommended way to go +about replacing a node. + +1. Back up your data directory on the node in question. In this example +scenario, we'll call the node `riak4`: + + ```bash + sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak + ``` + + If you have any unforeseen issues at any point in the node + replacement process, you can restore the node's data from this + backup. + +2. Download and install Riak on the new node you wish to bring into the +cluster and have it replace the `riak4` node. We'll call the new node +`riak7` for the purpose of this example. + +3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-cli/#start): + + ```bash + riak start + ``` + +4. Plan the join of the new `riak7` node to an existing node already +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: + + ```bash + riak-admin cluster join riak0 + ``` + +5. Plan the replacement of the existing `riak4` node with the new +`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster) command: + + ```bash + riak-admin cluster replace riak4 riak7 + ``` + + <div class=info> + <div class=title>Single Nodes</div> + If a node is started singly using default settings (as, for example, + you might do when you are building your first test environment), you + will need to remove the ring files from the data directory after you + edit `/etc/vm.args`. `riak-admin cluster replace` will not work as + the node has not been joined to a cluster. + </div> + +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster) command executed on the new +`riak7` node: + + ```bash + riak-admin cluster plan + ``` + +7. If the changes are correct, you can commit them with the +[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster) command: + + ```bash + riak-admin cluster commit + ``` + + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster): + + ```bash + riak-admin cluster clear + ``` + +Once you have successfully replaced the node, it should begin leaving +the cluster. You can check on ring readiness after replacing the node +with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#member-status) +commands. + +{{% note title="Ring Settling" %}} +You'll need to make sure that no other ring changes occur between the time +when you start the new node and the ring settles with the new IP info. + +The ring is considered settled when the new node reports `true` when you run +the `riak-admin ringready` command. 
+{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.8/using/cluster-operations/secondary-indexes.md b/content/riak/kv/2.9.8/using/cluster-operations/secondary-indexes.md new file mode 100644 index 0000000000..72c485f740 --- /dev/null +++ b/content/riak/kv/2.9.8/using/cluster-operations/secondary-indexes.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.8 +#menu: +# riak_kv-2.9.8: +# name: "Secondary Indexes" +# identifier: "cluster_operations_2i" +# weight: 109 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + + diff --git a/content/riak/kv/2.9.8/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.9.8/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..7163e6c865 --- /dev/null +++ b/content/riak/kv/2.9.8/using/cluster-operations/strong-consistency.md @@ -0,0 +1,76 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +aliases: +--- + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats. 
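+
+For example, one quick way to inspect only these stats is to filter the full
+`riak-admin status` output on the `consistent_` prefix. This is a minimal
+sketch, assuming each stat is printed on its own line:
+
+```bash
+# Show every strong consistency stat reported by this node.
+riak-admin status | grep '^consistent_'
+```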
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response + + + + diff --git a/content/riak/kv/2.9.8/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/2.9.8/using/cluster-operations/tictac-active-anti-entropy.md new file mode 100644 index 0000000000..b776b11a62 --- /dev/null +++ b/content/riak/kv/2.9.8/using/cluster-operations/tictac-active-anti-entropy.md @@ -0,0 +1,34 @@ +--- +title: "TicTac Active Anti-Entropy" +description: "An Active Anti-Entropy library" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "TicTac Active Anti-Entropy" + 
identifier: "TicTac_aae"
+    weight: 111
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/kv/2.9.8/ops/advanced/tictacaae/
+  - /riak/2.9.8/ops/advanced/ticktacaae/
+---
+
+
+
+Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning.
+
+## TicTac AAE
+
+The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to fully integrate the library into the KV 3.0 release.
+
+TicTac Active Anti-Entropy makes two changes to the way Anti-Entropy has previously worked in Riak. The first change is to the way Merkle Trees are constructed, so that they are built incrementally. The second change allows the underlying anti-entropy key store to be key-ordered while still providing fast access to keys via their Merkle tree location or the last modified date of the object.
+
+## Configuring AAE
+
+Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and
+off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements.
+
+
diff --git a/content/riak/kv/2.9.8/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.9.8/using/cluster-operations/v2-multi-datacenter.md
new file mode 100644
index 0000000000..57b7583251
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/cluster-operations/v2-multi-datacenter.md
@@ -0,0 +1,263 @@
+---
+title_supertext: "V2 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "V2 Multi-Datacenter"
+    identifier: "cluster_operations_v2"
+    weight: 115
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.8/ops/mdc/v2/operations
+  - /riak/kv/2.9.8/ops/mdc/v2/operations
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication system is largely
+controlled by the `riak-repl` command. The sections below detail the
+available subcommands.
+
+## add-listener
+
+Adds a listener (primary) to the given node, IP address, and port.
+
+```bash
+riak-repl add-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-nat-listener
+
+Adds a NAT-aware listener (primary) to the given node, IP address, port,
+NAT IP, and NAT port. If a non-NAT listener already exists with the same
+internal IP and port, it is "upgraded" to a NAT listener.
+
+```bash
+riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010
+```
+
+## del-listener
+
+Removes and shuts down a listener (primary) on the given node, IP
+address, and port.
+
+```bash
+riak-repl del-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-site
+
+Adds a site (secondary) to the local node, connecting to the specified
+listener. 
+ +```bash +riak-repl add-site <ipaddr> <portnum> <sitename> +``` + +Below is an example usage: + +```bash +riak-repl add-site 10.0.1.156 9010 newyork +``` + +## del-site + +Removes a site (secondary) from the local node by name. + +```bash +riak-repl del-site <sitename> +``` + +Below is an example usage: + +```bash +riak-repl del-site newyork +``` + +## status + +Obtains status information about replication. Reports counts on how much +data has been transmitted, transfer rates, message queue lengths of +clients and servers, number of fullsync operations, and connection +status. This command only displays useful information on the leader +node. + +```bash +riak-repl status +``` + +## start-fullsync + +Manually initiates a fullsync operation with connected sites. + +```bash +riak-repl start-fullsync +``` + +## cancel-fullsync + +Cancels any fullsync operations in progress. If a partition is in +progress, synchronization will stop after that partition completes. +During cancellation, `riak-repl status` will show `cancelled` in the +status. + +```bash +riak-repl cancel-fullsync +``` + +## pause-fullsync + +Pauses any fullsync operations in progress. If a partition is in +progress, synchronization will pause after that partition completes. +While paused, `riak-repl status` will show `paused` in the status +information. Fullsync may be cancelled while paused. + +```bash +riak-repl pause-fullsync +``` + +## resume-fullsync + +Resumes any fullsync operations that were paused. If a fullsync +operation was running at the time of the pause, the next partition will +be synchronized. If not, it will wait until the next `start-fullsync` +command or `fullsync_interval`. + +```bash +riak-repl resume-fullsync +``` + +## riak-repl Status Output + +The following definitions describe the output of the `riak-repl status` +command. Please note that many of these statistics will only appear on +the current leader node, and that all counts will be reset to 0 upon +restarting Riak. + +### Client + +Field | Description +:-----|:----------- +`client_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.8/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a> +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected sites +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of site connections made to this node +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. 
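+
+If you only want the client-side counters from this table, you can filter the
+`riak-repl status` output. A minimal sketch, assuming the command is run on
+the current leader node and each field is printed on its own line:
+
+```bash
+# Show only the client (site) statistics; run this on the leader node,
+# since non-leader nodes report less useful information.
+riak-repl status | grep '^client_'
+```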
+
+### Server
+
+Field | Description
+:-----|:-----------
+`server_bytes_recv` | The total number of bytes the server (listener) has received
+`server_bytes_sent` | The total number of bytes the server (listener) has sent
+`server_connect_errors` | The number of listener to site connection errors
+`server_connects` | The number of times the listener connects to the client site
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.8/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a>
+
+### Elections and Objects
+
+Field | Description
+:-----|:-----------
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+
+### Other
+
+Field | Description
+:-----|:-----------
+`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>`
+`[sitename]_ips` | Defines a replication site
+`leader` | Which node is the current leader of the cluster
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the client (site) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The listeners currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>`connected` - The IP address and port of a connected client (site)</li><li>`cluster_name` - The name of the connected client (site)</li><li>`connecting` - The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. 
They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul>
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/2.9.8/configuring/v2-multi-datacenter/) guide for more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the server (listener) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/2.9.8/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+## Keylist Strategy
+
+The following fields appear under both the `keylist_server` and
+`keylist_client` fields. Any differences are described in the table.
+
+Field | Description
+------|------------
+`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication. 
+`partition_start` | The number of elapsed seconds since replication has started on a given partition
+`stage_start` | The number of elapsed seconds since replication has started on a given stage
+`get_pool_size` | The number of Riak get finite state workers available to process requests
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter.md
new file mode 100644
index 0000000000..9d008373d0
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter.md
@@ -0,0 +1,425 @@
+---
+title_supertext: "V3 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "V3 Multi-Datacenter"
+    identifier: "cluster_operations_v3"
+    weight: 114
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.8/ops/mdc/v3/operations
+  - /riak/kv/2.9.8/ops/mdc/v3/operations
+---
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.8/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/2.9.8/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.8/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.8/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{<baseurl>}}riak/kv/2.9.8/using/reference/multi-datacenter/statistics
+
+This document explains how to manage replication with the `riak-repl`
+command. The behavior of some of these commands can be altered by
+setting the appropriate [configuration][config v3 mdc] values.
+
+All commands need to be run only once on a single node of a cluster for
+the changes to propagate to all other nodes. All changes will persist
+across node restarts and will automatically take effect when nodes are
+added to the cluster.
+
+## Cluster Connectivity
+
+#### clustername
+
+Set the `clustername` for all nodes in a Riak cluster.
+
+* Without a parameter, returns the current name of the cluster
+* With a parameter, names the current cluster
+
+To **set** the `clustername`:
+
+* Syntax: `riak-repl clustername <clustername>`
+* Example: `riak-repl clustername Boston`
+
+To **get** the `clustername`:
+
+* Syntax: `riak-repl clustername`
+* Example: `riak-repl clustername`
+
+#### connect
+
+The `connect` command establishes communications from a source cluster
+to a sink cluster of the same ring size. The `host:port` of the sink
+cluster is used for this. The IP and port to connect to can be found in
+the `advanced.config` of the remote cluster, under `riak_core` and
+`cluster_mgr`.
+
+The `host` can be either an IP address
+
+* Syntax: `riak-repl connect <ip>:<port>`
+* Example: `riak-repl connect 192.168.2.1:9080`
+
+...or a hostname that will resolve to an IP address.
+
+* Syntax: `riak-repl connect <host>:<port>`
+* Example: `riak-repl connect Austin:9080`
+
+#### disconnect
+
+Disconnects a source cluster from a sink cluster.
+
+You may define a `host:port` combination
+
+* Syntax: `riak-repl disconnect <host>:<port>`
+* Example: `riak-repl disconnect 192.168.2.1:9080`
+
+...or use the *name* of the cluster.
+
+* Syntax: `riak-repl disconnect <sink_clustername>`
+* Example: `riak-repl disconnect Austin`
+
+#### connections
+
+Display a list of connections between source and sink clusters. 
+
+* Syntax: `riak-repl connections`
+* Example: `riak-repl connections`
+
+#### clusterstats
+
+Displays current cluster stats using an optional `ip:port` as well as an
+optional `protocol-id`.
+
+`protocol-id` can be one of the following:
+
+* `cluster_mgr`
+* `rt_repl`
+* `fs_repl`
+
+The `clusterstats` command in use:
+
+* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>`
+* Example: `riak-repl clusterstats 192.168.2.1:9080`
+* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl`
+
+
+## Realtime Replication Commands
+
+#### realtime enable
+
+Enable realtime replication from a source cluster to sink clusters.
+
+This will start queuing updates for replication. The cluster will still
+require an invocation of `realtime start` for replication to occur.
+
+* Syntax: `riak-repl realtime enable <sink_clustername>`
+* Example: `riak-repl realtime enable Austin`
+
+#### realtime disable
+
+Disable realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime disable <sink_clustername>`
+* Example: `riak-repl realtime disable Austin`
+
+
+#### realtime start
+
+Start realtime replication connections from a source cluster to sink
+clusters. See also `realtime enable` (above).
+
+* Syntax: `riak-repl realtime start <sink_clustername>`
+* Example: `riak-repl realtime start Austin`
+
+#### realtime stop
+
+Stop realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime stop <sink_clustername>`
+* Example: `riak-repl realtime stop Austin`
+
+
+## Fullsync Replication Commands
+
+These behaviors can be altered by using the `advanced.config`
+`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information.
+
+#### fullsync enable
+
+Enable fullsync replication from a source cluster to sink clusters. By
+default, a fullsync will begin as soon as a connection to the remote
+cluster is established.
+
+* Syntax: `riak-repl fullsync enable <sink_clustername>`
+* Example: `riak-repl fullsync enable Austin`
+
+#### fullsync disable
+
+Disables fullsync for a cluster.
+
+* Syntax: `riak-repl fullsync disable <sink_clustername>`
+* Example: `riak-repl fullsync disable Austin`
+
+#### fullsync start
+
+Starts a fullsync. If the application configuration
+`fullsync_on_connect` is set to `false`, a fullsync needs to be started
+manually. This is also used to trigger a periodic fullsync using a cron
+job. While a fullsync is in progress, a `start` command is ignored and a
+message is logged.
+
+* Syntax: `riak-repl fullsync start <sink_clustername>`
+* Example: `riak-repl fullsync start Austin`
+
+#### fullsync stop
+
+Stops a fullsync.
+
+* Syntax: `riak-repl fullsync stop <sink_clustername>`
+* Example: `riak-repl fullsync stop Austin`
+
+## Cascading Realtime Writes
+
+#### realtime cascades
+
+Shows the current cascading realtime setting.
+
+* Syntax: `realtime cascades`
+* Example: `riak-repl realtime cascades`
+
+#### realtime cascades always
+
+Enable realtime cascading writes.
+
+* Syntax: `realtime cascades always`
+* Example: `riak-repl realtime cascades always`
+
+#### realtime cascades never
+
+Disable realtime cascading writes.
+
+* Syntax: `realtime cascades never`
+* Example: `riak-repl realtime cascades never`
+
+
+## NAT
+
+**Note**: See the [V3 Multi Data Center Replication With NAT][config v3 nat] documentation for more information.
+
+#### nat-map show
+
+Show the current NAT mapping table. 
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### `proxy-get enable`
+
+Enable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provide a redirection to the `<to-cluster-id>` for `proxy_get` if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+
+Show the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+
+Delete an existing redirect so that `proxy_get` requests go to the
+original provider cluster ID again.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Display this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+    ```bash
+    riak-repl show-local-cluster-id
+    ```
+
+    Possible output:
+
+    ```
+    local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+    ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster. This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line. 
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster, across all nodes of that cluster, for a
+fullsync to a sink cluster. This means that if fullsync is configured
+for two different sink clusters, both with a `max_fssource_cluster` of
+5, up to 10 fullsync workers can be in progress. This only affects
+nodes on the source cluster on which this parameter is defined via the
+configuration file or the command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node's number of maximum connections. This only affects nodes on
+the sink cluster on which this parameter is defined via the
+configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
+(Version 3) separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+    ```bash
+    riak-repl modes mode_repl12 mode_repl13
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+    ```bash
+    riak-repl modes
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and realtime replication copy data from source clusters to
+sink clusters, but some configuration and metadata (such as search
+indices and bucket properties) will not be replicated.
+
+Not replicating certain configuration and metadata makes heterogeneous
+cluster configurations possible, but there are operational steps you
+can take when you want homogeneous cluster configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you would change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Bucket and bucket type properties on the source cluster
+will _not_ be replicated from source clusters to sink clusters.
+
+If you want the properties for buckets or bucket types
+present on the source cluster to be propagated to sink clusters,
+you should update this data for each cluster at the same
+time you would change the source cluster.
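+
+As a minimal sketch of keeping such properties homogeneous, the same
+bucket type update can be applied on a node in the source cluster *and*
+on a node in each sink cluster (the type name and properties here are
+hypothetical):
+
+```bash
+# Run on one node in EACH cluster (source and every sink), since
+# bucket type properties are not carried over by replication.
+riak-admin bucket-type update users '{"props":{"n_val":5}}'
+riak-admin bucket-type status users   # verify the change took effect
+```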
+ + + + diff --git a/content/riak/kv/2.9.8/using/performance.md b/content/riak/kv/2.9.8/using/performance.md new file mode 100644 index 0000000000..3f3862a533 --- /dev/null +++ b/content/riak/kv/2.9.8/using/performance.md @@ -0,0 +1,268 @@ +--- +title: "Improving Performance" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Performance" + identifier: "managing_performance" + weight: 206 + parent: "managing" +toc: true +aliases: + - /riak/kv/2.9.8/ops/tuning/linux/ + - /riak/2.9.8/ops/tuning/linux/ +--- + +Many Unix-like operating systems and distributions are tuned for desktop +or light use out of the box and not for a production database. This +guide describes recommended system performance tunings for operators of +new and existing Riak clusters. The tunings present in this guide should +be considered as a starting point. It is important to make note of what +changes are made and when in order to measure the impact of those +changes. + +For performance and tuning recommendations specific to running Riak +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.8/using/performance/amazon-web-services). + +{{% note title="Note on other operating systems" %}} +Unless otherwise specified, the tunings recommended below are for Linux +distributions. Users implementing Riak on BSD and Solaris distributions can +use these tuning recommendations to make analogous changes in those operating +systems. +{{% /note %}} + +## Storage and File System Tuning + +### Virtual Memory + +Due to the heavily I/O-focused profile of Riak, swap usage can result in +the entire server becoming unresponsive. We recommend setting +`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much +as possible: + +```config +vm.swappiness = 0 +``` + +Ideally, you should disable swap to ensure that Riak's process pages are +not swapped. Disabling swap will allow Riak to crash in situations where +it runs out of memory. This will leave a crash dump file, named +`erl_crash.dump`, in the `/var/log/riak` directory which can be used to +determine the cause of the memory usage. + +### Transparent Huge Pages (THP) + +Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately this operation is rather OS specific. As many of our customers are running Red Hat 6, we have included instructions on how to do so underneath. If you are using a different operating system, please refer to documentation for your OS. + +In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line: + +``` +transparent_hugepage=never +``` + +For the change to become effective, a server reboot is required. + +{{% note title="Note on Kernel Tuning Tools" %}} +Some Kernel tuning tools such as ktune specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the Kernel tuning tool you are using as to how to disable THP. +{{% /note %}} + +### Mounts + +Riak makes heavy use of disk I/O for its storage operations. 
It is
+important that you mount the volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
+touched when read. This flag can be set temporarily using the following
+command:
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` flag can be set in `/etc/fstab` to make the
+mount permanent.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is Completely Fair
+Queuing (`cfq`), which is designed for desktop use. While `cfq` is a
+good general-purpose scheduler, it is not designed to provide the kind
+of throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* The `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* The `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+
+To check the scheduler depth for block device `sda`, use the following
+command:
+
+```bash
+cat /sys/block/sda/queue/nr_requests
+```
+
+To increase the scheduler depth to 1024, use the following command:
+
+```bash
+echo 1024 > /sys/block/sda/queue/nr_requests
+```
+
+### Filesystem
+
+Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and
+[XFS](http://xfs.org/index.php/Main_Page) are recommended on some
+operating systems for greater reliability and recoverability.
+
+At this time, Basho can recommend using ZFS on Solaris, SmartOS, and
+OmniOS. ZFS may work well with Riak on direct Solaris clones like
+IllumOS, but we cannot yet recommend this. [ZFS on
+Linux](http://zfsonlinux.org) is still too early in its project lifetime
+to be recommendable for production use due to concerns that have been
+raised about excessive memory use. ZFS on FreeBSD is more mature than
+ZFS on Linux, but Basho has not yet performed sufficient performance and
+reliability testing to recommend using ZFS and Riak on FreeBSD.
+
+In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and
+[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on
+operating systems on which ZFS or XFS are not available or recommended.
+
+The ext4 file system defaults include two options that increase
+integrity but slow performance. Because Riak's integrity is based on
+multiple nodes holding the same data, these two options can be changed
+to boost I/O performance. We recommend setting `barrier=0` and
+`data=writeback` when using the ext4 filesystem.
+
+Similarly, the XFS file system defaults can be optimized to improve
+performance. We recommend setting `nobarrier`, `logbufs=8`,
+`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem.
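+
+As a minimal sketch (the device name and mount point below are
+hypothetical), a dedicated ext4 Riak data volume combining these
+options with `noatime` might appear in `/etc/fstab` as:
+
+```config
+# /etc/fstab -- hypothetical entry for a dedicated Riak data volume
+/dev/sdb1  /var/lib/riak  ext4  noatime,barrier=0,data=writeback  0  0
+```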
+
+As with the `noatime` setting, these settings should be added to
+`/etc/fstab` (as in the sketch above) so that they persist across
+server restarts.
+
+## Kernel and Network Tuning
+
+The following settings are minimally sufficient to improve many aspects
+of Riak usage on Linux, and should be added or updated in
+`/etc/sysctl.conf`:
+
+```config
+net.ipv4.tcp_max_syn_backlog = 40000
+net.core.somaxconn = 40000
+net.core.wmem_default = 8388608
+net.core.rmem_default = 8388608
+net.ipv4.tcp_sack = 1
+net.ipv4.tcp_window_scaling = 1
+net.ipv4.tcp_fin_timeout = 15
+net.ipv4.tcp_keepalive_intvl = 30
+net.ipv4.tcp_tw_reuse = 1
+net.ipv4.tcp_moderate_rcvbuf = 1
+```
+
+{{% note title="Note on system default" %}}
+In general, these recommended values should be compared with the system
+defaults and only changed if benchmarks or other performance metrics indicate
+that networking is the bottleneck.
+{{% /note %}}
+
+The following settings are optional, but may improve performance on a
+10Gb network:
+
+```config
+net.core.rmem_max = 134217728
+net.core.wmem_max = 134217728
+net.ipv4.tcp_mem = 134217728 134217728 134217728
+net.ipv4.tcp_rmem = 4096 277750 134217728
+net.ipv4.tcp_wmem = 4096 277750 134217728
+net.core.netdev_max_backlog = 300000
+```
+
+Certain network interfaces ship with on-board features that have been
+shown to hinder Riak network performance. These features can be disabled
+via `ethtool`.
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Retuning and retesting will be required if these values are changed, as they
+affect all network operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho in nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, it is necessary to
+increase the open files limit. See [Open Files Limit]({{<baseurl>}}riak/kv/2.9.8/using/performance/open-files-limit/) for more
+details.
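+
+As a quick sanity check (a sketch, not a substitute for the Open Files
+Limit guide), the soft and hard descriptor limits can be inspected in
+the shell that will start Riak:
+
+```bash
+ulimit -n    # current soft limit on open file descriptors
+ulimit -Hn   # hard limit for the current shell
+```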
+ +## Other Tuning Docs + +* [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.8/using/performance/amazon-web-services) +* [Erlang VM Tuning]({{<baseurl>}}riak/kv/2.9.8/using/performance/erlang) +* [Latency Reduction]({{<baseurl>}}riak/kv/2.9.8/using/performance/latency-reduction) +* [Open Files Limit]({{<baseurl>}}riak/kv/2.9.8/using/performance/open-files-limit/) + + + + diff --git a/content/riak/kv/2.9.8/using/performance/amazon-web-services.md b/content/riak/kv/2.9.8/using/performance/amazon-web-services.md new file mode 100644 index 0000000000..832b8bb1c9 --- /dev/null +++ b/content/riak/kv/2.9.8/using/performance/amazon-web-services.md @@ -0,0 +1,247 @@ +--- +title: "Amazon Web Services Performance Tuning" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Amazon Web Services" + identifier: "performance_aws" + weight: 106 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.9.8/ops/tuning/aws + - /riak/kv/2.9.8/ops/tuning/aws +--- + +This guide introduces best practices for tuning Riak cluster performance +in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment. + +> **Note:** +> +> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide. + +## EC2 Instances + +EC2 instances are available as predefined types which encapsulate a +fixed amount of computing resources. For Riak, the most important of +these resources are Disk I/O, RAM, and Network I/O, followed by CPU +cores. With this in mind, Riak users have reported success with large, +extra large, and cluster compute instance types for use as cluster nodes +in the AWS EC2 environment. + +The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge` can be used. + +Amazon also offers a High I/O Quadruple Extra Large instance +(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features +very high I/O performance. + +EBS-Optimized EC2 instances, which provide between 500 Megabits per +second and 1,000 Megabits per second of throughput with [Provisioned +IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/) +EBS volumes are also available, and recommended for use with Provisioned +IOPS EBS volumes. + +Riak's primary bottleneck will be disk and network I/O, meaning that in +most cases, standard EBS will incur too much latency and iowait. Riak's +I/O pattern tends to operate on small blobs from many places on the +disk, whereas EBS is best at bulk reads and writes. The negative effects +of this pattern can be mitigated by adding RAID over multiple volumes, +using Provisioned IOPS, and/or choosing the Bitcask backend if secondary +indexes are not needed for the application. + +In any case, proper benchmarking and tuning are needed to achieve the +desired performance. + +{{% note title="Tip" %}} +Most successful AWS cluster deployments use more EC2 instances than they would +the same number of physical nodes to compensate for the performance +variability caused by shared, virtualized resources. Plan to have more EC2 +instance based nodes than physical server nodes when estimating cluster size +with respect to node count. 
+{{% /note %}} + +## Operating System + +### Clocks + +NTP is configured by default on Amazon EC2 Linux instances. Please +refer to the [Set the Time for an +Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html) +section of the EC2 documentation for steps on verifying if NTP is +working properly. If NTP is not working properly, significant clock +drift can occur. + +### Mounts and Scheduler + +On EBS volumes, the **deadline** scheduler should be used. To check the +scheduler in use for block device xvdf, for example, use the following +command: + +```bash +cat /sys/block/xvdf/queue/scheduler +``` + +To set the scheduler to deadline, use the following command: + +```bash +echo deadline > /sys/block/xvdf/queue/scheduler +``` + +More information on the disk scheduler is available in [Improving Performance](../). + +### Virtual Memory Subsystem + +EBS volumes have considerably less bandwidth than hardware disks. To +avoid saturating EBS bandwidth and inducing IO latency spikes, it is +recommended to tune the Linux virtual memory subsystem to flush smaller +amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings). + +### Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, back up log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally +and are not affected by a wider problem like an AWS service outage. Try +to determine the cause of the problem from the data you have collected. +If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or re-sold under Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk. + +Have your collected data ready when contacting TI Tokyo Client Services. A +Client Services Engineer (CSE) might request log files, configuration +files, or other information. + +## Data Loss + +Many failures either do not entail data loss or have minimal loss that +can be repaired automatically, without intervention. Outage of a single +node does not necessarily cause data loss, as other replicas of every +key are available elsewhere in the cluster. Once the node is detected as +down, other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called hinted handoff). + +The more severe data loss scenarios usually relate to hardware failure +(in the case of AWS, service failure or instance termination). In the +cases where data is lost, several options are available for restoring +the data: + +1. Restore from backup. A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but can be used to partially restore data from + lost EBS volumes. If running in a RAID configuration, rebuilding the + array may also be possible. +2. Restore from Multi-Datacenter Replication. If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the + `riak-repl` command. +3. Restore using intra-cluster repair. 
Riak versions 1.2 and greater + include a "repair" feature which will restore lost partitions with + data from other replicas. This currently has to be invoked manually + using the Riak console and should be performed with guidance from a + Basho CSE. + +Once data has been restored, normal operations should continue. If +multiple nodes completely lose their data, consultation and assistance +from Basho is strongly recommended. + +## Benchmarking + +Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that +simulates application operations by constructing and communicating +approximately-compatible data payloads with the Riak cluster directly. + +Benchmarking is critical to determining the appropriate EC2 instance +types, and strongly recommended. More information is available on +benchmarking Riak clusters with [Basho Bench](../benchmarking). + +Besides running Basho Bench, we also advise that you load test Riak with +your own tests to ensure that load imparted by MapReduce queries, +full-text queries, and index queries are within the expected range. + +## Simulating Upgrades, Scaling, and Failure states + +In addition to simply measuring performance, it is also important to +measure how performance degrades when the cluster is not in +steady-state. While under a simulation of live load, the following +states might be simulated: + +1. Stop one or more nodes normally and restart them after a few moments + (simulates [rolling upgrade](../../../setup/upgrading/cluster)). +2. Join two or more nodes to the cluster. +3. Leave nodes from the cluster (after step #2). +4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then + restart it. +5. Hard-reboot a node's instance using the AWS console and then + restart it. +6. Hard-stop and destroy a node's instance and build a new one from + backup. +7. Via networking, e.g. firewall, partition one or more nodes from + the rest of the cluster and then restore the original + configuration. + +## Out-of-Memory + +Sometimes, Riak will exit when it runs out of available RAM. While this +does not necessarily cause data loss, it may indicate that the cluster +needs to be scaled out. While the Riak node is out, other nodes may also +be at risk if free capacity is low on the rest of the cluster, so +monitor carefully. + +Replacing the EC2 instance type with one that has greater RAM capacity +may temporarily alleviate the problem, but out of memory (OOM) tends to +be an indication that the cluster is underprovisioned. + +Software bugs (memory leaks) could also be a cause of OOM, so we +recommend paid support Riak users to contact TI Tokyo Client Services +if this problem occurs. + +## Dealing with IP addresses + +EC2 instances that are not provisioned inside a VPC can change the +following attributes after a restart: + +* Private IP address +* Public IP address +* Private DNS +* Public DNS + +Because these parameters play a role in a Riak instance's node name, +ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to replace +it. + +To avoid this inconvenience, you can deploy Riak inside a +[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not +change their private IP address on restart. 
In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance, generally performs better, but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. Along with EBS you can optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/performance/benchmarking.md b/content/riak/kv/2.9.8/using/performance/benchmarking.md
new file mode 100644
index 0000000000..54b12866b7
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/performance/benchmarking.md
@@ -0,0 +1,602 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/benchmarking
+  - /riak/kv/2.9.8/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   `basho_bench`. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster. Do not run the `basho_bench` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download `basho_bench`
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/2.9.8/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building `basho_bench` from source.
Later
+  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the `erlang-crypto` package, which
+  is required by `basho_bench`.
+* Install `git` (to check out the `basho_bench` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory to generate the results into:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed `basho_bench` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common `~/` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+`/home/username/bench_results/current/`.
+
+If you built `basho_bench` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput - Operations per second over the duration of the test.
+* Latency at 99th percentile, 99.9th percentile, and max latency for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running `basho_bench`, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
+
+#### Installing R on Ubuntu
+
+```
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed `basho_bench` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+`/home/username/bench_results/summary.png`.
+
+If you have built `basho_bench` from source, you can just use
+`make`. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the `basho_bench/README`.
+
+## How does it work?
+ +When Basho Bench starts (`basho_bench.erl`), it reads the +configuration (`basho_bench_config.erl`), creates a new results +directory, and then sets up the test (`basho_bench_app.erl` and +`basho_bench_sup.erl`). + +During test setup, Basho Bench creates the following: + +* One **stats process** (`basho_bench_stats.erl`). This process + receives notifications when an operation completes, plus the + elapsed time of the operation, and stores it in a histogram. At + regular intervals, the histograms are dumped to `summary.csv` as + well as operation-specific latency CSVs (e.g. `put_latencies.csv` + for the PUT operation). +* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting + (`basho_bench_worker.erl`). The worker process wraps a driver + module, specified by the [driver](#driver) + configuration setting. The driver is randomly invoked using the + distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the + driver invokes operations is governed by the [mode](#mode) setting. + +Once these processes have been created and initialized, Basho Bench +sends a run command to all worker processes, causing them to begin the +test. Each worker is initialized with a common seed value for random +number generation to ensure that the generated workload is reproducible +at a later date. + +During the test, the workers repeatedly call `driver:run/4`, passing in +the next operation to run, a keygen function, a valuegen function, and +the last state of the driver. The worker process times the operation, +and reports this to the stats process when the operation has completed. + +Finally, once the test has been run for the duration specified in the +config file, all workers and stats processes are terminated and the +benchmark ends. The measured latency and throughput of the test can be +found in `./tests/current/`. Previous results are in timestamped +directories of the form `./tests/YYYYMMDD-HHMMSS/`. + +## Configuration + +Basho Bench ships with a number of sample configuration files, available +in the `/examples` directory. + +### Global Config Settings + +#### mode + +The `mode` setting controls the rate at which workers invoke the +`{driver:run/4}` function with a new operation. There are two possible +values: + +* `{max}` - generate as many ops per second as possible +* `{rate, N}` - generate N ops per second, with exponentially distributed interarrival times + +Note that this setting is applied to each driver independently. For +example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench +will be generating 15 (i.e. 5 * 3) operations per second. + +```erlang +% Run at max, i.e.: as quickly as possible +{mode, max} + +% Run 15 operations per second per worker +{mode, {rate, 15}} +``` + +#### concurrent + +The number of concurrent worker processes. The default is 3 worker +processes. This determines the number of concurrent clients running +requests on API under test. + +```erlang +% Run 10 concurrent processes +{concurrent, 10} +``` + +#### duration + +The duration of the test, in minutes. The default is 5 minutes. + +```erlang +% Run the test for one hour +{duration, 60} +``` + +#### operations + +The possible operations that the driver will run, plus their "weight," +or likelihood of being run. The default is `[{get,4},{put,4},{delete, +1}]`, which means that out of every 9 operations, GET will be called +four times, PUT will be called four times, and DELETE will be called +once, on average. 
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE, respectively.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                          [{{{basho_bench_driver_null,run,
+                                              [asdfput,
+                                               #Fun<basho_bench_keygen.4.4674>,
+                                               #Fun<basho_bench_valgen.0.1334>,
+                                               undefined]}}},
+                                           {{{basho_bench_worker,
+                                             worker_next_op,1}}},
+                                           {{{basho_bench_worker,
+                                             max_worker_run_loop,1}}}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` - Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` - Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` - Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` - Directly invokes the Bitcask API
+* `basho_bench_driver_dets` - Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` - operation completed successfully
+* `{error, Reason, NewState}` - operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` - operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` - operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` - generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` - the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` - the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` - selects an integer from a uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` - selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` - the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a key generator function.
The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` - takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` - takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a Pareto-distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` - generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` - generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` - generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed,
+% starting at 1000 bytes with a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+The default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+A list of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+``` + +#### riakclient_replies + +This value is used for R-values during a get operation, and W-values +during a put operation. + +```erlang +% Expect 1 reply. +{riakclient_replies, 1}. +``` + +#### riakclient_bucket + +The Riak bucket to use for reading and writing values. The Default is +`<<"test">>`. + +```erlang +% Use the "bench" bucket. +{riakclient_bucket, <<"bench">>}. +``` + +### basho_bench_driver_riakc_pb Settings + +#### riakc_pb_ips + +A list of IP addresses to connect the workers to. A random IP will be +chosen for each worker. + +The default is `{riakc_pb_ips, [{127,0,0,1}]}` + +```erlang +% Connect to a cluster of 3 machines +{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]} +``` + +#### riakc_pb_port + +The port on which to connect to the PBC interface. + +The default is `{riakc_pb_port, 8087}` + +#### riakc_pb_bucket + +The bucket to use for testing. + +The default is `{riakc_pb_bucket, <<"test">>}` + +### basho_bench_driver_http_raw Settings + +#### http_raw_ips + +A list of IP addresses to connect the workers to. Each worker makes +requests to each IP in a round-robin fashion. + +The default is `{http_raw_ips, ["127.0.0.1"]}` + +```erlang +% Connect to a cluster of machines in the 10.x network +{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}. +``` + +#### http_raw_port + +Select the default port to connect to for the HTTP server. + +The default is `{http_raw_port, 8098}`. + +```erlang +% Connect on port 8090 +{http_raw_port, 8090}. +``` + +#### http_raw_path + +The base path to use for accessing Riak, usually `"/riak/<bucket>"`. + +The default is `{http_raw_path, "/riak/test"}`. + +```erlang +% Place test data in another_bucket +{http_raw_path, "/riak/another_bucket"}. +``` + +#### http_raw_params + +Additional parameters to add to the end of the URL. This can be used +to set the `r`/`w`/`dw`/`rw` parameters as desired. + +The default is `{http_raw_params, ""}`. + +```erlang +% Set R=1, W=1 for testing a system with n_val set to 1 +{http_raw_params, "?r=1&w=1"}. +``` + +#### http_raw_disconnect_frequency + +How often, in seconds or number of operations, the HTTP clients +(workers) should forcibly disconnect from the server. + +The default is `{http_raw_disconnect_frequency, infinity}` (which +means that Basho Bench should never forcibly disconnect). + +```erlang +% Disconnect after 60 seconds +{http_raw_disconnect_frequency, 60}. + +% Disconnect after 200 operations +{http_raw_disconnect_frequency, {ops, 200}}. +``` + +## Custom Driver + +A custom driver must expose the following callbacks. + +```erlang +% Create the worker +% ID is an integer +new(ID) -> {ok, State} or {error, Reason}. + +% Run an operation +run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}. +``` + +See the [existing +drivers](https://github.com/basho/basho_bench/tree/master/src) for +more details. 
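+
+As a minimal sketch of these callbacks, a no-op driver (the module name
+and its handling of `get`/`put` here are illustrative, not part of the
+official driver set) could look like this:
+
+```erlang
+-module(basho_bench_driver_noop).
+
+-export([new/1, run/4]).
+
+%% Create per-worker state; here the worker ID itself is the state.
+new(Id) ->
+    {ok, Id}.
+
+%% Handle the operations named in the config's {operations, ...} list.
+run(get, KeyGen, _ValueGen, State) ->
+    _Key = KeyGen(),        % generate a key, then do nothing with it
+    {ok, State};
+run(put, KeyGen, ValueGen, State) ->
+    _Key = KeyGen(),
+    _Value = ValueGen(),    % generate a value, then discard it
+    {ok, State}.
+```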
+ + + + diff --git a/content/riak/kv/2.9.8/using/performance/erlang.md b/content/riak/kv/2.9.8/using/performance/erlang.md new file mode 100644 index 0000000000..e0bfcef501 --- /dev/null +++ b/content/riak/kv/2.9.8/using/performance/erlang.md @@ -0,0 +1,371 @@ +--- +title: "Erlang VM Tuning" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Erlang VM" + identifier: "performance_erlang" + weight: 105 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.9.8/ops/tuning/erlang + - /riak/kv/2.9.8/ops/tuning/erlang +--- + +Riak was written almost exclusively in [Erlang](http://www.erlang.org) +and runs on an Erlang virtual machine (VM), which makes proper Erlang VM +tuning an important part of optimizing Riak performance. The Erlang VM +itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm). + +The table below lists some of the parameters that are available, showing +both their names as used in Erlang and their names as Riak parameters. + +Erlang parameter | Riak parameter +:----------------|:-------------- +[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads` +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K` +[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit` +[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports` +[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online` +[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W` +[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size` +[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables` +[`+scl`](http://www.erlang.org/doc/main/erl.html#+scl) | `erlang.schedulers.compaction_of_load` +[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval` +[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp` +[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing` +[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size` +[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime` +[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after` +[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump` +[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables` +`-name` | `nodename` + +{{% note title="Note on upgrading to 2.0" %}} +In versions of Riak prior to 2.0, Erlang VM-related parameters were specified +in a `vm.args` configuration file; in versions 2.0 and later, all +Erlang-VM-specific parameters are set in the `riak.conf` file. If you're +upgrading to 2.0 from an earlier version, you can still use your old `vm.args` +if you wish. Please note, however, that if you set one or more parameters in +both `vm.args` and in `riak.conf`, the settings in `vm.args` will override +those in `riak.conf`. 
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
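+
+As a sketch (the core counts are illustrative), a 16-core machine that
+should create 16 scheduler threads but keep only 8 online might use:
+
+```riakconf
+erlang.smp = auto
+erlang.schedulers.total = 16
+erlang.schedulers.online = 8
+```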
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` parameter to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. epmd uses an unpredictable port for inter-node communication
+by default, binding to port 0, which means that it uses the first
+available port. This can make it difficult to configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively.
The +following would set the range to ports between 3000 and 5000: + +```riakconf +erlang.distribution.port_range.minimum = 3000 +erlang.distribution.port_range.maximum = 5000 +``` + +```appconfig +%% The older, app.config-based system uses different parameter names +%% for specifying the minimum and maximum port + +{kernel, [ + % ... + {inet_dist_listen_min, 3000}, + {inet_dist_listen_max, 5000} + % ... + ]} +``` + +You can set the Erlang VM to use a single port by setting the minimum to +the desired port while setting no maximum. The following would set the +port to 5000: + +```riakconf +erlang.distribution.port_range.minimum = 5000 +``` + +```appconfig +{kernel, [ + % ... + {inet_dist_listen_min, 5000}, + % ... + ]} +``` + +If the minimum port is unset, the Erlang VM will listen on a random +high-numbered port. + +### Maximum Ports + +You can set the maximum number of concurrent ports/sockets used by the +Erlang VM using the `erlang.max_ports` setting. Possible values range +from 1024 to 134217727. The default is 65536. In `vm.args` you can use +either `+Q` or `-env ERL_MAX_PORTS`. + +## Asynchronous Thread Pool + +If thread support is available in your Erlang VM, you can set the number +of asynchronous threads in the Erlang VM's asynchronous thread pool +using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0 +to 1024. If thread support is available on your OS, the default is 64. +Below is an example setting the number of async threads to 600: + +```riakconf +erlang.async_threads = 600 +``` + +```vmargs ++A 600 +``` + +### Stack Size + +In addition to the number of asynchronous threads, you can determine the +memory allocated to each thread using the +`erlang.async_threads.stack_size` parameter, which corresponds to the +`+a` Erlang flag. You can determine that size in Riak using KB, MB, GB, +etc. The valid range is 16-8192 kilowords, which translates to 64-32768 +KB on 32-bit architectures. While there is no default, we suggest a +stack size of 16 kilowords, which translates to 64 KB. We suggest such a +small size because the number of asynchronous threads, as determined by +`erlang.async_threads` might be quite large in your Erlang VM. The 64 KB +default is enough for drivers delivered with Erlang/OTP but might not be +large enough to accommodate drivers that use the `driver_async()` +functionality, documented +[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend +setting higher values with caution, always keeping the number of +available threads in mind. + +## Kernel Polling + +You can utilize kernel polling in your Erlang distribution if your OS +supports it. Kernel polling can improve performance if many file +descriptors are in use; the more file descriptors, the larger an effect +kernel polling may have on performance. Kernel polling is enabled by +default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`. +This corresponds to the +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the +Erlang VM. You can disable it by setting `erlang.K` to `off`. + +## Warning Messages + +Erlang's +[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an +event manager that registers error, warning, and info events from the +Erlang runtime. By default, events from the `error_logger` are mapped as +warnings, but you can also set messages to be mapped as errors or info +reports using the `erlang.W` parameter (or `+W` in `vm.args`). The +possible values are `w` (warnings), `errors`, or `i` (info reports). 
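+
+For example (a minimal sketch), to map `error_logger` messages to info
+reports rather than the default warnings:
+
+```riakconf
+erlang.W = i
+```
+
+```vmargs
++W i
+```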
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting
+can be useful on nodes with many `busy_dist_port` events, i.e. instances
+when the Erlang distribution is overloaded. The default is 32 MB (i.e.
+`32MB`), but this may be insufficient for some workloads. The maximum
+value is 2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) \(Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.distribution_buffer_size` (explained in the section
+[above](#distribution-buffer)) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can determine that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every
+N seconds, where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can determine how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
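+
+As an illustrative sketch, the following equivalent settings would cap
+shutdown at 5 seconds (note that `vm.args` takes milliseconds while
+`riak.conf` accepts duration units):
+
+```riakconf
+erlang.shutdown_time = 5s
+```
+
+```vmargs
+-shutdown_time 5000
+```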
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/performance/latency-reduction.md b/content/riak/kv/2.9.8/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..49bb5c3afb
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/performance/latency-reduction.md
@@ -0,0 +1,267 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/tuning/latency-reduction
+  - /riak/kv/2.9.8/ops/tuning/latency-reduction
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about them.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB. Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric                        | Explanation
+:-----------------------------|:-----------
+`node_get_fsm_objsize_mean`   | The mean object size encountered by this node in the last minute
+`node_get_fsm_objsize_median` | The median object size encountered by this node in the last minute
+`node_get_fsm_objsize_95`     | The 95th-percentile object size encountered by this node in the last minute
+`node_get_fsm_objsize_99`     | The 99th-percentile object size encountered by this node in the last minute
+`node_get_fsm_objsize_100`    | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and restart your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes). 
This increases the size of the +distributed Erlang buffer from its default of 1024 KB. Re-start your +node when configuration changes have been made. + +Large objects can also impact latency even if they're only present on +some nodes. If increased latency occurs only on N nodes, where N is your +[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes. + +If large objects are suspected, you should also audit the behavior of +siblings in your cluster, as explained in the [next section](#siblings). + +## Siblings + +In Riak, object conflicts are handled by keeping multiple versions of +the object in the cluster either until a client takes action to resolve +the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects). + +### Mitigation + +The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor +object size (or via an HTTP `GET` request to `/stats`). In the output of +`riak-admin status` in each node, you'll see the following +sibling-related statistics: + +Metric | Explanation +:------------------------------|:----------- +`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute + +Is there an upward trend in these statistics over time? Are there any +large outliers? Do these trends correspond to your observed latency +spikes? + +If you believe that sibling creation problems could be responsible for +latency issues in your cluster, you can start by checking the following: + +* If `allow_mult` is set to `true` for some or all of your buckets, be + sure that your application is correctly resolving siblings. Be sure to + read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate. + If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly. +* Application errors are a common source of problems with + siblings. Updating the same key over and over without passing a + [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. 
If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) \(DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W is set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. The maximum file size setting is
+labeled `bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn), while setting it higher will induce less
+frequent merges with more I/O churn. To find settings that are ideal for
+your use case, we recommend checking out our guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments. 
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak is a heavily I/O- and network-intensive system. Bottlenecks on
+either front can lead to undue latency in your cluster. We recommend an
+active monitoring strategy to detect problems immediately when they
+arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+at your disposal, including
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat).
+
+To diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+You can also set an object size threshold past which a write will
+succeed but will register a warning in the logs. To do so, adjust the
+`object.size.warning_threshold` parameter. The default is `5MB`. 
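+
+As a sketch under the settings just described (the values here are
+illustrative, not recommendations), a `riak.conf` stanza that warns at
+200 KB and rejects writes above 1 MB might look like this:
+
+```riakconf
+object.size.warning_threshold = 200KB
+object.size.maximum = 1MB
+```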
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs an error (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.9.8/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..ffcff2ef05
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,47 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.8/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/performance/open-files-limit.md b/content/riak/kv/2.9.8/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..83b0b3b237
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/performance/open-files-limit.md
@@ -0,0 +1,351 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/tuning/open-files-limit/
+  - /riak/kv/2.9.8/ops/tuning/open-files-limit/
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation. 
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session    required   pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`. 
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session    required   pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+        "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000. 
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file.
+
+4\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```bash
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.9.8/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..09976201f0
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,50 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times. 
First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference.md b/content/riak/kv/2.9.8/using/reference.md
new file mode 100644
index 0000000000..22ec64e0b0
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference.md
@@ -0,0 +1,135 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system. 
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/architecture.md b/content/riak/kv/2.9.8/using/reference/architecture.md
new file mode 100644
index 0000000000..dcacf4a576
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/architecture.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+#menu:
+#  riak_kv-2.9.8:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+<!-- TODO: Content -->
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/bucket-types.md b/content/riak/kv/2.9.8/using/reference/bucket-types.md
new file mode 100644
index 0000000000..d0f28959aa
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/bucket-types.md
@@ -0,0 +1,823 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+Bucket types allow groups of buckets to share configuration details and
+enable Riak users to manage bucket properties more efficiently than in
+the older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still
+downgrade the cluster to a pre-2.0 version _as long as you have not created
+and activated a bucket type in the cluster_. Once any bucket type has been
+created and activated, you can no longer downgrade the cluster to a pre-2.0
+version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/2.9.8/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, related to
+  [Riak data types]({{<baseurl>}}riak/kv/2.9.8/developing/data-types), and [strong consistency]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/strong-consistency) respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types/#bucket-properties-and-operations). 
You cannot simply take a
+bucket `my_bucket` and assign it a type the way that you would, say,
+set `allow_mult` to `false` or `n_val` to `5`, because there is no
+`type` parameter contained within the bucket's properties (i.e.
+`props`).
+
+Instead, bucket types are applied to buckets _on the basis of how those
+buckets are queried_. Queries involving bucket types take the following
+form:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+In the older system, only bucket and key are specified in queries:
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+## When to Use Bucket Types
+
+In many respects, bucket types are a major improvement over the older
+system of bucket configuration, including the following:
+
+* Bucket types are more flexible because they enable you to define a
+  bucket configuration and then change it if you need to.
+* Bucket types are more reliable because the buckets that bear a given
+  type only have their properties changed when the type is changed.
+  Previously, it was possible to change the properties of a bucket only
+  through client requests.
+* Whereas bucket properties can only be altered by clients interacting
+  with Riak, bucket types are more of an operational concept. The
+  `riak-admin bucket-type` interface (discussed in depth below) enables
+  you to manage bucket configurations on the operations side, without
+  recourse to Riak clients.
+
+For these reasons, we recommend _always_ using bucket types in versions
+of Riak 2.0 and later.
+
+## Managing Bucket Types Through the Command Line
+
+Bucket types are created, updated, activated, and more through the
+`riak-admin bucket-type` interface.
+
+Below is a full list of available sub-commands:
+
+Command | Action | Form |
+:-------|:-------|:-----|
+`create` | Create or modify a bucket type before activation | `create <type> <json>` |
+`activate` | Activate a bucket type | `activate <type>` |
+`list` | List all currently available bucket types and their activation status | `list` |
+`status` | Display the status and properties of a specific bucket type | `status <type>` |
+`update` | Update a bucket type after activation | `update <type> <json>` |
+
+### Creating a Bucket Type
+
+Creating new bucket types involves using the `create <type> <json>`
+command, where `<type>` is the name of the type and `<json>` is a JSON
+object of the following form:
+
+```json
+{
+  "props": {
+    "prop1": "val1",
+    "prop2": "val2",
+    ...
+  }
+}
+```
+
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.8/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.8/developing/getting-started) section.
+
+If creation is successful (in this example, for a type named
+`type_using_defaults`), you should see the following output:
+
+```
+type_using_defaults created
+```
+
+{{% note %}}
+The `create` command can be run multiple times prior to a bucket type being
+activated. Riak will persist only those properties contained in the final call
+of the command.
+{{% /note %}}
+
+Creating bucket types that assign properties _always_ involves passing
+stringified JSON to the `create` command. One way to do that is to pass
+a JSON string directly. 
The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak-admin bucket-type create from_json_file "$(cat props.json)"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```bash
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock: 20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after a bucket is
+created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then those
+properties are fixed for the life of the bucket type.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type. 
+{{% /note %}}
+
+## Buckets as Namespaces
+
+In versions of Riak prior to 2.0, all queries are made to a bucket/key
+pair, as in the following example read request:
+
+```java
+Location myKey = new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch = new FetchValue.Builder(myKey).build();
+client.execute(fetch);
+```
+
+```ruby
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```php
+$location = new Location('my_key', new Bucket('my_bucket'));
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```csharp
+var id = new RiakObjectId("my_bucket", "my_key");
+client.Get(id);
+```
+
+```javascript
+client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Object} = riakc_pb_socket:get(Pid,
+                                   <<"my_bucket">>,
+                                   <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+```
+
+With the addition of bucket types in Riak 2.0, bucket types can be used
+as _an additional namespace_ on top of buckets and keys. The same bucket
+name can be associated with completely different data if it is used in
+accordance with a different type. Thus, the following two requests will
+be made to _completely different objects_, even though the bucket and key
+names are the same:
+
+```java
+Location key1 =
+  new Location(new Namespace("type1", "my_bucket"), "my_key");
+Location key2 =
+  new Location(new Namespace("type2", "my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(key1).build();
+FetchValue fetch2 = new FetchValue.Builder(key2).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1'));
+$location2 = new Location('my_key', new Bucket('my_bucket', 'type2'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("type1", "my_bucket", "my_key");
+var id2 = new RiakObjectId("type2", "my_bucket", "my_key");
+var rslt1 = client.Get(id1);
+var rslt2 = client.Get(id2);
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'type1', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+
+client.fetchValue({
+    bucketType: 'type2', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"type1">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 {<<"type2">>, <<"my_bucket">>},
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key
+```
+
+{{% note title="Note on object location" %}}
+In Riak 2.x, _all requests_ must be made to a location specified by a bucket
+type, bucket, and key rather than to a bucket/key pair, as in previous
+versions. 
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
+```
+
+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. 
+
+In version 2.0, this is changing in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolutions in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind when using versions of Riak 2.0 and
+later any time that you create, activate, and use your own bucket types.
+It is still possible to set `allow_mult` to `false` in any given bucket
+type, but it must be done explicitly. If we wanted to set
+`allow_mult` to `false` in our `n_val_of_2` bucket type from above, we
+would need to create or modify the already existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/2.9.8/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/2.9.8/developing/usage/commit-hooks/#post-commit-hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again. If it still does not
+    work, then there may be network partitions or other issues that need
+    to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key = new Location(new Namespace("no_siblings", "sensitive_user_data"), "user19735");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(key)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{ ... user data ... }")
+  ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` using the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket. If the bucket type `no_siblings` has been
+If the bucket type `no_siblings` has been created and activated, the
+request will ensure that the `old_memes` bucket inherits all of the
+properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(allYourBaseKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type
+from above if we didn't want to deal with siblings, vclocks, and the
+like, and we could use a `siblings_allowed` bucket type (with all of the
+default properties except `allow_mult`, which would be set to `true`).
+This would give us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
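+
+To illustrate the isolation, the same key can be fetched independently
+from each namespace over HTTP. A sketch, assuming the types and buckets
+above exist and the key has been written to each pair:
+
+```curl
+curl http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/no_siblings/buckets/new_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/new_memes/keys/favorite_meme
+```
+
+Each request can return a different value, because each bucket
+type/bucket pair addresses a distinct keyspace.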
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/custom-code.md b/content/riak/kv/2.9.8/using/reference/custom-code.md
new file mode 100644
index 0000000000..579fda4d72
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/custom-code.md
@@ -0,0 +1,135 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/advanced/install-custom-code/
+  - /riak/kv/2.9.8/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/2.9.8/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/2.9.8/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. Note that in Erlang, a file must
+have the same name as the module it contains. So if you are given a file
+named `validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+         on supported platforms
+
+Compiling the module is a straightforward process.
+
+```text
+erlc validate_json.erl
+```
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+Next, you'll need to define a directory in which compiled modules can be
+stored and from which they can be loaded. For our example, we'll use a
+temporary directory `/tmp/beams`, but you should choose a directory for
+production functions based on your own requirements such that they will
+be available where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+### Configure
+
+Take the `validate_json.beam` and copy this file to the `/tmp/beams`
+directory.
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again in our example case,
+`/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+  %% ...
+  {add_paths, ["/tmp/beams/"]},
+  %% ...
+```
+
+After updating `app.config`, Riak must be restarted. In production, if
+you are applying configuration changes to multiple nodes, you should do
+so in a rolling fashion, taking time to ensure that the Riak key value
+store has fully initialized and become available for use.
+
+This is done with the `riak-admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure riak_kv is active before restarting the
+next node.
+{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.8/using/reference/failure-recovery.md b/content/riak/kv/2.9.8/using/reference/failure-recovery.md new file mode 100644 index 0000000000..7e156762e4 --- /dev/null +++ b/content/riak/kv/2.9.8/using/reference/failure-recovery.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/handoff.md b/content/riak/kv/2.9.8/using/reference/handoff.md
new file mode 100644
index 0000000000..7a512f4609
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/handoff.md
@@ -0,0 +1,201 @@
+---
+title: "Handoff Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Handoff"
+    identifier: "managing_ref_handoff"
+    weight: 101
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/handoff/
+  - /riak/kv/2.9.8/ops/running/handoff/
+---
+
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/handoff
+
+Riak is a distributed system built with two essential goals in mind:
+
+* **fault tolerance**, whereby a Riak cluster can withstand node
+  failure, network partitions, and other events in a way that does not
+  disrupt normal functioning, and
+* **scalability**, whereby operators can gracefully add nodes to and
+  remove nodes from a Riak cluster
+
+Both of these goals demand that Riak be able to either temporarily or
+permanently re-assign responsibility for portions of the keyspace. That
+re-assignment is referred to as **intra-cluster handoff** (or simply
+**handoff** in our documentation).
+
+## Types of Handoff
+
+Intra-cluster handoff typically takes one of two forms: **hinted
+handoff** and **ownership transfer**.
+
+Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick
+up the slack, so to speak, assuming responsibility for node C's
+operations. When node C comes back online, responsibility will be handed
+back to the original vnodes.
+
+Ownership transfer is different because it is meant to be permanent.
+It occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very
+makeup of a cluster changes, e.g. when nodes are added or removed from
+the cluster. In this case, responsibility for portions of the keyspace
+needs to be fundamentally re-assigned.
+
+Both types of handoff are handled automatically by Riak. Operators do
+have the option, however, of enabling and disabling handoff on
+particular nodes or all nodes and of configuring key aspects of Riak's
+handoff behavior. More information can be found below.
+
+## Configuring Handoff
+
+A full listing of configurable parameters can be found in our
+[configuration files]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#intra-cluster-handoff)
+document. The sections below provide a more narrative description of
+handoff configuration.
+
+### SSL
+
+If you want to encrypt handoff behavior within a Riak cluster, you need
+to provide each node with appropriate paths for an SSL certfile (and
+potentially a keyfile). The configuration below would designate a
+certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`:
+
+```riakconf
+handoff.ssl.certfile = /ssl_dir/cert.pem
+handoff.ssl.keyfile = /ssl_dir/key.pem
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_ssl_options, [
+        {certfile, "/ssl_dir/cert.pem"},
+        {keyfile, "/ssl_dir/key.pem"}
+    ]},
+    %% Other configs
+]}
+```
+
+### Port
+
+You can set the port used by Riak for handoff-related interactions using
+the `handoff.port` parameter.
+The default is 8099. This would change the port to 9000:
+
+```riakconf
+handoff.port = 9000
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_port, 9000},
+    %% Other configs
+]}
+```
+
+### Background Manager
+
+Riak has an optional background manager that limits handoff activity in
+the name of saving resources. The manager can help prevent system
+response degradation during times of heavy load, when multiple
+background tasks may contend for the same system resources. The
+background manager is disabled by default. The following will enable it:
+
+```riakconf
+handoff.use_background_manager = on
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_use_background_manager, on},
+    %% Other configs
+]}
+```
+
+### Maximum Rejects
+
+If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search/),
+those subsystems can block handoff of primary key/value data, i.e. data
+that you interact with via normal reads and writes.
+
+The `handoff.max_rejects` setting caps the duration for which a
+[vnode]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode) can be blocked: the maximum duration is the value of
+`handoff.max_rejects` multiplied by the value of
+[`vnode_management_timer`]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#vnode_management_timer).
+Thus, if you set `handoff.max_rejects` to 10 and
+`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems
+can block K/V handoff for a maximum of 50 seconds. The default for
+`handoff.max_rejects` is 6, while the default for
+`vnode_management_timer` is `10s`. This would set `max_rejects` to 10:
+
+```riakconf
+handoff.max_rejects = 10
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_rejected_max, 10},
+    %% Other configs
+]}
+```
+
+### Transfer Limit
+
+You can adjust the number of node-to-node transfers (which includes
+handoff) using the `transfer_limit` parameter. The default is 2. Setting
+this higher allows more concurrent transfers, at the expense of greater
+network and CPU load. This would set `transfer_limit` to 5:
+
+```riakconf
+transfer_limit = 5
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_concurrency, 5},
+    %% Other configs
+]}
+```
+
+## Enabling and Disabling Handoff
+
+Handoff can be enabled and disabled in two ways: via configuration or
+on the command line.
+
+### Enabling and Disabling via Configuration
+
+You can enable and disable both outbound and inbound handoff on a node
+using the `handoff.outbound` and `handoff.inbound` settings,
+respectively. Both are enabled by default. The following would disable
+both:
+
+```riakconf
+handoff.outbound = off
+handoff.inbound = off
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {disable_outbound_handoff, true},
+    {disable_inbound_handoff, true},
+    %% Other configs
+]}
+```
+
+### Enabling and Disabling Through the Command Line
+
+Check out [Cluster Operations: Handoff][cluster ops handoff] for steps
+on enabling and disabling handoff via the command line.
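+
+As a quick sketch of what those steps look like, the `riak-admin handoff`
+subcommands can toggle handoff on a running node (assuming a Riak KV 2.x
+`riak-admin`; see the linked page for full usage):
+
+```bash
+# Disable outbound handoff on the local node
+riak-admin handoff disable outbound
+
+# Re-enable both inbound and outbound handoff
+riak-admin handoff enable both
+```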
+ + + + diff --git a/content/riak/kv/2.9.8/using/reference/jmx.md b/content/riak/kv/2.9.8/using/reference/jmx.md new file mode 100644 index 0000000000..6f08c8743a --- /dev/null +++ b/content/riak/kv/2.9.8/using/reference/jmx.md @@ -0,0 +1,190 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.8/ops/running/monitoring/jmx + - /riak/kv/2.9.8/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td> + <td>float</td> + <td>Mean PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMedian</tt></td> + <td>float</td> + <td>Median PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePuts</tt></td> + <td>int</td> + <td>Number of PUTs in past minute</td> + </tr> + <tr> + <td><tt>NodePutsTotal</tt></td> + <td>int</td> + <td>Number of PUTs since node start</td> + </tr> + <tr> + <td><tt>PBCActive</tt></td> + <td>int</td> + <td>Number of active Protocol Buffers connections</td> + </tr> + <tr> + <td><tt>PBCConnects</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections in past minute</td> + </tr> + <tr> + <td><tt>PBCConnectsTotal</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections since node start</td> + </tr> + <tr> + <td><tt>RingCreationSize</tt></td> + <td>int</td> + <td>Number of partitions in Riak ring</td> + </tr> + <tr> + <td><tt>VnodeGets</tt></td> + <td>int</td> + <td>Number of vnode-level GETs in past minute</td> + </tr> + <tr> + <td><tt>VnodeGetsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level GETs since node start</td> + </tr> + <tr> + <td><tt>VnodePuts</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs in past minute</td> + </tr> + <tr> + <td><tt>VnodePutsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs since node start</td> + </tr> +</table> + + + + diff --git a/content/riak/kv/2.9.8/using/reference/logging.md b/content/riak/kv/2.9.8/using/reference/logging.md new file mode 100644 index 0000000000..0700a0618a --- /dev/null +++ b/content/riak/kv/2.9.8/using/reference/logging.md @@ -0,0 +1,301 @@ +--- +title: "Logging Reference" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Logging" + identifier: "managing_ref_logging" + weight: 100 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.8/ops/running/logging + - /riak/kv/2.9.8/ops/running/logging +--- + +[cluster ops log]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/logging + +Logging in Riak KV is handled by a Basho-produced logging framework for +[Erlang](http://www.erlang.org) called +[lager](https://github.com/basho/lager). + +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. + +## Log Directory + +Riak's log files are stored in a `/log` directory on each node. The +location of that directory differs from platform to platform. The table +below shows you where log files are stored on all supported operating +systems. + +OS | Directory +:--|:--------- +Ubuntu, Debian, CentOS, RHEL | `/var/log/riak` +Solaris, OpenSolaris | `/opt/riak/log` +Source install and Mac OS X | `./log` (where the `.` represents the root installation directory) + +## Log Files + +Below is a list of files that can be found in each node's `/log` +directory: + +File | Significance +:----|:------------ +`console.log` | Console log output +`crash.log` | Crash logs +`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. +`run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. 
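+
+To watch any of these logs in real time, you can `tail` the file at the
+location for your platform, e.g. on an Ubuntu install:
+
+```bash
+tail -f /var/log/riak/console.log
+```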
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.9.8, some log messages may contain newline characters,
+which prevents external tools from reliably identifying the end of each
+log message when ingesting log files.
+
+A known workaround is to ingest not the file output enabled by the
+`log.console` configurable parameter but rather the output enabled by
+the `log.syslog` configurable parameter, letting syslog handle
+multi-line messages, e.g. via the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option - or equivalent - of syslog implementations (see [this
+StackExchange answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474)).
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable it
+and disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+By default, Riak stores error messages in `./log/error.log`.
+You can change this using the `log.error.file` parameter.
+Here is an example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated either when a certain size threshold is reached and/or at
+designated times.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` - Every night at midnight
+* `$D23` - Every day at 23:00 (11 pm)
+* `$W0D20` - Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` - On the first day of every month at midnight
+* `$M5D6` - On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, using any size denomination
+you wish, e.g. `KB` or `MB`. The default is 64 KB. The following would
+set that maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix to be attached to
+each syslog message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages amongst the available levels, which are listed below. The
+default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither.
+This is determined by the value that you give to the `log.console`
+parameter, which gives you one of four options:
+
+* `file` - Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/2.9.8/setup/installing/source), but will differ on platform-specific installation,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` - Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-cli/#attach-direct) command
+* `both` - Console logs will be emitted both to a file and to standard
+  output
+* `off` - Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log]
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/multi-datacenter.md b/content/riak/kv/2.9.8/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..9348139c8f
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/multi-datacenter.md
@@ -0,0 +1,53 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling and disabling of per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication
+capabilities.
+
+[Learn More >>][ref mdc comparison]
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.9.8/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..cf22e4bd09
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,100 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/mdc/comparison
+  - /riak/kv/2.9.8/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/2.9.8/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/2.9.8/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destinations of replication data. Sites and listeners
+  are manually configured on each node in a cluster. This can be a
+  burden to the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity was achieved in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters are exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes, and the sink has N nodes, there will be M
+  realtime connections.
+  Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/2.9.8/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..86a3f565e3
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,171 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/mdc/monitoring
+  - /riak/kv/2.9.8/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identification and trending of issues or delays in realtime replication
+is important for identifying a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute.
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The size of `rt_dirty` can quantify the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). As with a backed-up realtime queue, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of expected
+high load, the maximum size of the queue (displayed in the
+`realtime_queue_stats` section of the replication status output as
+`max_bytes`) can be increased to accommodate the anticipated usage
+pattern.
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
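+
+Both of the statistics sources mentioned above can be polled
+programmatically. As a sketch, the HTTP endpoint can be scraped with
+curl (adjust the host and port to your environment):
+
+```curl
+curl http://localhost:8098/riak-repl/stats
+```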
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime
+replication is as follows (a code sketch of this process appears at the
+end of this page):
+
+* Perform a GET for your canary object on both your source and sink
+  clusters, noting their states. The state of the object in each cluster
+  can be referred to as state `S0`, or the object's initial state.
+* PUT an update for your canary object to the source cluster, updating
+  the state of the object to the next state, `S1`.
+* Perform a GET for your canary on the sink cluster, comparing the state
+  of the object on the source cluster to the state of the object on the
+  sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+Getting a rough estimate of how long it takes an object PUT to a source
+cluster to be replicated to a sink cluster can be done by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
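+
+As an illustration, here is a minimal sketch of the general process
+using the official Python client. The hostnames, bucket, key, and
+30-second SLA threshold are hypothetical placeholders:
+
+```python
+import time
+
+import riak
+
+# One client per cluster (hypothetical hosts/ports)
+source = riak.RiakClient(host='source.example.com', pb_port=8087)
+sink = riak.RiakClient(host='sink.example.com', pb_port=8087)
+
+# PUT the next state, S1, to the source cluster
+canary_src = source.bucket('replication_canary').get('canary')
+canary_src.data = {'state': time.time()}
+canary_src.store()
+
+start = time.time()
+deadline = start + 30  # SLA threshold, in seconds
+
+# Poll the sink cluster until the states match or the SLA is exceeded
+while time.time() < deadline:
+    canary_snk = sink.bucket('replication_canary').get('canary')
+    if canary_snk.data == canary_src.data:
+        print('replicated in %.2f seconds' % (time.time() - start))
+        break
+    time.sleep(0.5)
+else:
+    print('SLA exceeded: canary was not replicated within 30 seconds')
+```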
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/2.9.8/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..9d10da5778
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,66 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/mdc/per-bucket
+  - /riak/kv/2.9.8/ops/mdc/per-bucket
+---
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+The set of supported `repl` values changed between Riak Enterprise
+versions 1.1 and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+ * `true` - Enable replication (realtime + fullsync)
+ * `false` - Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+ * `realtime` - Replication only occurs in realtime for this bucket
+ * `fullsync` - Replication only occurs during a fullsync operation
+ * `both` - Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this goes for properties such as `backend`.
+If the bucket doesn't exist in the destination cluster, Riak will create
+it with the default backend and _not_ with the backend used in the
+source cluster.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/multi-datacenter/statistics.md b/content/riak/kv/2.9.8/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..17e02e1721
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,244 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/mdc/statistics
+  - /riak/kv/2.9.8/ops/mdc/statistics
+---
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
+
+1. Many of these statistics will appear only on the current
+   leader node
+2. The counts for all statistics will be reset to 0 upon restarting
+   Riak unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | A sink error has been detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | A source error has been detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+These values are under `realtime_queue_stats`.
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster.
+`fullsync_start_time` | The time the current fullsync to the specified cluster began.
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync.
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When completed, this will be the same number as total number of partitions in the ring.
+`error_exits` | If a sync failed or was aborted, the partition will be queued again and retried later
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
In previous versions of replication, different strategies could be configured depending on your replication needs.
+`fullsync_worker` | The Erlang process ID of the fullsync worker.
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received by the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received by the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication sink
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node.
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | The state shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The connected site (sink) name. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | The state shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used.
They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for
+more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size in bytes of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+
+## Accessing Replication Web-Based Statistics
+
+These stats can be accessed via the command line with the following
+command:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats
+```
+
+A simple way to view formatted statistics is to use a command such as:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/object-deletion.md b/content/riak/kv/2.9.8/using/reference/object-deletion.md
new file mode 100644
index 0000000000..16baf828f5
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/object-deletion.md
@@ -0,0 +1,121 @@
+---
+title: "Object Deletion Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Object Deletion"
+    identifier: "managing_ref_object_deletion"
+    weight: 103
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/advanced/deletion
+---
+
+[concept eventual consistency]: ../../../learn/concepts/eventual-consistency
+[concept clusters]: ../../../learn/concepts/clusters
+[glossary vnode]: ../../../learn/glossary/#vnode
+[usage delete objects]: ../../../developing/usage/deleting-objects
+[developing keylist]: ../../../developing/api/http/list-keys
+[developing mapreduce]: ../../../developing/usage/mapreduce
+[cluster mdc]: ../../cluster-operations/v3-multi-datacenter
+[config advanced]: ../../../configuring/reference/#advanced-configuration
+[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum
+[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings
+[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction
+
+In single-server, non-clustered data storage systems, object deletion
+is a trivial process.
In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however,
+object deletion is far less trivial because objects live on multiple
+[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend.
+
+## Object Deletion Example
+
+The problem of object deletion in distributed systems can be illustrated more concretely using the following example:
+
+* An object is stored on nodes A, B, and C
+* Node C suddenly goes offline due to a network failure
+* A client sends a delete request to node A, which forwards that
+  request to node B, but it cannot reach node C
+* On nodes A and B, the object is deleted
+* Node C comes back online
+* A client attempts to read the object, and the request hits node C
+* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object.
+
+The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred:
+
+1. the object was deleted on A & B but not on C
+2. the object was created on C but not on A & B
+
+To get around this problem, Riak uses *tombstones*.
+
+## Tombstones
+
+Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it.
+
+This allows Riak to understand the difference between an object that has been deleted, and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*.
+
+The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**.
+
+After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to).
+
+## Configuring Object Deletion
+
+The `delete_mode` setting in a cluster's [configuration files][config advanced] determines how long a tombstone will remain before being reaped.
+
+There are three possible settings:
+
+* `keep` - Disables tombstone removal
+* `immediate` - The tombstone is removed as soon as the request is
+  received
+* Custom time interval - How long to wait until the tombstone is
+  removed, expressed in milliseconds. The default is `3000`, i.e. to
+  wait 3 seconds
+
+In general, we recommend setting the `delete_mode` parameter to `keep`
+if you plan to delete and recreate objects under the same key. This protects against failure scenarios in which a deleted object may be resurrected.
+
+Setting `delete_mode` to `immediate` can be useful in situations in
+which an aggressive space reclamation process is necessary, such as
+when running [MapReduce jobs][developing mapreduce], but we do not recommend
+this in general.
+
+Setting `delete_mode` to a longer time duration than the default can be
+useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when
+network connectivity is an issue.
+
+## Deletion from Backends
+
+When attempting to reclaim disk space, deleting data may seem like the obvious first step.
However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion when a Riak tombstone is created, and later when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging], for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping, and prior to garbage collection delete operations will actually cause disk space usage to rise slightly.
+
+## Tombstones & Reporting
+
+When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:
+
+* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
+* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain upon the node until a further request finds it. At this time, a new reap timer will be initiated.
+* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
+* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too.
+* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.
+
+## Client Library Examples
+
+Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
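+
+You can also observe the tombstone behavior described above directly from the command line. The following is a minimal sketch using Riak's HTTP API (assuming a local node listening on the default HTTP port `8098`; the `test` bucket and `doc1` key are hypothetical names used only for illustration):
+
+```curl
+# Write an object to a hypothetical bucket/key
+curl -XPUT http://localhost:8098/buckets/test/keys/doc1 \
+  -H 'Content-Type: text/plain' \
+  -d 'hello'
+
+# Delete it; this writes tombstones on at least W nodes
+curl -XDELETE http://localhost:8098/buckets/test/keys/doc1
+
+# A subsequent read returns 404 Not Found, even though the tombstone
+# may remain internally until it is reaped
+curl -i http://localhost:8098/buckets/test/keys/doc1
+```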
+
+## Resources
+
+* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html)
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/runtime-interaction.md b/content/riak/kv/2.9.8/using/reference/runtime-interaction.md
new file mode 100644
index 0000000000..029f1b5607
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/runtime-interaction.md
@@ -0,0 +1,70 @@
+---
+title: "Runtime Interaction Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Runtime Interaction"
+    identifier: "managing_ref_runtime_interaction"
+    weight: 104
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/advanced/runtime
+  - /riak/kv/2.9.8/ops/advanced/runtime
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters
+
+Riak's [configuration files][config reference] provide a variety of parameters that
+enable you to fine-tune how Riak interacts with two important elements
+of the underlying operating system: distribution ports and OS
+processes/garbage collection.
+
+## Ports
+
+Distribution ports connect Riak nodes within a [cluster][concept clusters]. The
+following port-related parameters are available:
+
+* `runtime_health.triggers.distribution_port` - Whether distribution
+  ports with full input buffers will be counted as busy.
+  * Default: `on`
+* `runtime_health.triggers.port` - Whether ports with full input
+  buffers will be counted as busy. Ports can represent open files or network sockets.
+  * Default: `on`
+* `runtime_health.thresholds.busy_ports` - The threshold at which a
+  warning will be triggered about the number of ports that are overly
+  busy. Ports with full input buffers count toward this threshold.
+  * Default: `2`
+
+## Processes
+
+Riak will log warnings related to busy operating system processes and
+garbage collection. You can specify the conditions in which warnings are
+triggered using the following parameters:
+
+* `runtime_health.thresholds.busy_processes` - The threshold at which
+  a warning will be triggered about the number of processes that are
+  overly busy. Processes with large heaps or that take a long time to
+  garbage collect will count toward this threshold.
+  * Default: `30`
+* `runtime_health.triggers.process.heap_size` - A process will be
+  marked as busy when its heap exceeds this size (in bytes).
+  * Default: `160444000`
+* `runtime_health.triggers.process.garbage_collection` - A process
+  will be marked as busy when it exceeds this amount of time doing
+  garbage collection. Enabling this setting can cause performance
+  problems on multi-core systems.
+  * Default: `off`
+  * Example when enabled: `50ms`
+* `runtime_health.triggers.process.long_schedule` - A process will
+  be marked as busy when it exceeds this length of time during a single
+  process scheduling and execution cycle.
+ * Default: `off` + * Example when enabled: `20ms` + + + + diff --git a/content/riak/kv/2.9.8/using/reference/search.md b/content/riak/kv/2.9.8/using/reference/search.md new file mode 100644 index 0000000000..4f1aec2622 --- /dev/null +++ b/content/riak/kv/2.9.8/using/reference/search.md @@ -0,0 +1,457 @@ +--- +title: "Search Reference" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Search" + identifier: "managing_ref_search" + weight: 109 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.8/dev/advanced/search + - /riak/kv/2.9.8/dev/advanced/search +--- + +[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters +[configuring search]: {{<baseurl>}}riak/kv/2.9.8/configuring/search + +> **Note on search 2.0 vs. legacy search** +> +> This document refers to Riak search 2.0 with +[Solr](http://lucene.apache.org/solr/) integration (codenamed +Yokozuna). + +The project that implements Riak search is codenamed Yokozuna. This is a +more detailed overview of the concepts and reasons behind the design of +Yokozuna, for those interested. If you're simply looking to use Riak +search, you should check out the [Using Search]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search) document. + +![Yokozuna]({{<baseurl>}}images/yokozuna.png) + +## Riak Search is Erlang + +In Erlang OTP, an "application" is a group of modules and Erlang +processes which together perform a specific task. The word application +is confusing because most people think of an application as an entire +program such as Emacs or Photoshop. But Riak Search is just a sub-system +in Riak itself. Erlang applications are often stand-alone, but Riak +Search is more like an appendage of Riak. It requires other subsystems +like Riak Core and KV, but also extends their functionality by providing +search capabilities for KV data. + +The purpose of Riak Search is to bring more sophisticated and robust +query and search support to Riak. Many people consider Lucene and +programs built on top of it, such as Solr, as the standard for +open-source search. There are many successful applications built on +Lucene/Solr, and it sets the standard for the feature set that +developers and users expect. Meanwhile, Riak has a great story as a +highly-available, distributed key/value store. Riak Search takes +advantage of the fact that Riak already knows how to do the distributed +bits, combining its feature set with that of Solr, taking advantage of +the strengths of each. + +Riak Search is a mediator between Riak and Solr. There is nothing +stopping a user from deploying these two programs separately, but this +would leave the user responsible for the glue between them. That glue +can be tricky to write. It requires dealing with monitoring, querying, +indexing, and dissemination of information. + +Unlike Solr by itself, Riak Search knows how to do all of the following: + +* Listen for changes in key/value (KV) data and to make the appropriate + changes to indexes that live in Solr. It also knows how to take a user + query on any node and convert it to a Solr distributed search, which + will correctly cover the entire index without overlap in replicas. +* Take index creation commands and disseminate that information across + the cluster. +* Communicate and monitor the Solr OS process. + +## Solr/JVM OS Process + +Every node in a Riak [cluster][concept clusters] has a corresponding operating +system (OS) process running a JVM which hosts Solr on the Jetty +application server. 
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
+
+Because of this mismatch between KV and Solr, Riak Search must act as a
+mediator between the two, meaning it must have a way to inspect a KV
+object and create a structure which Solr can ingest for indexing. In
+Solr this structure is called a **document**. This task of creating a
+Solr document from a Riak object is the job of the **extractor**. To
+perform this task two things must be considered.
+
+**Note**: This isn't quite right: the fields created by the extractor
+are only a subset of the fields created. Special fields needed for
+Yokozuna to properly query data and tagging fields are also created.
+This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a
+   Solr document?
+2. If so, how is the object's value mapped from one to the other?
+   For example, the value may be `application/json` which contains
+   nested objects. This must somehow be transformed into a flat
+   structure.
+
+The first question is answered by the _extractor mapping_. By default
+Yokozuna ships with extractors for several common data types. Below is a
+table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation
+of the extractor module. Every extractor must conform to the
+following Erlang specification:
+
+```erlang
+-spec extract(ObjectValue::binary(), Options::proplist()) -> fields() | {error, term()}.
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+```
+
+The value of the object is passed along with options specific to each
+extractor. Assuming the extractor correctly parses the value it will
+return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the
+object's value verbatim and associate it with the field name `text`.
+For example, an object with the value "How much wood could a woodchuck
+chuck if a woodchuck could chuck wood?" would result in the following
+fields list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier.
+JSON can be nested arbitrarily. That is, the key of a top-level object
+can have an object as a value, and this object can have another object
+nested inside, and so on. Yokozuna's JSON extractor must have some method
+of converting this arbitrary nesting into a flat list. It does this by
+concatenating nested object fields with a separator. The default
+separator is `.`. An example should make this more clear.
+
+Below is JSON that represents a person, what city they are from and what
+cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice:
+
+* Nested objects have their field names concatenated to form a field
+  name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your
+  schema defines this field as multi-valued.
+
+The XML extractor works in a very similar fashion to the JSON extractor
+except it also has element attributes to worry about. To see the
+document created for an object, without actually writing the object, you
+can use the extract HTTP endpoint. This will do a dry-run extraction and
+return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+     -H 'Content-Type: application/json' \
+     --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and
+types. For each document stored, every field must have a matching name
+in the schema, used to determine the field's type, which in turn
+determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempts to hide any details of the Solr
+schema: a user creates a schema for Yokozuna just as she would for Solr.
+Here is the general structure of a schema.
+
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding
+options are declared. Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
+called `_yz_default`. This is an extremely general schema which makes
+heavy use of dynamic fields---it is intended for development and
+testing. In production, a schema should be tailored to the data being
+indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+correcting entropy (divergence) between the data stored in Riak's
+key-value backend and the indexes stored in Solr. The impetus for AAE is
+that failures come in all shapes and sizes---disk failure, dropped
+messages, network partitions, timeouts, overflowing queues, segmentation
+faults, power outages, etc. Failures range from obvious to invisible.
+Failure prevention is fraught with failure, as well. How do you prevent
+your prevention system from failing? You don't. Code for detection, not
+prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite
+expensive. To minimize the overall cost of detection, AAE makes use of
+hashtrees. Every partition has a pair of hashtrees: one for KV and
+another for Yokozuna. As data is written, the hashtrees are updated in
+real time.
+
+Each tree stores the hash of the object. Periodically a partition is
+selected and the pair of hashtrees is _exchanged_. First the root hashes
+are compared. If equal then there is no more work to do.
You could have
+millions of keys in one partition and verifying they **all** agree takes
+the same time as comparing two hashes. If they don't match then the
+root's children are checked and this process continues until the
+individual discrepancies are found. If either side is missing a key or
+the hashes for a key do not match then _repair_ is invoked on that key.
+Repair converges the KV data and its indexes, removing the entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the
+hashtrees themselves may contain some entropy. For example, what if the
+root hashes agree but a divergence exists in the actual data? Simple:
+you assume you can never fully trust the hashtrees, so periodically you
+_expire_ them. When expired, a tree is completely destroyed and then
+re-built from scratch. This requires folding all data for a partition,
+which can be expensive and take some time. For this reason, by default,
+expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
+[screencast](http://coffee.jtuple.com/video/AAE.html).
+
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a
+stream of tokens. Solr allows many different methods of analysis,
+an important fact because different field values may represent
+different types of data. For data like unique identifiers, dates, and
+categories you want to index the value verbatim---it shouldn't be
+analyzed at all. For text like product summaries, or a blog post,
+you want to split the value into individual words so that they may be
+queried individually. You may also want to remove common words,
+lowercase words, or perform stemming. This is the process of
+_analysis_.
+
+Solr provides many different field types which analyze data in different
+ways, and custom analyzer chains may be built by stringing together XML
+in the schema file, allowing custom analysis for each field. For more
+information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via
+Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata
+   about it that should be indexed, for example storing an image with
+   location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must
+   be added _without_ modifying the object's value.
+
+See
+[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
+for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index
+entries for a given Riak Object are co-located on the same physical
+machine. To query the entire index, all partitions must be contacted.
+Adjacent partitions keep replicas of the same object. Replication allows
+the entire index to be considered by only contacting a subset of the
+partitions. The process of finding a covering set of partitions is known
+as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can
+be thought of as a unique set of nodes along with a covering set of
+partitions. Yokozuna treats the node list as physical hostnames and
+passes them to Solr's distributed search via the `shards` parameter.
+Partitions, on the other hand, are treated logically in Yokozuna. All
+partitions for a given node are stored in the same index; unlike KV
+which uses _partition_ as a physical separation.
To properly filter out
+overlapping replicas the partition data from the cover plan is passed to
+Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very
+expensive operation as much computation is done symbolically, and the
+process amounts to a knapsack problem. The larger the ring, the more
+expensive. Yokozuna takes advantage of the fact that it has no physical
+partitions by computing a coverage plan asynchronously every few
+seconds, caching the plan for query use. In the case of node failure or
+ownership change, this could mean a delay between cluster state and the
+cached plan. This is, however, a good trade-off given the performance
+benefits, especially since even without caching there is a race, albeit
+one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time a batch is sent to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time a batch is sent to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time a drain is initiated to the time it is completed.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees.
Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr. + +* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting. + +* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window. + +* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window. + +* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of querying latency, as measured from the time it takes to send a request to Solr to the time the response is received from Solr. + +* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window. + +* `search_index_bad_entry_count` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. + +* `search_index_bad_entry_one` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute. + +* `search_index_extract_fail_count` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak. + +* `search_index_extract_fail_one` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute. + +While most of the default values are sufficient, you may have to +increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start. + + + diff --git a/content/riak/kv/2.9.8/using/reference/secondary-indexes.md b/content/riak/kv/2.9.8/using/reference/secondary-indexes.md new file mode 100644 index 0000000000..933db04322 --- /dev/null +++ b/content/riak/kv/2.9.8/using/reference/secondary-indexes.md @@ -0,0 +1,76 @@ +--- +title: "Secondary Indexes Reference" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Secondary Indexes" + identifier: "managing_ref_2i" + weight: 110 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.8/dev/advanced/2i + - /riak/kv/2.9.8/dev/advanced/2i +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.8/using/reference/strong-consistency + +> **Note: Riak Search preferred for querying** +> +> If you're interested in non-primary-key-based querying in Riak, i.e. if +you're looking to go beyond straightforward K/V operations, we now +recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. 
+
+This document provides implementation and other details for Riak's
+[secondary indexes]({{<baseurl>}}riak/kv/2.9.8/developing/usage/secondary-indexes/) \(2i) feature.
+
+## How It Works
+
+Secondary indexes use **document-based partitioning**, a system where
+indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode). This
+system is also known as a local index. Secondary indexes are a list of key/value
+pairs that are similar to HTTP headers. At write time, objects are
+tagged with index entries consisting of key/value metadata. This
+metadata can be queried to retrieve the matching keys.
+
+![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png)
+
+Indexes reside on multiple machines. Since indexes for an object are
+stored on the same partition as the object itself, query-time
+performance issues might arise. When issuing a query, the system must
+read from a "covering" set of partitions and then merge the results.
+The system looks at how many replicas of data are stored---the N value
+or `n_val`---and determines the minimum number of partitions that it
+must examine (1 / `n_val`) to retrieve a full set of results, also
+taking into account any offline nodes.
+
+An application can modify the indexes for an object by reading an
+object, adding or removing index entries, and then writing the object.
+Finally, an object is automatically removed from all indexes when it is
+deleted. The object's value and its indexes should be thought of as a
+single unit. There is no way to alter the indexes of an object
+independently from the value of an object, and vice versa. Indexing is
+atomic, and is updated in real time when writing an object. This means
+that an object will be present in future index queries as soon as the
+write operation completes.
+
+Riak stores 3 replicas of all objects by default, although this can be
+changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/replication-properties). The system is capable of generating a full set of results
+from one third of the system’s partitions as long as it chooses the
+right set of partitions. The query is sent to each partition, the index
+data is read, and a list of keys is generated and then sent back to the
+requesting node.
+
+> **Note on 2i and strong consistency**
+>
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
+secondary index metadata to those objects, you can still perform
+strongly consistent operations on those objects, but the secondary
+indexes will be ignored.
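+
+As a concrete illustration of the write-time tagging described above, here is a minimal sketch using Riak's HTTP API (assuming a local node on the default HTTP port `8098` and a bucket whose backend supports 2i, such as LevelDB; the `users` bucket, the `john` key, and the `email_bin` index are hypothetical names used only for illustration):
+
+```curl
+# Tag an object with a secondary index entry at write time
+curl -XPUT http://localhost:8098/buckets/users/keys/john \
+  -H 'Content-Type: application/json' \
+  -H 'x-riak-index-email_bin: john@example.com' \
+  -d '{"name": "John"}'
+
+# Query the index; Riak reads a covering set of partitions
+# and returns the matching keys
+curl http://localhost:8098/buckets/users/index/email_bin/john@example.com
+```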
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/snmp.md b/content/riak/kv/2.9.8/using/reference/snmp.md
new file mode 100644
index 0000000000..f2314d1487
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/snmp.md
@@ -0,0 +1,166 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.8/ops/running/monitoring/snmp
+  - /riak/kv/2.9.8/ops/running/monitoring/snmp
+---
+
+Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document only covers SNMP v2c, the last supported version. After the release of Riak KV 2.2.3 Enterprise Edition, support for SNMP was dropped. The configuration examples below are left for people analyzing legacy settings and only work with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (Ex: `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: The commas in the IP are in the correct format
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                   {force_load, true}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings you can start your Riak node and will be able to snmpwalk the node to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak you will need to reference the MIB shipped with Riak.
For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+    -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+    192.168.52.129:4000 RIAK
+```
+
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/statistics-monitoring.md b/content/riak/kv/2.9.8/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..0412c7d42b
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/statistics-monitoring.md
@@ -0,0 +1,395 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/stats-and-monitoring
+  - /riak/kv/2.9.8/ops/running/stats-and-monitoring
+---
+
+Riak provides data related to current operating status, which includes
+statistics in the form of counters and histograms. These statistics
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/2.9.8/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered
+statistics, as well as numerous solutions for monitoring and gathering
+statistics that our customers and community report using successfully
+in Riak cluster environments. You can learn more about the specific
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/2.9.8/developing/api/http/status) documentation.
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with
+diagnostics and early warnings of potential problems, as well as help
+guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor Load
+* Available Memory
+* Available disk space
+* Used file descriptors
+* Swap Usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory and
+writebacks. Things like massive flushes of dirty pages or steadily
+climbing writeback volumes can indicate poor virtual memory tuning.
+More information can be found [here][sysctl_vm_txt] and in our
+documentation on [system tuning]({{<baseurl>}}riak/kv/2.9.8/using/performance/#storage-and-file-system-tuning).
+
+## Riak Metrics to Graph
+Riak metrics fall into several general categories:
+
+1. Throughput metrics
+2. Latency metrics
+3. Erlang resource usage metrics
+4. General Riak load/health metrics
+
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/inspecting-node) is
+not practical, you should pick a minimum relevant subset from these
+categories. Some of the most helpful metrics are discussed below.
+
+### Throughput Metrics
+
+Graphing the throughput stats relevant to your use case is often
+helpful for capacity planning and usage trend analysis. In addition,
+it helps you establish an expected baseline -- that way, you can
+investigate unexpected spikes or dips in the throughput. The
+following stats are recorded for operations that happened *during the
+last minute*.
+
+Metric | Relevance | Operations (for the last minute)
+:--------|:--------|:--------------------------------
+```node_gets``` | K/V | Reads coordinated by this node
+```node_puts``` | K/V | Writes coordinated by this node
+```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes
+```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes
+```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes
+```search_query_throughput_one``` | Search | Search queries on the node
+```search_index_throughput_one``` | Search | Documents indexed by Search
+```consistent_gets``` | Strong Consistency | Consistent reads on this node
+```consistent_puts``` | Strong Consistency | Consistent writes on this node
+```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads
+
+Note that there are no separate stats for updates to Flags or
+Registers, as these are included in ```vnode_map_update```.
+
+### Latency Metrics
+
+As with the throughput metrics, keeping an eye on average (and max)
+latency times will help detect usage patterns, and provide advance
+warning of potential problems.
+
+{{% note title="Note on FSM Time Stats" %}}
+FSM Time Stats represent the amount of time in microseconds required to
+traverse the GET or PUT Finite State Machine code, offering a picture of
+general node health. From your application's perspective, FSM Time effectively
+represents experienced latency. Mean, Median, and 95th-, 99th-, and
+100th-percentile (Max) counters are displayed. These are one-minute stats.
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute
+```pbc_active``` | | Number of currently active protocol buffer connections
+```pbc_connects``` | | Number of new protocol buffer connections established during the last minute
+```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes)
+```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0)
+```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection
+```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection
+
+### General Riak Search Load/Health Metrics
+
+These stats give a picture of the general level of activity or
+load on the Riak Search subsystem at any given moment.
+
+Metric | Description
+:------|:------------
+`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+`search_index_bad_entry_one` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute.
+`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last restart of Riak.
+`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute.
+
+
+## Command-line Interface
+
+The [`riak-admin`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/) tool provides two
+interfaces for retrieving statistics and other information: `status`
+and `stat`.
+
+### status
+
+Running the `riak-admin status` command will return all of the
+currently available information from a running node.
+
+```bash
+riak-admin status
+```
+
+This will return a list of over 300 key/value pairs, like this:
+
+```
+1-minute stats for 'dev1@127.0.0.1'
+-------------------------------------------
+connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1']
+consistent_get_objsize_100 : 0
+consistent_get_objsize_195 : 0
+... etc ...
+```
+
+A comprehensive list of available stats can be found in the
+[Inspecting a Node]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+
+### stat
+
+The `riak-admin stat` command is related to the `riak-admin status`
+command but provides a more fine-grained interface for interacting with
+stats and information. Full documentation of this command can be found
+in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#stat) document.
+
+## Statistics and Monitoring Tools
+
+There are many open source, self-hosted, and service-based solutions for
+aggregating and analyzing statistics and log data for the purposes of
+monitoring, alerting, and trend analysis on a Riak cluster. Some
+solutions provide Riak-specific modules or plugins as noted.
+
+The following are solutions that customers and community members have
+reported success with for monitoring the operational status of
+their Riak clusters. Community and open source projects are presented
+along with commercial and hosted services.
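+
+Most of these tools ultimately consume the same data exposed by
+`riak-admin status` and the HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.8/developing/api/http/status) endpoint, so it can be
+useful to poll that endpoint by hand before wiring up a full monitoring
+stack. A minimal sketch, assuming the default HTTP port of `8098` and
+that `jq` is installed on the machine running the check:
+
+```bash
+# Pull the full stats blob from a local node (several hundred pairs)
+curl -s http://localhost:8098/stats | jq . | head
+
+# Extract a few of the throughput and health metrics discussed above
+curl -s http://localhost:8098/stats | \
+  jq '{node_gets, node_puts, read_repairs, pbc_active}'
+```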
+
+{{% note title="Note on Riak 2.x Statistics Support" %}}
+Many of the tools below were created either by third parties or by Basho
+engineers for general usage, and have been passed to the community for further
+updates. As such, many of them only aggregate the statistics and messages
+that were output by Riak 1.4.x.
+
+Like all code under [Basho Labs](https://github.com/basho-labs/), the below
+tools are "best effort" and have no dedicated Basho support. We both
+appreciate and need your contribution to keep these tools stable and up to
+date. Please open up a GitHub issue on the repository if you'd like to be a
+maintainer.
+
+Look for banners calling out the tools we've verified to support the latest
+Riak 2.x statistics!
+{{% /note %}}
+
+### Self-Hosted Monitoring Tools
+
+#### Riaknostic
+
+[Riaknostic](http://riaknostic.basho.com) is a growing suite of
+diagnostic checks that can be run against your Riak node to discover
+common problems and recommend how to resolve them. These checks are
+derived from the experience of the Basho Client Services Team as well as
+numerous public discussions on the mailing list, IRC room, and other
+online media.
+
+Riaknostic integrates into the `riak-admin` command via a `diag`
+subcommand, and is a great first step in the process of diagnosing and
+troubleshooting issues on Riak nodes.
+
+#### Riak Control
+
+[Riak Control]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-control/) is Basho's REST-driven user interface for managing Riak
+clusters. It is designed to give you quick insight into the health of
+your cluster and allow for easy management of nodes.
+
+While Riak Control does not currently offer specific monitoring and
+statistics aggregation or analysis functionality, it does offer features
+which provide immediate insight into overall cluster health, node
+status, and handoff operations.
+
+#### collectd
+
+[collectd](http://collectd.org) gathers statistics about the system it
+is running on and stores them. The statistics are then typically graphed
+to find current performance bottlenecks, predict system load, and
+analyze trends.
+
+#### Ganglia
+
+[Ganglia](http://ganglia.info) is a monitoring system specifically
+designed for large, high-performance groups of computers, such as
+clusters and grids. Customers and community members using Riak have
+reported success in using Ganglia to monitor Riak clusters.
+
+A [Riak Ganglia module][riak_ganglia] for collecting statistics from
+the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.8/developing/api/http/status) endpoint is also available.
+
+#### Nagios
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x.**
+{{% /note %}}
+
+[Nagios](http://www.nagios.org) is a monitoring and alerting solution
+that can provide information on the status of Riak cluster nodes, in
+addition to various types of alerting when particular events occur.
+Nagios also offers logging and reporting of events and can be used for
+identifying trends and capacity planning.
+
+A collection of [reusable Riak-specific scripts][riak_nagios] is
+available to the community for use with Nagios.
+
+#### OpenTSDB
+
+[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database
+(TSDB) used to store, index, and serve metrics from various sources. It can
+collect data at a large scale and graph these metrics on the fly.
+
+A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of
+the [tcollector framework][tcollector].
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project
+consisting of small programs for sending data to Riemann provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have
+reported successfully using for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.8/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine-generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/2.9.8/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. These data are then available for real-time graphing,
+search, and other visualizations, ideal for troubleshooting complex
+issues and spotting trends.
+
+## Summary
+
+Riak exposes a wide range of vital statistics that can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ + + + + diff --git a/content/riak/kv/2.9.8/using/reference/strong-consistency.md b/content/riak/kv/2.9.8/using/reference/strong-consistency.md new file mode 100644 index 0000000000..1cb29a9473 --- /dev/null +++ b/content/riak/kv/2.9.8/using/reference/strong-consistency.md @@ -0,0 +1,150 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +aliases: +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.8/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs).
+
+In an eventually consistent system, on the other hand, a read may return
+an out-of-date value, particularly during system or network failures.
+The advantage of this approach is that reads and writes can succeed even
+when a cluster is experiencing significant service degradation.
+
+### Example
+
+Building on the example presented in the [eventual consistency][concept eventual consistency] doc,
+imagine that information about who manages Manchester United is stored
+in Riak, in the key `manchester-manager`. In the eventual consistency
+example, the value associated with this key was originally
+`David Moyes`, meaning that it was the first successful write to that
+key. But then `Louis van Gaal` became Man U's manager, and a write was
+executed to change the value of `manchester-manager`.
+
+Now imagine that this write failed on one node in a multi-node cluster.
+Thus, all nodes report that the value of `manchester-manager` is `Louis
+van Gaal` except for one. On the errant node, the value of the
+`manchester-manager` key is still `David Moyes`. An eventually
+consistent system is one in which a get request will most likely return
+`Louis van Gaal` but could return the outdated value `David Moyes`.
+
+In a strongly consistent system, conversely, any successful read on
+`manchester-manager` will return `Louis van Gaal` and never `David Moyes`.
+Reads will return `Louis van Gaal` every single time until Man U gets a new
+manager and someone performs a successful write to `manchester-manager`
+to change its value.
+
+It might also be useful to imagine it a bit more abstractly. The
+following causal sequence would characterize a strongly consistent
+system:
+
+1. The value of the key `k` is set to `v`
+2. All successful reads on `k` return `v`
+3. The value of `k` is changed to `v2`
+4. All successful reads on `k` return `v2`
+5. And so forth
+
+At no point in time does this system return an out-of-date value.
+
+The following sequence could characterize an eventually consistent
+system:
+
+1. A write is made that sets the value of the key `k` to `v`
+2. Nearly all reads to `k` return `v`, but a small percentage return
+   `not found`
+3. A write to `k` changes the value to `v2`
+4. Nearly all reads to `k` now return `v2`, but a small number return
+   the outdated `v` (or even `not found`) because the newer value hasn't
+   yet been replicated to all nodes
+
+## Making the Strong vs. Eventual Decision
+
+The first system described above may sound like the undisputed champion
+and the second undesirable. However:
+
+1. Reads and writes on the first system will often be slower---if only
+   by a few milliseconds---because the system needs to manage reads and
+   writes more carefully. If performance is of primary concern, the
+   first system might not be worth the sacrifice.
+2. Reads and writes on the first system may fail entirely if enough
+   servers are unavailable. If high availability is the top priority,
+   then the second system has a significant advantage.
+
+So when deciding whether to use strong consistency in Riak, the
+following question needs to be asked:
+
+#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value?
+
+If the answer is yes, then you should seriously consider using Riak in a
+strongly consistent way for the data that demands it, while bearing in
+mind that other data can still be stored in Riak in an eventually
+consistent way.
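+
+For illustration, strong consistency is opted into per bucket type. A
+minimal sketch (the type name `strongly_consistent` is arbitrary, and
+the subsystem itself must first be switched on in each node's
+`riak.conf` via `strong_consistency = on`):
+
+```bash
+# Create and activate a bucket type whose keys are managed consistently
+riak-admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'
+riak-admin bucket-type activate strongly_consistent
+```
+
+Keys under such a bucket type exhibit exactly the behavior described
+above: reads and writes may fail during severe outages, but will never
+return out-of-date values.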
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable
+trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available
+than eventually consistent operations because they require a **quorum**
+of available object replicas to succeed. Quorum is defined as N / 2 + 1
+(using integer division), or `n_val` / 2 + 1. If N is set to 7, at least
+4 object replicas must be available; if N is 3, at least 2 must be
+available; and so on.
+
+If there is a network partition that leaves fewer than a quorum of object
+replicas available within an ensemble, strongly consistent operations
+against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault
+tolerance. Consistent operations can still succeed when a minority of
+replicas in each ensemble are offline, faulty, or unreachable. In
+other words, **strongly consistent operations will succeed as long as
+quorum is maintained**. A fuller discussion can be found in the
+[operations]({{<baseurl>}}riak/kv/2.9.8/configuring/strong-consistency/#fault-tolerance)
+documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/2.9.8/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+which can entail a performance hit of varying proportions, depending on
+a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/2.9.8/configuring/strong-consistency/#performance).
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.9.8/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..5f3e84d72a
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,40 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.8/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+ +[Learn More >>][v2 mdc fullsync] + + + + diff --git a/content/riak/kv/2.9.8/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.9.8/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..26ca5bbb18 --- /dev/null +++ b/content/riak/kv/2.9.8/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,130 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/2.9.8/ops/mdc/v2/architecture + - /riak/kv/2.9.8/ops/mdc/v2/architecture +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.8/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e. +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime mode are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the Figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. 
The site node in the secondary cluster initiates fullsync replication
+   with the primary node by sending a message to the listener node in
+   the primary cluster
+3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode) in their respective clusters and compute a hash for
+   each key's object value. The site node on the secondary cluster sends
+   its complete list of key/hash pairs to the listener node in the
+   primary cluster. The listener node then sequentially compares those
+   pairs with its own key/hash pairs, identifying any objects that are
+   missing or in need of update in the secondary cluster.
+4. The listener node streams the missing objects/updates to the
+   secondary cluster.
+5. The secondary cluster replicates the updates within the cluster to
+   achieve the new object values, completing the fullsync cycle
+
+<br>
+![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png)
+<br>
+
+## Realtime Replication
+
+Riak performs the following steps during realtime
+replication, as illustrated in the Figure below.
+
+1. The secondary cluster establishes a TCP connection to the primary
+2. Realtime replication of a key/object is initiated when an update is
+   sent from a client to the primary cluster
+3. The primary cluster replicates the object locally
+4. The listener node on the primary cluster streams an update to the
+   secondary cluster
+5. The site node within the secondary cluster receives and replicates
+   the update
+
+<br>
+![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png)
+<br>
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/2.9.8/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+source and sink cluster.
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.8/using/reference/v2-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..e3ff4d2075
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/v2-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,53 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v2_fullsync"
+    weight: 101
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/mdc/v2/scheduling-fullsync
+  - /riak/kv/2.9.8/ops/mdc/v2/scheduling-fullsync
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.8/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead.
+{{% /note %}}
+
+
+## Scheduling Fullsync Operation
+
+With the `pause` and `resume` commands, it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` + + + + diff --git a/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter.md b/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..c632d47b10 --- /dev/null +++ b/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter.md @@ -0,0 +1,52 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +aliases: +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] + + + + diff --git a/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..3b8e6d0398 --- /dev/null +++ b/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,129 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.8/ops/mdc/v3/aae + - /riak/kv/2.9.8/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak
+Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a
+technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in
+LevelDB, so if AAE is already active, there is no additional startup
+delay for enabling the `aae` fullsync strategy. AAE can also be enabled
+for the first time on a cluster, although some custom settings can
+enhance performance in this case to help AAE trees be built more
+quickly. See [Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later (or any open source release
+  after 2.2.3) installed on source and sink clusters
+* Riak MDC Replication Version 3 enabled on source and sink
+  clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the
+  `advanced.config` configuration file must be set to `aae` on both
+  source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In
+  the event that an AAE tree is not built on both the source and sink,
+  fullsync will default to the `keylist` fullsync strategy for that
+  partition.
+
+## Configuration
+
+If you are using Riak version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To enable [active anti-entropy][glossary aae] \(AAE), you must enable it
+in both source and sink clusters. If it is not
+enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all
+of the necessary hash trees because the default **build rate** of trees
+is to build 1 partition per hour, per node. With a
+[ring size][concept clusters] of 256 and 5 nodes, that is 2 days.
+
+Changing the rate of tree building can speed up this process, with the
+caveat that rebuilding a tree takes processing time from the cluster,
+and this should not be done without assessing the possible impact on
+get/put latencies for normal cluster operations. For a production
+cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a
+partition has not had its AAE tree built yet, it will default to using
+the `keylist` replication strategy. Instructions on these settings can
+be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting
+the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit`
+settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on
If not, the `keylist` replication +strategy will be used. + +To enable AAE w/ Version 3 MDC Replication: + +```advancedconfig +{riak_repl, [ + % ... + {fullsync_strategy, aae}, + % ... + ]} +``` + + + + diff --git a/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/architecture.md new file mode 100644 index 0000000000..f1cfd3f41d --- /dev/null +++ b/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/architecture.md @@ -0,0 +1,186 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Architecture" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Architecture" + identifier: "managing_ref_v3_architecture" + weight: 100 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.8/ops/mdc/v3/architecture + - /riak/kv/2.9.8/ops/mdc/v3/architecture +--- + +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#vnode +[concept clusters]: {{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters + +## How Version 3 Replication Works + +In Multi-Datacenter (MDC) Replication, a cluster can act as either the + +* **source cluster**, which sends replication data to one or +* **sink clusters**, which are generally located in datacenters in other + regions or countries. + +Bidirectional replication can easily be established by making a cluster +both a source and sink to other clusters. Riak +Multi-Datacenter Replication is considered "masterless" in that all +clusters participating will resolve replicated writes via the normal +resolution methods available in Riak. + +In Multi-Datacenter Replication, there are two primary modes of +operation: + +* **Fullsync** replication is a complete synchronization that occurs + between source and sink cluster(s), which can be performed upon + initial connection of a sink cluster if you wish +* **Realtime** replication is a continual, incremental synchronization + triggered by successful writing of new updates on the source cluster + +Fullsync and realtime replication modes are described in detail below. + +## Concepts + +### Sources + +A source refers to a cluster that is the primary producer of replication +data. A source can also refer to any node that is part of the source +cluster. Source clusters push data to sink clusters. + +### Sinks + +A sink refers to a cluster that is the primary consumer of replication +data. A sink can also refer to any node that is part of the sink +cluster. Sink clusters receive data from source clusters. + +### Cluster Manager + +The cluster manager is a Riak service that provides +information regarding nodes and protocols supported by the sink and +source clusters. This information is primarily consumed by the +`riak-repl connect` command. + +### Fullsync Coordinator + +In fullsync replication, a node on the source cluster is elected to be +the *fullsync coordinator*. This node is responsible for starting and +stopping replication to the sink cluster. It also communicates with the +sink cluster to exchange key lists and ultimately transfer data across a +TCP connection. If a fullsync coordinator is terminated as the result of +an error, it will automatically restart on the current node. If the node +becomes unresponsive, a leader election will take place within 5 seconds +to select a new node from the cluster to become the coordinator. In the +event of a coordinator restart, a fullsync will have to restart. 
+ +## Fullsync Replication + +Fullsync replication scans through the list of partitions in a Riak +cluster and determines which objects in the sink cluster need to be +updated. A source partition is synchronized to a node on the sink +cluster containing the current partition. + +## Realtime Replication + +In realtime replication, a node in the source cluster will forward data +to the sink cluster. A node in the source cluster does not necessarily +connect to a node containing the same [vnode][glossary vnode] on +the sink cluster. This allows Riak to spread out realtime replication +across the entire cluster, thus improving throughput and making +replication more fault tolerant. + +### Initialization + +Before a source cluster can begin pushing realtime updates to a sink, +the following commands must be issued: + +1. `riak-repl realtime enable <sink_cluster>` + + After this command, the realtime queues (one for each Riak node) are + populated with updates to the source cluster, ready to be pushed to + the sink. + +2. `riak-repl realtime start <sink_cluster>` + + This instructs the Riak connection manager to contact the sink + cluster. + + <br /> + ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png) + <br /> + + At this point realtime replication commences. + +<ol start="3"> +<li>Nodes with queued updates establish connections to the sink cluster +and replication begins.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png) +<br /> + +### Realtime queueing and synchronization + +Once initialized, realtime replication continues to use the queues to +store data updates for synchronization. + +<ol start="4"> +<li>The client sends an object to store on the source cluster.</li> +<li>Riak writes N replicas on the source cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png) +<br /> + +<ol start="6"> +<li>The new object is stored in the realtime queue.</li> +<li>The object is copied to the sink cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png) +<br /> + +<ol start="8"> +<li>The destination node on the sink cluster writes the object to N +nodes.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png) +<br /> + +<ol start="9"> +<li>The successful write of the object to the sink cluster is +acknowledged and the object removed from the realtime queue.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png) +<br /> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size][concept clusters]; if you are using fullsync +replication, every bucket's `n_val` must be the same in both the +source and sink cluster. 
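+
+As a quick pre-flight check, the ring size can be compared across
+clusters from the stats on any node of each cluster (a minimal sketch;
+exact output formatting may vary by version):
+
+```bash
+# Run on a node in each cluster; the reported values must match
+riak-admin status | grep ring_num_partitions
+```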
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/cascading-writes.md
new file mode 100644
index 0000000000..57f00dd5b8
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -0,0 +1,102 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Cascading Realtime Writes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Cascading Writes"
+    identifier: "managing_ref_v3_cascading_writes"
+    weight: 102
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.8/ops/mdc/v3/cascading-writes
+  - /riak/kv/2.9.8/ops/mdc/v3/cascading-writes
+---
+
+## Introduction
+
+Riak includes a feature that cascades realtime writes across
+multiple clusters.
+
+Cascading Realtime Writes is enabled by default on new clusters running
+Riak. It will need to be manually enabled on existing clusters.
+
+Cascading realtime requires the `{riak_repl, rtq_meta}` capability to
+function.
+
+{{% note title="Note on cascading tracking" %}}
+Cascading tracking is a simple list of where an object has been written. This
+works well for most common configurations. Larger installations, however, may
+have writes cascade to clusters to which other clusters have already written.
+{{% /note %}}
+
+
+```
++---+     +---+     +---+
+| A | <-> | B | <-> | C |
++---+     +---+     +---+
+  ^                   ^
+  |                   |
+  V                   V
++---+     +---+     +---+
+| F | <-> | E | <-> | D |
++---+     +---+     +---+
+```
+
+In the diagram above, a write at cluster A will begin two cascades. One
+goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and
+finally B. Each cascade will loop around to A again, sending a
+replication request even if the same request has already occurred from
+the opposite direction, creating 3 extra write requests.
+
+This can be mitigated by disabling cascading in a cluster. If cascading
+were disabled on cluster D, a write at A would begin two cascades. One
+would go through B, C, and D, the other through F, E, and D. This
+reduces the number of extraneous write requests to 1.
+
+A different topology can also prevent extra write requests:
+
+```
++---+                       +---+
+| A |                       | E |
++---+                       +---+
+ ^  ^                      ^  ^
+ |   \   +---+     +---+  /   |
+ |    >  | C | <-> | D | <    |
+ |   /   +---+     +---+  \   |
+ V  V                      V  V
++---+                       +---+
+| B |                       | F |
++---+                       +---+
+```
+
+A write at A will cascade to C and B. B will not cascade to C because
+A will have already added C to the list of clusters where the write has
+occurred. C will then cascade to D. D then cascades to E and F. E and F
+see that the other was sent a write request (by D), and so they do not
+cascade.
+
+## Usage
+
+Riak Cascading Writes can be enabled and disabled using the
+`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/v3-multi-datacenter) for more information.
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
+
+
+
diff --git a/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..0ac6e8099a
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,72 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.8/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/2.9.8/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak-repl`
+section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes,
+  between fullsyncs, _or_
+* a list of `{"clustername", time_in_minutes}` pairs, one for each sink
+  participating in fullsync replication. Note the commas separating each
+  pair, and the `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    {fullsync_interval, 90} %% fullsync runs every 90 minutes
+    % ...
+    ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    % clusters sink_boston + sink_newyork have different intervals (in minutes)
+    {fullsync_interval, [
+      {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+      {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+    ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been
+  completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the
+  specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last
+  completed fullsync.
+
+
+
diff --git a/content/riak/kv/2.9.8/using/repair-recovery.md b/content/riak/kv/2.9.8/using/repair-recovery.md
new file mode 100644
index 0000000000..bda87895ff
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/repair-recovery.md
@@ -0,0 +1,53 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
+
+
+
diff --git a/content/riak/kv/2.9.8/using/repair-recovery/errors.md b/content/riak/kv/2.9.8/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..b630762de9
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/repair-recovery/errors.md
@@ -0,0 +1,366 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/recovery/errors
+  - /riak/kv/2.9.8/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may
+encounter -- screws fall out all of the time, the world is an imperfect
+place. This is an attempt at capturing the most common recent errors
+that users do encounter, as well as describing the non-critical error
+atoms you may find in the logs.
+
+Discovering the source of an error can take some detective work, since
+one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error
+messages may appear in. Depending upon your log configuration, some may
+appear more often (e.g., if you set the log level to debug), while others
+may output to your console (e.g., if you tee'd your output or started as
+`riak console`).
+
+You can optionally customize your log message format via the
+`lager_default_formatter` field under `lager` in `app.config`. If you
+do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so that you can look up portions of
+a log message, since printing every variation would be a bit unwieldy.
+For example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+Starts with a timestamp (`12:34:27.999`), followed by the log severity
+(`[error]`), with a message formatted by lager (found in the Lager table
+below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the Lager project, so it's good to note
+some of the more common message formats. In almost every case the
+reasons for the error are described as variables, such as `Reason` or
+`Mod` (meaning the Erlang module which is generally the source of the
+error).
+
+Riak does not format all error messages that it receives into
+human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in
+this table, where the Erlang `Mod` value is `riak_core_capability` and
+the reason was an Erlang error: `no function clause matching
+orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang programming favors a "happy path/fail fast" style, one
+of the more common error log strings you might encounter contains
+`{error,{badmatch,{...`. This is Erlang's way of telling you that an
+unexpected value was assigned, so these errors can prefix the more
+descriptive parts. In this case, `{error,{badmatch,{...` prefixes the
+more interesting `insufficient_vnodes_available` error, which can be
+found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.8/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.8/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large of an object or that you simply don't have enough RAM. Base minimum suggested RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang
+atoms (and quite often wrapped within an `{error,{badmatch,{...` tuple,
+as described in the [Error Atoms](#erlang-errors) section
+above). This table lays out those terse error codes and related log
+messages, if they exist.
+
+### Riak Core
+
+Riak Core is the underlying implementation for KV. These are errors
+originating from that framework, and can appear whether you use KV,
+Search, or any Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g.
backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check `riak-admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections, and ensure the Erlang cookie setting (`vm.args` `-setcookie`) matches on all nodes
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use (e.g., `{riak_kv_stat, true}`)
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possible
corrupted ring +`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`) +`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted +`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages + | `Bucket validation failed <Detail>` | | Only set value bucket properties + | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard + | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because of vnode was `DOWN` and sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes. + | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes. + | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the senders attempt to start a handoff | Ensure your SSL settings and certificates are proper + | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper + | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.8/using/repair-recovery/errors/#more"> Step 1</a>. + | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues + | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read + | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found + | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node + | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors + | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various source for a fixup error, read associated errors + | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors + | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive a `xfer_complete` status + +### Riak KV + +Riak KV is the key/value implementation, generally just considered to be +Riak proper. 
This is the source of most of the code, and consequently, +most of the error messages. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status +`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query +`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.8/using/repair-recovery/errors/#more"> Step 1</a>. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/2.9.8/using/repair-recovery/errors/#more"> Step 1</a>. +`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone +`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value +`{field_parsing_failed, {Field, Value}}` | `Could not parse field +<Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer +`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug +`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend +`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running riak-admin ring-status. +`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings +`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe` +`invalid_message` | | Unknown event sent to module | Ensure you're running similar versions of Riak across (and specifically poolboy) across all nodes +`{invalid_range, Args}` | | Index range query hasStart > End | Fix your query +`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result +`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files +`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key +`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk. 
If this happens repetitively, stop/start the riak node, forcing a memory realloc +`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N +`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized +`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set mapred_system from legacy to pipe +`notfound` | | No value found | Value was deleted, or was not yet stored or replicated +`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high +`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value +`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code +`{pw_val_unsatisfied, PR, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high +`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value +`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high +`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`on`, `all`, `quorum`) | Set a valid R value +`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called +`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value +`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the buckets `allow_mult` property to `false` +`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.8/using/repair-recovery/errors/#more"> Step 1</a>. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily timeout and shouldn't be used in production. +`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format +`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format +`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value +`too_many_results` | | Too many results are attempted to be returned | This is a protective error. 
Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000) +`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only value field types are `_int` and `_bin` +`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high +`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (on, all, quorum) | Set a valid W value + | `Invalid equality query <SKey>` | Equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query + | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary an index call | Pass in both range values when performing a 2i equality query + | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution + +### Backend Errors + +These errors tend to stem from server-based problems. Backends are +sensitive to low or corrupt disk or memory resources, native code, and +configuration differences between nodes. Conversely, a network issue is +unlikely to affect a backend. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config +`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where to store bitcask data, under the `bitcask` section +`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list +`{invalid_config_setting, multi_backend, list_is_empty`} | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config` +`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure +`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config` +`not_loaded` | | Native driver not loading | Ensure your native drivers exist (.dll or .so files {riak_kv_multi_backend, undefined_backend, BackendName} | | Backend defined for a bucket is invalid | Define a valid backed before using this bucket under lib/`project`/priv, where `project` is most likely eleveldb). +`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production + +### JavaScript + +These are some errors related to JavaScript pre-commit functions, +MapReduce functions, or simply the management of the pool of JavaScript +VMs. If you do not use JavaScript, these should not be encountered. If +they are, check your configuration for high `*js_vm*` values or as an +epiphenomenon to a real issue, such as low resources. 
+ +Error | Message | Description | Resolution +---------|---------|-------------|------- +`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`) +`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A UTF-8 character give was a bad format | Only use correct UTF-8 characters for JavaScript code and arguments +`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments + | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties +`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | In OS X you may have compiled with `llvm-gcc` rather than `gcc`. + +### MapReduce + +These are possible errors logged by Riak's MapReduce implementation, +both legacy as well as Pipe. If you never use or call MapReduce, you +should not run across these. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it +`bad_mapred_inputs` | | A bad value sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary +`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | Placing javascript MapReduce query code as a riak value must first be stored before execution +`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct +`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (Both issues may require a node restart) +`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has invalid input field | Fix MapReduce fields +`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly +`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks. 
Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite as Erlang functions +`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post MapReduce request with at least one +`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value +`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly +`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long running job hitting MapReduce timeout, upgrade to Pipe +`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has invalid query field | Fix MapReduce query +`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set reduce phase at all +`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted + | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation, most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects + | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen +  +## Specific messages + +Although you can put together many error causes with the tables above, +here are some common yet esoteric messages with known causes and +solutions. + + Message | Resolution +:--------|:---------- +gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The Node has been changed, either through change of IP or `vm.args` `-name` without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files `rm -rf /var/lib/riak/ring/*` and rejoin to the cluster +gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) +monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). +<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/2.9.8/using/repair-recovery/errors/#more">Step 2</a>. +enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. 
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of errors; check for existence of stale `beam.smp` processes. +exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. +gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/2.9.8/using/reference/snmp) configuration. +RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. + + +### More + +1. <a name="f1"></a>Ensure node inter-communication + - Check `riak-admin member-status` and ensure the cluster is valid. + - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected. + - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node. + - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster. + - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`). + +2. <a name="f2"></a>Run LevelDB compaction + 1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting, more than one might be a strong indication of a hardware or OS bug)* + 2. Stop Riak on the node: `riak stop` + 3. Start an Erlang session (do not start riak, we just want Erlang) + 4. From the Erlang console perform the following command to open the LevelDB database + + ```erlang + [application:set_env(eleveldb, Var, Val) || {Var, Val} <- + [{max_open_files, 2000}, + {block_size, 1048576}, + {cache_size, 20*1024*1024*1024}, + {sync, false}, + {data_root, "/var/db/riak/leveldb"}]]. + ``` + 5. For each of the corrupted LevelDB databases (found by `find . -name "LOG" -exec` | `grep -l 'Compaction error' {} \; `) run this command substituting in the proper vnode number. + + ```erlang + eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []). + ``` + 6. When all have finished successfully you may restart the node: `riak start` + 7. Check for proper operation by looking at log files in /var/log/riak and in the LOG files in the effected LevelDB vnodes. 
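+
+For convenience, the per-vnode repair calls in step 5 can be generated
+from the `find` output. A minimal sketch, assuming the example data
+root used above (`/var/db/riak/leveldb`); adjust the path to your own
+`platform_data_dir`:
+
+```bash
+# Print the vnode IDs whose LOG files record compaction errors,
+# one per line, relative to the LevelDB data root
+cd /var/db/riak/leveldb
+find . -name "LOG" -exec grep -l 'Compaction error' {} \; \
+  | sed 's|^\./||; s|/LOG$||'
+# each printed ID is an argument for eleveldb:repair/2 in step 5
+```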
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/repair-recovery/failed-node.md b/content/riak/kv/2.9.8/using/repair-recovery/failed-node.md
new file mode 100644
index 0000000000..1e10010b03
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/repair-recovery/failed-node.md
@@ -0,0 +1,114 @@
+---
+title: "Recovering a Failed Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Recover a Failed Node"
+    identifier: "repair_recover_failed_node"
+    weight: 104
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/recovery/failed-node
+  - /riak/kv/2.9.8/ops/running/recovery/failed-node
+---
+
+## General Recovery Notes
+
+A Riak node can fail for many reasons, but a handful of checks can
+uncover some of the most common problems that lead to node failure,
+such as verifying RAID and filesystem consistency, checking for faulty
+memory, and ensuring that your network connections are fully functioning.
+
+When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster.
+
+During the recovery process, hinted handoff will kick in and update the data on
+the recovered node with updates accepted from other nodes in the cluster. Your
+cluster may temporarily return `not found` for objects that are currently
+being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on
+these scenarios, in particular how the system behaves while the failed node is
+not part of the cluster).
+
+## Node Name Changed
+
+If you are recovering from a scenario in which node name changes are out of
+your control, you'll want to notify the cluster of its *new* name using the
+following steps:
+
+1. Stop the node you wish to rename:
+
+    ```bash
+    riak stop
+    ```
+
+2. Mark the node down from another node in the cluster:
+
+    ```bash
+    riak-admin down <previous_node_name>
+    ```
+
+3. Update the node name in Riak's configuration files:
+
+    ```riakconf
+    nodename = <updated_node_name>
+    ```
+
+    ```vmargs
+    -name <updated_node_name>
+    ```
+
+4. Delete the ring state directory (usually `/var/lib/riak/ring`).
+
+5. Start the node again:
+
+    ```bash
+    riak start
+    ```
+
+6. Ensure that the node comes up as a single instance:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    The output should look something like this:
+
+    ```
+    ========================= Membership ==========================
+    Status     Ring    Pending    Node
+    ---------------------------------------------------------------
+    valid     100.0%      --      'dev-rel@127.0.0.1'
+    ---------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+7. Join the node to the cluster:
+
+    ```bash
+    riak-admin cluster join <node_name_of_a_member_of_the_cluster>
+    ```
+
+8. Replace the old instance of the node with the new:
+
+    ```bash
+    riak-admin cluster force-replace <previous_node_name> <new_node_name>
+    ```
+
+9. Review the changes:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Finally, commit those changes:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.9.8/using/repair-recovery/failure-recovery.md
new file mode 100644
index 0000000000..7c5fc1e341
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/repair-recovery/failure-recovery.md
@@ -0,0 +1,129 @@
+---
+title: "Failure & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Failure & Recovery"
+    identifier: "repair_recover_failure"
+    weight: 100
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/running/recovery/failure-recovery
+  - /riak/kv/2.9.8/ops/running/recovery/failure-recovery
+---
+
+Riak was built to withstand---or at the very least reduce the severity
+of---many types of system failure. Nonetheless, bugs are a reality,
+hardware does break, and occasionally Riak itself will fail. Here, we'll
+list some steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+## Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, and back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally and
+are not affected by a wider problem like a virtualization or network outage.
+Try to determine the cause of the problem from the data you have collected.
+
+## Data Loss
+
+Many failures incur no data loss, or only minimal loss that can be
+repaired automatically, without intervention. Outage of a single node
+does not necessarily cause data loss, as other replicas of every key are
+available elsewhere in the cluster. Once the node is detected as down,
+other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/2.9.8/learn/glossary/#hinted-handoff)).
+
+More severe data loss scenarios usually relate to hardware failure.
+If data is lost, several options are available for restoring it.
+
+1. **Restore from backup** - A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but it can be used to partially restore data from
+   lost storage volumes. If running in a RAID configuration, rebuilding
+   the array may also be possible.
+2. **Restore from multi-cluster replication** - If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the `riak-repl`
+   command.
+3. **Restore using intra-cluster repair** - Riak versions 1.2 and greater
+   include a repair feature which will restore lost partitions with
+   data from other replicas. Currently, this must be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho Client Services Engineer.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho are strongly recommended.
+
+## Data Corruption
+
+Data at rest on disk can become corrupted by hardware failure or other
+events. Generally, the Riak storage backends are designed to handle
+cases of corruption in individual files or entries within files, and can
+repair them automatically or simply ignore the corrupted parts.
+Otherwise, clusters can recover from data corruption in roughly the same
+way that they recover from data loss.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully.
+
+Replacing the node with one that has greater RAM capacity may temporarily
+alleviate the problem, but out-of-memory (OOM) issues tend to be an indication
+that the cluster is under-provisioned.
+
+## High Latency / Request Timeout
+
+High latencies and timeouts can be caused by slow disks or networks or an
+overloaded node. Check `iostat` and `vmstat` or your monitoring system to
+determine the state of resource usage. If I/O utilization is high but
+throughput is low, this may indicate that the node is responsible for
+too much data and growing the cluster may be necessary. Additional RAM
+may also improve latency because more of the active dataset will be
+cached by the operating system.
+
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/2.9.8/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+the number of siblings, causing longer disk service times and slower
+network responses.
+
+Sibling explosion can be detected by examining the `node_get_fsm_siblings`
+and `node_get_fsm_objsize` statistics from the `riak-admin status` command.
+To recover from sibling explosion, the application should be throttled and
+the resolution policy might need to be invoked manually on offending keys.
+
+A Basho CSE can assist in manually finding large values, i.e. those that
+potentially have a sibling explosion problem, in the storage backend.
+
+MapReduce requests typically involve multiple I/O operations and are
+thus the most likely to time out. From the perspective of the client
+application, the success of MapReduce requests can be improved by reducing the
+number of inputs, supplying a longer request timeout, and reducing the usage
+of secondary indexes. Heavily loaded clusters may experience more MapReduce
+timeouts simply because many other requests are being serviced as well. Adding
+nodes to the cluster can reduce MapReduce failure in the long term by
+spreading load and increasing available CPU and IOPS.
+
+
+## Cluster Recovery From Backups
+
+See [Changing Cluster Information]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+
+{{% note title="Tip" %}}
+If you are a TI Tokyo Riak support customer and require assistance or
+further advice on a cluster recovery, please file a ticket with the
+<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>.
+{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.8/using/repair-recovery/repairs.md b/content/riak/kv/2.9.8/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..4505eb73a8 --- /dev/null +++ b/content/riak/kv/2.9.8/using/repair-recovery/repairs.md @@ -0,0 +1,391 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.8/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.8/ops/running/recovery/repairing-indexes + - /riak/2.9.8/ops/running/recovery/failed-node + - /riak/kv/2.9.8/ops/running/recovery/failed-node + - /riak/2.9.8/ops/running/recovery/repairing-leveldb + - /riak/kv/2.9.8/ops/running/recovery/repairing-leveldb + - /riak/2.9.8/ops/running/recovery/repairing-partitions + - /riak/kv/2.9.8/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/2.9.8/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/2.9.8/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/2.9.8/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`.
+
+Compaction error messages take the following form:
+
+```
+<timestamp> Compaction Error: Corruption: corrupted compressed block contents
+```
+
+To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction Error` in each `LOG` file. Here is an example script:
+
+```bash
+find . -name "LOG" -exec grep -l 'Compaction error' {} \;
+```
+
+If there are compaction errors in any of your vnodes, those will be listed in the console. If any vnode has experienced such errors, you would see output like this:
+
+```
+./442446784738847563128068650529343492278651453440/LOG
+```
+
+
+{{% note %}}
+While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level.
+{{% /note %}}
+
+
+## Healing Corrupted LevelDBs
+
+When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not.
+
+Choose your setup below:
+
+1. [Just LevelDB](#leveldb)
+2. [LevelDB with tiered storage](#leveldb-with-tiered-storage)
+
+
+### LevelDB
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+3\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+4\. Then set `Options` equal to an empty list:
+
+```erlang
+Options = [].
+```
+
+5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+VNodeList should be a list of the corrupted LevelDB vnode IDs that you found using the [`find` command above](#checking-for-compaction-errors).
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+6\. Run the following commands, which will parse the information you provided and run eleveldb:repair over all of the VNode IDs that you listed in VNodeList.
+
+```erlang
+RepairPath = fun(Root, VNode) -> Path = lists:flatten(Root ++ "/" ++ VNode), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+### LevelDB with Tiered Storage
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. Check your `riak.conf` file and make note of the following values:
+
+* `leveldb.tiered` (integer)
+* `leveldb.tiered.path.fast`
+* `leveldb.tiered.path.slow`
+
+3\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+4\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+5\. Then supply the information you noted in Step 2:
+
+```erlang
+Options = [
+  {tiered_slow_level, »leveldb.tiered value«},
+  {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"},
+  {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"}
+].
+```
+
+6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+VNodeList should be a list of the corrupted LevelDB partitions that you found using the [`find` command above](#checking-for-compaction-errors), each provided in double quotes.
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+7\. Run the following commands, which will parse the information you provided and run eleveldb:repair over all of the VNode IDs that you listed in VNodeList.
+
+```erlang
+RepairPath = fun(Root, VNode) -> Path = lists:flatten(Root ++ "/" ++ VNode), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+
+## Repairing Partitions
+
+If you have experienced a loss of object replicas in your cluster, you
+may need to perform a repair operation on one or more of your data
+[partitions]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+run in situations where partitions or whole nodes are lost due to
+corruption or hardware failure. In these cases, nodes or partitions are
+brought back online without any data, which means that the need to
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/) is enabled.
+
+You will need to run a repair if the following are both true:
+
+* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* You have both non-expiring data and keys that are not accessed
+  frequently (which means that they are not likely to be subject to
+  [read repair]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+
+You will most likely not need to run a repair operation if _any_ of the
+following is true:
+
+* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/2.9.8/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Your entire key set is accessed frequently, allowing passive read
+  repair to repair the partitions
+* Your data expires frequently
+
+In most cases, we recommend either using active anti-entropy or, if
+necessary and only when necessary, running a repair operation using the
+instructions below.
+
+### Running a Repair
+
+The Riak KV repair operation will repair objects from a node's adjacent
+partitions on the ring, consequently fixing the index. This is done as
+efficiently as possible by generating a hash range for all the buckets
+and thus avoiding a preflist calculation for each key. Only a hash of
+each key is done, its range determined from a bucket->range map, and
+then the hash is checked against the range.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data it is safest to make them
+mutually exclusive events. If you join or remove a node all repairs
+across the entire cluster will be killed.
+
+### Repairing a Single Partition
+
+In the case of data loss in a single partition, only that partition can
+be repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit **Enter** again to get a console prompt.
+
+2. Execute the repair for a single partition using the below command:
+
+    ```erlang
+    riak_kv_vnode:repair(»Partition ID«).
+    ```
+
+    where `»Partition_ID«` is replaced by the ID of the partition to
+    repair. For example:
+
+    ```erlang
+    riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
+    ```
+
+3. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Repairing All Partitions on a Node
+
+If a node is lost, all partitions currently owned by that node can be
+repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+2. Get a copy of the current Ring:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with ring record information.
+    You can safely ignore it.
+
+3. Get a list of partitions owned by the node that needs to be repaired.
+Replace `dev1@127.0.0.1` with the name of the node to be repaired. The
+name can be found in each node's `vm.args` file, specified as the
+`-name` parameter, if you are using the older configuration system; if
+you are using the newer, `riak.conf`-based system, the name is given by
+the `nodename` parameter.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    **Note**: The above is an [Erlang list
+    comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html)
+    that loops over each `{Partition, Node}` tuple in the ring and
+    extracts only the partitions that match the given node name, as a
+    list.
+
+
+4. Execute the repair on all the partitions. Executing the repairs all
+at once will cause a lot of `{shutdown, max_concurrency}` messages in
+the logs. These can be safely ignored, as it is just the transfers
+mechanism enforcing an upper limit on the number of concurrent
+transfers.
+
+    ```erlang
+    [riak_kv_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Monitoring Repairs
+
+The above repair commands can be monitored via the `riak-admin
+transfers` command.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This command can be executed from a `riak attach`
+session like below:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, and will
+look similar to:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Repairs on a node can also be killed remotely from another node in the
+cluster. From a `riak attach` session the below command can be used:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.9.8/using/repair-recovery/rolling-replaces.md
new file mode 100644
index 0000000000..e4d07c4582
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/repair-recovery/rolling-replaces.md
@@ -0,0 +1,76 @@
+---
+title: "Rolling Replaces"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Rolling Replaces"
+    identifier: "repair_recover_replace"
+    weight: 106
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+---
+
+[upgrade]: {{<baseurl>}}riak/kv/2.9.8/setup/upgrading/cluster/
+[rolling restarts]: {{<baseurl>}}riak/kv/2.9.8/using/repair-recovery/rolling-restart/
+[add node]: {{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes
+
+Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis.
+
+The following steps should be undertaken on each Riak KV node that you wish to replace:
+
+1\. Create a free node:
+
+  a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster.
+
+  b\. Or have a node that is currently in the cluster leave:
+
+  ```bash
+  riak-admin cluster leave »nodename«
+  ```
+
+  After creating a node or leaving a node, wait for all transfers to complete:
+
+  ```bash
+  riak-admin transfers
+  ```
+
+2\. Join the free node to your cluster:
+
+```bash
+riak-admin cluster join »free_node«
+```
+
+3\. Next, replace the free node with an existing node:
+
+```bash
+riak-admin cluster replace »free_node« »nodename«
+```
+
+4\. Then review the cluster transition plan:
+
+```bash
+riak-admin cluster plan
+```
+
+5\. And commit the changes:
+
+```bash
+riak-admin cluster commit
+```
+
+6\. Wait for all transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+7\. Repeat steps 2-6 above until each node has been replaced.
+
+8\. Join the replaced node back into the cluster or decommission the additional node that was created.
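+
+Put together, a single replacement pass (steps 2-6) might look like the
+sketch below. The node names are hypothetical: `riak@10.0.0.9` is the
+free node and `riak@10.0.0.2` is the member being replaced.
+
+```bash
+riak-admin cluster join riak@10.0.0.9                    # step 2
+riak-admin cluster replace riak@10.0.0.9 riak@10.0.0.2   # step 3
+riak-admin cluster plan                                  # step 4: review the plan
+riak-admin cluster commit                                # step 5: commit it
+riak-admin transfers                                     # step 6: repeat until no transfers remain
+```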
+ + + + diff --git a/content/riak/kv/2.9.8/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.9.8/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..568a1493cd --- /dev/null +++ b/content/riak/kv/2.9.8/using/repair-recovery/rolling-restart.md @@ -0,0 +1,64 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.8/ops/running/recovery/rolling-restart + - /riak/kv/2.9.8/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/2.9.8/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. + + + + diff --git a/content/riak/kv/2.9.8/using/repair-recovery/secondary-indexes.md b/content/riak/kv/2.9.8/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..c9bb8ea2dc --- /dev/null +++ b/content/riak/kv/2.9.8/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,142 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.8/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.8/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
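+
+For example, a repair of two specific partitions, throttled to half
+speed, might look like the sketch below. The partition IDs are
+placeholders, and the optional `--speed` flag (1-100) is assumed to be
+supported by your version of `riak-admin`:
+
+```bash
+riak-admin repair-2i --speed 50 \
+  251195593916248939066258330623111144003363405824 \
+  593735040165679310520246963290989976735222595584
+```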
+
+### Running a Repair
+
+The secondary indexes of a single partition can be repaired by executing:
+
+```bash
+riak-admin repair-2i <Partition_ID>
+```
+
+The secondary indexes of every partition can be repaired by executing the same command, without a partition ID:
+
+```bash
+riak-admin repair-2i
+```
+
+### Monitoring a Repair
+
+Repairs can be monitored using the below command:
+
+```bash
+riak-admin repair-2i status
+```
+
+### Killing a Repair
+
+In the event the secondary index repair operation needs to be halted, all repairs can be killed with:
+
+```bash
+riak-admin repair-2i kill
+```
+
+----
+
+## Repairing Search Indexes
+
+Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned.
+
+### Running a Repair
+
+If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index.
+
+This is done as efficiently as possible by generating a hash range for all the buckets and thus avoiding a preflist calculation for each key. Only a hash of each key is done, its range determined from a bucket→range map, and then the hash is checked against the range.
+
+This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly.
+
+1. From a cluster node with Riak installed, attach to the Riak console:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit enter again to get a console prompt.
+
+2. Get a list of partitions owned by the node that needs repair:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with Ring record information. You can safely ignore it.
+
+3. Then run the following code to get a list of partitions. Replace 'dev1@127.0.0.1' with the name of the node you need to repair.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the Ring and extracts only the partitions that match the given node name, as a list._
+
+4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` spam but it's not anything to worry about. That is just the transfers mechanism enforcing an upper limit on the number of concurrent transfers.
+
+    ```erlang
+    [riak_search_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. When you're done, press `Ctrl-D` to disconnect the console. DO NOT run `q()`, which will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service, it does not stop the code from running.
+
+
+### Monitoring a Repair
+
+The above Repair command can be slow, so if you reattach to the console, you can run the repair_status function. You can use the `Partitions` variable defined above to get the status of every partition.
+
+```erlang
+[{P, riak_search_vnode:repair_status(P)} || P <- Partitions].
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This means you'll either have to be attached to
+that node's console or you can use the `rpc` module to make a remote
+call. Here is an example of killing all repairs targeting partitions
+on the local node.
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, something akin to this:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Here is an example of executing the call remotely.
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data it is safest to make
+them mutually exclusive events. If you join or remove a node all
+repairs across the entire cluster will be killed.
+
+
+
+
diff --git a/content/riak/kv/2.9.8/using/running-a-cluster.md b/content/riak/kv/2.9.8/using/running-a-cluster.md
new file mode 100644
index 0000000000..78fc6c96a6
--- /dev/null
+++ b/content/riak/kv/2.9.8/using/running-a-cluster.md
@@ -0,0 +1,339 @@
+---
+title: "Running a Cluster"
+description: ""
+project: "riak_kv"
+project_version: 2.9.8
+menu:
+  riak_kv-2.9.8:
+    name: "Running a Cluster"
+    identifier: "managing_running_a_cluster"
+    weight: 200
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.8/ops/building/basic-cluster-setup
+  - /riak/kv/2.9.8/ops/building/basic-cluster-setup
+---
+
+Configuring a Riak cluster involves instructing each node to listen on a
+non-local interface, i.e. not `127.0.0.1`, and then joining all of the
+nodes together to participate in the cluster.
+
+Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/) located in your `rel/riak/etc` directory (if
+you compiled from source) or `/etc` (if you used a binary install of
+Riak).
+
+The commands below presume that you are running from a source install,
+but if you have installed Riak with a binary install, you can substitute
+the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin`
+with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts
+are located in the `/bin` directory of your installation.
+
+> **Note on changing the `name` value**
+>
+> If possible, you should avoid starting Riak prior to editing the name of
+a node. This setting corresponds to the `nodename` parameter in the
+`riak.conf` file if you are using the newer configuration system, and to
+the `-name` parameter in `vm.args` (as described below) if you are using
+the older configuration system. If you have already started Riak with
+the default settings, you cannot change the `-name` setting and then
+successfully restart the node.
+>
+> If you cannot restart after changing the `-name` value you have two
+options:
+>
+> * Discard the existing ring metadata by removing the contents of the
+`ring` directory. This will require rejoining all nodes into a
+cluster again. A minimal sketch of this is shown after this note.
+>
+> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
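+
+A minimal sketch of the first option (the ring directory path varies by
+install; `/var/lib/riak/ring` is typical for binary installs):
+
+```bash
+riak stop
+rm -rf /var/lib/riak/ring/*    # discard the existing ring metadata
+riak start                     # the node comes up as a fresh single instance
+# every node must now rejoin to form the cluster again
+```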
+
+## Configure the First Node
+
+First, stop your Riak node if it is currently running:
+
+```bash
+riak stop
+```
+
+#### Select an IP address and port
+
+Let's say that the IP address for your cluster is 192.168.1.10 and that
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+interface due to performance gains), you should change your
+configuration file:
+
+```riakconf
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"127.0.0.1", 8087 },
+```
+
+becomes
+
+```riakconf
+listener.protobuf.internal = 192.168.1.10:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"192.168.1.10", 8087 },
+```
+
+{{% note title="Note on upgrading to 2.0" %}}
+If you are upgrading to Riak version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config`/`vm.args`
+configuration files or the newer `riak.conf` if you wish. If you have
+installed Riak 2.0 directly, you should use only `riak.conf`.
+
+Below, examples will be provided for both the old and new configuration
+systems. Bear in mind that you need to use either the older or the newer
+but never both simultaneously.
+
+More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference).
+{{% /note %}}
+
+If you're using the HTTP interface, you will need to alter your
+configuration in an analogous way:
+
+```riakconf
+listener.http.internal = 127.0.0.1:8098
+```
+
+```appconfig
+%% In the riak_core section:
+
+{http, [ {"127.0.0.1", 8098 } ]},
+```
+
+becomes
+
+```riakconf
+listener.http.internal = 192.168.1.10:8098
+```
+
+```appconfig
+{http, [ {"192.168.1.10", 8098 } ]},
+```
+
+#### Name your node
+
+Every node in Riak has a name associated with it. The default name is
+`riak@127.0.0.1`. Let's say that you want to change the name to
+`riak@192.168.1.10`:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+becomes
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+> **Node Names**
+>
+> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10`
+are both acceptable node naming schemes, but using the FQDN style is
+preferred.
+>
+> Once a node has been started, in order to change the name you must
+either remove ring files from the `/data/ring` directory or
+[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.8/using/admin/riak-admin/#cluster-force-replace) the node.
+
+#### Start the node
+
+Now that your node is properly configured, you can start it:
+
+```bash
+riak start
+```
+
+If the Riak node has been previously started, you must use the
+`riak-admin cluster replace` command to change the node name and update
+the node's ring file.
+
+```bash
+riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10
+```
+
+{{% note title="Note on single nodes" %}}
+If a node is started singly using default settings, as you might do when you
+are building your first test environment, you will need to remove the ring
+files from the data directory after you edit your configuration files.
+`riak-admin cluster replace` will not work since the node has not been joined
+to a cluster.
+{{% /note %}} + +As with all cluster changes, you need to view the planned changes by +running `riak-admin cluster plan` and then running `riak-admin cluster +commit` to finalize those changes. + +The node is now properly set up to join other nodes for cluster +participation. You can proceed to adding a second node to the cluster. + +## Add a Second Node to Your Cluster + +Repeat the above steps for a second host on the same network, providing +the second node with a host/port and node name. Once the second node has +started, use `riak-admin cluster join` to join the second node to the +first node, thereby creating an initial Riak cluster. Let's say that +we've named our second node `riak@192.168.1.11`. From the new node's +`/bin` directory: + +```bash +riak-admin cluster join riak@192.168.1.10 +``` + +Output from the above should resemble: + +``` +Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10` +``` + +Next, plan and commit the changes: + +```bash +riak-admin cluster plan +riak-admin cluster commit +``` + +After the last command, you should see: + +``` +Cluster changes committed +``` + +If your output was similar, then the second Riak node is now part of the +cluster and has begun syncing with the first node. Riak provides several +ways to determine the cluster's ring status. Here are two ways to +examine your Riak cluster's ring: + +1. Using the `riak-admin` command: + + ```bash + bin/riak-admin status | grep ring_members + ``` + + With output resembling the following: + + ```bash + ring_members : ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +2. Running the `riak attach` command. This will open up an Erlang shell, +into which you can type the following command: + + ```erlang + 1> {ok, R} = riak_core_ring_manager:get_my_ring(). + + %% Response: + + {ok,{chstate,'riak@192.168.1.10',......... + (riak@192.168.52.129)2> riak_core_ring:all_members(R). + ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +To join additional nodes to your cluster, repeat the above steps. You +can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/2.9.8/using/cluster-operations/adding-removing-nodes) from a cluster. + +> **Ring Creation Size** +> +> All nodes in the cluster +must have the same initial ring size setting in order to join, and +participate in cluster activity. This setting can be adjusted in your +configuration file using the `ring_creation_size` parameter if you're +using the older configuration system or `ring_size` in the new system. +> +> Check the value of all nodes if you receive a message like this: +> `Failed: riak@10.0.1.156 has a different ring_creation_size` + +## Running Multiple Nodes on One Host + +If you built Riak from source code, or if you are using the Mac OS X +pre-built package, then you can easily run multiple Riak nodes on the +same machine. The most common scenario for doing this is to experiment +with running a Riak cluster. + +**Note**: If you have installed the `.deb` or `.rpm` package, then you +will need to download and build Riak from source to follow the +directions below. + +To run multiple nodes, make copies of the `riak` directory. + +- If you ran `make all rel`, then this can be found in `./rel/riak` + under the Riak source root directory. +- If you are running Mac OS X, then this is the directory where you + unzipped the `.tar.gz` file. + +Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`, +`./rel/riak3`, and so on, you need to make two changes: + +1. 
Set your handoff port and your Protocol Buffers or HTTP port +(depending on which interface you are using) to different values on each +node. For example: + + ```riakconf + # For Protocol Buffers: + listener.protobuf.internal = 127.0.0.1:8187 + + # For HTTP: + listener.http.internal = 127.0.0.1:8198 + + # For either interface: + handoff.port = 8199 + ``` + + ```appconfig + %% In the pb section of riak_core: + {"127.0.0.1", 8187 } + + %% In the http section of riak_core: + {"127.0.0.1", 8198} + ``` + +2. Change the name of each node to a unique name. Now, start the nodes, +changing path names and nodes as appropriate: + +```bash +./rel/riak1/bin/riak start +./rel/riak2/bin/riak start +./rel/riak3/bin/riak start + +# etc +``` + +Next, join the nodes into a cluster: + +```bash +./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak2/bin/riak-admin cluster plan +./rel/riak2/bin/riak-admin cluster commit +``` + +## Multiple Clusters on One Host + +Using the above technique, it is possible to run multiple clusters on +one computer. If a node hasn’t joined an existing cluster, it will +behave just as a cluster would. Running multiple clusters on one +computer is simply a matter of having two or more distinct nodes or +groups of clustered nodes. + + + + diff --git a/content/riak/kv/2.9.8/using/security.md b/content/riak/kv/2.9.8/using/security.md new file mode 100644 index 0000000000..4ea55013a3 --- /dev/null +++ b/content/riak/kv/2.9.8/using/security.md @@ -0,0 +1,199 @@ +--- +title: "Security & Firewalls" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Security" + identifier: "managing_security" + weight: 205 + parent: "managing" +toc: true +aliases: + - /riak/2.9.8/ops/advanced/security + - /riak/kv/2.9.8/ops/advanced/security +--- + +[config reference search]: {{<baseurl>}}riak/kv/2.9.8/configuring/reference/#search +[config search enabling]: {{<baseurl>}}riak/kv/2.9.8/configuring/search/#enabling-riak-search +[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.8/configuring/v3-multi-datacenter/ssl +[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html +[security basics]: {{<baseurl>}}riak/kv/2.9.8/using/security/basics +[security managing]: {{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/ +[Solr]: http://lucene.apache.org/solr/ +[usage search]: {{<baseurl>}}riak/kv/2.9.8/developing/usage/search + +> **Internal security** +> +> This document covers network-level security. For documentation on the +authentication and authorization features introduced in Riak 2.0, see +[Authentication and Authorization][security basics] and [Managing Security Sources][security managing] + +This article discusses standard configurations and port settings to use +when providing network security for a Riak Cluster. There are two +classes of access control for Riak: + +* Other Riak nodes participating in the cluster +* Clients making use of the Riak cluster + +The settings for both access groups are located in your cluster's +configuration settings. If you are using the newer configuration system, +you can set a host and port for each node in that node's `riak.conf` +file, setting `listener.protobuf` if you are using Riak's Protocol +Buffers interface or `listener.http` if you are using HTTP (or +`listener.https` if you are using SSL). 
If you are using the older +configuration system, adjust the settings of `pb`, `http`, or `https`, +depending on which client interface you are using. + +Make note of these configurations and set up your firewall to allow +incoming TCP access to those ports or IP address/port combinations. +Exceptions to this are the `handoff_ip` and `handoff_port` directives. +Those are for communication between Riak nodes only. + +## Inter-node Communication + +Riak uses the Erlang distribution mechanism for most inter-node +communication. Riak identifies other machines in the ring using Erlang +identifiers (`<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves +these node identifiers to a TCP port on a given machine via the Erlang +Port Mapper daemon (epmd) running on each cluster node. + +By default, epmd binds to TCP port 4369 and listens on the wildcard +interface. For inter-node communication, Erlang uses an unpredictable +port by default; it binds to port 0, which means the first available +port. + +For ease of firewall configuration, Riak can be configured +to instruct the Erlang interpreter to use a limited range +of ports. For example, to restrict the range of ports that Erlang will +use for inter-Erlang node communication to 6000-7999, add the following +lines to the configuration file on each Riak node: + +```riakconf +erlang.distribution.port_range.minimum = 6000 +erlang.distribution.port_range.maximum = 7999 +``` + +```appconfig +{ kernel, [ + {inet_dist_listen_min, 6000}, + {inet_dist_listen_max, 7999} + ]}, +``` + +The above lines should be added into the top level list in app.config, +at the same level as all the other applications (e.g. `riak_core`). +Then configure your firewall to allow incoming access to TCP ports 6000 +through 7999 from whichever network(s) contain your Riak nodes. + +### Riak Node Ports + +Riak nodes in a cluster need to be able to communicate freely with one +another on the following ports: + +* epmd listener: TCP:4369 +* handoff_port listener: TCP:8099 +* range of ports specified in `app.config` or `riak.conf` + +### Riak Client Ports + +Riak clients must be able to contact at least one machine in a Riak +cluster on the following TCP ports: + +Protocol | Port +:--------|:---- +<a href="../../developing/api/http">HTTP</a> | TCP port 8098 +<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087 + +### Riak Search Ports + +Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs +on each Riak node if security has been [enabled][config search enabling]. When +Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well +as some ephemeral ports. The well-known port is determined by the value of the +`search.solr.jmx_port` in each node's [Search configuration][config reference search]. +The default is 8985. + +In addition to JMX ports, Solr also binds to a well-known port of its +own, as determined by each node's `search.solr.port` setting, which is +also located in each node's Search configuration. The default is 8093. + +# Riak Security Community + +## Riak + +Riak is a powerful open-source distributed database focused on scaling +predictably and easily, while remaining highly available in the face of +server crashes, network partitions or other (inevitable) disasters. + +## Commitment + +Data security is an important and sensitive issue to many of our users. 
+A real-world approach to security allows us to balance appropriate +levels of security and related overhead while creating a fast, scalable, +and operationally straightforward database. + +### Continuous Improvement + +Though we make every effort to thwart security vulnerabilities whenever +possible (including through independent reviews), no system is +completely secure. We will never claim that Riak is 100% secure (and you +should seriously doubt anyone who claims their solution is). What we can +promise is that we openly accept all vulnerabilities from the community. +When appropriate, we'll publish and make every attempt to quickly +address these concerns. + +### Balance + +More layers of security increase operational and administrative costs. +Sometimes those costs are warranted, sometimes they are not. Our +approach is to strike an appropriate balance between effort, cost, and +security. + +For example, Riak does not have fine-grained role-base security. Though +it can be an attractive bullet-point in a database comparison chart, +you're usually better off finely controlling data access through your +application or a service layer. + +### Notifying Basho + +If you discover a potential security issue, please email us at +**security@basho.com**, and allow us 48 hours to reply. + +We prefer to be contacted first, rather than searching for blog posts +over the Internet. This allows us to open a dialogue with the security +community on how best to handle a possible exploit without putting any +users at risk. + +## Security Best Practices + +### Authentication and Authorization + +For instructions on how to apply permissions and to require client +authentication, please see our documentation on [Riak Security][security basics]. + +### Network Configurations + +Being a distributed database means that much of Riak's security springs +from how you configure your network. We have a few recommendations for +[Security and Firewalls][security basics]. + +### Client Auth + +All of the Riak client libraries support encrypted TCP communication +as well as authentication and authorization. For instructions on how +to apply permissions and to require client authentication, please see +our documentation on [Riak Security][security basics]. + +### Multi-Datacenter Replication + +For those versions of Riak that support Multi Data Center (MDC) +Replication, you can configure Riak 1.2+ to communicate over SSL, to +seamlessly encrypt the message traffic. + +See also: [Multi Data Center Replication: SSL][config v3 ssl] + + + + diff --git a/content/riak/kv/2.9.8/using/security/basics.md b/content/riak/kv/2.9.8/using/security/basics.md new file mode 100644 index 0000000000..1c619d2842 --- /dev/null +++ b/content/riak/kv/2.9.8/using/security/basics.md @@ -0,0 +1,851 @@ +--- +title: "Security Basics" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Security Basics" + identifier: "security_basics" + weight: 100 + parent: "managing_security" +toc: true +aliases: + - /riak/2.9.8/ops/running/authz + - /riak/kv/2.9.8/ops/running/authz +--- + +> **Note on Network security** +> +> This document covers only the 2.0 authentication and authorization +features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/). 
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/2.9.8/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/2.9.8/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
+ +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak-admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak-admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak-admin security disable +``` + +While security is disabled, clients will need to be reconfigured to no +longer require TLS and send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak-admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics and granting users +selective access to Riak functionality (and also to revoke access). +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created. 
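+
+As a quick preview of the pieces covered in the rest of this document, a
+minimal setup for a single user might look like the following (the names
+and network are illustrative; each command is explained in the sections
+below):
+
+```bash
+# Create the user with a password
+riak-admin security add-user riakuser password=Test1234
+
+# Let riakuser authenticate with that password from localhost
+riak-admin security add-source riakuser 127.0.0.1/32 password
+
+# Grant riakuser basic key/value access to all buckets and bucket types
+riak-admin security grant riak_kv.get,riak_kv.put on any to riakuser
+```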
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak-admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak-admin security print-groups
+```
+
+Example output, assuming a user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups |       password       |           options            |
++----------+--------+----------------------+------------------------------+
+| riakuser |        |983e8ae1421574b8733824|              []              |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in encrypted form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and a `name` of
+`lucius`, the output would look like this:
+
+```
++----------+----------------+----------------------+---------------------+
+| username |     groups     |       password       |       options       |
++----------+----------------+----------------------+---------------------+
+| riakuser |      dev       |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` or `security print-group` (singular) commands
+can be used with a name as an argument to see the same information as
+above, except for only that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak-admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group  |   type   |  bucket  |                 grants                 |
++--------+----------+----------+----------------------------------------+
+| admin  |    *     |    *     |      riak_kv.get, riak_kv.delete,      |
+|        |          |          |              riak_kv.put               |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|    *     |      *      |      riak_kv.get, riak_kv.delete,      |
+|          |             |              riak_kv.put               |
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately.
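+
+For instance, to list only the grants for the *user* named `riakuser`,
+regardless of whether a group of the same name exists:
+
+```bash
+riak-admin security print-grants user/riakuser
+```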
+ +### Add Group + +For easier management of permissions across several users, it is +possible to create groups to be assigned to those users. + +```bash +riak-admin security add-group admin +``` + +### Add User + +To create a user with the username `riakuser`, we use the `add-user` +command: + +```bash +riak-admin security add-user riakuser +``` + +Using the command this way will create the user `riakuser` without _any_ +characteristics beyond a username, which is the only attribute that you +must assign upon user creation. + +Alternatively, a password---or other attributes---can be assigned to the +user upon creation. Here, we'll assign a password: + +```bash +riak-admin security add-user riakuser password=Test1234 +``` + +### Assigning a Password and Altering Existing User Characteristics + +While passwords and other characteristics can be set upon user creation, +it often makes sense to change user characteristics after the user has +already been created. Let's say that the user `riakuser` was created +without a password (or created _with_ a password that we'd like to +change). The `alter-user` command can be used to modify our `riakuser` +user: + +```bash +riak-admin security alter-user riakuser password=opensesame +``` + +When creating or altering a user, any number of `<option>=<value>` +pairs can be appended to the end of the command. Any non-standard +options will be stored and displayed via the `riak-admin security +print-users` command. + +```bash +riak-admin security alter-user riakuser name=bill age=47 fav_color=red +``` + +Now, the `print-users` command should return this: + +``` ++----------+--------+----------+--------------------------------------------------+ +| username | groups | password | options | ++----------+--------+----------+--------------------------------------------------+ +| riakuser | | |[{"fav_color","red"},{"age","47"},{"name","bill"}]| ++----------+--------+----------+--------------------------------------------------+ +``` + +**Note**: Usernames _cannot_ be changed using the `alter-user` command. +For example, running `riak-admin security alter-user riakuser +username=other-name`, will instead add the +`{"username","other-name"}` tuple to `riakuser`'s options. + +### Managing Groups for a User + +If we have a user `riakuser` and we'd like to assign her to the +`admin` group, we assign the value `admin` to the option `groups`: + +```bash +riak-admin security alter-user riakuser groups=admin +``` + +If we'd like to make the user `riakuser` both an `admin` and an +`archoverlord`: + +```bash +riak-admin security alter-user riakuser groups=admin,archoverlord +``` + +There is no way to incrementally add groups; even if `riakuser` was +already an `admin`, it is necessary to list it again when adding the +`archoverlord` group. Thus, to remove a group from a user, use +`alter-user` and list all *other* groups. + +If the user should be removed from all groups, use `groups=` with no +list: + +```bash +riak-admin security alter-user riakuser groups= +``` + +### Managing Groups for Groups + +Groups can be added to other groups for cascading permissions. 
+
+```bash
+riak-admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```bash
+riak-admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```bash
+riak-admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak-admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation
+:----------|:----------
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully.
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/2.9.8/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/2.9.8/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/2.9.8/configuring/search/) document. 
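+
+For reference, search is toggled per node in `riak.conf` (the full
+procedure is covered in the Riak Search Settings document linked above):
+
+```riakconf
+search = on
+```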
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak-admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak-admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Checks the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticates against the given pluggable authentication module (PAM) service
+`certificate` | Authenticates using a client certificate
+
+### Example: Adding a Trusted Source
+
+Security sources can be added either to a specific user, multiple users,
+or all users (`all`).
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak-admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+|       users        |    cidr    |  source  | options  |
++--------------------+------------+----------+----------+
+|        all         |127.0.0.1/32|  trust   |    []    |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR.
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/2.9.8/setup/installing/source/erlang). 
This issue should +not affect Erlang 17.0 and later. + +## Enabling SSL + +In order to use any authentication or authorization features, you must +enable SSL for Riak. **SSL is disabled by default**, but you will need +to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#client-interfaces) for the node +as well as a [certification configuration](#certificate-configuration). + +If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/2.9.8/developing/api/http) for Riak and would like to +configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host +and port. The following configuration would establish port 8088 on +`localhost` as the HTTPS port: + +```riakconf +listener.https.$name = 127.0.0.1:8088 + +# By default, "internal" is used as the "name" setting +``` + +```appconfig +{riak_core, [ + %% Other configs + {https, [{"127.0.0.1", 8088}]}, + %% Other configs + ]} +``` + +## TLS Settings + +When using Riak security, you can choose which versions of SSL/TLS are +allowed. By default, only TLS 1.2 is allowed, but this version can be +disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#security) to `on` or `off`: + +* `tls_protocols.tlsv1` +* `tls_protocols.tlsv1.1` +* `tls_protocols.tlsv1.2` +* `tls_protocols.sslv3` + +Three things to note: + +* Among the four available options, only TLS version 1.2 is enabled by + default +* You can enable more than one protocol at a time +* We strongly recommend that you do _not_ use SSL version 3 unless + absolutely necessary + +## Certificate Configuration + +If you are using any of the available [security sources]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#trust-based-authentication), you will need to do so +over a secure SSL connection. In order to establish a secure connection, +you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#security) point to the proper paths for your +generated certs. By default, Riak assumes that all certs are stored in +each node's `/etc` directory. + +If you are using the newer, `riak.conf`-based configuration system, you +can change the location of the `/etc` directory by modifying the +`platform_etc_dir`. More information can be found in our documentation +on [configuring directories]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/#directories). 
+ +<table class="riak-conf"> + <thead> + <tr> + <th>Type</th> + <th>Parameter</th> + <th>Default</th> + </tr> + </thead> + <tbody> + <tr> + <td><strong>Signing authority</strong></td> + <td><code>ssl.cacertfile</code></td> + <td><code>#(platform_etc_dir)/cacertfile.pem</code></td> + </tr> + <tr> + <td><strong>Cert</strong></td> + <td><code>ssl.certfile</code></td> + <td><code>#(platform_etc_dir)/cert.pem</code></td> + </tr> + <tr> + <td><strong>Key file</strong></td> + <td><code>ssl.keyfile</code></td> + <td><code>#(platform_etc_dir)/key.pem</code></td> + </tr> + </tbody> +</table> + +If you are using the older, `app.config`-based configuration system, +these paths can be set in the `ssl` subsection of the `riak_core` +section. The corresponding parameters are shown in the example below: + +```appconfig +{riak_core, [ + %% Other configs + + {ssl, [ + {certfile, "./etc/cert.pem"}, + {keyfile, "./etc/key.pem"}, + {cacertfile, "./etc/cacertfile.pem"} + ]}, + + %% Other configs +]} +``` + +## Referer Checks and Certificate Revocation Lists + +In order to provide safeguards against +[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting) +(XSS) and +[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +attacks, Riak performs [secure referer +checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those +checks make it impossible to serve data directly from Riak. To disable +those checks, set the `secure_referer_check` parameter to `off`. + +If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/2.9.8/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +default. To disable this behavior, set the `check_crl` parameter to +`off`. + + + + diff --git a/content/riak/kv/2.9.8/using/security/best-practices.md b/content/riak/kv/2.9.8/using/security/best-practices.md new file mode 100644 index 0000000000..9ffe45eb2c --- /dev/null +++ b/content/riak/kv/2.9.8/using/security/best-practices.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Security Best Practices" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Best Practices" + identifier: "security_best_practices" + weight: 102 + parent: "managing_security" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/2.9.8/using/security/managing-sources.md b/content/riak/kv/2.9.8/using/security/managing-sources.md new file mode 100644 index 0000000000..7b9d85abc4 --- /dev/null +++ b/content/riak/kv/2.9.8/using/security/managing-sources.md @@ -0,0 +1,273 @@ +--- +title: "Managing Security Sources" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Managing Security Sources" + identifier: "security_manage_sources" + weight: 101 + parent: "managing_security" +toc: true +aliases: + - /riak/2.9.8/ops/running/security-sources + - /riak/kv/2.9.8/ops/running/security-sources +--- + +If you're looking for more general information on Riak Security, it may +be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/2.9.8/using/security/basics). + +This document provides more granular information on the four available +authentication sources in Riak Security: trusted networks, password, +pluggable authentication modules (PAM), and certificates. These sources +correspond to `trust`, `password`, `pam`, and `certificate`, +respectively, in the `riak-admin security` interface. + +The examples below will assume that the network in question is +`127.0.0.1/32` and that a Riak user named `riakuser` has been +[created]({{<baseurl>}}riak/kv/2.9.8/using/security/basics/#user-management) and that +security has been [enabled]({{<baseurl>}}riak/kv/2.9.8/using/security/basics/#the-basics). + +{{% note title="Note on SSL connections" %}} +If you use _any_ of the aforementioned security sources, even `trust`, you +will need to do so via a secure SSL connection. 
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak-admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak-admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 password +riak-admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/2.9.8/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
+The default directory for certificates is `/etc`, though you can specify +a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: + +```riakconf +ssl.certfile = /path/to/cert.pem +ssl.keyfile = /path/to/key.pem +ssl.cacertfile = /path/to/cacert.pem +``` + +In the client-side example above, the client's `CN` and Riak username +needed to match. On the server (i.e. Riak) side, the `CN` specified _on +each node_ must match the node's name as registered by Riak. You can +find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/2.9.8/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +`riak-node-1`, you would need to generate your certificate with that in +mind, as in this OpenSSL example: + +```bash +openssl req -new ... '/CN=riak-node-1' +``` + +Once certificates have been properly generated and configured on all of +the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/2.9.8/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +certificate that you generated for the user `riakuser`. + +How to use Riak clients in conjunction with OpenSSL and other +certificates varies from client library to client library. We strongly +recommend checking the documentation of your client library for further +information. + +## PAM-based Authentication + +This section assumes that you have set up a PAM service bearing the name +`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition +specifying `auth` and/or other PAM services set up to authenticate a +user named `riakuser`. As in the certificate-based authentication +example above, the user's name must be the same in both your +authentication module and in Riak Security. + +If we want the user `riakuser` to use this PAM service on `localhost`, +we need to add a `pam` security source in Riak and specify the name of +the service: + +```bash +riak-admin security add-source all 127.0.0.1/32 pam service=riak_pam +``` + +**Note**: If you do not specify a name for your PAM service, Riak will +use the default, which is `riak`. + +To verify that the source has been properly specified: + +```bash +riak-admin security print-sources +``` + +That command should output the following: + +``` ++--------------------+------------+----------+------------------------+ +| users | cidr | source | options | ++--------------------+------------+----------+------------------------+ +| riakuser |127.0.0.1/32| pam |[{"service","riak_pam"}]| ++--------------------+------------+----------+------------------------+ +``` + +You can test that setup most easily by using `curl`. A normal request to +Riak without specifying a user will return an `Unauthorized` message: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +Response: + +``` +<html><head><title>401 Unauthorized
</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></body></html>
+``` + +If you identify yourself as `riakuser` and are successfully +authenticated by your PAM service, you should get either `not found` or +a Riak object if one is stored in the specified bucket type/bucket/key +path: + +```curl +curl -u riakuser: \ + https://localhost:8098/types//buckets//keys/ +``` + +## How Sources Are Applied + +When managing security sources---any of the sources explained +above---you always have the option of applying a source to either a +single user, multiple users, or all users (`all`). If specific users and +`all` have no sources in common, this presents no difficulty. But what +happens if one source is applied to `all` and a different source is +applied to a specific user? + +The short answer is that the more specifically assigned source---i.e. to +the user---will be consider a user's security source. We'll illustrate +that with the following example, in which the `certificate` source is +assigned to `all`, but the `password` source is assigned to `riakuser`: + +```bash +riak-admin security add-source all 127.0.0.1/32 certificate +riak-admin security add-source riakuser 127.0.0.1/32 password +``` + +If we run `riak-admin security print-sources`, we'll get the following +output: + +``` ++--------------------+------------+-----------+----------+ +| users | cidr | source | options | ++--------------------+------------+-----------+----------+ +| riakuser |127.0.0.1/32| password | [] | +| |127.0.0.1/32|certificate| [] | +| all |127.0.0.1/32|certificate| [] | ++--------------------+------------+-----------+----------+ +``` + +As we can see, `password` is set as the security source for `riakuser`, +whereas everyone else will authenticate using `certificate`. + + + + diff --git a/content/riak/kv/2.9.8/using/security/v2-v3-ssl-ca.md b/content/riak/kv/2.9.8/using/security/v2-v3-ssl-ca.md new file mode 100644 index 0000000000..e207310a3e --- /dev/null +++ b/content/riak/kv/2.9.8/using/security/v2-v3-ssl-ca.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "V2 / V3 SSL & CA Validation" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "V2/V3 SSL & CA Validation" + identifier: "security_validation" + weight: 103 + parent: "managing_security" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. 
Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/2.9.8/using/troubleshooting.md b/content/riak/kv/2.9.8/using/troubleshooting.md new file mode 100644 index 0000000000..20a3dc8fd9 --- /dev/null +++ b/content/riak/kv/2.9.8/using/troubleshooting.md @@ -0,0 +1,28 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +aliases: +--- + +[http 204]: ./http-204 + +## In This Section + +#### [HTTP 204][http 204] + +About the HTTP 204 response. + +[Learn More >>][http 204] + + + + diff --git a/content/riak/kv/2.9.8/using/troubleshooting/http-204.md b/content/riak/kv/2.9.8/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..02fbcbd10a --- /dev/null +++ b/content/riak/kv/2.9.8/using/troubleshooting/http-204.md @@ -0,0 +1,22 @@ +--- +title: "HTTP 204" +description: "" +project: "riak_kv" +project_version: 2.9.8 +menu: + riak_kv-2.9.8: + name: "HTTP 204" + identifier: "troubleshooting_http_204" + weight: 101 + parent: "managing_troubleshooting" +toc: true +aliases: +--- + +In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers. + +If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored, otherwise you will receive a `204 No Content`. 
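+
+For example, the difference is easy to see with two otherwise identical `PUT` requests (a minimal sketch, assuming a local node on the default HTTP port and a bucket named `test`; adjust host, port, bucket, and key to your environment):
+
+```curl
+# Without returnbody: Riak replies "HTTP/1.1 204 No Content" and an empty body
+curl -i -XPUT -H 'Content-Type: text/plain' -d 'hello' \
+  'http://127.0.0.1:8098/buckets/test/keys/demo'
+
+# With returnbody=true: Riak replies "HTTP/1.1 200 OK" and echoes the stored value
+curl -i -XPUT -H 'Content-Type: text/plain' -d 'hello' \
+  'http://127.0.0.1:8098/buckets/test/keys/demo?returnbody=true'
+```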
+ + + + diff --git a/content/riak/kv/2.9.9/_reference-links.md b/content/riak/kv/2.9.9/_reference-links.md new file mode 100644 index 0000000000..ce2e70681e --- /dev/null +++ b/content/riak/kv/2.9.9/_reference-links.md @@ -0,0 +1,254 @@ + +# Riak KV 2.9.9 Reference Links List + + +## Common + +[downloads]: {{}}riak/kv/2.9.9/downloads/ +[install index]: {{}}riak/kv/2.9.9/setup/installing +[upgrade index]: {{}}riak/kv/2.9.9/upgrading +[plan index]: {{}}riak/kv/2.9.9/planning +[config index]: {{}}riak/kv/2.9.9/using/configuring/ +[config reference]: {{}}riak/kv/2.9.9/configuring/reference/ +[manage index]: {{}}riak/kv/2.9.9/using/managing +[performance index]: {{}}riak/kv/2.9.9/using/performance +[glossary vnode]: {{}}riak/kv/2.9.9/learn/glossary/#vnode +[contact basho]: https://www.tiot.jp/en/about-us/contact-us/ + + +## Planning + +[plan index]: {{}}riak/kv/2.9.9/setup/planning +[plan start]: {{}}riak/kv/2.9.9/setup/planning/start +[plan backend]: {{}}riak/kv/2.9.9/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/2.9.9/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/2.9.9/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/2.9.9/setup/planning/backend/leveled +[plan backend memory]: {{}}riak/kv/2.9.9/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.9.9/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/2.9.9/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/2.9.9/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/2.9.9/setup/planning/best-practices +[plan future]: {{}}riak/kv/2.9.9/setup/planning/future + + +## Installing + +[install index]: {{}}riak/kv/2.9.9/setup/installing +[install aws]: {{}}riak/kv/2.9.9/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/2.9.9/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/2.9.9/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/2.9.9/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/2.9.9/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/2.9.9/setup/installing/smartos +[install solaris]: {{}}riak/kv/2.9.9/setup/installing/solaris +[install suse]: {{}}riak/kv/2.9.9/setup/installing/suse +[install windows azure]: {{}}riak/kv/2.9.9/setup/installing/windows-azure + +[install source index]: {{}}riak/kv/2.9.9/setup/installing/source +[install source erlang]: {{}}riak/kv/2.9.9/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/2.9.9/setup/installing/source/jvm + +[install verify]: {{}}riak/kv/2.9.9/setup/installing/verify + + +## Upgrading + +[upgrade index]: {{}}riak/kv/2.9.9/setup/upgrading +[upgrade checklist]: {{}}riak/kv/2.9.9/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/2.9.9/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/2.9.9/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/2.9.9/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/2.9.9/setup/downgrade + + +## Configuring + +[config index]: {{}}riak/kv/2.9.9/configuring +[config basic]: {{}}riak/kv/2.9.9/configuring/basic +[config backend]: {{}}riak/kv/2.9.9/configuring/backend +[config manage]: {{}}riak/kv/2.9.9/configuring/managing +[config reference]: {{}}riak/kv/2.9.9/configuring/reference/ +[config strong consistency]: {{}}riak/kv/2.9.9/configuring/strong-consistency +[config load balance]: {{}}riak/kv/2.9.9/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/2.9.9/configuring/mapreduce +[config search]: 
{{}}riak/kv/2.9.9/configuring/search/ + +[config v3 mdc]: {{}}riak/kv/2.9.9/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/2.9.9/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/2.9.9/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/2.9.9/configuring/v3-multi-datacenter/ssl + +[config v2 mdc]: {{}}riak/kv/2.9.9/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/2.9.9/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/2.9.9/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/2.9.9/configuring/v2-multi-datacenter/ssl + + + +## Using + +[use index]: {{}}riak/kv/2.9.9/using/ +[use admin commands]: {{}}riak/kv/2.9.9/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/2.9.9/using/running-a-cluster + +### Reference + +[use ref custom code]: {{}}riak/kv/2.9.9/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/2.9.9/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/2.9.9/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/2.9.9/using/reference/search +[use ref 2i]: {{}}riak/kv/2.9.9/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/2.9.9/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/2.9.9/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/2.9.9/using/reference/jmx +[use ref obj del]: {{}}riak/kv/2.9.9/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/2.9.9/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/2.9.9/using/reference/v2-multi-datacenter + +### Cluster Admin + +[use admin index]: {{}}riak/kv/2.9.9/using/admin/ +[use admin commands]: {{}}riak/kv/2.9.9/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/2.9.9/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/2.9.9/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/2.9.9/using/admin/riak-control/ + +### Cluster Operations + +[cluster ops add remove node]: {{}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/2.9.9/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/2.9.9/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/2.9.9/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/2.9.9/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/2.9.9/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/2.9.9/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/2.9.9/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/2.9.9/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/2.9.9/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/2.9.9/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/2.9.9/using/cluster-operations/v2-multi-datacenter + +### Repair/Recover + +[repair recover index]: {{}}riak/kv/2.9.9/using/repair-recovery +[repair recover index]: {{}}riak/kv/2.9.9/using/repair-recovery/failure-recovery/ + +### Security + +[security index]: {{}}riak/kv/2.9.9/using/security/ +[security basics]: {{}}riak/kv/2.9.9/using/security/basics +[security managing]: {{}}riak/kv/2.9.9/using/security/managing-sources/ + +### Performance + +[perf index]: 
{{}}riak/kv/2.9.9/using/performance/ +[perf benchmark]: {{}}riak/kv/2.9.9/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.9.9/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/2.9.9/using/performance/erlang +[perf aws]: {{}}riak/kv/2.9.9/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/2.9.9/using/performance/latency-reduction + +### Troubleshooting + +[troubleshoot http]: {{}}riak/kv/2.9.9/using/troubleshooting/http-204 + + +## Developing + +[dev index]: {{}}riak/kv/2.9.9/developing +[dev client libraries]: {{}}riak/kv/2.9.9/developing/client-libraries +[dev data model]: {{}}riak/kv/2.9.9/developing/data-modeling +[dev data types]: {{}}riak/kv/2.9.9/developing/data-types +[dev kv model]: {{}}riak/kv/2.9.9/developing/key-value-modeling + +### Getting Started + +[getting started]: {{}}riak/kv/2.9.9/developing/getting-started +[getting started java]: {{}}riak/kv/2.9.9/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/2.9.9/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/2.9.9/developing/getting-started/python +[getting started php]: {{}}riak/kv/2.9.9/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/2.9.9/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/2.9.9/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/2.9.9/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/2.9.9/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/2.9.9/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.9/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.9/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.9/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.9/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.9/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.9/developing/getting-started/golang/object-modeling + +### Usage + +[usage index]: {{}}riak/kv/2.9.9/developing/usage +[usage bucket types]: {{}}riak/kv/2.9.9/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/2.9.9/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/2.9.9/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/2.9.9/developing/usage/content-types +[usage create objects]: {{}}riak/kv/2.9.9/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/2.9.9/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/2.9.9/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/2.9.9/developing/usage/mapreduce +[usage search]: {{}}riak/kv/2.9.9/developing/usage/search +[usage search schema]: {{}}riak/kv/2.9.9/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/2.9.9/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/2.9.9/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/2.9.9/developing/usage/updating-objects + +### App Guide + +[apps mapreduce]: {{}}riak/kv/2.9.9/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/2.9.9/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/2.9.9/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/2.9.9/developing/api/backend +[dev api http]: 
{{}}riak/kv/2.9.9/developing/api/http +[dev api http status]: {{}}riak/kv/2.9.9/developing/api/http/status +[dev api pbc]: {{}}riak/kv/2.9.9/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/2.9.9/learn/glossary/ +[glossary aae]: {{}}riak/kv/2.9.9/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/2.9.9/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/2.9.9/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/2.9.9/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/2.9.9/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/2.9.9/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/2.9.9/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/2.9.9/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/2.9.9/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/2.9.9/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/2.9.9/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/2.9.9/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + + + + + + diff --git a/content/riak/kv/2.9.9/add-ons.md b/content/riak/kv/2.9.9/add-ons.md new file mode 100644 index 0000000000..15fbfa2ce5 --- /dev/null +++ b/content/riak/kv/2.9.9/add-ons.md @@ -0,0 +1,25 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +aliases: +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. + +* [Riak Redis Add-on]({{}}riak/kv/2.9.9/add-ons/redis/) + + + + + diff --git a/content/riak/kv/2.9.9/add-ons/redis.md b/content/riak/kv/2.9.9/add-ons/redis.md new file mode 100644 index 0000000000..f3e8b576f8 --- /dev/null +++ b/content/riak/kv/2.9.9/add-ons/redis.md @@ -0,0 +1,63 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +aliases: +--- + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. +{{% /note %}} + +Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV. 
+ +RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate. + +Whether you are looking to build out a session, shopping cart, advertisement or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV). + +## Compatibility + +RRA is supported on the following platforms: + +* RHEL/CentOS 6 +* RHEL/CentOS 7 +* Ubuntu 12.04 LTS "Precise Pangolin" +* Ubuntu 14.04 LTS "Trusty Tahr" +* Debian 7 "Wheezy" +* Debian 8 "Jessie" + +RRA is compatible with the following services: + +* Riak KV Enterprise (2.1.4+) +* Riak TS Enterprise (1.4.0+) +* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster) + * Redis Cluster and RRA's consistent hash are at odds, which surface as errors + such as MOVED, ASK, and CROSSSLOT messages from Redis, see (WIP): + https://github.com/antirez/redis-rb-cluster + +## Get Started + +* [Set up RRA.][addon redis setup] +* [Use RRA with various clients.][addon redis use] +* [Develop with RRA.][addon redis develop] +* [Learn about RRA's features.][addon redis features] + + + + diff --git a/content/riak/kv/2.9.9/add-ons/redis/developing-rra.md b/content/riak/kv/2.9.9/add-ons/redis/developing-rra.md new file mode 100644 index 0000000000..e2fd0ecae8 --- /dev/null +++ b/content/riak/kv/2.9.9/add-ons/redis/developing-rra.md @@ -0,0 +1,330 @@ +--- +title: "Developing with Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Develop with Redis Add-on" + identifier: "add-ons_redis_develop" + weight: 403 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: +--- + +[redis-clients]: http://redis.io/clients +[usage bucket types]: {{}}riak/kv/2.9.9/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/2.9.9/developing/api/http +[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ +[apps replication properties]: {{}}riak/kv/2.9.9/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/2.9.9/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/2.9.9/learn/concepts/causal-context +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. + +## Overview + +Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides accessibility to Riak KV, as a persistent data store, with Redis, as a cache through the various Redis client libraries and command-line interface tool `redis-cli`. + +As with Riak KV, the cache proxy service almost always performs best and most +predictably when you use the basic CRUD operations -- Create, Read, Update, +Delete -- that you'd find in any key/value store. Learning these operations +is a great place to start when beginning to develop applications that use +RRA. + +The set of clients (including recommendations) for Redis are listed at +[Redis clients][redis-clients]. For brevity sake, examples provided here are +in: + +* Erlang (Eredis) +* Javascript (node_redis) +* Python (redis-py) +* Ruby (redis-rb) +* Scala (lettuce) +* Java, see the Scala examples. The code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client. 
+ +## Riak KV Setup + +While you can use Riak Redis Add-on with Riak KV configured so either `last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'. + +The cache proxy service is tested under both configurations. However, due to lack of support via the Redis protocol for returning multiple values for a single `GET`, effectively `last_write_wins` semantics apply. + +For a deeper explanation of Riak KV's configurable behaviors, see John Daily's +blog series [part 4][config-behaviors] . + +### Bucket Type Setup + +#### Create a Bucket Type + +If your application organizes data in a way that does not include bucket-type +and instead only uses bucket to organize its keyspace, the `default` bucket-type +can be used by omitting the bucket-type portion of the colon-delimited +hierarchical namespaced key. Otherwise said, `test:food` is equivalent to +`default:test:food` where the bucket-type is `default`, the bucket is `test`, +and the key is `food`. For examples here, we will use `rra:test:food` to clearly +use a bucket-type. + +If your application organizes data including a bucket-type, ensure that that +bucket-type is created in Riak without specifying the data type, so effectively +an opaque value, ie a `string`. The following command provides an example of +creating the bucket-type `rra`: + +```sh +if ! riak-admin bucket-type status rra >/dev/null 2>&1; then + riak-admin bucket-type create rra '{"props":{}}' + riak-admin bucket-type activate rra +fi +``` + +#### Set Bucket Props + +The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false': + +```sh +curl -XPUT -H 'Content-Type: application/json' \ + -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \ + 'http://127.0.0.1:8098/types/rra/buckets/test/props' +``` + +For additional configuration options see [bucket properties][dev api http]. + +## Object/Key Operations + +Riak KV organizes data into buckets, keys, and values, with +[bucket types][usage bucket types] acting as an additional namespace in Riak KV +versions 2.0 and greater. Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket. + +Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or other plaintext representations that can be parsed in the client application (e.g. YAML). + +While buckets are a flat namespace in Riak KV and you can name them +whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy +service, Redis bucket_type:bucket:key is mapped to Riak KV +bucket_type/bucket/key, so bucket type and bucket names should not contain +colon (`:`). When not specified, bucket type defaults to "default". + +Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets. + +The same goes for naming keys: many objects can have the same key as long as they're in different buckets. There is no restriction on key containing colon (`:`), and this practice of representing a nested namespace is common in applications using Redis. 
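+
+To make the mapping concrete, the following sketch writes through the cache proxy and reads the same object back through Riak KV's HTTP API (assuming, as in the examples below, that RRA listens on 22122 and Riak KV's HTTP interface on 8098):
+
+```sh
+# The Redis key "rra:test:food" maps to bucket type "rra", bucket "test",
+# and key "food" in Riak KV
+redis-cli -h 127.0.0.1 -p 22122 set rra:test:food apple
+
+# The same object is addressable through Riak KV directly
+curl http://127.0.0.1:8098/types/rra/buckets/test/keys/food
+```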
+ +Riak KV [bucket types][usage bucket types] enable you to provide common +configurations for buckets (as many buckets as you wish). This means you can +easily enable buckets to share common configurations, i.e. identical +[replication properties][apps replication properties] or +[commit hooks][usage commit hooks]. + + +## Reading Objects + +Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading-through to Riak KV which results in greater resilience through node outages and network partitions. + +To request a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.get("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.get("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.get("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +var value = connection.get("rra:test:food") +``` + +### Get Configuration Parameters + +>**Note:** The cache proxy service read option (related to replication factor and +consistency concern) may optionally be set within the nutcracker.conf. This will result in an override of the setting value at the bucket-level in Riak KV. + +The following configuration parameters apply to `GET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` | +|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` | +|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | +|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | 1 (true) | +|`timeout` | The number of milliseconds to await a response. | `0` (server specified) | + + +### Sibling Resolution + +As the Redis protocol does not provide a means to return multiple siblings, +the cache proxy service must provide server-side sibling resolution. At present, only last-write-wins sibling resolution is available. The result is an effective +last-write-wins configuration for access through the cache proxy service. + + +## Writing Objects + +Writes via the cache proxy service are analogous to a Redis `SET`, with the added +benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating +cache. As with HTTP PUT, `SET` semantically covers both create and update +operations. + +To set a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.set("rra:test:food", "apple", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.set("rra:test:food", "apple") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.set("rra:test:food', 'apple") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +connection.set("rra:test:food", "apple") +``` + +### Set Configuration Parameters + +>**Note:** The cache proxy service write option (related to replication factor and +consistency concern) may optionally be set within the nutcracker.conf, resulting +in an override of the setting value at the bucket-level in Riak KV. + +The following configuration parameters apply to `SET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | + + +### Sibling Explosion + +As noted in the section "Sibling Resolution" above, Riak KV provides for a line of +descendency (known as the [causal context][[concept causal context]]) for a value stored at a key. Clients +performing write operations provide this causal context by setting the vector +clock (VClock) that they last read. + +If a client does not provide the causal context, Riak KV makes no assumptions and treats the write as a new causal context, semantically equivalent to a +create. In the case that a value is already stored at the key, this would lead +to a sibling. + +Since the Redis protocol does not provide a means to pass a VClock, the cache +proxy service needs to perform a read-before-write to obtain the current VClock so the write can continue the causal context previously established and avoid +"sibling explosion". + +Despite these efforts, in the event of a network partition, siblings will still +be created as clients writing to nodes on either side of the network partition +can create divergent lines of descendency. Sibling resolution remains the means +to merge these lines of descent into a coherent causal context. + +## Deleting Objects + +Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added +benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating +cache. + +To delete a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.del("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.del("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.del("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +connection.del("rra:test:food") +``` + +### Delete Configuration Parameters + +The following configuration parameters apply to `DEL` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | + + + + diff --git a/content/riak/kv/2.9.9/add-ons/redis/redis-add-on-features.md b/content/riak/kv/2.9.9/add-ons/redis/redis-add-on-features.md new file mode 100644 index 0000000000..d527d83c31 --- /dev/null +++ b/content/riak/kv/2.9.9/add-ons/redis/redis-add-on-features.md @@ -0,0 +1,136 @@ +--- +title: "Riak Redis Add-on Features" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Redis Add-on Features" + identifier: "add-ons_redis_features" + weight: 504 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: +--- + +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png +[redis docs]: http://redis.io/commands +[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md + +## Overview + +The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware. + +On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available: + +* [Pre-sharding](#pre-sharding) +* [Connection Aggregation](#connection-aggregation) +* [Command Pipelining](#command-pipelining) +* [Read-through Cache](#read-through-cache) +* [Write-around Cache](#write-around-cache) +* [Commands](#commands) +* [Object Lifetime](#object-lifetime) + +## Pre-sharding + +Pre-sharding with consistent hashing dispatches object reads and writes based +on a configurable hash function, spreading load across multiple cache servers. +The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then +requests are routed to the Redis server that handles that portion of the key +range. + +Redis with no persistence is used as the frontend cache proxy service, and +Redis as a data server holds all data in memory. The addressable memory of +cache proxy is limited. By employing pre-sharding, the total addressable cache +memory space is extended by the number of Redis servers. + +## Connection Aggregation + +Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers. This reduces the total required connections to the Redis server for the same key. + +Redis clients in various languages support specifying multiple servers, as well +as implementing multiple methods of spreading load across those servers (i.e. +round-robin load balancing or consistent hashing). Since the cache proxy service is providing consistent hashing, any Redis client method of supporting multiple +servers will suffice. 
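+
+One way to observe the aggregation effect on a running host is to compare established connections reaching the proxy against those reaching a Redis server (a sketch, assuming the default RRA listen port 22122 and Redis port 6379):
+
+```bash
+# connections from local clients into the cache proxy
+ss -tn state established '( dport = :22122 )'
+
+# connections from the proxy out to a Redis server; with aggregation this
+# list stays small even as the number of clients grows
+ss -tn state established '( dport = :6379 )'
+```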
+ +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideal due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server during a write (DELETE or PUT) to the +backend server. + +A short `CACHE_TTL`, for example "15s", reduces a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three write cache strategies, the write-around cache strategy is the least +prone to race condition, but least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV utilizing the read-through + caching strategy with a TTL set at service configuration time. + +* SET - set the value of a key to Riak KV and invalidate cache, issue a PEXPIRE + to Redis. + +* DEL - delete the value of a key to Riak KV and invalidate cache, issue a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs]. 
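+
+As a quick illustration of the augmented commands, the following `redis-cli` session exercises all three through the proxy (a sketch, assuming RRA listens on the default port 22122):
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 set rra:test:food apple   # writes to Riak KV, then invalidates cache
+redis-cli -h 127.0.0.1 -p 22122 get rra:test:food         # read-through: a miss is filled from Riak KV
+redis-cli -h 127.0.0.1 -p 22122 del rra:test:food         # deletes from Riak KV, then invalidates cache
+```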
+
+>**Important:** While the cache proxy service does support issuing DEL commands, PEXPIRE, with a small TTL, is suggested instead when the semantic intent is to remove an item from cache. With write-around, the DEL command will issue a delete to the Riak backend.
+
+## Object Lifetime
+
+With the combination of read-through and write-around cache strategies, the
+full object lifetime for a key-value is represented by the following
+sequence diagram:
+
+![Object lifetime sequence diagram]({{<baseurl>}}images/redis/Object_lifetime.msc.png)
+
+
+
+
diff --git a/content/riak/kv/2.9.9/add-ons/redis/set-up-rra.md b/content/riak/kv/2.9.9/add-ons/redis/set-up-rra.md
new file mode 100644
index 0000000000..80bd88e070
--- /dev/null
+++ b/content/riak/kv/2.9.9/add-ons/redis/set-up-rra.md
@@ -0,0 +1,285 @@
+---
+title: "Setting Up Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Set Up Redis Add-on"
+    identifier: "add-ons_redis_setup"
+    weight: 201
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis use]: ../using-rra
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[install index]: {{<baseurl>}}riak/kv/2.9.9/setup/installing
+[perf open files]: {{<baseurl>}}riak/kv/2.9.9/using/performance/open-files-limit/#changing-the-limit
+[lab ansible]: https://github.com/paegun/ansible-cache-proxy
+
+This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA.
+
+## Prerequisites
+
+Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index].
+
+While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go.
+
+This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used
+to configure the cache proxy.
+
+## In the Lab
+
+An Ansible setup for the Riak Redis Add-on (RRA) was developed to provide a
+runnable example of an installation; see [ansible cache proxy][lab ansible].
+The remainder of this setup guide lists the commands required to install and
+configure RRA manually.
+
+## Installing
+
+1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files].
+2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed.
+3. Install Riak Redis Add-on.
+
+### Change the open-files limit
+
+As with Riak KV, both the total open-files limit and the per-user open-files limit
+must be high enough to allow Redis and Riak Redis Add-on (RRA) to function.
+
+For a complete guide on changing the limit in Riak KV, see
+[Changing the limit][perf open files].
+
+#### Linux
+
+On most Linux distributions, the total limit for open files is controlled by `sysctl`:
+
+```bash
+# check the current system-wide limit
+sudo sysctl fs.file-max
+
+# raise it by adding a line such as "fs.file-max = 65536" to /etc/sysctl.conf,
+# then reload the settings
+sudo sysctl -p
+```
+
+To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
+ +#### CentOS + +On CentOS systems, set a proper limit for the user you're usually logging in with +to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the +executing user. + +#### Ubuntu + +On Ubuntu systems, the following settings are recommended: + +```config +»USERNAME« hard nofile 65536 +»USERNAME« soft nofile 65536 +root hard nofile 65536 +root soft nofile 65536 +``` + +>**Note:** You may need to log out of your shell and then log back in for these changes to take effect. + + +### Install Redis + +>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on". + +#### Install on Ubuntu + +If you are on Ubuntu, run the following to install Redis: + +```bash +# add the dotdeb repositories to your APT sources. +sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <**Notes:** ss is used here to support a minimal installed system, but netstat may be used as well. + +### Install Riak Redis Add-on (RRA) + +>**Note:** +>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums. + +If you are on CentOS, run the following to install RRA: + +```bash +sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm +``` + +If you are on Ubuntu, run the following to install RRA: + +```bash +sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb +``` + +## Configuring Riak Redis Add-on + +To configure Riak Redis Add-on (RRA), edit the configuration file: /etc/cache_proxy/cache_proxy_22122.yml. + +The RRA configuration file is in YAML format. An example configuration +file is provided in the install, and it contains all relevant configuration elements: + +```config +» XML node name« : + listen: 0.0.0.0:22122 + hash: fnv1a_64 + distribution: ketama + auto_eject_hosts: true + redis: true + server_retry_timeout: 2000 + server_failure_limit: 1 + server_ttl: 1h + servers: + - 127.0.0.1:6379:1 + backend_type: riak + backend_max_resend: 2 + backends: + - 127.0.0.1:8087 +``` + +Set the `listen` configuration value to set the RRA listen port. + +To set the time-to-live (TTL) for values stored in cache, set the `server_ttl` +configuration value. Human-readable time values can be specified, +with the most likely units being `s` for seconds or `ms` for milliseconds. + +Set the list of Redis servers by listing the servers, separated by `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional). + +Set the list of Riak KV servers by listing the servers, separated by `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«` +(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here. + +### Verify your configuration + +If you are on Ubuntu, run the following to start RRA: + +```bash +sudo service cache_proxy start +``` + +If you are on CentOS, run the following to restart Redis and ensure redis-server +is enabled to start on boot: + +```bash +systemctl start cache_proxy +``` + +To verify RRA is running and listening on the expected port, run the +following (using the loopback interface and the default RRA port 22122 +as an example): + +```bash +redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS +redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on SUCCESS +``` + +Redis should respond with `SUCCESS`. 
+ +If RRA is responding with the expected output, run the following to +clean up and remove the test value: + +```bash +redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on +``` + +If you did not get the expected output, run the following +to verify that RRA is running on the expected port: + +```bash +ss -nlp |grep [n]utcracker +``` + +>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well. + +## Next Steps + +Get started with some [basic usage][addon redis use] or checkout out more info on [setting up for development (with examples)][addon redis develop]. + + + + diff --git a/content/riak/kv/2.9.9/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/2.9.9/add-ons/redis/set-up-rra/deployment-models.md new file mode 100644 index 0000000000..c36dc68e25 --- /dev/null +++ b/content/riak/kv/2.9.9/add-ons/redis/set-up-rra/deployment-models.md @@ -0,0 +1,143 @@ +--- +title: "Riak Redis Add-on Deployment Models" +description: "Explore the various models for deploying Riak Redis Add-on" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Redis Add-on Deployment Models" + identifier: "add-ons_redis_deployment" + weight: 201 + parent: "add-ons_redis_setup" +toc: true +commercial_offering: true +aliases: +--- + +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png + +## Deployment Models + +### Local Cache Deployment + +In a local cache deployment, the RRA and Redis are deployed to the application +server. + +![Local-deployment]({{}}images/redis/rra_deployment_local.png) + +Connections: + +* RRA: The connections between Application Service instances to RRA Service + instance are local. +* Redis: The connection between the RRA Service instance and Redis Service + instance is local. +* Riak: The connections between Application Servers to Riak Nodes is distributed + and bounded to equal the number of Riak nodes _multiplied_ by the number of + Application Servers since they are aggregated at the RRA Service instance. + +Advantages: + +* Cache hits are extremely fast + +Disadvantages: + +* Cache writes on one application server are *not* observed on other application + servers, so cache hit rates are likely lower unless some form of consistent + routing to the application server exists within the solution. +* Redis competing for RAM with the application service may be problematic + +### Colocated Cache Deployment + +In a colocated cache deployment, the RRA may be deployed either to the +application server (suggested) or to the Riak servers and Redis is deployed to +the Riak servers. + +In the case of deploying the RRA to the application servers, the RRA features +of reducing connections from the relatively high number of application service +instances to the fewer Redis (cache) and Riak (persistent) data service +instances allows for the greatest scale at the expense of the deployment cost +of pushing a service and its configuration. + +In the case of deploying the RRA to the colocated Redis and Riak data servers, +the maximum scale for the solution is contrained by the number of network +connections from the application services while deployment costs remain a matter +of pushing a service and its configuration. In either case, deployment should +be automated, so are not multiplied by the number of servers. 
+ +![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png) + +Connections: + +* RRA: The connections between Application Service instances to RRA Service + instance are distributed and bounded to equal the number of Riak nodes + _multiplied_ by the number of Application Service instances. +* Redis: The connection between the RRA Service instance and Redis Service + instance is local. +* Riak: The connections between RRA to Riak Nodes is distributed and bounded to + equal the number of Riak nodes _squared_. + +Advantages: + +* Increases the cache hit rate as a cache write from one application server + will lead to a cache hit by all other application servers. + +Disadvantages: + +* Typically increased distance between the application service and Redis and + Riak services, so slightly increased latency compared to local. +* Redis competing for RAM with Riak will likely be problematic. Redis should + be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis + to ensure Riak is allotted sufficient RAM to serve the more important + persistent data storage and retrieval services. See http://redis.io/topics/config +* This model may seem to provide data locality, but in the case of faults in + either Redis or Riak services, the fault tolerance mechanisms of RRA and + Riak will not match exactly as communicating the necessary information to + support such a lock-step fault tolerance would lead to greater mean latencies + and Riak provides superior 99th percentile latency performance in the face + of faults. + + +### Distributed Cache Deployment + +In a distributed cache deployment, the RRA is deployed to the application server +and Redis is deployed to standalone servers, separate from Riak cluster nodes. + +![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png) + +Connections: + +* RRA: The connections between Application Service instances to RRA Service + instance are local. +* Redis: The connection between the RRA Service instance and Redis Service + instance are distributed and bounded to equal the number of Application + Servers _multipled_ by the number of Redis Servers. +* Riak: The connections between RRA to Riak Nodes is distributed and bounded to + equal the number of Riak nodes _multiplied_ by the number of Application + Servers since they are aggregated at the RRA Service instance. + +Advantages: + +* Increases the cache hit rate as a cache write from one application server + will lead to a cache hit by all other application servers. +* Keeps RRA near the application, reducing network connections. +* Moves Redis to distinct servers, allowing the cache more RAM and not + constraining the RAM of either application or persistent data services. + +Disadvantages: + +* Typically increased distance between the application service and Redis and + Riak services, so increased latency compared to local. + +### Recommendation + +The relative advantages and disadvantages of the Distributed Cache Deployment, +most notably the increased cache hit rate and reduced connection overhead, +should make it the standout choice for applications requiring the scale and +operational simplicity of Riak. For this reason, we recommend the Distributed +Cache Deployment. 
+ + + + diff --git a/content/riak/kv/2.9.9/add-ons/redis/using-rra.md b/content/riak/kv/2.9.9/add-ons/redis/using-rra.md new file mode 100644 index 0000000000..ffffab143e --- /dev/null +++ b/content/riak/kv/2.9.9/add-ons/redis/using-rra.md @@ -0,0 +1,246 @@ +--- +title: "Using Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Using Redis Addon" + identifier: "add-ons_redis_getstarted" + weight: 302 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: + - /riak/kv/2.9.9/add-ons/redis/get-started-with-rra +--- + +[addon redis develop]: ../developing-rra/ +[addon redis setup]: ../set-up-rra/ +[dev api http]: {{}}riak/kv/2.9.9/developing/api/http/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + + +Now that you’ve [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client which supports `GET`, `PUT` and `DEL` operations. + +This page will walk you through using RRA. + +## Prerequisites + +We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine. + +You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http]. + +## Run the Read-Through Test + +Throughout this test example, the bucket "test" and key "foo" are used to +demonstrate how to address the hieararchical namespace support in Riak KV +through the flat Redis key. The bucket type is not specified in this example, +so is effectively the default bucket type, named "default". For additional +information regarding key namespace, see [develop Riak Redis Add-on (RRA)][addon redis develop]. + +The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are: + +* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings. +* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`. +* GET the Riak object at the `test` bucket with the key `foo`. +* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.) +* Assert that the value obtained from the previous cache proxy GET is 'bar'. 
+ +First, create a file named `read_through_test.sh` with the following content: + +```bash +#!/usr/bin/env bash +# set test environment; the positional arguments are optional and default +# to the values shown (cache proxy port, Riak HTTP port, test bucket) +RIAK_HTTP_IP="127.0.0.1" +RIAK_HTTP_PORT="${2:-8098}" +CACHE_PROXY_IP="127.0.0.1" +CACHE_PROXY_PORT="${1:-22122}" +CACHE_PROXY_STATISTICS_PORT="22123" +RIAK_TEST_BUCKET="${3:-test}" +KEY="foo" +VALUE="bar" + +# DELETE Riak object, ensure no siblings +curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# PUT Riak object +curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# GET Riak object +RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY") + +# GET Cache Proxy value +CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" GET "$RIAK_TEST_BUCKET:$KEY") + +# DELETE Riak object, cleanup +curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY" + +# Assert +if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then + RESULT="Success" +else + RESULT="FAIL" +fi +echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy." +``` + +Then, once you've created the file, make it executable and run it as follows: + +```bash +chmod +x read_through_test.sh +./read_through_test.sh 22122 8098 test +``` + +### Exceptions + +If the test does not pass, verify that both Redis and RRA are running. You can do this by running: + +```bash +ps aux | grep [r]edis +ps aux | grep [n]utcracker +``` + +The result should list `redis` and `nutcracker` respectively. + +Also, verify that Riak KV is started and listening on the protocol buffer port specified: + +```bash +sudo riak config effective | grep proto +``` + +If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following: + +```bash +sudo service cache_proxy restart +``` + +If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows: + +1. Stop RRA. +2. Stop Redis. +3. *Optional:* Restart Riak KV (only necessary if Riak KV is not responding to protocol buffer requests). +4. Start Redis. +5. Start RRA. + +```bash +sudo service cache_proxy stop +sudo service redis stop + +# optional +sudo riak restart + +sudo service redis start +sudo service cache_proxy start +``` + +## Using Riak Redis Add-on + +Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA. + +For objects that should not be cached, interact with Riak KV as usual: issue GET, PUT, and DELETE commands through the Riak client. + +For objects that should be cached, read from RRA: issue GET, SET, and DEL commands through the Redis client. + +### Monitoring + +#### RRA + +Since RRA is installed as a service, the system service monitoring daemon will automatically restart the service with the correct configuration in the event that the service's process is killed or otherwise terminated. + +The log file for RRA is stored by default in `/var/log/cache_proxy.log`. RRA is logrotate friendly, responding to the signal to reopen the log file following a rotate. + +For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+ +For example, run the following command (shown here using the loopback interface and the default statistics port): + +```bash +telnet 127.0.0.1 22123 +``` + +This returns statistics results such as: + +```json +{ + "bdp_cache_proxy": { + "192.168.50.2:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 216, + "requests": 9, + "response_bytes": 39, + "responses": 4, + "server_connections": 1, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.3:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 0, + "requests": 0, + "response_bytes": 0, + "responses": 0, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.4:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 90, + "requests": 5, + "response_bytes": 258, + "responses": 2, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "client_connections": 0, + "client_eof": 6, + "client_err": 0, + "forward_error": 0, + "fragments": 0, + "server_ejects": 0 + }, + "curr_connections": 4, + "service": "nutcracker", + "source": "vagrant", + "timestamp": 1438301846, + "total_connections": 10, + "uptime": 7227, + "version": "0.4.0" +} +``` + +Using these results, you should be able to determine which metric changes would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution. + +While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions: + +* Custom - https://github.com/gfranxman/NutcrackerMonitor +* NewRelic - http://newrelic.com/plugins/schoology/245 +* Nagios - https://github.com/schoology/twemproxy_nagios + +#### Redis + +Various Redis monitoring solutions exist in the market. Like the RRA monitoring solutions above, they make underlying calls to obtain Redis statistics, typically via the `info` command alone.
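+ +As a quick manual check, you can also query a Redis server's statistics directly with `redis-cli`. This is a sketch only - the host shown is one of the example Redis servers from the statistics output above, and Redis is assumed to be listening on its default port: + +```bash +# Pull the stats section from Redis and pick out the cache hit/miss counters +redis-cli -h 192.168.50.2 -p 6379 info stats | grep -E 'keyspace_(hits|misses)' +```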
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details + + + + diff --git a/content/riak/kv/2.9.9/configuring.md b/content/riak/kv/2.9.9/configuring.md new file mode 100644 index 0000000000..11e9f146f9 --- /dev/null +++ b/content/riak/kv/2.9.9/configuring.md @@ -0,0 +1,88 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +aliases: +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+ +[Learn More >>][config search] + +#### [V3 Multi-Datacenter][config v3 mdc] + +A guide on configuring Riak's V3 Multi-Datacenter Replication. + +[Learn More >>][config v3 mdc] + +#### [V2 Multi-Datacenter][config v2 mdc] + +A guide on configuring Riak's V2 Multi-Datacenter Replication. + +[Learn More >>][config v2 mdc] + + + + + + diff --git a/content/riak/kv/2.9.9/configuring/backend.md b/content/riak/kv/2.9.9/configuring/backend.md new file mode 100644 index 0000000000..9200f905a6 --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/backend.md @@ -0,0 +1,647 @@ +--- +title: "Backend Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Backend Configuration" + identifier: "configuring_backend" + weight: 110 + parent: "configuring" +toc: true +aliases: +--- + +[plan backend leveldb]: {{}}riak/kv/2.9.9/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/2.9.9/setup/planning/backend/leveled +[plan backend bitcask]: {{}}riak/kv/2.9.9/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.9/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/2.9.9/setup/planning/backend/multi + +## LevelDB + +Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend. + +> **Note on upgrading to 2.0** +> +> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and wish to use your old configuration files, i.e. `app.config` and `vm.args`, please note that you must set the `total_leveldb_mem_percent` setting in the `eleveldb` section of `app.config`. We recommend setting it to `70`. If you do not set this parameter, it will default to 15, which can lead to problems in some clusters. + +
Config | Description | Default
:------|:------------|:-------
`leveldb.block_cache_threshold` | This setting defines the limit past which block cache memory can no longer be released in favor of the page cache. This setting has no impact in favor of file cache. The value is set on a per-vnode basis. | `32MB`
`leveldb.compaction.trigger.tombstone_count` | Controls when a background compaction initiates solely due to the number of delete tombstones within an individual `.sst` table file. A value of `off` disables the feature. | `1000`
`leveldb.compression` | Enabling this setting (`on`), which is the default, saves disk space. Disabling it may reduce read latency but increase overall disk activity. This option can be changed at any time, but it will not impact data on disk until the next time a file requires compaction. | `on`
`leveldb.compression.algorithm` | This setting selects the compression algorithm used when `leveldb.compression` is `on`. In new riak.conf files, this is explicitly set to `lz4`; when this setting is not provided, `snappy` will be used for backward compatibility. When you determine that you will no longer need backward compatibility, setting this to `lz4` will cause future compactions to use the LZ4 algorithm for compression. | `lz4` in new riak.conf files; `snappy` when not provided
`leveldb.data_root` | The directory in which LevelDB will store its data. | `./data/leveldb`
`leveldb.fadvise_willneed` | Option to override LevelDB's use of `fadvise(DONTNEED)` with `fadvise(WILLNEED)` instead. `WILLNEED` can reduce disk activity on systems where physical memory exceeds the database size. | `false`
`leveldb.maximum_memory` | This parameter defines the server memory (in bytes) to assign to LevelDB. Also see `leveldb.maximum_memory.percent` to set LevelDB memory as a percentage of system total. | `80`
`leveldb.maximum_memory.percent` | This parameter defines the percentage of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes to stay within this size. The memory size can alternately be assigned as a byte count via `leveldb.maximum_memory` instead. | `70`
`leveldb.threads` | The number of worker threads performing LevelDB operations. | `71`
`leveldb.verify_checksums` | Enables or disables the verification of the data fetched from LevelDB against internal checksums. | `on`
`leveldb.verify_compaction` | Enables or disables the verification of LevelDB data during compaction. | `on`
`leveldb.block.size_steps` | Defines the number of incremental adjustments to attempt between the `block.size` value and the maximum `block.size` for an `.sst` table file. A value of zero disables the underlying dynamic `block_size` feature. | `16`
`leveldb.block.restart_interval` | Defines the key count threshold for a new key entry in the key index for a block. Most deployments should leave this parameter alone. | `16`
`leveldb.block.size` | Defines the size threshold for a block/chunk of data within one `.sst` table file. Each new block gets an index entry in the `.sst` table file's master index. | `4KB`
`leveldb.bloomfilter` | Each database `.sst` table file can include an optional "bloom filter" that is highly effective in shortcutting data queries that are destined to not find the requested key. The Bloom filter typically increases the size of an `.sst` table file by about 2%. | `on`
`leveldb.write_buffer_size_min` | Each vnode first stores new key/value data in a memory-based write buffer. This write buffer is in parallel to the recovery log mentioned in the `sync` parameter. Riak creates each vnode with a randomly sized write buffer for performance reasons. The random size is somewhere between `write_buffer_size_min` and `write_buffer_size_max`. | `30MB`
`leveldb.write_buffer_size_max` | See `leveldb.write_buffer_size_min` directly above. | `60MB`
`leveldb.limited_developer_mem` | This is a Riak-specific option that is used when a developer is testing a high number of vnodes and/or several VMs on a machine with limited physical memory. Do not use this option if making performance measurements. This option overwrites values given to `write_buffer_size_min` and `write_buffer_size_max`. | `off`
`leveldb.sync_on_write` | Whether LevelDB will flush after every write. Note: If you are familiar with `fsync`, this is analogous to calling `fsync` after every write. | `off`
`leveldb.tiered` | The level number at which LevelDB data switches from the faster to the slower array. The default of `off` disables the feature. | `off`
`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`. | 
`leveldb.tiered.path.slow` | The path prefix for `.sst` files at or above the level set by `leveldb.tiered`. | 
+ +## Leveled + +Configurable parameters for Riak's [leveled][plan backend leveled] storage backend. + +
Config | Description | Default
:------|:------------|:-------
`leveled.data_root` | A path under which leveled data files will be stored. | `$(platform_data_dir)/leveled`
`leveled.sync_strategy` | Strategy for flushing data to disk. Can be set to `riak_sync`, `sync` (if OTP > 16) or `none`. Use `none`, and the OS will flush when most efficient. Use `riak_sync` or `sync` to flush after every PUT (not recommended without some hardware support, e.g. flash drives and/or flash-backed write caches). | `none`
`leveled.compression_method` | Can be `lz4` or `native` (which will use the Erlang native zlib compression) within `term_to_binary`. | `native`
`leveled.compression_point` | The point at which compression is applied to the Journal (the Ledger is always compressed). Use `on_receipt` or `on_compact`. `on_compact` is suitable when values are unlikely to yield much benefit from compression (compression is only attempted when compacting). | `on_receipt`
`leveled.log_level` | Can be `debug`, `info`, `warn`, `error` or `critical`. Sets the minimum log level to be used within leveled. Leveled will log many lines to allow stats to be extracted by those using log indexers such as Splunk. | `info`
`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. Normally keep this at around the size of O(100K) objects. | `1000000000`
`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. The higher the value, the more compaction runs, and the sooner space is recovered; but each run has a cost. | `24`
`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. Use a low hour of 0 and a high hour of 23 to have no compaction window (i.e. always compact, regardless of time of day). | `0`
`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. If the low hour > the top hour, compaction will work overnight between the low hour and the top hour (inclusive). Timings rely on the server's view of local time. | `23`
`leveled.max_run_length` | The maximum number of consecutive files which may be compacted in a single compaction run. | `4`
`leveled_reload_recalc` | Enable the `recalc` compaction strategy within the leveled backend in Riak. | `disabled`
+ +## Bitcask + +Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend. + +
Config | Description | Default
:------|:------------|:-------
`bitcask.data_root` | The directory under which Bitcask will store its data. | `./data/bitcask`
`bitcask.io_mode` | Configure how Bitcask writes data to disk. If set to `erlang`, writes are made via Erlang's built-in file API; if set to `nif`, writes are made via direct calls to the POSIX C API. The `nif` mode provides higher throughput for certain workloads, but has the potential to negatively impact the Erlang VM, leading to higher worst-case latencies and possible throughput collapse. | `erlang`
`bitcask.expiry` | By default, Bitcask keeps all of your data around. If your data has limited time value, or if you need to purge data for space reasons, you can set the `expiry` option. For example, if you need to purge data automatically after 1 day, set the value to `1d`. `off` disables automatic expiration. | `off`
`bitcask.expiry.grace_time` | By default, Bitcask will trigger a merge whenever a data file contains an expired key. This may result in excessive merging under some usage patterns. To prevent this you can set the `bitcask.expiry.grace_time` option. Bitcask will defer triggering a merge solely for key expiry by the configured number of seconds. Setting this to `1h` effectively limits each cask to merging for expiry once per hour. | `0`
`bitcask.hintfile_checksums` | Whether to allow the CRC to be present at the end of hintfiles. Setting this to `allow_missing` runs Bitcask in a backwards-compatible mode in which old hint files will still be accepted without CRC signatures. | `strict`
`bitcask.fold.max_puts` | See the description for the `bitcask.fold.max_age` config directly below. | `0`
`bitcask.fold.max_age` | Fold keys thresholds will reuse the keydir if another fold was started less than `fold.max_age` ago and there were fewer than `fold.max_puts` updates. Otherwise, it will wait until all current fold keys complete and then start. Set either option to `unlimited` to disable. | `unlimited`
`bitcask.merge.thresholds.fragmentation` | Describes which ratio of dead keys to total keys in a file will cause it to be included in the merge. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 4 dead keys and 6 live keys, it will be included in the merge at the default ratio (which is 40). Increasing the value will cause fewer files to be merged, decreasing the value will cause more files to be merged. | `40`
`bitcask.merge.thresholds.dead_bytes` | Describes the minimum amount of data occupied by dead keys in a file to cause it to be included in the merge. Increasing the value will cause fewer files to be merged, whereas decreasing the value will cause more files to be merged. | `128MB`
`bitcask.merge.thresholds.small_file` | Describes the minimum size a file must have to be excluded from the merge. Files smaller than the threshold will be included. Increasing the value will cause more files to be merged, whereas decreasing the value will cause fewer files to be merged. | `10MB`
`bitcask.merge.triggers.dead_bytes` | Describes how much data stored for dead keys in a single file will trigger merging. If a file meets or exceeds the trigger value for dead bytes, merge will be triggered. Increasing the value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. When either of these constraints is met by any file in the directory, Bitcask will attempt to merge files. | `512MB`
`bitcask.merge.triggers.fragmentation` | Describes which ratio of dead keys to total keys in a file will trigger merging. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 6 dead keys and 4 live keys, then merge will be triggered at the default setting. Increasing this value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. | `60`
`bitcask.merge.window.end` | See the description of the `bitcask.merge.policy` config below. | `23`
`bitcask.merge.window.start` | See the description of the `bitcask.merge.policy` config below. | `0`
`bitcask.merge.policy` | Lets you specify when during the day merge operations are allowed to be triggered. Valid options are: `always`, meaning no restrictions; `never`, meaning that merging will never be attempted; and `window`, specifying the hours during which merging is permitted, where `bitcask.merge.window.start` and `bitcask.merge.window.end` are integers between 0 and 23. If merging has a significant impact on performance of your cluster, or your cluster has quiet periods in which little storage activity occurs, you may want to change this setting from the default. | `always`
`bitcask.merge_check_interval` | Bitcask periodically runs checks to determine whether merges are necessary. This parameter determines how often those checks take place. Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, etc. | `3m`
`bitcask.merge_check_jitter` | In order to prevent merge operations from taking place on different nodes at the same time, Riak can apply random variance to merge times, expressed as a percentage of `bitcask.merge_check_interval`. | `30%`
`bitcask.max_merge_size` | Maximum amount of data to merge in one go in the Bitcask backend. | `100GB`
`bitcask.max_file_size` | Describes the maximum permitted size for any single data file in the Bitcask directory. If a write causes the current file to exceed this size threshold then that file is closed, and a new file is opened for writes. | `2GB`
`bitcask.sync.interval` | See the description of the `bitcask.sync.strategy` directly below. | 
`bitcask.sync.strategy` | Changes the durability of writes by specifying when to synchronize data to disk. The default setting protects against data loss in the event of application failure (process death) but leaves open a small window in which data could be lost in the event of complete system failure (e.g. hardware, OS, or power). The default mode, `none`, writes data into operating system buffers which will be written to the disks when those buffers are flushed by the operating system. If the system fails, e.g. due to power loss or a crash, data in those buffers is lost before it reaches stable storage. This is prevented by the setting `o_sync`, which forces the operating system to flush to stable storage at every write. The effect of flushing each write is better durability; however, write throughput will suffer, as each write must wait for the flush to complete. Available sync strategies: `none`, which will let the operating system manage syncing writes; `o_sync`, which uses the `O_SYNC` flag to force syncs on every write; and `interval`, which will force Bitcask to sync every `bitcask.sync.interval` seconds. | `none`
`bitcask.open_timeout` | Specifies the maximum time Bitcask will block on startup while attempting to create or open the data directory. You generally need not change this value. If for some reason the timeout is exceeded on open you'll see a log message of the form `Failed to start bitcask backend: ....` Only then should you consider a longer timeout. | `4s`
+ +## Memory Backend + +Configurable parameters for Riak's [Memory][plan backend memory] backend. + + + + + + + + + + + + + + + + + + + + + + + + +
Config | Description | Default
:------|:------------|:-------
`memory_backend.ttl` | Each value written will be written with this "time to live." Once that object's time is up, it will be deleted on the next read of its key. Minimum: 1s. | 
`memory_backend.max_memory_per_vnode` | The maximum amount of memory consumed per vnode by the memory storage backend. Minimum: 1MB. | 
+ +## Multi Backend + +Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster. + +If you are using multiple backends, you can configure the backends +individually by prepending the setting with `multi_backend.$name`, where +`$name` is the name of the backend. `$name` can be any valid +configuration word, like `customer_data`, `my_data`, `foo_bar_backend`, +etc. + +Below is the general form for setting multi-backend parameters: + +```riakconf +multi_backend.$name.(existing_setting) = +# or +multi_backend.$name.$backend_type.(backend_specific_setting) = +``` + +Below is a listing of the available parameters: + + + + + + + + + + + + + + + + + + + + + + + + +
Config | Description | Default
:------|:------------|:-------
`multi_backend.$name.storage_backend` | This parameter specifies the Erlang module defining the storage mechanism that will be used on this node. | `bitcask`
`multi_backend.default` | The default name of a backend when one is not specified. | 
+ +To give an example, if you have a LevelDB backend named +`customer_backend` and wish to set the `data_root` parameter to +`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would +do so as follows: + +```riakconf +multi_backend.customer_backend.storage_backend = leveldb +multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend +multi_backend.customer_backend.leveldb.maximum_memory.percent = 50 +``` + + + + diff --git a/content/riak/kv/2.9.9/configuring/basic.md b/content/riak/kv/2.9.9/configuring/basic.md new file mode 100644 index 0000000000..7fd1935e27 --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/basic.md @@ -0,0 +1,239 @@ +--- +title: "Basic Riak KV Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Basic Configuration" + identifier: "configuring_basic" + weight: 100 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.9/ops/building/configuration/ + - /riak/kv/2.9.9/ops/building/configuration/ +--- + +[config reference]: {{}}riak/kv/2.9.9/configuring/reference +[use running cluster]: {{}}riak/kv/2.9.9/using/running-a-cluster +[use admin riak-admin#member-status]: {{}}riak/kv/2.9.9/using/admin/riak-admin/#member-status +[perf erlang]: {{}}riak/kv/2.9.9/using/performance/erlang +[plan start]: {{}}riak/kv/2.9.9/setup/planning/start +[plan best practices]: {{}}riak/kv/2.9.9/setup/planning/best-practices +[cluster ops backup]: {{}}riak/kv/2.9.9/using/cluster-operations/backing-up +[cluster ops add remove node]: {{}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes +[plan backend]: {{}}riak/kv/2.9.9/setup/planning/backend +[plan backend multi]: {{}}riak/kv/2.9.9/setup/planning/backend/multi +[plan backend bitcask]: {{}}riak/kv/2.9.9/setup/planning/backend/bitcask +[usage bucket types]: {{}}riak/kv/2.9.9/developing/usage/bucket-types +[apps replication properties]: {{}}riak/kv/2.9.9/developing/app-guide/replication-properties +[concept buckets]: {{}}riak/kv/2.9.9/learn/concepts/buckets +[concept eventual consistency]: {{}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[perf benchmark]: {{}}riak/kv/2.9.9/using/performance/benchmarking +[perf open files]: {{}}riak/kv/2.9.9/using/performance/open-files-limit +[perf index]: {{}}riak/kv/2.9.9/using/performance +[perf aws]: {{}}riak/kv/2.9.9/using/performance/amazon-web-services +[Cluster Capacity Planning]: {{}}riak/kv/2.9.9/setup/planning/cluster-capacity/#ring-size-number-of-partitions + +This document covers the parameters that are commonly adjusted when setting up a new cluster. We recommend that you also review the detailed [Configuration Files][config reference] document before moving a cluster into production. + +All configuration values discussed here are managed via the configuration file on each node, and a node must be restarted for any changes to take effect. + +> **Note** +> +> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0 release, you can use either your old `app.config` configuration file or the newer `riak.conf` if you wish. +> +> If you have installed Riak KV 2.0 directly, you should use only `riak.conf`. +> +> More on configuring Riak KV can be found in the [configuration files][config reference] doc. + +We advise that you make as many of the changes below as practical _before_ joining the nodes together into a cluster.
Once your +configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process. + +Use [`riak-admin member-status`][use admin riak-admin#member-status] +to determine whether any given node is a member of a cluster. + +## Erlang VM Tunings + +Prior to building and starting a cluster, there are some Erlang-VM-related changes that you should make to your configuration files. If you are using the older, `vm.args`-based Erlang VM tunings, you should set the following: + +```vmargs ++sfwi 500 ++scl false +``` + +If you are using the newer, `riak.conf`-based configuration system, we recommend the following settings: + +```riakconf +erlang.schedulers.force_wakeup_interval = 500 +erlang.schedulers.compaction_of_load = false +``` + +More information can be found in [Erlang VM Tuning][perf erlang]. + +## Ring Size + +The ring size, in Riak parlance, is the number of data partitions that comprise the cluster. This quantity impacts the scalability and performance of a cluster and, importantly, **it should be established before the cluster starts receiving data**. + +If the ring size is too large for the number of servers, disk I/O will be negatively impacted by the excessive number of concurrent databases running on each server; if the ring size is too small, the servers' other resources (primarily CPU and RAM) will go underutilized. + +See [Cluster Capacity Planning] for more details on choosing a ring size. + +The steps involved in changing the ring size depend on whether the servers (nodes) in the cluster have already been joined together. + +### Cluster joined, but no data needs to be preserved + +1. Change the ring creation size parameter by uncommenting it and then setting it to the desired value, for example 64: + + ```riakconf + ring_size = 64 + ``` + + ```appconfig + %% In the riak_core section: + {ring_creation_size, 64} + ``` + +2. Stop all nodes +3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file) +4. Start all nodes +5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster] + +### New servers, have not yet joined a cluster + +1. Change the ring creation size parameter by uncommenting it and then setting it to the desired value, for example 64: + + ```riakconf + ring_size = 64 + ``` + + ```appconfig + %% In the riak_core section: + {ring_creation_size, 64} + ``` + +2. Stop all nodes +3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file) +4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster] + +### Verifying ring size + +You can use the `riak-admin` command to verify the ring size: + +```bash +riak-admin status | grep ring +``` + +Console output: + +``` +ring_members : ['riak@10.160.13.252'] +ring_num_partitions : 8 +ring_ownership : <<"[{'riak@10.160.13.252',8}]">> +ring_creation_size : 8 +``` + +If `ring_num_partitions` and `ring_creation_size` do not agree, that means that the `ring_creation_size` value was changed too late and that the proper steps were not taken to start over with a new ring. + +**Note**: Riak will not allow two nodes with different ring sizes to be joined into a cluster. + +## Backend + +Another critical decision to be made is the backend to use.
The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.9/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. + + + + diff --git a/content/riak/kv/2.9.9/configuring/global-object-expiration.md b/content/riak/kv/2.9.9/configuring/global-object-expiration.md new file mode 100644 index 0000000000..bfd8a3811c --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/global-object-expiration.md @@ -0,0 +1,90 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
+menu: + riak_kv-2.9.9: + name: "Global Object Expiration" + identifier: "config_expiry" + weight: 180 + parent: "configuring" +project: "riak_kv" +project_version: 2.9.9 +toc: true +aliases: +--- + +[ttl]: https://en.wikipedia.org/wiki/Time_to_live + +By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data. + +Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value. + +## Enabling Expiry + +To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file: + +```riak.conf +leveldb.expiration = on +``` + +{{% note %}} +Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration. +{{% /note %}} + +## Setting Retention Time + +The `retention_time` setting is used to specify the time until objects expire. +Durations are set using a combination of an integer and a shortcut for the supported units: + +- Milliseconds - `ms` +- Seconds - `s` +- Minutes - `m` +- Hours - `h` +- Days - `d` +- Weeks - `w` +- Fortnight - `f` + +The following example configures objects to expire after 5 hours: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 5h +``` + +You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 8d9h +``` + +## Expiry Modes + +Global expiration supports two modes: + +- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired. +- `normal` - individual objects are removed as part of the usual compaction process. + +We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient. + +The following example configures objects to expire after 1 day: + +```riak.conf +leveldb.expiration = on +leveldb.expiration.retention_time = 1d +leveldb.expiration.mode = whole_file +``` + +## Disable Expiry + +To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other two settings are ignored. For example: + +```riak.conf +leveldb.expiration = off +leveldb.expiration.retention_time = 1d +leveldb.expiration.mode = whole_file +``` + + + + diff --git a/content/riak/kv/2.9.9/configuring/load-balancing-proxy.md b/content/riak/kv/2.9.9/configuring/load-balancing-proxy.md new file mode 100644 index 0000000000..7a5c6319c3 --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/load-balancing-proxy.md @@ -0,0 +1,275 @@ +--- +title: "Load Balancing and Proxy Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Load Balancing & Proxy" + identifier: "configuring_load_balance" + weight: 150 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.9/ops/advanced/configs/load-balanacing-proxy/ + - /riak/kv/2.9.9/ops/advanced/configs/load-balanacing-proxy/ +--- + +[perf open files]: {{}}riak/kv/2.9.9/using/performance/open-files-limit + +The recommended best practice for operating Riak in production is to place Riak behind a load-balancing or proxy solution, either hardware- or software-based, while never directly exposing Riak to public network interfaces.
+ +Riak users have reported success in using Riak with a variety of load-balancing and proxy solutions. Common solutions include proprietary hardware-based load balancers, cloud-based load balancing options, such as Amazon's Elastic Load Balancer, and open-source software-based projects like HAProxy and Nginx. + +This guide briefly explores the commonly used open-source software-based solutions HAProxy and Nginx, and provides some configuration and operational tips gathered from community users and operations-oriented engineers at Basho. + +While it is by no means an exhaustive overview of the topic, this guide should provide a starting point for choosing and implementing your own solution. + +## HAProxy + +[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source solution for load balancing and proxying of HTTP- and TCP-based application traffic. + +Users have reported success in using HAProxy in combination with Riak in a number of configurations and scenarios. Much of the information and example configuration for this section is drawn from experiences of users in the Riak community in addition to suggestions from Basho engineering. + +### Example Configuration + +The following is an example starting-point configuration for HAProxy to act as a load balancer. The example cluster has 4 nodes and will be accessed by Riak clients using both the Protocol Buffers and HTTP interfaces. + +> **Note on open files limits** +> +> The operating system's open files limits need to be greater than 256000 for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems. + +```config +global + log 127.0.0.1 local0 + log 127.0.0.1 local1 notice + maxconn 256000 + chroot /var/lib/haproxy + user haproxy + group haproxy + spread-checks 5 + daemon + quiet + +defaults + log global + option dontlognull + option redispatch + option allbackups + maxconn 256000 + timeout connect 5000 + +backend riak_rest_backend + mode http + balance roundrobin + option httpchk GET /ping + option httplog + server riak1 riak1.<fqdn>:8098 weight 1 maxconn 1024 check + server riak2 riak2.<fqdn>:8098 weight 1 maxconn 1024 check + server riak3 riak3.<fqdn>:8098 weight 1 maxconn 1024 check + server riak4 riak4.<fqdn>:8098 weight 1 maxconn 1024 check + +frontend riak_rest + bind 127.0.0.1:8098 + # Example bind for SSL termination + # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem + mode http + option contstats + default_backend riak_rest_backend + + +backend riak_protocol_buffer_backend + balance leastconn + mode tcp + option tcpka + option srvtcpka + server riak1 riak1.<fqdn>:8087 weight 1 maxconn 1024 check + server riak2 riak2.<fqdn>:8087 weight 1 maxconn 1024 check + server riak3 riak3.<fqdn>:8087 weight 1 maxconn 1024 check + server riak4 riak4.<fqdn>:8087 weight 1 maxconn 1024 check + + +frontend riak_protocol_buffer + bind 127.0.0.1:8087 + mode tcp + option tcplog + option contstats + option tcpka + option srvtcpka + default_backend riak_protocol_buffer_backend +``` + +A specific configuration detail worth noting from the example is the commented option for SSL termination. HAProxy supports SSL directly as of version 1.5. Provided that your HAProxy instance was built with OpenSSL support, you can enable it by uncommenting the example line and modifying it to suit your environment. More information is available in the [HAProxy documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl).
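+ +For instance, assuming an HAProxy 1.5+ build with OpenSSL support and a PEM certificate bundle at a path of your choosing (the path below is a hypothetical example), the uncommented bind might look like: + +```config +frontend riak_rest + # Terminate SSL for the HTTP interface using the given certificate bundle + bind 127.0.0.1:8443 ssl crt /etc/haproxy/certs/riak.pem + mode http + default_backend riak_rest_backend +```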
+ +Also note that the example configuration above is considered a starting point and is a work in progress based upon [this example](https://gist.github.com/1507077). You should carefully examine the configuration and change it according to your specific environment. + +### Maintaining Nodes Behind HAProxy + +When using HAProxy with Riak, you can instruct HAProxy to ping each node in the cluster and automatically remove nodes that do not respond. + +You can also specify a round-robin configuration in HAProxy and have your application handle connection failures by retrying after a timeout, thereby reaching a functioning node upon retrying the connection attempt. + +HAProxy also has a standby system you can use to remove a node from rotation while allowing existing requests to finish. You can remove nodes from HAProxy directly from the command line by interacting with the HAProxy stats socket with a utility such as [socat](http://www.dest-unreach.org/socat/): + +```bash +echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock +``` + +At this point, you can perform maintenance on the node, down the node, and so on. When you've finished working with the node and it is again available for requests, you can re-enable it: + +```bash +echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock +``` + +Consult the following HAProxy documentation resources for more information on configuring HAProxy in your environment: + +* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy) +* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt) + +## Nginx + +Some users have reported success in using the [Nginx](http://nginx.org/) HTTP server to proxy requests for Riak clusters. An example that provides access to a Riak cluster *through GET requests only* is provided here for reference. + +### Example Configuration + +The following is an example starting point configuration for Nginx to act as a front-end proxy to a 5-node Riak cluster. + +This example forwards all GET requests to Riak nodes while rejecting all other HTTP operations. + +{{% note title="Nginx version notes" %}} +This example configuration was verified on **Nginx version 1.2.3**. Please be aware that earlier versions of Nginx did not support any HTTP 1.1 semantics for upstream communication to backends. You should carefully examine this configuration and make changes appropriate to your specific environment before attempting to use it. +{{% /note %}} + +Here is an example `nginx.conf` file: + +```config +upstream riak_hosts { + # Uncomment and edit the following lines to list your Riak nodes: + # server 10.0.1.10:8098; + # server 10.0.1.11:8098; + # server 10.0.1.12:8098; + # server 10.0.1.13:8098; + # server 10.0.1.14:8098; +} + +server { + listen 80; + server_name _; + access_log /var/log/nginx/riak.access.log; + + # your standard Nginx config for your site here...
+ location / { + root /var/www/nginx-default; + } + + # Expose the /riak endpoint and allow queries for keys only + location /riak/ { + proxy_set_header Host $host; + proxy_redirect off; + + client_max_body_size 10m; + client_body_buffer_size 128k; + + proxy_connect_timeout 90; + proxy_send_timeout 90; + proxy_read_timeout 90; + + proxy_buffer_size 64k; # If set to a smaller value, + # nginx can complain with a + # "too large headers" error + proxy_buffers 4 64k; + proxy_busy_buffers_size 64k; + proxy_temp_file_write_size 64k; + + if ($request_method != GET) { + return 405; + } + + # Disallow any link with the MapReduce query format "bucket,tag,_" + if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) { + return 405; + } + + if ($request_method = GET) { + proxy_pass http://riak_hosts; + } + } +} +``` + +{{% note title="Note on access controls" %}} +Even when filtering and limiting requests to GETs only as done in the example, you should strongly consider additional access controls beyond what Nginx can provide directly, such as specific firewall rules to limit inbound connections to trusted sources. +{{% /note %}} + +### Querying Secondary Indexes Over HTTP + +When accessing Riak over HTTP and issuing Secondary Index queries, you can encounter an issue due to the default Nginx handling of HTTP header names containing underscore (`_`) characters. + +By default, Nginx will issue errors for such queries, but you can instruct Nginx to handle such header names when doing Secondary Index queries over HTTP by adding the following directive to the appropriate `server` section of `nginx.conf`: + +``` +underscores_in_headers on; +``` + + + + diff --git a/content/riak/kv/2.9.9/configuring/managing.md b/content/riak/kv/2.9.9/configuring/managing.md new file mode 100644 index 0000000000..fde78536d6 --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/managing.md @@ -0,0 +1,121 @@ +--- +title: "Managing Your Configuration" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Managing Configuration" + identifier: "configuring_managing" + weight: 130 + parent: "configuring" +toc: true +aliases: +--- + +[use admin riak cli]: {{}}riak/kv/2.9.9/using/admin/riak-cli +[use admin riak cli#chkconfig]: {{}}riak/kv/2.9.9/using/admin/riak-cli/#chkconfig +[config reference#search]: {{}}riak/kv/2.9.9/configuring/reference/#search + +## Retrieving a Configuration Listing + +At any time, you can get a snapshot of currently applied configurations through the command line. For a listing of *all* of the configs currently applied on the node: + +```bash +riak config effective +``` + +This will output a long list of the following form: + +``` +anti_entropy = active +anti_entropy.bloomfilter = on +anti_entropy.concurrency_limit = 2 +# and so on +``` + +For detailed information about a particular configuration variable, use the `riak config describe <variable>` command. This command will output a description of what the parameter configures, which datatype you should use to set the parameter (integer, string, enum, etc.), the default value of the parameter, the currently set value in the node, and the name of the parameter in `app.config` in older versions of Riak (if applicable). + +For in-depth information about the `ring_size` variable, for example: + +```bash +riak config describe ring_size +``` + +This will output the following: + +``` +Documentation for ring_size +Number of partitions in the cluster (only valid when first +creating the cluster).
Must be a power of 2, minimum 8 and maximum +1024. + + Datatype : [integer] + Default Value: 64 + Set Value : undefined + app.config : riak_core.ring_creation_size +``` + +## Checking Your Configuration + +The [`riak`][use admin riak cli] command line tool has a [`chkconfig`][use admin riak cli#chkconfig] command that enables you to determine whether the syntax in your configuration files is correct. + +```bash +riak chkconfig +``` + +If your configuration files are syntactically sound, you should see the output `config is OK` followed by a listing of files that were checked. You can safely ignore this listing. If, however, something is syntactically awry, you'll see an error output that provides details about what is wrong. To give an example, the `search.solr.jmx_port` setting (in the [Search][config reference#search] section below) must be set as an integer. Imagine that we set it to something else: + +```riakconf +search.solr.jmx_port = banana +``` + +If we run `riak chkconfig` now, we'll get an error: + +``` +[error] Error generating configuration in phase transform_datatypes +[error] Error transforming datatype for: search.solr.jmx_port +[error] "banana" can't be converted to an integer +``` + +The error message will specify which configurable parameters are syntactically unsound and attempt to provide an explanation why. + +Please note that the `chkconfig` command only checks for syntax. It will _not_ be able to discern if your configuration is otherwise unsound, e.g. if your configuration will cause problems on your operating system or doesn't activate subsystems that you would like to use. + +## Debugging Your Configuration + +If there is a problem with your configuration but you're having trouble identifying the problem, there is a command that you can use to debug your configuration: + +```bash +riak config generate -l debug +``` + +If there are issues with your configuration, you will see detailed output that might provide a better sense of what has gone wrong in the config generation process. + + + + diff --git a/content/riak/kv/2.9.9/configuring/mapreduce.md b/content/riak/kv/2.9.9/configuring/mapreduce.md new file mode 100644 index 0000000000..87fa669189 --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/mapreduce.md @@ -0,0 +1,200 @@ +--- +title: "MapReduce Settings" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "MapReduce Settings" + identifier: "configuring_mapreduce" + weight: 170 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.9/ops/advanced/configs/mapreduce/ + - /riak/kv/2.9.9/ops/advanced/configs/mapreduce/ +--- + +[usage mapreduce]: {{}}riak/kv/2.9.9/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/2.9.9/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/2.9.9/developing/usage/secondary-indexes + +## Configuring MapReduce + +[MapReduce (M/R)][usage mapreduce] is always enabled, but it is configurable through the [app.config][config reference#appconfig] file, as follows, under the `riak_kv` section: + +```erlang +{riak_kv, [ +``` + +`mapred_name` is the URL directory used to submit M/R requests to Riak. By default this is `mapred`, making the request path, for example, `http://localhost:8098/mapred`. + +```erlang + {mapred_name, "mapred"}, +``` + +`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes] MapReduce inputs are queued in parallel in their own pipe (`true`), or serially through a helper process (`false` or undefined).
+ +> **Note**: Set to `false` or leave undefined during an upgrade from 1.0. + +```erlang + {mapred_2i_pipe, true}, +``` + +Each of these entries controls how many Javascript virtual machines are available for executing map, reduce, pre- and post-commit hook functions. + +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {map_js_vm_count, 8 }, + {reduce_js_vm_count, 6 }, + {hook_js_vm_count, 2 }, +``` + +`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated to the Javascript VMs. If unset, the default is 8MB. + +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {js_max_vm_mem, 8}, +``` + +`js_thread_stack` is the maximum amount of thread stack, in megabytes, allocated to the Javascript VMs. If unset, the default is 16MB. + +> **Note**: This is not the same as the C thread stack. + +```erlang + {js_thread_stack, 16}, +``` + +`js_source_dir` should point to a directory containing Javascript source files which will be loaded when Riak initializes Javascript VMs. + +```erlang + %{js_source_dir, "/tmp/js_source"}, +``` + + + +## Configuration Tuning for Javascript + +If you load large JSON objects into your buckets, you might encounter an error like the following: + +```json + {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"} +``` + + +You can increase the amount of memory allocated to the Javascript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB: + +```erlang +{js_thread_stack, 8} +``` + +becomes + +```erlang +{js_thread_stack, 32}, +``` + +In addition to increasing the amount of memory allocated to the stack, you can increase the heap size as well by increasing the `js_max_vm_mem` from the default of 8MB. If you are collecting a large amount of results in a reduce phase, you may need to increase this setting. + +## Configuration for Riak 1.0 + +Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config: + +```erlang +%% Use Riak Pipe to power MapReduce queries +{mapred_system, pipe}, +``` + +> **Warning:** +> +> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0. + +Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this: + +```erlang +%% Use the legacy MapReduce system +{mapred_system, legacy}, +``` + +## Configuration Tuning for Reduce Phases + +If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases. + +### Batch Size + +By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs.
If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config: + +```erlang +%% Run reduce functions after 100 new inputs are received +{mapred_reduce_phase_batch_size, 100}, +``` + +You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_batch_size":150}}} +``` + +In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form: + +```erlang +{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep} +``` + +Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead: + +```json +{"reduce": + {...language, etc. as usual... + "arg":{"reduce_phase_only_1":true}}} +``` + +Similarly, in Erlang: + +```erlang +{reduce, FunSpec, [reduce_phase_only_1], Keep} +``` + +> **Warning:** +> +> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1. + +### Pre-Reduce + +If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced locally on the partition that produced them, before being sent, as usual, to the final aggregate reduce. + +Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config: + +```erlang +%% Always pre-reduce between map and reduce phases +{mapred_always_prereduce, true} +``` + +Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag: + +```erlang +{map, FunSpec, [do_prereduce], Keep} +``` + +> **Warning:** +> +> A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1. + + + + diff --git a/content/riak/kv/2.9.9/configuring/next-gen-replication.md b/content/riak/kv/2.9.9/configuring/next-gen-replication.md new file mode 100644 index 0000000000..30af6ebd2b --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/next-gen-replication.md @@ -0,0 +1,63 @@ +--- +title_supertext: "Configuring:" +title: "Next Gen Replication" +description: "" +project: "riak_kv" +project_version: "2.9.9" +menu: + riak_kv-2.9.9: + name: "Next Gen Replication" + identifier: "nextgen_rep" + weight: 200 + parent: "configuring" +version_history: + in: "2.9.1+" +toc: true +commercial_offering: true +aliases: +--- + +The configuration for Next Gen Replication is kept in the `riak.conf` configuration file.
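+ +For example, a minimal sketch for enabling full-sync of all data against a single remote peer might look like the following - the peer address is a placeholder, and each setting is described in the table below: + +```riakconf +# Sketch only: sync all data (nval 3 on both sides) against one peer +tictacaae_active = active +ttaaefs_scope = all +ttaaefs_localnval = 3 +ttaaefs_remotenval = 3 +ttaaefs_peerip = 10.0.0.1 +ttaaefs_peerport = 8898 +ttaaefs_peerprotocol = http +ttaaefs_allcheck = 24 +```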
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by
+running the `riak` command-line tool:
+
+```bash
+riak chkconfig
+```
+
+## riak.conf Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync, whether all data needs to be sync'd, or whether a specific bucket (bucket) or a specific bucket type (type) should be sync'd. Note that in most cases a sync of all data has lower overhead than a sync of a subset of data, as cached AAE trees will be used.
+`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For Tictac full-sync, the registered queue name on this cluster to be used for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exclusive to full-sync (i.e. a real-time replication queue may be used as well).
+`ttaaefs_maxresults` | `any` (integer) | `64` | For Tictac full-sync, the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair.
+`ttaaefs_rangeboost` | `any` (integer) | `8` | For Tictac full-sync, a multiplier applied to the maximum number of AAE segments compared per exchange. When running a range_check query, the maximum will be `ttaaefs_maxresults` * `ttaaefs_rangeboost`.
+`ttaaefs_bucketfilter_name` | `any` (text) | `` | For Tictac bucket full-sync, the bucket to be sync'd by this node. Only ASCII string bucket definitions are supported (which will be converted to binary using list_to_binary).
+`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync, the bucket type of the bucket name. Only ASCII string type definitions are supported (these definitions will be converted to binary using list_to_binary).
+`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync, the NVAL to be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval.
+`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync, the NVAL to be sync'd in the remote cluster.
+`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node in the remote cluster to which this node will connect for full-sync purposes. If this peer node is unavailable, this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes.
+`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster.
+`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when connecting to the peer in the remote cluster. Can be http or pb (but only http is currently being tested).
+`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24-hour period all the data should be checked to confirm it is fully sync'd. When running a full (i.e. nval) sync, this will check all the data under that nval between the clusters, and when the trees are out of alignment, will check across all data where the nval matches the specified nval.
+`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24-hour period no data should be checked. Use no-checks to align the number of checks done by each node - if each node has the same number of slots, they will naturally space their checks within the period of the slot.
+`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24-hour period the last hour's data should be checked to confirm it is fully sync'd.
+`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24-hour period the last 24 hours of data should be checked to confirm it is fully sync'd.
+`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24-hour period a range_check should be run.
+`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, whether each repaired key should be logged.
+`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable Tictac AAE. Note that disabling Tictac AAE will set the use of tictacaae_active only at startup - setting the environment variable at runtime will have no impact.
+`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
+`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Set the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and not removed when that partition hands off (although the contents should be cleared).
+`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if Tictac AAE is enabled, the vnode will detect whether the vnode backend has the capability to be a "native" store. If not, parallel mode will be entered, and a parallel AAE keystore will be started. There are two potential parallel store backends - leveled_ko and leveled_so.
+`tictacaae_rebuildwait` | `` | `336` | The number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
+`tictacaae_rebuilddelay` | `` | `345600` | Once the AAE system has expired (due to the rebuild wait), the rebuild will not be triggered until after a further delay, chosen as a random number of seconds up to this value.
+`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
+`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will be skipped when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans developing.
+`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
+`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved in each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data; hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired in each pass is defined by tictacaae_maxresults.
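+
+As an illustrative sketch (the peer address and check counts below are placeholders, not recommendations), a minimal full-sync configuration in `riak.conf` might look like:
+
+```riakconf
+ttaaefs_scope = all
+ttaaefs_queuename = q1_ttaaefs
+ttaaefs_localnval = 3
+ttaaefs_remotenval = 3
+ttaaefs_peerip = 10.0.0.2
+ttaaefs_peerport = 8898
+ttaaefs_peerprotocol = http
+ttaaefs_allcheck = 24
+```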
+ diff --git a/content/riak/kv/2.9.9/configuring/reference.md b/content/riak/kv/2.9.9/configuring/reference.md new file mode 100644 index 0000000000..49d54e0b3e --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/reference.md @@ -0,0 +1,2039 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/2.9.9/ops/advanced/configs/configuration-files/ + - /riak/kv/2.9.9/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. 
+
+## Node Metadata
+
+Every Riak node has a name and a cookie used to facilitate inter-node
+communication. The following parameters enable you to customize the name
+and cookie.
+
+Config | Description | Default
+:------|:------------|:-------
+`distributed_cookie` | Cookie for distributed node communication within a Riak cluster. All nodes in the same cluster should use the same cookie or they will not be able to communicate. | `riak`
+`nodename` | The name of the Riak node. | `riak@127.0.0.1`
+`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
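+
+As a sketch (the node name and cookie below are placeholders), these settings appear in `riak.conf` as:
+
+```riakconf
+nodename = riak@192.168.1.10
+distributed_cookie = my_cluster_cookie
+ring_size = 64
+```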
+
+## Ring
+
+Configurable parameters for your cluster's [ring][concept clusters].
+
+Config | Description | Default
+:------|:------------|:-------
+`ring.state_dir` | Default location of ringstate. | `./data/ring`
+`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
+`transfer_limit` | Number of concurrent node-to-node transfers allowed. | `2`
+
+## Storage Backend
+
+Riak enables you to choose from the following storage backends:
+
+* [Bitcask][plan backend bitcask]
+  - [configuration][config backend bitcask]
+* [LevelDB][plan backend leveldb]
+  - [configuration][config backend leveldb]
+* [Leveled][plan backend leveled]
+  - [configuration][config backend leveled]
+* [Memory][plan backend memory]
+  - [configuration][config backend memory]
+* [Multi][plan backend multi]
+  - [configuration][config backend multi]
+
+Config | Description | Default
+:------|:------------|:-------
+`storage_backend` | Specifies the storage engine used for Riak's key-value data and secondary indexes (if supported). The available options are `bitcask` (the default), `leveldb`, `memory`, `leveled` and `multi`. | `bitcask`
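+
+For example, to select LevelDB (a sketch; choose the backend that suits your workload):
+
+```riakconf
+storage_backend = leveldb
+```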
+
+## Directories
+
+The directories in which Riak stores data, logs, dependencies,
+executables, and configuration files can be configured using the
+parameters below.
+
+Config | Description | Default
+:------|:------------|:-------
+`platform_bin_dir` | The directory in which the `riak-admin`, `riak-debug`, and now-deprecated `search-cmd` executables are stored. | `./bin`
+`platform_data_dir` | The directory in which Riak stores its storage backend data, as well as active anti-entropy data and cluster metadata. | `./data`
+`platform_etc_dir` | The directory in which Riak's configuration files are stored. | `./etc`
+`platform_lib_dir` | The directory in which Riak's dependencies are housed. | `./lib`
+`platform_log_dir` | The directory in which Riak's log files are stored, e.g. the `console.log`, `erlang.log`, and `crash.log` files. | `./log`
+
+Each of these directory parameters can be used to construct values for
+other parameters by placing it within a `$(...)`. Thus,
+`platform_log_dir` becomes `$(platform_log_dir)` and so on.
+
+To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the
+`anti_entropy.data_dir` parameter. When setting that parameter, you can
+specify an absolute directory, as below:
+
+```riakconf
+anti_entropy.data_dir = /path/to/anti_entropy
+```
+
+Or you can use the value of `platform_data_dir`:
+
+```riakconf
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```
+
+## Search
+
+Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`search` | `off` | `on` or `off`
+`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
+`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
+`search.dist_query` | `on` | `on` or `off`
+`search.index.error_threshold.failure_count` | `3` | Integer
+`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
+`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
+`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
+`search.queue.batch.maximum` | `100` | Integer
+`search.queue.batch.minimum` | `1` | Integer
+`search.queue.high_watermark` | `10000` | Integer
+`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
+`search.root_dir` | `./data/yz` | Directory
+`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
+`search.solr.jmx_port` | `8985` | Integer
+`search.solr.port` | `8093` | Integer
+`search.solr.start_timeout` | `30s` | Integer with time units (e.g. `2m`)
+`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`
+
+## Riak Control
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters. The configurable parameters below enable you
+to turn the Riak Control subsystem on and off and to configure console
+authorization.
+
+Config | Description | Default
+:------|:------------|:-------
+`riak_control` | Set to `off` to disable the admin panel. | `off`
+`riak_control.auth.mode` | Authentication mode used for access to the admin panel. Options are `off` (which is the default) or `userlist`. | `off`
+`riak_control.auth.user.$username.password` | If Riak Control's authentication mode (`riak_control.auth.mode`) is set to `userlist`, this is the list of usernames and passwords for access to the admin panel. |
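+
+A sketch of enabling Riak Control with a single user (the username and password below are placeholders):
+
+```riakconf
+riak_control = on
+riak_control.auth.mode = userlist
+riak_control.auth.user.admin.password = s3cret
+```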
+
+## Runtime Health
+
+Configurable parameters for interaction between Riak and the underlying
+operating system.
+
+Config | Description | Default
+:------|:------------|:-------
+`runtime_health.triggers.distribution_port` | Whether distribution ports with full input buffers will be counted as busy. Distribution ports connect Riak nodes within a single cluster. | `on`
+`runtime_health.triggers.port` | Whether ports with full input buffers will be counted as busy. Ports can represent open files or network sockets. | `on`
+`runtime_health.triggers.process.heap_size` | A process will become busy when its heap exceeds this size (in bytes). | `160444000`
+`runtime_health.triggers.process.garbage_collection` | A process will become busy when it exceeds this amount of time doing garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. Note: Enabling this setting can cause performance problems on multi-core systems. | `off`
+`runtime_health.triggers.process.long_schedule` | A process will become busy when it exceeds this amount of time during a single process scheduling and execution cycle. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. | `off`
+`runtime_health.thresholds.busy_ports` | The threshold at which a warning will be triggered about the number of ports that are overly busy. Ports with full input buffers count toward this threshold. | `2`
+`runtime_health.thresholds.busy_processes` | The threshold at which a warning will be triggered about the number of processes that are overly busy. Processes with large heaps or that take a long time to garbage collect will count toward this threshold. | `30`
+
+## Default Bucket Properties
+
+When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified.
+
+Config | Description | Default
+:------|:------------|:-------
+`buckets.default.allow_mult` | Whether or not siblings are allowed. Note: See [Conflict Resolution][usage conflict resolution] for a discussion of siblings. | `false`
+`buckets.default.basic_quorum` | Whether not-founds will invoke the "basic quorum" optimization. This setting will short-circuit fetches where the majority of replicas report that the key is not found. Only used when `notfound_ok` is set to `false`. | `false`
+`buckets.default.dw` | The number of replicas which must reply to a write request indicating that the write was committed to durable storage for the write to be deemed successful. | `quorum`
+`buckets.default.last_write_wins` | Whether conflicting writes resolve via timestamp. | `false`
+`buckets.default.merge_strategy` | The strategy used when merging objects that potentially have conflicts. The default is `2` in Riak 2.0 for typed buckets and `1` for non-typed buckets. This setting reduces sibling creation through additional metadata on each sibling (also known as Dotted Version Vectors). Setting this to `1` is the default for Riak 1.4 and earlier, and may duplicate siblings that originated in the same write. | `1`
+`buckets.default.n_val` | The number of replicas stored in **non-typed** buckets. For typed buckets, the default is `3` unless changed explicitly for that bucket type. Note: See Replication Properties for further discussion. | `3`
+`buckets.default.notfound_ok` | Whether not-founds will count toward a quorum of reads. | `true`
+`buckets.default.postcommit` | A space-delimited list of functions that will be run after a value is stored. Only Erlang functions are allowed, using the `module:function` format. |
+`buckets.default.precommit` | A space-delimited list of functions that will be run before a value is stored, and that can abort the write. Only Erlang functions are allowed, using the `module:function` format. |
+`buckets.default.pr` | The number of primary, non-fallback replicas that must reply to a read request. | `0`
+`buckets.default.pw` | The number of primary, non-fallback replicas which must reply to a write request. | `0`
+`buckets.default.r` | The number of replicas which must reply to a read request. | `quorum`
+`buckets.default.w` | The number of replicas which must reply to a write request, indicating that the write was received. | `quorum`
+`buckets.default.rw` | The number of replicas which must reply to a delete request. | `quorum`
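+
+A sketch of overriding a few of these defaults in `riak.conf`:
+
+```riakconf
+buckets.default.allow_mult = true
+buckets.default.n_val = 3
+buckets.default.r = quorum
+buckets.default.w = quorum
+```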
+
+## Object Settings
+
+Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context].
+
+Config | Description | Default
+:------|:------------|:-------
+`object.format` | Controls which binary representation of a riak value is stored on disk. Options are `0`, which will use the original `erlang:term_to_binary` format but has a higher space overhead, or `1`, which will tell Riak to utilize a new format for more compact storage of small values. | `1`
+`object.siblings.maximum` | Writing an object with more than this number of siblings will send a failure to the client. | `100`
+`object.siblings.warning_threshold` | Writing an object with more than this number of siblings will generate a warning in the logs. | `25`
+`object.size.maximum` | Writing an object larger than this will send a failure to the client. | `50MB`
+`object.size.warning_threshold` | Reading or writing objects larger than this size will write a warning in the logs. | `5MB`
+
+## Erlang VM
+
+In the older configuration system, the Erlang VM in which Riak runs was
+configured using a `vm.args` file. In the new, `riak.conf`-based
+system, the Erlang VM can be configured using the parameters in the
+table below.
+
+Config | Description | Default
+:------|:------------|:-------
+`erlang.async_threads` | The number of threads in the Erlang VM's asynchronous thread pool. The valid range is 0-1024. If thread support is not available, this parameter will have no impact; if thread support is available, the default value is 64. This is the equivalent of the `+A` flag. | `64` (if thread support is available)
+`erlang.async_threads.stack_size` | If thread support is available in your Erlang VM, this parameter sets the amount of memory allocated to each asynchronous thread, which you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, which translates to 64-32768 KB on 32-bit architectures. Although there is no default, we suggest a stack size of 16 kilowords, which translates to 64 KB. This small default size has been chosen because the number of asynchronous threads, set using the `erlang.async_threads` parameter explained above, might be quite large. The 64 KB default is enough for drivers delivered with Erlang/OTP but might not be large enough to accommodate drivers that use the `driver_async()` functionality. |
+`erlang.distribution.net_ticktime` | The net kernel is an Erlang system process that provides various forms of network monitoring. In a Riak cluster, one of the functions of the net kernel is to periodically check node liveness. Tick time is the frequency with which those checks happen; if you set this parameter to `10`, for example, the tick will occur once every 10 seconds. |
+`erlang.distribution.port_range.minimum` | For ease of firewall configuration, the Erlang distribution can be bound to a limited range of TCP ports. If this parameter is set, and `erlang.distribution.port_range.maximum` is not set, only this port will be used. If the minimum is unset, no restriction will be made on the port range. Instead, Erlang will listen on a random high-numbered port. |
+`erlang.distribution.port_range.maximum` | See the description for `erlang.distribution.port_range.minimum` directly above. |
+`erlang.schedulers.force_wakeup_interval` | Sets the scheduler forced wakeup interval. All run queues will be scanned each time period specified (in milliseconds). While there are sleeping schedulers in the system, one scheduler will be woken for each non-empty run queue found. An interval of zero disables this feature, which is the default. This feature is a workaround for lengthy-executing native code, and native code that does not properly bump reductions. |
+`erlang.schedulers.compaction_of_load` | Enables or disables the Erlang scheduler's compaction of load. When enabled (which is the default), load balancing will strive to establish a load distribution that causes as many scheduler threads as possible to be fully loaded, i.e. not to run out of scheduled work. This is accomplished by migrating load, such as running processes, onto a smaller set of schedulers when schedulers frequently run out of work. When disabled, the frequency at which schedulers run out of work will not be taken into account by the load balancing logic. | `true` (enabled)
+`erlang.schedulers.utilization_balancing` | Enables or disables the Erlang scheduler's balancing of load. By default, scheduler utilization balancing is disabled while scheduler compaction of load is enabled, i.e. `erlang.schedulers.compaction_of_load` is set to `true`. In this state, the Erlang VM will strive for a load distribution which causes as many scheduler threads as possible to be fully loaded, i.e. to not run out of work. When load balancing is enabled using this setting, the system will instead attempt to spread scheduler utilization equally between schedulers. | `false` (disabled)
+`erlang.distribution_buffer_size` | For nodes with many `busy_dist_port` events, Basho recommends raising the sender-side network distribution buffer size. 32MB may not be sufficient for some workloads and is a suggested starting point. Erlangers may know this as `+zdbbl`. | `32MB`
+`erlang.process_limit` | Raises the default Erlang process limit. | `256000`
+`erlang.max_ets_tables` | Raises the ETS table limit. | `256000`
+`erlang.crash_dump` | Sets the location of crash dumps. | `./log/erl_crash.dump`
+`erlang.fullsweep_after` | A non-negative integer which indicates how many times generational garbage collections can be done without forcing a fullsweep collection. In low-memory systems (especially without virtual memory), setting the value to `0` can help to conserve memory. | `0`
+`erlang.max_ports` | The number of concurrent ports/sockets. The valid range is 1024 to 134217727. | `65536`
+`erlang.K` | Enables or disables the kernel poll functionality if the emulator supports it. If the emulator does not support kernel poll, and the `K` flag is passed to the emulator, a warning is issued at startup. | `on`
+`erlang.schedulers.total` | Sets the number of scheduler threads to create and scheduler threads to set online when `erlang.smp` support has been enabled. The maximum for both values is 1024. If the Erlang runtime system is able to determine the number of logical processors configured and logical processors available, `schedulers.total` will default to the logical processors configured, and `schedulers.online` will default to the number of logical processors available. Otherwise, the default values will be 1. `schedulers.total` may be omitted if `schedulers.online` is not, and vice versa. If `schedulers.total` or `schedulers.online` is specified as a negative number, the value is subtracted from the default number of logical processors configured or logical processors available, respectively. Specifying the value `0` for `schedulers.total` or `schedulers.online` resets the number of scheduler threads or scheduler threads online, respectively, to its default value. This option is ignored if the emulator doesn't have SMP support enabled (see the `erlang.smp` flag). |
+`erlang.schedulers.online` | See the description for `erlang.schedulers.total` directly above. |
+`erlang.W` | Sets the mapping of warning messages for `error_logger`. Messages sent to the error logger using one of the warning routines can be mapped either to errors, warnings (`w`, which is the default), or info reports (`i`). | `w`
+`erlang.smp` | Starts the Erlang runtime system with SMP support enabled. This may fail if no runtime system with SMP support is available. The `auto` setting starts the Erlang runtime system with SMP support enabled if it is available and more than one logical processor is detected. A value of `disable` starts a runtime system without SMP support. Note: The runtime system with SMP support will not be available on all supported platforms. See also the `erlang.schedulers` settings. Some native extensions (NIFs) require use of the SMP emulator. | `enable`
+`erlang.shutdown_time` | Limits how long the Erlang VM spends shutting down. After the specified duration elapses, all existing processes are killed. | `10s`
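+
+A sketch of a few commonly tuned VM settings in `riak.conf` (the values below are illustrative, not recommendations):
+
+```riakconf
+erlang.distribution_buffer_size = 32MB
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+erlang.process_limit = 256000
+```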
+
+## JavaScript MapReduce
+
+Configurable parameters for Riak's now-deprecated JavaScript
+[MapReduce][usage mapreduce] system.
+
+Config | Description | Default
+:------|:------------|:-------
+`javascript.source_dir` | A directory containing the JavaScript source files which will be loaded by Riak when it initializes JavaScript VMs. |
+`javascript.maximum_stack_size` | The maximum amount of thread stack memory to allocate to each JavaScript virtual machine. | `16MB`
+`javascript.maximum_heap_size` | The maximum amount of memory allocated to each JavaScript virtual machine. | `8MB`
+`javascript.hook_pool_size` | The number of JavaScript virtual machines available for executing pre-commit hook functions. | `2`
+`javascript.reduce_pool_size` | The number of JavaScript virtual machines available for executing reduce functions. | `6`
+`javascript.map_pool_size` | The number of JavaScript virtual machines available for executing map functions. | `8`
+
+## Security
+
+Configurable parameters for [Riak KV Security][security index].
+
+Config | Description | Default
+:------|:------------|:-------
+`ssl.cacertfile` | The default signing authority location for HTTPS. | `$(platform_etc_dir)/cacertfile.pem`
+`ssl.keyfile` | Default key location for HTTPS. | `$(platform_etc_dir)/key.pem`
+`ssl.certfile` | Default cert location for HTTPS. | `$(platform_etc_dir)/cert.pem`
+`secure_referer_check` | Measures were added in Riak 1.2 to counteract cross-site scripting and request-forgery attacks. Some reverse proxies cannot remove the `Referer` header and make serving data directly from Riak impossible. Setting this to `off` disables this security check. | `on`
+`check_crl` | Whether to check the certificate revocation list (CRL) of a client certificate. This defaults to `on`, but some CAs may not maintain or define a CRL, so this can be disabled if no CRL is available. | `on`
+`tls_protocols.sslv3` | Determines which SSL/TLS versions are allowed. By default, only TLS 1.2 is allowed, but other versions can be enabled if clients don't support the latest TLS standard. It is strongly recommended that SSLv3 not be enabled unless absolutely necessary. More than one protocol can be enabled at once. The `tls_protocols` parameters below can be used to turn different versions on and off. | `off`
+`tls_protocols.tlsv1.2` | | `on`
+`tls_protocols.tlsv1.1` | | `off`
+`tls_protocols.tlsv1` | | `off`
+`honor_cipher_order` | Whether to prefer the order in which the server lists its ciphers. When set to `off`, the client's preferred cipher order dictates which cipher is chosen. | `on`
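+
+A sketch of restricting clients to TLS 1.2 and pointing Riak at your certificates (the paths below are placeholders):
+
+```riakconf
+tls_protocols.tlsv1.2 = on
+tls_protocols.tlsv1.1 = off
+ssl.certfile = $(platform_etc_dir)/cert.pem
+ssl.keyfile = $(platform_etc_dir)/key.pem
+ssl.cacertfile = $(platform_etc_dir)/cacertfile.pem
+```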
+
+## Client Interfaces
+
+Configurable parameters for clients connecting to Riak either through
+Riak's Protocol Buffers or HTTP API.
+
+Config | Description | Default
+:------|:------------|:-------
+`protobuf.nagle` | Turns off Nagle's algorithm for Protocol Buffers connections. This is equivalent to setting the `TCP_NODELAY` option on the socket. | `off`
+`protobuf.backlog` | The maximum length to which the queue of pending connections may grow. If set, it must be an integer greater than zero. If you anticipate a huge number of connections being initialized simultaneously, set this number higher. | `128`
+`listener.protobuf.$name` | This is the IP address and TCP port to which the Riak Protocol Buffers interface will bind. | `{"127.0.0.1",8087}`
+`listener.http.$name` | This is the IP address and TCP port to which the Riak HTTP interface will bind. | `{"127.0.0.1",8098}`
+`listener.https.$name` | This is the IP address and TCP port to which the Riak HTTPS interface will bind. |
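+
+For example, binding the Protocol Buffers and HTTP listeners to a non-loopback address (a sketch; the address below is a placeholder):
+
+```riakconf
+listener.protobuf.internal = 10.0.0.5:8087
+listener.http.internal = 10.0.0.5:8098
+```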
+
+## Logging
+
+Configurable parameters for [lager](https://github.com/basho/lager),
+Riak's logging system.
+
+Config | Description | Default
+:------|:------------|:-------
+`log.console` | Where to emit the default log messages (typically at `info` severity). Possible values: `off`, which disables console log messages; `file`, which specifies that log messages will be output to the file specified by `log.console.file`; `console`, which outputs messages to standard output (seen when using `riak attach-direct`); or `both`, which outputs messages both to the file specified in `log.console.file` and to standard out. | `file`
+`log.console.file` | When `log.console` is set to `file` or `both`, this parameter determines the path of the file to which console messages will be logged. | `./log/console.log`
+`log.console.level` | The severity level of the console log. Possible values: `debug`, `info`, `warning`, `error`. | `info`
+`log.crash` | Whether to enable the crash log. | `on`
+`log.crash.file` | If the crash log is enabled, the file where its messages will be written. | `./log/crash.log`
+`log.crash.maximum_message_size` | Maximum size of individual messages in the crash log. | `64KB`
+`log.crash.rotation` | The schedule on which to rotate the crash log. | `$D0`
+`log.crash.rotation.keep` | The number of rotated crash logs to keep. When set to `current`, only the current open log file is kept. Otherwise, an integer can be specified. | `5`
+`log.crash.size` | Maximum size of the crash log before it is rotated. | `10MB`
+`log.error.file` | The file where error messages will be logged. | `./log/error.log`
+`log.error.messages_per_second` | Maximum number of `error_logger` messages to handle per second. | `100`
+`log.error.redirect` | Whether to redirect `error_logger` messages into lager. | `on`
+`log.syslog` | When set to `on`, enables log output to syslog. | `off`
+`log.syslog.facility` | Sets the facility level of syslog output if `log.syslog` is set to `on`. Possible values: `auth`, `authpriv`, `clock`, `cron`, `daemon`, `ftp`, `kern`, `lpr`, `mail`, `news`, `syslog`, `user`, `uucp`. In addition to these settings, you may also select `local0` through `local7`. | `daemon`
+`log.syslog.ident` | If `log.syslog` is set to `on`, this setting determines the prefix appended to each syslog message. | `riak`
+`log.syslog.level` | If `log.syslog` is set to `on`, this setting determines the log level of syslog output. Possible values: `alert`, `critical`, `debug`, `emergency`, `error`, `info`, `none`, `notice`, `warning`. | `info`
+`sasl` | Whether to enable `sasl`, Erlang's built-in error logger. | `off`
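+
+A sketch of sending logs both to a file and to syslog:
+
+```riakconf
+log.console = both
+log.console.level = info
+log.syslog = on
+log.syslog.facility = daemon
+log.syslog.level = warning
+```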
+
+## Active Anti-Entropy
+
+Configurable parameters for Riak's active anti-entropy subsystem.
+
+Config | Description | Default
+:------|:------------|:-------
+`anti_entropy` | How Riak will repair out-of-sync keys. If set to `active`, out-of-sync keys will be repaired in the background; if set to `passive`, out-of-sync keys are only repaired on read; and if set to `active-debug`, verbose debugging information will be output. | `active`
+`search.anti_entropy.throttle` | Whether the distributed throttle for Active Anti-Entropy is enabled. | `on`
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | Sets the throttling tiers for Active Anti-Entropy. Each tier is a minimum vnode mailbox size and a time-delay that the throttle should observe at that size and above. For example, `anti_entropy.throttle.tier1.mailbox_size = 0`, `anti_entropy.throttle.tier1.delay = 0ms`, `anti_entropy.throttle.tier2.mailbox_size = 40`, `anti_entropy.throttle.tier2.delay = 5ms`, etc. If configured, there must be a tier which includes a mailbox size of 0. Both `.mailbox_size` and `.delay` must be set for each tier. |
+`search.anti_entropy.throttle.$tier.delay` | See the description for `anti_entropy.throttle.$tier.mailbox_size` above. |
+`anti_entropy.bloomfilter` | Bloom filters are highly effective in shortcutting data queries that are destined to not find the requested key, though they tend to entail a small performance cost. | `on`
+`anti_entropy.max_open_files` | Along with `anti_entropy.write_buffer_size` below, one of the LevelDB options used by Active Anti-Entropy to generate the LevelDB-backed on-disk hashtrees. | `20`
+`anti_entropy.write_buffer_size` | The LevelDB options used by Active Anti-Entropy to generate the LevelDB-backed on-disk hashtrees. | `4MB`
+`anti_entropy.data_dir` | The directory where AAE hash trees are stored. | `./data/anti_entropy`
+`anti_entropy.trigger_interval` | The tick determines how often the Active Anti-Entropy manager looks for work to do (building/expiring trees, triggering exchanges, etc). Lowering this value will speed up the rate at which all replicas are synced across the cluster. Increasing the value is not recommended. | `15s`
+`anti_entropy.concurrency_limit` | Limit how many Active Anti-Entropy exchanges or builds can happen concurrently. | `2`
+`anti_entropy.tree.expiry` | Determines how often hash trees are expired after being built. Periodically expiring a hash tree ensures that the on-disk hash tree data stays consistent with the actual K/V backend data. It also helps Riak identify silent disk failures and bit rot. However, expiration is not needed for normal active anti-entropy operations and should be infrequent for performance reasons. The time is specified in milliseconds. | `1w`
+`anti_entropy.tree.build_limit.per_timespan` | See `anti_entropy.tree.build_limit.number` directly below. | `1h`
+`anti_entropy.tree.build_limit.number` | Restrict how fast AAE can build hash trees. Building the tree for a given partition requires a full scan over that partition's data. Once built, trees stay built until they are expired. `.number` is the number of builds; `.per_timespan` is the amount of time in which that number of builds occurs. | `1`
+`anti_entropy.use_background_manager` | Whether AAE is to use a background process to limit AAE tree rebuilds. If set to `on`, this will help to prevent system response degradation under times of heavy load from multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
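+
+A sketch of the corresponding `riak.conf` entries:
+
+```riakconf
+anti_entropy = active
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+anti_entropy.concurrency_limit = 2
+anti_entropy.tree.expiry = 1w
+```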
+
+## TicTac Active Anti-Entropy
+
+Config | Description | Default
+:------|:------------|:-------
+`tictacaae_active` | Changes TicTac AAE from passive to active. If you want to run TicTac AAE alongside legacy AAE, set both to active. Can be `active` or `passive`. | `passive`
+`tictacaae_dataroot` | Path under which AAE datafiles will be stored. | `(platform_data_dir)/tictac_aae`
+`tictacaae_parallelstore` | When running in parallel mode, which will be the default if the backend does not support native TicTac AAE (i.e. is not leveled), the type of parallel key store to be kept - `leveled_ko` (leveled and key-ordered) or `leveled_so` (leveled and segment-ordered). When running in native mode, this setting is ignored. | `leveled_ko`
+`tictacaae_rebuildwait` | The minimum number of hours to wait between rebuilds. | `336`
+`tictacaae_rebuilddelay` | The number of seconds which represents the length of the period in which the next rebuild will be scheduled. So if all vnodes are scheduled to rebuild at the same time, they will actually rebuild randomly between 0 and this value (in seconds) after the rebuild time. | `345600`
+
+## Intra-Cluster Handoff
+
+Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].
+
+Config | Description | Default
+:------|:------------|:-------
+`handoff.max_rejects` | The maximum number of times that a secondary system within Riak, such as Riak Search, can block handoff of primary key/value data. The approximate maximum duration that a vnode can be blocked can be determined by multiplying this setting by `vnode_management_timer`. If you want to prevent handoff from ever being blocked by a secondary system, set this parameter to `0`. | `6`
+`handoff.inbound` | Whether inbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.outbound` | Whether outbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.port` | Specifies the TCP port that Riak uses for intra-cluster data handoff. | `8099`
+`handoff.ssl.certfile` | To encrypt `riak_core` intra-cluster data handoff traffic, uncomment this line and edit its path to an appropriate certfile and keyfile. |
+`handoff.ssl.keyfile` | The keyfile paired with the certfile specified in `.certfile`. |
+`handoff.use_background_manager` | Whether Riak will use a background manager to limit K/V handoff. This can help to prevent system response degradation during times of heavy load caused by multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
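+
+A sketch of encrypting handoff traffic (the paths below are placeholders):
+
+```riakconf
+handoff.ssl.certfile = /etc/riak/handoff-cert.pem
+handoff.ssl.keyfile = /etc/riak/handoff-key.pem
+```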
+
+## Riak Data Types
+
+Config | Description | Default
+:------|:------------|:-------
+`datatypes.compression_level` | Whether serialized Data Types will use compression, and at what level. When set to an integer, the parameter refers to the aggressiveness of compression, on a scale from 0 to 9. `on` is equivalent to 6, whereas `off` is equivalent to 0. Higher values for compression tend to be more CPU intensive. | `1`
+
+## SNMP
+
+Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher.
+
+## JMX
+
+Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher.
+
+## Strong Consistency
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment.
+
+Riak's strong consistency feature has a variety of tunable parameters
+that allow you to enable and disable strong consistency, modify the
+behavior of leaders and followers, set various timeouts, and more. More
+detailed information from an operations perspective can be found in our
+documentation on [managing strong consistency][cluster ops strong consistency].
+
+Strong consistency is disabled by default. The `strong_consistency`
+parameter enables you to turn it on. This setting is available in each
+node's `riak.conf` file.
+
+Config | Description | Default
+:------|:------------|:-------
+`strong_consistency` | Enables the consensus subsystem used for strongly consistent Riak operations if set to `on`. | `off`
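+
+To turn it on (a sketch; bear in mind the caveats above):
+
+```riakconf
+strong_consistency = on
+```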
+
+Unlike the `strong_consistency` setting, the settings listed below are
+available only in `advanced.config`, in the `riak_ensemble` section of
+that file. That section looks like this:
+
+```advancedconfig
+{riak_ensemble, [
+    {parameter1, value},
+    {parameter2, value},
+    %% Other settings
+  ]}
+```
+
+Further instructions on setting parameters in `advanced.config` can be
+found in the [advanced configuration](#advanced-configuration) section below.
+
+Using these settings properly demands a firm understanding of the basic
+architecture of Riak's implementation of strong consistency. We highly
+recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind
+strong consistency before changing the defaults on these parameters.
+
+Config | Description | Default
+:------|:------------|:-------
+`ensemble_tick` | The rate at which leaders perform their periodic duties, including refreshing the leader lease, in milliseconds. This setting must be lower than both the `lease_duration` and `follower_timeout` settings (both listed below). Lower values mean that leaders perform their duties more frequently, which can allow for faster convergence if a leader goes offline and then returns to the ensemble; higher values mean that leaders perform their duties less frequently, which can reduce network overhead. | `500`
+`lease_duration` | Determines how long a leader lease remains valid without being refreshed (in milliseconds). This should be set higher than the `ensemble_tick` setting (listed above) so that leaders have time to refresh their leases before they time out, and it must be set lower than the `follower_timeout` setting (listed below). | `ensemble_tick * 3/2`
+`follower_timeout` | Determines how long a follower waits to hear from a leader before it abandons the leader (in milliseconds). This must be set greater than the `lease_duration` setting. | `lease_duration * 4`
+`alive_tokens` | Determines the number of ticks the leader will wait to hear from its associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before `ensemble_tick * alive_tokens` milliseconds have elapsed, the leader will give up leadership. It may be necessary to raise this setting if your Riak vnodes are frequently stalling out on slow backend reads/writes. If this setting is too low, it may cause slow requests to time out earlier than the request timeout. | `2`
+`storage_delay` | Determines how long the consensus subsystem delays syncing to disk when performing certain metadata operations (in milliseconds). This delay allows multiple operations to be coalesced into a single disk write. We do not recommend that you change this setting. | `50`
+`storage_tick` | Determines how often the consensus subsystem writes data to disk that was requested to be written asynchronously (in milliseconds). We do not recommend that you change this setting. | `5000`
+`trust_lease` | Determines whether leader leases are used to optimize reads. When set to `true`, a leader with a valid lease will handle the read directly without contacting any followers; when set to `false`, the leader will always contact followers. For more information, see our internal documentation on leader leases. | `true`
+`peer_get_timeout` | Determines the timeout used internally for reading consistent data, in milliseconds. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_put_timeout` | Determines the timeout, in milliseconds, used internally for writing consistent data. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_workers` | The number of concurrent workers used by the leader to service requests. Increasing this setting may boost performance depending on the workload. | `1`
+`tree_validation` | Determines whether Riak considers peer Merkle trees to be trusted after a node restart. When validation is enabled (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted majority. This is the safest option, as it protects Riak against undetected corruption of the Merkle tree. However, this mode reduces Riak availability since it can sometimes require more than a simple majority of nodes to be online and reachable. | `true`
+`synchronous_tree_updates` | Determines whether the metadata updates to follower Merkle trees are handled synchronously or not. When set to `true`, Riak requires two quorum round trips to occur before replying back to the client: the first quorum request to write the actual object and the second to write the Merkle tree data. When set to `false`, Riak will respond back to the client after the first round trip, letting the metadata update happen asynchronously. It's important to note that the leader always updates its local Merkle tree before responding to the client; this setting only affects the metadata writes sent to followers. In principle, asynchronous updates are unsafe: if the leader crashes before sending the metadata updates, and all followers that had acknowledged the object write somehow revert to the object value immediately prior to a write request, a future read could return the immediately preceding value without realizing that it was incorrect. Given that this scenario is unlikely, this setting defaults to `false` in the name of improved performance. | `false`
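+
+As a sketch, a `riak_ensemble` section with a couple of these defaults overridden (the values below are illustrative, not recommendations):
+
+```advancedconfig
+{riak_ensemble, [
+    {ensemble_tick, 500},
+    {peer_workers, 2},
+    {trust_lease, true}
+  ]}
+```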
+
+## Miscellaneous
+
+Config | Description | Default
+:------|:------------|:-------
+`metadata_cache_size` | This setting controls the size of the metadata cache for each vnode. The cache can be disabled by setting it to `off` (this is the default). Enabling the cache should not be necessary in disk-based backends (i.e. LevelDB and Bitcask) but it can help performance in the Memory backend. Note that this setting adjusts the size of the ETS table rather than the actual data. Thus, more space may be used than the simple size * number-of-vnodes calculation would imply. Caution: This setting should not be changed without extensive benchmarking. | `off`
+`max_concurrent_requests` | The maximum number of concurrent requests of each type (GET or PUT) that is allowed. Setting this value to `infinite` disables overload protection. The `erlang.process_limit` should be at least 3 times this setting. | `50000`
+`dtrace` | Whether DTrace is enabled. Do not enable unless your Erlang/OTP runtime is compiled to support DTrace, which is available in R15B01 (supported by the official source package) and in R14B04 via a custom repository and branch. | `off`
+`vnode_management_timer` | Sets the frequency with which vnodes attempt to trigger handoff between this node and other nodes in the cluster. | `10s` (10 seconds)
+`retry_put_coordinator_failure` | When a PUT (i.e. write) request fails, Riak will retry the operation if this setting is set to `on`, which is the default. Setting it to `off` will speed response times on PUT requests in general, but at the risk of potentially increasing the likelihood of write failure. | `on`
+`background_manager` | Riak's background manager is a subsystem that coordinates access to shared resources from other Riak subsystems. The background manager can help to prevent system response degradation under times of heavy load caused by multiple background tasks. | `on`
+
+## Advanced Configuration
+
+The `advanced.config` file takes the same format as the `app.config`
+file familiar to users of versions of Riak prior to 2.0. Here is an
+example:
+
+```advancedconfig
+[
+  {riak_core,
+    [
+      {cluster_mgr, {"127.0.0.1", 8098}},
+      %% more riak_core configs
+    ]},
+
+  {riak_repl,
+    [
+      {data_root, "/var/db/riak/riak_repl/"},
+      %% more riak_repl configs
+    ]}
+].
+```
+
+The following settings are available in the `advanced.config` file:
+
+#### `riak_repl` settings
+
+Most settings that are configurable through `advanced.config` are
+related to Riak's `riak_repl` subsystem.
+
+Config | Description | Default
+:------|:------------|:-------
+`data_root` | Path (relative or absolute) to the working directory for the replication process. | `/var/db/riak/riak_repl/`
+`max_fssource_cluster` | The hard limit of fullsync workers that will be running on the source side of a cluster across all nodes on that cluster for a fullsync to a sink cluster. This means that if you have configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `5`
+`max_fssource_node` | This setting limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `1`
+`max_fssink_node` | This setting limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `1`
+`fullsync_on_connect` | Whether to initiate a fullsync on initial connection from the sink cluster. | `true`
+`fullsync_interval` | A single-integer value representing the duration to wait, in minutes, between fullsyncs, or a list of `{clustername, time_in_minutes}` pairs for each sink participating in fullsync replication. | `30`
+`rtq_max_bytes` | The maximum size, in bytes, to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync. | `104857600`
+`proxy_get` | Whether to enable Riak CS `proxy_get` and block filter. | `disabled`
+`rt_heartbeat_interval` | A heartbeat message is sent from the source to the sink every `rt_heartbeat_interval` seconds. Setting `rt_heartbeat_interval` to `undefined` disables the realtime heartbeat. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
+`rt_heartbeat_timeout` | If a heartbeat response is not received within the time period specified by this setting (in seconds), the source connection exits and will be re-established. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
+`realtime_connection_rebalance_max_delay_secs` | Should a server on the source cluster be restarted, this is the amount of time (in seconds) before the realtime connections are rebalanced by a change in the number of source nodes. | `300`
+`fullsync_use_background_manager` | By default, fullsync replication will attempt to coordinate with other Riak subsystems that may be contending for the same resources. This will help to prevent system response degradations during times of heavy load from multiple background tasks. To disable background coordination, set this parameter to `false`. This feature is available only in Riak KV Enterprise Edition 2.0 and later as well as Riak KV 2.2.6 onwards. | `true`
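+
+A sketch of a `riak_repl` section tuning fullsync concurrency (the values below are illustrative):
+
+```advancedconfig
+{riak_repl, [
+    {data_root, "/var/db/riak/riak_repl/"},
+    {max_fssource_cluster, 5},
+    {max_fssource_node, 1},
+    {max_fssink_node, 1},
+    {fullsync_interval, 30}
+  ]}
+```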
+
+#### Upgrading Riak Search with `advanced.config`
+
+If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][use ref search] (codename Yokozuna), you will need to enable
+legacy Search while the upgrade is underway. You can add the following
+snippet to your `advanced.config` configuration to do so:
+
+```advancedconfig
+[
+  %% Other configs
+
+  {riak_search, [ {enabled, true} ]},
+  {merge_index, [
+    {data_root, "/var/lib/riak/merge_index"},
+    {buffer_rollover_size, 1048576},
+    {max_compact_segments, 20}
+  ]},
+
+  %% Other configs
+].
+```
+
+#### Other settings
+
+There are three non-`riak_repl` settings available in
+`advanced.config`.
+
+Config | Section | Description | Default
+:------|:--------|:------------|:-------
+`add_paths` | `riak_kv` | If you are installing custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies the paths to any compiled `.beam` files that you wish to use. This is expressed as a list of absolute paths on the node's filesystem, e.g. `[ "/tmp", "/other" ]`. |
+`cluster_mgr` | `riak_core` | The cluster manager listens for connections from remote clusters on the specified IP and port. Every node runs one cluster manager, but only the cluster manager running on the cluster leader will service requests. This can change as nodes enter and leave the cluster. | `{"127.0.0.1", 9080}`
+`delete_mode` | `riak_kv` | Specifies how Riak behaves after objects are marked for deletion with a tombstone. There are three possible settings: `keep` disables tombstone removal altogether; `immediate` removes objects' tombstones as soon as the delete request is received; and setting `delete_mode` to an integer value specifies the number of milliseconds to wait before removing tombstones. More information can be found in Object Deletion. | `3000` (3 seconds)
+`target_n_val` | `riak_core` | The highest `n_val` that you generally intend to use. This setting affects how partitions are distributed within the cluster, helping to ensure that "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. You will need to change this setting only in rare circumstances. Assuming that `ring_size` is a power of 2, the ideal value for this setting is both (a) greater than or equal to the largest `n_val` for any bucket type and (b) an even divisor of the number of partitions in the ring, i.e. `ring_size`. The default is `4`, and the number of physical nodes in your cluster must be greater than `target_n_val` for this setting to be effective at preventing hot spots. | `4`
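+
+For instance, a sketch of setting `delete_mode` and `add_paths` in `advanced.config` (the path below is a placeholder):
+
+```advancedconfig
+[
+  {riak_kv, [
+      {delete_mode, 3000},
+      {add_paths, ["/opt/riak/custom_beams"]}
+  ]}
+].
+```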
+
+## Cluster Job Controls
+
+{{% note title="Warning" %}}
+Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
+{{% /note %}}
+
+The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.
+
+Field | Default | Valid values
+:-----|:--------|:-------------
+`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
+`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`
+
+
+
+
diff --git a/content/riak/kv/2.9.9/configuring/search.md b/content/riak/kv/2.9.9/configuring/search.md
new file mode 100644
index 0000000000..03b08918cf
--- /dev/null
+++ b/content/riak/kv/2.9.9/configuring/search.md
@@ -0,0 +1,278 @@
+---
+title: "Riak Search Settings"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Riak Search Settings"
+    identifier: "configuring_search"
+    weight: 160
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/advanced/configs/search/
+  - /riak/kv/2.9.9/ops/advanced/configs/search/
+---
+
+[usage search]: {{}}riak/kv/2.9.9/developing/usage/search
+[usage search schema]: {{}}riak/kv/2.9.9/developing/usage/search-schemas
+[usage search data types]: {{}}riak/kv/2.9.9/developing/usage/searching-data-types
+[usage custom extractors]: {{}}riak/kv/2.9.9/developing/usage/custom-extractors
+[cluster-ops aae throttle]: {{}}riak/kv/2.9.9/using/cluster-operations/active-anti-entropy/#throttling
+[config reference]: {{}}riak/kv/2.9.9/configuring/reference
+[config reference#search]: {{}}riak/kv/2.9.9/configuring/reference/#search
+[glossary aae]: {{}}riak/kv/2.9.9/learn/glossary/#active-anti-entropy-aae
+[security index]: {{}}riak/kv/2.9.9/using/security/
+
+[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
+[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation
+
+This page covers how to use Riak Search (with
+[Solr](http://lucene.apache.org/solr/) integration).
+
+For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].
+
+If you are looking to develop on or with Riak Search, take a look at:
+
+* [Using Search][usage search]
+* [Search Schema][usage search schema]
+* [Custom Search Extractors][usage custom extractors]
+* [Riak KV Data Types and Search][usage search data types]
+
+## Overview
+
+We'll be walking through:
+
+1. [Prerequisites](#prerequisites)
+2. [Enabling Riak Search](#enabling-riak-search)
+3. [Search Configuration Settings](#search-config-settings)
+4. [Additional Solr Information](#more-on-solr)
+
+## Prerequisites
+
+Because Solr is a Java application, you will need to install **Java 7
+or later** on every node. Installation packages can be found on the [Java SE Downloads
+page][java se downloads], and installation instructions in the [Java SE documentation site][java se docs].
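+
+You can confirm which Java version is on a node's path with, for example:
+
+```bash
+java -version
+```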
+ + +## Enabling Riak Search + +Riak Search is not enabled by default, so you must enable it in every +node's [configuration file][config reference] as follows: + +```riak.conf +search = on +``` + + +## Search Config Settings + +You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation. + +### `search` + +Enable or disable search; defaults to `off`. + +Valid values: `on` or `off` + +### `search.anti_entropy.data_dir` + +The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`. + +Valid values: a directory + +### `search.anti_entropy.throttle` + +Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`. + +Valid values: `on` or `off` + +You can read more about throttling [here][cluster-ops aae throttle]. + +### `search.anti_entropy.throttle.$tier.delay` + +Set the throttling tiers delay for [active anti-entropy][glossary aae]; no default. + +Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above. + +For example: + +``` +search.anti_entropy.throttle.tier1.solrq_queue_length = 0 +search.anti_entropy.throttle.tier1.delay = 0ms +search.anti_entropy.throttle.tier2.solrq_queue_length = 40 +search.anti_entropy.throttle.tier2.delay = 5ms +``` +will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a mailbox size of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle). + +Valid values: Non-negative integer + +### `search.anti_entropy.throttle.$tier.solrq_queue_length` + +Set the throttling tiers for [active anti-entropy][glossary aae]; no default. + +Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle +should observe at that size and above. + +For example: + +``` +search.anti_entropy.throttle.tier1.solrq_queue_length = 0 +search.anti_entropy.throttle.tier1.delay = 0ms +search.anti_entropy.throttle.tier2.solrq_queue_length = 40 +search.anti_entropy.throttle.tier2.delay = 5ms +``` +will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a mailbox size of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle). + +Valid values: Non-negative integer + +### `search.dist_query` + +Enable this node in distributed query plans; defaults to `on`. + +If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long running administrative operation (e.g. 
reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.
+
+This setting can also be changed via `riak-admin` by issuing one of the following commands:
+
+```
+riak-admin set search.dist_query=off
+```
+
+or
+
+```
+riak-admin set search.dist_query=on
+```
+
+Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` feature. Setting `search.dist_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up.
+
+Valid values: `on` or `off`
+
+### `search.index.error_threshold.failure_count`
+
+The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.
+
+Valid values: Integer
+
+### `search.index.error_threshold.failure_interval`
+
+The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.
+
+If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.
+
+Valid values: Milliseconds
+
+### `search.index.error_threshold.reset_interval`
+
+The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.
+
+Valid values: Milliseconds
+
+### `search.queue.batch.flush_interval`
+
+The maximum delay between notifications to flush batches to Solr; defaults to `1000` (milliseconds).
+
+This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.
+
+Valid values: a duration with a time unit of `ms`, `s`, `m`, or `h`
+
+### `search.queue.batch.maximum`
+
+The maximum batch size, in number of Riak objects; defaults to `500`.
+
+Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.
+
+Valid values: Integer
+
+### `search.queue.batch.minimum`
+
+The minimum batch size, in number of Riak objects; defaults to `10`.
+
+Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.
+
+Valid values: Integer
+
+### `search.queue.high_watermark`
+
+The queue high water mark; defaults to `1000`.
+
+If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem if writes into Solr start to fall behind.
+
+Valid values: Integer
+
+### `search.queue.high_watermark.purge_strategy`
+
+The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.
+
+Valid values: `purge_one`, `purge_index`, or `off`
+
+* `purge_one` removes the oldest item on the queue from an erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `purge_index` removes all items associated with one random erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `off` disables purging
+
+### `search.root_dir`
+
+The root directory in which index data and configuration is stored; defaults to `./data/yz`.
+
+Valid values: a directory
+
+### `search.solr.jvm_options`
+
+The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.
+
+Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.
+
+Valid values: Java command-line arguments
+
+### `search.solr.jmx_port`
+
+The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.
+
+Valid values: Integer
+
+**Note:** JMX ceased being a Riak feature in Riak KV 2.9.0p5. This setting is left here for reference but no longer affects anything.
+
+### `search.solr.port`
+
+The port number to which Solr binds (note: binds on every interface); defaults to `8093`.
+
+Valid values: Integer
+
+### `search.solr.start_timeout`
+
+How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.
+
+Values lower than 1s will be rounded up to 1s.
+
+Valid values: Integer with time units (e.g. 2m)
+
+## More on Solr
+
+### Solr JVM and Ports
+
+Riak Search runs one Solr process per node to manage its indexing and
+search functionality. While the underlying project manages
+index distribution, node coverage for queries, active anti-entropy
+(AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.
+
+Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports
+to the outside world.
+
+### Solr for Operators
+
+For further information on Solr monitoring, tuning, and performance, we
+recommend the following documents for getting started:
+
+* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
+* [Solr Performance
+  Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
+* [Solr Performance
+  Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
+* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)
+
+A wide variety of other documentation is available from the Solr OSS
+community.
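+
+As a recap of the settings walked through above, a minimal `riak.conf` sketch might look like this (the values simply restate the documented defaults and are illustrative only):
+
+```riak.conf
+search = on
+search.queue.batch.minimum = 10
+search.queue.batch.maximum = 500
+search.queue.batch.flush_interval = 1s
+search.queue.high_watermark = 1000
+search.solr.port = 8093
+```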
+ + + + diff --git a/content/riak/kv/2.9.9/configuring/strong-consistency.md b/content/riak/kv/2.9.9/configuring/strong-consistency.md new file mode 100644 index 0000000000..d865bede2d --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/strong-consistency.md @@ -0,0 +1,702 @@ +--- +title: "Implementing Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Implementing Strong Consistency" + identifier: "configuring_strong_consistency" + weight: 190 + parent: "configuring" +toc: true +--- + +[apps strong consistency]: {{}}riak/kv/2.9.9/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/2.9.9/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/2.9.9/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/2.9.9/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/2.9.9/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/2.9.9/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/2.9.9/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/2.9.9/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/2.9.9/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/2.9.9/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/2.9.9/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/2.9.9/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/2.9.9/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/2.9.9/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/2.9.9/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/2.9.9/developing/data-types +[glossary aae]: {{}}riak/kv/2.9.9/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/2.9.9/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/2.9.9/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/2.9.9/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/2.9.9/developing/client-libraries + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. + +This document provides information on configuring and monitoring a Riak +cluster's optional strong consistency subsystem. Documentation for +developers building applications using Riak's strong consistency feature +can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical +treatment can be found in [Strong Consistency][concept strong consistency]. + +## Minimum Cluster Size + +In order to use strong consistency in Riak, **your cluster must consist +of at least three nodes**. If it does not, all strongly consistent +operations will fail. 
If your cluster is smaller than three nodes, you
+will need to [add more nodes][cluster ops add remove node] and make sure
+that strong consistency is [enabled](#enabling-strong-consistency) on all of them.
+
+Strongly consistent operations on a given key may also fail if a
+majority of object replicas in a given ensemble are unavailable, whether
+due to slowness, crashes, or network partitions. This means that you may
+see strongly consistent operations fail even if the minimum cluster size
+requirement has been met. More information on ensembles can be found in
+[Implementation Details](#implementation-details).
+
+While strong consistency requires at least three nodes, we have a
+variety of recommendations regarding cluster size, which can be found in
+[Fault Tolerance](#fault-tolerance).
+
+## Enabling Strong Consistency
+
+Strong consistency in Riak is disabled by default. You can enable it in
+each node's [configuration files][config reference#strong-cons].
+
+```riakconf
+strong_consistency = on
+```
+
+```appconfig
+%% In the older, app.config-based system, the strong consistency
+%% parameter is enable_consensus:
+
+{riak_core, [
+    % ...
+    {enable_consensus, true},
+    % ...
+    ]}
+```
+
+Remember that you must [restart your node][use admin riak cli] for
+configuration changes to take effect.
+
+For strong consistency requirements to be applied to specific keys,
+those keys must be in [buckets][concept buckets] bearing a bucket type with the
+`consistent` property set to `true`. More information can be found in
+[Using Bucket Types][cluster ops bucket types].
+
+If you enable strong consistency on all nodes in a cluster with fewer
+than three nodes, strong consistency will be **enabled** but not yet
+**active**. Strongly consistent operations are not possible in this
+state. Once at least three nodes with strong consistency enabled are
+detected in the cluster, the system will be activated and ready for use.
+You can check on the status of the strong consistency subsystem using
+the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command.
+
+## Fault Tolerance
+
+Strongly consistent operations in Riak are necessarily less highly
+available than [eventually consistent][concept eventual consistency] operations
+because strongly consistent operations can only succeed if a **quorum**
+of object replicas are currently reachable. A quorum can be expressed as
+N / 2 + 1 (or `n_val` / 2 + 1, rounded down), meaning that 3 replicas constitute a
+quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are
+unavailable, for example, no strongly consistent operations on that
+object can succeed.
+
+While Riak uses N=3 by default, bear in mind that **higher values of N
+will allow for more fault tolerance**. The table below shows the number
+of allowable missing replicas for assorted values of N:
+
+Replicas | Allowable missing replicas
+:--------|:--------------------------
+3 | 1
+5 | 2
+7 | 3
+9 | 4
+15 | 7
+
+Thus, we recommend setting `n_val` higher than the default of 3 for
+strongly consistent operations. More on `n_val` in the section below.
+
+### n_val Recommendations
+
+Due to the quorum requirements explained above, we recommend that you
+use _at least_ N=5 for strongly consistent data. You can set the value
+of N, i.e. `n_val`, for buckets
+[using bucket types][cluster ops bucket types].
For example, you
+can create and activate a bucket type with N set to 5 and strong
+consistency enabled---we'll call the bucket type
+`consistent_and_fault_tolerant`---using the following series of
+[commands][use admin riak-admin]:
+
+```bash
+riak-admin bucket-type create consistent_and_fault_tolerant \
+  '{"props": {"consistent":true,"n_val":5}}'
+riak-admin bucket-type activate consistent_and_fault_tolerant
+```
+
+If the `activate` command outputs `consistent_and_fault_tolerant has
+been activated`, the bucket type is now ready to provide strong
+consistency guarantees.
+
+#### Setting the target_n_val parameter
+
+The `target_n_val` parameter sets the highest `n_val` that you intend to
+use in an entire cluster. The purpose of this parameter is to ensure
+that so-called "hot spots" don't occur, i.e. that data is never stored
+more than once on the same physical node. This can happen when:
+
+* `target_n_val` is greater than the number of physical nodes, or
+* the `n_val` for a bucket is greater than `target_n_val`.
+
+A problem to be aware of if you're using strong consistency is that the
+default for `target_n_val` is 4, while our suggested minimum `n_val` for
+strongly consistent bucket types is 5. This means that you will need to
+raise `target_n_val` if you intend to use an `n_val` over 4 for _any_
+bucket type in your cluster. If you anticipate using an `n_val` of 7 as
+the largest `n_val` within your cluster, for example, you will need to
+set `target_n_val` to 7.
+
+This setting is not contained in `riak.conf`, and must instead be set in
+the `advanced.config` file. For more information, see our documentation
+on [advanced configuration][config reference#advanced].
+
+If you are using strong consistency in a cluster that has already been
+created with a `target_n_val` that is too low (remember that the default
+is too low), you will need to raise it to the desired higher value and
+restart each node.
+
+#### Note on Bucket Properties
+
+The `consistent` bucket property is one of two bucket properties,
+alongside [`datatype`][cluster ops bucket types], that cannot be changed once a
+bucket type has been created.
+
+Furthermore, if `consistent` is set to `true` for a bucket type, you
+cannot change the `n_val` for the bucket type once it's been created. If
+you attempt to do so, you'll see the following error:
+
+```
+Error updating bucket <bucket_type>:
+n_val cannot be modified for existing consistent type
+```
+
+If you've created a bucket type with a specific `n_val` and wish to
+change it, you will need to create a new bucket type with the
+appropriate `n_val` and use the new bucket type instead.
+
+### Fault Tolerance and Cluster Size
+
+From the standpoint of strongly consistent operations, larger clusters
+tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down.
+
+Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If
+two nodes go down, _all_ ensembles will lose quorum and will be unable
+to function. Strongly consistent operations on the entire keyspace will
+fail until at least one node is brought back online. And even when that
+one node is brought back online, a significant portion of the keyspace
+will continue to be unavailable for strongly consistent operations.
+
+For the sake of contrast, imagine a 50-node cluster in which all
+ensembles are N=5 (i.e. all objects are replicated to five nodes).
In
+this cluster, each node is involved in only 10% of the total ensembles;
+if a single node fails, that failure will thus impact only 10% of
+ensembles. In addition, because N is set to 5, a single node failure will not impact
+quorum for _any_ ensemble in the cluster; two additional node failures
+would need to occur for quorum to be lost for _any_ ensemble. And even
+in the case of three nodes failing, it is highly unlikely that that
+failure would impact the same ensembles; if it did, only those ensembles
+would become unavailable, affecting only 10% of the key space, as
+opposed to 100% in the example of a 3-node cluster consisting of N=3
+ensembles.
+
+These examples illustrate why we recommend higher values for N---again,
+at least N=5---as well as clusters with many nodes. The 50-node cluster
+example above is used only to illustrate why larger clusters are more
+fault tolerant. The definition of "many" nodes will vary according to your needs.
+For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].
+
+### Offline Node Recommendations
+
+In general, strongly consistent Riak is more sensitive to the number of
+nodes in the cluster than eventually consistent Riak, due to the quorum
+requirements described above. While Riak is designed to withstand a
+variety of failure scenarios that make nodes in the cluster unreachable,
+such as hardware or network failure, **we nonetheless recommend that you
+limit the number of nodes that you intentionally down or reboot**.
+Having multiple nodes leave the cluster at once can threaten quorum and
+thus affect the viability of some or all strongly consistent operations,
+depending on the size of the cluster.
+
+If you're using strong consistency and you do need to reboot multiple
+nodes, we recommend rebooting them very carefully. Rebooting nodes too
+quickly in succession can force the cluster to lose quorum and thus be
+unable to service strongly consistent operations. The best strategy is
+to reboot nodes one at a time and wait for each node to rejoin existing
+[ensembles][cluster ops strong consistency] before
+continuing to the next node. At any point in time, the state of
+currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].
+
+## Performance
+
+If you run into performance issues, bear in mind that the key space in a
+Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of
+that key space. Larger ring sizes allow more
+independent consensus groups to exist in a cluster, which can provide
+for more concurrency and higher throughput, and thus better performance.
+The ideal ring size, however, will also depend on the number of nodes in
+the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].
+
+Adding nodes to your cluster is another means of enhancing the
+performance of strongly consistent operations. Instructions on doing so
+can be found in [Adding and Removing Nodes][cluster ops add remove node].
+
+Your cluster's configuration can also affect strong consistency
+performance. See the section on [configuration][config reference#strong-cons] below.
+
+## riak-admin ensemble-status
+
+The [`riak-admin`][use admin riak-admin] interface
+used for general node/cluster management has an `ensemble-status`
+command that provides insight into the current status of the consensus
+subsystem undergirding strong consistency.
+ +Running the command by itself will provide the current state of the +subsystem: + +```bash +riak-admin ensemble-status +``` + +If strong consistency is not currently enabled, you will see `Note: The +consensus subsystem is not enabled.` in the output of the command; if +strong consistency is enabled, you will see output like this: + +``` +============================== Consensus System =============================== +Enabled: true +Active: true +Ring Ready: true +Validation: strong (trusted majority required) +Metadata: best-effort replication (asynchronous) + +================================== Ensembles ================================== + Ensemble Quorum Nodes Leader +------------------------------------------------------------------------------- + root 4 / 4 4 / 4 riak@riak1 + 2 3 / 3 3 / 3 riak@riak2 + 3 3 / 3 3 / 3 riak@riak4 + 4 3 / 3 3 / 3 riak@riak1 + 5 3 / 3 3 / 3 riak@riak2 + 6 3 / 3 3 / 3 riak@riak2 + 7 3 / 3 3 / 3 riak@riak4 + 8 3 / 3 3 / 3 riak@riak4 +``` + +### Interpreting ensemble-status Output + +The following table provides a guide to `ensemble-status` output: + +Item | Meaning +:----|:------- +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble --- The ID of the ensemble
  • Quorum --- The number of ensemble peers that are either leading or following
  • Nodes --- The number of nodes currently online
  • Leader --- The current leader node for the ensemble
+
+**Note**: The **root ensemble**, designated by `root` in the sample
+output above, is a special ensemble that stores a list of nodes and
+ensembles in the cluster.
+
+More in-depth information on ensembles can be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+
+### Inspecting Specific Ensembles
+
+The `ensemble-status` command also enables you to directly inspect the
+status of specific ensembles in a cluster. The IDs for all current
+ensembles are displayed in the `Ensembles` section of the
+`ensemble-status` output described above.
+
+To inspect a specific ensemble, specify the ID:
+
+```bash
+riak-admin ensemble-status <id>
+```
+
+The following would inspect ensemble 2:
+
+```bash
+riak-admin ensemble-status 2
+```
+
+Below is sample output for a single ensemble:
+
+```
+================================= Ensemble #2 =================================
+Id: {kv,0,3}
+Leader: riak@riak2 (2)
+Leader ready: true
+
+==================================== Peers ====================================
+ Peer  Status     Trusted          Epoch         Node
+-------------------------------------------------------------------------------
+  1    following    yes              1           riak@riak1
+  2    leading      yes              1           riak@riak2
+  3    following    yes              1           riak@riak3
+```
+
+The table below provides a guide to the output:
+
+Item | Meaning
+:----|:-------
+`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. All ensembles are `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible.
+`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
+`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
+`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.
  • Peer --- The ID of the peer
  • Status --- Whether the peer is a leader or a follower
  • Trusted --- Whether the peer's Merkle tree is currently considered trusted or not
  • Epoch --- The current consensus epoch for the peer. The epoch is incremented each time the leader changes.
  • Node --- The node on which the peer resides.
+
+More information on leaders, peers, Merkle trees, and other details can
+be found in [Implementation Details](#implementation-details) below.
+
+## Implementation Details
+
+Strong consistency in Riak is handled by a subsystem called
+[`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+This system functions differently from other systems in Riak in a number
+of ways, and many of these differences are important to bear in mind for
+operators configuring their cluster's usage of strong consistency.
+
+### Basic Operations
+
+The first major difference is that strongly consistent Riak involves a
+different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types
+of atomic operations on objects:
+
+* **Get** operations work just as they do against
+  non-strongly-consistent keys, but with two crucial differences:
+  1. Connecting clients are guaranteed to return the most recently
+     written value (which makes those operations CP, i.e. consistent and
+     partition tolerant)
+  2. Reads on strongly consistent keys *never* return siblings, hence
+     there is no need to develop any sort of conflict resolution
+     strategy for those keys
+* **Conditional put** operations write an object only if no object
+  currently exists in that key. The operation will fail if the key
+  already exists; if the key was never written or has been deleted, the
+  operation succeeds.
+* **Conditional modify** operations are compare-and-swap (CAS)
+  operations that succeed only if the value of a key has not changed
+  since it was previously read.
+* **Delete** operations work mostly like they do against
+  non-strongly-consistent keys, with the exception that
+  [tombstones][cluster ops obj del] are not harvested, which is
+  the equivalent of having `delete_mode` set to `keep`.
+
+**From the standpoint of clients connecting to Riak, there is little
+difference between strongly and non-strongly consistent data**. The
+operations performed on objects---reads, writes, deletes, etc.---are the
+same, which means that the client API for strong consistency is
+essentially the same as it is for eventually consistent operations, with
+the important exception of error handling.
+
+### Ensembles
+
+The main actors in Riak's implementation of strong consistency are
+**ensembles**, which are independent groups that watch over a portion of
+a Riak cluster's key space and coordinate strongly consistent operations
+across nodes. When watching over a given key space, ensembles must act
+upon multiple replicas of a given object, the number of which is
+specified by `n_val` (more on this in [Replication Properties][apps replication properties]).
+
+Eventually consistent Riak can service requests even when only a single
+object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it
+requires that a **quorum** of object replicas be online and reachable,
+where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not
+available for a key, all strongly consistent operations against that key
+will fail**.
+
+More information can be found in the section on Fault Tolerance above.
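+
+As a quick illustration of the quorum arithmetic above (an explanatory sketch, not a Riak API):
+
+```erlang
+%% A quorum is a strict majority of the n_val replicas (integer division).
+Quorum = fun(NVal) -> NVal div 2 + 1 end.
+%% Quorum(3) =:= 2, Quorum(5) =:= 3, Quorum(7) =:= 4.
+```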
+ +### Peers, Leaders, Followers, and Workers + +All ensembles in strongly consistent Riak consist of agents called +**peers**. The number of peers in an ensemble is defined by the `n_val` +of that ensemble, i.e. the number of object replicas that the +ensemble watches over. Amongst the peers in the ensemble, there are two +basic actors: **leaders** and **followers**. + +Leaders and followers coordinate with one another on most requests. +While leaders and followers coordinate on all writes, i.e. all puts and +deletes, you can enable leaders to respond to gets without the need to +coordinate with followers. This is known as granting a **leader lease**. +Leader leases are enabled by default, and are disabled (or re-enabled) +at the cluster level. A more in-depth account of ensemble behavior can +be found in our [internal +documentation](https://github.com/basho/riak_ensemble/tree/develop/doc). + +In addition to leaders and followers, ensemble peers use lightweight +Erlang processes called **workers** to perform long-running K/V +operations, allowing peers to remain responsive to requests. The number +of workers assigned to each peer depends on your configuration. + +These terms should be borne in mind in the sections on configuration +below. + +### Integrity Checking + +An essential part of implementing a strong consistency subsystem in a +distributed system is **integrity checking**, which is a process that +guards against data corruption and inconsistency even in the face of +network partitions and other adverse events that Riak was built to +handle gracefully. + +Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency +integrity checking utilizes [Merkle +trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on +disk. All peers in an ensemble, i.e. all leaders and followers, maintain +their own Merkle trees and update those trees in the event of most +strongly consistent operations. Those updates can occur synchronously or +asynchronously from the standpoint of client operations, depending on +the configuration that you specify. + +While integrity checking takes place automatically in Riak, there are +important aspects of its behavior that you can configure. See the Merkle Tree settings section below for more +information on configurable parameters. + +## Configuring Strong Consistency + +The `riak_ensemble` subsystem provides a wide variety of tunable +parameters that you can adjust to fit the needs of your Riak cluster. +All `riak_ensemble`-specific parameters, with the exception of the +`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency), +must be set in each node's `advanced.config` file, _not_ in `riak.conf` +or `app.config`. + +Information on the syntax and usage of `advanced.config` can be found in +our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full +listing of [strong-consistency-related configuration parameters][config reference#strong-cons]. + +Please note that the sections below require a basic understanding of the +following terms: + +* ensemble +* peer +* leader +* follower +* worker +* integrity checking +* Merkle tree + +For an explanation of these terms, see the [Implementation Details](#implementation-details) section +above. + +#### Leader Behavior + +The `trust_lease` setting determines whether leader leases are used to +optimize reads. 
When set to `true`, a leader with a valid lease can
+handle reads directly without needing to contact any followers. When
+`false`, the leader will always contact followers, which can lead to
+degraded read performance. The default is `true`. We recommend leaving
+leader leases enabled for performance reasons.
+
+All leaders have periodic duties that they perform, including refreshing
+the leader lease. You can determine how frequently this occurs, in
+milliseconds, using the `ensemble_tick` setting. The default is 500
+milliseconds. Please note that this setting must be lower than both
+the `lease_duration` and `follower_timeout` settings (both explained
+below).
+
+If you set `trust_lease` to `true`, you can also specify how long a
+leader lease remains valid without being refreshed using the
+`lease_duration` setting, which is specified in milliseconds. This
+setting should be higher than `ensemble_tick` to ensure that leaders
+have time to refresh their leases before they time out, and it _must_
+be lower than `follower_timeout`, explained in the section below. The
+default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400,
+`lease_duration` will default to 600.
+
+#### Worker Settings
+
+You can choose how many workers are assigned to each peer using the
+`peer_workers` setting. Workers are lightweight processes spawned by
+leaders and followers. While increasing the number of workers will make
+the strong consistency subsystem slightly more computationally
+expensive, more workers can mean improved performance in some cases,
+depending on the workload. The default is 1.
+
+### Timeouts
+
+You can establish timeouts for both reads and writes (puts and deletes)
+using the `peer_get_timeout` and `peer_put_timeout` settings,
+respectively. Both are expressed in milliseconds and default to 60000
+(1 minute).
+
+Longer timeouts will decrease the likelihood that read or write
+operations will fail due to long computation times; shorter timeouts
+entail shorter wait times for connecting clients, but at a higher risk
+of failed operations under heavy load.
+
+### Merkle Tree Settings
+
+Leaders and followers in Riak's strong consistency system maintain
+persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for
+all data stored by that peer. More information can be found in the
+**Integrity Checking** section above. The two sections directly below
+describe Merkle-tree-related parameters.
+
+#### Tree Validation
+
+The `tree_validation` parameter determines whether Riak considers Merkle
+trees to be trusted after peers are restarted (for whatever reason).
+When enabled, i.e. when `tree_validation` is set to `true` (the
+default), Riak does not trust peer trees after a restart, instead
+requiring the peer to sync with a trusted quorum. While this is the
+safest mode because it protects Riak against silent corruption in Merkle
+trees, it carries the drawback that it can reduce Riak availability by
+requiring more than a simple majority of nodes to be online and
+reachable when peers restart.
+
+If you are using ensembles with N=3, we strongly recommend setting
+`tree_validation` to `false`.
+
+#### Synchronous vs. Asynchronous Tree Updates
+
+Merkle tree updates can happen synchronously or asynchronously. This is
+determined by the `synchronous_tree_updates` parameter.
When set to
+`false`, which is the default, Riak responds to the client after the
+first roundtrip that updates the followers' data but before the second
+roundtrip required to update the followers' Merkle trees, allowing the
+Merkle tree update to happen asynchronously in the background; when set
+to `true`, Riak requires two quorum roundtrips to occur before replying
+back to the client, which can increase per-request latency.
+
+Please note that this setting applies only to Merkle tree updates sent
+to followers. Leaders _always_ update their local Merkle trees before
+responding to the client. Asynchronous updates can be unsafe in certain
+scenarios. For example, if a leader crashes before sending metadata
+updates to followers _and_ all followers that had acknowledged the write
+somehow revert the object value immediately prior to the write request,
+a future read could hypothetically return the immediately preceding
+value without realizing that the value was incorrect. Setting
+`synchronous_tree_updates` to `false` does bear this possibility, but it
+is highly unlikely.
+
+## Strong Consistency and Active Anti-Entropy
+
+Riak's [active anti-entropy][glossary aae] \(AAE) feature _can_ repair strongly
+consistent data. Although it is not necessary to use active anti-entropy
+if you are using strong consistency, we nonetheless recommend doing so.
+
+Without AAE, all object conflicts are repaired via read repair.
+Read repair, however, cannot repair conflicts in so-called "cold data,"
+i.e. data that may not be read for long periods of time. While using AAE
+does entail small performance losses, not using AAE can lead to problems
+with silent on-disk corruption.
+
+## Strong Consistency and Bitcask
+
+One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability in objects' TTL. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in
+strongly consistent buckets only in situations when these occasional
+irregularities are acceptable.
+
+## Important Caveats
+
+The following Riak features are not currently available in strongly
+consistent buckets:
+
+* [Secondary indexes][cluster ops 2i] --- If you do attach
+  secondary index metadata to objects in strongly consistent buckets,
+  strongly consistent operations can still proceed, but that metadata
+  will be silently ignored.
+* [Riak Data Types][dev data types] --- Data Types can currently be
+  used only in an eventually consistent fashion.
+* [Using commit hooks][usage commit hooks] --- Neither pre- nor post-commit hooks are supported in strongly consistent buckets. If you do associate a
+  strongly consistent bucket with one or more commit hooks, strongly
+  consistent operations can proceed as normal in that bucket, but all
+  commit hooks will be silently ignored.
+
+Furthermore, you should also be aware that strong consistency guarantees
+are applied only at the level of single keys.
There is currently no
+support within Riak for strongly consistent operations against multiple
+keys, although it is always possible to incorporate client-side write
+and read locks in applications that use strong consistency.
+
+## Known Issues
+
+There are a few known issues that you should be aware of when using the
+latest version of strong consistency.
+
+* **Consistent reads of never-written keys create tombstones** --- A
+  [tombstone][cluster ops obj del] will be written if you perform a read
+  against a key that a majority of peers claims to not exist. This is
+  necessary for certain corner cases in which offline or unreachable
+  replicas containing partially written data need to be rolled back in
+  the future.
+* **Consistent keys and key listing** --- In Riak, key listing
+  operations, such as listing all the keys in a bucket, do not filter
+  out tombstones. While this is rarely a problem for
+  non-strongly-consistent keys, it does present an issue for strong
+  consistency due to the tombstone issues mentioned above.
+* **Secondary indexes not supported** --- Strongly consistent
+  operations do not support [secondary indexes][cluster ops 2i] \(2i) at this time. Furthermore, any other metadata
+  attached to objects, even if not related to 2i, will be silently
+  ignored by Riak in strongly consistent buckets.
+* **Multi-Datacenter Replication not supported** --- At this time,
+  consistent keys are *not* replicated across clusters using
+  Multi-Datacenter Replication \(MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly
+  consistent data within a cluster with eventually consistent data
+  between clusters is difficult to reason about from the perspective of
+  applications. In a future version of Riak, we will add support for
+  strongly consistent replication across multiple datacenters/clusters.
+* **Client library exceptions** --- Basho's official [client
+  libraries][dev client libraries] convert errors returned by Riak into generic exceptions,
+  with a message derived from the returned server-side error message.
+
+
diff --git a/content/riak/kv/2.9.9/configuring/v2-multi-datacenter.md b/content/riak/kv/2.9.9/configuring/v2-multi-datacenter.md
new file mode 100644
index 0000000000..ce035b4d7d
--- /dev/null
+++ b/content/riak/kv/2.9.9/configuring/v2-multi-datacenter.md
@@ -0,0 +1,156 @@
+---
+title_supertext: "Configuring:"
+title: "V2 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "V2 Multi-Datacenter"
+    identifier: "configuring_v2"
+    weight: 210
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.9/ops/mdc/v2/configuration
+  - /riak/kv/2.9.9/ops/mdc/v2/configuration
+---
+
+[config v2 ssl]: {{}}riak/kv/2.9.9/configuring/v2-multi-datacenter/ssl
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.9/configuring/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication capabilities offer a
+variety of configurable parameters.
+
+## File
+
+The configuration for replication is kept in the `riak_repl` section of
+each node's `advanced.config`.
That section looks like this: + +```advancedconfig +{riak_repl, [ + {fullsync_on_connect, true}, + {fullsync_interval, 360}, + % Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + % Solaris: + % {data_root, "/opt/riak/data/riak_repl"}, + % FreeBSD/SmartOS: + % {data_root, "/var/db/riak/riak_repl"}, + {queue_size, 104857600}, + {server_max_pending, 5}, + {client_ack_frequency, 5} + ]} +``` + +## Usage + +These settings are configured using the standard Erlang config file +syntax, i.e. `{Setting, Value}`. For example, if you wished to set +`ssl_enabled` to `true`, you would insert the following line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{riak_repl, [ + % Other configs + {ssl_enabled, true}, + % Other configs + ]} +``` + +## Settings + +Once your configuration is set, you can verify its correctness by +running the following command: + +```bash +riak chkconfig +``` + +The output from this command will point you to syntactical and other +errors in your configuration files. + +A full list of configurable parameters can be found in the sections +below. + +## Fullsync Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster +`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.
**Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects for which the leader will wait for an acknowledgment from the remote location before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site + +## Client Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster +`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs +`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred + +## Buffer Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes +`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes + +## Worker Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). +`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). + + +1. SSL depth is the maximum number of non-self-issued + intermediate certificates that may follow the peer certificate in a valid + certificate chain. If depth is `0`, the PEER must be signed by the trusted + ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2` + then PEER, CA, CA, ROOT-CA and so on. + +2. Each get worker spawns 2 processes, one for the work and + one for the get FSM (an Erlang finite state machine implementation for `GET` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +3. Each put worker spawns 2 processes, one for the work, and + one for the put FSM (an Erlang finite state machine implementation for `PUT` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +4. If the ACL is specified and not the special value `*`, + peers presenting certificates not matching any of the patterns will not be + allowed to connect. + If no ACLs are configured, no checks on the common name are done, except + as described for [Identical Local and Peer Common Names][config v2 ssl]. 
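+
+As an illustration of the SSL settings above, a hypothetical `advanced.config` sketch (the file paths and ACL pattern below are placeholders for this sketch, not defaults):
+
+```advancedconfig
+{riak_repl, [
+    %% enable encrypted replication links
+    {ssl_enabled, true},
+    %% hypothetical certificate locations
+    {keyfile, "/etc/riak/repl.key.pem"},
+    {certfile, "/etc/riak/repl.cert.pem"},
+    {cacertdir, "/etc/riak/cacerts"},
+    %% allow chains of the form PEER, CA, ROOT-CA
+    {ssl_depth, 1},
+    %% only accept peers whose certificate common name matches
+    {peer_common_name_acl, ["*.basho.com"]}
+]}
+```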
diff --git a/content/riak/kv/2.9.9/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/2.9.9/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..9138068e2d --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,82 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.9/ops/mdc/v2/nat + - /riak/kv/2.9.9/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/2.9.9/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.9/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` + + + + diff --git a/content/riak/kv/2.9.9/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/2.9.9/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..8cf637d2d1 --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,371 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.9/ops/mdc/v2/quick-start + - /riak/kv/2.9.9/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.9/configuring/v3-multi-datacenter/quick-start/) instead. 
+{{% /note %}}
+
+[install index]: {{}}riak/kv/2.9.9/setup/installing
+[perf index]: {{}}riak/kv/2.9.9/using/performance
+[config v2 mdc]: {{}}riak/kv/2.9.9/configuring/v2-multi-datacenter
+[cluster ops v2 mdc]: {{}}riak/kv/2.9.9/using/cluster-operations/v2-multi-datacenter
+[cluster ops v2 mdc#status]: {{}}riak/kv/2.9.9/using/cluster-operations/v2-multi-datacenter/#status
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through
+the process of configuring Riak's version 2 Replication to perform
+replication between two sample Riak clusters in separate networks. This
+guide will also cover bidirectional replication, which is accomplished
+by setting up unidirectional replication in both directions between the
+clusters.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [Performing System Tuning][perf index]
+* [Reviewing configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+listen for connections from replication clients by running
+`riak-repl add-listener <nodename> <listen_ip> <port>` for each
+listening node:
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there is a `listener_<nodename>` entry
+for each listening node, and that `leader` and `server_stats` are populated.
+They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
+They should look similar to the following:
+
+```
+Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010"
+leader: 'riak@192.168.1.21'
+client_stats: [{<8051.3902.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster1"},
+                         {strategy,riak_repl_keylist_client},
+                         {fullsync_worker,<8051.3909.0>},
+                         {put_pool_size,5},
+                         {connected,"172.16.1.11",9010},
+                         {state,wait_for_fullsync}]}}]
+```
+
+### Testing Realtime Replication
+
+That's all there is to it! When `PUT` requests are coordinated by
+Cluster1, these operations will be replicated to Cluster2.
+
+You can use the following example script to verify that `PUT` operations
+sent to Cluster1 are being replicated to Cluster2:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=172.16.1.11
+CLUSTER_2_IP=192.168.1.21
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
+  echo "C1 PUT Successful"
+else
+  echo "C1 PUT Failed"
+  exit 1
+fi
+
+CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent
+  C1:${CHECKPUT_C1}
+  C2:${CHECKREPL_C1_TO_C2}"
+  exit 1
+fi
+
+exit 0
+```
+
+You will have to change some of the above variables for your own
+environment, such as IP addresses or ports.
+
+If you run this script and things are working as expected, you will get
+the following output:
+
+```
+C1 PUT Successful
+C1 to C2 consistent
+```
+
+## Set Up Cluster2 → Cluster1 Replication
+
+### About Bidirectional Replication
+
+Multi-Datacenter support can also be configured to replicate in both
+directions, ensuring eventual consistency between your two datacenters.
+Setting up bidirectional replication is as simple as repeating the steps
+above in the other direction, i.e. from Cluster2 to Cluster1.
+
+### Set Up the Listeners on Cluster2 (Source cluster)
+
+On a node in Cluster2, `node4` for example, identify the nodes that will
+listen for connections from replication clients by running
+`riak-repl add-listener <nodename> <listen_ip> <port>` for each
+listening node:
+
+```bash
+riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010
+riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010
+riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010
+```
+
+### Set Up the Site on Cluster1 (Site cluster)
+
+On a node in Cluster1, `node1` for example, inform the replication
+clients where the Source Listeners are with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster2`.
+
+```bash
+riak-repl add-site 192.168.1.21 9010 Cluster2
+```
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show 'cancelled' in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/2.9.9/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..eb6b698347
--- /dev/null
+++ b/content/riak/kv/2.9.9/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,164 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.9/ops/mdc/v2/ssl
+  - /riak/kv/2.9.9/ops/mdc/v2/ssl
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/2.9.9/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+]}
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_depth, ...}
+    % ...
+]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self-signed.
+
+For example:
+
+  * A depth of 0 indicates that the certificate must be signed directly
+    by a root certificate authority (CA)
+  * A depth of 1 indicates that the certificate may be signed by at most
+    one intermediate CA, followed by a root CA
+  * A depth of 2 indicates that the certificate may be signed by at most
+    two intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/configuring/v3-multi-datacenter.md b/content/riak/kv/2.9.9/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..8751d48aa2
--- /dev/null
+++ b/content/riak/kv/2.9.9/configuring/v3-multi-datacenter.md
@@ -0,0 +1,161 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.9/ops/mdc/v3/configuration
+  - /riak/kv/2.9.9/ops/mdc/v3/configuration
+---
+
+[config reference#advanced]: {{}}riak/kv/2.9.9/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{}}riak/kv/2.9.9/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file.
For more information and for a list +of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced]. + +Here is a sample of the syntax: + +```advancedconfig +{riak_core, [ + %% Every *node* runs one cluster_mgr + {cluster_mgr, {"0.0.0.0", 9080 }}, + % ... +]}, +{riak_repl, [ + %% Pick the correct data_root for your platform + %% Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + %% Solaris: + %% {data_root, "/opt/riak/data/riak_repl"}, + %% FreeBSD/SmartOS: + %% {data_root, "/var/db/riak/riak_repl"}, + {max_fssource_cluster, 5}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {fullsync_on_connect, false}, + % ... +]} +``` + +## Settings + +Riak MDC configuration is set using the standard Erlang config file +syntax `{Setting, Value}`. For example, if you wished to set +`fullsync_on_connect` to `false`, you would insert this line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{fullsync_on_connect, false} +``` + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak_repl Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number. +`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. 
+`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication.
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
+`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
+`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100MB. Dropped objects will need to be replicated with a fullsync.
+`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
+`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+
+
+## riak_core Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+
+## Heartbeat Settings
+
+There are two realtime replication settings in the `riak_repl` section
+of `advanced.config` that govern the periodic "heartbeat" sent from the
+source to the sink cluster to verify the sink cluster's
+liveness. The `rt_heartbeat_interval` setting determines how often the
+heartbeat is sent (in seconds). If a heartbeat is sent and a response is
+not received, Riak will wait `rt_heartbeat_timeout` seconds before
+attempting to re-connect to the sink; if any data is received from the
+sink, even if it is not heartbeat data, the timer will be reset. Setting
+`rt_heartbeat_interval` to `undefined` will disable the heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to.
On the other hand, +lengthening the timeout will make Riak less sensitive to cases in which +the connection really has been compromised. + +1. SSL depth is the maximum number of non-self-issued + intermediate certificates that may follow the peer certificate in a valid + certificate chain. If depth is `0`, the PEER must be signed by the trusted + ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2` + then PEER, CA, CA, ROOT-CA and so on. + +2. If the ACL is specified and not the special value `*`, + peers presenting certificates not matching any of the patterns will not be + allowed to connect. + If no ACLs are configured, no checks on the common name are done, except + as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer]. + +## Default Bucket Properties + +Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0. + +To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the advanced.config file: + +```advanced.config +{riak_repl, [ + {override_capability, [ + {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] } + ]} +]} +``` + +If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed. + + + + diff --git a/content/riak/kv/2.9.9/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/2.9.9/configuring/v3-multi-datacenter/nat.md new file mode 100644 index 0000000000..64c5938db6 --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/v3-multi-datacenter/nat.md @@ -0,0 +1,171 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "With NAT" + identifier: "configuring_v3_replication_nat" + weight: 101 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.9/ops/mdc/v3/nat + - /riak/kv/2.9.9/ops/mdc/v3/nat +--- + +[config v3 ssl]: {{}}riak/kv/2.9.9/configuring/v3-multi-datacenter/ssl + +Riak's Version 3 Replication supports replication of data on +networks that use static NAT. + +This can be used for replicating data over the internet where servers +have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network). + +### Requirements + +In order for Replication to work on a server configured with NAT, the +NAT addresses must be configured *statically*. + +## Configuration + +NAT rules can be configured at runtime, from the command line. + +* `riak-repl nat-map show` + + Shows the current NAT mapping table + +* `riak-repl nat-map add [:port] ` + + Adds a NAT map from the external IP, with an optional port, to an + internal IP. The port number refers to a port that is automatically + mapped to the internal `cluster_mgr` port number. + +* `riak-repl nat-map del [:port] ` + + Deletes a specific NAT map entry. + +### Applying Changes at Runtime + +* Realtime NAT replication changes will be applied once realtime is + stopped and started using the following command: + + * `riak-repl realtime stop ` + * `riak-repl realtime start ` + +* Fullsync NAT replication changes will be applied on the next run of a + fullsync, or you can stop and start the current fullsync. 
+ + * `riak-repl fullsync stop ` + * `riak-repl fullsync start ` + + +## Example + +* Cluster_A is the **source** of replicated data. +* Cluster_B and Cluster_C are the **sinks** of the replicated data. + +### Cluster_A Setup + +Cluster_A is set up with nodes using the following **internal** IP +addresses: + +Internal IP | Public IP +---------------|------------------- +`192.168.1.20` | - +`192.168.1.21` | - +`192.168.1.22` | - +`192.168.1.23` | - +`192.168.1.24` | - + +### Cluster_B Setup + +A node from Cluster_B will be configured as follows: + +Internal IP | Public IP +---------------|------------------- +`192.168.2.40` | `50.16.238.120:5555` +`192.168.2.41` | `50.16.238.121:5555` +`192.168.2.42` | `50.16.238.122:5555` +`192.168.2.43` | `50.16.238.123:5555` +`192.168.2.44` | `50.16.238.124:5555` + +In this example, the `cluster_mgr` port number is the default of `9080`, +while the configured NAT port listens on `5555`. + +### Cluster_C Setup + +A node from Cluster_C is set up with **static NAT**, configured with the +following IP addresses: + +Internal IP | Public IP +---------------|------------------- +`192.168.3.60` | `50.16.238.200:5550` +`192.168.3.61` | `50.16.238.200:5551` +`192.168.3.62` | `50.16.238.200:5552` +`192.168.3.63` | `50.16.238.200:5553` +`192.168.3.64` | `50.16.238.200:5554` + +In this example, the `cluster_mgr` port number is the default of `9080`, +while the configured NAT port listens on `5566`. + +```bash +# on any node of Cluster_A +riak-repl clustername Server_A + +# on any node of Cluster_B +riak-repl clustername Server_B + +# on any node of Cluster_C +riak-repl clustername Server_C + +# on 50.16.238.120 of Cluster_B +riak-repl nat-map add 50.16.238.120:5555 192.168.2.40 +# on 50.16.238.121 of Cluster_B +riak-repl nat-map add 50.16.238.121:5555 192.168.2.41 +# on 50.16.238.122 of Cluster_B +riak-repl nat-map add 50.16.238.122:5555 192.168.2.42 +# on 50.16.238.123 of Cluster_B +riak-repl nat-map add 50.16.238.123:5555 192.168.2.43 +# on 50.16.238.124 of Cluster_B +riak-repl nat-map add 50.16.238.124:5555 192.168.2.44 + +# on 192.168.3.60 of Cluster_C +riak-repl nat-map add 50.16.238.200:5550 192.168.3.60 +# on 192.168.3.61 of Cluster_C +riak-repl nat-map add 50.16.238.200:5551 192.168.3.61 +# on 192.168.3.62 of Cluster_C +riak-repl nat-map add 50.16.238.200:5552 192.168.3.62 +# on 192.168.3.63 of Cluster_C +riak-repl nat-map add 50.16.238.200:5553 192.168.3.63 +# on 192.168.3.64 of Cluster_C +riak-repl nat-map add 50.16.238.200:5554 192.168.3.64 + + +# Connect replication from Cluster_A to Cluster_B: +# on any node of Cluster_A +riak-repl connect 50.16.238.120:5555 +# You can connect to any node in Cluster_B with NAT mapped IP's/ports +# This command only needs to be run *once* for a cluster. + +# Connect replication from Cluster_A to Cluster_C: +# on any node of Cluster_A +riak-repl connect 50.16.238.200:5550 +# You can connect to any node in Cluster_C with NAT mapped IP's/ports +# This command only needs to be run *once* for a cluster. 
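+
+# Optional sanity check (not part of the original walkthrough): on any node
+# of Cluster_A, list the sink connections; both Cluster_B and Cluster_C
+# should appear before you enable realtime replication below.
+riak-repl connections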
+ + +# on any node from Cluster_A +riak-repl realtime enable Cluster_B +riak-repl realtime enable Cluster_C + +riak-repl realtime start Cluster_B +riak-repl realtime start Cluster_C +``` + + + + diff --git a/content/riak/kv/2.9.9/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/2.9.9/configuring/v3-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..802edc54b2 --- /dev/null +++ b/content/riak/kv/2.9.9/configuring/v3-multi-datacenter/quick-start.md @@ -0,0 +1,172 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Quickstart" + identifier: "configuring_v3_quickstart" + weight: 100 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.9/ops/mdc/v3/quick-start + - /riak/kv/2.9.9/ops/mdc/v3/quick-start +--- + +[perf index]: {{}}riak/kv/2.9.9/using/performance +[config v3 mdc]: {{}}riak/kv/2.9.9/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter + +This guide will walk you through the process of configuring Riak's v3 +Replication to perform replication between two sample Riak clusters on +separate networks. This guide will also cover bidirectional replication, +which is accomplished by setting up unidirectional replication in both +directions between the clusters. It is important to note that both +clusters must have the same ring size, but can have a different number +of nodes. + +## Prerequisites + +This guide assumes that you have completed the following steps: + +* Install [Riak][install index] +* Perform [System Tuning][perf index] +* Review [Configuration][config v3 mdc] + +## About v3 Replication in 1.3 and higher + +In Riak's v3 Replication from Riak KV version 1.3 onwards, the nomenclature for Source and Site +clusters has changed. To more accurately reflect the behavior of each of +the clusters, "listeners" and "sites" are now known as "sources" and +"sinks." Data transfer now originates at the "source" and replicates to +the "sink;" initiation is always from the primary (source) to the backup +(sink) data center. + +Additionally, knowledge of the state of each cluster is now managed by a +**cluster manager** process, which greatly simplifies the setup and +maintenance of Multi-Datacenter replication. 
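+
+Because the cluster manager is the connection point for v3 replication, it
+can help to confirm that it is listening before you begin. A minimal check
+(sketch): this assumes the default `cluster_mgr` port of `9080` and an
+`advanced.config` under `/etc/riak/`, both of which vary by platform:
+
+```bash
+# Show the configured cluster manager address and port...
+grep -A1 cluster_mgr /etc/riak/advanced.config
+
+# ...and confirm that something is actually listening on it.
+netstat -an | grep 9080
+```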
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak Clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|:----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View Your Active Connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink Cluster Name [Members]
+---- ------------ ---------- ---------
+Cluster2 Cluster2 <0.7985.0> ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink Cluster Name [Members]
+---- ------------ ---------- ---------
+Cluster1 Cluster1 <0.4456.0> ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2.
+Once this is done, bidirectional replication should be operating.
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
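+
+## Testing Realtime Replication
+
+The v2 quick start includes a realtime smoke test; an equivalent sketch for
+the clusters above is shown below. It assumes Riak's default HTTP port of
+`8098`, that realtime replication has been enabled and started, and that the
+bucket and key (`replCheck/v3`) are arbitrary examples:
+
+```bash
+#!/bin/bash
+# Realtime replication smoke test (sketch): write to Cluster1, then read
+# the same key from Cluster2. Adjust IPs and ports for your environment.
+
+VALUE=`date`
+CLUSTER_1_IP=10.60.67.149
+CLUSTER_2_IP=10.60.77.10
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/v3
+
+# Give realtime replication a moment to deliver the object.
+sleep 2
+
+CHECK=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/v3`
+
+if [ "${VALUE}" = "${CHECK}" ]; then
+  echo "Cluster1 to Cluster2 consistent"
+else
+  echo "Cluster1 to Cluster2 inconsistent"
+  exit 1
+fi
+
+exit 0
+```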
+
+
+
+
diff --git a/content/riak/kv/2.9.9/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/2.9.9/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..8ef65ac8d6
--- /dev/null
+++ b/content/riak/kv/2.9.9/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,174 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.9/ops/mdc/v3/ssl
+  - /riak/kv/2.9.9/ops/mdc/v3/ssl
+---
+
+[config reference#advanced.config]: {{}}riak/kv/2.9.9/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+]}
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}} + +### Examples + +The following example will only allow connections from peer certificate +names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`: + +```advancedconfig +{riak_core, [ + % ... + {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]} + % ... + ]} + +``` + +The following example will allow connections from peer certificate names +like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a +peer certificate name like `db.backup.bashosamplecorp.com`. + +```advancedconfig +{riak_core, [ + % ... + {peer_common_name_acl, ["*.bashosamplecorp.com"]} + % ... + ]} + +``` + +This example will match any peer certificate name (and is the default): + +```advancedconfig +{riak_core, [ + % ... + {peer_common_name_acl, "*"} + % ... + ]} + +``` + +## SSL CA Validation + +You can adjust the way CA certificates are validated by adding the +following to the `riak_repl` section of `advanced.config`: + +```advancedconfig +{riak_core, [ + % ... + {ssl_depth, 3} % Sets the depth to 3 + % ... + ]} + +``` + +**Note**: `ssl_depth` takes an integer parameter. + +The depth specifies the maximum number of intermediate certificates that +may follow the peer certificate in a valid certification path. The +intermediate certificates must not be self signed. + +The following example depths illustrate this: + + * a depth of `0` indicates that the certificate must be signed + directly by a root certificate authority (CA) + * a depth of `1` indicates that the certificate may be signed by at + most 1 intermediate CA's, followed by a root CA + * a depth of `2` indicates that the certificate may be signed by at + most 2 intermediate CA's, followed by a root CA + +## Compatibility + +Replication SSL for *Version 3* is available in *Riak 1.4+*. + +If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or +1.1 node, the connection will be denied and an error will be logged. + +### Self-Signed Certificates + +Read how to [generate your own CA and +keys](http://www.debian-administration.org/articles/618). Ensure that +you remove the password protection from the keys you generate. + + + + diff --git a/content/riak/kv/2.9.9/developing.md b/content/riak/kv/2.9.9/developing.md new file mode 100644 index 0000000000..0b5afdebff --- /dev/null +++ b/content/riak/kv/2.9.9/developing.md @@ -0,0 +1,79 @@ +--- +title: "Developing with Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Developing" + identifier: "developing" + weight: 300 + pre: lambda +toc: true +aliases: +--- + +[getting started]: ../developing/getting-started +[usage index]: ../developing/usage +[client libraries]: ../developing/client-libraries +[dev data types]: ../developing/data-types +[dev data modeling]: ../developing/data-modeling +[apps index]: ../developing/app-guide +[dev api index]: ../developing/api +[dev faq]: ../developing/faq + +## In This Section + +#### [Getting Started][getting started] + +Step-by-step guide for getting started developing with Riak KV. + +[Learn More >>][getting started] + +#### [Usage][usage index] + +A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types. + +[Learn More >>][usage index] + +#### [Client Libraries][client libraries] + +Overview of client libraries for a variety of programming languages and environments. 
+ +[Learn More >>][client libraries] + +#### [Data Types][dev data types] + +Overview and guide to working with data types in Riak KV. + +[Learn More >>][dev data types] + +#### [Data Modeling][dev data modeling] + +Information on use cases and data models that are a good fit for Riak KV. + +[Learn More >>][dev data modeling] + +#### [Application Guide][apps index] + +A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV. + +[Learn More >>][apps index] + +#### [APIs Reference][dev api index] + +Information and reference material on Riak KV APIs. + +[Learn More >>][dev api index] + +#### [FAQ][dev faq] + +Frequently asked questions when developing applications with Riak KV. + +[Learn More >>][dev faq] + + + + + + diff --git a/content/riak/kv/2.9.9/developing/api.md b/content/riak/kv/2.9.9/developing/api.md new file mode 100644 index 0000000000..7744044fa9 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api.md @@ -0,0 +1,42 @@ +--- +title: "APIs" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "APIs" + identifier: "developing_apis" + weight: 107 + parent: "developing" +toc: true +aliases: +--- + +[dev api http]: ./http +[dev api backend]: ./backend +[dev api pbc]: ./protocol-buffers/ + +## In This Section + +#### [HTTP APIs][dev api http] + +Documentation on Riak KV's HTTP API. + +[Learn More >>][dev api http] + +#### [Protocol Buffers][dev api pbc] + +Information on Riak KV's Protocol Buffer Client API + +[Learn More >>][dev api pbc] + +#### [Backend API][dev api backend] + +Overview of Riak KV's storage backend API. + +[Learn More >>][dev api backend] + + + + diff --git a/content/riak/kv/2.9.9/developing/api/backend.md b/content/riak/kv/2.9.9/developing/api/backend.md new file mode 100644 index 0000000000..1afc9451be --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/backend.md @@ -0,0 +1,118 @@ +--- +title: "Backend API" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Backend API" + identifier: "apis_backend" + weight: 101 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.9/dev/references/backend-api + - /riak/kv/2.9.9/dev/references/backend-api +--- + +[plan backend]: {{}}riak/kv/2.9.9/setup/planning/backend + +Riak's storage API uniformly applies to all of the +[supported backends][plan backend]. This page presents the details of +the storage backend API in the form of +[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html) +(specs). + +Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html), +an Erlang static analysis tool. We recommend copying these specs into any +custom backend modules and use them as a guide for development to +avoid errors and ensure full compatibility with Riak. + +Also included below is the function export list that can be pasted directly +into a custom storage backend module. + +```erlang +%% Riak Storage Backend API +-export([api_version/0, + start/2, + stop/1, + get/3, + put/5, + delete/4, + drop/1, + fold_buckets/4, + fold_keys/4, + fold_objects/4, + is_empty/1, + status/1, + callback/3]). + +%% =================================================================== +%% Public API +%% =================================================================== + +%% @doc Return the major version of the +%% current API and a capabilities list. +%% The current valid capabilities are async_fold +%% and indexes. 
+-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. + +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/api/http.md b/content/riak/kv/2.9.9/developing/api/http.md new file mode 100644 index 0000000000..c74eb2d343 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/http.md @@ -0,0 +1,93 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/2.9.9/dev/references/http + - /riak/kv/2.9.9/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
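+
+For example, here is a hedged sketch of storing and fetching a key that
+contains a slash (the bucket and key names are arbitrary examples):
+
+```bash
+# Store under the literal key "docs/readme"; the slash must be sent as %2F.
+curl -X PUT -H "Content-Type: text/plain" \
+  -d "hello" http://127.0.0.1:8098/buckets/test/keys/docs%2Freadme
+
+# Fetch it back using the same escaped form.
+curl http://127.0.0.1:8098/buckets/test/keys/docs%2Freadme
+```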
+ +## Bucket-related Operations + +Method | URL | Doc +:------|:----|:--- +`GET` | `/types//buckets//props` | [HTTP Get Bucket Properties]({{}}riak/kv/2.9.9/developing/api/http/get-bucket-props) +`PUT` | `/types//buckets//props` | [HTTP Set Bucket Properties]({{}}riak/kv/2.9.9/developing/api/http/set-bucket-props) +`DELETE` | `/types//buckets//props` | [HTTP Reset Bucket Properties]({{}}riak/kv/2.9.9/developing/api/http/reset-bucket-props) +`GET` | `/types//buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/2.9.9/developing/api/http/list-buckets) +`GET` | `/types//buckets//keys?keys=true` | [HTTP List Keys]({{}}riak/kv/2.9.9/developing/api/http/list-keys) + +## Object-related Operations + +Method | URL | Doc +:------|:----|:--- +`GET` | `/types//buckets//keys/` | [HTTP Fetch Object]({{}}riak/kv/2.9.9/developing/api/http/fetch-object) +`POST` | `/types//buckets//keys` | [HTTP Store Object]({{}}riak/kv/2.9.9/developing/api/http/store-object) +`PUT` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.9.9/developing/api/http/store-object) +`POST` | `/types//buckets//keys/` | [HTTP Store Object]({{}}riak/kv/2.9.9/developing/api/http/store-object) +`DELETE` | `/types//buckets//keys/` | [HTTP Delete Object]({{}}riak/kv/2.9.9/developing/api/http/delete-object) + +## Riak-Data-Type-related Operations + +Method | URL +:------|:---- +`GET` | `/types//buckets//datatypes/` +`POST` | `/types//buckets//datatypes` +`POST` | `/types//buckets//datatypes/` + +For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/2.9.9/learn/concepts/crdts), +see the `curl` examples in [Using Data Types]({{}}riak/kv/2.9.9/developing/data-types/#usage-examples) +and subpages e.g. [sets]({{}}riak/kv/2.9.9/developing/data-types/sets). + +Advanced users may consult the technical documentation inside the Riak +KV internal module `riak_kv_wm_crdt`. 
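+
+As a quick illustration of the Data Type endpoints above, the following
+sketch increments and reads a counter. It assumes a bucket type named
+`counters` backed by the `counter` Data Type has already been created and
+activated; the bucket and key are arbitrary examples:
+
+```bash
+# Apply an increment operation to the counter at test/hits.
+curl -XPOST http://127.0.0.1:8098/types/counters/buckets/test/datatypes/hits \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 1}'
+
+# Read the counter's current value back as JSON.
+curl http://127.0.0.1:8098/types/counters/buckets/test/datatypes/hits
+```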
+ +## Query-related Operations + +Method | URL | Doc +:------|:----|:--- +`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/2.9.9/developing/api/http/mapreduce) +`GET` | `/types//buckets//index//` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.9/developing/api/http/secondary-indexes) +`GET` | `/types//buckets//index///` | [HTTP Secondary Indexes]({{}}riak/kv/2.9.9/developing/api/http/secondary-indexes) + +## Server-related Operations + +Method | URL | Doc +:------|:----|:--- +`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/2.9.9/developing/api/http/ping) +`GET` | `/stats` | [HTTP Status]({{}}riak/kv/2.9.9/developing/api/http/status) +`GET` | `/` | [HTTP List Resources]({{}}riak/kv/2.9.9/developing/api/http/list-resources) + +## Search-related Operations + +Method | URL | Doc +:------|:----|:--- +`GET` | `/search/query/` | [HTTP Search Query]({{}}riak/kv/2.9.9/developing/api/http/search-query) +`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/2.9.9/developing/api/http/search-index-info) +`GET` | `/search/index/` | [HTTP Fetch Search Index]({{}}riak/kv/2.9.9/developing/api/http/fetch-search-index) +`PUT` | `/search/index/` | [HTTP Store Search Index]({{}}riak/kv/2.9.9/developing/api/http/store-search-index) +`DELETE` | `/search/index/` | [HTTP Delete Search Index]({{}}riak/kv/2.9.9/developing/api/http/delete-search-index) +`GET` | `/search/schema/` | [HTTP Fetch Search Schema]({{}}riak/kv/2.9.9/developing/api/http/fetch-search-schema) +`PUT` | `/search/schema/` | [HTTP Store Search Schema]({{}}riak/kv/2.9.9/developing/api/http/store-search-schema) + + + + diff --git a/content/riak/kv/2.9.9/developing/api/http/counters.md b/content/riak/kv/2.9.9/developing/api/http/counters.md new file mode 100644 index 0000000000..3a8464a5ce --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/http/counters.md @@ -0,0 +1,82 @@ +--- +title: "HTTP Counters" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Counters" + identifier: "http_counters" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.9/dev/references/http/counters + - /riak/kv/2.9.9/dev/references/http/counters +--- + +Riak counters are a CRDT (convergent replicated data type) that (eventually) +converge to the correct total. You merely increment the counter with some +integer, and any potential conflicts will be automatically resolved by Riak. + +## Setup + +Riak counters can only be used if the bucket has the `allow_mult` property +set to `true`. + +``` +curl -XPUT localhost:8098/buckets/BUCKET/props \ + -H "Content-Type: application/json" \ + -d "{\"props\" : {\"allow_mult\": true}}" +``` + +If you attempt to use counters without setting the above, you'll get this +message: + +``` +Counters require bucket property 'allow_mult=true' +``` + +## Request + +To insert just POST an integer value using the `/counters` resource. This will +increment that keyed value by the given amount. + +``` +POST /buckets/BUCKET/counters/KEY +``` + +To receive the current value is a GET using `/counters` + +``` +GET /buckets/BUCKET/counters/KEY +``` + +## Response + +The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/2.9.9/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/2.9.9/developing/api/http/fetch-object)) responses apply here. + +Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata. + +## Example + +The body must be an integer (positive or negative). 
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/api/http/delete-object.md b/content/riak/kv/2.9.9/developing/api/http/delete-object.md new file mode 100644 index 0000000000..c1757324fc --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/http/delete-object.md @@ -0,0 +1,79 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.9/dev/references/http/delete-object + - /riak/kv/2.9.9/dev/references/http/delete-object +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> DELETE /buckets/test/keys/test2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/api/http/delete-search-index.md b/content/riak/kv/2.9.9/developing/api/http/delete-search-index.md new file mode 100644 index 0000000000..fc39f99c82 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/http/delete-search-index.md @@ -0,0 +1,37 @@ +--- +title: "HTTP Delete Search Index" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Delete Search Index" + identifier: "http_delete_search_index" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.9/dev/references/http/delete-search-index + - /riak/kv/2.9.9/dev/references/http/delete-search-index +--- + +Deletes a Riak Search index. + +## Request + +``` +DELETE /search/index/ +``` + +## Normal Response Codes + +* `204 No Content` - The index was successfully deleted (also returned + if the index did not exist to begin with) + +## Typical Error Codes + +* `503 Service Unavailable` - The request timed out internally + + + + diff --git a/content/riak/kv/2.9.9/developing/api/http/fetch-object.md b/content/riak/kv/2.9.9/developing/api/http/fetch-object.md new file mode 100644 index 0000000000..bcc4021cd6 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/http/fetch-object.md @@ -0,0 +1,246 @@ +--- +title: "HTTP Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Fetch Object" + identifier: "http_fetch_object" + weight: 105 + parent: "apis_http" +toc: true +aliases: + - /riak/2.9.9/dev/references/http/fetch-object + - /riak/kv/2.9.9/dev/references/http/fetch-object +--- + +Reads an object from the specified bucket/key. + +## Request + +```bash +GET /types/type/buckets/bucket/keys/key +GET /buckets/bucket/keys/key +``` + +Important headers: + +* `Accept` - When `multipart/mixed` is the preferred content-type, objects with +siblings will return all siblings in single request. See [Siblings examples](#siblings-examples). See +also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1). + +Optional headers: + +* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics, +matching on the `ETag` and `Last-Modified` of the object, respectively. If the +object fails one of the tests (that is, if the ETag is equal or the object is +unmodified since the supplied timestamp), Riak will return a `304 Not Modified` +response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5). + +Optional query parameters: + +* `r` - (read quorum) how many replicas need to agree when retrieving the +object ([default is defined by the bucket]({{}}riak/kv/2.9.9/developing/api/http/set-bucket-props)) +* `pr` - how many primary replicas need to be online when doing the read +([default is defined by the bucket]({{}}riak/kv/2.9.9/developing/api/http/set-bucket-props)) +* `basic_quorum` - whether to return early in some failure cases (eg. 
when r=1
+and you get 2 errors and a success, `basic_quorum=true` would return an error)
+([default is defined by the bucket]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/set-bucket-props))
+* `notfound_ok` - whether to treat notfounds as successful reads for the
+purposes of R ([default is defined by the bucket]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/set-bucket-props))
+* `vtag` - when accessing an object with siblings, which sibling to retrieve.
+Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information.
+
+## Response
+
+Normal response codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified` (when using conditional request semantics)
+
+Typical error codes:
+
+* `400 Bad Request` - e.g. when r parameter is invalid (> N)
+* `404 Not Found` - the object could not be found on enough partitions
+* `503 Service Unavailable` - the request timed out internally
+
+Important headers:
+
+* `Content-Type` - the media type/format
+* `X-Riak-Vclock` - the opaque vector clock for the object
+* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object
+* `ETag` - the entity tag for the object, useful for conditional GET operations
+and validation-based caching
+* `Last-Modified` - a timestamp for when the object was last written, in HTTP
+datetime format
+* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#links)
+
+The body of the response will be the contents of the object except when siblings
+are present.
+
+{{% note title="Siblings" %}}
+When `allow_mult` is set to true in the bucket properties, concurrent updates
+are allowed to create "sibling" objects, meaning that the object has any
+number of different values that are related to one another by the vector
+clock. This allows your application to use its own conflict resolution
+technique.
+
+An object with multiple sibling values will result in a `300 Multiple Choices`
+response. If the `Accept` header prefers `multipart/mixed`, all siblings will
+be returned in a single request as sections of the `multipart/mixed` response
+body. Otherwise, a list of "vtags" will be given in a simple text format. You
+can request individual siblings by adding the `vtag` query parameter. Scroll
+down to the 'manually requesting siblings' example below for more information.
+
+To resolve the conflict, store the resolved version with the `X-Riak-Vclock`
+given in the response.
+{{% /note %}}
+
+## Simple Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /buckets/test/keys/doc2 HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA==
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Link: </buckets/test>; rel="up"
+< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT
+< ETag: 6dQBm9oYA1mxRSH0e96l5W
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 13
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"foo":"bar"}
+```
+
+## Siblings examples
+
+### Manually requesting siblings
+
+Simple call to fetch an object that has siblings:
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /buckets/test/keys/doc HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 300 Multiple Choices
+< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA==
+< Vary: Accept, Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/plain
+< Content-Length: 102
+<
+Siblings:
+16vic4eU9ny46o4KPiDz1f
+4v5xOg4bVwUYZdMkqf0d6I
+6nr5tDTmhxnwuAFJDd2s6G
+6zRSZFUJlHXZ15o9CG0BYl
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+Now request one of the siblings directly:
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA==
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Link: </buckets/test>; rel="up"
+< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT
+< ETag: 16vic4eU9ny46o4KPiDz1f
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/x-www-form-urlencoded
+< Content-Length: 13
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"bar":"baz"}
+```
+
+### Get all siblings in one request
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed"
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /buckets/test/keys/doc HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: multipart/mixed
+>
+< HTTP/1.1 300 Multiple Choices
+< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA==
+< Vary: Accept, Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh
+< Content-Length: 766
+<
+
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/x-www-form-urlencoded
+Link: </buckets/test>; rel="up"
+Etag: 16vic4eU9ny46o4KPiDz1f
+Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT
+
+{"bar":"baz"}
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/json
+Link: </buckets/test>; rel="up"
+Etag: 4v5xOg4bVwUYZdMkqf0d6I
+Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT
+
+{"bar":"baz"}
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/json
+Link: </buckets/test>; rel="up"
+Etag: 6nr5tDTmhxnwuAFJDd2s6G
+Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT
+
+{"bar":"baz"}
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/json
+Link: </buckets/test>; rel="up"
+Etag: 6zRSZFUJlHXZ15o9CG0BYl
+Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT
+
+{"foo":"bar"}
+--YinLMzyUR9feB17okMytgKsylvh--
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/fetch-search-index.md b/content/riak/kv/2.9.9/developing/api/http/fetch-search-index.md
new file mode 100644
index 0000000000..2a6d9fbf67
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/fetch-search-index.md
@@ -0,0 +1,51 @@
+---
+title: "HTTP Fetch Search Index"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Fetch Search Index"
+    identifier: "http_fetch_search_index"
+    weight: 115
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/fetch-search-index
+  - /riak/kv/2.9.9/dev/references/http/fetch-search-index
+---
+
+Retrieves information about a Riak Search [index]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search/#simple-setup).
+
+## Request
+
+```
+GET /search/index/<index_name>
+```
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `404 Object Not Found` - No Search index with that name is currently
+  available
+* `503 Service Unavailable` - The request timed out internally
+
+## Response
+
+If the index is found, Riak will output a JSON object describing the
+index, including its name, the [`n_val`]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search-schemas) used by the index. 
Here is an example:
+
+```json
+{
+  "name": "my_index",
+  "n_val": 3,
+  "schema": "_yz_default"
+}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/fetch-search-schema.md b/content/riak/kv/2.9.9/developing/api/http/fetch-search-schema.md
new file mode 100644
index 0000000000..5040586c19
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/fetch-search-schema.md
@@ -0,0 +1,42 @@
+---
+title: "HTTP Fetch Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Fetch Search Schema"
+    identifier: "http_fetch_search_schema"
+    weight: 116
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/fetch-search-schema
+  - /riak/kv/2.9.9/dev/references/http/fetch-search-schema
+---
+
+Retrieves a Riak KV [search schema]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search-schemas).
+
+## Request
+
+```
+GET /search/schema/<schema_name>
+```
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `404 Object Not Found`
+* `503 Service Unavailable` - The request timed out internally
+
+## Response
+
+If the schema is found, Riak will return the contents of the schema as
+XML (all Riak Search schemas are XML).
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/get-bucket-props.md b/content/riak/kv/2.9.9/developing/api/http/get-bucket-props.md
new file mode 100644
index 0000000000..0f5521fbc5
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/get-bucket-props.md
@@ -0,0 +1,86 @@
+---
+title: "HTTP Get Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Get Bucket Properties"
+    identifier: "http_get_bucket_props"
+    weight: 100
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/get-bucket-props
+  - /riak/kv/2.9.9/dev/references/http/get-bucket-props
+---
+
+Reads the bucket or bucket type properties.
+
+## Request
+
+```bash
+GET /buckets/bucket/props
+```
+
+Or, to read bucket properties from a bucket in a bucket type:
+
+```bash
+GET /types/type/buckets/bucket/props
+```
+
+Optional query parameters (only valid for the old format):
+
+* `props` - whether to return the bucket properties (`true` is the default)
+* `keys` - whether to return the keys stored in the bucket. (`false` is the
+default). See also [HTTP List Keys]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/list-keys).
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+Important headers:
+
+* `Content-Type` - `application/json`
+
+The JSON object in the response will contain up to two entries, `"props"` and
+`"keys"`, which are present or missing, according to the optional query
+parameters. The default is for only `"props"` to be present.
+
+See [HTTP Set Bucket Properties]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/set-bucket-props) for more information about the available
+bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/2.9.9/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface.
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/props
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /buckets/test/props HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7
+OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 368
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false,"
+precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":"
+chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":"
+mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50,"
+small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/link-walking.md b/content/riak/kv/2.9.9/developing/api/http/link-walking.md
new file mode 100644
index 0000000000..da6dbc9963
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/link-walking.md
@@ -0,0 +1,129 @@
+---
+title: "HTTP Link Walking"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Link Walking"
+    identifier: "http_link_walking"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/link-walking
+  - /riak/kv/2.9.9/dev/references/http/link-walking
+---
+
+{{% note title="Deprecation Warning" %}}
+This feature is deprecated and will be removed in a future version.
+{{% /note %}}
+
+Link walking (traversal) finds and returns objects by following links attached
+to them, starting from the object specified by the bucket and key portion. It
+is a special case of [MapReduce]({{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce), and can be expressed more verbosely as such.
+[Read more about Links]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#links).
+
+## Request
+
+```bash
+GET /buckets/bucket/keys/key/[bucket],[tag],[keep]
+```
+
+{{% note title="Link filters" %}}
+A link filter within the request URL is made of three parts, separated by
+commas:
+
+* Bucket - a bucket name to limit the links to
+* Tag - a "riaktag" to limit the links to
+* Keep - 0 or 1, whether to return results from this phase
+
+Any of the three parts may be replaced with `_` (underscore), signifying that
+any value is valid. Multiple phases of links can be followed by adding
+additional path segments to the URL, separating the link filters by slashes.
+The final phase in the link-walking query implicitly returns its results.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+Typical error codes:
+
+* `400 Bad Request` - if the format of the query in the URL is invalid
+* `404 Not Found` - if the origin object of the walk was missing
+
+Important headers:
+
+* `Content-Type` - always `multipart/mixed`, with a boundary specified
+
+> **Understanding the response body**
+>
+> The response body will always be `multipart/mixed`, with each
+chunk representing a single phase of the link-walking query. Each phase will
+also be encoded in `multipart/mixed`, with each chunk representing a
+single object that was found. If no objects were found or "keep" was not set on
+the phase, no chunks will be present in that phase. Objects inside phase
+results will include `Location` headers that can be used to determine
+bucket and key. 
In fact, you can treat each object-chunk similarly to a complete
+response from [fetching the object]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/fetch-object), without the status
+code.
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Expires: Wed, 10 Mar 2010 20:24:49 GMT
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN
+< Content-Length: 970
+<
+
+--JZi8W8pB0Z3nO3odw11GUB4LQCN
+Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP
+
+--OjZ8Km9J5vbsmxtcn1p48J91cJP
+X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA=
+Location: /riak/test/doc
+Content-Type: application/json
+Link: </riak/test>; rel="up", </riak/test/doc2>; riaktag="next"
+Etag: 3pvmY35coyWPxh8mh4uBQC
+Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT
+
+{"riak":"CAP"}
+--OjZ8Km9J5vbsmxtcn1p48J91cJP--
+
+--JZi8W8pB0Z3nO3odw11GUB4LQCN
+Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C
+
+--RJKFlAs9PrdBNfd74HANycvbA8C
+X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA==
+Location: /riak/test/doc2
+Content-Type: application/json
+Link: </riak/test>; rel="up"
+Etag: 6dQBm9oYA1mxRSH0e96l5W
+Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT
+
+{"foo":"bar"}
+--RJKFlAs9PrdBNfd74HANycvbA8C--
+
+--JZi8W8pB0Z3nO3odw11GUB4LQCN--
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/list-buckets.md b/content/riak/kv/2.9.9/developing/api/http/list-buckets.md
new file mode 100644
index 0000000000..ea62279d07
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/list-buckets.md
@@ -0,0 +1,68 @@
+---
+title: "HTTP List Buckets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "List Buckets"
+    identifier: "http_list_buckets"
+    weight: 103
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/list-buckets
+  - /riak/kv/2.9.9/dev/references/http/list-buckets
+---
+
+Lists all known buckets (ones that have keys stored in them).
+
+{{% note title="Not for production use" %}}
+Similar to the list keys operation, this requires traversing all keys stored
+in the cluster and should not be used in production.
+{{% /note %}}
+
+## Request
+
+```bash
+# Using the default bucket type
+GET /buckets?buckets=true
+
+# Using a non-default bucket type
+GET /types/<type>/buckets?buckets=true
+```
+
+Required query parameter:
+
+* **buckets=true** - required to invoke the list-buckets functionality
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+Important headers:
+
+* `Content-Type - application/json`
+
+The JSON object in the response will contain a single entry, "buckets", which
+will be an array of bucket names. 
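+
+The same query also works for buckets under a non-default bucket type; a
+minimal sketch, assuming a bucket type named `my_type` has already been
+created and activated:
+
+```curl
+curl http://localhost:8098/types/my_type/buckets?buckets=true
+```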
+
+## Example
+
+```curl
+$ curl -i http://localhost:8098/buckets?buckets=true
+HTTP/1.1 200 OK
+Vary: Accept-Encoding
+Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+Date: Fri, 30 Sep 2011 15:24:35 GMT
+Content-Type: application/json
+Content-Length: 21
+
+{"buckets":["files"]}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/list-keys.md b/content/riak/kv/2.9.9/developing/api/http/list-keys.md
new file mode 100644
index 0000000000..7ecc5b42b5
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/list-keys.md
@@ -0,0 +1,80 @@
+---
+title: "HTTP List Keys"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "List Keys"
+    identifier: "http_list_keys"
+    weight: 104
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/list-keys
+  - /riak/kv/2.9.9/dev/references/http/list-keys
+---
+
+Lists keys in a bucket.
+
+{{% note title="Not for production use" %}}
+This operation requires traversing all keys stored in the cluster and should
+not be used in production.
+{{% /note %}}
+
+## Request
+
+```bash
+# Using the default bucket type
+GET /buckets/bucket/keys?keys=true # List all keys
+GET /buckets/bucket/keys?keys=stream # Stream keys to the client
+
+# Using a non-default bucket type
+GET /types/<type>/buckets/bucket/keys?keys=true
+GET /types/<type>/buckets/bucket/keys?keys=stream
+```
+
+Required query parameters:
+
+* `keys` - defaults to `false`. When set to `true` all keys will be returned in
+a single payload. When set to `stream`, keys will be returned in
+chunked-encoding.
+
+## Response
+
+Normal response codes:
+
+* `200 OK`
+
+Important headers:
+
+* `Content-Type` - `application/json`
+* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to
+`stream`.
+
+The JSON object in the response will contain up to two entries,
+`"props"` and `"keys"` which are present or missing according to the
+query parameters and format used. If `keys=stream` in the query
+parameters, multiple JSON objects in chunked-encoding will be returned
+containing `"keys"` entries.
+
+## Example
+
+```curl
+$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true
+HTTP/1.1 200 OK
+Vary: Accept-Encoding
+Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+Date: Fri, 30 Sep 2011 15:24:35 GMT
+Content-Type: application/json
+Content-Length: 239
+
+{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png","
+thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg","
+trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png","
+team_cribbs.png"]}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/list-resources.md b/content/riak/kv/2.9.9/developing/api/http/list-resources.md
new file mode 100644
index 0000000000..7e1223f11b
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/list-resources.md
@@ -0,0 +1,84 @@
+---
+title: "HTTP List Resources"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "List Resources"
+    identifier: "http_list_resources"
+    weight: 112
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/list-resources
+  - /riak/kv/2.9.9/dev/references/http/list-resources
+---
+
+List available HTTP resources for the Riak node. This can be used by clients to
+automatically recognize the location of the resources for specific operations. 
+
+The standard resources are:
+
+* `riak_kv_wm_buckets` - [Bucket Operations]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/#bucket-operations)
+* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/secondary-indexes)
+* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/link-walking)
+* `riak_kv_wm_mapred` - [HTTP MapReduce]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/mapreduce)
+* `riak_kv_wm_object` - [Object/Key Operations]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/#object-key-operations)
+* `riak_kv_wm_ping` - [HTTP Ping]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/ping)
+* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/set-bucket-props)
+* `riak_kv_wm_stats` - [HTTP Status]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/status)
+
+## Request
+
+```bash
+GET /
+```
+
+Headers:
+
+* `Accept` - `application/json` or `text/html`
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+Important headers:
+
+* `Link` - all resources that are described in the response body, but in Link
+form
+
+## Example
+
+Request JSON response
+
+```curl
+$ curl -i http://localhost:8098 -H "Accept: application/json"
+HTTP/1.1 200 OK
+Vary: Accept
+Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact)
+Link: </buckets>; rel="riak_kv_wm_buckets",</riak>; rel="riak_kv_wm_buckets",</buckets>; rel="riak_kv_wm_counter",</buckets>; rel="riak_kv_wm_index",</buckets>; rel="riak_kv_wm_keylist",</buckets>; rel="riak_kv_wm_link_walker",</riak>; rel="riak_kv_wm_link_walker",</mapred>; rel="riak_kv_wm_mapred",</buckets>; rel="riak_kv_wm_object",</riak>; rel="riak_kv_wm_object",</ping>; rel="riak_kv_wm_ping",</buckets>; rel="riak_kv_wm_props",</stats>; rel="riak_kv_wm_stats"
+Date: Wed, 27 Nov 2013 20:18:31 GMT
+Content-Type: application/json
+Content-Length: 398
+
+{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"}
+
+# Request HTML response
+curl -i http://localhost:8098 -H "Accept: text/html"
+HTTP/1.1 200 OK
+Vary: Accept
+Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact)
+Link: </buckets>; rel="riak_kv_wm_buckets",</riak>; rel="riak_kv_wm_buckets",</buckets>; rel="riak_kv_wm_counter",</buckets>; rel="riak_kv_wm_index",</buckets>; rel="riak_kv_wm_keylist",</buckets>; rel="riak_kv_wm_link_walker",</riak>; rel="riak_kv_wm_link_walker",</mapred>; rel="riak_kv_wm_mapred",</buckets>; rel="riak_kv_wm_object",</riak>; rel="riak_kv_wm_object",</ping>; rel="riak_kv_wm_ping",</buckets>; rel="riak_kv_wm_props",</stats>; rel="riak_kv_wm_stats"
+Date: Wed, 27 Nov 2013 20:20:05 GMT
+Content-Type: text/html
+Content-Length: 666
+
+
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/mapreduce.md b/content/riak/kv/2.9.9/developing/api/http/mapreduce.md
new file mode 100644
index 0000000000..d4266d00b0
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/mapreduce.md
@@ -0,0 +1,74 @@
+---
+title: "HTTP MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "MapReduce"
+    identifier: "http_mapreduce"
+    weight: 108
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/mapreduce
+  - /riak/kv/2.9.9/dev/references/http/mapreduce
+---
+
+[MapReduce]({{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will 
flow.
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+*This request must include an entity (body), which is the JSON form of the MapReduce query.*
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted.
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/ping.md b/content/riak/kv/2.9.9/developing/api/http/ping.md
new file mode 100644
index 0000000000..e6ab7c6c8a
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/ping.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/ping
+  - /riak/kv/2.9.9/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load-balancers and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/reset-bucket-props.md b/content/riak/kv/2.9.9/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..d187cad4c4
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,61 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/reset-bucket-props
+  - /riak/kv/2.9.9/dev/references/http/reset-bucket-props
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/bucket/props HTTP/1.1
+> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue)
+< Date: Tue, 06 Nov 2012 21:56:17 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/search-index-info.md b/content/riak/kv/2.9.9/developing/api/http/search-index-info.md
new file mode 100644
index 0000000000..71234ad34e
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/search-index-info.md
@@ -0,0 +1,56 @@
+---
+title: "HTTP Search Index Info"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Search Index Info"
+    identifier: "http_search_index_info"
+    weight: 114
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/search-index-info
+  - /riak/kv/2.9.9/dev/references/http/search-index-info
+---
+
+Retrieves information about all currently available [Search indexes]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search) in JSON format.
+
+## Request
+
+```
+GET /search/index
+```
+
+## Response
+
+If there are no currently available Search indexes, a `200 OK` will be
+returned but with an empty list as the response value. 
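+
+A quick way to check this from the command line (a sketch, assuming a local
+node on the default HTTP port) is simply:
+
+```curl
+curl http://localhost:8098/search/index
+```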
+
+Below is the example output if there is one Search index, called
+`test_index`, currently available:
+
+```json
+[
+  {
+    "n_val": 3,
+    "name": "test_index",
+    "schema": "_yz_default"
+  }
+]
+```
+
+#### Normal Response Codes
+
+* `200 OK`
+
+#### Typical Error Codes
+
+* `404 Object Not Found` - Typically returned if Riak Search is not
+  currently enabled on the node
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/search-query.md b/content/riak/kv/2.9.9/developing/api/http/search-query.md
new file mode 100644
index 0000000000..de611995d5
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/search-query.md
@@ -0,0 +1,73 @@
+---
+title: "HTTP Search Query"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Search Query"
+    identifier: "http_search_query"
+    weight: 113
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/search-query
+  - /riak/kv/2.9.9/dev/references/http/search-query
+---
+
+Performs a [Riak KV Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search) query.
+
+## Request
+
+```
+GET /search/query/<index_name>
+```
+
+## Optional Query Parameters
+
+* `wt` - The [response
+  writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers)
+  to be used when returning the Search payload. The currently
+  available options are `json` and `xml`. The default is `xml`.
+* `q` - The actual Search query itself. Examples can be found in
+  [Using Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search). If a query is not specified, Riak will return
+  information about the index itself, e.g. the number of documents
+  indexed.
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `400 Bad Request` - Returned when, for example, a malformed query is
+  supplied
+* `404 Object Not Found` - Returned if the Search index you are
+  attempting to query does not exist
+* `503 Service Unavailable` - The request timed out internally
+
+## Response
+
+If a `200 OK` is returned, then the Search query has been successful.
+Below is an example JSON response from querying an index that currently
+has no documents associated with it:
+
+```json
+{
+  "response": {
+    "docs": [],
+    "maxScore": 0.0,
+    "numFound": 0,
+    "start": 0
+  },
+  "responseHeader": {
+    "status": 0,
+    "QTime": 10,
+    "params": { /* internal info from the query */ }
+  }
+}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/secondary-indexes.md b/content/riak/kv/2.9.9/developing/api/http/secondary-indexes.md
new file mode 100644
index 0000000000..c86985a6bd
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/secondary-indexes.md
@@ -0,0 +1,95 @@
+---
+title: "HTTP Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Secondary Indexes"
+    identifier: "http_2i"
+    weight: 109
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/secondary-indexes
+  - /riak/kv/2.9.9/dev/references/http/secondary-indexes
+---
+
+[Secondary Indexes]({{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes) allow an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
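+
+Index entries are attached when an object is stored, using `X-Riak-Index-*`
+headers; the `_bin` suffix denotes a binary (string) index and `_int` an
+integer index. A minimal sketch of tagging an object (the bucket, key, and
+values here are illustrative only):
+
+```curl
+curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "x-riak-index-field1_bin: val1" \
+  -H "Content-Type: text/plain" \
+  -d 'some data'
+```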
+
+## Request
+
+### Exact Match
+
+```bash
+GET /buckets/mybucket/index/myindex_bin/value
+```
+
+### Range Query
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end
+```
+
+#### Range query with terms
+
+To see the index values matched by the range, use `return_terms=true`.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true
+```
+
+### Pagination
+
+Add the parameter `max_results` for pagination. This will limit the results and provide a `continuation` value for the next request.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=
+```
+
+### Streaming
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?stream=true
+```
+
+## Response
+
+Normal status codes:
+
++ `200 OK`
+
+Typical error codes:
+
++ `400 Bad Request` - if the index name or index value is invalid.
++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system.
++ `503 Service Unavailable` - if the job timed out before it could complete
+
+## Example
+
+```curl
+$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 19
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+{"keys":["mykey1"]}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/set-bucket-props.md b/content/riak/kv/2.9.9/developing/api/http/set-bucket-props.md
new file mode 100644
index 0000000000..10d993fb25
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/set-bucket-props.md
@@ -0,0 +1,116 @@
+---
+title: "HTTP Set Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Set Bucket Properties"
+    identifier: "http_set_bucket_props"
+    weight: 101
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/set-bucket-props
+  - /riak/kv/2.9.9/dev/references/http/set-bucket-props
+---
+
+Sets bucket properties like "n_val" and "allow_mult".
+
+## Request
+
+```bash
+PUT /buckets/bucket/props
+```
+
+Important headers:
+
+* `Content-Type` - `application/json`
+
+The body of the request should be a JSON object with a single entry "props".
+Unmodified bucket properties may be omitted.
+
+Available properties:
+
+* `n_val` (integer > 0) - the number of replicas for objects in this bucket
+* `allow_mult` (true or false) - whether to allow sibling objects to be created
+(concurrent updates)
+* `last_write_wins` (true or false) - whether to ignore object history (vector
+clock) when writing
+* `precommit` - [precommit hooks]({{<baseurl>}}riak/kv/2.9.9/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{<baseurl>}}riak/kv/2.9.9/developing/usage/commit-hooks)
+* `r, w, dw, rw` - default quorum values for operations on keys in the bucket. 
+Valid values are:
+  * `"all"` - all nodes must respond
+  * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.*
+  * `"one"` - equivalent to 1
+  * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of diverse physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+{{% note title="Node Confirms" %}}
+`node_confirms` is a tunable for durability. When operating in a failure state, Riak will store replicas in fallback vnodes, and in some cases multiple fallbacks may be on the same physical node. `node_confirms` is an option that specifies how many distinct physical nodes must acknowledge a write for it to be considered successful.
+
+When Riak receives a 'put', it starts up a riak_kv_put_fsm (finite state machine). This prepares and then validates the options, then calls any precommit hooks, before executing a put to the local vnode in the preflist, which becomes the co-ordinating node. This then waits for the local vnode response before executing the put request remotely on the two remaining nodes in the preflist.
+
+The FSM then waits for the remote vnode responses, and as it receives responses, it adds these results and checks whether enough results have been collected to satisfy the bucket properties such as 'dw' and 'pw'.
+When analysing the responses, Riak will count the number of different nodes from which results have been returned. The finite state machine can now be required to wait for a minimum number of confirmations from different nodes, whilst also ensuring all other configured options are satisfied.
+
+Once all options are satisfied, the response is returned, postcommit hooks are called and the FSM finishes.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+    -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/props HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4
+OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 21
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/status.md b/content/riak/kv/2.9.9/developing/api/http/status.md
new file mode 100644
index 0000000000..570109f4e6
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/status.md
@@ -0,0 +1,173 @@
+---
+title: "HTTP Status"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Status"
+    identifier: "http_status"
+    weight: 111
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/status
+  - /riak/kv/2.9.9/dev/references/http/status
+---
+
+Reports on the performance and configuration of the Riak node that receives the request. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active.
+
+## Performance
+
+Repeated requests to the `/stats` endpoint do not have a negative
+performance impact as the statistics are cached internally in Riak.
+
+## Request
+
+```bash
+GET /stats
+```
+
+Important headers:
+
+* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`.
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `404 Not Found` - if `riak_kv_stat` is not enabled
+
+Important headers:
+* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks)
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /stats HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: text/plain
+>
+< HTTP/1.1 200 OK
+< Vary: Accept, Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/plain
+< Content-Length: 2102
+<
+{
+    "vnode_gets": 0,
+    "vnode_puts": 0,
+    "read_repairs": 0,
+    "vnode_gets_total": 0,
+    "vnode_puts_total": 0,
+    "node_gets": 0,
+    "node_gets_total": 0,
+    "node_get_fsm_time_mean": "undefined",
+    "node_get_fsm_time_median": "undefined",
+    "node_get_fsm_time_95": "undefined",
+    "node_get_fsm_time_99": "undefined",
+    "node_get_fsm_time_100": "undefined",
+    "node_puts": 0,
+    "node_puts_total": 0,
+    "node_put_fsm_time_mean": "undefined",
+    "node_put_fsm_time_median": "undefined",
+    "node_put_fsm_time_95": "undefined",
+    "node_put_fsm_time_99": "undefined",
+    "node_put_fsm_time_100": "undefined",
+    "read_repairs_total": 0,
+    "cpu_nprocs": 84,
+    "cpu_avg1": 251,
+    "cpu_avg5": 174,
+    "cpu_avg15": 110,
+    "mem_total": 7946684000.0,
+    "mem_allocated": 4340880000.0,
+    "nodename": "riak@127.0.0.1",
+    "connected_nodes": [
+
+    ],
+    "sys_driver_version": "1.5",
+    "sys_global_heaps_size": 0,
+    "sys_heap_type": "private",
+    "sys_logical_processors": 2,
+    "sys_otp_release": "R13B04",
+    "sys_process_count": 189,
+    "sys_smp_support": true,
+    "sys_system_version": "Erlang R13B04 (erts-5.7.5) [source] [64-bit] [smp:2:2] [rq:2] [async-threads:5] [hipe] [kernel-poll:true]",
+    "sys_system_architecture": "i386-apple-darwin10.3.0",
+    "sys_threads_enabled": true,
+    "sys_thread_pool_size": 5,
+    "sys_wordsize": 8,
+    "ring_members": [
+        "riak@127.0.0.1"
+    ],
+    "ring_num_partitions": 64,
+    "ring_ownership": "[{'riak@127.0.0.1',64}]",
+    "ring_creation_size": 64,
+    "storage_backend": "riak_kv_bitcask_backend",
+    "pbc_connects_total": 0,
+    "pbc_connects": 0,
+    "pbc_active": 0,
+    "riak_kv_version": "0.11.0",
+    "riak_core_version": "0.11.0",
+    "bitcask_version": "1.0.1",
+    "luke_version": "0.1",
+    "webmachine_version": "1.7.1",
+    "mochiweb_version": "1.7.1",
+    "erlang_js_version": "0.4",
+    "runtime_tools_version": "1.8.3",
+    "crypto_version": "1.6.4",
+    "os_mon_version": "2.9.1",
+    "sasl_version": "2.1.9",
+    "stdlib_version": "1.16.5",
+    "kernel_version": "2.13.5"
+}
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+## Output Explanation
+
+The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
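+
+Because the body is JSON, individual values are easy to extract on the
+command line; a quick sketch, assuming the `jq` utility is installed:
+
+```bash
+curl -s http://127.0.0.1:8098/stats | jq '.ring_num_partitions'
+```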
+
+Stat | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total | Total number of ignored gossip messages since node was started
+rings_reconciled_total | Total number of ring reconciliation operations since node was started
+rings_reconciled | Number of ring reconciliation operations in the last minute
+gossip_received | Number of gossip messages received in the last minute
+rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/store-object.md b/content/riak/kv/2.9.9/developing/api/http/store-object.md
new file mode 100644
index 0000000000..e9b56d68dd
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/store-object.md
@@ -0,0 +1,150 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/store-object
+  - /riak/kv/2.9.9/dev/references/http/store-object
+---
+
+Stores 
an object under the specified bucket / key. Storing an object comes in
+two forms, depending on whether you want to use a key of your choosing, or let
+Riak assign a key to a new object.
+
+## Request
+
+```bash
+POST /types/type/buckets/bucket/keys       # Riak-defined key
+PUT /types/type/buckets/bucket/keys/key    # User-defined key
+POST /buckets/bucket/keys                  # Riak-defined key
+PUT /buckets/bucket/keys/key               # User-defined key
+```
+
+For the sake of compatibility with older clients, `POST` is also acceptable in
+the form where the key is specified.
+
+Important headers:
+
+* `Content-Type` must be set for the stored object. Set what you expect to
+receive back when next requesting it.
+* `X-Riak-Vclock` if the object already exists, the vector clock attached to the
+object when read.
+* `X-Riak-Meta-*` - any additional metadata headers that should be stored with
+the object.
+* `X-Riak-Index-*` - index entries under which this object should be indexed.
+[Read more about Secondary Indexing]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/secondary-indexes)
+* `Link` - user and system-defined links to other resources. [Read more about Links.]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/link-walking)
+
+Optional headers (only valid on `PUT`):
+
+* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since`
+invoke conditional request semantics, matching on the `ETag` and `Last-Modified`
+of the existing object. These can be used to prevent overwriting a modified
+object. If the test fails, you will receive a `412 Precondition Failed`
+response. This does not prevent concurrent writes; it is possible for the
+condition to evaluate to true for multiple requests if the requests occur at the
+same time.
+
+Optional query parameters:
+
+* `w` (write quorum) how many replicas to write to before returning a successful
+response (default is defined at the bucket level)
+* `dw` (durable write quorum) how many replicas to commit to durable storage
+before returning a successful response (default is defined at the bucket level)
+* `pw` how many primary replicas must be online to attempt a write (default is
+defined at the bucket level)
+* `returnbody=[true|false]` whether to return the contents of the stored object.
+
+*This request must include a body (entity).*
+
+## Response
+
+Normal status codes:
+
+* `201 Created` (when submitting without a key)
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+Typical error codes:
+
+* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N)
+* `412 Precondition Failed` if one of the conditional request headers failed to
+match (see above)
+
+Important headers:
+
+* `Location` a relative URL to the newly-created object (when submitting without
+a key)
+
+If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices`
+may be returned if siblings existed or were created as part of the operation,
+and the response can be dealt with similarly.
+
+## Example: Storing Without Key
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys \
+  -H "Content-Type: text/plain" -d 'this is a test'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /buckets/test/keys HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: text/plain
+> Content-Length: 14
+>
+< HTTP/1.1 201 Created
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+## Example: Storing With Key
+
+```curl
+$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==
+> Content-Length: 13
+>
+< HTTP/1.1 200 OK
+< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Link: </buckets/test>; rel="up"
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 13
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"bar":"baz"}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/store-search-index.md b/content/riak/kv/2.9.9/developing/api/http/store-search-index.md
new file mode 100644
index 0000000000..a3efe1e169
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/store-search-index.md
@@ -0,0 +1,56 @@
+---
+title: "HTTP Store Search Index"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Store Search Index"
+    identifier: "http_store_search_index"
+    weight: 115
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/store-search-index
+  - /riak/kv/2.9.9/dev/references/http/store-search-index
+---
+
+Creates a new Riak Search [index]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search/#simple-setup).
+
+## Request
+
+```
+PUT /search/index/<index_name>
+```
+
+## Optional Request Body
+
+If you run a `PUT` request to this endpoint without a request body, Riak
+will create a new Search index that uses the [default Search schema]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.
+
+To specify a different schema, however, you must pass Riak a JSON object
+as the request body in which the `schema` field specifies the name of
+the schema to use. If you've [stored a schema]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
+request would create an index called `my_index` that used that schema:
+
+```curl
+curl -XPUT http://localhost:8098/search/index/my_index \
+  -H "Content-Type: application/json" \
+  -d '{"schema": "my_custom_schema"}'
+```
+
+More information can be found in [Using Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search). 
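+
+If the default schema is all you need, the request body can simply be
+omitted; a minimal sketch:
+
+```curl
+curl -XPUT http://localhost:8098/search/index/my_index
+```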
+
+## Normal Response Codes
+
+* `204 No Content` - The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` - The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/http/store-search-schema.md b/content/riak/kv/2.9.9/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..dd9dd16ee8
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/http/store-search-schema.md
@@ -0,0 +1,54 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/http/store-search-schema
+  - /riak/kv/2.9.9/dev/references/http/store-search-schema
+---
+
+Creates a new Riak [Search schema]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/<schema_name>
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response
+
+* `204 No Content` - The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` - The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict` - The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..3d1a2305fe
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers.md
@@ -0,0 +1,189 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers
+  - /riak/kv/2.9.9/dev/references/protocol-buffers
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages. 
Messages are all encoded the same way, consisting of:
+
+* 32-bit length of message code + Protocol Buffers message in network
+  order
+* 8-bit message code to identify the Protocol Buffers message
+* N bytes of Protocol Buffers-encoded message
+
+### Example
+
+```
+00 00 00 07 09 0A 01 62 12 01 6B
+|----Len---|MC|----Message-----|
+
+Len = 0x07
+Message Code (MC) = 0x09 = RpbGetReq
+RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B
+
+Decoded Message:
+bucket: "b"
+key: "k"
+```
+
+## Message Codes
+
+Code | Message |
+:----|:--------|
+0 | `RpbErrorResp` |
+1 | `RpbPingReq` |
+2 | `RpbPingResp` |
+3 | `RpbGetClientIdReq` |
+4 | `RpbGetClientIdResp` |
+5 | `RpbSetClientIdReq` |
+6 | `RpbSetClientIdResp` |
+7 | `RpbGetServerInfoReq` |
+8 | `RpbGetServerInfoResp` |
+9 | `RpbGetReq` |
+10 | `RpbGetResp` |
+11 | `RpbPutReq` |
+12 | `RpbPutResp` |
+13 | `RpbDelReq` |
+14 | `RpbDelResp` |
+15 | `RpbListBucketsReq` |
+16 | `RpbListBucketsResp` |
+17 | `RpbListKeysReq` |
+18 | `RpbListKeysResp` |
+19 | `RpbGetBucketReq` |
+20 | `RpbGetBucketResp` |
+21 | `RpbSetBucketReq` |
+22 | `RpbSetBucketResp` |
+23 | `RpbMapRedReq` |
+24 | `RpbMapRedResp` |
+25 | `RpbIndexReq` |
+26 | `RpbIndexResp` |
+27 | `RpbSearchQueryReq` |
+28 | `RpbSearchQueryResp` |
+29 | `RpbResetBucketReq` |
+30 | `RpbResetBucketResp` |
+31 | `RpbGetBucketTypeReq` |
+32 | `RpbSetBucketTypeReq` |
+40 | `RpbCSBucketReq` |
+41 | `RpbCSUpdateReq` |
+50 | `RpbCounterUpdateReq` |
+51 | `RpbCounterUpdateResp` |
+52 | `RpbCounterGetReq` |
+53 | `RpbCounterGetResp` |
+54 | `RpbYokozunaIndexGetReq` |
+55 | `RpbYokozunaIndexGetResp` |
+56 | `RpbYokozunaIndexPutReq` |
+57 | `RpbYokozunaIndexPutResp` |
+58 | `RpbYokozunaSchemaGetReq` |
+59 | `RpbYokozunaSchemaGetResp` |
+60 | `RpbYokozunaSchemaPutReq` |
+80 | `DtFetchReq` |
+81 | `DtFetchResp` |
+82 | `DtUpdateReq` |
+83 | `DtUpdateResp` |
+253 | `RpbAuthReq` |
+254 | `RpbAuthResp` |
+255 | `RpbStartTls` |
+
+{{% note title="Message Definitions" %}}
+All Protocol Buffers messages are defined in the `riak.proto` and other
+`.proto` files in the `/src` directory of the
+RiakPB project.
+{{% /note %}}
+
+### Error Response
+
+If the request does not result in an error, Riak will return one of a
+variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`,
+depending on which request message is sent.
+
+If the server experiences an error processing a request, however, it
+will return an `RpbErrorResp` message instead of the response expected
+for the given request (e.g. `RpbGetResp` is the expected response to
+`RpbGetReq`). Error messages contain an error string and an error code,
+like this:
+
+```protobuf
+message RpbErrorResp {
+    required bytes errmsg = 1;
+    required uint32 errcode = 2;
+}
+```
+
+### Values
+
+* `errmsg` - A string representation of what went wrong
+* `errcode` - A numeric code. Currently, only `RIAKC_ERR_GENERAL=1`
+  is defined. 
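+
+As a concrete illustration of the framing described above, here is a
+minimal Python sketch (standard library only; assumes a local node on
+the default PB port 8087) that sends a ping and inspects the response
+code. `RpbPingReq` and `RpbPingResp` carry no protobuf payload, so each
+frame is just the 4-byte length (1) followed by the message code:
+
+```python
+import socket
+import struct
+
+with socket.create_connection(("127.0.0.1", 8087)) as s:
+    s.sendall(struct.pack(">IB", 1, 1))      # length = 1, code 1 = RpbPingReq
+    (length,) = struct.unpack(">I", s.recv(4))
+    code = s.recv(1)[0]
+    body = s.recv(length - 1) if length > 1 else b""
+    if code == 2:                            # RpbPingResp
+        print("pong")
+    elif code == 0:                          # RpbErrorResp (errmsg, errcode)
+        print("error:", body)
+```
+
+A real client would use protoc-generated message classes and loop on
+`recv` until the full frame has been read.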
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/yz-schema-put) + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..0869acd269 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,34 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/auth-req + - /riak/kv/2.9.9/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/2.9.9/using/security/basics). + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..e97b75060c --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,82 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "2.9.9" +menu: + riak_kv-2.9.9: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/coverage-queries + - /riak/kv/2.9.9/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
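+
+The retry flow implied by `replace_cover` and `unavailable_cover` can be
+sketched as follows. This is illustrative only: `get_coverage_plan` and
+`run_2i_query` are hypothetical stand-ins for whatever your client uses
+to issue `RpbCoverageReq` and secondary index queries.
+
+```python
+def query_all_slices(get_coverage_plan, run_2i_query, bucket, index, key):
+    entries = list(get_coverage_plan(bucket))  # coverage entries from Riak
+    unavailable = []                           # contexts of unreachable servers
+    results = []
+    for entry in entries:                      # appended entries are visited too
+        try:
+            results.extend(run_2i_query(
+                entry["ip"], entry["port"], bucket, index, key,
+                cover_context=entry["cover_context"]))
+        except ConnectionError:
+            # Tell Riak this slice is unreachable and ask for a
+            # replacement plan component covering the same keyspace.
+            unavailable.append(entry["cover_context"])
+            entries.extend(get_coverage_plan(
+                bucket,
+                replace_cover=entry["cover_context"],
+                unavailable_cover=unavailable))
+    return results
+```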
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/delete-object.md
new file mode 100644
index 0000000000..62b306c99b
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/delete-object.md
@@ -0,0 +1,104 @@
+---
+title: "PBC Delete Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Delete Object"
+    identifier: "pbc_delete_object"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers/delete-object
+  - /riak/kv/2.9.9/dev/references/protocol-buffers/delete-object
+---
+
+Delete an object in the specified [bucket type]({{}}riak/kv/2.9.9/using/cluster-operations/bucket-types)/bucket/key location.
+
+## Request
+
+```protobuf
+message RpbDelReq {
+    required bytes bucket = 1;
+    required bytes key = 2;
+    optional uint32 rw = 3;
+    optional bytes vclock = 4;
+    optional uint32 r = 5;
+    optional uint32 w = 6;
+    optional uint32 pr = 7;
+    optional uint32 pw = 8;
+    optional uint32 dw = 9;
+    optional uint32 timeout = 10;
+    optional bool sloppy_quorum = 11;
+    optional uint32 n_val = 12;
+    optional bytes type = 13;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description |
+:---------|:------------|
+`bucket` | The name of the bucket in which the object is stored
+`key` | The key under which the object is stored
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.

Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
`pw`, provided that that integer value is less than or equal to N, _or_
a special value denoting `one` (`4294967295-1`), `quorum`
(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description |
+:---------|:------------|
+`rw` | How many replicas to delete before returning a successful response
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Used to prevent deletion of objects that have been modified since the last GET request (sent as a byte array)
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the delete request will be sent
+`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter.
+
+## Response
+
+Only the message code is returned. 
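+
+To make the framing concrete, the following Python sketch (standard
+library only) hand-encodes the same `RpbDelReq` shown in the example
+below; a real client would use protoc-generated code rather than
+encoding fields by hand:
+
+```python
+import struct
+
+def ld(tag: int, data: bytes) -> bytes:
+    # Length-delimited protobuf field; assumes len(data) < 128
+    return bytes([tag, len(data)]) + data
+
+payload = (
+    ld(0x0A, b"notabucket")    # field 1: bucket
+    + ld(0x12, b"k")           # field 2: key
+    + bytes([0x18, 0x01])      # field 3: rw = 1 (varint)
+)
+frame = struct.pack(">I", len(payload) + 1) + bytes([13]) + payload  # 13 = RpbDelReq
+assert frame.hex() == "000000120d0a0a6e6f74616275636b657412016b1801"
+```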
+ +## Example + +#### Request + +``` +Hex 00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65 + 74 12 01 6B 18 01 +Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>> + +RpbDelReq protoc decode: +bucket: "notabucket" +key: "k" +rw: 1 + +``` + +#### Response + +``` +Hex 00 00 00 01 0E +Erlang <<0,0,0,1,14>> + +RpbDelResp - only message code defined +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-counter-store.md new file mode 100644 index 0000000000..8de244fda6 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-counter-store.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Counter Store" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Data Type Counter Store" + identifier: "pbc_dt_counter_store" + weight: 117 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/dt-counter-store + - /riak/kv/2.9.9/dev/references/protocol-buffers/dt-counter-store +--- + +An operation to update a [counter]({{}}riak/kv/2.9.9/developing/data-types). + +## Request + +```protobuf +message CounterOp { + optional sint64 increment = 1; +} +``` + +The `increment` value specifies how much the counter will be incremented +or decremented, depending on whether the `increment` value is positive +or negative. This operation can be used to update counters that are +stored on their own in a key or [within a map]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-map-store). + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..a424f0df8a --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,131 @@ +--- +title: "PBC Data Type Fetch" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Data Type Fetch" + identifier: "pbc_dt_fetch" + weight: 114 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/dt-fetch + - /riak/kv/2.9.9/dev/references/protocol-buffers/dt-fetch +--- + +The equivalent of [`RpbGetReq`]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/2.9.9/developing/data-types). This request results in a `DtFetchResp` +message (explained in the **Response** section below). + +## Request + +```protobuf +message DtFetchReq { + required bytes bucket = 1; + required bytes key = 2; + required bytes type = 3; + optional uint32 r = 4; + optional uint32 pr = 5; + optional bool basic_quorum = 6; + optional bool notfound_ok = 7; + optional uint32 timeout = 8; + optional bool sloppy_quorum = 9; + optional uint32 n_val = 10; + optional bool include_context = 11 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`key` | The key where the Data Type is stored +`type` | The [Using Bucket Types]({{}}riak/kv/2.9.9/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) + +#### Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. 
Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `r` and
+`pr`, provided that that integer value is less than or equal
+to N, _or_ a special value denoting `one`
+(`4294967295-1`), `quorum`
+(`4294967295-2`), `all`
+(`4294967295-3`), or `default`
+(`4294967295-4`).
+
+Parameter | Description
+:---------|:-----------
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the fetch request will be sent
+`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client
+
+## Response
+
+The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+
+```protobuf
+message DtFetchResp {
+    enum DataType {
+        COUNTER = 1;
+        SET = 2;
+        MAP = 3;
+    }
+
+    optional bytes context = 1;
+    required DataType type = 2;
+    optional DtValue value = 3;
+}
+```
+
+If the `include_context` option is specified, an opaque "context" value
+will be returned along with the user-readable data. When sending an
+update request, the client should send this context as well, just as one
+would send a [vclock]({{}}riak/kv/2.9.9/learn/glossary/#vector-clock) for standard KV updates.
+
+The type of the Data Type is specified in the `type` field, and must be
+one of the three possible values of the `DataType` enum (`COUNTER`,
+`SET`, or `MAP`).
+
+The current value of the Data Type is contained in the `value` field,
+which itself contains a `DtValue` message. This message will have the
+following structure:
+
+```protobuf
+message DtValue {
+    optional sint64 counter_value = 1;
+    repeated bytes set_value = 2;
+    repeated MapEntry map_value = 3;
+}
+```
+
+If the Data Type queried is a counter, it will return an integer value
+for the counter; if a set, it will return the set's current value, in
+bytes; if a map, it will return a `MapEntry` message. 
`MapEntry` messages +are structured as follows: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-map-store.md new file mode 100644 index 0000000000..cd3baf01d6 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-map-store.md @@ -0,0 +1,77 @@ +--- +title: "PBC Data Type Map Store" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Data Type Map Store" + identifier: "pbc_dt_map_store" + weight: 119 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/dt-map-store + - /riak/kv/2.9.9/dev/references/protocol-buffers/dt-map-store +--- + +An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc. + +## Request + +Operations on maps are requested using a `MapOp` message, which has the following structure: + +```protobuf +message MapOp { + repeated MapField adds = 1; + repeated MapField removes = 2; + repeated MapUpdate updates = 3; +} +``` + +In a `MapOp` message, you can either add or remove fields (sets, counters, or maps) to or from the map or update a field or multiple fields. You can include as many field additions or removals and/or field updates as you wish. + +Adding or removing a field involves including a `MapField` message in your `MapOp` operation: + +```protobuf +message MapField { + enum MapFieldType { + COUNTER = 1; + SET = 2; + REGISTER = 3; + FLAG = 4; + MAP = 5; + } + required bytes name = 1; + required MapFieldType type = 2; +} +``` + +The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated. + +If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure: + +```protobuf +message MapUpdate { + enum FlagOp { + ENABLE = 1; + DISABLE = 2; + } + required MapField field = 1; + optional CounterOp counter_op = 2; + optional SetOp set_op = 3; + optional bytes register_op = 4; + optional FlagOp flag_op = 5; + optional MapOp map_op = 6; +} +``` + +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-set-store). + +If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). + +Updating a register does not involve sending a special message type. 
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..31a7b6bde0 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,36 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/dt-set-store + - /riak/kv/2.9.9/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..262f800eb6 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,132 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/dt-store + - /riak/kv/2.9.9/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/2.9.9/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/2.9.9/using/cluster-operations/bucket-types). 
+ +Also required is a `DtOp` message that specifies which operation is to +be performed, depending on whether the Data Type being updated is a +[counter]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-map-store). + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description +:---------|:----------- +`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/2.9.9/learn/glossary/#vector-clock) +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_body` | Whether to return the contents of the stored object. Defaults to `false`. +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored +`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be returned to the client when the `DtUpdateResp` is sent to the client. + +## Response + +The response to a Data Type update request is analogous to +[`RpbPutResp`]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/store-object) for KV operations. If the +`return_body` is set in the update request message (as explained above), +the message will include the opaque context of the Data Type (`context`) +and the new value of the Data Type _after_ the update has completed +(depending on whether the Data Type is a counter, set, or map). If no +key was specified in the update request, it will include the +Riak-assigned key (`key`). + +```protobuf +message DtUpdateResp { + optional bytes key = 1; + optional bytes context = 2; + optional sint64 counter_value = 3; + repeated bytes set_value = 4; + repeated MapEntry map_value = 5; +} +``` + +Assuming `return_body` is set to `true`: if a counter is updated, the +response will include an integer as the `counter_value`; if a set is +updated, a list of binaries will be return as the `set_value`; and if a +map is updated, the returned `map_value` will be a `MapEntry` message. 
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..aaac4e6796 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/dt-union + - /riak/kv/2.9.9/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/dt-store) message. + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..348bf3fb87 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,185 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/fetch-object + - /riak/kv/2.9.9/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success basic_quorum=true would return an error +`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R +`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match +`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it +`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable + +## Response + +```protobuf +message RpbGetResp { + repeated RpbContent content = 1; + optional bytes vclock = 2; + optional bool unchanged = 3; +} +``` + +#### Values + +Value | Description +:-----|:----------- +`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty. +`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings +`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true` + +The content entries hold the object value and any metadata. +Below is the structure of a RpbContent message, which is +included in GET/PUT responses (`RpbGetResp` (above) and +[`RpbPutResp`]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/store-object), respectively): + +```protobuf +message RpbContent { + required bytes value = 1; + optional bytes content_type = 2; + optional bytes charset = 3; + optional bytes content_encoding = 4; + optional bytes vtag = 5; + repeated RpbLink links = 6; + optional uint32 last_mod = 7; + optional uint32 last_mod_usecs = 8; + repeated RpbPair usermeta = 9; + repeated RpbPair indexes = 10; + optional bool deleted = 11; +} +``` + +From the above, we can see that an `RpbContent` message will always +contain the binary `value` of the object. But it could also contain any +of the following optional parameters: + +* `content_type` - The content type of the object, e.g. `text/plain` + or `application/json` +* `charset` - The character encoding of the object, e.g. `utf-8` +* `content_encoding` - The content encoding of the object, e.g. + `video/mp4` +* `vtag` - The object's [vtag]({{}}riak/kv/2.9.9/learn/glossary/#vector-clock) +* `links` - This parameter is associated with the now-deprecated link + walking feature and should not be used by Riak clients +* `last_mod` - A timestamp for when the object was last modified, in + [ISO 8601 time](http://en.wikipedia.org/wiki/ISO_8601) +* `last_mod_usecs` - A timestamp for when the object was last modified, + in [Unix time](http://en.wikipedia.org/wiki/Unix_time) +* `usermeta` - This field stores user-specified key/value metadata + pairs to be associated with the object. `RpbPair` messages used to + send metadata of this sort are structured like this: + + ```protobuf + message RpbPair { + required bytes key = 1; + optional bytes value = 2; + } + ``` + Notice that both a key and value can be stored or just a key. + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/2.9.9/developing/usage/secondary-indexes) to objects (in the optional + `indexes` field). 
+* `deleted` - Whether the object has been deleted (i.e. whether a + tombstone for the object has been found under the specified key) + +{{% note title="Note on missing keys" %}} +Remember: if a key is not stored in Riak, an `RpbGetResp` response without the +`content` and `vclock` fields will be returned. This should be mapped to +whatever convention the client language uses to return not found. The Erlang +client, for example, returns the atom `{error, notfound}`. +{{% /note %}} + +## Example + +#### Request + +``` +Hex 00 00 00 07 09 0A 01 62 12 01 6B +Erlang <<0,0,0,7,9,10,1,98,18,1,107>> + +RpbGetReq protoc decode: +bucket: "b" +key: "k" +``` + +#### Response + +``` +Hex 00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44 + 6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B + 49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B + CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65 + 30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00 +Erlang <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122, + 56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64, + 224,185,6,18,31,107,206,97,96,96,96,204,96,226,82,44,172,194,91,63, + 101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>> + +RpbGetResp protoc decode: +content { + value: "v2" + vtag: "3SDlf4INKz8hNdhyImKIru" + last_mod: 1271442363 + last_mod_usecs: 105696 +} +vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000" +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-props.md new file mode 100644 index 0000000000..8b21384566 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-props.md @@ -0,0 +1,114 @@ +--- +title: "PBC Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Get Bucket Properties" + identifier: "pbc_get_bucket_props" + weight: 102 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/get-bucket-props + - /riak/kv/2.9.9/dev/references/protocol-buffers/get-bucket-props +--- + +Fetch a bucket's properties. + +## Request + +```protobuf +message RpbGetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/2.9.9/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +the `default` bucket type will be used. 
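+
+As an illustration (assuming a local node on the default PB port 8087;
+a real client would use protoc-generated classes rather than manual
+encoding), the request can be framed and sent like this:
+
+```python
+import socket
+import struct
+
+bucket = b"test"
+payload = bytes([0x0A, len(bucket)]) + bucket  # field 1: bucket
+frame = struct.pack(">I", len(payload) + 1) + bytes([19]) + payload  # 19 = RpbGetBucketReq
+
+with socket.create_connection(("127.0.0.1", 8087)) as s:
+    s.sendall(frame)
+    (length,) = struct.unpack(">I", s.recv(4))
+    code = s.recv(1)[0]  # expect 20 = RpbGetBucketResp, or 0 = RpbErrorResp
+    print("message code:", code)
+```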
+ +## Response + +When an `RpbGetBucketReq` message is sent to Riak, it will respond with +an `RpbGetBucketResp` message, which returns the bucket's properties: + +```protobuf +message RpbGetBucketResp { + required RpbBucketProps props = 1; +} +``` + +The `RpbBucketProps` value itself is structured as follows: + +```protobuf +message RpbBucketProps { + optional uint32 n_val = 1; + optional bool allow_mult = 2; + optional bool last_write_wins = 3; + repeated RpbCommitHook precommit = 4; + optional bool has_precommit = 5 [default = false]; + repeated RpbCommitHook postcommit = 6; + optional bool has_postcommit = 7 [default = false]; + optional RpbModFun chash_keyfun = 8; + optional RpbModFun linkfun = 9; + optional uint32 old_vclock = 10; + optional uint32 young_vclock = 11; + optional uint32 big_vclock = 12; + optional uint32 small_vclock = 13; + optional uint32 pr = 14; + optional uint32 r = 15; + optional uint32 w = 16; + optional uint32 pw = 17; + optional uint32 dw = 18; + optional uint32 rw = 19; + optional bool basic_quorum = 20; + optional bool notfound_ok = 21; + optional bytes backend = 22; + optional bool search = 23; + enum RpbReplMode { + FALSE = 0; + REALTIME = 1; + FULLSYNC = 2; + TRUE = 3; + } + optional RpbReplMode repl = 24; + optional bytes search_index = 25; + optional bytes datatype = 26; + optional bool consistent = 27; +} +``` + +#### Optional Response Values + +Each `RpbBucketProps` message returns all of the properties associated +with a particular bucket. Default values for bucket properties, as well +as descriptions of all of the above properties, can be found in the +[configuration file]({{}}riak/kv/2.9.9/configuring/reference/#default-bucket-properties) documentation. + +It should be noted that the value of an `RpbBucketProps` message may +include other message types, such as `RpbModFun` (specifying +module-function pairs for bucket properties that require them) and +`RpbCommitHook` (specifying the module-function pair and name of a +commit hook). Those message types are structured like this: + +```protobuf +message RpbModFun { + required bytes module = 1; + required bytes function = 2; +} + +message RpbCommitHook { + optional RpbModFun modfun = 1; + optional bytes name = 2; +} +``` + +{{% note title="Note on `RpbReplMode`" %}} +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) +{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-type.md new file mode 100644 index 0000000000..bb9af8b7c0 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-type.md @@ -0,0 +1,37 @@ +--- +title: "PBC Get Bucket Type" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Get Bucket Type" + identifier: "pbc_get_bucket_type" + weight: 112 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/get-bucket-type + - /riak/kv/2.9.9/dev/references/protocol-buffers/get-bucket-type +--- + +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/2.9.9/using/cluster-operations/bucket-types). + +## Request + +```protobuf +message RpbGetBucketTypeReq { + required bytes type = 1; +} +``` + +Only the name of the bucket type needs to be specified (under `name`). 
+ +## Response + +A bucket type's properties will be sent to the client as part of an +[`RpbBucketProps`]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-props) message. + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/get-client-id.md new file mode 100644 index 0000000000..86f07bb34b --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/get-client-id.md @@ -0,0 +1,65 @@ +--- +title: "PBC Get Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Get Client ID" + identifier: "pbc_get_client_id" + weight: 127 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/get-client-id + - /riak/kv/2.9.9/dev/references/protocol-buffers/get-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Get the client id used for this connection. Client ids are used for +conflict resolution and each unique actor in the system should be +assigned one. A client id is assigned randomly when the socket is +connected and can be changed using [Set Client ID]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/set-client-id). + +## Request + +Just the `RpbGetClientIdReq` message code. No request message defined. + +## Response + +```protobuf +// Get ClientId Request - no message defined, just send RpbGetClientIdReq +message code +message RpbGetClientIdResp { + required bytes client_id = 1; // Client id in use for this connection +} +``` + +## Example + +Request + +``` +Hex 00 00 00 01 03 +Erlang <<0,0,0,1,3>> +``` + + +Response + +``` +Hex 00 00 00 07 04 0A 04 01 65 01 B5 +Erlang <<0,0,0,7,4,10,4,1,101,1,181>> + +RpbGetClientIdResp protoc decode: +client_id: "001e001265" +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..f0127e90ed --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,80 @@ +--- +title: "PBC List Buckets" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "List Buckets" + identifier: "pbc_list_buckets" + weight: 100 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/list-buckets + - /riak/kv/2.9.9/dev/references/protocol-buffers/list-buckets +--- + +List all of the bucket names available. + +{{% note title="Caution" %}} +This call can be expensive for the server. Do not use in performance-sensitive +code. +{{% /note %}} + + +## Request + +Only the message code is required. 
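+
+For instance, a code-only request frame is just 5 bytes. Below is a
+minimal Python sketch (standard library only; assumes a local node on
+the default PB port 8087, and leaves protobuf decoding of the reply to
+generated classes):
+
+```python
+import socket
+import struct
+
+with socket.create_connection(("127.0.0.1", 8087)) as s:
+    s.sendall(struct.pack(">IB", 1, 15))  # 15 = RpbListBucketsReq
+    (length,) = struct.unpack(">I", s.recv(4))
+    code = s.recv(1)[0]                   # expect 16 = RpbListBucketsResp
+    print("message code:", code)
+```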
+
+## Response
+
+```protobuf
+message RpbListBucketsResp {
+    repeated bytes buckets = 1;
+}
+```
+
+Values
+
+* `buckets` - Buckets on the server
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 01 0F
+Erlang <<0,0,0,1,15>>
+
+RpbListBucketsReq - only message code defined
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62
+         34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02
+         62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37
+Erlang <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10,
+         3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>>
+
+RpbListBucketsResp protoc decode:
+buckets: "b1"
+buckets: "b5"
+buckets: "b4"
+buckets: "b8"
+buckets: "b3"
+buckets: "b10"
+buckets: "b9"
+buckets: "b2"
+buckets: "b6"
+buckets: "b7"
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/list-keys.md
new file mode 100644
index 0000000000..3d6c8dd7cb
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/list-keys.md
@@ -0,0 +1,101 @@
+---
+title: "PBC List Keys"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "List Keys"
+    identifier: "pbc_list_keys"
+    weight: 101
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers/list-keys
+  - /riak/kv/2.9.9/dev/references/protocol-buffers/list-keys
+---
+
+List all of the keys in a bucket. This is a streaming call, with
+multiple response messages sent for each request.
+
+{{% note title="Not for production use" %}}
+This operation requires traversing all keys stored in the cluster and should
+not be used in production.
+{{% /note %}}
+
+## Request
+
+```protobuf
+message RpbListKeysReq {
+    required bytes bucket = 1;
+}
+```
+
+Required Parameters
+
+* `bucket` - bucket to get keys from
+
+## Response
+
+```protobuf
+message RpbListKeysResp {
+    repeated bytes keys = 1;
+    optional bool done = 2;
+}
+```
+
+#### Values
+
+* **keys** - batch of keys in the bucket.
+* **done** - set `true` on the last response packet
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73
+Erlang <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>>
+
+RpbListKeysReq protoc decode:
+bucket: "listkeys"
+
+```
+
+#### Response Packet 1
+
+```bash
+Hex      00 00 00 04 12 0A 01 34
+Erlang <<0,0,0,4,18,10,1,52>>
+
+RpbListKeysResp protoc decode:
+keys: "4"
+
+```
+
+#### Response Packet 2
+
+```bash
+Hex      00 00 00 08 12 0A 02 31 30 0A 01 33
+Erlang <<0,0,0,8,18,10,2,49,48,10,1,51>>
+
+RpbListKeysResp protoc decode:
+keys: "10"
+keys: "3"
+```
+
+#### Response Packet 3
+
+```bash
+Hex      00 00 00 03 12 10 01
+Erlang <<0,0,0,3,18,16,1>>
+
+RpbListKeysResp protoc decode:
+done: true
+
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/mapreduce.md
new file mode 100644
index 0000000000..1d2a90f462
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/mapreduce.md
@@ -0,0 +1,153 @@
+---
+title: "PBC MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "MapReduce"
+    identifier: "pbc_mapreduce"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers/mapreduce
+  - /riak/kv/2.9.9/dev/references/protocol-buffers/mapreduce
+---
+
+Execute a MapReduce job. 
+
+## Request
+
+```protobuf
+message RpbMapRedReq {
+    required bytes request = 1;
+    required bytes content_type = 2;
+}
+```
+
+Required Parameters
+
+* `request` - MapReduce job
+* `content_type` - Encoding for MapReduce job
+
+MapReduce jobs can be encoded in two different ways:
+
+* `application/json` - JSON-encoded MapReduce job
+* `application/x-erlang-binary` - Erlang external term format
+
+The JSON encoding is the same as the [REST API]({{}}riak/kv/2.9.9/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{}}riak/kv/2.9.9/developing/app-guide/advanced-mapreduce/#erlang).
+
+## Response
+
+The results of the MapReduce job are returned for each phase that
+generates a result, encoded in the same format the job was submitted in.
+Multiple response messages will be returned, followed by a final message
+at the end of the job.
+
+```protobuf
+message RpbMapRedResp {
+    optional uint32 phase = 1;
+    optional bytes response = 2;
+    optional bool done = 3;
+}
+```
+
+Values
+
+* `phase` - Phase number of the MapReduce job
+* `response` - Response encoded with the content_type submitted
+* `done` - Set `true` on the last response packet
+
+## Example
+
+Here is an example of submitting a JSON-encoded job to sum up a bucket
+full of JSON-encoded values:
+
+```
+{"inputs": "bucket_501653",
+ "query":
+    [{"map": {"arg": null,
+              "name": "Riak.mapValuesJson",
+              "language": "javascript",
+              "keep": false}},
+     {"reduce": {"arg": null,
+                 "name": "Riak.reduceSum",
+                 "language": "javascript",
+                 "keep": true}}]}
+```
+
+Request
+
+```bash
+Hex      00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
+         22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
+         33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
+         6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
+         6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
+         6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
+         2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
+         61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
+         70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
+         65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
+         6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
+         69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
+         22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
+         61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
+         3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
+         69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
+Erlang <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
+         117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
+         121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
+         110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
+         109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
+         110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
+         116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
+         44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
+         32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
+         46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
+         97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
+         34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
+         112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>
+
+RpbMapRedReq protoc decode:
+request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null,
+"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}},
+ {"reduce": {"arg": null, "name": "Riak.reduceSum", "language": 
+"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/ping.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..ffcfa499e0 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/ping.md @@ -0,0 +1,46 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/ping + - /riak/kv/2.9.9/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..34c68765c6 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,63 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/2.9.9/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/2.9.9/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
+
+## Example
+
+Request to reset the properties for the bucket `friends`:
+
+#### Request
+
+```bash
+Hex      00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
+Erlang <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>
+
+RpbResetBucketReq protoc decode:
+bucket: "friends"
+
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 01 1E
+Erlang <<0,0,0,1,30>>
+
+RpbResetBucketResp - only message code defined
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/search.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..438bdac44b
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/search.md
@@ -0,0 +1,152 @@
+---
+title: "PBC Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Search"
+    identifier: "pbc_search"
+    weight: 109
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers/search
+  - /riak/kv/2.9.9/dev/references/protocol-buffers/search
+---
+
+Send a Search request to retrieve a list of documents, along with a few
+stats.
+
+## Request
+
+```protobuf
+message RpbSearchQueryReq {
+    required bytes q = 1;
+    required bytes index = 2;
+    optional uint32 rows = 3;
+    optional uint32 start = 4;
+    optional bytes sort = 5;
+    optional bytes filter = 6;
+    optional bytes df = 7;
+    optional bytes op = 8;
+    repeated bytes fl = 9;
+    optional bytes presort = 10;
+}
+```
+
+Required Parameters
+
+* `q` - The contents of the query
+* `index` - The name of the index to search
+
+Optional Parameters
+
+* `rows` - The maximum number of rows to return
+* `start` - A start offset, i.e. the number of keys to skip before
+  returning values
+* `sort` - How the search results are to be sorted
+* `filter` - Filters search with additional query scoped to inline
+  fields
+* `df` - Override the `default_field` setting in the schema file
+* `op` - `and` or `or`, to override the `default_op` operation setting
+  in the schema file
+* `fl` - Return the fields limit
+* `presort` - Presort. The options are `key` or `score`
+
+## Response
+
+The results of a search query are returned as a repeating list of 0 or
+more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
+more key/value pairs (`RpbPair`) that match the given request
+parameters. It also returns the maximum search score and the number of
+results.
+
+```protobuf
+// RpbPair is a generic key/value pair datatype used for
+// other message types
+message RpbPair {
+    required bytes key = 1;
+    optional bytes value = 2;
+}
+
+message RpbSearchDoc {
+    repeated RpbPair fields = 1;
+}
+
+message RpbSearchQueryResp {
+    repeated RpbSearchDoc docs = 1;
+    optional float max_score = 2;
+    optional uint32 num_found = 3;
+}
+```
+
+Values
+
+* `docs` - A list of docs that match the search request
+* `max_score` - The top score returned
+* `num_found` - Returns the total number of values matched by this
+  search
+
+## Example
+
+Request
+
+Here we search for any animals that begin with the string `pig`. We only
+want the first 100, and sort the values by a `name` field. 
+
+```bash
+RpbSearchQueryReq protoc decode:
+q: "pig*"
+index: "animals"
+rows: 100
+start: 0
+sort: "name"
+
+Hex      00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E
+         69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65
+Erlang   <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110,
+           105,109,97,108,115,24,100,32,0,42,4,110,97,
+           109,101>>
+```
+
+Response
+
+```bash
+Hex      00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D
+         61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65
+         12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69
+         6D 61 6C 12 06 70 69 67 65 6F 6E 18 02
+Erlang   <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109,
+           97,108,18,3,112,105,103,10,12,10,4,110,97,
+           109,101,18,4,102,114,101,100,10,18,10,16,10,
+           6,97,110,105,109,97,108,18,6,112,105,103,
+           101,111,110,24,2>>
+
+RpbSearchQueryResp protoc decode:
+docs {
+  fields {
+    key: "animal"
+    value: "pig"
+  }
+  fields {
+    key: "name"
+    value: "fred"
+  }
+}
+docs {
+  fields {
+    key: "animal"
+    value: "pigeon"
+  }
+}
+num_found: 2
+```
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/secondary-indexes.md
new file mode 100644
index 0000000000..4a928482f2
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/secondary-indexes.md
@@ -0,0 +1,125 @@
+---
+title: "PBC Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Secondary Indexes"
+    identifier: "pbc_secondary_indexes"
+    weight: 108
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers/secondary-indexes
+  - /riak/kv/2.9.9/dev/references/protocol-buffers/secondary-indexes
+---
+
+Request a set of keys that match a secondary index query.
+
+## Request
+
+```protobuf
+message RpbIndexReq {
+  enum IndexQueryType {
+    eq = 0;
+    range = 1;
+  }
+  required bytes bucket = 1;
+  required bytes index = 2;
+  required IndexQueryType qtype = 3;
+  optional bytes key = 4;
+  optional bytes range_min = 5;
+  optional bytes range_max = 6;
+  optional bool return_terms = 7;
+  optional bool stream = 8;
+  optional uint32 max_results = 9;
+  optional bytes continuation = 10;
+  optional uint32 timeout = 11;
+  optional bytes type = 12;
+  optional bytes term_regex = 13;
+  optional bool pagination_sort = 14;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket in which the indexed objects are stored
+`index` | The name of the index to be queried
+`qtype` | The type of index query to be performed. This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`key` | The exact index value to match if `qtype` is set to `eq`
+`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
+`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
+`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
+`max_results` | If pagination is turned on, the number of results to be returned to the client
+`continuation` | An opaque value returned as part of a previous paginated response, used to retrieve the next page of results
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/2.9.9/developing/usage/bucket-types).
+`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
+`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
+
+## Response
+
+The results of a Secondary Index query are returned as a repeating list
+of 0 or more keys that match the given request parameters.
+
+```protobuf
+message RpbIndexResp {
+  repeated bytes keys = 1;
+  repeated RpbPair results = 2;
+  optional bytes continuation = 3;
+  optional bool done = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`keys` | A list of keys that match the index request
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/fetch-object).
+`continuation` | Used for paginated responses
+`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
+
+## Example
+
+#### Request
+
+Here we look for any exact matches of `chicken` on an `animal_bin` index
+for a bucket named `farm`.
+
+```bash
+RpbIndexReq protoc decode:
+bucket: "farm"
+index: "animal_bin"
+qtype: 0
+key: "chicken"
+
+Hex      00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69
+         6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E
+Erlang   <<0,0,0,30,25,10,4,102,97,114,109,18,10,97,110,105,
+           109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107,
+           101,110>>
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72
+Erlang   <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>>
+
+RpbIndexResp protoc decode:
+keys: "hen"
+keys: "rooster"
+```
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/server-info.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/server-info.md
new file mode 100644
index 0000000000..a908e1f782
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/server-info.md
@@ -0,0 +1,62 @@
+---
+title: "PBC Server Info"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Server Info"
+    identifier: "pbc_server_info"
+    weight: 111
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers/server-info
+  - /riak/kv/2.9.9/dev/references/protocol-buffers/server-info
+---
+
+A message from Riak that contains two pieces of information about the
+server: the name of the node and the version of Riak in use on that
+node.
+
+## Request
+
+A request consists only of the `RpbGetServerInfoReq` message code. No
+request message is defined.
+
+## Response
+
+```protobuf
+message RpbGetServerInfoResp {
+  optional bytes node = 1;
+  optional bytes server_version = 2;
+}
+```
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 01 07
+Erlang   <<0,0,0,1,7>>
+
+RpbGetServerInfoReq - only message code defined
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E
+         30 2E 30 2E 31 12 04 30 2E 31 30
+Erlang   <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48,
+           46,49,48>>
+
+RpbGetServerInfoResp protoc decode:
+node: "riak@127.0.0.1"
+server_version: "0.10"
+```
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-props.md
new file mode 100644
index 0000000000..360afe1835
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-props.md
@@ -0,0 +1,72 @@
+---
+title: "PBC Set Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Set Bucket Properties"
+    identifier: "pbc_set_bucket_props"
+    weight: 103
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers/set-bucket-props
+  - /riak/kv/2.9.9/dev/references/protocol-buffers/set-bucket-props
+---
+
+Sets the properties for a bucket.
+
+## Request
+
+```protobuf
+message RpbSetBucketReq {
+  required bytes bucket = 1;
+  required RpbBucketProps props = 2;
+  optional bytes type = 3;
+}
+```
+
+You must specify the name of the bucket (`bucket`) and include an
+`RpbBucketProps` message. More on that message type can be found in the
+[PBC Get Bucket Properties]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-props) documentation.
+
+You can also specify a [bucket type]({{}}riak/kv/2.9.9/developing/usage/bucket-types) using the
+`type` value. If you do not specify a bucket type, the `default` bucket
+type will be used by Riak.
+
+## Response
+
+Only the message code is returned.
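+
+If you are using a client library rather than constructing
+`RpbSetBucketReq` by hand, the same operation is typically a one-liner.
+A minimal sketch with the official Erlang client follows; the host,
+port, and bucket name are assumptions for illustration:
+
+```erlang
+%% Sketch: set allow_mult to true on the bucket <<"friends">>,
+%% which corresponds to the wire-level example below.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]),
+riakc_pb_socket:stop(Pid).
+```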
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..bdcaf109d5 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,35 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/2.9.9/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/2.9.9/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/get-bucket-props). + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..dd083d4ebe --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,66 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/set-client-id + - /riak/kv/2.9.9/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
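+
+Client libraries that still support this deprecated call expose it
+directly. A minimal sketch with the official Erlang client; the host,
+port, and the client ID value are assumptions for illustration:
+
+```erlang
+%% Sketch: set an opaque client ID for this connection.
+%% Unnecessary on Riak 1.4 and later, where client IDs are ignored.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+riakc_pb_socket:set_client_id(Pid, <<"my-actor-id">>).
+```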
+
+## Example
+
+Request
+
+```bash
+Hex      00 00 00 07 05 0A 04 01 65 01 B6
+Erlang   <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "\001e\001\266"
+
+```
+
+
+Response
+
+```bash
+Hex      00 00 00 01 06
+Erlang   <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/store-object.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..9f78feeb5d
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,154 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers/store-object
+  - /riak/kv/2.9.9/dev/references/protocol-buffers/store-object
+---
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/2.9.9/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/2.9.9/learn/concepts/buckets), and [bucket type]({{}}riak/kv/2.9.9/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/2.9.9/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/2.9.9/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+  required bytes bucket = 1;
+  optional bytes key = 2;
+  optional bytes vclock = 3;
+  required RpbContent content = 4;
+  optional uint32 w = 5;
+  optional uint32 dw = 6;
+  optional bool return_body = 7;
+  optional uint32 pw = 8;
+  optional bool if_not_modified = 9;
+  optional bool if_none_match = 10;
+  optional bool return_head = 11;
+  optional uint32 timeout = 12;
+  optional bool asis = 13;
+  optional bool sloppy_quorum = 14;
+  optional uint32 n_val = 15;
+  optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, and
+`pw` parameters, provided that the integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier RpbGetResp message. Omit if this is a new key or if you deliberately want to create a sibling.
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`.
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_head` | Return the metadata for the now-stored object without returning the value of the object
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+
+The `if_not_modified`, `if_none_match`, and `asis` parameters are set
+only for messages sent between nodes in a Riak cluster and should not be
+set by Riak clients.
+
+#### Response
+
+```protobuf
+message RpbPutResp {
+  repeated RpbContent contents = 1;
+  optional bytes vclock = 2;
+  optional bytes key = 3;
+}
+```
+
+If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
+will contain the current object after the PUT completes, in `contents`,
+as well as the object's [causal context]({{}}riak/kv/2.9.9/learn/concepts/causal-context), in the `vclock`
+field. The `key` will be sent only if the server generated a random key
+for the object.
+
+If `return_body` is not set and no key is generated, the PUT response
+will be empty.
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B
+         22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01
+Erlang   <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34,
+           98,97,114,34,125,40,2,56,1>>
+
+RpbPutReq protoc decode:
+bucket: "b"
+key: "k"
+content {
+  value: "{\"foo\":\"bar\"}"
+}
+w: 2
+return_body: true
+
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A
+         22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39
+         36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF
+         B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60
+         CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC
+         0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0
+         12 FA 20 89 2C 00
+Erlang   <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125,
+           42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86,
+           106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96,
+           96,96,202,96,202,5,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33,
+           182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>>
+
+RpbPutResp protoc decode:
+contents {
+  value: "{\"foo\":\"bar\"}"
+  vtag: "1caykOD96iNAhomyeVjOYC"
+  last_mod: 1271453743
+  last_mod_usecs: 406416
+}
+vclock: "k\316a```\312`\312\005R,,\351\014\206\031L\211\214y\254\014Z!\266G\371 \302l\315I\254\rw|\240\022\372 \211,\000"
+
+```
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-delete.md
new file mode 100644
index 0000000000..1d9c4aeb2b
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-delete.md
@@ -0,0 +1,37 @@
+---
+title: "PBC Yokozuna Index Delete"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Yokozuna Index Delete"
+    identifier: "pbc_yz_index_delete"
+    weight: 122
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - 
/riak/2.9.9/dev/references/protocol-buffers/yz-index-delete + - /riak/kv/2.9.9/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..4c17911f31 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,63 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/yz-index-get + - /riak/kv/2.9.9/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.9/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. + + + + diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..a614e646b7 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,49 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/2.9.9/dev/references/protocol-buffers/yz-index-put + - /riak/kv/2.9.9/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. 
+
+```protobuf
+message RpbYokozunaIndex {
+  required bytes name = 1;
+  optional bytes schema = 2;
+  optional uint32 n_val = 3;
+}
+```
+
+Each message specifying an index must include the index's name as a
+binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/2.9.9/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/#message-codes) code with no data on success.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-schema-get.md
new file mode 100644
index 0000000000..71fc76d28a
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-schema-get.md
@@ -0,0 +1,52 @@
+---
+title: "PBC Yokozuna Schema Get"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Yokozuna Schema Get"
+    identifier: "pbc_yz_schema_get"
+    weight: 123
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers/yz-schema-get
+  - /riak/kv/2.9.9/dev/references/protocol-buffers/yz-schema-get
+---
+
+Fetch a [search schema]({{}}riak/kv/2.9.9/developing/usage/search-schemas) from Riak Search.
+
+## Request
+
+In a request message, you only need to specify the name of the schema as
+a binary (under `name`):
+
+```protobuf
+message RpbYokozunaSchemaGetReq {
+  required bytes name = 1;  // Schema name
+}
+```
+
+## Response
+
+```protobuf
+message RpbYokozunaSchemaGetResp {
+  required RpbYokozunaSchema schema = 1;
+}
+```
+
+The response message will include a `RpbYokozunaSchema` structure.
+
+```protobuf
+message RpbYokozunaSchema {
+  required bytes name = 1;
+  optional bytes content = 2;
+}
+```
+
+This message includes the schema `name` and its XML `content`.
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-schema-put.md
new file mode 100644
index 0000000000..9013cfdece
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/protocol-buffers/yz-schema-put.md
@@ -0,0 +1,45 @@
+---
+title: "PBC Yokozuna Schema Put"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Yokozuna Schema Put"
+    identifier: "pbc_yz_schema_put"
+    weight: 124
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/references/protocol-buffers/yz-schema-put
+  - /riak/kv/2.9.9/dev/references/protocol-buffers/yz-schema-put
+---
+
+Create a new Solr [search schema]({{}}riak/kv/2.9.9/developing/usage/search-schemas).
+
+## Request
+
+```protobuf
+message RpbYokozunaSchemaPutReq {
+  required RpbYokozunaSchema schema = 1;
+}
+```
+
+Each message must contain a `RpbYokozunaSchema` object structure.
+
+```protobuf
+message RpbYokozunaSchema {
+  required bytes name = 1;
+  optional bytes content = 2;
+}
+```
+
+This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/2.9.9/developing/usage/search-schemas) `content` as XML.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/#message-codes) code with no data on success.
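+
+As a point of comparison, the official Erlang client wraps this message
+pair; the schema name and file path below are assumptions for
+illustration:
+
+```erlang
+%% Sketch: upload a Solr schema from disk, then read it back.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, SchemaXml} = file:read_file("my_schema.xml"),
+ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXml),
+{ok, _Schema} = riakc_pb_socket:get_search_schema(Pid, <<"my_schema">>).
+```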
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/api/repl-hooks.md b/content/riak/kv/2.9.9/developing/api/repl-hooks.md
new file mode 100644
index 0000000000..789d0b596e
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/api/repl-hooks.md
@@ -0,0 +1,196 @@
+---
+title_supertext: "Riak Multi-Datacenter Replication:"
+title: "Hooks API"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Multi-Datacenter REPL Hooks API"
+    identifier: "apis_repl_hooks"
+    weight: 100
+    parent: "developing_apis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.9/ops/mdc/v2/hooks
+  - /riak/kv/2.9.9/ops/mdc/v2/hooks
+---
+[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl
+
+This document is a guide to developing extensions for Riak's
+Multi-Datacenter Replication feature.
+
+## Replication Hooks
+
+Riak allows applications to register replication hooks to control
+either of the following:
+
+* when extra objects need to be replicated along with the current object
+* when an object should _not_ be replicated.
+
+To register a hook, you must call the following function in an
+application-specific Erlang module, where `MyMod` is to be replaced
+with the name of your custom module:
+
+```erlang
+riak_core:register([{repl_helper, MyMod}]).
+```
+
+## Replication Hook API
+
+A replication hook must implement the following functions:
+
+### send_realtime/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook controls whether an [object][object]
+replicated in realtime should be sent. To send this object, return `ok`;
+to prevent the object from being sent, return `cancel`. You can also
+return a list of Riak objects to be replicated immediately *before* the
+current object. This is useful when you have an object that refers to
+other objects, e.g. a chunked file, and want to ensure that all of the
+dependency objects are replicated before the dependent object.
+
+### send/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook is used in fullsync replication. To send this
+[object][object],
+return `ok`; to prevent the object from being sent, return `cancel`. You
+can also return a list of Riak objects to be replicated immediately
+*before* the current object. This is useful for when you have an object
+that refers to other objects, e.g. a chunked file, and want to ensure that
+all the dependency objects are replicated before the dependent object.
+
+### recv/1
+
+```erlang
+(riak_object) -> ok | cancel
+```
+
+When an [object][object]
+is received by the client site, this hook is run. You can use it to
+update metadata or to deny the object.
+
+## Implementing a Sample Replication Hook
+
+The following is a simple replication hook that will log when an object
+is received via replication. For more information about the functions in
+the sample, see the [Replication Hook API](#replication-hook-api) section above.
+
+Here is the relevant Erlang code:
+
+```erlang
+%% Riak Enterprise MDC replication hook sample
+
+-module(riak_replication_hook_sample).
+-export([register/0]).
+-export([recv/1, send/2, send_realtime/2]).
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + + + + + diff --git a/content/riak/kv/2.9.9/developing/app-guide.md b/content/riak/kv/2.9.9/developing/app-guide.md new file mode 100644 index 0000000000..b30092b04a --- /dev/null +++ b/content/riak/kv/2.9.9/developing/app-guide.md @@ -0,0 +1,420 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/2.9.9/dev/using/application-guide/ + - /riak/kv/2.9.9/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/2.9.9/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/2.9.9/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/2.9.9/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/2.9.9/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/2.9.9/developing/key-value-modeling +[dev data types]: {{}}riak/kv/2.9.9/developing/data-types +[dev data types#counters]: {{}}riak/kv/2.9.9/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/2.9.9/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/2.9.9/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/2.9.9/developing/usage/creating-objects +[usage search]: {{}}riak/kv/2.9.9/developing/usage/search +[use ref search]: {{}}riak/kv/2.9.9/using/reference/search +[usage 2i]: {{}}riak/kv/2.9.9/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/2.9.9/developing/client-libraries +[concept crdts]: {{}}riak/kv/2.9.9/learn/concepts/crdts +[dev data model]: {{}}riak/kv/2.9.9/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/2.9.9/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/2.9.9/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/2.9.9/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/2.9.9/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/2.9.9/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/2.9.9/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/2.9.9/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/2.9.9/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/2.9.9/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/2.9.9/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/2.9.9/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/2.9.9/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/2.9.9/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/2.9.9/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/2.9.9/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/2.9.9/using/reference/strong-consistency 
+[cluster ops strong consistency]: {{}}riak/kv/2.9.9/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/2.9.9/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/2.9.9/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/2.9.9/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/2.9.9/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/2.9.9/setup/installing
+[getting started]: {{}}riak/kv/2.9.9/developing/getting-started
+[usage index]: {{}}riak/kv/2.9.9/developing/usage
+[glossary]: {{}}riak/kv/2.9.9/learn/glossary
+[usage search schema]: {{}}riak/kv/2.9.9/developing/usage/search-schemas
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answers to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data** - While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects** - Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects** - Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys** - It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects. Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]** - If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so. But if
+  your data can be modeled as a [counter][dev data types#counters],
+  [set][dev data types#sets], or [map][dev data types#maps], you
+  should seriously consider using [Riak Data Types][dev data types],
+  which can speed application development and transfer a great deal of
+  complexity away from the application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you use it to store:
+
+* **Objects that exceed 1-2MB in size** - If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies** - If your data cannot be
+  easily denormalized or if it requires that objects can be easily
+  assembled into and accessible as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommendable for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search] - Getting started with Riak Search
+* [Search Details][use ref search] - A detailed overview of the concepts and design
+  consideration behind Riak Search
+* [Search Schema][usage search schema] - How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API** - Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon).
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you.
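+
+As a quick taste of the API, here is a minimal query sketch using the
+official Erlang client; the index name and query string are assumptions
+for illustration, and the index must already exist and be attached to a
+bucket:
+
+```erlang
+%% Sketch: run a Solr-style query against a Riak Search index.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:format("~p~n", [Results]).
+```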
+ +### When Not to Use Search + +* **When deep pagination is needed** - At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** - In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] - A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] - A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] - An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** - If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** - If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** - If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** - While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
If you have data that requires
+  a modeling solution that can't be covered, you should stick to
+  standard K/V operations.
+* **When object size is of significant concern** - Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth. Riak
+comes equipped with a set of default MapReduce jobs that you can employ,
+or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce] - A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce] - A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only** - You should use MapReduce only when
+  truly necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do** - Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i] - A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i] - Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination** - At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead.
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes** - If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes.
+* **If you're using Bitcask** - 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy in accordance with these Data Types. To see if this feature might
+be a good fit for your application, we recommend checking out the
+following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers that want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable?
+
+Although Riak always performs best when storing and retrieving immutable
+data, Riak also handles mutable objects very ably using a variety of
+eventual consistency principles. 
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] - Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] - A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] - How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] - A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] - A listing of frequently used terms in Riak's + documentation + + + + + diff --git a/content/riak/kv/2.9.9/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/2.9.9/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..51afd2a348 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,802 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/2.9.9/dev/advanced/mapreduce/ + - /riak/kv/2.9.9/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/2.9.9/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/2.9.9/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/2.9.9/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/2.9.9/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/2.9.9/learn/glossary/#vnode +[config reference]: {{}}riak/kv/2.9.9/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to +run MapReduce jobs using Erlang or JavaScript. + +{{% note title="Deprecation Warning" %}} +Javascript MapReduce is deprecated and will be removed in a future version. +{{% /note %}} + + +### Why Do We Use MapReduce for Querying Riak KV? + +Key/value stores like Riak KV generally do not offer the kinds of complex +querying capabilities found in other data storage systems, such as +relational databases. MapReduce enables you to perform powerful queries +over the data stored in Riak KV but should be used with caution. + +The main goal of MapReduce is to spread the processing of a query across +many systems to take advantage of parallel processing power. This is +generally done by dividing the query into several steps, i.e. dividing +the dataset into several chunks and then running those step/chunk pairs +on separate physical hosts. Riak KV's MapReduce has an additional goal: +increasing data locality. When processing a large dataset, it's often +much more efficient to take the computation to the data than it is to +bring the data to the computation. + +"Map" and "Reduce" are phases in the query process. Map functions take +one piece of data as input and produce zero or more results as output. +If you're familiar with [mapping over a list][mapping list] +in functional programming languages, you're already familiar with the +"Map" steps in a MapReduce query. + +## MapReduce caveats + +MapReduce should generally be treated as a fallback rather than a +standard part of an application. There are often ways to model data +such that dynamic queries become single key retrievals, which are +dramatically faster and more reliable in Riak KV, and tools such as Riak +search and 2i are simpler to use and may place less strain on a +cluster. + +### R=1 + +One consequence of Riak KV's processing model is that MapReduce queries +have an effective `R` value of 1. The queries are distributed +to a representative sample of the cluster where the data is expected to +be found, and if one server lacks a copy of data it's supposed to have, +a MapReduce job will not attempt to look for it elsewhere. + +For more on the value of `R`, see our documentation on [replication properties][apps replication properties]. + +### Key lists + +Asking Riak KV to generate a list of all keys in a production environment +is generally a bad idea. It's an expensive operation. + +Attempting to constrain that operation to a bucket (e.g., +`mapred_bucket` as used below) does not help because Riak KV must still +pull all keys from storage to determine which ones are in the +specified bucket. + +If at all possible, run MapReduce against a list of known keys. + +### Code distribution + +As we'll discuss in this document, the functions invoked from Erlang +MapReduce must be available on all servers in the cluster unless +using the client library from an Erlang shell. + +### Security restrictions + +If Riak's security functionality is enabled, there are two +restrictions on MapReduce that come into play: + +* The `riak_kv.mapreduce` permission must be granted to the user (or + via the user's groups) +* Other than the module `riak_kv_mapreduce`, any Erlang modules + distributed with Riak KV will **not** be accessible to custom MapReduce + code unless made available via the `add_path` mechanism documented + in [Installing Custom Code][use ref custom code]. + +## How Riak KV's MapReduce Queries Are Specified + +MapReduce queries in Riak KV have two components: (1) a list of inputs and +(2) a list of "steps," or "phases." 
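+
+Before walking through each piece, here is a sketch of how such a query
+looks when submitted through the official Erlang client; the bucket and
+key names are assumptions, while `riak_kv_mapreduce` is the module of
+built-in phase functions that ships with Riak KV:
+
+```erlang
+%% Sketch: two bucket/key inputs, a map phase extracting object values,
+%% and a reduce phase sorting them. The final boolean flag on each phase
+%% marks whether its results are included in the query output.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+Inputs = [{<<"groceries">>, <<"mine">>}, {<<"groceries">>, <<"yours">>}],
+Query = [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+         {reduce, {modfun, riak_kv_mapreduce, reduce_sort}, none, true}],
+{ok, Results} = riakc_pb_socket:mapred(Pid, Inputs, Query).
+```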
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any is included, with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it.
+ +### How a Link Phase Works in Riak KV + +Link phases find links matching patterns specified in the query +definition. The patterns specify which buckets and tags links must have. + +"Following a link" means adding it to the output list of this phase. The +output of this phase is often most useful as input to a map phase or to +another reduce phase. + +## Invoking MapReduce + +To illustrate some key ideas, we'll define a simple module that +implements a map function to return the key value pairs contained in a +bucket and use it in a MapReduce query via Riak KV's HTTP API. + +Here is our example MapReduce function: + +```erlang +-module(mr_example). + +-export([get_keys/3]). + +% Returns bucket and key pairs from a map phase +get_keys(Value,_Keydata,_Arg) -> + [{riak_object:bucket(Value),riak_object:key(Value)}]. +``` + +Save this file as `mr_example.erl` and proceed to compiling the module. + +{{% note title="Note on the Erlang Compiler" %}} +You must use the Erlang compiler (`erlc`) associated with the +Riak KV installation or the version of Erlang used when compiling Riak KV from +source. +{{% /note %}} + +Compiling the module is a straightforward process: + +```bash +erlc mr_example.erl +``` + +Successful compilation will result in a new `.beam` file, `mr_example.beam`. + +Send this file to your operator, or read about [installing custom code][use ref custom code] +on your Riak KV nodes. Once your file has been installed, all that +remains is to try the custom function in a MapReduce query. For +example, let's return keys contained within a bucket named `messages` +(please pick a bucket which contains keys in your environment). + +```curl +curl -XPOST localhost:8098/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' +``` + +The result should be a JSON map of bucket and key names expressed as key/value pairs. + +{{% note %}} +Be sure to install the MapReduce function as described above on all of +the nodes in your cluster to ensure proper operation. +{{% /note %}} + + +## Phase functions + +MapReduce phase functions have the same properties, arguments, and +return values whether you write them in Javascript or Erlang. + +### Map phase functions + +Map functions take three arguments (in Erlang, arity-3 is required). +Those arguments are: + + 1. `Value`: the value found at a key. This will be a Riak object, which + in Erlang is defined and manipulated by the `riak_object` module. + In Javascript, a Riak object looks like this: + + ```javascript + { + "bucket_type" : BucketTypeAsString, + "bucket" : BucketAsString, + "key" : KeyAsString, + "vclock" : VclockAsString, + "values" : [ + { + "metadata" : { + "X-Riak-VTag":VtagAsString, + "X-Riak-Last-Modified":LastModAsString, + "Links":[...List of link objects], + // ...other metadata... + }, + "data" : ObjectData + }, + // ...other metadata/data values (siblings)... + ] + } + ``` + 2. *KeyData* : key data that was submitted with the inputs to the query or phase. + 3. *Arg* : a static argument for the entire phase that was submitted with the query. + +A map phase should produce a list of results. You will see errors if +the output of your map function is not a list. Return the empty list if +your map function chooses not to produce output. If your map phase is +followed by another map phase, the output of the function must be +compatible with the input to a map phase - a list of bucket-key pairs or +`bucket-key-keydata` triples. 
+ +#### Map function examples + +These map functions return the value (data) of the object being mapped: + +```erlang +fun(Value, _KeyData, _Arg) -> + [riak_object:get_value(Value)] +end. +``` + +These map functions filter their inputs based on the arg and return bucket-key pairs for a subsequent map phase: + +```erlang +fun(Value, _KeyData, Arg) -> + Key = riak_object:key(Value), + Bucket = riak_object:bucket(Value), + case erlang:byte_size(Key) of + L when L > Arg -> + [{Bucket,Key}]; + _ -> [] + end +end. +``` + +### Reduce phase functions + +Reduce functions take two arguments. Those arguments are: + +1. *ValueList*: the list of values produced by the preceding phase in the MapReduce query. +2. *Arg* : a static argument for the entire phase that was submitted with the query. + +A reduce function should produce a list of values, but it must also be +true that the function is commutative, associative, and idempotent. That +is, if the input list `[a,b,c,d]` is valid for a given F, then all of +the following must produce the same result: + + +```erlang + F([a,b,c,d]) + F([a,d] ++ F([c,b])) + F([F([a]),F([c]),F([b]),F([d])]) +``` + +#### Reduce function examples + +These reduce functions assume the values in the input are numbers and +sum them: + +```erlang +fun(Values, _Arg) -> + [lists:foldl(fun erlang:'+'/2, 0, Values)] +end. +``` + +These reduce functions sort their inputs: + +```erlang +fun(Values, _Arg) -> + lists:sort(Values) +end. +``` + +## MapReduce Examples + +Riak KV supports describing MapReduce queries in Erlang syntax through the +Protocol Buffers API. This section demonstrates how to do so using the +Erlang client. + +{{% note title="Distributing Erlang MapReduce Code" %}} +Any modules and functions you use in your Erlang MapReduce calls must be +available on all nodes in the cluster. Please read about +[installing custom code]({{}}riak/kv/2.9.9/using/reference/custom-code). +{{% /note %}} + +### Erlang Example + +Before running some MapReduce queries, let's create some objects to +run them on. Unlike the first example when we compiled +`mr_example.erl` and distributed it across the cluster, this time +we'll use the [Erlang client library][erlang client] and shell. + +```erlang +1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087). +2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>, + term_to_binary(["eggs", "bacon"])). +3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>, + term_to_binary(["bread", "bacon"])). +4> riakc_pb_socket:put(Client, Yours, [{w, 1}]). +5> riakc_pb_socket:put(Client, Mine, [{w, 1}]). +``` + +Now that we have a client and some data, let's run a query and count how +many occurrences of groceries. + +```erlang +6> Count = fun(G, undefined, none) -> + [dict:from_list([{I, 1} + || I <- binary_to_term(riak_object:get_value(G))])] + end. +7> Merge = fun(Gcounts, none) -> + [lists:foldl(fun(G, Acc) -> + dict:merge(fun(_, X, Y) -> X+Y end, + G, Acc) + end, + dict:new(), + Gcounts)] + end. +8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred( + Client, + [{<<"groceries">>, <<"mine">>}, + {<<"groceries">>, <<"yours">>}], + [{map, {qfun, Count}, none, false}, + {reduce, {qfun, Merge}, none, true}]). +9> L = dict:to_list(R). +``` + +{{% note title="Riak Object Representations" %}} +Note how the `riak_object` module is used in the MapReduce +function but the `riakc_obj` module is used on the client. +Riak objects are represented differently internally to the cluster than +they are externally. 
+{{% /note %}} + +Given the lists of groceries we created, the sequence of commands above +would result in L being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`. + +### Erlang Query Syntax + +`riakc_pb_socket:mapred/3` takes a client and two lists as arguments. +The first list contains bucket-key pairs. The second list contains +the phases of the query. + +`riakc_pb_socket:mapred_bucket/3` replaces the first list of +bucket-key pairs with the name of a bucket; see the warnings above +about using this in a production environment. + +#### Inputs + +The `mapred/3` input objects are given as a list of tuples in the +format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and +`Key` should be binaries, and `KeyData` can be any Erlang term. The +former form is equivalent to `{{Bucket,Key},undefined}`. + +#### Query + +The query is given as a list of map, reduce and link phases. Map and +reduce phases are each expressed as tuples in the following form: + + +```erlang +{Type, FunTerm, Arg, Keep} +``` + +`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument +(any Erlang term) to pass to each execution of the phase. `Keep` is +either `true` or `false` and determines whether results from the phase +will be included in the final value of the query. Riak KV assumes that the +final phase will return results. + +`FunTerm` is a reference to the function that the phase will execute and +takes any of the following forms: + +* `{modfun, Module, Function}` where `Module` and `Function` are atoms + that name an Erlang function in a specific module +* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous + function) +* `{jsfun,Name}` where `Name` is a binary that, when evaluated in + Javascript, points to a built-in Javascript function +* `{jsanon, Source}` where `Source` is a binary that, when evaluated in + Javascript is an anonymous function +* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains + the source for an anonymous Javascript function + +{{% note title="qfun Note" %}} +Using `qfun` in compiled applications can be a fragile +operation. Please keep the following points in mind: + +1. The module in which the function is defined must be present and +exactly the same version on both the client and Riak KV nodes. + +2. Any modules and functions used by this function (or any function in +the resulting call stack) must also be present on the Riak KV nodes. + +Errors about failures to ensure both 1 and 2 are often surprising, +usually seen as opaque missing-function or function-clause +errors. Especially in the case of differing module versions, this can be +difficult to diagnose without expecting the issue and knowing of +`Module:info/0`. + +When using the Erlang shell, anonymous MapReduce functions can be +defined and sent to Riak KV instead of deploying them to all servers in +advance, but condition #2 above still holds. +{{% /note %}} + +Link phases are expressed in the following form: + + +```erlang +{link, Bucket, Tag, Keep} +``` + + +`Bucket` is either a binary name of a bucket to match, or the atom `_`, +which matches any bucket. `Tag` is either a binary tag to match, or the +atom `_`, which matches any tag. `Keep` has the same meaning as in map +and reduce phases. + + +> There are a small group of prebuilt Erlang MapReduce functions available +with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl). 
+ +## Bigger Data Examples + +### Loading Data + +This Erlang script will load historical stock-price data for Google +(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it. +Paste the code below into a file called `load_data.erl` inside the `dev` +directory (or download it below). + +```erlang +#!/usr/bin/env escript +%% -*- erlang -*- +main([]) -> + io:format("Requires one argument: filename with the CSV data~n"); +main([Filename]) -> + {ok, Data} = file:read_file(Filename), + Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])), + lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines). + +format_and_insert(Line) -> + JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line), + Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]), + io:format("Inserting: ~s~n", [hd(Line)]), + os:cmd(Command). +``` + +Make the script executable: + +```bash +chmod +x load_data.erl +``` + +Download the CSV file of stock data linked below and place it in the +`dev` directory where we've been working. + +* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) - Google historical stock data +* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) - Alternative script in Ruby to load the data +* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) - Erlang script to load data (as shown in snippet) + +Now load the data into Riak KV. + +```bash +./load_data.erl goog.csv +``` + + +### Map only: find the days on which the high was over $600.00 + +From the Erlang shell with the client library loaded, let's define a +function which will check each value in our `goog` bucket to see if +the stock's high for the day was above $600. + +```erlang +> HighFun = fun(O, _, LowVal) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> High = proplists:get_value(<<"High">>, Map, -1.0), +> case High > LowVal of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun +``` + +Now we'll use `mapred_bucket/3` to send that function to the cluster. + +```erlang +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]). + {ok,[{0, + [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>, + <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>, + <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>, + <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>, + <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>, + <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>, + <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>, + <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]} +``` + +#### Map only: find the days on which the close is lower than open + +This example is slightly more complicated: instead of comparing a +single field against a fixed value, we're looking for days when the +stock declined. + +```erlang +> CloseLowerFun = fun(O, _, _) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> Close = proplists:get_value(<<"Close">>, Map, -1.0), +> Open = proplists:get_value(<<"Open">>, Map, -2.0), +> case Close < Open of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun + +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]). 
+{ok,[{0, + [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>, + <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>, + <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>, + <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>, + <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>, + <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>, + <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>, + <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]} +``` + +#### Map and Reduce: find the maximum daily variance in price by month + +Here things start to get tricky. We'll use map to determine each day's +rise or fall, and our reduce phase will identify each month's largest +variance. + +```erlang +DailyMap = fun(O, _, _) -> + {struct, Map} = mochijson2:decode(riak_object:get_value(O)), + Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")), + High = proplists:get_value(<<"High">>, Map, 0.0), + Low = proplists:get_value(<<"Low">>, Map, 0.0), + Month = string:substr(Date, 1, 7), + [{Month, abs(High - Low)}] +end. + +MonthReduce = fun(List, _) -> + {Highs, _} = lists:foldl( + fun({Month, _Value}=Item, {Accum, PrevMonth}) -> + case Month of + PrevMonth -> + %% Highest value is always first in the list, so + %% skip over this one + {Accum, PrevMonth}; + _ -> + {[Item] ++ Accum, Month} + end + end, + {[], ""}, + List), + Highs + end. +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]). +{ok,[{1, + [{"2010-02",10.099999999999909}, + {"2006-02",11.420000000000016}, + {"2004-08",8.100000000000009}, + {"2008-08",14.490000000000009}, + {"2006-05",11.829999999999984}, + {"2005-10",4.539999999999964}, + {"2006-06",7.300000000000011}, + {"2008-06",9.690000000000055}, + {"2006-03",11.770000000000039}, + {"2006-12",4.880000000000052}, + {"2005-09",9.050000000000011}, + {"2008-03",15.829999999999984}, + {"2008-09",14.889999999999986}, + {"2010-04",9.149999999999977}, + {"2008-06",14.909999999999968}, + {"2008-05",13.960000000000036}, + {"2005-05",2.780000000000001}, + {"2005-07",6.680000000000007}, + {"2008-10",21.390000000000043}, + {"2009-09",4.180000000000007}, + {"2006-08",8.319999999999993}, + {"2007-08",5.990000000000009}, + {[...],...}, + {...}|...]}]} +``` + +#### A MapReduce Challenge + +Here is a scenario involving the data you already have loaded. + +MapReduce Challenge: Find the largest day for each month in terms of +dollars traded, and subsequently the largest overall day. + +*Hint*: You will need at least one each of map and reduce phases. + +## Streaming MapReduce + +Because Riak KV distributes the map phases across the cluster to increase +data locality, you can gain access to the results of those individual +computations as they finish via streaming. Streaming can be very +helpful when getting access to results from a high latency MapReduce job +that only contains map phases. Streaming of results from reduce phases +isn't as useful, but if your map phases return data (keep: true), they +will be returned to the client even if the reduce phases haven't +executed. This will let you use streaming with a reduce phase to collect +the results of the map phases while the jobs are run and then get the +result to the reduce phase at the end. + +### Streaming via the HTTP API + +You can enable streaming with MapReduce jobs submitted to the `/mapred` +resource by adding `?chunked=true` to the url. The response will be sent +using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`. 
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error

+Internal Server Error
+
+The server encountered an error while processing this request:
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
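+
+Output like this indicates that the node has not yet finished starting
+up and registering the `riak_kv` service. One way to wait until the node
+is ready to serve requests (the node name below is illustrative; adjust
+it to match your configuration) is:
+
+```bash
+riak-admin wait-for-service riak_kv riak@127.0.0.1
+```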
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, typically the information you need is buried more deeply within the stack.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
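+
+The failing `DailyFun` (not shown above) presumably called
+`string:substr/3` with a start index of 0. As a minimal illustration,
+runnable in any Erlang shell:
+
+```erlang
+%% Fails with function_clause: string:substr/3 indexes from 1, not 0
+string:substr("2009-06-10", 0, 7).
+
+%% Works, returning "2009-06"
+string:substr("2009-06-10", 1, 7).
+```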
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/app-guide/cluster-metadata.md b/content/riak/kv/2.9.9/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..c2b904ccfa
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,72 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+Cluster metadata is a subsystem inside of Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/2.9.9/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/2.9.9/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/2.9.9/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
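+
+As a minimal sketch of basic usage (run from an attached Riak console;
+the prefix, key, and value names here are illustrative):
+
+```erlang
+%% A "full prefix" is a {Prefix, SubPrefix} pair of atoms or binaries
+FullPrefix = {my_app, settings},
+
+%% Store a value; the write is broadcast asynchronously to all nodes
+ok = riak_core_metadata:put(FullPrefix, max_connections, 128),
+
+%% Read it back; reads are served from the local node
+128 = riak_core_metadata:get(FullPrefix, max_connections),
+
+%% List all key/value entries stored under the prefix
+Pairs = riak_core_metadata:to_list(FullPrefix).
+```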
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/app-guide/reference.md b/content/riak/kv/2.9.9/developing/app-guide/reference.md
new file mode 100644
index 0000000000..644260b3ea
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/app-guide/reference.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+#menu:
+#  riak_kv-2.9.9:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/app-guide/replication-properties.md b/content/riak/kv/2.9.9/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..19a462069f
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/app-guide/replication-properties.md
@@ -0,0 +1,584 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/advanced/replication-properties
+  - /riak/kv/2.9.9/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/2.9.9/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/2.9.9/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/2.9.9/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/2.9.9/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/2.9.9/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/2.9.9/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/2.9.9/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, as part of
+each individual read or write. Examples for each client library are
+given in the [section below]({{}}riak/kv/2.9.9/developing/app-guide/replication-properties#client-level-replication-settings).
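+
+For instance, with the HTTP API, replication parameters can be passed
+directly in the query string (a sketch; the bucket and key names are
+illustrative):
+
+```curl
+curl http://localhost:8098/buckets/animal_facts/keys/chimpanzee?r=3
+```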
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/2.9.9/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props` those properties will apply to it.
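+
+You can confirm the properties and activation status of the type with:
+
+```bash
+riak-admin bucket-type status custom_props
+```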
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/2.9.9/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/2.9.9/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/2.9.9/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
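+
+You can verify the effective properties of a bucket of this type over
+the HTTP API (using the bucket from the example above):
+
+```curl
+curl http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/props
+```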
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_object:new({<<"w_equals_3">>, <<"animal_facts">>},
+                       <<"giraffe">>,
+                       <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/2.9.9/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
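+
+For example, a read requiring two primary vnodes to respond can be
+expressed over the HTTP API as follows (a sketch; the bucket and key
+names are illustrative):
+
+```curl
+curl http://localhost:8098/buckets/animal_facts/keys/chimpanzee?pr=2
+```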
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
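+
+As with the other replication properties, `dw` can be set via a bucket
+type. A sketch following the earlier examples (the type name is
+illustrative):
+
+```bash
+riak-admin bucket-type create dw_equals_2 '{"props":{"dw":2}}'
+riak-admin bucket-type activate dw_equals_2
+```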
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/2.9.9/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/2.9.9/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/2.9.9/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
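+
+Both settings can be applied to a single read over the HTTP API (a
+sketch; the bucket and key names are illustrative, and note that
+`notfound_ok=false` is what makes `basic_quorum` relevant here):
+
+```curl
+curl "http://localhost:8098/buckets/animal_facts/keys/chimpanzee?notfound_ok=false&basic_quorum=true"
+```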
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
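+
+Symbolic names can be used anywhere an integer value is accepted,
+including bucket type definitions. For example (the type name is
+illustrative):
+
+```bash
+riak-admin bucket-type create all_writes '{"props":{"w":"all"}}'
+riak-admin bucket-type activate all_writes
+```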
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which means we don't need to specify a bucket type on the write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/2.9.9/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/2.9.9/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/2.9.9/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+
+Tuning CAP Controls in Riak from Basho Technologies on Vimeo.
+
+
diff --git a/content/riak/kv/2.9.9/developing/app-guide/strong-consistency.md b/content/riak/kv/2.9.9/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..ba7b6a0248
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/app-guide/strong-consistency.md
@@ -0,0 +1,261 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/advanced/strong-consistency
+  - /riak/kv/2.9.9/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/2.9.9/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/2.9.9/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/2.9.9/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/2.9.9/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/2.9.9/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/2.9.9/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/2.9.9/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/2.9.9/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/2.9.9/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/2.9.9/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/2.9.9/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/2.9.9/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/2.9.9/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/2.9.9/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/2.9.9/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/2.9.9/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/2.9.9/developing/client-libraries
+[getting started]: {{}}riak/kv/2.9.9/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/2.9.9/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP (consistent and
+partition-tolerant) system for the data in those buckets. Depending on
+your use case, you can store some or all of your data in strongly
+consistent buckets. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability because a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
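+
+For reference, enabling the subsystem comes down to setting a single
+flag in each node's riak.conf and restarting the node (see the linked
+documentation for cluster-size and other prerequisites):
+
+```riakconf
+strong_consistency = on
+```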
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
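+
+As a minimal sketch of what that looks like over HTTP (the `accounts`
+bucket and `alice` key below are hypothetical):
+
+```curl
+curl -XPUT \
+  http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice \
+  -H "Content-Type: application/json" \
+  -d '{"balance": 100}'
+```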
+
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function very differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations enable you to set a variety of
+[replication properties][apps replication properties] on each request or at the bucket level
+[using bucket types][usage bucket types], these settings are quietly ignored
+for strongly consistent operations. These settings include `r`, `pr`,
+`w`, `rw`, and others. Two replication properties that _can_ be set,
+however, are `n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
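+
+Putting those two points together, here is a sketch of creating a
+strongly consistent type with an `n_val` of 5 (the type name
+`consistent_n5` is just an example):
+
+```bash
+riak-admin bucket-type create consistent_n5 \
+    '{"props":{"consistent":true,"n_val":5}}'
+riak-admin bucket-type activate consistent_n5
+```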
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients but offer important advantages over them.
+
+While we strongly recommend attaching context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---contexts are purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to write to a key without attaching a context, we recommend
+doing so only if you are certain that the key does not yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure,
+   you should default to supplying a context.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not
+   supply one, the update will necessarily fail. (A sketch of this
+   fetch-then-update cycle follows this list.)
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates.
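+
+Here is that fetch-then-update cycle sketched over HTTP. The bucket,
+key, and `X-Riak-Vclock` value below are placeholders; the real context
+is the opaque string returned in the headers of your fetch:
+
+```curl
+# 1. Fetch the object and note the X-Riak-Vclock response header
+curl -i http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice
+
+# 2. Write the modified value back, echoing the fetched vclock as context
+curl -XPUT \
+  http://localhost:8098/types/strongly_consistent/buckets/accounts/keys/alice \
+  -H "Content-Type: application/json" \
+  -H "X-Riak-Vclock: <vclock-from-step-1>" \
+  -d '{"balance": 50}'
+```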
+
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+non-strongly-consistent buckets. One important exception is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a non-empty key without including causal
+context, you will receive an error like the following (the exact form
+depends on your client):
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+<html><head><title>412 Precondition Failed</title></head><body><h1>Precondition Failed</h1>Precondition Failed<p><hr><address>mochiweb+webmachine web server</address></body></html>
+``` + +> **Getting Started with Riak KV clients** +> +> If you are connecting to Riak using one of Basho's official +[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section. + +## Known Issue with Client Libraries + +All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no +problems, since many error conditions are normal when using Riak. + +When working with strong consistency, however, operations like +[conditional puts][config strong consistency#details] commonly +produce errors that are difficult for clients to interpret. For example, +it is expected behavior for conditional puts to fail in the case of +concurrent updates to an object. At present, the official Riak clients +will convert this failure into an exception that is no different from +other error conditions, i.e. they will not indicate any +strong-consistency-specific errors. + +The best solution to this problem at the moment is to catch these +exceptions on the application side and parse server-side error messages +to see if the error involved a conditional failure. If so, you should +set up your application to retry any updates, perhaps a specified number +of times or perhaps indefinitely, depending on the use case. + +If you do set up a retry logic of this sort, however, it is necessary +to retry the entire read/modify/put cycle, meaning that you will need +to fetch the object, modify it, and then write. If you perform a simple +put over and over again, without reading the object, the update will +continue to fail. + +A future version of Riak will address these issues by modifying the +server API to more accurately report errors specific to strongly +consistent operations. + + + + diff --git a/content/riak/kv/2.9.9/developing/app-guide/write-once.md b/content/riak/kv/2.9.9/developing/app-guide/write-once.md new file mode 100644 index 0000000000..e995088769 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/app-guide/write-once.md @@ -0,0 +1,159 @@ +--- +title: "Write Once" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Write Once" + identifier: "app_guide_write_once" + weight: 102 + parent: "developing_app_guide" +toc: true +version_history: + in: "2.1.0+" +aliases: + - /riak/2.9.9/dev/advanced/write-once + - /riak/kv/2.9.9/dev/advanced/write-once +--- + +[glossary vnode]: {{}}riak/kv/2.9.9/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/2.9.9/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/2.9.9/developing/data-types +[strong consistency]: {{}}riak/kv/2.9.9/developing/app-guide/strong-consistency + +Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. + +{{% note %}} +Write-once buckets do not support Riak commit hooks. Because Riak objects are +inserted into the realtime queue using a postcommit hook, realtime replication +is unavailable for write-once buckets. 
Fullsync replication will, however, +replicate the data. +{{% /note %}} + +## Configuration + +When the new `write_once` [bucket type][bucket type] parameter is set to +`true`, buckets of type will treat all key/value entries as semantically "write +once;" once written, entries should not be modified or overwritten by the user. + +The `write_once` property is a boolean property applied to a bucket type and +may only be set at bucket creation time. Once a bucket type has been set with +this property and activated, the `write_once` property may not be modified. + +The `write_once` property is incompatible with [Riak data types][Riak data types] +and [strong consistency][strong consistency], This means that if you attempt +to create a bucket type with the `write_once` property set to `true`, any +attempt to set the `datatype` parameter or to set the `consistent` parameter +to `true` will fail. + +The `write_once` property may not be set on the default bucket type, and may +not be set on individual buckets. If you set the `lww` or `allow_mult` +parameters on a write-once bucket type, those settings will be ignored, as +sibling values are disallowed by default. + +The following example shows how to configure a bucket type with the +`write_once` property: + +```bash +riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}' +# my-bucket-type created + +riak-admin bucket-type activate my-bucket-type +# my-bucket-type has been activated + +riak-admin bucket-type status my-bucket-type +# my-bucket-type is active +... +write_once: true +... +``` + +## Quorum + +The write path used by write-once buckets supports the `w`, `pw`, and `dw` +configuration values. However, if `dw` is specified, then the value of `w` is +taken to be the maximum of the `w` and `dw` values. For example, for an `n_val` +of 3, if `dw` is set to `all`, then `w` will be `3`. + +This write additionally supports the `sloppy_quorum` property. If set to +`false`, only primary nodes will be selected for calculation of write quorum +nodes. + +## Runtime + +The write-once path circumvents the normal coordinated PUT code path, and +instead sends write requests directly to all [vnodes][glossary vnode] (or +vnode proxies) in the effective preference list for the write operation. + +In place of the `put_fsm` used in the normal path, we introduce a collection of +new intermediate worker processes (implementing `gen_server` behavior). The +role of these intermediate processes is to dispatch put requests to vnode or +vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`, +the write-once workers are long-lived for the lifecycle of the `riak_kv` +application. They are therefore stateful and store request state in a state- +local dictionary. + +The relationship between the `riak_client`, write-once workers, and vnode +proxies is illustrated in the following diagram: + +
+![Write Once]({{}}images/write_once.png) +
+ +## Client Impacts + +Since the write-once code path is optimized for writes of data that will not +be updated and therefore may potentially issue asynchronous writes, some +client features might not work as expected. For example, PUT requests asking +for the object to be returned will behave like requests that do not +request the object to be returned when they are performed against write-once +buckets. + + +## Siblings + +As mentioned, entries in write-once buckets are intended to be written only +once---users who are not abusing the semantics of the bucket type should not be +updating or over-writing entries in buckets of this type. However, it is +possible for users to misuse the API, accidentally or otherwise, which might +result in incomparable entries for the same key. + +In the case of siblings, write-once buckets will resolve the conflict by +choosing the "least" entry, where sibling ordering is based on a deterministic +SHA-1 hash of the objects. While this algorithm is repeatable and deterministic +at the database level, it will have the appearance to the user of "random write +wins." + +{{% note %}} +As mentioned in [Configuration](#configuration), write-once buckets and Riak +Data Types are incompatible because of this. +{{% /note %}} + + +## Handoff + +The write-once path supports handoff scenarios, such that if a handoff occurs +during PUTs in a write-once bucket, the values that have been written will be +handed off to the newly added Riak node. + +## Asynchronous Writes + +For backends that support asynchronous writes, the write-once path will +dispatch a write request to the backend and handle the response +asynchronously. This behavior allows the vnode to free itself for other work +instead of waiting on the write response from the backend. + +At the time of writing, the only backend that supports asynchronous writes is +LevelDB. Riak will automatically fall back to synchronous writes with all other +backends. + +{{% note title="Note on the `multi` backend" %}} +The [Multi]({{}}riak/kv/2.9.9/setup/planning/backend/multi) backend does not +support asynchronous writes. Therefore, if LevelDB is used with the Multi +backend, it will be used in synchronous mode. +{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.9/developing/client-libraries.md b/content/riak/kv/2.9.9/developing/client-libraries.md new file mode 100644 index 0000000000..8558c35e09 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/client-libraries.md @@ -0,0 +1,294 @@ +--- +title: "Client Libraries" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Client Libraries" + identifier: "developing_client_libraries" + weight: 106 + parent: "developing" +toc: true +aliases: + - /riak/2.9.9/dev/using/libraries + - /riak/kv/2.9.9/dev/using/libraries +--- + +## Basho-Supported Libraries + +Basho officially supports a number of open-source client libraries for a +variety of programming languages and environments. 
+ +Language | Source | Documentation | Download +:--------|:-------|:--------------|:-------- +Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22) | +Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client) +Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads) +C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases) +Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases) +PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) +Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client) +Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client) + +**Note**: All official clients use the integrated issue tracker on +GitHub for bug reporting. + +In addition to the official clients, Basho provides some unofficial +client libraries, listed below. There are also many client libraries and +related [community projects]({{}}community/projects/). + + +## Community Libraries + +The Riak Community is developing at a break-neck pace, and the number of +community-contributed libraries and drivers is growing right along side +it. Here is a list of projects that may suit your programming needs or +curiosities. If you know of something that needs to be added or are +developing something that you wish to see added to this list, please +fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs) +and send us a pull request. + +{{% note title="Note on community-produced libraries" %}} +All of these projects and libraries are at various stages of completeness and +may not suit your application's needs based on their level of maturity and +activity. 
+{{% /note %}} + +### Client Libraries and Frameworks + +#### C/C++ + +* [riak-cpp](https://github.com/ajtack/riak-cpp) - A C++ Riak client + library for use with C++11 compilers +* [Riak C Driver](https://github.com/fenek/riak-c-driver) - A library + to communicate with Riak using cURL and Protocol Buffers +* [Riack](https://github.com/trifork/riack) - A simple C client + library +* [Riack++](https://github.com/TriKaspar/riack_cpp) - A C++ wrapper + around riack + +#### Clojure + +* [knockbox](https://github.com/reiddraper/knockbox) - An eventual + consistency toolbox for Clojure +* [Welle](http://clojureriak.info) - An expressive Clojure client with + batteries included +* [clj-riak](http://github.com/mmcgrana/clj-riak) - Clojure bindings + to the Riak Protocol Buffers API +* [sumo](https://github.com/reiddraper/sumo) - A Protocol + Buffer-specific client for Riak with KV, 2i, and MapReduce support +* [kria](https://github.com/bluemont/kria) - Riak 2.0 Asynchronous + (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer + API, Java 7. + +#### ColdFusion + +* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) - A Riak-backed cache extension for Railo/ColdFusion + +#### Common Lisp + +* [cl-riak (1)](https://github.com/whee/cl-riak) +* [cl-riak (2)](https://github.com/eriknomitch/cl-riak) + +#### Dart + +* [riak-dart](https://github.com/agilord/riak_dart_client) - HTTP + client for Riak written in Dart + +#### Django (Python) + +* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) - Riak-based Session Backend for Django +* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) - A Riak backend for Django + +#### Erlang + +* [Uriak Pool](https://github.com/unisontech/uriak_pool) - Erlang + connection pool library from the team at + [Unison](http://www.unison.com) +* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) - Riak + Protocol Buffer Client pool application +* [Pooly](https://github.com/aberman/pooly) - Riak Process Pool +* [riakpool](https://github.com/dweldon/riakpool) - Application for + maintaining a dynamic pool of Protocol Buffer client connections to a + Riak database +* [pooler](https://github.com/seth/pooler) - An OTP Process Pool + Application +* [krc](https://github.com/klarna/krc) - A simple wrapper around the + official Riak client for Erlang +* [riakc_pool](https://github.com/brb/riakc_pool) - A really simple + Riak client process pool based on poolboy + +#### Go + +* [riaken](https://github.com/riaken) - A fast and extendable Riak + Protocol Buffer Client +* [goriakpbc](https://github.com/tpjg/goriakpbc) - A Golang Riak + client inspired by the Ruby riak-client from Basho and riakpbc from mrb +* [riakpbc](https://github.com/mrb/riakpbc) - A Riak Protocol Buffer + client in Go +* [goriak](https://github.com/zegl/goriak) - Go language driver for Riak KV + +#### Grails + +* [Grails ORM for Riak](http://www.grails.org/plugin/riak) + +#### Griffon + +* [Riak Plugin for + Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin) + +#### Groovy + +* [spring-riak](https://github.com/jbrisbin/spring-riak) - Riak + support from Groovy and/or Java + +#### Haskell + +* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) - A fast Haskell client library from the team at MailRank. 
+ +#### Java + +* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) - Java Client Library for Riak based on the Protocol Buffers API +* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) - Asynchronous, NIO-based Protocol Buffers client for Riak +* [Riak Module for the Play + Framework](http://www.playframework.org/modules/riak-head/home) + +#### Lisp-flavored Erlang + +* [Gutenberg](https://github.com/dysinger/gutenberg/) - Riak MapReduce + examples written in LFE + +#### Node.js + +* [zukai](https://github.com/natural/zukai) - Riak ODM for Node.js + from Troy Melhase +* [riak-pb](https://github.com/CrowdProcess/riak-pb) - Riak Protocol + Buffers client for Node.js from the team at + [CrowdProcess](http://crowdprocess.com) +* [node_riak](https://github.com/mranney/node_riak) - Voxer's + production Node.js client for Riak. +* [riakpbc](https://github.com/nlf/riakpbc) - A simple Riak Protocol + Buffer client library for Node.js +* [nodiak](https://npmjs.org/package/nodiak) - Supports bulk + get/save/delete, sibling auto-resolution, MapReduce chaining, Search, + and 2i's +* [resourceful-riak](https://github.com/admazely/resourceful-riak) - A + Riak engine to the + [resourceful](https://github.com/flatiron/resourceful/) model + framework from [flatiron](https://github.com/flatiron/) +* [Connect-Riak](https://github.com/frank06/connect-riak) - Riak + session store for Connect backed by [Riak-js](http://riakjs.org/) +* [Riak-js](http://riakjs.com) - Node.js client for Riak with support + for HTTP and Protocol Buffers +* [Riakjs-model](https://github.com/dandean/riakjs-model) - a model + abstraction around riak-js +* [Node-Riak](http://github.com/orlandov/node-riak) - A wrapper around + Node's HTTP facilities for communicating with Riak +* [riak-dc](https://github.com/janearc/riak-dc) - A very thin, very small + http-based interface to Riak using promises intended to be used for small + tools like command-line applications; aims to have the "most-synchronous- + like" interface. 
+* [Nori](https://github.com/sgonyea/nori) - Experimental Riak HTTP + library for Node.js modeled after Ripple +* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) - Node-based server and database-frontend for Sproutcore +* [Chinood](https://npmjs.org/package/chinood) - Object data mapper + for Riak built on Nodiak +* [SimpleRiak](https://npmjs.org/package/simpleriak) - A very simple + Riak HTTP client + +#### OCaml + +* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) - Riak OCaml client +* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) - A Protocol + Buffers client for Riak + +#### Perl + +* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) - A Perl + interface to Riak +* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) - Non-blocking Riak adapter using anyevent +* [riak-tiny](https://github.com/tempire/riak-tiny) - Perl interface + to Riak without Moose +* [Riak::Light](https://metacpan.org/module/Riak::Light) - Fast and + lightweight Perl client for Riak (PBC only) + +#### PHP + +* [riak-client](https://github.com/php-riak/riak-client) - A Riak + 2.0-compliant PHP client with support for Protocol Buffers by [Fabio + Silva](https://github.com/FabioBatSilva) +* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) - A port of + Ripple to PHP +* [riiak](https://bitbucket.org/intel352/riiak) - A Riak PHP client + library for the [Yii Framework](http://www.yiiframework.com/) +* [riak-php](https://github.com/marksteele/riak-php) - A Riak PHP + client with support for Protocol Buffers +* [RiakBundle](https://github.com/remialvado/RiakBundle) - [Symfony](http://symfony.com) Bundle designed to ease interaction + with Riak +* [php_riak](https://github.com/TriKaspar/php_riak) - A PHP extension + written in C, Both Riak client and PHP session module + +#### Python + +* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) - Asyncio PBC Riak 2.0+ client library. 
(Based on official Basho + python client) +* [Riakasaurus](https://github.com/calston/riakasaurus) - A Riak + client library for Twisted (based on txriak) +* [RiakKit](http://shuhaowu.com/riakkit) - A small Python ORM that + sits on top of riak-python-client, similar to mongokit and couchdbkit +* [riakalchemy](https://github.com/Linux2Go/riakalchemy) - Object + mapper for Riak written in Python +* [riak_crdt](https://github.com/ericmoritz/riak_crdt) - A CRDT + (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT + API](https://github.com/ericmoritz/crdt) +* [txriak](https://launchpad.net/txriak) - A Twisted module for + communicating with Riak via the HTTP interface +* [txriakidx](https://github.com/williamsjj/txriakidx) - Riak client + for Twisted Python that implements transparent indexes + +#### Racket + +* [riak.rkt](https://github.com/shofetim/riak.rkt) - Racket API to + Riak +* [Racket Riak](https://github.com/dkvasnicka/racket-riak) - Racket + 1.3.x API to Riak + +#### Ruby + +* [Risky](https://github.com/aphyr/risky) - A lightweight Ruby ORM for + Riak +* [riak_sessions](http://github.com/igorgue/riak_sessions) - Riak-backed session storage for Rack +* [Riaktor](http://github.com/benmyles/riaktor) - Ruby client and + object mapper for Riak +* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) - DataMapper adapter for Riak +* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) - Riak + Protocol Buffer Client in Ruby +* [Devise-Ripple](http://github.com/frank06/devise-ripple) - An ORM + strategy to use Devise with Riak +* [ripple-anaf](http://github.com/bkaney/ripple-anaf) - Accepts nested + attributes support for Ripple +* [Pabst](https://github.com/sgonyea/pabst) - Cross-platform Ruby + extension for Protocol Buffers written in both Objective-C and + Objective-C++ + +#### Scala + +* [Riakka](http://github.com/timperrett/riakka) - Scala library for + talking to Riak +* [Ryu](http://github.com/softprops/ryu) - A Tornado Whirlwind Kick + Scala client for the Riak raw HTTP interface +* [Raiku](https://github.com/gideondk/Raiku) - An Akka IO- and + Sentinel-driven Riak Scala client + +#### Smalltalk + +* [Phriak](http://www.squeaksource.com/Phriak/) - A Riak client for + Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface +* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) - A Pharo Smalltalk interface to Riak. There is also a blog post + with some additional info about the client + [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html). + + + + diff --git a/content/riak/kv/2.9.9/developing/data-modeling.md b/content/riak/kv/2.9.9/developing/data-modeling.md new file mode 100644 index 0000000000..13ba0b33a2 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/data-modeling.md @@ -0,0 +1,15 @@ +--- +layout: redirect +target: "riak/kv/2.9.9/learn/use-cases/" +aliases: +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/, but -- to maintain +the git history of this (possibly malformed?) file -- we're going to start off +by using this generated redirect. 
+ + + + diff --git a/content/riak/kv/2.9.9/developing/data-types.md b/content/riak/kv/2.9.9/developing/data-types.md new file mode 100644 index 0000000000..adc4b8d8b2 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/data-types.md @@ -0,0 +1,279 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Data Types" + identifier: "developing_data_types" + weight: 102 + parent: "developing" +toc: true +aliases: + - /riak/2.9.9/dev/using/data-types + - /riak/kv/2.9.9/dev/using/data-types + - /riak/2.9.9/dev/data-modeling/data-types + - /riak/kv/2.9.9/dev/data-modeling/data-types +--- + +[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others +[concept crdt]: ../../learn/concepts/crdts +[ops bucket type]: ../../using/cluster-operations/bucket-types + +Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types: + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [GSets](./gsets) +- [Maps](./maps) + +Riak KV also has 1 context-free data type, that has similar usage but does not require contexts. + +- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places) + + +Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps). + +For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt]. + +## Getting Started with Riak Data Types + +The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: + +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). +2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). +3. [Activate the bucket type](#activate-bucket-type). + +### Creating a Bucket with a Riak Data Type + +First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to either `counter`, `map`, `set`, or `hll`. + +The following would create a separate bucket type for each of the four +bucket-level data types: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}' +riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}' +``` + +> **Note** +> +> The names `maps`, `sets`, `counters`, `hlls` and `gsets` are not reserved +terms. You are free to name bucket types whatever you like, with +the exception of `default`. + +### Confirm Bucket configuration + +Once you've created a bucket with a Riak data type, you can check +to make sure that the bucket property configuration associated with that +type is correct. This can be done through the `riak-admin` interface: + +```bash +riak-admin bucket-type status maps +``` + +This will return a list of bucket properties and their associated values +in the form of `property: value`. 
If our `maps` bucket type has been set +properly, we should see the following pair in our console output: + +``` +datatype: map +``` + +### Activate Bucket type + +If a bucket type has been properly constructed, it needs to be activated +to be usable in Riak. This can also be done using the `bucket-type` +command interface: + +```bash +riak-admin bucket-type activate maps +``` + +To check whether activation has been successful, simply use the same +`bucket-type status` command shown above. + +See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application. + +## Required Bucket Properties + +In order for Riak data types to work the bucket should have the following bucket properties: + +- `allow_mult = true` +- `last_write_wins = false` + +These settings are set by default and should not be changed. + +## Data Types and Context + +Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence. + +If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client). + +> **Note** +> +> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter. + +In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map): + +```java +// Using the "ahmedMap" Location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +System.out.prinntln(ctx.getValue().toString()) + +// An indecipherable string of Unicode characters should then appear +``` + +```ruby +bucket = client.bucket('users') +ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps') +ahmed_map.instance_variable_get(:@context) + +# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j" +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```python +bucket = client.bucket_type('maps').bucket('users') +ahmed_map = Map(bucket, 'ahmed_info') +ahmed_map.context + +# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Note: using a previous UpdateMap or FetchMap result +Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context)); + +// Output: +// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("context: '%s'", rslt.context.toString('base64')); +}); + +// Output: +// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo=' +``` + +```erlang +%% You cannot fetch a data type's context directly using the Erlang +%% client. This is actually quite all right, as the client automatically +%% manages contexts when making updates. 
+``` + +> **Context with the Ruby, Python, and Erlang clients** +> +> In the Ruby, Python, and Erlang clients, you will not need to manually +handle context when making data type updates. The clients will do it all +for you. The one exception amongst the official clients is the Java +client. We'll explain how to use data type contexts with the Java client +directly below. + +### Context with the Java and PHP Clients + +With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations: + +* Disabling a flag within a map +* Removing an item from a set (whether the set is on its own or within a + map) +* Removing a field from a map + +Without context, these operations simply will not succeed due to the +convergence logic driving Riak data types. The example below shows you +how to fetch a data type's context and then pass it back to Riak. More +specifically, we'll remove the `paid_account` flag from the map: + +```java +// This example uses our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate removePaidAccountField = new MapUpdate() + .removeFlag("paid_account"); +UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField) + .withContext(ctx) + .build(); +client.execute(update); +``` + + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +## Usage Examples + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [Maps](./maps) +- [GSets](./gsets) +- [Hyperloglogs](./hyperloglogs) + +The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section. + +All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish. + +## Data Types and Search + +Riak data types can be searched like any other object, but with the +added benefit that your data type is indexed as a different type by Solr, +the search platform behind Riak Search. + +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code +samples from each of our official client libraries. 
+ + + + diff --git a/content/riak/kv/2.9.9/developing/data-types/counters.md b/content/riak/kv/2.9.9/developing/data-types/counters.md new file mode 100644 index 0000000000..be1ed3272c --- /dev/null +++ b/content/riak/kv/2.9.9/developing/data-types/counters.md @@ -0,0 +1,635 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Counters" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Counters" + identifier: "data_types_counters" + weight: 100 + parent: "developing_data_types" +toc: true +aliases: + - /riak/2.9.9/dev/using/data-types/counters + - /riak/kv/2.9.9/dev/using/data-types/counters + - /riak/2.9.9/dev/data-modeling/data-types/counters + - /riak/kv/2.9.9/dev/data-modeling/data-types/counters +--- + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero. + +The examples in this section will show you how to use counters on their own. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter set to `counter`: + +```bash +riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}' +``` + +> **Note** +> +> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status counters +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `counters` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: counter +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate counters +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status counters +``` + +After creating and activating our new `counters` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key +location that contains our counter. + +For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. 
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can +create this counter and ensure that the `counters` bucket will use our +`counters` bucket type like this: + +```java +// Using the countersBucket Namespace object from above: + +Location trafficTickets = new Location(countersBucket, "traffic_tickets"); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') + +# Alternatively, the Ruby client enables you to set a bucket type as +# being globally associated with a Riak data type. The following would +# set all counter buckets to use the counters bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters' + +# This would enable us to create our counter without specifying a bucket type +bucket = client.bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('traffic_tickets', $bucket); +``` + +```python +bucket = client.bucket_type('counters').bucket('traffic_tickets') +counter = bucket.new('traffic_tickets') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResult = cmd.Result; +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +``` + +```erlang +Counter = riakc_counter:new(). + +%% Counters in the Erlang client are opaque data structures that collect +%% operations as you mutate them. We will associate the data structure +%% with a bucket type, bucket, and key later on. +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +## Increment a Counter + +Now that our client knows which bucket/key pairing to use for our +counter, `traffic_tickets` will start out at 0 by default. If we happen +to get a ticket that afternoon, we can increment the counter: + +```java +// Using the "trafficTickets" Location from above: + +CounterUpdate cu = new CounterUpdate(1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment + +# This will increment the counter both on the application side and in +Riak +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment() + +# Updates are staged locally and have to be explicitly sent to Riak +# using the store() method. 
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 5}' +``` + +## Retrieve Counter Value + +We can retrieve the value of the counter and view how many tickets have accumulated: + +```java +// Using the "trafficTickets" Location from above: +FetchCounter fetch = new FetchCounter.Builder(trafficTickets) + .build(); +FetchCounter.Response response = client.execute(fetch); +RiakCounter counter = response.getDatatype(); +Long ticketsCount = counter.view(); +``` + +```ruby +counter.value +# Output will always be an integer +``` + +```php +$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getCounter(); + +$trafficTickets->getData(); # returns an integer +``` + +```python +counter.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, whereas the call above would return +# 6, the call below will return 0' since we started with an empty +# counter: + +counter.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any changes to the counter that have not yet been +# sent to Riak +counter.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +// response.Value has the counter value +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +client.fetchCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND", + options.bucketType, options.bucket, options.key); + } else { + logger.info("bt: %s, b: %s, k: %s, counter: %d", + options.bucketType, options.bucket, options.key, + rslt.counterValue); + } + } +); +``` + +```erlang +riakc_counter:dirty_value(Counter2). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, whereas the call above would return +%% '6', the call below will return '0' since we started with an empty +%% counter: + +riakc_counter:value(Counter2). + +%% To fetch the value stored on the server, use the call below: + +{ok, CounterX} = riakc_pb_socket:fetch_type(Pid, + {<<"counters">>, <<"counters">>}, + <<"traffic_tickets">>). +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets + +# Response: +{"type":"counter", "value": } +``` + +## Decrement a Counter + +Counters enable you to decrement values in addition to incrementing them as seen above. 
+
+For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record:
+
+```java
+// Using the "trafficTickets" Location from above:
+CounterUpdate cu = new CounterUpdate(-1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.decrement
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(-3)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+counter.decrement()
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var updateCmd = new UpdateCounter.Builder(-3)
+    .WithBucketType("counters")
+    .WithBucket("counters")
+    .WithKey("traffic_tickets")
+    .Build();
+
+rslt = client.Execute(updateCmd);
+response = updateCmd.Response;
+// response.Value is three less than before
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -1
+};
+
+// As with incrementing, you can also decrement by more than one, e.g.:
+options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -3
+};
+```
+
+```erlang
+Counter3 = riakc_counter:decrement(Counter2).
+
+%% As with incrementing, you can also decrement by more than one:
+
+Counter4 = riakc_counter:decrement(3, Counter3).
+
+%% At some point, we'll want to send our local updates to the server
+%% so they get recorded and are visible to others. Extract the update
+%% using the to_op/1 function, then pass it to
+%% riakc_pb_socket:update_type/4,5.
+
+riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>},
+                            <<"traffic_tickets">>,
+                            riakc_counter:to_op(Counter4)).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"decrement": 3}'
+```
+
+
+
+
+diff --git a/content/riak/kv/2.9.9/developing/data-types/gsets.md b/content/riak/kv/2.9.9/developing/data-types/gsets.md
new file mode 100644
index 0000000000..86ddc77202
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/data-types/gsets.md
@@ -0,0 +1,631 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: GSets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "GSets"
+    identifier: "data_types_gsets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/using/data-types/gsets
+  - /riak/kv/2.9.9/dev/using/data-types/gsets
+  - /riak/2.9.9/dev/data-modeling/data-types/gsets
+  - /riak/kv/2.9.9/dev/data-modeling/data-types/gsets
+---
+
+GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps).
+
+GSets are collections of unique binary values (such as strings). All of the values in a gset are unique and are automatically sorted alphabetically irrespective of the order they were added.
+
+For example, if you attempt to add the element `shovel` to a gset that already contains `shovel`, the operation will be ignored by Riak KV.
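+
+As a quick sketch of this behavior, using the Python client calls shown in the examples further down this page (the `tools` bucket and `shed` key here are hypothetical, and `client` is assumed to be an already-connected Riak client):
+
+```python
+# A duplicate addition to a gset is simply ignored by Riak KV.
+bucket = client.bucket_type('gsets').bucket('tools')
+gset = bucket.new('shed')
+
+gset.add('shovel')
+gset.add('shovel')  # no-op: 'shovel' is already in the gset
+gset.store()
+
+# After a reload, the gset still contains a single 'shovel' element:
+gset.reload()
+'shovel' in gset  # True
+```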
+ +Unlike sets, elements can only be added and no element modification or deletion is possible. + +> **Known Issue** +> +> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter `gset`: + +```bash +riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}' +``` + +> **Note** +> +> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status gsets +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `gsets` bucket type has been set properly we should see the following pair in our console output: + +``` +datatype: gset +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate gsets +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status gsets +``` + +After creating and activating our new `gsets` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +Using sets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair. + +Here is the general syntax for creating a bucket type/bucket/key +combination to handle a gset: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location set = + new Location(new Namespace("", ""), ""); +``` + +```ruby +# Note: both the Riak Ruby Client and Ruby the language have a class +# called Set. Make sure that you refer to the Ruby version as ::Set and +# the Riak client version as Riak::Crdt::Set + +bucket = client.bucket_type('bucket_type_name').bucket('bucket_name') +set = Riak::Crdt::Set.new(bucket, key) +``` + +```php +$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type')); +``` + +```python +gset = bucket.new('2019-11-17') + +# or + +from riak.datatypes import GSet +gset = GSet('account-12345678', '2019-11-17') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// As with counters, with the Riak .NET Client you interact with gsets +// by building an Options object or using a Builder +var builder = new FetchGSet.Builder() + .WithBucketType("gsets") + .WithBucket("account-12345678") + .WithKey("2019-11-17"); + +// NB: builder.Options will only be set after Build() is called. 
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types//buckets//datatypes/
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store a list of transactions that occur for an account number on a specific date.
+Let's create a Riak gset using the `gsets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('gsets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the gsets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'gsets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak gset.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a gset explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create an options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+Gset = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty. 
We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+count($gset->getData());
+```
+
+```python
+len(gset) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+FetchGSet fetchGSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchGSetCommand);
+GSetResponse response = fetchGSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("gset '2019-11-17' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_gset:size(Gset) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% gset that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a GSet
+
+But let's say that a pair of transactions occurred today. 
Let's add them to our `2019-11-17` set:
+
+```java
+// Using our "cities" Location from above:
+
+GSetUpdate su = new GSetUpdate()
+        .add("Toronto")
+        .add("Montreal");
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .build();
+client.execute(update);
+```
+
+```ruby
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('transaction a')
+  ->add('transaction b')
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+gset.add('transaction a')
+gset.add('transaction b')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "transaction a", "transaction b" };
+
+var builder = new UpdateGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17")
+    .WithAdditions(adds);
+
+UpdateGSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+GSetResponse response = cmd.Response;
+Assert.Contains("transaction a", response.AsStrings.ToArray());
+Assert.Contains("transaction b", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['transaction a', 'transaction b'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+Gset1 = riakc_gset:add_element(<<"transaction a">>, Gset),
+Gset2 = riakc_gset:add_element(<<"transaction b">>, Gset1).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["transaction a", "transaction b"]}'
+```
+
+## Remove from a GSet
+
+Removal from a GSet is not possible.
+
+## Retrieve a GSet
+
+Now, we can check which transactions are currently in our gset:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+#<Set: {"Montreal", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+var_dump($gset->getData());
+```
+
+```python
+gset.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['transaction a', 'transaction b']), the call below would
+# return frozenset([]).
+
+gset.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions. 
+gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(Gset2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(Gset2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"gsets">>, <<"account-12345678">>},
+                                        <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Toronto")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Toronto'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var gset20191117 = rslt.values;
+gset20191117.indexOf('transaction z'); // if present, index is >= 0
+gset20191117.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, Gset2 is the most "recent" set from the standpoint
+%% of our application.
+
+riakc_gset:is_element(<<"transaction z">>, Gset2).
+riakc_gset:is_element(<<"transaction a">>, Gset2).
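+
+%% As noted earlier, is_element/2 and the other query functions check
+%% the local value of the gset, not the live server-side value.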
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "response" from above:
+
+int numberOfTransactions = response.getDatatype().view().size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+response.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var gsetSize = rslt.values.length;
+```
+
+```erlang
+riakc_gset:size(Gset2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
+
+diff --git a/content/riak/kv/2.9.9/developing/data-types/hyperloglogs.md b/content/riak/kv/2.9.9/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..40d78851d4
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/data-types/hyperloglogs.md
@@ -0,0 +1,643 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/using/data-types/hyperloglogs
+  - /riak/kv/2.9.9/dev/using/data-types/hyperloglogs
+  - /riak/2.9.9/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/2.9.9/dev/data-modeling/data-types/hyperloglogs
+---
+
+The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog. 
+ +For this example we'll use the `hlls` bucket type created and activated above and a bucket called `hlls`: + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location hllLocation = + new Location(new Namespace("", ""), ""); +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +hll = bucket.new(key) + +# or + +from riak.datatypes import Hll +hll = Hll(bucket, key) +``` + +```go +// Buckets and bucket types are simply strings in the Go client. + +// See the examples below for more information, or the full example at +// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go + +// We will need the follow imports to run the examples: +import ( + "fmt" + "os" + "time" + + riak "github.com/basho/riak-go-client" + "errors" +) +``` + +```csharp +// In the C# client, buckets are just string parameters to operations. +// See the examples below for more information. +``` + +```javascript +// In the Node.js client, buckets are just string parameters to operations. +// See the examples below for more information. +``` + +```php +$command = (new Command\Builder\FetchHll($riak_client)) + ->buildLocation('', '', 'hlls') + ->build(); +``` + +```ruby +bucket = client.bucket_type('hlls').bucket('my_hlls') +``` + +```curl +curl http://localhost:8098/types//buckets//datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + + +## Create a HyperLogLog data type + +To create a hyperloglog data structure, you need to specify a bucket/key pair to +hold that hyperloglog. Here is the general syntax for doing so: + +```erlang +HLL = riakc_hll:new(). + +%% Hyperloglogs in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```java +// In the Java client, you specify the location of Data Types +// before you perform operations on them: + +Location hllLocation = + new Location(new Namespace("hlls", "hello"), "darkness"); + +// In the Java client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +hll = bucket.new(key) + +# or + +from riak.datatypes import Hll +hll = Hll(bucket, key) +``` + +```go +// In the Go client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```csharp +// In the C# client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```javascript +// In the Node.js client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. 
+``` + +```php +// Note that "hlls" is just an example HLL bucket type name used +// in these examples + +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('gosabres poked you.') + ->add('phprocks viewed your profile.') + ->add('phprocks started following you.') + ->buildBucket('', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +key = "darkness" +hll = Riak::Crdt::HyperLogLog.new(bucket, key) +``` + +```curl +# You cannot create an empty hyperloglog data structure through the HTTP +# interface. +# Hyperloglogs can only be created when an element is added to them, as in the +# examples below. +``` + +Upon creation, our hyperloglog data structure is empty: + +```erlang +HLL. + +%% which will return: +%% {hll,0,[]} +``` + +```java +FetchHll fetch = new FetchHll.Builder(hllLocation) + .build(); +RiakHll hll = client.execute(fetch); +boolean isEmpty = hll.getCardinality() == 0; +``` + +```python +is_empty = hll.value == 0 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } +}); +// Prints "Not Found" to logger.info. +``` + +```csharp + var fetch = new FetchHll.Builder() + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .Build(); + +RiakResult rslt = client.Execute(fetch); +HllResponse response = fetch.Response; +if (response.NotFound) +{ + Console.WriteLine("Not Found"); +} +// Prints "Not Found" to the console. +``` + +```php +$command = (new Command\Builder\FetchHll($riak_client)) + ->buildLocation('darkness', 'hello', 'hlls') + ->build(); + +$response = $command->execute(); + +$response->getCode() == '404'; +``` + +```ruby +puts hll.cardinality +# Prints "0" +``` + +```curl +curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness + +# Response +{"type":"hll","error":"notfound"} +``` + +## Add elements to a HyperLogLog data type + +```erlang +HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1), + +HLL2. 
+ +%% which will return: +%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]} +``` + +```java +HllUpdate hllUpdate = new HllUpdate() + .add("Jokes") + .add("Are") + .addAll(Arrays.asList("Better", "Explained", "Jokes")); + +hllUpdate.getElementAdds(); +// Returns the set of ["Jokes", "Are", "Better", "Explained"] +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +myhll = datatypes.Hll(bucket, 'hll_one') +myhll.add('Jokes') +myhll.add('Are') +myhll.add('Better') +myhll.add('Explained') +myhll.add('Jokes') +myhll.store() +# myhll.value == 4 +``` + +```go +// We will add values in the next example +``` + +```csharp +// We will add values in the next example +``` + +```javascript +// We will add values in the next example +``` + +```php +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('Jokes') + ->add('Are') + ->add('Better') + ->add('Explained') + ->add('Jokes') + ->buildBucket('my_hlls', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +``` + +```curl +curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \ + -H "Content-Type: application/json" \ + -d '{"add_all":["my", "old", "friend"]}' +``` + +However, when using a non-HTTP client, the approximate cardinality/value of our +data structure will be 0, locally, until its pushed to the server and then +[fetched](#retrieve-a-hyperloglog-datatype) from the server. + +```erlang +riakc_hll:value(HLL2) == 0. + +%% which will return: +%% true + +Port = 8087, +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port), +Key = <<"Holy Diver">>, +BucketType = <<"hlls">>, +Bucket = {BucketType, <<"rainbow in the dark">>}, + +ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)). +ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)). +``` + +```java +// Using hllUpdate and hllLocation from above examples + +UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate) + .build(); +client.execute(update); +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +myhll = datatypes.Hll(bucket, 'hll_one') +myhll.add('Jokes') +myhll.add('Are') +myhll.add('Better') +myhll.add('Explained') +myhll.add('Jokes') +myhll.store() +# myhll.value == 4 +``` + +```go +adds := [][]byte{ + []byte("Jokes"), + []byte("Are"), + []byte("Better"), + []byte("Explained"), + []byte("Jokes"), +} + +builder := riak.NewUpdateHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + WithAdditions(adds...). 
+ Build() +if err != nil { + return err +} + +return cluster.Execute(cmd) +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness', + additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'], +}; + +client.updateHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```csharp +var adds = new HashSet { "Jokes", "Are", "Better", "Explained", "Jokes" }; + +var update = new UpdateHll.Builder(adds) + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(update); +``` + +```php +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('Jokes') + ->add('Are') + ->add('Better') + ->add('Explained') + ->add('Jokes') + ->buildLocation('darkness', 'hello', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +hll.add('Jokes') +hll.batch do |s| + s.add 'Are' + s.add 'Better' + s.add 'Explained' + s.add 'Jokes' +end +``` + +## Retrieve a HyperLogLog data type + +Now, we can check the approximate count-of (a.k.a. the cardinality of the elements +added to) our hyperloglog data structure: + +```erlang +{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key), +riakc_hll:value(HLL3) == 4. + +%% which would return: +%% true + +%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the +%% unique elements we've added to the data structure. +``` + +```java +FetchHll hllFetchCmd = new FetchHll.Builder(location).build(); +RiakHll hll = client.execute(hllFetchCmd); +hll.getCardinality(); +// Which returns 4 + +// We added "Jokes" twice, but, remember, the algorithm only counts the +// unique elements we've added to the data structure. +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +myhll = bucket.get('hll_one') +# myhll.value == 4 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +// We added "Jokes" twice, but, remember, the algorithm only counts the +// unique elements we've added to the data structure. +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } + logger.info("Hyperloglog cardinality is: " + rslt.cardinality); +}); +// Prints "Hyperloglog cardinality is: 4" +// We added "Jokes" twice, but, remember, the algorithm only counts the +// unique elements we've added to the data structure. 
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($response->getHll()->getData()));
+$this->assertEquals(4, $response->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
+
+
+
+
+diff --git a/content/riak/kv/2.9.9/developing/data-types/maps.md b/content/riak/kv/2.9.9/developing/data-types/maps.md
new file mode 100644
index 0000000000..37694fa8d9
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/data-types/maps.md
@@ -0,0 +1,1885 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/using/data-types/maps
+  - /riak/kv/2.9.9/dev/using/data-types/maps
+  - /riak/2.9.9/dev/data-modeling/data-types/maps
+  - /riak/kv/2.9.9/dev/data-modeling/data-types/maps
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`. 
+ +If our `map` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: map +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate maps +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status maps +``` + +After creating and activating our new `maps` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key location that contains our map. + +The syntax for creating a map is analogous to the +syntax for creating other data types: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location map = + new Location(new Namespace("", ""), ""); +``` + +```ruby +bucket = client.bucket_type('bucket_type_name').bucket('bucket_name') +map = Riak::Crdt::Map.new(bucket, key) +``` + +```php +$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type'); +``` + +```python +# The client detects the bucket type's datatype and automatically +# returns the right datatype for you, in this case a Map. +map = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Map +map = Map(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("") + .WithBucket("") + .WithKey(""); +``` + +```javascript +// Options to pass to the various map methods +var options = { + bucketType: '', + bucket: '', + key: '' +}; +``` + +```erlang +%% Maps in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +curl http://localhost:8098/types//buckets//datatypes/ + +# Note that this differs from the URL structure for non-data type requests, +# which end in /keys/ +``` + +## Create a Map + +For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket. + +We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type: + +```java +// In the Java client, you specify the location of data types +// before you perform operations on them: + +Location ahmedMap = + new Location(new Namespace("maps", "customers"), "ahmed_info"); +``` + +```ruby +customers = client.bucket_type('maps').bucket('customers') +map = Riak::Crdt::Map.new(customers, 'ahmed_info') + +# Alternatively, the Ruby client enables you to set a bucket type as being +# globally associated with a Riak data type. 
The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+        .update("first_name", ru1)
+        .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Ahmed')
+  ->updateRegister('phone_number', '5551234567')
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved. 
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
He hasn't yet, so we'll set it to `false`: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate mu = new MapUpdate() + .update("enterprise_customer", new FlagUpdate(false)); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.flags['enterprise_customer'] = false +``` + +```php +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateFlag('enterprise_customer', false) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.flags['enterprise_customer'].disable() +map.store() +``` + + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Using our builder from above: + +mapOperation = new UpdateMap.MapOperation(); +mapOperation.SetFlag("enterprise_customer", false); + +builder.WithMapOperation(mapOperation); +cmd = builder.Build(); +rslt = client.Execute(cmd); + +response = cmd.Response; + +// response.Value as JSON: +// Map: {"Counters":{},"Sets":{}, + "Registers":{"first_name":"Ahmed","phone_number":"5551234567"}, + "Flags":{"enterprise_customer":false},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setFlag('enterprise_customer', false); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map4 = riakc_map:update({<<"enterprise_customer">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + Map3). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "enterprise_customer_flag": "disable" + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag==" + }' +``` + +We can retrieve the value of that flag at any time: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +RiakMap map = response.getDatatype(); +System.out.println(map.getFlag("enterprise_customer").view()); +``` + +```ruby +map.flags['enterprise_customer'] + +# false +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +echo $map->getFlag('enterprise_customer'); // false +``` + +```python +map.reload().flags['enterprise_customer'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +Map ahmedMap = response.Value; +ahmedMap.Flags["enterprise_customer"] +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + console.log("fetched map: %s", JSON.stringify(rslt)); +}); +``` + +```erlang +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. + +riakc_map:dirty_value(Map4). 
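+
+%% This returns the local view of the map, including the
+%% enterprise_customer flag update that has not yet been sent to Riak.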
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+        .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateCounter('page_visits', $updateCounter)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "page_visits_counter": 1
+      }
+    }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles. 
We'll store
+that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+        .add("robots")
+        .add("opera")
+        .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+        .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('robots')
+  ->add('opera')
+  ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//       "Sets":{"interests":["motorcycles","opera","robots"]},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+    {
+      "update": {
+        "interests_set": {
+          "add_all": [
+            "robots",
+            "opera",
+            "motorcycles"
+          ]
+        }
+      }
+    }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? 
interest + end +end + +# This will return three Boolean values +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$sets = $map->getSet('interests'); +var_dump($sets->getData()); +``` + +```python +reloaded_map = map.reload() +for interest in ['robots', 'opera', 'motorcycles']: + interest in reloaded_map.sets['interests'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +Map ahmedMap = response.Value; + +// All of the following return true: +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("opera"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + assert(rslt.map.sets['interests'].indexOf('robots') !== -1); +}); +``` + +```erlang +riakc_map:dirty_value(Map6). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false +``` + +We learn from a recent purchasing decision that Ahmed actually doesn't +seem to like opera. He's much more keen on indie pop. Let's change the +`interests` set to reflect that: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate() + .remove("opera") + .add("indie pop"); +MapUpdate mu = new MapUpdate() + .update("interests", su); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.batch do |m| + m.sets['interests'].remove('opera') + m.sets['interests'].add('indie pop') +end +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('indie pop') + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +```python +map.sets['interests'].discard('opera') +map.sets['interests'].add('indie pop') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.AddToSet("interests", "indie pop"); +mapOperation.RemoveFromSet("interests", "opera"); + +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); + +MapResponse response = cmd.Response; +Map ahmedMap = response.Value; + +// This is false +ahmedMap.Sets.GetValue("interests").Contains("opera"); + +// These are true +ahmedMap.Sets.GetValue("interests").Contains("indie pop"); +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + mapOp.removeFromSet('interests', 'opera'); + mapOp.addToSet('interests', 'indie pop'); + + options.context = rslt.context; + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map7 = 
riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+                        Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of a wide variety of
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("first_name", ru1)
+        .update("last_name", ru2)
+        .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Annika')
+    ->updateRegister('last_name', 'Weiss')
+    ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); 
+mapOp.map('annika_info') + .setRegister('first_name', 'Annika') + .setRegister('last_name', 'Weiss') + .setRegister('phone_number', '5559876543'); + +options.op = mapOp; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map12 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end, + Map11), +Map13 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"last_name">>, register}, + fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end, + Map12), +Map14 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end, + Map13). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "first_name_register": "Annika", + "last_name_register": "Weiss", + "phone_number_register": "5559876543" + } + } + } + } + ' +``` + +The value of a register in a map can be obtained without a special +method: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +String annikaFirstName = response.getDatatype() + .getMap("annika_info") + .getRegister("first_name") + .view() + .toString(); +``` + +```ruby +map.maps['annika_info'].registers['first_name'] + +# "Annika" +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getRegister('first_name'); // Annika +``` + +```python +map.reload().maps['annika_info'].registers['first_name'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Registers.GetValue("first_name"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var annikaFirstName = + rslt.map.maps['annika_info'].registers['first_name'].toString('utf8'); +}); +``` + +```erlang +riakc_map:dirty_value(Map14). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +Registers can also be removed: + +```java +// This example uses our "ahmedMap" location from above. 
Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+        .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .removeRegister("first_name");
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('first_name')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->removeRegister('first_name');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['first_name']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("first_name");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('first_name');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+    fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+    Map14). 
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "remove": ["phone_number_register"]
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+  }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("enterprise_plan", new FlagUpdate(false))
+        .update("family_plan", new FlagUpdate(false))
+        .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_plan', false)
+    ->updateFlag('family_plan', false)
+    ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"enterprise_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map15),
+Map17 = riakc_map:update(
+    
{<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"family_plan">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + M) end, + Map16), +Map18 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"free_plan">>, flag}, + fun(F) -> riakc_flag:enable(F) end, + M) end, + Map17). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "enterprise_plan_flag": "disable", + "family_plan_flag": "disable", + "free_plan_flag": "enable" + } + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag==" + } + ' +``` + +The value of a flag can be retrieved at any time: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +boolean enterprisePlan = response.getDatatype() + .getMap("annika_info") + .getFlag("enterprise_plan") + .view(); +``` + +```ruby +map.maps['annika_info'].flags['enterprise_plan'] + +# false +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false +``` + +```python +map.reload().maps['annika_info'].flags['enterprise_plan'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Flags["enterprise_plan"]; +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var enterprisePlan = + rslt.map.maps.annika_info.flags.enterprise_plan; +}); +``` + +```erlang +riakc_map:dirty_value(Map18). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +It's also important to track the number of purchases that Annika has +made with our company. 
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "interests_set": { + "add": "tango dancing" + } + } + } + } + } + ' +``` + +We can remove that interest in just the way that we would expect: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().remove("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .withUpdate(ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].remove('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withContext($response->getMap()->getContext()) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].discard('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing"); + +// Note: using Context from previous response +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); +client.Execute(builder.Build()); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + var annika_map = mapOp.map('annika_info'); + annika_map.removeFromSet('interests', 'tango dancing'); + + options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp, + context: rslt.context + }; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map21 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end, + M) end, + Map20). 
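+
+%% As with the other Erlang examples on this page, these operations are
+%% staged locally in Map21. Assuming a connected client `Pid`, they
+%% could then be pushed to the server with something like:
+%%
+%%   riakc_pb_socket:update_type(Pid,
+%%                               {<<"maps">>, <<"customers">>},
+%%                               <<"ahmed_info">>,
+%%                               riakc_map:to_op(Map21)).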
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "remove": "tango dancing"
+          }
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+  }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('first_purchase', true)
+    ->updateRegister('amount', '1271')
+    ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new 
Error(err);
+    }
+});
+```
+
+```erlang
+Map22 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"purchase">>, map},
+        fun(PM) -> riakc_map:update(
+            {<<"first_purchase">>, flag},
+            fun(F) -> riakc_flag:enable(F) end,
+            PM) end,
+        M) end,
+    Map21
+).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "purchase_map": {
+            "update": {
+              "first_purchase_flag": "enable",
+              "amount_register": "1271",
+              "items_set": {
+                "add": "large widget"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/data-types/sets.md b/content/riak/kv/2.9.9/developing/data-types/sets.md
new file mode 100644
index 0000000000..1b7afff590
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/data-types/sets.md
@@ -0,0 +1,773 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Sets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Sets"
+    identifier: "data_types_sets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/using/data-types/sets
+  - /riak/kv/2.9.9/dev/using/data-types/sets
+  - /riak/2.9.9/dev/data-modeling/data-types/sets
+  - /riak/kv/2.9.9/dev/data-modeling/data-types/sets
+---
+
+Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps).
+
+Sets are collections of unique binary values (such as strings); each value can appear in a given set only once.
+
+For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `set`:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+```
+
+> **Note**
+>
+> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket properties associated with that type are correct:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `sets` bucket type has been set properly, we should see the following pair in our console output:
+
+```
+datatype: set
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate sets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+After creating and activating our new `sets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair. 
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. 
The following would +# set all set buckets to use the sets bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets' + +# This would enable us to create our set without specifying a bucket +# type: +travel = client.bucket('travel') +cities_set = Riak::Crdt::Set.new(travel, 'cities') +``` + +```php +$location = new \Basho\Riak\Location('cities', 'travel', 'sets'); +``` + +```python +travel = client.bucket_type('sets').bucket('travel') + +# The client detects the bucket type's data type and automatically +# returns the right data type for you, in this case a Riak set. +cities_set = travel.new('cities') + +# You can also create a reference to a set explicitly: +from riak.datatypes import Set + +cities_set = Set(travel, 'cities') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Now we'll create a Builder object for the set with which we want to +// interact: +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); +``` + +```javascript +// Now we'll create a options object for the set with which we want to +// interact: +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +``` + +```erlang +CitiesSet = riakc_set:new(). + +%% Sets in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +# You cannot create an empty set through the HTTP interface. Sets can +# only be created when an element is added to them, as in the examples +# below. +``` + +Upon creation, our set is empty. We can verify that it is empty at any +time: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +RiakSet set = response.getDatatype(); +boolean isEmpty = set.viewAsSet().isEmpty(); +``` + +```ruby +cities_set.empty? +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($set->getData()); +``` + +```python +len(cities_set) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); + +FetchSet fetchSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchSetCommand); +SetResponse response = fetchSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("set 'cities' is not found!"); + } +}); +``` + +```erlang +riakc_set:size(CitiesSet) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% set that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a Set + +But let's say that we read a travel brochure saying that Toronto and +Montreal are nice places to go. 
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+        .remove("Montreal")
+        .add("Hamilton")
+        .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Hamilton')
+    ->add('Ottawa')
+    ->remove('Montreal')
+    ->atLocation($location)
+    ->withContext($response->getSet()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse response = cmd.Response;
+
+// using System.Linq
+var responseStrings = response.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4). 
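+
+%% The operations above are staged locally. Assuming a connected client
+%% `Pid`, they could be sent to the server with something like:
+%%
+%%   riakc_pb_socket:update_type(Pid,
+%%                               {<<"sets">>, <<"travel">>},
+%%                               <<"cities">>,
+%%                               riakc_set:to_op(CitiesSet5)).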
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}
+
+curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \
+  -H "Content-Type: application/json" \
+  -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}'
+```
+
+## Retrieve a Set
+
+Now, we can check on which cities are currently in our set:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+        .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+#<Set: {"Hamilton", "Ottawa", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+var_dump($set->getData());
+```
+
+```python
+cities_set.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would
+# return frozenset([]).
+
+cities_set.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions or deletions.
+cities_set.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in setResponse.AsStrings)
+{
+    Console.WriteLine("Cities Set Value: {0}", value);
+}
+
+// Output:
+// Cities Set Value: Hamilton
+// Cities Set Value: Ottawa
+// Cities Set Value: Toronto
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("cities set values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: cities set values: 'Hamilton, Ottawa, Toronto'
+```
+
+```erlang
+riakc_set:dirty_value(CitiesSet5).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_set:value(CitiesSet5).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"sets">>,<<"travel">>},
+                                        <<"cities">>). 
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="}
+
+# You can also fetch the value of the set without the context included:
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false
+
+# Response
+{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]}
+```
+
+## Find Set Member
+
+Or we can see whether our set includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+        .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('Vancouver', $set->getData()); # false
+
+in_array('Ottawa', $set->getData()); # true
+```
+
+```python
+'Vancouver' in cities_set
+# False
+
+'Ottawa' in cities_set
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver");
+bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var cities_set = result.values;
+cities_set.indexOf('Vancouver'); // if present, index is >= 0
+cities_set.indexOf('Ottawa'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, CitiesSet5 is the most "recent" set from the standpoint
+%% of our application.
+
+riakc_set:is_element(<<"Vancouver">>, CitiesSet5).
+riakc_set:is_element(<<"Ottawa">>, CitiesSet5).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of Set
+
+We can also determine the size of the set:
+
+```java
+// Using our "binarySet" from the previous example:
+
+int numberOfCities = binarySet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($set->getData());
+```
+
+```python
+len(cities_set)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+setResponse.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var cities_set_size = result.values.length;
+```
+
+```erlang
+riakc_set:size(CitiesSet5). 
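+
+%% As noted earlier, size/1 operates over the value last fetched from
+%% the server, so locally staged additions are not counted. To count
+%% those as well, you could take the length of the dirty value:
+%%
+%%   length(riakc_set:dirty_value(CitiesSet5)).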
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/faq.md b/content/riak/kv/2.9.9/developing/faq.md
new file mode 100644
index 0000000000..97516b3522
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/faq.md
@@ -0,0 +1,559 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Frequently Asked Questions"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Developing FAQ"
+    identifier: "developing_faq"
+    weight: 108
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/2.9.9/community/faqs/developing
+  - /riak/kv/2.9.9/community/faqs/developing
+---
+
+[Basho Bench]: {{<baseurl>}}riak/kv/2.9.9/using/performance/benchmarking
+[Bitcask]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask
+[Bucket Properties]: {{<baseurl>}}riak/kv/2.9.9/developing/usage
+[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/commit-hooks
+[Configuration Files]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{<baseurl>}}riak/kv/2.9.9/developing/client-libraries
+[MapReduce]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce
+[Memory]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory
+[Riak CS]: {{<baseurl>}}riak/cs/2.1.1
+[System Planning]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context#vector-clocks
+
+
+## General
+
+
+**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?**
+
+**A:**
+  If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example:
+
+  ```erlang
+  {bitcask, [
+      {data_root, "data/bitcask"},
+      {expiry_secs, 86400} %% Expire after a day
+  ]},
+  ```
+
+  There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0.
+
+  You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM.
+
+**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?**
+
+
+**A:**
+  Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free.
+
+  If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact.
+
+**Q: Can I list buckets or keys in production?**
+
+
+**A:**
+  It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size. 
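+
+  For reference, a minimal sketch of the operations in question, using the [Erlang Riak Client] (the `Pid` here is assumed to be an already-connected client; both calls walk the entire keyspace):
+
+  ```erlang
+  {ok, Buckets} = riakc_pb_socket:list_buckets(Pid),
+  {ok, Keys} = riakc_pb_socket:list_keys(Pid, <<"mybucket">>).
+  ```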
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3
+
+  For this example, I am using simple small integers instead of the actual 160-bit partition index values for the sake of simplicity. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key, and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+```
+0-1-2       1-2-3       2-3-4       3-4-5
+4-5-6       5-6-7       6-7-8       7-8-9
+8-9-10      9-10-11     10-11-12    11-12-13
+12-13-14    13-14-15    14-15-0     15-0-1
+```
+
+  Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes:
+
+```
+0-1-4-7-10-13    0-2-4-7-10-13    0-2-5-7-10-13    0-2-5-8-10-13
+0-2-5-8-11-13    0-2-5-8-11-14    0-3-4-7-10-13    0-3-5-7-10-13
+0-3-5-8-10-13    0-3-5-8-11-13    0-3-5-8-11-14    0-3-6-7-10-13
+0-3-6-8-10-13    0-3-6-8-11-13    0-3-6-8-11-14    0-3-6-9-10-13
+0-3-6-9-11-13    0-3-6-9-11-14    0-3-6-9-12-13    0-3-6-9-12-14
+0-3-6-9-12-15    1-2-5-8-11-14    1-3-5-8-11-14    1-3-6-8-11-14
+1-3-6-9-11-14    1-3-6-9-12-14    1-3-6-9-12-15    1-4-5-8-11-14
+1-4-6-8-11-14    1-4-6-9-11-14    1-4-6-9-12-14    1-4-6-9-12-15
+1-4-7-8-11-14    1-4-7-9-11-14    1-4-7-9-12-14    1-4-7-9-12-15
+1-4-7-10-11-14   1-4-7-10-12-14   1-4-7-10-12-15   1-4-7-10-13-14
+1-4-7-10-13-15   2-3-6-9-12-15    2-4-6-9-12-15    2-4-7-9-12-15
+2-4-7-10-12-15   2-4-7-10-13-15   2-5-6-9-12-15    2-5-7-9-12-15
+2-5-7-10-12-15   2-5-7-10-13-15   2-5-8-9-12-15    2-5-8-10-12-15
+2-5-8-10-13-15   2-5-8-11-12-15   2-5-8-11-13-15   2-5-8-11-14-15
+```
+
+  When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency.
+  A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for over half of them to respond. This results in consistent responses to GETs even when one of the vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may include only 1 member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+  Continuing with the above example, consider what happens if node C is force-removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+  In this new 4-node configuration, any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+  So, making a couple of assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+      ```
+      a - 0-1-2
+      b - 6-7-8
+      c - 10-11-12
+      ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys will vary depending on the vnodes chosen for the coverage set. Of the 56 possible covering sets:
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+    ```
+    0-2-5-8-10-13    0-2-5-8-11-13    0-2-5-8-11-14    0-3-5-8-10-13
+    0-3-5-8-11-13    0-3-5-8-11-14    0-3-6-8-10-13    0-3-6-8-11-13
+    0-3-6-8-11-14    0-3-6-9-10-13    0-3-6-9-11-13    0-3-6-9-11-14
+    1-2-5-8-11-14    1-3-5-8-11-14    1-3-6-8-11-14    1-3-6-9-11-14
+    1-4-5-8-11-14    1-4-6-8-11-14    1-4-6-9-11-14    1-4-7-8-11-14
+    ```
+
+  * 24 sets (42.9%) will return 2 of the 3 keys:
+
+    ```
+    {a,b} (7 sets)
+    0-3-6-9-12-13    0-3-6-9-12-14    0-3-6-9-12-15    1-3-6-9-12-14
+    1-3-6-9-12-15    1-4-6-9-12-14    1-4-6-9-12-15
+
+    {a,c} (12 sets)
+    0-1-4-7-10-13    0-2-4-7-10-13    0-2-5-7-10-13    0-3-4-7-10-13
+    0-3-5-7-10-13    0-3-6-7-10-13    1-4-7-10-11-14   1-4-7-10-12-14
+    1-4-7-10-12-15   1-4-7-10-13-14   1-4-7-10-13-15   1-4-7-9-11-14
+
+    {b,c} (5 sets)
+    2-5-8-10-12-15   2-5-8-10-13-15   2-5-8-11-12-15   2-5-8-11-14-15
+    2-5-8-11-13-15
+    ```
+
+  * 10 sets (17.8%) will return only one of the 3 keys:
+
+    ```
+    {a} (2 sets)
+    1-4-7-9-12-14    1-4-7-9-12-15
+
+    {b} (4 sets)
+    2-3-6-9-12-15    2-4-6-9-12-15    2-5-6-9-12-15    2-5-8-9-12-15
+
+    {c} (4 sets)
+    2-4-7-10-12-15   2-4-7-10-13-15   2-5-7-10-12-15   2-5-7-10-13-15
+    ```
+
+  * 2 sets (3.6%) will not return any of the 3 keys:
+
+    ```
+    2-4-7-9-12-15    2-5-7-9-12-15
+    ```
+
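+
+  If you want to sanity-check these counts yourself, the enumeration is small enough to brute-force. Below is a minimal sketch (a hypothetical `coverage_check` module, not part of Riak) that reproduces the figure of 56 covering sets for this 16-partition, `n_val` = 3 example:
+
+  ```erlang
+  -module(coverage_check).
+  -export([count/0]).
+
+  %% The 16 preflists of the example ring: each partition plus the
+  %% next two, wrapping around.
+  preflists() ->
+      [[I, (I + 1) rem 16, (I + 2) rem 16] || I <- lists:seq(0, 15)].
+
+  %% A candidate set is a covering set if every preflist contains at
+  %% least one of its vnodes.
+  covers(Set) ->
+      lists:all(fun(PL) ->
+                    lists:any(fun(V) -> lists:member(V, Set) end, PL)
+                end, preflists()).
+
+  %% All subsets of size K of a list.
+  subsets(0, _) -> [[]];
+  subsets(_, []) -> [];
+  subsets(K, [H | T]) ->
+      [[H | S] || S <- subsets(K - 1, T)] ++ subsets(K, T).
+
+  %% Counts the 6-vnode subsets of partitions 0-15 that cover all
+  %% preflists; returns 56.
+  count() ->
+      length([S || S <- subsets(6, lists:seq(0, 15)), covers(S)]).
+  ```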
+**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories. + + ```erlang + ClusterNode = 'riak@127.0.0.1'. + + application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}). + {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []). + Ring2 = setelement(2, Ring, node()). + riak_core_ring_manager:set_my_ring(Ring2). + riak_core_ring_manager:write_ringfile(). + [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()]. + ``` + +**Q: Is there a limit on the size of files that can be stored on Riak?** + + +**A:** + There isn't a limit on object size, but we suggest you keep it to no more than 1-2MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose. + +**Q: Does the bucket name impact key storage size?** + + +**A:** + The storage per key is 40 bytes plus the key size and bucket name size. + + Example: + + Key size: 15 bytes. + Bucket Name size: 10 bytes. + + Total size = 40 + 15 + 10 = **65 bytes**. + + +**Q: Are Riak-generated keys unique within a bucket?** + + +**A:** + It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety. + +**Q: Where are bucket properties stored?** + + +**A:** + The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type. + Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster. + + The bucket properties stay in the ring and cluster metadata even if the bucket is empty. +**Q: Are Riak keys / buckets case sensitive?** + + +**A:** + Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`. + +**Q: Can I run my own Erlang applications in the same VM as Riak?** + + +**A:** + We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold: + + 1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability. + 2. You will be able to upgrade Riak and your application independently of one another. + 3. When your application or Riak needs more capacity, you can scale them separately to meet your production needs. + +**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?** + + +**A:** + Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console. + + +**Q: How do I spread requests across---i.e.
load balance---a Riak cluster?** + + +**A:** + There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**. + + For further information see [System Planning]. + + +**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?** + There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node a series of merges are kicked off and the total size of the data directory shrinks. Why does this happen? + + +**A:** + Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows: + + 1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`) + 2. Remove the currently active file from the list; the active file is the one being actively written + 3. Lookup file stats for each data file; this includes percent fragmentation and number of dead bytes + 4. If any of the stats exceed the defined triggers, the Bitcask directory is merged + + The default triggers for a Bitcask directory: + + * `{frag_merge_trigger, 60}, % >= 60% fragmentation` + * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB` + + In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory. + + If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default). + +**Q: When retrieving a list of siblings I am getting the same vtag multiple times.** + When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling? + + +**A:** + The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata. + + It is possible to get siblings with the same vtag during vector clock pruning and read/repair. + + See [vector clocks] for more information. + + +**Q: How should I structure larger data objects?** + I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs? + + +**A:** + The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include: + + 1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time? + 2. How likely are the objects to be updated at the same time by multiple processes? + + If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. Generally, you will want to add links to connect the objects for easy fetching and traversal. 
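+ + As a loose illustration of that advice, the parent object can hold just references to its children instead of the children themselves (a minimal sketch; the field and key names here are invented for the example): + + ```json + { + "name": "parent-record", + "child_keys": ["parent-record_child_1", "parent-record_child_2"] + } + ``` + + Each child then lives under its own key and can be fetched or updated independently, which keeps the parent's payload small.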
+ +**Q: Is there any way in Riak to limit access to a user or a group of users?** + + +**A:** + Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication. + + If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it. + +**Q: Is there a way to enforce a schema on data in a given bucket?** + Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error. + + +**A:** + Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. This document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in JavaScript that restricts non-JSON content. + +**Q: How does the Erlang Riak Client manage node failures?** + Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down? + + +**A:** + The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function. + + * `queue_if_disconnected` (default: `false`) - requests will be queued when the connection to the server is lost. + * `auto_reconnect` (default: `false`) - if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`. + + If these options are both false, connection errors will be returned to the process making requests as `{error, Reason}` tuples. + +**Q: Is there a limiting factor for the number of buckets in a cluster?** + + +**A:** + As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node. + + More on [Bucket Properties]. + +**Q: Is it possible to configure a single bucket's properties in `app.config`?** + + +**A:** + Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state. + + You can read more on `app.config` in [Configuration Files]. + +**Q: Is there a simple command to delete a bucket?** + + +**A:** + There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work: + + ```curl + curl -X DELETE http://your-host:8098/riak/your-bucket + ``` + +**Q: Can Riak be configured to fail an update instead of generating a conflict?** + + +**A:** + No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do. + +**Q: How can I limit the number of keys retrieved?** + + +**A:** + You'll need to use a [MapReduce] job for this. + + You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster.
It will only reduce load on your client. + +**Q: How is the real hash value for replicas calculated based on the preflist?** + + +**A:** + The hash is calculated first and then the next *N* partitions are chosen for the preflist. + +**Q: Do client libraries support load balancing/round robin?** + + +**A:** + + * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred. + * The Java client is strictly round robin, but with retries built in. + * The Python client also follows round robin without retries. + * The Erlang client does not support any load balancing. + +## MapReduce + + +**Q: Does the number of keys in a bucket affect the performance of MapReduce?** + + +**A:** + Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run. + +**Q: How do I filter out `not_found` from MapReduce results?** + If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys. + + +**A:** + There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list]. + +**Q: Is it possible to call a reduce function at specific intervals during a map function?** + When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often. + + +**A:** + Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce. + +**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?** + + +**A:** + Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase. + +**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?** + + +**A:** + This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com]. + +**Q: Why does MapReduce return a JSON object on occasion instead of an array?** + + +**A:** + `mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---should be turned into a hash: + + ```erlang + list_to_binary(mochijson2:encode([{a, b}, {foo, bar}])). + <<"{\"a\":\"b\",\"foo\":\"bar\"}">> + ``` + + JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists. + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started.md b/content/riak/kv/2.9.9/developing/getting-started.md new file mode 100644 index 0000000000..3e2621d47f --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started.md @@ -0,0 +1,51 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +aliases: +--- + +[install index]: {{}}riak/kv/2.9.9/setup/installing +[dev client libraries]: {{}}riak/kv/2.9.9/developing/client-libraries + +Welcome, new Riak developer!
This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
    +
  • Java +
  • Ruby +
  • Python +
  • C Sharp +
  • Node.js +
  • Erlang +
  • PHP +
  • Go +
+ +### Community-supported Client Libraries + +Please see our [client libraries][dev client libraries] page for a listing of +community-supported clients. + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/csharp.md b/content/riak/kv/2.9.9/developing/getting-started/csharp.md new file mode 100644 index 0000000000..7eba7a24a7 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/csharp.md @@ -0,0 +1,86 @@ +--- +title: "Getting Started with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "C Sharp" + identifier: "getting_started_csharp" + weight: 103 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/csharp + - /riak/kv/2.9.9/dev/taste-of-riak/csharp +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.9/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. + +### Client Setup + +Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager. + +{{% note title="Configuring for a remote cluster" %}} +By default, the Riak .NET Client will add a section to your `app.config` file +for a four node local cluster. If you are using a remote cluster, open up +`app.config` and change the `hostAddress` values to point to nodes in your +remote cluster. +{{% /note %}} + +### Connecting to Riak + +Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object. + +```csharp +using System; +using RiakClient; + +namespace TasteOfRiak +{ + class Program + { + static void Main(string[] args) + { + // don't worry, we'll use this string later + const string contributors = "contributors"; + IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig"); + IRiakClient client = cluster.CreateClient(); + } + } +} +``` + +This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndpoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak. + +Let's make sure the cluster is online. Add this to your `Main` method: + +```csharp +var pingResult = client.Ping(); + +if (pingResult.IsSuccess) +{ + Console.WriteLine("pong"); +} +else +{ + Console.WriteLine("Are you sure Riak is running?"); + Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage); +} +``` + +This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes. + +We are now ready to start interacting with Riak. 
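+ +Since `IRiakEndpoint` implements `IDisposable`, it is also worth making sure the cluster object is disposed of when your program is finished with Riak. A minimal sketch of one way to do that, reusing the names from the example above: + +```csharp +// Wrapping the endpoint in a using statement guarantees that +// Dispose() is called even if an exception is thrown. +using (IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = cluster.CreateClient(); + // ... issue commands such as client.Ping() here ... +} +```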
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.9/developing/getting-started/csharp/crud-operations) + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/2.9.9/developing/getting-started/csharp/crud-operations.md new file mode 100644 index 0000000000..37f5f9eabc --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/csharp/crud-operations.md @@ -0,0 +1,148 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "CRUD Operations" + identifier: "getting_started_csharp_crud" + weight: 100 + parent: "getting_started_csharp" +toc: true +aliases: +--- + +### Creating Objects In Riak + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak. + +The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At the most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak. + +Add the `RiakClient.Models` namespace to your using directives. Your usings should look like this: + +```csharp +using System; +using System.Collections.Generic; +using RiakClient; +using RiakClient.Models; +``` + +Add the `Person` class to the `TasteOfRiak` namespace: + +```csharp +public class Person +{ + public string EmailAddress { get; set; } + public string FirstName { get; set; } + public string LastName { get; set; } +} +``` + +Now let's create some people! + +```csharp +var people = new[] +{ + new Person { + EmailAddress = "bashoman@basho.com", + FirstName = "Basho", + LastName = "Man" + }, + new Person { + EmailAddress = "johndoe@gmail.com", + FirstName = "John", + LastName = "Doe" + } +}; + +foreach (var person in people) +{ + var o = new RiakObject(contributors, person.EmailAddress, person); + var putResult = client.Put(o); + + if (putResult.IsSuccess) + { + Console.WriteLine("Successfully saved {0} to bucket {1}", o.Key, o.Bucket); + } + else + { + Console.WriteLine("Are you *really* sure Riak is running?"); + Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage); + } +} +``` + +In this sample, we create a collection of `Person` objects and then save each `Person` to Riak. + +Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`. + +Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be an error message displaying the result code and a description of the error. + +### Reading from Riak + +Let's find a person! + +```csharp +var result = client.Get(contributors, "bashoman@basho.com"); +if (result.IsSuccess) +{ + bashoman = result.Value.GetObject<Person>(); + Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage); +} +``` + +We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult<RiakObject>` which, like other RiakResults, helpfully encapsulates the communication with Riak.
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/2.9.9/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..81efd42caf --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,111 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/2.9.9/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+ +| Bucket | Key Pattern | Example Key +|:-------|:------------|:----------- +| `Users` | `<user_name>` | `joeuser` +| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13` +| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br/>`marketing_group_Inbox_2014-03-06` |
+ +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. + +For the `Msgs` bucket, let's use a combination of the username and the +posting UTC datetime in an [ISO 8601][iso_8601] +format. This combination gives us the pattern `<user_name>_<datetime>`, +which produces keys like `joeuser_2014-03-05T23:20:28`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for +users, and `<group_name>_Inbox_<date>` for groups, which will look like +`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2MB. Objects larger than that can hurt +performance, especially when many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, please refer to +[this source code][2] for the repositories that we'll be using. + +[This console application][3] exercises the code that we've written. + +The repository pattern and `TimelineManager` help with a few things: + + - It helps us to see if an object exists before creating a new one + - It keeps our buckets and key names consistent + - It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest.
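+ +As a rough illustration of what "computing" a key name can look like in C#, here is a minimal sketch of helpers that build the natural keys from the table above (the class and method names are invented for this example): + +```csharp +using System; + +// Illustrative helpers for the key patterns described above. +public static class KeyBuilder +{ + // Msgs key: <user_name>_<datetime>, e.g. "joeuser_2014-03-06T02:05:13" + public static string MsgKey(string userName, DateTime createdUtc) + { + return string.Format("{0}_{1:yyyy-MM-ddTHH:mm:ss}", userName, createdUtc); + } + + // Timelines key: <owner>_<type>_<date>, e.g. "joeuser_Sent_2014-03-06" + public static string TimelineKey(string owner, string type, DateTime dateUtc) + { + return string.Format("{0}_{1}_{2:yyyy-MM-dd}", owner, type, dateUtc); + } +} +```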
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/csharp/querying.md b/content/riak/kv/2.9.9/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..fd9d6df815 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/csharp/querying.md @@ -0,0 +1,214 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/querying-csharp + - /riak/kv/2.9.9/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+ +```csharp +Console.WriteLine("Creating Data"); +Customer customer = CreateCustomer(); +IEnumerable orders = CreateOrders(customer); +OrderSummary orderSummary = CreateOrderSummary(customer, orders); + +Console.WriteLine("Starting Client"); +using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = endpoint.CreateClient(); + + Console.WriteLine("Storing Data"); + + client.Put(ToRiakObject(customer)); + + foreach (Order order in orders) + { + // NB: this adds secondary index data as well + client.Put(ToRiakObject(order)); + } + + client.Put(ToRiakObject(orderSummary)); + + ... + ... + ... +} +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```csharp +Console.WriteLine("Fetching related data by shared key"); +string key = "1"; + +var result = client.Get(customersBucketName, key); +CheckResult(result); +Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result)); + +result = client.Get(orderSummariesBucketName, key); +CheckResult(result); +Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result)); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.9/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will make a note of where +secondary index data is added to our model objects. 
+ +```csharp +private static RiakObject ToRiakObject(Order order) +{ + var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString()); + var riakObject = new RiakObject(orderRiakObjectId, order); + + IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName); + salesPersonIdIndex.Add(order.SalesPersonId.ToString()); + + BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName); + orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd")); + + return riakObject; +} +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders. We'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +```csharp +// Query for order keys where the SalesPersonId index is set to 9000 +var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName); +RiakResult<RiakIndexResult> indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here. +CheckResult(indexRiakResult); +RiakIndexResult indexResult = indexRiakResult.Value; +Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +Jane's orders (key values): 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID; next, let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```csharp +// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 +riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName); +indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here. +CheckResult(indexRiakResult); +indexResult = indexRiakResult.Value; +Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +October orders (key values): 1, 2 +``` + +We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/erlang.md b/content/riak/kv/2.9.9/developing/getting-started/erlang.md new file mode 100644 index 0000000000..dc3f0d2fc5 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/erlang.md @@ -0,0 +1,59 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/erlang + - /riak/kv/2.9.9/dev/taste-of-riak/erlang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.9/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let's create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the five-minute install +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.9/developing/getting-started/erlang/crud-operations) + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/2.9.9/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..0e8ef9b7b2 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,172 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let's create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let's store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let's store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}.
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
+``` + +Next let's clean up our mess: + +```erlang +riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>). +riakc_pb_socket:stop(Pid). +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/2.9.9/developing/getting-started/erlang/object-modeling.md new file mode 100644 index 0000000000..bd35edcf1c --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/erlang/object-modeling.md @@ -0,0 +1,342 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Object Modeling" + identifier: "getting_started_erlang_object" + weight: 102 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/object-modeling-erlang + - /riak/kv/2.9.9/dev/taste-of-riak/object-modeling-erlang +--- + +To get started, let's create the records that we'll be using. + +{{% note title="Code Download" %}} +You can also download the code for this chapter at +[GitHub](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema). + +The GitHub version includes Erlang type specifications which have been omitted +here for brevity. +{{% /note %}} + + +```erlang +%% msgy.hrl + +-define(USER_BUCKET, <<"Users">>). +-define(MSG_BUCKET, <<"Msgs">>). +-define(TIMELINE_BUCKET, <<"Timelines">>). +-define(INBOX, "Inbox"). +-define(SENT, "Sent"). + +-record(user, {user_name, full_name, email}). + +-record(msg, {sender, recipient, created, text}). + +-record(timeline, {owner, msg_type, msgs}). +``` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.9/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. + + +Bucket | Key Pattern | Example Key +:------|:------------|:----------- +`Users` | `<user_name>` | `joeuser` +`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z` +`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br/>`marketing_group_Inbox_2014-03-06Z` |
+ +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. For the +`Msgs` bucket, let's use a combination of the username and the posting +datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601) +format. This combination gives us the pattern `<user_name>_<datetime>`, +which produces keys like `joeuser_2014-03-05T23:20:28Z`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for +users, and `<group_name>_Inbox_<date>` for groups, which will look like +`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2 MB. Objects larger than that can +hurt performance, especially if many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, let's write some modules to +act as repositories that will help us create and work with these records +in Riak: + +```erlang +%% user_repository.erl + +-module(user_repository). +-export([save_user/2, + get_user/2]). +-include("msgy.hrl"). + +save_user(ClientPid, User) -> + RUser = riakc_obj:new(?USER_BUCKET, + list_to_binary(User#user.user_name), + User), + riakc_pb_socket:put(ClientPid, RUser). + +get_user(ClientPid, UserName) -> + {ok, RUser} = riakc_pb_socket:get(ClientPid, + ?USER_BUCKET, + list_to_binary(UserName)), + binary_to_term(riakc_obj:get_value(RUser)). +``` + +
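+For example, once this module is compiled, it can be exercised from the Erlang shell like so (a sketch; it assumes a node listening on the same host and port used earlier, and that the record definitions have been loaded): + +```erlang +%% rr/1 loads the #user{} record definition into the shell. +rr("msgy.hrl"). +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +user_repository:save_user(Pid, #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}). +Joe = user_repository:get_user(Pid, "joeuser"). +``` +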
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/erlang/querying.md b/content/riak/kv/2.9.9/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..2004e55a5a --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/erlang/querying.md @@ -0,0 +1,308 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/querying-erlang + - /riak/kv/2.9.9/dev/taste-of-riak/querying-erlang +--- + + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/2.9.9/developing/key-value-modeling). 
+ +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle: Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +%% Remember to replace the IP and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). + +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj).
+
+StoreOrder = fun(Order) ->
+    OrderObj = riakc_obj:new(OrderBucket,
+                             list_to_binary(
+                               integer_to_list(
+                                 Order#order.order_id)),
+                             Order),
+    riakc_pb_socket:put(Pid, OrderObj)
+end.
+
+lists:foreach(StoreOrder, Orders).
+
+
+OrderSummaryObj = riakc_obj:new(OrderSummariesBucket,
+                                list_to_binary(
+                                  integer_to_list(
+                                    OrderSummary#order_summary.customer_id)),
+                                OrderSummary).
+
+riakc_pb_socket:put(Pid, OrderSummaryObj).
+
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we show this information in our application often, it's only
+one extra request to get all the info.
+
+```erlang
+{ok, FetchedCustomer} = riakc_pb_socket:get(Pid,
+                                            CustomerBucket,
+                                            <<"1">>).
+{ok, FetchedSummary} = riakc_pb_socket:get(Pid,
+                                           OrderSummariesBucket,
+                                           <<"1">>).
+rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)),
+    binary_to_term(riakc_obj:get_value(FetchedSummary))}).
+```
+
+Which returns our amalgamated objects:
+
+```erlang
+{#customer{customer_id = 1,name = "John Smith",
+           address = "123 Main Street",city = "Columbus",
+           state = "Ohio",zip = "43210",phone = "+1-614-555-5555",
+           created_date = {{2013,10,1},{14,30,26}}},
+ #order_summary{customer_id = 1,
+                summaries = [#order_summary_entry{order_id = 1,
+                                                  total = 415.98,
+                                                  order_date = {{2013,10,1},{14,42,26}}},
+                             #order_summary_entry{order_id = 2,total = 359.99,
+                                                  order_date = {{2013,10,15},{16,43,16}}},
+                             #order_summary_entry{order_id = 3,total = 74.98,
+                                                  order_date = {{2013,11,3},{17,45,28}}}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.9/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```erlang
+FormatDate = fun(DateTime) ->
+    {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime,
+    %% Zero-pad each component so that the binary index values
+    %% sort chronologically (lists:concat would produce ambiguous,
+    %% unpadded strings).
+    lists:flatten(io_lib:format("~4..0B~2..0B~2..0B~2..0B~2..0B~2..0B",
+                                [Year, Month, Day, Hour, Min, Sec]))
+end.
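+
+%% A quick sanity check of the padded format (values match the orders above):
+%%   FormatDate({{2013,10,1},{14,42,26}}) -> "20131001144226"
+%%   FormatDate({{2013,11,3},{17,45,28}}) -> "20131103174528"
+%% These are the strings the binary range query below compares against.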
+
+AddIndicesToOrder = fun(OrderKey) ->
+    {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket,
+                                      list_to_binary(integer_to_list(OrderKey))),
+
+    OrderData = binary_to_term(riakc_obj:get_value(Order)),
+    OrderMetadata = riakc_obj:get_update_metadata(Order),
+
+    MD1 = riakc_obj:set_secondary_index(OrderMetadata,
+                                        [{{binary_index, "order_date"},
+                                          [FormatDate(OrderData#order.order_date)]}]),
+
+    MD2 = riakc_obj:set_secondary_index(MD1,
+                                        [{{integer_index, "salesperson_id"},
+                                          [OrderData#order.salesperson_id]}]),
+
+    Order2 = riakc_obj:update_metadata(Order, MD2),
+    riakc_pb_socket:put(Pid, Order2)
+end.
+
+lists:foreach(AddIndicesToOrder, [1,2,3]).
+
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we
+have to add entries to the indices at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```erlang
+riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"3">>],
+                      undefined,undefined}}
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`order_date_bin` index for entries between `20131001` and `20131031`.
+
+```erlang
+riakc_pb_socket:get_index_range(Pid, OrderBucket,
+                                {binary_index, "order_date"},
+                                <<"20131001">>, <<"20131031">>).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"2">>],
+                      undefined,undefined}}
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So, to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indices can have either Integer or Binary (String) keys.
+* You can search for specific values, or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/getting-started/golang.md b/content/riak/kv/2.9.9/developing/getting-started/golang.md
new file mode 100644
index 0000000000..294dc8c460
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/getting-started/golang.md
@@ -0,0 +1,82 @@
+---
+title: "Getting Started with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Go"
+    identifier: "getting_started_go"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/taste-of-riak/golang
+  - /riak/kv/2.9.9/dev/taste-of-riak/golang
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.9/using/running-a-cluster) first and ensure you have
+[a working installation of Go](http://golang.org/doc/install).
+
+## Client Setup
+
+First install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"sync"
+
+	riak "github.com/basho/riak-go-client"
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+	var err error
+
+	// un-comment-out to enable debug logging
+	// riak.EnableDebugLogging = true
+
+	o := &riak.NewClientOptions{
+		RemoteAddresses: []string{util.GetRiakAddress()},
+	}
+
+	var c *riak.Client
+	c, err = riak.NewClient(o)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	defer func() {
+		if err := c.Stop(); err != nil {
+			util.ErrExit(err)
+		}
+	}()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.9/developing/getting-started/golang/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/getting-started/golang/crud-operations.md b/content/riak/kv/2.9.9/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..3df2ec510f
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,376 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+aliases:
+---
+
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+	val1 := uint32(1)
+	val1buf := make([]byte, 4)
+	binary.LittleEndian.PutUint32(val1buf, val1)
+
+	val2 := "two"
+
+	val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+	var val3json []byte
+	val3json, err = json.Marshal(val3)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	bucket := "test"
+
+	util.Log.Println("Creating Objects In Riak...")
+
+	objs := []*riak.Object{
+		{
+			Bucket:      bucket,
+			Key:         "one",
+			ContentType: "application/octet-stream",
+			Value:       val1buf,
+		},
+		{
+			Bucket:      bucket,
+			Key:         "two",
+			ContentType: "text/plain",
+			Value:       []byte(val2),
+		},
+		{
+			Bucket:      bucket,
+			Key:         "three",
+			ContentType: "application/json",
+			Value:       val3json,
+		},
+	}
+
+	var cmd riak.Command
+	wg := &sync.WaitGroup{}
+
+	for _, o := range objs {
+		cmd, err = riak.NewStoreValueCommandBuilder().
+			WithContent(o).
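+			// each riak.Object above sets its own Bucket and Key,
+			// so the store builder only needs the content here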
+ Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } + } + + wg.Wait() +``` + +In our first object, we have stored the integer 1 with the lookup key +of `one`: + +```golang +{ + Bucket: bucket, + Key: "one", + ContentType: "application/octet-stream", + Value: val1buf, +} +``` + +For our second object, we stored a simple string value of `two` with a +matching key: + +```golang +{ + Bucket: bucket, + Key: "two", + ContentType: "text/plain", + Value: []byte(val2), +} +``` + +Finally, the third object we stored was a bit of JSON: + +```golang +{ + Bucket: bucket, + Key: "three", + ContentType: "application/json", + Value: val3json, +} +``` + +## Reading Objects + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +Requesting the objects by key: + +```golang +var cmd riak.Command +wg := &sync.WaitGroup{} + +for _, o := range objs { + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(o). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() + +util.Log.Println("Reading Objects From Riak...") + +d := make(chan riak.Command, len(objs)) + +for _, o := range objs { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + Done: d, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +close(d) +``` + +Converting to JSON to compare a string key to a symbol +key: + +```golang +for done := range d { + f := done.(*riak.FetchValueCommand) + /* un-comment to dump fetched object as JSON + if json, jerr := json.MarshalIndent(f.Response, "", " "); err != nil { + util.ErrLog.Println(jerr) + } else { + util.Log.Println("fetched value: ", string(json)) + } + */ + obj := f.Response.Values[0] + switch obj.Key { + case "one": + if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "two": + if actual, expected := string(obj.Value), val2; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "three": + obj3 = obj + val3.MyValue = 0 + if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) + } else { + if actual, expected := val3.MyValue, int(3); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + } + default: + util.ErrLog.Printf("unrecognized key: %s", obj.Key) + } +} +``` + +## Updating Objects + +While some data may be static, other forms of data need to be +updated. + +Let’s update some values: + +```golang +util.Log.Println("Updating Object Three In Riak...") + +val3.MyValue = 42 +obj3.Value, err = json.Marshal(val3) +if err != nil { + util.ErrExit(err) +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj3). + WithReturnBody(true). 
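+	// WithReturnBody(true) has Riak send the stored object back, so the
+	// updated value can be read from the command's Response below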
+ Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} + +svcmd := cmd.(*riak.StoreValueCommand) +svrsp := svcmd.Response +obj3 = svrsp.Values[0] +val3.MyValue = 0 +if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) +} else { + if actual, expected := val3.MyValue, int(42); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected) + } +} +util.Log.Println("updated object key: ", obj3.Key) +util.Log.Println("updated object value: ", val3.MyValue) +``` + +## Deleting Objects + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called against either the bucket or the +object. + +```golang +for _, o := range objs { + cmd, err = riak.NewDeleteValueCommandBuilder(). + WithBucket(o.Bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. + +For example, this `struct` that represents some information about +a book: + +```golang +type Book struct { + ISBN string + Title string + Author string + Body string + CopiesOwned uint16 +} + +book := &Book{ + ISBN: "1111979723", + Title: "Moby Dick", + Author: "Herman Melville", + Body: "Call me Ishmael. Some years ago...", + CopiesOwned: 3, +} +``` + +We now have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```golang +var jbook []byte +jbook, err = json.Marshal(book) +if err != nil { + util.ErrExit(err) +} + +bookObj := &riak.Object{ + Bucket: "books", + Key: book.ISBN, + ContentType: "application/json", + Value: jbook, +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(bookObj). + WithReturnBody(false). + Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} +``` + +If we fetch our book back and print the data: + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket("books"). + WithKey(book.ISBN). + Build() +if err != nil { + util.ErrExit(err) +} +if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) +} + +fcmd := cmd.(*riak.FetchValueCommand) +bookObj = fcmd.Response.Values[0] +util.Log.Println(string(bookObj.Value)) +``` + +The result is: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +Now, let’s delete the book: + +```golang +... 
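+
+// The full delete is elided above; a minimal sketch, mirroring the
+// "Deleting Objects" section earlier (assumes the same client `c` and
+// the `book` value from this page):
+cmd, err = riak.NewDeleteValueCommandBuilder().
+	WithBucket("books").
+	WithKey(book.ISBN).
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+if err := c.Execute(cmd); err != nil {
+	util.ErrLog.Println(err)
+}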
+``` + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/golang/object-modeling.md b/content/riak/kv/2.9.9/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..bad88249b1 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,552 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/object-modeling-golang + - /riak/kv/2.9.9/dev/taste-of-riak/object-modeling-golang +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/2.9.9/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<username>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z` <br> `marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+	"encoding/json"
+	"errors"
+
+	riak "github.com/basho/riak-go-client"
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+	Get(key string, notFoundOk bool) (models.Model, error)
+	Save(models.Model) (models.Model, error)
+	getBucketName() string
+	getModel() models.Model
+	getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+	client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+	return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(notFoundOk).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	fcmd := cmd.(*riak.FetchValueCommand)
+
+	if notFoundOk && len(fcmd.Response.Values) == 0 {
+		return nil, nil
+	}
+
+	if len(fcmd.Response.Values) > 1 {
+		// Siblings present that need resolution
+		// Here we'll just return an unexpected error
+		return nil, ErrUnexpectedSiblings
+	} else {
+		return buildModel(r.getModel(), fcmd.Response.Values[0])
+	}
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	key := m.GetId()
+
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(true).
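+		// notFoundOk=true: a missing key comes back as an empty response
+		// rather than an error, so a first-time save still works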
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
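+
+The test program below also imports a `managers` package that is not
+listed in this chapter. As a rough, hypothetical sketch (the actual
+implementation ships with the taste-of-riak code download), a
+`TimelineManager` that glues the `MsgRepository` together with the
+`TimelineRepository` defined next might look like this:
+
+```golang
+package managers
+
+import (
+	"fmt"
+	"time"
+
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+	repos "github.com/basho/taste-of-riak/go/ch03/repositories"
+)
+
+type TimelineManager struct {
+	timelineRepo *repos.TimelineRepository
+	msgRepo      *repos.MsgRepository
+}
+
+func NewTimelineManager(t *repos.TimelineRepository, m *repos.MsgRepository) *TimelineManager {
+	return &TimelineManager{timelineRepo: t, msgRepo: m}
+}
+
+// PostMsg saves the message, then appends its key to the recipient's
+// Inbox timeline and the sender's Sent timeline.
+func (tm *TimelineManager) PostMsg(msg *models.Msg) error {
+	if _, err := tm.msgRepo.Save(msg); err != nil {
+		return err
+	}
+	msgKey := msg.GetId()
+	if err := tm.addTo(msg.Recipient, models.TimelineType_INBOX, msg.Created, msgKey); err != nil {
+		return err
+	}
+	return tm.addTo(msg.Sender, models.TimelineType_SENT, msg.Created, msgKey)
+}
+
+// GetTimeline fetches the timeline for an owner, type, and day.
+func (tm *TimelineManager) GetTimeline(owner string, ttype models.TimelineType, date time.Time) (*models.Timeline, error) {
+	key := timelineKey(owner, ttype, date)
+	m, err := tm.timelineRepo.Get(key, true)
+	if err != nil {
+		return nil, err
+	}
+	if m == nil {
+		// no messages that day: return an empty timeline rather than nil
+		return models.NewTimeline(key), nil
+	}
+	return m.(*models.Timeline), nil
+}
+
+func (tm *TimelineManager) addTo(owner string, ttype models.TimelineType, created time.Time, msgKey string) error {
+	key := timelineKey(owner, ttype, created)
+	m, err := tm.timelineRepo.Get(key, true) // notFoundOk: the day's timeline may not exist yet
+	if err != nil {
+		return err
+	}
+	var t *models.Timeline
+	if m == nil {
+		t = models.NewTimeline(key)
+	} else {
+		t = m.(*models.Timeline)
+	}
+	t.AddMsg(msgKey)
+	_, err = tm.timelineRepo.Save(t)
+	return err
+}
+
+// timelineKey builds keys following the <owner>_<type>_<date> pattern,
+// e.g. "joeuser_Sent_2014-03-06Z".
+func timelineKey(owner string, ttype models.TimelineType, date time.Time) string {
+	name := "Inbox"
+	if ttype == models.TimelineType_SENT {
+		name = "Sent"
+	}
+	return fmt.Sprintf("%s_%s_%sZ", owner, name, date.UTC().Format("2006-01-02"))
+}
+```
+
+With something like this in place, the calls made by the test program
+(`PostMsg` and `GetTimeline`) line up with the repositories above.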
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names. +* How to choose natural keys based on how we want to partition our data. + + + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/golang/querying.md b/content/riak/kv/2.9.9/developing/getting-started/golang/querying.md new file mode 100644 index 0000000000..b2d5868604 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/golang/querying.md @@ -0,0 +1,580 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Go" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Querying" + identifier: "getting_started_go_query" + weight: 101 + parent: "getting_started_go" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/querying-golang + - /riak/kv/2.9.9/dev/taste-of-riak/querying-golang +--- + +## Go Version Setup + +For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix* OS. + +>A Quick Note on Querying and Schemas: +> +>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it. 
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
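+			// each Order is stored under its own Id; the riak.Object
+			// above carries both Bucket and Key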
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
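+	// same key, different bucket: the OrderSummary lives under the
+	// customer's generated key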
+ Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +doneChan := make(chan riak.Command) +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Bucket { + case customersBucket: + util.Log.Printf("Customer 1: %v", string(obj.Value)) + case orderSummariesBucket: + util.Log.Printf("OrderSummary 1: %v", string(obj.Value)) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} +``` + +Which returns our amalgamated objects: + +```sh +2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]} +2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z" +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/2.9.9/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/2.9.9/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/2.9.9/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/2.9.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: + +```golang +util.Log.Println("Adding Index Data") + +// fetch orders to add index data +cmds = cmds[:0] + +for _, order := range orders { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(ordersBucket). + WithKey(order.Id). 
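+		// re-fetch each order so 2i entries can be added to its
+		// metadata before storing it again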
+ Build() + if err != nil { + util.ErrExit(err) + } + cmds = append(cmds, cmd) +} + +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +errored = false +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Key { + case "1": + obj.AddToIntIndex("SalespersonId_int", 9000) + obj.AddToIndex("OrderDate_bin", "2013-10-01") + case "2": + obj.AddToIntIndex("SalespersonId_int", 9001) + obj.AddToIndex("OrderDate_bin", "2013-10-15") + case "3": + obj.AddToIntIndex("SalespersonId_int", 9000) + obj.AddToIndex("OrderDate_bin", "2013-11-03") + } + scmd, serr := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if serr != nil { + util.ErrExit(serr) + } + a := &riak.Async{ + Command: scmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} + +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +wg.Wait() +close(doneChan) +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level. + +Now let's find all of Jane Appleseed's processed orders. We'll lookup the orders by searching the `saleperson_id_int` index for Jane's id of `9000`: + +```golang +util.Log.Println("Index Queries") + +cmd, err = riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucket(ordersBucket). + WithIndexName("SalespersonId_int"). + WithIndexKey("9000"). + Build() +if err != nil { + util.ErrExit(err) +} + +if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) +} + +qcmd := cmd.(*riak.SecondaryIndexQueryCommand) +for _, rslt := range qcmd.Response.Results { + util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey)) +} +``` + +Which returns: + +```sh +2015/12/29 09:44:10 Jane's Orders, key: 3 +2015/12/29 09:44:10 Jane's Orders, key: 1 +``` + +Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id, next let's use a *binary* index. + +Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`: + +```golang +cmd, err = riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucket(ordersBucket). + WithIndexName("OrderDate_bin"). + WithRange("2013-10-01", "2013-10-31"). + Build() +if err != nil { + util.ErrExit(err) +} + +if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) +} + +qcmd = cmd.(*riak.SecondaryIndexQueryCommand) +for _, rslt := range qcmd.Response.Results { + util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey)) +} +``` + +Which returns: + +```sh +2015/12/29 09:44:10 October's Orders, key: 1 +2015/12/29 09:44:10 October's Orders, key: 2 +``` + +Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys. +* You can search for specific values or a range of values. +* Riak will return a list of keys that match the index query. + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/java.md b/content/riak/kv/2.9.9/developing/getting-started/java.md new file mode 100644 index 0000000000..6327ecf16b --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/java.md @@ -0,0 +1,93 @@ +--- +title: "Getting Started with Java" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Java" + identifier: "getting_started_java" + weight: 100 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/java + - /riak/kv/2.9.9/dev/taste-of-riak/java +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.9/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Java is required. + +## Client Setup + +To include the Riak Java client in your project, add it to your +project's dependencies. Here is a Maven example: + +```xml + + + com.basho.riak + riak-client + 2.1.1 + +``` + +Next, download +[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java) +source code for this tutorial, and save it to your working directory. + +{{% note title="Configuring for a local cluster" %}} +The `TasteOfRiak.java` file that you downloaded is set up to communicate with +a 1-node Riak cluster listening on `localhost` port 10017. We recommend +modifying the connection info directly within the `setUpCluster()` method. +{{% /note %}} + +If you execute the `TasteOfRiak.java` file within your IDE, you should +see the following: + +``` +Basic object created +Location object created for quote object +StoreValue operation created +Client object successfully created +Object storage operation successfully completed +Success! The object we created and the object we fetched have the same value +Quote object successfully deleted +Book object created +Moby Dick information now stored in Riak +Book object successfully fetched +Success! All of our tests check out +``` + +Since Java doesn’t have a REPL environment, let's walk through the code +to see what it actually did at each step. + +## Setting Up the Cluster + +The first step in using the Riak Java client is to create a cluster +object to facilitate all interactions with Riak. You'll see this on line +72: + +```java +RiakCluster cluster = setUpCluster(); +``` + +This calls the private `setUpCluster` method which begins on line 25. 
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/2.9.9/developing/getting-started/java/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/getting-started/java/crud-operations.md b/content/riak/kv/2.9.9/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..577ea7b155
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,206 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+aliases:
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on.
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Icemand");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{}}riak/kv/2.9.9/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.9/developing/usage/conflict-resolution/)
+documentation.
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book:
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the simpler
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+
+To update the POJO, we would use `UpdateValue` by
+extending a new `BookUpdate` class as follows:
+
+```java
+public static class BookUpdate extends UpdateValue.Update<Book> {
+    private final Book update;
+    public BookUpdate(Book update){
+        this.update = update;
+    }
+
+    @Override
+    public Book apply(Book t) {
+        if(t == null) {
+            t = new Book();
+        }
+
+        t.author = update.author;
+        t.body = update.body;
+        t.copiesOwned = update.copiesOwned;
+        t.isbn = update.isbn;
+        t.title = update.title;
+
+        return t;
+    }
+}
+```
+
+Then using the `BookUpdate` class with our `mobyDick` object:
+
+```java
+mobyDick.copiesOwned = 5;
+BookUpdate updatedBook = new BookUpdate(mobyDick);
+
+UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation)
+        .withUpdate(updatedBook).build();
+UpdateValue.Response response = client.execute(updateValue);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{}}riak/kv/2.9.9/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/2.9.9/developing/usage/conflict-resolution/)
+documentation.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/getting-started/java/object-modeling.md b/content/riak/kv/2.9.9/developing/getting-started/java/object-modeling.md
new file mode 100644
index 0000000000..3134f08331
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/getting-started/java/object-modeling.md
@@ -0,0 +1,432 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Java"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Object Modeling"
+    identifier: "getting_started_java_object"
+    weight: 102
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/taste-of-riak/object-modeling-java
+  - /riak/kv/2.9.9/dev/taste-of-riak/object-modeling-java
+---
+
+To get started, let's create the models that we'll be using.
+
+```java
+package com.basho.msgy.Models;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class Msg {
+    public String Sender;
+    public String Recipient;
+    public String Created;
+    public String Text;
+
+    public static Msg createNew(String sender, String recipient, String text) {
+        Msg msg = new Msg();
+        msg.Sender = sender;
+        msg.Recipient = recipient;
+        msg.Text = text;
+        msg.Created = GetCurrentISO8601Timestamp();
+        return msg;
+    }
+
+    private static String GetCurrentISO8601Timestamp() {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        // Java Dates don't have microsecond resolution :(
+        // Pad out to microseconds to match other examples.
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.convert.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "msgs";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<username>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z` <br> `marketing_group_Inbox_2014-03-06Z` |
`marketing_group_Inbox_2014-03-06Z` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. With the +Java client, we can use the `@RiakKey` annotation to tell the client +that we want to use the `UserName` member as the key. It will +automatically use that value in the future, instead of having to pass the +key in as another parameter when storing a value. + +For the `Msgs` bucket, let's use a combination of the username and the +posting datetime in an [ISO 8601 +Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination +gives us the pattern `_`, which produces keys like +`joeuser_2014-03-05T23:20:28Z`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `_Inbox_` for groups, which will look like +`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2MB. Objects larger than that can hurt +performance, especially many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, let's write some +repositories to help create and work with these objects in Riak: + +```java +package com.basho.msgy.Repositories; + +import com.basho.msgy.Models.Msg; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakRetryFailedException; +import com.basho.riak.client.bucket.Bucket; + +public class MsgRepository { + + static final String BUCKET_NAME = "Msgs"; + protected RiakClient client; + + public MsgRepository(RiakClient client) { + this.client = client; + } + + public Msg get(String msgKey) throws Exception { + Location key = new Location(new Namespace(BUCKET_NAME), msgKey); + FetchValue fetch = new FetchValue.Builder(key).build(); + FetchValue.Response response = client.execute(fetch); + return response.getValue(Msg.class); + } + + public String save(Msg msg) throws Exception { + StoreValue store = new StoreValue.Builder(msg).build(); + client.execute(store); + return generateKey(msg); + } + + private String generateKey(Msg msg) { + return msg.Sender + "_" + msg.Created; + } +} + +// ---------------------------------------------------------------------------- + +package com.basho.msgy.Repositories; + +import com.basho.msgy.Models.Msg; +import com.basho.msgy.Models.Timeline; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakRetryFailedException; +import com.basho.riak.client.bucket.Bucket; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.TimeZone; + +public class TimelineRepository { + + static final String BUCKET_NAME = "Timelines"; + protected RiakClient client; + protected MsgRepository msgRepo; + + public TimelineRepository(RiakClient client) { + this.client = client; + this.msgRepo = new MsgRepository(this.client); + } + + public void postMsg(Msg msg) throws Exception { + String msgKey = msgRepo.save(msg); + + // Post to recipient's Inbox timeline + addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey); + + // Post to sender's Sent timeline + 
addToTimeline(msg, Timeline.TimelineType.Sent, msgKey); + } + + + private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception { + String timelineKey = generateKeyFromMsg(msg, type); + + Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey); + FetchValue fetch = new FetchValue.Builder(loc).build(); + Timeline timeline = client.execute(fetch).getValue(Timeline.class); + + if (timeline != null) { + timeline = addToExistingTimeline(timeline,msgKey); + } else { + timeline = createNewTimeline(msg, type, msgKey); + } + + StoreValue store = new StoreValue.Builder(timeline).build(); + client.execute(store); + } + + public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) { + String owner = getOwner(msg, type); + + Timeline newTimeline = new Timeline(); + newTimeline.Owner = owner; + newTimeline.Type = type.toString(); + newTimeline.Msgs.add(msgKey); + + return newTimeline; + } + + public Timeline addToExistingTimeline(Timeline timeline, String msgKey) { + timeline.Msgs.add(msgKey); + return timeline; + } + + public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws RiakRetryFailedException { + String timelineKey = generateKey(ownerUsername, type, date); + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(timelineKey, Timeline.class).execute(); + } + + private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) { + String owner = getOwner(msg, type); + String dateString = msg.Created.substring(0, 10); + return generateKey(owner, type, dateString); + } + + private String getOwner(Msg msg, Timeline.TimelineType type) { + if(type == Timeline.TimelineType.Inbox) + return msg.Recipient; + else + return msg.Sender; + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) { + String dateString = getIso8601DateStringFromDate(date); + return generateKey(ownerUsername, type, dateString); + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) { + return ownerUsername + "_" + type.toString() + "_" + dateString; + } + + private String getIso8601DateStringFromDate(Date date) { + TimeZone tz = TimeZone.getTimeZone("UTC"); + DateFormat df = new SimpleDateFormat("yyyy-MM-dd"); + df.setTimeZone(tz); + return df.format(date); + } + + +} + +// ---------------------------------------------------------------------------- + +package com.basho.msgy.Repositories; + +import com.basho.msgy.Models.User; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakRetryFailedException; +import com.basho.riak.client.bucket.Bucket; + +public class UserRepository { + static final String BUCKET_NAME = "Users"; + protected IRiakClient client; + + public UserRepository(IRiakClient client) { + this.client = client; + } + + public void save(User user) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + bucket.store(user).execute(); + } + + public User get(String UserName) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(UserName, User.class).execute(); + } +} + +``` + +Finally, let's test them: + +```java +package com.basho.msgy; + +import com.basho.msgy.Models.Msg; +import com.basho.msgy.Models.Timeline; +import com.basho.msgy.Models.User; +import com.basho.msgy.Repositories.MsgRepository; +import com.basho.msgy.Repositories.TimelineRepository; +import 
com.basho.msgy.Repositories.UserRepository; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakException; +import com.basho.riak.client.RiakFactory; + +import java.util.Date; + +public class MsgyMain { + + public static void main(String[] args) throws RiakException { + // Setup our repositories + IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017); + + UserRepository userRepo = new UserRepository(client); + MsgRepository msgRepo = new MsgRepository(client); + TimelineRepository timelineRepo = new TimelineRepository(client); + + // Create and save users + User marleen = new User("marleenmgr", + "Marleen Manager", + "marleen.manager@basho.com"); + + User joe = new User("joeuser", + "Joe User", + "joe.user@basho.com"); + + userRepo.save(marleen); + userRepo.save(joe); + + // Create new Msg, post to timelines + Msg msg = Msg.createNew(marleen.UserName, + joe.UserName, + "Welcome to the company!"); + + timelineRepo.postMsg(msg); + + + // Get Joe's inbox for today, get first message + Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName, + Timeline.TimelineType.Inbox, + new Date()); + + Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0)); + + System.out.println("From: " + joesFirstMsg.Sender); + System.out.println("Msg : " + joesFirstMsg.Text); + System.out.println(""); + + client.shutdown(); + } +} +``` + +As you can see, the repository pattern helps us with a few things: + + - It helps us to see if an object exists before creating a new one + - It keeps our buckets and key names consistent + - It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/java/querying.md b/content/riak/kv/2.9.9/developing/getting-started/java/querying.md new file mode 100644 index 0000000000..98edbfac5d --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/java/querying.md @@ -0,0 +1,280 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Java" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Querying" + identifier: "getting_started_java_query" + weight: 101 + parent: "getting_started_java" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/querying-java + - /riak/kv/2.9.9/dev/taste-of-riak/querying-java +--- + +## Java Version Setup + +For the Java version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`. 
You +may import this code into your favorite editor, or just run it from the +command line using the commands in `BuildAndRun.sh` if you are running +on a *nix* OS. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POJO's in `Customer.java`, `Order.java` and +`OrderSummaries.java`. Let's put some data into Riak so we can play +with it. 
+
+```java
+// From SipOfRiak.java
+
+private static Customer createCustomer() {
+    Customer customer = new Customer();
+    customer.CustomerId = 1;
+    customer.Name = "John Smith";
+    customer.Address = "123 Main Street";
+    customer.City = "Columbus";
+    customer.State = "Ohio";
+    customer.Zip = "43210";
+    customer.Phone = "+1-614-555-5555";
+    customer.CreatedDate = "2013-10-01 14:30:26";
+    return customer;
+}
+
+private static ArrayList<Order> createOrders() {
+    ArrayList<Order> orders = new ArrayList<Order>();
+
+    Order order1 = new Order();
+    order1.OrderId = 1;
+    order1.CustomerId = 1;
+    order1.SalespersonId = 9000;
+    order1.Items.add(
+        new Item("TCV37GIT4NJ",
+                 "USB 3.0 Coffee Warmer",
+                 15.99));
+    order1.Items.add(
+        new Item("PEG10BBF2PP",
+                 "eTablet Pro; 24GB; Grey",
+                 399.99));
+    order1.Total = 415.98;
+    order1.OrderDate = "2013-10-01 14:42:26";
+    orders.add(order1);
+
+    Order order2 = new Order();
+    order2.OrderId = 2;
+    order2.CustomerId = 1;
+    order2.SalespersonId = 9001;
+    order2.Items.add(
+        new Item("OAX19XWN0QP",
+                 "GoSlo Digital Camera",
+                 359.99));
+    order2.Total = 359.99;
+    order2.OrderDate = "2013-10-15 16:43:16";
+    orders.add(order2);
+
+    Order order3 = new Order();
+    order3.OrderId = 3;
+    order3.CustomerId = 1;
+    order3.SalespersonId = 9000;
+    order3.Items.add(
+        new Item("WYK12EPU5EZ",
+                 "Call of Battle = Goats - Gamesphere 4",
+                 69.99));
+    order3.Items.add(
+        new Item("TJB84HAA8OA",
+                 "Bricko Building Blocks",
+                 4.99));
+    order3.Total = 74.98;
+    order3.OrderDate = "2013-11-03 17:45:28";
+    orders.add(order3);
+    return orders;
+}
+
+private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
+    OrderSummary orderSummary = new OrderSummary();
+    orderSummary.CustomerId = 1;
+    for(Order order: orders)
+    {
+        orderSummary.Summaries.add(new OrderSummaryItem(order));
+    }
+    return orderSummary;
+}
+
+public static void main(String[] args) throws RiakException {
+
+    System.out.println("Creating Data");
+    Customer customer = createCustomer();
+    ArrayList<Order> orders = createOrders();
+    OrderSummary orderSummary = createOrderSummary(orders);
+
+    System.out.println("Starting Client");
+    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);
+
+
+    System.out.println("Creating Buckets");
+    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
+    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
+    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();
+
+    System.out.println("Storing Data");
+    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
+    for (Order order : orders) {
+        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
+    }
+    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info. 
+
+```java
+    System.out.println("Fetching related data by shared key");
+    String key = "1";
+    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
+    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
+    System.out.format("Customer 1: %s\n", fetchedCust);
+    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```java
+    System.out.println("Adding Index Data");
+    IRiakObject riakObj = ordersBucket.fetch("1").execute();
+    riakObj.addIndex("SalespersonId", 9000);
+    riakObj.addIndex("OrderDate", "2013-10-01");
+    ordersBucket.store(riakObj).execute();
+
+    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
+    riakObj2.addIndex("SalespersonId", 9001);
+    riakObj2.addIndex("OrderDate", "2013-10-15");
+    ordersBucket.store(riakObj2).execute();
+
+    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
+    riakObj3.addIndex("SalespersonId", 9000);
+    riakObj3.addIndex("OrderDate", "2013-11-03");
+    ordersBucket.store(riakObj3).execute();
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+```java
+    // Query for orders where the SalespersonId index is set to 9000
+    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
+                                           .withValue(9000).execute();
+
+    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
+```
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. 
Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```java + // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 + List octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate")) + .from("2013-10-01").to("2013-10-31").execute(); + + System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders)); +``` + +Which returns: + +```text +October's Orders: 1, 2 +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/nodejs.md b/content/riak/kv/2.9.9/developing/getting-started/nodejs.md new file mode 100644 index 0000000000..bc65d77c05 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/nodejs.md @@ -0,0 +1,104 @@ +--- +title: "Getting Started with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "NodeJS" + identifier: "getting_started_nodejs" + weight: 104 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/nodejs + - /riak/kv/2.9.9/dev/taste-of-riak/nodejs +--- + +[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js +[npm]: https://www.npmjs.com/package/basho-riak-client +[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.9/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Node.js 0.12 or later is +required. + +Code for these examples is available [here][introduction.js]. To run, follow +these directions: + +```bash +git clone git://github.com/basho/riak-nodejs-client-examples +cd riak-nodejs-client-examples +npm install +node ./app.js +``` + +### Client Setup + +Install [the Riak Node.js Client][node_js_installation] through [NPM][npm]. + +### Connecting to Riak + +Connecting to Riak with the Riak Node.js Client requires creating a new client +object and using the callback argument to know when the client is fully +initialized: + +```javascript +var Riak = require('basho-riak-client'); +var nodes = [ + 'riak-test:10017', + 'riak-test:10027', + 'riak-test:10037', + 'riak-test:10047' +]; +var client = new Riak.Client(nodes, function (err, c) { + // NB: at this point the client is fully initialized, and + // 'client' and 'c' are the same object +}); +``` + +This creates a new `Riak.Client` object which handles all the details of +tracking active nodes and also provides load balancing. The `Riak.Client` object +is used to send commands to Riak. 
When your application is completely done with +Riak communications, the following method can be used to gracefully shut the +client down and exit Node.js: + +```javascript +client.stop(function (err, rslt) { + // NB: you may wish to check err + process.exit(); +}); +``` + +Let's make sure the cluster is online with a `Ping` request: + +```javascript +var assert = require('assert'); + +client.ping(function (err, rslt) { + if (err) { + throw new Error(err); + } else { + // On success, ping returns true + assert(rslt === true); + } +}); +``` + +This is some simple code to test that a node in a Riak cluster is online - we +send a simple ping message. Even if the cluster isn't present, the Riak Node.js +Client will return a response message. In the callback it is important to check +that your activity was successful by checking the `err` variable. + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.9/developing/getting-started/nodejs/crud-operations) + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/2.9.9/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..d976edbad7 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,138 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "CRUD Operations" + identifier: "getting_started_nodejs_crud" + weight: 100 + parent: "getting_started_nodejs" +toc: true +aliases: +--- + +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + +### Creating Objects In Riak KV + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going +to want us to do productive work. Let's create some data to save in Riak. + +The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak +key/value objects. At the most basic, a `RiakObject` is responsible for +identifying your object and for translating it into a format that can be easily +saved to Riak. + +```javascript +var async = require('async'); + +var people = [ + { + emailAddress: "bashoman@basho.com", + firstName: "Basho", + lastName: "Man" + }, + { + emailAddress: "johndoe@gmail.com", + firstName: "John", + lastName: "Doe" + } +]; + +var storeFuncs = []; +people.forEach(function (person) { + // Create functions to execute in parallel to store people + storeFuncs.push(function (async_cb) { + client.storeValue({ + bucket: 'contributors', + key: person.emailAddress, + value: person + }, + function(err, rslt) { + async_cb(err, rslt); + } + ); + }); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +In this sample, we create a collection of `Person` objects and then save each +`Person` to Riak. Once again, we check the response from Riak. + +### Reading from Riak + +Let's find a person! + +```javascript +var logger = require('winston'); + +client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true }, + function (err, rslt) { + if (err) { + throw new Error(err); + } else { + var riakObj = rslt.values.shift(); + var bashoman = riakObj.value; + logger.info("I found %s in 'contributors'", bashoman.emailAddress); + } + } +); +``` + +We use `client.fetchValue` to retrieve an object from Riak. 
This returns an
+array of `RiakObject` objects, which helpfully encapsulate the communication
+with Riak.
+
+After verifying that we've been able to communicate with Riak *and* that we have
+a successful result, we use the `value` property to get the object, which has
+already been converted to a JavaScript object due to the use of `convertToJs:
+true` in the options.
+
+### Modifying Existing Data
+
+Let's say that Basho Man has decided to be known as Riak Man:
+
+```javascript
+bashoman.firstName = "Riak";
+riakObj.setValue(bashoman);
+
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Updating an object involves modifying a `RiakObject` then using
+`client.storeValue` to save the existing object.
+
+### Deleting Data
+
+```javascript
+client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Just like other operations, we check the results that have come back from Riak
+to make sure the object was successfully deleted.
+
+The Riak Node.js Client has a lot of additional functionality that makes it easy
+to build rich, complex applications with Riak. Check out the
+[documentation][nodejs_wiki] to learn more about working with the Riak Node.js
+Client and Riak.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/2.9.9/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..beb86e2c6c
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Object Modeling"
+    identifier: "getting_started_nodejs_object"
+    weight: 102
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/kv/2.9.9/dev/taste-of-riak/object-modeling-nodejs
+---
+
+To get started, let's create the models that we'll be using.
+
+* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
+* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
+* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br>`marketing_group_INBOX_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<user_name>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
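+
+As an illustration of the key patterns above, here is a minimal sketch in
+plain JavaScript. The helper names (`userKey`, `msgKey`, `timelineKey`) are
+ours for illustration only and are not part of the linked model classes:
+
+```javascript
+// Illustrative only: compute keys matching the patterns described above.
+function userKey(userName) {
+    // Users bucket: the userName itself is the key
+    return userName;
+}
+
+function msgKey(sender, createdIso8601) {
+    // Msgs bucket: <user_name>_<datetime>
+    // e.g. joeuser_2014-03-06T02:05:13.556Z
+    return sender + '_' + createdIso8601;
+}
+
+function timelineKey(owner, type, isoDate) {
+    // Timelines bucket: <owner>_<type>_<date>
+    // e.g. timelineKey('joeuser', 'SENT', '2014-03-06')
+    //      => 'joeuser_SENT_2014-03-06'
+    return owner + '_' + type + '_' + isoDate;
+}
+```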
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/nodejs/querying.md b/content/riak/kv/2.9.9/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..a6df770eda --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/nodejs/querying.md @@ -0,0 +1,146 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/querying-nodejs + - /riak/kv/2.9.9/dev/taste-of-riak/querying-nodejs +--- + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. + +* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33) +* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262) + +While individual Customer and Order objects don't change much (or +shouldn't change), the "Order Summary" object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. 
If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)
+
+Which returns our amalgamated objects:
+
+```bash
+info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
+info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
+2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175)
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/getting-started/php.md b/content/riak/kv/2.9.9/developing/getting-started/php.md
new file mode 100644
index 0000000000..2fcb1d7de6
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/getting-started/php.md
@@ -0,0 +1,80 @@
+---
+title: "Getting Started with PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "PHP"
+    identifier: "getting_started_php"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/taste-of-riak/php
+  - /riak/kv/2.9.9/dev/taste-of-riak/php
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.9/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of PHP is required, along with [Composer](https://getcomposer.org/) to install the client library package.
+
+## Client Setup
+Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)).
+
+From the `taste-of-riak` directory, use Composer to install the Riak PHP 2.0 client:
+
+```bash
+php path/to/your/composer.phar install
+
+# If you did a global install of composer, run this instead:
+composer install
+```
+
+If you set up a local Riak cluster using the five-minute install method, change line 11 from `->onPort(8098)` to `->onPort(10018)`.
+
+Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:
+
+```text
+Reading Objects From Riak...
+Updating Objects In Riak...
+Deleting Objects From Riak...
+Working With Complex Objects...
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+Yay, success!
+
+Since we didn't use PHP's REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting up the PHP Client and connections
+
+```php
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+```
+
+This code loads the library, declares the necessary `use` statements for our code, and then initializes and configures a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
+Once we call `build()` on the builder, it returns a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.
+
+We are now ready to start interacting with Riak. 
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.9/developing/getting-started/php/crud-operations) + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/php/crud-operations.md b/content/riak/kv/2.9.9/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..a8fdb40a40 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/php/crud-operations.md @@ -0,0 +1,187 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with PHP" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "CRUD Operations" + identifier: "getting_started_php_crud" + weight: 100 + parent: "getting_started_php" +toc: true +aliases: +--- + +## Creating Objects In Riak +First, let’s create a few objects and a bucket to keep them in. + +```php +$bucket = new Riak\Bucket('testBucket'); + +$val1 = 1; +$location1 = new Riak\Location('one', $bucket); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val1) + ->atLocation($location1) + ->build(); +$storeCommand1->execute(); +``` + +In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key. + +```php +$val2 = 'two'; +$location2 = new Riak\Location('two', $bucket); + +$storeCommand2 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val2) + ->atLocation($location2) + ->build(); +$storeCommand2->execute(); +``` + +That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now. + +```php +$val3 = ['myValue' => 3]; +$location3 = new Riak\Location('three', $bucket); + +$storeCommand3 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($val3) + ->atLocation($location3) + ->build(); +$storeCommand3->execute(); +``` + +## Reading Objects From Riak +Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect. + +```php +$response1 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location1) + ->build() + ->execute(); + +$response2 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location2) + ->build() + ->execute(); + +$response3 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location3) + ->withDecodeAsAssociative() + ->build() + ->execute(); + +print_r($response1->getObject()->getData()); +print_r($response2->getObject()->getData()); +print_r($response3->getObject()->getData()); +``` + +That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html). +For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object. + +In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data. + +## Updating Objects In Riak +While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42. 
+ +```php +$object3 = $response3->getObject(); +$data3 = $object3->getData(); + +$data3['myValue'] = 42; +$object3 = $object3->setData(json_encode($data3)); + +$updateCommand = (new Command\Builder\StoreObject($riak)) + ->withObject($object3) + ->atLocation($location3) + ->build(); + +$updateCommand->execute(); +``` + +First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object. +To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object. + +## Deleting Objects From Riak +As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it. + +```php +(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute(); +``` + +### Working With Complex Objects +Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take for example, this plain old PHP object(POPO) that encapsulates some knowledge about a book. + +```php +class Book +{ + var $title; + var $author; + var $body; + var $isbn; + var $copiesOwned; +} + +$book = new Book(); +$book->isbn = '1111979723'; +$book->title = 'Moby Dick'; +$book->author = 'Herman Melville'; +$book->body = 'Call me Ishmael. Some years ago...'; +$book->copiesOwned = 3; +``` + +Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now: + +```php +$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books')); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($book) + ->atLocation($bookLocation) + ->build(); + +$storeCommand1->execute(); +``` + +Some of you may be thinking “But how does the Riak client encode/decode my object”? If we fetch the binary version of our book back and print it as a string, we shall know: + +```php +$fetchBookResponse = (new Command\Builder\FetchObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); + +print('Serialized Object:' . PHP_EOL); +print($fetchBookResponse->getBody() . PHP_EOL); +``` + +```json +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder. 
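+
+If you want the fetched data back as a `Book` rather than a `stdClass`, one
+simple approach is to copy the decoded fields over yourself. This is just a
+sketch of ours, not a feature of the client API:
+
+```php
+// Sketch: rehydrate a Book from the decoded response data.
+// getObject()->getData() returns the JSON decoded as a stdClass by default.
+$data = $fetchBookResponse->getObject()->getData();
+
+$fetchedBook = new Book();
+$fetchedBook->title = $data->title;
+$fetchedBook->author = $data->author;
+$fetchedBook->body = $data->body;
+$fetchedBook->isbn = $data->isbn;
+$fetchedBook->copiesOwned = $data->copiesOwned;
+```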
+ +Now that we’ve ruined the magic of object encoding, let’s clean up our mess: + +```php +(new Command\Builder\DeleteObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); +``` + +## Next Steps + +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/2.9.9/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/php/querying.md b/content/riak/kv/2.9.9/developing/getting-started/php/querying.md new file mode 100644 index 0000000000..34f7fdb80f --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/php/querying.md @@ -0,0 +1,408 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with PHP" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Querying" + identifier: "getting_started_php_query" + weight: 101 + parent: "getting_started_php" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/querying-php + - /riak/kv/2.9.9/dev/taste-of-riak/querying-php +--- + +## A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +## Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. 
+
+```php
+<?php
+
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Command;
+use Basho\Riak\Location;
+use Basho\Riak\Node;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Class definitions for our models
+
+class Customer
+{
+    var $customerId;
+    var $name;
+    var $address;
+    var $city;
+    var $state;
+    var $zip;
+    var $phone;
+    var $createdDate;
+}
+
+class Order
+{
+    public function __construct()
+    {
+        $this->items = array();
+    }
+    var $orderId;
+    var $customerId;
+    var $salespersonId;
+    var $items;
+    var $total;
+    var $orderDate;
+}
+
+class Item
+{
+    public function __construct($itemId, $title, $price)
+    {
+        $this->itemId = $itemId;
+        $this->title = $title;
+        $this->price = $price;
+    }
+    var $itemId;
+    var $title;
+    var $price;
+}
+
+class OrderSummary
+{
+    public function __construct()
+    {
+        $this->summaries = array();
+    }
+    var $customerId;
+    var $summaries;
+}
+
+class OrderSummaryItem
+{
+    public function __construct(Order $order)
+    {
+        $this->orderId = $order->orderId;
+        $this->total = $order->total;
+        $this->orderDate = $order->orderDate;
+    }
+    var $orderId;
+    var $total;
+    var $orderDate;
+}
+
+
+// Creating Data
+$customer = new Customer();
+$customer->customerId = 1;
+$customer->name = 'John Smith';
+$customer->address = '123 Main Street';
+$customer->city = 'Columbus';
+$customer->state = 'Ohio';
+$customer->zip = '43210';
+$customer->phone = '+1-614-555-5555';
+$customer->createdDate = '2013-10-01 14:30:26';
+
+
+$orders = [];
+
+$order1 = new Order();
+$order1->orderId = 1;
+$order1->customerId = 1;
+$order1->salespersonId = 9000;
+$order1->items = [
+    new Item(
+        'TCV37GIT4NJ',
+        'USB 3.0 Coffee Warmer',
+        15.99
+    ),
+    new Item(
+        'PEG10BBF2PP',
+        'eTablet Pro; 24GB; Grey',
+        399.99
+    )
+];
+$order1->total = 415.98;
+$order1->orderDate = '2013-10-01 14:42:26';
+$orders[] = $order1;
+
+$order2 = new Order();
+$order2->orderId = 2;
+$order2->customerId = 1;
+$order2->salespersonId = 9001;
+$order2->items = [
+    new Item(
+        'OAX19XWN0QP',
+        'GoSlo Digital Camera',
+        359.99
+    )
+];
+$order2->total = 359.99;
+$order2->orderDate = '2013-10-15 16:43:16';
+$orders[] = $order2;
+
+$order3 = new Order();
+$order3->orderId = 3;
+$order3->customerId = 1;
+$order3->salespersonId = 9000;
+$order3->items = [
+    new Item(
+        'WYK12EPU5EZ',
+        'Call of Battle = Goats - Gamesphere 4',
+        69.99
+    ),
+    new Item(
+        'TJB84HAA8OA',
+        'Bricko Building Blocks',
+        4.99
+    )
+];
+$order3->total = 74.98;
+$order3->orderDate = '2013-11-03 17:45:28';
+$orders[] = $order3;
+
+
+$orderSummary = new OrderSummary();
+$orderSummary->customerId = 1;
+foreach ($orders as $order) {
+    $orderSummary->summaries[] = new OrderSummaryItem($order);
+}
+unset($order);
+
+
+
+// Starting Client
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Creating Buckets
+$customersBucket = new Riak\Bucket('Customers');
+$ordersBucket = new Riak\Bucket('Orders');
+$orderSummariesBucket = new Riak\Bucket('OrderSummaries');
+
+// Storing Data
+$storeCustomer = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($customer)
+    ->atLocation(new Location($customer->customerId, $customersBucket))
+    ->build();
+$storeCustomer->execute();
+
+foreach ($orders as $order) {
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($order)
+        ->atLocation(new Location($order->orderId, $ordersBucket))
+        ->build();
+    $storeOrder->execute();
+}
+unset($order);
+
+$storeSummary = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($orderSummary)
+    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
+    ->build();
+$storeSummary->execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `OrderSummaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```php
+// Fetching related data by shared key
+$fetched_customer = (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $customersBucket))
+    ->build()->execute()->getObject()->getData();
+
+$fetched_customer->orderSummary =
+    (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $orderSummariesBucket))
+    ->build()->execute()->getObject()->getData();
+
+print("Customer with OrderSummary data: \n");
+print_r($fetched_customer);
+```
+
+Which returns our amalgamated objects:
+
+```text
+Customer with OrderSummary data:
+stdClass Object
+(
+    [customerId] => 1
+    [name] => John Smith
+    [address] => 123 Main Street
+    [city] => Columbus
+    [state] => Ohio
+    [zip] => 43210
+    [phone] => +1-614-555-5555
+    [createdDate] => 2013-10-01 14:30:26
+    [orderSummary] => stdClass Object
+        (
+            [customerId] => 1
+            [summaries] => Array
+                (
+                    [0] => stdClass Object
+                        (
+                            [orderId] => 1
+                            [total] => 415.98
+                            [orderDate] => 2013-10-01 14:42:26
+                        )
+
+                    [1] => stdClass Object
+                        (
+                            [orderId] => 2
+                            [total] => 359.99
+                            [orderDate] => 2013-10-15 16:43:16
+                        )
+
+                    [2] => stdClass Object
+                        (
+                            [orderId] => 3
+                            [total] => 74.98
+                            [orderDate] => 2013-11-03 17:45:28
+                        )
+                )
+        )
+)
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```php
+// Adding Index Data
+$keys = array(1,2,3);
+foreach ($keys as $key) {
+    $orderLocation = new Location($key, $ordersBucket);
+    $orderObject = (new Command\Builder\FetchObject($riak))
+        ->atLocation($orderLocation)
+        ->build()->execute()->getObject();
+
+    $order = $orderObject->getData();
+
+    $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId);
+    $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate);
+
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->withObject($orderObject)
+        ->atLocation($orderLocation)
+        ->build();
+    $storeOrder->execute();
+}
+unset($key);
+
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. 
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id. Next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which, as you can see, returns the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/getting-started/python.md b/content/riak/kv/2.9.9/developing/getting-started/python.md
new file mode 100644
index 0000000000..6514b98b25
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/getting-started/python.md
@@ -0,0 +1,103 @@
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/taste-of-riak/python
+  - /riak/kv/2.9.9/dev/taste-of-riak/python
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/2.9.9/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`.
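+
+Either way, it's worth confirming which interpreter and package manager
+your shell will pick up before you install anything. A quick, optional
+check (exact versions will vary by system):
+
+```bash
+python --version
+pip --version
+```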
+ +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` - Header files and a static library for Python +* `libffi-dev` - Foreign function interface library +* `libssl-dev` - libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.9/developing/getting-started/python/crud-operations) + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/python/crud-operations.md b/content/riak/kv/2.9.9/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..33387759ac --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/python/crud-operations.md @@ -0,0 +1,150 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/python/object-modeling.md b/content/riak/kv/2.9.9/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..c6f32faf10 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/python/object-modeling.md @@ -0,0 +1,264 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/object-modeling-python + - /riak/kv/2.9.9/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br /> `marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_<type>_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```python
+class UserRepository:
+    BUCKET = 'Users'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, user):
+        riak_obj = self.client.bucket(self.BUCKET).get(user['user_name'])
+        riak_obj.data = user
+        return riak_obj.store()
+
+    def get(self, user_name):
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        return riak_obj.data
+
+
+class MsgRepository:
+    BUCKET = 'Msgs'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, msg):
+        msgs = self.client.bucket(self.BUCKET)
+        key = self._generate_key(msg)
+
+        riak_obj = msgs.get(key)
+
+        if not riak_obj.exists:
+            riak_obj.data = msg
+            riak_obj.store(if_none_match=True)
+
+        return riak_obj
+
+    def get(self, key):
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _generate_key(self, msg):
+        return msg['sender'] + '_' + msg['created']
+
+
+class TimelineRepository:
+    BUCKET = 'Timelines'
+    SENT = 'Sent'
+    INBOX = 'Inbox'
+
+    def __init__(self, client):
+        self.client = client
+        self.msg_repo = MsgRepository(client)
+
+    def post_message(self, msg):
+        # Save the canonical copy
+        saved_message = self.msg_repo.save(msg)
+        msg_key = saved_message.key
+
+        # Post to sender's Sent timeline
+        self._add_to_timeline(msg, self.SENT, msg_key)
+
+        # Post to recipient's Inbox timeline
+        self._add_to_timeline(msg, self.INBOX, msg_key)
+
+    def get_timeline(self, owner, msg_type, date):
+        key = self._generate_key(owner, msg_type, date)
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _add_to_timeline(self, msg, msg_type, msg_key):
+        timeline_key = self._generate_key_from_msg(msg, msg_type)
+        riak_obj = self.client.bucket(self.BUCKET).get(timeline_key)
+
+        if riak_obj.exists:
+            riak_obj = self._add_to_existing_timeline(riak_obj,
+                                                      msg_key)
+        else:
+            riak_obj = self._create_new_timeline(riak_obj,
+                                                 msg, msg_type,
+                                                 msg_key)
+
+        return riak_obj.store()
+
+    def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key):
+        owner = self._get_owner(msg, msg_type)
+        new_timeline = {'owner': owner,
+                        'msg_type': msg_type,
+                        'msgs': [msg_key]}
+
+        riak_obj.data = new_timeline
+        return riak_obj
+
+    def _add_to_existing_timeline(self, riak_obj, msg_key):
+        riak_obj.data['msgs'].append(msg_key)
+        return riak_obj
+
+    def _get_owner(self, msg, msg_type):
+        if msg_type == self.INBOX:
+            return msg['recipient']
+        else:
+            return msg['sender']
+
+    def _generate_key_from_msg(self, msg, msg_type):
+        owner = self._get_owner(msg, msg_type)
+        return self._generate_key(owner, msg_type, msg['created'])
+
+    def _generate_key(self, owner, msg_type, datetimestr):
+        dateString = string.split(datetimestr, 'T', 1)[0]
+        return owner + '_' + msg_type + '_' + dateString
+
+```
+
+Finally, let's test them:
+
+```python
+# Setup our repositories
+client = riak.RiakClient(pb_port=10017, protocol='pbc')
+userRepo = UserRepository(client)
+msgsRepo = MsgRepository(client)
+timelineRepo = TimelineRepository(client)
+
+# Save users
+userRepo.save(marleen)
+userRepo.save(joe)
+
+# Post msg to timelines
+timelineRepo.post_message(msg)
+
+# Get Joe's inbox for today, get first message
+joes_inbox_today = timelineRepo.get_timeline(
+    joe['user_name'],
+    TimelineRepository.INBOX,
+    datetime.utcnow().isoformat())
+
+joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0])
+
+print 'From: {0}\nMsg : {1}\n\n'.format(
+    joes_first_message['sender'],
+    joes_first_message['text'])
+
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+* It helps us to see if an object exists before creating a new one
+* It keeps our buckets and key names consistent
+* It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/getting-started/python/querying.md b/content/riak/kv/2.9.9/developing/getting-started/python/querying.md
new file mode 100644
index 0000000000..0bee9373b6
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/getting-started/python/querying.md
@@ -0,0 +1,240 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Querying"
+    identifier: "getting_started_python_query"
+    weight: 101
+    parent: "getting_started_python"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/taste-of-riak/querying-python
+  - /riak/kv/2.9.9/dev/taste-of-riak/querying-python
+---
+
+#### A Quick Note on Querying and Schemas
+_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can range from using the same key across multiple buckets for different types of data to having fields in your data that are related by name.
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index for all of a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```python
+customer = customer_bucket.get('1').data
+customer['order_summary'] = order_summary_bucket.get('1').data
+customer
+```
+
+Which returns our amalgamated objects:
+
+```python
+{
+  u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210',
+  u'created_date': u'2013-10-01 14:30:26',
+  'order_summary': {
+    u'customer_id': 1, u'summaries': [
+      {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98},
+      {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99},
+      {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98}
+    ]},
+  u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street',
+  u'customer_id': 1
+}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id. Next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/ruby.md b/content/riak/kv/2.9.9/developing/getting-started/ruby.md new file mode 100644 index 0000000000..457eb985ae --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/ruby.md @@ -0,0 +1,68 @@ +--- +title: "Getting Started with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Ruby" + identifier: "getting_started_ruby" + weight: 101 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/ruby + - /riak/kv/2.9.9/dev/taste-of-riak/ruby +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/2.9.9/using/running-a-cluster) first. To try this flavor +of Riak, a working installation of Ruby is required. + +## Client Setup + +First, install the Riak Ruby client via RubyGems. + +```bash +gem install riak-client +``` + +Start IRB, the Ruby REPL, and let’s get set up. Enter the following into +IRB: + +```ruby +require 'riak' +``` + +If you are using a single local Riak node, use the following to create a +new client instance, assuming that the node is running on `localhost` +port 8087: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087) + +# Since the Ruby Riak client uses the Protocol Buffers API by default, +# you can also just enter this: +client = Riak::Client.new(:pb_port => 8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017) + +# For the reasons explain in the snippet above, this will also work: +client = Riak::Client.new(:pb_port => 10017) +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/2.9.9/developing/getting-started/ruby/crud-operations) + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/2.9.9/developing/getting-started/ruby/crud-operations.md new file mode 100644 index 0000000000..9bc1de82a8 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/ruby/crud-operations.md @@ -0,0 +1,151 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "CRUD Operations" + identifier: "getting_started_ruby_crud" + weight: 100 + parent: "getting_started_ruby" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```ruby +my_bucket = client.bucket("test") + +val1 = 1 +obj1 = my_bucket.new('one') +obj1.data = val1 +obj1.store() +``` + +In this first example we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```ruby +val2 = "two" +obj2 = my_bucket.new('two') +obj2.data = val2 +obj2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```ruby +val3 = { myValue: 3 } +obj3 = my_bucket.new('three') +obj3.data = val3 +obj3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. 
+ +```ruby +fetched1 = my_bucket.get('one') +fetched2 = my_bucket.get('two') +fetched3 = my_bucket.get('three') + +fetched1.data == val1 +fetched2.data == val2 +fetched3.data.to_json == val3.to_json +``` + +That was easy. we simply request the objects by key. in the last +example, we converted to JSON so we can compare a string key to a symbol +key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to 42. + +```ruby +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called either against the bucket or the +object. + +```ruby +my_bucket.delete('one') +obj2.delete() +obj3.delete() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this Ruby hash that encapsulates some knowledge about +a book. + +```ruby +book = { + :isbn => '1111979723', + :title => 'Moby Dick', + :author => 'Herman Melville', + :body => 'Call me Ishmael. Some years ago...', + :copies_owned => 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now. + +```ruby +books_bucket = client.bucket('books') +new_book = books_bucket.new(book[:isbn]) +new_book.data = book +new_book.store() +``` + +Some of you may be thinking, "But how does the Ruby Riak client +encode/decode my object?" If we fetch our book back and print the raw +data, we shall know: + +```ruby +fetched_book = books_bucket.get(book[:isbn]) +puts fetched_book.raw_data +``` + +Raw Data: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +JSON! The Ruby Riak client will serialize objects to JSON when it comes +across structured data like hashes. For more advanced control over +serialization you can use a library called +[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling +layer over the basic riak client. Ripple falls outside the scope of +this document but we shall visit it later. + +Now, let’s clean up our mess: + +```ruby +new_book.delete() +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/2.9.9/developing/getting-started/ruby/object-modeling.md new file mode 100644 index 0000000000..a7ccb2cf4e --- /dev/null +++ b/content/riak/kv/2.9.9/developing/getting-started/ruby/object-modeling.md @@ -0,0 +1,295 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Object Modeling" + identifier: "getting_started_ruby_object" + weight: 102 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/2.9.9/dev/taste-of-riak/object-modeling-ruby + - /riak/kv/2.9.9/dev/taste-of-riak/object-modeling-ruby +--- + +To get started, let's create the models that we'll be using. Since the +[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses +hashes when converting to and from JSON, we'll use the library +[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically +coerce class properties to and from hashes. 
You can install this library
+with `gem install hashie`.
+
+```ruby
+# Encoding: utf-8
+
+require 'riak'
+require 'hashie'
+require 'time'
+
+class User < Hashie::Dash
+  property :user_name
+  property :full_name
+  property :email
+end
+
+class Msg < Hashie::Dash
+  property :from
+  property :to
+  property :created
+  property :text
+end
+
+class Timeline < Hashie::Dash
+  property :owner
+  property :type
+  property :msgs
+end
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br /> `marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def add_to_existing_timeline(key, msg_key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    timeline = Timeline.new(riak_obj.data)
+    timeline.msgs << msg_key
+    riak_obj.data = timeline
+    riak_obj
+  end
+
+  def get_owner(msg, type)
+    type == INBOX ? msg.to : msg.from
+  end
+
+  def generate_key_from_msg(msg, type)
+    owner = get_owner(msg, type)
+    generate_key(owner, type, msg.created)
+  end
+
+  def generate_key(owner, type, date)
+    owner + '_' + type + '_' + date.utc.strftime('%F')
+  end
+end
+```
+
+Finally, let's test them:
+
+```ruby
+# Setup our repositories
+client = Riak::Client.new(protocol: 'pbc', pb_port: 10017)
+user_repo = UserRepository.new(client)
+msgs_repo = MsgRepository.new(client)
+timeline_repo = TimelineRepository.new(client)
+
+# Create and save users
+marleen = User.new(user_name: 'marleenmgr',
+                   full_name: 'Marleen Manager',
+                   email: 'marleen.manager@basho.com')
+
+joe = User.new(user_name: 'joeuser',
+               full_name: 'Joe User',
+               email: 'joe.user@basho.com')
+
+user_repo.save(marleen)
+user_repo.save(joe)
+
+# Create new Msg, post to timelines
+msg = Msg.new(from: marleen.user_name,
+              to: joe.user_name,
+              created: Time.now,
+              text: 'Welcome to the company!')
+
+timeline_repo.post_message(msg)
+
+# Get Joe's inbox for today, get first message
+joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now)
+joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first)
+
+puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}"
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+* It helps us to see if an object exists before creating a new one
+* It keeps our buckets and key names consistent
+* It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/getting-started/ruby/querying.md b/content/riak/kv/2.9.9/developing/getting-started/ruby/querying.md
new file mode 100644
index 0000000000..6315451a9a
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/getting-started/ruby/querying.md
@@ -0,0 +1,256 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Querying"
+    identifier: "getting_started_ruby_query"
+    weight: 101
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/taste-of-riak/querying-ruby
+  - /riak/kv/2.9.9/dev/taste-of-riak/querying-ruby
+---
+
+#### A Quick Note on Querying and Schemas
+_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another.
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all of a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```ruby
+shared_key = '1'
+customer = customer_bucket.get(shared_key).data
+customer[:order_summary] = order_summary_bucket.get(shared_key).data
+puts "Combined Customer and Order Summary: "
+pp customer
+```
+
+Which returns our amalgamated objects:
+
+```ruby
+# Combined Customer and Order Summary:
+{"customer_id"=>1,
+ "name"=>"John Smith",
+ "address"=>"123 Main Street",
+ "city"=>"Columbus",
+ "state"=>"Ohio",
+ "zip"=>"43210",
+ "phone"=>"+1-614-555-5555",
+ "created_date"=>"2013-10-01 14:30:26 -0400",
+ :order_summary=>
+  {"customer_id"=>1,
+   "summaries"=>
+    [{"order_id"=>1,
+      "total"=>415.98,
+      "order_date"=>"2013-10-01 14:42:26 -0400"},
+     {"order_id"=>2,
+      "total"=>359.99,
+      "order_date"=>"2013-10-15 16:43:16 -0400"},
+     {"order_id"=>3,
+      "total"=>74.98,
+      "order_date"=>"2013-11-03 17:45:28 -0500"}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+    .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "#Jane's Orders: "
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id. Next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`.
+ +```ruby +puts "#October's Orders: " +pp order_bucket.get_index('order_date_bin', '20131001'..'20131031') +``` + +Which returns: + +```ruby +# October's Orders: +["1", "2"] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/2.9.9/developing/key-value-modeling.md b/content/riak/kv/2.9.9/developing/key-value-modeling.md new file mode 100644 index 0000000000..e902314b28 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/key-value-modeling.md @@ -0,0 +1,535 @@ +--- +title: "Riak KV Key/Value Modeling" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Key/Value Modeling" + identifier: "developing_kv_model" + weight: 104 + parent: "developing" +toc: true +aliases: + - /riak/2.9.9/dev/data-modeling/key-value/ + - /riak/kv/2.9.9/dev/data-modeling/key-value/ +--- + +While Riak enables you to take advantage of a wide variety of features +that can be useful in application development, such as [Search]({{}}riak/kv/2.9.9/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/2.9.9/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/2.9.9/developing/data-types/), Riak almost always performs best when you +build your application around basic CRUD operations (create, read, +update, and delete) on objects, i.e. when you use Riak as a "pure" +key/value store. + +In this tutorial, we'll suggest some strategies for naming and modeling +for key/value object interactions with Riak. If you'd like to use some +of Riak's other features, we recommend checking out the documentation +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/2.9.9/developing/app-guide/) for a better sense of which features you might need. + +## Advantages of Key/Value Operations + +Riak's key/value architecture enables it to be more performant than +relational databases in many scenarios because Riak doesn't need to +perform lock, join, union, or other operations when working with +objects. Instead, it interacts with objects on a one-by-one basis, using +**primary key lookups**. + +Primary key lookups store and fetch objects in Riak on the basis of +three basic locators: + +* The object's [key]({{}}riak/kv/2.9.9/learn/concepts/keys-and-objects#keys), which can be anything you + want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/2.9.9/learn/concepts/buckets) which houses the object and its key (bucket + names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/2.9.9/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/2.9.9/developing/app-guide/replication-properties) and other properties + +It may be useful to think of this system as analogous to a nested +key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you +would find in most programming languages. Below is an example from +[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{<baseurl>}}riak/kv/2.9.9/developing/api/http)):
+
+```
+GET/PUT/DELETE /bucket/<bucket>/keys/<key>
+```
+
+The most important benefit of sorting Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist?
+ +One way to determine this is to [list all keys]({{}}riak/kv/2.9.9/developing/api/protocol-buffers/list-keys) in the +bucket `users`. This approach, however, is _not_ recommended, because +listing all keys in a bucket is a very expensive operation that should +not be used in production. And so another strategy must be employed. + +A better possibility is to use [Riak sets]({{}}riak/kv/2.9.9/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/2.9.9/developing/data-types) that enable you to store lists of binaries or strings in Riak. +Unlike normal Riak objects, you can interact with Riak sets much like +you interact with sets in most programming languages, i.e. you can add +and remove elements at will. + +Going back to our user data example, instead of simply storing user +records in our `users` bucket, we could set up our application to store +each key in a set when a new record is created. We'll store this set in +the bucket `user_info_sets` (we'll keep it simple) and in the key +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/2.9.9/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`sets`. + +We can interact with that set on the basis of its location: + +```java +Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames"); + +// With this Location, we can construct fetch operations like this: +FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build(); +``` + +```ruby +require 'riak' + +set_bucket = client.bucket('user_info_sets') + +# We'll make this set global because we'll use it +# inside of a function later on + +$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets') +``` + +```php +$command = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->buildLocation('usernames', 'user_info_sets', 'sets') + ->build(); +``` + +```python +from riak.datatypes import Set + +bucket = client.bucket_type('sets').bucket('user_info_sets') +user_id_set = Set(bucket, 'usernames') +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/2.9.9/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/2.9.9/developing/getting-started). 
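+
+As a side note: if the `sets` bucket type assumed above has not been
+created yet, an administrator can create and activate it from any Riak
+node (a standard bucket type setup sketch; adjust to your environment):
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak-admin bucket-type activate sets
+```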
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+  public String username;
+  public String info;
+
+  public User(String username, String info) {
+    this.username = username;
+    this.info = info;
+  }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+  // User records themselves will be stored in the bucket "users"
+  Location userObjectLocation =
+    new Location(new Namespace("users"), user.username);
+  RiakObject userObject = new RiakObject()
+      // We'll keep it simple and store User object data as plain text
+      .setContentType("text/plain")
+      .setValue(BinaryValue.create(user.info));
+  StoreValue store = new StoreValue.Builder(userObject)
+      .withLocation(userObjectLocation)
+      .build();
+  client.execute(store);
+
+  Location userIdSet =
+    new Location(new Namespace("sets", "user_info_sets"), "usernames");
+  SetUpdate su = new SetUpdate()
+      .add(BinaryValue.create(user.username));
+  UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+      .build();
+  client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new(client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the set
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+    public $user_name;
+    public $info;
+
+    public function __construct($user_name, $info)
+    {
+        $this->user_name = $user_name;
+        $this->info = $info;
+    }
+}
+
+function store_user(User $user)
+{
+    global $riak;
+
+    (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->buildLocation($user->user_name, 'users')
+        ->buildJsonObject($user)
+        ->build()
+        ->execute();
+
+    (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->add($user->user_name)
+        ->build()
+        ->execute();
+}
+```
+
+```python
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    obj = RiakObject(client, client.bucket('users'), user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
+
+Now, let's say that we want to be able to pull up all user records in
+the bucket at once.
We could do so by iterating
+through the usernames stored in our set and then fetching the object
+corresponding to each username:
+
+```java
+public Set<User> fetchAllUserRecords() throws Exception {
+    // Empty sets for usernames and User objects
+    Set<String> userIds = new HashSet<String>();
+    Set<User> userSet = new HashSet<User>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSetLocation =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSetLocation).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    set.viewAsSet().forEach((BinaryValue username) -> {
+        userIds.add(username.toString());
+    });
+
+    // Fetch User objects for each of the usernames stored in the set
+    for (String username : userIds) {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        User user = client.execute(fetch).getValue(User.class);
+        userSet.add(user);
+    }
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+    global $riak;
+
+    $users = [];
+
+    $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->build()
+        ->execute();
+
+    $user_names = $response->getSet()->getData();
+    foreach($user_names as $user_name) {
+        $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+            ->buildLocation($user_name, 'users')
+            ->build()
+            ->execute();
+
+        $users[$user_name] = $response->getObject()->getData();
+    }
+
+    return $users;
+}
+```
+
+```python
+# We'll create a generator object that will yield a list of Riak objects
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can retrieve that list of Riak objects later on
+list(fetch_all_user_records())
+```
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`.
If an
+object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) throws Exception {
+    String usernameKey = String.format("user_%s", username);
+    Location loc = new Location(new Namespace("users"), usernameKey);
+    FetchValue fetchUser = new FetchValue.Builder(loc).build();
+    FetchValue.Response res = client.execute(fetchUser);
+    User userObject = res.getValue(User.class);
+    return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+    global $riak;
+
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->buildLocation('user_' . $user_name, 'users')
+        ->build()
+        ->execute();
+
+    return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good' => {
+    'season X' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  },
+  'bad' => {
+    'season Y' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  }
+}
+```
+
+We can fetch the title of season 8, episode 6:
+
+```ruby
+# For the sake of example, we'll classify season 8 as good:
+
+simpsons['good']['season 8']['episode 6']
+
+# => "A Milhouse Divided"
+```
+
+If your data is best modeled as a three-layered hash, you may want to
+consider using bucket types in the way shown above.
+
+## Resources
+
+More on key/value modeling in Riak can be found in [this
+presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo)
+by Basho evangelist [Hector Castro](https://github.com/hectcastro), with
+the presentation slides available [on Speaker
+Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example).
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage.md b/content/riak/kv/2.9.9/developing/usage.md
new file mode 100644
index 0000000000..6c75039648
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage.md
@@ -0,0 +1,138 @@
+---
+title: "Usage Overview"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Usage"
+    identifier: "developing_usage"
+    weight: 101
+    parent: "developing"
+toc: true
+aliases:
+---
+
+## In This Section
+
+#### [Creating Objects](./creating-objects)
+
+Creating and storing objects in Riak KV.
+
+[Learn More >>](./creating-objects)
+
+
+#### [Reading Objects](./reading-objects)
+
+Reading and fetching objects in Riak KV.
+
+[Learn More >>](./reading-objects)
+
+
+#### [Updating Objects](./updating-objects)
+
+Updating objects in Riak KV.
+
+[Learn More >>](./updating-objects)
+
+
+#### [Deleting Objects](./deleting-objects)
+
+Deleting objects in Riak KV.
+
+[Learn More >>](./deleting-objects)
+
+
+#### [Content Types](./content-types)
+
+Overview of content types and their usage.
+
+[Learn More >>](./content-types)
+
+
+#### [Using Search](./search)
+
+Tutorial on using search.
+
+[Learn More >>](./search)
+
+
+#### [Using MapReduce](./mapreduce)
+
+Guide to using MapReduce in applications.
+
+[Learn More >>](./mapreduce)
+
+
+#### [Using Secondary Indexes](./secondary-indexes)
+
+Overview and usage details of Secondary Indexes (2i).
+
+[Learn More >>](./secondary-indexes)
+
+
+#### [Bucket Types](./bucket-types)
+
+Describes how to use bucket properties.
+
+[Learn More >>](./bucket-types)
+
+
+#### [Using Commit Hooks](./commit-hooks)
+
+Tutorial on pre-commit and post-commit hook functions.
+
+[Learn More >>](./commit-hooks)
+
+
+#### [Creating Search Schemas](./search-schemas)
+
+Step-by-step guide on creating and using custom search schemas.
+
+[Learn More >>](./search-schemas)
+
+
+#### [Searching with Data Types](./searching-data-types)
+
+Guide on using search with Data Types.
+
+[Learn More >>](./searching-data-types)
+
+
+#### [Implementing a Document Store](./document-store)
+
+Tutorial on using Riak KV as a document store.
+
+[Learn More >>](./document-store)
+
+
+#### [Custom Extractors](./custom-extractors)
+
+Details on creating and registering custom extractors with Riak Search.
+
+[Learn More >>](./custom-extractors)
+
+
+#### [Client-side Security](./security)
+
+Overview of client-side security.
+
+[Learn More >>](./security)
+
+
+#### [Replication](./replication)
+
+Documentation on replication properties and their underlying implementation.
+
+[Learn More >>](./replication)
+
+
+#### [Conflict Resolution](./conflict-resolution)
+
+Guide to conflict resolution during object updates.
+ +[Learn More >>](./conflict-resolution) + + + + diff --git a/content/riak/kv/2.9.9/developing/usage/bucket-types.md b/content/riak/kv/2.9.9/developing/usage/bucket-types.md new file mode 100644 index 0000000000..76ce4a7b62 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/bucket-types.md @@ -0,0 +1,102 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Bucket Types" + identifier: "usage_bucket_types" + weight: 108 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.9/dev/advanced/bucket-types + - /riak/kv/2.9.9/dev/advanced/bucket-types +--- + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +## Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +`default` bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/usage/commit-hooks.md b/content/riak/kv/2.9.9/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..43f65957a0 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/commit-hooks.md @@ -0,0 +1,243 @@ +--- +title: "Using Commit Hooks" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Using Commit Hooks" + identifier: "usage_commit_hooks" + weight: 109 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.9/dev/using/commit-hooks + - /riak/kv/2.9.9/dev/using/commit-hooks +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types + +Pre- and post-commit hooks are functions that are invoked before or +after an object has been written to Riak. 
To provide a few examples,
+commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and then the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/2.9.9/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions are allowed three possible return values:
+
+- A Riak object - This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written.
+- `fail` - The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/2.9.9/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` - The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
+
+Errors that occur when processing Erlang pre-commit hooks will be
+reported in the `sasl-error.log` file with lines that start with
+`problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object
+(`Object`) as its input and runs a pattern-matching operation on the
+object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
+function determines that the object's size (determined by the `riak_object:get_value`
+function) is greater than 5,242,880 (5 MB in bytes), then the commit
+will fail with the message `Object is larger than 5MB.`, which stops
+the write. If the object is not larger than 5 MB, Riak
+will return the object and allow the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty
+list, meaning that no pre-commit hooks are specified by default. Adding
+one or more pre-commit hook functions to this list, as documented above,
+will cause Riak to start evaluating those hook functions when bucket
+entries are created, updated, or deleted. Riak stops evaluating
+pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way to use
+pre-commit hooks is to validate data before it is written to Riak.
+Below is an Erlang example that validates a JSON object
+before it is written to Riak.
+
+Below is a sample JSON object that will be evaluated by the hook:
+
+```json
+{
+  "user_info": {
+    "name": "Mark Phillips",
+    "age": "25"
+  },
+  "session_info": {
+    "id": 3254425,
+    "items": [29, 37, 34]
+  }
+}
+```
+
+The following hook will validate the JSON object:
+
+```erlang
+validate(Object) ->
+  try
+    mochijson2:decode(riak_object:get_value(Object)),
+    Object
+  catch
+    throw:invalid_utf8 ->
+      {fail, "Invalid JSON: Illegal UTF-8 character"};
+    error:Error ->
+      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
+  end.
+```
+
+**Note**: All pre-commit hook functions are executed for each create and update operation.
+
+## Post-Commit Hooks
+
+Post-commit hooks are run after a write has completed successfully. More
+specifically, the hook function is called immediately before the calling
+process is notified of the successful write.
+
+Hook functions must accept a single argument: the object instance just
+written. The return value of the function is ignored. As with pre-commit
+hooks, deletes are considered writes, so post-commit hook functions will
+need to inspect the object's metadata for the presence of `X-Riak-Deleted`
+to determine whether a delete has occurred. As with pre-commit hooks,
+errors that occur when processing post-commit hooks will be reported in
+the `sasl-error.log` file with lines that start with `problem invoking hook`.
+
+#### Example
+
+The following post-commit hook creates a secondary index on the `email`
+field of a JSON object:
+
+```erlang
+postcommit_index_on_email(Object) ->
+    %% Determine the target bucket name
+    Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),
+
+    %% Decode the JSON body of the object
+    {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),
+
+    %% Extract the email field
+    {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),
+
+    %% Create a new object for the target bucket
+    %% NOTE: This doesn't handle the case where the
+    %% index object already exists!
+    IndexObj = riak_object:new(
+        Bucket, Key, <<>>, %% no object contents
+        dict:from_list(
+            [
+                {<<"content-type">>, "text/plain"},
+                {<<"Links">>,
+                    [
+                        {
+                            {riak_object:bucket(Object), riak_object:key(Object)},
+                            <<"indexed">>
+                        }]}
+            ]
+        )
+    ),
+
+    %% Get a riak client
+    {ok, C} = riak:local_client(),
+
+    %% Store the object
+    C:put(IndexObj).
+```
+
+
+### Chaining
+
+The default value of the bucket `postcommit` property is an empty list,
+meaning that no post-commit hooks are specified by default. Adding one
+or more post-commit hook functions to the list, as documented above,
+will cause Riak to start evaluating those hook functions immediately
+after data has been created, updated, or deleted. Each post-commit hook
+function runs in a separate process so it's possible for several hook
+functions, triggered by the same update, to execute in parallel.
+
+**Note**: All post-commit hook functions are executed for each create,
+update, or delete.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/conflict-resolution.md b/content/riak/kv/2.9.9/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..40bd63368d
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/conflict-resolution.md
@@ -0,0 +1,681 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/using/conflict-resolution
+  - /riak/kv/2.9.9/dev/using/conflict-resolution
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#node) is capable of receiving requests without requiring that
+every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
+unavoidable. Often, Riak can resolve these conflicts on its own
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+
+{{% note title="Important note on terminology" %}}
+In versions of Riak prior to 2.0, vector clocks were the only causal context
+mechanism available in Riak, which changed with the introduction of dotted
+version vectors in 2.0. Please note that you may frequently find terminology in
+client library APIs, internal Basho documentation, and more that uses the term
+"vector clock" interchangeably with causal context in general. Riak's HTTP API
+still uses an `X-Riak-Vclock` header, for example, even if you are using dotted
+version vectors.
+{{% /note %}}
+
+But even when you use causal context, Riak cannot always decide which
+value is most causally recent, especially in cases involving concurrent
+updates to an object. So how does Riak behave when it can't decide on a
+single most-up-to-date value? **That is your choice**. A full listing of
+available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now,
+though, please bear in mind that we strongly recommend one of the
+following two options:
+
+1. If your data can be modeled as one of the currently available [Riak
+   Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types), we recommend using one of these types,
+   because all of them have conflict resolution _built in_, completely
+   relieving applications of the need to engage in conflict resolution.
+2. If your data cannot be modeled as one of the available Data Types,
+   we recommend allowing Riak to generate [siblings](#siblings) and to design your application to resolve
+   conflicts in a way that fits your use case. Developing your own
+   **conflict resolution strategy** can be tricky, but it has clear
+   advantages over other approaches.
+
+Because Riak allows for a mixed approach when storing and managing data,
+you can apply multiple conflict resolution strategies within a cluster.
+
+> **Note on strong consistency**
+>
+> In versions of Riak 2.0 and later, you have the option of using Riak in
+a strongly consistent fashion. This document pertains to usage of Riak
+as an _eventually_ consistent system. If you'd like to use Riak's
+strong consistency feature, please refer to the following documents:
+>
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/strong-consistency) - A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/2.9.9/configuring/strong-consistency) - A guide for operators
+> * [strong consistency][use ref strong consistency] - A more theoretical explication of strong
+  consistency
+
+## Client- and Server-side Conflict Resolution
+
+Riak's eventual consistency model is powerful because Riak is
+fundamentally non-opinionated about how data resolution takes place.
+While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+approaches to conflict resolution that are available. In Riak, you can
+mix and match conflict resolution strategies at the bucket level,
+[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets)
+to consider when reasoning about conflict resolution are the
+`allow_mult` and `last_write_wins` properties.
+
+These properties provide you with the following basic options:
+
+### Timestamp-based Resolution
+
+If the [`allow_mult`](#siblings) parameter is set to
+`false`, Riak resolves all object replica conflicts internally and does
+not return siblings to the client.
How Riak resolves those conflicts +depends on the value that you set for a different bucket property, +[`last_write_wins`]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets). If `last_write_wins` is set to `false`, +Riak will resolve all conflicts on the basis of +[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are +attached to all Riak objects as metadata. + +The problem with timestamps is that they are not a reliable resolution +mechanism in distributed systems, and they always bear the risk of data +loss. A better yet still-problematic option is to adopt a +last-write-wins strategy, described directly below. + +### Last-write-wins + +Another way to manage conflicts is to set `allow_mult` to `false`, as +with timestamp-based resolution, while also setting the +`last_write_wins` parameter to +`true`. This produces a so-called last-write-wins (LWW) strategy whereby +Riak foregoes the use of all internal conflict resolution strategies +when making writes, effectively disregarding all previous writes. + +The problem with LWW is that it will necessarily drop some writes in the +case of concurrent updates in the name of preventing sibling creation. +If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. For examples of client-side sibling resolution, see the following +client-library-specific docs: + +* [Java]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/nodejs) + +In Riak versions 2.0 and later, `allow_mult` is set to `true` by default +for any [bucket types]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) that you create. 
This means
+that if you wish to avoid client-side sibling resolution, you have a few
+options:
+
+* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types)
+  that set `allow_mult` to `false`
+* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/2.9.9/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/2.9.9/configuring/reference#default-bucket-properties) for your
+  cluster. If you set the `buckets.default.allow_mult` parameter to
+  `false`, all bucket types that you create will have `allow_mult` set
+  to `false` by default.
+
+## Causal Context
+
+When a value is stored in Riak, it is tagged with a piece of metadata
+called a **causal context** which establishes the object's initial
+version. Causal context comes in one of two possible forms, depending
+on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context#vector-clocks) will be used.
+
+Causal context essentially enables Riak to compare the different values
+of objects stored in Riak and to determine a number of important things
+about those values:
+
+ * Whether one value is a direct descendant of the other
+ * Whether the values are direct descendants of a common parent
+ * Whether the values are unrelated in recent heritage
+
+Using the information provided by causal context, Riak is frequently,
+though not always, able to resolve conflicts between values without
+producing siblings.
+
+Neither vector clocks nor dotted version vectors are human readable; they
+look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+If `allow_mult` is set to `true`, you should _always_ use causal context
+when updating objects, _unless you are certain that no object exists
+under that key_. Failing to use causal context with mutable data,
+especially for objects that are frequently updated, can lead to
+[sibling explosion]({{<baseurl>}}riak/kv/2.9.9/using/performance/latency-reduction#siblings), which can
+produce a variety of problems in your cluster. Fortunately, much of the
+work involved with using causal context is handled automatically by
+Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.9/developing/client-libraries). Examples can be found for each
+client library in the [Object Updates]({{<baseurl>}}riak/kv/2.9.9/developing/usage/updating-objects) document.
+
+## Siblings
+
+A **sibling** is created when Riak is unable to resolve the canonical
+version of an object being stored, i.e. when Riak is presented with
+multiple possible values for an object and can't figure out which one is
+most causally recent. The following scenarios can create sibling values
+inside of a single object:
+
+1. **Concurrent writes** - If two writes occur simultaneously from
+clients, Riak may not be able to choose a single value to store, in
+which case the object will be given a sibling. These writes could happen
+on the same node or on different nodes.
+2. **Stale causal context** - Writes from any client using a stale
+[causal context]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context).
This is a less likely scenario if a client updates
+the object by reading the object first, fetching the causal context
+currently attached to the object, and then returning that causal context
+to Riak when performing the update (fortunately, our client libraries
+handle much of this automatically). However, even if a client follows
+this protocol when performing updates, a situation may occur in which an
+update happens from a different client while the read/write cycle is
+taking place. This may cause the first client to issue the write with an
+old causal context value and for a sibling to be created. A client is
+"misbehaved" if it habitually updates objects with a stale context or
+with no context at all.
+3. **Missing causal context** - If an object is updated with no causal
+context attached, siblings are very likely to be created. This is an
+unlikely scenario if you're using a Basho client library, but it _can_
+happen if you are manipulating objects using a client like `curl` and
+forgetting to set the `X-Riak-Vclock` header.
+
+## Siblings in Action
+
+Let's have a more concrete look at how siblings work in Riak. First,
+we'll create a bucket type called `siblings_allowed` with `allow_mult`
+set to `true`:
+
+```bash
+riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
+riak-admin bucket-type activate siblings_allowed
+riak-admin bucket-type status siblings_allowed
+```
+
+If the type has been activated, running the `status` command should
+return `siblings_allowed is active`. Now, we'll create two objects and
+write both of them to the same key without first fetching the object
+(which obtains the causal context):
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+RiakObject obj1 = new RiakObject()
+        .withContentType("text/plain")
+        .withValue(BinaryValue.create("Ren"));
+RiakObject obj2 = new RiakObject()
+        .withContentType("text/plain")
+        .withValue(BinaryValue.create("Stimpy"));
+StoreValue store1 = new StoreValue.Builder(obj1)
+        .withLocation(bestCharacterKey)
+        .build();
+StoreValue store2 = new StoreValue.Builder(obj2)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store1);
+client.execute(store2);
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = Riak::RObject.new(bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'Ren'
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'Stimpy'
+obj2.store
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = RiakObject(client, bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.data = 'Ren'
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.data = 'Stimpy'
+obj2.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('siblings_allowed');
+obj1.setBucket('nickolodeon');
+obj1.setKey('best_character');
+obj1.setValue('Ren');
+
+var obj2 = new
Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries]({{<baseurl>}}riak/kv/2.9.9/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.9/developing/getting-started) section.
+
+At this point, multiple objects have been stored in the same key without
+passing any causal context to Riak. Let's see what happens if we try to
+read contents of the object:
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed', bucket:
+        'nickolodeon', key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found.
We should get this response: + +```java +com.basho.riak.client.cap.UnresolvedConflictException: Siblings found +``` + +```ruby +<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]> +``` + +```python +[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>] +``` + +```csharp +Sibling count: 2 + VTag: 1DSVo7VED8AC6llS8IcDE6 + VTag: 7EiwrlFAJI5VMLK87vU4tE +``` + +```javascript +info: nickolodeon/best_character has '2' siblings +``` + +```curl +Siblings: +175xDv0I3UFCfGRC7K7U9z +6zY2mUCFPEoL834vYCDmPe +``` + +As you can see, reading an object with sibling values will result in +some form of "multiple choices" response (e.g. `300 Multiple Choices` in +HTTP). If you're using the HTTP interface and want to view all sibling +values, you can attach an `Accept: multipart/mixed` header to your +request: + +```curl +curl -H "Accept: multipart/mixed" \ + http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character +``` + +Response (without headers): + +``` +ren +--WUnzXITIPJFwucNwfdaofMkEG7H + +stimpy +--WUnzXITIPJFwucNwfdaofMkEG7H-- +``` + +If you select the first of the two siblings and retrieve its value, you +should see `Ren` and not `Stimpy`. + +### Using Causal Context + +Once you are presented with multiple options for a single value, you +must determine the correct value. In an application, this can be done +either in an automatic fashion, using a use case-specific resolver, or +by presenting the conflicting objects to the end user. For more +information on application-side conflict resolution, see our +client-library-specific documentation for the following languages: + +* [Java]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/nodejs) + +We won't deal with conflict resolution in this section. Instead, we'll +focus on how to use causal context. + +After having written several objects to Riak in the section above, we +have values in our object: `Ren` and `Stimpy`. But let's say that we +decide that `Stimpy` is the correct value based on our application's use +case. In order to resolve the conflict, we need to do three things: + +1. Fetch the current object (which will return both siblings) +2. Modify the value of the object, i.e. make the value `Stimpy` +3. Write the object back to the `best_character` key + +What happens when we fetch the object first, prior to the update, is +that the object handled by the client has a causal context attached. At +that point, we can modify the object's value, and when we write the +object back to Riak, _the causal context will automatically be attached +to it_. 
Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+        bucketType: 'siblings_allowed',
+        bucket: 'nickolodeon',
+        key: 'best_character'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue('Stimpy');
+        client.storeValue({ value: riakObj, returnBody: true },
+            function (err, rslt) {
+                if (err) {
+                    throw new Error(err);
+                }
+
+                assert(rslt.values.length === 1);
+            }
+        );
+    }
+);
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible to have two clients that are
+simultaneously engaging in conflict resolution. To avoid a pathological
+divergence, you should be sure to limit the number of reconciliations and fail
+once that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/2.9.9/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant volume of updates is performed on a single
+object in a small period of time.
While updating a single object
+_extremely_ frequently is not recommended, you can tune Riak's vector
+clock pruning to prevent vector clocks from growing too large too
+quickly. More on pruning in the [section below](#vector-clock-pruning).
+
+### How does `last_write_wins` affect resolution?
+
+On the surface, it seems like setting `allow_mult` to `false`
+(the default) and `last_write_wins` to `true` would result in the same
+behavior, but there is a subtle distinction.
+
+Even though both settings return only one value to the client, setting
+`allow_mult` to `false` still uses vector clocks for resolution, whereas
+if `last_write_wins` is `true`, Riak reads the timestamp to determine
+the latest version. Deeper in the system, if `allow_mult` is `false`,
+Riak will still allow siblings to exist when they are created (via
+concurrent writes or network partitions), whereas setting
+`last_write_wins` to `true` means that Riak will overwrite the value
+with the one that has the later timestamp.
+
+When you don't care about sibling creation, setting `allow_mult` to
+`false` has the least surprising behavior: you get the latest value,
+but network partitions are handled gracefully. However, for cases in
+which keys are rewritten often (and quickly) and the new value isn't
+necessarily dependent on the old value, `last_write_wins` will provide
+better performance. Some use cases where you might want to use
+`last_write_wins` include caching, session storage, and insert-only
+(no updates).
+
+{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}}
+The combination of setting both the `allow_mult` and `last_write_wins`
+properties to `true` leads to undefined behavior and should not be used.
+{{% /note %}}
+
+## Vector Clock Pruning
+
+Riak regularly prunes vector clocks to prevent overgrowth based on four
+parameters which can be set for any bucket type that you create:
+
+Parameter | Default value | Description
+:---------|:--------------|:-----------
+`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned
+`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned
+`young_vclock` | `20` | If a vector clock entry is younger than this value (in seconds), it will not be pruned
+`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in seconds), it will be pruned
+
+This diagram shows how the values of these parameters dictate the vector
+clock pruning process:
+
+![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)
+
+## More Information
+
+Additional background information on vector clocks:
+
+* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock)
+* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/)
+* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/)
+* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563)
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/csharp.md
new file mode 100644
index 0000000000..03c5b95d13
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/csharp.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "C
Sharp" + identifier: "usage_conflict_resolution_csharp" + weight: 103 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.9/dev/using/conflict-resolution/csharp + - /riak/kv/2.9.9/dev/using/conflict-resolution/csharp +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak .NET client][riak_dotnet_client]. + +## How the .NET Client Handles Conflict Resolution + +In the Riak .NET client, every Riak object has a `siblings` property that +provides access to a list of that object's sibling values. If there are no +siblings, that property will return an empty list. + +Here's an example of an object with siblings: + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +So what happens if the count of `obj.Siblings` is greater than 0, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `Siblings` list and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `Siblings` list and will +fetch, update and store the definitive value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Now, modify the object's value +obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain); + +// Then, store the object which has vector clock attached +var putRslt = client.Put(obj); +CheckResult(putRslt); + +obj = putRslt.Value; +// Voila, no more siblings! +Debug.Assert(obj.Siblings.Count == 0); +``` + +### Choosing a value from `Siblings` + +This example shows a basic sibling resolution strategy in which the first +sibling is chosen as the canonical value. 
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/conflict-resolution/golang.md b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..24157ccdd5
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/using/conflict-resolution/golang
+  - /riak/kv/2.9.9/dev/using/conflict-resolution/golang
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, the result of a fetch can contain a slice of sibling
+objects in its `Values` property. If there are no siblings, `Values` will
+contain a single object.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L125-L146)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type.
+ +[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210) + + + + diff --git a/content/riak/kv/2.9.9/developing/usage/conflict-resolution/java.md b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/java.md new file mode 100644 index 0000000000..73e28b06bb --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/java.md @@ -0,0 +1,276 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Java" + identifier: "usage_conflict_resolution_java" + weight: 100 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.9/dev/using/conflict-resolution/java + - /riak/kv/2.9.9/dev/using/conflict-resolution/java +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Java +client](https://github.com/basho/riak-java-client). + +## How the Java Client Handles Conflict Resolution + +The official Riak Java client provides a `ConflictResolver` interface +for handling sibling resolution. This interface requires that you +implement a `resolve` method that takes a Java `List` of objects of a +specific type that are stored in Riak and produces a single object of +that type, i.e. converts a `List<T>` to a single `T`. Once that +interface has been implemented, it can be registered as a singleton and +thereby applied to all read operations on a specific data type. Below is +an example resolver for the class `Foo`: + +```java +import com.basho.riak.client.api.cap.ConflictResolver; + +public class FooResolver implements ConflictResolver<Foo> { + @Override + public Foo resolve(List<Foo> siblings) { + // Insert your sibling resolution logic here + } +} +``` + +What happens within the `resolve` method is up to you and will always +depend on the use case at hand. You can implement a resolver that +selects a random `Foo` from the list, chooses the `Foo` with the most +recent timestamp (if you've set up the class `Foo` to have timestamps), +etc. In this tutorial we'll provide a simple example to get you started. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends" in the network. +Each user will bear the class `User`, which we'll create below. All of +the data for our application will be stored in buckets that bear the +[bucket type]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) `siblings`, and for this bucket type +`allow_mult` is set to `true`, which means that Riak will generate +siblings in certain cases---siblings that our application will need to +be equipped to resolve when they arise. + +The question that we need to ask ourselves now is this: if a given user +has sibling values, i.e. if there are multiple `friends` lists and Riak +can't decide which one is most causally recent, which list should be +deemed "correct" from the standpoint of the application? What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? 
+ +This decision will always be yours to make. Here, though, we'll keep it +simple and say that the following criterion will hold: if conflicting +lists exist, _the longer list will be the one that our application deems +correct_. So if the user `user1234` has a sibling conflict where one +possible value has `friends` lists with 100, 75, and 10 friends, +respectively, the list of 100 friends will win out. While this might +not make sense in real-world applications, it's a good jumping-off +point. We'll explore the drawbacks of this approach, as well as a better +alternative, in this document as well. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` as well as a `friends` property that +lists the usernames, as strings, of the user's friends. We'll use a +`Set` for the `friends` property to avoid duplicates. + +```java +public class User { + public String username; + public Set<String> friends; + + public User(String username, Set<String> friends) { + this.username = username; + this.friends = friends; + } +} +``` + +Here's an example of instantiating a new `User` object: + +```java +Set<String> friends = new HashSet<String>(); +friends.add("fred"); +friends.add("barney"); +User bashobunny = new User("bashobunny", friends); +``` + +### Implementing a Conflict Resolution Interface + +So what happens if siblings are present and the user `bashobunny` has +different friend lists in different object replicas? For that we can +implement the `ConflictResolver` class described [above](#how-the-java-client-handles-conflict-resolution). We +need to implement that interface in a way that is specific to the need +at hand, i.e. taking a list of `User` objects and returning the `User` +object that has the longest `friends` list: + +```java +import com.basho.riak.client.api.cap.ConflictResolver; + +public class UserResolver implements ConflictResolver<User> { + @Override + public User resolve(List<User> siblings) { + // If there are no objects present, return null + if (siblings.size == 0) { + return null; + // If there is only one User object present, return that object + } else if (siblings.size == 1) { + return siblings.get(0); + // And if there are multiple User objects, return the object + // with the longest list + } else { + int longestList = 0; + User userWithLongestList; + + // Iterate through the User objects to check for the longest + // list + for (User user : siblings) { + if (user.friends.size() > longestList) { + userWithLongestList = user; + longestList = user.friends.size(); + } + } + // If all sibling User objects have a friends list with a length + // of 0, it doesn't matter which sibling is selected, so we'll + // simply select the first one in the list: + return userWithLongestList == null ? siblings.get(0) : userWithLongestList; + } + } +} +``` + +### Registering a Conflict Resolver Class + +To use a conflict resolver, we must register it: + +```java +ConflictResolverFactory factory = ConflictResolverFactory.getInstance(); +factory.registerConflictResolver(User.class, new UserResolver()); +``` + +With the resolver registered, the resolution logic that we have created +will resolve siblings automatically upon read. Registering a custom +conflict resolver can occur at any point in the application's lifecycle +and will be applied on all reads that involve that object type. 
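
Once `UserResolver` is registered, a plain fetch is enough to get back a
single resolved value. Here's a minimal sketch of what such a read might
look like, assuming a `users` bucket that bears the `siblings` bucket
type (the bucket and key names here are illustrative):

```java
import com.basho.riak.client.api.commands.kv.FetchValue;
import com.basho.riak.client.core.query.Location;
import com.basho.riak.client.core.query.Namespace;

// Assumes a bucket named "users" with the "siblings" bucket type
Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
FetchValue fetch = new FetchValue.Builder(key).build();
FetchValue.Response response = client.execute(fetch);

// getValue runs any siblings through the registered UserResolver
// before handing back a single User object
User bashobunny = response.getValue(User.class);
```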

## Conflict Resolution and Writes

In the above example, we created a conflict resolver that resolves a
list of discrepant `User` objects and returns a single `User`. It's
important to note, however, that this resolver will only provide the
application with a single "correct" value; it will _not_ write that
value back to Riak. That requires a separate step. When this step should
be undertaken depends on your application. In general, though, we
recommend writing objects to Riak only when the application is ready to
commit them, i.e. when all of the changes that need to be made to the
object have been made and the application is ready to persist the state
of the object in Riak.

Correspondingly, we recommend that updates to objects in Riak follow
these steps:

1. **Read** the object from Riak
2. **Resolve** sibling conflicts if they exist, allowing the
application to reason about one "correct" value for the object (this
step is the subject of this tutorial)
3. **Modify** the object
4. **Write** the object to Riak once the necessary changes have been
made

You can find more on writing objects to Riak, including examples from
the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.9/developing/usage) section.

## More Advanced Example

Resolving sibling `User` values on the basis of which user has the
longest `friends` list has the benefit of being simple but it's probably
not a good resolution strategy for our social networking application
because it means that unwanted data loss is inevitable. If one friends
list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
list containing `A`, `B`, and `C` will be chosen. So what about friends
`D` and `E`? Those usernames are essentially lost. In the sections
below, we'll implement some other conflict resolution strategies as
examples.

### Merging the Lists

To avoid losing data like this, a better strategy may be to merge the
lists. We can modify our original `resolve` function in our
`UserResolver` to accomplish precisely that:

```java
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import com.basho.riak.client.api.cap.ConflictResolver;

public class UserResolver implements ConflictResolver<User> {
    @Override
    public User resolve(List<User> siblings) {
        // We apply the same logic as before, returning null if the
        // key is empty and returning the one sibling if there is only
        // one User in the siblings list
        if (siblings.size() == 0) {
            return null;
        } else if (siblings.size() == 1) {
            return siblings.get(0);
        } else {
            // We begin with an empty Set
            Set<String> setBuilder = new HashSet<String>();

            // We know that all User objects in the List will have the
            // same username, since we used the username for the key, so
            // we can fetch the username of any User in the list:
            String username = siblings.get(0).username;

            // Now for each User object in the list we add the friends
            // list to our empty Set
            for (User user : siblings) {
                setBuilder.addAll(user.friends);
            }

            // Then we return a new User object that takes the Set we
            // built as the friends list
            return new User(username, setBuilder);
        }
    }
}
```

Since the `friends` list is a Java `Set`, we don't need to worry about
duplicate usernames.

The drawback to this approach is the following: with a conflict
resolution strategy like this, it's more or less inevitable that a user
will remove a friend from their friends list, and that that friend will
end up back on the list during a conflict resolution operation. While
that's certainly not desirable, that is likely better than the
alternative proposed in the first example, which entails usernames being
simply dropped from friends lists. Sibling resolution strategies almost
always carry potential drawbacks of this sort.

## Riak Data Types

An important thing to always bear in mind when working with conflict
resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/) that have
specific conflict resolution mechanics built in. If you have data that
can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#maps), then you should seriously
consider using those Data Types instead of creating your own
application-side resolution logic.

In the example above, we were dealing with conflict resolution within a
set, in particular the `friends` list associated with each `User`
object. The merge operation that we built to handle conflict resolution
is analogous to the resolution logic that is built into Riak sets. For
more information on how you could potentially replace the client-side
resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets).




diff --git a/content/riak/kv/2.9.9/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/nodejs.md
new file mode 100644
index 0000000000..3b5efd1702
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/nodejs.md
@@ -0,0 +1,62 @@
---
title_supertext: "Conflict Resolution:"
title: "NodeJS"
description: ""
project: "riak_kv"
project_version: 2.9.9
menu:
  riak_kv-2.9.9:
    name: "NodeJS"
    identifier: "usage_conflict_resolution_nodejs"
    weight: 104
    parent: "usage_conflict_resolution"
toc: true
aliases:
  - /riak/2.9.9/dev/using/conflict-resolution/nodejs
  - /riak/kv/2.9.9/dev/using/conflict-resolution/nodejs
---

For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
requires applications to resolve siblings according to use-case-specific
criteria. Here, we'll provide a brief guide to conflict resolution using the
official [Riak Node.js client](https://github.com/basho/riak-nodejs-client).

## How the Node.js Client Handles Conflict Resolution

In the Riak Node.js client, the result of a fetch may contain an array
of sibling objects in its `values` property. If there are no siblings,
that array will contain a single value.

[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68)

So what happens if the length of `rslt.values` is greater than 1, as in the case
above?

In order to resolve siblings, you need to either fetch, update, and store a
canonical value, or choose a sibling from the `values` array and store that as
the canonical value.

## Basic Conflict Resolution Example

In this example, you will ignore the contents of the `values` array and will
fetch, update, and store the definitive value.

[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)

### Choosing a value from `rslt.values`

This example shows a basic sibling resolution strategy in which the first
sibling is chosen as the canonical value.

[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)

### Using `conflictResolver`

This example shows a basic sibling resolution strategy in which the first
sibling is chosen as the canonical value via a conflict resolution function.

[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)




diff --git a/content/riak/kv/2.9.9/developing/usage/conflict-resolution/php.md b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..3821773cc1
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,244 @@
---
title_supertext: "Conflict Resolution:"
title: "PHP"
description: ""
project: "riak_kv"
project_version: 2.9.9
menu:
  riak_kv-2.9.9:
    name: "PHP"
    identifier: "usage_conflict_resolution_php"
    weight: 105
    parent: "usage_conflict_resolution"
toc: true
aliases:
  - /riak/2.9.9/dev/using/conflict-resolution/php
  - /riak/kv/2.9.9/dev/using/conflict-resolution/php
---

For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution), we strongly recommend adopting a
conflict resolution strategy that requires applications to resolve
siblings according to use-case-specific criteria. Here, we'll provide a
brief guide to conflict resolution using the official [Riak PHP
client](https://github.com/basho/riak-php-client).

## How the PHP Client Handles Conflict Resolution

Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
object, which provides what is needed to handle object conflicts. If siblings exist
and have been returned from the server within the response body, they will be
available within the response object. See below:

```php
$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
    ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
    ->build()
    ->execute();

echo $response->getStatusCode(); // 300
echo $response->hasSiblings(); // 1
echo $response->getSiblings(); // \Basho\Riak\Object[]
```

## Basic Conflict Resolution Example

Let's say that we're building a social network application and storing
lists of usernames representing each user's "friends" in the network.
Each user will be represented by the class `User`, which we'll create
below. All of the data for our application will be stored in buckets
that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) `siblings`, and for this bucket type
`allow_mult` is set to `true`, which means that Riak will generate
siblings in certain cases---siblings that our application will need to
be equipped to resolve when they arise.

The question that we need to ask ourselves now is this: if a given user
has sibling values, i.e. if there are multiple `friends` lists and Riak
can't decide which one is most causally recent, which list should be
deemed "correct" from the standpoint of the application? What criteria
should be applied in making that decision? Should the lists be merged?
Should we pick a `User` object at random?

This decision will always be yours to make. Here, though, we'll keep it
simple and say that the following criterion will hold: if conflicting
lists exist, _the longer list will be the one that our application deems
correct_. So if the user `user1234` has a sibling conflict where one
possible value has `friends` lists with 100, 75, and 10 friends,
respectively, the list of 100 friends will win out. While this might
not make sense in real-world applications, it's a good jumping-off
point. We'll explore the drawbacks of this approach, as well as a better
alternative, in this document as well.

### Creating Our Data Class

We'll start by creating a `User` class for each user's data. Each `User`
object will consist of a `username` as well as a `friends` property that
lists the usernames, as strings, of the user's friends. PHP has no
built-in set type, so we'll store `friends` as an array and de-duplicate
it when we merge lists later on.

```php
class User {
    public $username;
    public $friends;

    public function __construct($username, array $friends = [])
    {
        $this->username = $username;
        $this->friends = $friends;
    }

    public function __toString()
    {
        return json_encode([
            'username' => $this->username,
            'friends' => $this->friends,
            'friends_count' => count($this->friends)
        ]);
    }
}
```

Here's an example of instantiating a new `User` object:

```php
$bashobunny = new User('bashobunny', ['fred', 'barney']);
```

### Implementing a Conflict Resolution Function

Let's say that we've stored a bunch of `User` objects in Riak and that a
few concurrent writes have led to siblings. How is our application going
to deal with that? First, let's say that there's a `User` object stored
in the bucket `users` (which is of the bucket type `siblings`, as
explained above) under the key `bashobunny`. We can fetch the object
that is stored there and see if it has siblings:

```php
$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
    ->buildLocation('bashobunny', 'users', 'siblings')
    ->build()
    ->execute();

echo $response->hasSiblings(); // 1
```

If we get `true`, then there are siblings. So what do we do in that
case? At this point, we need to write a function that resolves the list
of siblings, i.e. reduces the `$response->getSiblings()` array down to one member.
In our case, we need a function that takes a Riak response object as its argument,
applies some logic to the list of values contained in the `siblings` property
of the object, and returns a single value. For our example use case here, we'll
return the sibling with the longest `friends` list:

```php
use \Basho\Riak;
use \Basho\Riak\Command;

function longest_friends_list_resolver(Command\Object\Response $response)
{
    // If there are no siblings, simply hand back the lone object
    if (!$response->hasSiblings()) {
        return $response->getObject();
    }

    $siblings = $response->getSiblings();
    $max_key = 0;
    foreach ($siblings as $key => $sibling) {
        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
            $max_key = $key;
        }
    }

    return $siblings[$max_key];
}
```

We can then embed this function into a more general function for fetching
objects from the users bucket:

```php
function fetch_user_by_username($username, Riak $riak)
{
    $response = (new Command\Builder\FetchObject($riak))
        ->buildLocation($username, 'users', 'siblings')
        ->build()
        ->execute();

    return longest_friends_list_resolver($response);
}

$bashobunny = fetch_user_by_username('bashobunny', $riak);
```

Now, when a `User` object is fetched (assuming that the username acts as
a key for the object), a single value is returned for the `friends`
list. This means that our application can now use a "correct" value
instead of having to deal with multiple values.

## Conflict Resolution and Writes

In the above example, we created a conflict resolver that resolves a
list of discrepant `User` objects and returns a single `User`. It's
important to note, however, that this resolver will only provide the
application with a single "correct" value; it will _not_ write that
value back to Riak. That requires a separate step. When this step should
be undertaken depends on your application. In general, though, we
recommend writing objects to Riak only when the application is ready to
commit them, i.e. when all of the changes that need to be made to the
object have been made and the application is ready to persist the state
of the object in Riak.

Correspondingly, we recommend that updates to objects in Riak follow
these steps:

1. **Read** the object from Riak
2. **Resolve** sibling conflicts if they exist, allowing the
application to reason about one "correct" value for the object (this
step is the subject of this tutorial)
3. **Modify** the object
4. **Write** the object to Riak once the necessary changes have been
made

You can find more on writing objects to Riak, including examples from
the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.9/developing/usage) section.

## More Advanced Example

Resolving sibling `User` values on the basis of which user has the longest
friends list has the benefit of being simple but it's probably not a
good resolution strategy for our social networking application because
it means that unwanted data loss is inevitable. If one friends list
contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
containing `A`, `B`, and `C` will be chosen. So what about friends `D`
and `E`? Those usernames are essentially lost. In the sections below,
we'll implement an alternative strategy as an example.

### Merging the Lists

To avoid losing data like this, a better strategy would be to merge the
lists. We can modify our original resolver function to accomplish
precisely that and will also store the resulting `User` object, as
sketched below.
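
Here's a minimal, illustrative sketch of what that merged resolver might
look like, reusing the `User` class and command builders from above. The
merge details and the final store are assumptions for the sake of the
example, not canonical client behavior:

```php
use \Basho\Riak;
use \Basho\Riak\Command;

function merge_friends_lists_resolver(Command\Object\Response $response)
{
    // If there are no siblings, there is nothing to merge
    if (!$response->hasSiblings()) {
        $data = $response->getObject()->getData();
        return new User($data['username'], $data['friends']);
    }

    // Combine the "friends" arrays from all siblings, dropping duplicates
    $friends = [];
    foreach ($response->getSiblings() as $sibling) {
        $friends = array_merge($friends, $sibling->getData()['friends']);
    }

    // Every sibling shares the same username, since the username is the key
    $username = $response->getSiblings()[0]->getData()['username'];
    return new User($username, array_values(array_unique($friends)));
}

// Resolve, then write the merged value back so future reads see one object
$resolved = merge_friends_lists_resolver($response);
(new Command\Builder\StoreObject($riak))
    ->buildLocation('bashobunny', 'users', 'siblings')
    ->buildObject((string) $resolved, 'application/json')
    ->build()
    ->execute();
```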

The drawback to this approach is the following: with a conflict
resolution strategy like this, it's more or less inevitable that a user
will remove a friend from their friends list, and then that friend will
end up back on the list during a conflict resolution operation. While
that's certainly not desirable, that is likely better than the
alternative proposed in the first example, which entails usernames being
simply dropped from friends lists. Sibling resolution strategies almost
always carry potential drawbacks of this sort.

## Riak Data Types

An important thing to always bear in mind when working with conflict
resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/) that have
specific conflict resolution mechanics built in. If you have data that
can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#maps), then you should seriously
consider using those Data Types instead of creating your own
application-side resolution logic.

In the example above, we were dealing with conflict resolution within a
set, in particular the `friends` list associated with each `User`
object. The merge operation that we built to handle conflict resolution
is analogous to the resolution logic that is built into Riak sets. For
more information on how you could potentially replace the client-side
resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets).




diff --git a/content/riak/kv/2.9.9/developing/usage/conflict-resolution/python.md b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/python.md
new file mode 100644
index 0000000000..5dd1e56189
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/python.md
@@ -0,0 +1,258 @@
---
title_supertext: "Conflict Resolution:"
title: "Python"
description: ""
project: "riak_kv"
project_version: 2.9.9
menu:
  riak_kv-2.9.9:
    name: "Python"
    identifier: "usage_conflict_resolution_python"
    weight: 102
    parent: "usage_conflict_resolution"
toc: true
aliases:
  - /riak/2.9.9/dev/using/conflict-resolution/python
  - /riak/kv/2.9.9/dev/using/conflict-resolution/python
---

For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution), we strongly recommend adopting a
conflict resolution strategy that requires applications to resolve
siblings according to use-case-specific criteria. Here, we'll provide a
brief guide to conflict resolution using the official [Riak Python
client](https://github.com/basho/riak-python-client).

## How the Python Client Handles Conflict Resolution

In the official Python client, every object of the `RiakObject` class
has a `siblings` property that provides access to a list of an object's
sibling values. If there are no siblings, that property will return a
list with only one item. Here's an example of an object with siblings:

```python
bucket = client.bucket('seahawks')
obj = bucket.get('coach')
obj.siblings

# The output:
[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x106cc5250>]
```

So what happens if the length of `obj.siblings` is greater than 1, as in
the case above? The easiest way to resolve siblings automatically with
the Python client is to create a conflict-resolving function that takes
a list of sibling values and returns a single value. Such resolution
functions can be registered either at the object level or the bucket
level. A more complete explanation can be found in the section directly
below.

## Basic Conflict Resolution Example

Let's say that we're building a social network application and storing
lists of usernames representing each user's "friends." Each user will
be of the class `User`, which we'll create below. All of the data for our
application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
to `true`, which means that Riak will generate siblings in certain
cases---siblings that our application will need to be equipped to
resolve when necessary.

The question that we need to ask ourselves at this point is the
following: if a given user has conflicting lists, which list should be
deemed more "correct"? What criteria should be applied? Should the lists
be merged? Should we pick a list at random and deem that list correct?
We'll keep it simple here and say that the following criterion will
hold: if multiple conflicting lists exist, _the longer list will be the
one that our application deems correct_. While this might not make sense
in real-world applications, it's a good jumping-off point.

### Creating Our Data Class

We'll start by creating a `User` class for each user's data. Each `User`
object will consist of a `friends` property that lists the usernames, as
strings, of the user's friends. We will also create a `to_json` method,
as we'll be storing each `User` object as JSON:

```python
class User(object):
    def __init__(self, username, friends):
        self.username = username
        self.friends = friends

    def to_json(self):
        return vars(self)
```

Now, we can create `User` objects and see what they look like as JSON:

```python
new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])

new_user.to_json()
# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
```

### Implementing and Registering a Conflict Resolution Function

Let's say that we've stored a bunch of `User` objects in Riak and that a
few concurrent writes have led to siblings. How is our application going
to deal with that? First, let's say that there's a `User` object stored
in the bucket `users` (which is of the bucket type `siblings`, as
explained above) under the key `bashobunny`. We can fetch the object
that is stored there and see if it has siblings:

```python
bucket = client.bucket_type('siblings').bucket('users')
obj = bucket.get('bashobunny')

print len(obj.siblings) > 1
```

If we get `True`, then there are siblings. So what do we do in that
case? The Python client allows us to write a conflict resolution hook
function that will be triggered any time siblings are found, i.e. any
time `len(obj.siblings) > 1`. A hook function like this needs to take a
single `RiakObject` object as its argument, apply some sort of logic to
the list of values contained in the `siblings` property, and ultimately
reduce the `siblings` list to a single "correct" value. For our example
case, we'll keep the value with the longest `friends` list:

```python
def longest_friends_list_resolver(riak_object):
    # We'll specify a lambda function that operates on the length of
    # each sibling's "friends" list:
    lm = lambda sibling: len(sibling.data['friends'])
    # Then we'll keep only the sibling with the maximum value for the
    # length of the "friends" list:
    riak_object.siblings = [max(riak_object.siblings, key=lm), ]
```

### Registering a Conflict Resolver Function

In the Python client, resolver functions can be registered at the object
level, as in this example:

```python
from riak import RiakObject

bucket = client.bucket_type('siblings').bucket('users')
obj = RiakObject(client, bucket, 'bashobunny')
obj.resolver = longest_friends_list_resolver

# Now, when the object is loaded from Riak, it will resolve to a single
# value instead of multiple values when both commands are executed:
obj.reload()
obj.store()
```

Alternatively, resolvers can be registered at the bucket level, so that
the resolution is applied to all objects in the bucket:

```python
bucket = client.bucket_type('siblings').bucket('users')
bucket.resolver = longest_friends_list_resolver

obj = RiakObject(client, bucket, 'bashobunny')
obj.reload()
obj.store()

# The resolver will also be applied if you perform operations using the
# bucket object:

bucket.get('bashobunny')
bucket.get('some_other_user')
```

## Conflict Resolution and Writes

In the above example, we created a conflict resolver that resolves a
list of discrepant `User` object values and returns a single value. It's
important to note, however, that this resolver will only provide the
application with a single "correct" value; it will _not_ write that
value back to Riak. That requires a separate step. When this step should
be undertaken depends on your application. In general, though, we
recommend writing objects to Riak only when the application is ready to
commit them, i.e. when all of the changes that need to be made to the
object have been made and the application is ready to persist the state
of the object in Riak.

Correspondingly, we recommend that updates to objects in Riak follow
these steps:

1. **Read** the object from Riak
2. **Resolve** sibling conflicts if they exist, allowing the
application to reason about one "correct" value for the object (this
step is the subject of this tutorial)
3. **Modify** the object
4. **Write** the object to Riak once the necessary changes have been
made

You can find more on writing objects to Riak, including code examples
from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.9/developing/usage) section.

## More Advanced Example

Resolving sibling `User` values on the basis of which user has the
longest `friends` list has the benefit of being simple but it's probably
not a good resolution strategy for our social networking application
because it means that unwanted data loss is inevitable. If one friends
list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
list containing `A`, `B`, and `C` will be chosen. So what about friends
`D` and `E`? Those usernames are essentially lost. In the sections
below, we'll implement an alternative strategy as an example.

### Merging the Lists

To avoid losing data like this, a better strategy would be to merge the
lists. 
We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets). 
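
To tie the pieces above together, here's a minimal sketch (using the
same bucket setup as before) of the full read-resolve-write cycle with
the merging resolver registered at the bucket level:

```python
bucket = client.bucket_type('siblings').bucket('users')
bucket.resolver = longest_friends_list_resolver

# Siblings are merged by the resolver when the object is read...
obj = bucket.get('bashobunny')

# ...and the single merged value can then be written back to Riak
obj.store()
```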
+ + + + diff --git a/content/riak/kv/2.9.9/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/ruby.md new file mode 100644 index 0000000000..53e9a9cbe7 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/conflict-resolution/ruby.md @@ -0,0 +1,254 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Ruby" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Ruby" + identifier: "usage_conflict_resolution_ruby" + weight: 101 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/2.9.9/dev/using/conflict-resolution/ruby + - /riak/kv/2.9.9/dev/using/conflict-resolution/ruby +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Ruby +client](https://github.com/basho/riak-ruby-client). + +## How the Ruby Client Handles Conflict Resolution + +In the official Ruby client, every Riak object has a `siblings` property +that provides access to a list of that object's sibling values. If there +are no siblings, that property will return an array with only one item. +Here's an example of an object with siblings: + +```ruby +bucket = client.bucket('seahawks') +obj = bucket.get('coach') +obj.siblings + +# The output: +[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">] +``` + +So what happens if the length of `obj.siblings` is greater than 1, as in +the case above? In order to resolve siblings, you need to create a +resolution function that takes a Riak object and reduces the `siblings` +array down to a single value. An example is provided in the section +below. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends." Each user will be +of the class `User`, which we'll create below. All of the data for our +application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +to `true`, which means that Riak will generate siblings in certain +cases---siblings that our application will need to be equipped to +resolve when necessary. + +The question that we need to ask ourselves at this point is the +following: if a given user has conflicting lists, which list should be +deemed more "correct?" What criteria should be applied? Should the lists +be merged? Should we pick a list at random and deem that list correct? +We'll keep it simple here and say that the following criterion will +hold: if multiple conflict lists exist, _the longer list will be the one +that our application deems correct_. While this might not make sense in +real-world applications, it's a good jumping-off point. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` and a `friends` property that lists +the usernames, as strings, of the user's friends. 
We will also create a
`to_json` method, as we'll be storing each `User` object as JSON:

```ruby
class User
  def initialize(username, friends)
    @username = username
    @friends = friends
  end

  def to_json
    { :username => @username, :friends => @friends }
  end
end
```

Now, we can create `User` objects and see what they look like as JSON:

```ruby
new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])

new_user.to_json
# {:username=>"riakuser127", :friends=>["captheorem238", "siblingsrule572"]}
```

### Implementing a Conflict Resolution Function

Let's say that we've stored a bunch of `User` objects in Riak and that a
few concurrent writes have led to siblings. How is our application going
to deal with that? First, let's say that there's a `User` object stored
in the bucket `users` (which is of the bucket type `siblings`, as
explained above) under the key `bashobunny`. We can fetch the object
that is stored there and see if it has siblings:

```ruby
bucket = client.bucket('users')
obj = bucket.get('bashobunny', type: 'siblings')
p obj.siblings.length > 1
```

If we get `true`, then there are siblings. So what do we do in that
case? At this point, we need to write a function that resolves the list
of siblings, i.e. reduces the `obj.siblings` array down to one member.
In our case, we need a function that takes a single Riak object (or
`RObject` in the Ruby client) as its argument, applies some logic to the
list of values contained in the `siblings` property of the object, and
returns a single value. For our example use case here, we'll return the
sibling with the longest `friends` list:

```ruby
def longest_friends_list_resolver(riak_object)
  # The "conflict?" method is built into the Ruby client
  if riak_object.conflict?
    # The "max_by" method enables us to select the sibling with the
    # longest "friends" list
    riak_object.siblings.max_by{ |user| user.data['friends'].length }
  else
    # If there are no siblings, we can simply return the object's
    # "content" as is
    riak_object.content
  end
end
```

We can then embed this function into a more general function for
fetching objects from the `users` bucket:

```ruby
def fetch_user_by_username(username)
  bucket = client.bucket('users')
  user_object = bucket.get(username, type: 'siblings')
  longest_friends_list_resolver(user_object)
end

bashobunny = fetch_user_by_username('bashobunny')
```

Now, when a `User` object is fetched (assuming that the username acts as
a key for the object), a single value is returned for the `friends`
list. This means that our application can now use a "correct" value
instead of having to deal with multiple values.

## Conflict Resolution and Writes

In the above example, we created a conflict resolver that resolves a
list of discrepant `User` objects and returns a single `User`. It's
important to note, however, that this resolver will only provide the
application with a single "correct" value; it will _not_ write that
value back to Riak. That requires a separate step. When this step should
be undertaken depends on your application. In general, though, we
recommend writing objects to Riak only when the application is ready to
commit them, i.e. when all of the changes that need to be made to the
object have been made and the application is ready to persist the state
of the object in Riak.

Correspondingly, we recommend that updates to objects in Riak follow
these steps:

1. **Read** the object from Riak
2. **Resolve** sibling conflicts if they exist, allowing the
application to reason about one "correct" value for the object (this
step is the subject of this tutorial)
3. **Modify** the object
4. **Write** the object to Riak once the necessary changes have been
made

You can find more on writing objects to Riak, including examples from
the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/2.9.9/developing/usage) section.

## More Advanced Example

Resolving sibling `User` values on the basis of which user has the longest
friends list has the benefit of being simple but it's probably not a
good resolution strategy for our social networking application because
it means that unwanted data loss is inevitable. If one friends list
contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
containing `A`, `B`, and `C` will be chosen. So what about friends `D`
and `E`? Those usernames are essentially lost. In the sections below,
we'll implement an alternative strategy as an example.

### Merging the Lists

To avoid losing data like this, a better strategy would be to merge the
lists. We can modify our original resolver function to accomplish
precisely that and will also store the resulting `User` object:

```ruby
def longest_friends_list_resolver(riak_object)
  # An empty array for use later on
  friends_list = []
  if riak_object.conflict?
    # The "friends" arrays for all siblings will be concatenated into
    # one flat array
    riak_object.siblings.each do |sibling|
      friends_list.concat(sibling.data['friends'])
    end

    # Then we make a new User object. First, we fetch the username from
    # any one of the siblings, then we pass in our new friends list,
    # calling the "uniq" method to eliminate duplicate usernames.
    username = riak_object.siblings[0].data['username']
    new_user = User.new(username, friends_list.uniq)

    # Now we reuse the first sibling as a container for the merged data
    riak_object.siblings[0].data = new_user.to_json

    # And finally we set the siblings property to include just the
    # single, resolved sibling
    riak_object.siblings = [riak_object.siblings[0]]
  else
    riak_object.content
  end
end
```

The drawback to this approach is the following: with a conflict
resolution strategy like this, it's more or less inevitable that a user
will remove a friend from their friends list, and that that friend will
end up back on the list during a conflict resolution operation. While
that's certainly not desirable, that is likely better than the
alternative proposed in the first example, which entails usernames being
simply dropped from friends lists. Sibling resolution strategies almost
always carry potential drawbacks of this sort.

## Riak Data Types

An important thing to always bear in mind when working with conflict
resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/) that have
specific conflict resolution mechanics built in. If you have data that
can be modeled as a [counter]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#maps), then you should seriously
consider using those Data Types instead of creating your own
application-side resolution logic.

In the example above, we were dealing with conflict resolution within a
set, in particular the `friends` list associated with each `User`
object. The merge operation that we built to handle conflict resolution
is analogous to the resolution logic that is built into Riak sets. For
more information on how you could potentially replace the client-side
resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets).




diff --git a/content/riak/kv/2.9.9/developing/usage/content-types.md b/content/riak/kv/2.9.9/developing/usage/content-types.md
new file mode 100644
index 0000000000..d12a6a6859
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/content-types.md
@@ -0,0 +1,192 @@
---
title: "Content Types"
description: ""
project: "riak_kv"
project_version: 2.9.9
menu:
  riak_kv-2.9.9:
    name: "Content Types"
    identifier: "usage_content_types"
    weight: 104
    parent: "developing_usage"
toc: true
aliases:
---

Riak KV is a fundamentally content-agnostic database. You can use it to
store anything you want, from JSON to XML to HTML to binaries to images
and beyond. It's important to note that _all_ objects stored in
Riak need a specified content type. If you don't specify a
content type, the reaction will vary based on your client library:

```java
// In the Java client, the response when storing an object without
// specifying a content type will depend on what is being stored. If you
// store a Java Map, for example, the client will automatically specify
// that the object is "application/json"; if you store a String, the
// client will specify "application/x-www-form-urlencoded"; POJOs are
// stored as JSON by default, and so on.
```

```ruby
# In the Ruby client, you must always specify a content type. If you
# don't, you'll see the following error:
ArgumentError: content_type is not defined!
```

```php
# PHP will default to cURL's default content-type for POST & PUT requests:
# application/x-www-form-urlencoded

# If you use the StoreObject::buildJsonObject() method when building your command,
# it will store the item with application/json as the content-type
```

```python
# In the Python client, the default content type is "application/json".
# Because of this, you should always make sure to specify the content
# type when storing other types of data.
```

```csharp
// Using the Riak .NET Client, the response when storing an object without
// specifying a content type will depend on what is being stored.
// If you store a Dictionary, for example, the client will
// automatically specify that the object is "application/json";
// POCOs are stored as JSON by default, and so on.
```

```javascript
// In the Node.js client, the default content type is "application/json".
// Because of this, you should always make sure to specify the content
// type when storing other types of data.
```

```erlang
%% In the Erlang client, the response when storing an object without
%% specifying a content type will depend on what is being stored. If
%% you store a simple binary, for example, the client will automatically
%% specify that the object is "application/octet-stream"; if you store a
%% string, the client will specify "application/x-erlang-binary"; and so
%% on.
```

```golang
// In the Go client, you must always specify a content type.
```

Because content type negotiation varies so widely from client to client,
we recommend consulting the documentation for your preferred client for
more information.
+ +## Specifying Content Type + +For all writes to Riak, you will need to specify a content type, for +example `text/plain` or `application/json`. + +```java +Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius"); +BinaryValue text = BinaryValue.create("I have nothing to declare but my genius"); +RiakObject obj = new RiakObject() + .setContentType("text/plain") + .setValue(text); +StoreValue store = new StoreValue.Builder(myKey, obj) + .build(); +client.execute(store); +``` + +```ruby +bucket = client.bucket_type('quotes').bucket('oscar_wilde') +obj = Riak::RObject.new(bucket, 'genius') +obj.content_type = 'text/plain' +obj.raw_data = 'I have nothing to declare but my genius' +obj.store +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildLocation('genius', 'oscar_wilde', 'quotes') + ->buildObject('I have nothing to declare but my genius!', 'text/plain') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('quotes').bucket('oscar_wilde') +obj = RiakObject(client, bucket, 'genius') +obj.content_type = 'text/plain' +obj.data = 'I have nothing to declare but my genius' +obj.store() +``` + +```csharp +var id = new RiakObjectId("quotes", "oscar_wilde", "genius"); +var obj = new RiakObject(id, "I have nothing to declare but my genius", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setValue('I have nothing to declare but my genius'); +client.storeValue({ + bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius', + value: riakObj +}, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>}, + <<"genius">>, + <<"I have nothing to declare but my genius">>, + <<"text/plain">>). +riakc_pb_socket:put(Pid, Object). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("I have nothing to declare but my genius"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("quotes"). + WithBucket("oscar_wilde"). + WithKey("genius"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +svc := cmd.(*riak.StoreValueCommand) +rsp := svc.Response +``` + +```curl +curl -XPUT \ + -H "Content-Type: text/plain" \ + -d "I have nothing to declare but my genius" \ + http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius + +# Please note that POST is also a valid method for writes, for the sake +# of compatibility +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/usage/creating-objects.md b/content/riak/kv/2.9.9/developing/usage/creating-objects.md new file mode 100644 index 0000000000..3a8cefb0d8 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/creating-objects.md @@ -0,0 +1,555 @@ +--- +title: "Creating Objects in Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Creating Objects" + identifier: "usage_creating_objects" + weight: 100 + parent: "developing_usage" +toc: true +aliases: +--- + +[usage content types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/content-types + +Writes in Riak KV (storing or modifying objects) are like HTTP `PUT` +requests. 
Here is the basic form of writes: + +``` +PUT /types/<type>/buckets/<bucket>/keys/<key> + +# If you're using HTTP to interact with Riak, you can also use POST +``` + +As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/bucket-types). + +The object we're storing will be very simple, just a basic text snippet +of something that Rufus might say. Let's build the object and then store +it. + +``` java +String quote = "WOOF!"; +Namespace bucket = new Namespace("animals", "dogs"); +Location rufusLocation = new Location(bucket, "rufus"); +RiakObject rufusObject = new RiakObject() + .setContentType("text/plain") + .setValue(BinaryValue.create(quote)); +StoreValue storeOp = new StoreValue.Builder(rufusObject) + .withLocation(rufusLocation) + .build(); +client.execute(storeOp); +``` + +``` ruby +bucket = client.bucket_type('animals').bucket('dogs') +obj = Riak::RObject.new(bucket, 'rufus') +obj.content_type = 'text/plain' +obj.data = 'WOOF!' +obj.store +``` + +``` php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildLocation('rufus', 'users', 'animals') + ->buildObject('WOOF!', 'text/plain') + ->build() + ->execute(); +``` + +``` python +bucket = client.bucket_type('animals').bucket('dogs') +obj = RiakObject(client, bucket, 'rufus') +obj.content_type = 'text/plain' +obj.data = 'WOOF!' +obj.store() +``` + +``` csharp +var id = new RiakObjectId("animals", "dogs", "rufus") +var obj = new RiakObject(id, "WOOF!", "text/plain"); +var result = client.Put(obj); +``` + +``` javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setValue('WOOF!'); +client.storeValue({ + bucketType: 'animals', bucket: 'dogs', key: 'rufus', + value: riakObj +}, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +``` golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("WOOF!"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("dogs"). + WithKey("rufus"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +svc := cmd.(*riak.StoreValueCommand) +rsp := svc.Response +``` + +Notice that we specified both a value for the object, i.e. `WOOF!`, and +a content type, `text/plain`. See [content types][usage content types] for more information. + +Now, you run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/2.9.9/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket-type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/bucket-types). + +### Store an Object + +Your application will often have its own method of generating the keys +for its data, e.g. on the basis of timestamps. If so, storing that data +is easy. The basic request looks like this. + +``` +PUT /types/TYPE/buckets/BUCKET/keys/KEY + +# If you're using HTTP, POST can be used instead of PUT. The only +# difference between POST and PUT is that you should POST in cases where +# you want Riak to auto-generate a key. 
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [{w, 3}]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
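+The other parameters from the table above ride along in the same query
+string. A hedged sketch with illustrative values (the numbers here are
+assumptions for demonstration, not recommendations):
+
+```curl
+# w=3: wait for 3 replica acks; dw=2: 2 durable writes; pw=1: at least
+# one primary vnode must respond. Quote the URL so the shell does not
+# interpret the "&".
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&dw=2&pw=1"
+```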
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. Like a `GET` request, `300 Multiple
+Choices` may be returned if siblings existed or were created as part of
+the operation, and the response can be dealt with similarly.
+
+Normal HTTP status codes (responses will vary for client libraries):
+
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+For example, using the same object from above:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.RETURN_BODY, true)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3, returnbody: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+options.SetReturnBody(true);
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, returnBody: true, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var viper = riakObj.value;
+    logger.info("dodge viper: %s", viper.toString('utf8'));
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
+```
+
+### Store a New Object and Assign a Random Key
+
+If your application would rather leave key generation up to Riak, issue
+a `POST` request to the bucket URL instead of a `PUT` to a bucket/key
+pair:
+
+```
+POST /types/TYPE/buckets/BUCKET/keys
+```
+
+If you don't pass Riak a `key` name after the bucket, it will know to
+create one for you.
+
+Supported headers are the same as for bucket/key write requests, though
+`X-Riak-Vclock` will never be relevant for these POST requests.
+Supported query parameters are also the same as for bucket/key PUT
+requests.
+ +Normal status codes: + +* `201 Created` + +This command will store an object in the bucket `random_user_keys`, +which bears the bucket type `users`. + +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)
+
+// Output:
+// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
+```
+
+```curl
+curl -i -XPOST \
+  -H "Content-Type: text/plain" \
+  -d "this is a test" \
+  http://localhost:8098/types/users/buckets/random_user_keys/keys
+
+# In the output, you should see a Location header that will give you the
+# location of the object in Riak, with the key at the end:
+
+Location: /buckets/random_user_keys/keys/G7FYUXtTsEdru4NP32eijMIRK3o
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/custom-extractors.md b/content/riak/kv/2.9.9/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..134d22405a
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/custom-extractors.md
@@ -0,0 +1,424 @@
+---
+title: "Custom Extractors"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Custom Extractors"
+    identifier: "usage_custom_extractors"
+    weight: 113
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/search/custom-extractors
+  - /riak/kv/2.9.9/dev/search/custom-extractors
+---
+
+Solr, and by extension Riak Search, has default extractors for a wide
+variety of data types, including JSON, XML, and plaintext. Riak Search
+ships with the following extractors:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+No specified type | `yz_noop_extractor`
+
+There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/usage/searching-data-types).
+
+If you're working with a data format that does not have a default Solr
+extractor, you can create your own and register it with Riak Search.
+We'll show you how to do so by way of example.
+
+## The Extractor Interface
+
+Creating a custom extractor involves creating an Erlang module that
+implements two functions:
+
+* `extract/1` - Takes the contents of the object and calls `extract/2`
+  with the same contents and an empty list
+* `extract/2` - Takes the contents of the object and returns an Erlang
+  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
+  single field name and a single value associated with that name
+
+The following extractor shows how a pure text extractor implements those
+two functions:
+
+```erlang
+-module(search_test_extractor).
+-include("yokozuna.hrl").
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+extract(Value, Opts) ->
+    FieldName = field_name(Opts),
+    [{FieldName, Value}].
+
+-spec field_name(proplist()) -> any().
+field_name(Opts) ->
+    proplists:get_value(field_name, Opts, text).
+```
+
+This extractor takes the contents of a `Value` and returns a proplist
+with a single field name (in this case `text`) and the single value.
+This function can be run in the Erlang shell. Let's run it providing the
+text `hello`:
+
+```erlang
+> c(search_test_extractor).
+%% {ok, search_test_extractor}
+
+> search_test_extractor:extract("hello").
+
+%% Console output:
+[{text, "hello"}]
+```
+
+Upon running this command, the value `hello` would be indexed in Solr
+under the field name `text`.
If you wanted to find all objects with a +`text` field that begins with `Fourscore`, you could use the +Solr query `text:Fourscore*`, to give just one example. + +## An Example Custom Extractor + +Let's say that we're storing HTTP header packet data in Riak. Here's an +example of such a packet: + +``` +GET http://www.google.com HTTP/1.1 +``` + +We want to register the following information in Solr: + +Field name | Value | Extracted value in this example +:----------|:------|:------------------------------- +`method` | The HTTP method | `GET` +`host` | The URL's host | `www.google.com` +`uri` | The URI, i.e. what comes after the host | `/` + +The example extractor below would provide the three desired +fields/values. It relies on the +[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3) +function from Erlang's standard library. + +```erlang +-module(yz_httpheader_extractor). +-compile(export_all). + +extract(Value) -> + extract(Value, []). + +%% In this example, we can ignore the Opts variable from the example +%% above, hence the underscore: +extract(Value, _Opts) -> + {ok, + {http_request, + Method, + {absoluteURI, http, Host, undefined, Uri}, + _Version}, + _Rest} = erlang:decode_packet(http, Value, []), + [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}]. +``` + +This file will be stored in a `yz_httpheader_extractor.erl` file (as +Erlang filenames must match the module name). Now that our extractor has +been written, it must be compiled and registered in Riak before it can +be used. + +## Registering Custom Extractors + +In order to use a custom extractor, you must create a compiled `.beam` +file out of your `.erl` extractor file and then tell Riak where that +file is located. Let's say that we have created a +`search_test_extractor.erl` file in the directory `/opt/beams`. First, +we need to compile that file: + +```bash +erlc search_test_extractor.erl +``` + +To instruct Riak where to find the resulting +`search_test_extractor.beam` file, we'll need to add a line to an +`advanced.config` file in the node's `/etc` directory (more information +can be found in our documentation on [advanced]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#advanced-configuration)). Here's an +example: + +```advancedconfig +[ + %% Other configs + {vm_args, [ + {"-pa /opt/beams", ""} + ]}, + %% Other configs +] +``` + +This will instruct the Erlang VM on which Riak runs to look for compiled +`.beam` files in the proper directory. You should re-start the node at +this point. Once the node has been re-started, you can use the node's +Erlang shell to register the `yz_httpheader_extractor`. First, attach to +the shell: + +```bash +riak attach +``` + +At this point, we need to choose a MIME type for our extractor. Let's +call it `application/httpheader`. Once you're in the shell: + +```erlang +> yz_extractor:register("application/httpheader", yz_httpheader_extractor). +``` + +If successful, this command will return a list of currently registered +extractors. 
It should look like this:
+
+```erlang
+[{default,yz_noop_extractor},
+ {"application/httpheader",yz_httpheader_extractor},
+ {"application/json",yz_json_extractor},
+ {"application/riak_counter",yz_dt_extractor},
+ {"application/riak_map",yz_dt_extractor},
+ {"application/riak_set",yz_dt_extractor},
+ {"application/xml",yz_xml_extractor},
+ {"text/plain",yz_text_extractor},
+ {"text/xml",yz_xml_extractor}]
+```
+
+If the `application/httpheader` extractor is part of that list, then the
+extractor has been successfully registered.
+
+## Verifying Our Custom Extractor
+
+Now that Riak Search knows how to decode and extract HTTP header packet
+data, let's store some in Riak and then query it. We'll put the example
+packet data from above in a `google_packet.bin` file. Then, we'll `PUT`
+that binary to Riak's `/search/extract` endpoint:
+
+```curl
+# Note that we use our custom MIME type in the Content-Type header
+curl -XPUT $RIAK_HOST/search/extract \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+That should return the following JSON:
+
+```json
+{
+  "method": "GET",
+  "host": "www.google.com",
+  "uri": "/"
+}
+```
+
+We can also verify this in the Erlang shell (whether in a Riak node's
+Erlang shell or otherwise):
+
+```erlang
+yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor).
+
+%% Console output:
+[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}]
+```
+
+## Indexing and Searching HTTP Header Packet Data
+
+Now that Solr knows how to extract HTTP header packet data, we need to
+create a schema that extends the [default schema]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
+to `<fields>` in the schema, which we'll name `http_header_schema` and
+store in a `http_header_schema.xml` file:
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="http_header_schema" version="1.5">
+<fields>
+  <!-- other required fields here -->
+
+  <field name="method" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="host" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/>
+</fields>
+```
+
+Now, we can store the schema:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("http_header_schema.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_xml = File.read('http_header_schema.xml')
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```php
+$schema_string = file_get_contents('http_header_schema.xml');
+(new \Basho\Riak\Command\Builder\StoreSchema($riak))
+  ->withName('http_header_schema')
+  ->withSchemaString($schema_string)
+  ->build()
+  ->execute();
+```
+
+```python
+schema_xml = open('http_header_schema.xml').read()
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/http_header_schema \
+  -H 'Content-Type: application/xml' \
+  --data-binary @http_header_schema.xml
+```
+
+Riak now has our schema stored and ready for use.
+Let's create a search index called `header_data` that's associated with
+our new schema:
+
+```java
+YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
+StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreIndex($riak))
+  ->withName('header_data')
+  ->usingSchema('http_header_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/header_data \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"http_header_schema"}'
+```
+
+Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types)
+for all of the HTTP header data that we plan to store. Any bucket that
+bears this type will be associated with our `header_data` search index.
+We'll call our bucket type `http_data_store`.
+
+```bash
+riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
+riak-admin bucket-type activate http_data_store
+```
+
+Let's use the same `google_packet.bin` file that we used previously and
+store it in a bucket with the `http_data_store` bucket type, making sure
+to use our custom `application/httpheader` MIME type:
+
+```java
+Location key = new Location(new Namespace("http_data_store", "packets"), "google");
+File packetData = new File("google_packet.bin");
+byte[] packetBinary = FileUtils.readFileToByteArray(packetData);
+
+RiakObject packetObject = new RiakObject()
+        .setContentType("application/httpheader")
+        .setValue(BinaryValue.create(packetBinary));
+
+StoreValue storeOp = new StoreValue.Builder(packetObject)
+        .withLocation(key)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+packet_data = File.read('google_packet.bin')
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = Riak::RObject.new(bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.raw_data = packet_data
+obj.store
+```
+
+```php
+$object = new Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('google', 'packets', 'http_data_store')
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+packet_data = open('google_packet.bin').read()
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = RiakObject(client, bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.data = packet_data
+obj.store()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
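+As a quick sanity check before querying, we can read the object back and
+confirm that it comes back with our custom MIME type. A minimal sketch,
+assuming the store above succeeded:
+
+```curl
+curl -i $RIAK_HOST/types/http_data_store/buckets/packets/keys/google
+
+# The response headers should include our custom MIME type:
+# Content-Type: application/httpheader
+```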
+Now that we have some header packet data stored, we can query our
+`header_data` index on whatever basis we'd like. First, let's verify
+that we'll get one result if we query for objects that have the HTTP
+method `GET`:
+
+```java
+// Using the same method from above:
+String query = "method:GET";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withQuery('method:GET')
+  ->withIndexName('header_data')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"
+
+# This should return a fairly large JSON object with a "num_found" field.
+# The value of that field should be 1.
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/deleting-objects.md b/content/riak/kv/2.9.9/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..7e0832a6bb
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/deleting-objects.md
@@ -0,0 +1,157 @@
+---
+title: "Deleting Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Deleting Objects"
+    identifier: "usage_deleting_objects"
+    weight: 103
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+The delete command follows a predictable pattern and looks like this:
+
+```
+DELETE /types/TYPE/buckets/BUCKET/keys/KEY
+```
+
+The normal HTTP response codes for `DELETE` operations are `204 No
+Content` and `404 Not Found`. 404 responses are *normal*, in the sense
+that `DELETE` operations are idempotent and not finding the resource has
+the same effect as deleting it.
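+You can see this for yourself by issuing the same `DELETE` twice. A
+minimal sketch, assuming a local node and the `genius` object used in
+the example that follows:
+
+```curl
+# The first delete removes the object:
+curl -i -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+# HTTP/1.1 204 No Content
+
+# Repeating the exact same request finds nothing, which is still "success":
+curl -i -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+# HTTP/1.1 404 Not Found
+```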
+Let's try to delete the `genius` key from the `oscar_wilde` bucket
+(which bears the type `quotes`):
+
+```java
+Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
+client.execute(delete);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\DeleteObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+string key = rslt.Value.Key;
+id = new RiakObjectId("users", "random_user_keys", key);
+var del_rslt = client.Delete(id);
+```
+
+```javascript
+// continuing from above example
+options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    key: generatedKey
+};
+client.deleteValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>).
+```
+
+```golang
+// Continuing from above example
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithKey(rsp.GeneratedKey).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+```
+
+## Client Library Examples
+
+If you are updating an object that has been deleted---or if an update
+might target a deleted object---we recommend that
+you first fetch the [causal context]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context) of the object prior to updating.
+This can be done by setting the `deletedvclock` parameter to `true` as
+part of the [fetch operation]({{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers/fetch-object). This can also be done
+with the official Riak clients for Ruby, Java, and Erlang, as in the
+examples below:
+
+```ruby
+object.delete
+deleted_object = bucket.get('key', deletedvclock: true)
+deleted_object.vclock
+```
+
+```python
+# It is not currently possible to fetch the causal context for a deleted
+# key in the Python client.
+```
+
+```java
+Location loc = new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+FetchValue fetch = new FetchValue.Builder(loc)
+        .withOption(Option.DELETED_VCLOCK, true)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+System.out.println(response.getVclock().asString());
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"bucket_type">>, <<"bucket">>},
+                                <<"key">>,
+                                [deleted_vclock]).
+
+%% In the Erlang client, the vector clock is accessible using the Obj
+%% object obtained above.
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type')
+  ->build()
+  ->execute();
+
+echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/document-store.md b/content/riak/kv/2.9.9/developing/usage/document-store.md
new file mode 100644
index 0000000000..227881c67b
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/document-store.md
@@ -0,0 +1,617 @@
+---
+title: "Implementing a Document Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Implementing a Document Store"
+    identifier: "usage_document_store"
+    weight: 112
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/search/document-store
+  - /riak/kv/2.9.9/dev/search/document-store
+---
+
+Although Riak wasn't explicitly created as a document store, two
+features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/)---make it possible to use Riak as a
+highly scalable document store with rich querying capabilities. In this
+tutorial, we'll build a basic implementation of a document store using
+[Riak maps]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#maps).
+
+## Basic Approach
+
+Riak Search enables you to implement a document store in Riak in a
+variety of ways. You could, for example, store and query JSON objects or
+XML and then retrieve them later via Solr queries. In this tutorial,
+however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#maps),
+index that data using Riak Search, and then run Solr queries against
+those stored objects.
+
+You can think of these Search indexes as **collections**.
Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/2.9.9/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. + +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). 
+Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @blog_post_schema.xml +``` + +With our schema uploaded, we can create an index called `blog_posts` and +associate that index with our schema: + +```java +YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema"); +StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('blog_posts') + ->usingSchema('blog_post_schema') + ->build() + ->execute(); +``` + +```python +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```csharp +var idx = new SearchIndex("blog_posts", "blog_post_schema"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: 'blog_post_schema', + indexName: 'blog_posts' +}; +client.storeIndex(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/blog_posts \ + -H 'Content-Type: application/json' \ + -d '{"schema": "blog_post_schema"}' +``` + +## How Collections will Work + +Collections are not a concept that is native to Riak but we can easily +mimic collections by thinking of a bucket type as a collection. When we +associate a bucket type with a Riak Search index, all of the objects +stored in any bucket of that bucket type will be queryable on the basis +of that one index. For this tutorial, we'll create a bucket type called +`cms` and think of that as a collection. 
We could also restrict our
+`blog_posts` index to a single bucket just as easily and think of that
+as a queryable collection, but we will not do that in this tutorial.
+
+The advantage of the bucket-type-based approach is that we could store
+blog posts from different blogs in different buckets and query them
+all at once as part of the same index. It depends on the use case at
+hand. In this tutorial, we'll only be storing posts from one blog, which
+is called "Cat Pics Quarterly" and provides in-depth theoretical
+discussions of cat pics with a certain number of Reddit upvotes. All of
+the posts in this blog will be stored in the bucket
+`cat_pics_quarterly`.
+
+First, let's create our `cms` bucket type and associate it with the
+`blog_posts` index:
+
+```bash
+riak-admin bucket-type create cms \
+  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
+riak-admin bucket-type activate cms
+```
+
+Now, any object stored in any bucket of the type `cms` will be indexed
+as part of our "collection."
+
+## Storing Blog Posts as Maps
+
+Now that we know how each element of a blog post can be translated into
+one of the Riak Data Types, we can create an interface in our
+application to serve as that translation layer. Using the method
+described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-modeling), we can construct a
+class that looks like this:
+
+```java
+import java.util.Set;
+
+public class BlogPost {
+    private String title;
+    private String author;
+    private String content;
+    private Set<String> keywords;
+    private DateTime datePosted;
+    private Boolean published;
+    private static final String bucketType = "cms";
+
+    private Location location;
+
+    private RiakClient client;
+
+    public BlogPost(RiakClient client,
+                    String bucketName,
+                    String title,
+                    String author,
+                    String content,
+                    Set<String> keywords,
+                    DateTime datePosted,
+                    Boolean published) {
+        this.client = client;
+        this.location = new Location(new Namespace(bucketType, bucketName), null);
+        this.title = title;
+        this.author = author;
+        this.content = content;
+        this.keywords = keywords;
+        this.datePosted = datePosted;
+        this.published = published;
+    }
+
+    public void store() throws Exception {
+        RegisterUpdate titleUpdate = new RegisterUpdate(title);
+        RegisterUpdate authorUpdate = new RegisterUpdate(author);
+        RegisterUpdate contentUpdate = new RegisterUpdate(content);
+        SetUpdate keywordsUpdate = new SetUpdate();
+        for (String keyword : keywords) {
+            keywordsUpdate.add(keyword);
+        }
+        RegisterUpdate dateUpdate =
+            new RegisterUpdate(datePosted.toString("YYYY-MM-DD HH:MM"));
+        FlagUpdate publishedUpdate = new FlagUpdate(published);
+        MapUpdate mapUpdate = new MapUpdate()
+            .update("title", titleUpdate)
+            .update("author", authorUpdate)
+            .update("content", contentUpdate)
+            .update("keywords", keywordsUpdate)
+            .update("date", dateUpdate)
+            .update("published", publishedUpdate);
+        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
+            .build();
+        client.execute(storeBlogPost);
+    }
+}
+```
+
+```ruby
+class BlogPost
+  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
+    bucket = client.bucket_type('cms').bucket(bucket_name)
+    map = Riak::Crdt::Map.new(bucket, nil)
+    map.batch do |m|
+      m.registers['title'] = title
+      m.registers['author'] = author
+      m.registers['content'] = content
+      keywords.each do |k|
+        m.sets['keywords'].add(k)
+      end
+      m.registers['date'] = date_posted
+      if published
+        m.flags['published'] = true
+      end
+    end
+  end
+end
+```
+
+```php
+class BlogPost {
+    private $title = '';
+    private $author = '';
+    private $content = '';
+    private $keywords = [];
+    private $datePosted = '';
+    private $published = false;
+    private $bucketType = "cms";
+
+    private $bucket = null;
+
+    private $riak = null;
+
+    public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
+    {
+        $this->riak = $riak;
+        $this->bucket = new Bucket($bucket, $this->bucketType);
+        $this->title = $title;
+        $this->author = $author;
+        $this->content = $content;
+        $this->keywords = $keywords;
+        $this->datePosted = $date;
+        $this->published = $published;
+    }
+
+    public function store()
+    {
+        $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));
+
+        foreach($this->keywords as $keyword) {
+            $setBuilder->add($keyword);
+        }
+
+        (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
+            ->updateRegister('title', $this->title)
+            ->updateRegister('author', $this->author)
+            ->updateRegister('content', $this->content)
+            ->updateRegister('date', $this->datePosted)
+            ->updateFlag('published', $this->published)
+            ->updateSet('keywords', $setBuilder)
+            ->withBucket($this->bucket)
+            ->build()
+            ->execute();
+    }
+}
+```
+
+```python
+from riak.datatypes import Map
+
+class BlogPost:
+    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
+        bucket = client.bucket_type('cms').bucket(bucket_name)
+        self.map = Map(bucket, None)
+        self.map.registers['title'].assign(title)
+        self.map.registers['author'].assign(author)
+        self.map.registers['content'].assign(content)
+        for k in keywords:
+            self.map.sets['keywords'].add(k)
+        self.map.registers['date'].assign(date_posted)
+        if published:
+            self.map.flags['published'].enable()
+        self.map.store()
+```
+
+```csharp
+/*
+ * Please see the code in the RiakClientExamples project:
+ * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
+ */
+```
+
+```javascript
+/*
+ * Please see the code in the examples repository:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
+ */
+```
+
+Now, we can store some blog posts. We'll start with just one:
+
+```java
+Set<String> keywords = new HashSet<String>();
+keywords.add("adorbs");
+keywords.add("cheshire");
+
+BlogPost post1 = new BlogPost(client, // client object
+                              "cat_pics_quarterly", // bucket
+                              "This one is so lulz!", // title
+                              "Cat Stevens", // author
+                              "Please check out these cat pics!", // content
+                              keywords, // keywords
+                              new DateTime(), // date posted
+                              true); // published
+try {
+    post1.store();
+} catch (Exception e) {
+    System.out.println(e);
+}
+```
+
+```ruby
+keywords = ['adorbs', 'cheshire']
+date = Time.now.strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost.new('cat_pics_quarterly',
+                          'This one is so lulz!',
+                          'Cat Stevens',
+                          'Please check out these cat pics!',
+                          keywords,
+                          date,
+                          true)
+```
+
+```php
+$keywords = ['adorbs', 'cheshire'];
+$date = new \DateTime('now');
+
+$post1 = new BlogPost(
+    $riak, // client object
+    'cat_pics_quarterly', // bucket
+    'This one is so lulz!', // title
+    'Cat Stevens', // author
+    'Please check out these cat pics!', // content
+    $keywords, // keywords
+    $date, // date posted
+    true // published
+);
+```
+
+```python
+import datetime
+
+keywords = ['adorbs', 'cheshire']
+date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost('cat_pics_quarterly',
+                      'This one is so lulz!',
+                      'Cat Stevens',
+                      'Please check out these cat pics!',
+                      keywords,
+                      date,
+                      True)
+```
+
+```csharp
+var keywords = new HashSet<string> { "adorbs", "cheshire" };
+
+var post = new BlogPost(
+    "This one is so lulz!",
+    "Cat Stevens",
+    "Please check out these cat pics!",
+    keywords,
+    DateTime.Now,
+    true);
+
+var repo = new BlogPostRepository(client, "cat_pics_quarterly");
+string id = repo.Save(post);
+```
+
+```javascript
+var post = new BlogPost(
+    'This one is so lulz!',
+    'Cat Stevens',
+    'Please check out these cat pics!',
+    [ 'adorbs', 'cheshire' ],
+    new Date(),
+    true
+);
+
+var repo = new BlogPostRepository(client, 'cat_pics_quarterly');
+
+repo.save(post, function (err, rslt) {
+    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
+});
+```
+
+## Querying
+
+Now that we have some blog posts stored in our "collection," we can
+start querying for whatever we'd like. Let's say that we want to find
+all blog posts with the keyword `funny` (after all, some cat pics are
+quite serious, and we may not want those).
+
+```java
+String index = "blog_posts";
+String query = "keywords_set:funny";
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'keywords_set:funny')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('keywords_set:funny')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'keywords_set:funny')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('keywords_set:funny')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny"
+```
+
+Or we can find posts that contain the word `furry`:
+
+```java
+String index = "blog_posts";
+String query = "content_register:furry";
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'content_register:furry')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('content_register:furry')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'content_register:furry')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('content_register:furry')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry"
+```
+
+Here are some more possible queries:
+
+Info | Query
+:----|:-----
+Unpublished posts | `published_flag:false`
+Titles that begin with `Loving*` | `title_register:Loving*`
+Post bodies containing the words `furry` and `jumping` | `content_register:(furry AND jumping)`
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/mapreduce.md b/content/riak/kv/2.9.9/developing/usage/mapreduce.md
new file mode 100644
index 0000000000..59cd0657d5
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/mapreduce.md
@@ -0,0 +1,246 @@
+---
+title: "Using MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Using MapReduce"
+    identifier: "usage_mapreduce"
+    weight: 106
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/using/mapreduce
+  - /riak/kv/2.9.9/dev/using/mapreduce
+---
+
+[usage 2i]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes
+[usage search]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/search
+[usage types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types
+[api http]: {{<baseurl>}}riak/kv/2.9.9/developing/api/http
+[api pb]: {{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode
+[guide mapreduce]: 
{{<baseurl>}}riak/kv/2.9.9/developing/app-guide/advanced-mapreduce + +{{% note title="Use MapReduce sparingly" %}} +In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive to the extent that they can degrade performance in +production clusters operating under load. Thus, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. +{{% /note %}} + +MapReduce (M/R) is a technique for dividing data processing work across +a distributed system. It takes advantage of the parallel processing +power of distributed systems and also reduces network bandwidth, as the +algorithm is passed around to where the data lives rather than +transferring a potentially huge dataset to a client algorithm. + +You can use MapReduce for things like: filtering documents by +tags, counting words in documents, and extracting links to related data. +In Riak KV, MapReduce is one method for querying that is not strictly based +on key querying, alongside [secondary indexes][usage 2i] +and [search][usage search]. MapReduce jobs can be submitted through the +[HTTP API][api http] or the [Protocol Buffers API][api pb], although we +strongly recommend using the Protocol Buffers API for performance +reasons. + +## Features + +* Map phases execute in parallel with data locality. +* Reduce phases execute in parallel on the node where the job was + submitted. +* MapReduce queries written in Erlang. + +## When to Use MapReduce + +* When you know the set of objects over which you want to MapReduce + (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key) +* When you want to return actual objects or pieces of objects and not + just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on + non-key-based queries, but they only return lists of keys and not + whole objects. +* When you need the utmost flexibility in querying your data. MapReduce + gives you full access to your object and lets you pick it apart any + way you want. + +## When Not to Use MapReduce + +* When you want to query data over an entire bucket. MapReduce uses a + list of keys, which can place a lot of demand on the cluster. +* When you want latency to be as predictable as possible. + +## How it Works + +The MapReduce framework helps developers divide a query into steps, +divide the dataset into chunks, and then run those step/chunk pairs in +separate physical hosts. + +There are two steps in a MapReduce query: + +* **Map** - The data collection phase, which breaks up large chunks of + work into smaller ones and then takes action on each chunk. Map + phases consist of a function and a list of objects on which the map + operation will operate. +* **Reduce** - The data collation or processing phase, which combines + the results from the map step into a single output. The reduce phase + is optional. + +Riak KV MapReduce queries have two components: + +* A list of inputs +* A list of phases + +The elements of the input list are object locations as specified by +[bucket type][usage types], bucket, and key. The elements of the +phases list are chunks of information related to a map, a reduce, or a +link function. + +A MapReduce query begins when a client makes the request to Riak KV. 
The node that the client contacts to make the request becomes the
+*coordinating node* responsible for the MapReduce job. As described
+above, each job consists of a list of phases, where each phase is either
+a map or a reduce phase. The coordinating node uses the list of phases
+to route the object keys and the function that will operate over the
+objects stored in those keys, and instructs the proper [vnode][glossary vnode] to
+run that function over the right objects.
+
+After running the map function, the results are sent back to the
+coordinating node. This node then concatenates the list and passes that
+information over to a reduce phase on the same coordinating node,
+assuming that the next phase in the list is a reduce phase.
+
+The diagram below provides an illustration of how a coordinating vnode
+orchestrates a MapReduce job.
+
+![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png)
+
+## Example
+
+In this example, we'll create four objects with the text "caremad"
+repeated a varying number of times and store those objects in the bucket
+`training` (which does not bear a [bucket type][usage types]).
+An Erlang MapReduce function will be used to count the occurrences of
+the word "caremad."
+
+### Data object input commands
+
+For the sake of simplicity, we'll use [curl](http://curl.haxx.se/)
+in conjunction with Riak KV's [HTTP API][api http] to store the objects:
+
+```curl
+curl -XPUT http://localhost:8098/buckets/training/keys/foo \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad data goes here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bar \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad caremad'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/baz \
+  -H 'Content-Type: text/plain' \
+  -d 'nothing to see here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bam \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad'
+```
+
+### MapReduce invocation
+
+Invoking a MapReduce function from a compiled Erlang program requires
+that the function be compiled and distributed to all nodes.
+
+For interactive use, however, it's not necessary to do so; instead, we
+can invoke the client library from the
+[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define
+functions to send to Riak KV on the fly.
+
+First we define the map function, which specifies that we want to get
+the key for each object in the bucket `training` that contains the text
+`caremad`.
+
+We're going to generalize and optimize it a bit by supplying a
+compiled regular expression when we invoke MapReduce; our function
+will expect that as the third argument.
+
+```erlang
+ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of
+    {match, Matches} -> [{riak_object:key(O), length(Matches)}];
+    nomatch -> [{riak_object:key(O), 0}]
+end end.
+```
+
+Next, to call `ReFun` on all keys in the `training` bucket, we can do
+the following in the Erlang shell.
+
+{{% note title="Warning" %}}
+Do not use this in a production
+environment; listing all keys to identify those in the `training` bucket
+is a very expensive process.
+{{% /note %}}
+
+```erlang
+{ok, Re} = re:compile("caremad").
+```
+
+That will return output along the following lines, verifying that
+compilation has completed:
+
+```
+{ok,{re_pattern,0,0,
+    <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100,
+    ...>>}}
+```
+
+Then, we can create a socket link to our cluster:
+
+```erlang
+{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+                              [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
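+MapReduce jobs can also be submitted over the HTTP [/mapred endpoint][api http].
+Since `ReFun` exists only in our local shell, the hedged sketch below swaps
+in the built-in `riak_kv_mapreduce:map_object_value` function, which simply
+returns each object's value rather than counting matches:
+
+```curl
+# Inputs are [bucket, key] pairs; phases are listed under "query"
+curl -XPOST http://localhost:8098/mapred \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs": [["training", "foo"], ["training", "bar"]],
+       "query": [{"map": {"language": "erlang",
+                          "module": "riak_kv_mapreduce",
+                          "function": "map_object_value"}}]}'
+```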
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/next-gen-replication.md b/content/riak/kv/2.9.9/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..b11205d185
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/next-gen-replication.md
@@ -0,0 +1,153 @@
+---
+title: "Next-Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "2.9.9"
+menu:
+  riak_kv-2.9.9:
+    name: "Next Gen Replication"
+    identifier: "learn_concepts_next_gen_replication"
+    weight: 108
+    parent: "learn_concepts"
+version_history:
+  in: "2.9.1+"
+toc: true
+aliases:
+  - /riak-docs/riak/2.9.9/dev/using/nextgenreplication
+---
+[concept TicTac aae]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/tictac-active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/replication
+
+## Next Generation Replication - How it Works
+
+### Replication Actors
+
+Each node in `riak_kv` starts three processes that manage inter-cluster
+replication: a tictac AAE full-sync manager, a replication queue source
+manager, and a replication queue sink manager. All processes are started
+by default (whether or not replication is enabled), but will only play
+an active role should replication be configured. Further details on the
+processes involved:
+
+* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`
+
+  * There is a single actor on each node that manages the full-sync
+    reconciliation workload configured for that node.
+
+  * Each node is configured with the details of a peer node at a remote
+    cluster. Each manager is responsible for controlling cluster-wide
+    hashtree exchanges between the local node and the peer node, and for
+    prompting any repairs required across the cluster (not just on this
+    node). The information is exchanged between the peers, but that
+    information represents the data across the whole cluster.
Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`. + + * Each node is configured with a schedule to determine how frequently this manager will run its reconcile and repair operations. + + * It is an administrator's responsibility to ensure the cluster AAE workload is distributed across nodes with sufficient diversity to ensure correct operation under failure. Work is not re-distributed between nodes in response to failure on either the local or remote cluster, so there must be other nodes already configured to share that workload to continue operation under failure conditions. + + * Each node can only full-sync with one other cluster (via the one peer node). If the cluster needs to full-sync with more than one cluster, then the administrator should ensure different nodes have the different configurations necessary to achieve this. + + * Scheduling of work to minimise concurrency of reconciliation operations is managed by this actor using a simple, coordination-free mechanism. + + * The administrator may at run-time suspend or resume the regular running of full-sync operations on any given node via the `riak_kv_ttaaefs_manager`. + +* __Replication Queue Source-Side Manager__ + + * There is a single actor on each node that manages the queueing of replication object references to be consumed by other clusters. This actor runs a configurable number of queues, which contain pointers to data which is required to be consumed by different remote clusters. + + * The general pattern is that each delta within a cluster will be published once via the `riak_kv_replrtq_src` on a node local to the discovery of the change. Each queue which is a source of updates will have multiple consumers spread across multiple sink nodes on the receiving cluster - where each sink-side node's consumers are being managed by a `riak_kv_replrtq_snk` process on that node. + + * Queues may have data filtering rules to restrict what changes are distributed via that queue. The filters can restrict replication to a specific bucket or bucket type, or to a bucket name prefix, or allow any change to be published to that queue. + + * __Real-time replication__ changes (i.e. PUTs that have just been co-ordinated on this node within the cluster) are sent to the `riak_kv_replrtq_src` in one of the following formats: + * {Bucket, Key, Clock, {tombstone, Object}}; + * {Bucket, Key, Clock, {object, Object}}; + * {Bucket, Key, Clock, to_fetch}. + + * Real-time replicated objects are the highest priority items to be queued, and are placed on __every queue whose data filtering rules are matched__ by the object. If the priority queue has grown beyond a limited number of items (the number being defined in `riak_kv.replrtq_srcobjectlimit`), then any {object, Object} references are stripped and replaced with `to_fetch`. This is to help limit the memory consumed by the queue during failure conditions, i.e. when a sink has stopped consuming from the source queue. + + * Changes identified by __AAE full-sync replication__ processes run by the `riak_kv_ttaaefs` manager on the local node are sent to the `riak_kv_replrtq_src` as references, and queued as the second highest priority. These changes are queued only on __a single queue defined within the configuration__ of `riak_kv_ttaaefs_manager`. The changes queued are only references to the object (Bucket, Key and Clock) not the actual object.
+ + * Changes identified by __AAE fold operations__ for administrator-initiated transition or repair operations (e.g. fold over a bucket or key-range, or for a given range of modified dates) are sent to the `riak_kv_replrtq_src` to be queued as the lowest priority onto __a single queue defined by the administrator when initiating the AAE fold operation__. The changes queued are only references to the object (Bucket, Key and Clock) not the actual object - and are only the changes discovered through the fold running on vnodes local to this node. + + * Should the local node fail, all undelivered object references will be dropped. + + * Queues are bounded, with limits set separately for each priority. Items are consumed from the queue in strict priority order. So a backlog of non-real-time replication events cannot cause a backlog or failure in real-time events. + + * The queues are provided using the existing `riak_core_priority_queue` module in Riak. + + * The administrator may at run-time suspend or resume the publishing of data to specific queues via the `riak_kv_replrtq_src` process. + +* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk` + + * There is a single actor on each node that manages the process of consuming from queues on the `riak_kv_replrtq_src` on remote clusters. + + * The `riak_kv_replrtq_snk` can be configured to consume from multiple queues, across an open-ended number of peers. For instance, if each node on Cluster A maintains a queue named `cluster_c_full`, and each node on Cluster B maintains a queue named `cluster_c_partial` - then `riak_kv_replrtq_snk` can be configured to consume from the `cluster_c_full` from every node in Cluster A and from `cluster_c_partial` from every node in Cluster B. + + * The `riak_kv_replrtq_snk` manages a finite number of workers for consuming from remote peers. The `riak_kv_replrtq_snk` tracks the results of work in order to back-off slightly from peers regularly not returning results to consume requests (in favour of those peers indicating a backlog by regularly returning results). The `riak_kv_replrtq_snk` also tracks the results of work in order to back-off severely from those peers returning errors (so as not to lock too many workers consuming from unreachable nodes). + + * The administrator may at run-time suspend or resume the consuming of data from specific queues or peers via the `riak_kv_replrtq_snk`. + +### Real-time Replication - Step by Step + +Previous replication implementations initiate replication through a post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm` after "enough" responses have been received from other vnodes (based on n, w, dw and pw values for the PUT). Without enough responses, the replication hook is not fired, although the client should receive an error and retry. This process of retrying may eventually fire the hook; however, it is possible for a PUT to fail, for the hook not to be fired, and yet for a GET to succeed locally (due to read-repair and anti-entropy), leaving no clue that the object has not been replicated. + +In implementing the new replication solution, the point of firing off replication has been changed to the point at which the co-ordinated PUT is completed. So the replication of the PUT to the clusters may occur in parallel to the replication of the PUT to other nodes in the source cluster. This is the first opportunity where sufficient information is known (e.g.
the updated vector clock), and reduces the size of the time-window of inconsistency between the clusters, and also reduces the window of opportunity for a PUT to succeed but not have replication triggered. + +Replication is fired within the `riak_kv_vnode` `actual_put/8`. On condition of the vnode being a co-ordinator of the PUT, and of `riak_kv.replrtq_enablesrc` being set to enabled (true), the following work is done: + +- The object reference to be replicated is determined; this determines the type of reference to be placed on the replication queue. + + - If the object is now a tombstone, the whole object is used as the replication reference. The whole object is used due to the small size of the object, and the need to avoid race conditions with reaping activity if `delete_mode` is not `keep` - the cluster may not be able to fetch the tombstone to replicate in the future. The whole object must be kept on the queue and not be filtered by the `riak_kv_replrtq_src` to be replaced with a `to_fetch` reference. + + - If the object is below the `riak_kv.replrtq_srcobjectsize` (default 200KB) then the whole object will be sent to the `riak_kv_replrtq_src`, and it will be queued as a whole object as long as the current size of the priority real-time queue does not exceed the `riak_kv.replrtq_srcobjectlimit` (default 1000). If an object is over the size limit a `to_fetch` reference will be sent instead of the object, and if the queue is too large the `riak_kv_replrtq_src` will substitute a `to_fetch` reference before queueing. + +- The `{Bucket, Key, Clock, ObjectReference}` is cast to the `riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the priority queue. + +- The queue has a configurable absolute limit, which is applied individually for each priority. The limit is configured via `riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5 minutes of traffic at 1,000 PUTs per second). When this limit is reached, new replication references are discarded on receipt rather than queued - these discarded references will need to eventually be re-replicated via full-sync. + +The reference now needs to be handled by the `riak_kv_replrtq_src`. The task list for this process is: + +- Assign a priority to the replication event depending on what prompted the replication (e.g. highest priority to real-time events received from co-ordinator vnodes). + +- Add the reference to the tail of __every__ matching queue, based on priority. Each queue is configured to either match `any` replication event, no real-time events (using the configuration `block_rtq`), or a subset of events (using either a bucket `type` filter or a `bucket` filter). + +In order to replicate the object, it must now be fetched from the queue by a sink. A sink-side cluster should have multiple consumers, on multiple nodes, consuming from each node in the source-side cluster. These workers are handed work items by the `riak_kv_replrtq_snk`, with a Riak client configured to communicate to the remote node, and the worker will initiate a `fetch` from that node. + +On receipt of the `fetch` request the source node should: + +- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in place of `{Bucket, Key}`. + +- The GET FSM should go directly into the `queue_fetch` state, and try to fetch the next replication reference from the given queue name via the `riak_kv_replrtq_src`.
+ + - If the fetch from the queue returns `queue_empty` this is relayed back to the sink-side worker, and ultimately the `riak_kv_replrtq_snk` which may then slow down the pace at which fetch requests are sent to this node/queue combination. To reduce the volume of individual requests when queues are mainly empty, the queue is only considered empty if it has reported empty 8 times from requests 4ms apart. + + - If the fetch returns an actual object, this is relayed back to the sink worker. + + - If the fetch returns a replication reference with the flag `to_fetch`, the `riak_kv_get_fsm` will continue down the standard path of states starting with `prepare`, and fetch the object, which will then be returned to the sink worker. + +- If a successful fetch is relayed back to the sink worker, it will replicate the PUT using a local `riak_client:push/4`. The push will complete a PUT of the object on the sink cluster - using a `riak_kv_put_fsm` with appropriate options (e.g. `asis`, `disable-hooks`). + + - The code within the `riak_client:push/4` follows the behaviour of the existing `riak_repl` on receipt of a replicated object. + +- If the fetch and push request fails, the sink worker will report this back to the `riak_kv_replrtq_snk`, which should delay further requests to that node/queue so as to avoid rapidly locking sink workers up communicating to a failing node. + + +### Full-Sync Reconciliation and Repair - Step by Step + +The `riak_kv_ttaaefs_manager` controls the full-sync replication activity of a node. Each node is configured with a single peer with which it is to run full-sync checks and repairs, assuming that across the cluster sufficient peers to sufficient clusters have been configured to complete the overall work necessary for that cluster. Ensuring there are sufficient peer relations is an administrator responsibility; there is no re-balancing or re-scaling of this work during failure scenarios. + +The `riak_kv_ttaaefs_manager` is a source-side process. It will not attempt to repair any discovered discrepancies where the remote cluster is ahead of the local cluster - the job of the process is to ensure that a remote cluster is up-to-date with the changes which have occurred in the local cluster. For mutual full-sync replication, there will be a need for an equivalent configuration on the peer cluster. + +The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the configuration. The schedule is expressed as wants: the number of times per day that this manager should: + +- Reconcile changes across the whole cluster over all time; + +- Skip work for a schedule slot and do nothing; + +- Reconcile changes that have occurred in the past hour; + +- Reconcile changes that have occurred in the past day. + +On startup, the manager looks at these wants and provides a random distribution of work across slots. The day is divided into slots evenly distributed so there is a slot for each want in the schedule. It will run work for the slot at an offset from the start of the slot, based on the place this node has in the sorted list of currently active nodes. So if each node is configured with the same total number of wants, work will be synchronised to have limited overlapping work within the cluster. + +When, on a node, a scheduled piece of work comes due, the `riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work between the two clusters (using the peer configuration to reach the remote cluster).
Once the work is finished, it will schedule the next piece of work - unless the start time for the next piece of work has already passed, in which case the next work is skipped. When all the work in the schedule is complete, a new schedule is calculated from the wants. + +When starting an `aae_exchange` the `riak_kv_ttaaefs_manager` must pass in a repair function. This function will compare clocks from identified discrepancies, and where the source cluster is ahead of the sink, send the `{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name on `riak_kv_replrtq_src`. These queued entries will then be replicated through being fetched by the `riak_kv_replrtq_snk` workers, although this will only occur when there is no higher priority work to replicate, i.e. real-time replication events prompted by locally co-ordinated PUTs. + + + diff --git a/content/riak/kv/2.9.9/developing/usage/reading-objects.md b/content/riak/kv/2.9.9/developing/usage/reading-objects.md new file mode 100644 index 0000000000..8365616df5 --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/reading-objects.md @@ -0,0 +1,252 @@ +--- +title: "Reading Objects" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Reading Objects" + identifier: "usage_reading_objects" + weight: 101 + parent: "developing_usage" +toc: true +aliases: +--- + +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode + +You can think of reads in Riak as analogous to HTTP `GET` requests. You +specify a bucket type, bucket, and key, and Riak either returns the +object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/#siblings) (more on that later)---or it returns `not found` (the +equivalent of an HTTP `404 Not Found`). + +Here is the basic command form for retrieving a specific key from a +bucket: + +``` +GET /types/<type>/buckets/<bucket>/keys/<key> +``` + +Here is an example of a read performed on the key `rufus` in the bucket +`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/bucket-types) page. + +```java +// In the Java client, it is best to specify a bucket type/bucket/key +// Location object that can be used as a reference for further +// operations, as in the example below: +Location myKey = new Location(new Namespace("animals", "dogs"), "rufus"); +``` + +```ruby +bucket = client.bucket_type('animals').bucket('dogs') +obj = bucket.get('rufus') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('rufus', 'dogs', 'animals') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('animals').bucket('dogs') +obj = bucket.get('rufus') +``` + +```csharp +// Using the Riak .NET Client it is best to specify a bucket type/bucket/key +// RiakObjectId object that can be used as a reference for further +// operations +var id = new RiakObjectId("animals", "dogs", "rufus"); +``` + +```javascript +client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) { + assert(rslt.isNotFound); +}); +``` + +```erlang +{ok, Obj} = riakc_pb_socket:get(Pid, + {<<"animals">>, <<"dogs">>}, + <<"rufus">>). +``` + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("dogs"). + WithKey("rufus").
+ Build() +if err != nil { + // error occurred +} +``` + +```curl +curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus +``` + +## Read Parameters + +Parameter | Default | Description +:---------|:--------|:----------- +`r` | `quorum` | How many replicas need to agree when retrieving an existing object in order to return a successful response +`pr` | `0` | How many primary [vnodes][glossary vnode] must respond for a read to be deemed successful +`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client + +Riak also accepts many query parameters, including `r` for setting the +R-value for GET requests (R values describe how many replicas need to +agree when retrieving an existing object in order to return a successful +response). + +Here is an example of attempting a read with `r` set to `3`: + +```java +// Using the "myKey" location specified above: +FetchValue fetch = new FetchValue.Builder(myKey) + .withOption(FetchOption.R, new Quorum(3)) + .build(); +FetchValue.Response response = client.execute(fetch); +RiakObject obj = response.getValue(RiakObject.class); +System.out.println(obj.getValue()); +``` + +```ruby +bucket = client.bucket_type('animals').bucket('dogs') +obj = bucket.get('rufus', r: 3) +p obj.data +``` + +```php +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('rufus', 'dogs', 'animals') + ->build() + ->execute(); + +var_dump($response->getObject()->getData()); +``` + +```python +bucket = client.bucket_type('animals').bucket('dogs') +obj = bucket.get('rufus', r=3) +print obj.data +``` + +```csharp +var id = new RiakObjectId("animals", "dogs", "rufus"); +var opts = new RiakGetOptions(); +opts.SetR(3); +var rslt = client.Get(id, opts); +Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value)); +``` + +```javascript +var fetchOptions = { + bucketType: 'animals', bucket: 'dogs', key: 'rufus', + r: 3 +}; +client.fetchValue(fetchOptions, function (err, rslt) { + var riakObj = rslt.values.shift(); + var rufusValue = riakObj.value.toString("utf8"); + logger.info("rufus: %s", rufusValue); +}); +``` + +```erlang +{ok, Obj} = riakc_pb_socket:get(Pid, + {<<"animals">>, <<"dogs">>}, + <<"rufus">>, + [{r, 3}]). +``` + +```golang +cmd, err := riak.NewFetchValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("dogs"). + WithKey("rufus"). + WithR(3). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +``` + +```curl +curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3 +``` + +If you're using HTTP, you will most often see the following response +codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` + +The most common error code: + +* `404 Not Found` + +{{% note title="Note" %}} +If you're using a Riak client instead of HTTP, these responses will vary a +great deal, so make sure to check the documentation for your specific client. +{{% /note %}} + +## Not Found + +If there's no object stored in the location where you attempt a read, you'll get the following response: + +```java +java.lang.NullPointerException +``` + +```ruby +Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/usage/replication.md b/content/riak/kv/2.9.9/developing/usage/replication.md new file mode 100644 index 0000000000..ff5c4ef24e --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/replication.md @@ -0,0 +1,592 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.9/dev/advanced/replication-properties + - /riak/kv/2.9.9/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/2.9.9/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the +<a href="{{< baseurl >}}riak/kv/2.9.9/developing/app-guide/strong-consistency/">Using +Strong Consistency</a> documentation, as this option will not be covered in +this tutorial. +{{% /note %}} + +## How Replication Properties Work + +When using Riak, there are two ways of choosing replication properties: + +1. On a per-request basis +2. In a more programmatic fashion, [using bucket types][usage bucket types] + +### Per-request Replication Properties + +The simplest way to apply replication properties to objects stored in +Riak is to specify those properties at the time that a read or write is +performed. Examples are given in the +[Client-level Replication Settings](#client-level-replication-settings) +section below. + +### Replication Properties Through Bucket Types + +Let's say, for example, that you want to apply an `n_val` of 5, an `r` +of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets) that +you're using. In order to set those replication properties, you should +create a bucket type that sets those properties. Below is an example: + +```bash +riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}' +riak-admin bucket-type activate custom_props +``` + +Now, any time you store an object in a bucket with the type +`custom_props`, those properties will apply to it. + +## Available Parameters + +The table below lists the most frequently used replication parameters +that are available in Riak. Symbolic values like `quorum` are discussed +[below](#symbolic-consistency-names). Each +parameter will be explained in more detail in later sections: + +Parameter | Common name | Default value | Description +:---------|:------------|:--------------|:----------- +`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored +`r` | R | `quorum` | The number of servers that must respond to a read request +`w` | W | `quorum` | The number of servers that must respond to a write request +`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.9/learn/concepts/vnodes/">vnodes</a> that must respond to a read request +`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/2.9.9/learn/concepts/vnodes/">vnodes</a> that must respond to a write request +`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk +`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter. +`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent of setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`). +`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses. + +## A Primer on N, R, and W + +The most important thing to note about Riak's replication controls is +that they can be set at the bucket level.
You can use [bucket types]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
+ +You can set R anywhere from 1 to N; lower values mean faster response +time but a higher likelihood of Riak not finding the object you're +looking for, while higher values mean that Riak is more likely to find +the object but takes longer to look. + +As an example, let's create and activate a bucket type with `r` set to +`1`. All reads performed on data in buckets with this type require a +result from only one node. + +```bash +riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}' +riak-admin bucket-type activate r_equals_1 +``` + +Here's an example read request using the `r_equals_1` bucket type: + +```ruby +bucket = client.bucket_type('r_equals_1').bucket('animal_facts') +obj = bucket.get('chimpanzee') +``` + +```java +Location chimpanzeeFact = + new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee"); +FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build(); +FetchValue.Response response = client.execute(fetch); +RiakObject obj = response.getValue(RiakObject.class); +System.out.println(obj.getValue().toString()); +``` + +```php +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1') + ->build() + ->execute(); + +echo $response->getObject()->getData(); +``` + +```python +bucket = client.bucket_type('r_equals_1').bucket('animal_facts') +bucket.get('chimpanzee') +``` + +```erlang +{ok, Obj} = riakc_pb_socket:get(Pid, + {<<"r_equals_1">>, <<"animal_facts">>}, + <<"chimpanzee">>). +``` + +```curl +curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee +``` + +As explained above, reads to buckets with the `r_equals_1` type will +typically be completed more quickly, but if the first node to respond +to a read request has yet to receive a replica of the object, Riak will +return a `not found` response (which may happen even if the object lives +on one or more other nodes). Setting `r` to a higher value will mitigate +this risk. + +## W Value and Write Fault Tolerance + +As with read requests, writes to Riak are sent to all N nodes that are +known to be currently responsible for the data. The W value (`w`) enables +you to specify how many nodes must complete a write to be considered +successful---a direct analogy to R. This allows Riak to provide write +availability even when nodes are down or laggy. + +As with R, you can set W to any value between 1 and N. The same +performance vs. fault tolerance trade-offs that apply to R apply to W.
+ +As an example, let's create and activate a bucket type with `w` set to +`3`: + +```bash +riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}' +riak-admin bucket-type activate w_equals_3 +``` + +Now, we can attempt a write to a bucket bearing the type `w_equals_3`: + +```ruby +bucket = client.bucket_type('w_equals_3').bucket('animal_facts') +obj = Riak::RObject.new(bucket, 'giraffe') +obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis' +obj.content_type = 'text/plain' +obj.store +``` + +```java +Location storyKey = + new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe"); +RiakObject obj = new RiakObject() + .setContentType("text/plain") + .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis")); +StoreValue store = new StoreValue.Builder(obj) + .withLocation(storyKey) + .build(); +client.execute(store); +``` + +```php +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildLocation('giraffe', 'animal_facts', 'w_equals_3') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('w_equals_3').bucket('animal_facts') +obj = RiakObject(client, bucket, 'giraffe') +obj.content_type = 'text/plain' +obj.data = 'The species name of the giraffe is Giraffa camelopardalis' +obj.store() +``` + +```erlang +Obj = riakc_object:new({<<"w_equals_3">>, <<"animal_facts">>}, + <<"giraffe">>, + <<"The species name of the giraffe is Giraffa camelopardalis">>, + <<"text/plain">>), +riakc_pb_socket:put(Pid, Obj). +``` + +```curl +curl -XPUT \ + -H "Content-type: text/plain" \ + -d "The species name of the giraffe is Giraffa camelopardalis" \ + http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe +``` + +Writing our giraffe fact will return a success response from Riak only if +3 nodes respond that the write was successful. Setting `w` to 1, for +example, would mean that Riak would return a response more quickly, but +with a higher risk that the write will fail because the first node it +seeks to write the object to is unavailable. + +## Primary Reads and Writes with PR and PW + +In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode), +called _primary vnodes_, that hold primary responsibility for any given +key. Riak will attempt reads and writes to primary vnodes first, but in +case of failure, those operations will go to failover nodes in order to +comply with the R and W values that you have set. This failover option +is called _sloppy quorum_. + +In addition to R and W, you can also set integer values for the _primary +read_ (PR) and _primary write_ (PW) parameters that specify how many +primary nodes must respond to a request in order to report success to +the client. The default for both values is zero. + +Setting PR and/or PW to non-zero values produces a mode of operation +called _strict quorum_. This mode has the advantage that the client is +more likely to receive the most up-to-date values, but at the cost of a +higher probability that reads or writes will fail because primary vnodes +are unavailable. + +{{% note title="Note on PW" %}} +If PW is set to a non-zero value, there is a higher risk (usually very small) +that failure will be reported to the client upon write. But this does not +necessarily mean that the write has failed completely. If there are reachable +primary vnodes, those vnodes will still write the new data to Riak.
When the +failed vnode returns to service, it will receive the new copy of the data via +either read repair or active anti-entropy. +{{% /note %}} + +## Durable Writes with DW + +The W and PW parameters specify how many vnodes must _respond_ to a +write in order for it to be deemed successful. What they do not specify +is whether data has actually been written to disk in the storage backend. +The DW parameter enables you to specify a number of vnodes between 1 +and N that must write the data to disk before the request is deemed +successful. The default value is `quorum` (more on symbolic names below). + +How quickly and robustly data is written to disk depends on the +configuration of your backend or backends. For more details, see the +documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/multi). + +## Delete Quorum with RW + +{{% note title="Deprecation notice" %}} +It is no longer necessary to specify an RW value when making delete requests. +We explain its meaning here, however, because RW still shows up as a property +of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free +to skip this explanation unless you are curious about the meaning of RW. +{{% /note %}} + +Deleting an object requires successfully reading an object and then +writing a tombstone to the object's key that specifies that an object +once resided there. In the course of their operation, all deletes must +comply with any R, W, PR, and PW values that apply along the way. + +If R and W are undefined, however, the RW (`rw`) value will substitute +for both R and W during object deletes. In recent versions of Riak, it +is nearly impossible to make reads or writes that do not somehow specify +both R and W, and so you will never need to worry about RW. + +## The Implications of `notfound_ok` + +The `notfound_ok` parameter is a bucket property that determines how +Riak responds if a read fails on a node. If `notfound_ok` is set to +`true` (the default value) and the first vnode to respond doesn't have a +copy of the object, Riak will assume that the missing value is +authoritative and immediately return a `not found` result to the client. +This will generally lead to faster response times. + +On the other hand, setting `notfound_ok` to `false` means that the +responding vnode will wait for something other than a `not found` error +before reporting a value to the client. If an object doesn't exist under +a key, the coordinating vnode will wait for N vnodes to respond with +`not found` before it reports `not found` to the client. This setting +makes Riak search more thoroughly for objects but at the cost of slower +response times, a problem that can be mitigated by setting `basic_quorum` to +`true`, which is discussed in the next section. + +## Early Failure Return with `basic_quorum` + +Setting `notfound_ok` to `false` on a request (or as a bucket property) +is likely to introduce additional latency. If you read a non-existent +key, Riak will check all 3 responsible vnodes for the value before +returning `not found` instead of checking just one. + +This latency problem can be mitigated by setting `basic_quorum` to +`true`, which will instruct Riak to query a quorum of nodes instead of N +nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5 +nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a +quorum of 4, 8 nodes a quorum of 5, etc.
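+ +To make the arithmetic concrete, the following is a minimal Erlang sketch of +the floor(N/2) + 1 calculation described above. This is illustrative shell +arithmetic only, not part of any Riak API: + +```erlang +%% Quorum for a given N value: floor(N/2) + 1. +%% Erlang's integer division (`div`) already floors the result, so no +%% explicit floor call is needed. +Quorum = fun(N) -> (N div 2) + 1 end. + +%% Spot-checking the values quoted above: +[Quorum(N) || N <- [3, 5, 6, 7, 8]]. +%% => [2,3,4,4,5] +```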
+ +The default for `basic_quorum` is `false`, so you will need to +explicitly set it to `true` on reads or in a bucket's properties. While +the scope of this setting is fairly narrow, it can reduce latency in +read-heavy use cases. + +## Symbolic Consistency Names + +Riak provides a number of "symbolic" consistency options for R, W, PR, +RW, and DW that are often easier to use and understand than specifying +integer values. The following symbolic names are available: + +* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N. +* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW. +* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on. +* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer. + +Not submitting a value for R, W, PR, RW, or DW is the same as using +`default`. + +## Client-level Replication Settings + +Adjusting replication properties at the bucket level by [using bucket types][usage bucket types] +is how you set default properties for _all_ of a bucket's reads and +writes. But you can also set replication properties for specific reads +and writes without setting those properties at the bucket level, instead +specifying them on a per-operation basis. + +Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for +just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s +statistics from the `nba_stats` bucket. + +```ruby +bucket = client.bucket('nba_stats') +obj = bucket.get('john_stockton', r: 2, notfound_ok: true) +``` + +```java +Location johnStocktonStats = + new Location(new Namespace("nba_stats"), "john_stockton"); +FetchValue fetch = new FetchValue.Builder(johnStocktonStats) + .withOption(FetchOption.R, new Quorum(2)) + .withOption(FetchOption.NOTFOUND_OK, true) + .build(); +client.execute(fetch); +``` + +```php +(new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('john_stockton', 'nba_stats') + ->withParameter('r', 2) + ->withParameter('notfound_ok', true) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('nba_stats') +obj = bucket.get('john_stockton', r=2, notfound_ok=True) +``` + +```erlang +{ok, Obj} = riakc_pb_socket:get(Pid, + <<"nba_stats">>, + <<"john_stockton">>, + [{r, 2}, {notfound_ok, true}]). +``` + +```curl +curl http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true +``` + +Now, let's say that you want to attempt a write with `w` set to 3 and +`dw` set to 2. As in the previous example, we'll be using the `default` +bucket type, which enables us to not specify a bucket type upon write. +Here's what that would look like: + +```ruby +bucket = client.bucket('nba_stats') +obj = Riak::RObject.new(bucket, 'michael_jordan') +obj.content_type = 'application/json' +obj.data = '{"stats":{ ... large stats object ... }}' +obj.store(w: 3, dw: 2) +``` + +```java +Location michaelJordanKey = + new Location(new Namespace("nba_stats"), "michael_jordan"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(BinaryValue.create("{'stats':{ ... large stats object ...
}}")); +StoreValue store = new StoreValue.Builder(obj) + .withLocation(michaelJordanKey) + .withOption(StoreOption.W, new Quorum(3)) + .withOption(StoreOption.DW, new Quorum(2)) + .build(); +client.execute(store); +``` + +```php +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildJsonObject('{'stats':{ ... large stats object ... }}') + ->buildLocation('john_stockton', 'nba_stats') + ->withParameter('w', 3) + ->withParameter('dw', 2) + ->build() + ->execute(); +``` + +```erlang +Obj = riakc_obj:new(<<"nba_stats">>, + <<"michael_jordan">>, + <<"{'stats':{ ... large stats object ... }}">>, + <<"application/json">>), +riakc_pb_socket:put(Pid, Obj). +``` + +```curl +curl -XPUT \ + -H "Content-Type: application/json" \ + -d '{"stats":{ ... large stats object ... }}' \ + http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 +``` + +All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/2.9.9/developing/client-libraries) enable you to +set replication properties this way. For more detailed information, +refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/2.9.9/developing/getting-started) +or to client-specific documentation: + +* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) +* [Java](http://basho.github.io/riak-java-client/2.0.0/) +* [Python](http://basho.github.io/riak-python-client/) +* [Erlang](http://basho.github.io/riak-erlang-client/) + +## Illustrative Scenarios + +In case the above explanations were a bit too abstract for your tastes, +the following table lays out a number of possible scenarios for reads +and writes in Riak and how Riak is likely to respond. Some of these +scenarios involve issues surrounding conflict resolution, vector clocks, +and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context#vector-clocks) documentation for more information. + +#### Read Scenarios + +These scenarios assume that a read request is sent to all 3 primary +vnodes responsible for an object. + +Scenario | What happens in Riak +:--------|:-------------------- +All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/2.9.9/learn/concepts/causal-context#siblings">siblings</a> +2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/2.9.9/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes +2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) + +#### Write Scenarios + +These scenarios assume that a write request is sent to all 3 primary +vnodes responsible for an object. 
+ +Scenario | What happens in Riak +:--------|:-------------------- +A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write +A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads +A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created +A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads +A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value + +## Screencast + +Here is a brief screencast that shows just how the N, R, and W values +function in our running 3-node Riak cluster: + +<div style="display:none" class="iframe-video" +id="http://player.vimeo.com/video/11172656"></div> + +<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from +<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a +href="http://vimeo.com">Vimeo</a>. + + + + diff --git a/content/riak/kv/2.9.9/developing/usage/search-schemas.md b/content/riak/kv/2.9.9/developing/usage/search-schemas.md new file mode 100644 index 0000000000..86008e15df --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/search-schemas.md @@ -0,0 +1,511 @@ +--- +title: "Creating Search Schemas" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Creating Search Schemas" + identifier: "usage_search_schemas" + weight: 110 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.9/dev/advanced/search-schema + - /riak/kv/2.9.9/dev/advanced/search-schema +--- + +[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters + +> **Note on Search 2.0 vs. Legacy Search** +> +> This document refers to the new Riak Search 2.0 with +[Solr](http://lucene.apache.org/solr/) integration (codenamed +Yokozuna). + +Riak Search is built for ease of use, allowing you to write values into +Riak and query for values using Solr. Riak Search does a lot of work +under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/), and [more]({{<baseurl>}}riak/kv/2.9.9/developing/usage/custom-extractors)---into something that can be indexed and searched later. +Nonetheless, you must still instruct Riak/Solr how to index a value. Are +you providing an array of strings? An integer? A date? Is your text in +English or Russian? You can provide such instructions to Riak Search by +defining a Solr **schema**. + +## The Default Schema + +Riak Search comes bundled with a default schema named `_yz_default`. The +default schema covers a wide range of possible field types. You can find +the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml). +While using the default schema provides an easy path to starting +development, we recommend that you define your own schema in production. +Take note of `dynamicField name="*"`, which is a catch-all index for any +value.
Sufficiently sized objects can potentially take up tremendous +amounts of disk space, so pay special attention to those indexes. + +## Custom Schemas + +We'll show you how you can create custom schemas by way of example. +Let's say that you have already created a schema named `cartoons` in a +file named `cartoons.xml`. This would register the custom schema in Riak +Search: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("cartoons.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read("cartoons.xml") +client.create_search_schema("cartoons", schema_data) +``` + +```php +(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak)) + ->withName('cartoons') + ->withSchemaFile('path/to/file.xml') + ->build() + ->execute(); +``` + +```python +xml_file = open('cartoons.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('cartoons', schema_data) +xml_file.close() +``` + +```csharp +var xml = File.ReadAllText("cartoons.xml"); +var schema = new SearchSchema("cartoons", xml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +var fs = require('fs'); + +fs.readFile('cartoons.xml', function (err, data) { + if (err) { + throw new Error(err); + } + + var schemaXml = data.toString('utf8'); + + var options = { + schemaName: 'cartoons', + schema: schemaXml + }; + + client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("cartoons.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData). +``` + +```curl +curl -XPUT http://localhost:8098/search/schema/cartoons \ + -H 'Content-Type:application/xml' \ + --data-binary @cartoons.xml +``` + +## Creating a Custom Schema + +The first step in creating a custom schema is to define exactly what +fields you must index. Part of that step is understanding how Riak +Search extractors function. + +### Extractors + +In Riak Search, extractors are modules responsible for pulling out a +list of fields and values from a Riak object. How this is achieved +depends on the object's content type, but the two common cases are JSON +and XML, which operate similarly. Our examples here will use JSON. + +The following JSON object represents the character +[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O) +from the cartoon Thundercats. He has a name and age, he's the team +leader, and he has a list of aliases in other languages. + +```json +{ + "name":"Lion-o", + "age":30, + "leader":true, + "aliases":[ + {"name":"León-O", "desc_es":"Señor de los ThunderCats"}, + {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"} + ] +} +``` + +The extractor will flatten the above object into a list of field/value +pairs. Nested objects will be separated with a dot (`.`) and arrays will +simply repeat the fields. The above object will be extracted to the +following list of Solr document fields.
+ +``` +name=Lion-o +age=30 +leader=true +aliases.name=León-O +aliases.desc_es=Señor de los ThunderCats +aliases.name=Starlion +aliases.desc_fr=Le jeune seigneur des Cosmocats +``` + +This means that our schema should handle `name`, `age`, `leader`, +`aliases.name` (a `dot` is a valid field character), and +`aliases.desc_*`, which is a description in the language given by the +suffix (Spanish or French). + +### Required Schema Fields + +Solr schemas can be very complex, containing many types and analyzers. +Refer to the [Solr 4.7 reference +guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf) +for a complete list. You should be aware, however, that there are a few +fields that are required by Riak Search in order to properly distribute +an object across a [cluster][concept clusters]. These fields are all prefixed +with `_yz`, which stands for +[Yokozuna](https://github.com/basho/yokozuna), the original code name +for Riak Search. + +Below is a bare minimum skeleton Solr Schema. It won't do much for you +other than allow Riak Search to properly manage your stored objects. + +```xml +<?xml version="1.0" encoding="UTF-8" ?> +<schema name="schedule" version="1.5"> + <fields> + + <!-- All of these fields are required by Riak Search --> + <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/> + <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + </fields> + + <uniqueKey>_yz_id</uniqueKey> + + <types> + <!-- YZ String: Used for non-analyzed fields --> + <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" /> + </types> +</schema> +``` + +If you're missing any of the above fields, Riak Search will reject your +custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`. + +In the table below, you'll find a description of the various required +fields. You'll rarely need to use any fields other than `_yz_rt` (bucket +type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err` +can be helpful if you suspect that your extractors are failing. +Malformed JSON or XML will cause Riak Search to index a key and set +`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-------|:-----|:-----------
+`_yz_id` | ID | Unique identifier of this Solr document
+`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy)
+`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag`| VTag | If the object has siblings, the vtag differentiates them
+`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Indicates whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you must map the name and type of each field (string,
+integer, boolean, etc.).
+
+When creating fields, you can either define specific fields via the
+`field` element or wildcard fields via `dynamicField`, whose names
+contain an asterisk (`*`). A value that matches a specific field name
+will be indexed by that field; otherwise Solr will attempt to match it
+against a dynamic field pattern.
+
+Besides a field `type`, you must also decide whether a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored`, you
+can get the value back as part of a query result, but this also doubles
+the storage of the field (once in Riak, once in Solr). If a single Riak
+object can have more than one copy of the same matching field, you must
+also set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+   <field name="name" type="string" indexed="true" stored="true" />
+   <field name="age" type="int" indexed="true" stored="false" />
+   <field name="leader" type="boolean" indexed="true" stored="false" />
+   <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" />
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+```
+
+Next, take note of the types you used in the fields and ensure that each
+of the field types is defined as a `fieldType` under the `types`
+element.
Basic types such as `string`, `boolean`, and `int` have matching
+Solr classes. There are dozens more types, including various numeric
+fields (`float`, `tdouble`), `random` sort fields, `date` fields, and
+even geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we map any field that ends with
+`_es` to Spanish and any field that ends with `_de` to German. (Our
+sample data also includes a French `desc_fr` field; a `text_fr` type
+could be defined in the same way.)
+
+```xml
+ <types>
+   <!-- YZ String: Used for non-analyzed fields -->
+   <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+   <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+   <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+   <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+   <!-- Spanish -->
+   <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+     <analyzer>
+       <tokenizer class="solr.StandardTokenizerFactory"/>
+       <filter class="solr.LowerCaseFilterFactory"/>
+       <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+       <filter class="solr.SpanishLightStemFilterFactory"/>
+       <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+     </analyzer>
+   </fieldType>
+
+   <!-- German -->
+   <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+     <analyzer>
+       <tokenizer class="solr.StandardTokenizerFactory"/>
+       <filter class="solr.LowerCaseFilterFactory"/>
+       <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
+       <filter class="solr.GermanNormalizationFilterFactory"/>
+       <filter class="solr.GermanLightStemFilterFactory"/>
+       <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+       <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+     </analyzer>
+   </fieldType>
+ </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+submitted for indexing without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO 8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to this will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+        at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+        at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+        at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+        at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+        at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+        ...
+        ...
+        ...
+```
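+
+If your application generates these strings, it's usually easiest to
+normalize to UTC before formatting. A minimal Python sketch (plain
+standard library, not tied to any Riak client API):
+
+```python
+from datetime import datetime, timezone
+
+def to_solr_datetime(dt):
+    # Solr expects ISO 8601 UTC with a trailing "Z",
+    # e.g. "1995-12-31T23:59:59Z" -- not a numeric offset.
+    return dt.astimezone(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
+
+print(to_solr_datetime(datetime.now(timezone.utc)))
+```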
+
+### Uploading
+
+Once you have finalized your custom schema as an `.xml` file, you can
+upload it to Riak KV as follows:
+
+```curl
+curl -v -XPUT $RIAK_HOST/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+
+
+## Field Properties By Use Case
+
+Sometimes it can be tricky to decide whether a value should be `stored`,
+or whether `multiValued` is allowed. This handy table from the [Solr
+documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case)
+may help you pick field properties.
+
+An entry of `true` or `false` in the table indicates that the option
+must be set to the given value for the use case to function correctly.
+If no entry is provided, the setting of that attribute has no impact on
+the case.
+
+<table class="schemausecase">
+<thead>
+<tr>
+<th>Use Case</th>
+<th><code>indexed</code></th>
+<th><code>stored</code></th>
+<th><code>multiValued</code></th>
+<th><code>omitNorms</code></th>
+<th><code>termVectors</code></th>
+<th><code>termPositions</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>search within field</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>retrieve contents</td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use as unique key</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>sort on field</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td><code>true</code>[1](#notes)</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use field boosts[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>document boosts affect searches within field</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>highlighting</td>
+<td><code>true</code>[4](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td>[2](#notes)</td>
+<td><code>true</code>[3](#notes)</td>
+</tr>
+<tr>
+<td>faceting[5](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>add multiple values, maintaining order</td>
+<td></td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>field length affects doc score</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>MoreLikeThis[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>true</code>[6](#notes)</td>
+<td></td>
+</tr>
+</tbody></table>
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/search.md b/content/riak/kv/2.9.9/developing/usage/search.md
new file mode 100644
index 0000000000..c13a616fab
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/search.md
@@ -0,0 +1,1455 @@
+---
+title: "Using Search"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Searching"
+    identifier: "usage_searching"
+    weight: 105
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/using/search
+  - /riak/kv/2.9.9/dev/using/search
+---
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak search 2.0 is an integration of Solr (for indexing and querying)
+and Riak
(for storage and distribution). There are a few points of
+interest that a user of Riak search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak search uses AAE is in the
+[Riak search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you use to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Below, we'll use `curl` to
+create an index named `famous` with the default schema.
+
+Both schema and index creation will be covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/2.9.9/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+  new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.9/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.9/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+  .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+need only do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type
+property `search_index` to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create a bucket type without
+any properties and set individual buckets to be indexed.
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +### Associating an Index via Custom Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +default bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + +Once you have created the index association, any new data will be indexed on +ingest according to your schema. + +## Riak Search Security Setup + +[Security]({{<baseurl>}}riak/kv/2.9.9/using/security/) is a new feature as of +Riak 2.0 that lets an administrator limit access to certain resources. +In the case of search, your options are to limit administration of +schemas or indexes (the `search.admin` permission) to certain users, and +to limit querying (the `search.query` permission) to any index or to a +specific index. The example below shows the various options. 
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Lion-o, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python the content type is automatically set for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+  .setContentType(json)
+  .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+  .setContentType(json)
+  .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+  .setContentType(json)
+  .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+  .setContentType(json)
+  .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+    ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket))
+    ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket))
+    ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket))
+    ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]);
+
+$storeObjectBuilder->build()->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+
+cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True})
+cat.store()
+
+cat = bucket.new('cheetara', {'name_s': 'Cheetara', 'age_i': 28, 'leader_b': False})
+cat.store()
+
+cat = bucket.new('snarf', {'name_s': 'Snarf', 'age_i': 43})
+cat.store()
+
+cat = bucket.new('panthro', {'name_s': 'Panthro', 'age_i': 36})
+cat.store()
+```
+
+```csharp
+var lionoId = new RiakObjectId("animals", "cats", "liono");
+var lionoObj = new { name_s = "Lion-o", age_i = 30, leader_b = true };
+var lionoRiakObj = new RiakObject(lionoId, lionoObj);
+
+var cheetaraId = new RiakObjectId("animals", "cats", "cheetara");
+var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader_b = false };
+var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj);
+
+var snarfId = new RiakObjectId("animals", "cats", "snarf");
+var snarfObj = new { name_s = "Snarf", age_i = 43, leader_b = false };
+var snarfRiakObj = new RiakObject(snarfId, snarfObj);
+
+var panthroId = new RiakObjectId("animals", "cats", "panthro");
+var panthroObj = new { name_s = "Panthro", age_i = 36, leader_b = false };
+var panthroRiakObj = new RiakObject(panthroId, panthroObj);
+
+var rslts = client.Put(new[] {
+    lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj
+});
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var objs = [
+    [ 'liono', { name_s: 'Lion-o', age_i: 30, leader_b: true } ],
+    [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader_b: false } ],
+    [ 'snarf', { name_s: 'Snarf', age_i: 43, leader_b: false } ],
+    [ 'panthro', { name_s: 'Panthro', age_i: 36, leader_b: false } ],
+];
+
+var storeFuncs = [];
+objs.forEach(function (o) {
+    var storeFunc = function (async_cb) {
+        var key = o[0];
+        var value = o[1];
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('application/json');
+        riakObj.setBucketType('animals');
+        riakObj.setBucket('cats');
+        riakObj.setKey(key);
+        riakObj.setValue(value);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    };
+    storeFuncs.push(storeFunc);
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+    // NB: all objects stored and indexed...
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/2.9.9/developing/usage/custom-extractors). 
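+
+To make the extraction idea concrete, here is a rough Python sketch of
+what a JSON extractor does conceptually; this models the behavior only
+and is not Riak's actual implementation (which is Erlang):
+
+```python
+def extract_json(value, prefix=''):
+    """Flatten a decoded JSON value into Solr field/value pairs,
+    joining nested object keys with dots and repeating the field
+    name for each list element."""
+    pairs = []
+    if isinstance(value, dict):
+        for key, val in value.items():
+            path = key if not prefix else prefix + '.' + key
+            pairs.extend(extract_json(val, path))
+    elif isinstance(value, list):
+        for item in value:
+            pairs.extend(extract_json(item, prefix))
+    else:
+        pairs.append((prefix, value))
+    return pairs
+
+doc = {"name_s": "Lion-o", "age_i": 30, "leader_b": True}
+print(extract_json(doc))
+# [('name_s', 'Lion-o'), ('age_i', 30), ('leader_b', True)]
+```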
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated to a
+Solr index document field. Solr will index any field that it
+recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` is an integer, `_b` is a boolean, and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/2.9.9/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match.
+ +```java +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create("famous"), "name_s:Lion*") + .build(); +cluster.execute(searchOp); +// This will display the actual results as a List of Maps: +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +// This will display the number of results: +System.out.println(results); +``` + +```ruby +results = client.search("famous", "name_s:Lion*") +p results +p results['docs'] +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('famous') + ->withQuery('name_s:Lion*') + ->build() + ->execute(); + +$response->getNumFound(); // 1 + +var_dump($response->getDocs()); +``` + +```python +results = client.fulltext_search('famous', 'name_s:Lion*') +print results +print results['docs'] +``` + +```csharp +var search = new RiakSearchRequest +{ + Query = new RiakFluentSearch("famous", "name_s") + .Search("Lion*") + .Build() +}; + +var rslt = client.Search(search); +RiakSearchResult searchResult = rslt.Value; +foreach (RiakSearchResultDocument doc in searchResult.Documents) +{ + var args = new[] { + doc.BucketType, + doc.Bucket, + doc.Key, + string.Join(", ", doc.Fields.Select(f => f.Value).ToArray()) + }; + Debug.WriteLine( + format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}", + args: args); +} +``` + +```javascript +function search_cb(err, rslt) { + if (err) { + throw new Error(err); + } + logger.info("docs:", JSON.stringify(rslt.docs)); +} + +var search = new Riak.Commands.YZ.Search.Builder() + .withIndexName('famous') + .withQuery('name_s:Lion*') + .withCallback(search_cb) + .build(); +client.execute(search); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>), +io:fwrite("~p~n", [Results]), +Docs = Results#search_results.docs, +io:fwrite("~p~n", [Docs]). + +%% Please note that this example relies on an Erlang record definition +%% for the search_result record found here: +%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl +``` + +```golang +cmd, err := riak.NewSearchCommandBuilder(). + WithIndexName("famous"). + WithQuery("name_s:Lion*"). + Build(); +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} + +sc := cmd.(*riak.SearchCommand) +if json, jerr := json.MarshalIndent(sc.Response.Docs, "", " "); jerr != nil { + return jerr +} else { + fmt.Println(string(json)) +} +``` + +```curl +curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp +``` + +The response to a query will be an object containing details about the +response, such as a query's max score and a list of documents which +match the given query. It's worth noting two things: + +* The documents returned are Search documents (a set of Solr + field/values), not a Riak value +* The HTTP response is a direct Solr response, while the drivers use + Protocol Buffers and are encoded with different field names + +This is a common HTTP `response` value: + +```json +{ + "numFound": 1, + "start": 0, + "maxScore": 1.0, + "docs": [ + { + "leader_b": true, + "age_i": 30, + "name_s": "Lion-o", + "_yz_id": "default_cats_liono_37", + "_yz_rk": "liono", + "_yz_rt": "default", + "_yz_rb": "cats" + } + ] +} +``` + +The most important field returned is `docs`, which is the list of +objects that each contain fields about matching index documents. 
The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+  .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation($key, $bucket, $btype)
+    ->build()
+    ->execute()
+    ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO 30]`. If
+you wanted to find all cats 30 or older, you could include a wildcard
+as the top end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+  .Builder(BinaryValue.create(index), query)
+  .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+Search searchOp = new Search.Builder(index, query).build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('leader_b:true AND age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+  .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new Command\Builder\Search\DeleteIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []),
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that bucket's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_`.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents are returned in non-overlapping
+sequential subsets (in other words, *pages*).
This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, where `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+  .Builder(BinaryValue.create("famous"), "*:*")
+  .withStart(start)
+  .withNumRows(rowsPerPage)
+  .build();
+client.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('*:*')
+    ->withMaxRows($maxRows)
+    ->withStartRow($start)
+    ->build()
+    ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results, as sketched below.
+
+If you want to sort by score without repeating results then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query.
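+
+A rough sketch of that key-sort workaround using the Python client
+(assuming, as the Python examples above do, a `client` object, and that
+extra Solr parameters such as `sort` are passed through to the query):
+
+```python
+# Walk every matching key in a stable type/bucket/key order,
+# two documents per page (see the warning above about sorting
+# on score or _yz_id instead).
+rows_per_page = 2
+page = 1
+while True:
+    results = client.fulltext_search(
+        'famous', '*:*',
+        start=rows_per_page * (page - 1),
+        rows=rows_per_page,
+        sort='_yz_rt asc, _yz_rb asc, _yz_rk asc')
+    if not results['docs']:
+        break
+    for doc in results['docs']:
+        print(doc['_yz_rk'])
+    page += 1
+```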
+ +[This issue](https://github.com/basho/yokozuna/issues/355) is caused by +the way Search must minimally distribute a query across multiple Solr +nodes (called a *coverage plan*) and then filter duplicate results to +retrieve a full result set. Since this plan is frequently recalculated, +successive page queries may use a different plan, and thus calculate +alternate `score`s or filter different `_yz_id` values. We have plans to +fix this shortcoming in a future version of Riak. + +### MapReduce + +Riak Search allows for piping search results as inputs for +[MapReduce]({{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce/) jobs. This is a useful cross-section for +performing post-calculations of results or aggregations of ad-hoc +queries. The Riak Search MapReduce integration works similarly to +regular MapReduce, with the notable exception that your input is not a +bucket, but rather index and query arguments to the `yokozuna` module +and `mapred_search` function (an Erlang `module:function` pair that adds +the Riak Search hook to MapReduce). + +```json +{ + "inputs": { + "module": "yokozuna", + "function": "mapred_search", + "arg": ["famous","NOT leader_b:true"] + }, + "query": [ + { + "map": { + "language": "javascript", + "keep": false, + "source": "function(v) { return [1]; }" + } + }, + { + "reduce": { + "language": "javascript", + "keep": true, + "name": "Riak.reduceSum" + } + } + ] +} +``` + +In this example we're searching for all famous cats that are not +leaders and counting up the results using Javascript for both map and +reduce. It should return the reduced sum of `[3]`. + +```curl +curl -XPOST $RIAK_HOST/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}' +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/usage/searching-data-types.md b/content/riak/kv/2.9.9/developing/usage/searching-data-types.md new file mode 100644 index 0000000000..86ff858a7f --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/searching-data-types.md @@ -0,0 +1,1687 @@ +--- +title: "Searching with Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Searching with Data Types" + identifier: "usage_search_data_types" + weight: 111 + parent: "developing_usage" +toc: true +aliases: + - /riak/2.9.9/dev/search/search-data-types + - /riak/kv/2.9.9/dev/search/search-data-types +--- + +Although [Riak Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types) function differently from other +Riak objects in some respects, when you're using Search you can think of +them as normal Riak objects with special metadata attached (metadata +that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#maps) +can be indexed and have their contents searched just like other Riak +objects. + +## Data Type MIME Types + +Like all objects stored in Riak, Riak Data Types are assigned content +types. Unlike other Riak objects, this happens automatically. When you +store, say, a counter in Riak, it will automatically be assigned the +type `application/riak_counter`. 
The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types has its own default schema, with the exception of
+maps, which means that the `_yz_default` schema will automatically index
+Data Types on the basis of their assigned content type. This means that
+there is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#data-types-and-search-examples) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#maps), there are four schemas
+for embedded, a.k.a. dynamic, fields.
Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued.
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings.
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+  .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('scores')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index.
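+
+If you want to sanity-check the association before writing any data, a
+quick sketch with the Python client (assuming the bucket type
+properties API of the official client):
+
+```python
+# Fetch the bucket type's properties and confirm the index is attached.
+props = client.bucket_type('counters').get_properties()
+print(props['search_index'])  # should print 'scores' once applied
+```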
So let's
+start playing with some counters. All counters will be stored in the
+bucket `people`, while the key for each counter will be the username of
+each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu1 = new CounterUpdate(10);
+UpdateCounter update1 = new UpdateCounter.Builder(christopherHitchensCounter, cu1)
+    .build();
+client.execute(update1);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+CounterUpdate cu2 = new CounterUpdate(25);
+UpdateCounter update2 = new UpdateCounter.Builder(joanRiversCounter, cu2)
+    .build();
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10)
+    ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+    ->buildLocation('joan_rivers', 'people', 'counters')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"chris_hitchens">>,
+                            riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"joan_rivers">>,
+                            riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25.
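+
+Before filtering on specific values, we can sanity-check that both
+counters made it into the index by running the wildcard query from the
+counters table above. This is just a sketch reusing the HTTP query
+pattern shown elsewhere on this page (it assumes indexing has had a
+moment to complete):
+
+```curl
+# List every counter document in the "scores" index; "num_found" in
+# the JSON response should be 2
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:*" | json_pp
+```
+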
Let's query to see how many counters have a value greater
+than 20, just to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[20 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[20 TO *]" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over 20.
+To find out which counter that is, we can dig into our results:
+
+```java
+// Using the "results" object from above:
+int numberFound = results.numResults();
+Map<String, List<String>> foundObject = results.getAllResults().get(0);
+String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
+String bucket = foundObject.get("_yz_rb").get(0); // "people"
+String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
+```
+
+```ruby
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```php
+$doc = $response->getDocs()[0];
+
+// The key
+$doc->_yz_rk; // 'joan_rivers'
+
+// The bucket
+$doc->_yz_rb; // 'people'
+
+// The bucket type
+$doc->_yz_rt; // 'counters'
+```
+
+```python
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+var doc = rslt.docs[0];
+
+var key = doc['_yz_rk'];
+var bucket = doc['_yz_rb'];
+var bucketType = doc['_yz_rt'];
+```
+
+```erlang
+Docs = Results#search_results.docs,
+Doc = lists:nth(1, Docs),
+Key = proplists:get_value(<<"_yz_rk">>, Doc),
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
+BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
+```
+
+```curl
+# Use the JSON object from above to locate bucket, key, and bucket type
+# information
+```
+
+Alternatively, we can see how many counters have values below 15:
+
+```java
+String index = "scores";
+String query = "counter:[* TO 15]";
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[* TO 15]')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[* TO 15]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[* TO 15]')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[* TO 15]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
+``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp +``` + +Or we can see how many counters have a value of 17 exactly: + +```java +// Using the same method as above, just changing the query: +String query = "counter:17"; +``` + +```ruby +results = client.search('scores', 'counter:17') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('scores') + ->withQuery('counter:17') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('scores', 'counter:17') +``` + +```csharp +var search = new RiakSearchRequest("scores", "counter:17"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('scores') + .withQuery('counter:17') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>). +``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp +``` + +## Sets Example + +Let's say that we're storing information about the hobbies of a group of +people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#sets) simply called `sets`, +like so: + +```bash +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type activate sets +``` + +Now, we'll create a Search index called `hobbies` that uses the default +schema (as in some of the examples above): + +```java +YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies"); +StoreIndex storeIndex = + new StoreIndex.Builder(hobbiesIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('hobbies', '_yz_default') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak)) + ->withName('hobbies') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('hobbies', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("hobbies", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'hobbies' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/hobbies \ + -H 'Content-Type: application/json' \ + -d '{"schema": "_yz_default"}' +``` + +Now, we can modify our `sets` bucket type to associate that bucket type +with our `hobbies` index: + +```bash +riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}' +``` + +Now, all of the sets that we store in any bucket with the bucket type +`sets` will be automatically indexed as a set. 
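+
+To double-check the association, you can inspect the bucket type's
+properties; `search_index` should now be set to `hobbies`. A quick
+sketch using the same `riak-admin` tool as above:
+
+```bash
+# Print the status and properties of the "sets" bucket type; the
+# output should include a search_index property set to "hobbies"
+riak-admin bucket-type status sets
+```
+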
So let's say that we
+store three sets for two different people describing their respective
+hobbies, in the bucket `people`:
+
+```java
+Namespace peopleBucket = new Namespace("sets", "people");
+
+Location mikeDitkaSet = new Location(peopleBucket, "ditka");
+SetUpdate su1 = new SetUpdate()
+    .add("football")
+    .add("winning");
+UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();
+
+Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
+SetUpdate su2 = new SetUpdate()
+    .add("wailing")
+    .add("rocking")
+    .add("winning");
+UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();
+
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+
+ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('football')
+    ->add('winning')
+    ->buildLocation('ditka', 'people', 'sets');
+
+$builder->build()->execute();
+
+$builder->add('wailing')
+    ->add('rocking')
+    ->add('winning')
+    ->buildLocation('dio', 'people', 'sets')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('people')
+
+mike_ditka_set = Set(bucket, 'ditka')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+mike_ditka_set.store()
+
+ronnie_james_dio_set = Set(bucket, 'dio')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+ronnie_james_dio_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("ditka")
+    .WithAdditions(new[] { "football", "winning" })
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("dio")
+    .WithAdditions(new[] { "wailing", "rocking", "winning" })
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'ditka',
+            additions: ['football', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'dio',
+            additions: ['wailing', 'rocking', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+MikeDitkaSet0 = riakc_set:new(),
+MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet0),
+MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
+RonnieJamesDioSet0 = riakc_set:new(),
+RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet0),
+RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
+RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),
+
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"ditka">>,
+                            riakc_set:to_op(MikeDitkaSet2)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"dio">>,
+                            riakc_set:to_op(RonnieJamesDioSet3)).
+```
+
+Now, we can query our `hobbies` index to see if anyone has the hobby
+`football`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:football";
+```
+
+```ruby
+results = client.search('hobbies', 'set:football')
+# This should return a Hash with fields like 'num_found' and 'docs'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('hobbies')
+    ->withQuery('set:football')
+    ->build()
+    ->execute();
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:football')
+# This should return a dict with fields like 'num_found' and 'docs'
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:football");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("sets numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:football')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp
+```
+
+Let's see how many sets contain the element `football`:
+
+```java
+// Using the same method explained above for getting search results:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results['num_found']
+# 1
+```
+
+```php
+$response->getNumFound(); // 1
+```
+
+```python
+results['num_found']
+# 1
+```
+
+```csharp
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+rslt.numFound;
+// 1
+```
+
+```erlang
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+# The number of matching sets is in the "num_found" field of the JSON
+# response returned by the query above
+```
+
+Success! We stored two sets, only one of which contains the element
+`football`. Now, let's see how many sets contain the element `winning`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:winning";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('hobbies')
+    ->withQuery('set:winning')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:winning");
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:winning')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>).
+NumberFound = Results#search_results.num_found.
+%% 2
+```
+
+Just as expected, both sets we stored contain the element `winning`.
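+
+The `hobbies` index also supports the compound queries shown in the
+sets table earlier. As a sketch (following the same curl pattern used
+above), here is what an HTTP query for sets containing both `wailing`
+and `winning` might look like; only the `dio` set should match:
+
+```curl
+# Find sets that contain both "wailing" and "winning"; "num_found"
+# should be 1, and the matching key should be "dio"
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:wailing AND set:winning" | json_pp
+```
+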
+
+## Maps Example
+
+This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types)
+tutorial. That tutorial walks you through storing CMS-style user data in
+Riak [maps]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/#maps), and we'd suggest that you
+familiarize yourself with that tutorial first. More specifically, user
+data is stored in the following fields in each user's map:
+
+* first name in a `first_name` register
+* last name in a `last_name` register
+* whether the user is an enterprise customer in an `enterprise_customer`
+  flag
+* the number of times the user has visited the company page in a
+  `page_visits` counter
+* a list of the user's interests in an `interests` set
+
+First, let's create and activate a bucket type simply called `maps` that
+is set up to store Riak maps:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+riak-admin bucket-type activate maps
+```
+
+Now, let's create a search index called `customers` using the default
+schema:
+
+```java
+YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(customersIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('customers', '_yz_default')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('customers')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('customers', '_yz_default')
+```
+
+```csharp
+var searchIndex = new SearchIndex("customers", "_yz_default");
+var rslt = client.PutSearchIndex(searchIndex);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'customers'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+    .update("first_name", new RegisterUpdate("Idris"))
+    .update("last_name", new RegisterUpdate("Elba"))
+    .update("enterprise_customer", new FlagUpdate(true))
+    .update("page_visits", new CounterUpdate(10))
+    .update("interests", new SetUpdate().add("acting").add("being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+    .update("first_name", new RegisterUpdate("Joan"))
+    .update("last_name", new RegisterUpdate("Jett"))
+    // Joan Jett is not an enterprise customer, so we don't need to
+    // explicitly disable the "enterprise_customer" flag, as all
+    // flags are disabled by default
+    .update("page_visits", new CounterUpdate(25))
+    .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Idris')
+    ->updateRegister('last_name', 'Elba')
+    ->updateFlag('enterprise_customer', true)
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Joan')
+    ->updateRegister('last_name', 'Jett')
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, true)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', true);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters above 15. Unlike the
+counters example above, we have to specify _which_ counter we're
+querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('page_visits_counter:[15 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter
+above 15. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register; // 'Joan'
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we stored thus far has an `interests` set. First, let's
+see how many of our maps even _have_ sets called `interests` using a
+wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set.
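+
+For completeness, the client examples above can presumably also be
+expressed as an HTTP query, following the same curl pattern used
+earlier on this page (a sketch):
+
+```curl
+# Wildcard query over the embedded "interests" sets; both maps should
+# match, so "num_found" should be 2
+curl "$RIAK_HOST/search/query/customers?wt=json&q=interests_set:*" | json_pp
+```
+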
+Now let's see how
+many maps have items in `interests` sets that begin with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:loving*')
+    ->build()
+    ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has one item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will have a first name only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+    .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+    .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate).build();
+client.execute(addSubMap);
+// (Joan Jett's alter_ego map would be added in the same way)
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'John Luther');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'Robert Plant');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+Querying maps within maps involves constructing queries that separate the
+different levels of depth with a single dot. Here's an example query for
+finding maps that have a `name` register embedded within an `alter_ego`
+map:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Once we know how to query embedded fields like this, we can query
+them just like any other field.
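+
+As elsewhere, the same embedded-map query can presumably be issued over
+HTTP; a sketch following the curl pattern used throughout this page:
+
+```curl
+# Both maps have an "alter_ego" map containing a "name" register, so
+# "num_found" should be 2
+curl "$RIAK_HOST/search/query/customers?wt=json&q=alter_ego_map.name_register:*" | json_pp
+```
+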
+Let's find out which maps have an `alter_ego`
+sub-map that contains a `name` register that ends with `Plant`, and
+display that customer's first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*Plant')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/secondary-indexes.md b/content/riak/kv/2.9.9/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..0edbb372e7
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/secondary-indexes.md
@@ -0,0 +1,2030 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/using/2i
+  - /riak/kv/2.9.9/dev/using/2i
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary indexes can be either a binary or string, such as
+`sensor_1_data` or `admin_user` or `click_event`, or an integer, such as
+`99` or `141121`.
+ +[Riak Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/2.9.9/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
Let's say that an
+application would like to add a Twitter handle and an email address to this
+object as secondary indexes.
+
+```java
+Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith");
+
+// In the Java client (and all clients), if you do not specify a bucket type,
+// the client will use the default type. And so the following store command
+// would be equivalent to the one above:
+Location johnSmithKey = new Location(new Namespace("users"), "john_smith");
+
+RiakObject obj = new RiakObject()
+    .setContentType("application/json")
+    .setValue(BinaryValue.create("{'user_data':{ ... }}"));
+
+obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123");
+obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com");
+
+StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(johnSmithKey)
+    .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('default').bucket('users')
+obj = Riak::RObject.new(bucket, 'john_smith')
+obj.content_type = 'application/json'
+obj.raw_data = '{"user_data":{ ... }}'
+
+# String/binary indexes must be set as an array of strings
+obj.indexes['twitter_bin'] = %w{ jsmith123 }
+obj.indexes['email_bin'] = %w{ jsmith@basho.com }
+obj.store
+
+# In the Ruby client (and all clients), if you do not specify a bucket
+# type, the client will use the default type. And so the following set
+# of commands would be equivalent to the one above:
+
+bucket = client.bucket('users')
+# repeat the same commands for building the object
+obj.store
+```
+
+```php
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter_bin', 'jsmith123')
+    ->addValueToIndex('email_bin', 'jsmith@basho.com');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->buildLocation('john_smith', 'users', 'default')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('default').bucket('users')
+# In the Python client (and all clients), if you do not specify a bucket type,
+# the client will use the default type. And so the following store command
+# would be equivalent to the one above:
+bucket = client.bucket('users')
+
+obj = RiakObject(client, bucket, 'john_smith')
+obj.content_type = 'text/plain'
+obj.data = '...user data...'
+obj.add_index('twitter_bin', 'jsmith123')
+obj.add_index('email_bin', 'jsmith@basho.com')
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("default", "users", "john_smith");
+var obj = new RiakObject(id, "...user data...",
+    RiakConstants.ContentTypes.TextPlain);
+obj.BinIndex("twitter").Set("jsmith123");
+obj.BinIndex("email").Set("jsmith@basho.com");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucket('users');
+riakObj.setKey('john_smith');
+riakObj.setValue('...user data...');
+riakObj.addToIndex('twitter_bin', 'jsmith123');
+riakObj.addToIndex('email_bin', 'jsmith@basho.com');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"default">>, <<"users">>},
+                    <<"john_smith">>,
+                    <<"...user data...">>,
+                    <<"text/plain">>),
+%% In the Erlang client (and all clients), if you do not specify a bucket type,
+%% the client will use the default type. 
And so the following object would be +%% equivalent to the one above: + +Obj = riakc_obj:new(<<"users">>, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index( + MD1, + [{{binary_index, "twitter"}, [<<"jsmith123">>]}, + {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]), +Obj2 = riakc_obj:update_metadata(Obj, MD2), +riakc_pb_socket:put(Pid, Obj2). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + BucketType: "indexes", + Bucket: "users", + Key: "john_smith", + Value: []byte("…user data…"), +} + +obj.AddToIndex("twitter_bin", "jsmith123") +obj.AddToIndex("email_bin", "jsmith@basho.com") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ + -H 'x-riak-index-twitter_bin: jsmith123' \ + -H 'x-riak-index-email_bin: jsmith@basho.com' \ + -H 'Content-Type: application/json' \ + -d '{"userData":"data"}' +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.9/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.9/developing/getting-started) section. + +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if 
(rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucket('users')
+    .withIndexName('twitter_bin')
+    .withIndexKey('jsmith123')
+    .withCallback(query_cb)
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+{ok, Results} =
+    riakc_pb_socket:get_index(Pid,
+                              <<"users">>, %% bucket
+                              {binary_index, "twitter"}, %% index name
+                              <<"jsmith123">>). %% index
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("users").
+    WithIndexName("twitter_bin").
+    WithIndexKey("jsmith123").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```
+
+The response:
+
+```java
+john_smith
+```
+
+```ruby
+["john_smith"]
+```
+
+```php
+['john_smith']
+```
+
+```python
+['john_smith']
+```
+
+```csharp
+john_smith
+```
+
+```javascript
+john_smith
+```
+
+```erlang
+{ok,{index_results_v1,[<<"john_smith">>],
+    undefined,undefined}}.
+```
+
+```golang
+john_smith
+```
+
+```curl
+{
+  "keys": [
+    "john_smith"
+  ]
+}
+```
+
+## Examples
+
+To run the following examples, make sure that Riak is configured to use
+an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].
+
+## Indexing Objects
+
+The following example indexes four different objects. Notice that we're
+storing both integer and string (aka binary) fields. Field names are
+automatically lowercased, some fields have multiple values, and
+duplicate fields are automatically de-duplicated, as in the following
+example:
+
+```java
+Namespace peopleBucket = new Namespace("indexes", "people");
+
+RiakObject larry = new RiakObject()
+    .setValue(BinaryValue.create("My name is Larry"));
+larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
+larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
+StoreValue storeLarry = new StoreValue.Builder(larry)
+    .withLocation(new Location(peopleBucket, "larry"))
+    .build();
+client.execute(storeLarry);
+
+RiakObject moe = new RiakObject()
+    .setValue(BinaryValue.create("My name is Moe"));
+moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
+moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
+StoreValue storeMoe = new StoreValue.Builder(moe)
+    .withLocation(new Location(peopleBucket, "moe"))
+    .build();
+client.execute(storeMoe);
+
+RiakObject curly = new RiakObject()
+    .setValue(BinaryValue.create("My name is Curly"));
+curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
+curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
+StoreValue storeCurly = new StoreValue.Builder(curly)
+    .withLocation(new Location(peopleBucket, "curly"))
+    .build();
+client.execute(storeCurly);
+
+RiakObject veronica = new RiakObject()
+    .setValue(BinaryValue.create("My name is Veronica"));
+veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
+    .add("val4").add("val4");
+veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
+    .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
+StoreValue storeVeronica = new StoreValue.Builder(veronica)
+    .withLocation(new Location(peopleBucket, "veronica"))
+    .build();
+client.execute(storeVeronica);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = Riak::RObject.new(bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'My name is Larry'
+obj1.indexes['field1_bin'] = %w{ val1 }
+# Like binary/string indexes, integer indexes must be set as an array,
+# even if you wish to add only a single index
+obj1.indexes['field2_int'] = [1001]
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'My name is Moe'
+obj2.indexes['Field1_bin'] = %w{ val2 }
+obj2.indexes['Field2_int'] = [1002]
+obj2.store
+
+obj3 = Riak::RObject.new(bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.raw_data = 'My name is Curly'
+obj3.indexes['FIELD1_BIN'] = %w{ val3 }
+obj3.indexes['FIELD2_INT'] = [1003]
+obj3.store
+
+obj4 = Riak::RObject.new(bucket, 'veronica')
+obj4.content_type = 'text/plain'
+obj4.raw_data = 'My name is Veronica'
+obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
+obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1004, 1007]
+obj4.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('people', 'indexes');
+
+$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val1')
+    ->addValueToIndex('field2_int', 1001);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('larry', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('Field1_bin', 'val2')
+    ->addValueToIndex('Field2_int', 1002);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('moe', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('FIELD1_BIN', 'val3')
+    ->addValueToIndex('FIELD2_int', 1003);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('curly', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4a')
+    ->addValueToIndex('field1_bin', 'val4b')
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1005)
+    ->addValueToIndex('field2_int', 1006)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1007);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = RiakObject(client, bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.data = 'My name is Larry'
+obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.data = 'Moe'
+obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
+obj2.store()
+
+obj3 = RiakObject(client, bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.data = 'Curly'
+obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
+obj3.store()
+
+obj4 = RiakObject(client, bucket, 
+obj4.content_type = 'text/plain'
+obj4.data = 'My name is Veronica'
+obj4.add_index('field1_bin', 'val4') \
+    .add_index('field1_bin', 'val4a') \
+    .add_index('field1_bin', 'val4b') \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1005) \
+    .add_index('field2_int', 1006) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1007)
+obj4.store()
+```
+
+```csharp
+var larryId = new RiakObjectId("indexes", "people", "larry");
+var larry = new RiakObject(larryId, "My name is Larry",
+    RiakConstants.ContentTypes.TextPlain);
+
+larry.BinIndex("field1").Set("val1");
+larry.IntIndex("field2").Set(1001);
+
+client.Put(larry);
+
+var moeId = new RiakObjectId("indexes", "people", "moe");
+var moe = new RiakObject(moeId, "My name is Moe",
+    RiakConstants.ContentTypes.TextPlain);
+
+moe.BinIndex("Field1").Set("val2");
+moe.IntIndex("Field2").Set(1002);
+
+client.Put(moe);
+
+var curlyId = new RiakObjectId("indexes", "people", "curly");
+var curly = new RiakObject(curlyId, "My name is Curly",
+    RiakConstants.ContentTypes.TextPlain);
+
+curly.BinIndex("FIELD1").Set("val3");
+curly.IntIndex("FIELD2").Set(1003);
+
+client.Put(curly);
+
+var veronicaId = new RiakObjectId("indexes", "people", "veronica");
+var veronica = new RiakObject(veronicaId, "My name is Veronica",
+    RiakConstants.ContentTypes.TextPlain);
+
+veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" });
+veronica.IntIndex("FIELD2").Set(new BigInteger[] {
+    1004, 1005, 1006, 1004, 1004, 1007
+});
+
+client.Put(veronica);
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var storeFuncs = [
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('larry');
+        riakObj.setValue('My name is Larry');
+        riakObj.addToIndex('field1_bin', 'val1');
+        riakObj.addToIndex('field2_int', 1001);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('moe');
+        riakObj.setValue('My name is Moe');
+        riakObj.addToIndex('Field1_bin', 'val2');
+        riakObj.addToIndex('Field2_int', 1002);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('curly');
+        riakObj.setValue('My name is Curly');
+        riakObj.addToIndex('FIELD1_BIN', 'val3');
+        riakObj.addToIndex('FIELD2_INT', 1003);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('veronica');
+        riakObj.setValue('My name is Veronica');
+        riakObj.addToIndex('FIELD1_bin', 'val4');
+        riakObj.addToIndex('FIELD1_bin', 'val4');
+        riakObj.addToIndex('FIELD1_bin', 'val4a');
+        riakObj.addToIndex('FIELD1_bin', 'val4b');
+        riakObj.addToIndex('FIELD2_int', 1004);
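+        // Mixed-case field names and repeated values are intentional here:
+        // Riak lowercases 2i field names and de-duplicates values on write.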
+        riakObj.addToIndex('FIELD2_int', 1005);
+        riakObj.addToIndex('FIELD2_int', 1006);
+        riakObj.addToIndex('FIELD2_int', 1004);
+        riakObj.addToIndex('FIELD2_int', 1004);
+        riakObj.addToIndex('FIELD2_int', 1007);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    }
+];
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Larry = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"larry">>,
+    <<"My name is Larry">>,
+    <<"text/plain">>),
+LarryMetadata = riakc_obj:get_update_metadata(Larry),
+LarryIndexes = riakc_obj:set_secondary_index(
+    LarryMetadata,
+    [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}]
+),
+LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes),
+riakc_pb_socket:put(Pid, LarryWithIndexes).
+
+Moe = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"moe">>,
+    <<"My name is Moe">>,
+    <<"text/plain">>),
+MoeMetadata = riakc_obj:get_update_metadata(Moe),
+MoeIndexes = riakc_obj:set_secondary_index(
+    MoeMetadata,
+    [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}]
+),
+MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes),
+riakc_pb_socket:put(Pid, MoeWithIndexes).
+
+Curly = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"curly">>,
+    <<"My name is Curly">>,
+    <<"text/plain">>),
+CurlyMetadata = riakc_obj:get_update_metadata(Curly),
+CurlyIndexes = riakc_obj:set_secondary_index(
+    CurlyMetadata,
+    [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}]
+),
+CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes),
+riakc_pb_socket:put(Pid, CurlyWithIndexes).
+
+Veronica = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"veronica">>,
+    <<"My name is Veronica">>,
+    <<"text/plain">>),
+VeronicaMetadata = riakc_obj:get_update_metadata(Veronica),
+VeronicaIndexes = riakc_obj:set_secondary_index(
+    VeronicaMetadata,
+    [{{binary_index, "field1"}, [<<"val4">>]},
+     {{binary_index, "field1"}, [<<"val4">>]},
+     {{integer_index, "field2"}, [1004]},
+     {{integer_index, "field2"}, [1004]},
+     {{integer_index, "field2"}, [1005]},
+     {{integer_index, "field2"}, [1006]},
+     {{integer_index, "field2"}, [1004]},
+     {{integer_index, "field2"}, [1004]},
+     {{integer_index, "field2"}, [1007]}]
+),
+VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes),
+riakc_pb_socket:put(Pid, VeronicaWithIndexes).
+```
+
+```golang
+o1 := &riak.Object{
+    Key:   "larry",
+    Value: []byte("My name is Larry"),
+}
+o1.AddToIndex("field1_bin", "val1")
+o1.AddToIntIndex("field2_int", 1001)
+
+o2 := &riak.Object{
+    Key:   "moe",
+    Value: []byte("My name is Moe"),
+}
+o2.AddToIndex("Field1_bin", "val2")
+o2.AddToIntIndex("Field2_int", 1002)
+
+o3 := &riak.Object{
+    Key:   "curly",
+    Value: []byte("My name is Curly"),
+}
+o3.AddToIndex("FIELD1_BIN", "val3")
+o3.AddToIntIndex("FIELD2_INT", 1003)
+
+o4 := &riak.Object{
+    Key:   "veronica",
+    Value: []byte("My name is Veronica"),
+}
+o4.AddToIndex("FIELD1_bin", "val4")
+o4.AddToIndex("FIELD1_bin", "val4")
+o4.AddToIndex("FIELD1_bin", "val4a")
+o4.AddToIndex("FIELD1_bin", "val4b")
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1005)
+o4.AddToIntIndex("FIELD2_int", 1006)
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1007)
+
+objs := [...]*riak.Object{o1, o2, o3, o4}
+
+wg := &sync.WaitGroup{}
+for _, obj := range objs {
+    obj.ContentType = "text/plain"
+    obj.Charset = "utf-8"
+    obj.ContentEncoding = "utf-8"
+
+    cmd, err := riak.NewStoreValueCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("people").
+        WithContent(obj).
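+        // One store command is built per object; ExecuteAsync below runs
+        // them concurrently, and the WaitGroup blocks until all complete.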
+ Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field1_bin: val1" \ + -H "x-riak-index-field2_int: 1001" \ + -d 'My name is Larry' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \ + -H "x-riak-index-Field1_bin: val2" \ + -H "x-riak-index-Field2_int: 1002" \ + -d 'My name is Moe' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \ + -H "X-RIAK-INDEX-FIELD1_BIN: val3" \ + -H "X-RIAK-INDEX-FIELD2_INT: 1003" \ + -d 'My name is Curly' + +curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \ + -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \ + -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1007" \ + -d 'My name is Veronica' +``` + +The above objects will end up having the following secondary indexes, +respectively: + +* `Larry` - Binary index `field1_bin` and integer index `field2_int` +* `Moe` - Binary index `field1_bin` and integer index `field2_int` + (note that the index names are set to lowercase by Riak) +* `Curly` - Binary index `field1_bin` and integer index `field2_int` + (note again that the index names are set to lowercase) +* `Veronica` - Binary index `field1_bin` with the values `val4`, + `val4a`, and `val4b` and integer index `field2_int` with the values + `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed) + +As these examples show, there are safeguards in Riak that both normalize +the names of indexes and prevent the accumulation of redundant indexes. + +## Invalid Field Names and Types + +The following examples demonstrate what happens when an index field is +specified with an invalid field name or type. The system responds with +`400 Bad Request` and a description of the error. + +Invalid field name: + +```java +// The Java client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_foo'] = [1001] + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter', 'jsmith123'); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_foo', 1001) + +# Result: +riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'." 
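+
+# Renaming the field to use a valid suffix, e.g. 'field2_int', satisfies
+# the naming rule.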
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
+>
+> To avoid such issues, the `riak.conf` file includes a `participate_in_coverage` setting that controls whether a node takes part in coverage (2i) queries. Setting `participate_in_coverage = disabled` prevents the node in question from participating. This is chiefly useful for keeping newly added nodes that have not yet received all of their data from serving 2i queries and returning inconsistent results. The default is `enabled`, and Riak must be restarted on a node for a change to this setting to take effect.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withScalarValue('val1')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
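+    // Exact match: returns the keys of all objects whose field1_bin
+    // index contains the value "val1".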
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withScalarValue(1001)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field1_bin",
+    "key":"val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language":"erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4")
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2'..'val4')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withRangeValue('val2', 'val4')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2', 'val4')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withRange('val2', 'val4')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"}, %% index name
+    <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4"
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithRange("val2", "val4").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4
+```
+
+Or query an integer index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002..1004)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withRangeValue(1002, 1004)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002, 1004)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withRange(1002, 1004)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"}, %% index name
+    1002, 1004 %% range query for values between 1002 and 1004
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntRange(1002, 1004).
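+    // Integer indexes use WithIntRange; binary indexes use WithRange
+    // (see the previous example).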
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field2_int",
+    "start": 1002,
+    "end": 1004
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with Terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('rock', 'rocl')
+    ->withReturnTerms()
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">>, %% range query for keys between "rock" and "rocl"
+    [{return_terms, true}] %% also return the matched index values
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withMaxResults(5)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5)
+```
+
+```csharp
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+function do_query(continuation) {
+    var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+        .withBucketType('indexes')
+        .withBucket('tweets')
+        .withIndexName('hashtags_bin')
+        .withRange('ri', 'ru')
+        .withMaxResults(5)
+        .withCallback(pagination_cb);
+
+    if (continuation) {
+        binIdxCmdBuilder.withContinuation(continuation);
+    }
+
+    client.execute(binIdxCmdBuilder.build());
+}
+
+var query_keys = [];
+function pagination_cb(err, rslt) {
+    if (err) {
+        logger.error("query_cb err: '%s'", err);
+        return;
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+        query_keys = [];
+
+        if (rslt.continuation) {
+            do_query(rslt.continuation);
+        }
+    }
+
+    if (rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+do_query();
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [{max_results, 5}]
+).
+```
+
+```golang
+func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error {
+    builder := riak.NewSecondaryIndexQueryCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("tweets").
+        WithIndexName("hashtags_bin").
+        WithRange("ri", "ru").
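+        // Ask for pages of at most 5 results; the response carries a
+        // continuation token whenever more results remain.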
+        WithMaxResults(5)
+
+    if continuation != nil && len(continuation) > 0 {
+        builder.WithContinuation(continuation)
+    }
+
+    cmd, err := builder.Build()
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    printIndexQueryResults(cmd)
+
+    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
+    if sciq.Response == nil {
+        return errors.New("[DevUsing2i] expected response but did not get one")
+    }
+
+    rc := sciq.Response.Continuation
+    if rc != nil && len(rc) > 0 {
+        return doPaginatedQuery(cluster, sciq.Response.Continuation)
+    }
+
+    return nil
+}
+
+func queryingPagination(cluster *riak.Cluster) error {
+    return doPaginatedQuery(cluster, nil)
+}
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true"
+```
+
+Here is an example JSON response (your client-specific response may differ):
+
+```json
+{
+  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
+  "results": [
+    { "rice": "349222574510710785" },
+    { "rickross": "349222868095217664" },
+    { "ridelife": "349221819552763905" },
+    { "ripjake": "349220649341952001" },
+    { "ripjake": "349220687057129473" }
+  ]
+}
+```
+
+Take the continuation value from the previous result set and feed it
+back into the query.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
+        .withMaxResults(5)
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+  'hashtags_bin',
+  'ri'..'ru',
+  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+  max_results: 5,
+  return_terms: true
+)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+    'hashtags_bin',
+    'ri', 'ru',
+    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+    max_results=5,
+    return_terms=True
+)
+```
+
+```csharp
+// rslt is the previous 2i fetch result
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+// See above example
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [
+        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
+        {max_results, 5},
+        {return_terms, true}
+    ]
+).
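+%% Passing the returned continuation back in with the same range yields
+%% the next page of results.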
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with pagination (`max_results` and
+`continuation`) and with `return_terms`.
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys.
+See the pagination example above: hash tags (2i keys) are returned in
+ascending order, and the object keys (Twitter IDs) for the messages
+which contain the `ripjake` hash tag are also returned in ascending
+order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index, as in the previous example, and pipes this into a MapReduce job
+that counts the number of records in the `people` bucket. In order to
+improve efficiency, the batch size has been increased from the default
+size of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key":"people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size":1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/security.md b/content/riak/kv/2.9.9/developing/usage/security.md
new file mode 100644
index 0000000000..9492afe158
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/security.md
@@ -0,0 +1,103 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/advanced/client-security
+  - /riak/kv/2.9.9/dev/advanced/client-security
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/2.9.9/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, aka [security sources]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients identify themselves using a CA-generated
+  certificate and private key
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified using the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis.
This means that +you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{<baseurl>}}riak/kv/2.9.9/developing/usage) have to use username and password. The approach +that you adopt will depend on your security needs. + +This document provides a general overview of how that works. For +managing security in Riak itself, see the following documents: + +* [Authentication and Authorization]({{<baseurl>}}riak/kv/2.9.9/using/security/basics) +* [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/) + +We also provide client-library-specific guides for the following +officially supported clients: + +* [Java]({{<baseurl>}}riak/kv/2.9.9/developing/usage/security/java) +* [Ruby]({{<baseurl>}}riak/kv/2.9.9/developing/usage/security/ruby) +* [PHP]({{<baseurl>}}riak/kv/2.9.9/developing/usage/security/php) +* [Python]({{<baseurl>}}riak/kv/2.9.9/developing/usage/security/python) +* [Erlang]({{<baseurl>}}riak/kv/2.9.9/developing/usage/security/erlang) + +## Certificates, Keys, and Authorities + +If Riak security is enabled, all client operations, regardless of the +security source you choose for those clients, must be over a secure SSL +connection. If you are using a self-generated Certificate Authority +(CA), Riak and connecting clients will need to share that CA. + +To use certificate-based auth, you will need to create a Public Key +Infrastructure (PKI) based on +[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central +foundation of your PKI should be a Certificate Authority (CA), created +inside of a secure environment, that can be used to sign certificates. +In addition to a CA, your client will need to have access to a private +key shared only by the client and Riak as well as a CA-generated +certificate. + +To prevent so-called [Man-in-the-Middle +attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private +keys should never be shared beyond Riak and connecting clients. + +> **HTTP not supported** +> +> Certificate-based authentication is available only through Riak's +[Protocol Buffers]({{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{<baseurl>}}riak/kv/2.9.9/developing/api/http). + +### Default Names + +In Riak's [configuration files]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#security), the +default certificate file names are as follows: + +Cert | Filename +:----|:------- +Certificate authority (CA) | `cacertfile.pem` +Private key | `key.pem` +CA-generated cert | `cert.pem` + +These filenames will be used in the client-library-specific tutorials. + + + + diff --git a/content/riak/kv/2.9.9/developing/usage/security/erlang.md b/content/riak/kv/2.9.9/developing/usage/security/erlang.md new file mode 100644 index 0000000000..e5dc5f820b --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/security/erlang.md @@ -0,0 +1,118 @@ +--- +title_supertext: "Client Security:" +title: "Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Erlang" + identifier: "usage_security_erlang" + weight: 103 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.9/dev/advanced/client-security/erlang + - /riak/kv/2.9.9/dev/advanced/client-security/erlang +--- + +This tutorial shows you how to set up a Riak Erlang client to +authenticate itself when connecting to Riak. 
+
+If you are using [trust]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/)- or [PAM]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", ""},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+```
+
+Please note that you do not need to specify a password if you are not
+using password-based authentication. If you are using a different
+security source, Riak will ignore the password. You can enter an empty
+string (as in the example above) or anything you'd like.
+
+This client is not currently set up to use any of the available security
+sources, with the exception of trust-based authentication, provided that
+the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which the client is connecting has been specified as trusted. More
+on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `SecurityOptions` list from
+above. We'll use the password `rosebud` here and in the rest of the
+examples.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", "rosebud"},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
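+%% Optional sanity check (assumes the credentials above are valid):
+%% pong = riakc_pb_socket:ping(Pid).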
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/2.9.9/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + + + + diff --git a/content/riak/kv/2.9.9/developing/usage/security/java.md b/content/riak/kv/2.9.9/developing/usage/security/java.md new file mode 100644 index 0000000000..60732e27cb --- /dev/null +++ b/content/riak/kv/2.9.9/developing/usage/security/java.md @@ -0,0 +1,121 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/2.9.9/dev/advanced/client-security/java + - /riak/kv/2.9.9/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to create a cluster object (we'll call it
+`cluster`), which will in turn be used to create a `client` object. The
+setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.RiakCluster;
+import com.basho.riak.client.api.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        // This will specify a username but no password or keystore:
+        .withAuth("riakuser", null, null)
+        .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+        .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        .withAuth("riakuser", "rosebud", ks)
+        .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/security/php.md b/content/riak/kv/2.9.9/developing/usage/security/php.md
new file mode 100644
index 0000000000..53ec212cc6
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/security/php.md
@@ -0,0 +1,122 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/advanced/client-security/php
+  - /riak/kv/2.9.9/dev/advanced/client-security/php
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics).
[Certificate]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/security/python.md b/content/riak/kv/2.9.9/developing/usage/security/python.md
new file mode 100644
index 0000000000..af20366091
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/security/python.md
@@ -0,0 +1,176 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/advanced/client-security/python
+  - /riak/kv/2.9.9/dev/advanced/client-security/python
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the above, except that we'll also specify a
+password for the client in the `creds` object from above. We'll use the
+password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.9/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a general CA (as with all security sources), a username, a
+CA-generated cert, and a private key. We'll assume that all certs are
+stored in `/ssl_dir`, as in the previous examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      cert_file='/ssl_dir/cert.pem',
+                      pkey_file='/ssl_dir/key.pem')
+```
+
+## Specifying a Certificate Revocation List
+
+If you are using a CA-generated Certificate Revocation List (CRL), you
+can specify its filepath using the `crl_file` parameter.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      crl_file='/ssl_dir/revocation.crl')
+```
+
+## Specifying Ciphers
+
+To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/2.9.9/using/security/basics/#security-ciphers), you can pass in a colon-delimited
+string to the `ciphers` parameter:
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC')
+```
+
+## Using OpenSSL Objects
+
+Whenever you specify certs, you have the option of either passing in
+file paths as strings (as in the examples above) or properly created
+OpenSSL objects, e.g. objects created using the
+[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If
+you generate OpenSSL objects this way, you should note that they must
+be specified differently when creating a `SecurityCreds` object. The
+table below lists the appropriate parameter names for each method, as
+well as the pyOpenSSL class to which each cert must belong if you create
+OpenSSL objects.
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `key_file` | `key` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify file paths, the certs will be loaded and converted into
+the appropriate OpenSSL objects. The functions used for this are
+`OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/security/ruby.md b/content/riak/kv/2.9.9/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..172a967581
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/security/ruby.md
@@ -0,0 +1,162 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/advanced/client-security/ruby
+  - /riak/kv/2.9.9/dev/advanced/client-security/ruby
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, except that we will also specify a
+password for the client in the `authentication` hash. We'll use the
+password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/2.9.9/using/security/basics#user-management).
+
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/ssl_dir/client_cert.pem',
+    cert: '/ssl_dir/cert.pem',
+    key: '/ssl_dir/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL objects.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object.
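+
+For example, here is a minimal sketch (reusing the `host`, `pb_port`, and
+cert paths from above) of a client with CRL checking turned off:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Disables the (sometimes slow) CRL check described above
+    crl: false
+  }
+)
+```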
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)),
+the OCSP endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/developing/usage/updating-objects.md b/content/riak/kv/2.9.9/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..80012390dc
--- /dev/null
+++ b/content/riak/kv/2.9.9/developing/usage/updating-objects.md
@@ -0,0 +1,778 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/using/updates
+  - /riak/kv/2.9.9/dev/using/updates
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context), which tracks the causal history of objects.
+Causal context is attached to _all_ Riak objects as metadata, and it is
+not readable by humans. It may sound complex---and it is fairly complex
+behind the scenes---but using it in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+    .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->withLocation($location)
+    ->build()
+    ->execute()
+    ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation($location)
+    ->withObject($object)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but it
+%% will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+obj = rsp.Values[0]
+obj.Value = []byte("Harlem Globetrotters")
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc = cmd.(*riak.StoreValueCommand)
+rsp = svc.Response
+obj = rsp.Values[0]
+fmt.Printf("champion: %v", string(obj.Value))
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+```
+
+In the samples above, we didn't need to actually interact with the
+context object, as retaining and passing along the context object was
+accomplished automatically by the client. If, however, you do need
+access to an object's context, the clients enable you to fetch it from
+the object:
+
+```java
+// Using the RiakObject obj from above:
+
+Vclock vClock = obj.getVclock();
+System.out.println(vClock.asString());
+
+// The context object will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```ruby
+# Using the RObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```php
+# Using the $object from above:
+
+echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```python
+# Using the RiakObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```csharp
+// Using the fetch result rslt from above:
+var vclock = rslt.Value.VectorClock;
+Console.WriteLine(Convert.ToBase64String(vclock));
+
+// The output will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```javascript
+// Using the RiakObject fetchedObj from above:
+var fetchedObj = rslt.values.shift();
+logger.info("vclock: %s", fetchedObj.getVClock().toString('base64'));
+
+// The output will look something like this:
+// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA=
+```
+
+```erlang
+%% Using the Obj object from above:
+
+riakc_obj:vclock(Obj).
+
+%% The context object will look something like this in the Erlang shell:
+%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126,
+%% 6,175,157,255,57,131,41,145,49,143,149,225,240,...>>
+```
+
+```golang
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Println(rsp.VClock)
+
+// Output:
+// X3hNXFq3ythUqvvrG9eJEGbUyLS
+```
+
+## The Object Update Cycle
+
+If you decide that your application requires mutable data in Riak, we
+recommend that you:
+
+* avoid high-frequency object updates to the same key (i.e. multiple
+  updates per second for long periods of time), as this will degrade
+  Riak performance; and that you
+* follow a read-modify-write cycle when performing updates.
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because it enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated).
This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+    * [Java]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/java)
+    * [Ruby]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/ruby)
+    * [Python]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/python)
+    * [C#]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/csharp)
+    * [Go]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+> [strong consistency documentation]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/2.9.9/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official Ruby,
+Python, .NET, Node.js, Erlang, and Go clients. Because updates with the official
+Java client function somewhat differently, those examples can be found in the
+[section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which bears the bucket type
+`siblings` (a type that sets `allow_mult` to `true`). The key for each
+object is the name of the team, e.g. `giants`, `broncos`, etc. Each
+object will consist of the name of the coach in plain text.
Here's an example of +creating and storing such an object: + +```ruby +bucket = client.bucket('coaches') +obj = bucket.get_or_new('seahawks', type: 'siblings') +obj.content_type = 'text/plain' +obj.raw_data = 'Pete Carroll' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + +if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); +} else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); +} + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('siblings').bucket('coaches') +obj = RiakObject(client, bucket, 'seahawks') +obj.content_type = 'text/plain' +obj.data = 'Pete Carroll' +obj.store() +``` + +```csharp +var id = new RiakObjectId("siblings", "coaches", "seahawks"); +var obj = new RiakObject(id, "Pete Carroll", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('siblings'); +riakObj.setBucket('coaches'); +riakObj.setKey('seahawks'); +riakObj.setValue('Pete Carroll'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } else { + logger.info('Stored Pete Carroll'); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>}, + <<"seahawks">>, + <<"Pete Carroll">>, + <<"text/plain">>). +riakc_pb_socket:put(Pid, Obj). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Pete Carroll"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey("seahawks"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +fmt.Println("Stored Pete Carroll") +``` + +Every once in a while, though, head coaches change in the NFL, which +means that our data would need to be updated. 
Below is an example +function for updating such objects: + +```ruby +def update_coach(team, new_coach) + bucket = client.bucket('coaches') + # The read phase + obj = bucket.get_or_new(team, type: 'siblings') + # The modify phase + obj.data = new_coach + # The write phase + obj.store +end + +# Example usage +update_coach('packers', 'Vince Lombardi') +``` + +```php +function update_coach($team, $coach) { + $location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); + $response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + + if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); + } else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); + } + + $response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); + + return $response->isSuccess(); +} + +echo update_coach('packers', 'Vince Lombardi'); // true +``` + +```python +def update_coach(team, new_coach): + bucket = client.bucket_type('siblings').bucket('coaches') + # The read phase + obj = bucket.get(team) + # The modify phase + obj.data = new_coach + # The write phase + obj.store() + +# Example usage +update_coach('packers', 'Vince Lombardi') +``` + +```csharp +private void UpdateCoach(string team, string newCoach) +{ + var id = new RiakObjectId("siblings", "coaches", team); + var getResult = client.Get(id); + + RiakObject obj = getResult.Value; + obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain); + client.Put(obj); +} +``` + +```javascript +function update_coach(team, newCoach) { + client.fetchValue({ + bucketType: 'siblings', bucket: 'coaches', key: team + }, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var riakObj = rslt.values.shift(); + riakObj.setValue(newCoach); + client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); + }); +} +``` + +```erlang +update_coach(team, new_coach) -> + {ok, Obj} = riakc_pb_socket:get(Pid, + {<<"siblings">>, <<"coaches">>}, + <<team>>), + ModifiedObj = riakc_obj:update_value(Obj, <<new_coach>>), + riakc_pb_socket:put(Pid, ModifiedObj). + +%% Example usage +update_coach('packers', 'Vince Lombardi') +``` + +```golang +func updateCoach(cluster *riak.Cluster, team, newCoach string) error { + var cmd riak.Command + var err error + + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey(team). + Build() + + if err != nil { + return err + } + + if err := cluster.Execute(cmd); err != nil { + return err + } + + fvc := cmd.(*riak.FetchValueCommand) + obj := fvc.Response.Values[0] + obj.Value = []byte(newCoach) + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey(team). + WithContent(obj). + Build() + + if err != nil { + return err + } + + if err := cluster.Execute(cmd); err != nil { + return err + } + + return nil +} +``` + +In the example above, you can see the three steps in action: first, the +object is read, which automatically fetches the object's causal context; +then the object is modified, i.e. the object's value is set to the name +of the new coach; and finally the object is written back to Riak. 
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If
+you're not certain, however, then we recommend always reading the object
+first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+    public String username;
+    public List<String> hobbies;
+
+    public User(String username, List<String> hobbies) {
+        this.username = username;
+        this.hobbies = hobbies;
+    }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` field:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+    .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+    .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+    .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+a "clobber" update that overwrites the existing object wholesale.
Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+    // As before, we set this option to true
+    .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+    .withUpdate(Update.clobberUpdate(brandNewUser))
+    .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+    .withUpdate(Update.noopUpdate())
+    .build();
+client.execute(updateOp);
+```
+
+The example above would update the object without returning its value to
+your application. You could, however, use a no-operation update to
+_read_ an object as well if you set `return_body` to `true` in your
+request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+    .withFetchOption(Option.RETURN_BODY, true)
+    .withUpdate(Update.noopUpdate())
+    .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/downloads.md b/content/riak/kv/2.9.9/downloads.md
new file mode 100644
index 0000000000..0db1aa58c6
--- /dev/null
+++ b/content/riak/kv/2.9.9/downloads.md
@@ -0,0 +1,27 @@
+---
+title: "Download for Riak KV 2.9.9"
+description: "Download some stuff!"
+menu: + riak_kv-2.9.9: + name: "Download Riak KV" + identifier: "download_riak_kv" + weight: 101 + pre: download-alt +project: "riak_kv" +project_version: 2.9.9 +toc: false +layout: downloads +listed_projects: + - project: "riak_kv" + version: 2.9.9 + title: "Riak KV" + install_instructions_set: "setup/installing" +aliases: + - /riak/2.9.9/downloads + - /riak/kv/2.9.9/downloads +--- + + + + + diff --git a/content/riak/kv/2.9.9/index.md b/content/riak/kv/2.9.9/index.md new file mode 100644 index 0000000000..14293247a7 --- /dev/null +++ b/content/riak/kv/2.9.9/index.md @@ -0,0 +1,77 @@ +--- +title: "Riak KV 2.9.9" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Riak KV" + identifier: "index" + weight: 100 + pre: riak +toc: false +aliases: + - /riak/2.9.9/ +--- + +[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/ +[config index]: {{<baseurl>}}riak/kv/2.9.9/configuring +[downloads]: {{<baseurl>}}riak/kv/2.9.9/downloads/ +[install index]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/ +[plan index]: {{<baseurl>}}riak/kv/2.9.9/setup/planning +[perf open files]: {{<baseurl>}}riak/kv/2.9.9/using/performance/open-files-limit +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/debian-ubuntu +[usage search]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/search +[getting started]: {{<baseurl>}}riak/kv/2.9.9/developing/getting-started +[dev client libraries]: {{<baseurl>}}riak/kv/2.9.9/developing/client-libraries + + + +Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data. + +## Supported Operating Systems + +- Amazon Linux 2016.09 (AWS) +- Amazon Linux 2 (AWS) +- CentOS 6 +- CentOS 7 +- CentOS 8 +- Debian 7.0 ("Wheezy") +- Debian 8.0 ("Jessie") +- Debian 9.0 ("Stretch") +- Red Hat Enterprise Linux 6 +- Red Hat Enterprise Linux 7 +- Red Hat Enterprise Linux 8 +- Raspbian Buster +- Ubuntu 12.04 ("Precise Pangolin") +- Ubuntu 14.04 ("Trusty Tahr") +- Ubuntu 16.04 ("Xenial Xerus") +- Ubuntu 18.04 ("Bionic Beaver") +- FreeBSD 10.4 +- FreeBSD 11.1 +- Mac OSX 10.11+ (development only) + +## Getting Started + +Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the below pages to get started: + +1. [Install Riak KV][install index] +2. [Plan your Riak KV setup][plan index] +3. [Configure Riak KV for your needs][config index] + +{{% note title="Developing with Riak KV" %}} +If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/2.9.9/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more. +{{% /note %}} + +## Popular Docs + +1. [Open Files Limit][perf open files] +2. [Installing on Debian-Ubuntu][install debian & ubuntu] +3. [Developing with Riak KV: Searching][usage search] +4. [Developing with Riak KV: Getting Started][getting started] +5. 
[Developing with Riak KV: Client Libraries][dev client libraries] + + + + + diff --git a/content/riak/kv/2.9.9/learn.md b/content/riak/kv/2.9.9/learn.md new file mode 100644 index 0000000000..c7dd82639e --- /dev/null +++ b/content/riak/kv/2.9.9/learn.md @@ -0,0 +1,53 @@ +--- +title: "Learn About Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Learning" + identifier: "learn" + weight: 400 + pre: beaker +toc: true +aliases: +--- + +[learn why riak]: ./why-riak-kv/ +[learn use cases]: ./use-cases/ +[learn new nosql]: ./new-to-nosql/ +[glossary]: ./glossary/ +[concepts]: ./concepts/ + +## In This Section + +#### [Why Riak KV?][learn why riak] + +An overview of Riak KV and when to use it. + +[Learn More >>][learn why riak] + +#### [Use Cases][learn use cases] + +Details use cases and applications in which Riak KV excels. + +[Learn More >>][learn use cases] + + + +#### [Glossary][glossary] + +A list of terms relating to Riak used throughout the documentation. + +[Learn More >>][glossary] + +#### [Concepts][concepts] + +Provides definitions for, insight into, and high level information about the various parts of Riak KV + +[Learn More >>][concepts] + + + + + diff --git a/content/riak/kv/2.9.9/learn/concepts.md b/content/riak/kv/2.9.9/learn/concepts.md new file mode 100644 index 0000000000..515992d36c --- /dev/null +++ b/content/riak/kv/2.9.9/learn/concepts.md @@ -0,0 +1,49 @@ +--- +title: "Concepts" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Concepts" + identifier: "learn_concepts" + weight: 104 + parent: "learn" +toc: true +aliases: +--- + +[concept aae]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy +[concept buckets]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets +[concept cap neg]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/capability-negotiation +[concept causal context]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context +[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/reference/strong-consistency +[concept vnodes]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/vnodes +[config index]: {{<baseurl>}}riak/kv/2.9.9/configuring +[plan index]: {{<baseurl>}}riak/kv/2.9.9/setup/planning +[use index]: {{<baseurl>}}riak/kv/2.9.9/using/ + + +Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
+
+Learn more about:
+
+* [Active Anti-Entropy (AAE)][concept aae]
+* [Buckets][concept buckets]
+* [Capability Negotiation][concept cap neg]
+* [Causal Context][concept causal context]
+* [Clusters][concept clusters]
+* [Convergent Replicated Data Types (CRDTs)][concept crdts]
+* [Eventual Consistency][concept eventual consistency]
+* [Keys and Objects][concept keys objects]
+* [Replication][concept replication]
+* [Virtual Nodes (vnodes)][concept vnodes]
+
+
+
+
diff --git a/content/riak/kv/2.9.9/learn/concepts/active-anti-entropy.md b/content/riak/kv/2.9.9/learn/concepts/active-anti-entropy.md
new file mode 100644
index 0000000000..55fce88ec6
--- /dev/null
+++ b/content/riak/kv/2.9.9/learn/concepts/active-anti-entropy.md
@@ -0,0 +1,111 @@
+---
+title: "Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Active Anti-Entropy"
+    identifier: "learn_concepts_aae"
+    weight: 100
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.9/theory/concepts/aae
+  - /riak/kv/2.9.9/theory/concepts/aae
+---
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter
+[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/active-anti-entropy
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency
+[config aae]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/#active-anti-entropy
+[glossary read rep]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#read-repair
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode
+[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree
+[usage search]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/search
+
+
+In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored
+on different nodes are an expected byproduct of node failure, concurrent
+client updates, physical data loss and corruption, and other events that
+distributed systems are built to handle. These conflicts occur when
+objects are either
+
+* **missing**, as when one node holds a replica of the object and
+  another node does not, or
+* **divergent**, as when the values of an existing object differ across
+  nodes.
+
+Riak KV offers two means of resolving object conflicts: read repair and
+active anti-entropy (AAE). Both of these conflict resolution mechanisms
+apply both to normal key/value data in Riak and to
+[search indexes][usage search].
+
+
+## Read Repair vs. Active Anti-Entropy
+
+In versions of Riak prior to 1.3, replica conflicts were healed via
+[read repair][glossary read rep], which is a _passive_
+anti-entropy mechanism that heals object conflicts only when a read
+request reaches Riak from a client. Under read repair, if the
+[vnode][glossary vnode] coordinating the read request determines
+that different nodes hold divergent values for the object, the repair
+process will be set in motion.
+
+One advantage of using read repair alone is that it doesn't require any
+kind of background process to take effect, which can cut down on CPU
+resource usage. The drawback of the read repair-only approach, however,
+is that the healing process can only ever reach those objects that
+are read by clients. Any conflicts in objects that are not read by
+clients will go undetected.
+ +The _active_ anti-entropy (AAE) subsystem was added to Riak in +versions 1.3 and later to enable conflict resolution to run as a +continuous background process, in contrast with read repair, which does +not run continuously. AAE is most useful in clusters containing so- +called "cold data" that may not be read for long periods of time, even +months or years, and is thus not reachable by read repair. + +Although AAE is enabled by default, it can be turned off if necessary. +See our documentation on [managing active anti-entropy][cluster ops aae] +for information on how to enable and disable AAE, as well as on configuring +and monitoring AAE. + +## Active Anti-Entropy and Hash Tree Exchange + +In order to compare object values between replicas without using more +resources than necessary, Riak relies on [Merkle +tree] hash exchanges between +nodes. + +Using this type of exchange enables Riak to compare a balanced tree of +Riak object hashes. Any difference at a higher level in the hierarchy +means that at least one value has changed at a lower level. AAE +recursively compares the tree, level by level, until it pinpoints exact +values with a difference between nodes. The result is that AAE is able +to run repair operations efficiently regardless of how many objects are +stored in a cluster, since it need only repair specific objects instead +of all objects. + +In contrast with related systems, Riak uses persistent, on-disk hash +trees instead of in-memory hash trees. The advantages of this approach +are twofold: + +* Riak can run AAE operations with a minimal impact on memory usage +* Riak nodes can be restarted without needing to rebuild hash trees + +In addition, hash trees are updated in real time as new writes come in, +which reduces the time that it takes to detect and repair missing or +divergent replicas. + +As an additional fallback measure, Riak periodically clears and +regenerates all hash trees from on-disk key/value data, which enables +Riak to detect silent data corruption to on-disk data arising from disk +failure, faulty hardware, and other sources. The default time period for +this regeneration is one week, but this can be adjusted in each node's +[configuration file][config aae]. 
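+
+For reference, here is a minimal sketch of what those AAE settings might
+look like in `riak.conf`. The key names below assume the standard riak_kv
+configuration schema, so check the [configuration file][config aae]
+documentation for the exact values supported by your version:
+
+```riakconf
+# Run AAE as a continuous background process (assumed key name)
+anti_entropy = active
+
+# Regenerate all hash trees once a week (the default period noted above)
+anti_entropy.tree.expiry = 1w
+```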
+
+
+
+
diff --git a/content/riak/kv/2.9.9/learn/concepts/buckets.md b/content/riak/kv/2.9.9/learn/concepts/buckets.md
new file mode 100644
index 0000000000..1759a4a29e
--- /dev/null
+++ b/content/riak/kv/2.9.9/learn/concepts/buckets.md
@@ -0,0 +1,217 @@
+---
+title: "Buckets"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Buckets"
+    identifier: "learn_concepts_buckets"
+    weight: 101
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.9/theory/concepts/Buckets
+  - /riak/kv/2.9.9/theory/concepts/Buckets
+  - /riak/2.9.9/theory/concepts/buckets
+  - /riak/kv/2.9.9/theory/concepts/buckets
+---
+
+[apps cluster metadata]: {{<baseurl>}}riak/kv/2.9.9/developing/app-guide/cluster-metadata
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/bucket-types
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/strong-consistency
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context
+[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context/#siblings
+[concept replication]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/reference/strong-consistency
+[config basic]: {{<baseurl>}}riak/kv/2.9.9/configuring/basic
+[dev api http]: {{<baseurl>}}riak/kv/2.9.9/developing/api/http
+[dev data types]: {{<baseurl>}}riak/kv/2.9.9/developing/data-types
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#ring
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/multi
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/replication
+
+
+Buckets are used to define a virtual keyspace for storing Riak objects.
+They enable you to define non-default configurations over that keyspace
+concerning [replication properties][concept replication] and [other
+parameters][config basic].
+
+In certain respects, buckets can be compared to tables in relational
+databases or to folders in filesystems. From the standpoint
+of performance, buckets with default configurations are essentially
+"free," while non-default configurations, defined [using bucket
+types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.
+
+## Configuration
+
+Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables
+you to create and modify sets of configurations and apply them to as
+many buckets as you wish. With bucket types, you can configure the
+following bucket-level parameters, overriding the default values if you
+wish.
+
+#### allow_mult
+
+Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on
+the context. See the documentation on [`allow_mult`][usage bucket types] for more
+information.
+
+#### n_val
+
+Specifies the number of copies of each object to be stored in the
+cluster. See the documentation on [replication properties][usage replication]. Default:
+`3`.
+
+#### last_write_wins
+
+Indicates if an object's timestamp will be used to decide the canonical
+write in the case of a conflict. See the documentation on [vector
+clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default:
+`false`.
+
+#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum
+
+See the documentation on [replication properties][usage replication] for more information
+on all of these properties.
+
+#### precommit
+
+A list of Erlang functions to be executed before writing an object. See
+our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit
+hooks, i.e. an empty list.
+
+#### postcommit
+
+A list of Erlang functions to be executed after writing an object. See
+our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit
+hooks, i.e. an empty list.
+
+#### old_vclock, young_vclock, small_vclock, big_vclock
+
+These settings enable you to manage [vector clock pruning][concept causal context].
+
+#### backend
+
+If you are using the [Multi][plan backend multi] backend, this property enables you to
+determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using
+LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_
+buckets of all types will use the assigned backend.
+
+#### consistent
+
+If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets
+bearing a type, this setting must be set to `true`. The default is
+`false`. More information can be found in our documentation on [using
+strong consistency][cluster ops strong consistency].
+
+#### datatype
+
+If you are using [Riak data types][dev data types], this setting
+determines which data type will be used in
+buckets of this bucket type. Possible values: `counter`, `set`, or
+`map`.
+
+#### dvv_enabled
+
+Whether [dotted version vectors][concept causal context]
+will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.
+
+#### chash_keyfun, linkfun
+
+These settings involve features that have been deprecated. You will not
+need to adjust these values.
+
+## Fetching Bucket Properties
+
+If you'd like to see how a particular bucket has been configured, you
+can do so using our official client libraries or through Riak's [HTTP
+API][dev api http]. The following would fetch the properties for the bucket
+`animals` if that bucket had a default configuration, i.e.
the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+    ->buildBucket('animals')
+    ->build()
+    ->execute()
+    ->getBucket()
+    ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak is running on "localhost" and the default HTTP port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+    ->buildBucket('animals', 'my_custom_type')
+    ->build()
+    ->execute()
+    ->getBucket()
+    ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/learn/concepts/capability-negotiation.md b/content/riak/kv/2.9.9/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..99ac1e1ad7
--- /dev/null
+++ b/content/riak/kv/2.9.9/learn/concepts/capability-negotiation.md
@@ -0,0 +1,36 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.9/theory/concepts/capability-negotiation
+  - /riak/kv/2.9.9/theory/concepts/capability-negotiation
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/2.9.9/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce
+
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
+
+Rolling upgrades no longer require you to disable and then re-enable features due to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
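+
+The negotiation itself can be pictured as each node advertising the
+capability versions it supports, with the cluster settling on the
+highest version that every member understands. The following Python
+sketch is purely illustrative (it is not Riak's implementation, and the
+function and node names are hypothetical):
+
+```python
+# Illustrative sketch of capability negotiation: each node advertises
+# the capability versions it supports; the cluster uses the highest
+# version common to all nodes.
+
+def negotiate(advertised):
+    """advertised maps node name -> set of supported capability versions."""
+    common = set.intersection(*advertised.values())
+    return max(common) if common else None
+
+# During a rolling upgrade, one node has not been upgraded yet:
+nodes = {
+    "riak@node1": {1, 2},
+    "riak@node2": {1, 2},
+    "riak@node3": {1},  # still on the older version
+}
+
+print(negotiate(nodes))  # 1: the cluster keeps the older behavior for now
+```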
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + + + + + diff --git a/content/riak/kv/2.9.9/learn/concepts/causal-context.md b/content/riak/kv/2.9.9/learn/concepts/causal-context.md new file mode 100644 index 0000000000..a29a15defb --- /dev/null +++ b/content/riak/kv/2.9.9/learn/concepts/causal-context.md @@ -0,0 +1,289 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.9/theory/concepts/context + - /riak/kv/2.9.9/theory/concepts/context +--- + + +[concept aae]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/2.9.9/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/2.9.9/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/2.9.9/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+    b. Riak creates sibling values, aka **siblings**, for the object
+
+    c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 1.4, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+ * Whether one object is a direct descendant of the other
+ * Whether the objects are direct descendants of a common parent
+ * Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects.
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks][concept causal context]. In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object but they
+can't identify which value was associated with each update.
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#siblings) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates, they
+should generally be smaller than objects that use vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence of interactions with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`. If you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion.
At the extreme, having an enormous object in a +node can cause reads of that object to crash the entire node. Other +issues include [undue latency][perf latency reduc] and +out-of-memory errors. + +To prevent sibling explosion, we recommend the following: + +1. Use [dotted version vectors](#dotted-version-vectors) +instead of vector clocks for causal +context. +2. Always update mutable objects within a read/modify/write cycle. More +information can be found in the [Object Updates][usage updating objects] doc. + +## Resources + +* [Evaluating Dotted Version Vectors in Riak] +* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study] +* [Dotted Version Vector Sets] +* [A History of Time in Riak] + + + + diff --git a/content/riak/kv/2.9.9/learn/concepts/clusters.md b/content/riak/kv/2.9.9/learn/concepts/clusters.md new file mode 100644 index 0000000000..80cbfe2328 --- /dev/null +++ b/content/riak/kv/2.9.9/learn/concepts/clusters.md @@ -0,0 +1,117 @@ +--- +title: "Clusters" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Clusters" + identifier: "learn_concepts_clusters" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.9/theory/concepts/Clusters + - /riak/kv/2.9.9/theory/concepts/Clusters + - /riak/2.9.9/theory/concepts/clusters + - /riak/kv/2.9.9/theory/concepts/clusters +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.9/learn/dynamo +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution +[usage replication]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/replication + + +Riak's default mode of operation is to work as a cluster consisting of +multiple [nodes][glossary node], i.e. multiple well-connected data +hosts. + +Each host in the cluster runs a single instance of Riak, referred to as +a Riak node. Each Riak node manages a set of virtual nodes, or +[vnodes][glossary vnode], that are responsible for storing a +separate portion of the keys stored in the cluster. + +In contrast to some high-availability systems, Riak nodes are _not_ +clones of one another, and they do not all participate in fulfilling +every request. Instead, you can configure, at runtime or at request +time, the number of nodes on which data is to be replicated, as well as +when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed. + +## The Ring + +Though much of this section is discussed in our annotated discussion of +the Amazon [Dynamo paper][learn dynamo], it nonetheless provides a summary of +how Riak implements the distribution of data throughout a cluster. + +Any client interface to Riak interacts with objects in terms of the +[bucket][concept buckets] and [key][concept keys objects] in which a value is +stored, as well as the [bucket type][usage bucket types] that is used +to set the bucket's properties. + +Internally, Riak computes a 160-bit binary hash of each bucket/key pair +and maps this value to a position on an ordered **ring** of all such +values. 
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes define a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![A Riak Ring]({{<baseurl>}}images/riak-data-distribution.png)
+
+When N is set to 3, the value `REM` is stored in the key `artist`. That
+key is assigned to 3 partitions out of 32 available partitions. When a
+read request is made to Riak, the ring state will be used to determine
+which partitions are responsible. From there, a variety of
+[configurable parameters][usage replication] determine how Riak
+will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates.
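+
+To make the mechanics above concrete, here is a small, self-contained
+Python sketch (an illustration only, not Riak's implementation, which
+derives its 160-bit hash with SHA-1 internally) of mapping a bucket/key
+pair onto the ring and selecting the N partitions that follow it:
+
+```python
+import hashlib
+
+RING_SIZE = 8        # number of partitions; for demonstration only
+N = 3                # number of replicas per object
+RING_TOP = 2 ** 160  # the ring spans the full 160-bit hash space
+
+def partition_of(bucket, key):
+    """Hash a bucket/key pair and map it onto a partition index."""
+    digest = hashlib.sha1(bucket + key).digest()  # 160-bit hash
+    point = int.from_bytes(digest, "big")         # position on the ring
+    return point // (RING_TOP // RING_SIZE)
+
+def preflist(bucket, key):
+    """The target partition plus the N-1 partitions that follow it."""
+    first = partition_of(bucket, key)
+    return [(first + i) % RING_SIZE for i in range(N)]
+
+print(preflist(b"my_bucket", b"my_key"))  # three consecutive partition indexes
+```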
+ + + + diff --git a/content/riak/kv/2.9.9/learn/concepts/crdts.md b/content/riak/kv/2.9.9/learn/concepts/crdts.md new file mode 100644 index 0000000000..e5422fe945 --- /dev/null +++ b/content/riak/kv/2.9.9/learn/concepts/crdts.md @@ -0,0 +1,252 @@ +--- +title_supertext: "Concept" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Data Types" + identifier: "learn_concepts_data_types" + weight: 104 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.9/theory/concepts/crdts + - /riak/kv/2.9.9/theory/concepts/crdts +--- + +[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[data types converg]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/crdts/#convergence +[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html +[data types impl]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/strong-consistency +[dev data types]: {{<baseurl>}}riak/kv/2.9.9/developing/data-types +[riak_dt]: https://github.com/basho/riak_dt +[dev data types context]: {{<baseurl>}}riak/kv/2.9.9/developing/data-types/#data-types-and-context +[glossary node]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution + +Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections: + +- Counters +- Flags +- HyperLogLogs +- Maps +- Registers +- Sets + +The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients. + +Instead of the usual create, read, update, and delete (CRUD) operations +performed on key/value pairs, data types enable you to perform +operations such as removing a register from a map, telling a counter to +increment itself by 5, or enabling a flag that was previously disabled. + +It's important to note that Riak Data Types are operations-based from the standpoint of connecting clients. Like CRDTs, the [convergence logic][data types converg] is state-based behind the scenes. + +Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below. + +For more articles on CRDTs, check out this [reading list][crdts reading list]. + + +## Counters + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used within a map. A counter’s value can only be a positive integer, negative integer, or zero. 
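+
+Conceptually, a convergent counter can be modeled as a PN-Counter (the
+construction noted in the convergence rules later on this page): each
+actor tracks its own increment and decrement totals, and replicas merge
+by taking per-actor maximums. A minimal Python sketch of that idea,
+purely for illustration and not Riak's implementation:
+
+```python
+# Illustrative PN-Counter sketch: per-actor increment/decrement totals.
+# Replicas converge by taking the per-actor maximum of each total, so
+# every actor's operations are eventually applied exactly once.
+
+class PNCounter:
+    def __init__(self):
+        self.incs = {}  # actor -> total increments
+        self.decs = {}  # actor -> total decrements
+
+    def increment(self, actor, amount=1):
+        self.incs[actor] = self.incs.get(actor, 0) + amount
+
+    def decrement(self, actor, amount=1):
+        self.decs[actor] = self.decs.get(actor, 0) + amount
+
+    def value(self):
+        return sum(self.incs.values()) - sum(self.decs.values())
+
+    def merge(self, other):
+        for actor, n in other.incs.items():
+            self.incs[actor] = max(self.incs.get(actor, 0), n)
+        for actor, n in other.decs.items():
+            self.decs[actor] = max(self.decs.get(actor, 0), n)
+
+a, b = PNCounter(), PNCounter()
+a.increment("node_a", 5)
+b.decrement("node_b")
+a.merge(b)
+print(a.value())  # 4
+```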
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, counters should not be used because uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, hyperloglogs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+    Counter: numberOfRetweets,
+    Register: username,
+    Register: tweetContent,
+    Flag: favorited?,
+    Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which include
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+Registers can only have the binaries stored within them changed. They can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.
+
+
+## Sets
+
+Sets are collections of unique binary values, such as strings. For
+example, if you attempt to add the element `shovel` to a set that
+already contains `shovel`, the operation will be ignored by Riak KV.
+Sets can be used either on their own or embedded in a map.
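+
+The uniqueness rule is easy to picture in a few lines of Python (a
+sketch of the semantics only, not the client API):
+
+```python
+# Illustrative sketch of set semantics: adding an element that is
+# already present leaves the set unchanged.
+cart = set()
+
+for op, element in [("add", "shovel"), ("add", "rake"),
+                    ("add", "shovel"), ("remove", "rake")]:
+    if op == "add":
+        cart.add(element)      # a no-op if the element is already present
+    else:
+        cart.discard(element)
+
+print(sorted(cart))  # ['shovel']
+```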
+ +Some examples of using sets: + +- Storing the UUIDs of a user's friends in a social network application +- Storing items in an e-commerce shopping cart + +### Operations + +Sets are subject to four basic operations: add an element, remove an +element, add multiple elements, or remove multiple elements. + + +## Advantages and Disadvantages of Data Types + +[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider. + +One of the core purposes behind data types is to relieve developers +using Riak KV of the burden of producing data convergence at the +application level by absorbing a great deal of that complexity into Riak KV +itself. Riak KV manages this complexity by building eventual consistency +into the data types themselves instead of requiring clients to do so. + +You can still build applications with Riak KV that treat it as a highly +available key/value store, and you will always have this choice. What +Riak Data Types provide is additional flexibility and a broader choice +palette. + +The trade-off that data types necessarily present is that they don't +allow you to produce your own convergence logic. If your use case +demands that you be able to create your own deterministic merge +functions, then Riak Data Types might not be a good fit. + + +## Implementation + +Conflicts between replicas are inevitable in a distributed system like +Riak KV. + +For example, if a map is stored in the key `my_map`, it is always +possible that the value of `my_map` will be different in nodes A and B. + +Without using data types, that conflict must be resolved using +timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt]. + + +## Convergence + +The benefit of data types is that Riak KV knows how to resolve value +conflicts by applying data type-specific rules. + +Riak KV does this by remembering the history of a value and broadcasting that +history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct. + +### Example + +Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`. While on another node the set has only one element, `apple`. + +What happens when the two nodes communicate and note the divergence? + +In this case Riak KV would declare the set with two elements the winner. +At that point, the node with the incorrect set would be told: "The set +`fruits` should have elements `apple` and `orange`." + +In general, convergence involves the following stages: + +1. Check for divergence. If the data types have the same value, Riak KV + does nothing. But if divergence is noted... +2. Riak KV applies data type-specific merge rules, like in the `fruits` + set example above, which will result in a "correct" value. +3. 
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. + + + + diff --git a/content/riak/kv/2.9.9/learn/concepts/eventual-consistency.md b/content/riak/kv/2.9.9/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..86c3ff1c95 --- /dev/null +++ b/content/riak/kv/2.9.9/learn/concepts/eventual-consistency.md @@ -0,0 +1,202 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/2.9.9/theory/concepts/Eventual-Consistency + - /riak/kv/2.9.9/theory/concepts/Eventual-Consistency + - /riak/2.9.9/theory/concepts/eventual-consistency + - /riak/kv/2.9.9/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these +systems need to rely on some form of conflict-resolution mechanism. + +One of the things that makes Riak's eventual consistency model powerful +is that Riak does not dictate how data resolution takes place. While +Riak does ship with a set of defaults regarding how data is +[replicated](#replication-properties-and-request-tuning) and how +[conflicts are resolved][usage conflict resolution], you can override these +defaults if you want to employ a different strategy. + +Among those strategies, you can enable Riak to resolve object conflicts +automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or +special eventually consistent [Data Types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types/), or you can resolve those +conflicts on the application side by employing a use case-specific logic +of your choosing. More information on this can be found in our guide to +[conflict resolution][usage conflict resolution]. + +This variety of options enables you to manage Riak's eventually +consistent behavior in accordance with your application's [data model +or models]({{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/). + +## Replication Properties and Request Tuning + +In addition to providing you different means of resolving conflicts, +Riak also enables you to fine-tune **replication properties**, which +determine things like the number of nodes on which data should be stored +and the number of nodes that are required to respond to read, write, and +other requests. + +An in-depth discussion of these behaviors and how they can be +implemented on the application side can be found in our guides to +[replication properties][concept replication] and [conflict resolution][usage conflict resolution]. + +In addition to our official documentation, we also recommend checking +out the [Understanding Riak's Configurable +Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/) +series from [the Basho blog](https://riak.com/blog/). + +## A Simple Example of Eventual Consistency + +Let's assume for the moment that a sports news application is storing +all of its data in Riak. One thing that the application always needs to +be able to report to users is the identity of the current manager of +Manchester United, which is stored in the key `manchester-manager` in +the bucket `premier-league-managers`. This bucket has `allow_mult` set +to `false`, which means that Riak will resolve all conflicts by itself. + +Now let's say that a node in this cluster has recently recovered from +failure and has an old copy of the key `manchester-manager` stored in +it, with the value `Alex Ferguson`. The problem is that Sir Ferguson +stepped down in 2013 and is no longer the manager. Fortunately, the +other nodes in the cluster hold the value `David Moyes`, which is +correct. + +Shortly after the recovered node comes back online, other cluster +members recognize that it is available. Then, a read request for +`manchester-manager` arrives from the application. Regardless of which +order the responses arrive to the node that is coordinating this +request, `David Moyes` will be returned as the value to the client, +because `Alex Ferguson` is recognized as an older value. + +Why is this? How does Riak make this decision? Behind the scenes, after +`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the +older value on the node that just came back online. 
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because 1 of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we've considered the
+possibility of a node for the `manchester-manager` key having failed. To
+be more precise, we've been talking about a *primary* node, one that,
+when the cluster is perfectly healthy, would bear responsibility for
+that key.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure.
+>
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3` the client indicated that 3 primary
+nodes must respond for the operation to be considered successful, which
+it wasn't, but there's no way to tell without performing another read
+whether the operation truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
+
+
+
+
diff --git a/content/riak/kv/2.9.9/learn/concepts/keys-and-objects.md b/content/riak/kv/2.9.9/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..b464f33d56
--- /dev/null
+++ b/content/riak/kv/2.9.9/learn/concepts/keys-and-objects.md
@@ -0,0 +1,53 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.9/theory/concepts/keys-and-values
+  - /riak/kv/2.9.9/theory/concepts/keys-and-values
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there is more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and do occur when more than one actor
+updates an object, a network partition occurs, or a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
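+
+The object structure described above can be sketched as a plain Python
+data structure (illustrative only; the field names are ours, not those
+of any client library):
+
+```python
+# Illustrative model of a Riak Object as described above: a bucket, a
+# key, a causal context (vector clock), and a list of metadata-value
+# pairs. More than one pair means the object has siblings.
+from dataclasses import dataclass, field
+
+@dataclass
+class RiakObject:
+    bucket: str
+    key: str
+    vclock: bytes                                  # opaque causal context
+    contents: list = field(default_factory=list)   # (metadata, value) pairs
+
+    @property
+    def has_siblings(self):
+        return len(self.contents) > 1
+
+obj = RiakObject("animals", "polly", b"a85hYGBg...",
+                 [({"content-type": "application/json"}, b'{"name": "polly"}')])
+print(obj.has_siblings)  # False: exactly one metadata-value pair
+```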
+
+
+
+
diff --git a/content/riak/kv/2.9.9/learn/concepts/replication.md b/content/riak/kv/2.9.9/learn/concepts/replication.md
new file mode 100644
index 0000000000..2b763ba7f9
--- /dev/null
+++ b/content/riak/kv/2.9.9/learn/concepts/replication.md
@@ -0,0 +1,323 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.9/theory/concepts/Replication
+  - /riak/kv/2.9.9/theory/concepts/Replication
+  - /riak/2.9.9/theory/concepts/replication
+  - /riak/kv/2.9.9/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing security
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication][cluster ops v3 mdc] capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak chooses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
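+
+The interplay between N, R, and node availability described above can be
+expressed in a few lines of Python (a sketch of the arithmetic only, not
+of Riak's internals):
+
+```python
+# Illustrative sketch: a read succeeds only if at least R of the nodes
+# holding the data can respond, and R can never exceed N.
+
+def read_succeeds(n_val, r, available_replicas):
+    assert r <= n_val, "the read quorum R is bounded by N"
+    return available_replicas >= r
+
+print(read_succeeds(n_val=3, r=3, available_replicas=2))  # False
+print(read_succeeds(n_val=3, r=2, available_replicas=2))  # True
+```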
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable, data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up changing.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy][cluster ops v3 mdc]
+
+
+## Read Repair
+
+Read repair occurs when a successful read occurs---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Or if you have [active
+anti-entropy][concept aae] enabled, your values will
+eventually replicate as a background task.
+
+For each object that fails read (or the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean?
+
+N=3 simply means that three copies of each piece of data will be stored
+in the cluster. That is, three different partitions/vnodes will receive
+copies of the data. **There are no guarantees that the three replicas
+will go to three separate physical nodes**; however, the built-in
+functions for determining where replicas go attempt to distribute the
+data evenly.
+
+As nodes are added and removed from the cluster, the ownership of
+partitions changes and may result in an uneven distribution of the data.
+On some rare occasions, Riak will also aggressively reshuffle ownership
+of the partitions to achieve a more even balance.
+
+For cases where the number of nodes is less than the N value, data will
+likely be duplicated on some nodes. For example, with N=3 and 2 nodes in
+the cluster, one node will likely have one replica, and the other node
+will have two replicas.
+
+## Understanding replication by example
+
+To better understand how data is replicated in Riak let's take a look at
+a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically
+we'll focus on two parts of the request: routing an object to a set of
+partitions and storing an object on a partition.
+
+### Routing an object to a set of partitions
+
+ * Assume we have 3 nodes
+ * Assume we store 3 replicas per object (N=3)
+ * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8)
+
+**Note**: It is not recommended that you use such a small ring size.
+This is for demonstration purposes only.
+
+With only 8 partitions our ring will look approximately as follows
+(response from `riak_core_ring_manager:get_my_ring/0` truncated for
+clarity):
+
+```erlang
+(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
+[{0,'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
+```
+
+The node handling this request hashes the bucket/key combination:
+
+```erlang
+(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+```
+
+The DocIdx hash is a 160-bit integer:
+
+```erlang
+(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx.
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+(dev1@127.0.0.1)6> I.
+1045375627425331784151332358177649483819648417632
+```
+
+The node looks up the hashed key in the ring, which returns a list of
+_preferred_ partitions for the given key.
+
+```erlang
+(node1@127.0.0.1)> Preflist = riak_core_ring:preflist(DocIdx, Ring).
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0, 'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]
+```
+
+The node chooses the first N partitions from the list. The remaining
+partitions of the "preferred" list are retained as fallbacks to use if
+any of the target partitions are unavailable.
+
+```erlang
+(dev1@127.0.0.1)8> N = 3.
+3
+(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist).
+{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0,'dev1@127.0.0.1'}],
+[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]}
+```
+
+The partition information returned from the ring contains a partition
+identifier and the parent node of that partition:
+
+```erlang
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}
+```
+
+The requesting node sends a message to each parent node with the object
+and partition identifier (pseudocode for clarity):
+
+```erlang
+'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232}
+'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+'dev1@127.0.0.1' ! {put, Object, 0}
+```
+
+If any of the target partitions fail, the node sends the object to one
+of the fallbacks. When the message is sent to the fallback node, the
+message references the object and original partition identifier. For
+example, if `dev2@127.0.0.1` were unavailable, the requesting node would
+then try each of the fallbacks. The fallbacks in this example are:
+
+```erlang
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}
+```
+
+The next available fallback node would be `dev3@127.0.0.1`. The
+requesting node would send a message to the fallback node with the
+object and original partition identifier:
+
+```erlang
+'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+```
+
+Note that the partition identifier in the message is the same one that was
+originally sent to `dev2@127.0.0.1`, only this time it is being sent to
+`dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node of
+that partition, it is smart enough to hold on to the object until
+`dev2@127.0.0.1` returns to the cluster.
+
+## Processing partition requests
+
+Processing requests per partition is fairly simple. Each node runs a
+single process (`riak_kv_vnode_master`) that distributes requests to
+individual partition processes (`riak_kv_vnode`). The
+`riak_kv_vnode_master` process maintains a list of partition identifiers
+and corresponding partition processes. If a process does not exist for a
+given partition identifier, a new process is spawned to manage that
+partition. 
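+
+The spawn-if-absent bookkeeping described above can be sketched in a few
+lines of plain Erlang. This is our illustration only, not Riak's actual
+implementation (the real `riak_kv_vnode_master` is a full OTP process and
+`riak_kv_vnode` is built on `gen_fsm`), and the module and function names
+here are hypothetical:
+
+```erlang
+-module(vnode_master_sketch).
+-export([dispatch/3]).
+
+%% Vnodes is a map of PartitionId => Pid. Look up the process managing a
+%% partition, spawning one if none exists yet, forward the request to it,
+%% and return the (possibly updated) map.
+dispatch(Vnodes, PartitionId, Request) ->
+    Pid = case maps:find(PartitionId, Vnodes) of
+              {ok, Existing} -> Existing;
+              error          -> spawn(fun() -> vnode_loop(PartitionId) end)
+          end,
+    Pid ! Request,
+    Vnodes#{PartitionId => Pid}.
+
+%% Each partition process consumes requests addressed to its partition.
+vnode_loop(PartitionId) ->
+    receive
+        {put, _Object, PartitionId} ->
+            %% a real vnode would hand the object to its storage backend here
+            vnode_loop(PartitionId)
+    end.
+```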
+
+The `riak_kv_vnode_master` process treats all requests the same and
+spawns partition processes as needed even when nodes receive requests
+for partitions they do not own. When a partition's parent node is
+unavailable, requests are sent to fallback nodes (handoff). The
+`riak_kv_vnode_master` process on the fallback node spawns a process to
+manage the partition even though the partition does not belong to the
+fallback node.
+
+The individual partition processes perform hometests throughout the life
+of the process. The hometest checks if the current node (`node/0`)
+matches the parent node of the partition as defined in the ring. If the
+process determines that the partition it is managing belongs on another
+node (the parent node), it will attempt to contact that node. If that
+parent node responds, the process will hand off any objects it has
+processed for that partition and shut down. If that parent node does not
+respond, the process will continue to manage that partition and check
+the parent node again after a delay. The hometest is also run by
+partition processes to account for changes in the ring, such as the
+addition of nodes to, or removal of nodes from, the cluster.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/learn/concepts/strong-consistency.md b/content/riak/kv/2.9.9/learn/concepts/strong-consistency.md
new file mode 100644
index 0000000000..c7b28580fb
--- /dev/null
+++ b/content/riak/kv/2.9.9/learn/concepts/strong-consistency.md
@@ -0,0 +1,105 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Strong Consistency"
+    identifier: "learn_concepts_strong_consistency"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.9/theory/concepts/strong-consistency
+  - /riak/kv/2.9.9/theory/concepts/strong-consistency
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency
+
+{{% note title="Please Note:" %}}
+Riak KV's strong consistency is an experimental feature and may be removed
+from the product in the future. Strong consistency is not commercially
+supported or production-ready. Strong consistency is incompatible with
+Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB
+Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its
+usage in any production environment.
+{{% /note %}}
+
+Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+(i.e. fault) tolerance and high read and write availability.
+
+While this focus on high availability is a great fit for many data
+storage needs, there are also many use cases for which strong data
+consistency is more important than availability. Basho introduced a new
+strong consistency option in version 2.0 to address these use cases.
+In Riak, strong consistency is applied [using bucket types][usage bucket types], which
+enables developers to apply strong consistency guarantees on a per-key
+basis.
+
+Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.9/configuring/strong-consistency) looking to manage,
+configure, and monitor strong consistency.
+
+## Strong vs. 
Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. 
Nearly all reads to `k` now return `v2`, but a small number return
+   the outdated `v` (or even `not found`) because the newer value hasn't
+   yet been replicated to all nodes
+
+
+
+
diff --git a/content/riak/kv/2.9.9/learn/concepts/vnodes.md b/content/riak/kv/2.9.9/learn/concepts/vnodes.md
new file mode 100644
index 0000000000..60858614a3
--- /dev/null
+++ b/content/riak/kv/2.9.9/learn/concepts/vnodes.md
@@ -0,0 +1,160 @@
+---
+title: "Vnodes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Vnodes"
+    identifier: "learn_concepts_vnodes"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/2.9.9/theory/concepts/vnodes
+  - /riak/kv/2.9.9/theory/concepts/vnodes
+---
+
+
+[concept causal context]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context
+[concept clusters ring]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters/#the-ring
+[concept replication]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/strong-consistency
+[glossary node]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#ring
+[plan backend]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/cluster-capacity
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-cli
+
+
+Virtual nodes, more commonly referred to as **vnodes**, are processes
+that manage partitions in the Riak [ring][glossary ring]. Each data
+partition in a Riak cluster has a vnode that **claims** that partition.
+Vnodes perform a wide variety of operations, from K/V storage operations
+to guaranteeing [strong consistency][concept strong consistency] if you choose to use that
+feature.
+
+## The Number of Vnodes in a Cluster
+
+The term [node][glossary node] refers to a full instance of Riak,
+be it on its own physical machine or alongside others on a single
+machine, as in a development cluster on your laptop. Each Riak node
+contains multiple vnodes. The number per node is the [ring
+size][concept clusters ring] divided by the number of nodes in the cluster.
+
+This means that in some clusters different nodes will have different
+numbers of data partitions (and hence a different number of vnodes),
+because (ring size / number of nodes) will not always produce an
+integer. If the ring size of your cluster is 64 and you are running
+three nodes, two of your nodes will have 21 vnodes, while the third node
+holds 22 vnodes.
+
+The output of the [`riak-admin member-status`][use admin riak cli]
+command shows this:
+
+```
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      34.4%      --      'dev1@127.0.0.1'
+valid      32.8%      --      'dev2@127.0.0.1'
+valid      32.8%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+```
+
+In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of
+64 partitions, while the other two nodes account for 32.8%, i.e. 21 out
+of 64 partitions. This is normal and expected behavior in Riak.
+
+We strongly recommend setting the appropriate ring size, and by
+extension the number of vnodes, prior to building a cluster. A full
+guide can be found in our [cluster planning][plan cluster capacity] documentation. 
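+
+To make the vnode arithmetic above concrete, here is a quick sketch you
+can paste into any Erlang shell. It is purely illustrative and mirrors
+the 64-partition, 3-node example:
+
+```erlang
+1> RingSize = 64.
+64
+2> NumNodes = 3.
+3
+3> RingSize div NumNodes.  %% baseline number of vnodes per node
+21
+4> RingSize rem NumNodes.  %% how many nodes must each claim one extra vnode
+1
+5> 22 / 64 * 100.          %% the 22-vnode node's share of the ring
+34.375
+```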
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, acting as [strong consistency
+ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1004782375664995756265033322492444576013453623296`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node. 
The report for a
+specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+ 0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/learn/dynamo.md b/content/riak/kv/2.9.9/learn/dynamo.md
new file mode 100644
index 0000000000..9fbcab0114
--- /dev/null
+++ b/content/riak/kv/2.9.9/learn/dynamo.md
@@ -0,0 +1,1928 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/2.9.9/theory/dynamo
+  - /riak/kv/2.9.9/theory/dynamo
+---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.
+
+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV, Cassandra
+> and Voldemort come to mind. You may also remember Dynomite (which
+> predates all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use. 
+
+Categories and Subject Descriptors
+
+* D.4.2 [Operating Systems]: Storage Management;
+* D.4.5 [Operating Systems]: Reliability;
+* D.4.2 [Operating Systems]: Performance;
+
+General Terms
+
+Algorithms, Management, Measurement, Performance, Design, Reliability.
+
+## 1. Introduction
+
+Amazon runs a world-wide e-commerce platform that serves tens of millions
+customers at peak times using tens of thousands of servers located in many data
+centers around the world. There are strict operational requirements on Amazon’s
+platform in terms of performance, reliability and efficiency, and to support
+continuous growth the platform needs to be highly scalable. Reliability is one
+of the most important requirements because even the slightest outage has
+significant financial consequences and impacts customer trust. In addition, to
+support continuous growth, the platform needs to be highly scalable.
+
+One of the lessons our organization has learned from operating Amazon’s platform
+is that the reliability and scalability of a system is dependent on how its
+application state is managed. Amazon uses a highly decentralized, loosely
+coupled, service oriented architecture consisting of hundreds of services. In
+this environment there is a particular need for storage technologies that are
+always available. For example, customers should be able to view and add items to
+their shopping cart even if disks are failing, network routes are flapping, or
+data centers are being destroyed by tornados. Therefore, the service responsible
+for managing shopping carts requires that it can always write to and read from
+its data store, and that its data needs to be available across multiple data
+centers.
+
+Dealing with failures in an infrastructure comprised of millions of components
+is our standard mode of operation; there are always a small but significant
+number of server and network components that are failing at any given time. As
+such Amazon’s software systems need to be constructed in a manner that treats
+failure handling as the normal case without impacting availability or
+performance.
+
+To meet the reliability and scaling needs, Amazon has developed a number of
+storage technologies, of which the Amazon Simple Storage Service (also available
+outside of Amazon and known as Amazon S3), is probably the best known. This
+paper presents the design and implementation of Dynamo, another highly available
+and scalable distributed data store built for Amazon’s platform. Dynamo is used
+to manage the state of services that have very high reliability requirements and
+need tight control over the tradeoffs between availability, consistency,
+cost-effectiveness and performance. Amazon’s platform has a very diverse set of
+applications with different storage requirements. A select set of applications
+requires a storage technology that is flexible enough to let application
+designers configure their data store appropriately based on these tradeoffs to
+achieve high availability and guaranteed performance in the most cost effective
+manner.
+
+There are many services on Amazon’s platform that only need primary-key access
+to a data store. For many services, such as those that provide best seller
+lists, shopping carts, customer preferences, session management, sales rank, and
+product catalog, the common pattern of using a relational database would lead to
+inefficiencies and limit scale and availability. Dynamo provides a simple
+primary-key only interface to meet the requirements of these applications. 
+ +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. It also provides insight into the +tuning of these techniques to meet the requirements of production systems with +very strict performance demands. + +The paper is structured as follows. Section 2 presents the background and +Section 3 presents the related work. Section 4 presents the system design and +Section 5 describes the implementation. Section 6 details the experiences and +insights gained by running Dynamo in production and Section 7 concludes the +paper. There are a number of places in this paper where additional information +may have been appropriate but where protecting Amazon’s business interests +require us to reduce some level of detail. For this reason, the intra- and +inter-datacenter latencies in section 6, the absolute request rates in section +6.2 and outage lengths and workloads in section 6.3 are provided through +aggregate measures instead of absolute details. + + +## 2. Background + +Amazon’s e-commerce platform is composed of hundreds of services that work in +concert to deliver functionality ranging from recommendations to order +fulfillment to fraud detection. Each service is exposed through a well defined +interface and is accessible over the network. 
These services are hosted in an +infrastructure that consists of tens of thousands of servers located across many +data centers world-wide. Some of these services are stateless (i.e., services +which aggregate responses from other services) and some are stateful (i.e., a +service that generates its response by executing business logic on its state +stored in persistent store). + +> **Brief Background on Riak KV** +> +> Basho Technologies started to develop Riak KV back in 2007 to solve an +> internal problem. We were, at the time, builing a web application that would +> require a database layer that afforded higher availability and scale out +> properties than any technology we knew of. So, we rolled our own. +> +> After using Riak KV in production for several successful applications that +> generated revenue, we decided to open source it and share our creation with +> the world. + +Traditionally production systems store their state in relational databases. For +many of the more common usage patterns of state persistence, however, a +relational database is a solution that is far from ideal. Most of these services +only store and retrieve data by primary key and do not require the complex +querying and management functionality offered by an RDBMS. This excess +functionality requires expensive hardware and highly skilled personnel for its +operation, making it a very inefficient solution. In addition, the available +replication technologies are limited and typically choose consistency over +availability. Although many advances have been made in the recent years, it is +still not easy to scale-out databases or use smart partitioning schemes for load +balancing. + +This paper describes Dynamo, a highly available data storage technology that +addresses the needs of these important classes of services. Dynamo has a simple +key/value interface, is highly available with a clearly defined consistency +window, is efficient in its resource usage, and has a simple scale out scheme to +address growth in data set size or request rates. Each service that uses Dynamo +runs its own Dynamo instances. + +> Riak KV is a highly available, scalable, open source key/value database. These +> notes describe where Riak KV's design decisions emulated and diverged from +> Dynamo's (as described in this paper). +> +> Riak KV offers several query methods in addition to the standard key/value +> interface, is made to be highly-available, is efficient in its resource uses, +> and has a simple scale out story to accompany data and traffic growth. + + +### 2.1 System Assumptions and Requirements + +The storage system for this class of services has the following requirements: + + +* Query Model: simple read and write operations to a data item that is uniquely +identified by a key. State is stored as binary objects (i.e., blobs) identified +by unique keys. No operations span multiple data items and there is no need for +relational schema. This requirement is based on the observation that a +significant portion of Amazon’s services can work with this simple query model +and do not need any relational schema. Dynamo targets applications that need to +store objects that are relatively small (usually less than 1 MB). + +> **Riak KV's Query Model** +> +> We've extended Dynamo's proposed query model in several ways. Currently Riak +> KV offers: +> +> 1. Standard key/value access (GET, PUT, DELETE) +> 2. MapReduce querying +> 3. Secondary Indexing +> 4. Full-text Search +> +> Riak KV's realistic object size limit is around 5MB. 
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract. 
+ +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. 
+ + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. On the other hand, since the application is aware of the +data schema it can decide on the conflict resolution method that is best suited +for its client’s experience. 
For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbor. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops. 
+
+To reduce the additional latency introduced by multi-hop routing, some P2P
+systems (e.g., [14]) employ O(1) routing where each peer maintains enough
+routing information locally so that it can route requests (to access a data
+item) to the appropriate peer within a constant number of hops.
+
+> Riak KV's gossip protocol communicates between nodes with O(1) routing, and
+> maintains local routing information.
+
+Various storage systems, such as Oceanstore [9] and PAST [17] were built on top
+of these routing overlays. Oceanstore provides a global, transactional,
+persistent storage service that supports serialized updates on widely replicated
+data. To allow for concurrent updates while avoiding many of the problems
+inherent with wide-area locking, it uses an update model based on conflict
+resolution. Conflict resolution was introduced in [21] to reduce the number of
+transaction aborts. Oceanstore resolves conflicts by processing a series of
+updates, choosing a total order among them, and then applying them atomically in
+that order. It is built for an environment where the data is replicated on an
+untrusted infrastructure. By comparison, PAST provides a simple abstraction
+layer on top of Pastry for persistent and immutable objects. It assumes that the
+application can build the necessary storage semantics (such as mutable files) on
+top of it.
+
+### 3.2 Distributed File Systems and Databases
+
+Distributing data for performance, availability and durability has been widely
+studied in the file system and database systems community. Compared to P2P
+storage systems that only support flat namespaces, distributed file systems
+typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19]
+replicate files for high availability at the expense of consistency. Update
+conflicts are typically managed using specialized conflict resolution
+procedures. The Farsite system [1] is a distributed file system that does not
+use any centralized server like NFS. Farsite achieves high availability and
+scalability using replication. The Google File System [6] is another distributed
+file system built for hosting the state of Google’s internal applications. GFS
+uses a simple design with a single master server for hosting the entire metadata
+and where the data is split into chunks and stored in chunkservers. Bayou is a
+distributed relational database system that allows disconnected operations and
+provides eventual data consistency [21].
+
+Among these systems, Bayou, Coda and Ficus allow disconnected operations and are
+resilient to issues such as network partitions and outages. These systems differ
+on their conflict resolution procedures. For instance, Coda and Ficus perform
+system level conflict resolution and Bayou allows application level resolution.
+All of them, however, guarantee eventual consistency.
+
+Similar to these systems, Dynamo allows read and write operations to continue
+even during network partitions and resolves updated conflicts using different
+conflict resolution mechanisms. Distributed block storage systems like FAB [18]
+split large size objects into smaller blocks and stores each block in a highly
+available manner. In comparison to these systems, a key-value store is more
+suitable in this case because: (a) it is intended to store relatively small
+objects (size < 1M) and (b) key-value stores are easier to configure on a
+per-application basis. Antiquity is a wide-area distributed storage system designed
+to handle multiple server failures [23]. 
It uses a secure log to preserve data
+integrity, replicates each log on multiple servers for durability, and uses
+Byzantine fault tolerance protocols to ensure data consistency. In contrast to
+Antiquity, Dynamo does not focus on the problem of data integrity and security
+and is built for a trusted environment. Bigtable is a distributed storage system
+for managing structured data. It maintains a sparse, multi-dimensional sorted
+map and allows applications to access their data using multiple attributes [2].
+Compared to Bigtable, Dynamo targets applications that require only key/value
+access with primary focus on high availability where updates are not rejected
+even in the wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages. 
+ +<table id="table-1"> + <caption> + Table 1: Summary of techniques used in Dynamo and their advantages. + </caption> + <tr> + <th>Problem</th> + <th>Technique</th> + <th>Advantage</th> + </tr> + <tr> + <td>Partitioning</td> + <td>Consistent Hashing</td> + <td>Incremental Scalability</td> + </tr> + <tr> + <td>High Availability for writes</td> + <td>Vector clocks with reconciliation during reads</td> + <td>Version size is decoupled from update rates.</td> + </tr> + <tr> + <td>Handling temporary failures</td> + <td>Sloppy Quorum and hinted handoff</td> + <td>Provides high availability and durability guarantee when some of the + replicas are not available.</td> + </tr> + <tr> + <td>Recovering from permanent failures</td> + <td>Anti-entropy using Merkle trees</td> + <td>Synchronizes divergent replicas in the background.</td> + </tr> + <tr> + <td>Membership and failure detection</td> + <td>Gossip-based membership protocol and failure detection.</td> + <td>Preserves symmetry and avoids having a centralized registry for storing + membership and node liveness information.</td> + </tr> +</table> + +### 4.1 System Interface + +Dynamo stores objects associated with a key through a simple interface; it +exposes two operations: get() and put(). The get(key) operation locates the +object replicas associated with the key in the storage system and returns a +single object or a list of objects with conflicting versions along with a +context. The put(key, context, object) operation determines where the replicas +of the object should be placed based on the associated key, and writes the +replicas to disk. The context encodes system metadata about the object that is +opaque to the caller and includes information such as the version of the object. +The context information is stored along with the object so that the system can +verify the validity of the context object supplied in the put request. + +> Whereas Dynamo only has the concept of keys, we added a higher level of +> organization called a "bucket." Keys are stored in buckets and buckets are the +> level at which several Riak KV properties can be configured (primarily the "N" +> value, or the replication value.) In addition to the bucket+key identifier and +> value, Riak KV will also return the associated metadata for a given object +> with each get or put. +> +> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. + +[HTTP API]: {{<baseurl>}}riak/kv/2.9.9/developing/api/http/ +[Protocol Buffers API]: {{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers/ + +Dynamo treats both the key and the object supplied by the caller as an opaque +array of bytes. It applies a MD5 hash on the key to generate a 128-bit +identifier, which is used to determine the storage nodes that are responsible +for serving the key. + +> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash +> to generate a 160 bit identifier which is then used to determine where in the +> database each datum is stored. Riak KV treats data as an opaque binary, thus +> enabling users to store virtually anything. + + +### 4.2 Partitioning Algorithm + +One of the key design requirements for Dynamo is that it must scale +incrementally. This requires a mechanism to dynamically partition the data over +the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning +scheme relies on consistent hashing to distribute the load across multiple +storage hosts. 
In consistent hashing [10], the output range of a hash function
+is treated as a fixed circular space or “ring” (i.e. the largest hash value
+wraps around to the smallest hash value). Each node in the system is assigned a
+random value within this space which represents its “position” on the ring. Each
+data item identified by a key is assigned to a node by hashing the data item’s
+key to yield its position on the ring, and then walking the ring clockwise to
+find the first node with a position larger than the item’s position. Thus, each
+node becomes responsible for the region in the ring between it and its
+predecessor node on the ring. The principle advantage of consistent hashing is
+that departure or arrival of a node only affects its immediate neighbors and
+other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data around
+> the ring to partitions responsible for storing data. The ring has a maximum key
+> space of 2^160. Each bucket+key (and its associated value) is hashed to a
+> location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Each storage node will
+> optimistically handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo uses
+a variant of consistent hashing (similar to the one used in [10, 20]): instead
+of mapping a node to a single point in the circle, each node gets assigned to
+multiple points in the ring. To this end, Dynamo uses the concept of “virtual
+nodes”. A virtual node looks like a single node in the system, but each node can
+be responsible for more than one virtual node. Effectively, when a new node is
+added to the system, it is assigned multiple positions (henceforth, “tokens”) in
+the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed
+in Section 6.
+
+> Riak KV also has the concept of virtual nodes and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+If a node becomes unavailable (due to failures or routine maintenance), the load
+handled by this node is evenly dispersed across the remaining available nodes.
+
+When a node becomes available again, or a new node is added to the system, the
+newly available node accepts a roughly equivalent amount of load from each of
+the other available nodes.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+The number of virtual nodes that a node is responsible for can be decided based
+on its capacity, accounting for heterogeneity in the physical infrastructure.
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters/#the-ring
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts. 

### 4.3 Replication

To achieve high availability and durability, Dynamo replicates its data on
multiple hosts. Each data item is replicated at N hosts, where N is a parameter
configured “per-instance”. Each key, k, is assigned to a coordinator node
(described in the previous section). The coordinator is in charge of the
replication of the data items that fall within its range. In addition to locally
storing each key within its range, the coordinator replicates these keys at the
N-1 clockwise successor nodes in the ring. This results in a system where each
node is responsible for the region of the ring between it and its Nth
predecessor. In <a href="#figure-2">Figure 2</a>, node B replicates the key k at
nodes C and D in addition to storing it locally. Node D will store the keys that
fall in the ranges (A, B], (B, C], and (C, D].

<figure id="figure-2" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure2.png">
  <figcaption>
    Figure 2: Partitioning and replication of keys in Dynamo ring.
  </figcaption>
</figure>

> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
> the concept of a bucket we covered above? In Riak KV, the replication
> parameter, "N" (also called "n_val"), is configurable at the bucket level.
> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
> store three replicas of your data on three different partitions on the ring.
>
> The diagram is applicable to Riak KV and the manner in which it replicates
> data. The preference list is present in Riak KV, too, and is the reason why
> any node in the ring can coordinate a request. The node receives a request,
> consults the preference list, and routes the request accordingly.

The list of nodes that is responsible for storing a particular key is called the
preference list. The system is designed, as will be explained in Section 4.8, so
that every node in the system can determine which nodes should be in this list
for any particular key. To account for node failures, the preference list
contains more than N nodes. Note that with the use of virtual nodes, it is
possible that the first N successor positions for a particular key may be owned
by fewer than N distinct physical nodes (i.e. a node may hold more than one of
the first N positions). To address this, the preference list for a key is
constructed by skipping positions in the ring to ensure that the list contains
only distinct physical nodes.


### 4.4 Data Versioning

Dynamo provides eventual consistency, which allows for updates to be propagated
to all replicas asynchronously. A put() call may return to its caller before the
update has been applied at all the replicas, which can result in scenarios where
a subsequent get() operation may return an object that does not have the latest
updates. If there are no failures, then there is a bound on the update
propagation times. However, under certain failure scenarios (e.g., server
outages or network partitions), updates may not arrive at all replicas for an
extended period of time.

> Riak KV is an "eventually consistent" database. All replication is done
> asynchronously and, as you would expect, this could result in a datum being
> returned to the client that is out of date. But don't worry. We built in some
> mechanisms to address this.

There is a category of applications in Amazon’s platform that can tolerate such
inconsistencies and can be constructed to operate under these conditions. For
example, the shopping cart application requires that an “Add to Cart” operation
can never be forgotten or rejected.
If the most recent state of the cart is
unavailable, and a user makes changes to an older version of the cart, that
change is still meaningful and should be preserved. But at the same time it
shouldn’t supersede the currently unavailable state of the cart, which itself
may contain changes that should be preserved. Note that both “add to cart” and
“delete item from cart” operations are translated into put requests to Dynamo.
When a customer wants to add an item to (or remove from) a shopping cart and the
latest version is not available, the item is added to (or removed from) the
older version and the divergent versions are reconciled later.

> Much like Dynamo was suited to the design of the shopping cart, Riak KV, with
> its tradeoffs, is appropriate for a certain set of use cases. We happen to
> feel that _most_ use cases can tolerate some level of eventual consistency.

In order to provide this kind of guarantee, Dynamo treats the result of each
modification as a new and immutable version of the data. It allows for multiple
versions of an object to be present in the system at the same time. Most of the
time, new versions subsume the previous version(s), and the system itself can
determine the authoritative version (syntactic reconciliation). However, version
branching may happen in the presence of failures combined with concurrent
updates, resulting in conflicting versions of an object. In these cases, the
system cannot reconcile the multiple versions of the same object and the client
must perform the reconciliation in order to collapse multiple branches of data
evolution back into one (semantic reconciliation). A typical example of a
collapse operation is “merging” different versions of a customer’s shopping
cart. Using this reconciliation mechanism, an “add to cart” operation is never
lost. However, deleted items can resurface.

> The same holds true for Riak KV. If, by way of some failure and concurrent
> update (rare but quite possible), there come to exist multiple versions of the
> same object, Riak KV will push this decision down to the client (who are we to
> tell you which is the authoritative object?). All that said, if your
> application doesn't need this level of version control, we enable you to turn
> the usage of vector clocks on and off at the bucket level.

It is important to understand that certain failure modes can potentially result
in the system having not just two but several versions of the same data. Updates
in the presence of network partitions and node failures can potentially result
in an object having distinct version sub-histories, which the system will need
to reconcile in the future. This requires us to design applications that
explicitly acknowledge the possibility of multiple versions of the same data (in
order to never lose any updates).

> Ditto.

Dynamo uses vector clocks [12] in order to capture causality between different
versions of the same object. A vector clock is effectively a list of (node,
counter) pairs. One vector clock is associated with every version of every
object. One can determine whether two versions of an object are on parallel
branches or have a causal ordering by examining their vector clocks. If the
counters on the first object’s clock are less-than-or-equal to all of the
corresponding counters in the second clock, then the first is an ancestor of
the second and can be forgotten. Otherwise, the two changes are considered to
be in conflict and require reconciliation.

> As you may have already figured out, Riak KV uses vector clocks for object
> versioning, too. Here are a whole host of resources to keep you busy for a
> while:
>
> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vector-clock)
>
> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/) |
> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
>
> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
>
> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)

In Dynamo, when a client wishes to update an object, it must specify which
version it is updating. This is done by passing the context it obtained from an
earlier read operation, which contains the vector clock information. Upon
processing a read request, if Dynamo has access to multiple branches that cannot
be syntactically reconciled, it will return all the objects at the leaves, with
the corresponding version information in the context. An update using this
context is considered to have reconciled the divergent versions and the branches
are collapsed into a single new version.

<figure id="figure-3" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure3.png">
  <figcaption>
    Figure 3: Version evolution of an object over time.
  </figcaption>
</figure>

To illustrate the use of vector clocks, let us consider the example shown in
<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say
Sx) that handles the write for this key increases its sequence number and uses
it to create the data's vector clock. The system now has the object D1 and its
associated clock [(Sx, 1)]. The client updates the object. Assume the same node
handles this request as well. The system now also has object D2 and its
associated clock [(Sx, 2)]. D2 descends from D1 and therefore overwrites D1;
however, there may be replicas of D1 lingering at nodes that have not yet seen
D2. Let us assume that the same client updates the object again and a different
server (say Sy) handles the request. The system now has data D3 and its
associated clock [(Sx, 2), (Sy, 1)].

Next assume a different client reads D2 and then tries to update it, and another
node (say Sz) does the write. The system now has D4 (descendant of D2) whose
version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2 could
determine, upon receiving D4 and its clock, that D1 and D2 are overwritten by
the new data and can be garbage collected. A node that is aware of D3 and
receives D4 will find that there is no causal relation between them. In other
words, there are changes in D3 and D4 that are not reflected in each other. Both
versions of the data must be kept and presented to a client (upon a read) for
semantic reconciliation.

Now assume some client reads both D3 and D4 (the context will reflect that both
values were found by the read). The read's context is a summary of the clocks of
D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client performs the
reconciliation and node Sx coordinates the write, Sx will update its sequence
number in the clock. The new data D5 will have the following clock: [(Sx, 3),
(Sy, 1), (Sz, 1)].
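
> The walkthrough above can be reproduced in a few lines of illustrative
> Python. The `descends` check below implements the comparison rule from
> Section 4.4 (every counter in one clock less-than-or-equal to its
> counterpart in the other); it is a sketch, not Riak KV's actual vector
> clock code.
>
> ```python
> def descends(a: dict, b: dict) -> bool:
>     """True if clock `a` dominates clock `b` (b is an ancestor of a)."""
>     return all(a.get(node, 0) >= counter for node, counter in b.items())
>
> d1 = {"Sx": 1}
> d2 = {"Sx": 2}
> d3 = {"Sx": 2, "Sy": 1}
> d4 = {"Sx": 2, "Sz": 1}
>
> assert descends(d2, d1)                               # D1 can be forgotten
> assert not descends(d3, d4) and not descends(d4, d3)  # D3, D4 conflict
>
> # Semantic reconciliation at Sx merges the clocks and bumps Sx's counter:
> d5 = {n: max(d3.get(n, 0), d4.get(n, 0)) for n in {**d3, **d4}}
> d5["Sx"] += 1
> assert d5 == {"Sx": 3, "Sy": 1, "Sz": 1}
> ```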

A possible issue with vector clocks is that the size of vector clocks may grow
if many servers coordinate the writes to an object. In practice, this is not
likely because the writes are usually handled by one of the top N nodes in the
preference list. In the case of network partitions or multiple server failures,
write requests may be handled by nodes that are not in the top N nodes in the
preference list, causing the size of the vector clock to grow. In these
scenarios, it is desirable to limit the size of the vector clock. To this end,
Dynamo employs the following clock truncation scheme: along with each (node,
counter) pair, Dynamo stores a timestamp that indicates the last time the node
updated the data item. When the number of (node, counter) pairs in the vector
clock reaches a threshold (say 10), the oldest pair is removed from the clock.
Clearly, this truncation scheme can lead to inefficiencies in reconciliation,
as the descendant relationships cannot be derived accurately. However, this
problem has not surfaced in production and therefore this issue has not been
thoroughly investigated.

> Riak KV does a certain amount of vector clock pruning to keep their growth
> under control.


### 4.5 Execution of get() and put() Operations

Any storage node in Dynamo is eligible to receive client get and put operations
for any key. In this section, for the sake of simplicity, we describe how these
operations are performed in a failure-free environment, and in the subsequent
section we describe how read and write operations are executed during failures.

> Any node in the Riak KV ring can coordinate a request. The Riak KV information
> in this section applies to a failure-free environment.

Both get and put operations are invoked using Amazon’s infrastructure-specific
request processing framework over HTTP. There are two strategies that a client
can use to select a node: (1) route its request through a generic load balancer
that will select a node based on load information, or (2) use a partition-aware
client library that routes requests directly to the appropriate coordinator
nodes. The advantage of the first approach is that the client does not have to
link any code specific to Dynamo in its application, whereas the second strategy
can achieve lower latency because it skips a potential forwarding step.

A node handling a read or write operation is known as the coordinator.
Typically, this is the first among the top N nodes in the preference list. If
the requests are received through a load balancer, requests to access a key may
be routed to any random node in the ring. In this scenario, the node that
receives the request will not coordinate it if the node is not in the top N of
the requested key’s preference list. Instead, that node will forward the request
to the first among the top N nodes in the preference list.

Read and write operations involve the first N healthy nodes in the preference
list, skipping over those that are down or inaccessible. When all nodes are
healthy, the top N nodes in a key’s preference list are accessed. When there are
node failures or network partitions, nodes that are lower ranked in the
preference list are accessed.

To maintain consistency among its replicas, Dynamo uses a consistency protocol
similar to those used in quorum systems. This protocol has two key configurable
values: R and W. R is the minimum number of nodes that must participate in a
successful read operation. W is the minimum number of nodes that must
participate in a successful write operation. Setting R and W such that R + W > N
yields a quorum-like system. In this model, the latency of a get (or put)
operation is dictated by the slowest of the R (or W) replicas. For this reason,
R and W are usually configured to be less than N, to provide better latency.
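
> As a toy model of the R/W accounting (not Riak KV's request path; the class
> and function names are invented for the example), consider:
>
> ```python
> N, R, W = 3, 2, 2  # a common Dynamo configuration; note R + W > N
>
> class Replica:
>     def __init__(self, up=True):
>         self.up, self.value = up, None
>
>     def write(self, value):
>         if self.up:
>             self.value = value
>         return self.up
>
>     def read(self):
>         return self.value if self.up else None
>
> def put(replicas, value):
>     acks = sum(1 for r in replicas[:N] if r.write(value))
>     return acks >= W  # success once W replicas acknowledge
>
> def get(replicas):
>     responses = [r.read() for r in replicas[:N] if r.read() is not None]
>     return responses if len(responses) >= R else None  # need R responses
>
> replicas = [Replica(), Replica(), Replica(up=False)]  # one replica down
> assert put(replicas, "v1")            # 2 acks, W = 2: write succeeds
> assert get(replicas) == ["v1", "v1"]  # 2 responses, R = 2: read succeeds
> ```
>
> Raising W (or R) shrinks the window for inconsistency but means more
> replicas must be reachable, which is the availability trade-off Section 6
> returns to.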

> Riak KV makes use of the same values. But, thanks to our concept of buckets,
> we made it a bit more customizable. The default R and W values are set at the
> bucket level but can be configured at the request level if the developer deems
> it necessary for certain data. "Quorum" as described in Dynamo is the default
> setting in Riak KV.
>
> Some more resources on R and W:
>
> [REST API]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/)
>
> [Writing Data]({{<baseurl>}}riak/kv/2.9.9/developing/usage/creating-objects/)
>
> [Reading Data]({{<baseurl>}}riak/kv/2.9.9/developing/usage/reading-objects/)

Upon receiving a put() request for a key, the coordinator generates the vector
clock for the new version and writes the new version locally. The coordinator
then sends the new version (along with the new vector clock) to the N highest-
ranked reachable nodes. If at least W-1 nodes respond, then the write is
considered successful.

> In Riak KV a write is considered successful when the total number of
> responding writes equals W. This need not be a durable write, which is a
> separate value in Riak KV labeled DW.

Similarly, for a get() request, the coordinator requests all existing versions
of data for that key from the N highest-ranked reachable nodes in the preference
list for that key, and then waits for R responses before returning the result to
the client. If the coordinator ends up gathering multiple versions of the data,
it returns all the versions it deems to be causally unrelated. The divergent
versions are then reconciled, and the reconciled version superseding the current
versions is written back.

> Same for Riak KV. Reconciling divergent versions in Riak KV is called
> [Read Repair]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication/#read-repair).


### 4.6 Handling Failures: Hinted Handoff

If Dynamo used a traditional quorum approach, it would be unavailable during
server failures and network partitions, and would have reduced durability even
under the simplest of failure conditions. To remedy this, it does not enforce
strict quorum membership; instead, it uses a “sloppy quorum”: all read and
write operations are performed on the first N healthy nodes from the preference
list, which may not always be the first N nodes encountered while walking the
consistent hashing ring.

> [Hinted handoff] is built into Riak KV's core.
>
> You can glimpse Riak KV's preference list (or *preflist*) calculation in
> the [Replication] walkthrough.

[Hinted handoff]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#hinted-handoff
[Replication]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/replication/

Consider the example of the Dynamo configuration given in <a href="#figure-2">Figure
2</a> with N=3. In this example, if node A is temporarily down or unreachable
during a write operation, then a replica that would normally have lived on A will
now be sent to node D. This is done to maintain the desired availability and
durability guarantees. The replica sent to D will have a hint in its metadata
that suggests which node was the intended recipient of the replica (in this case
A). Nodes that receive hinted replicas will keep them in a separate local
database that is scanned periodically. Upon detecting that A has recovered, D
will attempt to deliver the replica to A. Once the transfer succeeds, D may
delete the object from its local store without decreasing the total number of
replicas in the system.
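
> A sketch of that hinted-handoff bookkeeping, with invented names (this is
> not Riak KV's implementation, which hands off at the vnode level), might
> look like this:
>
> ```python
> class Node:
>     def __init__(self, name):
>         self.name, self.up = name, True
>         self.store = {}  # regular replicas
>         self.hints = {}  # hinted replicas held for unreachable nodes
>
>     def write(self, key, value, intended=None):
>         if intended is None:
>             self.store[key] = value
>         else:  # remember which node should really own this replica
>             self.hints[(intended, key)] = value
>
> def coordinate_write(preflist, fallback, key, value):
>     for node in preflist:
>         if node.up:
>             node.write(key, value)
>         else:  # sloppy quorum: hand the replica to a fallback instead
>             fallback.write(key, value, intended=node.name)
>
> def handoff(holder, recovered):
>     """Periodic scan: deliver hinted replicas once the owner is back."""
>     for (owner, key), value in list(holder.hints.items()):
>         if owner == recovered.name and recovered.up:
>             recovered.store[key] = value
>             del holder.hints[(owner, key)]
>
> a, b, c, d = (Node(n) for n in "ABCD")
> a.up = False
> coordinate_write([a, b, c], fallback=d, key="k", value="v")  # D holds a hint
> a.up = True
> handoff(d, a)
> assert a.store["k"] == "v" and not d.hints  # replica count is restored
> ```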

Using hinted handoff, Dynamo ensures that read and write operations do not fail
due to temporary node or network failures. Applications that need the
highest level of availability can set W to 1, which ensures that a write is
accepted as long as a single node in the system has durably written the key
to its local store. Thus, the write request is only rejected if all nodes in the
system are unavailable. However, in practice, most Amazon services in production
set a higher W to meet the desired level of durability. A more detailed
discussion of configuring N, R and W follows in Section 6.

> As mentioned previously, Riak KV does not require that a write be durable,
> only that a vnode responds in the affirmative. If you require a durable write
> in the way mentioned here, use DW.

It is imperative that a highly available storage system be capable of handling
the failure of an entire data center(s). Data center failures happen due to
power outages, cooling failures, network failures, and natural disasters. Dynamo
is configured such that each object is replicated across multiple data centers.
In essence, the preference list of a key is constructed such that the storage
nodes are spread across multiple data centers. These data centers are connected
through high-speed network links. This scheme of replicating across multiple
data centers allows us to handle entire data center failures without a data
outage.

> [Multi Datacenter Replication] was previously only implemented in the
> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. Now it
> is available in all versions from Riak KV 2.9.9 onwards.

[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/2.9.9/using/reference/v3-multi-datacenter/architecture/
[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/


### 4.7 Handling Permanent Failures: Replica Synchronization

Hinted handoff works best if the system membership churn is low and node
failures are transient. There are scenarios under which hinted replicas become
unavailable before they can be returned to the original replica node. To handle
this and other threats to durability, Dynamo implements an anti-entropy (replica
synchronization) protocol to keep the replicas synchronized.

> Read repair, mentioned above, is the simplest form of anti-entropy. But it is
> passive, not active as this section describes.

To detect the inconsistencies between replicas faster and to minimize the amount
of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a hash tree
whose leaves are hashes of the values of individual keys. Parent nodes higher in
the tree are hashes of their respective children. The principal advantage of a
Merkle tree is that each branch of the tree can be checked independently without
requiring nodes to download the entire tree or the entire data set. Moreover,
Merkle trees help in reducing the amount of data that needs to be transferred
while checking for inconsistencies among replicas. For instance, if the hash
values of the roots of two trees are equal, then the values of the leaf nodes in
the tree are equal and the nodes require no synchronization. If not, it implies
that the values of some replicas are different. In such cases, the nodes may
exchange the hash values of children, and the process continues until it reaches
the leaves of the trees, at which point the hosts can identify the keys that are
“out of sync”. Merkle trees minimize the amount of data that needs to be
transferred for synchronization and reduce the number of disk reads performed
during the anti-entropy process.
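
> A minimal Merkle-tree comparison in Python might look like the sketch below
> (illustrative only; real anti-entropy implementations, including Riak KV's
> AAE, differ in tree shape and in how they persist and descend the trees).
> For brevity, the mismatch case falls back to a full leaf scan instead of
> descending only the unequal branches:
>
> ```python
> import hashlib
>
> def h(data: bytes) -> bytes:
>     return hashlib.sha1(data).digest()
>
> def root(leaf_hashes):
>     """Hash pairs of children upward until a single root remains."""
>     level = list(leaf_hashes)
>     while len(level) > 1:
>         level = [h(b"".join(level[i:i + 2]))
>                  for i in range(0, len(level), 2)]
>     return level[0]
>
> def out_of_sync(replica_a, replica_b):
>     """replica_*: ordered list of (key, value-hash) pairs."""
>     if root([vh for _, vh in replica_a]) == root([vh for _, vh in replica_b]):
>         return []  # equal roots: replicas need no synchronization
>     return [ka for (ka, va), (_, vb) in zip(replica_a, replica_b) if va != vb]
>
> a = [("k1", h(b"v1")), ("k2", h(b"v2"))]
> b = [("k1", h(b"v1")), ("k2", h(b"stale"))]
> print(out_of_sync(a, b))  # ['k2']
> ```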

> Riak KV implements a Merkle-tree-based Active Anti-Entropy (*AAE*).

Dynamo uses Merkle trees for anti-entropy as follows: each node maintains a
separate Merkle tree for each key range (the set of keys covered by a virtual
node) it hosts. This allows nodes to compare whether the keys within a key range
are up-to-date. In this scheme, two nodes exchange the root of the Merkle tree
corresponding to the key ranges that they host in common. Subsequently, using
the tree traversal scheme described above, the nodes determine if they have any
differences and perform the appropriate synchronization action. The disadvantage
of this scheme is that many key ranges change when a node joins or leaves the
system, thereby requiring the tree(s) to be recalculated. This issue is
addressed, however, by the refined partitioning scheme described in Section 6.2.


### 4.8 Membership and Failure Detection

> This section is well expressed in [Adding and Removing Nodes] and
> [Failure Scenarios].

[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes/
[Failure Scenarios]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency/

#### 4.8.1 Ring Membership

> Riak KV operators can trigger node management via the
> [riak-admin command-line tool].

[riak-admin command-line tool]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/

In Amazon’s environment, node outages (due to failures and maintenance tasks) are
often transient but may last for extended intervals. A node outage rarely
signifies a permanent departure and therefore should not result in rebalancing
of the partition assignment or repair of the unreachable replicas. Similarly,
manual error could result in the unintentional startup of new Dynamo nodes. For
these reasons, it was deemed appropriate to use an explicit mechanism to
initiate the addition and removal of nodes from a Dynamo ring. An administrator
uses a command line tool or a browser to connect to a Dynamo node and issue a
membership change to join a node to a ring or remove a node from a ring. The
node that serves the request writes the membership change and its time of issue
to persistent store. The membership changes form a history because nodes can be
removed and added back multiple times.

> Nodes are manually added using the `riak-admin cluster join` command.
>
> When a node permanently departs, rebalancing is triggered using the
> `riak-admin cluster leave` command.

A gossip-based protocol propagates membership changes and maintains an
eventually consistent view of membership. Each node contacts a peer chosen at
random every second, and the two nodes efficiently reconcile their persisted
membership change histories.

> Riak KV's ring state holds membership information, and is propagated via
> [gossiping], including random reconciliation, defaulting to once a minute.

[gossiping]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#gossiping

When a node starts for the first time, it chooses its set of tokens (virtual
nodes in the consistent hash space) and maps nodes to their respective token
sets. The mapping is persisted on disk and initially contains only the local
node and token set. The mappings stored at different Dynamo nodes are reconciled
during the same communication exchange that reconciles the membership change
histories. Therefore, partitioning and placement information also propagates via
the gossip-based protocol, and each storage node is aware of the token ranges
handled by its peers. This allows each node to forward a key’s read/write
operations to the right set of nodes directly.

> These tokens are vnodes (virtual nodes) in Riak KV.
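
> In caricature, the gossip exchange described above reduces to merging two
> persisted histories until every node converges on the same view (a sketch
> with invented structures; Riak KV's ring gossip carries considerably more
> state):
>
> ```python
> import random
>
> class Member:
>     def __init__(self, name):
>         self.name = name
>         self.history = {}  # change_id -> (change, time_of_issue)
>
>     def gossip_with(self, peer):
>         """Both sides leave with the union of the two histories."""
>         merged = {**self.history, **peer.history}
>         self.history, peer.history = dict(merged), dict(merged)
>
> nodes = [Member(f"n{i}") for i in range(4)]
> nodes[0].history["join:n3"] = ("join n3", 1)
> for _ in range(10):  # periodic rounds with a randomly chosen peer
>     a, b = random.sample(nodes, 2)
>     a.gossip_with(b)
> # After enough rounds the change has, with high probability, reached every
> # node; this is the "eventually consistent view of membership".
> ```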

#### 4.8.2 External Discovery

The mechanism described above could temporarily result in a logically
partitioned Dynamo ring. For example, the administrator could contact node A to
join A to the ring, then contact node B to join B to the ring. In this scenario,
nodes A and B would each consider itself a member of the ring, yet neither would
be immediately aware of the other. To prevent logical partitions, some Dynamo
nodes play the role of seeds. Seeds are nodes that are discovered via an
external mechanism and are known to all nodes. Because all nodes eventually
reconcile their membership with a seed, logical partitions are highly unlikely.
Seeds can be obtained either from static configuration or from a configuration
service. Typically, seeds are fully functional nodes in the Dynamo ring.

> To rectify these sorts of logical partitions, multiple Riak cluster changes
> are configured as one batch. Any changes must first be viewed with `riak-admin
> cluster plan`; then the changes are committed with `riak-admin cluster
> commit`. The new ring state is gossiped.
>
> See _[The Node Join Process]_ for more.

[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster


#### 4.8.3 Failure Detection

Failure detection in Dynamo is used to avoid attempts to communicate with
unreachable peers during get() and put() operations and when transferring
partitions and hinted replicas. For the purpose of avoiding failed attempts at
communication, a purely local notion of failure detection is entirely
sufficient: node A may consider node B failed if node B does not respond to node
A’s messages (even if B is responsive to node C’s messages). In the presence of
a steady rate of client requests generating inter-node communication in the
Dynamo ring, a node A quickly discovers that a node B is unresponsive when B
fails to respond to a message; node A then uses alternate nodes to service
requests that map to B's partitions; A periodically retries B to check for the
latter's recovery. In the absence of client requests to drive traffic between
two nodes, neither node really needs to know whether the other is reachable and
responsive.

Decentralized failure detection protocols use a simple gossip-style protocol
that enables each node in the system to learn about the arrival (or departure)
of other nodes. For detailed information on decentralized failure detectors and
the parameters affecting their accuracy, the interested reader is referred to
[8]. Early designs of Dynamo used a decentralized failure detector to maintain
a globally consistent view of failure state. Later it was determined that the
explicit node join and leave methods obviate the need for a global view of
failure state. This is because nodes are notified of permanent node additions
and removals by the explicit node join and leave methods, and temporary node
failures are detected by the individual nodes when they fail to communicate with
others (while forwarding requests).

> Riak KV follows the same mechanism, manually triggering permanent ring
> state changes and gossiping the new state.
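
> The purely local failure detector described above can be captured in a few
> lines (an illustrative sketch; the retry interval is invented):
>
> ```python
> import time
>
> RETRY_EVERY = 10.0  # how often a node probes a peer it considers failed
>
> class PeerView:
>     """One node's local opinion of one peer; other nodes may disagree."""
>     def __init__(self):
>         self.failed_since = None
>
>     def on_timeout(self):
>         # No reply: route requests for this peer's partitions elsewhere.
>         if self.failed_since is None:
>             self.failed_since = time.monotonic()
>
>     def on_response(self):
>         self.failed_since = None  # peer recovered; resume using it
>
>     def should_retry(self):
>         return (self.failed_since is not None and
>                 time.monotonic() - self.failed_since >= RETRY_EVERY)
> ```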

### 4.9 Adding/Removing Storage Nodes

When a new node (say X) is added into the system, it gets assigned a number of
tokens that are randomly scattered on the ring. For every key range that is
assigned to node X, there may be a number of nodes (less than or equal to N)
that are currently in charge of handling keys that fall within its token range.
Due to the allocation of key ranges to X, some existing nodes no longer have to
store some of their keys, and these nodes transfer those keys to X. Let us
consider a simple bootstrapping scenario where node X is added to the ring shown
in <a href="#figure-2">Figure 2</a> between A and B. When X is added to the
system, it is in charge of storing keys in the ranges (F, G], (G, A] and (A, X].
As a consequence, nodes B, C and D no longer have to store the keys in these
respective ranges. Therefore, nodes B, C, and D will offer to, and upon
confirmation from X, transfer the appropriate set of keys. When a node is
removed from the system, the reallocation of keys happens in a reverse process.

> Riak KV does not randomly assign vnodes, but rather iterates through the list
> of partitions, assigning them to nodes in a round-robin style.

Operational experience has shown that this approach distributes the load of key
distribution uniformly across the storage nodes, which is important to meet the
latency requirements and to ensure fast bootstrapping. Finally, by adding a
confirmation round between the source and the destination, it is made sure that
the destination node does not receive any duplicate transfers for a given key
range.
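
> The round-robin claim described in the annotation above reduces, in
> caricature, to dealing partitions out in order (a sketch; riak_core's
> actual claim algorithm also tries to space each node's partitions far
> enough apart to honor the target n_val):
>
> ```python
> def assign_partitions(num_partitions, nodes):
>     """Deal the ordered partition list out to nodes, round-robin."""
>     return {p: nodes[p % len(nodes)] for p in range(num_partitions)}
>
> owners = assign_partitions(64, ["n1", "n2", "n3", "n4"])
> assert sum(1 for o in owners.values() if o == "n1") == 16  # even spread
> ```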

## 5. Implementation

In Dynamo, each storage node has three main software components: request
coordination, membership and failure detection, and a local persistence engine.
All these components are implemented in Java.

> Riak KV is implemented in Erlang. Request coordination and membership behavior
> is defined by [riak_core] and implemented by [Riak KV].

[riak_core]: http://github.com/basho/riak_core
[Riak KV]: http://github.com/basho/riak_kv

Dynamo’s local persistence component allows for different storage engines to be
plugged in. Engines that are in use are Berkeley Database (BDB) Transactional
Data Store, BDB Java Edition, MySQL, and an in-memory buffer with persistent
backing store. The main reason for designing a pluggable persistence component
is to choose the storage engine best suited for an application’s access
patterns. For instance, BDB can handle objects typically in the order of tens of
kilobytes whereas MySQL can handle objects of larger sizes. Applications choose
Dynamo’s local persistence engine based on their object size distribution. The
majority of Dynamo’s production instances use BDB Transactional Data Store.

> Riak KV ships with various [backend options]. [Bitcask] is the default, but
> [LevelDB] and Main [Memory] are also used heavily in production (in that
> order). You can also use more than one backend in production via the [Multi]
> backend configuration.
>
> Bitcask is a fast and reliable choice, but does have some limitations at very
> large scales. For larger clusters, you may want to choose LevelDB (which also
> supports [secondary indexes]). The Memory backend is an excellent choice when
> speed is important and durability is not. It also has TTL support.

[backend options]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/
[Bitcask]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask/
[LevelDB]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb/
[Memory]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory/
[Multi]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/multi/
[secondary indexes]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes/

The request coordination component is built on top of an event-driven messaging
substrate where the message processing pipeline is split into multiple stages,
similar to the SEDA architecture [24]. All communications are implemented using
Java NIO channels. The coordinator executes the read and write requests on
behalf of clients by collecting data from one or more nodes (in the case of
reads) or storing data at one or more nodes (for writes). Each client request
results in the creation of a state machine on the node that received the client
request. The state machine contains all the logic for identifying the nodes
responsible for a key, sending the requests, waiting for responses, potentially
doing retries, processing the replies and packaging the response to the client.
Each state machine instance handles exactly one client request. For instance, a
read operation implements the following state machine: (i) send read requests to
the nodes, (ii) wait for the minimum number of required responses, (iii) if too
few replies were received within a given time bound, fail the request, (iv)
otherwise gather all the data versions and determine the ones to be returned and
(v) if versioning is enabled, perform syntactic reconciliation and generate an
opaque write context that contains the vector clock that subsumes all the
remaining versions. For the sake of brevity the failure handling and retry
states are left out.

> Request coordination in Riak KV uses Erlang message passing, but follows a
> similar state machine.

After the read response has been returned to the caller, the state machine waits
for a small period of time to receive any outstanding responses. If stale
versions were returned in any of the responses, the coordinator updates those
nodes with the latest version. This process is called read repair because it
repairs replicas that have missed a recent update at an opportunistic time and
relieves the anti-entropy protocol from having to do it.

> Riak KV implements [Read Repair].

[Read Repair]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication/#read-repair

As noted earlier, write requests are coordinated by one of the top N nodes in
the preference list. Although it is desirable always to have the first node
among the top N coordinate the writes, thereby serializing all writes at a
single location, this approach has led to uneven load distribution, resulting in
SLA violations. This is because the request load is not uniformly distributed
across objects. To counter this, any of the top N nodes in the preference list
is allowed to coordinate the writes. In particular, since each write usually
follows a read operation, the coordinator for a write is chosen to be the node
that replied fastest to the previous read operation, which is stored in the
context information of the request.
This optimization enables us to pick the
node that has the data that was read by the preceding read operation, thereby
increasing the chances of getting “read-your-writes” consistency. It also
reduces variability in the performance of the request handling, which improves
performance at the 99.9th percentile.


## 6. Experiences & Lessons Learned

> Much of this section relates to benchmarks run against Dynamo. You can run
> [Basho Bench] against your own Riak cluster to discover your own
> optimal values.

[Basho Bench]: {{<baseurl>}}riak/kv/2.9.9/using/performance/benchmarking/

Dynamo is used by several services with different configurations. These
instances differ by their version reconciliation logic and read/write quorum
characteristics. The following are the main patterns in which Dynamo is used:

* Business-logic-specific reconciliation: This is a popular use case for Dynamo.
Each data object is replicated across multiple nodes. In case of divergent
versions, the client application performs its own reconciliation logic. The
shopping cart service discussed earlier is a prime example of this category. Its
business logic reconciles objects by merging different versions of a customer’s
shopping cart.

> Riak KV currently supports simple conflict resolution by way of read repair,
> remanding more complex reconciliation to the client. There are several tools
> to help simplify this task, such as [Statebox].
>
> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative
> Replicated Data Types)], for reconciling common data types like sets and
> counters.

[Statebox]: https://github.com/mochi/statebox_riak
[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/2.9.9/developing/data-types/


* Timestamp-based reconciliation: This case differs from the previous one only
in the reconciliation mechanism. In case of divergent versions, Dynamo performs
simple timestamp-based reconciliation logic of “last write wins”; i.e., the
object with the largest physical timestamp value is chosen as the correct
version. The service that maintains customers’ session information is a good
example of a service that uses this mode.

> Riak also supports this for high-performance cases where accuracy is less
> important than speed.

* High-performance read engine: While Dynamo is built to be an “always
writeable” data store, a few services are tuning its quorum characteristics and
using it as a high-performance read engine. Typically, these services have a
high read request rate and only a small number of updates. In this
configuration, typically R is set to be 1 and W to be N. For these services,
Dynamo provides the ability to partition and replicate their data across
multiple nodes, thereby offering incremental scalability. Some of these
instances function as the authoritative persistence cache for data stored in
more heavyweight backing stores. Services that maintain the product catalog and
promotional items fit in this category.

> Riak can be used in this manner.

The main advantage of Dynamo is that its client applications can tune the values
of N, R and W to achieve their desired levels of performance, availability and
durability. For instance, the value of N determines the durability of each
object. A typical value of N used by Dynamo’s users is 3.

The values of W and R impact object availability, durability and consistency.
For instance, if W is set to 1, then the system will never reject a write
request as long as there is at least one node in the system that can
successfully process a write request. However, low values of W and R can
increase the risk of inconsistency, as write requests are deemed successful and
returned to the clients even if they are not processed by a majority of the
replicas. This also introduces a vulnerability window for durability when a
write request is successfully returned to the client even though it has been
persisted at only a small number of nodes.

Traditional wisdom holds that durability and availability go hand-in-hand.
However, this is not necessarily true here. For instance, the vulnerability
window for durability can be decreased by increasing W. This may increase the
probability of rejecting requests (thereby decreasing availability) because more
storage hosts need to be alive to process a write request.

The common (N,R,W) configuration used by several instances of Dynamo is (3,2,2).
These values are chosen to meet the necessary levels of performance, durability,
consistency, and availability SLAs.

All the measurements presented in this section were taken on a live system
operating with a configuration of (3,2,2) and running a couple hundred nodes
with homogeneous hardware configurations. As mentioned earlier, each instance of
Dynamo contains nodes that are located in multiple data centers. These data
centers are typically connected through high-speed network links. Recall that to
generate a successful get (or put) response, R (or W) nodes need to respond to
the coordinator. Clearly, the network latencies between data centers affect the
response time, and the nodes (and their data center locations) are chosen such
that the applications' target SLAs are met.

> Ditto for Riak.

### 6.1 Balancing Performance and Durability

While Dynamo’s principal design goal is to build a highly available data store,
performance is an equally important criterion in Amazon’s platform. As noted
earlier, to provide a consistent customer experience, Amazon’s services set
their performance targets at higher percentiles (such as the 99.9th or 99.99th
percentiles). A typical SLA required of services that use Dynamo is that 99.9%
of the read and write requests execute within 300ms.

Since Dynamo is run on standard commodity hardware components that have far less
I/O throughput than high-end enterprise servers, providing consistently high
performance for read and write operations is a non-trivial task. The involvement
of multiple storage nodes in read and write operations makes it even more
challenging, since the performance of these operations is limited by the slowest
of the R or W replicas. <a href="#figure-4">Figure 4</a> shows the average and
99.9th percentile latencies of Dynamo’s read and write operations during a
period of 30 days. As seen in the figure, the latencies exhibit a clear diurnal
pattern, which is a result of the diurnal pattern in the incoming request rate
(i.e., there is a significant difference in request rate between the daytime and
night). Moreover, the write latencies are higher than read latencies, obviously
because write operations always result in disk access. Also, the 99.9th
percentile latencies are around 200 ms and are an order of magnitude higher than
the averages.
This is because the 99.9th percentile latencies are affected by
several factors such as variability in request load, object sizes, and locality
patterns.

<figure id="figure-4" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure4.png">
  <figcaption>
    Figure 4: Average and 99.9 percentiles of latencies for read and write
    requests during our peak request season of December 2006. The intervals
    between consecutive ticks in the x-axis correspond to 12 hours. Latencies
    follow a diurnal pattern similar to the request rate and 99.9 percentile
    latencies are an order of magnitude higher than averages.
  </figcaption>
</figure>

While this level of performance is acceptable for a number of services, a few
customer-facing services required higher levels of performance. For these
services, Dynamo provides the ability to trade off durability guarantees for
performance. In this optimization, each storage node maintains an object buffer
in its main memory. Each write operation is stored in the buffer and gets
periodically written to storage by a writer thread. In this scheme, read
operations first check if the requested key is present in the buffer. If so, the
object is read from the buffer instead of the storage engine.

> This is more similar to Riak's W value, since only DW requires a durable write
> to respond as a success.

This optimization has resulted in lowering the 99.9th percentile latency by a
factor of 5 during peak traffic, even for a very small buffer of a thousand
objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure,
write buffering smooths out higher percentile latencies. Obviously, this scheme
trades durability for performance. In this scheme, a server crash can result in
missing writes that were queued up in the buffer. To reduce the durability risk,
the write operation is refined to have the coordinator choose one out of the N
replicas to perform a “durable write”. Since the coordinator waits only for W
responses, the performance of the write operation is not affected by the
performance of the durable write operation performed by a single replica.

<figure id="figure-5" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure5.png">
  <figcaption>
    Figure 5: Comparison of performance of 99.9th percentile latencies for
    buffered vs. non-buffered writes over a period of 24 hours. The intervals
    between consecutive ticks in the x-axis correspond to one hour.
  </figcaption>
</figure>

> Setting DW=1 will replicate this behavior.


### 6.2 Ensuring Uniform Load Distribution

Dynamo uses consistent hashing to partition its key space across its replicas
and to ensure uniform load distribution. A uniform key distribution can help us
achieve uniform load distribution, assuming the access distribution of keys is
not highly skewed. In particular, Dynamo’s design assumes that even where there
is a significant skew in the access distribution, there are enough keys in the
popular end of the distribution so that the load of handling popular keys can be
spread across the nodes uniformly through partitioning. This section discusses
the load imbalance seen in Dynamo and the impact of different partitioning
strategies on load distribution.

> Riak follows SHA1-based consistent hashing for [partitioning].

[partitioning]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication/#understanding-replication-by-example

To study the load imbalance and its correlation with request load, the total
number of requests received by each node was measured for a period of 24 hours,
broken down into intervals of 30 minutes. In a given time window, a node is
considered to be “in-balance” if the node’s request load deviates from the
average load by less than a certain threshold (here, 15%). Otherwise the node
is deemed “out-of-balance”. <a href="#figure-6">Figure 6</a> presents
the fraction of nodes that are “out-of-balance” (henceforth, “imbalance ratio”)
during this time period. For reference, the corresponding request load received
by the entire system during this time period is also plotted. As seen in the
figure, the imbalance ratio decreases with increasing load. For instance, during
low loads the imbalance ratio is as high as 20% and during high loads it is
close to 10%. Intuitively, this can be explained by the fact that under high
loads, a large number of popular keys are accessed, and due to the uniform
distribution of keys, the load is evenly distributed. However, during low loads
(where load is 1/8th of the measured peak load), fewer popular keys are
accessed, resulting in a higher load imbalance.

<figure id="figure-6" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure6.png">
  <figcaption>
    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
    request load is above a certain threshold from the average system load) and
    their corresponding request load. The interval between ticks in x-axis
    corresponds to a time period of 30 minutes.
  </figcaption>
</figure>

<i>This section discusses how Dynamo’s partitioning scheme has evolved over time
and its implications on load distribution.</i>

<strong>Strategy 1:</strong> T random tokens per node and partition by token
value: This was the initial strategy deployed in production (and described in
Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
at random from the hash space). The tokens of all nodes are ordered according to
their values in the hash space. Every two consecutive tokens define a range. The
last token and the first token form a range that "wraps" around from the highest
value to the lowest value in the hash space. Because the tokens are chosen
randomly, the ranges vary in size. As nodes join and leave the system, the token
set changes and consequently the ranges change. Note that the space needed to
maintain the membership at each node increases linearly with the number of nodes
in the system.

> Riak uses equal-sized partitions with a round-robin distribution, not
> variably sized partitions that are randomly distributed.

While using this strategy, the following problems were encountered. First, when
a new node joins the system, it needs to “steal” its key ranges from other
nodes. However, the nodes handing the key ranges off to the new node have to
scan their local persistence store to retrieve the appropriate set of data
items. Note that performing such a scan operation on a production node is
tricky, as scans are highly resource-intensive operations and they need to be
executed in the background without affecting the customer performance. This
requires us to run the bootstrapping task at the lowest priority.
However, this
significantly slows the bootstrapping process, and during the busy shopping
season, when the nodes are handling millions of requests a day, the
bootstrapping has taken almost a day to complete. Second, when a node
joins/leaves the system, the key ranges handled by many nodes change and the
Merkle trees for the new ranges need to be recalculated, which is a non-trivial
operation to perform on a production system. Finally, there was no easy way to
take a snapshot of the entire key space due to the randomness in key ranges, and
this made the process of archival complicated. In this scheme, archiving the
entire key space requires us to retrieve the keys from each node separately,
which is highly inefficient.

The fundamental issue with this strategy is that the schemes for data
partitioning and data placement are intertwined. For instance, in some cases, it
is preferred to add more nodes to the system in order to handle an increase in
request load. However, in this scenario, it is not possible to add nodes without
affecting data partitioning. Ideally, it is desirable to use independent schemes
for partitioning and placement. To this end, the following strategies were
evaluated:

<strong>Strategy 2:</strong> T random tokens per node and equal-sized
partitions: In this strategy, the hash space is divided into Q equally sized
partitions/ranges and each node is assigned T random tokens. Q is usually set
such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In
this strategy, the tokens are only used to build the function that maps values
in the hash space to the ordered lists of nodes, and not to decide the
partitioning. A partition is placed on the first N unique nodes that are
encountered while walking the consistent hashing ring clockwise from the end of
the partition. <a href="#figure-7">Figure 7</a> illustrates this strategy for
N=3. In this example, nodes A, B, C are encountered while walking the ring from
the end of the partition that contains key k1. The primary advantages of this
strategy are: (i) decoupling of partitioning and partition placement, and (ii)
enabling the possibility of changing the placement scheme at runtime.

> As mentioned before, Riak uses equal-sized partitions, but not random
> distribution.

<figure id="figure-7" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure7-small.png">
  <figcaption>
    Figure 7: Partitioning and placement of keys in the three strategies. A, B,
    and C depict the three unique nodes that form the preference list for the
    key k1 on the consistent hashing ring (N=3). The shaded area indicates the
    key range for which nodes A, B, and C form the preference list. Dark arrows
    indicate the token locations for various nodes.
  </figcaption>
</figure>

<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
Similar to strategy 2, this strategy divides the hash space into Q equally sized
partitions, and the placement of partitions is decoupled from the partitioning
scheme. Moreover, each node is assigned Q/S tokens, where S is the number of
nodes in the system. When a node leaves the system, its tokens are randomly
distributed to the remaining nodes such that these properties are preserved.
Similarly, when a node joins the system, it "steals" tokens from nodes in the
system in a way that preserves these properties.

> Riak most closely follows strategy 3.
>
> See [The Node Join Process] and [Replacing a Node].

[The Node Join Process]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
[Replacing a Node]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/replacing-node/

The efficiency of these three strategies is evaluated for a system with S=30 and
N=3. However, comparing these different strategies in a fair manner is hard, as
different strategies have different configurations to tune their efficiency. For
instance, the load distribution property of strategy 1 depends on the number of
tokens (i.e., T) while strategy 3 depends on the number of partitions (i.e., Q).
One fair way to compare these strategies is to evaluate the skew in their load
distribution while all strategies use the same amount of space to maintain their
membership information. For instance, in strategy 1 each node needs to maintain
the token positions of all the nodes in the ring, and in strategy 3 each node
needs to maintain the information regarding the partitions assigned to each
node.

In our next experiment, these strategies were evaluated by varying the relevant
parameters (T and Q). The load balancing efficiency of each strategy was
measured for different sizes of membership information that needs to be
maintained at each node, where load balancing efficiency is defined as the ratio
of the average number of requests served by each node to the maximum number of
requests served by the hottest node.

The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
figure, strategy 3 achieves the best load balancing efficiency and strategy 2
has the worst load balancing efficiency. For a brief time, strategy 2 served as
an interim setup during the process of migrating Dynamo instances from using
strategy 1 to strategy 3. Compared to strategy 1, strategy 3 achieves better
efficiency and reduces the size of membership information maintained at each
node by three orders of magnitude. While storage is not a major issue, the nodes
gossip the membership information periodically, and as such it is desirable to
keep this information as compact as possible. In addition to this, strategy 3 is
advantageous and simpler to deploy for the following reasons: (i) Faster
bootstrapping/recovery: Since partition ranges are fixed, they can be stored in
separate files, meaning a partition can be relocated as a unit by simply
transferring the file (avoiding random accesses needed to locate specific
items). This simplifies the process of bootstrapping and recovery. (ii) Ease of
archival: Periodic archiving of the dataset is a mandatory requirement for most
Amazon storage services. Archiving the entire dataset stored by Dynamo is
simpler in strategy 3 because the partition files can be archived separately.
By contrast, in strategy 1, the tokens are chosen randomly and archiving the
data stored in Dynamo requires retrieving the keys from individual nodes
separately, which is usually inefficient and slow. The disadvantage of strategy
3 is that changing the node membership requires coordination in order to
preserve the properties required of the assignment.

<figure id="figure-8" style="text-align:center;">
  <img src="/riak-docs/images/dynamo/figure8.png">
  <figcaption>
    Figure 8: Comparison of the load distribution efficiency of different
    strategies for system with 30 nodes and N=3 with equal amount of metadata
    maintained at each node.
The values of the system size and number of
    replicas are based on the typical configuration deployed for the majority
    of our services.
  </figcaption>
</figure>

### 6.3 Divergent Versions: When and How Many?

As noted earlier, Dynamo is designed to trade off consistency for availability.
To understand the precise impact of different failures on consistency, detailed
data is required on multiple factors: outage length, type of failure, component
reliability, workload, etc. Presenting these numbers in detail is outside of the
scope of this paper. However, this section discusses a good summary metric: the
number of divergent versions seen by the application in a live production
environment.

> This first statement should be read carefully. It's probably more correct to
> say that Dynamo (and Riak) provides no consistency guarantees, and allows
> users to trade availability for durability/latency.

Divergent versions of a data item arise in two scenarios. The first is when the
system is facing failure scenarios such as node failures, data center failures,
and network partitions. The second is when the system is handling a large number
of concurrent writers to a single data item and multiple nodes end up
coordinating the updates concurrently. From both a usability and efficiency
perspective, it is preferred to keep the number of divergent versions at any
given time as low as possible. If the versions cannot be syntactically
reconciled based on vector clocks alone, they have to be passed to the business
logic for semantic reconciliation. Semantic reconciliation introduces additional
load on services, so it is desirable to minimize the need for it.

In our next experiment, the number of versions returned to the shopping cart
service was profiled for a period of 24 hours. During this period, 99.94% of
requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
of requests saw 3 versions and 0.00009% of requests saw 4 versions. This shows
that divergent versions are created rarely.

Experience shows that increases in the number of divergent versions are caused
not by failures but by increases in the number of concurrent writers. Such
increases in concurrent writes are usually triggered by busy robots (automated
client programs) and rarely by humans. This issue is not discussed in detail due
to the sensitive nature of the story.

### 6.4 Client-driven or Server-driven Coordination

As mentioned in Section 5, Dynamo has a request coordination component that uses
a state machine to handle incoming requests. Client requests are uniformly
assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
coordinator for a read request. Write requests, on the other hand, will be
coordinated by a node in the key’s current preference list. This restriction is
due to the fact that these preferred nodes have the added responsibility of
creating a new version stamp that causally subsumes the version that has been
updated by the write request. Note that if Dynamo’s versioning scheme is based
on physical timestamps, any node can coordinate a write request.

> In Riak, a server-side load balancer is an optional configuration. You
> generally use either virtual IPs or reverse proxies.
>
> See [Load Balancing] for more information.
+ +[Load Balancing]: {{<baseurl>}}riak/kv/2.9.9/configuring/load-balancing-proxy/ + +An alternative approach to request coordination is to move the state machine to +the client nodes. In this scheme client applications use a library to perform +request coordination locally. A client periodically picks a random Dynamo node +and downloads its current view of Dynamo membership state. Using this +information the client can determine which set of nodes form the preference list +for any given key. Read requests can be coordinated at the client node thereby +avoiding the extra network hop that is incurred if the request were assigned to +a random Dynamo node by the load balancer. Writes will either be forwarded to a +node in the key’s preference list or can be coordinated locally if Dynamo is +using timestamps based versioning. + +> Many [client libraries] provide built-in node request coordination. +> +> For example, using the Ruby driver, you could specify three nodes like this: +> +> client = Riak::Client.new(nodes: [ +> {host: '10.0.0.1'}, +> {host: '10.0.0.2'}, +> {host: '10.0.0.3'} +> ]) +> +> Note that the Riak clients do not coordinate with Riak's preference list, but +> simply round-robin requests, letting the Riak cluster handle routing. + +[client libraries]: {{<baseurl>}}riak/kv/2.9.9/developing/client-libraries/ + +An important advantage of the client-driven coordination approach is that a load +balancer is no longer required to uniformly distribute client load. Fair load +distribution is implicitly guaranteed by the near uniform assignment of keys to +the storage nodes. Obviously, the efficiency of this scheme is dependent on how +fresh the membership information is at the client. Currently clients poll a +random Dynamo node every 10 seconds for membership updates. A pull based +approach was chosen over a push based one as the former scales better with large +number of clients and requires very little state to be maintained at servers +regarding clients. However, in the worst case the client can be exposed to stale +membership for duration of 10 seconds. In case, if the client detects its +membership table is stale (for instance, when some members are unreachable), it +will immediately refresh its membership information. + +<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th +percentile and averages that were observed for a period of 24 hours using +client-driven coordination compared to the server-driven approach. As seen in +the table, the client-driven coordination approach reduces the latencies by at +least 30 milliseconds for 99.9th percentile latencies and decreases the average +by 3 to 4 milliseconds. The latency improvement is because the client-driven +approach eliminates the overhead of the load balancer and the extra network hop +that may be incurred when a request is assigned to a random node. As seen in the +table, average latencies tend to be significantly lower than latencies at the +99.9th percentile. This is because Dynamo’s storage engine caches and write +buffer have good hit ratios. Moreover, since the load balancers and network +introduce additional variability to the response time, the gain in response time +is higher for the 99.9th percentile than the average. + +<table id="table-2"> + <caption> + Table 2: Performance of client-driven and server-driven + coordination approaches. 
+ </caption>
+ <tr>
+ <th></th>
+ <th>99.9th percentile read latency (ms)</th>
+ <th>99.9th percentile write latency (ms)</th>
+ <th>Average read latency (ms)</th>
+ <th>Average write latency (ms)</th>
+ </tr>
+ <tr>
+ <th>Server-driven</th>
+ <td>68.9</td>
+ <td>68.5</td>
+ <td>3.9</td>
+ <td>4.02</td>
+ </tr>
+ <tr>
+ <th>Client-driven</th>
+ <td>30.4</td>
+ <td>30.4</td>
+ <td>1.55</td>
+ <td>1.9</td>
+ </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations are not affected significantly. To this end, the
+background tasks were integrated with an admission control mechanism. Each of
+the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock-contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementation and maintenance of Dynamo. Many Amazon internal services have
+used Dynamo for the past two years and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of its requests and no
+data loss event has occurred to date.
+
+Moreover, the primary advantage of Dynamo is that it provides the necessary
+knobs using the three parameters of (N,R,W) to tune their instance based on
+their needs. Unlike popular commercial data stores, Dynamo exposes data
+consistency and reconciliation logic issues to the developers. At the outset,
+one may expect the application logic to become more complex. However,
+historically, Amazon’s platform is built for high availability and many
+applications are designed to handle different failure modes and inconsistencies
+that may arise.
Hence, porting such applications to use Dynamo was a relatively +simple task. For new applications that want to use Dynamo, some analysis is +required during the initial stages of the development to pick the right conflict +resolution mechanisms that meet the business case appropriately. Finally, Dynamo +adopts a full membership model where each node is aware of the data hosted by +its peers. To do this, each node actively gossips the full routing table with +other nodes in the system. This model works well for a system that contains +couple of hundreds of nodes. However, scaling such a design to run with tens of +thousands of nodes is not trivial because the overhead in maintaining the +routing table increases with the system size. This limitation might be overcome +by introducing hierarchical extensions to Dynamo. Also, note that this problem +is actively addressed by O(1) DHT systems(e.g., [14]). + +> This is equally true for Riak. As mentioned above, consider running +> [Basho Bench] to help discover your optimal setup. Nothing will give you +> better numbers than real experimentation. + +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.9/using/performance/benchmarking/ + +## 7. Conclusions + +> This paper was an overview of Riak from a Dynamo point-of-view. To get a +> better sense of the Riak ecosystem, read our ever-expanding [documentation]. + +[documentation]: {{<baseurl>}} + +This paper described Dynamo, a highly available and scalable data store, used +for storing state of a number of core services of Amazon.com’s e-commerce +platform. Dynamo has provided the desired levels of availability and performance +and has been successful in handling server failures, data center failures and +network partitions. Dynamo is incrementally scalable and allows service owners +to scale up and down based on their current request load. Dynamo allows service +owners to customize their storage system to meet their desired performance, +durability and consistency SLAs by allowing them to tune the parameters N, R, +and W. + +The production use of Dynamo for the past year demonstrates that decentralized +techniques can be combined to provide a single highly-available system. Its +success in one of the most challenging application environments shows that an +eventual-consistent storage system can be a building block for highly-available +applications. 
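+
+> The same knobs are the primary tuning points in Riak. As a rough sketch
+> using the Ruby client shown earlier (the bucket, key, and values here are
+> hypothetical), N is set as a bucket property while R and W can be passed
+> per request:
+>
+>     require 'riak'
+>
+>     client = Riak::Client.new
+>     bucket = client.bucket('accounts')
+>     bucket.n_val = 3                         # N: replicas kept per object
+>
+>     account = bucket.get('user-1234', r: 2)  # R: replies needed to read
+>     account.data['plan'] = 'premium'
+>     account.store(w: 2)                      # W: acks needed to write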
+ + + + diff --git a/content/riak/kv/2.9.9/learn/glossary.md b/content/riak/kv/2.9.9/learn/glossary.md new file mode 100644 index 0000000000..88d74c9c75 --- /dev/null +++ b/content/riak/kv/2.9.9/learn/glossary.md @@ -0,0 +1,358 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +aliases: +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.9/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/2.9.9/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/2.9.9/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/2.9.9/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/2.9.9/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/2.9.9/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/2.9.9/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/2.9.9/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/2.9.9/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Hinted Handoff
+
+Hinted handoff is a technique for dealing with node failure in the Riak
+cluster in which neighboring nodes temporarily take over storage
+operations for the failed node. When the failed node returns to the
+cluster, the updates received by the neighboring nodes are handed off to
+it.
+
+Hinted handoff allows Riak to ensure database availability. When a node
+fails, Riak can continue to handle requests as if the node were still
+there.
+
+* [Recovering a Failed Node][repair recover failure recovery]
+
+## Key
+
+Keys are unique object identifiers in Riak and are scoped within buckets
+and bucket types.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Lager
+
+[Lager] is an Erlang/OTP framework that
+ships as Riak's default logger.
+
+## MapReduce
+
+Riak's MapReduce gives developers the capability to perform more
+powerful queries over the data stored in their key/value store.
+
+* [Using MapReduce][usage mapreduce]
+
+## Node
+
+A node is analogous to a physical server. Nodes run a certain number of
+vnodes, each of which claims a partition in the Riak Ring key space.
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Object
+
+An object is another name for a value.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Partition
+
+Partitions are the spaces into which a Riak cluster is divided. Each
+vnode in Riak is responsible for a partition. Data is stored on a set
+number of partitions determined by the `n_val` setting, with the target
+partitions chosen statically by applying consistent hashing to an
+object's key.
+
+* [Clusters][concept clusters]
+* [Eventual Consistency][concept eventual consistency]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Quorum
+
+Quorum in Riak has two meanings:
+
+* The quantity of replicas that must respond to a read or write request
+  before it is considered successful. This is defined as a bucket
+  property or as one of the relevant parameters to a single request
+  (R,W,DW,RW).
+* A symbolic quantity for the above, `quorum`, which is equivalent to
+  `n_val` / 2 + 1. The default setting is `2`.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Replication properties][apps replication properties]
+* [Understanding Riak's Configurable Behaviors]
+
+## Sloppy Quorum
+
+During failure scenarios, in which available nodes < total nodes, sloppy
+quorum is used to ensure that Riak is still available to take writes.
+When a primary node is unavailable, another node will accept its write
+requests. When the node returns, data is transferred to the primary node
+via the [Hinted Handoff](#hinted-handoff) process.
+
+## Read Repair
+
+Read repair is an anti-entropy mechanism that Riak uses to
+optimistically update stale replicas when they reply to a read request
+with stale data.
+
+* [More about Read Repair][concept replication]
+
+## Replica
+
+Replicas are copies of data stored in Riak. The number of replicas
+required for both successful reads and writes is configurable in Riak
+and should be set based on your application's consistency and
+availability requirements.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Understanding Riak's Configurable Behaviors]
+
+## Riak Core
+
+Riak Core is the modular distributed systems framework that serves as
+the foundation for Riak's scalable architecture.
+
+* [Riak Core]
+* [Where To Start With Riak Core]
+
+## Riak KV
+
+Riak KV is the key/value datastore for Riak.
+
+* [Riak KV]
+
+## Riak Pipe
+
+Riak Pipe is the processing layer that powers Riak's MapReduce. It's
+best described as "UNIX pipes for Riak."
+
+* [Riak Pipe]
+* [Riak Pipe - the New MapReduce Power]
+* [Riak Pipe - Riak's Distributed Processing Framework]
+
+## Riak Search
+
+Riak Search is a distributed, scalable, failure-tolerant, realtime,
+full-text search engine integrating [Apache
+Solr](https://lucene.apache.org/solr/) with Riak KV.
+
+* [Using Search][usage search]
+
+## Ring
+
+The Riak Ring is a 160-bit integer space. This space is equally divided
+into partitions, each of which is claimed by a vnode, which themselves
+reside on actual physical server nodes.
+
+* [Clusters][concept clusters]
+* [Dynamo][learn dynamo]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Secondary Indexing (2i)
+
+Secondary Indexing in Riak gives developers the ability to tag an object
+stored in Riak with one or more values which can then be queried.
+
+* [Using Secondary Indexes][usage secondary-indexes]
+* [Repairing Indexes][repair recover repairs]
+
+## Strong Consistency
+
+While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater
+enable you to apply strong consistency guarantees to some or all of your
+data, thus using Riak as a CP (consistent plus partition-tolerant)
+rather than AP (highly available plus partition-tolerant) system.
+
+* [Strong Consistency Concept][concept strong consistency]
+* [Using Strong Consistency][cluster ops strong consistency]
+
+## Value
+
+Riak is best described as a key/value store. In versions of Riak prior
+to 2.0, all "values" are opaque BLOBs (binary large objects) identified
+with a unique key. Values can be any type of data, including a string, a
+JSON object, a text document, etc. Modifying values involves fetching
+the value that exists in Riak and replacing it with a new value;
+operations on values are thus basic CRUD operations.
+
+[Riak Data Types][dev data types], added in version 2.0, are an important
+exception to this. While still considered values---because they are
+stored in bucket type/bucket/key locations, like anything in Riak---Riak
+Data Types are not BLOBs and are modified by Data Type-specific
+operations.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+* [Data Types][dev data types]
+
+
+## Vector Clock
+
+Riak utilizes vector clocks (or _vclocks_) to handle version control.
+Since any node in a Riak cluster is able to handle a request, and not
+all nodes need to participate, data versioning is required to keep track
+of a current value. When a value is stored in Riak, it is tagged with a
+vector clock, establishing its initial version. When it is updated, the
+client provides the vector clock of the object being modified so that
+this vector clock can be extended to reflect the update. Riak can then
+compare vector clocks on different versions of the object and determine
+certain attributes of the data.
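+
+To make the mechanics concrete, here is a minimal sketch using the official
+Ruby client (the bucket and key are hypothetical); the client carries the
+vector clock through the whole read-modify-write cycle automatically:
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new
+bucket = client.bucket('users')
+
+# Fetching an object also fetches its current vector clock.
+obj = bucket.get('alice')
+
+# Storing the same object sends that vector clock back, so Riak can tell
+# that this write descends from the version that was read.
+obj.data = obj.data.merge('visits' => obj.data['visits'].to_i + 1)
+obj.store
+```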
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + + + + diff --git a/content/riak/kv/2.9.9/learn/new-to-nosql.md b/content/riak/kv/2.9.9/learn/new-to-nosql.md new file mode 100644 index 0000000000..ee6c709784 --- /dev/null +++ b/content/riak/kv/2.9.9/learn/new-to-nosql.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: 2.9.9 +#menu: +# riak_kv-2.9.9: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +aliases: +--- + +**TODO: Add content (not sure where this lives in existing docs)** + + + + diff --git a/content/riak/kv/2.9.9/learn/use-cases.md b/content/riak/kv/2.9.9/learn/use-cases.md new file mode 100644 index 0000000000..ddb63d1b9e --- /dev/null +++ b/content/riak/kv/2.9.9/learn/use-cases.md @@ -0,0 +1,405 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/2.9.9/dev/data-modeling/ + - /riak/kv/2.9.9/dev/data-modeling/ +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/2.9.9/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/2.9.9/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/2.9.9/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. 
This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships.
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
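+
+As a rough sketch of the campaign-keyed storage described above (the bucket,
+key, and file names are hypothetical, and the official Ruby client is
+assumed):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new
+ads = client.bucket('ad_content')
+
+# Key the creative on a campaign ID so it can be fetched in a single read.
+ad = ads.new('campaign-341-banner')
+ad.content_type = 'image/png'
+ad.raw_data = File.binread('banner.png')
+ad.store
+```
+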
+Riak's tunable [apps replication properties][replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster that performs more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+patterns of reading and writing data to Riak are very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value,
+as in the sketch below.
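+
+A minimal sketch of this bucket-per-device, key-per-interval scheme, assuming
+the official Ruby client (all bucket, key, and field names here are
+illustrative):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new
+
+# One bucket per sensor device; one key per reporting interval.
+readings = client.bucket('sensor-ab42')
+
+obj = readings.new(Time.now.utc.strftime('%Y-%m-%dT%H:%M'))
+obj.content_type = 'application/json'
+obj.data = { 'temperature_c' => 21.4, 'humidity_pct' => 40 }
+obj.store
+```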
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all
+would mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye out for the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion
+user-settings object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account or have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of modeling with
+user data. A common example would be storing data for assembling a social
+network timeline. To create a user timeline, you could use a `timeline` bucket
+in Riak and form keys on the basis of a unique user ID. You would store
+timeline information as the value, e.g. a list of status update IDs which could
+then be used to retrieve the full information from another bucket, or perhaps
+the full status updates themselves. If you want to store additional data, such
+as a timestamp, category or list of properties, you can turn the list into an
+array of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+to the timeline would necessarily involve reading the full object, modifying
+it, and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client.
Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the content post, though with a different
+bucket/key combination. Another possibility would be to store each comment with
+its own ID. Loading the full view with comments would require your application
+to call from the posts and comments buckets to assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/learn/why-riak-kv.md b/content/riak/kv/2.9.9/learn/why-riak-kv.md
new file mode 100644
index 0000000000..fa9afe0b47
--- /dev/null
+++ b/content/riak/kv/2.9.9/learn/why-riak-kv.md
@@ -0,0 +1,225 @@
+---
+title: "Why Riak KV?"
+description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Why Riak KV?" + identifier: "learn_why_riak_kv" + weight: 100 + parent: "learn" +toc: true +aliases: + - /riak/2.9.9/theory/why-riak/ + - /riak/kv/2.9.9/theory/why-riak/ +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.9/developing/app-guide/replication-properties +[Basho Bench]: {{<baseurl>}}riak/kv/2.9.9/using/performance/benchmarking +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[Datomic]: http://www.datomic.com/overview.html +[dev data types]: {{<baseurl>}}riak/kv/2.9.9/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#read-repair + + +## What is Riak? + +Riak is a distributed database designed to deliver maximum data +availability by distributing data across multiple servers. As long as +your Riak client can reach *one* Riak server, it should be able to write +data. + +Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data. + + +### Basho's goals for Riak + +Goal | Description +-------|------- +**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself are experiencing failure conditions +**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden +**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity +**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available + +### When Riak makes sense + +If your data does not fit on a single server and demands a distributed +database architecture, you should take a close look at Riak as a +potential solution to your data availability issues. Getting distributed +databases right is **very** difficult, and Riak was built to address the +problem of data availability with as few trade-offs and downsides as +possible. + +Riak's focus on availability makes it a good fit whenever downtime is +unacceptable. No one can promise 100% uptime, but Riak is designed to +survive network partitions and hardware failures that would +significantly disrupt most databases. + +A less-heralded feature of Riak is its predictable latency. Because its +fundamental operations---read, write, and delete---do not involve +complex data joins or locks, it services those requests promptly. Thanks +to this capability, Riak is often selected as a data storage backend for +data management software from a variety of paradigms, such as +[Datomic]. + +From the standpoint of the actual content of your data, Riak might also +be a good choice if your data can be modeled as one of Riak's currently +available [Data Types][dev data types]: flags, registers, counters, +sets, or maps. These Data Types enable you to take advantage of Riak's +high availability approach while simplifying application development. + +### When Riak is Less of a Good Fit + +We recommend running no fewer than 5 data servers in a cluster. +This means that Riak can be overkill for small databases. 
If you're not +already sure that you will need a distributed database, there's a good +chance that you won't need Riak. + +If explosive growth is a possibility, however, you are always highly +advised to prepare for that in advance. Scaling at Internet speeds is +sometimes compared to overhauling an airplane mid-flight. If you feel +that such a transition might be necessary in the future, then you might +want to consider Riak. + +Riak's simple data model, consisting of keys and values as its atomic +elements, means that your data must be denormalized if your system is to +be reasonably performant. For most applications this is not a serious +hurdle. But if your data simply cannot be effectively managed as keys +and values, Riak will most likely not be the best fit for you. + +Correspondingly, if your application demands a high query load by any +means other than key/value lookup---e.g. SQL-style `SELECT * FROM table` +operations---Riak will not be as efficient as other databases. If you +wish to compare Riak with other data technologies, Basho offers a tool +called [Basho Bench] to help measure its performance, so that you can +decide whether the availability and operational benefits of Riak +outweigh its disadvantages. + +## How Does a Riak Cluster Work? + +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. 
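+
+As a hedged sketch of how this looks from the official Ruby client (the
+bucket name is hypothetical, and `n_val` is best set before a bucket holds
+data):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new
+bucket = client.bucket('critical-data')
+
+# Replicate each object in this bucket to 5 nodes instead of the default 3.
+bucket.n_val = 5
+```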
+ +## When Things Go Wrong + +Riak retains fault tolerance, data integrity, and availability even in +failure conditions such as hardware failure and network partitions. Riak +has a number of means of addressing these scenarios and other bumps in +the road, like version conflicts in data. + +### Hinted Handoff + +Hinted handoff enables Riak to handle node failure. If a node goes down, +a neighboring node will take over its storage operations. When the +failed node returns, the updates received by the neighboring node are +handed back to it. This ensures that availability for writes and updates +is maintained automatically, minimizing the operational burden of +failure conditions. + +### Version Conflicts + +In any system that replicates data, conflicts can arise, for example +when two clients update the same object at the exact same time or when +not all updates have yet reached hardware that is experiencing lag. + +In Riak, replicas are [eventually consistent][concept eventual consistency], +meaning that while data is always available, not all replicas may have +the most recent update at the exact same time, causing brief +periods---generally on the order of milliseconds---of inconsistency +while all state changes are synchronized. + +Riak addresses data conflicts as follows: When you make a read request, +Riak looks up all replicas for that object. By default, Riak will return +the most recently updated version, determined by looking at the object's +vector clock. Vector clocks are metadata attached to each replica when +it is created. They are extended each time a replica is updated to keep +track of versions. You can also allow clients to resolve conflicts +themselves if that is a better fit for your use case. + +### Riak Data Types + +If you are not interested in dealing with version conflicts on the +application side, [Riak Data Types][dev data types] offer a powerful +yet easy-to-use means of storing certain types of data while allowing +Riak to handle merge conflicts. These conflicts are resolved +automatically by Riak using Data Type-specific algorithms inspired by +research into [convergent replicated data types]. + +### Read Repair + +When an outdated replica is returned as part of a read request, Riak +will automatically update the out-of-sync replica to make it consistent. +[Read repair][glossary read rep], a self-healing property of +the database, will even update a replica that returns a `not_found` in +the event that a node loses the data due to physical failure. + +### Reading and Writing Data in Failure Conditions + +In Riak, you can set an R value for reads and a W value for writes. +These values give you control over how many replicas must respond to a +request for it to succeed. + +Let's say that you have an N value of 3 (aka `n_val=3`) for a particular +key/value pair, but one of the physical nodes responsible for a replica +is down. With an `r=2` setting, only 2 replicas must return results for +read to be deemed successful. This allows Riak to provide read +availability even when nodes are down or laggy. The same applies for the +W in writes. If this value is not specified, Riak defaults to `quorum`, +according to which the majority of nodes must respond. + +There is more on [replication properties][apps replication properties] elsewhere in the +documentation. 
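+
+As a brief sketch of these per-request values, again using the official Ruby
+client (bucket and key are hypothetical):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new
+orders = client.bucket('orders')
+
+# With n_val=3, let the read succeed once 2 replicas have answered,
+# tolerating one slow or failed node.
+order = orders.get('order-1001', r: 2)
+
+# Likewise, require 2 acknowledgements for the write instead of all 3.
+order.data = order.data.merge('status' => 'shipped')
+order.store(w: 2)
+```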
+
+
+
+
diff --git a/content/riak/kv/2.9.9/release-notes.md b/content/riak/kv/2.9.9/release-notes.md
new file mode 100644
index 0000000000..514efb2e01
--- /dev/null
+++ b/content/riak/kv/2.9.9/release-notes.md
@@ -0,0 +1,34 @@
+---
+title: "Riak KV 2.9.9 Release Notes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Release Notes"
+    identifier: "index_release_notes"
+    weight: 101
+    parent: index
+toc: false
+aliases:
+  - /riak/2.9.9/community/release-notes
+  - /riak/kv/2.9.9/intro-v20
+  - /riak/2.9.9/intro-v20
+  - /riak/kv/2.9.9/introduction
+---
+
+Released Aug 7, 2021.
+
+
+## Overview
+
+Minor stability improvements to the leveled backend - see the [leveled release notes](https://github.com/martinsumner/leveled/releases/tag/0.9.24) for further details.
+
+## Previous Release Notes
+
+Please see the KV 2.9.8 release notes [here]({{<baseurl>}}riak/kv/2.9.8/release-notes/).
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup.md b/content/riak/kv/2.9.9/setup.md
new file mode 100644
index 0000000000..d4892e9abb
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup.md
@@ -0,0 +1,51 @@
+---
+title: "Setup Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Setup"
+    identifier: "setup_index"
+    weight: 110
+    pre: install
+toc: false
+aliases:
+---
+
+[plan index]: ../setup/planning
+[install index]: ../setup/installing
+[upgrade index]: ../setup/upgrading
+[downgrade]: ../setup/downgrade
+
+## In This Section
+
+#### [Planning][plan index]
+
+Information on planning your Riak KV cluster, including software & hardware recommendations.
+
+[Learn More >>][plan index]
+
+#### [Installing][install index]
+
+Step-by-step tutorials on installing Riak KV.
+
+[Learn More >>][install index]
+
+#### [Upgrading][upgrade index]
+
+Guides on upgrading your Riak KV cluster.
+
+[Learn More >>][upgrade index]
+
+#### [Downgrading][downgrade]
+
+A guide on downgrading your Riak KV cluster.
+
+[Learn More >>][downgrade]
+
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/downgrade.md b/content/riak/kv/2.9.9/setup/downgrade.md
new file mode 100644
index 0000000000..068f8df8d6
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/downgrade.md
@@ -0,0 +1,179 @@
+---
+title: "Downgrading"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Downgrading"
+    identifier: "downgrading"
+    weight: 103
+    parent: "setup_index"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/upgrading/rolling-downgrades/
+  - /riak/kv/2.9.9/ops/upgrading/rolling-downgrades/
+---
+
+[rolling upgrade]: {{<baseurl>}}riak/kv/2.9.9/setup/upgrading/cluster
+[config ref]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+[concept aae]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/
+[aae status]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#aae-status
+
+Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade].
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove the Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6.
Start Riak KV and disable Riak search. +7. Monitor the reindex of the data. +8. Finalize process and restart Riak KV & Riak search. + +### Guidelines + +* Riak control should be disabled throughout the rolling downgrade process. +* [Configuration Files][config ref] must be replaced with those of the version being downgraded to. + + +### Components That Complicate Downgrades + +| Feature | automatic | required | Notes | +|:---|:---:|:---:|:---| +|Migration to Solr 4.10.4 |✔ | ✔| Applies to all clusters using Riak Search. +| Active Anti-Entropy file format changes | ✔ | | Can be opted out using a [capability](#aae_tree_capability) + + +### When Downgrading is No Longer an Option + +If you enabled LZ4 compression in LevelDB and/or enabled global expiration in LevelDB when you installed KV 2.9.9, you cannot downgrade. + + +## General Process + +{{% note %}} +While the cluster contains mixed version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync). + +This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete. +{{% /note %}} + +### Stop Riak KV and remove Riak search index & temporary data + +1\. Stop Riak KV: + +```bash +riak stop +``` +2\. Back up your Riak KV /etc and /data directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Downgrade Riak KV: + +```RHEL/CentOS +sudo rpm -Uvh »riak_package_name«.rpm +``` + +```Ubuntu +sudo dpkg -i »riak_package_name«.deb +``` + +4\. Remove the Riak search index data and AAE data: + + 1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`. + + ```bash + rm -rf /var/lib/riak/yz_temp/solr-webapp + ``` + 2. Delete the Solr cores located in the yz directory. If you have custom solrconfig.xml files, you will need to restore the core from backup instead. + + For example: + + ```bash + rm -rf /var/lib/riak/yz/example_core1 + rm -rf /var/lib/riak/yz/example_core2 + ``` + +### Prepare to Re-index Solr Cores + +5\. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency sensitive application, you should adjust these settings with care. + +```riak.conf +anti_entropy.concurrency_limit = 8 +anti_entropy.tree.build_limit.number = 4 +anti_entropy.tree.build_limit.per_timespan = 5m +``` + +### Start the node and disable Yokozuna + +6\. Start Riak KV: +{{% note %}} +Search results will be inconsistent until **Step 8.1** is complete. +{{% /note %}} + +```bash +riak start +``` + +7\. Wait for Riak search to start by running the following command: + +```bash +riak-admin wait-for-service yokozuna +``` + +8\. Run `riak attach`. + + 1. Run the following snippet to prevent this node from participating in distributed Riak Search queries: + + ``` + riak_core_node_watcher:service_down(yokozuna). + ``` + + 2. Expire the Yokozuna AAE Trees: + + ``` + yz_entropy_mgr:expire_trees(). + ``` + + 3. Exit the attach session by pressing **Ctrl-G** then **q**. + +### Monitor the reindex of the data + +9\. 
Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands. + +The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output. + +Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data. + +### Finalize process and restart Yokozuna + + +10\. If you raised the AAE concurrency settings in `riak.conf` during **Step 5**, stop the node and remove the increased AAE thresholds. + +11\. If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet: + +```erlang +riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)). +``` + +12\. Exit the attach session by pressing **Ctrl-G** then **q**. + +13\. Verify that transfers have completed: + +```bash +riak-admin transfers +``` + + + + + diff --git a/content/riak/kv/2.9.9/setup/installing.md b/content/riak/kv/2.9.9/setup/installing.md new file mode 100644 index 0000000000..d4c26ead2d --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing.md @@ -0,0 +1,61 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/2.9.9/ops/building/installing + - /riak/kv/2.9.9/ops/building/installing + - /riak/2.9.9/installing/ + - /riak/kv/2.9.9/installing/ +--- + +[install aws]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/rhel-centos +[install suse]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/2.9.9/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. 
+ + + + + diff --git a/content/riak/kv/2.9.9/setup/installing/amazon-web-services.md b/content/riak/kv/2.9.9/setup/installing/amazon-web-services.md new file mode 100644 index 0000000000..2718e6a822 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/amazon-web-services.md @@ -0,0 +1,153 @@ +--- +title_supertext: "Installing on" +title: "Amazon Web Services" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Amazon Web Services" + identifier: "installing_amazon_web_services" + weight: 301 + parent: "installing" +toc: true +aliases: + - /riak/2.9.9/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/kv/2.9.9/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/2.9.9/installing/amazon-web-services/ + - /riak/kv/2.9.9/installing/amazon-web-services/ +--- + + +## Launching Riak VMs via the AWS Marketplace + +{{% note title="Note" %}} +The AWS Marketplace does not always have the most recent versions of Riak available. To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below. +{{% /note %}} + +In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account. + +1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account. + +2. Locate Riak in the **Databases & Caching** category or search for Riak from any page. + +3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair. + + ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png) + +4. Click the **Accept Terms and Launch with 1-Click** button. + +### Security Group Settings + +Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak. + +1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM. + +2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports: + + * 22 (SSH) + * 8087 (Riak Protocol Buffers Interface) + * 8098 (Riak HTTP Interface) + +3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab). + + * Port range: 4369 + * Port range: 6000-7999 + * Port range: 8099 + +4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. + + ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png) + +We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.9/using/security/). + +## Clustering Riak on AWS + +You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the ec2-user. + +You can find more information on connecting to an instance on the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html). 
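+ +For example, connecting to the first instance might look like the following sketch, where the key file and public DNS name are placeholders for your own values: + +```bash +ssh -i ~/.ssh/riak-cluster-key.pem ec2-user@ec2-203-0-113-10.compute-1.amazonaws.com +```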
+ +{{% note title="Note" %}} +The following clustering setup will _not_ be resilient to instance restarts +unless deployed in Amazon VPC. +{{% /note %}} + +{{% note title="Note on Package Based Installation" %}} + If installing to AWS by package, further configuration to _riak.conf_ to set the node name and listening IP addresses is necessary for the below steps to function. +{{% /note %}} + +1. On the first node, obtain the internal IP address: + + ```bash + curl http://169.254.169.254/latest/meta-data/local-ipv4 + ``` + +2. For all other nodes, use the internal IP address of the first node: + + ```bash + sudo riak-admin cluster join riak@<ip.of.first.node> + ``` + +3. After all of the nodes are joined, execute the following: + + ```bash + sudo riak-admin cluster plan + ``` + + If this looks good: + + ```bash + sudo riak-admin cluster commit + ``` + + To check the status of clustering use: + + ```bash + sudo riak-admin member_status + ``` + +You now have a Riak cluster running on AWS. + + +## Installing From Package + +#### AWS (2) + +You can install on AWS 2 using yum, which we recommend: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2/riak-2.9.9-1.amzn2x86_64.rpm +sudo yum localinstall -y riak-2.9.9-1.amzn2x86_64.rpm +``` + +Or you can install the `.rpm` package manually: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2/riak-2.9.9-1.amzn2x86_64.rpm +sudo rpm -i riak-2.9.9-1.amzn2x86_64.rpm +``` + + +#### AWS (2016.09) + +You can install on AWS 2016.09 using yum, which we recommend: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2016.09/riak-2.9.9-1.amzn1x86_64.rpm +sudo yum localinstall -y riak-2.9.9-1.amzn1x86_64.rpm +``` + +Or you can install the `.rpm` package manually: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2016.09/riak-2.9.9-1.amzn1x86_64.rpm +sudo rpm -i riak-2.9.9-1.amzn1x86_64.rpm +``` +## Next Steps + +Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.9/setup/installing/debian-ubuntu.md b/content/riak/kv/2.9.9/setup/installing/debian-ubuntu.md new file mode 100644 index 0000000000..4db2203a3e --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/debian-ubuntu.md @@ -0,0 +1,171 @@ +--- +title_supertext: "Installing on" +title: "Debian and Ubuntu" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Debian & Ubuntu" + identifier: "installing_debian_ubuntu" + weight: 302 + parent: "installing" +toc: true +aliases: + - /riak/2.9.9/ops/building/installing/Installing-on-Debian-and-Ubuntu + - /riak/kv/2.9.9/ops/building/installing/Installing-on-Debian-and-Ubuntu + - /riak/2.9.9/installing/debian-ubuntu/ + - /riak/kv/2.9.9/installing/debian-ubuntu/ +--- + +[install source index]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/source/ +[security index]: {{<baseurl>}}riak/kv/2.9.9/using/security/ +[install source erlang]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/verify + +Riak KV can be installed on Debian or Ubuntu-based systems using a binary +package or by compiling from source code. 
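+ +If you are not sure which release you are running, you can check before picking a package below; `/etc/os-release` is present on current Debian and Ubuntu systems: + +```bash +# Prints the distribution name and version, e.g. "Ubuntu 16.04" +cat /etc/os-release +```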
+ +The following steps have been tested to work with Riak KV on: + +- Ubuntu 18.04 +- Ubuntu 16.04 +- Ubuntu 14.04 +- Ubuntu 12.04 +- Debian 9.2 +- Debian 8.6 +- Debian 7.6 +- Raspbian Buster + +> **Note on Debian 7** +> +> If you wish to install Riak on Debian 7, you may need to install [libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or later, which in turn requires upgrading your system to [sid](https://www.debian.org/releases/sid/). Installation instructions can be found [here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F). +> +> Once sid has been installed, you can install libc6 with the following command: +> +>```bash apt-get -t sid install libc6 libc6-dev libc6-dbg ``` + +## Installing From Package + +If you wish to install the deb packages by hand, follow these instructions. + +### Installing on Non-LTS Ubuntu Releases + +Typically we only package Riak for LTS releases to keep our build and testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty), there are changes that affect how Riak is packaged, so we will release a separate package for that non-LTS release. In most other cases, however, if you are running a non-LTS release it is safe to follow the instructions below for the LTS release prior to yours. For example, on Ubuntu 12.10 you would follow the installation instructions for Ubuntu 12.04. + +### PAM Library Requirement for Ubuntu + +One dependency that may be missing on your machine is the `libpam0g-dev` package used for Pluggable Authentication Module (PAM) authentication, associated with [Riak security][security index]. + +To install: + +```bash +sudo apt-get install libpam0g-dev +``` + +### Riak 64-bit Installation + +#### Ubuntu Bionic Beaver (18.04) + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/bionic64/riak-2.9.9-1_amd64.deb +sudo dpkg -i riak-2.9.9-1_amd64.deb +``` + +#### Ubuntu Xenial Xerus (16.04) + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/xenial64/riak-2.9.9-1_amd64.deb +sudo dpkg -i riak-2.9.9-1_amd64.deb +``` + +#### Ubuntu Trusty Tahr (14.04) + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/trusty64/riak-2.9.9-1_amd64.deb +sudo dpkg -i riak-2.9.9-1_amd64.deb +``` + +#### Ubuntu Precise Pangolin (12.04) + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/precise64/riak-2.9.9-1_amd64.deb +sudo dpkg -i riak-2.9.9-1_amd64.deb +``` + +#### Debian Stretch (9.0) + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/debian/9/riak-2.9.9-1_amd64.deb +sudo dpkg -i riak-2.9.9-1_amd64.deb +``` + +#### Debian Jessie (8.0) + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/debian/8/riak-2.9.9-1_amd64.deb +sudo dpkg -i riak-2.9.9-1_amd64.deb +``` + +#### Debian Wheezy (7.0) + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/debian/7/riak-2.9.9-1_amd64.deb +sudo dpkg -i riak-2.9.9-1_amd64.deb +``` + +#### Raspbian Buster + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/raspbian/buster/riak-2.9.9-1_armhf.deb +sudo dpkg -i riak-2.9.9-1_armhf.deb +``` + + +## Installing From Source + +First, install Riak dependencies using apt: + +```bash +sudo apt-get install build-essential libc6-dev-i386 git +``` + +Riak requires an [Erlang](http://www.erlang.org/) installation. +Instructions can be found in [Installing Erlang][install source erlang]. 
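+ +Once Erlang is installed, the following optional, illustrative check confirms that it is on your `PATH` before you build (the release number printed will vary with your installation): + +```bash +erl -noshell -eval 'io:format("~s~n", [erlang:system_info(otp_release)]), halt().' +``` + +With Erlang in place, download and build Riak: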
+ +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/riak-2.9.9.tar.gz +tar zxvf riak-2.9.9.tar.gz +cd riak-2.9.9 +make rel +``` + +If the build was successful, a fresh build of Riak will exist in the +`rel/riak` directory. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.9/setup/installing/freebsd.md b/content/riak/kv/2.9.9/setup/installing/freebsd.md new file mode 100644 index 0000000000..8388175892 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/freebsd.md @@ -0,0 +1,133 @@ +--- +title_supertext: "Installing on" +title: "FreeBSD" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "FreeBSD" + identifier: "installing_freebsd" + weight: 303 + parent: "installing" +toc: true +aliases: + - /riak/2.9.9/ops/building/installing/Installing-on-FreeBSD + - /riak/kv/2.9.9/ops/building/installing/Installing-on-FreeBSD + - /riak/2.9.9/installing/freebsd/ + - /riak/kv/2.9.9/installing/freebsd/ +--- + + + +[install source erlang]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/source/erlang +[downloads]: {{<baseurl>}}riak/kv/2.9.9/downloads/ +[install verify]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/verify + +You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. + +## Installing From Binary Package + +Installing Riak from a binary package is the simplest method, with the fewest required dependencies, and it takes less time to complete than building from source. + +### Prerequisites and Dependencies + +Riak depends on `sudo` being installed if the Riak command line tools are to be executed by users other than the *riak* user. Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package. + +### Installation + +You can install the Riak binary package on FreeBSD remotely using the `pkg_add` remote option. For this example, we're installing `riak-2.9.9.txz`. + +### For FreeBSD 11.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.9/freebsd/11.1/riak-2.9.9.txz +``` + + +### For FreeBSD 10.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/2.9/2.9.9/freebsd/10.4/riak-2.9.9.txz +``` + +When Riak is installed, a message is displayed with information about the installation and available documentation. + +``` +Thank you for installing Riak. + +Riak has been installed in /usr/local owned by user:group riak:riak + +The primary directories are: + + {platform_bin_dir, "/usr/local/sbin"} + {platform_data_dir, "/var/db/riak"} + {platform_etc_dir, "/usr/local/etc/riak"} + {platform_lib_dir, "/usr/local/lib/riak"} + {platform_log_dir, "/var/log/riak"} + +These can be configured and changed in the platform_etc_dir/app.config. + +Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly. + +Man pages are available for riak(1) and riak-admin(1) +``` + +## Installing From Source + +Installing Riak from source on FreeBSD is a straightforward process, but it requires installation of more dependencies (such as Erlang) prior to building and takes more time than a binary package installation. + +That said, installing from source provides for greater flexibility with respect to configuration, data root locations, and more fine-grained control over specific dependency versions. 
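+ +Before proceeding, you can ask `pkg` which of the build tools listed in the next section are already present. This is an illustrative check; exact package names depend on your ports tree: + +```bash +pkg info erlang git flex || echo "one or more prerequisites are missing" +```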
+ +### Prerequisites and Dependencies + +When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build. + +If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. + +* Erlang ([Installing Erlang][install source erlang]) +* Curl +* Git +* OpenSSL (version 1.0.0_7) +* Python +* sudo +* flex + +### Installation + +First download the version you wish to install from the [downloads page][downloads]. + +Next, unpack and build a release from source: + +```bash +tar zxf <riak-x.x.x> +cd riak-x.x.x +gmake rel +``` + +Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories: + +```bash +bin # Riak binaries +data # Riak data and metadata +erts-5.9.2 # Erlang Run-Time System +etc # Riak Configuration +lib # Third party libraries +log # Operational logs +releases # Release information +``` + +If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this: + +```bash +gmake devrel +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.9/setup/installing/mac-osx.md b/content/riak/kv/2.9.9/setup/installing/mac-osx.md new file mode 100644 index 0000000000..33f2c5dd40 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/mac-osx.md @@ -0,0 +1,121 @@ +--- +title_supertext: "Installing on" +title: "Mac OS X" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Mac OS X" + identifier: "installing_macosx" + weight: 303 + parent: "installing" +toc: true +aliases: + - /riak/2.9.9/ops/building/installing/Installing-on-Mac-OS-X + - /riak/kv/2.9.9/ops/building/installing/Installing-on-Mac-OS-X + - /riak/2.9.9/installing/mac-osx/ + - /riak/kv/2.9.9/installing/mac-osx/ +--- + + + +[perf open files]: {{<baseurl>}}riak/kv/2.9.9/using/performance/open-files-limit +[install source erlang]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/verify + +The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball. + +> **`ulimit` on OS X** +> +> OS X gives you a very small limit on open file handles, so even with a backend that uses very few file handles, it's possible to run out. See [Open Files Limit][perf open files] for more information about changing the limit. + + +## From Precompiled Tarballs + +To run Riak from our precompiled tarball, run these commands for the appropriate platform: + +### 64-bit + +```bash +curl -O https://files.tiot.jp/riak/kv/2.9/2.9.9/osx/10.11/riak-2.9.9-OSX-x86_64.tar.gz +tar xzvf riak-2.9.9-OSX-x86_64.tar.gz +``` + +After the release is untarred, you will be able to `cd` into the `riak` directory and execute `bin/riak start` to start the Riak node. + +## Homebrew + +{{% note title="Warning: Homebrew not always up to date" %}} +Homebrew's Riak recipe is community supported, and thus is not always up to date with the latest Riak package. Please ensure that the current recipe is using the latest supported code (and don't be afraid to update it if it's not). 
+{{% /note %}} + +Installing Riak 2.9.9 with [Homebrew](http://brew.sh/) is easy: + +```bash +brew install --devrel riak +``` + +By default, this will place a `2.9.9` folder in +`/usr/local/Cellar/riak`. + +Be aware that you will most likely see the following message after +running `brew install`: + +``` +Error: The `brew link` step did not complete successfully +The formula built, but is not symlinked into /usr/local + +You can try again using: + brew link riak +``` + +We do not recommend using `brew link` with Riak. Instead, we recommend +either copying that directory to a desired location on your machine, +aliasing the executables in the `/bin` directory, or interacting with +the Riak installation directory via environment variables. + +**Note**: Homebrew will install Erlang if you don't have it already. + +## Installing From Source + +You must have Xcode tools installed from [Apple's Developer +website](http://developer.apple.com/). + +{{% note title="Note on Clang" %}} +Riak has had problems compiling with Clang in the past. As of Riak KV +2.9.0p5 and Clang 902.0.39.1, Clang can build Riak. +{{% /note %}} + +Riak requires [Erlang](http://www.erlang.org/) R16B02+. + +If you do not have Erlang already installed, see [Installing Erlang][install source erlang]. + +Next, download and unpack the source distribution. + +```bash +curl -O https://files.tiot.jp/riak/kv/2.9/2.9.9/riak-2.9.9.tar.gz +tar zxvf riak-2.9.9.tar.gz +cd riak-2.9.9 +make rel +``` + +If you receive errors when building about "incompatible architecture," +please verify that you built Erlang with the same architecture as your +system (Snow Leopard and higher: 64bit). + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.9/setup/installing/rhel-centos.md b/content/riak/kv/2.9.9/setup/installing/rhel-centos.md new file mode 100644 index 0000000000..bf4538a274 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/rhel-centos.md @@ -0,0 +1,134 @@ +--- +title_supertext: "Installing on" +title: "RHEL and CentOS" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "RHEL & CentOS" + identifier: "installing_rhel_centos" + weight: 304 + parent: "installing" +toc: true +aliases: + - /riak/2.9.9/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/kv/2.9.9/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/2.9.9/installing/rhel-centos/ + - /riak/kv/2.9.9/installing/rhel-centos/ +--- + + + +[install source index]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/source +[install source erlang]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/verify + +Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary +package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on +CentOS/RHEL 6.9, 7.5.1804 and 8.1.1911 . + +> **Note on SELinux** +> +> CentOS enables SELinux by default, so you may need to disable SELinux if +you encounter errors. + +## Installing From Package + +If you wish to install the RHEL/CentOS packages by hand, follow these +instructions. 
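+ +If you are unsure which set of instructions below applies to your machine, the release file (present on both RHEL and CentOS) will tell you: + +```bash +cat /etc/redhat-release +```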
+ +### For CentOS 8 / RHEL 8 + +Before installing Riak on CentOS 8/RHEL 8, we first need to satisfy some Erlang dependencies by installing the EPEL repository: + +```bash +sudo yum install -y epel-release +``` + +Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/8/riak-2.9.9-1.el8.x86_64.rpm +sudo yum localinstall -y riak-2.9.9-1.el8.x86_64.rpm +``` + +Or you can install the `.rpm` package manually: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/8/riak-2.9.9-1.el8.x86_64.rpm +sudo rpm -Uvh riak-2.9.9-1.el8.x86_64.rpm +``` + +### For CentOS 7 / RHEL 7 + +You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/7/riak-2.9.9-1.el7.x86_64.rpm +sudo yum localinstall -y riak-2.9.9-1.el7.x86_64.rpm +``` + +Or you can install the `.rpm` package manually: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/7/riak-2.9.9-1.el7.x86_64.rpm +sudo rpm -Uvh riak-2.9.9-1.el7.x86_64.rpm +``` + +### For CentOS 6 / RHEL 6 + +You can install Riak using yum, which we recommend: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/6/riak-2.9.9-1.el6.x86_64.rpm +sudo yum localinstall -y riak-2.9.9-1.el6.x86_64.rpm +``` + +Or you can install the `.rpm` package manually: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/6/riak-2.9.9-1.el6.x86_64.rpm +sudo rpm -Uvh riak-2.9.9-1.el6.x86_64.rpm +``` + +## Installing From Source + +Riak requires an [Erlang](http://www.erlang.org/) installation. +Instructions can be found in [Installing Erlang][install source erlang]. + +Building from source will require the following packages: + +* `gcc` +* `gcc-c++` +* `glibc-devel` +* `make` +* `pam-devel` + +You can install these with yum: + +```bash +sudo yum install gcc gcc-c++ glibc-devel make git pam-devel +``` + +Now we can download and install Riak: + +```bash +wget https://files.tiot.jp/riak/kv/2.9/2.9.9/riak-2.9.9.tar.gz +tar zxvf riak-2.9.9.tar.gz +cd riak-2.9.9 +make rel +``` + +You will now have a fresh build of Riak in the `rel/riak` directory. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.9/setup/installing/smartos.md b/content/riak/kv/2.9.9/setup/installing/smartos.md new file mode 100644 index 0000000000..6047820409 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/smartos.md @@ -0,0 +1,119 @@ +--- +title_supertext: "Installing on" +title: "SmartOS" +description: "" +project: "riak_kv" +project_version: "2.9.9" +menu: + riak_kv-2.9.9: + name: "SmartOS" + identifier: "installing_smartos" + weight: 305 + parent: "installing" +toc: true +aliases: + - /riak/2.9.9/ops/building/installing/Installing-on-SmartOS + - /riak/kv/2.9.9/ops/building/installing/Installing-on-SmartOS + - /riak/2.9.9/installing/smartos/ + - /riak/kv/2.9.9/installing/smartos/ +--- + +[install verify]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/verify + +{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}} +SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} +The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. 
They demonstrate installation of a Riak node on SmartOS as the root user. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open +files limit is at least 65536. Check the current limits to verify this: + +```bash +ulimit -a +``` + +To temporarily increase this limit *for the life of your session*, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`: + +```bash +set rlim_fd_max=65536 +``` + +## Choosing a Version + +SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is the way to determine which Riak package to use. + +The thing that really matters for Riak is what dataset was used to make the SmartOS VM. These datasets come from joyent and appear like this with the `dsadm` command: + +``` +fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4 +f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1 +``` + +This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use. + +For Joyent Cloud users who don't know what dataset was used, in the guest zone type: + +``` +cat /opt/local/etc/pkgin/repositories.conf +``` + +* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* you need to use the `1.8` download. +* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* you need to use the `1.6` download. + +## Download and Install + +Download your version of the Riak binary package for SmartOS: + +```bash +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz +``` + +Next, install the package: + +``` +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz +``` + +After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: + +```bash +svcadm -v enable -r riak +``` + +Finally, after enabling the services, check to see that they are online: + +``` +svcs -a | grep -E 'epmd|riak' +``` + +Output from the above command should resemble the following: + +``` +online 17:17:16 svc:/network/epmd:default +online 17:17:16 svc:/application/riak:default +``` + +Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed and configured Riak as service on SmartOS. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
+ diff --git a/content/riak/kv/2.9.9/setup/installing/solaris.md b/content/riak/kv/2.9.9/setup/installing/solaris.md new file mode 100644 index 0000000000..961b2aded2 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/solaris.md @@ -0,0 +1,91 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: "2.9.9" +menu: + riak_kv-2.9.9: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/2.9.9/ops/building/installing/Installing-on-Solaris + - /riak/kv/2.9.9/ops/building/installing/Installing-on-Solaris + - /riak/2.9.9/installing/solaris/ + - /riak/kv/2.9.9/installing/solaris/ +--- + + + +[install verify]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
+ diff --git a/content/riak/kv/2.9.9/setup/installing/source.md b/content/riak/kv/2.9.9/setup/installing/source.md new file mode 100644 index 0000000000..8e64a662b0 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/source.md @@ -0,0 +1,110 @@ +--- +title_supertext: "Installing" +title: "Riak KV From Source" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Installing From Source" + identifier: "installing_source" + weight: 310 + parent: "installing" +toc: true +aliases: + - /riak/2.9.9/ops/building/Installing-Riak-from-Source + - /riak/kv/2.9.9/ops/building/Installing-Riak-from-Source + - /riak/2.9.9/installing/source/ + - /riak/kv/2.9.9/installing/source/ +--- + + + +[install source erlang]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/source/erlang +[downloads]: {{<baseurl>}}riak/kv/2.9.9/downloads/ +[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/debian-ubuntu/#installing-from-source +[install freebsd#source]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/freebsd/#installing-from-source +[install mac osx#source]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/mac-osx/#installing-from-source +[install rhel & centos#source]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/rhel-centos/#installing-from-source +[install verify]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/verify + +Riak should be installed from source if you are building on a platform +for which a package does not exist or if you are interested in +contributing to Riak. + +## Dependencies + +### Erlang + +To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release. + +See [Installing Erlang][install source erlang] for instructions. + +### Git + +Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build. + +### GCC + +Riak will not compile with Clang. Please make sure your default C/C++ +compiler is [GCC](https://gcc.gnu.org/). + +## Installation + +The following instructions generate a complete, self-contained build of +Riak in `$RIAK/rel/riak` where `$RIAK` is the location of the unpacked +or cloned source. + +### Installing from source package + +Download the Riak source package from the [Download Center][downloads] and build: + +```bash +curl -O https://files.tiot.jp/riak/kv/2.9/2.9.9/riak-2.9.9.tar.gz +tar zxvf riak-2.9.9.tar.gz +cd riak-2.9.9 +make locked-deps +make rel +``` + +### Installing from GitHub + +The [Riak Github respository](http://github.com/basho/riak) has much +more information on building and installing Riak from source. To clone +and build Riak from source, follow the steps below. + +Clone the repository using [Git](http://git-scm.com) and build: + +```bash +git clone git://github.com/basho/riak.git +cd riak +make locked-deps +make rel +``` + +## Platform-Specific Instructions + +For instructions about specific platforms, see: + + * [Debian & Ubuntu][install debian & ubuntu#source] + * [FreeBSD][install freebsd#source] + * [Mac OS X][install mac osx#source] + * [RHEL & CentOS][install rhel & centos#source] + +If you are running Riak on a platform not in the list above and need +some help getting it up and running, join The Riak Mailing List and +inquire about it there. We are happy to help you get up and running with +Riak. 
+ +### Windows + +Riak is not currently supported on Microsoft Windows. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.9/setup/installing/source/erlang.md b/content/riak/kv/2.9.9/setup/installing/source/erlang.md new file mode 100644 index 0000000000..fc8ac9dd50 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/source/erlang.md @@ -0,0 +1,571 @@ +--- +title: "Installing Erlang" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Installing Erlang" + identifier: "installing_source_erlang" + weight: 301 + parent: "installing_source" +toc: true +aliases: + - /riak/2.9.9/ops/building/installing/erlang + - /riak/kv/2.9.9/ops/building/installing/erlang + - /riak/2.9.9/installing/source/erlang/ + - /riak/kv/2.9.9/installing/source/erlang/ +--- + +[install index]: {{<baseurl>}}riak/kv/2.9.9/setup/installing +[security basics]: {{<baseurl>}}riak/kv/2.9.9/using/security/basics + +Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].** + +> **Note on Official Support** +> +> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package. + +## Prerequisites + +#### Contents + +* [kerl](#kerl-prerequisites) +* [Debian/Ubuntu](#debian-ubuntu-prerequisites) +* [FreeBSD/Solaris](#freebsd-solaris-prerequisites) +* [Mac OS X](#mac-os-x-prerequisites) +* [RHEL/CentOS](#rhel-centos-prerequisites) + +To build and install Erlang you must have a GNU-compatible build system and these tools: + +**Unpacking** + +* [GNU unzip](http://www.gzip.org/) or a modern uncompressing utility. +* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives. + +**Building** + +* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts. +* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program. +* [gcc](https://gcc.gnu.org/): for compiling C. +* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces. +* [OpenSSL](https://www.openssl.org/): toolkit that implements SSL and TLS protocols. +* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java. + + +## kerl Prerequisites + +[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems. + +Install kerl by running the following commands: + +```bash +curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl +chmod a+x kerl +``` + +If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl. + +Otherwise, continue with [Installing with kerl](#installing-with-kerl). 
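+ +Optionally, you can move `kerl` onto your `PATH` and confirm that it runs. This is a small sketch, where `/usr/local/bin` is just a common choice of destination: + +```bash +sudo mv kerl /usr/local/bin/ +kerl list releases +```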
+ +### Configuring kerl on FreeBSD/Solaris + +Start by creating a `~/.kerlrc` file: + +```bash +touch ~/.kerlrc +``` + +Next add the following contents to your `~/.kerlrc` file: + +```shell +KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads + --enable-kernel-poll --without-odbc" +``` + +Then check for the presence of autoconf by running: + +```shell +which autoconf +``` +If this returns `autoconf not found`, install autoconf by running: + +```shell +sudo pkg update +sudo pkg install autoconf +``` + +Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl). + + +### Configuring kerl on Mac OS X + +To compile Erlang as 64-bit on Mac OS X you need to instruct kerl to pass the correct flags to the `configure` command. + +Start by creating a `~/.kerlrc` file: + +```bash +touch ~/.kerlrc +``` + +Next add the following contents to your `~/.kerlrc` file: + +```shell +KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads + --enable-kernel-poll --without-odbc --enable-darwin-64bit" +``` + +On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/). + +Check for the presence of autoconf by running: + +```shell +which autoconf +``` + +If this returns `autoconf not found`, install autoconf using one of the methods below. + +With Homebrew: + +```shell +brew install autoconf +``` + +Or with curl: + +```shell +curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz +tar zxvf autoconf-2.69.tar.gz +cd autoconf-2.69 +./configure && make && sudo make install +``` + +Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl). + + + +## Debian/Ubuntu Prerequisites + +### Dependencies + +To install the required dependencies run the following `apt-get` commands: + +```bash +sudo apt-get update +sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git +``` + +### GUI Dependencies + +If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies. + +> **Note on build output** +> +>These packages are not required for operation of a Riak node. Notes in the build output about missing support for wxWidgets can be safely ignored when installing Riak in a typical non-graphical server environment. + +To install packages for graphics support use the following `apt-get` command: + +```bash +sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev +``` + +### Next Steps + +Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu). + + + +## FreeBSD/Solaris Prerequisites + +### Dependencies + +To install the required dependencies run the following `pkg` command: + +```bash +sudo pkg update +sudo pkg install gcc autoconf gmake flex +``` + +### GUI Dependencies + +If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies. + +To install packages for graphics support use the following `pkg` command: + +```bash +sudo pkg install wx28-gtk2-2.8.12_4 +``` + +### Next Steps + +Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris). + + + +## Mac OS X Prerequisites + +* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools. +* [Homebrew](http://brew.sh/) (*optional*) - Package Manager. 
First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X. + +We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is optional and is not required to install Erlang. + +Next, if you are running OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/). To check for the presence of autoconf run: + +```bash +which autoconf +``` + +If this returns `autoconf not found`, install autoconf using one of the methods below. + +With Homebrew: + +```bash +brew install autoconf +``` + +Or with curl: + +```bash +curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz +tar zxvf autoconf-2.69.tar.gz +cd autoconf-2.69 +./configure && make && sudo make install +``` + +Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x). + +## RHEL/CentOS Prerequisites + +### Dependencies + +To install the required dependencies run the following `yum` command: + +```bash +sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git +``` + +### GUI Dependencies + +If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies. + +To install packages for graphics support use the following `yum` command: + +```bash +sudo yum install wxBase.x86_64 +``` + +### Next Steps + +Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos). + + + +## Installation + +* [Installing with kerl](#installing-with-kerl) +* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu) +* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris) +* [Installing on Mac OS X](#installing-on-mac-os-x) +* [Installing on RHEL/CentOS](#installing-on-rhel-centos) + +## Installing with kerl + +First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites). + +With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of Erlang [from Github](https://github.com/basho/otp) using the following command: + +```bash +./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10 +``` + +This builds the Erlang distribution and performs all of the steps required to manually install Erlang for you. + +After Erlang is successfully built, you can install the build as follows: + +```bash +./kerl install R16B02-basho10 ~/erlang/R16B02-basho10 +. ~/erlang/R16B02-basho10/activate +``` + +The last line activates the Erlang build that was just installed into `~/erlang/R16B02-basho10`. + +> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands. + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Debian/Ubuntu + +First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). 
+ +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). + +You can install Erlang in several ways on OS X: + +* [From Source](#installing-on-mac-os-x-from-source) +* [Homebrew](#installing-on-mac-os-x-with-homebrew) +* [MacPorts](#installing-on-mac-os-x-with-macports) + +## Installing on Mac OS X from Source + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Follow the steps below to configure Erlang for your operating system. + +#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7) + +If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion +(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang. 
Using LLVM: + +```bash +CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll --enable-darwin-64bit +``` + +Or if you prefer GCC: + +```bash +CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \ +./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll --enable-darwin-64bit +``` + +#### Configuring Erlang on Snow Leopard (OS X 10.6) + +If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an Intel processor: + +```bash +./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll --enable-darwin-64bit +``` + +#### Configuring Erlang on older versions of OS X + +If you're on a non-Intel processor or older version of OS X: + +```bash +./configure --disable-hipe --enable-smp-support --enable-threads \ +--enable-kernel-poll +``` + +After you've configured your system, `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on Mac OS X with Homebrew + +To install Erlang with Homebrew, use this command: + +```bash +brew install erlang +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on Mac OS X with MacPorts + +Installing with MacPorts: + +```bash +port install erlang +ssl +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on RHEL/CentOS + +First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites). + +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +> **Note for RHEL 6/CentOS 6** +> +> In certain versions of RHEL 6 and CentOS 6 the `openssl-devel` package ships with Elliptic Curve Cryptography partially disabled. To communicate this to Erlang and prevent compile- and run-time errors, the environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to +Erlang's `./configure` call. 
+> +> The full `make` invocation then becomes +> +> ```bash +CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + + + + diff --git a/content/riak/kv/2.9.9/setup/installing/source/jvm.md b/content/riak/kv/2.9.9/setup/installing/source/jvm.md new file mode 100644 index 0000000000..bdd795ff9f --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/source/jvm.md @@ -0,0 +1,55 @@ +--- +title: "Installing the JVM" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Installing the JVM" + identifier: "installing_source_jvm" + weight: 302 + parent: "installing_source" +toc: true +aliases: + - /riak/2.9.9/ops/building/installing/jvm + - /riak/kv/2.9.9/ops/building/installing/jvm + - /riak/2.9.9/ops/building/installing/Installing-the-JVM + - /riak/kv/2.9.9/ops/building/installing/Installing-the-JVM + - /riak/2.9.9/installing/source/jvm/ + - /riak/kv/2.9.9/installing/source/jvm/ +--- + +[usage search]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/search + +If you are using [Riak Search 2.0][usage search], codename Yokozuna, +you will need to install **Java 1.6 or later** to run [Apache +Solr](https://lucene.apache.org/solr/), the search platform that powers +Riak Search. + +We recommend using Oracle's [JDK +7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html). +Installation packages can be found on the [Java SE 7 Downloads +page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR) +and instructions on the [documentation +page](http://www.oracle.com/technetwork/java/javase/documentation/index.html). + +## Installing Solr on OS X + +If you're using Riak Search on Mac OS X, you may see the following +error: + +```java +java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME> +``` + +If you encounter this error, we recommend manually setting the hostname +for `localhost` using +[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html). + +```bash +scutil --set HostName "localhost" +``` + + + + diff --git a/content/riak/kv/2.9.9/setup/installing/suse.md b/content/riak/kv/2.9.9/setup/installing/suse.md new file mode 100644 index 0000000000..3dc8f73ac9 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/suse.md @@ -0,0 +1,52 @@ +--- +title_supertext: "Installing on" +title: "SUSE" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "SUSE" + identifier: "installing_suse" + weight: 307 + parent: "installing" +toc: false +aliases: + - /riak/2.9.9/ops/building/installing/Installing-on-SUSE + - /riak/kv/2.9.9/ops/building/installing/Installing-on-SUSE + - /riak/2.9.9/installing/suse/ + - /riak/kv/2.9.9/installing/suse/ +--- + +[install verify]: {{<baseurl>}}riak/kv/2.9.9/setup/installing/verify + +{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}} +SUSE is no longer supported in Riak KV 2.9.9+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +Riak KV can be installed on OpenSuse and SLES systems using a binary package. 
The following steps have been tested to work with Riak on +the following x86/x86_64 flavors of SuSE: + +* SLES11-SP1 +* SLES11-SP2 +* SLES11-SP3 +* SLES11-SP4 +* OpenSUSE 11.2 +* OpenSUSE 11.3 +* OpenSUSE 11.4 + +## Installing with rpm + +```bash +wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm +sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/2.9.9/setup/installing/verify.md b/content/riak/kv/2.9.9/setup/installing/verify.md new file mode 100644 index 0000000000..d656775314 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/installing/verify.md @@ -0,0 +1,169 @@ +--- +title: "Verifying a Riak KV Installation" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Verifying an Installation" + identifier: "installing_verify" + weight: 311 + parent: "installing" +toc: true +aliases: + - /riak/2.9.9/ops/installing/Post-Installation + - /riak/kv/2.9.9/ops/installing/Post-Installation + - /riak/2.9.9/installing/verify-install/ + - /riak/kv/2.9.9/installing/verify-install/ +--- + +[client libraries]: {{<baseurl>}}riak/kv/2.9.9/developing/client-libraries +[perf open files]: {{<baseurl>}}riak/kv/2.9.9/using/performance/open-files-limit +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/inspecting-node + +After you've installed Riak KV, we recommend checking the liveness of +each node to ensure that requests are being properly served. + +In this document, we cover ways of verifying that your Riak nodes are operating +correctly. After you've determined that your nodes are functioning and you're +ready to put Riak KV to work, be sure to check out the resources in the +**Now What?** section below. + +## Starting a Riak Node + +> **Note about source installations** +> +> To start a Riak KV node that was installed by compiling the source code, you +can add the Riak KV binary directory from the installation directory you've +chosen to your `PATH`. +> +> For example, if you compiled Riak KV from source in +the `/home/riak` directory, then you can add the binary directory +(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation. + +To start a Riak node, use the `riak start` command: + +```bash +riak start +``` + +A successful start will return no output. If there is a problem starting the +node, an error message is printed to standard error. + +To run Riak with an attached interactive Erlang console: + +```bash +riak console +``` + +A Riak node is typically started in console mode as part of debugging or +troubleshooting to gather more detailed information from the Riak startup +sequence. Note that if you start a Riak node in this manner, it is running as +a foreground process that will be exited when the console is closed. + +You can close the console by issuing this command at the Erlang prompt: + +```erlang +q(). +``` + +Once your node has started, you can initially check that it is running with +the `riak ping` command: + +```bash +riak ping +``` + +The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not. 
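+
+If you manage more than a handful of nodes, it can be convenient to
+script this liveness check. The snippet below is a minimal sketch, not
+part of Riak itself: the hostnames are placeholders, and it assumes that
+`riak` is on each host's `PATH` and that `riak ping` exits non-zero when
+a node is not responding.
+
+```bash
+#!/usr/bin/env bash
+# Sweep a list of Riak nodes (hypothetical hostnames) and report
+# which ones respond to `riak ping`.
+for host in riak1.example.com riak2.example.com riak3.example.com; do
+  if ssh "$host" riak ping >/dev/null 2>&1; then
+    echo "$host: up"
+  else
+    echo "$host: DOWN"
+  fi
+done
+```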
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system's default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the `default` [bucket type][cluster ops bucket types]:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+*   Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /types/default/props HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and the general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility, [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node!
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/installing/windows-azure.md b/content/riak/kv/2.9.9/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..794df6dc9e
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/installing/windows-azure.md
@@ -0,0 +1,197 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/2.9.9/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/2.9.9/installing/windows-azure/
+  - /riak/kv/2.9.9/installing/windows-azure/
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported, and thus are not always up to
+date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Log in to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+    - Provide a "Virtual Machine Name", such as "testlinuxvm".
+    - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+    - In the "New Password" box, type a strong password.
+    - In the "Confirm Password" box, retype the password.
+    - Select the appropriate "Size" from the drop-down list.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+    - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop-down list.
+    - In the "DNS Name" box, type a valid DNS address, e.g. "testlinuxvm".
+    - In the "Storage Account" box, select "Use Automatically Generated Storage Account".
+    - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration2.png)
+
+6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration3.png)
+
+7. Wait while Windows Azure prepares your virtual machine.
+
+### Configure Endpoints
+
+Once the virtual machine is created, you must configure endpoints in order to connect to it remotely.
+
+1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints".
+
+2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow, and fill out the next form as follows:
+    - Name: https
+    - Protocol: leave set to 'TCP'
+    - Public Port: 443
+    - Private Port: 8069
+
+## Connect to CentOS VMs using PuTTY or SSH
+
+When the virtual machine has been provisioned and the endpoints configured, you can connect to it using SSH or PuTTY.
+
+### Connecting Using SSH
+
+**For Linux & Mac Users:**
+
+```bash
+ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180
+```
+
+Enter the user's password.
+
+**For Windows Users, use PuTTY:**
+
+If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html).
+
+1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe.
+
+2. Enter the SSH details as found on the node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port.
+
+    ![]({{<baseurl>}}images/putty.png)
+
+## Install Riak and configure it using a shell script
+
+1. **On each node**, once you've connected using the steps above, execute:
+
+```bash
+sudo su -
+curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh
+```
+
+## Configure Riak using Riak Control
+
+You can use either Riak Control or the command line to add nodes to your Riak cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line".
+
+1. Find the DNS name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For example:
+    - **dns:** basho-example.cloudapp.net
+    - **Deployment ID:** 7ea145743aeb4402a088da1234567890
+
+2. Visit https://dns-name.cloudapp.net/admin in your browser.
+
+3. Enter 'admin' as the username, and the "Deployment ID" as the password.
+
+4. Select 'Cluster' on the left.
+
+5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box and clicking 'Add Node'. Use the short name of each VM, not the DNS name. For example:
+    - riak@basho-centos1
+
+You now have a Riak cluster on Azure.
+
+## Configure Riak using Command Line
+
+If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section.
+
+First, SSH into the second and each subsequent node and execute:
+
+```bash
+riak-admin cluster join riak@yourhostnamehere
+```
+
+(Where 'yourhostnamehere' is the short name of the **first node** in your cluster.)
+
+(NOTE: The host you choose can actually be any host that has already joined the cluster. The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak-admin cluster plan
+```
+
+Verify all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak-admin cluster commit
+```
+
+To check the status of clustering, use:
+
+```bash
+riak-admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning.md b/content/riak/kv/2.9.9/setup/planning.md
new file mode 100644
index 0000000000..e1b82c359c
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning.md
@@ -0,0 +1,61 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage back end.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster.
+
+[Learn More >>][plan best practices]
+
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/backend.md b/content/riak/kv/2.9.9/setup/planning/backend.md
new file mode 100644
index 0000000000..7eda42972c
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/backend.md
@@ -0,0 +1,60 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/building/planning/backends/
+  - /riak/kv/2.9.9/ops/building/planning/backends/
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/multi
+[plan backend leveled]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveled
+[dev api backend]: {{<baseurl>}}riak/kv/2.9.9/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+* [Leveled][plan backend leveled]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic                      |Bitcask|LevelDB|Memory|
+:----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend                        |✓      |       |      |
+Persistent                                     |✓      |✓      |      |
+Keyspace in RAM                                |✓      |       |✓     |
+Keyspace can be greater than available RAM     |       |✓      |      |
+Keyspace loaded into RAM on startup<sup>1</sup>|✓      |       |      |
+Objects in RAM                                 |       |       |✓     |
+Object expiration                              |✓      |       |✓     |
+Secondary indexes                              |       |✓      |✓     |
+Tiered storage                                 |       |✓      |      |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces.
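+
+Whichever backend you choose, it is selected with a single setting in
+each node's configuration file. The example below is illustrative only;
+it shows the `riak.conf` form, with LevelDB chosen for the sake of the
+example (e.g. because secondary indexes are needed):
+
+```riakconf
+## Illustrative backend selection. Valid values include bitcask
+## (the default), leveldb, memory, multi, and leveled.
+storage_backend = leveldb
+```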
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/backend/bitcask.md b/content/riak/kv/2.9.9/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..41e7c6906d
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/backend/bitcask.md
@@ -0,0 +1,994 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/advanced/backends/bitcask/
+  - /riak/kv/2.9.9/ops/advanced/backends/bitcask/
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/2.9.9/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: (1) data that is
+  written to Bitcask doesn't need to be ordered on disk, and (2) the
+  log-structured design of Bitcask allows for minimal disk head
+  movement during writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value, and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append-only and write-once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes.
Recovery operations need to review only the last + record or two written and verify CRC data to ensure that the data is + consistent. + +* **Easy Backup** + + In most systems, backup can be very complicated. Bitcask simplifies + this process due to its append-only, write-once disk format. Any + utility that archives or copies files in disk-block order will + properly back up or copy a Bitcask database. + +## Weaknesses + +* Keys must fit in memory + + Bitcask keeps all keys in memory at all times, which means that your + system must have enough memory to contain your entire keyspace, plus + additional space for other operational components and operating- + system-resident filesystem buffer space. + +## Installing Bitcask + +Bitcask is the default storage engine for Riak. You can verify that +Bitcask is currently being used as the storage backend with the +[`riak`][use admin riak cli] command interface: + +```bash +riak config effective | grep backend +``` + +If this operation returns anything other than `bitcask`, read +the following section for instructions on switching the backend to Bitcask. + +## Enabling Bitcask + +You can set Bitcask as the storage engine using each node's +[configuration files][config reference]: + +```riakconf +storage_backend = bitcask +``` + +```appconfig +{riak_kv, [ + {storage_backend, riak_kv_bitcask_backend}, + %% Other riak_kv settings... + + ]}, +``` + +## Configuring Bitcask + +Bitcask enables you to configure a wide variety of its behaviors, from +filesystem sync strategy to merge settings and more. + +> **Note on configuration systems** +> +> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file. +> Instructions for both systems will be included below. Narrative +descriptions of the various settings will be tailored to the newer +configuration system, whereas instructions for the older system will +largely be contained in the code tabs. + +The default configuration values for Bitcask are as follows: + +```riakconf +bitcask.data_root = ./data/bitcask +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + {data_root, "/var/lib/riak/bitcask"}, + {io_mode, erlang}, + + %% Other Bitcask-specific settings + ]} +``` + +All of the other available settings listed below can be added to your +configuration files. + +### Open Timeout + +The open timeout setting specifies the maximum time Bitcask will block +on startup while attempting to create or open the Bitcask data +directory. The default is 4 seconds. + +In general, you will not need to adjust this setting. If, however, you +begin to receive log messages of the form `Failed to start bitcask +backend: ...`, you may want to consider using a longer timeout. + +Open timeout is specified using the `bitcask.sync.open_timeout` +parameter, and can be set in terms of seconds, minutes, hours, etc. +The following example sets the parameter to 10 seconds: + +```riakconf +bitcask.sync.open_timeout = 10s +``` + +```appconfig +{bitcask, [ + ..., + {open_timeout, 10} %% This value must be expressed in seconds + ... + ]} +``` + +### Sync Strategy + +Bitcask enables you to configure the durability of writes by specifying +when to synchronize data to disk, i.e. by choosing a sync strategy. The +default setting (`none`) writes data into operating system buffers that +will be written to disk when those buffers are flushed by the operating +system. 
If the system fails before those buffers are flushed, e.g. due +to power loss, that data is lost. This possibility holds for any +database in which values are asynchronously flushed to disk. + +Thus, using the default setting of `none` protects against data loss in +the event of application failure, i.e. process death, but leaves open a +small window in which data could be lost in the event of a complete +system failure, e.g. hardware or OS failure. + +This possibility can be prevented by choosing the `o_sync` sync +strategy, which forces the operating system to flush to stable storage +at write time for every write. The effect of flushing each write is +better durability, although it should be noted that write throughput +will suffer because each write will have to wait for the write to +complete. + +The following sync strategies are available: + + * `none` - lets the operating system manage syncing writes + (default) + * `o_sync` - uses the `O_SYNC` flag, which forces syncs on every + write + * Time interval - Riak will force Bitcask to sync at specified + intervals + +The following are possible configurations: + + +```riakconf +bitcask.sync.strategy = none +bitcask.sync.strategy = o_sync + +bitcask.sync.strategy = interval +bitcask.sync.interval = 65s +``` + +```appconfig +{bitcask, [ + ..., + {sync_strategy, none}, + {sync_strategy, o_sync}, + {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds + ... + ]} +``` + +> **Sync strategy interval limitations** +> +> Setting the sync interval to a value lower or equal to + `riak_core.vnode_inactivity_timeout` (default: 60 seconds), will + prevent Riak from performing handoffs. +> +> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive. + +### Max File Size + +The `max_file_size` setting describes the maximum permitted size for any +single data file in the Bitcask directory. If a write causes the current +file to exceed this size threshold then that file is closed, and a new +file is opened for writes. The default is 2 GB. + +Increasing `max_file_size` will cause Bitcask to create fewer, larger +files that are merged less frequently, while decreasing it will cause +Bitcask to create more numerous, smaller files that are merged more +frequently. + +To give an example, if your ring size is 16, your servers could see as +much as 32 GB of data in the bitcask directories before the first merge +is triggered, irrespective of your working set size. You should plan +storage accordingly and be aware that it is possible to see disk data +sizes that are larger than the working set. + +The `max_file_size` setting can be specified using kilobytes, megabytes, +etc. The following example sets the max file size to 1 GB: + +```riakconf +bitcask.max_file_size = 1GB +``` + +```appconfig +%% The max_file_size setting must be expressed in bytes, as in the +%% example below + +{bitcask, [ + ..., + {max_file_size, 16#40000000}, %% 1 GB expressed in bytes + ... + ]} +``` + +### Hint File CRC Check + +During startup, Bitcask will read from `.hint` files in order to build +its in-memory representation of the key space, falling back to `.data` +files if necessary. This reduces the amount of data that must be read +from the disk during startup, thereby also reducing the time required to +start up. 
You can configure Bitcask either to disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) - Writes are made via Erlang's built-in file API
+* `nif` - Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif`; it is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met, at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory.
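+
+One way to observe this trade-off on a running node is to watch the
+size of the per-vnode Bitcask directories before and after merges. A
+minimal sketch, assuming the default data root of `./data/bitcask`
+(adjust the path to match your `bitcask.data_root`) and GNU coreutils:
+
+```bash
+# Show disk usage for each vnode's Bitcask directory, largest first.
+# The directory names are partition IDs; totals shrink after merges
+# reclaim the space held by dead keys.
+du -sh ./data/bitcask/* | sort -rh | head
+```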
+ +The merge process is affected by all of the settings described in the +sections below. In those sections, "dead" refers to keys that no longer +contain the most up-to-date values, while "live" refers to keys that do +contain the most up-to-date value and have not been deleted. + +### Merge Policy + +Bitcask enables you to select a merge policy, i.e. when during the day +merge operations are allowed to be triggered. The valid options are: + +* `always` - No restrictions on when merge operations can occur + (default) +* `never` - Merge will never be attempted +* `window` - Merge operations occur during specified hours + +If you are using the newer, `riak.conf`-based configuration system, you +can select a merge policy using the `merge.policy` setting. The +following example sets the merge policy to `never`: + +```riakconf +bitcask.merge.policy = never +``` + +```appconfig +{bitcask, [ + ..., + {merge_window, never}, + ... + ]} +``` + +If you opt to specify start and end hours for merge operations, you can +do so with the `merge.window.start` and `merge.window.end` +settings in addition to setting the merge policy to `window`. +Each setting is an integer between 0 and 23 for hours on a 24h clock, +with 0 meaning midnight and 23 standing for 11 pm. +The merge window runs from the first minute of the `merge.window.start` hour +to the last minute of the `merge.window.end` hour. +The following example enables merging between 3 am and 4:59 pm: + +```riakconf +bitcask.merge.policy = window +bitcask.merge.window.start = 3 +bitcask.merge.window.end = 17 +``` + +```appconfig +%% In the app.config-based system, you specify the merge window using +%% a tuple, as in the following example: + +{bitcask, [ + ..., + {merge_window, {3, 17}}, + ... + ]} +``` + +> **`merge_window` and the Multi backend** +> +>If you are using the older configuration system and using Bitcask with +the [Multi][plan backend multi] backend, please note that if you +wish to use a merge window, you _must_ set it in the global `bitcask` +section of your configuration file. `merge_window` settings +in per-backend sections are ignored. + +If merging has a significant impact on performance of your cluster, or +if your cluster has quiet periods in which little storage activity +occurs, you may want to change this setting from the default. + +A common way to limit the impact of merging is to create separate merge +windows for each node in the cluster and ensure that these windows do +not overlap. This ensures that at most one node at a time can be +affected by merging, leaving the remaining nodes to handle requests. +The main drawback of this approach is that merges will occur less +frequently, leading to increased disk space usage. + +### Merge Triggers + +Merge triggers determine the conditions under which merging will be +invoked. These conditions fall into two basic categories: + +* **Fragmentation** - This describes the ratio of dead keys to total + keys in a file that will trigger merging. The value of this setting is + an integer percentage (0-100). For example, if a data file contains 6 + dead keys and 4 live keys, a merge will be triggered by the default + setting (60%). Increasing this value will cause merging to occur less + often, whereas decreasing the value will cause merging to happen more + often. + +* **Dead Bytes** - This setting describes how much data stored for + dead keys in a single file will trigger merging. If a file meets or + exceeds the trigger value for dead bytes, a merge will be triggered. 
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+  When either of these constraints is met by any file in the directory,
+  Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** - This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in the
+  merge. The value of this setting is a percentage (0-100). For example,
+  if a data file contains 4 dead keys and 6 live keys, it will be
+  included in the merge at the default ratio (40%). Increasing the value
+  will cause fewer files to be merged, while decreasing the value will
+  cause more files to be merged.
+
+* **Dead Bytes** - This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be included in
+  the merge. Increasing this value will cause fewer files to be merged,
+  while decreasing this value will cause more files to be merged. The
+  default is 128 MB.
+
+* **Small File** - This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_.
If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +Fold keys thresholds will reuse the keydir (a) if another fold was +started less than a specified time interval ago and (b) there were fewer +than a specified number of updates. Otherwise, Bitcask will wait until +all current fold keys complete and then start. The default time interval +is 0, while the default number of updates is unlimited. Both thresholds +can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. Each threshold can be disabled by setting the value to +`unlimited`. 
The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value, or if you need to purge data for space reasons, you can
+configure object expiration, aka expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting,
+either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time.
+Bitcask will defer a merge triggered solely by key expiry for the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting, in
+terms of minutes, hours, days, etc. The following example sets the
+grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If search queries returning
+expired keys is a problem for your use case, then we recommend not
+using automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can assist you in making Bitcask as stable and reliable as
+possible and in minimizing latency and maximizing throughput.
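+
+To tie the preceding sections together, the fragment below collects
+several of the settings discussed above into a single `riak.conf`
+sketch. The values shown are illustrative starting points only, not
+recommendations; tune each against your own workload:
+
+```riakconf
+## Illustrative Bitcask tuning fragment (example values only)
+storage_backend = bitcask
+bitcask.data_root = ./data/bitcask
+bitcask.sync.strategy = none
+bitcask.max_file_size = 1GB
+bitcask.merge.policy = window
+bitcask.merge.window.start = 1
+bitcask.merge.window.end = 5
+bitcask.merge.triggers.fragmentation = 60
+bitcask.merge.triggers.dead_bytes = 512MB
+bitcask.merge.thresholds.fragmentation = 40
+bitcask.merge.thresholds.dead_bytes = 128MB
+bitcask.merge.thresholds.small_file = 10MB
+bitcask.expiry = off
+```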
+
+### Tips & Tricks
+
+  * **Bitcask depends on filesystem caches**
+
+    Some data storage layers implement their own page/block buffer cache
+    in-memory, but Bitcask does not. Instead, it depends on the
+    filesystem's cache. Adjusting the caching characteristics of your
+    filesystem can impact performance.
+
+  * **Be aware of file handle limits**
+
+    Review the documentation on the [open files limit][perf open files].
+
+  * **Avoid the overhead of updating file metadata (such as last access
+    time) on every read or write operation**
+
+    You can achieve a substantial speed boost by adding the `noatime`
+    mounting option to Linux's `/etc/fstab`. This will disable the
+    recording of the last accessed time for all files, which results
+    in fewer disk head seeks. If you need last access times but you'd
+    like some of the benefits of this optimization, you can try
+    `relatime`.
+
+    ```
+    /dev/sda5    /data           ext3    noatime  1 1
+    /dev/sdb1    /data/inno-log  ext3    noatime  1 2
+    ```
+
+  * **Small number of frequently changed keys**
+
+    When keys are changed frequently, fragmentation rapidly increases.
+    To counteract this, you should lower the fragmentation trigger and
+    threshold.
+
+  * **Limited disk space**
+
+    When disk space is limited, limiting the space occupied by dead keys
+    is of paramount importance. Lower the dead bytes threshold and
+    trigger to counteract wasted space.
+
+  * **Purging stale entries after a fixed period**
+
+    To automatically purge stale values, set the object expiry value to
+    the desired cutoff time. Keys that are not modified for a period
+    equal to or greater than this time interval will become
+    inaccessible.
+
+  * **High number of partitions per node**
+
+    Because each node hosts many running partitions, Bitcask will have
+    many [open files][perf open files]. To reduce the number of open
+    files, we suggest increasing the max file size so that larger files
+    will be written. You could also decrease the fragmentation and
+    dead-bytes settings and increase the small file threshold so that
+    merging will keep the number of open files small.
+
+  * **High daytime traffic, low nighttime traffic**
+
+    In order to cope with a high volume of writes without performance
+    degradation during the day, you might want to limit merging to
+    non-peak periods. Setting the merge window to hours of the day
+    when traffic is low will help.
+
+  * **Multi-cluster replication**
+
+    If you are using Riak with the replication feature enabled, your
+    clusters might produce more fragmentation and dead bytes. Additionally,
+    because the fullsync feature operates across entire partitions, it
+    will be made more efficient by accessing data as sequentially as
+    possible (across fewer files). Lowering both the fragmentation and
+    dead-bytes settings will improve performance.
+
+## FAQ
+
+  * [[Why does it seem that Bitcask merging is only triggered when a
+    Riak node is restarted?|Developing on Riak
+    FAQs#why-does-it-seem-that-bitc]]
+  * [[If the size of key index exceeds the amount of memory, how does
+    Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+  * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time.
The file being
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, while
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. This data management strategy
+may use up a lot of space over time, since Bitcask writes new values
+without touching the old ones.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033322.9.944576013453623296
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+9819464125817003981681007469812.9.03831329677312
+```
+
+Note that, when Riak starts up, a directory is created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+|-- 116463411724806326294356135107078803128832.9.0184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered.
+ +``` +bitcask/ +|-- 0-1317147619996589 +| |-- 1317147974.bitcask.data +| |-- 1317147974.bitcask.hint +| |-- 1317221578.bitcask.data +| |-- 1317221578.bitcask.hint +| |-- 1317221869.bitcask.data +| |-- 1317221869.bitcask.hint +| |-- 1317222847.bitcask.data +| |-- 1317222847.bitcask.hint +| |-- 1317222868.bitcask.data +| |-- 1317222868.bitcask.hint +| |-- 1317223014.bitcask.data +| `-- 1317223014.bitcask.hint +|-- 1004782375664995756265033322.9.944576013453623296-1317147628760580 +| |-- 1317147693.bitcask.data +| |-- 1317147693.bitcask.hint +| |-- 13172.9.05.bitcask.data +| |-- 13172.9.05.bitcask.hint +| |-- 1317222514.bitcask.data +| |-- 1317222514.bitcask.hint +| |-- 1317223035.bitcask.data +| |-- 1317223035.bitcask.hint +| |-- 1317223411.bitcask.data +| `-- 1317223411.bitcask.hint +|-- 1027618338748291114361965898003636498195577569280-1317223690337865 +|-- 1050454301831586472458898473514828420377701515264-1317223690151365 + +... etc ... + +``` + +This is normal operational behavior for Bitcask. + + + + diff --git a/content/riak/kv/2.9.9/setup/planning/backend/leveldb.md b/content/riak/kv/2.9.9/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..d1d298c0ba --- /dev/null +++ b/content/riak/kv/2.9.9/setup/planning/backend/leveldb.md @@ -0,0 +1,506 @@ +--- +title: "LevelDB" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "LevelDB" + identifier: "planning_backend_leveldb" + weight: 101 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/2.9.9/ops/advanced/backends/leveldb/ + - /riak/kv/2.9.9/ops/advanced/backends/leveldb/ +--- + +[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}} +[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode +[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference +[perf index]: {{<baseurl>}}riak/kv/2.9.9/using/performance +[config reference#aae]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/#active-anti-entropy + +> **Note on upgrading to 2.0** +> +> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0, +and wish to keep using your old `app.config` file for configuration, +make sure to follow the steps for setting the +`total_leveldb_mem_percent` parameter in the +[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB]. + +[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application +that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an +open-source, on-disk key/value store created by Google Fellows Jeffrey +Dean and Sanjay Ghemawat. + +LevelDB is a relatively new entrant into the growing list of key/value +database libraries, but it has some very interesting qualities that we +believe make it an ideal candidate for use in Riak. LevelDB's storage +architecture is more like +[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable +model than it is like Bitcask. This design and implementation provide +the possibility of a storage engine without Bitcask's RAM limitation. + +> **Note:** Riak uses a fork of LevelDB. The code can be found +[on Github](https://github.com/basho/leveldb). + +A number of changes have been introduced in the LevelDB backend in Riak +2.0: + +* There is now only _one_ performance-related setting that Riak users + need to define---`leveldb.total_mem_percent`---as LevelDB now + dynamically sizes the file cache and block sizes based upon active + [vnodes][glossary vnode] assigned to the node. 
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model + for background compaction work on `.sst` table files. The new model + has increased throughput by at least 10% in all test scenarios. +* Delete operations now receive priority handling in compaction + selection, which means more aggressive reclaiming of disk space than + in previous versions of Riak's LevelDB backend. +* Nodes storing massive key datasets (e.g. in the billions of keys) now + receive increased throughput due to automatic management of LevelDB's + block size parameter. This parameter is slowly raised to increase the + number of files that can open simultaneously, improving random read + performance. + +## Strengths + +1. **License** - The LevelDB and eLevelDB licenses are the [New BSD + License](http://www.opensource.org/licenses/bsd-license.php) and the + [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html), + respectively. We'd like to thank Google and the authors of LevelDB at + Google for choosing a completely FLOSS license so that everyone can + benefit from this innovative storage engine. +2. **Data compression** - LevelDB provides two compression algorithms + to reduce storage size and increase efficient use of storage bandwidth: + * Google's [Snappy](https://code.google.com/p/snappy/) data compression + * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data + compression + + Enabling compression means more CPU usage but less disk space. Compression + is especially good for text data, including raw text, Base64, JSON, etc. + +## Weaknesses + +1. Read access can be slow when there are many levels to search +2. LevelDB may have to do a few disk seeks to satisfy a read; one disk + seek per level and, if 10% of the database fits in memory, one seek + for the last level (since all of the earlier levels should end up + cached in the OS buffer cache for most filesystems) whereas if 1% + fits in memory, LevelDB will need two seeks. + +## Installing eLevelDB + +Riak ships with eLevelDB included within the distribution, so there is +no separate installation required. However, Riak is configured to use +the Bitcask storage engine by default. To switch to eLevelDB, set the +`storage_backend` variable in [`riak.conf`][config reference] to +`leveldb`: + +```riakconf +storage_backend = leveldb +``` + +```appconfig +{riak_kv, [ + %% ... + {storage_backend, riak_kv_eleveldb_backend}, + %% ... + ]} +``` + +## Configuring eLevelDB + +eLevelDb's default behavior can be modified by adding/changing +parameters in the `eleveldb` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB. + +The configuration values that can be set in your +[`riak.conf`][config reference] for eLevelDB are as follows: + +Config | Description | Default +:------|:------------|:------- +`leveldb.data_root` | LevelDB data root | `./data/leveldb` +`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size. 
| `70` + +If you are using the older, `app.config`-based system, the equivalent to +the `leveldb.data_root` is the `data_root` setting, as in the following +example: + +```appconfig +{eleveldb, [ + {data_root, "/path/to/leveldb"}, + + %% Other eleveldb-specific settings +]} +``` + +The `leveldb.maximum_memory.percent` setting is only available in the +newer configuration system. + +### Recommended Settings + +Below are **general** configuration recommendations for Linux +distributions. Individual users may need to tailor these settings for +their application. + +#### sysctl + +For production environments, please see [System Performance Tuning][perf index] +for the recommended `/etc/sysctl.conf` settings. + +#### Block Device Scheduler + +Beginning with the 2.6 kernel, Linux gives you a choice of four I/O +[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We +recommend using the NOOP elevator. You can do this by changing the +scheduler on the Linux boot line: `elevator=noop`. + +#### ext4 Options + +The ext4 filesystem defaults include two options that increase integrity +but slow performance. Because Riak's integrity is based on multiple +nodes holding the same data, these two options can be changed to boost +LevelDB's performance. We recommend setting: `barrier`=0 and +`data`=writeback. + +#### CPU Throttling + +If CPU throttling is enabled, disabling it can boost LevelDB performance +in some cases. + +#### No Entropy + +If you are using https protocol, the 2.6 kernel is widely known for +stalling programs waiting for SSL entropy bits. If you are using https, +we recommend installing the +[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for +pseudorandom number generation. + +#### clocksource + +We recommend setting `clocksource=hpet` on your Linux kernel's `boot` +line. The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a +Google-sponsored open source project that has been incorporated into an +Erlang application and integrated into Riak for storage of key/value +information on disk. The implementation of LevelDB is similar in spirit +to the representation of a single Bigtable tablet (section 5.3). + +### How Levels Are Managed + +LevelDB is a memtable/sstable design. The set of sorted tables is +organized into a sequence of levels. Each level stores approximately ten +times as much data as the level before it. The sorted table generated +from a flush is placed in a special young level (also called level-0). +When the number of young files exceeds a certain threshold (currently +four), all of the young files are merged together with all of the +overlapping level-1 files to produce a sequence of new level-1 files (a +new level-1 file is created for every 2MB of data.) + +Files in the young level may contain overlapping keys. However files in +other levels have distinct non-overlapping key ranges. Consider level +number L where L >= 1. When the combined size of files in level-L +exceeds (10^L) MB (i.e. 
10MB for level-1, 100MB for level-2, ...), one
+file in level-L, and all of the overlapping files in level-(L+1) are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-L+1). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 seconds.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0 the only special case to consider is that after
+picking the primary L0 file to compact, it will check other L0 files to
+determine the degree to which they overlap. This is an attempt to avoid
+some I/O, since we can expect L0 compactions to usually if not always be
+"all L0 files".
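+
+As a rough illustration of the scoring rule just described, here is a
+minimal sketch (this is not LevelDB's actual code; the function name is
+illustrative, while the 4-file young-level threshold and the 10^L MB
+level targets are taken from the description above):
+
+```python
+MB = 1024 * 1024
+
+def compaction_scores(level0_files, level_bytes):
+    """Score each level; the highest-scoring level is compacted next.
+
+    level_bytes maps a level number L >= 1 to the total bytes stored
+    in that level.
+    """
+    scores = {0: level0_files / 4.0}       # level 0: files / desired files
+    for level, size in level_bytes.items():
+        desired = (10 ** level) * MB       # 10MB for level-1, 100MB for level-2, ...
+        scores[level] = size / desired     # other levels: bytes / desired bytes
+    return scores
+
+scores = compaction_scores(level0_files=5, level_bytes={1: 8 * MB, 2: 150 * MB})
+print(max(scores, key=scores.get))         # -> 2 (score 1.5 beats 1.25 and 0.8)
+```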
+
+See the `PickCompaction` routine in
+[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition ring
+which results in 64 separate directories, each with their own LevelDB
+database:
+
+```bash
+leveldb/
+|-- 0
+| |-- 000003.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| `-- MANIFEST-000002
+|-- 1004782375664995756265033322492444576013453623296
+| |-- 000005.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| |-- LOG.old
+| `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+| |-- 000005.log
+| |-- CURRENT
+| |-- LOCK
+| |-- LOG
+| |-- LOG.old
+| `-- MANIFEST-000004
+
+... etc ...
+
+`-- 981946412581700398168100746981252653831329677312
+    |-- 000005.log
+    |-- CURRENT
+    |-- LOCK
+    |-- LOG
+    |-- LOG.old
+    `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033322492444576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files in a
+single database directory. In Riak 1.3, the original LevelDB code was
+modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in the name of speeding up
+database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels. 
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows three approximate sizes (in
+megabytes) for each level: the amount of raw data stored in the level,
+the cumulative size of all levels up to and including that level, and
+the cumulative size including active anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above. 
Select the third column if you are not
+  using active anti-entropy or the fourth column if you are (i.e. if the
+  `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative column in
+  each row in the table. The first result that exceeds your fast storage
+  array capacity will provide the level number that should be used for
+  your `leveldb.tiered` setting.
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/backend/leveled.md b/content/riak/kv/2.9.9/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..8f5822a228
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/backend/leveled.md
@@ -0,0 +1,141 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/advanced/backends/leveled/
+  - /riak/kv/2.9.9/ops/advanced/backends/leveled/
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/2.9.9/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/#active-anti-entropy
+
+[Leveled](https://github.com/martinsumner/leveled) is a simple Key-Value
+store based on the concept of Log-Structured Merge Trees, with the
+following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+  - Splits the storage of the value between keys/metadata and body (assuming some definition of metadata is provided);
+  - Allows the application to define what constitutes object metadata and what constitutes the body (value-part) of the object, and to assign tags to objects to manage multiple object types with different extraction rules;
+  - Stores keys/metadata in a merge tree and the full object in a journal of CDB files;
+  - Allows for HEAD requests which have lower overheads than GET requests; and
+  - Allows queries which traverse keys/metadata to be supported with fewer side effects on the page cache than folds over keys/objects.
+- Support for tagging of object types and the implementation of alternative store behaviour based on type:
+  - Allows changes to extract specific information as metadata to be returned from HEAD requests;
+  - Potentially usable for objects with special retention or merge properties.
+- Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes):
+  - Low cost specifically where there is a need to scan across keys and metadata (not values).
+- Written in Erlang as a message passing system between Actors.
+
+
+## Strengths
+
+1. Leveled was developed specifically as a potential backend for Riak,
+   with features such as:
+   * Support for secondary indexes
+   * Multiple fold types
+   * Auto expiry of objects
+   * Journal compression. Enabling compression means more CPU usage but
+     less disk space; compression is especially good for text data,
+     including raw text, Base64, JSON, etc.
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and more likely to suffer from edge case issues than Bitcask or LevelDB, simply because they've been around longer and have been more thoroughly tested via usage in customer environments.
+2. Leveled works better with medium to larger sized objects. It works perfectly well with small objects, but the additional disk space overhead may render LevelDB a better choice if disk space is at a premium and all of your data will be exclusively limited to a few KB or less. This may change as Leveled matures, though.
+
+## Installing leveled
+
+Leveled is included with Riak KV 2.9.9 and beyond, so there is no need
+to install anything further. To switch to leveled, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_leveled_backend},
+    %% ...
+    ]}
+```
+
+## Configuring leveled
+
+Leveled's default behavior can be modified by adding/changing
+parameters in the `leveled` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | Leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | Compression method. | `native`
+`leveled.compression_point` | Compression point - the point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | Log level - sets the minimum log level to be used within leveled. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
+`leveled.max_run_length` | Max journal files per compaction run. | `4`
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### No Entropy
+
+If you are using the https protocol, the 2.6 kernel is widely known for
+stalling programs waiting for SSL entropy bits. If you are using https,
+we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line. 
The TSC clocksource has been identified to cause issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[Leveled](https://github.com/martinsumner/leveled) is an open source project that has been developed specifically as a backend option for Riak, rather than as a generic backend.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/backend/memory.md b/content/riak/kv/2.9.9/setup/planning/backend/memory.md
new file mode 100644
index 0000000000..383d502a11
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/backend/memory.md
@@ -0,0 +1,147 @@
+---
+title: "Memory"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Memory"
+    identifier: "planning_backend_memory"
+    weight: 102
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/advanced/backends/memory/
+  - /riak/kv/2.9.9/ops/advanced/backends/memory/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+[plan backend multi]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/multi
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb
+
+The Memory storage backend uses in-memory tables to store all data.
+This data is never persisted to disk or to any other storage mechanism.
+The Memory storage engine is best used for testing Riak clusters or for
+storing small amounts of transient state in production systems.
+
+Internally, the Memory backend uses Erlang ETS tables to manage data.
+More information can be found in the
+[official Erlang documentation](http://www.erlang.org/doc/man/ets.html).
+
+## Enabling the Memory Backend
+
+To enable the memory backend, edit your [configuration files][config reference]
+for each Riak node and specify the Memory backend as shown in the following
+example:
+
+```riakconf
+storage_backend = memory
+```
+
+```appconfig
+{riak_kv, [
+    ...,
+    {storage_backend, riak_kv_memory_backend},
+    ...
+    ]}
+```
+
+**Note**: If you *replace* the existing specified backend by removing it
+or commenting it out, data belonging to
+the previously specified backend will still be preserved on the
+filesystem but will no longer be accessible through Riak unless the
+backend is enabled again.
+
+If you require multiple backends in your configuration, please consult
+the [Multi backend documentation][plan backend multi].
+
+## Configuring the Memory Backend
+
+The Memory backend enables you to configure two fundamental aspects of
+object storage: maximum memory usage per [vnode][glossary vnode]
+and object expiry.
+
+### Max Memory
+
+This setting specifies the maximum amount of memory consumed by the
+Memory backend. It's important to note that this setting acts on a
+*per-vnode basis*, not on a per-node or per-cluster basis. This should
+be taken into account when planning for memory usage with the Memory
+backend, as the total memory used will be max memory times the number
+of vnodes in the cluster. 
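+
+A back-of-the-envelope sketch of that multiplication (the function name
+and input numbers are purely illustrative; Riak does not ship this
+helper):
+
+```python
+def memory_backend_footprint(ring_size, n_nodes, max_memory_per_vnode_mb):
+    """Worst-case RAM the Memory backend may use, assuming vnodes are
+    spread evenly across nodes (Riak approximates this)."""
+    vnodes_per_node = ring_size / n_nodes
+    per_node_mb = vnodes_per_node * max_memory_per_vnode_mb
+    cluster_mb = ring_size * max_memory_per_vnode_mb
+    return per_node_mb, cluster_mb
+
+# A 64-partition ring on 5 nodes with max_memory_per_vnode = 10MB:
+per_node, cluster = memory_backend_footprint(64, 5, 10)
+print(per_node, cluster)  # 128.0 MB per node, 640 MB cluster-wide
+```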
+
+When the threshold value that you set has been met in a particular
+vnode, Riak will begin discarding objects, beginning with the oldest
+object and proceeding until memory usage returns below the allowable
+threshold.
+
+You can configure maximum memory using the
+`memory_backend.max_memory_per_vnode` setting. You can specify
+`max_memory_per_vnode` however you'd like, using kilobytes, megabytes,
+or even gigabytes.
+
+The following are all possible settings:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 500KB
+memory_backend.max_memory_per_vnode = 10MB
+memory_backend.max_memory_per_vnode = 2GB
+```
+
+```appconfig
+%% In the app.config-based system, the equivalent setting is max_memory,
+%% which must be expressed in megabytes:
+
+{riak_kv, [
+  %% storage_backend specifies the Erlang module defining the storage
+  %% mechanism that will be used on this node.
+
+  {storage_backend, riak_kv_memory_backend},
+  {memory_backend, [
+        ...,
+        {max_memory, 4096}, %% 4GB in megabytes
+        ...
+  ]}
+]}
+```
+
+To determine an optimal max memory setting, we recommend consulting the
+documentation on [LevelDB cache size][plan backend leveldb].
+
+### TTL
+
+The time-to-live (TTL) parameter specifies the amount of time an object
+remains in memory before it expires. The minimum time is one second.
+
+In the newer, `riak.conf`-based configuration system, you can specify
+`ttl` in seconds, minutes, hours, days, etc. The following are all
+possible settings:
+
+```riakconf
+memory_backend.ttl = 1s
+memory_backend.ttl = 10m
+memory_backend.ttl = 3h
+```
+
+```appconfig
+%% In the app.config-based system, the ttl setting must be expressed in
+%% seconds:
+
+{memory_backend, [
+    %% other settings
+    {ttl, 86400}, %% Set to 1 day
+    %% other settings
+    ]}
+```
+
+> **Dynamically Changing `ttl`**
+>
+> There is currently no way to dynamically change the `ttl` setting for a
+bucket or bucket type. The current workaround would be to define
+multiple Memory backends using the Multi backend, each with different
+`ttl` values. For more information, consult the documentation on the
+[Multi][plan backend multi] backend.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/backend/multi.md b/content/riak/kv/2.9.9/setup/planning/backend/multi.md
new file mode 100644
index 0000000000..e09b562f81
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/backend/multi.md
@@ -0,0 +1,230 @@
+---
+title: "Multi-backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Multi-backend"
+    identifier: "planning_backend_multi"
+    weight: 103
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/advanced/backends/multi/
+  - /riak/kv/2.9.9/ops/advanced/backends/multi/
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/memory
+[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types
+[use admin riak-admin cli]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin
+
+Riak allows you to run multiple backends within a single Riak cluster.
+Selecting the Multi backend enables you to use different storage
+backends for different [buckets][concept buckets]. 
Any combination of the three +available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used. + +## Configuring Multiple Backends + +You can set up your cluster to use the Multi backend using Riak's +[configuration files][config reference]. + +```riakconf +storage_backend = multi +``` + +```appconfig +{riak_kv, [ + %% ... + {storage_backend, riak_kv_multi_backend}, + %% ... +]}, +``` + +Remember that you must stop and then re-start each node when you change +storage backends or modify any other configuration. + +## Using Multiple Backends + +In Riak 2.0 and later, we recommend using multiple backends by applying +them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process +involves three steps: + +1. Creating a bucket type that enables buckets of that type to use the + desired backends +2. Activating that bucket type +3. Setting up your application to use that type + +Let's say that we've set up our cluster to use the Multi backend and we +want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface. + +We'll call our bucket types `leveldb_backend` and `memory_backend`, but +you can use whichever names you wish. + +```bash +riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}' +riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}' +``` + +Then, we must activate those bucket types so that they can be used in +our cluster: + +```bash +riak-admin bucket-type activate leveldb_backend +riak-admin bucket-type activate memory_backend +``` + +Once those types have been activated, any objects stored in buckets +bearing the type `leveldb_backend` will be stored in LevelDB, whereas +all objects stored in buckets of the type `memory_backend` will be +stored in the Memory backend. + +More information can be found in our documentation on [using bucket types][usage bucket types]. + +## Configuring Multiple Backends + +Once you've set up your cluster to use multiple backends, you can +configure each backend on its own. All configuration options available +for LevelDB, Bitcask, and Memory are all available to you when using the +Multi backend. + +#### Using the Newer Configuration System + +If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by +prefacing each configuration with `multi_backend`. 
+ +Here is an example of the general form for configuring multiple +backends: + +```riakconf +multi_backend.$name.$setting_name = setting +``` + +If you are using, for example, the LevelDB and Bitcask backends and wish +to set LevelDB's `bloomfilter` setting to `off` and the Bitcask +backend's `io_mode` setting to `nif`, you would do that as follows: + +```riakconf +multi_backend.leveldb.bloomfilter = off +multi_backend.bitcask.io_mode = nif +``` + +#### Using the Older Configuration System + +If you are using the older, `app.config`-based configuration system, +configuring multiple backends involves adding one or more backend- +specific sections to your `riak_kv` settings (in addition to setting +the `storage_backend` setting to `riak_kv_multi_backend`, as shown +above). + +> **Note**: If you are defining multiple file-based backends of the same +type, each of these must have a separate `data_root` directory defined. + +While all configuration parameters can be placed anywhere within the +`riak_kv` section of `app.config`, in general we recommend that you +place them in the section containing other backend-related settings to +keep the settings organized. + +Below is the general form for your `app.config` file: + +```appconfig +{riak_kv, [ + %% ... + {multi_backend_default, <<"bitcask_mult">>}, + {multi_backend, [ + %% Here's where you set the individual multiplexed backends + {<<"bitcask_mult">>, riak_kv_bitcask_backend, [ + %% bitcask configuration + {data_root, "/var/lib/riak/bitcask_mult/"}, + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [ + %% bitcask configuration + {data_root, "/var/lib/riak/bitcask_expiry_mult/"}, + {expiry_secs, 86400}, + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [ + %% eleveldb configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [ + %% eleveldb with a different configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"memory_mult">>, riak_kv_memory_backend, [ + %% memory configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]} + ]}, + %% ... +]}, +``` + +Note that in each of the subsections of the `multi_backend` setting, the +name of each backend you wish to configure can be anything you would +like. Directly after naming the backend, you must specify which of the +backends corresponds to that name, i.e. `riak_kv_bitcask_backend`, +`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have +done that, the various configurations for each named backend can be set +as objects in an Erlang list. + +## Example Configuration + +Imagine that you are using both Bitcask and LevelDB in your cluster, and +you would like storage to default to Bitcask. The following +configuration would create two backend configurations, named +`bitcask_mult` and `leveldb_mult`, respectively, while also setting the +data directory for each backend and specifying that `bitcask_mult` is +the default. + +```riakconf +storage_backend = multi + +multi_backend.bitcask_mult.storage_backend = bitcask +multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult + +multi_backend.leveldb_mult.storage_backend = leveldb +multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult + +multi_backend.default = bitcask_mult +``` + +```appconfig +{riak_kv, [ + %% ... 
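+  %% multi_backend_default names the backend instance used when a
+  %% bucket does not specify one; each {Name, Module, Settings} tuple
+  %% under multi_backend defines one instance.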
+
+  {multi_backend_default, <<"bitcask_mult">>},
+  {multi_backend, [
+    {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+      {data_root, "/var/lib/riak/bitcask"}
+    ]},
+    {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+      {data_root, "/var/lib/riak/leveldb"}
+    ]}
+  ]}
+  %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or for the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is 50%
+or less. For example, using three backends with each set to 50% memory
+usage will inevitably lead to memory problems.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/best-practices.md b/content/riak/kv/2.9.9/setup/planning/best-practices.md
new file mode 100644
index 0000000000..b8c41eadbc
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/best-practices.md
@@ -0,0 +1,145 @@
+---
+title: "Scaling and Operating Riak Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Best Practices"
+    identifier: "planning_best_practices"
+    weight: 105
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/building/planning/best-practices
+  - /riak/kv/2.9.9/ops/building/planning/best-practices
+---
+
+[use ref handoff]: {{<baseurl>}}riak/kv/2.9.9/using/reference/handoff
+[config mapreduce]: {{<baseurl>}}riak/kv/2.9.9/configuring/mapreduce
+[glossary aae]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#active-anti-entropy-aae
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes
+
+Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster.
+
+## Disk Capacity
+
+Filling up disks is a serious problem in Riak. In general, you should
+add capacity under the following conditions:
+
+* a disk becomes more than 80% full
+* you have fewer than 10 days of capacity remaining at current rates of
+  growth
+
+## RAID Levels
+
+Riak provides resilience through its built-in redundancy.
+
+* RAID0 can be used to increase performance at the expense of
+  single-node reliability
+* RAID5/6 can be used to increase reliability over RAID0 while still
+  offering higher performance than single disks
+* You should choose a RAID level (or no RAID) that you’re comfortable
+  with
+
+## Disk Leeway
+
+* Adding new nodes instantly increases the total capacity of the
+  cluster, but you should allow enough internal network capacity that
+  [handing off][use ref handoff] existing data outpaces the arrival of new
+  data.
+* Once you’ve reached a scale at which the amount of new data arriving
+  is a small fraction of the cluster's total capacity, you can add new
+  nodes when you need them. You should be aware, however, that adding
+  new nodes can actually _increase_ disk usage on existing nodes in the
+  short term as data is rebalanced within the cluster. 
+* If you are certain that you are likely to run out of capacity, we
+  recommend allowing a week or two of leeway so that you have plenty of
+  time to add nodes and for [handoff][use ref handoff] to occur before the disks reach
+  capacity
+* For large volumes of storage it's usually prudent to add more capacity
+  once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+  processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you’ll have spare
+  capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be “bursty,” i.e. it tends to vary both quite
+  a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times as much intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+  fill up
+* the current nodes' IO/CPU activity is higher than average for an
+  extended period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node uses 1 TB but the
+  rest use 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node]. 
+
+## Scaling
+
+* All large-scale systems are bound by the availability of some
+  resources
+* From a stability point of view, the best state for a busy Riak cluster
+  to maintain is the following:
+  * New network connections are limited to ensure that existing network
+    connections consume most network bandwidth
+  * CPU at < 30%
+  * Disk IO at < 90%
+* You should use HAProxy or your application servers to limit new
+  network connections to keep network and IO below 90% and CPU below
+  30%.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/2.9.9/setup/planning/bitcask-capacity-calc.md
new file mode 100644
index 0000000000..0e85f1d3ad
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/bitcask-capacity-calc.md
@@ -0,0 +1,104 @@
+---
+title: "Bitcask Capacity Calculator"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Bitcask Capacity Calculator"
+    identifier: "planning_cluster_bitcask_capacity"
+    weight: 104
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/building/planning/bitcask
+  - /riak/kv/2.9.9/ops/building/planning/bitcask
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask
+
+These calculators will assist you in sizing your cluster if you plan to
+use the default [Bitcask][plan backend bitcask] storage backend.
+
+This page is designed to give you a rough estimate when sizing your
+cluster. The calculations are a _best guess_, and they tend to be a bit
+on the conservative side. It's important to include a bit of head room
+as well as room for unexpected growth so that if demand exceeds
+expectations you'll be able to add more nodes to the cluster and stay
+ahead of your requirements.
+
+<div id="node_info" class="calc_info"></div>
+<div class="calculator">
+  <ul>
+    <li>
+      <label for="n_total_keys">Total Number of Keys:</label>
+      <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input">
+      <span class="error_span" id="n_total_keys_error"></span>
+    </li>
+    <li>
+      <label for="n_bucket_size">Average Bucket Size (Bytes):</label>
+      <input id="n_bucket_size" type="text" size="7" name="n_bucket_size" value="" class="calc_input">
+      <span class="error_span" id="n_bucket_size_error"></span>
+    </li>
+    <li>
+      <label for="n_key_size">Average Key Size (Bytes):</label>
+      <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input">
+      <span class="error_span" id="n_key_size_error"></span>
+    </li>
+    <li>
+      <label for="n_record_size">Average Value Size (Bytes):</label>
+      <input id="n_record_size" type="text" size="7" name="n_record_size" value="" class="calc_input">
+      <span class="error_span" id="n_record_size_error"></span>
+    </li>
+    <li>
+      <label for="n_ram">RAM Per Node (in GB):</label>
+      <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input">
+      <span class="error_span" id="n_ram_error"></span>
+    </li>
+    <li>
+      <label for="n_nval"><i>N</i> (Number of Write Copies):</label>
+      <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input">
+      <span class="error_span" id="n_nval_error"></span>
+    </li>
+</ul>
+</div>
+
+## Recommendations
+
+<span id="recommend"></span>
+
+## Details on Bitcask RAM Calculation
+
+With the above information in mind, the following variables will factor
+into your RAM calculation:
+
+Variable | Description
+:--------|:-----------
+Static Bitcask per-key overhead | 44.5 bytes per key
+Estimated average bucket-plus-key length | The combined number of characters your bucket + keynames will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The Actual Equation
+
+Approximate RAM needed for Bitcask = (static Bitcask per-key overhead +
+estimated average bucket-plus-key length in bytes) * estimated total
+number of keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+The amount of RAM you would need for Bitcask is (44.5 + 30) bytes *
+50,000,000 keys * 3 replicas, or about **11.2 GB** across your entire
+cluster.
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/cluster-capacity.md b/content/riak/kv/2.9.9/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..4ab80cc35a
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/cluster-capacity.md
@@ -0,0 +1,238 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/building/planning/cluster
+  - /riak/kv/2.9.9/ops/building/planning/cluster
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/2.9.9/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/2.9.9/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users. Memory is essential for running complex MapReduce queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM without degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory. 
The keydir is a hash table that maps each +concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for +each file contained within each Bitcask backend) to a fixed-size +structure giving the file, offset, and size of the most recently written +entry for that bucket + key on disk. + +To learn about Bitcask see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper. + +If your calculated RAM needs will exceed your hardware resources--in other words, if you can't afford the RAM to use Bitcask---we recommend that you use LevelDB. + +Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster. + +### LevelDB + +If RAM requirements for Bitcask are prohibitive, we recommend use of +the LevelDB backend. While LevelDB doesn't require a large amount of RAM +to operate, supplying it with the maximum amount of memory available leads to higher performance. + +For more information see [LevelDB][plan backend leveldb]. + +## Disk + +Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs: + +#### Estimated Total Objects * Average Object Size * n_val + +For example: + +* 50,000,000 objects +* an average object size of two kilobytes (2,048 bytes) +* the default `n_val` of 3 + +Then you would need just over approximately **286 GB** of disk space in the entire cluster to accommodate your data. + +We believe that databases should be durable out of the box. When we +built Riak, we did so in a way that you could write to disk while +keeping response times below your users' expectations. So this +calculation assumes that you'll be keeping the entire data set on disk. + +Many of the considerations taken when configuring a machine to serve a +database apply to configuring a node for Riak as well. Mounting +disks with noatime and having separate disks for your OS and Riak data +lead to much better performance. See [Planning for a +Riak System](../start) for more information. + +### Disk Space Planning and Ownership Handoff + +When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is that the other nodes require more intensive disk space usage; in rare cases filling the disk of one or more of those nodes. + +When making disk space planning decisions, we recommend that you: + +* assume that one or more nodes may be down at any time +* monitor your disk space usage and add additional space when usage + exceeds 50-60% of available space. + +Another possibility worth considering is using Riak with a filesystem +that allows for growth, for example +[LVM], +[RAID](http://en.wikipedia.org/wiki/RAID), or +[ZFS](http://en.wikipedia.org/wiki/ZFS). + +## Read/Write Profile + +Read/write ratios, as well as the distribution of key access, should +influence the configuration and design of your cluster. If your use case +is write heavy, you will need less RAM for caching, and if only a +certain portion of keys is accessed regularly, such as a [Pareto +distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you +won't need as much RAM available to cache those keys' values. + +## Number of Nodes + +The number of nodes (i.e. 
physical servers) in your Riak Cluster depends +on the number of times data is [replicated][concept replication] across the +cluster. To ensure that the cluster is always available to respond to +read and write requests, we recommend a "sane default" of N=3 +replicas. This requirement can be met with a 3 or 4-node +cluster. + +For production deployments, however, we recommend using no fewer than 5 +nodes, as node failures in smaller clusters can compromise the +fault-tolerance of the system. Additionally, in clusters smaller than 5 +nodes, a high percentage of the nodes (75-100% of them) will need to +respond to each request, putting undue load on the cluster that may +degrade performance. For more details on this recommendation, see our +blog post on [Why Your Riak Cluster Should Have at Least Five +Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/). + +## Scaling + +Riak can be scaled in two ways: vertically, via improved hardware, and +horizontally, by adding more nodes. Both ways can provide performance +and capacity benefits, but should be used in different circumstances. +The [riak-admin cluster command][use admin riak-admin#cluster] can +assist scaling in both directions. + +#### Vertical Scaling + +Vertical scaling, or improving the capabilities of a node/server, +provides greater capacity to the node but does not decrease the overall +load on existing members of the cluster. That is, the ability of the +improved node to handle existing load is increased but the load itself +is unchanged. Reasons to scale vertically include increasing IOPS (I/O +Operations Per Second), increasing CPU/RAM capacity, and increasing disk +capacity. + +#### Horizontal Scaling + +Horizontal scaling, or increasing the number of nodes in the cluster, +reduces the responsibilities of each member node by reducing the number +of partitions and providing additional endpoints for client connections. +That is, the capacity of each individual node does not change but its +load is decreased. Reasons to scale horizontally include increasing I/O +concurrency, reducing the load on existing nodes, and increasing disk +capacity. + +> **Note on horizontal scaling** +> +> When scaling horizontally, it's best to add all planned nodes at once +with multiple `riak-admin cluster join` commands followed by +a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster. + +#### Reducing Horizontal Scale + +If a Riak cluster is over provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command. + +## Ring Size/Number of Partitions + +Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference]. + +The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes we recommend a larger ring size. + +The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes. + +There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses. 
We recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine, while a 10-node cluster should work well with a ring size of 128 or 256 (64 is too small, while 512 is likely too large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency IO access can drastically decrease performance. Before
+putting your Riak cluster in production, it is recommended that you gain
+a full understanding of your environment's behavior so that you know how
+your cluster performs under load for an extended period of time. Doing
+so will help you size your cluster for future growth and lead to optimal
+performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend that you keep latency
+between nodes as low as possible, as high latency leads to
+sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the least
+latency possible using SATA drives or SSDs, for example. 
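+
+As a quick sanity check on the disk and ring-size guidance above, here
+is a small sketch (the function names are illustrative and not part of
+any Riak tooling; 307.2 GB decimal corresponds to the ~286 GB GiB-based
+figure quoted in the Disk section):
+
+```python
+def disk_needed_gb(total_objects, avg_object_bytes, n_val=3):
+    """Estimated Total Objects * Average Object Size * n_val, in GB."""
+    return total_objects * avg_object_bytes * n_val / 1e9
+
+def partitions_per_node(ring_size, n_nodes):
+    """Ring size must be a power of 2; aim for 10-50 partitions per node."""
+    assert ring_size & (ring_size - 1) == 0, "ring size must be a power of 2"
+    return ring_size / n_nodes
+
+print(disk_needed_gb(50_000_000, 2048))  # 307.2 (GB) for the example above
+print(partitions_per_node(128, 10))      # 12.8, within the 10-50 guideline
+```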
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/future.md b/content/riak/kv/2.9.9/setup/planning/future.md
new file mode 100644
index 0000000000..c775edf111
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/future.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+#menu:
+#  riak_kv-2.9.9:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/operating-system.md b/content/riak/kv/2.9.9/setup/planning/operating-system.md
new file mode 100644
index 0000000000..a713ad1019
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/operating-system.md
@@ -0,0 +1,30 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+aliases:
+---
+
+[downloads]: {{<baseurl>}}riak/kv/2.9.9/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/planning/start.md b/content/riak/kv/2.9.9/setup/planning/start.md
new file mode 100644
index 0000000000..ec595685b0
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/planning/start.md
@@ -0,0 +1,61 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/building/planning/system-planning
+  - /riak/kv/2.9.9/ops/building/planning/system-planning
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring your
+Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have
+strengths and weaknesses, so if you are unsure of which backend you
+need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests
+across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP
+implementations. 
We don't recommend VRRP behavior for the VIP because +you'll lose the benefit of spreading client query load to all nodes in a +ring. + +For **reverse-proxy** configurations (HTTP interface), any one of the +following should work adequately: + +* haproxy +* squid +* varnish +* nginx +* lighttpd +* Apache + + + + + diff --git a/content/riak/kv/2.9.9/setup/search.md b/content/riak/kv/2.9.9/setup/search.md new file mode 100644 index 0000000000..b26bcf4245 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/search.md @@ -0,0 +1,5 @@ + + + + + diff --git a/content/riak/kv/2.9.9/setup/upgrading.md b/content/riak/kv/2.9.9/setup/upgrading.md new file mode 100644 index 0000000000..0b0b58d731 --- /dev/null +++ b/content/riak/kv/2.9.9/setup/upgrading.md @@ -0,0 +1,38 @@ +--- +title: "Upgrading Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Upgrading" + identifier: "upgrading" + weight: 102 + parent: "setup_index" +toc: true +aliases: +--- + +[upgrade checklist]: ./checklist +[upgrade version]: ./version +[upgrade cluster]: ./cluster +[upgrade mdc]: ./multi-datacenter +[upgrade search]: ./search + +## In This Section + +### [Production Checklist][upgrade checklist] + +An overview of what to consider before upgrading Riak KV in a production environment. + +[Learn More >>][upgrade checklist] + +### [Upgrading to Riak KV 2.9.9][upgrade version] + +A tutorial on updating to Riak KV 2.9.9 + +[Learn More >>][upgrade version] + + + + diff --git a/content/riak/kv/2.9.9/setup/upgrading/checklist.md b/content/riak/kv/2.9.9/setup/upgrading/checklist.md new file mode 100644 index 0000000000..5c1c74e4eb --- /dev/null +++ b/content/riak/kv/2.9.9/setup/upgrading/checklist.md @@ -0,0 +1,225 @@ +--- +title: "Production Checklist" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Production Checklist" + identifier: "upgrading_checklist" + weight: 100 + parent: "upgrading" +toc: true +aliases: + - /riak/2.9.9/ops/upgrading/production-checklist/ + - /riak/kv/2.9.9/ops/upgrading/production-checklist/ +--- + +[perf open files]: {{<baseurl>}}riak/kv/2.9.9/using/performance/open-files-limit +[perf index]: {{<baseurl>}}riak/kv/2.9.9/using/performance +[ntp]: http://www.ntp.org/ +[security basics]: {{<baseurl>}}riak/kv/2.9.9/using/security/basics +[cluster ops load balance]: {{<baseurl>}}riak/kv/2.9.9/configuring/load-balancing-proxy +[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference +[config backend]: {{<baseurl>}}riak/kv/2.9.9/configuring/backend +[usage search]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/search +[usage conflict resolution]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency +[apps replication properties]: {{<baseurl>}}riak/kv/2.9.9/developing/app-guide/replication-properties +[concept strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/reference/strong-consistency +[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/bucket-types +[use admin commands]: {{<baseurl>}}riak/kv/2.9.9/using/admin/commands +[use admin riak control]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-control +[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/inspecting-node +[troubleshoot http]: {{<baseurl>}}riak/kv/2.9.9/using/troubleshooting/http-204 +[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin +[SANs]: 
http://en.wikipedia.org/wiki/Storage_area_network
+
+Deploying Riak KV to a live production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using `iperf` to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it. 
+    * If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][concept strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state?
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak-admin`][use admin riak-admin])
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median and 95th and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs?
+* Are the client libraries in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying? 
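+
+As a convenience, the node-level checks from the Riak KV section above can be scripted. The following is a minimal sketch only, assuming the standard `riak` and `riak-admin` tools are on the `PATH` and using a hypothetical node name (take the real value from your configuration files):
+
+```bash
+#!/bin/sh
+# Hypothetical node name; use the value from your riak.conf / vm.args
+NODE="riak@192.168.1.11"
+
+riak ping                                   # expect: pong
+riak-admin wait-for-service riak_kv "$NODE" # expect: riak_kv is up
+riak-admin ringready                        # expect: TRUE ALL nodes agree on the ring
+riak-admin member-status                    # all nodes should be listed as valid
+riak-admin ring-status                      # expect: Ring Ready: true, no pending changes
+riak-admin transfers                        # expect: No transfers active
+```
+
+Any deviation from the expected output is worth investigating before you continue.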
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak-admin diag <check>`, where `check` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak-admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production
+    systems
+  - High is for problems that impact production or soon-to-be-production
+    systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those
+    issues that are likely to turn into production outages very soon.
+    On-call engineers respond to urgent requests within 30 minutes,
+    24/7.
+* Does your team know how to gather `riak-debug` results from the whole
+  cluster when opening tickets? If not, that process goes something like
+  this:
+  - SSH into each machine, run `riak-debug`, and grab the resultant
+    `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open
+    a new High- or Urgent-priority ticket
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at
+the metrics gathered above. Based on the numbers you're seeing so far,
+configure alerting thresholds on your latencies, disk consumption, and
+memory. These are the places most likely to give you advance warning of
+trouble.
+
+When you go to increase capacity down the line, having historic metrics
+will give you very clear indicators of having resolved scaling problems,
+as well as metrics for understanding what to upgrade and when. 
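+
+For the ticket-filing workflow above, collecting `riak-debug` output from every node can be loosely automated. This is a rough sketch only, assuming SSH access to each node and hypothetical hostnames; the archive name pattern may vary by version:
+
+```bash
+#!/bin/sh
+# Hypothetical host list; substitute your cluster's nodes
+HOSTS="riak1.example.com riak2.example.com riak3.example.com"
+
+for host in $HOSTS; do
+  # Run riak-debug remotely; it produces a .tar.gz in the working directory
+  ssh "$host" "riak-debug"
+  # Copy the resulting debug archive(s) back for attachment to the ticket
+  scp "$host:*riak-debug*.tar.gz" .
+done
+```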
+ + + + + diff --git a/content/riak/kv/2.9.9/setup/upgrading/cluster.md b/content/riak/kv/2.9.9/setup/upgrading/cluster.md new file mode 100644 index 0000000000..b994bd1b9a --- /dev/null +++ b/content/riak/kv/2.9.9/setup/upgrading/cluster.md @@ -0,0 +1,303 @@ +--- +title: "Upgrading a Cluster" +description: "" +project: "riak_kv" +project_version: "2.9.9" +menu: + riak_kv-2.9.9: + name: "Upgrading a Cluster" + identifier: "upgrading_cluster" + weight: 102 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/2.9.9/ops/upgrading/rolling-upgrades/ + - /riak/kv/2.9.9/ops/upgrading/rolling-upgrades/ +--- + +[production checklist]: {{<baseurl>}}riak/kv/2.9.9/setup/upgrading/checklist +[use admin riak control]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-control +[use admin commands]: {{<baseurl>}}riak/kv/2.9.9/using/admin/commands +[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin +[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes +[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.9/configuring/v3-multi-datacenter +[jmx monitor]: {{<baseurl>}}riak/kv/2.9.9/using/reference/jmx +[snmp]: {{<baseurl>}}riak/kv/2.9.9/using/reference/snmp + +{{% note title="Note on upgrading Riak KV from older versions" %}} +Riak KV upgrades are tested and supported for two feature release versions. +For example, upgrades from 1.1.x to 1.3.x are tested and supported, +while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new +version of Riak KV that is more than two feature releases ahead, we +recommend first upgrading to an intermediate version. For example, in an +upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x +before upgrading to 1.4.x. + +If you run [Riak Control]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-control), you should disable it during the rolling upgrade process. +{{% /note %}} + +Riak KV nodes negotiate with each other to determine supported +operating modes. This allows clusters containing mixed-versions of Riak KV +to properly interoperate without special configuration, and simplifies +rolling upgrades. + +Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading. + +## Debian/Ubuntu + +The following example demonstrates upgrading a Riak KV node that has been +installed with the Debian/Ubuntu packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +2\. Back up the Riak KV node's `/etc` and `/data` directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Upgrade Riak KV: + +```bash +sudo dpkg -i <riak_package_name>.deb +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +5\. Verify Riak KV is running the new version: + +```bash +riak version +``` + +6\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +* `»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +* While the node was offline, other nodes may have accepted writes on its +behalf. 
This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+riak@192.168.1.11)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7. 
The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+    * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_jmx` - See [JMX Monitoring][jmx monitor] for more information.
+    * `snmp` - See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should ensure that any custom patches contained in
+the `basho-patches` directory are examined to determine their
+application to the upgraded version. If you find that patches no longer
+apply to the upgraded version, you should remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/upgrading/multi-datacenter.md b/content/riak/kv/2.9.9/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..391f0898f4
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,24 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+#menu:
+#  riak_kv-2.9.9:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+aliases:
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/upgrading/search.md b/content/riak/kv/2.9.9/setup/upgrading/search.md
new file mode 100644
index 0000000000..9b0d222f69
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/upgrading/search.md
@@ -0,0 +1,281 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "2.9.9"
+menu:
+  riak_kv-2.9.9:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/2.9.9/ops/advanced/upgrading-search-2
+  - /riak/kv/2.9.9/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e. 
no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+And please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM. A good start is 2 GB but more will be required for heavier
+workloads. However, do not make the heap too large, as that could
+cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+>Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space from merge index, as its GC
+algorithm is bad at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the setting is called
+   `yokozuna`; if you've moved to the new `riak.conf` configuration
+   format, it's called `search`. 
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+        %% Other configs
+        {enabled, true},
+        %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+    To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+    instructions to learn how to define your xml file. Once you've created
+    the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+    Once a bucket is associated with the new Search, all objects that are
+    written or modified in Riak will be indexed by **both** legacy and new
+    Search. However, the HTTP and client query interfaces will still
+    continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+    Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+    the following code into the shell. It clears the Search hash trees for
+    each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+    ```
+
+    Press `Ctrl-D` to exit from the attached shell.
+
+    In the background AAE will rebuild the hash trees and exchange them
+    with KV. These exchanges will notice objects are missing and index
+    them in new Search.
+
+    <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+have occurred on every node.
+
+    ```bash
+    riak-admin search aae-status
+    ```
+
+    First, you must wait until all trees are rebuilt. This may take a
+    while as each node is configured, by default, to build a maximum of
+    one tree per hour. You can determine when a tree is built by looking
+    at the `Entropy Trees` section. When a tree is not built it will show
+    `--` under the `Built (ago)` column. Otherwise, it will list how long
+    ago the tree was built in a human-friendly format. 
Here is an example
+    of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   --
+    319703483166135013357056057156686910549735243776   --
+    ...
+    ```
+
+    Here is an example of built trees:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.3 hr
+    319703483166135013357056057156686910549735243776   5.3 hr
+    ...
+    ```
+
+    After all the trees are built you then have to wait for a full
+    exchange round to occur for every partition on every node. That is,
+    the full exchange round must be **NEWER** than the time the tree was
+    built. That way you know the exchange was based on the latest tree.
+    The exchange information is found under the `Exchanges` section.
+    Under that section there are two columns: `Last (ago)` and `All
+    (ago)`. In this case you want to wait until the `All (ago)` value is
+    newer than the value of `Built (ago)` in the `Entropy Trees` section.
+    For example, given the entropy tree output above this output would
+    indicate both partitions have had a full exchange round since the
+    latest tree was built:
+
+    ```
+    ================================== Exchanges ==================================
+    Index                                              Last (ago)    All (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+    319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+    ...
+    ```
+
+    Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than
+    `5.3 hr`. Once the exchange is newer for every partition on every
+    node you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command that will give HTTP and PB query
+control to the new Riak Search.
+
+    ```bash
+    riak-admin search switch-to-new-search
+    ```
+
+    <div class="note">
+    <div class="title">Check Results Before Switching (Optional)</div>
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After the `switch-to-new-search` is run all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching then you can use its dedicated
+    HTTP resource at `/search/query/<index>?q=...`.
+    </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+        %% Other configs
+        {enabled, false},
+        %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search as well as to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search it causes
+the commit hooks to persist even when legacy Search is disabled and the
+search property is set to false. 
+
+    New Search has code to expunge the legacy hooks from the raw ring
+    but it only occurs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than during the middle of a migration.
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/setup/upgrading/version.md b/content/riak/kv/2.9.9/setup/upgrading/version.md
new file mode 100644
index 0000000000..f86e2904a9
--- /dev/null
+++ b/content/riak/kv/2.9.9/setup/upgrading/version.md
@@ -0,0 +1,252 @@
+---
+title: "Upgrading to Riak KV 2.9.9"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Upgrading to 2.9.9"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/2.9.9/upgrade-v20/
+  - /riak/kv/2.9.9/ops/upgrading/rolling-upgrades/
+  - /riak/kv/2.9.9/setup/upgrading/cluster/
+
+---
+
+
+[production checklist]: {{<baseurl>}}riak/kv/2.9.9/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.9/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/2.9.9/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.9/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/2.9.9/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/2.9.9/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/2.9.9/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 2.9.9 by following the instructions below.
+
+{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+    * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to potentially downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config.
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to over-ride the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete. 
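+
+As a hedged illustration only, the general process above condenses to something like the following for a single Debian/Ubuntu node. The package file name and node name are placeholders; see the distribution-specific steps below for the authoritative commands:
+
+```bash
+# Stop the node, then back up config, data, and patches
+riak stop
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak \
+    /usr/lib/riak/lib/basho-patches
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+
+# Upgrade the package (placeholder file name)
+sudo dpkg -i riak_2.9.9-1_amd64.deb
+
+# Restart, verify the version, and wait for the node to settle
+riak start
+riak version
+riak-admin wait-for-service riak_kv riak@192.168.1.11
+riak-admin transfers
+```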
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Transitioning to Leveled backend
+
+
+[Riak KV 2.9][release notes] introduced a new backend specifically for Riak, Leveled:
+
+The leveled backend is not compatible with other backends in terms of the serialised disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+* First transition to 2.9 with the current backend in-place, minimising the time spent running mismatched versions in parallel;
+* Then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+{{% note %}}
+If you plan to use Riak search, you must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.9.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features may be introduced that we either believe are so important that we automatically opt-in users on upgrade or there is no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted-in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt-out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade, data written in HLL format is unreadable. |
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade to version 2.9, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new Leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+
+## Upgrading process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories.
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+
+4\. 
Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package right now, before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+   ```advanced.config
+   {riak_kv, [
+     {override_capability, [
+       {object_hash_version, [{use, legacy}] }
+     ]}
+   ]}
+   ```
+
+5.b\. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+   ```riak.conf
+   leveldb.compression.algorithm=snappy
+   ```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS 2.2.3 or earlier, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+   ```advanced.config
+   {riak_repl, [
+     {override_capability, [
+       {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+     ]}
+   ]}
+   ```
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+If you wish to use Riak search, you must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 2.9.9. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+riak@192.168.1.11)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf. 
This data is transferred to the node when it becomes available. + +10\. Repeat the process for the remaining nodes in the cluster. + + +### Basho Patches + +After upgrading, you should ensure that any custom patches contained in the `basho-patches` directory are examined to determine their application to the upgraded version. You can find this information in the [Release Notes]. + +If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production. + +The following lists locations of the `basho-patches` directory for +each supported operating system: + +- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches` +- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches` +- FreeBSD: `/usr/local/lib/riak/lib/basho-patches` +- SmartOS: `/opt/local/lib/riak/lib/basho-patches` +- Solaris 10: `/opt/riak/lib/basho-patches` + +### Riaknostic + +It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility Riaknostic. + +Ensure that Riak KV is running on the node, and issue the following command: + +```bash +riak-admin diag +``` + +Make the recommended changes from the command output to ensure optimal node operation. + + + + + diff --git a/content/riak/kv/2.9.9/using.md b/content/riak/kv/2.9.9/using.md new file mode 100644 index 0000000000..9cae45b1ca --- /dev/null +++ b/content/riak/kv/2.9.9/using.md @@ -0,0 +1,78 @@ +--- +title: "Using Riak KV" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Using" + identifier: "managing" + weight: 201 + pre: database +toc: true +aliases: +--- + +[use running cluster]: ../using/running-a-cluster +[use admin index]: ../using/admin/ +[cluster ops index]: ../using/cluster-operations +[repair recover index]: ../using/repair-recovery +[security index]: ../using/security +[perf index]: ../using/performance +[troubleshoot index]: ../using/troubleshooting +[use ref]: ../using/reference + +## In This Section + +#### [Running a Cluster][use running cluster] + +A guide on basic cluster setup. + +[Learn More >>][use running cluster] + +#### [Cluster Administration][use admin index] + +Tutorials and reference documentation on cluster administration commands as well as command-line tools. + +[Learn More >>][use admin index] + +#### [Cluster Operations][cluster ops index] + +Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups. + +[Learn More >>][cluster ops index] + +#### [Repair & Recovery][repair recover index] + +Contains documentation on repairing a cluster, recovering from failure, and common errors. + +[Learn More >>][repair recover index] + +#### [Security][security index] + +Information on securing your Riak KV cluster. + +[Learn More >>][security index] + +#### [Performance][perf index] + +Articles on benchmarking your Riak KV cluster and improving performance. + +[Learn More >>][perf index] + +#### [Troubleshooting][troubleshoot index] + +Guides on troubleshooting issues and current product advisories. + +[Learn More >>][troubleshoot index] + +#### [Reference][use ref] + +Articles providing background information and implementation details on topics such as logging, bucket types, and search. 
[Learn More >>][use ref]
+
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/admin.md b/content/riak/kv/2.9.9/using/admin.md
new file mode 100644
index 0000000000..fdbbdd7475
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/admin.md
@@ -0,0 +1,51 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/running/cluster-admin
+  - /riak/kv/2.9.9/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/admin/commands.md b/content/riak/kv/2.9.9/using/admin/commands.md
new file mode 100644
index 0000000000..d04f755d9f
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/admin/commands.md
@@ -0,0 +1,378 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/running/cluster-admin
+  - /riak/kv/2.9.9/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command. 
This will execute the changes that
+have been staged and reviewed.
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
+interface are also available as self-standing commands. The `riak-admin
+member-status` command is now the `riak-admin cluster status` command,
+`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+|        node        |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` - There are five possible values for status:
+  * `valid` - The node has begun participating in cluster operations
+  * `leaving` - The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` - The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` - The node is in the process of joining the cluster but
+    has not yet completed the join process
+  * `down` - The node is not currently responding
+* `avail` - There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` - What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` - The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant node][cluster ops add remove node] will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceeds automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
+[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down. 
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+`riak-admin cluster plan` only displays staged changes; it never applies
+or discards them. Staged changes are discarded with [`cluster clear`](#clear),
+whose effect depends on what has been staged:
+
+* If a `leave` operation has been staged, `riak-admin cluster clear` will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, `riak-admin cluster clear` will shut down the joining node after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* If `riak-admin cluster clear` is run on a node that remains in the cluster, the node itself is unaffected.
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
+ +If there is a staged change (or changes), however, you +will see a detailed listing of what will take place upon commit, what +the cluster will look like afterward, etc. + +For example, if a `cluster leave` operation is staged in a 3-node cluster the output will look something like this: + +``` +=============================== Staged Changes ================================ +Action Details(s) +------------------------------------------------------------------------------- +leave 'dev2@127.0.0.1' +------------------------------------------------------------------------------- + + +NOTE: Applying these changes will result in 2 cluster transitions + +############################################################################### + After cluster transition 1/2 +############################################################################### + +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +leaving 32.8% 0.0% 'dev2@127.0.0.1' +valid 34.4% 50.0% 'dev1@127.0.0.1' +valid 32.8% 50.0% 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0 + +WARNING: Not all replicas will be on distinct nodes + +Transfers resulting from cluster changes: 38 + 6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1' + 11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1' + 5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1' + 16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1' + +############################################################################### + After cluster transition 2/2 +############################################################################### + +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +valid 50.0% -- 'dev1@127.0.0.1' +valid 50.0% -- 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + +WARNING: Not all replicas will be on distinct nodes +``` + +Notice that there are distinct sections of the output for each of the +transitions that the cluster will undergo, including warnings, planned +data transfers, etc. + +## commit + +Commits the currently staged cluster changes. Staged cluster changes +must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed. + +```bash +riak-admin cluster commit +``` + +## clear + +Clears the currently staged cluster changes. + +```bash +riak-admin cluster clear +``` + +## partitions + +Prints primary, secondary, and stopped partition indices and IDs either +for the current node or for another, specified node. 
The following
+prints that information for the current node:
+
+```bash
+riak-admin cluster partitions
+```
+
+This would print the partition information for a different node in the
+cluster:
+
+```bash
+riak-admin cluster partitions --node=<node>
+```
+
+Partition information is contained in a table like this:
+
+```
+Partitions owned by 'dev1@127.0.0.1':
++---------+-------------------------------------------------+--+
+|  type   |                      index                      |id|
++---------+-------------------------------------------------+--+
+| primary |                        0                        |0 |
+| primary | 91343852333181432387730302044767688728495783936 |4 |
+| primary |182687704666362864775460604089535377456991567872 |8 |
+|   ...   |                      ....                       |..|
+| primary |1438665674247607560106752257205091097473808596992|63|
+|secondary|                       --                        |--|
+| stopped |                       --                        |--|
++---------+-------------------------------------------------+--+
+```
+
+## partition-count
+
+Displays the current partition count either for the whole cluster or for
+a particular node. This would display the partition count for the
+cluster:
+
+```bash
+riak-admin cluster partition-count
+```
+
+This would display the count for a node:
+
+```bash
+riak-admin cluster partition-count --node=<node>
+```
+
+When retrieving the partition count for a node, you'll see a table like
+this:
+
+```
++--------------+----------+-----+
+|     node     |partitions| pct |
++--------------+----------+-----+
+|dev1@127.0.0.1|    22    | 34.4|
++--------------+----------+-----+
+```
+
+The `partitions` column displays the number of partitions claimed by the
+node, while the `pct` column displays the percentage of the ring claimed.
+
+## partition
+
+The `cluster partition` command enables you to convert partition IDs to
+indexes and vice versa using the `partition id` and `partition index`
+commands, respectively. Let's say that you run the `riak-admin cluster
+partitions` command and see that you have a variety of partitions, one
+of which has an index of
+`1004782375664995756265033322492444576013453623296`. You can convert
+that index to an ID like this:
+
+```bash
+riak-admin cluster partition index=1004782375664995756265033322492444576013453623296
+```
+
+Conversely, if you have a partition with an ID of 20, you can retrieve
+the corresponding index:
+
+```bash
+riak-admin cluster partition id=20
+```
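+
+Because partitions are evenly spaced around the ring, the two values are
+also related by simple arithmetic: an ID multiplied by `2^160 / ring_size`
+yields the corresponding index. A quick sanity check, assuming `python3`
+is available on the node and a ring size of 64:
+
+```bash
+# Compute the index for partition ID 20 in a 64-partition ring:
+python3 -c 'print(20 * (2**160 // 64))'
+```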
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/admin/riak-admin.md b/content/riak/kv/2.9.9/using/admin/riak-admin.md
new file mode 100644
index 0000000000..57bd03205f
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/admin/riak-admin.md
@@ -0,0 +1,721 @@
+---
+title: "riak-admin Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "riak-admin CLI"
+    identifier: "cluster_admin_cli"
+    weight: 101
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/running/tools/riak-admin
+  - /riak/kv/2.9.9/ops/running/tools/riak-admin
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+[use admin commands]: {{<baseurl>}}riak/kv/2.9.9/using/admin/commands
+[use admin commands#join]: {{<baseurl>}}riak/kv/2.9.9/using/admin/commands/#join
+[use admin commands#leave]: {{<baseurl>}}riak/kv/2.9.9/using/admin/commands/#leave
+[cluster ops backup]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/backing-up
+[config reference#node-metadata]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/#node-metadata
+[cluster ops change info]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/changing-cluster-info
+[usage mapreduce]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce
+[usage commit hooks]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/commit-hooks
+[config reference#ring]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/#ring
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/inspecting-node
+[use ref monitoring]: {{<baseurl>}}riak/kv/2.9.9/using/reference/statistics-monitoring
+[downgrade]: {{<baseurl>}}riak/kv/2.9.9/setup/downgrade
+[security index]: {{<baseurl>}}riak/kv/2.9.9/using/security/
+[security managing]: {{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/bucket-types
+[cluster ops 2i]: {{<baseurl>}}riak/kv/2.9.9/using/reference/secondary-indexes
+[repair recover index]: {{<baseurl>}}riak/kv/2.9.9/using/repair-recovery
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/strong-consistency
+[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/handoff
+[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#stats
+
+## `riak-admin`
+
+The `riak-admin` command performs operations unrelated to node liveness, including:
+node membership, backup, and basic status reporting. The node must be
+running for most of these commands to work.
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up, respectively.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. `reip` should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs have been fixed related to reip in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+## js-reload
+
+Forces the embedded JavaScript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+**Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
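+
+For example, to temporarily raise the limit on a single node during a
+planned ownership change (the nodename below is illustrative):
+
+```bash
+# Show the current transfer limit on every node:
+riak-admin transfer-limit
+
+# Allow 4 concurrent transfers on one node until its next restart:
+riak-admin transfer-limit riak@192.168.1.10 4
+```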
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Outputs system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process names via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10
+riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into the operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
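+
+Since the output is plain text, a rough way to review just one of the
+tables is to slice it out with standard tools. A minimal sketch, assuming
+the section headings shown above:
+
+```bash
+# Print only the "Entropy Trees" table from the AAE status output:
+riak-admin aae-status | sed -n '/Entropy Trees/,/Keys Repaired/p'
+```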
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
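+
+A hedged sketch of a typical first-time setup follows; the username,
+password, and source below are illustrative only, and the exact
+subcommand forms should be confirmed against the guides linked above:
+
+```bash
+# Create a user and allow trusted connections from localhost:
+riak-admin security add-user alice password=secret123
+riak-admin security add-source alice 127.0.0.1/32 trust
+
+# Grant key/value read and write on the default bucket type:
+riak-admin security grant riak_kv.get,riak_kv.put on default to alice
+
+# Turn security on only after users and sources are in place:
+riak-admin security enable
+```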
+ +## bucket-type + +Bucket types are a means of managing bucket properties introduced in +Riak 2.0, as well as an additional namespace in Riak in addition to +buckets and keys. This command enables you to create and modify bucket +types, provide the status of currently available bucket types, and +activate created bucket types. + +```bash +riak-admin bucket-type <command> +``` + +More on bucket types can be found in [Using Bucket Types][cluster ops bucket types]. + +## repair-2i + +This command repairs [secondary indexes][cluster ops 2i] in a +specific partition or on a cluster-wide basis. Implementation details +can be found in [Repairing Indexes][repair recover index]. + +To repair secondary indexes throughout the entire cluster, run the +`repair-2i`command by itself, without a subcommand: + +```bash +riak-admin repair-2i +``` + +This will initiate the repair process. When you run this command, you +should see something like the following (where `<ring_size>` is the +number of partitions in your Riak cluster): + +``` +Will repair 2i data on <ring_size> partitions +Watch the logs for 2i repair progress reports +``` + +To repair secondary indexes in a specific partition, provide the ID of +the partition along with the `repair-2i` command: + +```bash +riak-admin repair-2i 593735040165679310520246963290989976735222595584 +``` + +You can check on the status of the repair process at any time: + +```bash +riak-admin repair-2i status +``` + +If the repair is already finished, the console will return `2i repair is +not running`. If the repair is still in progress, the console will +return a series of statistics like this: + +``` +2i repair status is running: + Total partitions: 64 + Finished partitions: 44 + Speed: 100 + Total 2i items scanned: 0 + Total tree objects: 0 + Total objects fixed: 0 +``` + +If you're concerned about the computational resources required to repair +secondary indexes, you can set the speed of the process to an integer +between 1 and 100 (with 100 being the fastest). This command would set +the speed to 90: + +```bash +riak-admin repair-2i --speed 90 +``` + +The repair process can be stopped at any moment using the `kill` +command: + +```bash +riak-admin repair-2i kill +``` + +## search + +The search command provides sub-commands for various administrative +work related to the new Riak Search. + +```bash +riak-admin search <command> +``` + +### aae-status + +```bash +riak-admin search aae-status +``` + +Output active anti-entropy (AAE) statistics for search. There are +three sections. Each section contains statistics for a specific aspect +of AAE for every partition owned by the local node. + +The first section provides information on exchanges. Exchange is the +process of comparing hash trees to determine divergences between KV +data and search indexes. The `Index` column contains the partition +number. The `Last (ago)` column is the amount of time that has passed +since the last exchange. The `All (ago)` column is the amount of time +that has passed since all preflists for that partition have been +exchanged. + +The second section lists how much time has passed since the hashtree +for that partition has been built from scratch. By default trees +expire after 1 week and are rebuilt from scratch. + +The third section presents statistics on repair operations that have +occurred. Repair is performed when AAE notices that the KV and search +hashtree don't match for a particular key. The `Last` column is the +number of keys repaired during the last exchange. 
The `Mean` column is +the average number of keys repaired for all exchange rounds since the +node has started. The `Max` column is the maximum number of keys +repaired for a given exchange round since the node has started. + +### switch-to-new-search + +{{% note title="Only For Legacy Migration" %}} +This is only needed when migrating from legacy riak search to the new Search +(Yokozuna). +{{% /note %}} + +```bash +riak-admin search switch-to-new-search +``` + +Switch handling of the HTTP `/solr/<index>/select` resource and +protocol buffer query messages from legacy Riak Search to new Search +(Yokozuna). + +## services + +Lists available services on the node (e.g. `riak_kv`). + +```bash +riak-admin services +``` + +## ensemble-status + +This command is used to provide insight into the current status of the +consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature. + +```bash +riak-admin ensemble-status +``` + +This command can also be used to check on the status of a specific +consensus group in your cluster: + +```bash +riak-admin ensemble-status <group id> +``` + +Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency]. + +## handoff + +Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff]. + +## set + +Enables you to change the value of one of Riak's configuration +parameters on the fly, without needing to stop and restart the node. + +```bash +riak-admin set <variable>=<value> +``` + +The set command can only be used for the following +parameters: + +* `transfer_limit` +* `handoff.outbound` +* `handoff.inbound` +* `search.dist_query=off` will disable distributed query for the node +* `search.dist_query=on` will enable distributed query for the node +* `search.dist_query` will get the status of distributed query for the node + +The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted. + + +## show + +Whereas the [`riak-admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak +cluster, the `show` command enables you to view only some of those +statistics. + +```bash +riak-admin show <variable> +``` + +## describe + +Provides a brief description of one of Riak's [configurable parameters][config reference]. 
+ +```bash +riak-admin describe <variable> +``` + +If you want to know the meaning of the `nodename` parameter: + +```bash +riak-admin describe nodename +``` + +That will produce the following output: + +``` +nodename: + Name of the Erlang node +``` + + + + diff --git a/content/riak/kv/2.9.9/using/admin/riak-cli.md b/content/riak/kv/2.9.9/using/admin/riak-cli.md new file mode 100644 index 0000000000..5eae67a451 --- /dev/null +++ b/content/riak/kv/2.9.9/using/admin/riak-cli.md @@ -0,0 +1,204 @@ +--- +title: "riak Command Line Interface" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "riak CLI" + identifier: "cluster_admin_riak_cli" + weight: 102 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/2.9.9/ops/running/tools/riak + - /riak/kv/2.9.9/ops/running/tools/riak +--- + +[configuration file]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/ +[escript]: http://www.erlang.org/doc/man/escript.html +[`riak-admin`]: {{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#top +[configuration]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/ + +## riak + +This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands: + +```bash +Usage: riak «command» +where «command» is one of the following: + { help | start | stop | restart | ping | console | attach + attach-direct | ertspath | chkconfig | escript | version | getpid + top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } | + config { generate | effective | describe VARIABLE } [-l debug] +``` + +## help + +Provides a brief description of all available commands. + +## start + +Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given. + +```bash +riak start +``` + +## stop + +Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding. + +```bash +riak stop +``` + +## restart + +Stops and then starts the running Riak node without exiting the Erlang VM. +Prints `ok` when successful, `Node <nodename> not responding to pings.` when the node is already stopped or not responding. + +```bash +riak restart +``` + +## ping + +Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding. + +```bash +riak ping +``` + +## console + +Starts the Riak node in the foreground, giving access to the Erlang shell and +runtime messages. Prints `Node is already running - use 'riak attach' instead` +when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice. + +```bash +riak console +``` + +## attach + +Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. + +```bash +riak attach +``` + +## attach-direct + +Attaches to the console of a Riak running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**. 
+ +```bash +riak attach-direct +``` + +## ertspath + +Outputs the path of the Riak Erlang runtime environment: + +```bash +riak ertspath +``` + +## chkconfig + +Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output. + +```bash +riak chkconfig +``` + +## escript + +Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment: + +```bash +riak escript <filename> +``` + +## version + +Outputs the Riak version identifier: + +```bash +riak version +``` + +## getpid + +Outputs the process identifier for the currently-running instance of Riak: + +```bash +riak getpid +``` + +## top + +The `riak top` command is the direct equivalent of `riak-admin top`: + +```bash +riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } +``` + +More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation. + +## config + +Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration. + +```bash +riak config { generate | effective | describe VARIABLE } [-l debug] +``` + +* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files. + The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example: + + ``` + -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args + ``` + + If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message: + + ``` + -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args + ``` + +* `effective` prints the effective configuration in the following syntax: + + ``` + parameter1 = value1 + parameter2 = value2 + ``` + + If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error: + + ``` + Disabling cuttlefish, legacy configuration files found: + /etc/riak/app.config + /etc/riak/vm.args + Effective config is only visible for cuttlefish conf files. + ``` + +* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following: + + ``` + Documentation for storage_backend + Specifies the storage engine used for Riak's key-value data + and secondary indexes (if supported). + + Valid Values: + - one of: bitcask, leveldb, memory, multi, prefix_multi + Default Value : bitcask + Set Value : bitcask + Internal key : riak_kv.storage_backend + ``` + +Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem. 
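+
+A short sketch of how these subcommands fit into an editing workflow
+(the parameter name is just an example):
+
+```bash
+# After editing riak.conf, confirm that it still parses:
+riak chkconfig
+
+# Check the value the node will actually use:
+riak config effective | grep ring_size
+
+# Read the built-in documentation for that parameter:
+riak config describe ring_size
+```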
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/admin/riak-control.md b/content/riak/kv/2.9.9/using/admin/riak-control.md
new file mode 100644
index 0000000000..d95cb5e1b4
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/admin/riak-control.md
@@ -0,0 +1,237 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/advanced/riak-control
+  - /riak/kv/2.9.9/ops/advanced/riak-control
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+or
+
+listener.https.<name> = 127.0.0.1:8069
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+
+```
+
+```appconfig
+ {riak_api,
+  [
+    %% Other configs
+    ... if HTTP is configured ...
+    {http,[{"127.0.0.1",8098}]},
+    ... if HTTPS is configured ...
+    {https,[{"127.0.0.1",8069}]},
+    %% Other configs
+  ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found in the document below.
+{{% /note %}}
+
+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, false},
+    %% Other configs
+    ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, true},
+    %% Other configs
+    ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>.
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/2.9.9/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
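+
+One quick, hedged way to confirm that Riak Control is reachable over
+HTTPS is a `curl` request against the listener; the credentials here are
+the illustrative defaults discussed under Authentication below, and `-k`
+skips certificate verification for self-signed certificates:
+
+```bash
+curl -k -u user:pass https://127.0.0.1:8069/admin
+```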
+
+## Authentication
+
+Riak Control provides you the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {auth, userlist}, %% The only other available option is "none"
+    %% Other configs
+    ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {userlist, [
+        {"bob", "bob_is_the_coolest"},
+        {"polly", "h4x0r123"},
+        {"riakrocks", "cap_theorem_4_life"}
+    ]}
+    %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to
+`https://ip_address_of_https_listener:https_port/admin`.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[ ![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png) ] ({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+ +Staged changes to the cluster: + +[ ![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png) ] ({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[ ![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png) ] ({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[ ![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png) ] ({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[ ![Node Management]({{<baseurl>}}images/control_node_management.png) ] ({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode). + +[ ![Ring View]({{<baseurl>}}images/control_current_ring.png) ] ({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. + + + + diff --git a/content/riak/kv/2.9.9/using/cluster-operations.md b/content/riak/kv/2.9.9/using/cluster-operations.md new file mode 100644 index 0000000000..eabd7945f9 --- /dev/null +++ b/content/riak/kv/2.9.9/using/cluster-operations.md @@ -0,0 +1,109 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +aliases: +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak-admin handoff` interface to enable and disable handoff. 
+ +[Learn More >>][ops handoff] + + +#### [Monitoring Strong Consistency][ops strong consistency] + +Overview of the various statistics used in monitoring strong consistency. + +[Learn More >>][ops strong consistency] + + +#### [V3 Multi-Datacenter][ops v3 mdc] + +Explains how to manage V3 replication with the `riak-repl` command. + +[Learn More >>][ops v3 mdc] + + +#### [V2 Multi-Datacenter][ops v2 mdc] + +Explains how to manage V2 replication with the `riak-repl` command. + +[Learn More >>][ops v2 mdc] + + + + diff --git a/content/riak/kv/2.9.9/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/2.9.9/using/cluster-operations/active-anti-entropy.md new file mode 100644 index 0000000000..8439eb7d5e --- /dev/null +++ b/content/riak/kv/2.9.9/using/cluster-operations/active-anti-entropy.md @@ -0,0 +1,289 @@ +--- +title: "Managing Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Managing Active Anti-Entropy" + identifier: "cluster_operations_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +version_history: + in: "2.9.1+" +aliases: + - /riak/kv/2.9.9/ops/advanced/aae/ + - /riak/2.9.9/ops/advanced/aae/ +--- +[config search#throttledelay]: {{<baseurl>}}riak/kv/2.9.9/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{<baseurl>}}riak/kv/2.9.9/configuring/search/#search-anti-entropy-throttle + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak. + +## Enabling Active Anti-Entropy + +Whether AAE is currently enabled in a node is determined by the value of +the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/). + +In Riak versions 2.0 and later, AAE is turned on by default. + +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. 
+ +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak-admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... + +================================ Entropy Trees ================================ +Index Built (ago) +------------------------------------------------------------------------------- +0 5.7 d +22835963083295358096932575511191922182123945984 5.6 d +45671926166590716193865151022383844364247891968 5.5 d +68507889249886074290797726533575766546371837952 4.3 d +91343852333181432387730302044767688728495783936 4.8 d + +================================ Keys Repaired ================================ +Index Last Mean Max +------------------------------------------------------------------------------- +0 0 0 0 +22835963083295358096932575511191922182123945984 0 0 0 +45671926166590716193865151022383844364247891968 0 0 0 +68507889249886074290797726533575766546371837952 0 0 0 +91343852333181432387730302044767688728495783936 0 0 0 + +``` + +Each of these three tables contains information for each +[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories: + +Category | Measures | Description +:--------|:---------|:----------- +**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed + | `All` | How long it has been since a partition exchanged with all of its replicas +**Entropy Trees** | `Built` | When the hash trees for a given partition were created +**Keys Repaired** | `Last` | The number of keys repaired during all key exchanges since the last node restart + | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart + | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart + +All AAE status information obtainable using the `riak-admin aae-status` +command is stored in-memory and is reset when a node is restarted with +the exception of hash tree build information, which is persisted on disk +(because hash trees themselves are persisted on disk). + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and +off but also to fine-tune your cluster's use of AAE, e.g. how +much memory AAE processes should consume, how frequently specific +processes should be run, etc. 
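+
+Before changing anything, it can help to confirm what a node is actually
+running with. One way to do so, assuming the riak.conf-based
+configuration system:
+
+```bash
+# List the effective AAE-related settings on this node:
+riak config effective | grep anti_entropy
+```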
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach a specified length;
+the delay observed at each mailbox-size tier is set by the [`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on
+that in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but
+entails a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data corruption.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built. You can set the frequency using the
+`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
+default is every hour (`1h`); the number of hash tree builds is
+specified by `anti_entropy.tree.build_limit.number`, for which the
+default is 1.
+
+### Write Buffer Size
+
+While you are free to choose the backend for data storage in Riak,
+background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
+write buffer used by LevelDB for hash tree generation using the
+`anti_entropy.write_buffer_size` parameter. The default is `4MB`.
+
+### Open Files and Concurrency Limits
+
+The `anti_entropy.concurrency_limit` parameter determines how many AAE
+cross-node information exchanges or hash tree builds can happen
+concurrently. The default is `2`.
+
+The `anti_entropy.max_open_files` parameter sets an open-files limit for
+AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.
+
+## AAE and Riak Search
+
+Riak's AAE subsystem works to repair object inconsistencies both for
+normal key/value objects and for data related to [Riak Search](../../../developing/usage/search). In particular, AAE acts on indexes stored in
+[Solr](http://lucene.apache.org/solr/), the search platform that drives
+Riak Search. Implementation details for AAE and Search can be found in
+the [Search Details](../../reference/search/#active-anti-entropy-aae)
+documentation.
+
+You can check on the status of Search-related AAE using the following
+command:
+
+```bash
+riak-admin search aae-status
+```
+
+The output from that command can be interpreted just like the output
+discussed in the section on [monitoring](#monitoring-aae) above.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes.md
new file mode 100644
index 0000000000..d492af68a6
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes.md
@@ -0,0 +1,198 @@
+---
+title: "Adding / Removing Nodes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Adding/Removing Nodes"
+    identifier: "cluster_operations_add_remove_nodes"
+    weight: 100
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/running/nodes/adding-removing
+  - /riak/kv/2.9.9/ops/running/nodes/adding-removing
+---
+
+[use running cluster]: {{<baseurl>}}riak/kv/2.9.9/using/running-a-cluster
+
+This page describes the process of adding and removing nodes to and from
+a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster].
+
+## Start the Node
+
+Just like the initial configuration steps, this step has to be repeated
+for every node in your cluster. Before a node can join an existing
+cluster it needs to be started. Depending on your mode of installation,
+use either the init scripts installed by the Riak binary packages or
+simply the script [`riak`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-cli/):
+
+```bash
+/etc/init.d/riak start
+```
+
+or
+
+```bash
+bin/riak start
+```
+
+When the node starts, it will look for a cluster description, known as
+the **ring file**, in its data directory. 
If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. This command +would stage a join to that cluster: + +```bash +bin/riak-admin cluster join riak@192.168.2.2 +``` + +If the join request is successful, you should see the following: + +``` +Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2' +``` + +If you have multiple nodes that you would like to join to an existing +cluster, repeat this process for each of them. + +## Joining Nodes to Form a Cluster + +The process of joining a cluster involves several steps, including +staging the proposed cluster nodes, reviewing the cluster plan, and +committing the changes. + +After staging each of the cluster nodes with `riak-admin cluster join` +commands, as in the section above, the next step in forming a cluster is +to review the proposed plan of changes. This can be done with the +`riak-admin cluster plan` command, which is shown in the example below. 
+
+```
+=============================== Staged Changes ================================
+Action Nodes(s)
+-------------------------------------------------------------------------------
+join 'riak@192.168.2.3'
+join 'riak@192.168.2.4'
+join 'riak@192.168.2.5'
+join 'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+ After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status Ring Pending Node
+-------------------------------------------------------------------------------
+valid 100.0% 20.3% 'riak@192.168.2.2'
+valid 0.0% 20.3% 'riak@192.168.2.3'
+valid 0.0% 20.3% 'riak@192.168.2.4'
+valid 0.0% 20.3% 'riak@192.168.2.5'
+valid 0.0% 18.8% 'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+ 12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+ 13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+ 13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+ 13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from the cluster in two ways. The first assumes that
+a node is decommissioned, for example, because its added capacity is not
+needed anymore or because it's explicitly replaced with a new one. The
+second is relevant for failure scenarios in which a node has crashed and
+is irrecoverable and thus must be removed from the cluster from another
+node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster.
+
+Similarly to joining a node, after executing `riak-admin cluster leave`
+the cluster plan must be reviewed with `riak-admin cluster plan` and
+the changes committed with `riak-admin cluster commit`.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing. Running
+`riak-admin cluster leave` without a node argument simply selects the
+current node for you automatically.
+
+As with `riak-admin cluster leave`, the plan to have a node leave the
+cluster must first be reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before any changes will
+actually take place.
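+
+Putting those pieces together, a typical removal looks like the sketch
+below (the example node name is the one used earlier on this page):
+
+```bash
+# On the node that should leave the cluster:
+riak-admin cluster leave
+
+# Or, from any other node, naming the departing node explicitly:
+riak-admin cluster leave riak@192.168.2.1
+
+# In either case, review the staged changes and then commit them:
+riak-admin cluster plan
+riak-admin cluster commit
+```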
+
+
+## Pausing a `join` or `leave`
+
+{{% note title="Warning" %}}
+Pausing may impact cluster health and is not recommended for more than a short period of time.
+{{% /note %}}
+
+To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0:
+
+```bash
+riak-admin transfer-limit <node> 0
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/cluster-operations/backend.md b/content/riak/kv/2.9.9/using/cluster-operations/backend.md
new file mode 100644
index 0000000000..b6977b8507
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/cluster-operations/backend.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Backend"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+#menu:
+#  riak_kv-2.9.9:
+#    name: "Backend"
+#    identifier: "cluster_operations_backend"
+#    weight: 112
+#    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/cluster-operations/backing-up.md b/content/riak/kv/2.9.9/using/cluster-operations/backing-up.md
new file mode 100644
index 0000000000..6d013e817b
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/cluster-operations/backing-up.md
@@ -0,0 +1,271 @@
+---
+title: "Backing Up"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Backing Up"
+    identifier: "cluster_operations_backing_up"
+    weight: 106
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/running/backups
+  - /riak/kv/2.9.9/ops/running/backups
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters
+[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+[plan backend leveldb]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/bitcask
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/reference/strong-consistency
+[concept aae]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/
+[aae read repair]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy
+
+Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios.
+
+Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios.
+
+This page covers how to perform backups of Riak KV data.
+
+## Overview
+
+Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar.
+
+Your choice of Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes.
+
+The basic process for getting a backup of Riak KV from a node is as follows:
+
+1. Stop Riak KV with `riak stop`.
+2. Back up the appropriate data, ring, and configuration directories.
+3. Start Riak KV.
+
+Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting.
+
+{{% note title="Backups and eventual consistency" %}}
+Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node.
+
+Data could exist on some nodes and not others at the exact time a backup is made. Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy).
+{{% /note %}}
+
+## OS-Specific Directory Locations
+
+The default Riak KV data, ring, and configuration directories for each of the supported operating systems are as follows:
+
+#### Debian and Ubuntu
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### Fedora and RHEL
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### FreeBSD
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/usr/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### OS X
+
+Data | Directory
+:----|:---------
+Bitcask | `./data/bitcask`
+LevelDB | `./data/leveldb`
+Ring | `./data/riak/ring`
+Configuration | `./etc`
+Cluster Metadata | `./data/riak/cluster_meta`
+Search | `./data/riak/yz`
+Strong consistency | `./data/ensembles`
+
+**Note**: OS X paths are relative to the directory in which the package
+was extracted.
+
+#### SmartOS
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/opt/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### Solaris
+
+Data | Directory
+:----|:---------
+Bitcask | `/opt/riak/data/bitcask`
+LevelDB | `/opt/riak/data/leveldb`
+Ring | `/opt/riak/ring`
+Configuration | `/opt/riak/etc`
+Cluster Metadata | `/opt/riak/cluster_meta`
+Search | `/opt/riak/yz`
+Strong consistency | `/opt/riak/data/ensembles`
+
+## Performing Backups
+
+{{% note title="Deprecation notice" %}}
+In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#backup) command commonly used for
+backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
+{{% /note %}}
+
+Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.
+
+Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
+installation.
+
+The following examples use `tar`:
+
+{{% note %}}
+Backups must be performed while Riak KV is stopped to prevent data loss.
+{{% /note %}} + +### Bitcask + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak +``` + +### LevelDB + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak +``` + +### Cluster Metadata + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/cluster_meta +``` + +### Search / Solr Data + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/yz +``` + +### Strong Consistency Data + +Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature +can be stored in an analogous fashion: + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/ensembles +``` + +## Restoring a Node + +The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment. + +If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process: + +1. Install Riak on the new node. +2. Restore your old node's configuration files, data directory, and ring + directory. +3. Start the node and verify proper operation with `riak ping`, + `riak-admin status`, and other methods you use to check node health. + +If the node name of a restored node (`-name` argument in `vm.args` or +`nodename` parameter in `riak.conf`) is different than the name of the +node that the restored backup was taken from, you will need to +additionally: + +1. Mark the original instance down in the cluster using + [`riak-admin down <node>`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#down) +2. Join the restored node to the cluster using + [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster-join) +3. Replace the original instance with the renamed instance with + [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster-force-replace) +4. Plan the changes to the cluster with `riak-admin cluster plan` +5. Finally, commit the cluster changes with `riak-admin cluster commit` + +{{% note %}} +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/2.9.9/using/admin/). +{{% /note %}} + +For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. + +1. Join to any existing cluster node. + + ```bash + riak-admin cluster join riak@riak2.example.com + ``` + +2. Mark the old instance down. + + ```bash + riak-admin down riak@riak1.example.com + ``` + +3. Force-replace the original instance with the new one. + + ```bash + riak-admin cluster force-replace \ + riak@riak1.example.com riak@riak6.example.com + ``` + +4. Display and review the cluster change plan. + + ```bash + riak-admin cluster plan + ``` + +5. Commit the changes to the cluster. 
+
+   ```bash
+   riak-admin cluster commit
+   ```
+
+In addition to running the commands above, your [configuration files][config reference] should also be changed to match the new name (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change but the hostnames, which are used as the node names, stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member_status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/2.9.9/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/cluster-operations/bucket-types.md b/content/riak/kv/2.9.9/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..136d6c0cc6
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/cluster-operations/bucket-types.md
@@ -0,0 +1,63 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional setup step on the
+command line.
+
+## Creating a Bucket Type
+
+When creating a new bucket type, you can create it without
+any properties and set individual buckets to be indexed. 
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + + + + diff --git a/content/riak/kv/2.9.9/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/2.9.9/using/cluster-operations/changing-cluster-info.md new file mode 100644 index 0000000000..6b76e88c2a --- /dev/null +++ b/content/riak/kv/2.9.9/using/cluster-operations/changing-cluster-info.md @@ -0,0 +1,458 @@ +--- +title: "Changing Cluster Information" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Changing Cluster Info" + identifier: "cluster_operations_change_info" + weight: 101 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/2.9.9/ops/running/nodes/renaming + - /riak/kv/2.9.9/ops/running/nodes/renaming +--- + +[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference + +## Change the Node Name + +The node name is an important setting for the Erlang VM, especially when +you want to build a cluster of nodes, as the node name identifies both +the Erlang application and the host name on the network. All nodes in +the Riak cluster need these node names to communicate and coordinate +with each other. + +In your configuration files, the node name defaults to `riak@127.0.0.1`. +To change the node name, change the following line: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +Change it to something that corresponds to either the IP address or a +resolvable host name for this particular node, like so: + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +## Change the HTTP and Protocol Buffers binding address + +By default, Riak's HTTP and Protocol Buffers services are bound to the +local interface, i.e. 127.0.0.1, and are therefore unable to serve +requests from the outside network. The relevant setting is in your +[configuration files][config reference]: + +```riakconf +# For HTTP +listener.http.internal = 127.0.0.1:8098 + +# For Protocol Buffers +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +% In the riak_api section + +% For HTTP +{http, [ {"127.0.0.1", 8098 } ]}, + +% For Protocol Buffers +{pb, [ {"127.0.0.1", 8087} ] }, +``` + +Either change it to use an IP address that corresponds to one of the +server's network interfaces, or 0.0.0.0 to allow access from all +interfaces and networks, e.g.: + +```riakconf +listener.http.internal = 0.0.0.0:8098 +``` + +```appconfig +% In the riak_core section +{http, [ {"0.0.0.0", 8098 } ]}, +``` + +The same configuration should be changed for the Protocol Buffers +interface if you intend on using it (which we recommend). 
Change the +following line: + +```riakconf +listener.protobuf.internal = 0.0.0.0:8087 +``` + +```appconfig +% In the riak_core section +{pb, [ {"0.0.0.0", 8087} ] }, +``` + +## Rename Single Node Clusters + +To rename a single-node development cluster: + +1. Stop the node with `riak stop`. + +2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args` to the new name. + +3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`. + +4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`. + +5. Start Riak on the node with `riak start`. + + +## Rename Multi-Node Clusters + +For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. + +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. + +There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. + +The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method. + +### Example Scenario + +For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration: + +* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11 +* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12 +* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13 +* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14 +* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15 + +The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use. + +The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation. + +### Process + +This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section. + +1. [Down the node to be reconfigured](#down) +2. [Reconfigure node to use new address](#reconfigure) +3. [Repeat previous steps on each node](#repeat) + + +<a id="down"></a> +#### Down the Node + +1. Stop Riak on `node1.localdomain`: + + ```bash + riak stop + ``` + + The output should look like this: + + ``` + Attempting to restart script through sudo -H -u riak + ok + ``` + +2. 
From the `node2.localdomain` node, mark `riak@10.1.42.11` down: + + ```bash + riak-admin down riak@10.1.42.11 + ``` + + Successfully marking the node down should produce output like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: "riak@10.1.42.11" marked as down + ``` + + This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node. + +<a id="reconfigure"></a> +#### Reconfigure Node to Use New Address + +Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps: + +1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example: + + `riak.conf`: `nodename = riak@192.168.17.11` + `vm.args` : `-name riak@192.168.17.11` + +2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Single Node Clusters](#single-node-clusters). + +3. Rename the node's `ring` directory, the location of which is described in step 4 of [Single Node Clusters](#single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process. + +4. Start Riak on `node1.localdomain`. + + ```bash + riak start + ``` + +5. Join the node back into the cluster. + + ```bash + riak-admin cluster join riak@10.1.42.12 + ``` + + Successful staging of the join request should have output like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12' + ``` + +6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`: + + ```bash + riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11 + ``` + + Successful force replacement staging output looks like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11' + ``` + +7. 
Review the new changes with `riak-admin cluster plan:` + + ```bash + riak-admin cluster plan + ``` + + Example output: + + ```bash + Attempting to restart script through sudo -H -u riak + =========================== Staged Changes ============================ + Action Nodes(s) + ----------------------------------------------------------------------- + join 'riak@192.168.17.11' + force-replace 'riak@10.1.42.11' with 'riak@192.168.17.11' + ----------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.11' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ####################################################################### + After cluster transition 1/1 + ####################################################################### + + ============================= Membership ============================== + Status Ring Pending Node + ----------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@10.1.42.12' + valid 20.3% -- 'riak@10.1.42.13' + valid 20.3% -- 'riak@10.1.42.14' + valid 18.8% -- 'riak@10.1.42.15' + ----------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 13 + 13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11' + ``` + +8. Commit the new changes to the cluster with `riak-admin cluster commit`: + + ```bash + riak-admin cluster commit + ``` + + Output from the command should resemble this example: + + ```bash + Attempting to restart script through sudo -H -u riak + Cluster changes committed + ``` + +9. Check that the node is participating in the cluster and functioning as expected: + + ```bash + riak-admin member-status + ``` + + Output should resemble this example: + + ```bash + Attempting to restart script through sudo -H -u riak + ============================= Membership ============================== + Status Ring Pending Node + ----------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@10.1.42.12' + valid 20.3% -- 'riak@10.1.42.13' + valid 20.3% -- 'riak@10.1.42.14' + valid 18.8% -- 'riak@10.1.42.15' + ----------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +10. Monitor hinted handoff transfers to ensure they have finished with the `riak-admin transfers` command. + +11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed. + +{{% note title="Note" %}} +When using the `riak-admin force-replace` command, you will always get a +warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be +lost`. Since we didn't delete any data files and we are replacing the node +with itself under a new name, we will not lose any replicas. +{{% /note %}} + +<a id="repeat"></a> +#### Repeat previous steps on each node + +Repeat the steps above for each of the remaining nodes in the cluster. + +Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster. 
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the steps below can be used to rename nodes in a cluster that is being restored from backups. These steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (or `-name` in `vm.args`) from `riak@10.1.42.11` to the new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+    ```bash
+    ================================= Membership ==================================
+    Status Ring Pending Node
+    -------------------------------------------------------------------------------
+    valid 20.3% -- 'riak@192.168.17.11'
+    down 20.3% -- 'riak@10.1.42.12'
+    down 20.3% -- 'riak@10.1.42.13'
+    down 20.3% -- 'riak@10.1.42.14'
+    down 18.8% -- 'riak@10.1.42.15'
+    -------------------------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+
+    ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+    ```bash
+    ================================== Claimant ===================================
+    Claimant: 'riak@192.168.17.11'
+    Status: up
+    Ring Ready: true
+
+    ============================== Ownership Handoff ==============================
+    No pending changes.
+
+    ============================== Unreachable Nodes ==============================
+    All nodes are up and reachable
+    ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args` as described above.
+
+2. Move aside the ring directory. As in [Multi-Node Clusters](#multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. Force-replace each original node name with its new one. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`.
+
+6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to the following:
+
+    ```bash
+    =============================== Staged Changes ================================
+    Action Details(s)
+    -------------------------------------------------------------------------------
+    force-replace 'riak@10.1.42.12' with 'riak@192.168.17.12'
+    force-replace 'riak@10.1.42.13' with 'riak@192.168.17.13'
+    force-replace 'riak@10.1.42.14' with 'riak@192.168.17.14'
+    force-replace 'riak@10.1.42.15' with 'riak@192.168.17.15'
+    join 'riak@192.168.17.12'
+    join 'riak@192.168.17.13'
+    join 'riak@192.168.17.14'
+    join 'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+
+    WARNING: All of 'riak@10.1.42.12' replicas will be lost
+    WARNING: All of 'riak@10.1.42.13' replicas will be lost
+    WARNING: All of 'riak@10.1.42.14' replicas will be lost
+    WARNING: All of 'riak@10.1.42.15' replicas will be lost
+
+    NOTE: Applying these changes will result in 1 cluster transition
+
+    ###############################################################################
+     After cluster transition 1/1
+    ###############################################################################
+
+    ================================= Membership ==================================
+    Status Ring Pending Node
+    -------------------------------------------------------------------------------
+    valid 20.3% -- 'riak@192.168.17.11'
+    valid 20.3% -- 'riak@192.168.17.12'
+    valid 20.3% -- 'riak@192.168.17.13'
+    valid 20.3% -- 'riak@192.168.17.14'
+    valid 18.8% -- 'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    Partitions reassigned from cluster changes: 51
+    13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12'
+    13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13'
+    13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14'
+    12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15'
+    ```
+
+7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`.
+
+8. Once the cluster transition has completed, all node names should be changed and marked as valid in `riak-admin member-status`, as shown below:
+
+    ```bash
+    ================================= Membership ==================================
+    Status Ring Pending Node
+    -------------------------------------------------------------------------------
+    valid 20.3% -- 'riak@192.168.17.11'
+    valid 20.3% -- 'riak@192.168.17.12'
+    valid 20.3% -- 'riak@192.168.17.13'
+    valid 20.3% -- 'riak@192.168.17.14'
+    valid 18.8% -- 'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    ```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/cluster-operations/handoff.md b/content/riak/kv/2.9.9/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..536f01d8c8
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/cluster-operations/handoff.md
@@ -0,0 +1,120 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/running/handoff
+  - /riak/kv/2.9.9/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to change your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command requires
+that you specify both a node or nodes to be targeted by the command and
+whether you'd like to disable inbound handoff, outbound handoff, or
+both. The `disable` command works just like `enable`. This command
+would disable all forms of handoff on all nodes, to give just one
+example:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster.
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table that will provide the following information
+about each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, you will see details on each active
+transfer.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the sections above.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/cluster-operations/inspecting-node.md b/content/riak/kv/2.9.9/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..b1b1f88f6a
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,496 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/running/nodes/inspecting
+  - /riak/kv/2.9.9/ops/running/nodes/inspecting
+---
+
+When you need to inspect a Riak node to gather metrics on performance or
+to investigate potential issues, a number of tools are available to
+help. These are either included with Riak itself or made available
+through the Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note, for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required for statistics to be generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node.
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET +FSM Siblings are inextricably linked. These are one-minute stats. + +Stat | Description +------------------------------|--------------------------------------------------- +`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute + +### Total Stats + +Total Stats represent the total number of times a particular activity +has occurred since this node was started. + +Stat | Description +---------------------------------------|--------------------------------------------------- +`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes +`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes +`vnode_gets_total` | Total number of GETs coordinated by local vnodes +`vnode_puts_total` | Total number of PUTS coordinated by local vnodes +`read_repairs_total` | Total number of Read Repairs this node has coordinated +`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination +`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy +`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads +`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes +`vnode_index_writes_postings_total` | Total number of individual secondary index values written +`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes +`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted +`pbc_connects_total` | Total number of Protocol Buffers connections made +`precommit_fail` | Total number of pre-commit hook failures +`postcommit_fail` | Total number of post-commit hook failures +`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection +`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection +`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas +`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas +`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas +`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas + +### Timestamps + +Some of the Erlang applications that Riak is comprised of contribute +statistics to `riak-admin status`. The below timestamps record, in +Epoch time, the last time statistics for that application were +generated. + +Stat | Description +--------------------|--------------------------------------------------- +`riak_kv_stat_ts` | The last time Riak KV stats were generated. +`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated. 
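+
+If you would rather consume these statistics programmatically than parse
+`riak-admin status` output, the same data is exposed as JSON over the
+node's HTTP interface. The sketch below assumes the default listener of
+`127.0.0.1:8098`:
+
+```bash
+# Fetch all node statistics as a single JSON object
+curl -s http://127.0.0.1:8098/stats
+
+# Extract one counter, e.g. the total number of coordinated GETs
+curl -s http://127.0.0.1:8098/stats | \
+  python -c 'import json,sys; print(json.load(sys.stdin)["node_gets_total"])'
+```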
+
+### Ring
+
+General ring information is reported in `riak-admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members`       | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership`     | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang's `cpu_sup` module, whose
+documentation can be found at [ErlDocs:
+cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1`   | The average number of active processes for the last 1 minute (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg5`   | The average number of active processes for the last 5 minutes (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg15`  | The average number of active processes for the last 15 minutes (equivalent to the top(1) command's load average when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine,
+whose documentation can be found at [ErlDocs:
+erlang:memory/0](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total`          | Total allocated memory (sum of processes and system)
+`memory_processes`      | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system`         | Total allocated memory that is not directly related to an Erlang process
+`memory_atom`           | Total amount of memory currently allocated for atom storage
+`memory_atom_used`      | Total amount of memory currently used for atom storage
+`memory_binary`         | Total amount of memory used for binaries
+`memory_code`           | Total amount of memory allocated for Erlang code
+`memory_ets`            | Total memory allocated for Erlang Term Storage
+`mem_total`             | Total available system memory
+`mem_allocated`         | Total memory allocated for this node
+
+### Erlang VM
+
+The statistics below describe properties of the Erlang VM.
+
+Stat | Description
+--------------------------|---------------------------------------------------
+`nodename`                | The name this node uses to identify itself
+`connected_nodes`         | A list of the nodes that this node is aware of at this time
+`sys_driver_version`      | String representing the Erlang driver version in use by the runtime system
+`sys_global_heaps_size`   | Current size of the shared global heap
+`sys_heap_type`           | String representing the heap type in use (one of private, shared, hybrid)
+`sys_logical_processors`  | Number of logical processors available on the system
+`sys_otp_release`         | Erlang OTP release version in use on the node
+`sys_process_count`       | Number of processes currently running in the Erlang VM
+`sys_smp_support`         | Boolean value representing whether symmetric multi-processing (SMP) is available
+`sys_system_version`      | Detailed Erlang version information
+`sys_system_architecture` | The node operating system and hardware architecture
+`sys_threads_enabled`     | Boolean value representing whether threads are enabled
+`sys_thread_pool_size`    | Number of threads in the asynchronous thread pool
+`sys_wordsize`            | Size of Erlang term words in bytes as an integer; for example, 4 is returned on 32-bit architectures and 8 on 64-bit architectures
+
+### Miscellaneous Information
+
+Miscellaneous Information provides additional details particular to this
+node.
+
+Stat | Description
+---------------------------|---------------------------------------------------
+`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as undefined if LevelDB is not being used.
+`disk` | Information about the disk, taken from Erlang's disksup module. Reported as [{"ID",KBytes_Used,Percent_Util}].
+`storage_backend` | The storage backend currently in use.
+
+### Pipeline Metrics
+
+The following metrics from `riak_pipe` are generated during MapReduce
+operations.
+
+Stat | Description
+--------------------------------|---------------------------------------------------
+`pipeline_active`               | The number of pipelines active in the last 60 seconds
+`pipeline_create_count`         | The total number of pipelines created since the node was started
+`pipeline_create_error_count`   | The total number of pipeline creation errors since the node was started
+`pipeline_create_error_one`     | The number of pipeline creation errors in the last 60 seconds
+`pipeline_create_one`           | The number of pipelines created in the last 60 seconds
+
+### Application and Subsystem Versions
+
+The specific version of each Erlang application and subsystem that
+makes up a Riak node is present in the `riak-admin status` output. Each
+application is linked below next to its version identifier.
+
+Stat | Description
+------------------------|---------------------------------------------------
+`erlydtl_version`       | [ErlyDTL](http://github.com/erlydtl/erlydtl)
+`riak_control_version`  | [Riak Control](http://github.com/basho/riak_control)
+`cluster_info_version`  | [Cluster Information](http://github.com/basho/cluster_info)
+`riak_search_version`   | [Riak Search](http://github.com/basho/riak_search)
+`merge_index_version`   | [Merge Index](http://github.com/basho/merge_index)
+`riak_kv_version`       | [Riak KV](http://github.com/basho/riak_kv)
+`sidejob_version`       | [Sidejob](http://github.com/basho/sidejob)
+`riak_api_version`      | [Riak API](http://github.com/basho/riak_api)
+`riak_pipe_version`     | [Riak Pipe](http://github.com/basho/riak_pipe)
+`riak_core_version`     | [Riak Core](http://github.com/basho/riak_core)
+`bitcask_version`       | [Bitcask](http://github.com/basho/bitcask)
+`basho_stats_version`   | [Basho Stats](http://github.com/basho/basho_stats)
+`webmachine_version`    | [Webmachine](http://github.com/basho/webmachine)
+`mochiweb_version`      | [MochiWeb](http://github.com/basho/mochiweb)
+`inets_version`         | [inets](http://erlang.org/doc/apps/inets/)
+`erlang_js_version`     | [Erlang JS](http://github.com/basho/erlang_js)
+`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/)
+`os_mon_version`        | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/)
+`riak_sysmon_version`   | [Riak System Monitor](http://github.com/basho/riak_sysmon)
+`ssl_version`           | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/)
+`public_key_version`    | [Erlang Public Key](http://erlang.org/doc/apps/public_key/)
+`crypto_version`        | [Erlang crypto](http://erlang.org/doc/apps/crypto/)
+`sasl_version`          | [SASL](http://erlang.org/doc/apps/sasl/)
+`lager_version`         | [Lager](http://github.com/DeadZen/lager)
+`goldrush_version`      | [Goldrush](http://github.com/DeadZen/goldrush)
+`compiler_version`      | [Erlang Compiler](http://erlang.org/doc/apps/compiler/)
+`syntax_tools_version`  | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/)
+`stdlib_version`        | [Standard Library](http://erlang.org/doc/apps/stdlib/)
+`kernel_version`        | [Kernel](http://erlang.org/doc/apps/kernel/)
+
+### Riak Search Statistics
+
+The following statistics related to Riak Search message queues are
+available.
+
+Stat | Description
+-----------------------------|---------------------------------------------------
+`riak_search_vnodeq_max`     | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_mean`    | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_median`  | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_min`     | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_total`   | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started
+`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem
+
+Note that under ideal operation, and with the exception of
+`riak_search_vnodes_running`, these statistics should contain low values
+(e.g., 0-10). Higher values could indicate a problem.
+
+## `riak-debug`
+
+The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes.
+
+`riak-debug` also runs `riak-admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well.
+
+{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}}
+The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion.
+{{% /note %}}
+
+## Strong Consistency Stats
+
+Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The tables below list those stats.
+
+### GET-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
+`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
+`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+
+### PUT-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
+`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
+`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+
+## `riak-admin diag`
+
+Running `riak-admin diag` by itself will perform a check of all of the
+data partitions in your cluster.
It will return a listing of partitions
+that have been checked, each of which looks something like this:
+
+```
+{1392993748081016843912887106182707253109560705024, % the partition checked
+ 'dev-rel@127.0.0.1'},                              % that partition's nodename
+```
+
+At the end of that (potentially very long) listing of checked
+partitions, it will print notices, warnings, and other pieces of
+information about issues that it has found, including date/time, message
+type, and a detailed description. Here's an example:
+
+```
+15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
+15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
+```
+
+Messages bear the following types (derived from
+[syslog](http://en.wikipedia.org/wiki/Syslog) severity levels):
+
+* `debug`
+* `info`
+* `notice`
+* `warning`
+* `error`
+* `critical`
+* `alert`
+* `emergency`
+
+#### Command flags
+
+Attaching the `--help` flag will return a list of flags and commands
+that can be used with Riaknostic:
+
+```
+Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]
+
+-h, --help     Display help/usage dialogue
+-d, --level    Minimum message severity level (default: notice)
+-l, --list     Describe available diagnostic tasks
+--export       Package system info in '/export.zip'
+check_name     A specific check to run
+```
+
+Running `riak-admin diag` with the `--list` flag will return a list of
+available diagnostic checks. The following checks are available:
+
+Check | Description
+:-----|:-----------
+`disk` | Data directory permissions and atime
+`dumps` | Find crash dumps
+`memory_use` | Measure memory usage
+`nodes_connected` | Cluster node liveness
+`ring_membership` | Cluster membership validity
+`ring_preflists` | Check if the ring satisfies `n_val`
+`ring_size` | Check if the ring size is valid
+`search` | Check whether Riak Search is enabled on all nodes
+
+The `--level` flag enables you to specify the log level and thus to
+filter messages based on type. You can pass in any of the message types
+listed above (`debug`, `info`, etc.).
+
+The `--level` flag can be used when running `riak-admin diag` with or
+without specifying a diagnostic check.
+
+#### Contributing
+
+Do you have an idea that would help us improve Riaknostic? If so, fork
+the [GitHub repository](https://github.com/basho/riaknostic) and send us
+a pull request with your changes. The code is documented with
+[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
+Docs a read before you contribute.
+
+If you want to run the Riaknostic script while developing and you don't
+have it hooked up to your local Riak installation, you can invoke it
+directly like so:
+
+```bash
+./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
+```
+
+Those extra options are usually assigned by the `riak-admin` script for
+you, but here's how to set them:
+
+* `--etc` - The location of your Riak configuration directory (usually
+  `/etc`). In the example above, configuration is in the generated
+  directory of a source checkout of Riak.
+* `--base` - The "base" directory of Riak, usually the root of the
+  generated directory or `/usr/lib/riak` on Linux. Scan the
+  `riak-admin` script for how the `RUNNER_BASE_DIR` variable is
+  assigned on your platform.
+* `--user` - The user/UID as which the Riak node runs.
In a source
+  checkout, it's the current user; on most systems, it's `riak`.
+
+## Related Resources
+
+* [The riak-admin configuration management tool](../../admin/riak-admin/)
+* [Riaknostic](http://riaknostic.basho.com/)
+* [HTTP API Status](../../../developing/api/http/status/)
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/cluster-operations/load-balancing.md b/content/riak/kv/2.9.9/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..ea474086e4
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/cluster-operations/load-balancing.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Load Balancing"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+#menu:
+#  riak_kv-2.9.9:
+#    name: "Load Balancing"
+#    identifier: "cluster_operations_load_balancing"
+#    weight: 111
+#    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+**TODO: Add content (not sure where this exists in docs)**
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/cluster-operations/logging.md b/content/riak/kv/2.9.9/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..db1e18c64c
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/cluster-operations/logging.md
@@ -0,0 +1,47 @@
+---
+title: "Enabling and Disabling Debug Logging"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Logging"
+    identifier: "cluster_operations_logging"
+    weight: 105
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+If you'd like to enable debug logging on the current node, i.e. set the
+console log level to `debug`, you can do so without restarting the node
+by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:
+
+```erlang
+lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
+```
+
+You should replace the file location above (`/var/log/riak/console.log`)
+with your platform-specific location, e.g. `./log/console.log` for a
+source installation. This location is specified by the
+`log.console.file` parameter explained above.
+
+If you'd like to enable debug logging on _all_ nodes instead of just one
+node, you can enter the Erlang console of any running node by running `riak
+attach` and entering the following:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
+```
+
+As before, use the appropriate log file location for your cluster.
+
+At any time, you can set the log level back to `info`:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/cluster-operations/replacing-node.md b/content/riak/kv/2.9.9/using/cluster-operations/replacing-node.md
new file mode 100644
index 0000000000..019a8f7362
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/cluster-operations/replacing-node.md
@@ -0,0 +1,100 @@
+---
+title: "Replacing a Node"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Replacing a Node"
+    identifier: "cluster_operations_replace_node"
+    weight: 102
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+At some point, for various reasons, you might need to replace a node in
+your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/2.9.9/using/repair-recovery)).
Here is the recommended way to go
+about replacing a node.
+
+1. Back up your data directory on the node in question. In this example
+scenario, we'll call the node `riak4`:
+
+    ```bash
+    sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+    ```
+
+    If you have any unforeseen issues at any point in the node
+    replacement process, you can restore the node's data from this
+    backup.
+
+2. Download and install Riak on the new node you wish to bring into the
+cluster and have it replace the `riak4` node. We'll call the new node
+`riak7` for the purpose of this example.
+
+3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-cli/#start):
+
+    ```bash
+    riak start
+    ```
+
+4. Plan the join of the new `riak7` node to an existing node already
+participating in the cluster (for example, `riak0`) using the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster) command executed on the new `riak7` node:
+
+    ```bash
+    riak-admin cluster join riak0
+    ```
+
+5. Plan the replacement of the existing `riak4` node with the new
+`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster replace riak4 riak7
+    ```
+
+    <div class="info">
+    <div class="title">Single Nodes</div>
+    If a node is started singly using default settings (as, for example,
+    you might do when you are building your first test environment), you
+    will need to remove the ring files from the data directory after you
+    edit `/etc/vm.args`. `riak-admin cluster replace` will not work as
+    the node has not been joined to a cluster.
+    </div>
+
+6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster) command executed on the new
+`riak7` node:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+7. If the changes are correct, you can commit them with the
+[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster):
+
+    ```bash
+    riak-admin cluster clear
+    ```
+
+Once you have successfully replaced the node, the old `riak4` node should
+begin leaving the cluster. You can check on ring readiness after replacing
+the node with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#ringready)
+and [`riak-admin member-status`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#member-status)
+commands.
+
+{{% note title="Ring Settling" %}}
+You'll need to make sure that no other ring changes occur between the time
+you start the new node and the time the ring settles with the new IP info.
+
+The ring is considered settled when the new node reports `true` when you run
+the `riak-admin ringready` command.
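+
+A quick way to check, using the command mentioned above:
+
+```bash
+riak-admin ringready
+```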
+{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.9/using/cluster-operations/secondary-indexes.md b/content/riak/kv/2.9.9/using/cluster-operations/secondary-indexes.md new file mode 100644 index 0000000000..23f0120615 --- /dev/null +++ b/content/riak/kv/2.9.9/using/cluster-operations/secondary-indexes.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.9 +#menu: +# riak_kv-2.9.9: +# name: "Secondary Indexes" +# identifier: "cluster_operations_2i" +# weight: 109 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + + diff --git a/content/riak/kv/2.9.9/using/cluster-operations/strong-consistency.md b/content/riak/kv/2.9.9/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..f9bd660782 --- /dev/null +++ b/content/riak/kv/2.9.9/using/cluster-operations/strong-consistency.md @@ -0,0 +1,76 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +aliases: +--- + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats. 
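+
+Since all of these stats share the `consistent_` prefix, a quick way to
+inspect just the strong consistency stats on a node (a sketch using
+standard shell filtering) is:
+
+```bash
+riak-admin status | grep consistent_
+```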
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response + + + + diff --git a/content/riak/kv/2.9.9/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/2.9.9/using/cluster-operations/tictac-active-anti-entropy.md new file mode 100644 index 0000000000..0de33578df --- /dev/null +++ b/content/riak/kv/2.9.9/using/cluster-operations/tictac-active-anti-entropy.md @@ -0,0 +1,34 @@ +--- +title: "TicTac Active Anti-Entropy" +description: "An Active Anti-Entropy library" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "TicTac Active Anti-Entropy" + 
identifier: "TicTac_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/kv/2.9.9/ops/advanced/tictacaae/ + - /riak/2.9.9/ops/advanced/ticktacaae/ +--- + + + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +## TicTac AAE + +The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to full integrate the library into the KV 3.0 release. + +TicTac Active Anti-Entropy makes two changes to the way Anti-Entropy has previously worked in Riak. The first change is to the way Merkle Trees are contructed so that they are built incrementally. The second change allows the underlying Anti-entropy key store to be key-ordered while still allowing faster access to keys via their Merkle tree location or the last modified date of the object. + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and +off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements. + + + diff --git a/content/riak/kv/2.9.9/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/2.9.9/using/cluster-operations/v2-multi-datacenter.md new file mode 100644 index 0000000000..69513cfe4f --- /dev/null +++ b/content/riak/kv/2.9.9/using/cluster-operations/v2-multi-datacenter.md @@ -0,0 +1,263 @@ +--- +title_supertext: "V2 Multi-Datacenter" +title: "Replication Operations" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "V2 Multi-Datacenter" + identifier: "cluster_operations_v2" + weight: 115 + parent: "managing_cluster_operations" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.9/ops/mdc/v2/operations + - /riak/kv/2.9.9/ops/mdc/v2/operations +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter/) instead. +{{% /note %}} + +Riak's Multi-Datacenter Replication system is largely +controlled by the `riak-repl` command. The sections below detail the +available subcommands. + +## add-listener + +Adds a listener (primary) to the given node, IP address, and port. + +```bash +riak-repl add-listener <nodename> <listen_ip> <port> +``` + +Below is an example usage: + +```bash +riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010 +``` + +## add-nat-listener + +Adds a NAT-aware listener (primary) to the given node, IP address, port, +NAT IP, and NAT port. If a non-NAT listener already exists with the same +internal IP and port, it is "upgraded” to a NAT Listener. + +```bash +riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port> +``` + +Below is an example usage: + +```bash +riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010 +``` + +## del-listener + +Removes and shuts down a listener (primary) on the given node, IP +address, and port. + +```bash +riak-repl del-listener <nodename> <listen_ip> <port> +``` + +Below is an example usage: + +```bash +riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010 +``` + +## add-site + +Adds a site (secondary) to the local node, connecting to the specified +listener. 
+ +```bash +riak-repl add-site <ipaddr> <portnum> <sitename> +``` + +Below is an example usage: + +```bash +riak-repl add-site 10.0.1.156 9010 newyork +``` + +## del-site + +Removes a site (secondary) from the local node by name. + +```bash +riak-repl del-site <sitename> +``` + +Below is an example usage: + +```bash +riak-repl del-site newyork +``` + +## status + +Obtains status information about replication. Reports counts on how much +data has been transmitted, transfer rates, message queue lengths of +clients and servers, number of fullsync operations, and connection +status. This command only displays useful information on the leader +node. + +```bash +riak-repl status +``` + +## start-fullsync + +Manually initiates a fullsync operation with connected sites. + +```bash +riak-repl start-fullsync +``` + +## cancel-fullsync + +Cancels any fullsync operations in progress. If a partition is in +progress, synchronization will stop after that partition completes. +During cancellation, `riak-repl status` will show `cancelled` in the +status. + +```bash +riak-repl cancel-fullsync +``` + +## pause-fullsync + +Pauses any fullsync operations in progress. If a partition is in +progress, synchronization will pause after that partition completes. +While paused, `riak-repl status` will show `paused` in the status +information. Fullsync may be cancelled while paused. + +```bash +riak-repl pause-fullsync +``` + +## resume-fullsync + +Resumes any fullsync operations that were paused. If a fullsync +operation was running at the time of the pause, the next partition will +be synchronized. If not, it will wait until the next `start-fullsync` +command or `fullsync_interval`. + +```bash +riak-repl resume-fullsync +``` + +## riak-repl Status Output + +The following definitions describe the output of the `riak-repl status` +command. Please note that many of these statistics will only appear on +the current leader node, and that all counts will be reset to 0 upon +restarting Riak. + +### Client + +Field | Description +:-----|:----------- +`client_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.9/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a> +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected sites +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of site connections made to this node +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. 
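+
+To see these fields on a live cluster, run `riak-repl status` on the
+leader node. A sketch of filtering for just the client fields listed
+above (standard shell tools, not part of `riak-repl` itself):
+
+```bash
+riak-repl status | grep client_
+```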
+
+### Server
+
+Field | Description
+:-----|:-----------
+`server_bytes_recv` | The total number of bytes the server (listener) has received
+`server_bytes_sent` | The total number of bytes the server (listener) has sent
+`server_connect_errors` | The number of listener to site connection errors
+`server_connects` | The number of times the listener connects to the client site
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_stats` | See <a href="{{< baseurl >}}riak/kv/2.9.9/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a>
+
+### Elections and Objects
+
+Field | Description
+:-----|:-----------
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+
+### Other
+
+Field | Description
+:-----|:-----------
+`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>`
+`[sitename]_ips` | Defines a replication site
+`leader` | Which node is the current leader of the cluster
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the client (site) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The listeners currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>`connected` - The IP address and port of a connected client (site)</li><li>`cluster_name` - The name of the connected client (site)</li><li>`connecting` - The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | State shows what the current replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul>
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/2.9.9/configuring/v2-multi-datacenter/) guide for more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the server (listener) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/2.9.9/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+## Keylist Strategy
+
+The following fields appear under both the `keylist_server` and
+`keylist_client` fields. Any differences are described in the table.
+
+Field | Description
+------|------------
+`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
+`partition_start` | The number of elapsed seconds since replication has started on a given partition
+`stage_start` | The number of elapsed seconds since replication has started on a given stage
+`get_pool_size` | The number of Riak get finite state workers available to process requests
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter.md
new file mode 100644
index 0000000000..82a539f9fb
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter.md
@@ -0,0 +1,425 @@
+---
+title_supertext: "V3 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "V3 Multi-Datacenter"
+    identifier: "cluster_operations_v3"
+    weight: 114
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.9/ops/mdc/v3/operations
+  - /riak/kv/2.9.9/ops/mdc/v3/operations
+---
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/2.9.9/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/2.9.9/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/2.9.9/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.9/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{<baseurl>}}riak/kv/2.9.9/using/reference/multi-datacenter/statistics
+
+This document explains how to manage replication with the `riak-repl`
+command. The behavior of some of these commands can be altered by
+setting appropriate [configuration][config v3 mdc] values.
+
+All commands need to be run only once on a single node of a cluster for
+the changes to propagate to all other nodes. All changes will persist
+across node restarts and will automatically take effect when nodes are
+added to the cluster.
+
+## Cluster Connectivity
+
+#### clustername
+
+Set the `clustername` for all nodes in a Riak cluster.
+
+* Without a parameter, returns the current name of the cluster
+* With a parameter, names the current cluster
+
+To **set** the `clustername`:
+
+* Syntax: `riak-repl clustername <clustername>`
+* Example: `riak-repl clustername Boston`
+
+To **get** the `clustername`:
+
+* Syntax: `riak-repl clustername`
+* Example: `riak-repl clustername`
+
+#### connect
+
+The `connect` command establishes communications from a source cluster
+to a sink cluster of the same ring size. The `host:port` of the sink
+cluster is used for this. The IP and port to connect to can be found in
+the `advanced.config` of the remote cluster, under `riak_core` and
+`cluster_mgr`.
+
+The `host` can be either an IP address
+
+* Syntax: `riak-repl connect <ip>:<port>`
+* Example: `riak-repl connect 192.168.2.1:9080`
+
+...or a hostname that will resolve to an IP address.
+
+* Syntax: `riak-repl connect <host>:<port>`
+* Example: `riak-repl connect Austin:9080`
+
+#### disconnect
+
+Disconnects a source cluster from a sink cluster.
+
+You may define a `host:port` combination
+
+* Syntax: `riak-repl disconnect <host>:<port>`
+* Example: `riak-repl disconnect 192.168.2.1:9080`
+
+...or use the *name* of the cluster.
+
+* Syntax: `riak-repl disconnect <sink_clustername>`
+* Example: `riak-repl disconnect Austin`
+
+#### connections
+
+Display a list of connections between source and sink clusters.
+
+* Syntax: `riak-repl connections`
+* Example: `riak-repl connections`
+
+#### clusterstats
+
+Displays current cluster stats using an optional `ip:port` as well as an
+optional `protocol-id`.
+
+`protocol-id` can be one of the following:
+
+* `cluster_mgr`
+* `rt_repl`
+* `fs_repl`
+
+The `clusterstats` command in use:
+
+* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>`
+* Example: `riak-repl clusterstats 192.168.2.1:9080`
+* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl`
+
+
+## Realtime Replication Commands
+
+#### realtime enable
+
+Enable realtime replication from a source cluster to sink clusters.
+
+This will start queuing updates for replication. The cluster will still
+require an invocation of `realtime start` for replication to occur.
+
+* Syntax: `riak-repl realtime enable <sink_clustername>`
+* Example: `riak-repl realtime enable Austin`
+
+#### realtime disable
+
+Disable realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime disable <sink_clustername>`
+* Example: `riak-repl realtime disable Austin`
+
+
+#### realtime start
+
+Start realtime replication connections from a source cluster to sink
+clusters. See also `realtime enable` (above).
+
+* Syntax: `riak-repl realtime start <sink_clustername>`
+* Example: `riak-repl realtime start Austin`
+
+#### realtime stop
+
+Stop realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime stop <sink_clustername>`
+* Example: `riak-repl realtime stop Austin`
+
+
+## Fullsync Replication Commands
+
+These behaviors can be altered by using the `advanced.config`
+`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information.
+
+#### fullsync enable
+
+Enable fullsync replication from a source cluster to sink clusters. By
+default, a fullsync will begin as soon as a connection to the remote
+cluster is established.
+
+* Syntax: `riak-repl fullsync enable <sink_clustername>`
+* Example: `riak-repl fullsync enable Austin`
+
+#### fullsync disable
+
+Disables fullsync for a cluster.
+
+* Syntax: `riak-repl fullsync disable <sink_clustername>`
+* Example: `riak-repl fullsync disable Austin`
+
+#### fullsync start
+
+Starts a fullsync. If the application configuration
+`fullsync_on_connect` is set to `false`, a fullsync needs to be started
+manually. This is also used to trigger a periodic fullsync using a cron
+job. While a fullsync is in progress, a `start` command is ignored and a
+message is logged.
+
+* Syntax: `riak-repl fullsync start <sink_clustername>`
+* Example: `riak-repl fullsync start Austin`
+
+#### fullsync stop
+
+Stops a fullsync.
+
+* Syntax: `riak-repl fullsync stop <sink_clustername>`
+* Example: `riak-repl fullsync stop Austin`
+
+## Cascading Realtime Writes
+
+#### realtime cascades
+
+Shows the current cascading realtime setting.
+
+* Syntax: `realtime cascades`
+* Example: `riak-repl realtime cascades`
+
+#### realtime cascades always
+
+Enable realtime cascading writes.
+
+* Syntax: `realtime cascades always`
+* Example: `riak-repl realtime cascades always`
+
+#### realtime cascades never
+
+Disable realtime cascading writes.
+
+* Syntax: `realtime cascades never`
+* Example: `riak-repl realtime cascades never`
+
+
+## NAT
+
+**Note**: See the [V3 Multi Data Center Replication With NAT][config v3 nat] for more information.
+
+#### nat-map show
+
+Show the current NAT mapping table.
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### proxy-get enable
+
+Enable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provide a redirection to the `<to-cluster-id>` for `proxy_get` if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+
+Show the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+
+Delete an existing redirect such that `proxy_get` requests go again to the
+original provider cluster id.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Display this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+    ```bash
+    riak-repl show-local-cluster-id
+    ```
+
+    Possible output:
+
+    ```
+    local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+    ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster. This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster across all nodes on that cluster for a fullsync
+to a sink cluster. This means that if fullsync is configured for two
+different sink clusters, both with a `max_fssource_cluster` of 5, up to
+10 fullsync workers can be in progress. This only affects nodes on the
+source cluster on which this parameter is defined via the configuration
+file or the command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node's number of maximum connections. This only affects nodes on
+the sink cluster on which this parameter is defined via the
+configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
+(Version 3) separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+    ```bash
+    riak-repl modes mode_repl12 mode_repl13
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+    ```bash
+    riak-repl modes
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and realtime replication replicate data from source clusters to sink clusters,
+but some configurations and metadata (such as search indices and bucket properties) will
+not be replicated.
+
+Non-replication of certain configurations and metadata supports
+heterogeneous cluster configurations in replication, but there are
+operational steps you can take when you want homogeneous cluster
+configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you would change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Bucket and Bucket Type properties on the source cluster
+will _not_ be replicated from source clusters to sink clusters.
+
+If you want the properties for Buckets or Bucket Types
+present on the source cluster to be propagated to sink clusters,
+you should update this data for each cluster at the same
+time you would change the source cluster.
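+
+As an illustrative sketch of keeping such properties in step, you could
+run the same `riak-admin bucket-type` commands against each cluster (the
+type name and properties here are hypothetical):
+
+```bash
+# Run on a node in the source cluster, then repeat on each sink cluster
+riak-admin bucket-type create my_type '{"props":{"n_val":3}}'
+riak-admin bucket-type activate my_type
+```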
+ + + + diff --git a/content/riak/kv/2.9.9/using/performance.md b/content/riak/kv/2.9.9/using/performance.md new file mode 100644 index 0000000000..c312457f37 --- /dev/null +++ b/content/riak/kv/2.9.9/using/performance.md @@ -0,0 +1,268 @@ +--- +title: "Improving Performance" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Performance" + identifier: "managing_performance" + weight: 206 + parent: "managing" +toc: true +aliases: + - /riak/kv/2.9.9/ops/tuning/linux/ + - /riak/2.9.9/ops/tuning/linux/ +--- + +Many Unix-like operating systems and distributions are tuned for desktop +or light use out of the box and not for a production database. This +guide describes recommended system performance tunings for operators of +new and existing Riak clusters. The tunings present in this guide should +be considered as a starting point. It is important to make note of what +changes are made and when in order to measure the impact of those +changes. + +For performance and tuning recommendations specific to running Riak +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.9/using/performance/amazon-web-services). + +{{% note title="Note on other operating systems" %}} +Unless otherwise specified, the tunings recommended below are for Linux +distributions. Users implementing Riak on BSD and Solaris distributions can +use these tuning recommendations to make analogous changes in those operating +systems. +{{% /note %}} + +## Storage and File System Tuning + +### Virtual Memory + +Due to the heavily I/O-focused profile of Riak, swap usage can result in +the entire server becoming unresponsive. We recommend setting +`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much +as possible: + +```config +vm.swappiness = 0 +``` + +Ideally, you should disable swap to ensure that Riak's process pages are +not swapped. Disabling swap will allow Riak to crash in situations where +it runs out of memory. This will leave a crash dump file, named +`erl_crash.dump`, in the `/var/log/riak` directory which can be used to +determine the cause of the memory usage. + +### Transparent Huge Pages (THP) + +Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately this operation is rather OS specific. As many of our customers are running Red Hat 6, we have included instructions on how to do so underneath. If you are using a different operating system, please refer to documentation for your OS. + +In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line: + +``` +transparent_hugepage=never +``` + +For the change to become effective, a server reboot is required. + +{{% note title="Note on Kernel Tuning Tools" %}} +Some Kernel tuning tools such as ktune specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the Kernel tuning tool you are using as to how to disable THP. +{{% /note %}} + +### Mounts + +Riak makes heavy use of disk I/O for its storage operations. 
It is
+important that you mount volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
+touched when read. This flag can be set temporarily using the following
+command:
+
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` option can be set in `/etc/fstab` to make the
+mount permanent.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is completely fair queuing
+or `cfq`, which is designed for desktop use. While `cfq` is a good
+general-purpose scheduler, it is not designed to provide the kind of
+throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* Use the `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* Use the `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+
+To check the scheduler depth for block device `sda`, use the following
+command:
+
+```bash
+cat /sys/block/sda/queue/nr_requests
+```
+
+To increase the scheduler depth to 1024, use the following command:
+
+```bash
+echo 1024 > /sys/block/sda/queue/nr_requests
+```
+
+### Filesystem
+
+Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and
+[XFS](http://xfs.org/index.php/Main_Page) are recommended on some
+operating systems for greater reliability and recoverability.
+
+At this time, Basho can recommend using ZFS on Solaris, SmartOS, and
+OmniOS. ZFS may work well with Riak on direct Solaris clones like
+IllumOS, but we cannot yet recommend this. [ZFS on
+Linux](http://zfsonlinux.org) is still too early in its project lifetime
+to be recommendable for production use due to concerns that have been
+raised about excessive memory use. ZFS on FreeBSD is more mature than
+ZFS on Linux, but Basho has not yet performed sufficient performance and
+reliability testing to recommend using ZFS and Riak on FreeBSD.
+
+In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and
+[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on
+operating systems on which ZFS or XFS are not available or recommended.
+
+The ext4 file system defaults include two options that increase
+integrity but slow performance. Because Riak's integrity is based on
+multiple nodes holding the same data, these two options can be changed
+to boost I/O performance. We recommend setting `barrier=0` and
+`data=writeback` when using the ext4 filesystem.
+
+Similarly, the XFS file system defaults can be optimized to improve
+performance. We recommend setting `nobarrier`, `logbufs=8`,
+`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem.
+
+As with the `noatime` setting, these settings should be added to
+`/etc/fstab` so that they are persisted across server restarts.
+
+## Kernel and Network Tuning
+
+The following settings are minimally sufficient to improve many aspects
+of Riak usage on Linux, and should be added or updated in
+`/etc/sysctl.conf`:
+
+```config
+net.ipv4.tcp_max_syn_backlog = 40000
+net.core.somaxconn = 40000
+net.core.wmem_default = 8388608
+net.core.rmem_default = 8388608
+net.ipv4.tcp_sack = 1
+net.ipv4.tcp_window_scaling = 1
+net.ipv4.tcp_fin_timeout = 15
+net.ipv4.tcp_keepalive_intvl = 30
+net.ipv4.tcp_tw_reuse = 1
+net.ipv4.tcp_moderate_rcvbuf = 1
+```
+
+{{% note title="Note on system default" %}}
+In general, these recommended values should be compared with the system
+defaults and only changed if benchmarks or other performance metrics indicate
+that networking is the bottleneck.
+{{% /note %}}
+
+The following settings are optional, but may improve performance on a
+10Gb network:
+
+```config
+net.core.rmem_max = 134217728
+net.core.wmem_max = 134217728
+net.ipv4.tcp_mem = 134217728 134217728 134217728
+net.ipv4.tcp_rmem = 4096 277750 134217728
+net.ipv4.tcp_wmem = 4096 277750 134217728
+net.core.netdev_max_backlog = 300000
+```
+
+Certain network interfaces ship with on-board features that have been
+shown to hinder Riak network performance. These features can be disabled
+via `ethtool`.
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Retest and retune after changing any of these values, as they affect all
+network operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho in nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, it is necessary to
+increase the open files limit. See [Open Files Limit]({{<baseurl>}}riak/kv/2.9.9/using/performance/open-files-limit/) for more
+details.
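+
+As a quick sanity check, a sketch along the following lines can confirm
+the limit actually in effect. The `beam.smp` process match is an
+assumption about how your node was started; the dedicated Open Files
+Limit page covers this in full:
+
+```bash
+# Show the open files limit for the current shell's user
+ulimit -n
+
+# Show the limit applied to a running Riak node (Linux procfs;
+# assumes pgrep finds exactly one beam.smp process)
+grep "Max open files" /proc/$(pgrep -f beam.smp | head -n 1)/limits
+```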
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/2.9.9/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/2.9.9/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/2.9.9/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/2.9.9/using/performance/open-files-limit/)
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/performance/amazon-web-services.md b/content/riak/kv/2.9.9/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..6426afae1a
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/performance/amazon-web-services.md
@@ -0,0 +1,247 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/tuning/aws
+  - /riak/kv/2.9.9/ops/tuning/aws
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types which encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are Disk I/O, RAM, and Network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used.
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-Optimized EC2 instances, which provide between 500 Megabits per
+second and 1,000 Megabits per second of throughput with [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available and are recommended for use with
+Provisioned IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they
+would physical nodes to compensate for the performance variability caused
+by shared, virtualized resources. Plan to have more EC2-instance-based
+nodes than physical server nodes when estimating cluster size with
+respect to node count.
+{{% /note %}} + +## Operating System + +### Clocks + +NTP is configured by default on Amazon EC2 Linux instances. Please +refer to the [Set the Time for an +Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html) +section of the EC2 documentation for steps on verifying if NTP is +working properly. If NTP is not working properly, significant clock +drift can occur. + +### Mounts and Scheduler + +On EBS volumes, the **deadline** scheduler should be used. To check the +scheduler in use for block device xvdf, for example, use the following +command: + +```bash +cat /sys/block/xvdf/queue/scheduler +``` + +To set the scheduler to deadline, use the following command: + +```bash +echo deadline > /sys/block/xvdf/queue/scheduler +``` + +More information on the disk scheduler is available in [Improving Performance](../). + +### Virtual Memory Subsystem + +EBS volumes have considerably less bandwidth than hardware disks. To +avoid saturating EBS bandwidth and inducing IO latency spikes, it is +recommended to tune the Linux virtual memory subsystem to flush smaller +amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings). + +### Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, back up log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally +and are not affected by a wider problem like an AWS service outage. Try +to determine the cause of the problem from the data you have collected. +If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or re-sold under Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk. + +Have your collected data ready when contacting TI Tokyo Client Services. A +Client Services Engineer (CSE) might request log files, configuration +files, or other information. + +## Data Loss + +Many failures either do not entail data loss or have minimal loss that +can be repaired automatically, without intervention. Outage of a single +node does not necessarily cause data loss, as other replicas of every +key are available elsewhere in the cluster. Once the node is detected as +down, other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called hinted handoff). + +The more severe data loss scenarios usually relate to hardware failure +(in the case of AWS, service failure or instance termination). In the +cases where data is lost, several options are available for restoring +the data: + +1. Restore from backup. A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but can be used to partially restore data from + lost EBS volumes. If running in a RAID configuration, rebuilding the + array may also be possible. +2. Restore from Multi-Datacenter Replication. If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the + `riak-repl` command. +3. Restore using intra-cluster repair. 
Riak versions 1.2 and greater
+   include a "repair" feature which will restore lost partitions with
+   data from other replicas. This currently has to be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho CSE.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho is strongly recommended.
+
+## Benchmarking
+
+Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that
+simulates application operations by constructing and communicating
+approximately-compatible data payloads with the Riak cluster directly.
+
+Benchmarking is critical to determining the appropriate EC2 instance
+types, and strongly recommended. More information is available on
+benchmarking Riak clusters with [Basho Bench](../benchmarking).
+
+Besides running Basho Bench, we also advise that you load test Riak with
+your own tests to ensure that the load imparted by MapReduce queries,
+full-text queries, and index queries is within the expected range.
+
+## Simulating Upgrades, Scaling, and Failure States
+
+In addition to simply measuring performance, it is also important to
+measure how performance degrades when the cluster is not in a
+steady state. While under a simulation of live load, the following
+states might be simulated:
+
+1. Stop one or more nodes normally and restart them after a few moments
+   (simulates a [rolling upgrade](../../../setup/upgrading/cluster)).
+2. Join two or more nodes to the cluster.
+3. Have nodes leave the cluster (after step #2).
+4. Hard-kill the Riak `beam.smp` process (e.g., with `kill -9`) and then
+   restart it.
+5. Hard-reboot a node's instance using the AWS console and then
+   restart it.
+6. Hard-stop and destroy a node's instance and build a new one from
+   backup.
+7. Via networking, e.g. a firewall, partition one or more nodes from
+   the rest of the cluster and then restore the original
+   configuration.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. While the Riak node is out, other nodes may also
+be at risk if free capacity is low on the rest of the cluster, so
+monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to replace
+it.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart.
In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance, generally performs better, but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. With EBS you can also optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/performance/benchmarking.md b/content/riak/kv/2.9.9/using/performance/benchmarking.md
new file mode 100644
index 0000000000..98a3d78c33
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/performance/benchmarking.md
@@ -0,0 +1,602 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/building/benchmarking
+  - /riak/kv/2.9.9/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   ```basho_bench```. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster. Do not run the ```basho_bench``` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download ```basho_bench```
+
+You can download the pre-built packages below, or build the tool from
+source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/2.9.9/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building ```basho_bench``` from source.
Later
+  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the ```erlang-crypto``` package, which
+  is required by ```basho_bench```.
+* Install ```git``` (to check out the ```basho_bench``` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory to generate the results into:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed ```basho_bench``` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common ```~/``` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+```/home/username/bench_results/current/```.
+
+If you built ```basho_bench``` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput - Operations per second over the duration of the test.
+* Latency at the 99th percentile, 99.9th percentile, and maximum for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running basho_bench, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
+
+#### Installing R on Ubuntu
+
+```
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed ```basho_bench``` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+```/home/username/bench_results/summary.png```.
+
+If you have built ```basho_bench``` from source, you can just use
+```make```. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the ```basho_bench/README```.
+
+## How does it work?
+
+When Basho Bench starts (`basho_bench.erl`), it reads the
+configuration (`basho_bench_config.erl`), creates a new results
+directory, and then sets up the test (`basho_bench_app.erl` and
+`basho_bench_sup.erl`).
+
+During test setup, Basho Bench creates the following:
+
+* One **stats process** (`basho_bench_stats.erl`). This process
+  receives notifications when an operation completes, plus the
+  elapsed time of the operation, and stores it in a histogram. At
+  regular intervals, the histograms are dumped to `summary.csv` as
+  well as operation-specific latency CSVs (e.g. `put_latencies.csv`
+  for the PUT operation).
+* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting
+  (`basho_bench_worker.erl`). The worker process wraps a driver
+  module, specified by the [driver](#driver)
+  configuration setting. The driver is randomly invoked using the
+  distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the
+  driver invokes operations is governed by the [mode](#mode) setting.
+
+Once these processes have been created and initialized, Basho Bench
+sends a run command to all worker processes, causing them to begin the
+test. Each worker is initialized with a common seed value for random
+number generation to ensure that the generated workload is reproducible
+at a later date.
+
+During the test, the workers repeatedly call `driver:run/4`, passing in
+the next operation to run, a keygen function, a valuegen function, and
+the last state of the driver. The worker process times the operation,
+and reports this to the stats process when the operation has completed.
+
+Finally, once the test has been run for the duration specified in the
+config file, all workers and stats processes are terminated and the
+benchmark ends. The measured latency and throughput of the test can be
+found in `./tests/current/`. Previous results are in timestamped
+directories of the form `./tests/YYYYMMDD-HHMMSS/`.
+
+## Configuration
+
+Basho Bench ships with a number of sample configuration files, available
+in the `/examples` directory.
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`driver:run/4` function with a new operation. There are two possible
+values:
+
+* `max` - generate as many ops per second as possible
+* `{rate, N}` - generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e. as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE, respectively.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                              [{{{basho_bench_driver_null,run,
+                                                [asdfput,
+                                                 #Fun<basho_bench_keygen.4.4674>,
+                                                 #Fun<basho_bench_valgen.0.1334>,
+                                                 undefined]}}},
+                                               {{{basho_bench_worker,
+                                                 worker_next_op,1}}},
+                                               {{{basho_bench_worker,
+                                                 max_worker_run_loop,1}}}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` - Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` - Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` - Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` - Directly invokes the Bitcask API
+* `basho_bench_driver_dets` - Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` - operation completed successfully
+* `{error, Reason, NewState}` - operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` - operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` - operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` - generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` - the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` - the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` - selects an integer from a uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` - selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` - the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a key generator function.
The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` - takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` - takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a Pareto-distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` - generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` - generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` - generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed-size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed,
+% starting at 1000 bytes with a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+The default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+``` + +#### riakclient_replies + +This value is used for R-values during a get operation, and W-values +during a put operation. + +```erlang +% Expect 1 reply. +{riakclient_replies, 1}. +``` + +#### riakclient_bucket + +The Riak bucket to use for reading and writing values. The Default is +`<<"test">>`. + +```erlang +% Use the "bench" bucket. +{riakclient_bucket, <<"bench">>}. +``` + +### basho_bench_driver_riakc_pb Settings + +#### riakc_pb_ips + +A list of IP addresses to connect the workers to. A random IP will be +chosen for each worker. + +The default is `{riakc_pb_ips, [{127,0,0,1}]}` + +```erlang +% Connect to a cluster of 3 machines +{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]} +``` + +#### riakc_pb_port + +The port on which to connect to the PBC interface. + +The default is `{riakc_pb_port, 8087}` + +#### riakc_pb_bucket + +The bucket to use for testing. + +The default is `{riakc_pb_bucket, <<"test">>}` + +### basho_bench_driver_http_raw Settings + +#### http_raw_ips + +A list of IP addresses to connect the workers to. Each worker makes +requests to each IP in a round-robin fashion. + +The default is `{http_raw_ips, ["127.0.0.1"]}` + +```erlang +% Connect to a cluster of machines in the 10.x network +{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}. +``` + +#### http_raw_port + +Select the default port to connect to for the HTTP server. + +The default is `{http_raw_port, 8098}`. + +```erlang +% Connect on port 8090 +{http_raw_port, 8090}. +``` + +#### http_raw_path + +The base path to use for accessing Riak, usually `"/riak/<bucket>"`. + +The default is `{http_raw_path, "/riak/test"}`. + +```erlang +% Place test data in another_bucket +{http_raw_path, "/riak/another_bucket"}. +``` + +#### http_raw_params + +Additional parameters to add to the end of the URL. This can be used +to set the `r`/`w`/`dw`/`rw` parameters as desired. + +The default is `{http_raw_params, ""}`. + +```erlang +% Set R=1, W=1 for testing a system with n_val set to 1 +{http_raw_params, "?r=1&w=1"}. +``` + +#### http_raw_disconnect_frequency + +How often, in seconds or number of operations, the HTTP clients +(workers) should forcibly disconnect from the server. + +The default is `{http_raw_disconnect_frequency, infinity}` (which +means that Basho Bench should never forcibly disconnect). + +```erlang +% Disconnect after 60 seconds +{http_raw_disconnect_frequency, 60}. + +% Disconnect after 200 operations +{http_raw_disconnect_frequency, {ops, 200}}. +``` + +## Custom Driver + +A custom driver must expose the following callbacks. + +```erlang +% Create the worker +% ID is an integer +new(ID) -> {ok, State} or {error, Reason}. + +% Run an operation +run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}. +``` + +See the [existing +drivers](https://github.com/basho/basho_bench/tree/master/src) for +more details. 
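+
+Putting the pieces above together, an end-to-end run from source might
+look like the following sketch. It assumes a local Riak node and that
+the stock `examples/riakc_pb.config` settings suit your cluster:
+
+```bash
+# Build basho_bench, run a benchmark, then render the summary graph.
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+./basho_bench examples/riakc_pb.config   # results land in tests/current/
+make results                             # writes tests/current/summary.png
+```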
+ + + + diff --git a/content/riak/kv/2.9.9/using/performance/erlang.md b/content/riak/kv/2.9.9/using/performance/erlang.md new file mode 100644 index 0000000000..52d4dde555 --- /dev/null +++ b/content/riak/kv/2.9.9/using/performance/erlang.md @@ -0,0 +1,371 @@ +--- +title: "Erlang VM Tuning" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Erlang VM" + identifier: "performance_erlang" + weight: 105 + parent: "managing_performance" +toc: true +aliases: + - /riak/2.9.9/ops/tuning/erlang + - /riak/kv/2.9.9/ops/tuning/erlang +--- + +Riak was written almost exclusively in [Erlang](http://www.erlang.org) +and runs on an Erlang virtual machine (VM), which makes proper Erlang VM +tuning an important part of optimizing Riak performance. The Erlang VM +itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm). + +The table below lists some of the parameters that are available, showing +both their names as used in Erlang and their names as Riak parameters. + +Erlang parameter | Riak parameter +:----------------|:-------------- +[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads` +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K` +[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit` +[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports` +[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online` +[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W` +[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size` +[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables` +[`+scl`](http://www.erlang.org/doc/main/erl.html#+scl) | `erlang.schedulers.compaction_of_load` +[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval` +[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp` +[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing` +[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size` +[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime` +[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after` +[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump` +[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables` +`-name` | `nodename` + +{{% note title="Note on upgrading to 2.0" %}} +In versions of Riak prior to 2.0, Erlang VM-related parameters were specified +in a `vm.args` configuration file; in versions 2.0 and later, all +Erlang-VM-specific parameters are set in the `riak.conf` file. If you're +upgrading to 2.0 from an earlier version, you can still use your old `vm.args` +if you wish. Please note, however, that if you set one or more parameters in +both `vm.args` and in `riak.conf`, the settings in `vm.args` will override +those in `riak.conf`. 
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
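+
+As an illustration only (the right values depend on your hardware), one
+way to sanity-check the processor count before pinning the scheduler
+settings is sketched below; the `/etc/riak/riak.conf` path is an
+assumption that may differ between installs:
+
+```bash
+# Count the logical processors the kernel reports (Linux)
+nproc
+
+# Inspect any scheduler-related settings currently active in riak.conf
+grep -E "^erlang\.schedulers" /etc/riak/riak.conf
+```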
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` parameter to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. The distribution protocol itself uses an unpredictable port
+for inter-node communication by default, binding to port 0, which means
+that it uses the first available port. This can make it difficult to
+configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively.
The +following would set the range to ports between 3000 and 5000: + +```riakconf +erlang.distribution.port_range.minimum = 3000 +erlang.distribution.port_range.maximum = 5000 +``` + +```appconfig +%% The older, app.config-based system uses different parameter names +%% for specifying the minimum and maximum port + +{kernel, [ + % ... + {inet_dist_listen_min, 3000}, + {inet_dist_listen_max, 5000} + % ... + ]} +``` + +You can set the Erlang VM to use a single port by setting the minimum to +the desired port while setting no maximum. The following would set the +port to 5000: + +```riakconf +erlang.distribution.port_range.minimum = 5000 +``` + +```appconfig +{kernel, [ + % ... + {inet_dist_listen_min, 5000}, + % ... + ]} +``` + +If the minimum port is unset, the Erlang VM will listen on a random +high-numbered port. + +### Maximum Ports + +You can set the maximum number of concurrent ports/sockets used by the +Erlang VM using the `erlang.max_ports` setting. Possible values range +from 1024 to 134217727. The default is 65536. In `vm.args` you can use +either `+Q` or `-env ERL_MAX_PORTS`. + +## Asynchronous Thread Pool + +If thread support is available in your Erlang VM, you can set the number +of asynchronous threads in the Erlang VM's asynchronous thread pool +using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0 +to 1024. If thread support is available on your OS, the default is 64. +Below is an example setting the number of async threads to 600: + +```riakconf +erlang.async_threads = 600 +``` + +```vmargs ++A 600 +``` + +### Stack Size + +In addition to the number of asynchronous threads, you can determine the +memory allocated to each thread using the +`erlang.async_threads.stack_size` parameter, which corresponds to the +`+a` Erlang flag. You can determine that size in Riak using KB, MB, GB, +etc. The valid range is 16-8192 kilowords, which translates to 64-32768 +KB on 32-bit architectures. While there is no default, we suggest a +stack size of 16 kilowords, which translates to 64 KB. We suggest such a +small size because the number of asynchronous threads, as determined by +`erlang.async_threads` might be quite large in your Erlang VM. The 64 KB +default is enough for drivers delivered with Erlang/OTP but might not be +large enough to accommodate drivers that use the `driver_async()` +functionality, documented +[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend +setting higher values with caution, always keeping the number of +available threads in mind. + +## Kernel Polling + +You can utilize kernel polling in your Erlang distribution if your OS +supports it. Kernel polling can improve performance if many file +descriptors are in use; the more file descriptors, the larger an effect +kernel polling may have on performance. Kernel polling is enabled by +default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`. +This corresponds to the +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the +Erlang VM. You can disable it by setting `erlang.K` to `off`. + +## Warning Messages + +Erlang's +[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an +event manager that registers error, warning, and info events from the +Erlang runtime. By default, events from the `error_logger` are mapped as +warnings, but you can also set messages to be mapped as errors or info +reports using the `erlang.W` parameter (or `+W` in `vm.args`). The +possible values are `w` (warnings), `errors`, or `i` (info reports). 
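+
+Building on the Port Range settings above, the sketch below shows one
+hedged way to open the necessary ports and verify what epmd has
+registered. The 3000-5000 range and the use of `iptables` are
+assumptions about your environment:
+
+```bash
+# Allow epmd (4369) plus the configured distribution port range
+iptables -A INPUT -p tcp --dport 4369 -j ACCEPT
+iptables -A INPUT -p tcp --dport 3000:5000 -j ACCEPT
+
+# Ask the local epmd which node names are registered, and on which ports
+epmd -names
+```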
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) \(Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.distribution_buffer_size` (explained in the section [above](#distribution-buffer)) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can determine that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every N
+seconds, where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can determine how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
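+
+As a quick diagnostic sketch: the path below assumes a packaged install
+that logs to `/var/log/riak` and an unchanged `erlang.crash_dump`
+setting, so adjust it to match your environment:
+
+```bash
+# Check whether the node has left a crash dump at the default location
+ls -lh /var/log/riak/erl_crash.dump 2>/dev/null \
+  && head -n 5 /var/log/riak/erl_crash.dump
+```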
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/performance/latency-reduction.md b/content/riak/kv/2.9.9/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..fe3bc27c80
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/performance/latency-reduction.md
@@ -0,0 +1,267 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/tuning/latency-reduction
+  - /riak/kv/2.9.9/ops/tuning/latency-reduction
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about it.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB. Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric | Explanation
+:-----------------------------|:-----------
+`node_get_fsm_objsize_mean` | The mean object size encountered by this node in the last minute
+`node_get_fsm_objsize_median` | The median object size encountered by this node in the last minute
+`node_get_fsm_objsize_95` | The 95th-percentile object size encountered by this node in the last minute
+`node_get_fsm_objsize_99` | The 99th-percentile object size encountered by this node in the last minute
+`node_get_fsm_objsize_100` | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and re-start your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes).
This increases the size of the +distributed Erlang buffer from its default of 1024 KB. Re-start your +node when configuration changes have been made. + +Large objects can also impact latency even if they're only present on +some nodes. If increased latency occurs only on N nodes, where N is your +[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes. + +If large objects are suspected, you should also audit the behavior of +siblings in your cluster, as explained in the [next section](#siblings). + +## Siblings + +In Riak, object conflicts are handled by keeping multiple versions of +the object in the cluster either until a client takes action to resolve +the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects). + +### Mitigation + +The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor +object size (or via an HTTP `GET` request to `/stats`). In the output of +`riak-admin status` in each node, you'll see the following +sibling-related statistics: + +Metric | Explanation +:------------------------------|:----------- +`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute + +Is there an upward trend in these statistics over time? Are there any +large outliers? Do these trends correspond to your observed latency +spikes? + +If you believe that sibling creation problems could be responsible for +latency issues in your cluster, you can start by checking the following: + +* If `allow_mult` is set to `true` for some or all of your buckets, be + sure that your application is correctly resolving siblings. Be sure to + read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate. + If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly. +* Application errors are a common source of problems with + siblings. Updating the same key over and over without passing a + [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. 
If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) \(DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W is set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. The maximum file size setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn), while setting it higher will induce less
+frequent merges with more I/O churn. To find settings that are ideal for
+your use case, we recommend checking out our guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments.
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak is a heavily I/O- and network-intensive system.
+Bottlenecks on either front can lead to undue latency in your cluster.
+We recommend an active monitoring strategy to detect problems
+immediately when they arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+at your disposal, including
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat).
+
+To diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+You can also set an object size threshold past which a write will
+succeed but will register a warning in the logs by adjusting the
+`object.size.warning_threshold` parameter. The default is `5MB`.
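+
+For example, the following hypothetical `riak.conf` excerpt tightens both
+settings well below their defaults, rejecting writes over 5 MB and logging
+a warning for anything over 1 MB:
+
+```riakconf
+object.size.warning_threshold = 1MB
+object.size.maximum = 5MB
+```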
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs an error (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/performance/multi-datacenter-tuning.md b/content/riak/kv/2.9.9/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..441f686348
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,47 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[perf index]: {{<baseurl>}}riak/kv/2.9.9/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/performance/open-files-limit.md b/content/riak/kv/2.9.9/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..c16663d1cf
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/performance/open-files-limit.md
@@ -0,0 +1,351 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/tuning/open-files-limit/
+  - /riak/kv/2.9.9/ops/tuning/open-files-limit/
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation.
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session required pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session required pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 65536
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
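+
+If you would rather not reboot while experimenting, the daemon defined above
+can usually be loaded in place and then verified (a convenience only;
+restarting as described above remains the reliable path):
+
+```bash
+sudo launchctl load -w /Library/LaunchDaemons/limit.maxfiles.plist
+launchctl limit maxfiles
+```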
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file.
+
+4\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```/etc/launchd.conf
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/2.9.9/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..5adf5ec021
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,50 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times.
First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference.md b/content/riak/kv/2.9.9/using/reference.md
new file mode 100644
index 0000000000..9674a8e04c
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference.md
@@ -0,0 +1,135 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/architecture.md b/content/riak/kv/2.9.9/using/reference/architecture.md
new file mode 100644
index 0000000000..6ee92bc769
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/architecture.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+#menu:
+#  riak_kv-2.9.9:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+<!-- TODO: Content -->
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/bucket-types.md b/content/riak/kv/2.9.9/using/reference/bucket-types.md
new file mode 100644
index 0000000000..2920a618e3
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/bucket-types.md
@@ -0,0 +1,823 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+Bucket types allow groups of buckets to share configuration details and
+allow Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
+cluster to a pre-2.0 version _as long as you have not created and activated a
+bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, related to
+  [Riak data types]({{<baseurl>}}riak/kv/2.9.9/developing/data-types), and [strong consistency]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/strong-consistency) respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types/#bucket-properties-and-operations).
You cannot simply take a +bucket `my_bucket` and assign it a type the way that you would, say, +set `allow_mult` to `false` or `n_val` to `5`, because there is no +`type` parameter contained within the bucket's properties (i.e. +`props`). + +Instead, bucket types are applied to buckets _on the basis of how those +buckets are queried_. Queries involving bucket types take the following +form: + +``` +GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key> +``` + +In the older system, only bucket and key are specified in queries: + +``` +GET/PUT/DELETE /buckets/<bucket>/keys/<key> +``` + +## When to Use Bucket Types + +In many respects, bucket types are a major improvement over the older +system of bucket configuration, including the following: + +* Bucket types are more flexible because they enable you to define a + bucket configuration and then change it if you need to. +* Bucket types are more reliable because the buckets that bear a given + type only have their properties changed when the type is changed. + Previously, it was possible to change the properties of a bucket only + through client requests. +* Whereas bucket properties can only be altered by clients interacting + with Riak, bucket types are more of an operational concept. The + `riak-admin bucket-type` interface (discussed in depth below) enables + you to manage bucket configurations on the operations side, without + recourse to Riak clients. + +For these reasons, we recommend _always_ using bucket types in versions +of Riak 2.0 and later. + +## Managing Bucket Types Through the Command Line + +Bucket types are created, updated, activated, and more through the +`riak-admin bucket-type` interface. + +Below is a full list of available sub-commands: + +Command | Action | Form | +:-------|:-------|:-----| +`create` | Create or modify a bucket type before activation | `create <type> <json>` | +`activate` | Activate a bucket type | `activate <type>` | +`list` | List all currently available bucket types and their activation status | `list` | +`status` | Display the status and properties of a specific bucket type | `status <type>` | +`update` | Update a bucket type after activation | `update <type> <json>` | + +### Creating a Bucket Type + +Creating new bucket types involves using the `create <type> <json>` +command, where `<type>` is the name of the type and `<json>` is a JSON +object of the following form: + +```json +{ + "props": { + "prop1": "val1", + "prop2": "val2", + ... + } +} +``` + + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/2.9.9/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/2.9.9/developing/getting-started) section. + +If creation is successful, you should see the following output: + +``` +type_using_defaults created +``` + +{{% note %}} +The `create` command can be run multiple times prior to a bucket type being +activated. Riak will persist only those properties contained in the final call +of the command. +{{% /note %}} + +Creating bucket types that assign properties _always_ involves passing +stringified JSON to the `create` command. One way to do that is to pass +a JSON string directly. 
The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak-admin bucket-type create from_json_file "$(cat props.json)"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). Here is an example console output:
+
+```bash
+riak-admin bucket-type list
+```
+
+An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```bash
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock: 20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after a bucket is
+created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then these
+properties are fixed once the type is created.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+
+{{% /note %}}
+
+## Buckets as Namespaces
+
+In versions of Riak prior to 2.0, all queries are made to a bucket/key
+pair, as in the following example read request:
+
+```java
+Location myKey = new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch = new FetchValue.Builder(myKey).build();
+client.execute(fetch);
+```
+
+```ruby
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```php
+$location = new Location('my_key', new Bucket('my_bucket'));
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```csharp
+var id = new RiakObjectId("my_bucket", "my_key");
+client.Get(id);
+```
+
+```javascript
+client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Object} = riakc_pb_socket:get(Pid,
+                                   <<"my_bucket">>,
+                                   <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+```
+
+With the addition of bucket types in Riak 2.0, bucket types can be used
+as _an additional namespace_ on top of buckets and keys. The same bucket
+name can be associated with completely different data if it is used in
+accordance with a different type. Thus, the following two requests will
+be made to _completely different objects_, even though the bucket and key
+names are the same:
+
+```java
+Location key1 =
+  new Location(new Namespace("type1", "my_bucket"), "my_key");
+Location key2 =
+  new Location(new Namespace("type2", "my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(key1).build();
+FetchValue fetch2 = new FetchValue.Builder(key2).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1'));
+$location2 = new Location('my_key', new Bucket('my_bucket', 'type2'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("type1", "my_bucket", "my_key");
+var id2 = new RiakObjectId("type2", "my_bucket", "my_key");
+var rslt1 = client.Get(id1);
+var rslt2 = client.Get(id2);
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'type1', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+
+client.fetchValue({
+    bucketType: 'type2', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"type1">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 {<<"type2">>, <<"my_bucket">>},
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key
+```
+
+{{% note title="Note on object location" %}}
+In Riak 2.x, _all requests_ must be made to a location specified by a bucket
+type, bucket, and key rather than to a bucket/key pair, as in previous
+versions.
+
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
+```
+
+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`.
+
+In version 2.0, this is changing in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolutions in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind when using versions of Riak 2.0 and
+later any time that you create, activate, and use your own bucket types.
+It is still possible to set `allow_mult` to `false` in any given bucket
+type, but it must be done explicitly. If we wanted to set
+`allow_mult` to `false` in our `n_val_of_2` bucket type from above, we
+would need to create or modify the already existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/2.9.9/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/2.9.9/developing/usage/commit-hooks/#post-commit-hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again.
If it still does not
+    work, then there may be a network partition or other issues that need
+    to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key = new Location("sensitive_user_data")
+  .setBucketType("no_siblings")
+  .setKey("user19735");
+RiakObject obj = new RiakObject()
+  .setContentType("application/json")
+  .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{ ... user data ... }")
+  ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` using the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket.
If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket from
+above if we didn't want to deal with siblings, vclocks, and the like,
+and we could use a `siblings_allowed` bucket type (with all of the
+default properties except `allow_mult` set to `true`). This would give
+us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
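+
+To make the namespace isolation concrete, the following hypothetical requests
+read the key `favorite_meme` from two of the four keyspaces above; each can
+return a completely different value (this assumes both bucket types have been
+created and activated):
+
+```curl
+curl http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+```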
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/custom-code.md b/content/riak/kv/2.9.9/using/reference/custom-code.md
new file mode 100644
index 0000000000..94c554a3a9
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/custom-code.md
@@ -0,0 +1,135 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+ riak_kv-2.9.9:
+ name: "Installing Custom Code"
+ identifier: "managing_ref_custom_code"
+ weight: 111
+ parent: "managing_ref"
+toc: true
+aliases:
+ - /riak/2.9.9/ops/advanced/install-custom-code/
+ - /riak/kv/2.9.9/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/2.9.9/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/2.9.9/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. You should note that in Erlang, a file
+must have the same name as the module it contains. So if you are given a file named
+`validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+ on supported platforms
+
+Compiling the module is a straightforward process.
+
+```text
+erlc validate_json.erl
+```
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+Next, you'll need to choose a directory where compiled modules can be stored
+and from which Riak can load them. For our example, we'll use a temporary directory `/tmp/beams`,
+but you should choose a directory for production functions based on your
+own requirements such that they will be available where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+### Configure
+
+Take the `validate_json.beam` file and copy it to the `/tmp/beams` directory.
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+ %% ...
+ {add_paths, ["/tmp/beams/"]},
+ %% ...
+```
+
+After updating `app.config`, Riak must be restarted. In production, you
+should apply configuration changes to multiple nodes in a rolling fashion,
+taking time to ensure that the Riak key/value store has fully initialized
+and become available for use.
+
+This is done with the `riak-admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure riak_kv is active before restarting the next
+node.
+{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.9/using/reference/failure-recovery.md b/content/riak/kv/2.9.9/using/reference/failure-recovery.md new file mode 100644 index 0000000000..8fe3c3cf98 --- /dev/null +++ b/content/riak/kv/2.9.9/using/reference/failure-recovery.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + + diff --git a/content/riak/kv/2.9.9/using/reference/handoff.md b/content/riak/kv/2.9.9/using/reference/handoff.md new file mode 100644 index 0000000000..30644602f8 --- /dev/null +++ b/content/riak/kv/2.9.9/using/reference/handoff.md @@ -0,0 +1,201 @@ +--- +title: "Handoff Reference" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Handoff" + identifier: "managing_ref_handoff" + weight: 101 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.9/ops/running/handoff/ + - /riak/kv/2.9.9/ops/running/handoff/ +--- + +[cluster ops handoff]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/handoff + +Riak is a distributed system built with two essential goals in mind: + +* **fault tolerance**, whereby a Riak cluster can withstand node + failure, network partitions, and other events in a way that does not + disrupt normal functioning, and +* **scalability**, whereby operators can gracefully add and remove nodes + to/from a Riak cluster + +Both of these goals demand that Riak is able to either temporarily or +permanently re-assign responsibility for portions of the keyspace. That +re-assigning is referred to as **intra-cluster handoff** (or simply +**handoff** in our documentation). + +## Types of Handoff + +Intra-cluster handoff typically takes one of two forms: **hinted +handoff** and **ownership transfer**. + +Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +up the slack, so to speak, assuming responsibility for node C's +operations. When node C comes back online, responsibility will be handed +back to the original vnodes. + +Ownership transfer is different because it is meant to be permanent. +It occurs when a [vnode]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +makeup of a cluster changes, e.g. when nodes are added or removed from +the cluster. In this case, responsibility for portions of the keyspace +needs to be fundamentally re-assigned. + +Both types of handoff are handled automatically by Riak. Operators do +have the option, however, of enabling and disabling handoff on +particular nodes or all nodes and of configuring key aspects of Riak's +handoff behavior. More information can be found below. + +## Configuring Handoff + +A full listing of configurable parameters can be found in our +[configuration files]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#intra-cluster-handoff) +document. The sections below provide a more narrative description of +handoff configuration. + +### SSL + +If you want to encrypt handoff behavior within a Riak cluster, you need +to provide each node with appropriate paths for an SSL certfile (and +potentially a keyfile). The configuration below would designate a +certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`: + +```riakconf +handoff.ssl.certfile = /ssl_dir/cert.pem +handoff.ssl.keyfile = /ssl_dir/key.pem +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_ssl_options, [ + {certfile, "/ssl_dir/cert.pem"}, + {keyfile, "/ssl_dir/key.pem"} + ]}, + %% Other configs +]} +``` + +### Port + +You can set the port used by Riak for handoff-related interactions using +the `handoff.port` parameter. 
The default is 8099. This would change the +port to 9000: + +```riakconf +handoff.port = 9000 +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_port, 9000}, + %% Other configs +]} +``` + +### Background Manager + +Riak has an optional background manager that limits handoff activity in +the name of saving resources. The manager can help prevent system +response degradation during times of heavy load, when multiple +background tasks may contend for the same system resources. The +background manager is disabled by default. The following will enable it: + +```riakconf +handoff.use_background_manager = on +``` + +```appconfig +{riak_kv, [ + %% Other configs + {handoff_use_background_manager, on}, + %% Other configs +]} +``` + +### Maximum Rejects + +If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search/), +those subsystems can block handoff of primary key/value data, i.e. data +that you interact with via normal reads and writes. + +The `handoff.max_rejects` setting enables you to set the maximum +duration that a [vnode]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode) can be blocked by multiplying the +`handoff.max_rejects` setting by the value of +[`vnode_management_timer`]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#vnode_management_timer). +Thus, if you set `handoff.max_rejects` to 10 and +`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems +can block K/V handoff for a maximum of 50 seconds. The default for +`handoff.max_rejects` is 6, while the default for +`vnode_management_timer` is `10s`. This would set `max_rejects` to 10: + +```riakconf +handoff.max_rejects = 10 +``` + +```appconfig +{riak_kv, [ + %% Other configs + {handoff_rejected_max, 10}, + %% Other configs +]} +``` + +### Transfer Limit + +You can adjust the number of node-to-node transfers (which includes +handoff) using the `transfer_limit` parameter. The default is 2. Setting +this higher will increase node-to-node communication but at the expense +of higher resource intensity. This would set `transfer_limit` to 5: + +```riakconf +transfer_limit = 5 +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_concurrency, 5}, + %% Other configs +]} +``` + +## Enabling and Disabling Handoff + +Handoff can be enabled and disabled in two ways: via configuration or +on the command line. + +### Enabling and Disabling via Configuration + +You can enable and disable both outbound and inbound handoff on a node +using the `handoff.outbound` and `handoff.inbound` settings, +respectively. Both are enabled by default. The following would disable +both: + +```riakconf +handoff.outbound = off +handoff.inbound = off +``` + +```appconfig +{riak_core, [ + %% Other configs + {disable_outbound_handoff, true}, + {disable_inbound_handoff, true}, + %% Other configs +]} +``` + +### Enabling and Disabling Through the Command Line + +Check out the [Cluster Operations: Handoff][cluster ops handoff] for steps on enabling and disabling handoff via the command line. 
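+
+For quick reference, here is a hedged sketch of that command-line interface
+(these `riak-admin handoff` subcommands are available in Riak KV 2.x;
+confirm against your installed version):
+
+```bash
+# Disable both inbound and outbound handoff on the local node,
+# e.g. ahead of maintenance that should not trigger transfers
+riak-admin handoff disable both
+
+# Re-enable handoff once maintenance is complete
+riak-admin handoff enable both
+
+# Inspect current handoff activity across the cluster
+riak-admin handoff summary
+```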
+ + + + diff --git a/content/riak/kv/2.9.9/using/reference/jmx.md b/content/riak/kv/2.9.9/using/reference/jmx.md new file mode 100644 index 0000000000..89c090a9b5 --- /dev/null +++ b/content/riak/kv/2.9.9/using/reference/jmx.md @@ -0,0 +1,190 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.9/ops/running/monitoring/jmx + - /riak/kv/2.9.9/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td> + <td>float</td> + <td>Mean PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMedian</tt></td> + <td>float</td> + <td>Median PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePuts</tt></td> + <td>int</td> + <td>Number of PUTs in past minute</td> + </tr> + <tr> + <td><tt>NodePutsTotal</tt></td> + <td>int</td> + <td>Number of PUTs since node start</td> + </tr> + <tr> + <td><tt>PBCActive</tt></td> + <td>int</td> + <td>Number of active Protocol Buffers connections</td> + </tr> + <tr> + <td><tt>PBCConnects</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections in past minute</td> + </tr> + <tr> + <td><tt>PBCConnectsTotal</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections since node start</td> + </tr> + <tr> + <td><tt>RingCreationSize</tt></td> + <td>int</td> + <td>Number of partitions in Riak ring</td> + </tr> + <tr> + <td><tt>VnodeGets</tt></td> + <td>int</td> + <td>Number of vnode-level GETs in past minute</td> + </tr> + <tr> + <td><tt>VnodeGetsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level GETs since node start</td> + </tr> + <tr> + <td><tt>VnodePuts</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs in past minute</td> + </tr> + <tr> + <td><tt>VnodePutsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs since node start</td> + </tr> +</table> + + + + diff --git a/content/riak/kv/2.9.9/using/reference/logging.md b/content/riak/kv/2.9.9/using/reference/logging.md new file mode 100644 index 0000000000..a1da8e63bb --- /dev/null +++ b/content/riak/kv/2.9.9/using/reference/logging.md @@ -0,0 +1,301 @@ +--- +title: "Logging Reference" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Logging" + identifier: "managing_ref_logging" + weight: 100 + parent: "managing_ref" +toc: true +aliases: + - /riak/2.9.9/ops/running/logging + - /riak/kv/2.9.9/ops/running/logging +--- + +[cluster ops log]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/logging + +Logging in Riak KV is handled by a Basho-produced logging framework for +[Erlang](http://www.erlang.org) called +[lager](https://github.com/basho/lager). + +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. + +## Log Directory + +Riak's log files are stored in a `/log` directory on each node. The +location of that directory differs from platform to platform. The table +below shows you where log files are stored on all supported operating +systems. + +OS | Directory +:--|:--------- +Ubuntu, Debian, CentOS, RHEL | `/var/log/riak` +Solaris, OpenSolaris | `/opt/riak/log` +Source install and Mac OS X | `./log` (where the `.` represents the root installation directory) + +## Log Files + +Below is a list of files that can be found in each node's `/log` +directory: + +File | Significance +:----|:------------ +`console.log` | Console log output +`crash.log` | Crash logs +`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. +`run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. 
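+
+As a quick sanity check (a minimal sketch; the path shown assumes a packaged
+install on Ubuntu, Debian, CentOS, or RHEL - substitute the directory for
+your platform from the table above):
+
+```bash
+# List the current log files and follow new console output
+ls /var/log/riak
+tail -f /var/log/riak/console.log
+```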
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.9.9 a few of the log messages may contain newline
+characters, preventing reliable identification of the end of each log
+entry when ingesting log files with external tools.
+
+A known workaround is to ingest not the logs enabled by the
+`log.console` configurable parameter but rather the logs enabled by the
+`log.syslog` configurable parameter and processed by syslog,
+e.g. by exploiting the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option (e.g. see [this StackExchange topic
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474))
+- or equivalent - of syslog implementations.
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log Files
+
+In each node's `/log` directory, you will see at least one of each of
+the following:
+
+File | Contents
+:----|:--------
+`console.log` | General messages from all Riak subsystems
+`crash.log` | Catastrophic events, such as node failures, running out of disk space, etc.
+`erlang.log` | Events from the Erlang VM on which Riak runs
+`run_erl.log` | The command-line arguments used when starting Riak
+
+### Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+nontraditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable it
+and disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+By default, Riak stores error messages in `./log/error.log`.
+You can change this using the `log.error.file` parameter.
Here is an
+example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated either when a certain size threshold is reached and/or at
+designated times.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` - Every night at midnight
+* `$D23` - Every day at 23:00 (11 pm)
+* `$W0D20` - Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` - On the first day of every month at midnight
+* `$M5D6` - On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, using any size denomination you
+wish, e.g. `KB` or `MB`. The default is 64 KB. The following would set the
+maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix to be appended to
+each syslog message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages from amongst the available levels, which are listed below.
+The default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither.
This is determined by the value that you give to the
+`log.console` parameter, which gives you one of four options:
+
+* `file` - Console logs will be emitted to a file. This is Riak's
+ default behavior. The location of that file is determined by the
+ `log.console.file` parameter. The default location is
+ `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/2.9.9/setup/installing/source), but will differ on platform-specific installations,
+ e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+ `/opt/riak/log` on Solaris-based platforms.
+* `console` - Console logs will be emitted to standard output, which
+ can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-cli/#attach-direct) command
+* `both` - Console logs will be emitted both to a file and to standard
+ output
+* `off` - Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log]
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/multi-datacenter.md b/content/riak/kv/2.9.9/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..16ba66443b
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/multi-datacenter.md
@@ -0,0 +1,53 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+ riak_kv-2.9.9:
+ name: "Multi-Datacenter"
+ identifier: "managing_ref_mdc"
+ weight: 113
+ parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling & disabling of per bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][ref mdc comparison]
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/multi-datacenter/comparison.md b/content/riak/kv/2.9.9/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..3b3c2e07cf
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,100 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+ riak_kv-2.9.9:
+ name: "Comparison"
+ identifier: "managing_ref_mdc_comparison"
+ weight: 103
+ parent: "managing_ref_mdc"
+toc: true
+aliases:
+ - /riak/2.9.9/ops/mdc/comparison
+ - /riak/kv/2.9.9/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/2.9.9/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/2.9.9/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+ and **sites**. Listeners are the sources of replication data, while
+ sites are the destinations of replication data. Sites and listeners are
+ manually configured on each node in a cluster. This can be a burden to
+ the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+ replication communications. This can cause performance problems on the
+ leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+ confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+ connection breaks, even if it's re-established. Reconciling data in
+ this situation would require manual intervention using either of the
+ following:
+ * a fullsync
+ * another Riak write to the key/value on the listener, thus
+ re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+ single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+ version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+ technology preview and did not have feature parity with version 2.
+ Feature parity was achieved in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+ **sinks**. A source is considered the primary provider of replication
+ data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+ greatly simplified. A single `riak-repl connect` command needs to be
+ issued from a source cluster to a sink cluster. IP and port
+ information of all nodes that can participate in replication on both
+ source and sink clusters are exchanged by the **replication cluster
+ manager**. The replication cluster manager also tracks nodes joining
+ and leaving the cluster dynamically.
+* If the source has M nodes, and the sink has N nodes, there will be M
+ realtime connections.
Connections aren't tied to a leader node as they
+ are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+ multiplexed over the same connection for each node participating in
+ replication. This reduces the amount of firewall configuration on both
+ sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+ coordinator assigns work across nodes in the source cluster in an
+ optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+ that is shared between *all* sinks. This queue requires consumers to
+ acknowledge objects when they have been replicated. Dropped TCP
+ connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+ realtime replication queue is migrated to other running nodes in the
+ source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+ of workers that will run on a source node, a sink node, and across the
+ entire source cluster. This allows for limiting impact on the cluster
+ and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/) \(AAE)
+ technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+ source cluster (as CS blocks are requested from the sink cluster in
+ this scenario).
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/2.9.9/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..fef60459ae
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,168 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+ riak_kv-2.9.9:
+ name: "Monitoring"
+ identifier: "managing_ref_mdc_monitor"
+ weight: 102
+ parent: "managing_ref_mdc"
+toc: true
+aliases:
+ - /riak/2.9.9/ops/mdc/monitoring
+ - /riak/kv/2.9.9/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identification and trending of issues or delays in realtime replication
+is important for identifying a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+ status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+ from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute.
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The size of `rt_dirty` can quantify the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue backups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+increases to the maximum size of the queue (displayed in the
+`realtime_queue_stats` section of the replication status output as
+`max_bytes`) can be made to accommodate a usage pattern of expected high
+load.
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
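+
+Before moving on to canary objects, here is a minimal sketch of the
+statistics-polling approach (assuming a node reachable at `127.0.0.1:8098`;
+adjust the host and port for your cluster). It samples once per minute,
+matching the sliding 60-second window noted above, and archives the output
+for later graphing:
+
+```bash
+# Poll replication stats no more than once per minute and append them,
+# timestamped, to a file for later trending/graphing.
+while true; do
+  echo "$(date -u +%FT%TZ) $(curl -s http://127.0.0.1:8098/riak-repl/stats)" >> riak-repl-stats.log
+  sleep 60
+done
+```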
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+* Perform a GET for your canary object on both your source and sink
+ clusters, noting their states. The state of the object in each cluster
+ can be referred to as state `S0`, or the object's initial state.
+* PUT an update for your canary object to the source cluster, updating
+ the state of the object to the next state, `S1`.
+* Perform a GET for your canary on the sink cluster, comparing the state
+ of the object on the source cluster to the state of the object on the
+ sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+ within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+ cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+ state of the object on the sink cluster, repeat step 3 until an SLA
+ time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+ meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+A rough estimate of how long it takes an object PUT to a source
+cluster to be replicated to a sink cluster can be obtained by either:
+
+* Comparing the time the object was PUT to the source with the time the
+ states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+ states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
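+
+As an illustration of the general process, here is a hypothetical canary
+check over Riak's HTTP API (the `SOURCE`/`SINK` hosts, the `canary` bucket,
+the `repl_canary` key, and the 30-second SLA are all placeholder
+assumptions; adapt them to your environment):
+
+```bash
+#!/usr/bin/env bash
+# Write a new canary state to the source cluster, then poll the sink
+# until the value matches or the SLA window expires.
+SOURCE="http://source-node:8098"
+SINK="http://sink-node:8098"
+KEY="buckets/canary/keys/repl_canary"
+STATE=$(date +%s)   # a value that changes on every run
+
+curl -s -XPUT -H "Content-Type: text/plain" -d "$STATE" "$SOURCE/$KEY"
+
+for i in $(seq 1 30); do
+  if [ "$(curl -s "$SINK/$KEY")" = "$STATE" ]; then
+    echo "canary replicated within ${i}s"
+    exit 0
+  fi
+  sleep 1
+done
+
+echo "SLA exceeded: canary not replicated within 30s" >&2
+exit 1
+```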
+ + + + diff --git a/content/riak/kv/2.9.9/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/2.9.9/using/reference/multi-datacenter/per-bucket-replication.md new file mode 100644 index 0000000000..d2b1c2ac5a --- /dev/null +++ b/content/riak/kv/2.9.9/using/reference/multi-datacenter/per-bucket-replication.md @@ -0,0 +1,66 @@ +--- +title: "Multi-Datacenter Replication Reference: Per Bucket" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Per Bucket" + identifier: "managing_ref_mdc_per_bucket" + weight: 101 + parent: "managing_ref_mdc" +toc: true +aliases: + - /riak/2.9.9/ops/mdc/per-bucket + - /riak/kv/2.9.9/ops/mdc/per-bucket +--- + +To enable or disable replication per bucket, you can use the `repl` +bucket property. + +Some changes have occurred between 1.1 and 1.2. + +These `repl` values are available in Riak Enterprise version 1.1 and +above: + + * `true` - Enable replication (realtime + fullsync) + * `false` - Disable replication (realtime + fullsync) + +These option values are only available in Riak Enterprise version 1.2 +and above: + + * `realtime` - Replication only occurs in realtime for this bucket + * `fullsync` - Replication only occurs during a fullsync operation + * `both` - Replication occurs in realtime and during fullsync + +### Example of Disabling + +```curl +curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \ + -H "Content-Type: application/json" \ + -d '{"props":{"repl":false}}' +``` + +### Example of Enabling + +```curl +curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \ + -H "Content-Type: application/json" \ + -d '{"props":{"repl":true}}' +``` + +## How Bucket Properties Work in Riak KV + +When using Multi-Datacenter Replication, each bucket's write properties +are derived from the bucket's properties in the destination cluster. If +the bucket doesn't exist, the default properties of the destination +cluster are used. + +It's important to note that this goes for properties such as `backend`. +If the bucket doesn't exist in the destination cluster, Riak will create +it with the default backend and _not_ with the backend used in the +source cluster. + + + + diff --git a/content/riak/kv/2.9.9/using/reference/multi-datacenter/statistics.md b/content/riak/kv/2.9.9/using/reference/multi-datacenter/statistics.md new file mode 100644 index 0000000000..ba4b575e93 --- /dev/null +++ b/content/riak/kv/2.9.9/using/reference/multi-datacenter/statistics.md @@ -0,0 +1,244 @@ +--- +title: "Multi-Datacenter Replication Reference: Statistics" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Statistics" + identifier: "managing_ref_mdc_stats" + weight: 100 + parent: "managing_ref_mdc" +toc: true +aliases: + - /riak/2.9.9/ops/mdc/statistics + - /riak/kv/2.9.9/ops/mdc/statistics +--- + +The following definitions describe the output of `riak-repl status`. +Both Version 2 and Version 3 Replication statistics can be obtained +using the `riak-repl status` command. + +There are two things that you should note: + +1. Many of these statistics will appear only on the current + leader node +2. 
The counts for all statistics will be reset to 0 upon restarting Riak
+ unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | A sink error has been detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | A source error has been detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+These values are under `realtime_queue_stats`.
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster.
+`fullsync_start_time` | The time the current fullsync to the specified cluster began.
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync.
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When completed, this will be the same as the total number of partitions in the ring.
+`error_exits` | If a sync failed or was aborted, the partition will be queued again and retried later
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
+`strategy` | The strategy that fulfills fullsync replication.
In previous versions of replication, different values could be configured. This value could be changed depending on your replication needs.
+`fullsync_worker` | The Erlang process id of the fullsync worker.
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received by the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received by the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication sink
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node.
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The name of the connected site (sink), as configured. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. 
They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for
+more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size (in bytes) of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects that have been sent but that the client has not yet acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before the queue refuses to send any more.
+
+
+## Accessing Replication Web-Based Statistics
+
+These stats can be accessed via the command line with the following
+command:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats
+```
+
+A simple way to view formatted statistics is to use a command such as:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/object-deletion.md b/content/riak/kv/2.9.9/using/reference/object-deletion.md
new file mode 100644
index 0000000000..7b6eeef473
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/object-deletion.md
@@ -0,0 +1,121 @@
+---
+title: "Object Deletion Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Object Deletion"
+    identifier: "managing_ref_object_deletion"
+    weight: 103
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/advanced/deletion
+---
+
+[concept eventual consistency]: ../../../learn/concepts/eventual-consistency
+[concept clusters]: ../../../learn/concepts/clusters
+[glossary vnode]: ../../../learn/glossary/#vnode
+[usage delete objects]: ../../../developing/usage/deleting-objects
+[developing keylist]: ../../../developing/api/http/list-keys
+[developing mapreduce]: ../../../developing/usage/mapreduce
+[cluster mdc]: ../../cluster-operations/v3-multi-datacenter
+[config advanced]: ../../../configuring/reference/#advanced-configuration
+[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum
+[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings
+[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction
+
+In single-server, non-clustered data storage systems, object deletion
+is a trivial process. 
In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however,
+object deletion is far less trivial because objects live on multiple
+[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend.
+
+## Object Deletion Example
+
+The problem of object deletion in distributed systems can be illustrated more concretely using the following example:
+
+* An object is stored on nodes A, B, and C
+* Node C suddenly goes offline due to a network failure
+* A client sends a delete request to node A, which forwards that
+  request to node B, but it cannot reach node C
+* On nodes A and B, the object is deleted
+* Node C comes back online
+* A client attempts to read the object, and the request hits node C
+* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object.
+
+The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred:
+
+1. the object was deleted on A & B but not on C
+2. the object was created on C but not on A & B
+
+To get around this problem, Riak uses *tombstones*.
+
+## Tombstones
+
+Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it.
+
+This allows Riak to understand the difference between an object that has been deleted and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*.
+
+The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**.
+
+After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to).
+
+## Configuring Object Deletion
+
+The `delete_mode` setting in a cluster's [configuration files][config advanced] determines how long a tombstone will remain before being reaped.
+
+There are three possible settings:
+
+* `keep` - Disables tombstone removal
+* `immediate` - The tombstone is removed as soon as the request is
+  received
+* Custom time interval - How long to wait until the tombstone is
+  removed, expressed in milliseconds. The default is `3000`, i.e. to
+  wait 3 seconds
+
+In general, we recommend setting the `delete_mode` parameter to `keep`
+if you plan to delete and recreate objects under the same key. This protects against failure scenarios in which a deleted object may be resurrected.
+
+Setting `delete_mode` to `immediate` can be useful in situations in
+which an aggressive space reclamation process is necessary, such as
+when running [MapReduce jobs][developing mapreduce], but we do not recommend
+this in general.
+
+Setting `delete_mode` to a longer time duration than the default can be
+useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when
+network connectivity is an issue.
+
+## Deletion from Backends
+
+When attempting to reclaim disk space, deleting data may seem like the obvious first step. 
However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion when a Riak tombstone is created, and later when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging], for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping, and prior to garbage collection, delete operations will actually cause disk space usage to rise slightly.
+
+## Tombstones & Reporting
+
+When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:
+
+* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
+* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain on the node until a further request finds it. At this time, a new reap timer will be initiated.
+* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
+* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too.
+* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.
+
+## Client Library Examples
+
+Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side. 
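+
+As a quick illustration, here is what a delete looks like over the HTTP API. This is only a sketch; the bucket name, key, and default port shown here are hypothetical placeholders:
+
+```curl
+# Issue a delete; Riak writes a tombstone rather than removing the object outright
+curl -XDELETE http://localhost:8098/buckets/mybucket/keys/mykey
+```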
+
+## Resources
+
+* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html)
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/runtime-interaction.md b/content/riak/kv/2.9.9/using/reference/runtime-interaction.md
new file mode 100644
index 0000000000..ca6b53ea6b
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/runtime-interaction.md
@@ -0,0 +1,70 @@
+---
+title: "Runtime Interaction Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Runtime Interaction"
+    identifier: "managing_ref_runtime_interaction"
+    weight: 104
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/advanced/runtime
+  - /riak/kv/2.9.9/ops/advanced/runtime
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters
+
+Riak's [configuration files][config reference] provide a variety of parameters that
+enable you to fine-tune how Riak interacts with two important elements
+of the underlying operating system: distribution ports and OS
+processes/garbage collection.
+
+## Ports
+
+Distribution ports connect Riak nodes within a [cluster][concept clusters]. The
+following port-related parameters are available:
+
+* `runtime_health.triggers.distribution_port` - Whether distribution
+  ports with full input buffers will be counted as busy.
+  * Default: `on`
+* `runtime_health.triggers.port` - Whether ports with full input
+  buffers will be counted as busy. Ports can represent open files or network sockets.
+  * Default: `on`
+* `runtime_health.thresholds.busy_ports` - The threshold at which a
+  warning will be triggered about the number of ports that are overly
+  busy. Ports with full input buffers count toward this threshold.
+  * Default: `2`
+
+## Processes
+
+Riak will log warnings related to busy operating system processes and
+garbage collection. You can specify the conditions under which warnings are
+triggered using the following parameters:
+
+* `runtime_health.thresholds.busy_processes` - The threshold at which
+  a warning will be triggered about the number of processes that are
+  overly busy. Processes with large heaps or that take a long time to
+  garbage collect will count toward this threshold.
+  * Default: `30`
+* `runtime_health.triggers.process.heap_size` - A process will be
+  marked as busy when its heap exceeds this size (in bytes).
+  * Default: `160444000`
+* `runtime_health.triggers.process.garbage_collection` - A process
+  will be marked as busy when it exceeds this amount of time doing
+  garbage collection. Enabling this setting can cause performance
+  problems on multi-core systems.
+  * Default: `off`
+  * Example when enabled: `50ms`
+* `runtime_health.triggers.process.long_schedule` - A process will be
+  marked as busy when it exceeds this length of time during a single
+  process scheduling and execution cycle. 
+  * Default: `off`
+  * Example when enabled: `20ms`
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/search.md b/content/riak/kv/2.9.9/using/reference/search.md
new file mode 100644
index 0000000000..caececf0a3
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/search.md
@@ -0,0 +1,457 @@
+---
+title: "Search Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Search"
+    identifier: "managing_ref_search"
+    weight: 109
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/advanced/search
+  - /riak/kv/2.9.9/dev/advanced/search
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters
+[configuring search]: {{<baseurl>}}riak/kv/2.9.9/configuring/search
+
+> **Note on search 2.0 vs. legacy search**
+>
+> This document refers to Riak search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+The project that implements Riak search is codenamed Yokozuna. This is a
+more detailed overview of the concepts and reasons behind the design of
+Yokozuna, for those interested. If you're simply looking to use Riak
+search, you should check out the [Using Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search) document.
+
+![Yokozuna]({{<baseurl>}}images/yokozuna.png)
+
+## Riak Search is Erlang
+
+In Erlang OTP, an "application" is a group of modules and Erlang
+processes which together perform a specific task. The word application
+is confusing because most people think of an application as an entire
+program such as Emacs or Photoshop. But Riak Search is just a subsystem
+in Riak itself. Erlang applications are often stand-alone, but Riak
+Search is more like an appendage of Riak. It requires other subsystems
+like Riak Core and KV, but also extends their functionality by providing
+search capabilities for KV data.
+
+The purpose of Riak Search is to bring more sophisticated and robust
+query and search support to Riak. Many people consider Lucene and
+programs built on top of it, such as Solr, to be the standard for
+open-source search. There are many successful applications built on
+Lucene/Solr, and it sets the standard for the feature set that
+developers and users expect. Meanwhile, Riak has a great story as a
+highly-available, distributed key/value store. Riak Search takes
+advantage of the fact that Riak already knows how to do the distributed
+bits, combining its feature set with that of Solr, taking advantage of
+the strengths of each.
+
+Riak Search is a mediator between Riak and Solr. There is nothing
+stopping a user from deploying these two programs separately, but this
+would leave the user responsible for the glue between them. That glue
+can be tricky to write. It requires dealing with monitoring, querying,
+indexing, and dissemination of information.
+
+Unlike Solr by itself, Riak Search knows how to do all of the following:
+
+* Listen for changes in key/value (KV) data and make the appropriate
+  changes to indexes that live in Solr. It also knows how to take a user
+  query on any node and convert it to a Solr distributed search, which
+  will correctly cover the entire index without overlap in replicas.
+* Take index creation commands and disseminate that information across
+  the cluster.
+* Communicate with and monitor the Solr OS process.
+
+## Solr/JVM OS Process
+
+Every node in a Riak [cluster][concept clusters] has a corresponding operating
+system (OS) process running a JVM which hosts Solr on the Jetty
+application server. 
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
+
+Because of this mismatch between KV and Solr, Riak Search must act as a
+mediator between the two, meaning it must have a way to inspect a KV
+object and create a structure which Solr can ingest for indexing. In
+Solr this structure is called a **document**. This task of creating a
+Solr document from a Riak object is the job of the **extractor**. To
+perform this task, two things must be considered.
+
+**Note**: This isn't quite right: the fields created by the extractor
+are only a subset of the fields in the final document. Special fields needed
+for Yokozuna to properly query data, as well as tagging fields, are also
+created. This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a
+   Solr document?
+2. If so, how is the object's value mapped from one to the other?
+   For example, the value may be `application/json` which contains
+   nested objects. This must somehow be transformed into a flat
+   structure.
+
+The first question is answered by the _extractor mapping_. By default
+Yokozuna ships with extractors for several common data types. Below is a
+table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation
+of the extractor module. Every extractor must conform to the
+following Erlang specification:
+
+```erlang
+-spec extract(ObjectValue::binary(), Options::proplist()) -> fields() | {error, term()}.
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+```
+
+The value of the object is passed along with options specific to each
+extractor. Assuming the extractor correctly parses the value, it will
+return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the
+object's value verbatim and associate it with the field name `text`.
+For example, an object with the value "How much wood could a woodchuck
+chuck if a woodchuck could chuck wood?" would result in the following
+fields list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier.
+JSON can be nested arbitrarily. That is, the key of a top-level object
+can have an object as a value, and this object can have another object
+nested inside, and so on. Yokozuna's JSON extractor must have some method
+of converting this arbitrary nesting into a flat list. It does this by
+concatenating nested object fields with a separator. The default
+separator is `.`. An example should make this clearer.
+
+Below is JSON that represents a person, what city they are from, and what
+cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice.
+
+* Nested objects have their field names concatenated to form a field
+  name. The default field separator is `.`. This can be modified. 
+* Any array causes field names to repeat. This requires that your
+  schema define this field as multi-valued.
+
+The XML extractor works in a very similar fashion to the JSON extractor,
+except it also has element attributes to worry about. To see the
+document created for an object, without actually writing the object, you
+can use the extract HTTP endpoint. This will do a dry-run extraction and
+return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+  -H 'Content-Type: application/json' \
+  --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and
+types. For each document stored, every field must have a matching name
+in the schema, used to determine the field's type, which in turn
+determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempt to hide any details of the Solr
+schema: a user creates a schema for Yokozuna just as she would for Solr.
+Here is the general structure of a schema.
+
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding
+options are declared. Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
+called `_yz_default`. This is an extremely general schema which makes
+heavy use of dynamic fields---it is intended for development and
+testing. In production, a schema should be tailored to the data being
+indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+correcting entropy (divergence) between the data stored in Riak's
+key-value backend and the indexes stored in Solr. The impetus for AAE is
+that failures come in all shapes and sizes---disk failure, dropped
+messages, network partitions, timeouts, overflowing queues, segmentation
+faults, power outages, etc. Failures range from obvious to invisible.
+Failure prevention is fraught with failure, as well. How do you prevent
+your prevention system from failing? You don't. Code for detection, not
+prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite
+expensive. To minimize the overall cost of detection, AAE makes use of
+hashtrees. Every partition has a pair of hashtrees; one for KV and
+another for Yokozuna. As data is written, the hashtrees are updated in
+real time.
+
+Each tree stores the hash of the object. Periodically a partition is
+selected and the pair of hashtrees is _exchanged_. First the root hashes
+are compared. If they are equal, then there is no more work to do. 
You could have
+millions of keys in one partition and verifying they **all** agree takes
+the same time as comparing two hashes. If they don't match, then the
+root's children are checked, and this process continues until the
+individual discrepancies are found. If either side is missing a key or
+the hashes for a key do not match, then _repair_ is invoked on that key.
+Repair converges the KV data and its indexes, removing the entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the
+hashtrees themselves may contain some entropy. For example, what if the
+root hashes agree but a divergence exists in the actual data? Simple:
+you assume you can never fully trust the hashtrees, so periodically you
+_expire_ them. When expired, a tree is completely destroyed and then
+re-built from scratch. This requires folding all data for a partition,
+which can be expensive and take some time. For this reason, by default,
+expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
+[screencast](http://coffee.jtuple.com/video/AAE.html).
+
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a
+stream of tokens. Solr allows many different methods of analysis,
+an important fact because different field values may represent
+different types of data. For data like unique identifiers, dates, and
+categories, you want to index the value verbatim---it shouldn't be
+analyzed at all. For text like product summaries, or a blog post,
+you want to split the value into individual words so that they may be
+queried individually. You may also want to remove common words,
+lowercase words, or perform stemming. This is the process of
+_analysis_.
+
+Solr provides many different field types which analyze data in different
+ways, and custom analyzer chains may be built by stringing together XML
+in the schema file, allowing custom analysis for each field. For more
+information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via
+Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata
+   about it that should be indexed, for example storing an image with
+   location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must
+   be added _without_ modifying the object's value.
+
+See
+[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
+for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index
+entries for a given Riak Object are co-located on the same physical
+machine. To query the entire index, all partitions must be contacted.
+Adjacent partitions keep replicas of the same object. Replication allows
+the entire index to be considered by only contacting a subset of the
+partitions. The process of finding a covering set of partitions is known
+as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can
+be thought of as a unique set of nodes along with a covering set of
+partitions. Yokozuna treats the node list as physical hostnames and
+passes them to Solr's distributed search via the `shards` parameter.
+Partitions, on the other hand, are treated logically in Yokozuna. All
+partitions for a given node are stored in the same index; unlike KV,
+which uses _partition_ as a physical separation. 
To properly filter out
+overlapping replicas, the partition data from the cover plan is passed to
+Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very
+expensive operation, as much computation is done symbolically, and the
+process amounts to a knapsack problem. The larger the ring, the more
+expensive the operation. Yokozuna takes advantage of the fact that it has
+no physical partitions by computing a coverage plan asynchronously every few
+seconds, caching the plan for query use. In the case of node failure or
+ownership change, this could mean a delay between cluster state and the
+cached plan. This is, however, a good trade-off given the performance
+benefits, especially since even without caching there is a race, albeit
+one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time a batch is sent to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time a batch is sent to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects to the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time a drain is initiated to the time the drain is completed.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees. 
Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr.
+
+* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting.
+
+* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window.
+
+* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window.
+
+* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of query latency, as measured from the time a request is sent to Solr to the time the response is received from Solr.
+
+* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window.
+
+* `search_index_bad_entry_count` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+
+* `search_index_bad_entry_one` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute.
+
+* `search_index_extract_fail_count` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak.
+
+* `search_index_extract_fail_one` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute.
+
+While most of the default values are sufficient, you may have to
+increase [`search.solr.start_timeout`][configuring search] as more data is indexed, since a larger index may cause Solr to require more time to start.
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/secondary-indexes.md b/content/riak/kv/2.9.9/using/reference/secondary-indexes.md
new file mode 100644
index 0000000000..69087f1555
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/secondary-indexes.md
@@ -0,0 +1,76 @@
+---
+title: "Secondary Indexes Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Secondary Indexes"
+    identifier: "managing_ref_2i"
+    weight: 110
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.9/dev/advanced/2i
+  - /riak/kv/2.9.9/dev/advanced/2i
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/2.9.9/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. 
+
+This document provides implementation and other details for Riak's
+[secondary indexes]({{<baseurl>}}riak/kv/2.9.9/developing/usage/secondary-indexes/) \(2i) feature.
+
+## How It Works
+
+Secondary indexes use **document-based partitioning**, a system in which
+indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode). In
+other words, each index is a local index. Secondary indexes are a list of key/value
+pairs that are similar to HTTP headers. At write time, objects are
+tagged with index entries consisting of key/value metadata. This
+metadata can be queried to retrieve the matching keys.
+
+![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png)
+
+Indexes reside on multiple machines. Since indexes for an object are
+stored on the same partition as the object itself, query-time
+performance issues might arise. When issuing a query, the system must
+read from a "covering" set of partitions and then merge the results.
+The system looks at how many replicas of data are stored---the N value
+or `n_val`---and determines the minimum number of partitions that it
+must examine (1 / `n_val`) to retrieve a full set of results, also
+taking into account any offline nodes.
+
+An application can modify the indexes for an object by reading an
+object, adding or removing index entries, and then writing the object.
+Finally, an object is automatically removed from all indexes when it is
+deleted. The object's value and its indexes should be thought of as a
+single unit. There is no way to alter the indexes of an object
+independently of its value, and vice versa. Indexing is
+atomic, and is updated in real time when writing an object. This means
+that an object will be present in future index queries as soon as the
+write operation completes.
+
+Riak stores 3 replicas of all objects by default, although this can be
+changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/replication-properties). The system is capable of generating a full set of results
+from one third of the system’s partitions as long as it chooses the
+right set of partitions. The query is sent to each partition, the index
+data is read, and a list of keys is generated and then sent back to the
+requesting node.
+
+> **Note on 2i and strong consistency**
+>
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
+secondary index metadata to those objects, you can still perform
+strongly consistent operations on those objects but the secondary
+indexes will be ignored. 
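+
+To make the write-time tagging described above concrete, here is a minimal sketch using the HTTP API. The bucket, key, and index names are hypothetical, and the `_bin` suffix marks a binary (string) index:
+
+```curl
+# Store an object tagged with a secondary index entry
+curl -XPOST http://localhost:8098/buckets/users/keys/john_smith \
+  -H 'x-riak-index-twitter_bin: jsmith123' \
+  -H 'Content-Type: application/json' \
+  -d '{"name": "John Smith"}'
+
+# Query the index to retrieve the matching keys
+curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```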
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/snmp.md b/content/riak/kv/2.9.9/using/reference/snmp.md
new file mode 100644
index 0000000000..787d26df81
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/snmp.md
@@ -0,0 +1,166 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.9/ops/running/monitoring/snmp
+  - /riak/kv/2.9.9/ops/running/monitoring/snmp
+---
+
+Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document covers only SNMP v2c, the last supported version. Support for SNMP was dropped after the release of Riak KV 2.2.3 Enterprise Edition. The configuration examples below are left for people analysing legacy settings and only work with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (e.g., `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: the IP address is written with commas, which is the correct format
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                   {force_load, true}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings, you can start your Riak node and use `snmpwalk` to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak, you will need to reference the MIB shipped with Riak. 
For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+  -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+  192.168.52.129:4000 RIAK
+```
+
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in the past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in the past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in the past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in the past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/statistics-monitoring.md b/content/riak/kv/2.9.9/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..2d4d12d696
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/statistics-monitoring.md
@@ -0,0 +1,395 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/running/stats-and-monitoring
+  - /riak/kv/2.9.9/ops/running/stats-and-monitoring
+---
+
+Riak provides data related to current operating status, which includes
+statistics in the form of counters and histograms. These statistics
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered
+statistics, as well as numerous solutions for monitoring and gathering
+statistics that our customers and community report using successfully
+in Riak cluster environments. You can learn more about the specific
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/status) documentation.
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with
+diagnostics and early warnings of potential problems, as well as help
+guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor Load
+* Available Memory
+* Available disk space
+* Used file descriptors
+* Swap Usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory activity and
+writebacks. Things like massive flushes of dirty pages or steadily
+climbing writeback volumes can indicate poor virtual memory tuning. 
+More information can be found [here][sysctl_vm_txt] and in our
+documentation on [system tuning]({{<baseurl>}}riak/kv/2.9.9/using/performance/#storage-and-file-system-tuning).
+
+## Riak Metrics to Graph
+
+Riak metrics fall into several general categories:
+
+1. Throughput metrics
+2. Latency metrics
+3. Erlang resource usage metrics
+4. General Riak load/health metrics
+
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/inspecting-node) is
+not practical, you should pick a minimum relevant subset from these
+categories. Some of the most helpful metrics are discussed below.
+
+### Throughput Metrics
+
+Graphing the throughput stats relevant to your use case is often
+helpful for capacity planning and usage trend analysis. In addition,
+it helps you establish an expected baseline -- that way, you can
+investigate unexpected spikes or dips in the throughput. The
+following stats are recorded for operations that happened *during the
+last minute*.
+
+Metric | Relevance | Operations (for the last minute)
+:--------|:--------|:--------------------------------
+```node_gets``` | K/V | Reads coordinated by this node
+```node_puts``` | K/V | Writes coordinated by this node
+```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes
+```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes
+```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes
+```search_query_throughput_one``` | Search | Search queries on the node
+```search_index_throughput_one``` | Search | Documents indexed by Search
+```consistent_gets``` | Strong Consistency | Consistent reads on this node
+```consistent_puts``` | Strong Consistency | Consistent writes on this node
+```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads
+
+Note that there are no separate stats for updates to Flags or
+Registers, as these are included in ```vnode_map_update```.
+
+### Latency Metrics
+
+As with the throughput metrics, keeping an eye on average (and max)
+latency times will help detect usage patterns, and provide advance
+warning of potential problems.
+
+{{% note title="Note on FSM Time Stats" %}}
+FSM Time Stats represent the amount of time in microseconds required to
+traverse the GET or PUT Finite State Machine code, offering a picture of
+general node health. From your application's perspective, FSM Time effectively
+represents experienced latency. Mean, Median, and 95th-, 99th-, and
+100th-percentile (Max) counters are displayed. These are one-minute stats. 
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered in the last minute
+```pbc_active``` | | Number of currently active protocol buffer connections
+```pbc_connects``` | | Number of new protocol buffer connections established during the last minute
+```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes)
+```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0)
+```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection
+```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection
+
+### General Riak Search Load/Health Metrics
+
+These various stats give a picture of the general level of activity or
+load on the Riak node at any given moment.
+
+Metric | Description
+:------|:------------
+`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+`search_index_bad_entry_one` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute.
+`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last start of Riak.
+`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute.
+
+
+## Command-line Interface
+
+The [`riak-admin`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/) tool provides two
+interfaces for retrieving statistics and other information: `status`
+and `stat`.
+
+### status
+
+Running the `riak-admin status` command will return all of the
+currently available information from a running node.
+
+```bash
+riak-admin status
+```
+
+This will return a list of over 300 key/value pairs, like this:
+
+```
+1-minute stats for 'dev1@127.0.0.1'
+-------------------------------------------
+connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1']
+consistent_get_objsize_100 : 0
+consistent_get_objsize_195 : 0
+... etc ...
+```
+
+A comprehensive list of available stats can be found in the
+[Inspecting a Node]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+
+### stat
+
+The `riak-admin stat` command is related to the `riak-admin status`
+command but provides a more fine-grained interface for interacting with
+stats and information. Full documentation of this command can be found
+in the [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#stat) document.
+
+## Statistics and Monitoring Tools
+
+There are many open source, self-hosted, and service-based solutions for
+aggregating and analyzing statistics and log data for the purposes of
+monitoring, alerting, and trend analysis on a Riak cluster. Some
+solutions provide Riak-specific modules or plugins as noted.
+
+The following are solutions which customers and community members have
+reported success with when used for monitoring the operational status of
+their Riak clusters. Community and open source projects are presented
+along with commercial and hosted services. 
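+
+Most of the tools below ultimately consume the same HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/status) endpoint described above. As a quick sanity check that the endpoint is responding (the default HTTP port of 8098 is assumed here), you can fetch and pretty-print it yourself:
+
+```curl
+# Fetch the node's statistics and format the JSON for reading
+curl -s http://localhost:8098/stats | json_pp
+```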
+
+{{% note title="Note on Riak 2.x Statistics Support" %}}
+Many of the tools below were created either by third parties or by Basho
+engineers for general usage, and have been passed to the community for further
+updates. As such, many of them only aggregate the statistics and messages
+that were output by Riak 1.4.x.
+
+Like all code under [Basho Labs](https://github.com/basho-labs/), the below
+tools are "best effort" and have no dedicated Basho support. We both
+appreciate and need your contribution to keep these tools stable and up to
+date. Please open up a GitHub issue on the repository if you'd like to be a
+maintainer.
+
+Look for banners calling out the tools we've verified support the latest
+Riak 2.x statistics!
+{{% /note %}}
+
+### Self-Hosted Monitoring Tools
+
+#### Riaknostic
+
+[Riaknostic](http://riaknostic.basho.com) is a growing suite of
+diagnostic checks that can be run against your Riak node to discover
+common problems and recommend how to resolve them. These checks are
+derived from the experience of the Basho Client Services Team as well as
+numerous public discussions on the mailing list, IRC room, and other
+online media.
+
+Riaknostic integrates into the `riak-admin` command via a `diag`
+subcommand, and is a great first step in the process of diagnosing and
+troubleshooting issues on Riak nodes.
+
+#### Riak Control
+
+[Riak Control]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-control/) is Basho's REST-driven user interface for managing Riak
+clusters. It is designed to give you quick insight into the health of
+your cluster and allow for easy management of nodes.
+
+While Riak Control does not currently offer specific monitoring and
+statistics aggregation or analysis functionality, it does offer features
+which provide immediate insight into overall cluster health, node
+status, and handoff operations.
+
+#### collectd
+
+[collectd](http://collectd.org) gathers statistics about the system it
+is running on and stores them. The statistics are then typically graphed
+to find current performance bottlenecks, predict system load, and
+analyze trends.
+
+#### Ganglia
+
+[Ganglia](http://ganglia.info) is a monitoring system specifically
+designed for large, high-performance groups of computers, such as
+clusters and grids. Customers and community members using Riak have
+reported success in using Ganglia to monitor Riak clusters.
+
+A [Riak Ganglia module][riak_ganglia] for collecting statistics from
+the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/status) endpoint is also available.
+
+#### Nagios
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x.**
+{{% /note %}}
+
+[Nagios](http://www.nagios.org) is a monitoring and alerting solution
+that can provide information on the status of Riak cluster nodes, in
+addition to various types of alerting when particular events occur.
+Nagios also offers logging and reporting of events and can be used for
+identifying trends and capacity planning.
+
+A collection of [reusable Riak-specific scripts][riak_nagios] is
+available to the community for use with Nagios.
+
+#### OpenTSDB
+
+[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database
+(TSDB) used to store, index, and serve metrics from various sources. It can
+collect data at a large scale and graph these metrics on the fly.
+
+A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of
+the [tcollector framework][tcollector].
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project
+consisting of small programs for sending data to Riemann provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have
+reported successfully using for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine-generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/2.9.9/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. This data is then available for real-time graphing,
+search, and other visualizations ideal for troubleshooting complex issues
+and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistic information which can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ + + + + diff --git a/content/riak/kv/2.9.9/using/reference/strong-consistency.md b/content/riak/kv/2.9.9/using/reference/strong-consistency.md new file mode 100644 index 0000000000..6f3bcdc4bb --- /dev/null +++ b/content/riak/kv/2.9.9/using/reference/strong-consistency.md @@ -0,0 +1,150 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +aliases: +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/2.9.9/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + +## Making the Strong vs. Eventual Decision + +The first system described above may sound like the undisputed champion, +and the second system undesirable. However: + +1. Reads and writes on the first system will often be slower---if only + by a few milliseconds---because the system needs to manage reads and + writes more carefully. If performance is of primary concern, the + first system might not be worth the sacrifice. +2. Reads and writes on the first system may fail entirely if enough + servers are unavailable. If high availability is the top priority, + then the second system has a significant advantage. + +So when deciding whether to use strong consistency in Riak, the +following question needs to be asked: + +#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value? + +If the answer is yes, then you should seriously consider using Riak in a +strongly consistent way for the data that demands it, while bearing in +mind that other data can still be stored in Riak in an eventually +consistent way. 
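+
+As a concrete sketch of what opting in looks like: strong consistency is
+applied per bucket type, so (assuming strong consistency has already been
+enabled on the cluster) a bucket type carries the guarantee for every key
+stored under it. The type name `strongly_consistent` here is just an
+example:
+
+```bash
+# Create a bucket type whose keys are handled by the consensus subsystem,
+# then activate it (the type name is an arbitrary example)
+riak-admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'
+riak-admin bucket-type activate strongly_consistent
+```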
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable
+trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available
+than eventually consistent operations because they require a **quorum**
+of available object replicas to succeed. Quorum is defined as N / 2 + 1
+(using integer division), where N is the bucket's `n_val`. If N is set
+to 7, at least 4 object replicas must be available; if N is 3, at least
+2 must be available; and so on.
+
+If there is a network partition that leaves fewer than a quorum of object
+replicas available within an ensemble, strongly consistent operations
+against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault
+tolerance. Consistent operations can still succeed when a minority of
+replicas in each ensemble are offline, faulty, or unreachable. In
+other words, **strongly consistent operations will succeed as long as
+quorum is maintained**. A fuller discussion can be found in the
+[operations]({{<baseurl>}}riak/kv/2.9.9/configuring/strong-consistency/#fault-tolerance)
+documentation.
+
+A second trade-off regards performance. Riak's implementation of strong
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/2.9.9/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+which can entail a performance hit of varying proportions, depending on
+a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/2.9.9/configuring/strong-consistency/#performance).
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/v2-multi-datacenter.md b/content/riak/kv/2.9.9/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..723861ea1f
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,40 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.9/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+ +[Learn More >>][v2 mdc fullsync] + + + + diff --git a/content/riak/kv/2.9.9/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/2.9.9/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..657a28cd0a --- /dev/null +++ b/content/riak/kv/2.9.9/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,130 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/2.9.9/ops/mdc/v2/architecture + - /riak/kv/2.9.9/ops/mdc/v2/architecture +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.9/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e. +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime mode are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the Figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. 
The site node in the secondary cluster initiates fullsync replication
+   with the primary node by sending a message to the listener node in
+   the primary cluster
+3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode) in their respective clusters and compute a hash for
+   each key's object value. The site node on the secondary cluster sends
+   its complete list of key/hash pairs to the listener node in the
+   primary cluster. The listener node then sequentially compares the
+   secondary cluster's key/hash pairs with its own, identifying any
+   missing objects or updates needed in the secondary cluster.
+4. The listener node streams the missing objects/updates to the
+   secondary cluster.
+5. The secondary cluster replicates the updates within the cluster to
+   achieve the new object values, completing the fullsync cycle
+
+<br>
+![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png)
+<br>
+
+## Realtime Replication
+
+Riak performs the following steps during realtime
+replication, as illustrated in the Figure below.
+
+1. The secondary cluster establishes a TCP connection to the primary
+2. Realtime replication of a key/object is initiated when an update is
+   sent from a client to the primary cluster
+3. The primary cluster replicates the object locally
+4. The listener node on the primary cluster streams an update to the
+   secondary cluster
+5. The site node within the secondary cluster receives and replicates
+   the update
+
+<br>
+![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png)
+<br>
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/2.9.9/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+source and sink cluster.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.9/using/reference/v2-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..08af106996
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/v2-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,53 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v2_fullsync"
+    weight: 101
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/mdc/v2/scheduling-fullsync
+  - /riak/kv/2.9.9/ops/mdc/v2/scheduling-fullsync
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/2.9.9/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead.
+{{% /note %}}
+
+
+## Scheduling Fullsync Operation
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
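+
+For the first step, the relevant settings live in the `riak_repl` section
+of `advanced.config`. The following is a minimal sketch; the `disabled`
+value shown for `fullsync_interval` is an assumption and should be checked
+against the replication configuration reference for your version:
+
+```advancedconfig
+{riak_repl, [
+    %% ...
+    %% Don't kick off a fullsync when a site first connects
+    {fullsync_on_connect, false},
+    %% Disable the periodic fullsync timer; cron drives syncs instead
+    {fullsync_interval, disabled}
+    %% ...
+  ]}
+```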
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` + + + + diff --git a/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter.md b/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..3a68f6573c --- /dev/null +++ b/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter.md @@ -0,0 +1,52 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +aliases: +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] + + + + diff --git a/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..019c4dbe2d --- /dev/null +++ b/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,129 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/2.9.9/ops/mdc/v3/aae + - /riak/kv/2.9.9/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak
+Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can now take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a
+technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in
+LevelDB, so if AAE is already active, there is no additional startup
+delay for enabling the `aae` fullsync strategy. AAE can also be enabled
+for the first time on a cluster, although some custom settings can
+enhance performance in this case to help AAE trees be built more
+quickly. See [Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later installed on source and sink
+  clusters
+* Riak MDC Replication Version 3 enabled on source and sink
+  clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the
+  `advanced.config` configuration file must be set to `aae` on both
+  source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In
+  the event that an AAE tree is not built on both the source and sink,
+  fullsync will default to the `keylist` fullsync strategy for that
+  partition.
+
+## Configuration
+
+If you are using Riak version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To use the `aae` fullsync strategy, [active anti-entropy][glossary aae] \(AAE) must be enabled on both source and sink clusters. If it is not
+enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all
+of the necessary hash trees because the default **build rate** of trees
+is to build 1 partition per hour, per node. With a
+[ring size][concept clusters] of 256 and 5 nodes, that is roughly 51
+trees per node, or about 2 days.
+
+Changing the rate of tree building can speed up this process, with the
+caveat that rebuilding a tree takes processing time from the cluster,
+and this should not be done without assessing the possible impact on
+get/put latencies for normal cluster operations. For a production
+cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a
+partition has not had its AAE tree built yet, it will default to using
+the `keylist` replication strategy. Instructions on these settings can
+be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting
+the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit`
+settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on
+both source and sink clusters.
If not, the `keylist` replication
+strategy will be used.
+
+To enable AAE with Version 3 MDC Replication:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {fullsync_strategy, aae},
+    % ...
+    ]}
+```
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..0b6e208182
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/architecture.md
@@ -0,0 +1,186 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Architecture"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Architecture"
+    identifier: "managing_ref_v3_architecture"
+    weight: 100
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.9/ops/mdc/v3/architecture
+  - /riak/kv/2.9.9/ops/mdc/v3/architecture
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters
+
+## How Version 3 Replication Works
+
+In Multi-Datacenter (MDC) Replication, a cluster can act as either the
+
+* **source cluster**, which sends replication data to one or more
+* **sink clusters**, which are generally located in datacenters in other
+  regions or countries.
+
+Bidirectional replication can easily be established by making a cluster
+both a source and sink to other clusters. Riak
+Multi-Datacenter Replication is considered "masterless" in that all
+clusters participating will resolve replicated writes via the normal
+resolution methods available in Riak.
+
+In Multi-Datacenter Replication, there are two primary modes of
+operation:
+
+* **Fullsync** replication is a complete synchronization that occurs
+  between source and sink cluster(s), which can be performed upon
+  initial connection of a sink cluster if you wish
+* **Realtime** replication is a continual, incremental synchronization
+  triggered by successful writing of new updates on the source cluster
+
+Fullsync and realtime replication modes are described in detail below.
+
+## Concepts
+
+### Sources
+
+A source refers to a cluster that is the primary producer of replication
+data. A source can also refer to any node that is part of the source
+cluster. Source clusters push data to sink clusters.
+
+### Sinks
+
+A sink refers to a cluster that is the primary consumer of replication
+data. A sink can also refer to any node that is part of the sink
+cluster. Sink clusters receive data from source clusters.
+
+### Cluster Manager
+
+The cluster manager is a Riak service that provides
+information regarding nodes and protocols supported by the sink and
+source clusters. This information is primarily consumed by the
+`riak-repl connect` command.
+
+### Fullsync Coordinator
+
+In fullsync replication, a node on the source cluster is elected to be
+the *fullsync coordinator*. This node is responsible for starting and
+stopping replication to the sink cluster. It also communicates with the
+sink cluster to exchange key lists and ultimately transfer data across a
+TCP connection. If a fullsync coordinator is terminated as the result of
+an error, it will automatically restart on the current node. If the node
+becomes unresponsive, a leader election will take place within 5 seconds
+to select a new node from the cluster to become the coordinator. In the
+event of a coordinator restart, a fullsync will have to restart.
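+
+To make these concepts concrete, the following sketch names two clusters
+and points a source at a sink's cluster manager. The cluster names,
+hostname, and port are illustrative (9080 is the default cluster manager
+port); consult the v3 operations documentation for the full workflow on
+your version.
+
+```bash
+# On a node in the sink cluster: give the cluster a name
+riak-repl clustername sink_cluster
+
+# On a node in the source cluster: name it, then connect to the
+# sink cluster's cluster manager (host:port are illustrative)
+riak-repl clustername source_cluster
+riak-repl connect sink1.example.com:9080
+```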
+
+## Fullsync Replication
+
+Fullsync replication scans through the list of partitions in a Riak
+cluster and determines which objects in the sink cluster need to be
+updated. A source partition is synchronized to a node on the sink
+cluster containing the current partition.
+
+## Realtime Replication
+
+In realtime replication, a node in the source cluster will forward data
+to the sink cluster. A node in the source cluster does not necessarily
+connect to a node containing the same [vnode][glossary vnode] on
+the sink cluster. This allows Riak to spread out realtime replication
+across the entire cluster, thus improving throughput and making
+replication more fault tolerant.
+
+### Initialization
+
+Before a source cluster can begin pushing realtime updates to a sink,
+the following commands must be issued:
+
+1. `riak-repl realtime enable <sink_cluster>`
+
+    After this command, the realtime queues (one for each Riak node) are
+    populated with updates to the source cluster, ready to be pushed to
+    the sink.
+
+2. `riak-repl realtime start <sink_cluster>`
+
+    This instructs the Riak connection manager to contact the sink
+    cluster.
+
+    <br />
+    ![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime1.png)
+    <br />
+
+    At this point realtime replication commences.
+
+<ol start="3">
+<li>Nodes with queued updates establish connections to the sink cluster
+and replication begins.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime2.png)
+<br />
+
+### Realtime Queueing and Synchronization
+
+Once initialized, realtime replication continues to use the queues to
+store data updates for synchronization.
+
+<ol start="4">
+<li>The client sends an object to store on the source cluster.</li>
+<li>Riak writes N replicas on the source cluster.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime3.png)
+<br />
+
+<ol start="6">
+<li>The new object is stored in the realtime queue.</li>
+<li>The object is copied to the sink cluster.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime4.png)
+<br />
+
+<ol start="8">
+<li>The destination node on the sink cluster writes the object to N
+nodes.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime5.png)
+<br />
+
+<ol start="9">
+<li>The successful write of the object to the sink cluster is
+acknowledged and the object removed from the realtime queue.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime6.png)
+<br />
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size][concept clusters]; if you are using fullsync
+replication, every bucket's `n_val` must be the same in both the
+source and sink cluster.
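+
+A quick way to confirm the `n_val` restriction for a given bucket is to
+compare its properties on both sides via the HTTP API. This is a sketch;
+the hostnames, port, and bucket name are illustrative:
+
+```bash
+# The returned JSON includes "n_val" among the bucket properties
+curl -s http://source-node.example.com:8098/buckets/my_bucket/props
+curl -s http://sink-node.example.com:8098/buckets/my_bucket/props
+```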
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/cascading-writes.md
new file mode 100644
index 0000000000..6c9a249e67
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -0,0 +1,102 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Cascading Realtime Writes"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Cascading Writes"
+    identifier: "managing_ref_v3_cascading_writes"
+    weight: 102
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.9/ops/mdc/v3/cascading-writes
+  - /riak/kv/2.9.9/ops/mdc/v3/cascading-writes
+---
+
+## Introduction
+
+Riak includes a feature that cascades realtime writes across
+multiple clusters.
+
+Cascading Realtime Writes is enabled by default on new clusters running
+Riak. It will need to be manually enabled on existing clusters.
+
+Cascading realtime requires the `{riak_repl, rtq_meta}` capability to
+function.
+
+{{% note title="Note on cascading tracking" %}}
+Cascading tracking is a simple list of where an object has been written. This
+works well for most common configurations. Larger installations, however, may
+have writes cascade to clusters to which other clusters have already written.
+{{% /note %}}
+
+
+```
++---+     +---+     +---+
+| A | <-> | B | <-> | C |
++---+     +---+     +---+
+  ^                   ^
+  |                   |
+  V                   V
++---+     +---+     +---+
+| F | <-> | E | <-> | D |
++---+     +---+     +---+
+```
+
+In the diagram above, a write at cluster A will begin two cascades. One
+goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and
+finally B. Each cascade will loop around to A again, sending a
+replication request even if the same request has already occurred from
+the opposite direction, creating 3 extra write requests.
+
+This can be mitigated by disabling cascading in a cluster. If cascading
+were disabled on cluster D, a write at A would begin two cascades. One
+would go through B, C, and D, the other through F, E, and D. This
+reduces the number of extraneous write requests to 1.
+
+A different topology can also prevent extra write requests:
+
+```
++---+                       +---+
+| A |                       | E |
++---+                       +---+
+  ^  ^                     ^  ^
+  |   \  +---+     +---+  /   |
+  |    > | C | <-> | D | <    |
+  |   /  +---+     +---+  \   |
+  V  V                     V  V
++---+                       +---+
+| B |                       | F |
++---+                       +---+
+```
+
+A write at A will cascade to C and B. B will not cascade to C because
+A will have already added C to the list of clusters where the write has
+occurred. C will then cascade to D. D then cascades to E and F. E and F
+see that the other was sent a write request (by D), and so they do not
+cascade.
+
+## Usage
+
+Riak Cascading Writes can be enabled and disabled using the
+`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/v3-multi-datacenter) for more information.
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..ae538c2e66
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,72 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/2.9.9/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/2.9.9/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak_repl`
+section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes,
+  between fullsyncs, _or_
+* a list of pairs of the form `[{"clustername", time_in_minutes},
+  {"clustername", time_in_minutes}, ...]`, one pair for each sink
+  participating in fullsync replication. Note the commas separating each
+  pair, and the `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    {fullsync_interval, 90} %% fullsync runs every 90 minutes
+    % ...
+    ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    % clusters sink_boston + sink_newyork have different intervals (in minutes)
+    {fullsync_interval, [
+      {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+      {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+    ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been
+  completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the
+  specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last
+  completed fullsync.
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/repair-recovery.md b/content/riak/kv/2.9.9/using/repair-recovery.md
new file mode 100644
index 0000000000..e3be6fa70e
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/repair-recovery.md
@@ -0,0 +1,53 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
+
+
+
+
diff --git a/content/riak/kv/2.9.9/using/repair-recovery/errors.md b/content/riak/kv/2.9.9/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..340d46c8c4
--- /dev/null
+++ b/content/riak/kv/2.9.9/using/repair-recovery/errors.md
@@ -0,0 +1,366 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 2.9.9
+menu:
+  riak_kv-2.9.9:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/2.9.9/ops/running/recovery/errors
+  - /riak/kv/2.9.9/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may
+encounter -- screws fall out all of the time, the world is an imperfect
+place. This is an attempt to capture the most common errors that users
+encounter, as well as to give some description of the noncritical error
+atoms which you may find in the logs.
+
+Discovering the source of an error can take some detective work, since
+one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error
+messages may appear in. Depending upon your log configuration some may
+appear more often (e.g., if you set the log level to debug), while others
+may output to your console (e.g., if you tee'd your output or started
+Riak with `riak console`).
+
+You can optionally customize your log message format via the
+`lager_default_formatter` field under `lager` in `app.config`. If you
+do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so that you can look up portions of a
+log message, since printing every variation would be a bit unwieldy. For
+example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+Starts with a timestamp (`12:34:27.999`), followed by the log severity
+(`[error]`), with a message formatted by lager (found in the Lager table
+below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the Lager project, so it's good to note
+some of the more common message formats. In almost every case the
+reasons for the error are described as variables, such as `Reason` or
+`Mod` (meaning the Erlang module which is generally the source of the
+error).
+
+Riak does not format all error messages that it receives into
+human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in
+this table, where the Erlang `Mod` value is `riak_core_capability` and
+the reason was an Erlang error: `no function clause matching
+orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang favors a "happy path/fail fast" programming style, one of
+the more common error log strings you might encounter contains
+`{error,{badmatch,{...`. This is Erlang's way of telling you that an
+unexpected value was assigned, so these errors can prefix the more
+descriptive parts. In this case, `{error,{badmatch,{...` prefixes the
+more interesting `insufficient_vnodes_available` error, which can be
+found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.9/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.9/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | A file/directory is attempted to be written to a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large of an object or that you simply don't have enough RAM. Base minimum suggested RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang
+atoms (and quite often wrapped within an `{error,{badmatch,{...` tuple,
+as described in the [Erlang Errors](#erlang-errors) section
+above). This table lays out those terse error codes and related log
+messages, if they exist.
+
+### Riak Core
+
+Riak Core is the underlying implementation for KV. These are errors
+originating from that framework, and can appear whether you use KV,
+Search, or any Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g.
backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check `riak-admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections, and ensure the Erlang cookie (`-setcookie` in `vm.args`) matches on all nodes
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use `{riak_kv_stat, true}`
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possible
corrupted ring
+`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard handoff port (`8099` by default) is open between nodes
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN` and the sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.9/using/repair-recovery/errors/#more"> Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive a `xfer_complete` status
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be
This is the source of most of the code and, consequently, +most of the error messages. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster have a `valid` status +`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query +`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that the coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.9/using/repair-recovery/errors/#more"> Step 1</a>. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/2.9.9/using/repair-recovery/errors/#more"> Step 1</a>. +`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone +`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value +`{field_parsing_failed, {Field, Value}}` | `Could not parse field <Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example, a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer +`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug +`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend +`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`. +`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings +`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe` +`invalid_message` | | Unknown event sent to module | Ensure you're running similar versions of Riak (and specifically poolboy) across all nodes +`{invalid_range, Args}` | | Index range query has Start > End | Fix your query +`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result +`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files +`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key +`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk.
If this happens repetitively, stop/start the Riak node, forcing a memory realloc +`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N +`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized +`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe` +`notfound` | | No value found | Value was deleted, or was not yet stored or replicated +`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high +`{pr_val_violation, PR}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value +`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code +`{pw_val_unsatisfied, PW, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high +`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value +`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high +`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value +`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called +`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value +`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false` +`timeout` | | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/2.9.9/using/repair-recovery/errors/#more"> Step 1</a>. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production. +`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format +`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format +`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value +`too_many_results` | | The query attempted to return too many results | This is a protective error.
Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000) +`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin` +`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high +`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value + | `Invalid equality query <SKey>` | Equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query + | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query + | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for the given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution + +### Backend Errors + +These errors tend to stem from server-based problems. Backends are +sensitive to low or corrupt disk or memory resources, native code, and +configuration differences between nodes. Conversely, a network issue is +unlikely to affect a backend. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config +`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory in which to store bitcask data, under the `bitcask` section +`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list +`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config` +`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure +`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config` +`not_loaded` | | Native driver not loading | Ensure your native drivers exist (.dll or .so files under lib/`project`/priv, where `project` is most likely eleveldb) +`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket +`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production + +### JavaScript + +These errors relate to JavaScript pre-commit functions, +MapReduce functions, or simply the management of the pool of JavaScript +VMs. If you do not use JavaScript, these should not be encountered. If +they are, check your configuration for high `*js_vm*` values, or treat them +as a symptom of a deeper issue, such as low resources.
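+
+The VM pool sizes named in the table below are ordinary `app.config` settings. A minimal sketch of raising them, with illustrative counts rather than recommendations, might look like:
+
+```appconfig
+%% In the riak_kv section of app.config (values are illustrative):
+{map_js_vm_count, 24},
+{reduce_js_vm_count, 18},
+{hook_js_vm_count, 2},
+```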
+ +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`) +`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A given UTF-8 character was badly formatted | Only use correct UTF-8 characters for JavaScript code and arguments +`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments + | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties +`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | On OS X you may have compiled with `llvm-gcc` rather than `gcc`. + +### MapReduce + +These are possible errors logged by Riak's MapReduce implementation, +both legacy and Pipe. If you never use or call MapReduce, you +should not run across these. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it +`bad_mapred_inputs` | | A bad value sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary +`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce query code placed in a Riak value must be stored before it can be executed +`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct +`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (both issues may require a node restart) +`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has an invalid input field | Fix MapReduce fields +`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly +`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks.
Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite them as Erlang functions +`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post the MapReduce request with at least one of them +`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value +`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly +`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long-running job hitting the MapReduce timeout; upgrade to Pipe +`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has an invalid query field | Fix the MapReduce query +`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set a reduce phase at all +`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted + | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation; the most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects + | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen + +## Specific messages + +Although you can put together many error causes with the tables above, +here are some common yet esoteric messages with known causes and +solutions. + +Message | Resolution +:--------|:---------- +gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The node has been changed, either through a change of IP or of `vm.args` `-name`, without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files (`rm -rf /var/lib/riak/ring/*`) and rejoin the cluster +gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) +monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1 msec). +<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/2.9.9/using/repair-recovery/errors/#more">Step 2</a>. +enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core.
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this can occur when another process is already bound to the address that the starting process is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause and resolve this kind of error; check for the existence of stale `beam.smp` processes. +exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify the HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range, as the `riak` user will not have access to such ports. +gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node's original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` rather than properly through `riak-admin cluster replace`. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/2.9.9/using/reference/snmp) configuration. +RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. + + +### More + +1. <a name="f1"></a>Ensure node inter-communication + - Check `riak-admin member-status` and ensure the cluster is valid. + - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected. + - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node. + - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster. + - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`). + +2. <a name="f2"></a>Run LevelDB compaction + 1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting, more than one might be a strong indication of a hardware or OS bug)* + 2. Stop Riak on the node: `riak stop` + 3. Start an Erlang session (do not start Riak, we just want Erlang) + 4. From the Erlang console, perform the following command to open the LevelDB database + + ```erlang + [application:set_env(eleveldb, Var, Val) || {Var, Val} <- + [{max_open_files, 2000}, + {block_size, 1048576}, + {cache_size, 20*1024*1024*1024}, + {sync, false}, + {data_root, "/var/db/riak/leveldb"}]]. + ``` + 5. For each of the corrupted LevelDB databases (found by `find . -name "LOG" -exec grep -l 'Compaction error' {} \;`) run this command, substituting in the proper vnode number (see the combined sketch after this list). + + ```erlang + eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []). + ``` + 6. When all have finished successfully you may restart the node: `riak start` + 7. Check for proper operation by looking at the log files in `/var/log/riak` and at the `LOG` files in the affected LevelDB vnodes.
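+
+For convenience, steps 1 and 5 can be combined into a single sketch that prints just the vnode directories needing repair, assuming the `/var/db/riak/leveldb` data root used in the examples above:
+
+```bash
+# List vnode directories whose LevelDB LOG mentions a compaction error
+cd /var/db/riak/leveldb
+find . -name "LOG" -exec grep -l 'Compaction error' {} \; | sed 's|/LOG$||'
+```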
+ + + + diff --git a/content/riak/kv/2.9.9/using/repair-recovery/failed-node.md b/content/riak/kv/2.9.9/using/repair-recovery/failed-node.md new file mode 100644 index 0000000000..d820580f9c --- /dev/null +++ b/content/riak/kv/2.9.9/using/repair-recovery/failed-node.md @@ -0,0 +1,114 @@ +--- +title: "Recovering a Failed Node" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Recover a Failed Node" + identifier: "repair_recover_failed_node" + weight: 104 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.9/ops/running/recovery/failed-node + - /riak/kv/2.9.9/ops/running/recovery/failed-node +--- + +## General Recovery Notes + +A Riak node can fail for many reasons, but a handful of checks enable you to +uncover some of the most common problems that can lead to node failure, +such as checking for RAID and filesystem consistency or faulty memory and +ensuring that your network connections are fully functioning. + +When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster. + +During the recovery process, hinted handoff will kick in and update the data on +the recovered node with updates accepted from other nodes in the cluster. Your +cluster may temporarily return `not found` for objects that are currently +being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on +these scenarios, in particular how the system behaves while the failed node is +not part of the cluster). + +## Node Name Changed + +If you are recovering from a scenario in which node name changes are out of +your control, you'll want to notify the cluster of its *new* name using the +following steps: + +1. Stop the node you wish to rename: + + ```bash + riak stop + ``` + + +2. Mark the node down from another node in the cluster: + + ```bash + riak-admin down <previous_node_name> + ``` + +3. Update the node name in Riak's configuration files: + + ```riakconf + nodename = <updated_node_name> + ``` + + ```vmargs + -name <updated_node_name> + ``` + +4. Delete the ring state directory (usually `/var/lib/riak/ring`). + +5. Start the node again: + + ```bash + riak start + ``` + +6. Ensure that the node comes up as a single instance: + + ```bash + riak-admin member-status + ``` + + The output should look something like this: + + ``` + ========================= Membership ========================== +Status Ring Pending Node +--------------------------------------------------------------- +valid 100.0% -- 'dev-rel@127.0.0.1' +--------------------------------------------------------------- +Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +7. Join the node to the cluster: + + ```bash + riak-admin cluster join <node_name_of_a_member_of_the_cluster> + ``` + +8. Replace the old instance of the node with the new: + + ```bash + riak-admin cluster force-replace <previous_node_name> <new_node_name> + ``` + +9. 
Review the changes: + + ```bash + riak-admin cluster plan + ``` + + Finally, commit those changes: + + ```bash + riak-admin cluster commit + ``` + + + + diff --git a/content/riak/kv/2.9.9/using/repair-recovery/failure-recovery.md b/content/riak/kv/2.9.9/using/repair-recovery/failure-recovery.md new file mode 100644 index 0000000000..604f0471f9 --- /dev/null +++ b/content/riak/kv/2.9.9/using/repair-recovery/failure-recovery.md @@ -0,0 +1,129 @@ +--- +title: "Failure & Recovery" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Failure & Recovery" + identifier: "repair_recover_failure" + weight: 100 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.9/ops/running/recovery/failure-recovery + - /riak/kv/2.9.9/ops/running/recovery/failure-recovery +--- + +Riak was built to withstand---or at the very least reduce the severity +of---many types of system failure. Nonetheless, bugs are a reality, +hardware does break, and occasionally Riak itself will fail. Here, we'll +list some steps that can be taken to minimize the harm caused by a general +cluster failure. + +## Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, backup log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally and +are not affected by a wider problem like a virtualization or network outage. +Try to determine the cause of the problem from the data you have collected. + +## Data Loss + +Many failures incur no data loss or minimal loss that can be +repaired automatically, without intervention. Outage of a single node +does not necessarily cause data loss, as other replicas of every key are +available elsewhere in the cluster. Once the node is detected as down, +other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/2.9.9/learn/glossary/#hinted-handoff)). + +More severe data loss scenarios usually relate to hardware failure. +If data is lost, several options are available for restoring it. + +1. **Restore from backup** - A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but it can be used to partially restore data from + lost storage volumes. If running in a RAID configuration, rebuilding + the array may also be possible. +2. **Restore from multi-cluster replication** - If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the `riak-repl` + command. +3. **Restore using intra-cluster repair** - Riak versions 1.2 and greater + include a repair feature which will restore lost partitions with + data from other replicas. Currently, this must be invoked manually + using the Riak console and should be performed with guidance from a + Basho Client Services Engineer. + +Once data has been restored, normal operations should continue. If +multiple nodes completely lose their data, consultation and assistance +from Basho are strongly recommended. + +## Data Corruption + +Data at rest on disk can become corrupted by hardware failure or other +events. 
Generally, the Riak storage backends are designed to handle +cases of corruption in individual files or entries within files, and can +repair them automatically or simply ignore the corrupted parts. +Otherwise, clusters can recover from data corruption in roughly the same +way that they recover from data loss. + +## Out-of-Memory + +Sometimes, Riak will exit when it runs out of available RAM. While this +does not necessarily cause data loss, it may indicate that the cluster +needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully. + +Replacing the node with one that has greater RAM capacity may temporarily +alleviate the problem, but out-of-memory (OOM) issues tend to be an indication +that the cluster is under-provisioned. + +## High Latency / Request Timeout + +High latencies and timeouts can be caused by slow disks or networks or an +overloaded node. Check `iostat` and `vmstat` or your monitoring system to +determine the state of resource usage. If I/O utilization is high but +throughput is low, this may indicate that the node is responsible for +too much data and growing the cluster may be necessary. Additional RAM +may also improve latency because more of the active dataset will be +cached by the operating system. + +Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/2.9.9/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +the number of siblings, causing longer disk service times and slower +network responses. + +Sibling explosion can be detected by examining the `node_get_fsm_siblings` +and `node_get_fsm_objsize` statistics from the `riak-admin status` command. +To recover from sibling explosion, the application should be throttled and +the resolution policy might need to be invoked manually on offending keys. + +A Basho CSE can assist in manually finding large values, i.e. those that +potentially have a sibling explosion problem, in the storage backend. + +MapReduce requests typically involve multiple I/O operations and are +thus the most likely to time out. From the perspective of the client +application, the success of MapReduce requests can be improved by reducing the +number of inputs, supplying a longer request timeout, and reducing the usage +of secondary indexes. Heavily loaded clusters may experience more MapReduce +timeouts simply because many other requests are being serviced as well. Adding +nodes to the cluster can reduce MapReduce failure in the long term by +spreading load and increasing available CPU and IOPS. + + +## Cluster Recovery From Backups + +See [Changing Cluster Information]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. + +{{% note title="Tip" %}} +If you are a TI Tokyo Riak support customer and require assistance or +further advice with a cluster recovery, please file a ticket with the +<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>.
+{{% /note %}} + + + + diff --git a/content/riak/kv/2.9.9/using/repair-recovery/repairs.md b/content/riak/kv/2.9.9/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..27e6972180 --- /dev/null +++ b/content/riak/kv/2.9.9/using/repair-recovery/repairs.md @@ -0,0 +1,391 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.9/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.9/ops/running/recovery/repairing-indexes + - /riak/2.9.9/ops/running/recovery/failed-node + - /riak/kv/2.9.9/ops/running/recovery/failed-node + - /riak/2.9.9/ops/running/recovery/repairing-leveldb + - /riak/kv/2.9.9/ops/running/recovery/repairing-leveldb + - /riak/2.9.9/ops/running/recovery/repairing-partitions + - /riak/kv/2.9.9/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/2.9.9/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/2.9.9/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/2.9.9/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`. + +Compaction error messages take the following form: + +``` +<timestamp> Compaction error: Corruption: corrupted compressed block contents +``` + +To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction error` in each `LOG` file. Here is an example script: + +```bash +find . -name "LOG" -exec grep -l 'Compaction error' {} \; +``` + +Any vnodes with compaction errors will be listed in the console output. For example: + +``` +./442446784738847563128068650529343492278651453440/LOG +``` + + +{{% note %}} +While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level. +{{% /note %}} + + +## Healing Corrupted LevelDBs + +When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not. + +Choose your setup below: + +1. [Just LevelDB](#leveldb) +2. [LevelDB with tiered storage](#leveldb-with-tiered-storage) + + +### LevelDB + +Follow the steps below to heal your corrupted LevelDB. + +1\. Stop the node: + +```bash +riak stop +``` + +2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command: + +```bash +`riak ertspath`/erl +``` + +{{% note title="Erlang version" %}} +Note: you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command. +{{% /note %}} + +3\. Once in the shell, run the following command: + +```erlang +application:set_env(eleveldb, data_root, ""). +``` + +4\. Then set `Options` equal to an empty list: + +```erlang +Options = []. +``` + +5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs. +`VNodeList` should be a list of the corrupted LevelDB vnodes that you found using the [`find` command above](#checking-for-compaction-errors). + +```erlang +DataRoot = "»path to your data root«". +VNodeList = ["»vnode id you want to repair«", ...]. +``` + +6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`. + +```erlang +RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end. +[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList]. +``` + +7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual. + +```bash +riak start +``` + +### LevelDB with Tiered Storage + +Follow the steps below to heal your corrupted LevelDB. + +1\. Stop the node: + +```bash +riak stop +``` + +2\. Check your `riak.conf` file and make note of the following values: + +* `leveldb.tiered` (integer) +* `leveldb.tiered.path.fast` +* `leveldb.tiered.path.slow` + +3\.
To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command: + +```bash +`riak ertspath`/erl +``` + +{{% note title="Erlang version" %}} +Note: you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command, please pay close attention to the version and location you use with the `erl` command. +{{% /note %}} + +4\. Once in the shell, run the following command: + +```erlang +application:set_env(eleveldb, data_root, ""). +``` + +5\. Then supply the information you noted in Step 2: + +```erlang +Options = [ + {tiered_slow_level, »leveldb.tiered value«}, + {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"}, + {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"} +]. +``` + +6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs. +`VNodeList` should be a list of the corrupted LevelDB partitions that you found using the [`find` command above](#checking-for-compaction-errors), each provided in double quotes. + +```erlang +DataRoot = "»path to your data root«". +VNodeList = ["»vnode id you want to repair«", ...]. +``` + +7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`. + +```erlang +RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end. +[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList]. +``` + +8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual. + +```bash +riak start +``` + + +## Repairing Partitions + +If you have experienced a loss of object replicas in your cluster, you +may need to perform a repair operation on one or more of your data +[partitions]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +run in situations where partitions or whole nodes are lost due to +corruption or hardware failure. In these cases, nodes or partitions are +brought back online without any data, which means that the need to +repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/) is enabled.
+ +You will need to run a repair if the following are both true: + +* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* You have both non-expiring data and keys that are not accessed + frequently (which means that they are not likely to be subject to + [read repair]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + +You will most likely not need to run a repair operation if _any_ of the +following is true: + +* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/2.9.9/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Your entire key set is accessed frequently, allowing passive read + repair to repair the partitions +* Your data expires frequently + +In most cases, we recommend either using active anti-entropy or, if +necessary and only when necessary, running a repair operation using the +instructions below. + +### Running a Repair + +The Riak KV repair operation will repair objects from a node's adjacent +partitions on the ring, consequently fixing the index. This is done as +efficiently as possible by generating a hash range for all the buckets +and thus avoiding a preflist calculation for each key. Only a hash of +each key is done, its range determined from a bucket->range map, and +then the hash is checked against the range. + +Repairs are not allowed to occur during ownership changes. Since +ownership entails the moving of partition data, it is safest to make them +mutually exclusive events. If you join or remove a node, all repairs +across the entire cluster will be killed. + +### Repairing a Single Partition + +In the case of data loss in a single partition, only that partition can +be repaired. + +1. From any node in the cluster, attach to Riak's Erlang shell: + + ```bash + riak attach + ``` + + You may have to hit **Enter** again to get a console prompt. + +2. Execute the repair for a single partition using the below command: + + ```erlang + riak_kv_vnode:repair(»Partition ID«). + ``` + + where `»Partition ID«` is replaced by the ID of the partition to + repair. For example: + + ```erlang + riak_kv_vnode:repair(251195593916248939066258330623111144003363405824). + ``` + +3. Once the command has been executed, detach from Riak using +`Control-C`. + +### Repairing All Partitions on a Node + +If a node is lost, all partitions currently owned by that node can be +repaired. + +1. From any node in the cluster, attach to Riak's Erlang shell: + + ```bash + riak attach + ``` + +2. Get a copy of the current Ring: + + ```erlang + {ok, Ring} = riak_core_ring_manager:get_my_ring(). + ``` + + You will get a lot of output with ring record information. + You can safely ignore it. + +3. Get a list of partitions owned by the node that needs to be repaired. +Replace `dev1@127.0.0.1` with the name of the node to be repaired. The +name can be found in each node's `vm.args` file, specified as the +`-name` parameter, if you are using the older configuration system; if +you are using the newer, `riak.conf`-based system, the name is given by +the `nodename` parameter. + + ```erlang + Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)]. + ``` + + **Note**: The above is an [Erlang list + comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) + that loops over each `{Partition, Node}` tuple in the ring and + extracts only the partitions that match the given node name, as a + list. + + +4. 
Execute the repair on all the partitions. Executing the repairs all +at once will cause a lot of `{shutdown, max_concurrency}` messages in +the logs. These can be safely ignored, as it is just the transfers +mechanism enforcing an upper limit on the number of concurrent +transfers. + + ```erlang + [riak_kv_vnode:repair(P) || P <- Partitions]. + ``` +5. Once the command has been executed, detach from Riak using +`Control-C`. + +### Monitoring Repairs + +The above repair commands can be monitored via the `riak-admin +transfers` command. + +### Killing a Repair + +Currently there is no easy way to kill an individual repair. The only +option is to kill all repairs targeting a given node. This is done by +running `riak_core_vnode_manager:kill_repairs(Reason)` on the node +undergoing repair. This command can be executed from a `riak attach` +session like below: + +```erlang +riak_core_vnode_manager:kill_repairs(killed_by_user). +``` + +Log entries will reflect that repairs were killed manually, and will +look similar to: + +``` +2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user +``` + +Repairs on a node can also be killed remotely from another node in the +cluster. From a `riak attach` session the below command can be used: + +```erlang +rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]). +``` + + + + diff --git a/content/riak/kv/2.9.9/using/repair-recovery/rolling-replaces.md b/content/riak/kv/2.9.9/using/repair-recovery/rolling-replaces.md new file mode 100644 index 0000000000..13e880b7bf --- /dev/null +++ b/content/riak/kv/2.9.9/using/repair-recovery/rolling-replaces.md @@ -0,0 +1,76 @@ +--- +title: "Rolling Replaces" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Rolling Replaces" + identifier: "repair_recover_replace" + weight: 106 + parent: "managing_repair_recover" +toc: true +aliases: +--- + +[upgrade]: {{<baseurl>}}riak/kv/2.9.9/setup/upgrading/cluster/ +[rolling restarts]: {{<baseurl>}}riak/kv/2.9.9/using/repair-recovery/rolling-restart/ +[add node]: {{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes + +Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. + +The following steps should be undertaken on each Riak KV node that you wish to replace: + +1\. Create a free node: + + a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster. + + b\. Or have a node that is currently in the cluster leave: + + ```bash + riak-admin cluster leave »nodename« + ``` + + After creating a node or leaving a node, wait for all transfers to complete: + + ```bash + riak-admin transfers + ``` + +2\. Join the free node to your cluster: + +```bash +riak-admin cluster join »free_node« +``` + +3\. Next, replace the free node with an existing node: + +```bash +riak-admin cluster replace »free_node« »nodename« +``` + +4\. Then review the cluster transition plan: + +```bash +riak-admin cluster plan +``` + +5\. And commit the changes: + +```bash +riak-admin cluster commit +``` + +6\. Wait for all transfers to complete: + +```bash +riak-admin transfers +``` + +7\. Repeat steps 2-6 above until each node has been replaced (one full cycle is sketched below). + +8\. Join the replaced node back into the cluster or decommission the additional node that was created.
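+
+Taken together, one full cycle of steps 2-6 looks like the following sketch, in which `free@10.0.1.5` and `old@10.0.1.1` are hypothetical placeholder node names:
+
+```bash
+riak-admin cluster join free@10.0.1.5
+riak-admin cluster replace free@10.0.1.5 old@10.0.1.1
+riak-admin cluster plan
+riak-admin cluster commit
+riak-admin transfers   # repeat until no transfers remain active
+```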
+ + + + diff --git a/content/riak/kv/2.9.9/using/repair-recovery/rolling-restart.md b/content/riak/kv/2.9.9/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..4076a08dbf --- /dev/null +++ b/content/riak/kv/2.9.9/using/repair-recovery/rolling-restart.md @@ -0,0 +1,64 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.9/ops/running/recovery/rolling-restart + - /riak/kv/2.9.9/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/2.9.9/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. + + + + diff --git a/content/riak/kv/2.9.9/using/repair-recovery/secondary-indexes.md b/content/riak/kv/2.9.9/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..5dbc020c15 --- /dev/null +++ b/content/riak/kv/2.9.9/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,142 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/2.9.9/ops/running/recovery/repairing-indexes + - /riak/kv/2.9.9/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
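+
+One way to keep such repairs outside of peak load time is to drive them from cron during a quiet window. A minimal sketch, in which the schedule, user, and log path are hypothetical:
+
+```bash
+# /etc/cron.d/riak-repair-2i -- full 2i repair, Sundays at 03:00
+0 3 * * 0 riak /usr/sbin/riak-admin repair-2i >> /var/log/riak/repair-2i.log 2>&1
+```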
+ +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i <Partition_ID> +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +---- + +## Repairing Search Indexes + +Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned. + +### Running a Repair + +If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index. + +This is done as efficiently as possible by generating a hash range for all the buckets and thus avoiding a preflist calculation for each key. Only a hash of each key is done, its range determined from a bucket→range map, and then the hash is checked against the range. + +This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly. + +1. From a cluster node with Riak installed, attach to the Riak console: + + ```bash + riak attach + ``` + + You may have to hit enter again to get a console prompt. + +2. Get a list of partitions owned by the node that needs repair: + + ```erlang + {ok, Ring} = riak_core_ring_manager:get_my_ring(). + ``` + + You will get a lot of output with Ring record information. You can safely ignore it. + +3. Then run the following code to get a list of partitions. Replace `'dev1@127.0.0.1'` with the name of the node you need to repair. + + ```erlang + Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)]. + ``` + + _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the Ring and extracts only the partitions that match the given node name, as a list._ + +4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` spam, but it's nothing to worry about. That is just the transfers mechanism enforcing an upper limit on the number of concurrent transfers. + + ```erlang + [riak_search_vnode:repair(P) || P <- Partitions]. + ``` + +5. When you're done, press `Ctrl-D` to disconnect the console. DO NOT RUN `q()`, which will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service; it does not stop the code from running. + + +### Monitoring a Repair + +The above Repair command can be slow, so if you reattach to the console, you can run the `repair_status` function. You can use the `Partitions` variable defined above to get the status of every partition. + +```erlang +[{P, riak_search_vnode:repair_status(P)} || P <- Partitions]. +``` + +When you're done, press `Ctrl-D` to disconnect the console. + +### Killing a Repair + +Currently there is no easy way to kill an individual repair. The only +option is to kill all repairs targeting a given node.
This is done by +running `riak_core_vnode_manager:kill_repairs(Reason)` on the node +undergoing repair. This means you'll either have to be attached to +that node's console or you can use the `rpc` module to make a remote +call. Here is an example of killing all repairs targeting partitions +on the local node. + +```erlang +riak_core_vnode_manager:kill_repairs(killed_by_user). +``` + +Log entries will reflect that repairs were killed manually, something akin to this: + +``` +2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user +``` + +Here is an example of executing the call remotely. + +```erlang +rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]). +``` + +When you're done, press `Ctrl-D` to disconnect the console. + +Repairs are not allowed to occur during ownership changes. Since +ownership entails the moving of partition data, it is safest to make +them mutually exclusive events. If you join or remove a node, all +repairs across the entire cluster will be killed. + + + + diff --git a/content/riak/kv/2.9.9/using/running-a-cluster.md b/content/riak/kv/2.9.9/using/running-a-cluster.md new file mode 100644 index 0000000000..2b7cba39cc --- /dev/null +++ b/content/riak/kv/2.9.9/using/running-a-cluster.md @@ -0,0 +1,339 @@ +--- +title: "Running a Cluster" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Running a Cluster" + identifier: "managing_running_a_cluster" + weight: 200 + parent: "managing" +toc: true +aliases: + - /riak/2.9.9/ops/building/basic-cluster-setup + - /riak/kv/2.9.9/ops/building/basic-cluster-setup +--- + +Configuring a Riak cluster involves instructing each node to listen on a +non-local interface, i.e. not `127.0.0.1`, and then joining all of the +nodes together to participate in the cluster. + +Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/) located in your `rel/riak/etc` directory (if +you compiled from source) or `/etc` (if you used a binary install of +Riak). + +The commands below presume that you are running from a source install, +but if you have installed Riak with a binary install, you can substitute +the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin` +with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts +are located in the `/bin` directory of your installation. + +> **Note on changing the `name` value** +> +> If possible, you should avoid starting Riak prior to editing the name of +a node. This setting corresponds to the `nodename` parameter in the +`riak.conf` file if you are using the newer configuration system, and to +the `-name` parameter in `vm.args` (as described below) if you are using +the older configuration system. If you have already started Riak with +the default settings, you cannot change the `-name` setting and then +successfully restart the node. +> +> If you cannot restart after changing the `-name` value, you have two +options: +> +> * Discard the existing ring metadata by removing the contents of the +`ring` directory (a short sketch follows this note). This will require rejoining all nodes into a +cluster again. +> +> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
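+
+A minimal sketch of the first option, assuming the default package-install ring path (adjust to your `platform_data_dir`):
+
+```bash
+riak stop
+rm -rf /var/lib/riak/ring/*
+riak start
+```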
+ +## Configure the First Node + +First, stop your Riak node if it is currently running: + +```bash +riak stop +``` + +#### Select an IP address and port + +Let's say that the IP address for your cluster is 192.168.1.10 and that +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +interface due to performance gains), you should change your +configuration file: + +```riakconf +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +%% In the pb section of riak_core: + +{"127.0.0.1", 8087 }, +``` + +becomes + +```riakconf +listener.protobuf.internal = 192.168.1.10:8087 +``` + +```appconfig +%% In the pb section of riak_core: + +{"192.168.1.10", 8087 }, +``` + +{{% note title="Note on upgrading to 2.0" %}} +If you are upgrading to Riak version 2.0 or later from a pre-2.0 +release, you can use either your old `app.config`/`vm.args` +configuration files or the newer `riak.conf` if you wish. If you have +installed Riak 2.0 directly, you should use only `riak.conf`. + +Below, examples will be provided for both the old and new configuration +systems. Bear in mind that you need to use either the older or the newer +system, but never both simultaneously. + +More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference). +{{% /note %}} + +If you're using the HTTP interface, you will need to alter your +configuration in an analogous way: + +```riakconf +listener.http.internal = 127.0.0.1:8098 +``` + +```appconfig +%% In the riak_core section: + +{http, [ {"127.0.0.1", 8098 } ]}, +``` + +becomes + +```riakconf +listener.http.internal = 192.168.1.10:8098 +``` + +```appconfig +{http, [ {"192.168.1.10", 8098 } ]}, +``` + +#### Name your node + +Every node in Riak has a name associated with it. The default name is +`riak@127.0.0.1`. Let's say that you want to change the name to +`riak@192.168.1.10`: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +becomes + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +> **Node Names** +> +> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10` +are both acceptable node naming schemes, but using the FQDN style is +preferred. +> +> Once a node has been started, in order to change the name you must +either remove ring files from the `/data/ring` directory or +[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/2.9.9/using/admin/riak-admin/#cluster-force-replace) the node. + +#### Start the node + +Now that your node is properly configured, you can start it: + +```bash +riak start +``` + +If the Riak node has been previously started, you must use the +`riak-admin cluster replace` command to change the node name and update +the node's ring file. + +```bash +riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10 +``` + +{{% note title="Note on single nodes" %}} +If a node is started singly using default settings, as you might do when you +are building your first test environment, you will need to remove the ring +files from the data directory after you edit your configuration files. +`riak-admin cluster replace` will not work since the node has not been joined +to a cluster.
+{{% /note %}} + +As with all cluster changes, you need to view the planned changes by +running `riak-admin cluster plan` and then running `riak-admin cluster +commit` to finalize those changes. + +The node is now properly set up to join other nodes for cluster +participation. You can proceed to adding a second node to the cluster. + +## Add a Second Node to Your Cluster + +Repeat the above steps for a second host on the same network, providing +the second node with a host/port and node name. Once the second node has +started, use `riak-admin cluster join` to join the second node to the +first node, thereby creating an initial Riak cluster. Let's say that +we've named our second node `riak@192.168.1.11`. From the new node's +`/bin` directory: + +```bash +riak-admin cluster join riak@192.168.1.10 +``` + +Output from the above should resemble: + +``` +Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10` +``` + +Next, plan and commit the changes: + +```bash +riak-admin cluster plan +riak-admin cluster commit +``` + +After the last command, you should see: + +``` +Cluster changes committed +``` + +If your output was similar, then the second Riak node is now part of the +cluster and has begun syncing with the first node. Riak provides several +ways to determine the cluster's ring status. Here are two ways to +examine your Riak cluster's ring: + +1. Using the `riak-admin` command: + + ```bash + bin/riak-admin status | grep ring_members + ``` + + With output resembling the following: + + ```bash + ring_members : ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +2. Running the `riak attach` command. This will open up an Erlang shell, +into which you can type the following command: + + ```erlang + 1> {ok, R} = riak_core_ring_manager:get_my_ring(). + + %% Response: + + {ok,{chstate,'riak@192.168.1.10',......... + (riak@192.168.52.129)2> riak_core_ring:all_members(R). + ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +To join additional nodes to your cluster, repeat the above steps. You +can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/2.9.9/using/cluster-operations/adding-removing-nodes) from a cluster. + +> **Ring Creation Size** +> +> All nodes in the cluster +must have the same initial ring size setting in order to join, and +participate in cluster activity. This setting can be adjusted in your +configuration file using the `ring_creation_size` parameter if you're +using the older configuration system or `ring_size` in the new system. +> +> Check the value of all nodes if you receive a message like this: +> `Failed: riak@10.0.1.156 has a different ring_creation_size` + +## Running Multiple Nodes on One Host + +If you built Riak from source code, or if you are using the Mac OS X +pre-built package, then you can easily run multiple Riak nodes on the +same machine. The most common scenario for doing this is to experiment +with running a Riak cluster. + +**Note**: If you have installed the `.deb` or `.rpm` package, then you +will need to download and build Riak from source to follow the +directions below. + +To run multiple nodes, make copies of the `riak` directory. + +- If you ran `make all rel`, then this can be found in `./rel/riak` + under the Riak source root directory. +- If you are running Mac OS X, then this is the directory where you + unzipped the `.tar.gz` file. + +Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`, +`./rel/riak3`, and so on, you need to make two changes: + +1. 
Set your handoff port and your Protocol Buffers or HTTP port +(depending on which interface you are using) to different values on each +node. For example: + + ```riakconf + # For Protocol Buffers: + listener.protobuf.internal = 127.0.0.1:8187 + + # For HTTP: + listener.http.internal = 127.0.0.1:8198 + + # For either interface: + handoff.port = 8199 + ``` + + ```appconfig + %% In the pb section of riak_core: + {"127.0.0.1", 8187 } + + %% In the http section of riak_core: + {"127.0.0.1", 8198} + ``` + +2. Change the name of each node to a unique name. Now, start the nodes, +changing path names and nodes as appropriate: + +```bash +./rel/riak1/bin/riak start +./rel/riak2/bin/riak start +./rel/riak3/bin/riak start + +# etc +``` + +Next, join the nodes into a cluster: + +```bash +./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak2/bin/riak-admin cluster plan +./rel/riak2/bin/riak-admin cluster commit +``` + +## Multiple Clusters on One Host + +Using the above technique, it is possible to run multiple clusters on +one computer. If a node hasn’t joined an existing cluster, it will +behave just as a cluster would. Running multiple clusters on one +computer is simply a matter of having two or more distinct nodes or +groups of clustered nodes. + + + + diff --git a/content/riak/kv/2.9.9/using/security.md b/content/riak/kv/2.9.9/using/security.md new file mode 100644 index 0000000000..5d6dfd8856 --- /dev/null +++ b/content/riak/kv/2.9.9/using/security.md @@ -0,0 +1,199 @@ +--- +title: "Security & Firewalls" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Security" + identifier: "managing_security" + weight: 205 + parent: "managing" +toc: true +aliases: + - /riak/2.9.9/ops/advanced/security + - /riak/kv/2.9.9/ops/advanced/security +--- + +[config reference search]: {{<baseurl>}}riak/kv/2.9.9/configuring/reference/#search +[config search enabling]: {{<baseurl>}}riak/kv/2.9.9/configuring/search/#enabling-riak-search +[config v3 ssl]: {{<baseurl>}}riak/kv/2.9.9/configuring/v3-multi-datacenter/ssl +[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html +[security basics]: {{<baseurl>}}riak/kv/2.9.9/using/security/basics +[security managing]: {{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/ +[Solr]: http://lucene.apache.org/solr/ +[usage search]: {{<baseurl>}}riak/kv/2.9.9/developing/usage/search + +> **Internal security** +> +> This document covers network-level security. For documentation on the +authentication and authorization features introduced in Riak 2.0, see +[Authentication and Authorization][security basics] and [Managing Security Sources][security managing] + +This article discusses standard configurations and port settings to use +when providing network security for a Riak Cluster. There are two +classes of access control for Riak: + +* Other Riak nodes participating in the cluster +* Clients making use of the Riak cluster + +The settings for both access groups are located in your cluster's +configuration settings. If you are using the newer configuration system, +you can set a host and port for each node in that node's `riak.conf` +file, setting `listener.protobuf` if you are using Riak's Protocol +Buffers interface or `listener.http` if you are using HTTP (or +`listener.https` if you are using SSL). 
If you are using the older +configuration system, adjust the settings of `pb`, `http`, or `https`, +depending on which client interface you are using. + +Make note of these configurations and set up your firewall to allow +incoming TCP access to those ports or IP address/port combinations. +Exceptions to this are the `handoff_ip` and `handoff_port` directives. +Those are for communication between Riak nodes only. + +## Inter-node Communication + +Riak uses the Erlang distribution mechanism for most inter-node +communication. Riak identifies other machines in the ring using Erlang +identifiers (`<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves +these node identifiers to a TCP port on a given machine via the Erlang +Port Mapper daemon (epmd) running on each cluster node. + +By default, epmd binds to TCP port 4369 and listens on the wildcard +interface. For inter-node communication, Erlang uses an unpredictable +port by default; it binds to port 0, which means the first available +port. + +For ease of firewall configuration, Riak can be configured +to instruct the Erlang interpreter to use a limited range +of ports. For example, to restrict the range of ports that Erlang will +use for inter-Erlang node communication to 6000-7999, add the following +lines to the configuration file on each Riak node: + +```riakconf +erlang.distribution.port_range.minimum = 6000 +erlang.distribution.port_range.maximum = 7999 +``` + +```appconfig +{ kernel, [ + {inet_dist_listen_min, 6000}, + {inet_dist_listen_max, 7999} + ]}, +``` + +The above lines should be added into the top level list in app.config, +at the same level as all the other applications (e.g. `riak_core`). +Then configure your firewall to allow incoming access to TCP ports 6000 +through 7999 from whichever network(s) contain your Riak nodes. + +### Riak Node Ports + +Riak nodes in a cluster need to be able to communicate freely with one +another on the following ports: + +* epmd listener: TCP:4369 +* handoff_port listener: TCP:8099 +* range of ports specified in `app.config` or `riak.conf` + +### Riak Client Ports + +Riak clients must be able to contact at least one machine in a Riak +cluster on the following TCP ports: + +Protocol | Port +:--------|:---- +<a href="../../developing/api/http">HTTP</a> | TCP port 8098 +<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087 + +### Riak Search Ports + +Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs +on each Riak node if security has been [enabled][config search enabling]. When +Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well +as some ephemeral ports. The well-known port is determined by the value of the +`search.solr.jmx_port` in each node's [Search configuration][config reference search]. +The default is 8985. + +In addition to JMX ports, Solr also binds to a well-known port of its +own, as determined by each node's `search.solr.port` setting, which is +also located in each node's Search configuration. The default is 8093. + +# Riak Security Community + +## Riak + +Riak is a powerful open-source distributed database focused on scaling +predictably and easily, while remaining highly available in the face of +server crashes, network partitions or other (inevitable) disasters. + +## Commitment + +Data security is an important and sensitive issue to many of our users. 
+ +A real-world approach to security allows us to balance appropriate +levels of security and related overhead while creating a fast, scalable, +and operationally straightforward database. + +### Continuous Improvement + +Though we make every effort to thwart security vulnerabilities whenever +possible (including through independent reviews), no system is +completely secure. We will never claim that Riak is 100% secure (and you +should seriously doubt anyone who claims their solution is). What we can +promise is that we openly accept all vulnerabilities from the community. +When appropriate, we'll publish and make every attempt to quickly +address these concerns. + +### Balance + +More layers of security increase operational and administrative costs. +Sometimes those costs are warranted, sometimes they are not. Our +approach is to strike an appropriate balance between effort, cost, and +security. + +For example, Riak does not have fine-grained role-based security. Though +it can be an attractive bullet-point in a database comparison chart, +you're usually better off finely controlling data access through your +application or a service layer. + +### Notifying Basho + +If you discover a potential security issue, please email us at +**security@basho.com**, and allow us 48 hours to reply. + +We prefer to be contacted first, rather than searching for blog posts +over the Internet. This allows us to open a dialogue with the security +community on how best to handle a possible exploit without putting any +users at risk. + +## Security Best Practices + +### Authentication and Authorization + +For instructions on how to apply permissions and to require client +authentication, please see our documentation on [Riak Security][security basics]. + +### Network Configurations + +Being a distributed database means that much of Riak's security springs +from how you configure your network. We have a few recommendations for +[Security and Firewalls][security basics]. + +### Client Auth + +All of the Riak client libraries support encrypted TCP communication +as well as authentication and authorization. For instructions on how +to apply permissions and to require client authentication, please see +our documentation on [Riak Security][security basics]. + +### Multi-Datacenter Replication + +For those versions of Riak that support Multi Data Center (MDC) +Replication, you can configure Riak 1.2+ to communicate over SSL, to +seamlessly encrypt the message traffic. + +See also: [Multi Data Center Replication: SSL][config v3 ssl] + + + + diff --git a/content/riak/kv/2.9.9/using/security/basics.md b/content/riak/kv/2.9.9/using/security/basics.md new file mode 100644 index 0000000000..99a54b2011 --- /dev/null +++ b/content/riak/kv/2.9.9/using/security/basics.md @@ -0,0 +1,851 @@ +--- +title: "Security Basics" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Security Basics" + identifier: "security_basics" + weight: 100 + parent: "managing_security" +toc: true +aliases: + - /riak/2.9.9/ops/running/authz + - /riak/kv/2.9.9/ops/running/authz +--- + +> **Note on Network security** +> +> This document covers only the 2.0 authentication and authorization +features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/2.9.9/using/security/).
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/2.9.9/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/2.9.9/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
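+ +Because these settings apply to the cluster as a whole, a change made from one node can be verified from any other. A sketch (it assumes only that the `riak-admin` script is available on each node): + +```bash +# On the node where you changed a security setting: +riak-admin security status + +# On any other node in the cluster; the output should match: +riak-admin security status +```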
+ +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak-admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak-admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak-admin security disable +``` + +While security is disabled, clients will need to be reconfigured to no +longer require TLS and send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak-admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics and granting users +selective access to Riak functionality (and also to revoke access). +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created. 
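+ +As a quick sketch of the characteristics above, the following creates a hypothetical user `anna` with one extra option (the `email` key is purely illustrative and carries no special significance for Riak): + +```bash +riak-admin security add-user anna email=anna@example.com +```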
+ +### Retrieve a Current User or Group List + +A list of currently existing users can be accessed at any time: + +```bash +riak-admin security print-users +``` + +The same goes for groups: + +```bash +riak-admin security print-groups +``` + +Example output, assuming a user named `riakuser` with an assigned +password: + +``` ++----------+--------+----------------------+------------------------------+ +| username | groups | password | options | ++----------+--------+----------------------+------------------------------+ +| riakuser | |983e8ae1421574b8733824| [] | ++----------+--------+----------------------+------------------------------+ +``` + +**Note**: All passwords are displayed in encrypted form in console +output. + +If the user `riakuser` were assigned to the group `dev` and a `name` of +`lucius`, the output would look like this: + +```bash ++----------+----------------+----------------------+---------------------+ +| username | groups | password | options | ++----------+----------------+----------------------+---------------------+ +| riakuser | dev |983e8ae1421574b8733824| [{"name","lucius"}] | ++----------+----------------+----------------------+---------------------+ +``` + +If you'd like to see which permissions have been assigned to +`riakuser`, you would need to use the `print-grants` command, detailed +below. + +The `security print-user` or `security print-group` (singular) commands +can be used with a name as an argument to see the same information as +above, except for only that user or group. + +### Permissions Grants For a Single User or Group + +You can retrieve authorization information about a specific user or +group using the `print-grants` command, which takes the form of +`riak-admin security print-grants <username>`. + +The output will look like this if the user `riakuser` has been +explicitly granted a `riak_kv.get` permission on the bucket +`shopping_list` and inherits a set of permissions from the `admin` +group: + +```bash +Inherited permissions (user/riakuser) + ++--------+----------+----------+----------------------------------------+ +| group | type | bucket | grants | ++--------+----------+----------+----------------------------------------+ +| admin | * | * | riak_kv.get, riak_kv.delete, | +| | | | riak_kv.put | ++--------+----------+----------+----------------------------------------+ + +Dedicated permissions (user/riakuser) + ++----------+-------------+----------------------------------------+ +| type | bucket | grants | ++----------+-------------+----------------------------------------+ +| ANY |shopping_list| riak_kv.get | ++----------+-------------+----------------------------------------+ + +Cumulative permissions (user/riakuser) + ++----------+-------------+----------------------------------------+ +| type | bucket | grants | ++----------+-------------+----------------------------------------+ +| * | * | riak_kv.get, riak_kv.delete, | +| | | riak_kv.put | +| ANY |shopping_list| riak_kv.get | ++----------+-------------+----------------------------------------+ +``` + +**Note**: The term `admin` is not a reserved term in Riak security. It +is used here only for illustrative purposes. + +Because the same name can represent both a user and a group, a prefix +(`user/` or `group/`) can be used before the name (e.g., `print-grants +user/admin`). If a name collides and no prefix is supplied, grants for +both will be listed separately.
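+ +For instance, if the name `admin` existed as both a user and a group, the two could be inspected separately using the prefixed form described above: + +```bash +riak-admin security print-grants user/admin +riak-admin security print-grants group/admin +```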
+ +### Add Group + +For easier management of permissions across several users, it is +possible to create groups to be assigned to those users. + +```bash +riak-admin security add-group admin +``` + +### Add User + +To create a user with the username `riakuser`, we use the `add-user` +command: + +```bash +riak-admin security add-user riakuser +``` + +Using the command this way will create the user `riakuser` without _any_ +characteristics beyond a username, which is the only attribute that you +must assign upon user creation. + +Alternatively, a password---or other attributes---can be assigned to the +user upon creation. Here, we'll assign a password: + +```bash +riak-admin security add-user riakuser password=Test1234 +``` + +### Assigning a Password and Altering Existing User Characteristics + +While passwords and other characteristics can be set upon user creation, +it often makes sense to change user characteristics after the user has +already been created. Let's say that the user `riakuser` was created +without a password (or created _with_ a password that we'd like to +change). The `alter-user` command can be used to modify our `riakuser` +user: + +```bash +riak-admin security alter-user riakuser password=opensesame +``` + +When creating or altering a user, any number of `<option>=<value>` +pairs can be appended to the end of the command. Any non-standard +options will be stored and displayed via the `riak-admin security +print-users` command. + +```bash +riak-admin security alter-user riakuser name=bill age=47 fav_color=red +``` + +Now, the `print-users` command should return this: + +``` ++----------+--------+----------+--------------------------------------------------+ +| username | groups | password | options | ++----------+--------+----------+--------------------------------------------------+ +| riakuser | | |[{"fav_color","red"},{"age","47"},{"name","bill"}]| ++----------+--------+----------+--------------------------------------------------+ +``` + +**Note**: Usernames _cannot_ be changed using the `alter-user` command. +For example, running `riak-admin security alter-user riakuser +username=other-name`, will instead add the +`{"username","other-name"}` tuple to `riakuser`'s options. + +### Managing Groups for a User + +If we have a user `riakuser` and we'd like to assign her to the +`admin` group, we assign the value `admin` to the option `groups`: + +```bash +riak-admin security alter-user riakuser groups=admin +``` + +If we'd like to make the user `riakuser` both an `admin` and an +`archoverlord`: + +```bash +riak-admin security alter-user riakuser groups=admin,archoverlord +``` + +There is no way to incrementally add groups; even if `riakuser` was +already an `admin`, it is necessary to list it again when adding the +`archoverlord` group. Thus, to remove a group from a user, use +`alter-user` and list all *other* groups. + +If the user should be removed from all groups, use `groups=` with no +list: + +```bash +riak-admin security alter-user riakuser groups= +``` + +### Managing Groups for Groups + +Groups can be added to other groups for cascading permissions. 
+ +```bash +riak-admin security alter-group admin groups=dev +``` + +### Deleting a User or Group + +If you'd like to remove a user, use the `del-user` command: + +``` +riak-admin security del-user riakuser +``` + +For groups, use the `del-group` command: + +``` +riak-admin security del-group admin +``` + +### Adding or Deleting Multiple Users + +The `riak-admin security` command does not currently allow you to +add or delete multiple users using a single command. Instead, they must +be added or deleted one by one. + +## Managing Permissions + +Permission to perform a wide variety of operations against Riak can be +granted to---or revoked from---users via the `grant` and `revoke` +commands. + +### Basic Form + +The `grant` command takes one of the following forms: + +```bash +riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]} +riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]} +riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]} +``` + +The `revoke` command is essentially the same, except that `to` is +replaced with `from`: + +```bash +riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]} +riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]} +riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]} +``` + +If you select `any`, this means that the permission (or set of +permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types). If you specify a bucket type only, then the permission +is granted/revoked for all buckets of that type. If you specify a bucket +type _and_ a bucket, the permission is granted/revoked only for that +bucket type/bucket combination. + +**Note**: You cannot grant/revoke permissions with respect to a bucket +alone. You must specify either a bucket type by itself or a bucket type +and bucket. This limitation reflects the naming structure underlying +buckets and bucket types. + +Selecting `all` grants or revokes a permission (or set of permissions) +for all users in all groups. When specifying the user(s)/group(s) to +which you want to apply a permission (or set of permissions), you may +list any number of users or groups comma-separated with no whitespace. +Here is an example of granting multiple permissions across all buckets +and bucket types to multiple users: + +```bash +riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed +``` + +If the same name is used for both a user and a group, the `grant` +command will ask for the name to be prefixed with `user/` or `group/` +to disambiguate.
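+ +In that case, simply repeat the grant with the prefixed form. A sketch, assuming a hypothetical name `ops` that exists as both a user and a group: + +```bash +riak-admin security grant riak_kv.get on any to user/ops +riak-admin security grant riak_kv.get on any to group/ops +```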
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/2.9.9/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/2.9.9/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/2.9.9/configuring/search/) document. 
+ +#### Usage Examples + +To grant the user `riakuser` the ability to query all indexes: + +```bash +riak-admin security grant search.query on index to riakuser + +# To revoke: +# riak-admin security revoke search.query on index from riakuser +``` + +To grant the user `riakuser` the ability to query all schemas: + +```bash +riak-admin security grant search.query on schema to riakuser + +# To revoke: +# riak-admin security revoke search.query on schema from riakuser +``` + +To grant the user `riakuser` admin privileges only on the index +`riakusers_index`: + +```bash +riak-admin security grant search.admin on index riakusers_index to riakuser + +# To revoke: +# riak-admin security revoke search.admin on index riakusers_index from riakuser +``` + +To grant `riakuser` querying and admin permissions on the index +`riakusers_index`: + +```bash +riak-admin security grant search.query,search.admin on index riakusers_index to riakuser + +# To revoke: +# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser +``` + +## Managing Sources + +While user management enables you to control _authorization_ with regard +to users, security **sources** provide you with an interface for +managing means of _authentication_. If you create users and grant them +access to some or all of Riak's functionality as described in the [User Management](#user-management) section, +you will then need to define security sources required for +authentication. + +A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/). + +### Add Source + +Riak security sources may be applied to a specific user, multiple users, +or all users (`all`). + +#### Available Sources + +Source | Description +:--------|:----------- +`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range +`password` | Check the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak +`pam` | Authenticate against the given pluggable authentication module (PAM) service +`certificate` | Authenticate using a client certificate + +### Example: Adding a Trusted Source + +Security sources can be added either to a specific user, multiple users, +or all users (`all`). + +In general, the `add-source` command takes the following form: + +```bash +riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]] +``` + +Using `all` indicates that the authentication source can be added to +all users. A source can be added to a specific user, e.g. `add-source +superuser`, or to a list of users separated by commas, e.g. `add-source +jane,bill,admin`. + +Let's say that we want to give all users trusted access to securables +(without a password) when requests come from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +At that point, the `riak-admin security print-sources` command would +print the following: + +``` ++--------------------+------------+----------+----------+ +| users | cidr | source | options | ++--------------------+------------+----------+----------+ +| all |127.0.0.1/32| trust | [] | ++--------------------+------------+----------+----------+ +``` + +### Deleting Sources + +If we wish to remove the `trust` source that we granted to `all` in the +example above, we can simply use the `del-source` command and specify +the CIDR.
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/2.9.9/setup/installing/source/erlang). 
This issue should +not affect Erlang 17.0 and later. + +## Enabling SSL + +In order to use any authentication or authorization features, you must +enable SSL for Riak. **SSL is disabled by default**, but you will need +to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#client-interfaces) for the node +as well as a [certificate configuration](#certificate-configuration). + +If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/2.9.9/developing/api/http) for Riak and would like to +configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host +and port. The following configuration would establish port 8088 on +`localhost` as the HTTPS port: + +```riakconf +listener.https.$name = 127.0.0.1:8088 + +# By default, "internal" is used as the "name" setting +``` + +```appconfig +{riak_core, [ + %% Other configs + {https, [{"127.0.0.1", 8088}]}, + %% Other configs + ]} +``` + +## TLS Settings + +When using Riak security, you can choose which versions of SSL/TLS are +allowed. By default, only TLS 1.2 is allowed, but this version can be +disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#security) to `on` or `off`: + +* `tls_protocols.tlsv1` +* `tls_protocols.tlsv1.1` +* `tls_protocols.tlsv1.2` +* `tls_protocols.sslv3` + +Three things to note: + +* Among the four available options, only TLS version 1.2 is enabled by + default +* You can enable more than one protocol at a time +* We strongly recommend that you do _not_ use SSL version 3 unless + absolutely necessary + +## Certificate Configuration + +If you are using any of the available [security sources]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#trust-based-authentication), you will need to do so +over a secure SSL connection. In order to establish a secure connection, +you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#security) point to the proper paths for your +generated certs. By default, Riak assumes that all certs are stored in +each node's `/etc` directory. + +If you are using the newer, `riak.conf`-based configuration system, you +can change the location of the `/etc` directory by modifying the +`platform_etc_dir`. More information can be found in our documentation +on [configuring directories]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/#directories).
+ +<table class="riak-conf"> + <thead> + <tr> + <th>Type</th> + <th>Parameter</th> + <th>Default</th> + </tr> + </thead> + <tbody> + <tr> + <td><strong>Signing authority</strong></td> + <td><code>ssl.cacertfile</code></td> + <td><code>#(platform_etc_dir)/cacertfile.pem</code></td> + </tr> + <tr> + <td><strong>Cert</strong></td> + <td><code>ssl.certfile</code></td> + <td><code>#(platform_etc_dir)/cert.pem</code></td> + </tr> + <tr> + <td><strong>Key file</strong></td> + <td><code>ssl.keyfile</code></td> + <td><code>#(platform_etc_dir)/key.pem</code></td> + </tr> + </tbody> +</table> + +If you are using the older, `app.config`-based configuration system, +these paths can be set in the `ssl` subsection of the `riak_core` +section. The corresponding parameters are shown in the example below: + +```appconfig +{riak_core, [ + %% Other configs + + {ssl, [ + {certfile, "./etc/cert.pem"}, + {keyfile, "./etc/key.pem"}, + {cacertfile, "./etc/cacertfile.pem"} + ]}, + + %% Other configs +]} +``` + +## Referer Checks and Certificate Revocation Lists + +In order to provide safeguards against +[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting) +(XSS) and +[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +attacks, Riak performs [secure referer +checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those +checks make it impossible to serve data directly from Riak. To disable +those checks, set the `secure_referer_check` parameter to `off`. + +If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/2.9.9/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +default. To disable this behavior, set the `check_crl` parameter to +`off`. + + + + diff --git a/content/riak/kv/2.9.9/using/security/best-practices.md b/content/riak/kv/2.9.9/using/security/best-practices.md new file mode 100644 index 0000000000..7693406e3b --- /dev/null +++ b/content/riak/kv/2.9.9/using/security/best-practices.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Security Best Practices" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Best Practices" + identifier: "security_best_practices" + weight: 102 + parent: "managing_security" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/2.9.9/using/security/managing-sources.md b/content/riak/kv/2.9.9/using/security/managing-sources.md new file mode 100644 index 0000000000..287d710fe6 --- /dev/null +++ b/content/riak/kv/2.9.9/using/security/managing-sources.md @@ -0,0 +1,273 @@ +--- +title: "Managing Security Sources" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Managing Security Sources" + identifier: "security_manage_sources" + weight: 101 + parent: "managing_security" +toc: true +aliases: + - /riak/2.9.9/ops/running/security-sources + - /riak/kv/2.9.9/ops/running/security-sources +--- + +If you're looking for more general information on Riak Security, it may +be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/2.9.9/using/security/basics). + +This document provides more granular information on the four available +authentication sources in Riak Security: trusted networks, password, +pluggable authentication modules (PAM), and certificates. These sources +correspond to `trust`, `password`, `pam`, and `certificate`, +respectively, in the `riak-admin security` interface. + +The examples below will assume that the network in question is +`127.0.0.1/32` and that a Riak user named `riakuser` has been +[created]({{<baseurl>}}riak/kv/2.9.9/using/security/basics/#user-management) and that +security has been [enabled]({{<baseurl>}}riak/kv/2.9.9/using/security/basics/#the-basics). + +{{% note title="Note on SSL connections" %}} +If you use _any_ of the aforementioned security sources, even `trust`, you +will need to do so via a secure SSL connection. 
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak-admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak-admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 password +riak-admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/2.9.9/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
+The default directory for certificates is `/etc`, though you can specify +a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: + +```riakconf +ssl.certfile = /path/to/cert.pem +ssl.keyfile = /path/to/key.pem +ssl.cacertfile = /path/to/cacert.pem +``` + +In the client-side example above, the client's `CN` and Riak username +needed to match. On the server (i.e. Riak) side, the `CN` specified _on +each node_ must match the node's name as registered by Riak. You can +find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/2.9.9/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +`riak-node-1`, you would need to generate your certificate with that in +mind, as in this OpenSSL example: + +```bash +openssl req -new ... '/CN=riak-node-1' +``` + +Once certificates have been properly generated and configured on all of +the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/2.9.9/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +certificate that you generated for the user `riakuser`. + +How to use Riak clients in conjunction with OpenSSL and other +certificates varies from client library to client library. We strongly +recommend checking the documentation of your client library for further +information. + +## PAM-based Authentication + +This section assumes that you have set up a PAM service bearing the name +`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition +specifying `auth` and/or other PAM services set up to authenticate a +user named `riakuser`. As in the certificate-based authentication +example above, the user's name must be the same in both your +authentication module and in Riak Security. + +If we want the user `riakuser` to use this PAM service on `localhost`, +we need to add a `pam` security source in Riak and specify the name of +the service: + +```bash +riak-admin security add-source all 127.0.0.1/32 pam service=riak_pam +``` + +**Note**: If you do not specify a name for your PAM service, Riak will +use the default, which is `riak`. + +To verify that the source has been properly specified: + +```bash +riak-admin security print-sources +``` + +That command should output the following: + +``` ++--------------------+------------+----------+------------------------+ +| users | cidr | source | options | ++--------------------+------------+----------+------------------------+ +| riakuser |127.0.0.1/32| pam |[{"service","riak_pam"}]| ++--------------------+------------+----------+------------------------+ +``` + +You can test that setup most easily by using `curl`. A normal request to +Riak without specifying a user will return an `Unauthorized` message: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +Response: + +``` +<html><head><title>401 Unauthorized
</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></body></html>
+``` + +If you identify yourself as `riakuser` and are successfully +authenticated by your PAM service, you should get either `not found` or +a Riak object if one is stored in the specified bucket type/bucket/key +path: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## How Sources Are Applied + +When managing security sources---any of the sources explained +above---you always have the option of applying a source to either a +single user, multiple users, or all users (`all`). If specific users and +`all` have no sources in common, this presents no difficulty. But what +happens if one source is applied to `all` and a different source is +applied to a specific user? + +The short answer is that the more specifically assigned source---i.e. to +the user---will be considered a user's security source. We'll illustrate +that with the following example, in which the `certificate` source is +assigned to `all`, but the `password` source is assigned to `riakuser`: + +```bash +riak-admin security add-source all 127.0.0.1/32 certificate +riak-admin security add-source riakuser 127.0.0.1/32 password +``` + +If we run `riak-admin security print-sources`, we'll get the following +output: + +``` ++--------------------+------------+-----------+----------+ +| users | cidr | source | options | ++--------------------+------------+-----------+----------+ +| riakuser |127.0.0.1/32| password | [] | +| |127.0.0.1/32|certificate| [] | +| all |127.0.0.1/32|certificate| [] | ++--------------------+------------+-----------+----------+ +``` + +As we can see, `password` is set as the security source for `riakuser`, +whereas everyone else will authenticate using `certificate`. + + + + diff --git a/content/riak/kv/2.9.9/using/security/v2-v3-ssl-ca.md b/content/riak/kv/2.9.9/using/security/v2-v3-ssl-ca.md new file mode 100644 index 0000000000..d62aa99387 --- /dev/null +++ b/content/riak/kv/2.9.9/using/security/v2-v3-ssl-ca.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "V2 / V3 SSL & CA Validation" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "V2/V3 SSL & CA Validation" + identifier: "security_validation" + weight: 103 + parent: "managing_security" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**.
Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/2.9.9/using/troubleshooting.md b/content/riak/kv/2.9.9/using/troubleshooting.md new file mode 100644 index 0000000000..00523f7672 --- /dev/null +++ b/content/riak/kv/2.9.9/using/troubleshooting.md @@ -0,0 +1,28 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +aliases: +--- + +[http 204]: ./http-204 + +## In This Section + +#### [HTTP 204][http 204] + +About the HTTP 204 response. + +[Learn More >>][http 204] + + + + diff --git a/content/riak/kv/2.9.9/using/troubleshooting/http-204.md b/content/riak/kv/2.9.9/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..287dfc0ece --- /dev/null +++ b/content/riak/kv/2.9.9/using/troubleshooting/http-204.md @@ -0,0 +1,22 @@ +--- +title: "HTTP 204" +description: "" +project: "riak_kv" +project_version: 2.9.9 +menu: + riak_kv-2.9.9: + name: "HTTP 204" + identifier: "troubleshooting_http_204" + weight: 101 + parent: "managing_troubleshooting" +toc: true +aliases: +--- + +In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers. + +If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored, otherwise you will receive a `204 No Content`. 
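+
+For illustration, here is one way to observe the difference with `curl`
+(a sketch assuming a local node listening on the default HTTP port 8098 and a
+hypothetical bucket/key of `test`/`demo`):
+
+```bash
+# without returnbody: expect HTTP/1.1 204 No Content
+curl -i -X PUT -d 'hello' \
+  "http://127.0.0.1:8098/buckets/test/keys/demo"
+
+# with returnbody=true: expect HTTP/1.1 200 OK plus the stored value
+curl -i -X PUT -d 'hello' \
+  "http://127.0.0.1:8098/buckets/test/keys/demo?returnbody=true"
+```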
+ + + + diff --git a/content/riak/kv/3.0.1/_reference-links.md b/content/riak/kv/3.0.1/_reference-links.md new file mode 100644 index 0000000000..8f8aad7d68 --- /dev/null +++ b/content/riak/kv/3.0.1/_reference-links.md @@ -0,0 +1,253 @@ + +# Riak KV 3.0.1 Reference Links List + + +## Common + +[downloads]: {{}}riak/kv/3.0.1/downloads/ +[install index]: {{}}riak/kv/3.0.1/setup/installing +[upgrade index]: {{}}riak/kv/3.0.1/upgrading +[plan index]: {{}}riak/kv/3.0.1/planning +[config index]: {{}}riak/kv/3.0.1/using/configuring/ +[config reference]: {{}}riak/kv/3.0.1/configuring/reference/ +[manage index]: {{}}riak/kv/3.0.1/using/managing +[performance index]: {{}}riak/kv/3.0.1/using/performance +[glossary vnode]: {{}}riak/kv/3.0.1/learn/glossary/#vnode +[contact basho]: https://www.tiot.jp/en/about-us/contact-us/ + + +## Planning + +[plan index]: {{}}riak/kv/3.0.1/setup/planning +[plan start]: {{}}riak/kv/3.0.1/setup/planning/start +[plan backend]: {{}}riak/kv/3.0.1/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/3.0.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/3.0.1/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/3.0.1/setup/planning/backend/leveled +[plan backend memory]: {{}}riak/kv/3.0.1/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/3.0.1/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/3.0.1/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/3.0.1/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/3.0.1/setup/planning/best-practices +[plan future]: {{}}riak/kv/3.0.1/setup/planning/future + + +## Installing + +[install index]: {{}}riak/kv/3.0.1/setup/installing +[install aws]: {{}}riak/kv/3.0.1/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/3.0.1/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/3.0.1/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/3.0.1/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/3.0.1/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/3.0.1/setup/installing/smartos +[install solaris]: {{}}riak/kv/3.0.1/setup/installing/solaris +[install suse]: {{}}riak/kv/3.0.1/setup/installing/suse +[install windows azure]: {{}}riak/kv/3.0.1/setup/installing/windows-azure + +[install source index]: {{}}riak/kv/3.0.1/setup/installing/source +[install source erlang]: {{}}riak/kv/3.0.1/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/3.0.1/setup/installing/source/jvm + +[install verify]: {{}}riak/kv/3.0.1/setup/installing/verify + + +## Upgrading + +[upgrade index]: {{}}riak/kv/3.0.1/setup/upgrading +[upgrade checklist]: {{}}riak/kv/3.0.1/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/3.0.1/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/3.0.1/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/3.0.1/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/3.0.1/setup/downgrade + + +## Configuring + +[config index]: {{}}riak/kv/3.0.1/configuring +[config basic]: {{}}riak/kv/3.0.1/configuring/basic +[config backend]: {{}}riak/kv/3.0.1/configuring/backend +[config manage]: {{}}riak/kv/3.0.1/configuring/managing +[config reference]: {{}}riak/kv/3.0.1/configuring/reference/ +[config strong consistency]: {{}}riak/kv/3.0.1/configuring/strong-consistency +[config load balance]: {{}}riak/kv/3.0.1/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/3.0.1/configuring/mapreduce +[config search]: 
{{}}riak/kv/3.0.1/configuring/search/ + +[config v3 mdc]: {{}}riak/kv/3.0.1/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/3.0.1/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/3.0.1/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/3.0.1/configuring/v3-multi-datacenter/ssl + +[config v2 mdc]: {{}}riak/kv/3.0.1/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/3.0.1/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/3.0.1/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/3.0.1/configuring/v2-multi-datacenter/ssl + + + +## Using + +[use index]: {{}}riak/kv/3.0.1/using/ +[use admin commands]: {{}}riak/kv/3.0.1/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/3.0.1/using/running-a-cluster + +### Reference + +[use ref custom code]: {{}}riak/kv/3.0.1/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/3.0.1/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/3.0.1/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/3.0.1/using/reference/search +[use ref 2i]: {{}}riak/kv/3.0.1/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/3.0.1/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/3.0.1/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/3.0.1/using/reference/jmx +[use ref obj del]: {{}}riak/kv/3.0.1/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/3.0.1/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/3.0.1/using/reference/v2-multi-datacenter + +### Cluster Admin + +[use admin index]: {{}}riak/kv/3.0.1/using/admin/ +[use admin commands]: {{}}riak/kv/3.0.1/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/3.0.1/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/3.0.1/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/3.0.1/using/admin/riak-control/ + +### Cluster Operations + +[cluster ops add remove node]: {{}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/3.0.1/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/3.0.1/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/3.0.1/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/3.0.1/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/3.0.1/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/3.0.1/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/3.0.1/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/3.0.1/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/3.0.1/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/3.0.1/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/3.0.1/using/cluster-operations/v2-multi-datacenter + +### Repair/Recover + +[repair recover index]: {{}}riak/kv/3.0.1/using/repair-recovery +[repair recover index]: {{}}riak/kv/3.0.1/using/repair-recovery/failure-recovery/ + +### Security + +[security index]: {{}}riak/kv/3.0.1/using/security/ +[security basics]: {{}}riak/kv/3.0.1/using/security/basics +[security managing]: {{}}riak/kv/3.0.1/using/security/managing-sources/ + +### Performance + +[perf index]: 
{{}}riak/kv/3.0.1/using/performance/ +[perf benchmark]: {{}}riak/kv/3.0.1/using/performance/benchmarking +[perf open files]: {{}}riak/kv/3.0.1/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/3.0.1/using/performance/erlang +[perf aws]: {{}}riak/kv/3.0.1/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/3.0.1/using/performance/latency-reduction + +### Troubleshooting + +[troubleshoot http]: {{}}riak/kv/3.0.1/using/troubleshooting/http-204 + + +## Developing + +[dev index]: {{}}riak/kv/3.0.1/developing +[dev client libraries]: {{}}riak/kv/3.0.1/developing/client-libraries +[dev data model]: {{}}riak/kv/3.0.1/developing/data-modeling +[dev data types]: {{}}riak/kv/3.0.1/developing/data-types +[dev kv model]: {{}}riak/kv/3.0.1/developing/key-value-modeling + +### Getting Started + +[getting started]: {{}}riak/kv/3.0.1/developing/getting-started +[getting started java]: {{}}riak/kv/3.0.1/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/3.0.1/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/3.0.1/developing/getting-started/python +[getting started php]: {{}}riak/kv/3.0.1/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/3.0.1/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/3.0.1/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/3.0.1/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/3.0.1/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/3.0.1/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/3.0.1/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/3.0.1/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/3.0.1/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/3.0.1/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/3.0.1/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/3.0.1/developing/getting-started/golang/object-modeling + +### Usage + +[usage index]: {{}}riak/kv/3.0.1/developing/usage +[usage bucket types]: {{}}riak/kv/3.0.1/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/3.0.1/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/3.0.1/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/3.0.1/developing/usage/content-types +[usage create objects]: {{}}riak/kv/3.0.1/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/3.0.1/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/3.0.1/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/3.0.1/developing/usage/mapreduce +[usage search]: {{}}riak/kv/3.0.1/developing/usage/search +[usage search schema]: {{}}riak/kv/3.0.1/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/3.0.1/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/3.0.1/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/3.0.1/developing/usage/updating-objects + +### App Guide + +[apps mapreduce]: {{}}riak/kv/3.0.1/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/3.0.1/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/3.0.1/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/3.0.1/developing/api/backend +[dev api http]: 
{{}}riak/kv/3.0.1/developing/api/http +[dev api http status]: {{}}riak/kv/3.0.1/developing/api/http/status +[dev api pbc]: {{}}riak/kv/3.0.1/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/3.0.1/learn/glossary/ +[glossary aae]: {{}}riak/kv/3.0.1/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/3.0.1/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/3.0.1/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/3.0.1/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/3.0.1/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/3.0.1/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/3.0.1/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/3.0.1/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/3.0.1/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/3.0.1/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/3.0.1/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/3.0.1/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/3.0.1/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + + + + + diff --git a/content/riak/kv/3.0.1/add-ons.md b/content/riak/kv/3.0.1/add-ons.md new file mode 100644 index 0000000000..9f8fbb74d3 --- /dev/null +++ b/content/riak/kv/3.0.1/add-ons.md @@ -0,0 +1,25 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +aliases: + +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. + +* [Riak Redis Add-on]({{}}riak/kv/3.0.1/add-ons/redis/) + + + + diff --git a/content/riak/kv/3.0.1/add-ons/redis.md b/content/riak/kv/3.0.1/add-ons/redis.md new file mode 100644 index 0000000000..6f7bedda97 --- /dev/null +++ b/content/riak/kv/3.0.1/add-ons/redis.md @@ -0,0 +1,63 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +aliases: + +--- + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. +{{% /note %}} + +Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV. 
+
+RRA enables you to reduce latency for Riak KV reads through the use of
+a distributed cache layer. This type of caching is most effective for
+keys that are immutable or have an infrequent change rate.
+
+Whether you are looking to build out a session store, shopping cart,
+advertisement, or other dynamically rendered copy, RRA helps reduce read
+pressure on your persistent store (Riak KV).
+
+## Compatibility
+
+RRA is supported on the following platforms:
+
+* RHEL/CentOS 6
+* RHEL/CentOS 7
+* Ubuntu 12.04 LTS "Precise Pangolin"
+* Ubuntu 14.04 LTS "Trusty Tahr"
+* Debian 7 "Wheezy"
+* Debian 8 "Jessie"
+
+RRA is compatible with the following services:
+
+* Riak KV Enterprise (2.1.4+)
+* Riak TS Enterprise (1.4.0+)
+* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster)
+  * Redis Cluster and RRA's consistent hashing are at odds, which surfaces as
+    errors such as MOVED, ASK, and CROSSSLOT messages from Redis; see (WIP):
+    https://github.com/antirez/redis-rb-cluster
+
+## Get Started
+
+* [Set up RRA.][addon redis setup]
+* [Use RRA with various clients.][addon redis use]
+* [Develop with RRA.][addon redis develop]
+* [Learn about RRA's features.][addon redis features]
+
+
+
diff --git a/content/riak/kv/3.0.1/add-ons/redis/developing-rra.md b/content/riak/kv/3.0.1/add-ons/redis/developing-rra.md
new file mode 100644
index 0000000000..27e524feed
--- /dev/null
+++ b/content/riak/kv/3.0.1/add-ons/redis/developing-rra.md
@@ -0,0 +1,330 @@
+---
+title: "Developing with Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Develop with Redis Add-on"
+    identifier: "add-ons_redis_develop"
+    weight: 403
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+
+---
+
+[redis-clients]: http://redis.io/clients
+[usage bucket types]: {{}}riak/kv/3.0.1/developing/usage/bucket-types/
+[dev api http]: {{}}riak/kv/3.0.1/developing/api/http
+[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/
+[apps replication properties]: {{}}riak/kv/3.0.1/developing/app-guide/replication-properties
+[usage commit hooks]: {{}}riak/kv/3.0.1/developing/usage/commit-hooks/
+[concept causal context]: {{}}riak/kv/3.0.1/learn/concepts/causal-context
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+This page will walk you through setting up your environment for development
+with Riak Redis Add-on (RRA), as well as present examples and configuration
+parameters for basic development operations.
+
+## Overview
+
+Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy
+service provides access to Riak KV as a persistent data store, with Redis as
+a cache, through the various Redis client libraries and the command-line
+interface tool `redis-cli`.
+
+As with Riak KV, the cache proxy service almost always performs best and most
+predictably when you use the basic CRUD operations -- Create, Read, Update,
+Delete -- that you'd find in any key/value store. Learning these operations
+is a great place to start when beginning to develop applications that use
+RRA.
+
+The set of clients (including recommendations) for Redis is listed at
+[Redis clients][redis-clients]. For brevity's sake, the examples provided
+here are in:
+
+* Erlang (Eredis)
+* JavaScript (node_redis)
+* Python (redis-py)
+* Ruby (redis-rb)
+* Scala (lettuce)
+* Java: see the Scala examples, which intentionally use as few Scala tricks
+  as possible to focus on the use of the Redis client.
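+
+One thing to note before diving in: in all of the examples that follow, the
+client connects to the cache proxy's listen port (22122 in a default RRA
+setup) rather than directly to Redis or Riak KV. For instance, with the
+command-line client:
+
+```bash
+# open an interactive session against the cache proxy, not Redis itself
+redis-cli -h 127.0.0.1 -p 22122
+```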
+
+## Riak KV Setup
+
+While you can use Riak Redis Add-on with Riak KV configured so that either
+`last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we
+recommend using the `allow_mult` setting in order to provide client sibling
+resolution in the event of a network partition. The examples and instructions
+on this page will assume that `allow_mult` is set to 'true'.
+
+The cache proxy service is tested under both configurations. However, due to
+the lack of support in the Redis protocol for returning multiple values for a
+single `GET`, effectively `last_write_wins` semantics apply.
+
+For a deeper explanation of Riak KV's configurable behaviors, see John Daily's
+blog series [part 4][config-behaviors].
+
+### Bucket Type Setup
+
+#### Create a Bucket Type
+
+If your application organizes data in a way that does not include a bucket
+type and instead only uses a bucket to organize its keyspace, the `default`
+bucket type can be used by omitting the bucket-type portion of the
+colon-delimited hierarchical namespaced key. In other words, `test:food` is
+equivalent to `default:test:food`, where the bucket type is `default`, the
+bucket is `test`, and the key is `food`. For the examples here, we will use
+`rra:test:food` to clearly use a bucket type.
+
+If your application organizes data including a bucket type, ensure that the
+bucket type is created in Riak without specifying the data type, so that the
+value is effectively opaque, i.e. a `string`. The following command provides
+an example of creating the bucket type `rra`:
+
+```sh
+if ! riak admin bucket-type status rra >/dev/null 2>&1; then
+  riak admin bucket-type create rra '{"props":{}}'
+  riak admin bucket-type activate rra
+fi
+```
+
+#### Set Bucket Props
+
+The following is an example, using Riak KV's default HTTP port, of setting
+`allow_mult` to 'true' and `last_write_wins` to 'false':
+
+```sh
+curl -XPUT -H 'Content-Type: application/json' \
+  -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \
+  'http://127.0.0.1:8098/types/rra/buckets/test/props'
+```
+
+For additional configuration options see [bucket properties][dev api http].
+
+## Object/Key Operations
+
+Riak KV organizes data into buckets, keys, and values, with
+[bucket types][usage bucket types] acting as an additional namespace in Riak
+KV versions 2.0 and greater. Values, which we'll refer to as objects, are
+identifiable by a unique key, and each key/value pair is stored in a bucket.
+
+Objects accessed via the cache proxy service in Riak Redis Add-on are
+restricted to plaintext format. This plaintext format may be a simple string,
+JSON, XML, or another plaintext representation that can be parsed in the
+client application (e.g. YAML).
+
+While buckets are a flat namespace in Riak KV and you can name them whatever
+you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy service
+the Redis bucket_type:bucket:key is mapped to the Riak KV
+bucket_type/bucket/key, so bucket type and bucket names should not contain the
+colon (`:`). When not specified, the bucket type defaults to "default".
+
+Outside of the above restriction, bucket names have no intrinsic significance
+beyond allowing you to store objects with the same key in different buckets.
+
+The same goes for naming keys: many objects can have the same key as long as
+they're in different buckets. There is no restriction on a key containing the
+colon (`:`), and this practice of representing a nested namespace is common in
+applications using Redis.
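+
+As a quick sketch of this mapping (assuming an RRA instance listening on the
+default port 22122), the Redis key `rra:test:food` addresses the Riak KV
+bucket type `rra`, bucket `test`, and key `food`:
+
+```bash
+# a GET on the flat Redis key rra:test:food reads the Riak KV
+# object at bucket type rra, bucket test, key food
+redis-cli -h 127.0.0.1 -p 22122 get rra:test:food
+```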
+
+Riak KV [bucket types][usage bucket types] enable you to provide common
+configurations for buckets (as many buckets as you wish). This means you can
+easily enable buckets to share common configurations, i.e. identical
+[replication properties][apps replication properties] or
+[commit hooks][usage commit hooks].
+
+
+## Reading Objects
+
+Reads via the cache proxy service are analogous to a Redis `GET`, with the
+added benefit of reading through to Riak KV, which results in greater
+resilience through node outages and network partitions.
+
+To request a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.get("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.get("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.get("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+var value = connection.get("rra:test:food")
+```
+
+### Get Configuration Parameters
+
+>**Note:** The cache proxy service read option (related to replication factor
+and consistency concern) may optionally be set within the nutcracker.conf.
+This will result in an override of the setting value at the bucket level in
+Riak KV.
+
+The following configuration parameters apply to `GET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster. **NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` |
+|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` |
+|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) |
+|`timeout` | The number of milliseconds to await a response. | `0` (server specified) |
+
+
+### Sibling Resolution
+
+As the Redis protocol does not provide a means to return multiple siblings,
+the cache proxy service must provide server-side sibling resolution. At
+present, only last-write-wins sibling resolution is available. The result is
+an effective last-write-wins configuration for access through the cache proxy
+service.
+
+
+## Writing Objects
+
+Writes via the cache proxy service are analogous to a Redis `SET`, with the
+added benefit of writing to Riak KV followed by a `PEXPIRE` to Redis,
+invalidating cache. As with HTTP PUT, `SET` semantically covers both create
+and update operations.
+
+To set a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.set("rra:test:food", "apple", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.set("rra:test:food", "apple")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.set("rra:test:food", "apple")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.set("rra:test:food", "apple")
+```
+
+### Set Configuration Parameters
+
+>**Note:** The cache proxy service write option (related to replication factor
+and consistency concern) may optionally be set within the nutcracker.conf,
+resulting in an override of the setting value at the bucket level in Riak KV.
+
+The following configuration parameters apply to `SET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster. **NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+### Sibling Explosion
+
+As noted in the section "Sibling Resolution" above, Riak KV provides for a
+line of descent (known as the [causal context][concept causal context]) for a
+value stored at a key. Clients performing write operations provide this causal
+context by setting the vector clock (VClock) that they last read.
+
+If a client does not provide the causal context, Riak KV makes no assumptions
+and treats the write as a new causal context, semantically equivalent to a
+create. In the case that a value is already stored at the key, this would lead
+to a sibling.
+
+Since the Redis protocol does not provide a means to pass a VClock, the cache
+proxy service needs to perform a read-before-write to obtain the current
+VClock, so the write can continue the causal context previously established
+and avoid "sibling explosion".
+
+Despite these efforts, in the event of a network partition, siblings will
+still be created, as clients writing to nodes on either side of the network
+partition can create divergent lines of descent. Sibling resolution remains
+the means to merge these lines of descent into a coherent causal context.
+
+## Deleting Objects
+
+Deletes via the cache proxy service are analogous to a Redis `DEL`, with the
+added benefit of writing to Riak KV followed by a `PEXPIRE` to Redis,
+invalidating cache.
+
+To delete a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.del("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.delete("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.del("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.del("rra:test:food")
+```
+
+### Delete Configuration Parameters
+
+The following configuration parameters apply to `DEL` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster. **NOTE**: If you change the `n_val` after keys have been added to the bucket, it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+
diff --git a/content/riak/kv/3.0.1/add-ons/redis/redis-add-on-features.md b/content/riak/kv/3.0.1/add-ons/redis/redis-add-on-features.md
new file mode 100644
index 0000000000..2b5ed51f16
--- /dev/null
+++ b/content/riak/kv/3.0.1/add-ons/redis/redis-add-on-features.md
@@ -0,0 +1,136 @@
+---
+title: "Riak Redis Add-on Features"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Redis Add-on Features"
+    identifier: "add-ons_redis_features"
+    weight: 504
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+
+---
+
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[GET-sequence]: {{}}images/redis/GET_seq.msc.png
+[SET-sequence]: {{}}images/redis/SET_seq.msc.png
+[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png
+[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png
+[redis docs]: http://redis.io/commands
+[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md
+
+## Overview
+
+The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and
+connection aggregation as a service, which reduces latency and increases
+addressable cache memory space with lower-cost hardware.
+
+On this page, you will find detailed descriptions of cache proxy service
+components, including what each component does and how you implement it. The
+following components are available:
+
+* [Pre-sharding](#pre-sharding)
+* [Connection Aggregation](#connection-aggregation)
+* [Command Pipelining](#command-pipelining)
+* [Read-through Cache](#read-through-cache)
+* [Write-around Cache](#write-around-cache)
+* [Commands](#commands)
+* [Object Lifetime](#object-lifetime)
+
+## Pre-sharding
+
+Pre-sharding with consistent hashing dispatches object reads and writes based
+on a configurable hash function, spreading load across multiple cache servers.
+The cache proxy service uses pre-sharding to extend the total addressable
+cache memory space based on the number of Redis servers. Request keys are
+hashed, then requests are routed to the Redis server that handles that portion
+of the key range.
+
+Redis with no persistence is used as the frontend cache proxy service, and
+Redis as a data server holds all data in memory. The addressable memory of
+the cache proxy is limited; by employing pre-sharding, the total addressable
+cache memory space is extended by the number of Redis servers.
+
+## Connection Aggregation
+
+Redis client connections are a limited resource. Using the cache proxy
+service, connections may be spread across multiple Riak Redis Add-on (RRA)
+servers. This reduces the total number of connections required to the Redis
+server for the same key.
+
+Redis clients in various languages support specifying multiple servers, as
+well as implementing multiple methods of spreading load across those servers
+(i.e. round-robin load balancing or consistent hashing). Since the cache
+proxy service provides consistent hashing, any Redis client method of
+supporting multiple servers will suffice.
+
+## Command Pipelining
+
+The cache proxy service increases performance by pipelining requests to Redis.
+While pipelining can be performed at the client, the cache proxy service is
+ideally placed to do so due to connection aggregation. Pipelining reduces
+network roundtrips to Redis and lowers CPU usage on Redis.
+
+## Read-Through Cache
+
+Implementing caching strategies in the cache proxy service reduces the cost of
+implementing cache strategies in client code in multiple applications and
+languages. The cache proxy service supports the read-through cache strategy,
+the most prevalent caching strategy used in distributed computing.
+
+The read-through cache strategy of the GET command is represented by the
+following sequence diagram:
+
+![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png)
+
+
+The `CACHE_TTL` configuration option establishes how long it takes the cache
+to become consistent with the backend server after a write (DELETE or PUT) to
+that server.
+
+A short `CACHE_TTL`, for example "15s", removes a significant amount of read
+pressure from Riak, increasing performance of the overall solution.
+
+## Write-Around Cache
+
+The read-through cache strategy requires a TTL to keep the cache as coherent
+as possible, given that writes to Riak KV can and will be issued without the
+cache proxy service being informed of the write. The effect is that the cache
+proxy service is eventually consistent with the underlying Riak KV data store,
+with the time to consistency equal to the TTL.
+
+The cache proxy service write-around cache strategy was introduced to provide
+a means to keep the cache coherent with zero time to consistency with the
+underlying Riak KV data store for all writes that the cache proxy is informed
+of. For the Redis String (Value in KV) datatype, SET and DEL commands result
+in writes to the underlying Riak KV data store followed by a PEXPIRE to
+invalidate cache.
+
+Of the three write cache strategies, the write-around cache strategy is the
+least prone to race conditions, but the least optimal for the read which
+immediately follows the write. In the overwhelming majority of distributed
+application data access patterns, the added certainty of cache coherency
+afforded by write-around over write-through is well worth the single cache
+miss. By definition, a key that is cached is expected to be accessed
+frequently, hence the single cache miss is expected to be followed by several
+accurate cache hits.
+
+The write-around cache strategy of the SET command is represented by the
+following sequence diagram:
+
+![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png)
+
+The write-around cache strategy of the DEL command is represented by the
+following sequence diagram:
+
+![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png)
+
+## Commands
+
+For command details, refer to the Redis [documentation][redis docs].
+
+The cache proxy service supports the following augmented Redis commands fully:
+
+* GET - get the value of a key from Redis or Riak KV, utilizing the
+  read-through caching strategy with a TTL set at service configuration time.
+
+* SET - set the value of a key in Riak KV and invalidate cache by issuing a
+  PEXPIRE to Redis.
+
+* DEL - delete the value of a key in Riak KV and invalidate cache by issuing a
+  PEXPIRE to Redis.
+
+The cache proxy service also supports the set of Redis commands supported by
+Twemproxy, but only to the point of pre-sharding and command pipelining,
+issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs].
+
+>**Important:** While the cache proxy service does support issuing DEL
+commands, issuing a PEXPIRE with a small TTL is suggested instead when the
+semantic intent is only to remove an item from cache. With write-around, the
+DEL command will also issue a delete to the Riak backend.
+
+## Object Lifetime
+
+With the combination of read-through and write-around cache strategies, the
+full object lifetime for a key-value is represented by the following
+sequence diagram:
+
+![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png)
+
+
+
diff --git a/content/riak/kv/3.0.1/add-ons/redis/set-up-rra.md b/content/riak/kv/3.0.1/add-ons/redis/set-up-rra.md
new file mode 100644
index 0000000000..cdab52e7a5
--- /dev/null
+++ b/content/riak/kv/3.0.1/add-ons/redis/set-up-rra.md
@@ -0,0 +1,285 @@
+---
+title: "Setting Up Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Set Up Redis Add-on"
+    identifier: "add-ons_redis_setup"
+    weight: 201
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis use]: ../using-rra
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[install index]: {{}}riak/kv/3.0.1/setup/installing
+[perf open files]: {{}}riak/kv/3.0.1/using/performance/open-files-limit/#changing-the-limit
+[lab ansible]: https://github.com/paegun/ansible-cache-proxy
+
+This page will walk you through the process of installing Riak Redis Add-on
+(RRA) and configuring it to run in your environment. Check the
+[prerequisites](#prerequisites) before you get started to make sure you have
+everything you need in order to successfully install and use RRA.
+
+## Prerequisites
+
+Before you begin installing Riak Redis Add-on (RRA), you will need to ensure
+that you have root or sudo access on the nodes where you plan to install RRA.
+You will also need to have Riak KV already [installed][install index].
+
+While this page assumes that Redis is not already installed, existing
+installations of Redis are supported. If you have an existing Redis
+installation, look for the *skip ahead* instructions as you go.
+
+This page assumes that Redis is (or will be) installed on separate hosts from
+Riak KV. You will need the list of Riak KV and Redis host:port combinations.
+RRA communicates with Riak KV via the protobuf port, and the host:port values
+are used to configure the cache proxy.
+
+## In the Lab
+
+An Ansible setup for the Riak Redis Add-on (RRA) was developed to provide a
+runnable example of an installation; see [ansible cache proxy][lab ansible].
+The remainder of this setup guide lists the commands required to install and
+configure RRA manually.
+
+## Installing
+
+1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files].
+2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed.
+3. Install Riak Redis Add-on.
+
+### Change the open-files limit
+
+As with Riak KV, both the total open-files limit and the per-user open-files
+limit must be high enough to allow Redis and Riak Redis Add-on (RRA) to
+function.
+
+For a complete guide on changing the limit in Riak KV, see
+[Changing the limit][perf open files].
+
+#### Linux
+
+On most Linux distributions, the total limit for open files is controlled by
+`sysctl`:
+
+```bash
+# check the current total limit
+sudo sysctl fs.file-max
+# raise the limit to 65536 (add fs.file-max=65536 to /etc/sysctl.conf to persist)
+sudo sysctl -w fs.file-max=65536
+sudo sysctl -p
+```
+
+To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
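+
+You can verify the resulting soft and hard limits for the current user with
+`ulimit`:
+
+```bash
+ulimit -Sn   # soft open-files limit
+ulimit -Hn   # hard open-files limit
+```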
+
+#### CentOS
+
+On CentOS systems, set a proper limit for the user you're usually logging in
+with to do any kind of work on the machine, including managing Riak KV, Redis,
+or RRA services. On CentOS, `sudo` properly inherits the values from the
+executing user.
+
+#### Ubuntu
+
+On Ubuntu systems, the following settings are recommended:
+
+```config
+»USERNAME« hard nofile 65536
+»USERNAME« soft nofile 65536
+root hard nofile 65536
+root soft nofile 65536
+```
+
+>**Note:** You may need to log out of your shell and then log back in for
+these changes to take effect.
+
+
+### Install Redis
+
+>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak
+Redis Add-on".
+
+#### Install on Ubuntu
+
+If you are on Ubuntu, run the following to install Redis:
+
+```bash
+# add the dotdeb repositories to your APT sources.
+sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <
+```
+
+>**Notes:** ss is used here to support a minimal installed system, but netstat
+may be used as well.
+
+### Install Riak Redis Add-on (RRA)
+
+>**Note:**
+>Riak Redis Add-on (RRA) is available to Enterprise customers for download in
+the usual Zendesk forums.
+
+If you are on CentOS, run the following to install RRA:
+
+```bash
+sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm
+```
+
+If you are on Ubuntu, run the following to install RRA:
+
+```bash
+sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb
+```
+
+## Configuring Riak Redis Add-on
+
+To configure Riak Redis Add-on (RRA), edit the configuration file
+`/etc/cache_proxy/cache_proxy_22122.yml`.
+
+The RRA configuration file is in YAML format. An example configuration
+file is provided in the install, and it contains all relevant configuration
+elements:
+
+```config
+» XML node name« :
+  listen: 0.0.0.0:22122
+  hash: fnv1a_64
+  distribution: ketama
+  auto_eject_hosts: true
+  redis: true
+  server_retry_timeout: 2000
+  server_failure_limit: 1
+  server_ttl: 1h
+  servers:
+  - 127.0.0.1:6379:1
+  backend_type: riak
+  backend_max_resend: 2
+  backends:
+  - 127.0.0.1:8087
+```
+
+Set the `listen` configuration value to set the RRA listen port.
+
+To set the time-to-live (TTL) for values stored in cache, set the `server_ttl`
+configuration value. Human-readable time values can be specified,
+with the most likely units being `s` for seconds or `ms` for milliseconds.
+
+Set the list of Redis servers by listing the servers, each on its own line
+prefixed with `-`, under the `servers` configuration value in the format
+`»host«:»port«:»weight«` (weight is optional).
+
+Set the list of Riak KV servers by listing the servers, each on its own line
+prefixed with `-`, under the `backends` configuration value in the format
+`»host«:»port«:»weight«` (weight is optional). You will want to make sure to
+list the Riak KV protobuf (pb) port here.
+
+### Verify your configuration
+
+If you are on Ubuntu, run the following to start RRA:
+
+```bash
+sudo service cache_proxy start
+```
+
+If you are on CentOS, run the following to start RRA and ensure it is enabled
+to start on boot:
+
+```bash
+systemctl start cache_proxy
+systemctl enable cache_proxy
+```
+
+To verify RRA is running and listening on the expected port, run the
+following (using the loopback interface and the default RRA port 22122
+as an example):
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS
+redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on
+```
+
+Redis should respond with `SUCCESS`.
+
+If RRA is responding with the expected output, run the following to
+clean up and remove the test value:
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on
+```
+
+If you did not get the expected output, run the following
+to verify that RRA is running on the expected port:
+
+```bash
+ss -nlp |grep [n]utcracker
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat
+may be used as well.
+
+## Next Steps
+
+Get started with some [basic usage][addon redis use] or check out more info on
+[setting up for development (with examples)][addon redis develop].
+
+
+
diff --git a/content/riak/kv/3.0.1/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/3.0.1/add-ons/redis/set-up-rra/deployment-models.md
new file mode 100644
index 0000000000..58928d3420
--- /dev/null
+++ b/content/riak/kv/3.0.1/add-ons/redis/set-up-rra/deployment-models.md
@@ -0,0 +1,143 @@
+---
+title: "Riak Redis Add-on Deployment Models"
+description: "Explore the various models for deploying Riak Redis Add-on"
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Redis Add-on Deployment Models"
+    identifier: "add-ons_redis_deployment"
+    weight: 201
+    parent: "add-ons_redis_setup"
+toc: true
+commercial_offering: true
+aliases:
+
+---
+
+[Local-deployment]: {{}}images/redis/rra_deployment_local.png
+[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png
+[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png
+
+## Deployment Models
+
+### Local Cache Deployment
+
+In a local cache deployment, the RRA and Redis are deployed to the application
+server.
+
+![Local-deployment]({{}}images/redis/rra_deployment_local.png)
+
+Connections:
+
+* RRA: The connections between Application Service instances and the RRA
+  Service instance are local.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections between Application Servers and Riak Nodes are
+  distributed and bounded to equal the number of Riak nodes _multiplied_ by
+  the number of Application Servers, since they are aggregated at the RRA
+  Service instance.
+
+Advantages:
+
+* Cache hits are extremely fast
+
+Disadvantages:
+
+* Cache writes on one application server are *not* observed on other
+  application servers, so cache hit rates are likely lower unless some form of
+  consistent routing to the application server exists within the solution.
+* Redis competing for RAM with the application service may be problematic
+
+### Colocated Cache Deployment
+
+In a colocated cache deployment, the RRA may be deployed either to the
+application server (suggested) or to the Riak servers, and Redis is deployed
+to the Riak servers.
+
+In the case of deploying the RRA to the application servers, the RRA features
+of reducing connections from the relatively high number of application service
+instances to the fewer Redis (cache) and Riak (persistent) data service
+instances allow for the greatest scale at the expense of the deployment cost
+of pushing a service and its configuration.
+
+In the case of deploying the RRA to the colocated Redis and Riak data servers,
+the maximum scale for the solution is constrained by the number of network
+connections from the application services, while deployment costs remain a
+matter of pushing a service and its configuration. In either case, deployment
+should be automated, so costs are not multiplied by the number of servers.
+
+![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png)
+
+Connections:
+
+* RRA: The connections between Application Service instances and the RRA
+  Service instance are distributed and bounded to equal the number of Riak
+  nodes _multiplied_ by the number of Application Service instances.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections between RRA and Riak Nodes are distributed and bounded
+  to equal the number of Riak nodes _squared_.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis
+  and Riak services, so slightly increased latency compared to local.
+* Redis competing for RAM with Riak will likely be problematic. Redis should
+  be configured with `maxmemory` and `maxmemory-policy` settings that
+  constrain Redis, ensuring Riak is allotted sufficient RAM to serve the more
+  important persistent data storage and retrieval services. See
+  http://redis.io/topics/config
+* This model may seem to provide data locality, but in the case of faults in
+  either the Redis or Riak services, the fault tolerance mechanisms of RRA and
+  Riak will not match exactly, as communicating the necessary information to
+  support such lock-step fault tolerance would lead to greater mean latencies;
+  Riak provides superior 99th percentile latency performance in the face
+  of faults.
+
+
+### Distributed Cache Deployment
+
+In a distributed cache deployment, the RRA is deployed to the application
+server and Redis is deployed to standalone servers, separate from Riak cluster
+nodes.
+
+![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png)
+
+Connections:
+
+* RRA: The connections between Application Service instances and the RRA
+  Service instance are local.
+* Redis: The connections between the RRA Service instance and the Redis
+  Service instances are distributed and bounded to equal the number of
+  Application Servers _multiplied_ by the number of Redis Servers.
+* Riak: The connections between RRA and Riak Nodes are distributed and bounded
+  to equal the number of Riak nodes _multiplied_ by the number of Application
+  Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+* Keeps RRA near the application, reducing network connections.
+* Moves Redis to distinct servers, allowing the cache more RAM and not
+  constraining the RAM of either application or persistent data services.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis
+  and Riak services, so increased latency compared to local.
+
+### Recommendation
+
+The relative advantages and disadvantages of the Distributed Cache Deployment,
+most notably the increased cache hit rate and reduced connection overhead,
+should make it the standout choice for applications requiring the scale and
+operational simplicity of Riak. For this reason, we recommend the Distributed
+Cache Deployment.
+
+
+
diff --git a/content/riak/kv/3.0.1/add-ons/redis/using-rra.md b/content/riak/kv/3.0.1/add-ons/redis/using-rra.md
new file mode 100644
index 0000000000..a740f8489f
--- /dev/null
+++ b/content/riak/kv/3.0.1/add-ons/redis/using-rra.md
@@ -0,0 +1,246 @@
+---
+title: "Using Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Using Redis Addon"
+    identifier: "add-ons_redis_getstarted"
+    weight: 302
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/kv/3.0.1/add-ons/redis/get-started-with-rra
+
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis setup]: ../set-up-rra/
+[dev api http]: {{}}riak/kv/3.0.1/developing/api/http/
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+Now that you've [set up Riak Redis Add-on (RRA)][addon redis setup], you're
+ready to use RRA with any Redis client which supports `GET`, `SET`, and `DEL`
+operations.
+
+This page will walk you through using RRA.
+
+## Prerequisites
+
+We assume that the Redis client (`redis-cli`) is installed, either alongside
+the Redis server or on a test machine.
+
+You will need the list of Riak KV and Riak Redis Add-on host:port
+combinations. For testing, Riak KV values are obtained via the
+[HTTP API][dev api http].
+
+## Run the Read-Through Test
+
+Throughout this test example, the bucket "test" and key "foo" are used to
+demonstrate how to address the hierarchical namespace support in Riak KV
+through the flat Redis key. The bucket type is not specified in this example,
+so it is effectively the default bucket type, named "default". For additional
+information regarding the key namespace, see
+[develop Riak Redis Add-on (RRA)][addon redis develop].
+
+The read-through test ensures that your configuration correctly tracks values
+obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the
+test are:
+
+* DELETE the Riak object at the `test` bucket with the key `foo`, which checks
+  that there are no siblings.
+* PUT a Riak object with the value 'bar' at the `test` bucket with the key
+  `foo`.
+* GET the Riak object at the `test` bucket with the key `foo`.
+* GET the string representation of the object from the cache proxy service
+  using the key `test:foo`. (The cache proxy service should parse out the
+  first portion of the Redis colon-separated key (namespace) to identify which
+  Riak bucket to perform the backend read from.)
+* Assert that the value obtained from the previous cache proxy GET is 'bar'.
+
+First, create a file named `read_through_test.sh` with the following content:
+
+```bash
+# set test environment
+RIAK_HTTP_IP="127.0.0.1"
+RIAK_HTTP_PORT="8098"
+CACHE_PROXY_IP="127.0.0.1"
+CACHE_PROXY_PORT="22122"
+CACHE_PROXY_STATISTICS_PORT="22123"
+RIAK_TEST_BUCKET="test"
+KEY="foo"
+VALUE="bar"
+
+# DELETE Riak object, ensure no siblings
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# PUT Riak object
+curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# GET Riak object
+RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY")
+
+# GET Cache Proxy value
+CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" get "$RIAK_TEST_BUCKET:$KEY")
+
+# DELETE Riak object, cleanup
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# Assert
+if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then
+    RESULT="Success"
+else
+    RESULT="FAIL"
+fi
+echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy."
+```
+
+Then, once you've created the file, run it as follows:
+
+```bash
+./read_through_test.sh 22122 8098 test
+```
+
+### Exceptions
+
+If the test does not pass, verify that both Redis and RRA are running. You can
+do this by running:
+
+```bash
+ps aux |grep [r]edis
+ps aux |grep [n]utcracker
+```
+
+The result should list `redis` and `nutcracker` respectively.
+
+Also, verify that Riak KV is started and listening on the protocol buffer port
+specified:
+
+```bash
+sudo riak config effective |grep proto
+```
+
+If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the
+service with the following:
+
+```bash
+sudo service cache_proxy restart
+```
+
+If RRA is configured correctly and all required services are running, you may
+want to restart each service from front to back as follows:
+
+1. Stop RRA.
+2. Stop Redis.
+3. *Optional* Restart Riak KV. (This should only be necessary if Riak KV is
+   not responding to protocol buffer requests.)
+4. Start Redis.
+5. Start RRA.
+
+```bash
+sudo service cache_proxy stop
+sudo service redis stop
+
+# optional
+sudo riak restart
+
+sudo service redis start
+sudo service cache_proxy start
+```
+
+## Using Riak Redis Add-on
+
+Once you've successfully configured Riak Redis Add-on (RRA) and established a
+Riak KV and Redis client in the language of your choosing, you're ready to
+start using RRA.
+
+For objects that should not be cached, interact with Riak KV as usual: issue
+GET, PUT, and DELETE commands through the Riak client.
+
+For objects that should be cached, read from RRA: issue GET, SET, and DEL
+commands through the Redis client.
+
+### Monitoring
+
+#### RRA
+
+Since RRA is installed as a service, the system service monitoring daemon will
+automatically restart a service with the correct configuration in the event
+that the service's process was killed or terminated by other means.
+
+The log file for RRA is stored by default in `/var/log/cache_proxy.log`. RRA
+is logrotate friendly, responding to the signal to reopen the log file
+following a rotate.
+
+For additional monitoring, RRA provides statistics on service availability.
+The statistics provided are generally useful in monitoring the health of the
+RRA service.
+
+For example, run the following command (shown here against the loopback interface and the default statistics port):
+
+```bash
+telnet 127.0.0.1 22123
+```
+
+This returns statistics such as the following:
+
+```json
+{
+  "bdp_cache_proxy": {
+    "192.168.50.2:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 216,
+      "requests": 9,
+      "response_bytes": 39,
+      "responses": 4,
+      "server_connections": 1,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "192.168.50.3:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 0,
+      "requests": 0,
+      "response_bytes": 0,
+      "responses": 0,
+      "server_connections": 0,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "192.168.50.4:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 90,
+      "requests": 5,
+      "response_bytes": 258,
+      "responses": 2,
+      "server_connections": 0,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "client_connections": 0,
+    "client_eof": 6,
+    "client_err": 0,
+    "forward_error": 0,
+    "fragments": 0,
+    "server_ejects": 0
+  },
+  "curr_connections": 4,
+  "service": "nutcracker",
+  "source": "vagrant",
+  "timestamp": 1438301846,
+  "total_connections": 10,
+  "uptime": 7227,
+  "version": "0.4.0"
+}
+```
+
+From these results, you can identify which metrics signal a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and of the custom software within your overall solution.
+
+While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions:
+
+* Custom - https://github.com/gfranxman/NutcrackerMonitor
+* NewRelic - http://newrelic.com/plugins/schoology/245
+* Nagios - https://github.com/schoology/twemproxy_nagios
+
+#### Redis
+
+Various Redis monitoring solutions exist in the market and, like monitoring RRA, these monitoring solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone.
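+
+The same counters can also be gathered non-interactively, for example from a scripted health check. The following is a minimal sketch, assuming a Redis server listening on the default port 6379 of the local host; only the `stats` section of the `INFO` output is requested:
+
+```bash
+# Fetch only the stats section of Redis's INFO output
+redis-cli -h 127.0.0.1 -p 6379 info stats
+```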
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details + + + diff --git a/content/riak/kv/3.0.1/configuring.md b/content/riak/kv/3.0.1/configuring.md new file mode 100644 index 0000000000..95d8bd2add --- /dev/null +++ b/content/riak/kv/3.0.1/configuring.md @@ -0,0 +1,88 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +aliases: + +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+
+[Learn More >>][config search]
+
+#### [V3 Multi-Datacenter][config v3 mdc]
+
+A guide on configuring Riak's V3 Multi-Datacenter Replication.
+
+[Learn More >>][config v3 mdc]
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/backend.md b/content/riak/kv/3.0.1/configuring/backend.md
new file mode 100644
index 0000000000..664a09c08d
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/backend.md
@@ -0,0 +1,647 @@
+---
+title: "Backend Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Backend Configuration"
+    identifier: "configuring_backend"
+    weight: 110
+    parent: "configuring"
+toc: true
+aliases:
+
+---
+
+[plan backend leveldb]: {{}}riak/kv/3.0.1/setup/planning/backend/leveldb
+[plan backend leveled]: {{}}riak/kv/3.0.1/setup/planning/backend/leveled
+[plan backend bitcask]: {{}}riak/kv/3.0.1/setup/planning/backend/bitcask
+[plan backend memory]: {{}}riak/kv/3.0.1/setup/planning/backend/memory
+[plan backend multi]: {{}}riak/kv/3.0.1/setup/planning/backend/multi
+
+## LevelDB
+
+Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend.
+
+> **Note on upgrading to 2.0**
+>
+> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and
+wish to use your old configuration files, i.e. `app.config` and
+`vm.args`, please note that you must set the `total_leveldb_mem_percent`
+setting in the `eleveldb` section of `app.config`. We recommend setting
+it to `70`. If you do not set this parameter, it will default to 15,
+which can lead to problems in some clusters.
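+
+In the older `app.config` system, that recommendation corresponds to a fragment like the following (a sketch showing only the relevant entry; the surrounding sections of your file will differ):
+
+```appconfig
+%% In app.config, within the eleveldb section:
+{eleveldb, [
+    %% Assign 70% of server memory to LevelDB, per the recommendation above
+    {total_leveldb_mem_percent, 70}
+]},
+```
+
+The full set of LevelDB parameters in the newer riak.conf system is listed below.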
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.block_cache_threshold` | This setting defines the limit past which block cache memory can no longer be released in favor of the page cache. This setting has no impact in favor of file cache. The value is set on a per-vnode basis. | `32MB`
+`leveldb.compaction.trigger.tombstone_count` | Controls when a background compaction initiates solely due to the number of delete tombstones within an individual `.sst` table file. A value of `off` disables the feature. | `1000`
+`leveldb.compression` | Enabling this setting (`on`), which is the default, saves disk space. Disabling it may reduce read latency but increase overall disk activity. This option can be changed at any time, but it will not impact data on disk until the next time a file requires compaction. | `on`
+`leveldb.compression.algorithm` | This setting selects the compression algorithm used when `leveldb.compression` is `on`. In new riak.conf files, this is explicitly set to `lz4`; when this setting is not provided, `snappy` will be used for backward compatibility. When you determine that you will no longer need backward compatibility, setting this to `lz4` will cause future compactions to use the LZ4 algorithm for compression. | `lz4` in new riak.conf files; `snappy` when not provided
+`leveldb.data_root` | The directory in which LevelDB will store its data. | `./data/leveldb`
+`leveldb.fadvise_willneed` | Option to override LevelDB's use of `fadvise(DONTNEED)` with `fadvise(WILLNEED)` instead. WILLNEED can reduce disk activity on systems where physical memory exceeds the database size. | `false`
+`leveldb.maximum_memory` | This parameter defines the server memory (in bytes) to assign to LevelDB. Also see `leveldb.maximum_memory.percent` to set LevelDB memory as a percentage of system total. | `80`
+`leveldb.maximum_memory.percent` | This parameter defines the percentage of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes to stay within this size. The memory size can alternately be assigned as a byte count via `leveldb.maximum_memory` instead. | `70`
+`leveldb.threads` | The number of worker threads performing LevelDB operations. | `71`
+`leveldb.verify_checksums` | Enables or disables the verification of the data fetched from LevelDB against internal checksums. | `on`
+`leveldb.verify_compaction` | Enables or disables the verification of LevelDB data during compaction. | `on`
+`leveldb.block.size_steps` | Defines the number of incremental adjustments to attempt between the `block.size` value and the maximum `block.size` for an `.sst` table file. A value of zero disables the underlying dynamic `block_size` feature. | `16`
+`leveldb.block.restart_interval` | Defines the key count threshold for a new key entry in the key index for a block. Most deployments should leave this parameter alone. | `16`
+`leveldb.block.size` | Defines the size threshold for a block/chunk of data within one `.sst` table file. Each new block gets an index entry in the `.sst` table file's master index. | `4KB`
+`leveldb.bloomfilter` | Each database `.sst` table file can include an optional "bloom filter" that is highly effective in shortcutting data queries that are destined to not find the requested key. The Bloom filter typically increases the size of an `.sst` table file by about 2%. | `on`
+`leveldb.write_buffer_size_min` | Each vnode first stores new key/value data in a memory-based write buffer. This write buffer is in parallel to the recovery log mentioned in the `sync` parameter. Riak creates each vnode with a randomly sized write buffer for performance reasons. The random size is somewhere between `write_buffer_size_min` and `write_buffer_size_max`. | `30MB`
+`leveldb.write_buffer_size_max` | See `leveldb.write_buffer_size_min` directly above. | `60MB`
+`leveldb.limited_developer_mem` | This is a Riak-specific option that is used when a developer is testing a high number of vnodes and/or several VMs on a machine with limited physical memory. Do not use this option if making performance measurements. This option overwrites values given to `write_buffer_size_min` and `write_buffer_size_max`. | `off`
+`leveldb.sync_on_write` | Whether LevelDB will flush after every write. Note: if you are familiar with fsync, this is analogous to calling fsync after every write. | `off`
+`leveldb.tiered` | The level number at which LevelDB data switches from the faster to the slower array. The default of `off` disables the feature. | `off`
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`. | 
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at or above the level set by `leveldb.tiered`. | 
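+
+Taken together, a riak.conf fragment such as the following (an illustrative sketch using the defaults and options described above, not a tuning recommendation) shows how these parameters are expressed:
+
+```riakconf
+leveldb.data_root = ./data/leveldb
+leveldb.maximum_memory.percent = 70
+leveldb.compression = on
+leveldb.compression.algorithm = lz4
+```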
+
+## Leveled
+
+Configurable parameters for Riak's [leveled][plan backend leveled] storage backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | A path under which leveled data files will be stored. | `$(platform_data_dir)/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. Can be set to `riak_sync`, `sync` (if OTP > 16) or `none`. Use `none`, and the OS will flush when most efficient. Use `riak_sync` or `sync` to flush after every PUT (not recommended without some hardware support, e.g. flash drives and/or flash-backed write caches). | `none`
+`leveled.compression_method` | Can be `lz4` or `native` (which will use the Erlang native zlib compression) within term_to_binary. | `native`
+`leveled.compression_point` | The point at which compression is applied to the Journal (the Ledger is always compressed). Use `on_receipt` or `on_compact`. `on_compact` is suitable when values are unlikely to yield much benefit from compression (compression is only attempted when compacting). | `on_receipt`
+`leveled.log_level` | Can be `debug`, `info`, `warn`, `error` or `critical`. Sets the minimum log level to be used within leveled. Leveled will log many lines to allow for stats to be extracted by those using log indexers such as Splunk. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. Normally keep this at around the size of o(100K) objects. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. The higher the value, the more compaction runs, and the sooner space is recovered. But each run has a cost. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. Use a low hour of 0 and a top hour of 23 to have no compaction window (i.e. always compact regardless of time of day). | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. If low hour > top hour, compaction will work overnight between low hour and top hour (inclusive). Timings rely on the server's view of local time. | `23`
+`leveled.max_run_length` | In a single compaction run, the maximum number of consecutive files which may be compacted. | `4`
+`leveled_reload_recalc` | Enable the `recalc` compaction strategy within the leveled backend in riak. | `disabled`
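+
+As a brief sketch (illustrative values only, drawn from the parameters above), a leveled configuration in riak.conf might look like this:
+
+```riakconf
+leveled.data_root = $(platform_data_dir)/leveled
+leveled.sync_strategy = none
+leveled.compaction_runs_perday = 24
+```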
+
+## Bitcask
+
+Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`bitcask.data_root` | The directory under which Bitcask will store its data. | `./data/bitcask`
+`bitcask.io_mode` | Configure how Bitcask writes data to disk. If set to `erlang`, writes are made via Erlang's built-in file API; if set to `nif`, writes are made via direct calls to the POSIX C API. The `nif` mode provides higher throughput for certain workloads, but has the potential to negatively impact the Erlang VM, leading to higher worst-case latencies and possible throughput collapse. | `erlang`
+`bitcask.expiry` | By default, Bitcask keeps all of your data around. If your data has limited time value, or if you need to purge data for space reasons, you can set the `expiry` option. For example, if you need to purge data automatically after 1 day, set the value to `1d`. `off` disables automatic expiration. | `off`
+`bitcask.expiry.grace_time` | By default, Bitcask will trigger a merge whenever a data file contains an expired key. This may result in excessive merging under some usage patterns. To prevent this you can set the `bitcask.expiry.grace_time` option. Bitcask will defer triggering a merge solely for key expiry by the configured number of seconds. Setting this to `1h` effectively limits each cask to merging for expiry once per hour. | `0`
+`bitcask.hintfile_checksums` | Whether to allow the CRC to be present at the end of hintfiles. Setting this to `allow_missing` runs Bitcask in a backwards-compatible mode in which old hint files will still be accepted without CRC signatures. | `strict`
+`bitcask.fold.max_puts` | See the description for the `bitcask.fold.max_age` config directly below. | `0`
+`bitcask.fold.max_age` | Fold keys thresholds will reuse the keydir if another fold was started less than `fold.max_age` ago and there were fewer than `fold.max_puts` updates. Otherwise, it will wait until all current fold keys complete and then start. Set either option to `unlimited` to disable. | `unlimited`
+`bitcask.merge.thresholds.fragmentation` | Describes which ratio of dead keys to total keys in a file will cause it to be included in the merge. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 4 dead keys and 6 live keys, it will be included in the merge at the default ratio (which is 40). Increasing the value will cause fewer files to be merged, decreasing the value will cause more files to be merged. | `40`
+`bitcask.merge.thresholds.dead_bytes` | Describes the minimum amount of data occupied by dead keys in a file to cause it to be included in the merge. Increasing the value will cause fewer files to be merged, whereas decreasing the value will cause more files to be merged. | `128MB`
+`bitcask.merge.thresholds.small_file` | Describes the minimum size a file must have to be excluded from the merge. Files smaller than the threshold will be included. Increasing the value will cause more files to be merged, whereas decreasing the value will cause fewer files to be merged. | `10MB`
+`bitcask.merge.triggers.dead_bytes` | Describes how much data stored for dead keys in a single file will trigger merging. If a file meets or exceeds the trigger value for dead bytes, merge will be triggered. Increasing the value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. When either of these constraints are met by any file in the directory, Bitcask will attempt to merge files. | `512MB`
+`bitcask.merge.triggers.fragmentation` | Describes which ratio of dead keys to total keys in a file will trigger merging. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 6 dead keys and 4 live keys, then merge will be triggered at the default setting. Increasing this value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. | `60`
+`bitcask.merge.window.end` | See the description of the `bitcask.merge.policy` config below. | `23`
+`bitcask.merge.window.start` | See the description of the `bitcask.merge.policy` config below. | `0`
+`bitcask.merge.policy` | Lets you specify when during the day merge operations are allowed to be triggered. Valid options are: `always`, meaning no restrictions; `never`, meaning that merging will never be attempted; and `window`, specifying the hours during which merging is permitted, where `bitcask.merge.window.start` and `bitcask.merge.window.end` are integers between 0 and 23. If merging has a significant impact on performance of your cluster, or your cluster has quiet periods in which little storage activity occurs, you may want to change this setting from the default. | `always`
+`bitcask.merge_check_interval` | Bitcask periodically runs checks to determine whether merges are necessary. This parameter determines how often those checks take place. Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, etc. | `3m`
+`bitcask.merge_check_jitter` | In order to prevent merge operations from taking place on different nodes at the same time, Riak can apply random variance to merge times, expressed as a percentage of `bitcask.merge_check_interval`. | `30%`
+`bitcask.max_merge_size` | Maximum amount of data to merge in one go in the Bitcask backend. | `100GB`
+`bitcask.max_file_size` | Describes the maximum permitted size for any single data file in the Bitcask directory. If a write causes the current file to exceed this size threshold then that file is closed, and a new file is opened for writes. | `2GB`
+`bitcask.sync.interval` | See the description of the `bitcask.sync.strategy` directly below. | 
+`bitcask.sync.strategy` | Changes the durability of writes by specifying when to synchronize data to disk. The default setting protects against data loss in the event of application failure (process death) but leaves open a small window in which data could be lost in the event of complete system failure (e.g. hardware, OS, or power). The default mode, `none`, writes data into operating system buffers which will be written to the disks when those buffers are flushed by the operating system. If the system fails, e.g. due to power loss or crash, that data is lost before those buffers are flushed to stable storage. This is prevented by the setting `o_sync`, which forces the operating system to flush to stable storage at every write. The effect of flushing each write is better durability, however write throughput will suffer as each write will have to wait for the write to complete. Available sync strategies: `none`, which will let the operating system manage syncing writes; `o_sync`, which uses the `O_SYNC` flag to force syncs on every write; and `interval`, which will force Bitcask to sync every `bitcask.sync.interval` seconds. | `none`
+`bitcask.open_timeout` | Specifies the maximum time Bitcask will block on startup while attempting to create or open the data directory. You generally need not change this value. If for some reason the timeout is exceeded on open you'll see a log message of the form `Failed to start bitcask backend: ...`. Only then should you consider a longer timeout. | `4s`
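+
+To make the merge window settings concrete, here is an illustrative riak.conf fragment (the window hours are arbitrary example values, not recommendations) that restricts merges to a nightly window using the parameters described above:
+
+```riakconf
+bitcask.merge.policy = window
+bitcask.merge.window.start = 1
+bitcask.merge.window.end = 5
+```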
+
+## Memory Backend
+
+Configurable parameters for Riak's [Memory][plan backend memory] backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`memory_backend.ttl` | Each value written will be written with this "time to live." Once that object's time is up, it will be deleted on the next read of its key. Minimum: `1s`. | 
+`memory_backend.max_memory_per_vnode` | The maximum amount of memory consumed per vnode by the memory storage backend. Minimum: `1MB`. | 
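+
+As an illustrative sketch (the values here are arbitrary examples, not recommendations), these parameters are set in riak.conf like so:
+
+```riakconf
+memory_backend.ttl = 1d
+memory_backend.max_memory_per_vnode = 128MB
+```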
+
+## Multi Backend
+
+Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster.
+
+If you are using multiple backends, you can configure the backends
+individually by prepending the setting with `multi_backend.$name`, where
+`$name` is the name of the backend. `$name` can be any valid
+configuration word, like `customer_data`, `my_data`, `foo_bar_backend`,
+etc.
+
+Below is the general form for setting multi-backend parameters:
+
+```riakconf
+multi_backend.$name.(existing_setting) = 
+# or
+multi_backend.$name.$backend_type.(backend_specific_setting) = 
+```
+
+Below is a listing of the available parameters:
+
+Config | Description | Default
+:------|:------------|:-------
+`multi_backend.$name.storage_backend` | This parameter specifies the Erlang module defining the storage mechanism that will be used on this node. | `bitcask`
+`multi_backend.default` | The default name of a backend when one is not specified. | 
+
+To give an example, if you have a LevelDB backend named
+`customer_backend` and wish to set the `data_root` parameter to
+`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would
+do so as follows:
+
+```riakconf
+multi_backend.customer_backend.storage_backend = leveldb
+multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend
+multi_backend.customer_backend.leveldb.maximum_memory.percent = 50
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/basic.md b/content/riak/kv/3.0.1/configuring/basic.md
new file mode 100644
index 0000000000..df3599b885
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/basic.md
@@ -0,0 +1,239 @@
+---
+title: "Basic Riak KV Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Basic Configuration"
+    identifier: "configuring_basic"
+    weight: 100
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/configuration/
+  - /riak/kv/3.0.1/ops/building/configuration/
+
+---
+
+[config reference]: {{}}riak/kv/3.0.1/configuring/reference
+[use running cluster]: {{}}riak/kv/3.0.1/using/running-a-cluster
+[use admin riak admin#member-status]: {{}}riak/kv/3.0.1/using/admin/riak admin/#member-status
+[perf erlang]: {{}}riak/kv/3.0.1/using/performance/erlang
+[plan start]: {{}}riak/kv/3.0.1/setup/planning/start
+[plan best practices]: {{}}riak/kv/3.0.1/setup/planning/best-practices
+[cluster ops backup]: {{}}riak/kv/3.0.1/using/cluster-operations/backing-up
+[cluster ops add remove node]: {{}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes
+[plan backend]: {{}}riak/kv/3.0.1/setup/planning/backend
+[plan backend multi]: {{}}riak/kv/3.0.1/setup/planning/backend/multi
+[plan backend bitcask]: {{}}riak/kv/3.0.1/setup/planning/backend/bitcask
+[usage bucket types]: {{}}riak/kv/3.0.1/developing/usage/bucket-types
+[apps replication properties]: {{}}riak/kv/3.0.1/developing/app-guide/replication-properties
+[concept buckets]: {{}}riak/kv/3.0.1/learn/concepts/buckets
+[concept eventual consistency]: {{}}riak/kv/3.0.1/learn/concepts/eventual-consistency
+[perf benchmark]: {{}}riak/kv/3.0.1/using/performance/benchmarking
+[perf open files]: {{}}riak/kv/3.0.1/using/performance/open-files-limit
+[perf index]: {{}}riak/kv/3.0.1/using/performance
+[perf aws]: {{}}riak/kv/3.0.1/using/performance/amazon-web-services
+[Cluster Capacity Planning]: {{}}riak/kv/3.0.1/setup/planning/cluster-capacity/#ring-size-number-of-partitions
+
+This document covers the parameters that are commonly adjusted when
+setting up a new cluster. We recommend that you also review the detailed
+[Configuration Files][config reference] document before moving a cluster into
+production.
+
+All configuration values discussed here are managed via the
+configuration file on each node, and a node must be restarted for any
+changes to take effect.
+
+> **Note**
+>
+> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config` configuration file or
+the newer `riak.conf` if you wish.
+>
+> If you have installed Riak KV 2.0 directly, you should use only
+`riak.conf`.
+>
+> More on configuring Riak KV can be found in the [configuration files][config reference]
+doc.
+
+We advise that you make as many of the changes below as practical
+_before_ joining the nodes together into a cluster.
+Once your
+configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process.
+
+Use [`riak admin member-status`][use admin riak admin#member-status]
+to determine whether any given node is a member of a cluster.
+
+## Erlang VM Tunings
+
+Prior to building and starting a cluster, there are some
+Erlang-VM-related changes that you should make to your configuration
+files. If you are using the older, `vm.args`-based Erlang VM tunings,
+you should set the following:
+
+```vmargs
++sfwi 500
++scl false
+```
+
+If you are using the newer, `riak.conf`-based configuration system, we
+recommend the following settings:
+
+```riakconf
+erlang.schedulers.force_wakeup_interval = 500
+erlang.schedulers.compaction_of_load = false
+```
+
+More information can be found in [Erlang VM Tuning][perf erlang].
+
+## Ring Size
+
+The ring size, in Riak parlance, is the number of data partitions that
+comprise the cluster. This quantity impacts the scalability and
+performance of a cluster and, importantly, **it should be established
+before the cluster starts receiving data**.
+
+If the ring size is too large for the number of servers, disk I/O will
+be negatively impacted by the excessive number of concurrent databases
+running on each server; if the ring size is too small, the servers' other
+resources (primarily CPU and RAM) will go underutilized.
+
+See [Cluster Capacity Planning] for more details on choosing a ring size.
+
+The steps involved in changing the ring size depend on whether the
+servers (nodes) in the cluster have already been joined together.
+
+### Cluster joined, but no data needs to be preserved
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
+4. Start all nodes
+5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### New servers, have not yet joined a cluster
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for
+the location of this file)
+4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### Verifying ring size
+
+You can use the `riak admin` command to verify the ring size:
+
+```bash
+riak admin status | grep ring
+```
+
+Console output:
+
+```
+ring_members : ['riak@10.160.13.252']
+ring_num_partitions : 8
+ring_ownership : <<"[{'riak@10.160.13.252',8}]">>
+ring_creation_size : 8
+```
+
+If `ring_num_partitions` and `ring_creation_size` do not agree, that
+means that the `ring_creation_size` value was changed too late and that
+the proper steps were not taken to start over with a new ring.
+
+**Note**: Riak will not allow two nodes with different ring sizes to be
+joined into a cluster.
+
+## Backend
+
+Another critical decision to be made is the backend to use.
The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/3.0.1/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. + + + diff --git a/content/riak/kv/3.0.1/configuring/global-object-expiration.md b/content/riak/kv/3.0.1/configuring/global-object-expiration.md new file mode 100644 index 0000000000..bacae85bc6 --- /dev/null +++ b/content/riak/kv/3.0.1/configuring/global-object-expiration.md @@ -0,0 +1,90 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
+menu:
+  riak_kv-3.0.1:
+    name: "Global Object Expiration"
+    identifier: "config_expiry"
+    weight: 180
+    parent: "configuring"
+project: "riak_kv"
+project_version: 3.0.1
+toc: true
+aliases:
+
+---
+
+[ttl]: https://en.wikipedia.org/wiki/Time_to_live
+
+By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data.
+
+Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value.
+
+## Enabling Expiry
+
+To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file:
+
+```riak.conf
+leveldb.expiration = on
+```
+
+{{% note %}}
+Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration.
+{{% /note %}}
+
+## Setting Retention Time
+
+The `retention_time` setting is used to specify the time until objects expire.
+Durations are set using a combination of an integer and a shortcut for the supported units:
+
+- Milliseconds - `ms`
+- Seconds - `s`
+- Minutes - `m`
+- Hours - `h`
+- Days - `d`
+- Weeks - `w`
+- Fortnights - `f`
+
+The following example configures objects to expire after 5 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 5h
+```
+
+You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 8d9h
+```
+
+## Expiry Modes
+
+Global expiration supports two modes:
+
+- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired.
+- `normal` - individual objects are removed as part of the usual compaction process.
+
+We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient.
+
+The following example configures objects to expire after 1 day:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+## Disable Expiry
+
+To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other two settings are ignored. For example:
+
+```riak.conf
+leveldb.expiration = off
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/load-balancing-proxy.md b/content/riak/kv/3.0.1/configuring/load-balancing-proxy.md
new file mode 100644
index 0000000000..ba4fd53022
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/load-balancing-proxy.md
@@ -0,0 +1,275 @@
+---
+title: "Load Balancing and Proxy Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Load Balancing & Proxy"
+    identifier: "configuring_load_balance"
+    weight: 150
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/advanced/configs/load-balanacing-proxy/
+  - /riak/kv/3.0.1/ops/advanced/configs/load-balanacing-proxy/
+
+---
+
+[perf open files]: {{}}riak/kv/3.0.1/using/performance/open-files-limit
+
+The recommended best practice for operating Riak in production is to
+place Riak behind a load-balancing or proxy solution, either hardware-
+or software-based, while never directly exposing Riak to public network
+interfaces.
+
+Riak users have reported success in using Riak with a variety of load-
+balancing and proxy solutions. Common solutions include proprietary
+hardware-based load balancers, cloud-based load balancing options, such
+as Amazon's Elastic Load Balancer, and open-source software-based
+projects like HAProxy and Nginx.
+
+This guide briefly explores the commonly used open-source software-based
+solutions HAProxy and Nginx, and provides some configuration and
+operational tips gathered from community users and operations-oriented
+engineers at Basho.
+
+While it is by no means an exhaustive overview of the topic, this guide
+should provide a starting point for choosing and implementing your own
+solution.
+
+## HAProxy
+
+[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source
+solution for load balancing and proxying of HTTP- and TCP-based
+application traffic.
+
+Users have reported success in using HAProxy in combination with Riak in
+a number of configurations and scenarios. Much of the information and
+example configuration for this section is drawn from experiences of
+users in the Riak community in addition to suggestions from Basho
+engineering.
+
+### Example Configuration
+
+The following is an example starting-point configuration for HAProxy to
+act as a load balancer. The example cluster has 4 nodes and will be
+accessed by Riak clients using both the Protocol Buffers and HTTP
+interfaces.
+
+> **Note on open files limits**
+>
+> The operating system's open files limits need to be greater than 256000
+for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems.
+
+```config
+global
+    log 127.0.0.1 local0
+    log 127.0.0.1 local1 notice
+    maxconn 256000
+    chroot /var/lib/haproxy
+    user haproxy
+    group haproxy
+    spread-checks 5
+    daemon
+    quiet
+
+defaults
+    log global
+    option dontlognull
+    option redispatch
+    option allbackups
+    maxconn 256000
+    timeout connect 5000
+
+backend riak_rest_backend
+    mode http
+    balance roundrobin
+    option httpchk GET /ping
+    option httplog
+    server riak1 riak1.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak2 riak2.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak3 riak3.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak4 riak4.<fqdn>:8098 weight 1 maxconn 1024 check
+
+frontend riak_rest
+    bind 127.0.0.1:8098
+    # Example bind for SSL termination
+    # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem
+    mode http
+    option contstats
+    default_backend riak_rest_backend
+
+
+backend riak_protocol_buffer_backend
+    balance leastconn
+    mode tcp
+    option tcpka
+    option srvtcpka
+    server riak1 riak1.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak2 riak2.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak3 riak3.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak4 riak4.<fqdn>:8087 weight 1 maxconn 1024 check
+
+
+frontend riak_protocol_buffer
+    bind 127.0.0.1:8087
+    mode tcp
+    option tcplog
+    option contstats
+    mode tcp
+    option tcpka
+    option srvtcpka
+    default_backend riak_protocol_buffer_backend
+```
+
+A specific configuration detail worth noting from the example is the
+commented option for SSL termination. HAProxy supports SSL directly as
+of version 1.5. Provided that your HAProxy instance was built with
+OpenSSL support, you can enable it by uncommenting the example line and
+modifying it to suit your environment. More information is available in
+the [HAProxy
+documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl).
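+
+Before reloading HAProxy with a configuration like the one above, it is worth validating the file first. The following is a minimal sketch, assuming the configuration lives at the conventional path `/etc/haproxy/haproxy.cfg`:
+
+```bash
+# Parse the configuration in check mode and report any errors
+haproxy -c -f /etc/haproxy/haproxy.cfg
+```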
+
+Also note that the above example is considered a starting point and is a
+work in progress based upon [this
+example](https://gist.github.com/1507077). You should carefully examine
+the configuration and change it according to your specific environment.
+
+### Maintaining Nodes Behind HAProxy
+
+When using HAProxy with Riak, you can instruct HAProxy to ping each node
+in the cluster and automatically remove nodes that do not respond.
+
+You can also specify a round-robin configuration in HAProxy and have
+your application handle connection failures by retrying after a timeout,
+thereby reaching a functioning node upon retrying the connection
+attempt.
+
+HAProxy also has a standby system you can use to remove a node from
+rotation while allowing existing requests to finish. You can remove
+nodes from HAProxy directly from the command line by interacting with
+the HAProxy stats socket with a utility such as
+[socat](http://www.dest-unreach.org/socat/):
+
+```bash
+echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+At this point, you can perform maintenance on the node, down the node,
+and so on. When you've finished working with the node and it is again
+available for requests, you can re-enable it:
+
+```bash
+echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+Consult the following HAProxy documentation resources for more
+information on configuring HAProxy in your environment:
+
+* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy)
+* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)
+
+## Nginx
+
+Some users have reported success in using the [Nginx](http://nginx.org/)
+HTTP server to proxy requests for Riak clusters. An example that
+provides access to a Riak cluster *through GET requests only* is
+provided here for reference.
+
+### Example Configuration
+
+The following is an example starting point configuration for Nginx to
+act as a front-end proxy to a 5-node Riak cluster.
+
+This example forwards all GET requests to Riak nodes while rejecting all
+other HTTP operations.
+
+{{% note title="Nginx version notes" %}}
+This example configuration was verified on **Nginx version 1.2.3**. Please be
+aware that earlier versions of Nginx did not support any HTTP 1.1 semantics
+for upstream communication to backends. You should carefully examine this
+configuration and make changes appropriate to your specific environment before
+attempting to use it.
+{{% /note %}}
+
+Here is an example `nginx.conf` file:
+
+```config
+upstream riak_hosts {
+  # server 10.0.1.10:8098;
+  # server 10.0.1.11:8098;
+  # server 10.0.1.12:8098;
+  # server 10.0.1.13:8098;
+  # server 10.0.1.14:8098;
+}
+
+server {
+  listen 80;
+  server_name _;
+  access_log /var/log/nginx/riak.access.log;
+
+  # your standard Nginx config for your site here...
+  location / {
+    root /var/www/nginx-default;
+  }
+
+  # Expose the /riak endpoint and allow queries for keys only
+  location /riak/ {
+      proxy_set_header Host $host;
+      proxy_redirect off;
+
+      client_max_body_size    10m;
+      client_body_buffer_size 128k;
+
+      proxy_connect_timeout   90;
+      proxy_send_timeout      90;
+      proxy_read_timeout      90;
+
+      proxy_buffer_size          64k;  # If set to a smaller value,
+                                       # nginx can complain with a
+                                       # "too large headers" error
+      proxy_buffers              4 64k;
+      proxy_busy_buffers_size    64k;
+      proxy_temp_file_write_size 64k;
+
+      if ($request_method != GET) {
+        return 405;
+      }
+
+      # Disallow any link with the MapReduce query format "bucket,tag,_"
+      if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) {
+        return 405;
+      }
+
+      if ($request_method = GET) {
+        proxy_pass http://riak_hosts;
+      }
+  }
+}
+```
+
+{{% note title="Note on access controls" %}}
+Even when filtering and limiting requests to GETs only as done in the example,
+you should strongly consider additional access controls beyond what Nginx can
+provide directly, such as specific firewall rules to limit inbound connections
+to trusted sources.
+{{% /note %}}
+
+### Querying Secondary Indexes Over HTTP
+
+When accessing Riak over HTTP and issuing Secondary Index queries, you
+can encounter an issue due to the default Nginx handling of HTTP header
+names containing underscore (`_`) characters.
+
+By default, Nginx will issue errors for such queries, but you can
+instruct Nginx to handle such header names when doing Secondary Index
+queries over HTTP by adding the following directive to the appropriate
+`server` section of `nginx.conf`:
+
+```
+underscores_in_headers on;
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/managing.md b/content/riak/kv/3.0.1/configuring/managing.md
new file mode 100644
index 0000000000..b7e0ca1d7f
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/managing.md
@@ -0,0 +1,121 @@
+---
+title: "Managing Your Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Managing Configuration"
+    identifier: "configuring_managing"
+    weight: 130
+    parent: "configuring"
+toc: true
+aliases:
+
+---
+
+[use admin riak cli]: {{}}riak/kv/3.0.1/using/admin/riak-cli
+[use admin riak cli#chkconfig]: {{}}riak/kv/3.0.1/using/admin/riak-cli/#chkconfig
+[config reference#search]: {{}}riak/kv/3.0.1/configuring/reference/#search
+
+## Retrieving a Configuration Listing
+
+At any time, you can get a snapshot of currently applied configurations
+through the command line. For a listing of *all* of the configs
+currently applied in the node:
+
+```bash
+riak config effective
+```
+
+This will output a long list of the following form:
+
+```
+anti_entropy = active
+anti_entropy.bloomfilter = on
+anti_entropy.concurrency_limit = 2
+# and so on
+```
+
+For detailed information about a particular configuration variable, use
+the `config describe <variable>` command. This command will output a
+description of what the parameter configures, which datatype you should
+use to set the parameter (integer, string, enum, etc.), the default
+value of the parameter, the currently set value in the node, and the
+name of the parameter in `app.config` in older versions of Riak (if
+applicable).
+
+For in-depth information about the `ring_size` variable, for example:
+
+```bash
+riak config describe ring_size
+```
+
+This will output the following:
+
+```
+Documentation for ring_size
+Number of partitions in the cluster (only valid when first
+creating the cluster).
+Must be a power of 2, minimum 8 and maximum
+1024.
+
+   Datatype     : [integer]
+   Default Value: 64
+   Set Value    : undefined
+   app.config   : riak_core.ring_creation_size
+```
+
+## Checking Your Configuration
+
+The [`riak`][use admin riak cli] command line tool has a
+[`chkconfig`][use admin riak cli#chkconfig] command that enables you to
+determine whether the syntax in your configuration files is correct.
+
+```bash
+riak chkconfig
+```
+
+If your configuration files are syntactically sound, you should see the
+output `config is OK` followed by a listing of files that were checked.
+You can safely ignore this listing. If, however, something is
+syntactically awry, you'll see an error output that provides details
+about what is wrong. To give an example, the `search.solr.jmx_port`
+setting (in the [Search][config reference#search] section below)
+must be set as an integer. Imagine that we set it to something else:
+
+```riakconf
+search.solr.jmx_port = banana
+```
+
+If we run `riak chkconfig` now, we'll get an error:
+
+```
+[error] Error generating configuration in phase transform_datatypes
+[error] Error transforming datatype for: search.solr.jmx_port
+[error] "banana" can't be converted to an integer
+```
+
+The error message will specify which configurable parameters are
+syntactically unsound and attempt to explain why.
+
+Please note that the `chkconfig` command only checks for syntax. It will
+_not_ be able to discern if your configuration is otherwise unsound,
+e.g. if your configuration will cause problems on your operating system
+or doesn't activate subsystems that you would like to use.
+
+## Debugging Your Configuration
+
+If there is a problem with your configuration but you're having trouble
+identifying the problem, there is a command that you can use to debug
+your configuration:
+
+```bash
+riak config generate -l debug
+```
+
+If there are issues with your configuration, you will see detailed
+output that might provide a better sense of what has gone wrong in the
+config generation process.
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/mapreduce.md b/content/riak/kv/3.0.1/configuring/mapreduce.md
new file mode 100644
index 0000000000..d91b82e963
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/mapreduce.md
@@ -0,0 +1,200 @@
+---
+title: "MapReduce Settings"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "MapReduce Settings"
+    identifier: "configuring_mapreduce"
+    weight: 170
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/advanced/configs/mapreduce/
+  - /riak/kv/3.0.1/ops/advanced/configs/mapreduce/
+
+---
+
+[usage mapreduce]: {{}}riak/kv/3.0.1/developing/usage/mapreduce
+[config reference#appconfig]: {{}}riak/kv/3.0.1/configuring/reference/#app-config
+[usage secondary-indexes]: {{}}riak/kv/3.0.1/developing/usage/secondary-indexes
+
+## Configuring MapReduce
+
+[MapReduce (M/R)][usage mapreduce] is always enabled, but it is configurable
+through the [app.config][config reference#appconfig] file, as follows, under
+the `riak_kv` section:
+
+```erlang
+{riak_kv, [
+```
+
+`mapred_name` is the URL directory used to submit M/R requests to Riak.
+The default is `mapred`, making the request path, for example,
+`http://localhost:8098/mapred`.
+
+```erlang
+  {mapred_name, "mapred"},
+```
+
+`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes]
+MapReduce inputs are queued in parallel in their own pipe (`true`), or
+serially through a helper process (`false` or undefined).
+
+> **Note**: Set to `false` or leave undefined during an upgrade from 1.0.
+
+```erlang
+  {mapred_2i_pipe, true},
+```
+
+Each of these entries controls how many Javascript virtual machines are
+available for executing map, reduce, pre- and post-commit hook
+functions.
+
+This is largely relevant only if you are writing JavaScript M/R jobs.
+
+```erlang
+  {map_js_vm_count, 8 },
+  {reduce_js_vm_count, 6 },
+  {hook_js_vm_count, 2 },
+```
+
+`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated
+to the Javascript VMs. If unset, the default is 8MB.
+
+This is largely relevant only if you are writing JavaScript M/R jobs.
+
+```erlang
+  {js_max_vm_mem, 8},
+```
+
+`js_thread_stack` is the maximum amount of thread stack, in megabytes,
+allocated to the Javascript VMs. If unset, the default is 16MB.
+
+> **Note**: This is not the same as the C thread stack.
+
+```erlang
+  {js_thread_stack, 16},
+```
+
+`js_source_dir` should point to a directory containing Javascript source
+files which will be loaded when Riak initializes Javascript VMs.
+
+```erlang
+  %{js_source_dir, "/tmp/js_source"},
+```
+
+
+
+## Configuration Tuning for Javascript
+
+If you load larger JSON objects in your buckets, there is a possibility you might encounter an error like the following:
+
+```json
+ {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"}
+```
+
+
+You can increase the amount of memory allocated to the Javascript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB:
+
+```erlang
+{js_thread_stack, 8}
+```
+
+becomes
+
+```erlang
+{js_thread_stack, 32},
+```
+
+In addition to increasing the amount of memory allocated to the stack, you can increase the heap size as well by increasing the `js_max_vm_mem` from the default of 8MB. If you are collecting a large amount of results in a reduce phase, you may need to increase this setting.
+
+## Configuration for Riak 1.0
+
+Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config:
+
+```erlang
+%% Use Riak Pipe to power MapReduce queries
+{mapred_system, pipe},
+```
+
+> **Warning:**
+>
+> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0.
+
+Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this:
+
+```erlang
+%% Use the legacy MapReduce system
+{mapred_system, legacy},
+```
+
+## Configuration Tuning for Reduce Phases
+
+If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases.
+
+### Batch Size
+
+By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs.
+If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config:
+
+```erlang
+%% Run reduce functions after 100 new inputs are received
+{mapred_reduce_phase_batch_size, 100},
+```
+
+You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this:
+
+```json
+{"reduce":
+  {...language, etc. as usual...
+   "arg":{"reduce_phase_batch_size":150}}}
+```
+
+In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form:
+
+```erlang
+{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep}
+```
+
+Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead:
+
+```json
+{"reduce":
+  {...language, etc. as usual...
+   "arg":{"reduce_phase_only_1":true}}}
+```
+
+Similarly, in Erlang:
+
+```erlang
+{reduce, FunSpec, [reduce_phase_only_1], Keep}
+```
+
+> **Warning:**
+>
+> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1.
+
+### Pre-Reduce
+
+If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced locally, on the partition that produced them, before being sent, as usual, to the final aggregate reduce.
+
+Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config:
+
+```erlang
+%% Always pre-reduce between map and reduce phases
+{mapred_always_prereduce, true}
+```
+
+Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag:
+
+```erlang
+{map, FunSpec, [do_prereduce], Keep}
+```
+
+> **Warning:**
+>
+> A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1.
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/next-gen-replication.md b/content/riak/kv/3.0.1/configuring/next-gen-replication.md
new file mode 100644
index 0000000000..840f2d2ed3
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/next-gen-replication.md
@@ -0,0 +1,63 @@
+---
+title_supertext: "Configuring:"
+title: "Next Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "3.0.1"
+menu:
+  riak_kv-3.0.1:
+    name: "Next Gen Replication"
+    identifier: "nextgen_rep"
+    weight: 200
+    parent: "configuring"
+version_history:
+  in: "2.9.1+"
+toc: true
+commercial_offering: true
+aliases:
+
+---
+
+The configuration for Next Gen Replication is kept in
+the `riak.conf` configuration file.
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by
+running the `riak` command-line tool:
+
+```bash
+riak chkconfig
+```
+
+## riak.conf Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync: should all data be sync'd, or a specific bucket (bucket), or a specific bucket type (type)? Note that in most cases sync of all data is lower overhead than sync of a subset of data, as cached AAE trees will be used.
+`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For Tictac full-sync, the registered queue name on this cluster to be used for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exclusive to full-sync (i.e. a real-time replication queue may be used as well).
+`ttaaefs_maxresults` | `any` (integer) | `64` | For Tictac full-sync, the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair.
+`ttaaefs_rangeboost` | `any` (integer) | `8` | For Tictac full-sync, a multiplier applied to `ttaaefs_maxresults`. When running a range_check query, the maximum number of AAE segments compared per exchange will be `ttaaefs_maxresults` * `ttaaefs_rangeboost`.
+`ttaaefs_bucketfilter_name` | `any` (text) | `` | For Tictac bucket full-sync, the bucket to be sync'd by this node. Only ascii string bucket definitions supported (which will be converted using list_to_binary).
+`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync, the bucket type of the bucket name. Only ascii string type bucket definitions supported (these definitions will be converted to binary using list_to_binary).
+`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync, the NVAL to be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval.
+`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync, the NVAL to be sync'd in the remote cluster.
+`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node in the cluster to which this node will connect for full-sync purposes. If this peer node is unavailable, then this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes.
+`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster.
+`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when connecting to the peer in the remote cluster. Could be http or pb (but only http currently being tested).
+`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24-hour period should all the data be checked to confirm it is fully sync'd. When running a full (i.e. nval) sync this will check all the data under that nval between the clusters, and when the trees are out of alignment, will check across all data where the nval matches the specified nval.
+`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24-hour period should no data be checked to confirm it is fully sync'd. Use nochecks to align the number of checks done by each node; if each node has the same number of slots, they will naturally space their checks within the period of the slot.
+`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24-hour period should the last hour's data be checked to confirm it is fully sync'd.
+`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24-hour period should the last 24 hours of data be checked to confirm it is fully sync'd.
+`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24-hour period should a range_check be run.
+`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, should each key that is repaired be logged.
+`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable Tictac AAE. Note that `tictacaae_active` is read only at startup - setting the environment variable at runtime will have no impact.
+`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
+`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Set the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and not removed when that partition hands off (although the contents should be cleared).
+`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if Tictac AAE is enabled, the vnode will detect whether the vnode backend has the capability to be a "native" store. If not, then parallel mode will be entered, and a parallel AAE keystore will be started. There are two potential parallel store backends - leveled_ko and leveled_so.
+`tictacaae_rebuildwait` | `` | `336` | This is the number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
+`tictacaae_rebuilddelay` | `` | `345600` | Once the AAE system has expired (due to the rebuild wait), the rebuild will not be triggered until the rebuild delay, which will be a random number up to the size of this delay (in seconds).
+`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
+`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will skip when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans developing.
+`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on the riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
+`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data; hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired each pass is defined by the tictacaae_maxresults.
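+
+As an illustration, the following `riak.conf` fragment sketches a node configured to full-sync all data against a single peer in a remote cluster, checking once an hour. The address, port, and schedule are example values only, not recommendations:
+
+```riakconf
+## Example full-sync configuration - peer address is a placeholder
+ttaaefs_scope = all
+ttaaefs_localnval = 3
+ttaaefs_remotenval = 3
+ttaaefs_peerip = 10.0.0.2
+ttaaefs_peerport = 8898
+ttaaefs_peerprotocol = http
+ttaaefs_allcheck = 24
+ttaaefs_nocheck = 0
+```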
diff --git a/content/riak/kv/3.0.1/configuring/reference.md b/content/riak/kv/3.0.1/configuring/reference.md
new file mode 100644
index 0000000000..d72a3d71ab
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/reference.md
@@ -0,0 +1,2039 @@
+---
+title: "Riak KV Configuration Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Configuration Reference"
+    identifier: "configuring_reference"
+    weight: 140
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/advanced/configs/configuration-files/
+  - /riak/kv/3.0.1/ops/advanced/configs/configuration-files/
+
+---
+
+[concept clusters]: ../../learn/concepts/clusters
+[plan backend bitcask]: ../../setup/planning/backend/bitcask
+[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask
+[plan backend leveldb]: ../../setup/planning/backend/leveldb
+[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb
+[plan backend leveled]: ../../setup/planning/backend/leveled
+[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled
+[plan backend memory]: ../../setup/planning/backend/memory
+[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend
+[plan backend multi]: ../../setup/planning/backend/multi
+[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1
+[use admin riak cli]: ../../using/admin/riak-cli
+[use admin riak admin]: ../../using/admin/riak-admin
+[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae
+[use ref search 2i]: ../../using/reference/secondary-indexes
+[cluster ops bucket types]: ../../using/cluster-operations/bucket-types
+[usage conflict resolution]: ../../developing/usage/conflict-resolution
+[concept causal context]: ../../learn/concepts/causal-context
+[usage mapreduce]: ../../developing/usage/mapreduce
+[security index]: ../../using/security/
+[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency
+[glossary vnode]: ../../learn/glossary/#vnode
+[cluster ops handoff]: ../../using/cluster-operations/handoff
+[Search Settings]: ../search#search-config-settings
+
+Riak has a `riak.conf` configuration file located in `/etc` if you are
+using a source install or in `/etc/riak` or `/usr/local/etc` if you used
+a binary install.
+
+The `riak.conf` file is used to set a wide variety of attributes for the
+node, from the storage backend that the node will use to store data to
+the location of SSL-related files to sibling resolution parameters and
+beyond.
+
+> **Note on upgrades to 2.0**
+>
+> If your cluster is currently running a version of Riak prior to 2.0 and
+you'd like to upgrade to version 2.0 or later, you may continue to use
+your old `app.config` and `vm.args` files. You may also use the newer
+`riak.conf` alongside them, but please be aware that any settings in
+`app.config` or `vm.args` will override settings in `riak.conf`.
+
+## The advanced.config file
+
+For most Riak installations, the `riak.conf` file should be sufficient
+for configuration management. But some installations, particularly those
+upgrading from an earlier version of Riak to version 2.0 or later, may
+need to make use of an `advanced.config` file to control some settings
+available only in versions prior to 2.0. If this applies to your
+installation, please see the [Advanced Configuration](#advanced-configuration) section below.
+
+## Node Metadata
+
+Every Riak node has a name and a cookie used to facilitate inter-node
+communication. The following parameters enable you to customize the name
+and cookie.
+
+Config | Description | Default
+:------|:------------|:-------
+`distributed_cookie` | Cookie for distributed node communication within a Riak cluster. All nodes in the same cluster should use the same cookie or they will not be able to communicate. | `riak`
+`nodename` | The name of the Riak node. | `riak@127.0.0.1`
+`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
+
+## Ring
+
+Configurable parameters for your cluster's [ring][concept clusters].
+
+Config | Description | Default
+:------|:------------|:-------
+`ring.state_dir` | Default location of ringstate. | `./data/ring`
+`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
+`transfer_limit` | Number of concurrent node-to-node transfers allowed. | `2`
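+
+To make the tables above concrete, a minimal `riak.conf` fragment covering node metadata and ring settings might look like this (the node name, cookie, and sizes are illustrative values only):
+
+```riakconf
+## Example values - adjust for your own cluster
+nodename = riak@192.168.1.10
+distributed_cookie = mycluster
+ring_size = 128
+transfer_limit = 4
+```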
+
+## Storage Backend
+
+Riak enables you to choose from the following storage backends:
+
+* [Bitcask][plan backend bitcask]
+  - [configuration][config backend bitcask]
+* [LevelDB][plan backend leveldb]
+  - [configuration][config backend leveldb]
+* [Leveled][plan backend leveled]
+  - [configuration][config backend leveled]
+* [Memory][plan backend memory]
+  - [configuration][config backend memory]
+* [Multi][plan backend multi]
+  - [configuration][config backend multi]
+
+Config | Description | Default
+:------|:------------|:-------
+`storage_backend` | Specifies the storage engine used for Riak's key-value data and secondary indexes (if supported). The available options are `bitcask` (the default), `leveldb`, `memory`, `leveled` and `multi`. | `bitcask`
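+
+For example, to select LevelDB rather than the default Bitcask backend, you would set:
+
+```riakconf
+storage_backend = leveldb
+```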
+
+## Directories
+
+The directories in which Riak stores data, logs, dependencies,
+executables, and configuration files can be configured using the
+parameters below.
+
+Config | Description | Default
+:------|:------------|:-------
+`platform_bin_dir` | The directory in which the `riak admin`, `riak-debug`, and now-deprecated `search-cmd` executables are stored. | `./bin`
+`platform_data_dir` | The directory in which Riak stores its storage backend data, as well as active anti-entropy data, and cluster metadata. | `./data`
+`platform_etc_dir` | The directory in which Riak's configuration files are stored. | `./etc`
+`platform_lib_dir` | The directory in which Riak's dependencies are housed. | `./lib`
+`platform_log_dir` | The directory in which Riak's log files are stored, e.g. `console.log`, `erlang.log`, and `crash.log` files. | `./log`
+
+Each of these directory parameters can be used to construct values for
+other parameters by placing it within a `$(...)`. Thus,
+`platform_log_dir` becomes `$(platform_log_dir)` and so on.
+
+To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the
+`anti_entropy.data_dir` parameter. When setting that parameter, you can
+specify an absolute directory, as below:
+
+```riakconf
+anti_entropy.data_dir = /path/to/anti_entropy
+```
+
+Or you can use the value of `platform_data_dir`:
+
+```riakconf
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```
+
+## Search
+
+Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`search` | `off` | `on` or `off`
+`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
+`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
+`search.dist_query` | `on` | `on` or `off`
+`search.index.error_threshold.failure_count` | `3` | Integer
+`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
+`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
+`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
+`search.queue.batch.maximum` | `100` | Integer
+`search.queue.batch.minimum` | `1` | Integer
+`search.queue.high_watermark` | `10000` | Integer
+`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
+`search.root_dir` | `./data/yz` | Directory
+`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
+`search.solr.jmx_port` | `8985` | Integer
+`search.solr.port` | `8093` | Integer
+`search.solr.start_timeout` | `30s` | Integer with time units (e.g. 2m)
+`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`
+
+
+## Riak Control
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters. The configurable parameters below enable you
+to turn the Riak Control subsystem on and off and to configure console
+authorization.
+
+Config | Description | Default
+:------|:------------|:-------
+`riak_control` | Set to `off` to disable the admin panel. | `off`
+`riak_control.auth.mode` | Authentication mode used for access to the admin panel. Options are `off` (which is the default) or `userlist`. | `off`
+`riak_control.auth.user.$username.password` | If Riak Control's authentication mode (`riak_control.auth.mode`) is set to `userlist`, this is the list of usernames and passwords for access to the admin panel. |
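+
+A sketch of enabling Riak Control with userlist authentication; the username and password below are placeholders you would replace:
+
+```riakconf
+riak_control = on
+riak_control.auth.mode = userlist
+## placeholder credentials - replace before use
+riak_control.auth.user.admin.password = s3cret
+```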
+
+## Runtime Health
+
+Configurable parameters for interaction between Riak and the underlying
+operating system.
+
+Config | Description | Default
+:------|:------------|:-------
+`runtime_health.triggers.distribution_port` | Whether distribution ports with full input buffers will be counted as busy. Distribution ports connect Riak nodes within a single cluster. | `on`
+`runtime_health.triggers.port` | Whether ports with full input buffers will be counted as busy. Ports can represent open files or network sockets. | `on`
+`runtime_health.triggers.process.heap_size` | A process will become busy when its heap exceeds this size (in bytes). | `160444000`
+`runtime_health.triggers.process.garbage_collection` | A process will become busy when it exceeds this amount of time doing garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. Note: Enabling this setting can cause performance problems on multi-core systems. | `off`
+`runtime_health.triggers.process.long_schedule` | A process will become busy when it exceeds this amount of time during a single process scheduling and execution cycle. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. | `off`
+`runtime_health.thresholds.busy_ports` | The threshold at which a warning will be triggered about the number of ports that are overly busy. Ports with full input buffers count toward this threshold. | `2`
+`runtime_health.thresholds.busy_processes` | The threshold at which a warning will be triggered about the number of processes that are overly busy. Processes with large heaps or that take a long time to garbage collect will count toward this threshold. | `30`
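+
+For instance, a node could loosen the busy-process warning threshold and disable the generic port trigger along these lines (the values are illustrative, not tuning advice):
+
+```riakconf
+## Example runtime-health tuning
+runtime_health.triggers.port = off
+runtime_health.thresholds.busy_processes = 50
+```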
+
+## Default Bucket Properties
+
+The table below lists the bucket properties that are used when no
+[bucket type][cluster ops bucket types] is specified.
+
+Config | Description | Default
+:------|:------------|:-------
+`buckets.default.allow_mult` | Whether or not siblings are allowed. Note: See [Conflict Resolution][usage conflict resolution] for a discussion of siblings. | `false`
+`buckets.default.basic_quorum` | Whether not-founds will invoke the "basic quorum" optimization. This setting will short-circuit fetches where the majority of replicas report that the key is not found. Only used when `notfound_ok` is set to `false`. | `false`
+`buckets.default.dw` | The number of replicas which must reply to a write request indicating that the write was committed to durable storage for the write to be deemed successful. | `quorum`
+`buckets.default.last_write_wins` | Whether conflicting writes resolve via timestamp. | `false`
+`buckets.default.merge_strategy` | The strategy used when merging objects that potentially have conflicts. The default is `2` in Riak 2.0 for typed buckets and `1` for non-typed buckets. This setting reduces sibling creation through additional metadata on each sibling (also known as Dotted Version Vectors). Setting this to `1` is the default for Riak 1.4 and earlier, and may duplicate siblings that originated in the same write. | `1`
+`buckets.default.n_val` | The number of replicas stored in **non-typed** buckets. For typed buckets, the default is 3 unless changed explicitly for that bucket type. Note: See Replication Properties for further discussion. | `3`
+`buckets.default.notfound_ok` | Whether not-founds will count toward a quorum of reads. | `true`
+`buckets.default.postcommit` | A space-delimited list of functions that will be run after a value is stored. Only Erlang functions are allowed, using the `module:function` format. |
+`buckets.default.precommit` | A space-delimited list of functions that will be run before a value is stored, and that can abort the write. Only Erlang functions are allowed, using the `module:function` format. |
+`buckets.default.pr` | The number of primary, non-fallback replicas that must reply to a read request. | `0`
+`buckets.default.pw` | The number of primary, non-fallback replicas which must reply to a write request. | `0`
+`buckets.default.r` | The number of replicas which must reply to a read request. | `quorum`
+`buckets.default.w` | The number of replicas which must reply to a write request, indicating that the write was received. | `quorum`
+`buckets.default.rw` | The number of replicas which must reply to a delete request. | `quorum`
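+
+As an example, a node that allows siblings in untyped buckets and requires a majority for both reads and writes could use the following fragment (the values shown are the sort of thing you might choose, not recommendations):
+
+```riakconf
+## Example default bucket properties
+buckets.default.allow_mult = true
+buckets.default.n_val = 3
+buckets.default.r = quorum
+buckets.default.w = quorum
+```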
+
+## Object Settings
+
+Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context].
+
+Config | Description | Default
+:------|:------------|:-------
+`object.format` | Controls which binary representation of a riak value is stored on disk. Options are `0`, which will use the original `erlang:term_to_binary` format but has a higher space overhead, or `1`, which will tell Riak to utilize a new format for more compact storage of small values. | `1`
+`object.siblings.maximum` | Writing an object with more than this number of siblings will send a failure to the client. | `100`
+`object.siblings.warning_threshold` | Writing an object with more than this number of siblings will generate a warning in the logs. | `25`
+`object.size.maximum` | Writing an object larger than this will send a failure to the client. | `50MB`
+`object.size.warning_threshold` | Reading or writing objects larger than this size will write a warning in the logs. | `5MB`
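+
+A sketch of tightening the sibling and object-size limits from their defaults (example values only):
+
+```riakconf
+## Example: stricter sibling and size limits than the defaults
+object.siblings.warning_threshold = 10
+object.siblings.maximum = 50
+object.size.warning_threshold = 1MB
+object.size.maximum = 10MB
+```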
+
+## Erlang VM
+
+In the older configuration system, the Erlang VM in which Riak runs was
+configured using a `vm.args` file. In the new, `riak.conf`-based
+system, the Erlang VM can be configured using the parameters in the
+table below.
+
+Config | Description | Default
+:------|:------------|:-------
+`erlang.async_threads` | The number of threads in the Erlang VM's asynchronous thread pool. The valid range is 0-1024. If thread support is not available, this parameter will have no impact; if thread support is available, the default value is 64. This is the equivalent of the `+A` flag. | `64` (if thread support is available)
+`erlang.async_threads.stack_size` | If thread support is available in your Erlang VM, this parameter sets the amount of memory allocated to each asynchronous thread, which you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, which translates to 64-32768 KB on 32-bit architectures. Although there is no default, we suggest a stack size of 16 kilowords, which translates to 64 KB. This small default size has been chosen because the number of asynchronous threads, set using the `erlang.async_threads` parameter explained above, might be quite large. The 64 KB default is enough for drivers delivered with Erlang/OTP but might not be large enough to accommodate drivers that use the `driver_async()` functionality. |
+`erlang.distribution.net_ticktime` | The net kernel is an Erlang system process that provides various forms of network monitoring. In a Riak cluster, one of the functions of the net kernel is to periodically check node liveness. Tick time is the frequency with which those checks happen. This parameter determines that frequency for every N. If you set this parameter to 10, for example, the tick will occur once every 10 seconds. |
+`erlang.distribution.port_range.minimum` | For ease of firewall configuration, the Erlang distribution can be bound to a limited range of TCP ports. If this parameter is set, and `erlang.distribution.port_range.maximum` is not set, only this port will be used. If the minimum is unset, no restriction will be made on the port range. Instead, Erlang will listen on a random high-numbered port. |
+`erlang.distribution.port_range.maximum` | See the description for `erlang.distribution.port_range.minimum` directly above. |
+`erlang.schedulers.force_wakeup_interval` | Set the scheduler forced wakeup interval. All run queues will be scanned each time period specified (in milliseconds). While there are sleeping schedulers in the system, one scheduler will be woken for each non-empty run queue found. An interval of zero disables this feature, which is the default. This feature is a workaround for lengthy executing native code, and native code that does not properly bump reductions. |
+`erlang.schedulers.compaction_of_load` | Enables or disables the Erlang scheduler's compaction of load. When enabled (which is the default), load balancing will strive to establish a load distribution that causes as many scheduler threads as possible to be fully loaded, i.e. not to run out of scheduled work. This is accomplished by migrating load, such as running processes, into a smaller set of schedulers when schedulers frequently run out of work. When disabled, the frequency at which schedulers run out of work will not be taken into account by the load balancing logic. | `true` (enabled)
+`erlang.schedulers.utilization_balancing` | Enables or disables the Erlang scheduler's balancing of load. By default, scheduler utilization balancing is disabled while scheduler compaction of load is enabled, i.e. `erlang.schedulers.compaction_of_load` is set to `true`. In this state, the Erlang VM will strive for a load distribution which causes as many scheduler threads as possible to be fully loaded, i.e. to not run out of work. When load balancing is enabled using this setting, the system will instead attempt to balance scheduler utilization equally between schedulers. | `false` (disabled)
+`erlang.distribution_buffer_size` | For nodes with many `busy_dist_port` events, Basho recommends raising the sender-side network distribution buffer size. 32MB may not be sufficient for some workloads and is a suggested starting point. Erlangers may know this as `zdbbl`. | `32MB`
+`erlang.process_limit` | Raises the default Erlang process limit. | `256000`
+`erlang.max_ets_tables` | Raises the ETS table limit. | `256000`
+`erlang.crash_dump` | Sets the location of crash dumps. | `./log/erl_crash.dump`
+`erlang.fullsweep_after` | A non-negative integer which indicates how many times generational garbage collections can be done without forcing a fullsweep collection. In low-memory systems (especially without virtual memory), setting the value to 0 can help to conserve memory. | `0`
+`erlang.max_ports` | The number of concurrent ports/sockets. The valid range is 1024 to 134217727. | `65536`
+`erlang.K` | Enables or disables the kernel poll functionality if the emulator supports it. If the emulator does not support kernel poll, and the K flag is passed to the emulator, a warning is issued at startup. | `on`
+`erlang.schedulers.total` | Sets the number of scheduler threads to create and scheduler threads to set online when `erlang.smp` support has been enabled. The maximum for both values is 1024. If the Erlang runtime system is able to determine the amount of logical processors configured and logical processors available, `schedulers.total` will default to logical processors configured, and `schedulers.online` will default to the number of logical processors available. Otherwise, the default values will be 1. Schedulers may be omitted if `schedulers.online` is not and vice versa. If `schedulers.total` or `schedulers.online` is specified as a negative number, the value is subtracted from the default number of logical processors configured or logical processors available, respectively. Specifying the value 0 for `Schedulers` or `SchedulersOnline` resets the number of scheduler threads or scheduler threads online, respectively, to its default value. This option is ignored if the emulator doesn't have SMP support enabled (see the `erlang.smp` flag). |
+`erlang.schedulers.online` | See the description for `erlang.schedulers.total` directly above. |
+`erlang.W` | Sets the mapping of warning messages for `error_logger`. Messages sent to the error logger using one of the warning routines can be mapped either to errors, warnings (`w`, which is the default), or info reports (`i`). | `w`
+`erlang.smp` | Starts the Erlang runtime system with SMP support enabled. This may fail if no runtime system with SMP support is available. The `auto` setting starts the Erlang runtime system with SMP support enabled if it is available and more than one logical processor is detected. A value of `disable` starts a runtime system without SMP support. Note: The runtime system with SMP support will not be available on all supported platforms. See also the `erlang.schedulers` settings. Some native extensions (NIFs) require use of the SMP emulator. | `enable`
+`erlang.shutdown_time` | Limits how long the Erlang VM spends shutting down. After the specified duration elapses, all existing processes are killed. | `10s`
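+
+For example, a node suffering frequent `busy_dist_port` events and needing a fixed distribution port range for firewalling might use something like the following (the buffer size and port numbers are illustrative):
+
+```riakconf
+## Example VM tuning - values are placeholders, not recommendations
+erlang.distribution_buffer_size = 64MB
+erlang.distribution.port_range.minimum = 6000
+erlang.distribution.port_range.maximum = 7999
+```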
+
+## JavaScript MapReduce
+
+Configurable parameters for Riak's now-deprecated JavaScript
+[MapReduce][usage mapreduce] system.
+
+Config | Description | Default
+:------|:------------|:-------
+`javascript.source_dir` | A directory containing the JavaScript source files which will be loaded by Riak when it initializes JavaScript VMs. |
+`javascript.maximum_stack_size` | The maximum amount of thread stack memory to allocate to each JavaScript virtual machine. | `16MB`
+`javascript.maximum_heap_size` | The maximum amount of memory allocated to each JavaScript virtual machine. | `8MB`
+`javascript.hook_pool_size` | The number of JavaScript virtual machines available for executing pre-commit hook functions. | `2`
+`javascript.reduce_pool_size` | The number of JavaScript virtual machines available for executing reduce functions. | `6`
+`javascript.map_pool_size` | The number of JavaScript virtual machines available for executing map functions. | `8`
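+
+If a workload leans heavily on (deprecated) JavaScript map functions, the pool and heap sizes could be raised along these lines (example values only):
+
+```riakconf
+## Example: enlarged JS VM pool - illustrative values
+javascript.map_pool_size = 16
+javascript.maximum_heap_size = 16MB
+```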
+
+## Security
+
+Configurable parameters for [Riak KV Security][security index].
+
+Config | Description | Default
+:------|:------------|:-------
+`ssl.cacertfile` | The default signing authority location for HTTPS. | `#(platform_etc_dir)/cacertfile.pem`
+`ssl.keyfile` | Default key location for HTTPS. | `#(platform_etc_dir)/key.pem`
+`ssl.certfile` | Default cert location for HTTPS. | `#(platform_etc_dir)/cert.pem`
+`secure_referer_check` | Measures were added to Riak 1.2 to counteract cross-site scripting and request-forgery attacks. Some reverse proxies cannot remove the `Referer` header and make serving data directly from Riak impossible. Turning this setting to `off` disables this security check. | `on`
+`check_crl` | Whether to check the certificate revocation list (CRL) of a client certificate. This defaults to `on` but some CAs may not maintain or define a CRL, so this can be disabled if no CRL is available. | `on`
+`tls_protocols.sslv3` | Determine which SSL/TLS versions are allowed. By default, only TLS 1.2 is allowed, but other versions can be enabled if clients don't support the latest TLS standard. It is strongly recommended that SSLv3 not be enabled unless absolutely necessary. More than one protocol can be enabled at once. The `tls_protocols` parameters below can be used to turn different versions on and off. | `off`
+`tls_protocols.tlsv1.2` | | `on`
+`tls_protocols.tlsv1.1` | | `off`
+`tls_protocols.tlsv1` | | `off`
+`honor_cipher_order` | Whether to prefer the order in which the server lists its ciphers. When set to `off`, the client's preferred cipher order dictates which cipher is chosen. | `on`
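+
+A sketch of pointing the HTTPS certificate settings at files under the etc directory (using the `$(...)` substitution described above) and additionally allowing TLS 1.1 for older clients; the paths are placeholders:
+
+```riakconf
+## Placeholder certificate paths - replace with your own files
+ssl.certfile = $(platform_etc_dir)/cert.pem
+ssl.keyfile = $(platform_etc_dir)/key.pem
+ssl.cacertfile = $(platform_etc_dir)/cacertfile.pem
+tls_protocols.tlsv1.1 = on
+```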
+
+## Client Interfaces
+
+Configurable parameters for clients connecting to Riak either through
+Riak's Protocol Buffers or HTTP API.
+
+Config | Description | Default
+:------|:------------|:-------
+`protobuf.nagle` | Turns off Nagle's algorithm for Protocol Buffers connections. This is equivalent to setting the `TCP_NODELAY` option on the socket. | `off`
+`protobuf.backlog` | The maximum length to which the queue of pending connections may grow. If set, it must be an integer greater than zero. If you anticipate a huge number of connections being initialized simultaneously, set this number higher. | `128`
+`listener.protobuf.$name` | This is the IP address and TCP port to which the Riak Protocol Buffers interface will bind. | `{"127.0.0.1",8087}`
+`listener.http.$name` | This is the IP address and TCP port to which the Riak HTTP interface will bind. | `{"127.0.0.1",8098}`
+`listener.https.$name` | This is the IP address and TCP port to which the Riak HTTPS interface will bind. |
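+
+For example, to expose the client interfaces beyond localhost, the listeners could be rebound as follows (`internal` is the conventional listener name; the addresses are examples):
+
+```riakconf
+## Example: listen on all interfaces - restrict in production
+listener.protobuf.internal = 0.0.0.0:8087
+listener.http.internal = 0.0.0.0:8098
+```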
+
+## Logging
+
+Configurable parameters for [lager](https://github.com/basho/lager),
+Riak's logging system.
+
+Config | Description | Default
+:------|:------------|:-------
+`log.console` | Where to emit the default log messages (typically at `info` severity). Possible values: `off`, which disables console log messages; `file`, which specifies that log messages will be output to the file specified by `log.console.file`; `console`, which outputs messages to standard output (seen when using `riak attach-direct`); or `both`, which outputs messages both to the file specified in `log.console.file` and to standard out. | `file`
+`log.console.file` | When `log.console` is set to `file` or `both`, this parameter determines the path of the file to which console messages will be logged. | `./log/console.log`
+`log.console.level` | The severity level of the console log. Possible values: `debug`, `info`, `warning`, `error`. | `info`
+`log.crash` | Whether to enable the crash log. | `on`
+`log.crash.file` | If the crash log is enabled, the file where its messages will be written. | `./log/crash.log`
+`log.crash.maximum_message_size` | Maximum size of individual messages in the crash log. | `64KB`
+`log.crash.rotation` | The schedule on which to rotate the crash log. | `$D0`
+`log.crash.rotation.keep` | The number of rotated crash logs to keep. When set to `current`, only the current open log file is kept. Otherwise, an integer can be specified. | `5`
+`log.crash.size` | Maximum size of the crash log before it is rotated. | `10MB`
+`log.error.file` | The file where error messages will be logged. | `./log/error.log`
+`log.error.messages_per_second` | Maximum number of `error_logger` messages to handle per second. | `100`
+`log.error.redirect` | Whether to redirect `error_logger` messages into lager. | `on`
+`log.syslog` | When set to `on`, enables log output to syslog. | `off`
+`log.syslog.facility` | Sets the facility level of syslog output if `log.syslog` is set to `on`. Possible values: `auth`, `authpriv`, `clock`, `cron`, `daemon`, `ftp`, `kern`, `lpr`, `mail`, `news`, `syslog`, `user`, `uucp`. In addition to these settings, you may also select `local0` through `local7`. | `daemon`
+`log.syslog.ident` | If `log.syslog` is set to `on`, this setting determines the prefix appended to each syslog message. | `riak`
+`log.syslog.level` | If `log.syslog` is set to `on`, this setting determines the log level of syslog output. Possible values: `alert`, `critical`, `debug`, `emergency`, `error`, `info`, `none`, `notice`, `warning`. | `info`
+`sasl` | Whether to enable `sasl`, Erlang's built-in error logger. | `off`
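+
+A sketch of routing logs to syslog in addition to the console file; the facility and level are example choices drawn from the table above:
+
+```riakconf
+## Example syslog output configuration
+log.syslog = on
+log.syslog.facility = daemon
+log.syslog.level = info
+log.syslog.ident = riak
+```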
+
+## Active Anti-Entropy
+
+Configurable parameters for Riak's active anti-entropy subsystem.
+
+Config | Description | Default
+:------|:------------|:-------
+`anti_entropy` | How Riak will repair out-of-sync keys. If set to `active`, out-of-sync keys will be repaired in the background; if set to `passive`, out-of-sync keys are only repaired on read; and if set to `active-debug`, verbose debugging information will be output. | `active`
+`anti_entropy.throttle` | Whether the distributed throttle for Active Anti-Entropy is enabled. | `on`
+`anti_entropy.throttle.$tier.mailbox_size` | Sets the throttling tiers for Active Anti-Entropy. Each tier is a minimum vnode mailbox size and a time-delay that the throttle should observe at that size and above. For example, `anti_entropy.throttle.tier1.mailbox_size = 0`, `anti_entropy.throttle.tier1.delay = 0ms`, `anti_entropy.throttle.tier2.mailbox_size = 40`, `anti_entropy.throttle.tier2.delay = 5ms`, etc. If configured, there must be a tier which includes a mailbox size of 0. Both `.mailbox_size` and `.delay` must be set for each tier. |
+`anti_entropy.throttle.$tier.delay` | See the description for `anti_entropy.throttle.$tier.mailbox_size` above. |
+`anti_entropy.bloomfilter` | Bloom filters are highly effective in shortcutting data queries that are destined to not find the requested key, though they tend to entail a small performance cost. | `on`
+`anti_entropy.max_open_files` | | `20`
+`anti_entropy.write_buffer_size` | The LevelDB options used by Active Anti-Entropy to generate the LevelDB-backed on-disk hashtrees. | `4MB`
+`anti_entropy.data_dir` | The directory where AAE hash trees are stored. | `./data/anti_entropy`
+`anti_entropy.trigger_interval` | The tick determines how often the Active Anti-Entropy manager looks for work to do (building/expiring trees, triggering exchanges, etc). Lowering this value will speed up the rate at which all replicas are synced across the cluster. Increasing the value is not recommended. | `15s`
+`anti_entropy.concurrency_limit` | Limit how many Active Anti-Entropy exchanges or builds can happen concurrently. | `2`
+`anti_entropy.tree.expiry` | Determines how often hash trees are expired after being built. Periodically expiring a hash tree ensures that the on-disk hash tree data stays consistent with the actual K/V backend data. It also helps Riak identify silent disk failures and bit rot. However, expiration is not needed for normal active anti-entropy operations and should be infrequent for performance reasons. The time is specified in milliseconds. | `1w`
+`anti_entropy.tree.build_limit.per_timespan` | | `1h`
+`anti_entropy.tree.build_limit.number` | Restrict how fast AAE can build hash trees. Building the tree for a given partition requires a full scan over that partition's data. Once built, trees stay built until they are expired. `.number` is the number of builds; `.per_timespan` is the amount of time in which that number of builds occurs. | `1`
+`anti_entropy.use_background_manager` | Whether AAE is to use a background process to limit AAE tree rebuilds. If set to `on`, this will help to prevent system response degradation under times of heavy load from multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
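+
+Pulling the tiered-throttle example from the table above into a complete fragment, a two-tier AAE throttle would look like this (the tier boundaries and delays are the example values from the description):
+
+```riakconf
+anti_entropy = active
+anti_entropy.throttle.tier1.mailbox_size = 0
+anti_entropy.throttle.tier1.delay = 0ms
+anti_entropy.throttle.tier2.mailbox_size = 40
+anti_entropy.throttle.tier2.delay = 5ms
+```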
+
+## TicTac Active Anti-Entropy
+
+Config | Description | Default
+:------|:------------|:-------
+`tictacaae_active` | Switches TicTac AAE between active and passive. If you want to run TicTac AAE alongside legacy AAE, set both to active. Possible values are `active` or `passive`. | `passive`
+`tictacaae_dataroot` | Path under which AAE data files will be stored. | `(platform_data_dir)/tictac_aae`
+`tictacaae_parallelstore` | When running in parallel mode, which will be the default if the backend does not support native TicTac AAE (i.e. is not leveled), the type of parallel key store to be kept: `leveled_ko` (leveled and key-ordered) or `leveled_so` (leveled and segment-ordered). When running in native mode, this setting is ignored. | `leveled_ko`
+`tictacaae_rebuildwait` | The minimum number of hours to wait between rebuilds. | `336`
+`tictacaae_rebuilddelay` | The number of seconds which represents the length of the period in which the next rebuild will be scheduled. So if all vnodes are scheduled to rebuild at the same time, they will actually rebuild randomly between 0 and this value (in seconds) after the rebuild time. | `345600`
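+
+For example, enabling TicTac AAE on a node whose backend is not leveled, with a key-ordered parallel store, might look like the following (the rebuild values simply mirror the defaults above):
+
+```riakconf
+## Example TicTac AAE configuration
+tictacaae_active = active
+tictacaae_parallelstore = leveled_ko
+tictacaae_rebuildwait = 336
+tictacaae_rebuilddelay = 345600
+```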
+
+## Intra-Cluster Handoff
+
+Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].
+
+Config | Description | Default
+:------|:------------|:-------
+`handoff.max_rejects` | The maximum number of times that a secondary system within Riak, such as Riak Search, can block handoff of primary key/value data. The approximate maximum duration that a vnode can be blocked can be determined by multiplying this setting by `vnode_management_timer`. If you want to prevent handoff from ever being blocked by a secondary system, set this parameter to `0`. | `6`
+`handoff.inbound` | Whether inbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.outbound` | Whether outbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.port` | Specifies the TCP port that Riak uses for intra-cluster data handoff. | `8099`
+`handoff.ssl.certfile` | To encrypt `riak_core` intra-cluster data handoff traffic, uncomment this line and edit its path to an appropriate certfile and keyfile. |
+`handoff.ssl.keyfile` | The keyfile paired with the certfile specified in `.certfile`. |
+`handoff.use_background_manager` | Whether Riak will use a background manager to limit K/V handoff. This can help to prevent system response degradation during times of heavy load caused by multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
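+
+A sketch of enabling encrypted handoff on the default port; the certificate paths are placeholders:
+
+```riakconf
+handoff.port = 8099
+## placeholder paths - point these at real certificate files
+handoff.ssl.certfile = /etc/riak/handoff-cert.pem
+handoff.ssl.keyfile = /etc/riak/handoff-key.pem
+```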
+
+## Riak Data Types
+
+Config | Description | Default
+:------|:------------|:-------
+`datatypes.compression_level` | Whether serialized Data Types will use compression and at what level. When set to an integer, the parameter refers to the aggressiveness of compression, on a scale from 0 to 9. `on` is equivalent to 6, whereas `off` is equivalent to 0. Higher values for compression tend to be more CPU intensive. | `1`
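+
+For example, to trade CPU for smaller serialized Data Types by raising the compression level (an illustrative choice, not a recommendation):
+
+```riakconf
+datatypes.compression_level = 6
+```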
+
+## SNMP
+
+Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher.
+
+## JMX
+
+Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher.
+
+## Strong Consistency
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment.
+
+Riak's strong consistency feature has a variety of tunable parameters
+that allow you to enable and disable strong consistency, modify the
+behavior of leaders and followers, set various timeouts, and more. More
+detailed information from an operations perspective can be found in our
+documentation on [managing strong consistency][cluster ops strong consistency].
+
+Strong consistency is disabled by default. The `strong_consistency`
+parameter enables you to turn it on. This setting is available in each
+node's `riak.conf` file.
+
+Config | Description | Default
+:------|:------------|:-------
+`strong_consistency` | Enables the consensus subsystem used for strongly consistent Riak operations if set to `on`. | `off`
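+
+Enabling the subsystem is then a single line in each node's `riak.conf`:
+
+```riakconf
+strong_consistency = on
+```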
+
+Unlike the `strong_consistency` setting, the settings listed below are
+available only in `advanced.config`, in the `riak_ensemble` section of
+that file. That section looks like this:
+
+```advancedconfig
+{riak_ensemble, [
+    {parameter1, value},
+    {parameter2, value},
+    %% Other settings
+    ]}
+```
+
+Further instructions on setting parameters in `advanced.config` can be
+found in the [advanced configuration](#advanced-configuration) section below.
+
+Using these settings properly demands a firm understanding of the basic
+architecture of Riak's implementation of strong consistency. We highly
+recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind
+strong consistency before changing the defaults on these parameters.
+
+Config | Description | Default
+:------|:------------|:-------
+`ensemble_tick` | The rate at which leaders perform their periodic duties, including refreshing the leader lease, in milliseconds. This setting must be lower than both the `lease_duration` and `follower_timeout` settings (both listed below). Lower values mean that leaders perform their duties more frequently, which can allow for faster convergence if a leader goes offline and then returns to the ensemble; higher values mean that leaders perform their duties less frequently, which can reduce network overhead. | `500`
+`lease_duration` | Determines how long a leader lease remains valid without being refreshed (in milliseconds). This should be set higher than the `ensemble_tick` setting (listed above) so that leaders have time to refresh their leases before they time out, and it must be set lower than the `follower_timeout` setting (listed below). | `ensemble_tick * 3/2`
+`follower_timeout` | Determines how long a follower waits to hear from a leader before it abandons the leader (in milliseconds). This must be set greater than the `lease_duration` setting. | `lease_duration * 4`
+`alive_tokens` | Determines the number of ticks the leader will wait to hear from its associated [vnode][glossary vnode] before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before `ensemble_tick * alive_tokens` milliseconds have elapsed, the leader will give up leadership. It may be necessary to raise this setting if your Riak vnodes are frequently stalling out on slow backend reads/writes. If this setting is too low, it may cause slow requests to time out earlier than the request timeout. | `2`
+`storage_delay` | Determines how long the consensus subsystem delays syncing to disk when performing certain metadata operations (in milliseconds). This delay allows multiple operations to be coalesced into a single disk write. We do not recommend that you change this setting. | `50`
+`storage_tick` | Determines how often the consensus subsystem writes data to disk that was requested to be written asynchronously (in milliseconds). We do not recommend that you change this setting. | `5000`
+`trust_lease` | Determines whether leader leases are used to optimize reads. When set to `true`, a leader with a valid lease will handle the read directly without contacting any followers; when set to `false`, the leader will always contact followers. For more information, see our internal documentation on leader leases. | `true`
+`peer_get_timeout` | Determines the timeout used internally for reading consistent data, in milliseconds. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_put_timeout` | Determines the timeout, in milliseconds, used internally for writing consistent data. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_workers` | The number of concurrent workers used by the leader to service requests. Increasing this setting may boost performance depending on the workload. | `1`
+`tree_validation` | Determines whether Riak considers peer Merkle trees to be trusted after a node restart. When validation is enabled (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted majority. This is the safest option, as it protects Riak against undetected corruption of the Merkle tree. However, this mode reduces Riak availability since it can sometimes require more than a simple majority of nodes to be online and reachable. | `true`
+`synchronous_tree_updates` | Determines whether the metadata updates to follower Merkle trees are handled synchronously or not. When set to `true`, Riak requires two quorum round trips to occur before replying back to the client, the first quorum request to write the actual object and the second to write the Merkle tree data. When set to `false`, Riak will respond back to the client after the first round trip, letting the metadata update happen asynchronously. It's important to note that the leader always updates its local Merkle tree before responding to the client; this setting only affects the metadata writes sent to followers. In principle, asynchronous updates are unsafe: if the leader crashes before sending the metadata updates and all followers that had acknowledged the object write somehow revert to the object value immediately prior to a write request, a future read could return the immediately preceding value without realizing that it was incorrect. Given that this scenario is unlikely, this setting defaults to `false` in the name of improved performance. | `false`
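+
+Putting a few of these together, a hypothetical `riak_ensemble` section of `advanced.config` that lengthens the leader tick and its dependent timeouts might look like this. The values simply respect the ordering constraints described above (tick < lease < follower timeout); they are not tuning advice:
+
+```advancedconfig
+{riak_ensemble, [
+    %% example values that preserve tick < lease < follower_timeout
+    {ensemble_tick, 1000},
+    {lease_duration, 1500},
+    {follower_timeout, 6000}
+    ]}
+```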
+
+
+## Miscellaneous
+
+Config | Description | Default
+:------|:------------|:-------
+`metadata_cache_size` | This setting controls the size of the metadata cache for each vnode. The cache can be disabled by setting it to `off` (this is the default). Enabling the cache should not be necessary in disk-based backends (i.e. LevelDB and Bitcask) but it can help performance in the Memory backend. Note that this setting adjusts the size of the ETS table rather than the actual data. Thus, more space may be used than the simple size * number-of-vnodes calculation would imply. Caution: This setting should not be changed without extensive benchmarking. | `off`
+`max_concurrent_requests` | The maximum number of concurrent requests of each type (GET or PUT) that is allowed. Setting this value to `infinite` disables overload protection. The `erlang.process_limit` should be at least 3 times this setting. | `50000`
+`dtrace` | Whether DTrace is enabled. Do not enable unless your Erlang/OTP runtime is compiled to support DTrace, which is available in R15B01 (supported by the official source package) and in R14B04 via a custom repository and branch. | `off`
+`vnode_management_timer` | Sets the frequency with which vnodes attempt to trigger handoff between this node and other nodes in the cluster. | `10s` (10 seconds)
+`retry_put_coordinator_failure` | When a PUT (i.e. write) request fails, Riak will retry the operation if this setting is set to `on`, which is the default. Setting it to `off` will speed response times on PUT requests in general, but at the risk of potentially increasing the likelihood of write failure. | `on`
+`background_manager` | Riak's background manager is a subsystem that coordinates access to shared resources from other Riak subsystems. The background manager can help to prevent system response degradation under times of heavy load caused by multiple background tasks. | `on`
+
+## Advanced Configuration
+
+The `advanced.config` file takes the same format as the `app.config`
+file familiar to users of versions of Riak prior to 2.0. Here is an
+example:
+
+```advancedconfig
+[
+  {riak_core,
+    [
+      {cluster_mgr, {"127.0.0.1", 8098 } },
+      %% more riak_core configs
+    ]},
+
+  {riak_repl,
+    [
+      {data_root, "/var/db/riak/riak_repl/"},
+      %% more riak_repl configs
+    ]
+  }
+].
+```
+
+The following settings are available in the `advanced.config` file:
+
+#### `riak_repl` settings
+
+Most settings that are configurable through `advanced.config` are
+related to Riak's `riak_repl` subsystem.
+
+Config | Description | Default
+:------|:------------|:-------
+`data_root` | Path (relative or absolute) to the working directory for the replication process. | `/var/db/riak/riak_repl/`
+`max_fssource_cluster` | The hard limit of fullsync workers that will be running on the source side of a cluster across all nodes on that cluster for a fullsync to a sink cluster. This means that if you have configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `5`
+`max_fssource_node` | This setting limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `1`
+`max_fssink_node` | This setting limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `1`
+`fullsync_on_connect` | Whether to initiate a fullsync on initial connection from the sink cluster. | `true`
+`fullsync_interval` | A single-integer value representing the duration to wait, in minutes, between fullsyncs, or a list of `{clustername, time_in_minutes}` pairs for each sink participating in fullsync replication. | `30`
+`rtq_max_bytes` | The maximum size, in bytes, to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync. | `104857600`
+`proxy_get` | Whether to enable Riak CS `proxy_get` and block filter. | `disabled`
+`rt_heartbeat_interval` | A heartbeat message is sent from the source to the sink every `rt_heartbeat_interval` seconds. Setting `rt_heartbeat_interval` to `undefined` disables the realtime heartbeat. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
+`rt_heartbeat_timeout` | If a heartbeat response is not received within the time period specified by this setting (in seconds), the source connection exits and will be re-established. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
+`realtime_connection_rebalance_max_delay_secs` | Should a server on the source cluster be restarted, this is the amount of time (in seconds) before the realtime connections are rebalanced by a change in the number of source nodes. | `300`
+`fullsync_use_background_manager` | By default, fullsync replication will attempt to coordinate with other Riak subsystems that may be contending for the same resources. This will help to prevent system response degradations during times of heavy load from multiple background tasks. To disable background coordination, set this parameter to `false`. This feature is available only in Riak KV Enterprise Edition 2.0 and later as well as Riak KV 2.2.6 onwards. | `true`
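+
+As an illustration, a `riak_repl` section that shortens the fullsync interval and keeps the default realtime queue cap might look like the following (the values are examples only):
+
+```advancedconfig
+{riak_repl, [
+    %% example values - adjust for your replication topology
+    {data_root, "/var/db/riak/riak_repl/"},
+    {fullsync_on_connect, true},
+    {fullsync_interval, 60},
+    {rtq_max_bytes, 104857600}
+    ]}
+```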
+
+#### Upgrading Riak Search with `advanced.config`
+
+If you are upgrading to Riak 2.x and wish to upgrade to the new Riak Search (codename Yokozuna), you will need to enable
+legacy Search while the upgrade is underway. You can add the following
+snippet to your `advanced.config` configuration to do so:
+
+```advancedconfig
+[
+  %% Other configs
+
+  {riak_search, [ {enabled, true} ]},
+  {merge_index, [
+    {data_root, "/var/lib/riak/merge_index"},
+    {buffer_rollover_size, 1048576},
+    {max_compact_segments, 20}
+  ]},
+
+  %% Other configs
+].
+```
+
+#### Other settings
+
+There are four non-`riak_repl` settings available in
+`advanced.config`.
+
+Config | Section | Description | Default
+:------|:--------|:------------|:-------
+`add_paths` | `riak_kv` | If you are installing custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies the paths to any compiled `.beam` files that you wish to use. This is expressed as a list of absolute paths on the node's filesystem, e.g. `[ "/tmp", "/other" ]`. |
+`cluster_mgr` | `riak_core` | The cluster manager listens for connections from remote clusters on the specified IP and port. Every node runs one cluster manager, but only the cluster manager running on the cluster leader will service requests. This can change as nodes enter and leave the cluster. | `{"127.0.0.1", 9080}`
+`delete_mode` | `riak_kv` | Specifies how Riak behaves after objects are marked for deletion with a tombstone. There are three possible settings: `keep` disables tombstone removal altogether; `immediate` removes objects' tombstones as soon as the delete request is received; and setting `delete_mode` to an integer value specifies the number of milliseconds to wait before removing tombstones. More information can be found in Object Deletion. | `3000` (3 seconds)
+`target_n_val` | `riak_core` | The highest `n_val` that you generally intend to use. This setting affects how partitions are distributed within the cluster, helping to ensure that "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. You will need to change this setting only in rare circumstances. Assuming that `ring_size` is a power of 2, the ideal value for this setting is both (a) greater than or equal to the largest `n_val` for any bucket type and (b) an even divisor of the number of partitions in the ring, i.e. `ring_size`. The default is `4`, and the number of physical nodes in your cluster must be greater than `target_n_val` for this setting to be effective at preventing hot spots. | `4`
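+
+A sketch combining three of these settings in one `advanced.config`; the path and values below are placeholders, not recommendations:
+
+```advancedconfig
+[
+  {riak_kv, [
+      %% placeholder path to compiled custom .beam files
+      {add_paths, ["/opt/riak/custom_beams"]},
+      {delete_mode, 3000}
+  ]},
+  {riak_core, [
+      {target_n_val, 4}
+  ]}
+].
+```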
+
+## Cluster Job Controls
+
+{{% note title="Warning" %}}
+Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
+{{% /note %}}
+
+The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
+`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
+`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/search.md b/content/riak/kv/3.0.1/configuring/search.md
new file mode 100644
index 0000000000..e576248b53
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/search.md
@@ -0,0 +1,278 @@
+---
+title: "Riak Search Settings"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Riak Search Settings"
+    identifier: "configuring_search"
+    weight: 160
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/advanced/configs/search/
+  - /riak/kv/3.0.1/ops/advanced/configs/search/
+
+---
+
+[usage search]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search
+[usage search schema]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search-schemas
+[usage search data types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/searching-data-types
+[usage custom extractors]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/custom-extractors
+[cluster-ops aae throttle]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/active-anti-entropy/#throttling
+[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference
+[config reference#search]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/#search
+[glossary aae]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#active-anti-entropy-aae
+[security index]: {{<baseurl>}}riak/kv/3.0.1/using/security/
+
+[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
+[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation
+
+This page covers how to use Riak Search (with
+[Solr](http://lucene.apache.org/solr/) integration).
+
+For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].
+
+If you are looking to develop on or with Riak Search, take a look at:
+
+* [Using Search][usage search]
+* [Search Schema][usage search schema]
+* [Custom Search Extractors][usage custom extractors]
+* [Riak KV Data Types and Search][usage search data types]
+
+## Overview
+
+We'll be walking through:
+
+1. [Prerequisites](#prerequisites)
+2. [Enable Riak Search](#enabling-riak-search)
+3. [Search Configuration Settings](#search-config-settings)
+4. [Additional Solr Information](#more-on-solr)
+
+## Prerequisites
+
+Because Solr is a Java application, you will need to install **Java 7
+or later** on every node. Installation packages can be found on the [Java SE Downloads
+page][java se downloads] and instructions in the [Java SE documentation site][java se docs].
+
+
+## Enabling Riak Search
+
+Riak Search is not enabled by default, so you must enable it in every
+node's [configuration file][config reference] as follows:
+
+```riak.conf
+search = on
+```
+
+
+## Search Config Settings
+
+You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation.
+
+### `search`
+
+Enables or disables search; defaults to `off`.
+
+Valid values: `on` or `off`
+
+### `search.anti_entropy.data_dir`
+
+The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`.
+
+Valid values: a directory
+
+### `search.anti_entropy.throttle`
+
+Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`.
+
+Valid values: `on` or `off`
+
+You can read more about throttling [here][cluster-ops aae throttle].
+
+### `search.anti_entropy.throttle.$tier.delay`
+
+Sets the delay for an [active anti-entropy][glossary aae] throttling tier; no default.
+
+Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time delay that the throttle should observe at that size and above.
+
+For example:
+
+```
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+This will introduce a 5 millisecond sleep for any queue of length 40 or higher. If configured, there must be a tier whose queue length is 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer with time units (e.g. `5ms`)
+
+### `search.anti_entropy.throttle.$tier.solrq_queue_length`
+
+Sets the minimum Solrq queue length for an [active anti-entropy][glossary aae] throttling tier; no default.
+
+Each tier is a minimum Solrq queue size and a [time delay](#search-anti-entropy-throttle-tier-delay) that the throttle
+should observe at that size and above.
+
+For example:
+
+```
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 0ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 40
+search.anti_entropy.throttle.tier2.delay = 5ms
+```
+
+This will introduce a 5 millisecond sleep for any queue of length 40 or higher. If configured, there must be a tier whose queue length is 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).
+
+Valid values: Non-negative integer
+
+### `search.dist_query`
+
+Enables this node in distributed query plans; defaults to `on`.
+
+If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak Search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long-running administrative operation (e.g.
+reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.
+
+This setting can also be changed via `riak admin` by issuing one of the following commands:
+
+```
+riak admin set search.dist_query=off
+```
+
+or
+
+```
+riak admin set search.dist_query=on
+```
+
+Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak admin` feature. Setting `search.dist_query` to `off` in riak.conf will prevent the node from being included in search queries until it is fully spun up.
+
+Valid values: `on` or `off`
+
+### `search.index.error_threshold.failure_count`
+
+The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.
+
+Valid values: Integer
+
+### `search.index.error_threshold.failure_interval`
+
+The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.
+
+If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.
+
+Valid values: Milliseconds
+
+### `search.index.error_threshold.reset_interval`
+
+The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.
+
+Valid values: Milliseconds
+
+### `search.queue.batch.flush_interval`
+
+The maximum delay between notifications to flush batches to Solr; defaults to `1000` (milliseconds).
+
+This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.
+
+Valid values: Integer with time units (`ms`, `s`, `m`, or `h`)
+
+### `search.queue.batch.maximum`
+
+The maximum batch size, in number of Riak objects; defaults to `500`.
+
+Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.
+
+Valid values: Integer
+
+### `search.queue.batch.minimum`
+
+The minimum batch size, in number of Riak objects; defaults to `10`.
+
+Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.
+
+Valid values: Integer
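+
+To make the interaction between these three batching parameters concrete, here is a riak.conf sketch (the values are illustrative, not recommendations): batches of up to 500 objects are flushed as they fill, and anything smaller than 10 objects is still flushed within half a second.
+
+```riak.conf
+search.queue.batch.minimum = 10
+search.queue.batch.maximum = 500
+search.queue.batch.flush_interval = 500ms
+```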
+### `search.queue.high_watermark`
+
+The queue high water mark; defaults to `1000`.
+
+If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem when writes into Solr start to fall behind.
+
+Valid values: Integer
+
+### `search.queue.high_watermark.purge_strategy`
+
+The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.
+
+Valid values: `purge_one`, `purge_index`, or `off`
+
+* `purge_one` removes the oldest item on the queue from an erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `purge_index` removes all items associated with one random erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
+* `off` disables purging
+
+### `search.root_dir`
+
+The root directory in which index data and configuration is stored; defaults to `./data/yz`.
+
+Valid values: a directory
+
+### `search.solr.jvm_options`
+
+The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.
+
+Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.
+
+Valid values: Java command-line arguments
+
+### `search.solr.jmx_port`
+
+The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.
+
+Valid values: Integer
+
+**Note:** JMX ceased being a Riak feature in Riak KV 2.9.0p5. This setting is left here for reference but no longer affects anything.
+
+### `search.solr.port`
+
+The port number to which Solr binds (note: binds on every interface); defaults to `8093`.
+
+Valid values: Integer
+
+### `search.solr.start_timeout`
+
+How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.
+
+Values lower than 1s will be rounded up to 1s.
+
+Valid values: Integer with time units (e.g. 2m)
+
+
+## More on Solr
+
+### Solr JVM and Ports
+
+Riak Search runs one Solr process per node to manage its indexing and
+search functionality. While the underlying project manages
+index distribution, node coverage for queries, active anti-entropy
+(AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.
+
+Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports
+to the outside world.
+
+### Solr for Operators
+
+For further information on Solr monitoring, tuning, and performance, we
+recommend the following documents for getting started:
+
+* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
+* [Solr Performance
+  Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
+* [Solr Performance
+  Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
+* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)
+
+A wide variety of other documentation is available from the Solr OSS
+community.
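+
+Putting the basics together, a node's riak.conf might include a block like the following sketch. Only `search = on` is required; the other lines simply restate defaults discussed above and are shown to make the settings visible in one place:
+
+```riak.conf
+search = on
+search.solr.port = 8093
+search.solr.start_timeout = 30s
+search.dist_query = on
+```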
+ + + diff --git a/content/riak/kv/3.0.1/configuring/strong-consistency.md b/content/riak/kv/3.0.1/configuring/strong-consistency.md new file mode 100644 index 0000000000..68c10b5e8a --- /dev/null +++ b/content/riak/kv/3.0.1/configuring/strong-consistency.md @@ -0,0 +1,671 @@ +--- +title: "Implementing Strong Consistency" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Implementing Strong Consistency" + identifier: "configuring_strong_consistency" + weight: 190 + parent: "configuring" +toc: true +aliases: + +--- + +[apps strong consistency]: {{}}riak/kv/3.0.1/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/3.0.1/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/3.0.1/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/3.0.1/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/3.0.1/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/3.0.1/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/3.0.1/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/3.0.1/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/3.0.1/using/cluster-operations/bucket-types +[use admin riak admin#ensemble]: {{}}riak/kv/3.0.1/using/admin/riak admin/#ensemble-status +[use admin riak admin]: {{}}riak/kv/3.0.1/using/admin/riak admin +[config reference#advanced]: {{}}riak/kv/3.0.1/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/3.0.1/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/3.0.1/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/3.0.1/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/3.0.1/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/3.0.1/developing/data-types +[glossary aae]: {{}}riak/kv/3.0.1/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/3.0.1/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/3.0.1/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/3.0.1/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/3.0.1/developing/client-libraries + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. + +This document provides information on configuring and monitoring a Riak +cluster's optional strong consistency subsystem. Documentation for +developers building applications using Riak's strong consistency feature +can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical +treatment can be found in [Strong Consistency][concept strong consistency]. + +## Minimum Cluster Size + +In order to use strong consistency in Riak, **your cluster must consist +of at least three nodes**. If it does not, all strongly consistent +operations will fail. 
If your cluster is smaller than three nodes, you
+will need to [add more nodes][cluster ops add remove node] and make sure
+that strong consistency is [enabled](#enabling-strong-consistency) on all of them.
+
+Strongly consistent operations on a given key may also fail if a
+majority of object replicas in a given ensemble are unavailable, whether
+due to slowness, crashes, or network partitions. This means that you may
+see strongly consistent operations fail even if the minimum cluster size
+requirement has been met. More information on ensembles can be found in
+[Implementation Details](#implementation-details).
+
+While strong consistency requires at least three nodes, we have a
+variety of recommendations regarding cluster size, which can be found in
+[Fault Tolerance](#fault-tolerance).
+
+## Enabling Strong Consistency
+
+Strong consistency in Riak is disabled by default. You can enable it in
+each node's [configuration files][config reference#strong-cons].
+
+```riakconf
+strong_consistency = on
+```
+
+```appconfig
+%% In the older, app.config-based system, the strong consistency
+%% parameter is enable_consensus:
+
+{riak_core, [
+    % ...
+    {enable_consensus, true},
+    % ...
+    ]}
+```
+
+Remember that you must [restart your node][use admin riak cli] for
+configuration changes to take effect.
+
+For strong consistency requirements to be applied to specific keys,
+those keys must be in [buckets][concept buckets] bearing a bucket type with the
+`consistent` property set to `true`. More information can be found in
+[Using Bucket Types][cluster ops bucket types].
+
+If you enable strong consistency on all nodes in a cluster with fewer
+than three nodes, strong consistency will be **enabled** but not yet
+**active**. Strongly consistent operations are not possible in this
+state. Once at least three nodes with strong consistency enabled are
+detected in the cluster, the system will be activated and ready for use.
+You can check on the status of the strong consistency subsystem using
+the [`riak admin ensemble-status`][use admin riak admin#ensemble] command.
+
+## Fault Tolerance
+
+Strongly consistent operations in Riak are necessarily less highly
+available than [eventually consistent][concept eventual consistency] operations
+because strongly consistent operations can only succeed if a **quorum**
+of object replicas are currently reachable. A quorum can be expressed as
+N / 2 + 1 (or `n_val` / 2 + 1), rounding down, meaning that 3 replicas
+constitute a quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4
+replicas are unavailable, for example, no strongly consistent operations
+on that object can succeed.
+
+While Riak uses N=3 by default, bear in mind that **higher values of N
+will allow for more fault tolerance**. The table below shows the number
+of allowable missing replicas for assorted values of N:
+
+Replicas | Allowable missing replicas
+:--------|:--------------------------
+3 | 1
+5 | 2
+7 | 3
+9 | 4
+15 | 7
+
+Thus, we recommend setting `n_val` higher than the default of 3 for
+strongly consistent operations. More on `n_val` in the section below.
+
+### n_val Recommendations
+
+Due to the quorum requirements explained above, we recommend that you
+use _at least_ N=5 for strongly consistent data. You can set the value
+of N, i.e. `n_val`, for buckets
+[using bucket types][cluster ops bucket types].
For example, you +can create and activate a bucket type with N set to 5 and strong +consistency enabled---we'll call the bucket type +`consistent_and_fault_tolerant`---using the following series of +[commands][use admin riak admin]: + +```bash +riak admin bucket-type create consistent_and_fault_tolerant \ + '{"props": {"consistent":true,"n_val":5}}' +riak admin bucket-type activate consistent_and_fault_tolerant +``` + +If the `activate` command outputs `consistent_and_fault_tolerant has +been activated`, the bucket type is now ready to provide strong +consistency guarantees. + +#### Setting the target_n_val parameter + +The `target_n_val` parameter sets the highest `n_val` that you intend to +use in an entire cluster. The purpose of this parameter is to ensure +that so-called "hot spots" don't occur, i.e. that data is never stored +more than once on the same physical node. This can happen when: + +* `target_n_val` is greater than the number of physical nodes, or +* the `n_val` for a bucket is greater than `target_n_val`. + +A problem to be aware of if you're using strong consistency is that the +default for `target_n_val` is 4, while our suggested minimum `n_val` for +strongly consistent bucket types is 5. This means that you will need to +raise `target_n_val` if you intend to use an `n_val` over 4 for _any_ +bucket type in your cluster. If you anticipate using an `n_val` of 7 as +the largest `n_val` within your cluster, for example, you will need to +set `target_n_val` to 7. + +This setting is not contained in `riak.conf`, and must instead be set in +the `advanced.config` file. For more information, see our documentation +on [advanced configuration][config reference#advanced]. + +If you are using strong consistency in a cluster that has already been +created with a `target_n_val` that is too low (remember that the default +is too low), you will need to raise it to the desired higher value and +restart each node. + +#### Note on Bucket Properties + +The `consistent` bucket property is one of two bucket properties, +alongside [`datatype`][cluster ops bucket types], that cannot be changed once a +bucket type has been created. + +Furthermore, if `consistent` is set to `true` for a bucket type, you +cannot change the `n_val` for the bucket type once it's been created. If +you attempt to do so, you'll see the following error: + +``` +Error updating bucket : +n_val cannot be modified for existing consistent type +``` + +If you've created a bucket type with a specific `n_val` and wish to +change it, you will need to create a new bucket type with the +appropriate `n_val` and use the new bucket type instead. + +### Fault Tolerance and Cluster Size + +From the standpoint of strongly consistent operations, larger clusters +tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down. + +Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If +two nodes go down, _all_ ensembles will lose quorum and will be unable +to function. Strongly consistent operations on the entire keyspace will +fail until at least one node is brought back online. And even when that +one node is brought back online, a significant portion of the keyspace +will continue to be unavailable for strongly consistent operations. + +For the sake of contrast, imagine a 50-node cluster in which all +ensembles are N=5 (i.e. all objects are replicated to five nodes). 
In
+this cluster, each node is involved in only 10% of the total ensembles;
+if a single node fails, that failure will thus impact only 10% of
+ensembles. In addition, because N is set to 5, that will not impact
+quorum for _any_ ensemble in the cluster; two additional node failures
+would need to occur for quorum to be lost for _any_ ensemble. And even
+in the case of three nodes failing, it is highly unlikely that that
+failure would impact the same ensembles; if it did, only those ensembles
+would become unavailable, affecting only 10% of the key space, as
+opposed to 100% in the example of a 3-node cluster consisting of N=3
+ensembles.
+
+These examples illustrate why we recommend higher values for N---again,
+at least N=5---as well as clusters with many nodes. The 50-node cluster
+example above is used only to illustrate why larger clusters are more
+fault tolerant. The definition of "many" nodes will vary according to your needs.
+For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].
+
+### Offline Node Recommendations
+
+In general, strongly consistent Riak is more sensitive to the number of
+nodes in the cluster than eventually consistent Riak, due to the quorum
+requirements described above. While Riak is designed to withstand a
+variety of failure scenarios that make nodes in the cluster unreachable,
+such as hardware or network failure, **we nonetheless recommend that you
+limit the number of nodes that you intentionally down or reboot**.
+Having multiple nodes leave the cluster at once can threaten quorum and
+thus affect the viability of some or all strongly consistent operations,
+depending on the size of the cluster.
+
+If you're using strong consistency and you do need to reboot multiple
+nodes, we recommend rebooting them very carefully. Rebooting nodes too
+quickly in succession can force the cluster to lose quorum and thus be
+unable to service strongly consistent operations. The best strategy is
+to reboot nodes one at a time and wait for each node to rejoin existing
+[ensembles][cluster ops strong consistency] before
+continuing to the next node. At any point in time, the state of
+currently existing ensembles can be checked using [`riak admin ensemble-status`][use admin riak admin#ensemble].
+
+## Performance
+
+If you run into performance issues, bear in mind that the key space in a
+Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of
+that key space. Larger [ring sizes][concept clusters] allow more
+independent consensus groups to exist in a cluster, which can provide
+for more concurrency and higher throughput, and thus better performance.
+The ideal ring size, however, will also depend on the number of nodes in
+the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].
+
+Adding nodes to your cluster is another means of enhancing the
+performance of strongly consistent operations. Instructions on doing so
+can be found in [Adding and Removing Nodes][cluster ops add remove node].
+
+Your cluster's configuration can also affect strong consistency
+performance. See the section on [configuration][config reference#strong-cons] below.
+
+## riak admin ensemble-status
+
+The [`riak admin`][use admin riak admin] interface
+used for general node/cluster management has an `ensemble-status`
+command that provides insight into the current status of the consensus
+subsystem undergirding strong consistency.
+ +Running the command by itself will provide the current state of the +subsystem: + +```bash +riak admin ensemble-status +``` + +If strong consistency is not currently enabled, you will see `Note: The +consensus subsystem is not enabled.` in the output of the command; if +strong consistency is enabled, you will see output like this: + +``` +============================== Consensus System =============================== +Enabled: true +Active: true +Ring Ready: true +Validation: strong (trusted majority required) +Metadata: best-effort replication (asynchronous) + +================================== Ensembles ================================== + Ensemble Quorum Nodes Leader +------------------------------------------------------------------------------- + root 4 / 4 4 / 4 riak@riak1 + 2 3 / 3 3 / 3 riak@riak2 + 3 3 / 3 3 / 3 riak@riak4 + 4 3 / 3 3 / 3 riak@riak1 + 5 3 / 3 3 / 3 riak@riak2 + 6 3 / 3 3 / 3 riak@riak2 + 7 3 / 3 3 / 3 riak@riak4 + 8 3 / 3 3 / 3 riak@riak4 +``` + +### Interpreting ensemble-status Output + +The following table provides a guide to `ensemble-status` output: + +Item | Meaning +:----|:------- +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  • Ensemble - The ID of the ensemble
  • Quorum - The number of ensemble peers that are either leading or following
  • Nodes - The number of nodes currently online
  • Leader - The current leader node for the ensemble
+ +**Note**: The **root ensemble**, designated by `root` in the sample +output above, is a special ensemble that stores a list of nodes and +ensembles in the cluster. + +More in-depth information on ensembles can be found in our [internal +documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md). + +### Inspecting Specific Ensembles + +The `ensemble-status` command also enables you to directly inspect the +status of specific ensembles in a cluster. The IDs for all current +ensembles are displayed in the `Ensembles` section of the +`ensemble-status` output described above. + +To inspect a specific ensemble, specify the ID: + +```bash +riak admin ensemble-status +``` + +The following would inspect ensemble 2: + +```bash +riak admin ensemble-status 2 +``` + +Below is sample output for a single ensemble: + +``` +================================= Ensemble #2 ================================= +Id: {kv,0,3} +Leader: riak@riak2 (2) +Leader ready: true + +==================================== Peers ==================================== + Peer Status Trusted Epoch Node +------------------------------------------------------------------------------- + 1 following yes 1 riak@riak1 + 2 leading yes 1 riak@riak2 + 3 following yes 1 riak@riak3 +``` + +The table below provides a guide to the output: + +Item | Meaning +:----|:------- +`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. All ensembles are `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible. +`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble. +`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail. +`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.
  • Peer - The ID of the peer
  • Status - Whether the peer is a leader or a follower
  • Trusted - Whether the peer's Merkle tree is currently considered trusted or not
  • Epoch - The current consensus epoch for the peer. The epoch is incremented each time the leader changes.
  • Node - The node on which the peer resides.
+
+More information on leaders, peers, Merkle trees, and other details can
+be found in [Implementation Details](#implementation-details) below.
+
+## Implementation Details
+
+Strong consistency in Riak is handled by a subsystem called
+[`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
+This system functions differently from other systems in Riak in a number
+of ways, and many of these differences are important to bear in mind for
+operators configuring their cluster's usage of strong consistency.
+
+### Basic Operations
+
+The first major difference is that strongly consistent Riak involves a
+different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types
+of atomic operations on objects:
+
+* **Get** operations work just as they do against
+  non-strongly-consistent keys, but with two crucial differences:
+  1. Connecting clients are guaranteed to return the most recently
+     written value (which makes those operations CP, i.e. consistent and
+     partition tolerant)
+  2. Reads on strongly consistent keys *never* return siblings, hence
+     there is no need to develop any sort of conflict resolution
+     strategy for those keys
+* **Conditional put** operations write an object only if no object
+  currently exists in that key. The operation will fail if the key
+  already exists; if the key was never written or has been deleted, the
+  operation succeeds.
+* **Conditional modify** operations are compare-and-swap (CAS)
+  operations that succeed only if the value of a key has not changed
+  since it was previously read.
+* **Delete** operations work mostly like they do against
+  non-strongly-consistent keys, with the exception that
+  [tombstones][cluster ops obj del] are not harvested, which is
+  the equivalent of having `delete_mode` set to `keep`.
+
+**From the standpoint of clients connecting to Riak, there is little
+difference between strongly and non-strongly consistent data**. The
+operations performed on objects---reads, writes, deletes, etc.---are the
+same, which means that the client API for strong consistency is
+essentially the same as it is for eventually consistent operations, with
+the important exception of error handling.
+
+### Ensembles
+
+The main actors in Riak's implementation of strong consistency are
+**ensembles**, which are independent groups that watch over a portion of
+a Riak cluster's key space and coordinate strongly consistent operations
+across nodes. When watching over a given key space, ensembles must act
+upon multiple replicas of a given object, the number of which is
+specified by `n_val` (more on this in [Replication Properties][apps replication properties]).
+
+Eventually consistent Riak can service requests even when only a single
+object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it
+requires that a **quorum** of object replicas be online and reachable,
+where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not
+available for a key, all strongly consistent operations against that key
+will fail**.
+
+More information can be found in the section on Fault Tolerance above.
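+
+Because the quorum rule governs everything above, it can help to see it computed. A quick bash sketch (purely illustrative) of the quorum size for common `n_val` settings, using integer division:
+
+```bash
+# Quorum for strong consistency is floor(n_val / 2) + 1
+for n in 3 5 7 9; do
+  echo "n_val=$n -> quorum=$(( n / 2 + 1 )) -> tolerates $(( n - (n / 2 + 1) )) missing replicas"
+done
+```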
+
+### Peers, Leaders, Followers, and Workers
+
+All ensembles in strongly consistent Riak consist of agents called
+**peers**. The number of peers in an ensemble is defined by the `n_val`
+of that ensemble, i.e. the number of object replicas that the
+ensemble watches over. Amongst the peers in the ensemble, there are two
+basic actors: **leaders** and **followers**.
+
+Leaders and followers coordinate with one another on most requests.
+While leaders and followers coordinate on all writes, i.e. all puts and
+deletes, you can enable leaders to respond to gets without the need to
+coordinate with followers. This is known as granting a **leader lease**.
+Leader leases are enabled by default, and are disabled (or re-enabled)
+at the cluster level. A more in-depth account of ensemble behavior can
+be found in our [internal
+documentation](https://github.com/basho/riak_ensemble/tree/develop/doc).
+
+In addition to leaders and followers, ensemble peers use lightweight
+Erlang processes called **workers** to perform long-running K/V
+operations, allowing peers to remain responsive to requests. The number
+of workers assigned to each peer depends on your configuration.
+
+These terms should be borne in mind in the sections on configuration
+below.
+
+### Integrity Checking
+
+An essential part of implementing a strong consistency subsystem in a
+distributed system is **integrity checking**, which is a process that
+guards against data corruption and inconsistency even in the face of
+network partitions and other adverse events that Riak was built to
+handle gracefully.
+
+Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency
+integrity checking utilizes [Merkle
+trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on
+disk. All peers in an ensemble, i.e. all leaders and followers, maintain
+their own Merkle trees and update those trees in the event of most
+strongly consistent operations. Those updates can occur synchronously or
+asynchronously from the standpoint of client operations, depending on
+the configuration that you specify.
+
+While integrity checking takes place automatically in Riak, there are
+important aspects of its behavior that you can configure. See the Merkle Tree settings section below for more
+information on configurable parameters.
+
+## Configuring Strong Consistency
+
+The `riak_ensemble` subsystem provides a wide variety of tunable
+parameters that you can adjust to fit the needs of your Riak cluster.
+All `riak_ensemble`-specific parameters, with the exception of the
+`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency),
+must be set in each node's `advanced.config` file, _not_ in `riak.conf`
+or `app.config`.
+
+Information on the syntax and usage of `advanced.config` can be found in
+our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full
+listing of [strong-consistency-related configuration parameters][config reference#strong-cons].
+
+Please note that the sections below require a basic understanding of the
+following terms:
+
+* ensemble
+* peer
+* leader
+* follower
+* worker
+* integrity checking
+* Merkle tree
+
+For an explanation of these terms, see the [Implementation Details](#implementation-details) section
+above.
+
+### Leader Behavior
+
+The `trust_lease` setting determines whether leader leases are used to
+optimize reads.
When set to `true`, a leader with a valid lease can
+handle reads directly without needing to contact any followers. When
+`false`, the leader will always contact followers, which can lead to
+degraded read performance. The default is `true`. We recommend leaving
+leader leases enabled for performance reasons.
+
+All leaders have periodic duties that they perform, including refreshing
+the leader lease. You can determine how frequently this occurs, in
+milliseconds, using the `ensemble_tick` setting. The default is 500
+milliseconds. Please note that this setting must be lower than both
+the `lease_duration` and `follower_timeout` settings (both explained
+below).
+
+If you set `trust_lease` to `true`, you can also specify how long a
+leader lease remains valid without being refreshed using the
+`lease_duration` setting, which is specified in milliseconds. This
+setting should be higher than `ensemble_tick` to ensure that leaders
+have time to refresh their leases before they time out, and it _must_
+be lower than `follower_timeout`, explained in the section below. The
+default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400,
+`lease_duration` will default to 600.
+
+### Worker Settings
+
+You can choose how many workers are assigned to each peer using the
+`peer_workers` setting. Workers are lightweight processes spawned by
+leaders and followers. While increasing the number of workers will make
+the strong consistency subsystem slightly more computationally
+expensive, more workers can mean improved performance in some cases,
+depending on the workload. The default is 1.
+
+### Timeouts
+
+You can establish timeouts for both reads and writes (puts and deletes)
+using the `peer_get_timeout` and `peer_put_timeout` settings,
+respectively. Both are expressed in milliseconds and default to 60000
+(1 minute).
+
+Longer timeouts will decrease the likelihood that read or write
+operations will fail due to long computation times; shorter timeouts
+entail shorter wait times for connecting clients, but at a higher risk
+of failed operations under heavy load.
+
+### Merkle Tree Settings
+
+Leaders and followers in Riak's strong consistency system maintain
+persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for
+all data stored by that peer. More information can be found in the
+**Integrity Checking** section above. The two sections directly below
+describe Merkle-tree-related parameters.
+
+#### Tree Validation
+
+The `tree_validation` parameter determines whether Riak considers Merkle
+trees to be trusted after peers are restarted (for whatever reason).
+When enabled, i.e. when `tree_validation` is set to `true` (the
+default), Riak does not trust peer trees after a restart, instead
+requiring the peer to sync with a trusted quorum. While this is the
+safest mode because it protects Riak against silent corruption in Merkle
+trees, it carries the drawback that it can reduce Riak availability by
+requiring more than a simple majority of nodes to be online and
+reachable when peers restart.
+
+If you are using ensembles with N=3, we strongly recommend setting
+`tree_validation` to `false`.
+
+#### Synchronous vs. Asynchronous Tree Updates
+
+Merkle tree updates can happen synchronously or asynchronously. This is
+determined by the `synchronous_tree_updates` parameter. When set to
+`false`, which is the default, Riak responds to the client after the
+first roundtrip that updates the followers' data but before the second
+roundtrip required to update the followers' Merkle trees, allowing the
+Merkle tree update to happen asynchronously in the background; when set
+to `true`, Riak requires two quorum roundtrips to occur before replying
+back to the client, which can increase per-request latency.
+
+Please note that this setting applies only to Merkle tree updates sent
+to followers. Leaders _always_ update their local Merkle trees before
+responding to the client. Asynchronous updates can be unsafe in certain
+scenarios. For example, if a leader crashes before sending metadata
+updates to followers _and_ all followers that had acknowledged the write
+somehow revert the object value immediately prior to the write request,
+a future read could hypothetically return the immediately preceding
+value without realizing that the value was incorrect. Setting
+`synchronous_tree_updates` to `false` does bear this possibility, but it
+is highly unlikely.
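+
+As a consolidated illustration, the parameters above might be set in `advanced.config` along the following lines. This is a sketch, not a recommendation: the `riak_ensemble` section name is an assumption, and every value here is illustrative; consult the [strong-consistency configuration listing][config reference#strong-cons] for the authoritative names, defaults, and placement.
+
+```advancedconfig
+{riak_ensemble, [
+    %% Illustrative values only; note the required ordering
+    %% ensemble_tick < lease_duration < follower_timeout
+    {trust_lease, true},
+    {ensemble_tick, 500},
+    {lease_duration, 750},
+    {follower_timeout, 3000},
+    {peer_workers, 2},
+    {peer_get_timeout, 60000},
+    {peer_put_timeout, 60000},
+    {tree_validation, true},
+    {synchronous_tree_updates, false}
+]}
+```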
+
+## Strong Consistency and Active Anti-Entropy
+
+Riak's [active anti-entropy][glossary aae] \(AAE) feature _can_ repair strongly
+consistent data. Although it is not necessary to use active anti-entropy
+if you are using strong consistency, we nonetheless recommend doing so.
+
+Without AAE, all object conflicts are repaired via read repair.
+Read repair, however, cannot repair conflicts in so-called "cold data,"
+i.e. data that may not be read for long periods of time. While using AAE
+does entail small performance losses, not using AAE can lead to problems
+with silent on-disk corruption.
+
+## Strong Consistency and Bitcask
+
+One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability in objects' TTL. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in
+strongly consistent buckets only in situations when these occasional
+irregularities are acceptable.
+
+## Important Caveats
+
+The following Riak features are not currently available in strongly
+consistent buckets:
+
+* [Secondary indexes][cluster ops 2i] - If you do attach
+  secondary index metadata to objects in strongly consistent buckets,
+  strongly consistent operations can still proceed, but that metadata
+  will be silently ignored.
+* [Riak Data Types][dev data types] - Data Types can currently be
+  used only in an eventually consistent fashion.
+* [Using commit hooks][usage commit hooks] - Neither pre- nor post-commit hooks are supported in strongly consistent buckets. If you do associate a
+  strongly consistent bucket with one or more commit hooks, strongly
+  consistent operations can proceed as normal in that bucket, but all
+  commit hooks will be silently ignored.
+
+Furthermore, you should also be aware that strong consistency guarantees
+are applied only at the level of single keys.
There is currently no +support within Riak for strongly consistent operations against multiple +keys, although it is always possible to incorporate client-side write +and read locks in applications that use strong consistency. + +## Known Issues + +There are a few known issues that you should be aware of when using the +latest version of strong consistency. + +* **Consistent reads of never-written keys create tombstones** - A + [tombstone][cluster ops obj del] will be written if you perform a read + against a key that a majority of peers claims to not exist. This is + necessary for certain corner cases in which offline or unreachable + replicas containing partially written data need to be rolled back in + the future. +* **Consistent keys and key listing** - In Riak, key listing + operations, such as listing all the keys in a bucket, do not filter + out tombstones. While this is rarely a problem for + non-strongly-consistent keys, it does present an issue for strong + consistency due to the tombstone issues mentioned above. +* **Secondary indexes not supported** - Strongly consistent + operations do not support [secondary indexes][cluster ops 2i] \(2i) at this time. Furthermore, any other metadata + attached to objects, even if not related to 2i, will be silently + ignored by Riak in strongly consistent buckets. +* **Multi-Datacenter Replication not supported** - At this time, + consistent keys are *not* replicated across clusters using Multi- + Datacenter Replication \(MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly + consistent data within a cluster with eventually consistent data + between clusters is difficult to reason about from the perspective of + applications. In a future version of Riak, we will add support for + strongly consistent replication across multiple datacenters/clusters. +* **Client library exceptions** - Basho's official [client + libraries][dev client libraries] convert errors returned by Riak into generic exceptions, + with a message derived from the returned server-side error message. + + + diff --git a/content/riak/kv/3.0.1/configuring/v2-multi-datacenter.md b/content/riak/kv/3.0.1/configuring/v2-multi-datacenter.md new file mode 100644 index 0000000000..8064cb20d5 --- /dev/null +++ b/content/riak/kv/3.0.1/configuring/v2-multi-datacenter.md @@ -0,0 +1,160 @@ +--- +title_supertext: "Configuring:" +title: "V2 Multi-Datacenter Replication" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "V2 Multi-Datacenter" + identifier: "configuring_v2" + weight: 210 + parent: "configuring" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.1/ops/mdc/v2/configuration + - /riak/kv/3.0.1/ops/mdc/v2/configuration + +--- + +[config v2 ssl]: {{}}riak/kv/3.0.1/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.1/configuring/v3-multi-datacenter/) instead. +{{% /note %}} + +Riak's Multi-Datacenter Replication capabilities offer a +variety of configurable parameters. + +## File + +The configuration for replication is kept in the `riak_repl` section of +each node's `advanced.config`. 
That section looks like this: + +```advancedconfig +{riak_repl, [ + {fullsync_on_connect, true}, + {fullsync_interval, 360}, + % Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + % Solaris: + % {data_root, "/opt/riak/data/riak_repl"}, + % FreeBSD/SmartOS: + % {data_root, "/var/db/riak/riak_repl"}, + {queue_size, 104857600}, + {server_max_pending, 5}, + {client_ack_frequency, 5} + ]} +``` + +## Usage + +These settings are configured using the standard Erlang config file +syntax, i.e. `{Setting, Value}`. For example, if you wished to set +`ssl_enabled` to `true`, you would insert the following line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{riak_repl, [ + % Other configs + {ssl_enabled, true}, + % Other configs + ]} +``` + +## Settings + +Once your configuration is set, you can verify its correctness by +running the following command: + +```bash +riak chkconfig +``` + +The output from this command will point you to syntactical and other +errors in your configuration files. + +A full list of configurable parameters can be found in the sections +below. + +## Fullsync Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster +`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.
**Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects the leader will wait to get an acknowledgment from, from the remote location, before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site + +## Client Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster +`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs +`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred + +## Buffer Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes +`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes + +## Worker Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). +`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). + + +1. SSL depth is the maximum number of non-self-issued + intermediate certificates that may follow the peer certificate in a valid + certificate chain. If depth is `0`, the PEER must be signed by the trusted + ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2` + then PEER, CA, CA, ROOT-CA and so on. + +2. Each get worker spawns 2 processes, one for the work and + one for the get FSM (an Erlang finite state machine implementation for `GET` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +3. Each put worker spawns 2 processes, one for the work, and + one for the put FSM (an Erlang finite state machine implementation for `PUT` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +4. If the ACL is specified and not the special value `*`, + peers presenting certificates not matching any of the patterns will not be + allowed to connect. + If no ACLs are configured, no checks on the common name are done, except + as described for [Identical Local and Peer Common Names][config v2 ssl]. 
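+
+To tie the SSL settings together, here is a hypothetical `riak_repl` fragment for `advanced.config`. All paths and the ACL pattern are placeholders, and unrelated settings are omitted:
+
+```advancedconfig
+{riak_repl, [
+    {ssl_enabled, true},
+    {certfile, "/etc/riak/certs/cert.pem"},
+    {keyfile, "/etc/riak/certs/key.pem"},
+    {cacertdir, "/etc/riak/certs/cacerts"},
+    {ssl_depth, 1},
+    {peer_common_name_acl, ["*.example.com"]}
+  ]}
+```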
+ + + diff --git a/content/riak/kv/3.0.1/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/3.0.1/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..f6cc426b0e --- /dev/null +++ b/content/riak/kv/3.0.1/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,82 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.1/ops/mdc/v2/nat + - /riak/kv/3.0.1/ops/mdc/v2/nat + +--- + +[config v2 ssl]: {{}}riak/kv/3.0.1/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.1/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` + + + diff --git a/content/riak/kv/3.0.1/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/3.0.1/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..12f6391460 --- /dev/null +++ b/content/riak/kv/3.0.1/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,371 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.1/ops/mdc/v2/quick-start + - /riak/kv/3.0.1/ops/mdc/v2/quick-start + +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.1/configuring/v3-multi-datacenter/quick-start/) instead. 
{{% /note %}}
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through
+the process of configuring Riak's version 2 Replication to perform
+replication between two sample Riak clusters in separate networks. This
+guide will also cover bidirectional replication, which is accomplished
+by setting up unidirectional replication in both directions between the
+clusters.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [System Performance Tuning][perf index]
+* [Reviewing configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|-------------|-----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener ` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+ `. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there are `listener_`s for
+each listening node, and that `leader` and `server_stats` are populated.
+They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following:
+
+```
+Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010"
+leader: 'riak@192.168.1.21'
+client_stats: [{<8051.3902.0>,
+               {message_queue_len,0},
+               {status,[{site,"Cluster1"},
+                        {strategy,riak_repl_keylist_client},
+                        {fullsync_worker,<8051.3909.0>},
+                        {put_pool_size,5},
+                        {connected,"172.16.1.11",9010},
+                        {state,wait_for_fullsync}]}}]
+```
+
+### Testing Realtime Replication
+
+That's all there is to it! When `PUT` requests are coordinated by
+Cluster1, these operations will be replicated to Cluster2.
+
+You can use the following example script to verify that `PUT` operations
+sent to Cluster1 are being replicated to Cluster2:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=172.16.1.11
+CLUSTER_2_IP=192.168.1.21
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
+  echo "C1 PUT Successful"
+else
+  echo "C1 PUT Failed"
+  exit 1
+fi
+
+CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent
+  C1:${CHECKPUT_C1}
+  C2:${CHECKREPL_C1_TO_C2}"
+  exit 1
+fi
+
+exit 0
+```
+
+You will have to change some of the above variables for your own
+environment, such as IP addresses or ports.
+
+If you run this script and things are working as expected, you will get
+the following output:
+
+```
+C1 PUT Successful
+C1 to C2 consistent
+```
+
+## Set Up Cluster2 → Cluster1 Replication
+
+### About Bidirectional Replication
+
+Multi-Datacenter support can also be configured to replicate in both
+directions, ensuring eventual consistency between your two datacenters.
+Setting up bidirectional replication is as simple as repeating the steps
+above in the other direction, i.e. from Cluster2 to Cluster1.
+
+### Set Up the Listeners on Cluster2 (Source cluster)
+
+On a node in Cluster2, `node4` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010
+riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010
+riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010
+```
+
+### Set Up the Site on Cluster1 (Site cluster)
+
+On a node in Cluster1, `node1` for example, inform the replication
+clients where the Source Listeners are with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you configured in
+the earlier step. For `sitename` enter **Cluster2**.
+
+```bash
+riak-repl add-site 192.168.1.21 9010 Cluster2
+```
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show 'cancelled' in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/3.0.1/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..143e21bfe8
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,164 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v2/ssl
+  - /riak/kv/3.0.1/ops/mdc/v2/ssl
+
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.1/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+ * Encryption of replication data
+ * SSL certificate chain validation
+ * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+  ]}
+
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+  ]}
+
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+  ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_depth, ...}
+    % ...
+  ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self-signed.
+
+For example:
+
+ * A depth of 0 indicates that the certificate must be signed directly
+   by a root certificate authority (CA)
+ * A depth of 1 indicates that the certificate may be signed by at most
+   1 intermediate CA, followed by a root CA
+ * A depth of 2 indicates that the certificate may be signed by at most
+   2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/v3-multi-datacenter.md b/content/riak/kv/3.0.1/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..d6440f29fc
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/v3-multi-datacenter.md
@@ -0,0 +1,161 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v3/configuration
+  - /riak/kv/3.0.1/ops/mdc/v3/configuration
+
+---
+
+[config reference#advanced]: {{}}riak/kv/3.0.1/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{}}riak/kv/3.0.1/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file.
For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+Here is a sample of the syntax:
+
+```advancedconfig
+{riak_core, [
+    %% Every *node* runs one cluster_mgr
+    {cluster_mgr, {"0.0.0.0", 9080 }},
+    % ...
+]},
+{riak_repl, [
+    %% Pick the correct data_root for your platform
+    %% Debian/Centos/RHEL:
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    %% Solaris:
+    %% {data_root, "/opt/riak/data/riak_repl"},
+    %% FreeBSD/SmartOS:
+    %% {data_root, "/var/db/riak/riak_repl"},
+    {max_fssource_cluster, 5},
+    {max_fssource_node, 2},
+    {max_fssink_node, 2},
+    {fullsync_on_connect, false},
+    % ...
+]}
+```
+
+## Settings
+
+Riak MDC configuration is set using the standard Erlang config file
+syntax `{Setting, Value}`. For example, if you wished to set
+`fullsync_on_connect` to `false`, you would insert this line into the
+`riak_repl` section (appending a comma if you have more settings to
+follow):
+
+```advancedconfig
+{fullsync_on_connect, false}
+```
+
+Once your configuration is set, you can verify its correctness by
+running the `riak` command-line tool:
+
+```bash
+riak chkconfig
+```
+
+## riak_repl Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number.
+`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line.
+`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line.
+`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink node's maximum number of connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line.
+`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication.
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
+`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
+`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100MB. Dropped objects will need to be replicated with a fullsync.
+`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
+`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+
+
+## riak_core Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an ssl `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+
+## Heartbeat Settings
+
+There are two realtime-replication-related settings in the `riak_repl`
+section of `advanced.config` related to the periodic "heartbeat" that is sent
+from the source to the sink cluster to verify the sink cluster's
+liveness. The `rt_heartbeat_interval` setting determines how often the
+heartbeat is sent (in seconds). If a heartbeat is sent and a response is
+not received, Riak will wait `rt_heartbeat_timeout` seconds before
+attempting to re-connect to the sink; if any data is received from the
+sink, even if it is not heartbeat data, the timer will be reset. Setting
+`rt_heartbeat_interval` to `undefined` will disable the heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to.
On the other hand,
+lengthening the timeout will make Riak less sensitive to cases in which
+the connection really has been compromised.
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA and so on.
+
+2. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer].
+
+## Default Bucket Properties
+
+Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0.
+
+To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the `advanced.config` file:
+
+```advancedconfig
+{riak_repl, [
+  {override_capability, [
+    {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+  ]}
+]}
+```
+
+If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed.
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/3.0.1/configuring/v3-multi-datacenter/nat.md
new file mode 100644
index 0000000000..d96b036180
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/v3-multi-datacenter/nat.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "With NAT"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "With NAT"
+    identifier: "configuring_v3_replication_nat"
+    weight: 101
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v3/nat
+  - /riak/kv/3.0.1/ops/mdc/v3/nat
+
+---
+
+[config v3 ssl]: {{}}riak/kv/3.0.1/configuring/v3-multi-datacenter/ssl
+
+Riak's Version 3 Replication supports replication of data on
+networks that use static NAT.
+
+This can be used for replicating data over the internet where servers
+have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network).
+
+### Requirements
+
+In order for Replication to work on a server configured with NAT, the
+NAT addresses must be configured *statically*.
+
+## Configuration
+
+NAT rules can be configured at runtime, from the command line.
+
+* `riak-repl nat-map show`
+
+    Shows the current NAT mapping table
+
+* `riak-repl nat-map add <external_ip>[:port] <internal_ip>`
+
+    Adds a NAT map from the external IP, with an optional port, to an
+    internal IP. The port number refers to a port that is automatically
+    mapped to the internal `cluster_mgr` port number.
+
+* `riak-repl nat-map del <external_ip>[:port] <internal_ip>`
+
+    Deletes a specific NAT map entry.
+
+### Applying Changes at Runtime
+
+* Realtime NAT replication changes will be applied once realtime is
+  stopped and started using the following commands:
+
+    * `riak-repl realtime stop <clustername>`
+    * `riak-repl realtime start <clustername>`
+
+* Fullsync NAT replication changes will be applied on the next run of a
+  fullsync, or you can stop and start the current fullsync.
+
+    * `riak-repl fullsync stop <clustername>`
+    * `riak-repl fullsync start <clustername>`
+
+
+## Example
+
+* Cluster_A is the **source** of replicated data.
+* Cluster_B and Cluster_C are the **sinks** of the replicated data.
+
+### Cluster_A Setup
+
+Cluster_A is set up with nodes using the following **internal** IP
+addresses:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.1.20` | -
+`192.168.1.21` | -
+`192.168.1.22` | -
+`192.168.1.23` | -
+`192.168.1.24` | -
+
+### Cluster_B Setup
+
+The nodes in Cluster_B will be configured as follows:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.2.40` | `50.16.238.120:5555`
+`192.168.2.41` | `50.16.238.121:5555`
+`192.168.2.42` | `50.16.238.122:5555`
+`192.168.2.43` | `50.16.238.123:5555`
+`192.168.2.44` | `50.16.238.124:5555`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT port listens on `5555`.
+
+### Cluster_C Setup
+
+The nodes in Cluster_C are set up with **static NAT**, configured with the
+following IP addresses:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.3.60` | `50.16.238.200:5550`
+`192.168.3.61` | `50.16.238.200:5551`
+`192.168.3.62` | `50.16.238.200:5552`
+`192.168.3.63` | `50.16.238.200:5553`
+`192.168.3.64` | `50.16.238.200:5554`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT ports listen on `5550` through `5554`.
+
+```bash
+# on any node of Cluster_A
+riak-repl clustername Cluster_A
+
+# on any node of Cluster_B
+riak-repl clustername Cluster_B
+
+# on any node of Cluster_C
+riak-repl clustername Cluster_C
+
+# on 50.16.238.120 of Cluster_B
+riak-repl nat-map add 50.16.238.120:5555 192.168.2.40
+# on 50.16.238.121 of Cluster_B
+riak-repl nat-map add 50.16.238.121:5555 192.168.2.41
+# on 50.16.238.122 of Cluster_B
+riak-repl nat-map add 50.16.238.122:5555 192.168.2.42
+# on 50.16.238.123 of Cluster_B
+riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
+# on 50.16.238.124 of Cluster_B
+riak-repl nat-map add 50.16.238.124:5555 192.168.2.44
+
+# on 192.168.3.60 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5550 192.168.3.60
+# on 192.168.3.61 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5551 192.168.3.61
+# on 192.168.3.62 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5552 192.168.3.62
+# on 192.168.3.63 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5553 192.168.3.63
+# on 192.168.3.64 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5554 192.168.3.64
+
+
+# Connect replication from Cluster_A to Cluster_B:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.120:5555
+# You can connect to any node in Cluster_B with NAT-mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
+
+# Connect replication from Cluster_A to Cluster_C:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.200:5550
+# You can connect to any node in Cluster_C with NAT-mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
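+
+# With connections to both sinks in place, the final step (below) is to
+# enable and start realtime replication from the source cluster.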
+
+
+# on any node from Cluster_A
+riak-repl realtime enable Cluster_B
+riak-repl realtime enable Cluster_C
+
+riak-repl realtime start Cluster_B
+riak-repl realtime start Cluster_C
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/3.0.1/configuring/v3-multi-datacenter/quick-start.md
new file mode 100644
index 0000000000..ebd851c4cb
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/v3-multi-datacenter/quick-start.md
@@ -0,0 +1,172 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "Quickstart"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Quickstart"
+    identifier: "configuring_v3_quickstart"
+    weight: 100
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v3/quick-start
+  - /riak/kv/3.0.1/ops/mdc/v3/quick-start
+
+---
+
+[install index]: {{}}riak/kv/3.0.1/setup/installing
+[perf index]: {{}}riak/kv/3.0.1/using/performance
+[config v3 mdc]: {{}}riak/kv/3.0.1/configuring/v3-multi-datacenter
+[cluster ops v3 mdc]: {{}}riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter
+
+This guide will walk you through the process of configuring Riak's v3
+Replication to perform replication between two sample Riak clusters on
+separate networks. This guide will also cover bidirectional replication,
+which is accomplished by setting up unidirectional replication in both
+directions between the clusters. It is important to note that both
+clusters must have the same ring size, but can have a different number
+of nodes.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* Install [Riak][install index]
+* Perform [System Tuning][perf index]
+* Review [Configuration][config v3 mdc]
+
+## About v3 Replication in 1.3 and higher
+
+In Riak's v3 Replication from Riak KV version 1.3 onwards, the nomenclature for Source and Site
+clusters has changed. To more accurately reflect the behavior of each of
+the clusters, "listeners" and "sites" are now known as "sources" and
+"sinks." Data transfer now originates at the "source" and replicates to
+the "sink;" initiation is always from the primary (source) to the backup
+(sink) data center.
+
+Additionally, knowledge of the state of each cluster is now managed by a
+**cluster manager** process, which greatly simplifies the setup and
+maintenance of Multi-Datacenter replication.
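+
+As a reminder, v3 replication requires the `cluster_mgr` setting, which
+gives the cluster manager its listen address. A minimal sketch of the
+relevant `advanced.config` entry (the IP and port shown are the
+illustrative defaults used throughout this guide) looks like this:
+
+```advancedconfig
+{riak_core, [
+    %% The cluster manager listens on this IP and port
+    {cluster_mgr, {"0.0.0.0", 9080}}
+]}
+```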
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|:----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View your active connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name         <Ctrl-Pid>      [Members]
+----             ------------         ----------      ---------
+Cluster2         Cluster2             <0.7985.0>      ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name         <Ctrl-Pid>      [Members]
+----             ------------         ----------      ---------
+Cluster1         Cluster1             <0.4456.0>      ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2.
+Once this is done, bidirectional replication should be operating.
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
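+
+If you would like a quick end-to-end check of realtime replication, the
+script below is a minimal sketch adapted from the v2 quick-start's test
+script; it assumes the default HTTP port `8098`, the example IPs above,
+and a hypothetical `replCheck` bucket:
+
+```bash
+#!/bin/bash
+# Write a value to the source cluster, then read the same key back
+# from the sink cluster and compare.
+VALUE=`date`
+CLUSTER_1_IP=10.60.67.149
+CLUSTER_2_IP=10.60.77.10
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/v3
+CHECKREPL=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/v3`
+
+if [ "${VALUE}" = "${CHECKREPL}" ]; then
+  echo "Cluster1 to Cluster2 consistent"
+else
+  echo "Cluster1 to Cluster2 inconsistent"
+  exit 1
+fi
+```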
+
+
+
diff --git a/content/riak/kv/3.0.1/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/3.0.1/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..6dc2f932e2
--- /dev/null
+++ b/content/riak/kv/3.0.1/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,174 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v3/ssl
+  - /riak/kv/3.0.1/ops/mdc/v3/ssl
+
+---
+
+[config reference#advanced.config]: {{}}riak/kv/3.0.1/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+ * Encryption of replication data
+ * SSL certificate chain validation
+ * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+  ]}
+
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+  ]}
+
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+  ]}
+
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+  ]}
+
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_core` section of `advanced.config`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_depth, 3} % Sets the depth to 3
+    % ...
+  ]}
+
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. The
+intermediate certificates must not be self-signed.
+
+The following example depths illustrate this:
+
+ * a depth of `0` indicates that the certificate must be signed
+   directly by a root certificate authority (CA)
+ * a depth of `1` indicates that the certificate may be signed by at
+   most 1 intermediate CA, followed by a root CA
+ * a depth of `2` indicates that the certificate may be signed by at
+   most 2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL for *Version 3* is available in *Riak 1.4+*.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+Read how to [generate your own CA and
+keys](http://www.debian-administration.org/articles/618). Ensure that
+you remove the password protection from the keys you generate.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing.md b/content/riak/kv/3.0.1/developing.md
new file mode 100644
index 0000000000..5ea87b9c53
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing.md
@@ -0,0 +1,79 @@
+---
+title: "Developing with Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Developing"
+    identifier: "developing"
+    weight: 300
+    pre: lambda
+toc: true
+aliases:
+
+---
+
+[getting started]: ../developing/getting-started
+[usage index]: ../developing/usage
+[client libraries]: ../developing/client-libraries
+[dev data types]: ../developing/data-types
+[dev data modeling]: ../developing/data-modeling
+[apps index]: ../developing/app-guide
+[dev api index]: ../developing/api
+[dev faq]: ../developing/faq
+
+## In This Section
+
+#### [Getting Started][getting started]
+
+Step-by-step guide for getting started developing with Riak KV.
+
+[Learn More >>][getting started]
+
+#### [Usage][usage index]
+
+A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types.
+
+[Learn More >>][usage index]
+
+#### [Client Libraries][client libraries]
+
+Overview of client libraries for a variety of programming languages and environments.
+
+[Learn More >>][client libraries]
+
+#### [Data Types][dev data types]
+
+Overview and guide to working with data types in Riak KV.
+
+[Learn More >>][dev data types]
+
+#### [Data Modeling][dev data modeling]
+
+Information on use cases and data models that are a good fit for Riak KV.
+
+[Learn More >>][dev data modeling]
+
+#### [Application Guide][apps index]
+
+A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV.
+
+[Learn More >>][apps index]
+
+#### [APIs Reference][dev api index]
+
+Information and reference material on Riak KV APIs.
+
+[Learn More >>][dev api index]
+
+#### [FAQ][dev faq]
+
+Frequently asked questions when developing applications with Riak KV.
+
+[Learn More >>][dev faq]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api.md b/content/riak/kv/3.0.1/developing/api.md
new file mode 100644
index 0000000000..424b4fe98c
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api.md
@@ -0,0 +1,42 @@
+---
+title: "APIs"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "APIs"
+    identifier: "developing_apis"
+    weight: 107
+    parent: "developing"
+toc: true
+aliases:
+
+---
+
+[dev api http]: ./http
+[dev api backend]: ./backend
+[dev api pbc]: ./protocol-buffers/
+
+## In This Section
+
+#### [HTTP APIs][dev api http]
+
+Documentation on Riak KV's HTTP API.
+
+[Learn More >>][dev api http]
+
+#### [Protocol Buffers][dev api pbc]
+
+Information on Riak KV's Protocol Buffer Client API.
+
+[Learn More >>][dev api pbc]
+
+#### [Backend API][dev api backend]
+
+Overview of Riak KV's storage backend API.
+
+[Learn More >>][dev api backend]
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/backend.md b/content/riak/kv/3.0.1/developing/api/backend.md
new file mode 100644
index 0000000000..dd1de4137f
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/backend.md
@@ -0,0 +1,118 @@
+---
+title: "Backend API"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Backend API"
+    identifier: "apis_backend"
+    weight: 101
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/backend-api
+  - /riak/kv/3.0.1/dev/references/backend-api
+
+---
+
+[plan backend]: {{}}riak/kv/3.0.1/setup/planning/backend
+
+Riak's storage API uniformly applies to all of the
+[supported backends][plan backend]. This page presents the details of
+the storage backend API in the form of
+[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html)
+(specs).
+
+Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html),
+an Erlang static analysis tool. We recommend copying these specs into any
+custom backend modules and using them as a guide for development to
+avoid errors and ensure full compatibility with Riak.
+
+Also included below is the function export list that can be pasted directly
+into a custom storage backend module.
+
+```erlang
+%% Riak Storage Backend API
+-export([api_version/0,
+         start/2,
+         stop/1,
+         get/3,
+         put/5,
+         delete/4,
+         drop/1,
+         fold_buckets/4,
+         fold_keys/4,
+         fold_objects/4,
+         is_empty/1,
+         status/1,
+         callback/3]).
+
+%% ===================================================================
+%% Public API
+%% ===================================================================
+
+%% @doc Return the major version of the
+%% current API and a capabilities list.
+%% The current valid capabilities are async_fold
+%% and indexes.
+-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. + +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/http.md b/content/riak/kv/3.0.1/developing/api/http.md new file mode 100644 index 0000000000..b63eb78a85 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/http.md @@ -0,0 +1,93 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/3.0.1/dev/references/http + - /riak/kv/3.0.1/dev/references/http + +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
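+
+For example (a hypothetical key; the default HTTP port `8098` is
+assumed), a key containing slashes must be escaped before it appears in
+a URL:
+
+```curl
+# store and fetch the key "2021/01/01" in bucket "test" by
+# escaping each slash as %2F
+curl -XPUT -d "data" -H "Content-Type: text/plain" \
+  http://127.0.0.1:8098/buckets/test/keys/2021%2F01%2F01
+curl http://127.0.0.1:8098/buckets/test/keys/2021%2F01%2F01
+```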
+
+## Bucket-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/3.0.1/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/3.0.1/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/3.0.1/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/3.0.1/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/3.0.1/developing/api/http/list-keys)
+
+## Object-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/3.0.1/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{}}riak/kv/3.0.1/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/3.0.1/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/3.0.1/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/3.0.1/developing/api/http/delete-object)
+
+## Riak-Data-Type-related Operations
+
+Method | URL
+:------|:----
+`GET` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/3.0.1/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/3.0.1/developing/data-types/#usage-examples)
+and subpages e.g. [sets]({{}}riak/kv/3.0.1/developing/data-types/sets).
+
+Advanced users may consult the technical documentation inside the Riak
+KV internal module `riak_kv_wm_crdt`.
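+
+As a quick illustration (a sketch assuming a counter bucket type named
+`counters` has already been created and activated), incrementing and
+then fetching a counter data type looks like this:
+
+```curl
+# increment the counter stored at key "traffic" by 5
+curl -XPOST http://127.0.0.1:8098/types/counters/buckets/hits/datatypes/traffic \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+
+# fetch its current value
+curl http://127.0.0.1:8098/types/counters/buckets/hits/datatypes/traffic
+```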
+
+## Query-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/3.0.1/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/3.0.1/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/3.0.1/developing/api/http/secondary-indexes)
+
+## Server-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/3.0.1/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/3.0.1/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/3.0.1/developing/api/http/list-resources)
+
+## Search-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/search/query/<index>` | [HTTP Search Query]({{}}riak/kv/3.0.1/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/3.0.1/developing/api/http/search-index-info)
+`GET` | `/search/index/<index>` | [HTTP Fetch Search Index]({{}}riak/kv/3.0.1/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index>` | [HTTP Store Search Index]({{}}riak/kv/3.0.1/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index>` | [HTTP Delete Search Index]({{}}riak/kv/3.0.1/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema>` | [HTTP Fetch Search Schema]({{}}riak/kv/3.0.1/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema>` | [HTTP Store Search Schema]({{}}riak/kv/3.0.1/developing/api/http/store-search-schema)
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/counters.md b/content/riak/kv/3.0.1/developing/api/http/counters.md
new file mode 100644
index 0000000000..b7877b4b3a
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/counters.md
@@ -0,0 +1,82 @@
+---
+title: "HTTP Counters"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Counters"
+    identifier: "http_counters"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/counters
+  - /riak/kv/3.0.1/dev/references/http/counters
+
+---
+
+A Riak counter is a CRDT (convergent replicated data type) that (eventually)
+converges to the correct total. You merely increment the counter with some
+integer, and any potential conflicts will be automatically resolved by Riak.
+
+## Setup
+
+Riak counters can only be used if the bucket has the `allow_mult` property
+set to `true`.
+
+```
+curl -XPUT localhost:8098/buckets/BUCKET/props \
+  -H "Content-Type: application/json" \
+  -d "{\"props\" : {\"allow_mult\": true}}"
+```
+
+If you attempt to use counters without setting the above, you'll get this
+message:
+
+```
+Counters require bucket property 'allow_mult=true'
+```
+
+## Request
+
+To increment a counter, POST an integer value to the `/counters` resource.
+This increments the keyed value by the given amount.
+
+```
+POST /buckets/BUCKET/counters/KEY
+```
+
+To retrieve the current value, issue a GET against the same `/counters`
+resource:
+
+```
+GET /buckets/BUCKET/counters/KEY
+```
+
+## Response
+
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/3.0.1/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/3.0.1/developing/api/http/fetch-object)) responses apply here.
+
+Caveats: Counters have no support for Secondary Indexes (2i), Links or Custom HTTP Metadata.
+
+## Example
+
+The body must be an integer (positive or negative).
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/http/delete-object.md b/content/riak/kv/3.0.1/developing/api/http/delete-object.md new file mode 100644 index 0000000000..08d7ead9ad --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/http/delete-object.md @@ -0,0 +1,79 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.1/dev/references/http/delete-object + - /riak/kv/3.0.1/dev/references/http/delete-object + +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/test/keys/test2 HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/delete-search-index.md b/content/riak/kv/3.0.1/developing/api/http/delete-search-index.md
new file mode 100644
index 0000000000..2102ac6d0f
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/delete-search-index.md
@@ -0,0 +1,37 @@
+---
+title: "HTTP Delete Search Index"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Delete Search Index"
+    identifier: "http_delete_search_index"
+    weight: 116
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/delete-search-index
+  - /riak/kv/3.0.1/dev/references/http/delete-search-index
+
+---
+
+Deletes a Riak Search index.
+
+## Request
+
+```
+DELETE /search/index/<index>
+```
+
+## Normal Response Codes
+
+* `204 No Content` - The index was successfully deleted (also returned
+  if the index did not exist to begin with)
+
+## Typical Error Codes
+
+* `503 Service Unavailable` - The request timed out internally
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/fetch-object.md b/content/riak/kv/3.0.1/developing/api/http/fetch-object.md
new file mode 100644
index 0000000000..972ba539b2
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/fetch-object.md
@@ -0,0 +1,246 @@
+---
+title: "HTTP Fetch Object"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Fetch Object"
+    identifier: "http_fetch_object"
+    weight: 105
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/fetch-object
+  - /riak/kv/3.0.1/dev/references/http/fetch-object
+
+---
+
+Reads an object from the specified bucket/key.
+
+## Request
+
+```bash
+GET /types/type/buckets/bucket/keys/key
+GET /buckets/bucket/keys/key
+```
+
+Important headers:
+
+* `Accept` - When `multipart/mixed` is the preferred content-type, objects with
+siblings will return all siblings in a single request. See [Siblings examples](#siblings-examples). See
+also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1).
+
+Optional headers:
+
+* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics,
+matching on the `ETag` and `Last-Modified` of the object, respectively. If the
+object fails one of the tests (that is, if the ETag is equal or the object is
+unmodified since the supplied timestamp), Riak will return a `304 Not Modified`
+response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5).
+
+Optional query parameters:
+
+* `r` - (read quorum) how many replicas need to agree when retrieving the
+object ([default is defined by the bucket]({{}}riak/kv/3.0.1/developing/api/http/set-bucket-props))
+* `pr` - how many primary replicas need to be online when doing the read
+([default is defined by the bucket]({{}}riak/kv/3.0.1/developing/api/http/set-bucket-props))
+* `basic_quorum` - whether to return early in some failure cases (e.g.
when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/3.0.1/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/3.0.1/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/3.0.1/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. +{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT +< ETag: 6dQBm9oYA1mxRSH0e96l5W +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"foo":"bar"} +``` + +## Siblings examples + +### Manually requesting siblings + +Simple call to fetch an object that has siblings: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 102 +< +Siblings: +16vic4eU9ny46o4KPiDz1f +4v5xOg4bVwUYZdMkqf0d6I +6nr5tDTmhxnwuAFJDd2s6G +6zRSZFUJlHXZ15o9CG0BYl +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +Now request one of the siblings directly: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT +< ETag: 16vic4eU9ny46o4KPiDz1f +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/x-www-form-urlencoded +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + +### Get all siblings in one request + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /buckets/test/keys/doc HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: multipart/mixed
+>
+< HTTP/1.1 300 Multiple Choices
+< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA==
+< Vary: Accept, Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh
+< Content-Length: 766
+<
+
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/x-www-form-urlencoded
+Link: ; rel="up"
+Etag: 16vic4eU9ny46o4KPiDz1f
+Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT
+
+{"bar":"baz"}
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/json
+Link: ; rel="up"
+Etag: 4v5xOg4bVwUYZdMkqf0d6I
+Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT
+
+{"bar":"baz"}
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/json
+Link: ; rel="up"
+Etag: 6nr5tDTmhxnwuAFJDd2s6G
+Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT
+
+{"bar":"baz"}
+--YinLMzyUR9feB17okMytgKsylvh
+Content-Type: application/json
+Link: ; rel="up"
+Etag: 6zRSZFUJlHXZ15o9CG0BYl
+Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT
+
+{"foo":"bar"}
+--YinLMzyUR9feB17okMytgKsylvh--
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/fetch-search-index.md b/content/riak/kv/3.0.1/developing/api/http/fetch-search-index.md
new file mode 100644
index 0000000000..49707ac515
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/fetch-search-index.md
@@ -0,0 +1,51 @@
+---
+title: "HTTP Fetch Search Index"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Fetch Search Index"
+    identifier: "http_fetch_search_index"
+    weight: 115
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/fetch-search-index
+  - /riak/kv/3.0.1/dev/references/http/fetch-search-index
+
+---
+
+Retrieves information about a Riak Search [index]({{}}riak/kv/3.0.1/developing/usage/search/#simple-setup).
+
+## Request
+
+```
+GET /search/index/
+```
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `404 Object Not Found` - No Search index with that name is currently
+  available
+* `503 Service Unavailable` - The request timed out internally
+
+## Response
+
+If the index is found, Riak will output a JSON object describing the
+index, including its name, the [`n_val`]({{}}riak/kv/3.0.1/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/3.0.1/developing/usage/search-schemas) used by the index. Here is an example:
+
+```json
+{
+  "name": "my_index",
+  "n_val": 3,
+  "schema": "_yz_default"
+}
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/fetch-search-schema.md b/content/riak/kv/3.0.1/developing/api/http/fetch-search-schema.md
new file mode 100644
index 0000000000..c796ae2968
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/fetch-search-schema.md
@@ -0,0 +1,42 @@
+---
+title: "HTTP Fetch Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Fetch Search Schema"
+    identifier: "http_fetch_search_schema"
+    weight: 116
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/fetch-search-schema
+  - /riak/kv/3.0.1/dev/references/http/fetch-search-schema
+
+---
+
+Retrieves a Riak KV [search schema]({{}}riak/kv/3.0.1/developing/usage/search-schemas).
+
+## Request
+
+```
+GET /search/schema/
+```
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `404 Object Not Found`
+* `503 Service Unavailable` - The request timed out internally
+
+## Response
+
+If the schema is found, Riak will return the contents of the schema as
+XML (all Riak Search schemas are XML).
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/get-bucket-props.md b/content/riak/kv/3.0.1/developing/api/http/get-bucket-props.md
new file mode 100644
index 0000000000..8f2b6918d5
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/get-bucket-props.md
@@ -0,0 +1,86 @@
+---
+title: "HTTP Get Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Get Bucket Properties"
+    identifier: "http_get_bucket_props"
+    weight: 100
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/get-bucket-props
+  - /riak/kv/3.0.1/dev/references/http/get-bucket-props
+
+---
+
+Reads the bucket or bucket type properties.
+
+## Request
+
+```bash
+GET /buckets/bucket/props
+```
+
+Or, to read bucket properties from a bucket in a bucket type:
+
+```bash
+GET /types/type/buckets/bucket/props
+```
+
+Optional query parameters (only valid for the old format):
+
+* `props` - whether to return the bucket properties (`true` is the default)
+* `keys` - whether to return the keys stored in the bucket (`false` is the
+default). See also [HTTP List Keys]({{}}riak/kv/3.0.1/developing/api/http/list-keys).
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+Important headers:
+
+* `Content-Type` - `application/json`
+
+The JSON object in the response will contain up to two entries, `"props"` and
+`"keys"`, which are present or missing, according to the optional query
+parameters. The default is for only `"props"` to be present.
+
+See [HTTP Set Bucket Properties]({{}}riak/kv/3.0.1/developing/api/http/set-bucket-props) for more information about the available
+bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/3.0.1/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak admin bucket-type` interface.
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/props
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/http/link-walking.md b/content/riak/kv/3.0.1/developing/api/http/link-walking.md new file mode 100644 index 0000000000..e7e8ea334d --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/http/link-walking.md @@ -0,0 +1,129 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.1/dev/references/http/link-walking + - /riak/kv/3.0.1/dev/references/http/link-walking + +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/3.0.1/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/3.0.1/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/3.0.1/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/http/list-buckets.md b/content/riak/kv/3.0.1/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..bca8bb8e80 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/http/list-buckets.md @@ -0,0 +1,68 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.1/dev/references/http/list-buckets + - /riak/kv/3.0.1/dev/references/http/list-buckets + +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. 
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/http/list-keys.md b/content/riak/kv/3.0.1/developing/api/http/list-keys.md new file mode 100644 index 0000000000..927d659e62 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/http/list-keys.md @@ -0,0 +1,80 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.1/dev/references/http/list-keys + - /riak/kv/3.0.1/dev/references/http/list-keys + +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/http/list-resources.md b/content/riak/kv/3.0.1/developing/api/http/list-resources.md new file mode 100644 index 0000000000..1cbf092362 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/http/list-resources.md @@ -0,0 +1,84 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.1/dev/references/http/list-resources + - /riak/kv/3.0.1/dev/references/http/list-resources + +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
+ +The standard resources are: + +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/3.0.1/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/3.0.1/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/3.0.1/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/3.0.1/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/3.0.1/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/3.0.1/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/3.0.1/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/3.0.1/developing/api/http/status) + +## Request + +```bash +GET / +``` + +Headers: + +* `Accept` - `application/json` or `text/html` + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Link` - all resources that are described in the response body, but in Link +form + +## Example + +Request JSON response + +```curl +$ curl -i http://localhost:8098 -H "Accept: application/json" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:18:31 GMT +Content-Type: application/json +Content-Length: 398 + +{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"} + +# Request HTML response +curl -i http://localhost:8098 -H "Accept: text/html" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:20:05 GMT +Content-Type: text/html +Content-Length: 666 + + +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/http/mapreduce.md b/content/riak/kv/3.0.1/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..d7153f5808 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/http/mapreduce.md @@ -0,0 +1,74 @@ +--- +title: "HTTP MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "MapReduce" + identifier: "http_mapreduce" + weight: 108 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.1/dev/references/http/mapreduce + - /riak/kv/3.0.1/dev/references/http/mapreduce + +--- + +[MapReduce]({{}}riak/kv/3.0.1/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will 
flow.
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/3.0.1/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+*This request must include an entity (body), which is the JSON form of the MapReduce query.*
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted.
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/ping.md b/content/riak/kv/3.0.1/developing/api/http/ping.md
new file mode 100644
index 0000000000..a764ced0de
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/ping.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/ping
+  - /riak/kv/3.0.1/dev/references/http/ping
+
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load-balancers and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/reset-bucket-props.md b/content/riak/kv/3.0.1/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..a1df619cbd
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,61 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/reset-bucket-props
+  - /riak/kv/3.0.1/dev/references/http/reset-bucket-props
+
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/bucket/props HTTP/1.1
+> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue)
+< Date: Tue, 06 Nov 2012 21:56:17 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/search-index-info.md b/content/riak/kv/3.0.1/developing/api/http/search-index-info.md
new file mode 100644
index 0000000000..d43b05edaf
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/search-index-info.md
@@ -0,0 +1,56 @@
+---
+title: "HTTP Search Index Info"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Search Index Info"
+    identifier: "http_search_index_info"
+    weight: 114
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/search-index-info
+  - /riak/kv/3.0.1/dev/references/http/search-index-info
+
+---
+
+Retrieves information about all currently available [Search indexes]({{}}riak/kv/3.0.1/developing/usage/search) in JSON format.
+
+## Request
+
+```
+GET /search/index
+```
+
+## Response
+
+If there are no currently available Search indexes, a `200 OK` will be
+returned but with an empty list as the response value.
+
+Below is the example output if there is one Search index, called
+`test_index`, currently available:
+
+```json
+[
+  {
+    "n_val": 3,
+    "name": "test_index",
+    "schema": "_yz_default"
+  }
+]
+```
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `404 Object Not Found` - Typically returned if Riak Search is not
+  currently enabled on the node
+* `503 Service Unavailable` - The request timed out internally
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/search-query.md b/content/riak/kv/3.0.1/developing/api/http/search-query.md
new file mode 100644
index 0000000000..b6c4d73d3e
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/search-query.md
@@ -0,0 +1,73 @@
+---
+title: "HTTP Search Query"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Search Query"
+    identifier: "http_search_query"
+    weight: 113
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/search-query
+  - /riak/kv/3.0.1/dev/references/http/search-query
+
+---
+
+Performs a [Riak KV Search]({{}}riak/kv/3.0.1/developing/usage/search) query.
+
+## Request
+
+```
+GET /search/query/
+```
+
+## Optional Query Parameters
+
+* `wt` - The [response
+  writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers)
+  to be used when returning the Search payload. The currently
+  available options are `json` and `xml`. The default is `xml`.
+* `q` - The actual Search query itself. Examples can be found in
+  [Using Search]({{}}riak/kv/3.0.1/developing/usage/search). If a query is not specified, Riak will return
+  information about the index itself, e.g. the number of documents
+  indexed.
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `400 Bad Request` - Returned when, for example, a malformed query is
+  supplied
+* `404 Object Not Found` - Returned if the Search index you are
+  attempting to query does not exist
+* `503 Service Unavailable` - The request timed out internally
+
+## Response
+
+If a `200 OK` is returned, then the Search query has been successful.
+Below is an example JSON response from querying an index that currently
+has no documents associated with it:
+
+```json
+{
+  "response": {
+    "docs": [],
+    "maxScore": 0.0,
+    "numFound": 0,
+    "start": 0
+  },
+  "responseHeader": {
+    "status": 0,
+    "QTime": 10,
+    "params": { /* internal info from the query */ }
+  }
+}
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/secondary-indexes.md b/content/riak/kv/3.0.1/developing/api/http/secondary-indexes.md
new file mode 100644
index 0000000000..ec1b601d07
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/secondary-indexes.md
@@ -0,0 +1,95 @@
+---
+title: "HTTP Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Secondary Indexes"
+    identifier: "http_2i"
+    weight: 109
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/secondary-indexes
+  - /riak/kv/3.0.1/dev/references/http/secondary-indexes
+
+---
+
+[Secondary Indexes]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
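+
+Index entries are attached when an object is stored, via `X-Riak-Index-*` headers (see [HTTP Store Object]({{}}riak/kv/3.0.1/developing/api/http/store-object)). As a minimal sketch (assuming a bucket `mybucket`, a key `mykey1`, a binary index `field1_bin`, and a backend that supports secondary indexes), an object could be tagged like this:
+
+```curl
+# hypothetical object tagged with a binary secondary index
+curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "Content-Type: text/plain" \
+  -H "X-Riak-Index-field1_bin: val1" \
+  -d 'some data'
+```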
+
+## Request
+
+### Exact Match
+
+```bash
+GET /buckets/mybucket/index/myindex_bin/value
+```
+
+### Range Query
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end
+```
+
+#### Range query with terms
+
+To see the index values matched by the range, use `return_terms=true`.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true
+```
+
+### Pagination
+
+Add the parameter `max_results` for pagination. This will limit the results and provide a `continuation` value for the next request.
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500
+GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=
+```
+
+### Streaming
+
+```
+GET /buckets/mybucket/index/myindex_bin/start/end?stream=true
+```
+
+## Response
+
+Normal status codes:
+
++ `200 OK`
+
+Typical error codes:
+
++ `400 Bad Request` - if the index name or index value is invalid.
++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system.
++ `503 Service Unavailable` - if the job timed out before it could complete
+
+## Example
+
+```curl
+$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 19
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+{"keys":["mykey1"]}
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/set-bucket-props.md b/content/riak/kv/3.0.1/developing/api/http/set-bucket-props.md
new file mode 100644
index 0000000000..fcbcb5d97b
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/set-bucket-props.md
@@ -0,0 +1,116 @@
+---
+title: "HTTP Set Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Set Bucket Properties"
+    identifier: "http_set_bucket_props"
+    weight: 101
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/set-bucket-props
+  - /riak/kv/3.0.1/dev/references/http/set-bucket-props
+
+---
+
+Sets bucket properties like "n_val" and "allow_mult".
+
+## Request
+
+```bash
+PUT /buckets/bucket/props
+```
+
+Important headers:
+
+* `Content-Type` - `application/json`
+
+The body of the request should be a JSON object with a single entry "props".
+Unmodified bucket properties may be omitted.
+
+Available properties:
+
+* `n_val` (integer > 0) - the number of replicas for objects in this bucket
+* `allow_mult` (true or false) - whether to allow sibling objects to be created
+(concurrent updates)
+* `last_write_wins` (true or false) - whether to ignore object history (vector
+clock) when writing
+* `precommit` - [precommit hooks]({{}}riak/kv/3.0.1/developing/usage/commit-hooks)
+* `postcommit` - [postcommit hooks]({{}}riak/kv/3.0.1/developing/usage/commit-hooks)
+* `r, w, dw, rw` - default quorum values for operations on keys in the bucket.
+Valid values are:
+  * `"all"` - all nodes must respond
+  * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.*
+  * `"one"` - equivalent to 1
+  * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of diverse physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+{{% note title="Node Confirms" %}}
+`node_confirms` is a tunable for durability. When operating in a failure state, Riak will store replicas in fallback vnodes, and in some cases multiple fallbacks may be on the same physical node. `node_confirms` is an option that specifies how many distinct physical nodes must acknowledge a write for it to be considered successful.
+
+When Riak receives a 'put', it starts up a riak_kv_put_fsm (finite state machine). This prepares and then validates the options, then calls any precommit hooks, before executing a put to the local vnode in the preflist, which becomes the co-ordinating node. This then waits for the local vnode response before executing the put request remotely on the two remaining nodes in the preflist.
+
+The fsm then waits for the remote vnode responses, and as it receives responses, it adds these results and checks whether enough results have been collected to satisfy the bucket properties such as 'dw' and 'pw'.
+When analysing the responses, Riak will count the number of different nodes from which results have been returned. The finite state machine can now be required to wait for a minimum number of confirmations from different nodes, whilst also ensuring all other configured options are satisfied.
+
+Once all options are satisfied, the response is returned, postcommit hooks are called and the fsm finishes.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+  -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/props HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4
+OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 21
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/status.md b/content/riak/kv/3.0.1/developing/api/http/status.md
new file mode 100644
index 0000000000..e038ca9040
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/status.md
@@ -0,0 +1,173 @@
+---
+title: "HTTP Status"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Status"
+    identifier: "http_status"
+    weight: 111
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/status
+  - /riak/kv/3.0.1/dev/references/http/status
+
+---
+
+Reports on the performance and configuration of the Riak node to which the request was made. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active.
+
+## Performance
+
+Repeated requests to the `/stats` endpoint do not have a negative
+performance impact as the statistics are cached internally in Riak.
+
+## Request
+
+```bash
+GET /stats
+```
+
+Important headers:
+
+* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`.
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `404 Not Found` - if `riak_kv_stat` is not enabled
+
+Important headers:
+* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks)
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain"
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /stats HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: text/plain
+>
+< HTTP/1.1 200 OK
+< Vary: Accept, Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/plain
+< Content-Length: 2102
+<
+{
+    "vnode_gets": 0,
+    "vnode_puts": 0,
+    "read_repairs": 0,
+    "vnode_gets_total": 0,
+    "vnode_puts_total": 0,
+    "node_gets": 0,
+    "node_gets_total": 0,
+    "node_get_fsm_time_mean": "undefined",
+    "node_get_fsm_time_median": "undefined",
+    "node_get_fsm_time_95": "undefined",
+    "node_get_fsm_time_99": "undefined",
+    "node_get_fsm_time_100": "undefined",
+    "node_puts": 0,
+    "node_puts_total": 0,
+    "node_put_fsm_time_mean": "undefined",
+    "node_put_fsm_time_median": "undefined",
+    "node_put_fsm_time_95": "undefined",
+    "node_put_fsm_time_99": "undefined",
+    "node_put_fsm_time_100": "undefined",
+    "read_repairs_total": 0,
+    "cpu_nprocs": 84,
+    "cpu_avg1": 251,
+    "cpu_avg5": 174,
+    "cpu_avg15": 110,
+    "mem_total": 7946684000.0,
+    "mem_allocated": 4340880000.0,
+    "nodename": "riak@127.0.0.1",
+    "connected_nodes": [
+
+    ],
+    "sys_driver_version": "1.5",
+    "sys_global_heaps_size": 0,
+    "sys_heap_type": "private",
+    "sys_logical_processors": 2,
+    "sys_otp_release": "R13B04",
+    "sys_process_count": 189,
+    "sys_smp_support": true,
+    "sys_system_version": "Erlang R13B04 (erts-5.7.5) [source] [64-bit] [smp:2:2] [rq:2] [async-threads:5] [hipe] [kernel-poll:true]",
+    "sys_system_architecture": "i386-apple-darwin10.3.0",
+    "sys_threads_enabled": true,
+    "sys_thread_pool_size": 5,
+    "sys_wordsize": 8,
+    "ring_members": [
+        "riak@127.0.0.1"
+    ],
+    "ring_num_partitions": 64,
+    "ring_ownership": "[{'riak@127.0.0.1',64}]",
+    "ring_creation_size": 64,
+    "storage_backend": "riak_kv_bitcask_backend",
+    "pbc_connects_total": 0,
+    "pbc_connects": 0,
+    "pbc_active": 0,
+    "riak_kv_version": "0.11.0",
+    "riak_core_version": "0.11.0",
+    "bitcask_version": "1.0.1",
+    "luke_version": "0.1",
+    "webmachine_version": "1.7.1",
+    "mochiweb_version": "1.7.1",
+    "erlang_js_version": "0.4",
+    "runtime_tools_version": "1.8.3",
+    "crypto_version": "1.6.4",
+    "os_mon_version": "2.9.1",
+    "sasl_version": "2.1.9",
+    "stdlib_version": "1.16.5",
+    "kernel_version": "2.13.5"
+}
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+## Output Explanation
+
+The output of `/stats` contains the output of `riak admin status` detailed in the [Inspecting a Node]({{}}riak/kv/3.0.1/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application.
+
+Stat | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total | Total number of ignored gossip messages since node was started
+rings_reconciled_total | Total number of ring reconciliation operations since node was started
+rings_reconciled | Number of ring reconciliation operations in the last minute
+gossip_received | Number of gossip messages received in the last minute
+rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/store-object.md b/content/riak/kv/3.0.1/developing/api/http/store-object.md
new file mode 100644
index 0000000000..6ffb1df633
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/store-object.md
@@ -0,0 +1,150 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/store-object
+  - /riak/kv/3.0.1/dev/references/http/store-object
+
+---
+
+Stores an object under the specified bucket/key. Storing an object comes in
+two forms, depending on whether you want to use a key of your choosing, or let
+Riak assign a key to a new object.
+
+## Request
+
+```bash
+POST /types/type/buckets/bucket/keys       # Riak-defined key
+PUT /types/type/buckets/bucket/keys/key    # User-defined key
+POST /buckets/bucket/keys                  # Riak-defined key
+PUT /buckets/bucket/keys/key               # User-defined key
+```
+
+For the sake of compatibility with older clients, `POST` is also acceptable in
+the form where the key is specified.
+
+Important headers:
+
+* `Content-Type` must be set for the stored object. Set what you expect to
+receive back when next requesting it.
+* `X-Riak-Vclock` if the object already exists, the vector clock attached to the
+object when read.
+* `X-Riak-Meta-*` - any additional metadata headers that should be stored with
+the object.
+* `X-Riak-Index-*` - index entries under which this object should be indexed.
+[Read more about Secondary Indexing]({{}}riak/kv/3.0.1/developing/api/http/secondary-indexes)
+* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/3.0.1/developing/api/http/link-walking)
+
+Optional headers (only valid on `PUT`):
+
+* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since`
+invoke conditional request semantics, matching on the `ETag` and `Last-Modified`
+of the existing object. These can be used to prevent overwriting a modified
+object. If the test fails, you will receive a `412 Precondition Failed`
+response. This does not prevent concurrent writes; it is possible for the
+condition to evaluate to true for multiple requests if the requests occur at the
+same time.
+
+Optional query parameters:
+
+* `w` (write quorum) how many replicas to write to before returning a successful
+response (default is defined at the bucket level)
+* `dw` (durable write quorum) how many replicas to commit to durable storage
+before returning a successful response (default is defined at the bucket level)
+* `pw` how many primary replicas must be online to attempt a write (default is
+defined at the bucket level)
+* `returnbody=[true|false]` whether to return the contents of the stored object.
+
+*This request must include a body (entity).*
+
+## Response
+
+Normal status codes:
+
+* `201 Created` (when submitting without a key)
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+Typical error codes:
+
+* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N)
+* `412 Precondition Failed` if one of the conditional request headers failed to
+match (see above)
+
+Important headers:
+
+* `Location` a relative URL to the newly-created object (when submitting without
+a key)
+
+If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/3.0.1/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices`
+may be returned if siblings existed or were created as part of the operation,
+and the response can be dealt with similarly.
+
+## Example: Storing Without Key
+
+```curl
+$ curl -v http://127.0.0.1:8098/buckets/test/keys \
+  -H "Content-Type: text/plain" -d 'this is a test'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> POST /buckets/test/keys HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: text/plain +> Content-Length: 14 +> +< HTTP/1.1 201 Created +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49 +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Example: Storing With Key + +```curl +$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA== +> Content-Length: 13 +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/http/store-search-index.md b/content/riak/kv/3.0.1/developing/api/http/store-search-index.md new file mode 100644 index 0000000000..c190dc929a --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/http/store-search-index.md @@ -0,0 +1,56 @@ +--- +title: "HTTP Store Search Index" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Store Search Index" + identifier: "http_store_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.1/dev/references/http/store-search-index + - /riak/kv/3.0.1/dev/references/http/store-search-index + +--- + +Creates a new Riak Search [index]({{}}riak/kv/3.0.1/developing/usage/search/#simple-setup). + +## Request + +``` +PUT /search/index/ +``` + +## Optional Request Body + +If you run a `PUT` request to this endpoint without a request body, Riak +will create a new Search index that uses the [default Search schema]({{}}riak/kv/3.0.1/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. + +To specify a different schema, however, you must pass Riak a JSON object +as the request body in which the `schema` field specifies the name of +the schema to use. If you've [stored a schema]({{}}riak/kv/3.0.1/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +request would create an index called `my_index` that used that schema: + +```curl +curl -XPUT http://localhost:8098/search/index/my_index \ + -H "Content-Type: application/json" \ + -d '{"schema": "my_custom_schema"}' +``` + +More information can be found in [Using Search]({{}}riak/kv/3.0.1/developing/usage/search). 
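+
+For example, to create an index that uses the default schema, the request body can simply be omitted; a minimal sketch (the index name `my_default_index` here is hypothetical):
+
+```curl
+# hypothetical index name; omitting the body selects the default schema
+curl -XPUT http://localhost:8098/search/index/my_default_index
+```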
+
+## Normal Response Codes
+
+* `204 No Content` - The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` - The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/http/store-search-schema.md b/content/riak/kv/3.0.1/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..bc2387aae1
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/http/store-search-schema.md
@@ -0,0 +1,54 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/http/store-search-schema
+  - /riak/kv/3.0.1/dev/references/http/store-search-schema
+
+---
+
+Creates a new Riak [Search schema]({{}}riak/kv/3.0.1/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/3.0.1/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response Codes
+
+* `204 No Content` - The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` - The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict` - The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..9402e27945
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers.md
@@ -0,0 +1,189 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/protocol-buffers
+  - /riak/kv/3.0.1/dev/references/protocol-buffers
+
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding)
+and one or more response messages. Messages are all encoded the same
+way, consisting of:
Messages are all encoded the same way, consisting of:
+
+* 32-bit length of message code + Protocol Buffers message in network
+  order
+* 8-bit message code to identify the Protocol Buffers message
+* N bytes of Protocol Buffers-encoded message
+
+### Example
+
+```
+00 00 00 07 09 0A 01 62 12 01 6B
+|----Len---|MC|----Message-----|
+
+Len = 0x07
+Message Code (MC) = 0x09 = RpbGetReq
+RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B
+
+Decoded Message:
+bucket: "b"
+key: "k"
+```
+
+## Message Codes
+
+Code | Message |
+:----|:--------|
+0 | `RpbErrorResp` |
+1 | `RpbPingReq` |
+2 | `RpbPingResp` |
+3 | `RpbGetClientIdReq` |
+4 | `RpbGetClientIdResp` |
+5 | `RpbSetClientIdReq` |
+6 | `RpbSetClientIdResp` |
+7 | `RpbGetServerInfoReq` |
+8 | `RpbGetServerInfoResp` |
+9 | `RpbGetReq` |
+10 | `RpbGetResp` |
+11 | `RpbPutReq` |
+12 | `RpbPutResp` |
+13 | `RpbDelReq` |
+14 | `RpbDelResp` |
+15 | `RpbListBucketsReq` |
+16 | `RpbListBucketsResp` |
+17 | `RpbListKeysReq` |
+18 | `RpbListKeysResp` |
+19 | `RpbGetBucketReq` |
+20 | `RpbGetBucketResp` |
+21 | `RpbSetBucketReq` |
+22 | `RpbSetBucketResp` |
+23 | `RpbMapRedReq` |
+24 | `RpbMapRedResp` |
+25 | `RpbIndexReq` |
+26 | `RpbIndexResp` |
+27 | `RpbSearchQueryReq` |
+28 | `RpbSearchQueryResp` |
+29 | `RpbResetBucketReq` |
+30 | `RpbResetBucketResp` |
+31 | `RpbGetBucketTypeReq` |
+32 | `RpbSetBucketTypeResp` |
+40 | `RpbCSBucketReq` |
+41 | `RpbCSUpdateReq` |
+50 | `RpbCounterUpdateReq` |
+51 | `RpbCounterUpdateResp` |
+52 | `RpbCounterGetReq` |
+53 | `RpbCounterGetResp` |
+54 | `RpbYokozunaIndexGetReq` |
+55 | `RpbYokozunaIndexGetResp` |
+56 | `RpbYokozunaIndexPutReq` |
+57 | `RpbYokozunaIndexPutResp` |
+58 | `RpbYokozunaSchemaGetReq` |
+59 | `RpbYokozunaSchemaGetResp` |
+60 | `RpbYokozunaSchemaPutReq` |
+80 | `DtFetchReq` |
+81 | `DtFetchResp` |
+82 | `DtUpdateReq` |
+83 | `DtUpdateResp` |
+253 | `RpbAuthReq` |
+254 | `RpbAuthResp` |
+255 | `RpbStartTls` |
+
+{{% note title="Message Definitions" %}}
+All Protocol Buffers messages are defined in the `riak.proto` and other
+`.proto` files in the `/src` directory of the
+RiakPB project.
+{{% /note %}}
+
+### Error Response
+
+If the request does not result in an error, Riak will return one of a
+variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`,
+depending on which request message is sent.
+
+If the server experiences an error processing a request, however, it
+will return an `RpbErrorResp` message instead of the response expected
+for the given request (e.g. `RpbGetResp` is the expected response to
+`RpbGetReq`). Error messages contain an error string and an error code,
+like this:
+
+```protobuf
+message RpbErrorResp {
+    required bytes errmsg = 1;
+    required uint32 errcode = 2;
+}
+```
+
+### Values
+
+* `errmsg` - A string representation of what went wrong
+* `errcode` - A numeric code. Currently, only `RIAKC_ERR_GENERAL=1`
+  is defined.
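+
+To make the framing concrete, here is a hedged Python sketch (not a full
+client) that pings Riak over the PBC port and checks for an error reply. The
+message codes (`1` = `RpbPingReq`, `2` = `RpbPingResp`, `0` = `RpbErrorResp`)
+come from the table above; the `127.0.0.1:8087` endpoint is an assumption:
+
+```python
+# Sketch of the wire format described above: a 4-byte big-endian length
+# (message code byte + payload), a 1-byte message code, then the
+# Protocol Buffers-encoded payload.
+import socket
+import struct
+
+def send_frame(sock, msg_code, payload=b""):
+    # The length prefix counts the message code byte plus the payload
+    sock.sendall(struct.pack(">I", 1 + len(payload)) + bytes([msg_code]) + payload)
+
+def recv_exact(sock, n):
+    buf = b""
+    while len(buf) < n:
+        chunk = sock.recv(n - len(buf))
+        if not chunk:
+            raise ConnectionError("connection closed mid-frame")
+        buf += chunk
+    return buf
+
+def recv_frame(sock):
+    (length,) = struct.unpack(">I", recv_exact(sock, 4))
+    body = recv_exact(sock, length)
+    return body[0], body[1:]  # (message code, protobuf payload)
+
+with socket.create_connection(("127.0.0.1", 8087)) as s:
+    send_frame(s, 1)           # RpbPingReq carries no payload
+    code, payload = recv_frame(s)
+    if code == 0:              # RpbErrorResp instead of the expected reply
+        raise RuntimeError("Riak returned an error: %r" % payload)
+    assert code == 2           # RpbPingResp, also payload-free
+```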
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/yz-schema-put) + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..75a8e42920 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,34 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/auth-req + - /riak/kv/3.0.1/dev/references/protocol-buffers/auth-req + +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/3.0.1/using/security/basics). + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..6f2845dbe7 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,82 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "3.0.1" +menu: + riak_kv-3.0.1: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/coverage-queries + - /riak/kv/3.0.1/dev/references/protocol-buffers/coverage-queries + +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
+ + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/delete-object.md new file mode 100644 index 0000000000..fb81b057c6 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/delete-object.md @@ -0,0 +1,104 @@ +--- +title: "PBC Delete Object" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Delete Object" + identifier: "pbc_delete_object" + weight: 107 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/delete-object + - /riak/kv/3.0.1/dev/references/protocol-buffers/delete-object + +--- + +Delete an object in the specified [bucket type]({{}}riak/kv/3.0.1/using/cluster-operations/bucket-types)/bucket/key location. + +## Request + +```protobuf +message RpbDelReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 rw = 3; + optional bytes vclock = 4; + optional uint32 r = 5; + optional uint32 w = 6; + optional uint32 pr = 7; + optional uint32 pw = 8; + optional uint32 dw = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + +#### Required Parameters + +Parameter | Description | +:---------|:------------| +`bucket` | The name of the bucket in which the object is stored +`key` | The key under which the object is stored + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description | +:---------|:------------| +`rw` | How many replicas to delete before returning a successful response +`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message Used to prevent deleting of objects that have been modified since the last GET request (sent as a byte array) +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes to which the delete request will be sent +`type` | The bucket types associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter. + +## Response + +Only the message code is returned. 
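+
+As a sanity check on the encoding, the request frame shown in the example
+below can be rebuilt by hand. This hedged sketch encodes the two
+length-delimited fields (`bucket`, `key`) and the `rw` varint, then frames
+them with message code 13 (`RpbDelReq`, per the message-code table); it only
+handles single-byte tags, lengths, and values, which is all this example
+needs:
+
+```python
+# Protocol Buffers field tags are (field_number << 3) | wire_type,
+# where wire type 2 is length-delimited and wire type 0 is varint.
+import struct
+
+def pb_bytes(field_number, value):   # wire type 2, length < 128 assumed
+    return bytes([(field_number << 3) | 2, len(value)]) + value
+
+def pb_varint(field_number, value):  # wire type 0, value < 128 assumed
+    return bytes([(field_number << 3) | 0, value])
+
+payload = pb_bytes(1, b"notabucket") + pb_bytes(2, b"k") + pb_varint(3, 1)
+frame = struct.pack(">I", 1 + len(payload)) + bytes([13]) + payload
+assert frame.hex() == "000000120d0a0a6e6f74616275636b657412016b1801"
+```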
+ +## Example + +#### Request + +``` +Hex 00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65 + 74 12 01 6B 18 01 +Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>> + +RpbDelReq protoc decode: +bucket: "notabucket" +key: "k" +rw: 1 + +``` + +#### Response + +``` +Hex 00 00 00 01 0E +Erlang <<0,0,0,1,14>> + +RpbDelResp - only message code defined +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-counter-store.md new file mode 100644 index 0000000000..c546941649 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-counter-store.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Counter Store" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Data Type Counter Store" + identifier: "pbc_dt_counter_store" + weight: 117 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/dt-counter-store + - /riak/kv/3.0.1/dev/references/protocol-buffers/dt-counter-store + +--- + +An operation to update a [counter]({{}}riak/kv/3.0.1/developing/data-types). + +## Request + +```protobuf +message CounterOp { + optional sint64 increment = 1; +} +``` + +The `increment` value specifies how much the counter will be incremented +or decremented, depending on whether the `increment` value is positive +or negative. This operation can be used to update counters that are +stored on their own in a key or [within a map]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-map-store). + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..9edfaadd49 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,131 @@ +--- +title: "PBC Data Type Fetch" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Data Type Fetch" + identifier: "pbc_dt_fetch" + weight: 114 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/dt-fetch + - /riak/kv/3.0.1/dev/references/protocol-buffers/dt-fetch + +--- + +The equivalent of [`RpbGetReq`]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/3.0.1/developing/data-types). This request results in a `DtFetchResp` +message (explained in the **Response** section below). + +## Request + +```protobuf +message DtFetchReq { + required bytes bucket = 1; + required bytes key = 2; + required bytes type = 3; + optional uint32 r = 4; + optional uint32 pr = 5; + optional bool basic_quorum = 6; + optional bool notfound_ok = 7; + optional uint32 timeout = 8; + optional bool sloppy_quorum = 9; + optional uint32 n_val = 10; + optional bool include_context = 11 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`key` | The key where the Data Type is stored +`type` | The [Using Bucket Types]({{}}riak/kv/3.0.1/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) + +#### Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. 
Please refer to the documentation on [setting bucket properties]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-props) for more information.
+>
+> Furthermore, you can assign an integer value to the `r` and
+> `pr`, provided that that integer value is less than or equal
+> to N, _or_ a special value denoting `one` (`4294967295-1`),
+> `quorum` (`4294967295-2`), `all` (`4294967295-3`), or `default`
+> (`4294967295-4`).
+
+Parameter | Description
+:---------|:-----------
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the fetch request will be sent
+`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client
+
+## Response
+
+The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+
+```protobuf
+message DtFetchResp {
+    enum DataType {
+        COUNTER = 1;
+        SET = 2;
+        MAP = 3;
+    }
+
+    optional bytes context = 1;
+    required DataType type = 2;
+    optional DtValue value = 3;
+}
+```
+
+If the `include_context` option is specified, an opaque "context" value
+will be returned along with the user-readable data. When sending an
+update request, the client should send this context as well, just as one
+would send a [vclock]({{}}riak/kv/3.0.1/learn/glossary/#vector-clock) for standard KV updates.
+
+The type of the Data Type is specified in the `type` field, and must be
+one of the three possible values of the `DataType` enum (`COUNTER`,
+`SET`, or `MAP`).
+
+The current value of the Data Type is contained in the `value` field,
+which itself contains a `DtValue` message. This message will have the
+following structure:
+
+```protobuf
+message DtValue {
+    optional sint64 counter_value = 1;
+    repeated bytes set_value = 2;
+    repeated MapEntry map_value = 3;
+}
+```
+
+If the Data Type queried is a counter, it will return an integer value
+for the counter; if a set, it will return the set's current value, in
+bytes; if a map, it will return a `MapEntry` message.
`MapEntry` messages +are structured as follows: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-map-store.md new file mode 100644 index 0000000000..572b5ed3d6 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-map-store.md @@ -0,0 +1,77 @@ +--- +title: "PBC Data Type Map Store" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Data Type Map Store" + identifier: "pbc_dt_map_store" + weight: 119 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/dt-map-store + - /riak/kv/3.0.1/dev/references/protocol-buffers/dt-map-store + +--- + +An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc. + +## Request + +Operations on maps are requested using a `MapOp` message, which has the following structure: + +```protobuf +message MapOp { + repeated MapField adds = 1; + repeated MapField removes = 2; + repeated MapUpdate updates = 3; +} +``` + +In a `MapOp` message, you can either add or remove fields (sets, counters, or maps) to or from the map or update a field or multiple fields. You can include as many field additions or removals and/or field updates as you wish. + +Adding or removing a field involves including a `MapField` message in your `MapOp` operation: + +```protobuf +message MapField { + enum MapFieldType { + COUNTER = 1; + SET = 2; + REGISTER = 3; + FLAG = 4; + MAP = 5; + } + required bytes name = 1; + required MapFieldType type = 2; +} +``` + +The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated. + +If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure: + +```protobuf +message MapUpdate { + enum FlagOp { + ENABLE = 1; + DISABLE = 2; + } + required MapField field = 1; + optional CounterOp counter_op = 2; + optional SetOp set_op = 3; + optional bytes register_op = 4; + optional FlagOp flag_op = 5; + optional MapOp map_op = 6; +} +``` + +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-set-store). + +If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). + +Updating a register does not involve sending a special message type. 
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..f39f1e7aec --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,36 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/dt-set-store + - /riak/kv/3.0.1/dev/references/protocol-buffers/dt-set-store + +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..5a78379394 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,132 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/dt-store + - /riak/kv/3.0.1/dev/references/protocol-buffers/dt-store + +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/3.0.1/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/3.0.1/using/cluster-operations/bucket-types). 
+ +Also required is a `DtOp` message that specifies which operation is to +be performed, depending on whether the Data Type being updated is a +[counter]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-map-store). + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description +:---------|:----------- +`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/3.0.1/learn/glossary/#vector-clock) +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_body` | Whether to return the contents of the stored object. Defaults to `false`. +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored +`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be returned to the client when the `DtUpdateResp` is sent to the client. + +## Response + +The response to a Data Type update request is analogous to +[`RpbPutResp`]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/store-object) for KV operations. If the +`return_body` is set in the update request message (as explained above), +the message will include the opaque context of the Data Type (`context`) +and the new value of the Data Type _after_ the update has completed +(depending on whether the Data Type is a counter, set, or map). If no +key was specified in the update request, it will include the +Riak-assigned key (`key`). + +```protobuf +message DtUpdateResp { + optional bytes key = 1; + optional bytes context = 2; + optional sint64 counter_value = 3; + repeated bytes set_value = 4; + repeated MapEntry map_value = 5; +} +``` + +Assuming `return_body` is set to `true`: if a counter is updated, the +response will include an integer as the `counter_value`; if a set is +updated, a list of binaries will be return as the `set_value`; and if a +map is updated, the returned `map_value` will be a `MapEntry` message. 
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..b2c656b9f4 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/dt-union + - /riak/kv/3.0.1/dev/references/protocol-buffers/dt-union + +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/dt-store) message. + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..96e5add56a --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,185 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/fetch-object + - /riak/kv/3.0.1/dev/references/protocol-buffers/fetch-object + +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match
+`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it
+`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+
+## Response
+
+```protobuf
+message RpbGetResp {
+    repeated RpbContent content = 1;
+    optional bytes vclock = 2;
+    optional bool unchanged = 3;
+}
+```
+
+#### Values
+
+Value | Description
+:-----|:-----------
+`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty.
+`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings
+`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true`
+
+The content entries hold the object value and any metadata.
+Below is the structure of an `RpbContent` message, which is
+included in GET/PUT responses (`RpbGetResp` (above) and
+[`RpbPutResp`]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/store-object), respectively):
+
+```protobuf
+message RpbContent {
+    required bytes value = 1;
+    optional bytes content_type = 2;
+    optional bytes charset = 3;
+    optional bytes content_encoding = 4;
+    optional bytes vtag = 5;
+    repeated RpbLink links = 6;
+    optional uint32 last_mod = 7;
+    optional uint32 last_mod_usecs = 8;
+    repeated RpbPair usermeta = 9;
+    repeated RpbPair indexes = 10;
+    optional bool deleted = 11;
+}
+```
+
+From the above, we can see that an `RpbContent` message will always
+contain the binary `value` of the object. But it could also contain any
+of the following optional parameters:
+
+* `content_type` - The content type of the object, e.g. `text/plain`
+  or `application/json`
+* `charset` - The character encoding of the object, e.g. `utf-8`
+* `content_encoding` - The content encoding of the object, e.g.
+  `video/mp4`
+* `vtag` - The object's [vtag]({{}}riak/kv/3.0.1/learn/glossary/#vector-clock)
+* `links` - This parameter is associated with the now-deprecated link
+  walking feature and should not be used by Riak clients
+* `last_mod` - A timestamp for when the object was last modified, in
+  seconds since the [Unix epoch](http://en.wikipedia.org/wiki/Unix_time)
+* `last_mod_usecs` - The microseconds portion of the last-modified
+  timestamp
+* `usermeta` - This field stores user-specified key/value metadata
+  pairs to be associated with the object. `RpbPair` messages used to
+  send metadata of this sort are structured like this:
+
+    ```protobuf
+    message RpbPair {
+        required bytes key = 1;
+        optional bytes value = 2;
+    }
+    ```
+    Notice that you can store both a key and a value or just a key.
+    `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes) to objects (in the optional
+    `indexes` field).
+* `deleted` - Whether the object has been deleted (i.e. whether a
+  tombstone for the object has been found under the specified key)
+
+{{% note title="Note on missing keys" %}}
+Remember: if a key is not stored in Riak, an `RpbGetResp` response without the
+`content` and `vclock` fields will be returned. This should be mapped to
+whatever convention the client language uses to return not found. The Erlang
+client, for example, returns the atom `{error, notfound}`.
+{{% /note %}}
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 07 09 0A 01 62 12 01 6B
+Erlang   <<0,0,0,7,9,10,1,98,18,1,107>>
+
+RpbGetReq protoc decode:
+bucket: "b"
+key: "k"
+```
+
+#### Response
+
+```
+Hex      00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44
+         6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B
+         49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B
+         CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65
+         30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00
+Erlang   <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122,
+           56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64,
+           224,185,6,18,31,107,206,97,96,96,96,204,96,226,82,44,172,194,91,63,
+           101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>>
+
+RpbGetResp protoc decode:
+content {
+  value: "v2"
+  vtag: "3SDlf4INKz8hNdhyImKIru"
+  last_mod: 1271442363
+  last_mod_usecs: 105696
+}
+vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000"
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-props.md
new file mode 100644
index 0000000000..6d9a76a71a
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-props.md
@@ -0,0 +1,114 @@
+---
+title: "PBC Get Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Get Bucket Properties"
+    identifier: "pbc_get_bucket_props"
+    weight: 102
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/protocol-buffers/get-bucket-props
+  - /riak/kv/3.0.1/dev/references/protocol-buffers/get-bucket-props
+
+---
+
+Fetch a bucket's properties.
+
+## Request
+
+```protobuf
+message RpbGetBucketReq {
+    required bytes bucket = 1;
+    optional bytes type = 2;
+}
+```
+
+The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/3.0.1/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
+the `default` bucket type will be used.
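+
+As a minimal, hedged sketch (not a full protobuf encoder), an
+`RpbGetBucketReq` can be framed by hand using message code 19 from the
+message-code table. The bucket name `test` is an assumption, and the optional
+`type` field is simply omitted so that the `default` bucket type applies:
+
+```python
+import struct
+
+bucket = b"test"
+# Field 1 (bucket), wire type 2: tag byte 0x0A, then length, then bytes
+payload = bytes([0x0A, len(bucket)]) + bucket
+# To target a named bucket type instead, append field 2 the same way:
+#   payload += bytes([0x12, len(bucket_type)]) + bucket_type
+frame = struct.pack(">I", 1 + len(payload)) + bytes([19]) + payload  # 19 = RpbGetBucketReq
+```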
+ +## Response + +When an `RpbGetBucketReq` message is sent to Riak, it will respond with +an `RpbGetBucketResp` message, which returns the bucket's properties: + +```protobuf +message RpbGetBucketResp { + required RpbBucketProps props = 1; +} +``` + +The `RpbBucketProps` value itself is structured as follows: + +```protobuf +message RpbBucketProps { + optional uint32 n_val = 1; + optional bool allow_mult = 2; + optional bool last_write_wins = 3; + repeated RpbCommitHook precommit = 4; + optional bool has_precommit = 5 [default = false]; + repeated RpbCommitHook postcommit = 6; + optional bool has_postcommit = 7 [default = false]; + optional RpbModFun chash_keyfun = 8; + optional RpbModFun linkfun = 9; + optional uint32 old_vclock = 10; + optional uint32 young_vclock = 11; + optional uint32 big_vclock = 12; + optional uint32 small_vclock = 13; + optional uint32 pr = 14; + optional uint32 r = 15; + optional uint32 w = 16; + optional uint32 pw = 17; + optional uint32 dw = 18; + optional uint32 rw = 19; + optional bool basic_quorum = 20; + optional bool notfound_ok = 21; + optional bytes backend = 22; + optional bool search = 23; + enum RpbReplMode { + FALSE = 0; + REALTIME = 1; + FULLSYNC = 2; + TRUE = 3; + } + optional RpbReplMode repl = 24; + optional bytes search_index = 25; + optional bytes datatype = 26; + optional bool consistent = 27; +} +``` + +#### Optional Response Values + +Each `RpbBucketProps` message returns all of the properties associated +with a particular bucket. Default values for bucket properties, as well +as descriptions of all of the above properties, can be found in the +[configuration file]({{}}riak/kv/3.0.1/configuring/reference/#default-bucket-properties) documentation. + +It should be noted that the value of an `RpbBucketProps` message may +include other message types, such as `RpbModFun` (specifying +module-function pairs for bucket properties that require them) and +`RpbCommitHook` (specifying the module-function pair and name of a +commit hook). Those message types are structured like this: + +```protobuf +message RpbModFun { + required bytes module = 1; + required bytes function = 2; +} + +message RpbCommitHook { + optional RpbModFun modfun = 1; + optional bytes name = 2; +} +``` + +{{% note title="Note on `RpbReplMode`" %}} +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) +{{% /note %}} + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-type.md new file mode 100644 index 0000000000..0ba3d01054 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-type.md @@ -0,0 +1,37 @@ +--- +title: "PBC Get Bucket Type" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Get Bucket Type" + identifier: "pbc_get_bucket_type" + weight: 112 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/get-bucket-type + - /riak/kv/3.0.1/dev/references/protocol-buffers/get-bucket-type + +--- + +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/3.0.1/using/cluster-operations/bucket-types). + +## Request + +```protobuf +message RpbGetBucketTypeReq { + required bytes type = 1; +} +``` + +Only the name of the bucket type needs to be specified (under `name`). 
+ +## Response + +A bucket type's properties will be sent to the client as part of an +[`RpbBucketProps`]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-props) message. + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/get-client-id.md new file mode 100644 index 0000000000..11afebfc17 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/get-client-id.md @@ -0,0 +1,65 @@ +--- +title: "PBC Get Client ID" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Get Client ID" + identifier: "pbc_get_client_id" + weight: 127 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/get-client-id + - /riak/kv/3.0.1/dev/references/protocol-buffers/get-client-id + +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Get the client id used for this connection. Client ids are used for +conflict resolution and each unique actor in the system should be +assigned one. A client id is assigned randomly when the socket is +connected and can be changed using [Set Client ID]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/set-client-id). + +## Request + +Just the `RpbGetClientIdReq` message code. No request message defined. + +## Response + +```protobuf +// Get ClientId Request - no message defined, just send RpbGetClientIdReq +message code +message RpbGetClientIdResp { + required bytes client_id = 1; // Client id in use for this connection +} +``` + +## Example + +Request + +``` +Hex 00 00 00 01 03 +Erlang <<0,0,0,1,3>> +``` + + +Response + +``` +Hex 00 00 00 07 04 0A 04 01 65 01 B5 +Erlang <<0,0,0,7,4,10,4,1,101,1,181>> + +RpbGetClientIdResp protoc decode: +client_id: "001e001265" +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..ad0e275c67 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,80 @@ +--- +title: "PBC List Buckets" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "List Buckets" + identifier: "pbc_list_buckets" + weight: 100 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/list-buckets + - /riak/kv/3.0.1/dev/references/protocol-buffers/list-buckets + +--- + +List all of the bucket names available. + +{{% note title="Caution" %}} +This call can be expensive for the server. Do not use in performance-sensitive +code. +{{% /note %}} + + +## Request + +Only the message code is required. 
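+
+Because the request is nothing but a message code, a hedged sketch of the
+round trip is tiny. The parser below hand-decodes the repeated `buckets`
+field of the `RpbListBucketsResp` defined in the next section, assuming
+single-byte tags and lengths (enough for short bucket names like those in
+the example that follows):
+
+```python
+import struct
+
+# RpbListBucketsReq is message code 15 with an empty payload:
+request = struct.pack(">I", 1) + bytes([15])  # 00 00 00 01 0F, as in the example
+
+def parse_list_buckets_resp(payload):
+    buckets, i = [], 0
+    while i < len(payload):
+        assert payload[i] == 0x0A             # field 1 (buckets), wire type 2
+        length = payload[i + 1]
+        buckets.append(payload[i + 2:i + 2 + length])
+        i += 2 + length
+    return buckets
+
+print(parse_list_buckets_resp(bytes.fromhex("0a0262310a026235")))  # [b'b1', b'b5']
+```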
+
+## Response
+
+```protobuf
+message RpbListBucketsResp {
+    repeated bytes buckets = 1;
+}
+```
+
+Values
+
+* `buckets` - Buckets on the server
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 01 0F
+Erlang   <<0,0,0,1,15>>
+
+RpbListBucketsReq - only message code defined
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62
+         34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02
+         62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37
+Erlang   <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10,
+           3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>>
+
+RpbListBucketsResp protoc decode:
+buckets: "b1"
+buckets: "b5"
+buckets: "b4"
+buckets: "b8"
+buckets: "b3"
+buckets: "b10"
+buckets: "b9"
+buckets: "b2"
+buckets: "b6"
+buckets: "b7"
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/list-keys.md
new file mode 100644
index 0000000000..4cca518efe
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/list-keys.md
@@ -0,0 +1,101 @@
+---
+title: "PBC List Keys"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "List Keys"
+    identifier: "pbc_list_keys"
+    weight: 101
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/protocol-buffers/list-keys
+  - /riak/kv/3.0.1/dev/references/protocol-buffers/list-keys
+
+---
+
+List all of the keys in a bucket. This is a streaming call, with
+multiple response messages sent for each request.
+
+{{% note title="Not for production use" %}}
+This operation requires traversing all keys stored in the cluster and should
+not be used in production.
+{{% /note %}}
+
+## Request
+
+```protobuf
+message RpbListKeysReq {
+    required bytes bucket = 1;
+}
+```
+
+Required Parameters
+
+* `bucket` - bucket to get keys from
+
+## Response
+
+```protobuf
+message RpbListKeysResp {
+    repeated bytes keys = 1;
+    optional bool done = 2;
+}
+```
+
+#### Values
+
+* **keys** - batch of keys in the bucket
+* **done** - set to `true` on the last response packet
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73
+Erlang   <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>>
+
+RpbListKeysReq protoc decode:
+bucket: "listkeys"
+
+```
+
+#### Response Packet 1
+
+```bash
+Hex      00 00 00 04 12 0A 01 34
+Erlang   <<0,0,0,4,18,10,1,52>>
+
+RpbListKeysResp protoc decode:
+keys: "4"
+
+```
+
+#### Response Packet 2
+
+```bash
+Hex      00 00 00 08 12 0A 02 31 30 0A 01 33
+Erlang   <<0,0,0,8,18,10,2,49,48,10,1,51>>
+
+RpbListKeysResp protoc decode:
+keys: "10"
+keys: "3"
+```
+
+#### Response Packet 3
+
+```bash
+Hex      00 00 00 03 12 10 01
+Erlang   <<0,0,0,3,18,16,1>>
+
+RpbListKeysResp protoc decode:
+done: true
+
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/mapreduce.md
new file mode 100644
index 0000000000..cba04cee91
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/mapreduce.md
@@ -0,0 +1,153 @@
+---
+title: "PBC MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "MapReduce"
+    identifier: "pbc_mapreduce"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/protocol-buffers/mapreduce
+  - /riak/kv/3.0.1/dev/references/protocol-buffers/mapreduce
+
+---
+
+Execute a MapReduce job.
+
+## Request
+
+```protobuf
+message RpbMapRedReq {
+    required bytes request = 1;
+    required bytes content_type = 2;
+}
+```
+
+Required Parameters
+
+* `request` - MapReduce job
+* `content_type` - Encoding for MapReduce job
+
+MapReduce jobs can be encoded in two different ways:
+
+* `application/json` - JSON-encoded MapReduce job
+* `application/x-erlang-binary` - Erlang external term format
+
+The JSON encoding is the same as the [REST API]({{}}riak/kv/3.0.1/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{}}riak/kv/3.0.1/developing/app-guide/advanced-mapreduce/#erlang).
+
+## Response
+
+The results of the MapReduce job are returned for each phase that
+generates a result, encoded in the same format the job was submitted in.
+Multiple response messages will be returned followed by a final message
+at the end of the job.
+
+```protobuf
+message RpbMapRedResp {
+    optional uint32 phase = 1;
+    optional bytes response = 2;
+    optional bool done = 3;
+}
+```
+
+Values
+
+* `phase` - Phase number of the MapReduce job
+* `response` - Response encoded with the `content_type` submitted
+* `done` - Set `true` on the last response packet
+
+## Example
+
+Here is an example of submitting a JSON-encoded job that sums up a
+bucket full of JSON-encoded values.
+
+```
+{"inputs": "bucket_501653",
+ "query":
+    [{"map": {"arg": null,
+              "name": "Riak.mapValuesJson",
+              "language": "javascript",
+              "keep": false}},
+     {"reduce": {"arg": null,
+                 "name": "Riak.reduceSum",
+                 "language": "javascript",
+                 "keep": true}}]}
+```
+
+Request
+
+```bash
+Hex      00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
+         22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
+         33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
+         6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
+         6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
+         6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
+         2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
+         61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
+         70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
+         65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
+         6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
+         69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
+         22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
+         61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
+         3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
+         69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
+Erlang   <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
+           117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
+           121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
+           110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
+           109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
+           110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
+           116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
+           44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
+           32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
+           46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
+           97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
+           34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
+           112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>
+
+RpbMapRedReq protoc decode:
+request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null,
+"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}},
+ {"reduce": {"arg": null, "name": "Riak.reduceSum", "language":
+"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/ping.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..57a10ffe2e --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/ping.md @@ -0,0 +1,46 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/ping + - /riak/kv/3.0.1/dev/references/protocol-buffers/ping + +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..55ccf5b6f8 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,63 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/3.0.1/dev/references/protocol-buffers/reset-bucket-props + +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/3.0.1/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
+
+## Example
+
+Request to reset the properties for the bucket `friends`:
+
+#### Request
+
+```bash
+Hex      00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
+Erlang   <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>
+
+RpbResetBucketReq protoc decode:
+bucket: "friends"
+
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 01 1E
+Erlang   <<0,0,0,1,30>>
+
+RpbResetBucketResp - only message code defined
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/search.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..45fad8c2fc
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/search.md
@@ -0,0 +1,152 @@
+---
+title: "PBC Search"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Search"
+    identifier: "pbc_search"
+    weight: 109
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/protocol-buffers/search
+  - /riak/kv/3.0.1/dev/references/protocol-buffers/search
+
+---
+
+Send a Search request to retrieve a list of documents, along with a few
+stats.
+
+## Request
+
+```protobuf
+message RpbSearchQueryReq {
+    required bytes q = 1;
+    required bytes index = 2;
+    optional uint32 rows = 3;
+    optional uint32 start = 4;
+    optional bytes sort = 5;
+    optional bytes filter = 6;
+    optional bytes df = 7;
+    optional bytes op = 8;
+    repeated bytes fl = 9;
+    optional bytes presort = 10;
+}
+```
+
+Required Parameters
+
+* `q` - The contents of the query
+* `index` - The name of the index to search
+
+Optional Parameters
+
+* `rows` - The maximum number of rows to return
+* `start` - A start offset, i.e. the number of keys to skip before
+  returning values
+* `sort` - How the search results are to be sorted
+* `filter` - Filters search with additional query scoped to inline
+  fields
+* `df` - Override the `default_field` setting in the schema file
+* `op` - `and` or `or`, to override the `default_op` operation setting
+  in the schema file
+* `fl` - Return the fields limit
+* `presort` - Presort. The options are `key` or `score`
+
+## Response
+
+The results of a search query are returned as a repeating list of 0 or
+more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
+more key/value pairs (`RpbPair`) that match the given request
+parameters. It also returns the maximum search score and the number of
+results.
+
+```protobuf
+// RpbPair is a generic key/value pair datatype used for
+// other message types
+message RpbPair {
+    required bytes key = 1;
+    optional bytes value = 2;
+}
+
+message RpbSearchDoc {
+    repeated RpbPair fields = 1;
+}
+
+message RpbSearchQueryResp {
+    repeated RpbSearchDoc docs = 1;
+    optional float max_score = 2;
+    optional uint32 num_found = 3;
+}
+```
+
+Values
+
+* `docs` - A list of docs that match the search request
+* `max_score` - The top score returned
+* `num_found` - Returns the total number of values matched by this
+  search
+
+## Example
+
+Request
+
+Here we search for any animals that begin with the string `pig`. We only
+want the first 100, and sort the values by a `name` field.
+
+```bash
+RpbSearchQueryReq protoc decode:
+q: "pig*"
+index: "animals"
+rows: 100
+start: 0
+sort: "name"
+
+Hex      00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E
+         69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65
+Erlang   <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110,
+           105,109,97,108,115,24,100,32,0,42,4,110,97,
+           109,101>>
+```
+
+Response
+
+```bash
+Hex      00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D
+         61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65
+         12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69
+         6D 61 6C 12 06 70 69 67 65 6F 6E 18 02
+Erlang   <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109,
+           97,108,18,3,112,105,103,10,12,10,4,110,97,
+           109,101,18,4,102,114,101,100,10,18,10,16,10,
+           6,97,110,105,109,97,108,18,6,112,105,103,
+           101,111,110,24,2>>
+
+RpbSearchQueryResp protoc decode:
+docs {
+  fields {
+    key: "animal"
+    value: "pig"
+  }
+  fields {
+    key: "name"
+    value: "fred"
+  }
+}
+docs {
+  fields {
+    key: "animal"
+    value: "pigeon"
+  }
+}
+num_found: 2
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/secondary-indexes.md
new file mode 100644
index 0000000000..8475b06336
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/secondary-indexes.md
@@ -0,0 +1,125 @@
+---
+title: "PBC Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Secondary Indexes"
+    identifier: "pbc_secondary_indexes"
+    weight: 108
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/protocol-buffers/secondary-indexes
+  - /riak/kv/3.0.1/dev/references/protocol-buffers/secondary-indexes
+
+---
+
+Request a set of keys that match a secondary index query.
+
+## Request
+
+```protobuf
+message RpbIndexReq {
+  enum IndexQueryType {
+    eq = 0;
+    range = 1;
+  }
+  required bytes bucket = 1;
+  required bytes index = 2;
+  required IndexQueryType qtype = 3;
+  optional bytes key = 4;
+  optional bytes range_min = 5;
+  optional bytes range_max = 6;
+  optional bool return_terms = 7;
+  optional bool stream = 8;
+  optional uint32 max_results = 9;
+  optional bytes continuation = 10;
+  optional uint32 timeout = 11;
+  optional bytes type = 12;
+  optional bytes term_regex = 13;
+  optional bool pagination_sort = 14;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket in which the queried objects are stored
+`index` | The name of the index to be queried
+`qtype` | The type of index query to be performed.
 This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`key` | The exact index value to match if `qtype` is set to `eq`
+`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
+`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
+`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
+`max_results` | If pagination is turned on, the number of results to be returned to the client
+`continuation` | An opaque continuation value returned by a previous paginated response, used to retrieve the next page of results
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/3.0.1/developing/usage/bucket-types).
+`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
+`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
+
+## Response
+
+The results of a Secondary Index query are returned as a repeating list
+of 0 or more keys that match the given request parameters.
+
+```protobuf
+message RpbIndexResp {
+  repeated bytes keys = 1;
+  repeated RpbPair results = 2;
+  optional bytes continuation = 3;
+  optional bool done = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`keys` | A list of keys that match the index request
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/fetch-object).
+`continuation` | Used for paginated responses
+`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
+
+## Example
+
+#### Request
+
+Here we look for any exact matches of `chicken` on an `animal_bin` index
+for a bucket named `farm`.
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/server-info.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..19c1e57951 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,62 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/server-info + - /riak/kv/3.0.1/dev/references/protocol-buffers/server-info + +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..be93089d33 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,72 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/3.0.1/dev/references/protocol-buffers/set-bucket-props + +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/3.0.1/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
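+
+For instance, with the official [Erlang client](https://github.com/basho/riak-erlang-client), setting a property is a one-liner. A minimal sketch, assuming a running node on `localhost`; `set_bucket/3` wraps `RpbSetBucketReq`:
+
+```erlang
+%% Sketch: enable sibling creation on the bucket "friends" by
+%% setting allow_mult to true.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
+```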
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..4aed405bf9 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,35 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/3.0.1/dev/references/protocol-buffers/set-bucket-type + +--- + +Assigns a set of [bucket properties]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/3.0.1/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/get-bucket-props). + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..63becda453 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,66 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/set-client-id + - /riak/kv/3.0.1/dev/references/protocol-buffers/set-client-id + +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
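+
+Given the deprecation notice above, few applications should need this. As an illustration only, the Erlang client has historically exposed a `set_client_id/2` helper (a sketch, assuming that helper is still available):
+
+```erlang
+%% Sketch: assign an application-chosen client ID to this connection.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+_ = riakc_pb_socket:set_client_id(Pid, <<"my-app-worker-1">>).
+```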
+
+## Example
+
+Request
+
+```
+Hex      00 00 00 07 05 0A 04 01 65 01 B6
+Erlang   <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "001e001266"
+
+```
+
+
+Response
+
+```
+Hex      00 00 00 01 06
+Erlang   <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/store-object.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..34b3436026
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,154 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/protocol-buffers/store-object
+  - /riak/kv/3.0.1/dev/references/protocol-buffers/store-object
+
+---
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/3.0.1/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/3.0.1/learn/concepts/buckets), and [bucket type]({{}}riak/kv/3.0.1/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/3.0.1/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/3.0.1/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+  required bytes bucket = 1;
+  optional bytes key = 2;
+  optional bytes vclock = 3;
+  required RpbContent content = 4;
+  optional uint32 w = 5;
+  optional uint32 dw = 6;
+  optional bool return_body = 7;
+  optional uint32 pw = 8;
+  optional bool if_not_modified = 9;
+  optional bool if_none_match = 10;
+  optional bool return_head = 11;
+  optional uint32 timeout = 12;
+  optional bool asis = 13;
+  optional bool sloppy_quorum = 14;
+  optional uint32 n_val = 15;
+  optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that the integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier RpbGetResp message. Omit if this is a new key or if you deliberately want to create a sibling.
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`.
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_head` | Return the metadata for the now-stored object without returning the value of the object
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+
+The `if_not_modified`, `if_none_match`, and `asis` parameters are set
+only for messages sent between nodes in a Riak cluster and should not be
+set by Riak clients.
+
+#### Response
+
+```protobuf
+message RpbPutResp {
+  repeated RpbContent contents = 1;
+  optional bytes vclock = 2;
+  optional bytes key = 3;
+}
+```
+
+If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
+will contain the current object after the PUT completes, in `contents`,
+as well as the object's [causal context]({{}}riak/kv/3.0.1/learn/concepts/causal-context), in the `vclock`
+field. The `key` will be sent only if the server generated a random key
+for the object.
+
+If `return_body` is not set and no key is generated, the PUT response
+will be empty.
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B
+         22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01
+Erlang   <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34,
+         98,97,114,34,125,40,2,56,1>>
+
+RpbPutReq protoc decode:
+bucket: "b"
+key: "k"
+content {
+  value: "{"foo":"bar"}"
+}
+w: 2
+return_body: true
+
+```
+
+#### Response
+
+```
+Hex      00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A
+         22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39
+         36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF
+         B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60
+         CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC
+         0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0
+         12 FA 20 89 2C 00
+Erlang   <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125,
+         42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86,
+         106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96,
+         96,96,202,96,226,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33,
+         182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>>
+
+RpbPutResp protoc decode:
+contents {
+  value: "{"foo":"bar"}"
+  vtag: "1caykOD96iNAhomyeVjOYC"
+  last_mod: 1271453743
+  last_mod_usecs: 406416
+}
+vclock: "k316a```312`312005R,,351014206031L211214y254014Z!266G371
+302l315I254rw|240022372 211,000"
+
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-delete.md
new file mode 100644
index 0000000000..d67473fe8d
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-delete.md
@@ -0,0 +1,37 @@
+---
+title: "PBC Yokozuna Index Delete"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Yokozuna Index Delete"
+    identifier: "pbc_yz_index_delete"
+    weight: 122
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - 
/riak/3.0.1/dev/references/protocol-buffers/yz-index-delete + - /riak/kv/3.0.1/dev/references/protocol-buffers/yz-index-delete + +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..c2b3ea2e51 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,63 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/yz-index-get + - /riak/kv/3.0.1/dev/references/protocol-buffers/yz-index-get + +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/3.0.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. + + + diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..f0f76fca79 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,49 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.1/dev/references/protocol-buffers/yz-index-put + - /riak/kv/3.0.1/dev/references/protocol-buffers/yz-index-put + +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. 
+
+```protobuf
+message RpbYokozunaIndex {
+  required bytes name = 1;
+  optional bytes schema = 2;
+  optional uint32 n_val = 3;
+}
+```
+
+Each message specifying an index must include the index's name as a
+binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/3.0.1/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/#message-codes) code with no data on success.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-schema-get.md
new file mode 100644
index 0000000000..3785ce1c61
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-schema-get.md
@@ -0,0 +1,52 @@
+---
+title: "PBC Yokozuna Schema Get"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Yokozuna Schema Get"
+    identifier: "pbc_yz_schema_get"
+    weight: 123
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/protocol-buffers/yz-schema-get
+  - /riak/kv/3.0.1/dev/references/protocol-buffers/yz-schema-get
+
+---
+
+Fetch a [search schema]({{}}riak/kv/3.0.1/developing/usage/search-schemas) from Riak Search.
+
+## Request
+
+In a request message, you only need to specify the name of the schema as
+a binary (under `name`):
+
+```protobuf
+message RpbYokozunaSchemaGetReq {
+  required bytes name = 1;  // Schema name
+}
+```
+
+## Response
+
+```protobuf
+message RpbYokozunaSchemaGetResp {
+  required RpbYokozunaSchema schema = 1;
+}
+```
+
+The response message will include a `RpbYokozunaSchema` structure.
+
+```protobuf
+message RpbYokozunaSchema {
+  required bytes name = 1;
+  optional bytes content = 2;
+}
+```
+
+This message includes the schema `name` and its XML `content`.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-schema-put.md
new file mode 100644
index 0000000000..a888aab04f
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/protocol-buffers/yz-schema-put.md
@@ -0,0 +1,45 @@
+---
+title: "PBC Yokozuna Schema Put"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Yokozuna Schema Put"
+    identifier: "pbc_yz_schema_put"
+    weight: 124
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/references/protocol-buffers/yz-schema-put
+  - /riak/kv/3.0.1/dev/references/protocol-buffers/yz-schema-put
+
+---
+
+Create a new Solr [search schema]({{}}riak/kv/3.0.1/developing/usage/search-schemas).
+
+## Request
+
+```protobuf
+message RpbYokozunaSchemaPutReq {
+  required RpbYokozunaSchema schema = 1;
+}
+```
+
+Each message must contain a `RpbYokozunaSchema` object structure.
+
+```protobuf
+message RpbYokozunaSchema {
+  required bytes name = 1;
+  optional bytes content = 2;
+}
+```
+
+This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/3.0.1/developing/usage/search-schemas) `content` as XML.
+
+## Response
+
+Returns a [RpbPutResp]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/#message-codes) code with no data on success.
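+
+As an illustration, uploading a schema with the official [Erlang client](https://github.com/basho/riak-erlang-client) might look like the following sketch. It assumes a running node on `localhost`, a schema file named `my_schema.xml`, and the client's `create_search_schema/3` helper, which wraps `RpbYokozunaSchemaPutReq`:
+
+```erlang
+%% Sketch: read a Solr schema from disk and upload it under the
+%% name "my_schema".
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, SchemaXML} = file:read_file("my_schema.xml"),
+ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXML).
+```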
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/api/repl-hooks.md b/content/riak/kv/3.0.1/developing/api/repl-hooks.md
new file mode 100644
index 0000000000..a239f16ea9
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/api/repl-hooks.md
@@ -0,0 +1,196 @@
+---
+title_supertext: "Riak Multi-Datacenter Replication:"
+title: "Hooks API"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Multi-Datacenter REPL Hooks API"
+    identifier: "apis_repl_hooks"
+    weight: 100
+    parent: "developing_apis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v2/hooks
+  - /riak/kv/3.0.1/ops/mdc/v2/hooks
+
+---
+[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl
+
+This document is a guide to developing extensions for Riak's
+Multi-Datacenter Replication feature.
+
+## Replication Hooks
+
+Riak allows applications to register replication hooks to control
+either of the following:
+
+* when extra objects need to be replicated along with the current object
+* when an object should _not_ be replicated
+
+To register a hook, you must call the following function in an
+application-specific Erlang module, where `MyMod` is to be replaced
+with the name of your custom module:
+
+```erlang
+riak_core:register([{repl_helper, MyMod}]).
+```
+
+## Replication Hook API
+
+A replication hook must implement the following functions:
+
+### send_realtime/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook controls whether an [object][object]
+replicated in realtime should be sent. To send this object, return `ok`;
+to prevent the object from being sent, return `cancel`. You can also
+return a list of Riak objects to be replicated immediately *before* the
+current object. This is useful when you have an object that refers to
+other objects, e.g. a chunked file, and want to ensure that all of the
+dependency objects are replicated before the dependent object.
+
+### send/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook is used in fullsync replication. To send this
+[object][object],
+return `ok`; to prevent the object from being sent, return `cancel`. You
+can also return a list of Riak objects to be replicated immediately
+*before* the current object. This is useful when you have an object
+that refers to other objects, e.g. a chunked file, and want to ensure
+that all the dependency objects are replicated before the dependent
+object.
+
+### recv/1
+
+```erlang
+(riak_object) -> ok | cancel
+```
+
+When an [object][object]
+is received by the client site, this hook is run. You can use it to
+update metadata or to deny the object.
+
+## Implementing a Sample Replication Hook
+
+The following is a simple replication hook that will log when an object
+is received via replication. For more information about the functions in
+the sample, see the [Replication Hook API](#replication-hook-api) section above.
+
+Here is the relevant Erlang code:
+
+```erlang
+%% Riak Enterprise MDC replication hook sample
+
+-module(riak_replication_hook_sample).
+-export([register/0]).
+-export([recv/1, send/2, send_realtime/2]).
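+
+%% register/0 installs this module as a repl_helper hook with
+%% riak_core; recv/1, send/2, and send_realtime/2 implement the
+%% hook API described above.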
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + + + + diff --git a/content/riak/kv/3.0.1/developing/app-guide.md b/content/riak/kv/3.0.1/developing/app-guide.md new file mode 100644 index 0000000000..f5150b72ac --- /dev/null +++ b/content/riak/kv/3.0.1/developing/app-guide.md @@ -0,0 +1,420 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/3.0.1/dev/using/application-guide/ + - /riak/kv/3.0.1/dev/using/application-guide/ + +--- + +[usage conflict resolution]: {{}}riak/kv/3.0.1/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/3.0.1/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/3.0.1/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/3.0.1/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/3.0.1/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/3.0.1/developing/key-value-modeling +[dev data types]: {{}}riak/kv/3.0.1/developing/data-types +[dev data types#counters]: {{}}riak/kv/3.0.1/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/3.0.1/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/3.0.1/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/3.0.1/developing/usage/creating-objects +[usage search]: {{}}riak/kv/3.0.1/developing/usage/search +[use ref search]: {{}}riak/kv/3.0.1/using/reference/search +[usage 2i]: {{}}riak/kv/3.0.1/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/3.0.1/developing/client-libraries +[concept crdts]: {{}}riak/kv/3.0.1/learn/concepts/crdts +[dev data model]: {{}}riak/kv/3.0.1/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/3.0.1/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/3.0.1/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/3.0.1/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/3.0.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/3.0.1/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/3.0.1/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/3.0.1/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/3.0.1/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/3.0.1/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/3.0.1/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/3.0.1/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/3.0.1/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/3.0.1/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/3.0.1/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/3.0.1/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/3.0.1/using/reference/strong-consistency 
+[cluster ops strong consistency]: {{}}riak/kv/3.0.1/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/3.0.1/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/3.0.1/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/3.0.1/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/3.0.1/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/3.0.1/setup/installing
+[getting started]: {{}}riak/kv/3.0.1/developing/getting-started
+[usage index]: {{}}riak/kv/3.0.1/developing/usage
+[glossary]: {{}}riak/kv/3.0.1/learn/glossary
+[usage search schema]: {{}}riak/kv/3.0.1/developing/usage/search-schemas
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answer to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data** - While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects** - Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects** - Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys** - It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects. Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]** - If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so.
 But if
+  your data can be modeled as a [counter][dev data types#counters],
+  [set][dev data types#sets], or [map][dev data types#maps], you
+  should seriously consider using [Riak Data Types][dev data types],
+  which can speed application development and transfer a great deal of
+  complexity away from the application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you use it to store:
+
+* **Objects that exceed 1-2MB in size** - If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies** - If your data cannot be
+  easily denormalized or if it requires that objects can be easily
+  assembled into and accessible as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommendable for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search] - Getting started with Riak Search
+* [Search Details][use ref search] - A detailed overview of the concepts and design
+  considerations behind Riak Search
+* [Search Schema][usage search schema] - How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API** - Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon).
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you.
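+
+As a quick illustration, a Search query from the official Erlang client might look like the following sketch. It assumes a client connection `Pid` and an existing index named `famous` that is associated with a bucket; `search/3` wraps the Solr query API:
+
+```erlang
+%% Sketch: find indexed objects whose name_s field starts with "Lion".
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:format("~p~n", [Results]).
+```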
+ +### When Not to Use Search + +* **When deep pagination is needed** - At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** - In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] - A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] - A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] - An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** - If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** - If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** - If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** - While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
If you have data that requires
+  a modeling solution that the Data Types can't cover, you should stick to
+  standard K/V operations.
+* **When object size is of significant concern** - Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth. Riak
+comes equipped with a set of default MapReduce jobs that you can employ,
+or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce] - A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce] - A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only** - You should use MapReduce only when truly
+  necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do** - Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i] - A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i] - Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination** - At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead.
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes** - If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes.
+* **If you're using Bitcask** - 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy accordingly. To see if this feature might be a
+good fit for your application, we recommend checking out the following
+documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers that want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable?
+
+Although Riak always performs best when storing and retrieving immutable
+data, Riak also handles mutable objects very ably using a variety of
+eventual consistency principles. 
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] - Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] - A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] - How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] - A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] - A listing of frequently used terms in Riak's + documentation + + + + diff --git a/content/riak/kv/3.0.1/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/3.0.1/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..7073dd7cf0 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,802 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/3.0.1/dev/advanced/mapreduce/ + - /riak/kv/3.0.1/dev/advanced/mapreduce/ + +--- + +[usage 2i]: {{}}riak/kv/3.0.1/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/3.0.1/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/3.0.1/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/3.0.1/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/3.0.1/learn/glossary/#vnode +[config reference]: {{}}riak/kv/3.0.1/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to
+run MapReduce jobs using Erlang or JavaScript.
+
+{{% note title="Deprecation Warning" %}}
+JavaScript MapReduce is deprecated and will be removed in a future version.
+{{% /note %}}
+
+
+### Why Do We Use MapReduce for Querying Riak KV?
+
+Key/value stores like Riak KV generally do not offer the kinds of complex
+querying capabilities found in other data storage systems, such as
+relational databases. MapReduce enables you to perform powerful queries
+over the data stored in Riak KV but should be used with caution.
+
+The main goal of MapReduce is to spread the processing of a query across
+many systems to take advantage of parallel processing power. This is
+generally done by dividing the query into several steps, i.e. dividing
+the dataset into several chunks and then running those step/chunk pairs
+on separate physical hosts. Riak KV's MapReduce has an additional goal:
+increasing data locality. When processing a large dataset, it's often
+much more efficient to take the computation to the data than it is to
+bring the data to the computation.
+
+"Map" and "Reduce" are phases in the query process. Map functions take
+one piece of data as input and produce zero or more results as output.
+If you're familiar with [mapping over a list][mapping list]
+in functional programming languages, you're already familiar with the
+"Map" steps in a MapReduce query.
+
+## MapReduce caveats
+
+MapReduce should generally be treated as a fallback rather than a
+standard part of an application. There are often ways to model data
+such that dynamic queries become single key retrievals, which are
+dramatically faster and more reliable in Riak KV, and tools such as Riak
+Search and 2i are simpler to use and may place less strain on a
+cluster.
+
+### R=1
+
+One consequence of Riak KV's processing model is that MapReduce queries
+have an effective `R` value of 1. The queries are distributed
+to a representative sample of the cluster where the data is expected to
+be found, and if one server lacks a copy of data it's supposed to have,
+a MapReduce job will not attempt to look for it elsewhere.
+
+For more on the value of `R`, see our documentation on [replication properties][apps replication properties].
+
+### Key lists
+
+Asking Riak KV to generate a list of all keys in a production environment
+is generally a bad idea. It's an expensive operation.
+
+Attempting to constrain that operation to a bucket (e.g., a
+hypothetical `mapred_bucket`) does not help because Riak KV must still
+pull all keys from storage to determine which ones are in the
+specified bucket.
+
+If at all possible, run MapReduce against a list of known keys.
+
+### Code distribution
+
+As we'll discuss in this document, the functions invoked from Erlang
+MapReduce must be available on all servers in the cluster unless
+using the client library from an Erlang shell.
+
+### Security restrictions
+
+If Riak's security functionality is enabled, there are two
+restrictions on MapReduce that come into play:
+
+* The `riak_kv.mapreduce` permission must be granted to the user (or
+  via the user's groups)
+* Other than the module `riak_kv_mapreduce`, any Erlang modules
+  distributed with Riak KV will **not** be accessible to custom MapReduce
+  code unless made available via the `add_path` mechanism documented
+  in [Installing Custom Code][use ref custom code].
+
+## How Riak KV's MapReduce Queries Are Specified
+
+MapReduce queries in Riak KV have two components: (1) a list of inputs and
+(2) a list of "steps," or "phases."
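+
+With the [Erlang client][erlang client], these two components map directly onto the second and third arguments of `riakc_pb_socket:mapred/3`. A minimal sketch, assuming a client connection and a `messages` bucket containing the listed keys (the key names here are illustrative):
+
+```erlang
+%% Sketch: inputs are bucket/key pairs; the query is a list of
+%% phases. A single map phase runs the built-in
+%% riak_kv_mapreduce:map_object_value/3 and keeps its results.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+Inputs = [{<<"messages">>, <<"msg1">>}, {<<"messages">>, <<"msg2">>}],
+Query = [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, true}],
+{ok, Results} = riakc_pb_socket:mapred(Pid, Inputs, Query).
+```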
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any is included, with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it.
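+
+As a concrete sketch of the set-union example above (this is not a
+built-in Riak KV function; it works because `lists:usort/1` both sorts
+and de-duplicates, so feeding its own output back in during a re-reduce
+is harmless):
+
+```erlang
+%% Reduce phase: return the sorted set-union of all inputs.
+%% usort([1,2,3,3,4,5]) -> [1,2,3,4,5]
+fun(Values, _Arg) ->
+    lists:usort(Values)
+end.
+```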
+ +### How a Link Phase Works in Riak KV + +Link phases find links matching patterns specified in the query +definition. The patterns specify which buckets and tags links must have. + +"Following a link" means adding it to the output list of this phase. The +output of this phase is often most useful as input to a map phase or to +another reduce phase. + +## Invoking MapReduce + +To illustrate some key ideas, we'll define a simple module that +implements a map function to return the key value pairs contained in a +bucket and use it in a MapReduce query via Riak KV's HTTP API. + +Here is our example MapReduce function: + +```erlang +-module(mr_example). + +-export([get_keys/3]). + +% Returns bucket and key pairs from a map phase +get_keys(Value,_Keydata,_Arg) -> + [{riak_object:bucket(Value),riak_object:key(Value)}]. +``` + +Save this file as `mr_example.erl` and proceed to compiling the module. + +{{% note title="Note on the Erlang Compiler" %}} +You must use the Erlang compiler (`erlc`) associated with the +Riak KV installation or the version of Erlang used when compiling Riak KV from +source. +{{% /note %}} + +Compiling the module is a straightforward process: + +```bash +erlc mr_example.erl +``` + +Successful compilation will result in a new `.beam` file, `mr_example.beam`. + +Send this file to your operator, or read about [installing custom code][use ref custom code] +on your Riak KV nodes. Once your file has been installed, all that +remains is to try the custom function in a MapReduce query. For +example, let's return keys contained within a bucket named `messages` +(please pick a bucket which contains keys in your environment). + +```curl +curl -XPOST localhost:8098/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' +``` + +The result should be a JSON map of bucket and key names expressed as key/value pairs. + +{{% note %}} +Be sure to install the MapReduce function as described above on all of +the nodes in your cluster to ensure proper operation. +{{% /note %}} + + +## Phase functions + +MapReduce phase functions have the same properties, arguments, and +return values whether you write them in Javascript or Erlang. + +### Map phase functions + +Map functions take three arguments (in Erlang, arity-3 is required). +Those arguments are: + + 1. `Value`: the value found at a key. This will be a Riak object, which + in Erlang is defined and manipulated by the `riak_object` module. + In Javascript, a Riak object looks like this: + + ```javascript + { + "bucket_type" : BucketTypeAsString, + "bucket" : BucketAsString, + "key" : KeyAsString, + "vclock" : VclockAsString, + "values" : [ + { + "metadata" : { + "X-Riak-VTag":VtagAsString, + "X-Riak-Last-Modified":LastModAsString, + "Links":[...List of link objects], + // ...other metadata... + }, + "data" : ObjectData + }, + // ...other metadata/data values (siblings)... + ] + } + ``` + 2. *KeyData* : key data that was submitted with the inputs to the query or phase. + 3. *Arg* : a static argument for the entire phase that was submitted with the query. + +A map phase should produce a list of results. You will see errors if +the output of your map function is not a list. Return the empty list if +your map function chooses not to produce output. If your map phase is +followed by another map phase, the output of the function must be +compatible with the input to a map phase - a list of bucket-key pairs or +`bucket-key-keydata` triples. 
+ +#### Map function examples + +These map functions return the value (data) of the object being mapped: + +```erlang +fun(Value, _KeyData, _Arg) -> + [riak_object:get_value(Value)] +end. +``` + +These map functions filter their inputs based on the arg and return bucket-key pairs for a subsequent map phase: + +```erlang +fun(Value, _KeyData, Arg) -> + Key = riak_object:key(Value), + Bucket = riak_object:bucket(Value), + case erlang:byte_size(Key) of + L when L > Arg -> + [{Bucket,Key}]; + _ -> [] + end +end. +``` + +### Reduce phase functions + +Reduce functions take two arguments. Those arguments are: + +1. *ValueList*: the list of values produced by the preceding phase in the MapReduce query. +2. *Arg* : a static argument for the entire phase that was submitted with the query. + +A reduce function should produce a list of values, but it must also be +true that the function is commutative, associative, and idempotent. That +is, if the input list `[a,b,c,d]` is valid for a given F, then all of +the following must produce the same result: + + +```erlang + F([a,b,c,d]) + F([a,d] ++ F([c,b])) + F([F([a]),F([c]),F([b]),F([d])]) +``` + +#### Reduce function examples + +These reduce functions assume the values in the input are numbers and +sum them: + +```erlang +fun(Values, _Arg) -> + [lists:foldl(fun erlang:'+'/2, 0, Values)] +end. +``` + +These reduce functions sort their inputs: + +```erlang +fun(Values, _Arg) -> + lists:sort(Values) +end. +``` + +## MapReduce Examples + +Riak KV supports describing MapReduce queries in Erlang syntax through the +Protocol Buffers API. This section demonstrates how to do so using the +Erlang client. + +{{% note title="Distributing Erlang MapReduce Code" %}} +Any modules and functions you use in your Erlang MapReduce calls must be +available on all nodes in the cluster. Please read about +[installing custom code]({{}}riak/kv/3.0.1/using/reference/custom-code). +{{% /note %}} + +### Erlang Example + +Before running some MapReduce queries, let's create some objects to +run them on. Unlike the first example when we compiled +`mr_example.erl` and distributed it across the cluster, this time +we'll use the [Erlang client library][erlang client] and shell. + +```erlang +1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087). +2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>, + term_to_binary(["eggs", "bacon"])). +3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>, + term_to_binary(["bread", "bacon"])). +4> riakc_pb_socket:put(Client, Yours, [{w, 1}]). +5> riakc_pb_socket:put(Client, Mine, [{w, 1}]). +``` + +Now that we have a client and some data, let's run a query and count how +many occurrences of groceries. + +```erlang +6> Count = fun(G, undefined, none) -> + [dict:from_list([{I, 1} + || I <- binary_to_term(riak_object:get_value(G))])] + end. +7> Merge = fun(Gcounts, none) -> + [lists:foldl(fun(G, Acc) -> + dict:merge(fun(_, X, Y) -> X+Y end, + G, Acc) + end, + dict:new(), + Gcounts)] + end. +8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred( + Client, + [{<<"groceries">>, <<"mine">>}, + {<<"groceries">>, <<"yours">>}], + [{map, {qfun, Count}, none, false}, + {reduce, {qfun, Merge}, none, true}]). +9> L = dict:to_list(R). +``` + +{{% note title="Riak Object Representations" %}} +Note how the `riak_object` module is used in the MapReduce +function but the `riakc_obj` module is used on the client. +Riak objects are represented differently internally to the cluster than +they are externally. 
+{{% /note %}} + +Given the lists of groceries we created, the sequence of commands above +would result in L being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`. + +### Erlang Query Syntax + +`riakc_pb_socket:mapred/3` takes a client and two lists as arguments. +The first list contains bucket-key pairs. The second list contains +the phases of the query. + +`riakc_pb_socket:mapred_bucket/3` replaces the first list of +bucket-key pairs with the name of a bucket; see the warnings above +about using this in a production environment. + +#### Inputs + +The `mapred/3` input objects are given as a list of tuples in the +format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and +`Key` should be binaries, and `KeyData` can be any Erlang term. The +former form is equivalent to `{{Bucket,Key},undefined}`. + +#### Query + +The query is given as a list of map, reduce and link phases. Map and +reduce phases are each expressed as tuples in the following form: + + +```erlang +{Type, FunTerm, Arg, Keep} +``` + +`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument +(any Erlang term) to pass to each execution of the phase. `Keep` is +either `true` or `false` and determines whether results from the phase +will be included in the final value of the query. Riak KV assumes that the +final phase will return results. + +`FunTerm` is a reference to the function that the phase will execute and +takes any of the following forms: + +* `{modfun, Module, Function}` where `Module` and `Function` are atoms + that name an Erlang function in a specific module +* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous + function) +* `{jsfun,Name}` where `Name` is a binary that, when evaluated in + Javascript, points to a built-in Javascript function +* `{jsanon, Source}` where `Source` is a binary that, when evaluated in + Javascript is an anonymous function +* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains + the source for an anonymous Javascript function + +{{% note title="qfun Note" %}} +Using `qfun` in compiled applications can be a fragile +operation. Please keep the following points in mind: + +1. The module in which the function is defined must be present and +exactly the same version on both the client and Riak KV nodes. + +2. Any modules and functions used by this function (or any function in +the resulting call stack) must also be present on the Riak KV nodes. + +Errors about failures to ensure both 1 and 2 are often surprising, +usually seen as opaque missing-function or function-clause +errors. Especially in the case of differing module versions, this can be +difficult to diagnose without expecting the issue and knowing of +`Module:info/0`. + +When using the Erlang shell, anonymous MapReduce functions can be +defined and sent to Riak KV instead of deploying them to all servers in +advance, but condition #2 above still holds. +{{% /note %}} + +Link phases are expressed in the following form: + + +```erlang +{link, Bucket, Tag, Keep} +``` + + +`Bucket` is either a binary name of a bucket to match, or the atom `_`, +which matches any bucket. `Tag` is either a binary tag to match, or the +atom `_`, which matches any tag. `Keep` has the same meaning as in map +and reduce phases. + + +> There are a small group of prebuilt Erlang MapReduce functions available +with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl). 
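+
+For instance, assuming the `map_object_value/3` and `reduce_sort/2`
+functions that appear in the `riak_kv_mapreduce` module linked above, a
+query returning the sorted values of the grocery objects created
+earlier might look like this:
+
+```erlang
+%% Map with a prebuilt function that extracts each object's value,
+%% then sort the results with a prebuilt reduce function.
+riakc_pb_socket:mapred(Client,
+                       [{<<"groceries">>, <<"mine">>},
+                        {<<"groceries">>, <<"yours">>}],
+                       [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+                        {reduce, {modfun, riak_kv_mapreduce, reduce_sort}, none, true}]).
+```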
+ +## Bigger Data Examples + +### Loading Data + +This Erlang script will load historical stock-price data for Google +(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it. +Paste the code below into a file called `load_data.erl` inside the `dev` +directory (or download it below). + +```erlang +#!/usr/bin/env escript +%% -*- erlang -*- +main([]) -> + io:format("Requires one argument: filename with the CSV data~n"); +main([Filename]) -> + {ok, Data} = file:read_file(Filename), + Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])), + lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines). + +format_and_insert(Line) -> + JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line), + Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]), + io:format("Inserting: ~s~n", [hd(Line)]), + os:cmd(Command). +``` + +Make the script executable: + +```bash +chmod +x load_data.erl +``` + +Download the CSV file of stock data linked below and place it in the +`dev` directory where we've been working. + +* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) - Google historical stock data +* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) - Alternative script in Ruby to load the data +* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) - Erlang script to load data (as shown in snippet) + +Now load the data into Riak KV. + +```bash +./load_data.erl goog.csv +``` + + +### Map only: find the days on which the high was over $600.00 + +From the Erlang shell with the client library loaded, let's define a +function which will check each value in our `goog` bucket to see if +the stock's high for the day was above $600. + +```erlang +> HighFun = fun(O, _, LowVal) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> High = proplists:get_value(<<"High">>, Map, -1.0), +> case High > LowVal of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun +``` + +Now we'll use `mapred_bucket/3` to send that function to the cluster. + +```erlang +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]). + {ok,[{0, + [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>, + <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>, + <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>, + <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>, + <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>, + <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>, + <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>, + <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]} +``` + +#### Map only: find the days on which the close is lower than open + +This example is slightly more complicated: instead of comparing a +single field against a fixed value, we're looking for days when the +stock declined. + +```erlang +> CloseLowerFun = fun(O, _, _) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> Close = proplists:get_value(<<"Close">>, Map, -1.0), +> Open = proplists:get_value(<<"Open">>, Map, -2.0), +> case Close < Open of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun + +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]). 
+{ok,[{0, + [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>, + <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>, + <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>, + <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>, + <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>, + <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>, + <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>, + <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]} +``` + +#### Map and Reduce: find the maximum daily variance in price by month + +Here things start to get tricky. We'll use map to determine each day's +rise or fall, and our reduce phase will identify each month's largest +variance. + +```erlang +DailyMap = fun(O, _, _) -> + {struct, Map} = mochijson2:decode(riak_object:get_value(O)), + Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")), + High = proplists:get_value(<<"High">>, Map, 0.0), + Low = proplists:get_value(<<"Low">>, Map, 0.0), + Month = string:substr(Date, 1, 7), + [{Month, abs(High - Low)}] +end. + +MonthReduce = fun(List, _) -> + {Highs, _} = lists:foldl( + fun({Month, _Value}=Item, {Accum, PrevMonth}) -> + case Month of + PrevMonth -> + %% Highest value is always first in the list, so + %% skip over this one + {Accum, PrevMonth}; + _ -> + {[Item] ++ Accum, Month} + end + end, + {[], ""}, + List), + Highs + end. +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]). +{ok,[{1, + [{"2010-02",10.099999999999909}, + {"2006-02",11.420000000000016}, + {"2004-08",8.100000000000009}, + {"2008-08",14.490000000000009}, + {"2006-05",11.829999999999984}, + {"2005-10",4.539999999999964}, + {"2006-06",7.300000000000011}, + {"2008-06",9.690000000000055}, + {"2006-03",11.770000000000039}, + {"2006-12",4.880000000000052}, + {"2005-09",9.050000000000011}, + {"2008-03",15.829999999999984}, + {"2008-09",14.889999999999986}, + {"2010-04",9.149999999999977}, + {"2008-06",14.909999999999968}, + {"2008-05",13.960000000000036}, + {"2005-05",2.780000000000001}, + {"2005-07",6.680000000000007}, + {"2008-10",21.390000000000043}, + {"2009-09",4.180000000000007}, + {"2006-08",8.319999999999993}, + {"2007-08",5.990000000000009}, + {[...],...}, + {...}|...]}]} +``` + +#### A MapReduce Challenge + +Here is a scenario involving the data you already have loaded. + +MapReduce Challenge: Find the largest day for each month in terms of +dollars traded, and subsequently the largest overall day. + +*Hint*: You will need at least one each of map and reduce phases. + +## Streaming MapReduce + +Because Riak KV distributes the map phases across the cluster to increase +data locality, you can gain access to the results of those individual +computations as they finish via streaming. Streaming can be very +helpful when getting access to results from a high latency MapReduce job +that only contains map phases. Streaming of results from reduce phases +isn't as useful, but if your map phases return data (keep: true), they +will be returned to the client even if the reduce phases haven't +executed. This will let you use streaming with a reduce phase to collect +the results of the map phases while the jobs are run and then get the +result to the reduce phase at the end. + +### Streaming via the HTTP API + +You can enable streaming with MapReduce jobs submitted to the `/mapred` +resource by adding `?chunked=true` to the url. The response will be sent +using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`. 
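+
+As a sketch, reusing the `mr_example` query from earlier in this
+document, a streaming request might look like this:
+
+```curl
+curl -XPOST "localhost:8098/mapred?chunked=true" \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}'
+```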
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error
+
+Internal Server Error
+
+The server encountered an error while processing this request:
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, typically the information you need is buried more deeply within the stack.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
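+
+The corrected call, as used in the working `DailyMap` function earlier,
+starts at index 1:
+
+```erlang
+%% string:substr/3 is 1-indexed: "2009-06-10" -> "2009-06"
+Month = string:substr(Date, 1, 7).
+```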
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/app-guide/cluster-metadata.md b/content/riak/kv/3.0.1/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..a9cdfa89a9
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,72 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+aliases:
+
+---
+
+Cluster metadata is a subsystem inside Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/3.0.1/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/3.0.1/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/3.0.1/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
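+
+As a rough sketch of that interface (consult the module linked above
+for the authoritative signatures; the prefix and key below are invented
+for illustration):
+
+```erlang
+%% A "full prefix" is a two-element tuple naming the metadata namespace.
+FullPrefix = {<<"myapp">>, <<"settings">>},
+
+%% Store a value (acknowledged by a single node, then broadcast),
+%% read it back from the local node, and delete it.
+ok = riak_core_metadata:put(FullPrefix, <<"max_connections">>, 128),
+128 = riak_core_metadata:get(FullPrefix, <<"max_connections">>),
+ok = riak_core_metadata:delete(FullPrefix, <<"max_connections">>).
+```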
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/app-guide/reference.md b/content/riak/kv/3.0.1/developing/app-guide/reference.md
new file mode 100644
index 0000000000..831a6d68b8
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/app-guide/reference.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+#menu:
+#  riak_kv-3.0.1:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+aliases:
+
+---
+
+**TODO: Add content**
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/app-guide/replication-properties.md b/content/riak/kv/3.0.1/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..e9a1893829
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/app-guide/replication-properties.md
@@ -0,0 +1,584 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/advanced/replication-properties
+  - /riak/kv/3.0.1/dev/advanced/replication-properties
+
+---
+
+[usage bucket types]: {{}}riak/kv/3.0.1/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/3.0.1/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/3.0.1/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/3.0.1/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/3.0.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/3.0.1/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/3.0.1/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, as part of
+an individual read or write request. Examples are given in the
+[section below]({{}}riak/kv/3.0.1/developing/app-guide/replication-properties#client-level-replication-settings).
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/3.0.1/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
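+
+For example, over HTTP (the bucket and key names here are arbitrary):
+
+```curl
+curl -XPUT http://localhost:8098/types/custom_props/buckets/any_bucket/keys/any_key \
+  -H "Content-Type: text/plain" \
+  -d "this write is replicated with n_val=5, r=3, w=3"
+```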
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/3.0.1/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | Number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/3.0.1/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/3.0.1/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                       <<"giraffe">>,
+                       <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Storing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/3.0.1/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
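+
+These parameters are set like any others. As a sketch, a bucket type
+requiring at least one primary vnode response on both reads and writes
+(the type name `primary_one` is illustrative) might be created like so:
+
+```bash
+riak admin bucket-type create primary_one '{"props":{"pr":1,"pw":1}}'
+riak admin bucket-type activate primary_one
+```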
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/3.0.1/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/3.0.1/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/3.0.1/setup/planning/backend/multi).
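+
+As with the other parameters, DW can be set through a bucket type. A
+sketch, with an illustrative type name:
+
+```bash
+riak admin bucket-type create dw_equals_2 '{"props":{"dw":2}}'
+riak admin bucket-type activate dw_equals_2
+```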
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
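+
+Both settings can be combined in a single bucket type. A sketch, with
+an illustrative type name:
+
+```bash
+riak admin bucket-type create thorough_reads '{"props":{"notfound_ok":false,"basic_quorum":true}}'
+riak admin bucket-type activate thorough_reads
+```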
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2; an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
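+
+Symbolic names can be used wherever the integer values are accepted,
+including on individual requests (covered in the next section). For
+example, this read of the `animal_facts` bucket from earlier asks all
+replicas to respond:
+
+```curl
+curl "http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee?r=all"
+```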
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/3.0.1/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/3.0.1/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/3.0.1/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+
+Tuning CAP Controls in Riak from Basho Technologies on Vimeo.
+
+
diff --git a/content/riak/kv/3.0.1/developing/app-guide/strong-consistency.md b/content/riak/kv/3.0.1/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..c3d3a38844
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/app-guide/strong-consistency.md
@@ -0,0 +1,261 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/advanced/strong-consistency
+  - /riak/kv/3.0.1/dev/advanced/strong-consistency
+
+---
+
+[use ref strong consistency]: {{}}riak/kv/3.0.1/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/3.0.1/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/3.0.1/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/3.0.1/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/3.0.1/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/3.0.1/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/3.0.1/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/3.0.1/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/3.0.1/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/3.0.1/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/3.0.1/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/3.0.1/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/3.0.1/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/3.0.1/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/3.0.1/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/3.0.1/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/3.0.1/developing/client-libraries
+[getting started]: {{}}riak/kv/3.0.1/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/3.0.1/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent and
+partition-tolerant) for the data in those buckets. Depending on your
+use case, you can store all of your data in strongly consistent buckets
+or just some of it. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability because a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
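+
+Using the new type from a client is then just a matter of specifying it
+when opening the bucket. Below is a minimal sketch with the official
+Python client; the bucket and key names are hypothetical:
+
+```python
+from riak import RiakClient
+
+client = RiakClient()
+
+# Buckets opened through this type inherit consistent=true
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+obj = bucket.new('alice', data={'balance': 100})
+obj.store()
+```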
+
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function quite differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations enable you to set a variety of
+[replication properties][apps replication properties] either on each request or at the
+bucket level [using bucket types][usage bucket types], those settings are quietly ignored
+for strongly consistent operations. The ignored settings include `r`,
+`pr`, `w`, `rw`, and others. Two replication properties that _can_ be
+set, however, are `n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
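+
+Since `n_val` is frozen once a bucket type is activated, it can be
+worth verifying it from the application side before relying on a type
+in production. A hedged sketch using the official Python client,
+assuming the `strongly_consistent` type from above:
+
+```python
+from riak import RiakClient
+
+client = RiakClient()
+btype = client.bucket_type('strongly_consistent')
+
+# n_val cannot be changed after activation, so check it up front
+props = btype.get_properties()
+assert props.get('n_val', 0) >= 5, 'expected n_val of at least 5'
+```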
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients but offer some important advantages.
+
+While we strongly recommend attaching context to objects for all
+updates---whether via traditional vector clocks or the newer dotted
+version vectors---contexts are purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to write to keys that do not yet exist without attaching a
+context, we recommend doing this only if you are certain that the key
+does not yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
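+
+In practice, the simplest way to satisfy this requirement is to always
+fetch an object before writing it, so that the context returned with
+the fetch is passed back with the update. A sketch using the official
+Python client, which carries the context on the fetched object
+automatically (bucket and key names are hypothetical):
+
+```python
+from riak import RiakClient
+
+client = RiakClient()
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+# Fetching first brings back the causal context along with the value
+obj = bucket.get('alice')
+
+# Modify and store; the context from the fetch accompanies the write
+obj.data = {'balance': 150}
+obj.store()
+```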
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure,
+   you should default to supplying a context.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates. A
+   retry sketch is shown below.
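+
+Because such failures are expected behavior, applications typically
+wrap strongly consistent updates in a full fetch/modify/store retry
+loop; re-sending the same put without re-reading would keep failing
+with a stale context. A minimal, hedged sketch using the official
+Python client (error handling is simplified, and all names are
+hypothetical):
+
+```python
+from riak import RiakClient, RiakError
+
+client = RiakClient()
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+def update_with_retry(key, update_fn, retries=5):
+    # Retry the whole read/modify/write cycle on each failure
+    for _ in range(retries):
+        obj = bucket.get(key)         # fresh value and causal context
+        obj.data = update_fn(obj.data)
+        try:
+            obj.store()
+            return obj
+        except RiakError:
+            continue                  # a concurrent update won; try again
+    raise RuntimeError('update failed after %d attempts' % retries)
+
+update_with_retry('alice', lambda d: dict(d, balance=d['balance'] + 50))
+```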
+
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in buckets that are
+not strongly consistent. One important exception is how writes are
+performed. Strongly consistent buckets cannot allow siblings by
+definition, and so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a key that already holds a value without
+including a causal context, you will receive the following error:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+<html><head><title>412 Precondition Failed</title></head><body><h1>Precondition Failed</h1>Precondition Failed<p><hr><address>mochiweb+webmachine web server</address></body></html>
+``` + +> **Getting Started with Riak KV clients** +> +> If you are connecting to Riak using one of Basho's official +[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section. + +## Known Issue with Client Libraries + +All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no +problems, since many error conditions are normal when using Riak. + +When working with strong consistency, however, operations like +[conditional puts][config strong consistency#details] commonly +produce errors that are difficult for clients to interpret. For example, +it is expected behavior for conditional puts to fail in the case of +concurrent updates to an object. At present, the official Riak clients +will convert this failure into an exception that is no different from +other error conditions, i.e. they will not indicate any +strong-consistency-specific errors. + +The best solution to this problem at the moment is to catch these +exceptions on the application side and parse server-side error messages +to see if the error involved a conditional failure. If so, you should +set up your application to retry any updates, perhaps a specified number +of times or perhaps indefinitely, depending on the use case. + +If you do set up a retry logic of this sort, however, it is necessary +to retry the entire read/modify/put cycle, meaning that you will need +to fetch the object, modify it, and then write. If you perform a simple +put over and over again, without reading the object, the update will +continue to fail. + +A future version of Riak will address these issues by modifying the +server API to more accurately report errors specific to strongly +consistent operations. + + + diff --git a/content/riak/kv/3.0.1/developing/app-guide/write-once.md b/content/riak/kv/3.0.1/developing/app-guide/write-once.md new file mode 100644 index 0000000000..657d47ba55 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/app-guide/write-once.md @@ -0,0 +1,158 @@ +--- +title: "Write Once" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Write Once" + identifier: "app_guide_write_once" + weight: 102 + parent: "developing_app_guide" +toc: true +version_history: + in: "2.1.0+" +aliases: + - /riak/3.0.1/dev/advanced/write-once + - /riak/kv/3.0.1/dev/advanced/write-once +--- + +[glossary vnode]: {{}}riak/kv/3.0.1/learn/glossary/#vnode +[bucket type]: {{}}riak/kv/3.0.1/developing/usage/bucket-types +[Riak data types]: {{}}riak/kv/3.0.1/developing/data-types +[strong consistency]: {{}}riak/kv/3.0.1/developing/app-guide/strong-consistency + +Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution. + +{{% note %}} +Write-once buckets do not support Riak commit hooks. Because Riak objects are +inserted into the realtime queue using a postcommit hook, realtime replication +is unavailable for write-once buckets. 
Fullsync replication will, however, +replicate the data. +{{% /note %}} + +## Configuration + +When the new `write_once` [bucket type][bucket type] parameter is set to +`true`, buckets of type will treat all key/value entries as semantically "write +once;" once written, entries should not be modified or overwritten by the user. + +The `write_once` property is a boolean property applied to a bucket type and +may only be set at bucket creation time. Once a bucket type has been set with +this property and activated, the `write_once` property may not be modified. + +The `write_once` property is incompatible with [Riak data types][Riak data types] +and [strong consistency][strong consistency], This means that if you attempt +to create a bucket type with the `write_once` property set to `true`, any +attempt to set the `datatype` parameter or to set the `consistent` parameter +to `true` will fail. + +The `write_once` property may not be set on the default bucket type, and may +not be set on individual buckets. If you set the `lww` or `allow_mult` +parameters on a write-once bucket type, those settings will be ignored, as +sibling values are disallowed by default. + +The following example shows how to configure a bucket type with the +`write_once` property: + +```bash +riak admin bucket-type create my-bucket-type '{"props": {"write_once": true}}' +# my-bucket-type created + +riak admin bucket-type activate my-bucket-type +# my-bucket-type has been activated + +riak admin bucket-type status my-bucket-type +# my-bucket-type is active +... +write_once: true +... +``` + +## Quorum + +The write path used by write-once buckets supports the `w`, `pw`, and `dw` +configuration values. However, if `dw` is specified, then the value of `w` is +taken to be the maximum of the `w` and `dw` values. For example, for an `n_val` +of 3, if `dw` is set to `all`, then `w` will be `3`. + +This write additionally supports the `sloppy_quorum` property. If set to +`false`, only primary nodes will be selected for calculation of write quorum +nodes. + +## Runtime + +The write-once path circumvents the normal coordinated PUT code path, and +instead sends write requests directly to all [vnodes][glossary vnode] (or +vnode proxies) in the effective preference list for the write operation. + +In place of the `put_fsm` used in the normal path, we introduce a collection of +new intermediate worker processes (implementing `gen_server` behavior). The +role of these intermediate processes is to dispatch put requests to vnode or +vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`, +the write-once workers are long-lived for the lifecycle of the `riak_kv` +application. They are therefore stateful and store request state in a state- +local dictionary. + +The relationship between the `riak_client`, write-once workers, and vnode +proxies is illustrated in the following diagram: + +
+![Write Once]({{}}images/write_once.png)
+ +## Client Impacts + +Since the write-once code path is optimized for writes of data that will not +be updated and therefore may potentially issue asynchronous writes, some +client features might not work as expected. For example, PUT requests asking +for the object to be returned will behave like requests that do not +request the object to be returned when they are performed against write-once +buckets. + + +## Siblings + +As mentioned, entries in write-once buckets are intended to be written only +once---users who are not abusing the semantics of the bucket type should not be +updating or over-writing entries in buckets of this type. However, it is +possible for users to misuse the API, accidentally or otherwise, which might +result in incomparable entries for the same key. + +In the case of siblings, write-once buckets will resolve the conflict by +choosing the "least" entry, where sibling ordering is based on a deterministic +SHA-1 hash of the objects. While this algorithm is repeatable and deterministic +at the database level, it will have the appearance to the user of "random write +wins." + +{{% note %}} +As mentioned in [Configuration](#configuration), write-once buckets and Riak +Data Types are incompatible because of this. +{{% /note %}} + + +## Handoff + +The write-once path supports handoff scenarios, such that if a handoff occurs +during PUTs in a write-once bucket, the values that have been written will be +handed off to the newly added Riak node. + +## Asynchronous Writes + +For backends that support asynchronous writes, the write-once path will +dispatch a write request to the backend and handle the response +asynchronously. This behavior allows the vnode to free itself for other work +instead of waiting on the write response from the backend. + +At the time of writing, the only backend that supports asynchronous writes is +LevelDB. Riak will automatically fall back to synchronous writes with all other +backends. + +{{% note title="Note on the `multi` backend" %}} +The [Multi]({{}}riak/kv/3.0.1/setup/planning/backend/multi) backend does not +support asynchronous writes. Therefore, if LevelDB is used with the Multi +backend, it will be used in synchronous mode. +{{% /note %}} + + + diff --git a/content/riak/kv/3.0.1/developing/client-libraries.md b/content/riak/kv/3.0.1/developing/client-libraries.md new file mode 100644 index 0000000000..d31cb99b5a --- /dev/null +++ b/content/riak/kv/3.0.1/developing/client-libraries.md @@ -0,0 +1,294 @@ +--- +title: "Client Libraries" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Client Libraries" + identifier: "developing_client_libraries" + weight: 106 + parent: "developing" +toc: true +aliases: + - /riak/3.0.1/dev/using/libraries + - /riak/kv/3.0.1/dev/using/libraries + +--- + +## Basho-Supported Libraries + +Basho officially supports a number of open-source client libraries for a +variety of programming languages and environments. 
+ +Language | Source | Documentation | Download +:--------|:-------|:--------------|:-------- +Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22) | +Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client) +Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads) +C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases) +Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases) +PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) +Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client) +Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client) + +**Note**: All official clients use the integrated issue tracker on +GitHub for bug reporting. + +In addition to the official clients, Basho provides some unofficial +client libraries, listed below. There are also many client libraries and +related [community projects]({{}}community/projects/). + + +## Community Libraries + +The Riak Community is developing at a break-neck pace, and the number of +community-contributed libraries and drivers is growing right along side +it. Here is a list of projects that may suit your programming needs or +curiosities. If you know of something that needs to be added or are +developing something that you wish to see added to this list, please +fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs) +and send us a pull request. + +{{% note title="Note on community-produced libraries" %}} +All of these projects and libraries are at various stages of completeness and +may not suit your application's needs based on their level of maturity and +activity. 
+{{% /note %}} + +### Client Libraries and Frameworks + +#### C/C++ + +* [riak-cpp](https://github.com/ajtack/riak-cpp) - A C++ Riak client + library for use with C++11 compilers +* [Riak C Driver](https://github.com/fenek/riak-c-driver) - A library + to communicate with Riak using cURL and Protocol Buffers +* [Riack](https://github.com/trifork/riack) - A simple C client + library +* [Riack++](https://github.com/TriKaspar/riack_cpp) - A C++ wrapper + around riack + +#### Clojure + +* [knockbox](https://github.com/reiddraper/knockbox) - An eventual + consistency toolbox for Clojure +* [Welle](http://clojureriak.info) - An expressive Clojure client with + batteries included +* [clj-riak](http://github.com/mmcgrana/clj-riak) - Clojure bindings + to the Riak Protocol Buffers API +* [sumo](https://github.com/reiddraper/sumo) - A Protocol + Buffer-specific client for Riak with KV, 2i, and MapReduce support +* [kria](https://github.com/bluemont/kria) - Riak 2.0 Asynchronous + (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer + API, Java 7. + +#### ColdFusion + +* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) - A Riak-backed cache extension for Railo/ColdFusion + +#### Common Lisp + +* [cl-riak (1)](https://github.com/whee/cl-riak) +* [cl-riak (2)](https://github.com/eriknomitch/cl-riak) + +#### Dart + +* [riak-dart](https://github.com/agilord/riak_dart_client) - HTTP + client for Riak written in Dart + +#### Django (Python) + +* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) - Riak-based Session Backend for Django +* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) - A Riak backend for Django + +#### Erlang + +* [Uriak Pool](https://github.com/unisontech/uriak_pool) - Erlang + connection pool library from the team at + [Unison](http://www.unison.com) +* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) - Riak + Protocol Buffer Client pool application +* [Pooly](https://github.com/aberman/pooly) - Riak Process Pool +* [riakpool](https://github.com/dweldon/riakpool) - Application for + maintaining a dynamic pool of Protocol Buffer client connections to a + Riak database +* [pooler](https://github.com/seth/pooler) - An OTP Process Pool + Application +* [krc](https://github.com/klarna/krc) - A simple wrapper around the + official Riak client for Erlang +* [riakc_pool](https://github.com/brb/riakc_pool) - A really simple + Riak client process pool based on poolboy + +#### Go + +* [riaken](https://github.com/riaken) - A fast and extendable Riak + Protocol Buffer Client +* [goriakpbc](https://github.com/tpjg/goriakpbc) - A Golang Riak + client inspired by the Ruby riak-client from Basho and riakpbc from mrb +* [riakpbc](https://github.com/mrb/riakpbc) - A Riak Protocol Buffer + client in Go +* [goriak](https://github.com/zegl/goriak) - Go language driver for Riak KV + +#### Grails + +* [Grails ORM for Riak](http://www.grails.org/plugin/riak) + +#### Griffon + +* [Riak Plugin for + Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin) + +#### Groovy + +* [spring-riak](https://github.com/jbrisbin/spring-riak) - Riak + support from Groovy and/or Java + +#### Haskell + +* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) - A fast Haskell client library from the team at MailRank. 
+ +#### Java + +* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) - Java Client Library for Riak based on the Protocol Buffers API +* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) - Asynchronous, NIO-based Protocol Buffers client for Riak +* [Riak Module for the Play + Framework](http://www.playframework.org/modules/riak-head/home) + +#### Lisp-flavored Erlang + +* [Gutenberg](https://github.com/dysinger/gutenberg/) - Riak MapReduce + examples written in LFE + +#### Node.js + +* [zukai](https://github.com/natural/zukai) - Riak ODM for Node.js + from Troy Melhase +* [riak-pb](https://github.com/CrowdProcess/riak-pb) - Riak Protocol + Buffers client for Node.js from the team at + [CrowdProcess](http://crowdprocess.com) +* [node_riak](https://github.com/mranney/node_riak) - Voxer's + production Node.js client for Riak. +* [riakpbc](https://github.com/nlf/riakpbc) - A simple Riak Protocol + Buffer client library for Node.js +* [nodiak](https://npmjs.org/package/nodiak) - Supports bulk + get/save/delete, sibling auto-resolution, MapReduce chaining, Search, + and 2i's +* [resourceful-riak](https://github.com/admazely/resourceful-riak) - A + Riak engine to the + [resourceful](https://github.com/flatiron/resourceful/) model + framework from [flatiron](https://github.com/flatiron/) +* [Connect-Riak](https://github.com/frank06/connect-riak) - Riak + session store for Connect backed by [Riak-js](http://riakjs.org/) +* [Riak-js](http://riakjs.com) - Node.js client for Riak with support + for HTTP and Protocol Buffers +* [Riakjs-model](https://github.com/dandean/riakjs-model) - a model + abstraction around riak-js +* [Node-Riak](http://github.com/orlandov/node-riak) - A wrapper around + Node's HTTP facilities for communicating with Riak +* [riak-dc](https://github.com/janearc/riak-dc) - A very thin, very small + http-based interface to Riak using promises intended to be used for small + tools like command-line applications; aims to have the "most-synchronous- + like" interface. 
+* [Nori](https://github.com/sgonyea/nori) - Experimental Riak HTTP + library for Node.js modeled after Ripple +* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) - Node-based server and database-frontend for Sproutcore +* [Chinood](https://npmjs.org/package/chinood) - Object data mapper + for Riak built on Nodiak +* [SimpleRiak](https://npmjs.org/package/simpleriak) - A very simple + Riak HTTP client + +#### OCaml + +* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) - Riak OCaml client +* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) - A Protocol + Buffers client for Riak + +#### Perl + +* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) - A Perl + interface to Riak +* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) - Non-blocking Riak adapter using anyevent +* [riak-tiny](https://github.com/tempire/riak-tiny) - Perl interface + to Riak without Moose +* [Riak::Light](https://metacpan.org/module/Riak::Light) - Fast and + lightweight Perl client for Riak (PBC only) + +#### PHP + +* [riak-client](https://github.com/php-riak/riak-client) - A Riak + 2.0-compliant PHP client with support for Protocol Buffers by [Fabio + Silva](https://github.com/FabioBatSilva) +* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) - A port of + Ripple to PHP +* [riiak](https://bitbucket.org/intel352/riiak) - A Riak PHP client + library for the [Yii Framework](http://www.yiiframework.com/) +* [riak-php](https://github.com/marksteele/riak-php) - A Riak PHP + client with support for Protocol Buffers +* [RiakBundle](https://github.com/remialvado/RiakBundle) - [Symfony](http://symfony.com) Bundle designed to ease interaction + with Riak +* [php_riak](https://github.com/TriKaspar/php_riak) - A PHP extension + written in C, Both Riak client and PHP session module + +#### Python + +* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) - Asyncio PBC Riak 2.0+ client library. 
(Based on official Basho + python client) +* [Riakasaurus](https://github.com/calston/riakasaurus) - A Riak + client library for Twisted (based on txriak) +* [RiakKit](http://shuhaowu.com/riakkit) - A small Python ORM that + sits on top of riak-python-client, similar to mongokit and couchdbkit +* [riakalchemy](https://github.com/Linux2Go/riakalchemy) - Object + mapper for Riak written in Python +* [riak_crdt](https://github.com/ericmoritz/riak_crdt) - A CRDT + (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT + API](https://github.com/ericmoritz/crdt) +* [txriak](https://launchpad.net/txriak) - A Twisted module for + communicating with Riak via the HTTP interface +* [txriakidx](https://github.com/williamsjj/txriakidx) - Riak client + for Twisted Python that implements transparent indexes + +#### Racket + +* [riak.rkt](https://github.com/shofetim/riak.rkt) - Racket API to + Riak +* [Racket Riak](https://github.com/dkvasnicka/racket-riak) - Racket + 1.3.x API to Riak + +#### Ruby + +* [Risky](https://github.com/aphyr/risky) - A lightweight Ruby ORM for + Riak +* [riak_sessions](http://github.com/igorgue/riak_sessions) - Riak-backed session storage for Rack +* [Riaktor](http://github.com/benmyles/riaktor) - Ruby client and + object mapper for Riak +* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) - DataMapper adapter for Riak +* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) - Riak + Protocol Buffer Client in Ruby +* [Devise-Ripple](http://github.com/frank06/devise-ripple) - An ORM + strategy to use Devise with Riak +* [ripple-anaf](http://github.com/bkaney/ripple-anaf) - Accepts nested + attributes support for Ripple +* [Pabst](https://github.com/sgonyea/pabst) - Cross-platform Ruby + extension for Protocol Buffers written in both Objective-C and + Objective-C++ + +#### Scala + +* [Riakka](http://github.com/timperrett/riakka) - Scala library for + talking to Riak +* [Ryu](http://github.com/softprops/ryu) - A Tornado Whirlwind Kick + Scala client for the Riak raw HTTP interface +* [Raiku](https://github.com/gideondk/Raiku) - An Akka IO- and + Sentinel-driven Riak Scala client + +#### Smalltalk + +* [Phriak](http://www.squeaksource.com/Phriak/) - A Riak client for + Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface +* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) - A Pharo Smalltalk interface to Riak. There is also a blog post + with some additional info about the client + [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html). + + + diff --git a/content/riak/kv/3.0.1/developing/data-modeling.md b/content/riak/kv/3.0.1/developing/data-modeling.md new file mode 100644 index 0000000000..e5a348305d --- /dev/null +++ b/content/riak/kv/3.0.1/developing/data-modeling.md @@ -0,0 +1,13 @@ +--- +layout: redirect +target: "riak/kv/3.0.1/learn/use-cases/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/, but -- to maintain +the git history of this (possibly malformed?) file -- we're going to start off +by using this generated redirect. 
+ + + diff --git a/content/riak/kv/3.0.1/developing/data-types.md b/content/riak/kv/3.0.1/developing/data-types.md new file mode 100644 index 0000000000..aad15b6a4a --- /dev/null +++ b/content/riak/kv/3.0.1/developing/data-types.md @@ -0,0 +1,279 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Data Types" + identifier: "developing_data_types" + weight: 102 + parent: "developing" +toc: true +aliases: + - /riak/3.0.1/dev/using/data-types + - /riak/kv/3.0.1/dev/using/data-types + - /riak/3.0.1/dev/data-modeling/data-types + - /riak/kv/3.0.1/dev/data-modeling/data-types + +--- + +[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others +[concept crdt]: ../../learn/concepts/crdts +[ops bucket type]: ../../using/cluster-operations/bucket-types + +Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types: + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [GSets](./gsets) +- [Maps](./maps) + +Riak KV also has 1 context-free data type, that has similar usage but does not require contexts. + +- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places) + + +Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps). + +For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt]. + +## Getting Started with Riak Data Types + +The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types: + +1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type). +2. [Confirm the bucket was properly configured](#confirm-bucket-configuration). +3. [Activate the bucket type](#activate-bucket-type). + +### Creating a Bucket with a Riak Data Type + +First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to either `counter`, `map`, `set`, or `hll`. + +The following would create a separate bucket type for each of the four +bucket-level data types: + +```bash +riak admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak admin bucket-type create counters '{"props":{"datatype":"counter"}}' +riak admin bucket-type create hlls '{"props":{"datatype":"hll"}}' +riak admin bucket-type create gsets '{"props":{"datatype":"gset"}}' +``` + +> **Note** +> +> The names `maps`, `sets`, `counters`, `hlls` and `gsets` are not reserved +terms. You are free to name bucket types whatever you like, with +the exception of `default`. + +### Confirm Bucket configuration + +Once you've created a bucket with a Riak data type, you can check +to make sure that the bucket property configuration associated with that +type is correct. This can be done through the `riak admin` interface: + +```bash +riak admin bucket-type status maps +``` + +This will return a list of bucket properties and their associated values +in the form of `property: value`. 
If our `maps` bucket type has been set +properly, we should see the following pair in our console output: + +``` +datatype: map +``` + +### Activate Bucket type + +If a bucket type has been properly constructed, it needs to be activated +to be usable in Riak. This can also be done using the `bucket-type` +command interface: + +```bash +riak admin bucket-type activate maps +``` + +To check whether activation has been successful, simply use the same +`bucket-type status` command shown above. + +See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application. + +## Required Bucket Properties + +In order for Riak data types to work the bucket should have the following bucket properties: + +- `allow_mult = true` +- `last_write_wins = false` + +These settings are set by default and should not be changed. + +## Data Types and Context + +Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence. + +If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client). + +> **Note** +> +> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter. + +In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map): + +```java +// Using the "ahmedMap" Location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +System.out.prinntln(ctx.getValue().toString()) + +// An indecipherable string of Unicode characters should then appear +``` + +```ruby +bucket = client.bucket('users') +ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps') +ahmed_map.instance_variable_get(:@context) + +# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j" +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```python +bucket = client.bucket_type('maps').bucket('users') +ahmed_map = Map(bucket, 'ahmed_info') +ahmed_map.context + +# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Note: using a previous UpdateMap or FetchMap result +Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context)); + +// Output: +// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo= +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("context: '%s'", rslt.context.toString('base64')); +}); + +// Output: +// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo=' +``` + +```erlang +%% You cannot fetch a data type's context directly using the Erlang +%% client. This is actually quite all right, as the client automatically +%% manages contexts when making updates. 
+``` + +> **Context with the Ruby, Python, and Erlang clients** +> +> In the Ruby, Python, and Erlang clients, you will not need to manually +handle context when making data type updates. The clients will do it all +for you. The one exception amongst the official clients is the Java +client. We'll explain how to use data type contexts with the Java client +directly below. + +### Context with the Java and PHP Clients + +With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations: + +* Disabling a flag within a map +* Removing an item from a set (whether the set is on its own or within a + map) +* Removing a field from a map + +Without context, these operations simply will not succeed due to the +convergence logic driving Riak data types. The example below shows you +how to fetch a data type's context and then pass it back to Riak. More +specifically, we'll remove the `paid_account` flag from the map: + +```java +// This example uses our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate removePaidAccountField = new MapUpdate() + .removeFlag("paid_account"); +UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField) + .withContext(ctx) + .build(); +client.execute(update); +``` + + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +## Usage Examples + +- [Flags](./maps#flags) +- [Registers](./maps#registers) +- [Counters](./counters) +- [Sets](./sets) +- [Maps](./maps) +- [GSets](./gsets) +- [Hyperloglogs](./hyperloglogs) + +The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section. + +All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish. + +## Data Types and Search + +Riak data types can be searched like any other object, but with the +added benefit that your data type is indexed as a different type by Solr, +the search platform behind Riak Search. + +In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code +samples from each of our official client libraries. 
+ + + diff --git a/content/riak/kv/3.0.1/developing/data-types/counters.md b/content/riak/kv/3.0.1/developing/data-types/counters.md new file mode 100644 index 0000000000..9d529423e9 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/data-types/counters.md @@ -0,0 +1,635 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Counters" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Counters" + identifier: "data_types_counters" + weight: 100 + parent: "developing_data_types" +toc: true +aliases: + - /riak/3.0.1/dev/using/data-types/counters + - /riak/kv/3.0.1/dev/using/data-types/counters + - /riak/3.0.1/dev/data-modeling/data-types/counters + - /riak/kv/3.0.1/dev/data-modeling/data-types/counters + +--- + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero. + +The examples in this section will show you how to use counters on their own. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter set to `counter`: + +```bash +riak admin bucket-type create counters '{"props":{"datatype":"counter"}}' +``` + +> **Note** +> +> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak admin bucket-type status counters +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `counters` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: counter +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak admin bucket-type activate counters +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak admin bucket-type status counters +``` + +After creating and activating our new `counters` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key +location that contains our counter. + +For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. 
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can +create this counter and ensure that the `counters` bucket will use our +`counters` bucket type like this: + +```java +// Using the countersBucket Namespace object from above: + +Location trafficTickets = new Location(countersBucket, "traffic_tickets"); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') + +# Alternatively, the Ruby client enables you to set a bucket type as +# being globally associated with a Riak data type. The following would +# set all counter buckets to use the counters bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters' + +# This would enable us to create our counter without specifying a bucket type +bucket = client.bucket('counters') +counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets') +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('traffic_tickets', $bucket); +``` + +```python +bucket = client.bucket_type('counters').bucket('traffic_tickets') +counter = bucket.new('traffic_tickets') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResult = cmd.Result; +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +``` + +```erlang +Counter = riakc_counter:new(). + +%% Counters in the Erlang client are opaque data structures that collect +%% operations as you mutate them. We will associate the data structure +%% with a bucket type, bucket, and key later on. +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +## Increment a Counter + +Now that our client knows which bucket/key pairing to use for our +counter, `traffic_tickets` will start out at 0 by default. If we happen +to get a ticket that afternoon, we can increment the counter: + +```java +// Using the "trafficTickets" Location from above: + +CounterUpdate cu = new CounterUpdate(1); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment + +# This will increment the counter both on the application side and in +Riak +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment() + +# Updates are staged locally and have to be explicitly sent to Riak +# using the store() method. 
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 5}' +``` + +## Retrieve Counter Value + +We can retrieve the value of the counter and view how many tickets have accumulated: + +```java +// Using the "trafficTickets" Location from above: +FetchCounter fetch = new FetchCounter.Builder(trafficTickets) + .build(); +FetchCounter.Response response = client.execute(fetch); +RiakCounter counter = response.getDatatype(); +Long ticketsCount = counter.view(); +``` + +```ruby +counter.value +# Output will always be an integer +``` + +```php +$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getCounter(); + +$trafficTickets->getData(); # returns an integer +``` + +```python +counter.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, whereas the call above would return +# 6, the call below will return 0' since we started with an empty +# counter: + +counter.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any changes to the counter that have not yet been +# sent to Riak +counter.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickts"); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +// response.Value has the counter value +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets' +}; +client.fetchCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND", + options.bucketType, options.bucket, options.key); + } else { + logger.info("bt: %s, b: %s, k: %s, counter: %d", + options.bucketType, options.bucket, options.key, + rslt.counterValue); + } + } +); +``` + +```erlang +riakc_counter:dirty_value(Counter2). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, whereas the call above would return +%% '6', the call below will return '0' since we started with an empty +%% counter: + +riakc_counter:value(Counter2). + +%% To fetch the value stored on the server, use the call below: + +{ok, CounterX} = riakc_pb_socket:fetch_type(Pid, + {<<"counters">>, <<"counters">>}, + <<"traffic_tickets">>). +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets + +# Response: +{"type":"counter", "value": } +``` + +## Decrement a Counter + +Counters enable you to decrement values in addition to incrementing them as seen above. 
+
+For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record:
+
+```java
+// Using the "trafficTickets" Location from above:
+CounterUpdate cu = new CounterUpdate(-1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.decrement
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(-3)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+counter.decrement()
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var updateCmd = new UpdateCounter.Builder(-3)
+    .WithBucketType("counters")
+    .WithBucket("counters")
+    .WithKey("traffic_tickets")
+    .Build();
+
+rslt = client.Execute(updateCmd);
+response = updateCmd.Response;
+// response.Value is three less than before
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -1
+};
+
+// As with incrementing, you can also decrement by more than one, e.g.:
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -3
+};
+```
+
+```erlang
+Counter3 = riakc_counter:decrement(Counter2).
+
+%% As with incrementing, you can also decrement by more than one:
+
+Counter4 = riakc_counter:decrement(3, Counter3).
+
+%% At some point, we'll want to send our local updates to the server
+%% so they get recorded and are visible to others. Extract the update
+%% using the to_op/1 function, then pass it to
+%% riakc_pb_socket:update_type/4,5.
+
+riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>},
+                            <<"traffic_tickets">>,
+                            riakc_counter:to_op(Counter4)).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"decrement": 3}'
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/data-types/gsets.md b/content/riak/kv/3.0.1/developing/data-types/gsets.md
new file mode 100644
index 0000000000..2f86a6494b
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/data-types/gsets.md
@@ -0,0 +1,631 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: GSets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "GSets"
+    identifier: "data_types_gsets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/data-types/gsets
+  - /riak/kv/3.0.1/dev/using/data-types/gsets
+  - /riak/3.0.1/dev/data-modeling/data-types/gsets
+  - /riak/kv/3.0.1/dev/data-modeling/data-types/gsets
+
+---
+
+GSets are a bucket-level Riak data type that can be used on their own or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps).
+
+GSets are collections of unique binary values (such as strings). All of the values in a gset are unique and are automatically sorted alphabetically irrespective of the order they were added.
+
+For example, if you attempt to add the element `shovel` to a gset that already contains `shovel`, the operation will be ignored by Riak KV.
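+
+To make that grow-only behavior concrete, here is a minimal sketch using the Python client. It assumes a configured `client` and an active `gsets` bucket type, both of which are covered below; the bucket and key names are purely illustrative:
+
+```python
+bucket = client.bucket_type('gsets').bucket('demo')
+gset = bucket.new('tools')
+
+gset.add('shovel')
+gset.add('shovel')  # duplicate add: ignored by Riak KV
+gset.add('axe')
+gset.store()
+
+gset.value  # contains exactly two elements: 'axe' and 'shovel'
+```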
+
+Unlike sets, elements can only be added; existing elements cannot be modified or deleted.
+
+> **Known Issue**
+>
+> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `gset`:
+
+```bash
+riak admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak admin bucket-type status gsets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `gsets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: gset
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak admin bucket-type activate gsets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak admin bucket-type status gsets
+```
+
+After creating and activating our new `gsets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using gsets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a gset:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location gset =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+gset = bucket.new('2019-11-17')
+
+# or
+
+from riak.datatypes import GSet
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with gsets
+// by building an Options object or using a Builder
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, gsets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store a list of transactions that occur for an account number on a specific date. Let's create a Riak gset stored in the key `2019-11-17` in the bucket `account-12345678` using the `gsets` bucket type created previously (the Java and Ruby snippets below illustrate the same pattern with a gset of `cities` stored in a `travel` bucket):
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak gset.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a gset explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create an options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+20191117Gset = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty. 
We can verify that it is empty at any +time: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +RiakSet set = response.getDatatype(); +boolean isEmpty = set.viewAsSet().isEmpty(); +``` + +```ruby +cities_set.empty? +``` + +```php +# use $location from earlier +$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($gset->getData()); +``` + +```python +len(gset) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchGSet.Builder() + .WithBucketType("gsets") + .WithBucket("account-12345678") + .WithKey("2019-11-17"); + +FetchGSet fetchGSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchGSetCommand); +GSetResponse response = fetchGSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'gsets', + bucket: 'account-12345678', + key: '2019-11-17' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("gset '2019-11-17' is not found!"); + } +}); +``` + +```erlang +riakc_gset:size(20191117Gset) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% gset that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a GSet + +But let's say that a pair of transactions occurred today. 
Let's add them to our `2019-11-17` gset:
+
+```java
+// Using our "citiesSet" Location from above:
+
+GSetUpdate su = new GSetUpdate()
+        .add("Toronto")
+        .add("Montreal");
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .build();
+client.execute(update);
+```
+
+```ruby
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('transaction a')
+    ->add('transaction b')
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+gset.add('transaction a')
+gset.add('transaction b')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "transaction a", "transaction b" };
+
+var builder = new UpdateGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17")
+    .WithAdditions(adds);
+
+UpdateGSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+GSetResponse response = cmd.Response;
+Assert.Contains("transaction a", response.AsStrings.ToArray());
+Assert.Contains("transaction b", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['transaction a', 'transaction b'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+20191117Gset1 = riakc_gset:add_element(<<"transaction a">>, 20191117Gset),
+20191117Gset2 = riakc_gset:add_element(<<"transaction b">>, 20191117Gset1).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["transaction a", "transaction b"]}'
+```
+
+## Remove from a GSet
+
+Removal from a GSet is not possible; gsets are grow-only and can never lose elements.
+
+## Retrieve a GSet
+
+Now we can check which transactions are currently in our gset:
+
+```java
+// Using our "citiesSet" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+        .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+# => #<Set: {"Montreal", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+var_dump($gset->getData());
+```
+
+```python
+gset.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['transaction a', 'transaction b']), the call below would
+# return frozenset([]).
+
+gset.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions.
+gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using the GSetResponse from the fetch example above:
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(20191117Gset2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(20191117Gset2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"gsets">>,<<"account-12345678">>},
+                                        <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+        .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Toronto")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Toronto'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use the standard JavaScript array method indexOf() on the values
+// returned by fetchSet (rslt.values above):
+
+var gsetValues = rslt.values;
+gsetValues.indexOf('transaction z'); // if present, index is >= 0
+gsetValues.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, 20191117Gset2 is the most "recent" gset from the
+%% standpoint of our application.
+
+riakc_gset:is_element(<<"transaction z">>, 20191117Gset2).
+riakc_gset:is_element(<<"transaction a">>, 20191117Gset2).
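+
+%% Both calls return plain booleans: false for <<"transaction z">>,
+%% true for <<"transaction a">>.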
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "citiesSet" from above:
+
+int numberOfCities = citiesSet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+response.Values.Count();
+```
+
+```javascript
+// Use the standard JavaScript array property length on the values
+// returned by fetchSet (rslt.values above):
+
+var gsetSize = rslt.values.length;
+```
+
+```erlang
+riakc_gset:size(20191117Gset2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/data-types/hyperloglogs.md b/content/riak/kv/3.0.1/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..ba09f68730
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/data-types/hyperloglogs.md
@@ -0,0 +1,643 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/data-types/hyperloglogs
+  - /riak/kv/3.0.1/dev/using/data-types/hyperloglogs
+  - /riak/3.0.1/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/3.0.1/dev/data-modeling/data-types/hyperloglogs
+
+---
+
+The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog. 
+ +For this example we'll use the `hlls` bucket type created and activated above and a bucket called `hlls`: + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location hllLocation = + new Location(new Namespace("", ""), ""); +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +hll = bucket.new(key) + +# or + +from riak.datatypes import Hll +hll = Hll(bucket, key) +``` + +```go +// Buckets and bucket types are simply strings in the Go client. + +// See the examples below for more information, or the full example at +// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go + +// We will need the follow imports to run the examples: +import ( + "fmt" + "os" + "time" + + riak "github.com/basho/riak-go-client" + "errors" +) +``` + +```csharp +// In the C# client, buckets are just string parameters to operations. +// See the examples below for more information. +``` + +```javascript +// In the Node.js client, buckets are just string parameters to operations. +// See the examples below for more information. +``` + +```php +$command = (new Command\Builder\FetchHll($riak_client)) + ->buildLocation('', '', 'hlls') + ->build(); +``` + +```ruby +bucket = client.bucket_type('hlls').bucket('my_hlls') +``` + +```curl +curl http://localhost:8098/types//buckets//datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + + +## Create a HyperLogLog data type + +To create a hyperloglog data structure, you need to specify a bucket/key pair to +hold that hyperloglog. Here is the general syntax for doing so: + +```erlang +HLL = riakc_hll:new(). + +%% Hyperloglogs in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```java +// In the Java client, you specify the location of Data Types +// before you perform operations on them: + +Location hllLocation = + new Location(new Namespace("hlls", "hello"), "darkness"); + +// In the Java client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +hll = bucket.new(key) + +# or + +from riak.datatypes import Hll +hll = Hll(bucket, key) +``` + +```go +// In the Go client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```csharp +// In the C# client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```javascript +// In the Node.js client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. 
+``` + +```php +// Note that "hlls" is just an example HLL bucket type name used +// in these examples + +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('gosabres poked you.') + ->add('phprocks viewed your profile.') + ->add('phprocks started following you.') + ->buildBucket('', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +key = "darkness" +hll = Riak::Crdt::HyperLogLog.new(bucket, key) +``` + +```curl +# You cannot create an empty hyperloglog data structure through the HTTP +# interface. +# Hyperloglogs can only be created when an element is added to them, as in the +# examples below. +``` + +Upon creation, our hyperloglog data structure is empty: + +```erlang +HLL. + +%% which will return: +%% {hll,0,[]} +``` + +```java +FetchHll fetch = new FetchHll.Builder(hllLocation) + .build(); +RiakHll hll = client.execute(fetch); +boolean isEmpty = hll.getCardinality() == 0; +``` + +```python +is_empty = hll.value == 0 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } +}); +// Prints "Not Found" to logger.info. +``` + +```csharp + var fetch = new FetchHll.Builder() + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .Build(); + +RiakResult rslt = client.Execute(fetch); +HllResponse response = fetch.Response; +if (response.NotFound) +{ + Console.WriteLine("Not Found"); +} +// Prints "Not Found" to the console. +``` + +```php +$command = (new Command\Builder\FetchHll($riak_client)) + ->buildLocation('darkness', 'hello', 'hlls') + ->build(); + +$response = $command->execute(); + +$response->getCode() == '404'; +``` + +```ruby +puts hll.cardinality +# Prints "0" +``` + +```curl +curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness + +# Response +{"type":"hll","error":"notfound"} +``` + +## Add elements to a HyperLogLog data type + +```erlang +HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1), + +HLL2. 
+ +%% which will return: +%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]} +``` + +```java +HllUpdate hllUpdate = new HllUpdate() + .add("Jokes") + .add("Are") + .addAll(Arrays.asList("Better", "Explained", "Jokes")); + +hllUpdate.getElementAdds(); +// Returns the set of ["Jokes", "Are", "Better", "Explained"] +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +myhll = datatypes.Hll(bucket, 'hll_one') +myhll.add('Jokes') +myhll.add('Are') +myhll.add('Better') +myhll.add('Explained') +myhll.add('Jokes') +myhll.store() +# myhll.value == 4 +``` + +```go +// We will add values in the next example +``` + +```csharp +// We will add values in the next example +``` + +```javascript +// We will add values in the next example +``` + +```php +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('Jokes') + ->add('Are') + ->add('Better') + ->add('Explained') + ->add('Jokes') + ->buildBucket('my_hlls', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +``` + +```curl +curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \ + -H "Content-Type: application/json" \ + -d '{"add_all":["my", "old", "friend"]}' +``` + +However, when using a non-HTTP client, the approximate cardinality/value of our +data structure will be 0, locally, until its pushed to the server and then +[fetched](#retrieve-a-hyperloglog-datatype) from the server. + +```erlang +riakc_hll:value(HLL2) == 0. + +%% which will return: +%% true + +Port = 8087, +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port), +Key = <<"Holy Diver">>, +BucketType = <<"hlls">>, +Bucket = {BucketType, <<"rainbow in the dark">>}, + +ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)). +ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)). +``` + +```java +// Using hllUpdate and hllLocation from above examples + +UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate) + .build(); +client.execute(update); +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +myhll = datatypes.Hll(bucket, 'hll_one') +myhll.add('Jokes') +myhll.add('Are') +myhll.add('Better') +myhll.add('Explained') +myhll.add('Jokes') +myhll.store() +# myhll.value == 4 +``` + +```go +adds := [][]byte{ + []byte("Jokes"), + []byte("Are"), + []byte("Better"), + []byte("Explained"), + []byte("Jokes"), +} + +builder := riak.NewUpdateHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + WithAdditions(adds...). 
+ Build() +if err != nil { + return err +} + +return cluster.Execute(cmd) +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness', + additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'], +}; + +client.updateHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```csharp +var adds = new HashSet { "Jokes", "Are", "Better", "Explained", "Jokes" }; + +var update = new UpdateHll.Builder(adds) + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(update); +``` + +```php +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('Jokes') + ->add('Are') + ->add('Better') + ->add('Explained') + ->add('Jokes') + ->buildLocation('darkness', 'hello', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +hll.add('Jokes') +hll.batch do |s| + s.add 'Are' + s.add 'Better' + s.add 'Explained' + s.add 'Jokes' +end +``` + +## Retrieve a HyperLogLog data type + +Now, we can check the approximate count-of (a.k.a. the cardinality of the elements +added to) our hyperloglog data structure: + +```erlang +{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key), +riakc_hll:value(HLL3) == 4. + +%% which would return: +%% true + +%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the +%% unique elements we've added to the data structure. +``` + +```java +FetchHll hllFetchCmd = new FetchHll.Builder(location).build(); +RiakHll hll = client.execute(hllFetchCmd); +hll.getCardinality(); +// Which returns 4 + +// We added "Jokes" twice, but, remember, the algorithm only counts the +// unique elements we've added to the data structure. +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +myhll = bucket.get('hll_one') +# myhll.value == 4 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +// We added "Jokes" twice, but, remember, the algorithm only counts the +// unique elements we've added to the data structure. +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } + logger.info("Hyperloglog cardinality is: " + rslt.cardinality); +}); +// Prints "Hyperloglog cardinality is: 4" +// We added "Jokes" twice, but, remember, the algorithm only counts the +// unique elements we've added to the data structure. 
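+// Also note that the reported cardinality is an estimate: the
+// HyperLogLog algorithm trades exact counting for a small, fixed
+// memory footprint, so very large collections may be counted with a
+// small error.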
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$result = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($result->getHll()->getData()));
+$this->assertEquals(4, $result->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/data-types/maps.md b/content/riak/kv/3.0.1/developing/data-types/maps.md
new file mode 100644
index 0000000000..fec10ccc99
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/data-types/maps.md
@@ -0,0 +1,1885 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/data-types/maps
+  - /riak/kv/3.0.1/dev/using/data-types/maps
+  - /riak/3.0.1/dev/data-modeling/data-types/maps
+  - /riak/kv/3.0.1/dev/data-modeling/data-types/maps
+
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`. 
+
+If our `maps` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: map
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak admin bucket-type activate maps
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak admin bucket-type status maps
+```
+
+After creating and activating our new `maps` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key location that contains our map.
+
+The syntax for creating a map is analogous to the
+syntax for creating other data types:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location map =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+map = Riak::Crdt::Map.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type');
+```
+
+```python
+# The client detects the bucket type's datatype and automatically
+# returns the right datatype for you, in this case a Map.
+map = bucket.new(key)
+
+# This way is also acceptable:
+from riak.datatypes import Map
+map = Map(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("<bucket_type>")
+    .WithBucket("<bucket>")
+    .WithKey("<key>");
+```
+
+```javascript
+// Options to pass to the various map methods
+var options = {
+    bucketType: '<bucket_type>',
+    bucket: '<bucket>',
+    key: '<key>'
+};
+```
+
+```erlang
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Map
+
+For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket.
+
+We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type:
+
+```java
+// In the Java client, you specify the location of data types
+// before you perform operations on them:
+
+Location ahmedMap =
+  new Location(new Namespace("maps", "customers"), "ahmed_info");
+```
+
+```ruby
+customers = client.bucket_type('maps').bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+
+# Alternatively, the Ruby client enables you to set a bucket type as being
+# globally associated with a Riak data type. 
The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+        .update("first_name", ru1)
+        .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Ahmed')
+    ->updateRegister('phone_number', '5551234567')
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved. 
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+        .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_customer', false)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"enterprise_customer">>, flag},
+                        fun(F) -> riakc_flag:disable(F) end,
+                        Map3).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "enterprise_customer_flag": "disable"
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+  }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"]
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map4).
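+
+%% dirty_value/1 reflects the local flag update immediately, even
+%% before the map has been sent to the server.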
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+        .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateCounter('page_visits', $updateCounter)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":1},
+//       "Sets":{},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "page_visits_counter": 1
+    }
+  }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles. 
We'll store
+that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+        .add("robots")
+        .add("opera")
+        .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+        .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('robots')
+    ->add('opera')
+    ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":1},
+//       "Sets":{"interests":["motorcycles","opera","robots"]},
+//       "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//       "Flags":{"enterprise_customer":false},
+//       "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "add_all": [
+          "robots",
+          "opera",
+          "motorcycles"
+        ]
+      }
+    }
+  }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? 
interest + end +end + +# This will return three Boolean values +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$sets = $map->getSet('interests'); +var_dump($sets->getData()); +``` + +```python +reloaded_map = map.reload() +for interest in ['robots', 'opera', 'motorcycles']: + interest in reloaded_map.sets['interests'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +Map ahmedMap = response.Value; + +// All of the following return true: +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("opera"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + assert(rslt.map.sets['interests'].indexOf('robots') !== -1); +}); +``` + +```erlang +riakc_map:dirty_value(Map6). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false +``` + +We learn from a recent purchasing decision that Ahmed actually doesn't +seem to like opera. He's much more keen on indie pop. Let's change the +`interests` set to reflect that: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate() + .remove("opera") + .add("indie pop"); +MapUpdate mu = new MapUpdate() + .update("interests", su); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.batch do |m| + m.sets['interests'].remove('opera') + m.sets['interests'].add('indie pop') +end +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('indie pop') + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +```python +map.sets['interests'].discard('opera') +map.sets['interests'].add('indie pop') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.AddToSet("interests", "indie pop"); +mapOperation.RemoveFromSet("interests", "opera"); + +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); + +MapResponse response = cmd.Response; +Map ahmedMap = response.Value; + +// This is false +ahmedMap.Sets.GetValue("interests").Contains("opera"); + +// These are true +ahmedMap.Sets.GetValue("interests").Contains("indie pop"); +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + mapOp.removeFromSet('interests', 'opera'); + mapOp.addToSet('interests', 'indie pop'); + + options.context = rslt.context; + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map7 = 
riakc_map:update({<<"interests">>, set}, + fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6), +Map8 = riakc_map:update({<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end, + Map7). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "interests_set": { + "remove": "opera", + "add": "indie pop" + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag==" + } + ' +``` + +## Maps Within Maps + +We've stored a wide of variety of information---of a wide variety of +types---within the `ahmed_info` map thus far, but we have yet to explore +recursively storing maps within maps (which can be nested as deeply as +you wish). + +Our company is doing well and we have lots of useful information about +Ahmed, but now we want to store information about Ahmed's contacts as +well. We'll start with storing some information about Ahmed's colleague +Annika inside of a map called `annika_info`. + +First, we'll store Annika's first name, last name, and phone number in +registers: + +```java +// Using our "ahmedMap" location from above: + +RegisterUpdate ru1 = new RegisterUpdate("Annika"); +RegisterUpdate ru2 = new RegisterUpdate("Weiss"); +RegisterUpdate ru3 = new RegisterUpdate("5559876543"); + +MapUpdate annikaUpdate = new MapUpdate() + .update("first_name", ru1) + .update("last_name", ru2) + .update("phone_number", ru3); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].batch do |m| + m.registers['first_name'] = 'Annika' + m.registers['last_name'] = 'Weiss' + m.registers['phone_number'] = 5559876543.to_s +end +``` + +```php +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateRegister('first_name', 'Annika') + ->updateRegister('last_name', 'Weiss') + ->updateRegister('phone_number', '5559876543'); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].registers['first_name'].assign('Annika') +map.maps['annika_info'].registers['last_name'].assign('Weiss') +map.maps['annika_info'].registers['phone_number'].assign(str(5559876543)) +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); + +var annikaInfoOperation = mapOperation.Map("annika_info"); +annikaInfoOperation.SetRegister("first_name", "Annika"); +annikaInfoOperation.SetRegister("last_name", "Weiss"); +annikaInfoOperation.SetRegister("phone_number", "5559876543"); + +builder.WithMapOperation(mapOperation); +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); 
+mapOp.map('annika_info') + .setRegister('first_name', 'Annika') + .setRegister('last_name', 'Weiss') + .setRegister('phone_number', '5559876543'); + +options.op = mapOp; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map12 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end, + Map11), +Map13 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"last_name">>, register}, + fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end, + Map12), +Map14 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end, + Map13). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "first_name_register": "Annika", + "last_name_register": "Weiss", + "phone_number_register": "5559876543" + } + } + } + } + ' +``` + +The value of a register in a map can be obtained without a special +method: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +String annikaFirstName = response.getDatatype() + .getMap("annika_info") + .getRegister("first_name") + .view() + .toString(); +``` + +```ruby +map.maps['annika_info'].registers['first_name'] + +# "Annika" +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getRegister('first_name'); // Annika +``` + +```python +map.reload().maps['annika_info'].registers['first_name'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Registers.GetValue("first_name"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var annikaFirstName = + rslt.map.maps['annika_info'].registers['first_name'].toString('utf8'); +}); +``` + +```erlang +riakc_map:dirty_value(Map14). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +Registers can also be removed: + +```java +// This example uses our "ahmedMap" location from above. 
Operations that +// remove fields from maps require that you first fetch the opaque context +// attached to the map and then include the context in the update operation: + +FetchMap fetch = new FetchMap.Builder(ahmedMap) + .build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate annikaUpdate = new MapUpdate() + .removeRegister("first_name"); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .withContext(ctx) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].registers.remove('first_name') +``` + +```php +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->removeRegister('first_name'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +```python +del map.maps['annika_info'].registers['first_name'] +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").RemoveRegister("first_name"); + +// Note: using Context from last response +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + mapOp.map('annika_info').removeRegister('first_name'); + + var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp, + context: rslt.context, + }; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Map15 = riakc_map:update({<<"annika_info">>, map}, + fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end, + Map14). 
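+
+%% As with the other clients, a field removal is only applied on the
+%% server when the update is sent together with the map's causal
+%% context (see the note above); erase/2 just records the removal
+%% locally until the map is stored.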
+``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "remove": ["phone_number_register"] + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag==" + } + ' +``` + +Now, we'll store whether Annika is subscribed to a variety of plans +within the company as well: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +Context ctx = response.getContext(); +MapUpdate annikaUpdate = new MapUpdate() + .update("enterprise_plan", new FlagUpdate((false)) + .update("family_plan", new FlagUpdate(false)) + .update("free_plan", new FlagUpdate(true)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .withContext(ctx) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].batch do |m| + m.flags['enterprise_plan'] = false + m.flags['family_plan'] = false + m.flags['free_plan'] = true +end +``` + +```php +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateFlag('enterprise_plan', false) + ->updateFlag('family_plan', false) + ->updateFlag('free_plan', true); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].flags['enterprise_plan'].disable() +map.maps['annika_info'].flags['family_plan'].disable() +map.maps['annika_info'].flags['free_plan'].enable() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info") + .SetFlag("enterprise_plan", false) + .SetFlag("family_plan", false) + .SetFlag("free_plan", true); + +builder.WithMapOperation(mapOperation); + +MapUpdate cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + var annika_map = mapOp.map('annika_info'); + annika_map.setFlag('enterprise_plan', false); + annika_map.setFlag('family_plan', false); + annika_map.setFlag('free_plan', true); + + var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp, + context: rslt.context, + }; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map16 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"enterprise_plan">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + M) end, + Map15), +Map17 = riakc_map:update( + 
{<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"family_plan">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + M) end, + Map16), +Map18 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"free_plan">>, flag}, + fun(F) -> riakc_flag:enable(F) end, + M) end, + Map17). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "enterprise_plan_flag": "disable", + "family_plan_flag": "disable", + "free_plan_flag": "enable" + } + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag==" + } + ' +``` + +The value of a flag can be retrieved at any time: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +boolean enterprisePlan = response.getDatatype() + .getMap("annika_info") + .getFlag("enterprise_plan") + .view(); +``` + +```ruby +map.maps['annika_info'].flags['enterprise_plan'] + +# false +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false +``` + +```python +map.reload().maps['annika_info'].flags['enterprise_plan'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Flags["enterprise_plan"]; +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var enterprisePlan = + rslt.map.maps.annika_info.flags.enterprise_plan; +}); +``` + +```erlang +riakc_map:dirty_value(Map18). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +It's also important to track the number of purchases that Annika has +made with our company. 
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
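+
+%% riakc_counter:increment(Amount, Counter) bumps the counter locally;
+%% the operation is applied on the server when the enclosing map is
+%% stored.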
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
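+
+%% Additions to a set, even one nested inside a map, do not require
+%% the map's causal context; only removals do.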
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "interests_set": { + "add": "tango dancing" + } + } + } + } + } + ' +``` + +We can remove that interest in just the way that we would expect: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().remove("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .withUpdate(ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].remove('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withContext($response->getMap()->getContext()) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].discard('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing"); + +// Note: using Context from previous response +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); +client.Execute(builder.Build()); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + var annika_map = mapOp.map('annika_info'); + annika_map.removeFromSet('interests', 'tango dancing'); + + options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp, + context: rslt.context + }; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map21 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end, + M) end, + Map20). 
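+
+%% Because this is a removal, the map must previously have been
+%% fetched from Riak so that its causal context can be sent along
+%% with the update.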
+``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "interests_set": { + "remove": "tango dancing" + } + } + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag==" + } + ' +``` + +If we wanted to add store information about one of Annika's specific +purchases, we could do so within a map: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate purchaseUpdate = new MapUpdate() + .update("first_purchase", new FlagUpdate(true) + .update("amount", new RegisterUpdate("1271")) + .update("items", new SetUpdate().add("large widget")); +MapUpdate annikaUpdate = new MapUpdate() + .update("purchase", purchaseUpdate); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .withUpdate(ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].maps['purchase'].batch do |m| + m.flags['first_purchase'] = true + m.register['amount'] = 1271.to_s + m.sets['items'].add('large widget') +end +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('large widget'); + +$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateFlag('first_purchase', true) + ->updateRegister('amount', '1271') + ->updateSet('items', $updateSet); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('purchase', $purchaseMap); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable() +map.maps['annika_info'].maps['purchase'].register['amount'].assign(str(1271)) +map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget') +# and so on +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").Map("purchase") + .SetFlag("first_purchase", true) + .SetRegister("amount", "1271") + .AddToSet("items", "large widget"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +var annika_purchase_map = annika_map.map('purchase'); +annika_purchase_map.setFlag('first_purchase', true); +annika_purchase_map.setRegister('amount', '1271'); +annika_purchase_map.addToSet('items', 'large widget'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new 
Error(err); + } +}); +``` + +```erlang +Map22 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"purchase">>, map}, + fun(M) -> riakc_map:update( + {<<"first_purchase">>, flag}, + fun(R) -> riakc_flag:enable(R) end, + M) end, + M) end, + Map21 +). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "purchase_map": { + "update": { + "first_purchase_flag": "enable", + "amount_register": "1271", + "items_set": { + "add": "large widget" + } + } + } + } + } + } + } + ' +``` + + + diff --git a/content/riak/kv/3.0.1/developing/data-types/sets.md b/content/riak/kv/3.0.1/developing/data-types/sets.md new file mode 100644 index 0000000000..3888c2ea7a --- /dev/null +++ b/content/riak/kv/3.0.1/developing/data-types/sets.md @@ -0,0 +1,773 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Data Types: Sets" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Sets" + identifier: "data_types_sets" + weight: 101 + parent: "developing_data_types" +toc: true +aliases: + - /riak/3.0.1/dev/using/data-types/sets + - /riak/kv/3.0.1/dev/using/data-types/sets + - /riak/3.0.1/dev/data-modeling/data-types/sets + - /riak/kv/3.0.1/dev/data-modeling/data-types/sets + +--- + +Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps). + +Sets are collections of unique binary values (such as strings). All of +the values in a set are unique. + +For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter `set`: + +```bash +riak admin bucket-type create sets '{"props":{"datatype":"set"}}' +``` + +> **Note** +> +> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak admin bucket-type status sets +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `sets` bucket type has been set properly we should see the following pair in our console output: + +``` +datatype: set +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak admin bucket-type activate sets +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak admin bucket-type status sets +``` + +After creating and activating our new `sets` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair. 
+ +Here is the general syntax for creating a bucket type/bucket/key +combination to handle a set: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location set = + new Location(new Namespace("", ""), ""); +``` + +```ruby +# Note: both the Riak Ruby Client and Ruby the language have a class +# called Set. Make sure that you refer to the Ruby version as ::Set and +# the Riak client version as Riak::Crdt::Set + +bucket = client.bucket_type('bucket_type_name').bucket('bucket_name') +set = Riak::Crdt::Set.new(bucket, key) +``` + +```php +$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type')); +``` + +```python +# Note: The Python standard library `collections` module has an abstract +# base class called Set, which the Riak Client version subclasses as +# `riak.datatypes.Set`. These classes are not directly interchangeable. +# In addition to the base methods, `riak.datatypes.Set` also +# implements the `add` and `discard` methods from +# `collections.MutableSet`, but does not implement the rest of its +# API. Be careful when importing, or simply use the instances returned +# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly +# importing the class. + +set = bucket.new(key) + +# or + +from riak.datatypes import Set +set = Set(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// As with counters, with the Riak .NET Client you interact with sets +// by building an Options object or using a Builder +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); + +// NB: builder.Options will only be set after Build() is called. +FetchSet fetchSetCommand = builder.Build(); + +FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities"); + +// These two options objects are equal +Assert.AreEqual(options, builder.Options); +``` + +```javascript +// As with counters, with the Riak Node.js Client you interact with sets on the +// basis of the set's location in Riak, as specified by an options object. +// Below is an example: +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +``` + +```erlang +%% Like counters, sets are not encapsulated in a +%% bucket/key in the Erlang client. See below for more +%% information. +``` + +```curl +curl http://localhost:8098/types//buckets//datatypes/ + +# Note that this differs from the URL structure for non-data type requests, +# which end in /keys/ +``` + +## Create a Set + +For the following example, we will use a set to store a list of cities that we +want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously: + +```java +// In the Java client, you specify the location of Data Types +// before you perform operations on them: + +Location citiesSet = + new Location(new Namespace("sets", "travel"), "cities"); +``` + +```ruby +travel = client.bucket_type('sets').bucket('travel') +cities_set = Riak::Crdt::Set.new(travel, 'cities') + +# Alternatively, the Ruby client enables you to set a bucket type as +# being globally associated with a Riak data type. 
The following would +# set all set buckets to use the sets bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets' + +# This would enable us to create our set without specifying a bucket +# type: +travel = client.bucket('travel') +cities_set = Riak::Crdt::Set.new(travel, 'cities') +``` + +```php +$location = new \Basho\Riak\Location('cities', 'travel', 'sets'); +``` + +```python +travel = client.bucket_type('sets').bucket('travel') + +# The client detects the bucket type's data type and automatically +# returns the right data type for you, in this case a Riak set. +cities_set = travel.new('cities') + +# You can also create a reference to a set explicitly: +from riak.datatypes import Set + +cities_set = Set(travel, 'cities') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Now we'll create a Builder object for the set with which we want to +// interact: +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); +``` + +```javascript +// Now we'll create a options object for the set with which we want to +// interact: +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +``` + +```erlang +CitiesSet = riakc_set:new(). + +%% Sets in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +# You cannot create an empty set through the HTTP interface. Sets can +# only be created when an element is added to them, as in the examples +# below. +``` + +Upon creation, our set is empty. We can verify that it is empty at any +time: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +RiakSet set = response.getDatatype(); +boolean isEmpty = set.viewAsSet().isEmpty(); +``` + +```ruby +cities_set.empty? +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($set->getData()); +``` + +```python +len(cities_set) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); + +FetchSet fetchSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchSetCommand); +SetResponse response = fetchSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("set 'cities' is not found!"); + } +}); +``` + +```erlang +riakc_set:size(CitiesSet) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% set that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a Set + +But let's say that we read a travel brochure saying that Toronto and +Montreal are nice places to go. 
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
+ +Once we've fetched the set, we can remove the element `Montreal` and +store the set: + +```java +// Using our "citiesSet" Location from above + +// First, we get a response +FetchSet fetch = new FetchSet.Builder(citiesSet).build(); +FetchSet.Response response = client.execute(fetch); + +// Then we can fetch the set's causal context +Context ctx = response.getContext(); + +// Now we build a SetUpdate operation +SetUpdate su = new SetUpdate() + .remove("Montreal") + .add("Hamilton") + .add("Ottawa"); + +// Finally, we update the set, specifying the context +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .withContext(ctx) + .build(); +client.execute(update); + +// More information on using causal context with the Java client can be +// found at the bottom of this document +``` + +```ruby +cities_set.remove('Montreal') +cities_set.add('Hamilton') +cities_set.add('Ottawa') +``` + +```php +# use $location & $response from earlier +(new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Hamilton') + ->add('Ottawa') + ->remove('Montreal') + ->atLocation($location) + ->withContext($response->getSet()->getContext()) + ->build() + ->execute(); +``` + +```python +cities_set.discard('Montreal') +cities_set.add('Hamilton') +cities_set.add('Ottawa') +cities_set.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var removes = new[] { "Montreal" }; +var adds = new[] { "Hamilton", "Ottawa" }; + +// Note: +// using the builder from above +// using the Context member from the above response +builder + .WithAdditions(adds) + .WithRemovals(removes) + .WithContext(response.Context); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +// using System.Linq +var responseStrings = response.AsStrings.ToArray(); + +Assert.Contains("Toronto", responseStrings); +Assert.Contains("Hamilton", responseStrings); +Assert.Contains("Ottawa", responseStrings); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + // NB: clone package https://www.npmjs.com/package/clone + var update_opts = clone(options); + update_opts.context = rslt.context; + update_opts.additions = ['Hamilton', 'Ottawa']; + update_opts.removals = ['Montreal', 'Ottawa']; + + client.updateSet(update_opts, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2), +CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3), +CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4). 
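+
+%% Note that removing <<"Montreal">> needs the set's causal context,
+%% so in practice CitiesSet2 should reflect a set fetched from Riak
+%% (see fetch_type/3 below) rather than one built only with new/0.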
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="} + +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}' +``` + +## Retrieve a Set + +Now, we can check on which cities are currently in our set: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); +for (BinaryValue city : binarySet) { + System.out.println(city.toStringUtf8()); +} +``` + +```ruby +cities_set.members + +# +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +var_dump($set->getData()); +``` + +```python +cities_set.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, where the call above would return +# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would +# return frozenset([]). + +cities_set.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any unsent additions or deletions. +cities_set.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +foreach (var value in setResponse.AsStrings) +{ + Console.WriteLine("Cities Set Value: {0}", value); +} + +// Output: +// Cities Set Value: Hamilton +// Cities Set Value: Ottawa +// Cities Set Value: Toronto +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function(err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("cities set values: '%s'", + rslt.values.join(', ')); +}); + +// Output: +// info: cities set values: 'Hamilton, Ottawa, Toronto' +``` + +```erlang +riakc_set:dirty_value(CitiesSet5). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, where the call above would return +%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would +%% return []. These are essentially ordsets: + +riakc_set:value(CitiesSet5). + +%% To fetch the value stored on the server, use the call below: + +{ok, SetX} = riakc_pb_socket:fetch_type(Pid, + {<<"sets">>,<<"travel">>}, + <<"cities">>). 
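+
+%% Pid is a riakc_pb_socket connection, e.g. obtained with
+%% {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).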
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="} + +# You can also fetch the value of the set without the context included: +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false + +# Response +{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]} +``` + +## Find Set Member + +Or we can see whether our set includes a specific member: + +```java +// Using our "citiesSet" from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); + +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver"))); +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa"))); +``` + +```ruby +cities_set.include? 'Vancouver' +# false + +cities_set.include? 'Ottawa' +# true +``` + +```php +in_array('Vancouver', $set->getData()); # false + +in_array('Ottawa', $set->getData()); # true +``` + +```python +'Vancouver' in cities_set +# False + +'Ottawa' in cities_set +# True +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +using System.Linq; + +bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver"); +bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa"); +``` + +```javascript +// Use standard javascript array method indexOf() + +var cities_set = result.values; +cities_set.indexOf('Vancouver'); // if present, index is >= 0 +cities_set.indexOf('Ottawa'); // if present, index is >= 0 +``` + +```erlang +%% At this point, Set5 is the most "recent" set from the standpoint +%% of our application. + +riakc_set:is_element(<<"Vancouver">>, CitiesSet5). +riakc_set:is_element(<<"Ottawa">>, CitiesSet5). +``` + +```curl +# With the HTTP interface, this can be determined from the output of +# a fetch command like the one displayed in the example above +``` + +## Size of Set + +We can also determine the size of the set: + +```java +// Using our "citiesSet" from above: + +int numberOfCities = citiesSet.size(); +``` + +```ruby +cities_set.members.length +``` + +```php +count($set->getData()); +``` + +```python +len(cities_set) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +using System.Linq; + +// Note: this enumerates the IEnumerable +setResponse.Values.Count(); +``` + +```javascript +// Use standard javascript array property length + +var cities_set_size = result.values.length; +``` + +```erlang +riakc_set:size(CitiesSet5). 
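+
+%% As noted earlier, size/1 operates over the value fetched from the
+%% server, so local additions that have not yet been sent are not
+%% counted.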
+``` + +```curl +# With the HTTP interface, this can be determined from the output of +# a fetch command like the one displayed in the example above +``` + + + diff --git a/content/riak/kv/3.0.1/developing/faq.md b/content/riak/kv/3.0.1/developing/faq.md new file mode 100644 index 0000000000..ab05a78f97 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/faq.md @@ -0,0 +1,559 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Frequently Asked Questions" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Developing FAQ" + identifier: "developing_faq" + weight: 108 + parent: "developing" +toc: true +aliases: + - /riak/3.0.1/community/faqs/developing + - /riak/kv/3.0.1/community/faqs/developing + +--- + +[[Basho Bench]: {{}}riak/kv/3.0.1/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/3.0.1/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/3.0.1/developing/usage +[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js +[commit hooks]: {{}}riak/kv/3.0.1/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/3.0.1/configuring/reference +[contrib.basho.com]: https://github.com/basho/riak_function_contrib +[Erlang Riak Client]: {{}}riak/kv/3.0.1/developing/client-libraries +[MapReduce]: {{}}riak/kv/3.0.1/developing/usage/mapreduce +[Memory]: {{}}riak/kv/3.0.1/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/3.0.1/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/3.0.1/learn/concepts/causal-context#vector-clocks + + +## General + + +**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?** + +**A:** + If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example: + + ```erlang + {bitcask, [ + {data_root, "data/bitcask"}, + {expiry_secs, 86400} %% Expire after a day + ]}, + ``` + + There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0. + + You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM. + +**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?** + + +**A:** + Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free. + + If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact. + +**Q: Can I list buckets or keys in production?** + + +**A:** + It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size. 
+
+ Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+ A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+ To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+ The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+ When a node fails or is taken out of the cluster without using `riak admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+ As a simplistic example, consider this hypothetical cluster:
+
+ * 5 nodes (nodes A-E)
+ * ring size = 16
+ * `n_val` = 3
+
+ For simplicity, this example uses small integers rather than the actual 160-bit partition index values. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+
+ When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring. A preflist consists of the vnode that owns the key and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+ ```
+ 0-1-2     1-2-3     2-3-4     3-4-5
+ 4-5-6     5-6-7     6-7-8     7-8-9
+ 8-9-10    9-10-11   10-11-12  11-12-13
+ 12-13-14  13-14-15  14-15-0   15-0-1
+ ```
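+
+ For illustration only, the preflists above can be generated with a few lines of Python. This is a sketch of the ring arithmetic in this toy example, not Riak's actual implementation, which hashes keys onto a 160-bit space:
+
+ ```python
+ RING_SIZE = 16  # partitions in this hypothetical cluster
+ N_VAL = 3       # replicas per key
+
+ # A preflist is the owning vnode plus the next N_VAL - 1 vnodes.
+ preflists = [[(i + j) % RING_SIZE for j in range(N_VAL)]
+              for i in range(RING_SIZE)]
+
+ assert preflists[14] == [14, 15, 0]  # the ring wraps around
+ ```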
+
+ Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes:
+
+ ```
+ 0-1-4-7-10-13   0-2-4-7-10-13   0-2-5-7-10-13   0-2-5-8-10-13
+ 0-2-5-8-11-13   0-2-5-8-11-14   0-3-4-7-10-13   0-3-5-7-10-13
+ 0-3-5-8-10-13   0-3-5-8-11-13   0-3-5-8-11-14   0-3-6-7-10-13
+ 0-3-6-8-10-13   0-3-6-8-11-13   0-3-6-8-11-14   0-3-6-9-10-13
+ 0-3-6-9-11-13   0-3-6-9-11-14   0-3-6-9-12-13   0-3-6-9-12-14
+ 0-3-6-9-12-15   1-2-5-8-11-14   1-3-5-8-11-14   1-3-6-8-11-14
+ 1-3-6-9-11-14   1-3-6-9-12-14   1-3-6-9-12-15   1-4-5-8-11-14
+ 1-4-6-8-11-14   1-4-6-9-11-14   1-4-6-9-12-14   1-4-6-9-12-15
+ 1-4-7-8-11-14   1-4-7-9-11-14   1-4-7-9-12-14   1-4-7-9-12-15
+ 1-4-7-10-11-14  1-4-7-10-12-14  1-4-7-10-12-15  1-4-7-10-13-14
+ 1-4-7-10-13-15  2-3-6-9-12-15   2-4-6-9-12-15   2-4-7-9-12-15
+ 2-4-7-10-12-15  2-4-7-10-13-15  2-5-6-9-12-15   2-5-7-9-12-15
+ 2-5-7-10-12-15  2-5-7-10-13-15  2-5-8-9-12-15   2-5-8-10-12-15
+ 2-5-8-10-13-15  2-5-8-11-12-15  2-5-8-11-13-15  2-5-8-11-14-15
+ ```
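+
+ The 56 sets above can be verified by brute force. The following sketch is illustrative only (it is not how Riak plans coverage queries); it counts the 6-vnode subsets that include at least one member of every preflist:
+
+ ```python
+ from itertools import combinations
+
+ RING_SIZE, N_VAL = 16, 3
+ preflists = [{(i + j) % RING_SIZE for j in range(N_VAL)}
+              for i in range(RING_SIZE)]
+
+ # A covering set must intersect every preflist.
+ covering = [c for c in combinations(range(RING_SIZE), 6)
+             if all(pl & set(c) for pl in preflists)]
+ print(len(covering))  # 56
+ ```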
+
+ When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error.
+
+ When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+ Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency. A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for more than half of them to respond. This results in consistent responses to GETs even when one of the vnodes in the preflist has been compromised.
+
+ Secondary index queries, however, consult a covering set which may include only one member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+ Continuing with the above example, consider what happens if node C is force-removed. This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+ Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+ In this new 4-node configuration, any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read-repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+ So, making a couple of assumptions for demonstration purposes:
+
+ 1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+    ```
+    a - 0-1-2
+    b - 6-7-8
+    c - 10-11-12
+    ```
+
+ 2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+ 3. AAE is not enabled
+
+ The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+ The results from secondary index queries that should return all 3 keys will vary depending on the nodes chosen for the coverage set. Of the 56 possible covering sets ...
+
+ * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+   ```
+   0-2-5-8-10-13  0-2-5-8-11-13  0-2-5-8-11-14  0-3-5-8-10-13
+   0-3-5-8-11-13  0-3-5-8-11-14  0-3-6-8-10-13  0-3-6-8-11-13
+   0-3-6-8-11-14  0-3-6-9-10-13  0-3-6-9-11-13  0-3-6-9-11-14
+   1-2-5-8-11-14  1-3-5-8-11-14  1-3-6-8-11-14  1-3-6-9-11-14
+   1-4-5-8-11-14  1-4-6-8-11-14  1-4-6-9-11-14  1-4-7-8-11-14
+   ```
+
+ * 24 sets (42.9%) will return 2 of the 3 keys:
+
+   `{a,b}` (7 sets):
+
+   ```
+   0-3-6-9-12-13  0-3-6-9-12-14  0-3-6-9-12-15  1-3-6-9-12-14
+   1-3-6-9-12-15  1-4-6-9-12-14  1-4-6-9-12-15
+   ```
+
+   `{a,c}` (12 sets):
+
+   ```
+   0-1-4-7-10-13   0-2-4-7-10-13   0-2-5-7-10-13   0-3-4-7-10-13
+   0-3-5-7-10-13   0-3-6-7-10-13   1-4-7-10-11-14  1-4-7-10-12-14
+   1-4-7-10-12-15  1-4-7-10-13-14  1-4-7-10-13-15  1-4-7-9-11-14
+   ```
+
+   `{b,c}` (5 sets):
+
+   ```
+   2-5-8-10-12-15  2-5-8-10-13-15  2-5-8-11-12-15  2-5-8-11-14-15
+   2-5-8-11-13-15
+   ```
+
+ * 10 sets (17.8%) will return only one of the 3 keys:
+
+   `{a}` (2 sets):
+
+   ```
+   1-4-7-9-12-14  1-4-7-9-12-15
+   ```
+
+   `{b}` (4 sets):
+
+   ```
+   2-3-6-9-12-15  2-4-6-9-12-15  2-5-6-9-12-15  2-5-8-9-12-15
+   ```
+
+   `{c}` (4 sets):
+
+   ```
+   2-4-7-10-12-15  2-4-7-10-13-15  2-5-7-10-12-15  2-5-7-10-13-15
+   ```
+
+ * 2 sets (3.6%) will not return any of the 3 keys:
+
+   ```
+   2-4-7-9-12-15  2-5-7-9-12-15
+   ```
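+
+ To double-check the tallies above, here is another illustrative brute-force sketch. It assumes a key is returned whenever some vnode in the covering set drawn from that key's preflist still holds the data, and treats the newly claimed vnodes 2, 7, and 12 as empty:
+
+ ```python
+ from collections import Counter
+ from itertools import combinations
+
+ RING_SIZE, N_VAL = 16, 3
+ preflists = [{(i + j) % RING_SIZE for j in range(N_VAL)}
+              for i in range(RING_SIZE)]
+ covers = [set(c) for c in combinations(range(RING_SIZE), 6)
+           if all(pl & set(c) for pl in preflists)]
+
+ empty = {2, 7, 12}  # newly claimed vnodes with no data yet
+ keys = {'a': {0, 1, 2}, 'b': {6, 7, 8}, 'c': {10, 11, 12}}
+
+ # A key is returned when some covering vnode from its preflist
+ # still holds the data.
+ tally = Counter(frozenset(k for k, pl in keys.items()
+                           if (pl & cover) - empty)
+                 for cover in covers)
+ for result, count in sorted(tally.items(), key=lambda t: -len(t[0])):
+     print(sorted(result), count)
+ # Expected per the tables above: all three keys 20 times, two of
+ # three 24 times, one of three 10 times, and none 2 times.
+ ```
+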
+**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
+  Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories.
+
+  ```erlang
+  ClusterNode = 'riak@127.0.0.1'.
+
+  application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+  {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+  Ring2 = setelement(2, Ring, node()).
+  riak_core_ring_manager:set_my_ring(Ring2).
+  riak_core_ring_manager:write_ringfile().
+  [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()].
+  ```
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+  There isn't a limit on object size, but we suggest you keep it to no more than 1-2MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+  The storage per key is 40 bytes plus the key size and bucket name size.
+
+  Example:
+
+  Key size: 15 bytes.
+  Bucket Name size: 10 bytes.
+
+  Total size = 40 + 15 + 10 = **65 bytes**.
+
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+  It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+  The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+  Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster.
+
+  The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+**Q: Are Riak keys / buckets case sensitive?**
+
+
+**A:**
+  Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+  We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+  1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+  2. You will be able to upgrade Riak and your application independently of one another.
+  3. When your application or Riak need more capacity, you can scale them separately to meet your production needs.
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+  Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+
+**Q: How do I spread requests across---i.e.
load balance---a Riak cluster?** + + +**A:** + There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**. + + For further information see [System Planning]. + + +**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?** + There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node a series of merges are kicked off and the total size of the data directory shrinks. Why does this happen? + + +**A:** + Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows: + + 1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`) + 2. Remove the currently active file from the list; the active file is the one being actively written + 3. Lookup file stats for each data file; this includes percent fragmentation and number of dead bytes + 4. If any of the stats exceed the defined triggers, the Bitcask directory is merged + + The default triggers for a Bitcask directory: + + * `{frag_merge_trigger, 60}, % >= 60% fragmentation` + * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB` + + In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory. + + If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default). + +**Q: When retrieving a list of siblings I am getting the same vtag multiple times.** + When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling? + + +**A:** + The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata. + + It is possible to get siblings with the same vtag during vector clock pruning and read/repair. + + See [vector clocks] for more information. + + +**Q: How should I structure larger data objects?** + I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs? + + +**A:** + The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include: + + 1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time? + 2. How likely are the objects to be updated at the same time by multiple processes? + + If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. Generally, you will want to add links to connect the objects for easy fetching and traversal. 
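+
+  As a brief, hedged illustration of such a split (the bucket and key names here are invented for illustration, and `Pid` is assumed to be a connected `riakc_pb_socket` process as in the other Erlang examples in these docs), the parent object can simply hold the child's key:
+
+  ```erlang
+  %% Sketch only: keep the stable parent and the frequently updated child
+  %% in separate buckets, with the parent storing the child's key.
+  ParentKey = <<"order-123">>,
+  ChildKey  = <<"order-123-items">>,
+  Parent    = {parent, ParentKey, ChildKey},     %% parent references child
+  Child     = {items, [{<<"TCV37GIT4NJ">>, 1}]}, %% updated independently
+
+  ParentObj = riakc_obj:new(<<"orders">>, ParentKey, term_to_binary(Parent)),
+  ChildObj  = riakc_obj:new(<<"order_items">>, ChildKey, term_to_binary(Child)),
+  ok = riakc_pb_socket:put(Pid, ParentObj),
+  ok = riakc_pb_socket:put(Pid, ChildObj).
+  ```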
+
+**Q: Is there any way in Riak to limit access to a user or a group of users?**
+
+
+**A:**
+  Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication.
+
+  If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it.
+
+**Q: Is there a way to enforce a schema on data in a given bucket?**
+  Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error.
+
+
+**A:**
+  Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. This document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in JavaScript that restricts non-JSON content.
+
+**Q: How does the Erlang Riak Client manage node failures?**
+  Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down?
+
+
+**A:**
+  The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function.
+
+  * `queue_if_disconnected` (default: `false`) - requests will be queued when the connection to the server is lost.
+  * `auto_reconnect` (default: `false`) - if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`.
+
+  If these options are both false, connection errors will be returned to the process making requests as `{error, Reason}` tuples.
+
+**Q: Is there a limiting factor for the number of buckets in a cluster?**
+
+
+**A:**
+  As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node.
+
+  More on [Bucket Properties].
+
+**Q: Is it possible to configure a single bucket's properties in `app.config`?**
+
+
+**A:**
+  Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state.
+
+  You can read more on `app.config` in [Configuration Files].
+
+**Q: Is there a simple command to delete a bucket?**
+
+
+**A:**
+  There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work:
+
+  ```curl
+  curl -X DELETE http://your-host:8098/riak/your-bucket
+  ```
+
+**Q: Can Riak be configured to fail an update instead of generating a conflict?**
+
+
+**A:**
+  No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do.
+
+**Q: How can I limit the number of keys retrieved?**
+
+
+**A:**
+  You'll need to use a [MapReduce] job for this.
+
+  You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster.
It will only reduce load on your client. + +**Q: How is the real hash value for replicas calculated based on the preflist?** + + +**A:** + The hash is calculated first and then the next subsequent *N* partitions are chosen for the preflist. + +**Q: Do client libraries support load balancing/round robin?** + + +**A:** + + * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred. + * The Java client is strictly round robin, but with retries built in. + * The Python client also follows round robin without retries. + * The Erlang client does not support any load balancing. + +## MapReduce + + +**Q: Does the number of keys in a bucket affect the performance of MapReduce?** + + +**A:** + Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run. + +**Q: How do I filter out `not_found` from MapReduce results?** + If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys. + + +**A:** + There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list]. + +**Q: Is it possible to call a reduce function at specific intervals during a map function?** + When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often. + + +**A:** + Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce. + +**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?** + + +**A:** + Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase. + +**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?** + + +**A:** + This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com]. + +**Q: Why does MapReduce return a JSON object on occasion instead of an array?** + + +**A:** + `mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---is turned into a hash: + + ```erlang + list_to_binary(mochijson2:encode([{a , b}, {foo, bar}])). + <<"{\"a\":\"b\",\"foo\":\"bar\"}">> + ``` + + JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists. + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started.md b/content/riak/kv/3.0.1/developing/getting-started.md new file mode 100644 index 0000000000..2544aad116 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started.md @@ -0,0 +1,51 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +aliases: + +--- + +[install index]: {{}}riak/kv/3.0.1/setup/installing +[dev client libraries]: {{}}riak/kv/3.0.1/developing/client-libraries + +Welcome, new Riak developer! 
This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
+* [Java]({{}}riak/kv/3.0.1/developing/getting-started/java)
+* [Ruby]({{}}riak/kv/3.0.1/developing/getting-started/ruby)
+* [Python]({{}}riak/kv/3.0.1/developing/getting-started/python)
+* [C#]({{}}riak/kv/3.0.1/developing/getting-started/csharp)
+* [Node.js]({{}}riak/kv/3.0.1/developing/getting-started/nodejs)
+* [Erlang]({{}}riak/kv/3.0.1/developing/getting-started/erlang)
+* [PHP]({{}}riak/kv/3.0.1/developing/getting-started/php)
+* [Go]({{}}riak/kv/3.0.1/developing/getting-started/golang)
+
+### Community-supported Client Libraries
+
+Please see our [client libraries][dev client libraries] page for a listing of
+community-supported clients.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/csharp.md b/content/riak/kv/3.0.1/developing/getting-started/csharp.md
new file mode 100644
index 0000000000..31b2b06793
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/csharp.md
@@ -0,0 +1,86 @@
+---
+title: "Getting Started with C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "C Sharp"
+    identifier: "getting_started_csharp"
+    weight: 103
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/taste-of-riak/csharp
+  - /riak/kv/3.0.1/dev/taste-of-riak/csharp
+
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.1/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of the .NET Framework or Mono is required.
+
+### Client Setup
+
+Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager.
+
+{{% note title="Configuring for a remote cluster" %}}
+By default, the Riak .NET Client will add a section to your `app.config` file
+for a four node local cluster. If you are using a remote cluster, open up
+`app.config` and change the `hostAddress` values to point to nodes in your
+remote cluster.
+{{% /note %}}
+
+### Connecting to Riak
+
+Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object.
+
+```csharp
+using System;
+using RiakClient;
+
+namespace TasteOfRiak
+{
+    class Program
+    {
+        static void Main(string[] args)
+        {
+            // don't worry, we'll use this string later
+            const string contributors = "contributors";
+            IRiakEndPoint cluster = RiakCluster.FromConfig("riakConfig");
+            IRiakClient client = cluster.CreateClient();
+        }
+    }
+}
+```
+
+This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndPoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak.
+
+Let's make sure the cluster is online. Add this to your `Main` method:
+
+```csharp
+var pingResult = client.Ping();
+
+if (pingResult.IsSuccess)
+{
+    Console.WriteLine("pong");
+}
+else
+{
+    Console.WriteLine("Are you sure Riak is running?");
+    Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage);
+}
+```
+
+This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes.
+
+We are now ready to start interacting with Riak.
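+
+One more detail before moving on: because `IRiakEndPoint` implements `IDisposable`, you may want to let a `using` block dispose it for you. The following is a minimal sketch under the same assumptions as above (the `riakConfig` section from your `app.config`), not the canonical example code:
+
+```csharp
+using System;
+using RiakClient;
+
+namespace TasteOfRiak
+{
+    class Program
+    {
+        static void Main(string[] args)
+        {
+            // The using block disposes the endpoint once we're done with Riak.
+            using (IRiakEndPoint cluster = RiakCluster.FromConfig("riakConfig"))
+            {
+                IRiakClient client = cluster.CreateClient();
+                Console.WriteLine(client.Ping().IsSuccess ? "pong" : "no pong");
+            }
+        }
+    }
+}
+```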
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.1/developing/getting-started/csharp/crud-operations)
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/3.0.1/developing/getting-started/csharp/crud-operations.md
new file mode 100644
index 0000000000..21796b49a4
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/csharp/crud-operations.md
@@ -0,0 +1,148 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "CRUD Operations"
+    identifier: "getting_started_csharp_crud"
+    weight: 100
+    parent: "getting_started_csharp"
+toc: true
+aliases:
+
+---
+
+### Creating Objects In Riak
+
+Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak.
+
+The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At the most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak.
+
+Add the `RiakClient.Models` namespace to your using directive. Your usings should look like this:
+
+```csharp
+using System;
+using System.Collections.Generic;
+using RiakClient;
+using RiakClient.Models;
+```
+
+Add the `Person` class to the `TasteOfRiak` namespace:
+
+```csharp
+public class Person
+{
+    public string EmailAddress { get; set; }
+    public string FirstName { get; set; }
+    public string LastName { get; set; }
+}
+```
+
+Now let's create some people!
+
+```csharp
+var people = new[]
+{
+    new Person {
+        EmailAddress = "bashoman@basho.com",
+        FirstName = "Basho",
+        LastName = "Man"
+    },
+    new Person {
+        EmailAddress = "johndoe@gmail.com",
+        FirstName = "John",
+        LastName = "Doe"
+    }
+};
+
+foreach (var person in people)
+{
+    var o = new RiakObject(contributors, person.EmailAddress, person);
+    var putResult = client.Put(o);
+
+    if (putResult.IsSuccess)
+    {
+        Console.WriteLine("Successfully saved {0} to bucket {1}", o.Key, o.Bucket);
+    }
+    else
+    {
+        Console.WriteLine("Are you *really* sure Riak is running?");
+        Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage);
+    }
+}
+```
+
+In this sample, we create a collection of `Person` objects and then save each `Person` to Riak.
+
+Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`.
+
+Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be an error message displaying the result code and a helpful error message.
+
+### Reading from Riak
+
+Let's find a person!
+
+```csharp
+var result = client.Get(contributors, "bashoman@basho.com");
+if (result.IsSuccess)
+{
+    var bashoman = result.Value.GetObject<Person>();
+    Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors);
+}
+else
+{
+    Console.WriteLine("Something went wrong!");
+    Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage);
+}
+```
+
+We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult<RiakObject>` which, like other RiakResults, helpfully encapsulates the communication with Riak.
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/3.0.1/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..6e202731a9 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,111 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/3.0.1/dev/taste-of-riak/object-modeling-csharp + +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting UTC datetime in an [ISO 8601][iso_8601]
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<user_name>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, please refer to
+[this source code][2] for the repositories that we'll be using.
+
+[This console application][3] exercises the code that we've written.
+
+The repository pattern and `TimelineManager` help with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
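+
+To make that key computation concrete, here is a small illustrative C# sketch (assuming `using System;`); the helper name is hypothetical and is not part of the taste-of-riak repositories:
+
+```csharp
+// Hypothetical helper: builds a Timelines key such as "joeuser_Sent_2014-03-06".
+// Owner, type, and date are the three parts of the natural key described above.
+static string TimelineKey(string owner, string type, DateTime date)
+{
+    return string.Format("{0}_{1}_{2:yyyy-MM-dd}", owner, type, date.ToUniversalTime());
+}
+```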
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/csharp/querying.md b/content/riak/kv/3.0.1/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..9e1fee6cd5 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/csharp/querying.md @@ -0,0 +1,214 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/querying-csharp + - /riak/kv/3.0.1/dev/taste-of-riak/querying-csharp + +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+ +```csharp +Console.WriteLine("Creating Data"); +Customer customer = CreateCustomer(); +IEnumerable orders = CreateOrders(customer); +OrderSummary orderSummary = CreateOrderSummary(customer, orders); + +Console.WriteLine("Starting Client"); +using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = endpoint.CreateClient(); + + Console.WriteLine("Storing Data"); + + client.Put(ToRiakObject(customer)); + + foreach (Order order in orders) + { + // NB: this adds secondary index data as well + client.Put(ToRiakObject(order)); + } + + client.Put(ToRiakObject(orderSummary)); + + ... + ... + ... +} +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```csharp +Console.WriteLine("Fetching related data by shared key"); +string key = "1"; + +var result = client.Get(customersBucketName, key); +CheckResult(result); +Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result)); + +result = client.Get(orderSummariesBucketName, key); +CheckResult(result); +Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result)); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.1/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will make a note of where +secondary index data is added to our model objects. 
+
+```csharp
+private static RiakObject ToRiakObject(Order order)
+{
+    var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString());
+    var riakObject = new RiakObject(orderRiakObjectId, order);
+
+    IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName);
+    salesPersonIdIndex.Add(order.SalesPersonId.ToString());
+
+    BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName);
+    orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd"));
+
+    return riakObject;
+}
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's id of `9000`.
+
+```csharp
+// Query for order keys where the SalesPersonId index is set to 9000
+var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName);
+RiakResult<RiakIndexResult> indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here.
+CheckResult(indexRiakResult);
+RiakIndexResult indexResult = indexRiakResult.Value;
+Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
+```
+
+Which returns:
+
+```text
+Jane's orders (key values): 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID; next let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+```csharp
+// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
+riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName);
+indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here.
+CheckResult(indexRiakResult);
+indexResult = indexRiakResult.Value;
+Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
+```
+
+Which returns:
+
+```text
+October orders (key values): 1, 2
+```
+
+We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/erlang.md b/content/riak/kv/3.0.1/developing/getting-started/erlang.md new file mode 100644 index 0000000000..e737bebe32 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/erlang.md @@ -0,0 +1,59 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/erlang + - /riak/kv/3.0.1/dev/taste-of-riak/erlang + +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.1/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let’s create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.1/developing/getting-started/erlang/crud-operations) + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/3.0.1/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..e006325e25 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,172 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +aliases: + +--- + +## Creating Objects In Riak + +First, let’s create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let’s store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}. 
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
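+%% With the record definition loaded (via rd above), rp/1 prints the
+%% original record back; expect output along the lines of:
+%%   #book{title = "Moby Dick",author = "Herman Melville",
+%%         body = "Call me Ishmael. Some years ago...",
+%%         isbn = "1111979723",copies_owned = 3}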
+```
+
+Next let's clean up our mess:
+
+```erlang
+riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>).
+riakc_pb_socket:stop(Pid).
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/3.0.1/developing/getting-started/erlang/object-modeling.md
new file mode 100644
index 0000000000..1ccdb5854a
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/erlang/object-modeling.md
@@ -0,0 +1,342 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Erlang"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Object Modeling"
+    identifier: "getting_started_erlang_object"
+    weight: 102
+    parent: "getting_started_erlang"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/taste-of-riak/object-modeling-erlang
+  - /riak/kv/3.0.1/dev/taste-of-riak/object-modeling-erlang
+
+---
+
+To get started, let's create the records that we'll be using.
+
+{{% note title="Code Download" %}}
+You can also download the code for this chapter at
+[Github](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema).
+
+The Github version includes Erlang type specifications which have been omitted
+here for brevity.
+{{% /note %}}
+
+
+```erlang
+%% msgy.hrl
+
+-define(USER_BUCKET, <<"Users">>).
+-define(MSG_BUCKET, <<"Msgs">>).
+-define(TIMELINE_BUCKET, <<"Timelines">>).
+-define(INBOX, "Inbox").
+-define(SENT, "Sent").
+
+-record(user, {user_name, full_name, email}).
+
+-record(msg, {sender, recipient, created, text}).
+
+-record(timeline, {owner, msg_type, msgs}).
+```
+
+We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/3.0.1/developing/usage/bucket-types) here, so we don't need to specify one.
+
+To use these records to store data, we will first have to create a user
+record. Then, when a user creates a message, we will append that message
+to one or more timelines. If it's a private message, we'll append it to
+the Recipient's `Inbox` timeline and to the User's own `Sent` timeline.
+If it's a group message, we'll append it to the Group's timeline, as
+well as to the User's `Sent` timeline.
+
+#### Buckets and keys revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<user_name>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```erlang
+%% user_repository.erl

+-module(user_repository).
+-export([save_user/2,
+         get_user/2]).
+-include("msgy.hrl").
+
+save_user(ClientPid, User) ->
+    RUser = riakc_obj:new(?USER_BUCKET,
+                          list_to_binary(User#user.user_name),
+                          User),
+    riakc_pb_socket:put(ClientPid, RUser).
+
+get_user(ClientPid, UserName) ->
+    {ok, RUser} = riakc_pb_socket:get(ClientPid,
+                                      ?USER_BUCKET,
+                                      list_to_binary(UserName)),
+    binary_to_term(riakc_obj:get_value(RUser)).
+```
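+
+As a quick usage sketch (a hypothetical console session, assuming a reachable node on `127.0.0.1:8087` and the `#user{}` record from `msgy.hrl`):
+
+```erlang
+%% Sketch: round-trip a user through the repository module above.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+Joe = #user{user_name="joeuser", full_name="Joe User", email="joe@example.com"},
+ok = user_repository:save_user(Pid, Joe),
+Joe = user_repository:get_user(Pid, "joeuser"). %% matches the record we saved
+```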
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
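+
+And a similarly hypothetical sketch for the message repository (nothing here is part of the canonical example code):
+
+```erlang
+%% Sketch: build a message in memory; its created timestamp later becomes
+%% part of the natural key (<sender>_<created>) when it is saved.
+Msg = msg_repository:create_msg("marleenmgr", "joeuser", "Welcome to the company!"),
+io:format("key would be: ~s_~s~n", [Msg#msg.sender, Msg#msg.created]).
+```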
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/erlang/querying.md b/content/riak/kv/3.0.1/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..550460bcc0 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/erlang/querying.md @@ -0,0 +1,308 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/querying-erlang + - /riak/kv/3.0.1/dev/taste-of-riak/querying-erlang + +--- + + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/3.0.1/developing/key-value-modeling). 
+ +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle= Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +## Remember to replace the ip and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). + +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj). 
+ +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.1/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
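+
+%% Note: lists:concat/1 joins the raw integer components, so
+%% FormatDate({{2013,10,1},{14,42,26}}) yields "201310114226"; single-digit
+%% months, days, hours, minutes, and seconds are not zero-padded here.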
+
+AddIndicesToOrder = fun(OrderKey) ->
+  {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket,
+                                    list_to_binary(integer_to_list(OrderKey))),
+
+  OrderData = binary_to_term(riakc_obj:get_value(Order)),
+  OrderMetadata = riakc_obj:get_update_metadata(Order),
+
+  MD1 = riakc_obj:set_secondary_index(OrderMetadata,
+                                      [{{binary_index, "order_date"},
+                                        [FormatDate(OrderData#order.order_date)]}]),
+
+  MD2 = riakc_obj:set_secondary_index(MD1,
+                                      [{{integer_index, "salesperson_id"},
+                                        [OrderData#order.salesperson_id]}]),
+
+  Order2 = riakc_obj:update_metadata(Order,MD2),
+  riakc_pb_socket:put(Pid,Order2)
+end.
+
+lists:foreach(AddIndicesToOrder, [1,2,3]).
+
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we
+have to add entries to the indices at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```erlang
+riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"3">>],
+                      undefined,undefined}}
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`order_date_bin` index for entries between `20131001` and `20131031`.
+
+```erlang
+riakc_pb_socket:get_index_range(Pid, OrderBucket,
+                                {binary_index, "order_date"},
+                                <<"20131001">>, <<"20131031">>).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"2">>],
+                      undefined,undefined}}
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So, to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indices can have either Integer or Binary (String) keys.
+* You can search for specific values, or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/golang.md b/content/riak/kv/3.0.1/developing/getting-started/golang.md
new file mode 100644
index 0000000000..b88800964a
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/golang.md
@@ -0,0 +1,82 @@
+---
+title: "Getting Started with Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Go"
+    identifier: "getting_started_go"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/taste-of-riak/golang
+  - /riak/kv/3.0.1/dev/taste-of-riak/golang
+
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.1/using/running-a-cluster) first and ensure you have
+[a working installation of Go](http://golang.org/doc/install).
+
+## Client Setup
+
+First install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main
+
+import (
+    "encoding/binary"
+    "encoding/json"
+    "sync"
+
+    riak "github.com/basho/riak-go-client"
+    util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+    var err error
+
+    // un-comment-out to enable debug logging
+    // riak.EnableDebugLogging = true
+
+    o := &riak.NewClientOptions{
+        RemoteAddresses: []string{util.GetRiakAddress()},
+    }
+
+    var c *riak.Client
+    c, err = riak.NewClient(o)
+    if err != nil {
+        util.ErrExit(err)
+    }
+
+    defer func() {
+        if err := c.Stop(); err != nil {
+            util.ErrExit(err)
+        }
+    }()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.1/developing/getting-started/golang/crud-operations)
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/golang/crud-operations.md b/content/riak/kv/3.0.1/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..8e871f5447
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,376 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+aliases:
+
+---
+
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+    val1 := uint32(1)
+    val1buf := make([]byte, 4)
+    binary.LittleEndian.PutUint32(val1buf, val1)
+
+    val2 := "two"
+
+    val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+    var val3json []byte
+    val3json, err = json.Marshal(val3)
+    if err != nil {
+        util.ErrExit(err)
+    }
+
+    bucket := "test"
+
+    util.Log.Println("Creating Objects In Riak...")
+
+    objs := []*riak.Object{
+        {
+            Bucket:      bucket,
+            Key:         "one",
+            ContentType: "application/octet-stream",
+            Value:       val1buf,
+        },
+        {
+            Bucket:      bucket,
+            Key:         "two",
+            ContentType: "text/plain",
+            Value:       []byte(val2),
+        },
+        {
+            Bucket:      bucket,
+            Key:         "three",
+            ContentType: "application/json",
+            Value:       val3json,
+        },
+    }
+
+    var cmd riak.Command
+    wg := &sync.WaitGroup{}
+
+    for _, o := range objs {
+        cmd, err = riak.NewStoreValueCommandBuilder().
+            WithContent(o).
+            Build()
+        if err != nil {
+            util.ErrLog.Println(err)
+            continue
+        }
+        a := &riak.Async{
+            Command: cmd,
+            Wait:    wg,
+        }
+        if err := c.ExecuteAsync(a); err != nil {
+            util.ErrLog.Println(err)
+        }
+    }
+
+    wg.Wait()
+```
+
+In our first object, we have stored the integer 1 with the lookup key
+of `one`:
+
+```golang
+{
+    Bucket:      bucket,
+    Key:         "one",
+    ContentType: "application/octet-stream",
+    Value:       val1buf,
+}
+```
+
+For our second object, we stored a simple string value of `two` with a
+matching key:
+
+```golang
+{
+    Bucket:      bucket,
+    Key:         "two",
+    ContentType: "text/plain",
+    Value:       []byte(val2),
+}
+```
+
+Finally, the third object we stored was a bit of JSON:
+
+```golang
+{
+    Bucket:      bucket,
+    Key:         "three",
+    ContentType: "application/json",
+    Value:       val3json,
+}
+```
+
+## Reading Objects
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+Requesting the objects by key:
+
+```golang
+util.Log.Println("Reading Objects From Riak...")
+
+d := make(chan riak.Command, len(objs))
+
+for _, o := range objs {
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucket(bucket).
+        WithKey(o.Key).
+        Build()
+    if err != nil {
+        util.ErrLog.Println(err)
+        continue
+    }
+    a := &riak.Async{
+        Command: cmd,
+        Wait:    wg,
+        Done:    d,
+    }
+    if err := c.ExecuteAsync(a); err != nil {
+        util.ErrLog.Println(err)
+    }
+}
+
+wg.Wait()
+close(d)
+```
+
+Checking that each fetched object contains the value we stored:
+
+```golang
+var obj3 *riak.Object // holds the fetched "three" object for the update step below
+
+for done := range d {
+    f := done.(*riak.FetchValueCommand)
+    /* un-comment to dump fetched object as JSON
+    if json, jerr := json.MarshalIndent(f.Response, "", "  "); jerr != nil {
+        util.ErrLog.Println(jerr)
+    } else {
+        util.Log.Println("fetched value: ", string(json))
+    }
+    */
+    obj := f.Response.Values[0]
+    switch obj.Key {
+    case "one":
+        if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected {
+            util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+        }
+    case "two":
+        if actual, expected := string(obj.Value), val2; actual != expected {
+            util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+        }
+    case "three":
+        obj3 = obj
+        val3.MyValue = 0
+        if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil {
+            util.ErrLog.Println(jerr)
+        } else {
+            if actual, expected := val3.MyValue, int(3); actual != expected {
+                util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected)
+            }
+        }
+    default:
+        util.ErrLog.Printf("unrecognized key: %s", obj.Key)
+    }
+}
+```
+
+## Updating Objects
+
+While some data may be static, other forms of data need to be
+updated.
+
+Let’s update some values:
+
+```golang
+util.Log.Println("Updating Object Three In Riak...")
+
+val3.MyValue = 42
+obj3.Value, err = json.Marshal(val3)
+if err != nil {
+    util.ErrExit(err)
+}
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithContent(obj3).
+    WithReturnBody(true).
+ Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} + +svcmd := cmd.(*riak.StoreValueCommand) +svrsp := svcmd.Response +obj3 = svrsp.Values[0] +val3.MyValue = 0 +if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) +} else { + if actual, expected := val3.MyValue, int(42); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected) + } +} +util.Log.Println("updated object key: ", obj3.Key) +util.Log.Println("updated object value: ", val3.MyValue) +``` + +## Deleting Objects + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called against either the bucket or the +object. + +```golang +for _, o := range objs { + cmd, err = riak.NewDeleteValueCommandBuilder(). + WithBucket(o.Bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. + +For example, this `struct` that represents some information about +a book: + +```golang +type Book struct { + ISBN string + Title string + Author string + Body string + CopiesOwned uint16 +} + +book := &Book{ + ISBN: "1111979723", + Title: "Moby Dick", + Author: "Herman Melville", + Body: "Call me Ishmael. Some years ago...", + CopiesOwned: 3, +} +``` + +We now have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```golang +var jbook []byte +jbook, err = json.Marshal(book) +if err != nil { + util.ErrExit(err) +} + +bookObj := &riak.Object{ + Bucket: "books", + Key: book.ISBN, + ContentType: "application/json", + Value: jbook, +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(bookObj). + WithReturnBody(false). + Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} +``` + +If we fetch our book back and print the data: + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket("books"). + WithKey(book.ISBN). + Build() +if err != nil { + util.ErrExit(err) +} +if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) +} + +fcmd := cmd.(*riak.FetchValueCommand) +bookObj = fcmd.Response.Values[0] +util.Log.Println(string(bookObj.Value)) +``` + +The result is: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +Now, let’s delete the book: + +```golang +... 
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/golang/object-modeling.md b/content/riak/kv/3.0.1/developing/getting-started/golang/object-modeling.md
new file mode 100644
index 0000000000..8d8c977566
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/golang/object-modeling.md
@@ -0,0 +1,552 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Object Modeling"
+    identifier: "getting_started_go_object"
+    weight: 102
+    parent: "getting_started_go"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/taste-of-riak/object-modeling-golang
+  - /riak/kv/3.0.1/dev/taste-of-riak/object-modeling-golang
+
+---
+
+{{% note title="Code Download" %}}
+You can download the code for this chapter at
+[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models).
+{{% /note %}}
+
+To get started, let's create the models that we'll be using:
+
+```model.go
+package models
+
+type Model interface {
+    GetId() string
+    SetId(id string)
+}
+
+type modelImpl struct {
+    id string
+}
+
+func (m *modelImpl) SetId(id string) {
+    m.id = id
+}
+```
+
+Our user model:
+
+```user.go
+package models
+
+type User struct {
+    modelImpl
+    UserName string
+    FullName string
+    Email    string
+}
+
+func NewUser(userName, fullName, email string) *User {
+    u := &User{
+        UserName: userName,
+        FullName: fullName,
+        Email:    email,
+    }
+    u.SetId(userName)
+    return u
+}
+
+func (u *User) GetId() string {
+    return u.UserName
+}
+```
+
+And our message model:
+
+```msg.go
+package models
+
+import (
+    "fmt"
+    "time"
+
+    util "github.com/basho/taste-of-riak/go/util"
+)
+
+type Msg struct {
+    modelImpl
+    Sender    string
+    Recipient string
+    Text      string
+    Created   time.Time
+}
+
+func NewMsg(sender, recipient, text string) *Msg {
+    m := &Msg{
+        Sender:    sender,
+        Recipient: recipient,
+        Text:      text,
+        Created:   time.Now(),
+    }
+    m.SetId(m.GetId())
+    return m
+}
+
+func (m *Msg) GetId() string {
+    return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created))
+}
+```
+
+Our timeline model:
+
+```timeline.go
+package models
+
+type Timeline struct {
+    modelImpl
+    MsgKeys []string
+}
+
+type TimelineType byte
+
+const (
+    TimelineType_INBOX TimelineType = iota
+    TimelineType_SENT
+)
+
+func NewTimeline(id string) *Timeline {
+    t := &Timeline{}
+    t.id = id
+    return t
+}
+
+func (t *Timeline) AddMsg(msgKey string) {
+    t.MsgKeys = append(t.MsgKeys, msgKey)
+}
+
+func (t *Timeline) GetId() string {
+    return t.id
+}
+```
+
+We'll be using the buckets `Users`, `Msgs`, and `Timelines` to store our
+data. We won't be [using bucket types]({{}}riak/kv/3.0.1/developing/usage/bucket-types) here, so we don't need to specify one.
+
+To use these records to store data, we will first have to create a user
+record. Then, when a user creates a message, we will append that message
+to one or more timelines. If it's a private message, we'll append it to
+the Recipient's `Inbox` timeline and to the User's own `Sent` timeline.
+If it's a group message, we'll append it to the Group's timeline, as
+well as to the User's `Sent` timeline.
+
+#### Buckets and keys revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+    "encoding/json"
+    "errors"
+
+    riak "github.com/basho/riak-go-client"
+    models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+    Get(key string, notFoundOk bool) (models.Model, error)
+    Save(models.Model) (models.Model, error)
+    getBucketName() string
+    getModel() models.Model
+    getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+    client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+    return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+    client := r.getClient()
+    bucket := r.getBucketName()
+    cmd, err := riak.NewFetchValueCommandBuilder().
+        WithBucket(bucket).
+        WithKey(key).
+        WithNotFoundOk(notFoundOk).
+        Build()
+    if err != nil {
+        return nil, err
+    }
+    if err = client.Execute(cmd); err != nil {
+        return nil, err
+    }
+
+    fcmd := cmd.(*riak.FetchValueCommand)
+
+    if notFoundOk && len(fcmd.Response.Values) == 0 {
+        return nil, nil
+    }
+
+    if len(fcmd.Response.Values) > 1 {
+        // Siblings present that need resolution
+        // Here we'll just return an unexpected error
+        return nil, ErrUnexpectedSiblings
+    } else {
+        return buildModel(r.getModel(), fcmd.Response.Values[0])
+    }
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+    client := r.getClient()
+    bucket := r.getBucketName()
+    key := m.GetId()
+
+    cmd, err := riak.NewFetchValueCommandBuilder().
+        WithBucket(bucket).
+        WithKey(key).
+        WithNotFoundOk(true).
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
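+Before we wire up the concrete repositories, one aside on the
+`ErrUnexpectedSiblings` branch above: rather than failing, an application
+could resolve siblings itself. The sketch below is hypothetical (it is not
+part of the chapter's code) and assumes a union-merge is acceptable for
+`Timeline` values, whose `MsgKeys` collection only ever grows:
+
+```golang
+package repositories
+
+import (
+    "encoding/json"
+
+    riak "github.com/basho/riak-go-client"
+    models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+// mergeTimelineSiblings is a hypothetical resolver: it decodes each
+// sibling and unions their MsgKeys into a single Timeline. This is only
+// safe for grow-only collections like timelines, not a general strategy.
+func mergeTimelineSiblings(siblings []*riak.Object) (*models.Timeline, error) {
+    merged := &models.Timeline{}
+    seen := make(map[string]bool)
+    for _, obj := range siblings {
+        t := &models.Timeline{}
+        if err := json.Unmarshal(obj.Value, t); err != nil {
+            return nil, err
+        }
+        for _, k := range t.MsgKeys {
+            if !seen[k] {
+                seen[k] = true
+                merged.AddMsg(k)
+            }
+        }
+    }
+    return merged, nil
+}
+```
+
+A merged value would still need to be stored back so that the resolved
+object replaces the siblings in Riak.
+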
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
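+Note that `save` never needs a key parameter for messages: the natural key
+comes from the model itself via `GetId()`. As a quick illustration (a
+hypothetical snippet, not part of the chapter's code), this prints the key
+a new message would be stored under:
+
+```golang
+package main
+
+import (
+    "fmt"
+
+    models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+func main() {
+    msg := models.NewMsg("joeuser", "marleenmgr", "Hi!")
+    // Prints something like "joeuser_2014-03-06T02:05:13.223556Z",
+    // following the <username>_<datetime> pattern from the table above.
+    fmt.Println(msg.GetId())
+}
+```
+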
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names.
+* How to choose natural keys based on how we want to partition our data.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/golang/querying.md b/content/riak/kv/3.0.1/developing/getting-started/golang/querying.md
new file mode 100644
index 0000000000..914eb5332c
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/golang/querying.md
@@ -0,0 +1,580 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Querying"
+    identifier: "getting_started_go_query"
+    weight: 101
+    parent: "getting_started_go"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/taste-of-riak/querying-golang
+  - /riak/kv/3.0.1/dev/taste-of-riak/querying-golang
+
+---
+
+## Go Version Setup
+
+For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix OS.
+
+>A Quick Note on Querying and Schemas:
+>
+>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+### Denormalization
+
+If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+### Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it.
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
+    Build()
+if err != nil {
+    util.ErrExit(err)
+}
+cmds = append(cmds, cmd)
+
+doneChan := make(chan riak.Command)
+errored = false
+for _, cmd := range cmds {
+    a := &riak.Async{
+        Command: cmd,
+        Done:    doneChan,
+    }
+    if eerr := c.ExecuteAsync(a); eerr != nil {
+        errored = true
+        util.ErrLog.Println(eerr)
+    }
+}
+if errored {
+    util.ErrExit(errors.New("error, exiting!"))
+}
+
+for i := 0; i < len(cmds); i++ {
+    select {
+    case d := <-doneChan:
+        if fv, ok := d.(*riak.FetchValueCommand); ok {
+            obj := fv.Response.Values[0]
+            switch obj.Bucket {
+            case customersBucket:
+                util.Log.Printf("Customer 1: %v", string(obj.Value))
+            case orderSummariesBucket:
+                util.Log.Printf("OrderSummary 1: %v", string(obj.Value))
+            }
+        } else {
+            util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+        }
+    case <-time.After(5 * time.Second):
+        util.ErrExit(errors.New("fetch operations took too long"))
+    }
+}
+```
+
+Which returns our amalgamated objects:
+
+```sh
+2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]}
+2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z"}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.1/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time:
+
+```golang
+util.Log.Println("Adding Index Data")
+
+// fetch orders to add index data
+cmds = cmds[:0]
+
+for _, order := range orders {
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucket(ordersBucket).
+        WithKey(order.Id).
+        Build()
+    if err != nil {
+        util.ErrExit(err)
+    }
+    cmds = append(cmds, cmd)
+}
+
+errored = false
+for _, cmd := range cmds {
+    a := &riak.Async{
+        Command: cmd,
+        Done:    doneChan,
+    }
+    if eerr := c.ExecuteAsync(a); eerr != nil {
+        errored = true
+        util.ErrLog.Println(eerr)
+    }
+}
+if errored {
+    util.ErrExit(errors.New("error, exiting!"))
+}
+
+errored = false
+for i := 0; i < len(cmds); i++ {
+    select {
+    case d := <-doneChan:
+        if fv, ok := d.(*riak.FetchValueCommand); ok {
+            obj := fv.Response.Values[0]
+            switch obj.Key {
+            case "1":
+                obj.AddToIntIndex("SalespersonId_int", 9000)
+                obj.AddToIndex("OrderDate_bin", "2013-10-01")
+            case "2":
+                obj.AddToIntIndex("SalespersonId_int", 9001)
+                obj.AddToIndex("OrderDate_bin", "2013-10-15")
+            case "3":
+                obj.AddToIntIndex("SalespersonId_int", 9000)
+                obj.AddToIndex("OrderDate_bin", "2013-11-03")
+            }
+            scmd, serr := riak.NewStoreValueCommandBuilder().
+                WithContent(obj).
+                Build()
+            if serr != nil {
+                util.ErrExit(serr)
+            }
+            a := &riak.Async{
+                Command: scmd,
+                Wait:    wg,
+            }
+            if eerr := c.ExecuteAsync(a); eerr != nil {
+                errored = true
+                util.ErrLog.Println(eerr)
+            }
+        } else {
+            util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+        }
+    case <-time.After(5 * time.Second):
+        util.ErrExit(errors.New("fetch operations took too long"))
+    }
+}
+
+if errored {
+    util.ErrExit(errors.New("error, exiting!"))
+}
+
+wg.Wait()
+close(doneChan)
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`:
+
+```golang
+util.Log.Println("Index Queries")
+
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucket(ordersBucket).
+    WithIndexName("SalespersonId_int").
+    WithIndexKey("9000").
+    Build()
+if err != nil {
+    util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+    util.ErrExit(eerr)
+}
+
+qcmd := cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+    util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 Jane's Orders, key: 3
+2015/12/29 09:44:10 Jane's Orders, key: 1
+```
+
+Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id; next, let's use a *binary* index.
+
+Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`:
+
+```golang
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucket(ordersBucket).
+    WithIndexName("OrderDate_bin").
+    WithRange("2013-10-01", "2013-10-31").
+    Build()
+if err != nil {
+    util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+    util.ErrExit(eerr)
+}
+
+qcmd = cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+    util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 October's Orders, key: 1
+2015/12/29 09:44:10 October's Orders, key: 2
+```
+
+Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys. +* You can search for specific values or a range of values. +* Riak will return a list of keys that match the index query. + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/java.md b/content/riak/kv/3.0.1/developing/getting-started/java.md new file mode 100644 index 0000000000..7cf3721950 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/java.md @@ -0,0 +1,93 @@ +--- +title: "Getting Started with Java" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Java" + identifier: "getting_started_java" + weight: 100 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/java + - /riak/kv/3.0.1/dev/taste-of-riak/java + +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.1/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Java is required. + +## Client Setup + +To include the Riak Java client in your project, add it to your +project's dependencies. Here is a Maven example: + +```xml + + + com.basho.riak + riak-client + 2.1.1 + +``` + +Next, download +[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java) +source code for this tutorial, and save it to your working directory. + +{{% note title="Configuring for a local cluster" %}} +The `TasteOfRiak.java` file that you downloaded is set up to communicate with +a 1-node Riak cluster listening on `localhost` port 10017. We recommend +modifying the connection info directly within the `setUpCluster()` method. +{{% /note %}} + +If you execute the `TasteOfRiak.java` file within your IDE, you should +see the following: + +``` +Basic object created +Location object created for quote object +StoreValue operation created +Client object successfully created +Object storage operation successfully completed +Success! The object we created and the object we fetched have the same value +Quote object successfully deleted +Book object created +Moby Dick information now stored in Riak +Book object successfully fetched +Success! All of our tests check out +``` + +Since Java doesn’t have a REPL environment, let's walk through the code +to see what it actually did at each step. + +## Setting Up the Cluster + +The first step in using the Riak Java client is to create a cluster +object to facilitate all interactions with Riak. You'll see this on line +72: + +```java +RiakCluster cluster = setUpCluster(); +``` + +This calls the private `setUpCluster` method which begins on line 25. 
+
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.1/developing/getting-started/java/crud-operations)
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/java/crud-operations.md b/content/riak/kv/3.0.1/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..0ec8e6d54c
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,206 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+aliases:
+
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on down the line:
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Icemand");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{}}riak/kv/3.0.1/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/3.0.1/developing/usage/conflict-resolution/)
+documentation.
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book.
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the more simple
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+
+To update the POJO, we would use `UpdateValue` by
+extending a new `BookUpdate` class as follows:
+
+```java
+public static class BookUpdate extends UpdateValue.Update<Book> {
+    private final Book update;
+    public BookUpdate(Book update){
+        this.update = update;
+    }
+
+    @Override
+    public Book apply(Book t) {
+        if(t == null) {
+            t = new Book();
+        }
+
+        t.author = update.author;
+        t.body = update.body;
+        t.copiesOwned = update.copiesOwned;
+        t.isbn = update.isbn;
+        t.title = update.title;
+
+        return t;
+    }
+}
+```
+
+Then using the `BookUpdate` class with our `mobyDick` object:
+
+```java
+mobyDick.copiesOwned = 5;
+BookUpdate updatedBook = new BookUpdate(mobyDick);
+
+UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation)
+        .withUpdate(updatedBook).build();
+UpdateValue.Response response = client.execute(updateValue);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{}}riak/kv/3.0.1/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/3.0.1/developing/usage/conflict-resolution/)
+documentation.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/java/object-modeling.md b/content/riak/kv/3.0.1/developing/getting-started/java/object-modeling.md
new file mode 100644
index 0000000000..6d32034b46
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/java/object-modeling.md
@@ -0,0 +1,432 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Object Modeling"
+    identifier: "getting_started_java_object"
+    weight: 102
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/taste-of-riak/object-modeling-java
+  - /riak/kv/3.0.1/dev/taste-of-riak/object-modeling-java
+
+---
+
+To get started, let's create the models that we'll be using.
+
+```java
+package com.basho.msgy.Models;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class Msg {
+    public String Sender;
+    public String Recipient;
+    public String Created;
+    public String Text;
+
+    public static Msg createNew(String sender, String recipient, String text) {
+        Msg msg = new Msg();
+        msg.Sender = sender;
+        msg.Recipient = recipient;
+        msg.Text = text;
+        msg.Created = GetCurrentISO8601Timestamp();
+        return msg;
+    }
+
+    private static String GetCurrentISO8601Timestamp() {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        // Java Dates don't have microsecond resolution :(
+        // Pad out to microseconds to match other examples.
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.api.annotations.RiakBucketName;
+import com.basho.riak.client.api.annotations.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "Users";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. With the
+Java client, we can use the `@RiakKey` annotation to tell the client
+that we want to use the `UserName` member as the key. The client will
+then use that value automatically, so we don't have to pass the key in
+as another parameter when storing a value.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```java
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class MsgRepository {
+
+    static final String BUCKET_NAME = "Msgs";
+    protected RiakClient client;
+
+    public MsgRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public Msg get(String msgKey) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        FetchValue.Response response = client.execute(fetch);
+        return response.getValue(Msg.class);
+    }
+
+    public String save(Msg msg) throws Exception {
+        StoreValue store = new StoreValue.Builder(msg).build();
+        client.execute(store);
+        return generateKey(msg);
+    }
+
+    private String generateKey(Msg msg) {
+        return msg.Sender + "_" + msg.Created;
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.riak.client.IRiakClient;
+import com.basho.riak.client.RiakRetryFailedException;
+import com.basho.riak.client.bucket.Bucket;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class TimelineRepository {
+
+    static final String BUCKET_NAME = "Timelines";
+    protected RiakClient client;
+    protected MsgRepository msgRepo;
+
+    public TimelineRepository(RiakClient client) {
+        this.client = client;
+        this.msgRepo = new MsgRepository(this.client);
+    }
+
+    public void postMsg(Msg msg) throws Exception {
+        String msgKey = msgRepo.save(msg);
+
+        // Post to recipient's Inbox timeline
+        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);
+
+        // Post to sender's Sent timeline
addToTimeline(msg, Timeline.TimelineType.Sent, msgKey); + } + + + private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception { + String timelineKey = generateKeyFromMsg(msg, type); + + Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey); + FetchValue fetch = new FetchValue.Builder(loc).build(); + Timeline timeline = client.execute(fetch).getValue(Timeline.class); + + if (timeline != null) { + timeline = addToExistingTimeline(timeline,msgKey); + } else { + timeline = createNewTimeline(msg, type, msgKey); + } + + StoreValue store = new StoreValue.Builder(timeline).build(); + client.execute(store); + } + + public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) { + String owner = getOwner(msg, type); + + Timeline newTimeline = new Timeline(); + newTimeline.Owner = owner; + newTimeline.Type = type.toString(); + newTimeline.Msgs.add(msgKey); + + return newTimeline; + } + + public Timeline addToExistingTimeline(Timeline timeline, String msgKey) { + timeline.Msgs.add(msgKey); + return timeline; + } + + public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws RiakRetryFailedException { + String timelineKey = generateKey(ownerUsername, type, date); + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(timelineKey, Timeline.class).execute(); + } + + private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) { + String owner = getOwner(msg, type); + String dateString = msg.Created.substring(0, 10); + return generateKey(owner, type, dateString); + } + + private String getOwner(Msg msg, Timeline.TimelineType type) { + if(type == Timeline.TimelineType.Inbox) + return msg.Recipient; + else + return msg.Sender; + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) { + String dateString = getIso8601DateStringFromDate(date); + return generateKey(ownerUsername, type, dateString); + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) { + return ownerUsername + "_" + type.toString() + "_" + dateString; + } + + private String getIso8601DateStringFromDate(Date date) { + TimeZone tz = TimeZone.getTimeZone("UTC"); + DateFormat df = new SimpleDateFormat("yyyy-MM-dd"); + df.setTimeZone(tz); + return df.format(date); + } + + +} + +// ---------------------------------------------------------------------------- + +package com.basho.msgy.Repositories; + +import com.basho.msgy.Models.User; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakRetryFailedException; +import com.basho.riak.client.bucket.Bucket; + +public class UserRepository { + static final String BUCKET_NAME = "Users"; + protected IRiakClient client; + + public UserRepository(IRiakClient client) { + this.client = client; + } + + public void save(User user) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + bucket.store(user).execute(); + } + + public User get(String UserName) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(UserName, User.class).execute(); + } +} + +``` + +Finally, let's test them: + +```java +package com.basho.msgy; + +import com.basho.msgy.Models.Msg; +import com.basho.msgy.Models.Timeline; +import com.basho.msgy.Models.User; +import com.basho.msgy.Repositories.MsgRepository; +import com.basho.msgy.Repositories.TimelineRepository; +import 
com.basho.msgy.Repositories.UserRepository; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakException; +import com.basho.riak.client.RiakFactory; + +import java.util.Date; + +public class MsgyMain { + + public static void main(String[] args) throws RiakException { + // Setup our repositories + IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017); + + UserRepository userRepo = new UserRepository(client); + MsgRepository msgRepo = new MsgRepository(client); + TimelineRepository timelineRepo = new TimelineRepository(client); + + // Create and save users + User marleen = new User("marleenmgr", + "Marleen Manager", + "marleen.manager@basho.com"); + + User joe = new User("joeuser", + "Joe User", + "joe.user@basho.com"); + + userRepo.save(marleen); + userRepo.save(joe); + + // Create new Msg, post to timelines + Msg msg = Msg.createNew(marleen.UserName, + joe.UserName, + "Welcome to the company!"); + + timelineRepo.postMsg(msg); + + + // Get Joe's inbox for today, get first message + Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName, + Timeline.TimelineType.Inbox, + new Date()); + + Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0)); + + System.out.println("From: " + joesFirstMsg.Sender); + System.out.println("Msg : " + joesFirstMsg.Text); + System.out.println(""); + + client.shutdown(); + } +} +``` + +As you can see, the repository pattern helps us with a few things: + + - It helps us to see if an object exists before creating a new one + - It keeps our buckets and key names consistent + - It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/java/querying.md b/content/riak/kv/3.0.1/developing/getting-started/java/querying.md new file mode 100644 index 0000000000..b446d7c9da --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/java/querying.md @@ -0,0 +1,280 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Java" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Querying" + identifier: "getting_started_java_query" + weight: 101 + parent: "getting_started_java" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/querying-java + - /riak/kv/3.0.1/dev/taste-of-riak/querying-java + +--- + +## Java Version Setup + +For the Java version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`. 
You
+may import this code into your favorite editor, or just run it from the
+command line using the commands in `BuildAndRun.sh` if you are running
+on a *nix OS.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can be as simple
+as using the same key across multiple buckets for different types of
+data to having fields in your data that are related by name. These
+querying methods will introduce you to some ways of laying out your data
+in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled-up info about orders, such as the total. You can find the source
+for these POJOs in `Customer.java`, `Order.java`, and
+`OrderSummaries.java`. Let's put some data into Riak so we can play
+with it.
+ +```java +// From SipOfRiak.java + +private static Customer createCustomer() { + Customer customer = new Customer(); + customer.CustomerId = 1; + customer.Name = "John Smith"; + customer.Address = "123 Main Street"; + customer.City = "Columbus"; + customer.State = "Ohio"; + customer.Zip = "43210"; + customer.Phone = "+1-614-555-5555"; + customer.CreatedDate = "2013-10-01 14:30:26"; + return customer; +} + +private static ArrayList createOrders() { + ArrayList orders = new ArrayList(); + + Order order1 = new Order(); + order1.OrderId = 1; + order1.CustomerId = 1; + order1.SalespersonId = 9000; + order1.Items.add( + new Item("TCV37GIT4NJ", + "USB 3.0 Coffee Warmer", + 15.99)); + order1.Items.add( + new Item("PEG10BBF2PP", + "eTablet Pro; 24GB; Grey", + 399.99)); + order1.Total = 415.98; + order1.OrderDate = "2013-10-01 14:42:26"; + orders.add(order1); + + Order order2 = new Order(); + order2.OrderId = 2; + order2.CustomerId = 1; + order2.SalespersonId = 9001; + order2.Items.add( + new Item("OAX19XWN0QP", + "GoSlo Digital Camera", + 359.99)); + order2.Total = 359.99; + order2.OrderDate = "2013-10-15 16:43:16"; + orders.add(order2); + + Order order3 = new Order(); + order3.OrderId = 3; + order3.CustomerId = 1; + order3.SalespersonId = 9000; + order3.Items.add( + new Item("WYK12EPU5EZ", + "Call of Battle = Goats - Gamesphere 4", + 69.99)); + order3.Items.add( + new Item("TJB84HAA8OA", + "Bricko Building Blocks", + 4.99)); + order3.Total = 74.98; + order3.OrderDate = "2013-11-03 17:45:28"; + orders.add(order3); + return orders; +} + +private static OrderSummary createOrderSummary(ArrayList orders) { + OrderSummary orderSummary = new OrderSummary(); + orderSummary.CustomerId = 1; + for(Order order: orders) + { + orderSummary.Summaries.add(new OrderSummaryItem(order)); + } + return orderSummary; +} + +public static void main(String[] args) throws RiakException { + + System.out.println("Creating Data"); + Customer customer = createCustomer(); + ArrayList orders = createOrders(); + OrderSummary orderSummary = createOrderSummary(orders); + + System.out.println("Starting Client"); + IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017); + + + System.out.println("Creating Buckets"); + Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute(); + Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute(); + Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute(); + + System.out.println("Storing Data"); + customersBucket.store(String.valueOf(customer.CustomerId), customer).execute(); + for (Order order : orders) { + ordersBucket.store(String.valueOf(order.OrderId), order).execute(); + } + orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute(); +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. 
+ +```java + System.out.println("Fetching related data by shared key"); + String key = "1"; + String fetchedCust = customersBucket.fetch(key).execute().getValueAsString(); + String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString(); + System.out.format("Customer 1: %s\n", fetchedCust); + System.out.format("OrderSummary 1: %s\n", fetchedOrdSum); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.1/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```java + System.out.println("Adding Index Data"); + IRiakObject riakObj = ordersBucket.fetch("1").execute(); + riakObj.addIndex("SalespersonId", 9000); + riakObj.addIndex("OrderDate", "2013-10-01"); + ordersBucket.store(riakObj).execute(); + + IRiakObject riakObj2 = ordersBucket.fetch("2").execute(); + riakObj2.addIndex("SalespersonId", 9001); + riakObj2.addIndex("OrderDate", "2013-10-15"); + ordersBucket.store(riakObj2).execute(); + + IRiakObject riakObj3 = ordersBucket.fetch("3").execute(); + riakObj3.addIndex("SalespersonId", 9000); + riakObj3.addIndex("OrderDate", "2013-11-03"); + ordersBucket.store(riakObj3).execute(); +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +```java + // Query for orders where the SalespersonId index is set to 9000 + List janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId")) + .withValue(9000).execute(); + + System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders)); +``` + +Which returns: + +```text +Jane's Orders: 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. 
Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```java + // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 + List octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate")) + .from("2013-10-01").to("2013-10-31").execute(); + + System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders)); +``` + +Which returns: + +```text +October's Orders: 1, 2 +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/nodejs.md b/content/riak/kv/3.0.1/developing/getting-started/nodejs.md new file mode 100644 index 0000000000..fc44d65c88 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/nodejs.md @@ -0,0 +1,104 @@ +--- +title: "Getting Started with NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "NodeJS" + identifier: "getting_started_nodejs" + weight: 104 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/nodejs + - /riak/kv/3.0.1/dev/taste-of-riak/nodejs + +--- + +[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js +[npm]: https://www.npmjs.com/package/basho-riak-client +[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.1/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Node.js 0.12 or later is +required. + +Code for these examples is available [here][introduction.js]. To run, follow +these directions: + +```bash +git clone git://github.com/basho/riak-nodejs-client-examples +cd riak-nodejs-client-examples +npm install +node ./app.js +``` + +### Client Setup + +Install [the Riak Node.js Client][node_js_installation] through [NPM][npm]. + +### Connecting to Riak + +Connecting to Riak with the Riak Node.js Client requires creating a new client +object and using the callback argument to know when the client is fully +initialized: + +```javascript +var Riak = require('basho-riak-client'); +var nodes = [ + 'riak-test:10017', + 'riak-test:10027', + 'riak-test:10037', + 'riak-test:10047' +]; +var client = new Riak.Client(nodes, function (err, c) { + // NB: at this point the client is fully initialized, and + // 'client' and 'c' are the same object +}); +``` + +This creates a new `Riak.Client` object which handles all the details of +tracking active nodes and also provides load balancing. The `Riak.Client` object +is used to send commands to Riak. 
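+
+For example, once the initialization callback fires, the client is safe to
+use. Here is a minimal sketch of issuing a first command from inside that
+callback, re-using the `ping` command that appears later in this chapter:
+
+```javascript
+var Riak = require('basho-riak-client');
+
+var client = new Riak.Client(['riak-test:10017'], function (err, c) {
+    if (err) {
+        throw new Error(err);
+    }
+    // The client is now initialized; commands issued here will be
+    // load-balanced across the configured nodes.
+    client.ping(function (err, rslt) {
+        // rslt === true on success
+    });
+});
+```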
When your application is completely done with +Riak communications, the following method can be used to gracefully shut the +client down and exit Node.js: + +```javascript +client.stop(function (err, rslt) { + // NB: you may wish to check err + process.exit(); +}); +``` + +Let's make sure the cluster is online with a `Ping` request: + +```javascript +var assert = require('assert'); + +client.ping(function (err, rslt) { + if (err) { + throw new Error(err); + } else { + // On success, ping returns true + assert(rslt === true); + } +}); +``` + +This is some simple code to test that a node in a Riak cluster is online - we +send a simple ping message. Even if the cluster isn't present, the Riak Node.js +Client will return a response message. In the callback it is important to check +that your activity was successful by checking the `err` variable. + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.1/developing/getting-started/nodejs/crud-operations) + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/3.0.1/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..0acc0eb799 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,138 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "CRUD Operations" + identifier: "getting_started_nodejs_crud" + weight: 100 + parent: "getting_started_nodejs" +toc: true +aliases: + +--- + +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + +### Creating Objects In Riak KV + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going +to want us to do productive work. Let's create some data to save in Riak. + +The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak +key/value objects. At the most basic, a `RiakObject` is responsible for +identifying your object and for translating it into a format that can be easily +saved to Riak. + +```javascript +var async = require('async'); + +var people = [ + { + emailAddress: "bashoman@basho.com", + firstName: "Basho", + lastName: "Man" + }, + { + emailAddress: "johndoe@gmail.com", + firstName: "John", + lastName: "Doe" + } +]; + +var storeFuncs = []; +people.forEach(function (person) { + // Create functions to execute in parallel to store people + storeFuncs.push(function (async_cb) { + client.storeValue({ + bucket: 'contributors', + key: person.emailAddress, + value: person + }, + function(err, rslt) { + async_cb(err, rslt); + } + ); + }); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +In this sample, we create a collection of `Person` objects and then save each +`Person` to Riak. Once again, we check the response from Riak. + +### Reading from Riak + +Let's find a person! + +```javascript +var logger = require('winston'); + +client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true }, + function (err, rslt) { + if (err) { + throw new Error(err); + } else { + var riakObj = rslt.values.shift(); + var bashoman = riakObj.value; + logger.info("I found %s in 'contributors'", bashoman.emailAddress); + } + } +); +``` + +We use `client.fetchValue` to retrieve an object from Riak. 
This returns an +array of `RiakObject` objects which helpfully encapsulates the communication +with Riak. + +After verifying that we've been able to communicate with Riak *and* that we have +a successful result, we use the `value` property to get the object, which has +already been converted to a javascript object due to the use of `convertToJs: +true` in the options. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```javascript +bashoman.FirstName = "Riak"; +riakObj.setValue(bashoman); + +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +Updating an object involves modifying a `RiakObject` then using +`client.storeValue` to save the existing object. + +### Deleting Data + +```javascript +client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +Just like other operations, we check the results that have come back from Riak +to make sure the object was successfully deleted. + +The Riak Node.js Client has a lot of additional functionality that makes it easy +to build rich, complex applications with Riak. Check out the +[documentation][nodejs_wiki] to learn more about working with the Riak Node.js +Client and Riak. + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/3.0.1/developing/getting-started/nodejs/object-modeling.md new file mode 100644 index 0000000000..843c2668dc --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/nodejs/object-modeling.md @@ -0,0 +1,123 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Object Modeling" + identifier: "getting_started_nodejs_object" + weight: 102 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/object-modeling-nodejs + - /riak/kv/3.0.1/dev/taste-of-riak/object-modeling-nodejs + +--- + +To get started, let's create the models that we'll be using. + +* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js) +* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js) +* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js) + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. 
+
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br />`marketing_group_INBOX_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` field as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application-dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
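+
+To make those key patterns concrete, here is a minimal sketch of a
+timeline-key builder; the helper name is illustrative and is not part of
+the linked repository classes:
+
+```javascript
+// Builds Timeline keys of the form <owner>_<TYPE>_<date>,
+// e.g. 'joeuser_SENT_2014-03-06'.
+function timelineKey(owner, type, date) {
+    // toISOString() yields 'YYYY-MM-DDTHH:mm:ss.sssZ'; keep the date part
+    var dateString = date.toISOString().substring(0, 10);
+    return [owner, type.toUpperCase(), dateString].join('_');
+}
+
+// timelineKey('joeuser', 'Sent', new Date()) => e.g. 'joeuser_SENT_2014-03-06'
+```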
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/nodejs/querying.md b/content/riak/kv/3.0.1/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..43ab805079 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/nodejs/querying.md @@ -0,0 +1,146 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/querying-nodejs + - /riak/kv/3.0.1/dev/taste-of-riak/querying-nodejs + +--- + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. + +* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33) +* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262) + +While individual Customer and Order objects don't change much (or +shouldn't change), the "Order Summary" object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. 
If we showed this information in our application often, it's only +one extra request to get all the info. + +[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96) + +Which returns our amalgamated objects: + +```bash +info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"} +info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.1/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141) + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159) + +Which returns: + +```text +Jane's Orders: 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and +2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175) + +Which returns: + +```text +October's Orders: 1, 2 +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/php.md b/content/riak/kv/3.0.1/developing/getting-started/php.md new file mode 100644 index 0000000000..0654ab9161 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/php.md @@ -0,0 +1,80 @@ +--- +title: "Getting Started with PHP" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "PHP" + identifier: "getting_started_php" + weight: 107 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/php + - /riak/kv/3.0.1/dev/taste-of-riak/php + +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.1/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) is required to be installed to fetch the client library package. + +## Client Setup +Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)). + +From the `taste-of-riak` directory, use composer to install the Riak PHP 2.0 Client`. + +```bash +php path/to/your/composer.phar install + +# If you did a global install of composer, run this instead: +composer install +``` + +If you set up a local Riak cluster using the [[five minute install]] method, change line 11 from `->onPort(8098)` to `->onPort(10018)`. + +Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output: + +```json +Reading Objects From Riak... +Updating Objects In Riak... +Deleting Objects From Riak... +Working With Complex Objects... +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +Yay, success! + +Since we didn't use PHP's REPL environment, let's walk through the code +to see what it actually did at each step. + +## Setting up the PHP Client and connections + +```php +include_once 'vendor/autoload.php'; + +use Basho\Riak; +use Basho\Riak\Node; +use Basho\Riak\Command; + +$node = (new Node\Builder) + ->atHost('127.0.0.1') + ->onPort(8098) + ->build(); + +$riak = new Riak([$node]); +``` + +This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html). +Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands. + +We are now ready to start interacting with Riak. 
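+
+As a quick smoke test, you can round-trip a trivial value through the
+cluster with the same command-builder pattern covered in the next chapter;
+a minimal sketch (the bucket and key names here are arbitrary):
+
+```php
+$location = new Riak\Location('ping', new Riak\Bucket('test'));
+
+// Store a throwaway value...
+(new Command\Builder\StoreObject($riak))
+    ->buildObject('pong')
+    ->atLocation($location)
+    ->build()
+    ->execute();
+
+// ...and read it back to confirm the node responded.
+$response = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute();
+
+print($response->getObject()->getData() . PHP_EOL); // pong
+```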
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.1/developing/getting-started/php/crud-operations) + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/php/crud-operations.md b/content/riak/kv/3.0.1/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..946ad3d084 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/php/crud-operations.md @@ -0,0 +1,187 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with PHP" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "CRUD Operations" + identifier: "getting_started_php_crud" + weight: 100 + parent: "getting_started_php" +toc: true +aliases: + +--- + +## Creating Objects In Riak +First, let’s create a few objects and a bucket to keep them in. + +```php +$bucket = new Riak\Bucket('testBucket'); + +$val1 = 1; +$location1 = new Riak\Location('one', $bucket); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val1) + ->atLocation($location1) + ->build(); +$storeCommand1->execute(); +``` + +In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key. + +```php +$val2 = 'two'; +$location2 = new Riak\Location('two', $bucket); + +$storeCommand2 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val2) + ->atLocation($location2) + ->build(); +$storeCommand2->execute(); +``` + +That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now. + +```php +$val3 = ['myValue' => 3]; +$location3 = new Riak\Location('three', $bucket); + +$storeCommand3 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($val3) + ->atLocation($location3) + ->build(); +$storeCommand3->execute(); +``` + +## Reading Objects From Riak +Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect. + +```php +$response1 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location1) + ->build() + ->execute(); + +$response2 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location2) + ->build() + ->execute(); + +$response3 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location3) + ->withDecodeAsAssociative() + ->build() + ->execute(); + +print_r($response1->getObject()->getData()); +print_r($response2->getObject()->getData()); +print_r($response3->getObject()->getData()); +``` + +That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html). +For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object. + +In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data. + +## Updating Objects In Riak +While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42. 
+ +```php +$object3 = $response3->getObject(); +$data3 = $object3->getData(); + +$data3['myValue'] = 42; +$object3 = $object3->setData(json_encode($data3)); + +$updateCommand = (new Command\Builder\StoreObject($riak)) + ->withObject($object3) + ->atLocation($location3) + ->build(); + +$updateCommand->execute(); +``` + +First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object. +To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object. + +## Deleting Objects From Riak +As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it. + +```php +(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute(); +``` + +### Working With Complex Objects +Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take for example, this plain old PHP object(POPO) that encapsulates some knowledge about a book. + +```php +class Book +{ + var $title; + var $author; + var $body; + var $isbn; + var $copiesOwned; +} + +$book = new Book(); +$book->isbn = '1111979723'; +$book->title = 'Moby Dick'; +$book->author = 'Herman Melville'; +$book->body = 'Call me Ishmael. Some years ago...'; +$book->copiesOwned = 3; +``` + +Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now: + +```php +$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books')); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($book) + ->atLocation($bookLocation) + ->build(); + +$storeCommand1->execute(); +``` + +Some of you may be thinking “But how does the Riak client encode/decode my object”? If we fetch the binary version of our book back and print it as a string, we shall know: + +```php +$fetchBookResponse = (new Command\Builder\FetchObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); + +print('Serialized Object:' . PHP_EOL); +print($fetchBookResponse->getBody() . PHP_EOL); +``` + +```json +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder. 
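+
+Going the other way is just as mechanical. Here is a minimal sketch of
+rehydrating the stored JSON into a `Book`-shaped object; the mapping code
+is illustrative, and the client itself simply hands back a decoded
+`stdClass`:
+
+```php
+// Without withDecodeAsAssociative(), getData() returns a stdClass
+// decoded from the stored JSON.
+$data = $fetchBookResponse->getObject()->getData();
+
+$fetchedBook = new Book();
+$fetchedBook->title = $data->title;
+$fetchedBook->author = $data->author;
+$fetchedBook->body = $data->body;
+$fetchedBook->isbn = $data->isbn;
+$fetchedBook->copiesOwned = $data->copiesOwned;
+
+print($fetchedBook->title . ' by ' . $fetchedBook->author . PHP_EOL);
+```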
+ +Now that we’ve ruined the magic of object encoding, let’s clean up our mess: + +```php +(new Command\Builder\DeleteObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); +``` + +## Next Steps + +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/3.0.1/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/php/querying.md b/content/riak/kv/3.0.1/developing/getting-started/php/querying.md new file mode 100644 index 0000000000..3c49c20e18 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/php/querying.md @@ -0,0 +1,408 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with PHP" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Querying" + identifier: "getting_started_php_query" + weight: 101 + parent: "getting_started_php" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/querying-php + - /riak/kv/3.0.1/dev/taste-of-riak/querying-php + +--- + +## A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +## Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. 
+ +```php +atHost('127.0.0.1') + ->onPort(8098) + ->build(); + +$riak = new Riak([$node]); + +// Class definitions for our models + +class Customer +{ + var $customerId; + var $name; + var $address; + var $city; + var $state; + var $zip; + var $phone; + var $createdDate; +} + +class Order +{ + public function __construct() + { + $this->items = array(); + } + var $orderId; + var $customerId; + var $salespersonId; + var $items; + var $total; + var $orderDate; +} + +class Item +{ + public function __construct($itemId, $title, $price) + { + $this->itemId = $itemId; + $this->title = $title; + $this->price = $price; + } + var $itemId; + var $title; + var $price; +} + +class OrderSummary +{ + public function __construct() + { + $this->summaries = array(); + } + var $customerId; + var $summaries; +} + +class OrderSummaryItem +{ + public function __construct(Order $order) + { + $this->orderId = $order->orderId; + $this->total = $order->total; + $this->orderDate = $order->orderDate; + } + var $orderId; + var $total; + var $orderDate; +} + + +// Creating Data +$customer = new Customer(); +$customer->customerId = 1; +$customer->name = 'John Smith'; +$customer->address = '123 Main Street'; +$customer->city = 'Columbus'; +$customer->state = 'Ohio'; +$customer->zip = '43210'; +$customer->phone = '+1-614-555-5555'; +$customer->createdDate = '2013-10-01 14:30:26'; + + +$orders = []; + +$order1 = new Order(); +$order1->orderId = 1; +$order1->customerId = 1; +$order1->salespersonId = 9000; +$order1->items = [ + new Item( + 'TCV37GIT4NJ', + 'USB 3.0 Coffee Warmer', + 15.99 + ), + new Item( + 'PEG10BBF2PP', + 'eTablet Pro; 24GB; Grey', + 399.99 + ) +]; +$order1->total = 415.98; +$order1->orderDate = '2013-10-01 14:42:26'; +$orders[] = $order1; + +$order2 = new Order(); +$order2->orderId = 2; +$order2->customerId = 1; +$order2->salespersonId = 9001; +$order2->items = [ + new Item( + 'OAX19XWN0QP', + 'GoSlo Digital Camera', + 359.99 + ) +]; +$order2->total = 359.99; +$order2->orderDate = '2013-10-15 16:43:16'; +$orders[] = $order2; + +$order3 = new Order(); +$order3->orderId = 3; +$order3->customerId = 1; +$order3->salespersonId = 9000; +$order3->items = [ + new Item( + 'WYK12EPU5EZ', + 'Call of Battle = Goats - Gamesphere 4', + 69.99 + ), + new Item( + 'TJB84HAA8OA', + 'Bricko Building Blocks', + 4.99 + ) +]; +$order3->total = 74.98; +$order3->orderDate = '2013-11-03 17:45:28'; +$orders[] = $order3; + + +$orderSummary = new OrderSummary(); +$orderSummary->customerId = 1; +foreach ($orders as $order) { + $orderSummary->summaries[] = new OrderSummaryItem($order); +} +unset($order); + + + +// Starting Client +$node = (new Node\Builder) + ->atHost('127.0.0.1') + ->onPort(8098) + ->build(); + +$riak = new Riak([$node]); + +// Creating Buckets +$customersBucket = new Riak\Bucket('Customers'); +$ordersBucket = new Riak\Bucket('Orders'); +$orderSummariesBucket = new Riak\Bucket('OrderSummaries'); + +// Storing Data +$storeCustomer = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($customer) + ->atLocation(new Location($customer->customerId, $customersBucket)) + ->build(); +$storeCustomer->execute(); + +foreach ($orders as $order) { + $storeOrder = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($order) + ->atLocation(new Location($order->orderId, $ordersBucket)) + ->build(); + $storeOrder->execute(); +} +unset($order); + +$storeSummary = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($orderSummary) + ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket)) + 
->build(); +$storeSummary->execute(); +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```php +// Fetching related data by shared key +$fetched_customer = (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $customersBucket)) + ->build()->execute()->getObject()->getData(); + +$fetched_customer->orderSummary = + (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $orderSummariesBucket)) + ->build()->execute()->getObject()->getData(); + +print("Customer with OrderSummary data: \n"); +print_r($fetched_customer); +``` + +Which returns our amalgamated objects: + +```text +Customer with OrderSummary data: +stdClass Object +( + [customerId] => 1 + [name] => John Smith + [address] => 123 Main Street + [city] => Columbus + [state] => Ohio + [zip] => 43210 + [phone] => +1-614-555-5555 + [createdDate] => 2013-10-01 14:30:26 + [orderSummary] => stdClass Object + ( + [customerId] => 1 + [summaries] => Array + ( + [0] => stdClass Object + ( + [orderId] => 1 + [total] => 415.98 + [orderDate] => 2013-10-01 14:42:26 + ) + + [1] => stdClass Object + ( + [orderId] => 2 + [total] => 359.99 + [orderDate] => 2013-10-15 16:43:16 + ) + + [2] => stdClass Object + ( + [orderId] => 3 + [total] => 74.98 + [orderDate] => 2013-11-03 17:45:28 + ) + ) + ) +) +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.1/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```php +// Adding Index Data +$keys = array(1,2,3); +foreach ($keys as $key) { + $orderLocation = new Location($key, $ordersBucket); + $orderObject = (new Command\Builder\FetchObject($riak)) + ->atLocation($orderLocation) + ->build()->execute()->getObject(); + + $order = $orderObject->getData(); + + $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId); + $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate); + + $storeOrder = (new Command\Builder\StoreObject($riak)) + ->withObject($orderObject) + ->atLocation($orderLocation) + ->build(); + $storeOrder->execute(); +} +unset($key); + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. 
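+
+Note the suffixes on the index names above: secondary index names in Riak
+must end in `_int` for integer indexes or `_bin` for binary (string)
+indexes, which is how Riak knows each index's type:
+
+```php
+// The suffix declares the index type to Riak:
+$orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId); // integer index
+$orderObject->addValueToIndex('OrderDate_bin', $order->orderDate);         // binary index
+```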
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which as you can see will return the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/python.md b/content/riak/kv/3.0.1/developing/getting-started/python.md
new file mode 100644
index 0000000000..d1c2d8baec
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/python.md
@@ -0,0 +1,103 @@
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/taste-of-riak/python
+  - /riak/kv/3.0.1/dev/taste-of-riak/python
+
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.1/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`.
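+
+Before moving on, it can be worth confirming which Python your shell will actually launch (a quick sanity check; the version details on your machine will differ):
+
+```python
+import sys
+print(sys.version)   # the client targets Python 2.x, with 2.7 preferred
+```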
+
+## Prerequisites
+
+First, you must install some packages needed by the Riak Python client:
+
+* `python-dev` - Header files and a static library for Python
+* `libffi-dev` - Foreign function interface library
+* `libssl-dev` - libssl and libcrypto development libraries
+
+### Ubuntu (12.04 & 14.04)
+
+```bash
+sudo apt-get install python-dev libffi-dev libssl-dev
+```
+
+## Client Setup
+
+The easiest way to install the client is with `easy_install` or `pip`.
+Either of the commands below will ensure that the client and all its
+dependencies are installed and on the load path. Depending on where your
+Python libraries are held, these may require `sudo`.
+
+```bash
+easy_install riak
+pip install riak
+```
+
+To install from source, download the latest Python client from GitHub
+([zip](https://github.com/basho/riak-python-client/archive/master.zip),
+[GitHub repository](https://github.com/basho/riak-python-client)), and
+extract it to your working directory.
+
+Now, let's build the client.
+
+```bash
+python setup.py install
+```
+
+## Connecting to Riak
+
+Now, let's start the Python REPL and get set up. Enter the following
+into the Python REPL:
+
+```python
+import riak
+```
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```python
+myClient = riak.RiakClient(pb_port=8087, protocol='pbc')
+
+# Because the Python client uses the Protocol Buffers interface by
+# default, the following will work the same:
+myClient = riak.RiakClient(pb_port=8087)
+```
+
+If you set up a local Riak cluster using the [[five-minute install]]
+method, use this code snippet instead:
+
+```python
+myClient = riak.RiakClient(pb_port=10017, protocol='pbc')
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.1/developing/getting-started/python/crud-operations)
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/python/crud-operations.md b/content/riak/kv/3.0.1/developing/getting-started/python/crud-operations.md
new file mode 100644
index 0000000000..dee79195d6
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/python/crud-operations.md
@@ -0,0 +1,150 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "CRUD Operations"
+    identifier: "getting_started_python_crud"
+    weight: 100
+    parent: "getting_started_python"
+toc: true
+aliases:
+
+---
+
+## Creating Objects In Riak
+
+First, let’s create a few objects and a bucket to keep them in.
+
+```python
+myBucket = myClient.bucket('test')
+
+val1 = 1
+key1 = myBucket.new('one', data=val1)
+key1.store()
+```
+
+In this first example, we have stored the integer 1 with the lookup key
+of `one`. Next let’s store a simple string value of `two` with a
+matching key.
+
+```python
+val2 = "two"
+key2 = myBucket.new('two', data=val2)
+key2.store()
+```
+
+That was easy. Finally, let’s store a bit of JSON. You will probably
+recognize the pattern by now.
+
+```python
+val3 = {"myValue": 3}
+key3 = myBucket.new('three', data=val3)
+key3.store()
+```
+
+## Reading Objects From Riak
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+```python
+fetched1 = myBucket.get('one')
+fetched2 = myBucket.get('two')
+fetched3 = myBucket.get('three')
+
+assert val1 == fetched1.data
+assert val2 == fetched2.data
+assert val3 == fetched3.data
+```
+
+That was easy.
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/python/object-modeling.md b/content/riak/kv/3.0.1/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..948dd5c754 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/python/object-modeling.md @@ -0,0 +1,264 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/object-modeling-python + - /riak/kv/3.0.1/dev/taste-of-riak/object-modeling-python + +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_<type>_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```python
+class UserRepository:
+    BUCKET = 'Users'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, user):
+        riak_obj = self.client.bucket(self.BUCKET).get(user['user_name'])
+        riak_obj.data = user
+        return riak_obj.store()
+
+    def get(self, user_name):
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        return riak_obj.data
+
+
+class MsgRepository:
+    BUCKET = 'Msgs'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, msg):
+        msgs = self.client.bucket(self.BUCKET)
+        key = self._generate_key(msg)
+
+        riak_obj = msgs.get(key)
+
+        if not riak_obj.exists:
+            riak_obj.data = msg
+            riak_obj.store(if_none_match=True)
+
+        return riak_obj
+
+    def get(self, key):
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _generate_key(self, msg):
+        return msg['sender'] + '_' + msg['created']
+
+
+class TimelineRepository:
+    BUCKET = 'Timelines'
+    SENT = 'Sent'
+    INBOX = 'Inbox'
+
+    def __init__(self, client):
+        self.client = client
+        self.msg_repo = MsgRepository(client)
+
+    def post_message(self, msg):
+        # Save the canonical copy
+        saved_message = self.msg_repo.save(msg)
+        msg_key = saved_message.key
+
+        # Post to sender's Sent timeline
+        self._add_to_timeline(msg, self.SENT, msg_key)
+
+        # Post to recipient's Inbox timeline
+        self._add_to_timeline(msg, self.INBOX, msg_key)
+
+    def get_timeline(self, owner, msg_type, date):
+        key = self._generate_key(owner, msg_type, date)
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _add_to_timeline(self, msg, msg_type, msg_key):
+        timeline_key = self._generate_key_from_msg(msg, msg_type)
+        riak_obj = self.client.bucket(self.BUCKET).get(timeline_key)
+
+        if riak_obj.exists:
+            riak_obj = self._add_to_existing_timeline(riak_obj,
+                                                      msg_key)
+        else:
+            riak_obj = self._create_new_timeline(riak_obj,
+                                                 msg, msg_type,
+                                                 msg_key)
+
+        return riak_obj.store()
+
+    def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key):
+        owner = self._get_owner(msg, msg_type)
+        new_timeline = {'owner': owner,
+                        'msg_type': msg_type,
+                        'msgs': [msg_key]}
+
+        riak_obj.data = new_timeline
+        return riak_obj
+
+    def 
_add_to_existing_timeline(self, riak_obj, msg_key): + riak_obj.data['msgs'].append(msg_key) + return riak_obj + + def _get_owner(self, msg, msg_type): + if msg_type == self.INBOX: + return msg['recipient'] + else: + return msg['sender'] + + def _generate_key_from_msg(self, msg, msg_type): + owner = self._get_owner(msg, msg_type) + return self._generate_key(owner, msg_type, msg['created']) + + def _generate_key(self, owner, msg_type, datetimestr): + dateString = string.split(datetimestr, 'T', 1)[0] + return owner + '_' + msg_type + '_' + dateString + +``` + +Finally, let's test them: + +```python +# Setup our repositories +client = riak.RiakClient(pb_port=10017, protocol='pbc') +userRepo = UserRepository(client) +msgsRepo = MsgRepository(client) +timelineRepo = TimelineRepository(client) + +# Save users +userRepo.save(marleen) +userRepo.save(joe) + +# Post msg to timelines +timelineRepo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timelineRepo.get_timeline( + joe['user_name'], + TimelineRepository.INBOX, + datetime.utcnow().isoformat()) + +joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0]) + +print 'From: {0}\nMsg : {1}\n\n'.format( + joes_first_message['sender'], + joes_first_message['text']) + +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/python/querying.md b/content/riak/kv/3.0.1/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..9f5db770f6 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/python/querying.md @@ -0,0 +1,240 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Python" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Querying" + identifier: "getting_started_python_query" + weight: 101 + parent: "getting_started_python" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/querying-python + - /riak/kv/3.0.1/dev/taste-of-riak/querying-python + +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index for all customer orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```python
+customer = customer_bucket.get('1').data
+customer['order_summary'] = order_summary_bucket.get('1').data
+customer
+```
+
+Which returns our amalgamated objects:
+
+```python
+{
+  u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210',
+  u'created_date': u'2013-10-01 14:30:26',
+  'order_summary': {
+    u'customer_id': 1, u'summaries': [
+      {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98},
+      {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99},
+      {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98}
+    ]},
+  u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street',
+  u'customer_id': 1
+}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.1/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/ruby.md b/content/riak/kv/3.0.1/developing/getting-started/ruby.md
new file mode 100644
index 0000000000..4fb9fbe445
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/ruby.md
@@ -0,0 +1,68 @@
+---
+title: "Getting Started with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Ruby"
+    identifier: "getting_started_ruby"
+    weight: 101
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/taste-of-riak/ruby
+  - /riak/kv/3.0.1/dev/taste-of-riak/ruby
+
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.1/using/running-a-cluster) first. To try this flavor
+of Riak, a working installation of Ruby is required.
+
+## Client Setup
+
+First, install the Riak Ruby client via RubyGems.
+
+```bash
+gem install riak-client
+```
+
+Start IRB, the Ruby REPL, and let’s get set up. Enter the following into
+IRB:
+
+```ruby
+require 'riak'
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance, assuming that the node is running on `localhost`
+port 8087:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087)
+
+# Since the Ruby Riak client uses the Protocol Buffers API by default,
+# you can also just enter this:
+client = Riak::Client.new(:pb_port => 8087)
+```
+
+If you set up a local Riak cluster using the [[five-minute install]]
+method, use this code snippet instead:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017)
+
+# For the reasons explained in the snippet above, this will also work:
+client = Riak::Client.new(:pb_port => 10017)
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.1/developing/getting-started/ruby/crud-operations)
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/3.0.1/developing/getting-started/ruby/crud-operations.md
new file mode 100644
index 0000000000..b32d0b33a3
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/ruby/crud-operations.md
@@ -0,0 +1,151 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "CRUD Operations"
+    identifier: "getting_started_ruby_crud"
+    weight: 100
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+
+---
+
+## Creating Objects In Riak
+
+First, let’s create a few objects and a bucket to keep them in.
+
+```ruby
+my_bucket = client.bucket("test")
+
+val1 = 1
+obj1 = my_bucket.new('one')
+obj1.data = val1
+obj1.store()
+```
+
+In this first example we have stored the integer 1 with the lookup key
+of `one`. Next, let’s store a simple string value of `two` with a
+matching key.
+
+```ruby
+val2 = "two"
+obj2 = my_bucket.new('two')
+obj2.data = val2
+obj2.store()
+```
+
+That was easy. Finally, let’s store a bit of JSON. You will probably
+recognize the pattern by now.
+
+```ruby
+val3 = { myValue: 3 }
+obj3 = my_bucket.new('three')
+obj3.data = val3
+obj3.store()
+```
+
+## Reading Objects From Riak
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+```ruby
+fetched1 = my_bucket.get('one')
+fetched2 = my_bucket.get('two')
+fetched3 = my_bucket.get('three')
+
+fetched1.data == val1
+fetched2.data == val2
+fetched3.data.to_json == val3.to_json
+```
+
+That was easy. We simply request the objects by key. In the last
+example, we converted to JSON so we can compare a string key to a symbol
+key.
+
+## Updating Objects In Riak
+
+While some data may be static, other forms of data may need to be
+updated. This is also easy to accomplish. Let’s update the value of
+myValue in the 3rd example to 42.
+
+```ruby
+fetched3.data["myValue"] = 42
+fetched3.store()
+```
+
+## Deleting Objects From Riak
+
+As a last step, we’ll demonstrate how to delete data. You’ll see that
+the delete message can be called either against the bucket or the
+object.
+
+```ruby
+my_bucket.delete('one')
+obj2.delete()
+obj3.delete()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this Ruby hash that encapsulates some knowledge about
+a book.
+
+```ruby
+book = {
+  :isbn => '1111979723',
+  :title => 'Moby Dick',
+  :author => 'Herman Melville',
+  :body => 'Call me Ishmael. Some years ago...',
+  :copies_owned => 3
+}
+```
+
+All right, so we have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now.
+
+```ruby
+books_bucket = client.bucket('books')
+new_book = books_bucket.new(book[:isbn])
+new_book.data = book
+new_book.store()
+```
+
+Some of you may be thinking, "But how does the Ruby Riak client
+encode/decode my object?" If we fetch our book back and print the raw
+data, we shall know:
+
+```ruby
+fetched_book = books_bucket.get(book[:isbn])
+puts fetched_book.raw_data
+```
+
+Raw Data:
+
+```json
+{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville",
+"body":"Call me Ishmael. Some years ago...","copies_owned":3}
+```
+
+JSON! The Ruby Riak client will serialize objects to JSON when it comes
+across structured data like hashes. For more advanced control over
+serialization you can use a library called
+[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling
+layer over the basic Riak client. Ripple falls outside the scope of
+this document but we shall visit it later.
+
+Now, let’s clean up our mess:
+
+```ruby
+new_book.delete()
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/3.0.1/developing/getting-started/ruby/object-modeling.md
new file mode 100644
index 0000000000..bb8947256c
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/getting-started/ruby/object-modeling.md
@@ -0,0 +1,295 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Object Modeling"
+    identifier: "getting_started_ruby_object"
+    weight: 102
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/taste-of-riak/object-modeling-ruby
+  - /riak/kv/3.0.1/dev/taste-of-riak/object-modeling-ruby
+
+---
+
+To get started, let's create the models that we'll be using. Since the
+[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses
+hashes when converting to and from JSON, we'll use the library
+[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically
+coerce class properties to and from hashes. You can install this library
+with `gem install hashie`.
+
+```ruby
+# Encoding: utf-8
+
+require 'riak'
+require 'hashie'
+require 'time'
+
+class User < Hashie::Dash
+  property :user_name
+  property :full_name
+  property :email
+end
+
+class Msg < Hashie::Dash
+  property :from
+  property :to
+  property :created
+  property :text
+end
+
+class Timeline < Hashie::Dash
+  property :owner
+  property :type
+  property :msgs
+end
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def 
add_to_existing_timeline(key, msg_key) + riak_obj = @client.bucket(BUCKET).get(key) + timeline = Timeline.new(riak_obj.data) + timeline.msgs << msg_key + riak_obj.data = timeline + riak_obj + end + + def get_owner(msg, type) + type == INBOX ? msg.to : msg.from + end + + def generate_key_from_msg(msg, type) + owner = get_owner(msg, type) + generate_key(owner, type, msg.created) + end + + def generate_key(owner, type, date) + owner + '_' + type + '_' + date.utc.strftime('%F') + end +end +``` + +Finally, let's test them: + +```ruby +# Setup our repositories +client = Riak::Client.new(protocol: 'pbc', pb_port: 10017) +user_repo = UserRepository.new(client) +msgs_repo = MsgRepository.new(client) +timeline_repo = TimelineRepository.new(client) + +# Create and save users +marleen = User.new(user_name: 'marleenmgr', + full_name: 'Marleen Manager', + email: 'marleen.manager@basho.com') + +joe = User.new(user_name: 'joeuser', + full_name: 'Joe User', + email: 'joe.user@basho.com') + +user_repo.save(marleen) +user_repo.save(joe) + +# Create new Msg, post to timelines +msg = Msg.new(from: marleen.user_name, + to: joe.user_name, + created: Time.now, + text: 'Welcome to the company!') + +timeline_repo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now) +joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first) + +puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}" +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + diff --git a/content/riak/kv/3.0.1/developing/getting-started/ruby/querying.md b/content/riak/kv/3.0.1/developing/getting-started/ruby/querying.md new file mode 100644 index 0000000000..7e5083ad5c --- /dev/null +++ b/content/riak/kv/3.0.1/developing/getting-started/ruby/querying.md @@ -0,0 +1,256 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Ruby" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Querying" + identifier: "getting_started_ruby_query" + weight: 101 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/3.0.1/dev/taste-of-riak/querying-ruby + - /riak/kv/3.0.1/dev/taste-of-riak/querying-ruby + +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. 
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```ruby
+shared_key = '1'
+customer = customer_bucket.get(shared_key).data
+customer[:order_summary] = order_summary_bucket.get(shared_key).data
+puts "Combined Customer and Order Summary: "
+pp customer
+```
+
+Which returns our amalgamated objects:
+
+```ruby
+# Combined Customer and Order Summary:
+{"customer_id"=>1,
+ "name"=>"John Smith",
+ "address"=>"123 Main Street",
+ "city"=>"Columbus",
+ "state"=>"Ohio",
+ "zip"=>"43210",
+ "phone"=>"+1-614-555-5555",
+ "created_date"=>"2013-10-01 14:30:26 -0400",
+ :order_summary=>
+  {"customer_id"=>1,
+   "summaries"=>
+    [{"order_id"=>1,
+      "total"=>415.98,
+      "order_date"=>"2013-10-01 14:42:26 -0400"},
+     {"order_id"=>2,
+      "total"=>359.99,
+      "order_date"=>"2013-10-15 16:43:16 -0400"},
+     {"order_id"=>3,
+      "total"=>74.98,
+      "order_date"=>"2013-11-03 17:45:28 -0500"}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.1/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.1/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.1/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+                                         .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "#Jane's Orders: "
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`.
+ +```ruby +puts "#October's Orders: " +pp order_bucket.get_index('order_date_bin', '20131001'..'20131031') +``` + +Which returns: + +```ruby +# October's Orders: +["1", "2"] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + diff --git a/content/riak/kv/3.0.1/developing/key-value-modeling.md b/content/riak/kv/3.0.1/developing/key-value-modeling.md new file mode 100644 index 0000000000..60ba33ac71 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/key-value-modeling.md @@ -0,0 +1,535 @@ +--- +title: "Riak KV Key/Value Modeling" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Key/Value Modeling" + identifier: "developing_kv_model" + weight: 104 + parent: "developing" +toc: true +aliases: + - /riak/3.0.1/dev/data-modeling/key-value/ + - /riak/kv/3.0.1/dev/data-modeling/key-value/ + +--- + +While Riak enables you to take advantage of a wide variety of features +that can be useful in application development, such as [Search]({{}}riak/kv/3.0.1/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/3.0.1/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/3.0.1/developing/data-types/), Riak almost always performs best when you +build your application around basic CRUD operations (create, read, +update, and delete) on objects, i.e. when you use Riak as a "pure" +key/value store. + +In this tutorial, we'll suggest some strategies for naming and modeling +for key/value object interactions with Riak. If you'd like to use some +of Riak's other features, we recommend checking out the documentation +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/3.0.1/developing/app-guide/) for a better sense of which features you might need. + +## Advantages of Key/Value Operations + +Riak's key/value architecture enables it to be more performant than +relational databases in many scenarios because Riak doesn't need to +perform lock, join, union, or other operations when working with +objects. Instead, it interacts with objects on a one-by-one basis, using +**primary key lookups**. + +Primary key lookups store and fetch objects in Riak on the basis of +three basic locators: + +* The object's [key]({{}}riak/kv/3.0.1/learn/concepts/keys-and-objects#keys), which can be anything you + want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/3.0.1/learn/concepts/buckets) which houses the object and its key (bucket + names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/3.0.1/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/3.0.1/developing/app-guide/replication-properties) and other properties + +It may be useful to think of this system as analogous to a nested +key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you +would find in most programming languages. Below is an example from +[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{}}riak/kv/3.0.1/developing/api/http)):
+
+```
+GET/PUT/DELETE /bucket/<bucket>/keys/<key>
+```
+
+The most important benefit of sorting Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist?
+ +One way to determine this is to [list all keys]({{}}riak/kv/3.0.1/developing/api/protocol-buffers/list-keys) in the +bucket `users`. This approach, however, is _not_ recommended, because +listing all keys in a bucket is a very expensive operation that should +not be used in production. And so another strategy must be employed. + +A better possibility is to use [Riak sets]({{}}riak/kv/3.0.1/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/3.0.1/developing/data-types) that enable you to store lists of binaries or strings in Riak. +Unlike normal Riak objects, you can interact with Riak sets much like +you interact with sets in most programming languages, i.e. you can add +and remove elements at will. + +Going back to our user data example, instead of simply storing user +records in our `users` bucket, we could set up our application to store +each key in a set when a new record is created. We'll store this set in +the bucket `user_info_sets` (we'll keep it simple) and in the key +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/3.0.1/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`sets`. + +We can interact with that set on the basis of its location: + +```java +Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames"); + +// With this Location, we can construct fetch operations like this: +FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build(); +``` + +```ruby +require 'riak' + +set_bucket = client.bucket('user_info_sets') + +# We'll make this set global because we'll use it +# inside of a function later on + +$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets') +``` + +```php +$command = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->buildLocation('usernames', 'user_info_sets', 'sets') + ->build(); +``` + +```python +from riak.datatypes import Set + +bucket = client.bucket_type('sets').bucket('user_info_sets') +user_id_set = Set(bucket, 'usernames') +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/3.0.1/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/3.0.1/developing/getting-started). 
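+
+Before building helpers around the set, it can be worth a quick sanity check that the set is reachable. Here is a minimal check with the Python client from above (the other clients have analogous fetch commands); it assumes the `sets` bucket type has been activated as described earlier:
+
+```python
+# Reload the set from Riak and inspect its members.
+# `user_id_set` is the riak.datatypes.Set created above; it will be
+# empty until user records have been stored.
+user_id_set.reload()
+print(set(user_id_set.value))
+```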
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+  public String username;
+  public String info;
+
+  public User(String username, String info) {
+    this.username = username;
+    this.info = info;
+  }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+  // User records themselves will be stored in the bucket "users"
+  Location userObjectLocation =
+    new Location(new Namespace("users"), user.username);
+  RiakObject userObject = new RiakObject()
+    // We'll keep it simple and store User object data as plain text
+    .setContentType("text/plain")
+    .setValue(BinaryValue.create(user.info));
+  StoreValue store = new StoreValue.Builder(userObject)
+    .withLocation(userObjectLocation)
+    .build();
+  client.execute(store);
+
+  Location userIdSet =
+    new Location(new Namespace("sets", "user_info_sets"), "usernames");
+  SetUpdate su = new SetUpdate()
+    .add(BinaryValue.create(user.username));
+  UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+    .build();
+  client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+
+  def initialize(username, info)
+    @username = username
+    @info = info
+  end
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new(client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the set
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+    public $user_name;
+    public $info;
+
+    public function __construct($user_name, $info)
+    {
+        $this->user_name = $user_name;
+        $this->info = $info;
+    }
+}
+
+function store_user(User $user)
+{
+    global $riak;
+
+    (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->buildLocation($user->user_name, 'users')
+        ->buildJsonObject($user)
+        ->build()
+        ->execute();
+
+    (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->add($user->user_name)
+        ->build()
+        ->execute();
+}
+```
+
+```python
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    bucket = client.bucket('users')
+    obj = RiakObject(client, bucket, user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
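+
+Wiring that together in Java, creating a record and registering its key
+in the set is then a single call (a sketch that assumes the `client` and
+`storeUserRecord` pieces above; the user data is made up):
+
+```java
+// Store the record and add the username to the "usernames" set
+User newUser = new User("bashobunny", "Riak fan since 2013");
+storeUserRecord(newUser);
+```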
+
+Now, let's say that we want to be able to pull up all user records in
+the bucket at once. We could do so by iterating through the usernames
+stored in our set and then fetching the object corresponding to each
+username:
+
+```java
+public Set<User> fetchAllUserRecords() throws Exception {
+  // Empty sets for usernames and User objects
+  Set<String> userIds = new HashSet<String>();
+  Set<User> userSet = new HashSet<User>();
+
+  // Turn the Riak username set into a set of Strings
+  Location userIdSetLocation =
+    new Location(new Namespace("sets", "user_info_sets"), "usernames");
+  FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSetLocation).build();
+  RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+  for (BinaryValue username : set.view()) {
+    userIds.add(username.toString());
+  }
+
+  // Fetch User objects for each of the usernames stored in the set
+  for (String username : userIds) {
+    Location userLocation = new Location(new Namespace("users"), username);
+    FetchValue fetch = new FetchValue.Builder(userLocation).build();
+    User user = client.execute(fetch).getValue(User.class);
+    userSet.add(user);
+  }
+  return userSet;
+}
+```
+
+```ruby
+# Using the "$user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).raw_data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+    global $riak;
+
+    $users = [];
+
+    $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->build()
+        ->execute();
+
+    $user_names = $response->getSet()->getData();
+    foreach($user_names as $user_name) {
+        $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+            ->buildLocation($user_name, 'users')
+            ->build()
+            ->execute();
+
+        $users[$user_name] = $response->getObject()->getData();
+    }
+
+    return $users;
+}
+```
+
+```python
+# We'll create a generator that yields Riak objects one at a time
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can retrieve that list of Riak objects later on
+list(fetch_all_user_records())
+```
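+
+And a quick usage sketch of the Java version:
+
+```java
+// Print the username of every stored user record
+Set<User> allUsers = fetchAllUserRecords();
+for (User user : allUsers) {
+  System.out.println(user.username);
+}
+```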
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`. If an
+object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) throws Exception {
+  String usernameKey = String.format("user_%s", username);
+  Location loc = new Location(new Namespace("users"), usernameKey);
+  FetchValue fetchUser = new FetchValue.Builder(loc).build();
+  FetchValue.Response res = client.execute(fetchUser);
+  User userObject = res.getValue(User.class);
+  return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+    global $riak;
+
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->buildLocation('user_' . $user_name, 'users')
+        ->build()
+        ->execute();
+
+    return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak admin bucket-type create john
+riak admin bucket-type create robert
+riak admin bucket-type create jimmy
+riak admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak admin bucket-type create earth '{"props":{"n_val":2}}'
+riak admin bucket-type create fire '{"props":{"n_val":2}}'
+riak admin bucket-type create wind '{"props":{"n_val":2}}'
+riak admin bucket-type create water '{"props":{"n_val":2}}'
+riak admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
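+
+In the Java client, that third layer is simply the first argument to
+`Namespace`. Here's a minimal sketch (the `good` bucket type, `season8`
+bucket, and `episode6` key are hypothetical names for this example):
+
+```java
+// Namespace(bucketType, bucket): the bucket type supplies the third
+// layer of namespacing on top of the bucket and key
+Location episode = new Location(new Namespace("good", "season8"), "episode6");
+FetchValue fetch = new FetchValue.Builder(episode).build();
+RiakObject title = client.execute(fetch).getValue(RiakObject.class);
+```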
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good' => {
+    'season X' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  },
+  'bad' => {
+    'season Y' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  }
+}
+```
+
+We can fetch the title of season 8, episode 6:
+
+```ruby
+# For the sake of example, we'll classify season 8 as good:
+
+simpsons['good']['season 8']['episode 6']
+
+# => "A Milhouse Divided"
+```
+
+If your data is best modeled as a three-layered hash, you may want to
+consider using bucket types in the way shown above.
+
+## Resources
+
+More on key/value modeling in Riak can be found in [this
+presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo)
+by Basho evangelist [Hector Castro](https://github.com/hectcastro), with
+the presentation slides available [on Speaker
+Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example).
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage.md b/content/riak/kv/3.0.1/developing/usage.md
new file mode 100644
index 0000000000..fbae899b62
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage.md
@@ -0,0 +1,138 @@
+---
+title: "Usage Overview"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Usage"
+    identifier: "developing_usage"
+    weight: 101
+    parent: "developing"
+toc: true
+aliases:
+
+---
+
+## In This Section
+
+#### [Creating Objects](./creating-objects)
+
+Creating and storing objects in Riak KV.
+
+[Learn More >>](./creating-objects)
+
+
+#### [Reading Objects](./reading-objects)
+
+Reading and fetching objects in Riak KV.
+
+[Learn More >>](./reading-objects)
+
+
+#### [Updating Objects](./updating-objects)
+
+Updating objects in Riak KV.
+
+[Learn More >>](./updating-objects)
+
+
+#### [Deleting Objects](./deleting-objects)
+
+Deleting objects in Riak KV.
+
+[Learn More >>](./deleting-objects)
+
+
+#### [Content Types](./content-types)
+
+Overview of content types and their usage.
+
+[Learn More >>](./content-types)
+
+
+#### [Using Search](./search)
+
+Tutorial on using search.
+
+[Learn More >>](./search)
+
+
+#### [Using MapReduce](./mapreduce)
+
+Guide to using MapReduce in applications.
+
+[Learn More >>](./mapreduce)
+
+
+#### [Using Secondary Indexes](./secondary-indexes)
+
+Overview and usage details of Secondary Indexes (2i).
+
+[Learn More >>](./secondary-indexes)
+
+
+#### [Bucket Types](./bucket-types)
+
+Describes how to use bucket properties.
+
+[Learn More >>](./bucket-types)
+
+
+#### [Using Commit Hooks](./commit-hooks)
+
+Tutorial on pre-commit and post-commit hook functions.
+
+[Learn More >>](./commit-hooks)
+
+
+#### [Creating Search Schemas](./search-schemas)
+
+Step-by-step guide on creating and using custom search schemas.
+
+[Learn More >>](./search-schemas)
+
+
+#### [Searching with Data Types](./searching-data-types)
+
+Guide on using search with Data Types.
+
+[Learn More >>](./searching-data-types)
+
+
+#### [Implementing a Document Store](./document-store)
+
+Tutorial on using Riak KV as a document store.
+
+[Learn More >>](./document-store)
+
+
+#### [Custom Extractors](./custom-extractors)
+
+Details on creating and registering custom extractors with Riak Search.
+
+[Learn More >>](./custom-extractors)
+
+
+#### [Client-side Security](./security)
+
+Overview of client-side security.
+
+[Learn More >>](./security)
+
+
+#### [Replication](./replication)
+
+Documentation on replication properties and their underlying implementation.
+
+[Learn More >>](./replication)
+
+
+#### [Conflict Resolution](./conflict-resolution)
+
+Guide to conflict resolution during object updates.
+ +[Learn More >>](./conflict-resolution) + + + diff --git a/content/riak/kv/3.0.1/developing/usage/bucket-types.md b/content/riak/kv/3.0.1/developing/usage/bucket-types.md new file mode 100644 index 0000000000..18cc98421b --- /dev/null +++ b/content/riak/kv/3.0.1/developing/usage/bucket-types.md @@ -0,0 +1,102 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Bucket Types" + identifier: "usage_bucket_types" + weight: 108 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.1/dev/advanced/bucket-types + - /riak/kv/3.0.1/dev/advanced/bucket-types + +--- + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +## Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +`default` bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + + + diff --git a/content/riak/kv/3.0.1/developing/usage/commit-hooks.md b/content/riak/kv/3.0.1/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..d63b0fe7af --- /dev/null +++ b/content/riak/kv/3.0.1/developing/usage/commit-hooks.md @@ -0,0 +1,243 @@ +--- +title: "Using Commit Hooks" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Using Commit Hooks" + identifier: "usage_commit_hooks" + weight: 109 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.1/dev/using/commit-hooks + - /riak/kv/3.0.1/dev/using/commit-hooks + +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types + +Pre- and post-commit hooks are functions that are invoked before or +after an object has been written to Riak. 
To provide a few examples,
+commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/3.0.1/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions are allowed three possible return values:
+
+- A Riak object - This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before it is written.
+- `fail` - The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/3.0.1/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` - The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text. (A client-side sketch of handling such a failure follows below.)
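+
+On the client side, a write rejected by a pre-commit hook surfaces as an
+error. Here's a minimal, hypothetical sketch using the official Java
+client (the bucket type `with_pre_commit` and bucket `accounts` are
+invented for this example; the exact message depends on the hook's
+`Reason`):
+
+```java
+// Assumes a surrounding method that declares
+// "throws InterruptedException" and an existing RiakObject "obj"
+try {
+  StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(new Location(new Namespace("with_pre_commit", "accounts"), "key1"))
+    .build();
+  client.execute(store);
+} catch (ExecutionException e) {
+  // The hook's failure reason appears in the exception message
+  System.err.println("Write rejected by pre-commit hook: " + e.getMessage());
+}
+```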
+
+Errors that occur when processing Erlang pre-commit hooks will be
+reported in the `sasl-error.log` file with lines that start with
+`problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object
+(`Object`) as its input and runs a pattern-matching operation on the
+object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
+function determines that the size of the object's value (as returned by
+the `riak_object:get_value` function) is greater than 5,242,880 bytes
+(5 MB), then the commit will fail with the message `Object is larger
+than 5MB.` This will stop the write. If the object is not larger than
+5 MB, Riak will return the object and allow the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty
+list, meaning that no pre-commit hooks are specified by default. Adding
+one or more pre-commit hook functions to this list, as documented above,
+will cause Riak to start evaluating those hook functions when bucket
+entries are created, updated, or deleted. Riak stops evaluating
+pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way to use
+pre-commit hooks is to validate data before it is written to Riak.
+Below is an example that uses an Erlang hook to validate a JSON object
+before it is written to Riak.
+
+Below is a sample JSON object that will be evaluated by the hook:
+
+```json
+{
+  "user_info": {
+    "name": "Mark Phillips",
+    "age": "25"
+  },
+  "session_info": {
+    "id": 3254425,
+    "items": [29, 37, 34]
+  }
+}
+```
+
+The following hook will validate the JSON object:
+
+```erlang
+validate(Object) ->
+  try
+    mochijson2:decode(riak_object:get_value(Object)),
+    Object
+  catch
+    throw:invalid_utf8 ->
+      {fail, "Invalid JSON: Illegal UTF-8 character"};
+    error:Error ->
+      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
+  end.
+```
+
+**Note**: All pre-commit hook functions are executed for each create and update operation.
+
+## Post-Commit Hooks
+
+Post-commit hooks are run after a write has completed successfully. More
+specifically, the hook function is called immediately before the calling
+process is notified of the successful write.
+
+Hook functions must accept a single argument: the object instance just
+written. The return value of the function is ignored. As with pre-commit
+hooks, deletes are considered writes, so post-commit hook functions will
+need to inspect the object's metadata for the presence of `X-Riak-Deleted`
+to determine whether a delete has occurred. As with pre-commit hooks,
+errors that occur when processing post-commit hooks will be reported in
+the `sasl-error.log` file with lines that start with `problem invoking hook`.
+
+#### Example
+
+The following post-commit hook creates a secondary index on the `email`
+field of a JSON object:
+
+```erlang
+postcommit_index_on_email(Object) ->
+  %% Determine the target bucket name
+  Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),
+
+  %% Decode the JSON body of the object
+  {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),
+
+  %% Extract the email field
+  {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),
+
+  %% Create a new object for the target bucket
+  %% NOTE: This doesn't handle the case where the
+  %% index object already exists!
+  IndexObj = riak_object:new(
+    Bucket, Key, <<>>, %% no object contents
+    dict:from_list(
+      [
+        {<<"content-type">>, "text/plain"},
+        {<<"Links">>,
+          [
+            {
+              {riak_object:bucket(Object), riak_object:key(Object)},
+              <<"indexed">>
+            }]}
+      ]
+    )
+  ),
+
+  %% Get a riak client
+  {ok, C} = riak:local_client(),
+
+  %% Store the object
+  C:put(IndexObj).
+```
+
+### Chaining
+
+The default value of the bucket `postcommit` property is an empty list,
+meaning that no post-commit hooks are specified by default. Adding one
+or more post-commit hook functions to the list, as documented above,
+will cause Riak to start evaluating those hook functions immediately
+after data has been created, updated, or deleted. Each post-commit hook
+function runs in a separate process so it's possible for several hook
+functions, triggered by the same update, to execute in parallel.
+
+**Note**: All post-commit hook functions are executed for each create,
+update, or delete.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/conflict-resolution.md b/content/riak/kv/3.0.1/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..1a35f53a51
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/conflict-resolution.md
@@ -0,0 +1,681 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/conflict-resolution
+  - /riak/kv/3.0.1/dev/using/conflict-resolution
+
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#node) is capable of receiving requests without requiring that
+every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
+unavoidable. Often, Riak can resolve these conflicts on its own
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+
+{{% note title="Important note on terminology" %}}
+In versions of Riak prior to 2.0, vector clocks were the only causal context
+mechanism available in Riak, which changed with the introduction of dotted
+version vectors in 2.0. Please note that you may frequently find terminology in
+client library APIs, internal Basho documentation, and more that uses the term
+"vector clock" interchangeably with causal context in general. Riak's HTTP API
+still uses an `X-Riak-Vclock` header, for example, even if you are using dotted
+version vectors.
+{{% /note %}}
+
+But even when you use causal context, Riak cannot always decide which
+value is most causally recent, especially in cases involving concurrent
+updates to an object. So how does Riak behave when it can't decide on a
+single most-up-to-date value? **That is your choice**. A full listing of
+available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now,
+though, please bear in mind that we strongly recommend one of the
+following two options:
+
+1. If your data can be modeled as one of the currently available [Riak
+   Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types), we recommend using one of these types,
+   because all of them have conflict resolution _built in_, completely
+   relieving applications of the need to engage in conflict resolution.
+2. If your data cannot be modeled as one of the available Data Types,
+   we recommend allowing Riak to generate [siblings](#siblings) and designing your application to resolve
+   conflicts in a way that fits your use case. Developing your own
+   **conflict resolution strategy** can be tricky, but it has clear
+   advantages over other approaches.
+
+Because Riak allows for a mixed approach when storing and managing data,
+you can apply multiple conflict resolution strategies within a cluster.
+
+> **Note on strong consistency**
+>
+> In versions of Riak 2.0 and later, you have the option of using Riak in
+a strongly consistent fashion. This document pertains to usage of Riak
+as an _eventually_ consistent system. If you'd like to use Riak's
+strong consistency feature, please refer to the following documents:
+>
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/strong-consistency) - A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/3.0.1/configuring/strong-consistency) - A guide for operators
+> * [Strong Consistency][use ref strong consistency] - A more theoretical explication of strong
+  consistency
+
+## Client- and Server-side Conflict Resolution
+
+Riak's eventual consistency model is powerful because Riak is
+fundamentally non-opinionated about how data resolution takes place.
+While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+approaches to conflict resolution that are available. In Riak, you can
+mix and match conflict resolution strategies at the bucket level,
+[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets)
+to consider when reasoning about conflict resolution are the
+`allow_mult` and `last_write_wins` properties.
+
+These properties provide you with the following basic options:
+
+### Timestamp-based Resolution
+
+If the [`allow_mult`](#siblings) parameter is set to
+`false`, Riak resolves all object replica conflicts internally and does
+not return siblings to the client. 
How Riak resolves those conflicts +depends on the value that you set for a different bucket property, +[`last_write_wins`]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets). If `last_write_wins` is set to `false`, +Riak will resolve all conflicts on the basis of +[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are +attached to all Riak objects as metadata. + +The problem with timestamps is that they are not a reliable resolution +mechanism in distributed systems, and they always bear the risk of data +loss. A better yet still-problematic option is to adopt a +last-write-wins strategy, described directly below. + +### Last-write-wins + +Another way to manage conflicts is to set `allow_mult` to `false`, as +with timestamp-based resolution, while also setting the +`last_write_wins` parameter to +`true`. This produces a so-called last-write-wins (LWW) strategy whereby +Riak foregoes the use of all internal conflict resolution strategies +when making writes, effectively disregarding all previous writes. + +The problem with LWW is that it will necessarily drop some writes in the +case of concurrent updates in the name of preventing sibling creation. +If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. For examples of client-side sibling resolution, see the following +client-library-specific docs: + +* [Java]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/nodejs) + +In Riak versions 2.0 and later, `allow_mult` is set to `true` by default +for any [bucket types]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) that you create. 
This means
+that if you wish to avoid client-side sibling resolution, you have a few
+options:
+
+* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types)
+  that set `allow_mult` to `false`
+* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/3.0.1/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/3.0.1/configuring/reference#default-bucket-properties) for your
+  cluster. If you set the `buckets.default.allow_mult` parameter to
+  `false`, all bucket types that you create will have `allow_mult` set
+  to `false` by default.
+
+## Causal Context
+
+When a value is stored in Riak, it is tagged with a piece of metadata
+called a **causal context** which establishes the object's initial
+version. Causal context comes in one of two possible forms, depending
+on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context#vector-clocks) will be used.
+
+Causal context essentially enables Riak to compare the different values
+of objects stored in Riak and to determine a number of important things
+about those values:
+
+ * Whether one value is a direct descendant of the other
+ * Whether the values are direct descendants of a common parent
+ * Whether the values are unrelated in recent heritage
+
+Using the information provided by causal context, Riak is frequently,
+though not always, able to resolve conflicts between values without
+producing siblings.
+
+Both vector clocks and dotted version vectors are not human readable and
+look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+If `allow_mult` is set to `true`, you should _always_ use causal context
+when updating objects, _unless you are certain that no object exists
+under that key_. Failing to use causal context with mutable data,
+especially for objects that are frequently updated, can lead to
+[sibling explosion]({{<baseurl>}}riak/kv/3.0.1/using/performance/latency-reduction#siblings), which can
+produce a variety of problems in your cluster. Fortunately, much of the
+work involved with using causal context is handled automatically by
+Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.1/developing/client-libraries). Examples can be found for each
+client library in the [Object Updates]({{<baseurl>}}riak/kv/3.0.1/developing/usage/updating-objects) document.
+
+## Siblings
+
+A **sibling** is created when Riak is unable to resolve the canonical
+version of an object being stored, i.e. when Riak is presented with
+multiple possible values for an object and can't figure out which one is
+most causally recent. The following scenarios can create sibling values
+inside of a single object:
+
+1. **Concurrent writes** - If two writes occur simultaneously from
+clients, Riak may not be able to choose a single value to store, in
+which case the object will be given a sibling. These writes could happen
+on the same node or on different nodes.
+2. **Stale causal context** - Writes from any client using a stale
+[causal context]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context).
This is a less likely scenario if a client updates
+the object by reading the object first, fetching the causal context
+currently attached to the object, and then returning that causal context
+to Riak when performing the update (fortunately, our client libraries
+handle much of this automatically). However, even if a client follows
+this protocol when performing updates, a situation may occur in which an
+update happens from a different client while the read/write cycle is
+taking place. This may cause the first client to issue the write with an
+old causal context value and for a sibling to be created. A client is
+"misbehaved" if it habitually updates objects with a stale context or
+with no context at all.
+3. **Missing causal context** - If an object is updated with no causal
+context attached, siblings are very likely to be created. This is an
+unlikely scenario if you're using a Basho client library, but it _can_
+happen if you are manipulating objects using a client like `curl` and
+forgetting to set the `X-Riak-Vclock` header.
+
+## Siblings in Action
+
+Let's have a more concrete look at how siblings work in Riak. First,
+we'll create a bucket type called `siblings_allowed` with `allow_mult`
+set to `true`:
+
+```bash
+riak admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}'
+riak admin bucket-type activate siblings_allowed
+riak admin bucket-type status siblings_allowed
+```
+
+If the type has been activated, running the `status` command should
+return `siblings_allowed is active`. Now, we'll create two objects and
+write both of them to the same key without first fetching the object
+(which obtains the causal context):
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+RiakObject obj1 = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(BinaryValue.create("Ren"));
+RiakObject obj2 = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(BinaryValue.create("Stimpy"));
+StoreValue store1 = new StoreValue.Builder(obj1)
+  .withLocation(bestCharacterKey)
+  .build();
+StoreValue store2 = new StoreValue.Builder(obj2)
+  .withLocation(bestCharacterKey)
+  .build();
+client.execute(store1);
+client.execute(store2);
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = Riak::RObject.new(bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'Ren'
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'Stimpy'
+obj2.store
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj1 = RiakObject(client, bucket, 'best_character')
+obj1.content_type = 'text/plain'
+obj1.data = 'Ren'
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'best_character')
+obj2.content_type = 'text/plain'
+obj2.data = 'Stimpy'
+obj2.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('siblings_allowed');
+obj1.setBucket('nickolodeon');
+obj1.setKey('best_character');
+obj1.setValue('Ren');
+
+var obj2 = new Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries]({{<baseurl>}}riak/kv/3.0.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.1/developing/getting-started) section.
+
+At this point, multiple objects have been stored in the same key without
+passing any causal context to Riak. Let's see what happens if we try to
+read the contents of the object:
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed',
+    bucket: 'nickolodeon',
+    key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found.
 We should get this response:
+
+```java
+com.basho.riak.client.cap.UnresolvedConflictException: Siblings found
+```
+
+```ruby
+<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]>
+```
+
+```python
+[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>]
+```
+
+```csharp
+Sibling count: 2
+    VTag: 1DSVo7VED8AC6llS8IcDE6
+    VTag: 7EiwrlFAJI5VMLK87vU4tE
+```
+
+```javascript
+info: nickolodeon/best_character has '2' siblings
+```
+
+```curl
+Siblings:
+175xDv0I3UFCfGRC7K7U9z
+6zY2mUCFPEoL834vYCDmPe
+```
+
+As you can see, reading an object with sibling values will result in
+some form of "multiple choices" response (e.g. `300 Multiple Choices` in
+HTTP). If you're using the HTTP interface and want to view all sibling
+values, you can attach an `Accept: multipart/mixed` header to your
+request:
+
+```curl
+curl -H "Accept: multipart/mixed" \
+  http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Response (without headers):
+
+```
+Ren
+--WUnzXITIPJFwucNwfdaofMkEG7H
+
+Stimpy
+--WUnzXITIPJFwucNwfdaofMkEG7H--
+```
+
+If you select the first of the two siblings and retrieve its value, you
+should see `Ren` and not `Stimpy`.
+
+### Using Causal Context
+
+Once you are presented with multiple options for a single value, you
+must determine the correct value. In an application, this can be done
+either in an automatic fashion, using a use-case-specific resolver, or
+by presenting the conflicting objects to the end user. For more
+information on application-side conflict resolution, see our
+client-library-specific documentation for the following languages:
+
+* [Java]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/java)
+* [Ruby]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/ruby)
+* [Python]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/python)
+* [C#]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/csharp)
+* [Node.js]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/nodejs)
+
+We won't deal with conflict resolution in this section. Instead, we'll
+focus on how to use causal context.
+
+After having written several objects to Riak in the section above, we
+have values in our object: `Ren` and `Stimpy`. But let's say that we
+decide that `Stimpy` is the correct value based on our application's use
+case. In order to resolve the conflict, we need to do three things:
+
+1. Fetch the current object (which will return both siblings)
+2. Modify the value of the object, i.e. make the value `Stimpy`
+3. Write the object back to the `best_character` key
+
+What happens when we fetch the object first, prior to the update, is
+that the object handled by the client has a causal context attached. At
+that point, we can modify the object's value, and when we write the
+object back to Riak, _the causal context will automatically be attached
+to it_.
 Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+  .withLocation(bestCharacterKey)
+  .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed',
+    bucket: 'nickolodeon',
+    key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var riakObj = rslt.values.shift();
+    riakObj.setValue('Stimpy');
+    client.storeValue({ value: riakObj, returnBody: true },
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+
+            assert(rslt.values.length === 1);
+        }
+    );
+});
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible to have two clients that are
+simultaneously engaging in conflict resolution. To avoid a pathological
+divergence, you should be sure to limit the number of reconciliations and fail
+once that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/3.0.1/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant volume of updates is performed on a single
+object in a short period of time. While updating a single object
+_extremely_ frequently is not recommended, you can tune Riak's vector
+clock pruning to prevent vector clocks from growing too large too
+quickly. More on pruning in the [section below](#vector-clock-pruning).
+
+### How does `last_write_wins` affect resolution?
+
+On the surface, it seems like setting `allow_mult` to `false`
+(the default) and `last_write_wins` to `true` would result in the same
+behavior, but there is a subtle distinction.
+
+Even though both settings return only one value to the client, setting
+`allow_mult` to `false` still uses vector clocks for resolution, whereas
+if `last_write_wins` is `true`, Riak reads the timestamp to determine
+the latest version. Deeper in the system, if `allow_mult` is `false`,
+Riak will still allow siblings to exist when they are created (via
+concurrent writes or network partitions), whereas setting
+`last_write_wins` to `true` means that Riak will overwrite the value
+with the one that has the later timestamp.
+
+When you don't care about sibling creation, setting `allow_mult` to
+`false` has the least surprising behavior: you get the latest value,
+but network partitions are handled gracefully. However, for cases in
+which keys are rewritten often (and quickly) and the new value isn't
+necessarily dependent on the old value, `last_write_wins` will provide
+better performance. Some use cases where you might want to use
+`last_write_wins` include caching, session storage, and insert-only
+(no updates).
+
+{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}}
+The combination of setting both the `allow_mult` and `last_write_wins`
+properties to `true` leads to undefined behavior and should not be used.
+{{% /note %}}
+
+## Vector Clock Pruning
+
+Riak regularly prunes vector clocks to prevent overgrowth based on four
+parameters which can be set for any bucket type that you create:
+
+Parameter | Default value | Description
+:---------|:--------------|:-----------
+`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned
+`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned
+`young_vclock` | `20` | If a vector clock entry is younger than this value (in seconds), it will not be pruned
+`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in seconds), it will be pruned
+
+This diagram shows how the values of these parameters dictate the vector
+clock pruning process:
+
+![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)
+
+## More Information
+
+Additional background information on vector clocks:
+
+* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock)
+* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/)
+* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/)
+* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563)
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/csharp.md
new file mode 100644
index 0000000000..ec509e932f
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/csharp.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
    name: "C Sharp"
+    identifier: "usage_conflict_resolution_csharp"
+    weight: 103
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/conflict-resolution/csharp
+  - /riak/kv/3.0.1/dev/using/conflict-resolution/csharp
+
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak .NET client][riak_dotnet_client].
+
+## How the .NET Client Handles Conflict Resolution
+
+In the Riak .NET client, every Riak object has a `Siblings` property that
+provides access to a list of that object's sibling values. If there are no
+siblings, that property will return an empty list.
+
+Here's an example of an object with siblings:
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+So what happens if the count of `obj.Siblings` is greater than 0, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update and store a
+canonical value, or choose a sibling from the `Siblings` list and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Siblings` list and will
+fetch, update and store the definitive value.
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Now, modify the object's value
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+### Choosing a value from `Siblings`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/conflict-resolution/golang.md b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..e8b6ae31d5
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/conflict-resolution/golang
+  - /riak/kv/3.0.1/dev/using/conflict-resolution/golang
+
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, the result of a fetch can contain an array of
+sibling objects in its `Values` slice. If there are no siblings, that
+slice will contain a single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either: fetch, update, and store a
+canonical value; or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L125-L146)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type.
+ +[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210) + + + diff --git a/content/riak/kv/3.0.1/developing/usage/conflict-resolution/java.md b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/java.md new file mode 100644 index 0000000000..9a9139d3fe --- /dev/null +++ b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/java.md @@ -0,0 +1,276 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Java" + identifier: "usage_conflict_resolution_java" + weight: 100 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/3.0.1/dev/using/conflict-resolution/java + - /riak/kv/3.0.1/dev/using/conflict-resolution/java + +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Java +client](https://github.com/basho/riak-java-client). + +## How the Java Client Handles Conflict Resolution + +The official Riak Java client provides a `ConflictResolver` interface +for handling sibling resolution. This interface requires that you +implement a `resolve` method that takes a Java `List` of objects of a +specific type that are stored in Riak and produces a single object of +that type, i.e. converts a `List<T>` to a single `T`. Once that +interface has been implemented, it can be registered as a singleton and +thereby applied to all read operations on a specific data type. Below is +an example resolver for the class `Foo`: + +```java +import com.basho.riak.client.api.cap.ConflictResolver; + +public class FooResolver implements ConflictResolver<Foo> { + @Override + public Foo resolve(List<Foo> siblings) { + // Insert your sibling resolution logic here + } +} +``` + +What happens within the `resolve` method is up to you and will always +depend on the use case at hand. You can implement a resolver that +selects a random `Foo` from the list, chooses the `Foo` with the most +recent timestamp (if you've set up the class `Foo` to have timestamps), +etc. In this tutorial we'll provide a simple example to get you started. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends" in the network. +Each user will bear the class `User`, which we'll create below. All of +the data for our application will be stored in buckets that bear the +[bucket type]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type +`allow_mult` is set to `true`, which means that Riak will generate +siblings in certain cases---siblings that our application will need to +be equipped to resolve when they arise. + +The question that we need to ask ourselves now is this: if a given user +has sibling values, i.e. if there are multiple `friends` lists and Riak +can't decide which one is most causally recent, which list should be +deemed "correct" from the standpoint of the application? What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? 
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+sibling values have `friends` lists of 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+public class User {
+    public String username;
+    public Set<String> friends;
+
+    public User(String username, Set<String> friends) {
+        this.username = username;
+        this.friends = friends;
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
+need to implement that interface in a way that is specific to the need
+at hand, i.e. taking a list of `User` objects and returning the `User`
+object that has the longest `friends` list:
+
+```java
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied on all reads that involve that object type.
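+
+To illustrate, a read might then look like the following sketch; the
+bucket type, bucket, and key names are placeholders, and the
+surrounding setup (client construction, the registered `UserResolver`)
+is assumed from the examples above:
+
+```java
+Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
+FetchValue fetch = new FetchValue.Builder(key).build();
+FetchValue.Response response = client.execute(fetch);
+
+// Because a resolver is registered for User.class, any siblings are
+// passed through UserResolver here and a single User is returned
+User resolved = response.getValue(User.class);
+```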
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.1/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // key is empty and returning the one sibling if there is only
+        // one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets).
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/nodejs.md
new file mode 100644
index 0000000000..0029a5e5c4
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/nodejs.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "NodeJS"
+    identifier: "usage_conflict_resolution_nodejs"
+    weight: 104
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/conflict-resolution/nodejs
+  - /riak/kv/3.0.1/dev/using/conflict-resolution/nodejs
+
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Node.js client](https://github.com/basho/riak-nodejs-client).
+
+## How the Node.js Client Handles Conflict Resolution
+
+In the Riak Node.js client, the result of a fetch may contain an array
+of sibling objects in its `values` property. If there are no siblings,
+that array will contain a single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68)
+
+So what happens if the length of `rslt.values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `values` array and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `values` array and will
+fetch, update, and store the definitive value.
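+
+As a rough sketch (with placeholder bucket type, bucket, and key
+names), that flow might look like the following; storing back an object
+returned by the fetch carries the vector clock with it:
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings', bucket: 'users', key: 'bashobunny'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    // Ignore the conflicting values: take one returned object (it
+    // carries the vector clock) and set the definitive value on it
+    var riakObj = rslt.values.shift();
+    riakObj.setValue('the definitive value');
+    client.storeValue({
+        bucketType: 'siblings', bucket: 'users', key: 'bashobunny',
+        value: riakObj
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```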
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/conflict-resolution/php.md b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..891818b43c
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,244 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/conflict-resolution/php
+  - /riak/kv/3.0.1/dev/using/conflict-resolution/php
+
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides what is needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+  ->build()
+  ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? What criteria
+should be applied in making that decision? Should the lists be merged?
+Should we pick a `User` object at random?
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+sibling values have `friends` lists of 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+plain PHP array for the `friends` property.
+
+```php
+class User {
+    public $username;
+    public $friends;
+
+    public function __construct($username, array $friends = [])
+    {
+        $this->username = $username;
+        $this->friends = $friends;
+    }
+
+    public function __toString()
+    {
+        return json_encode([
+            'username' => $this->username,
+            'friends' => $this->friends,
+            'friends_count' => count($this->friends)
+        ]);
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```php
+$bashobunny = new User('bashobunny', ['fred', 'barney']);
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('bashobunny', 'users', 'siblings')
+  ->build()
+  ->execute();
+
+echo $response->hasSiblings(); // 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `$response->getSiblings()` array down to one member.
+In our case, we need a function that takes a Riak response object as its argument,
+applies some logic to the list of values contained in the `siblings` property
+of the object, and returns a single value. For our example use case
+here, we'll return the sibling with the longest `friends` list:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    if (!$response->hasSiblings()) {
+        // No siblings, so return the lone object from the response
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $max_key = 0;
+    foreach ($siblings as $key => $sibling) {
+        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+            $max_key = $key;
+        }
+    }
+
+    return $siblings[$max_key];
+}
+```
+
+We can then embed this function into a more general function for fetching
+objects from the `users` bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+      ->buildLocation($username, 'users', 'siblings')
+      ->build()
+      ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.1/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friends list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that, as sketched below, and then store the resulting `User` object.
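+
+The following is only a sketch of what that merge resolver might look
+like. It reuses the response methods shown above (`hasSiblings()`,
+`getSiblings()`, `getData()`); the function name is our own, and the
+write-back step is left out:
+
+```php
+function merge_friends_lists_resolver(Command\Object\Response $response)
+{
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $friends = [];
+    foreach ($siblings as $sibling) {
+        $friends = array_merge($friends, $sibling->getData()['friends']);
+    }
+
+    // The username is the same across siblings, since it is the key;
+    // array_unique drops usernames that appear in more than one sibling
+    $username = $siblings[0]->getData()['username'];
+
+    return new User($username, array_values(array_unique($friends)));
+}
+```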
+
+The drawback to this approach is that it's more or less inevitable that a user
+will remove a friend from their friends list, and then that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets).
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/conflict-resolution/python.md b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/python.md
new file mode 100644
index 0000000000..9041217b40
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/python.md
@@ -0,0 +1,258 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Python"
+    identifier: "usage_conflict_resolution_python"
+    weight: 102
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/conflict-resolution/python
+  - /riak/kv/3.0.1/dev/using/conflict-resolution/python
+
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Python
+client](https://github.com/basho/riak-python-client).
+
+## How the Python Client Handles Conflict Resolution
+
+In the official Python client, every object of the `RiakObject` class
+has a `siblings` property that provides access to a list of an object's
+sibling values. If there are no siblings, that property will return a
+list with only one item. Here's an example of an object with siblings:
+
+```python
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x1081da62c1>]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? The easiest way to resolve siblings automatically with
+the Python client is to create a conflict-resolving function that
+reduces an object's list of sibling values to a single value. Such
+resolution functions can be registered either at the object level or the
+bucket level. A more complete explanation can be found in the section
+directly below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will
+be of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `friends` property that lists the usernames, as
+strings, of the user's friends. We will also create a `to_json` method,
+as we'll be storing each `User` object as JSON:
+
+```python
+class User(object):
+    def __init__(self, username, friends):
+        self.username = username
+        self.friends = friends
+
+    def to_json(self):
+        return vars(self)
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```python
+new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json()
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing and Registering a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = bucket.get('bashobunny')
+
+print len(obj.siblings) > 1
+```
+
+If we get `True`, then there are siblings. So what do we do in that
+case? The Python client allows us to write a conflict resolution hook
+function that will be triggered any time siblings are found, i.e. any
+time `len(obj.siblings) > 1`. A hook function like this needs to take a
+single `RiakObject` object as its argument, apply some sort of logic to
+the list of values contained in the `siblings` property, and ultimately
+reduce that property to a list with a single "correct" value. For our
+example case, we'll keep only the value with the longest `friends` list:
+
+```python
+def longest_friends_list_resolver(riak_object):
+    # We'll specify a lambda function that operates on the length of
+    # each sibling's "friends" list:
+    lm = lambda sibling: len(sibling.data['friends'])
+    # Then we'll reduce the siblings list to contain only the object
+    # with the maximum value for the length of the "friends" list:
+    riak_object.siblings = [max(riak_object.siblings, key=lm), ]
+```
+
+### Registering a Conflict Resolver Function
+
+In the Python client, resolver functions can be registered at the object
+level, as in this example:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.resolver = longest_friends_list_resolver
+
+# Now, when the object is loaded from Riak, it will resolve to a single
+# value instead of multiple values when both commands are executed:
+obj.reload()
+obj.store()
+```
+
+Alternatively, resolvers can be registered at the bucket level, so that
+the resolution is applied to all objects in the bucket:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+bucket.resolver = longest_friends_list_resolver
+
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.reload()
+obj.store()
+
+# The resolver will also be applied if you perform operations using the
+# bucket object:
+
+bucket.get('bashobunny')
+bucket.get('some_other_user')
+```
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that reduces a
+list of discrepant `User` object values to a single value. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including code examples
+from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.1/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists.
We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets). 
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/ruby.md
new file mode 100644
index 0000000000..58369d3b7b
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/conflict-resolution/ruby.md
@@ -0,0 +1,254 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Ruby"
+    identifier: "usage_conflict_resolution_ruby"
+    weight: 101
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/conflict-resolution/ruby
+  - /riak/kv/3.0.1/dev/using/conflict-resolution/ruby
+
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Ruby
+client](https://github.com/basho/riak-ruby-client).
+
+## How the Ruby Client Handles Conflict Resolution
+
+In the official Ruby client, every Riak object has a `siblings` property
+that provides access to a list of that object's sibling values. If there
+are no siblings, that property will return an array with only one item.
+Here's an example of an object with siblings:
+
+```ruby
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? In order to resolve siblings, you need to create a
+resolution function that takes a Riak object and reduces the `siblings`
+array down to a single value. An example is provided in the section
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will be
+of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` and a `friends` property that lists
+the usernames, as strings, of the user's friends. We will also create a
+`to_json` method, as we'll be storing each `User` object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# => {:username=>"riakuser127", :friends=>["captheorem238", "siblingsrule572"]}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
+```
+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username)
+  longest_friends_list_resolver(user_object)
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.1/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friends list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be merged into one
+    # array
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets). + + + diff --git a/content/riak/kv/3.0.1/developing/usage/content-types.md b/content/riak/kv/3.0.1/developing/usage/content-types.md new file mode 100644 index 0000000000..7b6a442d43 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/usage/content-types.md @@ -0,0 +1,192 @@ +--- +title: "Content Types" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Content Types" + identifier: "usage_content_types" + weight: 104 + parent: "developing_usage" +toc: true +aliases: + +--- + +Riak KV is a fundamentally content-agnostic database. You can use it to +store anything you want, from JSON to XML to HTML to binaries to images +and beyond. It's important to note that _all_ objects stored in +Riak need a specified content type. If you don't specify a +content type, the reaction will vary based on your client library: + +```java +// In the Java client, the response when storing an object without +// specifying a content type will depend on what is being stored. If you +// store a Java Map, for example, the client will automatically specify +// that the object is "application/json"; if you store a String, the +// client will specify "application/x-www-form-urlencoded"; POJOs are +// stored as JSON by default, and so on. +``` + +```ruby +# In the Ruby client, you must always specify a content type. If you +# you don't, you'll see the following error: +ArgumentError: content_type is not defined! +``` + +```php +# PHP will default to cURLs default content-type for POST & PUT requests: +# application/x-www-form-urlencoded + +# If you use the StoreObject::buildJsonObject() method when building your command, +# it will store the item with application/json as the content-type +``` + +```python +# In the Python client, the default content type is "application/json". +# Because of this, you should always make sure to specify the content +# type when storing other types of data. +``` + +```csharp +// Using the Riak .NET Client, the response when storing an object without +// specifying a content type will depend on what is being stored. +// If you store a Dictionary, for example, the client will +// automatically specify that the object is "application/json"; +// POCOs are stored as JSON by default, and so on. +``` + +```javascript +// In the Node.js client, the default content type is "application/json". +// Because of this, you should always make sure to specify the content +// type when storing other types of data. +``` + +```erlang +%% In the Erlang client, the response when storing an object without +%% specify8ing a content type will depend on what is being stored. If +%% you store a simple binary, for example, the client will automatically +%% specify that the object is "application/octet-stream"; if you store a +%% string, the client will specify "application/x-erlang-binary"; and so +%% on. +``` + +```golang +// In the Go client, you must always specify a content type. +``` + +Because content type negotiation varies so widely from client to client, +we recommend consulting the documentation for your preferred client for +more information. 
+
+## Specifying Content Type
+
+For all writes to Riak, you will need to specify a content type, for
+example `text/plain` or `application/json`.
+
+```java
+Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(wildeGeniusQuote)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = Riak::RObject.new(bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.raw_data = 'I have nothing to declare but my genius'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->buildObject('I have nothing to declare but my genius!', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = RiakObject(client, bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.data = 'I have nothing to declare but my genius'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
+var obj = new RiakObject(id, "I have nothing to declare but my genius",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('I have nothing to declare but my genius');
+client.storeValue({
+    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
+                       <<"genius">>,
+                       <<"I have nothing to declare but my genius">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("I have nothing to declare but my genius"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("quotes").
+    WithBucket("oscar_wilde").
+    WithKey("genius").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "I have nothing to declare but my genius" \
+  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Please note that POST is also a valid method for writes, for the sake
+# of compatibility
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/creating-objects.md b/content/riak/kv/3.0.1/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..7cef47e820
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/creating-objects.md
@@ -0,0 +1,555 @@
+---
+title: "Creating Objects in Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Creating Objects"
+    identifier: "usage_creating_objects"
+    weight: 100
+    parent: "developing_usage"
+toc: true
+aliases:
+
+---
+
+[usage content types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/content-types
+
+Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
+requests. Here is the basic form of writes:
+
+```
+PUT /types/<type>/buckets/<bucket>/keys/<key>
+
+# If you're using HTTP to interact with Riak, you can also use POST
+```
+
+As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/bucket-types).
+
+The object we're storing will be very simple, just a basic text snippet
+of something that Rufus might say. Let's build the object and then store
+it.
+
+```java
+String quote = "WOOF!";
+Namespace bucket = new Namespace("animals", "dogs");
+Location rufusLocation = new Location(bucket, "rufus");
+RiakObject rufusObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create(quote));
+StoreValue storeOp = new StoreValue.Builder(rufusObject)
+        .withLocation(rufusLocation)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = Riak::RObject.new(bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->buildObject('WOOF!', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var obj = new RiakObject(id, "WOOF!", "text/plain");
+var result = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('WOOF!');
+client.storeValue({
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("WOOF!"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+Notice that we specified both a value for the object, i.e. `WOOF!`, and
+a content type, `text/plain`. See [content types][usage content types] for more information.
+
+Now, you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/3.0.1/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/bucket-types).
+
+### Store an Object
+
+Your application will often have its own method of generating the keys
+for its data, e.g. on the basis of timestamps. If so, storing that data
+is easy. The basic request looks like this.
+
+```
+PUT /types/TYPE/buckets/BUCKET/keys/KEY
+
+# If you're using HTTP, POST can be used instead of PUT. The only
+# difference between POST and PUT is that you should POST in cases where
+# you want Riak to auto-generate a key.
+# More on this can be found in the examples below.
+```
+
+There is no need to intentionally create buckets in Riak. They pop into
+existence when keys are added to them, and disappear when all keys have
+been removed from them. If you don't specify a bucket's type, the type
+[`default`]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) will be applied.
+
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [{w, 3}]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. Like a `GET` request, `300 Multiple
+Choices` may be returned if siblings existed or were created as part of
+the operation, and the response can be dealt with similarly.
+
+Normal HTTP status codes (responses will vary for client libraries):
+
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+For example, using the same object from above:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.RETURN_BODY, true)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3, returnbody: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+options.SetReturnBody(true);
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, returnBody: true, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var viper = riakObj.value;
+    logger.info("dodge viper: %s", viper.toString('utf8'));
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
+```
+
+### Store a New Object and Assign a Random Key
+
+If your application would rather leave key generation up to Riak, issue
+a `POST` request to the bucket URL instead of a `PUT` to a bucket/key
+pair:
+
+```
+POST /types/TYPE/buckets/BUCKET/keys
+```
+
+If you don't pass Riak a `key` name after the bucket, it will know to
+create one for you.
+
+Supported headers are the same as for bucket/key write requests, though
+`X-Riak-Vclock` will never be relevant for these POST requests.
+Supported query parameters are also the same as for bucket/key PUT
+requests.
+
+Normal status codes:
+
+* `201 Created`
+
+This command will store an object in the bucket `random_user_keys`,
+which bears the bucket type `users`.
+
+```java
+Namespace locationWithoutKey = new Namespace("users", "random_user_keys");
+BinaryValue text = BinaryValue.create("{'user':'data'}");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withNamespace(locationWithoutKey)
+        .build();
+String key = client.execute(store).getLocation().getKeyAsString();
+
+// The Java client will assign a random key along the following lines:
+"ZPFF18PUqGW9efVou7EHhfE6h8a"
+```
+
+```ruby
+bucket = client.bucket_type('users').bucket('random_user_keys')
+obj = Riak::RObject.new(bucket)
+obj.content_type = 'application/json'
+obj.raw_data = '{"user":"data"}'
+
+obj.store
+
+# The client will assign a key like the following:
+obj.key
+"GB8fW6DDZtXogK19OLmaJf247DN"
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildBucket('random_user_keys', 'users')
+  ->buildJsonObject(['user'=>'data'])
+  ->build()
+  ->execute();
+
+echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN
+```
+
+```python
+bucket = client.bucket_type('users').bucket('random_user_keys')
+obj = RiakObject(client, bucket)
+obj.content_type = 'application/json'
+obj.data = '{"user":"data"}'
+obj.store()
+
+obj.key
+
+# The Python client will assign a random key along the following lines:
+'ZPFF18PUqGW9efVou7EHhfE6h8a'
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key);
+
+// The .NET client will output a random key similar to this:
+// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q
+```
+
+```javascript
+var user = {
+    user: 'data'
+};
+var options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    returnBody: true, value: user
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var generatedKey = riakObj.getKey();
+    logger.info("Generated key: %s", generatedKey);
+});
+
+// The Node.js client will output a random key similar to this:
+// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2
+```
+
+```erlang
+Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>).
+riakc_pb_socket:put(Pid, Object).
+
+%% The key can be retrieved from the output of the above call.
+%% It will look something like this:
+
+{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>},
+               <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined,
+               undefined}}
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "application/json",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("{'user':'data'}"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithContent(obj).
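+    // Note that no WithKey() is chained here: with no key supplied,
+    // Riak generates one and returns it as rsp.GeneratedKey below.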
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)
+
+// Output:
+// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
+```
+
+```curl
+curl -i -XPOST \
+  -H "Content-Type: text/plain" \
+  -d "this is a test" \
+  http://localhost:8098/types/users/buckets/random_user_keys/keys
+
+# In the output, you should see a Location header that will give you the
+# location of the object in Riak, with the key at the end:
+
+Location: /buckets/random_user_keys/keys/G7FYUXtTsEdru4NP32eijMIRK3o
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/custom-extractors.md b/content/riak/kv/3.0.1/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..1708c1d9f8
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/custom-extractors.md
@@ -0,0 +1,424 @@
+---
+title: "Custom Extractors"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Custom Extractors"
+    identifier: "usage_custom_extractors"
+    weight: 113
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/search/custom-extractors
+  - /riak/kv/3.0.1/dev/search/custom-extractors
+
+---
+
+Solr, and by extension Riak Search, has default extractors for a wide
+variety of data types, including JSON, XML, and plaintext. Riak Search
+ships with the following extractors:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+No specified type | `yz_noop_extractor`
+
+There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/usage/searching-data-types).
+
+If you're working with a data format that does not have a default Solr
+extractor, you can create your own and register it with Riak Search.
+We'll show you how to do so by way of example.
+
+## The Extractor Interface
+
+Creating a custom extractor involves creating an Erlang module that
+implements two functions:
+
+* `extract/1` - Takes the contents of the object and calls `extract/2`
+  with the same contents and an empty list
+* `extract/2` - Takes the contents of the object and returns an Erlang
+  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
+  single field name and a single value associated with that name
+
+The following extractor shows how a pure text extractor implements those
+two functions:
+
+```erlang
+-module(search_test_extractor).
+-include("yokozuna.hrl").
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+extract(Value, Opts) ->
+    FieldName = field_name(Opts),
+    [{FieldName, Value}].
+
+-spec field_name(proplist()) -> any().
+field_name(Opts) ->
+    proplists:get_value(field_name, Opts, text).
+```
+
+This extractor takes the contents of a `Value` and returns a proplist
+with a single field name (in this case `text`) and the single value.
+This function can be run in the Erlang shell. Let's run it providing the
+text `hello`:
+
+```erlang
+> c(search_test_extractor).
+%% {ok, search_test_extractor}
+
+> search_test_extractor:extract("hello").
+
+%% Console output:
+[{text, "hello"}]
+```
+
+Upon running this command, the value `hello` would be indexed in Solr
+under the fieldname `text`. If you wanted to find all objects with a
+`text` field that begins with `Fourscore`, you could use the
+Solr query `text:Fourscore*`, to give just one example.
+
+## An Example Custom Extractor
+
+Let's say that we're storing HTTP header packet data in Riak. Here's an
+example of such a packet:
+
+```
+GET http://www.google.com HTTP/1.1
+```
+
+We want to register the following information in Solr:
+
+Field name | Value | Extracted value in this example
+:----------|:------|:-------------------------------
+`method` | The HTTP method | `GET`
+`host` | The URL's host | `www.google.com`
+`uri` | The URI, i.e. what comes after the host | `/`
+
+The example extractor below would provide the three desired
+fields/values. It relies on the
+[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3)
+function from Erlang's standard library.
+
+```erlang
+-module(yz_httpheader_extractor).
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+%% In this example, we can ignore the Opts variable from the example
+%% above, hence the underscore:
+extract(Value, _Opts) ->
+    {ok,
+        {http_request,
+         Method,
+         {absoluteURI, http, Host, undefined, Uri},
+         _Version},
+        _Rest} = erlang:decode_packet(http, Value, []),
+    [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}].
+```
+
+This file will be stored in a `yz_httpheader_extractor.erl` file (as
+Erlang filenames must match the module name). Now that our extractor has
+been written, it must be compiled and registered in Riak before it can
+be used.
+
+## Registering Custom Extractors
+
+In order to use a custom extractor, you must create a compiled `.beam`
+file out of your `.erl` extractor file and then tell Riak where that
+file is located. Let's say that we have placed our
+`yz_httpheader_extractor.erl` file in the directory `/opt/beams`. First,
+we need to compile that file:
+
+```bash
+erlc yz_httpheader_extractor.erl
+```
+
+To instruct Riak where to find the resulting
+`yz_httpheader_extractor.beam` file, we'll need to add a line to an
+`advanced.config` file in the node's `/etc` directory (more information
+can be found in our documentation on [advanced configuration]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#advanced-configuration)). Here's an
+example:
+
+```advancedconfig
+[
+  %% Other configs
+  {vm_args, [
+    {"-pa /opt/beams", ""}
+  ]},
+  %% Other configs
+]
+```
+
+This will instruct the Erlang VM on which Riak runs to look for compiled
+`.beam` files in the proper directory. You should restart the node at
+this point. Once the node has been restarted, you can use the node's
+Erlang shell to register the `yz_httpheader_extractor`. First, attach to
+the shell:
+
+```bash
+riak attach
+```
+
+At this point, we need to choose a MIME type for our extractor. Let's
+call it `application/httpheader`. Once you're in the shell:
+
+```erlang
+> yz_extractor:register("application/httpheader", yz_httpheader_extractor).
+```
+
+If successful, this command will return a list of currently registered
+extractors. It should look like this:
+
+```erlang
+[{default,yz_noop_extractor},
+ {"application/httpheader",yz_httpheader_extractor},
+ {"application/json",yz_json_extractor},
+ {"application/riak_counter",yz_dt_extractor},
+ {"application/riak_map",yz_dt_extractor},
+ {"application/riak_set",yz_dt_extractor},
+ {"application/xml",yz_xml_extractor},
+ {"text/plain",yz_text_extractor},
+ {"text/xml",yz_xml_extractor}]
+```
+
+If the `application/httpheader` extractor is part of that list, then the
+extractor has been successfully registered.
+
+## Verifying Our Custom Extractor
+
+Now that Riak Search knows how to decode and extract HTTP header packet
+data, let's store some in Riak and then query it. We'll put the example
+packet data from above in a `google_packet.bin` file. Then, we'll `PUT`
+that binary to Riak's `/search/extract` endpoint:
+
+```curl
+# Note that we specify our custom MIME type as the content type:
+curl -XPUT $RIAK_HOST/search/extract \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+That should return the following JSON:
+
+```json
+{
+  "method": "GET",
+  "host": "www.google.com",
+  "uri": "/"
+}
+```
+
+We can also verify this in the Erlang shell (whether in a Riak node's
+Erlang shell or otherwise):
+
+```erlang
+yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor).
+
+%% Console output:
+[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}]
+```
+
+## Indexing and Searching HTTP Header Packet Data
+
+Now that Solr knows how to extract HTTP header packet data, we need to
+create a schema that extends the [default schema]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
+to `<fields>` in the schema, which we'll name `http_header_schema` and
+store in a `http_header_schema.xml` file:
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="http_header_schema" version="1.5">
+<fields>
+  <!-- other required fields here -->
+
+  <field name="method" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="host" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/>
+</fields>
+```
+
+Now, we can store the schema:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("http_header_schema.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_xml = File.read('http_header_schema.xml')
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```php
+$schema_string = file_get_contents('http_header_schema.xml');
+(new \Basho\Riak\Command\Builder\StoreSchema($riak))
+  ->withName('http_header_schema')
+  ->withSchemaString($schema_string)
+  ->build()
+  ->execute();
+```
+
+```python
+schema_xml = open('http_header_schema.xml').read()
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/http_header_schema \
+  -H 'Content-Type: application/xml' \
+  --data-binary @http_header_schema.xml
+```
+
+Riak now has our schema stored and ready for use.
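+
+If you want to double-check the upload before moving on, you can fetch a
+stored schema back by name. Here is a minimal sketch using the official
+Python client (this assumes a local node with Protocol Buffers on the
+default port 8087; `get_search_schema` returns the schema's name and its
+raw XML content):
+
+```python
+from riak import RiakClient
+
+client = RiakClient(pb_port=8087)
+
+# Fetch the schema we just stored and inspect its contents
+schema = client.get_search_schema('http_header_schema')
+print(schema['name'])     # http_header_schema
+print(schema['content'])  # the XML uploaded above
+```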
+Let's create a search index called `header_data` that's associated with
+our new schema:
+
+```java
+YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
+StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreIndex($riak))
+  ->withName('header_data')
+  ->usingSchema('http_header_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/header_data \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"http_header_schema"}'
+```
+
+Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types)
+for all of the HTTP header data that we plan to store. Any bucket that
+bears this type will be associated with our `header_data` search index.
+We'll call our bucket type `http_data_store`.
+
+```bash
+riak admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
+riak admin bucket-type activate http_data_store
+```
+
+Let's use the same `google_packet.bin` file that we used previously and
+store it in a bucket with the `http_data_store` bucket type, making sure
+to use our custom `application/httpheader` MIME type:
+
+```java
+Location key = new Location(new Namespace("http_data_store", "packets"), "google");
+File packetData = new File("google_packet.bin");
+byte[] packetBinary = FileUtils.readFileToByteArray(packetData);
+
+RiakObject packetObject = new RiakObject()
+        .setContentType("application/httpheader")
+        .setValue(BinaryValue.create(packetBinary));
+
+StoreValue storeOp = new StoreValue.Builder(packetObject)
+        .withLocation(key)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+packet_data = File.read('google_packet.bin')
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = Riak::RObject.new(bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.raw_data = packet_data
+obj.store
+```
+
+```php
+$object = new Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('google', 'packets', 'http_data_store')
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+packet_data = open('google_packet.bin').read()
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = RiakObject(client, bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.data = packet_data
+obj.store()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+Now that we have some header packet data stored, we can query our
+`header_data` index on whatever basis we'd like. First, let's verify
+that we'll get one result if we query for objects that have the HTTP
+method `GET`:
+
+```java
+// Using the same method from above:
+String query = "method:GET";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withQuery('method:GET')
+  ->withIndexName('header_data')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"
+
+# This should return a fairly large JSON object with a "num_found" field
+# The value of that field should be 1
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/deleting-objects.md b/content/riak/kv/3.0.1/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..8d36e69189
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/deleting-objects.md
@@ -0,0 +1,157 @@
+---
+title: "Deleting Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Deleting Objects"
+    identifier: "usage_deleting_objects"
+    weight: 103
+    parent: "developing_usage"
+toc: true
+aliases:
+
+---
+
+The delete command follows a predictable pattern and looks like this:
+
+```
+DELETE /types/TYPE/buckets/BUCKET/keys/KEY
+```
+
+The normal HTTP response codes for `DELETE` operations are `204 No
+Content` and `404 Not Found`. 404 responses are *normal*, in the sense
+that `DELETE` operations are idempotent and not finding the resource has
+the same effect as deleting it.
+
+Let's try to delete the `genius` key from the `oscar_wilde` bucket
+(which bears the type `quotes`):
+
+```java
+Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
+client.execute(delete);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\DeleteObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+string key = rslt.Value.Key;
+id = new RiakObjectId("users", "random_user_keys", key);
+var del_rslt = client.Delete(id);
+```
+
+```javascript
+// continuing from above example
+options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    key: generatedKey
+};
+client.deleteValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>).
+```
+
+```golang
+// Continuing from above example
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithKey(rsp.GeneratedKey).
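+    // WithKey() reuses rsp.GeneratedKey, the key Riak generated for the
+    // store operation above, so this delete targets that same object.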
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+```
+
+## Client Library Examples
+
+If you are updating an object that has been deleted---or if an update
+might target a deleted object---we recommend that
+you first fetch the [causal context]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context) of the object prior to updating.
+This can be done by setting the `deletedvclock` parameter to `true` as
+part of the [fetch operation]({{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers/fetch-object). This can also be done
+with the official Riak clients for Ruby, Java, and Erlang, as in the
+example below:
+
+
+```ruby
+object.delete
+deleted_object = bucket.get('key', deletedvclock: true)
+deleted_object.vclock
+```
+
+```python
+# It is not currently possible to fetch the causal context for a deleted
+# key in the Python client.
+```
+
+```java
+Location loc = new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+FetchValue fetch = new FetchValue.Builder(loc)
+        .withOption(Option.DELETED_VCLOCK, true)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+System.out.println(response.getVclock().asString());
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"bucket_type">>, <<"bucket">>},
+                                <<"key">>,
+                                [deletedvclock]).
+
+%% In the Erlang client, the vector clock is accessible using the Obj
+%% object obtained above.
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type')
+  ->build()
+  ->execute();
+
+echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/document-store.md b/content/riak/kv/3.0.1/developing/usage/document-store.md
new file mode 100644
index 0000000000..afaa7a1e7e
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/document-store.md
@@ -0,0 +1,617 @@
+---
+title: "Implementing a Document Store"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Implementing a Document Store"
+    identifier: "usage_document_store"
+    weight: 112
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/search/document-store
+  - /riak/kv/3.0.1/dev/search/document-store
+
+---
+
+Although Riak wasn't explicitly created as a document store, two
+features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/)---make it possible to use Riak as a
+highly scalable document store with rich querying capabilities. In this
+tutorial, we'll build a basic implementation of a document store using
+[Riak maps]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#maps).
+
+## Basic Approach
+
+Riak Search enables you to implement a document store in Riak in a
+variety of ways. You could, for example, store and query JSON objects or
+XML and then retrieve them later via Solr queries. In this tutorial,
+however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#maps),
+index that data using Riak Search, and then run Solr queries against
+those stored objects.
+
+You can think of these Search indexes as **collections**. Each indexed
+document will have an ID generated automatically by Search, and because
+we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/3.0.1/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects.
+
+## Use Case
+
+Let's say that we're building a WordPress-style CMS and storing blog
+posts in Riak. We will be storing the following information about each
+post:
+
+* Title
+* Author
+* Content (the body of the post)
+* Keywords associated with the post
+* Date posted
+* Whether the post has been published on the site
+
+For each of those pieces of information, we'll need to decide on (a)
+which Riak Data Type most directly corresponds and (b) which Solr type
+we want to associate with the info. It's important to bear in mind that
+Riak Data Types can be indexed as a wide variety of things, e.g.
+registers as Solr strings, text fields, or datetimes, sets as
+multi-valued strings, etc. The table below shows which Riak Data Type
+and Solr type we'll be using for each field in our Riak maps.
+
+Info | Riak Data Type | Solr type
+:----|:---------------|:---------
+Post title | Register | String
+Post author | Register | String
+Post content | Register | Text
+Keywords | Set | Multi-valued string
+Date posted | Register | Datetime
+Whether the post has been published | Flag | Boolean
+
+Before we start actually creating and storing blog posts, let's set up
+Riak Search with an appropriate index and schema.
+
+## Creating a Schema and Index
+
+In the documentation on [search schemas]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search-schemas), you'll find a
+baseline schema to be used for creating custom schemas. We'll use that
+baseline schema here and add the following fields to the `<fields>`
+list:
+
+```xml
+<field name="title_register" type="string" indexed="true" stored="true" />
+<field name="author_register" type="string" indexed="true" stored="true" />
+<field name="content_register" type="text" indexed="true" stored="true" />
+<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" />
+<field name="date_register" type="datetime" indexed="true" stored="true" />
+<field name="published_flag" type="boolean" indexed="true" stored="true" />
+```
+
+You can see the full schema [on
+GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml).
+Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @blog_post_schema.xml +``` + +With our schema uploaded, we can create an index called `blog_posts` and +associate that index with our schema: + +```java +YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema"); +StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('blog_posts') + ->usingSchema('blog_post_schema') + ->build() + ->execute(); +``` + +```python +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```csharp +var idx = new SearchIndex("blog_posts", "blog_post_schema"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: 'blog_post_schema', + indexName: 'blog_posts' +}; +client.storeIndex(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/blog_posts \ + -H 'Content-Type: application/json' \ + -d '{"schema": "blog_post_schema"}' +``` + +## How Collections will Work + +Collections are not a concept that is native to Riak but we can easily +mimic collections by thinking of a bucket type as a collection. When we +associate a bucket type with a Riak Search index, all of the objects +stored in any bucket of that bucket type will be queryable on the basis +of that one index. For this tutorial, we'll create a bucket type called +`cms` and think of that as a collection. 
We could also restrict our
+`blog_posts` index to a single bucket just as easily and think of that
+as a queryable collection, but we will not do that in this tutorial.
+
+The advantage of the bucket-type-based approach is that we could store
+blog posts from different blogs in different buckets and query them
+all at once as part of the same index. It depends on the use case at
+hand. In this tutorial, we'll only be storing posts from one blog, which
+is called "Cat Pics Quarterly" and provides in-depth theoretical
+discussions of cat pics with a certain number of Reddit upvotes. All of
+the posts in this blog will be stored in the bucket
+`cat_pics_quarterly`.
+
+First, let's create our `cms` bucket type and associate it with the
+`blog_posts` index:
+
+```bash
+riak admin bucket-type create cms \
+  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
+riak admin bucket-type activate cms
+```
+
+Now, any object stored in any bucket of the type `cms` will be indexed
+as part of our "collection."
+
+## Storing Blog Posts as Maps
+
+Now that we know how each element of a blog post can be translated into
+one of the Riak Data Types, we can create an interface in our
+application to serve as that translation layer. Using the method
+described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-modeling), we can construct a
+class that looks like this:
+
+```java
+import java.util.Set;
+
+public class BlogPost {
+    private String title;
+    private String author;
+    private String content;
+    private Set<String> keywords;
+    private DateTime datePosted;
+    private Boolean published;
+    private static final String bucketType = "cms";
+
+    private Location location;
+
+    private RiakClient client;
+
+    public BlogPost(RiakClient client,
+                    String bucketName,
+                    String title,
+                    String author,
+                    String content,
+                    Set<String> keywords,
+                    DateTime datePosted,
+                    Boolean published) {
+        this.client = client;
+        this.location = new Location(new Namespace(bucketType, bucketName), null);
+        this.title = title;
+        this.author = author;
+        this.content = content;
+        this.keywords = keywords;
+        this.datePosted = datePosted;
+        this.published = published;
+    }
+
+    public void store() throws Exception {
+        RegisterUpdate titleUpdate = new RegisterUpdate(title);
+        RegisterUpdate authorUpdate = new RegisterUpdate(author);
+        RegisterUpdate contentUpdate = new RegisterUpdate(content);
+        SetUpdate keywordsUpdate = new SetUpdate();
+        for (String keyword : keywords) {
+            keywordsUpdate.add(keyword);
+        }
+        RegisterUpdate dateUpdate =
+            new RegisterUpdate(datePosted.toString("yyyy-MM-dd HH:mm"));
+        FlagUpdate publishedUpdate = new FlagUpdate(published);
+        MapUpdate mapUpdate = new MapUpdate()
+            .update("title", titleUpdate)
+            .update("author", authorUpdate)
+            .update("content", contentUpdate)
+            .update("keywords", keywordsUpdate)
+            .update("date", dateUpdate)
+            .update("published", publishedUpdate);
+        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
+            .build();
+        client.execute(storeBlogPost);
+    }
+}
+```
+
+```ruby
+class BlogPost
+  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
+    bucket = client.bucket_type('cms').bucket(bucket_name)
+    map = Riak::Crdt::Map.new(bucket, nil)
+    map.batch do |m|
+      m.registers['title'] = title
+      m.registers['author'] = author
+      m.registers['content'] = content
+      keywords.each do |k|
+        m.sets['keywords'].add(k)
+      end
+      m.registers['date'] = date_posted
+      if published
+        m.flags['published'] = true
+      end
+    end
+  end
+end
+```
+
+```php
+class BlogPost {
+    private $title = '';
+    private $author = '';
+    private $content = '';
+    private $keywords = [];
+    private $datePosted = '';
+    private $published = false;
+    private $bucketType = "cms";
+
+    private $bucket = null;
+
+    private $riak = null;
+
+    public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
+    {
+        $this->riak = $riak;
+        $this->bucket = new Bucket($bucket, $this->bucketType);
+        $this->title = $title;
+        $this->author = $author;
+        $this->content = $content;
+        $this->keywords = $keywords;
+        $this->datePosted = $date;
+        $this->published = $published;
+    }
+
+    public function store()
+    {
+        $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));
+
+        foreach($this->keywords as $keyword) {
+            $setBuilder->add($keyword);
+        }
+
+        (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
+            ->updateRegister('title', $this->title)
+            ->updateRegister('author', $this->author)
+            ->updateRegister('content', $this->content)
+            ->updateRegister('date', $this->datePosted)
+            ->updateFlag('published', $this->published)
+            ->updateSet('keywords', $setBuilder)
+            ->withBucket($this->bucket)
+            ->build()
+            ->execute();
+    }
+}
+```
+
+```python
+from riak.datatypes import Map
+
+class BlogPost:
+    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
+        bucket = client.bucket_type('cms').bucket(bucket_name)
+        self.map = Map(bucket, None)
+        self.map.registers['title'].assign(title)
+        self.map.registers['author'].assign(author)
+        self.map.registers['content'].assign(content)
+        for k in keywords:
+            self.map.sets['keywords'].add(k)
+        self.map.registers['date'].assign(date_posted)
+        if published:
+            self.map.flags['published'].enable()
+        self.map.store()
+```
+
+```csharp
+/*
+ * Please see the code in the RiakClientExamples project:
+ * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
+ */
+```
+
+```javascript
+/*
+ * Please see the code in the examples repository:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
+ */
+```
+
+Now, we can store some blog posts.
We'll start with just one:
+
+```java
+Set<String> keywords = new HashSet<String>();
+keywords.add("adorbs");
+keywords.add("cheshire");
+
+BlogPost post1 = new BlogPost(client, // client object
+                              "cat_pics_quarterly", // bucket
+                              "This one is so lulz!", // title
+                              "Cat Stevens", // author
+                              "Please check out these cat pics!", // content
+                              keywords, // keywords
+                              new DateTime(), // date posted
+                              true); // published
+try {
+    post1.store();
+} catch (Exception e) {
+    System.out.println(e);
+}
+```
+
+```ruby
+keywords = ['adorbs', 'cheshire']
+date = Time.now.strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost.new('cat_pics_quarterly',
+                          'This one is so lulz!',
+                          'Cat Stevens',
+                          'Please check out these cat pics!',
+                          keywords,
+                          date,
+                          true)
+```
+
+```php
+$keywords = ['adorbs', 'cheshire'];
+$date = new \DateTime('now');
+
+$post1 = new BlogPost(
+    $riak, // client object
+    'cat_pics_quarterly', // bucket
+    'This one is so lulz!', // title
+    'Cat Stevens', // author
+    'Please check out these cat pics!', // content
+    $keywords, // keywords
+    $date, // date posted
+    true // published
+);
+```
+
+```python
+import datetime
+
+keywords = ['adorbs', 'cheshire']
+date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost('cat_pics_quarterly',
+                      'This one is so lulz!',
+                      'Cat Stevens',
+                      'Please check out these cat pics!',
+                      keywords,
+                      date,
+                      True)
+```
+
+```csharp
+var keywords = new HashSet<string> { "adorbs", "cheshire" };
+
+var post = new BlogPost(
+    "This one is so lulz!",
+    "Cat Stevens",
+    "Please check out these cat pics!",
+    keywords,
+    DateTime.Now,
+    true);
+
+var repo = new BlogPostRepository(client, "cat_pics_quarterly");
+string id = repo.Save(post);
+```
+
+```javascript
+var post = new BlogPost(
+    'This one is so lulz!',
+    'Cat Stevens',
+    'Please check out these cat pics!',
+    [ 'adorbs', 'cheshire' ],
+    new Date(),
+    true
+);
+
+var repo = new BlogPostRepository(client, 'cat_pics_quarterly');
+
+repo.save(post, function (err, rslt) {
+    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
+});
+```
+
+## Querying
+
+Now that we have some blog posts stored in our "collection," we can
+start querying for whatever we'd like. Let's say that we want to find
+all blog posts with the keyword `funny` (after all, some cat pics are
+quite serious, and we may not want those).
+
+```java
+String index = "blog_posts";
+String query = "keywords_set:funny";
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'keywords_set:funny')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('keywords_set:funny')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'keywords_set:funny')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('keywords_set:funny')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny"
+```
+
+Or we can find posts that contain the word `furry`:
+
+```java
+String index = "blog_posts";
+String query = "content_register:furry";
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+```
+
+```ruby
+results = client.search('blog_posts', 'content_register:furry')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('blog_posts')
+  ->withQuery('content_register:furry')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('blog_posts', 'content_register:furry')
+```
+
+```csharp
+var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry");
+var rslt = client.Search(searchRequest);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('blog_posts')
+    .withQuery('content_register:furry')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```curl
+curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry"
+```
+
+Here are some more possible queries:
+
+Info | Query
+:----|:-----
+Unpublished posts | `published_flag:false`
+Titles that begin with `Loving*` | `title_register:Loving*`
+Post bodies containing the words `furry` and `jumping` | `content_register:(furry AND jumping)`
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/mapreduce.md b/content/riak/kv/3.0.1/developing/usage/mapreduce.md
new file mode 100644
index 0000000000..ee9de9e0cf
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/mapreduce.md
@@ -0,0 +1,246 @@
+---
+title: "Using MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Using MapReduce"
+    identifier: "usage_mapreduce"
+    weight: 106
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/mapreduce
+  - /riak/kv/3.0.1/dev/using/mapreduce
+
+---
+
+[usage 2i]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/secondary-indexes
+[usage search]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search
+[usage types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types
+[api http]: {{<baseurl>}}riak/kv/3.0.1/developing/api/http
+[api pb]: {{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode
+[guide mapreduce]: 
{{<baseurl>}}riak/kv/3.0.1/developing/app-guide/advanced-mapreduce + +{{% note title="Use MapReduce sparingly" %}} +In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive to the extent that they can degrade performance in +production clusters operating under load. Thus, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. +{{% /note %}} + +MapReduce (M/R) is a technique for dividing data processing work across +a distributed system. It takes advantage of the parallel processing +power of distributed systems and also reduces network bandwidth, as the +algorithm is passed around to where the data lives rather than +transferring a potentially huge dataset to a client algorithm. + +You can use MapReduce for things like: filtering documents by +tags, counting words in documents, and extracting links to related data. +In Riak KV, MapReduce is one method for querying that is not strictly based +on key querying, alongside [secondary indexes][usage 2i] +and [search][usage search]. MapReduce jobs can be submitted through the +[HTTP API][api http] or the [Protocol Buffers API][api pb], although we +strongly recommend using the Protocol Buffers API for performance +reasons. + +## Features + +* Map phases execute in parallel with data locality. +* Reduce phases execute in parallel on the node where the job was + submitted. +* MapReduce queries written in Erlang. + +## When to Use MapReduce + +* When you know the set of objects over which you want to MapReduce + (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key) +* When you want to return actual objects or pieces of objects and not + just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on + non-key-based queries, but they only return lists of keys and not + whole objects. +* When you need the utmost flexibility in querying your data. MapReduce + gives you full access to your object and lets you pick it apart any + way you want. + +## When Not to Use MapReduce + +* When you want to query data over an entire bucket. MapReduce uses a + list of keys, which can place a lot of demand on the cluster. +* When you want latency to be as predictable as possible. + +## How it Works + +The MapReduce framework helps developers divide a query into steps, +divide the dataset into chunks, and then run those step/chunk pairs in +separate physical hosts. + +There are two steps in a MapReduce query: + +* **Map** - The data collection phase, which breaks up large chunks of + work into smaller ones and then takes action on each chunk. Map + phases consist of a function and a list of objects on which the map + operation will operate. +* **Reduce** - The data collation or processing phase, which combines + the results from the map step into a single output. The reduce phase + is optional. + +Riak KV MapReduce queries have two components: + +* A list of inputs +* A list of phases + +The elements of the input list are object locations as specified by +[bucket type][usage types], bucket, and key. The elements of the +phases list are chunks of information related to a map, a reduce, or a +link function. + +A MapReduce query begins when a client makes the request to Riak KV. 
The
+node that the client contacts to make the request becomes the
+*coordinating node* responsible for the MapReduce job. As described
+above, each job consists of a list of phases, where each phase is either
+a map or a reduce phase. The coordinating node uses the list of phases
+to route the object keys and the function that will operate over the
+objects stored in those keys and instruct the proper [vnode][glossary vnode] to
+run that function over the right objects.
+
+After running the map function, the results are sent back to the
+coordinating node. This node then concatenates the list and passes that
+information over to a reduce phase on the same coordinating node,
+assuming that the next phase in the list is a reduce phase.
+
+The diagram below provides an illustration of how a coordinating vnode
+orchestrates a MapReduce job.
+
+![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png)
+
+## Example
+
+In this example, we'll create four objects with the text "caremad"
+repeated a varying number of times and store those objects in the bucket
+`training` (which does not bear a [bucket type][usage types]).
+An Erlang MapReduce function will be used to count the occurrences of
+the word "caremad."
+
+### Data object input commands
+
+For the sake of simplicity, we'll use [curl](http://curl.haxx.se/)
+in conjunction with Riak KV's [HTTP API][api http] to store the objects:
+
+```curl
+curl -XPUT http://localhost:8098/buckets/training/keys/foo \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad data goes here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bar \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad caremad'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/baz \
+  -H 'Content-Type: text/plain' \
+  -d 'nothing to see here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bam \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad'
+```
+
+### MapReduce invocation
+
+Invoking a MapReduce function from a compiled Erlang program requires
+that the function be compiled and distributed to all nodes.
+
+For interactive use, however, it's not necessary to do so; instead, we
+can invoke the client library from the
+[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define
+functions to send to Riak KV on the fly.
+
+First we define the map function, which specifies that we want to get
+the key for each object in the bucket `training` that contains the text
+`caremad`.
+
+We're going to generalize and optimize it a bit by supplying a
+compiled regular expression when we invoke MapReduce; our function
+will expect that as the third argument.
+
+```erlang
+ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of
+    {match, Matches} -> [{riak_object:key(O), length(Matches)}];
+    nomatch -> [{riak_object:key(O), 0}]
+end end.
+```
+
+Next, to call `ReFun` on all keys in the `training` bucket, we can do
+the following in the Erlang shell.
+
+{{% note title="Warning" %}}
+Do not use this in a production
+environment; listing all keys to identify those in the `training` bucket
+is a very expensive process.
+{{% /note %}}
+
+```erlang
+{ok, Re} = re:compile("caremad").
+```
+
+That will return output along the following lines, verifying that
+compilation has completed:
+
+```
+{ok,{re_pattern,0,0,
+    <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100,
+      ...>>}}
+```
+
+Then, we can create a socket link to our cluster:
+
+```erlang
+{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087).
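+%% 8087 is the default Protocol Buffers port for a local Riak node.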
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+                              [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
+
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/next-gen-replication.md b/content/riak/kv/3.0.1/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..08a8f8ee05
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/next-gen-replication.md
@@ -0,0 +1,152 @@
+---
+title: "Next-Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "3.0.1"
+menu:
+  riak_kv-3.0.1:
+    name: "Next Gen Replication"
+    identifier: "learn_concepts_next_gen_replication"
+    weight: 108
+    parent: "learn_concepts"
+version_history:
+  in: "2.9.1+"
+toc: true
+aliases:
+  - /riak-docs/riak/3.0.1/dev/using/nextgenreplication
+---
+[concept TicTac aae]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/tictac-active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/replication
+
+## Next Generation Replication - How it Works
+
+### Replication Actors
+
+Each node in `riak_kv` starts three processes that manage the inter-cluster replication: a tictac AAE full-sync manager, a replication queue source manager, and a replication queue sink manager. All processes are started by default (whether or not replication is enabled), but will only play an active role should replication be configured. Further details on the processes involved:
+
+* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`
+
+    * There is a single actor on each node that manages the full-sync reconciliation workload configured for that node.
+
+    * Each node is configured with the details of a peer node at a remote cluster. Each manager is responsible for controlling cluster-wide hashtree exchanges between the local node and the peer node, and for prompting any repairs required across the cluster (not just on this node). The information is exchanged between the peers, but that information represents the data across the whole cluster. Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`.
Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`.
+
+  * Each node is configured with a schedule to determine how frequently this manager will run its reconcile and repair operations.
+
+  * It is an administrator's responsibility to ensure the cluster AAE workload is distributed across nodes with sufficient diversity to ensure correct operation under failure. Work is not re-distributed between nodes in response to failure on either the local or remote cluster, so there must be other nodes already configured to share that workload to continue operation under failure conditions.
+
+  * Each node can only full-sync with one other cluster (via the one peer node). If the cluster needs to full-sync with more than one cluster, then the administrator should ensure different nodes have the different configurations necessary to achieve this.
+
+  * Scheduling of work to minimise concurrency of reconciliation operations is managed by this actor using a simple, coordination-free mechanism.
+
+  * The administrator may at run-time suspend or resume the regular running of full-sync operations on any given node via the `riak_kv_ttaaefs_manager`.
+
+* __Replication Queue Source-Side Manager__
+
+  * There is a single actor on each node that manages the queueing of replication object references to be consumed by other clusters. This actor runs a configurable number of queues, which contain pointers to data which is required to be consumed by different remote clusters.
+
+  * The general pattern is that each delta within a cluster will be published once via the `riak_kv_replrtq_src` on a node local to the discovery of the change. Each queue which is a source of updates will have multiple consumers spread across multiple sink nodes on the receiving cluster - where each sink-side node's consumers are being managed by a `riak_kv_replrtq_snk` process on that node.
+
+  * Queues may have data filtering rules to restrict what changes are distributed via that queue. The filters can restrict replication to a specific bucket or bucket type, or to a bucket name prefix, or can allow any change to be published to that queue.
+
+  * __Real-time replication__ changes (i.e. PUTs that have just been co-ordinated on this node within the cluster) are sent to the `riak_kv_replrtq_src` in one of the following formats:
+    * {Bucket, Key, Clock, {tombstone, Object}};
+    * {Bucket, Key, Clock, {object, Object}};
+    * {Bucket, Key, Clock, to_fetch}.
+
+  * Real-time replicated objects are the highest priority items to be queued, and are placed on __every queue whose data filtering rules are matched__ by the object. If the priority queue has grown beyond a limited number of items (the number being defined in `riak_kv.replrtq_srcobjectlimit`), then any {object, Object} references are stripped and replaced with `to_fetch`. This is to help limit the memory consumed by the queue during failure conditions, i.e. when a sink has stopped consuming from the source queue.
+
+  * Changes identified by __AAE full-sync replication__ processes run by the `riak_kv_ttaaefs` manager on the local node are sent to the `riak_kv_replrtq_src` as references, and queued as the second highest priority. These changes are queued only on __a single queue defined within the configuration__ of `riak_kv_ttaaefs_manager`. The changes queued are only references to the object (Bucket, Key and Clock), not the actual object.
+
+  * Changes identified by __AAE fold operations__ for administrator-initiated transition or repair operations (e.g. fold over a bucket or key-range, or for a given range of modified dates) are sent to the `riak_kv_replrtq_src` to be queued as the lowest priority onto __a single queue defined by the administrator when initiating the AAE fold operation__. The changes queued are only references to the object (Bucket, Key and Clock), not the actual object - and are only the changes discovered through the fold running on vnodes local to this node.
+
+  * Should the local node fail, all undelivered object references will be dropped.
+
+  * Queues are bounded, with limits set separately for each priority. Items are consumed from the queue in strict priority order. So a backlog of non-real-time replication events cannot cause a backlog or failure in real-time events.
+
+  * The queues are provided using the existing `riak_core_priority_queue` module in Riak.
+
+  * The administrator may at run-time suspend or resume the publishing of data to specific queues via the `riak_kv_replrtq_src` process.
+
+* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk`
+
+  * There is a single actor on each node that manages the process of consuming from queues on the `riak_kv_replrtq_src` on remote clusters.
+
+  * The `riak_kv_replrtq_snk` can be configured to consume from multiple queues, across an open-ended number of peers. For instance, if each node on Cluster A maintains a queue named `cluster_c_full`, and each node on Cluster B maintains a queue named `cluster_c_partial` - then `riak_kv_replrtq_snk` can be configured to consume from the `cluster_c_full` queue on every node in Cluster A and from the `cluster_c_partial` queue on every node in Cluster B.
+
+  * The `riak_kv_replrtq_snk` manages a finite number of workers for consuming from remote peers. The `riak_kv_replrtq_snk` tracks the results of work in order to back off slightly from peers regularly not returning results to consume requests (in favour of those peers indicating a backlog by regularly returning results). The `riak_kv_replrtq_snk` also tracks the results of work in order to back off severely from those peers returning errors (so as not to lock too many workers consuming from unreachable nodes).
+
+  * The administrator may at run-time suspend or resume the consuming of data from specific queues or peers via the `riak_kv_replrtq_snk`.
+
+### Real-time Replication - Step by Step
+
+Previous replication implementations initiated replication through a post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm` after "enough" responses have been received from other vnodes (based on the n, w, dw and pw values for the PUT). Without enough responses, the replication hook is not fired, although the client should receive an error and retry. This process of retrying may eventually fire the hook - although it is possible for a PUT to fail, the hook never to be fired, and yet a GET to be locally successful (due to read-repair and anti-entropy), leaving no clue that the object has not been replicated.
+
+In implementing the new replication solution, the point of firing off replication has been changed to the point at which the co-ordinated PUT is completed. So the replication of the PUT to remote clusters may occur in parallel with the replication of the PUT to other nodes in the source cluster. This is the first opportunity where sufficient information is known (e.g.
the updated vector clock), and reduces the size of the time-window of inconsistency between the clusters, and also reduces the window of opportunity for a PUT to succeed but not have replication triggered.
+
+Replication is fired within the `riak_kv_vnode` `actual_put/8`. On condition of the vnode being a co-ordinator of the PUT, and of `riak_kv.replrtq_enablesrc` being set to enabled (true), the following work is done:
+
+- The object reference to be replicated is determined; this is the type of reference to be placed on the replication queue.
+
+  - If the object is now a tombstone, the whole object is used as the replication reference. The whole object is used due to the small size of the object, and the need to avoid race conditions with reaping activity if `delete_mode` is not `keep` - the cluster may not be able to fetch the tombstone to replicate in the future. The whole object must be kept on the queue and not be filtered by the `riak_kv_replrtq_src` to be replaced with a `to_fetch` reference.
+
+  - If the object is below the `riak_kv.replrtq_srcobjectsize` (default 200KB) then the whole object will be sent to the `riak_kv_replrtq_src`, and it will be queued as a whole object as long as the current size of the priority real-time queue does not exceed the `riak_kv.replrtq_srcobjectlimit` (default 1000). If an object is over the size limit a `to_fetch` reference will be sent instead of the object, and if the queue is too large the `riak_kv_replrtq_src` will substitute a `to_fetch` reference before queueing.
+
+- The `{Bucket, Key, Clock, ObjectReference}` is cast to the `riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the priority queue.
+
+- The queue has a configurable absolute limit, which is applied individually for each priority. The limit is configured via `riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5 minutes of traffic at 1,000 PUTs per second). When this limit is reached, new replication references are discarded on receipt rather than queued - these discarded references will need to eventually be re-replicated via full-sync.
+
+The reference now needs to be handled by the `riak_kv_replrtq_src`. The task list for this process is:
+
+- Assign a priority to the replication event depending on what prompted the replication (e.g. highest priority to real-time events received from co-ordinator vnodes).
+
+- Add the reference to the tail of __every__ matching queue, based on priority. Each queue is configured to match either `any` replication event, no real-time events (using the configuration `block_rtq`), or a subset of events (using either a bucket `type` filter or a `bucket` filter).
+
+In order to replicate the object, it must now be fetched from the queue by a sink. A sink-side cluster should have multiple consumers, on multiple nodes, consuming from each node in the source-side cluster. These workers are handed work items by the `riak_kv_replrtq_snk`, with a Riak client configured to communicate to the remote node, and the worker will initiate a `fetch` from that node.
+
+On receipt of the `fetch` request the source node should:
+
+- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in place of `{Bucket, Key}`.
+
+- The GET FSM should go directly into the `queue_fetch` state, and try to fetch the next replication reference from the given queue name via the `riak_kv_replrtq_src`.
+
+  - If the fetch from the queue returns `queue_empty` this is relayed back to the sink-side worker, and ultimately the `riak_kv_replrtq_snk`, which may then slow down the pace at which fetch requests are sent to this node/queue combination. To reduce the volume of individual requests when queues are mainly empty, the queue is only considered empty if it has reported empty 8 times from requests 4ms apart.
+
+  - If the fetch returns an actual object, this is relayed back to the sink worker.
+
+  - If the fetch returns a replication reference with the flag `to_fetch`, the `riak_kv_get_fsm` will continue down the standard path of states starting with `prepare`, and fetch the object, which will then be returned to the sink worker.
+
+- If a successful fetch is relayed back to the sink worker, it will replicate the PUT using a local `riak_client:push/4`. The push will complete a PUT of the object on the sink cluster - using a `riak_kv_put_fsm` with appropriate options (e.g. `asis`, `disable-hooks`).
+
+  - The code within the `riak_client:push/4` follows the behaviour of the existing `riak_repl` on receipt of a replicated object.
+
+- If the fetch and push request fails, the sink worker will report this back to the `riak_kv_replrtq_snk`, which should delay further requests to that node/queue so as to avoid rapidly tying up sink workers communicating with a failing node.
+
+
+### Full-Sync Reconciliation and Repair - Step by Step
+
+The `riak_kv_ttaaefs_manager` controls the full-sync replication activity of a node. Each node is configured with a single peer with which it is to run full-sync checks and repairs, assuming that across the cluster sufficient peers to sufficient clusters have been configured to complete the overall work necessary for that cluster. Ensuring there are sufficient peer relations is an administrator's responsibility; there is no automatic re-balancing or re-scaling of this work during failure scenarios.
+
+The `riak_kv_ttaaefs_manager` is a source-side process. It will not attempt to repair any discovered discrepancies where the remote cluster is ahead of the local cluster - the job of the process is to ensure that a remote cluster is up-to-date with the changes which have occurred in the local cluster. For mutual full-sync replication, there will be a need for an equivalent configuration on the peer cluster.
+
+The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the configuration. The schedule is expressed as a set of wants: the number of times per day that this manager should:
+
+- Reconcile changes across the whole cluster over all time;
+
+- Skip work for a schedule slot and do nothing;
+
+- Reconcile changes that have occurred in the past hour;
+
+- Reconcile changes that have occurred in the past day.
+
+On startup, the manager looks at these wants and provides a random distribution of work across slots. The day is divided into slots evenly distributed so there is a slot for each want in the schedule. It will run work for the slot at an offset from the start of the slot, based on the place this node has in the sorted list of currently active nodes. So if each node is configured with the same total number of wants, work will be synchronised to have limited overlapping work within the cluster.
+
+When, on a node, a scheduled piece of work comes due, the `riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work between the two clusters (using the peer configuration to reach the remote cluster).
Once the work is finished, it will schedule the next piece of work - unless the start time for the next piece of work has already passed, in which case the next work is skipped. When all the work in the schedule is complete, a new schedule is calculated from the wants.
+
+When starting an `aae_exchange` the `riak_kv_ttaaefs_manager` must pass in a repair function. This function will compare clocks from identified discrepancies, and where the source cluster is ahead of the sink, send the `{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name on `riak_kv_replrtq_src`. These queued entries will then be replicated through being fetched by the `riak_kv_replrtq_snk` workers, although this will only occur when there is no higher priority work to replicate, i.e. real-time replication events prompted by locally co-ordinated PUTs.
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/reading-objects.md b/content/riak/kv/3.0.1/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..5045f92e5e
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/reading-objects.md
@@ -0,0 +1,252 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+aliases:
+
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Object Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
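+    // Build() only constructs the command; to actually perform the
+    // fetch, pass the command to cluster.Execute(cmd), as in the
+    // full example further down this page.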
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object
+`pr` | `0` | How many [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true` and the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+        .withOption(FetchOption.R, new Quorum(3))
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` + + + diff --git a/content/riak/kv/3.0.1/developing/usage/replication.md b/content/riak/kv/3.0.1/developing/usage/replication.md new file mode 100644 index 0000000000..09c4742e27 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/usage/replication.md @@ -0,0 +1,592 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.1/dev/advanced/replication-properties + - /riak/kv/3.0.1/dev/advanced/replication-properties + +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/3.0.1/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/3.0.1/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, as shown in
+the [Client-level Replication Settings](#client-level-replication-settings)
+section below.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/3.0.1/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/3.0.1/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level.
You can use [bucket types]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below](#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes are failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+This write will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak.
When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum`
+to `true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
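+
+To make that arithmetic concrete, here is a minimal Erlang sketch of the
+quorum calculation described above (illustrative only, not code taken
+from Riak itself):
+
+```erlang
+%% A quorum is a majority of the N replicas: floor(N/2) + 1.
+Quorum = fun(N) -> (N div 2) + 1 end.
+
+%% Quorum(3) =:= 2, Quorum(5) =:= 3, Quorum(7) =:= 4.
+```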
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ...
}}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/3.0.1/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/3.0.1/developing/getting-started)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/3.0.1/learn/concepts/causal-context#siblings">siblings</a>
+2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/3.0.1/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/search-schemas.md b/content/riak/kv/3.0.1/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..fb8e543b30
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/search-schemas.md
@@ -0,0 +1,511 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/advanced/search-schema
+  - /riak/kv/3.0.1/dev/advanced/search-schema
+
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/), and [more]({{<baseurl>}}riak/kv/3.0.1/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value.
Sufficiently large objects indexed through this catch-all can
+take up tremendous amounts of disk space, so pay special attention to
+those indexes.
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('cartoons.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('cartoons.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+xml_file.close()
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon Thundercats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above objects into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+
+```
+name=Lion-o
+age=30
+leader=true
+aliases.name=León-O
+aliases.desc_es=Señor de los ThunderCats
+aliases.name=Starlion
+aliases.desc_fr=Le jeune seigneur des Cosmocats
+```
+
+This means that our schema should handle `name`, `age`, `leader`,
+`aliases.name` (a `dot` is a valid field character), and
+`aliases.desc_*`, where the suffix identifies the language of the
+description (Spanish and French).
+
+### Required Schema Fields
+
+Solr schemas can be very complex, containing many types and analyzers.
+Refer to the [Solr 4.7 reference
+guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf)
+for a complete list. You should be aware, however, that there are a few
+fields that are required by Riak Search in order to properly distribute
+an object across a [cluster][concept clusters]. These fields are all prefixed
+with `_yz`, which stands for
+[Yokozuna](https://github.com/basho/yokozuna), the original code name
+for Riak Search.
+
+Below is a bare-minimum skeleton Solr schema. It won't do much for you
+other than allow Riak Search to properly manage your stored objects.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+ </types>
+</schema>
+```
+
+If you're missing any of the above fields, Riak Search will reject your
+custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`.
+
+In the table below, you'll find a description of the various required
+fields. You'll rarely need to use any fields other than `_yz_rt` (bucket
+type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err`
+can be helpful if you suspect that your extractors are failing.
+Malformed JSON or XML will cause Riak Search to index a key and set
+`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-------|:-----|:-----------
+`_yz_id` | ID | Unique identifier of this Solr document
+`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy)
+`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag` | VTag | If the object has siblings, the vtag differentiates them
+`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Flag indicating whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you need to map the name and type of each field (string,
+integer, boolean, etc.).
+
+When creating fields, you can either define specific fields via the
+`field` element or wildcard fields via `dynamicField`, using an asterisk
+(`*`) in the field name. A value that matches a specific field name will
+use that field; otherwise, Solr will attempt to match it against a
+dynamic field pattern.
+
+Besides a field `type`, you must also decide whether a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored`, you
+can get the value back as a result of a query, but it also doubles the
+storage of the field (once in Riak, again in Solr). If a single Riak
+object can have more than one copy of the same matching field, you must
+also set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+   <field name="name" type="string" indexed="true" stored="true" />
+   <field name="age" type="int" indexed="true" stored="false" />
+   <field name="leader" type="boolean" indexed="true" stored="false" />
+   <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_fr" type="text_fr" indexed="true" stored="true" multiValued="true" />
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+```
+
+Next, take note of the types you used in the fields and ensure that each
+of the field types is defined as a `fieldType` under the `types`
+element.
Basic types such as `string`, `boolean`, and `int` have matching
+Solr classes. There are dozens more types, including several numeric
+types (`float`, `tdouble`), `date` fields, and even geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we mapped any field that ends with
+`_es` to Spanish, and any field that ends with `_fr` to French.
+
+```xml
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- Spanish -->
+    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+        <filter class="solr.SpanishLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+      </analyzer>
+    </fieldType>
+
+    <!-- French -->
+    <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
+        <filter class="solr.FrenchLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
+      </analyzer>
+    </fieldType>
+ </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+submitted for indexing without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO 8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to this will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+        at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+        at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+        at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+        at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+        at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+        ...
+        ...
+        ...
+
+```
+
+### Uploading
+
+Once you have saved your custom schema as an `.xml` file, you can
+upload it to Riak KV as follows:
+
+```curl
+curl -v -XPUT $RIAK_HOST/search/schema/thundercats \
+  -H 'Content-Type: application/xml' \
+  --data-binary @thundercats_schema.xml
+```
+
+
+
+## Field Properties By Use Case
+
+Sometimes it can be tricky to decide whether a value should be `stored`,
+or whether `multiValued` is allowed. This handy table from the [Solr
+documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case)
+may help you pick field properties.
+
+An entry of `true` or `false` in the table indicates that the option
+must be set to the given value for the use case to function correctly.
+If no entry is provided, the setting of that attribute has no impact on
+the case.
+
+<table class="schemausecase">
+<thead>
+<tr>
+<th>Use Case</th>
+<th><code>indexed</code></th>
+<th><code>stored</code></th>
+<th><code>multiValued</code></th>
+<th><code>omitNorms</code></th>
+<th><code>termVectors</code></th>
+<th><code>termPositions</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>search within field</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>retrieve contents</td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use as unique key</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>sort on field</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td><code>true</code>[1](#notes)</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use field boosts[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>document boosts affect searches within field</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>highlighting</td>
+<td><code>true</code>[4](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td>[2](#notes)</td>
+<td><code>true</code>[3](#notes)</td>
+</tr>
+<tr>
+<td>faceting[5](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>add multiple values, maintaining order</td>
+<td></td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>field length affects doc score</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>MoreLikeThis[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>true</code>[6](#notes)</td>
+<td></td>
+</tr>
+</tbody></table>
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/search.md b/content/riak/kv/3.0.1/developing/usage/search.md
new file mode 100644
index 0000000000..906f1fe15d
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/search.md
@@ -0,0 +1,1455 @@
+---
+title: "Using Search"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Searching"
+    identifier: "usage_searching"
+    weight: 105
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/search
+  - /riak/kv/3.0.1/dev/using/search
+
+---
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak Search 2.0 is an integration of Solr (for indexing and querying)
+and Riak
(for storage and
+distribution). There are a few points of
+interest that a user of Riak Search will have to keep in mind in order
+to store values properly and query them later:
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak Search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak Search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak Search uses AAE is in the
+[Riak Search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema]; you'll likely want a custom schema in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you connect with to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Here we'll create an index
+named `famous` with the default schema.
+
+Both schema and index creation will be covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/3.0.1/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.1/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.1/developing/getting-started) section.
+
+
+Note that the above commands are exactly equivalent to the following,
+which explicitly define the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('famous')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type's
+`search_index` property to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create one without
+any properties and set individual buckets to be indexed.
The step below
+creates and activates the bucket type:
+
+```bash
+riak admin bucket-type create animals '{"props":{}}'
+riak admin bucket-type activate animals
+```
+
+And this step applies the index to the `cats` bucket, which bears the
+`animals` bucket type we just created and activated:
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"famous"}}'
+```
+
+Another possibility is to set the `search_index` as a default property
+of the bucket type. This means _any_ bucket under that type will
+inherit that setting and have its values indexed.
+
+```bash
+riak admin bucket-type create animals '{"props":{"search_index":"famous"}}'
+riak admin bucket-type activate animals
+```
+
+If you ever need to turn off indexing for a bucket, set the
+`search_index` property to the `_dont_index_` sentinel value.
+
+### Associating an Index via Custom Bucket Properties
+
+Although we recommend that you put all new buckets under a bucket type,
+if you have existing data in a type-free bucket (i.e., under the
+default bucket type), you can set the `search_index` property for a
+specific bucket.
+
+```java
+Namespace catsBucket = new Namespace("cats");
+StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket)
+        .withSearchIndex("famous")
+        .build();
+client.execute(storePropsOp);
+```
+
+```ruby
+bucket = client.bucket('cats')
+bucket.properties = {'search_index' => 'famous'}
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak))
+    ->withName('famous')
+    ->buildBucket('cats')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket('cats')
+bucket.set_properties({'search_index': 'famous'})
+```
+
+```csharp
+var properties = new RiakBucketProperties();
+properties.SetSearchIndex("famous");
+var rslt = client.SetBucketProperties("cats", properties);
+```
+
+```javascript
+var bucketProps_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    // success
+};
+
+var store = new Riak.Commands.KV.StoreBucketProps.Builder()
+    .withBucket("cats")
+    .withSearchIndex("famous")
+    .withCallback(bucketProps_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"famous"}}'
+```
+
+Once you have created the index association, any new data will be indexed on
+ingest according to your schema.
+
+## Riak Search Security Setup
+
+[Security]({{<baseurl>}}riak/kv/3.0.1/using/security/) is a new feature as of
+Riak 2.0 that lets an administrator limit access to certain resources.
+In the case of search, your options are to limit administration of
+schemas or indexes (the `search.admin` permission) to certain users, and
+to limit querying (the `search.query` permission) to any index or to a
+specific index. The example below shows the various options.
+
+```bash
+riak admin security grant search.admin on schema to username
+riak admin security grant search.admin on index to username
+riak admin security grant search.query on index to username
+riak admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak admin security revoke search.admin on schema from username
+riak admin security revoke search.admin on index from username
+riak admin security revoke search.query on index from username
+riak admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Lion-o, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python, the content type is set automatically for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+  .setContentType(json)
+  .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+  .setContentType(json)
+  .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":28,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+  .setContentType(json)
+  .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+  .setContentType(json)
+  .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+    ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket))
+    ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket))
+    ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket))
+    ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]);
+
+$storeObjectBuilder->build()->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+
+cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True})
+cat.store()
+
+cat = bucket.new('cheetara', {'name_s': 'Cheetara', 'age_i': 28, 'leader_b': False})
+cat.store()
+
+cat = bucket.new('snarf', {'name_s': 'Snarf', 'age_i': 43})
+cat.store()
+
+cat = bucket.new('panthro', {'name_s': 'Panthro', 'age_i': 36})
+cat.store()
+```
+
+```csharp
+var lionoId = new RiakObjectId("animals", "cats", "liono");
+var lionoObj = new { name_s = "Lion-o", age_i = 30, leader_b = true };
+var lionoRiakObj = new RiakObject(lionoId, lionoObj);
+
+var cheetaraId = new RiakObjectId("animals", "cats", "cheetara");
+var cheetaraObj = new { name_s = "Cheetara", age_i = 28, leader_b = false };
+var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj);
+
+var snarfId = new RiakObjectId("animals", "cats", "snarf");
+var snarfObj = new { name_s = "Snarf", age_i = 43, leader_b = false };
+var snarfRiakObj = new RiakObject(snarfId, snarfObj);
+
+var panthroId = new RiakObjectId("animals", "cats", "panthro");
+var panthroObj = new { name_s = "Panthro", age_i = 36, leader_b = false };
+var panthroRiakObj = new RiakObject(panthroId, panthroObj);
+
+var rslts = client.Put(new[] {
+    lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj
+});
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var objs = [
+    [ 'liono', { name_s: 'Lion-o', age_i: 30, leader_b: true } ],
+    [ 'cheetara', { name_s: 'Cheetara', age_i: 28, leader_b: false } ],
+    [ 'snarf', { name_s: 'Snarf', age_i: 43, leader_b: false } ],
+    [ 'panthro', { name_s: 'Panthro', age_i: 36, leader_b: false } ],
+];
+
+var storeFuncs = [];
+objs.forEach(function (o) {
+    var storeFunc = function (async_cb) {
+        var key = o[0];
+        var value = o[1];
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('application/json');
+        riakObj.setBucketType('animals');
+        riakObj.setBucket('cats');
+        riakObj.setKey(key);
+        riakObj.setValue(value);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    };
+    storeFuncs.push(storeFunc);
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+    // NB: all objects stored and indexed...
+});
+```
+
+```erlang
+CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>,
+    <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>,
+    "application/json"),
+riakc_pb_socket:put(Pid, CO),
+
+C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>,
+    <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>,
+    "application/json"),
+riakc_pb_socket:put(Pid, C1),
+
+C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>,
+    <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>,
+    "application/json"),
+riakc_pb_socket:put(Pid, C2),
+
+C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>,
+    <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>,
+    "application/json"),
+riakc_pb_socket:put(Pid, C3).
+```
+
+```golang
+o1 := &riak.Object{
+    Key:   "liono",
+    Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"),
+}
+o2 := &riak.Object{
+    Key:   "cheetara",
+    Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":28,\"leader_b\":false}"),
+}
+o3 := &riak.Object{
+    Key:   "snarf",
+    Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"),
+}
+o4 := &riak.Object{
+    Key:   "panthro",
+    Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"),
+}
+
+objs := [...]*riak.Object{o1, o2, o3, o4}
+
+wg := &sync.WaitGroup{}
+for _, obj := range objs {
+    obj.ContentType = "application/json"
+    obj.Charset = "utf-8"
+    obj.ContentEncoding = "utf-8"
+
+    cmd, err := riak.NewStoreValueCommandBuilder().
+        WithBucketType("animals").
+        WithBucket("cats").
+        WithContent(obj).
+        Build()
+    if err != nil {
+        return err
+    }
+
+    args := &riak.Async{
+        Command: cmd,
+        Wait:    wg,
+    }
+    if err := cluster.ExecuteAsync(args); err != nil {
+        return err
+    }
+}
+
+wg.Wait()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \
+  -H 'Content-Type: application/json' \
+  -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}'
+
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \
+  -H 'Content-Type: application/json' \
+  -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}'
+
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \
+  -H 'Content-Type: application/json' \
+  -d '{"name_s":"Snarf", "age_i":43}'
+
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \
+  -H 'Content-Type: application/json' \
+  -d '{"name_s":"Panthro", "age_i":36}'
+```
+
+If you've used Riak before, you may have noticed that this is no
+different from storing values without Riak Search. That's because we
+designed Riak Search with the following design goal in mind:
+
+#### Write it like Riak, query it like Solr
+
+But how does Riak Search know how to index values, given that you can
+store opaque values in Riak? For that, we employ extractors.
+
+## Extractors
+
+Extractors are modules in Riak that accept a Riak value with a certain
+content type and convert it into a list of fields that can be indexed by
+Solr. This is done transparently and automatically as part of the
+indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/3.0.1/developing/usage/custom-extractors).
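+
+For instance, under the default schema, the JSON value stored above for
+Lion-o would be extracted into field/value pairs along these lines (a
+sketch; the automatically added `_yz_*` fields, described below, are
+omitted):
+
+```
+name_s=Lion-o
+age_i=30
+leader_b=true
+```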
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/3.0.1/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated into a
+field insertion on the Solr index document. Solr will index any field
+that it recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` an integer, `_b` a boolean, and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/3.0.1/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match.
+
+```java
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "name_s:Lion*")
+        .build();
+cluster.execute(searchOp);
+// This will fetch the actual results as a List of Maps:
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+// This will display the number of results:
+System.out.println(results.size());
+```
+
+```ruby
+results = client.search("famous", "name_s:Lion*")
+p results
+p results['docs']
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('name_s:Lion*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+
+var_dump($response->getDocs());
+```
+
+```python
+results = client.fulltext_search('famous', 'name_s:Lion*')
+print results
+print results['docs']
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "name_s")
+        .Search("Lion*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+foreach (RiakSearchResultDocument doc in searchResult.Documents)
+{
+    var args = new[] {
+        doc.BucketType,
+        doc.Bucket,
+        doc.Key,
+        string.Join(", ", doc.Fields.Select(f => f.Value).ToArray())
+    };
+    Debug.WriteLine(
+        format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}",
+        args: args);
+}
+```
+
+```javascript
+function search_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("docs:", JSON.stringify(rslt.docs));
+}
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('name_s:Lion*')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:fwrite("~p~n", [Results]),
+Docs = Results#search_results.docs,
+io:fwrite("~p~n", [Docs]).
+
+%% Please note that this example relies on an Erlang record definition
+%% for the search_result record found here:
+%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("name_s:Lion*").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+sc := cmd.(*riak.SearchCommand)
+if json, jerr := json.MarshalIndent(sc.Response.Docs, "", "  "); jerr != nil {
+    return jerr
+} else {
+    fmt.Println(string(json))
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp
+```
+
+The response to a query will be an object containing details about the
+results, such as the query's max score and a list of documents which
+match the given query. It's worth noting two things:
+
+* The documents returned are Search documents (a set of Solr
+  field/values), not a Riak value
+* The HTTP response is a direct Solr response, while the drivers use
+  Protocol Buffers and encode fields with different names
+
+This is a typical HTTP `response` value:
+
+```json
+{
+  "numFound": 1,
+  "start": 0,
+  "maxScore": 1.0,
+  "docs": [
+    {
+      "leader_b": true,
+      "age_i": 30,
+      "name_s": "Lion-o",
+      "_yz_id": "animals_cats_liono_37",
+      "_yz_rk": "liono",
+      "_yz_rt": "animals",
+      "_yz_rb": "cats"
+    }
+  ]
+}
+```
+
+The most important field returned is `docs`, which is the list of
+objects that each contain fields about matching index documents.
The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+        .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation($key, $bucket, $btype)
+    ->build()
+    ->execute()
+    ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO 30]`. If
+you wanted to find all cats 30 or older, you could use a glob as the
+top end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>).
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('leader_b:true AND age_i:[30 TO *]')
+    ->build()
+    ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>).
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+        .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new Command\Builder\Search\DeleteIndex($riak))
+    ->withName('famous')
+    ->build()
+    ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that bucket's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_`.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents is returned in non-overlapping
+sequential subsets (in other words, *pages*).
This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, where `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "*:*")
+        .withStart(start)
+        .withNumRows(rowsPerPage)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$rowsPerPage = 2;
+$page = 2;
+$start = $rowsPerPage * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('famous')
+    ->withQuery('*:*')
+    ->withMaxRows($rowsPerPage)
+    ->withStartRow($start)
+    ->build()
+    ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]).
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results, as sketched
+below.
+
+If you want to sort by score without repeating results, then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query.
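+
+Here is a minimal sketch of that key-ordered approach, assuming the
+Python client used elsewhere in this document (extra keyword arguments
+such as `sort` are passed through to Solr):
+
+```python
+ROWS_PER_PAGE = 2
+
+# Walk every matching key in a stable order by sorting on
+# type, bucket, and key rather than on score or _yz_id.
+page = 1
+while True:
+    start = ROWS_PER_PAGE * (page - 1)
+    results = client.fulltext_search(
+        'famous', '*:*',
+        sort='_yz_rt asc, _yz_rb asc, _yz_rk asc',
+        start=start, rows=ROWS_PER_PAGE)
+    for doc in results['docs']:
+        print doc['_yz_rk']
+    if start + ROWS_PER_PAGE >= results['num_found']:
+        break
+    page += 1
+```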
+
+[This issue](https://github.com/basho/yokozuna/issues/355) is caused by
+the way Search must minimally distribute a query across multiple Solr
+nodes (called a *coverage plan*) and then filter duplicate results to
+retrieve a full result set. Since this plan is frequently recalculated,
+successive page queries may use a different plan, and thus calculate
+alternate `score`s or filter different `_yz_id` values. We have plans to
+fix this shortcoming in a future version of Riak.
+
+### MapReduce
+
+Riak Search allows for piping search results as inputs for
+[MapReduce]({{<baseurl>}}riak/kv/3.0.1/developing/usage/mapreduce/) jobs. This is useful for
+performing post-calculations on results or aggregations of ad-hoc
+queries. The Riak Search MapReduce integration works similarly to
+regular MapReduce, with the notable exception that your input is not a
+bucket, but rather index and query arguments to the `yokozuna` module
+and `mapred_search` function (an Erlang `module:function` pair that adds
+the Riak Search hook to MapReduce).
+
+```json
+{
+  "inputs": {
+    "module": "yokozuna",
+    "function": "mapred_search",
+    "arg": ["famous","NOT leader_b:true"]
+  },
+  "query": [
+    {
+      "map": {
+        "language": "javascript",
+        "keep": false,
+        "source": "function(v) { return [1]; }"
+      }
+    },
+    {
+      "reduce": {
+        "language": "javascript",
+        "keep": true,
+        "name": "Riak.reduceSum"
+      }
+    }
+  ]
+}
+```
+
+In this example we're searching for all famous cats that are not
+leaders and counting up the results using JavaScript for both map and
+reduce. It should return the reduced sum of `[3]`.
+
+```curl
+curl -XPOST $RIAK_HOST/mapred \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}'
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/searching-data-types.md b/content/riak/kv/3.0.1/developing/usage/searching-data-types.md
new file mode 100644
index 0000000000..b108acc7cb
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/searching-data-types.md
@@ -0,0 +1,1687 @@
+---
+title: "Searching with Data Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Searching with Data Types"
+    identifier: "usage_search_data_types"
+    weight: 111
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/search/search-data-types
+  - /riak/kv/3.0.1/dev/search/search-data-types
+
+---
+
+Although [Riak Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types) function differently from other
+Riak objects in some respects, when you're using Search you can think of
+them as normal Riak objects with special metadata attached (metadata
+that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#maps)
+can be indexed and have their contents searched just like other Riak
+objects.
+
+## Data Type MIME Types
+
+Like all objects stored in Riak, Riak Data Types are assigned content
+types. Unlike other Riak objects, this happens automatically. When you
+store, say, a counter in Riak, it will automatically be assigned the
+type `application/riak_counter`.
The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types has its own default schema, with the exception of
+maps. This means that the `_yz_default` schema will automatically index
+Data Types on the basis of their assigned content type, and that there
+is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#data-types-and-search-examples) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#maps), there are four schemas
+for embedded (a.k.a. dynamic) fields.
Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued:
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings:
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('scores')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index.
+
+So let's start playing with some counters. All counters will be stored
+in the bucket `people`, while the key for each counter will be the
+username of each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu1 = new CounterUpdate(10);
+UpdateCounter update1 = new UpdateCounter.Builder(christopherHitchensCounter, cu1)
+    .build();
+client.execute(update1);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+CounterUpdate cu2 = new CounterUpdate(25);
+UpdateCounter update2 = new UpdateCounter.Builder(joanRiversCounter, cu2)
+    .build();
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10)
+    ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+    ->buildLocation('joan_rivers', 'people', 'counters')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"chris_hitchens">>,
+                            riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"joan_rivers">>,
+                            riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25.
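+
+If you'd like to spot-check one of the stored values directly, a quick
+sketch using Riak's HTTP Data Types interface (assuming a node reachable
+at `$RIAK_HOST`) would be:
+
+```curl
+# Illustrative only; this should return JSON along the lines of
+# {"type":"counter","value":25}
+curl $RIAK_HOST/types/counters/buckets/people/datatypes/joan_rivers
+```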
+
+Let's query to see how many counters have a value greater than 20, just
+to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[20 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[20 TO *]" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over
+20.
+
+To find out which counter that is, we can dig into our results:
+
+```java
+// Using the "results" object from above:
+int numberFound = results.numResults();
+Map<String, List<String>> foundObject = results.getAllResults().get(0);
+String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
+String bucket = foundObject.get("_yz_rb").get(0); // "people"
+String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
+```
+
+```ruby
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```php
+$doc = $response->getDocs()[0];
+
+# The key
+$doc['_yz_rk']; # 'joan_rivers'
+
+# The bucket
+$doc['_yz_rb']; # 'people'
+
+# The bucket type
+$doc['_yz_rt']; # 'counters'
+```
+
+```python
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+var doc = rslt.docs[0];
+
+var key = doc['_yz_rk'];
+var bucket = doc['_yz_rb'];
+var bucketType = doc['_yz_rt'];
+```
+
+```erlang
+Doc = lists:nth(1, Docs),
+Key = proplists:get_value(<<"_yz_rk">>, Doc),
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
+BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
+```
+
+```curl
+# Use the JSON object from above to locate bucket, key, and bucket type
+# information
+```
+
+Alternatively, we can see how many counters have values below 15:
+
+```java
+String index = "scores";
+String query = "counter:[* TO 15]";
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[* TO 15]')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[* TO 15]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[* TO 15]')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[* TO 15]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
+``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp +``` + +Or we can see how many counters have a value of 17 exactly: + +```java +// Using the same method as above, just changing the query: +String query = "counter:17"; +``` + +```ruby +results = client.search('scores', 'counter:17') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('scores') + ->withQuery('counter:17') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('scores', 'counter:17') +``` + +```csharp +var search = new RiakSearchRequest("scores", "counter:17"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('scores') + .withQuery('counter:17') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>). +``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp +``` + +## Sets Example + +Let's say that we're storing information about the hobbies of a group of +people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#sets) simply called `sets`, +like so: + +```bash +riak admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak admin bucket-type activate sets +``` + +Now, we'll create a Search index called `hobbies` that uses the default +schema (as in some of the examples above): + +```java +YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies"); +StoreIndex storeIndex = + new StoreIndex.Builder(hobbiesIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('hobbies', '_yz_default') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak)) + ->withName('hobbies') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('hobbies', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("hobbies", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'hobbies' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/hobbies \ + -H 'Content-Type: application/json' \ + -d '{"schema": "_yz_default"}' +``` + +Now, we can modify our `sets` bucket type to associate that bucket type +with our `hobbies` index: + +```bash +riak admin bucket-type update sets '{"props":{"search_index":"hobbies"}}' +``` + +Now, all of the sets that we store in any bucket with the bucket type +`sets` will be automatically indexed as a set. 
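+
+As a quick sanity check (a sketch using the standard bucket-properties
+endpoint; field names shown are illustrative), you can confirm that the
+index is attached to the bucket type:
+
+```curl
+# The returned JSON "props" object should include
+# "search_index":"hobbies" and "datatype":"set"
+curl $RIAK_HOST/types/sets/buckets/people/props
+```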
+
+So let's say that we store two sets for two different people describing
+their respective hobbies, in the bucket `people`:
+
+```java
+Namespace peopleBucket = new Namespace("sets", "people");
+
+Location mikeDitkaSet = new Location(peopleBucket, "ditka");
+SetUpdate su1 = new SetUpdate()
+    .add("football")
+    .add("winning");
+UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();
+
+Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
+SetUpdate su2 = new SetUpdate()
+    .add("wailing")
+    .add("rocking")
+    .add("winning");
+UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();
+
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+
+ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('football')
+    ->add('winning')
+    ->buildLocation('ditka', 'people', 'sets');
+
+$builder->build()->execute();
+
+$builder->add('wailing')
+    ->add('rocking')
+    ->add('winning')
+    ->buildLocation('dio', 'people', 'sets')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('people')
+
+mike_ditka_set = Set(bucket, 'ditka')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+mike_ditka_set.store()
+
+ronnie_james_dio_set = Set(bucket, 'dio')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+ronnie_james_dio_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("ditka")
+    .WithAdditions(new[] { "football", "winning" })
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("dio")
+    .WithAdditions(new[] { "wailing", "rocking", "winning" })
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'ditka',
+            additions: ['football', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'dio',
+            additions: ['wailing', 'rocking', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+%% riakc_set functions return a new set rather than mutating the
+%% argument, so each result must be bound to a new variable
+MikeDitkaSet = riakc_set:new(),
+MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet),
+MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
+RonnieJamesDioSet = riakc_set:new(),
+RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet),
+RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
+RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),
+
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"ditka">>,
+                            riakc_set:to_op(MikeDitkaSet2)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"dio">>,
+                            riakc_set:to_op(RonnieJamesDioSet3)).
+``` + +Now, we can query our `hobbies` index to see if anyone has the hobby +`football`: + +```java +// Using the same method explained above, just changing the query: +String query = "set:football"; +``` + +```ruby +results = client.search('hobbies', 'set:football') +# This should return a dict with fields like 'num_found' and 'docs' +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('hobbies') + ->withQuery('set:football') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('hobbies', 'set:football') +# This should return a dict with fields like 'num_found' and 'docs' +``` + +```csharp +var search = new RiakSearchRequest("hobbies", "set:football"); +var rslt = client.Search(search); + +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); + +var firstDoc = searchResult.Documents.First(); +Console.WriteLine("Key: {0} Bucket: {1} Type: {2}", + firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType); +``` + +```javascript +function search_cb(err, rslt) { + logger.info("sets numFound: '%d', docs: '%s'", + rslt.numFound, JSON.stringify(rslt.docs)); + + var doc = rslt.docs[0]; + var key = doc['_yz_rk']; + var bucket = doc['_yz_rb']; + var bucketType = doc['_yz_rt']; +} + +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('hobbies') + .withQuery('set:football') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>). +``` + +```curl +curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp +``` + +Let's see how many sets contain the element `football`: + +```java +// Using the same method explained above for getting search results: +int numberFound = results.numResults(); // 1 +``` + +```ruby +results['num_found'] +# 1 +``` + +```php +$response->getNumFound(); // 1 +``` + +```python +results['num_found'] +# 1 +``` + +```csharp +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); +``` + +```javascript +rslt.numFound; +// 1 +``` + +```erlang +NumberFound = Results#search_results.num_found. +%% 1 +``` + +```curl +``` + +Success! We stored two sets, only one of which contains the element +`football`. Now, let's see how many sets contain the element `winning`: + +```java +// Using the same method explained above, just changing the query: +String query = "set:winning"; + +// Again using the same method from above: +int numberFound = results.numResults(); // 2 +``` + +```ruby +results = client.search('hobbies', 'set:winning') +results['num_found'] +# 2 +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('hobbies') + ->withQuery('set:winning') + ->build() + ->execute(); + +$response->getNumFound(); // 2 +``` + +```python +results = client.fulltext_search('hobbies', 'set:winning') +results['num_found'] +# 2 +``` + +```csharp +var search = new RiakSearchRequest("hobbies", "set:winning"); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('hobbies') + .withQuery('set:winning') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>). +NumberFound = Results#search_results.num_found. +%% 2 +``` + +Just as expected, both sets we stored contain the element `winning`. 
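+
+For completeness, the equivalent HTTP query follows the same pattern as
+the earlier curl examples:
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:winning" | json_pp
+```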
+ +## Maps Example + +This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types) +tutorial. That tutorial walks you through storing CMS-style user data in +Riak [maps]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/#maps), and we'd suggest that you +familiarize yourself with that tutorial first. More specifically, user +data is stored in the following fields in each user's map: + +* first name in a `first_name` register +* last name in a `last_name` register +* whether the user is an enterprise customer in an `enterprise_customer` + flag +* the number of times the user has visited the company page in a + `page_visits` counter +* a list of the user's interests in an `interests` set + +First, let's create and activate a bucket type simply called `maps` that +is set up to store Riak maps: + +```bash +riak admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak admin bucket-type activate maps +``` + +Now, let's create a search index called `customers` using the default +schema: + +```java +YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default"); +StoreIndex storeIndex = + new StoreIndex.Builder(customersIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('customers', '_yz_default') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('customers') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('customers', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("customers", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'customers' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>). 
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+    .update("first_name", new RegisterUpdate("Idris"))
+    .update("last_name", new RegisterUpdate("Elba"))
+    .update("enterprise_customer", new FlagUpdate(true))
+    .update("page_visits", new CounterUpdate(10))
+    .update("interests", new SetUpdate().add("acting", "being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+    .update("first_name", new RegisterUpdate("Joan"))
+    .update("last_name", new RegisterUpdate("Jett"))
+    // Joan Jett is not an enterprise customer, so we don't need to
+    // explicitly disable the "enterprise_customer" flag, as all
+    // flags are disabled by default
+    .update("page_visits", new CounterUpdate(25))
+    .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Idris')
+    ->updateRegister('last_name', 'Elba')
+    ->updateFlag('enterprise_customer', true)
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Joan')
+    ->updateRegister('last_name', 'Jett')
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, true)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', true);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters above 15.
Unlike the
+counters example above, we have to specify _which_ counter we're
+querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('page_visits_counter:[15 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter
+above 15. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register; // 'Joan'
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we've stored thus far has an `interests` set. First,
+let's see how many of our maps even _have_ sets called `interests` using
+a wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set.
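+
+Over HTTP, the same wildcard query follows the pattern of the earlier
+curl examples:
+
+```curl
+curl "$RIAK_HOST/search/query/customers?wt=json&q=interests_set:*" | json_pp
+```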
+
+Now let's see how many maps have items in `interests` sets that begin
+with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:loving*')
+    ->build()
+    ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has one item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will have a first name only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+    .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+    .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate).build();
+client.execute(addSubMap);
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'John Luther');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'Robert Plant');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+Querying maps within maps involves constructing queries that separate
+the different levels of depth with a single dot. Here's an example query
+for finding maps that have a `name` register embedded within an
+`alter_ego` map:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Once we know how to query embedded fields like this, we can query those
+just like any other.
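+
+The same dot-separated query also works over HTTP, following the pattern
+of the earlier curl examples:
+
+```curl
+curl "$RIAK_HOST/search/query/customers?wt=json&q=alter_ego_map.name_register:*" | json_pp
+```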
+
+Let's find out which maps have an `alter_ego`
+sub-map that contains a `name` register that ends with `Plant`, and
+display that customer's first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*Plant')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/secondary-indexes.md b/content/riak/kv/3.0.1/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..0fe19be1b8
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/secondary-indexes.md
@@ -0,0 +1,2030 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/2i
+  - /riak/kv/3.0.1/dev/using/2i
+
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary indexes can be either a binary or string, such as
+`sensor_1_data` or `admin_user` or `click_event`, or an integer, such as
+`99` or `141121`.
+ +[Riak Search]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/3.0.1/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/3.0.1/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/3.0.1/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
Let's say that an +application would like add a Twitter handle and an email address to this +object as secondary indexes. + +```java +Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith"); + +// In the Java client (and all clients), if you do not specify a bucket type, +// the client will use the default type. And so the following store command +// would be equivalent to the one above: +Location johnSmithKey = new Location(new Namespace("users"), "john_smith"); + +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(BinaryValue.create("{'user_data':{ ... }}")); + +obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123"); +obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com"); + +StoreValue store = new StoreValue.Builder(obj) + .withLocation(johnSmithKey) + .build(); +client.execute(store); +``` + +```ruby +bucket = client.bucket_type('default').bucket('users') +obj = Riak::RObject.new(bucket, 'john_smith') +obj.content_type = 'application/json' +obj.raw_data = '{"user_data":{ ... }}' + +# String/binary indexes must be set as an array of strings +obj.indexes['twitter_bin'] = %w{ jsmith123 } +obj.indexes['email_bin'] = %w{ jsmith@basho.com } +obj.store + +# In the Ruby client (and all clients), if you do not specify a bucket +# type, the client will use the default type. And so the following set +# of commands would be equivalent to the one above: + +bucket = client.bucket('users') +# repeat the same commands for building the object +obj.store +``` + +```php +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 'jsmith123') + ->addValueToIndex('email_bin', 'jsmith@basho.com'); + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->buildLocation('john_smith', 'users', 'default') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('default').bucket('users') +# In the Python client (and all clients), if you do not specify a bucket type, +# the client will use the default type. And so the following store command +# would be equivalent to the one above: +bucket = client.bucket('users') + +obj = RiakObject(client, bucket, 'john_smith') +obj.content_type = 'text/plain' +obj.data = '...user data...' +obj.add_index('twitter_bin', 'jsmith123') +obj.add_index('email_bin', 'jsmith@basho.com') +obj.store() +``` + +```csharp +var id = new RiakObjectId("default", "users", "john_smith"); +var obj = new RiakObject(id, "...user data...", + RiakConstants.ContentTypes.TextPlain); +obj.BinIndex("twitter").Set("jsmith123"); +obj.BinIndex("email").Set"jsmith@basho.com"); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucket('users'); +riakObj.setKey('john_smith'); +riakObj.setValue('...user data...'); +riakObj.addToIndex('twitter_bin', 'jsmith123'); +riakObj.addToIndex('email_bin', 'jsmith@basho.com'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"default">>, <<"users">>}, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +%% In the Erlang client (and all clients), if you do not specify a bucket type, +%% the client will use the default type. 
And so the following object would be +%% equivalent to the one above: + +Obj = riakc_obj:new(<<"users">>, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index( + MD1, + [{{binary_index, "twitter"}, [<<"jsmith123">>]}, + {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]), +Obj2 = riakc_obj:update_metadata(Obj, MD2), +riakc_pb_socket:put(Pid, Obj2). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + BucketType: "indexes", + Bucket: "users", + Key: "john_smith", + Value: []byte("…user data…"), +} + +obj.AddToIndex("twitter_bin", "jsmith123") +obj.AddToIndex("email_bin", "jsmith@basho.com") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ + -H 'x-riak-index-twitter_bin: jsmith123' \ + -H 'x-riak-index-email_bin: jsmith@basho.com' \ + -H 'Content-Type: application/json' \ + -d '{"userData":"data"}' +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.1/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.1/developing/getting-started) section. + +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if 
(rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucket('users')
+    .withIndexName('twitter_bin')
+    .withIndexKey('jsmith123')
+    .withCallback(query_cb)
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+{ok, Results} =
+    riakc_pb_socket:get_index(Pid,
+                              <<"users">>, %% bucket
+                              {binary_index, "twitter"}, %% index name
+                              <<"jsmith123">>). %% index value
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("users").
+    WithIndexName("twitter_bin").
+    WithIndexKey("jsmith123").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```
+
+The response:
+
+```java
+john_smith
+```
+
+```ruby
+["john_smith"]
+```
+
+```php
+['john_smith']
+```
+
+```python
+['john_smith']
+```
+
+```csharp
+john_smith
+```
+
+```javascript
+john_smith
+```
+
+```erlang
+{ok,{index_results_v1,[<<"john_smith">>],
+                      undefined,undefined}}.
+```
+
+```golang
+john_smith
+```
+
+```curl
+{
+  "keys": [
+    "john_smith"
+  ]
+}
+```
+
+## Examples
+
+To run the following examples, make sure that Riak is configured to use
+an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].
+
+## Indexing Objects
+
+The following example indexes four different objects. Notice that we're
+storing both integer and string (aka binary) fields. Field names are
+automatically lowercased, some fields have multiple values, and
+duplicate fields are automatically de-duplicated:
+
+```java
+Namespace peopleBucket = new Namespace("indexes", "people");
+
+RiakObject larry = new RiakObject()
+    .setValue(BinaryValue.create("My name is Larry"));
+larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
+larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
+StoreValue storeLarry = new StoreValue.Builder(larry)
+    .withLocation(new Location(peopleBucket, "larry"))
+    .build();
+client.execute(storeLarry);
+
+RiakObject moe = new RiakObject()
+    .setValue(BinaryValue.create("My name is Moe"));
+moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
+moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
+StoreValue storeMoe = new StoreValue.Builder(moe)
+    .withLocation(new Location(peopleBucket, "moe"))
+    .build();
+client.execute(storeMoe);
+
+RiakObject curly = new RiakObject()
+    .setValue(BinaryValue.create("My name is Curly"));
+curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
+curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
+StoreValue storeCurly = new StoreValue.Builder(curly)
+    .withLocation(new Location(peopleBucket, "curly"))
+    .build();
+client.execute(storeCurly);
+
+RiakObject veronica = new RiakObject()
+    .setValue(BinaryValue.create("My name is Veronica"));
+veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
+    .add("val4").add("val4");
+veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
+    .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
+StoreValue storeVeronica = new StoreValue.Builder(veronica)
+    .withLocation(new Location(peopleBucket, "veronica"))
+    .build();
+client.execute(storeVeronica);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = Riak::RObject.new(bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'My name is Larry'
+obj1.indexes['field1_bin'] = %w{ val1 }
+# Like binary/string indexes, integer indexes must be set as an array,
+# even if you wish to add only a single index
+obj1.indexes['field2_int'] = [1001]
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'My name is Moe'
+obj2.indexes['Field1_bin'] = %w{ val2 }
+obj2.indexes['Field2_int'] = [1002]
+obj2.store
+
+obj3 = Riak::RObject.new(bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.raw_data = 'My name is Curly'
+obj3.indexes['FIELD1_BIN'] = %w{ val3 }
+obj3.indexes['FIELD2_INT'] = [1003]
+obj3.store
+
+obj4 = Riak::RObject.new(bucket, 'veronica')
+obj4.content_type = 'text/plain'
+obj4.raw_data = 'My name is Veronica'
+obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
+# Each assignment to `indexes` replaces any previous value, so all of
+# the integer index values (duplicates included) are set in a single
+# assignment
+obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1007]
+obj4.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('people', 'indexes');
+
+$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val1')
+    ->addValueToIndex('field2_int', 1001);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('larry', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('Field1_bin', 'val2')
+    ->addValueToIndex('Field2_int', 1002);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('moe', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('FIELD1_BIN', 'val3')
+    ->addValueToIndex('FIELD2_int', 1003);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('curly', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4a')
+    ->addValueToIndex('field1_bin', 'val4b')
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1005)
+    ->addValueToIndex('field2_int', 1006)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1007);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = RiakObject(client, bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.data = 'My name is Larry'
+obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.data = 'My name is Moe'
+obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
+obj2.store()
+
+obj3 = RiakObject(client, bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.data = 'My name is Curly'
+obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
+obj3.store()
+
+obj4 = RiakObject(client, bucket, 
'veronica')
+obj4.content_type = 'text/plain'
+obj4.data = 'Veronica'
+obj4.add_index('field1_bin', 'val4')
+obj4.add_index('field1_bin', 'val4a')
+obj4.add_index('field1_bin', 'val4b')
+obj4.add_index('field2_int', 1004)
+obj4.add_index('field2_int', 1004)
+obj4.add_index('field2_int', 1005)
+obj4.add_index('field2_int', 1006)
+obj4.add_index('field2_int', 1004)
+obj4.add_index('field2_int', 1004)
+obj4.add_index('field2_int', 1004)
+obj4.add_index('field2_int', 1007)
+obj4.store()
+```
+
+```csharp
+var larryId = new RiakObjectId("indexes", "people", "larry");
+var larry = new RiakObject(larryId, "My name is Larry",
+    RiakConstants.ContentTypes.TextPlain);
+
+larry.BinIndex("field1").Set("val1");
+larry.IntIndex("field2").Set(1001);
+
+client.Put(larry);
+
+var moeId = new RiakObjectId("indexes", "people", "moe");
+var moe = new RiakObject(moeId, "My name is Moe",
+    RiakConstants.ContentTypes.TextPlain);
+
+moe.BinIndex("Field1").Set("val2");
+moe.IntIndex("Field2").Set(1002);
+
+client.Put(moe);
+
+var curlyId = new RiakObjectId("indexes", "people", "curly");
+var curly = new RiakObject(curlyId, "My name is Curly",
+    RiakConstants.ContentTypes.TextPlain);
+
+curly.BinIndex("FIELD1").Set("val3");
+curly.IntIndex("FIELD2").Set(1003);
+
+client.Put(curly);
+
+var veronicaId = new RiakObjectId("indexes", "people", "veronica");
+var veronica = new RiakObject(veronicaId, "My name is Veronica",
+    RiakConstants.ContentTypes.TextPlain);
+
+veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" });
+veronica.IntIndex("FIELD2").Set(new BigInteger[] {
+    1004, 1005, 1006, 1004, 1004, 1007
+});
+
+client.Put(veronica);
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var storeFuncs = [
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('larry');
+        riakObj.setValue('My name is Larry');
+        riakObj.addToIndex('field1_bin', 'val1');
+        riakObj.addToIndex('field2_int', 1001);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('moe');
+        riakObj.setValue('My name is Moe');
+        riakObj.addToIndex('Field1_bin', 'val2');
+        riakObj.addToIndex('Field2_int', 1002);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('curly');
+        riakObj.setValue('My name is Curly');
+        riakObj.addToIndex('FIELD1_BIN', 'val3');
+        riakObj.addToIndex('FIELD2_INT', 1003);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('veronica');
+        riakObj.setValue('My name is Veronica');
+        riakObj.addToIndex('FIELD1_bin', 'val4');
+        riakObj.addToIndex('FIELD1_bin', 'val4');
+        riakObj.addToIndex('FIELD1_bin', 'val4a');
+        riakObj.addToIndex('FIELD1_bin', 'val4b');
+        riakObj.addToIndex('FIELD2_int', 1004);
+        riakObj.addToIndex('FIELD2_int', 1005);
+        riakObj.addToIndex('FIELD2_int', 1006);
+        riakObj.addToIndex('FIELD2_int', 1004);
+        riakObj.addToIndex('FIELD2_int', 1004);
+        riakObj.addToIndex('FIELD2_int', 1007);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    }
+];
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Larry = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"larry">>,
+    <<"My name is Larry">>,
+    <<"text/plain">>),
+LarryMetadata = riakc_obj:get_update_metadata(Larry),
+LarryIndexes = riakc_obj:set_secondary_index(
+    LarryMetadata,
+    [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}]
+),
+LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes).
+
+Moe = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"moe">>,
+    <<"My name is Moe">>,
+    <<"text/plain">>),
+MoeMetadata = riakc_obj:get_update_metadata(Moe),
+MoeIndexes = riakc_obj:set_secondary_index(
+    MoeMetadata,
+    [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}]
+),
+MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes).
+
+Curly = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"curly">>,
+    <<"My name is Curly">>,
+    <<"text/plain">>),
+CurlyMetadata = riakc_obj:get_update_metadata(Curly),
+CurlyIndexes = riakc_obj:set_secondary_index(
+    CurlyMetadata,
+    [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}]
+),
+CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes).
+
+Veronica = riakc_obj:new(
+    {<<"indexes">>, <<"people">>},
+    <<"veronica">>,
+    <<"My name is Veronica">>,
+    <<"text/plain">>),
+VeronicaMetadata = riakc_obj:get_update_metadata(Veronica),
+VeronicaIndexes = riakc_obj:set_secondary_index(
+    VeronicaMetadata,
+    [{{binary_index, "field1"}, [<<"val4">>]},
+     {{binary_index, "field1"}, [<<"val4">>]},
+     {{integer_index, "field2"}, [1004]},
+     {{integer_index, "field2"}, [1004]},
+     {{integer_index, "field2"}, [1005]},
+     {{integer_index, "field2"}, [1006]},
+     {{integer_index, "field2"}, [1004]},
+     {{integer_index, "field2"}, [1004]},
+     {{integer_index, "field2"}, [1007]}]
+),
+VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes).
+```
+
+```golang
+o1 := &riak.Object{
+    Key:   "larry",
+    Value: []byte("My name is Larry"),
+}
+o1.AddToIndex("field1_bin", "val1")
+o1.AddToIntIndex("field2_int", 1001)
+
+o2 := &riak.Object{
+    Key:   "moe",
+    Value: []byte("My name is Moe"),
+}
+o2.AddToIndex("Field1_bin", "val2")
+o2.AddToIntIndex("Field2_int", 1002)
+
+o3 := &riak.Object{
+    Key:   "curly",
+    Value: []byte("My name is Curly"),
+}
+o3.AddToIndex("FIELD1_BIN", "val3")
+o3.AddToIntIndex("FIELD2_INT", 1003)
+
+o4 := &riak.Object{
+    Key:   "veronica",
+    Value: []byte("My name is Veronica"),
+}
+o4.AddToIndex("FIELD1_bin", "val4")
+o4.AddToIndex("FIELD1_bin", "val4")
+o4.AddToIndex("FIELD1_bin", "val4a")
+o4.AddToIndex("FIELD1_bin", "val4b")
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1005)
+o4.AddToIntIndex("FIELD2_int", 1006)
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1004)
+o4.AddToIntIndex("FIELD2_int", 1007)
+
+objs := [...]*riak.Object{o1, o2, o3, o4}
+
+wg := &sync.WaitGroup{}
+for _, obj := range objs {
+    obj.ContentType = "text/plain"
+    obj.Charset = "utf-8"
+    obj.ContentEncoding = "utf-8"
+
+    cmd, err := riak.NewStoreValueCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("people").
+        WithContent(obj).
+        Build()
+    if err != nil {
+        return err
+    }
+
+    args := &riak.Async{
+        Command: cmd,
+        Wait:    wg,
+    }
+    if err := cluster.ExecuteAsync(args); err != nil {
+        return err
+    }
+}
+
+wg.Wait()
+```
+
+```curl
+curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \
+  -H "x-riak-index-field1_bin: val1" \
+  -H "x-riak-index-field2_int: 1001" \
+  -d 'My name is Larry'
+
+curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \
+  -H "x-riak-index-Field1_bin: val2" \
+  -H "x-riak-index-Field2_int: 1002" \
+  -d 'My name is Moe'
+
+curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \
+  -H "X-RIAK-INDEX-FIELD1_BIN: val3" \
+  -H "X-RIAK-INDEX-FIELD2_INT: 1003" \
+  -d 'My name is Curly'
+
+curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \
+  -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \
+  -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \
+  -H "x-riak-index-field2_int: 1004" \
+  -H "x-riak-index-field2_int: 1004" \
+  -H "x-riak-index-field2_int: 1004" \
+  -H "x-riak-index-field2_int: 1007" \
+  -d 'My name is Veronica'
+```
+
+The above objects will end up having the following secondary indexes,
+respectively:
+
+* `Larry` - Binary index `field1_bin` and integer index `field2_int`
+* `Moe` - Binary index `field1_bin` and integer index `field2_int`
+  (note that the index names are set to lowercase by Riak)
+* `Curly` - Binary index `field1_bin` and integer index `field2_int`
+  (note again that the index names are set to lowercase)
+* `Veronica` - Binary index `field1_bin` with the values `val4`,
+  `val4a`, and `val4b` and integer index `field2_int` with the values
+  `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed)
+
+As these examples show, there are safeguards in Riak that both normalize
+the names of indexes and prevent the accumulation of redundant indexes.
+
+## Invalid Field Names and Types
+
+The following examples demonstrate what happens when an index field is
+specified with an invalid field name or type. The system responds with
+`400 Bad Request` and a description of the error.
+
+Invalid field name:
+
+```java
+// The Java client will not allow you to provide invalid index names,
+// because you are not required to add "_bin" or "_int" to the end of
+// those names
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+obj = Riak::RObject.new(bucket, 'larry')
+obj.indexes['field2_foo'] = [1001]
+
+# The Ruby client will let you get away with this...at first. But when
+# you attempt to store the object, you will get an error response such
+# as this:
+
+NoMethodError: undefined method 'map' for 1001:Fixnum
+```
+
+```php
+// throws \InvalidArgumentException
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter', 'jsmith123');
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+obj = RiakObject(client, bucket, 'larry')
+obj.add_index('field2_foo', 1001)
+
+# Result:
+riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'."
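+
+# A hedged aside (not client output): with a valid '_bin' or '_int'
+# suffix, the same call is accepted, e.g.
+# obj.add_index('field2_int', 1001)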
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
+>
+> To avoid issues such as the above, a new option has been added to the `riak.conf` file to allow you to disable or enable node participation in 2i queries. Setting `participate_in_coverage=disabled` will prevent the node in question from participating. This feature is typically used to keep newly added nodes that have not yet received all of their data from participating in 2i queries and returning inconsistent results. Changing the `participate_in_coverage` setting requires Riak to be restarted on that node for the change to take effect. The default setting is `enabled`.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withScalarValue('val1')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+    .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withScalarValue(1001)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field1_bin",
+    "key": "val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4")
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2'..'val4')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withRangeValue('val2', 'val4')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2', 'val4')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withRange('val2', 'val4')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"}, %% index name
+    <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4"
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithRange("val2", "val4").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4
+```
+
+Or query an integer index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L)
+    .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002..1004)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withRangeValue(1002, 1004)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002, 1004)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withRange(1002, 1004)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"}, %% index name
+    1002, 1004 %% range query for keys between 1002 and 1004
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntRange(1002, 1004).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field2_int",
+    "start": 1002,
+    "end": 1004
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+    .withKeyAndIndex(true)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('rock', 'rocl')
+    ->withReturnTerms()
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">>, %% range query for keys between "rock" and "rocl"
+    [{return_terms, true}] %% return the matched terms with the keys
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+    .withMaxResults(5)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5)
+```
+
+```csharp
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+function do_query(continuation) {
+    var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+        .withBucketType('indexes')
+        .withBucket('tweets')
+        .withIndexName('hashtags_bin')
+        .withRange('ri', 'ru')
+        .withMaxResults(5)
+        .withCallback(pagination_cb);
+
+    if (continuation) {
+        binIdxCmdBuilder.withContinuation(continuation);
+    }
+
+    client.execute(binIdxCmdBuilder.build());
+}
+
+var query_keys = [];
+function pagination_cb(err, rslt) {
+    if (err) {
+        logger.error("query_cb err: '%s'", err);
+        return;
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+        query_keys = [];
+
+        if (rslt.continuation) {
+            do_query(rslt.continuation);
+        }
+    }
+
+    if (rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+do_query();
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [{max_results, 5}]
+).
+```
+
+```golang
+func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error {
+    builder := riak.NewSecondaryIndexQueryCommandBuilder().
+        WithBucketType("indexes").
+        WithBucket("tweets").
+        WithIndexName("hashtags_bin").
+        WithRange("ri", "ru").
+        WithMaxResults(5)
+
+    if continuation != nil && len(continuation) > 0 {
+        builder.WithContinuation(continuation)
+    }
+
+    cmd, err := builder.Build()
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    printIndexQueryResults(cmd)
+
+    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
+    if sciq.Response == nil {
+        return errors.New("[DevUsing2i] expected response but did not get one")
+    }
+
+    rc := sciq.Response.Continuation
+    if rc != nil && len(rc) > 0 {
+        return doPaginatedQuery(cluster, sciq.Response.Continuation)
+    }
+
+    return nil
+}
+
+func queryingPagination(cluster *riak.Cluster) error {
+    return doPaginatedQuery(cluster, nil)
+}
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true"
+```
+
+Here is an example JSON response (your client-specific response may differ):
+
+```json
+{
+  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
+  "results": [
+    { "rice": "349222574510710785" },
+    { "rickross": "349222868095217664" },
+    { "ridelife": "349221819552763905" },
+    { "ripjake": "349220649341952001" },
+    { "ripjake": "349220687057129473" }
+  ]
+}
+```
+
+Take the continuation value from the previous result set and feed it
+back into the query.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+    .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
+    .withMaxResults(5)
+    .withKeyAndIndex(true)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+  'hashtags_bin',
+  'ri'..'ru',
+  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+  max_results: 5,
+  return_terms: true
+)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags_bin')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+    'hashtags_bin',
+    'ri', 'ru',
+    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+    max_results=5,
+    return_terms=True
+)
+```
+
+```csharp
+// rslt is the previous 2i fetch result
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+// See above example
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [
+        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
+        {max_results, 5},
+        {return_terms, true}
+    ]
+).
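+
+%% A general note, not output from this call: the returned results
+%% record carries a fresh continuation value; when no further pages
+%% remain, that continuation is undefined.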
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with `pagination` and `return_terms`.
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys.
+See the pagination example above: hash
+tags (2i keys) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages which contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index as in the previous example and pipes this into a MapReduce job that
+counts the number of records in the `people` bucket. In order to
+improve efficiency, the batch size has been increased from the default
+size of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key": "people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/security.md b/content/riak/kv/3.0.1/developing/usage/security.md
new file mode 100644
index 0000000000..4ed9abae1a
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/security.md
@@ -0,0 +1,103 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/advanced/client-security
+  - /riak/kv/3.0.1/dev/advanced/client-security
+
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/3.0.1/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, aka [security sources]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients provide a signed certificate
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified using the
+  [`riak admin security`]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis.
+This means that
+you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/3.0.1/developing/usage/mapreduce/)
+operations to use certificate auth, while requiring clients performing [K/V Operations]({{<baseurl>}}riak/kv/3.0.1/developing/usage) to use username and password. The approach
+that you adopt will depend on your security needs.
+
+This document provides a general overview of how that works. For
+managing security in Riak itself, see the following documents:
+
+* [Authentication and Authorization]({{<baseurl>}}riak/kv/3.0.1/using/security/basics)
+* [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/)
+
+We also provide client-library-specific guides for the following
+officially supported clients:
+
+* [Java]({{<baseurl>}}riak/kv/3.0.1/developing/usage/security/java)
+* [Ruby]({{<baseurl>}}riak/kv/3.0.1/developing/usage/security/ruby)
+* [PHP]({{<baseurl>}}riak/kv/3.0.1/developing/usage/security/php)
+* [Python]({{<baseurl>}}riak/kv/3.0.1/developing/usage/security/python)
+* [Erlang]({{<baseurl>}}riak/kv/3.0.1/developing/usage/security/erlang)
+
+## Certificates, Keys, and Authorities
+
+If Riak security is enabled, all client operations, regardless of the
+security source you choose for those clients, must be over a secure SSL
+connection. If you are using a self-generated Certificate Authority
+(CA), Riak and connecting clients will need to share that CA.
+
+To use certificate-based auth, you will need to create a Public Key
+Infrastructure (PKI) based on
+[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central
+foundation of your PKI should be a Certificate Authority (CA), created
+inside of a secure environment, that can be used to sign certificates.
+In addition to a CA, your client will need to have access to a private
+key shared only by the client and Riak as well as a CA-generated
+certificate.
+
+To prevent so-called [Man-in-the-Middle
+attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private
+keys should never be shared beyond Riak and connecting clients.
+
+> **HTTP not supported**
+>
+> Certificate-based authentication is available only through Riak's
+[Protocol Buffers]({{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers/) interface. It is not available through the
+[HTTP API]({{<baseurl>}}riak/kv/3.0.1/developing/api/http).
+
+### Default Names
+
+In Riak's [configuration files]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#security), the
+default certificate file names are as follows:
+
+Cert | Filename
+:----|:-------
+Certificate authority (CA) | `cacertfile.pem`
+Private key | `key.pem`
+CA-generated cert | `cert.pem`
+
+These filenames will be used in the client-library-specific tutorials.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/security/erlang.md b/content/riak/kv/3.0.1/developing/usage/security/erlang.md
new file mode 100644
index 0000000000..93d5cf9298
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/security/erlang.md
@@ -0,0 +1,118 @@
+---
+title_supertext: "Client Security:"
+title: "Erlang"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Erlang"
+    identifier: "usage_security_erlang"
+    weight: 103
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/advanced/client-security/erlang
+  - /riak/kv/3.0.1/dev/advanced/client-security/erlang
+
+---
+
+This tutorial shows you how to set up a Riak Erlang client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", ""},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+```
+
+Please note that you do not need to specify a password if you are not
+using password-based authentication. If you are using a different
+security source, Riak will ignore the password. You can enter an empty
+string (as in the example above) or anything you'd like.
+
+This client is not currently set up to use any of the available security
+sources, with the exception of trust-based authentication, provided that
+the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which the client is connecting has been specified as trusted. More
+on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `SecurityOptions` list from
+above. We'll use the password `rosebud` here and in the rest of the
+examples.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", "rosebud"},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
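+
+%% A quick, hedged way to verify the authenticated connection (assumes
+%% the "riakuser" account exists on the server with password "rosebud"):
+%% pong = riakc_pb_socket:ping(Pid).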
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/3.0.1/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + + + diff --git a/content/riak/kv/3.0.1/developing/usage/security/java.md b/content/riak/kv/3.0.1/developing/usage/security/java.md new file mode 100644 index 0000000000..9b58ffc934 --- /dev/null +++ b/content/riak/kv/3.0.1/developing/usage/security/java.md @@ -0,0 +1,121 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/3.0.1/dev/advanced/client-security/java + - /riak/kv/3.0.1/dev/advanced/client-security/java + +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to
+create a cluster object (we'll call it `cluster`), which will in turn be
+used to create a `client` object. The setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.RiakCluster;
+import com.basho.riak.client.api.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        // This will specify a username but no password or keystore:
+        .withAuth("riakuser", null, null)
+        .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+        .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        .withAuth("riakuser", "rosebud", ks)
+        .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/security/php.md b/content/riak/kv/3.0.1/developing/usage/security/php.md
new file mode 100644
index 0000000000..8a9060599b
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/security/php.md
@@ -0,0 +1,122 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/advanced/client-security/php
+  - /riak/kv/3.0.1/dev/advanced/client-security/php
+
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics). 
[Certificate]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/security/python.md b/content/riak/kv/3.0.1/developing/usage/security/python.md
new file mode 100644
index 0000000000..6d7ec9f38e
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/security/python.md
@@ -0,0 +1,176 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/advanced/client-security/python
+  - /riak/kv/3.0.1/dev/advanced/client-security/python
+
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their file paths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `creds` object. We'll use the
+password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/3.0.1/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a general CA (as with all security sources), a username, a
+CA-generated cert, and a private key. We'll assume that all certs are
+stored in `/ssl_dir`, as in the previous examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      cert_file='/ssl_dir/cert.pem',
+                      pkey_file='/ssl_dir/key.pem')
+```
+
+## Specifying a Certificate Revocation List
+
+If you are using a CA-generated Certificate Revocation List (CRL), you
+can specify its file path using the `crl_file` parameter.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      crl_file='/ssl_dir/revocation.crl')
+```
+
+## Specifying Ciphers
+
+To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/3.0.1/using/security/basics/#security-ciphers), you can pass in a colon-delimited
+string to the `ciphers` parameter:
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC')
+```
+
+## Using OpenSSL Objects
+
+Whenever you specify certs, you have the option of either passing in
+file paths as strings (as in the examples above) or properly created
+OpenSSL objects, e.g. objects created using the
+[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If
+you generate OpenSSL objects this way, you should note that they must
+be specified differently when creating a `SecurityCreds` object. The
+table below lists the appropriate parameter names for each method, as
+well as the pyOpenSSL class to which each cert must belong if you create
+OpenSSL objects.
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `key_file` | `key` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify file paths, the appropriate certs will be loaded and
+converted into the corresponding OpenSSL objects. The functions used for
+this are `OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/security/ruby.md b/content/riak/kv/3.0.1/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..b0510ce3a2
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/security/ruby.md
@@ -0,0 +1,162 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/advanced/client-security/ruby
+  - /riak/kv/3.0.1/dev/advanced/client-security/ruby
+
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/) you choose. All
+clients should also provide a username. The example below sets up a
+client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we will
+specify a password for the client in the `authentication` hash. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/3.0.1/using/security/basics#user-management).
+
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/ssl_dir/client_cert.pem',
+    cert: '/ssl_dir/cert.pem',
+    key: '/ssl_dir/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL objects.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object.
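+
+For example, a minimal sketch reusing the connection settings from
+above (the `crl` flag here is the setting described in the preceding
+paragraph):
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Disable CRL checking for this client:
+    crl: false
+  }
+)
+```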
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)),
+the OCSP endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
+
+
+
diff --git a/content/riak/kv/3.0.1/developing/usage/updating-objects.md b/content/riak/kv/3.0.1/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..eeac1f8262
--- /dev/null
+++ b/content/riak/kv/3.0.1/developing/usage/updating-objects.md
@@ -0,0 +1,778 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/using/updates
+  - /riak/kv/3.0.1/dev/using/updates
+
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in the case of conflict using [causal context]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context). Context objects track the causal history of objects.
+They are attached to _all_ Riak objects as metadata, and they are not
+readable by humans. They may sound complex---and they are fairly complex
+behind the scenes---but using them in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->atLocation($location)
+    ->withObject($object)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but it
+%% will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+obj = rsp.Values[0]
+obj.Value = []byte("Harlem Globetrotters")
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc = cmd.(*riak.StoreValueCommand)
+rsp = svc.Response
+obj = rsp.Values[0]
+fmt.Printf("champion: %v", string(obj.Value))
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+```
+
+In the samples above, we didn't need to actually interact with the
+context object, as retaining and passing along the context object was
+accomplished automatically by the client. If, however, you do need
+access to an object's context, the clients enable you to fetch it from
+the object:
+
+```java
+// Using the RiakObject obj from above:
+
+Vclock vClock = obj.getVclock();
+System.out.println(vClock.asString());
+
+// The context object will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```ruby
+# Using the RObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```php
+// Using the $object from above:
+
+echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```python
+# Using the RiakObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```csharp
+// Using the RiakResult rslt from above:
+var vclock = rslt.Value.VectorClock;
+Console.WriteLine(Convert.ToBase64String(vclock));
+
+// The output will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```javascript
+// Using the RiakObject fetchedObj from above:
+var fetchedObj = rslt.values.shift();
+logger.info("vclock: %s", fetchedObj.getVClock().toString('base64'));
+
+// The output will look something like this:
+// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA=
+```
+
+```erlang
+%% Using the Obj object from above:
+
+riakc_obj:vclock(Obj).
+
+%% The context object will look something like this in the Erlang shell:
+%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126,
+%%   6,175,157,255,57,131,41,145,49,143,149,225,240,...>>
+```
+
+```golang
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Println(rsp.VClock)
+
+// Output:
+// X3hNXFq3ythUqvvrG9eJEGbUyLS
+```
+
+## The Object Update Cycle
+
+If you decide that your application requires mutable data in Riak, we
+recommend that you:
+
+* avoid high-frequency object updates to the same key (i.e. multiple
+  updates per second for long periods of time), as this will degrade
+  Riak performance; and that you
+* follow a read-modify-write cycle when performing updates.
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated).
This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/#siblings) \(which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+  * [Java]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/java)
+  * [Ruby]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/ruby)
+  * [Python]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/python)
+  * [C#]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/csharp)
+  * [Go]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read-modify-write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+[strong consistency documentation]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/3.0.1/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official Ruby,
+PHP, Python, .NET, Node.js, Erlang, and Go clients. Because updates with the
+official Java client work somewhat differently, those examples can be found
+in the [section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which will bear the bucket type
+`siblings`, which sets `allow_mult` to `true`. The key for each object
+is the name of the team, e.g. `giants`, `broncos`, etc. Each object will
+consist of the name of the coach in plain text.
Here's an example of +creating and storing such an object: + +```ruby +bucket = client.bucket('coaches') +obj = bucket.get_or_new('seahawks', type: 'siblings') +obj.content_type = 'text/plain' +obj.raw_data = 'Pete Carroll' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + +if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); +} else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); +} + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('siblings').bucket('coaches') +obj = RiakObject(client, bucket, 'seahawks') +obj.content_type = 'text/plain' +obj.data = 'Pete Carroll' +obj.store() +``` + +```csharp +var id = new RiakObjectId("siblings", "coaches", "seahawks"); +var obj = new RiakObject(id, "Pete Carroll", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('siblings'); +riakObj.setBucket('coaches'); +riakObj.setKey('seahawks'); +riakObj.setValue('Pete Carroll'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } else { + logger.info('Stored Pete Carroll'); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>}, + <<"seahawks">>, + <<"Pete Carroll">>, + <<"text/plain">>). +riakc_pb_socket:put(Pid, Obj). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Pete Carroll"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey("seahawks"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +fmt.Println("Stored Pete Carroll") +``` + +Every once in a while, though, head coaches change in the NFL, which +means that our data would need to be updated. 
Below is an example
+function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.raw_data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($team, $coach) {
+    global $riak; // assumes the $riak client object created earlier
+
+    $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    if ($response->isSuccess()) {
+        $object = $response->getObject();
+        $object->setData($coach);
+    } else {
+        $object = new \Basho\Riak\Object($coach, 'text/plain');
+    }
+
+    $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->withObject($object)
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    return $response->isSuccess();
+}
+
+echo update_coach('packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Pid, Team, NewCoach) ->
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage
+update_coach(Pid, <<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak.
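+
+The same cycle applies if you are talking to Riak over HTTP rather than
+through a client library. Below is a rough sketch using `curl` against the
+default HTTP port; the `X-Riak-Vclock` value shown is a placeholder that
+you would copy from the read response, as described in the causal context
+section above:
+
+```curl
+# Read the current object and note the X-Riak-Vclock response header:
+curl -i http://localhost:8098/types/siblings/buckets/coaches/keys/packers
+
+# Write the new value, passing the fetched context back to Riak:
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -H "X-Riak-Vclock: <vclock value from the read above>" \
+  -d "Vince Lombardi" \
+  http://localhost:8098/types/siblings/buckets/coaches/keys/packers
+```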
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/#sensor-data) in
+Riak and using timestamps as keys, for example, then you can be sure that
+keys are not repeated. In that case, making writes to Riak without first
+reading the object is fine. If you're not certain, however, then we
+recommend always reading the object first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+    public String username;
+    public List<String> hobbies;
+
+    public User(String username, List<String> hobbies) {
+        this.username = username;
+        this.hobbies = hobbies;
+    }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` field:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that you can use to replace the existing
+object with a new object of the same type rather than changing one or
+more properties of the object.
Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above would update the object without returning the new
+value to the client. You could, however, use a no-operation update to
+_read_ an object as well if you set `return_body` to `true` in your
+request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withFetchOption(Option.RETURN_BODY, true)
+        .withUpdate(Update.noopUpdate())
+        .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
+
+
+
diff --git a/content/riak/kv/3.0.1/downloads.md b/content/riak/kv/3.0.1/downloads.md
new file mode 100644
index 0000000000..39a469af3e
--- /dev/null
+++ b/content/riak/kv/3.0.1/downloads.md
@@ -0,0 +1,26 @@
+---
+title: "Download for Riak KV 3.0.1"
+description: "Download some stuff!"
+menu:
+  riak_kv-3.0.1:
+    name: "Download Riak KV"
+    identifier: "download_riak_kv"
+    weight: 101
+    pre: download-alt
+project: "riak_kv"
+project_version: 3.0.1
+toc: false
+layout: downloads
+listed_projects:
+  - project: "riak_kv"
+    version: 3.0.1
+    title: "Riak KV"
+    install_instructions_set: "setup/installing"
+aliases:
+  - /riak/3.0.1/downloads
+  - /riak/kv/3.0.1/downloads
+---
+
+
+
diff --git a/content/riak/kv/3.0.1/index.md b/content/riak/kv/3.0.1/index.md
new file mode 100644
index 0000000000..45f3e6e3b7
--- /dev/null
+++ b/content/riak/kv/3.0.1/index.md
@@ -0,0 +1,81 @@
+---
+title: "Riak KV 3.0.1"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Riak KV"
+    identifier: "index"
+    weight: 100
+    pre: riak
+toc: false
+aliases:
+  - /riak/3.0.1/
+
+---
+
+[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/
+[config index]: {{<baseurl>}}riak/kv/3.0.1/configuring
+[downloads]: {{<baseurl>}}riak/kv/3.0.1/downloads/
+[install index]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/
+[plan index]: {{<baseurl>}}riak/kv/3.0.1/setup/planning
+[perf open files]: {{<baseurl>}}riak/kv/3.0.1/using/performance/open-files-limit
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/debian-ubuntu
+[usage search]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search
+[getting started]: {{<baseurl>}}riak/kv/3.0.1/developing/getting-started
+[dev client libraries]: {{<baseurl>}}riak/kv/3.0.1/developing/client-libraries
+
+
+
+Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data.
+
+This release allows Riak to run on OTP versions 20, 21, and 22, but is not fully backwards compatible with previous releases.
+
+- It is not possible to run this release on any OTP version prior to OTP 20. Testing of node-by-node upgrades is the responsibility of Riak customers; there has been no comprehensive, centrally managed testing of this upgrade. Most customer upgrade testing has focused on the uplift from 2.2.x on OTP R16 to 3.0 on OTP 20, so this is likely to be the safest transition.
+
+## Supported Operating Systems
+
+- Amazon Linux 2016.09 (AWS)
+- Amazon Linux 2 (AWS)
+- CentOS 6
+- CentOS 7
+- CentOS 8
+- Debian 7.0 ("Wheezy")
+- Debian 8.0 ("Jessie")
+- Debian 9.0 ("Stretch")
+- Debian 10.0 ("Buster")
+- Red Hat Enterprise Linux 7
+- Red Hat Enterprise Linux 8
+- Raspbian Buster
+- Ubuntu 12.04 ("Precise Pangolin")
+- Ubuntu 14.04 ("Trusty Tahr")
+- Ubuntu 16.04 ("Xenial Xerus")
+- Ubuntu 18.04 ("Bionic Beaver")
+- FreeBSD 10.4
+- FreeBSD 11.1
+- Mac OSX 10.11+ (development only)
+
+## Getting Started
+
+Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started:
+
+1. [Install Riak KV][install index]
+2. [Plan your Riak KV setup][plan index]
+3. [Configure Riak KV for your needs][config index]
+
+{{% note title="Developing with Riak KV" %}}
+If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/3.0.1/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more.
+{{% /note %}}
+
+## Popular Docs
+
+1. [Open Files Limit][perf open files]
+2. [Installing on Debian-Ubuntu][install debian & ubuntu]
+3. [Developing with Riak KV: Searching][usage search]
+4.
[Developing with Riak KV: Getting Started][getting started]
+5. [Developing with Riak KV: Client Libraries][dev client libraries]
+
+
+
+
diff --git a/content/riak/kv/3.0.1/learn.md b/content/riak/kv/3.0.1/learn.md
new file mode 100644
index 0000000000..7847e4ef3b
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn.md
@@ -0,0 +1,53 @@
+---
+title: "Learn About Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Learning"
+    identifier: "learn"
+    weight: 400
+    pre: beaker
+toc: true
+aliases:
+
+---
+
+[learn why riak]: ./why-riak-kv/
+[learn use cases]: ./use-cases/
+[learn new nosql]: ./new-to-nosql/
+[glossary]: ./glossary/
+[concepts]: ./concepts/
+
+## In This Section
+
+#### [Why Riak KV?][learn why riak]
+
+An overview of Riak KV and when to use it.
+
+[Learn More >>][learn why riak]
+
+#### [Use Cases][learn use cases]
+
+Details use cases and applications in which Riak KV excels.
+
+[Learn More >>][learn use cases]
+
+
+
+#### [Glossary][glossary]
+
+A list of terms relating to Riak used throughout the documentation.
+
+[Learn More >>][glossary]
+
+#### [Concepts][concepts]
+
+Provides definitions for, insight into, and high-level information about the various parts of Riak KV.
+
+[Learn More >>][concepts]
+
+
+
+
diff --git a/content/riak/kv/3.0.1/learn/concepts.md b/content/riak/kv/3.0.1/learn/concepts.md
new file mode 100644
index 0000000000..cd19287af3
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn/concepts.md
@@ -0,0 +1,49 @@
+---
+title: "Concepts"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Concepts"
+    identifier: "learn_concepts"
+    weight: 104
+    parent: "learn"
+toc: true
+aliases:
+
+---
+
+[concept aae]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets
+[concept cap neg]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/capability-negotiation
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
+[concept crdts]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/crdts
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency
+[concept keys objects]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/reference/strong-consistency
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/vnodes
+[config index]: {{<baseurl>}}riak/kv/3.0.1/configuring
+[plan index]: {{<baseurl>}}riak/kv/3.0.1/setup/planning
+[use index]: {{<baseurl>}}riak/kv/3.0.1/using/
+
+
+Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high-level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak.
+
+Learn more about:
+
+* [Active Anti-Entropy (AAE)][concept aae]
+* [Buckets][concept buckets]
+* [Capability Negotiation][concept cap neg]
+* [Causal Context][concept causal context]
+* [Clusters][concept clusters]
+* [Convergent Replicated Data Types (CRDTs)][concept crdts]
+* [Eventual Consistency][concept eventual consistency]
+* [Keys and Objects][concept keys objects]
+* [Replication][concept replication]
+* [Virtual Nodes (vnodes)][concept vnodes]
+
+
+
diff --git a/content/riak/kv/3.0.1/learn/concepts/active-anti-entropy.md b/content/riak/kv/3.0.1/learn/concepts/active-anti-entropy.md
new file mode 100644
index 0000000000..23cd7ebb65
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn/concepts/active-anti-entropy.md
@@ -0,0 +1,111 @@
+---
+title: "Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Active Anti-Entropy"
+    identifier: "learn_concepts_aae"
+    weight: 100
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.1/theory/concepts/aae
+  - /riak/kv/3.0.1/theory/concepts/aae
+
+---
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter
+[cluster ops aae]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/active-anti-entropy
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency
+[config aae]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/#active-anti-entropy
+[glossary read rep]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#read-repair
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode
+[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree
+[usage search]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search
+
+
+In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored
+on different nodes are an expected byproduct of node failure, concurrent
+client updates, physical data loss and corruption, and other events that
+distributed systems are built to handle. These conflicts occur when
+objects are either
+
+* **missing**, as when one node holds a replica of the object and
+  another node does not, or
+* **divergent**, as when the values of an existing object differ across
+  nodes.
+
+Riak KV offers two means of resolving object conflicts: read repair and
+active anti-entropy (AAE). Both of these conflict resolution mechanisms
+apply both to normal key/value data in Riak and to
+[search indexes][usage search].
+
+
+## Read Repair vs. Active Anti-Entropy
+
+In versions of Riak prior to 1.3, replica conflicts were healed via
+[read repair][glossary read rep], a _passive_
+anti-entropy mechanism that heals object conflicts only when a read
+request reaches Riak from a client. Under read repair, if the
+[vnode][glossary vnode] coordinating the read request determines
+that different nodes hold divergent values for the object, the repair
+process will be set in motion.
+
+One advantage of using read repair alone is that it doesn't require any
+kind of background process to take effect, which can cut down on CPU
+resource usage. The drawback of the read repair-only approach, however,
+is that the healing process can only ever reach those objects that
+are read by clients. Any conflicts in objects that are not read by
+clients will go undetected.
+
+The _active_ anti-entropy (AAE) subsystem was added to Riak in
+versions 1.3 and later to enable conflict resolution to run as a
+continuous background process, in contrast with read repair, which does
+not run continuously. AAE is most useful in clusters containing
+so-called "cold data" that may not be read for long periods of time, even
+months or years, and is thus not reachable by read repair.
+
+Although AAE is enabled by default, it can be turned off if necessary.
+See our documentation on [managing active anti-entropy][cluster ops aae]
+for information on how to enable and disable AAE, as well as on configuring
+and monitoring AAE.
+
+## Active Anti-Entropy and Hash Tree Exchange
+
+In order to compare object values between replicas without using more
+resources than necessary, Riak relies on [Merkle tree] hash exchanges
+between nodes.
+
+Using this type of exchange enables Riak to compare a balanced tree of
+Riak object hashes. Any difference at a higher level in the hierarchy
+means that at least one value has changed at a lower level. AAE
+recursively compares the tree, level by level, until it pinpoints exact
+values with a difference between nodes. The result is that AAE is able
+to run repair operations efficiently regardless of how many objects are
+stored in a cluster, since it need only repair specific objects instead
+of all objects.
+
+In contrast with related systems, Riak uses persistent, on-disk hash
+trees instead of in-memory hash trees. The advantages of this approach
+are twofold:
+
+* Riak can run AAE operations with a minimal impact on memory usage
+* Riak nodes can be restarted without needing to rebuild hash trees
+
+In addition, hash trees are updated in real time as new writes come in,
+which reduces the time that it takes to detect and repair missing or
+divergent replicas.
+
+As an additional fallback measure, Riak periodically clears and
+regenerates all hash trees from on-disk key/value data, which enables
+Riak to detect silent data corruption to on-disk data arising from disk
+failure, faulty hardware, and other sources. The default time period for
+this regeneration is one week, but this can be adjusted in each node's
+[configuration file][config aae].
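+
+For example, assuming the `anti_entropy.*` schema from the
+[configuration reference][config aae] (the key names below are
+illustrative; check the reference for your version's exact schema), the
+rebuild period could be raised from one week to two in `riak.conf`:
+
+```riakconf
+anti_entropy = active
+anti_entropy.tree.expiry = 2w
+```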
+
+
+
diff --git a/content/riak/kv/3.0.1/learn/concepts/buckets.md b/content/riak/kv/3.0.1/learn/concepts/buckets.md
new file mode 100644
index 0000000000..31db2e47e3
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn/concepts/buckets.md
@@ -0,0 +1,217 @@
+---
+title: "Buckets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Buckets"
+    identifier: "learn_concepts_buckets"
+    weight: 101
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.1/theory/concepts/Buckets
+  - /riak/kv/3.0.1/theory/concepts/Buckets
+  - /riak/3.0.1/theory/concepts/buckets
+  - /riak/kv/3.0.1/theory/concepts/buckets
+
+---
+
+[apps cluster metadata]: {{<baseurl>}}riak/kv/3.0.1/developing/app-guide/cluster-metadata
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/bucket-types
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/strong-consistency
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context
+[concept causal context sib]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context/#siblings
+[concept replication]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/reference/strong-consistency
+[config basic]: {{<baseurl>}}riak/kv/3.0.1/configuring/basic
+[dev api http]: {{<baseurl>}}riak/kv/3.0.1/developing/api/http
+[dev data types]: {{<baseurl>}}riak/kv/3.0.1/developing/data-types
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#ring
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/multi
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/replication
+
+
+Buckets are used to define a virtual keyspace for storing Riak objects.
+They enable you to define non-default configurations over that keyspace
+concerning [replication properties][concept replication] and [other
+parameters][config basic].
+
+In certain respects, buckets can be compared to tables in relational
+databases or folders in filesystems. From the standpoint
+of performance, buckets with default configurations are essentially
+"free," while non-default configurations, defined [using bucket
+types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.
+
+## Configuration
+
+Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables
+you to create and modify sets of configurations and apply them to as
+many buckets as you wish. With bucket types, you can configure the
+following bucket-level parameters, overriding the default values if you
+wish.
+
+#### allow_mult
+
+Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on
+the context. See the documentation on [`allow_mult`][usage bucket types] for more
+information.
+
+#### n_val
+
+Specifies the number of copies of each object to be stored in the
+cluster. See the documentation on [replication properties][usage replication]. Default:
+`3`.
+
+#### last_write_wins
+
+Indicates if an object's timestamp will be used to decide the canonical
+write in the case of a conflict. See the documentation on [vector
+clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default:
+`false`.
+
+#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum
+
+See the documentation on [replication properties][usage replication] for more information
+on all of these properties.
+
+#### precommit
+
+A list of Erlang functions to be executed before writing an object. See
+our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit
+hooks, i.e. an empty list.
+
+#### postcommit
+
+A list of Erlang functions to be executed after writing an object. See
+our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit
+hooks, i.e. an empty list.
+
+#### old_vclock, young_vclock, small_vclock, big_vclock
+
+These settings enable you to manage [vector clock pruning][concept causal context].
+
+#### backend
+
+If you are using the [Multi][plan backend multi] backend, this property enables you to
+determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using
+LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_
+buckets of all types will use the assigned backend.
+
+#### consistent
+
+If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets
+bearing a type, this setting must be set to `true`. The default is
+`false`. More information can be found in our documentation on [using
+strong consistency][cluster ops strong consistency].
+
+#### datatype
+
+If you are using [Riak data types][dev data types], this setting
+determines which data type will be used in
+buckets of this bucket type. Possible values: `counter`, `set`, or
+`map`.
+
+#### dvv_enabled
+
+Whether [dotted version vectors][concept causal context]
+will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.
+
+#### chash_keyfun, linkfun
+
+These settings involve features that have been deprecated. You will not
+need to adjust these values.
+
+## Fetching Bucket Properties
+
+If you'd like to see how a particular bucket has been configured, you
+can do so using our official client libraries or through Riak's [HTTP
+API][dev api http]. The following would fetch the properties for the bucket
+`animals` if that bucket had a default configuration, i.e.
the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak's HTTP API is running on "localhost" and port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals', 'my_custom_type')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.1/learn/concepts/capability-negotiation.md b/content/riak/kv/3.0.1/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..0473196531
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn/concepts/capability-negotiation.md
@@ -0,0 +1,36 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.1/theory/concepts/capability-negotiation
+  - /riak/kv/3.0.1/theory/concepts/capability-negotiation
+
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/3.0.1/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/mapreduce
+
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
+
+Rolling upgrades no longer require you to disable and then re-enable features due to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster. 
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + + + + diff --git a/content/riak/kv/3.0.1/learn/concepts/causal-context.md b/content/riak/kv/3.0.1/learn/concepts/causal-context.md new file mode 100644 index 0000000000..eb5ce58dac --- /dev/null +++ b/content/riak/kv/3.0.1/learn/concepts/causal-context.md @@ -0,0 +1,289 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.1/theory/concepts/context + - /riak/kv/3.0.1/theory/concepts/context + +--- + + +[concept aae]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/3.0.1/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/3.0.1/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/3.0.1/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+  a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+  b. Riak creates sibling values, aka **siblings**, for the object
+
+  c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 1.4, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+ * Whether one object is a direct descendant of the other
+ * Whether the objects are direct descendants of a common parent
+ * Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects. 
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks](#vector-clocks). In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object but they
+can't identify which value was associated with each update. 
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#siblings) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates,
+objects that use them should generally be smaller than objects that use
+vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence of interactions with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`. So if you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion. 
At the extreme, having an enormous object in a +node can cause reads of that object to crash the entire node. Other +issues include [undue latency][perf latency reduc] and +out-of-memory errors. + +To prevent sibling explosion, we recommend the following: + +1. Use [dotted version vectors](#dotted-version-vectors) +instead of vector clocks for causal +context. +2. Always update mutable objects within a read/modify/write cycle. More +information can be found in the [Object Updates][usage updating objects] doc. + +## Resources + +* [Evaluating Dotted Version Vectors in Riak] +* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study] +* [Dotted Version Vector Sets] +* [A History of Time in Riak] + + + diff --git a/content/riak/kv/3.0.1/learn/concepts/clusters.md b/content/riak/kv/3.0.1/learn/concepts/clusters.md new file mode 100644 index 0000000000..ad7921ac34 --- /dev/null +++ b/content/riak/kv/3.0.1/learn/concepts/clusters.md @@ -0,0 +1,117 @@ +--- +title: "Clusters" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Clusters" + identifier: "learn_concepts_clusters" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.1/theory/concepts/Clusters + - /riak/kv/3.0.1/theory/concepts/Clusters + - /riak/3.0.1/theory/concepts/clusters + - /riak/kv/3.0.1/theory/concepts/clusters + +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets +[concept keys objects]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode +[learn dynamo]: {{<baseurl>}}riak/kv/3.0.1/learn/dynamo +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution +[usage replication]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/replication + + +Riak's default mode of operation is to work as a cluster consisting of +multiple [nodes][glossary node], i.e. multiple well-connected data +hosts. + +Each host in the cluster runs a single instance of Riak, referred to as +a Riak node. Each Riak node manages a set of virtual nodes, or +[vnodes][glossary vnode], that are responsible for storing a +separate portion of the keys stored in the cluster. + +In contrast to some high-availability systems, Riak nodes are _not_ +clones of one another, and they do not all participate in fulfilling +every request. Instead, you can configure, at runtime or at request +time, the number of nodes on which data is to be replicated, as well as +when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed. + +## The Ring + +Though much of this section is discussed in our annotated discussion of +the Amazon [Dynamo paper][learn dynamo], it nonetheless provides a summary of +how Riak implements the distribution of data throughout a cluster. + +Any client interface to Riak interacts with objects in terms of the +[bucket][concept buckets] and [key][concept keys objects] in which a value is +stored, as well as the [bucket type][usage bucket types] that is used +to set the bucket's properties. + +Internally, Riak computes a 160-bit binary hash of each bucket/key pair +and maps this value to a position on an ordered **ring** of all such +values. 
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes define a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![A Riak Ring]({{<baseurl>}}images/riak-data-distribution.png)
+
+When N is set to 3, the value `REM` is stored in the key `artist`. That
+key is assigned to 3 partitions out of 32 available partitions. When a
+read request is made to Riak, the ring state will be used to determine
+which partitions are responsible. From there, a variety of
+[configurable parameters][usage replication] determine how Riak
+will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates. 
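+
+One practical way to observe the ring state that gossip maintains is
+through the cluster admin commands. The following is a minimal sketch;
+the exact output format varies by version, so none is shown here:
+
+```bash
+# Show each member node and the percentage of the ring it claims
+riak admin member-status
+
+# Show ring health, including unreachable nodes and pending ownership changes
+riak admin ring-status
+```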
+
+
+
diff --git a/content/riak/kv/3.0.1/learn/concepts/crdts.md b/content/riak/kv/3.0.1/learn/concepts/crdts.md
new file mode 100644
index 0000000000..9f24a39361
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn/concepts/crdts.md
@@ -0,0 +1,252 @@
+---
+title_supertext: "Concept"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Data Types"
+    identifier: "learn_concepts_data_types"
+    weight: 104
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.1/theory/concepts/crdts
+  - /riak/kv/3.0.1/theory/concepts/crdts
+
+---
+
+[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
+[data types converg]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/crdts/#convergence
+[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html
+[data types impl]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/crdts/#implementation
+[concept causal context dvv]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context/#dotted-version-vectors
+[concept causal context sib]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context/#siblings
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context/#vector-clocks
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/strong-consistency
+[dev data types]: {{<baseurl>}}riak/kv/3.0.1/developing/data-types
+[riak_dt]: https://github.com/basho/riak_dt
+[dev data types context]: {{<baseurl>}}riak/kv/3.0.1/developing/data-types/#data-types-and-context
+[glossary node]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution
+
+Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections:
+
+- Counters
+- Flags
+- HyperLogLogs
+- Maps
+- Registers
+- Sets
+
+The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients.
+
+Instead of the usual create, read, update, and delete (CRUD) operations
+performed on key/value pairs, data types enable you to perform
+operations such as removing a register from a map, telling a counter to
+increment itself by 5, or enabling a flag that was previously disabled.
+
+While Riak Data Types are operations-based from the standpoint of connecting clients, the [convergence logic][data types converg] behind the scenes is state-based, as with all CRDTs.
+
+Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below.
+
+For more articles on CRDTs, check out this [reading list][crdts reading list].
+
+
+## Counters
+
+Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used within a map. A counter's value can only be a positive integer, negative integer, or zero. 
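+
+Because counters live under bucket types whose `datatype` property is
+`counter`, using one typically starts with creating such a type. Below
+is a minimal, hedged sketch: the type, bucket, and key names are
+illustrative, and the HTTP API is assumed to be on its default port
+(8098):
+
+```bash
+# Create and activate a bucket type that stores counters
+riak admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak admin bucket-type activate counters
+
+# Increment the counter stored at likes/post42 by 5, then read it back
+curl -XPOST http://localhost:8098/types/counters/buckets/likes/datatypes/post42 \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+curl http://localhost:8098/types/counters/buckets/likes/datatypes/post42
+```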
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, you should not use counters, because
+uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, HLLs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+    Counter: numberOfRetweets,
+    Register: username,
+    Register: tweetContent,
+    Flag: favorited?,
+    Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which include
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+The only operation registers support is changing the binary stored within them. Registers can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.
+
+
+## Sets
+
+Sets are collections of unique binary values, such as strings. If you
+attempt to add the element `shovel` to a set that already contains
+`shovel`, the operation will be ignored by Riak KV. Sets can be used
+either on their own or embedded in a map. 
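+
+As with counters above, here is a minimal, hedged sketch of working
+with a set over HTTP (illustrative names; default port assumed):
+
+```bash
+# Create and activate a bucket type that stores sets
+riak admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak admin bucket-type activate sets
+
+# Add two elements to the set stored at carts/user1, then read it back.
+# (Removing elements additionally requires the causal context returned
+# by a read.)
+curl -XPOST http://localhost:8098/types/sets/buckets/carts/datatypes/user1 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all": ["shovel", "spade"]}'
+curl http://localhost:8098/types/sets/buckets/carts/datatypes/user1
+```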
+ +Some examples of using sets: + +- Storing the UUIDs of a user's friends in a social network application +- Storing items in an e-commerce shopping cart + +### Operations + +Sets are subject to four basic operations: add an element, remove an +element, add multiple elements, or remove multiple elements. + + +## Advantages and Disadvantages of Data Types + +[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider. + +One of the core purposes behind data types is to relieve developers +using Riak KV of the burden of producing data convergence at the +application level by absorbing a great deal of that complexity into Riak KV +itself. Riak KV manages this complexity by building eventual consistency +into the data types themselves instead of requiring clients to do so. + +You can still build applications with Riak KV that treat it as a highly +available key/value store, and you will always have this choice. What +Riak Data Types provide is additional flexibility and a broader choice +palette. + +The trade-off that data types necessarily present is that they don't +allow you to produce your own convergence logic. If your use case +demands that you be able to create your own deterministic merge +functions, then Riak Data Types might not be a good fit. + + +## Implementation + +Conflicts between replicas are inevitable in a distributed system like +Riak KV. + +For example, if a map is stored in the key `my_map`, it is always +possible that the value of `my_map` will be different in nodes A and B. + +Without using data types, that conflict must be resolved using +timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt]. + + +## Convergence + +The benefit of data types is that Riak KV knows how to resolve value +conflicts by applying data type-specific rules. + +Riak KV does this by remembering the history of a value and broadcasting that +history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct. + +### Example + +Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`. While on another node the set has only one element, `apple`. + +What happens when the two nodes communicate and note the divergence? + +In this case Riak KV would declare the set with two elements the winner. +At that point, the node with the incorrect set would be told: "The set +`fruits` should have elements `apple` and `orange`." + +In general, convergence involves the following stages: + +1. Check for divergence. If the data types have the same value, Riak KV + does nothing. But if divergence is noted... +2. Riak KV applies data type-specific merge rules, like in the `fruits` + set example above, which will result in a "correct" value. +3. 
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. + + + diff --git a/content/riak/kv/3.0.1/learn/concepts/eventual-consistency.md b/content/riak/kv/3.0.1/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..569928706b --- /dev/null +++ b/content/riak/kv/3.0.1/learn/concepts/eventual-consistency.md @@ -0,0 +1,202 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.1/theory/concepts/Eventual-Consistency + - /riak/kv/3.0.1/theory/concepts/Eventual-Consistency + - /riak/3.0.1/theory/concepts/eventual-consistency + - /riak/kv/3.0.1/theory/concepts/eventual-consistency + +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these
+systems need to rely on some form of conflict-resolution mechanism.
+
+One of the things that makes Riak's eventual consistency model powerful
+is that Riak does not dictate how data resolution takes place. While
+Riak does ship with a set of defaults regarding how data is
+[replicated](#replication-properties-and-request-tuning) and how
+[conflicts are resolved][usage conflict resolution], you can override these
+defaults if you want to employ a different strategy.
+
+Among those strategies, you can enable Riak to resolve object conflicts
+automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or
+special eventually consistent [Data Types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types/), or you can resolve those
+conflicts on the application side by employing a use case-specific logic
+of your choosing. More information on this can be found in our guide to
+[conflict resolution][usage conflict resolution].
+
+This variety of options enables you to manage Riak's eventually
+consistent behavior in accordance with your application's [data model
+or models]({{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/).
+
+## Replication Properties and Request Tuning
+
+In addition to providing you with different means of resolving conflicts,
+Riak also enables you to fine-tune **replication properties**, which
+determine things like the number of nodes on which data should be stored
+and the number of nodes that are required to respond to read, write, and
+other requests.
+
+An in-depth discussion of these behaviors and how they can be
+implemented on the application side can be found in our guides to
+[replication properties][concept replication] and [conflict resolution][usage conflict resolution].
+
+In addition to our official documentation, we also recommend checking
+out the [Understanding Riak's Configurable
+Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+series from [the Basho blog](https://riak.com/blog/).
+
+## A Simple Example of Eventual Consistency
+
+Let's assume for the moment that a sports news application is storing
+all of its data in Riak. One thing that the application always needs to
+be able to report to users is the identity of the current manager of
+Manchester United, which is stored in the key `manchester-manager` in
+the bucket `premier-league-managers`. This bucket has `allow_mult` set
+to `false`, which means that Riak will resolve all conflicts by itself.
+
+Now let's say that a node in this cluster has recently recovered from
+failure and has an old copy of the key `manchester-manager` stored in
+it, with the value `Alex Ferguson`. The problem is that Sir Alex
+stepped down in 2013 and is no longer the manager. Fortunately, the
+other nodes in the cluster hold the value `David Moyes`, which is
+correct.
+
+Shortly after the recovered node comes back online, other cluster
+members recognize that it is available. Then, a read request for
+`manchester-manager` arrives from the application. Regardless of which
+order the responses arrive to the node that is coordinating this
+request, `David Moyes` will be returned as the value to the client,
+because `Alex Ferguson` is recognized as an older value.
+
+Why is this? How does Riak make this decision? Behind the scenes, after
+`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the
+older value on the node that just came back online. 
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because 1 of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we've been talking
+about the possibility of a node for the `manchester-manager` key having
+failed; but to be more precise, we've been talking about a *primary*
+node, one that when the cluster is perfectly healthy would bear
+responsibility for that key.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure. 
+>
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3` the client indicated that 3 primary
+nodes must respond for the operation to be considered successful, which
+it wasn't, but there's no way to tell without performing another read
+whether the operation truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels, et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
+
+
+
diff --git a/content/riak/kv/3.0.1/learn/concepts/keys-and-objects.md b/content/riak/kv/3.0.1/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..d79793d2e2
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn/concepts/keys-and-objects.md
@@ -0,0 +1,53 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.1/theory/concepts/keys-and-values
+  - /riak/kv/3.0.1/theory/concepts/keys-and-values
+
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there is more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and occur when more than one actor updates an
+object, when a network partition occurs, or when a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]). 
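+
+A quick, hedged illustration over the HTTP API (the bucket and key are
+borrowed from the earlier example and purely illustrative; Riak's HTTP
+interface is assumed to be on its default port, 8098):
+
+```bash
+# Fetch an object along with its headers; the X-Riak-Vclock header
+# carries the vector clock described above. If the object has siblings,
+# Riak instead responds with "300 Multiple Choices" and lists them.
+curl -i http://localhost:8098/types/default/buckets/customers/keys/mariejohnston
+```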
+
+
+
diff --git a/content/riak/kv/3.0.1/learn/concepts/replication.md b/content/riak/kv/3.0.1/learn/concepts/replication.md
new file mode 100644
index 0000000000..49b92d5594
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn/concepts/replication.md
@@ -0,0 +1,323 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.1/theory/concepts/Replication
+  - /riak/kv/3.0.1/theory/concepts/Replication
+  - /riak/3.0.1/theory/concepts/replication
+  - /riak/kv/3.0.1/theory/concepts/replication
+
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing assurance
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication][cluster ops v3 mdc] capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak sets the `n_val` property to 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail. 
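+
+To make that concrete, here is a hedged sketch of a read that requests a
+full quorum via the HTTP API's `r` query parameter (bucket and key names
+are illustrative; the default port is assumed):
+
+```bash
+# Wait for all N=3 replicas to respond before answering; if fewer than
+# 3 replicas of this key are reachable, the request fails
+curl "http://localhost:8098/types/default/buckets/users/keys/user1?r=3"
+```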
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable, data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up changed, leaving existing replicas on vnodes that are no
+longer part of the object's preflist and will therefore no longer be
+consulted for it.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/3.0.1/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/3.0.1/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/active-anti-entropy)
+
+
+## Read Repair
+
+Read repair is triggered when a read succeeds---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Alternatively, if you have [active
+anti-entropy][concept aae] enabled, your values will
+eventually be repaired as a background task.
+
+For each object that fails read (or the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean? 
N=3 simply means that three copies of each piece of data will be stored
+in the cluster. That is, three different partitions/vnodes will receive
+copies of the data. **There are no guarantees that the three replicas
+will go to three separate physical nodes**; however, the built-in
+functions for determining where replicas go attempt to distribute the
+data evenly.
+
+As nodes are added and removed from the cluster, the ownership of
+partitions changes and may result in an uneven distribution of the data.
+On some rare occasions, Riak will also aggressively reshuffle ownership
+of the partitions to achieve a more even balance.
+
+For cases where the number of nodes is less than the N value, data will
+likely be duplicated on some nodes. For example, with N=3 and 2 nodes in
+the cluster, one node will likely have one replica, and the other node
+will have two replicas.
+
+## Understanding replication by example
+
+To better understand how data is replicated in Riak, let's take a look at
+a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically
+we'll focus on two parts of the request: routing an object to a set of
+partitions and storing an object on a partition.
+
+### Routing an object to a set of partitions
+
+ * Assume we have 3 nodes
+ * Assume we store 3 replicas per object (N=3)
+ * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8)
+
+**Note**: It is not recommended that you use such a small ring size.
+This is for demonstration purposes only.
+
+With only 8 partitions our ring will look approximately as follows
+(response from `riak_core_ring_manager:get_my_ring/0` truncated for
+clarity):
+
+```erlang
+(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
+[{0,'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
+```
+
+The node handling this request hashes the bucket/key combination:
+
+```erlang
+(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+```
+
+The DocIdx hash is a 160-bit integer:
+
+```erlang
+(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx.
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+(dev1@127.0.0.1)6> I.
+1045375627425331784151332358177649483819648417632
+```
+
+The node looks up the hashed key in the ring, which returns a list of
+_preferred_ partitions for the given key.
+
+```erlang
+(dev1@127.0.0.1)7> Preflist = riak_core_ring:preflist(DocIdx, Ring). 
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0, 'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]
+```
+
+The node chooses the first N partitions from the list. The remaining
+partitions of the "preferred" list are retained as fallbacks to use if
+any of the target partitions are unavailable.
+
+```erlang
+(dev1@127.0.0.1)8> N = 3.
+3
+(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist).
+{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0,'dev1@127.0.0.1'}],
+[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]}
+```
+
+The partition information returned from the ring contains a partition
+identifier and the parent node of that partition:
+
+```erlang
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}
+```
+
+The requesting node sends a message to each parent node with the object
+and partition identifier (pseudocode for clarity):
+
+```erlang
+'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232}
+'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+'dev1@127.0.0.1' ! {put, Object, 0}
+```
+
+If any of the target partitions fail, the node sends the object to one
+of the fallbacks. When the message is sent to the fallback node, the
+message references the object and original partition identifier. For
+example, if `dev2@127.0.0.1` were unavailable, the requesting node would
+then try each of the fallbacks. The fallbacks in this example are:
+
+```erlang
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}
+```
+
+The next available fallback node would be `dev3@127.0.0.1`. The
+requesting node would send a message to the fallback node with the
+object and original partition identifier:
+
+```erlang
+'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+```
+
+Note that the partition identifier in the message is the same one that
+was originally sent to `dev2@127.0.0.1`, only this time it is being sent
+to `dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node
+of that partition, it is smart enough to hold on to the object until
+`dev2@127.0.0.1` returns to the cluster.
+
+## Processing partition requests
+
+Processing requests per partition is fairly simple. Each node runs a
+single process (`riak_kv_vnode_master`) that distributes requests to
+individual partition processes (`riak_kv_vnode`). The
+`riak_kv_vnode_master` process maintains a list of partition identifiers
+and corresponding partition processes. If a process does not exist for a
+given partition identifier, a new process is spawned to manage that
+partition.
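+
+To make the lookup-or-spawn pattern concrete, here is a minimal,
+illustrative Erlang sketch. It is not the actual `riak_kv_vnode_master`
+implementation; the module name and the `Vnodes` map (partition
+identifier to pid) are assumptions for the example:
+
+```erlang
+-module(vnode_master_sketch).
+-export([dispatch/3]).
+
+%% Forward a request to the process managing `Partition`, spawning one
+%% first if none exists yet. Returns the (possibly updated) map.
+dispatch(Partition, Request, Vnodes) ->
+    case maps:find(Partition, Vnodes) of
+        {ok, Pid} ->
+            Pid ! {request, Request},
+            Vnodes;
+        error ->
+            Pid = spawn(fun() -> vnode_loop(Partition) end),
+            Pid ! {request, Request},
+            maps:put(Partition, Pid, Vnodes)
+    end.
+
+vnode_loop(Partition) ->
+    receive
+        {request, Request} ->
+            %% A real vnode would apply the request to its storage
+            %% backend here.
+            io:format("partition ~p handling ~p~n", [Partition, Request]),
+            vnode_loop(Partition)
+    end.
+```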
+
+The `riak_kv_vnode_master` process treats all requests the same and
+spawns partition processes as needed even when nodes receive requests
+for partitions they do not own. When a partition's parent node is
+unavailable, requests are sent to fallback nodes (handoff). The
+`riak_kv_vnode_master` process on the fallback node spawns a process to
+manage the partition even though the partition does not belong to the
+fallback node.
+
+Each individual partition process performs a hometest throughout its
+life. The hometest checks if the current node (`node/0`)
+matches the parent node of the partition as defined in the ring. If the
+process determines that the partition it is managing belongs on another
+node (the parent node), it will attempt to contact that node. If that
+parent node responds, the process will hand off any objects it has
+processed for that partition and shut down. If that parent node does not
+respond, the process will continue to manage that partition and check
+the parent node again after a delay. The hometest is also run by
+partition processes to account for changes in the ring, such as the
+addition of nodes to, or removal of nodes from, the cluster.
+
+
+
+
diff --git a/content/riak/kv/3.0.1/learn/concepts/strong-consistency.md b/content/riak/kv/3.0.1/learn/concepts/strong-consistency.md
new file mode 100644
index 0000000000..edd990d6de
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn/concepts/strong-consistency.md
@@ -0,0 +1,105 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Strong Consistency"
+    identifier: "learn_concepts_strong_consistency"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.1/theory/concepts/strong-consistency
+  - /riak/kv/3.0.1/theory/concepts/strong-consistency
+
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency
+
+{{% note title="Please Note:" %}}
+Riak KV's strong consistency is an experimental feature and may be removed
+from the product in the future. Strong consistency is not commercially
+supported or production-ready. Strong consistency is incompatible with
+Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB
+Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its
+usage in any production environment.
+{{% /note %}}
+
+Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+(i.e. fault) tolerance and high read and write availability.
+
+While this focus on high availability is a great fit for many data
+storage needs, there are also many use cases for which strong data
+consistency is more important than availability. Basho introduced a new
+strong consistency option in version 2.0 to address these use cases.
+In Riak, strong consistency is applied [using bucket types][usage bucket types], which
+enables developers to apply strong consistency guarantees on a per-key
+basis.
+
+Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/3.0.1/configuring/strong-consistency) looking to manage,
+configure, and monitor strong consistency.
+
+## Strong vs.
Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. 
Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + + + diff --git a/content/riak/kv/3.0.1/learn/concepts/vnodes.md b/content/riak/kv/3.0.1/learn/concepts/vnodes.md new file mode 100644 index 0000000000..b9def90447 --- /dev/null +++ b/content/riak/kv/3.0.1/learn/concepts/vnodes.md @@ -0,0 +1,160 @@ +--- +title: "Vnodes" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Vnodes" + identifier: "learn_concepts_vnodes" + weight: 109 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.1/theory/concepts/vnodes + - /riak/kv/3.0.1/theory/concepts/vnodes + +--- + + +[concept causal context]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context +[concept clusters ring]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters/#the-ring +[concept replication]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/strong-consistency +[glossary node]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#node +[glossary ring]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#ring +[plan backend]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend +[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/cluster-capacity +[use admin riak cli]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak-cli + + +Virtual nodes, more commonly referred to as **vnodes**, are processes +that manage partitions in the Riak [ring][glossary ring]. Each data +partition in a Riak cluster has a vnode that **claims** that partition. +Vnodes perform a wide variety of operations, from K/V storage operations +to guaranteeing [strong consistency][concept strong consistency] if you choose to use that +feature. + +## The Number of Vnodes in a Cluster + +The term [node][glossary node] refers to a full instance of Riak, +be it on its own physical machine or alongside others on a single +machine, as in a development cluster on your laptop. Each Riak node +contains multiple vnodes. The number per node is the [ring +size][concept clusters ring] divided by the number of nodes in the cluster. + +This means that in some clusters different nodes will have different +numbers of data partitions (and hence a different number of vnodes), +because (ring size / number of nodes) will not produce an even integer. +If the ring size of your cluster is 64 and you are running three nodes, +two of your nodes will have 21 vnodes, while the third node holds 22 +vnodes. + +The output of the [`riak admin member-status`][use admin riak cli] +command shows this: + +``` +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +valid 34.4% -- 'dev1@127.0.0.1' +valid 32.8% -- 'dev2@127.0.0.1' +valid 32.8% -- 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 +``` + +In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of +64 partitions, while the other two nodes account for 32.8%, i.e. 21 out +of 64 partitions. This is normal and expected behavior in Riak. + +We strongly recommend setting the appropriate ring size, and by +extension the number of vnodes, prior to building a cluster. A full +guide can be found in our [cluster planning][plan cluster capacity] documentation. 
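+
+If you want to verify the partition count on a node yourself, you can
+inspect the ring from an attached console. The following is a minimal
+sketch; it assumes a `riak remote_console` session and that
+`riak_core_ring:my_indices/1` returns the partition indices claimed by
+the local node:
+
+```erlang
+%% Sketch: count the vnodes (claimed partitions) on the local node.
+{ok, Ring} = riak_core_ring_manager:get_my_ring().
+length(riak_core_ring:my_indices(Ring)).
+%% => 21 or 22 per node in the 64-partition, 3-node example above
+```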
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, acting as members
+of [strong consistency ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1004782375664995756265033322492444576013453623296`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node.
The report for a specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+           0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
+
+
+
diff --git a/content/riak/kv/3.0.1/learn/dynamo.md b/content/riak/kv/3.0.1/learn/dynamo.md
new file mode 100644
index 0000000000..7a16f0bac5
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn/dynamo.md
@@ -0,0 +1,1928 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/3.0.1/theory/dynamo
+  - /riak/kv/3.0.1/theory/dynamo
+
+---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.

<!-- Random comment to add some padding between blockquotes -->

+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV,
+> Cassandra and Voldemort come to mind. You may also remember Dynomite (which
+> predates all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use.
+
+Categories and Subject Descriptors
+
+* D.4.2 [Operating Systems]: Storage Management;
+* D.4.5 [Operating Systems]: Reliability;
+* D.4.8 [Operating Systems]: Performance;
+
+General Terms
+
+Algorithms, Management, Measurement, Performance, Design, Reliability.
+
+## 1. Introduction
+
+Amazon runs a world-wide e-commerce platform that serves tens of millions
+customers at peak times using tens of thousands of servers located in many data
+centers around the world. There are strict operational requirements on Amazon’s
+platform in terms of performance, reliability and efficiency, and to support
+continuous growth the platform needs to be highly scalable. Reliability is one
+of the most important requirements because even the slightest outage has
+significant financial consequences and impacts customer trust. In addition, to
+support continuous growth, the platform needs to be highly scalable.
+
+One of the lessons our organization has learned from operating Amazon’s platform
+is that the reliability and scalability of a system is dependent on how its
+application state is managed. Amazon uses a highly decentralized, loosely
+coupled, service oriented architecture consisting of hundreds of services. In
+this environment there is a particular need for storage technologies that are
+always available. For example, customers should be able to view and add items to
+their shopping cart even if disks are failing, network routes are flapping, or
+data centers are being destroyed by tornados. Therefore, the service responsible
+for managing shopping carts requires that it can always write to and read from
+its data store, and that its data needs to be available across multiple data
+centers.
+
+Dealing with failures in an infrastructure comprised of millions of components
+is our standard mode of operation; there are always a small but significant
+number of server and network components that are failing at any given time. As
+such Amazon’s software systems need to be constructed in a manner that treats
+failure handling as the normal case without impacting availability or
+performance.
+
+To meet the reliability and scaling needs, Amazon has developed a number of
+storage technologies, of which the Amazon Simple Storage Service (also available
+outside of Amazon and known as Amazon S3), is probably the best known. This
+paper presents the design and implementation of Dynamo, another highly available
+and scalable distributed data store built for Amazon’s platform. Dynamo is used
+to manage the state of services that have very high reliability requirements and
+need tight control over the tradeoffs between availability, consistency,
+cost-effectiveness and performance. Amazon’s platform has a very diverse set of
+applications with different storage requirements. A select set of applications
+requires a storage technology that is flexible enough to let application
+designers configure their data store appropriately based on these tradeoffs to
+achieve high availability and guaranteed performance in the most cost effective
+manner.
+
+There are many services on Amazon’s platform that only need primary-key access
+to a data store. For many services, such as those that provide best seller
+lists, shopping carts, customer preferences, session management, sales rank, and
+product catalog, the common pattern of using a relational database would lead to
+inefficiencies and limit scale and availability. Dynamo provides a simple
+primary-key only interface to meet the requirements of these applications.
+ +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. It also provides insight into the +tuning of these techniques to meet the requirements of production systems with +very strict performance demands. + +The paper is structured as follows. Section 2 presents the background and +Section 3 presents the related work. Section 4 presents the system design and +Section 5 describes the implementation. Section 6 details the experiences and +insights gained by running Dynamo in production and Section 7 concludes the +paper. There are a number of places in this paper where additional information +may have been appropriate but where protecting Amazon’s business interests +require us to reduce some level of detail. For this reason, the intra- and +inter-datacenter latencies in section 6, the absolute request rates in section +6.2 and outage lengths and workloads in section 6.3 are provided through +aggregate measures instead of absolute details. + + +## 2. Background + +Amazon’s e-commerce platform is composed of hundreds of services that work in +concert to deliver functionality ranging from recommendations to order +fulfillment to fraud detection. Each service is exposed through a well defined +interface and is accessible over the network. 
These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated and diverged from
+> Dynamo's (as described in this paper).
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly-available, is efficient in its resource
+> usage, and has a simple scale out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
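+
+> As a hedged illustration of the basic key/value interface, the following
+> sketch uses the Riak Erlang client (`riakc`); the host, port, and
+> bucket/key names are assumptions for the example:
+>
+> ```erlang
+> %% Sketch: PUT then GET through the riakc client (8087 is the default
+> %% Protocol Buffers port; adjust for your node).
+> {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+> Obj = riakc_obj:new(<<"my_bucket">>, <<"my_key">>, <<"my_value">>),
+> ok = riakc_pb_socket:put(Pid, Obj),
+> {ok, Fetched} = riakc_pb_socket:get(Pid, <<"my_bucket">>, <<"my_key">>).
+> ```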
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract.
+ +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. 
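+
+> For the curious: measuring a 99.9th-percentile latency is plain order
+> statistics. The following is a minimal, illustrative sketch (nearest-rank
+> method; the module name and usage are made up for this example), not
+> anything from Riak KV's monitoring code:
+>
+> ```erlang
+> -module(percentile_sketch).
+> -export([percentile/2]).
+>
+> %% Nearest-rank percentile: P in (0, 100], Latencies a non-empty list.
+> percentile(P, Latencies) ->
+>     Sorted = lists:sort(Latencies),
+>     N = length(Sorted),
+>     Rank = min(N, max(1, ceil(P / 100 * N))),
+>     lists:nth(Rank, Sorted).
+>
+> %% percentile(99.9, LatenciesMs) returns the bound that 99.9% of the
+> %% sampled requests fall under, e.g. for an SLA like "300ms for 99.9%".
+> ```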
+ + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. On the other hand, since the application is aware of the +data schema it can decide on the conflict resolution method that is best suited +for its client’s experience. 
For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbors. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops.
+ +To reduce the additional latency introduced by multi-hop routing, some P2P +systems (e.g., [14]) employ O(1) routing where each peer maintains enough +routing information locally so that it can route requests (to access a data +item) to the appropriate peer within a constant number of hops. + +> Riak KV's gossip protocol communicates between nodes with O(1) routing, and +> maintains local routing information. + +Various storage systems, such as Oceanstore [9] and PAST [17] were built on top +of these routing overlays. Oceanstore provides a global, transactional, +persistent storage service that supports serialized updates on widely replicated +data. To allow for concurrent updates while avoiding many of the problems +inherent with wide-area locking, it uses an update model based on conflict +resolution. Conflict resolution was introduced in [21] to reduce the number of +transaction aborts. Oceanstore resolves conflicts by processing a series of +updates, choosing a total order among them, and then applying them atomically in +that order. It is built for an environment where the data is replicated on an +untrusted infrastructure. By comparison, PAST provides a simple abstraction +layer on top of Pastry for persistent and immutable objects. It assumes that the +application can build the necessary storage semantics (such as mutable files) on +top of it. + +### 3.2 Distributed File Systems and Databases + +Distributing data for performance, availability and durability has been widely +studied in the file system and database systems community. Compared to P2P +storage systems that only support flat namespaces, distributed file systems +typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19] +replicate files for high availability at the expense of consistency. Update +conflicts are typically managed using specialized conflict resolution +procedures. The Farsite system [1] is a distributed file system that does not +use any centralized server like NFS. Farsite achieves high availability and +scalability using replication. The Google File System [6] is another distributed +file system built for hosting the state of Google’s internal applications. GFS +uses a simple design with a single master server for hosting the entire metadata +and where the data is split into chunks and stored in chunkservers. Bayou is a +distributed relational database system that allows disconnected operations and +provides eventual data consistency [21]. + +Among these systems, Bayou, Coda and Ficus allow disconnected operations and are +resilient to issues such as network partitions and outages. These systems differ +on their conflict resolution procedures. For instance, Coda and Ficus perform +system level conflict resolution and Bayou allows application level resolution. +All of them, however, guarantee eventual consistency. + +Similar to these systems, Dynamo allows read and write operations to continue +even during network partitions and resolves updated conflicts using different +conflict resolution mechanisms. Distributed block storage systems like FAB [18] +split large size objects into smaller blocks and stores each block in a highly +available manner. In comparison to these systems, a key-value store is more +suitable in this case because: (a) it is intended to store relatively small +objects (size < 1M) and (b) key-value stores are easier to configure on a per- +application basis. Antiquity is a wide-area distributed storage system designed +to handle multiple server failures [23]. 
It uses a secure log to preserve data
+integrity, replicates each log on multiple servers for durability, and uses
+Byzantine fault tolerance protocols to ensure data consistency. In contrast to
+Antiquity, Dynamo does not focus on the problem of data integrity and security
+and is built for a trusted environment. Bigtable is a distributed storage system
+for managing structured data. It maintains a sparse, multi-dimensional sorted
+map and allows applications to access their data using multiple attributes [2].
+Compared to Bigtable, Dynamo targets applications that require only key/value
+access with primary focus on high availability where updates are not rejected
+even in the wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages.
+ +<table id="table-1"> + <caption> + Table 1: Summary of techniques used in Dynamo and their advantages. + </caption> + <tr> + <th>Problem</th> + <th>Technique</th> + <th>Advantage</th> + </tr> + <tr> + <td>Partitioning</td> + <td>Consistent Hashing</td> + <td>Incremental Scalability</td> + </tr> + <tr> + <td>High Availability for writes</td> + <td>Vector clocks with reconciliation during reads</td> + <td>Version size is decoupled from update rates.</td> + </tr> + <tr> + <td>Handling temporary failures</td> + <td>Sloppy Quorum and hinted handoff</td> + <td>Provides high availability and durability guarantee when some of the + replicas are not available.</td> + </tr> + <tr> + <td>Recovering from permanent failures</td> + <td>Anti-entropy using Merkle trees</td> + <td>Synchronizes divergent replicas in the background.</td> + </tr> + <tr> + <td>Membership and failure detection</td> + <td>Gossip-based membership protocol and failure detection.</td> + <td>Preserves symmetry and avoids having a centralized registry for storing + membership and node liveness information.</td> + </tr> +</table> + +### 4.1 System Interface + +Dynamo stores objects associated with a key through a simple interface; it +exposes two operations: get() and put(). The get(key) operation locates the +object replicas associated with the key in the storage system and returns a +single object or a list of objects with conflicting versions along with a +context. The put(key, context, object) operation determines where the replicas +of the object should be placed based on the associated key, and writes the +replicas to disk. The context encodes system metadata about the object that is +opaque to the caller and includes information such as the version of the object. +The context information is stored along with the object so that the system can +verify the validity of the context object supplied in the put request. + +> Whereas Dynamo only has the concept of keys, we added a higher level of +> organization called a "bucket." Keys are stored in buckets and buckets are the +> level at which several Riak KV properties can be configured (primarily the "N" +> value, or the replication value.) In addition to the bucket+key identifier and +> value, Riak KV will also return the associated metadata for a given object +> with each get or put. +> +> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API]. + +[HTTP API]: {{<baseurl>}}riak/kv/3.0.1/developing/api/http/ +[Protocol Buffers API]: {{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers/ + +Dynamo treats both the key and the object supplied by the caller as an opaque +array of bytes. It applies a MD5 hash on the key to generate a 128-bit +identifier, which is used to determine the storage nodes that are responsible +for serving the key. + +> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash +> to generate a 160 bit identifier which is then used to determine where in the +> database each datum is stored. Riak KV treats data as an opaque binary, thus +> enabling users to store virtually anything. + + +### 4.2 Partitioning Algorithm + +One of the key design requirements for Dynamo is that it must scale +incrementally. This requires a mechanism to dynamically partition the data over +the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning +scheme relies on consistent hashing to distribute the load across multiple +storage hosts. 
In consistent hashing [10], the output range of a hash function
+is treated as a fixed circular space or “ring” (i.e. the largest hash value
+wraps around to the smallest hash value). Each node in the system is assigned a
+random value within this space which represents its “position” on the ring. Each
+data item identified by a key is assigned to a node by hashing the data item’s
+key to yield its position on the ring, and then walking the ring clockwise to
+find the first node with a position larger than the item’s position. Thus, each
+node becomes responsible for the region in the ring between it and its
+predecessor node on the ring. The principle advantage of consistent hashing is
+that departure or arrival of a node only affects its immediate neighbors and
+other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data around
+> the ring to partitions responsible for storing data. The ring has a maximum
+> key space of 2^160. Each bucket+key (and its associated value) is hashed to a
+> location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Each storage node will
+> optimistically handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo uses
+a variant of consistent hashing (similar to the one used in [10, 20]): instead
+of mapping a node to a single point in the circle, each node gets assigned to
+multiple points in the ring. To this end, Dynamo uses the concept of “virtual
+nodes”. A virtual node looks like a single node in the system, but each node can
+be responsible for more than one virtual node. Effectively, when a new node is
+added to the system, it is assigned multiple positions (henceforth, “tokens”) in
+the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed
+in Section 6.
+
+> Riak KV also has the concept of virtual nodes and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+If a node becomes unavailable (due to failures or routine maintenance), the load
+handled by this node is evenly dispersed across the remaining available nodes.
+
+When a node becomes available again, or a new node is added to the system, the
+newly available node accepts a roughly equivalent amount of load from each of
+the other available nodes.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+The number of virtual nodes that a node is responsible for can be decided based
+on its capacity, accounting for heterogeneity in the physical infrastructure.
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters/#the-ring
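+
+> To make the mapping concrete, here is a hedged sketch of how a bucket/key
+> pair lands on a partition boundary in a 64-partition ring. It mimics the
+> spirit of riak_core's hashing (which actually hashes the bucket/key pair
+> via `riak_core_util:chash_key/1`, as shown earlier in these docs) rather
+> than reproducing its exact code; the module name is an assumption:
+>
+> ```erlang
+> -module(chash_sketch).
+> -export([partition_index/2]).
+>
+> %% Map a bucket/key pair onto the lower boundary of one of 64
+> %% partitions in a 2^160 key space (cf. the ring listing shown earlier).
+> partition_index(Bucket, Key) ->
+>     <<I:160/integer>> = crypto:hash(sha, <<Bucket/binary, Key/binary>>),
+>     PartitionSize = (1 bsl 160) div 64,
+>     (I div PartitionSize) * PartitionSize.
+> ```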
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts. Each data item is replicated at N hosts, where N is a parameter
+configured “per-instance”. Each key, k, is assigned to a coordinator node
+(described in the previous section). The coordinator is in charge of the
+replication of the data items that fall within its range. In addition to locally
+storing each key within its range, the coordinator replicates these keys at the
+N-1 clockwise successor nodes in the ring. This results in a system where each
+node is responsible for the region of the ring between it and its Nth
+predecessor. In <a href="#figure-2">Figure 2</a>, node B replicates the key k at
+nodes C and D in addition to storing it locally. Node D will store the keys that
+fall in the ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
+> the concept of a bucket we covered above? In Riak KV, the replication
+> parameter, "N" (also called "n_val"), is configurable at the bucket level.
+> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
+> store three replicas of your data on three different partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called the
+preference list. The system is designed, as will be explained in Section 4.8, so
+that every node in the system can determine which nodes should be in this list
+for any particular key. To account for node failures, the preference list
+contains more than N nodes. Note that with the use of virtual nodes, it is
+possible that the first N successor positions for a particular key may be owned
+by less than N distinct physical nodes (i.e. a node may hold more than one of
+the first N positions). To address this, the preference list for a key is
+constructed by skipping positions in the ring to ensure that the list contains
+only distinct physical nodes.
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be propagated
+to all replicas asynchronously. A put() call may return to its caller before the
+update has been applied at all the replicas, which can result in scenarios where
+a subsequent get() operation may return an object that does not have the latest
+updates. If there are no failures, then there is a bound on the update
+propagation times. However, under certain failure scenarios (e.g., server
+outages or network partitions), updates may not arrive at all replicas for an
+extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously, which, as you would expect, could result in a datum being
+> returned to the client that is out of date. But don't worry: we built in some
+> mechanisms to address this.
+
+There is a category of applications in Amazon’s platform that can tolerate such
+inconsistencies and can be constructed to operate under these conditions. For
+example, the shopping cart application requires that an “Add to Cart” operation
+can never be forgotten or rejected.
+If the most recent state of the cart is
+unavailable, and a user makes changes to an older version of the cart, that
+change is still meaningful and should be preserved. But at the same time it
+shouldn’t supersede the currently unavailable state of the cart, which itself
+may contain changes that should be preserved. Note that both “add to cart” and
+“delete item from cart” operations are translated into put requests to Dynamo.
+When a customer wants to add an item to (or remove from) a shopping cart and the
+latest version is not available, the item is added to (or removed from) the
+older version and the divergent versions are reconciled later.
+
+> Much like Dynamo was suited to the design of the shopping cart, Riak KV and
+> its tradeoffs are appropriate for a certain set of use cases. We happen to
+> feel that _most_ use cases can tolerate some level of eventual consistency.
+
+In order to provide this kind of guarantee, Dynamo treats the result of each
+modification as a new and immutable version of the data. It allows for multiple
+versions of an object to be present in the system at the same time. Most of the
+time, new versions subsume the previous version(s), and the system itself can
+determine the authoritative version (syntactic reconciliation). However, version
+branching may happen, in the presence of failures combined with concurrent
+updates, resulting in conflicting versions of an object. In these cases, the
+system cannot reconcile the multiple versions of the same object and the client
+must perform the reconciliation in order to collapse multiple branches of data
+evolution back into one (semantic reconciliation). A typical example of a
+collapse operation is “merging” different versions of a customer’s shopping
+cart. Using this reconciliation mechanism, an “add to cart” operation is never
+lost. However, deleted items can resurface.
+
+> The same holds true for Riak KV. If, by way of some failure and concurrent
+> update (rare but quite possible), there come to exist multiple versions of the
+> same object, Riak KV will push this decision down to the client (who are we to
+> tell you which is the authoritative object?). All that said, if your
+> application doesn't need this level of version control, we enable you to turn
+> the usage of vector clocks on and off at the bucket level.
+
+It is important to understand that certain failure modes can potentially result
+in the system having not just two but several versions of the same data. Updates
+in the presence of network partitions and node failures can potentially result
+in an object having distinct version sub-histories, which the system will need
+to reconcile in the future. This requires us to design applications that
+explicitly acknowledge the possibility of multiple versions of the same data (in
+order to never lose any updates).
+
+> Ditto.
+
+Dynamo uses vector clocks [12] in order to capture causality between different
+versions of the same object. A vector clock is effectively a list of (node,
+counter) pairs. One vector clock is associated with every version of every
+object. One can determine whether two versions of an object are on parallel
+branches or have a causal ordering by examining their vector clocks. If the
+counters on the first object’s clock are less-than-or-equal to all of the
+counters in the second clock, then the first is an ancestor of the second and
+can be forgotten. Otherwise, the two changes are considered to be in conflict
+and require reconciliation.
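+
+> That comparison rule fits in a few lines of Ruby. The sketch below models a
+> vector clock as a plain Hash and follows the rule in the paragraph above; it
+> is a teaching aid, not Riak's internal representation.
+>
+> ```ruby
+> # A vector clock as a Hash of node => counter. Version `a` descends from
+> # `b` when every counter in `b` is <= the matching counter in `a`; if
+> # neither descends from the other, the versions conflict.
+> def descends?(a, b)
+>   b.all? { |node, counter| a.fetch(node, 0) >= counter }
+> end
+>
+> d2 = { 'Sx' => 2 }
+> d3 = { 'Sx' => 2, 'Sy' => 1 }
+> d4 = { 'Sx' => 2, 'Sz' => 1 }
+>
+> puts descends?(d3, d2)                       # => true  (D3 supersedes D2)
+> puts descends?(d4, d3) || descends?(d3, d4)  # => false (D3, D4 conflict)
+> ```
+>
+> The names Sx, Sy, Sz and D2-D4 anticipate the worked example around Figure 3
+> below.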
+
+> As you may have already figured out, Riak KV uses vector clocks for object
+> versioning, too. Here are a whole host of resources to keep you busy for a while:
+>
+> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vector-clock)
+>
+> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/)
+> |
+> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
+>
+> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
+>
+> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)
+
+In Dynamo, when a client wishes to update an object, it must specify which
+version it is updating. This is done by passing the context it obtained from an
+earlier read operation, which contains the vector clock information. Upon
+processing a read request, if Dynamo has access to multiple branches that cannot
+be syntactically reconciled, it will return all the objects at the leaves, with
+the corresponding version information in the context. An update using this
+context is considered to have reconciled the divergent versions and the branches
+are collapsed into a single new version.
+
+**<figure id="figure-3" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure3.png">
+  <figcaption>
+    Figure 3: Version evolution of an object over time.
+  </figcaption>
+</figure>**
+
+To illustrate the use of vector clocks, let us consider the example shown in
+<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say
+Sx) that handles the write for this key increases its sequence number and uses
+it to create the data's vector clock. The system now has the object D1 and its
+associated clock [(Sx, 1)]. The client updates the object. Assume the same node
+handles this request as well. The system now also has object D2 and its
+associated clock [(Sx, 2)]. D2 descends from D1 and therefore over-writes D1;
+however, there may be replicas of D1 lingering at nodes that have not yet seen
+D2. Let us assume that the same client updates the object again and a different
+server (say Sy) handles the request. The system now has data D3 and its
+associated clock [(Sx, 2), (Sy, 1)].
+
+Next assume a different client reads D2 and then tries to update it, and another
+node (say Sz) does the write. The system now has D4 (descendant of D2) whose
+version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2 could
+determine, upon receiving D4 and its clock, that D1 and D2 are overwritten by
+the new data and can be garbage collected. A node that is aware of D3 and
+receives D4 will find that there is no causal relation between them. In other
+words, there are changes in D3 and D4 that are not reflected in each other. Both
+versions of the data must be kept and presented to a client (upon a read) for
+semantic reconciliation.
+
+Now assume some client reads both D3 and D4 (the context will reflect that both
+values were found by the read). The read's context is a summary of the clocks of
+D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client performs the
+reconciliation and node Sx coordinates the write, Sx will update its sequence
+number in the clock. The new data D5 will have the following clock: [(Sx, 3),
+(Sy, 1), (Sz, 1)].
+
+A possible issue with vector clocks is that the size of vector clocks may grow
+if many servers coordinate the writes to an object.
+In practice, this is not
+likely because the writes are usually handled by one of the top N nodes in the
+preference list. In case of network partitions or multiple server failures,
+write requests may be handled by nodes that are not in the top N nodes in the
+preference list, causing the size of the vector clock to grow. In these
+scenarios, it is desirable to limit the size of the vector clock. To this end,
+Dynamo employs the following clock truncation scheme: Along with each (node,
+counter) pair, Dynamo stores a timestamp that indicates the last time the node
+updated the data item. When the number of (node, counter) pairs in the vector
+clock reaches a threshold (say 10), the oldest pair is removed from the clock.
+Clearly, this truncation scheme can lead to inefficiencies in reconciliation as
+the descendant relationships cannot be derived accurately. However, this problem
+has not surfaced in production and therefore this issue has not been thoroughly
+investigated.
+
+> Riak KV does a certain amount of vector clock pruning to keep vector clock
+> growth under control.
+
+
+### 4.5 Execution of get() and put() operations
+
+Any storage node in Dynamo is eligible to receive client get and put operations
+for any key. In this section, for the sake of simplicity, we describe how these
+operations are performed in a failure-free environment, and in the subsequent
+section we describe how read and write operations are executed during failures.
+
+> Any node in the Riak KV ring can coordinate a request. The Riak KV information
+> in this section applies to a failure-free environment.
+
+Both get and put operations are invoked using Amazon’s infrastructure-specific
+request processing framework over HTTP. There are two strategies that a client
+can use to select a node: (1) route its request through a generic load balancer
+that will select a node based on load information, or (2) use a partition-aware
+client library that routes requests directly to the appropriate coordinator
+nodes. The advantage of the first approach is that the client does not have to
+link any code specific to Dynamo in its application, whereas the second strategy
+can achieve lower latency because it skips a potential forwarding step.
+
+A node handling a read or write operation is known as the coordinator.
+Typically, this is the first among the top N nodes in the preference list. If
+the requests are received through a load balancer, requests to access a key may
+be routed to any random node in the ring. In this scenario, the node that
+receives the request will not coordinate it if the node is not in the top N of
+the requested key’s preference list. Instead, that node will forward the request
+to the first among the top N nodes in the preference list.
+
+Read and write operations involve the first N healthy nodes in the preference
+list, skipping over those that are down or inaccessible. When all nodes are
+healthy, the top N nodes in a key’s preference list are accessed. When there are
+node failures or network partitions, nodes that are lower ranked in the
+preference list are accessed.
+
+To maintain consistency among its replicas, Dynamo uses a consistency protocol
+similar to those used in quorum systems. This protocol has two key configurable
+values: R and W. R is the minimum number of nodes that must participate in a
+successful read operation. W is the minimum number of nodes that must
+participate in a successful write operation. Setting R and W such that R + W > N
+yields a quorum-like system. In this model, the latency of a get (or put)
+operation is dictated by the slowest of the R (or W) replicas. For this reason,
+R and W are usually configured to be less than N, to provide better latency.
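+
+> Why does R + W > N behave like a quorum? Any W replicas that acknowledge a
+> write must overlap any R replicas consulted by a later read, so the read
+> always sees at least one up-to-date copy. A quick Ruby check of that
+> pigeonhole argument (a toy verification, not Riak code):
+>
+> ```ruby
+> n, r, w = 3, 2, 2
+> replicas = (1..n).to_a
+>
+> # Every possible write-set of size W shares at least one replica with
+> # every possible read-set of size R whenever R + W > N.
+> overlap = replicas.combination(w).all? do |write_set|
+>   replicas.combination(r).all? { |read_set| !(write_set & read_set).empty? }
+> end
+>
+> puts overlap  # => true for (3,2,2); try r = 1 to see it fail
+> ```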
+
+> Riak KV makes use of the same values. But, thanks to our concept of buckets,
+> we made it a bit more customizable. The default R and W values are set at the
+> bucket level but can be configured at the request level if the developer deems
+> it necessary for certain data. "Quorum" as described in Dynamo is the default
+> setting in Riak KV.
+>
+> Some more resources on R and W:
+>
+> [REST API]({{<baseurl>}}riak/kv/3.0.1/developing/api/http/)
+>
+> [Writing Data]({{<baseurl>}}riak/kv/3.0.1/developing/usage/creating-objects/)
+>
+> [Reading Data]({{<baseurl>}}riak/kv/3.0.1/developing/usage/reading-objects/)
+
+Upon receiving a put() request for a key, the coordinator generates the vector
+clock for the new version and writes the new version locally. The coordinator
+then sends the new version (along with the new vector clock) to the N highest-
+ranked reachable nodes. If at least W-1 nodes respond, then the write is
+considered successful.
+
+> In Riak KV a write is considered successful when the total number of
+> responding writes equals W. This need not be a durable write, which is a
+> separate value in Riak KV labeled DW.
+
+Similarly, for a get() request, the coordinator requests all existing versions
+of data for that key from the N highest-ranked reachable nodes in the preference
+list for that key, and then waits for R responses before returning the result to
+the client. If the coordinator ends up gathering multiple versions of the data,
+it returns all the versions it deems to be causally unrelated. The divergent
+versions are then reconciled and the reconciled version superseding the current
+versions is written back.
+
+> Same for Riak KV. Reconciling divergent versions in Riak KV is called
+> [Read Repair]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication/#read-repair).
+
+
+### 4.6 Handling Failures: Hinted Handoff
+
+If Dynamo used a traditional quorum approach it would be unavailable during
+server failures and network partitions, and would have reduced durability even
+under the simplest of failure conditions. To remedy this, it does not enforce
+strict quorum membership; instead, it uses a “sloppy quorum”: all read and
+write operations are performed on the first N healthy nodes from the preference
+list, which may not always be the first N nodes encountered while walking the
+consistent hashing ring.
+
+> [Hinted handoff] is built into Riak KV's core.
+>
+> You can get a glimpse of Riak KV's preference list (or *preflist*) calculation
+> in the [Replication] walkthrough.
+
+[Hinted handoff]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#hinted-handoff
+[Replication]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/replication/
+
+Consider the example of the Dynamo configuration given in
+<a href="#figure-2">Figure 2</a> with N=3. In this example, if node A is
+temporarily down or unreachable during a write operation then a replica that
+would normally have lived on A will now be sent to node D. This is done to
+maintain the desired availability and durability guarantees. The replica sent to
+D will have a hint in its metadata that suggests which node was the intended
+recipient of the replica (in this case A). Nodes that receive hinted replicas
+will keep them in a separate local database that is scanned periodically.
+Upon detecting that A has recovered, D
+will attempt to deliver the replica to A. Once the transfer succeeds, D may
+delete the object from its local store without decreasing the total number of
+replicas in the system.
+
+Using hinted handoff, Dynamo ensures that read and write operations do not fail
+due to temporary node or network failures. Applications that need the highest
+level of availability can set W to 1, which ensures that a write is accepted as
+long as a single node in the system has durably written the key to its local
+store. Thus, the write request is only rejected if all nodes in the system are
+unavailable. However, in practice, most Amazon services in production set a
+higher W to meet the desired level of durability. A more detailed discussion of
+configuring N, R and W follows in section 6.
+
+> As mentioned previously, Riak KV does not require that a write be durable,
+> only that a vnode responds in the affirmative. If you require a durable write
+> in the way mentioned here, use DW.
+
+It is imperative that a highly available storage system be capable of handling
+the failure of an entire data center(s). Data center failures happen due to
+power outages, cooling failures, network failures, and natural disasters. Dynamo
+is configured such that each object is replicated across multiple data centers.
+In essence, the preference list of a key is constructed such that the storage
+nodes are spread across multiple data centers. These data centers are connected
+through high-speed network links. This scheme of replicating across multiple
+data centers allows us to handle entire data center failures without a data
+outage.
+
+> [Multi Datacenter Replication] was previously only implemented in the
+> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. It is
+> now available in all versions from Riak KV 2.9.7 onwards.
+
+[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/3.0.1/using/reference/v3-multi-datacenter/architecture/
+[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
+
+
+### 4.7 Handling permanent failures: Replica synchronization
+
+Hinted handoff works best if the system membership churn is low and node
+failures are transient. There are scenarios under which hinted replicas become
+unavailable before they can be returned to the original replica node. To handle
+this and other threats to durability, Dynamo implements an anti-entropy (replica
+synchronization) protocol to keep the replicas synchronized.
+
+> Read repair, mentioned above, is the simplest form of anti-entropy. But it is
+> passive, not active as this section describes.
+
+To detect the inconsistencies between replicas faster and to minimize the amount
+of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a hash tree
+whose leaves are hashes of the values of individual keys. Parent nodes higher in
+the tree are hashes of their respective children. The principal advantage of a
+Merkle tree is that each branch of the tree can be checked independently without
+requiring nodes to download the entire tree or the entire data set. Moreover,
+Merkle trees help in reducing the amount of data that needs to be transferred
+while checking for inconsistencies among replicas. For instance, if the hash
+values of the roots of two trees are equal, then the values of the leaf nodes in
+the tree are equal and the nodes require no synchronization. If not, it implies
+that the values of some replicas are different. In such cases, the nodes may
+exchange the hash values of children and the process continues until it reaches
+the leaves of the trees, at which point the hosts can identify the keys that are
+“out of sync”. Merkle trees minimize the amount of data that needs to be
+transferred for synchronization and reduce the number of disk reads performed
+during the anti-entropy process.
+
+> Riak KV implements a Merkle-tree-based Active Anti-Entropy (*AAE*).
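+
+> The root-first comparison is easy to see in miniature. Below is a toy Ruby
+> Merkle tree over a small, ordered key range; the real AAE trees in Riak are
+> persistent and far more sophisticated, so treat this as a sketch of the idea
+> only.
+>
+> ```ruby
+> require 'digest/sha1'
+>
+> # Build a toy Merkle tree over an ordered list of [key, value] pairs.
+> # Leaves are [hash, key]; internal nodes are [hash, left, right].
+> def build(pairs)
+>   if pairs.size == 1
+>     return [Digest::SHA1.hexdigest(pairs[0].join(':')), pairs[0][0]]
+>   end
+>   left  = build(pairs[0...pairs.size / 2])
+>   right = build(pairs[pairs.size / 2..-1])
+>   [Digest::SHA1.hexdigest(left[0] + right[0]), left, right]
+> end
+>
+> # Walk two trees top-down, descending only into branches whose hashes
+> # differ, and collect the out-of-sync keys.
+> def diff(a, b, out = [])
+>   return out if a[0] == b[0]    # equal hashes: prune this whole branch
+>   if a[1].is_a?(String)         # leaf: this key differs
+>     out << a[1]
+>   else
+>     diff(a[1], b[1], out)
+>     diff(a[2], b[2], out)
+>   end
+>   out
+> end
+>
+> replica1 = build([%w[k1 v1], %w[k2 v2], %w[k3 v3], %w[k4 v4]])
+> replica2 = build([%w[k1 v1], %w[k2 v2], %w[k3 XX], %w[k4 v4]])
+> puts diff(replica1, replica2).inspect  # => ["k3"]
+> ```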
+
+Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
+separate Merkle tree for each key range (the set of keys covered by a virtual
+node) it hosts. This allows nodes to compare whether the keys within a key range
+are up-to-date. In this scheme, two nodes exchange the root of the Merkle tree
+corresponding to the key ranges that they host in common. Subsequently, using
+the tree traversal scheme described above, the nodes determine if they have any
+differences and perform the appropriate synchronization action. The disadvantage
+with this scheme is that many key ranges change when a node joins or leaves the
+system, thereby requiring the tree(s) to be recalculated. This issue is
+addressed, however, by the refined partitioning scheme described in Section 6.2.
+
+
+### 4.8 Membership and Failure Detection
+
+> This section is well expressed in [Adding and Removing Nodes] and
+> [Failure Scenarios].
+
+[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency/
+
+#### 4.8.1 Ring Membership
+
+> Riak KV operators can trigger node management via the
+> [riak admin command-line tool].
+
+[riak admin command-line tool]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak-admin/
+
+In Amazon’s environment, node outages (due to failures and maintenance tasks)
+are often transient but may last for extended intervals. A node outage rarely
+signifies a permanent departure and therefore should not result in rebalancing
+of the partition assignment or repair of the unreachable replicas. Similarly,
+manual error could result in the unintentional startup of new Dynamo nodes. For
+these reasons, it was deemed appropriate to use an explicit mechanism to
+initiate the addition and removal of nodes from a Dynamo ring. An administrator
+uses a command line tool or a browser to connect to a Dynamo node and issue a
+membership change to join a node to a ring or remove a node from a ring. The
+node that serves the request writes the membership change and its time of issue
+to persistent store. The membership changes form a history because nodes can be
+removed and added back multiple times.
+
+> Nodes are manually added using the `riak admin cluster join` command.
+>
+> When a node permanently departs, rebalancing is triggered using the
+> `riak admin cluster leave` command.
+
+A gossip-based protocol propagates membership changes and maintains an
+eventually consistent view of membership. Each node contacts a peer chosen at
+random every second and the two nodes efficiently reconcile their persisted
+membership change histories.
+
+> Riak KV's ring state holds membership information, and is propagated via
+> [gossiping], including random reconciliation, defaulting to once a minute.
+
+[gossiping]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#gossiping
+
+When a node starts for the first time, it chooses its set of tokens (virtual
+nodes in the consistent hash space) and maps nodes to their respective token
+sets.
+The mapping is persisted on disk and initially contains only the local
+node and token set. The mappings stored at different Dynamo nodes are reconciled
+during the same communication exchange that reconciles the membership change
+histories. Therefore, partitioning and placement information also propagates via
+the gossip-based protocol and each storage node is aware of the token ranges
+handled by its peers. This allows each node to forward a key’s read/write
+operations to the right set of nodes directly.
+
+> These tokens are vnodes (virtual nodes) in Riak KV.
+
+
+#### 4.8.2 External Discovery
+
+The mechanism described above could temporarily result in a logically
+partitioned Dynamo ring. For example, the administrator could contact node A to
+join A to the ring, then contact node B to join B to the ring. In this scenario,
+nodes A and B would each consider itself a member of the ring, yet neither would
+be immediately aware of the other. To prevent logical partitions, some Dynamo
+nodes play the role of seeds. Seeds are nodes that are discovered via an
+external mechanism and are known to all nodes. Because all nodes eventually
+reconcile their membership with a seed, logical partitions are highly unlikely.
+Seeds can be obtained either from static configuration or from a configuration
+service. Typically, seeds are fully functional nodes in the Dynamo ring.
+
+> To rectify these sorts of logical partitions, multiple Riak cluster changes
+> are configured as one batch. Any changes must first be viewed with `riak admin
+> cluster plan`; the changes are then committed with `riak admin cluster
+> commit`. The new ring state is gossiped.
+>
+> See _[The Node Join Process]_ for more.
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+
+
+#### 4.8.3 Failure Detection
+
+Failure detection in Dynamo is used to avoid attempts to communicate with
+unreachable peers during get() and put() operations and when transferring
+partitions and hinted replicas. For the purpose of avoiding failed attempts at
+communication, a purely local notion of failure detection is entirely
+sufficient: node A may consider node B failed if node B does not respond to node
+A’s messages (even if B is responsive to node C’s messages). In the presence of
+a steady rate of client requests generating inter-node communication in the
+Dynamo ring, a node A quickly discovers that a node B is unresponsive when B
+fails to respond to a message; node A then uses alternate nodes to service
+requests that map to B's partitions; A periodically retries B to check for the
+latter's recovery. In the absence of client requests to drive traffic between
+two nodes, neither node really needs to know whether the other is reachable and
+responsive.
+
+Decentralized failure detection protocols use a simple gossip-style protocol
+that enables each node in the system to learn about the arrival (or departure)
+of other nodes. For detailed information on decentralized failure detectors and
+the parameters affecting their accuracy, the interested reader is referred to
+[8]. Early designs of Dynamo used a decentralized failure detector to maintain
+a globally consistent view of failure state. Later it was determined that the
+explicit node join and leave methods obviate the need for a global view of
+failure state.
+This is because nodes are notified of permanent node additions
+and removals by the explicit node join and leave methods, and temporary node
+failures are detected by the individual nodes when they fail to communicate with
+others (while forwarding requests).
+
+> Riak KV follows the same mechanism, by manually triggering permanent ring
+> state changes, and gossiping the new state.
+
+
+### 4.9 Adding/Removing Storage Nodes
+
+When a new node (say X) is added into the system, it gets assigned a number of
+tokens that are randomly scattered on the ring. For every key range that is
+assigned to node X, there may be a number of nodes (less than or equal to N)
+that are currently in charge of handling keys that fall within its token range.
+Due to the allocation of key ranges to X, some existing nodes no longer have to
+store some of their keys, and these nodes transfer those keys to X. Let us
+consider a simple bootstrapping scenario where node X is added to the ring shown
+in <a href="#figure-2">Figure 2</a> between A and B. When X is added to the
+system, it is in charge of storing keys in the ranges (F, G], (G, A] and (A, X].
+As a consequence, nodes B, C and D no longer have to store the keys in these
+respective ranges. Therefore, nodes B, C, and D will offer to, and upon
+confirmation from X, transfer the appropriate set of keys. When a node is
+removed from the system, the reallocation of keys happens in a reverse process.
+
+> Riak KV does not randomly assign vnodes, but rather, iterates through the list
+> of partitions, assigning them to nodes in a round-robin style. A sketch of
+> this claim style follows the next paragraph.
+
+Operational experience has shown that this approach distributes the load of key
+distribution uniformly across the storage nodes, which is important to meet the
+latency requirements and to ensure fast bootstrapping. Finally, by adding a
+confirmation round between the source and the destination, it is made sure that
+the destination node does not receive any duplicate transfers for a given key
+range.
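+
+> To illustrate the round-robin claim mentioned above, here is a rough Ruby
+> sketch. It is a simplification: the real claim algorithm in riak_core also
+> enforces spacing constraints so that adjacent partitions land on distinct
+> physical nodes.
+>
+> ```ruby
+> # Assign Q equal-sized partitions to S nodes in round-robin order.
+> ring_size = 16
+> nodes     = %w[node1 node2 node3]
+>
+> owners = (0...ring_size).map { |idx| [idx, nodes[idx % nodes.size]] }.to_h
+> owners.each { |partition, node| puts "partition #{partition} -> #{node}" }
+> ```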
+
+## 5. Implementation
+
+In Dynamo, each storage node has three main software components: request
+coordination, membership and failure detection, and a local persistence engine.
+All these components are implemented in Java.
+
+> Riak KV is implemented in Erlang. Request coordination and membership behavior
+> is defined by [riak_core] and implemented by [Riak KV].
+
+[riak_core]: http://github.com/basho/riak_core
+[Riak KV]: http://github.com/basho/riak_kv
+
+Dynamo’s local persistence component allows for different storage engines to be
+plugged in. Engines that are in use are Berkeley Database (BDB) Transactional
+Data Store, BDB Java Edition, MySQL, and an in-memory buffer with persistent
+backing store. The main reason for designing a pluggable persistence component
+is to choose the storage engine best suited for an application’s access
+patterns. For instance, BDB can handle objects typically in the order of tens of
+kilobytes whereas MySQL can handle objects of larger sizes. Applications choose
+Dynamo’s local persistence engine based on their object size distribution. The
+majority of Dynamo’s production instances use BDB Transactional Data Store.
+
+> Riak KV ships with various [backend options]. [Bitcask] is the default, but
+> [LevelDB] and Main [Memory] are also used heavily in production (in that
+> order). You can also use more than one backend in production via the [Multi]
+> backend configuration.
+>
+> Bitcask is a fast and reliable choice, but does have some limitations at very
+> large scales. For larger clusters, you may want to choose LevelDB (which also
+> supports [secondary indexes]). The Memory backend is an excellent choice when
+> speed is important and durability is not. It also has TTL support.
+
+[backend options]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/
+[Bitcask]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/bitcask/
+[LevelDB]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveldb/
+[Memory]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/memory/
+[Multi]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/multi/
+[secondary indexes]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/secondary-indexes/
+
+The request coordination component is built on top of an event-driven messaging
+substrate where the message processing pipeline is split into multiple stages
+similar to the SEDA architecture [24]. All communications are implemented using
+Java NIO channels. The coordinator executes the read and write requests on
+behalf of clients by collecting data from one or more nodes (in the case of
+reads) or storing data at one or more nodes (for writes). Each client request
+results in the creation of a state machine on the node that received the client
+request. The state machine contains all the logic for identifying the nodes
+responsible for a key, sending the requests, waiting for responses, potentially
+doing retries, processing the replies and packaging the response to the client.
+Each state machine instance handles exactly one client request. For instance, a
+read operation implements the following state machine: (i) send read requests to
+the nodes, (ii) wait for the minimum number of required responses, (iii) if too
+few replies were received within a given time bound, fail the request, (iv)
+otherwise gather all the data versions and determine the ones to be returned and
+(v) if versioning is enabled, perform syntactic reconciliation and generate an
+opaque write context that contains the vector clock that subsumes all the
+remaining versions. For the sake of brevity the failure handling and retry
+states are left out.
+
+> Request coordination in Riak KV uses Erlang message passing, but follows a
+> similar state machine.
+
+After the read response has been returned to the caller the state machine waits
+for a small period of time to receive any outstanding responses. If stale
+versions were returned in any of the responses, the coordinator updates those
+nodes with the latest version. This process is called read repair because it
+repairs replicas that have missed a recent update at an opportunistic time and
+relieves the anti-entropy protocol from having to do it.
+
+> Riak KV implements [Read Repair].
+
+[Read Repair]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication/#read-repair
+
+As noted earlier, write requests are coordinated by one of the top N nodes in
+the preference list. Although it is desirable always to have the first node
+among the top N coordinate the writes, thereby serializing all writes at a
+single location, this approach has led to uneven load distribution, resulting in
+SLA violations. This is because the request load is not uniformly distributed
+across objects. To counter this, any of the top N nodes in the preference list
+is allowed to coordinate the writes. In particular, since each write usually
+follows a read operation, the coordinator for a write is chosen to be the node
+that replied fastest to the previous read operation, which is stored in the
+context information of the request.
+This optimization enables us to pick the
+node that has the data that was read by the preceding read operation, thereby
+increasing the chances of getting “read-your-writes” consistency. It also
+reduces variability in the performance of the request handling, which improves
+the performance at the 99.9 percentile.
+
+
+## 6. Experiences & Lessons Learned
+
+> Much of this section relates to benchmarks run against Dynamo. You can run
+> [Basho Bench] against your own Riak cluster to discover your own
+> optimal values.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/3.0.1/using/performance/benchmarking/
+
+Dynamo is used by several services with different configurations. These
+instances differ by their version reconciliation logic, and read/write quorum
+characteristics. The following are the main patterns in which Dynamo is used:
+
+* Business logic specific reconciliation: This is a popular use case for Dynamo.
+Each data object is replicated across multiple nodes. In case of divergent
+versions, the client application performs its own reconciliation logic. The
+shopping cart service discussed earlier is a prime example of this category. Its
+business logic reconciles objects by merging different versions of a customer’s
+shopping cart.
+
+> Riak KV currently supports simple conflict resolution by way of read-repair,
+> leaving more complex reconciliation to the client. There are several tools
+> to help simplify this task, such as [Statebox].
+>
+> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative
+> Replicated Data Types)], for reconciling common data types like sets and
+> counters.
+
+[Statebox]: https://github.com/mochi/statebox_riak
+[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/3.0.1/developing/data-types/
+
+
+* Timestamp based reconciliation: This case differs from the previous one only
+in the reconciliation mechanism. In case of divergent versions, Dynamo performs
+simple timestamp based reconciliation logic of “last write wins”; i.e., the
+object with the largest physical timestamp value is chosen as the correct
+version. The service that maintains customer’s session information is a good
+example of a service that uses this mode.
+
+> Riak also supports this for high-performance cases where accuracy is less
+> important than speed.
+
+* High performance read engine: While Dynamo is built to be an “always
+writeable” data store, a few services are tuning its quorum characteristics and
+using it as a high performance read engine. Typically, these services have a
+high read request rate and only a small number of updates. In this
+configuration, typically R is set to be 1 and W to be N. For these services,
+Dynamo provides the ability to partition and replicate their data across
+multiple nodes, thereby offering incremental scalability. Some of these
+instances function as the authoritative persistence cache for data stored in
+more heavyweight backing stores. Services that maintain the product catalog and
+promotional items fit in this category.
+
+> Riak can be used in this manner.
+
+The main advantage of Dynamo is that its client applications can tune the values
+of N, R and W to achieve their desired levels of performance, availability and
+durability. For instance, the value of N determines the durability of each
+object. A typical value of N used by Dynamo’s users is 3.
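+
+> In Riak these same knobs are exposed per bucket and per request. A sketch
+> using the Ruby client (assuming the riak-client gem; exact option names can
+> vary between client versions, so treat this as illustrative):
+>
+> ```ruby
+> require 'riak'
+>
+> client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+> cart   = client.bucket('carts')
+>
+> # Bucket-level defaults: replicate each object to 3 vnodes and keep
+> # conflicting siblings for the application to reconcile.
+> cart.props = {'n_val' => 3, 'allow_mult' => true}
+>
+> # A durability-biased write...
+> item = Riak::RObject.new(cart, 'user-42')
+> item.content_type = 'application/json'
+> item.data = {'items' => %w[book kettle]}
+> item.store(w: 3, dw: 3)
+>
+> # ...and a fast, availability-biased read.
+> cart.get('user-42', r: 1)
+> ```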
+
+The values of W and R impact object availability, durability and consistency.
+For instance, if W is set to 1, then the system will never reject a write
+request as long as there is at least one node in the system that can
+successfully process a write request. However, low values of W and R can
+increase the risk of inconsistency as write requests are deemed successful and
+returned to the clients even if they are not processed by a majority of the
+replicas. This also introduces a vulnerability window for durability when a
+write request is successfully returned to the client even though it has been
+persisted at only a small number of nodes.
+
+Traditional wisdom holds that durability and availability go hand-in-hand.
+However, this is not necessarily true here. For instance, the vulnerability
+window for durability can be decreased by increasing W. This may increase the
+probability of rejecting requests (thereby decreasing availability) because more
+storage hosts need to be alive to process a write request.
+
+The common (N,R,W) configuration used by several instances of Dynamo is (3,2,2).
+These values are chosen to meet the necessary levels of performance, durability,
+consistency, and availability SLAs.
+
+All the measurements presented in this section were taken on a live system
+operating with a configuration of (3,2,2) and running a couple hundred nodes
+with homogenous hardware configurations. As mentioned earlier, each instance of
+Dynamo contains nodes that are located in multiple datacenters. These
+datacenters are typically connected through high speed network links. Recall
+that to generate a successful get (or put) response R (or W) nodes need to
+respond to the coordinator. Clearly, the network latencies between datacenters
+affect the response time and the nodes (and their datacenter locations) are
+chosen such that the applications’ target SLAs are met.
+
+> Ditto for Riak.
+
+### 6.1 Balancing Performance and Durability
+
+While Dynamo’s principal design goal is to build a highly available data store,
+performance is an equally important criterion in Amazon’s platform. As noted
+earlier, to provide a consistent customer experience, Amazon’s services set
+their performance targets at higher percentiles (such as the 99.9th or 99.99th
+percentiles). A typical SLA required of services that use Dynamo is that 99.9%
+of the read and write requests execute within 300ms.
+
+Since Dynamo is run on standard commodity hardware components that have far less
+I/O throughput than high-end enterprise servers, providing consistently high
+performance for read and write operations is a non-trivial task. The involvement
+of multiple storage nodes in read and write operations makes it even more
+challenging, since the performance of these operations is limited by the slowest
+of the R or W replicas. <a href="#figure-4">Figure 4</a> shows the average and
+99.9th percentile latencies of Dynamo’s read and write operations during a
+period of 30 days. As seen in the figure, the latencies exhibit a clear diurnal
+pattern which is a result of the diurnal pattern in the incoming request rate
+(i.e., there is a significant difference in request rate between the daytime and
+night). Moreover, the write latencies are obviously higher than read latencies
+because write operations always result in disk access. Also, the 99.9th
+percentile latencies are around 200 ms and are an order of magnitude higher than
+the averages.
This is because the 99.9th percentile latencies are affected by +several factors such as variability in request load, object sizes, and locality +patterns. + +**<figure id="figure-4" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure4.png"> + <figcaption> + Figure 4: Average and 99.9 percentiles of latencies for read and write + requests during our peak request season of December 2006. The intervals + between consecutive ticks in the x-axis correspond to 12 hours. Latencies + follow a diurnal pattern similar to the request rate and 99.9 percentile + latencies are an order of magnitude higher than averages. + </figcaption> +</figure>** + +While this level of performance is acceptable for a number of services, a few +customer-facing services required higher levels of performance. For these +services, Dynamo provides the ability to trade-off durability guarantees for +performance. In the optimization each storage node maintains an object buffer in +its main memory. Each write operation is stored in the buffer and gets +periodically written to storage by a writer thread. In this scheme, read +operations first check if the requested key is present in the buffer. If so, the +object is read from the buffer instead of the storage engine. + +> This is more similar to Riak's W value, since only DW requires a durable write +> to respond as a success. + +This optimization has resulted in lowering the 99.9th percentile latency by a +factor of 5 during peak traffic even for a very small buffer of a thousand +objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure, +write buffering smoothes out higher percentile latencies. Obviously, this scheme +trades durability for performance. In this scheme, a server crash can result in +missing writes that were queued up in the buffer. To reduce the durability risk, +the write operation is refined to have the coordinator choose one out of the N +replicas to perform a “durable write”. Since the coordinator waits only for W +responses, the performance of the write operation is not affected by the +performance of the durable write operation performed by a single replica. + +**<figure id="figure-5" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure5.png"> + <figcaption> + Figure 5: Comparison of performance of 99.9th percentile latencies for + buffered vs. non-buffered writes over a period of 24 hours. The intervals + between consecutive ticks in the x-axis correspond to one hour. + </figcaption> +</figure>** + +> Setting DW=1 will replicate this behavior. + + +### 6.2 Ensuring Uniform Load distribution + +Dynamo uses consistent hashing to partition its key space across its replicas +and to ensure uniform load distribution. A uniform key distribution can help us +achieve uniform load distribution assuming the access distribution of keys is +not highly skewed. In particular, Dynamo’s design assumes that even where there +is a significant skew in the access distribution there are enough keys in the +popular end of the distribution so that the load of handling popular keys can be +spread across the nodes uniformly through partitioning. This section discusses +the load imbalance seen in Dynamo and the impact of different partitioning +strategies on load distribution. + +> Riak follows a SHA1 based consistent hashing for [partitioning]. 
+
+[partitioning]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication/#understanding-replication-by-example
+
+To study the load imbalance and its correlation with request load, the total
+number of requests received by each node was measured for a period of 24 hours,
+broken down into intervals of 30 minutes. In a given time window, a node is
+considered to be “in-balance” if the node’s request load deviates from the
+average load by less than a certain threshold (here, 15%). Otherwise the node
+was deemed “out-of-balance”. <a href="#figure-6">Figure 6</a> presents
+the fraction of nodes that are “out-of-balance” (henceforth, “imbalance ratio”)
+during this time period. For reference, the corresponding request load received
+by the entire system during this time period is also plotted. As seen in the
+figure, the imbalance ratio decreases with increasing load. For instance, during
+low loads the imbalance ratio is as high as 20% and during high loads it is
+close to 10%. Intuitively, this can be explained by the fact that under high
+loads, a large number of popular keys are accessed and due to uniform
+distribution of keys the load is evenly distributed. However, during low loads
+(where load is 1/8th of the measured peak load), fewer popular keys are
+accessed, resulting in a higher load imbalance.
+
+**<figure id="figure-6" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure6.png">
+  <figcaption>
+    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
+    request load is above a certain threshold from the average system load) and
+    their corresponding request load. The interval between ticks in x-axis
+    corresponds to a time period of 30 minutes.
+  </figcaption>
+</figure>**
+
+<i>This section discusses how Dynamo’s partitioning scheme has evolved over time
+and its implications on load distribution.</i>
+
+<strong>Strategy 1:</strong> T random tokens per node and partition by token
+value: This was the initial strategy deployed in production (and described in
+Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
+at random from the hash space). The tokens of all nodes are ordered according to
+their values in the hash space. Every two consecutive tokens define a range. The
+last token and the first token form a range that "wraps" around from the highest
+value to the lowest value in the hash space. Because the tokens are chosen
+randomly, the ranges vary in size. As nodes join and leave the system, the token
+set changes and consequently the ranges change. Note that the space needed to
+maintain the membership at each node increases linearly with the number of nodes
+in the system.
+
+> Riak uses equal-sized partitions with a round-robin distribution, not
+> variably-sized partitions that are randomly distributed.
+
+While using this strategy, the following problems were encountered. First, when
+a new node joins the system, it needs to “steal” its key ranges from other
+nodes. However, the nodes handing the key ranges off to the new node have to
+scan their local persistence store to retrieve the appropriate set of data
+items. Note that performing such a scan operation on a production node is tricky
+as scans are highly resource intensive operations and they need to be executed
+in the background without affecting the customer performance. This requires us
+to run the bootstrapping task at the lowest priority.
+However, this
+significantly slows the bootstrapping process, and during the busy shopping
+season, when the nodes are handling millions of requests a day, the
+bootstrapping has taken almost a day to complete. Second, when a node
+joins/leaves the system, the key ranges handled by many nodes change and the
+Merkle trees for the new ranges need to be recalculated, which is a non-trivial
+operation to perform on a production system. Finally, there was no easy way to
+take a snapshot of the entire key space due to the randomness in key ranges, and
+this made the process of archival complicated. In this scheme, archiving the
+entire key space requires us to retrieve the keys from each node separately,
+which is highly inefficient.
+
+The fundamental issue with this strategy is that the schemes for data
+partitioning and data placement are intertwined. For instance, in some cases, it
+is preferred to add more nodes to the system in order to handle an increase in
+request load. However, in this scenario, it is not possible to add nodes without
+affecting data partitioning. Ideally, it is desirable to use independent schemes
+for partitioning and placement. To this end, the following strategies were
+evaluated:
+
+<strong>Strategy 2:</strong> T random tokens per node and equal sized
+partitions: In this strategy, the hash space is divided into Q equally sized
+partitions/ranges and each node is assigned T random tokens. Q is usually set
+such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In
+this strategy, the tokens are only used to build the function that maps values
+in the hash space to the ordered lists of nodes and not to decide the
+partitioning. A partition is placed on the first N unique nodes that are
+encountered while walking the consistent hashing ring clockwise from the end of
+the partition. <a href="#figure-7">Figure 7</a> illustrates this strategy for
+N=3. In this example, nodes A, B, C are encountered while walking the ring from
+the end of the partition that contains key k1. The primary advantages of this
+strategy are: (i) decoupling of partitioning and partition placement, and (ii)
+enabling the possibility of changing the placement scheme at runtime.
+
+> As mentioned before, Riak uses equal-sized partitions, but not
+> random distribution.
+
+**<figure id="figure-7" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure7-small.png">
+  <figcaption>
+    Figure 7: Partitioning and placement of keys in the three strategies. A, B,
+    and C depict the three unique nodes that form the preference list for the
+    key k1 on the consistent hashing ring (N=3). The shaded area indicates the
+    key range for which nodes A, B, and C form the preference list. Dark arrows
+    indicate the token locations for various nodes.
+  </figcaption>
+</figure>**
+
+<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
+Similar to strategy 2, this strategy divides the hash space into Q equally sized
+partitions and the placement of partitions is decoupled from the partitioning
+scheme. Moreover, each node is assigned Q/S tokens where S is the number of
+nodes in the system. When a node leaves the system, its tokens are randomly
+distributed to the remaining nodes such that these properties are preserved.
+Similarly, when a node joins the system it "steals" tokens from nodes in the
+system in a way that preserves these properties.
+
+> Riak most closely follows strategy 3.
+>
+> See [The Node Join Process] and [Replacing a Node].
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+[Replacing a Node]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/replacing-node/
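+
+> In Riak terms, Q corresponds to the ring size (`ring_creation_size`, 64 by
+> default) and each of the S nodes claims roughly Q/S partitions. A quick
+> back-of-the-envelope illustration in Ruby:
+>
+> ```ruby
+> q = 64                 # ring_creation_size
+> [3, 5, 10].each do |s|
+>   puts "#{s} nodes -> ~#{q / s} partitions per node"
+> end
+> # 3 nodes -> ~21 partitions per node
+> # 5 nodes -> ~12 partitions per node
+> # 10 nodes -> ~6 partitions per node
+> ```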
+
+The efficiency of these three strategies is evaluated for a system with S=30 and
+N=3. However, comparing these different strategies in a fair manner is hard as
+different strategies have different configurations to tune their efficiency. For
+instance, the load distribution property of strategy 1 depends on the number of
+tokens (i.e., T) while strategy 3 depends on the number of partitions (i.e., Q).
+One fair way to compare these strategies is to evaluate the skew in their load
+distribution while all strategies use the same amount of space to maintain their
+membership information. For instance, in strategy 1 each node needs to maintain
+the token positions of all the nodes in the ring and in strategy 3 each node
+needs to maintain the information regarding the partitions assigned to each
+node.
+
+In our next experiment, these strategies were evaluated by varying the relevant
+parameters (T and Q). The load balancing efficiency of each strategy was
+measured for different sizes of membership information that needs to be
+maintained at each node, where load balancing efficiency is defined as the ratio
+of the average number of requests served by each node to the maximum number of
+requests served by the hottest node.
+
+The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
+figure, strategy 3 achieves the best load balancing efficiency and strategy 2
+has the worst load balancing efficiency. For a brief time, Strategy 2 served as
+an interim setup during the process of migrating Dynamo instances from using
+Strategy 1 to Strategy 3. Compared to Strategy 1, Strategy 3 achieves better
+efficiency and reduces the size of membership information maintained at each
+node by three orders of magnitude. While storage is not a major issue, the nodes
+gossip the membership information periodically, and as such it is desirable to
+keep this information as compact as possible. In addition to this, strategy 3 is
+advantageous and simpler to deploy for the following reasons: (i) Faster
+bootstrapping/recovery: Since partition ranges are fixed, they can be stored in
+separate files, meaning a partition can be relocated as a unit by simply
+transferring the file (avoiding random accesses needed to locate specific
+items). This simplifies the process of bootstrapping and recovery. (ii) Ease of
+archival: Periodic archiving of the dataset is a mandatory requirement for most
+of Amazon’s storage services. Archiving the entire dataset stored by Dynamo is
+simpler in strategy 3 because the partition files can be archived separately.
+By contrast, in Strategy 1, the tokens are chosen randomly, and archiving the
+data stored in Dynamo requires retrieving the keys from individual nodes
+separately, which is usually inefficient and slow. The disadvantage of strategy
+3 is that changing the node membership requires coordination in order to
+preserve the properties required of the assignment.
+
+**<figure id="figure-8" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure8.png">
+  <figcaption>
+    Figure 8: Comparison of the load distribution efficiency of different
+    strategies for a system with 30 nodes and N=3 with equal amount of metadata
+    maintained at each node.
+    The values of the system size and number of
+    replicas are based on the typical configuration deployed for the majority
+    of our services.
+  </figcaption>
+</figure>**
+
+### 6.3 Divergent Versions: When and How Many?
+
+As noted earlier, Dynamo is designed to trade off consistency for availability.
+To understand the precise impact of different failures on consistency, detailed
+data is required on multiple factors: outage length, type of failure, component
+reliability, workload etc. Presenting these numbers in detail is outside of the
+scope of this paper. However, this section discusses a good summary metric: the
+number of divergent versions seen by the application in a live production
+environment.
+
+> This first statement should be read carefully. It's probably more correct to
+> say that Dynamo (and Riak) provides no consistency guarantees, and allows
+> users to trade availability for durability/latency.
+
+Divergent versions of a data item arise in two scenarios. The first is when the
+system is facing failure scenarios such as node failures, data center failures,
+and network partitions. The second is when the system is handling a large number
+of concurrent writers to a single data item and multiple nodes end up
+coordinating the updates concurrently. From both a usability and efficiency
+perspective, it is preferred to keep the number of divergent versions at any
+given time as low as possible. If the versions cannot be syntactically
+reconciled based on vector clocks alone, they have to be passed to the business
+logic for semantic reconciliation. Semantic reconciliation introduces additional
+load on services, so it is desirable to minimize the need for it.
+
+In our next experiment, the number of versions returned to the shopping cart
+service was profiled for a period of 24 hours. During this period, 99.94% of
+requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
+of requests saw 3 versions and 0.00009% of requests saw 4 versions. This shows
+that divergent versions are created rarely.
+
+Experience shows that the increase in the number of divergent versions is driven
+not by failures but by the increase in the number of concurrent writers. The
+increase in the number of concurrent writes is usually triggered by busy robots
+(automated client programs) and rarely by humans. This issue is not discussed in
+detail due to the sensitive nature of the story.
+
+### 6.4 Client-driven or Server-driven Coordination
+
+As mentioned in Section 5, Dynamo has a request coordination component that uses
+a state machine to handle incoming requests. Client requests are uniformly
+assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
+coordinator for a read request. Write requests, on the other hand, will be
+coordinated by a node in the key’s current preference list. This restriction is
+due to the fact that these preferred nodes have the added responsibility of
+creating a new version stamp that causally subsumes the version that has been
+updated by the write request. Note that if Dynamo’s versioning scheme is based
+on physical timestamps, any node can coordinate a write request.
+
+> In Riak, a server-side load balancer is an optional configuration. You
+> generally use either virtual IPs or reverse proxies.
+>
+> See [Load Balancing] for more information.
+
+[Load Balancing]: {{<baseurl>}}riak/kv/3.0.1/configuring/load-balancing-proxy/
+
+An alternative approach to request coordination is to move the state machine to
+the client nodes. In this scheme client applications use a library to perform
+request coordination locally. A client periodically picks a random Dynamo node
+and downloads its current view of Dynamo membership state. Using this
+information the client can determine which set of nodes form the preference list
+for any given key. Read requests can be coordinated at the client node thereby
+avoiding the extra network hop that is incurred if the request were assigned to
+a random Dynamo node by the load balancer. Writes will either be forwarded to a
+node in the key’s preference list or can be coordinated locally if Dynamo is
+using timestamps based versioning.
+
+> Many [client libraries] provide built-in node request coordination.
+>
+> For example, using the Ruby driver, you could specify three nodes like this:
+>
+> ```ruby
+> client = Riak::Client.new(nodes: [
+>   {host: '10.0.0.1'},
+>   {host: '10.0.0.2'},
+>   {host: '10.0.0.3'}
+> ])
+> ```
+>
+> Note that the Riak clients do not coordinate with Riak's preference list, but
+> simply round-robin requests, letting the Riak cluster handle routing.
+
+[client libraries]: {{<baseurl>}}riak/kv/3.0.1/developing/client-libraries/
+
+An important advantage of the client-driven coordination approach is that a load
+balancer is no longer required to uniformly distribute client load. Fair load
+distribution is implicitly guaranteed by the near uniform assignment of keys to
+the storage nodes. Obviously, the efficiency of this scheme is dependent on how
+fresh the membership information is at the client. Currently clients poll a
+random Dynamo node every 10 seconds for membership updates. A pull based
+approach was chosen over a push based one as the former scales better with large
+number of clients and requires very little state to be maintained at servers
+regarding clients. However, in the worst case the client can be exposed to stale
+membership for duration of 10 seconds. In case, if the client detects its
+membership table is stale (for instance, when some members are unreachable), it
+will immediately refresh its membership information.
+
+<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
+percentile and averages that were observed for a period of 24 hours using
+client-driven coordination compared to the server-driven approach. As seen in
+the table, the client-driven coordination approach reduces the latencies by at
+least 30 milliseconds for 99.9th percentile latencies and decreases the average
+by 3 to 4 milliseconds. The latency improvement is because the client-driven
+approach eliminates the overhead of the load balancer and the extra network hop
+that may be incurred when a request is assigned to a random node. As seen in the
+table, average latencies tend to be significantly lower than latencies at the
+99.9th percentile. This is because Dynamo’s storage engine caches and write
+buffer have good hit ratios. Moreover, since the load balancers and network
+introduce additional variability to the response time, the gain in response time
+is higher for the 99.9th percentile than the average.
+
+<table id="table-2">
+  <caption>
+    Table 2: Performance of client-driven and server-driven
+    coordination approaches.
+  </caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations are not affected significantly. To this end, the
+background tasks were integrated with an admission control mechanism. Each of
+the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock-contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementation and maintenance of Dynamo. Many Amazon internal services have
+used Dynamo for the past two years and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of its requests and no
+data loss event has occurred to date.
+
+Moreover, the primary advantage of Dynamo is that it provides the necessary
+knobs using the three parameters of (N,R,W) to tune their instance based on
+their needs. Unlike popular commercial data stores, Dynamo exposes data
+consistency and reconciliation logic issues to the developers. At the outset,
+one may expect the application logic to become more complex. However,
+historically, Amazon’s platform is built for high availability and many
+applications are designed to handle different failure modes and inconsistencies
+that may arise.
Hence, porting such applications to use Dynamo was a relatively +simple task. For new applications that want to use Dynamo, some analysis is +required during the initial stages of the development to pick the right conflict +resolution mechanisms that meet the business case appropriately. Finally, Dynamo +adopts a full membership model where each node is aware of the data hosted by +its peers. To do this, each node actively gossips the full routing table with +other nodes in the system. This model works well for a system that contains +couple of hundreds of nodes. However, scaling such a design to run with tens of +thousands of nodes is not trivial because the overhead in maintaining the +routing table increases with the system size. This limitation might be overcome +by introducing hierarchical extensions to Dynamo. Also, note that this problem +is actively addressed by O(1) DHT systems(e.g., [14]). + +> This is equally true for Riak. As mentioned above, consider running +> [Basho Bench] to help discover your optimal setup. Nothing will give you +> better numbers than real experimentation. + +[Basho Bench]: {{<baseurl>}}riak/kv/3.0.1/using/performance/benchmarking/ + +## 7. Conclusions + +> This paper was an overview of Riak from a Dynamo point-of-view. To get a +> better sense of the Riak ecosystem, read our ever-expanding [documentation]. + +[documentation]: {{<baseurl>}} + +This paper described Dynamo, a highly available and scalable data store, used +for storing state of a number of core services of Amazon.com’s e-commerce +platform. Dynamo has provided the desired levels of availability and performance +and has been successful in handling server failures, data center failures and +network partitions. Dynamo is incrementally scalable and allows service owners +to scale up and down based on their current request load. Dynamo allows service +owners to customize their storage system to meet their desired performance, +durability and consistency SLAs by allowing them to tune the parameters N, R, +and W. + +The production use of Dynamo for the past year demonstrates that decentralized +techniques can be combined to provide a single highly-available system. Its +success in one of the most challenging application environments shows that an +eventual-consistent storage system can be a building block for highly-available +applications. 
+ + + diff --git a/content/riak/kv/3.0.1/learn/glossary.md b/content/riak/kv/3.0.1/learn/glossary.md new file mode 100644 index 0000000000..63b5a4f473 --- /dev/null +++ b/content/riak/kv/3.0.1/learn/glossary.md @@ -0,0 +1,358 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +aliases: + +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/3.0.1/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/3.0.1/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/3.0.1/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/3.0.1/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/3.0.1/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/3.0.1/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/3.0.1/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/3.0.1/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/3.0.1/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
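+
+As a purely illustrative sketch of the idea (not Riak's implementation), each
+node periodically pushes its view of the ring to one randomly chosen peer, and
+the peer keeps whichever view is newer:
+
+```ruby
+Node = Struct.new(:name, :ring_version)
+
+nodes = [Node.new('riak1', 2), Node.new('riak2', 1), Node.new('riak3', 1)]
+
+# One gossip round: every node syncs ring state with a random peer,
+# and both sides keep the most recent version they have seen.
+def gossip_round(nodes)
+  nodes.each do |node|
+    peer = (nodes - [node]).sample
+    latest = [node.ring_version, peer.ring_version].max
+    node.ring_version = peer.ring_version = latest
+  end
+end
+
+3.times { gossip_round(nodes) }
+nodes.map(&:ring_version)  # => [2, 2, 2] -- the views converge quickly
+```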
+ +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Hinted Handoff + +Hinted handoff is a technique for dealing with node failure in the Riak +cluster in which neighboring nodes temporarily take over storage +operations for the failed node. When the failed node returns to the +cluster, the updates received by the neighboring nodes are handed off to +it. + +Hinted handoff allows Riak to ensure database availability. When a node +fails, Riak can continue to handle requests as if the node were still +there. + +* [Recovering a Failed Node][repair recover failure recovery] + +## Key + +Keys are unique object identifiers in Riak and are scoped within buckets +and bucket types. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Lager + +[Lager] is an Erlang/OTP framework that +ships as Riak's default logger. + +## MapReduce + +Riak's MapReduce gives developers the capability to perform more +powerful queries over the data stored in their key/value data. + +* [Using MapReduce][usage mapreduce] + +## Node + +A node is analogous to a physical server. Nodes run a certain number of +vnodes, each of which claims a partition in the Riak Ring key space. + +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Object + +An object is another name for a value. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Partition + +Partitions are the spaces into which a Riak cluster is divided. Each +vnode in Riak is responsible for a partition. Data is stored on a set +number of partitions determined by the `n_val` setting, with the target +partitions chosen statically by applying consistent hashing to an +object's key. + +* [Clusters][concept clusters] +* [Eventual Consistency][concept eventual consistency] +* [Cluster Capacity Planning][plan cluster capacity] + +## Quorum + +Quorum in Riak has two meanings: + +* The quantity of replicas that must respond to a read or write request + before it is considered successful. This is defined as a bucket + property or as one of the relevant parameters to a single request + (R,W,DW,RW). +* A symbolic quantity for the above, `quorum`, which is equivalent to + `n_val` / 2 + 1. The default setting is `2`. + +* [Eventual Consistency][concept eventual consistency] +* [Replication properties][apps replication properties] +* [Understanding Riak's Configurable Behaviors] + +## Sloppy Quorum + +During failure scenarios, in which available nodes < total nodes, sloppy +quorum is used to ensure that Riak is still available to take writes. +When a primary node is unavailable, another node will accept its write +requests. When the node returns, data is transferred to the primary node +via the [Hinted Handoff](#hinted-handoff) process. + +## Read Repair + +Read repair is an anti-entropy mechanism that Riak uses to +optimistically update stale replicas when they reply to a read request +with stale data. + +* [More about Read Repair][concept replication] + +## Replica + +Replicas are copies of data stored in Riak. The number of replicas +required for both successful reads and writes is configurable in Riak +and should be set based on your application's consistency and +availability requirements. 
+
+* [Eventual Consistency][concept eventual consistency]
+* [Understanding Riak's Configurable Behaviors]
+
+## Riak Core
+
+Riak Core is the modular distributed systems framework that serves as
+the foundation for Riak's scalable architecture.
+
+* [Riak Core]
+* [Where To Start With Riak Core]
+
+## Riak KV
+
+Riak KV is the key/value datastore for Riak.
+
+* [Riak KV]
+
+## Riak Pipe
+
+Riak Pipe is the processing layer that powers Riak's MapReduce. It's
+best described as "UNIX pipes for Riak."
+
+* [Riak Pipe]
+* [Riak Pipe - the New MapReduce Power]
+* [Riak Pipe - Riak's Distributed Processing Framework]
+
+## Riak Search
+
+Riak Search is a distributed, scalable, failure-tolerant, realtime,
+full-text search engine integrating [Apache
+Solr](https://lucene.apache.org/solr/) with Riak KV.
+
+* [Using Search][usage search]
+
+## Ring
+
+The Riak Ring is a 160-bit integer space. This space is equally divided
+into partitions, each of which is claimed by a vnode, which themselves
+reside on actual physical server nodes.
+
+* [Clusters][concept clusters]
+* [Dynamo][learn dynamo]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Secondary Indexing (2i)
+
+Secondary Indexing in Riak gives developers the ability to tag an object
+stored in Riak with one or more values which can then be queried.
+
+* [Using Secondary Indexes][usage secondary-indexes]
+* [Repairing Indexes][repair recover repairs]
+
+## Strong Consistency
+
+While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater
+enable you to apply strong consistency guarantees to some or all of your
+data, thus using Riak as a CP (consistent plus partition-tolerant)
+rather than AP (highly available plus partition-tolerant) system.
+
+* [Strong Consistency Concept][concept strong consistency]
+* [Using Strong Consistency][cluster ops strong consistency]
+
+## Value
+
+Riak is best described as a key/value store. In versions of Riak prior
+to 2.0, all "values" are opaque BLOBs (binary large objects) identified
+with a unique key. Values can be any type of data, including a string, a
+JSON object, a text document, etc. Modifying values involves fetching
+the value that exists in Riak and replacing it with a new value;
+operations on values are thus basic CRUD operations.
+
+[Riak Data Types][dev data types], added in version 2.0, are an important
+exception to this. While still considered values---because they are
+stored in bucket type/bucket/key locations, like anything in Riak---Riak
+Data Types are not BLOBs and are modified by Data Type-specific
+operations.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+* [Data Types][dev data types]
+
+
+## Vector Clock
+
+Riak utilizes vector clocks (or _vclocks_) to handle version control.
+Since any node in a Riak cluster is able to handle a request, and not
+all nodes need to participate, data versioning is required to keep track
+of a current value. When a value is stored in Riak, it is tagged with a
+vector clock, establishing its initial version. When it is updated,
+the client provides the vector clock of the object being modified so
+that this vector clock can be extended to reflect the update. Riak can
+then compare vector clocks on different versions of the object and
+determine certain attributes of the data.
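+
+As a purely illustrative sketch (not Riak's internal representation), a vector
+clock can be modeled as a map of actor IDs to update counters, where "clock A
+descends clock B" means A has seen every update recorded in B:
+
+```ruby
+# Hypothetical model: a vclock is a Hash of actor => counter.
+def descends?(a, b)
+  b.all? { |actor, count| a.fetch(actor, 0) >= count }
+end
+
+v1 = { 'nodeA' => 2, 'nodeB' => 1 }
+v2 = { 'nodeA' => 1, 'nodeB' => 1 }
+v3 = { 'nodeA' => 1, 'nodeB' => 2 }
+
+descends?(v1, v2)  # => true: v1 supersedes v2, so v2 can be discarded
+descends?(v1, v3)  # => false
+descends?(v3, v1)  # => false: v1 and v3 are concurrent updates, so
+                   #    both values are kept as siblings to reconcile
+```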
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + + + diff --git a/content/riak/kv/3.0.1/learn/new-to-nosql.md b/content/riak/kv/3.0.1/learn/new-to-nosql.md new file mode 100644 index 0000000000..60c09fe027 --- /dev/null +++ b/content/riak/kv/3.0.1/learn/new-to-nosql.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: 3.0.1 +#menu: +# riak_kv-3.0.1: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +aliases: + +--- + +**TODO: Add content (not sure where this lives in existing docs)** + + + diff --git a/content/riak/kv/3.0.1/learn/use-cases.md b/content/riak/kv/3.0.1/learn/use-cases.md new file mode 100644 index 0000000000..75b7335a57 --- /dev/null +++ b/content/riak/kv/3.0.1/learn/use-cases.md @@ -0,0 +1,405 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/3.0.1/dev/data-modeling/ + - /riak/kv/3.0.1/dev/data-modeling/ + +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/3.0.1/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/3.0.1/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/3.0.1/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. 
This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships.
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+
+Riak's tunable [replication properties][replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster used to perform more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+pattern of reading and writing data to Riak is very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value.
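+
+A minimal sketch of that pattern with the riak-client Ruby gem (the node
+address, device bucket, and timestamp key below are illustrative assumptions):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{ host: '127.0.0.1' }])
+
+# One bucket per sensor device; one key per collection interval.
+bucket = client.bucket('sensor_device_42')
+
+reading = bucket.new('2020-08-20T14:05:00Z')   # interval timestamp as the key
+reading.content_type = 'application/json'
+reading.data = { 'temperature_c' => 21.4, 'humidity' => 0.53 }
+reading.store
+
+bucket.get('2020-08-20T14:05:00Z').data        # fetch the reading back
+```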
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye on the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type of object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account, or you have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of user data
+modeling. A common example would be storing data for assembling a social network
+timeline. To create a user timeline, you could use a `timeline` bucket in Riak
+and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs (which could then be
+used to retrieve the full information from another bucket) or perhaps the full
+status updates themselves. If you want to store additional data, such as a
+timestamp, category or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+to the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client.
Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the content post, though with a different
+bucket/key combination. Another possibility would be to store each comment with
+its own ID. Loading the full view with comments would require your application
+to call from the posts and comments buckets to assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Linkfluence case study">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="ideeli case study">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
+
+
+
diff --git a/content/riak/kv/3.0.1/learn/why-riak-kv.md b/content/riak/kv/3.0.1/learn/why-riak-kv.md
new file mode 100644
index 0000000000..3bc791a270
--- /dev/null
+++ b/content/riak/kv/3.0.1/learn/why-riak-kv.md
@@ -0,0 +1,225 @@
+---
+title: "Why Riak KV?"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Why Riak KV?"
+    identifier: "learn_why_riak_kv"
+    weight: 100
+    parent: "learn"
+toc: true
+aliases:
+  - /riak/3.0.1/theory/why-riak/
+  - /riak/kv/3.0.1/theory/why-riak/
+
+---
+
+
+[apps replication properties]: {{<baseurl>}}riak/kv/3.0.1/developing/app-guide/replication-properties
+[Basho Bench]: {{<baseurl>}}riak/kv/3.0.1/using/performance/benchmarking
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/strong-consistency
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency
+[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
+[Datomic]: http://www.datomic.com/overview.html
+[dev data types]: {{<baseurl>}}riak/kv/3.0.1/developing/data-types
+[glossary read rep]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#read-repair
+
+
+## What is Riak?
+
+Riak is a distributed database designed to deliver maximum data
+availability by distributing data across multiple servers. As long as
+your Riak client can reach *one* Riak server, it should be able to write
+data.
+
+Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data.
+
+
+### Basho's goals for Riak
+
+Goal | Description
+-------|-------
+**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself is experiencing failure conditions
+**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden
+**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity
+**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available
+
+### When Riak makes sense
+
+If your data does not fit on a single server and demands a distributed
+database architecture, you should take a close look at Riak as a
+potential solution to your data availability issues. Getting distributed
+databases right is **very** difficult, and Riak was built to address the
+problem of data availability with as few trade-offs and downsides as
+possible.
+
+Riak's focus on availability makes it a good fit whenever downtime is
+unacceptable. No one can promise 100% uptime, but Riak is designed to
+survive network partitions and hardware failures that would
+significantly disrupt most databases.
+
+A less-heralded feature of Riak is its predictable latency. Because its
+fundamental operations---read, write, and delete---do not involve
+complex data joins or locks, it services those requests promptly. Thanks
+to this capability, Riak is often selected as a data storage backend for
+data management software from a variety of paradigms, such as
+[Datomic].
+
+From the standpoint of the actual content of your data, Riak might also
+be a good choice if your data can be modeled as one of Riak's currently
+available [Data Types][dev data types]: flags, registers, counters,
+sets, or maps. These Data Types enable you to take advantage of Riak's
+high availability approach while simplifying application development.
+
+### When Riak is Less of a Good Fit
+
+We recommend running no fewer than 5 data servers in a cluster.
+This means that Riak can be overkill for small databases.
If you're not +already sure that you will need a distributed database, there's a good +chance that you won't need Riak. + +If explosive growth is a possibility, however, you are always highly +advised to prepare for that in advance. Scaling at Internet speeds is +sometimes compared to overhauling an airplane mid-flight. If you feel +that such a transition might be necessary in the future, then you might +want to consider Riak. + +Riak's simple data model, consisting of keys and values as its atomic +elements, means that your data must be denormalized if your system is to +be reasonably performant. For most applications this is not a serious +hurdle. But if your data simply cannot be effectively managed as keys +and values, Riak will most likely not be the best fit for you. + +Correspondingly, if your application demands a high query load by any +means other than key/value lookup---e.g. SQL-style `SELECT * FROM table` +operations---Riak will not be as efficient as other databases. If you +wish to compare Riak with other data technologies, Basho offers a tool +called [Basho Bench] to help measure its performance, so that you can +decide whether the availability and operational benefits of Riak +outweigh its disadvantages. + +## How Does a Riak Cluster Work? + +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. 
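+
+As a sketch of how this looks in practice with the riak-client Ruby gem (the
+bucket name is illustrative, and the `n_value` accessor is assumed to be the
+client's shorthand for the bucket's `n_val` property):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{ host: '127.0.0.1' }])
+bucket = client.bucket('important_data')
+
+bucket.n_value      # => 3, the default number of replicas
+bucket.n_value = 5  # store five replicas of each object in this bucket
+```
+
+Note that changing `n_val` on a bucket that already contains data is generally
+discouraged, since existing objects are not automatically re-replicated to
+match the new value.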
+
+## When Things Go Wrong
+
+Riak retains fault tolerance, data integrity, and availability even in
+failure conditions such as hardware failure and network partitions. Riak
+has a number of means of addressing these scenarios and other bumps in
+the road, like version conflicts in data.
+
+### Hinted Handoff
+
+Hinted handoff enables Riak to handle node failure. If a node goes down,
+a neighboring node will take over its storage operations. When the
+failed node returns, the updates received by the neighboring node are
+handed back to it. This ensures that availability for writes and updates
+is maintained automatically, minimizing the operational burden of
+failure conditions.
+
+### Version Conflicts
+
+In any system that replicates data, conflicts can arise, for example
+when two clients update the same object at the exact same time or when
+not all updates have yet reached hardware that is experiencing lag.
+
+In Riak, replicas are [eventually consistent][concept eventual consistency],
+meaning that while data is always available, not all replicas may have
+the most recent update at the exact same time, causing brief
+periods---generally on the order of milliseconds---of inconsistency
+while all state changes are synchronized.
+
+Riak addresses data conflicts as follows: When you make a read request,
+Riak looks up all replicas for that object. By default, Riak will return
+the most recently updated version, determined by looking at the object's
+vector clock. Vector clocks are metadata attached to each replica when
+it is created. They are extended each time a replica is updated to keep
+track of versions. You can also allow clients to resolve conflicts
+themselves if that is a better fit for your use case.
+
+### Riak Data Types
+
+If you are not interested in dealing with version conflicts on the
+application side, [Riak Data Types][dev data types] offer a powerful
+yet easy-to-use means of storing certain types of data while allowing
+Riak to handle merge conflicts. These conflicts are resolved
+automatically by Riak using Data Type-specific algorithms inspired by
+research into [convergent replicated data types].
+
+### Read Repair
+
+When an outdated replica is returned as part of a read request, Riak
+will automatically update the out-of-sync replica to make it consistent.
+[Read repair][glossary read rep], a self-healing property of
+the database, will even update a replica that returns a `not_found` in
+the event that a node loses the data due to physical failure.
+
+### Reading and Writing Data in Failure Conditions
+
+In Riak, you can set an R value for reads and a W value for writes.
+These values give you control over how many replicas must respond to a
+request for it to succeed.
+
+Let's say that you have an N value of 3 (aka `n_val=3`) for a particular
+key/value pair, but one of the physical nodes responsible for a replica
+is down. With an `r=2` setting, only 2 replicas must return results for a
+read to be deemed successful. This allows Riak to provide read
+availability even when nodes are down or laggy. The same applies to the
+W in writes. If this value is not specified, Riak defaults to `quorum`,
+according to which the majority of nodes must respond.
+
+There is more on [replication properties][apps replication properties] elsewhere in the
+documentation.
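+
+To make the R and W discussion concrete, the sketch below shows how
+per-request values can be passed with the riak-client Ruby gem (the node
+address, bucket, and key are illustrative assumptions):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{ host: '127.0.0.1' }])
+bucket = client.bucket('users')
+
+# With n_val=3, this read succeeds once 2 of the 3 replicas respond,
+# even if the third node is down or laggy.
+obj = bucket.get('john_doe', r: 2)
+
+# Likewise, this write is acknowledged once 2 replicas confirm it.
+obj.data = { 'visits' => 42 }
+obj.store(w: 2)
+```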
+
+
+
diff --git a/content/riak/kv/3.0.1/release-notes.md b/content/riak/kv/3.0.1/release-notes.md
new file mode 100644
index 0000000000..9f1f2c5ef9
--- /dev/null
+++ b/content/riak/kv/3.0.1/release-notes.md
@@ -0,0 +1,50 @@
+---
+title: "Riak KV 3.0.1 Release Notes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Release Notes"
+    identifier: "index_release_notes"
+    weight: 101
+    parent: index
+toc: false
+aliases:
+  - /riak/3.0.1/community/release-notes
+  - /riak/kv/3.0.1/intro-v20
+  - /riak/3.0.1/intro-v20
+  - /riak/kv/3.0.1/introduction
+
+---
+
+Released Aug 20, 2020.
+
+
+## Overview
+
+This major release allows Riak to run on OTP versions 20, 21 and 22, but it is not fully backwards-compatible with previous releases. Some limitations and key changes should be noted:
+
+- It is not possible to run this release on any OTP version prior to OTP 20. Testing of node-by-node upgrades is the responsibility of Riak customers; there has been no comprehensive, centrally managed testing of this upgrade. Most customer testing of upgrades has focused on the uplift from 2.2.x and OTP R16 to 3.0 and OTP 20, so this is likely to be the safest transition.
+
+- This release does not include Yokozuna support by default, but Yokozuna can be added back in by reverting the commented lines in rebar.config. There are a number of riak_test failures with Yokozuna, and these have not been resolved prior to release. Upgrading with Yokozuna will be a breaking change, and data may be lost due to the uplift in Solr version. Any migration will require bespoke management of any data within Yokozuna.
+
+- Packaging support is not currently proven for any platform other than CentOS, Debian or Ubuntu. Riak will build from source on other platforms - e.g. `make locked-deps; make rel`.
+
+- As part of the release there has been a comprehensive review of all tests across the dependencies (riak_test, eunit, eqc and pulse), as well as removal of all dialyzer and xref warnings and addition where possible of travis tests. The intention is to continue to raise the bar on test stability before accepting Pull Requests going forward.
+
+- If using riak_client directly (e.g. `{ok, C} = riak:local_client()`), then please use `riak_client:F(*Args, C)`, not `C:F(*Args)`, when calling functions within riak_client - the latter mechanism now has issues within OTP 20+.
+
+- Instead of `riak-admin`, the command `riak admin` should now be used for admin CLI commands.
+
+- Other than the limitations listed above, the release should be functionally identical to Riak KV 2.9.7. Throughput improvements may be seen as a result of the OTP 20 upgrade on some CPU-bound workloads. For disk-bound workloads, additional benefit may be achieved by upgrading further to OTP 22.
+
+[Previous Release Notes](#previous-release-notes)
+
+## Previous Release Notes
+
+Please see the KV 2.9.7 release notes [here]({{<baseurl>}}riak/kv/2.9.7/release-notes/), the KV 2.9.4 release notes [here]({{<baseurl>}}riak/kv/2.9.4/release-notes/), and the KV 2.9.1 release notes [here]({{<baseurl>}}riak/kv/2.9.1/release-notes/).
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup.md b/content/riak/kv/3.0.1/setup.md
new file mode 100644
index 0000000000..5bc7fda064
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup.md
@@ -0,0 +1,51 @@
+---
+title: "Setup Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Setup"
+    identifier: "setup_index"
+    weight: 110
+    pre: install
+toc: false
+aliases:
+
+---
+
+[plan index]: ../setup/planning
+[install index]: ../setup/installing
+[upgrade index]: ../setup/upgrading
+[downgrade]: ../setup/downgrade
+
+## In This Section
+
+#### [Planning][plan index]
+
+Information on planning your Riak KV cluster, including software & hardware recommendations.
+
+[Learn More >>][plan index]
+
+#### [Installing][install index]
+
+Step-by-step tutorials on installing Riak KV.
+
+[Learn More >>][install index]
+
+#### [Upgrading][upgrade index]
+
+Guides on upgrading your Riak KV cluster.
+
+[Learn More >>][upgrade index]
+
+#### [Downgrading][downgrade]
+
+A guide on downgrading your Riak KV cluster.
+
+[Learn More >>][downgrade]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/downgrade.md b/content/riak/kv/3.0.1/setup/downgrade.md
new file mode 100644
index 0000000000..3a436080b5
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/downgrade.md
@@ -0,0 +1,179 @@
+---
+title: "Downgrading"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Downgrading"
+    identifier: "downgrading"
+    weight: 103
+    parent: "setup_index"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/upgrading/rolling-downgrades/
+  - /riak/kv/3.0.1/ops/upgrading/rolling-downgrades/
+
+---
+
+[rolling upgrade]: {{<baseurl>}}riak/kv/3.0.1/setup/upgrading/cluster
+[config ref]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference
+[concept aae]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/
+[aae status]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak-admin/#aae-status
+
+Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade].
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove the Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak search.
+7. Monitor the reindex of the data.
+8. Finalize the process and restart Riak KV & Riak search.
+
+### Guidelines
+
+* Riak Control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:---|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ | | Can be disabled using a [capability](#aae_tree_capability). |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression in LevelDB and/or enabled global expiration in LevelDB when you installed KV 2.9.7, you cannot downgrade. A quick way to check for these settings is sketched below.
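+As a pre-flight check, you can grep `riak.conf` for the two LevelDB
+settings in question. The setting names below are assumptions based on
+the KV 2.9.x configuration schema, so verify them against your own
+`riak.conf` before relying on this:
+
+```bash
+# If either setting is present and enabled, the on-disk format is not
+# readable by older releases and a downgrade is not possible.
+grep -E "leveldb.compression.algorithm|leveldb.expiration" /etc/riak/riak.conf
+```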
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).
+
+This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}
+
+### Stop Riak KV and remove Riak search index & temporary data
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up your Riak KV `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Remove the Riak search index data and AAE data:
+
+  1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+      ```bash
+      rm -rf /var/lib/riak/yz_temp/solr-webapp
+      ```
+  2. Delete the Solr cores located in the yz directory. If you have custom `solrconfig.xml` files, you will need to restore the cores from backup instead.
+
+      For example:
+
+      ```bash
+      rm -rf /var/lib/riak/yz/example_core1
+      rm -rf /var/lib/riak/yz/example_core2
+      ```
+
+### Prepare to Re-index Solr Cores
+
+5\. (**Optional**) You can increase the AAE operation concurrency and the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency-sensitive application, you should adjust these settings with care.
+
+```riak.conf
+anti_entropy.concurrency_limit = 8
+anti_entropy.tree.build_limit.number = 4
+anti_entropy.tree.build_limit.per_timespan = 5m
+```
+
+### Start the node and disable Yokozuna
+
+6\. Start Riak KV:
+{{% note %}}
+Search results will be inconsistent until **Step 8.1** is complete.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7\. Wait for Riak search to start by running the following command:
+
+```bash
+riak admin wait-for-service yokozuna
+```
+
+8\. Run `riak attach`.
+
+  1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:
+
+      ```
+      riak_core_node_watcher:service_down(yokozuna).
+      ```
+
+  2. Expire the Yokozuna AAE Trees:
+
+      ```
+      yz_entropy_mgr:expire_trees().
+      ```
+
+  3. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+### Monitor the reindex of the data
+
+9\. Monitor the build and exchange progress using the `riak admin aae-status` and `riak admin search aae-status` commands.
+
+The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak admin aae-status` documentation][aae status] for more information about the AAE status output.
+
+Once both `riak admin aae-status` and `riak admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.
+
+### Finalize process and restart Yokozuna
+
+
+10\. If you raised the AAE concurrency settings in `riak.conf` during **Step 5**, stop the node and remove the increased AAE thresholds.
+
+11\. 
If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet: + +```erlang +riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)). +``` + +12\. Exit the attach session by pressing **Ctrl-G** then **q**. + +13\. Verify that transfers have completed: + +```bash +riak admin transfers +``` + + + + diff --git a/content/riak/kv/3.0.1/setup/installing.md b/content/riak/kv/3.0.1/setup/installing.md new file mode 100644 index 0000000000..4b2e3f5fce --- /dev/null +++ b/content/riak/kv/3.0.1/setup/installing.md @@ -0,0 +1,61 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/3.0.1/ops/building/installing + - /riak/kv/3.0.1/ops/building/installing + - /riak/3.0.1/installing/ + - /riak/kv/3.0.1/installing/ + +--- + +[install aws]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/rhel-centos +[install suse]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/3.0.1/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. 
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/installing/amazon-web-services.md b/content/riak/kv/3.0.1/setup/installing/amazon-web-services.md
new file mode 100644
index 0000000000..6fe2e7f97e
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/installing/amazon-web-services.md
@@ -0,0 +1,153 @@
+---
+title_supertext: "Installing on"
+title: "Amazon Web Services"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Amazon Web Services"
+    identifier: "installing_amazon_web_services"
+    weight: 301
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/installing/Installing-on-AWS-Marketplace
+  - /riak/kv/3.0.1/ops/building/installing/Installing-on-AWS-Marketplace
+  - /riak/3.0.1/installing/amazon-web-services/
+  - /riak/kv/3.0.1/installing/amazon-web-services/
+
+---
+
+
+## Launching Riak VMs via the AWS Marketplace
+
+{{% note title="Note" %}}
+The AWS Marketplace does not always have the most recent versions of Riak available. To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below.
+{{% /note %}}
+
+In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account.
+
+1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account.
+
+2. Locate Riak in the **Databases & Caching** category or search for Riak from any page.
+
+3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair.
+
+    ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png)
+
+4. Click the **Accept Terms and Launch with 1-Click** button.
+
+### Security Group Settings
+
+Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak.
+
+1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM.
+
+2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports:
+
+  * 22 (SSH)
+  * 8087 (Riak Protocol Buffers Interface)
+  * 8098 (Riak HTTP Interface)
+
+3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab).
+
+  * Port range: 4369
+  * Port range: 6000-7999
+  * Port range: 8099
+
+4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.
+
+    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)
+
+We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/3.0.1/using/security/).
+
+## Clustering Riak on AWS
+
+You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the ec2-user.
+
+You can find more information on connecting to an instance on the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).
+
+{{% note title="Note" %}}
+The following clustering setup will _not_ be resilient to instance restarts
+unless deployed in Amazon VPC.
+{{% /note %}}
+
+{{% note title="Note on Package Based Installation" %}}
+If installing to AWS by package, further configuration of _riak.conf_ to set the node name and listening IP addresses is necessary for the below steps to function.
+{{% /note %}}
+
+1. On the first node, obtain the internal IP address:
+
+    ```bash
+    curl http://169.254.169.254/latest/meta-data/local-ipv4
+    ```
+
+2. For all other nodes, use the internal IP address of the first node:
+
+    ```bash
+    sudo riak admin cluster join riak@<ip.of.first.node>
+    ```
+
+3. After all of the nodes are joined, execute the following:
+
+    ```bash
+    sudo riak admin cluster plan
+    ```
+
+    If this looks good:
+
+    ```bash
+    sudo riak admin cluster commit
+    ```
+
+    To check the status of clustering use:
+
+    ```bash
+    sudo riak admin member_status
+    ```
+
+You now have a Riak cluster running on AWS.
+
+
+## Installing From Package
+
+#### Amazon Linux 2 (AWS 2)
+
+You can install Riak on Amazon Linux 2 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1-1.amzn2x86_64.rpm
+sudo yum localinstall -y riak-3.0.1-1.amzn2x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1-1.amzn2x86_64.rpm
+sudo rpm -i riak-3.0.1-1.amzn2x86_64.rpm
+```
+
+
+#### Amazon Linux 2016.09 (AWS 2016.09)
+
+You can install Riak on Amazon Linux 2016.09 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1-1.amzn1x86_64.rpm
+sudo yum localinstall -y riak-3.0.1-1.amzn1x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1-1.amzn1x86_64.rpm
+sudo rpm -i riak-3.0.1-1.amzn1x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].
+
+[install verify]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/verify
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/installing/debian-ubuntu.md b/content/riak/kv/3.0.1/setup/installing/debian-ubuntu.md
new file mode 100644
index 0000000000..fd3032cd80
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/installing/debian-ubuntu.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "Installing on"
+title: "Debian and Ubuntu"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Debian & Ubuntu"
+    identifier: "installing_debian_ubuntu"
+    weight: 302
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/kv/3.0.1/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/3.0.1/installing/debian-ubuntu/
+  - /riak/kv/3.0.1/installing/debian-ubuntu/
+
+---
+
+[install source index]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/source/
+[security index]: {{<baseurl>}}riak/kv/3.0.1/using/security/
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/verify
+
+Riak KV can be installed on Debian or Ubuntu-based systems using a binary
+package or by compiling from source code.
+
+The following steps have been tested to work with Riak KV on:
+
+- Ubuntu 18.04
+- Ubuntu 16.04
+- Ubuntu 14.04
+- Ubuntu 12.04
+- Debian 9.2
+- Debian 8.6
+- Debian 7.6
+- Raspbian Buster
+
+> **Note on Debian 7**
+>
+> If you wish to install Riak on Debian 7, you may need to install
+[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
+later, which in turn requires upgrading your system to
+[sid](https://www.debian.org/releases/sid/). Installation instructions
+can be found
+[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
+>
+> Once sid has been installed, you can install libc6 with the following
+command:
+>
+>```bash
+apt-get -t sid install libc6 libc6-dev libc6-dbg
+```
+
+## Installing From Package
+
+If you wish to install the deb packages by hand, follow these
+instructions.
+
+### Installing on Non-LTS Ubuntu Releases
+
+Typically we only package Riak for LTS releases to keep our build and
+testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
+there are changes that affect how Riak is packaged, so we will release a
+separate package for that non-LTS release. In most other cases, however,
+if you are running a non-LTS release (such as Ubuntu 12.10), it is safe to
+follow the instructions below for the LTS release prior to yours - for
+Ubuntu 12.10, that means the Ubuntu 12.04 instructions.
+
+### PAM Library Requirement for Ubuntu
+
+One dependency that may be missing on your machine is the `libpam0g-dev`
+package used for Pluggable Authentication Module (PAM) authentication,
+associated with [Riak security][security index].
+
+To install:
+
+```bash
+sudo apt-get install libpam0g-dev
+```
+
+### Riak 64-bit Installation
+
+#### Ubuntu Bionic Beaver (18.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1-1_amd64.deb
+sudo dpkg -i riak_3.0.1-1_amd64.deb
+```
+
+#### Ubuntu Xenial Xerus (16.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/xenial64/riak_3.0.1-1_amd64.deb
+sudo dpkg -i riak_3.0.1-1_amd64.deb
+```
+
+#### Ubuntu Trusty Tahr (14.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/trusty64/riak_3.0.1-1_amd64.deb
+sudo dpkg -i riak_3.0.1-1_amd64.deb
+```
+
+#### Ubuntu Precise Pangolin (12.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/precise64/riak_3.0.1-1_amd64.deb
+sudo dpkg -i riak_3.0.1-1_amd64.deb
+```
+
+#### Debian Stretch (9.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak_3.0.1-1_amd64.deb
+sudo dpkg -i riak_3.0.1-1_amd64.deb
+```
+
+#### Debian Jessie (8.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/8/riak_3.0.1-1_amd64.deb
+sudo dpkg -i riak_3.0.1-1_amd64.deb
+```
+
+#### Debian Wheezy (7.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/7/riak_3.0.1-1_amd64.deb
+sudo dpkg -i riak_3.0.1-1_amd64.deb
+```
+
+#### Raspbian Buster
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak_3.0.1-1_armhf.deb
+sudo dpkg -i riak_3.0.1-1_armhf.deb
+```
+
+
+## Installing From Source
+
+First, install Riak dependencies using apt:
+
+```bash
+sudo apt-get install build-essential libc6-dev-i386 git
+```
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/riak-3.0.1.tar.gz
+tar zxvf riak-3.0.1.tar.gz
+cd riak-3.0.1
+make rel
+```
+
+If the build was successful, a fresh build of Riak will exist in the
+`rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/installing/freebsd.md b/content/riak/kv/3.0.1/setup/installing/freebsd.md
new file mode 100644
index 0000000000..aec2e5df40
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/installing/freebsd.md
@@ -0,0 +1,133 @@
+---
+title_supertext: "Installing on"
+title: "FreeBSD"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "FreeBSD"
+    identifier: "installing_freebsd"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/installing/Installing-on-FreeBSD
+  - /riak/kv/3.0.1/ops/building/installing/Installing-on-FreeBSD
+  - /riak/3.0.1/installing/freebsd/
+  - /riak/kv/3.0.1/installing/freebsd/
+
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/3.0.1/downloads/
+[install verify]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/verify
+
+You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code.
+
+## Installing From Binary Package
+
+Installing Riak from a binary package is the simplest method: it has the fewest dependencies and takes less time to complete than building from source.
+
+### Prerequisites and Dependencies
+
+Riak requires `sudo` to be installed if the Riak command line tools are to be executed by users other than the *riak* user. Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package.
+
+### Installation
+
+You can install the Riak binary package on FreeBSD remotely using the
+`pkg_add` remote option. For this example, we're installing `riak-3.0.1.txz`.
+
+### For FreeBSD 11.x
+
+```bash
+sudo pkg_add -r https://files.tiot.jp/riak/kv/3.0/3.0.1/freebsd/11.1/riak-3.0.1.txz
+```
+
+
+### For FreeBSD 10.x
+
+```bash
+sudo pkg_add -r https://files.tiot.jp/riak/kv/3.0/3.0.1/freebsd/10.4/riak-3.0.1.txz
+```
+
+When Riak is installed, a message is displayed with information about the installation and available documentation.
+
+```
+Thank you for installing Riak.
+
+Riak has been installed in /usr/local owned by user:group riak:riak
+
+The primary directories are:
+
+    {platform_bin_dir, "/usr/local/sbin"}
+    {platform_data_dir, "/var/db/riak"}
+    {platform_etc_dir, "/usr/local/etc/riak"}
+    {platform_lib_dir, "/usr/local/lib/riak"}
+    {platform_log_dir, "/var/log/riak"}
+
+These can be configured and changed in the platform_etc_dir/app.config.
+
+Add /usr/local/sbin to your path to run the riak and riak admin scripts directly.
+
+Man pages are available for riak(1) and riak admin(1)
+```
+
+## Installing From Source
+
+Installing Riak from source on FreeBSD is straightforward, but it requires more dependencies (such as Erlang) to be installed before building and takes more time than a binary package installation.
+
+That said, installing from source provides greater flexibility with respect to configuration and data root locations, and finer-grained control over specific dependency versions.
+ +### Prerequisites and Dependencies + +When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build. + +If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. + +* Erlang ([Installing Erlang][install source erlang]) +* Curl +* Git +* OpenSSL (version 1.0.0_7) +* Python +* sudo +* flex + +### Installation +First download the version you wish to install from the [downloads][downloads]. + +Next, unpack and build a release from source: + +```bash +tar zxf <riak-x.x.x> +cd riak-x.x.x +gmake rel +``` + +Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories: + +```bash +bin # Riak binaries +data # Riak data and metadata +erts-5.9.2 # Erlang Run-Time System +etc # Riak Configuration +lib # Third party libraries +log # Operational logs +releases # Release information +``` + +If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this: + +```bash +gmake devrel +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + diff --git a/content/riak/kv/3.0.1/setup/installing/mac-osx.md b/content/riak/kv/3.0.1/setup/installing/mac-osx.md new file mode 100644 index 0000000000..573e06ad19 --- /dev/null +++ b/content/riak/kv/3.0.1/setup/installing/mac-osx.md @@ -0,0 +1,121 @@ +--- +title_supertext: "Installing on" +title: "Mac OS X" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Mac OS X" + identifier: "installing_macosx" + weight: 303 + parent: "installing" +toc: true +aliases: + - /riak/3.0.1/ops/building/installing/Installing-on-Mac-OS-X + - /riak/kv/3.0.1/ops/building/installing/Installing-on-Mac-OS-X + - /riak/3.0.1/installing/mac-osx/ + - /riak/kv/3.0.1/installing/mac-osx/ + +--- + + + +[perf open files]: {{<baseurl>}}riak/kv/3.0.1/using/performance/open-files-limit +[install source erlang]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/verify + +The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball. + +> **`ulimit` on OS X** +> +> OS X gives you a very small limit on open file handles, so even with a +backend that uses very few file handles, it's possible to run out. See +[Open Files Limit][perf open files] for more information about changing the limit. + + +## From Precompiled Tarballs + +To run Riak from our precompiled tarball, run these commands for the +appropriate platform: + +### 64-bit + +```bash +curl -O https://files.tiot.jp/riak/kv/3.0/3.0.1/osx/10.11/riak-3.0.1-OSX-x86_64.tar.gz +tar xzvf riak-3.0.1-osx-x86_64.tar.gz +``` + +After the release is untarred, you will be able to `cd` into the `riak` +directory and execute `bin/riak start` to start the Riak node. + +## Homebrew + +{{% note title="Warning: Homebrew not always up to date" %}} +Homebrew's Riak recipe is community supported, and thus is not always up to +date with the latest Riak package. Please ensure that the current recipe is +using the latest supported code (and don't be afraid to update it if it's +not). 
+{{% /note %}}
+
+Installing Riak 3.0.1 with [Homebrew](http://brew.sh/) is easy:
+
+```bash
+brew install --devel riak
+```
+
+By default, this will place a `3.0.1` folder in
+`/usr/local/Cellar/riak`.
+
+Be aware that you will most likely see the following message after
+running `brew install`:
+
+```
+Error: The `brew link` step did not complete successfully
+The formula built, but is not symlinked into /usr/local
+
+You can try again using:
+  brew link riak
+```
+
+We do not recommend using `brew link` with Riak. Instead, we recommend
+either copying that directory to a desired location on your machine,
+aliasing the executables in the `/bin` directory, or interacting with
+the Riak installation directory via environment variables.
+
+**Note**: Homebrew will install Erlang if you don't have it already.
+
+## Installing From Source
+
+You must have Xcode tools installed from [Apple's Developer
+website](http://developer.apple.com/).
+
+{{% note title="Note on Clang" %}}
+Riak has had problems compiling with Clang in the past. As of Riak KV
+2.9.0p5 and Clang 902.0.39.1, Clang can build Riak.
+{{% /note %}}
+
+Riak requires [Erlang](http://www.erlang.org/) R16B02+.
+
+If you do not have Erlang already installed, see [Installing Erlang][install source erlang].
+
+Next, download and unpack the source distribution.
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/3.0/3.0.1/riak-3.0.1.tar.gz
+tar zxvf riak-3.0.1.tar.gz
+cd riak-3.0.1
+make rel
+```
+
+If you receive errors when building about "incompatible architecture,"
+please verify that you built Erlang with the same architecture as your
+system (Snow Leopard and higher: 64bit).
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/installing/rhel-centos.md b/content/riak/kv/3.0.1/setup/installing/rhel-centos.md
new file mode 100644
index 0000000000..165aedbd73
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/installing/rhel-centos.md
@@ -0,0 +1,134 @@
+---
+title_supertext: "Installing on"
+title: "RHEL and CentOS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "RHEL & CentOS"
+    identifier: "installing_rhel_centos"
+    weight: 304
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/kv/3.0.1/ops/building/installing/Installing-on-RHEL-and-CentOS
+  - /riak/3.0.1/installing/rhel-centos/
+  - /riak/kv/3.0.1/installing/rhel-centos/
+
+---
+
+
+
+[install source index]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/source
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/verify
+
+Riak KV can be installed on CentOS- or Red Hat-based systems using a binary
+package or by [compiling Riak from source code][install source index]. The
+following steps have been tested to work with Riak on CentOS/RHEL 6.9,
+7.5.1804, and 8.1.1911.
+
+> **Note on SELinux**
+>
+> CentOS enables SELinux by default, so you may need to disable SELinux if
+you encounter errors.
+
+## Installing From Package
+
+If you wish to install the RHEL/CentOS packages by hand, follow these
+instructions.
+
+### For CentOS 8 / RHEL 8
+
+Before installing Riak on CentOS 8/RHEL 8, we need to satisfy some Erlang dependencies
+from EPEL first by installing the EPEL repository:
+
+```bash
+sudo yum install -y epel-release
+```
+
+Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1-1.el8.x86_64.rpm
+sudo yum localinstall -y riak-3.0.1-1.el8.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1-1.el8.x86_64.rpm
+sudo rpm -Uvh riak-3.0.1-1.el8.x86_64.rpm
+```
+
+### For CentOS 7 / RHEL 7
+
+You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1-1.el7.x86_64.rpm
+sudo yum localinstall -y riak-3.0.1-1.el7.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1-1.el7.x86_64.rpm
+sudo rpm -Uvh riak-3.0.1-1.el7.x86_64.rpm
+```
+
+### For CentOS 6 / RHEL 6
+
+You can install Riak on CentOS 6/RHEL 6 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/6/riak-3.0.1-1.el6.x86_64.rpm
+sudo yum localinstall -y riak-3.0.1-1.el6.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/6/riak-3.0.1-1.el6.x86_64.rpm
+sudo rpm -Uvh riak-3.0.1-1.el6.x86_64.rpm
+```
+
+## Installing From Source
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+Building from source will require the following packages:
+
+* `gcc`
+* `gcc-c++`
+* `glibc-devel`
+* `make`
+* `pam-devel`
+
+You can install these with yum:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
+```
+
+Now we can download and install Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.1/riak-3.0.1.tar.gz
+tar zxvf riak-3.0.1.tar.gz
+cd riak-3.0.1
+make rel
+```
+
+You will now have a fresh build of Riak in the `rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/installing/smartos.md b/content/riak/kv/3.0.1/setup/installing/smartos.md
new file mode 100644
index 0000000000..b923e17bb0
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/installing/smartos.md
@@ -0,0 +1,119 @@
+---
+title_supertext: "Installing on"
+title: "SmartOS"
+description: ""
+project: "riak_kv"
+project_version: "3.0.1"
+menu:
+  riak_kv-3.0.1:
+    name: "SmartOS"
+    identifier: "installing_smartos"
+    weight: 305
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/installing/Installing-on-SmartOS
+  - /riak/kv/3.0.1/ops/building/installing/Installing-on-SmartOS
+  - /riak/3.0.1/installing/smartos/
+  - /riak/kv/3.0.1/installing/smartos/
+
+---
+
+[install verify]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/verify
+
+{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
+SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. They demonstrate installation of a Riak node on SmartOS as the root user.
+
+## Open Files Limit
+
+Before proceeding with installation, you should ensure that the system's open
+files limit is at least 65536. Check the current limits to verify this:
+
+```bash
+ulimit -a
+```
+
+To temporarily increase this limit *for the life of your session*, use the following command:
+
+```bash
+ulimit -n 65536
+```
+
+To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`:
+
+```bash
+set rlim_fd_max=65536
+```
+
+## Choosing a Version
+
+SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is how to determine which Riak package to use.
+
+The thing that really matters for Riak is which dataset was used to make the SmartOS VM. These datasets come from Joyent and appear like this with the `dsadm` command:
+
+```
+fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4
+f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1
+```
+
+This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use.
+
+For Joyent Cloud users who don't know what dataset was used, in the guest zone type:
+
+```
+cat /opt/local/etc/pkgin/repositories.conf
+```
+
+* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* repository, you need to use the `1.8` download.
+* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* repository, you need to use the `1.6` download.
+
+## Download and Install
+
+Download your version of the Riak binary package for SmartOS:
+
+```bash
+curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz
+```
+
+Next, install the package:
+
+```
+pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz
+```
+
+After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services:
+
+```bash
+svcadm -v enable -r riak
+```
+
+Then, after enabling the services, check to see that they are online:
+
+```
+svcs -a | grep -E 'epmd|riak'
+```
+
+Output from the above command should resemble the following:
+
+```
+online 17:17:16 svc:/network/epmd:default
+online 17:17:16 svc:/application/riak:default
+```
+
+Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak:
+
+```bash
+riak ping
+```
+
+Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead.
+
+If all responses indicate that Riak is up and running, then you have successfully installed and configured Riak as a service on SmartOS.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
diff --git a/content/riak/kv/3.0.1/setup/installing/solaris.md b/content/riak/kv/3.0.1/setup/installing/solaris.md new file mode 100644 index 0000000000..78d78b8ec8 --- /dev/null +++ b/content/riak/kv/3.0.1/setup/installing/solaris.md @@ -0,0 +1,91 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: "3.0.1" +menu: + riak_kv-3.0.1: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/3.0.1/ops/building/installing/Installing-on-Solaris + - /riak/kv/3.0.1/ops/building/installing/Installing-on-Solaris + - /riak/3.0.1/installing/solaris/ + - /riak/kv/3.0.1/installing/solaris/ + +--- + + + +[install verify]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
diff --git a/content/riak/kv/3.0.1/setup/installing/source.md b/content/riak/kv/3.0.1/setup/installing/source.md
new file mode 100644
index 0000000000..54b2f59a7b
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/installing/source.md
@@ -0,0 +1,110 @@
+---
+title_supertext: "Installing"
+title: "Riak KV From Source"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Installing From Source"
+    identifier: "installing_source"
+    weight: 310
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/Installing-Riak-from-Source
+  - /riak/kv/3.0.1/ops/building/Installing-Riak-from-Source
+  - /riak/3.0.1/installing/source/
+  - /riak/kv/3.0.1/installing/source/
+
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/3.0.1/downloads/
+[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/debian-ubuntu/#installing-from-source
+[install freebsd#source]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/freebsd/#installing-from-source
+[install mac osx#source]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/mac-osx/#installing-from-source
+[install rhel & centos#source]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/rhel-centos/#installing-from-source
+[install verify]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/verify
+
+Riak should be installed from source if you are building on a platform
+for which a package does not exist or if you are interested in
+contributing to Riak.
+
+## Dependencies
+
+### Erlang
+
+To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release.
+
+See [Installing Erlang][install source erlang] for instructions.
+
+### Git
+
+Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build.
+
+### GCC
+
+Riak will not compile with Clang. Please make sure your default C/C++
+compiler is [GCC](https://gcc.gnu.org/).
+
+## Installation
+
+The following instructions generate a complete, self-contained build of
+Riak in `$RIAK/rel/riak` where `$RIAK` is the location of the unpacked
+or cloned source.
+
+### Installing from source package
+
+Download the Riak source package from the [Download Center][downloads] and build:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/3.0/3.0.1/riak-3.0.1.tar.gz
+tar zxvf riak-3.0.1.tar.gz
+cd riak-3.0.1
+make locked-deps
+make rel
+```
+
+### Installing from GitHub
+
+The [Riak GitHub repository](http://github.com/basho/riak) has much
+more information on building and installing Riak from source. To clone
+and build Riak from source, follow the steps below.
+
+Clone the repository using [Git](http://git-scm.com) and build:
+
+```bash
+git clone git://github.com/basho/riak.git
+cd riak
+make locked-deps
+make rel
+```
+
+## Platform-Specific Instructions
+
+For instructions about specific platforms, see:
+
+  * [Debian & Ubuntu][install debian & ubuntu#source]
+  * [FreeBSD][install freebsd#source]
+  * [Mac OS X][install mac osx#source]
+  * [RHEL & CentOS][install rhel & centos#source]
+
+If you are running Riak on a platform not in the list above and need
+some help getting it up and running, join the Riak Mailing List and
+inquire about it there. We are happy to help you get up and running with
+Riak.
+
+### Windows
+
+Riak is not currently supported on Microsoft Windows.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/installing/source/erlang.md b/content/riak/kv/3.0.1/setup/installing/source/erlang.md
new file mode 100644
index 0000000000..21927c8f0b
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/installing/source/erlang.md
@@ -0,0 +1,571 @@
+---
+title: "Installing Erlang"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Installing Erlang"
+    identifier: "installing_source_erlang"
+    weight: 301
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/installing/erlang
+  - /riak/kv/3.0.1/ops/building/installing/erlang
+  - /riak/3.0.1/installing/source/erlang/
+  - /riak/kv/3.0.1/installing/source/erlang/
+
+---
+
+[install index]: {{<baseurl>}}riak/kv/3.0.1/setup/installing
+[security basics]: {{<baseurl>}}riak/kv/3.0.1/using/security/basics
+
+Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].**
+
+> **Note on Official Support**
+>
+> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package.
+
+## Prerequisites
+
+#### Contents
+
+* [kerl](#kerl-prerequisites)
+* [Debian/Ubuntu](#debian-ubuntu-prerequisites)
+* [FreeBSD/Solaris](#freebsd-solaris-prerequisites)
+* [Mac OS X](#mac-os-x-prerequisites)
+* [RHEL/CentOS](#rhel-centos-prerequisites)
+
+To build and install Erlang you must have a GNU-compatible build system and these tools:
+
+**Unpacking**
+
+* [GNU gzip](http://www.gzip.org/) or a modern uncompressing utility.
+* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives.
+
+**Building**
+
+* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts.
+* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program.
+* [gcc](https://gcc.gnu.org/): for compiling C.
+* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces.
+* [OpenSSL](https://www.openssl.org/): toolkit that implements the SSL and TLS protocols.
+* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java.
+
+
+## kerl Prerequisites
+
+[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems.
+
+Install kerl by running the following commands:
+
+```bash
+curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl
+chmod a+x kerl
+```
+
+If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl.
+
+Otherwise, continue with [Installing with kerl](#installing-with-kerl).
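+Before moving on, you can confirm that kerl works and see what it is
+tracking; these are standard kerl subcommands, run from the directory
+containing the `kerl` script:
+
+```bash
+./kerl list releases       # Erlang/OTP releases kerl can build
+./kerl list builds         # builds compiled on this machine
+./kerl list installations  # builds installed and ready to activate
+```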
+
+### Configuring kerl on FreeBSD/Solaris
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+    --enable-kernel-poll --without-odbc"
+```
+
+Then check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+If this returns `autoconf not found`, install autoconf by running:
+
+```shell
+sudo pkg update
+sudo pkg install autoconf
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+### Configuring kerl on Mac OS X
+
+To compile Erlang as 64-bit on Mac OS X, you need to instruct kerl to pass the correct flags to the `configure` command.
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+    --enable-kernel-poll --without-odbc --enable-darwin-64bit"
+```
+
+On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).
+
+Check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf as follows.
+
+With Homebrew:
+
+```shell
+brew install autoconf
+```
+
+Or with curl:
+
+```shell
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+
+## Debian/Ubuntu Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `apt-get` commands:
+
+```bash
+sudo apt-get update
+sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+> **Note on build output**
+>
+>These packages are not required for operation of a Riak node.
+Notes in the build output about missing support for wxWidgets can be
+safely ignored when installing Riak in a typical non-graphical server
+environment.
+
+To install packages for graphics support, use the following `apt-get` command:
+
+```bash
+sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).
+
+
+
+## FreeBSD/Solaris Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `pkg` commands:
+
+```bash
+sudo pkg update
+sudo pkg install gcc autoconf gmake flex
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support, use the following `pkg` command:
+
+```bash
+sudo pkg install wx28-gtk2-2.8.12_4
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).
+
+
+
+## Mac OS X Prerequisites
+
+* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
+* [Homebrew](http://brew.sh/) (*optional*) - Package Manager.
+
+First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X.
+
+We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is optional; it is not required to install Erlang.
+
+Next, if you are running OS X 10.9 (Mavericks) or later, you may need to
+install [autoconf](https://www.gnu.org/software/autoconf/). To check for
+the presence of autoconf, run:
+
+```bash
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf as follows.
+
+With Homebrew:
+
+```bash
+brew install autoconf
+```
+
+Or with curl:
+
+```bash
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x).
+
+## RHEL/CentOS Prerequisites
+
+### Dependencies
+
+To install the required dependencies, run the following `yum` command:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support, use the following `yum` command:
+
+```bash
+sudo yum install wxBase.x86_64
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos).
+
+
+
+## Installation
+
+* [Installing with kerl](#installing-with-kerl)
+* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu)
+* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris)
+* [Installing on Mac OS X](#installing-on-mac-os-x)
+* [Installing on RHEL/CentOS](#installing-on-rhel-centos)
+
+## Installing with kerl
+
+First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites).
+
+With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of
+Erlang [from GitHub](https://github.com/basho/otp) using the following
+command:
+
+```bash
+./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10
+```
+
+This builds the Erlang distribution and performs all of the steps
+required to manually install Erlang for you.
+
+After Erlang is successfully built, you can install the build as follows:
+
+```bash
+./kerl install R16B02-basho10 ~/erlang/R16B02-basho10
+. ~/erlang/R16B02-basho10/activate
+```
+
+The last line activates the Erlang build that was just installed into
+`~/erlang/R16B02-basho10`.
+
+> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands.
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
+## Installing on Debian/Ubuntu
+
+First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites).
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz).
+ +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). + +You can install Erlang in several ways on OS X: + +* [From Source](#installing-on-mac-os-x-from-source) +* [Homebrew](#installing-on-mac-os-x-with-homebrew) +* [MacPorts](#installing-on-mac-os-x-with-macports) + +## Installing on Mac OS X from Source + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Follow the steps below to configure Erlang for your operating system. + +#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7) + +If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion +(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang. 
+
+Using LLVM:
+
+```bash
+CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+Or if you prefer GCC:
+
+```bash
+CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on Snow Leopard (OS X 10.6)
+
+If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an
+Intel processor:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on older versions of OS X
+
+If you're on a non-Intel processor or an older version of OS X:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll
+```
+
+After you've configured your system, `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with Homebrew
+
+To install Erlang with Homebrew, use this command:
+
+```bash
+brew install erlang
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with MacPorts
+
+Installing with MacPorts:
+
+```bash
+port install erlang +ssl
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on RHEL/CentOS
+
+First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites).
+
+Using `wget`:
+
+```bash
+wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+> **Note for RHEL 6/CentOS 6**
+>
+> In certain versions of RHEL 6 and CentOS 6, the `openssl-devel` package
+ships with Elliptic Curve Cryptography partially disabled. To
+communicate this to Erlang and prevent compile- and run-time errors, the
+environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to
+Erlang's `./configure` call.
+> +> The full `make` invocation then becomes +> +> ```bash +CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + + + diff --git a/content/riak/kv/3.0.1/setup/installing/source/jvm.md b/content/riak/kv/3.0.1/setup/installing/source/jvm.md new file mode 100644 index 0000000000..93a74daa08 --- /dev/null +++ b/content/riak/kv/3.0.1/setup/installing/source/jvm.md @@ -0,0 +1,55 @@ +--- +title: "Installing the JVM" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Installing the JVM" + identifier: "installing_source_jvm" + weight: 302 + parent: "installing_source" +toc: true +aliases: + - /riak/3.0.1/ops/building/installing/jvm + - /riak/kv/3.0.1/ops/building/installing/jvm + - /riak/3.0.1/ops/building/installing/Installing-the-JVM + - /riak/kv/3.0.1/ops/building/installing/Installing-the-JVM + - /riak/3.0.1/installing/source/jvm/ + - /riak/kv/3.0.1/installing/source/jvm/ + +--- + +[usage search]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search + +If you are using [Riak Search 2.0][usage search], codename Yokozuna, +you will need to install **Java 1.6 or later** to run [Apache +Solr](https://lucene.apache.org/solr/), the search platform that powers +Riak Search. + +We recommend using Oracle's [JDK +7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html). +Installation packages can be found on the [Java SE 7 Downloads +page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR) +and instructions on the [documentation +page](http://www.oracle.com/technetwork/java/javase/documentation/index.html). + +## Installing Solr on OS X + +If you're using Riak Search on Mac OS X, you may see the following +error: + +```java +java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME> +``` + +If you encounter this error, we recommend manually setting the hostname +for `localhost` using +[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html). + +```bash +scutil --set HostName "localhost" +``` + + + diff --git a/content/riak/kv/3.0.1/setup/installing/suse.md b/content/riak/kv/3.0.1/setup/installing/suse.md new file mode 100644 index 0000000000..3d567ea07f --- /dev/null +++ b/content/riak/kv/3.0.1/setup/installing/suse.md @@ -0,0 +1,52 @@ +--- +title_supertext: "Installing on" +title: "SUSE" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "SUSE" + identifier: "installing_suse" + weight: 307 + parent: "installing" +toc: false +aliases: + - /riak/3.0.1/ops/building/installing/Installing-on-SUSE + - /riak/kv/3.0.1/ops/building/installing/Installing-on-SUSE + - /riak/3.0.1/installing/suse/ + - /riak/kv/3.0.1/installing/suse/ + +--- + +[install verify]: {{<baseurl>}}riak/kv/3.0.1/setup/installing/verify + +{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}} +SUSE is no longer supported in Riak KV 2.9.7+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +Riak KV can be installed on OpenSuse and SLES systems using a binary package. 
The following steps have been tested to work with Riak on +the following x86/x86_64 flavors of SuSE: + +* SLES11-SP1 +* SLES11-SP2 +* SLES11-SP3 +* SLES11-SP4 +* OpenSUSE 11.2 +* OpenSUSE 11.3 +* OpenSUSE 11.4 + +## Installing with rpm + +```bash +wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm +sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + diff --git a/content/riak/kv/3.0.1/setup/installing/verify.md b/content/riak/kv/3.0.1/setup/installing/verify.md new file mode 100644 index 0000000000..7fea175872 --- /dev/null +++ b/content/riak/kv/3.0.1/setup/installing/verify.md @@ -0,0 +1,169 @@ +--- +title: "Verifying a Riak KV Installation" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Verifying an Installation" + identifier: "installing_verify" + weight: 311 + parent: "installing" +toc: true +aliases: + - /riak/3.0.1/ops/installing/Post-Installation + - /riak/kv/3.0.1/ops/installing/Post-Installation + - /riak/3.0.1/installing/verify-install/ + - /riak/kv/3.0.1/installing/verify-install/ + +--- + +[client libraries]: {{<baseurl>}}riak/kv/3.0.1/developing/client-libraries +[perf open files]: {{<baseurl>}}riak/kv/3.0.1/using/performance/open-files-limit +[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/bucket-types +[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/inspecting-node + +After you've installed Riak KV, we recommend checking the liveness of +each node to ensure that requests are being properly served. + +In this document, we cover ways of verifying that your Riak nodes are operating +correctly. After you've determined that your nodes are functioning and you're +ready to put Riak KV to work, be sure to check out the resources in the +**Now What?** section below. + +## Starting a Riak Node + +> **Note about source installations** +> +> To start a Riak KV node that was installed by compiling the source code, you +can add the Riak KV binary directory from the installation directory you've +chosen to your `PATH`. +> +> For example, if you compiled Riak KV from source in +the `/home/riak` directory, then you can add the binary directory +(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation. + +To start a Riak node, use the `riak start` command: + +```bash +riak start +``` + +A successful start will return no output. If there is a problem starting the +node, an error message is printed to standard error. + +To run Riak with an attached interactive Erlang console: + +```bash +riak console +``` + +A Riak node is typically started in console mode as part of debugging or +troubleshooting to gather more detailed information from the Riak startup +sequence. Note that if you start a Riak node in this manner, it is running as +a foreground process that will be exited when the console is closed. + +You can close the console by issuing this command at the Erlang prompt: + +```erlang +q(). +``` + +Once your node has started, you can initially check that it is running with +the `riak ping` command: + +```bash +riak ping +``` + +The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not. 
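+
+If you want to script this liveness check, a minimal sketch might look
+like the following (assuming the `riak` script is on your `PATH` and
+responds as described above):
+
+```bash
+# Exit non-zero if the local node is not responding to pings.
+if riak ping | grep -q '^pong'; then
+  echo "Riak node is up"
+else
+  echo "Riak node is not responding" >&2
+  exit 1
+fi
+```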
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak admin test` command:
+
+```bash
+riak admin test
+```
+
+Successful output from `riak admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the `default` [bucket type][cluster ops bucket types]:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /types/default/props HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node!
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/installing/windows-azure.md b/content/riak/kv/3.0.1/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..2f258ca2e5
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/installing/windows-azure.md
@@ -0,0 +1,197 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/3.0.1/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/3.0.1/installing/windows-azure/
+  - /riak/kv/3.0.1/installing/windows-azure/
+
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported, and thus are not always up to
+date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Log in to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+    - Provide a "Virtual Machine Name", such as "testlinuxvm".
+    - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+    - In the "New Password" box, type a strong password.
+    - In the "Confirm Password" box, retype the password.
+    - Select the appropriate "Size" from the drop-down list.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+    - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop-down list.
+    - In the "DNS Name" box, type a valid DNS address, e.g. "testlinuxvm".
+    - In the "Storage Account" box, select "Use Automatically Generated Storage Account".
+    - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration2.png)
+
+6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration3.png)
+
+7. Wait while Windows Azure prepares your virtual machine.
+
+### Configure Endpoints
+
+Once the virtual machine is created, you must configure endpoints in order to remotely connect.
+
+1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints".
+
+2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows:
+    - Name: https
+    - Protocol: leave set to 'TCP'
+    - Public Port: 443
+    - Private Port: 8069
+
+## Connect to CentOS VMs using PuTTY or SSH
+
+When the virtual machine has been provisioned and the endpoints configured, you can connect to it using SSH or PuTTY.
+
+### Connecting Using SSH
+
+**For Linux & Mac Users:**
+
+```bash
+ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180
+```
+Enter the user's password.
+
+**For Windows Users, use PuTTY:**
+
+If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html).
+
+1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe.
+
+2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port.
+
+    ![]({{<baseurl>}}images/putty.png)
+
+## Install Riak and configure using a shell script
+
+1. **On each node**, once you've connected using the steps above, execute:
+
+```bash
+sudo su -
+curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh
+```
+
+## Configure Riak using Riak Control
+
+You can either use Riak Control or the command line to add nodes to your Riak Cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line".
+
+1. Find the DNS name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For example:
+    - **dns:** basho-example.cloudapp.net
+    - **Deployment ID:** 7ea145743aeb4402a088da1234567890
+
+2. Visit https://dns-name.cloudapp.net/admin in your browser.
+
+3. Enter 'admin' as the username, and the "Deployment ID" as the password.
+
+4. Select 'Cluster' on the left.
+
+5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each VM, not the DNS name. For example:
+    - riak@basho-centos1
+
+You now have a Riak cluster on Azure.
+
+## Configure Riak using Command Line
+
+If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section.
+
+First, SSH into the second (and each subsequent) node and execute:
+
+```bash
+riak admin cluster join riak@yourhostnamehere
+```
+
+(Where 'yourhostnamehere' is the short name of the **first node** in your cluster.)
+
+(NOTE: The host you choose can actually be any host that has already joined the cluster. The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak admin cluster plan
+```
+
+Verify all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak admin cluster commit
+```
+
+To check the status of clustering, use:
+
+```bash
+riak admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/planning.md b/content/riak/kv/3.0.1/setup/planning.md
new file mode 100644
index 0000000000..ab8b6c34f7
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/planning.md
@@ -0,0 +1,61 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+aliases:
+
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster.
+
+[Learn More >>][plan best practices]
+
+
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/planning/backend.md b/content/riak/kv/3.0.1/setup/planning/backend.md
new file mode 100644
index 0000000000..3190b69f46
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/planning/backend.md
@@ -0,0 +1,60 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/planning/backends/
+  - /riak/kv/3.0.1/ops/building/planning/backends/
+
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/multi
+[plan backend leveled]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveled
+[dev api backend]: {{<baseurl>}}riak/kv/3.0.1/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+* [Leveled][plan backend leveled]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic |Bitcask|LevelDB|Memory|
+:----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend |✓ | | |
+Persistent |✓ |✓ | |
+Keyspace in RAM |✓ | |✓ |
+Keyspace can be greater than available RAM | |✓ | |
+Keyspace loaded into RAM on startup<sup>1</sup>|✓ | | |
+Objects in RAM | | |✓ |
+Object expiration |✓ | |✓ |
+Secondary indexes | |✓ |✓ |
+Tiered storage | |✓ | |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces.
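+
+As a quick check, you can confirm which backend a running node is
+actually using by inspecting its effective configuration. A minimal
+example, assuming the `riak` script is on your `PATH`:
+
+```bash
+# Show the active storage backend setting (Bitcask by default)
+riak config effective | grep backend
+```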
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/planning/backend/bitcask.md b/content/riak/kv/3.0.1/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..e32e3c7376
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/planning/backend/bitcask.md
@@ -0,0 +1,994 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/advanced/backends/bitcask/
+  - /riak/kv/3.0.1/ops/advanced/backends/bitcask/
+
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/3.0.1/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: (1) data that is
+  written to Bitcask doesn't need to be ordered on disk, and (2) the
+  log-structured design of Bitcask allows for minimal disk head
+  movement during writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append-only and write-once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes.
Recovery operations need to review only the last + record or two written and verify CRC data to ensure that the data is + consistent. + +* **Easy Backup** + + In most systems, backup can be very complicated. Bitcask simplifies + this process due to its append-only, write-once disk format. Any + utility that archives or copies files in disk-block order will + properly back up or copy a Bitcask database. + +## Weaknesses + +* Keys must fit in memory + + Bitcask keeps all keys in memory at all times, which means that your + system must have enough memory to contain your entire keyspace, plus + additional space for other operational components and operating- + system-resident filesystem buffer space. + +## Installing Bitcask + +Bitcask is the default storage engine for Riak. You can verify that +Bitcask is currently being used as the storage backend with the +[`riak`][use admin riak cli] command interface: + +```bash +riak config effective | grep backend +``` + +If this operation returns anything other than `bitcask`, read +the following section for instructions on switching the backend to Bitcask. + +## Enabling Bitcask + +You can set Bitcask as the storage engine using each node's +[configuration files][config reference]: + +```riakconf +storage_backend = bitcask +``` + +```appconfig +{riak_kv, [ + {storage_backend, riak_kv_bitcask_backend}, + %% Other riak_kv settings... + + ]}, +``` + +## Configuring Bitcask + +Bitcask enables you to configure a wide variety of its behaviors, from +filesystem sync strategy to merge settings and more. + +> **Note on configuration systems** +> +> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file. +> Instructions for both systems will be included below. Narrative +descriptions of the various settings will be tailored to the newer +configuration system, whereas instructions for the older system will +largely be contained in the code tabs. + +The default configuration values for Bitcask are as follows: + +```riakconf +bitcask.data_root = ./data/bitcask +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + {data_root, "/var/lib/riak/bitcask"}, + {io_mode, erlang}, + + %% Other Bitcask-specific settings + ]} +``` + +All of the other available settings listed below can be added to your +configuration files. + +### Open Timeout + +The open timeout setting specifies the maximum time Bitcask will block +on startup while attempting to create or open the Bitcask data +directory. The default is 4 seconds. + +In general, you will not need to adjust this setting. If, however, you +begin to receive log messages of the form `Failed to start bitcask +backend: ...`, you may want to consider using a longer timeout. + +Open timeout is specified using the `bitcask.sync.open_timeout` +parameter, and can be set in terms of seconds, minutes, hours, etc. +The following example sets the parameter to 10 seconds: + +```riakconf +bitcask.sync.open_timeout = 10s +``` + +```appconfig +{bitcask, [ + ..., + {open_timeout, 10} %% This value must be expressed in seconds + ... + ]} +``` + +### Sync Strategy + +Bitcask enables you to configure the durability of writes by specifying +when to synchronize data to disk, i.e. by choosing a sync strategy. The +default setting (`none`) writes data into operating system buffers that +will be written to disk when those buffers are flushed by the operating +system. 
If the system fails before those buffers are flushed, e.g. due
+to power loss, that data is lost. This possibility holds for any
+database in which values are asynchronously flushed to disk.
+
+Thus, using the default setting of `none` protects against data loss in
+the event of application failure, i.e. process death, but leaves open a
+small window in which data could be lost in the event of a complete
+system failure, e.g. hardware or OS failure.
+
+This possibility can be prevented by choosing the `o_sync` sync
+strategy, which forces the operating system to flush to stable storage
+at write time for every write. The effect of flushing each write is
+better durability, although it should be noted that write throughput
+will suffer because each write will have to wait for the write to
+complete.
+
+The following sync strategies are available:
+
+  * `none` - lets the operating system manage syncing writes
+    (default)
+  * `o_sync` - uses the `O_SYNC` flag, which forces syncs on every
+    write
+  * Time interval - Riak will force Bitcask to sync at specified
+    intervals
+
+The following are possible configurations:
+
+```riakconf
+bitcask.sync.strategy = none
+bitcask.sync.strategy = o_sync
+
+bitcask.sync.strategy = interval
+bitcask.sync.interval = 65s
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {sync_strategy, none},
+    {sync_strategy, o_sync},
+    {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds
+    ...
+    ]}
+```
+
+> **Sync strategy interval limitations**
+>
+> Setting the sync interval to a value lower than or equal to
+  `riak_core.vnode_inactivity_timeout` (default: 60 seconds) will
+  prevent Riak from performing handoffs.
+>
+> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive.
+
+### Max File Size
+
+The `max_file_size` setting describes the maximum permitted size for any
+single data file in the Bitcask directory. If a write causes the current
+file to exceed this size threshold then that file is closed, and a new
+file is opened for writes. The default is 2 GB.
+
+Increasing `max_file_size` will cause Bitcask to create fewer, larger
+files that are merged less frequently, while decreasing it will cause
+Bitcask to create more numerous, smaller files that are merged more
+frequently.
+
+To give an example, if your ring size is 16, your servers could see as
+much as 32 GB of data in the bitcask directories before the first merge
+is triggered, irrespective of your working set size. You should plan
+storage accordingly and be aware that it is possible to see disk data
+sizes that are larger than the working set.
+
+The `max_file_size` setting can be specified using kilobytes, megabytes,
+etc. The following example sets the max file size to 1 GB:
+
+```riakconf
+bitcask.max_file_size = 1GB
+```
+
+```appconfig
+%% The max_file_size setting must be expressed in bytes, as in the
+%% example below
+
+{bitcask, [
+    ...,
+    {max_file_size, 16#40000000}, %% 1 GB expressed in bytes
+    ...
+    ]}
+```
+
+### Hint File CRC Check
+
+During startup, Bitcask will read from `.hint` files in order to build
+its in-memory representation of the key space, falling back to `.data`
+files if necessary. This reduces the amount of data that must be read
+from the disk during startup, thereby also reducing the time required to
+start up.
You can configure Bitcask to either disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) - Writes are made via Erlang's built-in file API
+* `nif` - Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif` and is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory.
+
+The merge process is affected by all of the settings described in the
+sections below. In those sections, "dead" refers to keys that no longer
+contain the most up-to-date values, while "live" refers to keys that do
+contain the most up-to-date value and have not been deleted.
+
+### Merge Policy
+
+Bitcask enables you to select a merge policy, i.e. when during the day
+merge operations are allowed to be triggered. The valid options are:
+
+* `always` - No restrictions on when merge operations can occur
+  (default)
+* `never` - Merge will never be attempted
+* `window` - Merge operations occur during specified hours
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can select a merge policy using the `merge.policy` setting. The
+following example sets the merge policy to `never`:
+
+```riakconf
+bitcask.merge.policy = never
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {merge_window, never},
+    ...
+    ]}
+```
+
+If you opt to specify start and end hours for merge operations, you can
+do so with the `merge.window.start` and `merge.window.end`
+settings in addition to setting the merge policy to `window`.
+Each setting is an integer between 0 and 23 for hours on a 24h clock,
+with 0 meaning midnight and 23 standing for 11 pm.
+The merge window runs from the first minute of the `merge.window.start` hour
+to the last minute of the `merge.window.end` hour.
+The following example enables merging between 3:00 am and 5:59 pm:
+
+```riakconf
+bitcask.merge.policy = window
+bitcask.merge.window.start = 3
+bitcask.merge.window.end = 17
+```
+
+```appconfig
+%% In the app.config-based system, you specify the merge window using
+%% a tuple, as in the following example:
+
+{bitcask, [
+    ...,
+    {merge_window, {3, 17}},
+    ...
+    ]}
+```
+
+> **`merge_window` and the Multi backend**
+>
+> If you are using the older configuration system and using Bitcask with
+the [Multi][plan backend multi] backend, please note that if you
+wish to use a merge window, you _must_ set it in the global `bitcask`
+section of your configuration file. `merge_window` settings
+in per-backend sections are ignored.
+
+If merging has a significant impact on performance of your cluster, or
+if your cluster has quiet periods in which little storage activity
+occurs, you may want to change this setting from the default.
+
+A common way to limit the impact of merging is to create separate merge
+windows for each node in the cluster and ensure that these windows do
+not overlap. This ensures that at most one node at a time can be
+affected by merging, leaving the remaining nodes to handle requests.
+The main drawback of this approach is that merges will occur less
+frequently, leading to increased disk space usage.
+
+### Merge Triggers
+
+Merge triggers determine the conditions under which merging will be
+invoked. These conditions fall into two basic categories:
+
+* **Fragmentation** - This describes the ratio of dead keys to total
+  keys in a file that will trigger merging. The value of this setting is
+  an integer percentage (0-100). For example, if a data file contains 6
+  dead keys and 4 live keys, a merge will be triggered by the default
+  setting (60%). Increasing this value will cause merging to occur less
+  often, whereas decreasing the value will cause merging to happen more
+  often.
+
+* **Dead Bytes** - This setting describes how much data stored for
+  dead keys in a single file will trigger merging. If a file meets or
+  exceeds the trigger value for dead bytes, a merge will be triggered.
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+  When either of these constraints is met by any file in the directory,
+  Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** - This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in the
+  merge. The value of this setting is a percentage (0-100). For example,
+  if a data file contains 4 dead keys and 6 live keys, it will be
+  included in the merge at the default ratio (40%). Increasing the value
+  will cause fewer files to be merged, while decreasing the value will
+  cause more files to be merged.
+
+* **Dead Bytes** - This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be included in
+  the merge. Increasing this value will cause fewer files to be merged,
+  while decreasing this value will cause more files to be merged. The
+  default is 128 MB.
+
+* **Small File** - This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_.
If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +Fold keys thresholds will reuse the keydir (a) if another fold was +started less than a specified time interval ago and (b) there were fewer +than a specified number of updates. Otherwise, Bitcask will wait until +all current fold keys complete and then start. The default time interval +is 0, while the default number of updates is unlimited. Both thresholds +can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. Each threshold can be disabled by setting the value to +`unlimited`. 
The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value or if you need to purge data for space reasons, you can
+configure object expiration, aka expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting,
+either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time.
+Bitcask will defer triggering a merge solely for key expiry by the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting,
+expressed in terms of minutes, hours, days, etc. The following example
+sets the grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If search queries returning
+expired keys is a problem for your use case, then we would recommend not
+using automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can assist you in making Bitcask as stable and reliable as
+possible and in minimizing latency and maximizing throughput.
+
+### Tips & Tricks
+
+  * **Bitcask depends on filesystem caches**
+
+    Some data storage layers implement their own page/block buffer cache
+    in-memory, but Bitcask does not. Instead, it depends on the
+    filesystem's cache. Adjusting the caching characteristics of your
+    filesystem can impact performance.
+
+  * **Be aware of file handle limits**
+
+    Review the documentation on [open files limit][perf open files].
+
+  * **Avoid the overhead of updating file metadata (such as last access
+    time) on every read or write operation**
+
+    You can achieve a substantial speed boost by adding the `noatime`
+    mounting option to Linux's `/etc/fstab`. This will disable the
+    recording of the last accessed time for all files, which results
+    in fewer disk head seeks. If you need last access times but you'd
+    like some of the benefits of this optimization, you can try
+    `relatime`.
+
+    ```
+    /dev/sda5    /data           ext3    noatime  1 1
+    /dev/sdb1    /data/inno-log  ext3    noatime  1 2
+    ```
+
+  * **Small number of frequently changed keys**
+
+    When keys are changed frequently, fragmentation rapidly increases.
+    To counteract this, you should lower the fragmentation trigger and
+    threshold.
+
+  * **Limited disk space**
+
+    When disk space is limited, limiting the space occupied by dead keys
+    is of paramount importance. Lower the dead bytes threshold and
+    trigger to counteract wasted space.
+
+  * **Purging stale entries after a fixed period**
+
+    To automatically purge stale values, set the object expiry value to
+    the desired cutoff time. Keys that are not modified for a period
+    equal to or greater than this time interval will become
+    inaccessible.
+
+  * **High number of partitions per node**
+
+    Because each cluster has many partitions running, Bitcask will have
+    many [open files][perf open files]. To reduce the number of open
+    files, we suggest increasing the max file size so that larger files
+    will be written. You could also decrease the fragmentation and
+    dead-bytes settings and increase the small file threshold so that
+    merging will keep the number of open files small.
+
+  * **High daytime traffic, low nighttime traffic**
+
+    In order to cope with a high volume of writes without performance
+    degradation during the day, you might want to limit merging to
+    non-peak periods. Setting the merge window to hours of the day
+    when traffic is low will help.
+
+  * **Multi-cluster replication**
+
+    If you are using Riak with the replication feature enabled, your clusters might experience
+    higher production of fragmentation and dead bytes. Additionally,
+    because the fullsync feature operates across entire partitions, it
+    will be made more efficient by accessing data as sequentially as
+    possible (across fewer files). Lowering both the fragmentation and
+    dead-bytes settings will improve performance.
+
+## FAQ
+
+  * [[Why does it seem that Bitcask merging is only triggered when a
+    Riak node is restarted?|Developing on Riak
+    FAQs#why-does-it-seem-that-bitc]]
+  * [[If the size of key index exceeds the amount of memory, how does
+    Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+  * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time.
The file being
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, while
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. This data management strategy
+may use up a lot of space over time, since Bitcask writes new values
+without touching the old ones.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033322.9.744576013453623296
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+9819464125817003981681007469812.9.03831329677312
+```
+
+Note that when starting up, directories are created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 116463411724806326294356135107078803128832.9.0184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered.
+ +``` +bitcask/ +|-- 0-1317147619996589 +| |-- 1317147974.bitcask.data +| |-- 1317147974.bitcask.hint +| |-- 1317221578.bitcask.data +| |-- 1317221578.bitcask.hint +| |-- 1317221869.bitcask.data +| |-- 1317221869.bitcask.hint +| |-- 1317222847.bitcask.data +| |-- 1317222847.bitcask.hint +| |-- 1317222868.bitcask.data +| |-- 1317222868.bitcask.hint +| |-- 1317223014.bitcask.data +| `-- 1317223014.bitcask.hint +|-- 1004782375664995756265033322.9.744576013453623296-1317147628760580 +| |-- 1317147693.bitcask.data +| |-- 1317147693.bitcask.hint +| |-- 13172.9.05.bitcask.data +| |-- 13172.9.05.bitcask.hint +| |-- 1317222514.bitcask.data +| |-- 1317222514.bitcask.hint +| |-- 1317223035.bitcask.data +| |-- 1317223035.bitcask.hint +| |-- 1317223411.bitcask.data +| `-- 1317223411.bitcask.hint +|-- 1027618338748291114361965898003636498195577569280-1317223690337865 +|-- 1050454301831586472458898473514828420377701515264-1317223690151365 + +... etc ... + +``` + +This is normal operational behavior for Bitcask. + + + diff --git a/content/riak/kv/3.0.1/setup/planning/backend/leveldb.md b/content/riak/kv/3.0.1/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..dc3b95d8f6 --- /dev/null +++ b/content/riak/kv/3.0.1/setup/planning/backend/leveldb.md @@ -0,0 +1,506 @@ +--- +title: "LevelDB" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "LevelDB" + identifier: "planning_backend_leveldb" + weight: 101 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/3.0.1/ops/advanced/backends/leveldb/ + - /riak/kv/3.0.1/ops/advanced/backends/leveldb/ + +--- + +[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}} +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode +[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference +[perf index]: {{<baseurl>}}riak/kv/3.0.1/using/performance +[config reference#aae]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/#active-anti-entropy + +> **Note on upgrading to 2.0** +> +> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0, +and wish to keep using your old `app.config` file for configuration, +make sure to follow the steps for setting the +`total_leveldb_mem_percent` parameter in the +[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB]. + +[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application +that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an +open-source, on-disk key/value store created by Google Fellows Jeffrey +Dean and Sanjay Ghemawat. + +LevelDB is a relatively new entrant into the growing list of key/value +database libraries, but it has some very interesting qualities that we +believe make it an ideal candidate for use in Riak. LevelDB's storage +architecture is more like +[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable +model than it is like Bitcask. This design and implementation provide +the possibility of a storage engine without Bitcask's RAM limitation. + +> **Note:** Riak uses a fork of LevelDB. The code can be found +[on Github](https://github.com/basho/leveldb). + +A number of changes have been introduced in the LevelDB backend in Riak +2.0: + +* There is now only _one_ performance-related setting that Riak users + need to define---`leveldb.total_mem_percent`---as LevelDB now + dynamically sizes the file cache and block sizes based upon active + [vnodes][glossary vnode] assigned to the node. 
* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model
  for background compaction work on `.sst` table files. The new model
  has increased throughput by at least 10% in all test scenarios.
* Delete operations now receive priority handling in compaction
  selection, which means more aggressive reclaiming of disk space than
  in previous versions of Riak's LevelDB backend.
* Nodes storing massive key datasets (e.g. in the billions of keys) now
  receive increased throughput due to automatic management of LevelDB's
  block size parameter. This parameter is slowly raised to increase the
  number of files that can be open simultaneously, improving random read
  performance.

## Strengths

1. **License** - The LevelDB and eLevelDB licenses are the [New BSD
   License](http://www.opensource.org/licenses/bsd-license.php) and the
   [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html),
   respectively. We'd like to thank Google and the authors of LevelDB at
   Google for choosing a completely FLOSS license so that everyone can
   benefit from this innovative storage engine.
2. **Data compression** - LevelDB provides two compression algorithms
   to reduce storage size and increase efficient use of storage bandwidth:
   * Google's [Snappy](https://code.google.com/p/snappy/) data compression
   * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data
     compression

   Enabling compression means more CPU usage but less disk space. Compression
   is especially good for text data, including raw text, Base64, JSON, etc.

## Weaknesses

1. Read access can be slow when there are many levels to search.
2. LevelDB may have to do a few disk seeks to satisfy a read: one disk
   seek per level. If 10% of the database fits in memory, LevelDB will
   need one seek, for the last level (since all of the earlier levels
   should end up cached in the OS buffer cache for most filesystems),
   whereas if only 1% fits in memory, LevelDB will need two seeks.

## Installing eLevelDB

Riak ships with eLevelDB included within the distribution, so there is
no separate installation required. However, Riak is configured to use
the Bitcask storage engine by default. To switch to eLevelDB, set the
`storage_backend` variable in [`riak.conf`][config reference] to
`leveldb`:

```riakconf
storage_backend = leveldb
```

```appconfig
{riak_kv, [
    %% ...
    {storage_backend, riak_kv_eleveldb_backend},
    %% ...
    ]}
```

## Configuring eLevelDB

eLevelDB's default behavior can be modified by adding or changing
parameters in the `eleveldb` section of [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB.

The configuration values that can be set in your
[`riak.conf`][config reference] for eLevelDB are as follows:

Config | Description | Default
:------|:------------|:-------
`leveldb.data_root` | LevelDB data root | `./data/leveldb`
`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size.
| `70` + +If you are using the older, `app.config`-based system, the equivalent to +the `leveldb.data_root` is the `data_root` setting, as in the following +example: + +```appconfig +{eleveldb, [ + {data_root, "/path/to/leveldb"}, + + %% Other eleveldb-specific settings +]} +``` + +The `leveldb.maximum_memory.percent` setting is only available in the +newer configuration system. + +### Recommended Settings + +Below are **general** configuration recommendations for Linux +distributions. Individual users may need to tailor these settings for +their application. + +#### sysctl + +For production environments, please see [System Performance Tuning][perf index] +for the recommended `/etc/sysctl.conf` settings. + +#### Block Device Scheduler + +Beginning with the 2.6 kernel, Linux gives you a choice of four I/O +[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We +recommend using the NOOP elevator. You can do this by changing the +scheduler on the Linux boot line: `elevator=noop`. + +#### ext4 Options + +The ext4 filesystem defaults include two options that increase integrity +but slow performance. Because Riak's integrity is based on multiple +nodes holding the same data, these two options can be changed to boost +LevelDB's performance. We recommend setting: `barrier`=0 and +`data`=writeback. + +#### CPU Throttling + +If CPU throttling is enabled, disabling it can boost LevelDB performance +in some cases. + +#### No Entropy + +If you are using https protocol, the 2.6 kernel is widely known for +stalling programs waiting for SSL entropy bits. If you are using https, +we recommend installing the +[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for +pseudorandom number generation. + +#### clocksource + +We recommend setting `clocksource=hpet` on your Linux kernel's `boot` +line. The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a +Google-sponsored open source project that has been incorporated into an +Erlang application and integrated into Riak for storage of key/value +information on disk. The implementation of LevelDB is similar in spirit +to the representation of a single Bigtable tablet (section 5.3). + +### How Levels Are Managed + +LevelDB is a memtable/sstable design. The set of sorted tables is +organized into a sequence of levels. Each level stores approximately ten +times as much data as the level before it. The sorted table generated +from a flush is placed in a special young level (also called level-0). +When the number of young files exceeds a certain threshold (currently +four), all of the young files are merged together with all of the +overlapping level-1 files to produce a sequence of new level-1 files (a +new level-1 file is created for every 2MB of data.) + +Files in the young level may contain overlapping keys. However files in +other levels have distinct non-overlapping key ranges. Consider level +number L where L >= 1. When the combined size of files in level-L +exceeds (10^L) MB (i.e. 
10MB for level-1, 100MB for level-2, ...), one
file in level-L, and all of the overlapping files in level-(L+1), are
merged to form a set of new files for level-(L+1). These merges have the
effect of gradually migrating new updates from the young level to the
largest level using only bulk reads and writes (i.e., minimizing
expensive disk seeks).

When the size of level L exceeds its limit, LevelDB will compact it in a
background thread. The compaction picks a file from level L and all
overlapping files from the next level L+1. Note that if a level-L file
overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
is used as an input to the compaction and will be discarded after the
compaction. Compactions from level-0 to level-1 are treated specially
because level-0 is special (files in it may overlap each other). A
level-0 compaction may pick more than one level-0 file in case some of
these files overlap each other.

A compaction merges the contents of the picked files to produce a
sequence of level-(L+1) files. LevelDB will switch to producing a new
level-(L+1) file after the current output file has reached the target
file size (2MB). LevelDB will also switch to a new output file when the
key range of the current output file has grown enough to overlap more
than ten level-(L+2) files. This last rule ensures that a later
compaction of a level-(L+1) file will not pick up too much data from
level-(L+2).

Compactions for a particular level rotate through the key space. In more
detail, for each level L, LevelDB remembers the ending key of the last
compaction at level L. The next compaction for level L will pick the
first file that starts after this key (wrapping around to the beginning
of the key space if there is no such file).

Level-0 compactions will read up to four 1MB files from level-0, and at
worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
write 14MB in that case).

Other than the special level-0 compactions, LevelDB will pick one 2MB
file from level L. In the worst case, this will overlap with
approximately 12 files from level L+1 (10 because level-(L+1) is ten
times the size of level-L, and another two at the boundaries since the
file ranges at level-L will usually not be aligned with the file ranges
at level-(L+1)). The compaction will therefore read 26MB and write 26MB.
Assuming a disk I/O rate of 100MB/s, the worst compaction cost will be
approximately 0.5 seconds.

If we throttle the background writing to a reasonably slow rate, for
instance 10% of the full 100MB/s speed, a compaction may take up to 5
seconds. If the user is writing at 10MB/s, LevelDB might build up lots
of level-0 files (~50 to hold the 5*10MB). This may significantly
increase the cost of reads due to the overhead of merging more files
together on every read.

### Compaction

Levels are compacted into ordered data files over time. Compaction first
computes a score for each level as the ratio of bytes in that level to
desired bytes. For level 0, it computes files / desired files instead.
The level with the highest score is compacted.

When compacting L0, the only special case to consider is that, after
picking the primary L0 file to compact, LevelDB will check other L0 files to
determine the degree to which they overlap. This is an attempt to avoid
some I/O; as a result, we can expect L0 compactions to usually, if not
always, be "all L0 files".

See the `PickCompaction` routine in
[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
for all the details.

### Comparison of eLevelDB and Bitcask

LevelDB is a persistent ordered map; Bitcask is a persistent hash table
(no ordered iteration). Bitcask stores keys in memory, so for databases
with a large number of keys it may exhaust available physical memory and
then swap into virtual memory, causing a severe slowdown in performance.
Bitcask guarantees at most one disk seek per look-up. LevelDB may have
to do a small number of disk seeks. For instance, a read needs one disk
seek per level. If 10% of the database fits in memory, LevelDB will need
to do one seek (for the last level, since all of the earlier levels
should end up cached in the OS buffer cache). If 1% fits in memory,
LevelDB will need two seeks.

## Recovery

LevelDB never writes in place: it always appends to a log file, or
merges existing files together to produce new ones. So an OS crash will
cause a partially written log record (or a few partially written log
records). LevelDB recovery code uses checksums to detect this and will
skip the incomplete records.

### eLevelDB Database Files

Below are two directory listings showing what you would expect to find
on disk when using eLevelDB. In this example, we use a 64-partition ring,
which results in 64 separate directories, each with its own LevelDB
database:

```bash
leveldb/
|-- 0
| |-- 000003.log
| |-- CURRENT
| |-- LOCK
| |-- LOG
| `-- MANIFEST-000002
|-- 1004782375664995756265033322.9.744576013453623296
| |-- 000005.log
| |-- CURRENT
| |-- LOCK
| |-- LOG
| |-- LOG.old
| `-- MANIFEST-000004
|-- 1027618338748291114361965898003636498195577569280
| |-- 000005.log
| |-- CURRENT
| |-- LOCK
| |-- LOG
| |-- LOG.old
| `-- MANIFEST-000004

... etc ...

`-- 9819464125817003981681007469812.9.03831329677312
  |-- 000005.log
  |-- CURRENT
  |-- LOCK
  |-- LOG
  |-- LOG.old
  `-- MANIFEST-000004

64 directories, 378 files
```

After performing a large number of PUT (write) operations, the Riak
cluster running eLevelDB will look something like this:

```bash
tree leveldb
```

The result should look something like this:

```
├── 0
│   ├── 000003.log
│   ├── CURRENT
│   ├── LOCK
│   ├── LOG
│   ├── MANIFEST-000002
│   ├── sst_0
│   ├── sst_1
│   ├── sst_2
│   ├── sst_3
│   ├── sst_4
│   ├── sst_5
│   └── sst_6
├── 1004782375664995756265033322.9.744576013453623296
│   ├── 000003.log
│   ├── CURRENT
│   ├── LOCK
│   ├── LOG
│   ├── MANIFEST-000002
│   ├── sst_0
│   ├── sst_1
│   ├── sst_2
│   ├── sst_3
│   ├── sst_4
│   ├── sst_5
│   └── sst_6

... etc ...
```

## Tiered Storage

Google's original LevelDB implementation stored all `.sst` table files in a
single database directory. In Riak 1.3, the original LevelDB code was
modified to store `.sst` files in subdirectories representing each
"level" of the file, e.g. `sst_0` or `sst_1`, with the aim of speeding up
database repair operations.

An additional advantage of this approach is that it enables Riak
operators to mount alternative storage devices at each level of a
LevelDB database. This can be an effective strategy because LevelDB is
write intensive in lower levels, with the write intensity declining as
the level number increases. This is due to LevelDB's storage strategy,
which places more frequently updated data in lower levels.

Because write intensity differs by level, performance can be improved by
mounting faster, more expensive storage arrays for lower levels and
slower, less expensive arrays for higher levels. Tiered storage enables
you to configure the level at which LevelDB switches from a faster array
to a slower array.

> **Note on write throttling**
>
> High-volume, sustained write operations can occasionally fill the
higher-speed storage arrays before LevelDB has had the opportunity to
move data to the low-speed arrays. LevelDB's write throttle will slow
incoming write operations to allow compactions to catch up, as would be
the case when using a single storage array.

### Configuring Tiered Storage

If you are using the newer, `riak.conf`-based configuration system, the
following parameters can be used to configure LevelDB tiered storage:

Parameter | Description
:---------|:-----------
`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`

If you are using the older, `app.config`-based system, the example below
shows the equivalents of the settings listed in the table above.

#### Example

The following example LevelDB tiered storage
[configuration][config reference] for Riak 2.0 sets the level for
switching storage arrays to 4 and the file path prefix to `fast_raid`
for the faster array and `slow_raid` for the slower array:

```riakconf
leveldb.tiered = 4
leveldb.tiered.path.fast = /mnt/fast_raid
leveldb.tiered.path.slow = /mnt/slow_raid
```

```appconfig
{eleveldb, [
    {tiered_slow_level, 4},
    {tiered_fast_prefix, "/mnt/fast_raid"},
    {tiered_slow_prefix, "/mnt/slow_raid"}
]}
```

With this configuration, level directories `sst_0` through `sst_3` will
be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
will be stored in `/mnt/slow_raid`.

### Selecting a Level

LevelDB will perform optimally when as much data as possible is stored
in the faster array. The amount of data that can be stored in the faster
array depends on the size of your array and the total number of LevelDB
databases (i.e. the total number of Riak [vnodes][glossary vnode])
in your cluster. The following table shows approximate sizes (in
megabytes) for each level: the amount of raw data stored in the level,
the cumulative size of all levels up to and including that level, and
the cumulative size including active anti-entropy data.

Level | Level Size | Cumulative Size | Cumulative with AAE
:-----|:-----------|:----------------|:-------------------
0 | 360 | 360 | 720
1 | 2,160 | 2,520 | 5,040
2 | 2,940 | 5,460 | 10,920
3 | 6,144 | 11,604 | 23,208
4 | 122,880 | 134,484 | 268,968
5 | 2,362,232 | 2,496,716 | 4,993,432
6 | not limited | not limited | not limited

To select the appropriate value for `leveldb.tiered`, use the following
steps:

* Determine the value of (ring size) / (N - 1), where ring size is the
  value of the `ring_size` configuration parameter and N is the number
  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
  10 nodes, the value would be approximately 14.
* Select either the **Cumulative Size** or **Cumulative with AAE**
  column from the table above.
  Select the third column if you are not
  using active anti-entropy or the fourth column if you are (i.e. if the
  `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
* Multiply the value from the first step by the cumulative column in
  each row in the table. The first result that exceeds your fast storage
  array's capacity will provide the level number that should be used for
  your `leveldb.tiered` setting.

### Migrating from One Configuration to Another

If you want to use tiered storage in a new Riak installation, you don't
need to take any steps beyond setting the configuration. The rest is
automated.

But if you'd like to use tiered storage in an existing installation that
is not currently using it, you will need to manually move your
installation's `.sst` files from one configuration to another.



diff --git a/content/riak/kv/3.0.1/setup/planning/backend/leveled.md b/content/riak/kv/3.0.1/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..4b21b6cb74
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/planning/backend/leveled.md
@@ -0,0 +1,140 @@
---
title: "Leveled"
description: ""
project: "riak_kv"
project_version: 3.0.1
menu:
  riak_kv-3.0.1:
    name: "Leveled"
    identifier: "planning_backend_leveled"
    weight: 101
    parent: "planning_choose_backend"
toc: true
aliases:
  - /riak/3.0.1/ops/advanced/backends/leveled/
  - /riak/kv/3.0.1/ops/advanced/backends/leveled/

---

[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode
[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference
[perf index]: {{<baseurl>}}riak/kv/3.0.1/using/performance
[config reference#aae]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/#active-anti-entropy

[Leveled](https://github.com/martinsumner/leveled) is a simple key-value
store based on the concept of log-structured merge trees, with the
following characteristics:

- Optimised for workloads with larger values (e.g. > 4KB).
- Explicitly supports HEAD requests in addition to GET requests:
    - Splits the storage of the value between keys/metadata and body
      (assuming some definition of metadata is provided);
    - Allows the application to define what constitutes object metadata
      and what constitutes the body (value-part) of the object, and to
      assign tags to objects to manage multiple object types with
      different extraction rules;
    - Stores keys/metadata in a merge tree and the full object in a
      journal of CDB files, allowing for HEAD requests which have lower
      overheads than GET requests, and for queries which traverse
      keys/metadata to be supported with fewer side effects on the page
      cache than folds over keys/objects.
- Support for tagging of object types and the implementation of
  alternative store behaviour based on type:
    - Allows for changes to extract specific information as metadata to
      be returned from HEAD requests;
    - Potentially usable for objects with special retention or merge
      properties.
- Support for low-cost clones without locking to provide for scanning
  queries (e.g. secondary indexes), at low cost specifically where there
  is a need to scan across keys and metadata (not values).
- Written in Erlang as a message-passing system between actors.


## Strengths

1. Leveled was developed specifically as a potential backend for Riak,
   with features such as:
   * Support for secondary indexes
   * Multiple fold types
   * Auto expiry of objects

   Enabling compression means more CPU usage but less disk space.
   Compression is especially good for text data, including raw text,
   Base64, JSON, etc.
2. Optimised for workloads with larger values (e.g. > 4KB).
3. Explicitly supports HEAD requests in addition to GET requests.
4. Support for low-cost clones without locking to provide for scanning
   queries (e.g. secondary indexes).

## Weaknesses

1. Leveled is still a comparatively new technology and is more likely to
   suffer from edge-case issues than Bitcask or LevelDB, simply because
   they have been around longer and have been more thoroughly tested
   through usage in customer environments.
2. Leveled works better with medium-to-large objects. It works perfectly
   well with small objects, but the additional disk-space overhead may
   make LevelDB a better choice if disk space is at a premium and all of
   your data will be limited to a few KB or less. This may change as
   Leveled matures, though.

## Installing leveled

Leveled is included with Riak KV 2.9.7 and beyond, so there is no need
to install anything further. To switch to leveled, set the
`storage_backend` variable in [`riak.conf`][config reference] to
`leveled`:

```riakconf
storage_backend = leveled
```

```appconfig
{riak_kv, [
    %% ...
    {storage_backend, riak_kv_leveled_backend},
    %% ...
    ]}
```

## Configuring leveled

Leveled's default behavior can be modified by adding or changing
parameters in the `leveled` section of [`riak.conf`][config reference]. The table below details the parameters you'll use to modify leveled.

The configuration values that can be set in your
[`riak.conf`][config reference] for leveled are as follows:

Config | Description | Default
:------|:------------|:-------
`leveled.data_root` | leveled data root. | `./data/leveled`
`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
`leveled.compression_method` | Compression method. | `native`
`leveled.compression_point` | Compression point - the point at which compression is applied to the Journal. | `on_receipt`
`leveled.log_level` | Log level - sets the minimum log level to be used within leveled. | `info`
`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. | `1000000000`
`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
`leveled.compaction_low_hour` | The hour of the day at which journal compaction can start. | `0`
`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
`leveled.max_run_length` | The maximum number of Journal files per compaction run. | `4`

### Recommended Settings

Below are **general** configuration recommendations for Linux
distributions. Individual users may need to tailor these settings for
their application.

#### sysctl

For production environments, please see [System Performance Tuning][perf index]
for the recommended `/etc/sysctl.conf` settings.

#### Block Device Scheduler

Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
recommend using the NOOP elevator. You can do this by changing the
scheduler on the Linux boot line: `elevator=noop`.

#### No Entropy

If you are using the https protocol, the 2.6 kernel is widely known for
stalling programs waiting for SSL entropy bits. If you are using https,
we recommend installing the
[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
pseudorandom number generation.

#### clocksource

We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
line.
The TSC clocksource has been identified to cause issues on +machines with multiple physical processors and/or CPU throttling. + +#### swappiness + +We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The +`vm.swappiness` default is 60, which is aimed toward laptop users with +application windows. This was a key change for MySQL servers and is +often referenced in database performance literature. + +## Implementation Details + +[Leveled](https://github.com/martinsumner/leveled) is an open source project that has been developed specifically as a backend option for Riak, rather than a generic backend. + + + diff --git a/content/riak/kv/3.0.1/setup/planning/backend/memory.md b/content/riak/kv/3.0.1/setup/planning/backend/memory.md new file mode 100644 index 0000000000..14f970e888 --- /dev/null +++ b/content/riak/kv/3.0.1/setup/planning/backend/memory.md @@ -0,0 +1,147 @@ +--- +title: "Memory" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Memory" + identifier: "planning_backend_memory" + weight: 102 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/3.0.1/ops/advanced/backends/memory/ + - /riak/kv/3.0.1/ops/advanced/backends/memory/ + +--- + +[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference +[plan backend multi]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/multi +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode +[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveldb + +The Memory storage backend uses in-memory tables to store all data. +This data is never persisted to disk or to any other storage mechanism. +The Memory storage engine is best used for testing Riak clusters or for +storing small amounts of transient state in production systems. + +Internally, the Memory backend uses Erlang Ets tables to manage data. +More information can be found in the +[official Erlang documentation](http://www.erlang.org/doc/man/ets.html). + +## Enabling the Memory Backend + +To enable the memory backend, edit your [configuration files][config reference] +for each Riak node and specify the Memory backend as shown in the following +example: + +```riakconf +storage_backend = memory +``` + +```appconfig +{riak_kv, [ + ..., + {storage_backend, riak_kv_memory_backend}, + ... + ]} +``` + +**Note**: If you *replace* the existing specified backend by removing it +or commenting it out as shown in the above example, data belonging to +the previously specified backend will still be preserved on the +filesystem but will no longer be accessible through Riak unless the +backend is enabled again. + +If you require multiple backends in your configuration, please consult +the [Multi backend documentation][plan backend multi]. + +## Configuring the Memory Backend + +The Memory backend enables you to configure two fundamental aspects of +object storage: maximum memory usage per [vnode][glossary vnode] +and object expiry. + +### Max Memory + +This setting specifies the maximum amount of memory consumed by the +Memory backend. It's important to note that this setting acts on a +*per-vnode basis*, not on a per-node or per-cluster basis. This should +be taken into account when planning for memory usage with the Memory +backend, as the total memory used will be max memory times the number +of vnodes in the cluster. 
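As a hypothetical worked example of that multiplication (the ring size, node count, and memory value here are illustrative, not recommendations): with a ring size of 64 spread evenly across 5 nodes, each node hosts roughly 13 vnodes, so the setting below (described in the next section) would allow the Memory backend to use roughly 13 GB on that node.

```riakconf
# Hypothetical example: ring_size = 64 across 5 nodes is ~13 vnodes per
# node, so the Memory backend may use roughly 13 x 1GB = 13GB per node
memory_backend.max_memory_per_vnode = 1GB
```
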
+ +When the threshold value that you set has been met in a particular +vnode, Riak will begin discarding objects, beginning with the oldest +object and proceeding until memory usage returns below the allowable +threshold. + +You can configure maximum memory using the +`memory_backend.max_memory_per_vnode` setting. You can specify +`max_memory_per_vnode` however you'd like, using kilobytes, megabytes, +or even gigabytes. + +The following are all possible settings: + +```riakconf +memory_backend.max_memory_per_vnode = 500KB +memory_backend.max_memory_per_vnode = 10MB +memory_backend.max_memory_per_vnode = 2GB +``` + +```appconfig +%% In the app.config-based system, the equivalent setting is max_memory, +%% which must be expressed in megabytes: + +{riak_kv, [ + %% storage_backend specifies the Erlang module defining the storage + %% mechanism that will be used on this node. + + {storage_backend, riak_kv_memory_backend}, + {memory_backend, [ + ..., + {max_memory, 4096}, %% 4GB in megabytes + ... + ]} +``` + +To determine an optimal max memory setting, we recommend consulting the +documentation on [LevelDB cache size][plan backend leveldb]. + +### TTL + +The time-to-live (TTL) parameter specifies the amount of time an object +remains in memory before it expires. The minimum time is one second. + +In the newer, `riak.conf`-based configuration system, you can specify +`ttl` in seconds, minutes, hours, days, etc. The following are all +possible settings: + +```riakconf +memory_backend.ttl = 1s +memory_backend.ttl = 10m +memory_backend.ttl = 3h +``` + +```appconfig +%% In the app.config-based system, the ttl setting must be expressed in +%% seconds: + +{memory_backend, [ + %% other settings + {ttl, 86400}, %% Set to 1 day + %% other settings + ]} +``` + +> **Dynamically Changing `ttl`** +> +> There is currently no way to dynamically change the `ttl` setting for a +bucket or bucket type. The current workaround would be to define +multiple Memory backends using the Multi backend, each with different +`ttl` values. For more information, consult the documentation on the +[Multi][plan backend multi] backend. + + + diff --git a/content/riak/kv/3.0.1/setup/planning/backend/multi.md b/content/riak/kv/3.0.1/setup/planning/backend/multi.md new file mode 100644 index 0000000000..f2b1e80bce --- /dev/null +++ b/content/riak/kv/3.0.1/setup/planning/backend/multi.md @@ -0,0 +1,230 @@ +--- +title: "Multi-backend" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Multi-backend" + identifier: "planning_backend_multi" + weight: 103 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/3.0.1/ops/advanced/backends/multi/ + - /riak/kv/3.0.1/ops/advanced/backends/multi/ + +--- + +[concept buckets]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/bitcask +[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveldb +[plan backend memory]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/memory +[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types +[use admin riak admin cli]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin + +Riak allows you to run multiple backends within a single Riak cluster. +Selecting the Multi backend enables you to use different storage +backends for different [buckets][concept buckets]. 
Any combination of the three +available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used. + +## Configuring Multiple Backends + +You can set up your cluster to use the Multi backend using Riak's +[configuration files][config reference]. + +```riakconf +storage_backend = multi +``` + +```appconfig +{riak_kv, [ + %% ... + {storage_backend, riak_kv_multi_backend}, + %% ... +]}, +``` + +Remember that you must stop and then re-start each node when you change +storage backends or modify any other configuration. + +## Using Multiple Backends + +In Riak 2.0 and later, we recommend using multiple backends by applying +them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process +involves three steps: + +1. Creating a bucket type that enables buckets of that type to use the + desired backends +2. Activating that bucket type +3. Setting up your application to use that type + +Let's say that we've set up our cluster to use the Multi backend and we +want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak admin`][use admin riak admin cli] command interface. + +We'll call our bucket types `leveldb_backend` and `memory_backend`, but +you can use whichever names you wish. + +```bash +riak admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}' +riak admin bucket-type create memory_backend '{"props":{"backend":"memory"}}' +``` + +Then, we must activate those bucket types so that they can be used in +our cluster: + +```bash +riak admin bucket-type activate leveldb_backend +riak admin bucket-type activate memory_backend +``` + +Once those types have been activated, any objects stored in buckets +bearing the type `leveldb_backend` will be stored in LevelDB, whereas +all objects stored in buckets of the type `memory_backend` will be +stored in the Memory backend. + +More information can be found in our documentation on [using bucket types][usage bucket types]. + +## Configuring Multiple Backends + +Once you've set up your cluster to use multiple backends, you can +configure each backend on its own. All configuration options available +for LevelDB, Bitcask, and Memory are all available to you when using the +Multi backend. + +#### Using the Newer Configuration System + +If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by +prefacing each configuration with `multi_backend`. 
+ +Here is an example of the general form for configuring multiple +backends: + +```riakconf +multi_backend.$name.$setting_name = setting +``` + +If you are using, for example, the LevelDB and Bitcask backends and wish +to set LevelDB's `bloomfilter` setting to `off` and the Bitcask +backend's `io_mode` setting to `nif`, you would do that as follows: + +```riakconf +multi_backend.leveldb.bloomfilter = off +multi_backend.bitcask.io_mode = nif +``` + +#### Using the Older Configuration System + +If you are using the older, `app.config`-based configuration system, +configuring multiple backends involves adding one or more backend- +specific sections to your `riak_kv` settings (in addition to setting +the `storage_backend` setting to `riak_kv_multi_backend`, as shown +above). + +> **Note**: If you are defining multiple file-based backends of the same +type, each of these must have a separate `data_root` directory defined. + +While all configuration parameters can be placed anywhere within the +`riak_kv` section of `app.config`, in general we recommend that you +place them in the section containing other backend-related settings to +keep the settings organized. + +Below is the general form for your `app.config` file: + +```appconfig +{riak_kv, [ + %% ... + {multi_backend_default, <<"bitcask_mult">>}, + {multi_backend, [ + %% Here's where you set the individual multiplexed backends + {<<"bitcask_mult">>, riak_kv_bitcask_backend, [ + %% bitcask configuration + {data_root, "/var/lib/riak/bitcask_mult/"}, + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [ + %% bitcask configuration + {data_root, "/var/lib/riak/bitcask_expiry_mult/"}, + {expiry_secs, 86400}, + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [ + %% eleveldb configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [ + %% eleveldb with a different configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"memory_mult">>, riak_kv_memory_backend, [ + %% memory configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]} + ]}, + %% ... +]}, +``` + +Note that in each of the subsections of the `multi_backend` setting, the +name of each backend you wish to configure can be anything you would +like. Directly after naming the backend, you must specify which of the +backends corresponds to that name, i.e. `riak_kv_bitcask_backend`, +`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have +done that, the various configurations for each named backend can be set +as objects in an Erlang list. + +## Example Configuration + +Imagine that you are using both Bitcask and LevelDB in your cluster, and +you would like storage to default to Bitcask. The following +configuration would create two backend configurations, named +`bitcask_mult` and `leveldb_mult`, respectively, while also setting the +data directory for each backend and specifying that `bitcask_mult` is +the default. + +```riakconf +storage_backend = multi + +multi_backend.bitcask_mult.storage_backend = bitcask +multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult + +multi_backend.leveldb_mult.storage_backend = leveldb +multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult + +multi_backend.default = bitcask_mult +``` + +```appconfig +{riak_kv, [ + %% ... 
  {multi_backend_default, <<"bitcask_mult">>},
  {multi_backend, [
    {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
      {data_root, "/var/lib/riak/bitcask"}
    ]},
    {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
      {data_root, "/var/lib/riak/leveldb"}
    ]}
  ]}
  %% ...
]}
```

## Multi Backend Memory Use

Each Riak storage backend has settings for configuring how much memory
the backend can use, e.g. caching for LevelDB or the entire data set for
the Memory backend. Each of these backends suggests allocating up to 50%
of available memory for this purpose. When using the Multi backend, make
sure that the sum of all backend memory use is 50% of available memory
or less. For example, using three backends with each set to 50% memory
usage will inevitably lead to memory problems.



diff --git a/content/riak/kv/3.0.1/setup/planning/best-practices.md b/content/riak/kv/3.0.1/setup/planning/best-practices.md
new file mode 100644
index 0000000000..166839c16d
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/planning/best-practices.md
@@ -0,0 +1,145 @@
---
title: "Scaling and Operating Riak Best Practices"
description: ""
project: "riak_kv"
project_version: 3.0.1
menu:
  riak_kv-3.0.1:
    name: "Best Practices"
    identifier: "planning_best_practices"
    weight: 105
    parent: "planning"
toc: true
aliases:
  - /riak/3.0.1/ops/building/planning/best-practices
  - /riak/kv/3.0.1/ops/building/planning/best-practices

---

[use ref handoff]: {{<baseurl>}}riak/kv/3.0.1/using/reference/handoff
[config mapreduce]: {{<baseurl>}}riak/kv/3.0.1/configuring/mapreduce
[glossary aae]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#active-anti-entropy-aae
[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes

Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster.

## Disk Capacity

Filling up disks is a serious problem in Riak. In general, you should
add capacity under the following conditions:

* a disk becomes more than 80% full
* you have fewer than 10 days of capacity remaining at current rates of
  growth

## RAID Levels

Riak provides resilience through its built-in redundancy.

* RAID0 can be used to increase performance at the expense of
  single-node reliability
* RAID5/6 can be used to increase reliability over RAID0 while still
  offering higher performance than single disks
* You should choose a RAID level (or no RAID) that you're comfortable
  with

## Disk Leeway

* Adding new nodes instantly increases the total capacity of the
  cluster, but you should allow enough internal network capacity that
  [handing off][use ref handoff] existing data outpaces the arrival of new
  data.
* Once you've reached a scale at which the amount of new data arriving
  is a small fraction of the cluster's total capacity, you can add new
  nodes when you need them. You should be aware, however, that adding
  new nodes can actually _increase_ disk usage on existing nodes in the
  short term as data is rebalanced within the cluster.
+* If you are certain that you are likely to run out of capacity, we + recommend allowing a week or two of leeway so that you have plenty of + time to add nodes and for [handoff][use ref handoff] to occur before the disks reach + capacity +* For large volumes of storage it's usually prudent to add more capacity + once a disk is 80% full + +## CPU Capacity Leeway + +* In a steady state, your peak CPU utilization, ignoring other + processes, should be less than 30% +* If you provide sufficient CPU capacity leeway, you’ll have spare + capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae] + +## Network Capacity Leeway + +* Network traffic tends to be “bursty,” i.e. it tends to vary both quite + a bit and quickly +* Your normal load, as averaged over a 10-minute period, should be no + more than 20% of maximum capacity +* Riak generates 3-5 times the amount of intra-node traffic as inbound + traffic, so you should allow for this in your network design + +## When to Add Nodes + +You should add more nodes in the following scenarios: + +* you have reached 80% of storage capacity +* you have less than 10 days of leeway before you expect the cluster to + fill up +* the current node's IO/CPU activity is higher than average for extended + period of time, especially for [MapReduce][config mapreduce] + operations + +An alternative to adding more nodes is to add more storage to existing +nodes. However, you should do this only if: + +* you’re confident that there is plenty of spare network and CPU + capacity, _and_ +* you can upgrade storage _equally across all nodes_. If storage is + added in an unbalanced fashion, Riak will continue storing data + equally across nodes, and the node with the smallest available storage + space is likely to fail first. Thus, if one node uses 1 TB but the + rest use 1.5 TB, Riak will overload the 1 TB node first. + +The recommendations above should be taken only as general guidelines +because the specifics of your cluster will matter a great deal when +making capacity decisions. The following considerations are worth +bearing in mind: + +* If your disks are 90% full but only filling up 1% per month, this + might be a perfectly "safe" scenario. In cases like this, the velocity + of adding new data is more important than any raw total. +* The burstiness of your write load is also an important consideration. + If writes tend to come in large batches that are unpredictably timed, + it can be more difficult to estimate when disks will become full, + which means that you should probably over-provision storage as a + precaution. +* If Riak shares disks with other processes or is on the system root + mount point, i.e. `/`, we recommend leaving a little extra disk space + in addition to the estimates discussed above, as other system + processes might use disk space unexpectedly. + +## How to Add Nodes + +* You should add as many additional nodes as you require in one + operation +* Don’t add nodes one at a time if you’re adding multiple nodes +* You can limit the transfer rate so that priority is given to live + customer traffic + +This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node]. 
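As a sketch of what staging several joins as one planned operation might look like (the node name and transfer limit below are placeholders, not recommendations):

```bash
# Stage a join for each new node first (run on every joining node,
# pointing at an existing cluster member)
riak admin cluster join riak@node1.example.com

# Review the staged changes, then commit them all in one operation
riak admin cluster plan
riak admin cluster commit

# Optionally cap concurrent handoff transfers so that live customer
# traffic keeps priority while data rebalances
riak admin transfer-limit 2
```
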
+ +## Scaling + +* All large-scale systems are bound by the availability of some + resources +* From a stability point of view, the best state for a busy Riak cluster + to maintain is the following: + * New network connections are limited to ensure that existing network + connections consume most network bandwidth + * CPU at < 30% + * Disk IO at < 90% +* You should use HAProxy or your application servers to limit new + network connections to keep network and IO below 90% and CPU below + 30%. + + + diff --git a/content/riak/kv/3.0.1/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/3.0.1/setup/planning/bitcask-capacity-calc.md new file mode 100644 index 0000000000..fd76536b42 --- /dev/null +++ b/content/riak/kv/3.0.1/setup/planning/bitcask-capacity-calc.md @@ -0,0 +1,104 @@ +--- +title: "Bitcask Capacity Calculator" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Bitcask Capacity Calculator" + identifier: "planning_cluster_bitcask_capacity" + weight: 104 + parent: "planning" +toc: true +aliases: + - /riak/3.0.1/ops/building/planning/bitcask + - /riak/kv/3.0.1/ops/building/planning/bitcask + +--- + +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/bitcask + +These calculators will assist you in sizing your cluster if you plan to +use the default [Bitcask][plan backend bitcask] storage back end. + +This page is designed to give you a rough estimate when sizing your +cluster. The calculations are a _best guess_, and they tend to be a bit +on the conservative side. It's important to include a bit of head room +as well as room for unexpected growth so that if demand exceeds +expectations you'll be able to add more nodes to the cluster and stay +ahead of your requirements. + +<div id="node_info" class="calc_info"></div> +<div class="calculator"> + <ul> + <li> + <label for="n_total_keys">Total Number of Keys:</label> + <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input"> + <span class="error_span" id="n_total_keys_error"></span> + </li> + <li> + <label for="n_bucket_size">Average Bucket Size (Bytes):</label> + <input id="n_bucket_size"type="text" size="7" name="n_bucket_size" value="" class="calc_input"> + <span class="error_span"id="n_bucket_size_error"></span> + </li> + <li> + <label for="n_key_size">Average Key Size (Bytes):</label> + <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input"> + <span class="error_span" id="n_key_size_error"></span> + </li> + <li> + <label for="n_record_size">Average Value Size (Bytes):</label> + <input id="n_record_size"type="text" size="7" name="n_record_size" value="" class="calc_input"> + <span class="error_span"id="n_record_size_error"></span> + </li> + <li> + <label for="n_ram">RAM Per Node (in GB):</label> + <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input"> + <span class="error_span" id="n_ram_error"></span> + </li> + <li> + <label for="n_nval"><i>N</i> (Number of Write Copies):</label> + <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input"> + <span class="error_span" id="n_nval_error"></span> + </li> +</ul> +</div> + +## Recommendations + +<span id="recommend"></span> + +## Details on Bitcask RAM Calculation + +With the above information in mind, the following variables will factor +into your RAM calculation: + +Variable | Description +:--------|:----------- +Static Bitcask per-key overhead | 44.5 bytes per key +Estimated average 
bucket-plus-key length | The combined number of characters your bucket + key names will require (on average). We'll assume 1 byte per character.
Estimated total objects | The total number of key/value pairs your cluster will have when started
Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)

## The Actual Equation

Approximate RAM needed for Bitcask = (static Bitcask per-key overhead +
estimated average bucket-plus-key length in bytes) * estimated total
number of keys * `n_val`

Example:

* 50,000,000 keys in your cluster to start
* approximately 30 bytes for each bucket-plus-key name
* default `n_val` of 3

Working through the equation: (44.5 + 30) * 50,000,000 * 3 =
11,175,000,000 bytes, so the amount of RAM you would need for Bitcask is
roughly **10.4 GB across your entire cluster.**

Additionally, Bitcask relies on your operating system's filesystem cache
to deliver high performance reads. So when sizing your cluster, take
this into account and plan on having several more gigabytes of RAM
available for your filesystem cache.



diff --git a/content/riak/kv/3.0.1/setup/planning/cluster-capacity.md b/content/riak/kv/3.0.1/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..ce03683c35
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/planning/cluster-capacity.md
@@ -0,0 +1,238 @@
---
title: "Cluster Capacity Planning"
description: ""
project: "riak_kv"
project_version: 3.0.1
menu:
  riak_kv-3.0.1:
    name: "Cluster Capacity"
    identifier: "planning_cluster_capacity"
    weight: 103
    parent: "planning"
toc: true
aliases:
  - /riak/3.0.1/ops/building/planning/cluster
  - /riak/kv/3.0.1/ops/building/planning/cluster

---

[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveldb
[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/bitcask-capacity-calc
[plan index]: {{<baseurl>}}riak/kv/3.0.1/setup/planning
[concept replication]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/replication
[use admin riak admin#cluster]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster
[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference
[perf benchmark]: {{<baseurl>}}riak/kv/3.0.1/using/performance/benchmarking
[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)


This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.

## RAM

[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users and is essential for running complex MapReduce queries or caching data to provide low-latency request times.

### Bitcask and Memory Requirements

Your choice of local storage backend for Riak impacts your RAM
needs. Though Riak has pluggable backend storage, Bitcask is the
default. Why? Because it's built for:

* low-latency request times
* high throughput
* the ability to handle data sets much larger than RAM without degradation

Bitcask's one major requirement, however, is that it must keep the
entire **keydir** in memory.
The keydir is a hash table that maps each
concatenated bucket + key name in a Bitcask ("a Bitcask" is the name for
each file contained within each Bitcask backend) to a fixed-size
structure giving the file, offset, and size of the most recently written
entry for that bucket + key on disk.

To learn about Bitcask, see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.

If your calculated RAM needs exceed your hardware resources (in other
words, if you can't afford the RAM required to use Bitcask), we
recommend that you use LevelDB.

Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.

### LevelDB

If RAM requirements for Bitcask are prohibitive, we recommend use of
the LevelDB backend. While LevelDB doesn't require a large amount of RAM
to operate, supplying it with the maximum amount of memory available leads to higher performance.

For more information see [LevelDB][plan backend leveldb].

## Disk

Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs:

#### Estimated Total Objects * Average Object Size * n_val

For example:

* 50,000,000 objects
* an average object size of two kilobytes (2,048 bytes)
* the default `n_val` of 3

With those numbers, you would need approximately **286 GB** of disk space in the entire cluster to accommodate your data.

We believe that databases should be durable out of the box. When we
built Riak, we did so in a way that you could write to disk while
keeping response times below your users' expectations. So this
calculation assumes that you'll be keeping the entire data set on disk.

Many of the considerations taken when configuring a machine to serve a
database apply to configuring a node for Riak as well. Mounting
disks with `noatime` and having separate disks for your OS and Riak data
lead to much better performance. See [Planning for a
Riak System](../start) for more information.

### Disk Space Planning and Ownership Handoff

When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is that the other nodes require more intensive disk space usage, in rare cases filling the disk of one or more of those nodes.

When making disk space planning decisions, we recommend that you:

* assume that one or more nodes may be down at any time
* monitor your disk space usage and add additional space when usage
  exceeds 50-60% of available space.

Another possibility worth considering is using Riak with a filesystem
that allows for growth, for example
[LVM],
[RAID](http://en.wikipedia.org/wiki/RAID), or
[ZFS](http://en.wikipedia.org/wiki/ZFS).

## Read/Write Profile

Read/write ratios, as well as the distribution of key access, should
influence the configuration and design of your cluster. If your use case
is write-heavy, you will need less RAM for caching, and if only a
certain portion of keys is accessed regularly, such as in a [Pareto
distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you
won't need as much RAM available to cache those keys' values.

## Number of Nodes

The number of nodes (i.e.
physical servers) in your Riak Cluster depends +on the number of times data is [replicated][concept replication] across the +cluster. To ensure that the cluster is always available to respond to +read and write requests, we recommend a "sane default" of N=3 +replicas. This requirement can be met with a 3 or 4-node +cluster. + +For production deployments, however, we recommend using no fewer than 5 +nodes, as node failures in smaller clusters can compromise the +fault-tolerance of the system. Additionally, in clusters smaller than 5 +nodes, a high percentage of the nodes (75-100% of them) will need to +respond to each request, putting undue load on the cluster that may +degrade performance. For more details on this recommendation, see our +blog post on [Why Your Riak Cluster Should Have at Least Five +Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/). + +## Scaling + +Riak can be scaled in two ways: vertically, via improved hardware, and +horizontally, by adding more nodes. Both ways can provide performance +and capacity benefits, but should be used in different circumstances. +The [riak admin cluster command][use admin riak admin#cluster] can +assist scaling in both directions. + +#### Vertical Scaling + +Vertical scaling, or improving the capabilities of a node/server, +provides greater capacity to the node but does not decrease the overall +load on existing members of the cluster. That is, the ability of the +improved node to handle existing load is increased but the load itself +is unchanged. Reasons to scale vertically include increasing IOPS (I/O +Operations Per Second), increasing CPU/RAM capacity, and increasing disk +capacity. + +#### Horizontal Scaling + +Horizontal scaling, or increasing the number of nodes in the cluster, +reduces the responsibilities of each member node by reducing the number +of partitions and providing additional endpoints for client connections. +That is, the capacity of each individual node does not change but its +load is decreased. Reasons to scale horizontally include increasing I/O +concurrency, reducing the load on existing nodes, and increasing disk +capacity. + +> **Note on horizontal scaling** +> +> When scaling horizontally, it's best to add all planned nodes at once +with multiple `riak admin cluster join` commands followed by +a `riak admin cluster plan` and `riak admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster. + +#### Reducing Horizontal Scale + +If a Riak cluster is over provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak admin cluster leave` command. + +## Ring Size/Number of Partitions + +Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference]. + +The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes we recommend a larger ring size. + +The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes. + +There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses. 
We recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine, while a 10-node cluster should work well with a ring size of 128 or 256 (64 is too small, while 512 is likely too large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency I/O access can drastically decrease performance. Before
+putting your Riak cluster in production, it is recommended that you gain a
+full understanding of your environment's behavior so that you know how
+your cluster performs under load for an extended period of time. Doing
+so will help you size your cluster for future growth and lead to optimal
+performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend that you keep latency
+between nodes as low as possible, as high latency leads to
+sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the least
+latency possible using SATA drives or SSDs, for example.
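+
+As a quick sanity check of a node's disk behavior under load, you can sample extended device statistics while a benchmark runs. This is a minimal sketch that assumes a Linux node with the `sysstat` package installed; the device backing your Riak data directory will vary by system:
+
+```bash
+# Sample extended device statistics every 5 seconds (Ctrl-C to stop).
+# Watch the %util and await columns for the device that backs Riak's
+# data directory; sustained saturation there points to an I/O bottleneck.
+iostat -dx 5
+```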
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/planning/future.md b/content/riak/kv/3.0.1/setup/planning/future.md
new file mode 100644
index 0000000000..8486b2a3b7
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/planning/future.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+#menu:
+#  riak_kv-3.0.1:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+aliases:
+
+---
+
+**TODO: Add content**
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/planning/operating-system.md b/content/riak/kv/3.0.1/setup/planning/operating-system.md
new file mode 100644
index 0000000000..00397bc874
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/planning/operating-system.md
@@ -0,0 +1,30 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+aliases:
+
+---
+
+[downloads]: {{<baseurl>}}riak/kv/3.0.1/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/planning/start.md b/content/riak/kv/3.0.1/setup/planning/start.md
new file mode 100644
index 0000000000..44e9b59d11
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/planning/start.md
@@ -0,0 +1,61 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/planning/system-planning
+  - /riak/kv/3.0.1/ops/building/planning/system-planning
+
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring your
+Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have
+strengths and weaknesses, so if you are unsure of which backend you
+need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests
+across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP
+implementations.
We don't recommend VRRP behavior for the VIP because
+you'll lose the benefit of spreading client query load to all nodes in a
+ring.
+
+For **reverse-proxy** configurations (HTTP interface), any one of the
+following should work adequately:
+
+* haproxy
+* squid
+* varnish
+* nginx
+* lighttpd
+* Apache
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/search.md b/content/riak/kv/3.0.1/setup/search.md
new file mode 100644
index 0000000000..fd40910d9e
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/search.md
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/upgrading.md b/content/riak/kv/3.0.1/setup/upgrading.md
new file mode 100644
index 0000000000..6fe93e4c31
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/upgrading.md
@@ -0,0 +1,38 @@
+---
+title: "Upgrading Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Upgrading"
+    identifier: "upgrading"
+    weight: 102
+    parent: "setup_index"
+toc: true
+aliases:
+
+---
+
+[upgrade checklist]: ./checklist
+[upgrade version]: ./version
+[upgrade cluster]: ./cluster
+[upgrade mdc]: ./multi-datacenter
+[upgrade search]: ./search
+
+## In This Section
+
+### [Production Checklist][upgrade checklist]
+
+An overview of what to consider before upgrading Riak KV in a production environment.
+
+[Learn More >>][upgrade checklist]
+
+### [Upgrading to Riak KV 3.0.1][upgrade version]
+
+A tutorial on upgrading to Riak KV 3.0.1.
+
+[Learn More >>][upgrade version]
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/upgrading/checklist.md b/content/riak/kv/3.0.1/setup/upgrading/checklist.md
new file mode 100644
index 0000000000..57ecb39b5e
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/upgrading/checklist.md
@@ -0,0 +1,225 @@
+---
+title: "Production Checklist"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Production Checklist"
+    identifier: "upgrading_checklist"
+    weight: 100
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/upgrading/production-checklist/
+  - /riak/kv/3.0.1/ops/upgrading/production-checklist/
+
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/3.0.1/using/performance/open-files-limit
+[perf index]: {{<baseurl>}}riak/kv/3.0.1/using/performance
+[ntp]: http://www.ntp.org/
+[security basics]: {{<baseurl>}}riak/kv/3.0.1/using/security/basics
+[cluster ops load balance]: {{<baseurl>}}riak/kv/3.0.1/configuring/load-balancing-proxy
+[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference
+[config backend]: {{<baseurl>}}riak/kv/3.0.1/configuring/backend
+[usage search]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency
+[apps replication properties]: {{<baseurl>}}riak/kv/3.0.1/developing/app-guide/replication-properties
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/reference/strong-consistency
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/bucket-types
+[use admin commands]: {{<baseurl>}}riak/kv/3.0.1/using/admin/commands
+[use admin riak control]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak-control
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/inspecting-node
+[troubleshoot http]: {{<baseurl>}}riak/kv/3.0.1/using/troubleshooting/http-204
+[use admin riak admin]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin
+[SANs]: http://en.wikipedia.org/wiki/Storage_area_network
+
+Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using iperf to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
* If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][concept strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state?
+  - Run `riak admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak admin`][use admin riak admin])
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median, 95th, and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs?
+* Are the client libraries in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying?
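+
+Many of the node- and ring-level checks above can be scripted for convenience. The sketch below simply chains the commands already listed in this checklist for a single node; substitute your own node name:
+
+```bash
+#!/usr/bin/env bash
+# Pre-production health sweep for one node, using the checks above.
+NODE="riak@192.168.1.11"   # replace with your node's name
+
+riak ping                                    # expect: pong
+riak admin wait-for-service riak_kv "$NODE"  # expect: riak_kv is up
+riak admin ringready                         # expect: TRUE All nodes agree on the ring
+riak admin member-status                     # all nodes should be listed as valid
+riak admin ring-status                       # expect: Ring Ready: true, no pending changes
+riak admin transfers                         # expect: No transfers active
+```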
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak admin diag <check>`, where `<check>` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production
+    systems
+  - High is for problems that impact production or soon-to-be-production
+    systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those
+    issues that are likely to turn into production outages very soon.
+    On-call engineers respond to urgent requests within 30 minutes,
+    24/7.
+* Does your team know how to gather `riak-debug` results from the whole
+  cluster when opening tickets? If not, that process goes something like
+  this:
+  - SSH into each machine, run `riak-debug`, and grab the resultant
+    `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open
+    a new High- or Urgent-priority ticket
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at
+the metrics gathered above. Based on the numbers you're seeing so far,
+configure alerting thresholds on your latencies, disk consumption, and
+memory. These are the places most likely to give you advance warning of
+trouble.
+
+When you go to increase capacity down the line, having historic metrics
+will give you very clear indicators of having resolved scaling problems,
+as well as metrics for understanding what to upgrade and when.
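+
+As a starting point for those alerting thresholds, the latency percentiles can be pulled from any node's `/stats` endpoint. This is a minimal sketch assuming the default HTTP port (8098) and that `jq` is installed; exact stat names can vary between Riak KV versions:
+
+```bash
+# Fetch 95th/99th percentile GET and PUT FSM latencies (in microseconds)
+# from one node's HTTP stats endpoint.
+curl -s http://localhost:8098/stats | \
+  jq '{get_95: .node_get_fsm_time_95, get_99: .node_get_fsm_time_99,
+       put_95: .node_put_fsm_time_95, put_99: .node_put_fsm_time_99}'
+```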
+ + + + diff --git a/content/riak/kv/3.0.1/setup/upgrading/cluster.md b/content/riak/kv/3.0.1/setup/upgrading/cluster.md new file mode 100644 index 0000000000..e5322b1a9a --- /dev/null +++ b/content/riak/kv/3.0.1/setup/upgrading/cluster.md @@ -0,0 +1,302 @@ +--- +title: "Upgrading a Cluster" +description: "" +project: "riak_kv" +project_version: "3.0.1" +menu: + riak_kv-3.0.1: + name: "Upgrading a Cluster" + identifier: "upgrading_cluster" + weight: 102 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/3.0.1/ops/upgrading/rolling-upgrades/ + - /riak/kv/3.0.1/ops/upgrading/rolling-upgrades/ +--- + +[production checklist]: {{<baseurl>}}riak/kv/3.0.1/setup/upgrading/checklist +[use admin riak control]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak-control +[use admin commands]: {{<baseurl>}}riak/kv/3.0.1/using/admin/commands +[use admin riak admin]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/secondary-indexes +[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.1/configuring/v3-multi-datacenter +[jmx monitor]: {{<baseurl>}}riak/kv/3.0.1/using/reference/jmx +[snmp]: {{<baseurl>}}riak/kv/3.0.1/using/reference/snmp + +{{% note title="Note on upgrading Riak KV from older versions" %}} +Riak KV upgrades are tested and supported for two feature release versions. +For example, upgrades from 1.1.x to 1.3.x are tested and supported, +while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new +version of Riak KV that is more than two feature releases ahead, we +recommend first upgrading to an intermediate version. For example, in an +upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x +before upgrading to 1.4.x. + +If you run [Riak Control]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak-control), you should disable it during the rolling upgrade process. +{{% /note %}} + +Riak KV nodes negotiate with each other to determine supported +operating modes. This allows clusters containing mixed-versions of Riak KV +to properly interoperate without special configuration, and simplifies +rolling upgrades. + +Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading. + +## Debian/Ubuntu + +The following example demonstrates upgrading a Riak KV node that has been +installed with the Debian/Ubuntu packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +2\. Back up the Riak KV node's `/etc` and `/data` directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Upgrade Riak KV: + +```bash +sudo dpkg -i <riak_package_name>.deb +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +5\. Verify Riak KV is running the new version: + +```bash +riak version +``` + +6\. Wait for the `riak_kv` service to start: + +```bash +riak admin wait-for-service riak_kv »target_node« +``` + +* `»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak admin transfers +``` + +* While the node was offline, other nodes may have accepted writes on its +behalf. 
This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
7. The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+    * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_jmx` - See [JMX Monitoring][jmx monitor] for more information.
+    * `snmp` - See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should examine any custom patches contained in
+the `basho-patches` directory to determine whether they still apply to
+the upgraded version. If you find that patches no longer
+apply to the upgraded version, you should remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is also a good idea to verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/upgrading/multi-datacenter.md b/content/riak/kv/3.0.1/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..0c58eadd22
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,24 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+#menu:
+#  riak_kv-3.0.1:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+aliases:
+
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/upgrading/search.md b/content/riak/kv/3.0.1/setup/upgrading/search.md
new file mode 100644
index 0000000000..bd039229bf
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/upgrading/search.md
@@ -0,0 +1,280 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "3.0.1"
+menu:
+  riak_kv-3.0.1:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/3.0.1/ops/advanced/upgrading-search-2
+  - /riak/kv/3.0.1/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e.
no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+Please also note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM. A good start is 2 GB, but more will be required for heavier
+workloads. However, do not make the heap too large, as it could
+cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+>Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space, as merge index's GC
+algorithm is bad at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, it's called `yokozuna`.
+   If you've chosen to upgrade to the new `riak.conf` config format, it's
+   called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+      %% Other configs
+      {enabled, true},
+      %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+    Don't proceed until all nodes have been upgraded to the newest
+    version. This way all nodes have new Search capabilities before
+    running the next steps which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+    To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+    instructions to learn how to define your XML file. Once you've created
+    the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+    Once a bucket is associated with the new Search, all objects that are
+    written or modified in Riak will be indexed by **both** legacy and new
+    Search. However, the HTTP and client query interfaces will still
+    continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+    Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+    the following code into the shell. It clears the Search hash trees for
+    each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+    ```
+
+    Press `Ctrl-D` to exit from the attached shell.
+
+    In the background AAE will rebuild the hash trees and exchange them
+    with KV. These exchanges will notice objects are missing and index
+    them in new Search.
+
+    <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+have occurred on every node.
+
+    ```bash
+    riak admin search aae-status
+    ```
+
+    First, you must wait until all trees are rebuilt. This may take a
+    while as each node is configured, by default, to build a maximum of
+    one tree per hour. You can determine when a tree is built by looking
+    at the `Entropy Trees` section. When a tree is not built, it will show
+    `--` under the `Built (ago)` column. Otherwise, it will list how long
+    ago the tree was built in a human-friendly format.
Here is an example
+    of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   --
+    319703483166135013357056057156686910549735243776   --
+    ...
+    ```
+
+    Here is an example of built trees:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.3 hr
+    319703483166135013357056057156686910549735243776   5.3 hr
+    ...
+    ```
+
+    After all the trees are built, you then have to wait for a full
+    exchange round to occur for every partition on every node. That is,
+    the full exchange round must be **NEWER** than the time the tree was
+    built. That way you know the exchange was based on the latest tree.
+    The exchange information is found under the `Exchanges` section.
+    Under that section there are two columns: `Last (ago)` and `All
+    (ago)`. In this case, you want to wait until the `All (ago)` value is
+    newer than the value of `Built (ago)` in the `Entropy Trees` section.
+    For example, given the entropy tree output above, this output would
+    indicate both partitions have had a full exchange round since the
+    latest tree was built:
+
+    ```
+    ================================== Exchanges ==================================
+    Index                                              Last (ago)    All (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+    319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+    ...
+    ```
+
+    Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than
+    `5.3 hr`. Once the exchange is newer for every partition on every
+    node you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command, which will give HTTP and PB query
+control to the new Riak Search.
+
+    ```bash
+    riak admin search switch-to-new-search
+    ```
+
+    <div class="note">
+    <div class="title">Check Results Before Switching (Optional)</div>
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After the `switch-to-new-search` is run, all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching, you can use its dedicated
+    HTTP resource at `/search/query/<index>?q=...`.
+    </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+      %% Other configs
+      {enabled, false},
+      %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search, this causes
+the commit hooks to persist even when legacy Search is disabled and the
+`search` property is set to `false`.
+
+    New Search has code to expunge the legacy hooks from the raw ring but
+    it only occurs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than during the middle of a migration.
+
+
+
+
diff --git a/content/riak/kv/3.0.1/setup/upgrading/version.md b/content/riak/kv/3.0.1/setup/upgrading/version.md
new file mode 100644
index 0000000000..e07c074ff4
--- /dev/null
+++ b/content/riak/kv/3.0.1/setup/upgrading/version.md
@@ -0,0 +1,250 @@
+---
+title: "Upgrading to Riak KV 3.0.1"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Upgrading to 3.0.1"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/3.0.1/upgrade-v20/
+  - /riak/kv/3.0.1/ops/upgrading/rolling-upgrades/
+  - /riak/kv/3.0.1/setup/upgrading/cluster/
+---
+
+
+[production checklist]: {{<baseurl>}}riak/kv/3.0.1/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/3.0.1/using/admin/commands
+[use admin riak admin]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/3.0.1/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.1/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/3.0.1/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/3.0.1/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/3.0.1/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 3.0.1 by following the instructions below.
+
+{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+    * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to potentially downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to override the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete.
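+
+Taken together, the loop above looks roughly like the following sketch for a single Debian/Ubuntu node; paths, package commands, and the optional steps vary by platform and edition, and the detailed walk-through is in the sections below:
+
+```bash
+# Per-node upgrade loop (Debian/Ubuntu paths shown; see below for details).
+riak stop
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+sudo dpkg -i »riak_package_name«.deb
+riak start
+riak version                                        # confirm the new version
+riak admin wait-for-service riak_kv »target_node«   # wait for riak_kv to come up
+riak admin transfers                                # wait for handoffs to finish
+```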
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Transitioning to Leveled backend
+
+
+[Riak KV 2.9][release notes] introduced a new backend specifically for Riak, Leveled:
+
+The leveled backend is not compatible with other backends in terms of the serialised disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+* First, transition to 2.9 with the current backend in place, minimising the time spent running mismatched versions in parallel;
+* Then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 3.0.1, but only if you plan to use Riak search.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features may be introduced that we either believe are so important that we automatically opt-in users on upgrade or there is no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted-in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade, data written in HLL format is unreadable. |
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade to version 2.9, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new Leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+
+## Upgrading process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories:
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
4\. Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package right now, before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+    ```advanced.config
+    {riak_kv, [
+      {override_capability, [
+        {object_hash_version, [{use, legacy}] }
+      ]}
+    ]}
+    ```
+
+5.b\. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+    ```riak.conf
+    leveldb.compression.algorithm=snappy
+    ```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS <= 2.2.3, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+    * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+    * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+    * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+    ```advanced.config
+    {riak_repl, [
+      {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+      ]}
+    ]}
+    ```
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 3.0.1 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+riak@192.168.1.11)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf.
This data is transferred to the node when it becomes available. + +10\. Repeat the process for the remaining nodes in the cluster. + + +### Basho Patches + +After upgrading, you should ensure that any custom patches contained in the `basho-patches` directory are examined to determine their application to the upgraded version. You can find this information in the [Release Notes]. + +If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production. + +The following lists locations of the `basho-patches` directory for +each supported operating system: + +- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches` +- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches` +- FreeBSD: `/usr/local/lib/riak/lib/basho-patches` +- SmartOS: `/opt/local/lib/riak/lib/basho-patches` +- Solaris 10: `/opt/riak/lib/basho-patches` + +### Riaknostic + +It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility Riaknostic. + +Ensure that Riak KV is running on the node, and issue the following command: + +```bash +riak admin diag +``` + +Make the recommended changes from the command output to ensure optimal node operation. + + + + diff --git a/content/riak/kv/3.0.1/using.md b/content/riak/kv/3.0.1/using.md new file mode 100644 index 0000000000..95f441b2bf --- /dev/null +++ b/content/riak/kv/3.0.1/using.md @@ -0,0 +1,78 @@ +--- +title: "Using Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Using" + identifier: "managing" + weight: 201 + pre: database +toc: true +aliases: + +--- + +[use running cluster]: ../using/running-a-cluster +[use admin index]: ../using/admin/ +[cluster ops index]: ../using/cluster-operations +[repair recover index]: ../using/repair-recovery +[security index]: ../using/security +[perf index]: ../using/performance +[troubleshoot index]: ../using/troubleshooting +[use ref]: ../using/reference + +## In This Section + +#### [Running a Cluster][use running cluster] + +A guide on basic cluster setup. + +[Learn More >>][use running cluster] + +#### [Cluster Administration][use admin index] + +Tutorials and reference documentation on cluster administration commands as well as command-line tools. + +[Learn More >>][use admin index] + +#### [Cluster Operations][cluster ops index] + +Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups. + +[Learn More >>][cluster ops index] + +#### [Repair & Recovery][repair recover index] + +Contains documentation on repairing a cluster, recovering from failure, and common errors. + +[Learn More >>][repair recover index] + +#### [Security][security index] + +Information on securing your Riak KV cluster. + +[Learn More >>][security index] + +#### [Performance][perf index] + +Articles on benchmarking your Riak KV cluster and improving performance. + +[Learn More >>][perf index] + +#### [Troubleshooting][troubleshoot index] + +Guides on troubleshooting issues and current product advisories. + +[Learn More >>][troubleshoot index] + +#### [Reference][use ref] + +Articles providing background information and implementation details on topics such as logging, bucket types, and search. 
[Learn More >>][use ref]
+
+
+
+
diff --git a/content/riak/kv/3.0.1/using/admin.md b/content/riak/kv/3.0.1/using/admin.md
new file mode 100644
index 0000000000..ae5503fac6
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/admin.md
@@ -0,0 +1,51 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/running/cluster-admin
+  - /riak/kv/3.0.1/ops/running/cluster-admin
+
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak admin]: ./riak admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak admin Command Line Interface][use admin riak admin]
+
+Details the `riak admin` interface.
+
+[Learn More >>][use admin riak admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
+
+
+
diff --git a/content/riak/kv/3.0.1/using/admin/commands.md b/content/riak/kv/3.0.1/using/admin/commands.md
new file mode 100644
index 0000000000..b1b8d1b8de
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/admin/commands.md
@@ -0,0 +1,378 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/running/cluster-admin
+  - /riak/kv/3.0.1/ops/running/cluster-admin
+
+---
+
+[use admin riak admin#cluster]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes
+[use admin riak admin#cluster-plan]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster-plan
+[use admin riak admin#cluster-commit]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster-commit
+
+
+This document explains usage of the [`riak admin cluster`][use admin riak admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command.
This will execute the changes that
+have been staged and reviewed.
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak admin cluster`
+interface are also available as self-standing commands. The `riak admin
+member-status` command is now the `riak admin cluster status` command,
+`riak admin join` is now `riak admin cluster join`, etc.
+>
+> We recommend using the `riak admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+|        node        |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` - There are five possible values for status:
+  * `valid` - The node has begun participating in cluster operations
+  * `leaving` - The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` - The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` - The node is in the process of joining the cluster but
+    has not yet completed the join process
+  * `down` - The node is not currently responding
+* `avail` - There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` - What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` - The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant node][cluster ops add remove node] will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak admin cluster plan`][use admin riak admin#cluster-plan] and committed the changes by running
+[`riak admin cluster commit`][use admin riak admin#cluster-commit].
+You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+ +```bash +riak admin cluster leave +``` + +You can also instruct another node (by nodename) to leave the cluster: + +```bash +riak admin cluster leave <node> +``` + +> **Note**: As with all cluster-level actions, the changes made when you +run the `cluster leave` command will take effect only after you have +both planned the changes by running [`riak admin cluster plan`][use admin riak admin#cluster-plan] and committed the changes +by running [`riak admin cluster commit`][use admin riak admin#cluster-commit]. +You can stage multiple leave commands before planning/committing. + +## force-remove + +Removes another node from the cluster (by nodename) _without_ first +handing off its [data partitions][concept clusters]. This command is +designed for crashed, unrecoverable nodes and should be used with +caution. + +```bash +riak admin cluster force-remove <node> +``` + +> **Note**: As with all cluster-level actions, the changes made when you +run the `cluster force-remove` command will take effect only after you have +both planned the changes by running [`riak admin cluster plan`][use admin riak admin#cluster-plan] and committed the changes +by running [`riak admin cluster commit`][use admin riak admin#cluster-commit]. You can stage multiple force-remove actions +before planning/committing. + +## replace + +Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the +cluster and shut down. + +```bash +riak admin cluster replace <node1> <node2> +``` + +> **Note**: As with all cluster-level actions, the changes made when you +run the `cluster replace` command will take effect only after you have +both planned the changes by running [`riak admin cluster plan`][use admin riak admin#cluster-plan] and committed the changes +by running [`riak admin cluster commit`][use admin riak admin#cluster-commit]. You can stage multiple replace actions before +planning/committing. + +## force-replace + +Reassigns all [data partitions][concept clusters] owned by one node to +another node _without_ first handing off data. + +```bash +riak admin cluster force-replace <node_being_replaced> <replacement_node> +``` + +Once the data partitions have been reassigned, the node that is being +replaced will be removed from the cluster. + +> **Note**: As with all cluster-level actions, the changes made when you +run the `cluster force-replace` command will take effect only after you have +both planned the changes by running [`riak admin cluster plan`][use admin riak admin#cluster-plan] and committed the changes +by running [`riak admin cluster commit`][use admin riak admin#cluster-commit]. You can stage multiple force-replace actions +before planning/committing. + +## plan + +Displays the currently staged cluster changes. + +```bash +riak admin cluster plan +``` + +Note that what happens when staged changes are later cleared with [`cluster clear`](#clear) depends on what has been staged: + +* If a `leave` operation has been staged, `riak admin cluster clear` will undo the staged change and no node will be stopped. +* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again. +* If `riak admin cluster clear` is run on a node that remains in the cluster, the node itself will be unaffected. + +If there is no current cluster plan, the output will be `There are no +staged changes`. 
+ +If there is a staged change (or changes), however, you +will see a detailed listing of what will take place upon commit, what +the cluster will look like afterward, etc. + +For example, if a `cluster leave` operation is staged in a 3-node cluster the output will look something like this: + +``` +=============================== Staged Changes ================================ +Action Details(s) +------------------------------------------------------------------------------- +leave 'dev2@127.0.0.1' +------------------------------------------------------------------------------- + + +NOTE: Applying these changes will result in 2 cluster transitions + +############################################################################### + After cluster transition 1/2 +############################################################################### + +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +leaving 32.8% 0.0% 'dev2@127.0.0.1' +valid 34.4% 50.0% 'dev1@127.0.0.1' +valid 32.8% 50.0% 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0 + +WARNING: Not all replicas will be on distinct nodes + +Transfers resulting from cluster changes: 38 + 6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1' + 11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1' + 5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1' + 16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1' + +############################################################################### + After cluster transition 2/2 +############################################################################### + +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +valid 50.0% -- 'dev1@127.0.0.1' +valid 50.0% -- 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + +WARNING: Not all replicas will be on distinct nodes +``` + +Notice that there are distinct sections of the output for each of the +transitions that the cluster will undergo, including warnings, planned +data transfers, etc. + +## commit + +Commits the currently staged cluster changes. Staged cluster changes +must be reviewed using [`riak admin cluster plan`][use admin riak admin#cluster-plan] prior to being committed. + +```bash +riak admin cluster commit +``` + +## clear + +Clears the currently staged cluster changes. + +```bash +riak admin cluster clear +``` + +## partitions + +Prints primary, secondary, and stopped partition indices and IDs either +for the current node or for another, specified node. 
The following +prints that information for the current node: + +```bash +riak admin cluster partitions +``` + +This would print the partition information for a different node in the +cluster: + +```bash +riak admin cluster partitions --node=<node> +``` + +Partition information is contained in a table like this: + +``` +Partitions owned by 'dev1@127.0.0.1': ++---------+-------------------------------------------------+--+ +| type | index |id| ++---------+-------------------------------------------------+--+ +| primary | 0 |0 | +| primary | 91343852333181432387730302044767688728495783936 |4 | +| primary |182687704666362864775460604089535377456991567872 |8 | +| ... | .... |..| +| primary |1438665674247607560106752257205091097473808596992|63| +|secondary| -- |--| +| stopped | -- |--| ++---------+-------------------------------------------------+--+ +``` + +## partition-count + +Displays the current partition count either for the whole cluster or for +a particular node. This would display the partition count for the +cluster: + +```bash +riak admin cluster partition-count +``` + +This would display the count for a node: + +```bash +riak admin cluster partition-count --node=<node> +``` + +When retrieving the partition count for a node, you'll see a table like +this: + +``` ++--------------+----------+-----+ +| node |partitions| pct | ++--------------+----------+-----+ +|dev1@127.0.0.1| 22 | 34.4| ++--------------+----------+-----+ +``` + +The `partitions` column displays the number of partitions claimed by the +node, while the `pct` column displays the percentage of the ring claimed. + +## partition + +The `cluster partition` command enables you to convert partition IDs to +indexes and vice versa using the `partition id` and `partition index` +commands, respectively. Let's say that you run the `riak admin cluster +partitions` command and see that you have a variety of partitions, one +of which has an index of +`1004782375664995756265033322492444576013453623296`. 
You can convert +that index to an ID like this: + +```bash +riak admin cluster partition index=1004782375664995756265033322492444576013453623296 +``` + +Conversely, if you have a partition with an ID of 20, you can retrieve +the corresponding index: + +```bash +riak admin cluster partition id=20 +``` + + + diff --git a/content/riak/kv/3.0.1/using/admin/riak-admin.md b/content/riak/kv/3.0.1/using/admin/riak-admin.md new file mode 100644 index 0000000000..5e0d41805f --- /dev/null +++ b/content/riak/kv/3.0.1/using/admin/riak-admin.md @@ -0,0 +1,721 @@ +--- +title: "riak admin Command Line Interface" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "riak admin CLI" + identifier: "cluster_admin_cli" + weight: 101 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/3.0.1/ops/running/tools/riak-admin + - /riak/kv/3.0.1/ops/running/tools/riak-admin + +--- + +[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference +[use admin commands]: {{<baseurl>}}riak/kv/3.0.1/using/admin/commands +[use admin commands#join]: {{<baseurl>}}riak/kv/3.0.1/using/admin/commands/#join +[use admin commands#leave]: {{<baseurl>}}riak/kv/3.0.1/using/admin/commands/#leave +[cluster ops backup]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/backing-up +[config reference#node-metadata]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/#node-metadata +[cluster ops change info]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/changing-cluster-info +[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/mapreduce +[usage commit hooks]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/commit-hooks +[config reference#ring]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/#ring +[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/inspecting-node +[use ref monitoring]: {{<baseurl>}}riak/kv/3.0.1/using/reference/statistics-monitoring +[downgrade]: {{<baseurl>}}riak/kv/3.0.1/setup/downgrade +[security index]: {{<baseurl>}}riak/kv/3.0.1/using/security/ +[security managing]: {{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources +[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/bucket-types +[cluster ops 2i]: {{<baseurl>}}riak/kv/3.0.1/using/reference/secondary-indexes +[repair recover index]: {{<baseurl>}}riak/kv/3.0.1/using/repair-recovery +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/strong-consistency +[cluster ops handoff]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/handoff +[use admin riak admin#stats]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak-admin/#status + +## `riak admin` + +The `riak admin` command performs operations unrelated to node liveness, including +node membership, backup, and basic status reporting. The node must be +running for most of these commands to work. 
Running `riak admin` by itself will output a list of available commands: + +``` +Usage: riak admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak admin join` command has +been deprecated in favor of the [`riak admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak admin leave` command has +been deprecated in favor of the new [`riak admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+ +* `[node|all]` specifies whether the data on this node or the entire +cluster will be backed up. + +## restore + +> **Deprecation notice** +> +> The `riak admin restore` command has been deprecated. It was originally +intended to be used in conjunction with backups performed using the +`riak admin backup` command, which is also deprecated. We recommend +using the backup and restore methods described in [Backing up Riak KV][cluster ops backup]. + +Restores data to the node or cluster from a previous backup. + +```bash +riak admin restore <node> <cookie> <filename> +``` + +* `<node>` is the node which will perform the restore. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup is stored. _This should be + the full path to the file_. + +## test + +Runs a test of a few standard Riak operations against the running node. + +```bash +riak admin test +``` + +If the test is successful, you should see output like the following: + +``` +Successfully completed 1 read/write cycle to 'dev1@127.0.0.1' +``` + +## reip + +Renames a node. This process backs up and edits the Riak ring, and +**must** be run while the node is stopped. Reip should only be run in +cases where `riak admin cluster force-replace` cannot be used to +rename the nodes of a cluster. For more information, visit the +[Changing Cluster Information][cluster ops change info] document. + +```bash +riak admin reip <old nodename> <new nodename> +``` + +{{% note title="Note about reip prior to Riak 2.0" %}} +Several bugs have been fixed related to reip in Riak 2.0. We recommend against +using reip prior to 2.0, if possible. +{{% /note %}} + + +## js-reload + +Forces the embedded Javascript virtual machines to be restarted. This is +useful when deploying custom built-in [MapReduce][usage mapreduce] +functions. + +**Note**: This needs to be run on _all nodes_ in the cluster. + +```bash +riak admin js-reload +``` + +## erl-reload + +Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce] +jobs, [pre- and post-commit hooks][usage commit hooks], and other +purposes. + +> **Note**: This needs to be run on _all nodes_ in the cluster. + +```bash +riak admin erl-reload +``` + +## wait-for-service + +Waits on a specific watchable service to be available (typically +`riak_kv`). This is useful when (re-)starting a node while the cluster +is under load. Use `riak admin services` to see which services are +available on a running node. + +```bash +riak admin wait-for-service <service> <nodename> +``` + +## ringready + +Checks whether all nodes in the cluster agree on the ring state. +Prints `FALSE` if the nodes do not agree. This is useful after changing +cluster membership to make sure that the ring state has settled. + +```bash +riak admin ringready +``` + +## transfers + +Identifies nodes that are awaiting transfer of one or more partitions. +This usually occurs when partition ownership has changed (after adding +or removing a node) or after node recovery. + +```bash +riak admin transfers +``` + +## transfer-limit + +Changes the `handoff_concurrency` limit. The value set by running this +command will only persist while the node is running. If the node is +restarted, the `transfer-limit` will return to the default of `2` or the +value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file. 
+ +Running this command with no arguments will display the current +transfer-limit for each node in the cluster. + +```bash +riak admin transfer-limit <node> <limit> +``` + +## down + +Marks a node as down so that ring transitions can be performed before +the node is brought back online. + +```bash +riak admin down <node> +``` + +## cluster-info + +Output system information from a Riak cluster. This command will collect +information from all nodes or a subset of nodes and output the data to a +single text file. + +```bash +riak admin cluster-info <output file> [<node list>] +``` + +The following information is collected: + + * Current time and date + * VM statistics + * `erlang:memory()` summary + * Top 50 process memory hogs + * Registered process names + * Registered process name via `regs()` + * Non-zero mailbox sizes + * Ports + * Applications + * Timer status + * ETS summary + * Nodes summary + * `net_kernel` summary + * `inet_db` summary + * Alarm summary + * Global summary + * `erlang:system_info()` summary + * Loaded modules + * Riak Core config files + * Riak Core vnode modules + * Riak Core ring + * Riak Core latest ring file + * Riak Core active partitions + * Riak KV status + * Riak KV ringready + * Riak KV transfers + +#### Examples + +Output information from all nodes to `/tmp/cluster_info.txt`: + +```bash +riak admin cluster-info /tmp/cluster_info.txt +``` + +Output information from the current node: + +```bash +riak admin cluster-info /tmp/cluster_info.txt local +``` + +Output information from a subset of nodes: + +```bash +riak admin cluster-info /tmp/cluster_info.txt riak@192.168.1.10 +riak@192.168.1.11 +``` + +## member-status + +Prints the current status of all cluster members. + +```bash +riak admin member-status +``` + +## ring-status + +Outputs the current claimant, its status, ringready, pending ownership +handoffs, and a list of unreachable nodes. + +```bash +riak admin ring-status +``` + +## vnode-status + +Outputs the status of all vnodes that are running on the local node. + +```bash +riak admin vnode-status +``` + +## aae-status + +This command provides insight into operation of Riak's Active +Anti-Entropy (AAE) feature. + +```bash +riak admin aae-status +``` + +The output contains information on AAE key/value partition exchanges, +entropy tree building, and key repairs which were triggered by AAE. + +* **Exchanges** + * The *Last* column lists when the most recent exchange between a + partition and one of its sibling replicas was performed. + * The *All* column shows how long it has been since a partition + exchanged with all of its sibling replicas. + +* **Entropy Trees** + * The *Built* column shows when the hash trees for a given partition + were created. + +* **Keys Repaired** + * The *Last* column shows the number of keys repaired during the most + recent key exchange. + * The *Mean* column shows the mean number of keys repaired during all + key exchanges since the last node restart. + * The *Max* column shows the maximum number of keys repaired during all + key exchanges since the last node restart. + +{{% note title="Note on AAE status information" %}} +All AAE status information is in-memory and is reset across a node restart. +Only tree build times are persistent (since trees themselves are persistent). +{{% /note %}} + +More details on the `aae-status` command are available in the [Riak +version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy). 
+ +## diag + +The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/) +diagnostic system. + +```bash +riak admin diag +``` + +This command allows you to specify which diagnostic checks you would +like to run, which types of diagnostic messages you wish to see, and so +on. More comprehensive information can be found in the documentation on +[inspecting a node][cluster ops inspect node]. + +## stat + +Provides an interface for interacting with a variety of cluster-level +metrics and information. + +```bash +riak admin stat +``` + +Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring]. + +## status + +Prints status information, including performance statistics, system +health information, and version numbers. Further information about the +output is available in the documentation on [inspecting a node][cluster ops inspect node]. + +```bash +riak admin status +``` + +## reformat-indexes + +This command reformats integer indexes in Secondary Index data for +versions of Riak prior to 1.3.1 so that range queries over the indexes +will return correct results. + +```bash +riak admin reformat-indexes [<concurrency>] [<batch size>] --downgrade +``` + +The `concurrency` option defaults to `2` and controls how many +partitions are concurrently reformatted. + +The `batch size` option controls the number of simultaneous key +operations and defaults to `100`. + +This command can be executed while the node is serving requests, and +default values are recommended for most cases. You should only change +the default values after testing impact on cluster performance. + +Information is written to `console.log` upon completion of the process. + +A `--downgrade` switch can be specified when downgrading a node to a version +of Riak prior to version 1.3.1. + +Additional details are available in the [Riak 1.3.1 release +notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md). + +## top + +Top uses Erlang's etop to provide information about what the Erlang +processes inside of Riak are doing. Top reports process reductions (an +indicator of CPU utilization), memory used, and message queue sizes. + +```bash +riak admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N] +``` + +Options: + +* `interval` specifies the number of seconds between each update of the + top output and defaults to `5` +* `sort` determines on which category `riak admin top` sorts and + defaults to `reductions` +* `lines` specifies the number of processes to display in the top output + and defaults to `10` + +More information about Erlang's etop can be found in the [etop +documentation](http://www.erlang.org/doc/man/etop.html). + +## downgrade-objects + +This command is used when changing the format of Riak objects, usually +as part of a version downgrade. + +```bash +riak admin downgrade-objects <kill-handoffs> [<concurrency>] +``` + +More detailed information can be found in [Rolling Downgrades][downgrade]. + +## security + +This command enables you to manage Riak users, choose sources +of authentication, assign and revoke permissions to/from users and +groups, enable and disable Riak Security, and more. + +```bash +riak admin security <command> +``` + +More comprehensive information on user management can be found in +the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing]. 
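+ +As a quick, hypothetical illustration (the user name, password, and permission names below are placeholders, not prescribed values), a first security session might look like this: + +```bash +# Create a user and allow trusted connections from localhost (illustrative values) +riak admin security add-user alice password=changeme +riak admin security add-source all 127.0.0.1/32 trust + +# Grant basic KV permissions to that user, then switch security on and verify +riak admin security grant riak_kv.get,riak_kv.put on any to alice +riak admin security enable +riak admin security status +```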
+ +## bucket-type + +Bucket types, introduced in Riak 2.0, are a means of managing bucket +properties and provide an additional namespace in Riak alongside +buckets and keys. This command enables you to create and modify bucket +types, provide the status of currently available bucket types, and +activate created bucket types. + +```bash +riak admin bucket-type <command> +``` + +More on bucket types can be found in [Using Bucket Types][cluster ops bucket types]. + +## repair-2i + +This command repairs [secondary indexes][cluster ops 2i] in a +specific partition or on a cluster-wide basis. Implementation details +can be found in [Repairing Indexes][repair recover index]. + +To repair secondary indexes throughout the entire cluster, run the +`repair-2i` command by itself, without a subcommand: + +```bash +riak admin repair-2i +``` + +This will initiate the repair process. When you run this command, you +should see something like the following (where `<ring_size>` is the +number of partitions in your Riak cluster): + +``` +Will repair 2i data on <ring_size> partitions +Watch the logs for 2i repair progress reports +``` + +To repair secondary indexes in a specific partition, provide the ID of +the partition along with the `repair-2i` command: + +```bash +riak admin repair-2i 593735040165679310520246963290989976735222595584 +``` + +You can check on the status of the repair process at any time: + +```bash +riak admin repair-2i status +``` + +If the repair is already finished, the console will return `2i repair is +not running`. If the repair is still in progress, the console will +return a series of statistics like this: + +``` +2i repair status is running: + Total partitions: 64 + Finished partitions: 44 + Speed: 100 + Total 2i items scanned: 0 + Total tree objects: 0 + Total objects fixed: 0 +``` + +If you're concerned about the computational resources required to repair +secondary indexes, you can set the speed of the process to an integer +between 1 and 100 (with 100 being the fastest). This command would set +the speed to 90: + +```bash +riak admin repair-2i --speed 90 +``` + +The repair process can be stopped at any moment using the `kill` +command: + +```bash +riak admin repair-2i kill +``` + +## search + +The search command provides sub-commands for various administrative +tasks related to the new Riak Search. + +```bash +riak admin search <command> +``` + +### aae-status + +```bash +riak admin search aae-status +``` + +Output active anti-entropy (AAE) statistics for search. There are +three sections. Each section contains statistics for a specific aspect +of AAE for every partition owned by the local node. + +The first section provides information on exchanges. Exchange is the +process of comparing hash trees to determine divergences between KV +data and search indexes. The `Index` column contains the partition +number. The `Last (ago)` column is the amount of time that has passed +since the last exchange. The `All (ago)` column is the amount of time +that has passed since all preflists for that partition have been +exchanged. + +The second section lists how much time has passed since the hashtree +for that partition has been built from scratch. By default trees +expire after 1 week and are rebuilt from scratch. + +The third section presents statistics on repair operations that have +occurred. Repair is performed when AAE notices that the KV and search +hashtree don't match for a particular key. The `Last` column is the +number of keys repaired during the last exchange. 
The `Mean` column is +the average number of keys repaired for all exchange rounds since the +node has started. The `Max` column is the maximum number of keys +repaired for a given exchange round since the node has started. + +### switch-to-new-search + +{{% note title="Only For Legacy Migration" %}} +This is only needed when migrating from legacy riak search to the new Search +(Yokozuna). +{{% /note %}} + +```bash +riak admin search switch-to-new-search +``` + +Switch handling of the HTTP `/solr/<index>/select` resource and +protocol buffer query messages from legacy Riak Search to new Search +(Yokozuna). + +## services + +Lists available services on the node (e.g. `riak_kv`). + +```bash +riak admin services +``` + +## ensemble-status + +This command is used to provide insight into the current status of the +consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature. + +```bash +riak admin ensemble-status +``` + +This command can also be used to check on the status of a specific +consensus group in your cluster: + +```bash +riak admin ensemble-status <group id> +``` + +Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency]. + +## handoff + +Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff]. + +## set + +Enables you to change the value of one of Riak's configuration +parameters on the fly, without needing to stop and restart the node. + +```bash +riak admin set <variable>=<value> +``` + +The set command can only be used for the following +parameters: + +* `transfer_limit` +* `handoff.outbound` +* `handoff.inbound` +* `search.dist_query=off` will disable distributed query for the node +* `search.dist_query=on` will enable distributed query for the node +* `search.dist_query` will get the status of distributed query for the node + +The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted. + + +## show + +Whereas the [`riak admin status`][use admin riak admin#stats] command will display all currently available statistics for your Riak +cluster, the `show` command enables you to view only some of those +statistics. + +```bash +riak admin show <variable> +``` + +## describe + +Provides a brief description of one of Riak's [configurable parameters][config reference]. 
+ +```bash +riak admin describe <variable> +``` + +If you want to know the meaning of the `nodename` parameter: + +```bash +riak admin describe nodename +``` + +That will produce the following output: + +``` +nodename: + Name of the Erlang node +``` + + + diff --git a/content/riak/kv/3.0.1/using/admin/riak-cli.md b/content/riak/kv/3.0.1/using/admin/riak-cli.md new file mode 100644 index 0000000000..ab01711d30 --- /dev/null +++ b/content/riak/kv/3.0.1/using/admin/riak-cli.md @@ -0,0 +1,204 @@ +--- +title: "riak Command Line Interface" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "riak CLI" + identifier: "cluster_admin_riak_cli" + weight: 102 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/3.0.1/ops/running/tools/riak + - /riak/kv/3.0.1/ops/running/tools/riak + +--- + +[configuration file]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/ +[escript]: http://www.erlang.org/doc/man/escript.html +[`riak admin`]: {{<baseurl>}}riak/kv/3.0.1/using/admin/riak-admin/#top +[configuration]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/ + +## riak + +This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands: + +```bash +Usage: riak «command» +where «command» is one of the following: + { help | start | stop | restart | ping | console | attach + attach-direct | ertspath | chkconfig | escript | version | getpid + top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } | + config { generate | effective | describe VARIABLE } [-l debug] +``` + +## help + +Provides a brief description of all available commands. + +## start + +Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given. + +```bash +riak start +``` + +## stop + +Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding. + +```bash +riak stop +``` + +## restart + +Stops and then starts the running Riak node without exiting the Erlang VM. +Prints `ok` when successful, or `Node <nodename> not responding to pings.` when the node is already stopped or not responding. + +```bash +riak restart +``` + +## ping + +Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding. + +```bash +riak ping +``` + +## console + +Starts the Riak node in the foreground, giving access to the Erlang shell and +runtime messages. Prints `Node is already running - use 'riak attach' instead` +when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice. + +```bash +riak console +``` + +## attach + +Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. + +```bash +riak attach +``` + +## attach-direct + +Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**. 
+ +```bash +riak attach-direct +``` + +## ertspath + +Outputs the path of the Riak Erlang runtime environment: + +```bash +riak ertspath +``` + +## chkconfig + +Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output. + +```bash +riak chkconfig +``` + +## escript + +Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment: + +```bash +riak escript <filename> +``` + +## version + +Outputs the Riak version identifier: + +```bash +riak version +``` + +## getpid + +Outputs the process identifier for the currently-running instance of Riak: + +```bash +riak getpid +``` + +## top + +The `riak top` command is the direct equivalent of `riak admin top`: + +```bash +riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] +``` + +More detailed information can be found in the [`riak admin`][`riak admin`] documentation. + +## config + +Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration file. + +```bash +riak config { generate | effective | describe VARIABLE } [-l debug] +``` + +* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files. + The output of a successful run will show the paths to the newly generated configuration files. The names of these configuration files include a timestamp indicating when they were generated. For example: + + ``` + -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args + ``` + + If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message: + + ``` + -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args + ``` + +* `effective` prints the effective configuration in the following syntax: + + ``` + parameter1 = value1 + parameter2 = value2 + ``` + + If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error: + + ``` + Disabling cuttlefish, legacy configuration files found: + /etc/riak/app.config + /etc/riak/vm.args + Effective config is only visible for cuttlefish conf files. + ``` + +* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following: + + ``` + Documentation for storage_backend + Specifies the storage engine used for Riak's key-value data + and secondary indexes (if supported). + + Valid Values: + - one of: bitcask, leveldb, memory, multi, prefix_multi + Default Value : bitcask + Set Value : bitcask + Internal key : riak_kv.storage_backend + ``` + +Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem. 
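+ +For example, a quick sanity check of a node's configuration might chain these subcommands together (the `storage_backend` parameter is just an illustrative choice): + +```bash +# Validate riak.conf and advanced.config without restarting the node +riak chkconfig + +# Print the fully resolved configuration and inspect one setting +riak config effective | grep storage_backend + +# Show documentation plus default and effective values for that setting +riak config describe storage_backend +```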
+ + + diff --git a/content/riak/kv/3.0.1/using/admin/riak-control.md b/content/riak/kv/3.0.1/using/admin/riak-control.md new file mode 100644 index 0000000000..e70e145003 --- /dev/null +++ b/content/riak/kv/3.0.1/using/admin/riak-control.md @@ -0,0 +1,237 @@ +--- +title: "Riak Control" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Riak Control" + identifier: "cluster_admin_riak_control" + weight: 103 + parent: "managing_cluster_admin" +toc: true +aliases: + - /riak/3.0.1/ops/advanced/riak-control + - /riak/kv/3.0.1/ops/advanced/riak-control + +--- + +[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference + +Riak Control is a web-based administrative console for inspecting and +manipulating Riak clusters. + +## Requirements + +Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps. + +Before getting started, you should know the address and port of the HTTP (or +HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain +this information from the configuration files as indicated here: + +```riakconf +listener.http.<name> = 127.0.0.1:8098 + +## or + +listener.https.<name> = 127.0.0.1:8069 + +## *** The default listeners in the riak.conf file are +## named `internal`, so you would consult the value of +## `listener.http.internal` in your configuration. + +``` + +```appconfig + {riak_api, + [ + %% Other configs + ... if HTTP is configured ... + {http,[{"127.0.0.1",8098}]}, + ... if HTTPS is configured ... + {https,[{"127.0.0.1",8069}]}, + %% Other configs + ]}, + +%% *** This is a truncated configuration to illustrate the +%% pertinent items -- the `http` and `https` tuples within +%% the `riak_api` tuple's value list. +``` + +{{% note title="Note on SSL" %}} +We strongly recommend that you enable SSL for Riak Control. It is disabled by +default, and if you wish to enable it you must do so explicitly. More +information can be found in the document below. +{{% /note %}} + +## Enabling and Disabling Riak Control + +Riak Control is disabled by default, meaning that you should see the +following in your [configuration files][config reference]: + +```riakconf +riak_control = off +``` + +```appconfig +{riak_control, [ + %% Other configs + {enabled, false}, + %% Other configs + ]} +``` + +Enabling Riak Control is simple: + +```riakconf +riak_control = on +``` + +```appconfig +{riak_control, [ + %% Other configs + {enabled, true}, + %% Other configs + ]} +``` + +Make sure to restart the node once you have enabled Riak Control for the +change to take effect. + +After restarting the node, you should be able to access it by going +to `http://ip_address_of_listener:port/admin`. In the case of a development +cluster using the default configuration, you would access Riak Control at +<http://127.0.0.1:8098/admin>. + +If you enabled authentication for Riak Control while performing the above +configuration, you will be unable to access Riak Control until you have enabled +and configured SSL and HTTPS. + +## Enabling SSL and HTTPS + +In order to use SSL in conjunction with Riak Control, SSL must be +enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/3.0.1/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control. 
+ +Please note that Riak Control will not work if you have enabled +authentication but SSL is not set up properly. + +## Authentication + +Riak Control provides you the option of requiring authentication (via +HTTP basic auth) for users of the web interface. It is disabled by +default. To enable authentication: + +```riakconf +riak_control.auth.mode = userlist +``` + +```appconfig +{riak_control, [ + %% Other configs + {auth, userlist}, %% The only other available option is "none" + %% Other configs + ]} +``` + +When authentication is enabled, you can specify as many +username/password pairs as you wish. The default pair is the username +`user` and the password `pass`. We strongly recommend selecting +different credentials. The example below would set up three user-defined +pairs: + +```riakconf +riak_control.auth.user.bob.password = bob_is_the_coolest +riak_control.auth.user.polly.password = h4x0r123 +riak_control.auth.user.riakrocks.password = cap_theorem_4_life +``` + +```appconfig +{riak_control, [ + %% Other configs + {userlist, [ + {"bob", "bob_is_the_coolest"}, + {"polly", "h4x0r123"}, + {"riakrocks", "cap_theorem_4_life"} + ]} + %% Other configs +]} +``` + +## User Interface + +To begin using Riak Control, navigate to https://ip_address_of_https_listener:https_port/admin. +For a default configuration, this will be <https://localhost:8069/admin>. + +If your browser warns you that it cannot authenticate the page, this may +be because you are using self-signed certificates. If you have +authentication enabled in your configuration, you will next be asked to +authenticate. Enter an appropriate username and password now. + +{{% note title="Note on browser TLS" %}} +Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A +list of browsers that support TLS v1.2 can be found +[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers). +TLS v1.2 may be disabled by default on your browser, for example if you are +using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome +versions earlier than 30, or Internet Explorer versions earlier than 11. To +enable it, follow browser-specific instructions. +{{% /note %}} + +### Snapshot View + +When you first navigate to Riak Control, you will land on the Snapshot +view: + +[ ![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png) ] ({{<baseurl>}}images/control_current_snapshot.png) + +In this interface, the health of your cluster is made immediately +obvious. In the event that something isn't quite right (or has the +potential to cause problems in the near future), the green check mark +will turn into a red `X`. The red `X` is accompanied by a list of +reasons for concern. Each item in the list links to a page where you can +get more information about the issue. + +### Cluster Management View + +On the top right side of the admin panel are navigation tabs. If you +click the **Cluster** tab, you will be taken to the cluster management +page. + +On this page, you can see all of the nodes in your cluster, along with +their status, the percentage of the ring owned by that node, and memory +consumption. You can also stage and commit changes to the cluster, such +as adding, removing, and marking nodes as down. 
+ +Staged changes to the cluster: + +[ ![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png) ] ({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[ ![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png) ] ({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[ ![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png) ] ({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[ ![Node Management]({{<baseurl>}}images/control_node_management.png) ] ({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode). + +[ ![Ring View]({{<baseurl>}}images/control_current_ring.png) ] ({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. + + + diff --git a/content/riak/kv/3.0.1/using/cluster-operations.md b/content/riak/kv/3.0.1/using/cluster-operations.md new file mode 100644 index 0000000000..3252d1b0fe --- /dev/null +++ b/content/riak/kv/3.0.1/using/cluster-operations.md @@ -0,0 +1,109 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +aliases: + +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak admin handoff` interface to enable and disable handoff. 
+ +[Learn More >>][ops handoff] + + +#### [Monitoring Strong Consistency][ops strong consistency] + +Overview of the various statistics used in monitoring strong consistency. + +[Learn More >>][ops strong consistency] + + +#### [V3 Multi-Datacenter][ops v3 mdc] + +Explains how to manage V3 replication with the `riak-repl` command. + +[Learn More >>][ops v3 mdc] + + +#### [V2 Multi-Datacenter][ops v2 mdc] + +Explains how to manage V2 replication with the `riak-repl` command. + +[Learn More >>][ops v2 mdc] + + + diff --git a/content/riak/kv/3.0.1/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/3.0.1/using/cluster-operations/active-anti-entropy.md new file mode 100644 index 0000000000..bd3d62615b --- /dev/null +++ b/content/riak/kv/3.0.1/using/cluster-operations/active-anti-entropy.md @@ -0,0 +1,288 @@ +--- +title: "Managing Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Managing Active Anti-Entropy" + identifier: "cluster_operations_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +version_history: + in: "2.9.1+" +aliases: + - /riak/kv/3.0.1/ops/advanced/aae/ + - /riak/3.0.1/ops/advanced/aae/ +--- +[config search#throttledelay]: {{<baseurl>}}riak/kv/3.0.1/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{<baseurl>}}riak/kv/3.0.1/configuring/search/#search-anti-entropy-throttle + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak. + +## Enabling Active Anti-Entropy + +Whether AAE is currently enabled in a node is determined by the value of +the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/). + +In Riak versions 2.0 and later, AAE is turned on by default. + +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. 
+ +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... + +================================ Entropy Trees ================================ +Index Built (ago) +------------------------------------------------------------------------------- +0 5.7 d +22835963083295358096932575511191922182123945984 5.6 d +45671926166590716193865151022383844364247891968 5.5 d +68507889249886074290797726533575766546371837952 4.3 d +91343852333181432387730302044767688728495783936 4.8 d + +================================ Keys Repaired ================================ +Index Last Mean Max +------------------------------------------------------------------------------- +0 0 0 0 +22835963083295358096932575511191922182123945984 0 0 0 +45671926166590716193865151022383844364247891968 0 0 0 +68507889249886074290797726533575766546371837952 0 0 0 +91343852333181432387730302044767688728495783936 0 0 0 + +``` + +Each of these three tables contains information for each +[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories: + +Category | Measures | Description +:--------|:---------|:----------- +**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed + | `All` | How long it has been since a partition exchanged with all of its replicas +**Entropy Trees** | `Built` | When the hash trees for a given partition were created +**Keys Repaired** | `Last` | The number of keys repaired during all key exchanges since the last node restart + | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart + | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart + +All AAE status information obtainable using the `riak admin aae-status` +command is stored in-memory and is reset when a node is restarted with +the exception of hash tree build information, which is persisted on disk +(because hash trees themselves are persisted on disk). + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and +off but also to fine-tune your cluster's use of AAE, e.g. how +much memory AAE processes should consume, how frequently specific +processes should be run, etc. 
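+ +Before tuning, it can help to confirm the AAE settings a node is actually running with. A minimal sketch using the [`riak config`](../../admin/riak-cli/) interface described earlier (parameter availability varies by Riak version and backend): + +```bash +# List every effective anti-entropy setting on this node +riak config effective | grep anti_entropy + +# Show documentation and the effective value for the main AAE switch +riak config describe anti_entropy +```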
+ +### Data Directory + +By default, data related to AAE operations is stored in the +`./data/anti_entropy` directory in each Riak node. This can be changed +by setting the `anti_entropy.data_dir` parameter to a different value. + +### Throttling + +AAE has a built-in throttling mechanism that can insert delays between +AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach the length +specified by the [`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on +that in the section below). Throttling can be switched on and off using +the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`. + +#### Throttling Tiers + +If you activate AAE throttling, you can use *tiered throttling* to +establish a series of vnode mailbox-size thresholds past which a +user-specified time delay should be observed. This enables you to +establish, for example, that a delay of 10 milliseconds should be +observed if the mailbox of any vnode reaches 50 messages. + +The general form for setting tiered throttling is as follows: + +```riakconf +search.anti_entropy.throttle.$tier.delay +search.anti_entropy.throttle.$tier.solrq_queue_length +``` + +In the above example, `$tier` should be replaced with the desired +name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc). If you +choose to set throttling tiers, you will need to set the mailbox size +for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay` +parameters must be set for each tier. + +Below is an example configuration for three tiers, with mailbox sizes of +0, 50, and 100 and time delays of 5, 10, and 15 milliseconds, +respectively: + +```riakconf +search.anti_entropy.throttle.tier1.solrq_queue_length = 0 +search.anti_entropy.throttle.tier1.delay = 5ms +search.anti_entropy.throttle.tier2.solrq_queue_length = 50 +search.anti_entropy.throttle.tier2.delay = 10ms +search.anti_entropy.throttle.tier3.solrq_queue_length = 100 +search.anti_entropy.throttle.tier3.delay = 15ms +``` + +### Bloom Filters + +Bloom filters are mechanisms used to prevent reads that are destined to +fail because no object exists in the location that they're querying. +Using bloom filters can improve reaction time for some queries, but +entail a small general performance cost. You can switch bloom filters +on and off using the `anti_entropy.bloomfilter` parameter. + +### Trigger Interval + +The `anti_entropy.trigger_interval` setting determines how often Riak's +AAE subsystem looks for work to do, e.g. building or expiring hash +trees, triggering information exchanges between nodes, etc. The default +is every 15 seconds (`15s`). Raising this value may save resources, but +at a slightly higher risk of data corruption. + +### Hash Trees + +As a fallback measure in addition to the normal operation of AAE on-disk +hash trees, Riak periodically clears and regenerates all hash trees +stored on disk to ensure that hash trees correspond to the key/value +data stored in Riak. This enables Riak to detect silent data corruption +resulting from disk failure or faulty hardware. The +`anti_entropy.tree.expiry` setting enables you to determine how often +that takes place. The default is once a week (`1w`). You can set up this +process to run once a day (`1d`), twice a day (`12h`), once a month +(`4w`), and so on. + +In addition to specifying how often Riak expires hash trees after they +are built, you can also specify how quickly and how many hash trees are +built. 
You can set the frequency using the
`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
default is every hour (`1h`); the number of hash tree builds allowed
within that timespan is specified by `anti_entropy.tree.build_limit.number`,
for which the default is 1.

### Write Buffer Size

While you are free to choose the backend for data storage in Riak,
background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
write buffer used by LevelDB for hash tree generation using the
`anti_entropy.write_buffer_size` parameter. The default is `4MB`.

### Open Files and Concurrency Limits

The `anti_entropy.concurrency_limit` parameter determines how many AAE
cross-node information exchanges or hash tree builds can happen
concurrently. The default is `2`.

The `anti_entropy.max_open_files` parameter sets an open-files limit for
AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.

## AAE and Riak Search

Riak's AAE subsystem works to repair object inconsistencies both for
normal key/value objects and for data related to [Riak Search](../../../developing/usage/search). In particular, AAE acts on indexes stored in
[Solr](http://lucene.apache.org/solr/), the search platform that drives
Riak Search. Implementation details for AAE and Search can be found in
the [Search Details](../../reference/search/#active-anti-entropy-aae)
documentation.

You can check on the status of Search-related AAE using the following
command:

```bash
riak admin search aae-status
```

The output from that command can be interpreted just like the output
discussed in the section on [monitoring](#monitoring-aae) above.



diff --git a/content/riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes.md new file mode 100644 index 0000000000..b72c134abc --- /dev/null +++ b/content/riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes.md @@ -0,0 +1,198 @@ +--- +title: "Adding / Removing Nodes" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Adding/Removing Nodes" + identifier: "cluster_operations_add_remove_nodes" + weight: 100 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/3.0.1/ops/running/nodes/adding-removing + - /riak/kv/3.0.1/ops/running/nodes/adding-removing + +--- +
[use running cluster]: {{<baseurl>}}riak/kv/3.0.1/using/running-a-cluster

This page describes the process of adding and removing nodes to and from
a Riak KV cluster. For information on creating a cluster, check out [Running a Cluster][use running cluster].

## Start the Node

Just like the initial configuration steps, this step has to be repeated
for every node in your cluster. Before a node can join an existing
cluster, it needs to be started. Depending on your mode of installation,
use either the init scripts installed by the Riak binary packages or
simply the script [`riak`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak-cli/):

```bash
/etc/init.d/riak start
```

or

```bash
bin/riak start
```

When the node starts, it will look for a cluster description, known as
the **ring file**, in its data directory.
If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak admin cluster join A +bin/riak admin cluster join B +bin/riak admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. This command +would stage a join to that cluster: + +```bash +bin/riak admin cluster join riak@192.168.2.2 +``` + +If the join request is successful, you should see the following: + +``` +Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2' +``` + +If you have multiple nodes that you would like to join to an existing +cluster, repeat this process for each of them. + +## Joining Nodes to Form a Cluster + +The process of joining a cluster involves several steps, including +staging the proposed cluster nodes, reviewing the cluster plan, and +committing the changes. + +After staging each of the cluster nodes with `riak admin cluster join` +commands, as in the section above, the next step in forming a cluster is +to review the proposed plan of changes. This can be done with the +`riak admin cluster plan` command, which is shown in the example below. 
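For example, on any node where changes have been staged, run:

```bash
riak admin cluster plan
```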
+

```
=============================== Staged Changes ================================
Action         Nodes(s)
-------------------------------------------------------------------------------
join           'riak@192.168.2.3'
join           'riak@192.168.2.4'
join           'riak@192.168.2.5'
join           'riak@192.168.2.6'
-------------------------------------------------------------------------------


NOTE: Applying these changes will result in 1 cluster transition

###############################################################################
                         After cluster transition 1/1
###############################################################################

================================= Membership ==================================
Status     Ring    Pending    Node
-------------------------------------------------------------------------------
valid     100.0%     20.3%    'riak@192.168.2.2'
valid       0.0%     20.3%    'riak@192.168.2.3'
valid       0.0%     20.3%    'riak@192.168.2.4'
valid       0.0%     20.3%    'riak@192.168.2.5'
valid       0.0%     18.8%    'riak@192.168.2.6'
-------------------------------------------------------------------------------
Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0

Transfers resulting from cluster changes: 51
  12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
```

If the plan is to your liking, submit the changes by running `riak admin
cluster commit`.

{{% note title="Note on ring changes" %}}
The algorithm that distributes partitions across the cluster during membership
changes is non-deterministic. As a result, there is no optimal ring. In the
event that a plan results in a slightly uneven distribution of partitions, the
plan can be cleared. Clearing a cluster plan with `riak admin cluster clear`
and running `riak admin cluster plan` again will produce a slightly different
ring.
{{% /note %}}

## Removing a Node From a Cluster

A node can be removed from the cluster in two ways. One assumes that a
node is decommissioned, for example, because its added capacity is not
needed anymore or because it's explicitly replaced with a new one. The
second is relevant for failure scenarios in which a node has crashed and
is irrecoverable and thus must be removed from the cluster by way of
another node.

The command to remove a running node is `riak admin cluster leave`. This
command must be executed on the node that you intend to remove from the
cluster.

Similarly to joining a node, after executing `riak admin cluster leave`
the cluster plan must be reviewed with `riak admin cluster plan` and
the changes committed with `riak admin cluster commit`.

The other command is `riak admin cluster leave <node>`, where `<node>`
is the node name as specified in the node's configuration files:

```bash
riak admin cluster leave riak@192.168.2.1
```

This command can be run from any other node in the cluster.

Under the hood, both commands do basically the same thing; running
`riak admin cluster leave` simply selects the current node for you
automatically.

As with `riak admin cluster leave`, the plan to have a node leave the
cluster must first be reviewed with `riak admin cluster plan` and
committed with `riak admin cluster commit` before any changes will
actually take place.
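Putting those commands together, removing the node `riak@192.168.2.1`
from the example above, reviewing the plan, and committing it would look
like this:

```bash
riak admin cluster leave riak@192.168.2.1
riak admin cluster plan
riak admin cluster commit
```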
+


## Pausing a `join` or `leave`

{{% note title="Warning" %}}
Pausing may impact cluster health and is not recommended for more than a short period of time.
{{% /note %}}

To pause during `riak admin cluster join` or `riak admin cluster leave`, set the node's transfer-limit to 0:

```bash
riak admin transfer-limit <node> 0
```



diff --git a/content/riak/kv/3.0.1/using/cluster-operations/backend.md b/content/riak/kv/3.0.1/using/cluster-operations/backend.md new file mode 100644 index 0000000000..a63026955b --- /dev/null +++ b/content/riak/kv/3.0.1/using/cluster-operations/backend.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "Backend" +description: "" +project: "riak_kv" +project_version: 3.0.1 +#menu: +# riak_kv-3.0.1: +# name: "Backend" +# identifier: "cluster_operations_backend" +# weight: 112 +# parent: "managing_cluster_operations" +toc: true +aliases: + +--- +
**TODO: Add content**



diff --git a/content/riak/kv/3.0.1/using/cluster-operations/backing-up.md b/content/riak/kv/3.0.1/using/cluster-operations/backing-up.md new file mode 100644 index 0000000000..fa42f394d3 --- /dev/null +++ b/content/riak/kv/3.0.1/using/cluster-operations/backing-up.md @@ -0,0 +1,271 @@ +--- +title: "Backing Up" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Backing Up" + identifier: "cluster_operations_backing_up" + weight: 106 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/3.0.1/ops/running/backups + - /riak/kv/3.0.1/ops/running/backups + +--- +
[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference
[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveldb
[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/bitcask
[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/reference/strong-consistency
[concept aae]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/
[aae read repair]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy

Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios.

Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios.

This page covers how to perform backups of Riak KV data.

## Overview

Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like `rsync` or `tar`.

Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes.

The basic process for getting a backup of Riak KV from a node is as follows:

1. Stop Riak KV with `riak stop`.
2. Back up the appropriate data, ring, and configuration directories.
3. Start Riak KV.

Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting.

{{% note title="Backups and eventual consistency" %}}
Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node.

Data could exist on some nodes and not others at the exact time a backup is made.
Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy).
{{% /note %}}

## OS-Specific Directory Locations

The default Riak KV data, ring, and configuration directories for each of the supported operating systems are as follows:

#### Debian and Ubuntu

Data | Directory
:----|:---------
Bitcask | `/var/lib/riak/bitcask`
LevelDB | `/var/lib/riak/leveldb`
Ring | `/var/lib/riak/ring`
Configuration | `/etc/riak`
Cluster Metadata | `/var/lib/riak/cluster_meta`
Search | `/var/lib/riak/yz`
Strong consistency | `/var/lib/riak/ensembles`

#### Fedora and RHEL

Data | Directory
:----|:---------
Bitcask | `/var/lib/riak/bitcask`
LevelDB | `/var/lib/riak/leveldb`
Ring | `/var/lib/riak/ring`
Configuration | `/etc/riak`
Cluster Metadata | `/var/lib/riak/cluster_meta`
Search | `/var/lib/riak/yz`
Strong consistency | `/var/lib/riak/ensembles`

#### FreeBSD

Data | Directory
:----|:---------
Bitcask | `/var/db/riak/bitcask`
LevelDB | `/var/db/riak/leveldb`
Ring | `/var/db/riak/ring`
Configuration | `/usr/local/etc/riak`
Cluster Metadata | `/var/db/riak/cluster_meta`
Search | `/var/db/riak/yz`
Strong consistency | `/var/db/riak/ensembles`

#### OS X

Data | Directory
:----|:---------
Bitcask | `./data/bitcask`
LevelDB | `./data/leveldb`
Ring | `./data/riak/ring`
Configuration | `./etc`
Cluster Metadata | `./data/riak/cluster_meta`
Search | `./data/riak/yz`
Strong consistency | `./data/ensembles`

**Note**: OS X paths are relative to the directory in which the package
was extracted.

#### SmartOS

Data | Directory
:----|:---------
Bitcask | `/var/db/riak/bitcask`
LevelDB | `/var/db/riak/leveldb`
Ring | `/var/db/riak/ring`
Configuration | `/opt/local/etc/riak`
Cluster Metadata | `/var/db/riak/cluster_meta`
Search | `/var/db/riak/yz`
Strong consistency | `/var/db/riak/ensembles`

#### Solaris

Data | Directory
:----|:---------
Bitcask | `/opt/riak/data/bitcask`
LevelDB | `/opt/riak/data/leveldb`
Ring | `/opt/riak/ring`
Configuration | `/opt/riak/etc`
Cluster Metadata | `/opt/riak/cluster_meta`
Search | `/opt/riak/yz`
Strong consistency | `/opt/riak/data/ensembles`

## Performing Backups

{{% note title="Deprecation notice" %}}
In previous versions of Riak KV, there was a [`riak admin backup`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#backup) command commonly used for
backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
{{% /note %}}

Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.

A simple shell command, like those in the following examples, is sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
installation.

The following examples use `tar`:

{{% note %}}
Backups must be performed while Riak KV is stopped to prevent data loss.
+
{{% /note %}}

### Bitcask

```bash
tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
  /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak
```

### LevelDB

```bash
tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
  /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak
```

### Cluster Metadata

```bash
tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
  /var/lib/riak/cluster_meta
```

### Search / Solr Data

```bash
tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
  /var/lib/riak/yz
```

### Strong Consistency Data

Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature
can be backed up in an analogous fashion:

```bash
tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
  /var/lib/riak/ensembles
```

## Restoring a Node

The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment.

If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process:

1. Install Riak on the new node.
2. Restore your old node's configuration files, data directory, and ring
   directory.
3. Start the node and verify proper operation with `riak ping`,
   `riak admin status`, and other methods you use to check node health.

If the node name of a restored node (`-name` argument in `vm.args` or
`nodename` parameter in `riak.conf`) is different from the name of the
node that the restored backup was taken from, you will need to
additionally:

1. Mark the original instance down in the cluster using
   [`riak admin down <node>`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#down)
2. Join the restored node to the cluster using
   [`riak admin cluster join <node>`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster-join)
3. Replace the original instance with the renamed instance with
   [`riak admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster-force-replace)
4. Plan the changes to the cluster with `riak admin cluster plan`
5. Finally, commit the cluster changes with `riak admin cluster commit`

{{% note %}}
For more information on the `riak admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/3.0.1/using/admin/).
{{% /note %}}

For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`.

1. Join to any existing cluster node.

    ```bash
    riak admin cluster join riak@riak2.example.com
    ```

2. Mark the old instance down.

    ```bash
    riak admin down riak@riak1.example.com
    ```

3. Force-replace the original instance with the new one.

    ```bash
    riak admin cluster force-replace \
        riak@riak1.example.com riak@riak6.example.com
    ```

4. Display and review the cluster change plan.

    ```bash
    riak admin cluster plan
    ```

5. Commit the changes to the cluster.
+

    ```bash
    riak admin cluster commit
    ```

In addition to running the commands above, your [configuration files][config reference] should also be changed to match the new name (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).

If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.

A robust DNS configuration can simplify the restore process when the IP addresses of the nodes change, provided that the node names are based on hostnames and those hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.

When performing restore operations involving `riak admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
and for any other nodes whose names have changed:

1. Verify that the correct name is present in your configuration file.
2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
3. Disconnect from the attached session with **Ctrl-G + q**.
4. Finally, run `riak admin member-status` to list all of the nodes and verify that all nodes listed have the correct names.

## Restoring a Cluster

Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/3.0.1/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).



diff --git a/content/riak/kv/3.0.1/using/cluster-operations/bucket-types.md b/content/riak/kv/3.0.1/using/cluster-operations/bucket-types.md new file mode 100644 index 0000000000..6554335bd4 --- /dev/null +++ b/content/riak/kv/3.0.1/using/cluster-operations/bucket-types.md @@ -0,0 +1,63 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Bucket Types" + identifier: "cluster_operations_bucket_types" + weight: 104 + parent: "managing_cluster_operations" +toc: true +aliases: + +--- +
Buckets are essentially a flat namespace in Riak. They allow the same
key name to exist in multiple buckets and enable you to apply
configurations across keys.

{{% note title="How Many Buckets Can I Have?" %}}
Buckets come with virtually no cost _except for when you modify the default
bucket properties_. Modified bucket properties are gossiped around the cluster
and therefore add to the amount of data sent around the network. In other
words, buckets using the `default` bucket type are free. More on that in the
next section.
{{% /note %}}

In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
default bucket namespace but require an additional setup step on the
command line.

## Creating a Bucket Type

When creating a new bucket type, you can create one without any
properties and then set individual buckets to be indexed.
The step below
creates and activates the bucket type:

```bash
riak admin bucket-type create animals '{"props":{}}'
riak admin bucket-type activate animals
```

And this step applies the index to the `cats` bucket, which bears the
`animals` bucket type we just created and activated:

```curl
curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
  -H 'Content-Type: application/json' \
  -d '{"props":{"search_index":"famous"}}'
```

Another possibility is to set the `search_index` as a default property
of the bucket type. This means _any_ bucket under that type will
inherit that setting and have its values indexed.

```bash
riak admin bucket-type create animals '{"props":{"search_index":"famous"}}'
riak admin bucket-type activate animals
```



diff --git a/content/riak/kv/3.0.1/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/3.0.1/using/cluster-operations/changing-cluster-info.md new file mode 100644 index 0000000000..bca42c4006 --- /dev/null +++ b/content/riak/kv/3.0.1/using/cluster-operations/changing-cluster-info.md @@ -0,0 +1,458 @@ +--- +title: "Changing Cluster Information" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Changing Cluster Info" + identifier: "cluster_operations_change_info" + weight: 101 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/3.0.1/ops/running/nodes/renaming + - /riak/kv/3.0.1/ops/running/nodes/renaming + +--- +
[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference

## Change the Node Name

The node name is an important setting for the Erlang VM, especially when
you want to build a cluster of nodes, as the node name identifies both
the Erlang application and the host name on the network. All nodes in
the Riak cluster need these node names to communicate and coordinate
with each other.

In your configuration files, the node name defaults to `riak@127.0.0.1`.
To change the node name, change the following line:

```riakconf
nodename = riak@127.0.0.1
```

```vmargs
-name riak@127.0.0.1
```

Change it to something that corresponds to either the IP address or a
resolvable host name for this particular node, like so:

```riakconf
nodename = riak@192.168.1.10
```

```vmargs
-name riak@192.168.1.10
```

## Change the HTTP and Protocol Buffers binding address

By default, Riak's HTTP and Protocol Buffers services are bound to the
local interface, i.e. 127.0.0.1, and are therefore unable to serve
requests from the outside network. The relevant setting is in your
[configuration files][config reference]:

```riakconf
# For HTTP
listener.http.internal = 127.0.0.1:8098

# For Protocol Buffers
listener.protobuf.internal = 127.0.0.1:8087
```

```appconfig
% In the riak_core section

% For HTTP
{http, [ {"127.0.0.1", 8098 } ]},

% In the riak_api section

% For Protocol Buffers
{pb, [ {"127.0.0.1", 8087} ] },
```

Either change it to use an IP address that corresponds to one of the
server's network interfaces, or 0.0.0.0 to allow access from all
interfaces and networks, e.g.:

```riakconf
listener.http.internal = 0.0.0.0:8098
```

```appconfig
% In the riak_core section
{http, [ {"0.0.0.0", 8098 } ]},
```

The same configuration should be changed for the Protocol Buffers
interface if you intend to use it (which we recommend).
Change the
following line:

```riakconf
listener.protobuf.internal = 0.0.0.0:8087
```

```appconfig
% In the riak_api section
{pb, [ {"0.0.0.0", 8087} ] },
```

## Rename Single Node Clusters

To rename a single-node development cluster:

1. Stop the node with `riak stop`.

2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to the new name.

3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`.

4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`.

5. Start Riak on the node with `riak start`.


## Rename Multi-Node Clusters

For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node.

Prior to Riak version 1.2, a cluster node's name could only be changed with the [`riak admin reip`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak admin cluster force-replace`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime.

There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario.

The following example describes reconfiguring node names with the new `riak admin cluster force-replace` method.

### Example Scenario

For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration:

* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11
* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12
* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13
* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14
* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15

The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use.

The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation.

### Process

This process can be accomplished in three phases. The details and steps required for each phase are presented in the following sections.

1. [Down the node to be reconfigured](#down)
2. [Reconfigure node to use new address](#reconfigure)
3. [Repeat previous steps on each node](#repeat)


<a id="down"></a>
#### Down the Node

1. Stop Riak on `node1.localdomain`:

    ```bash
    riak stop
    ```

    The output should look like this:

    ```
    Attempting to restart script through sudo -H -u riak
    ok
    ```

2.
From the `node2.localdomain` node, mark `riak@10.1.42.11` down:

    ```bash
    riak admin down riak@10.1.42.11
    ```

    Successfully marking the node down should produce output like this:

    ```bash
    Attempting to restart script through sudo -H -u riak
    Success: "riak@10.1.42.11" marked as down
    ```

    This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node.

<a id="reconfigure"></a>
#### Reconfigure Node to Use New Address

Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps:

1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example:

    `riak.conf`: `nodename = riak@192.168.17.11`
    `vm.args` : `-name riak@192.168.17.11`

2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Single Node Clusters](#rename-single-node-clusters).

3. Rename the node's `ring` directory, the location of which is described in step 4 of [Single Node Clusters](#rename-single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process.

4. Start Riak on `node1.localdomain`.

    ```bash
    riak start
    ```

5. Join the node back into the cluster.

    ```bash
    riak admin cluster join riak@10.1.42.12
    ```

    Successful staging of the join request should have output like this:

    ```bash
    Attempting to restart script through sudo -H -u riak
    Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12'
    ```

6. Use `riak admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`:

    ```bash
    riak admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11
    ```

    Successful force replacement staging output looks like this:

    ```bash
    Attempting to restart script through sudo -H -u riak
    Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11'
    ```

7.
Review the new changes with `riak admin cluster plan`:

    ```bash
    riak admin cluster plan
    ```

    Example output:

    ```bash
    Attempting to restart script through sudo -H -u riak
    =========================== Staged Changes ============================
    Action         Nodes(s)
    -----------------------------------------------------------------------
    join           'riak@192.168.17.11'
    force-replace  'riak@10.1.42.11' with 'riak@192.168.17.11'
    -----------------------------------------------------------------------

    WARNING: All of 'riak@10.1.42.11' replicas will be lost

    NOTE: Applying these changes will result in 1 cluster transition

    #######################################################################
                             After cluster transition 1/1
    #######################################################################

    ============================= Membership ==============================
    Status     Ring    Pending    Node
    -----------------------------------------------------------------------
    valid       20.3%      --      'riak@192.168.17.11'
    valid       20.3%      --      'riak@10.1.42.12'
    valid       20.3%      --      'riak@10.1.42.13'
    valid       20.3%      --      'riak@10.1.42.14'
    valid       18.8%      --      'riak@10.1.42.15'
    -----------------------------------------------------------------------
    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0

    Partitions reassigned from cluster changes: 13
      13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11'
    ```

8. Commit the new changes to the cluster with `riak admin cluster commit`:

    ```bash
    riak admin cluster commit
    ```

    Output from the command should resemble this example:

    ```bash
    Attempting to restart script through sudo -H -u riak
    Cluster changes committed
    ```

9. Check that the node is participating in the cluster and functioning as expected:

    ```bash
    riak admin member-status
    ```

    Output should resemble this example:

    ```bash
    Attempting to restart script through sudo -H -u riak
    ============================= Membership ==============================
    Status     Ring    Pending    Node
    -----------------------------------------------------------------------
    valid       20.3%      --      'riak@192.168.17.11'
    valid       20.3%      --      'riak@10.1.42.12'
    valid       20.3%      --      'riak@10.1.42.13'
    valid       20.3%      --      'riak@10.1.42.14'
    valid       18.8%      --      'riak@10.1.42.15'
    -----------------------------------------------------------------------
    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
    ```

10. Monitor hinted handoff transfers to ensure they have finished with the `riak admin transfers` command.

11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed.

{{% note title="Note" %}}
When using the `riak admin cluster force-replace` command, you will always get a
warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be
lost`. Since we didn't delete any data files and we are replacing the node
with itself under a new name, we will not lose any replicas.
{{% /note %}}

<a id="repeat"></a>
#### Repeat previous steps on each node

Repeat the steps above for each of the remaining nodes in the cluster.

Use *riak@192.168.17.11* as the target node for further `riak admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster.
+

```bash
riak admin cluster join riak@192.168.17.11
```

A successful join request staging produces output similar to this example:

```bash
Attempting to restart script through sudo -H -u riak
Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
```

## Clusters from Backups

The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups in which the nodes must have new names, typically when setting up a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.

Expanding on the Example Scenario above, the steps below can be used to rename nodes in a cluster that is being restored from backups. These steps assume every node is offline, and they will indicate when to bring each node online.

#### Bringing Up the First Node

In order to bring our first node online, we'll first need to use the `riak admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.

1. In `riak.conf`, change `nodename` (`-name` in `vm.args`) from `riak@10.1.42.11` to the new node name, `riak@192.168.17.11`.

2. On `node1.localdomain` run `riak admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.

3. Start Riak on `node1.localdomain`.

4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak admin down`. For example, we would down `riak@10.1.42.12` with `riak admin down riak@10.1.42.12`.

5. Confirm every other node in the cluster is marked down by running `riak admin member-status` on `node1.localdomain`:

    ```bash
    ================================= Membership ==================================
    Status     Ring    Pending    Node
    -------------------------------------------------------------------------------
    valid       20.3%      --      'riak@192.168.17.11'
    down        20.3%      --      'riak@10.1.42.12'
    down        20.3%      --      'riak@10.1.42.13'
    down        20.3%      --      'riak@10.1.42.14'
    down        18.8%      --      'riak@10.1.42.15'
    -------------------------------------------------------------------------------
    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4

    ```

6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak admin ring-status` on `node1.localdomain`:

    ```bash
    ================================== Claimant ===================================
    Claimant:  'riak@192.168.17.11'
    Status:     up
    Ring Ready: true

    ============================== Ownership Handoff ==============================
    No pending changes.

    ============================== Unreachable Nodes ==============================
    All nodes are up and reachable
    ```

Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.

#### Bringing Up the Remaining Nodes

1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args`, as described above.

2. Move aside the ring directory. As in [Multi-Node Clusters](#rename-multi-node-clusters), we will save this ring directory as a backup until we're finished.

3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.

4. Join each node to our first node using `riak admin cluster join riak@192.168.17.11`.

5.
Force-replace each old node name with its new one. For example, `riak admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`.

6. Once the above is complete for each node, run `riak admin cluster plan` on any node. The output should look similar to the following:

    ```bash
    =============================== Staged Changes ================================
    Action         Details(s)
    -------------------------------------------------------------------------------
    force-replace  'riak@10.1.42.12' with 'riak@192.168.17.12'
    force-replace  'riak@10.1.42.13' with 'riak@192.168.17.13'
    force-replace  'riak@10.1.42.14' with 'riak@192.168.17.14'
    force-replace  'riak@10.1.42.15' with 'riak@192.168.17.15'
    join           'riak@192.168.17.12'
    join           'riak@192.168.17.13'
    join           'riak@192.168.17.14'
    join           'riak@192.168.17.15'
    -------------------------------------------------------------------------------

    WARNING: All of 'riak@10.1.42.12' replicas will be lost
    WARNING: All of 'riak@10.1.42.13' replicas will be lost
    WARNING: All of 'riak@10.1.42.14' replicas will be lost
    WARNING: All of 'riak@10.1.42.15' replicas will be lost

    NOTE: Applying these changes will result in 1 cluster transition

    ###############################################################################
                             After cluster transition 1/1
    ###############################################################################

    ================================= Membership ==================================
    Status     Ring    Pending    Node
    -------------------------------------------------------------------------------
    valid       20.3%      --      'riak@192.168.17.11'
    valid       20.3%      --      'riak@192.168.17.12'
    valid       20.3%      --      'riak@192.168.17.13'
    valid       20.3%      --      'riak@192.168.17.14'
    valid       18.8%      --      'riak@192.168.17.15'
    -------------------------------------------------------------------------------
    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0

    Partitions reassigned from cluster changes: 51
      13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12'
      13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13'
      13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14'
      12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15'
    ```

7. If the above plan looks correct, commit the cluster changes with `riak admin cluster commit`.

8.
Once the cluster transition has completed, all node names should be changed and marked as valid in `riak admin member-status`, as shown below:

    ```bash
    ================================= Membership ==================================
    Status     Ring    Pending    Node
    -------------------------------------------------------------------------------
    valid       20.3%      --      'riak@192.168.17.11'
    valid       20.3%      --      'riak@192.168.17.12'
    valid       20.3%      --      'riak@192.168.17.13'
    valid       20.3%      --      'riak@192.168.17.14'
    valid       18.8%      --      'riak@192.168.17.15'
    -------------------------------------------------------------------------------
    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0

    ```



diff --git a/content/riak/kv/3.0.1/using/cluster-operations/handoff.md b/content/riak/kv/3.0.1/using/cluster-operations/handoff.md new file mode 100644 index 0000000000..42077db051 --- /dev/null +++ b/content/riak/kv/3.0.1/using/cluster-operations/handoff.md @@ -0,0 +1,120 @@ +--- +title: "Enabling and Disabling Handoff" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Handoff" + identifier: "cluster_operations_handoff" + weight: 107 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/3.0.1/ops/running/handoff + - /riak/kv/3.0.1/ops/running/handoff + +--- +
Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to change your configuration and restart the node. To
enable handoff:

```bash
riak admin handoff enable <inbound|outbound|both> <nodename>
```

You must specify two things when enabling handoff:

* whether you'd like to enable inbound handoff, outbound handoff, or
  both
* the node to be targeted by the command (or all nodes)

You can select a target node using either the `--node` or the `-n` flag.
You can select a direction by specifying `inbound`, `outbound`, or
`both`. The following equivalent commands would enable outbound handoff
on the node `riak3@100.0.0.1`:

```bash
riak admin handoff enable outbound --node riak3@100.0.0.1
riak admin handoff enable outbound -n riak3@100.0.0.1
```

These two equivalent commands would enable inbound handoff on the node
`riak5@100.0.0.1`:

```bash
riak admin handoff enable inbound --node riak5@100.0.0.1
riak admin handoff enable inbound -n riak5@100.0.0.1
```

Alternatively, you can enable handoff on all nodes at the same time
using either the `-a` or `--all` flag. This command would enable both
inbound and outbound handoff on all nodes:

```bash
riak admin handoff enable both --all
```

As with enabling handoff, the `riak admin handoff disable` command requires
that you specify both a node or nodes to be targeted by the command and
whether you'd like to disable inbound handoff, outbound handoff, or
both. The `disable` command works just like `enable`. This command
would disable all forms of handoff on all nodes, to give just one
example:

```bash
riak admin handoff disable both --all
```

## Other Command-line Tools

In addition to enabling and disabling handoff, the
[`riak admin`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/) interface enables you to
retrieve a summary of handoff-related activity and other information.

### summary

The `summary` command provides high-level information about active
handoffs in a cluster.
+

```bash
riak admin handoff summary
```

This will return a table providing the following information
about each node in your cluster:

Header | Description
:------|:-----------
`Node` | The name of the node
`Total` | Total number of active transfers throughout the entire cluster
`Ownership` | Total number of ownership exchanges
`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).

### details

This command provides information only about active transfers.

```bash
riak admin handoff details
```

If no transfers are currently underway, this command will output `No
ongoing transfers`. Otherwise, it will display a table describing each
active transfer.

### config

This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#intra-cluster-handoff) on each node in
the cluster, including:

* `transfer_limit`
* `handoff.outbound`
* `handoff.inbound`
* `handoff.port`

Descriptions of those parameters can be found in the sections above.



diff --git a/content/riak/kv/3.0.1/using/cluster-operations/inspecting-node.md b/content/riak/kv/3.0.1/using/cluster-operations/inspecting-node.md new file mode 100644 index 0000000000..74bad2cd96 --- /dev/null +++ b/content/riak/kv/3.0.1/using/cluster-operations/inspecting-node.md @@ -0,0 +1,496 @@ +--- +title: "Inspecting a Node" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Inspecting a Node" + identifier: "cluster_operations_inspecting_node" + weight: 103 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/3.0.1/ops/running/nodes/inspecting + - /riak/kv/3.0.1/ops/running/nodes/inspecting + +--- +
When you need to inspect a Riak node to gather performance metrics or
diagnose potential issues, a number of tools are available to help;
these are either included with Riak itself or made available through the
Riak community.

This guide provides starting points and details on some of the available
tools for inspecting a Riak node.

## riak admin status

`riak admin status` is a subcommand of the `riak admin` command that is
included with every installation of Riak. The `status` subcommand
provides data related to the current operating status for a node. The
output of `riak admin status` is categorized and detailed below.

Please note, for some counters, such as `node_get_fsm_objsize`, a
minimum of 5 transactions is required for statistics to be generated.

#### Performance

We recommend checking stats every 90-120 seconds for best performance.

Repeated runs of the `riak admin status` command should not have a
negative performance impact, as the statistics are cached internally in
Riak.

### Active Stats

Active Stats represent current activity on the node.
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET
FSM Siblings are inextricably linked. These are one-minute stats.

Stat | Description
------------------------------|---------------------------------------------------
`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute
`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute
`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute
`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute
`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute

### Total Stats

Total Stats represent the total number of times a particular activity
has occurred since this node was started.

Stat | Description
---------------------------------------|---------------------------------------------------
`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes
`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes
`vnode_gets_total` | Total number of GETs coordinated by local vnodes
`vnode_puts_total` | Total number of PUTS coordinated by local vnodes
`read_repairs_total` | Total number of Read Repairs this node has coordinated
`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination
`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy
`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads
`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes
`vnode_index_writes_postings_total` | Total number of individual secondary index values written
`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes
`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted
`pbc_connects_total` | Total number of Protocol Buffers connections made
`precommit_fail` | Total number of pre-commit hook failures
`postcommit_fail` | Total number of post-commit hook failures
`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection
`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection
`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas
`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas
`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas
`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas

### Timestamps

Some of the Erlang applications that make up Riak contribute
statistics to `riak admin status`. The below timestamps record, in
Epoch time, the last time statistics for each application were
generated.

Stat | Description
--------------------|---------------------------------------------------
`riak_kv_stat_ts` | The last time Riak KV stats were generated.
`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated.
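For example, assuming a POSIX shell on the node, you can spot-check
those freshness timestamps by filtering the full status listing; the
`grep` pattern here is illustrative:

```bash
riak admin status | grep _stat_ts
```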
+
+### Ring
+
+General ring information is reported in `riak admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members` | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership` | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang’s `cpu_sup` module, whose
+documentation can be found at [ErlDocs:
+cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1` | The average number of active processes for the last 1 minute (equivalent to the top(1) command’s load average when divided by 256)
+`cpu_avg5` | The average number of active processes for the last 5 minutes (equivalent to the top(1) command’s load average when divided by 256)
+`cpu_avg15` | The average number of active processes for the last 15 minutes (equivalent to the top(1) command’s load average when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine;
+documentation can be found at [ErlDocs:
+Memory](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total` | Total allocated memory (sum of processes and system)
+`memory_processes` | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system` | Total allocated memory that is not directly related to an Erlang process
+`memory_atom` | Total amount of memory currently allocated for atom storage
+`memory_atom_used` | Total amount of memory currently used for atom storage
+`memory_binary` | Total amount of memory used for binaries
+`memory_code` | Total amount of memory allocated for Erlang code
+`memory_ets` | Total memory allocated for Erlang Term Storage
+`mem_total` | Total available system memory
+`mem_allocated` | Total memory allocated for this node
+
+### Erlang VM
+
+The statistics below describe properties of the Erlang VM.
+
+Stat | Description
+--------------------------|---------------------------------------------------
+`nodename` | The name this node uses to identify itself
+`connected_nodes` | A list of the nodes that this node is aware of at this time
+`sys_driver_version` | String representing the Erlang driver version in use by the runtime system
+`sys_global_heaps_size` | Current size of the shared global heap
+`sys_heap_type` | String representing the heap type in use (one of private, shared, hybrid)
+`sys_logical_processors` | Number of logical processors available on the system
+`sys_otp_release` | Erlang OTP release version in use on the node
+`sys_process_count` | Number of processes currently running in the Erlang VM
+`sys_smp_support` | Boolean value representing whether symmetric multi-processing (SMP) is available
+`sys_system_version` | Detailed Erlang version information
+`sys_system_architecture` | The node operating system and hardware architecture
+`sys_threads_enabled` | Boolean value representing whether threads are enabled
+`sys_thread_pool_size` | Number of threads in the asynchronous thread pool
+`sys_wordsize` | Size of Erlang term words in bytes as an integer; for example, 4 is returned on 32-bit architectures and 8 is returned on 64-bit architectures
+
+### Miscellaneous Information
+
+Miscellaneous Information provides additional details particular to this
+node.
+
+Stat | Description
+---------------------------|---------------------------------------------------
+`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as `undefined` if LevelDB is not being used.
+`disk` | Information about the disk, taken from Erlang's `disksup` module. Reported as `[{"ID",KBytes_Used,Percent_Util}]`.
+`storage_backend` | The storage backend currently in use.
+
+### Pipeline Metrics
+
+The following metrics from `riak_pipe` are generated during MapReduce
+operations.
+
+Stat | Description
+--------------------------------|---------------------------------------------------
+`pipeline_active` | The number of pipelines active in the last 60 seconds
+`pipeline_create_count` | The total number of pipelines created since the node was started
+`pipeline_create_error_count` | The total number of pipeline creation errors since the node was started
+`pipeline_create_error_one` | The number of pipeline creation errors in the last 60 seconds
+`pipeline_create_one` | The number of pipelines created in the last 60 seconds
+
+### Application and Subsystem Versions
+
+The specific version of each Erlang application and subsystem that
+makes up a Riak node is present in the `riak admin status` output. Each
+application is linked below next to its version identifier.
+
+Stat | Description
+------------------------|---------------------------------------------------
+`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl)
+`riak_control_version` | [Riak Control](http://github.com/basho/riak_control)
+`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info)
+`riak_search_version` | [Riak Search](http://github.com/basho/riak_search)
+`merge_index_version` | [Merge Index](http://github.com/basho/merge_index)
+`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv)
+`sidejob_version` | [Sidejob](http://github.com/basho/sidejob)
+`riak_api_version` | [Riak API](http://github.com/basho/riak_api)
+`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe)
+`riak_core_version` | [Riak Core](http://github.com/basho/riak_core)
+`bitcask_version` | [Bitcask](http://github.com/basho/bitcask)
+`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats)
+`webmachine_version` | [Webmachine](http://github.com/basho/webmachine)
+`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb)
+`inets_version` | [inets](http://erlang.org/doc/apps/inets/)
+`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js)
+`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/)
+`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/)
+`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon)
+`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/)
+`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/)
+`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/)
+`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/)
+`lager_version` | [Lager](http://github.com/DeadZen/lager)
+`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush)
+`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/)
+`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/)
+`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/)
+`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/)
+
+### Riak Search Statistics
+
+The following statistics related to Riak Search message queues are
+available.
+
+Stat | Description
+-----------------------------|---------------------------------------------------
+`riak_search_vnodeq_max` | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_mean` | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_median` | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_min` | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_total` | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started
+`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem
+
+Note that under ideal operation, and with the exception of
+`riak_search_vnodes_running`, these statistics should contain low values
+(e.g., 0-10). Higher values could indicate an issue.
+
+## `riak-debug`
+
+The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes.
+
+`riak-debug` also runs `riak admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well.
+
+{{% note title="Warning about `riak-debug` and `riak admin diag` usage" %}}
+The `riak-debug` and `riak admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak admin diag` can eventually cause the node to crash from atom table exhaustion.
+{{% /note %}}
+
+## Strong Consistency Stats
+
+Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The tables below list those stats.
+
+### GET-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
+`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
+`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute
+`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+
+### PUT-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
+`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
+`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute
+`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+
+## `riak admin diag`
+
+Running `riak admin diag` by itself will perform a check of all of the
+data partitions in your cluster.
+It will return a listing of partitions
+that have been checked, each of which looks something like this:
+
+```
+{1392993748081016843912887106182707253109560705024, % the partition checked
+ 'dev-rel@127.0.0.1'},                              % that partition's nodename
+```
+
+At the end of that (potentially very long) listing of checked
+partitions, it will print notices, warnings, and other pieces of
+information about issues that it has found, including date/time, message
+type, and a detailed description. Here's an example:
+
+```
+15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
+15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
+```
+
+Messages bear the following types (derived from
+[syslog](http://en.wikipedia.org/wiki/Syslog) severity levels):
+
+* `debug`
+* `info`
+* `notice`
+* `warning`
+* `error`
+* `critical`
+* `alert`
+* `emergency`
+
+#### Command flags
+
+Attaching the `--help` flag will return a list of flags and commands
+that can be used with Riaknostic:
+
+```
+Usage: riak admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]
+
+-h, --help      Display help/usage dialogue
+-d, --level     Minimum message severity level (default: notice)
+-l, --list      Describe available diagnostic tasks
+--export        Package system info in '/export.zip'
+check_name      A specific check to run
+```
+
+Running `riak admin diag` with the `--list` flag will return a list of
+available diagnostic checks. The following checks are available:
+
+Check | Description
+:-----|:-----------
+`disk` | Data directory permissions and atime
+`dumps` | Find crash dumps
+`memory_use` | Measure memory usage
+`nodes_connected` | Cluster node liveness
+`ring_membership` | Cluster membership validity
+`ring_preflists` | Check if the ring satisfies `n_val`
+`ring_size` | Check if the ring size is valid
+`search` | Check whether Riak Search is enabled on all nodes
+
+The `--level` flag enables you to specify the log level and thus to
+filter messages based on type. You can pass in any of the message types
+listed above (`debug`, `info`, etc.).
+
+The `--level` flag can be used when running `riak admin diag` with or
+without specifying a diagnostic check.
+
+#### Contributing
+
+Do you have an idea that would help us improve Riaknostic? If so, fork
+the [GitHub repository](https://github.com/basho/riaknostic) and send us
+a pull request with your changes. The code is documented with
+[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
+Docs a read before you contribute.
+
+If you want to run the Riaknostic script while developing and you don't
+have it hooked up to your local Riak installation, you can invoke it
+directly like so:
+
+```bash
+./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
+```
+
+Those extra options are usually assigned by the `riak admin` script for
+you, but here's how to set them:
+
+* `--etc` - The location of your Riak configuration directory (usually
+  `/etc`). In the example above, configuration is in the generated
+  directory of a source checkout of Riak.
+* `--base` - The "base" directory of Riak, usually the root of the
+  generated directory or `/usr/lib/riak` on Linux. Scan the
+  `riak admin` script for how the `RUNNER_BASE_DIR` variable is
+  assigned on your platform.
+* `--user` - The user/UID as which the Riak node runs. In a source
+  checkout, it's the current user; on most systems, it's `riak`.
+
+## Related Resources
+
+* [The riak admin configuration management tool](../../admin/riak admin/)
+* [Riaknostic](http://riaknostic.basho.com/)
+* [HTTP API Status](../../../developing/api/http/status/)
+
+
+
diff --git a/content/riak/kv/3.0.1/using/cluster-operations/load-balancing.md b/content/riak/kv/3.0.1/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..688d62ca8e
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/cluster-operations/load-balancing.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Load Balancing"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+#menu:
+#  riak_kv-3.0.1:
+#    name: "Load Balancing"
+#    identifier: "cluster_operations_load_balancing"
+#    weight: 111
+#    parent: "managing_cluster_operations"
+toc: true
+aliases:
+
+---
+
+**TODO: Add content (not sure where this exists in docs)**
+
+
+
diff --git a/content/riak/kv/3.0.1/using/cluster-operations/logging.md b/content/riak/kv/3.0.1/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..09435f2b5b
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/cluster-operations/logging.md
@@ -0,0 +1,47 @@
+---
+title: "Enabling and Disabling Debug Logging"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Logging"
+    identifier: "cluster_operations_logging"
+    weight: 105
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+
+---
+
+If you'd like to enable debug logging on the current node, i.e. set the
+console log level to `debug`, you can do so without restarting the node
+by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:
+
+```erlang
+lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
+```
+
+You should replace the file location above (`/var/log/riak/console.log`)
+with your platform-specific location, e.g. `./log/console.log` for a
+source installation. This location is specified by the
+`log.console.file` configuration parameter.
+
+If you'd like to enable debug logging on _all_ nodes instead of just one
+node, you can enter the Erlang console of any running node via `riak
+attach` and enter the following:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
+```
+
+As before, use the appropriate log file location for your cluster.
+
+At any time, you can set the log level back to `info`:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/using/cluster-operations/replacing-node.md b/content/riak/kv/3.0.1/using/cluster-operations/replacing-node.md
new file mode 100644
index 0000000000..31411a346f
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/cluster-operations/replacing-node.md
@@ -0,0 +1,100 @@
+---
+title: "Replacing a Node"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Replacing a Node"
+    identifier: "cluster_operations_replace_node"
+    weight: 102
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+
+---
+
+At some point, for various reasons, you might need to replace a node in
+your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/3.0.1/using/repair-recovery)). Here is the
+recommended way to go about replacing a node.
Here is the recommended way to go +about replacing a node. + +1. Back up your data directory on the node in question. In this example +scenario, we'll call the node `riak4`: + + ```bash + sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak + ``` + + If you have any unforeseen issues at any point in the node + replacement process, you can restore the node's data from this + backup. + +2. Download and install Riak on the new node you wish to bring into the +cluster and have it replace the `riak4` node. We'll call the new node +`riak7` for the purpose of this example. + +3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak-cli/#start): + + ```bash + riak start + ``` + +4. Plan the join of the new `riak7` node to an existing node already +participating in the cluster; for example `riak0` with the [`riak admin cluster join`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster) command executed on the new `riak7` node: + + ```bash + riak admin cluster join riak0 + ``` + +5. Plan the replacement of the existing `riak4` node with the new +`riak7` node using the [`riak admin cluster replace`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster) command: + + ```bash + riak admin cluster replace riak4 riak7 + ``` + + <div class=info> + <div class=title>Single Nodes</div> + If a node is started singly using default settings (as, for example, + you might do when you are building your first test environment), you + will need to remove the ring files from the data directory after you + edit `/etc/vm.args`. `riak admin cluster replace` will not work as + the node has not been joined to a cluster. + </div> + +6. Examine the proposed cluster changes with the [`riak admin cluster plan`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster) command executed on the new +`riak7` node: + + ```bash + riak admin cluster plan + ``` + +7. If the changes are correct, you can commit them with the +[`riak admin cluster commit`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster) command: + + ```bash + riak admin cluster commit + ``` + + If you need to clear the proposed plan and start over, use [`riak admin cluster clear`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster): + + ```bash + riak admin cluster clear + ``` + +Once you have successfully replaced the node, it should begin leaving +the cluster. You can check on ring readiness after replacing the node +with the [`riak admin ringready`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#ringready) +and [`riak admin member-status`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#member-status) +commands. + +{{% note title="Ring Settling" %}} +You'll need to make sure that no other ring changes occur between the time +when you start the new node and the ring settles with the new IP info. + +The ring is considered settled when the new node reports `true` when you run +the `riak admin ringready` command. 
+
+{{% /note %}}
+
+
+
diff --git a/content/riak/kv/3.0.1/using/cluster-operations/secondary-indexes.md b/content/riak/kv/3.0.1/using/cluster-operations/secondary-indexes.md
new file mode 100644
index 0000000000..a072ab0db7
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/cluster-operations/secondary-indexes.md
@@ -0,0 +1,85 @@
+---
+draft: true
+title: "Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+#menu:
+#  riak_kv-3.0.1:
+#    name: "Secondary Indexes"
+#    identifier: "cluster_operations_2i"
+#    weight: 109
+#    parent: "managing_cluster_operations"
+toc: true
+aliases:
+
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor
+regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris
+coniugis.
+
+## Troiana quoque
+
+Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe
+sanctique meum*; est. [Gente inimica
+premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret
+tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen
+ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret
+quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim
+suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis
+nimium in miserata?
+
+1. `In naribus aequos aberant`
+2. Naturae murmura te rimas suarum vulnus quod
+3. Socios leto loquor timide
+4. Ergo sub
+5. Patrias mihi consumite breve
+
+## Ruit huic movit luminibus excubias arma
+
+> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex
+vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat
+*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens
+cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec.
+
+1. Hic causam et dilecte nudae nec corpus
+2. Cor Si nive
+3. Petis equos perosa tu perterrita exitus non
+4. Per et et ire geminos parte
+5. Aqua coniunx cecidisse sonum
+
+```
+Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum
+potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et
+nec rubebant pietas, ipsa.
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/using/cluster-operations/strong-consistency.md b/content/riak/kv/3.0.1/using/cluster-operations/strong-consistency.md
new file mode 100644
index 0000000000..0a2786a10e
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/cluster-operations/strong-consistency.md
@@ -0,0 +1,76 @@
+---
+title: "Monitoring Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Monitoring Strong Consistency"
+    identifier: "cluster_operations_strong_consistency"
+    weight: 110
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+
+---
+
+{{% note title="Please Note:" %}}
+Riak KV's strong consistency is an experimental feature and may be removed
+from the product in the future. Strong consistency is not commercially
+supported or production-ready. Strong consistency is incompatible with
+Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB
+Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its
+usage in any production environment.
+{{% /note %}}
+
+## Monitoring Strong Consistency
+
+Riak provides a wide variety of data related to the current operating
+status of a node. This data is available by running the [`riak admin status`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#status) command. That data now
+includes statistics specific to strongly consistent operations.
+
+A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/inspecting-node).
+All strong consistency-related stats are prefixed with `consistent_`,
+e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are
+so-called "one-minute stats," meaning that they reflect node activity in
+the last minute.
+
+Strong consistency stats fall into two categories: GET-related and
+PUT-related stats.
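+
+Because all of these stats share the `consistent_` prefix noted above,
+you can view just the strong consistency stats by filtering the
+`riak admin status` output. A minimal sketch, assuming `riak admin` is
+on your `PATH`:
+
+```bash
+# Show only strong-consistency-related stats
+riak admin status | grep consistent_
+```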
+
+### GET-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute
+`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node
+`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute
+`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute
+`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response
+
+### PUT-related stats
+
+Stat | Description
+:----|:-----------
+`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute
+`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node
+`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute
+`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute
+`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response
+
+
+
diff --git a/content/riak/kv/3.0.1/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/3.0.1/using/cluster-operations/tictac-active-anti-entropy.md
new file mode 100644
index 0000000000..eca4c40a6c
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/cluster-operations/tictac-active-anti-entropy.md
@@ -0,0 +1,34 @@
+---
+title: "TicTac Active Anti-Entropy"
+description: "An Active Anti-Entropy library"
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "TicTac Active Anti-Entropy"
+    identifier: "TicTac_aae"
+    weight: 111
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/kv/3.0.1/ops/advanced/tictacaae/
+  - /riak/3.0.1/ops/advanced/ticktacaae/
+
+---
+
+
+
+Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning.
+
+## TicTac AAE
+
+The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to fully integrate the library into the KV 3.0 release.
+
+TicTac Active Anti-Entropy makes two changes to the way anti-entropy has previously worked in Riak. The first change is to the way Merkle trees are constructed, so that they are built incrementally. The second change allows the underlying anti-entropy key store to be key-ordered while still allowing faster access to keys via their Merkle tree location or the last modified date of the object.
+
+## Configuring AAE
+
+Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and
+off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements.
+
+
diff --git a/content/riak/kv/3.0.1/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/3.0.1/using/cluster-operations/v2-multi-datacenter.md
new file mode 100644
index 0000000000..64fc9ce0e9
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/cluster-operations/v2-multi-datacenter.md
@@ -0,0 +1,263 @@
+---
+title_supertext: "V2 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "V2 Multi-Datacenter"
+    identifier: "cluster_operations_v2"
+    weight: 115
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v2/operations
+  - /riak/kv/3.0.1/ops/mdc/v2/operations
+
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication system is largely
+controlled by the `riak-repl` command. The sections below detail the
+available subcommands.
+
+## add-listener
+
+Adds a listener (primary) to the given node, IP address, and port.
+
+```bash
+riak-repl add-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-nat-listener
+
+Adds a NAT-aware listener (primary) to the given node, IP address, port,
+NAT IP, and NAT port. If a non-NAT listener already exists with the same
+internal IP and port, it is "upgraded" to a NAT listener.
+
+```bash
+riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010
+```
+
+## del-listener
+
+Removes and shuts down a listener (primary) on the given node, IP
+address, and port.
+
+```bash
+riak-repl del-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-site
+
+Adds a site (secondary) to the local node, connecting to the specified
+listener.
+
+```bash
+riak-repl add-site <ipaddr> <portnum> <sitename>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-site 10.0.1.156 9010 newyork
+```
+
+## del-site
+
+Removes a site (secondary) from the local node by name.
+
+```bash
+riak-repl del-site <sitename>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-site newyork
+```
+
+## status
+
+Obtains status information about replication. Reports counts on how much
+data has been transmitted, transfer rates, message queue lengths of
+clients and servers, number of fullsync operations, and connection
+status. This command only displays useful information on the leader
+node.
+
+```bash
+riak-repl status
+```
+
+## start-fullsync
+
+Manually initiates a fullsync operation with connected sites.
+
+```bash
+riak-repl start-fullsync
+```
+
+## cancel-fullsync
+
+Cancels any fullsync operations in progress. If a partition is in
+progress, synchronization will stop after that partition completes.
+During cancellation, `riak-repl status` will show `cancelled` in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+## pause-fullsync
+
+Pauses any fullsync operations in progress. If a partition is in
+progress, synchronization will pause after that partition completes.
+While paused, `riak-repl status` will show `paused` in the status
+information. Fullsync may be cancelled while paused.
+
+```bash
+riak-repl pause-fullsync
+```
+
+## resume-fullsync
+
+Resumes any fullsync operations that were paused. If a fullsync
+operation was running at the time of the pause, the next partition will
+be synchronized. If not, it will wait until the next `start-fullsync`
+command or `fullsync_interval`.
+
+```bash
+riak-repl resume-fullsync
+```
+
+## riak-repl Status Output
+
+The following definitions describe the output of the `riak-repl status`
+command. Please note that many of these statistics will only appear on
+the current leader node, and that all counts will be reset to 0 upon
+restarting Riak.
+
+### Client
+
+Field | Description
+:-----|:-----------
+`client_stats` | See <a href="{{< baseurl >}}riak/kv/3.0.1/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a>
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected sites
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of site connections made to this node
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+
+### Server
+
+Field | Description
+:-----|:-----------
+`server_bytes_recv` | The total number of bytes the server (listener) has received
+`server_bytes_sent` | The total number of bytes the server (listener) has sent
+`server_connect_errors` | The number of listener to site connection errors
+`server_connects` | The number of times the listener connects to the client site
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_stats` | See <a href="{{< baseurl >}}riak/kv/3.0.1/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a>
+
+### Elections and Objects
+
+Field | Description
+:-----|:-----------
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+
+### Other
+
+Field | Description
+:-----|:-----------
+`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>`
+`[sitename]_ips` | Defines a replication site
+`leader` | Which node is the current leader of the cluster
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the client (site) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The listeners currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>`connected` - The IP address and port of a connected client (site)</li><li>`cluster_name` - The name of the connected client (site)</li><li>`connecting` - The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul>
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/3.0.1/configuring/v2-multi-datacenter/) guide for more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the server (listener) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/3.0.1/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+## Keylist Strategy
+
+The following fields appear under both the `keylist_server` and
+`keylist_client` fields. Any differences are described in the table.
+
+Field | Description
+------|------------
+`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
+`partition_start` | The number of elapsed seconds since replication has started on a given partition
+`stage_start` | The number of elapsed seconds since replication has started on a given stage
+`get_pool_size` | The number of Riak get finite state workers available to process requests
+
+
+
diff --git a/content/riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter.md
new file mode 100644
index 0000000000..ea53e5230a
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter.md
@@ -0,0 +1,425 @@
+---
+title_supertext: "V3 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "V3 Multi-Datacenter"
+    identifier: "cluster_operations_v3"
+    weight: 114
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v3/operations
+  - /riak/kv/3.0.1/ops/mdc/v3/operations
+
+---
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.1/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/3.0.1/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/3.0.1/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/3.0.1/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{<baseurl>}}riak/kv/3.0.1/using/reference/multi-datacenter/statistics
+
+This document explains how to manage replication with the `riak-repl`
+command. The behavior of some of these commands can be altered by
+setting appropriate [configuration][config v3 mdc] values.
+
+All commands need to be run only once on a single node of a cluster for
+the changes to propagate to all other nodes. All changes will persist
+across node restarts and will automatically take effect when nodes are
+added to the cluster.
+
+## Cluster Connectivity
+
+#### clustername
+
+Set the `clustername` for all nodes in a Riak cluster.
+
+* Without a parameter, returns the current name of the cluster
+* With a parameter, names the current cluster
+
+To **set** the `clustername`:
+
+* Syntax: `riak-repl clustername <clustername>`
+* Example: `riak-repl clustername Boston`
+
+To **get** the `clustername`:
+
+* Syntax: `riak-repl clustername`
+* Example: `riak-repl clustername`
+
+#### connect
+
+The `connect` command establishes communications from a source cluster
+to a sink cluster of the same ring size. The `host:port` of the sink
+cluster is used for this. The IP and port to connect to can be found in
+the `advanced.config` of the remote cluster, under `riak_core` and
+`cluster_mgr`.
+
+The `host` can be either an IP address
+
+* Syntax: `riak-repl connect <ip>:<port>`
+* Example: `riak-repl connect 192.168.2.1:9080`
+
+...or a hostname that will resolve to an IP address.
+
+* Syntax: `riak-repl connect <host>:<port>`
+* Example: `riak-repl connect Austin:9080`
+
+#### disconnect
+
+Disconnects a source cluster from a sink cluster.
+
+You may define a `host:port` combination
+
+* Syntax: `riak-repl disconnect <host>:<port>`
+* Example: `riak-repl disconnect 192.168.2.1:9080`
+
+...or use the *name* of the cluster.
+
+* Syntax: `riak-repl disconnect <sink_clustername>`
+* Example: `riak-repl disconnect Austin`
+
+#### connections
+
+Display a list of connections between source and sink clusters.
+
+* Syntax: `riak-repl connections`
+* Example: `riak-repl connections`
+
+#### clusterstats
+
+Displays current cluster stats using an optional `ip:port` as well as an
+optional `protocol-id`.
+
+`protocol-id` can be one of the following:
+
+* `cluster_mgr`
+* `rt_repl`
+* `fs_repl`
+
+The `clusterstats` command in use:
+
+* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>`
+* Example: `riak-repl clusterstats 192.168.2.1:9080`
+* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl`
+
+
+## Realtime Replication Commands
+
+#### realtime enable
+
+Enable realtime replication from a source cluster to sink clusters.
+
+This will start queuing updates for replication. The cluster will still
+require an invocation of `realtime start` for replication to occur.
+
+* Syntax: `riak-repl realtime enable <sink_clustername>`
+* Example: `riak-repl realtime enable Austin`
+
+#### realtime disable
+
+Disable realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime disable <sink_clustername>`
+* Example: `riak-repl realtime disable Austin`
+
+
+#### realtime start
+
+Start realtime replication connections from a source cluster to sink
+clusters. See also `realtime enable` (above).
+
+* Syntax: `riak-repl realtime start <sink_clustername>`
+* Example: `riak-repl realtime start Austin`
+
+#### realtime stop
+
+Stop realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime stop <sink_clustername>`
+* Example: `riak-repl realtime stop Austin`
+
+
+## Fullsync Replication Commands
+
+These behaviors can be altered by using the `advanced.config`
+`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information.
+
+#### fullsync enable
+
+Enable fullsync replication from a source cluster to sink clusters. By
+default, a fullsync will begin as soon as a connection to the remote
+cluster is established.
+
+* Syntax: `riak-repl fullsync enable <sink_clustername>`
+* Example: `riak-repl fullsync enable Austin`
+
+#### fullsync disable
+
+Disables fullsync for a cluster.
+
+* Syntax: `riak-repl fullsync disable <sink_clustername>`
+* Example: `riak-repl fullsync disable Austin`
+
+#### fullsync start
+
+Starts a fullsync. If the application configuration
+`fullsync_on_connect` is set to `false`, a fullsync needs to be started
+manually. This is also used to trigger a periodic fullsync using a cron
+job. While a fullsync is in progress, a `start` command is ignored and a
+message is logged.
+
+* Syntax: `riak-repl fullsync start <sink_clustername>`
+* Example: `riak-repl fullsync start Austin`
+
+#### fullsync stop
+
+Stops a fullsync.
+
+* Syntax: `riak-repl fullsync stop <sink_clustername>`
+* Example: `riak-repl fullsync stop Austin`
+
+## Cascading Realtime Writes
+
+#### realtime cascades
+
+Shows the current cascading realtime setting.
+
+* Syntax: `realtime cascades`
+* Example: `riak-repl realtime cascades`
+
+#### realtime cascades always
+
+Enable realtime cascading writes.
+
+* Syntax: `realtime cascades always`
+* Example: `riak-repl realtime cascades always`
+
+#### realtime cascades never
+
+Disable realtime cascading writes.
+
+* Syntax: `realtime cascades never`
+* Example: `riak-repl realtime cascades never`
+
+
+## NAT
+
+**Note**: See the [V3 Multi Data Center Replication With NAT][config v3 nat] for more information.
+
+#### nat-map show
+
+Show the current NAT mapping table.
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### `proxy-get enable`
+
+Enable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provide a redirection to the `<to-cluster-id>` for `proxy_get` if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+
+Show the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+
+Deletes an existing redirect so that `proxy_get` requests once again go
+to the original provider cluster ID.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Display this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+    ```bash
+    riak-repl show-local-cluster-id
+    ```
+
+    Possible output:
+
+    ```
+    local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+    ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster. This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster across all nodes on that cluster for a fullsync
+to a sink cluster. This means that if one has configured fullsync for two
+different clusters, both with a `max_fssource_cluster` of 5, then 10
+fullsync workers can be in progress. This only affects nodes on the
+source cluster on which this parameter is defined via the configuration
+file or the command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node’s number of maximum connections. This only affects nodes on the
+sink cluster on which this parameter is defined via the configuration
+file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
+(Version 3) separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+    ```bash
+    riak-repl modes mode_repl12 mode_repl13
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+    ```bash
+    riak-repl modes
+    ```
+
+    Possible output:
+
+    ```
+    Current replication modes: [mode_repl12,mode_repl13]
+    ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and realtime replication replicate data from source clusters to sink clusters,
+but some configurations and metadata (such as search indices and bucket properties) will
+not be replicated.
+
+Non-replication of certain configurations and metadata supports
+heterogeneous cluster configurations in replication, but there are
+operational steps you can take when you want homogeneous cluster
+configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you would change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Bucket and Bucket Type properties on the source cluster
+will _not_ be replicated from source clusters to sink clusters.
+
+If you want the properties for Buckets or Bucket Types
+present on the source cluster to be propagated to sink clusters,
+you should update this data for each cluster at the same
+time you would change the source cluster.
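+
+For example, a bucket type property change could be applied to the
+source cluster and then to each sink cluster in turn. A hypothetical
+sketch (the type name `users` and the property change are illustrative
+only):
+
+```bash
+# Run the same property update against the source cluster
+# and then against each sink cluster
+riak admin bucket-type update users '{"props":{"n_val":5}}'
+```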
+ + + diff --git a/content/riak/kv/3.0.1/using/performance.md b/content/riak/kv/3.0.1/using/performance.md new file mode 100644 index 0000000000..3339d11b58 --- /dev/null +++ b/content/riak/kv/3.0.1/using/performance.md @@ -0,0 +1,268 @@ +--- +title: "Improving Performance" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Performance" + identifier: "managing_performance" + weight: 206 + parent: "managing" +toc: true +aliases: + - /riak/kv/3.0.1/ops/tuning/linux/ + - /riak/3.0.1/ops/tuning/linux/ + +--- + +Many Unix-like operating systems and distributions are tuned for desktop +or light use out of the box and not for a production database. This +guide describes recommended system performance tunings for operators of +new and existing Riak clusters. The tunings present in this guide should +be considered as a starting point. It is important to make note of what +changes are made and when in order to measure the impact of those +changes. + +For performance and tuning recommendations specific to running Riak +clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/3.0.1/using/performance/amazon-web-services). + +{{% note title="Note on other operating systems" %}} +Unless otherwise specified, the tunings recommended below are for Linux +distributions. Users implementing Riak on BSD and Solaris distributions can +use these tuning recommendations to make analogous changes in those operating +systems. +{{% /note %}} + +## Storage and File System Tuning + +### Virtual Memory + +Due to the heavily I/O-focused profile of Riak, swap usage can result in +the entire server becoming unresponsive. We recommend setting +`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much +as possible: + +```config +vm.swappiness = 0 +``` + +Ideally, you should disable swap to ensure that Riak's process pages are +not swapped. Disabling swap will allow Riak to crash in situations where +it runs out of memory. This will leave a crash dump file, named +`erl_crash.dump`, in the `/var/log/riak` directory which can be used to +determine the cause of the memory usage. + +### Transparent Huge Pages (THP) + +Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately this operation is rather OS specific. As many of our customers are running Red Hat 6, we have included instructions on how to do so underneath. If you are using a different operating system, please refer to documentation for your OS. + +In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line: + +``` +transparent_hugepage=never +``` + +For the change to become effective, a server reboot is required. + +{{% note title="Note on Kernel Tuning Tools" %}} +Some Kernel tuning tools such as ktune specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the Kernel tuning tool you are using as to how to disable THP. +{{% /note %}} + +### Mounts + +Riak makes heavy use of disk I/O for its storage operations. 
It is
+important that you mount volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
+touched when read. This flag can be set temporarily using the following
+command:
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` option can also be set in `/etc/fstab` to
+make the change permanent.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is completely fair queuing
+or `cfq`, which is designed for desktop use. While a good
+general-purpose scheduler, it is not designed to provide the kind of
+throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* The `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* The `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+
+To check the scheduler depth for block device `sda`, use the following
+command:
+
+```bash
+cat /sys/block/sda/queue/nr_requests
+```
+
+To increase the scheduler depth to 1024, use the following command:
+
+```bash
+echo 1024 > /sys/block/sda/queue/nr_requests
+```
+
+### Filesystem
+
+Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and
+[XFS](http://xfs.org/index.php/Main_Page) are recommended on some
+operating systems for greater reliability and recoverability.
+
+At this time, Basho can recommend using ZFS on Solaris, SmartOS, and
+OmniOS. ZFS may work well with Riak on direct Solaris clones like
+IllumOS, but we cannot yet recommend this. [ZFS on
+Linux](http://zfsonlinux.org) is still too early in its project lifetime
+to be recommendable for production use due to concerns that have been
+raised about excessive memory use. ZFS on FreeBSD is more mature than
+ZFS on Linux, but Basho has not yet performed sufficient performance and
+reliability testing to recommend using ZFS and Riak on FreeBSD.
+
+In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and
+[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on
+operating systems on which ZFS or XFS are not available or recommended.
+
+The ext4 file system defaults include two options that increase
+integrity but slow performance. Because Riak's integrity is based on
+multiple nodes holding the same data, these two options can be changed
+to boost I/O performance. We recommend setting `barrier=0` and
+`data=writeback` when using the ext4 filesystem.
+
+Similarly, the XFS file system defaults can be optimized to improve
+performance. We recommend setting `nobarrier`, `logbufs=8`,
+`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem.
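+
+As a hedged illustration (the device name and mount point below are
+assumptions for your environment), a `noatime` mount with the ext4
+options recommended above could be persisted like so:
+
+```bash
+# Append an example fstab entry combining noatime with the recommended
+# ext4 options; adjust the device, mount point, and options to taste.
+echo '/dev/sdb1 /var/lib/riak ext4 noatime,barrier=0,data=writeback 0 0' | sudo tee -a /etc/fstab
+
+# Remount the volume so the new options take effect without a reboot.
+sudo mount -o remount /var/lib/riak
+```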
+
+As with the `noatime` setting, these options should be added to
+`/etc/fstab` so that they are persisted across server restarts.
+
+## Kernel and Network Tuning
+
+The following settings are minimally sufficient to improve many aspects
+of Riak usage on Linux, and should be added or updated in
+`/etc/sysctl.conf`:
+
+```config
+net.ipv4.tcp_max_syn_backlog = 40000
+net.core.somaxconn = 40000
+net.core.wmem_default = 8388608
+net.core.rmem_default = 8388608
+net.ipv4.tcp_sack = 1
+net.ipv4.tcp_window_scaling = 1
+net.ipv4.tcp_fin_timeout = 15
+net.ipv4.tcp_keepalive_intvl = 30
+net.ipv4.tcp_tw_reuse = 1
+net.ipv4.tcp_moderate_rcvbuf = 1
+```
+
+{{% note title="Note on system default" %}}
+In general, these recommended values should be compared with the system
+defaults and only changed if benchmarks or other performance metrics indicate
+that networking is the bottleneck.
+{{% /note %}}
+
+The following settings are optional, but may improve performance on a
+10Gb network:
+
+```config
+net.core.rmem_max = 134217728
+net.core.wmem_max = 134217728
+net.ipv4.tcp_mem = 134217728 134217728 134217728
+net.ipv4.tcp_rmem = 4096 277750 134217728
+net.ipv4.tcp_wmem = 4096 277750 134217728
+net.core.netdev_max_backlog = 300000
+```
+
+Certain network interfaces ship with on-board features that have been
+shown to hinder Riak network performance. These features can be disabled
+via `ethtool`.
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+If you change these settings, further tuning may be required, as they
+affect all network operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho in nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, increasing the open
+files limit is necessary. See [Open Files Limit]({{<baseurl>}}riak/kv/3.0.1/using/performance/open-files-limit/) for more
+details.
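+
+As a quick sketch (the `65536` value is an assumption; see the Open
+Files Limit page linked above for recommended values and persistent
+settings), you can inspect and raise the limit in the shell that will
+launch Riak:
+
+```bash
+# Show the current per-process open file limit for this shell
+ulimit -n
+
+# Raise it for this session; the hard limit must already permit this
+ulimit -n 65536
+```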
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/3.0.1/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/3.0.1/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/3.0.1/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/3.0.1/using/performance/open-files-limit/)
+
+
+
diff --git a/content/riak/kv/3.0.1/using/performance/amazon-web-services.md b/content/riak/kv/3.0.1/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..0b4602b463
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/performance/amazon-web-services.md
@@ -0,0 +1,247 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/tuning/aws
+  - /riak/kv/3.0.1/ops/tuning/aws
+
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types which encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are Disk I/O, RAM, and Network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used.
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-Optimized EC2 instances, which provide between 500 Megabits per
+second and 1,000 Megabits per second of throughput with [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available and recommended for use with Provisioned
+IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they would
+physical nodes to compensate for the performance
+variability caused by shared, virtualized resources. Plan to have more
+EC2-instance-based nodes than physical server nodes when estimating cluster
+size with respect to node count.
+{{% /note %}} + +## Operating System + +### Clocks + +NTP is configured by default on Amazon EC2 Linux instances. Please +refer to the [Set the Time for an +Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html) +section of the EC2 documentation for steps on verifying if NTP is +working properly. If NTP is not working properly, significant clock +drift can occur. + +### Mounts and Scheduler + +On EBS volumes, the **deadline** scheduler should be used. To check the +scheduler in use for block device xvdf, for example, use the following +command: + +```bash +cat /sys/block/xvdf/queue/scheduler +``` + +To set the scheduler to deadline, use the following command: + +```bash +echo deadline > /sys/block/xvdf/queue/scheduler +``` + +More information on the disk scheduler is available in [Improving Performance](../). + +### Virtual Memory Subsystem + +EBS volumes have considerably less bandwidth than hardware disks. To +avoid saturating EBS bandwidth and inducing IO latency spikes, it is +recommended to tune the Linux virtual memory subsystem to flush smaller +amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings). + +### Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, back up log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally +and are not affected by a wider problem like an AWS service outage. Try +to determine the cause of the problem from the data you have collected. +If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or re-sold under Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk. + +Have your collected data ready when contacting TI Tokyo Client Services. A +Client Services Engineer (CSE) might request log files, configuration +files, or other information. + +## Data Loss + +Many failures either do not entail data loss or have minimal loss that +can be repaired automatically, without intervention. Outage of a single +node does not necessarily cause data loss, as other replicas of every +key are available elsewhere in the cluster. Once the node is detected as +down, other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called hinted handoff). + +The more severe data loss scenarios usually relate to hardware failure +(in the case of AWS, service failure or instance termination). In the +cases where data is lost, several options are available for restoring +the data: + +1. Restore from backup. A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but can be used to partially restore data from + lost EBS volumes. If running in a RAID configuration, rebuilding the + array may also be possible. +2. Restore from Multi-Datacenter Replication. If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the + `riak-repl` command. +3. Restore using intra-cluster repair. 
Riak versions 1.2 and greater
+   include a "repair" feature which will restore lost partitions with
+   data from other replicas. This currently has to be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho CSE.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho are strongly recommended.
+
+## Benchmarking
+
+Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that
+simulates application operations by constructing and communicating
+approximately compatible data payloads with the Riak cluster directly.
+
+Benchmarking is critical to determining the appropriate EC2 instance
+types, and strongly recommended. More information is available on
+benchmarking Riak clusters with [Basho Bench](../benchmarking).
+
+Besides running Basho Bench, we also advise that you load test Riak with
+your own tests to ensure that the load imparted by MapReduce queries,
+full-text queries, and index queries is within the expected range.
+
+## Simulating Upgrades, Scaling, and Failure States
+
+In addition to simply measuring performance, it is also important to
+measure how performance degrades when the cluster is not in
+steady-state. While under a simulation of live load, the following
+states might be simulated:
+
+1. Stop one or more nodes normally and restart them after a few moments
+   (simulates [rolling upgrade](../../../setup/upgrading/cluster)).
+2. Join two or more nodes to the cluster.
+3. Have nodes leave the cluster (after step #2).
+4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then
+   restart it.
+5. Hard-reboot a node's instance using the AWS console and then
+   restart it.
+6. Hard-stop and destroy a node's instance and build a new one from
+   backup.
+7. Via networking, e.g. firewall, partition one or more nodes from
+   the rest of the cluster and then restore the original
+   configuration.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. While the Riak node is down, other nodes may
+also be at risk if free capacity is low on the rest of the cluster, so
+monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to replace
+it.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart.
In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral is local to
+the instance, generally performs better, but disappears when instances
+go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. Along with EBS, you can optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
+
+
+
diff --git a/content/riak/kv/3.0.1/using/performance/benchmarking.md b/content/riak/kv/3.0.1/using/performance/benchmarking.md
new file mode 100644
index 0000000000..3c4f7a2c7b
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/performance/benchmarking.md
@@ -0,0 +1,602 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/building/benchmarking
+  - /riak/kv/3.0.1/ops/building/benchmarking
+
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   `basho_bench`. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster. Do not run the `basho_bench` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download `basho_bench`
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/3.0.1/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building `basho_bench` from source.
Later
+  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the `erlang-crypto` package, which
+  is required by `basho_bench`.
+* Install `git` (to check out the `basho_bench` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory in which to generate the results:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed `basho_bench` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common `~/` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+`/home/username/bench_results/current/`.
+
+If you built `basho_bench` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput - Operations per second over the duration of the test.
+* Latency at 99th percentile, 99.9th percentile and max latency for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running `basho_bench`, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
+
+#### Installing R on Ubuntu
+
+```bash
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed `basho_bench` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+`/home/username/bench_results/summary.png`.
+
+If you have built `basho_bench` from source, you can just use
+`make`. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the `basho_bench/README`.
+
+## How does it work?
+
+When Basho Bench starts (`basho_bench.erl`), it reads the
+configuration (`basho_bench_config.erl`), creates a new results
+directory, and then sets up the test (`basho_bench_app.erl` and
+`basho_bench_sup.erl`).
+
+During test setup, Basho Bench creates the following:
+
+* One **stats process** (`basho_bench_stats.erl`). This process
+  receives notifications when an operation completes, plus the
+  elapsed time of the operation, and stores it in a histogram. At
+  regular intervals, the histograms are dumped to `summary.csv` as
+  well as operation-specific latency CSVs (e.g. `put_latencies.csv`
+  for the PUT operation).
+* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting
+  (`basho_bench_worker.erl`). The worker process wraps a driver
+  module, specified by the [driver](#driver)
+  configuration setting. The driver is randomly invoked using the
+  distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the
+  driver invokes operations is governed by the [mode](#mode) setting.
+
+Once these processes have been created and initialized, Basho Bench
+sends a run command to all worker processes, causing them to begin the
+test. Each worker is initialized with a common seed value for random
+number generation to ensure that the generated workload is reproducible
+at a later date.
+
+During the test, the workers repeatedly call `driver:run/4`, passing in
+the next operation to run, a keygen function, a valuegen function, and
+the last state of the driver. The worker process times the operation,
+and reports this to the stats process when the operation has completed.
+
+Finally, once the test has been run for the duration specified in the
+config file, all workers and stats processes are terminated and the
+benchmark ends. The measured latency and throughput of the test can be
+found in `./tests/current/`. Previous results are in timestamped
+directories of the form `./tests/YYYYMMDD-HHMMSS/`.
+
+## Configuration
+
+Basho Bench ships with a number of sample configuration files, available
+in the `examples` directory.
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`driver:run/4` function with a new operation. There are two possible
+values:
+
+* `max` - generate as many ops per second as possible
+* `{rate, N}` - generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e.: as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE, respectively.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+    [{basho_bench_driver_null,run,
+        [asdfput,
+         #Fun<basho_bench_keygen.4.4674>,
+         #Fun<basho_bench_valgen.0.1334>,
+         undefined]},
+     {basho_bench_worker,
+        worker_next_op,1},
+     {basho_bench_worker,
+        max_worker_run_loop,1}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` - Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` - Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` - Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` - Directly invokes the Bitcask API
+* `basho_bench_driver_dets` - Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` - operation completed successfully
+* `{error, Reason, NewState}` - operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` - operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` - operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` - generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` - the same as
+  `sequential_int`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` - the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` - selects an integer from a uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` - selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` - the same as `pareto_int`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a key generator function.
The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` - takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` - takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` - generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` - generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` - generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed
+% starting at 1000 bytes and a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+Default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+``` + +#### riakclient_replies + +This value is used for R-values during a get operation, and W-values +during a put operation. + +```erlang +% Expect 1 reply. +{riakclient_replies, 1}. +``` + +#### riakclient_bucket + +The Riak bucket to use for reading and writing values. The Default is +`<<"test">>`. + +```erlang +% Use the "bench" bucket. +{riakclient_bucket, <<"bench">>}. +``` + +### basho_bench_driver_riakc_pb Settings + +#### riakc_pb_ips + +A list of IP addresses to connect the workers to. A random IP will be +chosen for each worker. + +The default is `{riakc_pb_ips, [{127,0,0,1}]}` + +```erlang +% Connect to a cluster of 3 machines +{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]} +``` + +#### riakc_pb_port + +The port on which to connect to the PBC interface. + +The default is `{riakc_pb_port, 8087}` + +#### riakc_pb_bucket + +The bucket to use for testing. + +The default is `{riakc_pb_bucket, <<"test">>}` + +### basho_bench_driver_http_raw Settings + +#### http_raw_ips + +A list of IP addresses to connect the workers to. Each worker makes +requests to each IP in a round-robin fashion. + +The default is `{http_raw_ips, ["127.0.0.1"]}` + +```erlang +% Connect to a cluster of machines in the 10.x network +{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}. +``` + +#### http_raw_port + +Select the default port to connect to for the HTTP server. + +The default is `{http_raw_port, 8098}`. + +```erlang +% Connect on port 8090 +{http_raw_port, 8090}. +``` + +#### http_raw_path + +The base path to use for accessing Riak, usually `"/riak/<bucket>"`. + +The default is `{http_raw_path, "/riak/test"}`. + +```erlang +% Place test data in another_bucket +{http_raw_path, "/riak/another_bucket"}. +``` + +#### http_raw_params + +Additional parameters to add to the end of the URL. This can be used +to set the `r`/`w`/`dw`/`rw` parameters as desired. + +The default is `{http_raw_params, ""}`. + +```erlang +% Set R=1, W=1 for testing a system with n_val set to 1 +{http_raw_params, "?r=1&w=1"}. +``` + +#### http_raw_disconnect_frequency + +How often, in seconds or number of operations, the HTTP clients +(workers) should forcibly disconnect from the server. + +The default is `{http_raw_disconnect_frequency, infinity}` (which +means that Basho Bench should never forcibly disconnect). + +```erlang +% Disconnect after 60 seconds +{http_raw_disconnect_frequency, 60}. + +% Disconnect after 200 operations +{http_raw_disconnect_frequency, {ops, 200}}. +``` + +## Custom Driver + +A custom driver must expose the following callbacks. + +```erlang +% Create the worker +% ID is an integer +new(ID) -> {ok, State} or {error, Reason}. + +% Run an operation +run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}. +``` + +See the [existing +drivers](https://github.com/basho/basho_bench/tree/master/src) for +more details. 
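+
+As a hedged, self-contained sketch of wiring in a custom driver (the
+module name `my_driver` and all paths here are hypothetical, not part
+of Basho Bench itself), you can compile the module and point a config
+file at it via `code_paths`:
+
+```bash
+# Compile the custom driver; my_driver.erl implements new/1 and run/4
+# as described above.
+mkdir -p /tmp/my_driver_ebin
+erlc -o /tmp/my_driver_ebin my_driver.erl
+
+# Minimal config that loads the compiled driver via code_paths.
+cat > /tmp/my_driver.config <<'EOF'
+{mode, max}.
+{duration, 1}.
+{concurrent, 1}.
+{driver, my_driver}.
+{code_paths, ["/tmp/my_driver_ebin"]}.
+{operations, [{get, 1}]}.
+EOF
+
+basho_bench --results-dir /tmp/bench_results /tmp/my_driver.config
+```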
+ + + diff --git a/content/riak/kv/3.0.1/using/performance/erlang.md b/content/riak/kv/3.0.1/using/performance/erlang.md new file mode 100644 index 0000000000..6e3c525cf2 --- /dev/null +++ b/content/riak/kv/3.0.1/using/performance/erlang.md @@ -0,0 +1,371 @@ +--- +title: "Erlang VM Tuning" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Erlang VM" + identifier: "performance_erlang" + weight: 105 + parent: "managing_performance" +toc: true +aliases: + - /riak/3.0.1/ops/tuning/erlang + - /riak/kv/3.0.1/ops/tuning/erlang + +--- + +Riak was written almost exclusively in [Erlang](http://www.erlang.org) +and runs on an Erlang virtual machine (VM), which makes proper Erlang VM +tuning an important part of optimizing Riak performance. The Erlang VM +itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm). + +The table below lists some of the parameters that are available, showing +both their names as used in Erlang and their names as Riak parameters. + +Erlang parameter | Riak parameter +:----------------|:-------------- +[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads` +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K` +[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit` +[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports` +[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online` +[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W` +[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size` +[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables` +[`+scl`](http://www.erlang.org/doc/main/erl.html#+scl) | `erlang.schedulers.compaction_of_load` +[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval` +[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp` +[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing` +[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size` +[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime` +[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after` +[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump` +[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables` +`-name` | `nodename` + +{{% note title="Note on upgrading to 2.0" %}} +In versions of Riak prior to 2.0, Erlang VM-related parameters were specified +in a `vm.args` configuration file; in versions 2.0 and later, all +Erlang-VM-specific parameters are set in the `riak.conf` file. If you're +upgrading to 2.0 from an earlier version, you can still use your old `vm.args` +if you wish. Please note, however, that if you set one or more parameters in +both `vm.args` and in `riak.conf`, the settings in `vm.args` will override +those in `riak.conf`. 
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
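+
+Before adjusting these parameters, it can help to see what the Erlang
+VM detects on a given machine. A quick check from the shell (a sketch;
+assumes `erl` is on the `PATH`):
+
+```bash
+# Print how many scheduler threads the VM configures and brings online
+# by default on this machine, then exit.
+erl -noshell -eval 'io:format("~p configured, ~p online~n", [erlang:system_info(schedulers), erlang:system_info(schedulers_online)]), halt().'
+```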
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` parameter to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. epmd uses an unpredictable port for inter-node communication
+by default, binding to port 0, which means that it uses the first
+available port. This can make it difficult to configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively.
The +following would set the range to ports between 3000 and 5000: + +```riakconf +erlang.distribution.port_range.minimum = 3000 +erlang.distribution.port_range.maximum = 5000 +``` + +```appconfig +%% The older, app.config-based system uses different parameter names +%% for specifying the minimum and maximum port + +{kernel, [ + % ... + {inet_dist_listen_min, 3000}, + {inet_dist_listen_max, 5000} + % ... + ]} +``` + +You can set the Erlang VM to use a single port by setting the minimum to +the desired port while setting no maximum. The following would set the +port to 5000: + +```riakconf +erlang.distribution.port_range.minimum = 5000 +``` + +```appconfig +{kernel, [ + % ... + {inet_dist_listen_min, 5000}, + % ... + ]} +``` + +If the minimum port is unset, the Erlang VM will listen on a random +high-numbered port. + +### Maximum Ports + +You can set the maximum number of concurrent ports/sockets used by the +Erlang VM using the `erlang.max_ports` setting. Possible values range +from 1024 to 134217727. The default is 65536. In `vm.args` you can use +either `+Q` or `-env ERL_MAX_PORTS`. + +## Asynchronous Thread Pool + +If thread support is available in your Erlang VM, you can set the number +of asynchronous threads in the Erlang VM's asynchronous thread pool +using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0 +to 1024. If thread support is available on your OS, the default is 64. +Below is an example setting the number of async threads to 600: + +```riakconf +erlang.async_threads = 600 +``` + +```vmargs ++A 600 +``` + +### Stack Size + +In addition to the number of asynchronous threads, you can determine the +memory allocated to each thread using the +`erlang.async_threads.stack_size` parameter, which corresponds to the +`+a` Erlang flag. You can determine that size in Riak using KB, MB, GB, +etc. The valid range is 16-8192 kilowords, which translates to 64-32768 +KB on 32-bit architectures. While there is no default, we suggest a +stack size of 16 kilowords, which translates to 64 KB. We suggest such a +small size because the number of asynchronous threads, as determined by +`erlang.async_threads` might be quite large in your Erlang VM. The 64 KB +default is enough for drivers delivered with Erlang/OTP but might not be +large enough to accommodate drivers that use the `driver_async()` +functionality, documented +[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend +setting higher values with caution, always keeping the number of +available threads in mind. + +## Kernel Polling + +You can utilize kernel polling in your Erlang distribution if your OS +supports it. Kernel polling can improve performance if many file +descriptors are in use; the more file descriptors, the larger an effect +kernel polling may have on performance. Kernel polling is enabled by +default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`. +This corresponds to the +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the +Erlang VM. You can disable it by setting `erlang.K` to `off`. + +## Warning Messages + +Erlang's +[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an +event manager that registers error, warning, and info events from the +Erlang runtime. By default, events from the `error_logger` are mapped as +warnings, but you can also set messages to be mapped as errors or info +reports using the `erlang.W` parameter (or `+W` in `vm.args`). The +possible values are `w` (warnings), `errors`, or `i` (info reports). 
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) (Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.process_limit` (explained in the section [above](#process-limit)) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can determine that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every N seconds,
+where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can determine how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
+
+
+
diff --git a/content/riak/kv/3.0.1/using/performance/latency-reduction.md b/content/riak/kv/3.0.1/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..9d883293d4
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/performance/latency-reduction.md
@@ -0,0 +1,267 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/tuning/latency-reduction
+  - /riak/kv/3.0.1/ops/tuning/latency-reduction
+
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about them.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB. Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric                        | Explanation
+:-----------------------------|:-----------
+`fsm_node_get_objsize_mean`   | The mean object size encountered by this node in the last minute
+`fsm_node_get_objsize_median` | The median object size encountered by this node in the last minute
+`fsm_node_get_objsize_95`     | The 95th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_99`     | The 99th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_100`    | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and re-start your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes).
This increases the size of the
+distributed Erlang buffer from its default of 1024 KB. Re-start your
+node when configuration changes have been made.
+
+Large objects can also impact latency even if they're only present on
+some nodes. If increased latency occurs only on N nodes, where N is your
+[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes.
+
+If large objects are suspected, you should also audit the behavior of
+siblings in your cluster, as explained in the [next section](#siblings).
+
+## Siblings
+
+In Riak, object conflicts are handled by keeping multiple versions of
+the object in the cluster either until a client takes action to resolve
+the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects).
+
+### Mitigation
+
+The best way to monitor siblings is through the same [`riak admin status`](../../admin/riak-admin/#status) interface used to monitor
+object size (or via an HTTP `GET` request to `/stats`). In the output of
+`riak admin status` on each node, you'll see the following
+sibling-related statistics:
+
+Metric                         | Explanation
+:------------------------------|:-----------
+`node_get_fsm_siblings_mean`   | The mean number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_95`     | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_99`     | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+`node_get_fsm_siblings_100`    | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute
+
+Is there an upward trend in these statistics over time? Are there any
+large outliers? Do these trends correspond to your observed latency
+spikes?
+
+If you believe that sibling creation problems could be responsible for
+latency issues in your cluster, you can start by checking the following:
+
+* If `allow_mult` is set to `true` for some or all of your buckets, be
+  sure that your application is correctly resolving siblings. Be sure to
+  read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate.
+  If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly.
+* Application errors are a common source of problems with
+  siblings. Updating the same key over and over without passing a
+  [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion.
If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) (DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W is set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. This setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn), while setting it higher will induce less
+frequent merges with more I/O churn. To find settings that are ideal for
+your use case, we recommend checking out our guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments.
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak is a heavily I/O- and network-intensive system.
+Bottlenecks on either front can lead to undue latency in your cluster.
+We recommend an active monitoring strategy to detect problems
+immediately when they arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+at your disposal, including
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat).
+
+To diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+You can also set an object size threshold past which a write will
+succeed but will register a warning in the logs by adjusting the
+`object.size.warning_threshold` parameter. The default is `5MB`.
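+
+As an illustrative sketch (the values here are assumptions for a
+cluster of deliberately small objects, not general recommendations), a
+stricter configuration in `riak.conf` might look like this:
+
+```riakconf
+object.size.warning_threshold = 1MB
+object.size.maximum = 5MB
+```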
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs an error (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
+
+
+
diff --git a/content/riak/kv/3.0.1/using/performance/multi-datacenter-tuning.md b/content/riak/kv/3.0.1/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..a575d6fddc
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,47 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+
+---
+
+[perf index]: {{<baseurl>}}riak/kv/3.0.1/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/using/performance/open-files-limit.md b/content/riak/kv/3.0.1/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..0b6e7b985f
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/performance/open-files-limit.md
@@ -0,0 +1,351 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/tuning/open-files-limit/
+  - /riak/kv/3.0.1/ops/tuning/open-files-limit/
+
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation.
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session required pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session required pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
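+
+As an alternative to restarting (an untested assumption that may vary
+by macOS version), you may be able to apply the new limits immediately
+by loading the daemon by hand:
+
+```bash
+sudo launchctl load -w /Library/LaunchDaemons/limit.maxfiles.plist
+```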
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file.
+
+4\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```bash
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/3.0.1/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..1569846ff1
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,50 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times.
First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference.md b/content/riak/kv/3.0.1/using/reference.md
new file mode 100644
index 0000000000..fcf55f86e1
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference.md
@@ -0,0 +1,135 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+aliases:
+
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/architecture.md b/content/riak/kv/3.0.1/using/reference/architecture.md
new file mode 100644
index 0000000000..516e7b27cd
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/architecture.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+#menu:
+#  riak_kv-3.0.1:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+aliases:
+
+---
+
+<!-- TODO: Content -->
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/bucket-types.md b/content/riak/kv/3.0.1/using/reference/bucket-types.md
new file mode 100644
index 0000000000..3492885f4d
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/bucket-types.md
@@ -0,0 +1,823 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+aliases:
+
+---
+
+Bucket types allow groups of buckets to share configuration details and
+for Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
+cluster to a pre-2.0 version _as long as you have not created and activated a
+bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/3.0.1/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, which relate to
+  [Riak data types]({{<baseurl>}}riak/kv/3.0.1/developing/data-types) and [strong consistency]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/strong-consistency), respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types/#bucket-properties-and-operations).
You cannot simply take a
+bucket `my_bucket` and assign it a type the way that you would, say,
+set `allow_mult` to `false` or `n_val` to `5`, because there is no
+`type` parameter contained within the bucket's properties (i.e.
+`props`).
+
+Instead, bucket types are applied to buckets _on the basis of how those
+buckets are queried_. Queries involving bucket types take the following
+form:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+In the older system, only bucket and key are specified in queries:
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+## When to Use Bucket Types
+
+In many respects, bucket types are a major improvement over the older
+system of bucket configuration, including the following:
+
+* Bucket types are more flexible because they enable you to define a
+  bucket configuration and then change it if you need to.
+* Bucket types are more reliable because the buckets that bear a given
+  type only have their properties changed when the type is changed.
+  Previously, it was possible to change the properties of a bucket only
+  through client requests.
+* Whereas bucket properties can only be altered by clients interacting
+  with Riak, bucket types are more of an operational concept. The
+  `riak admin bucket-type` interface (discussed in depth below) enables
+  you to manage bucket configurations on the operations side, without
+  recourse to Riak clients.
+
+For these reasons, we recommend _always_ using bucket types in versions
+of Riak 2.0 and later.
+
+## Managing Bucket Types Through the Command Line
+
+Bucket types are created, updated, activated, and more through the
+`riak admin bucket-type` interface.
+
+Below is a full list of available sub-commands:
+
+Command | Action | Form |
+:-------|:-------|:-----|
+`create` | Create or modify a bucket type before activation | `create <type> <json>` |
+`activate` | Activate a bucket type | `activate <type>` |
+`list` | List all currently available bucket types and their activation status | `list` |
+`status` | Display the status and properties of a specific bucket type | `status <type>` |
+`update` | Update a bucket type after activation | `update <type> <json>` |
+
+### Creating a Bucket Type
+
+Creating new bucket types involves using the `create <type> <json>`
+command, where `<type>` is the name of the type and `<json>` is a JSON
+object of the following form:
+
+```json
+{
+  "props": {
+    "prop1": "val1",
+    "prop2": "val2",
+    ...
+  }
+}
+```
+
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.1/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.1/developing/getting-started) section.
+
+If creation is successful (in this case, for a type named
+`type_using_defaults`), you should see the following output:
+
+```
+type_using_defaults created
+```
+
+{{% note %}}
+The `create` command can be run multiple times prior to a bucket type being
+activated. Riak will persist only those properties contained in the final call
+of the command.
+{{% /note %}}
+
+Creating bucket types that assign properties _always_ involves passing
+stringified JSON to the `create` command. One way to do that is to pass
+a JSON string directly.
The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak admin bucket-type create from_json_file "$(cat props.json)"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```bash
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock:20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after a bucket is
+created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then these
+properties remain fixed for the life of the type.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+{{% /note %}}
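+
+For instance, a sketch that reuses the `n_equals_1` type created
+earlier and raises its (mutable) `n_val` property to an illustrative
+value of 2:
+
+```bash
+riak admin bucket-type update n_equals_1 '{"props":{"n_val":2}}'
+```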
+
+## Buckets as Namespaces
+
+In versions of Riak prior to 2.0, all queries are made to a bucket/key
+pair, as in the following example read request:
+
+```java
+Location myKey = new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch = new FetchValue.Builder(myKey).build();
+client.execute(fetch);
+```
+
+```ruby
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```php
+$location = new Location('my_key', new Bucket('my_bucket'));
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('my_bucket')
+bucket.get('my_key')
+```
+
+```csharp
+var id = new RiakObjectId("my_bucket", "my_key");
+client.Get(id);
+```
+
+```javascript
+client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Object} = riakc_pb_socket:get(Pid,
+                                   <<"my_bucket">>,
+                                   <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+```
+
+With the addition of bucket types in Riak 2.0, bucket types can be used
+as _an additional namespace_ on top of buckets and keys. The same bucket
+name can be associated with completely different data if it is used in
+accordance with a different type. Thus, the following two requests will
+be made to _completely different objects_, even though the bucket and key
+names are the same:
+
+```java
+Location key1 =
+  new Location(new Namespace("type1", "my_bucket"), "my_key");
+Location key2 =
+  new Location(new Namespace("type2", "my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(key1).build();
+FetchValue fetch2 = new FetchValue.Builder(key2).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1'));
+$location2 = new Location('my_key', new Bucket('my_bucket', 'type2'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('type1').bucket('my_bucket')
+bucket2 = client.bucket_type('type2').bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("type1", "my_bucket", "my_key");
+var id2 = new RiakObjectId("type2", "my_bucket", "my_key");
+var rslt1 = client.Get(id1);
+var rslt2 = client.Get(id2);
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'type1', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+
+client.fetchValue({
+    bucketType: 'type2', bucket: 'my_bucket', key: 'my_key'
+}, function (err, rslt) {
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"type1">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 {<<"type2">>, <<"my_bucket">>},
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key
+```
+
+{{% note title="Note on object location" %}}
+In Riak 2.x, _all requests_ must be made to a location specified by a bucket
+type, bucket, and key rather than to a bucket/key pair, as in previous
+versions.
+
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
+```
+
+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`.
+
+In version 2.0, this changed in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolutions in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties above, we did not change
+the `allow_mult` parameter. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind when using versions of Riak 2.0 and
+later any time that you create, activate, and use your own bucket types.
+It is still possible to set `allow_mult` to `false` in any given bucket
+type, but it must be done explicitly. If we wanted to set
+`allow_mult` to `false` in our `n_val_of_2` bucket type from above, we
+would need to create or modify the already existing type as follows:
+
+```bash
+riak admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/3.0.1/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/3.0.1/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again. If it still does not
If it still does not + work, then there may be network partition or other issues that need + to be addressed in your cluster. + +4. Activating the new bucket type: + + ```bash + riak admin bucket-type activate user_account_bucket + ``` + + If activation is successful, the console will return + `user_account_bucket has been activated`. The bucket type is now + ready to be used. + +## Client Usage Example + +If you have created the bucket type `no_siblings` (with the property +`allow_mult` set to `false`) and would like that type to be applied to +the bucket `sensitive_user_data`, you would need to run operations on +that bucket in accordance with the format above. Here is an example +write: + +```java +Location key = new Location("sensitive_user_data") + .setBucketType("no_siblings") + .setKey("user19735"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(BinaryValue.create("{ ... user data ... }")); +StoreValue store = new StoreValue.Builder(obj).build(); +client.execute(store); +``` + +```ruby +bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data') +obj = Riak::RObject.new(bucket, 'user19735') +obj.content_type = 'application/json' +obj.raw_data = '{ ... user data ... }' +obj.store +``` + +```php +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildJsonObject("{ ... user data ... }") + ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data') +obj = RiakObject(client, bucket, 'user19735') +obj.content_type = 'application/json' +obj.data = '{ ... user data ... }' +obj.store() +``` + +```csharp +var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735"); +var obj = new RiakObject(id, "{\"name\":\"Bob\"}"); +var rslt = client.Put(obj); +``` + +```javascript +var obj = { name: 'Bob' }; +client.storeValue({ + bucketType: 'no_siblings', bucket: 'sensitive_user_data', + key: 'user19735', value: obj +}, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>}, + <<"user19735">>, + <<"{ ... user data ... }">>, + <<"application/json">>), +riakc_pb_socket:put(Pid, Object). +``` + +```curl +curl -XPUT \ + -H "Content-Type: application/json" \ + -d "{ ... user data ... }" \ + http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735 +``` + +In this example, the bucket `sensitive_user_data` bears the +configuration established by the `no_siblings` bucket type, and it bears +that configuration _on the basis of the query's structure_. This is +because buckets act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/keys-and-objects). + +Let's say that we're using Riak to store internet memes. We've been +using a bucket called `current_memes` using the bucket type +`no_siblings` (from above). At a certain point, we decide that our +application needs to use a new bucket called `old_memes` to store memes +that have gone woefully out of fashion, but that bucket also needs to +bear the type `no_siblings`. + +The following request seeks to add the meme "all your base are belong to +us" to the `old_memes` bucket. 
If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+  .setContentType("text/plain")
+  .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj).build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket from
+above if we didn't want to deal with siblings, vclocks, and the like,
+and we could use a `siblings_allowed` bucket type (with all of the
+default properties except `allow_mult` set to `true`). This would give
+us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
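+
+As a sketch of that isolation over HTTP (assuming both types exist and
+have been activated), each of the following requests addresses a
+distinct object, even though the bucket and key are identical:
+
+```curl
+curl http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+```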
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/custom-code.md b/content/riak/kv/3.0.1/using/reference/custom-code.md
new file mode 100644
index 0000000000..309c4ad106
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/custom-code.md
@@ -0,0 +1,135 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/advanced/install-custom-code/
+  - /riak/kv/3.0.1/ops/advanced/install-custom-code/
+
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/3.0.1/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/3.0.1/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. You should note that in Erlang, a file
+must have the same name as the module it contains. So if you are given a file named
+`validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak. 
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+ on supported platforms
+
+Compiling the module is a straightforward process.
+
+```text
+erlc validate_json.erl
+```
+
+Next, you'll need to define a path where compiled modules can be stored
+and from which they can be loaded. For our example, we'll use a temporary directory `/tmp/beams`,
+but you should choose a directory for production functions based on your
+own requirements such that they will be available where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+### Configure
+
+Copy the `validate_json.beam` file to the `/tmp/beams` directory.
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again, in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+  %% ...
+  {add_paths, ["/tmp/beams/"]},
+  %% ...
+```
+
+After updating `app.config`, Riak must be restarted. In production cases, if
+you are adding configuration changes to multiple nodes, you should do so in
+a rolling fashion, taking time to ensure that the Riak key
+value store has fully initialized and become available for use.
+
+This is done with the `riak admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure riak_kv is active before restarting the next
+node. 
+{{% /note %}} + + + diff --git a/content/riak/kv/3.0.1/using/reference/failure-recovery.md b/content/riak/kv/3.0.1/using/reference/failure-recovery.md new file mode 100644 index 0000000000..aa4c1d3a29 --- /dev/null +++ b/content/riak/kv/3.0.1/using/reference/failure-recovery.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +aliases: + +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/handoff.md b/content/riak/kv/3.0.1/using/reference/handoff.md
new file mode 100644
index 0000000000..46b9ce92d5
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/handoff.md
@@ -0,0 +1,201 @@
+---
+title: "Handoff Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Handoff"
+    identifier: "managing_ref_handoff"
+    weight: 101
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/running/handoff/
+  - /riak/kv/3.0.1/ops/running/handoff/
+
+---
+
+[cluster ops handoff]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/handoff
+
+Riak is a distributed system built with two essential goals in mind:
+
+* **fault tolerance**, whereby a Riak cluster can withstand node
+  failure, network partitions, and other events in a way that does not
+  disrupt normal functioning, and
+* **scalability**, whereby operators can gracefully add and remove nodes
+  to/from a Riak cluster
+
+Both of these goals demand that Riak be able to either temporarily or
+permanently re-assign responsibility for portions of the keyspace. That
+re-assignment is referred to as **intra-cluster handoff** (or simply
+**handoff** in our documentation).
+
+## Types of Handoff
+
+Intra-cluster handoff typically takes one of two forms: **hinted
+handoff** and **ownership transfer**.
+
+Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick
+up the slack, so to speak, assuming responsibility for node C's
+operations. When node C comes back online, responsibility will be handed
+back to the original vnodes.
+
+Ownership transfer is different because it is meant to be permanent.
+It occurs when a [vnode]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very
+makeup of a cluster changes, e.g. when nodes are added or removed from
+the cluster. In this case, responsibility for portions of the keyspace
+needs to be fundamentally re-assigned.
+
+Both types of handoff are handled automatically by Riak. Operators do
+have the option, however, of enabling and disabling handoff on
+particular nodes or all nodes and of configuring key aspects of Riak's
+handoff behavior. More information can be found below.
+
+## Configuring Handoff
+
+A full listing of configurable parameters can be found in our
+[configuration files]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#intra-cluster-handoff)
+document. The sections below provide a more narrative description of
+handoff configuration.
+
+### SSL
+
+If you want to encrypt handoff behavior within a Riak cluster, you need
+to provide each node with appropriate paths for an SSL certfile (and
+potentially a keyfile). The configuration below would designate a
+certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`:
+
+```riakconf
+handoff.ssl.certfile = /ssl_dir/cert.pem
+handoff.ssl.keyfile = /ssl_dir/key.pem
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_ssl_options, [
+        {certfile, "/ssl_dir/cert.pem"},
+        {keyfile, "/ssl_dir/key.pem"}
+    ]},
+    %% Other configs
+]}
+```
+
+### Port
+
+You can set the port used by Riak for handoff-related interactions using
+the `handoff.port` parameter. 
The default is 8099. This would change the
+port to 9000:
+
+```riakconf
+handoff.port = 9000
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_port, 9000},
+    %% Other configs
+]}
+```
+
+### Background Manager
+
+Riak has an optional background manager that limits handoff activity in
+the name of saving resources. The manager can help prevent system
+response degradation during times of heavy load, when multiple
+background tasks may contend for the same system resources. The
+background manager is disabled by default. The following will enable it:
+
+```riakconf
+handoff.use_background_manager = on
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_use_background_manager, on},
+    %% Other configs
+]}
+```
+
+### Maximum Rejects
+
+If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search/),
+those subsystems can block handoff of primary key/value data, i.e. data
+that you interact with via normal reads and writes.
+
+The `handoff.max_rejects` setting enables you to set the maximum
+duration that a [vnode]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode) can be blocked by multiplying the
+`handoff.max_rejects` setting by the value of
+[`vnode_management_timer`]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#vnode_management_timer).
+Thus, if you set `handoff.max_rejects` to 10 and
+`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems
+can block K/V handoff for a maximum of 50 seconds. The default for
+`handoff.max_rejects` is 6, while the default for
+`vnode_management_timer` is `10s`. This would set `max_rejects` to 10:
+
+```riakconf
+handoff.max_rejects = 10
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_rejected_max, 10},
+    %% Other configs
+]}
+```
+
+### Transfer Limit
+
+You can adjust the number of node-to-node transfers (which includes
+handoff) using the `transfer_limit` parameter. The default is 2. Setting
+this higher will increase node-to-node communication, at the expense of
+higher resource consumption. This would set `transfer_limit` to 5:
+
+```riakconf
+transfer_limit = 5
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_concurrency, 5},
+    %% Other configs
+]}
+```
+
+## Enabling and Disabling Handoff
+
+Handoff can be enabled and disabled in two ways: via configuration or
+on the command line.
+
+### Enabling and Disabling via Configuration
+
+You can enable and disable both outbound and inbound handoff on a node
+using the `handoff.outbound` and `handoff.inbound` settings,
+respectively. Both are enabled by default. The following would disable
+both:
+
+```riakconf
+handoff.outbound = off
+handoff.inbound = off
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {disable_outbound_handoff, true},
+    {disable_inbound_handoff, true},
+    %% Other configs
+]}
+```
+
+### Enabling and Disabling Through the Command Line
+
+Check out [Cluster Operations: Handoff][cluster ops handoff] for steps on enabling and disabling handoff via the command line. 
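+
+As a rough sketch of what those steps typically look like (the commands below
+assume the 3.0-style `riak admin` CLI; the linked page is the authoritative
+reference for the exact subcommands and flags):
+
+```bash
+# Temporarily disable both inbound and outbound handoff on all nodes,
+# then re-enable it once maintenance is complete
+riak admin handoff disable both --all
+riak admin handoff enable both --all
+
+# Inspect ongoing handoff activity across the cluster
+riak admin handoff summary
+```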
+ + + diff --git a/content/riak/kv/3.0.1/using/reference/jmx.md b/content/riak/kv/3.0.1/using/reference/jmx.md new file mode 100644 index 0000000000..2d047fa431 --- /dev/null +++ b/content/riak/kv/3.0.1/using/reference/jmx.md @@ -0,0 +1,190 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.1/ops/running/monitoring/jmx + - /riak/kv/3.0.1/ops/running/monitoring/jmx + +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td> + <td>float</td> + <td>Mean PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMedian</tt></td> + <td>float</td> + <td>Median PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePuts</tt></td> + <td>int</td> + <td>Number of PUTs in past minute</td> + </tr> + <tr> + <td><tt>NodePutsTotal</tt></td> + <td>int</td> + <td>Number of PUTs since node start</td> + </tr> + <tr> + <td><tt>PBCActive</tt></td> + <td>int</td> + <td>Number of active Protocol Buffers connections</td> + </tr> + <tr> + <td><tt>PBCConnects</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections in past minute</td> + </tr> + <tr> + <td><tt>PBCConnectsTotal</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections since node start</td> + </tr> + <tr> + <td><tt>RingCreationSize</tt></td> + <td>int</td> + <td>Number of partitions in Riak ring</td> + </tr> + <tr> + <td><tt>VnodeGets</tt></td> + <td>int</td> + <td>Number of vnode-level GETs in past minute</td> + </tr> + <tr> + <td><tt>VnodeGetsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level GETs since node start</td> + </tr> + <tr> + <td><tt>VnodePuts</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs in past minute</td> + </tr> + <tr> + <td><tt>VnodePutsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs since node start</td> + </tr> +</table> + + + diff --git a/content/riak/kv/3.0.1/using/reference/logging.md b/content/riak/kv/3.0.1/using/reference/logging.md new file mode 100644 index 0000000000..d9b86b8d33 --- /dev/null +++ b/content/riak/kv/3.0.1/using/reference/logging.md @@ -0,0 +1,301 @@ +--- +title: "Logging Reference" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Logging" + identifier: "managing_ref_logging" + weight: 100 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.1/ops/running/logging + - /riak/kv/3.0.1/ops/running/logging + +--- + +[cluster ops log]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/logging + +Logging in Riak KV is handled by a Basho-produced logging framework for +[Erlang](http://www.erlang.org) called +[lager](https://github.com/basho/lager). + +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. + +## Log Directory + +Riak's log files are stored in a `/log` directory on each node. The +location of that directory differs from platform to platform. The table +below shows you where log files are stored on all supported operating +systems. + +OS | Directory +:--|:--------- +Ubuntu, Debian, CentOS, RHEL | `/var/log/riak` +Solaris, OpenSolaris | `/opt/riak/log` +Source install and Mac OS X | `./log` (where the `.` represents the root installation directory) + +## Log Files + +Below is a list of files that can be found in each node's `/log` +directory: + +File | Significance +:----|:------------ +`console.log` | Console log output +`crash.log` | Crash logs +`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. +`run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. 
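+
+For a quick look at any of these files, you can follow them directly on a
+node; for example (assuming a package install on Ubuntu, per the directory
+table above):
+
+```bash
+# Watch general activity and errors as they are written
+tail -f /var/log/riak/console.log /var/log/riak/error.log
+```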
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.9.7 a few of the log messages may contain newline
+characters, preventing reliable identification of the end of each log
+message when ingesting log files with external tools.
+
+A known workaround is to ingest not the logs enabled by the
+`log.console` configurable parameter but rather those enabled by the
+`log.syslog` configurable parameter and processed by syslog, e.g. using the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option (e.g. see [this StackExchange topic
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474))
+or its equivalent in other syslog implementations.
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log Files
+
+In each node's `/log` directory, you will see at least one of each of
+the following:
+
+File | Contents
+:----|:--------
+`console.log` | General messages from all Riak subsystems
+`crash.log` | Catastrophic events, such as node failures, running out of disk space, etc.
+`erlang.log` | Events from the Erlang VM on which Riak runs
+`run_erl.log` | The command-line arguments used when starting Riak
+
+### Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable it
+and disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+By default, Riak stores error messages in `./log/error.log`.
+You can change this using the `log.error.file` parameter. 
Here is an
+example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated when a certain size threshold is reached, at designated
+times, or both.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` - Every night at midnight
+* `$D23` - Every day at 23:00 (11 pm)
+* `$W0D20` - Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` - On the first day of every month at midnight
+* `$M5D6` - On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, using any size denomination you wish,
+e.g. `KB` or `MB`. The default is 64 KB. The following would set that
+maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix to be appended to
+each syslog message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages amongst the available levels, which are listed below. The
+default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither. 
This is determined by the value that you give to the
+`log.console` parameter, which gives you one of four options:
+
+* `file` - Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/3.0.1/setup/installing/source), but will differ on platform-specific installations,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` - Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak-cli/#attach-direct) command
+* `both` - Console logs will be emitted both to a file and to standard
+  output
+* `off` - Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log]
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/multi-datacenter.md b/content/riak/kv/3.0.1/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..a68f828f43
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/multi-datacenter.md
@@ -0,0 +1,53 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling and disabling of per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities. 
+
+[Learn More >>][ref mdc comparison]
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/multi-datacenter/comparison.md b/content/riak/kv/3.0.1/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..d7177db499
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,100 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/mdc/comparison
+  - /riak/kv/3.0.1/ops/mdc/comparison
+
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/3.0.1/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/3.0.1/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destination of replication data. Sites and listeners are
+  manually configured on each node in a cluster. This can be a burden to
+  the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity was achieved in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters are exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes, and the sink has N nodes, there will be M
+  realtime connections. 
Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/3.0.1/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..234340e0bf
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,168 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/mdc/monitoring
+  - /riak/kv/3.0.1/ops/mdc/monitoring
+
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identification and trending of issues or delays in realtime replication
+are important for identifying a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute. 
+
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The size of `rt_dirty` can quantify the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue backlogs, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+increases to the maximum size of the queue (displayed in the
+`realtime_queue_stats` section of the replication status output as
+`max_bytes`) can be made to accommodate a usage pattern of expected high
+load.
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects. 
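+
+As a practical starting point, the statistics called out above can be pulled
+from the HTTP stats endpoint. A minimal sketch (assuming a node listening on
+`localhost:8098` and the `jq` utility; the field names follow the status
+output described in this document, though the exact JSON shape may vary by
+version):
+
+```bash
+# Poll no more than once per minute, per the note on query frequency above
+curl -s http://localhost:8098/riak-repl/stats | \
+  jq '{rt_dirty: .rt_dirty, realtime_queue_stats: .realtime_queue_stats}'
+```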
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+1. Perform a GET for your canary object on both your source and sink
+   clusters, noting their states. The state of the object in each cluster
+   can be referred to as state `S0`, or the object's initial state.
+2. PUT an update for your canary object to the source cluster, updating
+   the state of the object to the next state, `S1`.
+3. Perform a GET for your canary object on the sink cluster, comparing the state
+   of the object on the source cluster to the state of the object on the
+   sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+Getting a rough estimate of how long it takes an object PUT to a source
+cluster to be replicated to a sink cluster can be done by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters. 
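+
+The sketch below wires the general process and the SLA check together as a
+small shell script. It is a hypothetical illustration only: the hostnames,
+bucket, key, and SLA value are placeholders, and it assumes both clusters
+expose the standard HTTP API:
+
+```bash
+#!/usr/bin/env bash
+# Hypothetical canary check: write a fresh state to the source cluster,
+# then poll the sink until the state matches or the SLA window passes.
+SOURCE="http://source-node:8098"           # placeholder source node
+SINK="http://sink-node:8098"               # placeholder sink node
+KEY_PATH="buckets/canary/keys/repl_canary" # placeholder canary location
+SLA_SECONDS=60
+
+STATE="state-$(date +%s)"                  # the next state, e.g. S1
+curl -s -XPUT -H "Content-Type: text/plain" -d "$STATE" "$SOURCE/$KEY_PATH"
+
+start=$(date +%s)
+until [ "$(curl -s "$SINK/$KEY_PATH")" = "$STATE" ]; do
+  if [ $(( $(date +%s) - start )) -ge "$SLA_SECONDS" ]; then
+    echo "ALERT: replication exceeded ${SLA_SECONDS}s SLA" >&2
+    exit 1
+  fi
+  sleep 1
+done
+echo "Canary replicated in $(( $(date +%s) - start ))s"
+```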
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/3.0.1/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..396caf14a7
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,66 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/mdc/per-bucket
+  - /riak/kv/3.0.1/ops/mdc/per-bucket
+
+---
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+Some changes have occurred between 1.1 and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+  * `true` - Enable replication (realtime + fullsync)
+  * `false` - Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+  * `realtime` - Replication only occurs in realtime for this bucket
+  * `fullsync` - Replication only occurs during a fullsync operation
+  * `both` - Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this goes for properties such as `backend`.
+If the bucket doesn't exist in the destination cluster, Riak will create
+it with the default backend and _not_ with the backend used in the
+source cluster.
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/multi-datacenter/statistics.md b/content/riak/kv/3.0.1/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..a818093819
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,244 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/mdc/statistics
+  - /riak/kv/3.0.1/ops/mdc/statistics
+
+---
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
+
+1. Many of these statistics will appear only on the current
+   leader node
+2. 
The counts for all statistics will be reset to 0 upon restarting Riak
+   unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | A sink error has been detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | A source error has been detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+These values are under `realtime_queue_stats`. 
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster.
+`fullsync_start_time` | The time the current fullsync to the specified cluster began.
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync.
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When completed, this will be the same as the total number of partitions in the ring.
+`error_exits` | If a sync failed or was aborted, the partition will be queued again and retried later
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes, a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync is already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
+`strategy` | The strategy that fulfills fullsync replication. 
In previous versions of replication, different values could be configured. This value could be changed depending on your replication needs.
+`fullsync_worker` | The Erlang process id of the fullsync worker.
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received by the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received by the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication sink
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node.
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation. 
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | The operation that the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The name of the connected site (sink) as configured. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | The operation that the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for
+more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size in bytes of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects that have been sent but not yet acknowledged by the client
+`queue_max_pending` | The maximum number of objects that can be "in flight" before the queue refuses to send any more
+
+
+## Accessing Replication Web-Based Statistics
+
+These stats can be accessed via the command line with the following
+command:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats
+```
+
+A simple way to view formatted statistics is to use a command such as:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/object-deletion.md b/content/riak/kv/3.0.1/using/reference/object-deletion.md
new file mode 100644
index 0000000000..37c8164865
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/object-deletion.md
@@ -0,0 +1,121 @@
+---
+title: "Object Deletion Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Object Deletion"
+    identifier: "managing_ref_object_deletion"
+    weight: 103
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/advanced/deletion
+
+---
+
+[concept eventual consistency]: ../../../learn/concepts/eventual-consistency
+[concept clusters]: ../../../learn/concepts/clusters
+[glossary vnode]: ../../../learn/glossary/#vnode
+[usage delete objects]: ../../../developing/usage/deleting-objects
+[developing keylist]: ../../../developing/api/http/list-keys
+[developing mapreduce]: ../../../developing/usage/mapreduce
+[cluster mdc]: ../../cluster-operations/v3-multi-datacenter
+[config advanced]: ../../../configuring/reference/#advanced-configuration
+[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum
+[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings
+[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction
+
+In single-server, non-clustered data storage systems, object deletion
+is a trivial process.
+In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however,
+object deletion is far less trivial because objects live on multiple
+[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend.
+
+## Object Deletion Example
+
+The problem of object deletion in distributed systems can be illustrated more concretely using the following example:
+
+* An object is stored on nodes A, B, and C
+* Node C suddenly goes offline due to a network failure
+* A client sends a delete request to node A, which forwards that
+  request to node B, but it cannot reach node C
+* On nodes A and B, the object is deleted
+* Node C comes back online
+* A client attempts to read the object, and the request hits node C
+* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object.
+
+The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred:
+
+1. the object was deleted on A & B but not on C
+2. the object was created on C but not on A & B
+
+To get around this problem, Riak uses *tombstones*.
+
+## Tombstones
+
+Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it.
+
+This allows Riak to understand the difference between an object that has been deleted and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in its metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*.
+
+The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**.
+
+After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to).
+
+## Configuring Object Deletion
+
+The `delete_mode` setting in a cluster's [configuration files][config advanced] determines how long a tombstone will remain before being reaped.
+
+There are three possible settings:
+
+* `keep` - Disables tombstone removal
+* `immediate` - The tombstone is removed as soon as the request is
+  received
+* Custom time interval - How long to wait until the tombstone is
+  removed, expressed in milliseconds. The default is `3000`, i.e. to
+  wait 3 seconds
+
+In general, we recommend setting the `delete_mode` parameter to `keep`
+if you plan to delete and recreate objects under the same key. This protects against failure scenarios in which a deleted object may be resurrected.
+
+Setting `delete_mode` to `immediate` can be useful in situations in
+which an aggressive space reclamation process is necessary, such as
+when running [MapReduce jobs][developing mapreduce], but we do not recommend
+this in general.
+
+Setting `delete_mode` to a longer time duration than the default can be
+useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when
+network connectivity is an issue.
+
+## Deletion from Backends
+
+When attempting to reclaim disk space, deleting data may seem like the obvious first step.
+However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion, when a Riak tombstone is created, and later, when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging]; for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping, and prior to garbage collection delete operations will actually cause disk space usage to rise slightly.
+
+## Tombstones & Reporting
+
+When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:
+
+* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
+* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain on the node until a further request finds it. At that time, a new reap timer will be initiated.
+* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
+* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too.
+* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.
+
+## Client Library Examples
+
+Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
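+
+For quick experimentation outside of a client library, deletion can also be exercised directly against the HTTP API. This is only a sketch, assuming a local node listening on port 8098 and a hypothetical `cars` bucket with a `dodge-viper` key; once the tombstone has been written, a subsequent read returns `404 Object Not Found`:
+
+```curl
+# Delete the object (a tombstone is written on at least W nodes)
+curl -XDELETE http://127.0.0.1:8098/buckets/cars/keys/dodge-viper
+
+# Reading the key back now returns 404 Object Not Found
+curl -i http://127.0.0.1:8098/buckets/cars/keys/dodge-viper
+```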
+
+## Resources
+
+* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html)
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/runtime-interaction.md b/content/riak/kv/3.0.1/using/reference/runtime-interaction.md
new file mode 100644
index 0000000000..23437a3657
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/runtime-interaction.md
@@ -0,0 +1,70 @@
+---
+title: "Runtime Interaction Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Runtime Interaction"
+    identifier: "managing_ref_runtime_interaction"
+    weight: 104
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/advanced/runtime
+  - /riak/kv/3.0.1/ops/advanced/runtime
+
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
+
+Riak's [configuration files][config reference] provide a variety of parameters that
+enable you to fine-tune how Riak interacts with two important elements
+of the underlying operating system: distribution ports and OS
+processes/garbage collection.
+
+## Ports
+
+Distribution ports connect Riak nodes within a [cluster][concept clusters]. The
+following port-related parameters are available:
+
+* `runtime_health.triggers.distribution_port` - Whether distribution
+  ports with full input buffers will be counted as busy.
+  * Default: `on`
+* `runtime_health.triggers.port` - Whether ports with full input
+  buffers will be counted as busy. Ports can represent open files or network sockets.
+  * Default: `on`
+* `runtime_health.thresholds.busy_ports` - The threshold at which a
+  warning will be triggered about the number of ports that are overly
+  busy. Ports with full input buffers count toward this threshold.
+  * Default: `2`
+
+## Processes
+
+Riak will log warnings related to busy operating system processes and
+garbage collection. You can specify the conditions in which warnings are
+triggered using the following parameters:
+
+* `runtime_health.thresholds.busy_processes` - The threshold at which
+  a warning will be triggered about the number of processes that are
+  overly busy. Processes with large heaps or that take a long time to
+  garbage collect will count toward this threshold.
+  * Default: `30`
+* `runtime_health.triggers.process.heap_size` - A process will be
+  marked as busy when its heap exceeds this size (in bytes).
+  * Default: `160444000`
+* `runtime_health.triggers.process.garbage_collection` - A process
+  will be marked as busy when it exceeds this amount of time doing
+  garbage collection. Enabling this setting can cause performance
+  problems on multi-core systems.
+  * Default: `off`
+  * Example when enabled: `50ms`
+* `runtime_health.triggers.process.long_schedule` - A process will
+  become busy when it exceeds this length of time during a single
+  process scheduling and execution cycle.
+  * Default: `off`
+  * Example when enabled: `20ms`
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/search.md b/content/riak/kv/3.0.1/using/reference/search.md
new file mode 100644
index 0000000000..2d6e987a38
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/search.md
@@ -0,0 +1,457 @@
+---
+title: "Search Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Search"
+    identifier: "managing_ref_search"
+    weight: 109
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/advanced/search
+  - /riak/kv/3.0.1/dev/advanced/search
+
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
+[configuring search]: {{<baseurl>}}riak/kv/3.0.1/configuring/search
+
+> **Note on search 2.0 vs. legacy search**
+>
+> This document refers to Riak search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+The project that implements Riak search is codenamed Yokozuna. This is a
+more detailed overview of the concepts and reasons behind the design of
+Yokozuna, for those interested. If you're simply looking to use Riak
+search, you should check out the [Using Search]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search) document.
+
+![Yokozuna]({{<baseurl>}}images/yokozuna.png)
+
+## Riak Search is Erlang
+
+In Erlang OTP, an "application" is a group of modules and Erlang
+processes which together perform a specific task. The word application
+is confusing because most people think of an application as an entire
+program such as Emacs or Photoshop. But Riak Search is just a subsystem
+of Riak itself. Erlang applications are often stand-alone, but Riak
+Search is more like an appendage of Riak. It requires other subsystems
+like Riak Core and KV, but also extends their functionality by providing
+search capabilities for KV data.
+
+The purpose of Riak Search is to bring more sophisticated and robust
+query and search support to Riak. Many people consider Lucene and
+programs built on top of it, such as Solr, as the standard for
+open-source search. There are many successful applications built on
+Lucene/Solr, and it sets the standard for the feature set that
+developers and users expect. Meanwhile, Riak has a great story as a
+highly-available, distributed key/value store. Riak Search takes
+advantage of the fact that Riak already knows how to do the distributed
+bits, combining its feature set with that of Solr, taking advantage of
+the strengths of each.
+
+Riak Search is a mediator between Riak and Solr. There is nothing
+stopping a user from deploying these two programs separately, but this
+would leave the user responsible for the glue between them. That glue
+can be tricky to write. It requires dealing with monitoring, querying,
+indexing, and dissemination of information.
+
+Unlike Solr by itself, Riak Search knows how to do all of the following:
+
+* Listen for changes in key/value (KV) data and make the appropriate
+  changes to indexes that live in Solr. It also knows how to take a user
+  query on any node and convert it to a Solr distributed search, which
+  will correctly cover the entire index without overlap in replicas.
+* Take index creation commands and disseminate that information across
+  the cluster.
+* Communicate with and monitor the Solr OS process.
+
+## Solr/JVM OS Process
+
+Every node in a Riak [cluster][concept clusters] has a corresponding operating
+system (OS) process running a JVM which hosts Solr on the Jetty
+application server.
+This OS process is a child of the Erlang OS process
+running Riak.
+
+Riak Search has a `gen_server` process which monitors the JVM OS
+process. The code for this server is in `yz_solr_proc`. When the JVM
+process crashes, this server crashes, causing its supervisor to restart
+it.
+
+If there is more than 1 restart in 45 seconds, the entire Riak node will
+be shut down. If Riak Search is enabled and Solr cannot function for
+some reason, the Riak node needs to go down so that the user will notice
+and take corrective action.
+
+Conversely, the JVM process monitors the Riak process. If for any reason
+Riak goes down hard (e.g. a segfault) the JVM process will also exit.
+This double monitoring along with the crash semantics means that neither
+process may exist without the other. They are either both up or both
+down.
+
+All other communication between Riak Search and Solr is performed via
+HTTP, including querying, indexing, and administration commands. The
+ibrowse Erlang HTTP client is used to manage these communications as
+both it and the Jetty container hosting Solr pool HTTP connections,
+allowing for reuse. Moreover, since there is no `gen_server` involved in
+this communication, there's no serialization point to bottleneck.
+
+## Indexes
+
+An index, stored as a set of files on disk, is a logical namespace that
+contains index entries for objects. Each such index maintains its own
+set of files on disk---a critical difference from Riak KV, in which a
+bucket is a purely logical entity and not physically disjoint at all. A
+Solr index requires significantly less disk space than the corresponding
+legacy Riak Search index, depending on the Solr schema used.
+
+Indexes may be associated with zero or more buckets. At creation time,
+however, each index has no associated buckets---unlike the legacy Riak
+Search, indexes in the new Riak Search do not implicitly create bucket
+associations, meaning that this must be done as a separate configuration
+step.
+
+To associate a bucket with an index, the bucket property `search_index` must
+be set to the name of the index you wish to associate. Conversely, in
+order to disassociate a bucket you use the sentinel value
+`_dont_index_`.
+
+Many buckets can be associated with the same index. This is useful for
+logically partitioning data into different KV buckets which are of the
+same type of data, for example if a user wanted to store event objects
+but logically partition them in KV by using a date as the bucket name.
+
+A bucket _cannot_ be associated with many indexes---the `search_index`
+property must be a single name, not a list.
+
+See the [main Search documentation]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search/#simple-setup) for details on creating an index.
+
+## Extractors
+
+There is a tension between Riak KV and Solr when it comes to data. Riak
+KV treats object values as mostly opaque, and while KV does maintain an
+associated content type, it is simply treated as metadata to be returned
+to the user to provide context for interpreting the returned object.
+Otherwise, the user wouldn't know what type of data it is!
+
+Solr, on the other hand, wants semi-structured data, more specifically a
+flat collection of field-value pairs. "Flat" here means that a field's
+value cannot be a nested structure of field-value pairs; the values are
+treated as-is (non-composite is another way to say it).
+
+Because of this mismatch between KV and Solr, Riak Search must act as a
+mediator between the two, meaning it must have a way to inspect a KV
+object and create a structure which Solr can ingest for indexing. In
+Solr this structure is called a **document**. This task of creating a
+Solr document from a Riak object is the job of the **extractor**. To
+perform this task two things must be considered.
+
+**Note**: This isn't quite the whole story: the fields created by the
+extractor are only a subset of the fields in the final document. Special
+fields needed for Yokozuna to properly query data, as well as tagging
+fields, are also created. This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a
+   Solr document?
+2. If so, how is the object's value mapped from one to the other?
+   For example, the value may be `application/json` which contains
+   nested objects. This must somehow be transformed into a flat
+   structure.
+
+The first question is answered by the _extractor mapping_. By default
+Yokozuna ships with extractors for several common data types. Below is a
+table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation
+of the extractor module. Every extractor must conform to the
+following Erlang specification:
+
+```erlang
+-spec extract(ObjectValue::binary(), Options::proplist()) -> fields() | {error, term()}.
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+```
+
+The value of the object is passed along with options specific to each
+extractor. Assuming the extractor correctly parses the value, it will
+return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the
+object's value verbatim and associate it with the field name `text`.
+For example, an object with the value "How much wood could a woodchuck
+chuck if a woodchuck could chuck wood?" would result in the following
+fields list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier.
+JSON can be nested arbitrarily. That is, the key of a top-level object
+can have an object as a value, and this object can have another object
+nested inside, and so on. Yokozuna's JSON extractor must have some method
+of converting this arbitrary nesting into a flat list. It does this by
+concatenating nested object fields with a separator. The default
+separator is `.`. An example should make this clearer.
+
+Below is JSON that represents a person, what city they are from, and what
+cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice:
+
+* Nested objects have their field names concatenated to form a field
+  name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your
+  schema defines this field as multi-valued.
+
+The XML extractor works in very similar fashion to the JSON extractor,
+except it also has element attributes to worry about. To see the
+document created for an object, without actually writing the object, you
+can use the extract HTTP endpoint. This will do a dry-run extraction and
+return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+     -H 'Content-Type: application/json' \
+     --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and
+types. For each document stored, every field must have a matching name
+in the schema, used to determine the field's type, which in turn
+determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempt to hide any details of the Solr
+schema: a user creates a schema for Yokozuna just as she would for Solr.
+Here is the general structure of a schema.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding
+options are declared. Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
+called `_yz_default`. This is an extremely general schema which makes
+heavy use of dynamic fields---it is intended for development and
+testing. In production, a schema should be tailored to the data being
+indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+correcting entropy (divergence) between the data stored in Riak's
+key-value backend and the indexes stored in Solr. The impetus for AAE is
+that failures come in all shapes and sizes---disk failure, dropped
+messages, network partitions, timeouts, overflowing queues, segmentation
+faults, power outages, etc. Failures range from obvious to invisible.
+Failure prevention is fraught with failure, as well. How do you prevent
+your prevention system from failing? You don't. Code for detection, not
+prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite
+expensive. To minimize the overall cost of detection, AAE makes use of
+hashtrees. Every partition has a pair of hashtrees; one for KV and
+another for Yokozuna. As data is written the hashtrees are updated in
+real-time.
+
+Each tree stores the hash of the object. Periodically a partition is
+selected and the pair of hashtrees is _exchanged_. First the root hashes
+are compared. If equal, then there is no more work to do.
+You could have
+millions of keys in one partition, and verifying they **all** agree takes
+the same time as comparing two hashes. If they don't match, then the
+root's children are checked, and this process continues until the
+individual discrepancies are found. If either side is missing a key or
+the hashes for a key do not match, then _repair_ is invoked on that key.
+Repair converges the KV data and its indexes, removing the entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the
+hashtrees themselves may contain some entropy. For example, what if the
+root hashes agree but a divergence exists in the actual data? Simple:
+you assume you can never fully trust the hashtrees, so periodically you
+_expire_ them. When expired, a tree is completely destroyed and then
+re-built from scratch. This requires folding all data for a partition,
+which can be expensive and take some time. For this reason, by default,
+expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
+[screencast](http://coffee.jtuple.com/video/AAE.html).
+
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a
+stream of tokens. Solr allows many different methods of analysis,
+an important fact because different field values may represent
+different types of data. For data like unique identifiers, dates, and
+categories you want to index the value verbatim---it shouldn't be
+analyzed at all. For text like product summaries, or a blog post,
+you want to split the value into individual words so that they may be
+queried individually. You may also want to remove common words,
+lowercase words, or perform stemming. This is the process of
+_analysis_.
+
+Solr provides many different field types which analyze data in different
+ways, and custom analyzer chains may be built by stringing together XML
+in the schema file, allowing custom analysis for each field. For more
+information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via
+Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata
+   about it that should be indexed, for example storing an image with
+   location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must
+   be added _without_ modifying the object's value.
+
+See
+[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
+for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index
+entries for a given Riak Object are co-located on the same physical
+machine. To query the entire index all partitions must be contacted.
+Adjacent partitions keep replicas of the same object. Replication allows
+the entire index to be considered by only contacting a subset of the
+partitions. The process of finding a covering set of partitions is known
+as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can
+be thought of as a unique set of nodes along with a covering set of
+partitions. Yokozuna treats the node list as physical hostnames and
+passes them to Solr's distributed search via the `shards` parameter.
+Partitions, on the other hand, are treated logically in Yokozuna. All
+partitions for a given node are stored in the same index, unlike KV,
+which uses _partition_ as a physical separation.
+To properly filter out
+overlapping replicas, the partition data from the cover plan is passed to
+Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very
+expensive operation, as much computation is done symbolically, and the
+process amounts to a knapsack problem. The larger the ring, the more
+expensive. Yokozuna takes advantage of the fact that it has no physical
+partitions by computing a coverage plan asynchronously every few
+seconds, caching the plan for query use. In the case of node failure or
+ownership change this could mean a delay between cluster state and the
+cached plan. This is, however, a good trade-off given the performance
+benefits, especially since even without caching there is a race, albeit
+one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time it takes to initiate a drain to the time the drain is completed.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees.
+  Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time the repair is written to Solr.
+
+* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting.
+
+* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window.
+
+* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window.
+
+* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of query latency, as measured from the time it takes to send a request to Solr to the time the response is received from Solr.
+
+* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window.
+
+* `search_index_bad_entry_count` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+
+* `search_index_bad_entry_one` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute.
+
+* `search_index_extract_fail_count` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak.
+
+* `search_index_extract_fail_one` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute.
+
+While most of the default values are sufficient, you may have to
+increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start.
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/secondary-indexes.md b/content/riak/kv/3.0.1/using/reference/secondary-indexes.md
new file mode 100644
index 0000000000..029aca0443
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/secondary-indexes.md
@@ -0,0 +1,76 @@
+---
+title: "Secondary Indexes Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Secondary Indexes"
+    identifier: "managing_ref_2i"
+    weight: 110
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.1/dev/advanced/2i
+  - /riak/kv/3.0.1/dev/advanced/2i
+
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.1/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends.
+
+This document provides implementation and other details for Riak's
+[secondary indexes]({{<baseurl>}}riak/kv/3.0.1/developing/usage/secondary-indexes/) \(2i) feature.
+
+## How It Works
+
+Secondary indexes use **document-based partitioning**, a system where
+indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode). This
+approach is also known as a local index. Secondary indexes are a list of key/value
+pairs that are similar to HTTP headers. At write time, objects are
+tagged with index entries consisting of key/value metadata. This
+metadata can be queried to retrieve the matching keys, as shown in the
+example at the end of this document.
+
+![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png)
+
+Indexes reside on multiple machines. Since indexes for an object are
+stored on the same partition as the object itself, query-time
+performance issues might arise. When issuing a query, the system must
+read from a "covering" set of partitions and then merge the results.
+The system looks at how many replicas of the data are stored---the N value
+or `n_val`---and determines the minimum number of partitions that it
+must examine (1 / `n_val`) to retrieve a full set of results, also
+taking into account any offline nodes.
+
+An application can modify the indexes for an object by reading an
+object, adding or removing index entries, and then writing the object.
+Finally, an object is automatically removed from all indexes when it is
+deleted. The object's value and its indexes should be thought of as a
+single unit. There is no way to alter the indexes of an object
+independently from the value of an object, and vice versa. Indexing is
+atomic, and is updated in real time when writing an object. This means
+that an object will be present in future index queries as soon as the
+write operation completes.
+
+Riak stores 3 replicas of all objects by default, although this can be
+changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/replication-properties). The system is capable of generating a full set of results
+from one third of the system's partitions as long as it chooses the
+right set of partitions. The query is sent to each partition, the index
+data is read, and a list of keys is generated and then sent back to the
+requesting node.
+
+> **Note on 2i and strong consistency**
+>
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
+secondary index metadata to those objects, you can still perform
+strongly consistent operations on those objects but the secondary
+indexes will be ignored.
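+
+As a minimal illustration of the write-time tagging described above, the
+sketch below tags an object with a binary (`_bin`) index entry via the
+HTTP API and then queries it. It assumes a local node on port 8098, a
+2i-capable backend such as LevelDB, and a hypothetical `users` bucket,
+`john` key, and `email_bin` index:
+
+```curl
+# Write an object tagged with an email index entry
+curl -XPUT http://127.0.0.1:8098/buckets/users/keys/john \
+  -H 'Content-Type: application/json' \
+  -H 'x-riak-index-email_bin: john@example.com' \
+  -d '{"name": "John"}'
+
+# Query the index for all keys matching that email
+curl http://127.0.0.1:8098/buckets/users/index/email_bin/john@example.com
+```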
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/snmp.md b/content/riak/kv/3.0.1/using/reference/snmp.md
new file mode 100644
index 0000000000..1c185cd3e4
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/snmp.md
@@ -0,0 +1,166 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/running/monitoring/snmp
+  - /riak/kv/3.0.1/ops/running/monitoring/snmp
+
+---
+
+Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average GET and PUT times, as well as the number of GETs and PUTs. This document covers only SNMP v2c, which was the last supported version. Support for SNMP was dropped after the release of Riak KV 2.2.3 Enterprise Edition. The configuration examples below are left for people analyzing legacy settings and work only with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (e.g. `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: The commas in the IP are in the correct format
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+     [{agent,
+       [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+        {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+     [{agent,
+       [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                  {force_load, true}]},
+        {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings you can start your Riak node and will be able to snmpwalk the node to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak you will need to reference the MIB shipped with Riak.
+For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+    -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+    192.168.52.129:4000 RIAK
+```
+
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/statistics-monitoring.md b/content/riak/kv/3.0.1/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..08150aff40
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/statistics-monitoring.md
@@ -0,0 +1,395 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/running/stats-and-monitoring
+  - /riak/kv/3.0.1/ops/running/stats-and-monitoring
+
+---
+
+Riak provides data related to current operating status, which includes
+statistics in the form of counters and histograms. These statistics
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/3.0.1/developing/api/http/status) endpoint, or through the [`riak admin`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered
+statistics, as well as numerous solutions for monitoring and gathering
+statistics that our customers and community report using successfully
+in Riak cluster environments. You can learn more about the specific
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/3.0.1/developing/api/http/status) documentation.
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with
+diagnostics and early warnings of potential problems, as well as help
+guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor Load
+* Available Memory
+* Available disk space
+* Used file descriptors
+* Swap Usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory and
+writebacks. Things like massive flushes of dirty pages or steadily
+climbing writeback volumes can indicate poor virtual memory tuning.
+More information can be found [here][sysctl_vm_txt] and in our
+documentation on [system tuning]({{<baseurl>}}riak/kv/3.0.1/using/performance/#storage-and-file-system-tuning).
+
+## Riak Metrics to Graph
+
+Riak metrics fall into several general categories:
+
+1. Throughput metrics
+2. Latency metrics
+3. Erlang resource usage metrics
+4. General Riak load/health metrics
+
+If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/inspecting-node) is
+not practical, you should pick a minimum relevant subset from these
+categories. Some of the most helpful metrics are discussed below.
+
+### Throughput Metrics
+
+Graphing the throughput stats relevant to your use case is often
+helpful for capacity planning and usage trend analysis. In addition,
+it helps you establish an expected baseline -- that way, you can
+investigate unexpected spikes or dips in the throughput. The
+following stats are recorded for operations that happened *during the
+last minute*.
+
+Metric | Relevance | Operations (for the last minute)
+:--------|:--------|:--------------------------------
+```node_gets``` | K/V | Reads coordinated by this node
+```node_puts``` | K/V | Writes coordinated by this node
+```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes
+```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes
+```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes
+```search_query_throughput_one``` | Search | Search queries on the node
+```search_index_throughput_one``` | Search | Documents indexed by Search
+```consistent_gets``` | Strong Consistency | Consistent reads on this node
+```consistent_puts``` | Strong Consistency | Consistent writes on this node
+```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads
+
+Note that there are no separate stats for updates to Flags or
+Registers, as these are included in ```vnode_map_update```.
+
+### Latency Metrics
+
+As with the throughput metrics, keeping an eye on average (and max)
+latency times will help detect usage patterns, and provide advance
+warning of potential problems.
+
+{{% note title="Note on FSM Time Stats" %}}
+FSM Time Stats represent the amount of time in microseconds required to
+traverse the GET or PUT Finite State Machine code, offering a picture of
+general node health. From your application's perspective, FSM Time effectively
+represents experienced latency. Mean, Median, and 95th-, 99th-, and
+100th-percentile (Max) counters are displayed. These are one-minute stats.
+{{% /note %}}
+
+Metric | Also | Relevance | Latency (in microseconds)
+:------|:-----|:----------|:-------------------------
+```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client
+```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client
+```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation
+```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation
+```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation
+```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency
+```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document
+```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency
+```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency
+
+### Erlang Resource Usage Metrics
+
+These are system metrics from the perspective of the Erlang VM,
+measuring resources allocated and used by Erlang.
+
+Metric | Notes
+:------|:-------------------------
+```sys_process_count``` | Number of processes currently running in the Erlang VM
+```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes)
+```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes)
+
+### General Riak Load/Health Metrics
+
+These various stats give a picture of the general level of activity or
+load on the Riak node at any given moment.
+
+Metric | Also | Notes
+:------|:-----|:------------------
+```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones.
+```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion.
+```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting.
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute
+```pbc_active``` | | Number of currently active protocol buffer connections
+```pbc_connects``` | | Number of new protocol buffer connections established during the last minute
+```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes)
+```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0)
+```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection
+```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection
+
+### General Riak Search Load/Health Metrics
+
+These various stats give a picture of the general level of activity or
+load on the Riak node at any given moment.
+
+Metric | Description
+:------|:------------
+`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+`search_index_bad_entry_one` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute.
+`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last start of Riak.
+`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute.
+
+
+## Command-line Interface
+
+The [`riak admin`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/) tool provides two
+interfaces for retrieving statistics and other information: `status`
+and `stat`.
+
+### status
+
+Running the `riak admin status` command will return all of the
+currently available information from a running node.
+
+```bash
+riak admin status
+```
+
+This will return a list of over 300 key/value pairs, like this:
+
+```
+1-minute stats for 'dev1@127.0.0.1'
+-------------------------------------------
+connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1']
+consistent_get_objsize_100 : 0
+consistent_get_objsize_195 : 0
+... etc ...
+```
+
+A comprehensive list of available stats can be found in the
+[Inspecting a Node]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/inspecting-node/#riak admin-status) document.
+
+### stat
+
+The `riak admin stat` command is related to the `riak admin status`
+command but provides a more fine-grained interface for interacting with
+stats and information. Full documentation of this command can be found
+in the [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#stat) document.
+
+## Statistics and Monitoring Tools
+
+There are many open source, self-hosted, and service-based solutions for
+aggregating and analyzing statistics and log data for the purposes of
+monitoring, alerting, and trend analysis on a Riak cluster. Some
+solutions provide Riak-specific modules or plugins as noted.
+
+The following are solutions which customers and community members have
+reported success with when used for monitoring the operational status of
+their Riak clusters. Community and open source projects are presented
+along with commercial and hosted services.
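+
+Whichever aggregation tool you choose, the raw numbers it consumes are
+always available directly from the HTTP `/stats` endpoint mentioned at
+the top of this page. As a quick sketch (assuming a local node listening
+on port 8098 and the `json_pp` utility installed), you can pull and
+pretty-print the full stats payload before wiring up a full monitoring
+pipeline:
+
+```curl
+curl -s http://127.0.0.1:8098/stats | json_pp
+```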
+
+{{% note title="Note on Riak 2.x Statistics Support" %}}
+Many of the tools below were created either by third parties or by Basho
+engineers for general usage, and have since been passed to the community for
+further updates. As such, many of them only aggregate the statistics and
+messages that were output by Riak 1.4.x.
+
+Like all code under [Basho Labs](https://github.com/basho-labs/), the tools
+below are "best effort" and have no dedicated Basho support. We both
+appreciate and need your contribution to keep these tools stable and up to
+date. Please open up a GitHub issue on the repository if you'd like to be a
+maintainer.
+
+Look for banners calling out the tools we've verified support the latest
+Riak 2.x statistics!
+{{% /note %}}
+
+### Self-Hosted Monitoring Tools
+
+#### Riaknostic
+
+[Riaknostic](http://riaknostic.basho.com) is a growing suite of
+diagnostic checks that can be run against your Riak node to discover
+common problems and recommend how to resolve them. These checks are
+derived from the experience of the Basho Client Services Team as well as
+numerous public discussions on the mailing list, IRC room, and other
+online media.
+
+Riaknostic integrates into the `riak admin` command via a `diag`
+subcommand, and is a great first step in the process of diagnosing and
+troubleshooting issues on Riak nodes.
+
+#### Riak Control
+
+[Riak Control]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak-control/) is Basho's REST-driven user interface for managing Riak
+clusters. It is designed to give you quick insight into the health of
+your cluster and allow for easy management of nodes.
+
+While Riak Control does not currently offer specific monitoring and
+statistics aggregation or analysis functionality, it does offer features
+which provide immediate insight into overall cluster health, node
+status, and handoff operations.
+
+#### collectd
+
+[collectd](http://collectd.org) gathers statistics about the system it
+is running on and stores them. The statistics are then typically graphed
+to find current performance bottlenecks, predict system load, and
+analyze trends.
+
+#### Ganglia
+
+[Ganglia](http://ganglia.info) is a monitoring system specifically
+designed for large, high-performance groups of computers, such as
+clusters and grids. Customers and community members using Riak have
+reported success in using Ganglia to monitor Riak clusters.
+
+A [Riak Ganglia module][riak_ganglia] for collecting statistics from
+the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/3.0.1/developing/api/http/status) endpoint is also available.
+
+#### Nagios
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x.**
+{{% /note %}}
+
+[Nagios](http://www.nagios.org) is a monitoring and alerting solution
+that can provide information on the status of Riak cluster nodes, in
+addition to various types of alerting when particular events occur.
+Nagios also offers logging and reporting of events and can be used for
+identifying trends and capacity planning.
+
+A collection of [reusable Riak-specific scripts][riak_nagios] is
+available to the community for use with Nagios.
+
+#### OpenTSDB
+
+[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database
+(TSDB) used to store, index, and serve metrics from various sources. It can
+collect data at a large scale and graph these metrics on the fly.
+
+A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of
+the [tcollector framework][tcollector].
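+
+The linked collector is a Python script that follows tcollector's simple
+stdout protocol. As a rough, hedged illustration of that protocol (the
+metric name, tag, and 15-second interval are illustrative, and the
+endpoint and port are assumed defaults), a shell sketch that emits one
+Riak gauge in a tcollector-compatible form might look like this:
+
+```bash
+#!/bin/sh
+# Hedged sketch of a tcollector-style collector: print
+# "<metric> <unix-timestamp> <value> <tags>" lines to stdout.
+# Assumes Riak's HTTP /stats endpoint on the default port 8098.
+STATS_URL="http://localhost:8098/stats"
+while true; do
+  ts=$(date +%s)
+  # Crude JSON extraction; a real collector would use a JSON parser.
+  pbc=$(curl -s "$STATS_URL" | sed -n 's/.*"pbc_active":\([0-9]*\).*/\1/p')
+  [ -n "$pbc" ] && echo "riak.pbc_active $ts $pbc host=$(hostname)"
+  sleep 15
+done
+```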
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project
+consisting of small programs for sending data to Riemann provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have
+reported successfully using for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/3.0.1/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine-generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/3.0.1/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. These data are then available for real-time graphing,
+search, and other visualizations ideal for troubleshooting complex issues
+and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistical information which can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using many open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ + + + diff --git a/content/riak/kv/3.0.1/using/reference/strong-consistency.md b/content/riak/kv/3.0.1/using/reference/strong-consistency.md new file mode 100644 index 0000000000..540fcc018b --- /dev/null +++ b/content/riak/kv/3.0.1/using/reference/strong-consistency.md @@ -0,0 +1,150 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +aliases: + +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/3.0.1/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + +## Making the Strong vs. Eventual Decision + +The first system described above may sound like the undisputed champion, +and the second system undesirable. However: + +1. Reads and writes on the first system will often be slower---if only + by a few milliseconds---because the system needs to manage reads and + writes more carefully. If performance is of primary concern, the + first system might not be worth the sacrifice. +2. Reads and writes on the first system may fail entirely if enough + servers are unavailable. If high availability is the top priority, + then the second system has a significant advantage. + +So when deciding whether to use strong consistency in Riak, the +following question needs to be asked: + +#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value? + +If the answer is yes, then you should seriously consider using Riak in a +strongly consistent way for the data that demands it, while bearing in +mind that other data can still be stored in Riak in an eventually +consistent way. 
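+
+In practice, opting data into strong consistency happens at the bucket
+type level. A hedged sketch (the type name `strongly_consistent`, the
+bucket, and the key below are illustrative; the `consistent` property is
+what enables the behavior described above):
+
+```bash
+# Assumes strong consistency has already been enabled on the cluster
+# per the operators' guide linked above.
+# Create and activate a bucket type whose keys are strongly consistent.
+riak admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'
+riak admin bucket-type activate strongly_consistent
+
+# Writes and reads under this type follow the strongly consistent
+# sequence above; buckets under other types remain eventually consistent.
+curl -XPUT -H "Content-Type: text/plain" -d "Louis van Gaal" \
+  "http://localhost:8098/types/strongly_consistent/buckets/clubs/keys/manchester-manager"
+```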
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable
+trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available
+than eventually consistent operations because they require a **quorum**
+of available object replicas to succeed. Quorum is defined as N / 2 + 1
+(rounded down), or `n_val` / 2 + 1. If N is set to 7, at least 4 object
+replicas must be available, while 2 must be available if N=3, and so on.
+
+If there is a network partition that leaves fewer than a quorum of object
+replicas available within an ensemble, strongly consistent operations
+against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault
+tolerance. Consistent operations can still succeed when a minority of
+replicas in each ensemble are offline, faulty, or unreachable. In
+other words, **strongly consistent operations will succeed as long as
+quorum is maintained**. A fuller discussion can be found in the
+[operations]({{<baseurl>}}riak/kv/3.0.1/configuring/strong-consistency/#fault-tolerance)
+documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/3.0.1/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+which can entail a performance hit of varying proportions, depending on
+a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/3.0.1/configuring/strong-consistency/#performance).
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/v2-multi-datacenter.md b/content/riak/kv/3.0.1/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..6f2c85e124
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,40 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+aliases:
+
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.1/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+ +[Learn More >>][v2 mdc fullsync] + + + diff --git a/content/riak/kv/3.0.1/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/3.0.1/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..ca44863fdb --- /dev/null +++ b/content/riak/kv/3.0.1/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,130 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/3.0.1/ops/mdc/v2/architecture + - /riak/kv/3.0.1/ops/mdc/v2/architecture + +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.1/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e. +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime mode are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the Figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. 
The site node in the secondary cluster initiates fullsync replication
+   with the primary node by sending a message to the listener node in
+   the primary cluster
+3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode) in their respective clusters and compute a hash for
+   each key's object value. The site node on the secondary cluster sends
+   its complete list of key/hash pairs to the listener node in the
+   primary cluster. The listener node then sequentially compares those
+   key/hash pairs with its own, identifying any
+   missing objects or updates needed in the secondary cluster.
+4. The listener node streams the missing objects/updates to the
+   secondary cluster.
+5. The secondary cluster replicates the updates within the cluster to
+   achieve the new object values, completing the fullsync cycle
+
+<br>
+![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png)
+<br>
+
+## Realtime Replication
+
+Riak performs the following steps during realtime
+replication, as illustrated in the Figure below.
+
+1. The secondary cluster establishes a TCP connection to the primary
+2. Realtime replication of a key/object is initiated when an update is
+   sent from a client to the primary cluster
+3. The primary cluster replicates the object locally
+4. The listener node on the primary cluster streams an update to the
+   secondary cluster
+5. The site node within the secondary cluster receives and replicates
+   the update
+
+<br>
+![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png)
+<br>
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/3.0.1/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+source and sink cluster.
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/3.0.1/using/reference/v2-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..8b7f8b9f96
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/v2-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,53 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v2_fullsync"
+    weight: 101
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v2/scheduling-fullsync
+  - /riak/kv/3.0.1/ops/mdc/v2/scheduling-fullsync
+
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.1/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead.
+{{% /note %}}
+
+
+## Scheduling Fullsync Operation
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` + + + diff --git a/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter.md b/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..97d9b6639c --- /dev/null +++ b/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter.md @@ -0,0 +1,52 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +aliases: + +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] + + + diff --git a/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..5d8f101076 --- /dev/null +++ b/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,129 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.1/ops/mdc/v3/aae + - /riak/kv/3.0.1/ops/mdc/v3/aae + +--- + +[glossary aae]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak
+Enterprise versions 1.4.0 - 2.2.3 and now available to all versions after 2.2.3) can take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a
+technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in
+LevelDB, so if AAE is already active, there is no additional startup
+delay for enabling the `aae` fullsync strategy. AAE can also be enabled
+for the first time on a cluster, although in this case some custom
+settings can help AAE trees build more
+quickly. See [Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later installed on source and sink
+  clusters
+* Riak MDC Replication Version 3 enabled on source and sink
+  clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the
+  `advanced.config` configuration file must be set to `aae` on both
+  source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In
+  the event that an AAE tree is not built on both the source and sink,
+  fullsync will default to the `keylist` fullsync strategy for that
+  partition.
+
+## Configuration
+
+If you are using Riak version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To enable [active anti-entropy][glossary aae] \(AAE) for replication, you must enable it on both source and sink clusters. If it is not
+enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all
+of the necessary hash trees because the default **build rate** of trees
+is to build 1 partition per hour, per node. With a
+[ring size][concept clusters] of 256 and 5 nodes, that works out to
+roughly 51 trees per node, or just over 2 days.
+
+Changing the rate of tree building can speed up this process, with the
+caveat that rebuilding a tree takes processing time from the cluster,
+and this should not be done without assessing the possible impact on
+get/put latencies for normal cluster operations. For a production
+cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a
+partition has not had its AAE tree built yet, it will default to using
+the `keylist` replication strategy. Instructions on these settings can
+be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting
+the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit`
+settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on
+both source and sink clusters.
If not, the `keylist` replication
+strategy will be used.
+
+To enable AAE with Version 3 MDC Replication:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {fullsync_strategy, aae},
+    % ...
+    ]}
+```
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..3079a126a6
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/architecture.md
@@ -0,0 +1,186 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Architecture"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Architecture"
+    identifier: "managing_ref_v3_architecture"
+    weight: 100
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v3/architecture
+  - /riak/kv/3.0.1/ops/mdc/v3/architecture
+
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters
+
+## How Version 3 Replication Works
+
+In Multi-Datacenter (MDC) Replication, a cluster can act as either the
+
+* **source cluster**, which sends replication data to one or more
+* **sink clusters**, which are generally located in datacenters in other
+  regions or countries.
+
+Bidirectional replication can easily be established by making a cluster
+both a source and sink to other clusters. Riak
+Multi-Datacenter Replication is considered "masterless" in that all
+clusters participating will resolve replicated writes via the normal
+resolution methods available in Riak.
+
+In Multi-Datacenter Replication, there are two primary modes of
+operation:
+
+* **Fullsync** replication is a complete synchronization that occurs
+  between source and sink cluster(s), which can be performed upon
+  initial connection of a sink cluster if you wish
+* **Realtime** replication is a continual, incremental synchronization
+  triggered by successful writing of new updates on the source cluster
+
+Fullsync and realtime replication modes are described in detail below.
+
+## Concepts
+
+### Sources
+
+A source refers to a cluster that is the primary producer of replication
+data. A source can also refer to any node that is part of the source
+cluster. Source clusters push data to sink clusters.
+
+### Sinks
+
+A sink refers to a cluster that is the primary consumer of replication
+data. A sink can also refer to any node that is part of the sink
+cluster. Sink clusters receive data from source clusters.
+
+### Cluster Manager
+
+The cluster manager is a Riak service that provides
+information regarding nodes and protocols supported by the sink and
+source clusters. This information is primarily consumed by the
+`riak-repl connect` command.
+
+### Fullsync Coordinator
+
+In fullsync replication, a node on the source cluster is elected to be
+the *fullsync coordinator*. This node is responsible for starting and
+stopping replication to the sink cluster. It also communicates with the
+sink cluster to exchange key lists and ultimately transfer data across a
+TCP connection. If a fullsync coordinator is terminated as the result of
+an error, it will automatically restart on the current node. If the node
+becomes unresponsive, a leader election will take place within 5 seconds
+to select a new node from the cluster to become the coordinator. In the
+event of a coordinator restart, a fullsync will have to restart.
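+
+Putting these concepts together, here is a hedged command-line sketch of
+wiring a source cluster to a sink (the host, port, and cluster name are
+illustrative; 9080 is the cluster manager's conventional default port):
+
+```bash
+# On the source cluster: connect to the sink's cluster manager,
+# then enable and kick off fullsync for that sink by cluster name.
+riak-repl connect sink1.example.com:9080
+riak-repl fullsync enable sink_cluster
+riak-repl fullsync start sink_cluster
+```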
+
+## Fullsync Replication
+
+Fullsync replication scans through the list of partitions in a Riak
+cluster and determines which objects in the sink cluster need to be
+updated. Each source partition is synchronized to the node on the sink
+cluster that currently holds the corresponding partition.
+
+## Realtime Replication
+
+In realtime replication, a node in the source cluster will forward data
+to the sink cluster. A node in the source cluster does not necessarily
+connect to a node containing the same [vnode][glossary vnode] on
+the sink cluster. This allows Riak to spread out realtime replication
+across the entire cluster, thus improving throughput and making
+replication more fault tolerant.
+
+### Initialization
+
+Before a source cluster can begin pushing realtime updates to a sink,
+the following commands must be issued:
+
+1. `riak-repl realtime enable <sink_cluster>`
+
+   After this command, the realtime queues (one for each Riak node) are
+   populated with updates to the source cluster, ready to be pushed to
+   the sink.
+
+2. `riak-repl realtime start <sink_cluster>`
+
+   This instructs the Riak connection manager to contact the sink
+   cluster.
+
+   <br />
+   ![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime1.png)
+   <br />
+
+   At this point realtime replication commences.
+
+<ol start="3">
+<li>Nodes with queued updates establish connections to the sink cluster
+and replication begins.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime2.png)
+<br />
+
+### Realtime Queueing and Synchronization
+
+Once initialized, realtime replication continues to use the queues to
+store data updates for synchronization.
+
+<ol start="4">
+<li>The client sends an object to store on the source cluster.</li>
+<li>Riak writes N replicas on the source cluster.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime3.png)
+<br />
+
+<ol start="6">
+<li>The new object is stored in the realtime queue.</li>
+<li>The object is copied to the sink cluster.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime4.png)
+<br />
+
+<ol start="8">
+<li>The destination node on the sink cluster writes the object to N
+nodes.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime5.png)
+<br />
+
+<ol start="9">
+<li>The successful write of the object to the sink cluster is
+acknowledged and the object removed from the realtime queue.</li>
+</ol>
+
+<br />
+![MDC realtime]({{<baseurl>}}images/MDC-v3-realtime6.png)
+<br />
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size][concept clusters]; if you are using fullsync
+replication, every bucket's `n_val` must be the same in both the
+source and sink cluster.
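+
+A quick way to sanity-check both restrictions before enabling replication
+(a hedged sketch: it assumes the default HTTP port 8098, the
+`ring_num_partitions` entry reported by `riak admin status`, and an
+illustrative bucket named `my_bucket`):
+
+```bash
+# Run on one node in each cluster; the reported values must match.
+riak admin status | grep ring_num_partitions
+
+# Inspect a bucket's n_val over HTTP.
+curl -s http://localhost:8098/buckets/my_bucket/props | grep -o '"n_val":[0-9]*'
+```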
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/cascading-writes.md
new file mode 100644
index 0000000000..1be81a7b40
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -0,0 +1,102 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Cascading Realtime Writes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Cascading Writes"
+    identifier: "managing_ref_v3_cascading_writes"
+    weight: 102
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v3/cascading-writes
+  - /riak/kv/3.0.1/ops/mdc/v3/cascading-writes
+
+---
+
+## Introduction
+
+Riak includes a feature that cascades realtime writes across
+multiple clusters.
+
+Cascading Realtime Writes is enabled by default on new clusters running
+Riak. It will need to be manually enabled on existing clusters.
+
+Cascading realtime requires the `{riak_repl, rtq_meta}` capability to
+function.
+
+{{% note title="Note on cascading tracking" %}}
+Cascading tracking is a simple list of where an object has been written. This
+works well for most common configurations. Larger installations, however, may
+have writes cascade to clusters to which other clusters have already written.
+{{% /note %}}
+
+
+```
++---+     +---+     +---+
+| A | <-> | B | <-> | C |
++---+     +---+     +---+
+  ^                   ^
+  |                   |
+  V                   V
++---+     +---+     +---+
+| F | <-> | E | <-> | D |
++---+     +---+     +---+
+```
+
+In the diagram above, a write at cluster A will begin two cascades. One
+goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and
+finally B. Each cascade will loop around to A again, sending a
+replication request even if the same request has already occurred from
+the opposite direction, creating 3 extra write requests.
+
+This can be mitigated by disabling cascading in a cluster. If cascading
+were disabled on cluster D, a write at A would begin two cascades. One
+would go through B, C, and D, the other through F, E, and D. This
+reduces the number of extraneous write requests to 1.
+
+A different topology can also prevent extra write requests:
+
+```
++---+                     +---+
+| A |                     | E |
++---+                     +---+
+  ^ ^                     ^ ^
+  |  \  +---+     +---+  /  |
+  |   > | C | <-> | D | <   |
+  |  /  +---+     +---+  \  |
+  V V                     V V
++---+                     +---+
+| B |                     | F |
++---+                     +---+
+```
+
+A write at A will cascade to C and B. B will not cascade to C because
+A will have already added C to the list of clusters where the write has
+occurred. C will then cascade to D. D then cascades to E and F. E and F
+see that the other was sent a write request (by D), and so they do not
+cascade.
+
+## Usage
+
+Riak Cascading Writes can be enabled and disabled using the
+`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/v3-multi-datacenter) for more information.
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
+
+
+
diff --git a/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..db05fa5578
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,72 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.1/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/3.0.1/ops/mdc/v3/scheduling-fullsync
+
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak_repl`
+section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes,
+  between fullsyncs, _or_
+* a list of `{"clustername", time_in_minutes}` pairs, one for each sink
+  participating in fullsync replication. Note the commas separating each
+  pair, and `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    {fullsync_interval, 90} %% fullsync runs every 90 minutes
+    % ...
+    ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    % clusters sink_boston + sink_newyork have different intervals (in minutes)
+    {fullsync_interval, [
+        {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+        {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+    ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been
+  completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the
+  specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last
+  completed fullsync.
+
+
+
diff --git a/content/riak/kv/3.0.1/using/repair-recovery.md b/content/riak/kv/3.0.1/using/repair-recovery.md
new file mode 100644
index 0000000000..01f77a822e
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/repair-recovery.md
@@ -0,0 +1,53 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+aliases:
+
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
+
+
+
diff --git a/content/riak/kv/3.0.1/using/repair-recovery/errors.md b/content/riak/kv/3.0.1/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..f7f6b158c7
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/repair-recovery/errors.md
@@ -0,0 +1,366 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/running/recovery/errors
+  - /riak/kv/3.0.1/ops/running/recovery/errors
+
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may
+encounter -- screws fall out all of the time, the world is an imperfect
+place. This is an attempt at capturing the most common recent errors
+that users encounter, as well as a description of the non-critical
+error atoms which you may find in the logs.
+
+Discovering the source of an error can take some detective work, since
+one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error
+messages may appear in. Depending upon your log configuration some may
+appear more often (i.e., if you set the log to debug), while others may
+output to your console (e.g., if you tee'd your output or started as `riak
+console`).
+
+You can optionally customize your log message format via the
+`lager_default_formatter` field under `lager` in `app.config`. If you
+do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so that you can look up portions of a
+log message, since printing every variation would be a bit unwieldy. For
+example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+starts with a date (`12:34:27.999`), followed by the log severity
+(`[error]`), with a message formatted by lager (found in the Lager table
+below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the project Lager, so it's good to note
+some of the more common message formats. In almost every case the
+reasons for the error are described as variables, such as `Reason` or
+`Mod` (meaning the Erlang module which is generally the source of the
+error).
+
+Riak does not format all error messages that it receives into
+human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in
+this table, where the Erlang `Mod` value is `riak_core_capability` and
+the reason was an Erlang error: `no function clause matching
+orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang programming favors a "happy path/fail fast" style, one
+of the more common error log strings you might encounter contains
+`{error,{badmatch,{...`. This is Erlang's way of telling you that an
+unexpected value was assigned, so these errors can prefix the more
+descriptive parts. In this case, `{error,{badmatch,{...` prefixes the
+more interesting `insufficient_vnodes_available` error, which can be
+found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, so check for and kill epmd
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.1/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.1/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | An attempt was made to write to a file/directory on a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large of an object or that you simply don't have enough RAM. Base minimum suggested RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang
+atoms (and quite often wrapped within an `{error,{badmatch,{...` tuple,
+as described in the [Erlang Errors](#erlang-errors) section
+above). This table lays out those terse error codes and related log
+messages, if they exist.
+
+### Riak Core
+
+Riak Core is the underlying implementation for KV. These are errors
+originating from that framework, and can appear whether you use KV,
+Search, or any Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g.
backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check `riak admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in the expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections, and ensure all nodes share the same Erlang cookie setting (`vm.args` `-setcookie`)
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use, e.g. `{riak_kv_stat, true}`
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possible
corrupted ring
+`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN` and the sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.1/using/repair-recovery/errors/#more"> Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive an `xfer_complete` status
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be
+Riak proper.
This is the source of most of the code, and consequently, +most of the error messages. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`all_nodes_down` | | No nodes are available | Check `riak admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status +`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query +`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that the coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.1/using/repair-recovery/errors/#more"> Step 1</a>. +`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/3.0.1/using/repair-recovery/errors/#more"> Step 1</a>. +`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone +`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value +`{field_parsing_failed, {Field, Value}}` | `Could not parse field +<Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example, a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer +`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug +`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend +`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak admin ring-status`. +`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings +`{invalid_inputdef, InputDef}` | | Bad input definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe` +`invalid_message` | | Unknown event sent to module | Ensure you're running the same versions of Riak (and specifically poolboy) across all nodes +`{invalid_range, Args}` | | Index range query has Start > End | Fix your query +`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result +`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files +`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key +`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk. 
If this happens repetitively, stop/start the Riak node, forcing a memory realloc +`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N +`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized +`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe` +`notfound` | | No value found | Value was deleted, or was not yet stored or replicated +`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high +`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value +`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code +`{pw_val_unsatisfied, PR, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high +`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value +`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high +`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value +`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called +`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value +`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false` +`timeout` | | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.1/using/repair-recovery/errors/#more"> Step 1</a>. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production. +`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format +`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format +`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value +`too_many_results` | | Too many results are attempted to be returned | This is a protective error. 
Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000) +`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin` +`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high +`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value + | `Invalid equality query <SKey>` | Equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query + | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query + | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution + +### Backend Errors + +These errors tend to stem from server-based problems. Backends are +sensitive to low or corrupt disk or memory resources, native code, and +configuration differences between nodes. Conversely, a network issue is +unlikely to affect a backend. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config +`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where Bitcask data is stored, under the `bitcask` section +`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list +`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config` +`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure +`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config` +`not_loaded` | | Native driver not loading | Ensure your native drivers exist (.dll or .so files under lib/`project`/priv, where `project` is most likely eleveldb) +`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket +`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production + +### JavaScript + +These are some errors related to JavaScript pre-commit functions, +MapReduce functions, or simply the management of the pool of JavaScript +VMs. If you do not use JavaScript, these should not be encountered. If +they are, check your configuration for overly high `*js_vm*` values, or treat them as a symptom of a deeper issue, such as low resources. 
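+If you rely on JavaScript heavily, the VM pool sizes referenced in the table below live in the `riak_kv` section of `app.config`. A minimal sketch follows; the counts are illustrative assumptions, not recommendations, so tune them to your own workload: + +```appconfig +%% Sketch of pool settings in the riak_kv section of app.config; +%% the counts below are example values only. +{map_js_vm_count, 24}, +{reduce_js_vm_count, 18}, +{hook_js_vm_count, 2} +``` 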
+ +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`) +`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A UTF-8 character given was in a bad format | Only use correct UTF-8 characters for JavaScript code and arguments +`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments + | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties +`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | In OS X you may have compiled with `llvm-gcc` rather than `gcc`. + +### MapReduce + +These are possible errors logged by Riak's MapReduce implementation, +both legacy as well as Pipe. If you never use or call MapReduce, you +should not run across these. + +Error | Message | Description | Resolution +:-----|:--------|:------------|:---------- +`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it +`bad_mapred_inputs` | | A bad value sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary +`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce query code placed in a Riak value must be stored before execution +`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct +`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (Both issues may require a node restart) +`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has invalid input field | Fix MapReduce fields +`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly +`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks. 
Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite as Erlang functions +`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post MapReduce request with at least one +`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value +`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly +`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long running job hitting MapReduce timeout, upgrade to Pipe +`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has invalid query field | Fix MapReduce query +`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set reduce phase at all +`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted + | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation, most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects + | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen + +## Specific messages + +Although you can put together many error causes with the tables above, +here are some common yet esoteric messages with known causes and +solutions. + + Message | Resolution +:--------|:---------- +gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The Node has been changed, either through change of IP or `vm.args` `-name`, without notifying the ring. Either use the `riak admin cluster replace` command, or remove the corrupted ring files `rm -rf /var/lib/riak/ring/*` and rejoin to the cluster +gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a TTL setting on one node's memory backend and not on another's) +monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting `+zdbbl` higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1 msec). +<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/3.0.1/using/repair-recovery/errors/#more">Step 2</a>. +enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. 
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of error; check for existence of stale `beam.smp` processes. +exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range as the `riak` user will not have access to such ports. +gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak admin cluster replace`. +** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/3.0.1/using/reference/snmp) configuration. +RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search. + + +### More + +1. <a name="f1"></a>Ensure node inter-communication + - Check `riak admin member-status` and ensure the cluster is valid. + - Check `riak admin ring-status` and ensure the ring and vnodes are communicating as expected. + - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node. + - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster. + - The `vm.args` `-name` value must not change after joining the node (unless you use `riak admin cluster replace`). + +2. <a name="f2"></a>Run LevelDB compaction + 1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting, more than one might be a strong indication of a hardware or OS bug)* + 2. Stop Riak on the node: `riak stop` + 3. Start an Erlang session (do not start Riak; we just want Erlang) + 4. From the Erlang console perform the following command to open the LevelDB database: + + ```erlang + [application:set_env(eleveldb, Var, Val) || {Var, Val} <- + [{max_open_files, 2000}, + {block_size, 1048576}, + {cache_size, 20*1024*1024*1024}, + {sync, false}, + {data_root, "/var/db/riak/leveldb"}]]. + ``` + 5. For each of the corrupted LevelDB databases (found by `find . -name "LOG" -exec grep -l 'Compaction error' {} \;`) run this command, substituting in the proper vnode number. + + ```erlang + eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []). + ``` + 6. When all have finished successfully you may restart the node: `riak start` + 7. Check for proper operation by looking at log files in /var/log/riak and in the LOG files in the affected LevelDB vnodes. 
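+As a follow-up to step 5 above, the per-vnode repairs can also be run in a single pass from the same Erlang console. This is only a sketch: it assumes the corrupted vnode directory names from step 1 have been collected into a hypothetical `VNodeList` variable, and that the data root is the `/var/db/riak/leveldb` path used above. + +```erlang +%% Sketch: batch repair over the vnodes found in step 1. +%% VNodeList and the data root are assumptions, not fixed names. +VNodeList = ["442446784738847563128068650529343492278651453440"]. +[eleveldb:repair("/var/db/riak/leveldb/" ++ V, []) || V <- VNodeList]. +``` 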
+ + + diff --git a/content/riak/kv/3.0.1/using/repair-recovery/failed-node.md b/content/riak/kv/3.0.1/using/repair-recovery/failed-node.md new file mode 100644 index 0000000000..bbdd80ba54 --- /dev/null +++ b/content/riak/kv/3.0.1/using/repair-recovery/failed-node.md @@ -0,0 +1,114 @@ +--- +title: "Recovering a Failed Node" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Recover a Failed Node" + identifier: "repair_recover_failed_node" + weight: 104 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.1/ops/running/recovery/failed-node + - /riak/kv/3.0.1/ops/running/recovery/failed-node + +--- + +## General Recovery Notes + +A Riak node can fail for many reasons, but a handful of checks enable you to +uncover some of the most common problems that can lead to node failure, +such as checking for RAID and filesystem consistency or faulty memory and +ensuring that your network connections are fully functioning. + +When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster. + +During the recovery process, hinted handoff will kick in and update the data on +the recovered node with updates accepted from other nodes in the cluster. Your +cluster may temporarily return `not found` for objects that are currently +being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on +these scenarios, in particular how the system behaves while the failed node is +not part of the cluster). + +## Node Name Changed + +If you are recovering from a scenario in which node name changes are out of +your control, you'll want to notify the cluster of its *new* name using the +following steps: + +1. Stop the node you wish to rename: + + ```bash + riak stop + ``` + + +2. Mark the node down from another node in the cluster: + + ```bash + riak admin down <previous_node_name> + ``` + +3. Update the node name in Riak's configuration files: + + ```riakconf + nodename = <updated_node_name> + ``` + + ```vmargs + -name <updated_node_name> + ``` + +4. Delete the ring state directory (usually `/var/lib/riak/ring`). + +5. Start the node again: + + ```bash + riak start + ``` + +6. Ensure that the node comes up as a single instance: + + ```bash + riak admin member-status + ``` + + The output should look something like this: + + ``` + ========================= Membership ========================== +Status Ring Pending Node +--------------------------------------------------------------- +valid 100.0% -- 'dev-rel@127.0.0.1' +--------------------------------------------------------------- +Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +7. Join the node to the cluster: + + ```bash + riak admin cluster join <node_name_of_a_member_of_the_cluster> + ``` + +8. Replace the old instance of the node with the new: + + ```bash + riak admin cluster force-replace <previous_node_name> <new_node_name> + ``` + +9. 
Review the changes: + + ```bash + riak admin cluster plan + ``` + + Finally, commit those changes: + + ```bash + riak admin cluster commit + ``` + + + diff --git a/content/riak/kv/3.0.1/using/repair-recovery/failure-recovery.md b/content/riak/kv/3.0.1/using/repair-recovery/failure-recovery.md new file mode 100644 index 0000000000..80952066ae --- /dev/null +++ b/content/riak/kv/3.0.1/using/repair-recovery/failure-recovery.md @@ -0,0 +1,129 @@ +--- +title: "Failure & Recovery" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Failure & Recovery" + identifier: "repair_recover_failure" + weight: 100 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.1/ops/running/recovery/failure-recovery + - /riak/kv/3.0.1/ops/running/recovery/failure-recovery + +--- + +Riak was built to withstand---or at the very least reduce the severity +of---many types of system failure. Nonetheless, bugs are a reality, +hardware does break, and occasionally Riak itself will fail. Here, we'll +list some steps that can be taken to minimize the harm caused by a general +cluster failure. + +## Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, backup log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally and +are not affected by a wider problem like a virtualization or network outage. +Try to determine the cause of the problem from the data you have collected. + +## Data Loss + +Many failures incur no data loss or minimal loss that can be +repaired automatically, without intervention. Outage of a single node +does not necessarily cause data loss, as other replicas of every key are +available elsewhere in the cluster. Once the node is detected as down, +other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/3.0.1/learn/glossary/#hinted-handoff)). + +More severe data loss scenarios usually relate to hardware failure. +If data is lost, several options are available for restoring it. + +1. **Restore from backup** - A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but it can be used to partially restore data from + lost storage volumes. If running in a RAID configuration, rebuilding + the array may also be possible. +2. **Restore from multi-cluster replication** - If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the `riak-repl` + command. +3. **Restore using intra-cluster repair** - Riak versions 1.2 and greater + include a repair feature which will restore lost partitions with + data from other replicas. Currently, this must be invoked manually + using the Riak console and should be performed with guidance from a + Basho Client Services Engineer. + +Once data has been restored, normal operations should continue. If +multiple nodes completely lose their data, consultation and assistance +from Basho are strongly recommended. + +## Data Corruption + +Data at rest on disk can become corrupted by hardware failure or other +events. 
Generally, the Riak storage backends are designed to handle +cases of corruption in individual files or entries within files, and can +repair them automatically or simply ignore the corrupted parts. +Otherwise, clusters can recover from data corruption in roughly the same +way that they recover from data loss. + +## Out-of-Memory + +Sometimes, Riak will exit when it runs out of available RAM. While this +does not necessarily cause data loss, it may indicate that the cluster +needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully. + +Replacing the node with one that has greater RAM capacity may temporarily +alleviate the problem, but out-of-memory (OOM) issues tend to be an indication +that the cluster is under-provisioned. + +## High Latency / Request Timeout + +High latencies and timeouts can be caused by slow disks or networks or an +overloaded node. Check `iostat` and `vmstat` or your monitoring system to +determine the state of resource usage. If I/O utilization is high but +throughput is low, this may indicate that the node is responsible for +too much data and growing the cluster may be necessary. Additional RAM +may also improve latency because more of the active dataset will be +cached by the operating system. + +Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/3.0.1/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to +the number of siblings, causing longer disk service times and slower +network responses. + +Sibling explosion can be detected by examining the `node_get_fsm_siblings` +and `node_get_fsm_objsize` statistics from the `riak admin status` command. +To recover from sibling explosion, the application should be throttled and +the resolution policy might need to be invoked manually on offending keys. + +A Basho CSE can assist in manually finding large values, i.e. those that +potentially have a sibling explosion problem, in the storage backend. + +MapReduce requests typically involve multiple I/O operations and are +thus the most likely to time out. From the perspective of the client +application, the success of MapReduce requests can be improved by reducing the +number of inputs, supplying a longer request timeout, and reducing the usage +of secondary indexes. Heavily loaded clusters may experience more MapReduce +timeouts simply because many other requests are being serviced as well. Adding +nodes to the cluster can reduce MapReduce failure in the long term by +spreading load and increasing available CPU and IOPS. + + +## Cluster Recovery From Backups + +See [Changing Cluster Information]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery. + +{{% note title="Tip" %}} +If you are a TI Tokyo Riak support customer and require assistance or +further advice with a cluster recovery, please file a ticket with the +<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>. 
+{{% /note %}} + + + diff --git a/content/riak/kv/3.0.1/using/repair-recovery/repairs.md b/content/riak/kv/3.0.1/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..d064bcb9a3 --- /dev/null +++ b/content/riak/kv/3.0.1/using/repair-recovery/repairs.md @@ -0,0 +1,391 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.1/ops/running/recovery/repairing-indexes + - /riak/kv/3.0.1/ops/running/recovery/repairing-indexes + - /riak/3.0.1/ops/running/recovery/failed-node + - /riak/kv/3.0.1/ops/running/recovery/failed-node + - /riak/3.0.1/ops/running/recovery/repairing-leveldb + - /riak/kv/3.0.1/ops/running/recovery/repairing-leveldb + - /riak/3.0.1/ops/running/recovery/repairing-partitions + - /riak/kv/3.0.1/ops/running/recovery/repairing-partitions + +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/3.0.1/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/3.0.1/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/3.0.1/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`. + +Compaction error messages take the following form: + +``` +<timestamp> Compaction Error: Corruption: corrupted compressed block contents +``` + +To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction Error` in each `LOG` file. Here is an example script: + +```bash +find . -name "LOG" -exec grep -l 'Compaction error' {} \; +``` + +If there are compaction errors in any of your vnodes, those will be listed in the console. If any vnode has experienced such errors, you would see output like this: + +``` +./442446784738847563128068650529343492278651453440/LOG +``` + + +{{% note %}} +While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level. +{{% /note %}} + + +## Healing Corrupted LevelDBs + +When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not. + +Choose your setup below: + +1. [Just LevelDB](#leveldb) +2. [LevelDB with tiered storage](#leveldb-with-tiered-storage) + + +### LevelDB + +Follow the steps below to heal your corrupted LevelDB. + +1\. Stop the node: + +```bash +riak stop +``` + +2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command: + +```bash +`riak ertspath`/erl +``` + +{{% note title="Erlang version" %}} +Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command. +{{% /note %}} + +3\. Once in the shell, run the following command: + +```erlang +application:set_env(eleveldb, data_root, ""). +``` + +4\. Then set `Options` equal to an empty list: + +```erlang +Options = []. +``` + +5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs. +`VNodeList` should be a list of each corrupted LevelDB partition that you found using the [`find` command above](#checking-for-compaction-errors). + +```erlang +DataRoot = "»path to your data root«". +VNodeList = ["»vnode id you want to repair«", ...]. +``` + +6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`. + +```erlang +RepairPath = fun(Root, VNodeNumber) -> Path = lists:flatten(Root ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end. +[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList]. +``` + +7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual. + +```bash +riak start +``` + +### LevelDB with Tiered Storage + +Follow the steps below to heal your corrupted LevelDB. + +1\. Stop the node: + +```bash +riak stop +``` + +2\. Check your `riak.conf` file and make note of the following values: + +* leveldb.tiered (integer) +* leveldb.tiered.path.fast +* leveldb.tiered.path.slow + +3\. 
To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command: + +```bash +`riak ertspath`/erl +``` + +{{% note title="Erlang version" %}} +Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command. +{{% /note %}} + +4\. Once in the shell, run the following command: + +```erlang +application:set_env(eleveldb, data_root, ""). +``` + +5\. Then supply the information you noted in Step 2: + +```erlang +Options = [ + {tiered_slow_level, »leveldb.tiered value«}, + {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"}, + {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"} +]. +``` + +6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs. +`VNodeList` should be a list of the corrupted LevelDB partitions that you found using the [`find` command above](#checking-for-compaction-errors), provided in double quotes. + +```erlang +DataRoot = "»path to your data root«". +VNodeList = ["»vnode id you want to repair«", ...]. +``` + +7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`. + +```erlang +RepairPath = fun(Root, VNodeNumber) -> Path = lists:flatten(Root ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end. +[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList]. +``` + +8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual. + +```bash +riak start +``` + + +## Repairing Partitions + +If you have experienced a loss of object replicas in your cluster, you +may need to perform a repair operation on one or more of your data +[partitions]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically +run in situations where partitions or whole nodes are lost due to +corruption or hardware failure. In these cases, nodes or partitions are +brought back online without any data, which means that the need to +repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/) is enabled. 
+ +You will need to run a repair if the following are both true: + +* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* You have both non-expiring data and keys that are not accessed + frequently (which means that they are not likely to be subject to + [read repair]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + +You will most likely not need to run a repair operation if _any_ of the +following is true: + +* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/3.0.1/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Your entire key set is accessed frequently, allowing passive read + repair to repair the partitions +* Your data expires frequently + +In most cases, we recommend either using active anti-entropy or, if +necessary and only when necessary, running a repair operation using the +instructions below. + +### Running a Repair + +The Riak KV repair operation will repair objects from a node's adjacent +partitions on the ring, consequently fixing the index. This is done as +efficiently as possible by generating a hash range for all the buckets +and thus avoiding a preflist calculation for each key. Only a hash of +each key is done, its range determined from a bucket->range map, and +then the hash is checked against the range. + +Repairs are not allowed to occur during ownership changes. Since +ownership entails the moving of partition data, it is safest to make them +mutually exclusive events. If you join or remove a node all repairs +across the entire cluster will be killed. + +### Repairing a Single Partition + +In the case of data loss in a single partition, only that partition can +be repaired. + +1. From any node in the cluster, attach to Riak's Erlang shell: + + ```bash + riak attach + ``` + + You may have to hit **Enter** again to get a console prompt. + +2. Execute the repair for a single partition using the below command: + + ```erlang + riak_kv_vnode:repair(»Partition ID«). + ``` + + where `»Partition ID«` is replaced by the ID of the partition to + repair. For example: + + ```erlang + riak_kv_vnode:repair(251195593916248939066258330623111144003363405824). + ``` + +3. Once the command has been executed, detach from Riak using +`Control-C`. + +### Repairing All Partitions on a Node + +If a node is lost, all partitions currently owned by that node can be +repaired. + +1. From any node in the cluster, attach to Riak's Erlang shell: + + ```bash + riak attach + ``` + +2. Get a copy of the current Ring: + + ```erlang + {ok, Ring} = riak_core_ring_manager:get_my_ring(). + ``` + + You will get a lot of output with ring record information. + You can safely ignore it. + +3. Get a list of partitions owned by the node that needs to be repaired. +Replace `dev1@127.0.0.1` with the name of the node to be repaired. The +name can be found in each node's `vm.args` file, specified as the +`-name` parameter, if you are using the older configuration system; if +you are using the newer, `riak.conf`-based system, the name is given by +the `nodename` parameter. + + ```erlang + Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)]. + ``` + + **Note**: The above is an [Erlang list + comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) + that loops over each `{Partition, Node}` tuple in the ring and + extracts only the partitions that match the given node name, as a + list. + +4. 
Execute the repair on all the partitions. Executing the repairs all +at once will cause a lot of `{shutdown, max_concurrency}` messages in +the logs. These can be safely ignored, as it is just the transfers +mechanism enforcing an upper limit on the number of concurrent +transfers. + + ```erlang + [riak_kv_vnode:repair(P) || P <- Partitions]. + ``` +5. Once the command has been executed, detach from Riak using +`Control-C`. + +### Monitoring Repairs + +The above repair commands can be monitored via the `riak admin +transfers` command. + +### Killing a Repair + +Currently there is no easy way to kill an individual repair. The only +option is to kill all repairs targeting a given node. This is done by +running `riak_core_vnode_manager:kill_repairs(Reason)` on the node +undergoing repair. This command can be executed from a `riak attach` +session like below: + +```erlang +riak_core_vnode_manager:kill_repairs(killed_by_user). +``` + +Log entries will reflect that repairs were killed manually, and will +look similar to: + +``` +2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user +``` + +Repairs on a node can also be killed remotely from another node in the +cluster. From a `riak attach` session the below command can be used: + +```erlang +rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]). +``` + + + diff --git a/content/riak/kv/3.0.1/using/repair-recovery/rolling-replaces.md b/content/riak/kv/3.0.1/using/repair-recovery/rolling-replaces.md new file mode 100644 index 0000000000..75c8d7b56d --- /dev/null +++ b/content/riak/kv/3.0.1/using/repair-recovery/rolling-replaces.md @@ -0,0 +1,76 @@ +--- +title: "Rolling Replaces" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Rolling Replaces" + identifier: "repair_recover_replace" + weight: 106 + parent: "managing_repair_recover" +toc: true +aliases: + +--- + +[upgrade]: {{<baseurl>}}riak/kv/3.0.1/setup/upgrading/cluster/ +[rolling restarts]: {{<baseurl>}}riak/kv/3.0.1/using/repair-recovery/rolling-restart/ +[add node]: {{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes + +Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis. + +The following steps should be undertaken on each Riak KV node that you wish to replace: + +1\. Create a free node: + + a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster. + + b\. Or leave a node that is currently in the cluster: + + ```bash + riak admin cluster leave »nodename« + ``` + + After creating a node or leaving a node, wait for all transfers to complete: + + ```bash + riak admin transfers + ``` + +2\. Join the free node to your cluster: + +```bash +riak admin cluster join »free_node« +``` + +3\. Next, replace the free node with an existing node: + +```bash +riak admin cluster replace »free_node« »nodename« +``` + +4\. Then review the cluster transition plan: + +```bash +riak admin cluster plan +``` + +5\. And commit the changes: + +```bash +riak admin cluster commit +``` + +6\. Wait for all transfers to complete: + +```bash +riak admin transfers +``` + +7\. Repeat steps 2-6 above until each node has been replaced (a consolidated sketch of one replace cycle follows after this list). + +8\. Join the replaced node back into the cluster or decommission the additional node that was created. 
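+A consolidated sketch of one replace cycle (steps 2-6) is shown below. The node names here are placeholder assumptions to substitute with your own, and you should still inspect the output of `riak admin cluster plan` before committing: + +```bash +# Sketch of a single replace cycle; FREE and OLD are assumed names. +FREE=riak@192.168.1.20 +OLD=riak@192.168.1.11 +riak admin cluster join "$FREE" +riak admin cluster replace "$FREE" "$OLD" +riak admin cluster plan # review the transition plan first +riak admin cluster commit +riak admin transfers # repeat until no transfers remain active +``` 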
+ + + diff --git a/content/riak/kv/3.0.1/using/repair-recovery/rolling-restart.md b/content/riak/kv/3.0.1/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..4c2aee9a1e --- /dev/null +++ b/content/riak/kv/3.0.1/using/repair-recovery/rolling-restart.md @@ -0,0 +1,64 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.1/ops/running/recovery/rolling-restart + - /riak/kv/3.0.1/ops/running/recovery/rolling-restart + +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/3.0.1/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. + + + diff --git a/content/riak/kv/3.0.1/using/repair-recovery/secondary-indexes.md b/content/riak/kv/3.0.1/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..3d765fc008 --- /dev/null +++ b/content/riak/kv/3.0.1/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,142 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.1/ops/running/recovery/repairing-indexes + - /riak/kv/3.0.1/ops/running/recovery/repairing-indexes + +--- + +The `riak admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
+ +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak admin repair-2i <Partition_ID> +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak admin repair-2i kill +``` + +---- + +## Repairing Search Indexes + +Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned. + +### Running a Repair + +If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index. + +This is done as efficiently as possible by generating a hash range for all the buckets and thus avoiding a preflist calculation for each key. Only a hash of each key is done, its range determined from a bucket→range map, and then the hash is checked against the range. + +This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly. + +1. From a cluster node with Riak installed, attach to the Riak console: + + ```bash + riak attach + ``` + + You may have to hit enter again to get a console prompt. + +2. Get a list of partitions owned by the node that needs repair: + + ```erlang + {ok, Ring} = riak_core_ring_manager:get_my_ring(). + ``` + + You will get a lot of output with Ring record information. You can safely ignore it. + +3. Then run the following code to get a list of partitions. Replace 'dev1@127.0.0.1' with the name of the node you need to repair. + + ```erlang + Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)]. + ``` + + _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html), that loops over each `{Partition, Node}` tuple in the Ring, and extracts only the partitions that match the given node name, as a list._ + +4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` spam but it's not anything to worry about. That is just the transfers mechanism enforcing an upper limit on the number of concurrent transactions. + + ```erlang + [riak_search_vnode:repair(P) || P <- Partitions]. + ``` + +5. When you're done, press `Ctrl-D` to disconnect the console. DO NOT RUN q() which will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service, it does not stop the code from running. + + +### Monitoring a Repair + +The above Repair command can be slow, so if you reattach to the console, you can run the repair_status function. You can use the `Partitions` variable defined above to get the status of every partition. + +```erlang +[{P, riak_search_vnode:repair_status(P)} || P <- Partitions]. +``` + +When you're done, press `Ctrl-D` to disconnect the console. + +### Killing a Repair + +Currently there is no easy way to kill an individual repair. The only +option is to kill all repairs targeting a given node. 
This is done by +running `riak_core_vnode_manager:kill_repairs(Reason)` on the node +undergoing repair. This means you'll either have to be attached to +that node's console or you can use the `rpc` module to make a remote +call. Here is an example of killing all repairs targeting partitions +on the local node. + +```erlang +riak_core_vnode_manager:kill_repairs(killed_by_user). +``` + +Log entries will reflect that repairs were killed manually, something akin to this: + +``` +2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user +``` + +Here is an example of executing the call remotely. + +```erlang +rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]). +``` + +When you're done, press `Ctrl-D` to disconnect the console. + +Repairs are not allowed to occur during ownership changes. Since +ownership entails the moving of partition data, it is safest to make +them mutually exclusive events. If you join or remove a node all +repairs across the entire cluster will be killed. + + + diff --git a/content/riak/kv/3.0.1/using/running-a-cluster.md b/content/riak/kv/3.0.1/using/running-a-cluster.md new file mode 100644 index 0000000000..a8c874f435 --- /dev/null +++ b/content/riak/kv/3.0.1/using/running-a-cluster.md @@ -0,0 +1,339 @@ +--- +title: "Running a Cluster" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Running a Cluster" + identifier: "managing_running_a_cluster" + weight: 200 + parent: "managing" +toc: true +aliases: + - /riak/3.0.1/ops/building/basic-cluster-setup + - /riak/kv/3.0.1/ops/building/basic-cluster-setup + +--- + +Configuring a Riak cluster involves instructing each node to listen on a +non-local interface, i.e. not `127.0.0.1`, and then joining all of the +nodes together to participate in the cluster. + +Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/) located in your `rel/riak/etc` directory (if +you compiled from source) or `/etc` (if you used a binary install of +Riak). + +The commands below presume that you are running from a source install, +but if you have installed Riak with a binary install, you can substitute +the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak admin` +with `sudo /usr/sbin/riak admin`. The `riak` and `riak admin` scripts +are located in the `/bin` directory of your installation. + +> **Note on changing the `name` value** +> +> If possible, you should avoid starting Riak prior to editing the name of +a node. This setting corresponds to the `nodename` parameter in the +`riak.conf` file if you are using the newer configuration system, and to +the `-name` parameter in `vm.args` (as described below) if you are using +the older configuration system. If you have already started Riak with +the default settings, you cannot change the `-name` setting and then +successfully restart the node. +> +> If you cannot restart after changing the `-name` value, you have two +options: +> +> * Discard the existing ring metadata by removing the contents of the +`ring` directory. This will require rejoining all nodes into a +cluster again. +> +> * Rename the node using the [`riak admin cluster replace`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node. 
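+> +> For the first option, a minimal sketch follows. It assumes the `/var/lib/riak/ring` ring directory used elsewhere in these docs; adjust the path to your install: +> +> ```bash +> # Sketch: discard ring metadata so the node can be renamed; +> # every node must then be rejoined into a cluster. +> riak stop +> rm -rf /var/lib/riak/ring/* +> riak start +> ```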
+ +## Configure the First Node + +First, stop your Riak node if it is currently running: + +```bash +riak stop +``` + +#### Select an IP address and port + +Let's say that the IP address for your cluster is 192.168.1.10 and that +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +interface due to performance gains), you should change your +configuration file: + +```riakconf +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +%% In the pb section of riak_core: + +{"127.0.0.1", 8087 }, +``` + +becomes + +```riakconf +listener.protobuf.internal = 192.168.1.10:8087 +``` + +```appconfig +%% In the pb section of riak_core: + +{"192.168.1.10", 8087 }, +``` + +{{% note title="Note on upgrading to 2.0" %}} +If you are upgrading to Riak version 2.0 or later from a pre-2.0 +release, you can use either your old `app.config`/`vm.args` +configuration files or the newer `riak.conf` if you wish. If you have +installed Riak 2.0 directly, you should use only `riak.conf`. + +Below, examples will be provided for both the old and new configuration +systems. Bear in mind that you need to use either the older or the newer +but never both simultaneously. + +More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference). +{{% /note %}} + +If you're using the HTTP interface, you will need to alter your +configuration in an analogous way: + +```riakconf +listener.http.internal = 127.0.0.1:8098 +``` + +```appconfig +%% In the riak_core section: + +{http, [ {"127.0.0.1", 8098 } ]}, +``` + +becomes + +```riakconf +listener.http.internal = 192.168.1.10:8098 +``` + +```appconfig +{http, [ {"192.168.1.10", 8098 } ]}, +``` + +#### Name your node + +Every node in Riak has a name associated with it. The default name is +`riak@127.0.0.1`. Let's say that you want to change the name to +`riak@192.168.1.10`: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +becomes + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +> **Node Names** +> +> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10` +are both acceptable node naming schemes, but using the FQDN style is +preferred. +> +> Once a node has been started, in order to change the name you must +either remove ring files from the `/data/ring` directory or +[`riak admin cluster force-replace`]({{<baseurl>}}riak/kv/3.0.1/using/admin/riak admin/#cluster-force-replace) the node. + +#### Start the node + +Now that your node is properly configured, you can start it: + +```bash +riak start +``` + +If the Riak node has been previously started, you must use the +`riak admin cluster replace` command to change the node name and update +the node's ring file. + +```bash +riak admin cluster replace riak@127.0.0.1 riak@192.168.1.10 +``` + +{{% note title="Note on single nodes" %}} +If a node is started singly using default settings, as you might do when you +are building your first test environment, you will need to remove the ring +files from the data directory after you edit your configuration files. +`riak admin cluster replace` will not work since the node has not been joined +to a cluster. 
+{{% /note %}} + +As with all cluster changes, you need to view the planned changes by +running `riak admin cluster plan` and then running `riak admin cluster +commit` to finalize those changes. + +The node is now properly set up to join other nodes for cluster +participation. You can proceed to adding a second node to the cluster. + +## Add a Second Node to Your Cluster + +Repeat the above steps for a second host on the same network, providing +the second node with a host/port and node name. Once the second node has +started, use `riak admin cluster join` to join the second node to the +first node, thereby creating an initial Riak cluster. Let's say that +we've named our second node `riak@192.168.1.11`. From the new node's +`/bin` directory: + +```bash +riak admin cluster join riak@192.168.1.10 +``` + +Output from the above should resemble: + +``` +Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10` +``` + +Next, plan and commit the changes: + +```bash +riak admin cluster plan +riak admin cluster commit +``` + +After the last command, you should see: + +``` +Cluster changes committed +``` + +If your output was similar, then the second Riak node is now part of the +cluster and has begun syncing with the first node. Riak provides several +ways to determine the cluster's ring status. Here are two ways to +examine your Riak cluster's ring: + +1. Using the `riak admin` command: + + ```bash + bin/riak admin status | grep ring_members + ``` + + With output resembling the following: + + ```bash + ring_members : ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +2. Running the `riak attach` command. This will open up an Erlang shell, +into which you can type the following command: + + ```erlang + 1> {ok, R} = riak_core_ring_manager:get_my_ring(). + + %% Response: + + {ok,{chstate,'riak@192.168.1.10',......... + (riak@192.168.52.129)2> riak_core_ring:all_members(R). + ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +To join additional nodes to your cluster, repeat the above steps. You +can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/3.0.1/using/cluster-operations/adding-removing-nodes) from a cluster. + +> **Ring Creation Size** +> +> All nodes in the cluster +must have the same initial ring size setting in order to join, and +participate in cluster activity. This setting can be adjusted in your +configuration file using the `ring_creation_size` parameter if you're +using the older configuration system or `ring_size` in the new system. +> +> Check the value of all nodes if you receive a message like this: +> `Failed: riak@10.0.1.156 has a different ring_creation_size` + +## Running Multiple Nodes on One Host + +If you built Riak from source code, or if you are using the Mac OS X +pre-built package, then you can easily run multiple Riak nodes on the +same machine. The most common scenario for doing this is to experiment +with running a Riak cluster. + +**Note**: If you have installed the `.deb` or `.rpm` package, then you +will need to download and build Riak from source to follow the +directions below. + +To run multiple nodes, make copies of the `riak` directory. + +- If you ran `make all rel`, then this can be found in `./rel/riak` + under the Riak source root directory. +- If you are running Mac OS X, then this is the directory where you + unzipped the `.tar.gz` file. + +Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`, +`./rel/riak3`, and so on, you need to make two changes: + +1. 
Set your handoff port and your Protocol Buffers or HTTP port +(depending on which interface you are using) to different values on each +node. For example: + + ```riakconf + # For Protocol Buffers: + listener.protobuf.internal = 127.0.0.1:8187 + + # For HTTP: + listener.http.internal = 127.0.0.1:8198 + + # For either interface: + handoff.port = 8199 + ``` + + ```appconfig + %% In the pb section of riak_core: + {"127.0.0.1", 8187 } + + %% In the http section of riak_core: + {"127.0.0.1", 8198} + ``` + +2. Change the name of each node to a unique name. Now, start the nodes, +changing path names and nodes as appropriate: + +```bash +./rel/riak1/bin/riak start +./rel/riak2/bin/riak start +./rel/riak3/bin/riak start + +# etc +``` + +Next, join the nodes into a cluster: + +```bash +./rel/riak2/bin/riak admin cluster join riak1@127.0.0.1 +./rel/riak3/bin/riak admin cluster join riak1@127.0.0.1 +./rel/riak2/bin/riak admin cluster plan +./rel/riak2/bin/riak admin cluster commit +``` + +## Multiple Clusters on One Host + +Using the above technique, it is possible to run multiple clusters on +one computer. If a node hasn’t joined an existing cluster, it will +behave just as a cluster would. Running multiple clusters on one +computer is simply a matter of having two or more distinct nodes or +groups of clustered nodes. + + + diff --git a/content/riak/kv/3.0.1/using/security.md b/content/riak/kv/3.0.1/using/security.md new file mode 100644 index 0000000000..92b5a3039f --- /dev/null +++ b/content/riak/kv/3.0.1/using/security.md @@ -0,0 +1,199 @@ +--- +title: "Security & Firewalls" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Security" + identifier: "managing_security" + weight: 205 + parent: "managing" +toc: true +aliases: + - /riak/3.0.1/ops/advanced/security + - /riak/kv/3.0.1/ops/advanced/security + +--- + +[config reference search]: {{<baseurl>}}riak/kv/3.0.1/configuring/reference/#search +[config search enabling]: {{<baseurl>}}riak/kv/3.0.1/configuring/search/#enabling-riak-search +[config v3 ssl]: {{<baseurl>}}riak/kv/3.0.1/configuring/v3-multi-datacenter/ssl +[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html +[security basics]: {{<baseurl>}}riak/kv/3.0.1/using/security/basics +[security managing]: {{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/ +[Solr]: http://lucene.apache.org/solr/ +[usage search]: {{<baseurl>}}riak/kv/3.0.1/developing/usage/search + +> **Internal security** +> +> This document covers network-level security. For documentation on the +authentication and authorization features introduced in Riak 2.0, see +[Authentication and Authorization][security basics] and [Managing Security Sources][security managing] + +This article discusses standard configurations and port settings to use +when providing network security for a Riak Cluster. There are two +classes of access control for Riak: + +* Other Riak nodes participating in the cluster +* Clients making use of the Riak cluster + +The settings for both access groups are located in your cluster's +configuration settings. If you are using the newer configuration system, +you can set a host and port for each node in that node's `riak.conf` +file, setting `listener.protobuf` if you are using Riak's Protocol +Buffers interface or `listener.http` if you are using HTTP (or +`listener.https` if you are using SSL). 
If you are using the older +configuration system, adjust the settings of `pb`, `http`, or `https`, +depending on which client interface you are using. + +Make note of these configurations and set up your firewall to allow +incoming TCP access to those ports or IP address/port combinations. +Exceptions to this are the `handoff_ip` and `handoff_port` directives. +Those are for communication between Riak nodes only. + +## Inter-node Communication + +Riak uses the Erlang distribution mechanism for most inter-node +communication. Riak identifies other machines in the ring using Erlang +identifiers (`<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves +these node identifiers to a TCP port on a given machine via the Erlang +Port Mapper daemon (epmd) running on each cluster node. + +By default, epmd binds to TCP port 4369 and listens on the wildcard +interface. For inter-node communication, Erlang uses an unpredictable +port by default; it binds to port 0, which means the first available +port. + +For ease of firewall configuration, Riak can be configured +to instruct the Erlang interpreter to use a limited range +of ports. For example, to restrict the range of ports that Erlang will +use for inter-Erlang node communication to 6000-7999, add the following +lines to the configuration file on each Riak node: + +```riakconf +erlang.distribution.port_range.minimum = 6000 +erlang.distribution.port_range.maximum = 7999 +``` + +```appconfig +{ kernel, [ + {inet_dist_listen_min, 6000}, + {inet_dist_listen_max, 7999} + ]}, +``` + +The above lines should be added into the top level list in app.config, +at the same level as all the other applications (e.g. `riak_core`). +Then configure your firewall to allow incoming access to TCP ports 6000 +through 7999 from whichever network(s) contain your Riak nodes. + +### Riak Node Ports + +Riak nodes in a cluster need to be able to communicate freely with one +another on the following ports: + +* epmd listener: TCP:4369 +* handoff_port listener: TCP:8099 +* range of ports specified in `app.config` or `riak.conf` + +### Riak Client Ports + +Riak clients must be able to contact at least one machine in a Riak +cluster on the following TCP ports: + +Protocol | Port +:--------|:---- +<a href="../../developing/api/http">HTTP</a> | TCP port 8098 +<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087 + +### Riak Search Ports + +Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs +on each Riak node if security has been [enabled][config search enabling]. When +Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well +as some ephemeral ports. The well-known port is determined by the value of the +`search.solr.jmx_port` in each node's [Search configuration][config reference search]. +The default is 8985. + +In addition to JMX ports, Solr also binds to a well-known port of its +own, as determined by each node's `search.solr.port` setting, which is +also located in each node's Search configuration. The default is 8093. + +# Riak Security Community + +## Riak + +Riak is a powerful open-source distributed database focused on scaling +predictably and easily, while remaining highly available in the face of +server crashes, network partitions or other (inevitable) disasters. + +## Commitment + +Data security is an important and sensitive issue to many of our users. 
+A real-world approach to security allows us to balance appropriate
+levels of security and related overhead while creating a fast, scalable,
+and operationally straightforward database.
+
+### Continuous Improvement
+
+Though we make every effort to thwart security vulnerabilities whenever
+possible (including through independent reviews), no system is
+completely secure. We will never claim that Riak is 100% secure (and you
+should seriously doubt anyone who claims their solution is). What we can
+promise is that we openly accept all vulnerabilities from the community.
+When appropriate, we'll publish and make every attempt to quickly
+address these concerns.
+
+### Balance
+
+More layers of security increase operational and administrative costs.
+Sometimes those costs are warranted, sometimes they are not. Our
+approach is to strike an appropriate balance between effort, cost, and
+security.
+
+For example, Riak does not have fine-grained role-based security. Though
+it can be an attractive bullet-point in a database comparison chart,
+you're usually better off finely controlling data access through your
+application or a service layer.
+
+### Notifying Basho
+
+If you discover a potential security issue, please email us at
+**security@basho.com**, and allow us 48 hours to reply.
+
+We prefer to be contacted first, rather than searching for blog posts
+over the Internet. This allows us to open a dialogue with the security
+community on how best to handle a possible exploit without putting any
+users at risk.
+
+## Security Best Practices
+
+### Authentication and Authorization
+
+For instructions on how to apply permissions and to require client
+authentication, please see our documentation on [Riak Security][security basics].
+
+### Network Configurations
+
+Being a distributed database means that much of Riak's security springs
+from how you configure your network. We have a few recommendations for
+[Security and Firewalls][security basics].
+
+### Client Auth
+
+All of the Riak client libraries support encrypted TCP communication
+as well as authentication and authorization. For instructions on how
+to apply permissions and to require client authentication, please see
+our documentation on [Riak Security][security basics].
+
+### Multi-Datacenter Replication
+
+For those versions of Riak that support Multi Data Center (MDC)
+Replication, you can configure Riak 1.2+ to communicate over SSL, to
+seamlessly encrypt the message traffic.
+
+See also: [Multi Data Center Replication: SSL][config v3 ssl]
+
+
+
diff --git a/content/riak/kv/3.0.1/using/security/basics.md b/content/riak/kv/3.0.1/using/security/basics.md
new file mode 100644
index 0000000000..39e8a35df3
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/security/basics.md
@@ -0,0 +1,851 @@
+---
+title: "Security Basics"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "Security Basics"
+    identifier: "security_basics"
+    weight: 100
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/3.0.1/ops/running/authz
+  - /riak/kv/3.0.1/ops/running/authz
+
+---
+
+> **Note on Network security**
+>
+> This document covers only the 2.0 authentication and authorization
+features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/).
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/3.0.1/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/3.0.1/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
+ +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak admin security disable +``` + +While security is disabled, clients will need to be reconfigured to no +longer require TLS and send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics and granting users +selective access to Riak functionality (and also to revoke access). +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created. 
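+
+In practice, "renaming" a user therefore means re-creating it. A minimal
+sketch using the `add-user` and `del-user` commands described below
+(both usernames and the password are placeholders):
+
+```bash
+# Usernames are immutable, so create the user again under the new name:
+riak admin security add-user newname password=Test1234
+# Re-apply any grants and group memberships to the new user, then:
+riak admin security del-user oldname
+```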
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak admin security print-groups
+```
+
+Example output, assuming user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups |       password       |           options            |
++----------+--------+----------------------+------------------------------+
+| riakuser |        |983e8ae1421574b8733824|              []              |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in encrypted form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and a `name` of
+`lucius`, the output would look like this:
+
+```bash
++----------+----------------+----------------------+---------------------+
+| username |     groups     |       password       |       options       |
++----------+----------------+----------------------+---------------------+
+| riakuser |      dev       |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` or `security print-group` (singular) commands
+can be used with a name as argument to see the same information as
+above, except for only that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```bash
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group  |   type   |  bucket  |                 grants                 |
++--------+----------+----------+----------------------------------------+
+| admin  |    *     |    *     |      riak_kv.get, riak_kv.delete,      |
+|        |          |          |              riak_kv.put               |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|    *     |      *      |      riak_kv.get, riak_kv.delete,      |
+|          |             |              riak_kv.put               |
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately.
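+
+For example, if `admin` exists as both a user and a group, you might run
+(a sketch; the shared name is hypothetical):
+
+```bash
+# Show grants for the user "admin" only:
+riak admin security print-grants user/admin
+
+# Show grants for the group "admin" only:
+riak admin security print-grants group/admin
+```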
+ +### Add Group + +For easier management of permissions across several users, it is +possible to create groups to be assigned to those users. + +```bash +riak admin security add-group admin +``` + +### Add User + +To create a user with the username `riakuser`, we use the `add-user` +command: + +```bash +riak admin security add-user riakuser +``` + +Using the command this way will create the user `riakuser` without _any_ +characteristics beyond a username, which is the only attribute that you +must assign upon user creation. + +Alternatively, a password---or other attributes---can be assigned to the +user upon creation. Here, we'll assign a password: + +```bash +riak admin security add-user riakuser password=Test1234 +``` + +### Assigning a Password and Altering Existing User Characteristics + +While passwords and other characteristics can be set upon user creation, +it often makes sense to change user characteristics after the user has +already been created. Let's say that the user `riakuser` was created +without a password (or created _with_ a password that we'd like to +change). The `alter-user` command can be used to modify our `riakuser` +user: + +```bash +riak admin security alter-user riakuser password=opensesame +``` + +When creating or altering a user, any number of `<option>=<value>` +pairs can be appended to the end of the command. Any non-standard +options will be stored and displayed via the `riak admin security +print-users` command. + +```bash +riak admin security alter-user riakuser name=bill age=47 fav_color=red +``` + +Now, the `print-users` command should return this: + +``` ++----------+--------+----------+--------------------------------------------------+ +| username | groups | password | options | ++----------+--------+----------+--------------------------------------------------+ +| riakuser | | |[{"fav_color","red"},{"age","47"},{"name","bill"}]| ++----------+--------+----------+--------------------------------------------------+ +``` + +**Note**: Usernames _cannot_ be changed using the `alter-user` command. +For example, running `riak admin security alter-user riakuser +username=other-name`, will instead add the +`{"username","other-name"}` tuple to `riakuser`'s options. + +### Managing Groups for a User + +If we have a user `riakuser` and we'd like to assign her to the +`admin` group, we assign the value `admin` to the option `groups`: + +```bash +riak admin security alter-user riakuser groups=admin +``` + +If we'd like to make the user `riakuser` both an `admin` and an +`archoverlord`: + +```bash +riak admin security alter-user riakuser groups=admin,archoverlord +``` + +There is no way to incrementally add groups; even if `riakuser` was +already an `admin`, it is necessary to list it again when adding the +`archoverlord` group. Thus, to remove a group from a user, use +`alter-user` and list all *other* groups. + +If the user should be removed from all groups, use `groups=` with no +list: + +```bash +riak admin security alter-user riakuser groups= +``` + +### Managing Groups for Groups + +Groups can be added to other groups for cascading permissions. 
+
+```bash
+riak admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```
+riak admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```
+riak admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation |
+:----------|:----------|
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully.
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak admin security add-user client +riak admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/3.0.1/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/3.0.1/developing/usage/bucket-types) in addition to setting bucket properties. `riak admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/3.0.1/configuring/search/) document. 
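+
+The bucket type-related permissions listed earlier (`riak_core.get_bucket`
+and friends) follow the same `grant` syntax as any other permission. A
+quick sketch, in which both the `admin` group and the `maps` bucket type
+are assumptions:
+
+```bash
+# Allow an assumed "admin" group to read and modify the properties
+# of buckets of an assumed bucket type named "maps":
+riak admin security grant riak_core.get_bucket,riak_core.set_bucket on maps to admin
+```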
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Check the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticate against the given pluggable authentication module (PAM) service
+`certificate` | Authenticate using a client certificate
+
+### Example: Adding a Trusted Source
+
+Security sources can be added either to a specific user, multiple users,
+or all users (`all`).
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+|       users        |    cidr    |  source  | options  |
++--------------------+------------+----------+----------+
+|        all         |127.0.0.1/32|  trust   |    []    |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR.
+ +```bash +riak admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak admin security del-source all 127.0.0.1/32 password +riak admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/3.0.1/setup/installing/source/erlang). 
This issue should +not affect Erlang 17.0 and later. + +## Enabling SSL + +In order to use any authentication or authorization features, you must +enable SSL for Riak. **SSL is disabled by default**, but you will need +to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#client-interfaces) for the node +as well as a [certification configuration](#certificate-configuration). + +If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/3.0.1/developing/api/http) for Riak and would like to +configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host +and port. The following configuration would establish port 8088 on +`localhost` as the HTTPS port: + +```riakconf +listener.https.$name = 127.0.0.1:8088 + +# By default, "internal" is used as the "name" setting +``` + +```appconfig +{riak_core, [ + %% Other configs + {https, [{"127.0.0.1", 8088}]}, + %% Other configs + ]} +``` + +## TLS Settings + +When using Riak security, you can choose which versions of SSL/TLS are +allowed. By default, only TLS 1.2 is allowed, but this version can be +disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#security) to `on` or `off`: + +* `tls_protocols.tlsv1` +* `tls_protocols.tlsv1.1` +* `tls_protocols.tlsv1.2` +* `tls_protocols.sslv3` + +Three things to note: + +* Among the four available options, only TLS version 1.2 is enabled by + default +* You can enable more than one protocol at a time +* We strongly recommend that you do _not_ use SSL version 3 unless + absolutely necessary + +## Certificate Configuration + +If you are using any of the available [security sources]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#trust-based-authentication), you will need to do so +over a secure SSL connection. In order to establish a secure connection, +you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#security) point to the proper paths for your +generated certs. By default, Riak assumes that all certs are stored in +each node's `/etc` directory. + +If you are using the newer, `riak.conf`-based configuration system, you +can change the location of the `/etc` directory by modifying the +`platform_etc_dir`. More information can be found in our documentation +on [configuring directories]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/#directories). 
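+
+If you only need certificates to experiment with, a throwaway
+self-signed pair is enough to exercise these settings. A test-only
+sketch (the CN value and one-year lifetime are arbitrary choices, and
+self-signed certs should never reach production); the file names mirror
+the defaults in the table below:
+
+```bash
+# Generate a self-signed certificate and key for a test environment:
+openssl req -new -x509 -days 365 -nodes \
+  -keyout key.pem -out cert.pem -subj '/CN=riak@127.0.0.1'
+# A self-signed cert can double as its own signing authority in tests:
+cp cert.pem cacertfile.pem
+```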
+ +<table class="riak-conf"> + <thead> + <tr> + <th>Type</th> + <th>Parameter</th> + <th>Default</th> + </tr> + </thead> + <tbody> + <tr> + <td><strong>Signing authority</strong></td> + <td><code>ssl.cacertfile</code></td> + <td><code>#(platform_etc_dir)/cacertfile.pem</code></td> + </tr> + <tr> + <td><strong>Cert</strong></td> + <td><code>ssl.certfile</code></td> + <td><code>#(platform_etc_dir)/cert.pem</code></td> + </tr> + <tr> + <td><strong>Key file</strong></td> + <td><code>ssl.keyfile</code></td> + <td><code>#(platform_etc_dir)/key.pem</code></td> + </tr> + </tbody> +</table> + +If you are using the older, `app.config`-based configuration system, +these paths can be set in the `ssl` subsection of the `riak_core` +section. The corresponding parameters are shown in the example below: + +```appconfig +{riak_core, [ + %% Other configs + + {ssl, [ + {certfile, "./etc/cert.pem"}, + {keyfile, "./etc/key.pem"}, + {cacertfile, "./etc/cacertfile.pem"} + ]}, + + %% Other configs +]} +``` + +## Referer Checks and Certificate Revocation Lists + +In order to provide safeguards against +[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting) +(XSS) and +[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +attacks, Riak performs [secure referer +checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those +checks make it impossible to serve data directly from Riak. To disable +those checks, set the `secure_referer_check` parameter to `off`. + +If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/3.0.1/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +default. To disable this behavior, set the `check_crl` parameter to +`off`. + + + diff --git a/content/riak/kv/3.0.1/using/security/best-practices.md b/content/riak/kv/3.0.1/using/security/best-practices.md new file mode 100644 index 0000000000..f9563d1528 --- /dev/null +++ b/content/riak/kv/3.0.1/using/security/best-practices.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Security Best Practices" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Best Practices" + identifier: "security_best_practices" + weight: 102 + parent: "managing_security" +toc: true +aliases: + +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + diff --git a/content/riak/kv/3.0.1/using/security/managing-sources.md b/content/riak/kv/3.0.1/using/security/managing-sources.md new file mode 100644 index 0000000000..7f974fed4e --- /dev/null +++ b/content/riak/kv/3.0.1/using/security/managing-sources.md @@ -0,0 +1,273 @@ +--- +title: "Managing Security Sources" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Managing Security Sources" + identifier: "security_manage_sources" + weight: 101 + parent: "managing_security" +toc: true +aliases: + - /riak/3.0.1/ops/running/security-sources + - /riak/kv/3.0.1/ops/running/security-sources + +--- + +If you're looking for more general information on Riak Security, it may +be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/3.0.1/using/security/basics). + +This document provides more granular information on the four available +authentication sources in Riak Security: trusted networks, password, +pluggable authentication modules (PAM), and certificates. These sources +correspond to `trust`, `password`, `pam`, and `certificate`, +respectively, in the `riak admin security` interface. + +The examples below will assume that the network in question is +`127.0.0.1/32` and that a Riak user named `riakuser` has been +[created]({{<baseurl>}}riak/kv/3.0.1/using/security/basics/#user-management) and that +security has been [enabled]({{<baseurl>}}riak/kv/3.0.1/using/security/basics/#the-basics). + +{{% note title="Note on SSL connections" %}} +If you use _any_ of the aforementioned security sources, even `trust`, you +will need to do so via a secure SSL connection. 
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak admin security add-source riakuser 127.0.0.1/32 password +riak admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/3.0.1/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
+The default directory for certificates is `/etc`, though you can specify +a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: + +```riakconf +ssl.certfile = /path/to/cert.pem +ssl.keyfile = /path/to/key.pem +ssl.cacertfile = /path/to/cacert.pem +``` + +In the client-side example above, the client's `CN` and Riak username +needed to match. On the server (i.e. Riak) side, the `CN` specified _on +each node_ must match the node's name as registered by Riak. You can +find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/3.0.1/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +`riak-node-1`, you would need to generate your certificate with that in +mind, as in this OpenSSL example: + +```bash +openssl req -new ... '/CN=riak-node-1' +``` + +Once certificates have been properly generated and configured on all of +the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/3.0.1/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +certificate that you generated for the user `riakuser`. + +How to use Riak clients in conjunction with OpenSSL and other +certificates varies from client library to client library. We strongly +recommend checking the documentation of your client library for further +information. + +## PAM-based Authentication + +This section assumes that you have set up a PAM service bearing the name +`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition +specifying `auth` and/or other PAM services set up to authenticate a +user named `riakuser`. As in the certificate-based authentication +example above, the user's name must be the same in both your +authentication module and in Riak Security. + +If we want the user `riakuser` to use this PAM service on `localhost`, +we need to add a `pam` security source in Riak and specify the name of +the service: + +```bash +riak admin security add-source all 127.0.0.1/32 pam service=riak_pam +``` + +**Note**: If you do not specify a name for your PAM service, Riak will +use the default, which is `riak`. + +To verify that the source has been properly specified: + +```bash +riak admin security print-sources +``` + +That command should output the following: + +``` ++--------------------+------------+----------+------------------------+ +| users | cidr | source | options | ++--------------------+------------+----------+------------------------+ +| riakuser |127.0.0.1/32| pam |[{"service","riak_pam"}]| ++--------------------+------------+----------+------------------------+ +``` + +You can test that setup most easily by using `curl`. A normal request to +Riak without specifying a user will return an `Unauthorized` message: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +Response: + +``` +<html><head><title>401 Unauthorized
</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></body></html>
+```
+
+If you identify yourself as `riakuser` and are successfully
+authenticated by your PAM service, you should get either `not found` or
+a Riak object if one is stored in the specified bucket type/bucket/key
+path:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## How Sources Are Applied
+
+When managing security sources---any of the sources explained
+above---you always have the option of applying a source to either a
+single user, multiple users, or all users (`all`). If specific users and
+`all` have no sources in common, this presents no difficulty. But what
+happens if one source is applied to `all` and a different source is
+applied to a specific user?
+
+The short answer is that the more specifically assigned source---i.e. to
+the user---will be considered a user's security source. We'll illustrate
+that with the following example, in which the `certificate` source is
+assigned to `all`, but the `password` source is assigned to `riakuser`:
+
+```bash
+riak admin security add-source all 127.0.0.1/32 certificate
+riak admin security add-source riakuser 127.0.0.1/32 password
+```
+
+If we run `riak admin security print-sources`, we'll get the following
+output:
+
+```
++--------------------+------------+-----------+----------+
+|       users        |    cidr    |  source   | options  |
++--------------------+------------+-----------+----------+
+|      riakuser      |127.0.0.1/32| password  |    []    |
+|                    |127.0.0.1/32|certificate|    []    |
+|        all         |127.0.0.1/32|certificate|    []    |
++--------------------+------------+-----------+----------+
+```
+
+As we can see, `password` is set as the security source for `riakuser`,
+whereas everyone else will authenticate using `certificate`.
+
+
+
diff --git a/content/riak/kv/3.0.1/using/security/v2-v3-ssl-ca.md b/content/riak/kv/3.0.1/using/security/v2-v3-ssl-ca.md
new file mode 100644
index 0000000000..bf259bbc36
--- /dev/null
+++ b/content/riak/kv/3.0.1/using/security/v2-v3-ssl-ca.md
@@ -0,0 +1,85 @@
+---
+draft: true
+title: "V2 / V3 SSL & CA Validation"
+description: ""
+project: "riak_kv"
+project_version: 3.0.1
+menu:
+  riak_kv-3.0.1:
+    name: "V2/V3 SSL & CA Validation"
+    identifier: "security_validation"
+    weight: 103
+    parent: "managing_security"
+toc: true
+aliases:
+
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**.
Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + diff --git a/content/riak/kv/3.0.1/using/troubleshooting.md b/content/riak/kv/3.0.1/using/troubleshooting.md new file mode 100644 index 0000000000..ee0ab81d3f --- /dev/null +++ b/content/riak/kv/3.0.1/using/troubleshooting.md @@ -0,0 +1,28 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +aliases: + +--- + +[http 204]: ./http-204 + +## In This Section + +#### [HTTP 204][http 204] + +About the HTTP 204 response. + +[Learn More >>][http 204] + + + diff --git a/content/riak/kv/3.0.1/using/troubleshooting/http-204.md b/content/riak/kv/3.0.1/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..440345d6e8 --- /dev/null +++ b/content/riak/kv/3.0.1/using/troubleshooting/http-204.md @@ -0,0 +1,22 @@ +--- +title: "HTTP 204" +description: "" +project: "riak_kv" +project_version: 3.0.1 +menu: + riak_kv-3.0.1: + name: "HTTP 204" + identifier: "troubleshooting_http_204" + weight: 101 + parent: "managing_troubleshooting" +toc: true +aliases: + +--- + +In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers. + +If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored, otherwise you will receive a `204 No Content`. 
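+
+To see the difference in practice, here is a minimal sketch using Python's `requests` library against Riak KV's HTTP API. The node address, bucket, and key are illustrative assumptions; the `returnbody` query parameter and the 200/204 responses are the behavior described above.
+
+```python
+import requests
+
+# Assumed: a local node on the default HTTP port, with an example bucket/key.
+URL = "http://127.0.0.1:8098/buckets/test/keys/foo"
+
+# Without returnbody, a successful store returns 204 No Content and no body.
+r = requests.put(URL, data="bar", headers={"Content-Type": "text/plain"})
+print(r.status_code)          # expected: 204
+
+# With returnbody=true, the stored content comes back with a 200 OK.
+r = requests.put(URL, params={"returnbody": "true"}, data="bar",
+                 headers={"Content-Type": "text/plain"})
+print(r.status_code, r.text)  # expected: 200 bar
+```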
+ + + diff --git a/content/riak/kv/3.0.2/_reference-links.md b/content/riak/kv/3.0.2/_reference-links.md new file mode 100644 index 0000000000..f22b19cba7 --- /dev/null +++ b/content/riak/kv/3.0.2/_reference-links.md @@ -0,0 +1,254 @@ + +# Riak KV 3.0.2 Reference Links List + + +## Common + +[downloads]: {{}}riak/kv/3.0.2/downloads/ +[install index]: {{}}riak/kv/3.0.2/setup/installing +[upgrade index]: {{}}riak/kv/3.0.2/upgrading +[plan index]: {{}}riak/kv/3.0.2/planning +[config index]: {{}}riak/kv/3.0.2/using/configuring/ +[config reference]: {{}}riak/kv/3.0.2/configuring/reference/ +[manage index]: {{}}riak/kv/3.0.2/using/managing +[performance index]: {{}}riak/kv/3.0.2/using/performance +[glossary vnode]: {{}}riak/kv/3.0.2/learn/glossary/#vnode +[contact basho]: https://www.tiot.jp/en/about-us/contact-us/ + + +## Planning + +[plan index]: {{}}riak/kv/3.0.2/setup/planning +[plan start]: {{}}riak/kv/3.0.2/setup/planning/start +[plan backend]: {{}}riak/kv/3.0.2/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/3.0.2/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/3.0.2/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/3.0.2/setup/planning/backend/leveled +[plan backend memory]: {{}}riak/kv/3.0.2/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/3.0.2/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/3.0.2/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/3.0.2/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/3.0.2/setup/planning/best-practices +[plan future]: {{}}riak/kv/3.0.2/setup/planning/future + + +## Installing + +[install index]: {{}}riak/kv/3.0.2/setup/installing +[install aws]: {{}}riak/kv/3.0.2/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/3.0.2/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/3.0.2/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/3.0.2/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/3.0.2/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/3.0.2/setup/installing/smartos +[install solaris]: {{}}riak/kv/3.0.2/setup/installing/solaris +[install suse]: {{}}riak/kv/3.0.2/setup/installing/suse +[install windows azure]: {{}}riak/kv/3.0.2/setup/installing/windows-azure + +[install source index]: {{}}riak/kv/3.0.2/setup/installing/source +[install source erlang]: {{}}riak/kv/3.0.2/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/3.0.2/setup/installing/source/jvm + +[install verify]: {{}}riak/kv/3.0.2/setup/installing/verify + + +## Upgrading + +[upgrade index]: {{}}riak/kv/3.0.2/setup/upgrading +[upgrade checklist]: {{}}riak/kv/3.0.2/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/3.0.2/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/3.0.2/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/3.0.2/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/3.0.2/setup/downgrade + + +## Configuring + +[config index]: {{}}riak/kv/3.0.2/configuring +[config basic]: {{}}riak/kv/3.0.2/configuring/basic +[config backend]: {{}}riak/kv/3.0.2/configuring/backend +[config manage]: {{}}riak/kv/3.0.2/configuring/managing +[config reference]: {{}}riak/kv/3.0.2/configuring/reference/ +[config strong consistency]: {{}}riak/kv/3.0.2/configuring/strong-consistency +[config load balance]: {{}}riak/kv/3.0.2/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/3.0.2/configuring/mapreduce +[config search]: 
{{}}riak/kv/3.0.2/configuring/search/ + +[config v3 mdc]: {{}}riak/kv/3.0.2/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/3.0.2/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/3.0.2/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/3.0.2/configuring/v3-multi-datacenter/ssl + +[config v2 mdc]: {{}}riak/kv/3.0.2/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/3.0.2/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/3.0.2/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/3.0.2/configuring/v2-multi-datacenter/ssl + + + +## Using + +[use index]: {{}}riak/kv/3.0.2/using/ +[use admin commands]: {{}}riak/kv/3.0.2/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/3.0.2/using/running-a-cluster + +### Reference + +[use ref custom code]: {{}}riak/kv/3.0.2/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/3.0.2/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/3.0.2/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/3.0.2/using/reference/search +[use ref 2i]: {{}}riak/kv/3.0.2/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/3.0.2/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/3.0.2/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/3.0.2/using/reference/jmx +[use ref obj del]: {{}}riak/kv/3.0.2/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/3.0.2/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/3.0.2/using/reference/v2-multi-datacenter + +### Cluster Admin + +[use admin index]: {{}}riak/kv/3.0.2/using/admin/ +[use admin commands]: {{}}riak/kv/3.0.2/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/3.0.2/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/3.0.2/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/3.0.2/using/admin/riak-control/ + +### Cluster Operations + +[cluster ops add remove node]: {{}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/3.0.2/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/3.0.2/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/3.0.2/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/3.0.2/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/3.0.2/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/3.0.2/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/3.0.2/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/3.0.2/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/3.0.2/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/3.0.2/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/3.0.2/using/cluster-operations/v2-multi-datacenter + +### Repair/Recover + +[repair recover index]: {{}}riak/kv/3.0.2/using/repair-recovery +[repair recover index]: {{}}riak/kv/3.0.2/using/repair-recovery/failure-recovery/ + +### Security + +[security index]: {{}}riak/kv/3.0.2/using/security/ +[security basics]: {{}}riak/kv/3.0.2/using/security/basics +[security managing]: {{}}riak/kv/3.0.2/using/security/managing-sources/ + +### Performance + +[perf index]: 
{{}}riak/kv/3.0.2/using/performance/ +[perf benchmark]: {{}}riak/kv/3.0.2/using/performance/benchmarking +[perf open files]: {{}}riak/kv/3.0.2/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/3.0.2/using/performance/erlang +[perf aws]: {{}}riak/kv/3.0.2/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/3.0.2/using/performance/latency-reduction + +### Troubleshooting + +[troubleshoot http]: {{}}riak/kv/3.0.2/using/troubleshooting/http-204 + + +## Developing + +[dev index]: {{}}riak/kv/3.0.2/developing +[dev client libraries]: {{}}riak/kv/3.0.2/developing/client-libraries +[dev data model]: {{}}riak/kv/3.0.2/developing/data-modeling +[dev data types]: {{}}riak/kv/3.0.2/developing/data-types +[dev kv model]: {{}}riak/kv/3.0.2/developing/key-value-modeling + +### Getting Started + +[getting started]: {{}}riak/kv/3.0.2/developing/getting-started +[getting started java]: {{}}riak/kv/3.0.2/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/3.0.2/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/3.0.2/developing/getting-started/python +[getting started php]: {{}}riak/kv/3.0.2/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/3.0.2/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/3.0.2/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/3.0.2/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/3.0.2/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/3.0.2/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/3.0.2/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/3.0.2/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/3.0.2/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/3.0.2/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/3.0.2/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/3.0.2/developing/getting-started/golang/object-modeling + +### Usage + +[usage index]: {{}}riak/kv/3.0.2/developing/usage +[usage bucket types]: {{}}riak/kv/3.0.2/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/3.0.2/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/3.0.2/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/3.0.2/developing/usage/content-types +[usage create objects]: {{}}riak/kv/3.0.2/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/3.0.2/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/3.0.2/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/3.0.2/developing/usage/mapreduce +[usage search]: {{}}riak/kv/3.0.2/developing/usage/search +[usage search schema]: {{}}riak/kv/3.0.2/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/3.0.2/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/3.0.2/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/3.0.2/developing/usage/updating-objects + +### App Guide + +[apps mapreduce]: {{}}riak/kv/3.0.2/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/3.0.2/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/3.0.2/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/3.0.2/developing/api/backend +[dev api http]: 
{{}}riak/kv/3.0.2/developing/api/http +[dev api http status]: {{}}riak/kv/3.0.2/developing/api/http/status +[dev api pbc]: {{}}riak/kv/3.0.2/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/3.0.2/learn/glossary/ +[glossary aae]: {{}}riak/kv/3.0.2/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/3.0.2/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/3.0.2/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/3.0.2/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/3.0.2/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/3.0.2/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/3.0.2/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/3.0.2/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/3.0.2/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/3.0.2/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/3.0.2/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/3.0.2/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/3.0.2/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + + + + + + diff --git a/content/riak/kv/3.0.2/add-ons.md b/content/riak/kv/3.0.2/add-ons.md new file mode 100644 index 0000000000..fb003ac4e6 --- /dev/null +++ b/content/riak/kv/3.0.2/add-ons.md @@ -0,0 +1,25 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +aliases: +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. + +* [Riak Redis Add-on]({{}}riak/kv/3.0.2/add-ons/redis/) + + + + + diff --git a/content/riak/kv/3.0.2/add-ons/redis.md b/content/riak/kv/3.0.2/add-ons/redis.md new file mode 100644 index 0000000000..7da7bd569b --- /dev/null +++ b/content/riak/kv/3.0.2/add-ons/redis.md @@ -0,0 +1,63 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +aliases: +--- + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. +{{% /note %}} + +Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV. 
+
+RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate.
+
+Whether you are looking to build out a session, shopping cart, advertisement, or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV).
+
+## Compatibility
+
+RRA is supported on the following platforms:
+
+* RHEL/CentOS 6
+* RHEL/CentOS 7
+* Ubuntu 12.04 LTS "Precise Pangolin"
+* Ubuntu 14.04 LTS "Trusty Tahr"
+* Debian 7 "Wheezy"
+* Debian 8 "Jessie"
+
+RRA is compatible with the following services:
+
+* Riak KV Enterprise (2.1.4+)
+* Riak TS Enterprise (1.4.0+)
+* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster)
+  * Redis Cluster and RRA's consistent hash are at odds, which surfaces as errors
+    such as MOVED, ASK, and CROSSSLOT messages from Redis; see (WIP):
+    https://github.com/antirez/redis-rb-cluster
+
+## Get Started
+
+* [Set up RRA.][addon redis setup]
+* [Use RRA with various clients.][addon redis use]
+* [Develop with RRA.][addon redis develop]
+* [Learn about RRA's features.][addon redis features]
+
+
+
diff --git a/content/riak/kv/3.0.2/add-ons/redis/developing-rra.md b/content/riak/kv/3.0.2/add-ons/redis/developing-rra.md
new file mode 100644
index 0000000000..bcdf111178
--- /dev/null
+++ b/content/riak/kv/3.0.2/add-ons/redis/developing-rra.md
@@ -0,0 +1,330 @@
+---
+title: "Developing with Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Develop with Redis Add-on"
+    identifier: "add-ons_redis_develop"
+    weight: 403
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[redis-clients]: http://redis.io/clients
+[usage bucket types]: {{}}riak/kv/3.0.2/developing/usage/bucket-types/
+[dev api http]: {{}}riak/kv/3.0.2/developing/api/http
+[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/
+[apps replication properties]: {{}}riak/kv/3.0.2/developing/app-guide/replication-properties
+[usage commit hooks]: {{}}riak/kv/3.0.2/developing/usage/commit-hooks/
+[concept causal context]: {{}}riak/kv/3.0.2/learn/concepts/causal-context
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations.
+
+## Overview
+
+Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides access to Riak KV as a persistent data store, with Redis as a cache, through the various Redis client libraries and the command-line interface tool `redis-cli`.
+
+As with Riak KV, the cache proxy service almost always performs best and most
+predictably when you use the basic CRUD operations -- Create, Read, Update,
+Delete -- that you'd find in any key/value store. Learning these operations
+is a great place to start when beginning to develop applications that use
+RRA.
+
+The set of clients (including recommendations) for Redis are listed at
+[Redis clients][redis-clients]. For brevity's sake, examples provided here are
+in:
+
+* Erlang (Eredis)
+* JavaScript (node_redis)
+* Python (redis-py)
+* Ruby (redis-rb)
+* Scala (lettuce)
+* Java: see the Scala examples. The code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client.
+
+## Riak KV Setup
+
+While you can use Riak Redis Add-on with Riak KV configured so either `last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'.
+
+The cache proxy service is tested under both configurations. However, due to lack of support via the Redis protocol for returning multiple values for a single `GET`, effectively `last_write_wins` semantics apply.
+
+For a deeper explanation of Riak KV's configurable behaviors, see John Daily's
+blog series [part 4][config-behaviors].
+
+### Bucket Type Setup
+
+#### Create a Bucket Type
+
+If your application organizes data in a way that does not include a bucket type
+and instead only uses buckets to organize its keyspace, the `default` bucket type
+can be used by omitting the bucket-type portion of the colon-delimited
+hierarchical namespaced key. Put another way, `test:food` is equivalent to
+`default:test:food` where the bucket type is `default`, the bucket is `test`,
+and the key is `food`. For examples here, we will use `rra:test:food` to clearly
+use a bucket type.
+
+If your application organizes data including a bucket type, ensure that the
+bucket type is created in Riak without specifying the data type, so values are
+effectively opaque, i.e. a `string`. The following command provides an example of
+creating the bucket type `rra`:
+
+```sh
+if ! riak-admin bucket-type status rra >/dev/null 2>&1; then
+  riak-admin bucket-type create rra '{"props":{}}'
+  riak-admin bucket-type activate rra
+fi
+```
+
+#### Set Bucket Props
+
+The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false':
+
+```sh
+curl -XPUT -H 'Content-Type: application/json' \
+  -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \
+  'http://127.0.0.1:8098/types/rra/buckets/test/props'
+```
+
+For additional configuration options see [bucket properties][dev api http].
+
+## Object/Key Operations
+
+Riak KV organizes data into buckets, keys, and values, with
+[bucket types][usage bucket types] acting as an additional namespace in Riak KV
+versions 2.0 and greater. Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket.
+
+Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or other plaintext representations that can be parsed in the client application (e.g. YAML).
+
+While buckets are a flat namespace in Riak KV and you can name them
+whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy
+service, the Redis bucket_type:bucket:key is mapped to the Riak KV
+bucket_type/bucket/key, so bucket type and bucket names should not contain a
+colon (`:`). When not specified, the bucket type defaults to "default".
+
+Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets.
+
+The same goes for naming keys: many objects can have the same key as long as they're in different buckets. There is no restriction on keys containing colons (`:`), and this practice of representing a nested namespace is common in applications using Redis.
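+
+As an illustrative sketch only (not RRA's actual parsing code), the following Python shows how a colon-delimited Redis key could be resolved into the bucket-type/bucket/key triple described above, including the fallback to the `default` bucket type. The helper name and the treatment of extra colons are assumptions:
+
+```python
+def split_redis_key(redis_key):
+    """Map a colon-delimited Redis key to a (bucket_type, bucket, key) triple.
+
+    Two segments imply the default bucket type; with three or more, the first
+    two are bucket type and bucket, and any remaining colons stay in the key.
+    """
+    parts = redis_key.split(":", 2)
+    if len(parts) == 2:
+        return ("default", parts[0], parts[1])   # e.g. "test:food"
+    if len(parts) == 3:
+        return tuple(parts)                      # e.g. "rra:test:food"
+    raise ValueError("expected at least bucket:key")
+
+assert split_redis_key("test:food") == ("default", "test", "food")
+assert split_redis_key("rra:test:food") == ("rra", "test", "food")
+assert split_redis_key("rra:test:a:b") == ("rra", "test", "a:b")
+```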
+ +Riak KV [bucket types][usage bucket types] enable you to provide common +configurations for buckets (as many buckets as you wish). This means you can +easily enable buckets to share common configurations, i.e. identical +[replication properties][apps replication properties] or +[commit hooks][usage commit hooks]. + + +## Reading Objects + +Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading-through to Riak KV which results in greater resilience through node outages and network partitions. + +To request a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.get("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.get("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.get("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +var value = connection.get("rra:test:food") +``` + +### Get Configuration Parameters + +>**Note:** The cache proxy service read option (related to replication factor and +consistency concern) may optionally be set within the nutcracker.conf. This will result in an override of the setting value at the bucket-level in Riak KV. + +The following configuration parameters apply to `GET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` |
+|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` |
+|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) |
+|`timeout` | The number of milliseconds to await a response. | `0` (server specified) |
+
+
+### Sibling Resolution
+
+As the Redis protocol does not provide a means to return multiple siblings,
+the cache proxy service must provide server-side sibling resolution. At present, only last-write-wins sibling resolution is available. The result is an effective
+last-write-wins configuration for access through the cache proxy service.
+
+
+## Writing Objects
+
+Writes via the cache proxy service are analogous to a Redis `SET`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache. As with HTTP PUT, `SET` semantically covers both create and update
+operations.
+
+To set a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.set("rra:test:food", "apple", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.set("rra:test:food", "apple")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.set("rra:test:food", "apple")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.set("rra:test:food", "apple")
+```
+
+### Set Configuration Parameters
+
+>**Note:** The cache proxy service write option (related to replication factor and
+consistency concern) may optionally be set within the nutcracker.conf, resulting
+in an override of the setting value at the bucket-level in Riak KV.
+
+The following configuration parameters apply to `SET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+### Sibling Explosion
+
+As noted in the section "Sibling Resolution" above, Riak KV provides for a line of
+descent (known as the [causal context][concept causal context]) for a value stored at a key. Clients
+performing write operations provide this causal context by setting the vector
+clock (VClock) that they last read.
+
+If a client does not provide the causal context, Riak KV makes no assumptions and treats the write as a new causal context, semantically equivalent to a
+create. In the case that a value is already stored at the key, this would lead
+to a sibling.
+
+Since the Redis protocol does not provide a means to pass a VClock, the cache
+proxy service needs to perform a read-before-write to obtain the current VClock so the write can continue the causal context previously established and avoid
+"sibling explosion".
+
+Despite these efforts, in the event of a network partition, siblings will still
+be created as clients writing to nodes on either side of the network partition
+can create divergent lines of descent. Sibling resolution remains the means
+to merge these lines of descent into a coherent causal context.
+
+## Deleting Objects
+
+Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache.
+
+To delete a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.del("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.delete("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.del("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.del("rra:test:food")
+```
+
+### Delete Configuration Parameters
+
+The following configuration parameters apply to `DEL` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | + + + + diff --git a/content/riak/kv/3.0.2/add-ons/redis/redis-add-on-features.md b/content/riak/kv/3.0.2/add-ons/redis/redis-add-on-features.md new file mode 100644 index 0000000000..45a02c6e57 --- /dev/null +++ b/content/riak/kv/3.0.2/add-ons/redis/redis-add-on-features.md @@ -0,0 +1,136 @@ +--- +title: "Riak Redis Add-on Features" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Redis Add-on Features" + identifier: "add-ons_redis_features" + weight: 504 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: +--- + +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png +[redis docs]: http://redis.io/commands +[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md + +## Overview + +The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware. + +On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available: + +* [Pre-sharding](#pre-sharding) +* [Connection Aggregation](#connection-aggregation) +* [Command Pipelining](#command-pipelining) +* [Read-through Cache](#read-through-cache) +* [Write-around Cache](#write-around-cache) +* [Commands](#commands) +* [Object Lifetime](#object-lifetime) + +## Pre-sharding + +Pre-sharding with consistent hashing dispatches object reads and writes based +on a configurable hash function, spreading load across multiple cache servers. +The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then +requests are routed to the Redis server that handles that portion of the key +range. + +Redis with no persistence is used as the frontend cache proxy service, and +Redis as a data server holds all data in memory. The addressable memory of +cache proxy is limited. By employing pre-sharding, the total addressable cache +memory space is extended by the number of Redis servers. + +## Connection Aggregation + +Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers. This reduces the total required connections to the Redis server for the same key. + +Redis clients in various languages support specifying multiple servers, as well +as implementing multiple methods of spreading load across those servers (i.e. +round-robin load balancing or consistent hashing). Since the cache proxy service is providing consistent hashing, any Redis client method of supporting multiple +servers will suffice. 
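+
+To make the routing in the two sections above concrete, here is a toy Python sketch of consistent-hash pre-sharding. It is an illustration only: RRA's example configuration uses `fnv1a_64` hashing with `ketama` distribution, whereas this sketch uses MD5 and an invented server list.
+
+```python
+import bisect
+import hashlib
+
+class HashRing:
+    """A minimal consistent-hash ring: each server owns many points on a
+    circle, and a key routes to the first server point at or after its hash."""
+
+    def __init__(self, servers, points_per_server=64):
+        self._ring = sorted(
+            (self._hash("%s-%d" % (server, i)), server)
+            for server in servers
+            for i in range(points_per_server)
+        )
+        self._hashes = [h for h, _ in self._ring]
+
+    @staticmethod
+    def _hash(value):
+        return int(hashlib.md5(value.encode()).hexdigest(), 16)
+
+    def server_for(self, key):
+        # Wrap around the ring with the modulo so every key finds a server.
+        index = bisect.bisect(self._hashes, self._hash(key)) % len(self._ring)
+        return self._ring[index][1]
+
+ring = HashRing(["10.0.0.1:6379", "10.0.0.2:6379", "10.0.0.3:6379"])
+print(ring.server_for("rra:test:food"))  # the same key always routes to the same server
+```
+
+Because each server contributes many points on the ring, adding or removing a Redis server remaps only a fraction of the key space, which keeps cache churn bounded as the fleet changes.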
+ +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideal due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server during a write (DELETE or PUT) to the +backend server. + +A short `CACHE_TTL`, for example "15s", reduces a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three write cache strategies, the write-around cache strategy is the least +prone to race condition, but least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV utilizing the read-through + caching strategy with a TTL set at service configuration time. + +* SET - set the value of a key to Riak KV and invalidate cache, issue a PEXPIRE + to Redis. + +* DEL - delete the value of a key to Riak KV and invalidate cache, issue a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs]. 
+
+>**Important:** While the cache proxy service does support issuing DEL commands, a PEXPIRE with a small TTL is suggested instead when the semantic intent is to remove an item from cache. With write-around, the DEL command will issue a delete to the Riak backend.
+
+## Object Lifetime
+
+With the combination of read-through and write-around cache strategies, the
+full object lifetime for a key-value pair is represented by the following
+sequence diagram:
+
+![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png)
+
+
+
diff --git a/content/riak/kv/3.0.2/add-ons/redis/set-up-rra.md b/content/riak/kv/3.0.2/add-ons/redis/set-up-rra.md
new file mode 100644
index 0000000000..1b1415a026
--- /dev/null
+++ b/content/riak/kv/3.0.2/add-ons/redis/set-up-rra.md
@@ -0,0 +1,285 @@
+---
+title: "Setting Up Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Set Up Redis Add-on"
+    identifier: "add-ons_redis_setup"
+    weight: 201
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis use]: ../using-rra
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[install index]: {{}}riak/kv/3.0.2/setup/installing
+[perf open files]: {{}}riak/kv/3.0.2/using/performance/open-files-limit/#changing-the-limit
+[lab ansible]: https://github.com/paegun/ansible-cache-proxy
+
+This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA.
+
+## Prerequisites
+
+Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index].
+
+While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go.
+
+This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used
+to configure the cache proxy.
+
+## In the Lab
+
+An Ansible setup for the Riak Redis Add-on (RRA) was developed to provide a
+runnable example of an installation; see [ansible cache proxy][lab ansible].
+The remainder of this setup guide lists the commands required to install and
+configure RRA manually.
+
+## Installing
+
+1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files].
+2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed.
+3. Install Riak Redis Add-on.
+
+### Change the open-files limit
+
+As with Riak KV, both the total open-files limit and the per-user open-files limit
+must be high enough to allow Redis and Riak Redis Add-on (RRA) to function.
+
+For a complete guide on changing the limit in Riak KV, see
+[Changing the limit][perf open files].
+
+#### Linux
+
+On most Linux distributions, the total limit for open files is controlled by `sysctl`.
+
+```bash
+sudo sysctl -w fs.file-max=65536
+sudo sysctl -p
+```
+
+To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
+
+#### CentOS
+
+On CentOS systems, set a proper limit for the user you're usually logging in with
+to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the
+executing user.
+
+#### Ubuntu
+
+On Ubuntu systems, the following settings are recommended:
+
+```config
+»USERNAME« hard nofile 65536
+»USERNAME« soft nofile 65536
+root hard nofile 65536
+root soft nofile 65536
+```
+
+>**Note:** You may need to log out of your shell and then log back in for these changes to take effect.
+
+
+### Install Redis
+
+>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on".
+
+#### Install on Ubuntu
+
+If you are on Ubuntu, run the following to install Redis:
+
+```bash
+# add the dotdeb repositories to your APT sources.
+sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.
+
+### Install Riak Redis Add-on (RRA)
+
+>**Note:**
+>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums.
+
+If you are on CentOS, run the following to install RRA:
+
+```bash
+sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm
+```
+
+If you are on Ubuntu, run the following to install RRA:
+
+```bash
+sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb
+```
+
+## Configuring Riak Redis Add-on
+
+To configure Riak Redis Add-on (RRA), edit the configuration file: `/etc/cache_proxy/cache_proxy_22122.yml`.
+
+The RRA configuration file is in YAML format. An example configuration
+file is provided by the installation, and it contains all relevant configuration elements:
+
+```config
+» XML node name« :
+  listen: 0.0.0.0:22122
+  hash: fnv1a_64
+  distribution: ketama
+  auto_eject_hosts: true
+  redis: true
+  server_retry_timeout: 2000
+  server_failure_limit: 1
+  server_ttl: 1h
+  servers:
+    - 127.0.0.1:6379:1
+  backend_type: riak
+  backend_max_resend: 2
+  backends:
+    - 127.0.0.1:8087
+```
+
+Set the `listen` configuration value to set the RRA listen port.
+
+To set the time-to-live (TTL) for values stored in cache, set the `server_ttl`
+configuration value. Human-readable time values can be specified,
+with the most likely units being `s` for seconds or `ms` for milliseconds.
+
+Set the list of Redis servers by listing the servers, separated by `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional).
+
+Set the list of Riak KV servers by listing the servers, separated by `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«`
+(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here.
+
+### Verify your configuration
+
+If you are on Ubuntu, run the following to start RRA:
+
+```bash
+sudo service cache_proxy start
+```
+
+If you are on CentOS, run the following to start RRA:
+
+```bash
+systemctl start cache_proxy
+```
+
+To verify RRA is running and listening on the expected port, run the
+following (using the loopback interface and the default RRA port 22122
+as an example):
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS
+redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on
+```
+
+Redis should respond with `SUCCESS`.
+
+If RRA is responding with the expected output, run the following to
+clean up and remove the test value:
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on
+```
+
+If you did not get the expected output, run the following
+to verify that RRA is running on the expected port:
+
+```bash
+ss -nlp |grep [n]utcracker
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.
+
+## Next Steps
+
+Get started with some [basic usage][addon redis use] or check out more info on [setting up for development (with examples)][addon redis develop].
+
+
+
diff --git a/content/riak/kv/3.0.2/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/3.0.2/add-ons/redis/set-up-rra/deployment-models.md
new file mode 100644
index 0000000000..e047ce75f3
--- /dev/null
+++ b/content/riak/kv/3.0.2/add-ons/redis/set-up-rra/deployment-models.md
@@ -0,0 +1,143 @@
+---
+title: "Riak Redis Add-on Deployment Models"
+description: "Explore the various models for deploying Riak Redis Add-on"
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Redis Add-on Deployment Models"
+    identifier: "add-ons_redis_deployment"
+    weight: 201
+    parent: "add-ons_redis_setup"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[Local-deployment]: {{}}images/redis/rra_deployment_local.png
+[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png
+[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png
+
+## Deployment Models
+
+### Local Cache Deployment
+
+In a local cache deployment, the RRA and Redis are deployed to the application
+server.
+
+![Local-deployment]({{}}images/redis/rra_deployment_local.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are local.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections from Application Servers to Riak Nodes are distributed
+  and bounded to equal the number of Riak nodes _multiplied_ by the number of
+  Application Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Cache hits are extremely fast.
+
+Disadvantages:
+
+* Cache writes on one application server are *not* observed on other application
+  servers, so cache hit rates are likely lower unless some form of consistent
+  routing to the application server exists within the solution.
+* Redis competing for RAM with the application service may be problematic.
+
+### Colocated Cache Deployment
+
+In a colocated cache deployment, the RRA may be deployed either to the
+application server (suggested) or to the Riak servers, and Redis is deployed to
+the Riak servers.
+
+In the case of deploying the RRA to the application servers, the RRA features
+of reducing connections from the relatively high number of application service
+instances to the fewer Redis (cache) and Riak (persistent) data service
+instances allow for the greatest scale at the expense of the deployment cost
+of pushing a service and its configuration.
+
+In the case of deploying the RRA to the colocated Redis and Riak data servers,
+the maximum scale for the solution is constrained by the number of network
+connections from the application services, while deployment costs remain a matter
+of pushing a service and its configuration. In either case, deployment should
+be automated, so these costs are not multiplied by the number of servers.
+
+![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are distributed and bounded to equal the number of Riak nodes
+  _multiplied_ by the number of Application Service instances.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections from RRA to Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _squared_.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so slightly increased latency compared to local.
+* Redis competing for RAM with Riak will likely be problematic. Redis should
+  be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis
+  so that Riak is allotted sufficient RAM to serve the more important
+  persistent data storage and retrieval services. See http://redis.io/topics/config
+* This model may seem to provide data locality, but in the case of faults in
+  either Redis or Riak services, the fault tolerance mechanisms of RRA and
+  Riak will not match exactly, as communicating the necessary information to
+  support such lock-step fault tolerance would lead to greater mean latencies,
+  and Riak provides superior 99th percentile latency performance in the face
+  of faults.
+
+
+### Distributed Cache Deployment
+
+In a distributed cache deployment, the RRA is deployed to the application server
+and Redis is deployed to standalone servers, separate from Riak cluster nodes.
+
+![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are local.
+* Redis: The connections between the RRA Service instance and Redis Service
+  instances are distributed and bounded to equal the number of Application
+  Servers _multiplied_ by the number of Redis Servers.
+* Riak: The connections from RRA to Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _multiplied_ by the number of Application
+  Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+* Keeps RRA near the application, reducing network connections.
+* Moves Redis to distinct servers, allowing the cache more RAM and not
+  constraining the RAM of either application or persistent data services.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so increased latency compared to local.
+
+### Recommendation
+
+The relative advantages and disadvantages of the Distributed Cache Deployment,
+most notably the increased cache hit rate and reduced connection overhead,
+should make it the standout choice for applications requiring the scale and
+operational simplicity of Riak. For this reason, we recommend the Distributed
+Cache Deployment.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/add-ons/redis/using-rra.md b/content/riak/kv/3.0.2/add-ons/redis/using-rra.md
new file mode 100644
index 0000000000..3d45d9e168
--- /dev/null
+++ b/content/riak/kv/3.0.2/add-ons/redis/using-rra.md
@@ -0,0 +1,246 @@
+---
+title: "Using Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Using Redis Addon"
+    identifier: "add-ons_redis_getstarted"
+    weight: 302
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/kv/3.0.2/add-ons/redis/get-started-with-rra
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis setup]: ../set-up-rra/
+[dev api http]: {{}}riak/kv/3.0.2/developing/api/http/
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+Now that you’ve [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client which supports `GET`, `SET`, and `DEL` operations.
+
+This page will walk you through using RRA.
+
+## Prerequisites
+
+We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine.
+
+You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http].
+
+## Run the Read-Through Test
+
+Throughout this test example, the bucket "test" and key "foo" are used to
+demonstrate how to address the hierarchical namespace support in Riak KV
+through the flat Redis key. The bucket type is not specified in this example,
+so it is effectively the default bucket type, named "default". For additional
+information regarding key namespace, see [develop Riak Redis Add-on (RRA)][addon redis develop].
+
+The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are:
+
+* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings.
+* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`.
+* GET the Riak object at the `test` bucket with the key `foo`.
+* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.)
+* Assert that the value obtained from the previous cache proxy GET is 'bar'.
+
+First, create a file named `read_through_test.sh` with the following content:
+
+```bash
+#!/usr/bin/env bash
+
+# set test environment
+RIAK_HTTP_IP="127.0.0.1"
+RIAK_HTTP_PORT="8098"
+CACHE_PROXY_IP="127.0.0.1"
+CACHE_PROXY_PORT="22122"
+CACHE_PROXY_STATISTICS_PORT="22123"
+RIAK_TEST_BUCKET="test"
+KEY="foo"
+VALUE="bar"
+
+# DELETE Riak object, ensure no siblings
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# PUT Riak object
+curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# GET Riak object
+RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY")
+
+# GET Cache Proxy value
+CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" get "$RIAK_TEST_BUCKET:$KEY")
+
+# DELETE Riak object, cleanup
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# Assert
+if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then
+    RESULT="Success"
+else
+    RESULT="FAIL"
+fi
+echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy."
+```
+
+Then, once you've created the file and made it executable (e.g. with `chmod +x read_through_test.sh`), run it as follows:
+
+```bash
+./read_through_test.sh 22122 8098 test
+```
+
+### Exceptions
+
+If the test does not pass, verify that both Redis and RRA are running. You can do this by running:
+
+```bash
+ps aux |grep [r]edis
+ps aux |grep [n]utcracker
+```
+
+The result should list `redis` and `nutcracker` respectively.
+
+Also, verify that Riak KV is started and listening on the protocol buffer port specified:
+
+```bash
+sudo riak config effective |grep proto
+```
+
+If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following:
+
+```bash
+sudo service cache_proxy restart
+```
+
+If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows:
+
+1. Stop RRA.
+2. Stop Redis.
+3. *Optional:* Restart Riak KV (this should only be necessary if Riak KV is not responding to protocol buffer requests).
+4. Start Redis.
+5. Start RRA.
+
+```bash
+sudo service cache_proxy stop
+sudo service redis stop
+
+# optional
+sudo riak restart
+
+sudo service redis start
+sudo service cache_proxy start
+```
+
+## Using Riak Redis Add-on
+
+Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA.
+
+For objects that should not be cached, interact with Riak KV as usual: issuing GET, PUT, and DELETE commands through the Riak client.
+
+For objects that should be cached, read from RRA: issuing GET, SET, and DEL commands through the Redis client.
+
+### Monitoring
+
+#### RRA
+
+Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service’s process was killed or terminated by other means.
+
+The log file for RRA is stored by default in `/var/log/cache_proxy.log`. RRA is logrotate friendly, responding to the signal to reopen the log file following a rotate.
+
+For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+ +For example, running the following command (using the loopback interface and the default statistics port as an example): + +```bash +telnet 127.0.0.1 22123 +``` + +Returns statistic results: + +```json +{ + "bdp_cache_proxy": { + "192.168.50.2:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 216, + "requests": 9, + "response_bytes": 39, + "responses": 4, + "server_connections": 1, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.3:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 0, + "requests": 0, + "response_bytes": 0, + "responses": 0, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "192.168.50.4:6379": { + "in_queue": 0, + "in_queue_bytes": 0, + "out_queue": 0, + "out_queue_bytes": 0, + "request_bytes": 90, + "requests": 5, + "response_bytes": 258, + "responses": 2, + "server_connections": 0, + "server_ejected_at": 0, + "server_eof": 0, + "server_err": 0, + "server_timedout": 0 + }, + "client_connections": 0, + "client_eof": 6, + "client_err": 0, + "forward_error": 0, + "fragments": 0, + "server_ejects": 0 + }, + "curr_connections": 4, + "service": "nutcracker", + "source": "vagrant", + "timestamp": 1438301846, + "total_connections": 10, + "uptime": 7227, + "version": "0.4.0" +} +``` + +Using the above results, you should be able to determine metrics changes that would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution. + +While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions: + +* Custom - https://github.com/gfranxman/NutcrackerMonitor +* NewRelic - http://newrelic.com/plugins/schoology/245 +* Nagios - https://github.com/schoology/twemproxy_nagios + +#### Redis + +Various Redis monitoring solutions exist in the market and, like monitoring RRA, these monitoring solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone. 
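+
+For a quick manual look at the same statistics these monitoring tools consume, you can query Redis directly with the `info` command. A minimal sketch, assuming the loopback interface and the default Redis port:
+
+```bash
+# Fetch only the "stats" section of Redis INFO output
+redis-cli -h 127.0.0.1 -p 6379 info stats
+```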
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details + + + + diff --git a/content/riak/kv/3.0.2/configuring.md b/content/riak/kv/3.0.2/configuring.md new file mode 100644 index 0000000000..6feff74d73 --- /dev/null +++ b/content/riak/kv/3.0.2/configuring.md @@ -0,0 +1,88 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +aliases: +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+
+[Learn More >>][config search]
+
+#### [V3 Multi-Datacenter][config v3 mdc]
+
+A guide on configuring Riak's V3 Multi-Datacenter Replication.
+
+[Learn More >>][config v3 mdc]
+
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/configuring/backend.md b/content/riak/kv/3.0.2/configuring/backend.md
new file mode 100644
index 0000000000..9c9ff0889c
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/backend.md
@@ -0,0 +1,647 @@
+---
+title: "Backend Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Backend Configuration"
+    identifier: "configuring_backend"
+    weight: 110
+    parent: "configuring"
+toc: true
+aliases:
+---
+
+[plan backend leveldb]: {{}}riak/kv/3.0.2/setup/planning/backend/leveldb
+[plan backend leveled]: {{}}riak/kv/3.0.2/setup/planning/backend/leveled
+[plan backend bitcask]: {{}}riak/kv/3.0.2/setup/planning/backend/bitcask
+[plan backend memory]: {{}}riak/kv/3.0.2/setup/planning/backend/memory
+[plan backend multi]: {{}}riak/kv/3.0.2/setup/planning/backend/multi
+
+## LevelDB
+
+Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend.
+
+> **Note on upgrading to 2.0**
+>
+> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and
+wish to use your old configuration files, i.e. `app.config` and
+`vm.args`, please note that you must set the `total_leveldb_mem_percent`
+setting in the `eleveldb` section of `app.config`. We recommend setting
+it to `70`. If you do not set this parameter, it will default to 15,
+which can lead to problems in some clusters.
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.block_cache_threshold` | This setting defines the limit past which block cache memory can no longer be released in favor of the page cache. This setting has no impact in favor of file cache. The value is set on a per-vnode basis. | `32MB`
+`leveldb.compaction.trigger.tombstone_count` | Controls when a background compaction initiates solely due to the number of delete tombstones within an individual `.sst` table file. A value of `off` disables the feature. | `1000`
+`leveldb.compression` | Enabling this setting (`on`), which is the default, saves disk space. Disabling it may reduce read latency but increase overall disk activity. This option can be changed at any time, but it will not impact data on disk until the next time a file requires compaction. | `on`
+`leveldb.compression.algorithm` | Selects which compression algorithm is used when `leveldb.compression` is `on`. In new `riak.conf` files, this is explicitly set to `lz4`; however, when this setting is not provided, `snappy` will be used for backward-compatibility. When you determine that you will no longer need backward-compatibility, setting this to `lz4` will cause future compactions to use the LZ4 algorithm for compression. | `lz4` in new `riak.conf` files; `snappy` when not provided
+`leveldb.data_root` | The directory in which LevelDB will store its data. | `./data/leveldb`
+`leveldb.fadvise_willneed` | Option to override LevelDB's use of `fadvise(DONTNEED)` with `fadvise(WILLNEED)` instead. `WILLNEED` can reduce disk activity on systems where physical memory exceeds the database size. | `false`
+`leveldb.maximum_memory` | This parameter defines the server memory (in bytes) to assign to LevelDB. Also see `leveldb.maximum_memory.percent` to set LevelDB memory as a percentage of system total. | `80`
+`leveldb.maximum_memory.percent` | This parameter defines the percentage of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes to stay within this size. The memory size can alternately be assigned as a byte count via `leveldb.maximum_memory` instead. | `70`
+`leveldb.threads` | The number of worker threads performing LevelDB operations. | `71`
+`leveldb.verify_checksums` | Enables or disables the verification of the data fetched from LevelDB against internal checksums. | `on`
+`leveldb.verify_compaction` | Enables or disables the verification of LevelDB data during compaction. | `on`
+`leveldb.block.size_steps` | Defines the number of incremental adjustments to attempt between the `block.size` value and the maximum `block.size` for an `.sst` table file. A value of zero disables the underlying dynamic `block_size` feature. | `16`
+`leveldb.block.restart_interval` | Defines the key count threshold for a new key entry in the key index for a block. Most deployments should leave this parameter alone. | `16`
+`leveldb.block.size` | Defines the size threshold for a block/chunk of data within one `.sst` table file. Each new block gets an index entry in the `.sst` table file's master index. | `4KB`
+`leveldb.bloomfilter` | Each database `.sst` table file can include an optional "bloom filter" that is highly effective in shortcutting data queries that are destined to not find the requested key. The Bloom filter typically increases the size of an `.sst` table file by about 2%. | `on`
+`leveldb.write_buffer_size_min` | Each vnode first stores new key/value data in a memory-based write buffer. This write buffer is in parallel to the recovery log mentioned in the `sync` parameter. Riak creates each vnode with a randomly sized write buffer for performance reasons. The random size is somewhere between `write_buffer_size_min` and `write_buffer_size_max`. | `30MB`
+`leveldb.write_buffer_size_max` | See `leveldb.write_buffer_size_min` directly above. | `60MB`
+`leveldb.limited_developer_mem` | This is a Riak-specific option that is used when a developer is testing a high number of vnodes and/or several VMs on a machine with limited physical memory. Do not use this option if making performance measurements. This option overwrites values given to `write_buffer_size_min` and `write_buffer_size_max`. | `off`
+`leveldb.sync_on_write` | Whether LevelDB will flush after every write. Note: If you are familiar with `fsync`, this is analogous to calling `fsync` after every write. | `off`
+`leveldb.tiered` | The level number at which LevelDB data switches from the faster to the slower array. The default of `off` disables the feature. | `off`
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`. | 
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at or above the level set by `leveldb.tiered`. | 
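+
+These parameters are set in `riak.conf`. As a hedged illustration only (the values below are drawn from the defaults and recommendations above, not tuning advice for any particular workload), a LevelDB section might look like this:
+
+```riakconf
+# 70 is the percentage recommended in the upgrade note above
+leveldb.maximum_memory.percent = 70
+leveldb.compression = on
+leveldb.compression.algorithm = lz4
+leveldb.data_root = ./data/leveldb
+```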
+
+## Leveled
+
+Configurable parameters for Riak's [leveled][plan backend leveled] storage backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | A path under which leveled data files will be stored. | `$(platform_data_dir)/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. Can be set to `riak_sync`, `sync` (if OTP > 16) or `none`. Use `none`, and the OS will flush when most efficient. Use `riak_sync` or `sync` to flush after every PUT (not recommended without some hardware support, e.g. flash drives and/or flash-backed write caches). | `none`
+`leveled.compression_method` | Can be `lz4` or `native` (which will use the Erlang native zlib compression) within `term_to_binary`. | `native`
+`leveled.compression_point` | The point at which compression is applied to the Journal (the Ledger is always compressed). Use `on_receipt` or `on_compact`. `on_compact` is suitable when values are unlikely to yield much benefit from compression (compression is only attempted when compacting). | `on_receipt`
+`leveled.log_level` | Can be `debug`, `info`, `warn`, `error` or `critical`. Sets the minimum log level to be used within leveled. Leveled will log many lines to allow for stats to be extracted by those using log indexers such as Splunk. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. Normally keep this at around the size of o(100K) objects. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. The higher the value, the more compaction runs, and the sooner space is recovered. But each run has a cost. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. Use a low hour of 0 and a high hour of 23 to have no compaction window (i.e. always compact regardless of time of day). | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. If low hour > top hour, compaction will work overnight between low hour and top hour (inclusive). Timings rely on the server's view of local time. | `23`
+`leveled.max_run_length` | The maximum number of consecutive files which may be compacted in a single compaction run. | `4`
+`leveled_reload_recalc` | Enable the `recalc` compaction strategy within the leveled backend in riak. | `disabled`
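+
+As a hedged sketch (every value shown is simply the documented default restated), a leveled section of `riak.conf` might read:
+
+```riakconf
+leveled.data_root = $(platform_data_dir)/leveled
+leveled.sync_strategy = none
+# compact at any hour of the day
+leveled.compaction_low_hour = 0
+leveled.compaction_top_hour = 23
+```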
+
+## Bitcask
+
+Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`bitcask.data_root` | The directory under which Bitcask will store its data. | `./data/bitcask`
+`bitcask.io_mode` | Configures how Bitcask writes data to disk. If set to `erlang`, writes are made via Erlang's built-in file API; if set to `nif`, writes are made via direct calls to the POSIX C API. The `nif` mode provides higher throughput for certain workloads, but has the potential to negatively impact the Erlang VM, leading to higher worst-case latencies and possible throughput collapse. | `erlang`
+`bitcask.expiry` | By default, Bitcask keeps all of your data around. If your data has limited time value, or if you need to purge data for space reasons, you can set the `expiry` option. For example, if you need to purge data automatically after 1 day, set the value to `1d`. `off` disables automatic expiration. | `off`
+`bitcask.expiry.grace_time` | By default, Bitcask will trigger a merge whenever a data file contains an expired key. This may result in excessive merging under some usage patterns. To prevent this you can set the `bitcask.expiry.grace_time` option. Bitcask will defer triggering a merge solely for key expiry by the configured number of seconds. Setting this to `1h` effectively limits each cask to merging for expiry once per hour. | `0`
+`bitcask.hintfile_checksums` | Whether to allow the CRC to be present at the end of hintfiles. Setting this to `allow_missing` runs Bitcask in a backwards-compatible mode in which old hint files will still be accepted without CRC signatures. | `strict`
+`bitcask.fold.max_puts` | See the description for the `bitcask.fold.max_age` config directly below. | `0`
+`bitcask.fold.max_age` | Fold keys thresholds will reuse the keydir if another fold was started less than `fold.max_age` ago and there were fewer than `fold.max_puts` updates. Otherwise, it will wait until all current fold keys complete and then start. Set either option to `unlimited` to disable. | `unlimited`
+`bitcask.merge.thresholds.fragmentation` | Describes which ratio of dead keys to total keys in a file will cause it to be included in the merge. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 4 dead keys and 6 live keys, it will be included in the merge at the default ratio (which is 40). Increasing the value will cause fewer files to be merged, decreasing the value will cause more files to be merged. | `40`
+`bitcask.merge.thresholds.dead_bytes` | Describes the minimum amount of data occupied by dead keys in a file to cause it to be included in the merge. Increasing the value will cause fewer files to be merged, whereas decreasing the value will cause more files to be merged. | `128MB`
+`bitcask.merge.thresholds.small_file` | Describes the minimum size a file must have to be excluded from the merge. Files smaller than the threshold will be included. Increasing the value will cause more files to be merged, whereas decreasing the value will cause fewer files to be merged. | `10MB`
+`bitcask.merge.triggers.dead_bytes` | Describes how much data stored for dead keys in a single file will trigger merging. If a file meets or exceeds the trigger value for dead bytes, merge will be triggered. Increasing the value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. When either of these constraints are met by any file in the directory, Bitcask will attempt to merge files. | `512MB`
+`bitcask.merge.triggers.fragmentation` | Describes which ratio of dead keys to total keys in a file will trigger merging. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 6 dead keys and 4 live keys, then merge will be triggered at the default setting. Increasing this value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. | `60`
+`bitcask.merge.window.end` | See the description of the `bitcask.merge.policy` config below. | `23`
+`bitcask.merge.window.start` | See the description of the `bitcask.merge.policy` config below. | `0`
+`bitcask.merge.policy` | Lets you specify when during the day merge operations are allowed to be triggered. Valid options are: `always`, meaning no restrictions; `never`, meaning that merging will never be attempted; and `window`, specifying the hours during which merging is permitted, where `bitcask.merge.window.start` and `bitcask.merge.window.end` are integers between 0 and 23. If merging has a significant impact on performance of your cluster, or your cluster has quiet periods in which little storage activity occurs, you may want to change this setting from the default. | `always`
+`bitcask.merge_check_interval` | Bitcask periodically runs checks to determine whether merges are necessary. This parameter determines how often those checks take place. Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, etc. | `3m`
+`bitcask.merge_check_jitter` | In order to prevent merge operations from taking place on different nodes at the same time, Riak can apply random variance to merge times, expressed as a percentage of `bitcask.merge_check_interval`. | `30%`
+`bitcask.max_merge_size` | Maximum amount of data to merge in one go in the Bitcask backend. | `100GB`
+`bitcask.max_file_size` | Describes the maximum permitted size for any single data file in the Bitcask directory. If a write causes the current file to exceed this size threshold then that file is closed, and a new file is opened for writes. | `2GB`
+`bitcask.sync.interval` | See the description of the `bitcask.sync.strategy` directly below. | 
+`bitcask.sync.strategy` | Changes the durability of writes by specifying when to synchronize data to disk. The default setting protects against data loss in the event of application failure (process death) but leaves open a small window in which data could be lost in the event of complete system failure (e.g. hardware, OS, or power). The default mode, `none`, writes data into operating system buffers which will be written to the disks when those buffers are flushed by the operating system. If the system fails, e.g. due to power loss or crash, that data is lost before those buffers are flushed to stable storage. This is prevented by the setting `o_sync`, which forces the operating system to flush to stable storage at every write. The effect of flushing each write is better durability, however write throughput will suffer as each write will have to wait for the write to complete. Available sync strategies: `none`, which will let the operating system manage syncing writes; `o_sync`, which uses the `O_SYNC` flag to force syncs on every write; and `interval`, which will force Bitcask to sync every `bitcask.sync.interval` seconds. | `none`
+`bitcask.open_timeout` | Specifies the maximum time Bitcask will block on startup while attempting to create or open the data directory. You generally need not change this value. If for some reason the timeout is exceeded on open you'll see a log message of the form `Failed to start bitcask backend: ...`. Only then should you consider a longer timeout. | `4s`
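+
+For example, a hedged `riak.conf` fragment enabling daily expiry and an overnight merge window (the window end of 8 is an illustrative assumption; the expiry values restate examples from the table above):
+
+```riakconf
+bitcask.expiry = 1d
+bitcask.expiry.grace_time = 1h
+bitcask.merge.policy = window
+bitcask.merge.window.start = 0
+bitcask.merge.window.end = 8
+```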
+
+## Memory Backend
+
+Configurable parameters for Riak's [Memory][plan backend memory] backend.
+
+Config | Description | Default
+:------|:------------|:-------
+`memory_backend.ttl` | Each value written will be written with this "time to live." Once that object's time is up, it will be deleted on the next read of its key. Minimum: `1s`. | 
+`memory_backend.max_memory_per_vnode` | The maximum amount of memory consumed per vnode by the memory storage backend. Minimum: `1MB`. | 
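+
+A minimal sketch in `riak.conf` (both values are illustrative assumptions within the documented minimums, not recommendations):
+
+```riakconf
+memory_backend.ttl = 1d
+memory_backend.max_memory_per_vnode = 512MB
+```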
+
+## Multi Backend
+
+Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster.
+
+If you are using multiple backends, you can configure the backends
+individually by prepending the setting with `multi_backend.$name`, where
+`$name` is the name of the backend. `$name` can be any valid
+configuration word, like `customer_data`, `my_data`, `foo_bar_backend`,
+etc.
+
+Below is the general form for setting multi-backend parameters:
+
+```riakconf
+multi_backend.$name.(existing_setting) = 
+# or
+multi_backend.$name.$backend_type.(backend_specific_setting) = 
+```
+
+Below is a listing of the available parameters:
+
+Config | Description | Default
+:------|:------------|:-------
+`multi_backend.$name.storage_backend` | This parameter specifies the Erlang module defining the storage mechanism that will be used on this node. | `bitcask`
+`multi_backend.default` | The default name of a backend when one is not specified. | 
+
+To give an example, if you have a LevelDB backend named
+`customer_backend` and wish to set the `data_root` parameter to
+`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would
+do so as follows:
+
+```riakconf
+multi_backend.customer_backend.storage_backend = leveldb
+multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend
+multi_backend.customer_backend.leveldb.maximum_memory.percent = 50
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/configuring/basic.md b/content/riak/kv/3.0.2/configuring/basic.md
new file mode 100644
index 0000000000..042f15017b
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/basic.md
@@ -0,0 +1,239 @@
+---
+title: "Basic Riak KV Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Basic Configuration"
+    identifier: "configuring_basic"
+    weight: 100
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/configuration/
+  - /riak/kv/3.0.2/ops/building/configuration/
+---
+
+[config reference]: {{}}riak/kv/3.0.2/configuring/reference
+[use running cluster]: {{}}riak/kv/3.0.2/using/running-a-cluster
+[use admin riak-admin#member-status]: {{}}riak/kv/3.0.2/using/admin/riak-admin/#member-status
+[perf erlang]: {{}}riak/kv/3.0.2/using/performance/erlang
+[plan start]: {{}}riak/kv/3.0.2/setup/planning/start
+[plan best practices]: {{}}riak/kv/3.0.2/setup/planning/best-practices
+[cluster ops backup]: {{}}riak/kv/3.0.2/using/cluster-operations/backing-up
+[cluster ops add remove node]: {{}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes
+[plan backend]: {{}}riak/kv/3.0.2/setup/planning/backend
+[plan backend multi]: {{}}riak/kv/3.0.2/setup/planning/backend/multi
+[plan backend bitcask]: {{}}riak/kv/3.0.2/setup/planning/backend/bitcask
+[usage bucket types]: {{}}riak/kv/3.0.2/developing/usage/bucket-types
+[apps replication properties]: {{}}riak/kv/3.0.2/developing/app-guide/replication-properties
+[concept buckets]: {{}}riak/kv/3.0.2/learn/concepts/buckets
+[concept eventual consistency]: {{}}riak/kv/3.0.2/learn/concepts/eventual-consistency
+[perf benchmark]: {{}}riak/kv/3.0.2/using/performance/benchmarking
+[perf open files]: {{}}riak/kv/3.0.2/using/performance/open-files-limit
+[perf index]: {{}}riak/kv/3.0.2/using/performance
+[perf aws]: {{}}riak/kv/3.0.2/using/performance/amazon-web-services
+[Cluster Capacity Planning]: {{}}riak/kv/3.0.2/setup/planning/cluster-capacity/#ring-size-number-of-partitions
+
+This document covers the parameters that are commonly adjusted when
+setting up a new cluster. We recommend that you also review the detailed
+[Configuration Files][config reference] document before moving a cluster into
+production.
+
+All configuration values discussed here are managed via the
+configuration file on each node, and a node must be restarted for any
+changes to take effect.
+
+> **Note**
+>
+> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config` configuration file or
+the newer `riak.conf` if you wish.
+>
+> If you have installed Riak KV 2.0 directly, you should use only
+`riak.conf`.
+>
+> More on configuring Riak KV can be found in the [configuration files][config reference]
+doc.
+
+We advise that you make as many of the changes below as practical
+_before_ joining the nodes together into a cluster.
Once your
+configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process.
+
+Use [`riak-admin member-status`][use admin riak-admin#member-status]
+to determine whether any given node is a member of a cluster.
+
+## Erlang VM Tunings
+
+Prior to building and starting a cluster, there are some
+Erlang-VM-related changes that you should make to your configuration
+files. If you are using the older, `vm.args`-based Erlang VM tunings,
+you should set the following:
+
+```vmargs
++sfwi 500
++scl false
+```
+
+If you are using the newer, `riak.conf`-based configuration system, we
+recommend the following settings:
+
+```riakconf
+erlang.schedulers.force_wakeup_interval = 500
+erlang.schedulers.compaction_of_load = false
+```
+
+More information can be found in [Erlang VM Tuning][perf erlang].
+
+## Ring Size
+
+The ring size, in Riak parlance, is the number of data partitions that
+comprise the cluster. This quantity impacts the scalability and
+performance of a cluster and, importantly, **it should be established
+before the cluster starts receiving data**.
+
+If the ring size is too large for the number of servers, disk I/O will
+be negatively impacted by the excessive number of concurrent databases
+running on each server; if the ring size is too small, the servers' other
+resources (primarily CPU and RAM) will go underutilized.
+
+See [Cluster Capacity Planning] for more details on choosing a ring size.
+
+The steps involved in changing the ring size depend on whether the
+servers (nodes) in the cluster have already been joined together.
+
+### Cluster joined, but no data needs to be preserved
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
+4. Start all nodes
+5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### New servers, have not yet joined a cluster
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for
+the location of this file)
+4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### Verifying ring size
+
+You can use the `riak-admin` command to verify the ring size:
+
+```bash
+riak-admin status | grep ring
+```
+
+Console output:
+
+```
+ring_members : ['riak@10.160.13.252']
+ring_num_partitions : 8
+ring_ownership : <<"[{'riak@10.160.13.252',8}]">>
+ring_creation_size : 8
+```
+
+If `ring_num_partitions` and `ring_creation_size` do not agree, that
+means that the `ring_creation_size` value was changed too late and that
+the proper steps were not taken to start over with a new ring.
+
+**Note**: Riak will not allow two nodes with different ring sizes to be
+joined into a cluster.
+
+## Backend
+
+Another critical decision to be made is the backend to use.
The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/3.0.2/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. + + + + diff --git a/content/riak/kv/3.0.2/configuring/global-object-expiration.md b/content/riak/kv/3.0.2/configuring/global-object-expiration.md new file mode 100644 index 0000000000..dd72189ef2 --- /dev/null +++ b/content/riak/kv/3.0.2/configuring/global-object-expiration.md @@ -0,0 +1,90 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
+menu:
+  riak_kv-3.0.2:
+    name: "Global Object Expiration"
+    identifier: "config_expiry"
+    weight: 180
+    parent: "configuring"
+project: "riak_kv"
+project_version: 3.0.2
+toc: true
+aliases:
+---
+
+[ttl]: https://en.wikipedia.org/wiki/Time_to_live
+
+By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data.
+
+Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value.
+
+## Enabling Expiry
+
+To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file:
+
+```riakconf
+leveldb.expiration = on
+```
+
+{{% note %}}
+Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration.
+{{% /note %}}
+
+## Setting Retention Time
+
+The `retention_time` setting is used to specify the time until objects expire.
+Durations are set using a combination of an integer and a shortcut for the supported units:
+
+- Milliseconds - `ms`
+- Seconds - `s`
+- Minutes - `m`
+- Hours - `h`
+- Days - `d`
+- Weeks - `w`
+- Fortnights - `f`
+
+The following example configures objects to expire after 5 hours:
+
+```riakconf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 5h
+```
+
+You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours:
+
+```riakconf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 8d9h
+```
+
+## Expiry Modes
+
+Global expiration supports two modes:
+
+- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired.
+- `normal` - individual objects are removed as part of the usual compaction process.
+
+We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient.
+
+The following example configures objects to expire after 1 day:
+
+```riakconf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+## Disable Expiry
+
+To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other two settings are ignored. For example:
+
+```riakconf
+leveldb.expiration = off
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/configuring/load-balancing-proxy.md b/content/riak/kv/3.0.2/configuring/load-balancing-proxy.md
new file mode 100644
index 0000000000..926c80bee7
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/load-balancing-proxy.md
@@ -0,0 +1,275 @@
+---
+title: "Load Balancing and Proxy Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Load Balancing & Proxy"
+    identifier: "configuring_load_balance"
+    weight: 150
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/advanced/configs/load-balanacing-proxy/
+  - /riak/kv/3.0.2/ops/advanced/configs/load-balanacing-proxy/
+---
+
+[perf open files]: {{}}riak/kv/3.0.2/using/performance/open-files-limit
+
+The recommended best practice for operating Riak in production is to
+place Riak behind a load-balancing or proxy solution, either hardware-
+or software-based, while never directly exposing Riak to public network
+interfaces.
+
+Riak users have reported success in using Riak with a variety of
+load-balancing and proxy solutions. Common solutions include proprietary
+hardware-based load balancers, cloud-based load balancing options, such
+as Amazon's Elastic Load Balancer, and open-source software-based
+projects like HAProxy and Nginx.
+
+This guide briefly explores the commonly used open-source software-based
+solutions HAProxy and Nginx, and provides some configuration and
+operational tips gathered from community users and operations-oriented
+engineers at Basho.
+
+While it is by no means an exhaustive overview of the topic, this guide
+should provide a starting point for choosing and implementing your own
+solution.
+
+## HAProxy
+
+[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source
+solution for load balancing and proxying of HTTP- and TCP-based
+application traffic.
+
+Users have reported success in using HAProxy in combination with Riak in
+a number of configurations and scenarios. Much of the information and
+example configuration for this section is drawn from experiences of
+users in the Riak community in addition to suggestions from Basho
+engineering.
+
+### Example Configuration
+
+The following is an example starting-point configuration for HAProxy to
+act as a load balancer. The example cluster has 4 nodes and will be
+accessed by Riak clients using both the Protocol Buffers and HTTP
+interfaces.
+
+> **Note on open files limits**
+>
+> The operating system's open files limits need to be greater than 256000
+for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems.
+
+```config
+global
+    log 127.0.0.1 local0
+    log 127.0.0.1 local1 notice
+    maxconn 256000
+    chroot /var/lib/haproxy
+    user haproxy
+    group haproxy
+    spread-checks 5
+    daemon
+    quiet
+
+defaults
+    log global
+    option dontlognull
+    option redispatch
+    option allbackups
+    maxconn 256000
+    timeout connect 5000
+
+backend riak_rest_backend
+    mode http
+    balance roundrobin
+    option httpchk GET /ping
+    option httplog
+    server riak1 riak1.<FQDN>:8098 weight 1 maxconn 1024 check
+    server riak2 riak2.<FQDN>:8098 weight 1 maxconn 1024 check
+    server riak3 riak3.<FQDN>:8098 weight 1 maxconn 1024 check
+    server riak4 riak4.<FQDN>:8098 weight 1 maxconn 1024 check
+
+frontend riak_rest
+    bind 127.0.0.1:8098
+    # Example bind for SSL termination
+    # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem
+    mode http
+    option contstats
+    default_backend riak_rest_backend
+
+
+backend riak_protocol_buffer_backend
+    balance leastconn
+    mode tcp
+    option tcpka
+    option srvtcpka
+    server riak1 riak1.<FQDN>:8087 weight 1 maxconn 1024 check
+    server riak2 riak2.<FQDN>:8087 weight 1 maxconn 1024 check
+    server riak3 riak3.<FQDN>:8087 weight 1 maxconn 1024 check
+    server riak4 riak4.<FQDN>:8087 weight 1 maxconn 1024 check
+
+
+frontend riak_protocol_buffer
+    bind 127.0.0.1:8087
+    mode tcp
+    option tcplog
+    option contstats
+    option tcpka
+    option srvtcpka
+    default_backend riak_protocol_buffer_backend
+```
+
+A specific configuration detail worth noting from the example is the
+commented option for SSL termination. HAProxy supports SSL directly as
+of version 1.5. Provided that your HAProxy instance was built with
+OpenSSL support, you can enable it by uncommenting the example line and
+modifying it to suit your environment. More information is available in
+the [HAProxy
+documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl).
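+
+As a hedged illustration only, an uncommented SSL-termination bind might look like the following (the certificate path is an assumption for your environment):
+
+```config
+frontend riak_rest
+    # terminate TLS here and forward plain HTTP to the Riak backends
+    bind 127.0.0.1:8443 ssl crt /etc/haproxy/certs/riak.pem
+    mode http
+    option contstats
+    default_backend riak_rest_backend
+```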
+
+Also note that the full example configuration above is considered a starting
+point and is a work in progress based upon [this
+example](https://gist.github.com/1507077). You should carefully examine
+the configuration and change it according to your specific environment.
+
+### Maintaining Nodes Behind HAProxy
+
+When using HAProxy with Riak, you can instruct HAProxy to ping each node
+in the cluster and automatically remove nodes that do not respond.
+
+You can also specify a round-robin configuration in HAProxy and have
+your application handle connection failures by retrying after a timeout,
+thereby reaching a functioning node upon retrying the connection
+attempt.
+
+HAProxy also has a standby system you can use to remove a node from
+rotation while allowing existing requests to finish. You can remove
+nodes from HAProxy directly from the command line by interacting with
+the HAProxy stats socket with a utility such as
+[socat](http://www.dest-unreach.org/socat/):
+
+```bash
+echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+At this point, you can perform maintenance on the node, down the node,
+and so on. When you've finished working with the node and it is again
+available for requests, you can re-enable it:
+
+```bash
+echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+Consult the following HAProxy documentation resources for more
+information on configuring HAProxy in your environment:
+
+* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy)
+* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)
+
+## Nginx
+
+Some users have reported success in using the [Nginx](http://nginx.org/)
+HTTP server to proxy requests for Riak clusters. An example that
+provides access to a Riak cluster *through GET requests only* is
+provided here for reference.
+
+### Example Configuration
+
+The following is an example starting-point configuration for Nginx to
+act as a front-end proxy to a 5-node Riak cluster.
+
+This example forwards all GET requests to Riak nodes while rejecting all
+other HTTP operations.
+
+{{% note title="Nginx version notes" %}}
+This example configuration was verified on **Nginx version 1.2.3**. Please be
+aware that earlier versions of Nginx did not support any HTTP 1.1 semantics
+for upstream communication to backends. You should carefully examine this
+configuration and make changes appropriate to your specific environment before
+attempting to use it.
+{{% /note %}}
+
+Here is an example `nginx.conf` file:
+
+```config
+upstream riak_hosts {
+  # server 10.0.1.10:8098;
+  # server 10.0.1.11:8098;
+  # server 10.0.1.12:8098;
+  # server 10.0.1.13:8098;
+  # server 10.0.1.14:8098;
+}
+
+server {
+  listen   80;
+  server_name  _;
+  access_log  /var/log/nginx/riak.access.log;
+
+  # your standard Nginx config for your site here...
+  location / {
+    root /var/www/nginx-default;
+  }
+
+  # Expose the /riak endpoint and allow queries for keys only
+  location /riak/ {
+      proxy_set_header Host $host;
+      proxy_redirect off;
+
+      client_max_body_size       10m;
+      client_body_buffer_size    128k;
+
+      proxy_connect_timeout      90;
+      proxy_send_timeout         90;
+      proxy_read_timeout         90;
+
+      proxy_buffer_size          64k;  # If set to a smaller value,
+                                       # nginx can complain with a
+                                       # "too large headers" error
+      proxy_buffers              4 64k;
+      proxy_busy_buffers_size    64k;
+      proxy_temp_file_write_size 64k;
+
+      if ($request_method != GET) {
+          return 405;
+      }
+
+      # Disallow any link with the MapReduce query format "bucket,tag,_"
+      if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) {
+          return 405;
+      }
+
+      if ($request_method = GET) {
+          proxy_pass http://riak_hosts;
+      }
+  }
+}
+```
+
+{{% note title="Note on access controls" %}}
+Even when filtering and limiting requests to GETs only as done in the example,
+you should strongly consider additional access controls beyond what Nginx can
+provide directly, such as specific firewall rules to limit inbound connections
+to trusted sources.
+{{% /note %}}
+
+### Querying Secondary Indexes Over HTTP
+
+When accessing Riak over HTTP and issuing Secondary Index queries, you
+can encounter an issue due to the default Nginx handling of HTTP header
+names containing underscore (`_`) characters.
+
+By default, Nginx will issue errors for such queries, but you can
+instruct Nginx to handle such header names when doing Secondary Index
+queries over HTTP by adding the following directive to the appropriate
+`server` section of `nginx.conf`:
+
+```
+underscores_in_headers on;
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/configuring/managing.md b/content/riak/kv/3.0.2/configuring/managing.md
new file mode 100644
index 0000000000..5968479659
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/managing.md
@@ -0,0 +1,121 @@
+---
+title: "Managing Your Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Managing Configuration"
+    identifier: "configuring_managing"
+    weight: 130
+    parent: "configuring"
+toc: true
+aliases:
+---
+
+[use admin riak cli]: {{}}riak/kv/3.0.2/using/admin/riak-cli
+[use admin riak cli#chkconfig]: {{}}riak/kv/3.0.2/using/admin/riak-cli/#chkconfig
+[config reference#search]: {{}}riak/kv/3.0.2/configuring/reference/#search
+
+## Retrieving a Configuration Listing
+
+At any time, you can get a snapshot of currently applied configurations
+through the command line. For a listing of *all* of the configs
+currently applied in the node:
+
+```bash
+riak config effective
+```
+
+This will output a long list of the following form:
+
+```
+anti_entropy = active
+anti_entropy.bloomfilter = on
+anti_entropy.concurrency_limit = 2
+# and so on
+```
+
+For detailed information about a particular configuration variable, use
+the `config describe <variable>` command. This command will output a
+description of what the parameter configures, which datatype you should
+use to set the parameter (integer, string, enum, etc.), the default
+value of the parameter, the currently set value in the node, and the
+name of the parameter in `app.config` in older versions of Riak (if
+applicable).
+
+For in-depth information about the `ring_size` variable, for example:
+
+```bash
+riak config describe ring_size
+```
+
+This will output the following:
+
+```
+Documentation for ring_size
+Number of partitions in the cluster (only valid when first
+creating the cluster).
Must be a power of 2, minimum 8 and maximum +1024. + + Datatype : [integer] + Default Value: 64 + Set Value : undefined + app.config : riak_core.ring_creation_size +``` + +## Checking Your Configuration + +The [`riak`][use admin riak cli] command line tool has a +[`chkconfig`][use admin riak cli#chkconfig] command that enables you to +determine whether the syntax in your configuration files is correct. + +```bash +riak chkconfig +``` + +If your configuration files are syntactically sound, you should see the +output `config is OK` followed by a listing of files that were checked. +You can safely ignore this listing. If, however, something is +syntactically awry, you'll see an error output that provides details +about what is wrong. To give an example, the `search.solr.jmx_port` +setting (in the [Search][config reference#search] section below) +must be set as an integer. Imagine that we set it to something else: + +```riakconf +search.solr.jmx_port = banana +``` + +If we run `riak chkconfig` now, we'll get an error: + +``` +[error] Error generating configuration in phase transform_datatypes +[error] Error transforming datatype for: search.solr.jmx_port +[error] "banana" can't be converted to an integer +``` + +The error message will specify which configurable parameters are +syntactically unsound and attempt to provide an explanation why. + +Please note that the `chkconfig` command only checks for syntax. It will +_not_ be able to discern if your configuration is otherwise unsound, +e.g. if your configuration will cause problems on your operating system +or doesn't activate subsystems that you would like to use. + +## Debugging Your Configuration + +If there is a problem with your configuration but you're having trouble +identifying the problem, there is a command that you can use to debug +your configuration: + +```bash +riak config generate -l debug +``` + +If there are issues with your configuration, you will see detailed +output that might provide a better sense of what has gone wrong in the +config generation process. + + + + diff --git a/content/riak/kv/3.0.2/configuring/mapreduce.md b/content/riak/kv/3.0.2/configuring/mapreduce.md new file mode 100644 index 0000000000..a9d9f5e321 --- /dev/null +++ b/content/riak/kv/3.0.2/configuring/mapreduce.md @@ -0,0 +1,200 @@ +--- +title: "MapReduce Settings" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "MapReduce Settings" + identifier: "configuring_mapreduce" + weight: 170 + parent: "configuring" +toc: true +aliases: + - /riak/3.0.2/ops/advanced/configs/mapreduce/ + - /riak/kv/3.0.2/ops/advanced/configs/mapreduce/ +--- + +[usage mapreduce]: {{}}riak/kv/3.0.2/developing/usage/mapreduce +[config reference#appconfig]: {{}}riak/kv/3.0.2/configuring/reference/#app-config +[usage secondary-indexes]: {{}}riak/kv/3.0.2/developing/usage/secondary-indexes + +## Configuring MapReduce + +[MapReduce (M/R)][usage mapreduce] is always enabled, but configurable +through the [app.config][config reference#appconfig] file as +follows under `riak_kv` + +```erlang +{riak_kv, [ +``` + +`mapred_name` is the URL directory used to submit M/R requests to Riak. +By default `mapred`, making the command path, for example: +`http://localhost:8098/mapred` + +```erlang + {mapred_name, "mapred"}, +``` + +`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes] +MapReduce inputs are queued in parallel in their own pipe (`true`), or +serially through a helper process (`false` or undefined). 
+
+> **Note**: Set to `false` or leave undefined during an upgrade from 1.0.
+
+```erlang
+    {mapred_2i_pipe, true},
+```
+
+Each of these entries controls how many Javascript virtual machines are
+available for executing map, reduce, pre- and post-commit hook
+functions.
+
+This is largely relevant only if you are writing JavaScript M/R jobs.
+
+```erlang
+    {map_js_vm_count, 8 },
+    {reduce_js_vm_count, 6 },
+    {hook_js_vm_count, 2 },
+```
+
+`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated
+to the Javascript VMs. If unset, the default is 8MB.
+
+This is largely relevant only if you are writing JavaScript M/R jobs.
+
+```erlang
+    {js_max_vm_mem, 8},
+```
+
+`js_thread_stack` is the maximum amount of thread stack, in megabytes,
+allocated to the Javascript VMs. If unset, the default is 16MB.
+
+> **Note**: This is not the same as the C thread stack.
+
+```erlang
+    {js_thread_stack, 16},
+```
+
+`js_source_dir` should point to a directory containing Javascript source
+files which will be loaded when Riak initializes Javascript VMs.
+
+```erlang
+    %{js_source_dir, "/tmp/js_source"},
+```
+
+
+
+## Configuration Tuning for Javascript
+
+If you load larger JSON objects in your buckets there is a possibility you might encounter an error like the following:
+
+```json
+    {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"}
+```
+
+
+You can increase the amount of memory allocated to the Javascript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB:
+
+```erlang
+{js_thread_stack, 8}
+```
+
+becomes
+
+```erlang
+{js_thread_stack, 32},
+```
+
+In addition to increasing the amount of memory allocated to the stack you can increase the heap size as well by increasing the `js_max_vm_mem` from the default of 8MB. If you are collecting a large amount of results in a reduce phase you may need to increase this setting.
+
+## Configuration for Riak 1.0
+
+Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config:
+
+```erlang
+%% Use Riak Pipe to power MapReduce queries
+{mapred_system, pipe},
+```
+
+> **Warning:**
+>
+> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0.
+
+Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this:
+
+```erlang
+%% Use the legacy MapReduce system
+{mapred_system, legacy},
+```
+
+## Configuration Tuning for Reduce Phases
+
+If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases.
+
+### Batch Size
+
+By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs.
If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config:
+
+```erlang
+%% Run reduce functions after 100 new inputs are received
+{mapred_reduce_phase_batch_size, 100},
+```
+
+You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this:
+
+```json
+{"reduce":
+  {...language, etc. as usual...
+   "arg":{"reduce_phase_batch_size":150}}}
+```
+
+In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form:
+
+```erlang
+{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep}
+```
+
+Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead:
+
+```json
+{"reduce":
+  {...language, etc. as usual...
+   "arg":{"reduce_phase_only_1":true}}}
+```
+
+Similarly, in Erlang:
+
+```erlang
+{reduce, FunSpec, [reduce_phase_only_1], Keep}
+```
+
+> **Warning:**
+>
+> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1.
+
+### Pre-Reduce
+
+If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced local to the partition that produced them, before being sent, as usual, to the final aggregate reduce.
+
+Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config:
+
+```erlang
+%% Always pre-reduce between map and reduce phases
+{mapred_always_prereduce, true}
+```
+
+Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag:
+
+```erlang
+{map, FunSpec, [do_prereduce], Keep}
+```
+
+> **Warning:**
+>
+> A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/configuring/next-gen-replication.md b/content/riak/kv/3.0.2/configuring/next-gen-replication.md
new file mode 100644
index 0000000000..89e8dceb1f
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/next-gen-replication.md
@@ -0,0 +1,116 @@
+---
+title_supertext: "Configuring:"
+title: "Next Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "3.0.2"
+menu:
+  riak_kv-3.0.2:
+    name: "Next Gen Replication"
+    identifier: "nextgen_rep"
+    weight: 200
+    parent: "configuring"
+version_history:
+  in: "2.9.1+"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+The configuration for Next Gen Replication is kept in
+the `riak.conf` configuration file.
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by
+running the `riak` command-line tool:
+
+```bash
+riak chkconfig
+```
+
+## Configure Full-Sync Replication
+
+To enable full-sync replication on a cluster, the following configuration is required:
+
+`ttaaefs_scope = all`
+
+`ttaaefs_queuename = replq`
+
+`ttaaefs_localnval = 3`
+
+`ttaaefs_remotenval = 3`
+
+Then to configure a peer relationship:
+
+`ttaaefs_peerip = `
+
+`ttaaefs_peerport = 8087`
+
+`ttaaefs_peerprotocol = pb`
+
+Unlike when configuring a real-time replication sink, each node can only have a single peer relationship with another node in the remote cluster. Note, though, that all full-sync commands run across the whole cluster. If a single peer relationship dies, some full-sync capacity is lost, but other peer relationships between different nodes will still cover the whole data set.
+
+Once there are peer relationships, a schedule is required, and a capacity must be defined.
+
+`ttaaefs_allcheck = 2`
+
+`ttaaefs_hourcheck = 0`
+
+`ttaaefs_daycheck = 22`
+
+`ttaaefs_rangecheck = 36`
+
+`ttaaefs_maxresults = 64`
+
+`ttaaefs_rangeboost = 8`
+
+The schedule defines how many times in each 24-hour period to run a check of the given type. The schedule is re-shuffled at random each day, and is specific to that node's peer relationship.
+
+As this is a configuration for nval full-sync, all of the data will always be compared - by merging a cluster-wide tictac tree and comparing the trees of both clusters. If a delta is found by that comparison, the scheduled work item determines what to do next:
+
+* `all` indicates that the whole database should be scanned for all time looking for deltas, but only for deltas in a limited number of broken leaves of the merkle tree (the `ttaaefs_maxresults`).
+
+* `hour` or `day` restricts the scan to data modified in the past hour or past 24 hours.
+
+* `range` is a "smart" check. It will not be run when past queries have indicated nothing can be done to resolve the delta (for example as the other cluster is ahead, and only the source cluster can prompt fixes). If past queries have shown the clusters to be synchronised, but then a delta occurs, the range_check will only scan for deltas since the last successful synchronisation. If another check discovers the majority of deltas are in a certain bucket or modified range, the range query will switch to using this as a constraint for the scan.
+
+Each check is constrained by `ttaaefs_maxresults`, so that it only tries to resolve issues in a subset of broken leaves in the tree of that scale (there are o(1M) leaves to the tree overall). However, the range checks will try and resolve more (as they are constrained by the range) - this will be the product of `ttaaefs_maxresults` and `ttaaefs_rangeboost`.
+
+It is normally preferable to under-configure the schedule. When over-configuring the schedule, i.e. scheduling more repair work than the capacity of the cluster allows, there are protections to queue those schedule items there is no capacity to serve, and proactively cancel items once the manager falls behind in the schedule. However, those cancellations will reset range_checks and so may delay the overall time to recover.
+
+It is possible to enhance the speed of recovery when there is capacity by manually requesting additional checks, or by temporarily overriding `ttaaefs_maxresults` and/or `ttaaefs_rangeboost`.
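+
+Putting the pieces together, the settings described above combine into a `riak.conf` fragment like the following sketch (the peer address is a placeholder assumption; every other value is taken from the example settings in this section):
+
+```riakconf
+ttaaefs_scope = all
+ttaaefs_queuename = replq
+ttaaefs_localnval = 3
+ttaaefs_remotenval = 3
+# peer address is a hypothetical example - use a node in the remote cluster
+ttaaefs_peerip = 10.0.0.2
+ttaaefs_peerport = 8087
+ttaaefs_peerprotocol = pb
+ttaaefs_allcheck = 2
+ttaaefs_hourcheck = 0
+ttaaefs_daycheck = 22
+ttaaefs_rangecheck = 36
+ttaaefs_maxresults = 64
+ttaaefs_rangeboost = 8
+```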
+
+In a cluster with 1bn keys, under a steady load including 2K PUTs per second, the relative timings to complete different sync checks (assuming there exists a delta) are:
+
+* `all_sync` 150s - 200s
+* `day_sync` 20s - 30s
+* `hour_sync` 2s - 5s
+* `range_sync` (depends on how recent the low point in the modified range is)
+
+Timings will vary depending on the total number of keys in the cluster, the rate of changes, the size of the delta and the precise hardware used. Full-sync repairs tend to be relatively demanding of CPU (rather than disk I/O), so available CPU capacity is important.
+
+## riak.conf Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync, does all data need to be sync'd, or should a specific bucket be sync'd (bucket), or a specific bucket type (type). Note that in most cases sync of all data is lower overhead than sync of a subset of data - as cached AAE trees will be used.
+`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For Tictac full-sync, the registered queue name on this cluster to be used for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exclusive to full-sync (i.e. a real-time replication queue may be used as well).
+`ttaaefs_maxresults` | `any` (integer) | `64` | For Tictac full-sync, the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair.
+`ttaaefs_rangeboost` | `any` (integer) | `8` | For Tictac full-sync, a multiplier applied when running a range_check query: the maximum number of AAE segments compared per exchange will be `ttaaefs_maxresults` * `ttaaefs_rangeboost`.
+`ttaaefs_bucketfilter_name` | `any` (text) | `` | For Tictac bucket full-sync, which bucket should be sync'd by this node. Only ascii string bucket definitions are supported (which will be converted using list_to_binary).
+`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync, the bucket type of the bucket name. Only ascii string type bucket definitions are supported (these definitions will be converted to binary using list_to_binary).
+`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync, which NVAL should be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval.
+`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync, which NVAL should be sync'd in the remote cluster.
+`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node in the remote cluster to which this node will connect for full-sync purposes. If this peer node is unavailable, then this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes.
+`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster.
+`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when connecting to the peer in the remote cluster. Can be http or pb (but only http is currently being tested).
+`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24-hour period should all the data be checked to confirm it is fully sync'd. When running a full (i.e.
nval) sync this will check all the data under that nval between the clusters, and when the trees are out of alignment, will check across all data where the nval matches the specified nval.
+`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24-hour period should no data be checked to confirm it is fully sync'd. Use nochecks to align the number of checks done by each node - if each node has the same number of slots, they will naturally space their checks within the period of the slot.
+`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24-hour period should the last hour's data be checked to confirm it is fully sync'd.
+`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24-hour period should the last 24 hours of data be checked to confirm it is fully sync'd.
+`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24-hour period should a range_check be run.
+`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, should each key that is repaired be logged.
+`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable tictacaae. Note that tictacaae_active is read only at startup - setting the environment variable at runtime will have no impact.
+`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
+`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Set the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and not removed when that partition hands off (although the contents should be cleared).
+`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if tictacaae is enabled, then the vnode will detect whether the vnode backend has the capability to be a "native" store. If not, then parallel mode will be entered, and a parallel AAE keystore will be started. There are two potential parallel store backends - leveled_ko, and leveled_so.
+`tictacaae_rebuildwait` | `` | `336` | This is the number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
+`tictacaae_rebuilddelay` | `` | `345600` | Once the AAE system has expired (due to the rebuild wait), the rebuild will not be triggered until the rebuild delay, which will be a random number up to the size of this delay (in seconds).
+`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
+`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will skip when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans developing.
+`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on the riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
+`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data, hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired each pass is defined by the tictacaae_maxresults. diff --git a/content/riak/kv/3.0.2/configuring/reference.md b/content/riak/kv/3.0.2/configuring/reference.md new file mode 100644 index 0000000000..d13214b481 --- /dev/null +++ b/content/riak/kv/3.0.2/configuring/reference.md @@ -0,0 +1,2039 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/3.0.2/ops/advanced/configs/configuration-files/ + - /riak/kv/3.0.2/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. 
+ +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. + +## Node Metadata + +Every Riak node has a name and a cookie used to facilitate inter-node +communication. The following parameters enable you to customize the name +and cookie. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
distributed_cookieCookie for distributed node communication within a Riak cluster. +All nodes in the same cluster should use the same cookie or they will +not be able to communicate.riak
nodenameThe name of the Riak node.riak@127.0.0.1
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
+ +## Ring + +Configurable parameters for your cluster's [ring][concept clusters]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ring.state_dirDefault location of ringstate../data/ring
ring_sizeNumber of partitions in the cluster (only valid when first creating +the cluster). Must be a power of 2. The minimum is 8 and the maximum is +1024.64
transfer_limitNumber of concurrent node-to-node transfers allowed.2
+
+## Storage Backend
+
+Riak enables you to choose from the following storage backends:
+
+* [Bitcask][plan backend bitcask]
+  - [configuration][config backend bitcask]
+* [LevelDB][plan backend leveldb]
+  - [configuration][config backend leveldb]
+* [Leveled][plan backend leveled]
+  - [configuration][config backend leveled]
+* [Memory][plan backend memory]
+  - [configuration][config backend memory]
+* [Multi][plan backend multi]
+  - [configuration][config backend multi]
+
ConfigDescriptionDefault
storage_backendSpecifies the storage engine used for Riak's key-value data and +secondary indexes (if supported).

The available options are +bitcask (the default), leveldb, +memory, leveled and multi.
bitcask
+ +## Directories + +The directories in which Riak stores data, logs, dependencies, +executables, and configuration files can be configured using the +parameters below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
platform_bin_dirThe directory in which the riak-admin, +riak-debug, and now-deprecated search-cmd +executables are stored../bin
platform_data_dirThe directory in which Riak stores its storage backend data, as well +as active anti-entropy data, and cluster metadata../data
platform_etc_dirThe directory in which Riak's configuration files are stored../etc
platform_lib_dirThe directory in which Riak's dependencies are housed../lib
platform_log_dirThe directory in which Riak's log files are stored, e.g. +console.log, erlang.log, and +crash.log files../log
+
+Each of these directory parameters can be used to construct values for
+other parameters by placing it within a `$(...)`. Thus,
+`platform_log_dir` becomes `$(platform_log_dir)` and so on.
+
+To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the
+`anti_entropy.data_dir` parameter. When setting that parameter, you can
+specify an absolute directory, as below:
+
+```riakconf
+anti_entropy.data_dir = /path/to/anti_entropy
+```
+
+Or you can use the value of `platform_data_dir`:
+
+```riakconf
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```
+
+## Search
+
+Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`search` | `off` | `on` or `off`
+`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
+`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
+`search.dist_query` | `on` | `on` or `off`
+`search.index.error_threshold.failure_count` | `3` | Integer
+`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
+`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
+`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
+`search.queue.batch.maximum` | `100` | Integer
+`search.queue.batch.minimum` | `1` | Integer
+`search.queue.high_watermark` | `10000` | Integer
+`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
+`search.root_dir` | `./data/yz` | Directory
+`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
+`search.solr.jmx_port` | `8985` | Integer
+`search.solr.port` | `8093` | Integer
+`search.solr.start_timeout` | `30s` | Integer with time units (e.g. 2m)
+`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`
+
+## Riak Control
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters. The configurable parameters below enable you
+to turn the Riak Control subsystem on and off and to configure console
+authorization.
+
ConfigDescriptionDefault
riak_controlSet to off to disable the admin panel.off
riak_control.auth.modeAuthentication mode used for access to the admin panel. Options are +off (which is the default) or userlist.off
riak_control.auth.user.$username.passwordIf Riak Control's authentication mode +(riak_control.auth.mode) is set to userlist, +this is the list of usernames and passwords for access to the admin +panel.
+ +## Runtime Health + +Configurable parameters for interaction between Riak and the underlying +operating system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
runtime_health.triggers.distribution_portWhether distribution ports with full input buffers will be counted +as busy. Distribution ports connect Riak nodes within a single cluster. +on
runtime_health.triggers.portWhether ports with full input buffers will be counted as busy. +Ports can represent open files or network sockets.on
runtime_health.triggers.process.heap_sizeA process will become busy when its heap exceeds this size +(in bytes).160444000
runtime_health.triggers.process.garbage_collectionA process will become busy when it exceeds this amount of time doing +garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 +milliseconds, `5s` for 5 seconds, etc.Note: Enabling +this setting can cause performance problems on multi-core systems.off
runtime_health.triggers.process.long_scheduleA process will become busy when it exceeds this amount of time +during a single process scheduling and execution cycle. Set as an integer +plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, +etc.off
runtime_health.thresholds.busy_portsThe threshold at which a warning will be triggered about the number +of ports that are overly busy. Ports with full input buffers count +toward this threshold.2
runtime_health.thresholds.busy_processesThe threshold at which a warning will be triggered about the
+number of processes that are overly busy. Processes with large heaps or
+that take a long time to garbage collect will count toward this
+threshold.30
+ +## Default Bucket Properties + +When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
buckets.default.allow_multWhether or not siblings are allowed +

+Note: See +Conflict Resolution for a discussion of siblings.
false
buckets.default.basic_quorumWhether not-founds will invoke the "basic quorum" optimization. +This setting will short-circuit fetches where the majority of replicas +report that the key is not found. Only used when +notfound_ok is set to false.false
buckets.default.dwThe number of replicas which must reply to a write request +indicating that the write was committed to durable storage for the write +to be deemed successful.quorum
buckets.default.last_write_winsWhether conflicting writes resolve via timestamp.false
buckets.default.merge_strategyThe strategy used when merging objects that potentially have +conflicts. The default is 2 in Riak 2.0 for typed buckets +and 1 for non-typed buckets. This setting reduces sibling +creation through additional metadata on each sibling (also known as Dotted +Version Vectors). Setting this to 1 is the default for +Riak 1.4 and earlier, and may duplicate siblings that originated in the +same write.1
buckets.default.n_valThe number of replicas stored in **non-typed** buckets. For typed buckets, the default is 3 unless changed explicitly for that bucket type. +

+Note: See +Replication Properties +for further discussion.
3
buckets.default.notfound_okWhether not-founds will count toward a quorum of reads.true
buckets.default.postcommitA space-delimited list of functions that will be run after a value +is stored. Only Erlang functions are allowed, using the +module:function format.
buckets.default.precommitA space-delimited list of functions that will be run before a value +is stored, and that can abort the write. Only Erlang functions are +allowed, using the module:function format.
buckets.default.prThe number of primary, non-fallback replicas that must reply to a +read request.0
buckets.default.pwThe number of primary, non-fallback replicas which must reply to a +write request.0
buckets.default.rThe number of replicas which must reply to a read request.quorum
buckets.default.wThe number of replicas which must reply to a write request, +indicating that the write was received.quorum
buckets.default.rwThe number of replicas which must reply to a delete request.quorum
+ +## Object Settings + +Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
object.formatControls which binary representation of a riak value is stored on +disk. Options are 0, which will use the original +erlang:term_to_binary format but has a higher space +overhead, or 1, which will tell Riak to utilize a new +format for more compact storage of small values.1
object.siblings.maximumWriting an object with more than this number of siblings will send +a failure to the client.100
object.siblings.warning_thresholdWriting an object with more than this number of siblings will +generate a warning in the logs.25
object.size.maximumWriting an object larger than this will send a failure to the +client.50MB
object.size.warning_thresholdReading or writing objects larger than this size will write a +warning in the logs.5MB
+ +## Erlang VM + +In the older configuration system, the Erlang VM in which Riak runs was +configured using a `vm.args` file. In the new, `riak.conf`-based +system, the Erlang VM can be configured using the parameters in the +table below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
erlang.async_threadsThe number of threads in the Erlang VM's asynchronous thread pool. +The valid range is 0-1024. If thread support is not available, this +parameter will have no impact; if thread support is available, the +default value is 64. This is the equivalent of the +A flag. +More information can be found here. +64 (if thread support is available)
erlang.async_threads.stack_sizeIf thread support is available in your Erlang VM, this parameter +sets the amount of memory allocated to each asynchronous thread, which +you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, +which translates to 64-32768 KB on 32-bit architectures. Although there +is no default, we suggest a stack size of 16 kilowords, which translates +to 64 KB. This small default size has been chosen because the number of +asynchronous threads, set using the erlang.async_threads +parameter explained above, might be quite large. The 64 KB default is +enough for drivers delivered with Erlang/OTP but might not be large +enough to accommodate drivers that use the driver_async() +functionality, documented here.
erlang.distribution.net_ticktimeThe net kernel is an Erlang system process that provides various +forms of network monitoring. In a Riak cluster, one of the functions of +the net kernel is to periodically check node liveness. Tick +time is the frequency with which those checks happen. This +parameter determines that frequency for every N. If you set +this parameter to 10, for example, the tick will occur once +every 10 seconds.
erlang.distribution.port_range.minimumFor ease of firewall configuration, the Erlang distribution can be +bound to a limited range of TCP ports. If this parameter is set, and +erlang.distribution.port_range.maximum is not set, only +this port will be used. If the minimum is unset, no restriction will be +made on the port range. Instead, Erlang will listen on a random +high-numbered port. More information here and here.
erlang.distribution.port_range.maximumSee the description for +erlang.distribution.port_range.minimum directly above. +
erlang.schedulers.force_wakeup_intervalSet the scheduler forced wakeup interval. All run queues will be +scanned each time period specified (in milliseconds). While there are +sleeping schedulers in the system, one scheduler will be woken for each +non-empty run queue found. An interval of zero disables this feature, +which is the default. This feature is a workaround for lengthy executing +native code, and native code that does not properly bump reductions. +More information here.
erlang.schedulers.compaction_of_loadEnables or disables the Erlang scheduler's compaction of load. When +enabled (which is the default), load balancing will strive to establish +a load distribution that causes as many scheduler threads as possible to +be fully loaded, i.e. not to run out of scheduled work. This is +accomplished by migrating load, such as running processes, into a +smaller set of schedulers when schedulers frequently run out of work. +When disabled, the frequency at which schedulers run out of work will +not be taken into account by the load balancing logic.true (enabled)
erlang.schedulers.utilization_balancingEnables or disables the Erlang scheduler's balancing of load. By
+default, scheduler utilization balancing is disabled while scheduler
+compaction of load is enabled, i.e.
+erlang.schedulers.compaction_of_load is set to
+true. In this state, the Erlang VM will strive for a load
+distribution which causes as many scheduler threads as possible to be
+fully loaded, i.e. to not run out of work. When load balancing is
+enabled using this setting, the system will instead attempt to equalize
+scheduler utilization between schedulers.false (disabled)
erlang.distribution_buffer_sizeFor nodes with many busy_dist_port events, Basho +recommends raising the sender-side network distribution buffer size. +32MB may not be sufficient for some workloads and is a suggested +starting point. Erlangers may know this as +zdbbl. See more +here +.32MB
erlang.process_limitRaises the default Erlang process limit256000
erlang.max_ets_tablesRaises the ETS table limit256000
erlang.crash_dumpSets the location of crash dumps./log/erl_crash.dump
erlang.fullsweep_afterA non-negative integer which indicates how many times generational +garbage collections can be done without forcing a fullsweep collection. +In low-memory systems (especially without virtual memory), setting the +value to 0 can help to conserve memory. More information here. +0
erlang.max_portsThe number of concurrent ports/sockets. The valid range is 1024 to +134217727.65536
erlang.KEnables or disables the kernel poll functionality if the emulator +supports it. If the emulator does not support kernel poll, and the +K flag is passed to the emulator, a warning is issued at +startup. Similar information here.on
erlang.schedulers.totalSets the number of scheduler threads to create and scheduler +threads to set online when erlang.smp support has been +enabled. The maximum for both values is 1024. If the Erlang runtime +system is able to determine the amount of logical processors configured +and logical processors available, schedulers.total will +default to logical processors configured, and +schedulers.online will default to the number of logical +processors available. Otherwise, the default values will be 1. +Schedulers may be omitted if schedulers.online is not and +vice versa. If schedulers.total or +schedulers.online is specified as a negative number, the +value is subtracted from the default number of logical processors +configured or logical processors available, respectively. Specifying +the value 0 for Schedulers or +SchedulersOnline resets the number of scheduler threads or +scheduler threads online respective to its default value. This option +is ignored if the emulator doesn't have SMP support enabled (see the +erlang.smp flag). More information +here. +
erlang.schedulers.onlineSee the description for erlang.schedulers.total +directly above.
erlang.WSets the mapping of warning messages for error_logger. +Messages sent to the error logger using one of the warning routines can +be mapped either to errors, warnings (w, +which is the default), or info reports (i).w
erlang.smpStarts the Erlang runtime system with SMP support enabled. This may +fail if no runtime system with SMP support is available. The +auto setting starts the Erlang runtime system with SMP +support enabled if it is available and more than one logical processor +is detected. A value of disable starts a runtime system +without SMP support. Note: The runtime system with SMP +support will not be available on all supported platforms. See also the +erlang.schedulers settings. Some native extensions (NIFs) +require use of the SMP emulator. More information here.enable
erlang.shutdown_timeLimits how long the Erlang VM spends shutting down. After the +specified duration elapses, all existing processes are killed.10s
+ +## JavaScript MapReduce + +Configurable parameters for Riak's now-deprecated JavaScript +[MapReduce][usage mapreduce] system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
javascript.source_dirA directory containing the Javascript source files which will be +loaded by Riak when it initializes Javascript VMs.
javascript.maximum_stack_sizeThe maximum amount of thread stack memory to allocate to each +JavaScript virtual machine.16MB
javascript.maximum_heap_sizeThe maximum amount of memory allocated to each JavaScript virtual +machine.8MB
javascript.hook_pool_sizeThe number of JavaScript virtual machines available for executing +pre-commit hook functions.2
javascript.reduce_pool_sizeThe number of JavaScript virtual machines available for executing +reduce functions.6
javascript.map_pool_sizeThe number of JavaScript virtual machines available for executing +map functions.8
+ +## Security + +Configurable parameters for [Riak KV Security][security index]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ssl.cacertfileThe default signing authority location for HTTPS.$(platform_etc_dir)/cacertfile.pem
ssl.keyfileDefault key location for HTTPS.$(platform_etc_dir)/key.pem
ssl.certfileDefault cert location for HTTPS.$(platform_etc_dir)/cert.pem
secure_referer_checkMeasures were added to Riak 1.2 to counteract cross-site scripting +and request-forgery attacks. Some reverse proxies cannot remove the +Referer header and make serving data directly from Riak +impossible. Turning this setting to off disables this +security check.on
check_crlWhether to check the certificate +revocation list (CRL) of a client certificate. This defaults to +on but some CAs may not maintain or define a CRL, so this +can be disabled if no CRL is available.on
tls_protocols.sslv3Determine which SSL/TLS versions are allowed. By default, only TLS +1.2 is allowed, but other versions can be enabled if clients don't +support the latest TLS standard. It is strongly recommended that SSLv3 +not be enabled unless absolutely necessary. More than one protocol can +be enabled at once. The tls_protocols parameters below can +be used to turn different versions on and off.off
tls_protocols.tlsv1.2on
tls_protocols.tlsv1.1off
tls_protocols.tlsv1off
honor_cipher_orderWhether to prefer the order in which the server lists its ciphers. +When set to off, the client's preferred cipher order +dictates which cipher is chosen.on
+ +## Client Interfaces + +Configurable parameters for clients connecting to Riak either through +Riak's Protocol Buffers or HTTP API. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
protobuf.nagleTurns off Nagle's algorithm for Protocol Buffers connections. This +is equivalent to setting the TCP_NODELAY option on the +socket.off
protobuf.backlogThe maximum length to which the queue of pending connections may +grow. If set, it must be an integer greater than zero. If you +anticipate a huge number of connections being initialized +simultaneously, set this number higher.128
listener.protobuf.$nameThis is the IP address and TCP port to which the Riak Protocol +Buffers interface will bind.{"127.0.0.1",8087}
listener.http.$nameThis is the IP address and TCP port to which the Riak HTTP +interface will bind.{"127.0.0.1",8098}
listener.https.$nameThis is the IP address and TCP port to which the Riak HTTPS +interface will bind.
+ +## Logging + +Configurable parameters for [lager](https://github.com/basho/lager), +Riak's logging system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
log.consoleWhere to emit the default log messages (typically at +info severity). Possible values: off, which +disables console log messages; file, which specifies that +log messages will be output to the file specified by +log.console.file; console, which outputs +messages to standard output (seen when using riak +attach-direct); or both, which outputs messages both +to the file specified in log.console.file and to standard +out.file
log.console.fileWhen log.console is set to file or +both, this parameter determines the path of the file to +which console messages will be logged../log/console.log
log.console.levelThe severity level of the console log. Possible
+values: debug, info,
+warning, errorinfo
log.crashWhether to enable the crash logon
log.crash.fileIf the crash log is enabled, the file where its messages will be +written./log/crash.log
log.crash.maximum_message_sizeMaximum size of individual messages in the crash log64KB
log.crash.rotationThe schedule on which to rotate the crash log. More information here. +$D0
log.crash.rotation.keepThe number of rotated crash logs to keep. When set to +current, only the current open log file is kept. +Otherwise, an integer can be specified.5
log.crash.sizeMaximum size of the crash log before it is rotated10MB
log.error.fileThe file where error messages will be logged../log/error.log
log.error.messages_per_secondMaximum number of error_logger messages to handle per +second100
log.error.redirectWhether to redirect error_logger messages into +lageron
log.syslogWhen set to on, enables log output to syslogoff
log.syslog.facilitySets the facility
+level of syslog output if log.syslog is set to
+on. Possible values: auth, authpriv,
+clock, cron, daemon,
+ftp, kern, lpr,
+mail, news, syslog,
+user, uucp.
+In addition to these settings, you may also select local0
+through local7.daemon
log.syslog.identIf log.syslog is set to on, this setting +determines the prefix appended to each syslog message.riak
log.syslog.levelIf log.syslog is set to on, this setting
+determines the log level of syslog output. Possible values:
+alert, critical, debug,
+emergency, error, info,
+none, notice, warning.info
saslWhether to enable sasl, Erlang's +built-in error loggeroff
+ +## Active Anti-Entropy + +Configurable parameters for Riak's active anti-entropy subsystem. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
anti_entropyHow Riak will repair out-of-sync keys. If set to +active, out-of-sync keys will be repaired in the +background; if set to passive, out-of-sync keys are only +repaired on read; and if set to active-debug, verbose +debugging information will be output.active
anti_entropy.throttleWhether the distributed throttle for Active Anti-Entropy is
+enabled.on
anti_entropy.throttle.$tier.mailbox_sizeSets the throttling tiers for Active Anti-Entropy. Each tier is a
+minimum vnode mailbox size and a time-delay that the throttle should
+observe at that size and above. For example,
+anti_entropy.throttle.tier1.mailbox_size = 0,
+anti_entropy.throttle.tier1.delay = 0ms,
+anti_entropy.throttle.tier2.mailbox_size = 40,
+anti_entropy.throttle.tier2.delay = 5ms, etc. If
+configured, there must be a tier which includes a mailbox size of 0.
+Both .mailbox_size and .delay must be set for
+each tier.
anti_entropy.throttle.$tier.delaySee the description for
+anti_entropy.throttle.$tier.mailbox_size above.
anti_entropy.bloomfilterBloom filters are highly effective in shortcutting data queries +that are destined to not find the requested key, though they tend to +entail a small performance cost.on
anti_entropy.max_open_files20
anti_entropy.write_buffer_sizeThe LevelDB options used by Active Anti-Entropy to generate the +LevelDB-backed on-disk hashtrees.4MB
anti_entropy.data_dirThe directory where AAE hash trees are stored../data/anti_entropy
anti_entropy.trigger_intervalThe tick determines how often the Active Anti-Entropy manager looks +for work to do (building/expiring trees, triggering exchanges, etc). +Lowering this value will speed up the rate at which all replicas are +synced across the cluster. Increasing the value is not recommended. +15s
anti_entropy.concurrency_limitLimit how many Active Anti-Entropy exchanges or builds can happen +concurrently.2
anti_entropy.tree.expiryDetermines how often hash trees are expired after being built. +Periodically expiring a hash tree ensures that the on-disk hash tree +data stays consistent with the actual K/V backend data. It also helps +Riak identify silent disk failures and bit rot. However, expiration is +not needed for normal active anti-entropy operations and should be +infrequent for performance reasons. The time is specified in +milliseconds.1w
anti_entropy.tree.build_limit.per_timespan1h
anti_entropy.tree.build_limit.numberRestrict how fast AAE can build hash trees. Building the tree for a +given partition requires a full scan over that partition's data. Once +built, trees stay built until they are expired. .number is +the number of builds; .per_timespan is the amount of time +in which that number of builds occurs.1
anti_entropy.use_background_managerWhether AAE is to use a background process to limit AAE tree +rebuilds. If set to on, this will help to prevent system +response degradation under times of heavy load from multiple background +tasks that contend for the same system resources; setting this parameter +to off can cut down on system resource usage. +off
+
+## TicTac Active Anti-Entropy
+
ConfigDescriptionDefault
tictacaae_activeSets TicTac AAE to active or passive. If you want to run TicTac AAE alongside legacy AAE, set both to active.
+passive
tictacaae_datarootPath under which aae datafiles will be stored (platform_data_dir)/tictac_aae
tictacaae_parallelstoreWhen running in parallel mode, which will be the default if the backend does not support native tictac aae (i.e. is not leveled), what type of parallel key store should be kept - leveled_ko (leveled and key-ordered), or leveled_so (leveled and segment ordered). When running in native mode, this setting is ignored. Acceptable values are leveled_ko or leveled_soleveled_ko
tictacaae_rebuildwaitThe minimum number of hours to wait between rebuilds.336
tictacaae_rebuilddelayThe number of seconds which represents the length of the period in which the next rebuild will be scheduled. So if all vnodes are scheduled to rebuild at the same time, they will actually rebuild randomly between 0 and this value (in seconds) after the rebuild time.345600
+
+## Intra-Cluster Handoff
+
+Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].
+
ConfigDescriptionDefault
handoff.max_rejectsThe maximum number of times that a secondary system within Riak, +such as Riak Search, can block handoff +of primary key/value data. The approximate maximum duration that a vnode +can be blocked can be determined by multiplying this setting by +vnode_management_timer. If you want to prevent handoff from +ever being blocked by a secondary system, set this parameter to +0.6
handoff.inboundWhether inbound handoff is enabled on the node. Possible values are +on or off.on
handoff.outboundWhether outbound handoff is enabled on the node. Possible values are +on or off.on
handoff.portSpecifies the TCP port that Riak uses for intra-cluster data +handoff.8099
handoff.ssl.certfileTo encrypt riak_core intra-cluster data handoff +traffic, uncomment this line and edit its path to an appropriate +certfile and keyfile.
handoff.ssl.keyfileThe keyfile paired with the certfile specified in +.certfile.
handoff.use_background_managerWhether Riak will use a background manager to limit K/V handoff. +This can help to prevent system response degradation during times of +heavy load caused by multiple background tasks that contend for the same +system resources; setting this parameter to off can cut +down on system resource usage.off
+ +## Riak Data Types + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
datatypes.compression_levelWhether serialized Data Types will use compression and at what +level. When set to an integer, the parameter refers to the +aggressiveness of compression, on a scale from 0 to 9. on +is equivalent to 6, whereas off is equivalent to 0. Higher +values for compression tend to be more CPU intensive.1
+ +## SNMP + +Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher. + +## JMX + +Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher. + +## Strong Consistency + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment. + +Riak's strong consistency feature has a variety of tunable parameters +that allow you to enable and disable strong consistency, modify the +behavior of leaders and followers, set various timeouts, and more. More +detailed information from an operations perspective can be found in our +documentation on [managing strong consistency][cluster ops strong consistency]. + +Strong consistency is disabled by default. The `strong_consistency` +parameter enables you to turn it on. This setting is available in each +node's `riak.conf` file. + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
strong_consistencyEnables the consensus subsystem used for strongly consistent Riak +operations if set to on.off
+ +Unlike the `strong_consistency` setting, the settings listed below are +available only in `advanced.config`, in the `riak_ensemble` section of +that file. That section looks like this: + +```advancedconfig +{riak_ensemble, [ + {parameter1, value}, + {parameter2, value}, + %% Other setting + ]} +``` + +Further instructions on setting parameters in `advanced.config` can be +found in the [advanced configuration](#advanced-configuration) section below. + +Using these settings properly demands a firm understanding of the basic +architecture of Riak's implementation of strong consistency. We highly +recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind +strong consistency before changing the defaults on these parameters. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
ensemble_tickThe rate at which leaders perform their periodic duties, including +refreshing the leader lease, in milliseconds. This setting must be lower +than both the lease_duration and +follower_timeout settings (both listed below). Lower values +mean that leaders perform their duties more frequently, which can allow +for faster convergence if a leader goes offline and then returns to the +ensemble; higher values mean that leaders perform their duties less +frequently, which can reduce network overhead.500
lease_durationDetermines how long a leader lease remains valid without being +refreshed (in milliseconds). This should be set higher than the +ensemble_tick setting (listed above) so that leaders have +time to refresh their leases before they time out, and it must be set +lower than the follower_timeout setting (listed below). +ensemble_tick * 3/2
follower_timeoutDetermines how long a follower waits to hear from a leader before it +abandons the leader (in milliseconds). This must be set greater than the +lease_duration setting.lease_duration * 4
alive_tokensDetermines the number of ticks the leader will wait to hear from its +associated vnode before assuming that the vnode +is unhealthy and stepping down as leader. If the vnode does not respond +to the leader before ensemble_tick * +alive_tokens milliseconds have elapsed, the leader will +give up leadership. It may be necessary to raise this setting if your +Riak vnodes are frequently stalling out on slow backend reads/writes. If +this setting is too low, it may cause slow requests to time out earlier +than the request timeout.2
storage_delayDetermines how long the consensus subsystem delays syncing to disk +when performing certain metadata operations (in milliseconds). This +delay allows multiple operations to be coalesced into a single disk +write. We do not recommend that you change this setting.50
storage_tickDetermines how often the consensus subsystem writes data to disk +that was requested to be written asynchronously (in milliseconds). We do +not recommend that you change this setting.5000
trust_leaseDetermines whether leader leases are used to optimize reads. When +set to true, a leader with a valid lease will handle the +read directly without contacting any followers; when set to +false, the leader will always contact followers. For more +information, see our internal documentation on + +leader leases.true
peer_get_timeoutDetermines the timeout used internally for reading consistent data, +in milliseconds. This setting must be greater than the highest request +timeout used by your application.60000 (1 minute)
peer_put_timeoutDetermines the timeout, in milliseconds, used internally for writing +consistent data. This setting must be greater than the highest request +timeout used by your application.60000 (1 minute)
peer_workersThe number of concurrent workers used by the leader to service +requests. Increasing this setting may boost performance depending on the +workload.1
tree_validationDetermines whether Riak considers peer Merkle trees to be trusted +after a node restart. When validation is enabled (the default), Riak +does not trust peer trees after a restart, instead requiring the peer to +sync with a trusted majority. This is the safest option, as it protects +Riak against undetected corruption of the Merkle tree. However, this +mode reduces Riak availability since it can sometimes require more than +a simple majority of nodes to be online and reachable.true
synchronous_tree_updatesDetermines whether the metadata updates to follower Merkle trees are +handled synchronously or not. When set to true, Riak +requires two quorum round trips to occur before replying back to the +client, the first quorum request to write the actual object and the +second to write the Merkle tree data. When set to false, +Riak will respond back to the client after the first round trip, letting +the metadata update happen asynchronously.

It's important to +note that the leader always updates its local Merkle tree +before responding to the client. This setting only affects the metadata +writes sent to followers.

In principle, asynchronous updates +are unsafe. If the leader crashes before sending the metadata updates +and all followers that had acknowledged the object write somehow revert +to the object value immediately prior to a write request, a future read +could return the immediately preceding value without realizing that it +was incorrect. Given that this scenario is unlikely, this setting +defaults to false in the name of improved performance.
false
+ + +## Miscellaneous + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
metadata_cache_sizeThis setting controls the size of the metadata cache for each vnode. +The cache can be disabled by setting it to off (this is the +default). Enabling the cache should not be necessary in disk-based +backends (i.e. LevelDB and Bitcask) but it can help performance in the +Memory backend. Note that this setting adjusts the size of the ETS table +rather than the actual data. Thus, more space may be used than the +simple size * number-of-vnodes calculation would imply. +

+Caution: This setting should not be changed without +extensive benchmarking.
off
max_concurrent_requestsThe maximum number of concurrent requests of each type (GET or PUT) +that is allowed. Setting this value to infinite disables +overload protection. The erlang.process_limit should be at +least 3 times this setting.50000
dtraceWhether DTrace is enabled. +Do not enable unless your Erlang/OTP runtime is compiled to support +DTrace, which is available in R15B01 (supported by the official source +package) and in R14B04 via a custom repository and branch.off
vnode_management_timerSets the frequency with which vnodes attempt to trigger handoff between +this node and other nodes in the cluster.10s (10 seconds)
retry_put_coordinator_failureWhen a PUT (i.e. write) request fails, Riak will retry the operation +if this setting is set to on, which is the default. Setting +it to off will speed response times on PUT requests in +general, but at the risk of potentially increasing the likelihood of +write failure.on
background_managerRiak's background manager is a subsystem that coordinates access to +shared resources from other Riak subsystems. The background manager can +help to prevent system response degradation under times of heavy load +caused by multiple background tasks.on
+ +## Advanced Configuration + +The `advanced.config` file takes the same format as the `app.config` +file familiar to users of versions of Riak prior to 2.0. Here is an +example: + +```advancedconfig +[ + {riak_core, + [ + {cluster_mgr, {"127.0.0.1", 8098 } }, + %% more riak_core configs + ]}, + + {riak_repl, + [ + {data_root, "/var/db/riak/riak_repl/"}, + %% more riak_repl configs + ] + } +]. +``` + +The following settings are available in the `advanced.config` file: + +#### `riak_repl` settings + +Most settings that are configurable through `advanced.config` are +related to Riak's `riak_repl` subsystem. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
data_rootPath (relative or absolute) to the working directory for the +replication process./var/db/riak/riak_repl/
max_fssource_clusterThe hard limit of fullsync workers that will be running on the +source side of a cluster across all nodes on that cluster for a fullsync +to a sink cluster. This means that if you have configured fullsync for +two different clusters, both with a max_fssource_cluster of +5, 10 fullsync workers can be in progress. This only affects nodes on +the source cluster on which this parameter is defined, either via the +configuration file or command line.5
max_fssource_nodeThis setting limits the number of fullsync workers that will be +running on each individual node in a source cluster. This is a hard +limit for all fullsyncs enabled; additional fullsync configurations will +not increase the number of fullsync workers allowed to run on any node. +This only affects nodes on the source cluster on which this parameter is +defined, either via the configuration file or command line. +1
max_fssink_nodeThis setting limits the number of fullsync workers allowed to run on +each individual node in a sink cluster. This is a hard limit for all +fullsyncs enabled; additional fullsync configurations will not increase +the number of fullsync workers allowed to run on any node. This only +affects nodes on the source cluster on which this parameter is defined, +either via the configuration file or command line.1
fullsync_on_connectWhether to initiate a fullsync on initial connection from the sink +cluster.true
fullsync_intervalA single-integer value representing the duration to wait, in +minutes, between fullsyncs, or a list of {clustername, +time_in_minutes} pairs for each sink participating in fullsync +replication.30
rtq_max_bytesThe maximum size, in bytes, to which the realtime replication queue +can grow before new objects are dropped. Dropped objects will need to be +replicated with a fullsync.104857600
proxy_getWhether to enable Riak CS proxy_get and block +filter.disabled
rt_heartbeat_intervalA heartbeat message is sent from the source to the sink every +rt_heartbeat_interval seconds. Setting +rt_heartbeat_interval to undefined disables +the realtime heartbeat. This feature is available only in Riak KV +Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards.15
rt_heartbeat_timeoutIf a heartbeat response is not received within the time period +specified by this setting (in seconds), the source connection exits and +will be re-established. This feature is available only in Riak KV +Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards.15
realtime_connection_rebalance_max_delay_secsShould a server on the source cluster be restarted, this is +the amount of time (in seconds), before the realtime connections are +rebalanced by a change in the number of source nodes.300
fullsync_use_background_managerBy default, fullsync replication will attempt to coordinate with +other Riak subsystems that may be contending for the same resources. +This will help to prevent system response degradations during times of +heavy load from multiple background tasks. To disable background +coordination, set this parameter to `false`. This feature is available +only in Riak KV Enterprise Edition 2.0 and later as well as Riak KV 2.2.6 onwards.true
+
+#### Upgrading Riak Search with `advanced.config`
+
+If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][use ref search] (codename Yokozuna), you will need to enable
+legacy Search while the upgrade is underway. You can add the following
+snippet to your `advanced.config` configuration to do so:
+
+```advancedconfig
+[
+  %% Other configs
+
+  {riak_search, [ {enabled, true} ]},
+  {merge_index, [
+    {data_root, "/var/lib/riak/merge_index"},
+    {buffer_rollover_size, 1048576},
+    {max_compact_segments, 20}
+  ]},
+
+  %% Other configs
+].
+```
+
+#### Other settings
+
+There are three non-`riak_repl` settings available in
+`advanced.config`.
+

Config | Section | Description | Default
:------|:--------|:------------|:-------
`add_paths` | `riak_kv` | If you are installing custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies the paths to any compiled `.beam` files that you wish to use. This is expressed as a list of absolute paths on the node's filesystem, e.g. `[ "/tmp", "/other" ]`. | 
`cluster_mgr` | `riak_core` | The cluster manager listens for connections from remote clusters on the specified IP and port. Every node runs one cluster manager, but only the cluster manager running on the cluster leader will service requests. This can change as nodes enter and leave the cluster. | `{"127.0.0.1", 9080}`
`delete_mode` | `riak_kv` | Specifies how Riak behaves after objects are marked for deletion with a tombstone. There are three possible settings: `keep` disables tombstone removal altogether; `immediate` removes objects' tombstones as soon as the delete request is received; and setting `delete_mode` to an integer value specifies the number of milliseconds to wait before removing tombstones. More information can be found in Object Deletion. | `3000` (3 seconds)
`target_n_val` | `riak_core` | The highest `n_val` that you generally intend to use. This setting affects how partitions are distributed within the cluster, helping to ensure that "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. You will need to change this setting only in rare circumstances. Assuming that `ring_size` is a power of 2, the ideal value for this setting is both (a) greater than or equal to the largest `n_val` for any bucket type and (b) an even divisor of the number of partitions in the ring, i.e. `ring_size`. The default is `4`, and the number of physical nodes in your cluster must be greater than `target_n_val` for this setting to be effective at preventing hot spots. | `4`
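
As an illustration of where these settings live, the sketch below places each one in its application's section of `advanced.config`. The paths and values are examples only:

```advancedconfig
[
  {riak_kv, [
    %% load custom compiled .beam files from these directories
    {add_paths, ["/tmp", "/other"]},
    %% wait 10 seconds before removing tombstones
    {delete_mode, 10000}
  ]},
  {riak_core, [
    %% IP and port on which the cluster manager listens
    {cluster_mgr, {"127.0.0.1", 9080}},
    %% the highest n_val you intend to use
    {target_n_val, 4}
  ]}
].
```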

## Cluster Job Controls

{{% note title="Warning" %}}
Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
{{% /note %}}

The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.

Field | Default | Valid values
:-----|:--------|:------------
`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`

diff --git a/content/riak/kv/3.0.2/configuring/search.md b/content/riak/kv/3.0.2/configuring/search.md
new file mode 100644
index 0000000000..0ac5588692
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/search.md
@@ -0,0 +1,278 @@
---
title: "Riak Search Settings"
description: ""
project: "riak_kv"
project_version: 3.0.2
menu:
  riak_kv-3.0.2:
    name: "Riak Search Settings"
    identifier: "configuring_search"
    weight: 160
    parent: "configuring"
toc: true
aliases:
  - /riak/3.0.2/ops/advanced/configs/search/
  - /riak/kv/3.0.2/ops/advanced/configs/search/
---

[usage search]: {{}}riak/kv/3.0.2/developing/usage/search
[usage search schema]: {{}}riak/kv/3.0.2/developing/usage/search-schemas
[usage search data types]: {{}}riak/kv/3.0.2/developing/usage/searching-data-types
[usage custom extractors]: {{}}riak/kv/3.0.2/developing/usage/custom-extractors
[cluster-ops aae throttle]: {{}}riak/kv/3.0.2/using/cluster-operations/active-anti-entropy/#throttling
[config reference]: {{}}riak/kv/3.0.2/configuring/reference
[config reference#search]: {{}}riak/kv/3.0.2/configuring/reference/#search
[glossary aae]: {{}}riak/kv/3.0.2/learn/glossary/#active-anti-entropy-aae
[security index]: {{}}riak/kv/3.0.2/using/security/

[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation

This page covers how to use Riak Search (with
[Solr](http://lucene.apache.org/solr/) integration).

For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].

If you are looking to develop on or with Riak Search, take a look at:

* [Using Search][usage search]
* [Search Schema][usage search schema]
* [Custom Search Extractors][usage custom extractors]
* [Riak KV Data Types and Search][usage search data types]

## Overview

We'll be walking through:

1. [Prerequisites](#prerequisites)
2. [Enabling Riak Search](#enabling-riak-search)
3. [Search Config Settings](#search-config-settings)
4. [More on Solr](#more-on-solr)

## Prerequisites

Because Solr is a Java application, you will need to install **Java 7
or later** on every node. Installation packages can be found on the [Java SE
Downloads page][java se downloads], and instructions are available on the [Java SE
documentation site][java se docs].
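
As a quick sanity check (assuming `java` is already on the node's `PATH`), you can verify the installed JVM version on each node before enabling Search:

```bash
# prints the JVM version; Riak Search expects Java 7 (1.7) or later
java -version
```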

## Enabling Riak Search

Riak Search is not enabled by default, so you must enable it in every
node's [configuration file][config reference] as follows:

```riak.conf
search = on
```

## Search Config Settings

You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation.

### `search`

Enable or disable search; defaults to `off`.

Valid values: `on` or `off`

### `search.anti_entropy.data_dir`

The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`.

Valid values: a directory

### `search.anti_entropy.throttle`

Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`.

Valid values: `on` or `off`

You can read more about throttling [here][cluster-ops aae throttle].

### `search.anti_entropy.throttle.$tier.delay`

Set the throttling tiers delay for [active anti-entropy][glossary aae]; no default.

Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above.

For example:

```
search.anti_entropy.throttle.tier1.solrq_queue_length = 0
search.anti_entropy.throttle.tier1.delay = 0ms
search.anti_entropy.throttle.tier2.solrq_queue_length = 40
search.anti_entropy.throttle.tier2.delay = 5ms
```

will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a mailbox size of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).

Valid values: Non-negative integer

### `search.anti_entropy.throttle.$tier.solrq_queue_length`

Set the throttling tiers for [active anti-entropy][glossary aae]; no default.

Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle
should observe at that size and above.

For example:

```
search.anti_entropy.throttle.tier1.solrq_queue_length = 0
search.anti_entropy.throttle.tier1.delay = 0ms
search.anti_entropy.throttle.tier2.solrq_queue_length = 40
search.anti_entropy.throttle.tier2.delay = 5ms
```

will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a mailbox size of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).

Valid values: Non-negative integer

### `search.dist_query`

Enable this node in distributed query plans; defaults to `on`.

If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long running administrative operation (e.g.
reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.

This setting can also be changed via `riak-admin` by issuing one of the following commands:

```
riak-admin set search.dist_query=off
```

or

```
riak-admin set search.dist_query=on
```

Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` feature. Setting `search.dist_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up.

Valid values: `on` or `off`

### `search.index.error_threshold.failure_count`

The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.

Valid values: Integer

### `search.index.error_threshold.failure_interval`

The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.

If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.

Valid values: Milliseconds

### `search.index.error_threshold.reset_interval`

The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.

Valid values: Milliseconds

### `search.queue.batch.flush_interval`

The maximum delay between notification to flush batches to Solr; defaults to `1000` (milliseconds).

This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.

Valid values: `ms`, `s`, `m`, or `h`

### `search.queue.batch.maximum`

The maximum batch size, in number of Riak objects; defaults to `500`.

Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.

Valid values: Integer

### `search.queue.batch.minimum`

The minimum batch size, in number of Riak objects; defaults to `10`.

Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.

Valid values: Integer

### `search.queue.high_watermark`

The queue high water mark; defaults to `1000`.

If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem, if writes into Solr start to fall behind.

Valid values: Integer

### `search.queue.high_watermark.purge_strategy`

The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.

Valid values: `purge_one`, `purge_index`, or `off`

* `purge_one` removes the oldest item on the queue from an erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
* `purge_index` removes all items associated with one random erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
* `off` disables purging

### `search.root_dir`

The root directory in which index data and configuration is stored; defaults to `./data/yz`.

Valid values: a directory

### `search.solr.jvm_options`

The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.

Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.

Valid values: Java command-line arguments

### `search.solr.jmx_port`

The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.

Valid values: Integer

**Note:** JMX ceased being a Riak feature in Riak KV 2.9.0p5. This setting is left here for reference but no longer affects anything.

### `search.solr.port`

The port number to which Solr binds (note: binds on every interface); defaults to `8093`.

Valid values: Integer

### `search.solr.start_timeout`

How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.

Values lower than 1s will be rounded up to 1s.

Valid values: Integer with time units (e.g. 2m)

## More on Solr

### Solr JVM and Ports

Riak Search runs one Solr process per node to manage its indexing and
search functionality. While the underlying project manages
index distribution, node coverage for queries, active anti-entropy
(AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.

Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports
to the outside world.

### Solr for Operators

For further information on Solr monitoring, tuning, and performance, we
recommend the following documents for getting started:

* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
* [Solr Performance Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
* [Solr Performance Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)

A wide variety of other documentation is available from the Solr OSS
community.

diff --git a/content/riak/kv/3.0.2/configuring/strong-consistency.md b/content/riak/kv/3.0.2/configuring/strong-consistency.md
new file mode 100644
index 0000000000..6d6b1ea823
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/strong-consistency.md
@@ -0,0 +1,702 @@
---
title: "Implementing Strong Consistency"
description: ""
project: "riak_kv"
project_version: 3.0.2
menu:
  riak_kv-3.0.2:
    name: "Implementing Strong Consistency"
    identifier: "configuring_strong_consistency"
    weight: 190
    parent: "configuring"
toc: true
---

[apps strong consistency]: {{}}riak/kv/3.0.2/developing/app-guide/strong-consistency
[concept strong consistency]: {{}}riak/kv/3.0.2/using/reference/strong-consistency
[cluster ops add remove node]: {{}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes
[config reference#strong-cons]: {{}}riak/kv/3.0.2/configuring/reference/#strong-consistency
[use admin riak cli]: {{}}riak/kv/3.0.2/using/admin/riak-cli
[concept eventual consistency]: {{}}riak/kv/3.0.2/learn/concepts/eventual-consistency
[plan backend bitcask]: {{}}riak/kv/3.0.2/setup/planning/backend/bitcask
[glossary vnode]: {{}}riak/kv/3.0.2/learn/glossary/#vnode
[concept buckets]: {{}}riak/kv/3.0.2/learn/concepts/buckets
[cluster ops bucket types]: {{}}riak/kv/3.0.2/using/cluster-operations/bucket-types
[use admin riak-admin#ensemble]: {{}}riak/kv/3.0.2/using/admin/riak-admin/#ensemble-status
[use admin riak-admin]: {{}}riak/kv/3.0.2/using/admin/riak-admin
[config reference#advanced]: {{}}riak/kv/3.0.2/configuring/reference/#advanced-configuration
[plan cluster capacity]: {{}}riak/kv/3.0.2/setup/planning/cluster-capacity
[cluster ops strong consistency]: {{}}riak/kv/3.0.2/using/cluster-operations/strong-consistency
[apps replication properties]: {{}}riak/kv/3.0.2/developing/app-guide/replication-properties
[concept causal context]: {{}}riak/kv/3.0.2/learn/concepts/causal-context
[dev data types]: {{}}riak/kv/3.0.2/developing/data-types
[glossary aae]: {{}}riak/kv/3.0.2/learn/glossary/#active-anti-entropy-aae
[cluster ops 2i]: {{}}riak/kv/3.0.2/using/reference/secondary-indexes
[usage commit hooks]: {{}}riak/kv/3.0.2/developing/usage/commit-hooks
[cluster ops obj del]: {{}}riak/kv/3.0.2/using/reference/object-deletion
[dev client libraries]: {{}}riak/kv/3.0.2/developing/client-libraries

> **Please Note:**
>
> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.

This document provides information on configuring and monitoring a Riak
cluster's optional strong consistency subsystem. Documentation for
developers building applications using Riak's strong consistency feature
can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical
treatment can be found in [Strong Consistency][concept strong consistency].

## Minimum Cluster Size

In order to use strong consistency in Riak, **your cluster must consist
of at least three nodes**. If it does not, all strongly consistent
operations will fail.
If your cluster is smaller than three nodes, you
will need to [add more nodes][cluster ops add remove node] and make sure
that strong consistency is [enabled](#enabling-strong-consistency) on all of them.

Strongly consistent operations on a given key may also fail if a
majority of object replicas in a given ensemble are unavailable, whether
due to slowness, crashes, or network partitions. This means that you may
see strongly consistent operations fail even if the minimum cluster size
requirement has been met. More information on ensembles can be found in
[Implementation Details](#implementation-details).

While strong consistency requires at least three nodes, we have a
variety of recommendations regarding cluster size, which can be found in
[Fault Tolerance](#fault-tolerance).

## Enabling Strong Consistency

Strong consistency in Riak is disabled by default. You can enable it in
each node's [configuration files][config reference#strong-cons].

```riakconf
strong_consistency = on
```

```appconfig
%% In the older, app.config-based system, the strong consistency
%% parameter is enable_consensus:

{riak_core, [
    % ...
    {enable_consensus, true},
    % ...
]}
```

Remember that you must [restart your node][use admin riak cli] for
configuration changes to take effect.

For strong consistency requirements to be applied to specific keys,
those keys must be in [buckets][concept buckets] bearing a bucket type with the
`consistent` property set to `true`. More information can be found in
[Using Bucket Types][cluster ops bucket types].

If you enable strong consistency on all nodes in a cluster with fewer
than three nodes, strong consistency will be **enabled** but not yet
**active**. Strongly consistent operations are not possible in this
state. Once at least three nodes with strong consistency enabled are
detected in the cluster, the system will be activated and ready for use.
You can check on the status of the strong consistency subsystem using
the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command.

## Fault Tolerance

Strongly consistent operations in Riak are necessarily less highly
available than [eventually consistent][concept eventual consistency] operations
because strongly consistent operations can only succeed if a **quorum**
of object replicas are currently reachable. A quorum can be expressed as
N / 2 + 1 (or `n_val` / 2 + 1), meaning that 3 replicas constitute a
quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are
unavailable, for example, no strongly consistent operations on that
object can succeed.

While Riak uses N=3 by default, bear in mind that **higher values of N
will allow for more fault tolerance**. The table below shows the number
of allowable missing replicas for assorted values of N:

Replicas | Allowable missing replicas
:--------|:--------------------------
3 | 1
5 | 2
7 | 3
9 | 4
15 | 7

Thus, we recommend setting `n_val` higher than the default of 3 for
strongly consistent operations. More on `n_val` in the section below.

### n_val Recommendations

Due to the quorum requirements explained above, we recommend that you
use _at least_ N=5 for strongly consistent data. You can set the value
of N, i.e. `n_val`, for buckets
[using bucket types][cluster ops bucket types].
For example, you
can create and activate a bucket type with N set to 5 and strong
consistency enabled---we'll call the bucket type
`consistent_and_fault_tolerant`---using the following series of
[commands][use admin riak-admin]:

```bash
riak-admin bucket-type create consistent_and_fault_tolerant \
  '{"props": {"consistent":true,"n_val":5}}'
riak-admin bucket-type activate consistent_and_fault_tolerant
```

If the `activate` command outputs `consistent_and_fault_tolerant has
been activated`, the bucket type is now ready to provide strong
consistency guarantees.

#### Setting the target_n_val parameter

The `target_n_val` parameter sets the highest `n_val` that you intend to
use in an entire cluster. The purpose of this parameter is to ensure
that so-called "hot spots" don't occur, i.e. that data is never stored
more than once on the same physical node. This can happen when:

* `target_n_val` is greater than the number of physical nodes, or
* the `n_val` for a bucket is greater than `target_n_val`.

A problem to be aware of if you're using strong consistency is that the
default for `target_n_val` is 4, while our suggested minimum `n_val` for
strongly consistent bucket types is 5. This means that you will need to
raise `target_n_val` if you intend to use an `n_val` over 4 for _any_
bucket type in your cluster. If you anticipate using an `n_val` of 7 as
the largest `n_val` within your cluster, for example, you will need to
set `target_n_val` to 7.

This setting is not contained in `riak.conf`, and must instead be set in
the `advanced.config` file. For more information, see our documentation
on [advanced configuration][config reference#advanced].

If you are using strong consistency in a cluster that has already been
created with a `target_n_val` that is too low (remember that the default
is too low), you will need to raise it to the desired higher value and
restart each node.

#### Note on Bucket Properties

The `consistent` bucket property is one of two bucket properties,
alongside [`datatype`][cluster ops bucket types], that cannot be changed once a
bucket type has been created.

Furthermore, if `consistent` is set to `true` for a bucket type, you
cannot change the `n_val` for the bucket type once it's been created. If
you attempt to do so, you'll see the following error:

```
Error updating bucket <bucket_type_name>:
n_val cannot be modified for existing consistent type
```

If you've created a bucket type with a specific `n_val` and wish to
change it, you will need to create a new bucket type with the
appropriate `n_val` and use the new bucket type instead.

### Fault Tolerance and Cluster Size

From the standpoint of strongly consistent operations, larger clusters
tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down.

Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If
two nodes go down, _all_ ensembles will lose quorum and will be unable
to function. Strongly consistent operations on the entire keyspace will
fail until at least one node is brought back online. And even when that
one node is brought back online, a significant portion of the keyspace
will continue to be unavailable for strongly consistent operations.

For the sake of contrast, imagine a 50-node cluster in which all
ensembles are N=5 (i.e. all objects are replicated to five nodes).
In this cluster, each node is involved in only 10% of the total ensembles;
if a single node fails, that failure will thus impact only 10% of
ensembles. In addition, because N is set to 5, that failure will not impact
quorum for _any_ ensemble in the cluster; two additional node failures
would need to occur for quorum to be lost for _any_ ensemble. And even
in the case of three nodes failing, it is highly unlikely that that
failure would impact the same ensembles; if it did, only those ensembles
would become unavailable, affecting only 10% of the key space, as
opposed to 100% in the example of a 3-node cluster consisting of N=3
ensembles.

These examples illustrate why we recommend higher values for N---again,
at least N=5---as well as clusters with many nodes. The 50-node cluster
example above is used only to illustrate why larger clusters are more
fault tolerant. The definition of "many" nodes will vary according to your needs.
For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].

### Offline Node Recommendations

In general, strongly consistent Riak is more sensitive to the number of
nodes in the cluster than eventually consistent Riak, due to the quorum
requirements described above. While Riak is designed to withstand a
variety of failure scenarios that make nodes in the cluster unreachable,
such as hardware or network failure, **we nonetheless recommend that you
limit the number of nodes that you intentionally down or reboot**.
Having multiple nodes leave the cluster at once can threaten quorum and
thus affect the viability of some or all strongly consistent operations,
depending on the size of the cluster.

If you're using strong consistency and you do need to reboot multiple
nodes, we recommend rebooting them very carefully. Rebooting nodes too
quickly in succession can force the cluster to lose quorum and thus be
unable to service strongly consistent operations. The best strategy is
to reboot nodes one at a time and wait for each node to rejoin existing
[ensembles][cluster ops strong consistency] before
continuing to the next node. At any point in time, the state of
currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].

## Performance

If you run into performance issues, bear in mind that the key space in a
Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of
that key space. Larger ring sizes allow more
independent consensus groups to exist in a cluster, which can provide
for more concurrency and higher throughput, and thus better performance.
The ideal ring size, however, will also depend on the number of nodes in
the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].

Adding nodes to your cluster is another means of enhancing the
performance of strongly consistent operations. Instructions on doing so
can be found in [Adding and Removing Nodes][cluster ops add remove node].

Your cluster's configuration can also affect strong consistency
performance. See the section on [configuration][config reference#strong-cons] below.

## riak-admin ensemble-status

The [`riak-admin`][use admin riak-admin] interface
used for general node/cluster management has an `ensemble-status`
command that provides insight into the current status of the consensus
subsystem undergirding strong consistency.

Running the command by itself will provide the current state of the
subsystem:

```bash
riak-admin ensemble-status
```

If strong consistency is not currently enabled, you will see `Note: The
consensus subsystem is not enabled.` in the output of the command; if
strong consistency is enabled, you will see output like this:

```
============================== Consensus System ===============================
Enabled:     true
Active:      true
Ring Ready:  true
Validation:  strong (trusted majority required)
Metadata:    best-effort replication (asynchronous)

================================== Ensembles ==================================
 Ensemble     Quorum        Nodes      Leader
-------------------------------------------------------------------------------
   root       4 / 4         4 / 4      riak@riak1
    2         3 / 3         3 / 3      riak@riak2
    3         3 / 3         3 / 3      riak@riak4
    4         3 / 3         3 / 3      riak@riak1
    5         3 / 3         3 / 3      riak@riak2
    6         3 / 3         3 / 3      riak@riak2
    7         3 / 3         3 / 3      riak@riak4
    8         3 / 3         3 / 3      riak@riak4
```

### Interpreting ensemble-status Output

The following table provides a guide to `ensemble-status` output:

Item | Meaning
:----|:-------
`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `false` and you wish to enable strong consistency, see [Enabling Strong Consistency](#enabling-strong-consistency) above.
`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes.
`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change.
`Validation` | This will display `strong` if the `tree_validation` setting in `advanced.config` has been set to `true` and `weak` if set to `false`.
`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in `advanced.config`, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)`, then `synchronous_tree_updates` is set to `true`.
`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster. Each entry in the list contains the following fields:

* **Ensemble** --- The ID of the ensemble
* **Quorum** --- The number of ensemble peers that are either leading or following
* **Nodes** --- The number of nodes currently online
* **Leader** --- The current leader node for the ensemble

**Note**: The **root ensemble**, designated by `root` in the sample
output above, is a special ensemble that stores a list of nodes and
ensembles in the cluster.

More in-depth information on ensembles can be found in our [internal
documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).

### Inspecting Specific Ensembles

The `ensemble-status` command also enables you to directly inspect the
status of specific ensembles in a cluster. The IDs for all current
ensembles are displayed in the `Ensembles` section of the
`ensemble-status` output described above.

To inspect a specific ensemble, specify the ID:

```bash
riak-admin ensemble-status <id>
```

The following would inspect ensemble 2:

```bash
riak-admin ensemble-status 2
```

Below is sample output for a single ensemble:

```
================================= Ensemble #2 =================================
Id:           {kv,0,3}
Leader:       riak@riak2 (2)
Leader ready: true

==================================== Peers ====================================
 Peer  Status     Trusted  Epoch  Node
-------------------------------------------------------------------------------
  1    following  yes      1      riak@riak1
  2    leading    yes      1      riak@riak2
  3    following  yes      1      riak@riak3
```

The table below provides a guide to the output:

Item | Meaning
:----|:-------
`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. All ensembles are `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible.
`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble. Each peer entry contains the following fields:

* **Peer** --- The ID of the peer
* **Status** --- Whether the peer is a leader or a follower
* **Trusted** --- Whether the peer's Merkle tree is currently considered trusted or not
* **Epoch** --- The current consensus epoch for the peer. The epoch is incremented each time the leader changes.
* **Node** --- The node on which the peer resides.

More information on leaders, peers, Merkle trees, and other details can
be found in [Implementation Details](#implementation-details) below.

## Implementation Details

Strong consistency in Riak is handled by a subsystem called
[`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).
This system functions differently from other systems in Riak in a number
of ways, and many of these differences are important to bear in mind for
operators configuring their cluster's usage of strong consistency.

### Basic Operations

The first major difference is that strongly consistent Riak involves a
different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types
of atomic operations on objects:

* **Get** operations work just as they do against
  non-strongly-consistent keys, but with two crucial differences:
  1. Connecting clients are guaranteed to receive the most recently
     written value (which makes those operations CP, i.e. consistent and
     partition tolerant)
  2. Reads on strongly consistent keys *never* return siblings, hence
     there is no need to develop any sort of conflict resolution
     strategy for those keys
* **Conditional put** operations write an object only if no object
  currently exists in that key. The operation will fail if the key
  already exists; if the key was never written or has been deleted, the
  operation succeeds (see the example sketch later in this section).
* **Conditional modify** operations are compare-and-swap (CAS)
  operations that succeed only if the value of a key has not changed
  since it was previously read.
* **Delete** operations work mostly like they do against
  non-strongly-consistent keys, with the exception that
  [tombstones][cluster ops obj del] are not harvested, which is
  the equivalent of having `delete_mode` set to `keep`.

**From the standpoint of clients connecting to Riak, there is little
difference between strongly and non-strongly consistent data**. The
operations performed on objects---reads, writes, deletes, etc.---are the
same, which means that the client API for strong consistency is
essentially the same as it is for eventually consistent operations, with
the important exception of error handling.

### Ensembles

The main actors in Riak's implementation of strong consistency are
**ensembles**, which are independent groups that watch over a portion of
a Riak cluster's key space and coordinate strongly consistent operations
across nodes. When watching over a given key space, ensembles must act
upon multiple replicas of a given object, the number of which is
specified by `n_val` (more on this in [Replication Properties][apps replication properties]).

Eventually consistent Riak can service requests even when only a single
object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it
requires that a **quorum** of object replicas be online and reachable,
where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not
available for a key, all strongly consistent operations against that key
will fail**.

More information can be found in the section on [Fault Tolerance](#fault-tolerance) above.
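
As a hedged sketch of the conditional put behavior described in Basic Operations above, the following HTTP request writes a key in a strongly consistent bucket only if the key does not already exist. The bucket type name `sc`, the bucket `accounts`, and the key `alice` are hypothetical; the example assumes a local node listening on Riak's default HTTP port, 8098:

```bash
# Hypothetical bucket type; create and activate it first:
#   riak-admin bucket-type create sc '{"props":{"consistent":true,"n_val":5}}'
#   riak-admin bucket-type activate sc

# A put without a causal context behaves as a conditional put:
# it succeeds only if the key does not already exist.
curl -XPUT \
  -H "Content-Type: text/plain" \
  -d "first value" \
  http://localhost:8098/types/sc/buckets/accounts/keys/alice

# Repeating the same request should now fail, because the key exists
# and no context was supplied. The exact error surfaced depends on the
# client interface in use.
```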

### Peers, Leaders, Followers, and Workers

All ensembles in strongly consistent Riak consist of agents called
**peers**. The number of peers in an ensemble is defined by the `n_val`
of that ensemble, i.e. the number of object replicas that the
ensemble watches over. Amongst the peers in the ensemble, there are two
basic actors: **leaders** and **followers**.

Leaders and followers coordinate with one another on most requests.
While leaders and followers coordinate on all writes, i.e. all puts and
deletes, you can enable leaders to respond to gets without the need to
coordinate with followers. This is known as granting a **leader lease**.
Leader leases are enabled by default, and are disabled (or re-enabled)
at the cluster level. A more in-depth account of ensemble behavior can
be found in our [internal
documentation](https://github.com/basho/riak_ensemble/tree/develop/doc).

In addition to leaders and followers, ensemble peers use lightweight
Erlang processes called **workers** to perform long-running K/V
operations, allowing peers to remain responsive to requests. The number
of workers assigned to each peer depends on your configuration.

These terms should be borne in mind in the sections on configuration
below.

### Integrity Checking

An essential part of implementing a strong consistency subsystem in a
distributed system is **integrity checking**, which is a process that
guards against data corruption and inconsistency even in the face of
network partitions and other adverse events that Riak was built to
handle gracefully.

Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency
integrity checking utilizes [Merkle
trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on
disk. All peers in an ensemble, i.e. all leaders and followers, maintain
their own Merkle trees and update those trees in the event of most
strongly consistent operations. Those updates can occur synchronously or
asynchronously from the standpoint of client operations, depending on
the configuration that you specify.

While integrity checking takes place automatically in Riak, there are
important aspects of its behavior that you can configure. See the Merkle Tree settings section below for more
information on configurable parameters.

## Configuring Strong Consistency

The `riak_ensemble` subsystem provides a wide variety of tunable
parameters that you can adjust to fit the needs of your Riak cluster.
All `riak_ensemble`-specific parameters, with the exception of the
`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency),
must be set in each node's `advanced.config` file, _not_ in `riak.conf`
or `app.config`.

Information on the syntax and usage of `advanced.config` can be found in
our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full
listing of [strong-consistency-related configuration parameters][config reference#strong-cons].

Please note that the sections below require a basic understanding of the
following terms:

* ensemble
* peer
* leader
* follower
* worker
* integrity checking
* Merkle tree

For an explanation of these terms, see the [Implementation Details](#implementation-details) section
above.

### Leader Behavior

The `trust_lease` setting determines whether leader leases are used to
optimize reads.
When set to `true`, a leader with a valid lease can
handle reads directly without needing to contact any followers. When
`false`, the leader will always contact followers, which can lead to
degraded read performance. The default is `true`. We recommend leaving
leader leases enabled for performance reasons.

All leaders have periodic duties that they perform, including refreshing
the leader lease. You can determine how frequently this occurs, in
milliseconds, using the `ensemble_tick` setting. The default is 500
milliseconds. Please note that this setting must be lower than both
the `lease_duration` and `follower_timeout` settings (both explained
below).

If you set `trust_lease` to `true`, you can also specify how long a
leader lease remains valid without being refreshed using the
`lease_duration` setting, which is specified in milliseconds. This
setting should be higher than `ensemble_tick` to ensure that leaders
have time to refresh their leases before they time out, and it _must_
be lower than `follower_timeout`, explained in the section below. The
default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400,
`lease_duration` will default to 600.

### Worker Settings

You can choose how many workers are assigned to each peer using the
`peer_workers` setting. Workers are lightweight processes spawned by
leaders and followers. While increasing the number of workers will make
the strong consistency subsystem slightly more computationally
expensive, more workers can mean improved performance in some cases,
depending on the workload. The default is 1.

### Timeouts

You can establish timeouts for both reads and writes (puts and deletes)
using the `peer_get_timeout` and `peer_put_timeout` settings,
respectively. Both are expressed in milliseconds and default to 60000
(1 minute).

Longer timeouts will decrease the likelihood that read or write
operations will fail due to long computation times; shorter timeouts
entail shorter wait times for connecting clients, but at a higher risk
of failed operations under heavy load.

### Merkle Tree Settings

Leaders and followers in Riak's strong consistency system maintain
persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for
all data stored by that peer. More information can be found in the
**Integrity Checking** section above. The two sections directly below
describe Merkle-tree-related parameters.

#### Tree Validation

The `tree_validation` parameter determines whether Riak considers Merkle
trees to be trusted after peers are restarted (for whatever reason).
When enabled, i.e. when `tree_validation` is set to `true` (the
default), Riak does not trust peer trees after a restart, instead
requiring the peer to sync with a trusted quorum. While this is the
safest mode because it protects Riak against silent corruption in Merkle
trees, it carries the drawback that it can reduce Riak availability by
requiring more than a simple majority of nodes to be online and
reachable when peers restart.

If you are using ensembles with N=3, we strongly recommend setting
`tree_validation` to `false`.

#### Synchronous vs. Asynchronous Tree Updates

Merkle tree updates can happen synchronously or asynchronously. This is
determined by the `synchronous_tree_updates` parameter.
When set to `false`, which is the default, Riak responds to the client after the
first roundtrip that updates the followers' data but before the second
roundtrip required to update the followers' Merkle trees, allowing the
Merkle tree update to happen asynchronously in the background; when set
to `true`, Riak requires two quorum roundtrips to occur before replying
back to the client, which can increase per-request latency.

Please note that this setting applies only to Merkle tree updates sent
to followers. Leaders _always_ update their local Merkle trees before
responding to the client. Asynchronous updates can be unsafe in certain
scenarios. For example, if a leader crashes before sending metadata
updates to followers _and_ all followers that had acknowledged the write
somehow revert the object value immediately prior to the write request,
a future read could hypothetically return the immediately preceding
value without realizing that the value was incorrect. Setting
`synchronous_tree_updates` to `false` does bear this possibility, but it
is highly unlikely.

## Strong Consistency and Active Anti-Entropy

Riak's [active anti-entropy][glossary aae] \(AAE) feature _can_ repair strongly
consistent data. Although it is not necessary to use active anti-entropy
if you are using strong consistency, we nonetheless recommend doing so.

Without AAE, all object conflicts are repaired via read repair.
Read repair, however, cannot repair conflicts in so-called "cold data,"
i.e. data that may not be read for long periods of time. While using AAE
does entail small performance losses, not using AAE can lead to problems
with silent on-disk corruption.

## Strong Consistency and Bitcask

One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability in objects' TTL. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in
strongly consistent buckets only in situations when these occasional
irregularities are acceptable.

## Important Caveats

The following Riak features are not currently available in strongly
consistent buckets:

* [Secondary indexes][cluster ops 2i] --- If you do attach
  secondary index metadata to objects in strongly consistent buckets,
  strongly consistent operations can still proceed, but that metadata
  will be silently ignored.
* [Riak Data Types][dev data types] --- Data Types can currently be
  used only in an eventually consistent fashion.
* [Using commit hooks][usage commit hooks] --- Neither pre- nor post-commit hooks are supported in strongly consistent buckets. If you do associate a
  strongly consistent bucket with one or more commit hooks, strongly
  consistent operations can proceed as normal in that bucket, but all
  commit hooks will be silently ignored.

Furthermore, you should also be aware that strong consistency guarantees
are applied only at the level of single keys.
There is currently no
support within Riak for strongly consistent operations against multiple
keys, although it is always possible to incorporate client-side write
and read locks in applications that use strong consistency.

## Known Issues

There are a few known issues that you should be aware of when using the
latest version of strong consistency.

* **Consistent reads of never-written keys create tombstones** --- A
  [tombstone][cluster ops obj del] will be written if you perform a read
  against a key that a majority of peers claims to not exist. This is
  necessary for certain corner cases in which offline or unreachable
  replicas containing partially written data need to be rolled back in
  the future.
* **Consistent keys and key listing** --- In Riak, key listing
  operations, such as listing all the keys in a bucket, do not filter
  out tombstones. While this is rarely a problem for
  non-strongly-consistent keys, it does present an issue for strong
  consistency due to the tombstone issues mentioned above.
* **Secondary indexes not supported** --- Strongly consistent
  operations do not support [secondary indexes][cluster ops 2i] \(2i) at this time. Furthermore, any other metadata
  attached to objects, even if not related to 2i, will be silently
  ignored by Riak in strongly consistent buckets.
* **Multi-Datacenter Replication not supported** --- At this time,
  consistent keys are *not* replicated across clusters using
  Multi-Datacenter Replication \(MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly
  consistent data within a cluster with eventually consistent data
  between clusters is difficult to reason about from the perspective of
  applications. In a future version of Riak, we will add support for
  strongly consistent replication across multiple datacenters/clusters.
* **Client library exceptions** --- Basho's official [client
  libraries][dev client libraries] convert errors returned by Riak into generic exceptions,
  with a message derived from the returned server-side error message.

diff --git a/content/riak/kv/3.0.2/configuring/v2-multi-datacenter.md b/content/riak/kv/3.0.2/configuring/v2-multi-datacenter.md
new file mode 100644
index 0000000000..d521351744
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/v2-multi-datacenter.md
@@ -0,0 +1,160 @@
---
title_supertext: "Configuring:"
title: "V2 Multi-Datacenter Replication"
description: ""
project: "riak_kv"
project_version: 3.0.2
menu:
  riak_kv-3.0.2:
    name: "V2 Multi-Datacenter"
    identifier: "configuring_v2"
    weight: 210
    parent: "configuring"
toc: true
commercial_offering: true
aliases:
  - /riak/3.0.2/ops/mdc/v2/configuration
  - /riak/kv/3.0.2/ops/mdc/v2/configuration
---

[config v2 ssl]: {{}}riak/kv/3.0.2/configuring/v2-multi-datacenter/ssl

{{% note title="Deprecation Warning" %}}
v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.2/configuring/v3-multi-datacenter/) instead.
{{% /note %}}

Riak's Multi-Datacenter Replication capabilities offer a
variety of configurable parameters.

## File

The configuration for replication is kept in the `riak_repl` section of
each node's `advanced.config`.
That section looks like this:

```advancedconfig
{riak_repl, [
    {fullsync_on_connect, true},
    {fullsync_interval, 360},
    % Debian/Centos/RHEL:
    {data_root, "/var/lib/riak/data/riak_repl"},
    % Solaris:
    % {data_root, "/opt/riak/data/riak_repl"},
    % FreeBSD/SmartOS:
    % {data_root, "/var/db/riak/riak_repl"},
    {queue_size, 104857600},
    {server_max_pending, 5},
    {client_ack_frequency, 5}
  ]}
```

## Usage

These settings are configured using the standard Erlang config file
syntax, i.e. `{Setting, Value}`. For example, if you wished to set
`ssl_enabled` to `true`, you would insert the following line into the
`riak_repl` section (appending a comma if you have more settings to
follow):

```advancedconfig
{riak_repl, [
    % Other configs
    {ssl_enabled, true},
    % Other configs
  ]}
```

## Settings

Once your configuration is set, you can verify its correctness by
running the following command:

```bash
riak chkconfig
```

The output from this command will point you to syntactical and other
errors in your configuration files.

A full list of configurable parameters can be found in the sections
below.

## Fullsync Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster
`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication. **Note**: Please contact Basho support for more information.
`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.

## SSL Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).

## Queue, Object, and Batch Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
`server_max_pending` | `max` (integer) | `5` | The maximum number of objects the leader will wait to get an acknowledgment from, from the remote location, before queuing the request
`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site

## Client Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster
`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs
`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred

## Buffer Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes
`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes

## Worker Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2).
`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3).
`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2).
`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3).

1. <a name="f1"></a>SSL depth is the maximum number of non-self-issued
   intermediate certificates that may follow the peer certificate in a valid
   certificate chain. If depth is `0`, the PEER must be signed by the trusted
   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
   then PEER, CA, CA, ROOT-CA and so on.

2. <a name="f2"></a>Each get worker spawns 2 processes, one for the work and
   one for the get FSM (an Erlang finite state machine implementation for `GET`
   requests). Be sure that you don't run over the maximum number of allowed
   processes in an Erlang VM (check `vm.args` for a `+P` property).

3. <a name="f3"></a>Each put worker spawns 2 processes, one for the work, and
   one for the put FSM (an Erlang finite state machine implementation for `PUT`
   requests). Be sure that you don't run over the maximum number of allowed
   processes in an Erlang VM (check `vm.args` for a `+P` property).

4. <a name="f4"></a>If the ACL is specified and not the special value `*`,
   peers presenting certificates not matching any of the patterns will not be
   allowed to connect.
   If no ACLs are configured, no checks on the common name are done, except
   as described for [Identical Local and Peer Common Names][config v2 ssl].
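
Putting several of these settings together, here is a hedged sketch of a `riak_repl` section that enables SSL on a replication link. The certificate paths and the ACL pattern are illustrative only:

```advancedconfig
{riak_repl, [
    {fullsync_on_connect, true},
    {fullsync_interval, 360},
    %% enable encrypted replication links
    {ssl_enabled, true},
    {keyfile, "/etc/riak/ssl/key.pem"},
    {certfile, "/etc/riak/ssl/cert.pem"},
    {cacertdir, "/etc/riak/ssl/cacerts"},
    {ssl_depth, 1},
    %% only accept peers whose certificate common name matches
    {peer_common_name_acl, ["*.example.com"]}
  ]}
```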
+ + + + diff --git a/content/riak/kv/3.0.2/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/3.0.2/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..c4c34f3b8e --- /dev/null +++ b/content/riak/kv/3.0.2/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,82 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.2/ops/mdc/v2/nat + - /riak/kv/3.0.2/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/3.0.2/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.2/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` + + + + diff --git a/content/riak/kv/3.0.2/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/3.0.2/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..fd7b394dc6 --- /dev/null +++ b/content/riak/kv/3.0.2/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,371 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.2/ops/mdc/v2/quick-start + - /riak/kv/3.0.2/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.2/configuring/v3-multi-datacenter/quick-start/) instead. 
+{{% /note %}}
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through
+the process of configuring Riak's version 2 Replication to perform
+replication between two sample Riak clusters in separate networks. This
+guide will also cover bidirectional replication, which is accomplished
+by setting up unidirectional replication in both directions between the
+clusters.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [Performing system tuning][perf index]
+* [Reviewing configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there are `listener_<nodename>` entries for
+each listening node, and that `leader` and `server_stats` are populated.
+They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following:
+
+```
+Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010"
+leader: 'riak@192.168.1.21'
+client_stats: [{<8051.3902.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster1"},
+                         {strategy,riak_repl_keylist_client},
+                         {fullsync_worker,<8051.3909.0>},
+                         {put_pool_size,5},
+                         {connected,"172.16.1.11",9010},
+                         {state,wait_for_fullsync}]}}]
+```
+
+### Testing Realtime Replication
+
+That's all there is to it! When `PUT` requests are coordinated by
+Cluster1, these operations will be replicated to Cluster2.
+
+You can use the following example script to verify that `PUT` operations
+sent to Cluster1 are being replicated to Cluster2:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=172.16.1.11
+CLUSTER_2_IP=192.168.1.21
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
+  echo "C1 PUT Successful"
+else
+  echo "C1 PUT Failed"
+  exit 1
+fi
+
+CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent
+  C1:${CHECKPUT_C1}
+  C2:${CHECKREPL_C1_TO_C2}"
+  exit 1
+fi
+
+exit 0
+```
+
+You will have to change some of the above variables for your own
+environment, such as IP addresses or ports.
+
+If you run this script and things are working as expected, you will get
+the following output:
+
+```
+C1 PUT Successful
+C1 to C2 consistent
+```
+
+## Set Up Cluster2 → Cluster1 Replication
+
+### About Bidirectional Replication
+
+Multi-Datacenter support can also be configured to replicate in both
+directions, ensuring eventual consistency between your two datacenters.
+Setting up bidirectional replication is as simple as repeating the steps
+above in the other direction, i.e. from Cluster2 to Cluster1.
+
+### Set Up the Listeners on Cluster2 (Source cluster)
+
+On a node in Cluster2, `node4` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010
+riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010
+riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010
+```
+
+### Set Up the Site on Cluster1 (Site cluster)
+
+On a node in Cluster1, `node1` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you configured in
+the earlier step. For `sitename` enter `Cluster2`.
+
+```bash
+riak-repl add-site 192.168.1.21 9010 Cluster2
+```
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show 'cancelled' in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/3.0.2/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..4d67aac020
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,164 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.2/ops/mdc/v2/ssl
+  - /riak/kv/3.0.2/ops/mdc/v2/ssl
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.2/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {ssl_enabled, true},
+             {certfile, "/full/path/to/site1-cert.pem"},
+             {keyfile, "/full/path/to/site1-key.pem"},
+             {cacertdir, "/full/path/to/cacertsdir"}
+             % ...
+            ]}
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+             % ...
+            ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+             % ...
+            ]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {peer_common_name_acl, "*"}
+             % ...
+            ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {ssl_depth, ...}
+             % ...
+            ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self-signed.
+
+For example:
+
+  * A depth of 0 indicates that the certificate must be signed directly
+    by a root certificate authority (CA)
+  * A depth of 1 indicates that the certificate may be signed by at most
+    1 intermediate CA, followed by a root CA
+  * A depth of 2 indicates that the certificate may be signed by at most
+    2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/configuring/v3-multi-datacenter.md b/content/riak/kv/3.0.2/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..12d73d80ba
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/v3-multi-datacenter.md
@@ -0,0 +1,161 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.2/ops/mdc/v3/configuration
+  - /riak/kv/3.0.2/ops/mdc/v3/configuration
+---
+
+[config reference#advanced]: {{}}riak/kv/3.0.2/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{}}riak/kv/3.0.2/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file.
For more information and for a list +of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced]. + +Here is a sample of the syntax: + +```advancedconfig +{riak_core, [ + %% Every *node* runs one cluster_mgr + {cluster_mgr, {"0.0.0.0", 9080 }}, + % ... +]}, +{riak_repl, [ + %% Pick the correct data_root for your platform + %% Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + %% Solaris: + %% {data_root, "/opt/riak/data/riak_repl"}, + %% FreeBSD/SmartOS: + %% {data_root, "/var/db/riak/riak_repl"}, + {max_fssource_cluster, 5}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {fullsync_on_connect, false}, + % ... +]} +``` + +## Settings + +Riak MDC configuration is set using the standard Erlang config file +syntax `{Setting, Value}`. For example, if you wished to set +`fullsync_on_connect` to `false`, you would insert this line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{fullsync_on_connect, false} +``` + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak_repl Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number. +`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. 
+`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication.
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
+`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
+`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100 MiB. Dropped objects will need to be replicated with a fullsync.
+`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
+`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+
+
+## riak_core Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer's certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+
+## Heartbeat Settings
+
+Two settings in the `riak_repl` section of `advanced.config` govern the
+periodic "heartbeat" that is sent from the source to the sink cluster to
+verify the sink cluster's liveness. The `rt_heartbeat_interval` setting
+determines how often the heartbeat is sent (in seconds). If a heartbeat is
+sent and a response is not received, Riak will wait `rt_heartbeat_timeout`
+seconds before attempting to re-connect to the sink; if any data is
+received from the sink, even if it is not heartbeat data, the timer will
+be reset. Setting `rt_heartbeat_interval` to `undefined` will disable the
+heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to.
On the other hand,
+lengthening the timeout will make Riak less sensitive to cases in which
+the connection really has been compromised.
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA; and so on.
+
+2. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer].
+
+## Default Bucket Properties
+
+Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0.
+
+To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the `advanced.config` file:
+
+```advancedconfig
+{riak_repl, [
+    {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}]}
+    ]}
+]}
+```
+
+If all of the replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/3.0.2/configuring/v3-multi-datacenter/nat.md
new file mode 100644
index 0000000000..9f652fb0b7
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/v3-multi-datacenter/nat.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "With NAT"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "With NAT"
+    identifier: "configuring_v3_replication_nat"
+    weight: 101
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.2/ops/mdc/v3/nat
+  - /riak/kv/3.0.2/ops/mdc/v3/nat
+---
+
+[config v3 ssl]: {{}}riak/kv/3.0.2/configuring/v3-multi-datacenter/ssl
+
+Riak's Version 3 Replication supports replication of data on
+networks that use static NAT.
+
+This can be used for replicating data over the internet where servers
+have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network).
+
+## Requirements
+
+In order for Replication to work on a server configured with NAT, the
+NAT addresses must be configured *statically*.
+
+## Configuration
+
+NAT rules can be configured at runtime, from the command line.
+
+* `riak-repl nat-map show`
+
+  Shows the current NAT mapping table
+
+* `riak-repl nat-map add <external_ip>[:port] <internal_ip>`
+
+  Adds a NAT map from the external IP, with an optional port, to an
+  internal IP. The port number refers to a port that is automatically
+  mapped to the internal `cluster_mgr` port number.
+
+* `riak-repl nat-map del <external_ip>[:port] <internal_ip>`
+
+  Deletes a specific NAT map entry.
+
+### Applying Changes at Runtime
+
+* Realtime NAT replication changes will be applied once realtime is
+  stopped and started using the following commands:
+
+  * `riak-repl realtime stop <clustername>`
+  * `riak-repl realtime start <clustername>`
+
+* Fullsync NAT replication changes will be applied on the next run of a
+  fullsync, or you can stop and start the current fullsync.
+
+  * `riak-repl fullsync stop <clustername>`
+  * `riak-repl fullsync start <clustername>`
+
+
+## Example
+
+* Cluster_A is the **source** of replicated data.
+* Cluster_B and Cluster_C are the **sinks** of the replicated data.
+
+### Cluster_A Setup
+
+Cluster_A is set up with nodes using the following **internal** IP
+addresses:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.1.20` | -
+`192.168.1.21` | -
+`192.168.1.22` | -
+`192.168.1.23` | -
+`192.168.1.24` | -
+
+### Cluster_B Setup
+
+The nodes of Cluster_B will be configured as follows:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.2.40` | `50.16.238.120:5555`
+`192.168.2.41` | `50.16.238.121:5555`
+`192.168.2.42` | `50.16.238.122:5555`
+`192.168.2.43` | `50.16.238.123:5555`
+`192.168.2.44` | `50.16.238.124:5555`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT port listens on `5555`.
+
+### Cluster_C Setup
+
+The nodes of Cluster_C are set up with **static NAT**, configured with the
+following IP addresses:
+
+Internal IP    | Public IP
+---------------|-------------------
+`192.168.3.60` | `50.16.238.200:5550`
+`192.168.3.61` | `50.16.238.200:5551`
+`192.168.3.62` | `50.16.238.200:5552`
+`192.168.3.63` | `50.16.238.200:5553`
+`192.168.3.64` | `50.16.238.200:5554`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT ports listen on `5550` through `5554`.
+
+```bash
+# on any node of Cluster_A
+riak-repl clustername Cluster_A
+
+# on any node of Cluster_B
+riak-repl clustername Cluster_B
+
+# on any node of Cluster_C
+riak-repl clustername Cluster_C
+
+# on 50.16.238.120 of Cluster_B
+riak-repl nat-map add 50.16.238.120:5555 192.168.2.40
+# on 50.16.238.121 of Cluster_B
+riak-repl nat-map add 50.16.238.121:5555 192.168.2.41
+# on 50.16.238.122 of Cluster_B
+riak-repl nat-map add 50.16.238.122:5555 192.168.2.42
+# on 50.16.238.123 of Cluster_B
+riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
+# on 50.16.238.124 of Cluster_B
+riak-repl nat-map add 50.16.238.124:5555 192.168.2.44
+
+# on 192.168.3.60 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5550 192.168.3.60
+# on 192.168.3.61 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5551 192.168.3.61
+# on 192.168.3.62 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5552 192.168.3.62
+# on 192.168.3.63 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5553 192.168.3.63
+# on 192.168.3.64 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5554 192.168.3.64
+
+
+# Connect replication from Cluster_A to Cluster_B:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.120:5555
+# You can connect to any node in Cluster_B with NAT mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
+
+# Connect replication from Cluster_A to Cluster_C:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.200:5550
+# You can connect to any node in Cluster_C with NAT mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
+
+
+# on any node from Cluster_A
+riak-repl realtime enable Cluster_B
+riak-repl realtime enable Cluster_C
+
+riak-repl realtime start Cluster_B
+riak-repl realtime start Cluster_C
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/3.0.2/configuring/v3-multi-datacenter/quick-start.md
new file mode 100644
index 0000000000..0cfc8018a4
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/v3-multi-datacenter/quick-start.md
@@ -0,0 +1,172 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "Quickstart"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Quickstart"
+    identifier: "configuring_v3_quickstart"
+    weight: 100
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.2/ops/mdc/v3/quick-start
+  - /riak/kv/3.0.2/ops/mdc/v3/quick-start
+---
+
+[install index]: {{}}riak/kv/3.0.2/setup/installing
+[perf index]: {{}}riak/kv/3.0.2/using/performance
+[config v3 mdc]: {{}}riak/kv/3.0.2/configuring/v3-multi-datacenter
+[cluster ops v3 mdc]: {{}}riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter
+
+This guide will walk you through the process of configuring Riak's v3
+Replication to perform replication between two sample Riak clusters on
+separate networks. This guide will also cover bidirectional replication,
+which is accomplished by setting up unidirectional replication in both
+directions between the clusters. It is important to note that both
+clusters must have the same ring size, but can have a different number
+of nodes.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* Install [Riak][install index]
+* Perform [System Tuning][perf index]
+* Review [Configuration][config v3 mdc]
+
+## About v3 Replication in 1.3 and higher
+
+From Riak KV version 1.3 onwards, the nomenclature for Source and Site
+clusters has changed. To more accurately reflect the behavior of each of
+the clusters, "listeners" and "sites" are now known as "sources" and
+"sinks". Data transfer now originates at the "source" and replicates to
+the "sink"; initiation is always from the primary (source) to the backup
+(sink) data center.
+
+Additionally, knowledge of the state of each cluster is now managed by a
+**cluster manager** process, which greatly simplifies the setup and
+maintenance of Multi-Datacenter replication.
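+
+Every node runs the cluster manager, and it listens on the address given by
+the `cluster_mgr` setting in the `riak_core` section of `advanced.config`
+(see [Configuration][config v3 mdc]). A minimal sketch, using the default
+port shown on the configuration page:
+
+```advancedconfig
+{riak_core, [
+             %% sinks connect to the cluster manager at this address
+             {cluster_mgr, {"0.0.0.0", 9080}}
+            ]}
+```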
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|:----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View your active connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name     <Ctrl-Pid>      [Members]
+----             ------------     ----------      ---------
+Cluster2         Cluster2         <0.7985.0>      ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name     <Ctrl-Pid>      [Members]
+----             ------------     ----------      ---------
+Cluster1         Cluster1         <0.4456.0>      ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2 (see
+the sketch below). Once this is done, bidirectional replication should
+be operating.
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
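+
+As a concrete sketch of "the reverse from Cluster2" mentioned above, the
+commands below mirror the Cluster1 → Cluster2 realtime steps, reusing the
+example cluster names from this page (and assuming the `riak-repl connect`
+from Cluster2 to Cluster1 shown earlier has already been made):
+
+```bash
+# on any node of Cluster2: queue updates destined for Cluster1
+riak-repl realtime enable Cluster1
+
+# establish connectivity from Cluster2 to Cluster1 and push queued updates
+riak-repl realtime start Cluster1
+```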
+
+
+
+
diff --git a/content/riak/kv/3.0.2/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/3.0.2/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..b382d78c34
--- /dev/null
+++ b/content/riak/kv/3.0.2/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,174 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.2/ops/mdc/v3/ssl
+  - /riak/kv/3.0.2/ops/mdc/v3/ssl
+---
+
+[config reference#advanced.config]: {{}}riak/kv/3.0.2/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+> recommend using [stunnel](https://www.stunnel.org/index.html) or a
+> virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+             % ...
+             {ssl_enabled, true},
+             {certfile, "/full/path/to/site1-cert.pem"},
+             {keyfile, "/full/path/to/site1-key.pem"},
+             {cacertdir, "/full/path/to/cacertsdir"}
+             % ...
+            ]}
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+             % ...
+             {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+             % ...
+            ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+             % ...
+             {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+             % ...
+            ]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_core, [
+             % ...
+             {peer_common_name_acl, "*"}
+             % ...
+            ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_core` section of `advanced.config`:
+
+```advancedconfig
+{riak_core, [
+             % ...
+             {ssl_depth, 3} % Sets the depth to 3
+             % ...
+            ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. The
+intermediate certificates must not be self-signed.
+
+The following example depths illustrate this:
+
+  * a depth of `0` indicates that the certificate must be signed
+    directly by a root certificate authority (CA)
+  * a depth of `1` indicates that the certificate may be signed by at
+    most 1 intermediate CA, followed by a root CA
+  * a depth of `2` indicates that the certificate may be signed by at
+    most 2 intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL for *Version 3* is available in *Riak 1.4+*.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+Read how to [generate your own CA and
+keys](http://www.debian-administration.org/articles/618). Ensure that
+you remove the password protection from the keys you generate.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing.md b/content/riak/kv/3.0.2/developing.md
new file mode 100644
index 0000000000..d1afe81b63
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing.md
@@ -0,0 +1,79 @@
+---
+title: "Developing with Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Developing"
+    identifier: "developing"
+    weight: 300
+    pre: lambda
+toc: true
+aliases:
+---
+
+[getting started]: ../developing/getting-started
+[usage index]: ../developing/usage
+[client libraries]: ../developing/client-libraries
+[dev data types]: ../developing/data-types
+[dev data modeling]: ../developing/data-modeling
+[apps index]: ../developing/app-guide
+[dev api index]: ../developing/api
+[dev faq]: ../developing/faq
+
+## In This Section
+
+#### [Getting Started][getting started]
+
+Step-by-step guide for getting started developing with Riak KV.
+
+[Learn More >>][getting started]
+
+#### [Usage][usage index]
+
+A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types.
+
+[Learn More >>][usage index]
+
+#### [Client Libraries][client libraries]
+
+Overview of client libraries for a variety of programming languages and environments.
+ +[Learn More >>][client libraries] + +#### [Data Types][dev data types] + +Overview and guide to working with data types in Riak KV. + +[Learn More >>][dev data types] + +#### [Data Modeling][dev data modeling] + +Information on use cases and data models that are a good fit for Riak KV. + +[Learn More >>][dev data modeling] + +#### [Application Guide][apps index] + +A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV. + +[Learn More >>][apps index] + +#### [APIs Reference][dev api index] + +Information and reference material on Riak KV APIs. + +[Learn More >>][dev api index] + +#### [FAQ][dev faq] + +Frequently asked questions when developing applications with Riak KV. + +[Learn More >>][dev faq] + + + + + + diff --git a/content/riak/kv/3.0.2/developing/api.md b/content/riak/kv/3.0.2/developing/api.md new file mode 100644 index 0000000000..1b1f4e6f71 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api.md @@ -0,0 +1,42 @@ +--- +title: "APIs" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "APIs" + identifier: "developing_apis" + weight: 107 + parent: "developing" +toc: true +aliases: +--- + +[dev api http]: ./http +[dev api backend]: ./backend +[dev api pbc]: ./protocol-buffers/ + +## In This Section + +#### [HTTP APIs][dev api http] + +Documentation on Riak KV's HTTP API. + +[Learn More >>][dev api http] + +#### [Protocol Buffers][dev api pbc] + +Information on Riak KV's Protocol Buffer Client API + +[Learn More >>][dev api pbc] + +#### [Backend API][dev api backend] + +Overview of Riak KV's storage backend API. + +[Learn More >>][dev api backend] + + + + diff --git a/content/riak/kv/3.0.2/developing/api/backend.md b/content/riak/kv/3.0.2/developing/api/backend.md new file mode 100644 index 0000000000..0ecf57a937 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/backend.md @@ -0,0 +1,118 @@ +--- +title: "Backend API" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Backend API" + identifier: "apis_backend" + weight: 101 + parent: "developing_apis" +toc: true +aliases: + - /riak/3.0.2/dev/references/backend-api + - /riak/kv/3.0.2/dev/references/backend-api +--- + +[plan backend]: {{}}riak/kv/3.0.2/setup/planning/backend + +Riak's storage API uniformly applies to all of the +[supported backends][plan backend]. This page presents the details of +the storage backend API in the form of +[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html) +(specs). + +Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html), +an Erlang static analysis tool. We recommend copying these specs into any +custom backend modules and use them as a guide for development to +avoid errors and ensure full compatibility with Riak. + +Also included below is the function export list that can be pasted directly +into a custom storage backend module. + +```erlang +%% Riak Storage Backend API +-export([api_version/0, + start/2, + stop/1, + get/3, + put/5, + delete/4, + drop/1, + fold_buckets/4, + fold_keys/4, + fold_objects/4, + is_empty/1, + status/1, + callback/3]). + +%% =================================================================== +%% Public API +%% =================================================================== + +%% @doc Return the major version of the +%% current API and a capabilities list. +%% The current valid capabilities are async_fold +%% and indexes. 
+-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. + +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http.md b/content/riak/kv/3.0.2/developing/api/http.md new file mode 100644 index 0000000000..4fed521b16 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http.md @@ -0,0 +1,93 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/3.0.2/dev/references/http + - /riak/kv/3.0.2/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
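+
+For example, a hypothetical key `docs/readme` in a bucket named `test`
+(both names are illustrative) would be fetched with the slash escaped:
+
+```curl
+curl http://localhost:8098/buckets/test/keys/docs%2Freadme
+```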
+
+## Bucket-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/3.0.2/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/3.0.2/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/3.0.2/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/3.0.2/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/3.0.2/developing/api/http/list-keys)
+
+## Object-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/3.0.2/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{}}riak/kv/3.0.2/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/3.0.2/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/3.0.2/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/3.0.2/developing/api/http/delete-object)
+
+## Riak-Data-Type-related Operations
+
+Method | URL
+:------|:----
+`GET` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/3.0.2/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/3.0.2/developing/data-types/#usage-examples)
+and subpages e.g. [sets]({{}}riak/kv/3.0.2/developing/data-types/sets).
+
+Advanced users may consult the technical documentation inside the Riak
+KV internal module `riak_kv_wm_crdt`.
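+
+As a brief sketch of the data type endpoints above, the following increments
+and then fetches a counter; it assumes a bucket type `counters` backed by the
+counter data type has been created and activated (the bucket and key names
+are illustrative; see Using Data Types above for the authoritative examples):
+
+```curl
+# increment the counter at my_bucket/my_key by 5
+curl -XPOST http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_key \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+
+# fetch the counter's current value
+curl http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_key
+```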
+
+## Query-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/3.0.2/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/3.0.2/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/3.0.2/developing/api/http/secondary-indexes)
+
+## Server-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/3.0.2/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/3.0.2/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/3.0.2/developing/api/http/list-resources)
+
+## Search-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/search/query/<index>` | [HTTP Search Query]({{}}riak/kv/3.0.2/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/3.0.2/developing/api/http/search-index-info)
+`GET` | `/search/index/<index>` | [HTTP Fetch Search Index]({{}}riak/kv/3.0.2/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index>` | [HTTP Store Search Index]({{}}riak/kv/3.0.2/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index>` | [HTTP Delete Search Index]({{}}riak/kv/3.0.2/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema>` | [HTTP Fetch Search Schema]({{}}riak/kv/3.0.2/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema>` | [HTTP Store Search Schema]({{}}riak/kv/3.0.2/developing/api/http/store-search-schema)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/http/counters.md b/content/riak/kv/3.0.2/developing/api/http/counters.md
new file mode 100644
index 0000000000..db8bf5634d
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/http/counters.md
@@ -0,0 +1,82 @@
+---
+title: "HTTP Counters"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Counters"
+    identifier: "http_counters"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/http/counters
+  - /riak/kv/3.0.2/dev/references/http/counters
+---
+
+Riak counters are CRDTs (convergent replicated data types) that (eventually)
+converge to the correct total. You merely increment the counter with some
+integer, and any potential conflicts will be automatically resolved by Riak.
+
+## Setup
+
+Riak counters can only be used if the bucket has the `allow_mult` property
+set to `true`.
+
+```
+curl -XPUT localhost:8098/buckets/BUCKET/props \
+  -H "Content-Type: application/json" \
+  -d "{\"props\" : {\"allow_mult\": true}}"
+```
+
+If you attempt to use counters without setting the above, you'll get this
+message:
+
+```
+Counters require bucket property 'allow_mult=true'
+```
+
+## Request
+
+To increment a counter, just POST an integer value using the `/counters`
+resource. This will increment that keyed value by the given amount.
+
+```
+POST /buckets/BUCKET/counters/KEY
+```
+
+To retrieve the current value, issue a GET against the same `/counters`
+resource:
+
+```
+GET /buckets/BUCKET/counters/KEY
+```
+
+## Response
+
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/3.0.2/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/3.0.2/developing/api/http/fetch-object)) responses apply here.
+
+Caveats: Counters have no support for Secondary Indexes (2i), Links, or Custom HTTP Metadata.
+
+## Example
+
+The body must be an integer (positive or negative).
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/delete-object.md b/content/riak/kv/3.0.2/developing/api/http/delete-object.md new file mode 100644 index 0000000000..8a976ff527 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/delete-object.md @@ -0,0 +1,79 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/delete-object + - /riak/kv/3.0.2/dev/references/http/delete-object +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> DELETE /buckets/test/keys/test2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/delete-search-index.md b/content/riak/kv/3.0.2/developing/api/http/delete-search-index.md new file mode 100644 index 0000000000..e725f3248a --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/delete-search-index.md @@ -0,0 +1,38 @@ +--- +title: "HTTP Delete Search Index" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Delete Search Index" + identifier: "http_delete_search_index" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/delete-search-index + - /riak/kv/3.0.2/dev/references/http/delete-search-index +--- + +Deletes a Riak Search index. + +## Request + +``` +DELETE /search/index/ +``` + +## Normal Response Codes + +* `204 No Content` - The index was successfully deleted (also returned + if the index did not exist to begin with) + +## Typical Error Codes + +* `503 Service Unavailable` - The request timed out internally + + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/fetch-object.md b/content/riak/kv/3.0.2/developing/api/http/fetch-object.md new file mode 100644 index 0000000000..4d760bf17c --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/fetch-object.md @@ -0,0 +1,246 @@ +--- +title: "HTTP Fetch Object" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Fetch Object" + identifier: "http_fetch_object" + weight: 105 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/fetch-object + - /riak/kv/3.0.2/dev/references/http/fetch-object +--- + +Reads an object from the specified bucket/key. + +## Request + +```bash +GET /types/type/buckets/bucket/keys/key +GET /buckets/bucket/keys/key +``` + +Important headers: + +* `Accept` - When `multipart/mixed` is the preferred content-type, objects with +siblings will return all siblings in single request. See [Siblings examples](#siblings-examples). See +also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1). + +Optional headers: + +* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics, +matching on the `ETag` and `Last-Modified` of the object, respectively. If the +object fails one of the tests (that is, if the ETag is equal or the object is +unmodified since the supplied timestamp), Riak will return a `304 Not Modified` +response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5). + +Optional query parameters: + +* `r` - (read quorum) how many replicas need to agree when retrieving the +object ([default is defined by the bucket]({{}}riak/kv/3.0.2/developing/api/http/set-bucket-props)) +* `pr` - how many primary replicas need to be online when doing the read +([default is defined by the bucket]({{}}riak/kv/3.0.2/developing/api/http/set-bucket-props)) +* `basic_quorum` - whether to return early in some failure cases (eg. 
when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/3.0.2/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/3.0.2/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/3.0.2/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. +{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT +< ETag: 6dQBm9oYA1mxRSH0e96l5W +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"foo":"bar"} +``` + +## Siblings examples + +### Manually requesting siblings + +Simple call to fetch an object that has siblings: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 102 +< +Siblings: +16vic4eU9ny46o4KPiDz1f +4v5xOg4bVwUYZdMkqf0d6I +6nr5tDTmhxnwuAFJDd2s6G +6zRSZFUJlHXZ15o9CG0BYl +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +Now request one of the siblings directly: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT +< ETag: 16vic4eU9ny46o4KPiDz1f +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/x-www-form-urlencoded +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + +### Get all siblings in one request + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: multipart/mixed +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh +< Content-Length: 766 +< + +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/x-www-form-urlencoded +Link: ; rel="up" +Etag: 16vic4eU9ny46o4KPiDz1f +Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 4v5xOg4bVwUYZdMkqf0d6I +Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6nr5tDTmhxnwuAFJDd2s6G +Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6zRSZFUJlHXZ15o9CG0BYl +Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT + +{"foo":"bar"} +--YinLMzyUR9feB17okMytgKsylvh-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/fetch-search-index.md b/content/riak/kv/3.0.2/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..ffcfbc115e --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/fetch-search-index.md @@ -0,0 +1,52 @@ +--- +title: "HTTP Fetch Search Index" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Fetch Search Index" + identifier: "http_fetch_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/fetch-search-index + - /riak/kv/3.0.2/dev/references/http/fetch-search-index +--- + +Retrieves information about a Riak Search [index]({{}}riak/kv/3.0.2/developing/usage/search/#simple-setup). + +## Request + +``` +GET /search/index/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` - No Search index with that name is currently + available +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the index is found, Riak will output a JSON object describing the +index, including its name, the [`n_val`]({{}}riak/kv/3.0.2/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/3.0.2/developing/usage/search-schemas) used by the index. 
Here is an example: + +```json +{ + "name": "my_index", + "n_val": 3, + "schema": "_yz_default" +} +``` + + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/fetch-search-schema.md b/content/riak/kv/3.0.2/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..e6c0d3df88 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/fetch-search-schema.md @@ -0,0 +1,42 @@ +--- +title: "HTTP Fetch Search Schema" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Fetch Search Schema" + identifier: "http_fetch_search_schema" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/fetch-search-schema + - /riak/kv/3.0.2/dev/references/http/fetch-search-schema +--- + +Retrieves a Riak KV [search schema]({{}}riak/kv/3.0.2/developing/usage/search-schemas). + +## Request + +``` +GET /search/schema/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the schema is found, Riak will return the contents of the schema as +XML (all Riak Search schemas are XML). + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/get-bucket-props.md b/content/riak/kv/3.0.2/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..1da37b90c1 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/get-bucket-props.md @@ -0,0 +1,86 @@ +--- +title: "HTTP Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Get Bucket Properties" + identifier: "http_get_bucket_props" + weight: 100 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/get-bucket-props + - /riak/kv/3.0.2/dev/references/http/get-bucket-props +--- + +Reads the bucket or bucket type properties. + +## Request + +```bash +GET /buckets/bucket/props +``` + +Or, to read bucket properties from a bucket in a bucket type: + +```bash +GET /types/type/buckets/bucket/props +``` + +Optional query parameters (only valid for the old format): + +* `props` - whether to return the bucket properties (`true` is the default) +* `keys` - whether to return the keys stored in the bucket. (`false` is the +default). See also [HTTP List Keys]({{}}riak/kv/3.0.2/developing/api/http/list-keys). + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` + +The JSON object in the response will contain up to two entries, `"props"` and +`"keys"`, which are present or missing, according to the optional query +parameters. The default is for only `"props"` to be present. + +See [HTTP Set Bucket Properties]({{}}riak/kv/3.0.2/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/3.0.2/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/props +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/link-walking.md b/content/riak/kv/3.0.2/developing/api/http/link-walking.md new file mode 100644 index 0000000000..bfeee3ebc8 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/link-walking.md @@ -0,0 +1,129 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/link-walking + - /riak/kv/3.0.2/dev/references/http/link-walking +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/3.0.2/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/3.0.2/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/3.0.2/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/list-buckets.md b/content/riak/kv/3.0.2/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..ade5a2cf11 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/list-buckets.md @@ -0,0 +1,68 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/list-buckets + - /riak/kv/3.0.2/dev/references/http/list-buckets +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. 
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/list-keys.md b/content/riak/kv/3.0.2/developing/api/http/list-keys.md new file mode 100644 index 0000000000..74c2c8795f --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/list-keys.md @@ -0,0 +1,80 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/list-keys + - /riak/kv/3.0.2/dev/references/http/list-keys +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/list-resources.md b/content/riak/kv/3.0.2/developing/api/http/list-resources.md new file mode 100644 index 0000000000..2765b95673 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/list-resources.md @@ -0,0 +1,84 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/list-resources + - /riak/kv/3.0.2/dev/references/http/list-resources +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
+ +The standard resources are: + +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/3.0.2/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/3.0.2/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/3.0.2/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/3.0.2/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/3.0.2/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/3.0.2/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/3.0.2/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/3.0.2/developing/api/http/status) + +## Request + +```bash +GET / +``` + +Headers: + +* `Accept` - `application/json` or `text/html` + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Link` - all resources that are described in the response body, but in Link +form + +## Example + +Request JSON response + +```curl +$ curl -i http://localhost:8098 -H "Accept: application/json" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:18:31 GMT +Content-Type: application/json +Content-Length: 398 + +{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"} + +# Request HTML response +curl -i http://localhost:8098 -H "Accept: text/html" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:20:05 GMT +Content-Type: text/html +Content-Length: 666 + + +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/mapreduce.md b/content/riak/kv/3.0.2/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..65bf20500d --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/mapreduce.md @@ -0,0 +1,74 @@ +--- +title: "HTTP MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "MapReduce" + identifier: "http_mapreduce" + weight: 108 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/mapreduce + - /riak/kv/3.0.2/dev/references/http/mapreduce +--- + +[MapReduce]({{}}riak/kv/3.0.2/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will 
flow.
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/3.0.2/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+*This request must include an entity (body), which is the JSON form of the MapReduce query.*
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted.
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/http/ping.md b/content/riak/kv/3.0.2/developing/api/http/ping.md
new file mode 100644
index 0000000000..536d00c746
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/http/ping.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/http/ping
+  - /riak/kv/3.0.2/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load balancers, and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /ping HTTP/1.1
+> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: text/html
+< Content-Length: 2
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+OK
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/http/reset-bucket-props.md b/content/riak/kv/3.0.2/developing/api/http/reset-bucket-props.md
new file mode 100644
index 0000000000..2f0b4fcd3e
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/http/reset-bucket-props.md
@@ -0,0 +1,61 @@
+---
+title: "HTTP Reset Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Reset Bucket Properties"
+    identifier: "http_reset_bucket_props"
+    weight: 102
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/http/reset-bucket-props
+  - /riak/kv/3.0.2/dev/references/http/reset-bucket-props
+---
+
+Resets bucket properties like `n_val` and `allow_mult` back to the
+default settings.
+
+## Request
+
+```bash
+DELETE /buckets/bucket/props
+```
+
+Resetting bucket properties is not available via the old API format.
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+## Example
+
+```curl
+$ curl -XDELETE -v localhost:8098/buckets/bucket/props
+* About to connect() to localhost port 8098 (#0)
+* Trying 127.0.0.1...
+* connected
+* Connected to localhost (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/bucket/props HTTP/1.1
+> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5
+> Host: localhost:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue)
+< Date: Tue, 06 Nov 2012 21:56:17 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host localhost left intact
+* Closing connection #0
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/http/search-index-info.md b/content/riak/kv/3.0.2/developing/api/http/search-index-info.md
new file mode 100644
index 0000000000..25bf0d557f
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/http/search-index-info.md
@@ -0,0 +1,56 @@
+---
+title: "HTTP Search Index Info"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Search Index Info"
+    identifier: "http_search_index_info"
+    weight: 114
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/http/search-index-info
+  - /riak/kv/3.0.2/dev/references/http/search-index-info
+---
+
+Retrieves information about all currently available [Search indexes]({{}}riak/kv/3.0.2/developing/usage/search) in JSON format.
+
+## Request
+
+```
+GET /search/index
+```
+
+## Response
+
+If there are no currently available Search indexes, a `200 OK` will be
+returned but with an empty list as the response value.
+
+Below is the example output if there is one Search index, called
+`test_index`, currently available:
+
+```json
+[
+  {
+    "n_val": 3,
+    "name": "test_index",
+    "schema": "_yz_default"
+  }
+]
+```
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `404 Object Not Found` - Typically returned if Riak Search is not
+  currently enabled on the node
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/http/search-query.md b/content/riak/kv/3.0.2/developing/api/http/search-query.md
new file mode 100644
index 0000000000..b928442efd
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/http/search-query.md
@@ -0,0 +1,73 @@
+---
+title: "HTTP Search Query"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Search Query"
+    identifier: "http_search_query"
+    weight: 113
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/http/search-query
+  - /riak/kv/3.0.2/dev/references/http/search-query
+---
+
+Performs a [Riak KV Search]({{}}riak/kv/3.0.2/developing/usage/search) query.
+
+## Request
+
+```
+GET /search/query/<index_name>
+```
+
+## Optional Query Parameters
+
+* `wt` - The [response
+  writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers)
+  to be used when returning the Search payload. The currently
+  available options are `json` and `xml`. The default is `xml`.
+* `q` - The actual Search query itself. Examples can be found in
+  [Using Search]({{}}riak/kv/3.0.2/developing/usage/search). If a query is not specified, Riak will return
+  information about the index itself, e.g. the number of documents
+  indexed.
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `400 Bad Request` - Returned when, for example, a malformed query is
+  supplied
+* `404 Object Not Found` - Returned if the Search index you are
+  attempting to query does not exist
+* `503 Service Unavailable` - The request timed out internally
+
+## Response
+
+If a `200 OK` is returned, then the Search query has been successful.
+Below is an example JSON response from querying an index that currently
+has no documents associated with it:
+
+```json
+{
+  "response": {
+    "docs": [],
+    "maxScore": 0.0,
+    "numFound": 0,
+    "start": 0
+  },
+  "responseHeader": {
+    "status": 0,
+    "QTime": 10,
+    "params": { /* internal info from the query */ }
+  }
+}
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/http/secondary-indexes.md b/content/riak/kv/3.0.2/developing/api/http/secondary-indexes.md
new file mode 100644
index 0000000000..c2be6b7842
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/http/secondary-indexes.md
@@ -0,0 +1,95 @@
+---
+title: "HTTP Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Secondary Indexes"
+    identifier: "http_2i"
+    weight: 109
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/http/secondary-indexes
+  - /riak/kv/3.0.2/dev/references/http/secondary-indexes
+---
+
+[Secondary Indexes]({{}}riak/kv/3.0.2/developing/usage/secondary-indexes) allow an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
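+
+For context, here is a brief sketch of how an object acquires index entries
+in the first place. Index entries are attached at write time via
+`X-Riak-Index-*` headers (see [HTTP Store Object]({{}}riak/kv/3.0.2/developing/api/http/store-object));
+the bucket, key, field, and value below are illustrative and match the
+query example at the end of this page:
+
+```curl
+# Store an object tagged with the binary index "field1_bin" = "val1";
+# an exact-match query on field1_bin/val1 would then return this key
+curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "Content-Type: text/plain" \
+  -H "x-riak-index-field1_bin: val1" \
+  -d 'some data'
+```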
+ +## Request + +### Exact Match + +```bash +GET /buckets/mybucket/index/myindex_bin/value +``` + +### Range Query + +``` +GET /buckets/mybucket/index/myindex_bin/start/end +``` + +#### Range query with terms + +To see the index values matched by the range, use `return_terms=true`. + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true +``` + +### Pagination + +Add the parameter `max_results` for pagination. This will limit the results and provide for the next request a `continuation` value. + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500 +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM= +``` + +### Streaming + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?stream=true +``` + +## Response + +Normal status codes: + ++ `200 OK` + +Typical error codes: + ++ `400 Bad Request` - if the index name or index value is invalid. ++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system. ++ `503 Service Unavailable` - if the job timed out before it could complete + +## Example + +```curl +$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1 +* About to connect() to localhost port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to localhost (127.0.0.1) port 8098 (#0) +> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3 +> Host: localhost:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 19 +< +* Connection #0 to host localhost left intact +* Closing connection #0 +{"keys":["mykey1"]}% +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/set-bucket-props.md b/content/riak/kv/3.0.2/developing/api/http/set-bucket-props.md new file mode 100644 index 0000000000..9a49da079f --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/set-bucket-props.md @@ -0,0 +1,116 @@ +--- +title: "HTTP Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Set Bucket Properties" + identifier: "http_set_bucket_props" + weight: 101 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/set-bucket-props + - /riak/kv/3.0.2/dev/references/http/set-bucket-props +--- + +Sets bucket properties like "n_val" and "allow_mult". + +## Request + +```bash +PUT /buckets/bucket/props +``` + +Important headers: + +* `Content-Type` - `application/json` + +The body of the request should be a JSON object with a single entry "props". +Unmodified bucket properties may be omitted. + +Available properties: + +* `n_val` (integer > 0) - the number of replicas for objects in this bucket +* `allow_mult` (true or false) - whether to allow sibling objects to be created +(concurrent updates) +* `last_write_wins` (true or false) - whether to ignore object history (vector +clock) when writing +* `precommit` - [precommit hooks]({{}}riak/kv/3.0.2/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/3.0.2/developing/usage/commit-hooks) +* `r, w, dw, rw` - default quorum values for operations on keys in the bucket. 
+Valid values are: + * `"all"` - all nodes must respond + * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.* + * `"one"` - equivalent to 1 + * *Any integer* - must be less than or equal to n_val +* `backend` - when using `riak_kv_multi_backend`, which named backend to use for +the bucket +* `node_confirms` - declares the number of diverse physical node acks required for a write +to be successful + +Other properties do exist but are not commonly modified. + +{{% note title="Property types" %}} +Make sure you use the proper types for attributes like **n_val** and +**allow_mult**. If you use strings instead of integers and booleans +respectively, you may see some odd errors in your logs, saying something like +`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`. +{{% /note %}} + +{{% note title="Node Confirms" %}} +`node_confirms` is a tunable for durability. When operating in a failure state, Riak will store replicas in fallback vnodes, and in some case multiple fallbacks may be on the same physical node. `node_confirms` is an option that specifies how many distinct physical nodes must acknowledge a write for it to be considered successful. + +When riak receives a 'put', it starts up a riak_kv_put_fsm (finite state machine). This prepares and then validates the options, then calls any precommit hooks, before executing a put to the local vnode in the preflist, which becomes the co-ordinating node. This then waits for the local vnode response before executing the put request remotely on the two remaining nodes in the preflist. + +The fsm then waits for the remote vnode responses, and as it receives responses, it adds these results and checks whether enough results have been collected to satisfy the bucket properties such as 'dw' and 'pw'. +When analysing the responses, Riak will count the number of different nodes from which results have been returned. The finite state machine can now be required to wait for a minimum number of confirmations from different nodes, whilst also ensuring all other configured options are satisfied. + +Once all options are satisfied, the response is returned, post commit hooks are called and the fsm finishes. +{{% /note %}} + +## Response + +Normal status codes: + +* `204 No Content` + +Typical error codes: + +* `400 Bad Request` - if the submitted JSON is invalid +* `415 Unsupported Media Type` - if the Content-Type was not set to +application/json in the request + +If successful, no content will be returned in the response body. + +## Example + +```curl +$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \ + -H "Content-Type: application/json" -d '{"props":{"n_val":5}}' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> Content-Length: 21 +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/http/status.md b/content/riak/kv/3.0.2/developing/api/http/status.md new file mode 100644 index 0000000000..c2aa42d21b --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/http/status.md @@ -0,0 +1,173 @@ +--- +title: "HTTP Status" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Status" + identifier: "http_status" + weight: 111 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.2/dev/references/http/status + - /riak/kv/3.0.2/dev/references/http/status +--- + +Reports about the performance and configuration of the Riak node to which it was requested. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active. + +## Performance + +Repeated requests to the `/stats` endpoint do not have a negative +performance impact as the statistics are cached internally in Riak. + +## Request + +```bash +GET /stats +``` + +Important headers: + +* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`. + +## Response + +Normal status codes: +* `200 OK` + +Typical error codes: +* `404 Not Found` - if `riak_kv_stat` is not enabled + +Important headers: +* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks) + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /stats HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: text/plain +> +< HTTP/1.1 200 OK +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 2102 +< +{ + "vnode_gets": 0, + "vnode_puts": 0, + "read_repairs": 0, + "vnode_gets_total": 0, + "vnode_puts_total": 0, + "node_gets": 0, + "node_gets_total": 0, + "node_get_fsm_time_mean": "undefined", + "node_get_fsm_time_median": "undefined", + "node_get_fsm_time_95": "undefined", + "node_get_fsm_time_99": "undefined", + "node_get_fsm_time_100": "undefined", + "node_puts": 0, + "node_puts_total": 0, + "node_put_fsm_time_mean": "undefined", + "node_put_fsm_time_median": "undefined", + "node_put_fsm_time_95": "undefined", + "node_put_fsm_time_99": "undefined", + "node_put_fsm_time_100": "undefined", + "read_repairs_total": 0, + "cpu_nprocs": 84, + "cpu_avg1": 251, + "cpu_avg5": 174, + "cpu_avg15": 110, + "mem_total": 7946684000.0, + "mem_allocated": 4340880000.0, + "nodename": "riak@127.0.0.1", + "connected_nodes": [ + + ], + "sys_driver_version": "1.5", + "sys_global_heaps_size": 0, + "sys_heap_type": "private", + "sys_logical_processors": 2, + "sys_otp_release": "R13B04", + "sys_process_count": 189, + "sys_smp_support": true, + "sys_system_version": "Erlang R13B04 (erts-5.7.5) [[source]] [[64-bit]] [[smp:2:2]] [[rq:2]] [[async-threads:5]] [[hipe]] [[kernel-poll:true]]", + "sys_system_architecture": "i386-apple-darwin10.3.0", + "sys_threads_enabled": true, + "sys_thread_pool_size": 5, + "sys_wordsize": 8, + "ring_members": [ + "riak@127.0.0.1" + ], + "ring_num_partitions": 64, + "ring_ownership": "[{'riak@127.0.0.1',64}]", + "ring_creation_size": 64, + "storage_backend": "riak_kv_bitcask_backend", + "pbc_connects_total": 0, + "pbc_connects": 0, + "pbc_active": 0, + "riak_kv_version": "0.11.0", + "riak_core_version": "0.11.0", + "bitcask_version": "1.0.1", + "luke_version": "0.1", + "webmachine_version": "1.7.1", + "mochiweb_version": "1.7.1", + "erlang_js_version": "0.4", + "runtime_tools_version": "1.8.3", + "crypto_version": "1.6.4", + "os_mon_version": "2.9.1", + "sasl_version": "2.1.9", + "stdlib_version": "1.16.5", + "kernel_version": "2.13.5" +} +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Output Explanation + +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/3.0.2/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
+
+Stat | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total | Total number of ignored gossip messages since node was started
+rings_reconciled_total | Total number of ring reconciliation operations since node was started
+rings_reconciled | Number of ring reconciliation operations in the last minute
+gossip_received | Number of gossip messages received in the last minute
+rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/http/store-object.md b/content/riak/kv/3.0.2/developing/api/http/store-object.md
new file mode 100644
index 0000000000..e84e564506
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/http/store-object.md
@@ -0,0 +1,150 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/http/store-object
+  - /riak/kv/3.0.2/dev/references/http/store-object
+---
+
+Stores
an object under the specified bucket / key. Storing an object comes in +two forms, depending on whether you want to use a key of your choosing, or let +Riak assign a key to a new object. + +## Request + +```bash +POST /types/type/buckets/bucket/keys # Riak-defined key +PUT /types/type/buckets/bucket/keys/key # User-defined key +POST /buckets/bucket/keys # Riak-defined key +PUT /buckets/bucket/keys/key # User-defined key +``` + +For the sake of compatibility with older clients, `POST` is also acceptable in +the form where the key is specified. + +Important headers: + +* `Content-Type` must be set for the stored object. Set what you expect to +receive back when next requesting it. +* `X-Riak-Vclock` if the object already exists, the vector clock attached to the +object when read. +* `X-Riak-Meta-*` - any additional metadata headers that should be stored with +the object. +* `X-Riak-Index-*` - index entries under which this object should be indexed. +[Read more about Secondary Indexing]({{}}riak/kv/3.0.2/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/3.0.2/developing/api/http/link-walking) + +Optional headers (only valid on `PUT`): + +* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since` +invoke conditional request semantics, matching on the `ETag` and `Last-Modified` +of the existing object. These can be used to prevent overwriting a modified +object. If the test fails, you will receive a `412 Precondition Failed` +response. This does not prevent concurrent writes; it is possible for the +condition to evaluate to true for multiple requests if the requests occur at the +same time. + +Optional query parameters: + +* `w` (write quorum) how many replicas to write to before returning a successful +response (default is defined by the bucket level) +* `dw` (durable write quorum) how many replicas to commit to durable storage +before returning a successful response (default is defined at the bucket level) +* `pw` how many primary replicas must be online to attempt a write (default is +defined at the bucket level) +* `returnbody=[true|false]` whether to return the contents of the stored object. + +*This request must include a body (entity).* + +## Response + +Normal status codes: + +* `201 Created` (when submitting without a key) +* `200 OK` +* `204 No Content` +* `300 Multiple Choices` + +Typical error codes: + +* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N) +* `412 Precondition Failed` if one of the conditional request headers failed to +match (see above) + +Important headers: + +* `Location` a relative URL to the newly-created object (when submitting without +a key) + +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/3.0.2/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +may be returned if siblings existed or were created as part of the operation, +and the response can be dealt with similarly. + +## Example: Storing Without Key + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys \ + -H "Content-Type: text/plain" -d 'this is a test' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /buckets/test/keys HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: text/plain
+> Content-Length: 14
+>
+< HTTP/1.1 201 Created
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+## Example: Storing With Key
+
+```curl
+$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true
+* About to connect() to 127.0.0.1 port 8098 (#0)
+*   Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==
+> Content-Length: 13
+>
+< HTTP/1.1 200 OK
+< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Link: </buckets/test>; rel="up"
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 13
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"bar":"baz"}
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/http/store-search-index.md b/content/riak/kv/3.0.2/developing/api/http/store-search-index.md
new file mode 100644
index 0000000000..380af4095a
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/http/store-search-index.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Store Search Index"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Store Search Index"
+    identifier: "http_store_search_index"
+    weight: 115
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/http/store-search-index
+  - /riak/kv/3.0.2/dev/references/http/store-search-index
+---
+
+Creates a new Riak Search [index]({{}}riak/kv/3.0.2/developing/usage/search/#simple-setup).
+
+## Request
+
+```
+PUT /search/index/<index_name>
+```
+
+## Optional Request Body
+
+If you run a `PUT` request to this endpoint without a request body, Riak
+will create a new Search index that uses the [default Search schema]({{}}riak/kv/3.0.2/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.
+
+To specify a different schema, however, you must pass Riak a JSON object
+as the request body in which the `schema` field specifies the name of
+the schema to use. If you've [stored a schema]({{}}riak/kv/3.0.2/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
+request would create an index called `my_index` that used that schema:
+
+```curl
+curl -XPUT http://localhost:8098/search/index/my_index \
+  -H "Content-Type: application/json" \
+  -d '{"schema": "my_custom_schema"}'
+```
+
+More information can be found in [Using Search]({{}}riak/kv/3.0.2/developing/usage/search). 
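+
+The same request can also be issued from a short script. The following
+Python sketch (illustrative only, not one of the official Riak clients;
+it reuses the `my_index`/`my_custom_schema` names from the example above)
+sends the identical `PUT` using only the standard library:
+
+```python
+import json
+import urllib.request
+
+# PUT a JSON body naming the schema the new index should use. Sending
+# no body at all would create the index with the default schema instead.
+req = urllib.request.Request(
+    "http://localhost:8098/search/index/my_index",
+    data=json.dumps({"schema": "my_custom_schema"}).encode("utf-8"),
+    headers={"Content-Type": "application/json"},
+    method="PUT",
+)
+with urllib.request.urlopen(req) as resp:
+    print(resp.status)  # expect 204 No Content (see below)
+```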
+
+## Normal Response Codes
+
+* `204 No Content` - The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` - The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/http/store-search-schema.md b/content/riak/kv/3.0.2/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..fdc1e3dc9c
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/http/store-search-schema.md
@@ -0,0 +1,54 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/http/store-search-schema
+  - /riak/kv/3.0.2/dev/references/http/store-search-schema
+---
+
+Creates a new Riak [Search schema]({{}}riak/kv/3.0.2/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/<schema_name>
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/3.0.2/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response
+
+* `204 No Content` - The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` - The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict` - The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..82ebf53acf
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers.md
@@ -0,0 +1,189 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers
+  - /riak/kv/3.0.2/dev/references/protocol-buffers
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages. 
Messages are all encoded the same way, consisting of:
+
+* 32-bit length of message code + Protocol Buffers message in network
+  order
+* 8-bit message code to identify the Protocol Buffers message
+* N bytes of Protocol Buffers-encoded message
+
+### Example
+
+```
+00 00 00 07 09 0A 01 62 12 01 6B
+|----Len---|MC|----Message-----|
+
+Len = 0x07
+Message Code (MC) = 0x09 = RpbGetReq
+RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B
+
+Decoded Message:
+bucket: "b"
+key: "k"
+```
+
+## Message Codes
+
+Code | Message |
+:----|:--------|
+0 | `RpbErrorResp` |
+1 | `RpbPingReq` |
+2 | `RpbPingResp` |
+3 | `RpbGetClientIdReq` |
+4 | `RpbGetClientIdResp` |
+5 | `RpbSetClientIdReq` |
+6 | `RpbSetClientIdResp` |
+7 | `RpbGetServerInfoReq` |
+8 | `RpbGetServerInfoResp` |
+9 | `RpbGetReq` |
+10 | `RpbGetResp` |
+11 | `RpbPutReq` |
+12 | `RpbPutResp` |
+13 | `RpbDelReq` |
+14 | `RpbDelResp` |
+15 | `RpbListBucketsReq` |
+16 | `RpbListBucketsResp` |
+17 | `RpbListKeysReq` |
+18 | `RpbListKeysResp` |
+19 | `RpbGetBucketReq` |
+20 | `RpbGetBucketResp` |
+21 | `RpbSetBucketReq` |
+22 | `RpbSetBucketResp` |
+23 | `RpbMapRedReq` |
+24 | `RpbMapRedResp` |
+25 | `RpbIndexReq` |
+26 | `RpbIndexResp` |
+27 | `RpbSearchQueryReq` |
+28 | `RpbSearchQueryResp` |
+29 | `RpbResetBucketReq` |
+30 | `RpbResetBucketResp` |
+31 | `RpbGetBucketTypeReq` |
+32 | `RpbSetBucketTypeReq` |
+40 | `RpbCSBucketReq` |
+41 | `RpbCSUpdateReq` |
+50 | `RpbCounterUpdateReq` |
+51 | `RpbCounterUpdateResp` |
+52 | `RpbCounterGetReq` |
+53 | `RpbCounterGetResp` |
+54 | `RpbYokozunaIndexGetReq` |
+55 | `RpbYokozunaIndexGetResp` |
+56 | `RpbYokozunaIndexPutReq` |
+57 | `RpbYokozunaIndexPutResp` |
+58 | `RpbYokozunaSchemaGetReq` |
+59 | `RpbYokozunaSchemaGetResp` |
+60 | `RpbYokozunaSchemaPutReq` |
+80 | `DtFetchReq` |
+81 | `DtFetchResp` |
+82 | `DtUpdateReq` |
+83 | `DtUpdateResp` |
+253 | `RpbAuthReq` |
+254 | `RpbAuthResp` |
+255 | `RpbStartTls` |
+
+{{% note title="Message Definitions" %}}
+All Protocol Buffers messages are defined in the `riak.proto` and other
+`.proto` files in the `/src` directory of the
+RiakPB project.
+{{% /note %}}
+
+### Error Response
+
+If the request does not result in an error, Riak will return one of a
+variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`,
+depending on which request message is sent.
+
+If the server experiences an error processing a request, however, it
+will return an `RpbErrorResp` message instead of the response expected
+for the given request (e.g. `RpbGetResp` is the expected response to
+`RpbGetReq`). Error messages contain an error string and an error code,
+like this:
+
+```protobuf
+message RpbErrorResp {
+    required bytes errmsg = 1;
+    required uint32 errcode = 2;
+}
+```
+
+### Values
+
+* `errmsg` - A string representation of what went wrong
+* `errcode` - A numeric code. Currently, only `RIAKC_ERR_GENERAL=1`
+  is defined. 
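+
+To make the framing concrete, here is a minimal Python sketch of the
+encoding described in the Protocol section above. It is illustrative only
+(not one of the official clients) and assumes a node listening on the
+default PBC port, 8087; the message codes come from the table above:
+
+```python
+import socket
+import struct
+
+RPB_PING_REQ = 1   # RpbPingReq: no message body
+RPB_PING_RESP = 2  # RpbPingResp: no message body
+
+def frame(msg_code, payload=b""):
+    # 32-bit big-endian length (the code byte plus the payload),
+    # then the 8-bit message code, then the encoded message
+    return struct.pack(">IB", len(payload) + 1, msg_code) + payload
+
+def recv_exact(sock, n):
+    buf = b""
+    while len(buf) < n:
+        chunk = sock.recv(n - len(buf))
+        if not chunk:
+            raise ConnectionError("connection closed mid-message")
+        buf += chunk
+    return buf
+
+def read_frame(sock):
+    length, code = struct.unpack(">IB", recv_exact(sock, 5))
+    return code, recv_exact(sock, length - 1)
+
+with socket.create_connection(("127.0.0.1", 8087)) as sock:
+    sock.sendall(frame(RPB_PING_REQ))
+    code, _ = read_frame(sock)
+    assert code == RPB_PING_RESP  # the node answered the ping
+```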
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/yz-schema-put) + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..b30f314b9b --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,34 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/auth-req + - /riak/kv/3.0.2/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/3.0.2/using/security/basics). + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..89f46e498c --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,82 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "3.0.2" +menu: + riak_kv-3.0.2: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/coverage-queries + - /riak/kv/3.0.2/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/delete-object.md
new file mode 100644
index 0000000000..041051fb65
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/delete-object.md
@@ -0,0 +1,104 @@
+---
+title: "PBC Delete Object"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Delete Object"
+    identifier: "pbc_delete_object"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/delete-object
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/delete-object
+---
+
+Delete an object in the specified [bucket type]({{}}riak/kv/3.0.2/using/cluster-operations/bucket-types)/bucket/key location.
+
+## Request
+
+```protobuf
+message RpbDelReq {
+    required bytes bucket = 1;
+    required bytes key = 2;
+    optional uint32 rw = 3;
+    optional bytes vclock = 4;
+    optional uint32 r = 5;
+    optional uint32 w = 6;
+    optional uint32 pr = 7;
+    optional uint32 pw = 8;
+    optional uint32 dw = 9;
+    optional uint32 timeout = 10;
+    optional bool sloppy_quorum = 11;
+    optional uint32 n_val = 12;
+    optional bytes type = 13;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description |
+:---------|:------------|
+`bucket` | The name of the bucket in which the object is stored
+`key` | The key under which the object is stored
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that that integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description |
+:---------|:------------|
+`rw` | How many replicas to delete before returning a successful response
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Used to prevent the deletion of objects that have been modified since the last GET request (sent as a byte array)
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the delete request will be sent
+`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter.
+
+## Response
+
+Only the message code is returned. 
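+
+As a sanity check on the wire format, the following Python sketch
+hand-encodes the `RpbDelReq` shown in the example below, using the field
+numbers from the message definition above. It is illustrative only; a
+real client should use generated Protocol Buffers code:
+
+```python
+import struct
+
+def length_delimited(field_no, value):
+    # wire type 2; a single-byte length is enough for values under 128 bytes
+    return bytes([(field_no << 3) | 2, len(value)]) + value
+
+def small_varint(field_no, value):
+    # wire type 0; a single-byte varint is enough for values under 128
+    return bytes([(field_no << 3) | 0, value])
+
+# RpbDelReq: bucket = 1, key = 2, rw = 3
+payload = (length_delimited(1, b"notabucket")
+           + length_delimited(2, b"k")
+           + small_varint(3, 1))
+msg = struct.pack(">IB", len(payload) + 1, 13) + payload  # 13 = RpbDelReq
+
+print(msg.hex())  # 000000120d0a0a6e6f74616275636b657412016b1801
+```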
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65
+         74 12 01 6B 18 01
+Erlang   <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>>
+
+RpbDelReq protoc decode:
+bucket: "notabucket"
+key: "k"
+rw: 1
+
+```
+
+#### Response
+
+```
+Hex      00 00 00 01 0E
+Erlang   <<0,0,0,1,14>>
+
+RpbDelResp - only message code defined
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-counter-store.md
new file mode 100644
index 0000000000..f8f7fb3839
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-counter-store.md
@@ -0,0 +1,35 @@
+---
+title: "PBC Data Type Counter Store"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Data Type Counter Store"
+    identifier: "pbc_dt_counter_store"
+    weight: 117
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/dt-counter-store
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/dt-counter-store
+---
+
+An operation to update a [counter]({{}}riak/kv/3.0.2/developing/data-types).
+
+## Request
+
+```protobuf
+message CounterOp {
+    optional sint64 increment = 1;
+}
+```
+
+The `increment` value specifies how much the counter will be incremented
+or decremented, depending on whether the `increment` value is positive
+or negative. This operation can be used to update counters that are
+stored on their own in a key or [within a map]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-map-store).
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-fetch.md
new file mode 100644
index 0000000000..f1958613f9
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-fetch.md
@@ -0,0 +1,131 @@
+---
+title: "PBC Data Type Fetch"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Data Type Fetch"
+    identifier: "pbc_dt_fetch"
+    weight: 114
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/dt-fetch
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/dt-fetch
+---
+
+The equivalent of [`RpbGetReq`]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/3.0.2/developing/data-types). This request results in a `DtFetchResp`
+message (explained in the **Response** section below).
+
+## Request
+
+```protobuf
+message DtFetchReq {
+    required bytes bucket = 1;
+    required bytes key = 2;
+    required bytes type = 3;
+    optional uint32 r = 4;
+    optional uint32 pr = 5;
+    optional bool basic_quorum = 6;
+    optional bool notfound_ok = 7;
+    optional uint32 timeout = 8;
+    optional bool sloppy_quorum = 9;
+    optional uint32 n_val = 10;
+    optional bool include_context = 11 [default=true];
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket in which the Data Type is stored
+`key` | The key where the Data Type is stored
+`type` | The [bucket type]({{}}riak/kv/3.0.2/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map)
+
+#### Optional Parameters
+
+> **Note on defaults and special values**
+>
+> All of the optional parameters below have default values determined on a
+> per-bucket basis. 
Please refer to the documentation on [setting bucket properties]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-props) for more information.
+>
+> Furthermore, you can assign an integer value to the `r` and `pr` parameters,
+> provided that that integer value is less than or equal to N, _or_ a special
+> value denoting `one` (`4294967295-1`), `quorum` (`4294967295-2`), `all`
+> (`4294967295-3`), or `default` (`4294967295-4`).
+
+Parameter | Description
+:---------|:-----------
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success, in which case `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the fetch request will be sent
+`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client
+
+## Response
+
+The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+
+```protobuf
+message DtFetchResp {
+    enum DataType {
+        COUNTER = 1;
+        SET = 2;
+        MAP = 3;
+    }
+
+    optional bytes context = 1;
+    required DataType type = 2;
+    optional DtValue value = 3;
+}
+```
+
+If the `include_context` option is specified, an opaque "context" value
+will be returned along with the user-readable data. When sending an
+update request, the client should send this context as well, just as one
+would send a [vclock]({{}}riak/kv/3.0.2/learn/glossary/#vector-clock) for standard KV updates.
+
+The type of the Data Type is specified in the `type` field, and must be
+one of the three possible values of the `DataType` enum (`COUNTER`,
+`SET`, or `MAP`).
+
+The current value of the Data Type is contained in the `value` field,
+which itself contains a `DtValue` message. This message will have the
+following structure:
+
+```protobuf
+message DtValue {
+    optional sint64 counter_value = 1;
+    repeated bytes set_value = 2;
+    repeated MapEntry map_value = 3;
+}
+```
+
+If the Data Type queried is a counter, it will return an integer value
+for the counter; if a set, it will return the set's current value, in
+bytes; if a map, it will return a `MapEntry` message. 
`MapEntry` messages
+are structured as follows:
+
+```protobuf
+message MapEntry {
+    required MapField field = 1;
+    optional sint64 counter_value = 2;
+    repeated bytes set_value = 3;
+    optional bytes register_value = 4;
+    optional bool flag_value = 5;
+    repeated MapEntry map_value = 6;
+}
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-map-store.md
new file mode 100644
index 0000000000..20e7e5749b
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-map-store.md
@@ -0,0 +1,77 @@
+---
+title: "PBC Data Type Map Store"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Data Type Map Store"
+    identifier: "pbc_dt_map_store"
+    weight: 119
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/dt-map-store
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/dt-map-store
+---
+
+An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc.
+
+## Request
+
+Operations on maps are requested using a `MapOp` message, which has the following structure:
+
+```protobuf
+message MapOp {
+    repeated MapField adds = 1;
+    repeated MapField removes = 2;
+    repeated MapUpdate updates = 3;
+}
+```
+
+In a `MapOp` message, you can either add or remove fields (counters, sets, registers, flags, or maps) to or from the map, or update one or more fields. You can include as many field additions or removals and/or field updates as you wish.
+
+Adding or removing a field involves including a `MapField` message in your `MapOp` operation:
+
+```protobuf
+message MapField {
+    enum MapFieldType {
+        COUNTER = 1;
+        SET = 2;
+        REGISTER = 3;
+        FLAG = 4;
+        MAP = 5;
+    }
+    required bytes name = 1;
+    required MapFieldType type = 2;
+}
+```
+
+The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated.
+
+If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure:
+
+```protobuf
+message MapUpdate {
+    enum FlagOp {
+        ENABLE = 1;
+        DISABLE = 2;
+    }
+    required MapField field = 1;
+    optional CounterOp counter_op = 2;
+    optional SetOp set_op = 3;
+    optional bytes register_op = 4;
+    optional FlagOp flag_op = 5;
+    optional MapOp map_op = 6;
+}
+```
+
+The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-set-store).
+
+If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively).
+
+Updating a register does not involve sending a special message type. 
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..66712ffe9b --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,36 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/dt-set-store + - /riak/kv/3.0.2/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..fba7fc9c08 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,132 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/dt-store + - /riak/kv/3.0.2/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/3.0.2/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/3.0.2/using/cluster-operations/bucket-types). 
+
+Also required is a `DtOp` message that specifies which operation is to
+be performed, depending on whether the Data Type being updated is a
+[counter]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-map-store).
+
+```protobuf
+message DtOp {
+    optional CounterOp counter_op = 1;
+    optional SetOp set_op = 2;
+    optional MapOp map_op = 3;
+}
+```
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that that integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/3.0.2/learn/glossary/#vector-clock)
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_body` | Whether to return the contents of the stored object. Defaults to `false`.
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be returned to the client when the `DtUpdateResp` is sent to the client.
+
+## Response
+
+The response to a Data Type update request is analogous to
+[`RpbPutResp`]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/store-object) for KV operations. If
+`return_body` is set in the update request message (as explained above),
+the message will include the opaque context of the Data Type (`context`)
+and the new value of the Data Type _after_ the update has completed
+(depending on whether the Data Type is a counter, set, or map). If no
+key was specified in the update request, it will include the
+Riak-assigned key (`key`).
+
+```protobuf
+message DtUpdateResp {
+    optional bytes key = 1;
+    optional bytes context = 2;
+    optional sint64 counter_value = 3;
+    repeated bytes set_value = 4;
+    repeated MapEntry map_value = 5;
+}
+```
+
+Assuming `return_body` is set to `true`: if a counter is updated, the
+response will include an integer as the `counter_value`; if a set is
+updated, a list of binaries will be returned as the `set_value`; and if a
+map is updated, the returned `map_value` will be a `MapEntry` message. 
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..bf928c0bd7 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/dt-union + - /riak/kv/3.0.2/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/dt-store) message. + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..e93c1836a9 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,185 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/fetch-object + - /riak/kv/3.0.2/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success, in which case `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match
+`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it
+`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+
+## Response
+
+```protobuf
+message RpbGetResp {
+    repeated RpbContent content = 1;
+    optional bytes vclock = 2;
+    optional bool unchanged = 3;
+}
+```
+
+#### Values
+
+Value | Description
+:-----|:-----------
+`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty.
+`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings
+`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true`
+
+The content entries hold the object value and any metadata.
+Below is the structure of an `RpbContent` message, which is
+included in GET/PUT responses (`RpbGetResp` (above) and
+[`RpbPutResp`]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/store-object), respectively):
+
+```protobuf
+message RpbContent {
+    required bytes value = 1;
+    optional bytes content_type = 2;
+    optional bytes charset = 3;
+    optional bytes content_encoding = 4;
+    optional bytes vtag = 5;
+    repeated RpbLink links = 6;
+    optional uint32 last_mod = 7;
+    optional uint32 last_mod_usecs = 8;
+    repeated RpbPair usermeta = 9;
+    repeated RpbPair indexes = 10;
+    optional bool deleted = 11;
+}
+```
+
+From the above, we can see that an `RpbContent` message will always
+contain the binary `value` of the object. But it could also contain any
+of the following optional parameters:
+
+* `content_type` - The content type of the object, e.g. `text/plain`
+  or `application/json`
+* `charset` - The character encoding of the object, e.g. `utf-8`
+* `content_encoding` - The content encoding of the object, e.g.
+  `gzip`
+* `vtag` - The object's [vtag]({{}}riak/kv/3.0.2/learn/glossary/#vector-clock)
+* `links` - This parameter is associated with the now-deprecated link
+  walking feature and should not be used by Riak clients
+* `last_mod` - A timestamp for when the object was last modified, in
+  [Unix time](http://en.wikipedia.org/wiki/Unix_time) seconds
+* `last_mod_usecs` - The microseconds portion of the last modified
+  timestamp
+* `usermeta` - This field stores user-specified key/value metadata
+  pairs to be associated with the object. `RpbPair` messages used to
+  send metadata of this sort are structured like this:
+
+    ```protobuf
+    message RpbPair {
+        required bytes key = 1;
+        optional bytes value = 2;
+    }
+    ```
+    Notice that you can store either a key/value pair or just a key on
+    its own.
+    `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/3.0.2/developing/usage/secondary-indexes) to objects (in the optional
+    `indexes` field). 
+* `deleted` - Whether the object has been deleted (i.e. whether a
+  tombstone for the object has been found under the specified key)
+
+{{% note title="Note on missing keys" %}}
+Remember: if a key is not stored in Riak, an `RpbGetResp` response without the
+`content` and `vclock` fields will be returned. This should be mapped to
+whatever convention the client language uses to return not found. The Erlang
+client, for example, returns the tuple `{error, notfound}`.
+{{% /note %}}
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 07 09 0A 01 62 12 01 6B
+Erlang   <<0,0,0,7,9,10,1,98,18,1,107>>
+
+RpbGetReq protoc decode:
+bucket: "b"
+key: "k"
+```
+
+#### Response
+
+```
+Hex      00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44
+         6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B
+         49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B
+         CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65
+         30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00
+Erlang   <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122,
+           56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64,
+           224,185,6,18,31,107,206,97,96,96,96,204,96,202,5,82,44,172,194,91,63,
+           101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>>
+
+RpbGetResp protoc decode:
+content {
+  value: "v2"
+  vtag: "3SDlf4INKz8hNdhyImKIru"
+  last_mod: 1271442363
+  last_mod_usecs: 105696
+}
+vclock: "k\316a```\314`\312\005R,\254\302[?e0%2\345\2612\354V\267=\312\227\005\000"
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-props.md
new file mode 100644
index 0000000000..bcb1d538d8
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-props.md
@@ -0,0 +1,114 @@
+---
+title: "PBC Get Bucket Properties"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Get Bucket Properties"
+    identifier: "pbc_get_bucket_props"
+    weight: 102
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/get-bucket-props
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/get-bucket-props
+---
+
+Fetch a bucket's properties.
+
+## Request
+
+```protobuf
+message RpbGetBucketReq {
+    required bytes bucket = 1;
+    optional bytes type = 2;
+}
+```
+
+The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/3.0.2/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified,
+the `default` bucket type will be used. 
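+
+As a brief usage sketch, fetching these properties with the official
+Python client looks roughly like the following (assuming the client is
+installed and a node is reachable on the default PB port; `test` is just
+a placeholder bucket name):
+
+```python
+from riak import RiakClient
+
+client = RiakClient(protocol="pbc", pb_port=8087)
+bucket = client.bucket("test")  # default bucket type
+
+# Issues an RpbGetBucketReq under the hood and returns a dict of props
+props = bucket.get_properties()
+print(props.get("n_val"), props.get("allow_mult"))
+```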
+
+## Response
+
+When an `RpbGetBucketReq` message is sent to Riak, it will respond with
+an `RpbGetBucketResp` message, which returns the bucket's properties:
+
+```protobuf
+message RpbGetBucketResp {
+    required RpbBucketProps props = 1;
+}
+```
+
+The `RpbBucketProps` value itself is structured as follows:
+
+```protobuf
+message RpbBucketProps {
+    optional uint32 n_val = 1;
+    optional bool allow_mult = 2;
+    optional bool last_write_wins = 3;
+    repeated RpbCommitHook precommit = 4;
+    optional bool has_precommit = 5 [default = false];
+    repeated RpbCommitHook postcommit = 6;
+    optional bool has_postcommit = 7 [default = false];
+    optional RpbModFun chash_keyfun = 8;
+    optional RpbModFun linkfun = 9;
+    optional uint32 old_vclock = 10;
+    optional uint32 young_vclock = 11;
+    optional uint32 big_vclock = 12;
+    optional uint32 small_vclock = 13;
+    optional uint32 pr = 14;
+    optional uint32 r = 15;
+    optional uint32 w = 16;
+    optional uint32 pw = 17;
+    optional uint32 dw = 18;
+    optional uint32 rw = 19;
+    optional bool basic_quorum = 20;
+    optional bool notfound_ok = 21;
+    optional bytes backend = 22;
+    optional bool search = 23;
+    enum RpbReplMode {
+        FALSE = 0;
+        REALTIME = 1;
+        FULLSYNC = 2;
+        TRUE = 3;
+    }
+    optional RpbReplMode repl = 24;
+    optional bytes search_index = 25;
+    optional bytes datatype = 26;
+    optional bool consistent = 27;
+}
+```
+
+#### Optional Response Values
+
+Each `RpbBucketProps` message returns all of the properties associated
+with a particular bucket. Default values for bucket properties, as well
+as descriptions of all of the above properties, can be found in the
+[configuration file]({{}}riak/kv/3.0.2/configuring/reference/#default-bucket-properties) documentation.
+
+It should be noted that the value of an `RpbBucketProps` message may
+include other message types, such as `RpbModFun` (specifying
+module-function pairs for bucket properties that require them) and
+`RpbCommitHook` (specifying the module-function pair and name of a
+commit hook). Those message types are structured like this:
+
+```protobuf
+message RpbModFun {
+    required bytes module = 1;
+    required bytes function = 2;
+}
+
+message RpbCommitHook {
+    optional RpbModFun modfun = 1;
+    optional bytes name = 2;
+}
+```
+
+{{% note title="Note on `RpbReplMode`" %}}
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
+{{% /note %}}
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-type.md
new file mode 100644
index 0000000000..8bdc2ce886
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-type.md
@@ -0,0 +1,37 @@
+---
+title: "PBC Get Bucket Type"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Get Bucket Type"
+    identifier: "pbc_get_bucket_type"
+    weight: 112
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/get-bucket-type
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/get-bucket-type
+---
+
+Gets the bucket properties associated with a [bucket type]({{}}riak/kv/3.0.2/using/cluster-operations/bucket-types).
+
+## Request
+
+```protobuf
+message RpbGetBucketTypeReq {
+    required bytes type = 1;
+}
+```
+
+Only the name of the bucket type needs to be specified (under `type`). 
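+
+The request payload is a single length-delimited field. As an
+illustration, the frame can be built by hand in a few lines of Python
+(message code 31 comes from the message code table; `maps` is a
+hypothetical bucket type name):
+
+```python
+import struct
+
+name = b"maps"  # hypothetical bucket type name
+payload = bytes([0x0A, len(name)]) + name  # field 1 (type), wire type 2
+frame = struct.pack(">IB", len(payload) + 1, 31) + payload  # 31 = RpbGetBucketTypeReq
+print(frame.hex())  # 000000071f0a046d617073
+```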
+
+## Response
+
+A bucket type's properties will be sent to the client as part of an
+[`RpbBucketProps`]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-props) message.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/get-client-id.md
new file mode 100644
index 0000000000..feb7771480
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/get-client-id.md
@@ -0,0 +1,65 @@
+---
+title: "PBC Get Client ID"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Get Client ID"
+    identifier: "pbc_get_client_id"
+    weight: 127
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/get-client-id
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/get-client-id
+---
+
+{{% note title="Deprecation notice" %}}
+The use of client IDs in conflict resolution is now deprecated in Riak. If you
+are building or maintaining a Riak client that is intended to be compatible
+with Riak 1.4 or later, you can safely ignore client IDs.
+{{% /note %}}
+
+Get the client id used for this connection. Client ids are used for
+conflict resolution and each unique actor in the system should be
+assigned one. A client id is assigned randomly when the socket is
+connected and can be changed using [Set Client ID]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/set-client-id).
+
+## Request
+
+Just the `RpbGetClientIdReq` message code. No request message defined.
+
+## Response
+
+```protobuf
+// Get ClientId Request - no message defined, just send RpbGetClientIdReq
+// message code
+message RpbGetClientIdResp {
+    required bytes client_id = 1; // Client id in use for this connection
+}
+```
+
+## Example
+
+Request
+
+```
+Hex      00 00 00 01 03
+Erlang   <<0,0,0,1,3>>
+```
+
+
+Response
+
+```
+Hex      00 00 00 07 04 0A 04 01 65 01 B5
+Erlang   <<0,0,0,7,4,10,4,1,101,1,181>>
+
+RpbGetClientIdResp protoc decode:
+client_id: "\001e\001\265"
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/list-buckets.md
new file mode 100644
index 0000000000..2abeab90a2
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/list-buckets.md
@@ -0,0 +1,80 @@
+---
+title: "PBC List Buckets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "List Buckets"
+    identifier: "pbc_list_buckets"
+    weight: 100
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/list-buckets
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/list-buckets
+---
+
+List all of the bucket names available.
+
+{{% note title="Caution" %}}
+This call can be expensive for the server. Do not use in performance-sensitive
+code.
+{{% /note %}}
+
+
+## Request
+
+Only the message code is required. 
+
+## Response
+
+
+```protobuf
+message RpbListBucketsResp {
+    repeated bytes buckets = 1;
+}
+```
+
+
+Values
+
+* `buckets` - Buckets on the server
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 01 0F
+Erlang   <<0,0,0,1,15>>
+
+RpbListBucketsReq - only message code defined
+```
+
+
+#### Response
+
+```bash
+Hex      00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62
+         34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02
+         62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37
+Erlang   <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10,
+           3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>>
+
+RpbListBucketsResp protoc decode:
+buckets: "b1"
+buckets: "b5"
+buckets: "b4"
+buckets: "b8"
+buckets: "b3"
+buckets: "b10"
+buckets: "b9"
+buckets: "b2"
+buckets: "b6"
+buckets: "b7"
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/list-keys.md
new file mode 100644
index 0000000000..e1d5d128de
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/list-keys.md
@@ -0,0 +1,101 @@
+---
+title: "PBC List Keys"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "List Keys"
+    identifier: "pbc_list_keys"
+    weight: 101
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/list-keys
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/list-keys
+---
+
+List all of the keys in a bucket. This is a streaming call, with
+multiple response messages sent for each request.
+
+{{% note title="Not for production use" %}}
+This operation requires traversing all keys stored in the cluster and should
+not be used in production.
+{{% /note %}}
+
+## Request
+
+```protobuf
+message RpbListKeysReq {
+    required bytes bucket = 1;
+}
+```
+
+Required Parameters
+
+* `bucket` - bucket to get keys from
+
+## Response
+
+```protobuf
+message RpbListKeysResp {
+    repeated bytes keys = 1;
+    optional bool done = 2;
+}
+```
+
+#### Values
+
+* **keys** - batch of keys in the bucket
+* **done** - set to `true` on the last response packet
+
+## Example
+
+#### Request
+
+```bash
+Hex      00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73
+Erlang   <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>>
+
+RpbListKeysReq protoc decode:
+bucket: "listkeys"
+
+```
+
+#### Response Packet 1
+
+```bash
+Hex      00 00 00 04 12 0A 01 34
+Erlang   <<0,0,0,4,18,10,1,52>>
+
+RpbListKeysResp protoc decode:
+keys: "4"
+
+```
+
+#### Response Packet 2
+
+```bash
+Hex      00 00 00 08 12 0A 02 31 30 0A 01 33
+Erlang   <<0,0,0,8,18,10,2,49,48,10,1,51>>
+
+RpbListKeysResp protoc decode:
+keys: "10"
+keys: "3"
+```
+
+
+#### Response Packet 3
+
+```bash
+Hex      00 00 00 03 12 10 01
+Erlang   <<0,0,0,3,18,16,1>>
+
+RpbListKeysResp protoc decode:
+done: true
+
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/mapreduce.md
new file mode 100644
index 0000000000..27a09a0607
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/mapreduce.md
@@ -0,0 +1,153 @@
+---
+title: "PBC MapReduce"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "MapReduce"
+    identifier: "pbc_mapreduce"
+    weight: 107
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/mapreduce
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/mapreduce
+---
+
+Execute a MapReduce job. 
+
+## Request
+
+
+```protobuf
+message RpbMapRedReq {
+    required bytes request = 1;
+    required bytes content_type = 2;
+}
+```
+
+
+Required Parameters
+
+* `request` - MapReduce job
+* `content_type` - Encoding for MapReduce job
+
+MapReduce jobs can be encoded in two different ways:
+
+* `application/json` - JSON-encoded MapReduce job
+* `application/x-erlang-binary` - Erlang external term format
+
+The JSON encoding is the same as the [REST API]({{}}riak/kv/3.0.2/developing/usage/mapreduce/#rest) and
+the external term format is the same as the [local Erlang API]({{}}riak/kv/3.0.2/developing/app-guide/advanced-mapreduce/#erlang).
+
+## Response
+
+The results of the MapReduce job are returned for each phase that
+generates a result, encoded in the same format the job was submitted in.
+Multiple response messages will be returned, followed by a final message
+at the end of the job.
+
+```protobuf
+message RpbMapRedResp {
+    optional uint32 phase = 1;
+    optional bytes response = 2;
+    optional bool done = 3;
+}
+```
+
+
+Values
+
+* `phase` - Phase number of the MapReduce job
+* `response` - Response encoded with the content_type submitted
+* `done` - Set `true` on the last response packet
+
+## Example
+
+Here is an example of submitting a JSON-encoded job that sums up a bucket
+full of JSON-encoded values:
+
+```
+{"inputs": "bucket_501653",
+ "query":
+    [{"map": {"arg": null,
+              "name": "Riak.mapValuesJson",
+              "language": "javascript",
+              "keep": false}},
+     {"reduce": {"arg": null,
+                 "name": "Riak.reduceSum",
+                 "language": "javascript",
+                 "keep": true}}]}
+```
+
+Request
+
+```bash
+Hex      00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
+         22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
+         33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
+         6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
+         6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
+         6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
+         2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
+         61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
+         70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
+         65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
+         6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
+         69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
+         22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
+         61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
+         3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
+         69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
+Erlang <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
+         117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
+         121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
+         110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
+         109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
+         110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
+         116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
+         44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
+         32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
+         46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
+         97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
+         34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
+         112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>
+
+RpbMapRedReq protoc decode:
+request: "{\"inputs\": \"bucket_501653\", \"query\": [{\"map\": {\"arg\": null,
+\"name\": \"Riak.mapValuesJson\", \"language\": \"javascript\", \"keep\": false}},
+ {\"reduce\": {\"arg\": null, \"name\": \"Riak.reduceSum\", \"language\": 
+"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/ping.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..2cffa68b07 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/ping.md @@ -0,0 +1,46 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/ping + - /riak/kv/3.0.2/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..8fc9350ba8 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,63 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/3.0.2/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/3.0.2/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
+
+## Example
+
+Request to reset the properties for the bucket `friends`:
+
+#### Request
+
+```bash
+Hex      00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
+Erlang <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>
+
+RpbResetBucketReq protoc decode:
+bucket: "friends"
+
+```
+
+#### Response
+
+```bash
+Hex      00 00 00 01 1E
+Erlang <<0,0,0,1,30>>
+
+RpbResetBucketResp - only message code defined
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/search.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..f4d723593c
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/search.md
@@ -0,0 +1,152 @@
+---
+title: "PBC Search"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Search"
+    identifier: "pbc_search"
+    weight: 109
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/search
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/search
+---
+
+Send a Search request to retrieve a list of documents, along with a few
+stats.
+
+## Request
+
+
+```protobuf
+message RpbSearchQueryReq {
+    required bytes q = 1;
+    required bytes index = 2;
+    optional uint32 rows = 3;
+    optional uint32 start = 4;
+    optional bytes sort = 5;
+    optional bytes filter = 6;
+    optional bytes df = 7;
+    optional bytes op = 8;
+    repeated bytes fl = 9;
+    optional bytes presort = 10;
+}
+```
+
+#### Required Parameters
+
+* `q` - The contents of the query
+* `index` - The name of the index to search
+
+#### Optional Parameters
+
+* `rows` - The maximum number of rows to return
+* `start` - A start offset, i.e. the number of keys to skip before
+  returning values
+* `sort` - How the search results are to be sorted
+* `filter` - Filters search with additional query scoped to inline
+  fields
+* `df` - Override the `default_field` setting in the schema file
+* `op` - `and` or `or`, to override the `default_op` operation setting
+  in the schema file
+* `fl` - The list of fields to return
+* `presort` - Presort the results. The options are `key` or `score`
+
+
+## Response
+
+The results of a search query are returned as a repeating list of 0 or
+more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
+more key/value pairs (`RpbPair`) that match the given request
+parameters. The response also includes the maximum search score and the
+number of results.
+
+
+```protobuf
+// RpbPair is a generic key/value pair datatype used for
+// other message types
+message RpbPair {
+    required bytes key = 1;
+    optional bytes value = 2;
+}
+
+message RpbSearchDoc {
+    repeated RpbPair fields = 1;
+}
+
+message RpbSearchQueryResp {
+    repeated RpbSearchDoc docs = 1;
+    optional float max_score = 2;
+    optional uint32 num_found = 3;
+}
+```
+
+#### Values
+
+* `docs` - A list of docs that match the search request
+* `max_score` - The top score returned
+* `num_found` - The total number of values matched by this
+  search
+
+
+## Example
+
+Request
+
+Here we search for any animals that begin with the string `pig`. We only
+want the first 100, and sort the values by a `name` field. 
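+
+The same query issued through the official Erlang client might look
+roughly like the following sketch (the option names mirror the request
+fields above and are assumptions here, as are the host and port):
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+%% q = "pig*", index = "animals", rows = 100, sorted by the name field
+{ok, Results} = riakc_pb_socket:search(Pid, <<"animals">>, <<"pig*">>,
+                                       [{rows, 100}, {sort, <<"name">>}]).
+```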
+ +```bash +RpbSearchQueryReq protoc decode: +q: "pig*" +index: "animals" +rows: 100 +start: 0 +sort: "name" + +Hex 00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E + 69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65 +Erlang <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110, + 105,109,97,108,115,24,100,32,0,42,4,110,97, + 109,101>> +``` + +Response + +```bash +Hex 00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D + 61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65 + 12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69 + 6D 61 6C 12 06 70 69 67 65 6F 6E 18 02 +Erlang <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109, + 97,108,18,3,112,105,103,10,12,10,4,110,97, + 109,101,18,4,102,114,101,100,10,18,10,16,10, + 6,97,110,105,109,97,108,18,6,112,105,103, + 101,111,110,24,2>> + +RpbSearchQueryResp protoc decode: +docs { + fields { + key: "animal" + value: "pig" + } + fields { + key: "name" + value: "fred" + } +} +docs { + fields { + key: "animal" + value: "pigeon" + } +} +num_found: 2 +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/secondary-indexes.md new file mode 100644 index 0000000000..e7450b43d4 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/secondary-indexes.md @@ -0,0 +1,125 @@ +--- +title: "PBC Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Secondary Indexes" + identifier: "pbc_secondary_indexes" + weight: 108 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/secondary-indexes + - /riak/kv/3.0.2/dev/references/protocol-buffers/secondary-indexes +--- + +Request a set of keys that match a secondary index query. + +## Request + +```protobuf +message RpbIndexReq { + enum IndexQueryType { + eq = 0; + range = 1; + } + required bytes bucket = 1; + required bytes index = 2; + required IndexQueryType qtype = 3; + optional bytes key = 4; + optional bytes range_min = 5; + optional bytes range_max = 6; + optional bool return_terms = 7; + optional bool stream = 8; + optional uint32 max_results = 9; + optional bytes continuation = 10; + optional uint32 timeout = 11; + optional bytes type = 12; + optional bytes term_regex = 13; + optional bool pagination_sort = 14; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`index` | The name of the index to be queried +`qtype` | The type of index query to be performed. 
This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query
+
+#### Optional Parameters
+
+Parameter | Description
+:---------|:-----------
+`key` | The exact index value to match if `qtype` is set to `eq`
+`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
+`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
+`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
+`max_results` | If pagination is turned on, the number of results to be returned to the client
+`continuation` | The opaque continuation value returned by a previous paginated response, used to retrieve the next page of results
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/3.0.2/developing/usage/bucket-types).
+`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
+`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key
+
+## Response
+
+The results of a Secondary Index query are returned as a repeating list
+of 0 or more keys that match the given request parameters.
+
+```protobuf
+message RpbIndexResp {
+    repeated bytes keys = 1;
+    repeated RpbPair results = 2;
+    optional bytes continuation = 3;
+    optional bool done = 4;
+}
+```
+
+#### Values
+
+Parameter | Description
+:---------|:-----------
+`keys` | A list of keys that match the index request
+`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/fetch-object).
+`continuation` | Used for paginated responses
+`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).
+
+## Example
+
+#### Request
+
+Here we look for any exact matches of `chicken` on an `animal_bin` index
+for a bucket named `farm`. 
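+
+From the Erlang client, the equivalent equality query can be issued with
+the client's 2i helpers, as in this sketch (`get_index_eq/4` is part of
+the riak-erlang-client; host and port are placeholders):
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+%% eq query against the animal_bin index of the bucket "farm"
+{ok, Results} = riakc_pb_socket:get_index_eq(Pid, <<"farm">>,
+                                             {binary_index, "animal"},
+                                             <<"chicken">>).
+```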
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/server-info.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..2fbaff9fbc --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,62 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/server-info + - /riak/kv/3.0.2/dev/references/protocol-buffers/server-info +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..9f0eecea8d --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,72 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/3.0.2/dev/references/protocol-buffers/set-bucket-props +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/3.0.2/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
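+
+From the Erlang client, the same change can be made without touching the
+wire format, as in this sketch using `riakc_pb_socket:set_bucket/3`
+(host and port are placeholders):
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+%% Enable allow_mult on the bucket "friends", as in the example below.
+ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
+```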
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..2d3f102a1b --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,35 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/3.0.2/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/3.0.2/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/get-bucket-props). + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..03c49b3501 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,66 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/set-client-id + - /riak/kv/3.0.2/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
+
+## Example
+
+Request
+
+```
+Hex      00 00 00 07 05 0A 04 01 65 01 B6
+Erlang <<0,0,0,7,5,10,4,1,101,1,182>>
+
+RpbSetClientIdReq protoc decode:
+client_id: "\001e\001\266"
+
+```
+
+
+Response
+
+```
+Hex      00 00 00 01 06
+Erlang <<0,0,0,1,6>>
+
+RpbSetClientIdResp - only message code defined
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/store-object.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/store-object.md
new file mode 100644
index 0000000000..05a9cce25b
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/store-object.md
@@ -0,0 +1,154 @@
+---
+title: "PBC Store Object"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Store Object"
+    identifier: "pbc_store_object"
+    weight: 106
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/references/protocol-buffers/store-object
+  - /riak/kv/3.0.2/dev/references/protocol-buffers/store-object
+---
+
+Stores an object under the specified location, as determined by the
+intended [key]({{}}riak/kv/3.0.2/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/3.0.2/learn/concepts/buckets), and [bucket type]({{}}riak/kv/3.0.2/developing/usage/bucket-types). A bucket must always be specified (via
+`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If
+no key is specified, Riak will assign a random key to the object. If no
+[bucket type]({{}}riak/kv/3.0.2/developing/usage/bucket-types) is assigned, Riak will assign
+`default`, which means that the [default bucket configuration]({{}}riak/kv/3.0.2/configuring/reference/#default-bucket-properties) will be used.
+
+#### Request
+
+```protobuf
+message RpbPutReq {
+    required bytes bucket = 1;
+    optional bytes key = 2;
+    optional bytes vclock = 3;
+    required RpbContent content = 4;
+    optional uint32 w = 5;
+    optional uint32 dw = 6;
+    optional bool return_body = 7;
+    optional uint32 pw = 8;
+    optional bool if_not_modified = 9;
+    optional bool if_none_match = 10;
+    optional bool return_head = 11;
+    optional uint32 timeout = 12;
+    optional bool asis = 13;
+    optional bool sloppy_quorum = 14;
+    optional uint32 n_val = 15;
+    optional bytes type = 16;
+}
+```
+
+#### Required Parameters
+
+Parameter | Description
+:---------|:-----------
+`bucket` | The name of the bucket, in bytes, in which the key/value is to reside
+`content` | The new or updated content of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/fetch-object)
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw`, provided that the integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request.
+`vclock` | Opaque vector clock provided by an earlier RpbGetResp message. Omit if this is a new key or if you deliberately want to create a sibling. 
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`.
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_head` | Return the metadata for the now-stored object without returning the value of the object
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+
+The `if_not_modified`, `if_none_match`, and `asis` parameters are set
+only for messages sent between nodes in a Riak cluster and should not be
+set by Riak clients.
+
+#### Response
+
+```protobuf
+message RpbPutResp {
+    repeated RpbContent contents = 1;
+    optional bytes vclock = 2;
+    optional bytes key = 3;
+}
+```
+
+If `return_body` is set to `true` on the PUT request, the `RpbPutResp`
+will contain the current object after the PUT completes, in `contents`,
+as well as the object's [causal context]({{}}riak/kv/3.0.2/learn/concepts/causal-context), in the `vclock`
+field. The `key` will be sent only if the server generated a random key
+for the object.
+
+If `return_body` is not set and no key is generated, the PUT response
+will be empty.
+
+## Example
+
+#### Request
+
+```
+Hex      00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B
+         22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01
+Erlang <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34,
+         98,97,114,34,125,40,2,56,1>>
+
+RpbPutReq protoc decode:
+bucket: "b"
+key: "k"
+content {
+  value: "{\"foo\":\"bar\"}"
+}
+w: 2
+return_body: true
+
+```
+
+#### Response
+
+```
+Hex      00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A
+         22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39
+         36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF
+         B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60
+         CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC
+         0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0
+         12 FA 20 89 2C 00
+Erlang <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125,
+         42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86,
+         106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96,
+         96,96,202,96,202,5,82,44,44,233,12,134,25,76,137,140,121,172,12,90,
+         33,182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,
+         0>>
+
+RpbPutResp protoc decode:
+contents {
+  value: "{\"foo\":\"bar\"}"
+  vtag: "1caykOD96iNAhomyeVjOYC"
+  last_mod: 1271453743
+  last_mod_usecs: 406416
+}
+vclock: "k\316a```\312`\312\005R,,\351\014\206\031L\211\214y\254\014Z!\266G\371 \302l\315I\254\rw|\240\022\372 \211,\000"
+
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-delete.md
new file mode 100644
index 0000000000..a7b4c5f5b6
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-delete.md
@@ -0,0 +1,37 @@
+---
+title: "PBC Yokozuna Index Delete"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Yokozuna Index Delete"
+    identifier: "pbc_yz_index_delete"
+    weight: 122
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - 
/riak/3.0.2/dev/references/protocol-buffers/yz-index-delete + - /riak/kv/3.0.2/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..4458eed060 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,63 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/yz-index-get + - /riak/kv/3.0.2/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/3.0.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..a57c38db50 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,49 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/yz-index-put + - /riak/kv/3.0.2/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. 
+ +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/3.0.2/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-schema-get.md new file mode 100644 index 0000000000..c5a9e31f84 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-schema-get.md @@ -0,0 +1,52 @@ +--- +title: "PBC Yokozuna Schema Get" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Yokozuna Schema Get" + identifier: "pbc_yz_schema_get" + weight: 123 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/yz-schema-get + - /riak/kv/3.0.2/dev/references/protocol-buffers/yz-schema-get +--- + +Fetch a [search schema]({{}}riak/kv/3.0.2/developing/usage/search-schemas) from Riak Search. + +## Request + +In a request message, you only need to specify the name of the schema as +a binary (under `name`); + +```protobuf +message RpbYokozunaSchemaGetReq { + required bytes name = 1; // Schema name +} +``` + +## Response + +```protobuf +message RpbYokozunaSchemaGetResp { + required RpbYokozunaSchema schema = 1; +} +``` + +The response message will include a `RpbYokozunaSchema` structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message includes the schema `name` and its xml `content`. + + + + diff --git a/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-schema-put.md new file mode 100644 index 0000000000..d27c88dce4 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/api/protocol-buffers/yz-schema-put.md @@ -0,0 +1,45 @@ +--- +title: "PBC Yokozuna Schema Put" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Yokozuna Schema Put" + identifier: "pbc_yz_schema_put" + weight: 124 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.2/dev/references/protocol-buffers/yz-schema-put + - /riak/kv/3.0.2/dev/references/protocol-buffers/yz-schema-put +--- + +Create a new Solr [search schema]({{}}riak/kv/3.0.2/developing/usage/search-schemas). + +## Request + +```protobuf +message RpbYokozunaSchemaPutReq { + required RpbYokozunaSchema schema = 1; +} +``` + +Each message must contain a `RpbYokozunaSchema` object structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/3.0.2/developing/usage/search-schemas) `content` as XML. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/3.0.2/developing/api/protocol-buffers/#message-codes) code with no data on success. 
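+
+As a rough sketch, uploading a schema from the official Erlang client
+might look like the following (assumptions: the
+`riakc_pb_socket:create_search_schema/3` helper from the
+riak-erlang-client, and a local schema file named `my_schema.xml`):
+
+```erlang
+%% Read the Solr schema XML from disk and upload it under a name.
+{ok, SchemaXML} = file:read_file("my_schema.xml"),
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXML).
+```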
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/api/repl-hooks.md b/content/riak/kv/3.0.2/developing/api/repl-hooks.md
new file mode 100644
index 0000000000..46bf5daf99
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/api/repl-hooks.md
@@ -0,0 +1,196 @@
+---
+title_supertext: "Riak Multi-Datacenter Replication:"
+title: "Hooks API"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Multi-Datacenter REPL Hooks API"
+    identifier: "apis_repl_hooks"
+    weight: 100
+    parent: "developing_apis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.2/ops/mdc/v2/hooks
+  - /riak/kv/3.0.2/ops/mdc/v2/hooks
+---
+[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl
+
+This document is a guide to developing extensions for Riak's
+Multi-Datacenter Replication feature.
+
+## Replication Hooks
+
+Riak allows applications to register replication hooks to control
+either of the following:
+
+* when extra objects need to be replicated along with the current object
+* when an object should _not_ be replicated
+
+To register a hook, you must call the following function in an
+application-specific Erlang module, where `MyMod` is to be replaced
+with the name of your custom module:
+
+```erlang
+riak_core:register([{repl_helper, MyMod}]).
+```
+
+## Replication Hook API
+
+A replication hook must implement the following functions:
+
+### send_realtime/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook controls whether an [object][object]
+replicated in realtime should be sent. To send this object, return `ok`;
+to prevent the object from being sent, return `cancel`. You can also
+return a list of Riak objects to be replicated immediately *before* the
+current object. This is useful when you have an object that refers to
+other objects, e.g. a chunked file, and want to ensure that all of the
+dependency objects are replicated before the dependent object.
+
+### send/2
+
+```erlang
+(riak_object, RiakClient) -> ok | cancel | [riak_object]
+```
+
+This hook is used in fullsync replication. To send this
+[object][object],
+return `ok`; to prevent the object from being sent, return `cancel`. You
+can also return a list of Riak objects to be replicated immediately
+*before* the current object. This is useful when you have an object
+that refers to other objects, e.g. a chunked file, and want to ensure
+that all of the dependency objects are replicated before the dependent
+object.
+
+### recv/1
+
+```erlang
+(riak_object) -> ok | cancel
+```
+
+When an [object][object]
+is received by the client site, this hook is run. You can use it to
+update metadata or to deny the object.
+
+## Implementing a Sample Replication Hook
+
+The following is a simple replication hook that will log when an object
+is received via replication. For more information about the functions in
+the sample, see the [Replication Hook API](#replication-hook-api) section above.
+
+Here is the relevant Erlang code:
+
+```erlang
+%% Riak Enterprise MDC replication hook sample
+
+-module(riak_replication_hook_sample).
+-export([register/0]).
+-export([recv/1, send/2, send_realtime/2]). 
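+
+%% recv/1 runs on the receiving (sink) side of replication, while
+%% send/2 (fullsync) and send_realtime/2 (realtime) run on the sending
+%% side, as described in the Replication Hook API section above.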
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + + + + + diff --git a/content/riak/kv/3.0.2/developing/app-guide.md b/content/riak/kv/3.0.2/developing/app-guide.md new file mode 100644 index 0000000000..e602b48867 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/app-guide.md @@ -0,0 +1,420 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/3.0.2/dev/using/application-guide/ + - /riak/kv/3.0.2/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/3.0.2/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/3.0.2/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/3.0.2/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/3.0.2/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/3.0.2/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/3.0.2/developing/key-value-modeling +[dev data types]: {{}}riak/kv/3.0.2/developing/data-types +[dev data types#counters]: {{}}riak/kv/3.0.2/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/3.0.2/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/3.0.2/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/3.0.2/developing/usage/creating-objects +[usage search]: {{}}riak/kv/3.0.2/developing/usage/search +[use ref search]: {{}}riak/kv/3.0.2/using/reference/search +[usage 2i]: {{}}riak/kv/3.0.2/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/3.0.2/developing/client-libraries +[concept crdts]: {{}}riak/kv/3.0.2/learn/concepts/crdts +[dev data model]: {{}}riak/kv/3.0.2/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/3.0.2/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/3.0.2/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/3.0.2/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/3.0.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/3.0.2/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/3.0.2/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/3.0.2/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/3.0.2/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/3.0.2/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/3.0.2/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/3.0.2/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/3.0.2/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/3.0.2/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/3.0.2/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/3.0.2/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/3.0.2/using/reference/strong-consistency 
+[cluster ops strong consistency]: {{}}riak/kv/3.0.2/using/cluster-operations/strong-consistency
+[config strong consistency]: {{}}riak/kv/3.0.2/configuring/strong-consistency
+[apps strong consistency]: {{}}riak/kv/3.0.2/developing/app-guide/strong-consistency
+[usage update objects]: {{}}riak/kv/3.0.2/developing/usage/updating-objects
+[apps replication properties]: {{}}riak/kv/3.0.2/developing/app-guide/replication-properties
+[install index]: {{}}riak/kv/3.0.2/setup/installing
+[getting started]: {{}}riak/kv/3.0.2/developing/getting-started
+[usage index]: {{}}riak/kv/3.0.2/developing/usage
+[glossary]: {{}}riak/kv/3.0.2/learn/glossary
+
+So you've decided to build an application using Riak as a data store. We
+think that this is a wise choice for a broad variety of use cases. But
+using Riak isn't always straightforward, especially if you're used to
+developing with relational databases like MySQL or PostgreSQL or
+non-persistent key/value stores like Redis. So in this guide, we'll walk
+you through a set of questions that should be asked about your use case
+before getting started. The answers to those questions may inform
+decisions about which Riak features you should use, what kind of
+replication and conflict resolution strategies you should employ, and
+perhaps even how parts of your application should be built.
+
+## What Kind of Data Are You Storing?
+
+This is an important initial question for two reasons:
+
+1. Not all data is a good fit for Riak. If your data isn't a good fit,
+we would advise that you seek out a storage system that better suits
+your needs.
+2. The kinds of data that you're storing should guide your decision both
+about _how_ to store and access your data in Riak and about which Riak
+features would be helpful (and which ones might even be harmful).
+
+### Good Fits for Riak
+
+Riak tends to be an excellent choice if you're dealing with any of the
+following:
+
+* **Immutable data** - While Riak provides several means of
+  [resolving conflicts][usage conflict resolution] between different replicas
+  of objects, those processes can lead to slower performance in some
+  cases. Storing immutable data means that you can avoid those processes
+  altogether and get the most out of Riak.
+* **Small objects** - Riak was not built as a store for large objects
+  like video files or other
+  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
+  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
+  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
+  to run smaller than 1 MB.
+* **Independent objects** - Objects that do not have interdependencies
+  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
+* **Objects with "natural" keys** - It is almost always advisable to
+  build keys for objects out of timestamps, [usernames][dev data model#user],
+  or other ["natural" markers][dev kv model] that distinguish
+  that object from other objects. Data that can be modeled this way fits
+  nicely with Riak because Riak emphasizes extremely fast object lookup.
+* **Data compatible with [Riak Data Types][dev data types]** - If
+  you're working with mutable data, one option is to run basic CRUD
+  operations on that data in a standard key/value fashion and either
+  manage conflict resolution yourself or allow Riak to do so. 
But if
+  your data can be modeled as a [counter][dev data types#counters],
+  [set][dev data types#sets], or [map][dev data types#maps], you
+  should seriously consider using [Riak Data Types][dev data types],
+  which can speed application development and transfer a great deal of
+  complexity away from the application and to Riak itself.
+
+### Not-so-good Fits for Riak
+
+Riak may not be such a good choice if you use it to store:
+
+* **Objects that exceed 1-2MB in size** - If you will be
+  storing a lot of objects over that size, we would recommend checking
+  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
+  CS was built to solve this problem. Storing large objects in Riak will
+  typically lead to substandard performance.
+* **Objects with complex interdependencies** - If your data cannot be
+  easily denormalized or if it requires that objects can be easily
+  assembled into and accessible as larger wholes---think columns or
+  tables---then you might want to consider a relational database
+  instead.
+
+### Conclusion
+
+If it sounds like Riak is a good choice for some or all of your
+application's data needs, move on to the next sections, where you can
+find out more about which Riak features are recommendable for your use
+case, how you should model your data, and what kinds of data modeling
+and development strategies we recommend.
+
+## Which Features Should You Consider?
+
+Basic CRUD key/value operations are almost always the most performant
+operations when using Riak. If your needs can be served using CRUD
+operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
+operations don't quite suffice for your use case, Riak offers a variety
+of features that may be just what you're looking for. In the sections
+immediately below, you can find brief descriptions of those features as
+well as relevant links to Basho documentation.
+
+## Search
+
+Riak Search provides you with [Apache
+Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
+querying on top of the scalability, fault tolerance, and operational
+simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
+Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
+using the Solr API.
+
+* [Using Search][usage search] - Getting started with Riak Search
+* [Search Details][use ref search] - A detailed overview of the concepts and design
+  considerations behind Riak Search
+* [Search Schema][usage search schema] - How to create custom schemas for extracting data
+  from Riak Search
+
+### When to Use Search
+
+* **When you need a rich querying API** - Riak Search gives you access
+  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
+  broad API, which enables you to query on the basis of wildcards,
+  strings, booleans, geolocation, ranges, language-specific fulltext,
+  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] (documentation coming soon).
+
+> **Search is preferred for querying**
+>
+> In general, you should consider Search to be the default choice for
+nearly all querying needs that go beyond basic CRUD/KV operations. If
+your use case demands some sort of querying mechanism and you're in
+doubt about what to use, you should assume that Search is the right tool
+for you. 
+
+### When Not to Use Search
+
+* **When deep pagination is needed** - At the moment, you should
+  consider [secondary indexes][usage 2i] instead of
+  Search if your use case requires deep pagination. This will be
+  changed, however, in a future release of Riak, at which point you
+  should consider Search the default choice for _all_ querying needs.
+* **In large clusters** - In clusters larger than 8-10 nodes, you may
+  experience slower performance when using Search. In clusters of that
+  size, we would recommend using Search in a limited fashion, setting
+  up a separate, dedicated cluster for Search data, or finding another
+  solution.
+
+## Riak Data Types
+
+When performing basic K/V operations, Riak is agnostic toward the actual
+data stored within objects. Beginning with Riak 2.0, however, you now
+have access to operations-based objects based on academic research on
+[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak
+Data Types enable you to update and read [counters][dev data types#counters],
+[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps.
+
+The beauty of Riak Data Types is that all convergence logic is handled
+by Riak itself according to deterministic, Data Type-specific rules,
+which means that your application doesn't need to reason about
+[siblings][usage conflict resolution]. In many cases, this can
+unburden applications of the need to handle object convergence on their
+own.
+
+* [Using Data Types][dev data types] - A guide to setting up Riak to use Data Types,
+  including a variety of code samples for all of Basho's official
+  [client libraries][dev client libraries]
+* [Data Types][concept crdts] - A theoretical treatment of Riak Data Types, along
+  with implementation details
+* [Data Modeling with Riak Data Types][dev data model] - An object modeling example that relies on Riak Data Types.
+
+> **Note**:
+>
+> Riak Data Types can be used in conjunction with Riak Search,
+meaning that the data stored in counters, sets, and maps can be indexed
+and searched just like any other data in Riak. Documentation on Data
+Types and Search is coming soon.
+
+### When to Use Riak Data Types
+
+* **When your data fits** - If the data that you're storing can be
+  modeled as one of the five available types, Riak Data Types could be a
+  very good option. Please note that in many cases there may not be a
+  1:1 correspondence between the five available types and the data that
+  you'd like to store, but there may be workarounds to close the gap.
+  Most things that can be stored as JSON, for example, can be stored as
+  maps (though with modifications).
+* **When you don't need to reason about siblings** - If your use case
+  doesn't require that your application have access to siblings and
+  allows for sibling convergence logic to take place at the Riak level
+  rather than at the application level, then Riak Data Types are well
+  worth exploring.
+
+### When Not to Use Riak Data Types
+
+* **When you need to provide your own convergence logic** - If your
+  application needs to have access to all sibling values, then Riak Data
+  Types are not a good choice because they by definition do not produce
+  siblings.
+* **When your data just doesn't fit** - While the five existing Data
+  Types allow for a great deal of flexibility and a wide range of use
+  cases, they don't cover all use cases. 
If you have data that requires
+  a modeling solution that can't be covered, you should stick to
+  standard K/V operations.
+* **When object size is of significant concern** - Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth. Riak
+comes equipped with a set of default MapReduce jobs that you can employ,
+or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce] - A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce] - A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only** - You should use MapReduce only when
+  truly necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do** - Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i] - A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i] - Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination** - At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead.
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes** - If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes. 
+* **If you're using Bitcask** - 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy in accordance with one of these Data Types. To
+see if this feature might be a good fit for your application, we
+recommend checking out the following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What Are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers that want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable?
+
+Although Riak always performs best when storing and retrieving immutable
+data, Riak also handles mutable objects very ably using a variety of
+eventual consistency principles. 
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] - Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] - A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] - How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] - A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] - A listing of frequently used terms in Riak's + documentation + + + + + diff --git a/content/riak/kv/3.0.2/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/3.0.2/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..393de996cd --- /dev/null +++ b/content/riak/kv/3.0.2/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,802 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/3.0.2/dev/advanced/mapreduce/ + - /riak/kv/3.0.2/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/3.0.2/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/3.0.2/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/3.0.2/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/3.0.2/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/3.0.2/learn/glossary/#vnode +[config reference]: {{}}riak/kv/3.0.2/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to
+run MapReduce jobs using Erlang or JavaScript.
+
+{{% note title="Deprecation Warning" %}}
+JavaScript MapReduce is deprecated and will be removed in a future version.
+{{% /note %}}
+
+
+### Why Do We Use MapReduce for Querying Riak KV?
+
+Key/value stores like Riak KV generally do not offer the kinds of complex
+querying capabilities found in other data storage systems, such as
+relational databases. MapReduce enables you to perform powerful queries
+over the data stored in Riak KV but should be used with caution.
+
+The main goal of MapReduce is to spread the processing of a query across
+many systems to take advantage of parallel processing power. This is
+generally done by dividing the query into several steps, i.e. dividing
+the dataset into several chunks and then running those step/chunk pairs
+on separate physical hosts. Riak KV's MapReduce has an additional goal:
+increasing data locality. When processing a large dataset, it's often
+much more efficient to take the computation to the data than it is to
+bring the data to the computation.
+
+"Map" and "Reduce" are phases in the query process. Map functions take
+one piece of data as input and produce zero or more results as output.
+If you're familiar with [mapping over a list][mapping list]
+in functional programming languages, you're already familiar with the
+"Map" steps in a MapReduce query.
+
+## MapReduce caveats
+
+MapReduce should generally be treated as a fallback rather than a
+standard part of an application. There are often ways to model data
+such that dynamic queries become single key retrievals, which are
+dramatically faster and more reliable in Riak KV, and tools such as Riak
+Search and 2i are simpler to use and may place less strain on a
+cluster.
+
+### R=1
+
+One consequence of Riak KV's processing model is that MapReduce queries
+have an effective `R` value of 1. The queries are distributed
+to a representative sample of the cluster where the data is expected to
+be found, and if one server lacks a copy of data it's supposed to have,
+a MapReduce job will not attempt to look for it elsewhere.
+
+For more on the value of `R`, see our documentation on [replication properties][apps replication properties].
+
+### Key lists
+
+Asking Riak KV to generate a list of all keys in a production environment
+is generally a bad idea. It's an expensive operation.
+
+Attempting to constrain that operation to a bucket (e.g.,
+`mapred_bucket` as used below) does not help because Riak KV must still
+pull all keys from storage to determine which ones are in the
+specified bucket.
+
+If at all possible, run MapReduce against a list of known keys.
+
+### Code distribution
+
+As we'll discuss in this document, the functions invoked from Erlang
+MapReduce must be available on all servers in the cluster unless
+using the client library from an Erlang shell.
+
+### Security restrictions
+
+If Riak's security functionality is enabled, there are two
+restrictions on MapReduce that come into play:
+
+* The `riak_kv.mapreduce` permission must be granted to the user (or
+  via the user's groups)
+* Other than the module `riak_kv_mapreduce`, any Erlang modules
+  distributed with Riak KV will **not** be accessible to custom MapReduce
+  code unless made available via the `add_path` mechanism documented
+  in [Installing Custom Code][use ref custom code].
+
+## How Riak KV's MapReduce Queries Are Specified
+
+MapReduce queries in Riak KV have two components: (1) a list of inputs and
+(2) a list of "steps," or "phases."
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any is included, with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it.
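+
+As a minimal hypothetical sketch of the first strategy, the set-union
+phase described above can be written so that its output is "the same
+shape" as its input (a flat list of values), which makes re-reduce
+harmless:
+
+```erlang
+%% Safe under re-reduce: deduplicates and sorts a flat list of values,
+%% so feeding its own output back in changes nothing.
+fun(Values, _Arg) ->
+    lists:usort(Values)
+end.
+```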
+ +### How a Link Phase Works in Riak KV + +Link phases find links matching patterns specified in the query +definition. The patterns specify which buckets and tags links must have. + +"Following a link" means adding it to the output list of this phase. The +output of this phase is often most useful as input to a map phase or to +another reduce phase. + +## Invoking MapReduce + +To illustrate some key ideas, we'll define a simple module that +implements a map function to return the key value pairs contained in a +bucket and use it in a MapReduce query via Riak KV's HTTP API. + +Here is our example MapReduce function: + +```erlang +-module(mr_example). + +-export([get_keys/3]). + +% Returns bucket and key pairs from a map phase +get_keys(Value,_Keydata,_Arg) -> + [{riak_object:bucket(Value),riak_object:key(Value)}]. +``` + +Save this file as `mr_example.erl` and proceed to compiling the module. + +{{% note title="Note on the Erlang Compiler" %}} +You must use the Erlang compiler (`erlc`) associated with the +Riak KV installation or the version of Erlang used when compiling Riak KV from +source. +{{% /note %}} + +Compiling the module is a straightforward process: + +```bash +erlc mr_example.erl +``` + +Successful compilation will result in a new `.beam` file, `mr_example.beam`. + +Send this file to your operator, or read about [installing custom code][use ref custom code] +on your Riak KV nodes. Once your file has been installed, all that +remains is to try the custom function in a MapReduce query. For +example, let's return keys contained within a bucket named `messages` +(please pick a bucket which contains keys in your environment). + +```curl +curl -XPOST localhost:8098/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' +``` + +The result should be a JSON map of bucket and key names expressed as key/value pairs. + +{{% note %}} +Be sure to install the MapReduce function as described above on all of +the nodes in your cluster to ensure proper operation. +{{% /note %}} + + +## Phase functions + +MapReduce phase functions have the same properties, arguments, and +return values whether you write them in Javascript or Erlang. + +### Map phase functions + +Map functions take three arguments (in Erlang, arity-3 is required). +Those arguments are: + + 1. `Value`: the value found at a key. This will be a Riak object, which + in Erlang is defined and manipulated by the `riak_object` module. + In Javascript, a Riak object looks like this: + + ```javascript + { + "bucket_type" : BucketTypeAsString, + "bucket" : BucketAsString, + "key" : KeyAsString, + "vclock" : VclockAsString, + "values" : [ + { + "metadata" : { + "X-Riak-VTag":VtagAsString, + "X-Riak-Last-Modified":LastModAsString, + "Links":[...List of link objects], + // ...other metadata... + }, + "data" : ObjectData + }, + // ...other metadata/data values (siblings)... + ] + } + ``` + 2. *KeyData* : key data that was submitted with the inputs to the query or phase. + 3. *Arg* : a static argument for the entire phase that was submitted with the query. + +A map phase should produce a list of results. You will see errors if +the output of your map function is not a list. Return the empty list if +your map function chooses not to produce output. If your map phase is +followed by another map phase, the output of the function must be +compatible with the input to a map phase - a list of bucket-key pairs or +`bucket-key-keydata` triples. 
+
+#### Map function examples
+
+This map function returns the value (data) of the object being mapped:
+
+```erlang
+fun(Value, _KeyData, _Arg) ->
+    [riak_object:get_value(Value)]
+end.
+```
+
+This map function filters its inputs based on the arg and returns
+bucket-key pairs for a subsequent map phase:
+
+```erlang
+fun(Value, _KeyData, Arg) ->
+    Key = riak_object:key(Value),
+    Bucket = riak_object:bucket(Value),
+    case erlang:byte_size(Key) of
+        L when L > Arg ->
+            [{Bucket,Key}];
+        _ -> []
+    end
+end.
+```
+
+### Reduce phase functions
+
+Reduce functions take two arguments. Those arguments are:
+
+1. *ValueList*: the list of values produced by the preceding phase in the MapReduce query.
+2. *Arg*: a static argument for the entire phase that was submitted with the query.
+
+A reduce function should produce a list of values, but it must also be
+true that the function is commutative, associative, and idempotent. That
+is, if the input list `[a,b,c,d]` is valid for a given F, then all of
+the following must produce the same result:
+
+
+```erlang
+  F([a,b,c,d])
+  F([a,d] ++ F([c,b]))
+  F([F([a]),F([c]),F([b]),F([d])])
+```
+
+#### Reduce function examples
+
+This reduce function assumes the values in the input are numbers and
+sums them:
+
+```erlang
+fun(Values, _Arg) ->
+  [lists:foldl(fun erlang:'+'/2, 0, Values)]
+end.
+```
+
+This reduce function sorts its inputs:
+
+```erlang
+fun(Values, _Arg) ->
+  lists:sort(Values)
+end.
+```
+
+## MapReduce Examples
+
+Riak KV supports describing MapReduce queries in Erlang syntax through the
+Protocol Buffers API. This section demonstrates how to do so using the
+Erlang client.
+
+{{% note title="Distributing Erlang MapReduce Code" %}}
+Any modules and functions you use in your Erlang MapReduce calls must be
+available on all nodes in the cluster. Please read about
+[installing custom code]({{}}riak/kv/3.0.2/using/reference/custom-code).
+{{% /note %}}
+
+### Erlang Example
+
+Before running some MapReduce queries, let's create some objects to
+run them on. Unlike the first example, in which we compiled
+`mr_example.erl` and distributed it across the cluster, this time
+we'll use the [Erlang client library][erlang client] and shell.
+
+```erlang
+1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087).
+2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>,
+                        term_to_binary(["eggs", "bacon"])).
+3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>,
+                         term_to_binary(["bread", "bacon"])).
+4> riakc_pb_socket:put(Client, Yours, [{w, 1}]).
+5> riakc_pb_socket:put(Client, Mine, [{w, 1}]).
+```
+
+Now that we have a client and some data, let's run a query and count
+the occurrences of each grocery item.
+
+```erlang
+6> Count = fun(G, undefined, none) ->
+             [dict:from_list([{I, 1}
+               || I <- binary_to_term(riak_object:get_value(G))])]
+           end.
+7> Merge = fun(Gcounts, none) ->
+             [lists:foldl(fun(G, Acc) ->
+                            dict:merge(fun(_, X, Y) -> X+Y end,
+                                       G, Acc)
+                          end,
+                          dict:new(),
+                          Gcounts)]
+           end.
+8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred(
+                        Client,
+                        [{<<"groceries">>, <<"mine">>},
+                         {<<"groceries">>, <<"yours">>}],
+                        [{map, {qfun, Count}, none, false},
+                         {reduce, {qfun, Merge}, none, true}]).
+9> L = dict:to_list(R).
+```
+
+{{% note title="Riak Object Representations" %}}
+Note how the `riak_object` module is used in the MapReduce
+function but the `riakc_obj` module is used on the client.
+Riak objects are represented differently internally to the cluster than
+they are externally.
+{{% /note %}}
+
+Given the lists of groceries we created, the sequence of commands above
+would result in L being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`.
+
+### Erlang Query Syntax
+
+`riakc_pb_socket:mapred/3` takes a client and two lists as arguments.
+The first list contains bucket-key pairs. The second list contains
+the phases of the query.
+
+`riakc_pb_socket:mapred_bucket/3` replaces the first list of
+bucket-key pairs with the name of a bucket; see the warnings above
+about using this in a production environment.
+
+#### Inputs
+
+The `mapred/3` input objects are given as a list of tuples in the
+format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and
+`Key` should be binaries, and `KeyData` can be any Erlang term. The
+former form is equivalent to `{{Bucket,Key},undefined}`.
+
+#### Query
+
+The query is given as a list of map, reduce and link phases. Map and
+reduce phases are each expressed as tuples in the following form:
+
+
+```erlang
+{Type, FunTerm, Arg, Keep}
+```
+
+`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument
+(any Erlang term) to pass to each execution of the phase. `Keep` is
+either `true` or `false` and determines whether results from the phase
+will be included in the final value of the query. Riak KV assumes that the
+final phase will return results.
+
+`FunTerm` is a reference to the function that the phase will execute and
+takes any of the following forms:
+
+* `{modfun, Module, Function}` where `Module` and `Function` are atoms
+  that name an Erlang function in a specific module
+* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous
+  function)
+* `{jsfun,Name}` where `Name` is a binary that, when evaluated in
+  Javascript, points to a built-in Javascript function
+* `{jsanon, Source}` where `Source` is a binary that, when evaluated in
+  Javascript, is an anonymous function
+* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains
+  the source for an anonymous Javascript function
+
+{{% note title="qfun Note" %}}
+Using `qfun` in compiled applications can be a fragile
+operation. Please keep the following points in mind:
+
+1. The module in which the function is defined must be present and
+exactly the same version on both the client and Riak KV nodes.
+
+2. Any modules and functions used by this function (or any function in
+the resulting call stack) must also be present on the Riak KV nodes.
+
+Errors about failures to ensure both 1 and 2 are often surprising,
+usually seen as opaque missing-function or function-clause
+errors. Especially in the case of differing module versions, this can be
+difficult to diagnose without expecting the issue and knowing of
+`Module:info/0`.
+
+When using the Erlang shell, anonymous MapReduce functions can be
+defined and sent to Riak KV instead of deploying them to all servers in
+advance, but condition #2 above still holds.
+{{% /note %}}
+
+Link phases are expressed in the following form:
+
+
+```erlang
+{link, Bucket, Tag, Keep}
+```
+
+
+`Bucket` is either a binary name of a bucket to match, or the atom `_`,
+which matches any bucket. `Tag` is either a binary tag to match, or the
+atom `_`, which matches any tag. `Keep` has the same meaning as in map
+and reduce phases.
+
+
+> There is a small group of prebuilt Erlang MapReduce functions available
+with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl).
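+
+For instance, as a small hypothetical sketch reusing the `groceries`
+objects stored above, the prebuilt `reduce_count_inputs` function can
+count query inputs without deploying any custom code (the phase spec
+follows the `{modfun, Module, Function}` form described earlier):
+
+```erlang
+%% Count the inputs fed into the query; expect something like [2] for
+%% the two bucket-key pairs (on re-reduce, partial counts are summed).
+{ok, [{0, [2]}]} =
+    riakc_pb_socket:mapred(Client,
+                           [{<<"groceries">>, <<"mine">>},
+                            {<<"groceries">>, <<"yours">>}],
+                           [{reduce, {modfun, riak_kv_mapreduce,
+                                      reduce_count_inputs},
+                             none, true}]).
+```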
+
+## Bigger Data Examples
+
+### Loading Data
+
+This Erlang script will load historical stock-price data for Google
+(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it.
+Paste the code below into a file called `load_data.erl` inside the `dev`
+directory (or download it below).
+
+```erlang
+#!/usr/bin/env escript
+%% -*- erlang -*-
+main([]) ->
+    io:format("Requires one argument: filename with the CSV data~n");
+main([Filename]) ->
+    {ok, Data} = file:read_file(Filename),
+    Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])),
+    lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines).
+
+format_and_insert(Line) ->
+    JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line),
+    Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]),
+    io:format("Inserting: ~s~n", [hd(Line)]),
+    os:cmd(Command).
+```
+
+Make the script executable:
+
+```bash
+chmod +x load_data.erl
+```
+
+Download the CSV file of stock data linked below and place it in the
+`dev` directory where we've been working.
+
+* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) - Google historical stock data
+* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) - Alternative script in Ruby to load the data
+* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) - Erlang script to load data (as shown in snippet)
+
+Now load the data into Riak KV.
+
+```bash
+./load_data.erl goog.csv
+```
+
+### Map only: find the days on which the high was over $600.00
+
+From the Erlang shell with the client library loaded, let's define a
+function that will check each value in our `goog` bucket to see if
+the stock's high for the day was above $600.
+
+```erlang
+> HighFun = fun(O, _, LowVal) ->
+>   {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+>   High = proplists:get_value(<<"High">>, Map, -1.0),
+>   case High > LowVal of
+>     true -> [riak_object:key(O)];
+>     false -> []
+> end end.
+#Fun
+```
+
+Now we'll use `mapred_bucket/3` to send that function to the cluster.
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]).
+  {ok,[{0,
+        [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>,
+         <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>,
+         <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>,
+         <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>,
+         <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>,
+         <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>,
+         <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>,
+         <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]}
+```
+
+### Map only: find the days on which the close is lower than open
+
+This example is slightly more complicated: instead of comparing a
+single field against a fixed value, we're looking for days when the
+stock declined.
+
+```erlang
+> CloseLowerFun = fun(O, _, _) ->
+>   {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+>   Close = proplists:get_value(<<"Close">>, Map, -1.0),
+>   Open = proplists:get_value(<<"Open">>, Map, -2.0),
+>   case Close < Open of
+>     true -> [riak_object:key(O)];
+>     false -> []
+> end end.
+#Fun
+
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]).
+{ok,[{0,
+      [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>,
+       <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>,
+       <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>,
+       <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>,
+       <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>,
+       <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>,
+       <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>,
+       <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]}
+```
+
+### Map and Reduce: find the maximum daily variance in price by month
+
+Here things start to get tricky. We'll use map to determine each day's
+rise or fall, and our reduce phase will identify each month's largest
+variance.
+
+```erlang
+DailyMap = fun(O, _, _) ->
+    {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+    Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")),
+    High = proplists:get_value(<<"High">>, Map, 0.0),
+    Low = proplists:get_value(<<"Low">>, Map, 0.0),
+    Month = string:substr(Date, 1, 7),
+    [{Month, abs(High - Low)}]
+end.
+
+MonthReduce = fun(List, _) ->
+    {Highs, _} = lists:foldl(
+        fun({Month, _Value}=Item, {Accum, PrevMonth}) ->
+            case Month of
+                PrevMonth ->
+                    %% Highest value is always first in the list, so
+                    %% skip over this one
+                    {Accum, PrevMonth};
+                _ ->
+                    {[Item] ++ Accum, Month}
+            end
+        end,
+        {[], ""},
+        List),
+    Highs
+end.
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]).
+{ok,[{1,
+      [{"2010-02",10.099999999999909},
+       {"2006-02",11.420000000000016},
+       {"2004-08",8.100000000000009},
+       {"2008-08",14.490000000000009},
+       {"2006-05",11.829999999999984},
+       {"2005-10",4.539999999999964},
+       {"2006-06",7.300000000000011},
+       {"2008-06",9.690000000000055},
+       {"2006-03",11.770000000000039},
+       {"2006-12",4.880000000000052},
+       {"2005-09",9.050000000000011},
+       {"2008-03",15.829999999999984},
+       {"2008-09",14.889999999999986},
+       {"2010-04",9.149999999999977},
+       {"2008-06",14.909999999999968},
+       {"2008-05",13.960000000000036},
+       {"2005-05",2.780000000000001},
+       {"2005-07",6.680000000000007},
+       {"2008-10",21.390000000000043},
+       {"2009-09",4.180000000000007},
+       {"2006-08",8.319999999999993},
+       {"2007-08",5.990000000000009},
+       {[...],...},
+       {...}|...]}]}
+```
+
+### A MapReduce Challenge
+
+Here is a scenario involving the data you already have loaded.
+
+MapReduce Challenge: Find the largest day for each month in terms of
+dollars traded, and subsequently the largest overall day.
+
+*Hint*: You will need at least one each of map and reduce phases.
+
+## Streaming MapReduce
+
+Because Riak KV distributes the map phases across the cluster to increase
+data locality, you can gain access to the results of those individual
+computations as they finish via streaming. Streaming can be very
+helpful when getting access to results from a high latency MapReduce job
+that only contains map phases. Streaming of results from reduce phases
+isn't as useful, but if your map phases return data (keep: true), they
+will be returned to the client even if the reduce phases haven't
+executed. This lets you combine streaming with a reduce phase: the
+client can collect the map results as they arrive and still receive the
+reduce phase's result at the end.
+
+### Streaming via the HTTP API
+
+You can enable streaming with MapReduce jobs submitted to the `/mapred`
+resource by adding `?chunked=true` to the URL. The response will be sent
+using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`.
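+
+As a quick hypothetical sketch (reusing the `mr_example` query from
+earlier in this document), a streamed submission looks like this:
+
+```curl
+curl -XPOST 'localhost:8098/mapred?chunked=true' \
+  -H 'Content-Type: application/json' \
+  -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}'
+```
+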
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error

+Internal Server Error
+
+The server encountered an error while processing this request:
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, typically the information you need is buried more deeply within the stack.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
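+
+A quick check of the corrected call from the shell confirms the fix:
+
+```erlang
+> string:substr("2009-06-10", 1, 7).
+"2009-06"
+```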
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/app-guide/cluster-metadata.md b/content/riak/kv/3.0.2/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..ebdc67f03d
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,72 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+Cluster metadata is a subsystem inside of Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/3.0.2/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/3.0.2/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/3.0.2/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
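+
+As a minimal hypothetical sketch (the namespace and key below are
+invented for illustration), working with that interface from an
+attached Erlang console might look like the following:
+
+```erlang
+%% Values are opaque Erlang terms addressed by a {Prefix, SubPrefix}
+%% namespace plus a key; writes are acked locally, then broadcast.
+FullPrefix = {my_app, settings},
+ok = riak_core_metadata:put(FullPrefix, max_widgets, 1000),
+1000 = riak_core_metadata:get(FullPrefix, max_widgets),
+ok = riak_core_metadata:delete(FullPrefix, max_widgets).
+```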
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/app-guide/reference.md b/content/riak/kv/3.0.2/developing/app-guide/reference.md
new file mode 100644
index 0000000000..5d11c9fa50
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/app-guide/reference.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+#menu:
+#  riak_kv-3.0.2:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/app-guide/replication-properties.md b/content/riak/kv/3.0.2/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..23232cbce0
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/app-guide/replication-properties.md
@@ -0,0 +1,584 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/advanced/replication-properties
+  - /riak/kv/3.0.2/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/3.0.2/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/3.0.2/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/3.0.2/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/3.0.2/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/3.0.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/3.0.2/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/3.0.2/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties at the time of each read or write.
+Examples of this approach are shown in the [client-level replication
+settings]({{}}riak/kv/3.0.2/developing/app-guide/replication-properties#client-level-replication-settings)
+section below.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/3.0.2/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props` those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/3.0.2/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting this parameter to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/3.0.2/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/3.0.2/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes are failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                       <<"giraffe">>,
+                       <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/3.0.2/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the *primary
+read* (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
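+As an illustrative sketch, a read that requires two primary vnodes to
+respond can be expressed through the HTTP API's `pr` query parameter
+(reusing the bucket and key from the earlier examples):
+
+```curl
+curl "http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee?pr=2"
+```
+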
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/3.0.2/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/3.0.2/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/3.0.2/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
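+
+As a hypothetical sketch, both settings can be combined on a single
+read via HTTP query parameters (the bucket and key are reused from the
+examples above):
+
+```curl
+curl "http://localhost:8098/buckets/animal_facts/keys/chimpanzee?notfound_ok=false&basic_quorum=true"
+```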
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
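+
+As a sketch, the Python client accepts these symbolic names as strings
+wherever an integer quorum value is allowed (reusing the `client` from
+the examples below):
+
+```python
+bucket = client.bucket('nba_stats')
+
+# Read from a majority of replicas, then write to all of them
+obj = bucket.get('john_stockton', r='quorum')
+obj.store(w='all')
+```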
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.raw_data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{\"stats\":{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{\"stats\":{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/3.0.2/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/3.0.2/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/3.0.2/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+Tuning CAP Controls in Riak from
+Basho Technologies on Vimeo.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/app-guide/strong-consistency.md b/content/riak/kv/3.0.2/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..d0f6b89079
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/app-guide/strong-consistency.md
@@ -0,0 +1,261 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/advanced/strong-consistency
+  - /riak/kv/3.0.2/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/3.0.2/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/3.0.2/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/3.0.2/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/3.0.2/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/3.0.2/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/3.0.2/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/3.0.2/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/3.0.2/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/3.0.2/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/3.0.2/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/3.0.2/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/3.0.2/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/3.0.2/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/3.0.2/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/3.0.2/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/3.0.2/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/3.0.2/developing/client-libraries
+[getting started]: {{}}riak/kv/3.0.2/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/3.0.2/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent and
+partition-tolerant) for all of the data in that bucket. You can store just some of
+your data in strongly consistent buckets or all of your data, depending
+on your use case. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability because a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
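+
+As a quick sketch of what this looks like from the client side (using
+the Python client; the bucket and key names here are illustrative):
+
+```python
+import riak
+
+client = riak.RiakClient(pb_port=8087)
+bucket = client.bucket_type('strongly_consistent').bucket('fruits')
+
+obj = bucket.new('apple', data={'color': 'red'})
+obj.store()  # succeeds only if a quorum of primary vnodes is reachable
+```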
+
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function quite differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations let you set a variety of
+[replication properties][apps replication properties] either on each request or at the
+bucket level [using bucket types][usage bucket types], those settings (including `r`,
+`pr`, `w`, and `rw`) are quietly ignored for strongly consistent
+operations. Two replication properties that _can_ be set, however, are
+`n_val` and `return_body`.
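+
+For example, with the Python client a store against a strongly
+consistent bucket can still ask for the written value back via
+`return_body` (a sketch; the bucket and key are illustrative):
+
+```python
+bucket = client.bucket_type('strongly_consistent').bucket('fruits')
+
+obj = bucket.new('pear', data={'color': 'green'})
+obj.store(return_body=True)  # ask Riak to send the stored object back
+```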
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients, but with important advantages over vector
+clocks.
+
+While we strongly recommend attaching context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---context is purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to make writes to nonexistent keys without attaching context,
+we recommend doing this only if you are certain that the key does not
+yet exist.
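+
+In practice this means that every update should be a read-modify-write
+cycle. A minimal sketch with the Python client, which carries the
+context on the fetched object for you (the names are illustrative):
+
+```python
+bucket = client.bucket_type('strongly_consistent').bucket('users')
+
+obj = bucket.get('ahmed')  # fetch first to obtain the causal context
+obj.data = {'name': 'Ahmed', 'email': 'ahmed@example.com'}
+obj.store()                # the write carries the fetched context
+```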
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure, then you should default to supplying a context object.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates. A retry
+   sketch for this case follows below.
+
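+Because such failures are expected rather than exceptional, a common
+pattern is to wrap the whole read/modify/write cycle in a retry loop.
+A minimal sketch with the Python client (the bucket, key, and retry
+count are illustrative; the client currently surfaces conditional-put
+failures as a generic `RiakError`, as discussed under "Known Issue
+with Client Libraries" below):
+
+```python
+from riak import RiakError
+
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+def update_with_retry(key, modify, retries=5):
+    for _ in range(retries):
+        obj = bucket.get(key)     # re-fetch for a fresh causal context
+        obj.data = modify(obj.data)
+        try:
+            obj.store()           # fails if a concurrent update won
+            return obj
+        except RiakError:
+            continue              # retry the full read/modify/write cycle
+    raise RuntimeError('update failed after %d attempts' % retries)
+```
+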
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+non-strongly-consistent buckets. One important exception to this is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, and so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a non-empty key without including causal
+context, you will receive the following error:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+Precondition Failed
+
+Precondition Failed
+
+
+mochiweb+webmachine web server
+```
+
+> **Getting Started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section.
+
+## Known Issue with Client Libraries
+
+All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no
+problems, since many error conditions are normal when using Riak.
+
+When working with strong consistency, however, operations like
+[conditional puts][config strong consistency#details] commonly
+produce errors that are difficult for clients to interpret. For example,
+it is expected behavior for conditional puts to fail in the case of
+concurrent updates to an object. At present, the official Riak clients
+will convert this failure into an exception that is no different from
+other error conditions, i.e. they will not indicate any
+strong-consistency-specific errors.
+
+The best solution to this problem at the moment is to catch these
+exceptions on the application side and parse server-side error messages
+to see if the error involved a conditional failure. If so, you should
+set up your application to retry any updates, perhaps a specified number
+of times or perhaps indefinitely, depending on the use case.
+
+If you do set up retry logic of this sort, however, it is necessary
+to retry the entire read/modify/put cycle, meaning that you will need
+to fetch the object, modify it, and then write. If you perform a simple
+put over and over again, without reading the object, the update will
+continue to fail.
+
+A future version of Riak will address these issues by modifying the
+server API to more accurately report errors specific to strongly
+consistent operations.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/app-guide/write-once.md b/content/riak/kv/3.0.2/developing/app-guide/write-once.md
new file mode 100644
index 0000000000..cdc9bb0b48
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/app-guide/write-once.md
@@ -0,0 +1,159 @@
+---
+title: "Write Once"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Write Once"
+    identifier: "app_guide_write_once"
+    weight: 102
+    parent: "developing_app_guide"
+toc: true
+version_history:
+  in: "2.1.0+"
+aliases:
+  - /riak/3.0.2/dev/advanced/write-once
+  - /riak/kv/3.0.2/dev/advanced/write-once
+---
+
+[glossary vnode]: {{}}riak/kv/3.0.2/learn/glossary/#vnode
+[bucket type]: {{}}riak/kv/3.0.2/developing/usage/bucket-types
+[Riak data types]: {{}}riak/kv/3.0.2/developing/data-types
+[strong consistency]: {{}}riak/kv/3.0.2/developing/app-guide/strong-consistency
+
+Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution.
+
+{{% note %}}
+Write-once buckets do not support Riak commit hooks. Because Riak objects are
+inserted into the realtime queue using a postcommit hook, realtime replication
+is unavailable for write-once buckets. Fullsync replication will, however,
+replicate the data.
+{{% /note %}}
+
+## Configuration
+
+When the new `write_once` [bucket type][bucket type] parameter is set to
+`true`, buckets of this type will treat all key/value entries as semantically
+"write once"; once written, entries should not be modified or overwritten by the user.
+
+The `write_once` property is a boolean property applied to a bucket type and
+may only be set at bucket creation time. Once a bucket type has been set with
+this property and activated, the `write_once` property may not be modified.
+
+The `write_once` property is incompatible with [Riak data types][Riak data types]
+and [strong consistency][strong consistency]. This means that if you attempt
+to create a bucket type with the `write_once` property set to `true`, any
+attempt to set the `datatype` parameter or to set the `consistent` parameter
+to `true` will fail.
+
+The `write_once` property may not be set on the default bucket type, and may
+not be set on individual buckets. If you set the `lww` or `allow_mult`
+parameters on a write-once bucket type, those settings will be ignored, as
+sibling values are disallowed by default.
+
+The following example shows how to configure a bucket type with the
+`write_once` property:
+
+```bash
+riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}'
+# my-bucket-type created
+
+riak-admin bucket-type activate my-bucket-type
+# my-bucket-type has been activated
+
+riak-admin bucket-type status my-bucket-type
+# my-bucket-type is active
+...
+write_once: true
+...
+```
+
+## Quorum
+
+The write path used by write-once buckets supports the `w`, `pw`, and `dw`
+configuration values. However, if `dw` is specified, then the value of `w` is
+taken to be the maximum of the `w` and `dw` values. For example, for an `n_val`
+of 3, if `dw` is set to `all`, then `w` will be `3`.
+
+This write path additionally supports the `sloppy_quorum` property. If set to
+`false`, only primary nodes will be selected for calculation of write quorum
+nodes.
+
+## Runtime
+
+The write-once path circumvents the normal coordinated PUT code path, and
+instead sends write requests directly to all [vnodes][glossary vnode] (or
+vnode proxies) in the effective preference list for the write operation.
+
+In place of the `put_fsm` used in the normal path, we introduce a collection of
+new intermediate worker processes (implementing `gen_server` behavior). The
+role of these intermediate processes is to dispatch put requests to vnode or
+vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`,
+the write-once workers are long-lived for the lifecycle of the `riak_kv`
+application. They are therefore stateful and store request state in a
+state-local dictionary.
+
+The relationship between the `riak_client`, write-once workers, and vnode
+proxies is illustrated in the following diagram:
+
+![Write Once]({{}}images/write_once.png)
+
+## Client Impacts
+
+Since the write-once code path is optimized for writes of data that will not
+be updated and therefore may potentially issue asynchronous writes, some
+client features might not work as expected. For example, PUT requests asking
+for the object to be returned will behave like requests that do not
+request the object to be returned when they are performed against write-once
+buckets.
+
+## Siblings
+
+As mentioned, entries in write-once buckets are intended to be written only
+once---users who are not abusing the semantics of the bucket type should not be
+updating or over-writing entries in buckets of this type. However, it is
+possible for users to misuse the API, accidentally or otherwise, which might
+result in incomparable entries for the same key.
+
+In the case of siblings, write-once buckets will resolve the conflict by
+choosing the "least" entry, where sibling ordering is based on a deterministic
+SHA-1 hash of the objects. While this algorithm is repeatable and deterministic
+at the database level, it will have the appearance to the user of "random write
+wins."
+
+{{% note %}}
+As mentioned in [Configuration](#configuration), write-once buckets and Riak
+Data Types are incompatible because of this.
+{{% /note %}}
+
+## Handoff
+
+The write-once path supports handoff scenarios, such that if a handoff occurs
+during PUTs in a write-once bucket, the values that have been written will be
+handed off to the newly added Riak node.
+
+## Asynchronous Writes
+
+For backends that support asynchronous writes, the write-once path will
+dispatch a write request to the backend and handle the response
+asynchronously. This behavior allows the vnode to free itself for other work
+instead of waiting on the write response from the backend.
+
+At the time of writing, the only backend that supports asynchronous writes is
+LevelDB. Riak will automatically fall back to synchronous writes with all other
+backends.
+
+{{% note title="Note on the `multi` backend" %}}
+The [Multi]({{}}riak/kv/3.0.2/setup/planning/backend/multi) backend does not
+support asynchronous writes. Therefore, if LevelDB is used with the Multi
+backend, it will be used in synchronous mode.
+{{% /note %}}
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/client-libraries.md b/content/riak/kv/3.0.2/developing/client-libraries.md
new file mode 100644
index 0000000000..fd62c6ac9b
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/client-libraries.md
@@ -0,0 +1,294 @@
+---
+title: "Client Libraries"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Client Libraries"
+    identifier: "developing_client_libraries"
+    weight: 106
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/libraries
+  - /riak/kv/3.0.2/dev/using/libraries
+---
+
+## Basho-Supported Libraries
+
+Basho officially supports a number of open-source client libraries for a
+variety of programming languages and environments.
+
+Language | Source | Documentation | Download
+:--------|:-------|:--------------|:--------
+Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22)
+Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client)
+Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads)
+C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases)
+Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases)
+PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) |
+Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client)
+Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client)
+
+**Note**: All official clients use the integrated issue tracker on
+GitHub for bug reporting.
+
+In addition to the official clients, Basho provides some unofficial
+client libraries, listed below. There are also many client libraries and
+related [community projects]({{}}community/projects/).
+
+## Community Libraries
+
+The Riak Community is developing at a break-neck pace, and the number of
+community-contributed libraries and drivers is growing right alongside
+it. Here is a list of projects that may suit your programming needs or
+curiosities. If you know of something that needs to be added or are
+developing something that you wish to see added to this list, please
+fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs)
+and send us a pull request.
+
+{{% note title="Note on community-produced libraries" %}}
+All of these projects and libraries are at various stages of completeness and
+may not suit your application's needs based on their level of maturity and
+activity.
+{{% /note %}}
+
+### Client Libraries and Frameworks
+
+#### C/C++
+
+* [riak-cpp](https://github.com/ajtack/riak-cpp) - A C++ Riak client
+  library for use with C++11 compilers
+* [Riak C Driver](https://github.com/fenek/riak-c-driver) - A library
+  to communicate with Riak using cURL and Protocol Buffers
+* [Riack](https://github.com/trifork/riack) - A simple C client
+  library
+* [Riack++](https://github.com/TriKaspar/riack_cpp) - A C++ wrapper
+  around riack
+
+#### Clojure
+
+* [knockbox](https://github.com/reiddraper/knockbox) - An eventual
+  consistency toolbox for Clojure
+* [Welle](http://clojureriak.info) - An expressive Clojure client with
+  batteries included
+* [clj-riak](http://github.com/mmcgrana/clj-riak) - Clojure bindings
+  to the Riak Protocol Buffers API
+* [sumo](https://github.com/reiddraper/sumo) - A Protocol
+  Buffer-specific client for Riak with KV, 2i, and MapReduce support
+* [kria](https://github.com/bluemont/kria) - Riak 2.0 Asynchronous
+  (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer
+  API, Java 7.
+
+#### ColdFusion
+
+* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) - A Riak-backed cache extension for Railo/ColdFusion
+
+#### Common Lisp
+
+* [cl-riak (1)](https://github.com/whee/cl-riak)
+* [cl-riak (2)](https://github.com/eriknomitch/cl-riak)
+
+#### Dart
+
+* [riak-dart](https://github.com/agilord/riak_dart_client) - HTTP
+  client for Riak written in Dart
+
+#### Django (Python)
+
+* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) - Riak-based Session Backend for Django
+* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) - A Riak backend for Django
+
+#### Erlang
+
+* [Uriak Pool](https://github.com/unisontech/uriak_pool) - Erlang
+  connection pool library from the team at
+  [Unison](http://www.unison.com)
+* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) - Riak
+  Protocol Buffer Client pool application
+* [Pooly](https://github.com/aberman/pooly) - Riak Process Pool
+* [riakpool](https://github.com/dweldon/riakpool) - Application for
+  maintaining a dynamic pool of Protocol Buffer client connections to a
+  Riak database
+* [pooler](https://github.com/seth/pooler) - An OTP Process Pool
+  Application
+* [krc](https://github.com/klarna/krc) - A simple wrapper around the
+  official Riak client for Erlang
+* [riakc_pool](https://github.com/brb/riakc_pool) - A really simple
+  Riak client process pool based on poolboy
+
+#### Go
+
+* [riaken](https://github.com/riaken) - A fast and extendable Riak
+  Protocol Buffer Client
+* [goriakpbc](https://github.com/tpjg/goriakpbc) - A Golang Riak
+  client inspired by the Ruby riak-client from Basho and riakpbc from mrb
+* [riakpbc](https://github.com/mrb/riakpbc) - A Riak Protocol Buffer
+  client in Go
+* [goriak](https://github.com/zegl/goriak) - Go language driver for Riak KV
+
+#### Grails
+
+* [Grails ORM for Riak](http://www.grails.org/plugin/riak)
+
+#### Griffon
+
+* [Riak Plugin for
+  Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin)
+
+#### Groovy
+
+* [spring-riak](https://github.com/jbrisbin/spring-riak) - Riak
+  support from Groovy and/or Java
+
+#### Haskell
+
+* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) - A fast Haskell client library from the team at MailRank.
+
+#### Java
+
+* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) - Java Client Library for Riak based on the Protocol Buffers API
+* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) - Asynchronous, NIO-based Protocol Buffers client for Riak
+* [Riak Module for the Play
+  Framework](http://www.playframework.org/modules/riak-head/home)
+
+#### Lisp-flavored Erlang
+
+* [Gutenberg](https://github.com/dysinger/gutenberg/) - Riak MapReduce
+  examples written in LFE
+
+#### Node.js
+
+* [zukai](https://github.com/natural/zukai) - Riak ODM for Node.js
+  from Troy Melhase
+* [riak-pb](https://github.com/CrowdProcess/riak-pb) - Riak Protocol
+  Buffers client for Node.js from the team at
+  [CrowdProcess](http://crowdprocess.com)
+* [node_riak](https://github.com/mranney/node_riak) - Voxer's
+  production Node.js client for Riak.
+* [riakpbc](https://github.com/nlf/riakpbc) - A simple Riak Protocol
+  Buffer client library for Node.js
+* [nodiak](https://npmjs.org/package/nodiak) - Supports bulk
+  get/save/delete, sibling auto-resolution, MapReduce chaining, Search,
+  and 2i's
+* [resourceful-riak](https://github.com/admazely/resourceful-riak) - A
+  Riak engine to the
+  [resourceful](https://github.com/flatiron/resourceful/) model
+  framework from [flatiron](https://github.com/flatiron/)
+* [Connect-Riak](https://github.com/frank06/connect-riak) - Riak
+  session store for Connect backed by [Riak-js](http://riakjs.org/)
+* [Riak-js](http://riakjs.com) - Node.js client for Riak with support
+  for HTTP and Protocol Buffers
+* [Riakjs-model](https://github.com/dandean/riakjs-model) - a model
+  abstraction around riak-js
+* [Node-Riak](http://github.com/orlandov/node-riak) - A wrapper around
+  Node's HTTP facilities for communicating with Riak
+* [riak-dc](https://github.com/janearc/riak-dc) - A very thin, very small
+  http-based interface to Riak using promises intended to be used for small
+  tools like command-line applications; aims to have the
+  "most-synchronous-like" interface.
+* [Nori](https://github.com/sgonyea/nori) - Experimental Riak HTTP
+  library for Node.js modeled after Ripple
+* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) - Node-based server and database-frontend for Sproutcore
+* [Chinood](https://npmjs.org/package/chinood) - Object data mapper
+  for Riak built on Nodiak
+* [SimpleRiak](https://npmjs.org/package/simpleriak) - A very simple
+  Riak HTTP client
+
+#### OCaml
+
+* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) - Riak OCaml client
+* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) - A Protocol
+  Buffers client for Riak
+
+#### Perl
+
+* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) - A Perl
+  interface to Riak
+* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) - Non-blocking Riak adapter using anyevent
+* [riak-tiny](https://github.com/tempire/riak-tiny) - Perl interface
+  to Riak without Moose
+* [Riak::Light](https://metacpan.org/module/Riak::Light) - Fast and
+  lightweight Perl client for Riak (PBC only)
+
+#### PHP
+
+* [riak-client](https://github.com/php-riak/riak-client) - A Riak
+  2.0-compliant PHP client with support for Protocol Buffers by [Fabio
+  Silva](https://github.com/FabioBatSilva)
+* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) - A port of
+  Ripple to PHP
+* [riiak](https://bitbucket.org/intel352/riiak) - A Riak PHP client
+  library for the [Yii Framework](http://www.yiiframework.com/)
+* [riak-php](https://github.com/marksteele/riak-php) - A Riak PHP
+  client with support for Protocol Buffers
+* [RiakBundle](https://github.com/remialvado/RiakBundle) - [Symfony](http://symfony.com) Bundle designed to ease interaction
+  with Riak
+* [php_riak](https://github.com/TriKaspar/php_riak) - A PHP extension
+  written in C, Both Riak client and PHP session module
+
+#### Python
+
+* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) - Asyncio
+  PBC Riak 2.0+ client library (based on the official Basho Python client)
+* [Riakasaurus](https://github.com/calston/riakasaurus) - A Riak
+  client library for Twisted (based on txriak)
+* [RiakKit](http://shuhaowu.com/riakkit) - A small Python ORM that
+  sits on top of riak-python-client, similar to mongokit and couchdbkit
+* [riakalchemy](https://github.com/Linux2Go/riakalchemy) - Object
+  mapper for Riak written in Python
+* [riak_crdt](https://github.com/ericmoritz/riak_crdt) - A CRDT
+  (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT
+  API](https://github.com/ericmoritz/crdt)
+* [txriak](https://launchpad.net/txriak) - A Twisted module for
+  communicating with Riak via the HTTP interface
+* [txriakidx](https://github.com/williamsjj/txriakidx) - Riak client
+  for Twisted Python that implements transparent indexes
+
+#### Racket
+
+* [riak.rkt](https://github.com/shofetim/riak.rkt) - Racket API to
+  Riak
+* [Racket Riak](https://github.com/dkvasnicka/racket-riak) - Racket
+  1.3.x API to Riak
+
+#### Ruby
+
+* [Risky](https://github.com/aphyr/risky) - A lightweight Ruby ORM for
+  Riak
+* [riak_sessions](http://github.com/igorgue/riak_sessions) - Riak-backed session storage for Rack
+* [Riaktor](http://github.com/benmyles/riaktor) - Ruby client and
+  object mapper for Riak
+* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) - DataMapper adapter for Riak
+* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) - Riak
+  Protocol Buffer Client in Ruby
+* [Devise-Ripple](http://github.com/frank06/devise-ripple) - An ORM
+  strategy to use Devise with Riak
+* [ripple-anaf](http://github.com/bkaney/ripple-anaf) - Accepts nested
+  attributes support for Ripple
+* [Pabst](https://github.com/sgonyea/pabst) - Cross-platform Ruby
+  extension for Protocol Buffers written in both Objective-C and
+  Objective-C++
+
+#### Scala
+
+* [Riakka](http://github.com/timperrett/riakka) - Scala library for
+  talking to Riak
+* [Ryu](http://github.com/softprops/ryu) - A Tornado Whirlwind Kick
+  Scala client for the Riak raw HTTP interface
+* [Raiku](https://github.com/gideondk/Raiku) - An Akka IO- and
+  Sentinel-driven Riak Scala client
+
+#### Smalltalk
+
+* [Phriak](http://www.squeaksource.com/Phriak/) - A Riak client for
+  Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface
+* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) - A Pharo Smalltalk interface to Riak. There is also a blog post
+  with some additional info about the client
+  [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html).
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/data-modeling.md b/content/riak/kv/3.0.2/developing/data-modeling.md
new file mode 100644
index 0000000000..87e17ca247
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/data-modeling.md
@@ -0,0 +1,15 @@
+---
+layout: redirect
+target: "riak/kv/3.0.2/learn/use-cases/"
+aliases:
+---
+
+This page exists solely to redirect from the generated URL to the above `target`.
+
+We prefer to store these redirects as .html files in static/, but -- to maintain
+the git history of this (possibly malformed?) file -- we're going to start off
+by using this generated redirect.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/data-types.md b/content/riak/kv/3.0.2/developing/data-types.md
new file mode 100644
index 0000000000..05505bf216
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/data-types.md
@@ -0,0 +1,279 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Data Types"
+    identifier: "developing_data_types"
+    weight: 102
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/data-types
+  - /riak/kv/3.0.2/dev/using/data-types
+  - /riak/3.0.2/dev/data-modeling/data-types
+  - /riak/kv/3.0.2/dev/data-modeling/data-types
+---
+
+[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others
+[concept crdt]: ../../learn/concepts/crdts
+[ops bucket type]: ../../using/cluster-operations/bucket-types
+
+Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types:
+
+- [Flags](./maps#flags)
+- [Registers](./maps#registers)
+- [Counters](./counters)
+- [Sets](./sets)
+- [GSets](./gsets)
+- [Maps](./maps)
+
+Riak KV also has 1 context-free data type, which has similar usage but does not require contexts:
+
+- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places)
+
+Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps).
+
+For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt].
+
+## Getting Started with Riak Data Types
+
+The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types:
+
+1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type).
+2. [Confirm the bucket was properly configured](#confirm-bucket-configuration).
+3. [Activate the bucket type](#activate-bucket-type).
+
+### Creating a Bucket with a Riak Data Type
+
+First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to either `counter`, `map`, `set`, `hll`, or `gset`.
+
+The following would create a separate bucket type for each of the five
+bucket-level data types:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The names `maps`, `sets`, `counters`, `hlls` and `gsets` are not reserved
+terms. You are free to name bucket types whatever you like, with
+the exception of `default`.
+
+### Confirm Bucket Configuration
+
+Once you've created a bucket with a Riak data type, you can check
+to make sure that the bucket property configuration associated with that
+type is correct. This can be done through the `riak-admin` interface:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This will return a list of bucket properties and their associated values
+in the form of `property: value`. If our `maps` bucket type has been set
+properly, we should see the following pair in our console output:
+
+```
+datatype: map
+```
+
+### Activate Bucket Type
+
+If a bucket type has been properly constructed, it needs to be activated
+to be usable in Riak. This can also be done using the `bucket-type`
+command interface:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+To check whether activation has been successful, simply use the same
+`bucket-type status` command shown above.
+
+See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application.
+
+## Required Bucket Properties
+
+In order for Riak data types to work the bucket should have the following bucket properties:
+
+- `allow_mult = true`
+- `last_write_wins = false`
+
+These settings are set by default and should not be changed.
+
+## Data Types and Context
+
+Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence.
+
+If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client).
+
+> **Note**
+>
+> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter.
+
+In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map):
+
+```java
+// Using the "ahmedMap" Location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+System.out.println(ctx.getValue().toString());
+
+// An indecipherable string of Unicode characters should then appear
+```
+
+```ruby
+bucket = client.bucket('users')
+ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps')
+ahmed_map.instance_variable_get(:@context)
+
+# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j"
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```python
+bucket = client.bucket_type('maps').bucket('users')
+ahmed_map = Map(bucket, 'ahmed_info')
+ahmed_map.context
+
+# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Note: using a previous UpdateMap or FetchMap result
+Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context));
+
+// Output:
+// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("context: '%s'", rslt.context.toString('base64'));
+});
+
+// Output:
+// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo='
+```
+
+```erlang
+%% You cannot fetch a data type's context directly using the Erlang
+%% client. This is actually quite all right, as the client automatically
+%% manages contexts when making updates.
+```
+
+> **Context with the Ruby, Python, and Erlang clients**
+>
+> In the Ruby, Python, and Erlang clients, you will not need to manually
+handle context when making data type updates. The clients will do it all
+for you. The exceptions amongst the official clients are the Java and
+PHP clients. We'll explain how to use data type contexts with those two
+clients directly below.
+
+### Context with the Java and PHP Clients
+
+With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations:
+
+* Disabling a flag within a map
+* Removing an item from a set (whether the set is on its own or within a
+  map)
+* Removing a field from a map
+
+Without context, these operations simply will not succeed due to the
+convergence logic driving Riak data types. The example below shows you
+how to fetch a data type's context and then pass it back to Riak. More
+specifically, we'll remove the `paid_account` flag from the map:
+
+```java
+// This example uses our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate removePaidAccountField = new MapUpdate()
+    .removeFlag("paid_account");
+UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->remove('opera');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->withContext($map->getContext())
+  ->build()
+  ->execute();
+```
+
+## Usage Examples
+
+- [Flags](./maps#flags)
+- [Registers](./maps#registers)
+- [Counters](./counters)
+- [Sets](./sets)
+- [Maps](./maps)
+- [GSets](./gsets)
+- [Hyperloglogs](./hyperloglogs)
+
+The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section.
+
+All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish.
+
+## Data Types and Search
+
+Riak data types can be searched like any other object, but with the
+added benefit that your data type is indexed as a different type by Solr,
+the search platform behind Riak Search.
+
+In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code
+samples from each of our official client libraries.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/data-types/counters.md b/content/riak/kv/3.0.2/developing/data-types/counters.md
new file mode 100644
index 0000000000..f90777da0e
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/data-types/counters.md
@@ -0,0 +1,635 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Counters"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Counters"
+    identifier: "data_types_counters"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/data-types/counters
+  - /riak/kv/3.0.2/dev/using/data-types/counters
+  - /riak/3.0.2/dev/data-modeling/data-types/counters
+  - /riak/kv/3.0.2/dev/data-modeling/data-types/counters
+---
+
+Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero.
+
+The examples in this section will show you how to use counters on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `counter`:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+```
+
+> **Note**
+>
+> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status counters
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `counters` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: counter
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate counters
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status counters
+```
+
+After creating and activating our new `counters` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our counter.
+
+For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can
+create this counter and ensure that the `counters` bucket will use our
+`counters` bucket type like this:
+
+```java
+// Using the countersBucket Namespace object from above:
+
+Location trafficTickets = new Location(countersBucket, "traffic_tickets");
+```
+
+```ruby
+bucket = client.bucket_type('counters').bucket('counters')
+counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all counter buckets to use the counters bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters'
+
+# This would enable us to create our counter without specifying a bucket type
+bucket = client.bucket('counters')
+counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')
+```
+
+```php
+# using the $bucket var created earlier
+$location = new \Basho\Riak\Location('traffic_tickets', $bucket);
+```
+
+```python
+bucket = client.bucket_type('counters').bucket('counters')
+counter = bucket.new('traffic_tickets')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
+FetchCounter cmd = new FetchCounter(fetchCounterOptions);
+RiakResult rslt = client.Execute(cmd);
+CounterResponse response = cmd.Response;
+```
+
+```javascript
+// Using the options from above:
+
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets'
+};
+```
+
+```erlang
+Counter = riakc_counter:new().
+
+%% Counters in the Erlang client are opaque data structures that collect
+%% operations as you mutate them. We will associate the data structure
+%% with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 0}'
+```
+
+## Increment a Counter
+
+Now that our client knows which bucket/key pairing to use for our
+counter, `traffic_tickets` will start out at 0 by default. If we happen
+to get a ticket that afternoon, we can increment the counter:
+
+```java
+// Using the "trafficTickets" Location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.increment
+
+# This will increment the counter both on the application side and
+# in Riak
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(1)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+counter.increment()
+
+# Updates are staged locally and have to be explicitly sent to Riak
+# using the store() method.
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
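+
+%% As before, increment/2 only stages the operation locally. It is
+%% applied on the server once the accumulated operations are sent with
+%% riakc_pb_socket:update_type/4, as shown in the Decrement section
+%% below.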
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+```
+
+## Retrieve Counter Value
+
+We can retrieve the value of the counter and view how many tickets have accumulated:
+
+```java
+// Using the "trafficTickets" Location from above:
+FetchCounter fetch = new FetchCounter.Builder(trafficTickets)
+    .build();
+FetchCounter.Response response = client.execute(fetch);
+RiakCounter counter = response.getDatatype();
+Long ticketsCount = counter.view();
+```
+
+```ruby
+counter.value
+# Output will always be an integer
+```
+
+```php
+$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getCounter();
+
+$trafficTickets->getData(); # returns an integer
+```
+
+```python
+counter.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, whereas the call above would return
+# 6, the call below will return 0 since we started with an empty
+# counter:
+
+counter.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any changes to the counter that have not yet been
+# sent to Riak
+counter.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
+FetchCounter cmd = new FetchCounter(fetchCounterOptions);
+RiakResult rslt = client.Execute(cmd);
+CounterResponse response = cmd.Response;
+// response.Value has the counter value
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets'
+};
+client.fetchCounter(options,
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        if (rslt.notFound) {
+            logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND",
+                options.bucketType, options.bucket, options.key);
+        } else {
+            logger.info("bt: %s, b: %s, k: %s, counter: %d",
+                options.bucketType, options.bucket, options.key,
+                rslt.counterValue);
+        }
+    }
+);
+```
+
+```erlang
+riakc_counter:dirty_value(Counter2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, whereas the call above would return
+%% '6', the call below will return '0' since we started with an empty
+%% counter:
+
+riakc_counter:value(Counter2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, CounterX} = riakc_pb_socket:fetch_type(Pid,
+                                            {<<"counters">>, <<"counters">>},
+                                            <<"traffic_tickets">>).
+```
+
+```curl
+curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets
+
+# Response:
+{"type":"counter", "value": <value>}
+```
+
+## Decrement a Counter
+
+Counters enable you to decrement values in addition to incrementing them as seen above.
+
+For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record:
+
+```java
+// Using the "trafficTickets" Location from above:
+CounterUpdate cu = new CounterUpdate(-1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.decrement
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(-3)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+counter.decrement()
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var updateCmd = new UpdateCounter.Builder(-3)
+    .WithBucketType("counters")
+    .WithBucket("counters")
+    .WithKey("traffic_tickets")
+    .Build();
+
+rslt = client.Execute(updateCmd);
+response = updateCmd.Response;
+// response.Value is three less than before
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -1
+};
+
+// As with incrementing, you can also decrement by more than one, e.g.:
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -3
+};
+```
+
+```erlang
+Counter3 = riakc_counter:decrement(Counter2).
+
+%% As with incrementing, you can also decrement by more than one:
+
+Counter4 = riakc_counter:decrement(3, Counter3).
+
+%% At some point, we'll want to send our local updates to the server
+%% so they get recorded and are visible to others. Extract the update
+%% using the to_op/1 function, then pass it to
+%% riakc_pb_socket:update_type/4,5.
+
+riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>},
+                            <<"traffic_tickets">>,
+                            riakc_counter:to_op(Counter4)).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"decrement": 3}'
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/data-types/gsets.md b/content/riak/kv/3.0.2/developing/data-types/gsets.md
new file mode 100644
index 0000000000..84401c6a42
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/data-types/gsets.md
@@ -0,0 +1,631 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: GSets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "GSets"
+    identifier: "data_types_gsets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/data-types/gsets
+  - /riak/kv/3.0.2/dev/using/data-types/gsets
+  - /riak/3.0.2/dev/data-modeling/data-types/gsets
+  - /riak/kv/3.0.2/dev/data-modeling/data-types/gsets
+---
+
+GSets are a bucket-level Riak data type that can be used on their own or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps).
+
+GSets are collections of unique binary values (such as strings). The values in a gset are automatically sorted alphabetically, irrespective of the order in which they were added.
+
+For example, if you attempt to add the element `shovel` to a gset that already contains `shovel`, the operation will be ignored by Riak KV.
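+
+As a quick illustration, here is a minimal sketch using the Python client
+and the `gsets` bucket type set up below (the bucket and key names are
+illustrative); repeated adds of the same element leave the gset unchanged:
+
+```python
+bucket = client.bucket_type('gsets').bucket('tools')
+gset = bucket.new('shed')
+gset.add('shovel')
+gset.add('shovel')  # the duplicate add is ignored by Riak KV
+gset.store()
+'shovel' in gset    # True, and the gset holds exactly one 'shovel'
+```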
+ +Unlike sets, elements can only be added and no element modification or deletion is possible. + +> **Known Issue** +> +> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details. + +## Set Up a Bucket Type + +> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup). + +Start by creating a bucket type with the `datatype` parameter `gset`: + +```bash +riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}' +``` + +> **Note** +> +> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`. + +After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct: + +```bash +riak-admin bucket-type status gsets +``` + +This returns a list of bucket properties and their values +in the form of `property: value`. + +If our `gsets` bucket type has been set properly we should see the following pair in our console output: + +``` +datatype: gset +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate gsets +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status gsets +``` + +After creating and activating our new `gsets` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +Using sets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair. + +Here is the general syntax for creating a bucket type/bucket/key +combination to handle a gset: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location set = + new Location(new Namespace("", ""), ""); +``` + +```ruby +# Note: both the Riak Ruby Client and Ruby the language have a class +# called Set. Make sure that you refer to the Ruby version as ::Set and +# the Riak client version as Riak::Crdt::Set + +bucket = client.bucket_type('bucket_type_name').bucket('bucket_name') +set = Riak::Crdt::Set.new(bucket, key) +``` + +```php +$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type')); +``` + +```python +gset = bucket.new('2019-11-17') + +# or + +from riak.datatypes import GSet +gset = GSet('account-12345678', '2019-11-17') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// As with counters, with the Riak .NET Client you interact with gsets +// by building an Options object or using a Builder +var builder = new FetchGSet.Builder() + .WithBucketType("gsets") + .WithBucket("account-12345678") + .WithKey("2019-11-17"); + +// NB: builder.Options will only be set after Build() is called. 
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, gsets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store a list of transactions that occur for an account number on a specific date.
+Let's create a Riak gset stored in the key `cities` in the bucket `travel` using the `gsets` bucket type created previously (the Java and Ruby examples below use this `travel`/`cities` pair, while the other clients use `account-12345678`/`2019-11-17`):
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('gsets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the gsets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'gsets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak set.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a set explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create an options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+20191117Gset = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty.
We can verify that it is empty at any +time: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +RiakSet set = response.getDatatype(); +boolean isEmpty = set.viewAsSet().isEmpty(); +``` + +```ruby +cities_set.empty? +``` + +```php +# use $location from earlier +$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($gset->getData()); +``` + +```python +len(gset) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchGSet.Builder() + .WithBucketType("gsets") + .WithBucket("account-12345678") + .WithKey("2019-11-17"); + +FetchGSet fetchGSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchGSetCommand); +GSetResponse response = fetchGSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'gsets', + bucket: 'account-12345678', + key: '2019-11-17' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("gset '2019-11-17' is not found!"); + } +}); +``` + +```erlang +riakc_gset:size(20191117Gset) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% gset that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a GSet + +But let's say that a pair of transactions occurred today. 
Let's add them to our `2019-11-17` set: + +```java +// Using our "cities" Location from above: + +GSetUpdate su = new GSetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('transaction a') + ->add('transaction b') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +gset.add('transaction a') +gset.add('transaction b') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "transaction a", "transaction b" }; + +var builder = new UpdateGSet.Builder() + .WithBucketType("gsets") + .WithBucket("account-12345678") + .WithKey("2019-11-17") + .WithAdditions(adds); + +UpdateGSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +GSetResponse response = cmd.Response; +Assert.Contains("transaction a", response.AsStrings.ToArray()); +Assert.Contains("transaction b", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'gsets', + bucket: 'account-1234578', + key: '2019-11-17' +}; +var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['transaction a', 'transaction b']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +20191117Gset1 = riakc_gset:add_element(<<"transaction a">>, 20191117Gset), +20191117Gset2 = riakc_gset:add_element(<<"transaction b">>, 20191117Gset1). +``` + +```curl +curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \ + -H "Content-Type: application/json" \ + -d '{"add_all":["transaction a", "transaction b"]}' +``` + +## Remove from a GSet + +Removal from a GSet is not possible. + +## Retrieve a GSet + +Now, we can check on which transactions are currently in our gset: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); +for (BinaryValue city : binarySet) { + System.out.println(city.toStringUtf8()); +} +``` + +```ruby +cities_set.members + +# +``` + +```php +# use $location from earlier +$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +var_dump($gset->getData()); +``` + +```python +gset.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, where the call above would return +# frozenset(['Transaction a', 'Transaction b']), the call below would +# return frozenset([]). + +gset.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any unsent additions. 
+gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using the GSetResponse from the fetch in the example above:
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(20191117Gset3).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(20191117Gset3).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"gsets">>,<<"account-12345678">>},
+                                        <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var gset_2019_11_17 = result.values;
+gset_2019_11_17.indexOf('transaction z'); // if present, index is >= 0
+gset_2019_11_17.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, 20191117Gset3 is the most "recent" gset from the
+%% standpoint of our application.
+
+riakc_gset:is_element(<<"transaction z">>, 20191117Gset3).
+riakc_gset:is_element(<<"transaction a">>, 20191117Gset3).
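+
+%% These calls return false and true, respectively, matching the
+%% "transaction z" / "transaction a" checks in the other clients above.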
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using our "citiesSet" from above:
+
+int numberOfCities = citiesSet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+response.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var gset_2019_11_17_size = result.values.length;
+```
+
+```erlang
+riakc_gset:size(20191117Gset3).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/data-types/hyperloglogs.md b/content/riak/kv/3.0.2/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..5c1087c40e
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/data-types/hyperloglogs.md
@@ -0,0 +1,643 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/data-types/hyperloglogs
+  - /riak/kv/3.0.2/dev/using/data-types/hyperloglogs
+  - /riak/3.0.2/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/3.0.2/dev/data-modeling/data-types/hyperloglogs
+---
+
+Hyperloglogs are a Riak data type used to estimate, in a memory-efficient way, the approximate number of distinct elements added to a collection (its cardinality).
+
+The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog.
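+
+Since hyperloglogs only store an estimate, reads return the approximate
+number of distinct elements added so far. As a quick end-to-end sketch
+(Python client, assuming the `hlls` bucket type above; the bucket and
+key names are illustrative):
+
+```python
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+hll = bucket.new('unique_visitors')
+for visitor in ['anna', 'bob', 'anna']:
+    hll.add(visitor)  # duplicates do not inflate the estimate
+hll.store()
+hll.value  # approximately 2
+```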
+ +For this example we'll use the `hlls` bucket type created and activated above and a bucket called `hlls`: + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location hllLocation = + new Location(new Namespace("", ""), ""); +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +hll = bucket.new(key) + +# or + +from riak.datatypes import Hll +hll = Hll(bucket, key) +``` + +```go +// Buckets and bucket types are simply strings in the Go client. + +// See the examples below for more information, or the full example at +// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go + +// We will need the follow imports to run the examples: +import ( + "fmt" + "os" + "time" + + riak "github.com/basho/riak-go-client" + "errors" +) +``` + +```csharp +// In the C# client, buckets are just string parameters to operations. +// See the examples below for more information. +``` + +```javascript +// In the Node.js client, buckets are just string parameters to operations. +// See the examples below for more information. +``` + +```php +$command = (new Command\Builder\FetchHll($riak_client)) + ->buildLocation('', '', 'hlls') + ->build(); +``` + +```ruby +bucket = client.bucket_type('hlls').bucket('my_hlls') +``` + +```curl +curl http://localhost:8098/types//buckets//datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + + +## Create a HyperLogLog data type + +To create a hyperloglog data structure, you need to specify a bucket/key pair to +hold that hyperloglog. Here is the general syntax for doing so: + +```erlang +HLL = riakc_hll:new(). + +%% Hyperloglogs in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```java +// In the Java client, you specify the location of Data Types +// before you perform operations on them: + +Location hllLocation = + new Location(new Namespace("hlls", "hello"), "darkness"); + +// In the Java client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +hll = bucket.new(key) + +# or + +from riak.datatypes import Hll +hll = Hll(bucket, key) +``` + +```go +// In the Go client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```csharp +// In the C# client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. +``` + +```javascript +// In the Node.js client, there is no intermediate "empty" hyperloglog data type. +// Hyperloglogs can be created when an element is added to them, as in the examples below. 
+``` + +```php +// Note that "hlls" is just an example HLL bucket type name used +// in these examples + +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('gosabres poked you.') + ->add('phprocks viewed your profile.') + ->add('phprocks started following you.') + ->buildBucket('', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +key = "darkness" +hll = Riak::Crdt::HyperLogLog.new(bucket, key) +``` + +```curl +# You cannot create an empty hyperloglog data structure through the HTTP +# interface. +# Hyperloglogs can only be created when an element is added to them, as in the +# examples below. +``` + +Upon creation, our hyperloglog data structure is empty: + +```erlang +HLL. + +%% which will return: +%% {hll,0,[]} +``` + +```java +FetchHll fetch = new FetchHll.Builder(hllLocation) + .build(); +RiakHll hll = client.execute(fetch); +boolean isEmpty = hll.getCardinality() == 0; +``` + +```python +is_empty = hll.value == 0 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } +}); +// Prints "Not Found" to logger.info. +``` + +```csharp + var fetch = new FetchHll.Builder() + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .Build(); + +RiakResult rslt = client.Execute(fetch); +HllResponse response = fetch.Response; +if (response.NotFound) +{ + Console.WriteLine("Not Found"); +} +// Prints "Not Found" to the console. +``` + +```php +$command = (new Command\Builder\FetchHll($riak_client)) + ->buildLocation('darkness', 'hello', 'hlls') + ->build(); + +$response = $command->execute(); + +$response->getCode() == '404'; +``` + +```ruby +puts hll.cardinality +# Prints "0" +``` + +```curl +curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness + +# Response +{"type":"hll","error":"notfound"} +``` + +## Add elements to a HyperLogLog data type + +```erlang +HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1), + +HLL2. 
+ +%% which will return: +%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]} +``` + +```java +HllUpdate hllUpdate = new HllUpdate() + .add("Jokes") + .add("Are") + .addAll(Arrays.asList("Better", "Explained", "Jokes")); + +hllUpdate.getElementAdds(); +// Returns the set of ["Jokes", "Are", "Better", "Explained"] +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +myhll = datatypes.Hll(bucket, 'hll_one') +myhll.add('Jokes') +myhll.add('Are') +myhll.add('Better') +myhll.add('Explained') +myhll.add('Jokes') +myhll.store() +# myhll.value == 4 +``` + +```go +// We will add values in the next example +``` + +```csharp +// We will add values in the next example +``` + +```javascript +// We will add values in the next example +``` + +```php +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('Jokes') + ->add('Are') + ->add('Better') + ->add('Explained') + ->add('Jokes') + ->buildBucket('my_hlls', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +``` + +```curl +curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \ + -H "Content-Type: application/json" \ + -d '{"add_all":["my", "old", "friend"]}' +``` + +However, when using a non-HTTP client, the approximate cardinality/value of our +data structure will be 0, locally, until its pushed to the server and then +[fetched](#retrieve-a-hyperloglog-datatype) from the server. + +```erlang +riakc_hll:value(HLL2) == 0. + +%% which will return: +%% true + +Port = 8087, +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port), +Key = <<"Holy Diver">>, +BucketType = <<"hlls">>, +Bucket = {BucketType, <<"rainbow in the dark">>}, + +ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)). +ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)). +``` + +```java +// Using hllUpdate and hllLocation from above examples + +UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate) + .build(); +client.execute(update); +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +myhll = datatypes.Hll(bucket, 'hll_one') +myhll.add('Jokes') +myhll.add('Are') +myhll.add('Better') +myhll.add('Explained') +myhll.add('Jokes') +myhll.store() +# myhll.value == 4 +``` + +```go +adds := [][]byte{ + []byte("Jokes"), + []byte("Are"), + []byte("Better"), + []byte("Explained"), + []byte("Jokes"), +} + +builder := riak.NewUpdateHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + WithAdditions(adds...). 
+ Build() +if err != nil { + return err +} + +return cluster.Execute(cmd) +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness', + additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'], +}; + +client.updateHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```csharp +var adds = new HashSet { "Jokes", "Are", "Better", "Explained", "Jokes" }; + +var update = new UpdateHll.Builder(adds) + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(update); +``` + +```php +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('Jokes') + ->add('Are') + ->add('Better') + ->add('Explained') + ->add('Jokes') + ->buildLocation('darkness', 'hello', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +hll.add('Jokes') +hll.batch do |s| + s.add 'Are' + s.add 'Better' + s.add 'Explained' + s.add 'Jokes' +end +``` + +## Retrieve a HyperLogLog data type + +Now, we can check the approximate count-of (a.k.a. the cardinality of the elements +added to) our hyperloglog data structure: + +```erlang +{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key), +riakc_hll:value(HLL3) == 4. + +%% which would return: +%% true + +%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the +%% unique elements we've added to the data structure. +``` + +```java +FetchHll hllFetchCmd = new FetchHll.Builder(location).build(); +RiakHll hll = client.execute(hllFetchCmd); +hll.getCardinality(); +// Which returns 4 + +// We added "Jokes" twice, but, remember, the algorithm only counts the +// unique elements we've added to the data structure. +``` + +```python +bucket_type = client.bucket_type('hlls') +bucket = bucket_type.bucket('my_hlls') +myhll = bucket.get('hll_one') +# myhll.value == 4 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +// We added "Jokes" twice, but, remember, the algorithm only counts the +// unique elements we've added to the data structure. +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } + logger.info("Hyperloglog cardinality is: " + rslt.cardinality); +}); +// Prints "Hyperloglog cardinality is: 4" +// We added "Jokes" twice, but, remember, the algorithm only counts the +// unique elements we've added to the data structure. 
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$result = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($result->getHll()->getData()));
+$this->assertEquals(4, $result->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/data-types/maps.md b/content/riak/kv/3.0.2/developing/data-types/maps.md
new file mode 100644
index 0000000000..59066d7eca
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/data-types/maps.md
@@ -0,0 +1,1885 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/data-types/maps
+  - /riak/kv/3.0.2/dev/using/data-types/maps
+  - /riak/3.0.2/dev/data-modeling/data-types/maps
+  - /riak/kv/3.0.2/dev/data-modeling/data-types/maps
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+ +If our `map` bucket type has been set properly we should see the following pair in our console output: + +```bash +datatype: map +``` + +Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV: + +```bash +riak-admin bucket-type activate maps +``` + +We can check if activation has been successful by using the same `bucket-type status` command shown above: + +```bash +riak-admin bucket-type status maps +``` + +After creating and activating our new `maps` bucket type, we can setup our client to start using the bucket type as detailed in the next section. + +## Client Setup + +First, we need to direct our client to the bucket type/bucket/key location that contains our map. + +The syntax for creating a map is analogous to the +syntax for creating other data types: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location map = + new Location(new Namespace("", ""), ""); +``` + +```ruby +bucket = client.bucket_type('bucket_type_name').bucket('bucket_name') +map = Riak::Crdt::Map.new(bucket, key) +``` + +```php +$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type'); +``` + +```python +# The client detects the bucket type's datatype and automatically +# returns the right datatype for you, in this case a Map. +map = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Map +map = Map(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("") + .WithBucket("") + .WithKey(""); +``` + +```javascript +// Options to pass to the various map methods +var options = { + bucketType: '', + bucket: '', + key: '' +}; +``` + +```erlang +%% Maps in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +curl http://localhost:8098/types//buckets//datatypes/ + +# Note that this differs from the URL structure for non-data type requests, +# which end in /keys/ +``` + +## Create a Map + +For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket. + +We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type: + +```java +// In the Java client, you specify the location of data types +// before you perform operations on them: + +Location ahmedMap = + new Location(new Namespace("maps", "customers"), "ahmed_info"); +``` + +```ruby +customers = client.bucket_type('maps').bucket('customers') +map = Riak::Crdt::Map.new(customers, 'ahmed_info') + +# Alternatively, the Ruby client enables you to set a bucket type as being +# globally associated with a Riak data type. 
The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+    .update("first_name", ru1)
+    .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Ahmed')
+    ->updateRegister('phone_number', '5551234567')
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved.
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
He hasn't yet, so we'll set it to `false`: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate mu = new MapUpdate() + .update("enterprise_customer", new FlagUpdate(false)); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.flags['enterprise_customer'] = false +``` + +```php +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateFlag('enterprise_customer', false) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.flags['enterprise_customer'].disable() +map.store() +``` + + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Using our builder from above: + +mapOperation = new UpdateMap.MapOperation(); +mapOperation.SetFlag("enterprise_customer", false); + +builder.WithMapOperation(mapOperation); +cmd = builder.Build(); +rslt = client.Execute(cmd); + +response = cmd.Response; + +// response.Value as JSON: +// Map: {"Counters":{},"Sets":{}, + "Registers":{"first_name":"Ahmed","phone_number":"5551234567"}, + "Flags":{"enterprise_customer":false},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setFlag('enterprise_customer', false); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map4 = riakc_map:update({<<"enterprise_customer">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + Map3). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "enterprise_customer_flag": "disable" + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag==" + }' +``` + +We can retrieve the value of that flag at any time: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +RiakMap map = response.getDatatype(); +System.out.println(map.getFlag("enterprise_customer").view()); +``` + +```ruby +map.flags['enterprise_customer'] + +# false +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +echo $map->getFlag('enterprise_customer'); // false +``` + +```python +map.reload().flags['enterprise_customer'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +Map ahmedMap = response.Value; +ahmedMap.Flags["enterprise_customer"] +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + console.log("fetched map: %s", JSON.stringify(rslt)); +}); +``` + +```erlang +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. + +riakc_map:dirty_value(Map4). 
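+
+%% Nothing is sent to Riak until the accumulated map operations are
+%% pushed to the server; as with counters, extract them with
+%% riakc_map:to_op/1 and pass the result to riakc_pb_socket:update_type/4.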
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+        .update("page_visits", new CounterUpdate(1));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateCounter('page_visits', $updateCounter)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//      "Sets":{},
+//      "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//      "Flags":{"enterprise_customer":false},
+//      "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "page_visits_counter": 1
+    }
+  }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles.
We'll store
+that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+        .add("robots")
+        .add("opera")
+        .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+        .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('robots')
+    ->add('opera')
+    ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet)
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//      "Sets":{"interests":["motorcycles","opera","robots"]},
+//      "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//      "Flags":{"enterprise_customer":false},
+//      "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "add_all": [
+          "robots",
+          "opera",
+          "motorcycles"
+        ]
+      }
+    }
+  }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+        .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? interest
+  end
+end
+
+# This will return three Boolean values
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+$sets = $map->getSet('interests');
+var_dump($sets->getData());
+```
+
+```python
+reloaded_map = map.reload()
+for interest in ['robots', 'opera', 'motorcycles']:
+    print(interest in reloaded_map.sets['interests'].value)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+
+// All of the following return true:
+ahmedMap.Sets.GetValue("interests").Contains("robots");
+ahmedMap.Sets.GetValue("interests").Contains("opera");
+ahmedMap.Sets.GetValue("interests").Contains("motorcycles");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    assert(rslt.map.sets['interests'].indexOf('robots') !== -1);
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map6).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false
+```
+
+We learn from a recent purchasing decision that Ahmed actually doesn't
+seem to like opera. He's much more keen on indie pop. Let's change the
+`interests` set to reflect that:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+        .remove("opera")
+        .add("indie pop");
+MapUpdate mu = new MapUpdate()
+        .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  m.sets['interests'].remove('opera')
+  m.sets['interests'].add('indie pop')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('indie pop')
+    ->remove('opera');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+map.sets['interests'].discard('opera')
+map.sets['interests'].add('indie pop')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", "indie pop");
+mapOperation.RemoveFromSet("interests", "opera");
+
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+response = cmd.Response;
+Map ahmedMap = response.Value;
+
+// This is false
+ahmedMap.Sets.GetValue("interests").Contains("opera");
+
+// These are true
+ahmedMap.Sets.GetValue("interests").Contains("indie pop");
+ahmedMap.Sets.GetValue("interests").Contains("robots");
+ahmedMap.Sets.GetValue("interests").Contains("motorcycles");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.removeFromSet('interests', 'opera');
+    mapOp.addToSet('interests', 'indie pop');
+
+    options.context = rslt.context;
+    options.op = mapOp;
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map7 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+                        Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of a wide variety of
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("first_name", ru1)
+        .update("last_name", ru2)
+        .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Annika')
+    ->updateRegister('last_name', 'Weiss')
+    ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
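+
+// Calling map() on a map operation addresses (and, if needed, creates)
+// a nested map; the registers below are set inside 'annika_info'.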
+mapOp.map('annika_info')
+    .setRegister('first_name', 'Annika')
+    .setRegister('last_name', 'Weiss')
+    .setRegister('phone_number', '5559876543');
+
+options.op = mapOp;
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map12 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"first_name">>, register},
+        fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end,
+    Map11),
+Map13 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"last_name">>, register},
+        fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end,
+    Map12),
+Map14 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"phone_number">>, register},
+        fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end,
+    Map13).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "first_name_register": "Annika",
+          "last_name_register": "Weiss",
+          "phone_number_register": "5559876543"
+        }
+      }
+    }
+  }
+  '
+```
+
+The value of a register in a map can be obtained without a special
+method:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+String annikaFirstName = response.getDatatype()
+        .getMap("annika_info")
+        .getRegister("first_name")
+        .view()
+        .toString();
+```
+
+```ruby
+map.maps['annika_info'].registers['first_name']
+
+# "Annika"
+```
+
+```php
+# with param 'returnbody' = 'true', we can fetch the map from our last response
+$map = $response->getMap();
+
+echo $map->getMap('annika_info')->getRegister('first_name'); // Annika
+```
+
+```python
+map.reload().maps['annika_info'].registers['first_name'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+ahmedMap = response.Value;
+ahmedMap.Maps["annika_info"].Registers.GetValue("first_name");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var annikaFirstName =
+        rslt.map.maps['annika_info'].registers['first_name'].toString('utf8');
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map14).
+```
+
+```curl
+# Specific values for fields inside of maps (or maps within maps, for that
+# matter), cannot be obtained directly through the HTTP interface.
+```
+
+Registers can also be removed:
+
+```java
+// This example uses our "ahmedMap" location from above. Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+        .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .removeRegister("first_name");
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('first_name')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->removeRegister('first_name');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['first_name']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("first_name");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('first_name');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+                         fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+                         Map14).
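+
+%% erase/2 marks the field for removal; as with other removal
+%% operations, the update should carry the causal context that was
+%% fetched with the map.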
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "remove": ["phone_number_register"]
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+  }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("enterprise_plan", new FlagUpdate(false))
+        .update("family_plan", new FlagUpdate(false))
+        .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('enterprise_plan', false)
+    ->updateFlag('family_plan', false)
+    ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"enterprise_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map15),
+Map17 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"family_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map16),
+Map18 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"free_plan">>, flag},
+        fun(F) -> riakc_flag:enable(F) end,
+        M) end,
+    Map17).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "enterprise_plan_flag": "disable",
+          "family_plan_flag": "disable",
+          "free_plan_flag": "enable"
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="
+  }
+  '
+```
+
+The value of a flag can be retrieved at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+boolean enterprisePlan = response.getDatatype()
+        .getMap("annika_info")
+        .getFlag("enterprise_plan")
+        .view();
+```
+
+```ruby
+map.maps['annika_info'].flags['enterprise_plan']
+
+# false
+```
+
+```php
+# with param 'returnbody' = 'true', we can fetch the map from our last response
+$map = $response->getMap();
+
+echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false
+```
+
+```python
+map.reload().maps['annika_info'].flags['enterprise_plan'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+ahmedMap = response.Value;
+ahmedMap.Maps["annika_info"].Flags["enterprise_plan"];
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var enterprisePlan =
+        rslt.map.maps.annika_info.flags.enterprise_plan;
+});
+```
+
+```erlang
+riakc_map:dirty_value(Map18).
+```
+
+```curl
+# Specific values for fields inside of maps (or maps within maps, for that
+# matter), cannot be obtained directly through the HTTP interface.
+```
+
+It's also important to track the number of purchases that Annika has
+made with our company.
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
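+
+%% The nested counter's increment, like the other operations queued on
+%% Map19, takes effect once the map is sent to Riak.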
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "add": "tango dancing"
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+We can remove that interest in just the way that we would expect:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate().remove("tango dancing");
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("interests", su);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].sets['interests'].remove('tango dancing')
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('tango dancing');
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withContext($response->getMap()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].sets['interests'].discard('tango dancing')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing");
+
+// Note: using Context from previous response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+client.Execute(builder.Build());
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.removeFromSet('interests', 'tango dancing');
+
+    options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map21 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"interests">>, set},
+        fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end,
+        M) end,
+    Map20).
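+
+%% Removing an element from a set nested in a map also requires the
+%% map's causal context when the update is submitted.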
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "remove": "tango dancing"
+          }
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+  }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateFlag('first_purchase', true)
+    ->updateRegister('amount', '1271')
+    ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('annika_info', $annikaMap)
+    ->atLocation($location)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map22 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"purchase">>, map},
+        fun(M1) -> riakc_map:update(
+            {<<"first_purchase">>, flag},
+            fun(R) -> riakc_flag:enable(R) end,
+            M1) end,
+        M) end,
+    Map21
+).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "purchase_map": {
+            "update": {
+              "first_purchase_flag": "enable",
+              "amount_register": "1271",
+              "items_set": {
+                "add": "large widget"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/data-types/sets.md b/content/riak/kv/3.0.2/developing/data-types/sets.md
new file mode 100644
index 0000000000..4505262fe3
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/data-types/sets.md
@@ -0,0 +1,773 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Sets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Sets"
+    identifier: "data_types_sets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/data-types/sets
+  - /riak/kv/3.0.2/dev/using/data-types/sets
+  - /riak/3.0.2/dev/data-modeling/data-types/sets
+  - /riak/kv/3.0.2/dev/data-modeling/data-types/sets
+---
+
+Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps).
+
+Sets are collections of unique binary values (such as strings).
+
+For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `set`:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+```
+
+> **Note**
+>
+> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `sets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: set
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate sets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+After creating and activating our new `sets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the sets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('cities', new \Basho\Riak\Bucket('travel', 'sets'));
+```
+
+```python
+travel = client.bucket_type('sets').bucket('travel')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak set.
+cities_set = travel.new('cities')
+
+# You can also create a reference to a set explicitly:
+from riak.datatypes import Set
+
+cities_set = Set(travel, 'cities')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the set with which we want to
+// interact:
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+```
+
+```javascript
+// Now we'll create an options object for the set with which we want to
+// interact:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+CitiesSet = riakc_set:new().
+
+%% Sets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty set through the HTTP interface. Sets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our set is empty. We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+count($set->getData());
+```
+
+```python
+len(cities_set) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+FetchSet fetchSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchSetCommand);
+SetResponse response = fetchSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("set 'cities' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_set:size(CitiesSet) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% set that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a Set
+
+But let's say that we read a travel brochure saying that Toronto and
+Montreal are nice places to go.
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+    .remove("Montreal")
+    .add("Hamilton")
+    .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('Hamilton')
+    ->add('Ottawa')
+    ->remove('Montreal')
+    ->atLocation($location)
+    ->withContext($response->getSet()->getContext())
+    ->build()
+    ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+response = cmd.Response;
+
+// using System.Linq
+var responseStrings = response.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4).
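+
+%% The queued operations are sent to Riak with the socket client, e.g.
+%% riakc_pb_socket:update_type(Pid, {<<"sets">>, <<"travel">>},
+%%                             <<"cities">>, riakc_set:to_op(CitiesSet5)).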
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}
+
+curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \
+  -H "Content-Type: application/json" \
+  -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}'
+```
+
+## Retrieve a Set
+
+Now, we can check on which cities are currently in our set:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+  System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+#<Set: {"Hamilton", "Ottawa", "Toronto"}>
+```
+
+```php
+# use $location from earlier
+$set = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getSet();
+
+var_dump($set->getData());
+```
+
+```python
+cities_set.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would
+# return frozenset([]).
+
+cities_set.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions or deletions.
+cities_set.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in setResponse.AsStrings)
+{
+    Console.WriteLine("Cities Set Value: {0}", value);
+}
+
+// Output:
+// Cities Set Value: Hamilton
+// Cities Set Value: Ottawa
+// Cities Set Value: Toronto
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("cities set values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: cities set values: 'Hamilton, Ottawa, Toronto'
+```
+
+```erlang
+riakc_set:dirty_value(CitiesSet5).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_set:value(CitiesSet5).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"sets">>,<<"travel">>},
+                                        <<"cities">>).
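+
+%% SetX now holds the server-side value; riakc_set:value(SetX) returns
+%% its members with no unsent local modifications.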
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="}
+
+# You can also fetch the value of the set without the context included:
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false
+
+# Response
+{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]}
+```
+
+## Find Set Member
+
+Or we can see whether our set includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('Vancouver', $set->getData()); # false
+
+in_array('Ottawa', $set->getData()); # true
+```
+
+```python
+'Vancouver' in cities_set
+# False
+
+'Ottawa' in cities_set
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver");
+bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var cities_set = result.values;
+cities_set.indexOf('Vancouver'); // if present, index is >= 0
+cities_set.indexOf('Ottawa'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, CitiesSet5 is the most "recent" set from the standpoint
+%% of our application.
+
+riakc_set:is_element(<<"Vancouver">>, CitiesSet5).
+riakc_set:is_element(<<"Ottawa">>, CitiesSet5).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of Set
+
+We can also determine the size of the set:
+
+```java
+// Using our "binarySet" from above:
+
+int numberOfCities = binarySet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($set->getData());
+```
+
+```python
+len(cities_set)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+setResponse.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var cities_set_size = result.values.length;
+```
+
+```erlang
+riakc_set:size(CitiesSet5).
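+
+%% Note: size/1, like the other query functions, counts only the
+%% fetched value, not any local additions that have not been stored.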
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/faq.md b/content/riak/kv/3.0.2/developing/faq.md
new file mode 100644
index 0000000000..7dcd6a8fef
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/faq.md
@@ -0,0 +1,559 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Frequently Asked Questions"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Developing FAQ"
+    identifier: "developing_faq"
+    weight: 108
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/3.0.2/community/faqs/developing
+  - /riak/kv/3.0.2/community/faqs/developing
+---
+
+[Basho Bench]: {{<baseurl>}}riak/kv/3.0.2/using/performance/benchmarking
+[Bitcask]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask
+[Bucket Properties]: {{<baseurl>}}riak/kv/3.0.2/developing/usage
+[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/commit-hooks
+[Configuration Files]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{<baseurl>}}riak/kv/3.0.2/developing/client-libraries
+[MapReduce]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce
+[Memory]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/memory
+[Riak CS]: {{<baseurl>}}riak/cs/2.1.1
+[System Planning]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context#vector-clocks
+
+
+## General
+
+
+**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?**
+
+**A:**
+  If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example:
+
+  ```erlang
+  {bitcask, [
+      {data_root, "data/bitcask"},
+      {expiry_secs, 86400} %% Expire after a day
+  ]},
+  ```
+
+  There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0.
+
+  You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM.
+
+**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?**
+
+
+**A:**
+  Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free.
+
+  If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact.
+
+**Q: Can I list buckets or keys in production?**
+
+
+**A:**
+  It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size.
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3
+
+  For simplicity, this example uses small integers instead of the actual 160-bit partition index values. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+<table>
+  <tr><td>0-1-2</td><td>1-2-3</td><td>2-3-4</td><td>3-4-5</td></tr>
+  <tr><td>4-5-6</td><td>5-6-7</td><td>6-7-8</td><td>7-8-9</td></tr>
+  <tr><td>8-9-10</td><td>9-10-11</td><td>10-11-12</td><td>11-12-13</td></tr>
+  <tr><td>12-13-14</td><td>13-14-15</td><td>14-15-0</td><td>15-0-1</td></tr>
+</table>
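+
+  These preflists are simply every window of `n_val` = 3 consecutive partitions around the ring, which you can check from any Erlang shell (a standalone sketch, not a Riak API call):
+
+```erlang
+%% Prints the 16 preflists for ring size 16 and n_val 3.
+[[I, (I + 1) rem 16, (I + 2) rem 16] || I <- lists:seq(0, 15)].
+```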
+
+  Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes:
+
+<table>
+  <tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-2-5-8-10-13</td></tr>
+  <tr><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-4-7-10-13</td><td>0-3-5-7-10-13</td></tr>
+  <tr><td>0-3-5-8-10-13</td><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-7-10-13</td></tr>
+  <tr><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td></tr>
+  <tr><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td></tr>
+  <tr><td>0-3-6-9-12-15</td><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td></tr>
+  <tr><td>1-3-6-9-11-14</td><td>1-3-6-9-12-14</td><td>1-3-6-9-12-15</td><td>1-4-5-8-11-14</td></tr>
+  <tr><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td></tr>
+  <tr><td>1-4-7-8-11-14</td><td>1-4-7-9-11-14</td><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td></tr>
+  <tr><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td></tr>
+  <tr><td>1-4-7-10-13-15</td><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-4-7-9-12-15</td></tr>
+  <tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-6-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+  <tr><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td><td>2-5-8-9-12-15</td><td>2-5-8-10-12-15</td></tr>
+  <tr><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-13-15</td><td>2-5-8-11-14-15</td></tr>
+</table>
+
+  When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency.
+  A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for over half of them to respond. This results in consistent responses to GETs even when one of the vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may include only 1 member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+  Continuing with the above example, consider if node C is force-removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+  In this new 4-node configuration, any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read-repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+  So, making a couple of assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+     ```
+     a - 0-1-2
+     b - 6-7-8
+     c - 10-11-12
+     ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys will vary depending on the nodes chosen for the coverage set. Of the 56 possible covering sets...
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+<table>
+  <tr><td>0-2-5-8-10-13</td><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-5-8-10-13</td></tr>
+  <tr><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td></tr>
+  <tr><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td></tr>
+  <tr><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td><td>1-3-6-9-11-14</td></tr>
+  <tr><td>1-4-5-8-11-14</td><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-7-8-11-14</td></tr>
+</table>
+
+  * 24 sets (42.9%) will return 2 of the 3 keys:
+
+<table>
+  <tr><td colspan="4">`{a,b}` (7 sets)</td></tr>
+  <tr><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td><td>0-3-6-9-12-15</td><td>1-3-6-9-12-14</td></tr>
+  <tr><td>1-3-6-9-12-15</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td><td></td></tr>
+  <tr><td colspan="4">`{a,c}` (12 sets)</td></tr>
+  <tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-3-4-7-10-13</td></tr>
+  <tr><td>0-3-5-7-10-13</td><td>0-3-6-7-10-13</td><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td></tr>
+  <tr><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td><td>1-4-7-10-13-15</td><td>1-4-7-9-11-14</td></tr>
+  <tr><td colspan="4">`{b,c}` (5 sets)</td></tr>
+  <tr><td>2-5-8-10-12-15</td><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-14-15</td></tr>
+  <tr><td>2-5-8-11-13-15</td><td></td><td></td><td></td></tr>
+</table>
+
+  * 10 sets (17.9%) will return only one of the 3 keys:
+
+<table>
+  <tr><td colspan="4">`{a}` (2 sets)</td></tr>
+  <tr><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td><td></td><td></td></tr>
+  <tr><td colspan="4">`{b}` (4 sets)</td></tr>
+  <tr><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-5-6-9-12-15</td><td>2-5-8-9-12-15</td></tr>
+  <tr><td colspan="4">`{c}` (4 sets)</td></tr>
+  <tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td></tr>
+</table>
+
+  * 2 sets (3.6%) will not return any of the 3 keys:
+
+<table>
+  <tr><td>2-4-7-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+</table>
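+
+  The counts above can be reproduced by brute force. The sketch below is standalone Erlang (the module and function names are illustrative, not part of Riak): it regenerates the 16 preflists, enumerates every 6-vnode subset of the ring, and keeps the subsets that touch all preflists.
+
+```erlang
+-module(coverage_count).
+-export([count/0]).
+
+%% The 16 preflists for ring size 16 and n_val 3.
+preflists() ->
+    [[I, (I + 1) rem 16, (I + 2) rem 16] || I <- lists:seq(0, 15)].
+
+%% True if Set contains at least one vnode from every preflist.
+covers(Set, Preflists) ->
+    lists:all(fun(PL) ->
+                  lists:any(fun(V) -> lists:member(V, Set) end, PL)
+              end, Preflists).
+
+%% All K-element subsets of a list.
+subsets(_, 0) -> [[]];
+subsets([], _) -> [];
+subsets([H | T], K) ->
+    [[H | S] || S <- subsets(T, K - 1)] ++ subsets(T, K).
+
+%% Returns 56, matching the enumeration above.
+count() ->
+    PLs = preflists(),
+    length([S || S <- subsets(lists:seq(0, 15), 6), covers(S, PLs)]).
+```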
+**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories.
+
+  ```erlang
+  ClusterNode = 'riak@127.0.0.1'.
+
+  application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+  {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+  Ring2 = setelement(2, Ring, node()).
+  riak_core_ring_manager:set_my_ring(Ring2).
+  riak_core_ring_manager:write_ringfile().
+  [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- riak_core_ring:all_members(Ring2)].
+  ```
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+  There isn't a limit on object size, but we suggest you keep it to no more than 1-2MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or, if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+  The storage per key is 40 bytes plus the key size and bucket name size.
+
+  Example:
+
+  Key size: 15 bytes.
+  Bucket Name size: 10 bytes.
+
+  Total size = 40 + 15 + 10 = **65 bytes**.
+
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+  It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+  The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+
+  Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster.
+
+  The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+
+**Q: Are Riak keys / buckets case sensitive?**
+
+
+**A:**
+  Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+  We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+  1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+  2. You will be able to upgrade Riak and your application independently of one another.
+  3. When your application or Riak needs more capacity, you can scale them separately to meet your production needs.
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+  Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+
+**Q: How do I spread requests across---i.e.
load balance---a Riak cluster?** + + +**A:** + There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**. + + For further information see [System Planning]. + + +**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?** + There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node a series of merges are kicked off and the total size of the data directory shrinks. Why does this happen? + + +**A:** + Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows: + + 1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`) + 2. Remove the currently active file from the list; the active file is the one being actively written + 3. Lookup file stats for each data file; this includes percent fragmentation and number of dead bytes + 4. If any of the stats exceed the defined triggers, the Bitcask directory is merged + + The default triggers for a Bitcask directory: + + * `{frag_merge_trigger, 60}, % >= 60% fragmentation` + * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB` + + In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory. + + If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default). + +**Q: When retrieving a list of siblings I am getting the same vtag multiple times.** + When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling? + + +**A:** + The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata. + + It is possible to get siblings with the same vtag during vector clock pruning and read/repair. + + See [vector clocks] for more information. + + +**Q: How should I structure larger data objects?** + I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs? + + +**A:** + The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include: + + 1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time? + 2. How likely are the objects to be updated at the same time by multiple processes? + + If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. Generally, you will want to add links to connect the objects for easy fetching and traversal. 
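+
+  As a rough sketch of that split in Erlang (the bucket names, keys, and payload terms here are illustrative, not a prescribed schema):
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+
+%% Store the frequently updated child under its own key...
+Address = riakc_obj:new(<<"addresses">>, <<"cust1_home">>,
+                        term_to_binary({address, "123 Main St", "Columbus"})).
+riakc_pb_socket:put(Pid, Address).
+
+%% ...and keep only the child's key in the parent, so editing the
+%% address no longer rewrites (or conflicts with) the whole customer.
+Customer = riakc_obj:new(<<"customers">>, <<"cust1">>,
+                         term_to_binary({customer, "Jane Doe", [<<"cust1_home">>]})).
+riakc_pb_socket:put(Pid, Customer).
+```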
+ +**Q: Is there any way in Riak to limit access to a user or a group of users?** + + +**A:** + Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication. + + If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it. + +**Q: Is there a way to enforce a schema on data in a given bucket?** + Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error. + + +**A:** + Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. This document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in Javascript that restricts non-JSON content. + +**Q: How does the Erlang Riak Client manage node failures?** + Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down? + + +**A:** + The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function. + + * `queue_if_disconnected` (default: `false`) - requests will be queued when the connection to the server is lost. + * `auto_reconnect` (default: `false`) - if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`. + + If these options are both false, connection errors will be returned to the process-making requests as `{error, Reason}` tuples. + +**Q: Is there a limiting factor for the number of buckets in a cluster?** + + +**A:** + As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node. + + More on [Bucket Properties]. + +**Q: Is it possible to configure a single bucket's properties in `app.config`?** + + +**A:** + Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state. + + You can read more on `app.config` in [Configuration Files]. + +**Q: Is there a simple command to delete a bucket?** + + +**A:** + There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work: + + ```curl + curl -X DELETE http://your-host:8098/riak/your-bucket + ``` + +**Q: Can Riak be configured to fail an update instead of generating a conflict?** + + +**A:** + No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do. + +**Q: How can I limit the number of keys retrieved?** + + +**A:** + You'll need to use a [MapReduce] job for this. + + You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster. 
It will only reduce load on your client. + +**Q: How is the real hash value for replicas calculated based on the preflist?** + + +**A:** + The hash is calculated first and then the next subsequent *N* partitions are chosen for the preflist. + +**Q: Do client libraries support load balancing/round robin?** + + +**A:** + + * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred. + * The Java client is strictly round robin, but with retries built in. + * The Python client also follows round robin without retries. + * The Erlang client does not support any load balancing. + +## MapReduce + + +**Q: Does the number of keys in a bucket affect the performance of MapReduce?** + + +**A:** + Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run. + +**Q: How do I filter out `not_found` from MapReduce results?** + If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys. + + +**A:** + There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list]. + +**Q: Is it possible to call a reduce function at specific intervals during a map function?** + When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often. + + +**A:** + Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce. + +**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?** + + +**A:** + Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase. + +**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?** + + +**A:** + This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com]. + +**Q: Why does MapReduce return a JSON object on occasion instead of an array?** + + +**A:** + `mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---is turned into a hash: + + ```erlang + list_to_binary(mochijson2:encode([{a , b}, {foo, bar}])). + <<"{\"a\":\"b\",\"foo\":\"bar\"}">> + ``` + + JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists. + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started.md b/content/riak/kv/3.0.2/developing/getting-started.md new file mode 100644 index 0000000000..bd977900ab --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started.md @@ -0,0 +1,51 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +aliases: +--- + +[install index]: {{}}riak/kv/3.0.2/setup/installing +[dev client libraries]: {{}}riak/kv/3.0.2/developing/client-libraries + +Welcome, new Riak developer! 
This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
    +
+* [Java]({{}}riak/kv/3.0.2/developing/getting-started/java)
+* [Ruby]({{}}riak/kv/3.0.2/developing/getting-started/ruby)
+* [Python]({{}}riak/kv/3.0.2/developing/getting-started/python)
+* [C Sharp]({{}}riak/kv/3.0.2/developing/getting-started/csharp)
+* [Node.js]({{}}riak/kv/3.0.2/developing/getting-started/nodejs)
+* [Erlang]({{}}riak/kv/3.0.2/developing/getting-started/erlang)
+* [PHP]({{}}riak/kv/3.0.2/developing/getting-started/php)
+* [Go]({{}}riak/kv/3.0.2/developing/getting-started/golang)
+ +### Community-supported Client Libraries + +Please see our [client libraries][dev client libraries] page for a listing of +community-supported clients. + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/csharp.md b/content/riak/kv/3.0.2/developing/getting-started/csharp.md new file mode 100644 index 0000000000..8e1d6062f3 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/csharp.md @@ -0,0 +1,86 @@ +--- +title: "Getting Started with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "C Sharp" + identifier: "getting_started_csharp" + weight: 103 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/csharp + - /riak/kv/3.0.2/dev/taste-of-riak/csharp +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.2/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. + +### Client Setup + +Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager. + +{{% note title="Configuring for a remote cluster" %}} +By default, the Riak .NET Client will add a section to your `app.config` file +for a four node local cluster. If you are using a remote cluster, open up +`app.config` and change the `hostAddress` values to point to nodes in your +remote cluster. +{{% /note %}} + +### Connecting to Riak + +Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object. + +```csharp +using System; +using RiakClient; + +namespace TasteOfRiak +{ + class Program + { + static void Main(string[] args) + { + // don't worry, we'll use this string later + const string contributors = "contributors"; + IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig"); + IRiakClient client = cluster.CreateClient(); + } + } +} +``` + +This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndpoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak. + +Let's make sure the cluster is online. Add this to your `Main` method: + +```csharp +var pingResult = client.Ping(); + +if (pingResult.IsSuccess) +{ + Console.WriteLine("pong"); +} +else +{ + Console.WriteLine("Are you sure Riak is running?"); + Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage); +} +``` + +This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes. + +We are now ready to start interacting with Riak. 
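+
+Since the endpoint implements `IDisposable`, longer-lived programs usually wrap it in a `using` block so its connections are closed on exit. A minimal sketch of that pattern (the same one the querying chapter uses later):
+
+```csharp
+using System;
+using RiakClient;
+
+namespace TasteOfRiak
+{
+    class Program
+    {
+        static void Main(string[] args)
+        {
+            // The endpoint is disposed, and its connections closed, on exit.
+            using (IRiakEndPoint cluster = RiakCluster.FromConfig("riakConfig"))
+            {
+                IRiakClient client = cluster.CreateClient();
+                Console.WriteLine(client.Ping().IsSuccess ? "pong" : "no answer");
+            }
+        }
+    }
+}
+```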
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.2/developing/getting-started/csharp/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/3.0.2/developing/getting-started/csharp/crud-operations.md
new file mode 100644
index 0000000000..43f6c217d0
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/csharp/crud-operations.md
@@ -0,0 +1,148 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_csharp_crud"
+    weight: 100
+    parent: "getting_started_csharp"
+toc: true
+aliases:
+---
+
+### Creating Objects In Riak
+
+Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak.
+
+The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At the most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak.
+
+Add the `RiakClient.Models` namespace to your using directive. Your usings should look like this:
+
+```csharp
+using System;
+using System.Collections.Generic;
+using RiakClient;
+using RiakClient.Models;
+```
+
+Add the `Person` class to the `TasteOfRiak` namespace:
+
+```csharp
+public class Person
+{
+    public string EmailAddress { get; set; }
+    public string FirstName { get; set; }
+    public string LastName { get; set; }
+}
+```
+
+Now let's create some people!
+
+```csharp
+var people = new[]
+{
+    new Person {
+        EmailAddress = "bashoman@basho.com",
+        FirstName = "Basho",
+        LastName = "Man"
+    },
+    new Person {
+        EmailAddress = "johndoe@gmail.com",
+        FirstName = "John",
+        LastName = "Doe"
+    }
+};
+
+foreach (var person in people)
+{
+    var o = new RiakObject(contributors, person.EmailAddress, person);
+    var putResult = client.Put(o);
+
+    if (putResult.IsSuccess)
+    {
+        Console.WriteLine("Successfully saved {0} to bucket {1}", o.Key, o.Bucket);
+    }
+    else
+    {
+        Console.WriteLine("Are you *really* sure Riak is running?");
+        Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage);
+    }
+}
+```
+
+In this sample, we create a collection of `Person` objects and then save each `Person` to Riak.
+
+Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`.
+
+Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be an error message displaying the result code and a helpful error message.
+
+### Reading from Riak
+
+Let's find a person!
+
+```csharp
+var result = client.Get(contributors, "bashoman@basho.com");
+if (result.IsSuccess)
+{
+    bashoman = result.Value.GetObject<Person>();
+    Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors);
+}
+else
+{
+    Console.WriteLine("Something went wrong!");
+    Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage);
+}
+```
+
+We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult<RiakObject>` which, like other RiakResults, helpfully encapsulates the communication with Riak.
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/3.0.2/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..d0f678afed --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,111 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/3.0.2/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<username>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13`
+| `Timelines` | `<username>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br/>
`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting UTC datetime in an [ISO 8601][iso_8601]
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, please refer to
+[this source code][2] for the repositories that we'll be using.
+
+[This console application][3] exercises the code that we've written.
+
+The repository pattern and `TimelineManager` help with a few things:
+
+ - They help us see if an object exists before creating a new one
+ - They keep our buckets and key names consistent
+ - They provide us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application-dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
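+
+As a small sketch of how an application might build these natural keys
+(the `KeyBuilder` helper below is ours for illustration, not part of the
+Riak .NET Client):
+
+```csharp
+using System;
+
+public static class KeyBuilder
+{
+    // e.g. "joeuser_2014-03-06T02:05:13"
+    public static string MsgKey(string userName, DateTime postedUtc)
+    {
+        return string.Format("{0}_{1:yyyy-MM-ddTHH:mm:ss}", userName, postedUtc);
+    }
+
+    // e.g. "joeuser_Sent_2014-03-06"
+    public static string TimelineKey(string owner, string type, DateTime dayUtc)
+    {
+        return string.Format("{0}_{1}_{2:yyyy-MM-dd}", owner, type, dayUtc);
+    }
+}
+```
+
+Because both halves of the key are things the application already knows
+(the user and the current time), any process can recompute the same key
+without coordinating with a central ID service.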
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/csharp/querying.md b/content/riak/kv/3.0.2/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..282bd2cd2d --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/csharp/querying.md @@ -0,0 +1,214 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/querying-csharp + - /riak/kv/3.0.2/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+ +```csharp +Console.WriteLine("Creating Data"); +Customer customer = CreateCustomer(); +IEnumerable orders = CreateOrders(customer); +OrderSummary orderSummary = CreateOrderSummary(customer, orders); + +Console.WriteLine("Starting Client"); +using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = endpoint.CreateClient(); + + Console.WriteLine("Storing Data"); + + client.Put(ToRiakObject(customer)); + + foreach (Order order in orders) + { + // NB: this adds secondary index data as well + client.Put(ToRiakObject(order)); + } + + client.Put(ToRiakObject(orderSummary)); + + ... + ... + ... +} +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```csharp +Console.WriteLine("Fetching related data by shared key"); +string key = "1"; + +var result = client.Get(customersBucketName, key); +CheckResult(result); +Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result)); + +result = client.Get(orderSummariesBucketName, key); +CheckResult(result); +Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result)); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.2/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will make a note of where +secondary index data is added to our model objects. 
+ +```csharp +private static RiakObject ToRiakObject(Order order) +{ + var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString()); + var riakObject = new RiakObject(orderRiakObjectId, order); + + IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName); + salesPersonIdIndex.Add(order.SalesPersonId.ToString()); + + BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName); + orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd")); + + return riakObject; +} +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +```csharp +// Query for order keys where the SalesPersonId index is set to 9000 +var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName); +RiakResult indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here. +CheckResult(indexRiakResult); +RiakIndexResult indexResult = indexRiakResult.Value; +Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +Jane's orders (key values): 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```csharp +// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 +riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName); +indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here. +CheckResult(indexRiakResult); +indexResult = indexRiakResult.Value; +Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +October orders (key values): 1, 2 +``` + +We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/erlang.md b/content/riak/kv/3.0.2/developing/getting-started/erlang.md new file mode 100644 index 0000000000..5b93756b03 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/erlang.md @@ -0,0 +1,59 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/erlang + - /riak/kv/3.0.2/dev/taste-of-riak/erlang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.2/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let’s create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.2/developing/getting-started/erlang/crud-operations) + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/3.0.2/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..67a41caec8 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,172 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let’s store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}. 
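+%% A tuple is an ordinary Erlang term; the client will serialize it to a
+%% binary for storage (see "Working With Complex Objects" below).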
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
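+%% rp/1 is an Erlang shell command that prints the whole term instead of
+%% truncating it.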
+``` + +Next let’s clean up our mess: + +```erlang +riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>). +riakc_pb_socket:stop(Pid). +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/3.0.2/developing/getting-started/erlang/object-modeling.md new file mode 100644 index 0000000000..a8445b1f28 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/erlang/object-modeling.md @@ -0,0 +1,342 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Object Modeling" + identifier: "getting_started_erlang_object" + weight: 102 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/object-modeling-erlang + - /riak/kv/3.0.2/dev/taste-of-riak/object-modeling-erlang +--- + +To get started, let's create the records that we'll be using. + +{{% note title="Code Download" %}} +You can also download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema). + +The Github version includes Erlang type specifications which have been omitted +here for brevity. +{{% /note %}} + + +```erlang +%% msgy.hrl + +-define(USER_BUCKET, <<"Users">>). +-define(MSG_BUCKET, <<"Msgs">>). +-define(TIMELINE_BUCKET, <<"Timelines">>). +-define(INBOX, "Inbox"). +-define(SENT, "Sent"). + +-record(user, {user_name, full_name, email}). + +-record(msg, {sender, recipient, created, text}). + +-record(timeline, {owner, msg_type, msgs}). +``` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/3.0.2/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. + + +Bucket | Key Pattern | Example Key +:------|:------------|:----------- +`Users` | `` | `joeuser` +`Msgs` | `_` | `joeuser_2014-03-06T02:05:13.223556Z` +`Timelines` | `__` | `joeuser_Sent_2014-03-06Z`
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<user_name>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```erlang
+%% user_repository.erl
+
+-module(user_repository).
+-export([save_user/2,
+         get_user/2]).
+-include("msgy.hrl").
+
+save_user(ClientPid, User) ->
+    RUser = riakc_obj:new(?USER_BUCKET,
+                          list_to_binary(User#user.user_name),
+                          User),
+    riakc_pb_socket:put(ClientPid, RUser).
+
+get_user(ClientPid, UserName) ->
+    {ok, RUser} = riakc_pb_socket:get(ClientPid,
+                                      ?USER_BUCKET,
+                                      list_to_binary(UserName)),
+    binary_to_term(riakc_obj:get_value(RUser)).
+```
+
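+A quick sketch of how this repository might be exercised from the shell
+(an assumption-laden example: it presumes a node listening on
+127.0.0.1:10017 as elsewhere in this guide, and record definitions
+loaded with `rr("msgy.hrl").`):
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017).
+user_repository:save_user(Pid, #user{user_name="joeuser",
+                                     full_name="Joe User",
+                                     email="joe.user@basho.com"}).
+User = user_repository:get_user(Pid, "joeuser").
+```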
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
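+Note that `create_msg/3` is purely local until the message is saved; a
+quick sketch with hypothetical values:
+
+```erlang
+Msg = msg_repository:create_msg("marleenmgr", "joeuser", "Welcome!").
+%% Msg#msg.created now holds a sortable ISO 8601-style timestamp string.
+```
+
+One caveat: `erlang:now/0`, used above, is deprecated in modern OTP
+releases; `os:timestamp/0` is a drop-in replacement in this code.
+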
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
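+
+%% NB: adjust the host/port below if your cluster differs; 10017 is the
+%% first node's protocol buffers port in the devrel setup used
+%% throughout this guide.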
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/erlang/querying.md b/content/riak/kv/3.0.2/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..dd06501791 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/erlang/querying.md @@ -0,0 +1,308 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/querying-erlang + - /riak/kv/3.0.2/dev/taste-of-riak/querying-erlang +--- + + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/3.0.2/developing/key-value-modeling). 
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak, you
+can denormalize all that associated data into a single object and store
+it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size limits (objects greater than 1MB)
+* Shared/referential data (data that the object doesn't "own")
+* Differences in access patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled-up info about orders such as total, etc.
+
+Let's put some data into Riak so we can play with it. Fire up your
+Erlang REPL with the client library in the path, and enter the
+following:
+
+```erlang
+rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}).
+rd(item, {item_id, title, price}).
+rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}).
+rd(order_summary_entry, {order_id, total, order_date}).
+rd(order_summary, {customer_id, summaries}).
+
+
+Customer = #customer{ customer_id= 1,
+                      name= "John Smith",
+                      address= "123 Main Street",
+                      city= "Columbus",
+                      state= "Ohio",
+                      zip= "43210",
+                      phone= "+1-614-555-5555",
+                      created_date= {{2013,10,1},{14,30,26}}}.
+
+Orders = [ #order{
+              order_id= 1,
+              customer_id= 1,
+              salesperson_id= 9000,
+              items= [
+                #item{
+                  item_id= "TCV37GIT4NJ",
+                  title= "USB 3.0 Coffee Warmer",
+                  price= 15.99 },
+                #item{
+                  item_id= "PEG10BBF2PP",
+                  title= "eTablet Pro, 24GB, Grey",
+                  price= 399.99 }],
+              total= 415.98,
+              order_date= {{2013,10,1},{14,42,26}}},
+
+           #order{
+              order_id= 2,
+              customer_id= 1,
+              salesperson_id= 9001,
+              items= [
+                #item{
+                  item_id= "OAX19XWN0QP",
+                  title= "GoSlo Digital Camera",
+                  price= 359.99 }],
+              total= 359.99,
+              order_date= {{2013,10,15},{16,43,16}}},
+
+           #order {
+              order_id= 3,
+              customer_id= 1,
+              salesperson_id= 9000,
+              items= [
+                #item{
+                  item_id= "WYK12EPU5EZ",
+                  title= "Call of Battle= Goats - Gamesphere 4",
+                  price= 69.99 },
+                #item{
+                  item_id= "TJB84HAA8OA",
+                  title= "Bricko Building Blocks",
+                  price= 4.99 }],
+              total= 74.98,
+              order_date= {{2013,11,3},{17,45,28}}}
+         ].
+
+OrderSummary = #order_summary{
+                  customer_id= 1,
+                  summaries= [
+                    #order_summary_entry{
+                      order_id= 1,
+                      total= 415.98,
+                      order_date= {{2013,10,1},{14,42,26}}
+                    },
+                    #order_summary_entry{
+                      order_id= 2,
+                      total= 359.99,
+                      order_date= {{2013,10,15},{16,43,16}}
+                    },
+                    #order_summary_entry{
+                      order_id= 3,
+                      total= 74.98,
+                      order_date= {{2013,11,3},{17,45,28}}}]}.
+
+%% Remember to replace the ip and port parameters with those that match your cluster.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017).
+
+CustomerBucket = <<"Customers">>.
+OrderBucket = <<"Orders">>.
+OrderSummariesBucket = <<"OrderSummaries">>.
+
+CustObj = riakc_obj:new(CustomerBucket,
+                        list_to_binary(
+                          integer_to_list(
+                            Customer#customer.customer_id)),
+                        Customer).
+
+riakc_pb_socket:put(Pid, CustObj).
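+
+%% Keys in Riak are binaries, which is why the integer customer id is
+%% run through integer_to_list/1 and list_to_binary/1 above.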
+ +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.2/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
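+
+%% NB: lists:concat/1 does not zero-pad, so {{2013,10,1},{14,42,26}}
+%% becomes "2013101144226" rather than "20131001144226". The sample
+%% range query below still happens to match these dates, but real code
+%% should zero-pad (e.g. via io_lib:format/2, as in the previous chapter).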
+
+AddIndicesToOrder = fun(OrderKey) ->
+  {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket,
+                                    list_to_binary(integer_to_list(OrderKey))),
+
+  OrderData = binary_to_term(riakc_obj:get_value(Order)),
+  OrderMetadata = riakc_obj:get_update_metadata(Order),
+
+  MD1 = riakc_obj:set_secondary_index(OrderMetadata,
+                                      [{{binary_index, "order_date"},
+                                        [FormatDate(OrderData#order.order_date)]}]),
+
+  MD2 = riakc_obj:set_secondary_index(MD1,
+                                      [{{integer_index, "salesperson_id"},
+                                        [OrderData#order.salesperson_id]}]),
+
+  Order2 = riakc_obj:update_metadata(Order,MD2),
+  riakc_pb_socket:put(Pid,Order2)
+end.
+
+lists:foreach(AddIndicesToOrder, [1,2,3]).
+
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we
+have to add entries to the indices at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```erlang
+riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"3">>],
+                      undefined,undefined}}
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id; next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`order_date_bin` index for entries between `20131001` and `20131031`.
+
+```erlang
+riakc_pb_socket:get_index_range(Pid, OrderBucket,
+                                {binary_index, "order_date"},
+                                <<"20131001">>, <<"20131031">>).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"2">>],
+                      undefined,undefined}}
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So, to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indices can have either Integer or Binary (String) keys.
+* You can search for specific values, or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/golang.md b/content/riak/kv/3.0.2/developing/getting-started/golang.md
new file mode 100644
index 0000000000..5b7d656bfc
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/golang.md
@@ -0,0 +1,82 @@
+---
+title: "Getting Started with Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Go"
+    identifier: "getting_started_go"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/taste-of-riak/golang
+  - /riak/kv/3.0.2/dev/taste-of-riak/golang
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.2/using/running-a-cluster) first and ensure you have
+[a working installation of Go](http://golang.org/doc/install).
+
+## Client Setup
+
+First install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"sync"
+
+	riak "github.com/basho/riak-go-client"
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+	var err error
+
+	// un-comment-out to enable debug logging
+	// riak.EnableDebugLogging = true
+
+	o := &riak.NewClientOptions{
+		RemoteAddresses: []string{util.GetRiakAddress()},
+	}
+
+	var c *riak.Client
+	c, err = riak.NewClient(o)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	defer func() {
+		if err := c.Stop(); err != nil {
+			util.ErrExit(err)
+		}
+	}()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.2/developing/getting-started/golang/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/golang/crud-operations.md b/content/riak/kv/3.0.2/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..3923648df6
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,376 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+aliases:
+---
+
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+	val1 := uint32(1)
+	val1buf := make([]byte, 4)
+	binary.LittleEndian.PutUint32(val1buf, val1)
+
+	val2 := "two"
+
+	val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+	var val3json []byte
+	val3json, err = json.Marshal(val3)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	bucket := "test"
+
+	util.Log.Println("Creating Objects In Riak...")
+
+	objs := []*riak.Object{
+		{
+			Bucket:      bucket,
+			Key:         "one",
+			ContentType: "application/octet-stream",
+			Value:       val1buf,
+		},
+		{
+			Bucket:      bucket,
+			Key:         "two",
+			ContentType: "text/plain",
+			Value:       []byte(val2),
+		},
+		{
+			Bucket:      bucket,
+			Key:         "three",
+			ContentType: "application/json",
+			Value:       val3json,
+		},
+	}
+
+	var cmd riak.Command
+	wg := &sync.WaitGroup{}
+
+	for _, o := range objs {
+		cmd, err = riak.NewStoreValueCommandBuilder().
+			WithContent(o).
+ Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } + } + + wg.Wait() +``` + +In our first object, we have stored the integer 1 with the lookup key +of `one`: + +```golang +{ + Bucket: bucket, + Key: "one", + ContentType: "application/octet-stream", + Value: val1buf, +} +``` + +For our second object, we stored a simple string value of `two` with a +matching key: + +```golang +{ + Bucket: bucket, + Key: "two", + ContentType: "text/plain", + Value: []byte(val2), +} +``` + +Finally, the third object we stored was a bit of JSON: + +```golang +{ + Bucket: bucket, + Key: "three", + ContentType: "application/json", + Value: val3json, +} +``` + +## Reading Objects + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +Requesting the objects by key: + +```golang +var cmd riak.Command +wg := &sync.WaitGroup{} + +for _, o := range objs { + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(o). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() + +util.Log.Println("Reading Objects From Riak...") + +d := make(chan riak.Command, len(objs)) + +for _, o := range objs { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + Done: d, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +close(d) +``` + +Converting to JSON to compare a string key to a symbol +key: + +```golang +for done := range d { + f := done.(*riak.FetchValueCommand) + /* un-comment to dump fetched object as JSON + if json, jerr := json.MarshalIndent(f.Response, "", " "); err != nil { + util.ErrLog.Println(jerr) + } else { + util.Log.Println("fetched value: ", string(json)) + } + */ + obj := f.Response.Values[0] + switch obj.Key { + case "one": + if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "two": + if actual, expected := string(obj.Value), val2; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "three": + obj3 = obj + val3.MyValue = 0 + if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) + } else { + if actual, expected := val3.MyValue, int(3); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + } + default: + util.ErrLog.Printf("unrecognized key: %s", obj.Key) + } +} +``` + +## Updating Objects + +While some data may be static, other forms of data need to be +updated. + +Let’s update some values: + +```golang +util.Log.Println("Updating Object Three In Riak...") + +val3.MyValue = 42 +obj3.Value, err = json.Marshal(val3) +if err != nil { + util.ErrExit(err) +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj3). + WithReturnBody(true). 
+ Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} + +svcmd := cmd.(*riak.StoreValueCommand) +svrsp := svcmd.Response +obj3 = svrsp.Values[0] +val3.MyValue = 0 +if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) +} else { + if actual, expected := val3.MyValue, int(42); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected) + } +} +util.Log.Println("updated object key: ", obj3.Key) +util.Log.Println("updated object value: ", val3.MyValue) +``` + +## Deleting Objects + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called against either the bucket or the +object. + +```golang +for _, o := range objs { + cmd, err = riak.NewDeleteValueCommandBuilder(). + WithBucket(o.Bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. + +For example, this `struct` that represents some information about +a book: + +```golang +type Book struct { + ISBN string + Title string + Author string + Body string + CopiesOwned uint16 +} + +book := &Book{ + ISBN: "1111979723", + Title: "Moby Dick", + Author: "Herman Melville", + Body: "Call me Ishmael. Some years ago...", + CopiesOwned: 3, +} +``` + +We now have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```golang +var jbook []byte +jbook, err = json.Marshal(book) +if err != nil { + util.ErrExit(err) +} + +bookObj := &riak.Object{ + Bucket: "books", + Key: book.ISBN, + ContentType: "application/json", + Value: jbook, +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(bookObj). + WithReturnBody(false). + Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} +``` + +If we fetch our book back and print the data: + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket("books"). + WithKey(book.ISBN). + Build() +if err != nil { + util.ErrExit(err) +} +if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) +} + +fcmd := cmd.(*riak.FetchValueCommand) +bookObj = fcmd.Response.Values[0] +util.Log.Println(string(bookObj.Value)) +``` + +The result is: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +Now, let’s delete the book: + +```golang +... 
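+
+// The published example elides the delete here; a sketch based on the
+// DeleteValue command used in the "Deleting Objects" section above:
+cmd, err = riak.NewDeleteValueCommandBuilder().
+	WithBucket("books").
+	WithKey(book.ISBN).
+	Build()
+if err != nil {
+	util.ErrExit(err)
+}
+if err := c.Execute(cmd); err != nil {
+	util.ErrLog.Println(err)
+}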
+``` + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/golang/object-modeling.md b/content/riak/kv/3.0.2/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..2ad369c2d6 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,552 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/object-modeling-golang + - /riak/kv/3.0.2/dev/taste-of-riak/object-modeling-golang +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/3.0.2/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<user_name>_<type>_<date>` for
+users and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+	"encoding/json"
+	"errors"
+
+	riak "github.com/basho/riak-go-client"
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+	Get(key string, notFoundOk bool) (models.Model, error)
+	Save(models.Model) (models.Model, error)
+	getBucketName() string
+	getModel() models.Model
+	getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+	client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+	return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(notFoundOk).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	fcmd := cmd.(*riak.FetchValueCommand)
+
+	if notFoundOk && len(fcmd.Response.Values) == 0 {
+		return nil, nil
+	}
+
+	if len(fcmd.Response.Values) > 1 {
+		// Siblings present that need resolution
+		// Here we'll just return an unexpected error
+		return nil, ErrUnexpectedSiblings
+	} else {
+		return buildModel(r.getModel(), fcmd.Response.Values[0])
+	}
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	key := m.GetId()
+
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(true).
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
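+A minimal round trip with this repository might look like the following
+(a sketch; it assumes a started `*riak.Client` named `client`, the
+`models` and `repositories` packages imported as above, and `fmt`):
+
+```golang
+userRepo := repositories.NewUserRepository(client)
+
+// Save a new user, then read it back by its natural key.
+if _, err := userRepo.Save(models.NewUser("joeuser", "Joe User", "joe.user@basho.com")); err != nil {
+	panic(err)
+}
+m, err := userRepo.Get("joeuser", false)
+if err != nil {
+	panic(err)
+}
+user := m.(*models.User)
+fmt.Println(user.FullName) // prints "Joe User"
+```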
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names. +* How to choose natural keys based on how we want to partition our data. + + + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/golang/querying.md b/content/riak/kv/3.0.2/developing/getting-started/golang/querying.md new file mode 100644 index 0000000000..b190fbdfc1 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/golang/querying.md @@ -0,0 +1,580 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Go" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Querying" + identifier: "getting_started_go_query" + weight: 101 + parent: "getting_started_go" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/querying-golang + - /riak/kv/3.0.2/dev/taste-of-riak/querying-golang +--- + +## Go Version Setup + +For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix* OS. + +>A Quick Note on Querying and Schemas: +> +>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it. 
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
+ Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +doneChan := make(chan riak.Command) +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Bucket { + case customersBucket: + util.Log.Printf("Customer 1: %v", string(obj.Value)) + case orderSummariesBucket: + util.Log.Printf("OrderSummary 1: %v", string(obj.Value)) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} +``` + +Which returns our amalgamated objects: + +```sh +2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]} +2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z" +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.2/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: + +```golang +util.Log.Println("Adding Index Data") + +// fetch orders to add index data +cmds = cmds[:0] + +for _, order := range orders { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(ordersBucket). + WithKey(order.Id). 
+ Build() + if err != nil { + util.ErrExit(err) + } + cmds = append(cmds, cmd) +} + +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +errored = false +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Key { + case "1": + obj.AddToIntIndex("SalespersonId_int", 9000) + obj.AddToIndex("OrderDate_bin", "2013-10-01") + case "2": + obj.AddToIntIndex("SalespersonId_int", 9001) + obj.AddToIndex("OrderDate_bin", "2013-10-15") + case "3": + obj.AddToIntIndex("SalespersonId_int", 9000) + obj.AddToIndex("OrderDate_bin", "2013-11-03") + } + scmd, serr := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if serr != nil { + util.ErrExit(serr) + } + a := &riak.Async{ + Command: scmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} + +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +wg.Wait() +close(doneChan) +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level. + +Now let's find all of Jane Appleseed's processed orders. We'll lookup the orders by searching the `saleperson_id_int` index for Jane's id of `9000`: + +```golang +util.Log.Println("Index Queries") + +cmd, err = riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucket(ordersBucket). + WithIndexName("SalespersonId_int"). + WithIndexKey("9000"). + Build() +if err != nil { + util.ErrExit(err) +} + +if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) +} + +qcmd := cmd.(*riak.SecondaryIndexQueryCommand) +for _, rslt := range qcmd.Response.Results { + util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey)) +} +``` + +Which returns: + +```sh +2015/12/29 09:44:10 Jane's Orders, key: 3 +2015/12/29 09:44:10 Jane's Orders, key: 1 +``` + +Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id, next let's use a *binary* index. + +Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`: + +```golang +cmd, err = riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucket(ordersBucket). + WithIndexName("OrderDate_bin"). + WithRange("2013-10-01", "2013-10-31"). + Build() +if err != nil { + util.ErrExit(err) +} + +if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) +} + +qcmd = cmd.(*riak.SecondaryIndexQueryCommand) +for _, rslt := range qcmd.Response.Results { + util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey)) +} +``` + +Which returns: + +```sh +2015/12/29 09:44:10 October's Orders, key: 1 +2015/12/29 09:44:10 October's Orders, key: 2 +``` + +Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/java.md b/content/riak/kv/3.0.2/developing/getting-started/java.md
new file mode 100644
index 0000000000..19c03d5e3a
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/java.md
@@ -0,0 +1,93 @@
+---
+title: "Getting Started with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Java"
+    identifier: "getting_started_java"
+    weight: 100
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/taste-of-riak/java
+  - /riak/kv/3.0.2/dev/taste-of-riak/java
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/3.0.2/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Java is required.
+
+## Client Setup
+
+To include the Riak Java client in your project, add it to your
+project's dependencies. Here is a Maven example:
+
+```xml
+<dependency>
+    <groupId>com.basho.riak</groupId>
+    <artifactId>riak-client</artifactId>
+    <version>2.1.1</version>
+</dependency>
+```
+
+Next, download the
+[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java)
+source code for this tutorial, and save it to your working directory.
+
+{{% note title="Configuring for a local cluster" %}}
+The `TasteOfRiak.java` file that you downloaded is set up to communicate with
+a 1-node Riak cluster listening on `localhost` port 10017. We recommend
+modifying the connection info directly within the `setUpCluster()` method.
+{{% /note %}}
+
+If you execute the `TasteOfRiak.java` file within your IDE, you should
+see the following:
+
+```
+Basic object created
+Location object created for quote object
+StoreValue operation created
+Client object successfully created
+Object storage operation successfully completed
+Success! The object we created and the object we fetched have the same value
+Quote object successfully deleted
+Book object created
+Moby Dick information now stored in Riak
+Book object successfully fetched
+Success! All of our tests check out
+```
+
+Since Java doesn’t have a REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting Up the Cluster
+
+The first step in using the Riak Java client is to create a cluster
+object to facilitate all interactions with Riak. You'll see this on line
+72:
+
+```java
+RiakCluster cluster = setUpCluster();
+```
+
+This calls the private `setUpCluster` method which begins on line 25.
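+
+For reference, here is a minimal sketch of what such a method can look like,
+using the client's core classes (`com.basho.riak.client.core`). The host and
+port below are assumptions matching the note above, not values taken from
+`TasteOfRiak.java` itself:
+
+```java
+// Sketch only: point this at your own node(s)
+private static RiakCluster setUpCluster() throws UnknownHostException {
+    // A single node, matching the 1-node cluster described above
+    RiakNode node = new RiakNode.Builder()
+            .withRemoteAddress("127.0.0.1")
+            .withRemotePort(10017)
+            .build();
+
+    // The cluster must be started before it can execute commands
+    RiakCluster cluster = new RiakCluster.Builder(node).build();
+    cluster.start();
+    return cluster;
+}
+```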
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{<baseurl>}}riak/kv/3.0.2/developing/getting-started/java/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/java/crud-operations.md b/content/riak/kv/3.0.2/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..0b95a3652e
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,206 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+aliases:
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on:
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Iceman");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
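+
+One detail worth knowing: if the requested key does not exist, the fetch does
+not throw; the response simply reports that nothing was found. A minimal
+sketch, reusing the objects above (the printed message is illustrative and
+not part of `TasteOfRiak.java`):
+
+```java
+// FetchValue.Response exposes isNotFound() for missing keys
+FetchValue.Response fetchResp = client.execute(fetchOp);
+if (fetchResp.isNotFound()) {
+    System.out.println("No object stored at that location yet");
+} else {
+    RiakObject obj = fetchResp.getValue(RiakObject.class);
+    System.out.println(obj.getValue().toString());
+}
+```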
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{<baseurl>}}riak/kv/3.0.2/developing/usage/updating-objects/)
+and [Conflict Resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/)
+documentation.
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book.
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the simpler
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method shown above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+
+To update the POJO, we would use `UpdateValue` by
+extending a new `BookUpdate` class as follows:
+
+```java
+public static class BookUpdate extends UpdateValue.Update<Book> {
+    private final Book update;
+    public BookUpdate(Book update){
+        this.update = update;
+    }
+
+    @Override
+    public Book apply(Book t) {
+        if(t == null) {
+            t = new Book();
+        }
+
+        t.author = update.author;
+        t.body = update.body;
+        t.copiesOwned = update.copiesOwned;
+        t.isbn = update.isbn;
+        t.title = update.title;
+
+        return t;
+    }
+}
+```
+
+Then using the `BookUpdate` class with our `mobyDick` object:
+
+```java
+mobyDick.copiesOwned = 5;
+BookUpdate updatedBook = new BookUpdate(mobyDick);
+
+UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation)
+        .withUpdate(updatedBook).build();
+UpdateValue.Response response = client.execute(updateValue);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{<baseurl>}}riak/kv/3.0.2/developing/usage/updating-objects/)
+and [Conflict Resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/)
+documentation.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/java/object-modeling.md b/content/riak/kv/3.0.2/developing/getting-started/java/object-modeling.md
new file mode 100644
index 0000000000..a9bbba6ef0
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/java/object-modeling.md
@@ -0,0 +1,432 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Object Modeling"
+    identifier: "getting_started_java_object"
+    weight: 102
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/taste-of-riak/object-modeling-java
+  - /riak/kv/3.0.2/dev/taste-of-riak/object-modeling-java
+---
+
+To get started, let's create the models that we'll be using.
+
+```java
+package com.basho.msgy.Models;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class Msg {
+    public String Sender;
+    public String Recipient;
+    public String Created;
+    public String Text;
+
+    public static Msg createNew(String sender, String recipient, String text) {
+        Msg msg = new Msg();
+        msg.Sender = sender;
+        msg.Recipient = recipient;
+        msg.Text = text;
+        msg.Created = GetCurrentISO8601Timestamp();
+        return msg;
+    }
+
+    private static String GetCurrentISO8601Timestamp() {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        // Java Dates don't have microsecond resolution :(
+        // Pad out to microseconds to match other examples.
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.convert.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "msgs";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br /> `marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. With the
+Java client, we can use the `@RiakKey` annotation to tell the client
+that we want to use the `UserName` member as the key. It will
+automatically use that value in the future, instead of having to pass the
+key in as another parameter when storing a value.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```java
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.riak.client.IRiakClient;
+import com.basho.riak.client.RiakRetryFailedException;
+import com.basho.riak.client.bucket.Bucket;
+
+public class MsgRepository {
+
+    static final String BUCKET_NAME = "Msgs";
+    protected RiakClient client;
+
+    public MsgRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public Msg get(String msgKey) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        FetchValue.Response response = client.execute(fetch);
+        return response.getValue(Msg.class);
+    }
+
+    public String save(Msg msg) throws Exception {
+        StoreValue store = new StoreValue.Builder(msg).build();
+        client.execute(store);
+        return generateKey(msg);
+    }
+
+    private String generateKey(Msg msg) {
+        return msg.Sender + "_" + msg.Created;
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.riak.client.IRiakClient;
+import com.basho.riak.client.RiakRetryFailedException;
+import com.basho.riak.client.bucket.Bucket;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class TimelineRepository {
+
+    static final String BUCKET_NAME = "Timelines";
+    protected RiakClient client;
+    protected MsgRepository msgRepo;
+
+    public TimelineRepository(RiakClient client) {
+        this.client = client;
+        this.msgRepo = new MsgRepository(this.client);
+    }
+
+    public void postMsg(Msg msg) throws Exception {
+        String msgKey = msgRepo.save(msg);
+
+        // Post to recipient's Inbox timeline
+        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);
+
+        // Post to sender's Sent timeline
+        
addToTimeline(msg, Timeline.TimelineType.Sent, msgKey); + } + + + private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception { + String timelineKey = generateKeyFromMsg(msg, type); + + Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey); + FetchValue fetch = new FetchValue.Builder(loc).build(); + Timeline timeline = client.execute(fetch).getValue(Timeline.class); + + if (timeline != null) { + timeline = addToExistingTimeline(timeline,msgKey); + } else { + timeline = createNewTimeline(msg, type, msgKey); + } + + StoreValue store = new StoreValue.Builder(timeline).build(); + client.execute(store); + } + + public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) { + String owner = getOwner(msg, type); + + Timeline newTimeline = new Timeline(); + newTimeline.Owner = owner; + newTimeline.Type = type.toString(); + newTimeline.Msgs.add(msgKey); + + return newTimeline; + } + + public Timeline addToExistingTimeline(Timeline timeline, String msgKey) { + timeline.Msgs.add(msgKey); + return timeline; + } + + public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws RiakRetryFailedException { + String timelineKey = generateKey(ownerUsername, type, date); + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(timelineKey, Timeline.class).execute(); + } + + private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) { + String owner = getOwner(msg, type); + String dateString = msg.Created.substring(0, 10); + return generateKey(owner, type, dateString); + } + + private String getOwner(Msg msg, Timeline.TimelineType type) { + if(type == Timeline.TimelineType.Inbox) + return msg.Recipient; + else + return msg.Sender; + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) { + String dateString = getIso8601DateStringFromDate(date); + return generateKey(ownerUsername, type, dateString); + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) { + return ownerUsername + "_" + type.toString() + "_" + dateString; + } + + private String getIso8601DateStringFromDate(Date date) { + TimeZone tz = TimeZone.getTimeZone("UTC"); + DateFormat df = new SimpleDateFormat("yyyy-MM-dd"); + df.setTimeZone(tz); + return df.format(date); + } + + +} + +// ---------------------------------------------------------------------------- + +package com.basho.msgy.Repositories; + +import com.basho.msgy.Models.User; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakRetryFailedException; +import com.basho.riak.client.bucket.Bucket; + +public class UserRepository { + static final String BUCKET_NAME = "Users"; + protected IRiakClient client; + + public UserRepository(IRiakClient client) { + this.client = client; + } + + public void save(User user) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + bucket.store(user).execute(); + } + + public User get(String UserName) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(UserName, User.class).execute(); + } +} + +``` + +Finally, let's test them: + +```java +package com.basho.msgy; + +import com.basho.msgy.Models.Msg; +import com.basho.msgy.Models.Timeline; +import com.basho.msgy.Models.User; +import com.basho.msgy.Repositories.MsgRepository; +import com.basho.msgy.Repositories.TimelineRepository; +import 
com.basho.msgy.Repositories.UserRepository; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakException; +import com.basho.riak.client.RiakFactory; + +import java.util.Date; + +public class MsgyMain { + + public static void main(String[] args) throws RiakException { + // Setup our repositories + IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017); + + UserRepository userRepo = new UserRepository(client); + MsgRepository msgRepo = new MsgRepository(client); + TimelineRepository timelineRepo = new TimelineRepository(client); + + // Create and save users + User marleen = new User("marleenmgr", + "Marleen Manager", + "marleen.manager@basho.com"); + + User joe = new User("joeuser", + "Joe User", + "joe.user@basho.com"); + + userRepo.save(marleen); + userRepo.save(joe); + + // Create new Msg, post to timelines + Msg msg = Msg.createNew(marleen.UserName, + joe.UserName, + "Welcome to the company!"); + + timelineRepo.postMsg(msg); + + + // Get Joe's inbox for today, get first message + Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName, + Timeline.TimelineType.Inbox, + new Date()); + + Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0)); + + System.out.println("From: " + joesFirstMsg.Sender); + System.out.println("Msg : " + joesFirstMsg.Text); + System.out.println(""); + + client.shutdown(); + } +} +``` + +As you can see, the repository pattern helps us with a few things: + + - It helps us to see if an object exists before creating a new one + - It keeps our buckets and key names consistent + - It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/java/querying.md b/content/riak/kv/3.0.2/developing/getting-started/java/querying.md new file mode 100644 index 0000000000..d6fc63a52b --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/java/querying.md @@ -0,0 +1,280 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Java" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Querying" + identifier: "getting_started_java_query" + weight: 101 + parent: "getting_started_java" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/querying-java + - /riak/kv/3.0.2/dev/taste-of-riak/querying-java +--- + +## Java Version Setup + +For the Java version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`. 
You
+may import this code into your favorite editor, or just run it from the
+command line using the commands in `BuildAndRun.sh` if you are running
+on a *nix* OS.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can be as simple
+as using the same key across multiple buckets for different types of
+data to having fields in your data that are related by name. These
+querying methods will introduce you to some ways of laying out your data
+in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled-up info about orders such as total, etc. You can find the source
+for these POJOs in `Customer.java`, `Order.java`, and
+`OrderSummaries.java`. Let's put some data into Riak so we can play
+with it.
+
+```java
+// From SipOfRiak.java
+
+private static Customer createCustomer() {
+    Customer customer = new Customer();
+    customer.CustomerId = 1;
+    customer.Name = "John Smith";
+    customer.Address = "123 Main Street";
+    customer.City = "Columbus";
+    customer.State = "Ohio";
+    customer.Zip = "43210";
+    customer.Phone = "+1-614-555-5555";
+    customer.CreatedDate = "2013-10-01 14:30:26";
+    return customer;
+}
+
+private static ArrayList<Order> createOrders() {
+    ArrayList<Order> orders = new ArrayList<Order>();
+
+    Order order1 = new Order();
+    order1.OrderId = 1;
+    order1.CustomerId = 1;
+    order1.SalespersonId = 9000;
+    order1.Items.add(
+        new Item("TCV37GIT4NJ",
+                 "USB 3.0 Coffee Warmer",
+                 15.99));
+    order1.Items.add(
+        new Item("PEG10BBF2PP",
+                 "eTablet Pro; 24GB; Grey",
+                 399.99));
+    order1.Total = 415.98;
+    order1.OrderDate = "2013-10-01 14:42:26";
+    orders.add(order1);
+
+    Order order2 = new Order();
+    order2.OrderId = 2;
+    order2.CustomerId = 1;
+    order2.SalespersonId = 9001;
+    order2.Items.add(
+        new Item("OAX19XWN0QP",
+                 "GoSlo Digital Camera",
+                 359.99));
+    order2.Total = 359.99;
+    order2.OrderDate = "2013-10-15 16:43:16";
+    orders.add(order2);
+
+    Order order3 = new Order();
+    order3.OrderId = 3;
+    order3.CustomerId = 1;
+    order3.SalespersonId = 9000;
+    order3.Items.add(
+        new Item("WYK12EPU5EZ",
+                 "Call of Battle: Goats - Gamesphere 4",
+                 69.99));
+    order3.Items.add(
+        new Item("TJB84HAA8OA",
+                 "Bricko Building Blocks",
+                 4.99));
+    order3.Total = 74.98;
+    order3.OrderDate = "2013-11-03 17:45:28";
+    orders.add(order3);
+    return orders;
+}
+
+private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
+    OrderSummary orderSummary = new OrderSummary();
+    orderSummary.CustomerId = 1;
+    for(Order order: orders)
+    {
+        orderSummary.Summaries.add(new OrderSummaryItem(order));
+    }
+    return orderSummary;
+}
+
+public static void main(String[] args) throws RiakException {
+
+    System.out.println("Creating Data");
+    Customer customer = createCustomer();
+    ArrayList<Order> orders = createOrders();
+    OrderSummary orderSummary = createOrderSummary(orders);
+
+    System.out.println("Starting Client");
+    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);
+
+
+    System.out.println("Creating Buckets");
+    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
+    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
+    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();
+
+    System.out.println("Storing Data");
+    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
+    for (Order order : orders) {
+        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
+    }
+    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```java
+    System.out.println("Fetching related data by shared key");
+    String key = "1";
+    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
+    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
+    System.out.format("Customer 1: %s\n", fetchedCust);
+    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```java
+    System.out.println("Adding Index Data");
+    IRiakObject riakObj = ordersBucket.fetch("1").execute();
+    riakObj.addIndex("SalespersonId", 9000);
+    riakObj.addIndex("OrderDate", "2013-10-01");
+    ordersBucket.store(riakObj).execute();
+
+    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
+    riakObj2.addIndex("SalespersonId", 9001);
+    riakObj2.addIndex("OrderDate", "2013-10-15");
+    ordersBucket.store(riakObj2).execute();
+
+    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
+    riakObj3.addIndex("SalespersonId", 9000);
+    riakObj3.addIndex("OrderDate", "2013-11-03");
+    ordersBucket.store(riakObj3).execute();
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+```java
+    // Query for orders where the SalespersonId index is set to 9000
+    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
+            .withValue(9000).execute();
+
+    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
+```
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID. Next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+```java
+    // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
+    List<String> octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate"))
+            .from("2013-10-01").to("2013-10-31").execute();
+
+    System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders));
+```
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys.
+* You can search for specific values, or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/nodejs.md b/content/riak/kv/3.0.2/developing/getting-started/nodejs.md
new file mode 100644
index 0000000000..d66386d705
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/nodejs.md
@@ -0,0 +1,104 @@
+---
+title: "Getting Started with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "NodeJS"
+    identifier: "getting_started_nodejs"
+    weight: 104
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/taste-of-riak/nodejs
+  - /riak/kv/3.0.2/dev/taste-of-riak/nodejs
+---
+
+[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js
+[npm]: https://www.npmjs.com/package/basho-riak-client
+[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation
+[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/3.0.2/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Node.js 0.12 or later is
+required.
+
+Code for these examples is available [here][introduction.js]. To run, follow
+these directions:
+
+```bash
+git clone git://github.com/basho/riak-nodejs-client-examples
+cd riak-nodejs-client-examples
+npm install
+node ./app.js
+```
+
+### Client Setup
+
+Install [the Riak Node.js Client][node_js_installation] through [NPM][npm].
+
+### Connecting to Riak
+
+Connecting to Riak with the Riak Node.js Client requires creating a new client
+object and using the callback argument to know when the client is fully
+initialized:
+
+```javascript
+var Riak = require('basho-riak-client');
+var nodes = [
+    'riak-test:10017',
+    'riak-test:10027',
+    'riak-test:10037',
+    'riak-test:10047'
+];
+var client = new Riak.Client(nodes, function (err, c) {
+    // NB: at this point the client is fully initialized, and
+    // 'client' and 'c' are the same object
+});
+```
+
+This creates a new `Riak.Client` object which handles all the details of
+tracking active nodes and also provides load balancing. The `Riak.Client` object
+is used to send commands to Riak. When your application is completely done with
+Riak communications, the following method can be used to gracefully shut the
+client down and exit Node.js:
+
+```javascript
+client.stop(function (err, rslt) {
+    // NB: you may wish to check err
+    process.exit();
+});
+```
+
+Let's make sure the cluster is online with a `Ping` request:
+
+```javascript
+var assert = require('assert');
+
+client.ping(function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    } else {
+        // On success, ping returns true
+        assert(rslt === true);
+    }
+});
+```
+
+This is some simple code to test that a node in a Riak cluster is online - we
+send a simple ping message. Even if the cluster isn't present, the Riak Node.js
+Client will return a response message. In the callback it is important to check
+that your activity was successful by checking the `err` variable.
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{<baseurl>}}riak/kv/3.0.2/developing/getting-started/nodejs/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/3.0.2/developing/getting-started/nodejs/crud-operations.md
new file mode 100644
index 0000000000..4e461d7ebe
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/nodejs/crud-operations.md
@@ -0,0 +1,138 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_nodejs_crud"
+    weight: 100
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+---
+
+[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki
+
+### Creating Objects In Riak KV
+
+Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going
+to want us to do productive work. Let's create some data to save in Riak.
+
+The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak
+key/value objects. At the most basic, a `RiakObject` is responsible for
+identifying your object and for translating it into a format that can be easily
+saved to Riak.
+
+```javascript
+var async = require('async');
+
+var people = [
+    {
+        emailAddress: "bashoman@basho.com",
+        firstName: "Basho",
+        lastName: "Man"
+    },
+    {
+        emailAddress: "johndoe@gmail.com",
+        firstName: "John",
+        lastName: "Doe"
+    }
+];
+
+var storeFuncs = [];
+people.forEach(function (person) {
+    // Create functions to execute in parallel to store people
+    storeFuncs.push(function (async_cb) {
+        client.storeValue({
+                bucket: 'contributors',
+                key: person.emailAddress,
+                value: person
+            },
+            function(err, rslt) {
+                async_cb(err, rslt);
+            }
+        );
+    });
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+In this sample, we create a collection of `Person` objects and then save each
+`Person` to Riak. Once again, we check the response from Riak.
+
+### Reading from Riak
+
+Let's find a person!
+
+```javascript
+var logger = require('winston');
+
+client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true },
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        } else {
+            var riakObj = rslt.values.shift();
+            var bashoman = riakObj.value;
+            logger.info("I found %s in 'contributors'", bashoman.emailAddress);
+        }
+    }
+);
+```
+
+We use `client.fetchValue` to retrieve an object from Riak. This returns an
+array of `RiakObject` objects, which helpfully encapsulate the communication
+with Riak.
+
+After verifying that we've been able to communicate with Riak *and* that we have
+a successful result, we use the `value` property to get the object, which has
+already been converted to a JavaScript object due to the use of `convertToJs:
+true` in the options.
+
+### Modifying Existing Data
+
+Let's say that Basho Man has decided to be known as Riak Man:
+
+```javascript
+bashoman.firstName = "Riak";
+riakObj.setValue(bashoman);
+
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Updating an object involves modifying a `RiakObject` then using
+`client.storeValue` to save the existing object.
+
+### Deleting Data
+
+```javascript
+client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Just like other operations, we check the results that have come back from Riak
+to make sure the object was successfully deleted.
+
+The Riak Node.js Client has a lot of additional functionality that makes it easy
+to build rich, complex applications with Riak. Check out the
+[documentation][nodejs_wiki] to learn more about working with the Riak Node.js
+Client and Riak.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/3.0.2/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..49b6ad2510
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Object Modeling"
+    identifier: "getting_started_nodejs_object"
+    weight: 102
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/kv/3.0.2/dev/taste-of-riak/object-modeling-nodejs
+---
+
+To get started, let's create the models that we'll be using.
+
+* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
+* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
+* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br /> `marketing_group_INBOX_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
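+
+As a concrete reference, here is a minimal sketch of the shape these
+repositories take, modeled on the linked `UserRepository` (simplified here;
+`client` is the initialized `Riak.Client` from earlier chapters, and the
+`userName` field matches the key choice described above):
+
+```javascript
+function UserRepository(client) {
+    this.client = client;
+    this.bucketName = 'Users';
+}
+
+// Fetch a user by its natural key (the user name)
+UserRepository.prototype.get = function (userName, callback) {
+    this.client.fetchValue(
+        { bucket: this.bucketName, key: userName, convertToJs: true },
+        function (err, rslt) {
+            if (err) { return callback(err); }
+            // rslt.isNotFound indicates that no object exists at this key
+            callback(null, rslt.isNotFound ? null : rslt.values.shift().value);
+        });
+};
+
+// Store a user under its natural key
+UserRepository.prototype.save = function (user, callback) {
+    this.client.storeValue(
+        { bucket: this.bucketName, key: user.userName, value: user },
+        callback);
+};
+```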
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/nodejs/querying.md b/content/riak/kv/3.0.2/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..5c715bb123 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/nodejs/querying.md @@ -0,0 +1,146 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/querying-nodejs + - /riak/kv/3.0.2/dev/taste-of-riak/querying-nodejs +--- + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. + +* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33) +* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262) + +While individual Customer and Order objects don't change much (or +shouldn't change), the "Order Summary" object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. 
If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)
+
+Which returns our amalgamated objects:
+
+```bash
+info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
+info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID. Next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
+2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175)
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys.
+* You can search for specific values, or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/php.md b/content/riak/kv/3.0.2/developing/getting-started/php.md
new file mode 100644
index 0000000000..df97462ed1
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/php.md
@@ -0,0 +1,80 @@
+---
+title: "Getting Started with PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "PHP"
+    identifier: "getting_started_php"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/taste-of-riak/php
+  - /riak/kv/3.0.2/dev/taste-of-riak/php
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{<baseurl>}}riak/kv/3.0.2/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of PHP is required, along with [Composer](https://getcomposer.org/) to fetch the client library package.
+
+## Client Setup
+Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)).
+
+From the `taste-of-riak` directory, use Composer to install the Riak PHP 2.0 Client.
+
+```bash
+php path/to/your/composer.phar install
+
+# If you did a global install of composer, run this instead:
+composer install
+```
+
+If you set up a local Riak cluster using the [[five minute install]] method, change line 11 from `->onPort(8098)` to `->onPort(10018)`.
+
+Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:
+
+```json
+Reading Objects From Riak...
+Updating Objects In Riak...
+Deleting Objects From Riak...
+Working With Complex Objects...
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+Yay, success!
+
+Since we didn't use PHP's REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting up the PHP Client and connections
+
+```php
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+```
+
+This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
+Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.
+
+We are now ready to start interacting with Riak.
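+
+Since `Riak` accepts an array of nodes, pointing the client at a multi-node
+cluster is just a matter of building one `Node` per host/port pair. A small
+sketch, assuming a local devrel-style cluster (the ports below are
+assumptions; adjust them to your own setup):
+
+```php
+// Hypothetical ports for a local devrel cluster; change as needed
+$node1 = (new Node\Builder)->atHost('127.0.0.1')->onPort(10018)->build();
+$node2 = (new Node\Builder)->atHost('127.0.0.1')->onPort(10028)->build();
+
+// The same Riak object is then used for all commands
+$riak = new Riak([$node1, $node2]);
+```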
+
+## Next Steps
+
+[CRUD Operations]({{<baseurl>}}riak/kv/3.0.2/developing/getting-started/php/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/php/crud-operations.md b/content/riak/kv/3.0.2/developing/getting-started/php/crud-operations.md
new file mode 100644
index 0000000000..816bd20931
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/php/crud-operations.md
@@ -0,0 +1,187 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_php_crud"
+    weight: 100
+    parent: "getting_started_php"
+toc: true
+aliases:
+---
+
+## Creating Objects In Riak
+First, let’s create a few objects and a bucket to keep them in.
+
+```php
+$bucket = new Riak\Bucket('testBucket');
+
+$val1 = 1;
+$location1 = new Riak\Location('one', $bucket);
+
+$storeCommand1 = (new Command\Builder\StoreObject($riak))
+    ->buildObject($val1)
+    ->atLocation($location1)
+    ->build();
+$storeCommand1->execute();
+```
+
+In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key.
+
+```php
+$val2 = 'two';
+$location2 = new Riak\Location('two', $bucket);
+
+$storeCommand2 = (new Command\Builder\StoreObject($riak))
+    ->buildObject($val2)
+    ->atLocation($location2)
+    ->build();
+$storeCommand2->execute();
+```
+
+That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now.
+
+```php
+$val3 = ['myValue' => 3];
+$location3 = new Riak\Location('three', $bucket);
+
+$storeCommand3 = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($val3)
+    ->atLocation($location3)
+    ->build();
+$storeCommand3->execute();
+```
+
+## Reading Objects From Riak
+Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect.
+
+```php
+$response1 = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location1)
+    ->build()
+    ->execute();
+
+$response2 = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location2)
+    ->build()
+    ->execute();
+
+$response3 = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location3)
+    ->withDecodeAsAssociative()
+    ->build()
+    ->execute();
+
+print_r($response1->getObject()->getData());
+print_r($response2->getObject()->getData());
+print_r($response3->getObject()->getData());
+```
+
+That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html).
+For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object.
+
+In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data.
+
+## Updating Objects In Riak
+While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of `myValue` in the 3rd example to 42.
+ +```php +$object3 = $response3->getObject(); +$data3 = $object3->getData(); + +$data3['myValue'] = 42; +$object3 = $object3->setData(json_encode($data3)); + +$updateCommand = (new Command\Builder\StoreObject($riak)) + ->withObject($object3) + ->atLocation($location3) + ->build(); + +$updateCommand->execute(); +``` + +First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object. +To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object. + +## Deleting Objects From Riak +As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it. + +```php +(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute(); +``` + +### Working With Complex Objects +Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take for example, this plain old PHP object(POPO) that encapsulates some knowledge about a book. + +```php +class Book +{ + var $title; + var $author; + var $body; + var $isbn; + var $copiesOwned; +} + +$book = new Book(); +$book->isbn = '1111979723'; +$book->title = 'Moby Dick'; +$book->author = 'Herman Melville'; +$book->body = 'Call me Ishmael. Some years ago...'; +$book->copiesOwned = 3; +``` + +Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now: + +```php +$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books')); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($book) + ->atLocation($bookLocation) + ->build(); + +$storeCommand1->execute(); +``` + +Some of you may be thinking “But how does the Riak client encode/decode my object”? If we fetch the binary version of our book back and print it as a string, we shall know: + +```php +$fetchBookResponse = (new Command\Builder\FetchObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); + +print('Serialized Object:' . PHP_EOL); +print($fetchBookResponse->getBody() . PHP_EOL); +``` + +```json +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder. 
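+
+If you ever need to control the serialization yourself, the same pattern works with the `buildObject()` method shown at the start of this chapter, which stores whatever value you hand it. The snippet below is only a sketch of that idea (the `books-raw` bucket name is an arbitrary example, and we assume the `$book` object defined above is still in scope):
+
+```php
+// buildObject() stores the value as-is, so here we JSON-encode by hand.
+// With buildJsonObject(), the library would have done this for us.
+$rawJson = json_encode($book);
+
+(new Command\Builder\StoreObject($riak))
+    ->buildObject($rawJson)
+    ->atLocation(new Riak\Location($book->isbn, new Riak\Bucket('books-raw')))
+    ->build()
+    ->execute();
+```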
+ +Now that we’ve ruined the magic of object encoding, let’s clean up our mess: + +```php +(new Command\Builder\DeleteObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); +``` + +## Next Steps + +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/3.0.2/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/php/querying.md b/content/riak/kv/3.0.2/developing/getting-started/php/querying.md new file mode 100644 index 0000000000..fb33d4df4a --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/php/querying.md @@ -0,0 +1,408 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with PHP" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Querying" + identifier: "getting_started_php_query" + weight: 101 + parent: "getting_started_php" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/querying-php + - /riak/kv/3.0.2/dev/taste-of-riak/querying-php +--- + +## A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +## Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. 
+
+```php
+<?php
+
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Command;
+use Basho\Riak\Location;
+use Basho\Riak\Node;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Class definitions for our models
+
+class Customer
+{
+    var $customerId;
+    var $name;
+    var $address;
+    var $city;
+    var $state;
+    var $zip;
+    var $phone;
+    var $createdDate;
+}
+
+class Order
+{
+    public function __construct()
+    {
+        $this->items = array();
+    }
+    var $orderId;
+    var $customerId;
+    var $salespersonId;
+    var $items;
+    var $total;
+    var $orderDate;
+}
+
+class Item
+{
+    public function __construct($itemId, $title, $price)
+    {
+        $this->itemId = $itemId;
+        $this->title = $title;
+        $this->price = $price;
+    }
+    var $itemId;
+    var $title;
+    var $price;
+}
+
+class OrderSummary
+{
+    public function __construct()
+    {
+        $this->summaries = array();
+    }
+    var $customerId;
+    var $summaries;
+}
+
+class OrderSummaryItem
+{
+    public function __construct(Order $order)
+    {
+        $this->orderId = $order->orderId;
+        $this->total = $order->total;
+        $this->orderDate = $order->orderDate;
+    }
+    var $orderId;
+    var $total;
+    var $orderDate;
+}
+
+
+// Creating Data
+$customer = new Customer();
+$customer->customerId = 1;
+$customer->name = 'John Smith';
+$customer->address = '123 Main Street';
+$customer->city = 'Columbus';
+$customer->state = 'Ohio';
+$customer->zip = '43210';
+$customer->phone = '+1-614-555-5555';
+$customer->createdDate = '2013-10-01 14:30:26';
+
+
+$orders = [];
+
+$order1 = new Order();
+$order1->orderId = 1;
+$order1->customerId = 1;
+$order1->salespersonId = 9000;
+$order1->items = [
+    new Item(
+        'TCV37GIT4NJ',
+        'USB 3.0 Coffee Warmer',
+        15.99
+    ),
+    new Item(
+        'PEG10BBF2PP',
+        'eTablet Pro; 24GB; Grey',
+        399.99
+    )
+];
+$order1->total = 415.98;
+$order1->orderDate = '2013-10-01 14:42:26';
+$orders[] = $order1;
+
+$order2 = new Order();
+$order2->orderId = 2;
+$order2->customerId = 1;
+$order2->salespersonId = 9001;
+$order2->items = [
+    new Item(
+        'OAX19XWN0QP',
+        'GoSlo Digital Camera',
+        359.99
+    )
+];
+$order2->total = 359.99;
+$order2->orderDate = '2013-10-15 16:43:16';
+$orders[] = $order2;
+
+$order3 = new Order();
+$order3->orderId = 3;
+$order3->customerId = 1;
+$order3->salespersonId = 9000;
+$order3->items = [
+    new Item(
+        'WYK12EPU5EZ',
+        'Call of Battle = Goats - Gamesphere 4',
+        69.99
+    ),
+    new Item(
+        'TJB84HAA8OA',
+        'Bricko Building Blocks',
+        4.99
+    )
+];
+$order3->total = 74.98;
+$order3->orderDate = '2013-11-03 17:45:28';
+$orders[] = $order3;
+
+
+$orderSummary = new OrderSummary();
+$orderSummary->customerId = 1;
+foreach ($orders as $order) {
+    $orderSummary->summaries[] = new OrderSummaryItem($order);
+}
+unset($order);
+
+
+
+// Starting Client
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Creating Buckets
+$customersBucket = new Riak\Bucket('Customers');
+$ordersBucket = new Riak\Bucket('Orders');
+$orderSummariesBucket = new Riak\Bucket('OrderSummaries');
+
+// Storing Data
+$storeCustomer = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($customer)
+    ->atLocation(new Location($customer->customerId, $customersBucket))
+    ->build();
+$storeCustomer->execute();
+
+foreach ($orders as $order) {
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($order)
+        ->atLocation(new Location($order->orderId, $ordersBucket))
+        ->build();
+    $storeOrder->execute();
+}
+unset($order);
+
+$storeSummary = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($orderSummary)
+    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
+    
->build(); +$storeSummary->execute(); +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```php +// Fetching related data by shared key +$fetched_customer = (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $customersBucket)) + ->build()->execute()->getObject()->getData(); + +$fetched_customer->orderSummary = + (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $orderSummariesBucket)) + ->build()->execute()->getObject()->getData(); + +print("Customer with OrderSummary data: \n"); +print_r($fetched_customer); +``` + +Which returns our amalgamated objects: + +```text +Customer with OrderSummary data: +stdClass Object +( + [customerId] => 1 + [name] => John Smith + [address] => 123 Main Street + [city] => Columbus + [state] => Ohio + [zip] => 43210 + [phone] => +1-614-555-5555 + [createdDate] => 2013-10-01 14:30:26 + [orderSummary] => stdClass Object + ( + [customerId] => 1 + [summaries] => Array + ( + [0] => stdClass Object + ( + [orderId] => 1 + [total] => 415.98 + [orderDate] => 2013-10-01 14:42:26 + ) + + [1] => stdClass Object + ( + [orderId] => 2 + [total] => 359.99 + [orderDate] => 2013-10-15 16:43:16 + ) + + [2] => stdClass Object + ( + [orderId] => 3 + [total] => 74.98 + [orderDate] => 2013-11-03 17:45:28 + ) + ) + ) +) +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.2/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```php +// Adding Index Data +$keys = array(1,2,3); +foreach ($keys as $key) { + $orderLocation = new Location($key, $ordersBucket); + $orderObject = (new Command\Builder\FetchObject($riak)) + ->atLocation($orderLocation) + ->build()->execute()->getObject(); + + $order = $orderObject->getData(); + + $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId); + $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate); + + $storeOrder = (new Command\Builder\StoreObject($riak)) + ->withObject($orderObject) + ->atLocation($orderLocation) + ->build(); + $storeOrder->execute(); +} +unset($key); + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. 
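+
+It also means those entries have to be kept up to date by your application: if a field you index ever changes, the object's index entries should be refreshed before the object is stored again. The snippet below is only a sketch of that pattern, reusing the builders from above; the reassignment to salesperson `9001` is a hypothetical change, and exact index-maintenance semantics vary by client.
+
+```php
+// Hypothetical: order 1 is reassigned, so the application refreshes
+// the object's index entry before storing the object again.
+$orderLocation = new Location('1', $ordersBucket);
+$orderObject = (new Command\Builder\FetchObject($riak))
+    ->atLocation($orderLocation)
+    ->build()->execute()->getObject();
+
+$orderObject->addValueToIndex('SalespersonId_int', 9001);
+
+(new Command\Builder\StoreObject($riak))
+    ->withObject($orderObject)
+    ->atLocation($orderLocation)
+    ->build()->execute();
+```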
+
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+?>
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query, we also used the `->withReturnTerms(true)` option, which as you can see will return the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/python.md b/content/riak/kv/3.0.2/developing/getting-started/python.md
new file mode 100644
index 0000000000..ca5d305f5e
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/python.md
@@ -0,0 +1,103 @@
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/taste-of-riak/python
+  - /riak/kv/3.0.2/dev/taste-of-riak/python
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.2/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`.
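+
+If you're not sure which interpreter or package manager your shell will pick up, a quick check before installing anything can save some confusion (an optional sanity check; assumes a Unix-like shell):
+
+```bash
+# Confirm the interpreter and package manager the next steps will use
+python --version
+pip --version
+```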
+ +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` - Header files and a static library for Python +* `libffi-dev` - Foreign function interface library +* `libssl-dev` - libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.2/developing/getting-started/python/crud-operations) + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/python/crud-operations.md b/content/riak/kv/3.0.2/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..93162cf40d --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/python/crud-operations.md @@ -0,0 +1,150 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/python/object-modeling.md b/content/riak/kv/3.0.2/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..1028dc9e4e --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/python/object-modeling.md @@ -0,0 +1,264 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/object-modeling-python + - /riak/kv/3.0.2/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_<type>_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```python
+class UserRepository:
+    BUCKET = 'Users'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, user):
+        riak_obj = self.client.bucket(self.BUCKET).get(user['user_name'])
+        riak_obj.data = user
+        return riak_obj.store()
+
+    def get(self, user_name):
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        return riak_obj.data
+
+
+class MsgRepository:
+    BUCKET = 'Msgs'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, msg):
+        msgs = self.client.bucket(self.BUCKET)
+        key = self._generate_key(msg)
+
+        riak_obj = msgs.get(key)
+
+        if not riak_obj.exists:
+            riak_obj.data = msg
+            riak_obj.store(if_none_match=True)
+
+        return riak_obj
+
+    def get(self, key):
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _generate_key(self, msg):
+        return msg['sender'] + '_' + msg['created']
+
+
+class TimelineRepository:
+    BUCKET = 'Timelines'
+    SENT = 'Sent'
+    INBOX = 'Inbox'
+
+    def __init__(self, client):
+        self.client = client
+        self.msg_repo = MsgRepository(client)
+
+    def post_message(self, msg):
+        # Save the canonical copy
+        saved_message = self.msg_repo.save(msg)
+        msg_key = saved_message.key
+
+        # Post to sender's Sent timeline
+        self._add_to_timeline(msg, self.SENT, msg_key)
+
+        # Post to recipient's Inbox timeline
+        self._add_to_timeline(msg, self.INBOX, msg_key)
+
+    def get_timeline(self, owner, msg_type, date):
+        key = self._generate_key(owner, msg_type, date)
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _add_to_timeline(self, msg, msg_type, msg_key):
+        timeline_key = self._generate_key_from_msg(msg, msg_type)
+        riak_obj = self.client.bucket(self.BUCKET).get(timeline_key)
+
+        if riak_obj.exists:
+            riak_obj = self._add_to_existing_timeline(riak_obj,
+                                                      msg_key)
+        else:
+            riak_obj = self._create_new_timeline(riak_obj,
+                                                 msg, msg_type,
+                                                 msg_key)
+
+        return riak_obj.store()
+
+    def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key):
+        owner = self._get_owner(msg, msg_type)
+        new_timeline = {'owner': owner,
+                        'msg_type': msg_type,
+                        'msgs': [msg_key]}
+
+        riak_obj.data = new_timeline
+        return riak_obj
+
+    def 
_add_to_existing_timeline(self, riak_obj, msg_key): + riak_obj.data['msgs'].append(msg_key) + return riak_obj + + def _get_owner(self, msg, msg_type): + if msg_type == self.INBOX: + return msg['recipient'] + else: + return msg['sender'] + + def _generate_key_from_msg(self, msg, msg_type): + owner = self._get_owner(msg, msg_type) + return self._generate_key(owner, msg_type, msg['created']) + + def _generate_key(self, owner, msg_type, datetimestr): + dateString = string.split(datetimestr, 'T', 1)[0] + return owner + '_' + msg_type + '_' + dateString + +``` + +Finally, let's test them: + +```python +# Setup our repositories +client = riak.RiakClient(pb_port=10017, protocol='pbc') +userRepo = UserRepository(client) +msgsRepo = MsgRepository(client) +timelineRepo = TimelineRepository(client) + +# Save users +userRepo.save(marleen) +userRepo.save(joe) + +# Post msg to timelines +timelineRepo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timelineRepo.get_timeline( + joe['user_name'], + TimelineRepository.INBOX, + datetime.utcnow().isoformat()) + +joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0]) + +print 'From: {0}\nMsg : {1}\n\n'.format( + joes_first_message['sender'], + joes_first_message['text']) + +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/python/querying.md b/content/riak/kv/3.0.2/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..cbefba0010 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/python/querying.md @@ -0,0 +1,240 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Python" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Querying" + identifier: "getting_started_python_query" + weight: 101 + parent: "getting_started_python" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/querying-python + - /riak/kv/3.0.2/dev/taste-of-riak/querying-python +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index for all customer orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```python
+customer = customer_bucket.get('1').data
+customer['order_summary'] = order_summary_bucket.get('1').data
+customer
+```
+
+Which returns our amalgamated objects:
+
+```python
+{
+  u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210',
+  u'created_date': u'2013-10-01 14:30:26',
+  'order_summary': {
+    u'customer_id': 1, u'summaries': [
+      {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98},
+      {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99},
+      {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98}
+    ]},
+  u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street',
+  u'customer_id': 1
+}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.2/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.2/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.2/setup/planning/backend/bitcask) does not support secondary indexes.

See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/ruby.md b/content/riak/kv/3.0.2/developing/getting-started/ruby.md
new file mode 100644
index 0000000000..c05cca02bd
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/ruby.md
@@ -0,0 +1,68 @@
+---
+title: "Getting Started with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Ruby"
+    identifier: "getting_started_ruby"
+    weight: 101
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/taste-of-riak/ruby
+  - /riak/kv/3.0.2/dev/taste-of-riak/ruby
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.2/using/running-a-cluster) first. To try this flavor
+of Riak, a working installation of Ruby is required.
+
+## Client Setup
+
+First, install the Riak Ruby client via RubyGems.
+
+```bash
+gem install riak-client
+```
+
+Start IRB, the Ruby REPL, and let’s get set up. Enter the following into
+IRB:
+
+```ruby
+require 'riak'
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance, assuming that the node is running on `localhost`
+port 8087:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087)
+
+# Since the Ruby Riak client uses the Protocol Buffers API by default,
+# you can also just enter this:
+client = Riak::Client.new(:pb_port => 8087)
+```
+
+If you set up a local Riak cluster using the [[five-minute install]]
+method, use this code snippet instead:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017)
+
+# For the reasons explained in the snippet above, this will also work:
+client = Riak::Client.new(:pb_port => 10017)
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.2/developing/getting-started/ruby/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/3.0.2/developing/getting-started/ruby/crud-operations.md
new file mode 100644
index 0000000000..95599036da
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/ruby/crud-operations.md
@@ -0,0 +1,151 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "CRUD Operations"
+    identifier: "getting_started_ruby_crud"
+    weight: 100
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+---
+
+## Creating Objects In Riak
+
+First, let’s create a few objects and a bucket to keep them in.
+
+```ruby
+my_bucket = client.bucket("test")
+
+val1 = 1
+obj1 = my_bucket.new('one')
+obj1.data = val1
+obj1.store()
+```
+
+In this first example we have stored the integer 1 with the lookup key
+of `one`. Next, let’s store a simple string value of `two` with a
+matching key.
+
+```ruby
+val2 = "two"
+obj2 = my_bucket.new('two')
+obj2.data = val2
+obj2.store()
+```
+
+That was easy. Finally, let’s store a bit of JSON. You will probably
+recognize the pattern by now.
+
+```ruby
+val3 = { myValue: 3 }
+obj3 = my_bucket.new('three')
+obj3.data = val3
+obj3.store()
+```
+
+## Reading Objects From Riak
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+```ruby
+fetched1 = my_bucket.get('one')
+fetched2 = my_bucket.get('two')
+fetched3 = my_bucket.get('three')
+
+fetched1.data == val1
+fetched2.data == val2
+fetched3.data.to_json == val3.to_json
+```
+
+That was easy. We simply request the objects by key. In the last
+example, we converted to JSON so we can compare a string key to a symbol
+key.
+
+## Updating Objects In Riak
+
+While some data may be static, other forms of data may need to be
+updated. This is also easy to accomplish. Let’s update the value of
+myValue in the 3rd example to 42.
+
+```ruby
+fetched3.data["myValue"] = 42
+fetched3.store()
+```
+
+## Deleting Objects From Riak
+
+As a last step, we’ll demonstrate how to delete data. You’ll see that
+the delete message can be called either against the bucket or the
+object.
+
+```ruby
+my_bucket.delete('one')
+obj2.delete()
+obj3.delete()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this Ruby hash that encapsulates some knowledge about
+a book.
+
+```ruby
+book = {
+  :isbn => '1111979723',
+  :title => 'Moby Dick',
+  :author => 'Herman Melville',
+  :body => 'Call me Ishmael. Some years ago...',
+  :copies_owned => 3
+}
+```
+
+All right, so we have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now.
+
+```ruby
+books_bucket = client.bucket('books')
+new_book = books_bucket.new(book[:isbn])
+new_book.data = book
+new_book.store()
+```
+
+Some of you may be thinking, "But how does the Ruby Riak client
+encode/decode my object?" If we fetch our book back and print the raw
+data, we shall know:
+
+```ruby
+fetched_book = books_bucket.get(book[:isbn])
+puts fetched_book.raw_data
+```
+
+Raw Data:
+
+```json
+{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville",
+"body":"Call me Ishmael. Some years ago...","copies_owned":3}
+```
+
+JSON! The Ruby Riak client will serialize objects to JSON when it comes
+across structured data like hashes. For more advanced control over
+serialization you can use a library called
+[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling
+layer over the basic Riak client. Ripple falls outside the scope of
+this document but we shall visit it later.
+
+Now, let’s clean up our mess:
+
+```ruby
+new_book.delete()
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/3.0.2/developing/getting-started/ruby/object-modeling.md
new file mode 100644
index 0000000000..3ee1d6e020
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/getting-started/ruby/object-modeling.md
@@ -0,0 +1,295 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Object Modeling"
+    identifier: "getting_started_ruby_object"
+    weight: 102
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/taste-of-riak/object-modeling-ruby
+  - /riak/kv/3.0.2/dev/taste-of-riak/object-modeling-ruby
+---
+
+To get started, let's create the models that we'll be using. Since the
+[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses
+hashes when converting to and from JSON, we'll use the library
+[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically
+coerce class properties to and from hashes.
You can install this library
+with `gem install hashie`.
+
+```ruby
+# Encoding: utf-8
+
+require 'riak'
+require 'hashie'
+require 'time'
+
+class User < Hashie::Dash
+  property :user_name
+  property :full_name
+  property :email
+end
+
+class Msg < Hashie::Dash
+  property :from
+  property :to
+  property :created
+  property :text
+end
+
+class Timeline < Hashie::Dash
+  property :owner
+  property :type
+  property :msgs
+end
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def 
add_to_existing_timeline(key, msg_key) + riak_obj = @client.bucket(BUCKET).get(key) + timeline = Timeline.new(riak_obj.data) + timeline.msgs << msg_key + riak_obj.data = timeline + riak_obj + end + + def get_owner(msg, type) + type == INBOX ? msg.to : msg.from + end + + def generate_key_from_msg(msg, type) + owner = get_owner(msg, type) + generate_key(owner, type, msg.created) + end + + def generate_key(owner, type, date) + owner + '_' + type + '_' + date.utc.strftime('%F') + end +end +``` + +Finally, let's test them: + +```ruby +# Setup our repositories +client = Riak::Client.new(protocol: 'pbc', pb_port: 10017) +user_repo = UserRepository.new(client) +msgs_repo = MsgRepository.new(client) +timeline_repo = TimelineRepository.new(client) + +# Create and save users +marleen = User.new(user_name: 'marleenmgr', + full_name: 'Marleen Manager', + email: 'marleen.manager@basho.com') + +joe = User.new(user_name: 'joeuser', + full_name: 'Joe User', + email: 'joe.user@basho.com') + +user_repo.save(marleen) +user_repo.save(joe) + +# Create new Msg, post to timelines +msg = Msg.new(from: marleen.user_name, + to: joe.user_name, + created: Time.now, + text: 'Welcome to the company!') + +timeline_repo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now) +joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first) + +puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}" +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/3.0.2/developing/getting-started/ruby/querying.md b/content/riak/kv/3.0.2/developing/getting-started/ruby/querying.md new file mode 100644 index 0000000000..d1a06dfca9 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/getting-started/ruby/querying.md @@ -0,0 +1,256 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Ruby" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Querying" + identifier: "getting_started_ruby_query" + weight: 101 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/3.0.2/dev/taste-of-riak/querying-ruby + - /riak/kv/3.0.2/dev/taste-of-riak/querying-ruby +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. 
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `OrderSummaries` object will likely change often. It does double duty: it acts as an index of all of a customer's orders, and it holds relevant data such as each order's total. If our application displays this information often, it takes only one extra request to fetch all of it.
+
+```ruby
+shared_key = '1'
+customer = customer_bucket.get(shared_key).data
+customer[:order_summary] = order_summary_bucket.get(shared_key).data
+puts "Combined Customer and Order Summary: "
+pp customer
+```
+
+Which returns our amalgamated objects:
+
+```ruby
+# Combined Customer and Order Summary:
+{"customer_id"=>1,
+ "name"=>"John Smith",
+ "address"=>"123 Main Street",
+ "city"=>"Columbus",
+ "state"=>"Ohio",
+ "zip"=>"43210",
+ "phone"=>"+1-614-555-5555",
+ "created_date"=>"2013-10-01 14:30:26 -0400",
+ :order_summary=>
+  {"customer_id"=>1,
+   "summaries"=>
+    [{"order_id"=>1,
+      "total"=>415.98,
+      "order_date"=>"2013-10-01 14:42:26 -0400"},
+     {"order_id"=>2,
+      "total"=>359.99,
+      "order_date"=>"2013-10-15 16:43:16 -0400"},
+     {"order_id"=>3,
+      "total"=>74.98,
+      "order_date"=>"2013-11-03 17:45:28 -0500"}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+                                         .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "# Jane's Orders:"
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
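+Binary indexes also support exact matches. As a quick sketch (reusing the session above), we can pull every order placed on a single day by querying `order_date_bin` for that one value:
+
+```ruby
+# Exact-match query on a binary index: orders placed on 2013-10-15
+puts "# October 15th's Orders:"
+pp order_bucket.get_index('order_date_bin', '20131015')
+
+# => ["2"]
+```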
Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`.
+
+```ruby
+puts "# October's Orders:"
+pp order_bucket.get_index('order_date_bin', '20131001'..'20131031')
+```
+
+Which returns:
+
+```ruby
+# October's Orders:
+["1", "2"]
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values and, in doing so, demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/key-value-modeling.md b/content/riak/kv/3.0.2/developing/key-value-modeling.md
new file mode 100644
index 0000000000..2dbd345901
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/key-value-modeling.md
@@ -0,0 +1,535 @@
+---
+title: "Riak KV Key/Value Modeling"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Key/Value Modeling"
+    identifier: "developing_kv_model"
+    weight: 104
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/data-modeling/key-value/
+  - /riak/kv/3.0.2/dev/data-modeling/key-value/
+---
+
+While Riak enables you to take advantage of a wide variety of features that can be useful in application development, such as [Search]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search), [secondary indexes (2i)]({{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes/), and [Riak Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/), Riak almost always performs best when you build your application around basic CRUD operations (create, read, update, and delete) on objects, i.e. when you use Riak as a "pure" key/value store.
+
+In this tutorial, we'll suggest some strategies for naming and modeling for key/value object interactions with Riak. If you'd like to use some of Riak's other features, we recommend checking out the documentation for each of them or consulting our guide to [building applications with Riak]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/) for a better sense of which features you might need.
+
+## Advantages of Key/Value Operations
+
+Riak's key/value architecture enables it to be more performant than relational databases in many scenarios because Riak doesn't need to perform lock, join, union, or other operations when working with objects. Instead, it interacts with objects on a one-by-one basis, using **primary key lookups**.
+
+Primary key lookups store and fetch objects in Riak on the basis of three basic locators:
+
+* The object's [key]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/keys-and-objects#keys), which can be anything you want as long as it is [Unicode compliant](http://www.unicode.org/)
+* The [bucket]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets) which houses the object and its key (bucket names are also Unicode compliant)
+* The [bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) that determines the bucket's [replication]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/replication-properties) and other properties
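+With those three locators in hand, a primary key lookup is a one-line operation in any client. Here's a minimal sketch with the Ruby client (assuming a local node on the default ports; the bucket and key names are just for illustration):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new
+
+# Create: store an object under bucket "seasons", key "s01e01"
+bucket = client.bucket('seasons')
+episode = bucket.new('s01e01')
+episode.data = { 'title' => 'Simpsons Roasting on an Open Fire' }
+episode.store
+
+# Read: the bucket/key pair is all Riak needs to locate the object
+puts bucket.get('s01e01').data['title']
+
+# Delete
+bucket.delete('s01e01')
+```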
It may be useful to think of this system as analogous to a nested key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you would find in most programming languages. Below is an example from [Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). The hash `simpsons` contains keys for all of the available seasons, while each key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store JSON objects with a variety of information about every episode of the Simpsons. We could store each season in its own bucket and each episode in its own key within that bucket. Here's what the URL structure would look like (for the [HTTP API]({{<baseurl>}}riak/kv/3.0.2/developing/api/http)):
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+The most important benefit of storing Riak objects this way is that these types of lookup operations are extremely fast. Riak doesn't need to search through columns or tables to find an object. If it knows the bucket/key "address" of the object, so to speak, it can locate that object just about as quickly with billions of objects in a cluster as when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're used to relational databases. The central difficulty is that your application cannot run arbitrary selection queries like `SELECT * FROM table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak more easily is to provide **structured bucket and key names** for objects. This approach often involves wrapping information about the object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s, e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other markers. For example, sensor data keys could be prefaced by `sensor_` or `temp_sensor1_` followed by a timestamp (e.g. `sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be prefaced with `user_` followed by a UUID (e.g. `user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key names. If you were building Twitter using Riak, for example, you could store tweets from each user in a different bucket and then construct key names using a combination of the prefix `tweet_` and a timestamp. In that case, all the tweets from the user BashoWhisperer123 could be housed in a bucket named `BashoWhisperer123`, and keys for tweets would look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the use case at hand.
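+To make the pattern concrete, here's a small sketch of building structured keys in Ruby (the helper values are made up; only the prefixes mirror the examples above):
+
+```ruby
+require 'securerandom'
+require 'time'
+
+# Structured keys built from a prefix plus a timestamp or UUID
+sensor_key = "sensor1_#{Time.now.iso8601}"   # e.g. sensor1_2013-11-05T08:15:30-05:00
+user_key   = "user_#{SecureRandom.uuid}"     # e.g. user_9b1899b5-eb8c-47e4-83c9-2c62f0300596
+tweet_key  = "tweet_#{Time.now.to_i}"        # tweet_ plus a Unix timestamp
+
+puts sensor_key, user_key, tweet_key
+```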
## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user information store that enables your application to easily fetch user records, which are all stored in the bucket `users` with each user's username acting as the key. The problem at this point is this: how can Riak know which user records actually exist?
+
+One way to determine this is to [list all keys]({{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers/list-keys) in the bucket `users`. This approach, however, is _not_ recommended, because listing all keys in a bucket is a very expensive operation that should not be used in production. And so another strategy must be employed.
+
+A better possibility is to use [Riak sets]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets) to store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{<baseurl>}}riak/kv/3.0.2/developing/data-types) that enables you to store lists of binaries or strings in Riak. Unlike normal Riak objects, you can interact with Riak sets much like you interact with sets in most programming languages, i.e. you can add and remove elements at will.
+
+Going back to our user data example, instead of simply storing user records in our `users` bucket, we could set up our application to store each key in a set when a new record is created. We'll store this set in the bucket `user_info_sets` (we'll keep it simple) and in the key `usernames`. The following will also assume that we've [set up a bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called `sets`.
+
+We can interact with that set on the basis of its location:
+
+```java
+Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames");
+
+// With this Location, we can construct fetch operations like this:
+FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build();
+```
+
+```ruby
+require 'riak'
+
+# We'll make the client and the set global because we'll use them
+# inside of functions later on
+$client = Riak::Client.new
+
+set_bucket = $client.bucket('user_info_sets')
+
+$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets')
+```
+
+```php
+$command = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->buildLocation('usernames', 'user_info_sets', 'sets')
+    ->build();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('user_info_sets')
+user_id_set = Set(bucket, 'usernames')
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.2/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.2/developing/getting-started).
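+Since we can read the set back at any time, checking whether a record exists becomes a simple membership test. A quick Ruby sketch (the helper name is ours, reusing the global `$user_id_set` from above):
+
+```ruby
+# Hypothetical helper: is there a record for this username?
+def user_known?(username)
+  $user_id_set.members.include?(username)
+end
+
+puts user_known?('bashowhisperer123')
+```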
+
+Then, we can create a function that stores a user record's key in that set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+  public String username;
+  public String info;
+
+  public User(String username, String info) {
+    this.username = username;
+    this.info = info;
+  }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+  // User records themselves will be stored in the bucket "users"
+  Location userObjectLocation =
+    new Location(new Namespace("users"), user.username);
+  RiakObject userObject = new RiakObject()
+    // We'll keep it simple and store User object data as plain text
+    .setContentType("text/plain")
+    .setValue(user.info);
+  StoreValue store = new StoreValue.Builder(userObject)
+    .withLocation(userObjectLocation)
+    .build();
+  client.execute(store);
+
+  Location userIdSet =
+    new Location(new Namespace("sets", "user_info_sets"), "usernames");
+  SetUpdate su = new SetUpdate()
+    .add(BinaryValue.create(user.username));
+  UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+    .build();
+  client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new($client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the set
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+    public $user_name;
+    public $info;
+
+    public function __construct($user_name, $info)
+    {
+        $this->user_name = $user_name;
+        $this->info = $info;
+    }
+}
+
+function store_user(\Basho\Riak $riak, User $user)
+{
+    (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->buildLocation($user->user_name, 'users')
+        ->buildJsonObject($user)
+        ->build()
+        ->execute();
+
+    (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->add($user->user_name)
+        ->build()
+        ->execute();
+}
+```
+
+```python
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    obj = RiakObject(client, client.bucket('users'), user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
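+A quick usage sketch for the Ruby version above (the sample user is made up):
+
+```ruby
+user = User.new
+user.username = 'bashowhisperer123'
+user.info = 'Riak enthusiast'
+
+store_record(user)
+```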
Now, let's say that we want to be able to pull up all user records in the bucket at once. We could do so by iterating through the usernames stored in our set and then fetching the object corresponding to each username:
+
+```java
+public Set<User> fetchAllUserRecords() {
+    // Empty builder sets for usernames and User objects
+    Set<String> usernames = new HashSet<>();
+    Set<User> userSet = new HashSet<>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSet =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    set.viewAsSet().forEach((BinaryValue username) -> {
+        usernames.add(username.toString());
+    });
+
+    // Fetch User objects for each of the usernames stored in the set
+    usernames.forEach((String username) -> {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        User user = client.execute(fetch).getValue(User.class);
+        userSet.add(user);
+    });
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users(\Basho\Riak $riak)
+{
+    $users = [];
+
+    $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->build()
+        ->execute();
+
+    $user_names = $response->getSet()->getData();
+    foreach ($user_names as $user_name) {
+        $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+            ->buildLocation($user_name, 'users')
+            ->build()
+            ->execute();
+
+        $users[$user_name] = $response->getObject()->getData();
+    }
+
+    return $users;
+}
+```
+
+```python
+# We'll create a generator object that will yield a list of Riak objects
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can retrieve that list of Riak objects later on
+list(fetch_all_user_records())
+```
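+Riak sets support removal as well, so deletion should keep the set in sync with the records. A sketch of the inverse operation in Ruby (the helper name is ours, reusing `$client` and `$user_id_set`):
+
+```ruby
+# Hypothetical inverse of store_record: delete the record and
+# remove its username from the set so it is no longer discoverable
+def delete_record(username)
+  $client.bucket('users').delete(username)
+  $user_id_set.remove(username)
+end
+```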
## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries for objects that don't exist or that don't conform to how your application has named them. For example, you could store all user data in the bucket `users` with keys beginning with the fragment `user_` followed by a username, e.g. `user_coderoshi` or `user_macintux`. If an object with an inappropriate key is stored in that bucket, it won't even be seen by your application, because the application will only ever query keys that begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) {
+  String usernameKey = String.format("user_%s", username);
+  Location loc = new Location(new Namespace("users"), usernameKey);
+  FetchValue fetchUser = new FetchValue.Builder(loc).build();
+  FetchValue.Response res = client.execute(fetchUser);
+  User userObject = res.getValue(User.class);
+  return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = $client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser(\Basho\Riak $riak, $user_name)
+{
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->buildLocation('user_' . $user_name, 'users')
+        ->build()
+        ->execute();
+
+    return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) have two essential functions: they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets) in an efficient and streamlined way and, more importantly for our purposes here, they act as a third namespace in Riak in addition to buckets and keys. Thus, in Riak versions 2.0 and later you have access to a third layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket properties to groups of buckets, you can also create named bucket types that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2 but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
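+Because each type is a separate namespace, the same bucket/key pair under two different types addresses two completely independent objects. A small Ruby sketch (assuming the types above have been created and activated; the bucket, key, and values are just for illustration):
+
+```ruby
+earth_bucket = $client.bucket_type('earth').bucket('elements')
+water_bucket = $client.bucket_type('water').bucket('elements')
+
+# Same bucket name, same key, different types: two distinct objects
+obj1 = earth_bucket.new('sample')
+obj1.data = { 'value' => 'stored under the earth type' }
+obj1.store
+
+obj2 = water_bucket.new('sample')
+obj2.data = { 'value' => 'stored under the water type' }
+obj2.store
+
+puts earth_bucket.get('sample').data['value']
+puts water_bucket.get('sample').data['value']
+```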
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become dissatisfied with our storage scheme because we want to separate the seasons into good seasons and bad seasons (we'll leave it up to you to make that determination).
+
+One way to improve our scheme might be to change our bucket naming system and preface each bucket name with `good` or `bad`, but a more elegant way would be to use bucket types instead. So instead of this URL structure...
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+That adds an additional layer of namespacing and enables us to think about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good' => {
+    'season X' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  },
+  'bad' => {
+    'season Y' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  }
+}
+```
+
+We can fetch the title of season 8, episode 6:
+
+```ruby
+# For the sake of example, we'll classify season 8 as good:
+
+simpsons['good']['season 8']['episode 6']
+
+# => "A Milhouse Divided"
+```
+
+If your data is best modeled as a three-layered hash, you may want to consider using bucket types in the way shown above.
+
+## Resources
+
+More on key/value modeling in Riak can be found in [this presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo) by Basho evangelist [Hector Castro](https://github.com/hectcastro), with the presentation slides available [on Speaker Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example).
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage.md b/content/riak/kv/3.0.2/developing/usage.md
new file mode 100644
index 0000000000..e8e8a4df89
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage.md
@@ -0,0 +1,138 @@
+---
+title: "Usage Overview"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Usage"
+    identifier: "developing_usage"
+    weight: 101
+    parent: "developing"
+toc: true
+aliases:
+---
+
+## In This Section
+
+#### [Creating Objects](./creating-objects)
+
+Creating and storing objects in Riak KV.
+
+[Learn More >>](./creating-objects)
+
+
+#### [Reading Objects](./reading-objects)
+
+Reading and fetching objects in Riak KV.
+
+[Learn More >>](./reading-objects)
+
+
+#### [Updating Objects](./updating-objects)
+
+Updating objects in Riak KV.
+
+[Learn More >>](./updating-objects)
+
+
+#### [Deleting Objects](./deleting-objects)
+
+Deleting objects in Riak KV.
+
+[Learn More >>](./deleting-objects)
+
+
+#### [Content Types](./content-types)
+
+Overview of content types and their usage.
+
+[Learn More >>](./content-types)
+
+
+#### [Using Search](./search)
+
+Tutorial on using search.
+
+[Learn More >>](./search)
+
+
+#### [Using MapReduce](./mapreduce)
+
+Guide to using MapReduce in applications.
+
+[Learn More >>](./mapreduce)
+
+
+#### [Using Secondary Indexes](./secondary-indexes)
+
+Overview and usage details of Secondary Indexes (2i).
+
+[Learn More >>](./secondary-indexes)
+
+
+#### [Bucket Types](./bucket-types)
+
+Describes how to use bucket properties.
+
+[Learn More >>](./bucket-types)
+
+
+#### [Using Commit Hooks](./commit-hooks)
+
+Tutorial on pre-commit and post-commit hook functions.
+
+[Learn More >>](./commit-hooks)
+
+
+#### [Creating Search Schemas](./search-schemas)
+
+Step-by-step guide on creating and using custom search schemas.
+
+[Learn More >>](./search-schemas)
+
+
+#### [Searching with Data Types](./searching-data-types)
+
+Guide on using search with Data Types.
+
+[Learn More >>](./searching-data-types)
+
+
+#### [Implementing a Document Store](./document-store)
+
+Tutorial on using Riak KV as a document store.
+
+[Learn More >>](./document-store)
+
+
+#### [Custom Extractors](./custom-extractors)
+
+Details on creating and registering custom extractors with Riak Search.
+
+[Learn More >>](./custom-extractors)
+
+
+#### [Client-side Security](./security)
+
+Overview of client-side security.
+
+[Learn More >>](./security)
+
+
+#### [Replication](./replication)
+
+Documentation on replication properties and their underlying implementation.
+
+[Learn More >>](./replication)
+
+
+#### [Conflict Resolution](./conflict-resolution)
+
+Guide to conflict resolution during object updates.
+ +[Learn More >>](./conflict-resolution) + + + + diff --git a/content/riak/kv/3.0.2/developing/usage/bucket-types.md b/content/riak/kv/3.0.2/developing/usage/bucket-types.md new file mode 100644 index 0000000000..c98dd08fd9 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/usage/bucket-types.md @@ -0,0 +1,102 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Bucket Types" + identifier: "usage_bucket_types" + weight: 108 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.2/dev/advanced/bucket-types + - /riak/kv/3.0.2/dev/advanced/bucket-types +--- + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +## Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +`default` bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/usage/commit-hooks.md b/content/riak/kv/3.0.2/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..fa63f3e6a5 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/usage/commit-hooks.md @@ -0,0 +1,243 @@ +--- +title: "Using Commit Hooks" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Using Commit Hooks" + identifier: "usage_commit_hooks" + weight: 109 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.2/dev/using/commit-hooks + - /riak/kv/3.0.2/dev/using/commit-hooks +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types + +Pre- and post-commit hooks are functions that are invoked before or +after an object has been written to Riak. 
To provide a few examples, commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify the object directly. Updating Riak objects while post-commit hooks are invoked can cause nasty feedback loops which will wedge the hook into an infinite cycle unless the hook functions are carefully written to detect and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets) level, [using bucket types][usage bucket types]. They are run once per successful response to the client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/) functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) that associate one or more hooks with any bucket that bears that type. Let's create a bucket type called `with_post_commit` that adds a post-commit hook to operations on any bucket that bears the `with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`) and then the name of the function (`fun`) as a JSON object. The following specifies a commit hook called `my_custom_hook` in the module `commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object to either the `precommit` or `postcommit` list in the bucket type's properties. Pre- and post-commit hooks are stored in lists named `precommit` and `postcommit`, respectively. Let's add the hook we specified above to the `postcommit` property when we create our bucket type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the Riak object being modified. Remember that deletes are also considered "writes," and so pre-commit hooks will be fired when a delete occurs in the bucket as well. This means that hook functions will need to inspect the object for the `X-Riak-Deleted` metadata entry (more on this in our documentation on [object deletion]({{<baseurl>}}riak/kv/3.0.2/using/reference/object-deletion)) to determine whether a delete is occurring.
+
+Erlang pre-commit functions are allowed three possible return values:
+
+- A Riak object - This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before it is written.
+- `fail` - The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/3.0.2/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` - The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
+
+Errors that occur when processing Erlang pre-commit hooks will be reported in the `sasl-error.log` file with lines that start with `problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object (`Object`) as its input and runs a pattern-matching operation on it. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1) function determines that the object's value (returned by the `riak_object:get_value` function) is greater than 5,242,880 bytes (5 MB), then the commit will fail with the message `Object is larger than 5MB.`, stopping the write. If the object is not larger than 5 MB, Riak will return the object and allow the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty list, meaning that no pre-commit hooks are specified by default. Adding one or more pre-commit hook functions to this list, as documented above, will cause Riak to start evaluating those hook functions when bucket entries are created, updated, or deleted. Riak stops evaluating pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way is to validate data before it is written to Riak. Below is an Erlang example that validates a JSON object before it is written to Riak.
+
+Below is a sample JSON object that will be evaluated by the hook:
+
+```json
+{
+  "user_info": {
+    "name": "Mark Phillips",
+    "age": "25"
+  },
+  "session_info": {
+    "id": 3254425,
+    "items": [29, 37, 34]
+  }
+}
+```
+
+The following hook will validate the JSON object:
+
+```erlang
+validate(Object) ->
+  try
+    mochijson2:decode(riak_object:get_value(Object)),
+    Object
+  catch
+    throw:invalid_utf8 ->
+      {fail, "Invalid JSON: Illegal UTF-8 character"};
+    error:Error ->
+      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
+  end.
+```
+
+**Note**: All pre-commit hook functions are executed for each create and update operation.
+
+## Post-Commit Hooks
+
+Post-commit hooks are run after a write has completed successfully. More specifically, the hook function is called immediately before the calling process is notified of the successful write.
+
+Hook functions must accept a single argument: the object instance just written. The return value of the function is ignored. As with pre-commit hooks, deletes are considered writes, so post-commit hook functions will need to inspect the object's metadata for the presence of `X-Riak-Deleted` to determine whether a delete has occurred. As with pre-commit hooks, errors that occur when processing post-commit hooks will be reported in the `sasl-error.log` file with lines that start with `problem invoking hook`.
+
+#### Example
+
+The following post-commit hook creates a secondary index on the `email` field of a JSON object:
+
+```erlang
+postcommit_index_on_email(Object) ->
+    %% Determine the target bucket name
+    Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),
+
+    %% Decode the JSON body of the object
+    {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),
+
+    %% Extract the email field
+    {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),
+
+    %% Create a new object for the target bucket
+    %% NOTE: This doesn't handle the case where the
+    %%       index object already exists!
+    IndexObj = riak_object:new(
+        Bucket, Key, <<>>, %% no object contents
+        dict:from_list(
+            [
+                {<<"content-type">>, "text/plain"},
+                {<<"Links">>,
+                    [
+                        {
+                            {riak_object:bucket(Object), riak_object:key(Object)},
+                            <<"indexed">>
+                        }]}
+            ]
+        )
+    ),
+
+    %% Get a riak client
+    {ok, C} = riak:local_client(),
+
+    %% Store the object
+    C:put(IndexObj).
+```
+
+
+### Chaining
+
+The default value of the bucket `postcommit` property is an empty list, meaning that no post-commit hooks are specified by default. Adding one or more post-commit hook functions to the list, as documented above, will cause Riak to start evaluating those hook functions immediately after data has been created, updated, or deleted. Each post-commit hook function runs in a separate process, so it's possible for several hook functions, triggered by the same update, to execute in parallel.
+
+**Note**: All post-commit hook functions are executed for each create, update, or delete.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/conflict-resolution.md b/content/riak/kv/3.0.2/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..a7a0bad7c4
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/conflict-resolution.md
@@ -0,0 +1,681 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/conflict-resolution
+  - /riak/kv/3.0.2/dev/using/conflict-resolution
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#node) is capable of receiving requests without requiring that every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are unavoidable. Often, Riak can resolve these conflicts on its own internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+
+{{% note title="Important note on terminology" %}}
+In versions of Riak prior to 2.0, vector clocks were the only causal context mechanism available in Riak, which changed with the introduction of dotted version vectors in 2.0. Please note that you may frequently find terminology in client library APIs, internal Basho documentation, and more that uses the term "vector clock" interchangeably with causal context in general. Riak's HTTP API still uses an `X-Riak-Vclock` header, for example, even if you are using dotted version vectors.
+{{% /note %}}
+
+But even when you use causal context, Riak cannot always decide which value is most causally recent, especially in cases involving concurrent updates to an object. So how does Riak behave when it can't decide on a single most-up-to-date value? **That is your choice**. A full listing of available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now, though, please bear in mind that we strongly recommend one of the following two options:
+
+1. If your data can be modeled as one of the currently available [Riak Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types), we recommend using one of these types, because all of them have conflict resolution _built in_, completely relieving applications of the need to engage in conflict resolution.
+2. If your data cannot be modeled as one of the available Data Types, we recommend allowing Riak to generate [siblings](#siblings) and designing your application to resolve conflicts in a way that fits your use case. Developing your own **conflict resolution strategy** can be tricky, but it has clear advantages over other approaches.
+
+Because Riak allows for a mixed approach when storing and managing data, you can apply multiple conflict resolution strategies within a cluster.
+
+> **Note on strong consistency**
+>
+> In versions of Riak 2.0 and later, you have the option of using Riak in a strongly consistent fashion. This document pertains to usage of Riak as an _eventually_ consistent system. If you'd like to use Riak's strong consistency feature, please refer to the following documents:
+>
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/strong-consistency) - A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/3.0.2/configuring/strong-consistency) - A guide for operators
+> * [strong consistency][use ref strong consistency] - A more theoretical explication of strong consistency
+
+## Client- and Server-side Conflict Resolution
+
+Riak's eventual consistency model is powerful because Riak is fundamentally non-opinionated about how data resolution takes place. While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/replication-properties#available-parameters), there are a variety of general approaches to conflict resolution that are available. In Riak, you can mix and match conflict resolution strategies at the bucket level, [using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets) to consider when reasoning about conflict resolution are the `allow_mult` and `last_write_wins` properties.
+
+These properties provide you with the following basic options:
+
+### Timestamp-based Resolution
+
+If the [`allow_mult`](#siblings) parameter is set to `false`, Riak resolves all object replica conflicts internally and does not return siblings to the client.
How Riak resolves those conflicts +depends on the value that you set for a different bucket property, +[`last_write_wins`]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets). If `last_write_wins` is set to `false`, +Riak will resolve all conflicts on the basis of +[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are +attached to all Riak objects as metadata. + +The problem with timestamps is that they are not a reliable resolution +mechanism in distributed systems, and they always bear the risk of data +loss. A better yet still-problematic option is to adopt a +last-write-wins strategy, described directly below. + +### Last-write-wins + +Another way to manage conflicts is to set `allow_mult` to `false`, as +with timestamp-based resolution, while also setting the +`last_write_wins` parameter to +`true`. This produces a so-called last-write-wins (LWW) strategy whereby +Riak foregoes the use of all internal conflict resolution strategies +when making writes, effectively disregarding all previous writes. + +The problem with LWW is that it will necessarily drop some writes in the +case of concurrent updates in the name of preventing sibling creation. +If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. For examples of client-side sibling resolution, see the following +client-library-specific docs: + +* [Java]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/nodejs) + +In Riak versions 2.0 and later, `allow_mult` is set to `true` by default +for any [bucket types]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) that you create. 
This means +that if you wish to avoid client-side sibling resolution, you have a few +options: + +* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) + that set `allow_mult` to `false` +* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/3.0.2/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/3.0.2/configuring/reference#default-bucket-properties) for your + cluster. If you set the `buckets.default.allow_mult` parameter to + `false`, all bucket types that you create will have `allow_mult` set + to `false` by default. + +## Causal Context + +When a value is stored in Riak, it is tagged with a piece of metadata +called a **causal context** which establishes the object's initial +version. Causal context comes in one of two possible forms, depending +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context#vector-clocks) will be used. + +Causal context essentially enables Riak to compare the different values +of objects stored in Riak and to determine a number of important things +about those values: + + * Whether one value is a direct descendant of the other + * Whether the values are direct descendants of a common parent + * Whether the values are unrelated in recent heritage + +Using the information provided by causal context, Riak is frequently, +though not always, able to resolve conflicts between values without +producing siblings. + +Both vector clocks and dotted version vectors are non human readable and +look something like this: + +``` +a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA= +``` + +If `allow_mult` is set to `true`, you should _always_ use causal context +when updating objects, _unless you are certain that no object exists +under that key_. Failing to use causal context with mutable data, +especially for objects that are frequently updated, can lead to +[sibling explosion]({{<baseurl>}}riak/kv/3.0.2/using/performance/latency-reduction#siblings), which can +produce a variety of problems in your cluster. Fortunately, much of the +work involved with using causal context is handled automatically by +Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.2/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{<baseurl>}}riak/kv/3.0.2/developing/usage/updating-objects) document. + +## Siblings + +A **sibling** is created when Riak is unable to resolve the canonical +version of an object being stored, i.e. when Riak is presented with +multiple possible values for an object and can't figure out which one is +most causally recent. The following scenarios can create sibling values +inside of a single object: + +1. **Concurrent writes** - If two writes occur simultaneously from +clients, Riak may not be able to choose a single value to store, in +which case the object will be given a sibling. These writes could happen +on the same node or on different nodes. +2. **Stale causal context** - Writes from any client using a stale +[causal context]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context). 
This is a less likely scenario if a client updates +the object by reading the object first, fetching the causal context +currently attached to the object, and then returning that causal context +to Riak when performing the update (fortunately, our client libraries +handle much of this automatically). However, even if a client follows +this protocol when performing updates, a situation may occur in which an +update happens from a different client while the read/write cycle is +taking place. This may cause the first client to issue the write with an +old causal context value and for a sibling to be created. A client is +"misbehaved" if it habitually updates objects with a stale or no context +object. +3. **Missing causal context** - If an object is updated with no causal +context attached, siblings are very likely to be created. This is an +unlikely scenario if you're using a Basho client library, but it _can_ +happen if you are manipulating objects using a client like `curl` and +forgetting to set the `X-Riak-Vclock` header. + +## Siblings in Action + +Let's have a more concrete look at how siblings work in Riak. First, +we'll create a bucket type called `siblings_allowed` with `allow_mult` +set to `true`: + +```bash +riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}' +riak-admin bucket-type activate siblings_allowed +riak-admin bucket-type status siblings_allowed +``` + +If the type has been activated, running the `status` command should +return `siblings_allowed is active`. Now, we'll create two objects and +write both of them to the same key without first fetching the object +(which obtains the causal context): + +```java +Location bestCharacterKey = + new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character"); + +RiakObject obj1 = new RiakObject() + .withContentType("text/plain") + .withValue(BinaryValue.create("Ren")); +RiakObject obj2 = new RiakObject() + .withContentType("text/plain") + .withValue(BinaryValue.create("Stimpy")); +StoreValue store1 = new StoreValue.Builder(obj1) + .withLocation(bestCharacterKey) + .build(); +StoreValue store2 = new StoreValue.Builder(obj2) + .withLocation(bestCharacterKey) + .build(); +client.execute(store1); +client.execute(store2); +``` + +```ruby +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj1 = Riak::RObject.new(bucket, 'best_character') +obj1.content_type = 'text/plain' +obj1.raw_data = 'Ren' +obj1.store + +obj2 = Riak::RObject.new(bucket, 'best_character') +obj2.content_type = 'text/plain' +obj2.raw_data = 'Stimpy' +obj2.store +``` + +```python +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj1 = RiakObject(client, bucket, 'best_character') +obj1.content_type = 'text/plain' +obj1.data = 'Ren' +obj1.store() + +obj2 = RiakObject(client, bucket, 'best_character') +obj2.content_type = 'text/plain' +obj2.data = 'Stimpy' +obj2.store() +``` + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); +``` + +```javascript +var obj1 = new Riak.Commands.KV.RiakObject(); +obj1.setContentType('text/plain'); +obj1.setBucketType('siblings_allowed'); +obj1.setBucket('nickolodeon'); +obj1.setKey('best_character'); +obj1.setValue('Ren'); + +var obj2 = new 
Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.2/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.2/developing/getting-started) section.
+
+At this point, multiple objects have been stored in the same key without passing any causal context to Riak. Let's see what happens if we try to read the contents of the object:
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed', bucket:
+        'nickolodeon', key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found.
We should get this response: + +```java +com.basho.riak.client.cap.UnresolvedConflictException: Siblings found +``` + +```ruby +<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]> +``` + +```python +[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>] +``` + +```csharp +Sibling count: 2 + VTag: 1DSVo7VED8AC6llS8IcDE6 + VTag: 7EiwrlFAJI5VMLK87vU4tE +``` + +```javascript +info: nickolodeon/best_character has '2' siblings +``` + +```curl +Siblings: +175xDv0I3UFCfGRC7K7U9z +6zY2mUCFPEoL834vYCDmPe +``` + +As you can see, reading an object with sibling values will result in +some form of "multiple choices" response (e.g. `300 Multiple Choices` in +HTTP). If you're using the HTTP interface and want to view all sibling +values, you can attach an `Accept: multipart/mixed` header to your +request: + +```curl +curl -H "Accept: multipart/mixed" \ + http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character +``` + +Response (without headers): + +``` +ren +--WUnzXITIPJFwucNwfdaofMkEG7H + +stimpy +--WUnzXITIPJFwucNwfdaofMkEG7H-- +``` + +If you select the first of the two siblings and retrieve its value, you +should see `Ren` and not `Stimpy`. + +### Using Causal Context + +Once you are presented with multiple options for a single value, you +must determine the correct value. In an application, this can be done +either in an automatic fashion, using a use case-specific resolver, or +by presenting the conflicting objects to the end user. For more +information on application-side conflict resolution, see our +client-library-specific documentation for the following languages: + +* [Java]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/nodejs) + +We won't deal with conflict resolution in this section. Instead, we'll +focus on how to use causal context. + +After having written several objects to Riak in the section above, we +have values in our object: `Ren` and `Stimpy`. But let's say that we +decide that `Stimpy` is the correct value based on our application's use +case. In order to resolve the conflict, we need to do three things: + +1. Fetch the current object (which will return both siblings) +2. Modify the value of the object, i.e. make the value `Stimpy` +3. Write the object back to the `best_character` key + +What happens when we fetch the object first, prior to the update, is +that the object handled by the client has a causal context attached. At +that point, we can modify the object's value, and when we write the +object back to Riak, _the causal context will automatically be attached +to it_. 
Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we resolve the conflict down to a single sibling and modify the
+# object's value
+obj.siblings = [obj.siblings[0]]
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+        bucketType: 'siblings_allowed',
+        bucket: 'nickolodeon',
+        key: 'best_character'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue('Stimpy');
+        client.storeValue({ value: riakObj, returnBody: true },
+            function (err, rslt) {
+                if (err) {
+                    throw new Error(err);
+                }
+
+                assert(rslt.values.length === 1);
+            }
+        );
+    }
+);
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+Note that it is possible for two clients to be engaged in conflict
+resolution simultaneously. To avoid a pathological divergence, you should be
+sure to limit the number of reconciliations and fail once that limit has
+been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/3.0.2/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a large number of updates is performed on a single
+object in a short period of time. 
While updating a single object
+_extremely_ frequently is not recommended, you can tune Riak's vector
+clock pruning to prevent vector clocks from growing too large too
+quickly. More on pruning can be found in the [section below](#vector-clock-pruning).
+
+### How does `last_write_wins` affect resolution?
+
+On the surface, it seems like setting `allow_mult` to `false`
+(the default) and `last_write_wins` to `true` would result in the same
+behavior, but there is a subtle distinction.
+
+Even though both settings return only one value to the client, setting
+`allow_mult` to `false` still uses vector clocks for resolution, whereas
+if `last_write_wins` is `true`, Riak reads the timestamp to determine
+the latest version. Deeper in the system, if `allow_mult` is `false`,
+Riak will still allow siblings to exist when they are created (via
+concurrent writes or network partitions), whereas setting
+`last_write_wins` to `true` means that Riak will overwrite the value
+with the one that has the later timestamp.
+
+When you don't care about sibling creation, setting `allow_mult` to
+`false` has the least surprising behavior: you get the latest value,
+but network partitions are handled gracefully. However, for cases in
+which keys are rewritten often (and quickly) and the new value isn't
+necessarily dependent on the old value, `last_write_wins` will provide
+better performance. Some use cases where you might want to use
+`last_write_wins` include caching, session storage, and insert-only
+workloads (no updates).
+
+{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}}
+The combination of setting both the `allow_mult` and `last_write_wins`
+properties to `true` leads to undefined behavior and should not be used.
+{{% /note %}}
+
+## Vector Clock Pruning
+
+Riak regularly prunes vector clocks to prevent overgrowth, based on four
+parameters that can be set for any bucket type that you create:
+
+Parameter | Default value | Description
+:---------|:--------------|:-----------
+`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned
+`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned
+`young_vclock` | `20` | If a vector clock entry is younger than this value (in seconds), it will not be pruned
+`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in seconds), it will be pruned
+
+This diagram shows how the values of these parameters dictate the vector
+clock pruning process:
+
+![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)
+
+## More Information
+
+Additional background information on vector clocks:
+
+* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock)
+* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/)
+* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/)
+* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/csharp.md
new file mode 100644
index 0000000000..296a4d6dec
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/csharp.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "C 
Sharp" + identifier: "usage_conflict_resolution_csharp" + weight: 103 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/3.0.2/dev/using/conflict-resolution/csharp + - /riak/kv/3.0.2/dev/using/conflict-resolution/csharp +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak .NET client][riak_dotnet_client]. + +## How the .NET Client Handles Conflict Resolution + +In the Riak .NET client, every Riak object has a `siblings` property that +provides access to a list of that object's sibling values. If there are no +siblings, that property will return an empty list. + +Here's an example of an object with siblings: + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +So what happens if the count of `obj.Siblings` is greater than 0, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `Siblings` list and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `Siblings` list and will +fetch, update and store the definitive value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Now, modify the object's value +obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain); + +// Then, store the object which has vector clock attached +var putRslt = client.Put(obj); +CheckResult(putRslt); + +obj = putRslt.Value; +// Voila, no more siblings! +Debug.Assert(obj.Siblings.Count == 0); +``` + +### Choosing a value from `Siblings` + +This example shows a basic sibling resolution strategy in which the first +sibling is chosen as the canonical value. 
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/conflict-resolution/golang.md b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..3efc369aec
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/conflict-resolution/golang
+  - /riak/kv/3.0.2/dev/using/conflict-resolution/golang
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, a fetch may return multiple sibling objects in the
+result's `Values` slice. If there are no siblings, that slice will contain
+a single object.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type. 
+ +[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210) + + + + diff --git a/content/riak/kv/3.0.2/developing/usage/conflict-resolution/java.md b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/java.md new file mode 100644 index 0000000000..ba9c202202 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/java.md @@ -0,0 +1,276 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Java" + identifier: "usage_conflict_resolution_java" + weight: 100 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/3.0.2/dev/using/conflict-resolution/java + - /riak/kv/3.0.2/dev/using/conflict-resolution/java +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Java +client](https://github.com/basho/riak-java-client). + +## How the Java Client Handles Conflict Resolution + +The official Riak Java client provides a `ConflictResolver` interface +for handling sibling resolution. This interface requires that you +implement a `resolve` method that takes a Java `List` of objects of a +specific type that are stored in Riak and produces a single object of +that type, i.e. converts a `List<T>` to a single `T`. Once that +interface has been implemented, it can be registered as a singleton and +thereby applied to all read operations on a specific data type. Below is +an example resolver for the class `Foo`: + +```java +import com.basho.riak.client.api.cap.ConflictResolver; + +public class FooResolver implements ConflictResolver<Foo> { + @Override + public Foo resolve(List<Foo> siblings) { + // Insert your sibling resolution logic here + } +} +``` + +What happens within the `resolve` method is up to you and will always +depend on the use case at hand. You can implement a resolver that +selects a random `Foo` from the list, chooses the `Foo` with the most +recent timestamp (if you've set up the class `Foo` to have timestamps), +etc. In this tutorial we'll provide a simple example to get you started. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends" in the network. +Each user will bear the class `User`, which we'll create below. All of +the data for our application will be stored in buckets that bear the +[bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type +`allow_mult` is set to `true`, which means that Riak will generate +siblings in certain cases---siblings that our application will need to +be equipped to resolve when they arise. + +The question that we need to ask ourselves now is this: if a given user +has sibling values, i.e. if there are multiple `friends` lists and Riak +can't decide which one is most causally recent, which list should be +deemed "correct" from the standpoint of the application? What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? 
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict in which the
+competing values have `friends` lists of 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll also explore the drawbacks of this approach, as well as a
+better alternative, later in this document.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+public class User {
+    public String username;
+    public Set<String> friends;
+
+    public User(String username, Set<String> friends) {
+        this.username = username;
+        this.friends = friends;
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
+need to implement that interface in a way that is specific to the need
+at hand, i.e. taking a list of `User` objects and returning the `User`
+object that has the longest `friends` list:
+
+```java
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied to all reads that involve that object type. 
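+
+To see the effect, here is a brief, hypothetical read that assumes the
+`siblings` bucket type from this tutorial, plus an illustrative `users`
+bucket and the `bashobunny` key, with `UserResolver` registered for
+`User.class` as above; an ordinary fetch then returns an already-resolved
+`User`:
+
+```java
+// A sketch only: assumes the client object and the resolver
+// registration from the examples above
+Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
+FetchValue fetch = new FetchValue.Builder(key).build();
+FetchValue.Response response = client.execute(fetch);
+
+// getValue consults the resolver registered for User.class, so any
+// siblings are resolved before a single User is returned
+User bashobunny = response.getValue(User.class);
+```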
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.2/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // key is empty and returning the one sibling if there is only
+        // one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. 
While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets).
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/nodejs.md
new file mode 100644
index 0000000000..a89ffca131
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/nodejs.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "NodeJS"
+    identifier: "usage_conflict_resolution_nodejs"
+    weight: 104
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/conflict-resolution/nodejs
+  - /riak/kv/3.0.2/dev/using/conflict-resolution/nodejs
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Node.js client](https://github.com/basho/riak-nodejs-client).
+
+## How the Node.js Client Handles Conflict Resolution
+
+In the Riak Node.js client, a fetch may return an array of sibling objects
+in `rslt.values`. If there are no siblings, that array will contain a
+single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68)
+
+So what happens if the length of `rslt.values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update and store a
+canonical value, or choose a sibling from the `values` array and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `values` array and will
+fetch, update and store the definitive value. 
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/conflict-resolution/php.md b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..5aad58feb8
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,244 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/conflict-resolution/php
+  - /riak/kv/3.0.2/dev/using/conflict-resolution/php
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides what is needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+  ->build()
+  ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? 
What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? + +This decision will always be yours to make. Here, though, we'll keep it +simple and say that the following criterion will hold: if conflicting +lists exist, _the longer list will be the one that our application deems +correct_. So if the user `user1234` has a sibling conflict where one +possible value has `friends` lists with 100, 75, and 10 friends, +respectively, the list of 100 friends will win out. While this might +not make sense in real-world applications, it's a good jumping-off +point. We'll explore the drawbacks of this approach, as well as a better +alternative, in this document as well. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` as well as a `friends` property that +lists the usernames, as strings, of the user's friends. We'll use a +`Set` for the `friends` property to avoid duplicates. + +```php +class User { + public $username; + public $friends; + + public function __construct($username, array $friends = []) + { + $this->username = $username; + $this->friends = $friends; + } + + public function __toString() + { + return json_encode([ + 'username' => $this->username, + 'friends' => $this->friends, + 'friends_count' => count($this->friends) + ]); + } +} +``` + +Here's an example of instantiating a new `User` object: + +```php +$bashobunny = new User('bashobunny', ['fred', 'barney']); +``` + +### Implementing a Conflict Resolution Function + +Let's say that we've stored a bunch of `User` objects in Riak and that a +few concurrent writes have led to siblings. How is our application going +to deal with that? First, let's say that there's a `User` object stored +in the bucket `users` (which is of the bucket type `siblings`, as +explained above) under the key `bashobunny`. We can fetch the object +that is stored there and see if it has siblings: + +```php +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('bashobunny', 'users', 'siblings') + ->build() + ->execute(); + +echo $response->hasSiblings(); // 1 +``` + +If we get `true`, then there are siblings. So what do we do in that +case? At this point, we need to write a function that resolves the list +of siblings, i.e. reduces the `$response->getSiblings()` array down to one member. +In our case, we need a function that takes a Riak response object as its argument, +applies some logic to the list of values contained in the `siblings` property +of the object, and returns a single value. 
For our example use case here, we'll
+return the sibling with the longest `friends` list:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    // If there are no siblings, simply return the lone object
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $max_key = 0;
+    foreach ($siblings as $key => $sibling) {
+        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+            $max_key = $key;
+        }
+    }
+
+    return $siblings[$max_key];
+}
+```
+
+We can then embed this function into a more general function for fetching
+objects from the `users` bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.2/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object. 
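+
+A minimal sketch of what that merge might look like follows. It mirrors the
+Python and Ruby merge resolvers elsewhere in these docs; the
+`merge_friends_lists_resolver` name is purely illustrative, and the
+write-back reuses the `StoreObject` builder and `buildObject()` call shown
+in the earlier examples (a production version would also attach the fetched
+causal context, as described in the introduction to conflict resolution):
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+// Illustrative sketch: merge all sibling "friends" lists into one User
+// and write the result back under the same key
+function merge_friends_lists_resolver(Command\Object\Response $response, Riak $riak)
+{
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    // Merge the "friends" arrays from all siblings, dropping duplicates
+    $friends = [];
+    foreach ($response->getSiblings() as $sibling) {
+        $friends = array_merge($friends, $sibling->getData()['friends']);
+    }
+    $friends = array_values(array_unique($friends));
+
+    // Every sibling carries the same username, so take it from the first
+    $username = $response->getSiblings()[0]->getData()['username'];
+    $resolved = new User($username, $friends);
+
+    // Storing the merged value back collapses the siblings; casting to
+    // string uses User::__toString() to produce the JSON payload
+    (new Command\Builder\StoreObject($riak))
+        ->buildObject((string)$resolved, 'application/json')
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return $resolved;
+}
+```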
+ +The drawback to this approach is that it's more or less inevitable that a user +will remove a friend from their friends list, and then that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets). + + + + diff --git a/content/riak/kv/3.0.2/developing/usage/conflict-resolution/python.md b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/python.md new file mode 100644 index 0000000000..660ab84af8 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/python.md @@ -0,0 +1,258 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Python" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Python" + identifier: "usage_conflict_resolution_python" + weight: 102 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/3.0.2/dev/using/conflict-resolution/python + - /riak/kv/3.0.2/dev/using/conflict-resolution/python +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Python +client](https://github.com/basho/riak-python-client). + +## How the Python Client Handles Conflict Resolution + +In the official Python client, every object of the `RiakObject` class +has a `siblings` property that provides access to a list of an object's +sibling values. If there are no siblings, that property will return a +list with only one item. Here's an example of an object with siblings: + +```python +bucket = client.bucket('seahawks') +obj = bucket.get('coach') +obj.siblings + +# The output: +[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x108x1da62c1>] +``` + +So what happens if the length of `obj.siblings` is greater than 1, as in +the case above? 
The easiest way to resolve siblings automatically with +the Python client is to create a conflict-resolving function that takes +a list of sibling values and returns a single value. Such resolution +functions can be registered either at the object level or the bucket +level. A more complete explanation can be found in the section directly +below. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends." Each user will +be of the class `User`, which we'll create below. All of the data for our +application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +to `true`, which means that Riak will generate siblings in certain +cases---siblings that our application will need to be equipped to +resolve when necessary. + +The question that we need to ask ourselves at this point is the +following: if a given user has conflicting lists, which list should be +deemed more "correct?" What criteria should be applied? Should the lists +be merged? Should we pick a list at random and deem that list correct? +We'll keep it simple here and say that the following criterion will +hold: if multiple conflict lists exist, _the longer list will be the one +that our application deems correct_. While this might not make sense in +real-world applications, it's a good jumping-off point. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `friends` property that lists the usernames, as +strings, of the user's friends. We will also create a `to_json` method, +as we'll be storing each `User` object as JSON: + +```python +class User(object): + def __init__(self, username, friends): + self.username = username + self.friends = friends + + def to_json(self): + return vars(self) +``` + +Now, we can create `User` objects and see what they look like as JSON: + +```python +new_user = User('riakuser127', ['captheorem', 'siblingsrule572']) + +new_user.to_json() +# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']} +``` + +### Implementing and Registering a Conflict Resolution Function + +Let's say that we've stored a bunch of `User` objects in Riak and that a +few concurrent writes have led to siblings. How is our application going +to deal with that? First, let's say that there's a `User` object stored +in the bucket `users` (which is of the bucket type `siblings`, as +explained above) under the key `bashobunny`. We can fetch the object +that is stored there and see if it has siblings: + +```python +bucket = client.bucket_type('siblings').bucket('users') +obj = bucket.get('bashobunny') + +print len(obj.siblings) > 1 +``` + +If we get `True`, then there are siblings. So what do we do in that +case? The Python client allows us to write a conflict resolution hook +function that will be triggered any time siblings are found, i.e. any +time `len(obj.siblings) > 1`. A hook function like this needs to take a +single `RiakObject` object as its argument, apply some sort of logic to +the list of values contained in the `siblings` property, and ultimately +return a list with a single "correct" value. 
For our example case, we'll +return the value with the longest `friends` list: + +```python +def longest_friends_list_resolver(riak_object): + # We'll specify a lambda function that operates on the length of + # each sibling's "friends" list: + lm = lambda sibling: len(sibling.data['friends']) + # Then we'll return a list that contains only the object with the + # maximum value for the length of the "friends" list: + riak_object.siblings = [max(riak_object.siblings, key=lm), ] +``` + +### Registering a Conflict Resolver Function + +In the Python client, resolver functions can be registered at the object +level, as in this example: + +```python +bucket = client.bucket_type('siblings').bucket('users') +obj = RiakObject(client, bucket, 'bashobunny') +obj.resolver = longest_friends_list_resolver + +# Now, when the object is loaded from Riak, it will resolve to a single +# value instead of multiple values when both commands are executed: +obj.reload() +obj.store() +``` + +Alternatively, resolvers can be registered at the bucket level, so that +the resolution is applied to all objects in the bucket: + +```python +bucket = client.bucket_type('siblings').bucket('users') +bucket.resolver = longest_friends_list_resolver + +obj = RiakObject(client, bucket, 'bashobunny') +obj.reload() +obj.store() + +# The resolver will also be applied if you perform operations using the +# bucket object: + +bucket.get('bashobunny') +bucket.get('some_other_user') +``` + +## Conflict Resolution and Writes + +In the above example, we created a conflict resolver that resolves a +list of discrepant `User` object values and returns a single value. It's +important to note, however, that this resolver will only provide the +application with a single "correct" value; it will _not_ write that +value back to Riak. That requires a separate step. When this step should +be undertaken depends on your application. In general, though, we +recommend writing objects to Riak only when the application is ready to +commit them, i.e. when all of the changes that need to be made to the +object have been made and the application is ready to persist the state +of the object in Riak. + +Correspondingly, we recommend that updates to objects in Riak follow +these steps: + +1. **Read** the object from Riak +2. **Resolving sibling conflicts** if they exist, allowing the +application to reason about one "correct" value for the object (this +step is the subject of this tutorial) +3. **Modify** the object +4. **Write** the object to Riak once the necessary changes have been +made + +You can find more on writing objects to Riak, including code examples +from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.2/developing/usage) section. + +## More Advanced Example + +Resolving sibling `User` values on the basis of which user has the +longest `friends` list has the benefit of being simple but it's probably +not a good resolution strategy for our social networking application +because it means that unwanted data loss is inevitable. If one friend +list contains `A`, `B`, and `C` and the other contains `D` and `E`, the +list containing `A`, `B`, and `C` will be chosen. So what about friends +`D` and `E`? Those usernames are essentially lost. In the sections +below, we'll implement an alternative strategy as an example. + +### Merging the Lists + +To avoid losing data like this, a better strategy would be to merge the +lists. 
We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets). 
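+
+As a concrete illustration of that alternative, here is a brief,
+hypothetical sketch using the Python client's `riak.datatypes.Set`. The
+`sets` bucket type is an assumption (it must be created with its `datatype`
+property set to `set` before this will work), and `client` is the client
+object used throughout this tutorial:
+
+```python
+from riak.datatypes import Set
+
+# Assumes a bucket type named "sets" whose datatype property is "set"
+bucket = client.bucket_type('sets').bucket('friends')
+
+friends = Set(bucket, 'bashobunny')
+friends.add('fred')
+friends.add('barney')
+friends.store()
+
+# Concurrent adds from other clients converge automatically, so no
+# application-side sibling resolution is needed; dirty_value shows the
+# local view of the set
+print(friends.dirty_value)
+```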
+ + + + diff --git a/content/riak/kv/3.0.2/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/ruby.md new file mode 100644 index 0000000000..ef64aecaea --- /dev/null +++ b/content/riak/kv/3.0.2/developing/usage/conflict-resolution/ruby.md @@ -0,0 +1,254 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Ruby" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Ruby" + identifier: "usage_conflict_resolution_ruby" + weight: 101 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/3.0.2/dev/using/conflict-resolution/ruby + - /riak/kv/3.0.2/dev/using/conflict-resolution/ruby +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Ruby +client](https://github.com/basho/riak-ruby-client). + +## How the Ruby Client Handles Conflict Resolution + +In the official Ruby client, every Riak object has a `siblings` property +that provides access to a list of that object's sibling values. If there +are no siblings, that property will return an array with only one item. +Here's an example of an object with siblings: + +```ruby +bucket = client.bucket('seahawks') +obj = bucket.get('coach') +obj.siblings + +# The output: +[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">] +``` + +So what happens if the length of `obj.siblings` is greater than 1, as in +the case above? In order to resolve siblings, you need to create a +resolution function that takes a Riak object and reduces the `siblings` +array down to a single value. An example is provided in the section +below. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends." Each user will be +of the class `User`, which we'll create below. All of the data for our +application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +to `true`, which means that Riak will generate siblings in certain +cases---siblings that our application will need to be equipped to +resolve when necessary. + +The question that we need to ask ourselves at this point is the +following: if a given user has conflicting lists, which list should be +deemed more "correct?" What criteria should be applied? Should the lists +be merged? Should we pick a list at random and deem that list correct? +We'll keep it simple here and say that the following criterion will +hold: if multiple conflict lists exist, _the longer list will be the one +that our application deems correct_. While this might not make sense in +real-world applications, it's a good jumping-off point. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` and a `friends` property that lists +the usernames, as strings, of the user's friends. 
We will also create a
+`to_json` method, as we'll be storing each `User` object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
+```
+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username, type: 'siblings')
+  longest_friends_list_resolver(user_object)
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. 
**Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.2/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be merged into one
+    # array
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+
+The merge operation that we built to handle conflict resolution is
+analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets).
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/content-types.md b/content/riak/kv/3.0.2/developing/usage/content-types.md
new file mode 100644
index 0000000000..7309c97225
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/content-types.md
@@ -0,0 +1,192 @@
+---
+title: "Content Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Content Types"
+    identifier: "usage_content_types"
+    weight: 104
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+Riak KV is a fundamentally content-agnostic database. You can use it to
+store anything you want, from JSON to XML to HTML to binaries to images
+and beyond. It's important to note that _all_ objects stored in
+Riak need a specified content type. If you don't specify a
+content type, the behavior will vary by client library:
+
+```java
+// In the Java client, the response when storing an object without
+// specifying a content type will depend on what is being stored. If you
+// store a Java Map, for example, the client will automatically specify
+// that the object is "application/json"; if you store a String, the
+// client will specify "application/x-www-form-urlencoded"; POJOs are
+// stored as JSON by default, and so on.
+```
+
+```ruby
+# In the Ruby client, you must always specify a content type. If you
+# don't, you'll see the following error:
+ArgumentError: content_type is not defined!
+```
+
+```php
+# PHP will default to cURL's default content-type for POST & PUT requests:
+# application/x-www-form-urlencoded
+
+# If you use the StoreObject::buildJsonObject() method when building your command,
+# it will store the item with application/json as the content-type
+```
+
+```python
+# In the Python client, the default content type is "application/json".
+# Because of this, you should always make sure to specify the content
+# type when storing other types of data.
+```
+
+```csharp
+// Using the Riak .NET Client, the response when storing an object without
+// specifying a content type will depend on what is being stored.
+// If you store a Dictionary, for example, the client will
+// automatically specify that the object is "application/json";
+// POCOs are stored as JSON by default, and so on.
+```
+
+```javascript
+// In the Node.js client, the default content type is "application/json".
+// Because of this, you should always make sure to specify the content
+// type when storing other types of data.
+```
+
+```erlang
+%% In the Erlang client, the response when storing an object without
+%% specifying a content type will depend on what is being stored. If
+%% you store a simple binary, for example, the client will automatically
+%% specify that the object is "application/octet-stream"; if you store a
+%% string, the client will specify "application/x-erlang-binary"; and so
+%% on.
+```
+
+```golang
+// In the Go client, you must always specify a content type.
+```
+
+Because content type negotiation varies so widely from client to client,
+we recommend consulting the documentation for your preferred client for
+more information.
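+
+Over HTTP, one quick way to see which content type an object ended up
+with is to inspect the response headers on a read (a sketch; the
+bucket, key, and stored value here are hypothetical):
+
+```curl
+curl -i http://localhost:8098/types/default/buckets/test/keys/key1
+
+# Among the response headers you should see the stored content type,
+# e.g.:
+# Content-Type: application/json
+```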
+
+## Specifying Content Type
+
+For all writes to Riak, you will need to specify a content type, for
+example `text/plain` or `application/json`.
+
+```java
+Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(wildeGeniusQuote)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = Riak::RObject.new(bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.raw_data = 'I have nothing to declare but my genius'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('genius', 'oscar_wilde', 'quotes')
+    ->buildObject('I have nothing to declare but my genius!', 'text/plain')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = RiakObject(client, bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.data = 'I have nothing to declare but my genius'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
+var obj = new RiakObject(id, "I have nothing to declare but my genius",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('I have nothing to declare but my genius');
+client.storeValue({
+    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
+                       <<"genius">>,
+                       <<"I have nothing to declare but my genius">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("I have nothing to declare but my genius"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("quotes").
+    WithBucket("oscar_wilde").
+    WithKey("genius").
+    WithContent(obj).
+    Build()

+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "I have nothing to declare but my genius" \
+  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Please note that POST is also a valid method for writes, for the sake
+# of compatibility
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/creating-objects.md b/content/riak/kv/3.0.2/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..b2e611b269
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/creating-objects.md
@@ -0,0 +1,555 @@
+---
+title: "Creating Objects in Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Creating Objects"
+    identifier: "usage_creating_objects"
+    weight: 100
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+[usage content types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/content-types
+
+Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
+requests.
Here is the basic form of writes:
+
+```
+PUT /types/<type>/buckets/<bucket>/keys/<key>
+
+# If you're using HTTP to interact with Riak, you can also use POST
+```
+
+As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/bucket-types).
+
+The object we're storing will be very simple, just a basic text snippet
+of something that Rufus might say. Let's build the object and then store
+it.
+
+``` java
+String quote = "WOOF!";
+Namespace bucket = new Namespace("animals", "dogs");
+Location rufusLocation = new Location(bucket, "rufus");
+RiakObject rufusObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create(quote));
+StoreValue storeOp = new StoreValue.Builder(rufusObject)
+        .withLocation(rufusLocation)
+        .build();
+client.execute(storeOp);
+```
+
+``` ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = Riak::RObject.new(bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store
+```
+
+``` php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('rufus', 'dogs', 'animals')
+    ->buildObject('WOOF!', 'text/plain')
+    ->build()
+    ->execute();
+```
+
+``` python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```
+
+``` csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var obj = new RiakObject(id, "WOOF!", "text/plain");
+var result = client.Put(obj);
+```
+
+``` javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('WOOF!');
+client.storeValue({
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+``` golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("WOOF!"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+Notice that we specified both a value for the object, i.e. `WOOF!`, and
+a content type, `text/plain`. See [content types][usage content types] for more information.
+
+Now, you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/3.0.2/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/bucket-types).
+
+### Store an Object
+
+Your application will often have its own method of generating the keys
+for its data, e.g. on the basis of timestamps. If so, storing that data
+is easy. The basic request looks like this.
+
+```
+PUT /types/TYPE/buckets/BUCKET/keys/KEY
+
+# If you're using HTTP, POST can be used instead of PUT. The only
+# difference between POST and PUT is that you should POST in cases where
+# you want Riak to auto-generate a key. More on this can be found in the
+# examples below.
+```
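+
+Before those, here's a compressed, hypothetical illustration of the two
+write styles side by side (assuming a local node and the `animals`
+bucket type from above):
+
+```curl
+# Write to a known key
+curl -XPUT -H "Content-Type: text/plain" -d "WOOF!" \
+  http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+
+# Let Riak generate the key
+curl -XPOST -H "Content-Type: text/plain" -d "WOOF!" \
+  http://localhost:8098/types/animals/buckets/dogs/keys
+```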
+
+There is no need to intentionally create buckets in Riak. They pop into
+existence when keys are added to them, and disappear when all keys have
+been removed from them. If you don't specify a bucket's type, the type
+[`default`]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) will be applied.
+
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('viper', 'dodge', 'cars')
+    ->buildObject('vroom', 'text/plain')
+    ->withParameter('w', 3)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [{w, 3}]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
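+
+If you haven't created that type yet, a minimal sketch (run once on any
+node in the cluster; no special properties are needed here):
+
+```bash
+riak-admin bucket-type create cars '{"props":{}}'
+riak-admin bucket-type activate cars
+```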
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present. Like a `GET` request, `300 Multiple
+Choices` may be returned if siblings existed or were created as part of
+the operation, and the response can be dealt with similarly.
+
+Normal HTTP status codes (responses will vary for client libraries):
+
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+For example, using the same object from above:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.RETURN_BODY, true)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3, returnbody: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('viper', 'dodge', 'cars')
+    ->buildObject('vroom', 'text/plain')
+    ->withParameter('w', 3)
+    ->withParameter('returnbody', 'true')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+options.SetReturnBody(true);
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, returnBody: true, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var viper = riakObj.value;
+    logger.info("dodge viper: %s", viper.toString('utf8'));
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
+```
+
+### Store a New Object and Assign a Random Key
+
+If your application would rather leave key-generation up to Riak, issue
+a `POST` request to the bucket URL instead of a PUT to a bucket/key
+pair:
+
+```
+POST /types/TYPE/buckets/BUCKET/keys
+```
+
+If you don't pass Riak a `key` name after the bucket, it will know to
+create one for you.
+
+Supported headers are the same as for bucket/key write requests, though
+`X-Riak-Vclock` will never be relevant for these POST requests.
+Supported query parameters are also the same as for bucket/key PUT
+requests.
+ +Normal status codes: + +* `201 Created` + +This command will store an object in the bucket `random_user_keys`, +which bears the bucket type `users`. + +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)
+
+// Output:
+// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
+```
+
+```curl
+curl -i -XPOST \
+  -H "Content-Type: text/plain" \
+  -d "this is a test" \
+  http://localhost:8098/types/users/buckets/random_user_keys/keys
+
+# In the output, you should see a Location header that will give you the
+# location of the object in Riak, with the key at the end:
+
+Location: /buckets/test/keys/G7FYUXtTsEdru4NP32eijMIRK3o
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/custom-extractors.md b/content/riak/kv/3.0.2/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..fb299c1b65
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/custom-extractors.md
@@ -0,0 +1,424 @@
+---
+title: "Custom Extractors"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Custom Extractors"
+    identifier: "usage_custom_extractors"
+    weight: 113
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/search/custom-extractors
+  - /riak/kv/3.0.2/dev/search/custom-extractors
+---
+
+Solr, and by extension Riak Search, has default extractors for a wide
+variety of data types, including JSON, XML, and plaintext. Riak Search
+ships with the following extractors:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+No specified type | `yz_noop_extractor`
+
+There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/usage/searching-data-types).
+
+If you're working with a data format that does not have a default Solr
+extractor, you can create your own and register it with Riak Search.
+We'll show you how to do so by way of example.
+
+## The Extractor Interface
+
+Creating a custom extractor involves creating an Erlang module that
+implements two functions:
+
+* `extract/1` - Takes the contents of the object and calls `extract/2`
+  with the same contents and an empty list
+* `extract/2` - Takes the contents of the object and returns an Erlang
+  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
+  single field name and a single value associated with that name
+
+The following extractor shows how a pure text extractor implements those
+two functions:
+
+```erlang
+-module(search_test_extractor).
+-include("yokozuna.hrl").
+-compile(export_all).
+
+extract(Value) ->
+    extract(Value, []).
+
+extract(Value, Opts) ->
+    FieldName = field_name(Opts),
+    [{FieldName, Value}].
+
+-spec field_name(proplist()) -> any().
+field_name(Opts) ->
+    proplists:get_value(field_name, Opts, text).
+```
+
+This extractor takes the contents of a `Value` and returns a proplist
+with a single field name (in this case `text`) and the single value.
+This function can be run in the Erlang shell. Let's run it providing the
+text `hello`:
+
+```erlang
+> c(search_test_extractor).
+%% {ok, search_test_extractor}
+
+> search_test_extractor:extract("hello").
+
+%% Console output:
+[{text, "hello"}]
+```
+
+Upon running this command, the value `hello` would be indexed in Solr
+under the field name `text`.
If you wanted to find all objects with a +`text` field that begins with `Fourscore`, you could use the +Solr query `text:Fourscore*`, to give just one example. + +## An Example Custom Extractor + +Let's say that we're storing HTTP header packet data in Riak. Here's an +example of such a packet: + +``` +GET http://www.google.com HTTP/1.1 +``` + +We want to register the following information in Solr: + +Field name | Value | Extracted value in this example +:----------|:------|:------------------------------- +`method` | The HTTP method | `GET` +`host` | The URL's host | `www.google.com` +`uri` | The URI, i.e. what comes after the host | `/` + +The example extractor below would provide the three desired +fields/values. It relies on the +[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3) +function from Erlang's standard library. + +```erlang +-module(yz_httpheader_extractor). +-compile(export_all). + +extract(Value) -> + extract(Value, []). + +%% In this example, we can ignore the Opts variable from the example +%% above, hence the underscore: +extract(Value, _Opts) -> + {ok, + {http_request, + Method, + {absoluteURI, http, Host, undefined, Uri}, + _Version}, + _Rest} = erlang:decode_packet(http, Value, []), + [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}]. +``` + +This file will be stored in a `yz_httpheader_extractor.erl` file (as +Erlang filenames must match the module name). Now that our extractor has +been written, it must be compiled and registered in Riak before it can +be used. + +## Registering Custom Extractors + +In order to use a custom extractor, you must create a compiled `.beam` +file out of your `.erl` extractor file and then tell Riak where that +file is located. Let's say that we have created a +`search_test_extractor.erl` file in the directory `/opt/beams`. First, +we need to compile that file: + +```bash +erlc search_test_extractor.erl +``` + +To instruct Riak where to find the resulting +`search_test_extractor.beam` file, we'll need to add a line to an +`advanced.config` file in the node's `/etc` directory (more information +can be found in our documentation on [advanced]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#advanced-configuration)). Here's an +example: + +```advancedconfig +[ + %% Other configs + {vm_args, [ + {"-pa /opt/beams", ""} + ]}, + %% Other configs +] +``` + +This will instruct the Erlang VM on which Riak runs to look for compiled +`.beam` files in the proper directory. You should re-start the node at +this point. Once the node has been re-started, you can use the node's +Erlang shell to register the `yz_httpheader_extractor`. First, attach to +the shell: + +```bash +riak attach +``` + +At this point, we need to choose a MIME type for our extractor. Let's +call it `application/httpheader`. Once you're in the shell: + +```erlang +> yz_extractor:register("application/httpheader", yz_httpheader_extractor). +``` + +If successful, this command will return a list of currently registered +extractors. 
It should look like this:
+
+```erlang
+[{default,yz_noop_extractor},
+ {"application/httpheader",yz_httpheader_extractor},
+ {"application/json",yz_json_extractor},
+ {"application/riak_counter",yz_dt_extractor},
+ {"application/riak_map",yz_dt_extractor},
+ {"application/riak_set",yz_dt_extractor},
+ {"application/xml",yz_xml_extractor},
+ {"text/plain",yz_text_extractor},
+ {"text/xml",yz_xml_extractor}]
+```
+
+If the `application/httpheader` extractor is part of that list, then the
+extractor has been successfully registered.
+
+## Verifying Our Custom Extractor
+
+Now that Riak Search knows how to decode and extract HTTP header packet
+data, let's store some in Riak and then query it. We'll put the example
+packet data from above in a `google_packet.bin` file. Then, we'll `PUT`
+that binary to Riak's `/search/extract` endpoint:
+
+```curl
+# Note that we use our custom MIME type in the Content-Type header
+curl -XPUT $RIAK_HOST/search/extract \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+That should return the following JSON:
+
+```json
+{
+  "method": "GET",
+  "host": "www.google.com",
+  "uri": "/"
+}
+```
+
+We can also verify this in the Erlang shell (whether in a Riak node's
+Erlang shell or otherwise):
+
+```erlang
+yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor).
+
+%% Console output:
+[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}]
+```
+
+## Indexing and Searching HTTP Header Packet Data
+
+Now that Solr knows how to extract HTTP header packet data, we need to
+create a schema that extends the [default schema]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
+to `<fields>` in the schema, which we'll name `http_header_schema` and
+store in a `http_header_schema.xml` file:
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="http_header_schema" version="1.5">
+<fields>
+  <!-- other required fields here -->
+
+  <field name="method" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="host" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/>
+</fields>
+```
+
+Now, we can store the schema:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("http_header_schema.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_xml = File.read('http_header_schema.xml')
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```php
+$schema_string = file_get_contents('http_header_schema.xml');
+(new \Basho\Riak\Command\Builder\StoreSchema($riak))
+    ->withName('http_header_schema')
+    ->withSchemaString($schema_string)
+    ->build()
+    ->execute();
+```
+
+```python
+import io
+
+schema_xml = open('http_header_schema.xml').read()
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/http_header_schema \
+  -H 'Content-Type: application/xml' \
+  --data-binary @http_header_schema.xml
+```
+
+Riak now has our schema stored and ready for use.
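+
+Optionally, we can sanity-check the upload by fetching the schema back
+(a sketch; the GET mirrors the PUT we just issued):
+
+```curl
+curl $RIAK_HOST/search/schema/http_header_schema
+
+# This should return the XML schema we uploaded above
+```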
+Let's create a search index called `header_data` that's associated
+with our new schema:
+
+```java
+YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
+StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreIndex($riak))
+    ->withName('header_data')
+    ->usingSchema('http_header_schema')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/header_data \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"http_header_schema"}'
+```
+
+Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types)
+for all of the HTTP header data that we plan to store. Any bucket that
+bears this type will be associated with our `header_data` search index.
+We'll call our bucket type `http_data_store`.
+
+```bash
+riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
+riak-admin bucket-type activate http_data_store
+```
+
+Let's use the same `google_packet.bin` file that we used previously and
+store it in a bucket with the `http_data_store` bucket type, making sure
+to use our custom `application/httpheader` MIME type:
+
+```java
+Location key = new Location(new Namespace("http_data_store", "packets"), "google");
+File packetData = new File("google_packet.bin");
+byte[] packetBinary = FileUtils.readFileToByteArray(packetData);
+
+RiakObject packetObject = new RiakObject()
+        .setContentType("application/httpheader")
+        .setValue(BinaryValue.create(packetBinary));
+
+StoreValue storeOp = new StoreValue.Builder(packetObject)
+        .withLocation(key)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+packet_data = File.read('google_packet.bin')
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = Riak::RObject.new(bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.raw_data = packet_data
+obj.store
+```
+
+```php
+$object = new Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('google', 'packets', 'http_data_store')
+    ->withObject($object)
+    ->build()
+    ->execute();
+```
+
+```python
+packet_data = open('google_packet.bin').read()
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = RiakObject(client, bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.data = packet_data
+obj.store()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+Now that we have some header packet data stored, we can query our
+`header_data` index on whatever basis we'd like.
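+
+For instance, here's a quick sketch of querying on the `host` field we
+defined (the expected count assumes only the one object stored above):
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=host:www.google.com"
+
+# The response's found count should be 1
+```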
+First, let's verify that we'll get one result if we query for objects
+that have the HTTP method `GET`:
+
+```java
+// Using the same method from above:
+String query = "method:GET";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withQuery('method:GET')
+    ->withIndexName('header_data')
+    ->build()
+    ->execute();
+
+$response->getNumFound();
+```
+
+```python
+results = client.fulltext_search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"
+
+# This should return a fairly large JSON object with a "num_found" field
+# The value of that field should be 1
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/deleting-objects.md b/content/riak/kv/3.0.2/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..2159603e78
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/deleting-objects.md
@@ -0,0 +1,157 @@
+---
+title: "Deleting Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Deleting Objects"
+    identifier: "usage_deleting_objects"
+    weight: 103
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+The delete command follows a predictable pattern and looks like this:
+
+```
+DELETE /types/TYPE/buckets/BUCKET/keys/KEY
+```
+
+The normal HTTP response codes for `DELETE` operations are `204 No
+Content` and `404 Not Found`. 404 responses are *normal*, in the sense
+that `DELETE` operations are idempotent and not finding the resource has
+the same effect as deleting it.
+
+Let's try to delete the `genius` key from the `oscar_wilde` bucket
+(which bears the type `quotes`):
+
+```java
+Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
+client.execute(delete);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\DeleteObject($riak))
+    ->buildLocation('genius', 'oscar_wilde', 'quotes')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+string key = rslt.Value.Key;
+id = new RiakObjectId("users", "random_user_keys", key);
+var del_rslt = client.Delete(id);
+```
+
+```javascript
+// continuing from above example
+options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    key: generatedKey
+};
+client.deleteValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>).
+```
+
+```golang
+// Continuing from above example
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithKey(rsp.GeneratedKey).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+```
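+
+Because deletes are idempotent, repeating the command is harmless. A
+sketch of what a second delete of the same key looks like over HTTP:
+
+```curl
+curl -i -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# The second invocation should return a 404, which, as noted above, is
+# a normal response here
+```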
+
+## Client Library Examples
+
+If you are updating an object that has been deleted---or if an update
+might target a deleted object---we recommend that
+you first fetch the [causal context]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context) of the object prior to updating.
+This can be done by setting the `deletedvclock` parameter to `true` as
+part of the [fetch operation]({{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers/fetch-object). This can also be done
+with the official Riak clients for Ruby, Java, and Erlang, as in the
+example below:
+
+
+```ruby
+object.delete
+deleted_object = bucket.get('key', deletedvclock: true)
+deleted_object.vclock
+```
+
+```python
+# It is not currently possible to fetch the causal context for a deleted
+# key in the Python client.
+```
+
+```java
+Location loc = new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+FetchValue fetch = new FetchValue.Builder(loc)
+        .withOption(Option.DELETED_VCLOCK, true)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+System.out.println(response.getVclock().asString());
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"bucket_type">>, <<"bucket">>},
+                                <<"key">>,
+                                [{deleted_vclock}]).
+
+%% In the Erlang client, the vector clock is accessible using the Obj
+%% object obtained above.
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type')
+    ->build()
+    ->execute();
+
+echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/document-store.md b/content/riak/kv/3.0.2/developing/usage/document-store.md
new file mode 100644
index 0000000000..346a0b9f3c
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/document-store.md
@@ -0,0 +1,617 @@
+---
+title: "Implementing a Document Store"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Implementing a Document Store"
+    identifier: "usage_document_store"
+    weight: 112
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/search/document-store
+  - /riak/kv/3.0.2/dev/search/document-store
+---
+
+Although Riak wasn't explicitly created as a document store, two
+features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/)---make it possible to use Riak as a
+highly scalable document store with rich querying capabilities. In this
+tutorial, we'll build a basic implementation of a document store using
+[Riak maps]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#maps).
+
+## Basic Approach
+
+Riak Search enables you to implement a document store in Riak in a
+variety of ways. You could, for example, store and query JSON objects or
+XML and then retrieve them later via Solr queries. In this tutorial,
+however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#maps),
+index that data using Riak Search, and then run Solr queries against
+those stored objects.
+
+You can think of these Search indexes as **collections**.
Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/3.0.2/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. + +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). 
+Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @blog_post_schema.xml +``` + +With our schema uploaded, we can create an index called `blog_posts` and +associate that index with our schema: + +```java +YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema"); +StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('blog_posts') + ->usingSchema('blog_post_schema') + ->build() + ->execute(); +``` + +```python +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```csharp +var idx = new SearchIndex("blog_posts", "blog_post_schema"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: 'blog_post_schema', + indexName: 'blog_posts' +}; +client.storeIndex(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/blog_posts \ + -H 'Content-Type: application/json' \ + -d '{"schema": "blog_post_schema"}' +``` + +## How Collections will Work + +Collections are not a concept that is native to Riak but we can easily +mimic collections by thinking of a bucket type as a collection. When we +associate a bucket type with a Riak Search index, all of the objects +stored in any bucket of that bucket type will be queryable on the basis +of that one index. For this tutorial, we'll create a bucket type called +`cms` and think of that as a collection. 
We could also restrict our
+`blog_posts` index to a single bucket just as easily and think of that
+as a queryable collection, but we will not do that in this tutorial.
+
+The advantage of the bucket-type-based approach is that we could store
+blog posts from different blogs in different buckets and query them
+all at once as part of the same index. It depends on the use case at
+hand. In this tutorial, we'll only be storing posts from one blog, which
+is called "Cat Pics Quarterly" and provides in-depth theoretical
+discussions of cat pics with a certain number of Reddit upvotes. All of
+the posts in this blog will be stored in the bucket
+`cat_pics_quarterly`.
+
+First, let's create our `cms` bucket type and associate it with the
+`blog_posts` index:
+
+```bash
+riak-admin bucket-type create cms \
+  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
+riak-admin bucket-type activate cms
+```
+
+Now, any object stored in any bucket of the type `cms` will be indexed
+as part of our "collection."
+
+## Storing Blog Posts as Maps
+
+Now that we know how each element of a blog post can be translated into
+one of the Riak Data Types, we can create an interface in our
+application to serve as that translation layer. Using the method
+described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-modeling), we can construct a
+class that looks like this:
+
+```java
+import java.util.Set;
+import org.joda.time.DateTime;
+
+public class BlogPost {
+    private String title;
+    private String author;
+    private String content;
+    private Set<String> keywords;
+    private DateTime datePosted;
+    private Boolean published;
+    private static final String bucketType = "cms";
+
+    private Location location;
+
+    private RiakClient client;
+
+    public BlogPost(RiakClient client,
+                    String bucketName,
+                    String title,
+                    String author,
+                    String content,
+                    Set<String> keywords,
+                    DateTime datePosted,
+                    Boolean published) {
+        this.client = client;
+        this.location = new Location(new Namespace(bucketType, bucketName), null);
+        this.title = title;
+        this.author = author;
+        this.content = content;
+        this.keywords = keywords;
+        this.datePosted = datePosted;
+        this.published = published;
+    }
+
+    public void store() throws Exception {
+        RegisterUpdate titleUpdate = new RegisterUpdate(title);
+        RegisterUpdate authorUpdate = new RegisterUpdate(author);
+        RegisterUpdate contentUpdate = new RegisterUpdate(content);
+        SetUpdate keywordsUpdate = new SetUpdate();
+        for (String keyword : keywords) {
+            keywordsUpdate.add(keyword);
+        }
+        RegisterUpdate dateUpdate =
+            new RegisterUpdate(datePosted.toString("YYYY-MM-DD HH:MM"));
+        FlagUpdate publishedUpdate = new FlagUpdate(published);
+        MapUpdate mapUpdate = new MapUpdate()
+            .update("title", titleUpdate)
+            .update("author", authorUpdate)
+            .update("content", contentUpdate)
+            .update("keywords", keywordsUpdate)
+            .update("date", dateUpdate)
+            .update("published", publishedUpdate);
+        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
+            .build();
+        client.execute(storeBlogPost);
+    }
+}
+```
+
+```ruby
+class BlogPost
+  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
+    bucket = client.bucket_type('cms').bucket(bucket_name)
+    map = Riak::Crdt::Map.new(bucket, nil)
+    map.batch do |m|
+      m.registers['title'] = title
+      m.registers['author'] = author
+      m.registers['content'] = content
+      keywords.each do |k|
+        m.sets['keywords'].add(k)
+      end
+      m.registers['date'] = date_posted
+      if published
+        m.flags['published'] = true
+      end
+    end
+  end
+end
+```
+
+```php
+class BlogPost {
+    private $title = '';
+    private $author = '';
+    private $content = '';
+    private $keywords = [];
+    private $datePosted = '';
+    private $published = false;
+    private $bucketType = "cms";
+
+    private $bucket = null;
+
+    private $riak = null;
+
+    public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
+    {
+        $this->riak = $riak;
+        $this->bucket = new Bucket($bucket, $this->bucketType);
+        $this->title = $title;
+        $this->author = $author;
+        $this->content = $content;
+        $this->keywords = $keywords;
+        $this->datePosted = $date;
+        $this->published = $published;
+    }
+
+    public function store()
+    {
+        $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));
+
+        foreach($this->keywords as $keyword) {
+            $setBuilder->add($keyword);
+        }
+
+        (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
+            ->updateRegister('title', $this->title)
+            ->updateRegister('author', $this->author)
+            ->updateRegister('content', $this->content)
+            ->updateRegister('date', $this->datePosted)
+            ->updateFlag('published', $this->published)
+            ->updateSet('keywords', $setBuilder)
+            ->withBucket($this->bucket)
+            ->build()
+            ->execute();
+    }
+}
+```
+
+```python
+from riak.datatypes import Map
+
+class BlogPost:
+    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
+        bucket = client.bucket_type('cms').bucket(bucket_name)
+        self.map = Map(bucket, None)
+        self.map.registers['title'].assign(title)
+        self.map.registers['author'].assign(author)
+        self.map.registers['content'].assign(content)
+        for k in keywords:
+            self.map.sets['keywords'].add(k)
+        self.map.registers['date'].assign(date_posted)
+        if published:
+            self.map.flags['published'].enable()
+        self.map.store()
+```
+
+```csharp
+/*
+ * Please see the code in the RiakClientExamples project:
+ * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
+ */
+```
+
+```javascript
+/*
+ * Please see the code in the examples repository:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
+ */
+```
+
+Now, we can store some blog posts.
We'll start with just one:
+
+```java
+Set<String> keywords = new HashSet<String>();
+keywords.add("adorbs");
+keywords.add("cheshire");
+
+BlogPost post1 = new BlogPost(client, // client object
+                              "cat_pics_quarterly", // bucket
+                              "This one is so lulz!", // title
+                              "Cat Stevens", // author
+                              "Please check out these cat pics!", // content
+                              keywords, // keywords
+                              new DateTime(), // date posted
+                              true); // published
+try {
+    post1.store();
+} catch (Exception e) {
+    System.out.println(e);
+}
+```
+
+```ruby
+keywords = ['adorbs', 'cheshire']
+date = Time.now.strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost.new('cat_pics_quarterly',
+                          'This one is so lulz!',
+                          'Cat Stevens',
+                          'Please check out these cat pics!',
+                          keywords,
+                          date,
+                          true)
+```
+
+```php
+$keywords = ['adorbs', 'cheshire'];
+$date = new \DateTime('now');
+
+$post1 = new BlogPost(
+    $riak, // client object
+    'cat_pics_quarterly', // bucket
+    'This one is so lulz!', // title
+    'Cat Stevens', // author
+    'Please check out these cat pics!', // content
+    $keywords, // keywords
+    $date, // date posted
+    true // published
+);
+```
+
+```python
+import datetime
+
+keywords = ['adorbs', 'cheshire']
+date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost('cat_pics_quarterly',
+                      'This one is so lulz!',
+                      'Cat Stevens',
+                      'Please check out these cat pics!',
+                      keywords,
+                      date,
+                      True)
+```
+
+```csharp
+var keywords = new HashSet<string> { "adorbs", "cheshire" };
+
+var post = new BlogPost(
+    "This one is so lulz!",
+    "Cat Stevens",
+    "Please check out these cat pics!",
+    keywords,
+    DateTime.Now,
+    true);
+
+var repo = new BlogPostRepository(client, "cat_pics_quarterly");
+string id = repo.Save(post);
+```
+
+```javascript
+var post = new BlogPost(
+    'This one is so lulz!',
+    'Cat Stevens',
+    'Please check out these cat pics!',
+    [ 'adorbs', 'cheshire' ],
+    new Date(),
+    true
+);
+
+var repo = new BlogPostRepository(client, 'cat_pics_quarterly');
+
+repo.save(post, function (err, rslt) {
+    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
+});
+```
+
+## Querying
+
+Now that we have some blog posts stored in our "collection," we can
+start querying for whatever we'd like. Let's say that we want to find
+all blog posts with the keyword `funny` (after all, some cat pics are
+quite serious, and we may not want those).
+ +```java +String index = "blog_posts"; +String query = "keywords_set:funny"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'keywords_set:funny') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('keywords_set:funny') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'keywords_set:funny') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('keywords_set:funny') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny" +``` + +Or we can find posts that contain the word `furry`: + +```java +String index = "blog_posts"; +String query = "content_register:furry"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'content_register:furry') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('content_register:furry') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'content_register:furry') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('content_register:furry') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry" +``` + +Here are some more possible queries: + +Info | Query +:----|:----- +Unpublished posts | `published_flag:false` +Titles that begin with `Loving*` | `title_register:Loving*` +Post bodies containing the words `furry` and `jumping` | `content_register:[furry AND jumping]` + + + + diff --git a/content/riak/kv/3.0.2/developing/usage/mapreduce.md b/content/riak/kv/3.0.2/developing/usage/mapreduce.md new file mode 100644 index 0000000000..dc4f4dfa41 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/usage/mapreduce.md @@ -0,0 +1,246 @@ +--- +title: "Using MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Using MapReduce" + identifier: "usage_mapreduce" + weight: 106 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.2/dev/using/mapreduce + - /riak/kv/3.0.2/dev/using/mapreduce +--- + +[usage 2i]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes +[usage search]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/search +[usage types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types +[api http]: {{<baseurl>}}riak/kv/3.0.2/developing/api/http +[api pb]: {{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode +[guide mapreduce]: 
{{<baseurl>}}riak/kv/3.0.2/developing/app-guide/advanced-mapreduce + +{{% note title="Use MapReduce sparingly" %}} +In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive to the extent that they can degrade performance in +production clusters operating under load. Thus, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. +{{% /note %}} + +MapReduce (M/R) is a technique for dividing data processing work across +a distributed system. It takes advantage of the parallel processing +power of distributed systems and also reduces network bandwidth, as the +algorithm is passed around to where the data lives rather than +transferring a potentially huge dataset to a client algorithm. + +You can use MapReduce for things like: filtering documents by +tags, counting words in documents, and extracting links to related data. +In Riak KV, MapReduce is one method for querying that is not strictly based +on key querying, alongside [secondary indexes][usage 2i] +and [search][usage search]. MapReduce jobs can be submitted through the +[HTTP API][api http] or the [Protocol Buffers API][api pb], although we +strongly recommend using the Protocol Buffers API for performance +reasons. + +## Features + +* Map phases execute in parallel with data locality. +* Reduce phases execute in parallel on the node where the job was + submitted. +* MapReduce queries written in Erlang. + +## When to Use MapReduce + +* When you know the set of objects over which you want to MapReduce + (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key) +* When you want to return actual objects or pieces of objects and not + just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on + non-key-based queries, but they only return lists of keys and not + whole objects. +* When you need the utmost flexibility in querying your data. MapReduce + gives you full access to your object and lets you pick it apart any + way you want. + +## When Not to Use MapReduce + +* When you want to query data over an entire bucket. MapReduce uses a + list of keys, which can place a lot of demand on the cluster. +* When you want latency to be as predictable as possible. + +## How it Works + +The MapReduce framework helps developers divide a query into steps, +divide the dataset into chunks, and then run those step/chunk pairs in +separate physical hosts. + +There are two steps in a MapReduce query: + +* **Map** - The data collection phase, which breaks up large chunks of + work into smaller ones and then takes action on each chunk. Map + phases consist of a function and a list of objects on which the map + operation will operate. +* **Reduce** - The data collation or processing phase, which combines + the results from the map step into a single output. The reduce phase + is optional. + +Riak KV MapReduce queries have two components: + +* A list of inputs +* A list of phases + +The elements of the input list are object locations as specified by +[bucket type][usage types], bucket, and key. The elements of the +phases list are chunks of information related to a map, a reduce, or a +link function. + +A MapReduce query begins when a client makes the request to Riak KV. 
The
+node that the client contacts to make the request becomes the
+*coordinating node* responsible for the MapReduce job. As described
+above, each job consists of a list of phases, where each phase is either
+a map or a reduce phase. The coordinating node uses the list of phases
+to route the object keys and the function that will operate over the
+objects stored in those keys and instruct the proper [vnode][glossary vnode] to
+run that function over the right objects.
+
+After running the map function, the results are sent back to the
+coordinating node. This node then concatenates the list and passes that
+information over to a reduce phase on the same coordinating node,
+assuming that the next phase in the list is a reduce phase.
+
+The diagram below provides an illustration of how a coordinating vnode
+orchestrates a MapReduce job.
+
+![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png)
+
+## Example
+
+In this example, we'll create four objects with the text "caremad"
+repeated a varying number of times and store those objects in the bucket
+`training` (which does not bear a [bucket type][usage types]).
+An Erlang MapReduce function will be used to count the occurrences of
+the word "caremad."
+
+### Data object input commands
+
+For the sake of simplicity, we'll use [curl](http://curl.haxx.se/)
+in conjunction with Riak KV's [HTTP API][api http] to store the objects:
+
+```curl
+curl -XPUT http://localhost:8098/buckets/training/keys/foo \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad data goes here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bar \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad caremad'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/baz \
+  -H 'Content-Type: text/plain' \
+  -d 'nothing to see here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bam \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad'
+```
+
+### MapReduce invocation
+
+Invoking a MapReduce function from a compiled Erlang program requires
+that the function be compiled and distributed to all nodes.
+
+For interactive use, however, it's not necessary to do so; instead, we
+can invoke the client library from the
+[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define
+functions to send to Riak KV on the fly.
+
+First we define the map function, which returns, for each object in
+the bucket `training`, the object's key along with the number of times
+the text `caremad` occurs in its value.
+
+We're going to generalize and optimize it a bit by supplying a
+compiled regular expression when we invoke MapReduce; our function
+will expect that as the third argument.
+
+```erlang
+ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of
+    {match, Matches} -> [{riak_object:key(O), length(Matches)}];
+    nomatch -> [{riak_object:key(O), 0}]
+end end.
+```
+
+Next, to call `ReFun` on all keys in the `training` bucket, we can do
+the following in the Erlang shell.
+
+{{% note title="Warning" %}}
+Do not use this in a production
+environment; listing all keys to identify those in the `training` bucket
+is a very expensive process.
+{{% /note %}}
+
+```erlang
+{ok, Re} = re:compile("caremad").
+```
+
+That will return output along the following lines, verifying that
+compilation has completed:
+
+```
+{ok,{re_pattern,0,0,
+    <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100,
+    ...>>}}
+```
+
+Then, we can create a socket link to our cluster:
+
+```erlang
+{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087). 
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+    [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
+
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/next-gen-replication.md b/content/riak/kv/3.0.2/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..240fbade59
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/next-gen-replication.md
@@ -0,0 +1,153 @@
+---
+title: "Next-Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "3.0.2"
+menu:
+  riak_kv-3.0.2:
+    name: "Next Gen Replication"
+    identifier: "learn_concepts_next_gen_replication"
+    weight: 108
+    parent: "learn_concepts"
+version_history:
+  in: "2.9.1+"
+toc: true
+aliases:
+  - /riak-docs/riak/3.0.2/dev/using/nextgenreplication
+---
+[concept TicTac aae]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/tictac-active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/replication
+
+## Next Generation Replication - How it Works
+
+### Replication Actors
+
+Each node in `riak_kv` starts three processes that manage the inter-cluster replication: a tictac AAE full-sync manager, a replication queue source manager, and a replication queue sink manager. All processes are started by default (whether or not replication is enabled), but will only play an active role should replication be configured. Further details on the processes involved:
+
+* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`
+
+  * There is a single actor on each node that manages the full-sync reconciliation workload configured for that node.
+
+  * Each node is configured with the details of a peer node at a remote cluster. Each manager is responsible for controlling cluster-wide hashtree exchanges between the local node and the peer node, and for prompting any repairs required across the cluster (not just on this node). The information is exchanged between the peers, but that information represents the data across the whole cluster. 
Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`.
+
+  * Each node is configured with a schedule to determine how frequently this manager will run its reconcile and repair operations.
+
+  * It is an administrator's responsibility to ensure the cluster AAE workload is distributed across nodes with sufficient diversity to ensure correct operation under failure. Work is not re-distributed between nodes in response to failure on either the local or remote cluster, so there must be other nodes already configured to share that workload to continue operation under failure conditions.
+
+  * Each node can only full-sync with one other cluster (via the one peer node). If the cluster needs to full-sync with more than one cluster, then the administrator should ensure different nodes have the different configurations necessary to achieve this.
+
+  * Scheduling of work to minimise concurrency of reconciliation operations is managed by this actor using a simple, coordination-free mechanism.
+
+  * The administrator may at run-time suspend or resume the regular running of full-sync operations on any given node via the `riak_kv_ttaaefs_manager`.
+
+* __Replication Queue Source-Side Manager__
+
+  * There is a single actor on each node that manages the queueing of replication object references to be consumed by other clusters. This actor runs a configurable number of queues, which contain pointers to data which is required to be consumed by different remote clusters.
+
+  * The general pattern is that each delta within a cluster will be published once via the `riak_kv_replrtq_src` on a node local to the discovery of the change. Each queue which is a source of updates will have multiple consumers spread across multiple sink nodes on the receiving cluster - where each sink-side node's consumers are being managed by a `riak_kv_replrtq_snk` process on that node.
+
+  * Queues may have data filtering rules to restrict what changes are distributed via that queue. The filters can restrict replication to a specific bucket, or bucket type, a bucket name prefix or allow for any change to be published to that queue.
+
+  * __Real-time replication__ changes (i.e. PUTs that have just been co-ordinated on this node within the cluster), are sent to the `riak_kv_replrtq_src` in one of the following formats:
+    * {Bucket, Key, Clock, {tombstone, Object}};
+    * {Bucket, Key, Clock, {object, Object}};
+    * {Bucket, Key, Clock, to_fetch}.
+
+  * Real-time replicated objects are the highest priority items to be queued, and are placed on __every queue whose data filtering rules are matched__ by the object. If the priority queue has grown beyond a limited number of items (the number being defined in `riak_kv.replrtq_srcobjectlimit`), then any {object, Object} references are stripped and replaced with `to_fetch`. This is to help limit the memory consumed by the queue during failure conditions, i.e. when a sink has stopped consuming from the source queue.
+
+  * Changes identified by __AAE full-sync replication__ processes run by the `riak_kv_ttaaefs` manager on the local node are sent to the `riak_kv_replrtq_src` as references, and queued as the second highest priority. These changes are queued only on __a single queue defined within the configuration__ of `riak_kv_ttaaefs_manager`. The changes queued are only references to the object (Bucket, Key and Clock) not the actual object. 
+ + * Changes identified by __AAE fold operations__ for administrator initiated transition or repair operations (e.g. fold over a bucket or key-range, or for a given range of modified dates), are sent to the `riak_kv_replrtq_src` to be queued as the lowest priority onto __a single queue defined by the administrator when initiating the AAE fold operation__. The changes queued are only references to the object (Bucket, Key and Clock) not the actual object - and are only the changes discovered through the fold running on vnodes local to this node. + + * Should the local node fail, all undelivered object references will be dropped. + + * Queues are bounded, with limits set separately for each priority. Items are consumed from the queue in strict priority order. So a backlog of non-real-time replication events cannot cause a backlog or failure in real-time events. + + * The queues are provided using the existing `riak_core_priority_queue` module in Riak. + + * The administrator may at run-time suspend or resume the publishing of data to specific queues via the `riak_kv_replrtq_src` process. + +* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk` + + * There is a single actor on each node that manages the process of consuming from queues on the `riak_kv_replrtq_src` on remote clusters. + + * The `riak_kv_replrtq_snk` can be configured to consume from multiple queues, across an open-ended number of peers. For instance if each node on Cluster A maintains a queue named `cluster_c_full`, and each node on Cluster B maintains a queue named `cluster_c_partial` - then `riak_kv_replrtq_snk` can be configured to consume from the `cluster_c_full` from every node in Cluster A and from `cluster_c_partial` from every node in Cluster B. + + * The `riak_kv_replrtq_snk` manages a finite number of workers for consuming from remote peers. The `riak_kv_replrtq_snk` tracks the results of work in order to back-off slightly from peers regularly not returning results to consume requests (in favour of those peers indicating a backlog by regularly returning results). The `riak_kv_replrtq_snk` also tracks the results of work in order to back-off severely from those peers returning errors (so as not to lock too many workers consuming from unreachable nodes). + + * The administrator may at run-time suspend or resume the consuming of data from specific queues or peers via the `riak_kv_replrtq_snk`. + +### Real-time Replication - Step by Step + +Previous replication implementations initiate replication through a post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm` after "enough" responses have been received from other vnodes (based on n, w, dw and pw values for the PUT). Without enough responses, the replication hook is not fired, although the client should receive an error and retry. This process of retrying may eventually fire the hook - although it is possible for a PUT to fail, the hook not to be fired, but a GET be locally successful (due to read-repair and anti-entropy) and there be no clue that the object has not been replicated. + +In implementing the new replication solution, the point of firing off replication has been changed to the point that the co-ordinated PUT is completed. So the replication of the PUT to the clusters may occur in parallel to the replication of the PUT to other nodes in the source cluster. This is the first opportunity where sufficient information is known (e.g. 
the updated vector clock), and reduces the size of the time-window of inconsistency between the clusters, and also reduces the window of opportunity for a PUT to succeed but not have replication triggered.
+
+Replication is fired within the `riak_kv_vnode` `actual_put/8`. On condition of the vnode being a co-ordinator of the put, and of `riak_kv.replrtq_enablesrc` being set to enabled (true), the following work is done:
+
+- The object reference to be replicated is determined; this is the type of reference to be placed on the replication queue.
+
+  - If the object is now a tombstone, the whole object is used as the replication reference. The whole object is used due to the small size of the object, and the need to avoid race conditions with reaping activity if `delete_mode` is not `keep` - the cluster may not be able to fetch the tombstone to replicate in the future. The whole object must be kept on the queue and not be filtered by the `riak_kv_replrtq_src` to be replaced with a `to_fetch` reference.
+
+  - If the object is below the `riak_kv.replrtq_srcobjectsize` (default 200KB) then the whole object will be sent to the `riak_kv_replrtq_src`, and it will be queued as a whole object as long as the current size of the priority real-time queue does not exceed the `riak_kv.replrtq_srcobjectlimit` (default 1000). If an object is over the size limit a `to_fetch` reference will be sent instead of the object, and if the queue is too large the `riak_kv_replrtq_src` will substitute a `to_fetch` reference before queueing.
+
+- The `{Bucket, Key, Clock, ObjectReference}` is cast to the `riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the priority queue.
+
+- The queue has a configurable absolute limit that is applied individually for each priority. The limit is configured via `riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5 minutes of traffic at 1,000 PUTs per second). When this limit is reached, new replication references are discarded on receipt rather than queued - these discarded references will need to eventually be re-replicated via full-sync.
+
+The reference now needs to be handled by the `riak_kv_replrtq_src`. The task list for this process is:
+
+- Assign a priority to the replication event depending on what prompted the replication (e.g. highest priority to real-time events received from co-ordinator vnodes).
+
+- Add the reference to the tail of __every__ matching queue, based on priority. Each queue is configured to either match `any` replication event, no real-time events (using the configuration `block_rtq`), or a subset of events (using either a bucket `type` filter or a `bucket` filter).
+
+In order to replicate the object, it must now be fetched from the queue by a sink. A sink-side cluster should have multiple consumers, on multiple nodes, consuming from each node in the source-side cluster. These workers are handed work items by the `riak_kv_replrtq_snk`, with a Riak client configured to communicate to the remote node, and the worker will initiate a `fetch` from that node.
+
+On receipt of the `fetch` request the source node should:
+
+- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in place of `{Bucket, Key}`.
+
+- The GET FSM should go directly into the `queue_fetch` state, and try to fetch the next replication reference from the given queue name via the `riak_kv_replrtq_src`. 
+
+  - If the fetch from the queue returns `queue_empty` this is relayed back to the sink-side worker, and ultimately the `riak_kv_replrtq_snk` which may then slow down the pace at which fetch requests are sent to this node/queue combination. To reduce the volume of individual requests when queues are mainly empty, the queue is only considered empty if it has reported empty 8 times from requests 4ms apart.
+
+  - If the fetch returns an actual object, this is relayed back to the sink worker.
+
+  - If the fetch returns a replication reference with the flag `to_fetch`, the `riak_kv_get_fsm` will continue down the standard path of states starting with `prepare`, and fetch the object, which will then be returned to the sink worker.
+
+- If a successful fetch is relayed back to the sink worker it will replicate the PUT using a local `riak_client:push/4`. The push will complete a PUT of the object on the sink cluster - using a `riak_kv_put_fsm` with appropriate options (e.g. `asis`, `disable-hooks`).
+
+  - The code within the `riak_client:push/4` follows the behaviour of the existing `riak_repl` on receipt of a replicated object.
+
+- If the fetch and push request fails, the sink worker will report this back to the `riak_kv_replrtq_snk` which should delay further requests to that node/queue so as to avoid rapidly locking sink workers up communicating to a failing node.
+
+
+### Full-Sync Reconciliation and Repair - Step by Step
+
+The `riak_kv_ttaaefs_manager` controls the full-sync replication activity of a node. Each node is configured with a single peer with which it is to run full-sync checks and repairs, assuming that across the cluster sufficient peers to sufficient clusters have been configured to complete the overall work necessary for that cluster. Ensuring there are sufficient peer relations is an administrator's responsibility; there is no re-balancing or re-scaling of this work in failure scenarios.
+
+The `riak_kv_ttaaefs_manager` is a source-side process. It will not attempt to repair any discovered discrepancies where the remote cluster is ahead of the local cluster - the job of the process is to ensure that a remote cluster is up-to-date with the changes which have occurred in the local cluster. For mutual full-sync replication, there will be a need for an equivalent configuration on the peer cluster.
+
+The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the configuration. The schedule has wants: the number of times per day that it is desired that this manager will:
+
+- Reconcile changes across the whole cluster over all time;
+
+- Skip work for a schedule slot and do nothing;
+
+- Reconcile changes that have occurred in the past hour;
+
+- Reconcile changes that have occurred in the past day.
+
+On startup, the manager looks at these wants and provides a random distribution of work across slots. The day is divided into slots evenly distributed so there is a slot for each want in the schedule. It will run work for the slot at an offset from the start of the slot, based on the place this node has in the sorted list of currently active nodes. So if each node is configured with the same total number of wants, work will be synchronised to have limited overlapping work within the cluster.
+
+When, on a node, a scheduled piece of work comes due, the `riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work between the two clusters (using the peer configuration to reach the remote cluster). 
Once the work is finished, it will schedule the next piece of work - unless the start time for the next piece of work has already passed, in which case the next work is skipped. When all the work in the schedule is complete, a new schedule is calculated from the wants.
+
+When starting an `aae_exchange` the `riak_kv_ttaaefs_manager` must pass in a repair function. This function will compare clocks from identified discrepancies, and where the source cluster is ahead of the sink, send the `{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name on `riak_kv_replrtq_src`. These queued entries will then be replicated through being fetched by the `riak_kv_replrtq_snk` workers, although this will only occur when there is no higher priority work to replicate i.e. real-time replication events prompted by locally co-ordinated PUTs.
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/reading-objects.md b/content/riak/kv/3.0.2/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..4892fde549
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/reading-objects.md
@@ -0,0 +1,252 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('rufus', 'dogs', 'animals')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object
+`pr` | `0` | How many [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+    .withOption(FetchOption.R, new Quorum(3))
+    .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('rufus', 'dogs', 'animals')
+    ->build()
+    ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found. 
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/usage/replication.md b/content/riak/kv/3.0.2/developing/usage/replication.md new file mode 100644 index 0000000000..c8403734c9 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/usage/replication.md @@ -0,0 +1,592 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.2/dev/advanced/replication-properties + - /riak/kv/3.0.2/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/3.0.2/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/3.0.2/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties when making individual read or write
+requests; examples of this approach are given in the [section on
+client-level replication settings](#client-level-replication-settings) below.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props` those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/3.0.2/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/3.0.2/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. 
You can use [bucket types]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
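+
+To make the failure arithmetic concrete, here is a minimal sketch (ours, for
+illustration only; the function name is not part of any Riak client):
+
+```python
+def read_failure_tolerance(n_val, r):
+    """A read succeeds once R of the N replicas respond, so up to
+    N - R replicas can be unreachable without failing the read."""
+    assert 1 <= r <= n_val
+    return n_val - r
+
+# With the default n_val of 3:
+assert read_failure_tolerance(3, 1) == 2  # r=1 tolerates two silent replicas
+assert read_failure_tolerance(3, 3) == 0  # r=3 needs an answer from every replica
+```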
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+    new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+    ->build()
+    ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W. 
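+
+One rule of thumb worth knowing (a property of quorum systems generally, not a
+Riak setting): if you choose R and W such that R + W > N, every successful read
+quorum must overlap every successful write quorum, so a read that follows a
+successful write will reach at least one up-to-date replica. A hedged sketch:
+
+```python
+def quorums_overlap(n_val, r, w):
+    """True when any R responding replicas must intersect any W
+    acknowledging replicas out of the same N."""
+    return r + w > n_val
+
+assert quorums_overlap(3, 2, 2)      # the default quorum/quorum pairing
+assert not quorums_overlap(3, 1, 1)  # fast, but a read may miss the latest write
+```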
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+    new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+    .setContentType("text/plain")
+    .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(storyKey)
+    .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing this object will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. 
When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc. 
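+
+For reference, the same calculation as a short sketch (ours, for illustration
+only):
+
+```python
+def quorum(n_val):
+    """floor(N/2) + 1 --- the majority used when basic_quorum is true."""
+    return n_val // 2 + 1
+
+assert [quorum(n) for n in (5, 6, 7, 8)] == [3, 4, 4, 5]
+```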
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+    new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+    .withOption(FetchOption.R, new Quorum(2))
+    .withOption(FetchOption.NOTFOUND_OK, true)
+    .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('john_stockton', 'nba_stats')
+    ->withParameter('r', 2)
+    ->withParameter('notfound_ok', true)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+    new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+    .setContentType("application/json")
+    .setValue(BinaryValue.create("{'stats':{ ... large stats object ... 
}}"));
+StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(michaelJordanKey)
+    .withOption(StoreOption.W, new Quorum(3))
+    .withOption(StoreOption.DW, new Quorum(2))
+    .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildJsonObject("{'stats':{ ... large stats object ... }}")
+    ->buildLocation('michael_jordan', 'nba_stats')
+    ->withParameter('w', 3)
+    ->withParameter('dw', 2)
+    ->build()
+    ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/3.0.2/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/3.0.2/developing/getting-started)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/3.0.2/learn/concepts/causal-context#siblings">siblings</a>
+2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/3.0.2/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object. 
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/search-schemas.md b/content/riak/kv/3.0.2/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..726bd90406
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/search-schemas.md
@@ -0,0 +1,511 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/advanced/search-schema
+  - /riak/kv/3.0.2/dev/advanced/search-schema
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/), and [more]({{<baseurl>}}riak/kv/3.0.2/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value. Sufficiently large objects can take up tremendous amounts of disk
+space, so pay special attention to those indexes.
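+
+If you want to inspect the default schema from within your application
+rather than on GitHub, you can fetch it from Riak Search directly. Below
+is a quick sketch using the Python client (assuming a connected `client`,
+as in the other examples in these docs; `get_search_schema` returns the
+schema's name along with its raw XML content):
+
+```python
+# Fetch the default schema for inspection
+schema = client.get_search_schema('_yz_default')
+
+print(schema['name'])          # _yz_default
+print(schema['content'][:80])  # the first few characters of the schema XML
+```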
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('cartoons.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+with open('cartoons.xml', 'r') as xml_file:
+    schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon ThunderCats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above objects into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+
+```
+name=Lion-o
+age=30
+leader=true
+aliases.name=León-O
+aliases.desc_es=Señor de los ThunderCats
+aliases.name=Starlion
+aliases.desc_fr=Le jeune seigneur des Cosmocats
+```
+
+This means that our schema should handle `name`, `age`, `leader`,
+`aliases.name` (a dot (`.`) is a valid field character), and
+`aliases.desc_*`, where the suffix indicates the language of the
+description (Spanish and French).
+
+### Required Schema Fields
+
+Solr schemas can be very complex, containing many types and analyzers.
+Refer to the [Solr 4.7 reference
+guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf)
+for a complete list. You should be aware, however, that there are a few
+fields that are required by Riak Search in order to properly distribute
+an object across a [cluster][concept clusters]. These fields are all prefixed
+with `_yz`, which stands for
+[Yokozuna](https://github.com/basho/yokozuna), the original code name
+for Riak Search.
+
+Below is a bare-minimum skeleton Solr schema. It won't do much for you
+other than allow Riak Search to properly manage your stored objects.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+ </types>
+</schema>
+```
+
+If you're missing any of the above fields, Riak Search will reject your
+custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`.
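+
+Because a schema that omits any of these fields will be rejected, it can
+be worth sanity-checking a hand-written schema before uploading it. The
+snippet below is a small illustrative sketch (not part of Riak itself)
+that uses Python's standard library to verify that all of the required
+`_yz` fields are present in a schema file:
+
+```python
+# Sketch: check that a custom schema defines every required _yz field.
+import xml.etree.ElementTree as ET
+
+REQUIRED = {'_yz_id', '_yz_ed', '_yz_pn', '_yz_fpn', '_yz_vtag',
+            '_yz_rk', '_yz_rt', '_yz_rb', '_yz_err'}
+
+tree = ET.parse('cartoons.xml')
+defined = {field.get('name') for field in tree.iter('field')}
+
+missing = REQUIRED - defined
+if missing:
+    raise ValueError('schema is missing required fields: %s' % sorted(missing))
+```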
+
+In the table below, you'll find a description of the various required
+fields. You'll rarely need to use any fields other than `_yz_rt` (bucket
+type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err`
+can be helpful if you suspect that your extractors are failing.
+Malformed JSON or XML will cause Riak Search to index a key and set
+`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-------|:-----|:-----------
+`_yz_id` | ID | Unique identifier of this Solr document
+`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy)
+`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them
+`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Indicates whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you need to map the name and type of each field (a string,
+integer, boolean, etc.).
+
+When creating fields you can either create specific fields via the
+`field` element or an asterisk (`*`) wildcard field via `dynamicField`.
+A value that matches a specific field name will be indexed under that
+field; otherwise, Riak Search will attempt to match it against a dynamic
+field pattern.
+
+Besides a field `type`, you also must decide if a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored`, you
+can get the value back as a result of a query, but storing it also
+doubles the storage of the field (once in Riak, again in Solr). If
+a single Riak object can have more than one copy of the same matching
+field, you also must set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.0">
+ <fields>
+   <field name="name" type="string" indexed="true" stored="true" />
+   <field name="age" type="int" indexed="true" stored="false" />
+   <field name="leader" type="boolean" indexed="true" stored="false" />
+   <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" />
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+```
+
+Next, take note of the types you used in the fields and ensure that each
+of the field types is defined as a `fieldType` under the `types` element.
+Basic types such as `string`, `boolean`, and `int` have matching
+Solr classes. There are dozens more types, including many kinds of
+number (`float`, `tdouble`, `random`), `date` fields, and even
+geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we mapped any field ending in
+`_es` to Spanish and any field ending in `_de` to German.
+
+```xml
+  <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- Spanish -->
+    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+        <filter class="solr.SpanishLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+      </analyzer>
+    </fieldType>
+
+    <!-- German -->
+    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
+        <filter class="solr.GermanNormalizationFilterFactory"/>
+        <filter class="solr.GermanLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+      </analyzer>
+    </fieldType>
+  </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+provided to index without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to this will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+        at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+        at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+        at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+        at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+        at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+        ...
+        ...
+        ...
+```
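+
+When generating timestamps programmatically, it's easiest to format them
+as UTC up front. As a small illustrative sketch (plain Python, nothing
+Riak-specific), here is one way to produce a Solr-compatible date/time
+string:
+
+```python
+# Solr expects ISO8601 UTC timestamps such as 1995-12-31T23:59:59Z
+from datetime import datetime
+
+solr_date = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
+print(solr_date)  # e.g. 2014-02-27T21:29:59Z
+```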
+
+### Uploading
+
+Once you have saved your custom schema as an `.xml` file, you can upload
+it to Riak KV as follows:
+
+```curl
+curl -v -XPUT $RIAK_HOST/search/schema/thundercats \
+  -H 'Content-Type:application/xml' \
+  --data-binary @thundercats_schema.xml
+```
+
+
+
+## Field Properties By Use Case
+
+Sometimes it can be tricky to decide whether a value should be `stored`,
+or whether `multiValued` is allowed. This handy table from the [Solr
+documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case)
+may help you pick field properties.
+
+An entry of `true` or `false` in the table indicates that the option
+must be set to the given value for the use case to function correctly.
+If no entry is provided, the setting of that attribute has no impact on
+the case.
+
+<table class="schemausecase">
+<thead>
+<tr>
+<th>Use Case</th>
+<th><code>indexed</code></th>
+<th><code>stored</code></th>
+<th><code>multiValued</code></th>
+<th><code>omitNorms</code></th>
+<th><code>termVectors</code></th>
+<th><code>termPositions</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>search within field</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>retrieve contents</td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use as unique key</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>sort on field</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td><code>true</code>[1](#notes)</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use field boosts[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>document boosts affect searches within field</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>highlighting</td>
+<td><code>true</code>[4](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td>[2](#notes)</td>
+<td><code>true</code>[3](#notes)</td>
+</tr>
+<tr>
+<td>faceting[5](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>add multiple values, maintaining order</td>
+<td></td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>field length affects doc score</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>MoreLikeThis[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>true</code>[6](#notes)</td>
+<td></td>
+</tr>
+</tbody></table>
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/search.md b/content/riak/kv/3.0.2/developing/usage/search.md
new file mode 100644
index 0000000000..541f521381
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/search.md
@@ -0,0 +1,1455 @@
+---
+title: "Using Search"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Searching"
+    identifier: "usage_searching"
+    weight: 105
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/search
+  - /riak/kv/3.0.2/dev/using/search
+---
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak Search 2.0 is an integration of Solr (for indexing and querying)
+and Riak (for storage and distribution). There are a few points of
+interest that a user of Riak Search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak Search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak Search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak Search uses AAE is in the
+[Riak search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you connect with to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Here we'll create an
+index named `famous` with the default schema.
+
+Both schema and index creation will be covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/3.0.2/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+  new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.2/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.2/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+  .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the `animals` bucket
+type's `search_index` property to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create one without
+any properties and set individual buckets to be indexed. 
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +### Associating an Index via Custom Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +default bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + +Once you have created the index association, any new data will be indexed on +ingest according to your schema. + +## Riak Search Security Setup + +[Security]({{<baseurl>}}riak/kv/3.0.2/using/security/) is a new feature as of +Riak 2.0 that lets an administrator limit access to certain resources. +In the case of search, your options are to limit administration of +schemas or indexes (the `search.admin` permission) to certain users, and +to limit querying (the `search.query` permission) to any index or to a +specific index. The example below shows the various options. 
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Lion-o, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python the content type is automatically set for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+  ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket))
+  ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket))
+  ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket))
+  ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]);
+
+$storeObjectBuilder->build()->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+
+cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True})
+cat.store()
+
+cat = bucket.new('cheetara', {'name_s':'Cheetara', 'age_i':28, 'leader_b': False})
+cat.store()
+
+cat = bucket.new('snarf', {'name_s':'Snarf', 'age_i':43})
+cat.store()
+
+cat = bucket.new('panthro', {'name_s':'Panthro', 'age_i':36})
+cat.store()
+```
+
+```csharp
+var lionoId = new RiakObjectId("animals", "cats", "liono");
+var lionoObj = new { name_s = "Lion-o", age_i = 30, leader_b = true };
+var lionoRiakObj = new RiakObject(lionoId, lionoObj);
+
+var cheetaraId = new RiakObjectId("animals", "cats", "cheetara");
+var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader_b = false };
+var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj);
+
+var snarfId = new RiakObjectId("animals", "cats", "snarf");
+var snarfObj = new { name_s = "Snarf", age_i = 43, leader_b = false };
+var snarfRiakObj = new RiakObject(snarfId, snarfObj);
+
+var panthroId = new RiakObjectId("animals", "cats", "panthro");
+var panthroObj = new { name_s = "Panthro", age_i = 36, leader_b = false };
+var panthroRiakObj = new RiakObject(panthroId, panthroObj);
+
+var rslts = client.Put(new[] {
+    lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj
+});
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var objs = [
+    [ 'liono', { name_s: 'Lion-o', age_i: 30, leader_b: true } ],
+    [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader_b: false } ],
+    [ 'snarf', { name_s: 'Snarf', age_i: 43, leader_b: false } ],
+    [ 'panthro', { name_s: 'Panthro', age_i: 36, leader_b: false } ],
+];
+
+var storeFuncs = [];
+objs.forEach(function (o) {
+    var storeFunc = function (async_cb) {
+        var key = o[0];
+        var value = o[1];
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('application/json');
+        riakObj.setBucketType('animals');
+        riakObj.setBucket('cats');
+        riakObj.setKey(key);
+        riakObj.setValue(value);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    };
+    storeFuncs.push(storeFunc);
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+    // NB: all objects stored and indexed...
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/3.0.2/developing/usage/custom-extractors). 
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/3.0.2/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated to a
+Solr index document field insert. Solr will index any field that it
+recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` is an integer, `_b` is a boolean, and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
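+
+To make the flattening behavior concrete, here is a small illustrative
+sketch in Python (not Riak's actual extractor code, which is written in
+Erlang) that mimics how nested objects and arrays become field/value
+pairs:
+
+```python
+# Sketch: mimic the extractor's flattening of nested values.
+# Nested objects are joined with dots; arrays repeat the field name.
+def flatten(value, prefix=''):
+    pairs = []
+    if isinstance(value, dict):
+        for key, val in value.items():
+            pairs.extend(flatten(val, prefix + '.' + key if prefix else key))
+    elif isinstance(value, list):
+        for item in value:
+            pairs.extend(flatten(item, prefix))
+    else:
+        pairs.append((prefix, value))
+    return pairs
+
+print(flatten({"people_ss": ["Ryan", "Eric", "Brett"]}))
+# [('people_ss', 'Ryan'), ('people_ss', 'Eric'), ('people_ss', 'Brett')]
+```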
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/3.0.2/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match.
+
+```java
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create("famous"), "name_s:Lion*")
+    .build();
+cluster.execute(searchOp);
+// Get the actual results as a List of Maps:
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+// Display the number of results:
+System.out.println(results.size());
+```
+
+```ruby
+results = client.search("famous", "name_s:Lion*")
+p results
+p results['docs']
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('name_s:Lion*')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+
+var_dump($response->getDocs());
+```
+
+```python
+results = client.fulltext_search('famous', 'name_s:Lion*')
+print results
+print results['docs']
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "name_s")
+        .Search("Lion*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+foreach (RiakSearchResultDocument doc in searchResult.Documents)
+{
+    var args = new[] {
+        doc.BucketType,
+        doc.Bucket,
+        doc.Key,
+        string.Join(", ", doc.Fields.Select(f => f.Value).ToArray())
+    };
+    Debug.WriteLine(
+        format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}",
+        args: args);
+}
+```
+
+```javascript
+function search_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("docs:", JSON.stringify(rslt.docs));
+}
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('name_s:Lion*')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:fwrite("~p~n", [Results]),
+Docs = Results#search_results.docs,
+io:fwrite("~p~n", [Docs]).
+
+%% Please note that this example relies on an Erlang record definition
+%% for the search_result record found here:
+%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("name_s:Lion*").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+sc := cmd.(*riak.SearchCommand)
+if json, jerr := json.MarshalIndent(sc.Response.Docs, "", "  "); jerr != nil {
+    return jerr
+} else {
+    fmt.Println(string(json))
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp
+```
+
+The response to a query will be an object containing details about the
+response, such as a query's max score and a list of documents which
+match the given query. It's worth noting two things:
+
+* The documents returned are Search documents (a set of Solr
+  field/values), not a Riak value
+* The HTTP response is a direct Solr response, while the drivers use
+  Protocol Buffers and are encoded with different field names
+
+This is a common HTTP `response` value:
+
+```json
+{
+  "numFound": 1,
+  "start": 0,
+  "maxScore": 1.0,
+  "docs": [
+    {
+      "leader_b": true,
+      "age_i": 30,
+      "name_s": "Lion-o",
+      "_yz_id": "default_cats_liono_37",
+      "_yz_rk": "liono",
+      "_yz_rt": "default",
+      "_yz_rb": "cats"
+    }
+  ]
+}
+```
+
+The most important field returned is `docs`, which is the list of
+objects that each contain fields about matching index documents.
+The values you'll use most often are `_yz_rt` (Riak bucket type),
+`_yz_rb` (Riak bucket), `_yz_rk` (Riak key), and `score`, which
+represents the similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+    .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation($key, $bucket, $btype)
+  ->build()
+  ->execute()
+  ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc), %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc), %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO 30]`.
+If you wanted to find all cats 30 or older, you could include a
+glob as a top end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('age_i:[30 TO *]')
+  ->build()
+  ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
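+
+Date ranges use the same bracket syntax together with Solr's date math.
+As a sketch, assuming documents were indexed with a date field named
+`pubdate_dt` (hypothetical here; the cat objects above don't have one),
+the following would match everything published within the last year:
+
+```python
+# 'pubdate_dt' is a hypothetical *_dt (date) field; the query string is
+# standard Solr date math, evaluated at query time.
+client.fulltext_search('famous', 'pubdate_dt:[NOW-1YEAR/DAY TO NOW/DAY+1DAY]')
+```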
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('leader_b:true AND age_i:[30 TO *]')
+  ->build()
+  ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+    .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\DeleteIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []),
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that index's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_`.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents are returned in non-overlapping
+sequential subsets (in other words, *pages*).
+This is easy to do with the `start` and `rows` parameters, where
+`start` is the number of documents to skip over (the offset) and `rows`
+is the number of results to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, where `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create("famous"), "*:*")
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .build();
+client.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('*:*')
+  ->withMaxRows($maxRows)
+  ->withStartRow($start)
+  ->build()
+  ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results.
+
+If you want to sort by score without repeating results then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query.
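+
+As a concrete sketch of the key-ordered workaround described above,
+here's what stable pagination might look like with the Python client
+(the extra keyword arguments are passed through to Solr as query
+parameters):
+
+```python
+# Page through matches in a stable type/bucket/key order instead of
+# sorting by score, so successive pages don't repeat or drop results.
+results = client.fulltext_search(
+    'famous', '*:*',
+    sort='_yz_rt asc, _yz_rb asc, _yz_rk asc',
+    start=0, rows=2)
+
+for doc in results['docs']:
+    print(doc['_yz_rk'])
+```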
+ +[This issue](https://github.com/basho/yokozuna/issues/355) is caused by +the way Search must minimally distribute a query across multiple Solr +nodes (called a *coverage plan*) and then filter duplicate results to +retrieve a full result set. Since this plan is frequently recalculated, +successive page queries may use a different plan, and thus calculate +alternate `score`s or filter different `_yz_id` values. We have plans to +fix this shortcoming in a future version of Riak. + +### MapReduce + +Riak Search allows for piping search results as inputs for +[MapReduce]({{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce/) jobs. This is a useful cross-section for +performing post-calculations of results or aggregations of ad-hoc +queries. The Riak Search MapReduce integration works similarly to +regular MapReduce, with the notable exception that your input is not a +bucket, but rather index and query arguments to the `yokozuna` module +and `mapred_search` function (an Erlang `module:function` pair that adds +the Riak Search hook to MapReduce). + +```json +{ + "inputs": { + "module": "yokozuna", + "function": "mapred_search", + "arg": ["famous","NOT leader_b:true"] + }, + "query": [ + { + "map": { + "language": "javascript", + "keep": false, + "source": "function(v) { return [1]; }" + } + }, + { + "reduce": { + "language": "javascript", + "keep": true, + "name": "Riak.reduceSum" + } + } + ] +} +``` + +In this example we're searching for all famous cats that are not +leaders and counting up the results using Javascript for both map and +reduce. It should return the reduced sum of `[3]`. + +```curl +curl -XPOST $RIAK_HOST/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}' +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/usage/searching-data-types.md b/content/riak/kv/3.0.2/developing/usage/searching-data-types.md new file mode 100644 index 0000000000..34b11e682a --- /dev/null +++ b/content/riak/kv/3.0.2/developing/usage/searching-data-types.md @@ -0,0 +1,1687 @@ +--- +title: "Searching with Data Types" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Searching with Data Types" + identifier: "usage_search_data_types" + weight: 111 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.2/dev/search/search-data-types + - /riak/kv/3.0.2/dev/search/search-data-types +--- + +Although [Riak Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types) function differently from other +Riak objects in some respects, when you're using Search you can think of +them as normal Riak objects with special metadata attached (metadata +that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#maps) +can be indexed and have their contents searched just like other Riak +objects. + +## Data Type MIME Types + +Like all objects stored in Riak, Riak Data Types are assigned content +types. Unlike other Riak objects, this happens automatically. When you +store, say, a counter in Riak, it will automatically be assigned the +type `application/riak_counter`. 
+The table below provides the full list of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types has its own default schema, with the exception of
+maps, which means that the `_yz_default` schema will automatically index
+Data Types on the basis of their assigned content type. This means that
+there is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#riak-data-types-and-search) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#maps), there are four schemas
+for embedded, aka dynamic, fields.
+Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued.
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings.
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+    .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('scores')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index.
+So let's start playing with some counters. All counters will be stored in the
+bucket `people`, while the key for each counter will be the username of
+each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu1 = new CounterUpdate(10);
+UpdateCounter update1 = new UpdateCounter.Builder(christopherHitchensCounter, cu1)
+    .build();
+client.execute(update1);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+CounterUpdate cu2 = new CounterUpdate(25);
+UpdateCounter update2 = new UpdateCounter.Builder(joanRiversCounter, cu2)
+    .build();
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10)
+    ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+    ->buildLocation('joan_rivers', 'people', 'counters')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"chris_hitchens">>,
+                            riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"joan_rivers">>,
+                            riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25.
+Let's query to see how many counters have a value greater than 20, just
+to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('scores')
+  ->withQuery('counter:[20 TO *]')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[20 TO *]" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over
+20. To find out which counter that is, we can dig into our results:
+
+```java
+// Using the "results" object from above:
+int numberFound = results.numResults();
+Map<String, List<String>> foundObject = results.getAllResults().get(0);
+String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
+String bucket = foundObject.get("_yz_rb").get(0); // "people"
+String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
+```
+
+```ruby
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```php
+$doc = $response->getDocs()[0];
+
+# The key
+$doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+$doc['_yz_rb'] # 'people'
+
+# The bucket type
+$doc['_yz_rt'] # 'counters'
+```
+
+```python
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+var doc = rslt.docs[0];
+
+var key = doc['_yz_rk'];
+var bucket = doc['_yz_rb'];
+var bucketType = doc['_yz_rt'];
+```
+
+```erlang
+Docs = Results#search_results.docs,
+Doc = lists:nth(1, Docs),
+Key = proplists:get_value(<<"_yz_rk">>, Doc),
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
+BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
+```
+
+```curl
+# Use the JSON object from above to locate bucket, key, and bucket type
+# information
+```
+
+Alternatively, we can see how many counters have values below 15:
+
+```java
+String index = "scores";
+String query = "counter:[* TO 15]";
+SearchOperation searchOp = new SearchOperation
+  .Builder(BinaryValue.create("scores"), "counter:[* TO 15]")
+  .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[* TO 15]')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('scores')
+  ->withQuery('counter:[* TO 15]')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[* TO 15]')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[* TO 15]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp
+```
+
+Or we can see how many counters have a value of 17 exactly:
+
+```java
+// Using the same method as above, just changing the query:
+String query = "counter:17";
+```
+
+```ruby
+results = client.search('scores', 'counter:17')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('scores')
+  ->withQuery('counter:17')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:17')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:17");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:17')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp
+```
+
+## Sets Example
+
+Let's say that we're storing information about the hobbies of a group of
+people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#sets) simply called `sets`,
+like so:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak-admin bucket-type activate sets
+```
+
+Now, we'll create a Search index called `hobbies` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies");
+StoreIndex storeIndex =
+  new StoreIndex.Builder(hobbiesIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('hobbies', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('hobbies')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('hobbies', '_yz_default')
+```
+
+```csharp
+var searchIndex = new SearchIndex("hobbies", "_yz_default");
+var rslt = client.PutSearchIndex(searchIndex);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'hobbies'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/hobbies \
+  -H 'Content-Type: application/json' \
+  -d '{"schema": "_yz_default"}'
+```
+
+Now, we can modify our `sets` bucket type to associate that bucket type
+with our `hobbies` index:
+
+```bash
+riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}'
+```
+
+Now, all of the sets that we store in any bucket with the bucket type
+`sets` will be automatically indexed as a set.
+So let's say that we store two sets for two different people describing
+their respective hobbies, in the bucket `people`:
+
+```java
+Namespace peopleBucket = new Namespace("sets", "people");
+
+Location mikeDitkaSet = new Location(peopleBucket, "ditka");
+SetUpdate su1 = new SetUpdate()
+        .add("football")
+        .add("winning");
+UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();
+
+Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
+SetUpdate su2 = new SetUpdate()
+        .add("wailing")
+        .add("rocking")
+        .add("winning");
+UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();
+
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+
+ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('football')
+  ->add('winning')
+  ->buildLocation('ditka', 'people', 'sets');
+
+$builder->build()->execute();
+
+$builder->add('wailing')
+  ->add('rocking')
+  ->add('winning')
+  ->buildLocation('dio', 'people', 'sets')
+  ->build()
+  ->execute();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('people')
+
+mike_ditka_set = Set(bucket, 'ditka')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+mike_ditka_set.store()
+
+ronnie_james_dio_set = Set(bucket, 'dio')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+ronnie_james_dio_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("ditka")
+    .WithAdditions(new[] { "football", "winning" })
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("dio")
+    .WithAdditions(new[] { "wailing", "rocking", "winning" })
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'ditka',
+            additions: ['football', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'dio',
+            additions: ['wailing', 'rocking', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+%% riakc sets are immutable, so each add_element returns a new set
+%% value that must be bound to a new variable
+MikeDitkaSet = riakc_set:new(),
+MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet),
+MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
+RonnieJamesDioSet = riakc_set:new(),
+RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet),
+RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
+RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),
+
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"ditka">>,
+                            riakc_set:to_op(MikeDitkaSet2)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"dio">>,
+                            riakc_set:to_op(RonnieJamesDioSet3)).
+```
+
+Now, we can query our `hobbies` index to see if anyone has the hobby
+`football`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:football";
+```
+
+```ruby
+results = client.search('hobbies', 'set:football')
+# This should return a Hash with fields like 'num_found' and 'docs'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('hobbies')
+  ->withQuery('set:football')
+  ->build()
+  ->execute();
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:football')
+# This should return a dict with fields like 'num_found' and 'docs'
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:football");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("sets numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:football')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp
+```
+
+Let's see how many sets contain the element `football`:
+
+```java
+// Using the same method explained above for getting search results:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results['num_found']
+# 1
+```
+
+```php
+$response->getNumFound(); // 1
+```
+
+```python
+results['num_found']
+# 1
+```
+
+```csharp
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+rslt.numFound;
+// 1
+```
+
+```erlang
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+```
+
+Success! We stored two sets, only one of which contains the element
+`football`. Now, let's see how many sets contain the element `winning`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:winning";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('hobbies')
+  ->withQuery('set:winning')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:winning");
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:winning')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>).
+NumberFound = Results#search_results.num_found.
+%% 2
+```
+
+Just as expected, both sets we stored contain the element `winning`.
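+
+Since the default schema indexes each element of a set separately, set
+terms can also be combined into compound queries. As a quick Python
+sketch (using only the client calls already shown above), we can ask
+which people count both `winning` and `football` among their hobbies:
+
+```python
+# Sketch: combine set terms with AND, as in the set query table above.
+# Only the 'ditka' set contains both elements, so this should find
+# exactly one document.
+results = client.fulltext_search('hobbies', 'set:winning AND set:football')
+results['num_found']
+# 1
+results['docs'][0]['_yz_rk']
+# 'ditka'
+```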
+ +## Maps Example + +This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types) +tutorial. That tutorial walks you through storing CMS-style user data in +Riak [maps]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/#maps), and we'd suggest that you +familiarize yourself with that tutorial first. More specifically, user +data is stored in the following fields in each user's map: + +* first name in a `first_name` register +* last name in a `last_name` register +* whether the user is an enterprise customer in an `enterprise_customer` + flag +* the number of times the user has visited the company page in a + `page_visits` counter +* a list of the user's interests in an `interests` set + +First, let's create and activate a bucket type simply called `maps` that +is set up to store Riak maps: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type activate maps +``` + +Now, let's create a search index called `customers` using the default +schema: + +```java +YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default"); +StoreIndex storeIndex = + new StoreIndex.Builder(customersIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('customers', '_yz_default') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('customers') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('customers', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("customers", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'customers' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>). 
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+    .update("first_name", new RegisterUpdate("Idris"))
+    .update("last_name", new RegisterUpdate("Elba"))
+    .update("enterprise_customer", new FlagUpdate(false))
+    .update("page_visits", new CounterUpdate(10))
+    .update("interests", new SetUpdate().add("acting").add("being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+    .update("first_name", new RegisterUpdate("Joan"))
+    .update("last_name", new RegisterUpdate("Jett"))
+    // Joan Jett is not an enterprise customer, so we don't need to
+    // explicitly disable the "enterprise_customer" flag, as all
+    // flags are disabled by default
+    .update("page_visits", new CounterUpdate(25))
+    .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+  $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Idris')
+  ->updateRegister('last_name', 'Elba')
+  ->updateFlag('enterprise_customer', true)
+  ->updateSet('interests', $setBuilder)
+  ->updateCounter('page_visits', $counterBuilder)
+  ->buildLocation('idris_elba', 'customers', 'maps')
+  ->build()
+  ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+  $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Joan')
+  ->updateRegister('last_name', 'Jett')
+  ->updateSet('interests', $setBuilder)
+  ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+  ->buildLocation('joan_jett', 'customers', 'maps')
+  ->build()
+  ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, false)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters above 15.
+Unlike the counters example above, we have to specify _which_ counter
+we're querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('page_visits_counter:[15 TO *]')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter
+above 15. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+  results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register;
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we've stored thus far has an `interests` set. First, let's
+see how many of our maps even _have_ sets called `interests` using a
+wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('interests_set:*')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set.
+Now let's see how many maps have items in `interests` sets that begin
+with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+  results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('interests_set:loving*')
+  ->build()
+  ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has one item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will have a first name only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+    .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+    .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate)
+    .build();
+client.execute(addSubMap);
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('alter_ego', $mapBuilder)
+  ->buildLocation('idris_elba', 'customers', 'maps')
+  ->build()
+  ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('alter_ego', $mapBuilder)
+  ->buildLocation('joan_jett', 'customers', 'maps')
+  ->build()
+  ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'John Luther');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'Robert Plant');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+Querying maps within maps involves constructing queries that separate
+the different levels of depth with a single dot. Here's an example query
+for finding maps that have a `name` register embedded within an
+`alter_ego` map:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('alter_ego_map.name_register:*')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Once we know how to query embedded fields like this, we can query those
+just like any other.
+Let's find out which maps have an `alter_ego` sub-map that contains a
+`name` register that ends with `Plant`, and display that customer's
+first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+  results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('customers')
+  ->withQuery('alter_ego_map.name_register:*Plant')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/secondary-indexes.md b/content/riak/kv/3.0.2/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..dcb03914ae
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/secondary-indexes.md
@@ -0,0 +1,2030 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/2i
+  - /riak/kv/3.0.2/dev/using/2i
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary indexes can be either a binary or string, such as
+`sensor_1_data` or `admin_user` or `click_event`, or an integer, such as
+`99` or `141121`.
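+
+For instance (a minimal Python sketch; full client examples for every
+supported language follow below), tagging an object at write time is
+just a matter of attaching index entries whose names carry a `_bin`
+(string/binary) or `_int` (integer) suffix:
+
+```python
+# Sketch: tag an object with one string (_bin) and one integer (_int)
+# secondary index at write time. `client` is assumed to be a connected
+# Riak client, as in the examples below.
+bucket = client.bucket('users')
+obj = RiakObject(client, bucket, 'click_12345')
+obj.content_type = 'application/json'
+obj.data = '{"event": "click"}'
+obj.add_index('event_type_bin', 'click_event')  # string/binary index
+obj.add_index('timestamp_int', 141121)          # integer index
+obj.store()
+```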
+ +[Riak Search]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/3.0.2/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
+Let's say that an application would like to add a Twitter handle and an
+email address to this object as secondary indexes.
+
+```java
+Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith");
+
+// In the Java client (and all clients), if you do not specify a bucket type,
+// the client will use the default type. And so the following store command
+// would be equivalent to the one above:
+Location johnSmithKey = new Location(new Namespace("users"), "john_smith");
+
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'user_data':{ ... }}"));
+
+obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123");
+obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com");
+
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(johnSmithKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('default').bucket('users')
+obj = Riak::RObject.new(bucket, 'john_smith')
+obj.content_type = 'application/json'
+obj.raw_data = '{"user_data":{ ... }}'
+
+# String/binary indexes must be set as an array of strings
+obj.indexes['twitter_bin'] = %w{ jsmith123 }
+obj.indexes['email_bin'] = %w{ jsmith@basho.com }
+obj.store
+
+# In the Ruby client (and all clients), if you do not specify a bucket
+# type, the client will use the default type. And so the following set
+# of commands would be equivalent to the one above:
+
+bucket = client.bucket('users')
+# repeat the same commands for building the object
+obj.store
+```
+
+```php
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+  ->addValueToIndex('twitter_bin', 'jsmith123')
+  ->addValueToIndex('email_bin', 'jsmith@basho.com');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withObject($object)
+  ->buildLocation('john_smith', 'users', 'default')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('default').bucket('users')
+# In the Python client (and all clients), if you do not specify a bucket type,
+# the client will use the default type. And so the following store command
+# would be equivalent to the one above:
+bucket = client.bucket('users')
+
+obj = RiakObject(client, bucket, 'john_smith')
+obj.content_type = 'text/plain'
+obj.data = '...user data...'
+obj.add_index('twitter_bin', 'jsmith123')
+obj.add_index('email_bin', 'jsmith@basho.com')
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("default", "users", "john_smith");
+var obj = new RiakObject(id, "...user data...",
+    RiakConstants.ContentTypes.TextPlain);
+obj.BinIndex("twitter").Set("jsmith123");
+obj.BinIndex("email").Set("jsmith@basho.com");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucket('users');
+riakObj.setKey('john_smith');
+riakObj.setValue('...user data...');
+riakObj.addToIndex('twitter_bin', 'jsmith123');
+riakObj.addToIndex('email_bin', 'jsmith@basho.com');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"default">>, <<"users">>},
+                    <<"john_smith">>,
+                    <<"...user data...">>,
+                    <<"text/plain">>),
+%% In the Erlang client (and all clients), if you do not specify a bucket type,
+%% the client will use the default type. And so the following object would be
+%% equivalent to the one above:
+
+Obj = riakc_obj:new(<<"users">>,
+                    <<"john_smith">>,
+                    <<"...user data...">>,
+                    <<"text/plain">>),
+MD1 = riakc_obj:get_update_metadata(Obj),
+MD2 = riakc_obj:set_secondary_index(
+    MD1,
+    [{{binary_index, "twitter"}, [<<"jsmith123">>]},
+     {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]),
+Obj2 = riakc_obj:update_metadata(Obj, MD2),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    BucketType:      "indexes",
+    Bucket:          "users",
+    Key:             "john_smith",
+    Value:           []byte("…user data…"),
+}
+
+obj.AddToIndex("twitter_bin", "jsmith123")
+obj.AddToIndex("email_bin", "jsmith@basho.com")
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithContent(obj).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \
+  -H 'x-riak-index-twitter_bin: jsmith123' \
+  -H 'x-riak-index-email_bin: jsmith@basho.com' \
+  -H 'Content-Type: application/json' \
+  -d '{"userData":"data"}'
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.2/developing/client-libraries), you can find more information about getting started with
+your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.2/developing/getting-started) section.
+
+This has accomplished the following:
+
+* The object has been stored with a primary bucket/key of
+  `users`/`john_smith`
+* The object now has a secondary index called `twitter_bin` with a value
+  of `jsmith123`
+* The object now has a secondary index called `email_bin` with a value
+  of `jsmith@basho.com`
+
+### Querying Objects with Secondary Indexes
+
+Let's query the `users` bucket on the basis of Twitter handle to make
+sure that we can find our stored object:
+
+```java
+Namespace usersBucket = new Namespace("users");
+BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123")
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+List<BinIndexQuery.Response.Entry> entries = response.getEntries();
+for (BinIndexQuery.Response.Entry entry : entries) {
+    System.out.println(entry.getRiakObjectLocation().getKey());
+}
+```
+
+```ruby
+bucket = client.bucket('users')
+bucket.get_index('twitter_bin', 'jsmith123')
+
+# This is equivalent to the following:
+bucket = client.bucket_type('default').bucket('users')
+bucket.get_index('twitter_bin', 'jsmith123')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak))
+  ->buildBucket('users')
+  ->withIndexName('twitter_bin')
+  ->withScalarValue('jsmith123')
+  ->build()
+  ->execute()
+  ->getResults();
+```
+
+```python
+bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users')
+bucket.get_index('twitter_bin', 'jsmith123').results
+```
+
+```csharp
+var idxId = new RiakIndexId("default", "users", "twitter");
+var rslt = client.GetSecondaryIndex(idxId, "jsmith123");
+var idxRslt = rslt.Value;
+foreach (var keyTerm in idxRslt.IndexKeyTerms)
+{
+    Debug.WriteLine(keyTerm.Key);
+}
+```
+
+```javascript
+var query_keys = [];
+function query_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.done) {
+        query_keys.forEach(function (key) {
+            logger.info("2i query key: '%s'", key);
+        });
+    }
+
+    if (rslt.values.length > 0) {
(rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucket('users')
+    .withIndexName('twitter_bin')
+    .withIndexKey('jsmith123')
+    .withCallback(query_cb)
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+{ok, Results} =
+    riakc_pb_socket:get_index(Pid,
+                              <<"users">>, %% bucket
+                              {binary_index, "twitter"}, %% index name
+                              <<"jsmith123">>). %% index value
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("users").
+    WithIndexName("twitter_bin").
+    WithIndexKey("jsmith123").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```
+
+The response:
+
+```java
+john_smith
+```
+
+```ruby
+["john_smith"]
+```
+
+```php
+['john_smith']
+```
+
+```python
+['john_smith']
+```
+
+```csharp
+john_smith
+```
+
+```javascript
+john_smith
+```
+
+```erlang
+{ok,{index_results_v1,[<<"john_smith">>],
+                      undefined,undefined}}.
+```
+
+```golang
+john_smith
+```
+
+```curl
+{
+  "keys": [
+    "john_smith"
+  ]
+}
+```
+
+## Examples
+
+To run the following examples, make sure that Riak is configured to use
+an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].
+
+## Indexing Objects
+
+The following example indexes four different objects. Notice that we're
+storing both integer and string (aka binary) fields. Field names are
+automatically lowercased, some fields have multiple values, and
+duplicate fields are automatically de-duplicated, as in the following
+example:
+
+```java
+Namespace peopleBucket = new Namespace("indexes", "people");
+
+RiakObject larry = new RiakObject()
+        .setValue(BinaryValue.create("My name is Larry"));
+larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
+larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
+StoreValue storeLarry = new StoreValue.Builder(larry)
+        .withLocation(new Location(peopleBucket, "larry"))
+        .build();
+client.execute(storeLarry);
+
+RiakObject moe = new RiakObject()
+        .setValue(BinaryValue.create("My name is Moe"));
+moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
+moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
+StoreValue storeMoe = new StoreValue.Builder(moe)
+        .withLocation(new Location(peopleBucket, "moe"))
+        .build();
+client.execute(storeMoe);
+
+RiakObject curly = new RiakObject()
+        .setValue(BinaryValue.create("My name is Curly"));
+curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
+curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
+StoreValue storeCurly = new StoreValue.Builder(curly)
+        .withLocation(new Location(peopleBucket, "curly"))
+        .build();
+client.execute(storeCurly);
+
+RiakObject veronica = new RiakObject()
+        .setValue(BinaryValue.create("My name is Veronica"));
+veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
+        .add("val4").add("val4");
+veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
+        .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
+StoreValue storeVeronica = new StoreValue.Builder(veronica)
+        .withLocation(new Location(peopleBucket, "veronica"))
+        .build();
+client.execute(storeVeronica);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = 
Riak::RObject.new(bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'My name is Larry'
+obj1.indexes['field1_bin'] = %w{ val1 }
+# Like binary/string indexes, integer indexes must be set as an array,
+# even if you wish to add only a single index
+obj1.indexes['field2_int'] = [1001]
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'My name is Moe'
+obj2.indexes['Field1_bin'] = %w{ val2 }
+obj2.indexes['Field2_int'] = [1002]
+obj2.store
+
+obj3 = Riak::RObject.new(bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.raw_data = 'My name is Curly'
+obj3.indexes['FIELD1_BIN'] = %w{ val3 }
+obj3.indexes['FIELD2_INT'] = [1003]
+obj3.store
+
+obj4 = Riak::RObject.new(bucket, 'veronica')
+obj4.content_type = 'text/plain'
+obj4.raw_data = 'My name is Veronica'
+# Each assignment to an index replaces any earlier values, so all of the
+# (duplicate) values are passed in a single array; Riak de-duplicates
+# them on write
+obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
+obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1004, 1007]
+obj4.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('people', 'indexes');
+
+$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val1')
+    ->addValueToIndex('field2_int', 1001);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('larry', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('Field1_bin', 'val2')
+    ->addValueToIndex('Field2_int', 1002);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('moe', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('FIELD1_BIN', 'val3')
+    ->addValueToIndex('FIELD2_int', 1003);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('curly', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4a')
+    ->addValueToIndex('field1_bin', 'val4b')
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1005)
+    ->addValueToIndex('field2_int', 1006)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1007);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = RiakObject(client, bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.data = 'My name is Larry'
+obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.data = 'Moe'
+obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
+obj2.store()
+
+obj3 = RiakObject(client, bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.data = 'Curly'
+obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
+obj3.store()
+
+obj4 = RiakObject(client, bucket, 
'veronica') +obj4.content_type = 'text/plain' +obj4.data = 'Veronica' +obj4.add_index('field1_bin', 'val4').add_index('field1_bin', 'val4a').add_index('field1_bin', 'val4b').add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1005).add_index('field2_int', 1006).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1007) +obj4.store() +``` + +```csharp +var larryId = new RiakObjectId("indexes", "people", "larry"); +var larry = new RiakObject(larryId, "My name is Larry", + RiakConstants.ContentTypes.TextPlain); + +larry.BinIndex("field1").Set("val1"); +larry.IntIndex("field2").Set(1001); + +client.Put(larry); + +var moeId = new RiakObjectId("indexes", "people", "moe"); +var moe = new RiakObject(moeId, "My name is Moe", + RiakConstants.ContentTypes.TextPlain); + +moe.BinIndex("Field1").Set("val2"); +moe.IntIndex("Field2").Set(1002); + +client.Put(moe); + +var curlyId = new RiakObjectId("indexes", "people", "curly"); +var curly = new RiakObject(curlyId, "My name is Curly", + RiakConstants.ContentTypes.TextPlain); + +curly.BinIndex("FIELD1").Set("val3"); +curly.IntIndex("FIELD2").Set(1003); + +client.Put(curly); + +var veronicaId = new RiakObjectId("indexes", "people", "veronica"); +var veronica = new RiakObject(veronicaId, "My name is Veronica", + RiakConstants.ContentTypes.TextPlain); + +veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" }); +veronica.IntIndex("FIELD2").Set(new BigInteger[] { + 1004, 1005, 1006, 1004, 1004, 1007 +}); + +client.Put(veronica); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var storeFuncs = [ + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('larry'); + riakObj.setValue('My name is Larry'); + riakObj.addToIndex('field1_bin', 'val1'); + riakObj.addToIndex('field2_int', 1001); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('moe'); + riakObj.setValue('My name is Moe'); + riakObj.addToIndex('Field1_bin', 'val2'); + riakObj.addToIndex('Field2_int', 1002); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('curly'); + riakObj.setValue('My name is Curly'); + riakObj.addToIndex('FIELD1_BIN', 'val3'); + riakObj.addToIndex('FIELD2_INT', 1003); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('veronica'); + riakObj.setValue('My name is Veronica'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4a'); + riakObj.addToIndex('FIELD1_bin', 'val4b'); + riakObj.addToIndex('FIELD2_int', 1004); + 
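// Note: Riak de-duplicates repeated values for the same index,
+      // so the duplicate 1004 entries in this example are stored once.
+      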
riakObj.addToIndex('FIELD2_int', 1005); + riakObj.addToIndex('FIELD2_int', 1006); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1007); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + } +]; +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Larry = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"My name is Larry">>, + <<"text/plain">>), +LarryMetadata = riakc_obj:get_update_metadata(Larry), +LarryIndexes = riakc_obj:set_secondary_index( + LarryMetadata, + [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}] +), +LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes). + +Moe = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"moe">>, + <<"My name is Moe">>, + <<"text/plain">>), +MoeMetadata = riakc_obj:get_update_metadata(Moe), +MoeIndexes = riakc_obj:set_secondary_index( + MoeMetadata, + [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}] +), +MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes). + +Curly = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"curly">>, + <<"My name is Curly">>, + <<"text/plain">>), +CurlyMetadata = riakc_obj:get_update_metadata(Curly), +CurlyIndexes = riakc_obj:set_secondary_index( + CurlyMetadata, + [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}] +), +CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes). + +Veronica = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"veronica">>, + <<"My name is Veronica">>, + <<"text/plain">>), +VeronicaMetadata = riakc_obj:get_update_metadata(Veronica), +VeronicaIndexes = riakc_obj:set_secondary_index( + VeronicaMetadata, + [{{binary_index, "field1"}, [<<"val4">>]}, {{binary_index, "field1"}, [<<"val4">>]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1005]}, {{integer_index, "field2"}, [1006]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1007]}] +), +VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes). +``` + +```golang +o1 := &riak.Object{ + Key: "larry", + Value: []byte("My name is Larry"), +} +o1.AddToIndex("field1_bin", "val1") +o1.AddToIntIndex("field2_int", 1001) + +o2 := &riak.Object{ + Key: "moe", + Value: []byte("My name is Moe"), +} +o2.AddToIndex("Field1_bin", "val2") +o2.AddToIntIndex("Field2_int", 1002) + +o3 := &riak.Object{ + Key: "curly", + Value: []byte("My name is Curly"), +} +o3.AddToIndex("FIELD1_BIN", "val3") +o3.AddToIntIndex("FIELD2_INT", 1003) + +o4 := &riak.Object{ + Key: "veronica", + Value: []byte("My name is Veronica"), +} +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4a") +o4.AddToIndex("FIELD1_bin", "val4b") +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1005) +o4.AddToIntIndex("FIELD2_int", 1006) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1007) + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "text/plain" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithContent(obj). 
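+        // Bucket type ("indexes") and bucket ("people") were already set
+        // via WithBucketType/WithBucket on the builder above.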
+ Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field1_bin: val1" \ + -H "x-riak-index-field2_int: 1001" \ + -d 'My name is Larry' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \ + -H "x-riak-index-Field1_bin: val2" \ + -H "x-riak-index-Field2_int: 1002" \ + -d 'My name is Moe' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \ + -H "X-RIAK-INDEX-FIELD1_BIN: val3" \ + -H "X-RIAK-INDEX-FIELD2_INT: 1003" \ + -d 'My name is Curly' + +curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \ + -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \ + -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1007" \ + -d 'My name is Veronica' +``` + +The above objects will end up having the following secondary indexes, +respectively: + +* `Larry` - Binary index `field1_bin` and integer index `field2_int` +* `Moe` - Binary index `field1_bin` and integer index `field2_int` + (note that the index names are set to lowercase by Riak) +* `Curly` - Binary index `field1_bin` and integer index `field2_int` + (note again that the index names are set to lowercase) +* `Veronica` - Binary index `field1_bin` with the values `val4`, + `val4a`, and `val4b` and integer index `field2_int` with the values + `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed) + +As these examples show, there are safeguards in Riak that both normalize +the names of indexes and prevent the accumulation of redundant indexes. + +## Invalid Field Names and Types + +The following examples demonstrate what happens when an index field is +specified with an invalid field name or type. The system responds with +`400 Bad Request` and a description of the error. + +Invalid field name: + +```java +// The Java client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_foo'] = [1001] + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter', 'jsmith123'); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_foo', 1001) + +# Result: +riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'." 
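+# Only the '_bin' and '_int' suffixes are valid; for example, the
+# following would succeed: obj.add_index('field2_int', 1001)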
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
+>
+> To avoid such issues, the `riak.conf` file provides a `participate_in_coverage` option for enabling or disabling a node's participation in 2i (coverage) queries. Setting `participate_in_coverage = disabled` prevents the node in question from participating. This is chiefly useful for keeping newly added nodes that have not yet received all of their data from serving 2i queries and returning inconsistent results. Changing the setting requires a restart of Riak on that node to take effect; the default is `enabled`.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withScalarValue('val1')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"}, %% index name and type
+    <<"val1">> %% index value
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1"). 
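+    // Exact match: returns keys whose field1_bin index equals "val1".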
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+    .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withScalarValue(1001)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"}, %% index name and type
+    1001 %% index value
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job (the bucket is given as a `[type, bucket]`
+pair so that the query targets the `indexes` bucket type used above):
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": ["indexes", "people"],
+    "index": "field1_bin",
+    "key": "val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index... 
+ +```java +Namespace myBucket = new Namespace("indexes", "people"); +BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4") + .build(); +BinIndexQuery.Response response = client.execute(biq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field1_bin', 'val2'..'val4') +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('people', 'indexes') + ->withIndexName('field1_bin') + ->withRangeValue('val2', 'val4') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field1_bin', 'val2', 'val4') +``` + +```csharp +var riakIndexId = new RiakIndexId("indexes", "people", "field1"); +var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4"); +var indexResult = indexRiakResult.Value; +``` + +```javascript +var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field1_bin') + .withRange('val2', 'val4') + .withCallback(query_cb) + .build(); +client.execute(binIdxCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"people">>}, %% bucket type and bucket name + {binary_index, "field1"}, %% index name + <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4" +). +``` + +```golang +c1, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithIndexName("field1_bin"). + WithRange("val2", "val4"). + Build() +if err != nil { + return err +} +``` + +```curl +curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4 +``` + +Or query an integer index... + +```java +Namespace myBucket = new Namespace("indexes", "people"); +IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L) + .build(); +IntIndexQuery.Response response = client.execute(iiq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field2_int', 1002..1004) +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('people', 'indexes') + ->withIndexName('field2_int') + ->withRangeValue(1002, 1004) + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +bucket.get_index('field2_int', 1002, 1004) +``` + +```csharp +var riakIndexId = new RiakIndexId("indexes", "people", "field2"); +var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004); +var indexResult = indexRiakResult.Value; +``` + +```javascript +var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_int') + .withRange(1002, 1004) + .withCallback(query_cb) + .build(); +client.execute(intIdxCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"people">>}, %% bucket type and bucket name + {integer_index, "field2"}, %% index name + 1002, 1004 %% range query for keys between "val2" and "val4" +). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithIndexName("field2_int"). + WithIntRange(1002, 1004). 
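+    // 2i range queries are inclusive of both endpoints (1002 and 1004 here).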
+    Build()
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": ["indexes", "people"],
+    "index": "field2_int",
+    "start": 1002,
+    "end": 1004
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+    .withKeyAndIndex(true)
+    .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('rock', 'rocl')
+    ->withReturnTerms()
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">>, %% range query for terms between "rock" and "rocl"
+    [{return_terms, true}] %% return matched terms alongside the keys
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to have the
+servers return the results in chunks rather than all at once. You can do
+so using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set. 
+ +```java +Namespace tweetsBucket = new Namespace("indexes", "tweets"); +BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru") + .withMaxResults(5) + .build(); +BinIndexQuery.Response response = client.execute(biq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('tweets') +bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5) +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('tweets', 'indexes') + ->withIndexName('hashtags') + ->withRangeValue('ri', 'ru') + ->withMaxResults(5) + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('tweets') +bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5) +``` + +```csharp +var idxId = new RiakIndexId("indexes", "tweets", "hashtags"); +var options = new RiakIndexGetOptions(); +options.SetMaxResults(5); +var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options); + +options.SetContinuation(rslt.Continuation); +rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options); +``` + +```javascript +function do_query(continuation) { + var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('tweets') + .withIndexName('hashtags_bin') + .withRange('ri', 'ru') + .withMaxResults(5) + .withCallback(pagination_cb); + + if (continuation) { + binIdxCmdBuilder.withContinuation(continuation); + } + + client.execute(binIdxCmdBuilder.build()); +} + +var query_keys = []; +function pagination_cb(err, rslt) { + if (err) { + logger.error("query_cb err: '%s'", err); + return; + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + query_keys = []; + + if (rslt.continuation) { + do_query(rslt.continuation); + } + } + + if (rslt.values.length > 0) { + Array.prototype.push.apply(query_keys, + rslt.values.map(function (value) { + return value.objectKey; + })); + } +} + +do_query(); +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name + {binary_index, "hashtags"}, %% index name + <<"ri">>, <<"ru">>, %% range query from "ri" to "ru" + {max_results, 5} +). +``` + +```golang +func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error { + builder := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("tweets"). + WithIndexName("hashtags_bin"). + WithRange("ri", "ru"). 
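+    // Request pages of five results; when more results remain, the
+    // response carries a continuation token for the next request.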
+ WithMaxResults(5) + + if continuation != nil && len(continuation) > 0 { + builder.WithContinuation(continuation) + } + + cmd, err := builder.Build() + if err != nil { + return err + } + + if err := cluster.Execute(cmd); err != nil { + return err + } + + printIndexQueryResults(cmd) + + sciq := cmd.(*riak.SecondaryIndexQueryCommand) + if sciq.Response == nil { + return errors.New("[DevUsing2i] expected response but did not get one") + } + + rc := sciq.Response.Continuation + if rc != nil && len(rc) > 0 { + return doPaginatedQuery(cluster, sciq.Response.Continuation) + } + + return nil +} + +func queryingPagination(cluster *riak.Cluster) error { + return doPaginatedQuery(cluster, nil) +} +``` + +```curl +curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true +``` + +Here is an example JSON response (your client-specific response may differ): + +```json +{ + "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=", + "results": [ + { "rice": "349222574510710785" }, + { "rickross": "349222868095217664" }, + { "ridelife": "349221819552763905" }, + { "ripjake": "349220649341952001" }, + { "ripjake": "349220687057129473" } + ] +} +``` + +Take the continuation value from the previous result set and feed it +back into the query. + +```java +Namespace tweetsBucket = new Namespace("indexes", "tweets"); +BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru") + .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM")) + .withMaxResults(5) + .withKeyAndIndex(true) + .build(); +BinIndexQuery.Response response = client.execute(biq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('tweets') +bucket.get_index( + 'hashtags_bin', + 'ri'..'ru', + continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM', + max_results: 5, + return_terms: true +) +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('tweets', 'indexes') + ->withIndexName('hashtags') + ->withRangeValue('ri', 'ru') + ->withMaxResults(5) + ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('tweets') +bucket.get_index( + 'hashtags_bin', + 'ri', 'ru', + continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM', + max_results=5, + return_terms=True +) +``` + +```csharp +// rslt is the previous 2i fetch result +var idxId = new RiakIndexId("indexes", "tweets", "hashtags"); +var options = new RiakIndexGetOptions(); +options.SetMaxResults(5); +options.SetContinuation(rslt.Continuation); +rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options); +``` + +```javascript +// See above example +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name + {binary_index, "hashtags"}, %% index name + <<"ri">>, <<"ru">>, %% range query from "ri" to "ru" + [ + {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>}, + {max_results, 5}, + {return_terms, true} + ] +). 
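+%% When no further pages remain, the returned continuation is
+%% 'undefined' (cf. the index_results_v1 record shown earlier).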
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with pagination (`max_results` and
+`continuation`) and with `return_terms`.
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys. 
See the pagination example above: hash
+tags (2i keys) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages that contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index, as in the previous example, and pipes the results into a MapReduce
+job that counts the number of records in the `people` bucket. To improve
+efficiency, the batch size has been increased from the default size
+of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": ["indexes", "people"],
+    "index": "\$bucket",
+    "key": "people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/security.md b/content/riak/kv/3.0.2/developing/usage/security.md
new file mode 100644
index 0000000000..60be412a38
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/security.md
@@ -0,0 +1,103 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/advanced/client-security
+  - /riak/kv/3.0.2/dev/advanced/client-security
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/3.0.2/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, aka [security sources]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients present a CA-generated client certificate
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified with the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis. 
This means that +you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{<baseurl>}}riak/kv/3.0.2/developing/usage) have to use username and password. The approach +that you adopt will depend on your security needs. + +This document provides a general overview of how that works. For +managing security in Riak itself, see the following documents: + +* [Authentication and Authorization]({{<baseurl>}}riak/kv/3.0.2/using/security/basics) +* [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/) + +We also provide client-library-specific guides for the following +officially supported clients: + +* [Java]({{<baseurl>}}riak/kv/3.0.2/developing/usage/security/java) +* [Ruby]({{<baseurl>}}riak/kv/3.0.2/developing/usage/security/ruby) +* [PHP]({{<baseurl>}}riak/kv/3.0.2/developing/usage/security/php) +* [Python]({{<baseurl>}}riak/kv/3.0.2/developing/usage/security/python) +* [Erlang]({{<baseurl>}}riak/kv/3.0.2/developing/usage/security/erlang) + +## Certificates, Keys, and Authorities + +If Riak security is enabled, all client operations, regardless of the +security source you choose for those clients, must be over a secure SSL +connection. If you are using a self-generated Certificate Authority +(CA), Riak and connecting clients will need to share that CA. + +To use certificate-based auth, you will need to create a Public Key +Infrastructure (PKI) based on +[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central +foundation of your PKI should be a Certificate Authority (CA), created +inside of a secure environment, that can be used to sign certificates. +In addition to a CA, your client will need to have access to a private +key shared only by the client and Riak as well as a CA-generated +certificate. + +To prevent so-called [Man-in-the-Middle +attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private +keys should never be shared beyond Riak and connecting clients. + +> **HTTP not supported** +> +> Certificate-based authentication is available only through Riak's +[Protocol Buffers]({{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{<baseurl>}}riak/kv/3.0.2/developing/api/http). + +### Default Names + +In Riak's [configuration files]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#security), the +default certificate file names are as follows: + +Cert | Filename +:----|:------- +Certificate authority (CA) | `cacertfile.pem` +Private key | `key.pem` +CA-generated cert | `cert.pem` + +These filenames will be used in the client-library-specific tutorials. + + + + diff --git a/content/riak/kv/3.0.2/developing/usage/security/erlang.md b/content/riak/kv/3.0.2/developing/usage/security/erlang.md new file mode 100644 index 0000000000..f44199e9e3 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/usage/security/erlang.md @@ -0,0 +1,118 @@ +--- +title_supertext: "Client Security:" +title: "Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Erlang" + identifier: "usage_security_erlang" + weight: 103 + parent: "usage_security" +toc: true +aliases: + - /riak/3.0.2/dev/advanced/client-security/erlang + - /riak/kv/3.0.2/dev/advanced/client-security/erlang +--- + +This tutorial shows you how to set up a Riak Erlang client to +authenticate itself when connecting to Riak. 
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+                   {credentials, "riakuser", ""},
+                   {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+                  ],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+```
+
+Please note that you do not need to specify a password if you are not
+using password-based authentication. If you are using a different
+security source, Riak will ignore the password. You can enter an empty
+string (as in the example above) or anything you'd like.
+
+This client is not currently set up to use any of the available security
+sources, with the exception of trust-based authentication, provided that
+the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which the client is connecting has been specified as trusted. More
+on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `SecurityOptions` list from
+above. We'll use the password `rosebud` here and in the rest of the
+examples.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+                   {credentials, "riakuser", "rosebud"},
+                   {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+                  ],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). 
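+
+%% As a quick sanity check (hypothetical usage), you can ping the node
+%% over the authenticated connection; this returns 'pong' on success:
+%% riakc_pb_socket:ping(Pid).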
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/3.0.2/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + + + + diff --git a/content/riak/kv/3.0.2/developing/usage/security/java.md b/content/riak/kv/3.0.2/developing/usage/security/java.md new file mode 100644 index 0000000000..6bbd7e1bd7 --- /dev/null +++ b/content/riak/kv/3.0.2/developing/usage/security/java.md @@ -0,0 +1,121 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/3.0.2/dev/advanced/client-security/java + - /riak/kv/3.0.2/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to
+create a cluster object (we'll call it `cluster`), which will in turn be
+used to create a `client` object. The setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.RiakCluster;
+import com.basho.riak.client.api.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        // This will specify a username but no password or keystore:
+        .withAuth("riakuser", null, null)
+        .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+        .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        .withAuth("riakuser", "rosebud", ks)
+        .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/security/php.md b/content/riak/kv/3.0.2/developing/usage/security/php.md
new file mode 100644
index 0000000000..8ca438816f
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/security/php.md
@@ -0,0 +1,122 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/advanced/client-security/php
+  - /riak/kv/3.0.2/dev/advanced/client-security/php
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics).
[Certificate]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication is not
+yet supported in the PHP client due to limitations of the HTTP interface of Riak.
+
+## PHP Client Basics
+
+When connecting to Riak using a PHP-based client, you typically do so
+by instantiating separate `\Basho\Riak\Node` objects for each node in your
+cluster and passing those `\Basho\Riak\Node` objects as an array to a
+`\Basho\Riak` object as a dependency. In this document, we will be working with
+only one node.
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/) you choose. All clients should also
+provide a username. The example below sets up a single node object (we'll
+simply call it `node`) that connects to Riak on `localhost` and on port
+8087 and specifies `riakuser` as a username. That object will be used to
+create a Riak object. The setup below does not specify a CA and will throw
+a `\Basho\Riak\Node\Builder\Exception`:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Node;
+
+$node = (new Node\Builder())
+    ->atHost('127.0.0.1')
+    ->onPort('8087')
+    ->usingPasswordAuthentication('riakuser')
+    ->build();
+
+// since we are using a single node, it needs to be wrapped in array brackets
+$riak = new Riak([$node]);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `usingPasswordAuthentication` method in
+the `node` object's builder rather than omitting it. We will also
+pass the path of the CA file relative to the current working directory into
+the `withCertificateAuthorityFile` method.
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Node;
+
+$node = (new Node\Builder())
+    ->atHost('127.0.0.1')
+    ->onPort('8087')
+    ->usingPasswordAuthentication('riakuser', 'rosebud')
+    ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem')
+    ->build();
+
+// since we are using a single node, it needs to be wrapped in array brackets
+$riak = new Riak([$node]);
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password. There are helper methods that handle this for you,
+`usingPamAuthentication` and `usingTrustAuthentication`.
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Node;
+
+// PAM Example
+$node = (new Node\Builder())
+    ->atHost('127.0.0.1')
+    ->onPort('8087')
+    ->usingPamAuthentication('riakuser')
+    ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem')
+    ->build();
+
+// Trust Example
+$node = (new Node\Builder())
+    ->atHost('127.0.0.1')
+    ->onPort('8087')
+    ->usingTrustAuthentication('riakuser')
+    ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem')
+    ->build();
+
+// since we are using a single node, it needs to be wrapped in array brackets
+$riak = new Riak([$node]);
+```
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak PHP client due to limitations in the HTTP interface.
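+
+Once a node has been built with one of the authentication methods above,
+the resulting `$riak` object is used like any other client handle. As a
+minimal sketch (the bucket type, bucket, and key below are placeholders,
+and `$riak` is assumed to be one of the authenticated clients from the
+examples above), a fetch through the secured connection looks like this:
+
+```php
+// Fetch an object over the authenticated connection
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute();
+```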
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/security/python.md b/content/riak/kv/3.0.2/developing/usage/security/python.md
new file mode 100644
index 0000000000..6db820abdd
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/security/python.md
@@ -0,0 +1,176 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/advanced/client-security/python
+  - /riak/kv/3.0.2/dev/advanced/client-security/python
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `creds` object from above.
+We'll use the password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/3.0.2/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a general CA (as with all security sources), a username, a
+CA-generated cert, and a private key. We'll assume that all certs are
+stored in `/ssl_dir`, as in the previous examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      cert_file='/ssl_dir/cert.pem',
+                      pkey_file='/ssl_dir/key.pem')
+```
+
+## Specifying a Certificate Revocation List
+
+If you are using a CA-generated Certificate Revocation List (CRL), you
+can specify its filepath using the `crl_file` parameter.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      crl_file='/ssl_dir/revocation.crl')
+```
+
+## Specifying Ciphers
+
+To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/3.0.2/using/security/basics/#security-ciphers), you can pass in a colon-delimited
+string to the `ciphers` parameter:
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC')
+```
+
+## Using OpenSSL Objects
+
+Whenever you specify certs, you have the option of either passing in
+file paths as strings (as in the examples above) or properly created
+OpenSSL objects, e.g. objects created using the
+[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If
+you generate OpenSSL objects this way, you should note that they must
+be specified differently when creating a `SecurityCreds` object. The
+table below lists the appropriate parameter names for each method, as
+well as the pyOpenSSL class to which each cert must belong if you create
+OpenSSL objects.
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `pkey_file` | `pkey` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify file paths, the certs will be loaded and converted into
+the appropriate OpenSSL objects. The functions used for this are
+`OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/security/ruby.md b/content/riak/kv/3.0.2/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..faa92a1267
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/security/ruby.md
@@ -0,0 +1,162 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/advanced/client-security/ruby
+  - /riak/kv/3.0.2/dev/advanced/client-security/ruby
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/) you choose. All clients should
+also provide a username. The example below sets up a client object
+(we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we will
+specify a password for the client in the `authentication` hash. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/3.0.2/using/security/basics#user-management).
+
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/ssl_dir/client_cert.pem',
+    cert: '/ssl_dir/cert.pem',
+    key: '/ssl_dir/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL objects.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object.
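+
+A minimal sketch of that option, reusing the `authentication` keys from
+the examples above:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Disable CRL checking entirely
+    crl: false
+  }
+)
+```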
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)),
+the OCSP endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/developing/usage/updating-objects.md b/content/riak/kv/3.0.2/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..b301968923
--- /dev/null
+++ b/content/riak/kv/3.0.2/developing/usage/updating-objects.md
@@ -0,0 +1,778 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/using/updates
+  - /riak/kv/3.0.2/dev/using/updates
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context). These contexts track the causal history of objects.
+They are attached to _all_ Riak objects as metadata, and they are not
+readable by humans. They may sound complex---and they are fairly complex
+behind the scenes---but using them in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->withLocation($location)
+  ->build()
+  ->execute()
+  ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withLocation($location)
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but it
+%% will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+obj = rsp.Values[0]
+obj.Value = []byte("Harlem Globetrotters")
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc = cmd.(*riak.StoreValueCommand)
+rsp = svc.Response
+obj = rsp.Values[0]
+fmt.Printf("champion: %v", string(obj.Value))
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+```
+
+In the samples above, we didn't need to actually interact with the
+context object, as retaining and passing along the context object was
+accomplished automatically by the client. If, however, you do need
+access to an object's context, the clients enable you to fetch it from
+the object:
+
+```java
+// Using the RiakObject obj from above:
+
+Vclock vClock = obj.getVclock();
+System.out.println(vClock.asString());
+
+// The context object will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```ruby
+# Using the RObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```php
+// Using the $object from above:
+
+echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```python
+# Using the RiakObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```csharp
+// Using the RiakObject obj from above:
+var vclock = obj.VectorClock;
+Console.WriteLine(Convert.ToBase64String(vclock));
+
+// The output will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```javascript
+// Using the RiakObject fetchedObj from above:
+var fetchedObj = rslt.values.shift();
+logger.info("vclock: %s", fetchedObj.getVClock().toString('base64'));
+
+// The output will look something like this:
+// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA=
+```
+
+```erlang
+%% Using the Obj object from above:
+
+riakc_obj:vclock(Obj).
+
+%% The context object will look something like this in the Erlang shell:
+%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126,
+%% 6,175,157,255,57,131,41,145,49,143,149,225,240,...>>
+```
+
+```golang
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Println(rsp.VClock)
+
+// Output:
+// X3hNXFq3ythUqvvrG9eJEGbUyLS
+```
+
+## The Object Update Cycle
+
+If you decide that your application requires mutable data in Riak, we
+recommend that you:
+
+* avoid high-frequency object updates to the same key (i.e. multiple
+  updates per second for long periods of time), as this will degrade
+  Riak performance; and that you
+* follow a read-modify-write cycle when performing updates.
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated).
This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+    * [Java]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/java)
+    * [Ruby]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/ruby)
+    * [Python]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/python)
+    * [C#]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/csharp)
+    * [Go]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+[strong consistency documentation]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/3.0.2/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official
+Ruby, PHP, Python, .NET, Node.js, Erlang, and Go clients. Because updates
+with the official Java client work somewhat differently, those examples
+can be found in the [section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which bears the bucket type
+`siblings` (a type that sets `allow_mult` to `true`). The key for each
+object is the name of the team, e.g. `giants`, `broncos`, etc. Each
+object will consist of the name of the coach in plain text.
Here's an example of
+creating and storing such an object:
+
+```ruby
+bucket = client.bucket('coaches')
+obj = bucket.get_or_new('seahawks', type: 'siblings')
+obj.content_type = 'text/plain'
+obj.raw_data = 'Pete Carroll'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings'));
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute();
+
+if ($response->isSuccess()) {
+    $object = $response->getObject();
+    $object->setData('Pete Carroll');
+} else {
+    $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain');
+}
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withObject($object)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+from riak import RiakObject
+
+bucket = client.bucket_type('siblings').bucket('coaches')
+obj = RiakObject(client, bucket, 'seahawks')
+obj.content_type = 'text/plain'
+obj.data = 'Pete Carroll'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("siblings", "coaches", "seahawks");
+var obj = new RiakObject(id, "Pete Carroll",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucketType('siblings');
+riakObj.setBucket('coaches');
+riakObj.setKey('seahawks');
+riakObj.setValue('Pete Carroll');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    } else {
+        logger.info('Stored Pete Carroll');
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>},
+                    <<"seahawks">>,
+                    <<"Pete Carroll">>,
+                    <<"text/plain">>).
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Pete Carroll"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("siblings").
+    WithBucket("coaches").
+    WithKey("seahawks").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fmt.Println("Stored Pete Carroll")
+```
+
+Every once in a while, though, head coaches change in the NFL, which
+means that our data would need to be updated.
Below is an example
+function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.raw_data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($team, $coach) {
+    global $riak; // the \Basho\Riak client constructed earlier
+
+    $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+    // The read phase
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+      ->atLocation($location)
+      ->build()
+      ->execute();
+
+    // The modify phase
+    if ($response->isSuccess()) {
+        $object = $response->getObject();
+        $object->setData($coach);
+    } else {
+        $object = new \Basho\Riak\Object($coach, 'text/plain');
+    }
+
+    // The write phase
+    $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+      ->withObject($object)
+      ->atLocation($location)
+      ->build()
+      ->execute();
+
+    return $response->isSuccess();
+}
+
+echo update_coach('packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Pid, Team, NewCoach) ->
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage
+update_coach(Pid, <<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak.
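+
+The same cycle applies over HTTP: echo the `X-Riak-Vclock` header you
+received on the read back to Riak on the write, as noted in the curl
+example earlier. A minimal sketch (the vclock value and new coach name
+below are illustrative only):
+
+```curl
+# Read: note the X-Riak-Vclock header in the response
+curl -i http://localhost:8098/types/siblings/buckets/coaches/keys/seahawks
+
+# Modify and write: pass the fetched context back along with the new value
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -H "X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=" \
+  -d "Jim Mora" \
+  http://localhost:8098/types/siblings/buckets/coaches/keys/seahawks
+```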
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If
+you're not certain, however, then we recommend always reading the object
+first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+    public String username;
+    public List<String> hobbies;
+
+    public User(String username, List<String> hobbies) {
+        this.username = username;
+        this.hobbies = hobbies;
+    }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` field:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that replaces the entire object rather than
+changing one or more of its properties.
Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above updates the object without returning its value to the
+client. You could, however, use a no-operation update to _read_ an
+object as well if you set `return_body` to `true` in your request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withFetchOption(Option.RETURN_BODY, true)
+        .withUpdate(Update.noopUpdate())
+        .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/downloads.md b/content/riak/kv/3.0.2/downloads.md
new file mode 100644
index 0000000000..826975f8c9
--- /dev/null
+++ b/content/riak/kv/3.0.2/downloads.md
@@ -0,0 +1,27 @@
+---
+title: "Download for Riak KV 3.0.2"
+description: "Download Riak KV 3.0.2"
+menu:
+  riak_kv-3.0.2:
+    name: "Download Riak KV"
+    identifier: "download_riak_kv"
+    weight: 101
+    pre: download-alt
+project: "riak_kv"
+project_version: 3.0.2
+toc: false
+layout: downloads
+listed_projects:
+  - project: "riak_kv"
+    version: 3.0.2
+    title: "Riak KV"
+    install_instructions_set: "setup/installing"
+aliases:
+  - /riak/3.0.2/downloads
+  - /riak/kv/3.0.2/downloads
+---
+
+
+
+
diff --git a/content/riak/kv/3.0.2/index.md b/content/riak/kv/3.0.2/index.md
new file mode 100644
index 0000000000..5739791deb
--- /dev/null
+++ b/content/riak/kv/3.0.2/index.md
@@ -0,0 +1,79 @@
+---
+title: "Riak KV 3.0.2"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Riak KV"
+    identifier: "index"
+    weight: 100
+    pre: riak
+toc: false
+aliases:
+  - /riak/3.0.2/
+---
+
+[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/
+[config index]: {{<baseurl>}}riak/kv/3.0.2/configuring
+[downloads]: {{<baseurl>}}riak/kv/3.0.2/downloads/
+[install index]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/
+[plan index]: {{<baseurl>}}riak/kv/3.0.2/setup/planning
+[perf open files]: {{<baseurl>}}riak/kv/3.0.2/using/performance/open-files-limit
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/debian-ubuntu
+[usage search]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/search
+[getting started]: {{<baseurl>}}riak/kv/3.0.2/developing/getting-started
+[dev client libraries]: {{<baseurl>}}riak/kv/3.0.2/developing/client-libraries
+
+
+
+Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data.
+
+This release is tested with OTP 20, OTP 21, and OTP 22, but optimal performance is likely to be achieved when using OTP 22.
+
+## Supported Operating Systems
+
+- Amazon Linux 2016.09 (AWS)
+- Amazon Linux 2 (AWS)
+- CentOS 6
+- CentOS 7
+- CentOS 8
+- Debian 7.0 ("Wheezy")
+- Debian 8.0 ("Jessie")
+- Debian 9.0 ("Stretch")
+- Red Hat Enterprise Linux 6
+- Red Hat Enterprise Linux 7
+- Red Hat Enterprise Linux 8
+- Raspbian Buster
+- Ubuntu 12.04 ("Precise Pangolin")
+- Ubuntu 14.04 ("Trusty Tahr")
+- Ubuntu 16.04 ("Xenial Xerus")
+- Ubuntu 18.04 ("Bionic Beaver")
+- FreeBSD 10.4
+- FreeBSD 11.1
+- Mac OSX 10.11+ (development only)
+
+## Getting Started
+
+Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started:
+
+1. [Install Riak KV][install index]
+2. [Plan your Riak KV setup][plan index]
+3. [Configure Riak KV for your needs][config index]
+
+{{% note title="Developing with Riak KV" %}}
+If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/3.0.2/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more.
+{{% /note %}}
+
+## Popular Docs
+
+1. [Open Files Limit][perf open files]
+2. [Installing on Debian-Ubuntu][install debian & ubuntu]
+3. [Developing with Riak KV: Searching][usage search]
+4. [Developing with Riak KV: Getting Started][getting started]
+5. 
[Developing with Riak KV: Client Libraries][dev client libraries]
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn.md b/content/riak/kv/3.0.2/learn.md
new file mode 100644
index 0000000000..e5f3a1999a
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn.md
@@ -0,0 +1,53 @@
+---
+title: "Learn About Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Learning"
+    identifier: "learn"
+    weight: 400
+    pre: beaker
+toc: true
+aliases:
+---
+
+[learn why riak]: ./why-riak-kv/
+[learn use cases]: ./use-cases/
+[learn new nosql]: ./new-to-nosql/
+[glossary]: ./glossary/
+[concepts]: ./concepts/
+
+## In This Section
+
+#### [Why Riak KV?][learn why riak]
+
+An overview of Riak KV and when to use it.
+
+[Learn More >>][learn why riak]
+
+#### [Use Cases][learn use cases]
+
+Details use cases and applications in which Riak KV excels.
+
+[Learn More >>][learn use cases]
+
+
+
+#### [Glossary][glossary]
+
+A list of terms relating to Riak used throughout the documentation.
+
+[Learn More >>][glossary]
+
+#### [Concepts][concepts]
+
+Provides definitions for, insight into, and high-level information about
+the various parts of Riak KV.
+
+[Learn More >>][concepts]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/concepts.md b/content/riak/kv/3.0.2/learn/concepts.md
new file mode 100644
index 0000000000..b4ea23cf05
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/concepts.md
@@ -0,0 +1,49 @@
+---
+title: "Concepts"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Concepts"
+    identifier: "learn_concepts"
+    weight: 104
+    parent: "learn"
+toc: true
+aliases:
+---
+
+[concept aae]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets
+[concept cap neg]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/capability-negotiation
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters
+[concept crdts]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/crdts
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency
+[concept keys objects]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/reference/strong-consistency
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/vnodes
+[config index]: {{<baseurl>}}riak/kv/3.0.2/configuring
+[plan index]: {{<baseurl>}}riak/kv/3.0.2/setup/planning
+[use index]: {{<baseurl>}}riak/kv/3.0.2/using/
+
+
+Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high-level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak.
+
+Learn more about:
+
+* [Active Anti-Entropy (AAE)][concept aae]
+* [Buckets][concept buckets]
+* [Capability Negotiation][concept cap neg]
+* [Causal Context][concept causal context]
+* [Clusters][concept clusters]
+* [Convergent Replicated Data Types (CRDTs)][concept crdts]
+* [Eventual Consistency][concept eventual consistency]
+* [Keys and Objects][concept keys objects]
+* [Replication][concept replication]
+* [Virtual Nodes (vnodes)][concept vnodes]
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/concepts/active-anti-entropy.md b/content/riak/kv/3.0.2/learn/concepts/active-anti-entropy.md
new file mode 100644
index 0000000000..5784621cf1
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/concepts/active-anti-entropy.md
@@ -0,0 +1,111 @@
+---
+title: "Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Active Anti-Entropy"
+    identifier: "learn_concepts_aae"
+    weight: 100
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.2/theory/concepts/aae
+  - /riak/kv/3.0.2/theory/concepts/aae
+---
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter
+[cluster ops aae]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/active-anti-entropy
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency
+[config aae]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/#active-anti-entropy
+[glossary read rep]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#read-repair
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode
+[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree
+[usage search]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/search
+
+
+In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored
+on different nodes are an expected byproduct of node failure, concurrent
+client updates, physical data loss and corruption, and other events that
+distributed systems are built to handle. These conflicts occur when
+objects are either
+
+* **missing**, as when one node holds a replica of the object and
+  another node does not, or
+* **divergent**, as when the values of an existing object differ across
+  nodes.
+
+Riak KV offers two means of resolving object conflicts: read repair and
+active anti-entropy (AAE). Both of these conflict resolution mechanisms
+apply to normal key/value data in Riak as well as to
+[search indexes][usage search].
+
+
+## Read Repair vs. Active Anti-Entropy
+
+In versions of Riak prior to 1.3, replica conflicts were healed via
+[read repair][glossary read rep], which is a _passive_
+anti-entropy mechanism that heals object conflicts only when a read
+request reaches Riak from a client. Under read repair, if the
+[vnode][glossary vnode] coordinating the read request determines
+that different nodes hold divergent values for the object, the repair
+process will be set in motion.
+
+One advantage of using read repair alone is that it doesn't require any
+kind of background process to take effect, which can cut down on CPU
+resource usage. The drawback of the read repair-only approach, however,
+is that the healing process can only ever reach those objects that
+are read by clients. Any conflicts in objects that are not read by
+clients will go undetected.
+
+The _active_ anti-entropy (AAE) subsystem was added to Riak in
+version 1.3 to enable conflict resolution to run as a
+continuous background process, in contrast with read repair, which does
+not run continuously. AAE is most useful in clusters containing
+so-called "cold data" that may not be read for long periods of time, even
+months or years, and is thus not reachable by read repair.
+
+Although AAE is enabled by default, it can be turned off if necessary.
+See our documentation on [managing active anti-entropy][cluster ops aae]
+for information on how to enable and disable AAE, as well as on configuring
+and monitoring AAE.
+
+## Active Anti-Entropy and Hash Tree Exchange
+
+In order to compare object values between replicas without using more
+resources than necessary, Riak relies on [Merkle tree] hash exchanges
+between nodes.
+
+Using this type of exchange enables Riak to compare a balanced tree of
+Riak object hashes. Any difference at a higher level in the hierarchy
+means that at least one value has changed at a lower level. AAE
+recursively compares the tree, level by level, until it pinpoints exact
+values with a difference between nodes. The result is that AAE is able
+to run repair operations efficiently regardless of how many objects are
+stored in a cluster, since it need only repair specific objects instead
+of all objects.
+
+In contrast with related systems, Riak uses persistent, on-disk hash
+trees instead of in-memory hash trees. The advantages of this approach
+are twofold:
+
+* Riak can run AAE operations with a minimal impact on memory usage
+* Riak nodes can be restarted without needing to rebuild hash trees
+
+In addition, hash trees are updated in real time as new writes come in,
+which reduces the time that it takes to detect and repair missing or
+divergent replicas.
+
+As an additional fallback measure, Riak periodically clears and
+regenerates all hash trees from on-disk key/value data, which enables
+Riak to detect silent data corruption to on-disk data arising from disk
+failure, faulty hardware, and other sources. The default time period for
+this regeneration is one week, but this can be adjusted in each node's
+[configuration file][config aae].
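+
+As a sketch, lowering the regeneration period to three days would look
+like this in `riak.conf` (the setting names here are assumptions based on
+the anti-entropy section of the configuration reference; check the
+[configuration file][config aae] docs for your version before relying on
+them):
+
+```riakconf
+## Assumed setting names; see the anti-entropy configuration reference
+anti_entropy = active
+anti_entropy.tree.expiry = 3d
+```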
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/concepts/buckets.md b/content/riak/kv/3.0.2/learn/concepts/buckets.md
new file mode 100644
index 0000000000..2abccb2a5f
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/concepts/buckets.md
@@ -0,0 +1,217 @@
+---
+title: "Buckets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Buckets"
+    identifier: "learn_concepts_buckets"
+    weight: 101
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.2/theory/concepts/Buckets
+  - /riak/kv/3.0.2/theory/concepts/Buckets
+  - /riak/3.0.2/theory/concepts/buckets
+  - /riak/kv/3.0.2/theory/concepts/buckets
+---
+
+[apps cluster metadata]: {{<baseurl>}}riak/kv/3.0.2/developing/app-guide/cluster-metadata
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/bucket-types
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/strong-consistency
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context
+[concept causal context sib]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context/#siblings
+[concept replication]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/reference/strong-consistency
+[config basic]: {{<baseurl>}}riak/kv/3.0.2/configuring/basic
+[dev api http]: {{<baseurl>}}riak/kv/3.0.2/developing/api/http
+[dev data types]: {{<baseurl>}}riak/kv/3.0.2/developing/data-types
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#ring
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/multi
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/replication
+
+
+Buckets are used to define a virtual keyspace for storing Riak objects.
+They enable you to define non-default configurations over that keyspace
+concerning [replication properties][concept replication] and [other
+parameters][config basic].
+
+In certain respects, buckets can be compared to tables in relational
+databases or to folders in filesystems. From the standpoint
+of performance, buckets with default configurations are essentially
+"free," while non-default configurations, defined [using bucket
+types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.
+
+## Configuration
+
+Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables
+you to create and modify sets of configurations and apply them to as
+many buckets as you wish. With bucket types, you can configure the
+following bucket-level parameters, overriding the default values if you
+wish.
+
+#### allow_mult
+
+Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on
+the context. See the documentation on [`allow_mult`][usage bucket types] for more
+information.
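+
+For example, here is a sketch of creating and activating a bucket type
+that turns sibling creation on; the type name `siblings_on` is
+illustrative:
+
+```bash
+riak-admin bucket-type create siblings_on '{"props":{"allow_mult":true}}'
+riak-admin bucket-type activate siblings_on
+```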
+
+#### n_val
+
+Specifies the number of copies of each object to be stored in the
+cluster. See the documentation on [replication properties][usage replication]. Default:
+`3`.
+
+#### last_write_wins
+
+Indicates if an object's timestamp will be used to decide the canonical
+write in the case of a conflict. See the documentation on [vector
+clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default:
+`false`.
+
+#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum
+
+See the documentation on [replication properties][usage replication] for more information
+on all of these properties.
+
+#### precommit
+
+A list of Erlang functions to be executed before writing an object. See
+our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit
+hooks, i.e. an empty list.
+
+#### postcommit
+
+A list of Erlang functions to be executed after writing an object. See
+our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit
+hooks, i.e. an empty list.
+
+#### old_vclock, young_vclock, small_vclock, big_vclock
+
+These settings enable you to manage [vector clock pruning][concept causal context].
+
+#### backend
+
+If you are using the [Multi][plan backend multi] backend, this property enables you to
+determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using
+LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_
+buckets of all types will use the assigned backend.
+
+#### consistent
+
+If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets
+bearing a type, this setting must be set to `true`. The default is
+`false`. More information can be found in our documentation on [using
+strong consistency][cluster ops strong consistency].
+
+#### datatype
+
+If you are using [Riak data types][dev data types], this setting
+determines which data type will be used in
+buckets of this bucket type. Possible values: `counter`, `set`, or
+`map`.
+
+#### dvv_enabled
+
+Whether [dotted version vectors][concept causal context]
+will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.
+
+#### chash_keyfun, linkfun
+
+These settings involve features that have been deprecated. You will not
+need to adjust these values.
+
+## Fetching Bucket Properties
+
+If you'd like to see how a particular bucket has been configured, you
+can do so using our official client libraries or through Riak's [HTTP
+API][dev api http]. The following would fetch the properties for the bucket
+`animals` if that bucket had a default configuration, i.e.
the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+  new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+    ->buildBucket('animals')
+    ->build()
+    ->execute()
+    ->getBucket()
+    ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak's HTTP API is listening on "localhost" and its
+# default port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+  new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+    ->buildBucket('animals', 'my_custom_type')
+    ->build()
+    ->execute()
+    ->getBucket()
+    ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/concepts/capability-negotiation.md b/content/riak/kv/3.0.2/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..84c25decff
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/concepts/capability-negotiation.md
@@ -0,0 +1,36 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.2/theory/concepts/capability-negotiation
+  - /riak/kv/3.0.2/theory/concepts/capability-negotiation
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/3.0.2/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce
+
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
+
+Rolling upgrades no longer require you to disable and then re-enable features, thanks to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
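+
+If you want to see what has been negotiated on a running node, you can attach to its console (e.g. with `riak attach`) and query the capability subsystem directly. The calls below are a sketch based on the `riak_core_capability` module, and the capability name shown is illustrative; treat both as assumptions to verify against your version:
+
+```erlang
+%% List all capabilities and the modes negotiated across the cluster.
+riak_core_capability:all().
+
+%% Query the negotiated mode of a single capability (illustrative name;
+%% the actual set of capabilities varies by Riak KV version).
+riak_core_capability:get({riak_kv, object_format}).
+```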
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + + + + + diff --git a/content/riak/kv/3.0.2/learn/concepts/causal-context.md b/content/riak/kv/3.0.2/learn/concepts/causal-context.md new file mode 100644 index 0000000000..55839fe69c --- /dev/null +++ b/content/riak/kv/3.0.2/learn/concepts/causal-context.md @@ -0,0 +1,289 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.2/theory/concepts/context + - /riak/kv/3.0.2/theory/concepts/context +--- + + +[concept aae]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/3.0.2/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/3.0.2/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/3.0.2/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+    b. Riak creates sibling values, aka **siblings**, for the object
+
+    c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by the bucket's settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 2.0, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+ * Whether one object is a direct descendant of the other
+ * Whether the objects are direct descendants of a common parent
+ * Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects.
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks][concept causal context]. In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object but they
+can't identify which value was associated with each update.
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#sibling-explosion) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates,
+objects that use them should generally be smaller than objects that use
+vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence of interactions with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`. So if you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion.
At the extreme, having an enormous object in a
+node can cause reads of that object to crash the entire node. Other
+issues include [undue latency][perf latency reduc] and
+out-of-memory errors.
+
+To prevent sibling explosion, we recommend the following:
+
+1. Use [dotted version vectors](#dotted-version-vectors)
+instead of vector clocks for causal
+context.
+2. Always update mutable objects within a read/modify/write cycle. More
+information can be found in the [Object Updates][usage updating objects] doc.
+
+## Resources
+
+* [Evaluating Dotted Version Vectors in Riak]
+* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]
+* [Dotted Version Vector Sets]
+* [A History of Time in Riak]
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/concepts/clusters.md b/content/riak/kv/3.0.2/learn/concepts/clusters.md
new file mode 100644
index 0000000000..5cf31e827d
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/concepts/clusters.md
@@ -0,0 +1,117 @@
+---
+title: "Clusters"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Clusters"
+    identifier: "learn_concepts_clusters"
+    weight: 103
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.2/theory/concepts/Clusters
+  - /riak/kv/3.0.2/theory/concepts/Clusters
+  - /riak/3.0.2/theory/concepts/clusters
+  - /riak/kv/3.0.2/theory/concepts/clusters
+---
+
+
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets
+[concept keys objects]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication
+[glossary node]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode
+[learn dynamo]: {{<baseurl>}}riak/kv/3.0.2/learn/dynamo
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/replication
+
+
+Riak's default mode of operation is to work as a cluster consisting of
+multiple [nodes][glossary node], i.e. multiple well-connected data
+hosts.
+
+Each host in the cluster runs a single instance of Riak, referred to as
+a Riak node. Each Riak node manages a set of virtual nodes, or
+[vnodes][glossary vnode], that are responsible for storing a
+separate portion of the keys stored in the cluster.
+
+In contrast to some high-availability systems, Riak nodes are _not_
+clones of one another, and they do not all participate in fulfilling
+every request. Instead, you can configure, at runtime or at request
+time, the number of nodes on which data is to be replicated, as well as
+when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed.
+
+## The Ring
+
+Though much of this material is covered in our annotated version of the
+Amazon [Dynamo paper][learn dynamo], this section provides a summary of
+how Riak implements the distribution of data throughout a cluster.
+
+Any client interface to Riak interacts with objects in terms of the
+[bucket][concept buckets] and [key][concept keys objects] in which a value is
+stored, as well as the [bucket type][usage bucket types] that is used
+to set the bucket's properties.
+
+Internally, Riak computes a 160-bit binary hash of each bucket/key pair
+and maps this value to a position on an ordered **ring** of all such
+values.
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes define a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![Riak Data Distribution]({{<baseurl>}}images/riak-data-distribution.png)
+
+In the illustration, N is set to 3 and the value `REM` is stored in the
+key `artist`. That key is assigned to 3 partitions out of 32 available
+partitions. When a read request is made to Riak, the ring state will be
+used to determine which partitions are responsible. From there, a
+variety of [configurable parameters][usage replication] determine how
+Riak will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/concepts/crdts.md b/content/riak/kv/3.0.2/learn/concepts/crdts.md
new file mode 100644
index 0000000000..68c61a7f32
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/concepts/crdts.md
@@ -0,0 +1,252 @@
+---
+title_supertext: "Concept"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Data Types"
+    identifier: "learn_concepts_data_types"
+    weight: 104
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.2/theory/concepts/crdts
+  - /riak/kv/3.0.2/theory/concepts/crdts
+---
+
+[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
+[data types converg]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/crdts/#convergence
+[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html
+[data types impl]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/crdts/#implementation
+[concept causal context dvv]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context/#dotted-version-vectors
+[concept causal context sib]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context/#siblings
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context/#vector-clocks
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/strong-consistency
+[dev data types]: {{<baseurl>}}riak/kv/3.0.2/developing/data-types
+[riak_dt]: https://github.com/basho/riak_dt
+[dev data types context]: {{<baseurl>}}riak/kv/3.0.2/developing/data-types/#data-types-and-context
+[glossary node]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution
+
+Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections:
+
+- Counters
+- Flags
+- HyperLogLogs
+- Maps
+- Registers
+- Sets
+
+The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients.
+
+Instead of the usual create, read, update, and delete (CRUD) operations
+performed on key/value pairs, data types enable you to perform
+operations such as removing a register from a map, telling a counter to
+increment itself by 5, or enabling a flag that was previously disabled.
+
+It's important to note, however, that Riak Data Types are operations-based only from the standpoint of connecting clients: the [convergence logic][data types converg] is state-based behind the scenes.
+
+Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below.
+
+For more articles on CRDTs, check out this [reading list][crdts reading list].
+
+
+## Counters
+
+Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used within a map. A counter’s value can only be a positive integer, negative integer, or zero.
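+
+As a quick sketch of that interface with the Erlang client (use cases
+follow below): the bucket type `counters` is an assumption here, and
+must have been created with its `datatype` property set to `counter`;
+the other names are illustrative.
+
+```erlang
+%% Sketch: increment a counter by 5 and read it back.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+Counter = riakc_counter:increment(5, riakc_counter:new()),
+ok = riakc_pb_socket:update_type(Pid, {<<"counters">>, <<"likes">>},
+                                 <<"post1">>, riakc_counter:to_op(Counter)),
+{ok, Fetched} = riakc_pb_socket:fetch_type(Pid, {<<"counters">>, <<"likes">>},
+                                           <<"post1">>).
+```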
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, counters should not be used, because uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, HyperLogLogs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+    Counter: numberOfRetweets,
+    Register: username,
+    Register: tweetContent,
+    Flag: favorited?,
+    Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which includes
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+Registers can only have the binaries stored within them changed. They can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.
+
+
+## Sets
+
+Sets are collections of unique binary values, such as strings. For
+example, if you attempt to add the
+element `shovel` to a set that already contains `shovel`, the operation
+will be ignored by Riak KV. Sets can be used either on their own or
+embedded in a map.
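+
+Here is a sketch of that behavior with the Erlang client, ahead of the
+use cases below. The bucket type `sets` is an assumption (created with
+`datatype` set to `set`), `Pid` is a `riakc_pb_socket` connection as in
+the counter sketch above, and the other names are illustrative:
+
+```erlang
+%% Sketch: add "shovel" twice; the set converges to a single copy
+%% rather than storing a duplicate.
+Set0 = riakc_set:new(),
+Set1 = riakc_set:add_element(<<"shovel">>, Set0),
+Set2 = riakc_set:add_element(<<"shovel">>, Set1),
+ok = riakc_pb_socket:update_type(Pid, {<<"sets">>, <<"cart">>},
+                                 <<"user123">>, riakc_set:to_op(Set2)).
+```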
+
+Some examples of using sets:
+
+- Storing the UUIDs of a user's friends in a social network application
+- Storing items in an e-commerce shopping cart
+
+### Operations
+
+Sets are subject to four basic operations: add an element, remove an
+element, add multiple elements, or remove multiple elements.
+
+
+## Advantages and Disadvantages of Data Types
+
+[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider.
+
+One of the core purposes behind data types is to relieve developers
+using Riak KV of the burden of producing data convergence at the
+application level by absorbing a great deal of that complexity into Riak KV
+itself. Riak KV manages this complexity by building eventual consistency
+into the data types themselves instead of requiring clients to do so.
+
+You can still build applications with Riak KV that treat it as a highly
+available key/value store, and you will always have this choice. What
+Riak Data Types provide is additional flexibility and a broader palette
+of choices.
+
+The trade-off that data types necessarily present is that they don't
+allow you to produce your own convergence logic. If your use case
+demands that you be able to create your own deterministic merge
+functions, then Riak Data Types might not be a good fit.
+
+
+## Implementation
+
+Conflicts between replicas are inevitable in a distributed system like
+Riak KV.
+
+For example, if a map is stored in the key `my_map`, it is always
+possible that the value of `my_map` will be different in nodes A and B.
+
+Without using data types, that conflict must be resolved using
+timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt].
+
+
+## Convergence
+
+The benefit of data types is that Riak KV knows how to resolve value
+conflicts by applying data type-specific rules.
+
+Riak KV does this by remembering the history of a value and broadcasting that
+history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vector][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct.
+
+### Example
+
+Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`, while on another node the set has only one element, `apple`.
+
+What happens when the two nodes communicate and note the divergence?
+
+In this case Riak KV would declare the set with two elements the winner.
+At that point, the node with the incorrect set would be told: "The set
+`fruits` should have elements `apple` and `orange`."
+
+In general, convergence involves the following stages:
+
+1. Check for divergence. If the data types have the same value, Riak KV
+   does nothing. But if divergence is noted...
+2. Riak KV applies data type-specific merge rules, like in the `fruits`
+   set example above, which will result in a "correct" value.
+
+3. 
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. + + + + diff --git a/content/riak/kv/3.0.2/learn/concepts/eventual-consistency.md b/content/riak/kv/3.0.2/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..055b6c1612 --- /dev/null +++ b/content/riak/kv/3.0.2/learn/concepts/eventual-consistency.md @@ -0,0 +1,202 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.2/theory/concepts/Eventual-Consistency + - /riak/kv/3.0.2/theory/concepts/Eventual-Consistency + - /riak/3.0.2/theory/concepts/eventual-consistency + - /riak/kv/3.0.2/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these
+systems need to rely on some form of conflict-resolution mechanism.
+
+One of the things that makes Riak's eventual consistency model powerful
+is that Riak does not dictate how data resolution takes place. While
+Riak does ship with a set of defaults regarding how data is
+[replicated](#replication-properties-and-request-tuning) and how
+[conflicts are resolved][usage conflict resolution], you can override these
+defaults if you want to employ a different strategy.
+
+Among those strategies, you can enable Riak to resolve object conflicts
+automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or
+special eventually consistent [Data Types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types/), or you can resolve those
+conflicts on the application side by employing a use case-specific logic
+of your choosing. More information on this can be found in our guide to
+[conflict resolution][usage conflict resolution].
+
+This variety of options enables you to manage Riak's eventually
+consistent behavior in accordance with your application's [data model
+or models]({{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/).
+
+## Replication Properties and Request Tuning
+
+In addition to providing you different means of resolving conflicts,
+Riak also enables you to fine-tune **replication properties**, which
+determine things like the number of nodes on which data should be stored
+and the number of nodes that are required to respond to read, write, and
+other requests.
+
+An in-depth discussion of these behaviors and how they can be
+implemented on the application side can be found in our guides to
+[replication properties][concept replication] and [conflict resolution][usage conflict resolution].
+
+In addition to our official documentation, we also recommend checking
+out the [Understanding Riak's Configurable
+Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+series from [the Basho blog](https://riak.com/blog/).
+
+## A Simple Example of Eventual Consistency
+
+Let's assume for the moment that a sports news application is storing
+all of its data in Riak. One thing that the application always needs to
+be able to report to users is the identity of the current manager of
+Manchester United, which is stored in the key `manchester-manager` in
+the bucket `premier-league-managers`. This bucket has `allow_mult` set
+to `false`, which means that Riak will resolve all conflicts by itself.
+
+Now let's say that a node in this cluster has recently recovered from
+failure and has an old copy of the key `manchester-manager` stored in
+it, with the value `Alex Ferguson`. The problem is that Sir Alex
+stepped down in 2013 and is no longer the manager. Fortunately, the
+other nodes in the cluster hold the value `David Moyes`, which is
+correct.
+
+Shortly after the recovered node comes back online, other cluster
+members recognize that it is available. Then, a read request for
+`manchester-manager` arrives from the application. Regardless of the
+order in which the responses arrive at the node that is coordinating
+this request, `David Moyes` will be returned as the value to the client,
+because `Alex Ferguson` is recognized as an older value.
+
+Why is this? How does Riak make this decision? Behind the scenes, after
+`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the
+older value on the node that just came back online.
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because 1 of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we've been dealing
+with the possibility of a node for the `manchester-manager` key having
+failed. But to be more precise, we've been talking about a *primary*
+node, one that, when the cluster is perfectly healthy, would bear
+responsibility for that key.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure.
+>
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3`, the client indicated that 3 primary
+nodes must respond for the operation to be considered successful. It
+wasn't successful, but there's no way to tell without performing
+another read whether the write truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels, et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/concepts/keys-and-objects.md b/content/riak/kv/3.0.2/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..5e6d38aec4
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/concepts/keys-and-objects.md
@@ -0,0 +1,53 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.2/theory/concepts/keys-and-values
+  - /riak/kv/3.0.2/theory/concepts/keys-and-values
---
+
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there is more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and occur when more than one actor
+updates an object, a network partition occurs, or a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
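+
+As a sketch of what whole-object semantics look like in practice with
+the Erlang client (the bucket, key, values, and connection details are
+all placeholders):
+
+```erlang
+%% Sketch: Riak has no partial updates, so the client fetches the
+%% entire object, replaces its value, and writes the whole thing back.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+New = riakc_obj:new(<<"customers">>, <<"mariejohnston">>, <<"555-1337">>),
+ok = riakc_pb_socket:put(Pid, New),
+
+%% The fetched object carries its causal context, so the second put is
+%% recorded as a descendant of the first write rather than a conflict.
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"customers">>, <<"mariejohnston">>),
+ok = riakc_pb_socket:put(Pid, riakc_obj:update_value(Obj, <<"555-1212">>)).
+```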
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/concepts/replication.md b/content/riak/kv/3.0.2/learn/concepts/replication.md
new file mode 100644
index 0000000000..eed36a1c1f
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/concepts/replication.md
@@ -0,0 +1,323 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.2/theory/concepts/Replication
+  - /riak/kv/3.0.2/theory/concepts/Replication
+  - /riak/3.0.2/theory/concepts/replication
+  - /riak/kv/3.0.2/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing assurance
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication][cluster ops v3 mdc] capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak chooses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
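+
+As a sketch of how these bounds surface per request with the Erlang
+client (the bucket, key, value, and connection details are placeholders;
+R, W, and DW must each be at most N):
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+
+%% Ask 2 of the N vnodes to acknowledge the write (W=2), one of them
+%% only after durably storing the value (DW=1).
+Obj = riakc_obj:new(<<"my_bucket">>, <<"my_key">>, <<"my_value">>),
+ok = riakc_pb_socket:put(Pid, Obj, [{w, 2}, {dw, 1}]),
+
+%% Wait for 2 vnode replies before the read returns (R=2); with N=3,
+%% an R of up to 3 may be requested.
+{ok, Fetched} = riakc_pb_socket:get(Pid, <<"my_bucket">>, <<"my_key">>, [{r, 2}]).
+```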
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type,
+call it `n_val_of_2`, and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
recommended*. If you do change the value, especially if you
increase it, you might need to force read repair (more on that below).
Overwritten objects and newly stored objects will automatically be
replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable, data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up changing, so that replicas stored under the old preflist are
+no longer where read operations expect to find them.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy][cluster ops v3 mdc]
+
+
+## Read Repair
+
+Read repair occurs when a successful read occurs---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Alternatively, if you have [active
+anti-entropy][usage replication] enabled, your values will
+eventually be replicated as a background task.
+
+For each object that fails to read (or for the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean?
+
+N=3 simply means that three copies of each piece of data will be stored
+in the cluster. That is, three different partitions/vnodes will receive
+copies of the data. **There are no guarantees that the three replicas
+will go to three separate physical nodes**; however, the built-in
+functions for determining where replicas go attempt to distribute the
+data evenly.
+
+As nodes are added and removed from the cluster, the ownership of
+partitions changes and may result in an uneven distribution of the data.
+On some rare occasions, Riak will also aggressively reshuffle ownership
+of the partitions to achieve a more even balance.
+
+For cases where the number of nodes is less than the N value, data will
+likely be duplicated on some nodes. For example, with N=3 and 2 nodes in
+the cluster, one node will likely have one replica, and the other node
+will have two replicas.
+
+## Understanding replication by example
+
+To better understand how data is replicated in Riak, let's take a look at
+a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically,
+we'll focus on two parts of the request: routing an object to a set of
+partitions and storing an object on a partition.
+
+### Routing an object to a set of partitions
+
+ * Assume we have 3 nodes
+ * Assume we store 3 replicas per object (N=3)
+ * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8)
+
+**Note**: It is not recommended that you use such a small ring size.
+This is for demonstration purposes only.
+
+With only 8 partitions our ring will look approximately as follows
+(response from `riak_core_ring_manager:get_my_ring/0` truncated for
+clarity):
+
+```erlang
+(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
+[{0,'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
+```
+
+The node handling this request hashes the bucket/key combination:
+
+```erlang
+(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+```
+
+The DocIdx hash is a 160-bit integer:
+
+```erlang
+(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx.
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+(dev1@127.0.0.1)6> I.
+1045375627425331784151332358177649483819648417632
+```
+
+The node looks up the hashed key in the ring, which returns a list of
+_preferred_ partitions for the given key.
+
+```erlang
+(dev1@127.0.0.1)7> Preflist = riak_core_ring:preflist(DocIdx, Ring).
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0, 'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]
+```
+
+The node chooses the first N partitions from the list. The remaining
+partitions of the "preferred" list are retained as fallbacks to use if
+any of the target partitions are unavailable.
+
+```erlang
+(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist).
+{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0,'dev1@127.0.0.1'}],
+[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]}
+```
+
+The partition information returned from the ring contains a partition
+identifier and the parent node of that partition:
+
+```erlang
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}
+```
+
+The requesting node sends a message to each parent node with the object
+and partition identifier (pseudocode for clarity):
+
+```erlang
+'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232}
+'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+'dev1@127.0.0.1' ! {put, Object, 0}
+```
+
+If any of the target partitions are unavailable, the node sends the object to one
+of the fallbacks. When the message is sent to the fallback node, the
+message references the object and original partition identifier. For
+example, if `dev2@127.0.0.1` were unavailable, the requesting node would
+then try each of the fallbacks. The fallbacks in this example are:
+
+```erlang
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}
+```
+
+The next available fallback node would be `dev3@127.0.0.1`. The
+requesting node would send a message to the fallback node with the
+object and original partition identifier:
+
+```erlang
+'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+```
+
+Note that the partition identifier in the message is the same one that was
+originally sent to `dev2@127.0.0.1`, only this time it is being sent to
+`dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node of
+that partition, it is smart enough to hold on to the object until
+`dev2@127.0.0.1` returns to the cluster.
+
+## Processing partition requests
+
+Processing requests per partition is fairly simple. Each node runs a
+single process (`riak_kv_vnode_master`) that distributes requests to
+individual partition processes (`riak_kv_vnode`). The
+`riak_kv_vnode_master` process maintains a list of partition identifiers
+and corresponding partition processes. If a process does not exist for a
+given partition identifier, a new process is spawned to manage that
+partition, as sketched below.
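+
+A rough sketch of that lookup-or-spawn flow (hypothetical code for
+illustration only, not the actual `riak_kv` source; the module, function
+names, and message shape are invented):
+
+```erlang
+-module(vnode_master_sketch).
+-export([get_vnode/2]).
+
+%% Look up the process for a partition identifier in the master's
+%% index, spawning a new partition process on first use.
+get_vnode(Partition, Index) ->
+    case dict:find(Partition, Index) of
+        {ok, Pid} ->
+            {Pid, Index};
+        error ->
+            Pid = spawn(fun() -> vnode_loop(Partition) end),
+            {Pid, dict:store(Partition, Pid, Index)}
+    end.
+
+%% Stand-in for a riak_kv_vnode process: serve requests for exactly
+%% one partition.
+vnode_loop(Partition) ->
+    receive
+        {put, _Object, Partition} ->
+            vnode_loop(Partition)
+    end.
+```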
+
+The `riak_kv_vnode_master` process treats all requests the same and
+spawns partition processes as needed even when nodes receive requests
+for partitions they do not own. When a partition's parent node is
+unavailable, requests are sent to fallback nodes (handoff). The
+`riak_kv_vnode_master` process on the fallback node spawns a process to
+manage the partition even though the partition does not belong to the
+fallback node.
+
+The individual partition processes perform hometests throughout the life
+of the process. The hometest checks if the current node (`node/0`)
+matches the parent node of the partition as defined in the ring. If the
+process determines that the partition it is managing belongs on another
+node (the parent node), it will attempt to contact that node. If that
+parent node responds, the process will hand off any objects it has
+processed for that partition and shut down. If that parent node does not
+respond, the process will continue to manage that partition and check
+the parent node again after a delay. The hometest is also run by
+partition processes to account for changes in the ring, such as the
+addition of nodes to or removal of nodes from the cluster.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/concepts/strong-consistency.md b/content/riak/kv/3.0.2/learn/concepts/strong-consistency.md
new file mode 100644
index 0000000000..aad07a2a4b
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/concepts/strong-consistency.md
@@ -0,0 +1,105 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Strong Consistency"
+    identifier: "learn_concepts_strong_consistency"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.2/theory/concepts/strong-consistency
+  - /riak/kv/3.0.2/theory/concepts/strong-consistency
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency
+
+{{% note title="Please Note:" %}}
+Riak KV's strong consistency is an experimental feature and may be removed
+from the product in the future. Strong consistency is not commercially
+supported or production-ready. Strong consistency is incompatible with
+Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB
+Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its
+usage in any production environment.
+{{% /note %}}
+
+Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+(i.e. fault) tolerance and high read and write availability.
+
+While this focus on high availability is a great fit for many data
+storage needs, there are also many use cases for which strong data
+consistency is more important than availability. Basho introduced a new
+strong consistency option in version 2.0 to address these use cases.
+In Riak, strong consistency is applied [using bucket types][usage bucket types], which
+enables developers to apply strong consistency guarantees on a per-key
+basis.
+
+Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/3.0.2/configuring/strong-consistency) looking to manage,
+configure, and monitor strong consistency.
+
+## Strong vs. Eventual Consistency
+
+If you successfully write a value to a key in a strongly consistent
+system, the next successful read of that key is guaranteed to show that
+write. A client will never see out-of-date values. The drawback is that
+some operations may fail if an insufficient number of object replicas
+are available. More on this in the section on [trade-offs](#trade-offs).
+
+In an eventually consistent system, on the other hand, a read may return
+an out-of-date value, particularly during system or network failures.
+The advantage of this approach is that reads and writes can succeed even
+when a cluster is experiencing significant service degradation.
+
+### Example
+
+Building on the example presented in the [eventual consistency][concept eventual consistency] doc,
+imagine that information about who manages Manchester United is stored
+in Riak, in the key `manchester-manager`. In the eventual consistency
+example, the value associated with this key was originally
+`David Moyes`, meaning that it was the first successful write to that
+key. But then `Louis van Gaal` became Man U's manager, and a write was
+executed to change the value of `manchester-manager`.
+
+Now imagine that this write failed on one node in a multi-node cluster.
+Thus, all nodes report that the value of `manchester-manager` is `Louis
+van Gaal` except for one. On the errant node, the value of the
+`manchester-manager` key is still `David Moyes`. An eventually
+consistent system is one in which a get request will most likely return
+`Louis van Gaal` but could return the outdated value `David Moyes`.
+
+In a strongly consistent system, conversely, any successful read on
+`manchester-manager` will return `Louis van Gaal` and never `David Moyes`.
+Reads will return `Louis van Gaal` every single time until Man U gets a new
+manager and someone performs a successful write to `manchester-manager`
+to change its value.
+
+It might also be useful to imagine it a bit more abstractly. The
+following causal sequence would characterize a strongly consistent
+system:
+
+1. The value of the key `k` is set to `v`
+2. All successful reads on `k` return `v`
+3. The value of `k` is changed to `v2`
+4. All successful reads on `k` return `v2`
+5. And so forth
+
+At no point in time does this system return an out-of-date value.
+
+The following sequence could characterize an eventually consistent
+system:
+
+1. A write is made that sets the value of the key `k` to `v`
+2. Nearly all reads to `k` return `v`, but a small percentage return
+   `not found`
+3. A write to `k` changes the value to `v2`
+4. Nearly all reads to `k` now return `v2`, but a small number return
+   the outdated `v` (or even `not found`) because the newer value hasn't
+   yet been replicated to all nodes
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/concepts/vnodes.md b/content/riak/kv/3.0.2/learn/concepts/vnodes.md
new file mode 100644
index 0000000000..35a8e5374b
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/concepts/vnodes.md
@@ -0,0 +1,160 @@
+---
+title: "Vnodes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Vnodes"
+    identifier: "learn_concepts_vnodes"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.2/theory/concepts/vnodes
+  - /riak/kv/3.0.2/theory/concepts/vnodes
+---
+
+
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context
+[concept clusters ring]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters/#the-ring
+[concept replication]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/strong-consistency
+[glossary node]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#ring
+[plan backend]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/cluster-capacity
+[use admin riak cli]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-cli
+
+
+Virtual nodes, more commonly referred to as **vnodes**, are processes
+that manage partitions in the Riak [ring][glossary ring]. Each data
+partition in a Riak cluster has a vnode that **claims** that partition.
+Vnodes perform a wide variety of operations, from K/V storage operations
+to guaranteeing [strong consistency][concept strong consistency] if you choose to use that
+feature.
+
+## The Number of Vnodes in a Cluster
+
+The term [node][glossary node] refers to a full instance of Riak,
+be it on its own physical machine or alongside others on a single
+machine, as in a development cluster on your laptop. Each Riak node
+contains multiple vnodes. The number per node is the [ring
+size][concept clusters ring] divided by the number of nodes in the cluster.
+
+This means that in some clusters different nodes will have different
+numbers of data partitions (and hence a different number of vnodes),
+because (ring size / number of nodes) will not produce an even integer.
+If the ring size of your cluster is 64 and you are running three nodes,
+two of your nodes will have 21 vnodes, while the third node holds 22
+vnodes.
+
+The output of the [`riak-admin member-status`][use admin riak cli]
+command shows this:
+
+```
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      34.4%      --      'dev1@127.0.0.1'
+valid      32.8%      --      'dev2@127.0.0.1'
+valid      32.8%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+```
+
+In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of
+64 partitions, while the other two nodes account for 32.8%, i.e. 21 out
+of 64 partitions. This is normal and expected behavior in Riak.
+
+We strongly recommend setting the appropriate ring size, and by
+extension the number of vnodes, prior to building a cluster. A full
+guide can be found in our [cluster planning][plan cluster capacity] documentation.
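+
+The vnode arithmetic from the three-node example above can be checked
+in an Erlang shell (plain integer math, not a Riak API call):
+
+```erlang
+%% ring_creation_size = 64 spread across 3 nodes
+1> 64 div 3.
+21
+2> 64 rem 3.
+1
+%% => two nodes claim 21 partitions each; one claims 21 + 1 = 22
+```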
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, acting as [strong consistency
+ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1004782375664995756265033322492444576013453623296`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node. The report for a
+specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+           0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/dynamo.md b/content/riak/kv/3.0.2/learn/dynamo.md
new file mode 100644
index 0000000000..c311460a80
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/dynamo.md
@@ -0,0 +1,1928 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/3.0.2/theory/dynamo
+  - /riak/kv/3.0.2/theory/dynamo
+---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.

+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV,
+> Cassandra and Voldemort come to mind. You may also remember Dynomite (which
+> predates all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use.
+ +Categories and Subject Descriptors + +* D.4.2 [Operating Systems]: Storage Management; +* D.4.5 [Operating Systems]: Reliability; +* D.4.2 [Operating Systems]: Performance; + +General Terms + +Algorithms, Management, Measurement, Performance, Design, Reliability. + +## 1. Introduction + +Amazon runs a world-wide e-commerce platform that serves tens of millions +customers at peak times using tens of thousands of servers located in many data +centers around the world. There are strict operational requirements on Amazon’s +platform in terms of performance, reliability and efficiency, and to support +continuous growth the platform needs to be highly scalable. Reliability is one +of the most important requirements because even the slightest outage has +significant financial consequences and impacts customer trust. In addition, to +support continuous growth, the platform needs to be highly scalable. + +One of the lessons our organization has learned from operating Amazon’s platform +is that the reliability and scalability of a system is dependent on how its +application state is managed. Amazon uses a highly decentralized, loosely +coupled, service oriented architecture consisting of hundreds of services. In +this environment there is a particular need for storage technologies that are +always available. For example, customers should be able to view and add items to +their shopping cart even if disks are failing, network routes are flapping, or +data centers are being destroyed by tornados. Therefore, the service responsible +for managing shopping carts requires that it can always write to and read from +its data store, and that its data needs to be available across multiple data +centers. + +Dealing with failures in an infrastructure comprised of millions of components +is our standard mode of operation; there are always a small but significant +number of server and network components that are failing at any given time. As +such Amazon’s software systems need to be constructed in a manner that treats +failure handling as the normal case without impacting availability or +performance. + +To meet the reliability and scaling needs, Amazon has developed a number of +storage technologies, of which the Amazon Simple Storage Service (also available +outside of Amazon and known as Amazon S3), is probably the best known. This +paper presents the design and implementation of Dynamo, another highly available +and scalable distributed data store built for Amazon’s platform. Dynamo is used +to manage the state of services that have very high reliability requirements and +need tight control over the tradeoffs between availability, consistency, cost- +effectiveness and performance. Amazon’s platform has a very diverse set of +applications with different storage requirements. A select set of applications +requires a storage technology that is flexible enough to let application +designers configure their data store appropriately based on these tradeoffs to +achieve high availability and guaranteed performance in the most cost effective +manner. + +There are many services on Amazon’s platform that only need primary-key access +to a data store. For many services, such as those that provide best seller +lists, shopping carts, customer preferences, session management, sales rank, and +product catalog, the common pattern of using a relational database would lead to +inefficiencies and limit scale and availability. Dynamo provides a simple +primary-key only interface to meet the requirements of these applications. 
+ +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. It also provides insight into the +tuning of these techniques to meet the requirements of production systems with +very strict performance demands. + +The paper is structured as follows. Section 2 presents the background and +Section 3 presents the related work. Section 4 presents the system design and +Section 5 describes the implementation. Section 6 details the experiences and +insights gained by running Dynamo in production and Section 7 concludes the +paper. There are a number of places in this paper where additional information +may have been appropriate but where protecting Amazon’s business interests +require us to reduce some level of detail. For this reason, the intra- and +inter-datacenter latencies in section 6, the absolute request rates in section +6.2 and outage lengths and workloads in section 6.3 are provided through +aggregate measures instead of absolute details. + + +## 2. Background + +Amazon’s e-commerce platform is composed of hundreds of services that work in +concert to deliver functionality ranging from recommendations to order +fulfillment to fraud detection. Each service is exposed through a well defined +interface and is accessible over the network. 
These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated and diverged from
+> Dynamo's (as described in this paper).
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly-available, is efficient in its resource usage,
+> and has a simple scale out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract.
+ +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. 
+ + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. On the other hand, since the application is aware of the +data schema it can decide on the conflict resolution method that is best suited +for its client’s experience. 
For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbor. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops.
+ +To reduce the additional latency introduced by multi-hop routing, some P2P +systems (e.g., [14]) employ O(1) routing where each peer maintains enough +routing information locally so that it can route requests (to access a data +item) to the appropriate peer within a constant number of hops. + +> Riak KV's gossip protocol communicates between nodes with O(1) routing, and +> maintains local routing information. + +Various storage systems, such as Oceanstore [9] and PAST [17] were built on top +of these routing overlays. Oceanstore provides a global, transactional, +persistent storage service that supports serialized updates on widely replicated +data. To allow for concurrent updates while avoiding many of the problems +inherent with wide-area locking, it uses an update model based on conflict +resolution. Conflict resolution was introduced in [21] to reduce the number of +transaction aborts. Oceanstore resolves conflicts by processing a series of +updates, choosing a total order among them, and then applying them atomically in +that order. It is built for an environment where the data is replicated on an +untrusted infrastructure. By comparison, PAST provides a simple abstraction +layer on top of Pastry for persistent and immutable objects. It assumes that the +application can build the necessary storage semantics (such as mutable files) on +top of it. + +### 3.2 Distributed File Systems and Databases + +Distributing data for performance, availability and durability has been widely +studied in the file system and database systems community. Compared to P2P +storage systems that only support flat namespaces, distributed file systems +typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19] +replicate files for high availability at the expense of consistency. Update +conflicts are typically managed using specialized conflict resolution +procedures. The Farsite system [1] is a distributed file system that does not +use any centralized server like NFS. Farsite achieves high availability and +scalability using replication. The Google File System [6] is another distributed +file system built for hosting the state of Google’s internal applications. GFS +uses a simple design with a single master server for hosting the entire metadata +and where the data is split into chunks and stored in chunkservers. Bayou is a +distributed relational database system that allows disconnected operations and +provides eventual data consistency [21]. + +Among these systems, Bayou, Coda and Ficus allow disconnected operations and are +resilient to issues such as network partitions and outages. These systems differ +on their conflict resolution procedures. For instance, Coda and Ficus perform +system level conflict resolution and Bayou allows application level resolution. +All of them, however, guarantee eventual consistency. + +Similar to these systems, Dynamo allows read and write operations to continue +even during network partitions and resolves updated conflicts using different +conflict resolution mechanisms. Distributed block storage systems like FAB [18] +split large size objects into smaller blocks and stores each block in a highly +available manner. In comparison to these systems, a key-value store is more +suitable in this case because: (a) it is intended to store relatively small +objects (size < 1M) and (b) key-value stores are easier to configure on a per- +application basis. Antiquity is a wide-area distributed storage system designed +to handle multiple server failures [23]. 
It uses a secure log to preserve data
+integrity, replicates each log on multiple servers for durability, and uses
+Byzantine fault tolerance protocols to ensure data consistency. In contrast to
+Antiquity, Dynamo does not focus on the problem of data integrity and security
+and is built for a trusted environment. Bigtable is a distributed storage system
+for managing structured data. It maintains a sparse, multi-dimensional sorted
+map and allows applications to access their data using multiple attributes [2].
+Compared to Bigtable, Dynamo targets applications that require only key/value
+access with primary focus on high availability where updates are not rejected
+even in the wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages.
+
+<table id="table-1">
+  <caption>
+    Table 1: Summary of techniques used in Dynamo and their advantages.
+  </caption>
+  <tr>
+    <th>Problem</th>
+    <th>Technique</th>
+    <th>Advantage</th>
+  </tr>
+  <tr>
+    <td>Partitioning</td>
+    <td>Consistent Hashing</td>
+    <td>Incremental Scalability</td>
+  </tr>
+  <tr>
+    <td>High Availability for writes</td>
+    <td>Vector clocks with reconciliation during reads</td>
+    <td>Version size is decoupled from update rates.</td>
+  </tr>
+  <tr>
+    <td>Handling temporary failures</td>
+    <td>Sloppy Quorum and hinted handoff</td>
+    <td>Provides high availability and durability guarantee when some of the
+    replicas are not available.</td>
+  </tr>
+  <tr>
+    <td>Recovering from permanent failures</td>
+    <td>Anti-entropy using Merkle trees</td>
+    <td>Synchronizes divergent replicas in the background.</td>
+  </tr>
+  <tr>
+    <td>Membership and failure detection</td>
+    <td>Gossip-based membership protocol and failure detection.</td>
+    <td>Preserves symmetry and avoids having a centralized registry for storing
+    membership and node liveness information.</td>
+  </tr>
+</table>
+
+### 4.1 System Interface
+
+Dynamo stores objects associated with a key through a simple interface; it
+exposes two operations: get() and put(). The get(key) operation locates the
+object replicas associated with the key in the storage system and returns a
+single object or a list of objects with conflicting versions along with a
+context. The put(key, context, object) operation determines where the replicas
+of the object should be placed based on the associated key, and writes the
+replicas to disk. The context encodes system metadata about the object that is
+opaque to the caller and includes information such as the version of the object.
+The context information is stored along with the object so that the system can
+verify the validity of the context object supplied in the put request.
+
+> Whereas Dynamo only has the concept of keys, we added a higher level of
+> organization called a "bucket." Keys are stored in buckets and buckets are the
+> level at which several Riak KV properties can be configured (primarily the "N"
+> value, or the replication value). In addition to the bucket+key identifier and
+> value, Riak KV will also return the associated metadata for a given object
+> with each get or put.
+>
+> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API].
+
+[HTTP API]: {{<baseurl>}}riak/kv/3.0.2/developing/api/http/
+[Protocol Buffers API]: {{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers/
+
+Dynamo treats both the key and the object supplied by the caller as an opaque
+array of bytes. It applies an MD5 hash on the key to generate a 128-bit
+identifier, which is used to determine the storage nodes that are responsible
+for serving the key.
+
+> Riak KV concatenates the bucket with the key and runs it through the SHA1 hash
+> to generate a 160-bit identifier which is then used to determine where in the
+> database each datum is stored. Riak KV treats data as an opaque binary, thus
+> enabling users to store virtually anything.
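+
+> To make the shape of this interface concrete, here is a minimal, hypothetical
+> sketch of a get()/put() round trip. The class and storage model are invented
+> for illustration (this is not Riak KV's client API); the point is how the
+> opaque context travels from a read to the subsequent write:
+>
+> ```python
+> # Illustrative only: a toy store exposing Dynamo's two operations.
+> class TinyStore:
+>     def __init__(self):
+>         self._data = {}  # key -> (list of conflicting values, context)
+>
+>     def get(self, key):
+>         """Return all conflicting versions plus an opaque context."""
+>         return self._data.get(key, ([], None))
+>
+>     def put(self, key, context, value):
+>         """Write a new version; the context (from a prior get) says
+>         which version(s) this write supersedes."""
+>         self._data[key] = ([value], context)
+>
+> store = TinyStore()
+> store.put("cart/alice", None, ["book"])
+> values, ctx = store.get("cart/alice")
+> store.put("cart/alice", ctx, values[0] + ["pen"])  # read-modify-write
+> print(store.get("cart/alice")[0])  # [['book', 'pen']]
+> ```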
+
+### 4.2 Partitioning Algorithm
+
+One of the key design requirements for Dynamo is that it must scale
+incrementally. This requires a mechanism to dynamically partition the data over
+the set of nodes (i.e., storage hosts) in the system. Dynamo’s partitioning
+scheme relies on consistent hashing to distribute the load across multiple
+storage hosts. In consistent hashing [10], the output range of a hash function
+is treated as a fixed circular space or “ring” (i.e. the largest hash value
+wraps around to the smallest hash value). Each node in the system is assigned a
+random value within this space which represents its “position” on the ring. Each
+data item identified by a key is assigned to a node by hashing the data item’s
+key to yield its position on the ring, and then walking the ring clockwise to
+find the first node with a position larger than the item’s position. Thus, each
+node becomes responsible for the region in the ring between it and its
+predecessor node on the ring. The principal advantage of consistent hashing is
+that departure or arrival of a node only affects its immediate neighbors and
+other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data around
+> the ring to partitions responsible for storing data. The ring has a maximum
+> key space of 2^160. Each bucket+key (and its associated value) is hashed to a
+> location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Each storage node will
+> optimistically handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo uses
+a variant of consistent hashing (similar to the one used in [10, 20]): instead
+of mapping a node to a single point in the circle, each node gets assigned to
+multiple points in the ring. To this end, Dynamo uses the concept of “virtual
+nodes”. A virtual node looks like a single node in the system, but each node can
+be responsible for more than one virtual node. Effectively, when a new node is
+added to the system, it is assigned multiple positions (henceforth, “tokens”) in
+the ring. The process of fine-tuning Dynamo’s partitioning scheme is discussed
+in Section 6.
+
+> Riak KV also has the concept of virtual nodes and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+* If a node becomes unavailable (due to failures or routine maintenance), the
+load handled by this node is evenly dispersed across the remaining available
+nodes.
+
+* When a node becomes available again, or a new node is added to the system, the
+newly available node accepts a roughly equivalent amount of load from each of
+the other available nodes.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+The number of virtual nodes that a node is responsible for can be decided based
+on its capacity, accounting for heterogeneity in the physical infrastructure.
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters/#the-ring
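+
+> To make the ring walk concrete, here is a minimal sketch of consistent hashing
+> with virtual nodes. It is illustrative only (Dynamo hashes keys with MD5; Riak
+> KV hashes bucket+key with SHA-1), and the node names and token count are
+> assumptions for the example:
+>
+> ```python
+> import bisect
+> import hashlib
+>
+> def position(s):
+>     # Hash a string to an integer position on the ring.
+>     return int(hashlib.md5(s.encode()).hexdigest(), 16)
+>
+> class Ring:
+>     def __init__(self, nodes, tokens_per_node=3):
+>         # Give each node several pseudo-random positions ("virtual nodes").
+>         self.ring = sorted((position(f"{node}:{i}"), node)
+>                            for node in nodes
+>                            for i in range(tokens_per_node))
+>         self.positions = [pos for pos, _ in self.ring]
+>
+>     def owner(self, key):
+>         # Walk clockwise: the first token at or past the key's position
+>         # owns the key; wrap around at the end of the ring.
+>         idx = bisect.bisect_right(self.positions, position(key))
+>         return self.ring[idx % len(self.ring)][1]
+>
+> ring = Ring(["node-a", "node-b", "node-c"])
+> print(ring.owner("cart/alice"))  # the same node every time for this key
+> ```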
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts. Each data item is replicated at N hosts, where N is a parameter
+configured “per-instance”. Each key, k, is assigned to a coordinator node
+(described in the previous section). The coordinator is in charge of the
+replication of the data items that fall within its range. In addition to locally
+storing each key within its range, the coordinator replicates these keys at the
+N-1 clockwise successor nodes in the ring. This results in a system where each
+node is responsible for the region of the ring between it and its Nth
+predecessor. In <a href="#figure-2">Figure 2</a>, node B replicates the key k at
+nodes C and D in addition to storing it locally. Node D will store the keys that
+fall in the ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic. Remember
+> the concept of a bucket we covered above? In Riak KV, the replication
+> parameter, "N" (also called "n_val"), is configurable at the bucket level.
+> The default n_val in Riak KV is 3, meaning that out of the box Riak KV will
+> store three replicas of your data on three different partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called the
+preference list. The system is designed, as will be explained in Section 4.8, so
+that every node in the system can determine which nodes should be in this list
+for any particular key. To account for node failures, the preference list
+contains more than N nodes. Note that with the use of virtual nodes, it is
+possible that the first N successor positions for a particular key may be owned
+by less than N distinct physical nodes (i.e. a node may hold more than one of
+the first N positions). To address this, the preference list for a key is
+constructed by skipping positions in the ring to ensure that the list contains
+only distinct physical nodes.
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be propagated
+to all replicas asynchronously. A put() call may return to its caller before the
+update has been applied at all the replicas, which can result in scenarios where
+a subsequent get() operation may return an object that does not have the latest
+updates. If there are no failures, then there is a bound on the update
+propagation times. However, under certain failure scenarios (e.g., server
+outages or network partitions), updates may not arrive at all replicas for an
+extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously which, as you would expect, could result in a datum being
+> returned to the client that is out of date. But don't worry. We built in some
+> mechanisms to address this.
+
+There is a category of applications in Amazon’s platform that can tolerate such
+inconsistencies and can be constructed to operate under these conditions. For
+example, the shopping cart application requires that an “Add to Cart” operation
+can never be forgotten or rejected.
+If the most recent state of the cart is
+unavailable, and a user makes changes to an older version of the cart, that
+change is still meaningful and should be preserved. But at the same time it
+shouldn’t supersede the currently unavailable state of the cart, which itself
+may contain changes that should be preserved. Note that both “add to cart” and
+“delete item from cart” operations are translated into put requests to Dynamo.
+When a customer wants to add an item to (or remove from) a shopping cart and the
+latest version is not available, the item is added to (or removed from) the
+older version and the divergent versions are reconciled later.
+
+> Much like Dynamo was suited to the design of the shopping cart, Riak KV, and
+> its tradeoffs, are appropriate for a certain set of use cases. We happen to
+> feel that _most_ use cases can tolerate some level of eventual consistency.
+
+In order to provide this kind of guarantee, Dynamo treats the result of each
+modification as a new and immutable version of the data. It allows for multiple
+versions of an object to be present in the system at the same time. Most of the
+time, new versions subsume the previous version(s), and the system itself can
+determine the authoritative version (syntactic reconciliation). However, version
+branching may happen, in the presence of failures combined with concurrent
+updates, resulting in conflicting versions of an object. In these cases, the
+system cannot reconcile the multiple versions of the same object and the client
+must perform the reconciliation in order to collapse multiple branches of data
+evolution back into one (semantic reconciliation). A typical example of a
+collapse operation is “merging” different versions of a customer’s shopping
+cart. Using this reconciliation mechanism, an “add to cart” operation is never
+lost. However, deleted items can resurface.
+
+> The same holds true for Riak KV. If, by way of some failure and concurrent
+> update (rare but quite possible), there come to exist multiple versions of the
+> same object, Riak KV will push this decision down to the client (who are we to
+> tell you which is the authoritative object?). All that said, if your
+> application doesn't need this level of version control, we enable you to turn
+> the usage of vector clocks on and off at the bucket level.
+
+It is important to understand that certain failure modes can potentially result
+in the system having not just two but several versions of the same data. Updates
+in the presence of network partitions and node failures can potentially result
+in an object having distinct version sub-histories, which the system will need
+to reconcile in the future. This requires us to design applications that
+explicitly acknowledge the possibility of multiple versions of the same data (in
+order to never lose any updates).
+
+> Ditto.
+
+Dynamo uses vector clocks [12] in order to capture causality between different
+versions of the same object. A vector clock is effectively a list of (node,
+counter) pairs. One vector clock is associated with every version of every
+object. One can determine whether two versions of an object are on parallel
+branches or have a causal ordering by examining their vector clocks. If the
+counters on the first object’s clock are less-than-or-equal to all of the nodes
+in the second clock, then the first is an ancestor of the second and can be
+forgotten. Otherwise, the two changes are considered to be in conflict and
+require reconciliation.
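+
+> The ancestor test just described is easy to express in code. The following is
+> a minimal, illustrative sketch (clocks as plain dictionaries, with node names
+> borrowed from the walkthrough below), not an excerpt from Dynamo or Riak KV:
+>
+> ```python
+> def descends(b, a):
+>     """True if clock b (a dict of node -> counter) descends from clock a,
+>     i.e. every counter in a is <= its counterpart in b (missing = 0)."""
+>     return all(b.get(node, 0) >= counter for node, counter in a.items())
+>
+> def in_conflict(a, b):
+>     # Neither clock descends from the other: the versions are siblings
+>     # and must be reconciled semantically.
+>     return not descends(a, b) and not descends(b, a)
+>
+> d2 = {"Sx": 2}
+> d3 = {"Sx": 2, "Sy": 1}
+> d4 = {"Sx": 2, "Sz": 1}
+> print(descends(d3, d2))    # True -> D2 is an ancestor and can be forgotten
+> print(in_conflict(d3, d4)) # True -> keep both, reconcile on a later read
+> ```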
+
+> As you may have already figured out, Riak KV uses vector clocks for object
+> versioning, too. Here are a whole host of resources to keep you busy for a while:
+>
+> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vector-clock)
+>
+> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/)
+> |
+> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
+>
+> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
+>
+> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)
+
+In Dynamo, when a client wishes to update an object, it must specify which
+version it is updating. This is done by passing the context it obtained from an
+earlier read operation, which contains the vector clock information. Upon
+processing a read request, if Dynamo has access to multiple branches that cannot
+be syntactically reconciled, it will return all the objects at the leaves, with
+the corresponding version information in the context. An update using this
+context is considered to have reconciled the divergent versions and the branches
+are collapsed into a single new version.
+
+**<figure id="figure-3" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure3.png">
+  <figcaption>
+    Figure 3: Version evolution of an object over time.
+  </figcaption>
+</figure>**
+
+To illustrate the use of vector clocks, let us consider the example shown in
+<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say
+Sx) that handles the write for this key increases its sequence number and uses
+it to create the data's vector clock. The system now has the object D1 and its
+associated clock [(Sx, 1)]. The client updates the object. Assume the same node
+handles this request as well. The system now also has object D2 and its
+associated clock [(Sx, 2)]. D2 descends from D1 and therefore overwrites D1;
+however, there may be replicas of D1 lingering at nodes that have not yet seen
+D2. Let us assume that the same client updates the object again and a different
+server (say Sy) handles the request. The system now has data D3 and its
+associated clock [(Sx, 2), (Sy, 1)].
+
+Next assume a different client reads D2 and then tries to update it, and another
+node (say Sz) does the write. The system now has D4 (descendant of D2) whose
+version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2 could
+determine, upon receiving D4 and its clock, that D1 and D2 are overwritten by
+the new data and can be garbage collected. A node that is aware of D3 and
+receives D4 will find that there is no causal relation between them. In other
+words, there are changes in D3 and D4 that are not reflected in each other. Both
+versions of the data must be kept and presented to a client (upon a read) for
+semantic reconciliation.
+
+Now assume some client reads both D3 and D4 (the context will reflect that both
+values were found by the read). The read's context is a summary of the clocks of
+D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client performs the
+reconciliation and node Sx coordinates the write, Sx will update its sequence
+number in the clock. The new data D5 will have the following clock: [(Sx, 3),
+(Sy, 1), (Sz, 1)].
+
+A possible issue with vector clocks is that the size of vector clocks may grow
+if many servers coordinate the writes to an object.
+In practice, this is not
+likely because the writes are usually handled by one of the top N nodes in the
+preference list. In case of network partitions or multiple server failures,
+write requests may be handled by nodes that are not in the top N nodes in the
+preference list, causing the size of the vector clock to grow. In these
+scenarios, it is desirable to limit the size of the vector clock. To this end,
+Dynamo employs the following clock truncation scheme: Along with each (node,
+counter) pair, Dynamo stores a timestamp that indicates the last time the node
+updated the data item. When the number of (node, counter) pairs in the vector
+clock reaches a threshold (say 10), the oldest pair is removed from the clock.
+Clearly, this truncation scheme can lead to inefficiencies in reconciliation as
+the descendant relationships cannot be derived accurately. However, this problem
+has not surfaced in production and therefore this issue has not been thoroughly
+investigated.
+
+> Riak KV does a certain amount of vector clock pruning to keep vector clock
+> growth under control.
+
+
+### 4.5 Execution of get() and put() operations
+
+Any storage node in Dynamo is eligible to receive client get and put operations
+for any key. In this section, for the sake of simplicity, we describe how these
+operations are performed in a failure-free environment and in the subsequent
+section we describe how read and write operations are executed during failures.
+
+> Any node in the Riak KV ring can coordinate a request. The Riak KV information
+> in this section applies to a failure-free environment.
+
+Both get and put operations are invoked using Amazon’s infrastructure-specific
+request processing framework over HTTP. There are two strategies that a client
+can use to select a node: (1) route its request through a generic load balancer
+that will select a node based on load information, or (2) use a partition-aware
+client library that routes requests directly to the appropriate coordinator
+nodes. The advantage of the first approach is that the client does not have to
+link any code specific to Dynamo in its application, whereas the second strategy
+can achieve lower latency because it skips a potential forwarding step.
+
+A node handling a read or write operation is known as the coordinator.
+Typically, this is the first among the top N nodes in the preference list. If
+the requests are received through a load balancer, requests to access a key may
+be routed to any random node in the ring. In this scenario, the node that
+receives the request will not coordinate it if the node is not in the top N of
+the requested key’s preference list. Instead, that node will forward the request
+to the first among the top N nodes in the preference list.
+
+Read and write operations involve the first N healthy nodes in the preference
+list, skipping over those that are down or inaccessible. When all nodes are
+healthy, the top N nodes in a key’s preference list are accessed. When there are
+node failures or network partitions, nodes that are lower ranked in the
+preference list are accessed.
+
+To maintain consistency among its replicas, Dynamo uses a consistency protocol
+similar to those used in quorum systems. This protocol has two key configurable
+values: R and W. R is the minimum number of nodes that must participate in a
+successful read operation. W is the minimum number of nodes that must
+participate in a successful write operation. Setting R and W such that R + W > N
+yields a quorum-like system.
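+
+> The overlap that R + W > N buys you is easy to verify by brute force; the
+> values below are assumptions for the example:
+>
+> ```python
+> from itertools import combinations
+>
+> # With R + W > N, any read set of size R and write set of size W drawn
+> # from the same N replicas must intersect, so a read always reaches at
+> # least one replica that accepted the latest write.
+> N, R, W = 3, 2, 2
+> assert R + W > N
+>
+> replicas = ["n1", "n2", "n3"]
+> for write_set in combinations(replicas, W):
+>     for read_set in combinations(replicas, R):
+>         assert set(write_set) & set(read_set), "a read missed the write!"
+> print("every R-node read overlaps every W-node write")
+> ```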
+
+In this model, the latency of a get (or put) operation is dictated by the
+slowest of the R (or W) replicas. For this reason, R and W are usually
+configured to be less than N, to provide better latency.
+
+> Riak KV makes use of the same values. But, thanks to our concept of buckets,
+> we made it a bit more customizable. The default R and W values are set at the
+> bucket level but can be configured at the request level if the developer deems
+> it necessary for certain data. "Quorum" as described in Dynamo is the default
+> setting in Riak KV.
+>
+> Some more resources on R and W:
+>
+> [REST API]({{<baseurl>}}riak/kv/3.0.2/developing/api/http/)
+>
+> [Writing Data]({{<baseurl>}}riak/kv/3.0.2/developing/usage/creating-objects/)
+>
+> [Reading Data]({{<baseurl>}}riak/kv/3.0.2/developing/usage/reading-objects/)
+
+Upon receiving a put() request for a key, the coordinator generates the vector
+clock for the new version and writes the new version locally. The coordinator
+then sends the new version (along with the new vector clock) to the N
+highest-ranked reachable nodes. If at least W-1 nodes respond then the write is
+considered successful.
+
+> In Riak KV a write is considered successful when the total number of
+> responding writes equals W. This need not be a durable write, which is a
+> separate value in Riak KV labeled DW.
+
+Similarly, for a get() request, the coordinator requests all existing versions
+of data for that key from the N highest-ranked reachable nodes in the preference
+list for that key, and then waits for R responses before returning the result to
+the client. If the coordinator ends up gathering multiple versions of the data,
+it returns all the versions it deems to be causally unrelated. The divergent
+versions are then reconciled and the reconciled version superseding the current
+versions is written back.
+
+> Same for Riak KV. Reconciling divergent versions in Riak KV is called
+> [Read Repair]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication/#read-repair).
+
+
+### 4.6 Handling Failures: Hinted Handoff
+
+If Dynamo used a traditional quorum approach it would be unavailable during
+server failures and network partitions, and would have reduced durability even
+under the simplest of failure conditions. To remedy this it does not enforce
+strict quorum membership and instead it uses a “sloppy quorum”; all read and
+write operations are performed on the first N healthy nodes from the preference
+list, which may not always be the first N nodes encountered while walking the
+consistent hashing ring.
+
+> [Hinted handoff] is built into Riak KV's core.
+>
+> You can get a glimpse of Riak KV's preference list (or *preflist*) calculation
+> in the [Replication] walkthrough.
+
+[Hinted handoff]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#hinted-handoff
+[Replication]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/replication/
+
+Consider the example of Dynamo configuration given in <a href="#figure-2">Figure
+2</a> with N=3. In this example, if node A is temporarily down or unreachable
+during a write operation then a replica that would normally have lived on A will
+now be sent to node D. This is done to maintain the desired availability and
+durability guarantees. The replica sent to D will have a hint in its metadata
+that suggests which node was the intended recipient of the replica (in this case
+A). Nodes that receive hinted replicas will keep them in a separate local
+database that is scanned periodically. Upon detecting that A has recovered, D
+will attempt to deliver the replica to A. Once the transfer succeeds, D may
+delete the object from its local store without decreasing the total number of
+replicas in the system.
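+
+> The handoff flow just described is easy to see in miniature. This is a toy,
+> illustrative model (node names follow the Figure 2 example; none of this is
+> Riak KV code):
+>
+> ```python
+> class Node:
+>     def __init__(self, name):
+>         self.name, self.up = name, True
+>         self.store = {}   # key -> value
+>         self.hinted = {}  # (key, intended node) -> value
+>
+> def write(key, value, preference_list, fallback):
+>     for node in preference_list:
+>         if node.up:
+>             node.store[key] = value
+>         else:
+>             # Intended node is down: park the replica on the fallback,
+>             # tagged with a hint naming the intended recipient.
+>             fallback.hinted[(key, node.name)] = value
+>
+> def handoff(fallback, recovered):
+>     # Periodic scan: deliver hinted replicas home, then drop the local copy.
+>     for (key, intended), value in list(fallback.hinted.items()):
+>         if intended == recovered.name and recovered.up:
+>             recovered.store[key] = value
+>             del fallback.hinted[(key, intended)]
+>
+> a, b, c, d = Node("A"), Node("B"), Node("C"), Node("D")
+> a.up = False
+> write("k", "v", [a, b, c], fallback=d)  # replica for A lands on D
+> a.up = True
+> handoff(d, a)
+> print(a.store)  # {'k': 'v'} -- delivered to A after recovery
+> ```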
+
+Using hinted handoff, Dynamo ensures that read and write operations do not fail
+due to temporary node or network failures. Applications that need the highest
+level of availability can set W to 1, which ensures that a write is accepted as
+long as a single node in the system has durably written the key to its local
+store. Thus, the write request is only rejected if all nodes in the system are
+unavailable. However, in practice, most Amazon services in production set a
+higher W to meet the desired level of durability. A more detailed discussion of
+configuring N, R and W follows in section 6.
+
+> As mentioned previously, Riak KV does not require that a write be durable,
+> only that a vnode responds in the affirmative. If you require a durable write
+> in the way mentioned here, use DW.
+
+It is imperative that a highly available storage system be capable of handling
+the failure of an entire data center(s). Data center failures happen due to
+power outages, cooling failures, network failures, and natural disasters. Dynamo
+is configured such that each object is replicated across multiple data centers.
+In essence, the preference list of a key is constructed such that the storage
+nodes are spread across multiple data centers. These datacenters are connected
+through high speed network links. This scheme of replicating across multiple
+datacenters allows us to handle entire data center failures without a data
+outage.
+
+> [Multi Datacenter Replication] was previously only implemented in the
+> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. Now it
+> is available in all versions from Riak KV 2.2.6 onwards.
+
+[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/3.0.2/using/reference/v3-multi-datacenter/architecture/
+[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
+
+
+### 4.7 Handling Permanent Failures: Replica Synchronization
+
+Hinted handoff works best if the system membership churn is low and node
+failures are transient. There are scenarios under which hinted replicas become
+unavailable before they can be returned to the original replica node. To handle
+this and other threats to durability, Dynamo implements an anti-entropy (replica
+synchronization) protocol to keep the replicas synchronized.
+
+> Read repair, mentioned above, is the simplest form of anti-entropy. But it is
+> passive, not active as this section describes.
+
+To detect the inconsistencies between replicas faster and to minimize the amount
+of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a hash tree
+where leaves are hashes of the values of individual keys. Parent nodes higher in
+the tree are hashes of their respective children. The principal advantage of a
+Merkle tree is that each branch of the tree can be checked independently without
+requiring nodes to download the entire tree or the entire data set. Moreover,
+Merkle trees help in reducing the amount of data that needs to be transferred
+while checking for inconsistencies among replicas. For instance, if the hash
+values of the root of two trees are equal, then the values of the leaf nodes in
+the tree are equal and the nodes require no synchronization. If not, it implies
+that the values of some replicas are different. In such cases, the nodes may
+exchange the hash values of children and the process continues until it reaches
+the leaves of the trees, at which point the hosts can identify the keys that are
+“out of sync”. Merkle trees minimize the amount of data that needs to be
+transferred for synchronization and reduce the number of disk reads performed
+during the anti-entropy process.
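+
+> As a rough illustration of the root-comparison step (not how Dynamo or Riak KV
+> actually build or exchange their trees), consider two replicas of a small key
+> range:
+>
+> ```python
+> import hashlib
+>
+> def leaf_hashes(items):
+>     # Leaves are hashes of individual key/value pairs, in key order.
+>     return [hashlib.sha1(f"{k}={v}".encode()).digest()
+>             for k, v in sorted(items.items())]
+>
+> def root(hashes):
+>     # Hash pairs of children upward until a single root remains.
+>     while len(hashes) > 1:
+>         if len(hashes) % 2:
+>             hashes.append(hashes[-1])  # duplicate the odd node out
+>         hashes = [hashlib.sha1(x + y).digest()
+>                   for x, y in zip(hashes[::2], hashes[1::2])]
+>     return hashes[0]
+>
+> replica1 = {"k1": "v1", "k2": "v2", "k3": "v3", "k4": "v4"}
+> replica2 = dict(replica1, k3="stale")
+>
+> # Equal roots would mean the whole range agrees with no data transfer;
+> # unequal roots mean descend into children to find the divergent keys.
+> print(root(leaf_hashes(replica1)) == root(leaf_hashes(replica2)))  # False
+> ```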
+
+> Riak KV implements a Merkle-tree-based Active Anti-Entropy (*AAE*).
+
+Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
+separate Merkle tree for each key range (the set of keys covered by a virtual
+node) it hosts. This allows nodes to compare whether the keys within a key range
+are up-to-date. In this scheme, two nodes exchange the root of the Merkle tree
+corresponding to the key ranges that they host in common. Subsequently, using
+the tree traversal scheme described above the nodes determine if they have any
+differences and perform the appropriate synchronization action. The disadvantage
+with this scheme is that many key ranges change when a node joins or leaves the
+system thereby requiring the tree(s) to be recalculated. This issue is
+addressed, however, by the refined partitioning scheme described in Section 6.2.
+
+
+### 4.8 Membership and Failure Detection
+
+> This section is well expressed in [Adding and Removing Nodes] and
+> [Failure Scenarios].
+
+[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency/
+
+#### 4.8.1 Ring Membership
+
+> Riak KV operators can trigger node management via the
+> [riak-admin command-line tool].
+
+[riak-admin command-line tool]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/
+
+In Amazon’s environment node outages (due to failures and maintenance tasks) are
+often transient but may last for extended intervals. A node outage rarely
+signifies a permanent departure and therefore should not result in rebalancing
+of the partition assignment or repair of the unreachable replicas. Similarly,
+manual error could result in the unintentional startup of new Dynamo nodes. For
+these reasons, it was deemed appropriate to use an explicit mechanism to
+initiate the addition and removal of nodes from a Dynamo ring. An administrator
+uses a command line tool or a browser to connect to a Dynamo node and issue a
+membership change to join a node to a ring or remove a node from a ring. The
+node that serves the request writes the membership change and its time of issue
+to persistent store. The membership changes form a history because nodes can be
+removed and added back multiple times.
+
+> Nodes are manually added using the `riak-admin cluster join` command.
+>
+> When a node permanently departs, rebalancing is triggered using the
+> `riak-admin cluster leave` command.
+
+A gossip-based protocol propagates membership changes and maintains an
+eventually consistent view of membership. Each node contacts a peer chosen at
+random every second and the two nodes efficiently reconcile their persisted
+membership change histories.
+
+> Riak KV's ring state holds membership information, and is propagated via
+> [gossiping], including random reconciliation, defaulting to once a minute.
+
+[gossiping]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#gossiping
+
+When a node starts for the first time, it chooses its set of tokens (virtual
+nodes in the consistent hash space) and maps nodes to their respective token
+sets.
+The mapping is persisted on disk and initially contains only the local
+node and token set. The mappings stored at different Dynamo nodes are reconciled
+during the same communication exchange that reconciles the membership change
+histories. Therefore, partitioning and placement information also propagates via
+the gossip-based protocol and each storage node is aware of the token ranges
+handled by its peers. This allows each node to forward a key’s read/write
+operations to the right set of nodes directly.
+
+> These tokens are vnodes (virtual nodes) in Riak KV.
+
+
+#### 4.8.2 External Discovery
+
+The mechanism described above could temporarily result in a logically
+partitioned Dynamo ring. For example, the administrator could contact node A to
+join A to the ring, then contact node B to join B to the ring. In this scenario,
+nodes A and B would each consider itself a member of the ring, yet neither would
+be immediately aware of the other. To prevent logical partitions, some Dynamo
+nodes play the role of seeds. Seeds are nodes that are discovered via an
+external mechanism and are known to all nodes. Because all nodes eventually
+reconcile their membership with a seed, logical partitions are highly unlikely.
+Seeds can be obtained either from static configuration or from a configuration
+service. Typically seeds are fully functional nodes in the Dynamo ring.
+
+> To avoid these sorts of logical partitions, multiple Riak cluster changes
+> are configured as one batch. Any changes must first be reviewed with
+> `riak-admin cluster plan`, then the changes are committed with `riak-admin
+> cluster commit`. The new ring state is gossiped.
+>
+> See _[The Node Join Process]_ for more.
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+
+
+#### 4.8.3 Failure Detection
+
+Failure detection in Dynamo is used to avoid attempts to communicate with
+unreachable peers during get() and put() operations and when transferring
+partitions and hinted replicas. For the purpose of avoiding failed attempts at
+communication, a purely local notion of failure detection is entirely
+sufficient: node A may consider node B failed if node B does not respond to node
+A’s messages (even if B is responsive to node C’s messages). In the presence of
+a steady rate of client requests generating inter-node communication in the
+Dynamo ring, a node A quickly discovers that a node B is unresponsive when B
+fails to respond to a message; Node A then uses alternate nodes to service
+requests that map to B's partitions; A periodically retries B to check for the
+latter's recovery. In the absence of client requests to drive traffic between
+two nodes, neither node really needs to know whether the other is reachable and
+responsive.
+
+Decentralized failure detection protocols use a simple gossip-style protocol
+that enables each node in the system to learn about the arrival (or departure)
+of other nodes. For detailed information on decentralized failure detectors and
+the parameters affecting their accuracy, the interested reader is referred to
+[8]. Early designs of Dynamo used a decentralized failure detector to maintain a
+globally consistent view of failure state. Later it was determined that the
+explicit node join and leave methods obviate the need for a global view of
+failure state.
+This is because nodes are notified of permanent node additions
+and removals by the explicit node join and leave methods and temporary node
+failures are detected by the individual nodes when they fail to communicate with
+others (while forwarding requests).
+
+> Riak KV follows the same mechanism, by manually triggering permanent ring
+> state changes, and gossiping the new state.
+
+
+### 4.9 Adding/Removing Storage Nodes
+
+When a new node (say X) is added into the system, it gets assigned a number of
+tokens that are randomly scattered on the ring. For every key range that is
+assigned to node X, there may be a number of nodes (less than or equal to N)
+that are currently in charge of handling keys that fall within its token range.
+Due to the allocation of key ranges to X, some existing nodes no longer have to
+store some of their keys and these nodes transfer those keys to X. Let us
+consider a simple bootstrapping scenario where node X is added to the ring shown
+in <a href="#figure-2">Figure 2</a> between A and B. When X is added to the
+system, it is in charge of storing keys in the ranges (F, G], (G, A] and (A, X].
+As a consequence, nodes B, C and D no longer have to store the keys in these
+respective ranges. Therefore, nodes B, C, and D will offer to, and upon
+confirmation from X, transfer the appropriate set of keys. When a node is
+removed from the system, the reallocation of keys happens in a reverse process.
+
+> Riak KV does not randomly assign vnodes, but rather, iterates through the list
+> of partitions, assigning them to nodes in a round-robin style.
+
+Operational experience has shown that this approach distributes the load of key
+distribution uniformly across the storage nodes, which is important to meet the
+latency requirements and to ensure fast bootstrapping. Finally, by adding a
+confirmation round between the source and the destination, it is made sure that
+the destination node does not receive any duplicate transfers for a given key
+range.
+
+
+## 5. Implementation
+
+In Dynamo, each storage node has three main software components: request
+coordination, membership and failure detection, and a local persistence engine.
+All these components are implemented in Java.
+
+> Riak KV is implemented in Erlang. Request coordination and membership behavior
+> is defined by [riak_core] and implemented by [Riak KV].
+
+[riak_core]: http://github.com/basho/riak_core
+[Riak KV]: http://github.com/basho/riak_kv
+
+Dynamo’s local persistence component allows for different storage engines to be
+plugged in. Engines that are in use are Berkeley Database (BDB) Transactional
+Data Store, BDB Java Edition, MySQL, and an in-memory buffer with persistent
+backing store. The main reason for designing a pluggable persistence component
+is to choose the storage engine best suited for an application’s access
+patterns. For instance, BDB can handle objects typically in the order of tens of
+kilobytes whereas MySQL can handle objects of larger sizes. Applications choose
+Dynamo’s local persistence engine based on their object size distribution. The
+majority of Dynamo’s production instances use BDB Transactional Data Store.
+
+> Riak KV ships with various [backend options]. [Bitcask] is the default, but
+> [LevelDB] and Main [Memory] are also used heavily in production (in that
+> order). You can also use more than one backend in production via the Multi
+> backend configuration.
+>
+> Bitcask is a fast and reliable choice, but does have some limitations at very
+> large scales.
+> For larger clusters, you may want to choose LevelDB (which also supports
+> [secondary indexes]). The Memory backend is an excellent choice when speed is
+> important and durability is not. It also has TTL support.
+
+[backend options]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/
+[Bitcask]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask/
+[LevelDB]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb/
+[Memory]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/memory/
+[secondary indexes]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes/
+
+The request coordination component is built on top of an event-driven messaging
+substrate where the message processing pipeline is split into multiple stages
+similar to the SEDA architecture [24]. All communications are implemented using
+Java NIO channels. The coordinator executes the read and write requests on
+behalf of clients by collecting data from one or more nodes (in the case of
+reads) or storing data at one or more nodes (for writes). Each client request
+results in the creation of a state machine on the node that received the client
+request. The state machine contains all the logic for identifying the nodes
+responsible for a key, sending the requests, waiting for responses, potentially
+doing retries, processing the replies and packaging the response to the client.
+Each state machine instance handles exactly one client request. For instance, a
+read operation implements the following state machine: (i) send read requests to
+the nodes, (ii) wait for the minimum number of required responses, (iii) if too
+few replies were received within a given time bound, fail the request, (iv)
+otherwise gather all the data versions and determine the ones to be returned and
+(v) if versioning is enabled, perform syntactic reconciliation and generate an
+opaque write context that contains the vector clock that subsumes all the
+remaining versions. For the sake of brevity the failure handling and retry
+states are left out.
+
+> Request coordination in Riak KV uses Erlang message passing, but follows a
+> similar state machine.
+
+After the read response has been returned to the caller the state machine waits
+for a small period of time to receive any outstanding responses. If stale
+versions were returned in any of the responses, the coordinator updates those
+nodes with the latest version. This process is called read repair because it
+repairs replicas that have missed a recent update at an opportunistic time and
+relieves the anti-entropy protocol from having to do it.
+
+> Riak KV implements [Read Repair].
+
+[Read Repair]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication/#read-repair
+
+As noted earlier, write requests are coordinated by one of the top N nodes in
+the preference list. Although it is desirable always to have the first node
+among the top N to coordinate the writes thereby serializing all writes at a
+single location, this approach has led to uneven load distribution resulting in
+SLA violations. This is because the request load is not uniformly distributed
+across objects. To counter this, any of the top N nodes in the preference list
+is allowed to coordinate the writes. In particular, since each write usually
+follows a read operation, the coordinator for a write is chosen to be the node
+that replied fastest to the previous read operation, which is stored in the
+context information of the request. This optimization enables us to pick the
+node that has the data that was read by the preceding read operation thereby
+increasing the chances of getting “read-your-writes” consistency. It also
+reduces variability in the performance of the request handling which improves
+the performance at the 99.9 percentile.
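+
+> As a rough, sequential condensation of steps (i)-(v) above (a real coordinator
+> is event-driven and concurrent, and the stub node API here is invented for the
+> example):
+>
+> ```python
+> class StubNode:
+>     def __init__(self, data, up=True):
+>         self.data, self.up = data, up
+>
+>     def get(self, key):
+>         # None models a missing or timed-out reply.
+>         return self.data.get(key) if self.up else None
+>
+> def coordinate_read(key, nodes, R):
+>     # (i) send read requests; (ii) gather replies.
+>     responses = [v for node in nodes if (v := node.get(key)) is not None]
+>     if len(responses) < R:
+>         # (iii) too few replies within the time bound: fail the request.
+>         raise RuntimeError(f"only {len(responses)} replies, need R={R}")
+>     # (iv)+(v) return the distinct versions for reconciliation.
+>     return sorted(set(responses))
+>
+> nodes = [StubNode({"k": "v1"}), StubNode({}, up=False), StubNode({"k": "v2"})]
+> print(coordinate_read("k", nodes, R=2))  # ['v1', 'v2'] -> reconcile
+> ```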
+
+
+## 6. Experiences & Lessons Learned
+
+> Much of this section relates to benchmarks run against Dynamo. You can run
+> [Basho Bench] against your own Riak cluster to discover your own
+> optimal values.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/3.0.2/using/performance/benchmarking/
+
+Dynamo is used by several services with different configurations. These
+instances differ by their version reconciliation logic and read/write quorum
+characteristics. The following are the main patterns in which Dynamo is used:
+
+* Business logic specific reconciliation: This is a popular use case for Dynamo.
+Each data object is replicated across multiple nodes. In case of divergent
+versions, the client application performs its own reconciliation logic. The
+shopping cart service discussed earlier is a prime example of this category. Its
+business logic reconciles objects by merging different versions of a customer’s
+shopping cart.
+
+> Riak KV currently supports simple conflict resolution by way of read-repair,
+> leaving more complex reconciliation to the client. There are several tools
+> to help simplify this task, such as [Statebox].
+>
+> Riak KV also supports a simple reconciliation strategy, called [CRDTs
+> (Commutative Replicated Data Types)], for reconciling common data types like
+> sets and counters.
+
+[Statebox]: https://github.com/mochi/statebox_riak
+[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/3.0.2/developing/data-types/
+
+
+* Timestamp based reconciliation: This case differs from the previous one only
+in the reconciliation mechanism. In case of divergent versions, Dynamo performs
+simple timestamp based reconciliation logic of “last write wins”; i.e., the
+object with the largest physical timestamp value is chosen as the correct
+version. The service that maintains customer’s session information is a good
+example of a service that uses this mode.
+
+> Riak also supports this for high-performance cases where accuracy is less
+> important than speed.
+
+* High performance read engine: While Dynamo is built to be an “always
+writeable” data store, a few services are tuning its quorum characteristics and
+using it as a high performance read engine. Typically, these services have a
+high read request rate and only a small number of updates. In this
+configuration, typically R is set to be 1 and W to be N. For these services,
+Dynamo provides the ability to partition and replicate their data across
+multiple nodes thereby offering incremental scalability. Some of these instances
+function as the authoritative persistence cache for data stored in more heavy
+weight backing stores. Services that maintain product catalog and promotional
+items fit in this category.
+
+> Riak can be used in this manner.
+
+The main advantage of Dynamo is that its client applications can tune the values
+of N, R and W to achieve their desired levels of performance, availability and
+durability. For instance, the value of N determines the durability of each
+object. A typical value of N used by Dynamo’s users is 3.
+
+The values of W and R impact object availability, durability and consistency.
+
+For instance, if W is set to 1, then the system will never reject a write
+request as long as there is at least one node in the system that can
+successfully process a write request. However, low values of W and R can
+increase the risk of inconsistency as write requests are deemed successful and
+returned to the clients even if they are not processed by a majority of the
+replicas. This also introduces a vulnerability window for durability when a
+write request is successfully returned to the client even though it has been
+persisted at only a small number of nodes.
+
+Traditional wisdom holds that durability and availability go hand-in-hand.
+However, this is not necessarily true here. For instance, the vulnerability
+window for durability can be decreased by increasing W. This may increase the
+probability of rejecting requests (thereby decreasing availability) because more
+storage hosts need to be alive to process a write request.
+
+The common (N,R,W) configuration used by several instances of Dynamo is (3,2,2).
+These values are chosen to meet the necessary levels of performance, durability,
+consistency, and availability SLAs.
+
+All the measurements presented in this section were taken on a live system
+operating with a configuration of (3,2,2) and running a couple hundred nodes
+with homogenous hardware configurations. As mentioned earlier, each instance of
+Dynamo contains nodes that are located in multiple datacenters. These
+datacenters are typically connected through high speed network links. Recall
+that to generate a successful get (or put) response R (or W) nodes need to
+respond to the coordinator. Clearly, the network latencies between datacenters
+affect the response time and the nodes (and their datacenter locations) are
+chosen such that the applications’ target SLAs are met.
+
+> Ditto for Riak.
+
+### 6.1 Balancing Performance and Durability
+
+While Dynamo’s principal design goal is to build a highly available data store,
+performance is an equally important criterion in Amazon’s platform. As noted
+earlier, to provide a consistent customer experience, Amazon’s services set
+their performance targets at higher percentiles (such as the 99.9th or 99.99th
+percentiles). A typical SLA required of services that use Dynamo is that 99.9%
+of the read and write requests execute within 300ms.
+
+Since Dynamo is run on standard commodity hardware components that have far less
+I/O throughput than high-end enterprise servers, providing consistently high
+performance for read and write operations is a non-trivial task. The involvement
+of multiple storage nodes in read and write operations makes it even more
+challenging, since the performance of these operations is limited by the slowest
+of the R or W replicas. <a href="#figure-4">Figure 4</a> shows the average and
+99.9th percentile latencies of Dynamo’s read and write operations during a
+period of 30 days. As seen in the figure, the latencies exhibit a clear diurnal
+pattern which is a result of the diurnal pattern in the incoming request rate
+(i.e., there is a significant difference in request rate between the daytime and
+night). Moreover, the write latencies are higher than read latencies obviously
+because write operations always result in disk access. Also, the 99.9th
+percentile latencies are around 200 ms and are an order of magnitude higher than
+the averages.
+This is because the 99.9th percentile latencies are affected by
+several factors such as variability in request load, object sizes, and locality
+patterns.
+
+**<figure id="figure-4" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure4.png">
+  <figcaption>
+    Figure 4: Average and 99.9 percentiles of latencies for read and write
+    requests during our peak request season of December 2006. The intervals
+    between consecutive ticks in the x-axis correspond to 12 hours. Latencies
+    follow a diurnal pattern similar to the request rate and 99.9 percentile
+    latencies are an order of magnitude higher than averages.
+  </figcaption>
+</figure>**
+
+While this level of performance is acceptable for a number of services, a few
+customer-facing services required higher levels of performance. For these
+services, Dynamo provides the ability to trade off durability guarantees for
+performance. In this optimization each storage node maintains an object buffer
+in its main memory. Each write operation is stored in the buffer and gets
+periodically written to storage by a writer thread. In this scheme, read
+operations first check if the requested key is present in the buffer. If so, the
+object is read from the buffer instead of the storage engine.
+
+> This is more similar to Riak's W value, since only DW requires a durable write
+> to respond as a success.
+
+This optimization has resulted in lowering the 99.9th percentile latency by a
+factor of 5 during peak traffic even for a very small buffer of a thousand
+objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure,
+write buffering smoothes out higher percentile latencies. Obviously, this scheme
+trades durability for performance. In this scheme, a server crash can result in
+missing writes that were queued up in the buffer. To reduce the durability risk,
+the write operation is refined to have the coordinator choose one out of the N
+replicas to perform a “durable write”. Since the coordinator waits only for W
+responses, the performance of the write operation is not affected by the
+performance of the durable write operation performed by a single replica.
+
+**<figure id="figure-5" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure5.png">
+  <figcaption>
+    Figure 5: Comparison of performance of 99.9th percentile latencies for
+    buffered vs. non-buffered writes over a period of 24 hours. The intervals
+    between consecutive ticks in the x-axis correspond to one hour.
+  </figcaption>
+</figure>**
+
+> Setting DW=1 will replicate this behavior.
+
+
+### 6.2 Ensuring Uniform Load Distribution
+
+Dynamo uses consistent hashing to partition its key space across its replicas
+and to ensure uniform load distribution. A uniform key distribution can help us
+achieve uniform load distribution assuming the access distribution of keys is
+not highly skewed. In particular, Dynamo’s design assumes that even where there
+is a significant skew in the access distribution there are enough keys in the
+popular end of the distribution so that the load of handling popular keys can be
+spread across the nodes uniformly through partitioning. This section discusses
+the load imbalance seen in Dynamo and the impact of different partitioning
+strategies on load distribution.
+
+> Riak follows SHA1-based consistent hashing for [partitioning].
+
+[partitioning]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication/#understanding-replication-by-example
+
+To study the load imbalance and its correlation with request load, the total
+number of requests received by each node was measured for a period of 24 hours -
+broken down into intervals of 30 minutes. In a given time window, a node is
+considered to be “in-balance”, if the node’s request load deviates from the
+average load by a value less than a certain threshold (here 15%). Otherwise
+the node was deemed “out-of-balance”. <a href="#figure-6">Figure 6</a> presents
+the fraction of nodes that are “out-of-balance” (henceforth, “imbalance ratio”)
+during this time period. For reference, the corresponding request load received
+by the entire system during this time period is also plotted. As seen in the
+figure, the imbalance ratio decreases with increasing load. For instance, during
+low loads the imbalance ratio is as high as 20% and during high loads it is
+close to 10%. Intuitively, this can be explained by the fact that under high
+loads, a large number of popular keys are accessed and due to uniform
+distribution of keys the load is evenly distributed. However, during low loads
+(where load is 1/8th of the measured peak load), fewer popular keys are
+accessed, resulting in a higher load imbalance.
+
+**<figure id="figure-6" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure6.png">
+  <figcaption>
+    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
+    request load is above a certain threshold from the average system load) and
+    their corresponding request load. The interval between ticks in x-axis
+    corresponds to a time period of 30 minutes.
+  </figcaption>
+</figure>**
+
+<i>This section discusses how Dynamo’s partitioning scheme has evolved over time
+and its implications on load distribution.</i>
+
+<strong>Strategy 1:</strong> T random tokens per node and partition by token
+value: This was the initial strategy deployed in production (and described in
+Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
+at random from the hash space). The tokens of all nodes are ordered according to
+their values in the hash space. Every two consecutive tokens define a range. The
+last token and the first token form a range that "wraps" around from the highest
+value to the lowest value in the hash space. Because the tokens are chosen
+randomly, the ranges vary in size. As nodes join and leave the system, the token
+set changes and consequently the ranges change. Note that the space needed to
+maintain the membership at each node increases linearly with the number of nodes
+in the system.
+
+> Riak uses equal-sized partitions with a round-robin distribution--not
+> variably-sized partitions that are randomly distributed.
+
+While using this strategy, the following problems were encountered. First, when
+a new node joins the system, it needs to “steal” its key ranges from other
+nodes. However, the nodes handing the key ranges off to the new node have to
+scan their local persistence store to retrieve the appropriate set of data
+items. Note that performing such a scan operation on a production node is tricky
+as scans are highly resource intensive operations and they need to be executed
+in the background without affecting the customer performance. This requires us
+to run the bootstrapping task at the lowest priority.
+However, this significantly slows the bootstrapping process and during busy
+shopping season, when the nodes are handling millions of requests a day, the
+bootstrapping has taken almost a day to complete. Second, when a node
+joins/leaves the system, the key ranges handled by many nodes change and the
+Merkle trees for the new ranges need to be recalculated, which is a non-trivial
+operation to perform on a production system. Finally, there was no easy way to
+take a snapshot of the entire key space due to the randomness in key ranges, and
+this made the process of archival complicated. In this scheme, archiving the
+entire key space requires us to retrieve the keys from each node separately,
+which is highly inefficient.
+
+The fundamental issue with this strategy is that the schemes for data
+partitioning and data placement are intertwined. For instance, in some cases, it
+is preferred to add more nodes to the system in order to handle an increase in
+request load. However, in this scenario, it is not possible to add nodes without
+affecting data partitioning. Ideally, it is desirable to use independent schemes
+for partitioning and placement. To this end, the following strategies were
+evaluated:
+
+<strong>Strategy 2:</strong> T random tokens per node and equal sized
+partitions: In this strategy, the hash space is divided into Q equally sized
+partitions/ranges and each node is assigned T random tokens. Q is usually set
+such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In
+this strategy, the tokens are only used to build the function that maps values
+in the hash space to the ordered lists of nodes and not to decide the
+partitioning. A partition is placed on the first N unique nodes that are
+encountered while walking the consistent hashing ring clockwise from the end of
+the partition. <a href="#figure-7">Figure 7</a> illustrates this strategy for
+N=3. In this example, nodes A, B, C are encountered while walking the ring from
+the end of the partition that contains key k1. The primary advantages of this
+strategy are: (i) decoupling of partitioning and partition placement, and (ii)
+enabling the possibility of changing the placement scheme at runtime.
+
+> As mentioned before, Riak uses equal-sized partitions, but not
+> random distribution.
+
+**<figure id="figure-7" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure7-small.png">
+  <figcaption>
+    Figure 7: Partitioning and placement of keys in the three strategies. A, B,
+    and C depict the three unique nodes that form the preference list for the
+    key k1 on the consistent hashing ring (N=3). The shaded area indicates the
+    key range for which nodes A, B, and C form the preference list. Dark arrows
+    indicate the token locations for various nodes.
+  </figcaption>
+</figure>**
+
+<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
+Similar to strategy 2, this strategy divides the hash space into Q equally sized
+partitions and the placement of partition is decoupled from the partitioning
+scheme. Moreover, each node is assigned Q/S tokens where S is the number of
+nodes in the system. When a node leaves the system, its tokens are randomly
+distributed to the remaining nodes such that these properties are preserved.
+Similarly, when a node joins the system it "steals" tokens from nodes in the
+system in a way that preserves these properties.
+
+> Riak most closely follows strategy 3.
+>
+> See [The Node Join Process] and [Replacing a Node].
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+[Replacing a Node]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/replacing-node/
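+
+> As a rough illustration of the fixed, equal-partition approach (Riak's actual
+> claim algorithm also enforces replica-placement constraints; the numbers here
+> are assumptions for the example):
+>
+> ```python
+> def round_robin_claim(num_partitions, nodes):
+>     """Map each of Q equal partitions to a node, cycling through nodes."""
+>     return {p: nodes[p % len(nodes)] for p in range(num_partitions)}
+>
+> ownership = round_robin_claim(num_partitions=8, nodes=["a", "b", "c"])
+> print(ownership)
+> # {0: 'a', 1: 'b', 2: 'c', 3: 'a', 4: 'b', 5: 'c', 6: 'a', 7: 'b'}
+> # Each node ends up with Q/S partitions, give or take one when S
+> # does not divide Q evenly.
+> ```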
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+[Replacing a Node]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/replacing-node/
+
+The efficiency of these three strategies is evaluated for a system with S=30 and
+N=3. However, comparing these different strategies in a fair manner is hard, as
+different strategies have different configurations to tune their efficiency. For
+instance, the load distribution property of strategy 1 depends on the number of
+tokens (i.e., T) while strategy 3 depends on the number of partitions (i.e., Q).
+One fair way to compare these strategies is to evaluate the skew in their load
+distribution while all strategies use the same amount of space to maintain their
+membership information. For instance, in strategy 1 each node needs to maintain
+the token positions of all the nodes in the ring, and in strategy 3 each node
+needs to maintain the information regarding the partitions assigned to each
+node.
+
+In our next experiment, these strategies were evaluated by varying the relevant
+parameters (T and Q). The load balancing efficiency of each strategy was
+measured for different sizes of membership information that needs to be
+maintained at each node, where load balancing efficiency is defined as the ratio
+of the average number of requests served by each node to the maximum number of
+requests served by the hottest node.
+
+The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
+figure, strategy 3 achieves the best load balancing efficiency and strategy 2
+has the worst load balancing efficiency. For a brief time, Strategy 2 served as
+an interim setup during the process of migrating Dynamo instances from using
+Strategy 1 to Strategy 3. Compared to Strategy 1, Strategy 3 achieves better
+efficiency and reduces the size of membership information maintained at each
+node by three orders of magnitude. While storage is not a major issue, the nodes
+gossip the membership information periodically, and as such it is desirable to
+keep this information as compact as possible. In addition to this, strategy 3 is
+advantageous and simpler to deploy for the following reasons: (i) Faster
+bootstrapping/recovery: Since partition ranges are fixed, they can be stored in
+separate files, meaning a partition can be relocated as a unit by simply
+transferring the file (avoiding random accesses needed to locate specific
+items). This simplifies the process of bootstrapping and recovery. (ii) Ease of
+archival: Periodic archiving of the dataset is a mandatory requirement for
+most Amazon storage services. Archiving the entire dataset stored by Dynamo
+is simpler in strategy 3 because the partition files can be archived separately.
+By contrast, in Strategy 1 the tokens are chosen randomly, and archiving the
+data stored in Dynamo requires retrieving the keys from individual nodes
+separately, which is usually inefficient and slow. The disadvantage of strategy
+3 is that changing the node membership requires coordination in order to
+preserve the properties required of the assignment.
+
+**<figure id="figure-8" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure8.png">
+  <figcaption>
+    Figure 8: Comparison of the load distribution efficiency of different
+    strategies for a system with 30 nodes and N=3 with an equal amount of
+    metadata maintained at each node. The values of the system size and number
+    of replicas are based on the typical configuration deployed for the
+    majority of our services.
+  </figcaption>
+</figure>**
+
+### 6.3 Divergent Versions: When and How Many?
+
+As noted earlier, Dynamo is designed to trade off consistency for availability.
+To understand the precise impact of different failures on consistency, detailed
+data is required on multiple factors: outage length, type of failure, component
+reliability, workload, etc. Presenting these numbers in detail is outside the
+scope of this paper. However, this section discusses a good summary metric: the
+number of divergent versions seen by the application in a live production
+environment.
+
+> This first statement should be read carefully. It's probably more correct to
+> say that Dynamo (and Riak) provides no consistency guarantees, and allows
+> users to trade availability for durability/latency.
+
+Divergent versions of a data item arise in two scenarios. The first is when the
+system is facing failure scenarios such as node failures, data center failures,
+and network partitions. The second is when the system is handling a large number
+of concurrent writers to a single data item and multiple nodes end up
+coordinating the updates concurrently. From both a usability and efficiency
+perspective, it is preferred to keep the number of divergent versions at any
+given time as low as possible. If the versions cannot be syntactically
+reconciled based on vector clocks alone, they have to be passed to the business
+logic for semantic reconciliation. Semantic reconciliation introduces additional
+load on services, so it is desirable to minimize the need for it.
+
+In our next experiment, the number of versions returned to the shopping cart
+service was profiled for a period of 24 hours. During this period, 99.94% of
+requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
+of requests saw 3 versions; and 0.00009% of requests saw 4 versions. This shows
+that divergent versions are created rarely.
+
+Experience shows that an increase in the number of divergent versions is
+driven not by failures but by an increase in the number of concurrent
+writers. The increase in the number of concurrent writes is usually triggered by
+busy robots (automated client programs) and rarely by humans. This issue is not
+discussed in detail due to the sensitive nature of the story.
+
+### 6.4 Client-driven or Server-driven Coordination
+
+As mentioned in Section 5, Dynamo has a request coordination component that uses
+a state machine to handle incoming requests. Client requests are uniformly
+assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
+coordinator for a read request. Write requests, on the other hand, will be
+coordinated by a node in the key’s current preference list. This restriction is
+due to the fact that these preferred nodes have the added responsibility of
+creating a new version stamp that causally subsumes the version that has been
+updated by the write request. Note that if Dynamo’s versioning scheme is based
+on physical timestamps, any node can coordinate a write request.
+
+> In Riak, a server-side load-balancer is an optional configuration. You
+> generally use either virtual IPs or reverse-proxies.
+>
+> See [Load Balancing] for more information.
+
+[Load Balancing]: {{<baseurl>}}riak/kv/3.0.2/configuring/load-balancing-proxy/
+
+An alternative approach to request coordination is to move the state machine to
+the client nodes. In this scheme, client applications use a library to perform
+request coordination locally. A client periodically picks a random Dynamo node
+and downloads its current view of Dynamo membership state. Using this
+information, the client can determine which set of nodes form the preference
+list for any given key. Read requests can be coordinated at the client node,
+thereby avoiding the extra network hop that is incurred if the request were
+assigned to a random Dynamo node by the load balancer. Writes will either be
+forwarded to a node in the key’s preference list or can be coordinated locally
+if Dynamo is using timestamp-based versioning.
+
+> Many [client libraries] provide built-in node request coordination.
+>
+> For example, using the Ruby driver, you could specify three nodes like this:
+>
+>     client = Riak::Client.new(nodes: [
+>       {host: '10.0.0.1'},
+>       {host: '10.0.0.2'},
+>       {host: '10.0.0.3'}
+>     ])
+>
+> Note that the Riak clients do not coordinate with Riak's preference list, but
+> simply round-robin requests, letting the Riak cluster handle routing.
+
+[client libraries]: {{<baseurl>}}riak/kv/3.0.2/developing/client-libraries/
+
+An important advantage of the client-driven coordination approach is that a load
+balancer is no longer required to uniformly distribute client load. Fair load
+distribution is implicitly guaranteed by the near-uniform assignment of keys to
+the storage nodes. Obviously, the efficiency of this scheme is dependent on how
+fresh the membership information is at the client. Currently, clients poll a
+random Dynamo node every 10 seconds for membership updates. A pull-based
+approach was chosen over a push-based one, as the former scales better with a
+large number of clients and requires very little state to be maintained at
+servers regarding clients. However, in the worst case the client can be exposed
+to stale membership for a duration of 10 seconds. If the client detects that its
+membership table is stale (for instance, when some members are unreachable), it
+will immediately refresh its membership information.
+
+<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
+percentile and averages that were observed for a period of 24 hours using
+client-driven coordination compared to the server-driven approach. As seen in
+the table, the client-driven coordination approach reduces the latencies by at
+least 30 milliseconds for 99.9th percentile latencies and decreases the average
+by 3 to 4 milliseconds. The latency improvement is because the client-driven
+approach eliminates the overhead of the load balancer and the extra network hop
+that may be incurred when a request is assigned to a random node. As seen in the
+table, average latencies tend to be significantly lower than latencies at the
+99.9th percentile. This is because Dynamo’s storage engine caches and write
+buffer have good hit ratios. Moreover, since the load balancers and network
+introduce additional variability to the response time, the gain in response time
+is higher for the 99.9th percentile than the average.
+
+<table id="table-2">
+  <caption>
+    Table 2: Performance of client-driven and server-driven
+    coordination approaches.
+  </caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when
+regular critical operations were not significantly affected. To this end, the
+background tasks were integrated with an admission control mechanism. Each of
+the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementing and maintaining Dynamo. Many Amazon internal services have
+used Dynamo for the past two years and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of their requests, and no
+data loss event has occurred to date.
+
+Moreover, a primary advantage of Dynamo is that it provides the necessary
+knobs, in the form of the three parameters (N,R,W), for services to tune an
+instance based on their needs. Unlike popular commercial data stores, Dynamo
+exposes data consistency and reconciliation logic issues to the developers. At
+the outset, one may expect the application logic to become more complex.
+However, historically, Amazon’s platform is built for high availability and many
+applications are designed to handle different failure modes and inconsistencies
+that may arise. Hence, porting such applications to use Dynamo was a relatively
+simple task. For new applications that want to use Dynamo, some analysis is
+required during the initial stages of the development to pick the right conflict
+resolution mechanisms that meet the business case appropriately. Finally, Dynamo
+adopts a full membership model where each node is aware of the data hosted by
+its peers. To do this, each node actively gossips the full routing table with
+other nodes in the system. This model works well for a system that contains a
+couple of hundred nodes. However, scaling such a design to run with tens of
+thousands of nodes is not trivial because the overhead in maintaining the
+routing table increases with the system size. This limitation might be overcome
+by introducing hierarchical extensions to Dynamo. Also, note that this problem
+is actively addressed by O(1) DHT systems (e.g., [14]).
+
+> This is equally true for Riak. As mentioned above, consider running
+> [Basho Bench] to help discover your optimal setup. Nothing will give you
+> better numbers than real experimentation.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/3.0.2/using/performance/benchmarking/
+
+## 7. Conclusions
+
+> This paper was an overview of Riak from a Dynamo point-of-view. To get a
+> better sense of the Riak ecosystem, read our ever-expanding [documentation].
+
+[documentation]: {{<baseurl>}}
+
+This paper described Dynamo, a highly available and scalable data store, used
+for storing the state of a number of core services of Amazon.com’s e-commerce
+platform. Dynamo has provided the desired levels of availability and performance
+and has been successful in handling server failures, data center failures and
+network partitions. Dynamo is incrementally scalable and allows service owners
+to scale up and down based on their current request load. Dynamo allows service
+owners to customize their storage system to meet their desired performance,
+durability and consistency SLAs by allowing them to tune the parameters N, R,
+and W.
+
+The production use of Dynamo for the past year demonstrates that decentralized
+techniques can be combined to provide a single highly-available system. Its
+success in one of the most challenging application environments shows that an
+eventually-consistent storage system can be a building block for
+highly-available applications.
+ + + + diff --git a/content/riak/kv/3.0.2/learn/glossary.md b/content/riak/kv/3.0.2/learn/glossary.md new file mode 100644 index 0000000000..0876c71e1e --- /dev/null +++ b/content/riak/kv/3.0.2/learn/glossary.md @@ -0,0 +1,358 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +aliases: +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/3.0.2/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/3.0.2/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/3.0.2/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/3.0.2/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/3.0.2/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/3.0.2/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/3.0.2/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/3.0.2/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/3.0.2/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Hinted Handoff
+
+Hinted handoff is a technique for dealing with node failure in the Riak
+cluster in which neighboring nodes temporarily take over storage
+operations for the failed node. When the failed node returns to the
+cluster, the updates received by the neighboring nodes are handed off to
+it.
+
+Hinted handoff allows Riak to ensure database availability. When a node
+fails, Riak can continue to handle requests as if the node were still
+there.
+
+* [Recovering a Failed Node][repair recover failure recovery]
+
+## Key
+
+Keys are unique object identifiers in Riak and are scoped within buckets
+and bucket types.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Lager
+
+[Lager] is an Erlang/OTP framework that
+ships as Riak's default logger.
+
+## MapReduce
+
+Riak's MapReduce gives developers the capability to perform more
+powerful queries over the data stored in their key/value store.
+
+* [Using MapReduce][usage mapreduce]
+
+## Node
+
+A node is analogous to a physical server. Nodes run a certain number of
+vnodes, each of which claims a partition in the Riak Ring key space.
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Object
+
+An object is another name for a value.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Partition
+
+Partitions are the spaces into which a Riak cluster is divided. Each
+vnode in Riak is responsible for a partition. Data is stored on a set
+number of partitions determined by the `n_val` setting, with the target
+partitions chosen statically by applying consistent hashing to an
+object's key.
+
+* [Clusters][concept clusters]
+* [Eventual Consistency][concept eventual consistency]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Quorum
+
+Quorum in Riak has two meanings:
+
+* The quantity of replicas that must respond to a read or write request
+  before it is considered successful. This is defined as a bucket
+  property or as one of the relevant parameters to a single request
+  (R,W,DW,RW).
+* A symbolic quantity for the above, `quorum`, which is equivalent to
+  `n_val` / 2 + 1 (rounded down). With the default `n_val` of 3, this
+  works out to `2`.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Replication properties][apps replication properties]
+* [Understanding Riak's Configurable Behaviors]
+
+## Sloppy Quorum
+
+During failure scenarios, in which available nodes < total nodes, sloppy
+quorum is used to ensure that Riak is still available to take writes.
+When a primary node is unavailable, another node will accept its write
+requests. When the node returns, data is transferred to the primary node
+via the [Hinted Handoff](#hinted-handoff) process.
+
+## Read Repair
+
+Read repair is an anti-entropy mechanism that Riak uses to
+optimistically update stale replicas when they reply to a read request
+with stale data.
+
+* [More about Read Repair][concept replication]
+
+## Replica
+
+Replicas are copies of data stored in Riak. The number of replicas
+required for both successful reads and writes is configurable in Riak
+and should be set based on your application's consistency and
+availability requirements.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Understanding Riak's Configurable Behaviors]
+
+## Riak Core
+
+Riak Core is the modular distributed systems framework that serves as
+the foundation for Riak's scalable architecture.
+
+* [Riak Core]
+* [Where To Start With Riak Core]
+
+## Riak KV
+
+Riak KV is the key/value datastore for Riak.
+
+* [Riak KV]
+
+## Riak Pipe
+
+Riak Pipe is the processing layer that powers Riak's MapReduce. It's
+best described as "UNIX pipes for Riak."
+
+* [Riak Pipe]
+* [Riak Pipe - the New MapReduce Power]
+* [Riak Pipe - Riak's Distributed Processing Framework]
+
+## Riak Search
+
+Riak Search is a distributed, scalable, failure-tolerant, realtime,
+full-text search engine integrating [Apache
+Solr](https://lucene.apache.org/solr/) with Riak KV.
+
+* [Using Search][usage search]
+
+## Ring
+
+The Riak Ring is a 160-bit integer space. This space is equally divided
+into partitions, each of which is claimed by a vnode; the vnodes
+themselves reside on actual physical server nodes.
+
+* [Clusters][concept clusters]
+* [Dynamo][learn dynamo]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Secondary Indexing (2i)
+
+Secondary Indexing in Riak gives developers the ability to tag an object
+stored in Riak with one or more values which can then be queried.
+
+* [Using Secondary Indexes][usage secondary-indexes]
+* [Repairing Indexes][repair recover repairs]
+
+## Strong Consistency
+
+While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater
+enable you to apply strong consistency guarantees to some or all of your
+data, thus using Riak as a CP (consistent plus partition-tolerant)
+rather than AP (highly available plus partition-tolerant) system.
+
+* [Strong Consistency Concept][concept strong consistency]
+* [Using Strong Consistency][cluster ops strong consistency]
+
+## Value
+
+Riak is best described as a key/value store. In versions of Riak prior
+to 2.0, all "values" are opaque BLOBs (binary large objects) identified
+with a unique key. Values can be any type of data, including a string, a
+JSON object, a text document, etc. Modifying values involves fetching
+the value that exists in Riak and replacing it with a new value;
+operations on values are thus basic CRUD operations.
+
+[Riak Data Types][dev data types], added in version 2.0, are an important
+exception to this. While still considered values---because they are
+stored in bucket type/bucket/key locations, like anything in Riak---Riak
+Data Types are not BLOBs and are modified by Data Type-specific
+operations.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+* [Data Types][dev data types]
+
+
+## Vector Clock
+
+Riak utilizes vector clocks (or _vclocks_) to handle version control.
+Since any node in a Riak cluster is able to handle a request, and not
+all nodes need to participate, data versioning is required to keep track
+of a current value. When a value is stored in Riak, it is tagged with a
+vector clock, establishing the initial version. When it is updated,
+the client provides the vector clock of the object being modified so
+that this vector clock can be extended to reflect the update. Riak can
+then compare vector clocks on different versions of the object and
+determine certain attributes of the data.
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + + + + diff --git a/content/riak/kv/3.0.2/learn/new-to-nosql.md b/content/riak/kv/3.0.2/learn/new-to-nosql.md new file mode 100644 index 0000000000..0a6f1a0d9e --- /dev/null +++ b/content/riak/kv/3.0.2/learn/new-to-nosql.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: 3.0.2 +#menu: +# riak_kv-3.0.2: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +aliases: +--- + +**TODO: Add content (not sure where this lives in existing docs)** + + + + diff --git a/content/riak/kv/3.0.2/learn/use-cases.md b/content/riak/kv/3.0.2/learn/use-cases.md new file mode 100644 index 0000000000..69163a0d12 --- /dev/null +++ b/content/riak/kv/3.0.2/learn/use-cases.md @@ -0,0 +1,405 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/3.0.2/dev/data-modeling/ + - /riak/kv/3.0.2/dev/data-modeling/ +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/3.0.2/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/3.0.2/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/3.0.2/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. 
This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships.
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+
+Riak's tunable [replication properties][replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster that performs more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+patterns of reading and writing data to Riak are very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value.
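+
+As a quick, hedged sketch of the simple approach above--assuming the official
+Ruby client, with the device ID, bucket name, and payload invented for the
+example--a reading could be stored under a time-based key like this:
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+
+# One bucket per sensor device (hypothetical naming convention)
+bucket = client.bucket('sensor-ab12')
+
+# Key on the reading's interval, e.g. ISO 8601 to the minute
+key = Time.now.utc.strftime('%Y-%m-%dT%H:%M')
+
+obj = bucket.get_or_new(key)
+obj.content_type = 'application/json'
+obj.data = { 'temperature_c' => 21.3, 'humidity' => 0.4 }
+obj.store
+```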
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors that each produce very
+small data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye on the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
+      Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type of object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account or have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of modeling of user
+data. A common example would be storing data for assembling a social network
+timeline. To create a user timeline, you could use a `timeline` bucket in Riak
+and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs that could then be
+used to retrieve the full information from another bucket, or perhaps the full
+status updates themselves. If you want to store additional data, such as a
+timestamp, category, or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+in the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client. Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the content post, though with a different
+bucket/key combination. Another possibility would be to store each comment with
+its own ID. Loading the full view with comments would require your application
+to call from the posts and comments buckets to assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/learn/why-riak-kv.md b/content/riak/kv/3.0.2/learn/why-riak-kv.md
new file mode 100644
index 0000000000..149a771d38
--- /dev/null
+++ b/content/riak/kv/3.0.2/learn/why-riak-kv.md
@@ -0,0 +1,225 @@
+---
+title: "Why Riak KV?"
+description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Why Riak KV?" + identifier: "learn_why_riak_kv" + weight: 100 + parent: "learn" +toc: true +aliases: + - /riak/3.0.2/theory/why-riak/ + - /riak/kv/3.0.2/theory/why-riak/ +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/3.0.2/developing/app-guide/replication-properties +[Basho Bench]: {{<baseurl>}}riak/kv/3.0.2/using/performance/benchmarking +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency +[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[Datomic]: http://www.datomic.com/overview.html +[dev data types]: {{<baseurl>}}riak/kv/3.0.2/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#read-repair + + +## What is Riak? + +Riak is a distributed database designed to deliver maximum data +availability by distributing data across multiple servers. As long as +your Riak client can reach *one* Riak server, it should be able to write +data. + +Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data. + + +### Basho's goals for Riak + +Goal | Description +-------|------- +**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself are experiencing failure conditions +**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden +**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity +**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available + +### When Riak makes sense + +If your data does not fit on a single server and demands a distributed +database architecture, you should take a close look at Riak as a +potential solution to your data availability issues. Getting distributed +databases right is **very** difficult, and Riak was built to address the +problem of data availability with as few trade-offs and downsides as +possible. + +Riak's focus on availability makes it a good fit whenever downtime is +unacceptable. No one can promise 100% uptime, but Riak is designed to +survive network partitions and hardware failures that would +significantly disrupt most databases. + +A less-heralded feature of Riak is its predictable latency. Because its +fundamental operations---read, write, and delete---do not involve +complex data joins or locks, it services those requests promptly. Thanks +to this capability, Riak is often selected as a data storage backend for +data management software from a variety of paradigms, such as +[Datomic]. + +From the standpoint of the actual content of your data, Riak might also +be a good choice if your data can be modeled as one of Riak's currently +available [Data Types][dev data types]: flags, registers, counters, +sets, or maps. These Data Types enable you to take advantage of Riak's +high availability approach while simplifying application development. + +### When Riak is Less of a Good Fit + +We recommend running no fewer than 5 data servers in a cluster. +This means that Riak can be overkill for small databases. 
If you're not +already sure that you will need a distributed database, there's a good +chance that you won't need Riak. + +If explosive growth is a possibility, however, you are always highly +advised to prepare for that in advance. Scaling at Internet speeds is +sometimes compared to overhauling an airplane mid-flight. If you feel +that such a transition might be necessary in the future, then you might +want to consider Riak. + +Riak's simple data model, consisting of keys and values as its atomic +elements, means that your data must be denormalized if your system is to +be reasonably performant. For most applications this is not a serious +hurdle. But if your data simply cannot be effectively managed as keys +and values, Riak will most likely not be the best fit for you. + +Correspondingly, if your application demands a high query load by any +means other than key/value lookup---e.g. SQL-style `SELECT * FROM table` +operations---Riak will not be as efficient as other databases. If you +wish to compare Riak with other data technologies, Basho offers a tool +called [Basho Bench] to help measure its performance, so that you can +decide whether the availability and operational benefits of Riak +outweigh its disadvantages. + +## How Does a Riak Cluster Work? + +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. 
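+
+As a brief, hedged illustration--assuming the official Ruby client, with the
+bucket name invented for the example--`n_val` can be set as a bucket property:
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+
+# Replicate each object in this bucket to 5 nodes
+bucket = client.bucket('important-data')
+bucket.n_val = 5
+```
+
+Note that `n_val` is best chosen up front; changing it on a bucket that
+already holds data has operational consequences and should be done with care.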
+ +## When Things Go Wrong + +Riak retains fault tolerance, data integrity, and availability even in +failure conditions such as hardware failure and network partitions. Riak +has a number of means of addressing these scenarios and other bumps in +the road, like version conflicts in data. + +### Hinted Handoff + +Hinted handoff enables Riak to handle node failure. If a node goes down, +a neighboring node will take over its storage operations. When the +failed node returns, the updates received by the neighboring node are +handed back to it. This ensures that availability for writes and updates +is maintained automatically, minimizing the operational burden of +failure conditions. + +### Version Conflicts + +In any system that replicates data, conflicts can arise, for example +when two clients update the same object at the exact same time or when +not all updates have yet reached hardware that is experiencing lag. + +In Riak, replicas are [eventually consistent][concept eventual consistency], +meaning that while data is always available, not all replicas may have +the most recent update at the exact same time, causing brief +periods---generally on the order of milliseconds---of inconsistency +while all state changes are synchronized. + +Riak addresses data conflicts as follows: When you make a read request, +Riak looks up all replicas for that object. By default, Riak will return +the most recently updated version, determined by looking at the object's +vector clock. Vector clocks are metadata attached to each replica when +it is created. They are extended each time a replica is updated to keep +track of versions. You can also allow clients to resolve conflicts +themselves if that is a better fit for your use case. + +### Riak Data Types + +If you are not interested in dealing with version conflicts on the +application side, [Riak Data Types][dev data types] offer a powerful +yet easy-to-use means of storing certain types of data while allowing +Riak to handle merge conflicts. These conflicts are resolved +automatically by Riak using Data Type-specific algorithms inspired by +research into [convergent replicated data types]. + +### Read Repair + +When an outdated replica is returned as part of a read request, Riak +will automatically update the out-of-sync replica to make it consistent. +[Read repair][glossary read rep], a self-healing property of +the database, will even update a replica that returns a `not_found` in +the event that a node loses the data due to physical failure. + +### Reading and Writing Data in Failure Conditions + +In Riak, you can set an R value for reads and a W value for writes. +These values give you control over how many replicas must respond to a +request for it to succeed. + +Let's say that you have an N value of 3 (aka `n_val=3`) for a particular +key/value pair, but one of the physical nodes responsible for a replica +is down. With an `r=2` setting, only 2 replicas must return results for +read to be deemed successful. This allows Riak to provide read +availability even when nodes are down or laggy. The same applies for the +W in writes. If this value is not specified, Riak defaults to `quorum`, +according to which the majority of nodes must respond. + +There is more on [replication properties][apps replication properties] elsewhere in the +documentation. 
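+
+For example, the `r` value for a single read can be supplied as a query parameter on Riak's HTTP API. A sketch, assuming a node listening on the default HTTP port 8098 and a placeholder bucket and key:
+
+```bash
+curl "http://localhost:8098/buckets/test/keys/mykey?r=2"
+```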
+
+
+
+
diff --git a/content/riak/kv/3.0.2/release-notes.md b/content/riak/kv/3.0.2/release-notes.md
new file mode 100644
index 0000000000..d2c68377c1
--- /dev/null
+++ b/content/riak/kv/3.0.2/release-notes.md
@@ -0,0 +1,52 @@
+---
+title: "Riak KV 3.0.2 Release Notes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Release Notes"
+    identifier: "index_release_notes"
+    weight: 101
+    parent: index
+toc: false
+aliases:
+  - /riak/3.0.2/community/release-notes
+  - /riak/kv/3.0.2/intro-v20
+  - /riak/3.0.2/intro-v20
+  - /riak/kv/3.0.2/introduction
+---
+
+Released Jan 05, 2021.
+
+
+## Overview
+
+This release improves the performance and stability of the leveled backend and of AAE folds. These performance improvements are based on feedback from deployments with > 1bn keys per cluster.
+
+The particular improvements are:
+
+- In leveled, caching of individual file scores so that not all files need to be scored on each journal compaction run.
+
+- In leveled, a change to the default journal compaction scoring percentages to make longer runs more likely (i.e. achieve more compaction per scoring run).
+
+- In leveled, a change to the caching of the SST file block-index in the ledger, which makes repeated folds with a last-modified date range an order of magnitude faster and more computationally efficient.
+
+- In leveled, a fix to prevent very long list-buckets queries when buckets have just been deleted (by erasing all keys).
+
+- In kv_index_tictactree, improved logging and exchange controls to make exchanges easier to monitor and less likely to prompt unnecessary work.
+
+- In kv_index_tictactree, a change to speed up the necessary rebuilds of AAE tree-caches following a node crash, by only testing journal presence in scheduled rebuilds.
+
+- In riak_kv_ttaaefs_manager, some essential fixes to prevent excessive CPU load when comparing large volumes of keys and clocks, due to a failure to decode clocks correctly before passing them to the exchange.
+
+Further significant improvements have been made to Tictac AAE full-sync, to greatly improve the efficiency of operation when relatively large deltas exist between relatively large clusters (in terms of key counts). Those changes, which introduce the use of 'day_check', 'hour_check' and 'range_check' options to nval-based full-sync, will be available in a future 3.0.3 release of Riak. For those wishing to use Tictac AAE full-sync at a non-trivial scale, it is recommended to move straight to 3.0.3 when it is available.
+
+## Previous Release Notes
+
+Please see the KV 2.9.7 release notes [here]({{<baseurl>}}riak/kv/2.9.7/release-notes/).
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup.md b/content/riak/kv/3.0.2/setup.md
new file mode 100644
index 0000000000..12d0ff27ad
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup.md
@@ -0,0 +1,51 @@
+---
+title: "Setup Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Setup"
+    identifier: "setup_index"
+    weight: 110
+    pre: install
+toc: false
+aliases:
+---
+
+[plan index]: ../setup/planning
+[install index]: ../setup/installing
+[upgrade index]: ../setup/upgrading
+[downgrade]: ../setup/downgrade
+
+## In This Section
+
+#### [Planning][plan index]
+
+Information on planning your Riak KV cluster, including software & hardware recommendations.
+
+[Learn More >>][plan index]
+
+#### [Installing][install index]
+
+Step-by-step tutorials on installing Riak KV. 
+
+[Learn More >>][install index]
+
+#### [Upgrading][upgrade index]
+
+Guides on upgrading your Riak KV cluster.
+
+[Learn More >>][upgrade index]
+
+#### [Downgrading][downgrade]
+
+A guide on downgrading your Riak KV cluster.
+
+[Learn More >>][downgrade]
+
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/downgrade.md b/content/riak/kv/3.0.2/setup/downgrade.md
new file mode 100644
index 0000000000..493749b016
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/downgrade.md
@@ -0,0 +1,179 @@
+---
+title: "Downgrading"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Downgrading"
+    identifier: "downgrading"
+    weight: 103
+    parent: "setup_index"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/upgrading/rolling-downgrades/
+  - /riak/kv/3.0.2/ops/upgrading/rolling-downgrades/
+---
+
+[rolling upgrade]: {{<baseurl>}}riak/kv/3.0.2/setup/upgrading/cluster
+[config ref]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+[concept aae]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/
+[aae status]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#aae-status
+
+Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade].
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove the Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak search.
+7. Monitor the reindex of the data.
+8. Finalize the process and restart Riak KV & Riak search.
+
+### Guidelines
+
+* Riak Control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:---|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ | | Can be opted out using a [capability](#aae_tree_capability) |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression and/or global expiration in LevelDB when you installed KV 3.0.2, you cannot downgrade.
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).
+
+This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}

+### Stop Riak KV and remove Riak search index & temporary data
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+2\. Back up your Riak KV `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Remove the Riak search index data and AAE data:
+
+    1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+    ```bash
+    rm -rf /var/lib/riak/yz_temp/solr-webapp
+    ```
+    2. Delete the Solr cores located in the yz directory. If you have custom solrconfig.xml files, you will need to restore the core from backup instead.
+
+    For example:
+
+    ```bash
+    rm -rf /var/lib/riak/yz/example_core1
+    rm -rf /var/lib/riak/yz/example_core2
+    ```
+
+### Prepare to Re-index Solr Cores
+
+5\. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency-sensitive application, you should adjust these settings with care.
+
+```riak.conf
+anti_entropy.concurrency_limit = 8
+anti_entropy.tree.build_limit.number = 4
+anti_entropy.tree.build_limit.per_timespan = 5m
+```
+
+### Start the node and disable Yokozuna
+
+6\. Start Riak KV:
+{{% note %}}
+Search results will be inconsistent until **Step 8.1** is complete.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7\. Wait for Riak search to start by running the following command:
+
+```bash
+riak-admin wait-for-service yokozuna
+```
+
+8\. Run `riak attach`.
+
+    1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:
+
+    ```
+    riak_core_node_watcher:service_down(yokozuna).
+    ```
+
+    2. Expire the Yokozuna AAE Trees:
+
+    ```
+    yz_entropy_mgr:expire_trees().
+    ```
+
+    3. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+### Monitor the reindex of the data
+
+9\. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands.
+
+The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output.
+
+Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.
+
+### Finalize process and restart Yokozuna
+
+
+10\. If you raised the AAE concurrency settings in riak.conf during **Step 5**, stop the node and remove the increased AAE thresholds.
+
+11\. If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet:
+
+```erlang
+riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)).
+```
+
+12\. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+13\. 
Verify that transfers have completed: + +```bash +riak-admin transfers +``` + + + + + diff --git a/content/riak/kv/3.0.2/setup/installing.md b/content/riak/kv/3.0.2/setup/installing.md new file mode 100644 index 0000000000..90a52ec2b7 --- /dev/null +++ b/content/riak/kv/3.0.2/setup/installing.md @@ -0,0 +1,61 @@ +--- +title: "Installing Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Installing" + identifier: "installing" + weight: 101 + parent: "setup_index" + pre: cog +toc: true +aliases: + - /riak/3.0.2/ops/building/installing + - /riak/kv/3.0.2/ops/building/installing + - /riak/3.0.2/installing/ + - /riak/kv/3.0.2/installing/ +--- + +[install aws]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/amazon-web-services +[install debian & ubuntu]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/debian-ubuntu +[install freebsd]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/freebsd +[install mac osx]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/mac-osx +[install rhel & centos]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/rhel-centos +[install suse]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/suse +[install windows azure]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/windows-azure +[install source index]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/source +[community projects]: {{<baseurl>}}community/projects +[upgrade index]: {{<baseurl>}}riak/kv/3.0.2/setup/upgrading + +## Supported Platforms + +Riak is supported on numerous popular operating systems and virtualized +environments. The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. + + + + + diff --git a/content/riak/kv/3.0.2/setup/installing/amazon-web-services.md b/content/riak/kv/3.0.2/setup/installing/amazon-web-services.md new file mode 100644 index 0000000000..2743efb525 --- /dev/null +++ b/content/riak/kv/3.0.2/setup/installing/amazon-web-services.md @@ -0,0 +1,153 @@ +--- +title_supertext: "Installing on" +title: "Amazon Web Services" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Amazon Web Services" + identifier: "installing_amazon_web_services" + weight: 301 + parent: "installing" +toc: true +aliases: + - /riak/3.0.2/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/kv/3.0.2/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/3.0.2/installing/amazon-web-services/ + - /riak/kv/3.0.2/installing/amazon-web-services/ +--- + + +## Launching Riak VMs via the AWS Marketplace + +{{% note title="Note" %}} +The AWS Marketplace does not always have the most recent versions of Riak available. 
To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below.
+{{% /note %}}
+
+In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account.
+
+1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account.
+
+2. Locate Riak in the **Databases & Caching** category or search for Riak from any page.
+
+3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair.
+
+    ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png)
+
+4. Click the **Accept Terms and Launch with 1-Click** button.
+
+### Security Group Settings
+
+Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak.
+
+1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM.
+
+2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports:
+
+    * 22 (SSH)
+    * 8087 (Riak Protocol Buffers Interface)
+    * 8098 (Riak HTTP Interface)
+
+3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab).
+
+    * Port range: 4369
+    * Port range: 6000-7999
+    * Port range: 8099
+
+4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.
+
+    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)
+
+We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/3.0.2/using/security/).
+
+## Clustering Riak on AWS
+
+You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the `ec2-user`.
+
+You can find more information on connecting to an instance on the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).
+
+{{% note title="Note" %}}
+The following clustering setup will _not_ be resilient to instance restarts
+unless deployed in Amazon VPC.
+{{% /note %}}
+
+{{% note title="Note on Package Based Installation" %}}
+If installing to AWS by package, further configuration of _riak.conf_ to set the node name and listening IP addresses is necessary for the below steps to function. A sketch of the required settings appears at the end of this section.
+{{% /note %}}
+
+1. On the first node, obtain the internal IP address:
+
+    ```bash
+    curl http://169.254.169.254/latest/meta-data/local-ipv4
+    ```
+
+2. For all other nodes, use the internal IP address of the first node:
+
+    ```bash
+    sudo riak-admin cluster join riak@<ip.of.first.node>
+    ```
+
+3. After all of the nodes are joined, execute the following:
+
+    ```bash
+    sudo riak-admin cluster plan
+    ```
+
+    If this looks good:
+
+    ```bash
+    sudo riak-admin cluster commit
+    ```
+
+    To check the status of clustering use:
+
+    ```bash
+    sudo riak-admin member_status
+    ```
+
+You now have a Riak cluster running on AWS. 
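+
+As noted in the package-based installation note above, package installs need the node name and listening addresses set in `riak.conf` before the clustering steps will work. A minimal sketch (the `10.0.0.1` address is a placeholder for each node's own internal IP):
+
+```riak.conf
+nodename = riak@10.0.0.1
+listener.http.internal = 10.0.0.1:8098
+listener.protobuf.internal = 10.0.0.1:8087
+```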
+
+
+## Installing From Package
+
+#### Amazon Linux 2
+
+You can install Riak on Amazon Linux 2 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2-1.amzn2x86_64.rpm
+sudo yum localinstall -y riak-3.0.2-1.amzn2x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2-1.amzn2x86_64.rpm
+sudo rpm -i riak-3.0.2-1.amzn2x86_64.rpm
+```
+
+
+#### Amazon Linux AMI (2016.09)
+
+You can install Riak on Amazon Linux AMI 2016.09 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2-1.amzn1x86_64.rpm
+sudo yum localinstall -y riak-3.0.2-1.amzn1x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2-1.amzn1x86_64.rpm
+sudo rpm -i riak-3.0.2-1.amzn1x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].
+
+[install verify]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/verify
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/installing/debian-ubuntu.md b/content/riak/kv/3.0.2/setup/installing/debian-ubuntu.md
new file mode 100644
index 0000000000..0a20f450b2
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/installing/debian-ubuntu.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "Installing on"
+title: "Debian and Ubuntu"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Debian & Ubuntu"
+    identifier: "installing_debian_ubuntu"
+    weight: 302
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/kv/3.0.2/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/3.0.2/installing/debian-ubuntu/
+  - /riak/kv/3.0.2/installing/debian-ubuntu/
+---
+
+[install source index]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/source/
+[security index]: {{<baseurl>}}riak/kv/3.0.2/using/security/
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/verify
+
+Riak KV can be installed on Debian or Ubuntu-based systems using a binary
+package or by compiling from source code.
+
+The following steps have been tested to work with Riak KV on:
+
+- Ubuntu 18.04
+- Ubuntu 16.04
+- Ubuntu 14.04
+- Ubuntu 12.04
+- Debian 9.2
+- Debian 8.6
+- Debian 7.6
+- Raspbian Buster
+
+> **Note on Debian 7**
+>
+> If you wish to install Riak on Debian 7, you may need to install
[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
later, which in turn requires upgrading your system to
[sid](https://www.debian.org/releases/sid/). Installation instructions
can be found
[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
+>
+> Once sid has been installed, you can install libc6 with the following
command:
+>
+>```bash
apt-get -t sid install libc6 libc6-dev libc6-dbg
```
+
+## Installing From Package
+
+If you wish to install the deb packages by hand, follow these
+instructions.
+
+### Installing on Non-LTS Ubuntu Releases
+
+Typically we only package Riak for LTS releases to keep our build and
+testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
+there are changes that affect how Riak is packaged, so we will release a
+separate package for that non-LTS release. In most other cases, however,
+if you are running a non-LTS release, it is safe to follow the
+instructions below for the most recent LTS release prior to yours.
+For example, on Ubuntu 12.10 you would follow the installation
+instructions for Ubuntu 12.04.
+
+### PAM Library Requirement for Ubuntu
+
+One dependency that may be missing on your machine is the `libpam0g-dev`
+package used for Pluggable Authentication Module (PAM) authentication,
+associated with [Riak security][security index].
+
+To install:
+
+```bash
+sudo apt-get install libpam0g-dev
+```
+
+### Riak 64-bit Installation
+
+#### Ubuntu Bionic Beaver (18.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/bionic64/riak_3.0.2-1_amd64.deb
+sudo dpkg -i riak_3.0.2-1_amd64.deb
+```
+
+#### Ubuntu Xenial Xerus (16.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/xenial64/riak_3.0.2-1_amd64.deb
+sudo dpkg -i riak_3.0.2-1_amd64.deb
+```
+
+#### Ubuntu Trusty Tahr (14.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/trusty64/riak_3.0.2-1_amd64.deb
+sudo dpkg -i riak_3.0.2-1_amd64.deb
+```
+
+#### Ubuntu Precise Pangolin (12.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/precise64/riak_3.0.2-1_amd64.deb
+sudo dpkg -i riak_3.0.2-1_amd64.deb
+```
+
+#### Debian Stretch (9.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak_3.0.2-1_amd64.deb
+sudo dpkg -i riak_3.0.2-1_amd64.deb
+```
+
+#### Debian Jessie (8.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/8/riak_3.0.2-1_amd64.deb
+sudo dpkg -i riak_3.0.2-1_amd64.deb
+```
+
+#### Debian Wheezy (7.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/7/riak_3.0.2-1_amd64.deb
+sudo dpkg -i riak_3.0.2-1_amd64.deb
+```
+
+#### Raspbian Buster
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/raspbian/buster/riak_3.0.2-1_armhf.deb
+sudo dpkg -i riak_3.0.2-1_armhf.deb
+```
+
+
+## Installing From Source
+
+First, install Riak dependencies using apt:
+
+```bash
+sudo apt-get install build-essential libc6-dev-i386 git
+```
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/riak-3.0.2.tar.gz
+tar zxvf riak-3.0.2.tar.gz
+cd riak-3.0.2
+make rel
+```
+
+If the build was successful, a fresh build of Riak will exist in the
+`rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
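+
+As a quick smoke test of the package install before moving on, you can start the node and ping it (a sketch; the `riak` script is installed by the package):
+
+```bash
+sudo riak start
+sudo riak ping
+```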
+ + + + + diff --git a/content/riak/kv/3.0.2/setup/installing/freebsd.md b/content/riak/kv/3.0.2/setup/installing/freebsd.md new file mode 100644 index 0000000000..b5ab0d494a --- /dev/null +++ b/content/riak/kv/3.0.2/setup/installing/freebsd.md @@ -0,0 +1,133 @@ +--- +title_supertext: "Installing on" +title: "FreeBSD" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "FreeBSD" + identifier: "installing_freebsd" + weight: 303 + parent: "installing" +toc: true +aliases: + - /riak/3.0.2/ops/building/installing/Installing-on-FreeBSD + - /riak/kv/3.0.2/ops/building/installing/Installing-on-FreeBSD + - /riak/3.0.2/installing/freebsd/ + - /riak/kv/3.0.2/installing/freebsd/ +--- + + + +[install source erlang]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/source/erlang +[downloads]: {{<baseurl>}}riak/kv/3.0.2/downloads/ +[install verify]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/verify + +You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code. + +## Installing From Binary Package + +Installing Riak from a binary package is the simplest method with least required dependencies, and requires less time to complete than building from source. + +### Prerequisites and Dependencies + +Riak depends on `sudo` to be installed if the Riak command line tools are to be executed by users other than the *riak* user. Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package. + +### Installation + +You can install the Riak binary package on FreeBSD remotely using the +`pkg_add` remote option. For this example, we're installing `riak-3.0.2.txz`. + +### For FreeBSD 11.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/3.0/3.0.2/freebsd/11.1/riak-3.0.2.txz +``` + + +### For FreeBSD 10.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/3.0/3.0.2/freebsd/10.4/riak-3.0.2.txz +``` + +When Riak is installed, a message is displayed with information about the installation and available documentation. + +``` +Thank you for installing Riak. + +Riak has been installed in /usr/local owned by user:group riak:riak + +The primary directories are: + + {platform_bin_dir, "/usr/local/sbin"} + {platform_data_dir, "/var/db/riak"} + {platform_etc_dir, "/usr/local/etc/riak"} + {platform_lib_dir, "/usr/local/lib/riak"} + {platform_log_dir, "/var/log/riak"} + +These can be configured and changed in the platform_etc_dir/app.config. + +Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly. + +Man pages are available for riak(1) and riak-admin(1) +``` + +## Installing From Source + +Installing Riak from source on FreeBSD is a straightforward process which requires installation of more dependencies (such as Erlang) prior to building, and requires more time than a binary package installation. + +That said, installing from source provides for greater flexibility with respect to configuration, data root locations, and more fine grained control over specific dependency versions. + +### Prerequisites and Dependencies + +When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build. + +If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. 
+
+* Erlang ([Installing Erlang][install source erlang])
+* Curl
+* Git
+* OpenSSL (version 1.0.0_7)
+* Python
+* sudo
+* flex
+
+### Installation
+
+First download the version you wish to install from the [downloads][downloads] page.
+
+Next, unpack and build a release from source:
+
+```bash
+tar zxf <riak-x.x.x>
+cd riak-x.x.x
+gmake rel
+```
+
+Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories:
+
+```bash
+bin # Riak binaries
+data # Riak data and metadata
+erts-5.9.2 # Erlang Run-Time System
+etc # Riak Configuration
+lib # Third party libraries
+log # Operational logs
+releases # Release information
+```
+
+If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this:
+
+```bash
+gmake devrel
+```
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/installing/mac-osx.md b/content/riak/kv/3.0.2/setup/installing/mac-osx.md
new file mode 100644
index 0000000000..ced5478de8
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/installing/mac-osx.md
@@ -0,0 +1,121 @@
+---
+title_supertext: "Installing on"
+title: "Mac OS X"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Mac OS X"
+    identifier: "installing_macosx"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/kv/3.0.2/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/3.0.2/installing/mac-osx/
+  - /riak/kv/3.0.2/installing/mac-osx/
+---
+
+
+
+[perf open files]: {{<baseurl>}}riak/kv/3.0.2/using/performance/open-files-limit
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/verify
+
+The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball.
+
+> **`ulimit` on OS X**
+>
+> OS X gives you a very small limit on open file handles, so even with a
backend that uses very few file handles, it's possible to run out. See
[Open Files Limit][perf open files] for more information about changing the limit.
+
+
+## From Precompiled Tarballs
+
+To run Riak from our precompiled tarball, run these commands for the
+appropriate platform:
+
+### 64-bit
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/3.0/3.0.2/osx/10.11/riak-3.0.2-OSX-x86_64.tar.gz
+tar xzvf riak-3.0.2-OSX-x86_64.tar.gz
+```
+
+After the release is untarred, you will be able to `cd` into the `riak`
+directory and execute `bin/riak start` to start the Riak node.
+
+## Homebrew
+
+{{% note title="Warning: Homebrew not always up to date" %}}
+Homebrew's Riak recipe is community supported, and thus is not always up to
date with the latest Riak package. Please ensure that the current recipe is
using the latest supported code (and don't be afraid to update it if it's
not).
+{{% /note %}}
+
+Installing Riak 3.0.2 with [Homebrew](http://brew.sh/) is easy:
+
+```bash
+brew install --devel riak
+```
+
+By default, this will place a `3.0.2` folder in
+`/usr/local/Cellar/riak`. 
+ +Be aware that you will most likely see the following message after +running `brew install`: + +``` +Error: The `brew link` step did not complete successfully +The formula built, but is not symlinked into /usr/local + +You can try again using: + brew link riak +``` + +We do not recommend using `brew link` with Riak. Instead, we recommend +either copying that directory to a desired location on your machine, +aliasing the executables in the `/bin` directory, or interacting with +the Riak installation directory via environment variables. + +**Note**: Homebrew will install Erlang if you don't have it already. + +## Installing From Source + +You must have Xcode tools installed from [Apple's Developer +website](http://developer.apple.com/). + +{{% note title="Note on Clang" %}} +Riak has had problems compiling with Clang in the past. As of Riak KV +2.9.0p5 and Clang 902.0.39.1, Clang can build Riak. +{{% /note %}} + +Riak requires [Erlang](http://www.erlang.org/) R16B02+. + +If you do not have Erlang already installed, see [Installing Erlang][install source erlang]. + +Next, download and unpack the source distribution. + +```bash +curl -O https://files.tiot.jp/riak/kv/3.0/3.0.2/riak-3.0.2.tar.gz +tar zxvf riak-3.0.2.tar.gz +cd riak-3.0.2 +make rel +``` + +If you receive errors when building about "incompatible architecture," +please verify that you built Erlang with the same architecture as your +system (Snow Leopard and higher: 64bit). + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/3.0.2/setup/installing/rhel-centos.md b/content/riak/kv/3.0.2/setup/installing/rhel-centos.md new file mode 100644 index 0000000000..35476c18fc --- /dev/null +++ b/content/riak/kv/3.0.2/setup/installing/rhel-centos.md @@ -0,0 +1,134 @@ +--- +title_supertext: "Installing on" +title: "RHEL and CentOS" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "RHEL & CentOS" + identifier: "installing_rhel_centos" + weight: 304 + parent: "installing" +toc: true +aliases: + - /riak/3.0.2/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/kv/3.0.2/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/3.0.2/installing/rhel-centos/ + - /riak/kv/3.0.2/installing/rhel-centos/ +--- + + + +[install source index]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/source +[install source erlang]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/verify + +Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary +package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on +CentOS/RHEL 6.9, 7.5.1804 and 8.1.1911 . + +> **Note on SELinux** +> +> CentOS enables SELinux by default, so you may need to disable SELinux if +you encounter errors. + +## Installing From Package + +If you wish to install the RHEL/CentOS packages by hand, follow these +instructions. 
+
+### For CentOS 8 / RHEL 8
+
+Before installing Riak on CentOS 8/RHEL 8, we need to satisfy some Erlang dependencies
+from EPEL first by installing the EPEL repository:
+
+```bash
+sudo yum install -y epel-release
+```
+
+Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2-1.el8.x86_64.rpm
+sudo yum localinstall -y riak-3.0.2-1.el8.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2-1.el8.x86_64.rpm
+sudo rpm -Uvh riak-3.0.2-1.el8.x86_64.rpm
+```
+
+### For CentOS 7 / RHEL 7
+
+You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2-1.el7.x86_64.rpm
+sudo yum localinstall -y riak-3.0.2-1.el7.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2-1.el7.x86_64.rpm
+sudo rpm -Uvh riak-3.0.2-1.el7.x86_64.rpm
+```
+
+### For CentOS 6 / RHEL 6
+
+You can install Riak on CentOS 6/RHEL 6 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/6/riak-3.0.2-1.el6.x86_64.rpm
+sudo yum localinstall -y riak-3.0.2-1.el6.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/6/riak-3.0.2-1.el6.x86_64.rpm
+sudo rpm -Uvh riak-3.0.2-1.el6.x86_64.rpm
+```
+
+## Installing From Source
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+Building from source will require the following packages:
+
+* `gcc`
+* `gcc-c++`
+* `glibc-devel`
+* `make`
+* `pam-devel`
+
+You can install these with yum:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
+```
+
+Now we can download and install Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.2/riak-3.0.2.tar.gz
+tar zxvf riak-3.0.2.tar.gz
+cd riak-3.0.2
+make rel
+```
+
+You will now have a fresh build of Riak in the `rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/installing/smartos.md b/content/riak/kv/3.0.2/setup/installing/smartos.md
new file mode 100644
index 0000000000..25edafbfee
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/installing/smartos.md
@@ -0,0 +1,119 @@
+---
+title_supertext: "Installing on"
+title: "SmartOS"
+description: ""
+project: "riak_kv"
+project_version: "3.0.2"
+menu:
+  riak_kv-3.0.2:
+    name: "SmartOS"
+    identifier: "installing_smartos"
+    weight: 305
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/installing/Installing-on-SmartOS
+  - /riak/kv/3.0.2/ops/building/installing/Installing-on-SmartOS
+  - /riak/3.0.2/installing/smartos/
+  - /riak/kv/3.0.2/installing/smartos/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/verify
+
+{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
+SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**. 
They demonstrate installation of a Riak node on SmartOS as the root user. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open +files limit is at least 65536. Check the current limits to verify this: + +```bash +ulimit -a +``` + +To temporarily increase this limit *for the life of your session*, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`: + +```bash +set rlim_fd_max=65536 +``` + +## Choosing a Version + +SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is the way to determine which Riak package to use. + +The thing that really matters for Riak is what dataset was used to make the SmartOS VM. These datasets come from joyent and appear like this with the `dsadm` command: + +``` +fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4 +f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1 +``` + +This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use. + +For Joyent Cloud users who don't know what dataset was used, in the guest zone type: + +``` +cat /opt/local/etc/pkgin/repositories.conf +``` + +* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* you need to use the `1.8` download. +* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* you need to use the `1.6` download. + +## Download and Install + +Download your version of the Riak binary package for SmartOS: + +```bash +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz +``` + +Next, install the package: + +``` +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz +``` + +After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: + +```bash +svcadm -v enable -r riak +``` + +Finally, after enabling the services, check to see that they are online: + +``` +svcs -a | grep -E 'epmd|riak' +``` + +Output from the above command should resemble the following: + +``` +online 17:17:16 svc:/network/epmd:default +online 17:17:16 svc:/application/riak:default +``` + +Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed and configured Riak as service on SmartOS. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
+ diff --git a/content/riak/kv/3.0.2/setup/installing/solaris.md b/content/riak/kv/3.0.2/setup/installing/solaris.md new file mode 100644 index 0000000000..8be537430e --- /dev/null +++ b/content/riak/kv/3.0.2/setup/installing/solaris.md @@ -0,0 +1,91 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: "3.0.2" +menu: + riak_kv-3.0.2: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/3.0.2/ops/building/installing/Installing-on-Solaris + - /riak/kv/3.0.2/ops/building/installing/Installing-on-Solaris + - /riak/3.0.2/installing/solaris/ + - /riak/kv/3.0.2/installing/solaris/ +--- + + + +[install verify]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
+
diff --git a/content/riak/kv/3.0.2/setup/installing/source.md b/content/riak/kv/3.0.2/setup/installing/source.md
new file mode 100644
index 0000000000..fcf638bba2
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/installing/source.md
@@ -0,0 +1,110 @@
+---
+title_supertext: "Installing"
+title: "Riak KV From Source"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Installing From Source"
+    identifier: "installing_source"
+    weight: 310
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/Installing-Riak-from-Source
+  - /riak/kv/3.0.2/ops/building/Installing-Riak-from-Source
+  - /riak/3.0.2/installing/source/
+  - /riak/kv/3.0.2/installing/source/
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/3.0.2/downloads/
+[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/debian-ubuntu/#installing-from-source
+[install freebsd#source]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/freebsd/#installing-from-source
+[install mac osx#source]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/mac-osx/#installing-from-source
+[install rhel & centos#source]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/rhel-centos/#installing-from-source
+[install verify]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/verify
+
+Riak should be installed from source if you are building on a platform
+for which a package does not exist or if you are interested in
+contributing to Riak.
+
+## Dependencies
+
+### Erlang
+
+To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release.
+
+See [Installing Erlang][install source erlang] for instructions.
+
+### Git
+
+Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build.
+
+### GCC
+
+Riak will not compile with Clang. Please make sure your default C/C++
+compiler is [GCC](https://gcc.gnu.org/).
+
+## Installation
+
+The following instructions generate a complete, self-contained build of
+Riak in `$RIAK/rel/riak` where `$RIAK` is the location of the unpacked
+or cloned source.
+
+### Installing from source package
+
+Download the Riak source package from the [Download Center][downloads] and build:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/3.0/3.0.2/riak-3.0.2.tar.gz
+tar zxvf riak-3.0.2.tar.gz
+cd riak-3.0.2
+make locked-deps
+make rel
+```
+
+### Installing from GitHub
+
+The [Riak GitHub repository](http://github.com/basho/riak) has much
+more information on building and installing Riak from source. To clone
+and build Riak from source, follow the steps below.
+
+Clone the repository using [Git](http://git-scm.com) and build:
+
+```bash
+git clone git://github.com/basho/riak.git
+cd riak
+make locked-deps
+make rel
+```
+
+## Platform-Specific Instructions
+
+For instructions about specific platforms, see:
+
+ * [Debian & Ubuntu][install debian & ubuntu#source]
+ * [FreeBSD][install freebsd#source]
+ * [Mac OS X][install mac osx#source]
+ * [RHEL & CentOS][install rhel & centos#source]
+
+If you are running Riak on a platform not in the list above and need
+some help getting it up and running, join the Riak Mailing List and
+inquire about it there. We are happy to help you get up and running with
+Riak. 
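+
+Whichever platform you build on, a quick way to smoke-test a from-source build is to start it straight from the release directory (a sketch, assuming `make rel` completed and left a build in `rel/riak`):
+
+```bash
+rel/riak/bin/riak start
+rel/riak/bin/riak ping
+```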
+
+### Windows
+
+Riak is not currently supported on Microsoft Windows.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/installing/source/erlang.md b/content/riak/kv/3.0.2/setup/installing/source/erlang.md
new file mode 100644
index 0000000000..676ce5502c
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/installing/source/erlang.md
@@ -0,0 +1,571 @@
+---
+title: "Installing Erlang"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Installing Erlang"
+    identifier: "installing_source_erlang"
+    weight: 301
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/installing/erlang
+  - /riak/kv/3.0.2/ops/building/installing/erlang
+  - /riak/3.0.2/installing/source/erlang/
+  - /riak/kv/3.0.2/installing/source/erlang/
+---
+
+[install index]: {{<baseurl>}}riak/kv/3.0.2/setup/installing
+[security basics]: {{<baseurl>}}riak/kv/3.0.2/using/security/basics
+
+Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].**
+
+> **Note on Official Support**
+>
+> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package.
+
+## Prerequisites
+
+#### Contents
+
+* [kerl](#kerl-prerequisites)
+* [Debian/Ubuntu](#debian-ubuntu-prerequisites)
+* [FreeBSD/Solaris](#freebsd-solaris-prerequisites)
+* [Mac OS X](#mac-os-x-prerequisites)
+* [RHEL/CentOS](#rhel-centos-prerequisites)
+
+To build and install Erlang you must have a GNU-compatible build system and these tools:
+
+**Unpacking**
+
+* [GNU unzip](http://www.gzip.org/) or a modern uncompressing utility.
+* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives.
+
+**Building**
+
+* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts.
+* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program.
+* [gcc](https://gcc.gnu.org/): for compiling C.
+* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces.
+* [OpenSSL](https://www.openssl.org/): toolkit that implements the SSL and TLS protocols.
+* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java.
+
+
+## kerl Prerequisites
+
+[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems.
+
+Install kerl by running the following commands:
+
+```bash
+curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl
+chmod a+x kerl
+```
+
+If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl.
+
+Otherwise, continue with [Installing with kerl](#installing-with-kerl). 
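+
+To confirm that kerl itself works before continuing, you can ask it to list the Erlang releases it knows about:
+
+```bash
+./kerl list releases
+```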
+
+### Configuring kerl on FreeBSD/Solaris
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+  --enable-kernel-poll --without-odbc"
+```
+
+Then check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+If this returns `autoconf not found`, install autoconf by running:
+
+```shell
+sudo pkg update
+sudo pkg install autoconf
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+### Configuring kerl on Mac OS X
+
+To compile Erlang as 64-bit on Mac OS X you need to instruct kerl to pass the correct flags to the `configure` command.
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+  --enable-kernel-poll --without-odbc --enable-darwin-64bit"
+```
+
+On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).
+
+Check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf using one of the methods below.
+
+With Homebrew:
+
+```shell
+brew install autoconf
+```
+
+Or with curl:
+
+```shell
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+
+## Debian/Ubuntu Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `apt-get` commands:
+
+```bash
+sudo apt-get update
+sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+> **Note on build output**
+>
+>These packages are not required for operation of a Riak node.
Notes in the build output about missing support for wxWidgets can be
safely ignored when installing Riak in a typical non-graphical server
environment.
+
+To install packages for graphics support use the following `apt-get` command:
+
+```bash
+sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).
+
+
+
+## FreeBSD/Solaris Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `pkg` command:
+
+```bash
+sudo pkg update
+sudo pkg install gcc autoconf gmake flex
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support use the following `pkg` command:
+
+```bash
+sudo pkg install wx28-gtk2-2.8.12_4
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).
+
+
+
+## Mac OS X Prerequisites
+
+* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
+* [Homebrew](http://brew.sh/) (*optional*) - Package Manager. 
+
+First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X.
+
+We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is optional and is not required to install Erlang.
+
+Next, if you are running OS X 10.9 (Mavericks) or later, you may need to
+install [autoconf](https://www.gnu.org/software/autoconf/). To check for
+the presence of autoconf run:
+
+```bash
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf using one of the methods below.
+
+With Homebrew:
+
+```bash
+brew install autoconf
+```
+
+Or with curl:
+
+```bash
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've installed the prerequisites continue with [Installing on Mac OS X](#installing-on-mac-os-x).
+
+## RHEL/CentOS Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `yum` command:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support use the following `yum` command:
+
+```bash
+sudo yum install wxBase.x86_64
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos).
+
+
+
+## Installation
+
+* [Installing with kerl](#installing-with-kerl)
+* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu)
+* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris)
+* [Installing on Mac OS X](#installing-on-mac-os-x)
+* [Installing on RHEL/CentOS](#installing-on-rhel-centos)
+
+## Installing with kerl
+
+First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites).
+
+With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of
+Erlang [from GitHub](https://github.com/basho/otp) using the following
+command:
+
+```bash
+./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10
+```
+
+This builds the Erlang distribution and performs all of the steps
+required to manually install Erlang for you.
+
+After Erlang is successfully built, you can install the build as follows:
+
+```bash
+./kerl install R16B02-basho10 ~/erlang/R16B02-basho10
+. ~/erlang/R16B02-basho10/activate
+```
+
+The last line activates the Erlang build that was just installed into
+`~/erlang/R16B02-basho10`.
+
+> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands.
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
+## Installing on Debian/Ubuntu
+
+First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites).
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). 
+ +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). + +You can install Erlang in several ways on OS X: + +* [From Source](#installing-on-mac-os-x-from-source) +* [Homebrew](#installing-on-mac-os-x-with-homebrew) +* [MacPorts](#installing-on-mac-os-x-with-macports) + +## Installing on Mac OS X from Source + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Follow the steps below to configure Erlang for your operating system. + +#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7) + +If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion +(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang. 
+
+Using LLVM:
+
+```bash
+CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+Or if you prefer GCC:
+
+```bash
+CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on Snow Leopard (OS X 10.6)
+
+If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an
+Intel processor:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on older versions of OS X
+
+If you're on a non-Intel processor or an older version of OS X:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll
+```
+
+After you've configured your system, `cd` into the unpacked directory and build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with Homebrew
+
+To install Erlang with Homebrew, use this command:
+
+```bash
+brew install erlang
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with MacPorts
+
+To install Erlang with MacPorts, use this command:
+
+```bash
+port install erlang +ssl
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on RHEL/CentOS
+
+First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites).
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz) using `wget`:
+
+```bash
+wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+> **Note for RHEL 6/CentOS 6**
+>
+> In certain versions of RHEL 6 and CentOS 6, the `openssl-devel` package
+ships with Elliptic Curve Cryptography partially disabled. To
+communicate this to Erlang and prevent compile- and run-time errors, the
+environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to
+Erlang's `./configure` call.
+> +> The full `make` invocation then becomes +> +> ```bash +CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + + + + diff --git a/content/riak/kv/3.0.2/setup/installing/source/jvm.md b/content/riak/kv/3.0.2/setup/installing/source/jvm.md new file mode 100644 index 0000000000..26e9d57f9c --- /dev/null +++ b/content/riak/kv/3.0.2/setup/installing/source/jvm.md @@ -0,0 +1,55 @@ +--- +title: "Installing the JVM" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Installing the JVM" + identifier: "installing_source_jvm" + weight: 302 + parent: "installing_source" +toc: true +aliases: + - /riak/3.0.2/ops/building/installing/jvm + - /riak/kv/3.0.2/ops/building/installing/jvm + - /riak/3.0.2/ops/building/installing/Installing-the-JVM + - /riak/kv/3.0.2/ops/building/installing/Installing-the-JVM + - /riak/3.0.2/installing/source/jvm/ + - /riak/kv/3.0.2/installing/source/jvm/ +--- + +[usage search]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/search + +If you are using [Riak Search 2.0][usage search], codename Yokozuna, +you will need to install **Java 1.6 or later** to run [Apache +Solr](https://lucene.apache.org/solr/), the search platform that powers +Riak Search. + +We recommend using Oracle's [JDK +7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html). +Installation packages can be found on the [Java SE 7 Downloads +page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR) +and instructions on the [documentation +page](http://www.oracle.com/technetwork/java/javase/documentation/index.html). + +## Installing Solr on OS X + +If you're using Riak Search on Mac OS X, you may see the following +error: + +```java +java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME> +``` + +If you encounter this error, we recommend manually setting the hostname +for `localhost` using +[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html). + +```bash +scutil --set HostName "localhost" +``` + + + + diff --git a/content/riak/kv/3.0.2/setup/installing/suse.md b/content/riak/kv/3.0.2/setup/installing/suse.md new file mode 100644 index 0000000000..8e1f5354e4 --- /dev/null +++ b/content/riak/kv/3.0.2/setup/installing/suse.md @@ -0,0 +1,52 @@ +--- +title_supertext: "Installing on" +title: "SUSE" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "SUSE" + identifier: "installing_suse" + weight: 307 + parent: "installing" +toc: false +aliases: + - /riak/3.0.2/ops/building/installing/Installing-on-SUSE + - /riak/kv/3.0.2/ops/building/installing/Installing-on-SUSE + - /riak/3.0.2/installing/suse/ + - /riak/kv/3.0.2/installing/suse/ +--- + +[install verify]: {{<baseurl>}}riak/kv/3.0.2/setup/installing/verify + +{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}} +SUSE is no longer supported in Riak KV 2.9.8+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +Riak KV can be installed on OpenSuse and SLES systems using a binary package. 
The following steps have been tested to work with Riak on the following x86/x86_64 flavors of SuSE:
+
+* SLES11-SP1
+* SLES11-SP2
+* SLES11-SP3
+* SLES11-SP4
+* OpenSUSE 11.2
+* OpenSUSE 11.3
+* OpenSUSE 11.4
+
+## Installing with rpm
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm
+sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/installing/verify.md b/content/riak/kv/3.0.2/setup/installing/verify.md
new file mode 100644
index 0000000000..88d4a205ba
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/installing/verify.md
@@ -0,0 +1,169 @@
+---
+title: "Verifying a Riak KV Installation"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Verifying an Installation"
+    identifier: "installing_verify"
+    weight: 311
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/installing/Post-Installation
+  - /riak/kv/3.0.2/ops/installing/Post-Installation
+  - /riak/3.0.2/installing/verify-install/
+  - /riak/kv/3.0.2/installing/verify-install/
+---
+
+[client libraries]: {{<baseurl>}}riak/kv/3.0.2/developing/client-libraries
+[perf open files]: {{<baseurl>}}riak/kv/3.0.2/using/performance/open-files-limit
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/bucket-types
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/inspecting-node
+
+After you've installed Riak KV, we recommend checking the liveness of
+each node to ensure that requests are being properly served.
+
+In this document, we cover ways of verifying that your Riak nodes are operating
+correctly. After you've determined that your nodes are functioning and you're
+ready to put Riak KV to work, be sure to check out the resources in the
+**Now What?** section below.
+
+## Starting a Riak Node
+
+> **Note about source installations**
+>
+> To start a Riak KV node that was installed by compiling the source code, you
+can add the Riak KV binary directory from the installation directory you've
+chosen to your `PATH`.
+>
+> For example, if you compiled Riak KV from source in
+the `/home/riak` directory, then you can add the binary directory
+(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation.
+
+To start a Riak node, use the `riak start` command:
+
+```bash
+riak start
+```
+
+A successful start will return no output. If there is a problem starting the
+node, an error message is printed to standard error.
+
+To run Riak with an attached interactive Erlang console:
+
+```bash
+riak console
+```
+
+A Riak node is typically started in console mode as part of debugging or
+troubleshooting to gather more detailed information from the Riak startup
+sequence. Note that if you start a Riak node in this manner, it is running as
+a foreground process that will exit when the console is closed.
+
+You can close the console by issuing this command at the Erlang prompt:
+
+```erlang
+q().
+```
+
+Once your node has started, you can check that it is running with
+the `riak ping` command:
+
+```bash
+riak ping
+```
+
+The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not.
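+
+Because `riak ping` reports liveness on standard output, it is easy to wrap in a basic monitoring script. The sketch below is an illustration rather than official tooling; it assumes the `pong` output described above:
+
+```bash
+#!/bin/sh
+# Minimal liveness probe built around `riak ping`.
+if riak ping | grep -q '^pong'; then
+  echo "Riak node is up"
+else
+  echo "Riak node is not responding" >&2
+  exit 1
+fi
+```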
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the `default` [bucket type][cluster ops bucket types]:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /riak/test HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node!
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/installing/windows-azure.md b/content/riak/kv/3.0.2/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..1eabe5eed8
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/installing/windows-azure.md
@@ -0,0 +1,197 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/3.0.2/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/3.0.2/installing/windows-azure/
+  - /riak/kv/3.0.2/installing/windows-azure/
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported, and thus are not always up to
+date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Log in to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+    - Provide a "Virtual Machine Name", such as "testlinuxvm".
+    - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+    - In the "New Password" box, type a strong password.
+    - In the "Confirm Password" box, retype the password.
+    - Select the appropriate "Size" from the drop down list.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+    - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list.
+    - In the "DNS Name" box, type a valid DNS address, e.g., "testlinuxvm".
+    - In the "Storage Account" box, select "Use Automatically Generated Storage Account".
+    - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration2.png)
+
+6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration3.png)
+
+7. Wait while Windows Azure prepares your virtual machine.
+
+### Configure Endpoints
+
+Once the virtual machine is created, you must configure endpoints in order to connect to it remotely.
+
+1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints".
+
+2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows:
+    - Name: https
+    - Protocol: leave set to 'TCP'
+    - Public Port: 443
+    - Private Port: 8069
+
+## Connect to CentOS VMs using PuTTY or SSH
+
+When the virtual machine has been provisioned and the endpoints configured, you can connect to it using SSH or PuTTY.
+
+### Connecting Using SSH
+
+**For Linux & Mac Users:**
+
+```bash
+ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180
+```
+
+Enter the user's password.
+
+**For Windows Users, use PuTTY:**
+
+If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html).
+
+1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe.
+
+2. Enter the SSH details as found on the node's dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port.
+
+    ![]({{<baseurl>}}images/putty.png)
+
+## Install Riak and configure using a shell script
+
+1. **On each node**, once you've connected using the steps above, execute:
+
+```bash
+sudo su -
+curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh
+```
+
+## Configure Riak using Riak Control
+
+You can either use Riak Control or the command line to add nodes to your Riak cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line".
+
+1. Find the DNS name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For example:
+    - **dns:** basho-example.cloudapp.net
+    - **Deployment ID:** 7ea145743aeb4402a088da1234567890
+
+2. Visit https://dns-name.cloudapp.net/admin in your browser.
+
+3. Enter 'admin' as the username, and the "Deployment ID" as the password.
+
+4. Select 'Cluster' on the left.
+
+5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each VM, not the DNS name. For example:
+    - riak@basho-centos1
+
+You now have a Riak cluster on Azure.
+
+## Configure Riak using Command Line
+
+If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section.
+
+First, SSH into the second (and each subsequent) node and execute:
+
+```bash
+riak-admin cluster join riak@yourhostnamehere
+```
+
+(Where 'yourhostnamehere' is the short name of the **first node** in your cluster.)
+
+(NOTE: The host you choose can actually be any host that has already joined the cluster. The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak-admin cluster plan
+```
+
+Verify all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak-admin cluster commit
+```
+
+To check the status of clustering use:
+
+```bash
+riak-admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/planning.md b/content/riak/kv/3.0.2/setup/planning.md
new file mode 100644
index 0000000000..fdccca8c35
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/planning.md
@@ -0,0 +1,61 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage back end.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster.
+
+[Learn More >>][plan best practices]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/planning/backend.md b/content/riak/kv/3.0.2/setup/planning/backend.md
new file mode 100644
index 0000000000..729a734a56
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/planning/backend.md
@@ -0,0 +1,60 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/planning/backends/
+  - /riak/kv/3.0.2/ops/building/planning/backends/
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/multi
+[plan backend leveled]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveled
+[dev api backend]: {{<baseurl>}}riak/kv/3.0.2/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+* [Leveled][plan backend leveled]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic                      |Bitcask|LevelDB|Memory|
+:----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend                        |✓      |       |      |
+Persistent                                     |✓      |✓      |      |
+Keyspace in RAM                                |✓      |       |✓     |
+Keyspace can be greater than available RAM     |       |✓      |      |
+Keyspace loaded into RAM on startup<sup>1</sup>|✓      |       |      |
+Objects in RAM                                 |       |       |✓     |
+Object expiration                              |✓      |       |✓     |
+Secondary indexes                              |       |✓      |✓     |
+Tiered storage                                 |       |✓      |      |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces.
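+
+To confirm which backend a node is actually running, you can inspect its effective configuration. This is the same check that appears in the Bitcask documentation below:
+
+```bash
+# Shows the storage_backend value from the node's effective config
+riak config effective | grep backend
+```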
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/planning/backend/bitcask.md b/content/riak/kv/3.0.2/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..049dc741d2
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/planning/backend/bitcask.md
@@ -0,0 +1,994 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/advanced/backends/bitcask/
+  - /riak/kv/3.0.2/ops/advanced/backends/bitcask/
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/3.0.2/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: (1) data that is
+  written to Bitcask doesn't need to be ordered on disk, and (2) the
+  log-structured design of Bitcask allows for minimal disk head
+  movement during writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value, and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask,
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append-only and write-once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes.
  Recovery operations need to review only the last
+  record or two written and verify CRC data to ensure that the data is
+  consistent.
+
+* **Easy Backup**
+
+  In most systems, backup can be very complicated. Bitcask simplifies
+  this process due to its append-only, write-once disk format. Any
+  utility that archives or copies files in disk-block order will
+  properly back up or copy a Bitcask database.
+
+## Weaknesses
+
+* Keys must fit in memory
+
+  Bitcask keeps all keys in memory at all times, which means that your
+  system must have enough memory to contain your entire keyspace, plus
+  additional space for other operational components and
+  operating-system-resident filesystem buffer space.
+
+## Installing Bitcask
+
+Bitcask is the default storage engine for Riak. You can verify that
+Bitcask is currently being used as the storage backend with the
+[`riak`][use admin riak cli] command interface:
+
+```bash
+riak config effective | grep backend
+```
+
+If this operation returns anything other than `bitcask`, read
+the following section for instructions on switching the backend to Bitcask.
+
+## Enabling Bitcask
+
+You can set Bitcask as the storage engine using each node's
+[configuration files][config reference]:
+
+```riakconf
+storage_backend = bitcask
+```
+
+```appconfig
+{riak_kv, [
+    {storage_backend, riak_kv_bitcask_backend},
+    %% Other riak_kv settings...
+
+    ]},
+```
+
+## Configuring Bitcask
+
+Bitcask enables you to configure a wide variety of its behaviors, from
+filesystem sync strategy to merge settings and more.
+
+> **Note on configuration systems**
+>
+> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file.
+> Instructions for both systems will be included below. Narrative
+descriptions of the various settings will be tailored to the newer
+configuration system, whereas instructions for the older system will
+largely be contained in the code tabs.
+
+The default configuration values for Bitcask are as follows:
+
+```riakconf
+bitcask.data_root = ./data/bitcask
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    {data_root, "/var/lib/riak/bitcask"},
+    {io_mode, erlang},
+
+    %% Other Bitcask-specific settings
+    ]}
+```
+
+All of the other available settings listed below can be added to your
+configuration files.
+
+### Open Timeout
+
+The open timeout setting specifies the maximum time Bitcask will block
+on startup while attempting to create or open the Bitcask data
+directory. The default is 4 seconds.
+
+In general, you will not need to adjust this setting. If, however, you
+begin to receive log messages of the form `Failed to start bitcask
+backend: ...`, you may want to consider using a longer timeout.
+
+Open timeout is specified using the `bitcask.sync.open_timeout`
+parameter, and can be set in terms of seconds, minutes, hours, etc.
+The following example sets the parameter to 10 seconds:
+
+```riakconf
+bitcask.sync.open_timeout = 10s
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {open_timeout, 10} %% This value must be expressed in seconds
+    ...
+    ]}
+```
+
+### Sync Strategy
+
+Bitcask enables you to configure the durability of writes by specifying
+when to synchronize data to disk, i.e. by choosing a sync strategy. The
+default setting (`none`) writes data into operating system buffers that
+will be written to disk when those buffers are flushed by the operating
+system.
If the system fails before those buffers are flushed, e.g. due
+to power loss, that data is lost. This possibility holds for any
+database in which values are asynchronously flushed to disk.
+
+Thus, using the default setting of `none` protects against data loss in
+the event of application failure, i.e. process death, but leaves open a
+small window in which data could be lost in the event of a complete
+system failure, e.g. hardware or OS failure.
+
+This possibility can be prevented by choosing the `o_sync` sync
+strategy, which forces the operating system to flush to stable storage
+at write time for every write. The effect of flushing each write is
+better durability, although it should be noted that write throughput
+will suffer because each write will have to wait for the write to
+complete.
+
+The following sync strategies are available:
+
+  * `none` - lets the operating system manage syncing writes
+    (default)
+  * `o_sync` - uses the `O_SYNC` flag, which forces syncs on every
+    write
+  * Time interval - Riak will force Bitcask to sync at specified
+    intervals
+
+The following are possible configurations:
+
+```riakconf
+bitcask.sync.strategy = none
+bitcask.sync.strategy = o_sync
+
+bitcask.sync.strategy = interval
+bitcask.sync.interval = 65s
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {sync_strategy, none},
+    {sync_strategy, o_sync},
+    {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds
+    ...
+    ]}
+```
+
+> **Sync strategy interval limitations**
+>
+> Setting the sync interval to a value lower than or equal to
+`riak_core.vnode_inactivity_timeout` (default: 60 seconds) will
+prevent Riak from performing handoffs.
+>
+> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive.
+
+### Max File Size
+
+The `max_file_size` setting describes the maximum permitted size for any
+single data file in the Bitcask directory. If a write causes the current
+file to exceed this size threshold, then that file is closed and a new
+file is opened for writes. The default is 2 GB.
+
+Increasing `max_file_size` will cause Bitcask to create fewer, larger
+files that are merged less frequently, while decreasing it will cause
+Bitcask to create more numerous, smaller files that are merged more
+frequently.
+
+To give an example, if your ring size is 16, your servers could see as
+much as 32 GB of data in the bitcask directories before the first merge
+is triggered, irrespective of your working set size. You should plan
+storage accordingly and be aware that it is possible to see disk data
+sizes that are larger than the working set.
+
+The `max_file_size` setting can be specified using kilobytes, megabytes,
+etc. The following example sets the max file size to 1 GB:
+
+```riakconf
+bitcask.max_file_size = 1GB
+```
+
+```appconfig
+%% The max_file_size setting must be expressed in bytes, as in the
+%% example below
+
+{bitcask, [
+    ...,
+    {max_file_size, 16#40000000}, %% 1 GB expressed in bytes
+    ...
+    ]}
+```
+
+### Hint File CRC Check
+
+During startup, Bitcask will read from `.hint` files in order to build
+its in-memory representation of the key space, falling back to `.data`
+files if necessary. This reduces the amount of data that must be read
+from the disk during startup, thereby also reducing the time required to
+start up.
You can configure Bitcask to either disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) - Writes are made via Erlang's built-in file API
+* `nif` - Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif` and is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met, at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory.
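+
+To get a rough sense of this overhead on a live node, you can inspect the sizes of the per-vnode Bitcask directories directly. This is an illustrative sketch, not part of the official tooling; the path assumes the packaged default `data_root` of `/var/lib/riak/bitcask` shown earlier, and `sort -h` requires GNU coreutils:
+
+```bash
+# List the largest per-vnode Bitcask directories; sizes include dead
+# data that has not yet been reclaimed by a merge.
+du -sh /var/lib/riak/bitcask/* | sort -h | tail
+```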
+ +The merge process is affected by all of the settings described in the +sections below. In those sections, "dead" refers to keys that no longer +contain the most up-to-date values, while "live" refers to keys that do +contain the most up-to-date value and have not been deleted. + +### Merge Policy + +Bitcask enables you to select a merge policy, i.e. when during the day +merge operations are allowed to be triggered. The valid options are: + +* `always` - No restrictions on when merge operations can occur + (default) +* `never` - Merge will never be attempted +* `window` - Merge operations occur during specified hours + +If you are using the newer, `riak.conf`-based configuration system, you +can select a merge policy using the `merge.policy` setting. The +following example sets the merge policy to `never`: + +```riakconf +bitcask.merge.policy = never +``` + +```appconfig +{bitcask, [ + ..., + {merge_window, never}, + ... + ]} +``` + +If you opt to specify start and end hours for merge operations, you can +do so with the `merge.window.start` and `merge.window.end` +settings in addition to setting the merge policy to `window`. +Each setting is an integer between 0 and 23 for hours on a 24h clock, +with 0 meaning midnight and 23 standing for 11 pm. +The merge window runs from the first minute of the `merge.window.start` hour +to the last minute of the `merge.window.end` hour. +The following example enables merging between 3 am and 4:59 pm: + +```riakconf +bitcask.merge.policy = window +bitcask.merge.window.start = 3 +bitcask.merge.window.end = 17 +``` + +```appconfig +%% In the app.config-based system, you specify the merge window using +%% a tuple, as in the following example: + +{bitcask, [ + ..., + {merge_window, {3, 17}}, + ... + ]} +``` + +> **`merge_window` and the Multi backend** +> +>If you are using the older configuration system and using Bitcask with +the [Multi][plan backend multi] backend, please note that if you +wish to use a merge window, you _must_ set it in the global `bitcask` +section of your configuration file. `merge_window` settings +in per-backend sections are ignored. + +If merging has a significant impact on performance of your cluster, or +if your cluster has quiet periods in which little storage activity +occurs, you may want to change this setting from the default. + +A common way to limit the impact of merging is to create separate merge +windows for each node in the cluster and ensure that these windows do +not overlap. This ensures that at most one node at a time can be +affected by merging, leaving the remaining nodes to handle requests. +The main drawback of this approach is that merges will occur less +frequently, leading to increased disk space usage. + +### Merge Triggers + +Merge triggers determine the conditions under which merging will be +invoked. These conditions fall into two basic categories: + +* **Fragmentation** - This describes the ratio of dead keys to total + keys in a file that will trigger merging. The value of this setting is + an integer percentage (0-100). For example, if a data file contains 6 + dead keys and 4 live keys, a merge will be triggered by the default + setting (60%). Increasing this value will cause merging to occur less + often, whereas decreasing the value will cause merging to happen more + often. + +* **Dead Bytes** - This setting describes how much data stored for + dead keys in a single file will trigger merging. If a file meets or + exceeds the trigger value for dead bytes, a merge will be triggered. 
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+  When either of these constraints is met by any file in the directory,
+  Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** - This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in the
+  merge. The value of this setting is a percentage (0-100). For
+  example, if a data file contains 4 dead keys and 6 live keys, it will
+  be included in the merge at the default ratio (40%). Increasing the
+  value will cause fewer files to be merged, while decreasing the value
+  will cause more files to be merged.
+
+* **Dead Bytes** - This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be included in
+  the merge. Increasing this value will cause fewer files to be merged,
+  while decreasing this value will cause more files to be merged. The
+  default is 128 MB.
+
+* **Small File** - This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be
+  merged. The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.threshold.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_.
If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +Fold keys thresholds will reuse the keydir (a) if another fold was +started less than a specified time interval ago and (b) there were fewer +than a specified number of updates. Otherwise, Bitcask will wait until +all current fold keys complete and then start. The default time interval +is 0, while the default number of updates is unlimited. Both thresholds +can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. Each threshold can be disabled by setting the value to +`unlimited`. 
The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value, or if you need to purge data for space reasons, you can
+configure object expiration, aka expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting
+and either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time.
+Bitcask will defer triggering a merge solely for key expiry by the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting and
+in terms of minutes, hours, days, etc. The following example sets the
+grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If search queries returning
+expired keys is a problem for your use case, then we would recommend not
+using automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can assist you in making Bitcask as stable and reliable as
+possible and in minimizing latency and maximizing throughput.
+
+### Tips & Tricks
+
+  * **Bitcask depends on filesystem caches**
+
+    Some data storage layers implement their own page/block buffer cache
+    in-memory, but Bitcask does not. Instead, it depends on the
+    filesystem's cache. Adjusting the caching characteristics of your
+    filesystem can impact performance.
+
+  * **Be aware of file handle limits**
+
+    Review the documentation on [open files limit][perf open files].
+
+  * **Avoid the overhead of updating file metadata (such as last access
+    time) on every read or write operation**
+
+    You can achieve a substantial speed boost by adding the `noatime`
+    mounting option to Linux's `/etc/fstab`. This will disable the
+    recording of the last accessed time for all files, which results
+    in fewer disk head seeks. If you need last access times but you'd
+    like some of the benefits of this optimization, you can try
+    `relatime`.
+
+    ```
+    /dev/sda5    /data           ext3    noatime  1 1
+    /dev/sdb1    /data/inno-log  ext3    noatime  1 2
+    ```
+
+  * **Small number of frequently changed keys**
+
+    When keys are changed frequently, fragmentation rapidly increases.
+    To counteract this, you should lower the fragmentation trigger and
+    threshold.
+
+  * **Limited disk space**
+
+    When disk space is limited, limiting the space occupied by dead keys
+    is of paramount importance. Lower the dead bytes threshold and
+    trigger to counteract wasted space.
+
+  * **Purging stale entries after a fixed period**
+
+    To automatically purge stale values, set the object expiry value to
+    the desired cutoff time. Keys that are not modified for a period
+    equal to or greater than this time interval will become
+    inaccessible.
+
+  * **High number of partitions per node**
+
+    Because each node hosts many running partitions, Bitcask will have
+    many [open files][perf open files]. To reduce the number of open
+    files, we suggest increasing the max file size so that larger files
+    will be written. You could also decrease the fragmentation and
+    dead-bytes settings and increase the small file threshold so that
+    merging will keep the number of open files small in number.
+
+  * **High daytime traffic, low nighttime traffic**
+
+    In order to cope with a high volume of writes without performance
+    degradation during the day, you might want to limit merging to
+    non-peak periods. Setting the merge window to hours of the day
+    when traffic is low will help.
+
+  * **Multi-cluster replication**
+
+    If you are using Riak with the replication feature enabled, your clusters might experience
+    higher rates of fragmentation and dead bytes. Additionally,
+    because the fullsync feature operates across entire partitions, it
+    will be made more efficient by accessing data as sequentially as
+    possible (across fewer files). Lowering both the fragmentation and
+    dead-bytes settings will improve performance.
+
+## FAQ
+
+  * [[Why does it seem that Bitcask merging is only triggered when a
+    Riak node is restarted?|Developing on Riak
+    FAQs#why-does-it-seem-that-bitc]]
+  * [[If the size of key index exceeds the amount of memory, how does
+    Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+  * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time.
The file being
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, while
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. This data management strategy
+may use up a lot of space over time, since Bitcask writes new values
+without touching the old ones.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033322.9.844576013453623296
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+9819464125817003981681007469812.9.03831329677312
+```
+
+Note that at startup, directories are created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 116463411724806326294356135107078803128832.9.0184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered.
+ +``` +bitcask/ +|-- 0-1317147619996589 +| |-- 1317147974.bitcask.data +| |-- 1317147974.bitcask.hint +| |-- 1317221578.bitcask.data +| |-- 1317221578.bitcask.hint +| |-- 1317221869.bitcask.data +| |-- 1317221869.bitcask.hint +| |-- 1317222847.bitcask.data +| |-- 1317222847.bitcask.hint +| |-- 1317222868.bitcask.data +| |-- 1317222868.bitcask.hint +| |-- 1317223014.bitcask.data +| `-- 1317223014.bitcask.hint +|-- 1004782375664995756265033322.9.844576013453623296-1317147628760580 +| |-- 1317147693.bitcask.data +| |-- 1317147693.bitcask.hint +| |-- 13172.9.05.bitcask.data +| |-- 13172.9.05.bitcask.hint +| |-- 1317222514.bitcask.data +| |-- 1317222514.bitcask.hint +| |-- 1317223035.bitcask.data +| |-- 1317223035.bitcask.hint +| |-- 1317223411.bitcask.data +| `-- 1317223411.bitcask.hint +|-- 1027618338748291114361965898003636498195577569280-1317223690337865 +|-- 1050454301831586472458898473514828420377701515264-1317223690151365 + +... etc ... + +``` + +This is normal operational behavior for Bitcask. + + + + diff --git a/content/riak/kv/3.0.2/setup/planning/backend/leveldb.md b/content/riak/kv/3.0.2/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..a4fbd0e435 --- /dev/null +++ b/content/riak/kv/3.0.2/setup/planning/backend/leveldb.md @@ -0,0 +1,506 @@ +--- +title: "LevelDB" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "LevelDB" + identifier: "planning_backend_leveldb" + weight: 101 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/3.0.2/ops/advanced/backends/leveldb/ + - /riak/kv/3.0.2/ops/advanced/backends/leveldb/ +--- + +[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}} +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode +[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference +[perf index]: {{<baseurl>}}riak/kv/3.0.2/using/performance +[config reference#aae]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/#active-anti-entropy + +> **Note on upgrading to 2.0** +> +> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0, +and wish to keep using your old `app.config` file for configuration, +make sure to follow the steps for setting the +`total_leveldb_mem_percent` parameter in the +[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB]. + +[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application +that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an +open-source, on-disk key/value store created by Google Fellows Jeffrey +Dean and Sanjay Ghemawat. + +LevelDB is a relatively new entrant into the growing list of key/value +database libraries, but it has some very interesting qualities that we +believe make it an ideal candidate for use in Riak. LevelDB's storage +architecture is more like +[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable +model than it is like Bitcask. This design and implementation provide +the possibility of a storage engine without Bitcask's RAM limitation. + +> **Note:** Riak uses a fork of LevelDB. The code can be found +[on Github](https://github.com/basho/leveldb). + +A number of changes have been introduced in the LevelDB backend in Riak +2.0: + +* There is now only _one_ performance-related setting that Riak users + need to define---`leveldb.total_mem_percent`---as LevelDB now + dynamically sizes the file cache and block sizes based upon active + [vnodes][glossary vnode] assigned to the node. 
+
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model
+  for background compaction work on `.sst` table files. The new model
+  has increased throughput by at least 10% in all test scenarios.
+* Delete operations now receive priority handling in compaction
+  selection, which means more aggressive reclaiming of disk space than
+  in previous versions of Riak's LevelDB backend.
+* Nodes storing massive key datasets (e.g. in the billions of keys) now
+  receive increased throughput due to automatic management of LevelDB's
+  block size parameter. This parameter is slowly raised to increase the
+  number of files that can be open simultaneously, improving random read
+  performance.
+
+## Strengths
+
+1. **License** - The LevelDB and eLevelDB licenses are the [New BSD
+   License](http://www.opensource.org/licenses/bsd-license.php) and the
+   [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html),
+   respectively. We'd like to thank Google and the authors of LevelDB at
+   Google for choosing a completely FLOSS license so that everyone can
+   benefit from this innovative storage engine.
+2. **Data compression** - LevelDB provides two compression algorithms
+   to reduce storage size and increase efficient use of storage bandwidth:
+   * Google's [Snappy](https://code.google.com/p/snappy/) data compression
+   * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data
+     compression
+
+   Enabling compression means more CPU usage but less disk space. Compression
+   is especially good for text data, including raw text, Base64, JSON, etc.
+
+## Weaknesses
+
+1. Read access can be slow when there are many levels to search.
+2. LevelDB may have to do a few disk seeks to satisfy a read: one disk
+   seek per level. If 10% of the database fits in memory, LevelDB needs
+   one seek, for the last level (since all of the earlier levels should
+   end up cached in the OS buffer cache for most filesystems); if only
+   1% fits in memory, LevelDB will need two seeks.
+
+## Installing eLevelDB
+
+Riak ships with eLevelDB included within the distribution, so there is
+no separate installation required. However, Riak is configured to use
+the Bitcask storage engine by default. To switch to eLevelDB, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveldb`:
+
+```riakconf
+storage_backend = leveldb
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_eleveldb_backend},
+    %% ...
+    ]}
+```
+
+## Configuring eLevelDB
+
+eLevelDB's default behavior can be modified by adding/changing
+parameters in the `eleveldb` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for eLevelDB are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.data_root` | LevelDB data root | `./data/leveldb`
+`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size.
| `70`
+
+If you are using the older, `app.config`-based system, the equivalent to
+the `leveldb.data_root` is the `data_root` setting, as in the following
+example:
+
+```appconfig
+{eleveldb, [
+    {data_root, "/path/to/leveldb"},
+
+    %% Other eleveldb-specific settings
+]}
+```
+
+The `leveldb.maximum_memory.percent` setting is only available in the
+newer configuration system.
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### ext4 Options
+
+The ext4 filesystem defaults include two options that increase integrity
+but slow performance. Because Riak's integrity is based on multiple
+nodes holding the same data, these two options can be changed to boost
+LevelDB's performance. We recommend setting `barrier=0` and
+`data=writeback`.
+
+#### CPU Throttling
+
+If CPU throttling is enabled, disabling it can boost LevelDB performance
+in some cases.
+
+#### No Entropy
+
+If you are using the HTTPS protocol, be aware that the 2.6 kernel is
+widely known for stalling programs waiting for SSL entropy bits. In that
+case, we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line. The TSC clocksource has been identified as causing issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a
+Google-sponsored open source project that has been incorporated into an
+Erlang application and integrated into Riak for storage of key/value
+information on disk. The implementation of LevelDB is similar in spirit
+to the representation of a single Bigtable tablet (section 5.3).
+
+### How Levels Are Managed
+
+LevelDB is a memtable/sstable design. The set of sorted tables is
+organized into a sequence of levels. Each level stores approximately ten
+times as much data as the level before it. The sorted table generated
+from a flush is placed in a special young level (also called level-0).
+When the number of young files exceeds a certain threshold (currently
+four), all of the young files are merged together with all of the
+overlapping level-1 files to produce a sequence of new level-1 files (a
+new level-1 file is created for every 2MB of data).
+
+Files in the young level may contain overlapping keys. However, files in
+other levels have distinct, non-overlapping key ranges. Consider level
+number L where L >= 1. When the combined size of files in level-L
+exceeds (10^L) MB (i.e.
10MB for level-1, 100MB for level-2, ...), one
+file in level-L, and all of the overlapping files in level-(L+1), are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-L+1). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 seconds.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0, the only special case to consider is that, after
+picking the primary L0 file to compact, LevelDB will check other L0
+files to determine the degree to which they overlap. This is an attempt
+to avoid some I/O; as a result, we can expect L0 compactions to usually,
+if not always, involve all of the L0 files.
+
+See the `PickCompaction` routine in
+[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level, since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition
+ring, which results in 64 separate directories, each with its own
+LevelDB database:
+
+```bash
+leveldb/
+|-- 0
+|   |-- 000003.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   `-- MANIFEST-000002
+|-- 1004782375664995756265033322.9.844576013453623296
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+
+... etc ...
+
+`-- 9819464125817003981681007469812.9.03831329677312
+    |-- 000005.log
+    |-- CURRENT
+    |-- LOCK
+    |-- LOG
+    |-- LOG.old
+    `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033322.9.844576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files in a
+single database directory. In Riak 1.3, the original LevelDB code was
+modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in the name of speeding up
+database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels.
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows three approximate sizes (in
+megabytes) for each level: the amount of raw data stored in the level,
+the cumulative size of all levels up to and including the specified
+level, and the cumulative size including active anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be approximately 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above.
Select the third column if you are not
+  using active anti-entropy or the fourth column if you are (i.e. if the
+  `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative value in
+  each row of the table. The first result that exceeds your fast storage
+  array capacity will provide the level number that should be used for
+  your `leveldb.tiered` setting.
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting the configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/planning/backend/leveled.md b/content/riak/kv/3.0.2/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..0be0893191
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/planning/backend/leveled.md
@@ -0,0 +1,141 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/advanced/backends/leveled/
+  - /riak/kv/3.0.2/ops/advanced/backends/leveled/
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/3.0.2/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/#active-anti-entropy
+
+[Leveled](https://github.com/martinsumner/leveled) is a simple key-value
+store based on the concept of log-structured merge trees, with the
+following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+  - Splits the storage of each value between keys/metadata and body
+    (assuming some definition of metadata is provided);
+  - Allows the application to define what constitutes object metadata
+    and what constitutes the body (value-part) of the object, and to
+    assign tags to objects to manage multiple object types with
+    different extraction rules.
+- Stores keys/metadata in a merge tree and the full object in a journal
+  of CDB files:
+  - Allows for HEAD requests, which have lower overheads than GET
+    requests; and
+  - Allows queries which traverse keys/metadata to be supported with
+    fewer side effects on the page cache than folds over keys/objects.
+- Support for tagging of object types and the implementation of
+  alternative store behaviour based on type:
+  - Allows for changes to extract specific information as metadata to be
+    returned from HEAD requests;
+  - Potentially usable for objects with special retention or merge
+    properties.
+- Support for low-cost clones without locking to provide for scanning
+  queries (e.g. secondary indexes), at low cost specifically where there
+  is a need to scan across keys and metadata (not values).
+- Written in Erlang as a message-passing system between actors.
+
+## Strengths
+
+1. Leveled was developed specifically as a potential backend for Riak,
+   with features such as:
+   * Support for secondary indexes
+   * Multiple fold types
+   * Auto expiry of objects
+
+   Leveled also supports compression (see `leveled.compression_method`
+   below); enabling compression means more CPU usage but less disk
+   space.
Compression
+   is especially good for text data, including raw text, Base64, JSON,
+   etc.
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and more likely to suffer from edge case issues than Bitcask or LevelDB, simply because they've been around longer and have been more thoroughly tested via usage in customer environments.
+2. Leveled works better with medium-to-large objects. It works perfectly well with small objects, but the additional disk space overhead may render LevelDB a better choice if disk space is at a premium and all of your data will be exclusively limited to a few KB or less, though this may change as Leveled matures.
+
+## Installing leveled
+
+Leveled is included with Riak KV 2.9.8 and beyond, so there is no need to install anything further. To switch to leveled, set the `storage_backend` variable in [`riak.conf`][config reference] to `leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_leveled_backend},
+    %% ...
+    ]}
+```
+
+## Configuring leveled
+
+Leveled's default behavior can be modified by adding/changing
+parameters in the `leveled` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | Compression method. | `native`
+`leveled.compression_point` | Compression point - the point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | Log level - sets the minimum log level to be used within leveled. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
+`leveled.max_run_length` | Max journal files per compaction run. | `4`
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### No Entropy
+
+If you are using the HTTPS protocol, be aware that the 2.6 kernel is
+widely known for stalling programs waiting for SSL entropy bits. In that
+case, we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line.
The TSC clocksource has been identified as causing issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[Leveled](https://github.com/martinsumner/leveled) is an open source project that has been developed specifically as a backend option for Riak, rather than as a generic backend.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/planning/backend/memory.md b/content/riak/kv/3.0.2/setup/planning/backend/memory.md
new file mode 100644
index 0000000000..6deac6781f
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/planning/backend/memory.md
@@ -0,0 +1,147 @@
+---
+title: "Memory"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Memory"
+    identifier: "planning_backend_memory"
+    weight: 102
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/advanced/backends/memory/
+  - /riak/kv/3.0.2/ops/advanced/backends/memory/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/multi
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb
+
+The Memory storage backend uses in-memory tables to store all data.
+This data is never persisted to disk or to any other storage mechanism.
+The Memory storage engine is best used for testing Riak clusters or for
+storing small amounts of transient state in production systems.
+
+Internally, the Memory backend uses Erlang ETS tables to manage data.
+More information can be found in the
+[official Erlang documentation](http://www.erlang.org/doc/man/ets.html).
+
+## Enabling the Memory Backend
+
+To enable the memory backend, edit your [configuration files][config reference]
+for each Riak node and specify the Memory backend as shown in the following
+example:
+
+```riakconf
+storage_backend = memory
+```
+
+```appconfig
+{riak_kv, [
+    ...,
+    {storage_backend, riak_kv_memory_backend},
+    ...
+    ]}
+```
+
+**Note**: If you *replace* the existing specified backend by removing it
+or commenting it out, data belonging to
+the previously specified backend will still be preserved on the
+filesystem but will no longer be accessible through Riak unless the
+backend is enabled again.
+
+If you require multiple backends in your configuration, please consult
+the [Multi backend documentation][plan backend multi].
+
+## Configuring the Memory Backend
+
+The Memory backend enables you to configure two fundamental aspects of
+object storage: maximum memory usage per [vnode][glossary vnode]
+and object expiry.
+
+### Max Memory
+
+This setting specifies the maximum amount of memory consumed by the
+Memory backend. It's important to note that this setting acts on a
+*per-vnode basis*, not on a per-node or per-cluster basis. This should
+be taken into account when planning for memory usage with the Memory
+backend, as the total memory used across the cluster will be the max
+memory setting multiplied by the number of vnodes in the cluster.
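+
+As a hypothetical illustration of that arithmetic (the numbers are
+assumptions, not recommendations), using the
+`memory_backend.max_memory_per_vnode` setting described below: with a
+ring size of 64 spread evenly across 8 nodes, each node hosts roughly 8
+vnodes, so a 512 MB per-vnode limit implies up to about 4 GB of Memory
+backend usage per node and about 32 GB across the cluster:
+
+```riakconf
+## Hypothetical sizing: ring_size = 64, 8 nodes => ~8 vnodes per node
+## Per node:   8 vnodes x 512 MB = ~4 GB
+## Cluster:   64 vnodes x 512 MB = ~32 GB
+memory_backend.max_memory_per_vnode = 512MB
+```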
+
+When the threshold value that you set has been met in a particular
+vnode, Riak will begin discarding objects, beginning with the oldest
+object and proceeding until memory usage returns below the allowable
+threshold.
+
+You can configure maximum memory using the
+`memory_backend.max_memory_per_vnode` setting. You can specify
+`max_memory_per_vnode` however you'd like, using kilobytes, megabytes,
+or even gigabytes.
+
+The following are all possible settings:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 500KB
+memory_backend.max_memory_per_vnode = 10MB
+memory_backend.max_memory_per_vnode = 2GB
+```
+
+```appconfig
+%% In the app.config-based system, the equivalent setting is max_memory,
+%% which must be expressed in megabytes:
+
+{riak_kv, [
+    %% storage_backend specifies the Erlang module defining the storage
+    %% mechanism that will be used on this node.
+
+    {storage_backend, riak_kv_memory_backend},
+    {memory_backend, [
+        ...,
+        {max_memory, 4096}, %% 4GB in megabytes
+        ...
+    ]}
+]}
+```
+
+To determine an optimal max memory setting, we recommend consulting the
+documentation on [LevelDB cache size][plan backend leveldb].
+
+### TTL
+
+The time-to-live (TTL) parameter specifies the amount of time an object
+remains in memory before it expires. The minimum time is one second.
+
+In the newer, `riak.conf`-based configuration system, you can specify
+`ttl` in seconds, minutes, hours, days, etc. The following are all
+possible settings:
+
+```riakconf
+memory_backend.ttl = 1s
+memory_backend.ttl = 10m
+memory_backend.ttl = 3h
+```
+
+```appconfig
+%% In the app.config-based system, the ttl setting must be expressed in
+%% seconds:
+
+{memory_backend, [
+    %% other settings
+    {ttl, 86400}, %% Set to 1 day
+    %% other settings
+    ]}
+```
+
+> **Dynamically Changing `ttl`**
+>
+> There is currently no way to dynamically change the `ttl` setting for a
+bucket or bucket type. The current workaround would be to define
+multiple Memory backends using the Multi backend, each with different
+`ttl` values. For more information, consult the documentation on the
+[Multi][plan backend multi] backend.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/planning/backend/multi.md b/content/riak/kv/3.0.2/setup/planning/backend/multi.md
new file mode 100644
index 0000000000..22c048a971
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/planning/backend/multi.md
@@ -0,0 +1,230 @@
+---
+title: "Multi-backend"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Multi-backend"
+    identifier: "planning_backend_multi"
+    weight: 103
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/advanced/backends/multi/
+  - /riak/kv/3.0.2/ops/advanced/backends/multi/
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/memory
+[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types
+[use admin riak-admin cli]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin
+
+Riak allows you to run multiple backends within a single Riak cluster.
+Selecting the Multi backend enables you to use different storage
+backends for different [buckets][concept buckets].
Any combination of the three
+available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used.
+
+## Configuring Multiple Backends
+
+You can set up your cluster to use the Multi backend using Riak's
+[configuration files][config reference].
+
+```riakconf
+storage_backend = multi
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_multi_backend},
+    %% ...
+]},
+```
+
+Remember that you must stop and then re-start each node when you change
+storage backends or modify any other configuration.
+
+## Using Multiple Backends
+
+In Riak 2.0 and later, we recommend using multiple backends by applying
+them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process
+involves three steps:
+
+1. Creating a bucket type that enables buckets of that type to use the
+   desired backends
+2. Activating that bucket type
+3. Setting up your application to use that type
+
+Let's say that we've set up our cluster to use the Multi backend and we
+want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface.
+
+We'll call our bucket types `leveldb_backend` and `memory_backend`, but
+you can use whichever names you wish.
+
+```bash
+riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}'
+riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}'
+```
+
+Then, we must activate those bucket types so that they can be used in
+our cluster:
+
+```bash
+riak-admin bucket-type activate leveldb_backend
+riak-admin bucket-type activate memory_backend
+```
+
+Once those types have been activated, any objects stored in buckets
+bearing the type `leveldb_backend` will be stored in LevelDB, whereas
+all objects stored in buckets of the type `memory_backend` will be
+stored in the Memory backend.
+
+More information can be found in our documentation on [using bucket types][usage bucket types].
+
+## Configuring Multiple Backends
+
+Once you've set up your cluster to use multiple backends, you can
+configure each backend on its own. All configuration options available
+for LevelDB, Bitcask, and Memory are available to you when using the
+Multi backend.
+
+#### Using the Newer Configuration System
+
+If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by
+prefacing each configuration with `multi_backend`.
+
+Here is an example of the general form for configuring multiple
+backends:
+
+```riakconf
+multi_backend.$name.$setting_name = setting
+```
+
+If you are using, for example, the LevelDB and Bitcask backends and wish
+to set LevelDB's `bloomfilter` setting to `off` and the Bitcask
+backend's `io_mode` setting to `nif`, you would do that as follows:
+
+```riakconf
+multi_backend.leveldb.bloomfilter = off
+multi_backend.bitcask.io_mode = nif
+```
+
+#### Using the Older Configuration System
+
+If you are using the older, `app.config`-based configuration system,
+configuring multiple backends involves adding one or more
+backend-specific sections to your `riak_kv` settings (in addition to
+setting the `storage_backend` setting to `riak_kv_multi_backend`, as
+shown above).
+
+> **Note**: If you are defining multiple file-based backends of the same
+type, each of these must have a separate `data_root` directory defined.
+
+While all configuration parameters can be placed anywhere within the
+`riak_kv` section of `app.config`, in general we recommend that you
+place them in the section containing other backend-related settings to
+keep the settings organized.
+
+Below is the general form for your `app.config` file:
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        %% Here's where you set the individual multiplexed backends
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_mult/"},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_expiry_mult/"},
+            {expiry_secs, 86400},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb with a different configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"memory_mult">>, riak_kv_memory_backend, [
+            %% memory configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]}
+    ]},
+    %% ...
+]},
+```
+
+Note that in each of the subsections of the `multi_backend` setting, the
+name of each backend you wish to configure can be anything you would
+like. Directly after naming the backend, you must specify which of the
+backends corresponds to that name, i.e. `riak_kv_bitcask_backend`,
+`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have
+done that, the various configurations for each named backend can be set
+as objects in an Erlang list.
+
+## Example Configuration
+
+Imagine that you are using both Bitcask and LevelDB in your cluster, and
+you would like storage to default to Bitcask. The following
+configuration would create two backend configurations, named
+`bitcask_mult` and `leveldb_mult`, respectively, while also setting the
+data directory for each backend and specifying that `bitcask_mult` is
+the default.
+
+```riakconf
+storage_backend = multi
+
+multi_backend.bitcask_mult.storage_backend = bitcask
+multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult
+
+multi_backend.leveldb_mult.storage_backend = leveldb
+multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult
+
+multi_backend.default = bitcask_mult
+```
+
+```appconfig
+{riak_kv, [
+  %% ...
+
+  {multi_backend_default, <<"bitcask_mult">>},
+  {multi_backend, [
+    {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+      {data_root, "/var/lib/riak/bitcask"}
+    ]},
+    {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+      {data_root, "/var/lib/riak/leveldb"}
+    ]}
+  ]}
+  %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or for the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is at most
+50%. For example, using three backends with each set to 50% memory
+usage will inevitably lead to memory problems.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/planning/best-practices.md b/content/riak/kv/3.0.2/setup/planning/best-practices.md
new file mode 100644
index 0000000000..c274301196
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/planning/best-practices.md
@@ -0,0 +1,145 @@
+---
+title: "Scaling and Operating Riak Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Best Practices"
+    identifier: "planning_best_practices"
+    weight: 105
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/planning/best-practices
+  - /riak/kv/3.0.2/ops/building/planning/best-practices
+---
+
+[use ref handoff]: {{<baseurl>}}riak/kv/3.0.2/using/reference/handoff
+[config mapreduce]: {{<baseurl>}}riak/kv/3.0.2/configuring/mapreduce
+[glossary aae]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#active-anti-entropy-aae
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes
+
+Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster.
+
+## Disk Capacity
+
+Filling up disks is a serious problem in Riak. In general, you should
+add capacity under the following conditions:
+
+* a disk becomes more than 80% full
+* you have fewer than 10 days of capacity remaining at current rates of
+  growth
+
+## RAID Levels
+
+Riak provides resilience through its built-in redundancy.
+
+* RAID0 can be used to increase the performance at the expense of
+  single-node reliability
+* RAID5/6 can be used to increase the reliability over RAID0 but still
+  offers higher performance than single disks
+* You should choose a RAID level (or no RAID) that you’re comfortable
+  with
+
+## Disk Leeway
+
+* Adding new nodes instantly increases the total capacity of the
+  cluster, but you should allow enough internal network capacity that
+  [handing off][use ref handoff] existing data outpaces the arrival of new
+  data.
+* Once you’ve reached a scale at which the amount of new data arriving
+  is a small fraction of the cluster's total capacity, you can add new
+  nodes when you need them. You should be aware, however, that adding
+  new nodes can actually _increase_ disk usage on existing nodes in the
+  short term as data is rebalanced within the cluster.
+* If you are certain that you are likely to run out of capacity, we
+  recommend allowing a week or two of leeway so that you have plenty of
+  time to add nodes and for [handoff][use ref handoff] to occur before the disks reach
+  capacity
+* For large volumes of storage it's usually prudent to add more capacity
+  once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+  processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you’ll have spare
+  capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be “bursty,” i.e. it tends to vary both widely
+  and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times the amount of intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+  fill up
+* the current node's IO/CPU activity is higher than average for an
+  extended period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node uses 1 TB but the
+  rest use 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation (see the example below)
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node].
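+
+As an illustration, a hypothetical join of two new nodes in a single
+plan/commit cycle might look like the following (the node names are
+placeholders); batching the joins this way means the ring is rebalanced
+once rather than once per node:
+
+```bash
+# Join both new nodes before planning, so data is transferred only once
+riak-admin cluster join riak@node4.example.com
+riak-admin cluster join riak@node5.example.com
+
+# Review the proposed ownership transfers, then apply them
+riak-admin cluster plan
+riak-admin cluster commit
+```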
+ +## Scaling + +* All large-scale systems are bound by the availability of some + resources +* From a stability point of view, the best state for a busy Riak cluster + to maintain is the following: + * New network connections are limited to ensure that existing network + connections consume most network bandwidth + * CPU at < 30% + * Disk IO at < 90% +* You should use HAProxy or your application servers to limit new + network connections to keep network and IO below 90% and CPU below + 30%. + + + + diff --git a/content/riak/kv/3.0.2/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/3.0.2/setup/planning/bitcask-capacity-calc.md new file mode 100644 index 0000000000..be51c20689 --- /dev/null +++ b/content/riak/kv/3.0.2/setup/planning/bitcask-capacity-calc.md @@ -0,0 +1,104 @@ +--- +title: "Bitcask Capacity Calculator" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Bitcask Capacity Calculator" + identifier: "planning_cluster_bitcask_capacity" + weight: 104 + parent: "planning" +toc: true +aliases: + - /riak/3.0.2/ops/building/planning/bitcask + - /riak/kv/3.0.2/ops/building/planning/bitcask +--- + +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask + +These calculators will assist you in sizing your cluster if you plan to +use the default [Bitcask][plan backend bitcask] storage back end. + +This page is designed to give you a rough estimate when sizing your +cluster. The calculations are a _best guess_, and they tend to be a bit +on the conservative side. It's important to include a bit of head room +as well as room for unexpected growth so that if demand exceeds +expectations you'll be able to add more nodes to the cluster and stay +ahead of your requirements. + +<div id="node_info" class="calc_info"></div> +<div class="calculator"> + <ul> + <li> + <label for="n_total_keys">Total Number of Keys:</label> + <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input"> + <span class="error_span" id="n_total_keys_error"></span> + </li> + <li> + <label for="n_bucket_size">Average Bucket Size (Bytes):</label> + <input id="n_bucket_size"type="text" size="7" name="n_bucket_size" value="" class="calc_input"> + <span class="error_span"id="n_bucket_size_error"></span> + </li> + <li> + <label for="n_key_size">Average Key Size (Bytes):</label> + <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input"> + <span class="error_span" id="n_key_size_error"></span> + </li> + <li> + <label for="n_record_size">Average Value Size (Bytes):</label> + <input id="n_record_size"type="text" size="7" name="n_record_size" value="" class="calc_input"> + <span class="error_span"id="n_record_size_error"></span> + </li> + <li> + <label for="n_ram">RAM Per Node (in GB):</label> + <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input"> + <span class="error_span" id="n_ram_error"></span> + </li> + <li> + <label for="n_nval"><i>N</i> (Number of Write Copies):</label> + <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input"> + <span class="error_span" id="n_nval_error"></span> + </li> +</ul> +</div> + +## Recommendations + +<span id="recommend"></span> + +## Details on Bitcask RAM Calculation + +With the above information in mind, the following variables will factor +into your RAM calculation: + +Variable | Description +:--------|:----------- +Static Bitcask per-key overhead | 44.5 bytes per key +Estimated average 
bucket-plus-key length | The combined number of characters your bucket + keynames will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The Actual Equation
+
+Approximate RAM Needed for Bitcask = (static bitcask per-key overhead +
+estimated average bucket+key length in bytes) * estimated total number of
+keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+Plugging these numbers into the equation gives (44.5 + 30) bytes ×
+50,000,000 keys × 3 replicas = 11,175,000,000 bytes, so the amount of
+RAM you would need for Bitcask is roughly **10.4 GiB across your entire
+cluster**.
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/setup/planning/cluster-capacity.md b/content/riak/kv/3.0.2/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..f925abc47f
--- /dev/null
+++ b/content/riak/kv/3.0.2/setup/planning/cluster-capacity.md
@@ -0,0 +1,238 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/planning/cluster
+  - /riak/kv/3.0.2/ops/building/planning/cluster
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/3.0.2/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/3.0.2/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users. Memory is essential for running complex MapReduce queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM without degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory.
The keydir is a hash table that maps each
+concatenated bucket + key name in a Bitcask (a "Bitcask" is the name for
+each file contained within each Bitcask backend) to a fixed-size
+structure giving the file, offset, and size of the most recently written
+entry for that bucket + key on disk.
+
+To learn more about Bitcask, see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.
+
+If your calculated RAM needs will exceed your hardware resources (in
+other words, if you can't afford the RAM required to use Bitcask), we
+recommend that you use LevelDB.
+
+Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.
+
+### LevelDB
+
+If RAM requirements for Bitcask are prohibitive, we recommend use of
+the LevelDB backend. While LevelDB doesn't require a large amount of RAM
+to operate, supplying it with the maximum amount of memory available leads to higher performance.
+
+For more information see [LevelDB][plan backend leveldb].
+
+## Disk
+
+Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs:
+
+#### Estimated Total Objects * Average Object Size * n_val
+
+For example:
+
+* 50,000,000 objects
+* an average object size of two kilobytes (2,048 bytes)
+* the default `n_val` of 3
+
+With those numbers, 50,000,000 * 2,048 * 3 = 307,200,000,000 bytes, so you would need approximately **286 GB** of disk space in the entire cluster to accommodate your data.
+
+We believe that databases should be durable out of the box. When we
+built Riak, we did so in a way that you could write to disk while
+keeping response times below your users' expectations. So this
+calculation assumes that you'll be keeping the entire data set on disk.
+
+Many of the considerations taken when configuring a machine to serve a
+database apply to configuring a node for Riak as well. Mounting
+disks with `noatime` and having separate disks for your OS and Riak data
+lead to much better performance. See [Planning for a
+Riak System](../start) for more information.
+
+### Disk Space Planning and Ownership Handoff
+
+When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is more intensive disk space usage on the remaining nodes; in rare cases it can fill the disk of one or more of those nodes.
+
+When making disk space planning decisions, we recommend that you:
+
+* assume that one or more nodes may be down at any time
+* monitor your disk space usage and add additional space when usage
+  exceeds 50-60% of available space.
+
+Another possibility worth considering is using Riak with a filesystem
+that allows for growth, for example
+[LVM],
+[RAID](http://en.wikipedia.org/wiki/RAID), or
+[ZFS](http://en.wikipedia.org/wiki/ZFS).
+
+## Read/Write Profile
+
+Read/write ratios, as well as the distribution of key access, should
+influence the configuration and design of your cluster. If your use case
+is write-heavy, you will need less RAM for caching, and if only a
+certain portion of keys is accessed regularly, as in a [Pareto
+distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you
+won't need as much RAM available to cache those keys' values.
+
+## Number of Nodes
+
+The number of nodes (i.e.
physical servers) in your Riak Cluster depends +on the number of times data is [replicated][concept replication] across the +cluster. To ensure that the cluster is always available to respond to +read and write requests, we recommend a "sane default" of N=3 +replicas. This requirement can be met with a 3 or 4-node +cluster. + +For production deployments, however, we recommend using no fewer than 5 +nodes, as node failures in smaller clusters can compromise the +fault-tolerance of the system. Additionally, in clusters smaller than 5 +nodes, a high percentage of the nodes (75-100% of them) will need to +respond to each request, putting undue load on the cluster that may +degrade performance. For more details on this recommendation, see our +blog post on [Why Your Riak Cluster Should Have at Least Five +Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/). + +## Scaling + +Riak can be scaled in two ways: vertically, via improved hardware, and +horizontally, by adding more nodes. Both ways can provide performance +and capacity benefits, but should be used in different circumstances. +The [riak-admin cluster command][use admin riak-admin#cluster] can +assist scaling in both directions. + +#### Vertical Scaling + +Vertical scaling, or improving the capabilities of a node/server, +provides greater capacity to the node but does not decrease the overall +load on existing members of the cluster. That is, the ability of the +improved node to handle existing load is increased but the load itself +is unchanged. Reasons to scale vertically include increasing IOPS (I/O +Operations Per Second), increasing CPU/RAM capacity, and increasing disk +capacity. + +#### Horizontal Scaling + +Horizontal scaling, or increasing the number of nodes in the cluster, +reduces the responsibilities of each member node by reducing the number +of partitions and providing additional endpoints for client connections. +That is, the capacity of each individual node does not change but its +load is decreased. Reasons to scale horizontally include increasing I/O +concurrency, reducing the load on existing nodes, and increasing disk +capacity. + +> **Note on horizontal scaling** +> +> When scaling horizontally, it's best to add all planned nodes at once +with multiple `riak-admin cluster join` commands followed by +a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster. + +#### Reducing Horizontal Scale + +If a Riak cluster is over provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command. + +## Ring Size/Number of Partitions + +Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference]. + +The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes we recommend a larger ring size. + +The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes. + +There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses. 
We recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine, while a 10-node cluster should work well with a ring size of 128 or 256 (64 is too small, while 512 is likely too large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency IO access can drastically decrease performance. Before
+putting your Riak cluster in production, it is recommended that you gain
+a full understanding of your environment's behavior so that you know how
+your cluster performs under load for an extended period of time. Doing
+so will help you size your cluster for future growth and lead to optimal
+performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend that you keep latency
+between nodes as low as possible, as high latency leads to
+sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the lowest
+latency possible using SATA drives or SSDs, for example.
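+
+As a quick sanity check on the numbers in this document, the disk
+equation and the partitions-per-node guideline are easy to script. The
+following is a minimal shell sketch; all of the variable values are
+hypothetical placeholders that you should replace with your own
+estimates:
+
+```bash
+#!/usr/bin/env bash
+# Back-of-the-envelope capacity check (all values are placeholders).
+set -euo pipefail
+
+TOTAL_OBJECTS=50000000   # estimated total objects
+AVG_OBJECT_SIZE=2048     # average object size in bytes
+N_VAL=3                  # replication factor
+NODES=10                 # planned number of nodes
+RING_SIZE=256            # planned ring size (must be a power of 2)
+
+# Disk: Estimated Total Objects * Average Object Size * n_val
+DISK_BYTES=$((TOTAL_OBJECTS * AVG_OBJECT_SIZE * N_VAL))
+DISK_GIB=$(awk -v b="$DISK_BYTES" 'BEGIN {printf "%.1f", b / (1024 ^ 3)}')
+echo "Cluster-wide disk estimate: ${DISK_GIB} GiB (before headroom)"
+
+# Partitions per node: aim for roughly 10-50
+PARTITIONS_PER_NODE=$((RING_SIZE / NODES))
+echo "Partitions per node: ${PARTITIONS_PER_NODE}"
+if [ "$PARTITIONS_PER_NODE" -lt 10 ]; then
+  echo "Warning: fewer than 10 partitions per node; consider a larger ring size."
+fi
+```
+
+Remember that this estimate covers data only; leave room for ownership
+handoff and general growth as described above.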
+ + + + diff --git a/content/riak/kv/3.0.2/setup/planning/future.md b/content/riak/kv/3.0.2/setup/planning/future.md new file mode 100644 index 0000000000..258ed9b108 --- /dev/null +++ b/content/riak/kv/3.0.2/setup/planning/future.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "Planning for the Future" +description: "" +project: "riak_kv" +project_version: 3.0.2 +#menu: +# riak_kv-3.0.2: +# name: "Planning for the Future" +# identifier: "planning_future" +# weight: 106 +# parent: "planning" +toc: true +aliases: +--- + +**TODO: Add content** + + + + diff --git a/content/riak/kv/3.0.2/setup/planning/operating-system.md b/content/riak/kv/3.0.2/setup/planning/operating-system.md new file mode 100644 index 0000000000..dff8c94e9b --- /dev/null +++ b/content/riak/kv/3.0.2/setup/planning/operating-system.md @@ -0,0 +1,30 @@ +--- +title: "Operating System Support" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "OS Support" + identifier: "planning_os" + weight: 101 + parent: "planning" +toc: true +aliases: +--- + +[downloads]: {{<baseurl>}}riak/kv/3.0.2/downloads/ + +We recommend deploying Riak KV on a mainstream Unix-like operating system. +Mainstream distributions have larger support communities, making +solutions to common problems easier to find. + +Basho provides [binary packages][downloads] of Riak KV for the following distributions: + +* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core +* **Debian based:** Debian, Ubuntu +* **Solaris based:** Sun Solaris, OpenSolaris + + + + diff --git a/content/riak/kv/3.0.2/setup/planning/start.md b/content/riak/kv/3.0.2/setup/planning/start.md new file mode 100644 index 0000000000..b68857784f --- /dev/null +++ b/content/riak/kv/3.0.2/setup/planning/start.md @@ -0,0 +1,61 @@ +--- +title: "Start Planning" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Start Planning" + identifier: "planning_start" + weight: 100 + parent: "planning" +toc: true +aliases: + - /riak/3.0.2/ops/building/planning/system-planning + - /riak/kv/3.0.2/ops/building/planning/system-planning +--- + +[plan backend]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend +[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/cluster-capacity +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask +[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/bitcask-capacity-calc + +Here are some steps and recommendations designing and configuring your +Riak cluster. + +## Backend + +Backends are what Riak KV uses to persist data. Different backends have +strengths and weaknesses, so if you are unsure of which backend you +need, read through the [Choosing a Backend][plan backend] tutorial. + +## Capacity + +[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster. + +If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity. + +## Network Configuration / Load Balancing + +There are at least two acceptable strategies for load-balancing requests +across your Riak cluster: **virtual IPs** and **reverse-proxy**. + +For **virtual IPs**, we recommend using any of the various VIP +implementations. 
We don't recommend VRRP behavior for the VIP because +you'll lose the benefit of spreading client query load to all nodes in a +ring. + +For **reverse-proxy** configurations (HTTP interface), any one of the +following should work adequately: + +* haproxy +* squid +* varnish +* nginx +* lighttpd +* Apache + + + + + diff --git a/content/riak/kv/3.0.2/setup/search.md b/content/riak/kv/3.0.2/setup/search.md new file mode 100644 index 0000000000..3f2ff2d6cc --- /dev/null +++ b/content/riak/kv/3.0.2/setup/search.md @@ -0,0 +1,5 @@ + + + + + diff --git a/content/riak/kv/3.0.2/setup/upgrading.md b/content/riak/kv/3.0.2/setup/upgrading.md new file mode 100644 index 0000000000..9e57b57e81 --- /dev/null +++ b/content/riak/kv/3.0.2/setup/upgrading.md @@ -0,0 +1,38 @@ +--- +title: "Upgrading Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Upgrading" + identifier: "upgrading" + weight: 102 + parent: "setup_index" +toc: true +aliases: +--- + +[upgrade checklist]: ./checklist +[upgrade version]: ./version +[upgrade cluster]: ./cluster +[upgrade mdc]: ./multi-datacenter +[upgrade search]: ./search + +## In This Section + +### [Production Checklist][upgrade checklist] + +An overview of what to consider before upgrading Riak KV in a production environment. + +[Learn More >>][upgrade checklist] + +### [Upgrading to Riak KV 3.0.2][upgrade version] + +A tutorial on updating to Riak KV 3.0.2 + +[Learn More >>][upgrade version] + + + + diff --git a/content/riak/kv/3.0.2/setup/upgrading/checklist.md b/content/riak/kv/3.0.2/setup/upgrading/checklist.md new file mode 100644 index 0000000000..df186a5c34 --- /dev/null +++ b/content/riak/kv/3.0.2/setup/upgrading/checklist.md @@ -0,0 +1,225 @@ +--- +title: "Production Checklist" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Production Checklist" + identifier: "upgrading_checklist" + weight: 100 + parent: "upgrading" +toc: true +aliases: + - /riak/3.0.2/ops/upgrading/production-checklist/ + - /riak/kv/3.0.2/ops/upgrading/production-checklist/ +--- + +[perf open files]: {{<baseurl>}}riak/kv/3.0.2/using/performance/open-files-limit +[perf index]: {{<baseurl>}}riak/kv/3.0.2/using/performance +[ntp]: http://www.ntp.org/ +[security basics]: {{<baseurl>}}riak/kv/3.0.2/using/security/basics +[cluster ops load balance]: {{<baseurl>}}riak/kv/3.0.2/configuring/load-balancing-proxy +[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference +[config backend]: {{<baseurl>}}riak/kv/3.0.2/configuring/backend +[usage search]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/search +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency +[apps replication properties]: {{<baseurl>}}riak/kv/3.0.2/developing/app-guide/replication-properties +[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/reference/strong-consistency +[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/bucket-types +[use admin commands]: {{<baseurl>}}riak/kv/3.0.2/using/admin/commands +[use admin riak control]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-control +[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/inspecting-node +[troubleshoot http]: {{<baseurl>}}riak/kv/3.0.2/using/troubleshooting/http-204 +[use admin riak-admin]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin +[SANs]: 
http://en.wikipedia.org/wiki/Storage_area_network
+
+Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using a tool such as `iperf` to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
+ * If your cluster does consist of at least three nodes, has the + strong consistency subsystem been [enabled][config strong consistency] on all nodes? + * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency. + - Have all [bucket types][cluster ops bucket types] that you intend to use + been created and successfully activated? + - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it? +* Check data mount points: + - Is `/var/lib/riak` mounted? + - Can you grow that disk later when it starts filling up? + - Do all nodes have their own storage systems (i.e. no + [SANs]), or do you have a plan in place for switching to that configuration later? +* Are all Riak KV nodes up? + - Run `riak ping` on all nodes. You should get `pong` as a response. + - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each + node. You should get `riak_kv is up` as a response. + + The `<node_name>@<IP>` string should come from your [configuration + file(s)][configure reference]. +* Do all nodes agree on the ring state? + - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on + the ring [list_of_nodes]`. + - Run `riak-admin member-status`. All nodes should be valid (i.e. + listed as `Valid: 1`), and all nodes should appear in the list + - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready: + true`), there should be no unreachable nodes (`All nodes are up and + reachable`), and there should be no pending changes to the ring + (`No pending changes`). + - Run `riak-admin transfers`. There should be no active transfers (`No + transfers active`). + +## Operations + +* Does your monitoring system ensure that [NTP][ntp] is + running? +* Are you collecting [time series data][cluster ops inspect node] on + the whole cluster? + - System metrics + + CPU load + + Memory used + + Network throughput + + Disk space used/available + + Disk input/output operations per second (IOPS) + - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or + using [`riak-admin`][use admin riak-admin]) + + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th) + + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals + + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals + + Finite state machine (FSM) stats: + * `GET`/`PUT` FSM `objsize` (99th and 100th percentile) + * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th) + + Protocol buffer connection stats + * `pbc_connects` + * `pbc_active` + * `pbc_connects_total` +* Are the following being graphed (at least the key metrics)? + - Basic system status + - Median and 95th and 99th percentile latencies (as these tend to be + leading indicators of trouble) + +## Application and Load + +* Have you benchmarked your cluster with simulated load to confirm that + your configuration will meet your performance needs? +* Are the [develop client libraries] in use in your application up to date? +* Do the client libraries that you're using support the version of Riak KV + that you're deploying? 
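+
+Many of the node-by-node checks above lend themselves to a simple loop.
+Below is a minimal sketch that runs the commands from this checklist
+across a cluster over SSH; the host names and the `riak@<host>`
+node-name pattern are placeholders for your own environment:
+
+```bash
+#!/usr/bin/env bash
+# Run the basic pre-production health checks on every node.
+# Host names below are placeholders; adjust to your environment.
+set -euo pipefail
+
+HOSTS="riak1.example.com riak2.example.com riak3.example.com"
+
+for host in $HOSTS; do
+  echo "=== ${host} ==="
+  ssh "$host" 'riak ping'                                        # expect: pong
+  ssh "$host" "riak-admin wait-for-service riak_kv riak@${host}" # expect: riak_kv is up
+done
+
+# Ring-level checks only need to run on a single node.
+FIRST_HOST=${HOSTS%% *}
+ssh "$FIRST_HOST" 'riak-admin ringready'      # expect: TRUE All nodes agree on the ring
+ssh "$FIRST_HOST" 'riak-admin member-status'  # all nodes should be listed as valid
+ssh "$FIRST_HOST" 'riak-admin ring-status'    # expect: Ring Ready: true, no pending changes
+ssh "$FIRST_HOST" 'riak-admin transfers'      # expect: No transfers active
+```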
+ +## Confirming Configuration with Riaknostic + +Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that +can be invoked by running `riak-admin diag <check>`, where `check` is +one of the following: + +* `disk` +* `dumps` +* `memory_use` +* `nodes_connected` +* `ring_membership` +* `ring_preflists` +* `ring_size` +* `search` +* `sysctl` + +Running `riak-admin diag` with no additional arguments will run all +checks and report the findings. This is a good way of verifying that +you've gotten at least some of the configurations mentioned above +correct, that all nodes in your cluster are up, and that nothing is +grossly misconfigured. Any warnings produced by `riak-admin diag` should +be addressed before going to production. + +## Troubleshooting and Support + +* Does your team, including developing and operations, know how to open + support requests with Basho? +* Is your team familiar with Basho Support's Service-Level Agreement + (SLA) levels? + - Normal and Low are for issues not immediately impacting production + systems + - High is for problems that impact production or soon-to-be-production + systems, but where stability is not currently compromised + - Urgent is for problems causing production outages or for those + issues that are likely to turn into production outages very soon. + On-call engineers respond to urgent requests within 30 minutes, + 24 / 7. +* Does your team know how to gather `riak-debug` results from the whole + cluster when opening tickets? If not, that process goes something like + this: + - SSH into each machine, run `riak-debug`, and grab the resultant + `.tar.gz` file + - Attach all debug tarballs from the whole cluster each time you open + a new High- or Urgent-priority ticket + +## The Final Step: Taking it to Production + +Once you've been running in production for a month or so, look back at +the metrics gathered above. Based on the numbers you're seeing so far, +configure alerting thresholds on your latencies, disk consumption, and +memory. These are the places most likely to give you advance warning of +trouble. + +When you go to increase capacity down the line, having historic metrics +will give you very clear indicators of having resolved scaling problems, +as well as metrics for understanding what to upgrade and when. 
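+
+The `riak-debug` collection process described above is also easy to
+script. The sketch below assumes SSH access and uses placeholder host
+names; note that the way `riak-debug` reports the location of its
+archive can vary by version, so the capture step is an assumption to
+verify in your environment:
+
+```bash
+#!/usr/bin/env bash
+# Gather riak-debug archives from every node before opening a ticket.
+# Host names are placeholders; adjust to your environment.
+set -euo pipefail
+
+HOSTS="riak1.example.com riak2.example.com riak3.example.com"
+OUTDIR="riak-debug-$(date +%Y%m%d%H%M)"
+mkdir -p "$OUTDIR"
+
+for host in $HOSTS; do
+  echo "Running riak-debug on ${host}..."
+  # Assumption: riak-debug prints the path of the .tar.gz it writes as
+  # its last line of output; capture it and copy the file back.
+  archive=$(ssh "$host" 'riak-debug' | tail -n 1)
+  scp "${host}:${archive}" "$OUTDIR/"
+done
+
+echo "Attach everything in ${OUTDIR}/ to the support ticket."
+```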
+ + + + + diff --git a/content/riak/kv/3.0.2/setup/upgrading/cluster.md b/content/riak/kv/3.0.2/setup/upgrading/cluster.md new file mode 100644 index 0000000000..618ce5b093 --- /dev/null +++ b/content/riak/kv/3.0.2/setup/upgrading/cluster.md @@ -0,0 +1,303 @@ +--- +title: "Upgrading a Cluster" +description: "" +project: "riak_kv" +project_version: "3.0.2" +menu: + riak_kv-3.0.2: + name: "Upgrading a Cluster" + identifier: "upgrading_cluster" + weight: 102 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/3.0.2/ops/upgrading/rolling-upgrades/ + - /riak/kv/3.0.2/ops/upgrading/rolling-upgrades/ +--- + +[production checklist]: {{<baseurl>}}riak/kv/3.0.2/setup/upgrading/checklist +[use admin riak control]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-control +[use admin commands]: {{<baseurl>}}riak/kv/3.0.2/using/admin/commands +[use admin riak-admin]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes +[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.2/configuring/v3-multi-datacenter +[jmx monitor]: {{<baseurl>}}riak/kv/3.0.2/using/reference/jmx +[snmp]: {{<baseurl>}}riak/kv/3.0.2/using/reference/snmp + +{{% note title="Note on upgrading Riak KV from older versions" %}} +Riak KV upgrades are tested and supported for two feature release versions. +For example, upgrades from 1.1.x to 1.3.x are tested and supported, +while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new +version of Riak KV that is more than two feature releases ahead, we +recommend first upgrading to an intermediate version. For example, in an +upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x +before upgrading to 1.4.x. + +If you run [Riak Control]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-control), you should disable it during the rolling upgrade process. +{{% /note %}} + +Riak KV nodes negotiate with each other to determine supported +operating modes. This allows clusters containing mixed-versions of Riak KV +to properly interoperate without special configuration, and simplifies +rolling upgrades. + +Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading. + +## Debian/Ubuntu + +The following example demonstrates upgrading a Riak KV node that has been +installed with the Debian/Ubuntu packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +2\. Back up the Riak KV node's `/etc` and `/data` directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Upgrade Riak KV: + +```bash +sudo dpkg -i <riak_package_name>.deb +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +5\. Verify Riak KV is running the new version: + +```bash +riak version +``` + +6\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +* `»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +* While the node was offline, other nodes may have accepted writes on its +behalf. 
This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV,
+you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its
+behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
+
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially
+supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data`
+directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data
+   directories. If it did, move your backup to where the data directory
+   should be.
+6. Copy any customizations from your backed-up vm.args to the
+   `riak_ee` installed vm.args file; these files may be identical.
+7.
The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config: + * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information. + * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information. + * `riak_jmx` - See [JMX Monitoring][jmx monitor] for more information. + * `snmp` - See [SNMP][snmp] for more information. +8. Start Riak KV on the upgraded node. + +## Basho Patches + +After upgrading, you should ensure that any custom patches contained in +the `basho-patches` directory are examined to determine their +application to the upgraded version. If you find that patches no longer +apply to the upgraded version, you should remove them from the +`basho-patches` directory prior to operating the node in production. + +The following lists locations of the `basho-patches` directory for +each supported operating system: + +- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches` +- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches` +- FreeBSD: `/usr/local/lib/riak/lib/basho-patches` +- SmartOS: `/opt/local/lib/riak/lib/basho-patches` +- Solaris 10: `/opt/riak/lib/basho-patches` + +## Riaknostic + +It is a good idea to also verify some basic configuration and general +health of the Riak KV node after upgrading by using Riak KV's built-in +diagnostic utility Riaknostic. + +Ensure that Riak KV is running on the node, and issue the following +command: + +```bash +riak-admin diag +``` + +Make the recommended changes from the command output to ensure optimal +node operation. + + + + + diff --git a/content/riak/kv/3.0.2/setup/upgrading/multi-datacenter.md b/content/riak/kv/3.0.2/setup/upgrading/multi-datacenter.md new file mode 100644 index 0000000000..12e14833bd --- /dev/null +++ b/content/riak/kv/3.0.2/setup/upgrading/multi-datacenter.md @@ -0,0 +1,24 @@ +--- +draft: true +title: "Upgrading Multi-Datacenter" +description: "" +project: "riak_kv" +project_version: 3.0.2 +#menu: +# riak_kv-3.0.2: +# name: "Upgrading Multi-Datacenter" +# identifier: "upgrading_multi_datacenter" +# weight: 103 +# parent: "upgrading" +toc: true +aliases: +--- + +## TODO + +How to update to a new version with multi-datacenter. + + + + + diff --git a/content/riak/kv/3.0.2/setup/upgrading/search.md b/content/riak/kv/3.0.2/setup/upgrading/search.md new file mode 100644 index 0000000000..d5a3ab05df --- /dev/null +++ b/content/riak/kv/3.0.2/setup/upgrading/search.md @@ -0,0 +1,281 @@ +--- +title: "Upgrading Search from 1.x to 2.x" +description: "" +project: "riak_kv" +project_version: "3.0.2" +menu: + riak_kv-3.0.2: + name: "Upgrading Search 1.x to 2.x" + identifier: "upgrading_search" + weight: 104 + parent: "upgrading" +toc: true +version_history: + in: "2.0.0-2.99.999" +aliases: + - /riak/3.0.2/ops/advanced/upgrading-search-2 + - /riak/kv/3.0.2/ops/advanced/upgrading-search-2 +--- + +If you're using Search in a version of Riak prior to 2.0 (1.3.0 to +1.4.x), you should follow these steps to migrate your search indexes +from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search) indexes. The legacy version of Riak Search is now deprecated +and does not support most new 2.0 features, i.e. 
no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate. + +And please note that the legacy `merge_index`-based search (aka legacy +Search) will be removed in a future release of Riak. + +## Overview of an Upgrade + +The migration steps explained here are as automated as they can +reasonably be, but they do include some manual steps for safety. They +are meant to be run on a live cluster, so there's no need to take all of +your nodes down. Like all migration activities, you should undertake +these steps at a time when your cluster is relatively light on traffic, +i.e. _not_ the week before Christmas. + +The main goal of a live migration is to stand up indexes in the new Riak +Search that parallel the existing ones in legacy. New writes add entries +to both indexes while AAE adds entries in the new indexes for existing +data. + +Parallel indexes mean more disk usage. How much more will depend on the +schema but tests have shown Solr to generally use less disk space. A +prudent plan will expect new Search to use as much disk as legacy. You +can also expect more CPU usage as analysis will temporarily be performed +by both systems. Finally, Solr runs on a JVM process requiring its own +RAM. A good start is 2 GB but more will be required for heavier +workloads. On the contrary, do not make too large a heap as it could +cause lengthy garbage collection pauses. + +As the new search indexes catch up with the old, incoming queries will +still be serviced by legacy Search. Once you have determined that the +new indexes are consistent with KV, you can perform a live switch to the +new system and turn off legacy Search. Finally, you can remove the old +merge index directories to reclaim disk space. + +> **Downgrading and Merge Index** +> +> It may be tempting to keep the merge index files in case of a downgrade. +We don't recommend doing that if writes are being made to these buckets +during upgrade. Once `search: false` is set on a bucket, all new KV +data written will have missing indexes in the merge index and +overwritten data will have inconsistent indexes. At this point, a +downgrade requires a full re-index of the data as legacy Search has no +mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search). + +> **Active Anti-Entropy (AAE) Required** +> +>Migration requires that Riak's AAE subsystem be enabled. It's +responsible for finding all the missing index entries for existing data +and adding them. Technically speaking, the migration can be performed +without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space from merge index as its GC +algorithm is bad at getting rid of large index files. + +## Steps to Upgrading + +1. First, you'll perform a normal [rolling upgrade](../cluster). + As you upgrade, enable `yokozuna` (the new Riak Search library) on + each node. If you're still using `app.config` it's called `yokozuna`. + If you've chosen to upgrade to the new `riak.conf` config option, it's + called `search`. 
+ + ```riakconf + search = on + ``` + ```appconfig + {yokozuna, [ + %% Other configs + {enabled, true}, + %% Other configs + ]} + ``` + + <div class="note"> + <div class="title">Upgrade First</div> + Don't proceed until all nodes have been upgraded to the newest + version. This way all nodes have new Search capabilities before + running the next steps which require them. + </div> + +2. For every schema in legacy Search, you must create a comparable +schema in new Search. If you want to use the default schema named +[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema. + + To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas) + instructions to learn how to define your xml file. Once you've created + the file, you can upload it to the cluster. + + ```curl + curl -XPUT http://localhost:8098/search/schema/my_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @my_schema.xml + ``` + +3. For every index in legacy Search, you must create a comparable index +in new Search, setting the appropriate schema that you created in the +previous step. This index can have the same name as your legacy Search +index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup). + + ```curl + curl -XPUT http://localhost:8098/search/index/my_index \ + -H 'Content-Type: application/json' \ + -d '{"schema":"my_schema"}' + ``` + +4. For each bucket which is indexed by legacy Search, you must add the +`search_index` bucket property to point to the new Search index. This +new index is what we are attempting to migrate all of our index data to. +You can find more details about this step under [Using Search](../../../developing/usage/search/#setup). + + ```curl + curl -XPUT http://localhost:8098/buckets/my_bucket/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"my_index"}}' + ``` + + Once a bucket is associated with the new Search, all objects that are + written or modified in Riak will be indexed by **both** legacy and new + Search. However, the HTTP and client query interfaces will still + continue to use the legacy Search. + +5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash +trees must be manually cleared so that AAE will notice the missing +indexes. + + Attach to one of the Riak nodes by calling `riak attach-direct`. Paste + the following code into the shell. It clears the Search hash trees for + each node in the cluster. + + ```erlang + riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity). + ``` + + Press `Ctrl-D` to exit from the attached shell. + + In the background AAE will rebuild the hash trees and exchange them + with KV. These exchanges will notice objects are missing and index + them in new Search. + + <!-- no re-index command currently exists --> + +6. Monitor the AAE status of every node until a full round of exchanges +have occurred on every node. + + ```bash + riak-admin search aae-status + ``` + + First, you must wait until all trees are rebuilt. This may take a + while as each node is configured, by default, to build a maximum of + one tree per hour. You can determine when a tree is build by looking + at the `Entropy Trees` section. When a tree is not built it will show + `--` under the `Built (ago)` column. Otherwise, it will list how long + ago the tree was built in a human friendly format. 
Here is an example
+   of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   --
+    319703483166135013357056057156686910549735243776   --
+    ...
+    ```
+
+   Here is an example of built trees:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.3 hr
+    319703483166135013357056057156686910549735243776   5.3 hr
+    ...
+    ```
+
+   After all the trees are built you then have to wait for a full
+   exchange round to occur for every partition on every node. That is,
+   the full exchange round must be **NEWER** than the time the tree was
+   built. That way you know the exchange was based on the latest tree.
+   The exchange information is found under the `Exchanges` section.
+   Under that section there are two columns: `Last (ago)` and `All
+   (ago)`. In this way, you want to wait until the `All (ago)` value is
+   newer than the value of `Built (ago)` in the `Entropy Trees` section.
+   For example, given the entropy tree output above, this output would
+   indicate that both partitions have had a full exchange round since the
+   latest tree was built:
+
+    ```
+    ================================== Exchanges ==================================
+    Index                                              Last (ago)    All (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+    319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+    ...
+    ```
+
+   Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than
+   `5.3 hr`. Once the exchange is newer for every partition on every
+   node you know that AAE has brought all new indexes up to date.
+
+7. Next, run the following command, which will give HTTP and PB query
+control to the new Riak Search:
+
+    ```bash
+    riak-admin search switch-to-new-search
+    ```
+
+    <div class="note">
+    <div class="title">Check Results Before Switching (Optional)</div>
+    Up until this point all incoming queries are serviced by the legacy
+    Search system. After the `switch-to-new-search` is run all queries
+    will be handled by new Search. If you first want to verify the
+    results of new Search before switching then you can use its dedicated
+    HTTP resource at `/search/query/<index>?q=...`.
+    </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search`
+`enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+        %% Other configs
+        {enabled, false},
+        %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search and to properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search it causes
+the commit hooks to persist even when legacy Search is disabled and the
+search property is set to false.
+ + New Search has code to expunge the legacy hooks from the raw ring but + it only occurs during start-up and requires that legacy Search be + disabled in the configuration. Thus, the easiest way to fix things is + to disable legacy Search (in step 9) and then perform a rolling + restart of the cluster. + +11. Finally, delete the merge index directories to reclaim disk space. + +For any questions reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than during the middle of a migration. + + + + + diff --git a/content/riak/kv/3.0.2/setup/upgrading/version.md b/content/riak/kv/3.0.2/setup/upgrading/version.md new file mode 100644 index 0000000000..a1308ffa47 --- /dev/null +++ b/content/riak/kv/3.0.2/setup/upgrading/version.md @@ -0,0 +1,252 @@ +--- +title: "Upgrading to Riak KV 3.0.2" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Upgrading to 3.0.2" + identifier: "upgrading_version" + weight: 101 + parent: "upgrading" +toc: true +aliases: + - /riak/3.0.2/upgrade-v20/ + - /riak/kv/3.0.2/ops/upgrading/rolling-upgrades/ + - /riak/kv/3.0.2/ops/upgrading/rolling-upgrades/ + - /riak/kv/3.0.2/setup/upgrading/cluster/ + +--- + + +[production checklist]: {{<baseurl>}}riak/kv/3.0.2/setup/upgrading/checklist +[use admin riak control]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-control +[use admin commands]: {{<baseurl>}}riak/kv/3.0.2/using/admin/commands +[use admin riak-admin]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes +[release notes]: {{<baseurl>}}riak/kv/3.0.2/release-notes +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.2/configuring/v3-multi-datacenter +[jmx monitor]: {{<baseurl>}}riak/kv/3.0.2/using/reference/jmx +[snmp]: {{<baseurl>}}riak/kv/3.0.2/using/reference/snmp +[Release Notes]: {{<baseurl>}}riak/kv/3.0.2/release-notes + + +## Overview + +You can upgrade one node or your whole cluster to Riak KV 3.0.2 by following the instructions below. + +{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed-versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades. +{{% /note %}} + + +### General Process + +For every node in the cluster: + +1. Stop Riak KV. +1. Back up the Riak /etc, /data, and /basho-patches directories. +1. Remove your /basho-patches directory. +1. Upgrade Riak KV. + * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading. +1. (Optional) If you would like to potentially downgrade at some point, update your advanced.config file to opt-out of the AAE updates. +1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config +1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to over-ride the default bucket properties for compatibility. +1. Start Riak KV. +1. Verify Riak KV is running the upgraded version. +1. Wait for the `riak_kv` service to start. +1. Wait for any hinted handoffs to complete. 
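+
+Before working through these steps node by node, it can be useful to
+record the cluster's current state so you have a baseline to compare
+against after the upgrade. A minimal sketch, run from any one node (the
+output file name is arbitrary):
+
+```bash
+#!/usr/bin/env bash
+# Snapshot cluster state before a rolling upgrade (run on any node).
+set -euo pipefail
+
+SNAPSHOT="pre-upgrade-status-$(date +%Y%m%d).txt"
+{
+  echo "--- riak version ---";     riak version
+  echo "--- member status ---";    riak-admin member-status
+  echo "--- ring status ---";      riak-admin ring-status
+  echo "--- active transfers ---"; riak-admin transfers
+} > "$SNAPSHOT"
+echo "Saved cluster snapshot to ${SNAPSHOT}"
+```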
+ +Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading. + + +## Transitioning to Leveled backend + + +[Riak KV 2.9][release notes] introduced a new backend specifically for Riak, Leveled: + +The leveled backend is not compatible with other backends in terms of the serialised disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to: +* First transition to 2.9 with the current backend in-place, minimising the time spent running mis-matched versions in parallel; +* Then as a second phase run a rolling series of node transfers to replace the nodes with the previous backend, with nodes with the leveled backend. + +{{% note %}} +You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 3.0.2 only if you plan to use Riak search. +{{% /note %}} + + +### Components That Complicate Downgrades + +We do our best to make all features that change data formats on disk opt-in; however, some features may be introduced that we either believe are so important that we automatically opt-in users on upgrade or there is no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible. + +* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted-in. +* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade. +* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible. + +| Feature | Automatic | Required | One Way | Notes | +|:---|:---:|:---:|:---:|:--- | +|Migration to Solr 4.10.4 |✔ | ✔| | Applies to all clusters using Riak search. +| Active anti-entropy file format changes | ✔ | | | Can opt-out using a capability. +| LZ4 compression in LevelDB | | | ✔ | +| Global expiration in LevelDB | | | ✔ | +| HyperLogLog data type | | |✔| On downgrade data written in HLL format is unreadable.| + + +### When Downgrading is No Longer an Option + +If you decide to upgrade to version 2.9, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new Leveled backend. + +If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade. + + +## Upgrading process + +1\. Stop Riak KV on the node you are going to upgrade: + +```bash +riak stop +``` + +2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories. + +```RHEL/CentOS +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches +``` + +```Ubuntu +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches +``` + +3\. Remove your /basho-patches directory: + +```RHEL/CentOS +sudo rm -rf /usr/lib64/riak/lib/basho-patches/* +``` + +```Ubuntu +sudo rm -rf /usr/lib/riak/lib/basho-patches* +``` + +4\. 
Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package right now, before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+    ```advanced.config
+    {riak_kv, [
+        {override_capability, [
+            {object_hash_version, [{use, legacy}] }
+        ]}
+    ]}
+    ```
+
+5.b\. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+    ```riak.conf
+    leveldb.compression.algorithm=snappy
+    ```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS 2.2.3 or earlier, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+    ```advanced.config
+    {riak_repl, [
+        {override_capability, [
+            {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+        ]}
+    ]}
+    ```
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 3.0.2 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+behalf.
+
+
+### Basho Patches
+
+After upgrading, you should examine any custom patches contained in the `basho-patches` directory to determine whether they still apply to the upgraded version. You can find this information in the [Release Notes].
+
+If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+### Riaknostic
+
+It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal node operation.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using.md b/content/riak/kv/3.0.2/using.md
new file mode 100644
index 0000000000..8fb1faf0c6
--- /dev/null
+++ b/content/riak/kv/3.0.2/using.md
@@ -0,0 +1,78 @@
+---
+title: "Using Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Using"
+    identifier: "managing"
+    weight: 201
+    pre: database
+toc: true
+aliases:
+---
+
+[use running cluster]: ../using/running-a-cluster
+[use admin index]: ../using/admin/
+[cluster ops index]: ../using/cluster-operations
+[repair recover index]: ../using/repair-recovery
+[security index]: ../using/security
+[perf index]: ../using/performance
+[troubleshoot index]: ../using/troubleshooting
+[use ref]: ../using/reference
+
+## In This Section
+
+#### [Running a Cluster][use running cluster]
+
+A guide on basic cluster setup.
+
+[Learn More >>][use running cluster]
+
+#### [Cluster Administration][use admin index]
+
+Tutorials and reference documentation on cluster administration commands as well as command-line tools.
+
+[Learn More >>][use admin index]
+
+#### [Cluster Operations][cluster ops index]
+
+Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups.
+
+[Learn More >>][cluster ops index]
+
+#### [Repair & Recovery][repair recover index]
+
+Contains documentation on repairing a cluster, recovering from failure, and common errors.
+
+[Learn More >>][repair recover index]
+
+#### [Security][security index]
+
+Information on securing your Riak KV cluster.
+
+[Learn More >>][security index]
+
+#### [Performance][perf index]
+
+Articles on benchmarking your Riak KV cluster and improving performance.
+
+[Learn More >>][perf index]
+
+#### [Troubleshooting][troubleshoot index]
+
+Guides on troubleshooting issues and current product advisories.
+
+[Learn More >>][troubleshoot index]
+
+#### [Reference][use ref]
+
+Articles providing background information and implementation details on topics such as logging, bucket types, and search.
+
+[Learn More >>][use ref]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/admin.md b/content/riak/kv/3.0.2/using/admin.md
new file mode 100644
index 0000000000..fae432c770
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/admin.md
@@ -0,0 +1,51 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/running/cluster-admin
+  - /riak/kv/3.0.2/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/admin/commands.md b/content/riak/kv/3.0.2/using/admin/commands.md
new file mode 100644
index 0000000000..bf93ab2c1c
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/admin/commands.md
@@ -0,0 +1,378 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/running/cluster-admin
+  - /riak/kv/3.0.2/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command. This will execute the changes that
+have been staged and reviewed.
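+
+For example, staging a join and a leave together, reviewing them, and then committing them looks like this (a sketch using hypothetical node names):
+
+```bash
+riak-admin cluster join riak2@192.168.1.11    # stage a join
+riak-admin cluster leave riak5@192.168.1.14   # stage a leave
+riak-admin cluster plan                       # review the staged changes
+riak-admin cluster commit                     # execute them
+```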
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
+interface are also available as self-standing commands. The `riak-admin
+member-status` command is now the `riak-admin cluster status` command,
+`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+|        node        |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` - There are five possible values for status:
+  * `valid` - The node has begun participating in cluster operations
+  * `leaving` - The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` - The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` - The node is in the process of joining the cluster but
+    has not yet completed the join process
+  * `down` - The node is not currently responding
+* `avail` - There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` - What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` - The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant node][cluster ops add remove node] will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
+[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
+
+If there is a staged change (or changes), however, you
+will see a detailed listing of what will take place upon commit, what
+the cluster will look like afterward, etc.
+
+For example, if a `cluster leave` operation is staged in a 3-node cluster, the output will look something like this:
+
+```
+=============================== Staged Changes ================================
+Action         Details(s)
+-------------------------------------------------------------------------------
+leave          'dev2@127.0.0.1'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 2 cluster transitions
+
+###############################################################################
+                         After cluster transition 1/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+leaving    32.8%      0.0%    'dev2@127.0.0.1'
+valid      34.4%     50.0%    'dev1@127.0.0.1'
+valid      32.8%     50.0%    'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+
+Transfers resulting from cluster changes: 38
+  6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1'
+  11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1'
+  5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1'
+  16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1'
+
+###############################################################################
+                         After cluster transition 2/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      50.0%      --      'dev1@127.0.0.1'
+valid      50.0%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+```
+
+Notice that there are distinct sections of the output for each of the
+transitions that the cluster will undergo, including warnings, planned
+data transfers, etc.
+
+## commit
+
+Commits the currently staged cluster changes. Staged cluster changes
+must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed.
+
+```bash
+riak-admin cluster commit
+```
+
+## clear
+
+Clears the currently staged cluster changes.
+
+```bash
+riak-admin cluster clear
+```
+
+What `riak-admin cluster clear` does depends on the staged changes being cleared:
+
+* If a `leave` operation has been staged, `riak-admin cluster clear` will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* If a `cluster clear` operation is staged on a node that remains in the cluster, running `riak-admin cluster clear` will leave the node unaffected.
+
+## partitions
+
+Prints primary, secondary, and stopped partition indices and IDs either
+for the current node or for another, specified node.
+The following prints that information for the current node:
+
+```bash
+riak-admin cluster partitions
+```
+
+This would print the partition information for a different node in the
+cluster:
+
+```bash
+riak-admin cluster partitions --node=<node>
+```
+
+Partition information is contained in a table like this:
+
+```
+Partitions owned by 'dev1@127.0.0.1':
++---------+-------------------------------------------------+--+
+|  type   |                      index                      |id|
++---------+-------------------------------------------------+--+
+| primary |                        0                        |0 |
+| primary | 91343852333181432387730302044767688728495783936 |4 |
+| primary |182687704666362864775460604089535377456991567872 |8 |
+|   ...   |                      ....                       |..|
+| primary |1438665674247607560106752257205091097473808596992|63|
+|secondary|                       --                        |--|
+| stopped |                       --                        |--|
++---------+-------------------------------------------------+--+
+```
+
+## partition-count
+
+Displays the current partition count either for the whole cluster or for
+a particular node. This would display the partition count for the
+cluster:
+
+```bash
+riak-admin cluster partition-count
+```
+
+This would display the count for a node:
+
+```bash
+riak-admin cluster partition-count --node=<node>
+```
+
+When retrieving the partition count for a node, you'll see a table like
+this:
+
+```
++--------------+----------+-----+
+|     node     |partitions| pct |
++--------------+----------+-----+
+|dev1@127.0.0.1|    22    | 34.4|
++--------------+----------+-----+
+```
+
+The `partitions` column displays the number of partitions claimed by the
+node, while the `pct` column displays the percentage of the ring claimed.
+
+## partition
+
+The `cluster partition` command enables you to convert partition IDs to
+indexes and vice versa using the `partition id` and `partition index`
+commands, respectively. Let's say that you run the `riak-admin cluster
+partitions` command and see that you have a variety of partitions, one
+of which has an index of
+`1004782375664995756265033322492444576013453623296`.
+You can convert
+that index to an ID like this:
+
+```bash
+riak-admin cluster partition index=1004782375664995756265033322492444576013453623296
+```
+
+Conversely, if you have a partition with an ID of 20, you can retrieve
+the corresponding index:
+
+```bash
+riak-admin cluster partition id=20
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/admin/riak-admin.md b/content/riak/kv/3.0.2/using/admin/riak-admin.md
new file mode 100644
index 0000000000..27c2972112
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/admin/riak-admin.md
@@ -0,0 +1,721 @@
+---
+title: "riak admin Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "riak admin CLI"
+    identifier: "cluster_admin_cli"
+    weight: 101
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/running/tools/riak-admin
+  - /riak/kv/3.0.2/ops/running/tools/riak-admin
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+[use admin commands]: {{<baseurl>}}riak/kv/3.0.2/using/admin/commands
+[use admin commands#join]: {{<baseurl>}}riak/kv/3.0.2/using/admin/commands/#join
+[use admin commands#leave]: {{<baseurl>}}riak/kv/3.0.2/using/admin/commands/#leave
+[cluster ops backup]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/backing-up
+[config reference#node-metadata]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/#node-metadata
+[cluster ops change info]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/changing-cluster-info
+[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce
+[usage commit hooks]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/commit-hooks
+[config reference#ring]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/#ring
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/inspecting-node
+[use ref monitoring]: {{<baseurl>}}riak/kv/3.0.2/using/reference/statistics-monitoring
+[downgrade]: {{<baseurl>}}riak/kv/3.0.2/setup/downgrade
+[security index]: {{<baseurl>}}riak/kv/3.0.2/using/security/
+[security managing]: {{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/bucket-types
+[cluster ops 2i]: {{<baseurl>}}riak/kv/3.0.2/using/reference/secondary-indexes
+[repair recover index]: {{<baseurl>}}riak/kv/3.0.2/using/repair-recovery
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/strong-consistency
+[cluster ops handoff]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/handoff
+[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#stats
+
+## `riak admin`
+
+The `riak admin` command performs operations unrelated to node liveness, including
+node membership, backup, and basic status reporting. The node must be
+running for most of these commands to work. 
Running `riak admin` by itself will output a list of available commands: + +``` +Usage: riak admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak admin join` command has +been deprecated in favor of the [`riak admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak admin leave` command has +been deprecated in favor of the new [`riak admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. `reip` should only be run in
+cases where `riak admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs have been fixed related to reip in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded JavaScript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+**Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak admin services` to see which services are
+available on a running node.
+
+```bash
+riak admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak admin transfer-limit <node> <limit>
+```
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak admin down <node>
+```
+
+## cluster-info
+
+Outputs system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak admin cluster-info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak admin cluster-info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak admin cluster-info /tmp/cluster_info.txt riak@192.168.1.10
+riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```bash
+riak admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
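+
+For instance, a typical first-time setup might look like the following sketch; the user name, password, network source, and permissions shown are illustrative only:
+
+```bash
+riak admin security enable
+riak admin security add-user riakuser password=Test1234
+riak admin security add-source riakuser 127.0.0.1/32 password
+riak admin security grant riak_kv.get,riak_kv.put on any to riakuser
+```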
+
+## bucket-type
+
+Bucket types, introduced in Riak 2.0, are a means of managing bucket
+properties and provide an additional namespace in Riak alongside
+buckets and keys. This command enables you to create and modify bucket
+types, provide the status of currently available bucket types, and
+activate created bucket types.
+
+```bash
+riak admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+        Total partitions: 64
+        Finished partitions: 44
+        Speed: 100
+        Total 2i items scanned: 0
+        Total tree objects: 0
+        Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak admin repair-2i kill
+```
+
+## search
+
+The search command provides sub-commands for various administrative
+tasks related to the new Riak Search.
+
+```bash
+riak admin search <command>
+```
+
+### aae-status
+
+```bash
+riak admin search aae-status
+```
+
+Outputs active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtree don't match for a particular key. The `Last` column is the
+number of keys repaired during the last exchange.
+The `Mean` column is
+the average number of keys repaired for all exchange rounds since the
+node has started. The `Max` column is the maximum number of keys
+repaired for a given exchange round since the node has started.
+
+### switch-to-new-search
+
+{{% note title="Only For Legacy Migration" %}}
+This is only needed when migrating from legacy riak search to the new Search
+(Yokozuna).
+{{% /note %}}
+
+```bash
+riak admin search switch-to-new-search
+```
+
+Switches handling of the HTTP `/solr/<index>/select` resource and
+protocol buffer query messages from legacy Riak Search to new Search
+(Yokozuna).
+
+## services
+
+Lists available services on the node (e.g. `riak_kv`).
+
+```bash
+riak admin services
+```
+
+## ensemble-status
+
+This command is used to provide insight into the current status of the
+consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature.
+
+```bash
+riak admin ensemble-status
+```
+
+This command can also be used to check on the status of a specific
+consensus group in your cluster:
+
+```bash
+riak admin ensemble-status <group id>
+```
+
+Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency].
+
+## handoff
+
+Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff].
+
+## set
+
+Enables you to change the value of one of Riak's configuration
+parameters on the fly, without needing to stop and restart the node.
+
+```bash
+riak admin set <variable>=<value>
+```
+
+The set command can only be used for the following
+parameters:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `search.dist_query=off` will disable distributed query for the node
+* `search.dist_query=on` will enable distributed query for the node
+* `search.dist_query` will get the status of distributed query for the node
+
+The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted.
+
+
+## show
+
+Whereas the [`riak admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak
+cluster, the `show` command enables you to view only some of those
+statistics.
+
+```bash
+riak admin show <variable>
+```
+
+## describe
+
+Provides a brief description of one of Riak's [configurable parameters][config reference].
+
+```bash
+riak admin describe <variable>
+```
+
+If you want to know the meaning of the `nodename` parameter:
+
+```bash
+riak admin describe nodename
+```
+
+That will produce the following output:
+
+```
+nodename:
+  Name of the Erlang node
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/admin/riak-cli.md b/content/riak/kv/3.0.2/using/admin/riak-cli.md
new file mode 100644
index 0000000000..22c12df1f9
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/admin/riak-cli.md
@@ -0,0 +1,204 @@
+---
+title: "riak Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "riak CLI"
+    identifier: "cluster_admin_riak_cli"
+    weight: 102
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/running/tools/riak
+  - /riak/kv/3.0.2/ops/running/tools/riak
+---
+
+[configuration file]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/
+[escript]: http://www.erlang.org/doc/man/escript.html
+[`riak-admin`]: {{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#top
+[configuration]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/
+
+## riak
+
+This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands:
+
+```bash
+Usage: riak «command»
+where «command» is one of the following:
+    { help | start | stop | restart | ping | console | attach
+      attach-direct | ertspath | chkconfig | escript | version | getpid
+      top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } |
+      config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+## help
+
+Provides a brief description of all available commands.
+
+## start
+
+Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given.
+
+```bash
+riak start
+```
+
+## stop
+
+Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak stop
+```
+
+## restart
+
+Stops and then starts the running Riak node without exiting the Erlang VM.
+Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak restart
+```
+
+## ping
+
+Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding.
+
+```bash
+riak ping
+```
+
+## console
+
+Starts the Riak node in the foreground, giving access to the Erlang shell and
+runtime messages. Prints `Node is already running - use 'riak attach' instead`
+when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice.
+
+```bash
+riak console
+```
+
+## attach
+
+Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached.
+
+```bash
+riak attach
+```
+
+## attach-direct
+
+Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**.
+
+```bash
+riak attach-direct
+```
+
+## ertspath
+
+Outputs the path of the Riak Erlang runtime environment:
+
+```bash
+riak ertspath
+```
+
+## chkconfig
+
+Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output.
+
+```bash
+riak chkconfig
+```
+
+## escript
+
+Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment:
+
+```bash
+riak escript <filename>
+```
+
+## version
+
+Outputs the Riak version identifier:
+
+```bash
+riak version
+```
+
+## getpid
+
+Outputs the process identifier for the currently-running instance of Riak:
+
+```bash
+riak getpid
+```
+
+## top
+
+The `riak top` command is the direct equivalent of `riak-admin top`:
+
+```bash
+riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N]
+```
+
+More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation.
+
+## config
+
+Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration file.
+
+```bash
+riak config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however, `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files.
+  The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example:
+
+  ```
+  -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args
+  ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message:
+
+  ```
+  -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args
+  ```
+
+* `effective` prints the effective configuration in the following syntax:
+
+  ```
+  parameter1 = value1
+  parameter2 = value2
+  ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error:
+
+  ```
+  Disabling cuttlefish, legacy configuration files found:
+    /etc/riak/app.config
+    /etc/riak/vm.args
+  Effective config is only visible for cuttlefish conf files.
+  ```
+
+* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following:
+
+  ```
+  Documentation for storage_backend
+  Specifies the storage engine used for Riak's key-value data
+  and secondary indexes (if supported).
+
+  Valid Values:
+    - one of: bitcask, leveldb, memory, multi, prefix_multi
+  Default Value : bitcask
+  Set Value     : bitcask
+  Internal key  : riak_kv.storage_backend
+  ```
+
+Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem.
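+
+Putting these sub-commands together, a safe way to change a setting is to validate and inspect it before restarting the node. The sketch below uses `ring_size` as an example parameter:
+
+```bash
+# Edit riak.conf, then validate it before restarting the node
+riak chkconfig
+
+# Confirm the value the node will actually use
+riak config effective | grep ring_size
+riak config describe ring_size
+```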
+
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/admin/riak-control.md b/content/riak/kv/3.0.2/using/admin/riak-control.md
new file mode 100644
index 0000000000..a92db505ef
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/admin/riak-control.md
@@ -0,0 +1,237 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/advanced/riak-control
+  - /riak/kv/3.0.2/ops/advanced/riak-control
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+or
+
+listener.https.<name> = 127.0.0.1:8069
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+
+```
+
+```appconfig
+ {riak_api,
+  [
+    %% Other configs
+    ... if HTTP is configured ...
+    {http,[{"127.0.0.1",8098}]},
+    ... if HTTPS is configured ...
+    {https,[{"127.0.0.1",8069}]},
+    %% Other configs
+  ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found in the document below.
+{{% /note %}}
+
+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+                %% Other configs
+                {enabled, false},
+                %% Other configs
+               ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+                %% Other configs
+                {enabled, true},
+                %% Other configs
+               ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>.
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/3.0.2/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
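+
+As a quick illustration, enabling an HTTPS listener and pointing Riak at a certificate pair might look like the following riak.conf sketch; the paths and port shown are examples, and the full procedure is covered in the security documentation linked above:
+
+```riakconf
+listener.https.internal = 127.0.0.1:8069
+ssl.certfile = /etc/riak/cert.pem
+ssl.keyfile = /etc/riak/key.pem
+```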
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
+
+## Authentication
+
+Riak Control provides you the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+                %% Other configs
+                {auth, userlist}, %% The only other available option is "none"
+                %% Other configs
+               ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+                %% Other configs
+                {userlist, [
+                            {"bob", "bob_is_the_coolest"},
+                            {"polly", "h4x0r123"},
+                            {"riakrocks", "cap_theorem_4_life"}
+                           ]}
+                %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to `https://ip_address_of_https_listener:https_port/admin`.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[ ![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png) ] ({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+ +Staged changes to the cluster: + +[ ![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png) ] ({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[ ![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png) ] ({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[ ![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png) ] ({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[ ![Node Management]({{<baseurl>}}images/control_node_management.png) ] ({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode). + +[ ![Ring View]({{<baseurl>}}images/control_current_ring.png) ] ({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations.md b/content/riak/kv/3.0.2/using/cluster-operations.md new file mode 100644 index 0000000000..1e6c6b9d6e --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations.md @@ -0,0 +1,109 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +aliases: +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak-admin handoff` interface to enable and disable handoff. 
+ +[Learn More >>][ops handoff] + + +#### [Monitoring Strong Consistency][ops strong consistency] + +Overview of the various statistics used in monitoring strong consistency. + +[Learn More >>][ops strong consistency] + + +#### [V3 Multi-Datacenter][ops v3 mdc] + +Explains how to manage V3 replication with the `riak-repl` command. + +[Learn More >>][ops v3 mdc] + + +#### [V2 Multi-Datacenter][ops v2 mdc] + +Explains how to manage V2 replication with the `riak-repl` command. + +[Learn More >>][ops v2 mdc] + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/3.0.2/using/cluster-operations/active-anti-entropy.md new file mode 100644 index 0000000000..a6cc944a76 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/active-anti-entropy.md @@ -0,0 +1,289 @@ +--- +title: "Managing Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Managing Active Anti-Entropy" + identifier: "cluster_operations_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +version_history: + in: "2.9.1+" +aliases: + - /riak/kv/3.0.2/ops/advanced/aae/ + - /riak/3.0.2/ops/advanced/aae/ +--- +[config search#throttledelay]: {{<baseurl>}}riak/kv/3.0.2/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{<baseurl>}}riak/kv/3.0.2/configuring/search/#search-anti-entropy-throttle + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak. + +## Enabling Active Anti-Entropy + +Whether AAE is currently enabled in a node is determined by the value of +the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/). + +In Riak versions 2.0 and later, AAE is turned on by default. + +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. 
+ +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak-admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... + +================================ Entropy Trees ================================ +Index Built (ago) +------------------------------------------------------------------------------- +0 5.7 d +22835963083295358096932575511191922182123945984 5.6 d +45671926166590716193865151022383844364247891968 5.5 d +68507889249886074290797726533575766546371837952 4.3 d +91343852333181432387730302044767688728495783936 4.8 d + +================================ Keys Repaired ================================ +Index Last Mean Max +------------------------------------------------------------------------------- +0 0 0 0 +22835963083295358096932575511191922182123945984 0 0 0 +45671926166590716193865151022383844364247891968 0 0 0 +68507889249886074290797726533575766546371837952 0 0 0 +91343852333181432387730302044767688728495783936 0 0 0 + +``` + +Each of these three tables contains information for each +[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories: + +Category | Measures | Description +:--------|:---------|:----------- +**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed + | `All` | How long it has been since a partition exchanged with all of its replicas +**Entropy Trees** | `Built` | When the hash trees for a given partition were created +**Keys Repaired** | `Last` | The number of keys repaired during all key exchanges since the last node restart + | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart + | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart + +All AAE status information obtainable using the `riak-admin aae-status` +command is stored in-memory and is reset when a node is restarted with +the exception of hash tree build information, which is persisted on disk +(because hash trees themselves are persisted on disk). + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and +off but also to fine-tune your cluster's use of AAE, e.g. how +much memory AAE processes should consume, how frequently specific +processes should be run, etc. 
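+
+The subsections below describe each of these settings. As a quick
+reference, here is a sketch of the relevant `riak.conf` parameters,
+shown with the default values discussed below:
+
+```riakconf
+anti_entropy.data_dir = ./data/anti_entropy
+anti_entropy.trigger_interval = 15s
+anti_entropy.tree.expiry = 1w
+anti_entropy.concurrency_limit = 2
+anti_entropy.max_open_files = 20
+anti_entropy.write_buffer_size = 4MB
+```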
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach the lengths
+set by the `search.anti_entropy.throttle.$tier.solrq_queue_length` parameter; the length of each
+delay is specified by the [`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on
+both in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but
+entails a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data corruption.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built. 
You can set the frequency using the +`anti_entropy.tree.build_limit.per_timespan` parameter, for which the +default is every hour (`1h`); the number of hash tree builds is +specified by `anti_entropy.tree.build_limit.number`, for which the +default is 1. + +### Write Buffer Size + +While you are free to choose the backend for data storage in Riak, +background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the +write buffer used by LevelDB for hash tree generation using the +`anti_entropy.write_buffer_size` parameter. The default is `4MB`. + +### Open Files and Concurrency Limits + +The `anti_entropy.concurrency_limit` parameter determines how many AAE +cross-node information exchanges or hash tree builds can happen +concurrently. The default is `2`. + +The `anti_entropy.max_open_files` parameter sets an open-files limit for +AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`. + +## AAE and Riak Search + +Riak's AAE subsystem works to repair object inconsistencies both with +for normal key/value objects as well as data related to [Riak Search](../../../developing/usage/search). In particular, AAE acts on indexes stored in +[Solr](http://lucene.apache.org/solr/), the search platform that drives +Riak Search. Implementation details for AAE and Search can be found in +the [Search Details](../../reference/search/#active-anti-entropy-aae) +documentation. + +You can check on the status of Search-related AAE using the following +command: + +```bash +riak-admin search aae-status +``` + +The output from that command can be interpreted just like the output +discussed in the section on [monitoring](#monitoring-aae) above. + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes.md new file mode 100644 index 0000000000..4453244735 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes.md @@ -0,0 +1,198 @@ +--- +title: "Adding / Removing Nodes" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Adding/Removing Nodes" + identifier: "cluster_operations_add_remove_nodes" + weight: 100 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/3.0.2/ops/running/nodes/adding-removing + - /riak/kv/3.0.2/ops/running/nodes/adding-removing +--- + +[use running cluster]: {{<baseurl>}}riak/kv/3.0.2/using/running-a-cluster + +This page describes the process of adding and removing nodes to and from +a Riak KV cluster. For information on creating a cluster check out [Running a Cluster][use running cluster]. + +## Start the Node + +Just like the initial configuration steps, this step has to be repeated +for every node in your cluster. Before a node can join an existing +cluster it needs to be started. Depending on your mode of installation, +use either the init scripts installed by the Riak binary packages or +simply the script [`riak`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-cli/): + +```bash +/etc/init.d/riak start +``` + +or + +```bash +bin/riak start +``` + +When the node starts, it will look for a cluster description, known as +the **ring file**, in its data directory. 
If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. This command +would stage a join to that cluster: + +```bash +bin/riak-admin cluster join riak@192.168.2.2 +``` + +If the join request is successful, you should see the following: + +``` +Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2' +``` + +If you have multiple nodes that you would like to join to an existing +cluster, repeat this process for each of them. + +## Joining Nodes to Form a Cluster + +The process of joining a cluster involves several steps, including +staging the proposed cluster nodes, reviewing the cluster plan, and +committing the changes. + +After staging each of the cluster nodes with `riak-admin cluster join` +commands, as in the section above, the next step in forming a cluster is +to review the proposed plan of changes. This can be done with the +`riak-admin cluster plan` command, which is shown in the example below. 
+
+```
+=============================== Staged Changes ================================
+Action         Nodes(s)
+-------------------------------------------------------------------------------
+join           'riak@192.168.2.3'
+join           'riak@192.168.2.4'
+join           'riak@192.168.2.5'
+join           'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+                         After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid     100.0%     20.3%    'riak@192.168.2.2'
+valid       0.0%     20.3%    'riak@192.168.2.3'
+valid       0.0%     20.3%    'riak@192.168.2.4'
+valid       0.0%     20.3%    'riak@192.168.2.5'
+valid       0.0%     18.8%    'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+  12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from the cluster in two ways. One assumes that a
+node is decommissioned, for example, because its added capacity is not
+needed anymore or because it's explicitly replaced with a new one. The
+second is relevant for failure scenarios in which a node has crashed and
+is irrecoverable and thus must be removed from the cluster from another
+node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster.
+
+Similarly to joining a node, after executing `riak-admin cluster leave`
+the cluster plan must be reviewed with `riak-admin cluster plan` and
+the changes committed with `riak-admin cluster commit`.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing. Running
+`riak-admin cluster leave` without an argument simply selects the
+current node for you automatically.
+
+As with `riak-admin cluster leave`, the plan to have a node leave the
+cluster must first be reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before any changes will
+actually take place.
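+
+Putting these steps together, a typical decommission of a running node
+looks like the following sketch (stage the leave on the departing node,
+then review and commit from any node):
+
+```bash
+# On the node that is leaving the cluster
+riak-admin cluster leave
+
+# Review the staged plan, then commit it
+riak-admin cluster plan
+riak-admin cluster commit
+```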
+ + +## Pausing a `join` or `leave` + +{{% note title="Warning" %}} +Pausing may impact cluster health and is not recommended for more than a short period of time. +{{% /note %}} + +To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0: + +```bash +riak-admin transfer-limit <node> 0 +``` + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/backend.md b/content/riak/kv/3.0.2/using/cluster-operations/backend.md new file mode 100644 index 0000000000..1dc0266d8f --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/backend.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "Backend" +description: "" +project: "riak_kv" +project_version: 3.0.2 +#menu: +# riak_kv-3.0.2: +# name: "Backend" +# identifier: "cluster_operations_backend" +# weight: 112 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +**TODO: Add content** + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/backing-up.md b/content/riak/kv/3.0.2/using/cluster-operations/backing-up.md new file mode 100644 index 0000000000..b186b282b5 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/backing-up.md @@ -0,0 +1,271 @@ +--- +title: "Backing Up" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Backing Up" + identifier: "cluster_operations_backing_up" + weight: 106 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/3.0.2/ops/running/backups + - /riak/kv/3.0.2/ops/running/backups +--- + +[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters +[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference +[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/reference/strong-consistency +[concept aae]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/ +[aae read repair]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy + +Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. + +Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios. + +This page covers how to perform backups of Riak KV data. + +## Overview + +Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar. + +Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes. + +The basic process for getting a backup of Riak KV from a node is as follows: + +1. Stop Riak KV with `riak stop`. +2. Backup the appropriate data, ring, and configuration directories. +3. Start Riak KV. + +Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting. + +{{% note title="Backups and eventual consistency" %}} +Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. + +Data could exist on some nodes and not others at the exact time a backup is made. 
Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy).
+{{% /note %}}
+
+## OS-Specific Directory Locations
+
+The default Riak KV data, ring, and configuration directories for each of the supported operating systems are as follows:
+
+#### Debian and Ubuntu
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### Fedora and RHEL
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### FreeBSD
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/usr/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### OS X
+
+Data | Directory
+:----|:---------
+Bitcask | `./data/bitcask`
+LevelDB | `./data/leveldb`
+Ring | `./data/riak/ring`
+Configuration | `./etc`
+Cluster Metadata | `./data/riak/cluster_meta`
+Search | `./data/riak/yz`
+Strong consistency | `./data/ensembles`
+
+**Note**: OS X paths are relative to the directory in which the package
+was extracted.
+
+#### SmartOS
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/opt/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### Solaris
+
+Data | Directory
+:----|:---------
+Bitcask | `/opt/riak/data/bitcask`
+LevelDB | `/opt/riak/data/leveldb`
+Ring | `/opt/riak/ring`
+Configuration | `/opt/riak/etc`
+Cluster Metadata | `/opt/riak/cluster_meta`
+Search | `/opt/riak/yz`
+Strong consistency | `/opt/riak/data/ensembles`
+
+## Performing Backups
+
+{{% note title="Deprecation notice" %}}
+In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#backup) command commonly used for
+backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
+{{% /note %}}
+
+Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.
+
+Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
+installation.
+
+The following examples use `tar`:
+
+{{% note %}}
+Backups must be performed while Riak KV is stopped to prevent data loss. 
+{{% /note %}} + +### Bitcask + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak +``` + +### LevelDB + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak +``` + +### Cluster Metadata + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/cluster_meta +``` + +### Search / Solr Data + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/yz +``` + +### Strong Consistency Data + +Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature +can be stored in an analogous fashion: + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/ensembles +``` + +## Restoring a Node + +The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment. + +If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process: + +1. Install Riak on the new node. +2. Restore your old node's configuration files, data directory, and ring + directory. +3. Start the node and verify proper operation with `riak ping`, + `riak-admin status`, and other methods you use to check node health. + +If the node name of a restored node (`-name` argument in `vm.args` or +`nodename` parameter in `riak.conf`) is different than the name of the +node that the restored backup was taken from, you will need to +additionally: + +1. Mark the original instance down in the cluster using + [`riak-admin down <node>`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#down) +2. Join the restored node to the cluster using + [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster-join) +3. Replace the original instance with the renamed instance with + [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster-force-replace) +4. Plan the changes to the cluster with `riak-admin cluster plan` +5. Finally, commit the cluster changes with `riak-admin cluster commit` + +{{% note %}} +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/3.0.2/using/admin/). +{{% /note %}} + +For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. + +1. Join to any existing cluster node. + + ```bash + riak-admin cluster join riak@riak2.example.com + ``` + +2. Mark the old instance down. + + ```bash + riak-admin down riak@riak1.example.com + ``` + +3. Force-replace the original instance with the new one. + + ```bash + riak-admin cluster force-replace \ + riak@riak1.example.com riak@riak6.example.com + ``` + +4. Display and review the cluster change plan. + + ```bash + riak-admin cluster plan + ``` + +5. Commit the changes to the cluster. 
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+Your [configuration files][config reference] should also be changed to match the new name in addition to running the commands (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change, but the hostnames are used for the node names and the hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member-status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/3.0.2/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/cluster-operations/bucket-types.md b/content/riak/kv/3.0.2/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..2ff97faca8
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/cluster-operations/bucket-types.md
@@ -0,0 +1,63 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional setup step on the
+command line.
+
+## Creating a Bucket Type
+
+When creating a new bucket type, you can create one without
+any properties and then set individual buckets to be indexed. 
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/3.0.2/using/cluster-operations/changing-cluster-info.md new file mode 100644 index 0000000000..6bdea53e42 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/changing-cluster-info.md @@ -0,0 +1,458 @@ +--- +title: "Changing Cluster Information" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Changing Cluster Info" + identifier: "cluster_operations_change_info" + weight: 101 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/3.0.2/ops/running/nodes/renaming + - /riak/kv/3.0.2/ops/running/nodes/renaming +--- + +[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference + +## Change the Node Name + +The node name is an important setting for the Erlang VM, especially when +you want to build a cluster of nodes, as the node name identifies both +the Erlang application and the host name on the network. All nodes in +the Riak cluster need these node names to communicate and coordinate +with each other. + +In your configuration files, the node name defaults to `riak@127.0.0.1`. +To change the node name, change the following line: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +Change it to something that corresponds to either the IP address or a +resolvable host name for this particular node, like so: + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +## Change the HTTP and Protocol Buffers binding address + +By default, Riak's HTTP and Protocol Buffers services are bound to the +local interface, i.e. 127.0.0.1, and are therefore unable to serve +requests from the outside network. The relevant setting is in your +[configuration files][config reference]: + +```riakconf +# For HTTP +listener.http.internal = 127.0.0.1:8098 + +# For Protocol Buffers +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +% In the riak_api section + +% For HTTP +{http, [ {"127.0.0.1", 8098 } ]}, + +% For Protocol Buffers +{pb, [ {"127.0.0.1", 8087} ] }, +``` + +Either change it to use an IP address that corresponds to one of the +server's network interfaces, or 0.0.0.0 to allow access from all +interfaces and networks, e.g.: + +```riakconf +listener.http.internal = 0.0.0.0:8098 +``` + +```appconfig +% In the riak_core section +{http, [ {"0.0.0.0", 8098 } ]}, +``` + +The same configuration should be changed for the Protocol Buffers +interface if you intend on using it (which we recommend). 
Change the +following line: + +```riakconf +listener.protobuf.internal = 0.0.0.0:8087 +``` + +```appconfig +% In the riak_core section +{pb, [ {"0.0.0.0", 8087} ] }, +``` + +## Rename Single Node Clusters + +To rename a single-node development cluster: + +1. Stop the node with `riak stop`. + +2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args` to the new name. + +3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`. + +4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`. + +5. Start Riak on the node with `riak start`. + + +## Rename Multi-Node Clusters + +For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. + +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. + +There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. + +The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method. + +### Example Scenario + +For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration: + +* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11 +* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12 +* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13 +* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14 +* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15 + +The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use. + +The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation. + +### Process + +This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section. + +1. [Down the node to be reconfigured](#down) +2. [Reconfigure node to use new address](#reconfigure) +3. [Repeat previous steps on each node](#repeat) + + +<a id="down"></a> +#### Down the Node + +1. Stop Riak on `node1.localdomain`: + + ```bash + riak stop + ``` + + The output should look like this: + + ``` + Attempting to restart script through sudo -H -u riak + ok + ``` + +2. 
From the `node2.localdomain` node, mark `riak@10.1.42.11` down: + + ```bash + riak-admin down riak@10.1.42.11 + ``` + + Successfully marking the node down should produce output like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: "riak@10.1.42.11" marked as down + ``` + + This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node. + +<a id="reconfigure"></a> +#### Reconfigure Node to Use New Address + +Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps: + +1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example: + + `riak.conf`: `nodename = riak@192.168.17.11` + `vm.args` : `-name riak@192.168.17.11` + +2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Single Node Clusters](#single-node-clusters). + +3. Rename the node's `ring` directory, the location of which is described in step 4 of [Single Node Clusters](#single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process. + +4. Start Riak on `node1.localdomain`. + + ```bash + riak start + ``` + +5. Join the node back into the cluster. + + ```bash + riak-admin cluster join riak@10.1.42.12 + ``` + + Successful staging of the join request should have output like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12' + ``` + +6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`: + + ```bash + riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11 + ``` + + Successful force replacement staging output looks like this: + + ```bash + Attempting to restart script through sudo -H -u riak + Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11' + ``` + +7. 
Review the new changes with `riak-admin cluster plan:` + + ```bash + riak-admin cluster plan + ``` + + Example output: + + ```bash + Attempting to restart script through sudo -H -u riak + =========================== Staged Changes ============================ + Action Nodes(s) + ----------------------------------------------------------------------- + join 'riak@192.168.17.11' + force-replace 'riak@10.1.42.11' with 'riak@192.168.17.11' + ----------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.11' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ####################################################################### + After cluster transition 1/1 + ####################################################################### + + ============================= Membership ============================== + Status Ring Pending Node + ----------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@10.1.42.12' + valid 20.3% -- 'riak@10.1.42.13' + valid 20.3% -- 'riak@10.1.42.14' + valid 18.8% -- 'riak@10.1.42.15' + ----------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 13 + 13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11' + ``` + +8. Commit the new changes to the cluster with `riak-admin cluster commit`: + + ```bash + riak-admin cluster commit + ``` + + Output from the command should resemble this example: + + ```bash + Attempting to restart script through sudo -H -u riak + Cluster changes committed + ``` + +9. Check that the node is participating in the cluster and functioning as expected: + + ```bash + riak-admin member-status + ``` + + Output should resemble this example: + + ```bash + Attempting to restart script through sudo -H -u riak + ============================= Membership ============================== + Status Ring Pending Node + ----------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@10.1.42.12' + valid 20.3% -- 'riak@10.1.42.13' + valid 20.3% -- 'riak@10.1.42.14' + valid 18.8% -- 'riak@10.1.42.15' + ----------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +10. Monitor hinted handoff transfers to ensure they have finished with the `riak-admin transfers` command. + +11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed. + +{{% note title="Note" %}} +When using the `riak-admin force-replace` command, you will always get a +warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be +lost`. Since we didn't delete any data files and we are replacing the node +with itself under a new name, we will not lose any replicas. +{{% /note %}} + +<a id="repeat"></a> +#### Repeat previous steps on each node + +Repeat the steps above for each of the remaining nodes in the cluster. + +Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster. 
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the below steps can be used to rename nodes in a cluster that is being restored from backups. The below steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (or `-name` in `vm.args`) from `riak@10.1.42.11` to the new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid     20.3%      --      'riak@192.168.17.11'
+    down      20.3%      --      'riak@10.1.42.12'
+    down      20.3%      --      'riak@10.1.42.13'
+    down      20.3%      --      'riak@10.1.42.14'
+    down      18.8%      --      'riak@10.1.42.15'
+    -------------------------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+
+    ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+    ```bash
+    ================================== Claimant ===================================
+    Claimant:  'riak@192.168.17.11'
+    Status:     up
+    Ring Ready: true
+
+    ============================== Ownership Handoff ==============================
+    No pending changes.
+
+    ============================== Unreachable Nodes ==============================
+    All nodes are up and reachable
+    ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf`, or `-name` in `vm.args` as described above.
+
+2. Move aside the ring directory. As in [Multi-Node Clusters](#multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. 
Force replace each node with its old node name. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`. + +6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to below: + + ```bash + =============================== Staged Changes ================================ + Action Details(s) + ------------------------------------------------------------------------------- + force-replace 'riak@10.1.42.12' with 'riak@192.168.17.12' + force-replace 'riak@10.1.42.13' with 'riak@192.168.17.13' + force-replace 'riak@10.1.42.14' with 'riak@192.168.17.14' + force-replace 'riak@10.1.42.15' with 'riak@192.168.17.15' + join 'riak@192.168.17.12' + join 'riak@192.168.17.13' + join 'riak@192.168.17.14' + join 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.12' replicas will be lost + WARNING: All of 'riak@10.1.42.13' replicas will be lost + WARNING: All of 'riak@10.1.42.14' replicas will be lost + WARNING: All of 'riak@10.1.42.15' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ############################################################################### + After cluster transition 1/1 + ############################################################################### + + ================================= Membership ================================== + Status Ring Pending Node + ------------------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@192.168.17.12' + valid 20.3% -- 'riak@192.168.17.13' + valid 20.3% -- 'riak@192.168.17.14' + valid 18.8% -- 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 51 + 13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12' + 13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13' + 13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14' + 12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15' + ``` + +7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`. + +8. 
Once the cluster transition has completed, all node names should be changed and marked as valid in `riak-admin member-status`, like below:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid     20.3%      --      'riak@192.168.17.11'
+    valid     20.3%      --      'riak@192.168.17.12'
+    valid     20.3%      --      'riak@192.168.17.13'
+    valid     20.3%      --      'riak@192.168.17.14'
+    valid     18.8%      --      'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    ```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/cluster-operations/handoff.md b/content/riak/kv/3.0.2/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..7500df716d
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/cluster-operations/handoff.md
@@ -0,0 +1,120 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/running/handoff
+  - /riak/kv/3.0.2/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to edit your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command requires
+that you specify both a node or nodes to be targeted by the command and
+whether you'd like to disable inbound handoff, outbound handoff, or
+both. The `disable` command works just like `enable`. This command
+would disable all forms of handoff on all nodes, to give just one
+example:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster. 
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table that will provide the following information
+about each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, it will display details for each
+ongoing transfer.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the sections above.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/cluster-operations/inspecting-node.md b/content/riak/kv/3.0.2/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..bf9bc065ef
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,496 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/running/nodes/inspecting
+  - /riak/kv/3.0.2/ops/running/nodes/inspecting
+---
+
+When you need to inspect a Riak node to gather performance metrics or
+investigate potential issues, a number of tools are available to help.
+These are either included with Riak itself or made available through the
+Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note that for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required for statistics to be generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node. 
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET +FSM Siblings are inextricably linked. These are one-minute stats. + +Stat | Description +------------------------------|--------------------------------------------------- +`node_get_fsm_objsize_mean`   | Mean object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_95`     | 95th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_99`     | 99th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_100`    | 100th percentile object size (bytes) encountered by this node within the last minute + +### Total Stats + +Total Stats represent the total number of times a particular activity +has occurred since this node was started. + +Stat | Description +---------------------------------------|--------------------------------------------------- +`node_gets_total`                       | Total number of GETs coordinated by this node, including GETs to non-local vnodes +`node_puts_total`                       | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes +`vnode_gets_total`                      | Total number of GETs coordinated by local vnodes +`vnode_puts_total`                      | Total number of PUTs coordinated by local vnodes +`read_repairs_total`                    | Total number of Read Repairs this node has coordinated +`coord_redirs_total`                    | Total number of requests this node has redirected to other nodes for coordination +`vnode_index_refreshes_total`           | Total number of indexes refreshed during secondary index anti-entropy +`vnode_index_reads_total`               | Total number of local replicas participating in secondary index reads +`vnode_index_writes_total`              | Total number of local replicas participating in secondary index writes +`vnode_index_writes_postings_total`     | Total number of individual secondary index values written +`vnode_index_deletes_total`             | Total number of local replicas participating in secondary index deletes +`vnode_index_deletes_postings_total`    | Total number of individual secondary index values deleted +`pbc_connects_total`                    | Total number of Protocol Buffers connections made +`precommit_fail`                        | Total number of pre-commit hook failures +`postcommit_fail`                       | Total number of post-commit hook failures +`node_get_fsm_rejected_total`           | Total number of GET FSMs rejected by Sidejob's overload protection +`node_put_fsm_rejected_total`           | Total number of PUT FSMs rejected by Sidejob's overload protection +`read_repairs_primary_outofdate_count`  | Total number of read repair operations performed on primary vnodes due to stale replicas +`read_repairs_primary_notfound_count`   | Total number of read repair operations performed on primary vnodes due to missing replicas +`read_repairs_fallback_outofdate_count` | Total number of read repair operations performed on fallback vnodes due to stale replicas +`read_repairs_fallback_notfound_count`  | Total number of read repair operations performed on fallback vnodes due to missing replicas + +### Timestamps + +Some of the Erlang applications that make up Riak contribute +statistics to `riak-admin status`. The below timestamps record, in +Epoch time, the last time statistics for that application were +generated. + +Stat | Description +--------------------|--------------------------------------------------- +`riak_kv_stat_ts`   | The last time Riak KV stats were generated. +`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated.
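+ +All of the statistics on this page are also exposed as JSON over HTTP via the [HTTP API Status]({{<baseurl>}}riak/kv/3.0.2/developing/api/http/status/) endpoint. A minimal sketch, assuming the default HTTP listener on `localhost:8098` (the host and port depend on your `listener.http` configuration): + +```bash +# Fetch the same stats as a JSON document from the HTTP interface. +curl -s http://localhost:8098/stats +```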
+ +### Ring + +General ring information is reported in `riak-admin status`. + +Stat | Description +---------------------|--------------------------------------------------- +`ring_members`       | List of nodes that are members of the ring +`ring_num_partitions`| The number of partitions in the ring +`ring_ownership`     | List of all nodes in the ring and their associated partition ownership +`ring_creation_size` | Ring size this cluster was created with + +### CPU and Memory + +CPU statistics are taken directly from Erlang's `cpu_sup` module, whose +documentation can be found at [ErlDocs: +cpu_sup](http://erlang.org/doc/man/cpu_sup.html). + +Stat | Description +-------------|--------------------------------------------------- +`cpu_nprocs` | Number of operating system processes +`cpu_avg1`   | The average number of active processes for the last 1 minute (equivalent to the top(1) command's load average when divided by 256) +`cpu_avg5`   | The average number of active processes for the last 5 minutes (equivalent to the top(1) command's load average when divided by 256) +`cpu_avg15`  | The average number of active processes for the last 15 minutes (equivalent to the top(1) command's load average when divided by 256) + +Memory statistics are taken directly from the Erlang virtual machine, +whose documentation can be found at [ErlDocs: +Memory](http://erlang.org/doc/man/erlang.html#memory-0). + +Stat | Description +------------------------|--------------------------------------------------- +`memory_total`          | Total allocated memory (sum of processes and system) +`memory_processes`      | Total amount of memory allocated for Erlang processes +`memory_processes_used` | Total amount of memory used by Erlang processes +`memory_system`         | Total allocated memory that is not directly related to an Erlang process +`memory_atom`           | Total amount of memory currently allocated for atom storage +`memory_atom_used`      | Total amount of memory currently used for atom storage +`memory_binary`         | Total amount of memory used for binaries +`memory_code`           | Total amount of memory allocated for Erlang code +`memory_ets`            | Total memory allocated for Erlang Term Storage +`mem_total`             | Total available system memory +`mem_allocated`         | Total memory allocated for this node + +### Erlang VM + +The below statistics describe properties of the Erlang VM.
+ +Stat | Description +--------------------------|--------------------------------------------------- +`nodename`                | The name this node uses to identify itself +`connected_nodes`         | A list of the nodes that this node is aware of at this time +`sys_driver_version`      | String representing the Erlang driver version in use by the runtime system +`sys_global_heaps_size`   | Current size of the shared global heap +`sys_heap_type`           | String representing the heap type in use (one of private, shared, hybrid) +`sys_logical_processors`  | Number of logical processors available on the system +`sys_otp_release`         | Erlang OTP release version in use on the node +`sys_process_count`       | Number of processes currently running in the Erlang VM +`sys_smp_support`         | Boolean value representing whether symmetric multi-processing (SMP) is available +`sys_system_version`      | Detailed Erlang version information +`sys_system_architecture` | The node operating system and hardware architecture +`sys_threads_enabled`     | Boolean value representing whether threads are enabled +`sys_thread_pool_size`    | Number of threads in the asynchronous thread pool +`sys_wordsize`            | Size of Erlang term words in bytes as an integer; for example, on 32-bit architectures 4 is returned and on 64-bit architectures 8 is returned + +### Miscellaneous Information + +Miscellaneous Information provides additional details particular to this +node. + +Stat | Description +---------------------------|--------------------------------------------------- +`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as undefined if LevelDB is not being used. +`disk`                     | Information about the disk, taken from Erlang's disksup module. Reported as [{"ID",KBytes_Used,Percent_Util}]. +`storage_backend`          | The storage backend currently in use. + +### Pipeline Metrics + +The following metrics from `riak_pipe` are generated during MapReduce +operations. + +Stat | Description +--------------------------------|--------------------------------------------------- +`pipeline_active`               | The number of pipelines active in the last 60 seconds +`pipeline_create_count`         | The total number of pipelines created since the node was started +`pipeline_create_error_count`   | The total number of pipeline creation errors since the node was started +`pipeline_create_error_one`     | The number of pipeline creation errors in the last 60 seconds +`pipeline_create_one`           | The number of pipelines created in the last 60 seconds + +### Application and Subsystem Versions + +The specific version of each Erlang application and subsystem which +makes up a Riak node is present in the `riak-admin status` output. Each +application is linked below next to its version identifier.
+ +Stat | Description +------------------------|--------------------------------------------------- +`erlydtl_version`       | [ErlyDTL](http://github.com/erlydtl/erlydtl) +`riak_control_version`  | [Riak Control](http://github.com/basho/riak_control) +`cluster_info_version`  | [Cluster Information](http://github.com/basho/cluster_info) +`riak_search_version`   | [Riak Search](http://github.com/basho/riak_search) +`merge_index_version`   | [Merge Index](http://github.com/basho/merge_index) +`riak_kv_version`       | [Riak KV](http://github.com/basho/riak_kv) +`sidejob_version`       | [Sidejob](http://github.com/basho/sidejob) +`riak_api_version`      | [Riak API](http://github.com/basho/riak_api) +`riak_pipe_version`     | [Riak Pipe](http://github.com/basho/riak_pipe) +`riak_core_version`     | [Riak Core](http://github.com/basho/riak_core) +`bitcask_version`       | [Bitcask](http://github.com/basho/bitcask) +`basho_stats_version`   | [Basho Stats](http://github.com/basho/basho_stats) +`webmachine_version`    | [Webmachine](http://github.com/basho/webmachine) +`mochiweb_version`      | [MochiWeb](http://github.com/basho/mochiweb) +`inets_version`         | [inets](http://erlang.org/doc/apps/inets/) +`erlang_js_version`     | [Erlang JS](http://github.com/basho/erlang_js) +`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/) +`os_mon_version`        | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/) +`riak_sysmon_version`   | [Riak System Monitor](http://github.com/basho/riak_sysmon) +`ssl_version`           | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/) +`public_key_version`    | [Erlang Public Key](http://erlang.org/doc/apps/public_key/) +`crypto_version`        | [Erlang crypto](http://erlang.org/doc/apps/crypto/) +`sasl_version`          | [SASL](http://erlang.org/doc/apps/sasl/) +`lager_version`         | [Lager](http://github.com/DeadZen/lager) +`goldrush_version`      | [Goldrush](http://github.com/DeadZen/goldrush) +`compiler_version`      | [Erlang Compiler](http://erlang.org/doc/apps/compiler/) +`syntax_tools_version`  | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/) +`stdlib_version`        | [Standard Library](http://erlang.org/doc/apps/stdlib/) +`kernel_version`        | [Kernel](http://erlang.org/doc/apps/kernel/) + +### Riak Search Statistics + +The following statistics related to Riak Search message queues are +available.
+ +Stat | Description +-----------------------------|--------------------------------------------------- +`riak_search_vnodeq_max`     | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_mean`    | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_median`  | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_min`     | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_total`   | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started +`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem + +Note that, under ideal operation and with the exception of +`riak_search_vnodes_running`, these statistics should contain low values +(e.g., 0-10). Higher values could be indicative of an issue. + +## `riak-debug` + +The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes. + +`riak-debug` also runs `riak-admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well. + +{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}} +The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion. +{{% /note %}} + +## Strong Consistency Stats + +Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The table below lists those stats.
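+ +All of them share the `consistent_` prefix, so a quick way to view just this group is to filter the full `riak-admin status` output. A minimal sketch, assuming `riak-admin` is on your `PATH`: + +```bash +# Show only the strong consistency stats (all share the consistent_ prefix). +riak-admin status | grep consistent_ +```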
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response + +## riak-admin diag + +Running `riak-admin diag` by itself will perform a check of all of the +data partitions in your cluster.
It will return a listing of partitions +that have been checked, each of which looks something like this: + +``` +{1392993748081016843912887106182707253109560705024,  % the partition checked + 'dev-rel@127.0.0.1'},                               % that partition's nodename +``` + +At the end of that (potentially very long) listing of checked +partitions, it will print notices, warnings, and other pieces of +information about issues that it has found, including date/time, message +type, and a detailed description. Here's an example: + +``` +15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file. +15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance. +``` + +Messages bear the following types (derived from +[syslog](http://en.wikipedia.org/wiki/Syslog) severity levels): + +* `debug` +* `info` +* `notice` +* `warning` +* `error` +* `critical` +* `alert` +* `emergency` + +#### Command flags + +Attaching the `--help` flag will return a list of flags and commands +that can be used with Riaknostic: + +``` +Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...] + +-h, --help            Display help/usage dialogue +-d, --level           Minimum message severity level (default: notice) +-l, --list            Describe available diagnostic tasks +--export              Package system info in '/export.zip' +check_name            A specific check to run +``` + +Running `riak-admin diag` with the `--list` flag will return a list of +available diagnostic checks. The following checks are available: + +Check | Description +:-----|:----------- +`disk` | Data directory permissions and atime +`dumps` | Find crash dumps +`memory_use` | Measure memory usage +`nodes_connected` | Cluster node liveness +`ring_membership` | Cluster membership validity +`ring_preflists` | Check if the ring satisfies `n_val` +`ring_size` | Check if the ring size is valid +`search` | Check whether Riak Search is enabled on all nodes + +The `--level` flag enables you to specify the log level and thus to +filter messages based on type. You can pass in any of the message types +listed above (`debug`, `info`, etc.). + +The `--level` flag can be used when running `riak-admin diag` with or +without specifying a diagnostic check. + +#### Contributing + +Do you have an idea that would help us improve Riaknostic? If so, fork +the [GitHub repository](https://github.com/basho/riaknostic) and send us +a pull request with your changes. The code is documented with +[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API +Docs a read before you contribute. + +If you want to run the Riaknostic script while developing and you don't +have it hooked up to your local Riak installation, you can invoke it +directly like so: + +```bash +./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options] +``` + +Those extra options are usually assigned by the `riak-admin` script for +you, but here's how to set them: + +* `--etc` - The location of your Riak configuration directory (usually + `/etc`). In the example above, configuration is in the generated + directory of a source checkout of Riak. +* `--base` - The "base" directory of Riak, usually the root of the + generated directory or `/usr/lib/riak` on Linux. Scan the + `riak-admin` script for how the `RUNNER_BASE_DIR` variable is + assigned on your platform. +* `--user` - The user/UID as which the Riak node runs.
In a source + checkout, it's the current user; on most systems, it's `riak`. + +## Related Resources + +* [The riak-admin configuration management tool](../../admin/riak-admin/) +* [Riaknostic](http://riaknostic.basho.com/) +* [HTTP API Status](../../../developing/api/http/status/) + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/load-balancing.md b/content/riak/kv/3.0.2/using/cluster-operations/load-balancing.md new file mode 100644 index 0000000000..d4a2befa3f --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/load-balancing.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "Load Balancing" +description: "" +project: "riak_kv" +project_version: 3.0.2 +#menu: +#  riak_kv-3.0.2: +#    name: "Load Balancing" +#    identifier: "cluster_operations_load_balancing" +#    weight: 111 +#    parent: "managing_cluster_operations" +toc: true +aliases: +--- + +**TODO: Add content (not sure where this exists in docs)** + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/logging.md b/content/riak/kv/3.0.2/using/cluster-operations/logging.md new file mode 100644 index 0000000000..7073f96bc0 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/logging.md @@ -0,0 +1,47 @@ +--- +title: "Enabling and Disabling Debug Logging" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Logging" + identifier: "cluster_operations_logging" + weight: 105 + parent: "managing_cluster_operations" +toc: true +aliases: +--- + +If you'd like to enable debug logging on the current node, i.e. set the +console log level to `debug`, you can do so without restarting the node +by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: + +```erlang +lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). +``` + +You should replace the file location above (`/var/log/riak/console.log`) +with your platform-specific location, e.g. `./log/console.log` for a +source installation. This location is specified by the +`log.console.file` configuration parameter. + +If you'd like to enable debug logging on _all_ nodes instead of just one +node, you can enter the Erlang console of any running node by running `riak +attach` and enter the following: + +```erlang +rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])). +``` + +As before, use the appropriate log file location for your cluster. + +At any time, you can set the log level back to `info`: + +```erlang +rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])). +``` + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/replacing-node.md b/content/riak/kv/3.0.2/using/cluster-operations/replacing-node.md new file mode 100644 index 0000000000..1ebf0bf033 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/replacing-node.md @@ -0,0 +1,100 @@ +--- +title: "Replacing a Node" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Replacing a Node" + identifier: "cluster_operations_replace_node" + weight: 102 + parent: "managing_cluster_operations" +toc: true +aliases: +--- + +At some point, for various reasons, you might need to replace a node in +your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/3.0.2/using/repair-recovery)).
Here is the recommended way to go +about replacing a node. + +1. Back up your data directory on the node in question. In this example +scenario, we'll call the node `riak4`: + + ```bash + sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak + ``` + + If you have any unforeseen issues at any point in the node + replacement process, you can restore the node's data from this + backup. + +2. Download and install Riak on the new node you wish to bring into the +cluster and have it replace the `riak4` node. We'll call the new node +`riak7` for the purpose of this example. + +3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-cli/#start): + + ```bash + riak start + ``` + +4. Plan the join of the new `riak7` node to an existing node already +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: + + ```bash + riak-admin cluster join riak0 + ``` + +5. Plan the replacement of the existing `riak4` node with the new +`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster) command: + + ```bash + riak-admin cluster replace riak4 riak7 + ``` + + <div class=info> + <div class=title>Single Nodes</div> + If a node is started singly using default settings (as, for example, + you might do when you are building your first test environment), you + will need to remove the ring files from the data directory after you + edit `/etc/vm.args`. `riak-admin cluster replace` will not work as + the node has not been joined to a cluster. + </div> + +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster) command executed on the new +`riak7` node: + + ```bash + riak-admin cluster plan + ``` + +7. If the changes are correct, you can commit them with the +[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster) command: + + ```bash + riak-admin cluster commit + ``` + + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster): + + ```bash + riak-admin cluster clear + ``` + +Once you have successfully replaced the node, it should begin leaving +the cluster. You can check on ring readiness after replacing the node +with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#member-status) +commands. + +{{% note title="Ring Settling" %}} +You'll need to make sure that no other ring changes occur between the time +when you start the new node and the ring settles with the new IP info. + +The ring is considered settled when the new node reports `true` when you run +the `riak-admin ringready` command. 
+{{% /note %}} + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/secondary-indexes.md b/content/riak/kv/3.0.2/using/cluster-operations/secondary-indexes.md new file mode 100644 index 0000000000..1808ce9eb4 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/secondary-indexes.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 3.0.2 +#menu: +# riak_kv-3.0.2: +# name: "Secondary Indexes" +# identifier: "cluster_operations_2i" +# weight: 109 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/strong-consistency.md b/content/riak/kv/3.0.2/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..60220ec506 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/strong-consistency.md @@ -0,0 +1,76 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +aliases: +--- + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats.
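+ +Because they share the `consistent_` prefix, one way to poll them periodically from the shell is a sketch like the following (it assumes the common `watch` utility is available and that `riak-admin` is on your `PATH`): + +```bash +# Re-run riak-admin status every 90 seconds, keeping only the +# strong consistency stats. +watch -n 90 'riak-admin status | grep consistent_' +```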
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/3.0.2/using/cluster-operations/tictac-active-anti-entropy.md new file mode 100644 index 0000000000..2c001f35e3 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/tictac-active-anti-entropy.md @@ -0,0 +1,34 @@ +--- +title: "TicTac Active Anti-Entropy" +description: "An Active Anti-Entropy library" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "TicTac Active Anti-Entropy" + 
identifier: "TicTac_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/kv/3.0.2/ops/advanced/tictacaae/ + - /riak/3.0.2/ops/advanced/ticktacaae/ +--- + + + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) (AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +## TicTac AAE + +The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to fully integrate the library into the KV 3.0 release. + +TicTac Active Anti-Entropy makes two changes to the way Anti-Entropy has previously worked in Riak. The first change is to the way Merkle Trees are constructed, so that they are built incrementally. The second change allows the underlying Anti-Entropy key store to be key-ordered while still providing faster access to keys via their Merkle tree location or the last modified date of the object. + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and +off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements. + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/3.0.2/using/cluster-operations/v2-multi-datacenter.md new file mode 100644 index 0000000000..8cc553d731 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/v2-multi-datacenter.md @@ -0,0 +1,263 @@ +--- +title_supertext: "V2 Multi-Datacenter" +title: "Replication Operations" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "V2 Multi-Datacenter" + identifier: "cluster_operations_v2" + weight: 115 + parent: "managing_cluster_operations" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.2/ops/mdc/v2/operations + - /riak/kv/3.0.2/ops/mdc/v2/operations +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter/) instead. +{{% /note %}} + +Riak's Multi-Datacenter Replication system is largely +controlled by the `riak-repl` command. The sections below detail the +available subcommands. + +## add-listener + +Adds a listener (primary) to the given node, IP address, and port. + +```bash +riak-repl add-listener <nodename> <listen_ip> <port> +``` + +Below is an example usage: + +```bash +riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010 +``` + +## add-nat-listener + +Adds a NAT-aware listener (primary) to the given node, IP address, port, +NAT IP, and NAT port. If a non-NAT listener already exists with the same +internal IP and port, it is "upgraded" to a NAT Listener.
+ +```bash +riak-repl add-site <ipaddr> <portnum> <sitename> +``` + +Below is an example usage: + +```bash +riak-repl add-site 10.0.1.156 9010 newyork +``` + +## del-site + +Removes a site (secondary) from the local node by name. + +```bash +riak-repl del-site <sitename> +``` + +Below is an example usage: + +```bash +riak-repl del-site newyork +``` + +## status + +Obtains status information about replication. Reports counts on how much +data has been transmitted, transfer rates, message queue lengths of +clients and servers, number of fullsync operations, and connection +status. This command only displays useful information on the leader +node. + +```bash +riak-repl status +``` + +## start-fullsync + +Manually initiates a fullsync operation with connected sites. + +```bash +riak-repl start-fullsync +``` + +## cancel-fullsync + +Cancels any fullsync operations in progress. If a partition is in +progress, synchronization will stop after that partition completes. +During cancellation, `riak-repl status` will show `cancelled` in the +status. + +```bash +riak-repl cancel-fullsync +``` + +## pause-fullsync + +Pauses any fullsync operations in progress. If a partition is in +progress, synchronization will pause after that partition completes. +While paused, `riak-repl status` will show `paused` in the status +information. Fullsync may be cancelled while paused. + +```bash +riak-repl pause-fullsync +``` + +## resume-fullsync + +Resumes any fullsync operations that were paused. If a fullsync +operation was running at the time of the pause, the next partition will +be synchronized. If not, it will wait until the next `start-fullsync` +command or `fullsync_interval`. + +```bash +riak-repl resume-fullsync +``` + +## riak-repl Status Output + +The following definitions describe the output of the `riak-repl status` +command. Please note that many of these statistics will only appear on +the current leader node, and that all counts will be reset to 0 upon +restarting Riak. + +### Client + +Field | Description +:-----|:----------- +`client_stats` | See <a href="{{< baseurl >}}riak/kv/3.0.2/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a> +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected sites +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of site connections made to this node +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. 
+ +### Server + +Field | Description +:-----|:----------- +`server_bytes_recv` | The total number of bytes the server (listener) has received +`server_bytes_sent` | The total number of bytes the server (listener) has sent +`server_connect_errors` | The number of listener to site connection errors +`server_connects` | The number of times the listener connects to the client site +`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started +`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`server_stats` | See <a href="{{< baseurl >}}riak/kv/3.0.2/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a> + +### Elections and Objects + +Field | Description +:-----|:----------- +`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected +`elections_leader_changed` | The number of times a Riak node has surrendered leadership +`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation. +`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication +`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*. +`objects_sent` | The number of objects sent via realtime replication + +### Other + +Field | Description +:-----|:----------- +`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>` +`[sitename]_ips` | Defines a replication site +`leader` | Which node is the current leader of the cluster +`local_leader_message_queue_len` | The length of the object queue on the leader +`local_leader_heap_size` | The amount of memory the leader is using + +## Client Statistics + +Field | Description +------|------------ +`node` | A unique ID for the Riak node on which the client (site) is running +`site` | The connected site name configured with `riak-repl add-site` +`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1` +`fullsync_worker` | The Erlang process ID of the fullsync worker +`waiting_to_retry` | The listeners currently waiting to retry replication after a failure +`connected` | A list of connected clients<ul><li>`connected` - The IP address and port of a connected client (site)</li><li>`cluster_name` - The name of the connected client (site)</li><li>`connecting` - The PID, IP address, and port of a client currently establishing a connection</li></ul> +`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used.
They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul> + +## Bounded Queue + +The bounded queue is responsible for holding objects that are waiting to +participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/3.0.2/configuring/v2-multi-datacenter/) guide for more information. + +Field | Description +------|------------ +`queue_pid` | The Erlang process ID of the bounded queue +`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*. +`queue_length` | The number of Riak objects currently in the bounded queue +`queue_byte_size` | The size of all objects currently in the queue +`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*. +`queue_percentage` | The percentage of the queue that is full +`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged +`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more. + +## Server Statistics + +Field | Description +------|------------ +`node` | A unique ID for the Riak node on which the server (listener) is running +`site` | The connected site name configured with `riak-repl add-site` +`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. +`fullsync_worker` | The Erlang process ID of the fullsync worker +`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/3.0.2/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above +`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul> +`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server + +## Keylist Strategy + +The following fields appear under both the `keylist_server` and +`keylist_client` fields. Any differences are described in the table. + +Field | Description +------|------------ +`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
+`partition_start` | The number of elapsed seconds since replication has started on a given partition +`stage_start` | The number of elapsed seconds since replication has started on a given stage +`get_pool_size` | The number of Riak get finite state workers available to process requests + + + + diff --git a/content/riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter.md new file mode 100644 index 0000000000..2eb2bc7263 --- /dev/null +++ b/content/riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter.md @@ -0,0 +1,425 @@ +--- +title_supertext: "V3 Multi-Datacenter" +title: "Replication Operations" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "V3 Multi-Datacenter" + identifier: "cluster_operations_v3" + weight: 114 + parent: "managing_cluster_operations" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.2/ops/mdc/v3/operations + - /riak/kv/3.0.2/ops/mdc/v3/operations +--- + +[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.2/configuring/v3-multi-datacenter +[config v3 nat]: {{<baseurl>}}riak/kv/3.0.2/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{<baseurl>}}riak/kv/3.0.2/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{<baseurl>}}riak/kv/3.0.2/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{<baseurl>}}riak/kv/3.0.2/using/reference/multi-datacenter/statistics + +This document explains how to manage replication with the `riak-repl` +command. The behavior of some of these commands can be altered by +setting appropriate [configuration][config v3 mdc] values. + +All commands need to be run only once on a single node of a cluster for +the changes to propagate to all other nodes. All changes will persist +across node restarts and will automatically take effect when nodes are +added to the cluster. + +## Cluster Connectivity + +#### clustername + +Set the `clustername` for all nodes in a Riak cluster. + +* Without a parameter, returns the current name of the cluster +* With a parameter, names the current cluster + +To **set** the `clustername`: + +* Syntax: `riak-repl clustername <clustername>` +* Example: `riak-repl clustername Boston` + +To **get** the `clustername`: + +* Syntax: `riak-repl clustername` +* Example: `riak-repl clustername` + +#### connect + +The `connect` command establishes communications from a source cluster +to a sink cluster of the same ring size. The `host:port` of the sink +cluster is used for this. The IP and port to connect to can be found in +the `advanced.config` of the remote cluster, under `riak_core` and +`cluster_mgr`. + +The `host` can be either an IP address + +* Syntax: `riak-repl connect <ip>:<port>` +* Example: `riak-repl connect 192.168.2.1:9080` + +...or a hostname that will resolve to an IP address. + +* Syntax: `riak-repl connect <host>:<port>` +* Example: `riak-repl connect Austin:9080` + +#### disconnect + +Disconnects a source cluster from a sink cluster. + +You may define a `host:port` combination + +* Syntax: `riak-repl disconnect <host>:<port>` +* Example: `riak-repl disconnect 192.168.2.1:9080` + +...or use the *name* of the cluster. + +* Syntax: `riak-repl disconnect <sink_clustername>` +* Example: `riak-repl disconnect Austin` + +#### connections + +Display a list of connections between source and sink clusters.
+
+* Syntax: `riak-repl connections`
+* Example: `riak-repl connections`
+
+#### clusterstats
+
+Displays current cluster stats using an optional `ip:port` as well as an
+optional `protocol-id`.
+
+`protocol-id` can be one of the following:
+
+* `cluster_mgr`
+* `rt_repl`
+* `fs_repl`
+
+The `clusterstats` command in use:
+
+* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>`
+* Example: `riak-repl clusterstats 192.168.2.1:9080`
+* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl`
+
+
+## Realtime Replication Commands
+
+#### realtime enable
+
+Enable realtime replication from a source cluster to sink clusters.
+
+This will start queuing updates for replication. The cluster will still
+require an invocation of `realtime start` for replication to occur.
+
+* Syntax: `riak-repl realtime enable <sink_clustername>`
+* Example: `riak-repl realtime enable Austin`
+
+#### realtime disable
+
+Disable realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime disable <sink_clustername>`
+* Example: `riak-repl realtime disable Austin`
+
+
+#### realtime start
+
+Start realtime replication connections from a source cluster to sink
+clusters. See also `realtime enable` (above).
+
+* Syntax: `riak-repl realtime start <sink_clustername>`
+* Example: `riak-repl realtime start Austin`
+
+#### realtime stop
+
+Stop realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime stop <sink_clustername>`
+* Example: `riak-repl realtime stop Austin`
+
+
+## Fullsync Replication Commands
+
+These behaviors can be altered using the `fullsync_on_connect`
+parameter in `advanced.config`. See the [Configuration Guide][config v3 mdc] for more information.
+
+#### fullsync enable
+
+Enable fullsync replication from a source cluster to sink clusters. By
+default, a fullsync will begin as soon as a connection to the remote
+cluster is established.
+
+* Syntax: `riak-repl fullsync enable <sink_clustername>`
+* Example: `riak-repl fullsync enable Austin`
+
+#### fullsync disable
+
+Disables fullsync for a cluster.
+
+* Syntax: `riak-repl fullsync disable <sink_clustername>`
+* Example: `riak-repl fullsync disable Austin`
+
+#### fullsync start
+
+Starts a fullsync. If the application configuration
+`fullsync_on_connect` is set to `false`, a fullsync needs to be started
+manually. This is also used to trigger a periodic fullsync using a cron
+job. While a fullsync is in progress, a `start` command is ignored and a
+message is logged.
+
+* Syntax: `riak-repl fullsync start <sink_clustername>`
+* Example: `riak-repl fullsync start Austin`
+
+#### fullsync stop
+
+Stops a fullsync.
+
+* Syntax: `riak-repl fullsync stop <sink_clustername>`
+* Example: `riak-repl fullsync stop Austin`
+
+## Cascading Realtime Writes
+
+#### realtime cascades
+
+Shows the current cascading realtime setting.
+
+* Syntax: `realtime cascades`
+* Example: `riak-repl realtime cascades`
+
+#### realtime cascades always
+
+Enable realtime cascading writes.
+
+* Syntax: `realtime cascades always`
+* Example: `riak-repl realtime cascades always`
+
+#### realtime cascades never
+
+Disable realtime cascading writes.
+
+* Syntax: `realtime cascades never`
+* Example: `riak-repl realtime cascades never`
+
+
+## NAT
+
+**Note**: See [V3 Multi Data Center Replication With NAT][config v3 nat] for more information.
+
+#### nat-map show
+
+Show the current NAT mapping table.
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### `proxy-get enable`
+
+Enable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provide a redirect to the `<to-cluster>` for `proxy_get` if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+
+Show the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+
+Delete an existing redirect so that `proxy_get` requests go to the
+original provider cluster ID again.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Display this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+  ```bash
+  riak-repl show-local-cluster-id
+  ```
+
+  Possible output:
+
+  ```
+  local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+  ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster. This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster across all nodes on that cluster for a fullsync
+to a sink cluster. This means that if fullsync has been configured for
+two different clusters, both with a `max_fssource_cluster` of 5, up to
+10 fullsync workers can be in progress. This only affects nodes on the
+source cluster on which this parameter is defined via the configuration
+file or the command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node's maximum number of connections. This only affects nodes on
+the sink cluster on which this parameter is defined via the
+configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
+(Version 3) separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+  ```bash
+  riak-repl modes mode_repl12 mode_repl13
+  ```
+
+  Possible output:
+
+  ```
+  Current replication modes: [mode_repl12,mode_repl13]
+  ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+  ```bash
+  riak-repl modes
+  ```
+
+  Possible output:
+
+  ```
+  Current replication modes: [mode_repl12,mode_repl13]
+  ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and realtime replication replicate data from source clusters to
+sink clusters, but some configurations and metadata (such as search
+indices and bucket properties) will not be replicated.
+
+Not replicating certain configurations and metadata supports
+heterogeneous cluster configurations, but there are operational steps
+you can take when you want homogeneous cluster configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you would change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Bucket and bucket type properties on the source cluster
+will _not_ be replicated to sink clusters.
+
+If you want the properties for buckets or bucket types
+present on the source cluster to be propagated to sink clusters,
+you should update this data for each cluster at the same
+time you would change the source cluster, as in the sketch below.
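+
+As a minimal sketch (the `users` bucket type and its properties here
+are hypothetical), you would run the same `riak-admin bucket-type`
+commands against a node in the source cluster and against a node in
+each sink cluster:
+
+```bash
+# Run on one node in the source cluster AND on one node in each sink
+# cluster; replication will not propagate these definitions for you.
+# The type name and properties are illustrative only.
+riak-admin bucket-type create users '{"props":{"n_val":3,"allow_mult":true}}'
+riak-admin bucket-type activate users
+```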
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/performance.md b/content/riak/kv/3.0.2/using/performance.md
new file mode 100644
index 0000000000..27b46cfb7d
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/performance.md
@@ -0,0 +1,268 @@
+---
+title: "Improving Performance"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Performance"
+    identifier: "managing_performance"
+    weight: 206
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/kv/3.0.2/ops/tuning/linux/
+  - /riak/3.0.2/ops/tuning/linux/
+---
+
+Many Unix-like operating systems and distributions are tuned for desktop
+or light use out of the box and not for a production database. This
+guide describes recommended system performance tunings for operators of
+new and existing Riak clusters. The tunings presented in this guide should
+be considered a starting point. It is important to make note of what
+changes are made and when in order to measure the impact of those
+changes.
+
+For performance and tuning recommendations specific to running Riak
+clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/3.0.2/using/performance/amazon-web-services).
+
+{{% note title="Note on other operating systems" %}}
+Unless otherwise specified, the tunings recommended below are for Linux
+distributions. Users implementing Riak on BSD and Solaris distributions can
+use these tuning recommendations to make analogous changes in those operating
+systems.
+{{% /note %}}
+
+## Storage and File System Tuning
+
+### Virtual Memory
+
+Due to the heavily I/O-focused profile of Riak, swap usage can result in
+the entire server becoming unresponsive. We recommend setting
+`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much
+as possible:
+
+```config
+vm.swappiness = 0
+```
+
+Ideally, you should disable swap to ensure that Riak's process pages are
+not swapped. Disabling swap will allow Riak to crash in situations where
+it runs out of memory. This will leave a crash dump file, named
+`erl_crash.dump`, in the `/var/log/riak` directory, which can be used to
+determine the cause of the memory usage.
+
+### Transparent Huge Pages (THP)
+
+Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately, this operation is OS-specific. As many of our customers are running Red Hat 6, we have included instructions on how to do so below. If you are using a different operating system, please refer to the documentation for your OS.
+
+In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line:
+
+```
+transparent_hugepage=never
+```
+
+For the change to become effective, a server reboot is required.
+
+{{% note title="Note on Kernel Tuning Tools" %}}
+Some kernel tuning tools, such as ktune, specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the kernel tuning tool you are using for how to disable THP.
+{{% /note %}}
+
+### Mounts
+
+Riak makes heavy use of disk I/O for its storage operations. It is
+important that you mount volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
+touched when read. This flag can be set temporarily using the following
+command:
+
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` flag can be set in `/etc/fstab` to make the
+mount setting permanent.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is completely fair queuing
+or `cfq`, which is designed for desktop use. While `cfq` is a good
+general-purpose scheduler, it is not designed to provide the kind of
+throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* The `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* The `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+
+To check the scheduler depth for block device `sda`, use the following
+command:
+
+```bash
+cat /sys/block/sda/queue/nr_requests
+```
+
+To increase the scheduler depth to 1024, use the following command:
+
+```bash
+echo 1024 > /sys/block/sda/queue/nr_requests
+```
+
+### Filesystem
+
+Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and
+[XFS](http://xfs.org/index.php/Main_Page) are recommended on some
+operating systems for greater reliability and recoverability.
+
+At this time, Basho can recommend using ZFS on Solaris, SmartOS, and
+OmniOS. ZFS may work well with Riak on direct Solaris clones like
+IllumOS, but we cannot yet recommend this. [ZFS on
+Linux](http://zfsonlinux.org) is still too early in its project lifetime
+to be recommendable for production use due to concerns that have been
+raised about excessive memory use. ZFS on FreeBSD is more mature than
+ZFS on Linux, but Basho has not yet performed sufficient performance and
+reliability testing to recommend using ZFS and Riak on FreeBSD.
+
+In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and
+[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on
+operating systems on which ZFS or XFS are not available or recommended.
+
+The ext4 file system defaults include two options that increase
+integrity but slow performance. Because Riak's integrity is based on
+multiple nodes holding the same data, these two options can be changed
+to boost I/O performance. We recommend setting `barrier=0` and
+`data=writeback` when using the ext4 filesystem.
+
+Similarly, the XFS file system defaults can be optimized to improve
+performance. We recommend setting `nobarrier`, `logbufs=8`,
+`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem.
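+
+For example, a hypothetical `/etc/fstab` entry for an ext4 Riak data
+volume that combines these options might look like the following (the
+device `/dev/xvdb` and mount point `/var/lib/riak` are placeholders;
+adjust both for your system):
+
+```bash
+# Hypothetical: append an fstab entry with the recommended ext4 options,
+# then remount the volume so the options take effect.
+echo "/dev/xvdb /var/lib/riak ext4 noatime,barrier=0,data=writeback 0 2" | sudo tee -a /etc/fstab
+sudo mount -o remount /var/lib/riak
+```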
+
+As with the `noatime` setting, these options should be added to
+`/etc/fstab` (as sketched above) so that they persist across server
+restarts.
+
+## Kernel and Network Tuning
+
+The following settings are minimally sufficient to improve many aspects
+of Riak usage on Linux, and should be added or updated in
+`/etc/sysctl.conf`:
+
+```config
+net.ipv4.tcp_max_syn_backlog = 40000
+net.core.somaxconn = 40000
+net.core.wmem_default = 8388608
+net.core.rmem_default = 8388608
+net.ipv4.tcp_sack = 1
+net.ipv4.tcp_window_scaling = 1
+net.ipv4.tcp_fin_timeout = 15
+net.ipv4.tcp_keepalive_intvl = 30
+net.ipv4.tcp_tw_reuse = 1
+net.ipv4.tcp_moderate_rcvbuf = 1
+```
+
+{{% note title="Note on system default" %}}
+In general, these recommended values should be compared with the system
+defaults and only changed if benchmarks or other performance metrics indicate
+that networking is the bottleneck.
+{{% /note %}}
+
+The following settings are optional, but may improve performance on a
+10Gb network:
+
+```config
+net.core.rmem_max = 134217728
+net.core.wmem_max = 134217728
+net.ipv4.tcp_mem = 134217728 134217728 134217728
+net.ipv4.tcp_rmem = 4096 277750 134217728
+net.ipv4.tcp_wmem = 4096 277750 134217728
+net.core.netdev_max_backlog = 300000
+```
+
+Certain network interfaces ship with on-board features that have been
+shown to hinder Riak network performance. These features can be disabled
+via `ethtool`.
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Further tuning will be required if these values are changed, as they affect
+all network operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho in nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, increasing the open
+files limit is necessary. See [Open Files Limit]({{<baseurl>}}riak/kv/3.0.2/using/performance/open-files-limit/) for more
+details.
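+
+As a quick sanity check (a sketch; see the linked guide for setting
+limits persistently), you can inspect the limits currently in effect:
+
+```bash
+# Soft and hard open-file limits for the current user/shell
+ulimit -Sn
+ulimit -Hn
+
+# Effective limit of a running Riak node's beam.smp process; the PID
+# discovery here is illustrative only
+grep "Max open files" /proc/$(pgrep -f beam.smp | head -1)/limits
+```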
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/3.0.2/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/3.0.2/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/3.0.2/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/3.0.2/using/performance/open-files-limit/)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/performance/amazon-web-services.md b/content/riak/kv/3.0.2/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..a5323a9292
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/performance/amazon-web-services.md
@@ -0,0 +1,247 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/tuning/aws
+  - /riak/kv/3.0.2/ops/tuning/aws
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types which encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are Disk I/O, RAM, and Network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used.
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-Optimized EC2 instances, which provide between 500 and 1,000
+megabits per second of throughput to [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available and are recommended for use with
+Provisioned IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they would
+physical servers to compensate for the performance variability caused by
+shared, virtualized resources. Plan to have more EC2 instance-based nodes than
+physical server nodes when estimating cluster size with respect to node count.
+{{% /note %}}
+
+## Operating System
+
+### Clocks
+
+NTP is configured by default on Amazon EC2 Linux instances. Please
+refer to the [Set the Time for an
+Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html)
+section of the EC2 documentation for steps on verifying if NTP is
+working properly. If NTP is not working properly, significant clock
+drift can occur.
+
+### Mounts and Scheduler
+
+On EBS volumes, the **deadline** scheduler should be used. To check the
+scheduler in use for block device `xvdf`, for example, use the following
+command:
+
+```bash
+cat /sys/block/xvdf/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/xvdf/queue/scheduler
+```
+
+More information on the disk scheduler is available in [Improving Performance](../).
+
+### Virtual Memory Subsystem
+
+EBS volumes have considerably less bandwidth than hardware disks. To
+avoid saturating EBS bandwidth and inducing IO latency spikes, it is
+recommended to tune the Linux virtual memory subsystem to flush smaller
+amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings).
+
+### Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally
+and are not affected by a wider problem like an AWS service outage. Try
+to determine the cause of the problem from the data you have collected.
+If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or as resold by Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk.
+
+Have your collected data ready when contacting TI Tokyo Client Services. A
+Client Services Engineer (CSE) might request log files, configuration
+files, or other information.
+
+## Data Loss
+
+Many failures either do not entail data loss or have minimal loss that
+can be repaired automatically, without intervention. Outage of a single
+node does not necessarily cause data loss, as other replicas of every
+key are available elsewhere in the cluster. Once the node is detected as
+down, other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called hinted handoff).
+
+The more severe data loss scenarios usually relate to hardware failure
+(in the case of AWS, service failure or instance termination). In the
+cases where data is lost, several options are available for restoring
+the data:
+
+1. Restore from backup. A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but can be used to partially restore data from
+   lost EBS volumes. If running in a RAID configuration, rebuilding the
+   array may also be possible.
+2. Restore from Multi-Datacenter Replication. If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the
+   `riak-repl` command.
+3. Restore using intra-cluster repair. Riak versions 1.2 and greater
+   include a "repair" feature which will restore lost partitions with
+   data from other replicas. This currently has to be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho CSE.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho is strongly recommended.
+
+## Benchmarking
+
+Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that
+simulates application operations by constructing approximately
+compatible data payloads and communicating them to the Riak cluster
+directly.
+
+Benchmarking is critical to determining the appropriate EC2 instance
+types, and strongly recommended. More information is available on
+benchmarking Riak clusters with [Basho Bench](../benchmarking).
+
+Besides running Basho Bench, we also advise that you load test Riak with
+your own tests to ensure that the load imparted by MapReduce queries,
+full-text queries, and index queries is within the expected range.
+
+## Simulating Upgrades, Scaling, and Failure States
+
+In addition to simply measuring performance, it is also important to
+measure how performance degrades when the cluster is not in
+steady-state. While under a simulation of live load, the following
+states might be simulated:
+
+1. Stop one or more nodes normally and restart them after a few moments
+   (simulates [rolling upgrade](../../../setup/upgrading/cluster)).
+2. Join two or more nodes to the cluster.
+3. Leave nodes from the cluster (after step #2).
+4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then
+   restart it.
+5. Hard-reboot a node's instance using the AWS console and then
+   restart it.
+6. Hard-stop and destroy a node's instance and build a new one from
+   backup.
+7. Using networking tools (e.g. a firewall), partition one or more
+   nodes from the rest of the cluster and then restore the original
+   configuration.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. While the Riak node is out, other nodes may also
+be at risk if free capacity is low on the rest of the cluster, so
+monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to replace
+it.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart. In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance; it generally performs better but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. With EBS, you can optionally enable
+[Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/performance/benchmarking.md b/content/riak/kv/3.0.2/using/performance/benchmarking.md
new file mode 100644
index 0000000000..cf17aea394
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/performance/benchmarking.md
@@ -0,0 +1,602 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/building/benchmarking
+  - /riak/kv/3.0.2/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   ```basho_bench```. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster. Do not run the ```basho_bench``` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download ```basho_bench```
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/3.0.2/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building ```basho_bench``` from source. Later
+  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the ```erlang-crypto``` package, which
+  is required by ```basho_bench```.
+* Install ```git``` (to check out the ```basho_bench``` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory in which to generate the results:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed ```basho_bench``` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common ```~/``` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+```/home/username/bench_results/current/```.
+
+If you built ```basho_bench``` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput - Operations per second over the duration of the test.
+* Latency at 99th percentile, 99.9th percentile, and max latency for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running basho_bench, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
+
+#### Installing R on Ubuntu
+
+```
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed ```basho_bench``` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+```/home/username/bench_results/summary.png```.
+
+If you have built ```basho_bench``` from source, you can just use
+```make```. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the ```basho_bench/README```.
+
+## How does it work?
+
+When Basho Bench starts (`basho_bench.erl`), it reads the
+configuration (`basho_bench_config.erl`), creates a new results
+directory, and then sets up the test (`basho_bench_app.erl` and
+`basho_bench_sup.erl`).
+
+During test setup, Basho Bench creates the following:
+
+* One **stats process** (`basho_bench_stats.erl`). This process
+  receives notifications when an operation completes, plus the
+  elapsed time of the operation, and stores it in a histogram. At
+  regular intervals, the histograms are dumped to `summary.csv` as
+  well as operation-specific latency CSVs (e.g. `put_latencies.csv`
+  for the PUT operation).
+* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting
+  (`basho_bench_worker.erl`). The worker process wraps a driver
+  module, specified by the [driver](#driver)
+  configuration setting. The driver is randomly invoked using the
+  distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the
+  driver invokes operations is governed by the [mode](#mode) setting.
+
+Once these processes have been created and initialized, Basho Bench
+sends a run command to all worker processes, causing them to begin the
+test. Each worker is initialized with a common seed value for random
+number generation to ensure that the generated workload is reproducible
+at a later date.
+
+During the test, the workers repeatedly call `driver:run/4`, passing in
+the next operation to run, a keygen function, a valuegen function, and
+the last state of the driver. The worker process times the operation,
+and reports this to the stats process when the operation has completed.
+
+Finally, once the test has been run for the duration specified in the
+config file, all workers and stats processes are terminated and the
+benchmark ends. The measured latency and throughput of the test can be
+found in `./tests/current/`. Previous results are in timestamped
+directories of the form `./tests/YYYYMMDD-HHMMSS/`.
+
+## Configuration
+
+Basho Bench ships with a number of sample configuration files, available
+in the `/examples` directory.
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`{driver:run/4}` function with a new operation. There are two possible
+values:
+
+* `{max}` - generate as many ops per second as possible
+* `{rate, N}` - generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e.: as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the operations corresponding to GET and PUT are
+GET and UPDATE, respectively.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                          [{{{basho_bench_driver_null,run,
+                                              [asdfput,
+                                               #Fun<basho_bench_keygen.4.4674>,
+                                               #Fun<basho_bench_valgen.0.1334>,
+                                               undefined]}}},
+                                           {{{basho_bench_worker,
+                                             worker_next_op,1}}},
+                                           {{{basho_bench_worker,
+                                             max_worker_run_loop,1}}}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` - Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` - Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` - Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` - Directly invokes the Bitcask API
+* `basho_bench_driver_dets` - Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` - operation completed successfully
+* `{error, Reason, NewState}` - operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` - operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` - operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` - generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` - the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` - the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` - selects an integer from uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` - selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` - the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a key generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` - takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` - takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` - generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` - generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` - generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed
+% starting at 1000 bytes and a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+Default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
+```
+
+#### riakclient_replies
+
+This value is used for R-values during a get operation, and W-values
+during a put operation.
+
+```erlang
+% Expect 1 reply.
+{riakclient_replies, 1}.
+```
+
+#### riakclient_bucket
+
+The Riak bucket to use for reading and writing values. The default is
+`<<"test">>`.
+
+```erlang
+% Use the "bench" bucket.
+{riakclient_bucket, <<"bench">>}.
+```
+
+### basho_bench_driver_riakc_pb Settings
+
+#### riakc_pb_ips
+
+A list of IP addresses to connect the workers to. A random IP will be
+chosen for each worker.
+
+The default is `{riakc_pb_ips, [{127,0,0,1}]}`
+
+```erlang
+% Connect to a cluster of 3 machines
+{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]}
+```
+
+#### riakc_pb_port
+
+The port on which to connect to the PBC interface.
+
+The default is `{riakc_pb_port, 8087}`
+
+#### riakc_pb_bucket
+
+The bucket to use for testing.
+
+The default is `{riakc_pb_bucket, <<"test">>}`
+
+### basho_bench_driver_http_raw Settings
+
+#### http_raw_ips
+
+A list of IP addresses to connect the workers to. Each worker makes
+requests to each IP in a round-robin fashion.
+
+The default is `{http_raw_ips, ["127.0.0.1"]}`
+
+```erlang
+% Connect to a cluster of machines in the 10.x network
+{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}.
+```
+
+#### http_raw_port
+
+Select the default port to connect to for the HTTP server.
+
+The default is `{http_raw_port, 8098}`.
+
+```erlang
+% Connect on port 8090
+{http_raw_port, 8090}.
+```
+
+#### http_raw_path
+
+The base path to use for accessing Riak, usually `"/riak/<bucket>"`.
+
+The default is `{http_raw_path, "/riak/test"}`.
+
+```erlang
+% Place test data in another_bucket
+{http_raw_path, "/riak/another_bucket"}.
+```
+
+#### http_raw_params
+
+Additional parameters to add to the end of the URL. This can be used
+to set the `r`/`w`/`dw`/`rw` parameters as desired.
+
+The default is `{http_raw_params, ""}`.
+
+```erlang
+% Set R=1, W=1 for testing a system with n_val set to 1
+{http_raw_params, "?r=1&w=1"}.
+```
+
+#### http_raw_disconnect_frequency
+
+How often, in seconds or number of operations, the HTTP clients
+(workers) should forcibly disconnect from the server.
+
+The default is `{http_raw_disconnect_frequency, infinity}` (which
+means that Basho Bench should never forcibly disconnect).
+
+```erlang
+% Disconnect after 60 seconds
+{http_raw_disconnect_frequency, 60}.
+
+% Disconnect after 200 operations
+{http_raw_disconnect_frequency, {ops, 200}}.
+```
+
+## Custom Driver
+
+A custom driver must expose the following callbacks.
+
+```erlang
+% Create the worker
+% ID is an integer
+new(ID) -> {ok, State} or {error, Reason}.
+
+% Run an operation
+run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}.
+```
+
+See the [existing
+drivers](https://github.com/basho/basho_bench/tree/master/src) for
+more details.
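+
+Putting the pieces together, a hypothetical session against a source
+build (the config file name here is a placeholder) might look like
+this:
+
+```bash
+# Start from one of the shipped example configs and edit it as needed
+# (duration, concurrent, riakc_pb_ips, key/value generators, etc.).
+cp examples/riakc_pb.config my_test.config
+
+# Run the benchmark; results land in tests/current/
+./basho_bench my_test.config
+
+# Render tests/current/summary.png (requires R, as described above)
+make results
+```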
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/performance/erlang.md b/content/riak/kv/3.0.2/using/performance/erlang.md
new file mode 100644
index 0000000000..c33f6cdc5b
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/performance/erlang.md
@@ -0,0 +1,371 @@
+---
+title: "Erlang VM Tuning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Erlang VM"
+    identifier: "performance_erlang"
+    weight: 105
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/tuning/erlang
+  - /riak/kv/3.0.2/ops/tuning/erlang
+---
+
+Riak was written almost exclusively in [Erlang](http://www.erlang.org)
+and runs on an Erlang virtual machine (VM), which makes proper Erlang VM
+tuning an important part of optimizing Riak performance. The Erlang VM
+itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm).
+
+The table below lists some of the parameters that are available, showing
+both their names as used in Erlang and their names as Riak parameters.
+
+Erlang parameter | Riak parameter
+:----------------|:--------------
+[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads`
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K`
+[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit`
+[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports`
+[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online`
+[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W`
+[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size`
+[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables`
+[`+scl`](http://www.erlang.org/doc/man/erl.html#+scl) | `erlang.schedulers.compaction_of_load`
+[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval`
+[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp`
+[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing`
+[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size`
+[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime`
+[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after`
+[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump`
+[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables`
+`-name` | `nodename`
+
+{{% note title="Note on upgrading to 2.0" %}}
+In versions of Riak prior to 2.0, Erlang VM-related parameters were specified
+in a `vm.args` configuration file; in versions 2.0 and later, all
+Erlang-VM-specific parameters are set in the `riak.conf` file. If you're
+upgrading to 2.0 from an earlier version, you can still use your old `vm.args`
+if you wish. Please note, however, that if you set one or more parameters in
+both `vm.args` and in `riak.conf`, the settings in `vm.args` will override
+those in `riak.conf`.
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
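+
+As an illustrative sketch (the counts below assume a hypothetical
+16-core machine, and the `riak.conf` path is a placeholder; the
+auto-detected defaults are usually appropriate), you could pin both
+values explicitly:
+
+```bash
+# Confirm the number of logical processors first
+nproc
+
+# Append explicit scheduler settings to riak.conf; adjust the path for
+# wherever your riak.conf lives
+echo "erlang.schedulers.total = 16"  | sudo tee -a /etc/riak/riak.conf
+echo "erlang.schedulers.online = 14" | sudo tee -a /etc/riak/riak.conf
+```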
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set `+scl` to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` parameter to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. epmd uses an unpredictable port for inter-node communication
+by default, binding to port 0, which means that it uses the first
+available port. This can make it difficult to configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively.
+The following would set the range to ports between 3000 and 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 3000
+erlang.distribution.port_range.maximum = 5000
+```
+
+```appconfig
+%% The older, app.config-based system uses different parameter names
+%% for specifying the minimum and maximum port
+
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 3000},
+          {inet_dist_listen_max, 5000}
+          % ...
+         ]}
+```
+
+You can set the Erlang VM to use a single port by setting the minimum to
+the desired port while setting no maximum. The following would set the
+port to 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 5000
+```
+
+```appconfig
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 5000}
+          % ...
+         ]}
+```
+
+If the minimum port is unset, the Erlang VM will listen on a random
+high-numbered port.
+
+### Maximum Ports
+
+You can set the maximum number of concurrent ports/sockets used by the
+Erlang VM using the `erlang.max_ports` setting. Possible values range
+from 1024 to 134217727. The default is 65536. In `vm.args` you can use
+either `+Q` or `-env ERL_MAX_PORTS`.
+
+## Asynchronous Thread Pool
+
+If thread support is available in your Erlang VM, you can set the number
+of asynchronous threads in the Erlang VM's asynchronous thread pool
+using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0
+to 1024. If thread support is available on your OS, the default is 64.
+Below is an example setting the number of async threads to 600:
+
+```riakconf
+erlang.async_threads = 600
+```
+
+```vmargs
++A 600
+```
+
+### Stack Size
+
+In addition to the number of asynchronous threads, you can determine the
+memory allocated to each thread using the
+`erlang.async_threads.stack_size` parameter, which corresponds to the
+`+a` Erlang flag. You can specify that size in Riak using KB, MB, GB,
+etc. The valid range is 16-8192 kilowords, which translates to 64-32768
+KB on 32-bit architectures. While there is no default, we suggest a
+stack size of 16 kilowords, which translates to 64 KB. We suggest such a
+small size because the number of asynchronous threads, as determined by
+`erlang.async_threads`, might be quite large in your Erlang VM. A 64 KB
+stack size is enough for drivers delivered with Erlang/OTP but might not be
+large enough to accommodate drivers that use the `driver_async()`
+functionality, documented
+[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend
+setting higher values with caution, always keeping the number of
+available threads in mind.
+
+## Kernel Polling
+
+You can utilize kernel polling in your Erlang distribution if your OS
+supports it. Kernel polling can improve performance if many file
+descriptors are in use; the more file descriptors, the larger an effect
+kernel polling may have on performance. Kernel polling is enabled by
+default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`.
+This corresponds to the
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the
+Erlang VM. You can disable it by setting `erlang.K` to `off`.
+
+## Warning Messages
+
+Erlang's
+[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an
+event manager that registers error, warning, and info events from the
+Erlang runtime. By default, events from the `error_logger` are mapped as
+warnings, but you can also set messages to be mapped as errors or info
+reports using the `erlang.W` parameter (or `+W` in `vm.args`). The
+possible values are `w` (warnings), `e` (errors), or `i` (info reports).
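+
+As a minimal sketch (assuming the newer, `riak.conf`-based system),
+mapping `error_logger` events to info reports rather than warnings
+would look like this:
+
+```riakconf
+## Illustrative only: report error_logger events as info reports
+erlang.W = i
+```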
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) (Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.process_limit` (explained in the section [above](#process-limit)) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can determine that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every N seconds,
+where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can control how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
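+
+For example, a minimal sketch (the value is an illustrative assumption,
+not a recommendation) that shortens the shutdown window for a
+frequently restarted test cluster:
+
+```riakconf
+## Hypothetical test-cluster setting: kill remaining processes after 5 seconds
+erlang.shutdown_time = 5s
+```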
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/performance/latency-reduction.md b/content/riak/kv/3.0.2/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..e84a00de91
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/performance/latency-reduction.md
@@ -0,0 +1,267 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/tuning/latency-reduction
+  - /riak/kv/3.0.2/ops/tuning/latency-reduction
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about them.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB. Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric | Explanation
+:-----------------------------|:-----------
+`fsm_node_get_objsize_mean` | The mean object size encountered by this node in the last minute
+`fsm_node_get_objsize_median` | The median object size encountered by this node in the last minute
+`fsm_node_get_objsize_95` | The 95th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_99` | The 99th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_100` | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and re-start your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes).
This increases the size of the +distributed Erlang buffer from its default of 1024 KB. Re-start your +node when configuration changes have been made. + +Large objects can also impact latency even if they're only present on +some nodes. If increased latency occurs only on N nodes, where N is your +[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes. + +If large objects are suspected, you should also audit the behavior of +siblings in your cluster, as explained in the [next section](#siblings). + +## Siblings + +In Riak, object conflicts are handled by keeping multiple versions of +the object in the cluster either until a client takes action to resolve +the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects). + +### Mitigation + +The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor +object size (or via an HTTP `GET` request to `/stats`). In the output of +`riak-admin status` in each node, you'll see the following +sibling-related statistics: + +Metric | Explanation +:------------------------------|:----------- +`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute + +Is there an upward trend in these statistics over time? Are there any +large outliers? Do these trends correspond to your observed latency +spikes? + +If you believe that sibling creation problems could be responsible for +latency issues in your cluster, you can start by checking the following: + +* If `allow_mult` is set to `true` for some or all of your buckets, be + sure that your application is correctly resolving siblings. Be sure to + read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate. + If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly. +* Application errors are a common source of problems with + siblings. Updating the same key over and over without passing a + [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. 
If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) (DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W is set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. This setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn per merge), while setting it higher will
+induce less frequent merges with more I/O churn per merge. To find
+settings that are ideal for
+your use case, we recommend checking out our guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments.
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak is a heavily I/O- and network-intensive system.
+Bottlenecks on either front can lead to undue latency in your cluster.
+We recommend an active monitoring strategy to detect problems
+immediately when they arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+at your disposal, including
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat).
+
+To diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+To set an object size threshold past which a write will succeed but
+will register a warning in the logs, adjust the
+`object.size.warning_threshold` parameter. The default is `5MB`.
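+
+As a minimal sketch (the thresholds are illustrative assumptions, not
+recommendations), a stricter configuration might look like this:
+
+```riakconf
+## Hypothetical limits: warn on objects over 256KB, reject objects over 1MB
+object.size.warning_threshold = 256KB
+object.size.maximum = 1MB
+```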
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs an error (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/performance/multi-datacenter-tuning.md b/content/riak/kv/3.0.2/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..b35c6e0171
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,47 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[perf index]: {{<baseurl>}}riak/kv/3.0.2/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/performance/open-files-limit.md b/content/riak/kv/3.0.2/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..3c44610559
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/performance/open-files-limit.md
@@ -0,0 +1,351 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/tuning/open-files-limit/
+  - /riak/kv/3.0.2/ops/tuning/open-files-limit/
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation.
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session required pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session required pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect, and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+        "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file.
+
+4\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```bash
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/3.0.2/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..ad2534e08e
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,50 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times.
First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference.md b/content/riak/kv/3.0.2/using/reference.md
new file mode 100644
index 0000000000..3df1686477
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference.md
@@ -0,0 +1,135 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, which was used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/architecture.md b/content/riak/kv/3.0.2/using/reference/architecture.md
new file mode 100644
index 0000000000..bc2ff9bbc1
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/architecture.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+#menu:
+#  riak_kv-3.0.2:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+<!-- TODO: Content -->
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/bucket-types.md b/content/riak/kv/3.0.2/using/reference/bucket-types.md
new file mode 100644
index 0000000000..78d85c5d81
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/bucket-types.md
@@ -0,0 +1,823 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+Bucket types allow groups of buckets to share configuration details and
+allow Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
+cluster to a pre-2.0 version _as long as you have not created and activated a
+bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/3.0.2/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, related to
+  [Riak data types]({{<baseurl>}}riak/kv/3.0.2/developing/data-types) and [strong consistency]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/strong-consistency) respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types/#bucket-properties-and-operations).
You cannot simply take a +bucket `my_bucket` and assign it a type the way that you would, say, +set `allow_mult` to `false` or `n_val` to `5`, because there is no +`type` parameter contained within the bucket's properties (i.e. +`props`). + +Instead, bucket types are applied to buckets _on the basis of how those +buckets are queried_. Queries involving bucket types take the following +form: + +``` +GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key> +``` + +In the older system, only bucket and key are specified in queries: + +``` +GET/PUT/DELETE /buckets/<bucket>/keys/<key> +``` + +## When to Use Bucket Types + +In many respects, bucket types are a major improvement over the older +system of bucket configuration, including the following: + +* Bucket types are more flexible because they enable you to define a + bucket configuration and then change it if you need to. +* Bucket types are more reliable because the buckets that bear a given + type only have their properties changed when the type is changed. + Previously, it was possible to change the properties of a bucket only + through client requests. +* Whereas bucket properties can only be altered by clients interacting + with Riak, bucket types are more of an operational concept. The + `riak-admin bucket-type` interface (discussed in depth below) enables + you to manage bucket configurations on the operations side, without + recourse to Riak clients. + +For these reasons, we recommend _always_ using bucket types in versions +of Riak 2.0 and later. + +## Managing Bucket Types Through the Command Line + +Bucket types are created, updated, activated, and more through the +`riak-admin bucket-type` interface. + +Below is a full list of available sub-commands: + +Command | Action | Form | +:-------|:-------|:-----| +`create` | Create or modify a bucket type before activation | `create <type> <json>` | +`activate` | Activate a bucket type | `activate <type>` | +`list` | List all currently available bucket types and their activation status | `list` | +`status` | Display the status and properties of a specific bucket type | `status <type>` | +`update` | Update a bucket type after activation | `update <type> <json>` | + +### Creating a Bucket Type + +Creating new bucket types involves using the `create <type> <json>` +command, where `<type>` is the name of the type and `<json>` is a JSON +object of the following form: + +```json +{ + "props": { + "prop1": "val1", + "prop2": "val2", + ... + } +} +``` + + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.2/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.2/developing/getting-started) section. + +If creation is successful, you should see the following output: + +``` +type_using_defaults created +``` + +{{% note %}} +The `create` command can be run multiple times prior to a bucket type being +activated. Riak will persist only those properties contained in the final call +of the command. +{{% /note %}} + +Creating bucket types that assign properties _always_ involves passing +stringified JSON to the `create` command. One way to do that is to pass +a JSON string directly. 
The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak-admin bucket-type create from_json_file "`cat props.json`"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). Here is an example console output:
+
+```bash
+riak-admin bucket-type list
+```
+
+An example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```bash
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock: 20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after a bucket
+type is created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then these
+properties cannot be changed after the bucket type has been created.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+{{% /note %}} + +## Buckets as Namespaces + +In versions of Riak prior to 2.0, all queries are made to a bucket/key +pair, as in the following example read request: + +```java +Location myKey = new Location(new Namespace("my_bucket"), "my_key"); +FetchValue fetch = new FetchValue.Builder(myKey).build(); +client.execute(fetch); +``` + +```ruby +bucket = client.bucket('my_bucket') +bucket.get('my_key') +``` + +```php +$location = new Location('my_key', new Bucket('my_bucket')); +(new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('my_bucket') +bucket.get('my_key') +``` + +```csharp +var id = new RiakObjectId("my_bucket", "my_key"); +client.Get(id); +``` + +```javascript +client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) { +}); +``` + +```erlang +{ok, Object} = riakc_pb_socket:get(Pid, + <<"my_bucket">>, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/buckets/my_bucket/keys/my_key +``` + +With the addition of bucket types in Riak 2.0, bucket types can be used +as _an additional namespace_ on top of buckets and keys. The same bucket +name can be associated with completely different data if it used in +accordance with a different type. Thus, the following two requests will +be made to _completely different objects_, even though the bucket and key +names are the same: + +```java +Location key1 = + new Location(new Namespace("type1", "my_bucket"), "my_key"); +Location key2 = + new Location(new Namespace("type2", "my_bucket"), "my_key"); +FetchValue fetch1 = new FetchValue.Builder(key1).build(); +FetchValue fetch2 = new FetchValue.Builder(key2).build(); +client.execute(fetch1); +client.execute(fetch2); +``` + +```ruby +bucket1 = client.bucket_type('type1').bucket('my_bucket') +bucket2 = client.bucket_type('type2').bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```php +$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1')); +$location2 = new Location('my_key', new Bucket('my_bucket', 'type2')); +$builder = new \Basho\Riak\Command\Builder\FetchObject($riak); +$builder->atLocation($location1) + ->build() + ->execute(); +$builder->atLocation($location2) + ->build() + ->execute(); +``` + +```python +bucket1 = client.bucket_type('type1').bucket('my_bucket') +bucket2 = client.bucket_type('type2').bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```csharp +var id1 = new RiakObjectId("type1", "my_bucket", "my_key"); +var id2 = new RiakObjectId("type2", "my_bucket", "my_key"); +var rslt1 = client.Get(id1); +var rslt2 = client.Get(id2); +``` + +```javascript +client.fetchValue({ + bucketType: 'type1', bucket: 'my_bucket', key: 'my_key' +}, function (err, rslt) { +}); + +client.fetchValue({ + bucketType: 'type2', bucket: 'my_bucket', key: 'my_key' +}, function (err, rslt) { +}); +``` + +```erlang +{ok, Obj1} = riakc_pb_socket:get(Pid, + {<<"type1">>, <<"my_bucket">>}, + <<"my_key">>), +{ok, Obj2} = riakc_pb_socket:get(Pid, + {<<"type2">>, <<"my_bucket">>}, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key +curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key +``` + +{{% note title="Note on object location" %}} +In Riak 2.x, _all requests_ must be made to a location specified by a bucket +type, bucket, and key rather than to a bucket/key pair, as in previous +versions. 
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
+```
+
+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`.
+
+In version 2.0, this changed in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolution in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind any time that you create, activate,
+and use your own bucket types in Riak 2.0 and later. It is still
+possible to set `allow_mult` to `false` in any given bucket type, but it
+must be done explicitly. If we wanted to set `allow_mult` to `false` in
+our `n_val_of_2` bucket type from above, we would need to update the
+existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/3.0.2/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/3.0.2/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again.
If it still does not
+    work, then there may be a network partition or other issues that need
+    to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key =
+  new Location(new Namespace("no_siblings", "sensitive_user_data"), "user19735");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(key)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{ ... user data ... }")
+  ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` using the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket.
If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(allYourBaseKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type
+from above if we didn't want to deal with siblings, vclocks, and the
+like, and we could use a `siblings_allowed` bucket type (with all of the
+default properties except that `allow_mult` is set to `true`). This
+would give us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
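+
+As a quick sketch using the HTTP API (the values stored here are
+placeholders, and both bucket types are assumed to have already been
+created and activated), the same key can be written and read
+independently in two of those namespaces:
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "doge" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "distracted boyfriend" \
+  http://localhost:8098/types/siblings_allowed/buckets/new_memes/keys/favorite_meme
+
+# Each GET returns only the value stored in that particular namespace
+curl http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/new_memes/keys/favorite_meme
+```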
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/custom-code.md b/content/riak/kv/3.0.2/using/reference/custom-code.md
new file mode 100644
index 0000000000..d217eeec1f
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/custom-code.md
@@ -0,0 +1,135 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/advanced/install-custom-code/
+  - /riak/kv/3.0.2/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/3.0.2/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/3.0.2/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. Note that in Erlang, a file must
+have the same name as the module it contains. So if you are given a file
+named `validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+        on supported platforms
+
+Compiling the module is a straightforward process.
+
+```text
+erlc validate_json.erl
+```
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+Next, you'll need to define a path in which compiled modules can be stored
+and from which they can be loaded. For our example, we'll use a temporary
+directory `/tmp/beams`, but you should choose a directory for production
+functions based on your own requirements such that they will be available
+where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+### Configure
+
+Take the `validate_json.beam` and copy this file to the `/tmp/beams` directory.
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+  %% ...
+  {add_paths, ["/tmp/beams/"]},
+  %% ...
+```
+
+After updating `app.config`, Riak must be restarted. In production cases, you
+should ensure that, if you are adding configuration changes to multiple nodes,
+you do so in a rolling fashion, taking time to ensure that the Riak key/value
+store has fully initialized and become available for use.
+
+This is done with the `riak-admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure riak_kv is active before restarting the next
+node.
+{{% /note %}} + + + + diff --git a/content/riak/kv/3.0.2/using/reference/failure-recovery.md b/content/riak/kv/3.0.2/using/reference/failure-recovery.md new file mode 100644 index 0000000000..c31926e6fe --- /dev/null +++ b/content/riak/kv/3.0.2/using/reference/failure-recovery.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/handoff.md b/content/riak/kv/3.0.2/using/reference/handoff.md
new file mode 100644
index 0000000000..04ea4fd6de
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/handoff.md
@@ -0,0 +1,201 @@
+---
+title: "Handoff Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Handoff"
+    identifier: "managing_ref_handoff"
+    weight: 101
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/running/handoff/
+  - /riak/kv/3.0.2/ops/running/handoff/
+---
+
+[cluster ops handoff]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/handoff
+
+Riak is a distributed system built with two essential goals in mind:
+
+* **fault tolerance**, whereby a Riak cluster can withstand node
+  failure, network partitions, and other events in a way that does not
+  disrupt normal functioning, and
+* **scalability**, whereby operators can gracefully add and remove nodes
+  to/from a Riak cluster
+
+Both of these goals demand that Riak be able to either temporarily or
+permanently re-assign responsibility for portions of the keyspace. That
+re-assignment is referred to as **intra-cluster handoff** (or simply
+**handoff** in our documentation).
+
+## Types of Handoff
+
+Intra-cluster handoff typically takes one of two forms: **hinted
+handoff** and **ownership transfer**.
+
+Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick
+up the slack, so to speak, assuming responsibility for node C's
+operations. When node C comes back online, responsibility will be handed
+back to the original vnodes.
+
+Ownership transfer is different because it is meant to be permanent.
+It occurs when a [vnode]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very
+makeup of a cluster changes, e.g. when nodes are added or removed from
+the cluster. In this case, responsibility for portions of the keyspace
+needs to be fundamentally re-assigned.
+
+Both types of handoff are handled automatically by Riak. Operators do
+have the option, however, of enabling and disabling handoff on
+particular nodes or all nodes and of configuring key aspects of Riak's
+handoff behavior. More information can be found below.
+
+## Configuring Handoff
+
+A full listing of configurable parameters can be found in our
+[configuration files]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#intra-cluster-handoff)
+document. The sections below provide a more narrative description of
+handoff configuration.
+
+### SSL
+
+If you want to encrypt handoff behavior within a Riak cluster, you need
+to provide each node with appropriate paths for an SSL certfile (and
+potentially a keyfile). The configuration below would designate a
+certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`:
+
+```riakconf
+handoff.ssl.certfile = /ssl_dir/cert.pem
+handoff.ssl.keyfile = /ssl_dir/key.pem
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_ssl_options, [
+        {certfile, "/ssl_dir/cert.pem"},
+        {keyfile, "/ssl_dir/key.pem"}
+    ]},
+    %% Other configs
+]}
+```
+
+### Port
+
+You can set the port used by Riak for handoff-related interactions using
+the `handoff.port` parameter. 
The default is 8099. This would change the +port to 9000: + +```riakconf +handoff.port = 9000 +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_port, 9000}, + %% Other configs +]} +``` + +### Background Manager + +Riak has an optional background manager that limits handoff activity in +the name of saving resources. The manager can help prevent system +response degradation during times of heavy load, when multiple +background tasks may contend for the same system resources. The +background manager is disabled by default. The following will enable it: + +```riakconf +handoff.use_background_manager = on +``` + +```appconfig +{riak_kv, [ + %% Other configs + {handoff_use_background_manager, on}, + %% Other configs +]} +``` + +### Maximum Rejects + +If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search/), +those subsystems can block handoff of primary key/value data, i.e. data +that you interact with via normal reads and writes. + +The `handoff.max_rejects` setting enables you to set the maximum +duration that a [vnode]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode) can be blocked by multiplying the +`handoff.max_rejects` setting by the value of +[`vnode_management_timer`]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#vnode_management_timer). +Thus, if you set `handoff.max_rejects` to 10 and +`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems +can block K/V handoff for a maximum of 50 seconds. The default for +`handoff.max_rejects` is 6, while the default for +`vnode_management_timer` is `10s`. This would set `max_rejects` to 10: + +```riakconf +handoff.max_rejects = 10 +``` + +```appconfig +{riak_kv, [ + %% Other configs + {handoff_rejected_max, 10}, + %% Other configs +]} +``` + +### Transfer Limit + +You can adjust the number of node-to-node transfers (which includes +handoff) using the `transfer_limit` parameter. The default is 2. Setting +this higher will increase node-to-node communication but at the expense +of higher resource intensity. This would set `transfer_limit` to 5: + +```riakconf +transfer_limit = 5 +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_concurrency, 5}, + %% Other configs +]} +``` + +## Enabling and Disabling Handoff + +Handoff can be enabled and disabled in two ways: via configuration or +on the command line. + +### Enabling and Disabling via Configuration + +You can enable and disable both outbound and inbound handoff on a node +using the `handoff.outbound` and `handoff.inbound` settings, +respectively. Both are enabled by default. The following would disable +both: + +```riakconf +handoff.outbound = off +handoff.inbound = off +``` + +```appconfig +{riak_core, [ + %% Other configs + {disable_outbound_handoff, true}, + {disable_inbound_handoff, true}, + %% Other configs +]} +``` + +### Enabling and Disabling Through the Command Line + +Check out the [Cluster Operations: Handoff][cluster ops handoff] for steps on enabling and disabling handoff via the command line. 
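+
+As a quick sketch (assuming the `riak-admin handoff` subcommands described
+on that page; exact flags may vary by version), temporarily disabling and
+then re-enabling handoff on a node might look like this:
+
+```bash
+# Disable both inbound and outbound handoff on this node
+riak-admin handoff disable both
+
+# ... perform maintenance ...
+
+# Re-enable both directions when finished
+riak-admin handoff enable both
+```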
+ + + + diff --git a/content/riak/kv/3.0.2/using/reference/jmx.md b/content/riak/kv/3.0.2/using/reference/jmx.md new file mode 100644 index 0000000000..8a7cb9b954 --- /dev/null +++ b/content/riak/kv/3.0.2/using/reference/jmx.md @@ -0,0 +1,190 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.2/ops/running/monitoring/jmx + - /riak/kv/3.0.2/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td> + <td>float</td> + <td>Mean PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMedian</tt></td> + <td>float</td> + <td>Median PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePuts</tt></td> + <td>int</td> + <td>Number of PUTs in past minute</td> + </tr> + <tr> + <td><tt>NodePutsTotal</tt></td> + <td>int</td> + <td>Number of PUTs since node start</td> + </tr> + <tr> + <td><tt>PBCActive</tt></td> + <td>int</td> + <td>Number of active Protocol Buffers connections</td> + </tr> + <tr> + <td><tt>PBCConnects</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections in past minute</td> + </tr> + <tr> + <td><tt>PBCConnectsTotal</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections since node start</td> + </tr> + <tr> + <td><tt>RingCreationSize</tt></td> + <td>int</td> + <td>Number of partitions in Riak ring</td> + </tr> + <tr> + <td><tt>VnodeGets</tt></td> + <td>int</td> + <td>Number of vnode-level GETs in past minute</td> + </tr> + <tr> + <td><tt>VnodeGetsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level GETs since node start</td> + </tr> + <tr> + <td><tt>VnodePuts</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs in past minute</td> + </tr> + <tr> + <td><tt>VnodePutsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs since node start</td> + </tr> +</table> + + + + diff --git a/content/riak/kv/3.0.2/using/reference/logging.md b/content/riak/kv/3.0.2/using/reference/logging.md new file mode 100644 index 0000000000..4d84fa668e --- /dev/null +++ b/content/riak/kv/3.0.2/using/reference/logging.md @@ -0,0 +1,301 @@ +--- +title: "Logging Reference" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Logging" + identifier: "managing_ref_logging" + weight: 100 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.2/ops/running/logging + - /riak/kv/3.0.2/ops/running/logging +--- + +[cluster ops log]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/logging + +Logging in Riak KV is handled by a Basho-produced logging framework for +[Erlang](http://www.erlang.org) called +[lager](https://github.com/basho/lager). + +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. + +## Log Directory + +Riak's log files are stored in a `/log` directory on each node. The +location of that directory differs from platform to platform. The table +below shows you where log files are stored on all supported operating +systems. + +OS | Directory +:--|:--------- +Ubuntu, Debian, CentOS, RHEL | `/var/log/riak` +Solaris, OpenSolaris | `/opt/riak/log` +Source install and Mac OS X | `./log` (where the `.` represents the root installation directory) + +## Log Files + +Below is a list of files that can be found in each node's `/log` +directory: + +File | Significance +:----|:------------ +`console.log` | Console log output +`crash.log` | Crash logs +`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. +`run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. 
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 2.9.8 a few of the log messages may contain newline
+characters, preventing external tools from reliably identifying the end
+of each log entry when ingesting log files.
+
+A known workaround is ingesting not the logs enabled by the
+`log.console` configurable parameter but rather the logs as enabled by
+the `log.syslog` configurable parameter and processed by syslog,
+e.g. exploiting the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option (e.g. see [this StackExchange topic
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474))
+- or equivalent - of syslog implementations.
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log Files
+
+In each node's `/log` directory, you will see at least one of each of
+the following:
+
+File | Contents
+:----|:--------
+`console.log` | General messages from all Riak subsystems
+`crash.log` | Catastrophic events, such as node failures, running out of disk space, etc.
+`erlang.log` | Events from the Erlang VM on which Riak runs
+`run_erl.log` | The command-line arguments used when starting Riak
+
+### Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable it
+and disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+Riak stores error messages in `./log/error.log` by default.
+You can change this using the `log.error.file` parameter. 
Here is an
+example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated when a certain size threshold is reached and/or at
+designated times.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` - Every night at midnight
+* `$D23` - Every day at 23:00 (11 pm)
+* `$W0D20` - Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` - On the first day of every month at midnight
+* `$M5D6` - On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, using any size denomination
+you wish, e.g. `KB` or `MB`. The default is 64 KB. The following would
+set that maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off`. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix to be prepended to
+each syslog message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages amongst the available levels, which are listed below. The
+default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither. 
This is determined by the value that you give to the
+`log.console` parameter, which gives you one of four options:
+
+* `file` - Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/3.0.2/setup/installing/source), but will differ on platform-specific installation,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` - Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-cli/#attach-direct) command
+* `both` - Console logs will be emitted both to a file and to standard
+  output
+* `off` - Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log]
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/multi-datacenter.md b/content/riak/kv/3.0.2/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..9f3de47118
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/multi-datacenter.md
@@ -0,0 +1,53 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling & disabling of per bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities. 
+
+[Learn More >>][ref mdc comparison]
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/multi-datacenter/comparison.md b/content/riak/kv/3.0.2/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..015085cc0f
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,100 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/mdc/comparison
+  - /riak/kv/3.0.2/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/3.0.2/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/3.0.2/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destination of replication data. Sites and listeners are
+  manually configured on each node in a cluster. This can be a burden to
+  the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+    * a fullsync
+    * another Riak write to the key/value on the listener, thus
+      re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity was achieved in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters are exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes, and the sink has N nodes, there will be M
+  realtime connections. 
Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, a
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/3.0.2/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..e280811a2b
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,170 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/mdc/monitoring
+  - /riak/kv/3.0.2/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identification and trending of issues or delays in realtime replication
+is important for identifying a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute. 
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The size of `rt_dirty` can quantify the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue backups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+increases to the maximum size of the queue (displayed in the
+`realtime_queue_stats` section of the replication status output as
+`max_bytes`) can be made to accommodate a usage pattern of expected high
+load.
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
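+
+As a rough sketch of the statistics-based approach (assuming a standard
+installation where `riak-repl` is on the `PATH`), the three statistics
+highlighted above can be pulled out of the status output for graphing or
+alerting:
+
+```bash
+# Extract the key realtime replication health indicators; each line of
+# output can be fed to a graphing or alerting system
+riak-repl status | grep -E 'rt_dirty|pending|drops'
+```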
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+* Perform a GET for your canary object on both your source and sink
+  clusters, noting their states. The state of the object in each cluster
+  can be referred to as state `S0`, or the object's initial state.
+* PUT an update for your canary object to the source cluster, updating
+  the state of the object to the next state, `S1`.
+* Perform a GET for your canary on the sink cluster, comparing the state
+  of the object on the source cluster to the state of the object on the
+  sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+Getting a rough estimate of how long it takes an object PUT to a source
+cluster to be replicated to a sink cluster can be done by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
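+
+As a minimal sketch of that general process over the HTTP API (the
+`source.example.com` and `sink.example.com` hostnames, the `canary`
+bucket, and the 60-second SLA threshold are all placeholder assumptions
+for your own environment):
+
+```bash
+# PUT the next state (S1) for the canary object to the source cluster
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "S1" \
+  http://source.example.com:8098/buckets/canary/keys/canary_object
+
+# Poll the sink cluster until the canary reaches the same state,
+# alerting if the SLA threshold is exceeded
+for i in $(seq 1 60); do
+  state=$(curl -s http://sink.example.com:8098/buckets/canary/keys/canary_object)
+  if [ "$state" = "S1" ]; then
+    echo "canary replicated after ~${i}s"
+    exit 0
+  fi
+  sleep 1
+done
+echo "SLA exceeded: canary not replicated within 60s" >&2
+exit 1
+```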
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/3.0.2/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..d08b424386
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,66 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/mdc/per-bucket
+  - /riak/kv/3.0.2/ops/mdc/per-bucket
+---
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+The available `repl` values changed between Riak Enterprise 1.1 and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+ * `true` - Enable replication (realtime + fullsync)
+ * `false` - Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+ * `realtime` - Replication only occurs in realtime for this bucket
+ * `fullsync` - Replication only occurs during a fullsync operation
+ * `both` - Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this goes for properties such as `backend`.
+If the bucket doesn't exist in the destination cluster, Riak will create
+it with the default backend and _not_ with the backend used in the
+source cluster.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/multi-datacenter/statistics.md b/content/riak/kv/3.0.2/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..e1d51ea2c0
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,244 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/mdc/statistics
+  - /riak/kv/3.0.2/ops/mdc/statistics
+---
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
+
+1. Many of these statistics will appear only on the current
+   leader node
+2. 
The counts for all statistics will be reset to 0 upon restarting
+   Riak unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | The number of sink errors detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | The number of source errors detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+These values are under `realtime_queue_stats`.
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster.
+`fullsync_start_time` | The time the current fullsync to the specified cluster began.
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync.
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When completed, this will be the same as the total number of partitions in the ring.
+`error_exits` | If a sync fails or is aborted, the partition will be queued again and retried later
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to `fullsync_suggested` when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
+`strategy` | The strategy that fulfills fullsync replication.
In previous versions of replication, different strategies could be configured depending on your replication needs.
+`fullsync_worker` | The Erlang process ID of the fullsync worker.
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received by the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received by the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication sink
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node.
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The name of the connected site (sink) that this source was configured with. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used.
They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for
+more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+
+## Accessing Replication Web-Based Statistics
+
+These stats can be accessed via the command line with the following
+command:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats
+```
+
+A simple way to view formatted statistics is to use a command such as:
+
+```curl
+curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/object-deletion.md b/content/riak/kv/3.0.2/using/reference/object-deletion.md
new file mode 100644
index 0000000000..3f1805949e
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/object-deletion.md
@@ -0,0 +1,121 @@
+---
+title: "Object Deletion Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Object Deletion"
+    identifier: "managing_ref_object_deletion"
+    weight: 103
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/advanced/deletion
+---
+
+[concept eventual consistency]: ../../../learn/concepts/eventual-consistency
+[concept clusters]: ../../../learn/concepts/clusters
+[glossary vnode]: ../../../learn/glossary/#vnode
+[usage delete objects]: ../../../developing/usage/deleting-objects
+[developing keylist]: ../../../developing/api/http/list-keys
+[developing mapreduce]: ../../../developing/usage/mapreduce
+[cluster mdc]: ../../cluster-operations/v3-multi-datacenter
+[config advanced]: ../../../configuring/reference/#advanced-configuration
+[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum
+[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings
+[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction
+
+In single-server, non-clustered data storage systems, object deletion
+is a trivial process.
In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however,
+object deletion is far less trivial because objects live on multiple
+[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend.
+
+## Object Deletion Example
+
+The problem of object deletion in distributed systems can be illustrated more concretely using the following example:
+
+* An object is stored on nodes A, B, and C
+* Node C suddenly goes offline due to a network failure
+* A client sends a delete request to node A, which forwards that
+  request to node B, but it cannot reach node C
+* On nodes A and B, the object is deleted
+* Node C comes back online
+* A client attempts to read the object, and the request hits node C
+* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object.
+
+The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred:
+
+1. the object was deleted on A & B but not on C
+2. the object was created on C but not on A & B
+
+To get around this problem, Riak uses *tombstones*.
+
+## Tombstones
+
+Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it.
+
+This allows Riak to understand the difference between an object that has been deleted and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*.
+
+The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**.
+
+After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to).
+
+## Configuring Object Deletion
+
+The `delete_mode` setting in a cluster's [configuration files][config advanced] will determine how long a tombstone will remain before being reaped.
+
+There are three possible settings:
+
+* `keep` - Disables tombstone removal
+* `immediate` - The tombstone is removed as soon as the request is
+  received
+* Custom time interval - How long to wait until the tombstone is
+  removed, expressed in milliseconds. The default is `3000`, i.e. to
+  wait 3 seconds
+
+In general, we recommend setting the `delete_mode` parameter to `keep`
+if you plan to delete and recreate objects under the same key. This protects against failure scenarios in which a deleted object may be resurrected.
+
+Setting `delete_mode` to `immediate` can be useful in situations in
+which an aggressive space reclamation process is necessary, such as
+when running [MapReduce jobs][developing mapreduce], but we do not recommend
+this in general.
+
+Setting `delete_mode` to a longer time duration than the default can be
+useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when
+network connectivity is an issue.
+
+## Deletion from Backends
+
+When attempting to reclaim disk space, deleting data may seem like the obvious first step.
However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion when a Riak tombstone is created, and later when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging], for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping, and prior to garbage collection delete operations will actually cause disk space usage to rise slightly.
+
+## Tombstones & Reporting
+
+When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:
+
+* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
+* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain upon the node until a further request finds it. At this time, a new reap timer will be initiated.
+* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
+* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too.
+* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.
+
+## Client Library Examples
+
+Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
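+
+As a quick HTTP-level illustration, here is a minimal sketch against a
+local node using the standard HTTP API (the bucket and key names are
+hypothetical):
+
+```curl
+# Delete the object; the request fails if W/PW cannot be satisfied
+curl -XDELETE http://127.0.0.1:8098/buckets/my_bucket/keys/my_key
+
+# A subsequent GET returns 404 Not Found once a quorum of Riak
+# tombstones is visible
+curl -i http://127.0.0.1:8098/buckets/my_bucket/keys/my_key
+```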
+
+## Resources
+
+* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/runtime-interaction.md b/content/riak/kv/3.0.2/using/reference/runtime-interaction.md
new file mode 100644
index 0000000000..159382e2a7
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/runtime-interaction.md
@@ -0,0 +1,70 @@
+---
+title: "Runtime Interaction Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Runtime Interaction"
+    identifier: "managing_ref_runtime_interaction"
+    weight: 104
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/advanced/runtime
+  - /riak/kv/3.0.2/ops/advanced/runtime
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters
+
+Riak's [configuration files][config reference] provide a variety of parameters that
+enable you to fine-tune how Riak interacts with two important elements
+of the underlying operating system: distribution ports and OS
+processes/garbage collection.
+
+## Ports
+
+Distribution ports connect Riak nodes within a [cluster][concept clusters]. The
+following port-related parameters are available:
+
+* `runtime_health.triggers.distribution_port` - Whether distribution
+  ports with full input buffers will be counted as busy.
+  * Default: `on`
+* `runtime_health.triggers.port` - Whether ports with full input
+  buffers will be counted as busy. Ports can represent open files or network sockets.
+  * Default: `on`
+* `runtime_health.thresholds.busy_ports` - The threshold at which a
+  warning will be triggered about the number of ports that are overly
+  busy. Ports with full input buffers count toward this threshold.
+  * Default: `2`
+
+## Processes
+
+Riak will log warnings related to busy operating system processes and
+garbage collection. You can specify the conditions in which warnings are
+triggered using the following parameters:
+
+* `runtime_health.thresholds.busy_processes` - The threshold at which
+  a warning will be triggered about the number of processes that are
+  overly busy. Processes with large heaps or that take a long time to
+  garbage collect will count toward this threshold.
+  * Default: `30`
+* `runtime_health.triggers.process.heap_size` - A process will be
+  marked as busy when its heap exceeds this size (in bytes).
+  * Default: `160444000`
+* `runtime_health.triggers.process.garbage_collection` - A process
+  will be marked as busy when it exceeds this amount of time doing
+  garbage collection. Enabling this setting can cause performance
+  problems on multi-core systems.
+  * Default: `off`
+  * Example when enabled: `50ms`
+* `runtime_health.triggers.process.long_schedule` - A process will
+  be marked as busy when it exceeds this length of time during a single
+  process scheduling and execution cycle.
+  * Default: `off`
+  * Example when enabled: `20ms`
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/search.md b/content/riak/kv/3.0.2/using/reference/search.md
new file mode 100644
index 0000000000..2ccc659ab3
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/search.md
@@ -0,0 +1,457 @@
+---
+title: "Search Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Search"
+    identifier: "managing_ref_search"
+    weight: 109
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/advanced/search
+  - /riak/kv/3.0.2/dev/advanced/search
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters
+[configuring search]: {{<baseurl>}}riak/kv/3.0.2/configuring/search
+
+> **Note on search 2.0 vs. legacy search**
+>
+> This document refers to Riak search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+The project that implements Riak search is codenamed Yokozuna. This is a
+more detailed overview of the concepts and reasons behind the design of
+Yokozuna, for those interested. If you're simply looking to use Riak
+search, you should check out the [Using Search]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search) document.
+
+![Yokozuna]({{<baseurl>}}images/yokozuna.png)
+
+## Riak Search is Erlang
+
+In Erlang OTP, an "application" is a group of modules and Erlang
+processes which together perform a specific task. The word application
+is confusing because most people think of an application as an entire
+program such as Emacs or Photoshop. But Riak Search is just a sub-system
+in Riak itself. Erlang applications are often stand-alone, but Riak
+Search is more like an appendage of Riak. It requires other subsystems
+like Riak Core and KV, but also extends their functionality by providing
+search capabilities for KV data.
+
+The purpose of Riak Search is to bring more sophisticated and robust
+query and search support to Riak. Many people consider Lucene and
+programs built on top of it, such as Solr, as the standard for
+open-source search. There are many successful applications built on
+Lucene/Solr, and it sets the standard for the feature set that
+developers and users expect. Meanwhile, Riak has a great story as a
+highly-available, distributed key/value store. Riak Search takes
+advantage of the fact that Riak already knows how to do the distributed
+bits, combining its feature set with that of Solr and drawing on the
+strengths of each.
+
+Riak Search is a mediator between Riak and Solr. There is nothing
+stopping a user from deploying these two programs separately, but this
+would leave the user responsible for the glue between them. That glue
+can be tricky to write. It requires dealing with monitoring, querying,
+indexing, and dissemination of information.
+
+Unlike Solr by itself, Riak Search knows how to do all of the following:
+
+* Listen for changes in key/value (KV) data and make the appropriate
+  changes to indexes that live in Solr. It also knows how to take a user
+  query on any node and convert it to a Solr distributed search, which
+  will correctly cover the entire index without overlap in replicas.
+* Take index creation commands and disseminate that information across
+  the cluster.
+* Communicate with and monitor the Solr OS process.
+
+## Solr/JVM OS Process
+
+Every node in a Riak [cluster][concept clusters] has a corresponding operating
+system (OS) process running a JVM which hosts Solr on the Jetty
+application server.
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
+
+Because of this mismatch between KV and Solr, Riak Search must act as a
+mediator between the two, meaning it must have a way to inspect a KV
+object and create a structure which Solr can ingest for indexing. In
+Solr this structure is called a **document**. This task of creating a
+Solr document from a Riak object is the job of the **extractor**. To
+perform this task, two things must be considered.
+
+**Note**: This isn't quite right; the fields created by the extractor
+are only a subset of the fields in the final document. Special fields
+needed for Yokozuna to properly query data, as well as tagging fields,
+are also created. This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a
+   Solr document?
+2. If so, how is the object's value mapped from one to the other?
+   For example, the value may be `application/json` which contains
+   nested objects. This must somehow be transformed into a flat
+   structure.
+
+The first question is answered by the _extractor mapping_. By default
+Yokozuna ships with extractors for several common data types. Below is a
+table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation
+of the extractor module. Every extractor must conform to the
+following Erlang specification:
+
+```erlang
+-spec extract(ObjectValue :: binary(), Options :: proplist()) -> fields() | {error, term()}.
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+```
+
+The value of the object is passed along with options specific to each
+extractor. Assuming the extractor correctly parses the value, it will
+return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the
+object's value verbatim and associate it with the field name `text`.
+For example, an object with the value "How much wood could a woodchuck
+chuck if a woodchuck could chuck wood?" would result in the following
+fields list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier.
+JSON can be nested arbitrarily. That is, the key of a top-level object
+can have an object as a value, and this object can have another object
+nested inside, and so on. Yokozuna's JSON extractor must have some method
+of converting this arbitrary nesting into a flat list. It does this by
+concatenating nested object fields with a separator. The default
+separator is `.`. An example should make this more clear.
+
+Below is JSON that represents a person, what city they are from and what
+cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice.
+
+* Nested objects have their field names concatenated to form a field
+  name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your
+  schema defines this field as multi-valued.
+
+The XML extractor works in a very similar fashion to the JSON extractor
+except it also has element attributes to worry about. To see the
+document created for an object, without actually writing the object, you
+can use the extract HTTP endpoint. This will do a dry-run extraction and
+return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+  -H 'Content-Type: application/json' \
+  --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and
+types. For each document stored, every field must have a matching name
+in the schema, used to determine the field's type, which in turn
+determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempts to hide any details of the Solr
+schema: a user creates a schema for Yokozuna just as she would for Solr.
+Here is the general structure of a schema.
+
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding
+options are declared. Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
+called `_yz_default`. This is an extremely general schema which makes
+heavy use of dynamic fields---it is intended for development and
+testing. In production, a schema should be tailored to the data being
+indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+correcting entropy (divergence) between the data stored in Riak's
+key-value backend and the indexes stored in Solr. The impetus for AAE is
+that failures come in all shapes and sizes---disk failure, dropped
+messages, network partitions, timeouts, overflowing queues, segmentation
+faults, power outages, etc. Failures range from obvious to invisible.
+Failure prevention is fraught with failure, as well. How do you prevent
+your prevention system from failing? You don't. Code for detection, not
+prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite
+expensive. To minimize the overall cost of detection, AAE makes use of
+hashtrees. Every partition has a pair of hashtrees; one for KV and
+another for Yokozuna. As data is written, the hashtrees are updated in
+real-time.
+
+Each tree stores the hash of the object. Periodically a partition is
+selected and the pair of hashtrees is _exchanged_. First the root hashes
+are compared. If equal then there is no more work to do.
You could have
+millions of keys in one partition and verifying they **all** agree takes
+the same time as comparing two hashes. If they don't match then the
+root's children are checked and this process continues until the
+individual discrepancies are found. If either side is missing a key or
+the hashes for a key do not match then _repair_ is invoked on that key.
+Repair converges the KV data and its indexes, removing the entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the
+hashtrees themselves may contain some entropy. For example, what if the
+root hashes agree but a divergence exists in the actual data? Simple:
+you assume you can never fully trust the hashtrees, so periodically you
+_expire_ them. When expired, a tree is completely destroyed and then
+re-built from scratch. This requires folding all data for a partition,
+which can be expensive and take some time. For this reason, by default,
+expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
+[screencast](http://coffee.jtuple.com/video/AAE.html).
+
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a
+stream of tokens. Solr allows many different methods of analysis,
+an important fact because different field values may represent
+different types of data. For data like unique identifiers, dates, and
+categories you want to index the value verbatim---it shouldn't be
+analyzed at all. For text like product summaries, or a blog post,
+you want to split the value into individual words so that they may be
+queried individually. You may also want to remove common words,
+lowercase words, or perform stemming. This is the process of
+_analysis_.
+
+Solr provides many different field types which analyze data in different
+ways, and custom analyzer chains may be built by stringing together XML
+in the schema file, allowing custom analysis for each field. For more
+information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via
+Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata
+   about it that should be indexed, for example storing an image with
+   location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must
+   be added _without_ modifying the object's value.
+
+See
+[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
+for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index
+entries for a given Riak Object are co-located on the same physical
+machine. To query the entire index all partitions must be contacted.
+Adjacent partitions keep replicas of the same object. Replication allows
+the entire index to be considered by only contacting a subset of the
+partitions. The process of finding a covering set of partitions is known
+as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can
+be thought of as a unique set of nodes along with a covering set of
+partitions. Yokozuna treats the node list as physical hostnames and
+passes them to Solr's distributed search via the `shards` parameter.
+Partitions, on the other hand, are treated logically in Yokozuna. All
+partitions for a given node are stored in the same index; unlike KV
+which uses _partition_ as a physical separation.
To properly filter out
+overlapping replicas, the partition data from the cover plan is passed to
+Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very
+expensive operation as much computation is done symbolically, and the
+process amounts to a knapsack problem. The larger the ring, the more
+expensive. Yokozuna takes advantage of the fact that it has no physical
+partitions by computing a coverage plan asynchronously every few
+seconds, caching the plan for query use. In the case of node failure or
+ownership change this could mean a delay between cluster state and the
+cached plan. This is, however, a good trade-off given the performance
+benefits, especially since even without caching there is a race, albeit
+one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time it takes to initiate a drain to the time the drain is completed.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees.
Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr.
+
+* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting.
+
+* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window.
+
+* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window.
+
+* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of querying latency, as measured from the time it takes to send a request to Solr to the time the response is received from Solr.
+
+* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window.
+
+* `search_index_bad_entry_count` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+
+* `search_index_bad_entry_one` - The number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute.
+
+* `search_index_extract_fail_count` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last restart of Riak.
+
+* `search_index_extract_fail_one` - The number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute.
+
+While most of the default values are sufficient, you may have to
+increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start.
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/secondary-indexes.md b/content/riak/kv/3.0.2/using/reference/secondary-indexes.md
new file mode 100644
index 0000000000..3b2a044996
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/secondary-indexes.md
@@ -0,0 +1,76 @@
+---
+title: "Secondary Indexes Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Secondary Indexes"
+    identifier: "managing_ref_2i"
+    weight: 110
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.2/dev/advanced/2i
+  - /riak/kv/3.0.2/dev/advanced/2i
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.2/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends.
+
+This document provides implementation and other details for Riak's
+[secondary indexes]({{<baseurl>}}riak/kv/3.0.2/developing/usage/secondary-indexes/) \(2i) feature.
+
+## How It Works
+
+Secondary indexes use **document-based partitioning**, a system where
+indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode). This
+system is also known as a local index. Secondary indexes are a list of key/value
+pairs that are similar to HTTP headers. At write time, objects are
+tagged with index entries consisting of key/value metadata. This
+metadata can be queried to retrieve the matching keys.
+
+![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png)
+
+Indexes reside on multiple machines. Since indexes for an object are
+stored on the same partition as the object itself, query-time
+performance issues might arise. When issuing a query, the system must
+read from a "covering" set of partitions and then merge the results.
+The system looks at how many replicas of data are stored---the N value
+or `n_val`---and determines the minimum number of partitions that it
+must examine (1 / `n_val`) to retrieve a full set of results, also
+taking into account any offline nodes.
+
+An application can modify the indexes for an object by reading an
+object, adding or removing index entries, and then writing the object.
+Finally, an object is automatically removed from all indexes when it is
+deleted. The object's value and its indexes should be thought of as a
+single unit. There is no way to alter the indexes of an object
+independently from the value of an object, and vice versa. Indexing is
+atomic, and is updated in real time when writing an object. This means
+that an object will be present in future index queries as soon as the
+write operation completes.
+
+Riak stores 3 replicas of all objects by default, although this can be
+changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/replication-properties). The system is capable of generating a full set of results
+from one third of the system’s partitions as long as it chooses the
+right set of partitions. The query is sent to each partition, the index
+data is read, and a list of keys is generated and then sent back to the
+requesting node.
+
+> **Note on 2i and strong consistency**
+>
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
+secondary index metadata to those objects, you can still perform
+strongly consistent operations on those objects but the secondary
+indexes will be ignored.
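+
+To make the write-time tagging and index querying described above
+concrete, here is a minimal HTTP sketch. The bucket, key, and index
+names are hypothetical, and 2i requires a backend that supports it,
+such as LevelDB:
+
+```curl
+# Tag an object with a binary secondary index at write time
+curl -XPUT http://127.0.0.1:8098/buckets/users/keys/john \
+  -H 'x-riak-index-email_bin: john@example.com' \
+  -H 'Content-Type: application/json' \
+  -d '{"name":"John"}'
+
+# Query the index to retrieve the matching keys
+curl http://127.0.0.1:8098/buckets/users/index/email_bin/john@example.com
+```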
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/snmp.md b/content/riak/kv/3.0.2/using/reference/snmp.md
new file mode 100644
index 0000000000..02c24f6257
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/snmp.md
@@ -0,0 +1,166 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.2/ops/running/monitoring/snmp
+  - /riak/kv/3.0.2/ops/running/monitoring/snmp
+---
+
+Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average GET and PUT times as well as the number of PUTs and GETs. This document covers only SNMP v2c, the last supported version. Support for SNMP was dropped after the release of Riak KV 2.2.3 Enterprise Edition. The configuration examples below are left for those analyzing legacy settings and work only with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (Ex: `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: The commas in the IP are in the correct format
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                   {force_load, true}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings you can start your Riak node and will be able to snmpwalk the node to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak you will need to reference the MIB shipped with Riak.
For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+    -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+    192.168.52.129:4000 RIAK
+```
+
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/statistics-monitoring.md b/content/riak/kv/3.0.2/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..25698c4b55
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/statistics-monitoring.md
@@ -0,0 +1,395 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/running/stats-and-monitoring
+  - /riak/kv/3.0.2/ops/running/stats-and-monitoring
+---
+
+Riak provides data related to current operating status, which includes
+statistics in the form of counters and histograms. These statistics
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/3.0.2/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered
+statistics, as well as numerous solutions for monitoring and gathering
+statistics that our customers and community report using successfully
+in Riak cluster environments. You can learn more about the specific
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/3.0.2/developing/api/http/status) documentation.
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with
+diagnostics and early warnings of potential problems, as well as help
+guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor Load
+* Available Memory
+* Available disk space
+* Used file descriptors
+* Swap Usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory statistics and
+writebacks. Things like massive flushes of dirty pages or steadily
+climbing writeback volumes can indicate poor virtual memory tuning.
+More information can be found [here][sysctl_vm_txt] and in our +documentation on [system tuning]({{<baseurl>}}riak/kv/3.0.2/using/performance/#storage-and-file-system-tuning). + +## Riak Metrics to Graph +Riak metrics fall into several general categories: + +1. Throughput metrics +2. Latency metrics +3. Erlang resource usage metrics +4. General Riak load/health metrics + +If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/inspecting-node) is +not practical, you should pick a minimum relevant subset from these +categories. Some of the most helpful metrics are discussed below. + +### Throughput Metrics + +Graphing the throughput stats relevant to your use case is often +helpful for capacity planning and usage trend analysis. In addition, +it helps you establish an expected baseline -- that way, you can +investigate unexpected spikes or dips in the throughput. The +following stats are recorded for operations that happened *during the +last minute*. + +Metric | Relevance | Operations (for the last minute) +:--------|:--------|:-------------------------------- +```node_gets``` | K/V | Reads coordinated by this node +```node_puts``` | K/V | Writes coordinated by this node +```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes +```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes +```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes +```search_query_throughput_one``` | Search | Search queries on the node +```search_index_throughtput_one``` | Search | Documents indexed by Search +```consistent_gets``` | Strong Consistency | Consistent reads on this node +```consistent_puts``` | Strong Consistency | Consistent writes on this node +```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads + +Note that there are no separate stats for updates to Flags or +Registers, as these are included in ```vnode_map_update```. + +### Latency Metrics + +As with the throughput metrics, keeping an eye on average (and max) +latency times will help detect usage patterns, and provide advanced +warnings for potential problems. + +{{% note title="Note on FSM Time Stats" %}} +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time effectively +represents experienced latency. Mean, Median, and 95th-, 99th-, and +100th-percentile (Max) counters are displayed. These are one-minute stats. 
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute
+```pbc_active``` | | Number of currently active protocol buffer connections
+```pbc_connects``` | | Number of new protocol buffer connections established during the last minute
+```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes)
+```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0)
+```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection
+```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection
+
+### General Riak Search Load/Health Metrics
+
+These stats give a picture of the general level of activity or load on
+the Riak Search subsystem at any given moment.
+
+Metric | Description
+:------|:------------
+`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak.
+`search_index_bad_entry_one` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute.
+`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last restart of Riak.
+`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute.
+
+
+## Command-line Interface
+
+The [`riak-admin`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/) tool provides two
+interfaces for retrieving statistics and other information: `status`
+and `stat`.
+
+### status
+
+Running the `riak-admin status` command will return all of the
+currently available information from a running node.
+
+```bash
+riak-admin status
+```
+
+This will return a list of over 300 key/value pairs, like this:
+
+```
+1-minute stats for 'dev1@127.0.0.1'
+-------------------------------------------
+connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1']
+consistent_get_objsize_100 : 0
+consistent_get_objsize_195 : 0
+... etc ...
+```
+
+A comprehensive list of available stats can be found in the
+[Inspecting a Node]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/inspecting-node/#riak-admin-status) document.
+
+### stat
+
+The `riak-admin stat` command is related to the `riak-admin status`
+command but provides a more fine-grained interface for interacting with
+stats and information. Full documentation of this command can be found
+in the [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#stat) document.
+
+## Statistics and Monitoring Tools
+
+There are many open source, self-hosted, and service-based solutions for
+aggregating and analyzing statistics and log data for the purposes of
+monitoring, alerting, and trend analysis on a Riak cluster. Some
+solutions provide Riak-specific modules or plugins as noted.
+
+The following are solutions which customers and community members have
+reported success with when monitoring the operational status of
+their Riak clusters. Community and open source projects are presented
+along with commercial and hosted services.
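+
+Most of the tools below ultimately consume the HTTP [`/stats`]({{<baseurl>}}riak/kv/3.0.2/developing/api/http/status) endpoint
+described above. If you need a quick, ad hoc collector while evaluating
+them, the following is a minimal sketch; it assumes `curl` and `jq` are
+installed and that the node's HTTP listener is at `127.0.0.1:8098`:
+
+```bash
+# Append a timestamp and two one-minute throughput counters to a CSV
+# every 60 seconds; extend the jq filter with any stats you care about.
+while true; do
+    printf '%s,' "$(date +%s)" >> riak_stats.csv
+    curl -s http://127.0.0.1:8098/stats |
+        jq -r '[.node_gets, .node_puts] | @csv' >> riak_stats.csv
+    sleep 60
+done
+```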
+
+{{% note title="Note on Riak 2.x Statistics Support" %}}
+Many of the below tools were created either by third parties or by Basho
+engineers for general usage, and have been passed to the community for further
+updates. As such, many of the below only aggregate the statistics and messages
+that were output by Riak 1.4.x.
+
+Like all code under [Basho Labs](https://github.com/basho-labs/), the below
+tools are "best effort" and have no dedicated Basho support. We both
+appreciate and need your contribution to keep these tools stable and up to
+date. Please open up a GitHub issue on the repository if you'd like to be a
+maintainer.
+
+Look for banners calling out the tools we've verified to support the latest
+Riak 2.x statistics!
+{{% /note %}}
+
+### Self-Hosted Monitoring Tools
+
+#### Riaknostic
+
+[Riaknostic](http://riaknostic.basho.com) is a growing suite of
+diagnostic checks that can be run against your Riak node to discover
+common problems and recommend how to resolve them. These checks are
+derived from the experience of the Basho Client Services Team as well as
+numerous public discussions on the mailing list, IRC room, and other
+online media.
+
+Riaknostic integrates into the `riak-admin` command via a `diag`
+subcommand, and is a great first step in the process of diagnosing and
+troubleshooting issues on Riak nodes.
+
+#### Riak Control
+
+[Riak Control]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-control/) is Basho's REST-driven user interface for managing Riak
+clusters. It is designed to give you quick insight into the health of
+your cluster and allow for easy management of nodes.
+
+While Riak Control does not currently offer specific monitoring and
+statistics aggregation or analysis functionality, it does offer features
+which provide immediate insight into overall cluster health, node
+status, and handoff operations.
+
+#### collectd
+
+[collectd](http://collectd.org) gathers statistics about the system it
+is running on and stores them. The statistics are then typically graphed
+to find current performance bottlenecks, predict system load, and
+analyze trends.
+
+#### Ganglia
+
+[Ganglia](http://ganglia.info) is a monitoring system specifically
+designed for large, high-performance groups of computers, such as
+clusters and grids. Customers and community members using Riak have
+reported success in using Ganglia to monitor Riak clusters.
+
+A [Riak Ganglia module][riak_ganglia] for collecting statistics from
+the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/3.0.2/developing/api/http/status) endpoint is also available.
+
+#### Nagios
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x.**
+{{% /note %}}
+
+[Nagios](http://www.nagios.org) is a monitoring and alerting solution
+that can provide information on the status of Riak cluster nodes, in
+addition to various types of alerting when particular events occur.
+Nagios also offers logging and reporting of events and can be used for
+identifying trends and capacity planning.
+
+A collection of [reusable Riak-specific scripts][riak_nagios] is
+available to the community for use with Nagios.
+
+#### OpenTSDB
+
+[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database
+(TSDB) used to store, index, and serve metrics from various sources. It can
+collect data at a large scale and graph these metrics on the fly.
+
+A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of
+the [tcollector framework][tcollector].
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+The [Riemann Tools](https://github.com/aphyr/riemann.git) project,
+a collection of small programs for sending data to Riemann, provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have
+reported successfully using for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/3.0.2/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine-generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/3.0.2/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. These data are then available for real-time graphing,
+search, and other visualizations, ideal for troubleshooting complex
+issues and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistic information which can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ + + + + diff --git a/content/riak/kv/3.0.2/using/reference/strong-consistency.md b/content/riak/kv/3.0.2/using/reference/strong-consistency.md new file mode 100644 index 0000000000..b1c2380b3d --- /dev/null +++ b/content/riak/kv/3.0.2/using/reference/strong-consistency.md @@ -0,0 +1,150 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +aliases: +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/3.0.2/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + +## Making the Strong vs. Eventual Decision + +The first system described above may sound like the undisputed champion, +and the second system undesirable. However: + +1. Reads and writes on the first system will often be slower---if only + by a few milliseconds---because the system needs to manage reads and + writes more carefully. If performance is of primary concern, the + first system might not be worth the sacrifice. +2. Reads and writes on the first system may fail entirely if enough + servers are unavailable. If high availability is the top priority, + then the second system has a significant advantage. + +So when deciding whether to use strong consistency in Riak, the +following question needs to be asked: + +#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value? + +If the answer is yes, then you should seriously consider using Riak in a +strongly consistent way for the data that demands it, while bearing in +mind that other data can still be stored in Riak in an eventually +consistent way. 
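+
+To make the per-key opt-in concrete: strong consistency is applied
+through a bucket type, as described in the [enabling and using]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/strong-consistency/)
+guide linked above. A minimal sketch, using `strongly_consistent` as an
+example type name and assuming `strong_consistency = on` has already
+been set in each node's `riak.conf`:
+
+```bash
+# Create and activate a bucket type whose keys are strongly consistent;
+# keys under other bucket types remain eventually consistent.
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+riak-admin bucket-type activate strongly_consistent
+```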
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable
+trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available
+than eventually consistent operations because they require a **quorum**
+of available object replicas to succeed. Quorum is defined as N / 2 + 1
+(i.e. `n_val` / 2 + 1, using integer division). If N is set to 7, at
+least 4 object replicas must be available; if N is 3, at least 2 must
+be available; and so on.
+
+If there is a network partition that leaves fewer than a quorum of object
+replicas available within an ensemble, strongly consistent operations
+against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault
+tolerance. Consistent operations can still succeed when a minority of
+replicas in each ensemble are offline, faulty, or unreachable. In
+other words, **strongly consistent operations will succeed as long as
+quorum is maintained**. A fuller discussion can be found in the
+[operations]({{<baseurl>}}riak/kv/3.0.2/configuring/strong-consistency/#fault-tolerance)
+documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/3.0.2/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+which can entail a performance hit of varying proportions, depending on
+a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/3.0.2/configuring/strong-consistency/#performance).
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/v2-multi-datacenter.md b/content/riak/kv/3.0.2/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..acbe1fc674
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,40 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.2/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+ +[Learn More >>][v2 mdc fullsync] + + + + diff --git a/content/riak/kv/3.0.2/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/3.0.2/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..9101b818fe --- /dev/null +++ b/content/riak/kv/3.0.2/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,130 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/3.0.2/ops/mdc/v2/architecture + - /riak/kv/3.0.2/ops/mdc/v2/architecture +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.2/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e. +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime mode are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the Figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. 
The site node in the secondary cluster initiates fullsync replication
+   with the primary node by sending a message to the listener node in
+   the primary cluster.
+3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode) in their respective clusters and compute a hash for
+   each key's object value. The site node on the secondary cluster sends
+   its complete list of key/hash pairs to the listener node in the
+   primary cluster. The listener node then sequentially compares the
+   secondary cluster's key/hash pairs with its own, identifying any
+   missing objects or updates needed in the secondary cluster.
+4. The listener node streams the missing objects/updates to the
+   secondary cluster.
+5. The secondary cluster replicates the updates within the cluster to
+   achieve the new object values, completing the fullsync cycle.
+
+<br>
+![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png)
+<br>
+
+## Realtime Replication
+
+Riak performs the following steps during realtime
+replication, as illustrated in the Figure below.
+
+1. The secondary cluster establishes a TCP connection to the primary.
+2. Realtime replication of a key/object is initiated when an update is
+   sent from a client to the primary cluster.
+3. The primary cluster replicates the object locally.
+4. The listener node on the primary cluster streams an update to the
+   secondary cluster.
+5. The site node within the secondary cluster receives and replicates
+   the update.
+
+<br>
+![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png)
+<br>
+
+## Restrictions
+
+It is important to note that both clusters must have certain attributes
+in common for Multi-Datacenter Replication to work. If you are using
+either fullsync or realtime replication, both clusters must have the
+same [ring size]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters/#the-ring); if you are using fullsync
+replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/3.0.2/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the
+source and sink cluster.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/3.0.2/using/reference/v2-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..5a8bcfe2a9
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/v2-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,53 @@
+---
+title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v2_fullsync"
+    weight: 101
+    parent: "managing_ref_v2"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/mdc/v2/scheduling-fullsync
+  - /riak/kv/3.0.2/ops/mdc/v2/scheduling-fullsync
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.2/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead.
+{{% /note %}}
+
+
+## Scheduling Fullsync Operation
+
+With the `pause` and `resume` commands, it is possible to limit the
+fullsync operation to off-peak times. First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
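+
+The first step lives in the `riak_repl` section of the node's
+configuration. A sketch of what that might look like, assuming (per the
+replication configuration reference) that `fullsync_interval` accepts
+the atom `disabled`:
+
+```advancedconfig
+{riak_repl, [
+    %% don't kick off a fullsync when the connection is established
+    {fullsync_on_connect, false},
+    %% no periodic fullsync; cron drives it via riak-repl instead
+    {fullsync_interval, disabled}
+  ]}
+```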
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` + + + + diff --git a/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter.md b/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..0c17498fcf --- /dev/null +++ b/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter.md @@ -0,0 +1,52 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +aliases: +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] + + + + diff --git a/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..28a73dbe46 --- /dev/null +++ b/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,129 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.2/ops/mdc/v3/aae + - /riak/kv/3.0.2/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak
+Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a
+technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in
+LevelDB, so if AAE is already active, there is no additional startup
+delay for enabling the `aae` fullsync strategy. AAE can also be enabled
+for the first time on a cluster, although in that case some custom
+settings can help AAE trees be built more quickly. See
+[Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later installed on source and sink
+  clusters
+* Riak MDC Replication Version 3 enabled on source and sink
+  clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the
+  `advanced.config` configuration file must be set to `aae` on both
+  source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In
+  the event that an AAE tree is not built on both the source and sink,
+  fullsync will default to the `keylist` fullsync strategy for that
+  partition.
+
+## Configuration
+
+In Riak version 2.0 and later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To use the [active anti-entropy][glossary aae] \(AAE) fullsync strategy, you must enable AAE in both source and sink clusters. If it is not
+enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all
+of the necessary hash trees, because the default **build rate** of trees
+is to build 1 partition per hour, per node. With a
+[ring size][concept clusters] of 256 and 5 nodes, that is roughly 2 days.
+
+Changing the rate of tree building can speed up this process, with the
+caveat that rebuilding a tree takes processing time from the cluster,
+and this should not be done without assessing the possible impact on
+get/put latencies for normal cluster operations. For a production
+cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a
+partition has not had its AAE tree built yet, it will default to using
+the `keylist` replication strategy. Instructions on these settings can
+be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting
+the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit`
+settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on
+both source and sink clusters. If not, the `keylist` replication
+strategy will be used.
+
+To enable AAE with Version 3 MDC Replication:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {fullsync_strategy, aae},
+    % ...
+  ]}
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..b0c6acb797
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/architecture.md
@@ -0,0 +1,186 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Architecture"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Architecture"
+    identifier: "managing_ref_v3_architecture"
+    weight: 100
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.2/ops/mdc/v3/architecture
+  - /riak/kv/3.0.2/ops/mdc/v3/architecture
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters
+
+## How Version 3 Replication Works
+
+In Multi-Datacenter (MDC) Replication, a cluster can act as either a
+
+* **source cluster**, which sends replication data to one or more sink
+  clusters, or a
+* **sink cluster**, which receives replication data and is generally
+  located in a datacenter in another region or country.
+
+Bidirectional replication can easily be established by making a cluster
+both a source and sink to other clusters. Riak
+Multi-Datacenter Replication is considered "masterless" in that all
+clusters participating will resolve replicated writes via the normal
+resolution methods available in Riak.
+
+In Multi-Datacenter Replication, there are two primary modes of
+operation:
+
+* **Fullsync** replication is a complete synchronization that occurs
+  between source and sink cluster(s), which can be performed upon
+  initial connection of a sink cluster if you wish
+* **Realtime** replication is a continual, incremental synchronization
+  triggered by successful writing of new updates on the source cluster
+
+Fullsync and realtime replication modes are described in detail below.
+
+## Concepts
+
+### Sources
+
+A source refers to a cluster that is the primary producer of replication
+data. A source can also refer to any node that is part of the source
+cluster. Source clusters push data to sink clusters.
+
+### Sinks
+
+A sink refers to a cluster that is the primary consumer of replication
+data. A sink can also refer to any node that is part of the sink
+cluster. Sink clusters receive data from source clusters.
+
+### Cluster Manager
+
+The cluster manager is a Riak service that provides
+information regarding nodes and protocols supported by the sink and
+source clusters. This information is primarily consumed by the
+`riak-repl connect` command.
+
+### Fullsync Coordinator
+
+In fullsync replication, a node on the source cluster is elected to be
+the *fullsync coordinator*. This node is responsible for starting and
+stopping replication to the sink cluster. It also communicates with the
+sink cluster to exchange key lists and ultimately transfer data across a
+TCP connection. If a fullsync coordinator is terminated as the result of
+an error, it will automatically restart on the current node. If the node
+becomes unresponsive, a leader election will take place within 5 seconds
+to select a new node from the cluster to become the coordinator. In the
+event of a coordinator restart, a fullsync will have to restart.
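+
+Which node currently holds the coordinator role can be checked from any
+node in the source cluster. A quick sketch, assuming the standard
+`riak-repl` tooling (exact field names vary by version):
+
+```bash
+# Replication status includes coordinator details; grep narrows the
+# output to the fullsync coordinator entries.
+riak-repl status | grep -A 2 fullsync_coordinator
+```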
+ +## Fullsync Replication + +Fullsync replication scans through the list of partitions in a Riak +cluster and determines which objects in the sink cluster need to be +updated. A source partition is synchronized to a node on the sink +cluster containing the current partition. + +## Realtime Replication + +In realtime replication, a node in the source cluster will forward data +to the sink cluster. A node in the source cluster does not necessarily +connect to a node containing the same [vnode][glossary vnode] on +the sink cluster. This allows Riak to spread out realtime replication +across the entire cluster, thus improving throughput and making +replication more fault tolerant. + +### Initialization + +Before a source cluster can begin pushing realtime updates to a sink, +the following commands must be issued: + +1. `riak-repl realtime enable <sink_cluster>` + + After this command, the realtime queues (one for each Riak node) are + populated with updates to the source cluster, ready to be pushed to + the sink. + +2. `riak-repl realtime start <sink_cluster>` + + This instructs the Riak connection manager to contact the sink + cluster. + + <br /> + ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png) + <br /> + + At this point realtime replication commences. + +<ol start="3"> +<li>Nodes with queued updates establish connections to the sink cluster +and replication begins.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png) +<br /> + +### Realtime queueing and synchronization + +Once initialized, realtime replication continues to use the queues to +store data updates for synchronization. + +<ol start="4"> +<li>The client sends an object to store on the source cluster.</li> +<li>Riak writes N replicas on the source cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png) +<br /> + +<ol start="6"> +<li>The new object is stored in the realtime queue.</li> +<li>The object is copied to the sink cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png) +<br /> + +<ol start="8"> +<li>The destination node on the sink cluster writes the object to N +nodes.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png) +<br /> + +<ol start="9"> +<li>The successful write of the object to the sink cluster is +acknowledged and the object removed from the realtime queue.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png) +<br /> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size][concept clusters]; if you are using fullsync +replication, every bucket's `n_val` must be the same in both the +source and sink cluster. 
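+
+A quick way to check the ring-size restriction is to compare the
+`ring_creation_size` value reported on a node in each cluster. A sketch,
+assuming `riak-admin status` exposes that statistic as described in the
+[Inspecting a Node]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/inspecting-node) docs:
+
+```bash
+# Run on one node in each cluster; the reported values must match
+# before pairing the clusters for fullsync or realtime replication.
+riak-admin status | grep ring_creation_size
+```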
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/cascading-writes.md
new file mode 100644
index 0000000000..15f84e2221
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -0,0 +1,102 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Cascading Realtime Writes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Cascading Writes"
+    identifier: "managing_ref_v3_cascading_writes"
+    weight: 102
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.2/ops/mdc/v3/cascading-writes
+  - /riak/kv/3.0.2/ops/mdc/v3/cascading-writes
+---
+
+## Introduction
+
+Riak includes a feature that cascades realtime writes across
+multiple clusters.
+
+Cascading Realtime Writes is enabled by default on new clusters running
+Riak. It will need to be manually enabled on existing clusters.
+
+Cascading realtime requires the `{riak_repl, rtq_meta}` capability to
+function.
+
+{{% note title="Note on cascading tracking" %}}
+Cascading tracking is a simple list of where an object has been written. This
+works well for most common configurations. Larger installations, however, may
+have writes cascade to clusters to which other clusters have already written.
+{{% /note %}}
+
+
+```
++---+     +---+     +---+
+| A | <-> | B | <-> | C |
++---+     +---+     +---+
+  ^                   ^
+  |                   |
+  V                   V
++---+     +---+     +---+
+| F | <-> | E | <-> | D |
++---+     +---+     +---+
+```
+
+In the diagram above, a write at cluster A will begin two cascades. One
+goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and
+finally B. Each cascade will loop around to A again, sending a
+replication request even if the same request has already occurred from
+the opposite direction, creating 3 extra write requests.
+
+This can be mitigated by disabling cascading in a cluster. If cascading
+were disabled on cluster D, a write at A would begin two cascades. One
+would go through B, C, and D, the other through F, E, and D. This
+reduces the number of extraneous write requests to 1.
+
+A different topology can also prevent extra write requests:
+
+```
++---+                      +---+
+| A |                      | E |
++---+                      +---+
+ ^  ^                      ^  ^
+ |   \   +---+     +---+  /   |
+ |    >  | C | <-> | D | <    |
+ |   /   +---+     +---+  \   |
+ V  V                      V  V
++---+                      +---+
+| B |                      | F |
++---+                      +---+
+```
+
+A write at A will cascade to C and B. B will not cascade to C because
+A will have already added C to the list of clusters where the write has
+occurred. C will then cascade to D. D then cascades to E and F. E and F
+see that the other was sent a write request (by D), and so they do not
+cascade.
+
+## Usage
+
+Riak Cascading Writes can be enabled and disabled using the
+`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/v3-multi-datacenter) for more information.
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..cdf00a11cd
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,72 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.2/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/3.0.2/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak-repl`
+section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes,
+  between fullsyncs, _or_
+* a list of `{"clustername", time_in_minutes}` pairs, one for each sink
+  participating in fullsync replication. Note the commas separating each
+  pair, and the `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    {fullsync_interval, 90} %% fullsync runs every 90 minutes
+    % ...
+  ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {data_root, "/configured/repl/data/root"},
+    % clusters sink_boston and sink_newyork have different intervals (in minutes)
+    {fullsync_interval, [
+      {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+      {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+  ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been
+  completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the
+  specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last
+  completed fullsync.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/repair-recovery.md b/content/riak/kv/3.0.2/using/repair-recovery.md
new file mode 100644
index 0000000000..ec982c22f4
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/repair-recovery.md
@@ -0,0 +1,53 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/repair-recovery/errors.md b/content/riak/kv/3.0.2/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..d9c4efc646
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/repair-recovery/errors.md
@@ -0,0 +1,366 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/running/recovery/errors
+  - /riak/kv/3.0.2/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may
+encounter -- screws fall out all of the time, the world is an imperfect
+place. This is an attempt to capture the most common recent errors that
+users do encounter, as well as to describe the non-critical error atoms
+which you may find in the logs.
+
+Discovering the source of an error can take some detective work, since
+one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error
+messages may appear in. Depending upon your log configuration, some may
+appear more often (e.g., if you set the log level to debug), while others
+may output to your console (e.g., if you tee'd your output or started
+Riak with `riak console`).
+
+You can optionally customize your log message format via the
+`lager_default_formatter` field under `lager` in `app.config`. If you
+do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so that you can look up portions of a
+log message, since printing every variation would be a bit unwieldy. For
+example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+starts with a date (`12:34:27.999`), followed by the log severity
+(`[error]`), and ends with a message formatted by Lager (found in the
+Lager table below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the Lager project, so it's good to note
+some of the more common message formats. In almost every case the
+reasons for the error are described as variables, such as `Reason` or
+`Mod` (meaning the Erlang module which is generally the source of the
+error).
+
+Riak does not format all error messages that it receives into
+human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in
+the table below, where the Erlang `Mod` value is `riak_core_capability`
+and the reason was an Erlang error: `no function clause matching
+orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang programming embraces a "happy path/fail fast" style, one
+of the more common error log strings you might encounter contains
+`{error,{badmatch,{...`. This is Erlang's way of telling you that an
+unexpected value was assigned, so these errors can prefix the more
+descriptive parts. In this case, `{error,{badmatch,{...` prefixes the
+more interesting `insufficient_vnodes_available` error, which can be
+found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or, if Riak is already running, check for `beam.smp`; or, if epmd thinks Riak is running, check for and kill epmd
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.2/using/repair-recovery/errors/#more">Step 1</a>.
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.2/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.2/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | A write was attempted to a file/directory on a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large an object or that you simply don't have enough RAM. The suggested minimum RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang atoms (and
+quite often wrapped within an `{error,{badmatch,{...` tuple, as
+described in the [Error](#erlang-errors) section above). This table lays
+out those terse error codes and related log messages, if they exist.
+
+### Riak Core
+
+Riak Core is the underlying framework on which KV is built. These are
+errors originating from that framework, and they can appear whether you
+use KV, Search, or any other Core-based implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g. backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check `riak-admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in the expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections, and ensure that the Erlang cookie (`vm.args` `-setcookie`) matches across nodes
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use, e.g. `{riak_kv_stat, true}`
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possibly a corrupted ring
+`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure the receiver node is still up and running
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN` and the sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.2/using/repair-recovery/errors/#more">Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive a `xfer_complete` status
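+
+For `max_concurrency` in particular, the handoff limit can be inspected
+and raised at runtime rather than by editing `handoff_concurrency` in
+the configuration. A sketch (the `transfer-limit` subcommand is present
+in recent Riak releases; the value shown is only illustrative):
+
+```bash
+# Show active transfers and the current per-node limits
+riak-admin transfers
+
+# Raise the handoff concurrency limit across the cluster (default is 2)
+riak-admin transfer-limit 4
+```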
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be
+Riak proper. This is the source of most of the code, and consequently,
+most of the error messages.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status
+`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
+`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that the coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.2/using/repair-recovery/errors/#more">Step 1</a>.
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/3.0.2/using/repair-recovery/errors/#more">Step 1</a>.
+`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone
+`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value
+`{field_parsing_failed, {Field, Value}}` | `Could not parse field <Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer
+`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, follow the message's exception and stacktrace to help debug
+`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend
+`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`.
+`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings
+`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe`
+`invalid_message` | | Unknown event sent to module | Ensure you're running the same version of Riak (and specifically of poolboy) across all nodes
+`{invalid_range, Args}` | | Index range query has Start > End | Fix your query
+`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result
+`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files
+`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key
+`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and an inability to flush a write to disk. If this happens repeatedly, stop/start the Riak node, forcing a memory realloc
+`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N
+`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized
+`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe`
+`notfound` | | No value found | Value was deleted, or was not yet stored or replicated
+`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high
+`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value
+`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code
+`{pw_val_unsatisfied, PR, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high
+`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value
+`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high
+`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value
+`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called
+`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value
+`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false`
+`timeout` | | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.2/using/repair-recovery/errors/#more">Step 1</a>. Or check that you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production.
+`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format
+`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format
+`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value
+`too_many_results` | | Too many results are attempted to be returned | This is a protective error. Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000)
+`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin`
+`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high
+`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value
+ | `Invalid equality query <SKey>` | An equality value is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query
+ | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query
+ | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for the given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution
+
+### Backend Errors
+
+These errors tend to stem from server-based problems. Backends are
+sensitive to low or corrupt disk or memory resources, native code, and
+configuration differences between nodes. Conversely, a network issue is
+unlikely to affect a backend.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config
+`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where to store bitcask data, under the `bitcask` section
+`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list
+`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config`
+`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure
+`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config`
+`not_loaded` | | Native driver not loading | Ensure your native drivers exist (.dll or .so files under lib/`project`/priv, where `project` is most likely eleveldb)
+`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket
+`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production
+
+### JavaScript
+
+These are some errors related to JavaScript pre-commit functions,
+MapReduce functions, or simply the management of the pool of JavaScript
+VMs. If you do not use JavaScript, these should not be encountered. If
+they are, check your configuration for high `*js_vm*` values, or treat
+them as a symptom of a deeper issue, such as low resources.
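+
+For reference, the JavaScript VM pools are sized in the `riak_kv`
+section of `app.config`. A minimal sketch with illustrative values only;
+tune them to your own MapReduce and hook workload:
+
+```erlang
+%% app.config fragment -- sizes are examples, not recommendations
+{riak_kv, [
+    {map_js_vm_count, 24},    %% VMs reserved for map phases
+    {reduce_js_vm_count, 18}, %% VMs reserved for reduce phases
+    {hook_js_vm_count, 2}     %% VMs reserved for pre-commit hooks
+]}
+```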
+
+Error | Message | Description | Resolution
+---------|---------|-------------|-------
+`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`)
+`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A given UTF-8 character was in a bad format | Only use correct UTF-8 characters for JavaScript code and arguments
+`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments
+ | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties
+`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | On OS X you may have compiled with `llvm-gcc` rather than `gcc`.
+
+### MapReduce
+
+These are possible errors logged by Riak's MapReduce implementation,
+both legacy as well as Pipe. If you never use or call MapReduce, you
+should not run across these.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it
+`bad_mapred_inputs` | | A bad value sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary
+`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce query code placed in a Riak value must first be stored before execution
+`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct
+`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (both issues may require a node restart)
+`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has an invalid input field | Fix MapReduce fields
+`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks. Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite them as Erlang functions
+`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post the MapReduce request with at least one of them
+`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value
+`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long-running job hitting the MapReduce timeout; upgrade to Pipe
+`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has an invalid query field | Fix the MapReduce query
+`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set a reduce phase at all
+`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted
+ | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation; the most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects
+ | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen
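+
+Several of the rows above (`missing_field`, `not_json`,
+`{invalid_json, Message}`) come down to a malformed request body. As a
+point of comparison, a minimal well-formed MapReduce request over HTTP
+might look like this (bucket name, host, and port are placeholders):
+
+```bash
+curl -XPOST http://127.0.0.1:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d '{"inputs":"mybucket",
+       "query":[{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}],
+       "timeout":60000}'
+```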
+
+## Specific messages
+
+Although you can put together many error causes with the tables above,
+here are some common yet esoteric messages with known causes and
+solutions.
+
+ Message | Resolution
+:--------|:----------
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The Node has been changed, either through a change of IP or of `vm.args` `-name`, without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files `rm -rf /var/lib/riak/ring/*` and rejoin to the cluster
+gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl setting on one node's mem backend, and another without)
+monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting `+zdbbl` higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1 msec).
+<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args`
+(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/3.0.2/using/repair-recovery/errors/#more">Step 2</a>.
+enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core.
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause for resolving this kind of error; check for the existence of stale `beam.smp` processes.
+exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify the HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range, as the `riak` user will not have access to such ports.
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/3.0.2/using/reference/snmp) configuration.
+RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
+
+
+### More
+
+1. <a name="f1"></a>Ensure node inter-communication
+   - Check `riak-admin member-status` and ensure the cluster is valid.
+   - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected.
+   - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node.
+   - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster.
+   - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`).
+
+2. <a name="f2"></a>Run LevelDB compaction
+   1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting; more than one might be a strong indication of a hardware or OS bug)*
+   2. Stop Riak on the node: `riak stop`
+   3. Start an Erlang session (do not start Riak, we just want Erlang)
+   4. From the Erlang console perform the following command to open the LevelDB database
+
+       ```erlang
+       [application:set_env(eleveldb, Var, Val) || {Var, Val} <-
+           [{max_open_files, 2000},
+            {block_size, 1048576},
+            {cache_size, 20*1024*1024*1024},
+            {sync, false},
+            {data_root, "/var/db/riak/leveldb"}]].
+       ```
+   5. For each of the corrupted LevelDB databases (found by `find . -name "LOG" -exec grep -l 'Compaction error' {} \;`) run this command, substituting in the proper vnode number.
+
+       ```erlang
+       eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []).
+       ```
+   6. When all have finished successfully you may restart the node: `riak start`
+   7. Check for proper operation by looking at log files in `/var/log/riak` and in the `LOG` files in the affected LevelDB vnodes.
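+
+If the `find` in step 1 turns up several corrupted vnodes, step 5 can be
+driven from a list instead of being retyped per partition. A sketch for
+the same Erlang session, assuming the `data_root` set in step 4:
+
+```erlang
+%% Fill this list from the output of the find command in step 1
+VNodes = ["442446784738847563128068650529343492278651453440"],
+[eleveldb:repair("/var/db/riak/leveldb/" ++ V, []) || V <- VNodes].
+```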
+ + + + diff --git a/content/riak/kv/3.0.2/using/repair-recovery/failed-node.md b/content/riak/kv/3.0.2/using/repair-recovery/failed-node.md new file mode 100644 index 0000000000..764fecffbd --- /dev/null +++ b/content/riak/kv/3.0.2/using/repair-recovery/failed-node.md @@ -0,0 +1,114 @@ +--- +title: "Recovering a Failed Node" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Recover a Failed Node" + identifier: "repair_recover_failed_node" + weight: 104 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.2/ops/running/recovery/failed-node + - /riak/kv/3.0.2/ops/running/recovery/failed-node +--- + +## General Recovery Notes + +A Riak node can fail for many reasons, but a handful of checks enable you to +uncover some of the most common problems that can lead to node failure, +such as checking for RAID and filesystem consistency or faulty memory and +ensuring that your network connections are fully functioning. + +When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster. + +During the recovery process, hinted handoff will kick in and update the data on +the recovered node with updates accepted from other nodes in the cluster. Your +cluster may temporarily return `not found` for objects that are currently +being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on +these scenarios, in particular how the system behaves while the failed node is +not part of the cluster). + +## Node Name Changed + +If you are recovering from a scenario in which node name changes are out of +your control, you'll want to notify the cluster of its *new* name using the +following steps: + +1. Stop the node you wish to rename: + + ```bash + riak stop + ``` + + +2. Mark the node down from another node in the cluster: + + ```bash + riak-admin down <previous_node_name> + ``` + +3. Update the node name in Riak's configuration files: + + ```riakconf + nodename = <updated_node_name> + ``` + + ```vmargs + -name <updated_node_name> + ``` + +4. Delete the ring state directory (usually `/var/lib/riak/ring`). + +5. Start the node again: + + ```bash + riak start + ``` + +6. Ensure that the node comes up as a single instance: + + ```bash + riak-admin member-status + ``` + + The output should look something like this: + + ``` + ========================= Membership ========================== +Status Ring Pending Node +--------------------------------------------------------------- +valid 100.0% -- 'dev-rel@127.0.0.1' +--------------------------------------------------------------- +Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +7. Join the node to the cluster: + + ```bash + riak-admin cluster join <node_name_of_a_member_of_the_cluster> + ``` + +8. Replace the old instance of the node with the new: + + ```bash + riak-admin cluster force-replace <previous_node_name> <new_node_name> + ``` + +9. 
Review the changes:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Finally, commit those changes:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/repair-recovery/failure-recovery.md b/content/riak/kv/3.0.2/using/repair-recovery/failure-recovery.md
new file mode 100644
index 0000000000..f29430c724
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/repair-recovery/failure-recovery.md
@@ -0,0 +1,129 @@
+---
+title: "Failure & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Failure & Recovery"
+    identifier: "repair_recover_failure"
+    weight: 100
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/3.0.2/ops/running/recovery/failure-recovery
+  - /riak/kv/3.0.2/ops/running/recovery/failure-recovery
+---
+
+Riak was built to withstand---or at the very least reduce the severity
+of---many types of system failure. Nonetheless, bugs are a reality,
+hardware does break, and occasionally Riak itself will fail. Here, we'll
+list some steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+## Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, and back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally and
+are not affected by a wider problem like a virtualization or network outage.
+Try to determine the cause of the problem from the data you have collected.
+
+## Data Loss
+
+Many failures incur no data loss, or only minimal loss that can be
+repaired automatically, without intervention. An outage of a single node
+does not necessarily cause data loss, as other replicas of every key are
+available elsewhere in the cluster. Once the node is detected as down,
+other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/3.0.2/learn/glossary/#hinted-handoff)).
+
+More severe data loss scenarios usually relate to hardware failure.
+If data is lost, several options are available for restoring it.
+
+1. **Restore from backup** - A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but it can be used to partially restore data from
+   lost storage volumes. If running in a RAID configuration, rebuilding
+   the array may also be possible.
+2. **Restore from multi-cluster replication** - If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the `riak-repl`
+   command, as shown in the sketch below.
+3. **Restore using intra-cluster repair** - Riak versions 1.2 and greater
+   include a repair feature which will restore lost partitions with
+   data from other replicas. Currently, this must be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho Client Services Engineer.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho are strongly recommended.
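+
+As a sketch (assuming multi-cluster replication is licensed and
+configured, and that the `riak-repl` tool is on the path), a manual
+fullsync can be started and watched from any node in the source cluster:
+
+```bash
+# Kick off a full synchronization to the connected sink cluster
+riak-repl fullsync start
+
+# Check progress; repeat until the fullsync completes
+riak-repl status
+```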
+
+## Data Corruption
+
+Data at rest on disk can become corrupted by hardware failure or other
+events. Generally, the Riak storage backends are designed to handle
+cases of corruption in individual files or entries within files, and can
+repair them automatically or simply ignore the corrupted parts.
+Otherwise, clusters can recover from data corruption in roughly the same
+way that they recover from data loss.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. If free capacity is low on the rest of the
+cluster while the node is out, other nodes may also be at risk, so
+monitor carefully.
+
+Replacing the node with one that has greater RAM capacity may temporarily
+alleviate the problem, but out-of-memory (OOM) issues tend to be an
+indication that the cluster is under-provisioned.
+
+## High Latency / Request Timeout
+
+High latencies and timeouts can be caused by slow disks or networks, or
+by an overloaded node. Check `iostat` and `vmstat` or your monitoring
+system to determine the state of resource usage. If I/O utilization is
+high but throughput is low, this may indicate that the node is
+responsible for too much data and that growing the cluster may be
+necessary. Additional RAM may also improve latency because more of the
+active dataset will be cached by the operating system.
+
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/3.0.2/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+the number of siblings, causing longer disk service times and slower
+network responses.
+
+Sibling explosion can be detected by examining the `node_get_fsm_siblings`
+and `node_get_fsm_objsize` statistics from the `riak-admin status` command.
+To recover from sibling explosion, the application should be throttled and
+the resolution policy might need to be invoked manually on offending keys.
+
+A Basho CSE can assist in manually finding large values, i.e., those that
+potentially have a sibling explosion problem, in the storage backend.
+
+MapReduce requests typically involve multiple I/O operations and are
+thus the most likely to time out. From the perspective of the client
+application, the success of MapReduce requests can be improved by reducing the
+number of inputs, supplying a longer request timeout, and reducing the usage
+of secondary indexes. Heavily loaded clusters may experience more MapReduce
+timeouts simply because many other requests are being serviced as well. Adding
+nodes to the cluster can reduce MapReduce failures in the long term by
+spreading load and increasing available CPU and IOPS.
+
+
+## Cluster Recovery From Backups
+
+See [Changing Cluster Information]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+
+{{% note title="Tip" %}}
+If you are a TI Tokyo Riak support customer and require assistance or
+further advice with a cluster recovery, please file a ticket with the
+<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>.
+{{% /note %}} + + + + diff --git a/content/riak/kv/3.0.2/using/repair-recovery/repairs.md b/content/riak/kv/3.0.2/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..5a25eb3d3a --- /dev/null +++ b/content/riak/kv/3.0.2/using/repair-recovery/repairs.md @@ -0,0 +1,391 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.2/ops/running/recovery/repairing-indexes + - /riak/kv/3.0.2/ops/running/recovery/repairing-indexes + - /riak/3.0.2/ops/running/recovery/failed-node + - /riak/kv/3.0.2/ops/running/recovery/failed-node + - /riak/3.0.2/ops/running/recovery/repairing-leveldb + - /riak/kv/3.0.2/ops/running/recovery/repairing-leveldb + - /riak/3.0.2/ops/running/recovery/repairing-partitions + - /riak/kv/3.0.2/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/3.0.2/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/3.0.2/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/3.0.2/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`.
+
+Compaction error messages take the following form:
+
+```
+<timestamp> Compaction Error: Corruption: corrupted compressed block contents
+```
+
+To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction Error` in each `LOG` file. Here is an example script:
+
+```bash
+find . -name "LOG" -exec grep -l 'Compaction error' {} \;
+```
+
+If there are compaction errors in any of your vnodes, those will be listed in the console. If any vnode has experienced such errors, you would see output like this:
+
+```
+./442446784738847563128068650529343492278651453440/LOG
+```
+
+
+{{% note %}}
+While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level.
+{{% /note %}}
+
+
+## Healing Corrupted LevelDBs
+
+When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not.
+
+Choose your setup below:
+
+1. [Just LevelDB](#leveldb)
+2. [LevelDB with tiered storage](#leveldb-with-tiered-storage)
+
+
+### LevelDB
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+3\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+4\. Then set `Options` equal to an empty list:
+
+```erlang
+Options = [].
+```
+
+5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB that you found using the [`find` command above](#checking-for-compaction-errors).
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+### LevelDB with Tiered Storage
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. Check your `riak.conf` file and make note of the following values:
+
+* leveldb.tiered (integer)
+* leveldb.tiered.path.fast
+* leveldb.tiered.path.slow
+
+3\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+4\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+5\. Then supply the information you noted in Step 2:
+
+```erlang
+Options = [
+    {tiered_slow_level, »leveldb.tiered value«},
+    {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"},
+    {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"}
+].
+```
+
+6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB partition that you found using the [`find` command above](#checking-for-compaction-errors), provided in double quotes.
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+
+## Repairing Partitions
+
+If you have experienced a loss of object replicas in your cluster, you
+may need to perform a repair operation on one or more of your data
+[partitions]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+run in situations where partitions or whole nodes are lost due to
+corruption or hardware failure. In these cases, nodes or partitions are
+brought back online without any data, which means that the need to
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/) is enabled.
+ +You will need to run a repair if the following are both true: + +* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy) +* You have both non-expiring data and keys that are not accessed + frequently (which means that they are not likely to be subject to + [read repair]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy)) + +You will most likely not need to run a repair operation if _any_ of the +following is true: + +* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/3.0.2/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy) +* Your entire key set is accessed frequently, allowing passive read + repair to repair the partitions +* Your data expires frequently + +In most cases, we recommend either using active anti-entropy or, if +necessary and only when necessary, running a repair operation using the +instructions below. + +### Running a Repair + +The Riak KV repair operation will repair objects from a node's adjacent +partitions on the ring, consequently fixing the index. This is done as +efficiently as possible by generating a hash range for all the buckets +and thus avoiding a preflist calculation for each key. Only a hash of +each key is done, its range determined from a bucket->range map, and +then the hash is checked against the range. + +Repairs are not allowed to occur during ownership changes. Since +ownership entails the moving of partition data it is safest to make them +mutually exclusive events. If you join or remove a node all repairs +across the entire cluster will be killed. + +### Repairing a Single Partition + +In the case of data loss in a single partition, only that partition can +be repaired. + +1. From any node in the cluster, attach to Riak's Erlang shell: + + ```bash + riak attach + ``` + + You may have to hit **Enter** again to get a console prompt. + +2. Execute the repair for a single partition using the below command: + + ```erlang + riak_kv_vnode:repair(»Partition ID«). + ``` + + where `»Partition_ID«` is replaced by the ID of the partition to + repair. For example: + + ```erlang + riak_kv_vnode:repair(251195593916248939066258330623111144003363405824). + ``` + +3. Once the command has been executed, detach from Riak using +`Control-C`. + +### Repairing All Partitions on a Node + +If a node is lost, all partitions currently owned by that node can be +repaired. + +1. From any node in the cluster, attach to Riak's Erlang shell: + + ```bash + riak attach + ``` + +2. Get a copy of the current Ring: + + ```erlang + {ok, Ring} = riak_core_ring_manager:get_my_ring(). + ``` + + You will get a lot of output with ring record information. + You can safely ignore it. + +3. Get a list of partitions owned by the node that needs to be repaired. +Replace `dev1@127.0.0.1` with the name of the node to be repaired. The +name can be found in each node's `vm.args` file, specified as the +`-name` parameter, if you are using the older configuration system; if +you are using the newer, `riak-conf`-based system, the name is given by +the `nodename` parameter. + + ```erlang + Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)]. + ``` + + **Note**: The above is an [Erlang list + comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) + that loops over each `{Partition, Node}` tuple in the ring and + extracts only the partitions that match the given node name, as a + list. + + +4. 
Execute the repair on all the partitions. Executing the repairs all
+at once will cause a lot of `{shutdown, max_concurrency}` messages in
+the logs. These can be safely ignored, as it is just the transfers
+mechanism enforcing an upper limit on the number of concurrent
+transfers.
+
+    ```erlang
+    [riak_kv_vnode:repair(P) || P <- Partitions].
+    ```
+5. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Monitoring Repairs
+
+The above repair commands can be monitored via the `riak-admin
+transfers` command.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This command can be executed from a `riak attach`
+session like below:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, and will
+look similar to:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Repairs on a node can also be killed remotely from another node in the
+cluster. From a `riak attach` session the below command can be used:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/repair-recovery/rolling-replaces.md b/content/riak/kv/3.0.2/using/repair-recovery/rolling-replaces.md
new file mode 100644
index 0000000000..1606455c98
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/repair-recovery/rolling-replaces.md
@@ -0,0 +1,76 @@
+---
+title: "Rolling Replaces"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "Rolling Replaces"
+    identifier: "repair_recover_replace"
+    weight: 106
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+---
+
+[upgrade]: {{<baseurl>}}riak/kv/3.0.2/setup/upgrading/cluster/
+[rolling restarts]: {{<baseurl>}}riak/kv/3.0.2/using/repair-recovery/rolling-restart/
+[add node]: {{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes
+
+Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis.
+
+The following steps should be undertaken on each Riak KV node that you wish to replace:
+
+1\. Create a free node:
+
+   a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster.
+
+   b\. Or have a node that is currently in the cluster leave it:
+
+    ```bash
+    riak-admin cluster leave »nodename«
+    ```
+
+   After creating a node or having one leave, wait for all transfers to complete:
+
+   ```bash
+   riak-admin transfers
+   ```
+
+2\. Join the free node to your cluster:
+
+```bash
+riak-admin cluster join »free_node«
+```
+
+3\. Next, replace the free node with an existing node:
+
+```bash
+riak-admin cluster replace »free_node« »nodename«
+```
+
+4\. Then review the cluster transition plan:
+
+```bash
+riak-admin cluster plan
+```
+
+5\. And commit the changes:
+
+```bash
+riak-admin cluster commit
+```
+
+6\. Wait for all transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+7\. Repeat steps 2-6 above until each node has been replaced.
+
+8\. Join the replaced node back into the cluster or decommission the additional node that was created.
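+
+Waiting on transfers between steps can be scripted rather than checked
+by hand. A minimal sketch, reusing the same check as the rolling
+restarts guide:
+
+```bash
+# Poll every 5 seconds until handoff has settled on the cluster
+while ! riak-admin transfers | grep -iqF 'No transfers active'
+do
+  echo 'Transfers in progress'
+  sleep 5
+done
+```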
+ + + + diff --git a/content/riak/kv/3.0.2/using/repair-recovery/rolling-restart.md b/content/riak/kv/3.0.2/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..6a2feec70d --- /dev/null +++ b/content/riak/kv/3.0.2/using/repair-recovery/rolling-restart.md @@ -0,0 +1,64 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.2/ops/running/recovery/rolling-restart + - /riak/kv/3.0.2/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/3.0.2/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. + + + + diff --git a/content/riak/kv/3.0.2/using/repair-recovery/secondary-indexes.md b/content/riak/kv/3.0.2/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..5eaa25e763 --- /dev/null +++ b/content/riak/kv/3.0.2/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,142 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.2/ops/running/recovery/repairing-indexes + - /riak/kv/3.0.2/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
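+
+Because these scans are best run off-peak, they can also be scheduled.
+A sketch cron entry (the binary path, schedule, and log file are
+assumptions to adapt):
+
+```bash
+# Run a full 2i repair at 03:00 every Sunday and append the output to a log
+0 3 * * 0 /usr/sbin/riak-admin repair-2i >> /var/log/riak/repair-2i.log 2>&1
+```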
+ +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i <Partition_ID> +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the command below: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +---- + +## Repairing Search Indexes + +Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned. + +### Running a Repair + +If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index. + +This is done as efficiently as possible by generating a hash range for all the buckets, thus avoiding a preflist calculation for each key: each key is hashed once, its range is determined from a bucket→range map, and the hash is checked against that range. + +This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly. + +1. From a cluster node with Riak installed, attach to the Riak console: + + ```bash + riak attach + ``` + + You may have to hit enter again to get a console prompt. + +2. Get a list of partitions owned by the node that needs repair: + + ```erlang + {ok, Ring} = riak_core_ring_manager:get_my_ring(). + ``` + + You will get a lot of output with Ring record information. You can safely ignore it. + +3. Then run the following code to get a list of partitions. Replace 'dev1@127.0.0.1' with the name of the node you need to repair. + + ```erlang + Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)]. + ``` + + _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the Ring and extracts, as a list, only the partitions owned by the given node._ + +4. Execute repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` messages in the logs. These can be safely ignored, as this is just the transfers mechanism enforcing an upper limit on the number of concurrent transfers. + + ```erlang + [riak_search_vnode:repair(P) || P <- Partitions]. + ``` + +5. When you're done, press `Ctrl-D` to disconnect the console. Do not run `q()`, as that will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service; it does not stop the code from running. + + +### Monitoring a Repair + +The above repair command can be slow, so if you reattach to the console, you can run the `repair_status` function. You can use the `Partitions` variable defined above to get the status of every partition. + +```erlang +[{P, riak_search_vnode:repair_status(P)} || P <- Partitions]. +``` + +When you're done, press `Ctrl-D` to disconnect the console. + +### Killing a Repair + +Currently there is no easy way to kill an individual repair. The only +option is to kill all repairs targeting a given node.
This is done by +running `riak_core_vnode_manager:kill_repairs(Reason)` on the node +undergoing repair. This means you'll either have to be attached to +that node's console or use the `rpc` module to make a remote +call. Here is an example of killing all repairs targeting partitions +on the local node: + +```erlang +riak_core_vnode_manager:kill_repairs(killed_by_user). +``` + +Log entries will reflect that repairs were killed manually, something akin to this: + +``` +2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user +``` + +Here is an example of executing the call remotely: + +```erlang +rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]). +``` + +When you're done, press `Ctrl-D` to disconnect the console. + +Repairs are not allowed to occur during ownership changes. Since +ownership entails the moving of partition data, it is safest to make +them mutually exclusive events. If you join or remove a node, all +repairs across the entire cluster will be killed. + + + + diff --git a/content/riak/kv/3.0.2/using/running-a-cluster.md b/content/riak/kv/3.0.2/using/running-a-cluster.md new file mode 100644 index 0000000000..15dadc7df7 --- /dev/null +++ b/content/riak/kv/3.0.2/using/running-a-cluster.md @@ -0,0 +1,339 @@ +--- +title: "Running a Cluster" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Running a Cluster" + identifier: "managing_running_a_cluster" + weight: 200 + parent: "managing" +toc: true +aliases: + - /riak/3.0.2/ops/building/basic-cluster-setup + - /riak/kv/3.0.2/ops/building/basic-cluster-setup +--- + +Configuring a Riak cluster involves instructing each node to listen on a +non-local interface, i.e. not `127.0.0.1`, and then joining all of the +nodes together to participate in the cluster. + +Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/) located in your `rel/riak/etc` directory (if +you compiled from source) or `/etc` (if you used a binary install of +Riak). + +The commands below presume that you are running from a source install, +but if you have installed Riak with a binary install, you can substitute +the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin` +with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts +are located in the `/bin` directory of your installation. + +> **Note on changing the `name` value** +> +> If possible, you should avoid starting Riak prior to editing the name of +a node. This setting corresponds to the `nodename` parameter in the +`riak.conf` file if you are using the newer configuration system, and to +the `-name` parameter in `vm.args` (as described below) if you are using +the older configuration system. If you have already started Riak with +the default settings, you cannot change the `-name` setting and then +successfully restart the node. +> +> If you cannot restart after changing the `-name` value, you have two +options: +> +> * Discard the existing ring metadata by removing the contents of the +`ring` directory (see the sketch following this note). This will require rejoining all nodes into a +cluster again. +> +> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
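For the first of those options, the procedure is a short stop/clear/start cycle. Below is a minimal sketch, assuming a source install whose ring directory lives under `rel/riak/data/ring`; on a binary install, the ring directory sits under the platform data directory instead.

```bash
# Discard the ring metadata so the node can start under its new name.
# Every node will then need to be rejoined into a cluster.
riak stop
rm -rf rel/riak/data/ring/*
riak start
```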
+ +## Configure the First Node + +First, stop your Riak node if it is currently running: + +```bash +riak stop +``` + +#### Select an IP address and port + +Let's say that the IP address for your cluster is 192.168.1.10 and that +you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP +interface due to performance gains), you should change your +configuration file: + +```riakconf +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +%% In the pb section of riak_core: + +{"127.0.0.1", 8087 }, +``` + +becomes + +```riakconf +listener.protobuf.internal = 192.168.1.10:8087 +``` + +```appconfig +%% In the pb section of riak_core: + +{"192.168.1.10", 8087 }, +``` + +{{% note title="Note on upgrading to 2.0" %}} +If you are upgrading to Riak version 2.0 or later from a pre-2.0 +release, you can use either your old `app.config`/`vm.args` +configuration files or the newer `riak.conf` if you wish. If you have +installed Riak 2.0 directly, you should use only `riak.conf`. + +Below, examples will be provided for both the old and new configuration +systems. Bear in mind that you need to use either the older or the newer +system, but never both simultaneously. + +More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference). +{{% /note %}} + +If you're using the HTTP interface, you will need to alter your +configuration in an analogous way: + +```riakconf +listener.http.internal = 127.0.0.1:8098 +``` + +```appconfig +%% In the riak_core section: + +{http, [ {"127.0.0.1", 8098 } ]}, +``` + +becomes + +```riakconf +listener.http.internal = 192.168.1.10:8098 +``` + +```appconfig +{http, [ {"192.168.1.10", 8098 } ]}, +``` + +#### Name your node + +Every node in Riak has a name associated with it. The default name is +`riak@127.0.0.1`. Let's say that you want to change the name to +`riak@192.168.1.10`: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +becomes + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +> **Node Names** +> +> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10` +are both acceptable node naming schemes, but using the FQDN style is +preferred. +> +> Once a node has been started, in order to change the name you must +either remove ring files from the `/data/ring` directory or +[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/3.0.2/using/admin/riak-admin/#cluster-force-replace) the node. + +#### Start the node + +Now that your node is properly configured, you can start it: + +```bash +riak start +``` + +If the Riak node has been previously started, you must use the +`riak-admin cluster replace` command to change the node name and update +the node's ring file. + +```bash +riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10 +``` + +{{% note title="Note on single nodes" %}} +If a node is started singly using default settings, as you might do when you +are building your first test environment, you will need to remove the ring +files from the data directory after you edit your configuration files. +`riak-admin cluster replace` will not work since the node has not been joined +to a cluster.
+{{% /note %}} + +As with all cluster changes, you need to view the planned changes by +running `riak-admin cluster plan` and then running `riak-admin cluster +commit` to finalize those changes. + +The node is now properly set up to join other nodes for cluster +participation. You can proceed to adding a second node to the cluster. + +## Add a Second Node to Your Cluster + +Repeat the above steps for a second host on the same network, providing +the second node with a host/port and node name. Once the second node has +started, use `riak-admin cluster join` to join the second node to the +first node, thereby creating an initial Riak cluster. Let's say that +we've named our second node `riak@192.168.1.11`. From the new node's +`/bin` directory: + +```bash +riak-admin cluster join riak@192.168.1.10 +``` + +Output from the above should resemble: + +``` +Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10` +``` + +Next, plan and commit the changes: + +```bash +riak-admin cluster plan +riak-admin cluster commit +``` + +After the last command, you should see: + +``` +Cluster changes committed +``` + +If your output was similar, then the second Riak node is now part of the +cluster and has begun syncing with the first node. Riak provides several +ways to determine the cluster's ring status. Here are two ways to +examine your Riak cluster's ring: + +1. Using the `riak-admin` command: + + ```bash + bin/riak-admin status | grep ring_members + ``` + + With output resembling the following: + + ``` + ring_members : ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +2. Running the `riak attach` command. This will open up an Erlang shell, +into which you can type the following command: + + ```erlang + 1> {ok, R} = riak_core_ring_manager:get_my_ring(). + + %% Response: + + {ok,{chstate,'riak@192.168.1.10',......... + (riak@192.168.1.10)2> riak_core_ring:all_members(R). + ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +To join additional nodes to your cluster, repeat the above steps. You +can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/3.0.2/using/cluster-operations/adding-removing-nodes) from a cluster. + +> **Ring Creation Size** +> +> All nodes in the cluster +must have the same initial ring size setting in order to join and +participate in cluster activity. This setting can be adjusted in your +configuration file using the `ring_creation_size` parameter if you're +using the older configuration system or `ring_size` in the new system. +> +> Check the value of all nodes if you receive a message like this: +> `Failed: riak@10.0.1.156 has a different ring_creation_size` + +## Running Multiple Nodes on One Host + +If you built Riak from source code, or if you are using the Mac OS X +pre-built package, then you can easily run multiple Riak nodes on the +same machine. The most common scenario for doing this is to experiment +with running a Riak cluster. + +**Note**: If you have installed the `.deb` or `.rpm` package, then you +will need to download and build Riak from source to follow the +directions below. + +To run multiple nodes, make copies of the `riak` directory. + +- If you ran `make all rel`, then this can be found in `./rel/riak` + under the Riak source root directory. +- If you are running Mac OS X, then this is the directory where you + unzipped the `.tar.gz` file. + +Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`, +`./rel/riak3`, and so on, you need to make two changes: + +1.
Set your handoff port and your Protocol Buffers or HTTP port +(depending on which interface you are using) to different values on each +node. For example: + + ```riakconf + # For Protocol Buffers: + listener.protobuf.internal = 127.0.0.1:8187 + + # For HTTP: + listener.http.internal = 127.0.0.1:8198 + + # For either interface: + handoff.port = 8199 + ``` + + ```appconfig + %% In the pb section of riak_core: + {"127.0.0.1", 8187 } + + %% In the http section of riak_core: + {"127.0.0.1", 8198} + ``` + +2. Change the name of each node to a unique name. Now, start the nodes, +changing path names and nodes as appropriate: + +```bash +./rel/riak1/bin/riak start +./rel/riak2/bin/riak start +./rel/riak3/bin/riak start + +# etc +``` + +Next, join the nodes into a cluster: + +```bash +./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak2/bin/riak-admin cluster plan +./rel/riak2/bin/riak-admin cluster commit +``` + +## Multiple Clusters on One Host + +Using the above technique, it is possible to run multiple clusters on +one computer. If a node hasn't joined an existing cluster, it will +behave just as a single-node cluster would. Running multiple clusters on one +computer is simply a matter of having two or more distinct nodes or +groups of clustered nodes. + + + + diff --git a/content/riak/kv/3.0.2/using/security.md b/content/riak/kv/3.0.2/using/security.md new file mode 100644 index 0000000000..549b54bc22 --- /dev/null +++ b/content/riak/kv/3.0.2/using/security.md @@ -0,0 +1,199 @@ +--- +title: "Security & Firewalls" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Security" + identifier: "managing_security" + weight: 205 + parent: "managing" +toc: true +aliases: + - /riak/3.0.2/ops/advanced/security + - /riak/kv/3.0.2/ops/advanced/security +--- + +[config reference search]: {{<baseurl>}}riak/kv/3.0.2/configuring/reference/#search +[config search enabling]: {{<baseurl>}}riak/kv/3.0.2/configuring/search/#enabling-riak-search +[config v3 ssl]: {{<baseurl>}}riak/kv/3.0.2/configuring/v3-multi-datacenter/ssl +[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html +[security basics]: {{<baseurl>}}riak/kv/3.0.2/using/security/basics +[security managing]: {{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/ +[Solr]: http://lucene.apache.org/solr/ +[usage search]: {{<baseurl>}}riak/kv/3.0.2/developing/usage/search + +> **Internal security** +> +> This document covers network-level security. For documentation on the +authentication and authorization features introduced in Riak 2.0, see +[Authentication and Authorization][security basics] and [Managing Security Sources][security managing]. + +This article discusses standard configurations and port settings to use +when providing network security for a Riak cluster. There are two +classes of access control for Riak: + +* Other Riak nodes participating in the cluster +* Clients making use of the Riak cluster + +The settings for both access groups are located in your cluster's +configuration settings. If you are using the newer configuration system, +you can set a host and port for each node in that node's `riak.conf` +file, setting `listener.protobuf` if you are using Riak's Protocol +Buffers interface or `listener.http` if you are using HTTP (or +`listener.https` if you are using SSL).
If you are using the older +configuration system, adjust the settings of `pb`, `http`, or `https`, +depending on which client interface you are using. + +Make note of these configurations and set up your firewall to allow +incoming TCP access to those ports or IP address/port combinations. +Exceptions to this are the `handoff_ip` and `handoff_port` directives. +Those are for communication between Riak nodes only. + +## Inter-node Communication + +Riak uses the Erlang distribution mechanism for most inter-node +communication. Riak identifies other machines in the ring using Erlang +identifiers (`<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves +these node identifiers to a TCP port on a given machine via the Erlang +Port Mapper daemon (epmd) running on each cluster node. + +By default, epmd binds to TCP port 4369 and listens on the wildcard +interface. For inter-node communication, Erlang uses an unpredictable +port by default; it binds to port 0, which means the first available +port. + +For ease of firewall configuration, Riak can be configured +to instruct the Erlang interpreter to use a limited range +of ports. For example, to restrict the range of ports that Erlang will +use for inter-Erlang node communication to 6000-7999, add the following +lines to the configuration file on each Riak node: + +```riakconf +erlang.distribution.port_range.minimum = 6000 +erlang.distribution.port_range.maximum = 7999 +``` + +```appconfig +{ kernel, [ + {inet_dist_listen_min, 6000}, + {inet_dist_listen_max, 7999} + ]}, +``` + +The above lines should be added into the top level list in app.config, +at the same level as all the other applications (e.g. `riak_core`). +Then configure your firewall to allow incoming access to TCP ports 6000 +through 7999 from whichever network(s) contain your Riak nodes. + +### Riak Node Ports + +Riak nodes in a cluster need to be able to communicate freely with one +another on the following ports: + +* epmd listener: TCP:4369 +* handoff_port listener: TCP:8099 +* range of ports specified in `app.config` or `riak.conf` + +### Riak Client Ports + +Riak clients must be able to contact at least one machine in a Riak +cluster on the following TCP ports: + +Protocol | Port +:--------|:---- +<a href="../../developing/api/http">HTTP</a> | TCP port 8098 +<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087 + +### Riak Search Ports + +Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs +on each Riak node if search has been [enabled][config search enabling]. When +Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well +as some ephemeral ports. The well-known port is determined by the value of +`search.solr.jmx_port` in each node's [Search configuration][config reference search]. +The default is 8985. + +In addition to JMX ports, Solr also binds to a well-known port of its +own, as determined by each node's `search.solr.port` setting, which is +also located in each node's Search configuration. The default is 8093. + +# Riak Security Community + +## Riak + +Riak is a powerful open-source distributed database focused on scaling +predictably and easily, while remaining highly available in the face of +server crashes, network partitions or other (inevitable) disasters. + +## Commitment + +Data security is an important and sensitive issue to many of our users.
+ +A real-world approach to security allows us to balance appropriate +levels of security and related overhead while creating a fast, scalable, +and operationally straightforward database. + +### Continuous Improvement + +Though we make every effort to thwart security vulnerabilities whenever +possible (including through independent reviews), no system is +completely secure. We will never claim that Riak is 100% secure (and you +should seriously doubt anyone who claims their solution is). What we can +promise is that we openly accept all vulnerabilities from the community. +When appropriate, we'll publish and make every attempt to quickly +address these concerns. + +### Balance + +More layers of security increase operational and administrative costs. +Sometimes those costs are warranted, sometimes they are not. Our +approach is to strike an appropriate balance between effort, cost, and +security. + +For example, Riak does not have fine-grained role-based security. Though +it can be an attractive bullet-point in a database comparison chart, +you're usually better off finely controlling data access through your +application or a service layer. + +### Notifying Basho + +If you discover a potential security issue, please email us at +**security@basho.com**, and allow us 48 hours to reply. + +We prefer to be contacted first, rather than learning about a possible +exploit from a blog post. This allows us to open a dialogue with the security +community on how best to handle a possible exploit without putting any +users at risk. + +## Security Best Practices + +### Authentication and Authorization + +For instructions on how to apply permissions and to require client +authentication, please see our documentation on [Riak Security][security basics]. + +### Network Configurations + +Being a distributed database means that much of Riak's security springs +from how you configure your network. We have a few recommendations for +[Security and Firewalls][security basics]. + +### Client Auth + +All of the Riak client libraries support encrypted TCP communication +as well as authentication and authorization. For instructions on how +to apply permissions and to require client authentication, please see +our documentation on [Riak Security][security basics]. + +### Multi-Datacenter Replication + +For those versions of Riak that support Multi Data Center (MDC) +Replication, you can configure Riak 1.2+ to communicate over SSL to +seamlessly encrypt the message traffic. + +See also: [Multi Data Center Replication: SSL][config v3 ssl] + + + + diff --git a/content/riak/kv/3.0.2/using/security/basics.md b/content/riak/kv/3.0.2/using/security/basics.md new file mode 100644 index 0000000000..43e28297b0 --- /dev/null +++ b/content/riak/kv/3.0.2/using/security/basics.md @@ -0,0 +1,851 @@ +--- +title: "Security Basics" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Security Basics" + identifier: "security_basics" + weight: 100 + parent: "managing_security" +toc: true +aliases: + - /riak/3.0.2/ops/running/authz + - /riak/kv/3.0.2/ops/running/authz +--- + +> **Note on Network security** +> +> This document covers only the 2.0 authentication and authorization +features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/3.0.2/using/security/).
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/3.0.2/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/3.0.2/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
+ +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak-admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak-admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak-admin security disable +``` + +While security is disabled, clients will need to be reconfigured so that +they no longer use TLS or send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak-admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics, granting users +selective access to Riak functionality, and also revoking that access. +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created.
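As a quick illustration of the syntax, the command below creates a user with a password plus one arbitrary extra option. The `email` option here is hypothetical: Riak will store and display it, but it carries no special significance.

```bash
# Create a user with a password and a custom, non-standard option.
riak-admin security add-user riakuser password=Test1234 email=riakuser@example.com
```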
+ +### Retrieve a Current User or Group List + +A list of currently existing users can be accessed at any time: + +```bash +riak-admin security print-users +``` + +The same goes for groups: + +```bash +riak-admin security print-groups +``` + +Example output, assuming user named `riakuser` with an assigned +password: + +``` ++----------+--------+----------------------+------------------------------+ +| username | groups | password | options | ++----------+--------+----------------------+------------------------------+ +| riakuser | |983e8ae1421574b8733824| [] | ++----------+--------+----------------------+------------------------------+ +``` + +**Note**: All passwords are displayed in hashed form in console +output. + +If the user `riakuser` were assigned to the group `dev` and a `name` of +`lucius`, the output would look like this: + +``` ++----------+----------------+----------------------+---------------------+ +| username | groups | password | options | ++----------+----------------+----------------------+---------------------+ +| riakuser | dev |983e8ae1421574b8733824| [{"name","lucius"}] | ++----------+----------------+----------------------+---------------------+ +``` + +If you'd like to see which permissions have been assigned to +`riakuser`, you would need to use the `print-grants` command, detailed +below. + +The `security print-user` or `security print-group` (singular) commands +can be used with a name as an argument to see the same information as +above, except for only that user or group. + +### Permissions Grants For a Single User or Group + +You can retrieve authorization information about a specific user or +group using the `print-grants` command, which takes the form of +`riak-admin security print-grants <username>`. + +The output will look like this if the user `riakuser` has been +explicitly granted a `riak_kv.get` permission on the bucket +`shopping_list` and inherits a set of permissions from the `admin` +group: + +``` +Inherited permissions (user/riakuser) + ++--------+----------+----------+----------------------------------------+ +| group | type | bucket | grants | ++--------+----------+----------+----------------------------------------+ +| admin | * | * | riak_kv.get, riak_kv.delete, | +| | | | riak_kv.put | ++--------+----------+----------+----------------------------------------+ + +Dedicated permissions (user/riakuser) + ++----------+-------------+----------------------------------------+ +| type | bucket | grants | ++----------+-------------+----------------------------------------+ +| ANY |shopping_list| riak_kv.get | ++----------+-------------+----------------------------------------+ + +Cumulative permissions (user/riakuser) + ++----------+-------------+----------------------------------------+ +| type | bucket | grants | ++----------+-------------+----------------------------------------+ +| * | * | riak_kv.get, riak_kv.delete, | +| | | riak_kv.put | +| ANY |shopping_list| riak_kv.get | ++----------+-------------+----------------------------------------+ +``` + +**Note**: The term `admin` is not a reserved term in Riak security. It +is used here only for illustrative purposes. + +Because the same name can represent both a user and a group, a prefix +(`user/` or `group/`) can be used before the name (e.g., `print-grants +user/admin`). If a name collides and no prefix is supplied, grants for +both will be listed separately.
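For instance, if `admin` names both a user and a group, each can be inspected explicitly:

```bash
riak-admin security print-grants user/admin    # grants held by the user
riak-admin security print-grants group/admin   # grants held by the group
```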
+ +### Add Group + +For easier management of permissions across several users, it is +possible to create groups to be assigned to those users. + +```bash +riak-admin security add-group admin +``` + +### Add User + +To create a user with the username `riakuser`, we use the `add-user` +command: + +```bash +riak-admin security add-user riakuser +``` + +Using the command this way will create the user `riakuser` without _any_ +characteristics beyond a username, which is the only attribute that you +must assign upon user creation. + +Alternatively, a password---or other attributes---can be assigned to the +user upon creation. Here, we'll assign a password: + +```bash +riak-admin security add-user riakuser password=Test1234 +``` + +### Assigning a Password and Altering Existing User Characteristics + +While passwords and other characteristics can be set upon user creation, +it often makes sense to change user characteristics after the user has +already been created. Let's say that the user `riakuser` was created +without a password (or created _with_ a password that we'd like to +change). The `alter-user` command can be used to modify our `riakuser` +user: + +```bash +riak-admin security alter-user riakuser password=opensesame +``` + +When creating or altering a user, any number of `<option>=<value>` +pairs can be appended to the end of the command. Any non-standard +options will be stored and displayed via the `riak-admin security +print-users` command. + +```bash +riak-admin security alter-user riakuser name=bill age=47 fav_color=red +``` + +Now, the `print-users` command should return this: + +``` ++----------+--------+----------+--------------------------------------------------+ +| username | groups | password | options | ++----------+--------+----------+--------------------------------------------------+ +| riakuser | | |[{"fav_color","red"},{"age","47"},{"name","bill"}]| ++----------+--------+----------+--------------------------------------------------+ +``` + +**Note**: Usernames _cannot_ be changed using the `alter-user` command. +For example, running `riak-admin security alter-user riakuser +username=other-name` will instead add the +`{"username","other-name"}` tuple to `riakuser`'s options. + +### Managing Groups for a User + +If we have a user `riakuser` and we'd like to assign her to the +`admin` group, we assign the value `admin` to the option `groups`: + +```bash +riak-admin security alter-user riakuser groups=admin +``` + +If we'd like to make the user `riakuser` both an `admin` and an +`archoverlord`: + +```bash +riak-admin security alter-user riakuser groups=admin,archoverlord +``` + +There is no way to incrementally add groups; even if `riakuser` was +already an `admin`, it is necessary to list it again when adding the +`archoverlord` group. Thus, to remove a group from a user, use +`alter-user` and list all *other* groups. + +If the user should be removed from all groups, use `groups=` with no +list: + +```bash +riak-admin security alter-user riakuser groups= +``` + +### Managing Groups for Groups + +Groups can be added to other groups for cascading permissions.
+ +```bash +riak-admin security alter-group admin groups=dev +``` + +### Deleting a User or Group + +If you'd like to remove a user, use the `del-user` command: + +```bash +riak-admin security del-user riakuser +``` + +For groups, use the `del-group` command: + +```bash +riak-admin security del-group admin +``` + +### Adding or Deleting Multiple Users + +The `riak-admin security` command does not currently allow you to +add or delete multiple users using a single command. Instead, they must +be added or deleted one by one. + +## Managing Permissions + +Permission to perform a wide variety of operations against Riak can be +granted to---or revoked from---users via the `grant` and `revoke` +commands. + +### Basic Form + +The `grant` command takes one of the following forms: + +```bash +riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]} +riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]} +riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]} +``` + +The `revoke` command is essentially the same, except that `to` is +replaced with `from`: + +```bash +riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]} +riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]} +riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]} +``` + +If you select `any`, this means that the permission (or set of +permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types). If you specify a bucket type only, then the permission +is granted/revoked for all buckets of that type. If you specify a bucket +type _and_ a bucket, the permission is granted/revoked only for that +bucket type/bucket combination. + +**Note**: You cannot grant/revoke permissions with respect to a bucket +alone. You must specify either a bucket type by itself or a bucket type +and bucket. This limitation reflects the naming structure underlying +buckets and bucket types. + +Selecting `all` grants or revokes a permission (or set of permissions) +for all users in all groups. When specifying the user(s)/group(s) to +which you want to apply a permission (or set of permissions), you may +list any number of users or groups comma-separated with no whitespace. +Here is an example of granting multiple permissions across all buckets +and bucket types to multiple users: + +```bash +riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed +``` + +If the same name is used for both a user and a group, the `grant` +command will ask for the name to be prefixed with `user/` or `group/` +to disambiguate. + +### Key/Value Permissions + +Permissions that can be granted for basic key/value access +functionality: + +Permission | Operation +:----------|:---------- +`riak_kv.get` | Retrieve objects +`riak_kv.put` | Create or update objects +`riak_kv.delete` | Delete objects +`riak_kv.index` | Index objects using secondary indexes (2i) +`riak_kv.list_keys` | List all of the keys in a bucket +`riak_kv.list_buckets` | List all buckets + +{{% note title="Note on Listing Keys and Buckets" %}} +`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive +operations that should be performed very rarely and never in production. +Access to this functionality should be granted very carefully.
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/3.0.2/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/3.0.2/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/3.0.2/configuring/search/) document. 
+ +#### Usage Examples + +To grant the user `riakuser` the ability to query all indexes: + +```bash +riak-admin security grant search.query on index to riakuser + +# To revoke: +# riak-admin security revoke search.query on index from riakuser +``` + +To grant the user `riakuser` the ability to query all schemas: + +```bash +riak-admin security grant search.query on schema to riakuser + +# To revoke: +# riak-admin security revoke search.query on schema from riakuser +``` + +To grant the user `riakuser` admin privileges only on the index +`riakusers_index`: + +```bash +riak-admin security grant search.admin on index riakusers_index to riakuser + +# To revoke: +# riak-admin security revoke search.admin on index riakusers_index from riakuser +``` + +To grant `riakuser` querying and admin permissions on the index +`riakusers_index`: + +```bash +riak-admin security grant search.query,search.admin on index riakusers_index to riakuser + +# To revoke: +# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser +``` + +## Managing Sources + +While user management enables you to control _authorization_ with regard +to users, security **sources** provide you with an interface for +managing means of _authentication_. If you create users and grant them +access to some or all of Riak's functionality as described in the [User Management](#user-management) section, +you will then need to define security sources required for +authentication. + +A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/). + +### Add Source + +Riak security sources may be applied to a specific user, multiple users, +or all users (`all`). + +#### Available Sources + +Source | Description +:--------|:----------- +`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range +`password` | Checks the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak +`pam` | Authenticates against the given pluggable authentication module (PAM) service +`certificate` | Authenticates using a client certificate + +### Example: Adding a Trusted Source + +As noted above, sources can be added to a specific user, to multiple +users, or to all users (`all`). + +In general, the `add-source` command takes the following form: + +```bash +riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]] +``` + +Using `all` indicates that the authentication source can be added to +all users. A source can be added to a specific user, e.g. `add-source +superuser`, or to a list of users separated by commas, e.g. `add-source +jane,bill,admin`. + +Let's say that we want to give all users trusted access to securables +(without a password) when requests come from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +At that point, the `riak-admin security print-sources` command would +print the following: + +``` ++--------------------+------------+----------+----------+ +| users | cidr | source | options | ++--------------------+------------+----------+----------+ +| all |127.0.0.1/32| trust | [] | ++--------------------+------------+----------+----------+ +``` + +### Deleting Sources + +If we wish to remove the `trust` source that we granted to `all` in the +example above, we can simply use the `del-source` command and specify +the CIDR.
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/3.0.2/setup/installing/source/erlang). 
This issue should +not affect Erlang 17.0 and later. + +## Enabling SSL + +In order to use any authentication or authorization features, you must +enable SSL for Riak. **SSL is disabled by default**, so you will need +to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#client-interfaces) for the node +as well as a [certificate configuration](#certificate-configuration). + +If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/3.0.2/developing/api/http) for Riak and would like to +configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host +and port. The following configuration would establish port 8088 on +`localhost` as the HTTPS port: + +```riakconf +listener.https.$name = 127.0.0.1:8088 + +# By default, "internal" is used as the "name" setting +``` + +```appconfig +{riak_core, [ + %% Other configs + {https, [{"127.0.0.1", 8088}]}, + %% Other configs + ]} +``` + +## TLS Settings + +When using Riak security, you can choose which versions of SSL/TLS are +allowed. By default, only TLS 1.2 is allowed, but this version can be +disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#security) to `on` or `off`: + +* `tls_protocols.tlsv1` +* `tls_protocols.tlsv1.1` +* `tls_protocols.tlsv1.2` +* `tls_protocols.sslv3` + +Three things to note: + +* Among the four available options, only TLS version 1.2 is enabled by + default +* You can enable more than one protocol at a time +* We strongly recommend that you do _not_ use SSL version 3 unless + absolutely necessary + +## Certificate Configuration + +If you are using any of the available [security sources]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#trust-based-authentication), you will need to do so +over a secure SSL connection. In order to establish a secure connection, +you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#security) point to the proper paths for your +generated certs. By default, Riak assumes that all certs are stored in +each node's `/etc` directory. + +If you are using the newer, `riak.conf`-based configuration system, you +can change the location of the `/etc` directory by modifying the +`platform_etc_dir`. More information can be found in our documentation +on [configuring directories]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/#directories).
+ +<table class="riak-conf"> + <thead> + <tr> + <th>Type</th> + <th>Parameter</th> + <th>Default</th> + </tr> + </thead> + <tbody> + <tr> + <td><strong>Signing authority</strong></td> + <td><code>ssl.cacertfile</code></td> + <td><code>#(platform_etc_dir)/cacertfile.pem</code></td> + </tr> + <tr> + <td><strong>Cert</strong></td> + <td><code>ssl.certfile</code></td> + <td><code>#(platform_etc_dir)/cert.pem</code></td> + </tr> + <tr> + <td><strong>Key file</strong></td> + <td><code>ssl.keyfile</code></td> + <td><code>#(platform_etc_dir)/key.pem</code></td> + </tr> + </tbody> +</table> + +If you are using the older, `app.config`-based configuration system, +these paths can be set in the `ssl` subsection of the `riak_core` +section. The corresponding parameters are shown in the example below: + +```appconfig +{riak_core, [ + %% Other configs + + {ssl, [ + {certfile, "./etc/cert.pem"}, + {keyfile, "./etc/key.pem"}, + {cacertfile, "./etc/cacertfile.pem"} + ]}, + + %% Other configs +]} +``` + +## Referer Checks and Certificate Revocation Lists + +In order to provide safeguards against +[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting) +(XSS) and +[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +attacks, Riak performs [secure referer +checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those +checks make it impossible to serve data directly from Riak. To disable +those checks, set the `secure_referer_check` parameter to `off`. + +If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/3.0.2/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +default. To disable this behavior, set the `check_crl` parameter to +`off`. + + + + diff --git a/content/riak/kv/3.0.2/using/security/best-practices.md b/content/riak/kv/3.0.2/using/security/best-practices.md new file mode 100644 index 0000000000..144a89cdc5 --- /dev/null +++ b/content/riak/kv/3.0.2/using/security/best-practices.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Security Best Practices" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Best Practices" + identifier: "security_best_practices" + weight: 102 + parent: "managing_security" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/3.0.2/using/security/managing-sources.md b/content/riak/kv/3.0.2/using/security/managing-sources.md new file mode 100644 index 0000000000..fb26237b0c --- /dev/null +++ b/content/riak/kv/3.0.2/using/security/managing-sources.md @@ -0,0 +1,273 @@ +--- +title: "Managing Security Sources" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Managing Security Sources" + identifier: "security_manage_sources" + weight: 101 + parent: "managing_security" +toc: true +aliases: + - /riak/3.0.2/ops/running/security-sources + - /riak/kv/3.0.2/ops/running/security-sources +--- + +If you're looking for more general information on Riak Security, it may +be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/3.0.2/using/security/basics). + +This document provides more granular information on the four available +authentication sources in Riak Security: trusted networks, password, +pluggable authentication modules (PAM), and certificates. These sources +correspond to `trust`, `password`, `pam`, and `certificate`, +respectively, in the `riak-admin security` interface. + +The examples below will assume that the network in question is +`127.0.0.1/32`, that a Riak user named `riakuser` has been +[created]({{<baseurl>}}riak/kv/3.0.2/using/security/basics/#user-management), and that +security has been [enabled]({{<baseurl>}}riak/kv/3.0.2/using/security/basics/#security-basics). + +{{% note title="Note on SSL connections" %}} +If you use _any_ of the aforementioned security sources, even `trust`, you +will need to do so via a secure SSL connection.
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak-admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak-admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 password +riak-admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/3.0.2/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
+The default directory for certificates is `/etc`, though you can specify +a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: + +```riakconf +ssl.certfile = /path/to/cert.pem +ssl.keyfile = /path/to/key.pem +ssl.cacertfile = /path/to/cacert.pem +``` + +In the client-side example above, the client's `CN` and Riak username +needed to match. On the server (i.e. Riak) side, the `CN` specified _on +each node_ must match the node's name as registered by Riak. You can +find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/3.0.2/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +`riak-node-1`, you would need to generate your certificate with that in +mind, as in this OpenSSL example: + +```bash +openssl req -new ... '/CN=riak-node-1' +``` + +Once certificates have been properly generated and configured on all of +the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/3.0.2/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +certificate that you generated for the user `riakuser`. + +How to use Riak clients in conjunction with OpenSSL and other +certificates varies from client library to client library. We strongly +recommend checking the documentation of your client library for further +information. + +## PAM-based Authentication + +This section assumes that you have set up a PAM service bearing the name +`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition +specifying `auth` and/or other PAM services set up to authenticate a +user named `riakuser`. As in the certificate-based authentication +example above, the user's name must be the same in both your +authentication module and in Riak Security. + +If we want the user `riakuser` to use this PAM service on `localhost`, +we need to add a `pam` security source in Riak and specify the name of +the service: + +```bash +riak-admin security add-source all 127.0.0.1/32 pam service=riak_pam +``` + +**Note**: If you do not specify a name for your PAM service, Riak will +use the default, which is `riak`. + +To verify that the source has been properly specified: + +```bash +riak-admin security print-sources +``` + +That command should output the following: + +``` ++--------------------+------------+----------+------------------------+ +| users | cidr | source | options | ++--------------------+------------+----------+------------------------+ +| riakuser |127.0.0.1/32| pam |[{"service","riak_pam"}]| ++--------------------+------------+----------+------------------------+ +``` + +You can test that setup most easily by using `curl`. A normal request to +Riak without specifying a user will return an `Unauthorized` message: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +Response: + +``` +<html><head><title>401 Unauthorized
</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></body></html>
+```
+
+If you identify yourself as `riakuser` and are successfully
+authenticated by your PAM service, you should get either `not found` or
+a Riak object if one is stored in the specified bucket type/bucket/key
+path:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## How Sources Are Applied
+
+When managing security sources---any of the sources explained
+above---you always have the option of applying a source to either a
+single user, multiple users, or all users (`all`). If specific users and
+`all` have no sources in common, this presents no difficulty. But what
+happens if one source is applied to `all` and a different source is
+applied to a specific user?
+
+The short answer is that the more specifically assigned source---i.e. the
+one assigned to the user---will be considered that user's security source.
+We'll illustrate that with the following example, in which the `certificate`
+source is assigned to `all`, but the `password` source is assigned to `riakuser`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 certificate
+riak-admin security add-source riakuser 127.0.0.1/32 password
+```
+
+If we run `riak-admin security print-sources`, we'll get the following
+output:
+
+```
++--------------------+------------+-----------+----------+
+|       users        |    cidr    |  source   | options  |
++--------------------+------------+-----------+----------+
+|      riakuser      |127.0.0.1/32| password  |    []    |
+|                    |127.0.0.1/32|certificate|    []    |
+|        all         |127.0.0.1/32|certificate|    []    |
++--------------------+------------+-----------+----------+
+```
+
+As we can see, `password` is set as the security source for `riakuser`,
+whereas everyone else will authenticate using `certificate`.
+
+
+
+
diff --git a/content/riak/kv/3.0.2/using/security/v2-v3-ssl-ca.md b/content/riak/kv/3.0.2/using/security/v2-v3-ssl-ca.md
new file mode 100644
index 0000000000..e93ccb0354
--- /dev/null
+++ b/content/riak/kv/3.0.2/using/security/v2-v3-ssl-ca.md
@@ -0,0 +1,85 @@
+---
+draft: true
+title: "V2 / V3 SSL & CA Validation"
+description: ""
+project: "riak_kv"
+project_version: 3.0.2
+menu:
+  riak_kv-3.0.2:
+    name: "V2/V3 SSL & CA Validation"
+    identifier: "security_validation"
+    weight: 103
+    parent: "managing_security"
+toc: true
+aliases:
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**.
Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/3.0.2/using/troubleshooting.md b/content/riak/kv/3.0.2/using/troubleshooting.md new file mode 100644 index 0000000000..b6431c5129 --- /dev/null +++ b/content/riak/kv/3.0.2/using/troubleshooting.md @@ -0,0 +1,28 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +aliases: +--- + +[http 204]: ./http-204 + +## In This Section + +#### [HTTP 204][http 204] + +About the HTTP 204 response. + +[Learn More >>][http 204] + + + + diff --git a/content/riak/kv/3.0.2/using/troubleshooting/http-204.md b/content/riak/kv/3.0.2/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..6bc13b642f --- /dev/null +++ b/content/riak/kv/3.0.2/using/troubleshooting/http-204.md @@ -0,0 +1,22 @@ +--- +title: "HTTP 204" +description: "" +project: "riak_kv" +project_version: 3.0.2 +menu: + riak_kv-3.0.2: + name: "HTTP 204" + identifier: "troubleshooting_http_204" + weight: 101 + parent: "managing_troubleshooting" +toc: true +aliases: +--- + +In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers. + +If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored, otherwise you will receive a `204 No Content`. 
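+
+To see the difference concretely, here is a minimal sketch against Riak KV's
+HTTP API (assuming a node listening on `localhost:8098`; the bucket and key
+names are only examples):
+
+```curl
+# Without returnbody, Riak replies with "204 No Content"
+curl -i -XPUT -H 'Content-Type: text/plain' \
+  -d 'hello' \
+  'http://localhost:8098/buckets/test/keys/demo'
+
+# With returnbody=true, Riak replies with "200 OK" and echoes the stored value
+curl -i -XPUT -H 'Content-Type: text/plain' \
+  -d 'hello' \
+  'http://localhost:8098/buckets/test/keys/demo?returnbody=true'
+```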
+ + + + diff --git a/content/riak/kv/3.0.3/_reference-links.md b/content/riak/kv/3.0.3/_reference-links.md new file mode 100644 index 0000000000..614822440c --- /dev/null +++ b/content/riak/kv/3.0.3/_reference-links.md @@ -0,0 +1,254 @@ + +# Riak KV 3.0.3 Reference Links List + + +## Common + +[downloads]: {{}}riak/kv/3.0.3/downloads/ +[install index]: {{}}riak/kv/3.0.3/setup/installing +[upgrade index]: {{}}riak/kv/3.0.3/upgrading +[plan index]: {{}}riak/kv/3.0.3/planning +[config index]: {{}}riak/kv/3.0.3/using/configuring/ +[config reference]: {{}}riak/kv/3.0.3/configuring/reference/ +[manage index]: {{}}riak/kv/3.0.3/using/managing +[performance index]: {{}}riak/kv/3.0.3/using/performance +[glossary vnode]: {{}}riak/kv/3.0.3/learn/glossary/#vnode +[contact basho]: https://www.tiot.jp/en/about-us/contact-us/ + + +## Planning + +[plan index]: {{}}riak/kv/3.0.3/setup/planning +[plan start]: {{}}riak/kv/3.0.3/setup/planning/start +[plan backend]: {{}}riak/kv/3.0.3/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/3.0.3/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/3.0.3/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/3.0.3/setup/planning/backend/leveled +[plan backend memory]: {{}}riak/kv/3.0.3/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/3.0.3/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/3.0.3/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/3.0.3/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/3.0.3/setup/planning/best-practices +[plan future]: {{}}riak/kv/3.0.3/setup/planning/future + + +## Installing + +[install index]: {{}}riak/kv/3.0.3/setup/installing +[install aws]: {{}}riak/kv/3.0.3/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/3.0.3/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/3.0.3/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/3.0.3/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/3.0.3/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/3.0.3/setup/installing/smartos +[install solaris]: {{}}riak/kv/3.0.3/setup/installing/solaris +[install suse]: {{}}riak/kv/3.0.3/setup/installing/suse +[install windows azure]: {{}}riak/kv/3.0.3/setup/installing/windows-azure + +[install source index]: {{}}riak/kv/3.0.3/setup/installing/source +[install source erlang]: {{}}riak/kv/3.0.3/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/3.0.3/setup/installing/source/jvm + +[install verify]: {{}}riak/kv/3.0.3/setup/installing/verify + + +## Upgrading + +[upgrade index]: {{}}riak/kv/3.0.3/setup/upgrading +[upgrade checklist]: {{}}riak/kv/3.0.3/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/3.0.3/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/3.0.3/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/3.0.3/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/3.0.3/setup/downgrade + + +## Configuring + +[config index]: {{}}riak/kv/3.0.3/configuring +[config basic]: {{}}riak/kv/3.0.3/configuring/basic +[config backend]: {{}}riak/kv/3.0.3/configuring/backend +[config manage]: {{}}riak/kv/3.0.3/configuring/managing +[config reference]: {{}}riak/kv/3.0.3/configuring/reference/ +[config strong consistency]: {{}}riak/kv/3.0.3/configuring/strong-consistency +[config load balance]: {{}}riak/kv/3.0.3/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/3.0.3/configuring/mapreduce +[config search]: 
{{}}riak/kv/3.0.3/configuring/search/ + +[config v3 mdc]: {{}}riak/kv/3.0.3/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/3.0.3/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/3.0.3/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/3.0.3/configuring/v3-multi-datacenter/ssl + +[config v2 mdc]: {{}}riak/kv/3.0.3/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/3.0.3/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/3.0.3/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/3.0.3/configuring/v2-multi-datacenter/ssl + + + +## Using + +[use index]: {{}}riak/kv/3.0.3/using/ +[use admin commands]: {{}}riak/kv/3.0.3/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/3.0.3/using/running-a-cluster + +### Reference + +[use ref custom code]: {{}}riak/kv/3.0.3/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/3.0.3/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/3.0.3/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/3.0.3/using/reference/search +[use ref 2i]: {{}}riak/kv/3.0.3/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/3.0.3/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/3.0.3/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/3.0.3/using/reference/jmx +[use ref obj del]: {{}}riak/kv/3.0.3/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/3.0.3/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/3.0.3/using/reference/v2-multi-datacenter + +### Cluster Admin + +[use admin index]: {{}}riak/kv/3.0.3/using/admin/ +[use admin commands]: {{}}riak/kv/3.0.3/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/3.0.3/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/3.0.3/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/3.0.3/using/admin/riak-control/ + +### Cluster Operations + +[cluster ops add remove node]: {{}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/3.0.3/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/3.0.3/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/3.0.3/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/3.0.3/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/3.0.3/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/3.0.3/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/3.0.3/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/3.0.3/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/3.0.3/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/3.0.3/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/3.0.3/using/cluster-operations/v2-multi-datacenter + +### Repair/Recover + +[repair recover index]: {{}}riak/kv/3.0.3/using/repair-recovery +[repair recover index]: {{}}riak/kv/3.0.3/using/repair-recovery/failure-recovery/ + +### Security + +[security index]: {{}}riak/kv/3.0.3/using/security/ +[security basics]: {{}}riak/kv/3.0.3/using/security/basics +[security managing]: {{}}riak/kv/3.0.3/using/security/managing-sources/ + +### Performance + +[perf index]: 
{{}}riak/kv/3.0.3/using/performance/ +[perf benchmark]: {{}}riak/kv/3.0.3/using/performance/benchmarking +[perf open files]: {{}}riak/kv/3.0.3/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/3.0.3/using/performance/erlang +[perf aws]: {{}}riak/kv/3.0.3/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/3.0.3/using/performance/latency-reduction + +### Troubleshooting + +[troubleshoot http]: {{}}riak/kv/3.0.3/using/troubleshooting/http-204 + + +## Developing + +[dev index]: {{}}riak/kv/3.0.3/developing +[dev client libraries]: {{}}riak/kv/3.0.3/developing/client-libraries +[dev data model]: {{}}riak/kv/3.0.3/developing/data-modeling +[dev data types]: {{}}riak/kv/3.0.3/developing/data-types +[dev kv model]: {{}}riak/kv/3.0.3/developing/key-value-modeling + +### Getting Started + +[getting started]: {{}}riak/kv/3.0.3/developing/getting-started +[getting started java]: {{}}riak/kv/3.0.3/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/3.0.3/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/3.0.3/developing/getting-started/python +[getting started php]: {{}}riak/kv/3.0.3/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/3.0.3/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/3.0.3/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/3.0.3/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/3.0.3/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/3.0.3/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/3.0.3/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/3.0.3/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/3.0.3/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/3.0.3/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/3.0.3/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/3.0.3/developing/getting-started/golang/object-modeling + +### Usage + +[usage index]: {{}}riak/kv/3.0.3/developing/usage +[usage bucket types]: {{}}riak/kv/3.0.3/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/3.0.3/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/3.0.3/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/3.0.3/developing/usage/content-types +[usage create objects]: {{}}riak/kv/3.0.3/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/3.0.3/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/3.0.3/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/3.0.3/developing/usage/mapreduce +[usage search]: {{}}riak/kv/3.0.3/developing/usage/search +[usage search schema]: {{}}riak/kv/3.0.3/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/3.0.3/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/3.0.3/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/3.0.3/developing/usage/updating-objects + +### App Guide + +[apps mapreduce]: {{}}riak/kv/3.0.3/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/3.0.3/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/3.0.3/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/3.0.3/developing/api/backend +[dev api http]: 
{{}}riak/kv/3.0.3/developing/api/http +[dev api http status]: {{}}riak/kv/3.0.3/developing/api/http/status +[dev api pbc]: {{}}riak/kv/3.0.3/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/3.0.3/learn/glossary/ +[glossary aae]: {{}}riak/kv/3.0.3/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/3.0.3/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/3.0.3/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/3.0.3/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/3.0.3/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/3.0.3/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/3.0.3/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/3.0.3/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/3.0.3/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/3.0.3/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/3.0.3/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/3.0.3/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/3.0.3/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + + + + + + diff --git a/content/riak/kv/3.0.3/add-ons.md b/content/riak/kv/3.0.3/add-ons.md new file mode 100644 index 0000000000..196a4ab271 --- /dev/null +++ b/content/riak/kv/3.0.3/add-ons.md @@ -0,0 +1,25 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +aliases: +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. + +* [Riak Redis Add-on]({{}}riak/kv/3.0.3/add-ons/redis/) + + + + + diff --git a/content/riak/kv/3.0.3/add-ons/redis.md b/content/riak/kv/3.0.3/add-ons/redis.md new file mode 100644 index 0000000000..dbaf8b7a4f --- /dev/null +++ b/content/riak/kv/3.0.3/add-ons/redis.md @@ -0,0 +1,63 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +aliases: +--- + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. +{{% /note %}} + +Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV. 
+ +RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate. + +Whether you are looking to build out a session, shopping cart, advertisement or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV). + +## Compatibility + +RRA is supported on the following platforms: + +* RHEL/CentOS 6 +* RHEL/CentOS 7 +* Ubuntu 12.04 LTS "Precise Pangolin" +* Ubuntu 14.04 LTS "Trusty Tahr" +* Debian 7 "Wheezy" +* Debian 8 "Jessie" + +RRA is compatible with the following services: + +* Riak KV Enterprise (2.1.4+) +* Riak TS Enterprise (1.4.0+) +* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster) + * Redis Cluster and RRA's consistent hash are at odds, which surface as errors + such as MOVED, ASK, and CROSSSLOT messages from Redis, see (WIP): + https://github.com/antirez/redis-rb-cluster + +## Get Started + +* [Set up RRA.][addon redis setup] +* [Use RRA with various clients.][addon redis use] +* [Develop with RRA.][addon redis develop] +* [Learn about RRA's features.][addon redis features] + + + + diff --git a/content/riak/kv/3.0.3/add-ons/redis/developing-rra.md b/content/riak/kv/3.0.3/add-ons/redis/developing-rra.md new file mode 100644 index 0000000000..b2cee46e47 --- /dev/null +++ b/content/riak/kv/3.0.3/add-ons/redis/developing-rra.md @@ -0,0 +1,330 @@ +--- +title: "Developing with Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Develop with Redis Add-on" + identifier: "add-ons_redis_develop" + weight: 403 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: +--- + +[redis-clients]: http://redis.io/clients +[usage bucket types]: {{}}riak/kv/3.0.3/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/3.0.3/developing/api/http +[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ +[apps replication properties]: {{}}riak/kv/3.0.3/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/3.0.3/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/3.0.3/learn/concepts/causal-context +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. + +## Overview + +Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides accessibility to Riak KV, as a persistent data store, with Redis, as a cache through the various Redis client libraries and command-line interface tool `redis-cli`. + +As with Riak KV, the cache proxy service almost always performs best and most +predictably when you use the basic CRUD operations -- Create, Read, Update, +Delete -- that you'd find in any key/value store. Learning these operations +is a great place to start when beginning to develop applications that use +RRA. + +The set of clients (including recommendations) for Redis are listed at +[Redis clients][redis-clients]. For brevity sake, examples provided here are +in: + +* Erlang (Eredis) +* Javascript (node_redis) +* Python (redis-py) +* Ruby (redis-rb) +* Scala (lettuce) +* Java, see the Scala examples. The code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client. 
+ +## Riak KV Setup + +While you can use Riak Redis Add-on with Riak KV configured so either `last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'. + +The cache proxy service is tested under both configurations. However, due to lack of support via the Redis protocol for returning multiple values for a single `GET`, effectively `last_write_wins` semantics apply. + +For a deeper explanation of Riak KV's configurable behaviors, see John Daily's +blog series [part 4][config-behaviors] . + +### Bucket Type Setup + +#### Create a Bucket Type + +If your application organizes data in a way that does not include bucket-type +and instead only uses bucket to organize its keyspace, the `default` bucket-type +can be used by omitting the bucket-type portion of the colon-delimited +hierarchical namespaced key. Otherwise said, `test:food` is equivalent to +`default:test:food` where the bucket-type is `default`, the bucket is `test`, +and the key is `food`. For examples here, we will use `rra:test:food` to clearly +use a bucket-type. + +If your application organizes data including a bucket-type, ensure that that +bucket-type is created in Riak without specifying the data type, so effectively +an opaque value, ie a `string`. The following command provides an example of +creating the bucket-type `rra`: + +```sh +if ! riak-admin bucket-type status rra >/dev/null 2>&1; then + riak-admin bucket-type create rra '{"props":{}}' + riak-admin bucket-type activate rra +fi +``` + +#### Set Bucket Props + +The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false': + +```sh +curl -XPUT -H 'Content-Type: application/json' \ + -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \ + 'http://127.0.0.1:8098/types/rra/buckets/test/props' +``` + +For additional configuration options see [bucket properties][dev api http]. + +## Object/Key Operations + +Riak KV organizes data into buckets, keys, and values, with +[bucket types][usage bucket types] acting as an additional namespace in Riak KV +versions 2.0 and greater. Values, which we'll refer to as objects, are identifiable by a unique key, and each key/value pair is stored in a bucket. + +Objects accessed via the cache proxy service in Riak Redis Add-on are restricted to plaintext format. This plaintext format may be a simple string, JSON, XML, or other plaintext representations that can be parsed in the client application (e.g. YAML). + +While buckets are a flat namespace in Riak KV and you can name them +whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy +service, Redis bucket_type:bucket:key is mapped to Riak KV +bucket_type/bucket/key, so bucket type and bucket names should not contain +colon (`:`). When not specified, bucket type defaults to "default". + +Outside of the above restriction, bucket names have no intrinsic significance beyond allowing you to store objects with the same key in different buckets. + +The same goes for naming keys: many objects can have the same key as long as they're in different buckets. There is no restriction on key containing colon (`:`), and this practice of representing a nested namespace is common in applications using Redis. 
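+
+As a quick illustration of this mapping (a sketch only; the host, port, and
+key names are examples):
+
+```bash
+# "rra:test:food" maps to bucket type "rra", bucket "test", key "food"
+redis-cli -h 127.0.0.1 -p 22122 set rra:test:food apple
+
+# "test:food" omits the bucket type, so it maps to bucket type "default",
+# bucket "test", key "food"
+redis-cli -h 127.0.0.1 -p 22122 get test:food
+```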
+ +Riak KV [bucket types][usage bucket types] enable you to provide common +configurations for buckets (as many buckets as you wish). This means you can +easily enable buckets to share common configurations, i.e. identical +[replication properties][apps replication properties] or +[commit hooks][usage commit hooks]. + + +## Reading Objects + +Reads via the cache proxy service are analogous to a Redis `GET`, with the added benefit of reading-through to Riak KV which results in greater resilience through node outages and network partitions. + +To request a value at a bucket/key in Riak KV, issue the following: + +```erlang +{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122). +{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]). +``` + +```javascript +var redis = require("redis"), + client = redis.createClient(22122, "127.0.0.1"); + +client.get("rra:test:food", redis.print); +``` + +```python +import redis + +r = redis.StrictRedis(host="127.0.0.1", port=22122) + +r.get("rra:test:food") +``` + +```ruby +require "redis" + +redis = Redis.new(host: "127.0.0.1", port: 22122) + +redis.get("rra:test:food") +``` + +```scala +import com.lambdaworks.redis._ + +var client = RedisClient.create("redis://127.0.0.1:22122") +var connection = client.connect() + +var value = connection.get("rra:test:food") +``` + +### Get Configuration Parameters + +>**Note:** The cache proxy service read option (related to replication factor and +consistency concern) may optionally be set within the nutcracker.conf. This will result in an override of the setting value at the bucket-level in Riak KV. + +The following configuration parameters apply to `GET` and may be set within the +RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`: + +|Parameter |Description |Default| +|----------------|-----------------|-------| +|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` |
+|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` |
+|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) |
+|`timeout` | The number of milliseconds to await a response. | `0` (server specified) |
+
+
+### Sibling Resolution
+
+As the Redis protocol does not provide a means to return multiple siblings,
+the cache proxy service must provide server-side sibling resolution. At present, only last-write-wins sibling resolution is available. The result is an effective
+last-write-wins configuration for access through the cache proxy service.
+
+
+## Writing Objects
+
+Writes via the cache proxy service are analogous to a Redis `SET`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache. As with HTTP PUT, `SET` semantically covers both create and update
+operations.
+
+To set a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.set("rra:test:food", "apple", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.set("rra:test:food", "apple")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.set("rra:test:food", "apple")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.set("rra:test:food", "apple")
+```
+
+### Set Configuration Parameters
+
+>**Note:** The cache proxy service write option (related to replication factor and
+consistency concern) may optionally be set within the nutcracker.conf, resulting
+in an override of the setting value at the bucket-level in Riak KV.
+
+The following configuration parameters apply to `SET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+### Sibling Explosion
+
+As noted in the section "Sibling Resolution" above, Riak KV provides for a line of
+descent (known as the [causal context][concept causal context]) for a value stored at a key. Clients
+performing write operations provide this causal context by setting the vector
+clock (VClock) that they last read.
+
+If a client does not provide the causal context, Riak KV makes no assumptions and treats the write as a new causal context, semantically equivalent to a
+create. In the case that a value is already stored at the key, this would lead
+to a sibling.
+
+Since the Redis protocol does not provide a means to pass a VClock, the cache
+proxy service needs to perform a read-before-write to obtain the current VClock so the write can continue the causal context previously established and avoid
+"sibling explosion".
+
+Despite these efforts, in the event of a network partition, siblings will still
+be created as clients writing to nodes on either side of the network partition
+can create divergent lines of descent. Sibling resolution remains the means
+to merge these lines of descent into a coherent causal context.
+
+## Deleting Objects
+
+Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added
+benefit of deleting from Riak KV, followed by a `PEXPIRE` to Redis to invalidate
+the cache.
+
+To delete a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.del("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+# redis-py exposes the Redis DEL command as delete(), because `del` is a
+# reserved word in Python
+r.delete("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.del("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.del("rra:test:food")
+```
+
+### Delete Configuration Parameters
+
+The following configuration parameters apply to `DEL` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.

**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` | +|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` | +|`w` | How many replicas need to acknowledge the write before responding. | `2` | +|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) | + + + + diff --git a/content/riak/kv/3.0.3/add-ons/redis/redis-add-on-features.md b/content/riak/kv/3.0.3/add-ons/redis/redis-add-on-features.md new file mode 100644 index 0000000000..6812e36cc4 --- /dev/null +++ b/content/riak/kv/3.0.3/add-ons/redis/redis-add-on-features.md @@ -0,0 +1,136 @@ +--- +title: "Riak Redis Add-on Features" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Redis Add-on Features" + identifier: "add-ons_redis_features" + weight: 504 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: +--- + +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[GET-sequence]: {{}}images/redis/GET_seq.msc.png +[SET-sequence]: {{}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{}}images/redis/Object_lifetime.msc.png +[redis docs]: http://redis.io/commands +[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md + +## Overview + +The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and connection aggregation as a service, which reduces latency and increases addressable cache memory space with lower-cost hardware. + +On this page, you will find detailed descriptions of cache proxy service components, including what each component does and how you implement it. The following components are available: + +* [Pre-sharding](#pre-sharding) +* [Connection Aggregation](#connection-aggregation) +* [Command Pipelining](#command-pipelining) +* [Read-through Cache](#read-through-cache) +* [Write-around Cache](#write-around-cache) +* [Commands](#commands) +* [Object Lifetime](#object-lifetime) + +## Pre-sharding + +Pre-sharding with consistent hashing dispatches object reads and writes based +on a configurable hash function, spreading load across multiple cache servers. +The cache proxy service uses pre-sharding to extend the total addressable cache memory space based on the number of Redis servers. Request keys are hashed, then +requests are routed to the Redis server that handles that portion of the key +range. + +Redis with no persistence is used as the frontend cache proxy service, and +Redis as a data server holds all data in memory. The addressable memory of +cache proxy is limited. By employing pre-sharding, the total addressable cache +memory space is extended by the number of Redis servers. + +## Connection Aggregation + +Redis client connections are a limited resource. Using the cache proxy service, connections may be spread across multiple Riak Redis Add-on (RRA) servers. This reduces the total required connections to the Redis server for the same key. + +Redis clients in various languages support specifying multiple servers, as well +as implementing multiple methods of spreading load across those servers (i.e. +round-robin load balancing or consistent hashing). Since the cache proxy service is providing consistent hashing, any Redis client method of supporting multiple +servers will suffice. 
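+
+As a sketch of what this enables (the proxy hostnames `rra1` and `rra2` are
+hypothetical), identically configured cache proxy instances hash keys the same
+way, so a client may be pointed at any of them:
+
+```bash
+# A value written through one proxy is readable through another, since both
+# route "rra:test:food" to the same Redis backend and to Riak KV.
+redis-cli -h rra1 -p 22122 set rra:test:food apple
+redis-cli -h rra2 -p 22122 get rra:test:food
+```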
+ +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideal due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server during a write (DELETE or PUT) to the +backend server. + +A short `CACHE_TTL`, for example "15s", reduces a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three write cache strategies, the write-around cache strategy is the least +prone to race condition, but least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV utilizing the read-through + caching strategy with a TTL set at service configuration time. + +* SET - set the value of a key to Riak KV and invalidate cache, issue a PEXPIRE + to Redis. + +* DEL - delete the value of a key to Riak KV and invalidate cache, issue a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs]. 
+ +>**Important:** While the cache proxy service does support issuing DEL commands, PEXPIRE, with a small TTL, is suggested instead when the semantic intent is to remove an item from cache. With write-around, the DEL command will issue a delete to the Riak backend. + +## Object Lifetime + +With the combination of read-through and write-around cache strategies, the +full object lifetime for a key-value is represented by the following +sequence diagram: + +![Object lifetime sequence diagram]({{}}images/redis/Object_lifetime.msc.png) + + + + diff --git a/content/riak/kv/3.0.3/add-ons/redis/set-up-rra.md b/content/riak/kv/3.0.3/add-ons/redis/set-up-rra.md new file mode 100644 index 0000000000..914b4ff0ec --- /dev/null +++ b/content/riak/kv/3.0.3/add-ons/redis/set-up-rra.md @@ -0,0 +1,285 @@ +--- +title: "Setting Up Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Set Up Redis Add-on" + identifier: "add-ons_redis_setup" + weight: 201 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: +--- + +[addon redis develop]: ../developing-rra/ +[addon redis use]: ../using-rra +[ee]: https://www.tiot.jp/en/about-us/contact-us/ +[install index]: {{}}riak/kv/3.0.3/setup/installing +[perf open files]: {{}}riak/kv/3.0.3/using/performance/open-files-limit/#changing-the-limit +[lab ansible]: https://github.com/paegun/ansible-cache-proxy + +This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. + +## Prerequisites + +Before you begin installing Riak Redis Add-on (RRA), you will need to ensure that you have root or sudo access on the nodes where you plan to install RRA. You will also need to have Riak KV already [installed][install index]. + +While this page assumes that Redis is not already installed, existing installations of Redis are supported. If you have an existing Redis installation, look for the *skip ahead* instructions as you go. + +This page assumes that Redis is (or will be) installed on separate hosts from Riak KV. You will need the list of Riak KV and Redis host:port combinations. RRA communicates with Riak KV via the protobuf port, and the host:port values are used +to configure the cache proxy. + +## In the Lab + +An ansible setup for the Riak Redis Add-on (RRA) was developed to provide a +runnable example of an installation, see [ansible cache proxy][lab ansible]. +The remainder of this setup guide lists the commands required to install and +configure RRA manually. + +## Installing + +1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files]. +2. On all Redis hosts, install Redis. **Skip ahead* if you already have Redis installed. +3. Install Riak Redis Add-on. + +### Change the open-files limit + +As with Riak KV, both the total open-files limit and the per-user open-files limit +must be high enough to allow Redis and Riak Redis Add-on (RRA) to function. + +For a complete guide on changing limit in Riak KV, see +[Changing the limit][perf open files]. + +#### Linux + +On most Linux distributions, the total limit for open files is controlled by `sysctl`. + +```bash +sudo sysctl fs.file-max fs.file-max=65536 +sudo sysctl -p +``` + +To change the per-user file limit, you need to edit `/etc/security/limits.conf`. 
+ +#### CentOS + +On CentOS systems, set a proper limit for the user you're usually logging in with +to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the +executing user. + +#### Ubuntu + +On Ubuntu systems, the following settings are recommended: + +```config +»USERNAME« hard nofile 65536 +»USERNAME« soft nofile 65536 +root hard nofile 65536 +root soft nofile 65536 +``` + +>**Note:** You may need to log out of your shell and then log back in for these changes to take effect. + + +### Install Redis + +>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on". + +#### Install on Ubuntu + +If you are on Ubuntu, run the following to install Redis: + +```bash +# add the dotdeb repositories to your APT sources. +sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <**Notes:** ss is used here to support a minimal installed system, but netstat may be used as well. + +### Install Riak Redis Add-on (RRA) + +>**Note:** +>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums. + +If you are on CentOS, run the following to install RRA: + +```bash +sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm +``` + +If you are on Ubuntu, run the following to install RRA: + +```bash +sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb +``` + +## Configuring Riak Redis Add-on + +To configure Riak Redis Add-on (RRA), edit the configuration file: /etc/cache_proxy/cache_proxy_22122.yml. + +The RRA configuration file is in YAML format. An example configuration +file is provided in the install, and it contains all relevant configuration elements: + +```config +» XML node name« : + listen: 0.0.0.0:22122 + hash: fnv1a_64 + distribution: ketama + auto_eject_hosts: true + redis: true + server_retry_timeout: 2000 + server_failure_limit: 1 + server_ttl: 1h + servers: + - 127.0.0.1:6379:1 + backend_type: riak + backend_max_resend: 2 + backends: + - 127.0.0.1:8087 +``` + +Set the `listen` configuration value to set the RRA listen port. + +To set the time-to-live (TTL) for values stored in cache, set the `server_ttl` +configuration value. Human-readable time values can be specified, +with the most likely units being `s` for seconds or `ms` for milliseconds. + +Set the list of Redis servers by listing the servers, separated by `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional). + +Set the list of Riak KV servers by listing the servers, separated by `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«` +(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here. + +### Verify your configuration + +If you are on Ubuntu, run the following to start RRA: + +```bash +sudo service cache_proxy start +``` + +If you are on CentOS, run the following to restart Redis and ensure redis-server +is enabled to start on boot: + +```bash +systemctl start cache_proxy +``` + +To verify RRA is running and listening on the expected port, run the +following (using the loopback interface and the default RRA port 22122 +as an example): + +```bash +redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS +redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on SUCCESS +``` + +Redis should respond with `SUCCESS`. 
+ +If RRA is responding with the expected output, run the following to +clean up and remove the test value: + +```bash +redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on +``` + +If you did not get the expected output, run the following +to verify that RRA is running on the expected port: + +```bash +ss -nlp |grep [n]utcracker +``` + +>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well. + +## Next Steps + +Get started with some [basic usage][addon redis use] or checkout out more info on [setting up for development (with examples)][addon redis develop]. + + + + diff --git a/content/riak/kv/3.0.3/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/3.0.3/add-ons/redis/set-up-rra/deployment-models.md new file mode 100644 index 0000000000..0db04df374 --- /dev/null +++ b/content/riak/kv/3.0.3/add-ons/redis/set-up-rra/deployment-models.md @@ -0,0 +1,143 @@ +--- +title: "Riak Redis Add-on Deployment Models" +description: "Explore the various models for deploying Riak Redis Add-on" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Redis Add-on Deployment Models" + identifier: "add-ons_redis_deployment" + weight: 201 + parent: "add-ons_redis_setup" +toc: true +commercial_offering: true +aliases: +--- + +[Local-deployment]: {{}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{}}images/redis/rra_deployment_distributed.png + +## Deployment Models + +### Local Cache Deployment + +In a local cache deployment, the RRA and Redis are deployed to the application +server. + +![Local-deployment]({{}}images/redis/rra_deployment_local.png) + +Connections: + +* RRA: The connections between Application Service instances to RRA Service + instance are local. +* Redis: The connection between the RRA Service instance and Redis Service + instance is local. +* Riak: The connections between Application Servers to Riak Nodes is distributed + and bounded to equal the number of Riak nodes _multiplied_ by the number of + Application Servers since they are aggregated at the RRA Service instance. + +Advantages: + +* Cache hits are extremely fast + +Disadvantages: + +* Cache writes on one application server are *not* observed on other application + servers, so cache hit rates are likely lower unless some form of consistent + routing to the application server exists within the solution. +* Redis competing for RAM with the application service may be problematic + +### Colocated Cache Deployment + +In a colocated cache deployment, the RRA may be deployed either to the +application server (suggested) or to the Riak servers and Redis is deployed to +the Riak servers. + +In the case of deploying the RRA to the application servers, the RRA features +of reducing connections from the relatively high number of application service +instances to the fewer Redis (cache) and Riak (persistent) data service +instances allows for the greatest scale at the expense of the deployment cost +of pushing a service and its configuration. + +In the case of deploying the RRA to the colocated Redis and Riak data servers, +the maximum scale for the solution is contrained by the number of network +connections from the application services while deployment costs remain a matter +of pushing a service and its configuration. In either case, deployment should +be automated, so are not multiplied by the number of servers. 
+
+![Colocated-deployment]({{}}images/redis/rra_deployment_colocated.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are distributed and bounded to equal the number of Riak nodes
+  _multiplied_ by the number of Application Service instances.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections from RRA to Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _squared_.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so slightly increased latency compared to local.
+* Redis competing for RAM with Riak will likely be problematic. Redis should
+  be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis
+  to ensure Riak is allotted sufficient RAM to serve the more important
+  persistent data storage and retrieval services. See http://redis.io/topics/config
+* This model may seem to provide data locality, but in the case of faults in
+  either the Redis or Riak services, the fault tolerance mechanisms of RRA and
+  Riak will not match exactly, since communicating the information necessary to
+  support such lock-step fault tolerance would lead to greater mean latencies;
+  Riak provides superior 99th percentile latency performance in the face
+  of faults.
+
+
+### Distributed Cache Deployment
+
+In a distributed cache deployment, the RRA is deployed to the application server
+and Redis is deployed to standalone servers, separate from Riak cluster nodes.
+
+![Distributed-deployment]({{}}images/redis/rra_deployment_distributed.png)
+
+Connections:
+
+* RRA: The connections from Application Service instances to the RRA Service
+  instance are local.
+* Redis: The connections from the RRA Service instances to the Redis Service
+  instances are distributed and bounded to equal the number of Application
+  Servers _multiplied_ by the number of Redis Servers.
+* Riak: The connections from RRA to Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _multiplied_ by the number of Application
+  Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+* Keeps RRA near the application, reducing network connections.
+* Moves Redis to distinct servers, allowing the cache more RAM and not
+  constraining the RAM of either the application or persistent data services.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so increased latency compared to local.
+
+### Recommendation
+
+The relative advantages and disadvantages of the Distributed Cache Deployment,
+most notably the increased cache hit rate and reduced connection overhead,
+make it the standout choice for applications requiring the scale and
+operational simplicity of Riak. For this reason, we recommend the Distributed
+Cache Deployment.
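+
+As a concrete illustration of the recommended model, the `servers` list in the
+RRA configuration file would simply enumerate the standalone Redis hosts. The
+following is a sketch only: the three addresses are hypothetical placeholders,
+and the `»host«:»port«:»weight«` format is described on the setup page:
+
+```config
+  servers:
+    - 10.0.1.1:6379:1
+    - 10.0.1.2:6379:1
+    - 10.0.1.3:6379:1
+```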
+
+
+
+
diff --git a/content/riak/kv/3.0.3/add-ons/redis/using-rra.md b/content/riak/kv/3.0.3/add-ons/redis/using-rra.md
new file mode 100644
index 0000000000..456891882e
--- /dev/null
+++ b/content/riak/kv/3.0.3/add-ons/redis/using-rra.md
@@ -0,0 +1,246 @@
+---
+title: "Using Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Using Redis Addon"
+    identifier: "add-ons_redis_getstarted"
+    weight: 302
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/kv/3.0.3/add-ons/redis/get-started-with-rra
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis setup]: ../set-up-rra/
+[dev api http]: {{}}riak/kv/3.0.3/developing/api/http/
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+Now that you've [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client which supports `GET`, `SET`, and `DEL` operations.
+
+This page will walk you through using RRA.
+
+## Prerequisites
+
+We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine.
+
+You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http].
+
+## Run the Read-Through Test
+
+Throughout this test example, the bucket "test" and key "foo" are used to
+demonstrate how to address the hierarchical namespace support in Riak KV
+through the flat Redis key. The bucket type is not specified in this example,
+so the default bucket type, named "default", is effectively used. For additional
+information regarding key namespaces, see [developing with Riak Redis Add-on (RRA)][addon redis develop].
+
+The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are:
+
+* DELETE the Riak object at the `test` bucket with the key `foo`, which checks that there are no siblings.
+* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`.
+* GET the Riak object at the `test` bucket with the key `foo`.
+* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.)
+* Assert that the value obtained from the previous cache proxy GET is 'bar'.
+
+First, create a file named `read_through_test.sh` with the following content:
+
+```bash
+#!/usr/bin/env bash
+
+# set test environment
+RIAK_HTTP_IP="127.0.0.1"
+RIAK_HTTP_PORT="8098"
+CACHE_PROXY_IP="127.0.0.1"
+CACHE_PROXY_PORT="22122"
+CACHE_PROXY_STATISTICS_PORT="22123"
+RIAK_TEST_BUCKET="test"
+KEY="foo"
+VALUE="bar"
+
+# DELETE Riak object, ensure no siblings
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# PUT Riak object
+curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# GET Riak object
+RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY")
+
+# GET Cache Proxy value
+CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" get "$RIAK_TEST_BUCKET:$KEY")
+
+# DELETE Riak object, cleanup
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# Assert
+if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then
+    RESULT="Success"
+else
+    RESULT="FAIL"
+fi
+echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy."
+```
+
+Then, once you've created the file, run it as follows:
+
+```bash
+./read_through_test.sh
+```
+
+### Exceptions
+
+If the test does not pass, verify that both Redis and RRA are running. You can do this by running:
+
+```bash
+ps aux |grep [r]edis
+ps aux |grep [n]utcracker
+```
+
+The result should list `redis` and `nutcracker` respectively.
+
+Also, verify that Riak KV is started and listening on the protocol buffer port specified:
+
+```bash
+sudo riak config effective |grep proto
+```
+
+If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following:
+
+```bash
+sudo service cache_proxy restart
+```
+
+If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows:
+
+1. Stop RRA.
+2. Stop Redis.
+3. *Optional* Restart Riak KV (This should only be necessary if Riak KV is not responding to protocol buffer requests.)
+4. Start Redis.
+5. Start RRA.
+
+```bash
+sudo service cache_proxy stop
+sudo service redis stop
+
+# optional
+sudo riak restart
+
+sudo service redis start
+sudo service cache_proxy start
+```
+
+## Using Riak Redis Add-on
+
+Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA.
+
+For objects that should not be cached, interact with Riak KV as usual: issuing GET, PUT, and DELETE commands through the Riak client.
+
+For objects that should be cached, read from RRA: issuing GET, SET, and DEL commands through the Redis client.
+
+### Monitoring
+
+#### RRA
+
+Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service's process was killed or terminated by other means.
+
+The log file for RRA is stored by default in `/var/log/cache_proxy.log`. RRA is logrotate-friendly, responding to the signal to reopen the log file following a rotation.
+
+For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+
+For example, run the following command (using the loopback interface and the default statistics port as an example):
+
+```bash
+telnet 127.0.0.1 22123
+```
+
+The command returns statistics results such as the following:
+
+```json
+{
+  "bdp_cache_proxy": {
+    "192.168.50.2:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 216,
+      "requests": 9,
+      "response_bytes": 39,
+      "responses": 4,
+      "server_connections": 1,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "192.168.50.3:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 0,
+      "requests": 0,
+      "response_bytes": 0,
+      "responses": 0,
+      "server_connections": 0,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "192.168.50.4:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 90,
+      "requests": 5,
+      "response_bytes": 258,
+      "responses": 2,
+      "server_connections": 0,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "client_connections": 0,
+    "client_eof": 6,
+    "client_err": 0,
+    "forward_error": 0,
+    "fragments": 0,
+    "server_ejects": 0
+  },
+  "curr_connections": 4,
+  "service": "nutcracker",
+  "source": "vagrant",
+  "timestamp": 1438301846,
+  "total_connections": 10,
+  "uptime": 7227,
+  "version": "0.4.0"
+}
+```
+
+Using the above results, you should be able to determine metrics changes that would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and the custom software within your overall solution.
+
+While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions:
+
+* Custom - https://github.com/gfranxman/NutcrackerMonitor
+* NewRelic - http://newrelic.com/plugins/schoology/245
+* Nagios - https://github.com/schoology/twemproxy_nagios
+
+#### Redis
+
+Various Redis monitoring solutions exist in the market and, like monitoring RRA, these monitoring solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone.
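+
+For Redis itself, the same kind of spot check can be done with the bundled
+client. The following sketch samples the `stats` section of the `INFO` output;
+the host and port are placeholders for one of your Redis servers:
+
+```bash
+# sample the stats section of Redis INFO output
+redis-cli -h 127.0.0.1 -p 6379 info stats
+```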
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details + + + + diff --git a/content/riak/kv/3.0.3/configuring.md b/content/riak/kv/3.0.3/configuring.md new file mode 100644 index 0000000000..1e00896a95 --- /dev/null +++ b/content/riak/kv/3.0.3/configuring.md @@ -0,0 +1,88 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +aliases: +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+
+[Learn More >>][config search]
+
+#### [V3 Multi-Datacenter][config v3 mdc]
+
+A guide on configuring Riak's V3 Multi-Datacenter Replication.
+
+[Learn More >>][config v3 mdc]
+
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/configuring/backend.md b/content/riak/kv/3.0.3/configuring/backend.md
new file mode 100644
index 0000000000..cb6f2d6ed0
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/backend.md
@@ -0,0 +1,647 @@
+---
+title: "Backend Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Backend Configuration"
+    identifier: "configuring_backend"
+    weight: 110
+    parent: "configuring"
+toc: true
+aliases:
+---
+
+[plan backend leveldb]: {{}}riak/kv/3.0.3/setup/planning/backend/leveldb
+[plan backend leveled]: {{}}riak/kv/3.0.3/setup/planning/backend/leveled
+[plan backend bitcask]: {{}}riak/kv/3.0.3/setup/planning/backend/bitcask
+[plan backend memory]: {{}}riak/kv/3.0.3/setup/planning/backend/memory
+[plan backend multi]: {{}}riak/kv/3.0.3/setup/planning/backend/multi
+
+## LevelDB
+
+Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend.
+
+> **Note on upgrading to 2.0**
+>
+> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and
+wish to use your old configuration files, i.e. `app.config` and
+`vm.args`, please note that you must set the `total_leveldb_mem_percent`
+setting in the `eleveldb` section of `app.config`. We recommend setting
+it to `70`. If you do not set this parameter, it will default to 15,
+which can lead to problems in some clusters.
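+
+For example, if you keep the legacy configuration files, the relevant
+`app.config` stanza might look like the following sketch (only the
+`total_leveldb_mem_percent` entry is the setting discussed above; any other
+`eleveldb` settings you already have would sit alongside it):
+
+```appconfig
+{eleveldb, [
+    %% assign 70% of server memory to LevelDB, per the recommendation above
+    {total_leveldb_mem_percent, 70}
+]}
+```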
Config | Description | Default
leveldb.block_cache_thresholdThis setting defines the limit past which block cache memory can no +longer be released in favor of the page cache. This setting has no +impact in favor of file cache. The value is set on a per-vnode basis. +32MB
leveldb.compaction.trigger.tombstone_countControls when a background compaction initiates solely due to the +number of delete tombstones within an individual .sst table +file. A value of off disables the feature.1000
leveldb.compressionEnabling this setting (on), which is the default, +saves disk space. Disabling it may reduce read latency but increase +overall disk activity. This option can be changed at any time, but it +will not impact data on disk until the next time a file requires +compaction.on
leveldb.compression.algorithmThis setting is used to select which compression algorithm + is selected when leveldb.compression is on. + In new riak.conf files, this is explicitly set to + lz4; however when this setting is not provided, + snappy will be used for backward-compatibility. +

+ When you determine that you will no longer need backward-compatibility, + setting this to lz4 will cause future compactions + to use the LZ4 algorithm for compression.
lz4 in new riak.conf files

+ snappy when not provided +
leveldb.data_rootThe directory in which LevelDB will store its data../data/leveldb
leveldb.fadvise_willneedOption to override LevelDB's use of fadvise(DONTNEED) +with fadvise(WILLNEED) instead. WILLNEED can +reduce disk activity on systems where physical memory exceeds the +database size.false
leveldb.maximum_memoryThis parameter defines the server memory (in bytes) to assign to +LevelDB. Also see leveldb.maximum_memory.percent to set +LevelDB memory as a percentage of system total.80
leveldb.maximum_memory.percentThis parameter defines the percentage of total server memory to +assign to LevelDB. LevelDB will dynamically adjust its internal cache +sizes to stay within this size. The memory size can alternately be +assigned as a byte count via leveldb.maximum_memory +instead.70
leveldb.threadsThe number of worker threads performing LevelDB operations.71
leveldb.verify_checksumsEnables or disables the verification of the data fetched from +LevelDB against internal checksums.on
leveldb.verify_compactionEnables or disables the verification of LevelDB data during +compaction.on
leveldb.block.size_stepsDefines the number of incremental adjustments to attempt between the +block.size value and the maximum block.size +for an .sst table file. A value of zero disables the +underlying dynamic block_size feature.16
leveldb.block.restart_intervalDefines the key count threshold for a new key entry in the key +index for a block. Most deployments should leave this parameter alone. +16
leveldb.block.sizeDefines the size threshold for a block/chunk of data within one +.sst table file. Each new block gets an index entry in the +.sst table file's master index.4KB
leveldb.bloomfilterEach database .sst table file can include an optional +"bloom filter" that is highly effective in shortcutting data queries +that are destined to not find the requested key. The Bloom filter +typically increases the size of an .sst table file by about +2%.on
leveldb.write_buffer_size_minEach vnode first stores new key/value data in a memory-based write +buffer. This write buffer is in parallel to the recovery log mentioned +in the sync parameter. Riak creates each vnode with a +randomly sized write buffer for performance reasons. The random size is +somewhere between write_buffer_size_min and +write_buffer_size_max.30MB
leveldb.write_buffer_size_maxSee leveldb.write_buffer_size_min directly above.60MB
leveldb.limited_developer_memThis is a Riak-specific option that is used when a developer is +testing a high number of vnodes and/or several VMs on a machine with +limited physical memory. Do not use this option if making +performance measurements. This option overwrites values given to +write_buffer_size_min and +write_buffer_size_max.off
leveldb.sync_on_writeWhether LevelDB will flush after every write.

+Note: If you are familiar with fsync, this is analogous +to calling fsync after every write.
off
leveldb.tieredThe level number at which LevelDB data switches from the faster to +the slower array. The default of off disables the +feature.off
leveldb.tiered.path.fastThe path prefix for .sst files below the level set by +leveldb.tiered.
leveldb.tiered.path.slow The path prefix for .sst files at or above the level set
+by leveldb.tiered.
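+
+As a quick illustration of how these parameters appear in practice, a
+riak.conf excerpt for a LevelDB node might look like the following sketch
+(the values shown are the defaults from the table above, written out
+explicitly):
+
+```riakconf
+storage_backend = leveldb
+leveldb.maximum_memory.percent = 70
+leveldb.compression = on
+leveldb.compression.algorithm = lz4
+leveldb.bloomfilter = on
+```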
+
+## Leveled
+
+Configurable parameters for Riak's [leveled][plan backend leveled] storage backend.
+
Config | Description | Default
leveled.data_rootA path under which leveled data files will be stored.$(platform_data_dir)/leveled +
leveled.sync_strategy 
+Strategy for flushing data to disk - Can be set to riak_sync, sync (if OTP > 16)
+or none. Use none, and the OS will flush when most efficient. Use riak_sync or
+sync to flush after every PUT (not recommended without some hardware support,
+e.g. flash drives and/or Flash-backed Write Caches)none
leveled.compression_methodCan be lz4 or native (which will use the Erlang native zlib compression) within term_to_binarynative
leveled.compression_point The point at which compression is applied to the Journal (the Ledger is always compressed). Use on_receipt or on_compact. on_compact is suitable
+when values are unlikely to yield much benefit from compression (compression is only attempted when compacting).on_receipt
leveled.log_level Can be debug, info, warn, error or critical. Set the minimum log level to be used within leveled. Leveled will log many lines to allow for stats to be extracted by those using log indexers such as Splunk.info
leveled.journal_size The approximate size (in bytes) at which a Journal file should be rolled. Normally keep this at around the size of 100K objects.1000000000
leveled.compaction_runs_perday The number of journal compactions per vnode per day. The higher the value, the more compaction runs, and the sooner space is recovered; but each run has a cost.24
leveled.compaction_low_hour The hour of the day at which journal compaction can start. Use a low hour of 0 and a top hour of 23 to have no compaction window (i.e. always compact regardless of the time of day).0
leveled.compaction_top_hour The hour of the day after which journal compaction should stop. If the low hour > top hour, compaction will work overnight between the low hour and the top hour (inclusive). Timings rely on the server's view of local time.23
leveled.max_run_length The maximum number of consecutive files which may be compacted in a single compaction run.4
leveled_reload_recalc Enable the `recalc` compaction strategy within the leveled backend in Riak.disabled
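+
+For illustration, a riak.conf excerpt selecting the leveled backend might look
+like the following sketch (values shown are the defaults from the table above):
+
+```riakconf
+storage_backend = leveled
+leveled.data_root = $(platform_data_dir)/leveled
+leveled.compression_method = native
+leveled.compaction_runs_perday = 24
+```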
+
+## Bitcask
+
+Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend.
+
Config | Description | Default
bitcask.data_rootThe directory under which Bitcask will store its data../data/bitcask
bitcask.io_modeConfigure how Bitcask writes data to disk. If set to +erlang, writes are made via Erlang's built-in file API; if +set to nif, writes are made via direct calls to the POSIX C +API. The nif mode provides higher throughput for certain +workloads, but has the potential to negatively impact the Erlang VM, +leading to higher worst-case latencies and possible throughput collapse +erlang
bitcask.expiry By default, Bitcask keeps all of your data around. If your data has
+limited time value, or if you need to purge data for space reasons, you
+can set the expiry option. For example, if you need to
+purge data automatically after 1 day, set the value to 1d.
+off disables automatic expiration.off
bitcask.expiry.grace_timeBy default, Bitcask will trigger a merge whenever a data file +contains an expired key. This may result in excessive merging under some +usage patterns. To prevent this you can set the +bitcask.expiry.grace_time option. Bitcask will defer +triggering a merge solely for key expiry by the configured number of +seconds. Setting this to 1h effectively limits each cask to +merging for expiry once per hour.0
bitcask.hintfile_checksumsWhether to allow the CRC to be present at the end of hintfiles. +Setting this to allow_missing runs Bitcask in a +backwards-compatible mode in which old hint files will still be accepted +without CRC signatures.strict
bitcask.fold.max_putsSee the description for the bitcask.fold.max_age +config directly below.0
bitcask.fold.max_ageFold keys thresholds will reuse the keydir if another fold was +started less than fold.max_age ago and there were fewer +than fold.max_puts updates. Otherwise, it will wait until +all current fold keys complete and then start. Set either option to +unlimited to disable.unlimited
bitcask.merge.thresholds.fragmentationDescribes which ratio of dead keys to total keys in a file will +cause it to be included in the merge. The value of this setting is a +percentage from 0 to 100. For example, if a data file contains 4 dead +keys and 6 live keys, it will be included in the merge at the default +ratio (which is 40). Increasing the value will cause fewer files to be +merged, decreasing the value will cause more files to be merged.40
bitcask.merge.thresholds.dead_bytesDescribes the minimum amount of data occupied by dead keys in a file +to cause it to be included in the merge. Increasing the value will cause +fewer files to be merged, whereas decreasing the value will cause more +files to be merged.128MB
bitcask.merge.thresholds.small_fileDescribes the minimum size a file must have to be excluded from the +merge. Files smaller than the threshold will be included. Increasing +the value will cause more files to be merged, whereas decreasing the +value will cause fewer files to be merged.10MB
bitcask.merge.triggers.dead_bytesDescribes how much data stored for dead keys in a single file will +trigger merging. If a file meets or exceeds the trigger value for dead +bytes, merge will be triggered. Increasing the value will cause merging +to occur less often, whereas decreasing the value will cause merging to +happen more often. When either of these constraints are met by any file +in the directory, Bitcask will attempt to merge files.512MB
bitcask.merge.triggers.fragmentationDescribes which ratio of dead keys to total keys in a file will +trigger merging. The value of this setting is a percentage from 0 to +100. For example, if a data file contains 6 dead keys and 4 live keys, +then merge will be triggered at the default setting. Increasing this +value will cause merging to occur less often, whereas decreasing the +value will cause merging to happen more often.60
bitcask.merge.window.endSee the description of the bitcask.merge.policy config +below.23
bitcask.merge.window.startSee the description of the bitcask.merge.policy config +below.0
bitcask.merge.policyLets you specify when during the day merge operations are allowed to +be triggered. Valid options are: always, meaning no +restrictions; never, meaning that merging will never be +attempted; and window, specifying the hours during which +merging is permitted, where bitcask.merge.window.start and +bitcask.merge.window.end are integers between 0 and 23. If +merging has a significant impact on performance of your cluster, or your +cluster has quiet periods in which little storage activity occurs, you +may want to change this setting from the default.always
bitcask.merge_check_intervalBitcask periodically runs checks to determine whether merges are +necessary. This parameter determines how often those checks take place. +Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, +etc.3m
bitcask.merge_check_jitterIn order to prevent merge operations from taking place on different +nodes at the same time, Riak can apply random variance to merge times, +expressed as a percentage of bitcask.merge_check_interval. +30%
bitcask.max_merge_sizeMaximum amount of data to merge in one go in the Bitcask backend. +100GB
bitcask.max_file_sizeDescribes the maximum permitted size for any single data file in the +Bitcask directory. If a write causes the current file to exceed this +size threshold then that file is closed, and a new file is opened for +writes.2GB
bitcask.sync.intervalSee the description of the bitcask.sync.strategy +directly below.
bitcask.sync.strategy Changes the durability of writes by specifying when to synchronize
+data to disk. The default setting protects against data loss in the
+event of application failure (process death) but leaves open a small
+window in which data could be lost in the event of complete system
+failure (e.g. hardware, OS, or power). The default mode,
+none, writes data into operating system buffers which will
+be written to the disks when those buffers are flushed by the operating
+system. If the system fails, e.g. due to power loss or a crash, that data is
+lost before those buffers are flushed to stable storage. This is
+prevented by the setting o_sync, which forces the operating
+system to flush to stable storage at every write. The effect of flushing
+each write is better durability; however, write throughput will suffer as
+each write will have to wait for the write to complete. Available sync
+strategies: none, which will let the operating system
+manage syncing writes; o_sync, which uses the
+O_SYNC flag to force syncs on every write; and
+interval, which will force Bitcask to sync every
+bitcask.sync.interval seconds.none
bitcask.open_timeout Specifies the maximum time Bitcask will block on startup while
+attempting to create or open the data directory. You generally need not
+change this value. If for some reason the timeout is exceeded on open
+you'll see a log message of the form Failed to start bitcask
+backend: .... Only then should you consider a longer timeout.
+4s
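+
+To tie the merge settings above together, here is a sketch of a riak.conf
+excerpt that restricts merging to a nightly window. The 0-8 hours are an
+assumption for illustration; any values between 0 and 23 work:
+
+```riakconf
+storage_backend = bitcask
+bitcask.merge.policy = window
+bitcask.merge.window.start = 0
+bitcask.merge.window.end = 8
+```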
+
+## Memory Backend
+
+Configurable parameters for Riak's [Memory][plan backend memory] backend.
+
Config | Description | Default
memory_backend.ttlEach value written will be written with this "time to live." Once +that object's time is up, it will be deleted on the next read of its +key. Minimum: 1s.
memory_backend.max_memory_per_vnodeThe maximum amount of memory consumed per vnode by the memory +storage backend. Minimum: 1MB.
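+
+For instance, a riak.conf excerpt selecting the memory backend and bounding it
+might look like the following sketch (the `1d` TTL and `4MB` per-vnode cap are
+illustrative values, not recommendations):
+
+```riakconf
+storage_backend = memory
+memory_backend.ttl = 1d
+memory_backend.max_memory_per_vnode = 4MB
+```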
+
+## Multi Backend
+
+Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster.
+
+If you are using multiple backends, you can configure the backends
+individually by prepending the setting with `multi_backend.$name`, where
+`$name` is the name of the backend. `$name` can be any valid
+configuration word, like `customer_data`, `my_data`, `foo_bar_backend`,
+etc.
+
+Below is the general form for setting multi-backend parameters:
+
+```riakconf
+multi_backend.$name.(existing_setting) =
+# or
+multi_backend.$name.$backend_type.(backend_specific_setting) =
+```
+
+Below is a listing of the available parameters:
+
Config | Description | Default
multi_backend.$name.storage_backendThis parameter specifies the Erlang module defining the storage +mechanism that will be used on this node.bitcask
multi_backend.defaultThe default name of a backend when one is not specified.
+
+To give an example, if you have a LevelDB backend named
+`customer_backend` and wish to set the `data_root` parameter to
+`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would
+do so as follows:
+
+```riakconf
+multi_backend.customer_backend.storage_backend = leveldb
+multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend
+multi_backend.customer_backend.leveldb.maximum_memory.percent = 50
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/configuring/basic.md b/content/riak/kv/3.0.3/configuring/basic.md
new file mode 100644
index 0000000000..5c21e8eb22
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/basic.md
@@ -0,0 +1,239 @@
+---
+title: "Basic Riak KV Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Basic Configuration"
+    identifier: "configuring_basic"
+    weight: 100
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/configuration/
+  - /riak/kv/3.0.3/ops/building/configuration/
+---
+
+[config reference]: {{}}riak/kv/3.0.3/configuring/reference
+[use running cluster]: {{}}riak/kv/3.0.3/using/running-a-cluster
+[use admin riak-admin#member-status]: {{}}riak/kv/3.0.3/using/admin/riak-admin/#member-status
+[perf erlang]: {{}}riak/kv/3.0.3/using/performance/erlang
+[plan start]: {{}}riak/kv/3.0.3/setup/planning/start
+[plan best practices]: {{}}riak/kv/3.0.3/setup/planning/best-practices
+[cluster ops backup]: {{}}riak/kv/3.0.3/using/cluster-operations/backing-up
+[cluster ops add remove node]: {{}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes
+[plan backend]: {{}}riak/kv/3.0.3/setup/planning/backend
+[plan backend multi]: {{}}riak/kv/3.0.3/setup/planning/backend/multi
+[plan backend bitcask]: {{}}riak/kv/3.0.3/setup/planning/backend/bitcask
+[usage bucket types]: {{}}riak/kv/3.0.3/developing/usage/bucket-types
+[apps replication properties]: {{}}riak/kv/3.0.3/developing/app-guide/replication-properties
+[concept buckets]: {{}}riak/kv/3.0.3/learn/concepts/buckets
+[concept eventual consistency]: {{}}riak/kv/3.0.3/learn/concepts/eventual-consistency
+[perf benchmark]: {{}}riak/kv/3.0.3/using/performance/benchmarking
+[perf open files]: {{}}riak/kv/3.0.3/using/performance/open-files-limit
+[perf index]: {{}}riak/kv/3.0.3/using/performance
+[perf aws]: {{}}riak/kv/3.0.3/using/performance/amazon-web-services
+[Cluster Capacity Planning]: {{}}riak/kv/3.0.3/setup/planning/cluster-capacity/#ring-size-number-of-partitions
+
+This document covers the parameters that are commonly adjusted when
+setting up a new cluster. We recommend that you also review the detailed
+[Configuration Files][config reference] document before moving a cluster into
+production.
+
+All configuration values discussed here are managed via the
+configuration file on each node, and a node must be restarted for any
+changes to take effect.
+
+> **Note**
+>
+> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config` configuration file or
+the newer `riak.conf` if you wish.
+>
+> If you have installed Riak KV 2.0 directly, you should use only
+`riak.conf`.
+>
+> More on configuring Riak KV can be found in the [configuration files][config reference]
+doc.
+
+We advise that you make as many of the changes below as practical
+_before_ joining the nodes together into a cluster.
Once your
+configuration has been set on each node, follow the steps in [Basic Cluster Setup][use running cluster] to complete the clustering process.
+
+Use [`riak-admin member-status`][use admin riak-admin#member-status]
+to determine whether any given node is a member of a cluster.
+
+## Erlang VM Tunings
+
+Prior to building and starting a cluster, there are some
+Erlang-VM-related changes that you should make to your configuration
+files. If you are using the older, `vm.args`-based Erlang VM tunings,
+you should set the following:
+
+```vmargs
++sfwi 500
++scl false
+```
+
+If you are using the newer, `riak.conf`-based configuration system, we
+recommend the following settings:
+
+```riakconf
+erlang.schedulers.force_wakeup_interval = 500
+erlang.schedulers.compaction_of_load = false
+```
+
+More information can be found in [Erlang VM Tuning][perf erlang].
+
+## Ring Size
+
+The ring size, in Riak parlance, is the number of data partitions that
+comprise the cluster. This quantity impacts the scalability and
+performance of a cluster and, importantly, **it should be established
+before the cluster starts receiving data**.
+
+If the ring size is too large for the number of servers, disk I/O will
+be negatively impacted by the excessive number of concurrent databases
+running on each server; if the ring size is too small, the servers' other
+resources (primarily CPU and RAM) will go underutilized.
+
+See [Cluster Capacity Planning] for more details on choosing a ring size.
+
+The steps involved in changing the ring size depend on whether the
+servers (nodes) in the cluster have already been joined together.
+
+### Cluster joined, but no data needs to be preserved
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
+4. Start all nodes
+5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### New servers, have not yet joined a cluster
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for
+the location of this file)
+4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### Verifying ring size
+
+You can use the `riak-admin` command to verify the ring size:
+
+```bash
+riak-admin status | grep ring
+```
+
+Console output:
+
+```
+ring_members : ['riak@10.160.13.252']
+ring_num_partitions : 8
+ring_ownership : <<"[{'riak@10.160.13.252',8}]">>
+ring_creation_size : 8
+```
+
+If `ring_num_partitions` and `ring_creation_size` do not agree, that
+means that the `ring_creation_size` value was changed too late and that
+the proper steps were not taken to start over with a new ring.
+
+**Note**: Riak will not allow two nodes with different ring sizes to be
+joined into a cluster.
+
+## Backend
+
+Another critical decision to be made is the backend to use.
The choice +of backend strongly influences the performance characteristics and +feature set for a Riak environment. + +See [Choosing a Backend][plan backend] for a list of supported backends. Each +referenced document includes the necessary configuration bits. + +As with ring size, changing the backend will result in all data being +effectively lost, so spend the necessary time up front to evaluate and +benchmark backends. + +If still in doubt, consider using the [Multi][plan backend multi] backend for future +flexibility. + +If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the +effectiveness of backend-specific features. + +## Default Bucket Properties + +Bucket properties are also very important factors in Riak's performance +and general behavior. The properties for any individual bucket can be +configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference]. + +Below is an example of setting `last_write_wins` to `true` and `r` to 3. + +```riakconf +buckets.default.last_write_wins = true +buckets.default.r = 3 +``` + +```appconfig +{default_bucket_props, [ + {last_write_wins,true}, + {r,3}, + ... + ]} +``` + +For more on bucket properties, we recommend reviewing our docs on +[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors." + +* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/) +* [Part 2](https://riak.com/riaks-config-behaviors-part-2/) +* [Part 3](https://riak.com/riaks-config-behaviors-part-3/) +* [Part 4](https://riak.com/riaks-config-behaviors-part-4/) +* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/) + +If the default bucket properties are modified in your configuration +files and the node is restarted, any existing buckets will **not** be +directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{}}riak/kv/3.0.3/developing/api/http/reset-bucket-props) can be used to force them to pick up the new +defaults. + +## System tuning + +Please review the following documents before conducting any +[benchmarking][perf benchmark] and/or rolling out a live production +cluster. + +* [Open Files Limit][perf open files] +* [System Performance Tuning][perf index] +* [AWS Performance Tuning][perf aws] +* [Configuration Files][config reference] + +## Joining the nodes together + +Please see [Running A Cluster][use running cluster] for the cluster creation process. + + + + diff --git a/content/riak/kv/3.0.3/configuring/global-object-expiration.md b/content/riak/kv/3.0.3/configuring/global-object-expiration.md new file mode 100644 index 0000000000..0bae8322ce --- /dev/null +++ b/content/riak/kv/3.0.3/configuring/global-object-expiration.md @@ -0,0 +1,90 @@ +--- +title: "Configure Global Object Expiration" +description: "Enabling and configuring global object expiration for Riak KV." 
+menu:
+  riak_kv-3.0.3:
+    name: "Global Object Expiration"
+    identifier: "config_expiry"
+    weight: 180
+    parent: "configuring"
+project: "riak_kv"
+project_version: 3.0.3
+toc: true
+aliases:
+---
+
+[ttl]: https://en.wikipedia.org/wiki/Time_to_live
+
+By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data.
+
+Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value.
+
+## Enabling Expiry
+
+To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file:
+
+```riak.conf
+leveldb.expiration = on
+```
+
+{{% note %}}
+Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration.
+{{% /note %}}
+
+## Setting Retention Time
+
+The `retention_time` setting is used to specify the time until objects expire.
+Durations are set using a combination of an integer and a shortcut for the supported units:
+
+- Milliseconds - `ms`
+- Seconds - `s`
+- Minutes - `m`
+- Hours - `h`
+- Days - `d`
+- Weeks - `w`
+- Fortnights - `f`
+
+The following example configures objects to expire after 5 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 5h
+```
+
+You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 8d9h
+```
+
+## Expiry Modes
+
+Global expiration supports two modes:
+
+- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired.
+- `normal` - individual objects are removed as part of the usual compaction process.
+
+We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient.
+
+The following example configures objects to expire after 1 day:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+## Disable Expiry
+
+To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other two settings are ignored. For example:
+
+```riak.conf
+leveldb.expiration = off
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/configuring/load-balancing-proxy.md b/content/riak/kv/3.0.3/configuring/load-balancing-proxy.md
new file mode 100644
index 0000000000..3484cb14cc
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/load-balancing-proxy.md
@@ -0,0 +1,275 @@
+---
+title: "Load Balancing and Proxy Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Load Balancing & Proxy"
+    identifier: "configuring_load_balance"
+    weight: 150
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/advanced/configs/load-balanacing-proxy/
+  - /riak/kv/3.0.3/ops/advanced/configs/load-balanacing-proxy/
+---
+
+[perf open files]: {{}}riak/kv/3.0.3/using/performance/open-files-limit
+
+The recommended best practice for operating Riak in production is to
+place Riak behind a load-balancing or proxy solution, either hardware-
+or software-based, while never directly exposing Riak to public network
+interfaces.
+
+Riak users have reported success in using Riak with a variety of load-
+balancing and proxy solutions. Common solutions include proprietary
+hardware-based load balancers, cloud-based load balancing options, such
+as Amazon's Elastic Load Balancer, and open-source software-based
+projects like HAProxy and Nginx.
+
+This guide briefly explores the commonly used open-source software-based
+solutions HAProxy and Nginx, and provides some configuration and
+operational tips gathered from community users and operations-oriented
+engineers at Basho.
+
+While it is by no means an exhaustive overview of the topic, this guide
+should provide a starting point for choosing and implementing your own
+solution.
+
+## HAProxy
+
+[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source
+solution for load balancing and proxying of HTTP- and TCP-based
+application traffic.
+
+Users have reported success in using HAProxy in combination with Riak in
+a number of configurations and scenarios. Much of the information and
+example configuration for this section is drawn from experiences of
+users in the Riak community in addition to suggestions from Basho
+engineering.
+
+### Example Configuration
+
+The following is an example starting-point configuration for HAProxy to
+act as a load balancer. The example cluster has 4 nodes and will be
+accessed by Riak clients using both the Protocol Buffers and HTTP
+interfaces.
+
+> **Note on open files limits**
+>
+> The operating system's open files limits need to be greater than 256000
+for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems.
+
+```config
+global
+    log 127.0.0.1 local0
+    log 127.0.0.1 local1 notice
+    maxconn 256000
+    chroot /var/lib/haproxy
+    user haproxy
+    group haproxy
+    spread-checks 5
+    daemon
+    quiet
+
+defaults
+    log global
+    option dontlognull
+    option redispatch
+    option allbackups
+    maxconn 256000
+    timeout connect 5000
+
+backend riak_rest_backend
+    mode http
+    balance roundrobin
+    option httpchk GET /ping
+    option httplog
+    server riak1 riak1.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak2 riak2.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak3 riak3.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak4 riak4.<fqdn>:8098 weight 1 maxconn 1024 check
+
+frontend riak_rest
+    bind 127.0.0.1:8098
+    # Example bind for SSL termination
+    # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem
+    mode http
+    option contstats
+    default_backend riak_rest_backend
+
+
+backend riak_protocol_buffer_backend
+    balance leastconn
+    mode tcp
+    option tcpka
+    option srvtcpka
+    server riak1 riak1.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak2 riak2.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak3 riak3.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak4 riak4.<fqdn>:8087 weight 1 maxconn 1024 check
+
+
+frontend riak_protocol_buffer
+    bind 127.0.0.1:8087
+    mode tcp
+    option tcplog
+    option contstats
+    option tcpka
+    option srvtcpka
+    default_backend riak_protocol_buffer_backend
+```
+
+A specific configuration detail worth noting from the example is the
+commented option for SSL termination. HAProxy supports SSL directly as
+of version 1.5. Provided that your HAProxy instance was built with
+OpenSSL support, you can enable it by uncommenting the example line and
+modifying it to suit your environment. More information is available in
+the [HAProxy
+documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl).
+
+Also note that the above example is considered a starting point and is a
+work in progress based upon [this
+example](https://gist.github.com/1507077). You should carefully examine
+the configuration and change it according to your specific environment.
+
+### Maintaining Nodes Behind HAProxy
+
+When using HAProxy with Riak, you can instruct HAProxy to ping each node
+in the cluster and automatically remove nodes that do not respond.
+
+You can also specify a round-robin configuration in HAProxy and have
+your application handle connection failures by retrying after a timeout,
+thereby reaching a functioning node upon retrying the connection
+attempt.
+
+HAProxy also has a standby system you can use to remove a node from
+rotation while allowing existing requests to finish. You can remove
+nodes from HAProxy directly from the command line by interacting with
+the HAProxy stats socket with a utility such as
+[socat](http://www.dest-unreach.org/socat/):
+
+```bash
+echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+At this point, you can perform maintenance on the node, down the node,
+and so on. When you've finished working with the node and it is again
+available for requests, you can re-enable it:
+
+```bash
+echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+Consult the following HAProxy documentation resources for more
+information on configuring HAProxy in your environment:
+
+* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy)
+* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)
+
+## Nginx
+
+Some users have reported success in using the [Nginx](http://nginx.org/)
+HTTP server to proxy requests for Riak clusters. An example that
+provides access to a Riak cluster *through GET requests only* is
+provided here for reference.
+
+### Example Configuration
+
+The following is an example starting-point configuration for Nginx to
+act as a front-end proxy to a 5-node Riak cluster.
+
+This example forwards all GET requests to Riak nodes while rejecting all
+other HTTP operations.
+
+{{% note title="Nginx version notes" %}}
+This example configuration was verified on **Nginx version 1.2.3**. Please be
+aware that earlier versions of Nginx did not support any HTTP 1.1 semantics
+for upstream communication to backends. You should carefully examine this
+configuration and make changes appropriate to your specific environment before
+attempting to use it.
+{{% /note %}}
+
+Here is an example `nginx.conf` file:
+
+```config
+upstream riak_hosts {
+  # server 10.0.1.10:8098;
+  # server 10.0.1.11:8098;
+  # server 10.0.1.12:8098;
+  # server 10.0.1.13:8098;
+  # server 10.0.1.14:8098;
+}
+
+server {
+  listen   80;
+  server_name  _;
+  access_log  /var/log/nginx/riak.access.log;
+
+  # your standard Nginx config for your site here...
+  location / {
+    root /var/www/nginx-default;
+  }
+
+  # Expose the /riak endpoint and allow queries for keys only
+  location /riak/ {
+      proxy_set_header Host $host;
+      proxy_redirect off;
+
+      client_max_body_size     10m;
+      client_body_buffer_size  128k;
+
+      proxy_connect_timeout    90;
+      proxy_send_timeout       90;
+      proxy_read_timeout       90;
+
+      proxy_buffer_size        64k;  # If set to a smaller value,
+                                     # nginx can complain with a
+                                     # "too large headers" error
+      proxy_buffers            4 64k;
+      proxy_busy_buffers_size  64k;
+      proxy_temp_file_write_size 64k;
+
+      if ($request_method != GET) {
+          return 405;
+      }
+
+      # Disallow any link with the MapReduce query format "bucket,tag,_"
+      if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) {
+          return 405;
+      }
+
+      if ($request_method = GET) {
+          proxy_pass http://riak_hosts;
+      }
+  }
+}
+```
+
+{{% note title="Note on access controls" %}}
+Even when filtering and limiting requests to GETs only as done in the example,
+you should strongly consider additional access controls beyond what Nginx can
+provide directly, such as specific firewall rules to limit inbound connections
+to trusted sources.
+{{% /note %}}
+
+### Querying Secondary Indexes Over HTTP
+
+When accessing Riak over HTTP and issuing Secondary Index queries, you
+can encounter an issue due to the default Nginx handling of HTTP header
+names containing underscore (`_`) characters.
+
+By default, Nginx will issue errors for such queries, but you can
+instruct Nginx to handle such header names when doing Secondary Index
+queries over HTTP by adding the following directive to the appropriate
+`server` section of `nginx.conf`:
+
+```
+underscores_in_headers on;
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/configuring/managing.md b/content/riak/kv/3.0.3/configuring/managing.md
new file mode 100644
index 0000000000..70f99fe1d4
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/managing.md
@@ -0,0 +1,121 @@
+---
+title: "Managing Your Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Managing Configuration"
+    identifier: "configuring_managing"
+    weight: 130
+    parent: "configuring"
+toc: true
+aliases:
+---
+
+[use admin riak cli]: {{}}riak/kv/3.0.3/using/admin/riak-cli
+[use admin riak cli#chkconfig]: {{}}riak/kv/3.0.3/using/admin/riak-cli/#chkconfig
+[config reference#search]: {{}}riak/kv/3.0.3/configuring/reference/#search
+
+## Retrieving a Configuration Listing
+
+At any time, you can get a snapshot of currently applied configurations
+through the command line. For a listing of *all* of the configs
+currently applied in the node:
+
+```bash
+riak config effective
+```
+
+This will output a long list of the following form:
+
+```
+anti_entropy = active
+anti_entropy.bloomfilter = on
+anti_entropy.concurrency_limit = 2
+# and so on
+```
+
+For detailed information about a particular configuration variable, use
+the `config describe <variable>` command. This command will output a
+description of what the parameter configures, which datatype you should
+use to set the parameter (integer, string, enum, etc.), the default
+value of the parameter, the currently set value in the node, and the
+name of the parameter in `app.config` in older versions of Riak (if
+applicable).
+
+For in-depth information about the `ring_size` variable, for example:
+
+```bash
+riak config describe ring_size
+```
+
+This will output the following:
+
+```
+Documentation for ring_size
+Number of partitions in the cluster (only valid when first
+creating the cluster).
Must be a power of 2, minimum 8 and maximum
+1024.
+
+   Datatype     : [integer]
+   Default Value: 64
+   Set Value    : undefined
+   app.config   : riak_core.ring_creation_size
+```
+
+## Checking Your Configuration
+
+The [`riak`][use admin riak cli] command line tool has a
+[`chkconfig`][use admin riak cli#chkconfig] command that enables you to
+determine whether the syntax in your configuration files is correct.
+
+```bash
+riak chkconfig
+```
+
+If your configuration files are syntactically sound, you should see the
+output `config is OK` followed by a listing of files that were checked.
+You can safely ignore this listing. If, however, something is
+syntactically awry, you'll see an error output that provides details
+about what is wrong. To give an example, the `search.solr.jmx_port`
+setting (in the [Search][config reference#search] section below)
+must be set as an integer. Imagine that we set it to something else:
+
+```riakconf
+search.solr.jmx_port = banana
+```
+
+If we run `riak chkconfig` now, we'll get an error:
+
+```
+[error] Error generating configuration in phase transform_datatypes
+[error] Error transforming datatype for: search.solr.jmx_port
+[error] "banana" can't be converted to an integer
+```
+
+The error message will specify which configurable parameters are
+syntactically unsound and attempt to provide an explanation of why.
+
+Please note that the `chkconfig` command only checks for syntax. It will
+_not_ be able to discern if your configuration is otherwise unsound,
+e.g. if your configuration will cause problems on your operating system
+or doesn't activate subsystems that you would like to use.
+
+## Debugging Your Configuration
+
+If there is a problem with your configuration but you're having trouble
+identifying the problem, there is a command that you can use to debug
+your configuration:
+
+```bash
+riak config generate -l debug
+```
+
+If there are issues with your configuration, you will see detailed
+output that might provide a better sense of what has gone wrong in the
+config generation process.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/configuring/mapreduce.md b/content/riak/kv/3.0.3/configuring/mapreduce.md
new file mode 100644
index 0000000000..94ebfa171b
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/mapreduce.md
@@ -0,0 +1,200 @@
+---
+title: "MapReduce Settings"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "MapReduce Settings"
+    identifier: "configuring_mapreduce"
+    weight: 170
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/advanced/configs/mapreduce/
+  - /riak/kv/3.0.3/ops/advanced/configs/mapreduce/
+---
+
+[usage mapreduce]: {{}}riak/kv/3.0.3/developing/usage/mapreduce
+[config reference#appconfig]: {{}}riak/kv/3.0.3/configuring/reference/#app-config
+[usage secondary-indexes]: {{}}riak/kv/3.0.3/developing/usage/secondary-indexes
+
+## Configuring MapReduce
+
+[MapReduce (M/R)][usage mapreduce] is always enabled, but configurable
+through the [app.config][config reference#appconfig] file as
+follows under `riak_kv`:
+
+```erlang
+{riak_kv, [
+```
+
+`mapred_name` is the URL directory used to submit M/R requests to Riak.
+By default this is `mapred`, making the command path, for example,
+`http://localhost:8098/mapred`.
+
+```erlang
+    {mapred_name, "mapred"},
+```
+
+`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes]
+MapReduce inputs are queued in parallel in their own pipe (`true`), or
+serially through a helper process (`false` or undefined).
> **Note**: Set to `false` or leave undefined during an upgrade from 1.0.

```erlang
    {mapred_2i_pipe, true},
```

Each of the following entries controls how many JavaScript virtual machines
are available for executing map, reduce, and pre- and post-commit hook
functions. These are largely relevant only if you are writing JavaScript
M/R jobs.

```erlang
    {map_js_vm_count, 8 },
    {reduce_js_vm_count, 6 },
    {hook_js_vm_count, 2 },
```

`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated
to the JavaScript VMs. If unset, the default is 8MB. This, too, is largely
relevant only if you are writing JavaScript M/R jobs.

```erlang
    {js_max_vm_mem, 8},
```

`js_thread_stack` is the maximum amount of thread stack, in megabytes,
allocated to the JavaScript VMs. If unset, the default is 16MB.

> **Note**: This is not the same as the C thread stack.

```erlang
    {js_thread_stack, 16},
```

`js_source_dir` should point to a directory containing JavaScript source
files which will be loaded when Riak initializes JavaScript VMs.

```erlang
    %{js_source_dir, "/tmp/js_source"},
```

## Configuration Tuning for Javascript

If you load larger JSON objects into your buckets, you might encounter an error like the following:

```json
 {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"}
```

You can increase the amount of memory allocated to the JavaScript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB:

```erlang
{js_thread_stack, 8}
```

becomes

```erlang
{js_thread_stack, 32},
```

In addition to increasing the amount of memory allocated to the stack, you can increase the heap size by raising `js_max_vm_mem` from the default of 8MB. If you are collecting a large number of results in a reduce phase, you may need to increase this setting.
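Taken together, a minimal sketch of the JavaScript-related M/R settings discussed above might look like the following in the `riak_kv` section of `app.config` (the values shown are the defaults, for illustration only):

```erlang
{riak_kv, [
    {mapred_name, "mapred"},
    {mapred_2i_pipe, true},
    %% JavaScript VM pools for map, reduce, and hook functions
    {map_js_vm_count, 8},
    {reduce_js_vm_count, 6},
    {hook_js_vm_count, 2},
    %% Per-VM heap and thread stack limits, in megabytes
    {js_max_vm_mem, 8},
    {js_thread_stack, 16}
    %% {js_source_dir, "/tmp/js_source"}
]}
```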
## Configuration for Riak 1.0

Riak 1.0 is the first release to include the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the `riak_kv` section of each node's app.config:

```erlang
%% Use Riak Pipe to power MapReduce queries
{mapred_system, pipe},
```

> **Warning:**
>
> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0.

Other than the speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to the legacy subsystem by either removing the aforementioned line from your app.config or by changing it to read like this:

```erlang
%% Use the legacy MapReduce system
{mapred_system, legacy},
```

## Configuration Tuning for Reduce Phases

If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases.

### Batch Size

By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs. If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the `riak_kv` section of your app.config:

```erlang
%% Run reduce functions after 100 new inputs are received
{mapred_reduce_phase_batch_size, 100},
```

You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this:

```json
{"reduce":
  {...language, etc. as usual...
   "arg":{"reduce_phase_batch_size":150}}}
```

In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form:

```erlang
{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep}
```

Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead:

```json
{"reduce":
  {...language, etc. as usual...
   "arg":{"reduce_phase_only_1":true}}}
```

Similarly, in Erlang:

```erlang
{reduce, FunSpec, [reduce_phase_only_1], Keep}
```

> **Warning:**
>
> A known bug in Riak 1.0.0 means that a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1.

### Pre-Reduce

If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced local to the partition that produced them, before being sent, as usual, to the final aggregate reduce.

Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the `riak_kv` section of your app.config:

```erlang
%% Always pre-reduce between map and reduce phases
{mapred_always_prereduce, true}
```

Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API, for map phases implemented in Erlang. To enable pre-reduce for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag:

```erlang
{map, FunSpec, [do_prereduce], Keep}
```

> **Warning:**
>
> A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for JavaScript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1.

diff --git a/content/riak/kv/3.0.3/configuring/next-gen-replication.md b/content/riak/kv/3.0.3/configuring/next-gen-replication.md
new file mode 100644
index 0000000000..642ea98bc8
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/next-gen-replication.md

---
title_supertext: "Configuring:"
title: "Next Gen Replication"
description: ""
project: "riak_kv"
project_version: 3.0.3
menu:
  riak_kv-3.0.3:
    name: "Next Gen Replication"
    identifier: "nextgen_rep"
    weight: 200
    parent: "configuring"
version_history:
  in: "2.9.1+"
toc: true
commercial_offering: true
aliases:
---

The configuration for Next Gen Replication is kept in the `riak.conf` configuration file.
## Settings

Once your configuration is set, you can verify its correctness by
running the `riak` command-line tool:

```bash
riak chkconfig
```

## riak.conf Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync, does all data need to be sync'd, or should a specific bucket (bucket) or a specific bucket type (type) be sync'd? Note that in most cases a sync of all data is lower overhead than a sync of a subset of data, as cached AAE trees will be used.
`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For Tictac full-sync, the registered queue name on this cluster to be used for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exclusive to full-sync (i.e. a real-time replication queue may be used as well).
`ttaaefs_maxresults` | `any` (integer) | `64` | For Tictac full-sync, the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair.
`ttaaefs_rangeboost` | `any` (integer) | `8` | For Tictac full-sync, a multiplier applied when running a range_check query: the maximum number of AAE segments compared becomes `ttaaefs_maxresults` * `ttaaefs_rangeboost`.
`ttaaefs_bucketfilter_name` | `any` (text) | `` | For Tictac bucket full-sync, the bucket to be sync'd by this node. Only ASCII string bucket definitions are supported (these will be converted using list_to_binary).
`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync, the bucket type of the bucket name. Only ASCII string type definitions are supported (these will be converted to binary using list_to_binary).
`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync, the NVAL to be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval.
`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync, the NVAL to be sync'd in the remote cluster.
`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node in the remote cluster to which this node will connect for full-sync purposes. If this peer node is unavailable, then this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes.
`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster.
`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when connecting to the peer in the remote cluster. Can be http or pb (but only http is currently being tested).
`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24-hour period all the data should be checked to confirm it is fully sync'd. When running a full (i.e. nval) sync, this will check all the data under that nval between the clusters and, when the trees are out of alignment, will check across all data where the nval matches the specified nval.
`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24-hour period no data should be checked. Use no-checks to align the number of checks done by each node - if each node has the same number of slots, they will naturally space their checks within the period of the slot.
`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24-hour period the last hour's data should be checked to confirm it is fully sync'd.
`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24-hour period the last 24 hours of data should be checked to confirm it is fully sync'd.
`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24-hour period a range_check should be run.
`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, whether each repaired key should be logged.
`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable Tictac AAE. Note that disabling Tictac AAE sets the use of tictacaae_active only at startup - setting the environment variable at runtime will have no impact.
`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing, and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Sets the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and they are not removed when that partition hands off (although the contents should be cleared).
`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if Tictac AAE is enabled, the vnode will detect whether the vnode backend has the capability to be a "native" store. If not, parallel mode will be entered and a parallel AAE keystore will be started. There are two potential parallel store backends: leveled_ko and leveled_so.
`tictacaae_rebuildwait` | `` | `336` | The number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
`tictacaae_rebuilddelay` | `` | `345600` | Once a rebuild is due (because the rebuild wait has expired), it will not be triggered immediately; instead it is delayed by a random number of seconds up to this value.
`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default, when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will be skipped when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans from developing.
`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved in each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data; hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired in each pass is defined by tictacaae_maxresults.
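To illustrate how these settings fit together, here is a minimal, hypothetical `riak.conf` sketch for a node participating in an all-data full-sync with a remote cluster; the peer address is a placeholder, and the remaining values are the defaults described above:

```riakconf
ttaaefs_scope = all
ttaaefs_queuename = q1_ttaaefs
ttaaefs_localnval = 3
ttaaefs_remotenval = 3
# Placeholder peer coordinates - use a real node in the remote cluster
ttaaefs_peerip = 10.0.0.1
ttaaefs_peerport = 8898
ttaaefs_peerprotocol = http
# Check all data once per hour (24 times per 24-hour period)
ttaaefs_allcheck = 24
```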
+ diff --git a/content/riak/kv/3.0.3/configuring/reference.md b/content/riak/kv/3.0.3/configuring/reference.md new file mode 100644 index 0000000000..0c700d2ed9 --- /dev/null +++ b/content/riak/kv/3.0.3/configuring/reference.md @@ -0,0 +1,2034 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/3.0.3/ops/advanced/configs/configuration-files/ + - /riak/kv/3.0.3/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. 
## Node Metadata

Every Riak node has a name and a cookie used to facilitate inter-node
communication. The following parameters enable you to customize the name
and cookie.

Config | Description | Default
:------|:------------|:-------
`distributed_cookie` | Cookie for distributed node communication within a Riak cluster. All nodes in the same cluster should use the same cookie or they will not be able to communicate. | `riak`
`nodename` | The name of the Riak node. | `riak@127.0.0.1`
`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
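For example, a minimal sketch giving a node a routable name and a shared cluster cookie (the host address and cookie value are illustrative placeholders):

```riakconf
nodename = riak@192.168.1.10
distributed_cookie = mycluster_cookie
# Only honored when the cluster is first created
ring_size = 128
```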
## Ring

Configurable parameters for your cluster's [ring][concept clusters].

Config | Description | Default
:------|:------------|:-------
`ring.state_dir` | Default location of ringstate. | `./data/ring`
`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
`transfer_limit` | Number of concurrent node-to-node transfers allowed. | `2`
## Storage Backend

Riak enables you to choose from the following storage backends:

* [Bitcask][plan backend bitcask] - [configuration][config backend bitcask]
* [LevelDB][plan backend leveldb] - [configuration][config backend leveldb]
* [Leveled][plan backend leveled] - [configuration][config backend leveled]
* [Memory][plan backend memory] - [configuration][config backend memory]
* [Multi][plan backend multi] - [configuration][config backend multi]

Config | Description | Default
:------|:------------|:-------
`storage_backend` | Specifies the storage engine used for Riak's key-value data and secondary indexes (if supported). The available options are `bitcask` (the default), `leveldb`, `memory`, `leveled` and `multi`. | `bitcask`
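As a one-line sketch, selecting LevelDB as the backend for a node looks like this in `riak.conf`:

```riakconf
storage_backend = leveldb
```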
## Directories

The directories in which Riak stores data, logs, dependencies,
executables, and configuration files can be configured using the
parameters below.

Config | Description | Default
:------|:------------|:-------
`platform_bin_dir` | The directory in which the `riak-admin`, `riak-debug`, and now-deprecated `search-cmd` executables are stored. | `./bin`
`platform_data_dir` | The directory in which Riak stores its storage backend data, as well as active anti-entropy data and cluster metadata. | `./data`
`platform_etc_dir` | The directory in which Riak's configuration files are stored. | `./etc`
`platform_lib_dir` | The directory in which Riak's dependencies are housed. | `./lib`
`platform_log_dir` | The directory in which Riak's log files are stored, e.g. the `console.log`, `erlang.log`, and `crash.log` files. | `./log`
Each of these directory parameters can be used to construct values for
other parameters by placing it within a `$(...)`. Thus,
`platform_log_dir` becomes `$(platform_log_dir)` and so on.

To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the
`anti_entropy.data_dir` parameter. When setting that parameter, you can
specify an absolute directory, as below:

```riakconf
anti_entropy.data_dir = /path/to/anti_entropy
```

Or you can use the value of `platform_data_dir`:

```riakconf
anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
```

## Search

Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].

Field | Default | Valid values |
:-----|:--------|:-------------|
`search` | `off` | `on` or `off`
`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
`search.dist_query` | `on` | `on` or `off`
`search.index.error_threshold.failure_count` | `3` | Integer
`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
`search.queue.batch.maximum` | `100` | Integer
`search.queue.batch.minimum` | `1` | Integer
`search.queue.high_watermark` | `10000` | Integer
`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
`search.root_dir` | `./data/yz` | Directory
`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
`search.solr.jmx_port` | `8985` | Integer
`search.solr.port` | `8093` | Integer
`search.solr.start_timeout` | `30s` | Integer with time units (e.g. 2m)
`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`

## Riak Control

Riak Control is a web-based administrative console for inspecting and
manipulating Riak clusters. The configurable parameters below enable you
to turn the Riak Control subsystem on and off and to configure console
authorization.

Config | Description | Default
:------|:------------|:-------
`riak_control` | Set to `off` to disable the admin panel. | `off`
`riak_control.auth.mode` | Authentication mode used for access to the admin panel. Options are `off` (which is the default) or `userlist`. | `off`
`riak_control.auth.user.$username.password` | If Riak Control's authentication mode (`riak_control.auth.mode`) is set to `userlist`, this is the list of usernames and passwords for access to the admin panel. |
## Runtime Health

Configurable parameters for interaction between Riak and the underlying
operating system.

Config | Description | Default
:------|:------------|:-------
`runtime_health.triggers.distribution_port` | Whether distribution ports with full input buffers will be counted as busy. Distribution ports connect Riak nodes within a single cluster. | `on`
`runtime_health.triggers.port` | Whether ports with full input buffers will be counted as busy. Ports can represent open files or network sockets. | `on`
`runtime_health.triggers.process.heap_size` | A process will become busy when its heap exceeds this size (in bytes). | `160444000`
`runtime_health.triggers.process.garbage_collection` | A process will become busy when it exceeds this amount of time doing garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. **Note**: Enabling this setting can cause performance problems on multi-core systems. | `off`
`runtime_health.triggers.process.long_schedule` | A process will become busy when it exceeds this amount of time during a single process scheduling and execution cycle. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. | `off`
`runtime_health.thresholds.busy_ports` | The threshold at which a warning will be triggered about the number of ports that are overly busy. Ports with full input buffers count toward this threshold. | `2`
`runtime_health.thresholds.busy_processes` | The threshold at which a warning will be triggered about the number of processes that are overly busy. Processes with large heaps or that take a long time to garbage collect will count toward this threshold. | `30`
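As a sketch, the following `riak.conf` lines would flag processes that spend more than 50 milliseconds in garbage collection and raise the busy-process warning threshold (the values are illustrative, not tuning recommendations):

```riakconf
runtime_health.triggers.process.garbage_collection = 50ms
runtime_health.thresholds.busy_processes = 50
```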
## Default Bucket Properties

When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified.

Config | Description | Default
:------|:------------|:-------
`buckets.default.allow_mult` | Whether or not siblings are allowed. **Note**: See [Conflict Resolution][usage conflict resolution] for a discussion of siblings. | `false`
`buckets.default.basic_quorum` | Whether not-founds will invoke the "basic quorum" optimization. This setting will short-circuit fetches where the majority of replicas report that the key is not found. Only used when `notfound_ok` is set to `false`. | `false`
`buckets.default.dw` | The number of replicas which must reply to a write request indicating that the write was committed to durable storage for the write to be deemed successful. | `quorum`
`buckets.default.last_write_wins` | Whether conflicting writes resolve via timestamp. | `false`
`buckets.default.merge_strategy` | The strategy used when merging objects that potentially have conflicts. The default is `2` in Riak 2.0 for typed buckets and `1` for non-typed buckets. This setting reduces sibling creation through additional metadata on each sibling (also known as Dotted Version Vectors). Setting this to `1` is the default for Riak 1.4 and earlier, and may duplicate siblings that originated in the same write. | `1`
`buckets.default.n_val` | The number of replicas stored in **non-typed** buckets. For typed buckets, the default is `3` unless changed explicitly for that bucket type. **Note**: See Replication Properties for further discussion. | `3`
`buckets.default.notfound_ok` | Whether not-founds will count toward a quorum of reads. | `true`
`buckets.default.postcommit` | A space-delimited list of functions that will be run after a value is stored. Only Erlang functions are allowed, using the `module:function` format. |
`buckets.default.precommit` | A space-delimited list of functions that will be run before a value is stored, and that can abort the write. Only Erlang functions are allowed, using the `module:function` format. |
`buckets.default.pr` | The number of primary, non-fallback replicas that must reply to a read request. | `0`
`buckets.default.pw` | The number of primary, non-fallback replicas which must reply to a write request. | `0`
`buckets.default.r` | The number of replicas which must reply to a read request. | `quorum`
`buckets.default.w` | The number of replicas which must reply to a write request, indicating that the write was received. | `quorum`
`buckets.default.rw` | The number of replicas which must reply to a delete request. | `quorum`
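For example, a minimal sketch raising the default replication factor and enabling siblings for untyped buckets (illustrative values only; weigh these against your conflict-resolution strategy):

```riakconf
# Store five replicas of each object in non-typed buckets
buckets.default.n_val = 5
# Allow siblings so conflicting writes are preserved for resolution
buckets.default.allow_mult = true
```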
## Object Settings

Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context].

Config | Description | Default
:------|:------------|:-------
`object.format` | Controls which binary representation of a riak value is stored on disk. Options are `0`, which will use the original `erlang:term_to_binary` format but has a higher space overhead, or `1`, which will tell Riak to utilize a new format for more compact storage of small values. | `1`
`object.siblings.maximum` | Writing an object with more than this number of siblings will send a failure to the client. | `100`
`object.siblings.warning_threshold` | Writing an object with more than this number of siblings will generate a warning in the logs. | `25`
`object.size.maximum` | Writing an object larger than this will send a failure to the client. | `50MB`
`object.size.warning_threshold` | Reading or writing objects larger than this size will write a warning in the logs. | `5MB`
## Erlang VM

In the older configuration system, the Erlang VM in which Riak runs was
configured using a `vm.args` file. In the new, `riak.conf`-based
system, the Erlang VM can be configured using the parameters in the
table below.

Config | Description | Default
:------|:------------|:-------
`erlang.async_threads` | The number of threads in the Erlang VM's asynchronous thread pool. The valid range is 0-1024. If thread support is not available, this parameter will have no impact; if thread support is available, the default value is 64. This is the equivalent of the `+A` flag. | `64` (if thread support is available)
`erlang.async_threads.stack_size` | If thread support is available in your Erlang VM, this parameter sets the amount of memory allocated to each asynchronous thread, which you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, which translates to 64-32768 KB on 32-bit architectures. Although there is no default, we suggest a stack size of 16 kilowords, which translates to 64 KB. This small suggested size has been chosen because the number of asynchronous threads, set using the `erlang.async_threads` parameter explained above, might be quite large. The 64 KB default is enough for drivers delivered with Erlang/OTP but might not be large enough to accommodate drivers that use the `driver_async()` functionality. |
`erlang.distribution.net_ticktime` | The net kernel is an Erlang system process that provides various forms of network monitoring. In a Riak cluster, one of the functions of the net kernel is to periodically check node liveness. Tick time is the frequency with which those checks happen; this parameter determines that frequency. If you set this parameter to `10`, for example, the tick will occur once every 10 seconds. |
`erlang.distribution.port_range.minimum` | For ease of firewall configuration, the Erlang distribution can be bound to a limited range of TCP ports. If this parameter is set, and `erlang.distribution.port_range.maximum` is not set, only this port will be used. If the minimum is unset, no restriction will be made on the port range; instead, Erlang will listen on a random high-numbered port. |
`erlang.distribution.port_range.maximum` | See the description for `erlang.distribution.port_range.minimum` directly above. |
`erlang.schedulers.force_wakeup_interval` | Sets the scheduler forced wakeup interval. All run queues will be scanned each time period specified (in milliseconds). While there are sleeping schedulers in the system, one scheduler will be woken for each non-empty run queue found. An interval of zero disables this feature, which is the default. This feature is a workaround for lengthy-executing native code, and native code that does not properly bump reductions. |
`erlang.schedulers.compaction_of_load` | Enables or disables the Erlang scheduler's compaction of load. When enabled (the default), load balancing will strive to establish a load distribution that causes as many scheduler threads as possible to be fully loaded, i.e. not to run out of scheduled work. This is accomplished by migrating load, such as running processes, onto a smaller set of schedulers when schedulers frequently run out of work. When disabled, the frequency at which schedulers run out of work will not be taken into account by the load balancing logic. | `true` (enabled)
`erlang.schedulers.utilization_balancing` | Enables or disables the Erlang scheduler's balancing of load. By default, scheduler utilization balancing is disabled while scheduler compaction of load is enabled, i.e. `erlang.schedulers.compaction_of_load` is set to `true`. In this state, the Erlang VM will strive for a load distribution which causes as many scheduler threads as possible to be fully loaded, i.e. to not run out of work. When load balancing is enabled using this setting, the system will instead attempt to equalize scheduler utilization between schedulers. | `false` (disabled)
`erlang.distribution_buffer_size` | For nodes with many `busy_dist_port` events, Basho recommends raising the sender-side network distribution buffer size. 32MB may not be sufficient for some workloads and is a suggested starting point. Erlangers may know this as `+zdbbl`. | `32MB`
`erlang.process_limit` | Raises the default Erlang process limit. | `256000`
`erlang.max_ets_tables` | Raises the ETS table limit. | `256000`
`erlang.crash_dump` | Sets the location of crash dumps. | `./log/erl_crash.dump`
`erlang.fullsweep_after` | A non-negative integer which indicates how many times generational garbage collections can be done without forcing a fullsweep collection. In low-memory systems (especially without virtual memory), setting the value to `0` can help to conserve memory. | `0`
`erlang.max_ports` | The number of concurrent ports/sockets. The valid range is 1024 to 134217727. | `65536`
`erlang.K` | Enables or disables the kernel poll functionality if the emulator supports it. If the emulator does not support kernel poll, and the `K` flag is passed to the emulator, a warning is issued at startup. | `on`
`erlang.schedulers.total` | Sets the number of scheduler threads to create and scheduler threads to set online when `erlang.smp` support has been enabled. The maximum for both values is 1024. If the Erlang runtime system is able to determine the number of logical processors configured and logical processors available, `schedulers.total` will default to the number of logical processors configured and `schedulers.online` will default to the number of logical processors available; otherwise, the default values will be 1. `schedulers.total` may be omitted if `schedulers.online` is not, and vice versa. If `schedulers.total` or `schedulers.online` is specified as a negative number, the value is subtracted from the default number of logical processors configured or logical processors available, respectively. Specifying the value `0` for `schedulers.total` or `schedulers.online` resets the number of scheduler threads or scheduler threads online, respectively, to its default value. This option is ignored if the emulator doesn't have SMP support enabled (see the `erlang.smp` flag). |
`erlang.schedulers.online` | See the description for `erlang.schedulers.total` directly above. |
`erlang.W` | Sets the mapping of warning messages for `error_logger`. Messages sent to the error logger using one of the warning routines can be mapped either to errors, warnings (`w`, which is the default), or info reports (`i`). | `w`
`erlang.smp` | Starts the Erlang runtime system with SMP support enabled. This may fail if no runtime system with SMP support is available. The `auto` setting starts the Erlang runtime system with SMP support enabled if it is available and more than one logical processor is detected. A value of `disable` starts a runtime system without SMP support. **Note**: The runtime system with SMP support will not be available on all supported platforms. See also the `erlang.schedulers` settings. Some native extensions (NIFs) require use of the SMP emulator. | `enable`
`erlang.shutdown_time` | Limits how long the Erlang VM spends shutting down. After the specified duration elapses, all existing processes are killed. | `10s`
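A hypothetical sketch of a few of these VM knobs in `riak.conf` (the port range and values are illustrative, not tuning advice):

```riakconf
erlang.async_threads = 64
erlang.distribution_buffer_size = 32MB
# Restrict distribution to a fixed port range for firewall rules
erlang.distribution.port_range.minimum = 6000
erlang.distribution.port_range.maximum = 7999
```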
## JavaScript MapReduce

Configurable parameters for Riak's now-deprecated JavaScript
[MapReduce][usage mapreduce] system.

Config | Description | Default
:------|:------------|:-------
`javascript.source_dir` | A directory containing the JavaScript source files which will be loaded by Riak when it initializes JavaScript VMs. |
`javascript.maximum_stack_size` | The maximum amount of thread stack memory to allocate to each JavaScript virtual machine. | `16MB`
`javascript.maximum_heap_size` | The maximum amount of memory allocated to each JavaScript virtual machine. | `8MB`
`javascript.hook_pool_size` | The number of JavaScript virtual machines available for executing pre-commit hook functions. | `2`
`javascript.reduce_pool_size` | The number of JavaScript virtual machines available for executing reduce functions. | `6`
`javascript.map_pool_size` | The number of JavaScript virtual machines available for executing map functions. | `8`
## Security

Configurable parameters for [Riak KV Security][security index].

Config | Description | Default
:------|:------------|:-------
`ssl.cacertfile` | The default signing authority location for HTTPS. | `$(platform_etc_dir)/cacertfile.pem`
`ssl.keyfile` | Default key location for HTTPS. | `$(platform_etc_dir)/key.pem`
`ssl.certfile` | Default cert location for HTTPS. | `$(platform_etc_dir)/cert.pem`
`secure_referer_check` | Measures were added to Riak 1.2 to counteract cross-site scripting and request-forgery attacks. Some reverse proxies cannot remove the `Referer` header and make serving data directly from Riak impossible. Setting this to `off` disables this security check. | `on`
`check_crl` | Whether to check the certificate revocation list (CRL) of a client certificate. This defaults to `on`, but some CAs may not maintain or define a CRL, so this can be disabled if no CRL is available. | `on`
`tls_protocols.sslv3` | Determines which SSL/TLS versions are allowed. By default, only TLS 1.2 is allowed, but other versions can be enabled if clients don't support the latest TLS standard. It is strongly recommended that SSLv3 not be enabled unless absolutely necessary. More than one protocol can be enabled at once. The `tls_protocols` parameters below can be used to turn different versions on and off. | `off`
`tls_protocols.tlsv1.2` | | `on`
`tls_protocols.tlsv1.1` | | `off`
`tls_protocols.tlsv1` | | `off`
`honor_cipher_order` | Whether to prefer the order in which the server lists its ciphers. When set to `off`, the client's preferred cipher order dictates which cipher is chosen. | `on`
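For example, a sketch that keeps only TLS 1.2 enabled and disables CRL checking for a private CA that does not publish one (illustrative; weigh the security trade-offs first):

```riakconf
tls_protocols.sslv3 = off
tls_protocols.tlsv1 = off
tls_protocols.tlsv1.1 = off
tls_protocols.tlsv1.2 = on
# Disable only if your CA does not publish a CRL
check_crl = off
```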
## Client Interfaces

Configurable parameters for clients connecting to Riak either through
Riak's Protocol Buffers or HTTP API.

Config | Description | Default
:------|:------------|:-------
`protobuf.nagle` | Turns off Nagle's algorithm for Protocol Buffers connections. This is equivalent to setting the `TCP_NODELAY` option on the socket. | `off`
`protobuf.backlog` | The maximum length to which the queue of pending connections may grow. If set, it must be an integer greater than zero. If you anticipate a huge number of connections being initialized simultaneously, set this number higher. | `128`
`listener.protobuf.$name` | This is the IP address and TCP port to which the Riak Protocol Buffers interface will bind. | `{"127.0.0.1",8087}`
`listener.http.$name` | This is the IP address and TCP port to which the Riak HTTP interface will bind. | `{"127.0.0.1",8098}`
`listener.https.$name` | This is the IP address and TCP port to which the Riak HTTPS interface will bind. |
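The `$name` segment is a label of your choosing. A minimal sketch binding the Protocol Buffers and HTTP listeners to a private interface (the address is a placeholder):

```riakconf
listener.protobuf.internal = 10.0.0.1:8087
listener.http.internal = 10.0.0.1:8098
```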
## Logging

Configurable parameters for [lager](https://github.com/basho/lager),
Riak's logging system.

Config | Description | Default
:------|:------------|:-------
`log.console` | Where to emit the default log messages (typically at `info` severity). Possible values: `off`, which disables console log messages; `file`, which specifies that log messages will be output to the file specified by `log.console.file`; `console`, which outputs messages to standard output (seen when using `riak attach-direct`); or `both`, which outputs messages both to the file specified in `log.console.file` and to standard out. | `file`
`log.console.file` | When `log.console` is set to `file` or `both`, this parameter determines the path of the file to which console messages will be logged. | `./log/console.log`
`log.console.level` | The severity level of the console log. Possible values: `debug`, `info`, `warning`, `error`. | `info`
`log.crash` | Whether to enable the crash log. | `on`
`log.crash.file` | If the crash log is enabled, the file where its messages will be written. | `./log/crash.log`
`log.crash.maximum_message_size` | Maximum size of individual messages in the crash log. | `64KB`
`log.crash.rotation` | The schedule on which to rotate the crash log. | `$D0`
`log.crash.rotation.keep` | The number of rotated crash logs to keep. When set to `current`, only the current open log file is kept. Otherwise, an integer can be specified. | `5`
`log.crash.size` | Maximum size of the crash log before it is rotated. | `10MB`
`log.error.file` | The file where error messages will be logged. | `./log/error.log`
`log.error.messages_per_second` | Maximum number of `error_logger` messages to handle per second. | `100`
`log.error.redirect` | Whether to redirect `error_logger` messages into lager. | `on`
`log.syslog` | When set to `on`, enables log output to syslog. | `off`
`log.syslog.facility` | Sets the facility level of syslog output if `log.syslog` is set to `on`. Possible values: `auth`, `authpriv`, `clock`, `cron`, `daemon`, `ftp`, `kern`, `lpr`, `mail`, `news`, `syslog`, `user`, `uucp`. In addition to these settings, you may also select `local0` through `local7`. | `daemon`
`log.syslog.ident` | If `log.syslog` is set to `on`, this setting determines the prefix appended to each syslog message. | `riak`
`log.syslog.level` | If `log.syslog` is set to `on`, this setting determines the log level of syslog output. Possible values: `alert`, `critical`, `debug`, `emergency`, `error`, `info`, `none`, `notice`, `warning`. | `info`
`sasl` | Whether to enable sasl, Erlang's built-in error logger. | `off`
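For example, a sketch sending log output both to file and to syslog at the `daemon` facility:

```riakconf
log.console = both
log.console.level = info
log.syslog = on
log.syslog.facility = daemon
log.syslog.ident = riak
```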
## Active Anti-Entropy

Configurable parameters for Riak's active anti-entropy subsystem.

Config | Description | Default
:------|:------------|:-------
`anti_entropy` | How Riak will repair out-of-sync keys. If set to `active`, out-of-sync keys will be repaired in the background; if set to `passive`, out-of-sync keys are only repaired on read; and if set to `active-debug`, verbose debugging information will be output. | `active`
`anti_entropy.throttle` | Whether the distributed throttle for Active Anti-Entropy is enabled. | `on`
`anti_entropy.throttle.$tier.mailbox_size` | Sets the throttling tiers for Active Anti-Entropy. Each tier is a minimum vnode mailbox size and a time-delay that the throttle should observe at that size and above. For example, `anti_entropy.throttle.tier1.mailbox_size = 0`, `anti_entropy.throttle.tier1.delay = 0ms`, `anti_entropy.throttle.tier2.mailbox_size = 40`, `anti_entropy.throttle.tier2.delay = 5ms`, etc. If configured, there must be a tier which includes a mailbox size of 0. Both `.mailbox_size` and `.delay` must be set for each tier. |
`anti_entropy.throttle.$tier.delay` | See the description for `anti_entropy.throttle.$tier.mailbox_size` directly above. |
`anti_entropy.bloomfilter` | Bloom filters are highly effective in shortcutting data queries that are destined to not find the requested key, though they tend to entail a small performance cost. | `on`
`anti_entropy.max_open_files` | See `anti_entropy.write_buffer_size` directly below. | `20`
`anti_entropy.write_buffer_size` | The LevelDB options used by Active Anti-Entropy to generate the LevelDB-backed on-disk hashtrees. | `4MB`
`anti_entropy.data_dir` | The directory where AAE hash trees are stored. | `./data/anti_entropy`
`anti_entropy.trigger_interval` | The tick determines how often the Active Anti-Entropy manager looks for work to do (building/expiring trees, triggering exchanges, etc.). Lowering this value will speed up the rate at which all replicas are synced across the cluster. Increasing the value is not recommended. | `15s`
`anti_entropy.concurrency_limit` | Limits how many Active Anti-Entropy exchanges or builds can happen concurrently. | `2`
`anti_entropy.tree.expiry` | Determines how often hash trees are expired after being built. Periodically expiring a hash tree ensures that the on-disk hash tree data stays consistent with the actual K/V backend data. It also helps Riak identify silent disk failures and bit rot. However, expiration is not needed for normal active anti-entropy operations and should be infrequent for performance reasons. The time is specified in milliseconds. | `1w`
`anti_entropy.tree.build_limit.per_timespan` | See `anti_entropy.tree.build_limit.number` directly below. | `1h`
`anti_entropy.tree.build_limit.number` | Restricts how fast AAE can build hash trees. Building the tree for a given partition requires a full scan over that partition's data. Once built, trees stay built until they are expired. `.number` is the number of builds; `.per_timespan` is the amount of time in which that number of builds occurs. | `1`
`anti_entropy.use_background_manager` | Whether AAE is to use a background process to limit AAE tree rebuilds. If set to `on`, this will help to prevent system response degradation under times of heavy load from multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
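Pulling the tier example from the table into a concrete `riak.conf` sketch (the tier values are the document's own illustration):

```riakconf
anti_entropy = active
anti_entropy.throttle = on
# One tier with a mailbox size of 0 must exist
anti_entropy.throttle.tier1.mailbox_size = 0
anti_entropy.throttle.tier1.delay = 0ms
anti_entropy.throttle.tier2.mailbox_size = 40
anti_entropy.throttle.tier2.delay = 5ms
```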
## TicTac Active Anti-Entropy

Config | Description | Default
:------|:------------|:-------
`tictacaae_active` | Switches TicTac AAE between passive and active. If you want to run TicTac AAE alongside legacy AAE, set both to active. Possible values: `active` or `passive`. | `passive`
`tictacaae_dataroot` | Path under which AAE data files will be stored. | `$(platform_data_dir)/tictac_aae`
`tictacaae_parallelstore` | When running in parallel mode, which will be the default if the backend does not support native TicTac AAE (i.e. is not leveled), this sets the type of parallel key store to be kept: `leveled_ko` (leveled and key-ordered) or `leveled_so` (leveled and segment-ordered). When running in native mode, this setting is ignored. | `leveled_ko`
`tictacaae_rebuildwait` | The minimum number of hours to wait between rebuilds. | `336`
`tictacaae_rebuilddelay` | The number of seconds which represents the length of the period in which the next rebuild will be scheduled. So if all vnodes are scheduled to rebuild at the same time, they will actually rebuild at a random point between 0 and this value (in seconds) after the scheduled rebuild time. | `345600`

## Intra-Cluster Handoff

Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].

Config | Description | Default
:------|:------------|:-------
`handoff.max_rejects` | The maximum number of times that a secondary system within Riak, such as Riak Search, can block handoff of primary key/value data. The approximate maximum duration that a vnode can be blocked can be determined by multiplying this setting by `vnode_management_timer`. If you want to prevent handoff from ever being blocked by a secondary system, set this parameter to `0`. | `6`
`handoff.inbound` | Whether inbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
`handoff.outbound` | Whether outbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
`handoff.port` | Specifies the TCP port that Riak uses for intra-cluster data handoff. | `8099`
`handoff.ssl.certfile` | To encrypt `riak_core` intra-cluster data handoff traffic, uncomment this line and edit its path to an appropriate certfile and keyfile. |
`handoff.ssl.keyfile` | The keyfile paired with the certfile specified in `.certfile`. |
`handoff.use_background_manager` | Whether Riak will use a background manager to limit K/V handoff. This can help to prevent system response degradation during times of heavy load caused by multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
## Riak Data Types

Config | Description | Default
:------|:------------|:-------
`datatypes.compression_level` | Whether serialized Data Types will use compression, and at what level. When set to an integer, the parameter refers to the aggressiveness of compression, on a scale from 0 to 9. `on` is equivalent to 6, whereas `off` is equivalent to 0. Higher values for compression tend to be more CPU intensive. | `1`
## SNMP

Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher.

## JMX

Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher.

## Strong Consistency

> **Please Note:**
>
> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment.

Riak's strong consistency feature has a variety of tunable parameters
that allow you to enable and disable strong consistency, modify the
behavior of leaders and followers, set various timeouts, and more. More
detailed information from an operations perspective can be found in our
documentation on [managing strong consistency][cluster ops strong consistency].

Strong consistency is disabled by default. The `strong_consistency`
parameter enables you to turn it on. This setting is available in each
node's `riak.conf` file.

Config | Description | Default
:------|:------------|:-------
`strong_consistency` | Enables the consensus subsystem used for strongly consistent Riak operations if set to `on`. | `off`
Unlike the `strong_consistency` setting, the settings listed below are
available only in `advanced.config`, in the `riak_ensemble` section of
that file. That section looks like this:

```advancedconfig
{riak_ensemble, [
    {parameter1, value},
    {parameter2, value},
    %% Other settings
    ]}
```

Further instructions on setting parameters in `advanced.config` can be
found in the [advanced configuration](#advanced-configuration) section below.

Using these settings properly demands a firm understanding of the basic
architecture of Riak's implementation of strong consistency. We highly
recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind
strong consistency before changing the defaults on these parameters.

Config | Description | Default
:------|:------------|:-------
`ensemble_tick` | The rate at which leaders perform their periodic duties, including refreshing the leader lease, in milliseconds. This setting must be lower than both the `lease_duration` and `follower_timeout` settings (both listed below). Lower values mean that leaders perform their duties more frequently, which can allow for faster convergence if a leader goes offline and then returns to the ensemble; higher values mean that leaders perform their duties less frequently, which can reduce network overhead. | `500`
`lease_duration` | Determines how long a leader lease remains valid without being refreshed (in milliseconds). This should be set higher than the `ensemble_tick` setting (listed above) so that leaders have time to refresh their leases before they time out, and it must be set lower than the `follower_timeout` setting (listed below). | `ensemble_tick * 3/2`
`follower_timeout` | Determines how long a follower waits to hear from a leader before it abandons the leader (in milliseconds). This must be set greater than the `lease_duration` setting. | `lease_duration * 4`
`alive_tokens` | Determines the number of ticks the leader will wait to hear from its associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before `ensemble_tick * alive_tokens` milliseconds have elapsed, the leader will give up leadership. It may be necessary to raise this setting if your Riak vnodes are frequently stalling out on slow backend reads/writes. If this setting is too low, it may cause slow requests to time out earlier than the request timeout. | `2`
`storage_delay` | Determines how long the consensus subsystem delays syncing to disk when performing certain metadata operations (in milliseconds). This delay allows multiple operations to be coalesced into a single disk write. We do not recommend that you change this setting. | `50`
`storage_tick` | Determines how often the consensus subsystem writes data to disk that was requested to be written asynchronously (in milliseconds). We do not recommend that you change this setting. | `5000`
`trust_lease` | Determines whether leader leases are used to optimize reads. When set to `true`, a leader with a valid lease will handle the read directly without contacting any followers; when set to `false`, the leader will always contact followers. For more information, see our internal documentation on leader leases. | `true`
`peer_get_timeout` | Determines the timeout used internally for reading consistent data, in milliseconds. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
`peer_put_timeout` | Determines the timeout, in milliseconds, used internally for writing consistent data. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
`peer_workers` | The number of concurrent workers used by the leader to service requests. Increasing this setting may boost performance depending on the workload. | `1`
`tree_validation` | Determines whether Riak considers peer Merkle trees to be trusted after a node restart. When validation is enabled (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted majority. This is the safest option, as it protects Riak against undetected corruption of the Merkle tree. However, this mode reduces Riak availability, since it can sometimes require more than a simple majority of nodes to be online and reachable. | `true`
`synchronous_tree_updates` | Determines whether the metadata updates to follower Merkle trees are handled synchronously or not. When set to `true`, Riak requires two quorum round trips to occur before replying back to the client: the first quorum request to write the actual object and the second to write the Merkle tree data. When set to `false`, Riak will respond back to the client after the first round trip, letting the metadata update happen asynchronously. It's important to note that the leader always updates its local Merkle tree before responding to the client; this setting only affects the metadata writes sent to followers. In principle, asynchronous updates are unsafe: if the leader crashes before sending the metadata updates, and all followers that had acknowledged the object write somehow revert to the object value immediately prior to the write request, a future read could return the immediately preceding value without realizing that it was incorrect. Given that this scenario is unlikely, this setting defaults to `false` in the name of improved performance. | `false`
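A hypothetical `advanced.config` sketch tuning a couple of the ensemble parameters described above (the values are illustrative; the derived defaults usually suffice):

```advancedconfig
{riak_ensemble, [
    %% Leader performs periodic duties every 500 ms (the default)
    {ensemble_tick, 500},
    %% Tolerate slower vnodes before the leader steps down
    {alive_tokens, 4}
    ]}
```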
+ + +## Miscellaneous + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
metadata_cache_sizeThis setting controls the size of the metadata cache for each vnode. +The cache can be disabled by setting it to off (this is the +default). Enabling the cache should not be necessary in disk-based +backends (i.e. LevelDB and Bitcask) but it can help performance in the +Memory backend. Note that this setting adjusts the size of the ETS table +rather than the actual data. Thus, more space may be used than the +simple size * number-of-vnodes calculation would imply. +

+Caution: This setting should not be changed without +extensive benchmarking.
off
max_concurrent_requestsThe maximum number of concurrent requests of each type (GET or PUT) +that is allowed. Setting this value to infinite disables +overload protection. The erlang.process_limit should be at +least 3 times this setting.50000
dtraceWhether DTrace is enabled. +Do not enable unless your Erlang/OTP runtime is compiled to support +DTrace, which is available in R15B01 (supported by the official source +package) and in R14B04 via a custom repository and branch.off
vnode_management_timerSets the frequency with which vnodes attempt to trigger handoff between +this node and other nodes in the cluster.10s (10 seconds)
retry_put_coordinator_failureWhen a PUT (i.e. write) request fails, Riak will retry the operation +if this setting is set to on, which is the default. Setting +it to off will speed response times on PUT requests in +general, but at the risk of potentially increasing the likelihood of +write failure.on
`background_manager` | Riak's background manager is a subsystem that coordinates access to shared resources from other Riak subsystems. The background manager can help to prevent system response degradation under times of heavy load caused by multiple background tasks. | `on`
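For orientation, here is a short `riak.conf` sketch showing how a few of these settings are written out. The values are simply the defaults from the table above, and we are assuming the keys appear verbatim in `riak.conf`; treat this as illustrative rather than as a recommendation:

```riak.conf
## Defaults from the table above, written out for illustration only
metadata_cache_size = off
max_concurrent_requests = 50000
vnode_management_timer = 10s
retry_put_coordinator_failure = on
background_manager = on
```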
## Advanced Configuration

The `advanced.config` file takes the same format as the `app.config`
file familiar to users of versions of Riak prior to 2.0. Here is an
example:

```advancedconfig
[
  {riak_core,
    [
      {cluster_mgr, {"127.0.0.1", 8098 } },
      %% more riak_core configs
    ]},

  {riak_repl,
    [
      {data_root, "/var/db/riak/riak_repl/"},
      %% more riak_repl configs
    ]
  }
].
```

The following settings are available in the `advanced.config` file:

#### `riak_repl` settings

Most settings that are configurable through `advanced.config` are
related to Riak's `riak_repl` subsystem.

Config | Description | Default
:------|:------------|:-------
`data_root` | Path (relative or absolute) to the working directory for the replication process. | `/var/db/riak/riak_repl/`
`max_fssource_cluster` | The hard limit of fullsync workers that will be running on the source side of a cluster across all nodes on that cluster for a fullsync to a sink cluster. This means that if you have configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `5`
`max_fssource_node` | This setting limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the source cluster on which this parameter is defined, either via the configuration file or command line. | `1`
`max_fssink_node` | This setting limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. This only affects nodes on the sink cluster on which this parameter is defined, either via the configuration file or command line. | `1`
`fullsync_on_connect` | Whether to initiate a fullsync on initial connection from the sink cluster. | `true`
`fullsync_interval` | A single-integer value representing the duration to wait, in minutes, between fullsyncs, or a list of `{clustername, time_in_minutes}` pairs for each sink participating in fullsync replication. | `30`
`rtq_max_bytes` | The maximum size, in bytes, to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync. | `104857600`
`proxy_get` | Whether to enable Riak CS `proxy_get` and block filter. | `disabled`
`rt_heartbeat_interval` | A heartbeat message is sent from the source to the sink every `rt_heartbeat_interval` seconds. Setting `rt_heartbeat_interval` to `undefined` disables the realtime heartbeat. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
`rt_heartbeat_timeout` | If a heartbeat response is not received within the time period specified by this setting (in seconds), the source connection exits and will be re-established. This feature is available only in Riak KV Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6 onwards. | `15`
`realtime_connection_rebalance_max_delay_secs` | Should a server on the source cluster be restarted, this is the amount of time (in seconds) before the realtime connections are rebalanced by a change in the number of source nodes. | `300`
`fullsync_use_background_manager` | By default, fullsync replication will attempt to coordinate with other Riak subsystems that may be contending for the same resources. This will help to prevent system response degradations during times of heavy load from multiple background tasks. To disable background coordination, set this parameter to `false`. This feature is available only in Riak KV Enterprise Edition 2.0 and later, as well as Riak KV 2.2.6 onwards. | `true`
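For reference, here is a minimal `advanced.config` sketch that sets several of the fullsync limits above. The values are simply the defaults from the table, shown only to illustrate where these settings live:

```advancedconfig
{riak_repl, [
    %% Defaults from the table above, for illustration only
    {data_root, "/var/db/riak/riak_repl/"},
    {max_fssource_cluster, 5},
    {max_fssource_node, 1},
    {max_fssink_node, 1},
    {fullsync_on_connect, true},
    {fullsync_interval, 30}
]}
```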
#### Upgrading Riak Search with `advanced.config`

If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][use ref search]\(codename Yokozuna), you will need to enable
legacy Search while the upgrade is underway. You can add the following
snippet to your `advanced.config` configuration to do so:

```advancedconfig
[
  %% Other configs

  {riak_search, [ {enabled, true} ]},
  {merge_index, [
    {data_root, "/var/lib/riak/merge_index"},
    {buffer_rollover_size, 1048576},
    {max_compact_segments, 20}
  ]},

  %% Other configs
].
```

#### Other settings

There are four non-`riak_repl` settings available in
`advanced.config`.

Config | Section | Description | Default
:------|:--------|:------------|:-------
`add_paths` | `riak_kv` | If you are installing custom code for Riak, e.g. for the purpose of running MapReduce jobs or commit hooks, this setting specifies the paths to any compiled `.beam` files that you wish to use. This is expressed as a list of absolute paths on the node's filesystem, e.g. `[ "/tmp", "/other" ]`. |
`cluster_mgr` | `riak_core` | The cluster manager listens for connections from remote clusters on the specified IP and port. Every node runs one cluster manager, but only the cluster manager running on the cluster leader will service requests. This can change as nodes enter and leave the cluster. | `{"127.0.0.1", 9080}`
`delete_mode` | `riak_kv` | Specifies how Riak behaves after objects are marked for deletion with a tombstone. There are three possible settings: `keep` disables tombstone removal altogether; `immediate` removes objects' tombstones as soon as the delete request is received; and setting `delete_mode` to an integer value specifies the number of milliseconds to wait before removing tombstones. More information can be found in Object Deletion. | `3000` (3 seconds)
`target_n_val` | `riak_core` | The highest `n_val` that you generally intend to use. This setting affects how partitions are distributed within the cluster, helping to ensure that "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. You will need to change this setting only in rare circumstances. Assuming that `ring_size` is a power of 2, the ideal value for this setting is both (a) greater than or equal to the largest `n_val` for any bucket type and (b) an even divisor of the number of partitions in the ring, i.e. `ring_size`. The default is `4`, and the number of physical nodes in your cluster must be greater than `target_n_val` for this setting to be effective at preventing hot spots. | `4`
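Putting those four settings together, here is a sketch of the relevant `advanced.config` sections, using the default and example values quoted in the table above:

```advancedconfig
[
  {riak_kv, [
      %% Example .beam paths and the default delete_mode from the table
      {add_paths, ["/tmp", "/other"]},
      {delete_mode, 3000}
  ]},
  {riak_core, [
      {cluster_mgr, {"127.0.0.1", 9080}},
      {target_n_val, 4}
  ]}
].
```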
## Cluster Job Controls

{{% note title="Warning" %}}
Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them.
{{% /note %}}

The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default.

Field | Default | Valid values |
:-----|:--------|:-------------|
`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled`
`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled`
`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled`

diff --git a/content/riak/kv/3.0.3/configuring/search.md b/content/riak/kv/3.0.3/configuring/search.md
new file mode 100644
index 0000000000..0a739ce7d0
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/search.md
@@ -0,0 +1,278 @@
---
title: "Riak Search Settings"
description: ""
project: "riak_kv"
project_version: 3.0.3
menu:
  riak_kv-3.0.3:
    name: "Riak Search Settings"
    identifier: "configuring_search"
    weight: 160
    parent: "configuring"
toc: true
aliases:
  - /riak/3.0.3/ops/advanced/configs/search/
  - /riak/kv/3.0.3/ops/advanced/configs/search/
---

[usage search]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search
[usage search schema]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search-schemas
[usage search data types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/searching-data-types
[usage custom extractors]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/custom-extractors
[cluster-ops aae throttle]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/active-anti-entropy/#throttling
[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference
[config reference#search]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#search
[glossary aae]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#active-anti-entropy-aae
[security index]: {{<baseurl>}}riak/kv/3.0.3/using/security/

[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads
[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation

This page covers how to use Riak Search (with
[Solr](http://lucene.apache.org/solr/) integration).

For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search].

If you are looking to develop on or with Riak Search, take a look at:

* [Using Search][usage search]
* [Search Schema][usage search schema]
* [Custom Search Extractors][usage custom extractors]
* [Riak KV Data Types and Search][usage search data types]

## Overview

We'll be walking through:

1. [Prerequisites](#prerequisites)
2. [Enable Riak Search](#enabling-riak-search)
3. [Search Configuration Settings](#search-config-settings)
4. [Additional Solr Information](#more-on-solr)

## Prerequisites

Because Solr is a Java application, you will need to install **Java 7
or later** on every node. Installation packages can be found on the [Java SE Downloads
page][java se downloads] and instructions in the [Java SE documentation site][java se docs].
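As a quick sanity check, you can confirm which Java version is on each node's path before enabling Search (a generic JVM check, nothing Riak-specific):

```bash
java -version
```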
## Enabling Riak Search

Riak Search is not enabled by default, so you must enable it in every
node's [configuration file][config reference] as follows:

```riak.conf
search = on
```

## Search Config Settings

You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation.

### `search`

Enable or disable search; defaults to `off`.

Valid values: `on` or `off`

### `search.anti_entropy.data_dir`

The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`.

Valid values: a directory

### `search.anti_entropy.throttle`

Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`.

Valid values: `on` or `off`

You can read more about throttling [here][cluster-ops aae throttle].

### `search.anti_entropy.throttle.$tier.delay`

Sets the delay for a throttling tier in [active anti-entropy][glossary aae]; no default.

Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above.

For example:

```
search.anti_entropy.throttle.tier1.solrq_queue_length = 0
search.anti_entropy.throttle.tier1.delay = 0ms
search.anti_entropy.throttle.tier2.solrq_queue_length = 40
search.anti_entropy.throttle.tier2.delay = 5ms
```

will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a mailbox size of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).

Valid values: Non-negative integer

### `search.anti_entropy.throttle.$tier.solrq_queue_length`

Sets the minimum queue size for a throttling tier in [active anti-entropy][glossary aae]; no default.

Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle should observe at that size and above.

For example:

```
search.anti_entropy.throttle.tier1.solrq_queue_length = 0
search.anti_entropy.throttle.tier1.delay = 0ms
search.anti_entropy.throttle.tier2.solrq_queue_length = 40
search.anti_entropy.throttle.tier2.delay = 5ms
```

will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a mailbox size of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle).

Valid values: Non-negative integer

### `search.dist_query`

Enable this node in distributed query plans; defaults to `on`.

If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long-running administrative operation (e.g. reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.

This setting can also be changed via `riak-admin` by issuing one of the following commands:

```
riak-admin set search.dist_query=off
```

or

```
riak-admin set search.dist_query=on
```

Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` feature. Setting `search.dist_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up.

Valid values: `on` or `off`

### `search.index.error_threshold.failure_count`

The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.

Valid values: Integer

### `search.index.error_threshold.failure_interval`

The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.

If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.

Valid values: Milliseconds

### `search.index.error_threshold.reset_interval`

The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.

Valid values: Milliseconds

### `search.queue.batch.flush_interval`

The maximum delay between notifications to flush batches to Solr; defaults to `1000` (milliseconds).

This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.

Valid values: `ms`, `s`, `m`, or `h`

### `search.queue.batch.maximum`

The maximum batch size, in number of Riak objects; defaults to `500`.

Any batches that are larger than this amount will be split, where the first `search.queue.batch.maximum` objects will be flushed to Solr and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.

Valid values: Integer

### `search.queue.batch.minimum`

The minimum batch size, in number of Riak objects; defaults to `10`.

Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.

Valid values: Integer

### `search.queue.high_watermark`

The queue high water mark; defaults to `1000`.
If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem if writes into Solr start to fall behind.

Valid values: Integer

### `search.queue.high_watermark.purge_strategy`

The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.

Valid values: `purge_one`, `purge_index`, or `off`

* `purge_one` removes the oldest item on the queue from an erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
* `purge_index` removes all items associated with one random erroring (references to fuses blown in the code) index in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
* `off` disables purging

### `search.root_dir`

The root directory in which index data and configuration is stored; defaults to `./data/yz`.

Valid values: a directory

### `search.solr.jvm_options`

The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.

Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.

Valid values: Java command-line arguments

### `search.solr.jmx_port`

The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.

Valid values: Integer

**Note:** JMX ceased being a Riak feature in Riak KV 2.9.0p5. This setting is left here for reference but no longer affects anything.

### `search.solr.port`

The port number to which Solr binds (note: binds on every interface); defaults to `8093`.

Valid values: Integer

### `search.solr.start_timeout`

How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.

Values lower than 1s will be rounded up to 1s.

Valid values: Integer with time units (e.g. 2m)

## More on Solr

### Solr JVM and Ports

Riak Search runs one Solr process per node to manage its indexing and search functionality. While the underlying project manages index distribution, node coverage for queries, active anti-entropy (AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.

Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports to the outside world.

### Solr for Operators

For further information on Solr monitoring, tuning, and performance, we recommend the following documents for getting started:

* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
* [Solr Performance Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
* [Solr Performance Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)

A wide variety of other documentation is available from the Solr OSS community.
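Tying several of the settings on this page together, here is a minimal `riak.conf` sketch for a search-enabled node. Every value shown is the documented default except the enlarged JVM heap, which is purely illustrative; tune against your own workload:

```riak.conf
search = on
search.root_dir = ./data/yz
search.queue.batch.minimum = 10
search.queue.batch.maximum = 500
search.queue.batch.flush_interval = 1000ms
search.queue.high_watermark = 1000
## Illustrative only: a larger heap than the default -Xms1g/-Xmx1g
search.solr.jvm_options = -d64 -Xms2g -Xmx2g -XX:+UseStringCache -XX:+UseCompressedOops
```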
diff --git a/content/riak/kv/3.0.3/configuring/strong-consistency.md b/content/riak/kv/3.0.3/configuring/strong-consistency.md
new file mode 100644
index 0000000000..1d3d233a64
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/strong-consistency.md
@@ -0,0 +1,702 @@
---
title: "Implementing Strong Consistency"
description: ""
project: "riak_kv"
project_version: 3.0.3
menu:
  riak_kv-3.0.3:
    name: "Implementing Strong Consistency"
    identifier: "configuring_strong_consistency"
    weight: 190
    parent: "configuring"
toc: true
---

[apps strong consistency]: {{<baseurl>}}riak/kv/3.0.3/developing/app-guide/strong-consistency
[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/reference/strong-consistency
[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes
[config reference#strong-cons]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#strong-consistency
[use admin riak cli]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-cli
[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency
[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask
[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode
[concept buckets]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets
[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/bucket-types
[use admin riak-admin#ensemble]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#ensemble-status
[use admin riak-admin]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin
[config reference#advanced]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#advanced-configuration
[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/cluster-capacity
[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/strong-consistency
[apps replication properties]: {{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties
[concept causal context]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context
[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters
[dev data types]: {{<baseurl>}}riak/kv/3.0.3/developing/data-types
[glossary aae]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#active-anti-entropy-aae
[cluster ops 2i]: {{<baseurl>}}riak/kv/3.0.3/using/reference/secondary-indexes
[usage commit hooks]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/commit-hooks
[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution
[cluster ops obj del]: {{<baseurl>}}riak/kv/3.0.3/using/reference/object-deletion
[dev client libraries]: {{<baseurl>}}riak/kv/3.0.3/developing/client-libraries

> **Please Note:**
>
> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.

This document provides information on configuring and monitoring a Riak
cluster's optional strong consistency subsystem. Documentation for
developers building applications using Riak's strong consistency feature
can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical
treatment can be found in [Strong Consistency][concept strong consistency].

## Minimum Cluster Size

In order to use strong consistency in Riak, **your cluster must consist
of at least three nodes**. If it does not, all strongly consistent
operations will fail.
If your cluster is smaller than three nodes, you will need to [add more nodes][cluster ops add remove node] and make sure that strong consistency is [enabled](#enabling-strong-consistency) on all of them.

Strongly consistent operations on a given key may also fail if a majority of object replicas in a given ensemble are unavailable, whether due to slowness, crashes, or network partitions. This means that you may see strongly consistent operations fail even if the minimum cluster size requirement has been met. More information on ensembles can be found in [Implementation Details](#implementation-details).

While strong consistency requires at least three nodes, we have a variety of recommendations regarding cluster size, which can be found in [Fault Tolerance](#fault-tolerance).

## Enabling Strong Consistency

Strong consistency in Riak is disabled by default. You can enable it in
each node's [configuration files][config reference#strong-cons].

```riakconf
strong_consistency = on
```

```appconfig
%% In the older, app.config-based system, the strong consistency
%% parameter is enable_consensus:

{riak_core, [
    % ...
    {enable_consensus, true},
    % ...
    ]}
```

Remember that you must [restart your node][use admin riak cli] for configuration changes to take effect.

For strong consistency requirements to be applied to specific keys, those keys must be in [buckets][concept buckets] bearing a bucket type with the `consistent` property set to `true`. More information can be found in [Using Bucket Types][cluster ops bucket types].

If you enable strong consistency on all nodes in a cluster with fewer than three nodes, strong consistency will be **enabled** but not yet **active**. Strongly consistent operations are not possible in this state. Once at least three nodes with strong consistency enabled are detected in the cluster, the system will be activated and ready for use. You can check on the status of the strong consistency subsystem using the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command.

## Fault Tolerance

Strongly consistent operations in Riak are necessarily less highly available than [eventually consistent][concept eventual consistency] operations because strongly consistent operations can only succeed if a **quorum** of object replicas are currently reachable. A quorum can be expressed as N / 2 + 1 (or `n_val` / 2 + 1), meaning that 3 replicas constitute a quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are unavailable, for example, no strongly consistent operations on that object can succeed.

While Riak uses N=3 by default, bear in mind that **higher values of N will allow for more fault tolerance**. The table below shows the number of allowable missing replicas for assorted values of N:

Replicas | Allowable missing replicas
:--------|:--------------------------
3 | 1
5 | 2
7 | 3
9 | 4
15 | 7

Thus, we recommend setting `n_val` higher than the default of 3 for strongly consistent operations. More on `n_val` in the section below.

### n_val Recommendations

Due to the quorum requirements explained above, we recommend that you use _at least_ N=5 for strongly consistent data. You can set the value of N, i.e. `n_val`, for buckets [using bucket types][cluster ops bucket types].
For example, you can create and activate a bucket type with N set to 5 and strong consistency enabled---we'll call the bucket type `consistent_and_fault_tolerant`---using the following series of [commands][use admin riak-admin]:

```bash
riak-admin bucket-type create consistent_and_fault_tolerant \
  '{"props": {"consistent":true,"n_val":5}}'
riak-admin bucket-type activate consistent_and_fault_tolerant
```

If the `activate` command outputs `consistent_and_fault_tolerant has been activated`, the bucket type is now ready to provide strong consistency guarantees.

#### Setting the target_n_val parameter

The `target_n_val` parameter sets the highest `n_val` that you intend to use in an entire cluster. The purpose of this parameter is to ensure that so-called "hot spots" don't occur, i.e. that data is never stored more than once on the same physical node. This can happen when:

* `target_n_val` is greater than the number of physical nodes, or
* the `n_val` for a bucket is greater than `target_n_val`.

A problem to be aware of if you're using strong consistency is that the default for `target_n_val` is 4, while our suggested minimum `n_val` for strongly consistent bucket types is 5. This means that you will need to raise `target_n_val` if you intend to use an `n_val` over 4 for _any_ bucket type in your cluster. If you anticipate using an `n_val` of 7 as the largest `n_val` within your cluster, for example, you will need to set `target_n_val` to 7.

This setting is not contained in `riak.conf`, and must instead be set in the `advanced.config` file. For more information, see our documentation on [advanced configuration][config reference#advanced].

If you are using strong consistency in a cluster that has already been created with a `target_n_val` that is too low (remember that the default is too low), you will need to raise it to the desired higher value and restart each node.

#### Note on Bucket Properties

The `consistent` bucket property is one of two bucket properties, alongside [`datatype`][cluster ops bucket types], that cannot be changed once a bucket type has been created.

Furthermore, if `consistent` is set to `true` for a bucket type, you cannot change the `n_val` for the bucket type once it's been created. If you attempt to do so, you'll see the following error:

```
Error updating bucket <bucket_type_name>:
n_val cannot be modified for existing consistent type
```

If you've created a bucket type with a specific `n_val` and wish to change it, you will need to create a new bucket type with the appropriate `n_val` and use the new bucket type instead.

### Fault Tolerance and Cluster Size

From the standpoint of strongly consistent operations, larger clusters tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down.

Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If two nodes go down, _all_ ensembles will lose quorum and will be unable to function. Strongly consistent operations on the entire keyspace will fail until at least one node is brought back online. And even when that one node is brought back online, a significant portion of the keyspace will continue to be unavailable for strongly consistent operations.

For the sake of contrast, imagine a 50-node cluster in which all ensembles are N=5 (i.e. all objects are replicated to five nodes).
In this cluster, each node is involved in only 10% of the total ensembles; if a single node fails, that failure will thus impact only 10% of ensembles. In addition, because N is set to 5, that single failure will not break quorum for _any_ ensemble in the cluster; two additional node failures would need to occur for quorum to be lost for _any_ ensemble. And even in the case of three nodes failing, it is highly unlikely that all three failures would affect the same ensembles; if they did, only those ensembles would become unavailable, affecting only 10% of the key space, as opposed to 100% in the example of a 3-node cluster consisting of N=3 ensembles.

These examples illustrate why we recommend higher values for N---again, at least N=5---as well as clusters with many nodes. The 50-node cluster example above is used only to illustrate why larger clusters are more fault tolerant. The definition of "many" nodes will vary according to your needs. For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity].

### Offline Node Recommendations

In general, strongly consistent Riak is more sensitive to the number of nodes in the cluster than eventually consistent Riak, due to the quorum requirements described above. While Riak is designed to withstand a variety of failure scenarios that make nodes in the cluster unreachable, such as hardware or network failure, **we nonetheless recommend that you limit the number of nodes that you intentionally down or reboot**. Having multiple nodes leave the cluster at once can threaten quorum and thus affect the viability of some or all strongly consistent operations, depending on the size of the cluster.

If you're using strong consistency and you do need to reboot multiple nodes, we recommend rebooting them very carefully. Rebooting nodes too quickly in succession can force the cluster to lose quorum and thus be unable to service strongly consistent operations. The best strategy is to reboot nodes one at a time and wait for each node to rejoin existing [ensembles][cluster ops strong consistency] before continuing to the next node. At any point in time, the state of currently existing ensembles can be checked using [`riak-admin ensemble-status`][use admin riak-admin#ensemble].

## Performance

If you run into performance issues, bear in mind that the key space in a Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of that key space. Larger [ring sizes][concept clusters] allow more independent consensus groups to exist in a cluster, which can provide for more concurrency and higher throughput, and thus better performance. The ideal ring size, however, will also depend on the number of nodes in the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity].

Adding nodes to your cluster is another means of enhancing the performance of strongly consistent operations. Instructions on doing so can be found in [Adding and Removing Nodes][cluster ops add remove node].

Your cluster's configuration can also affect strong consistency performance. See the section on [configuration][config reference#strong-cons] below.

## riak-admin ensemble-status

The [`riak-admin`][use admin riak-admin] interface used for general node/cluster management has an `ensemble-status` command that provides insight into the current status of the consensus subsystem undergirding strong consistency.
Running the command by itself will provide the current state of the subsystem:

```bash
riak-admin ensemble-status
```

If strong consistency is not currently enabled, you will see `Note: The consensus subsystem is not enabled.` in the output of the command; if strong consistency is enabled, you will see output like this:

```
============================== Consensus System ===============================
Enabled:     true
Active:      true
Ring Ready:  true
Validation:  strong (trusted majority required)
Metadata:    best-effort replication (asynchronous)

================================== Ensembles ==================================
 Ensemble     Quorum        Nodes      Leader
-------------------------------------------------------------------------------
   root       4 / 4         4 / 4      riak@riak1
    2         3 / 3         3 / 3      riak@riak2
    3         3 / 3         3 / 3      riak@riak4
    4         3 / 3         3 / 3      riak@riak1
    5         3 / 3         3 / 3      riak@riak2
    6         3 / 3         3 / 3      riak@riak2
    7         3 / 3         3 / 3      riak@riak4
    8         3 / 3         3 / 3      riak@riak4
```

### Interpreting ensemble-status Output

The following table provides a guide to `ensemble-status` output:

Item | Meaning
:----|:-------
`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency.
`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes.
`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change.
`Validation` | This will display `strong` if the `tree_validation` setting in `advanced.config` has been set to `true` and `weak` if set to `false`.
`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in `advanced.config`, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)`, then `synchronous_tree_updates` is set to `true`.
`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.<br>• `Ensemble` --- The ID of the ensemble<br>• `Quorum` --- The number of ensemble peers that are either leading or following<br>• `Nodes` --- The number of nodes currently online<br>• `Leader` --- The current leader node for the ensemble
**Note**: The **root ensemble**, designated by `root` in the sample output above, is a special ensemble that stores a list of nodes and ensembles in the cluster.

More in-depth information on ensembles can be found in our [internal documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md).

### Inspecting Specific Ensembles

The `ensemble-status` command also enables you to directly inspect the status of specific ensembles in a cluster. The IDs for all current ensembles are displayed in the `Ensembles` section of the `ensemble-status` output described above.

To inspect a specific ensemble, specify the ID:

```bash
riak-admin ensemble-status <id>
```

The following would inspect ensemble 2:

```bash
riak-admin ensemble-status 2
```

Below is sample output for a single ensemble:

```
================================= Ensemble #2 =================================
Id:           {kv,0,3}
Leader:       riak@riak2 (2)
Leader ready: true

==================================== Peers ====================================
 Peer  Status     Trusted          Epoch         Node
-------------------------------------------------------------------------------
  1    following    yes              1           riak@riak1
  2    leading      yes              1           riak@riak2
  3    following    yes              1           riak@riak3
```

The table below provides a guide to the output:

Item | Meaning
:----|:-------
`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. The first element is always `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible.
`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble.
`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail.
`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.<br>• `Peer` --- The ID of the peer<br>• `Status` --- Whether the peer is a leader or a follower<br>• `Trusted` --- Whether the peer's Merkle tree is currently considered trusted or not<br>• `Epoch` --- The current consensus epoch for the peer. The epoch is incremented each time the leader changes.<br>• `Node` --- The node on which the peer resides.
More information on leaders, peers, Merkle trees, and other details can be found in [Implementation Details](#implementation-details) below.

## Implementation Details

Strong consistency in Riak is handled by a subsystem called [`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md). This system functions differently from other systems in Riak in a number of ways, and many of these differences are important to bear in mind for operators configuring their cluster's usage of strong consistency.

### Basic Operations

The first major difference is that strongly consistent Riak involves a different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types of atomic operations on objects:

* **Get** operations work just as they do against non-strongly-consistent keys, but with two crucial differences:
  1. Connecting clients are guaranteed to return the most recently written value (which makes those operations CP, i.e. consistent and partition tolerant)
  2. Reads on strongly consistent keys *never* return siblings, hence there is no need to develop any sort of [conflict resolution][usage conflict resolution] strategy for those keys
* **Conditional put** operations write an object only if no object currently exists in that key. The operation will fail if the key already exists; if the key was never written or has been deleted, the operation succeeds.
* **Conditional modify** operations are compare-and-swap (CAS) operations that succeed only if the value of a key has not changed since it was previously read.
* **Delete** operations work mostly like they do against non-strongly-consistent keys, with the exception that [tombstones][cluster ops obj del] are not harvested, which is the equivalent of having `delete_mode` set to `keep`.

**From the standpoint of clients connecting to Riak, there is little difference between strongly and non-strongly consistent data**. The operations performed on objects---reads, writes, deletes, etc.---are the same, which means that the client API for strong consistency is essentially the same as it is for eventually consistent operations, with the important exception of error handling.

### Ensembles

The main actors in Riak's implementation of strong consistency are **ensembles**, which are independent groups that watch over a portion of a Riak cluster's key space and coordinate strongly consistent operations across nodes. When watching over a given key space, ensembles must act upon multiple replicas of a given object, the number of which is specified by `n_val` (more on this in [Replication Properties][apps replication properties]).

Eventually consistent Riak can service requests even when only a single object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types]---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it requires that a **quorum** of object replicas be online and reachable, where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not available for a key, all strongly consistent operations against that key will fail**.

More information can be found in the section on Fault Tolerance above.
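As a back-of-the-envelope illustration of that quorum arithmetic (a sketch in plain Erlang, not code from the Riak tree):

```erlang
%% Quorum for an ensemble of N peers is N div 2 + 1 (integer division).
%% For example: 3 -> 2, 5 -> 3, 7 -> 4.
Quorum = fun(N) -> N div 2 + 1 end.
```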
### Peers, Leaders, Followers, and Workers

All ensembles in strongly consistent Riak consist of agents called **peers**. The number of peers in an ensemble is defined by the `n_val` of that ensemble, i.e. the number of object replicas that the ensemble watches over. Amongst the peers in the ensemble, there are two basic actors: **leaders** and **followers**.

Leaders and followers coordinate with one another on most requests. While leaders and followers coordinate on all writes, i.e. all puts and deletes, you can enable leaders to respond to gets without the need to coordinate with followers. This is known as granting a **leader lease**. Leader leases are enabled by default, and are disabled (or re-enabled) at the cluster level. A more in-depth account of ensemble behavior can be found in our [internal documentation](https://github.com/basho/riak_ensemble/tree/develop/doc).

In addition to leaders and followers, ensemble peers use lightweight Erlang processes called **workers** to perform long-running K/V operations, allowing peers to remain responsive to requests. The number of workers assigned to each peer depends on your configuration.

These terms should be borne in mind in the sections on configuration below.

### Integrity Checking

An essential part of implementing a strong consistency subsystem in a distributed system is **integrity checking**, which is a process that guards against data corruption and inconsistency even in the face of network partitions and other adverse events that Riak was built to handle gracefully.

Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency integrity checking utilizes [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on disk. All peers in an ensemble, i.e. all leaders and followers, maintain their own Merkle trees and update those trees in the event of most strongly consistent operations. Those updates can occur synchronously or asynchronously from the standpoint of client operations, depending on the configuration that you specify.

While integrity checking takes place automatically in Riak, there are important aspects of its behavior that you can configure. See the Merkle Tree settings section below for more information on configurable parameters.

## Configuring Strong Consistency

The `riak_ensemble` subsystem provides a wide variety of tunable parameters that you can adjust to fit the needs of your Riak cluster. All `riak_ensemble`-specific parameters, with the exception of the `strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency), must be set in each node's `advanced.config` file, _not_ in `riak.conf` or `app.config`.

Information on the syntax and usage of `advanced.config` can be found in our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full listing of [strong-consistency-related configuration parameters][config reference#strong-cons].

Please note that the sections below require a basic understanding of the following terms:

* ensemble
* peer
* leader
* follower
* worker
* integrity checking
* Merkle tree

For an explanation of these terms, see the [Implementation Details](#implementation-details) section above.

#### Leader Behavior

The `trust_lease` setting determines whether leader leases are used to optimize reads.
When set to `true`, a leader with a valid lease can handle reads directly without needing to contact any followers. When `false`, the leader will always contact followers, which can lead to degraded read performance. The default is `true`. We recommend leaving leader leases enabled for performance reasons.

All leaders have periodic duties that they perform, including refreshing the leader lease. You can determine how frequently this occurs, in milliseconds, using the `ensemble_tick` setting. The default is 500 milliseconds. Please note that this setting must be lower than both the `lease_duration` and `follower_timeout` settings.

If you set `trust_lease` to `true`, you can also specify how long a leader lease remains valid without being refreshed using the `lease_duration` setting, which is specified in milliseconds. This setting should be higher than `ensemble_tick` to ensure that leaders have time to refresh their leases before they time out, and it _must_ be lower than `follower_timeout`. The default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400, `lease_duration` will default to 600.

#### Worker Settings

You can choose how many workers are assigned to each peer using the `peer_workers` setting. Workers are lightweight processes spawned by leaders and followers. While increasing the number of workers will make the strong consistency subsystem slightly more computationally expensive, more workers can mean improved performance in some cases, depending on the workload. The default is 1.

### Timeouts

You can establish timeouts for both reads and writes (puts and deletes) using the `peer_get_timeout` and `peer_put_timeout` settings, respectively. Both are expressed in milliseconds and default to 60000 (1 minute).

Longer timeouts will decrease the likelihood that read or write operations will fail due to long computation times; shorter timeouts entail shorter wait times for connecting clients, but at a higher risk of failed operations under heavy load.
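Putting the tunables above together, here is a sketch of what they might look like in `advanced.config`. The values are just the defaults quoted above, and placing them under a `riak_ensemble` application section is an assumption on our part; check the [configuration reference][config reference#strong-cons] for the exact location of each parameter:

```advancedconfig
{riak_ensemble, [
    %% Assumed section name -- verify against the configuration reference
    {trust_lease, true},          %% leader leases on (the default)
    {ensemble_tick, 500},         %% leader housekeeping interval, in ms
    {lease_duration, 750},        %% defaults to ensemble_tick * 3/2
    {peer_workers, 1},            %% workers spawned per peer
    {peer_get_timeout, 60000},    %% read timeout, in ms
    {peer_put_timeout, 60000}     %% write timeout, in ms
]}
```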
### Merkle Tree Settings

Leaders and followers in Riak's strong consistency system maintain persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for all data stored by each peer. More information can be found in the **Integrity Checking** section above. The two sections directly below describe Merkle-tree-related parameters.

#### Tree Validation

The `tree_validation` parameter determines whether Riak considers Merkle trees to be trusted after peers are restarted (for whatever reason). When enabled, i.e. when `tree_validation` is set to `true` (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted quorum. While this is the safest mode because it protects Riak against silent corruption in Merkle trees, it carries the drawback that it can reduce Riak availability by requiring more than a simple majority of nodes to be online and reachable when peers restart.

If you are using ensembles with N=3, we strongly recommend setting `tree_validation` to `false`.

#### Synchronous vs. Asynchronous Tree Updates

Merkle tree updates can happen synchronously or asynchronously. This is determined by the `synchronous_tree_updates` parameter. When set to `false`, which is the default, Riak responds to the client after the first roundtrip that updates the followers' data but before the second roundtrip required to update the followers' Merkle trees, allowing the Merkle tree update to happen asynchronously in the background; when set to `true`, Riak requires two quorum roundtrips to occur before replying back to the client, which can increase per-request latency.

Please note that this setting applies only to Merkle tree updates sent to followers. Leaders _always_ update their local Merkle trees before responding to the client. Asynchronous updates can be unsafe in certain scenarios. For example, if a leader crashes before sending metadata updates to followers _and_ all followers that had acknowledged the write somehow revert the object value immediately prior to the write request, a future read could hypothetically return the immediately preceding value without realizing that the value was incorrect. Setting `synchronous_tree_updates` to `false` does bear this possibility, but it is highly unlikely.

## Strong Consistency and Active Anti-Entropy

Riak's [active anti-entropy][glossary aae] \(AAE) feature _can_ repair strongly consistent data. Although it is not necessary to use active anti-entropy if you are using strong consistency, we nonetheless recommend doing so.

Without AAE, all object conflicts are repaired via read repair. Read repair, however, cannot repair conflicts in so-called "cold data," i.e. data that may not be read for long periods of time. While using AAE does entail small performance losses, not using AAE can lead to problems with silent on-disk corruption.

## Strong Consistency and Bitcask

One feature that is offered by Riak's optional [Bitcask][plan backend bitcask] backend is object expiry. If you are using strong consistency and Bitcask together, you should be aware that object metadata is often updated by the strong consistency subsystem during leader changes, which typically take place when nodes go down or during network partitions. When these metadata updates take place, the time to live (TTL) of the object is refreshed, which can lead to general unpredictability in objects' TTL. Although leader changes will be rare in many clusters, we nonetheless recommend that you use object expiry in strongly consistent buckets only in situations when these occasional irregularities are acceptable.

## Important Caveats

The following Riak features are not currently available in strongly consistent buckets:

* [Secondary indexes][cluster ops 2i] --- If you do attach secondary index metadata to objects in strongly consistent buckets, strongly consistent operations can still proceed, but that metadata will be silently ignored.
* [Riak Data Types][dev data types] --- Data Types can currently be used only in an eventually consistent fashion.
* [Using commit hooks][usage commit hooks] --- Neither pre- nor post-commit hooks are supported in strongly consistent buckets. If you do associate a strongly consistent bucket with one or more commit hooks, strongly consistent operations can proceed as normal in that bucket, but all commit hooks will be silently ignored.

Furthermore, you should also be aware that strong consistency guarantees are applied only at the level of single keys.
There is currently no support within Riak for strongly consistent operations against multiple keys, although it is always possible to incorporate client-side write and read locks in applications that use strong consistency.

## Known Issues

There are a few known issues that you should be aware of when using the latest version of strong consistency.

* **Consistent reads of never-written keys create tombstones** --- A [tombstone][cluster ops obj del] will be written if you perform a read against a key that a majority of peers claims to not exist. This is necessary for certain corner cases in which offline or unreachable replicas containing partially written data need to be rolled back in the future.
* **Consistent keys and key listing** --- In Riak, key listing operations, such as listing all the keys in a bucket, do not filter out tombstones. While this is rarely a problem for non-strongly-consistent keys, it does present an issue for strong consistency due to the tombstone issues mentioned above.
* **Secondary indexes not supported** --- Strongly consistent operations do not support [secondary indexes][cluster ops 2i] \(2i) at this time. Furthermore, any other metadata attached to objects, even if not related to 2i, will be silently ignored by Riak in strongly consistent buckets.
* **Multi-Datacenter Replication not supported** --- At this time, consistent keys are *not* replicated across clusters using Multi-Datacenter Replication \(MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly consistent data within a cluster with eventually consistent data between clusters is difficult to reason about from the perspective of applications. In a future version of Riak, we will add support for strongly consistent replication across multiple datacenters/clusters.
* **Client library exceptions** --- Basho's official [client libraries][dev client libraries] convert errors returned by Riak into generic exceptions, with a message derived from the returned server-side error message.

diff --git a/content/riak/kv/3.0.3/configuring/v2-multi-datacenter.md b/content/riak/kv/3.0.3/configuring/v2-multi-datacenter.md
new file mode 100644
index 0000000000..62a5ca96b3
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/v2-multi-datacenter.md
@@ -0,0 +1,160 @@
---
title_supertext: "Configuring:"
title: "V2 Multi-Datacenter Replication"
description: ""
project: "riak_kv"
project_version: 3.0.3
menu:
  riak_kv-3.0.3:
    name: "V2 Multi-Datacenter"
    identifier: "configuring_v2"
    weight: 210
    parent: "configuring"
toc: true
commercial_offering: true
aliases:
  - /riak/3.0.3/ops/mdc/v2/configuration
  - /riak/kv/3.0.3/ops/mdc/v2/configuration
---

[config v2 ssl]: {{<baseurl>}}riak/kv/3.0.3/configuring/v2-multi-datacenter/ssl

{{% note title="Deprecation Warning" %}}
v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.3/configuring/v3-multi-datacenter/) instead.
{{% /note %}}

Riak's Multi-Datacenter Replication capabilities offer a variety of configurable parameters.

## File

The configuration for replication is kept in the `riak_repl` section of each node's `advanced.config`.
That section looks like this:

```advancedconfig
{riak_repl, [
    {fullsync_on_connect, true},
    {fullsync_interval, 360},
    % Debian/Centos/RHEL:
    {data_root, "/var/lib/riak/data/riak_repl"},
    % Solaris:
    % {data_root, "/opt/riak/data/riak_repl"},
    % FreeBSD/SmartOS:
    % {data_root, "/var/db/riak/riak_repl"},
    {queue_size, 104857600},
    {server_max_pending, 5},
    {client_ack_frequency, 5}
  ]}
```

## Usage

These settings are configured using the standard Erlang config file syntax, i.e. `{Setting, Value}`. For example, if you wished to set `ssl_enabled` to `true`, you would insert the following line into the `riak_repl` section (appending a comma if you have more settings to follow):

```advancedconfig
{riak_repl, [
    % Other configs
    {ssl_enabled, true},
    % Other configs
  ]}
```

## Settings

Once your configuration is set, you can verify its correctness by running the following command:

```bash
riak chkconfig
```

The output from this command will point you to syntactical and other errors in your configuration files.

A full list of configurable parameters can be found in the sections below.

## Fullsync Settings

Setting | Options | Default | Description
:-------|:--------|:--------|:-----------
`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster
`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.<br>**Note**: Please contact Basho support for more information.
**Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects the leader will wait to get an acknowledgment from the remote location before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site + +## Client Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster +`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs +`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred + +## Buffer Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes +`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes + +## Worker Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). +`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). + + +1. SSL depth is the maximum number of non-self-issued + intermediate certificates that may follow the peer certificate in a valid + certificate chain. If depth is `0`, the PEER must be signed by the trusted + ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2` + then PEER, CA, CA, ROOT-CA and so on. + +2. Each get worker spawns 2 processes, one for the work and + one for the get FSM (an Erlang finite state machine implementation for `GET` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +3. Each put worker spawns 2 processes, one for the work, and + one for the put FSM (an Erlang finite state machine implementation for `PUT` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +4. If the ACL is specified and not the special value `*`, + peers presenting certificates not matching any of the patterns will not be + allowed to connect. + If no ACLs are configured, no checks on the common name are done, except + as described for [Identical Local and Peer Common Names][config v2 ssl]. 
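+
+As a worked example tying the fullsync settings above together, the
+following sketch disables periodic fullsync entirely. Per the Fullsync
+Settings table, both `fullsync_interval` and `fullsync_on_connect` must
+be changed; this is illustrative only and should be merged into your own
+`riak_repl` section:
+
+```advancedconfig
+{riak_repl, [
+    %% Do not start a fullsync when the secondary cluster connects
+    {fullsync_on_connect, false},
+    %% Never schedule a periodic fullsync
+    {fullsync_interval, disabled}
+  ]}
+```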
+ + + + diff --git a/content/riak/kv/3.0.3/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/3.0.3/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..60d0620be6 --- /dev/null +++ b/content/riak/kv/3.0.3/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,82 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.3/ops/mdc/v2/nat + - /riak/kv/3.0.3/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/3.0.3/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.3/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` + + + + diff --git a/content/riak/kv/3.0.3/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/3.0.3/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..3c4b2e1e98 --- /dev/null +++ b/content/riak/kv/3.0.3/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,371 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.3/ops/mdc/v2/quick-start + - /riak/kv/3.0.3/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.3/configuring/v3-multi-datacenter/quick-start/) instead. 
+{{% /note %}}
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through
+the process of configuring Riak's version 2 Replication to perform
+replication between two sample Riak clusters in separate networks. This
+guide will also cover bidirectional replication, which is accomplished
+by setting up unidirectional replication in both directions between the
+clusters.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [Performing system tuning][perf index]
+* [Reviewing configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there are `listener_<nodename>`s for
+each listening node, and that `leader` and `server_stats` are populated.
+They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following:
+
+```
+Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010"
+leader: 'riak@192.168.1.21'
+client_stats: [{<8051.3902.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster1"},
+                         {strategy,riak_repl_keylist_client},
+                         {fullsync_worker,<8051.3909.0>},
+                         {put_pool_size,5},
+                         {connected,"172.16.1.11",9010},
+                         {state,wait_for_fullsync}]}}]
+```
+
+### Testing Realtime Replication
+
+That's all there is to it! When `PUT` requests are coordinated by
+Cluster1, these operations will be replicated to Cluster2.
+
+You can use the following example script to verify that `PUT` operations
+sent to Cluster1 are being replicated to Cluster2:
+
+```bash
+#!/bin/bash
+
+VALUE=`date`
+CLUSTER_1_IP=172.16.1.11
+CLUSTER_2_IP=192.168.1.21
+
+curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1
+
+CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then
+  echo "C1 PUT Successful"
+else
+  echo "C1 PUT Failed"
+  exit 1
+fi
+
+CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1`
+
+if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then
+  echo "C1 to C2 consistent"
+else
+  echo "C1 to C2 inconsistent
+  C1:${CHECKPUT_C1}
+  C2:${CHECKREPL_C1_TO_C2}"
+  exit 1
+fi
+
+exit 0
+```
+
+You will have to change some of the above variables for your own
+environment, such as IP addresses or ports.
+
+If you run this script and things are working as expected, you will get
+the following output:
+
+```
+C1 PUT Successful
+C1 to C2 consistent
+```
+
+## Set Up Cluster2 → Cluster1 Replication
+
+### About Bidirectional Replication
+
+Multi-Datacenter support can also be configured to replicate in both
+directions, ensuring eventual consistency between your two datacenters.
+Setting up bidirectional replication is as simple as repeating the steps
+above in the other direction, i.e. from Cluster2 to Cluster1.
+
+### Set Up the Listeners on Cluster2 (Source cluster)
+
+On a node in Cluster2, `node4` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010
+riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010
+riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010
+```
+
+### Set Up the Site on Cluster1 (Site cluster)
+
+On a node in Cluster1, `node1` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter **Cluster2**.
+
+```bash
+riak-repl add-site 192.168.1.21 9010 Cluster2
+```
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show 'cancelled' in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/3.0.3/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..c385897dd0
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,164 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.3/ops/mdc/v2/ssl
+  - /riak/kv/3.0.3/ops/mdc/v2/ssl
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.3/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+ * Encryption of replication data
+ * SSL certificate chain validation
+ * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+  ]}
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+  ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+    % ...
+    {ssl_depth, ...}
+    % ...
+  ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self signed.
+
+For example:
+
+ * A depth of 0 indicates that the certificate must be signed directly
+   by a root certificate authority (CA)
+ * A depth of 1 indicates that the certificate may be signed by at most
+   one intermediate CA, followed by a root CA
+ * A depth of 2 indicates that the certificate may be signed by at most
+   two intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/configuring/v3-multi-datacenter.md b/content/riak/kv/3.0.3/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..ad4a4afef2
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/v3-multi-datacenter.md
@@ -0,0 +1,161 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.3/ops/mdc/v3/configuration
+  - /riak/kv/3.0.3/ops/mdc/v3/configuration
+---
+
+[config reference#advanced]: {{}}riak/kv/3.0.3/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{}}riak/kv/3.0.3/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file.
For more information and for a list +of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced]. + +Here is a sample of the syntax: + +```advancedconfig +{riak_core, [ + %% Every *node* runs one cluster_mgr + {cluster_mgr, {"0.0.0.0", 9080 }}, + % ... +]}, +{riak_repl, [ + %% Pick the correct data_root for your platform + %% Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + %% Solaris: + %% {data_root, "/opt/riak/data/riak_repl"}, + %% FreeBSD/SmartOS: + %% {data_root, "/var/db/riak/riak_repl"}, + {max_fssource_cluster, 5}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {fullsync_on_connect, false}, + % ... +]} +``` + +## Settings + +Riak MDC configuration is set using the standard Erlang config file +syntax `{Setting, Value}`. For example, if you wished to set +`fullsync_on_connect` to `false`, you would insert this line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{fullsync_on_connect, false} +``` + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak_repl Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number. +`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. 
+`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication.
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
+`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
+`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100MB. Dropped objects will need to be replicated with a fullsync.
+`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
+`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+
+
+## riak_core Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+
+## Heartbeat Settings
+
+There are two realtime-replication-related settings in the `riak_repl`
+section of `advanced.config` related to the periodic "heartbeat" that is sent
+from the source to the sink cluster to verify the sink cluster's
+liveness. The `rt_heartbeat_interval` setting determines how often the
+heartbeat is sent (in seconds). If a heartbeat is sent and a response is
+not received, Riak will wait `rt_heartbeat_timeout` seconds before
+attempting to re-connect to the sink; if any data is received from the
+sink, even if it is not heartbeat data, the timer will be reset. Setting
+`rt_heartbeat_interval` to `undefined` will disable the heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to.
On the other hand, +lengthening the timeout will make Riak less sensitive to cases in which +the connection really has been compromised. + +1. SSL depth is the maximum number of non-self-issued + intermediate certificates that may follow the peer certificate in a valid + certificate chain. If depth is `0`, the PEER must be signed by the trusted + ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2` + then PEER, CA, CA, ROOT-CA and so on. + +2. If the ACL is specified and not the special value `*`, + peers presenting certificates not matching any of the patterns will not be + allowed to connect. + If no ACLs are configured, no checks on the common name are done, except + as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer]. + +## Default Bucket Properties + +Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0. + +To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the advanced.config file: + +```advanced.config +{riak_repl, [ + {override_capability, [ + {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] } + ]} +]} +``` + +If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed. + + + + diff --git a/content/riak/kv/3.0.3/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/3.0.3/configuring/v3-multi-datacenter/nat.md new file mode 100644 index 0000000000..5e8a80d10a --- /dev/null +++ b/content/riak/kv/3.0.3/configuring/v3-multi-datacenter/nat.md @@ -0,0 +1,171 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "With NAT" + identifier: "configuring_v3_replication_nat" + weight: 101 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.3/ops/mdc/v3/nat + - /riak/kv/3.0.3/ops/mdc/v3/nat +--- + +[config v3 ssl]: {{}}riak/kv/3.0.3/configuring/v3-multi-datacenter/ssl + +Riak's Version 3 Replication supports replication of data on +networks that use static NAT. + +This can be used for replicating data over the internet where servers +have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network). + +### Requirements + +In order for Replication to work on a server configured with NAT, the +NAT addresses must be configured *statically*. + +## Configuration + +NAT rules can be configured at runtime, from the command line. + +* `riak-repl nat-map show` + + Shows the current NAT mapping table + +* `riak-repl nat-map add [:port] ` + + Adds a NAT map from the external IP, with an optional port, to an + internal IP. The port number refers to a port that is automatically + mapped to the internal `cluster_mgr` port number. + +* `riak-repl nat-map del [:port] ` + + Deletes a specific NAT map entry. + +### Applying Changes at Runtime + +* Realtime NAT replication changes will be applied once realtime is + stopped and started using the following command: + + * `riak-repl realtime stop ` + * `riak-repl realtime start ` + +* Fullsync NAT replication changes will be applied on the next run of a + fullsync, or you can stop and start the current fullsync. 
+ + * `riak-repl fullsync stop ` + * `riak-repl fullsync start ` + + +## Example + +* Cluster_A is the **source** of replicated data. +* Cluster_B and Cluster_C are the **sinks** of the replicated data. + +### Cluster_A Setup + +Cluster_A is set up with nodes using the following **internal** IP +addresses: + +Internal IP | Public IP +---------------|------------------- +`192.168.1.20` | - +`192.168.1.21` | - +`192.168.1.22` | - +`192.168.1.23` | - +`192.168.1.24` | - + +### Cluster_B Setup + +A node from Cluster_B will be configured as follows: + +Internal IP | Public IP +---------------|------------------- +`192.168.2.40` | `50.16.238.120:5555` +`192.168.2.41` | `50.16.238.121:5555` +`192.168.2.42` | `50.16.238.122:5555` +`192.168.2.43` | `50.16.238.123:5555` +`192.168.2.44` | `50.16.238.124:5555` + +In this example, the `cluster_mgr` port number is the default of `9080`, +while the configured NAT port listens on `5555`. + +### Cluster_C Setup + +A node from Cluster_C is set up with **static NAT**, configured with the +following IP addresses: + +Internal IP | Public IP +---------------|------------------- +`192.168.3.60` | `50.16.238.200:5550` +`192.168.3.61` | `50.16.238.200:5551` +`192.168.3.62` | `50.16.238.200:5552` +`192.168.3.63` | `50.16.238.200:5553` +`192.168.3.64` | `50.16.238.200:5554` + +In this example, the `cluster_mgr` port number is the default of `9080`, +while the configured NAT port listens on `5566`. + +```bash +# on any node of Cluster_A +riak-repl clustername Server_A + +# on any node of Cluster_B +riak-repl clustername Server_B + +# on any node of Cluster_C +riak-repl clustername Server_C + +# on 50.16.238.120 of Cluster_B +riak-repl nat-map add 50.16.238.120:5555 192.168.2.40 +# on 50.16.238.121 of Cluster_B +riak-repl nat-map add 50.16.238.121:5555 192.168.2.41 +# on 50.16.238.122 of Cluster_B +riak-repl nat-map add 50.16.238.122:5555 192.168.2.42 +# on 50.16.238.123 of Cluster_B +riak-repl nat-map add 50.16.238.123:5555 192.168.2.43 +# on 50.16.238.124 of Cluster_B +riak-repl nat-map add 50.16.238.124:5555 192.168.2.44 + +# on 192.168.3.60 of Cluster_C +riak-repl nat-map add 50.16.238.200:5550 192.168.3.60 +# on 192.168.3.61 of Cluster_C +riak-repl nat-map add 50.16.238.200:5551 192.168.3.61 +# on 192.168.3.62 of Cluster_C +riak-repl nat-map add 50.16.238.200:5552 192.168.3.62 +# on 192.168.3.63 of Cluster_C +riak-repl nat-map add 50.16.238.200:5553 192.168.3.63 +# on 192.168.3.64 of Cluster_C +riak-repl nat-map add 50.16.238.200:5554 192.168.3.64 + + +# Connect replication from Cluster_A to Cluster_B: +# on any node of Cluster_A +riak-repl connect 50.16.238.120:5555 +# You can connect to any node in Cluster_B with NAT mapped IP's/ports +# This command only needs to be run *once* for a cluster. + +# Connect replication from Cluster_A to Cluster_C: +# on any node of Cluster_A +riak-repl connect 50.16.238.200:5550 +# You can connect to any node in Cluster_C with NAT mapped IP's/ports +# This command only needs to be run *once* for a cluster. 
+ + +# on any node from Cluster_A +riak-repl realtime enable Cluster_B +riak-repl realtime enable Cluster_C + +riak-repl realtime start Cluster_B +riak-repl realtime start Cluster_C +``` + + + + diff --git a/content/riak/kv/3.0.3/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/3.0.3/configuring/v3-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..94825491ad --- /dev/null +++ b/content/riak/kv/3.0.3/configuring/v3-multi-datacenter/quick-start.md @@ -0,0 +1,172 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Quickstart" + identifier: "configuring_v3_quickstart" + weight: 100 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.3/ops/mdc/v3/quick-start + - /riak/kv/3.0.3/ops/mdc/v3/quick-start +--- + +[perf index]: {{}}riak/kv/3.0.3/using/performance +[config v3 mdc]: {{}}riak/kv/3.0.3/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter + +This guide will walk you through the process of configuring Riak's v3 +Replication to perform replication between two sample Riak clusters on +separate networks. This guide will also cover bidirectional replication, +which is accomplished by setting up unidirectional replication in both +directions between the clusters. It is important to note that both +clusters must have the same ring size, but can have a different number +of nodes. + +## Prerequisites + +This guide assumes that you have completed the following steps: + +* Install [Riak][install index] +* Perform [System Tuning][perf index] +* Review [Configuration][config v3 mdc] + +## About v3 Replication in 1.3 and higher + +In Riak's v3 Replication from Riak KV version 1.3 onwards, the nomenclature for Source and Site +clusters has changed. To more accurately reflect the behavior of each of +the clusters, "listeners" and "sites" are now known as "sources" and +"sinks." Data transfer now originates at the "source" and replicates to +the "sink;" initiation is always from the primary (source) to the backup +(sink) data center. + +Additionally, knowledge of the state of each cluster is now managed by a +**cluster manager** process, which greatly simplifies the setup and +maintenance of Multi-Datacenter replication. 
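+
+Since v3 replication will not run without the cluster manager, confirm
+before starting that each node's `advanced.config` contains a
+`cluster_mgr` entry in its `riak_core` section. A minimal sketch, based
+on the [Configuration][config v3 mdc] page (the IP and port here are
+examples only):
+
+```advancedconfig
+{riak_core, [
+    %% Every node runs one cluster manager; v3 replication
+    %% cannot run without this listener
+    {cluster_mgr, {"0.0.0.0", 9080}}
+  ]}
+```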
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak Clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|:-----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View Your Active Connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink Cluster Name [Members]
+---- ------------ ---------- ---------
+Cluster2 Cluster2 <0.7985.0> ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink Cluster Name [Members]
+---- ------------ ---------- ---------
+Cluster1 Cluster1 <0.4456.0> ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2.
+Once this is done, bidirectional replication should be operating. A
+quick way to verify it end to end is sketched below.
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
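+
+As a quick end-to-end check of realtime replication, the same
+`curl`-based approach used in the v2 quick start works here as well. A
+minimal sketch, assuming the default HTTP port (`8098`), the example IPs
+above, and a throwaway `replCheck` bucket:
+
+```bash
+#!/bin/bash
+# Write a value to Cluster1, then read it back from Cluster2
+VALUE=`date`
+curl -s -X PUT -d "${VALUE}" http://10.60.67.149:8098/riak/replCheck/v3
+sleep 2 # give realtime replication a moment to deliver
+CHECK=`curl -s http://10.60.77.10:8098/riak/replCheck/v3`
+if [ "${VALUE}" = "${CHECK}" ]; then
+  echo "Cluster1 to Cluster2 consistent"
+else
+  echo "Cluster1 to Cluster2 inconsistent"
+fi
+```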
+
+
+
+
diff --git a/content/riak/kv/3.0.3/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/3.0.3/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..7ece5e459f
--- /dev/null
+++ b/content/riak/kv/3.0.3/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,174 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.3/ops/mdc/v3/ssl
+  - /riak/kv/3.0.3/ops/mdc/v3/ssl
+---
+
+[config reference#advanced.config]: {{}}riak/kv/3.0.3/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+ * Encryption of replication data
+ * SSL certificate chain validation
+ * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_enabled, true},
+    {certfile, "/full/path/to/site1-cert.pem"},
+    {keyfile, "/full/path/to/site1-key.pem"},
+    {cacertdir, "/full/path/to/cacertsdir"}
+    % ...
+  ]}
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+    % ...
+  ]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {peer_common_name_acl, "*"}
+    % ...
+  ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_core` section of `advanced.config`:
+
+```advancedconfig
+{riak_core, [
+    % ...
+    {ssl_depth, 3} % Sets the depth to 3
+    % ...
+  ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. The
+intermediate certificates must not be self signed.
+
+The following example depths illustrate this:
+
+ * a depth of `0` indicates that the certificate must be signed
+   directly by a root certificate authority (CA)
+ * a depth of `1` indicates that the certificate may be signed by at
+   most one intermediate CA, followed by a root CA
+ * a depth of `2` indicates that the certificate may be signed by at
+   most two intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL for *Version 3* is available in *Riak 1.4+*.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+Read how to [generate your own CA and
+keys](http://www.debian-administration.org/articles/618). Ensure that
+you remove the password protection from the keys you generate.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing.md b/content/riak/kv/3.0.3/developing.md
new file mode 100644
index 0000000000..4fb0570491
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing.md
@@ -0,0 +1,79 @@
+---
+title: "Developing with Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Developing"
+    identifier: "developing"
+    weight: 300
+    pre: lambda
+toc: true
+aliases:
+---
+
+[getting started]: ../developing/getting-started
+[usage index]: ../developing/usage
+[client libraries]: ../developing/client-libraries
+[dev data types]: ../developing/data-types
+[dev data modeling]: ../developing/data-modeling
+[apps index]: ../developing/app-guide
+[dev api index]: ../developing/api
+[dev faq]: ../developing/faq
+
+## In This Section
+
+#### [Getting Started][getting started]
+
+Step-by-step guide for getting started developing with Riak KV.
+
+[Learn More >>][getting started]
+
+#### [Usage][usage index]
+
+A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types.
+
+[Learn More >>][usage index]
+
+#### [Client Libraries][client libraries]
+
+Overview of client libraries for a variety of programming languages and environments.
+ +[Learn More >>][client libraries] + +#### [Data Types][dev data types] + +Overview and guide to working with data types in Riak KV. + +[Learn More >>][dev data types] + +#### [Data Modeling][dev data modeling] + +Information on use cases and data models that are a good fit for Riak KV. + +[Learn More >>][dev data modeling] + +#### [Application Guide][apps index] + +A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV. + +[Learn More >>][apps index] + +#### [APIs Reference][dev api index] + +Information and reference material on Riak KV APIs. + +[Learn More >>][dev api index] + +#### [FAQ][dev faq] + +Frequently asked questions when developing applications with Riak KV. + +[Learn More >>][dev faq] + + + + + + diff --git a/content/riak/kv/3.0.3/developing/api.md b/content/riak/kv/3.0.3/developing/api.md new file mode 100644 index 0000000000..1d5ef87733 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api.md @@ -0,0 +1,42 @@ +--- +title: "APIs" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "APIs" + identifier: "developing_apis" + weight: 107 + parent: "developing" +toc: true +aliases: +--- + +[dev api http]: ./http +[dev api backend]: ./backend +[dev api pbc]: ./protocol-buffers/ + +## In This Section + +#### [HTTP APIs][dev api http] + +Documentation on Riak KV's HTTP API. + +[Learn More >>][dev api http] + +#### [Protocol Buffers][dev api pbc] + +Information on Riak KV's Protocol Buffer Client API + +[Learn More >>][dev api pbc] + +#### [Backend API][dev api backend] + +Overview of Riak KV's storage backend API. + +[Learn More >>][dev api backend] + + + + diff --git a/content/riak/kv/3.0.3/developing/api/backend.md b/content/riak/kv/3.0.3/developing/api/backend.md new file mode 100644 index 0000000000..0907361fa7 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/backend.md @@ -0,0 +1,118 @@ +--- +title: "Backend API" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Backend API" + identifier: "apis_backend" + weight: 101 + parent: "developing_apis" +toc: true +aliases: + - /riak/3.0.3/dev/references/backend-api + - /riak/kv/3.0.3/dev/references/backend-api +--- + +[plan backend]: {{}}riak/kv/3.0.3/setup/planning/backend + +Riak's storage API uniformly applies to all of the +[supported backends][plan backend]. This page presents the details of +the storage backend API in the form of +[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html) +(specs). + +Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html), +an Erlang static analysis tool. We recommend copying these specs into any +custom backend modules and use them as a guide for development to +avoid errors and ensure full compatibility with Riak. + +Also included below is the function export list that can be pasted directly +into a custom storage backend module. + +```erlang +%% Riak Storage Backend API +-export([api_version/0, + start/2, + stop/1, + get/3, + put/5, + delete/4, + drop/1, + fold_buckets/4, + fold_keys/4, + fold_objects/4, + is_empty/1, + status/1, + callback/3]). + +%% =================================================================== +%% Public API +%% =================================================================== + +%% @doc Return the major version of the +%% current API and a capabilities list. +%% The current valid capabilities are async_fold +%% and indexes. 
+-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. + +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http.md b/content/riak/kv/3.0.3/developing/api/http.md new file mode 100644 index 0000000000..afad9d3393 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http.md @@ -0,0 +1,93 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/3.0.3/dev/references/http + - /riak/kv/3.0.3/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
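+
+For instance, to fetch an object whose key contains a slash, escape the
+slash as `%2F`. A quick sketch (the bucket and key here are hypothetical,
+shown only to illustrate the escaping):
+
+```bash
+# Fetch the object stored under key "docs/readme" in bucket "site";
+# the slash in the key is percent-encoded as %2F
+curl http://localhost:8098/types/default/buckets/site/keys/docs%2Freadme
+```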
+
+## Bucket-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{}}riak/kv/3.0.3/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{}}riak/kv/3.0.3/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{}}riak/kv/3.0.3/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{}}riak/kv/3.0.3/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{}}riak/kv/3.0.3/developing/api/http/list-keys)
+
+## Object-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{}}riak/kv/3.0.3/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{}}riak/kv/3.0.3/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/3.0.3/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{}}riak/kv/3.0.3/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{}}riak/kv/3.0.3/developing/api/http/delete-object)
+
+## Riak-Data-Type-related Operations
+
+Method | URL
+:------|:----
+`GET` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+
+For documentation on the HTTP API for [Riak Data Types]({{}}riak/kv/3.0.3/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{}}riak/kv/3.0.3/developing/data-types/#usage-examples)
+and its subpages, e.g. [sets]({{}}riak/kv/3.0.3/developing/data-types/sets).
+
+Advanced users may consult the technical documentation inside the Riak
+KV internal module `riak_kv_wm_crdt`.
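+
+As a quick sketch (the bucket type, bucket, and key names here are
+hypothetical, and the bucket type must already have been created and
+activated with its `datatype` property set to `counter`), a counter
+could be incremented and then read back like this:
+
+```curl
+# Increment the counter stored at my_key by 5
+curl -XPOST http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_key \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+
+# Fetch the counter's current value
+curl http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_key
+```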
+
+## Query-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`POST` | `/mapred` | [HTTP MapReduce]({{}}riak/kv/3.0.3/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes]({{}}riak/kv/3.0.3/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes]({{}}riak/kv/3.0.3/developing/api/http/secondary-indexes)
+
+## Server-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/ping` | [HTTP Ping]({{}}riak/kv/3.0.3/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{}}riak/kv/3.0.3/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{}}riak/kv/3.0.3/developing/api/http/list-resources)
+
+## Search-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/search/query/<index>` | [HTTP Search Query]({{}}riak/kv/3.0.3/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{}}riak/kv/3.0.3/developing/api/http/search-index-info)
+`GET` | `/search/index/<index>` | [HTTP Fetch Search Index]({{}}riak/kv/3.0.3/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index>` | [HTTP Store Search Index]({{}}riak/kv/3.0.3/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index>` | [HTTP Delete Search Index]({{}}riak/kv/3.0.3/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema>` | [HTTP Fetch Search Schema]({{}}riak/kv/3.0.3/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema>` | [HTTP Store Search Schema]({{}}riak/kv/3.0.3/developing/api/http/store-search-schema)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/api/http/counters.md b/content/riak/kv/3.0.3/developing/api/http/counters.md
new file mode 100644
index 0000000000..7b841a6bc3
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/api/http/counters.md
@@ -0,0 +1,82 @@
+---
+title: "HTTP Counters"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Counters"
+    identifier: "http_counters"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/references/http/counters
+  - /riak/kv/3.0.3/dev/references/http/counters
+---
+
+Riak counters are CRDTs (convergent replicated data types) that (eventually)
+converge to the correct total. You simply increment the counter by some
+integer, and any potential conflicts will be resolved automatically by Riak.
+
+## Setup
+
+Riak counters can only be used if the bucket has the `allow_mult` property
+set to `true`.
+
+```
+curl -XPUT localhost:8098/buckets/BUCKET/props \
+  -H "Content-Type: application/json" \
+  -d "{\"props\" : {\"allow_mult\": true}}"
+```
+
+If you attempt to use counters without setting the above, you'll get this
+message:
+
+```
+Counters require bucket property 'allow_mult=true'
+```
+
+## Request
+
+To increment a counter, POST an integer value to the `/counters` resource.
+The value stored under the given key is incremented by that amount.
+
+```
+POST /buckets/BUCKET/counters/KEY
+```
+
+To retrieve the current value, issue a GET against the same resource:
+
+```
+GET /buckets/BUCKET/counters/KEY
+```
+
+## Response
+
+The regular POST/PUT ([HTTP Store Object]({{}}riak/kv/3.0.3/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{}}riak/kv/3.0.3/developing/api/http/fetch-object)) responses apply here.
+
+Caveats: counters have no support for Secondary Indexes (2i), Links, or custom HTTP metadata.
+
+## Example
+
+The body must be an integer (positive or negative). 
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/delete-object.md b/content/riak/kv/3.0.3/developing/api/http/delete-object.md new file mode 100644 index 0000000000..ce6d93122f --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/delete-object.md @@ -0,0 +1,79 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/delete-object + - /riak/kv/3.0.3/dev/references/http/delete-object +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> DELETE /buckets/test/keys/test2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/delete-search-index.md b/content/riak/kv/3.0.3/developing/api/http/delete-search-index.md new file mode 100644 index 0000000000..cb39fd3ca2 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/delete-search-index.md @@ -0,0 +1,38 @@ +--- +title: "HTTP Delete Search Index" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Delete Search Index" + identifier: "http_delete_search_index" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/delete-search-index + - /riak/kv/3.0.3/dev/references/http/delete-search-index +--- + +Deletes a Riak Search index. + +## Request + +``` +DELETE /search/index/ +``` + +## Normal Response Codes + +* `204 No Content` - The index was successfully deleted (also returned + if the index did not exist to begin with) + +## Typical Error Codes + +* `503 Service Unavailable` - The request timed out internally + + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/fetch-object.md b/content/riak/kv/3.0.3/developing/api/http/fetch-object.md new file mode 100644 index 0000000000..f2f0edafd4 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/fetch-object.md @@ -0,0 +1,246 @@ +--- +title: "HTTP Fetch Object" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Fetch Object" + identifier: "http_fetch_object" + weight: 105 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/fetch-object + - /riak/kv/3.0.3/dev/references/http/fetch-object +--- + +Reads an object from the specified bucket/key. + +## Request + +```bash +GET /types/type/buckets/bucket/keys/key +GET /buckets/bucket/keys/key +``` + +Important headers: + +* `Accept` - When `multipart/mixed` is the preferred content-type, objects with +siblings will return all siblings in single request. See [Siblings examples](#siblings-examples). See +also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1). + +Optional headers: + +* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics, +matching on the `ETag` and `Last-Modified` of the object, respectively. If the +object fails one of the tests (that is, if the ETag is equal or the object is +unmodified since the supplied timestamp), Riak will return a `304 Not Modified` +response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5). + +Optional query parameters: + +* `r` - (read quorum) how many replicas need to agree when retrieving the +object ([default is defined by the bucket]({{}}riak/kv/3.0.3/developing/api/http/set-bucket-props)) +* `pr` - how many primary replicas need to be online when doing the read +([default is defined by the bucket]({{}}riak/kv/3.0.3/developing/api/http/set-bucket-props)) +* `basic_quorum` - whether to return early in some failure cases (eg. 
when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/3.0.3/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/3.0.3/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/3.0.3/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. +{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT +< ETag: 6dQBm9oYA1mxRSH0e96l5W +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"foo":"bar"} +``` + +## Siblings examples + +### Manually requesting siblings + +Simple call to fetch an object that has siblings: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 102 +< +Siblings: +16vic4eU9ny46o4KPiDz1f +4v5xOg4bVwUYZdMkqf0d6I +6nr5tDTmhxnwuAFJDd2s6G +6zRSZFUJlHXZ15o9CG0BYl +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +Now request one of the siblings directly: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT +< ETag: 16vic4eU9ny46o4KPiDz1f +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/x-www-form-urlencoded +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + +### Get all siblings in one request + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: multipart/mixed +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh +< Content-Length: 766 +< + +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/x-www-form-urlencoded +Link: ; rel="up" +Etag: 16vic4eU9ny46o4KPiDz1f +Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 4v5xOg4bVwUYZdMkqf0d6I +Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6nr5tDTmhxnwuAFJDd2s6G +Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6zRSZFUJlHXZ15o9CG0BYl +Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT + +{"foo":"bar"} +--YinLMzyUR9feB17okMytgKsylvh-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/fetch-search-index.md b/content/riak/kv/3.0.3/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..a8b25a566c --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/fetch-search-index.md @@ -0,0 +1,52 @@ +--- +title: "HTTP Fetch Search Index" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Fetch Search Index" + identifier: "http_fetch_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/fetch-search-index + - /riak/kv/3.0.3/dev/references/http/fetch-search-index +--- + +Retrieves information about a Riak Search [index]({{}}riak/kv/3.0.3/developing/usage/search/#simple-setup). + +## Request + +``` +GET /search/index/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` - No Search index with that name is currently + available +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the index is found, Riak will output a JSON object describing the +index, including its name, the [`n_val`]({{}}riak/kv/3.0.3/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/3.0.3/developing/usage/search-schemas) used by the index. 
Here is an example: + +```json +{ + "name": "my_index", + "n_val": 3, + "schema": "_yz_default" +} +``` + + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/fetch-search-schema.md b/content/riak/kv/3.0.3/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..b4892b6346 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/fetch-search-schema.md @@ -0,0 +1,42 @@ +--- +title: "HTTP Fetch Search Schema" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Fetch Search Schema" + identifier: "http_fetch_search_schema" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/fetch-search-schema + - /riak/kv/3.0.3/dev/references/http/fetch-search-schema +--- + +Retrieves a Riak KV [search schema]({{}}riak/kv/3.0.3/developing/usage/search-schemas). + +## Request + +``` +GET /search/schema/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the schema is found, Riak will return the contents of the schema as +XML (all Riak Search schemas are XML). + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/get-bucket-props.md b/content/riak/kv/3.0.3/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..f3fccd658e --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/get-bucket-props.md @@ -0,0 +1,86 @@ +--- +title: "HTTP Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Get Bucket Properties" + identifier: "http_get_bucket_props" + weight: 100 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/get-bucket-props + - /riak/kv/3.0.3/dev/references/http/get-bucket-props +--- + +Reads the bucket or bucket type properties. + +## Request + +```bash +GET /buckets/bucket/props +``` + +Or, to read bucket properties from a bucket in a bucket type: + +```bash +GET /types/type/buckets/bucket/props +``` + +Optional query parameters (only valid for the old format): + +* `props` - whether to return the bucket properties (`true` is the default) +* `keys` - whether to return the keys stored in the bucket. (`false` is the +default). See also [HTTP List Keys]({{}}riak/kv/3.0.3/developing/api/http/list-keys). + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` + +The JSON object in the response will contain up to two entries, `"props"` and +`"keys"`, which are present or missing, according to the optional query +parameters. The default is for only `"props"` to be present. + +See [HTTP Set Bucket Properties]({{}}riak/kv/3.0.3/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/3.0.3/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/props +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/link-walking.md b/content/riak/kv/3.0.3/developing/api/http/link-walking.md new file mode 100644 index 0000000000..0b77a2a267 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/link-walking.md @@ -0,0 +1,129 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/link-walking + - /riak/kv/3.0.3/dev/references/http/link-walking +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/3.0.3/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/3.0.3/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/3.0.3/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/list-buckets.md b/content/riak/kv/3.0.3/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..9f84865554 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/list-buckets.md @@ -0,0 +1,68 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/list-buckets + - /riak/kv/3.0.3/dev/references/http/list-buckets +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. 
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/list-keys.md b/content/riak/kv/3.0.3/developing/api/http/list-keys.md new file mode 100644 index 0000000000..f1e7b5071d --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/list-keys.md @@ -0,0 +1,80 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/list-keys + - /riak/kv/3.0.3/dev/references/http/list-keys +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/list-resources.md b/content/riak/kv/3.0.3/developing/api/http/list-resources.md new file mode 100644 index 0000000000..0afab8e5a3 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/list-resources.md @@ -0,0 +1,84 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/list-resources + - /riak/kv/3.0.3/dev/references/http/list-resources +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
+ +The standard resources are: + +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/3.0.3/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/3.0.3/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/3.0.3/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/3.0.3/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/3.0.3/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/3.0.3/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/3.0.3/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/3.0.3/developing/api/http/status) + +## Request + +```bash +GET / +``` + +Headers: + +* `Accept` - `application/json` or `text/html` + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Link` - all resources that are described in the response body, but in Link +form + +## Example + +Request JSON response + +```curl +$ curl -i http://localhost:8098 -H "Accept: application/json" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:18:31 GMT +Content-Type: application/json +Content-Length: 398 + +{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"} + +# Request HTML response +curl -i http://localhost:8098 -H "Accept: text/html" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:20:05 GMT +Content-Type: text/html +Content-Length: 666 + + +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/mapreduce.md b/content/riak/kv/3.0.3/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..7eb70076e6 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/mapreduce.md @@ -0,0 +1,74 @@ +--- +title: "HTTP MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "MapReduce" + identifier: "http_mapreduce" + weight: 108 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/mapreduce + - /riak/kv/3.0.3/dev/references/http/mapreduce +--- + +[MapReduce]({{}}riak/kv/3.0.3/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will 
flow.
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{}}riak/kv/3.0.3/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+**This request must include an entity (body), which is the JSON form of the MapReduce query.**
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/api/http/ping.md b/content/riak/kv/3.0.3/developing/api/http/ping.md
new file mode 100644
index 0000000000..0d78599aae
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/api/http/ping.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/references/http/ping
+  - /riak/kv/3.0.3/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load balancers, and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /ping HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/html +< Content-Length: 2 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +OK +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/reset-bucket-props.md b/content/riak/kv/3.0.3/developing/api/http/reset-bucket-props.md new file mode 100644 index 0000000000..d7a7a373fc --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/reset-bucket-props.md @@ -0,0 +1,61 @@ +--- +title: "HTTP Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Reset Bucket Properties" + identifier: "http_reset_bucket_props" + weight: 102 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/reset-bucket-props + - /riak/kv/3.0.3/dev/references/http/reset-bucket-props +--- + +Resets bucket properties like `n_val` and `allow_mult` back to the +default settings. + +## Request + +```bash +DELETE /buckets/bucket/props +``` + +Resetting bucket properties is not available via the old API format. + +## Response + +Normal status codes: + +* `204 No Content` + +## Example + +```curl +$ curl -XDELETE -v localhost:8098/buckets/bucket/props {13:47} +* About to connect() to localhost port 8098 (#0) +* Trying 127.0.0.1... +* connected +* Connected to localhost (127.0.0.1) port 8098 (#0) +> DELETE /buckets/bucket/props HTTP/1.1 +> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5 +> Host: localhost:8098 +> Accept: */* +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue) +< Date: Tue, 06 Nov 2012 21:56:17 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host localhost left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/search-index-info.md b/content/riak/kv/3.0.3/developing/api/http/search-index-info.md new file mode 100644 index 0000000000..8b5e8342dc --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/search-index-info.md @@ -0,0 +1,56 @@ +--- +title: "HTTP Search Index Info" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Search Index Info" + identifier: "http_search_index_info" + weight: 114 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/search-index-info + - /riak/kv/3.0.3/dev/references/http/search-index-info +--- + +Retrieves information about all currently available [Search indexes]({{}}riak/kv/3.0.3/developing/usage/search) in JSON format. + +## Request + +``` +GET /search/index +``` + +## Response + +If there are no currently available Search indexes, a `200 OK` will be +returned but with an empty list as the response value. 
+ +Below is the example output if there is one Search index, called +`test_index`, currently available: + +```json +[ + { + "n_val": 3, + "name": "test_index", + "schema": "_yz_default" + } +] +``` + +#### Normal Response Codes + +* `200 OK` + +#### Typical Error Codes + +* `404 Object Not Found` - Typically returned if Riak Search is not + currently enabled on the node +* `503 Service Unavailable` - The request timed out internally + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/search-query.md b/content/riak/kv/3.0.3/developing/api/http/search-query.md new file mode 100644 index 0000000000..3eafdf490c --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/search-query.md @@ -0,0 +1,73 @@ +--- +title: "HTTP Search Query" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Search Query" + identifier: "http_search_query" + weight: 113 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/search-query + - /riak/kv/3.0.3/dev/references/http/search-query +--- + +Performs a [Riak KV Search]({{}}riak/kv/3.0.3/developing/usage/search) query. + +## Request + +``` +GET /search/query/ +``` + +## Optional Query Parameters + +* `wt` - The [response + writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers) + to be used when returning the Search payload. The currently + available options are `json` and `xml`. The default is `xml`. +* `q` - The actual Search query itself. Examples can be found in + [Using Search]({{}}riak/kv/3.0.3/developing/usage/search). If a query is not specified, Riak will return + information about the index itself, e.g. the number of documents + indexed. + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `400 Bad Request` - Returned when, for example, a malformed query is + supplied +* `404 Object Not Found` - Returned if the Search index you are + attempting to query does not exist +* `503 Service Unavailable` - The request timed out internally + +## Response + +If a `200 OK` is returned, then the Search query has been successful. +Below is an example JSON response from querying an index that currently +has no documents associated with it: + +```json +{ + "response": { + "docs": [], + "maxScore": 0.0, + "numFound": 0, + "start": 0 + }, + "responseHeader": { + "status": 0, + "QTime": 10, + "params": { /* internal info from the query */ } + } +} +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/secondary-indexes.md b/content/riak/kv/3.0.3/developing/api/http/secondary-indexes.md new file mode 100644 index 0000000000..42bca4a6c0 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/secondary-indexes.md @@ -0,0 +1,95 @@ +--- +title: "HTTP Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Secondary Indexes" + identifier: "http_2i" + weight: 109 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/secondary-indexes + - /riak/kv/3.0.3/dev/references/http/secondary-indexes +--- + +[Secondary Indexes]({{}}riak/kv/3.0.3/developing/usage/secondary-indexes) allows an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys. 
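+
+As a quick sketch of how objects are tagged in the first place, index
+entries are supplied as `x-riak-index-*` headers when the object is
+stored (the bucket, key, and index names below are only examples):
+
+```curl
+curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "x-riak-index-field1_bin: val1" \
+  -H "Content-Type: text/plain" \
+  -d 'some data'
+```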
+ +## Request + +### Exact Match + +```bash +GET /buckets/mybucket/index/myindex_bin/value +``` + +### Range Query + +``` +GET /buckets/mybucket/index/myindex_bin/start/end +``` + +#### Range query with terms + +To see the index values matched by the range, use `return_terms=true`. + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true +``` + +### Pagination + +Add the parameter `max_results` for pagination. This will limit the results and provide for the next request a `continuation` value. + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500 +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM= +``` + +### Streaming + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?stream=true +``` + +## Response + +Normal status codes: + ++ `200 OK` + +Typical error codes: + ++ `400 Bad Request` - if the index name or index value is invalid. ++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system. ++ `503 Service Unavailable` - if the job timed out before it could complete + +## Example + +```curl +$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1 +* About to connect() to localhost port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to localhost (127.0.0.1) port 8098 (#0) +> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3 +> Host: localhost:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 19 +< +* Connection #0 to host localhost left intact +* Closing connection #0 +{"keys":["mykey1"]}% +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/set-bucket-props.md b/content/riak/kv/3.0.3/developing/api/http/set-bucket-props.md new file mode 100644 index 0000000000..830f315c22 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/set-bucket-props.md @@ -0,0 +1,116 @@ +--- +title: "HTTP Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Set Bucket Properties" + identifier: "http_set_bucket_props" + weight: 101 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/set-bucket-props + - /riak/kv/3.0.3/dev/references/http/set-bucket-props +--- + +Sets bucket properties like "n_val" and "allow_mult". + +## Request + +```bash +PUT /buckets/bucket/props +``` + +Important headers: + +* `Content-Type` - `application/json` + +The body of the request should be a JSON object with a single entry "props". +Unmodified bucket properties may be omitted. + +Available properties: + +* `n_val` (integer > 0) - the number of replicas for objects in this bucket +* `allow_mult` (true or false) - whether to allow sibling objects to be created +(concurrent updates) +* `last_write_wins` (true or false) - whether to ignore object history (vector +clock) when writing +* `precommit` - [precommit hooks]({{}}riak/kv/3.0.3/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/3.0.3/developing/usage/commit-hooks) +* `r, w, dw, rw` - default quorum values for operations on keys in the bucket. 
+Valid values are:
+ * `"all"` - all nodes must respond
+ * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.*
+ * `"one"` - equivalent to 1
+ * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of diverse physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+{{% note title="Node Confirms" %}}
+`node_confirms` is a tunable for durability. When operating in a failure state, Riak will store replicas in fallback vnodes, and in some cases multiple fallbacks may be on the same physical node. `node_confirms` is an option that specifies how many distinct physical nodes must acknowledge a write for it to be considered successful.
+
+When Riak receives a put, it starts a `riak_kv_put_fsm` (finite state machine). This prepares and validates the options, calls any precommit hooks, and then executes a put against the local vnode in the preflist, which becomes the coordinating node. It then waits for the local vnode response before executing the put request remotely on the two remaining nodes in the preflist.
+
+The FSM then waits for the remote vnode responses; as responses arrive, it accumulates the results and checks whether enough have been collected to satisfy the bucket properties such as `dw` and `pw`.
+When analyzing the responses, Riak counts the number of different nodes from which results have been returned. The finite state machine can thus be required to wait for a minimum number of confirmations from different nodes, while also ensuring all other configured options are satisfied.
+
+Once all options are satisfied, the response is returned, postcommit hooks are called, and the FSM finishes.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+  -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> Content-Length: 21 +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/status.md b/content/riak/kv/3.0.3/developing/api/http/status.md new file mode 100644 index 0000000000..ff6c3757b2 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/status.md @@ -0,0 +1,173 @@ +--- +title: "HTTP Status" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Status" + identifier: "http_status" + weight: 111 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/status + - /riak/kv/3.0.3/dev/references/http/status +--- + +Reports about the performance and configuration of the Riak node to which it was requested. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active. + +## Performance + +Repeated requests to the `/stats` endpoint do not have a negative +performance impact as the statistics are cached internally in Riak. + +## Request + +```bash +GET /stats +``` + +Important headers: + +* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`. + +## Response + +Normal status codes: +* `200 OK` + +Typical error codes: +* `404 Not Found` - if `riak_kv_stat` is not enabled + +Important headers: +* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks) + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /stats HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: text/plain +> +< HTTP/1.1 200 OK +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 2102 +< +{ + "vnode_gets": 0, + "vnode_puts": 0, + "read_repairs": 0, + "vnode_gets_total": 0, + "vnode_puts_total": 0, + "node_gets": 0, + "node_gets_total": 0, + "node_get_fsm_time_mean": "undefined", + "node_get_fsm_time_median": "undefined", + "node_get_fsm_time_95": "undefined", + "node_get_fsm_time_99": "undefined", + "node_get_fsm_time_100": "undefined", + "node_puts": 0, + "node_puts_total": 0, + "node_put_fsm_time_mean": "undefined", + "node_put_fsm_time_median": "undefined", + "node_put_fsm_time_95": "undefined", + "node_put_fsm_time_99": "undefined", + "node_put_fsm_time_100": "undefined", + "read_repairs_total": 0, + "cpu_nprocs": 84, + "cpu_avg1": 251, + "cpu_avg5": 174, + "cpu_avg15": 110, + "mem_total": 7946684000.0, + "mem_allocated": 4340880000.0, + "nodename": "riak@127.0.0.1", + "connected_nodes": [ + + ], + "sys_driver_version": "1.5", + "sys_global_heaps_size": 0, + "sys_heap_type": "private", + "sys_logical_processors": 2, + "sys_otp_release": "R13B04", + "sys_process_count": 189, + "sys_smp_support": true, + "sys_system_version": "Erlang R13B04 (erts-5.7.5) [[source]] [[64-bit]] [[smp:2:2]] [[rq:2]] [[async-threads:5]] [[hipe]] [[kernel-poll:true]]", + "sys_system_architecture": "i386-apple-darwin10.3.0", + "sys_threads_enabled": true, + "sys_thread_pool_size": 5, + "sys_wordsize": 8, + "ring_members": [ + "riak@127.0.0.1" + ], + "ring_num_partitions": 64, + "ring_ownership": "[{'riak@127.0.0.1',64}]", + "ring_creation_size": 64, + "storage_backend": "riak_kv_bitcask_backend", + "pbc_connects_total": 0, + "pbc_connects": 0, + "pbc_active": 0, + "riak_kv_version": "0.11.0", + "riak_core_version": "0.11.0", + "bitcask_version": "1.0.1", + "luke_version": "0.1", + "webmachine_version": "1.7.1", + "mochiweb_version": "1.7.1", + "erlang_js_version": "0.4", + "runtime_tools_version": "1.8.3", + "crypto_version": "1.6.4", + "os_mon_version": "2.9.1", + "sasl_version": "2.1.9", + "stdlib_version": "1.16.5", + "kernel_version": "2.13.5" +} +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Output Explanation + +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/3.0.3/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
+ +Stat | Description +------------------------------|--------------------------------------------------- +riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated +ignored_gossip_total | Total number of ignored gossip messages since node was started +rings_reconciled_total | Total number of ring reconciliation operations since node was started +rings_reconciled | Number of ring reconciliation operations in the last minute +gossip_received | Number of gossip messages received in the last minute +rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started +handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started +dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started +converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes +converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes +converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes +converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes +rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change +rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change +rebalance_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes +rebalance_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes +riak_kv_vnodes_running | Number of local Riak KV virtual nodes running +riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute +riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute +riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute +riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute +riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute +riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running +riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute +riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute +riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute +riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute +riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/store-object.md b/content/riak/kv/3.0.3/developing/api/http/store-object.md new file mode 100644 index 0000000000..c3e67021be --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/store-object.md @@ -0,0 +1,150 @@ +--- +title: "HTTP Store Object" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Store Object" + identifier: "http_store_object" + weight: 106 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/store-object + - /riak/kv/3.0.3/dev/references/http/store-object +--- + +Stores 
an object under the specified bucket / key. Storing an object comes in +two forms, depending on whether you want to use a key of your choosing, or let +Riak assign a key to a new object. + +## Request + +```bash +POST /types/type/buckets/bucket/keys # Riak-defined key +PUT /types/type/buckets/bucket/keys/key # User-defined key +POST /buckets/bucket/keys # Riak-defined key +PUT /buckets/bucket/keys/key # User-defined key +``` + +For the sake of compatibility with older clients, `POST` is also acceptable in +the form where the key is specified. + +Important headers: + +* `Content-Type` must be set for the stored object. Set what you expect to +receive back when next requesting it. +* `X-Riak-Vclock` if the object already exists, the vector clock attached to the +object when read. +* `X-Riak-Meta-*` - any additional metadata headers that should be stored with +the object. +* `X-Riak-Index-*` - index entries under which this object should be indexed. +[Read more about Secondary Indexing]({{}}riak/kv/3.0.3/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/3.0.3/developing/api/http/link-walking) + +Optional headers (only valid on `PUT`): + +* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since` +invoke conditional request semantics, matching on the `ETag` and `Last-Modified` +of the existing object. These can be used to prevent overwriting a modified +object. If the test fails, you will receive a `412 Precondition Failed` +response. This does not prevent concurrent writes; it is possible for the +condition to evaluate to true for multiple requests if the requests occur at the +same time. + +Optional query parameters: + +* `w` (write quorum) how many replicas to write to before returning a successful +response (default is defined by the bucket level) +* `dw` (durable write quorum) how many replicas to commit to durable storage +before returning a successful response (default is defined at the bucket level) +* `pw` how many primary replicas must be online to attempt a write (default is +defined at the bucket level) +* `returnbody=[true|false]` whether to return the contents of the stored object. + +*This request must include a body (entity).* + +## Response + +Normal status codes: + +* `201 Created` (when submitting without a key) +* `200 OK` +* `204 No Content` +* `300 Multiple Choices` + +Typical error codes: + +* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N) +* `412 Precondition Failed` if one of the conditional request headers failed to +match (see above) + +Important headers: + +* `Location` a relative URL to the newly-created object (when submitting without +a key) + +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/3.0.3/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +may be returned if siblings existed or were created as part of the operation, +and the response can be dealt with similarly. + +## Example: Storing Without Key + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys \ + -H "Content-Type: text/plain" -d 'this is a test' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> POST /buckets/test/keys HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: text/plain +> Content-Length: 14 +> +< HTTP/1.1 201 Created +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49 +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Example: Storing With Key + +```curl +$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA== +> Content-Length: 13 +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/store-search-index.md b/content/riak/kv/3.0.3/developing/api/http/store-search-index.md new file mode 100644 index 0000000000..5ce7b93d44 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/store-search-index.md @@ -0,0 +1,57 @@ +--- +title: "HTTP Store Search Index" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Store Search Index" + identifier: "http_store_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/store-search-index + - /riak/kv/3.0.3/dev/references/http/store-search-index +--- + +Creates a new Riak Search [index]({{}}riak/kv/3.0.3/developing/usage/search/#simple-setup). + +## Request + +``` +PUT /search/index/ +``` + +## Optional Request Body + +If you run a `PUT` request to this endpoint without a request body, Riak +will create a new Search index that uses the [default Search schema]({{}}riak/kv/3.0.3/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`. + +To specify a different schema, however, you must pass Riak a JSON object +as the request body in which the `schema` field specifies the name of +the schema to use. If you've [stored a schema]({{}}riak/kv/3.0.3/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT` +request would create an index called `my_index` that used that schema: + +```curl +curl -XPUT http://localhost:8098/search/index/my_index \ + -H "Content-Type: application/json" \ + -d '{"schema": "my_custom_schema"}' +``` + +More information can be found in [Using Search]({{}}riak/kv/3.0.3/developing/usage/search). 
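+If you'd rather script the request than shell out to `curl`, here is a minimal sketch using only Python's standard library (assuming a local node on port 8098 and that `my_custom_schema` has already been stored):
+
+```python
+# Minimal sketch: create the Search index over HTTP with the standard
+# library. Assumes a local node on port 8098 and an existing schema.
+import json
+import urllib.request
+
+req = urllib.request.Request(
+    "http://localhost:8098/search/index/my_index",
+    data=json.dumps({"schema": "my_custom_schema"}).encode("utf-8"),
+    headers={"Content-Type": "application/json"},
+    method="PUT",
+)
+with urllib.request.urlopen(req) as resp:
+    assert resp.status == 204  # No Content: the index was created
+```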
+ +## Normal Response Codes + +* `204 No Content` - The index has been successfully created + +## Typical Error Codes + +* `409 Conflict` - The index cannot be created because there is + already an index with that name +* `503 Service Unavailable` - The request timed out internally + + + + + diff --git a/content/riak/kv/3.0.3/developing/api/http/store-search-schema.md b/content/riak/kv/3.0.3/developing/api/http/store-search-schema.md new file mode 100644 index 0000000000..bdec75d189 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/http/store-search-schema.md @@ -0,0 +1,54 @@ +--- +title: "HTTP Store Search Schema" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Store Search Schema" + identifier: "http_store_search_schema" + weight: 117 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.3/dev/references/http/store-search-schema + - /riak/kv/3.0.3/dev/references/http/store-search-schema +--- + +Creates a new Riak [Search schema]({{}}riak/kv/3.0.3/developing/usage/search-schemas). + +## Request + +``` +PUT /search/schema/ +``` + +## Required Form Data + +In order to create a new Search schema, you must pass Riak a properly +formed XML schema. More information can be found in the [Search Schema]({{}}riak/kv/3.0.3/developing/usage/search-schemas) document. If you've created a schema and stored it in the file +`my_schema.xml` and would like to create a new schema called +`my_custom_schema`, you would use the following HTTP request: + +```curl +curl -XPUT http://localhost:8098/search/schema/my_custom_schema \ + -H "Content-Type: application/xml" \ + --data-binary @my_schema.xml +``` + +## Normal Response Codes + +* `204 No Content` - The schema has been successfully created + +## Typical Error Codes + +* `400 Bad Request` - The schema cannot be created because there is + something wrong with the schema itself, e.g. an XML formatting error + that makes Riak Search unable to parse the schema +* `409 Conflict` - The schema cannot be created because there is + already a schema with that name +* `503 Service Unavailable` - The request timed out internally + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers.md new file mode 100644 index 0000000000..45ec210422 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers.md @@ -0,0 +1,189 @@ +--- +title: "Protocol Buffers Client API" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Protocol Buffers API" + identifier: "apis_pbc" + weight: 103 + parent: "developing_apis" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers + - /riak/kv/3.0.3/dev/references/protocol-buffers +--- + +This is an overview of the operations you can perform using the +[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC) +interface to Riak, and can be used as a guide for developing a +PBC-compliant Riak client. + +## Protocol + +Riak listens on a TCP port (8087 by default) for incoming connections. +Once connected, the client can send a stream of requests on the same +connection. + +Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages.
Messages are all encoded the same way, consisting of: + +* 32-bit length of message code + Protocol Buffers message in network + order +* 8-bit message code to identify the Protocol Buffers message +* N bytes of Protocol Buffers-encoded message + +### Example + +``` +00 00 00 07 09 0A 01 62 12 01 6B +|----Len---|MC|----Message-----| + +Len = 0x07 +Message Code (MC) = 0x09 = RpbGetReq +RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B + +Decoded Message: +bucket: "b" +key: "k" +``` + +## Message Codes + +Code | Message | +:----|:--------| +0 | `RpbErrorResp` | +1 | `RpbPingReq` | +2 | `RpbPingResp` | +3 | `RpbGetClientIdReq` | +4 | `RpbGetClientIdResp` | +5 | `RpbSetClientIdReq` | +6 | `RpbSetClientIdResp` | +7 | `RpbGetServerInfoReq` | +8 | `RpbGetServerInfoResp` | +9 | `RpbGetReq` | +10 | `RpbGetResp` | +11 | `RpbPutReq` | +12 | `RpbPutResp` | +13 | `RpbDelReq` | +14 | `RpbDelResp` | +15 | `RpbListBucketsReq` | +16 | `RpbListBucketsResp` | +17 | `RpbListKeysReq` | +18 | `RpbListKeysResp` | +19 | `RpbGetBucketReq` | +20 | `RpbGetBucketResp` | +21 | `RpbSetBucketReq` | +22 | `RpbSetBucketResp` | +23 | `RpbMapRedReq` | +24 | `RpbMapRedResp` | +25 | `RpbIndexReq` | +26 | `RpbIndexResp` | +27 | `RpbSearchQueryReq` | +28 | `RbpSearchQueryResp` | +29 | `RpbResetBucketReq` | +30 | `RpbResetBucketResp` | +31 | `RpbGetBucketTypeReq` | +32 | `RpbSetBucketTypeResp` | +40 | `RpbCSBucketReq` | +41 | `RpbCSUpdateReq` | +50 | `RpbCounterUpdateReq` | +51 | `RpbCounterUpdateResp` | +52 | `RpbCounterGetReq` | +53 | `RpbCounterGetResp` | +54 | `RpbYokozunaIndexGetReq` | +55 | `RpbYokozunaIndexGetResp` | +56 | `RpbYokozunaIndexPutReq` | +57 | `RpbYokozunaIndexPutResp` | +58 | `RpbYokozunaSchemaGetReq` | +59 | `RpbYokozunaSchemaGetResp` | +60 | `RpbYokozunaSchemaPutReq` | +80 | `DtFetchReq` | +81 | `DtFetchResp` | +82 | `DtUpdateReq` | +83 | `DtUpdateResp` | +253 | `RpbAuthReq` | +254 | `RpbAuthResp` | +255 | `RpbStartTls` | + +{{% note title="Message Definitions" %}} +All Protocol Buffers messages are defined in the `riak.proto` and other +`.proto` files in the `/src` directory of the +RiakPB project. +{{% /note %}} + +### Error Response + +If the request does not result in an error, Riak will return one of a +variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`, +depending on which request message is sent. + +If the server experiences an error processing a request, however, it +will return an `RpbErrorResp` message instead of the response expected +for the given request (e.g. `RbpGetResp` is the expected response to +`RbpGetReq`). Error messages contain an error string and an error code, +like this: + +```protobuf +message RpbErrorResp { + required bytes errmsg = 1; + required uint32 errcode = 2; +} +``` + +### Values + +* `errmsg` - A string representation of what went wrong +* `errcode` - A numeric code. Currently, only `RIAKC_ERR_GENERAL=1` + is defined. 
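+To make the framing concrete, here is a minimal sketch in Python (an illustration, not part of any official client) that pings a node using the encoding described above, assuming the default PBC port of 8087:
+
+```python
+# Sketch: frame and exchange a PBC message by hand. RpbPingReq (code 1)
+# and RpbPingResp (code 2) carry no Protocol Buffers payload, so each
+# frame is just the 4-byte length prefix plus the 1-byte message code.
+import socket
+import struct
+
+def frame(msg_code, payload=b""):
+    # The length prefix counts the message code byte plus the payload.
+    return struct.pack(">IB", len(payload) + 1, msg_code) + payload
+
+def recv_exact(sock, n):
+    buf = b""
+    while len(buf) < n:
+        chunk = sock.recv(n - len(buf))
+        if not chunk:
+            raise ConnectionError("socket closed mid-frame")
+        buf += chunk
+    return buf
+
+def read_frame(sock):
+    length = struct.unpack(">I", recv_exact(sock, 4))[0]
+    body = recv_exact(sock, length)
+    return body[0], body[1:]  # (message code, protobuf payload)
+
+with socket.create_connection(("127.0.0.1", 8087)) as s:
+    s.sendall(frame(1))                   # RpbPingReq
+    code, payload = read_frame(s)
+    assert code == 2 and payload == b""   # RpbPingResp
+```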
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/yz-schema-put) + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..39f3af58e8 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,34 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/auth-req + - /riak/kv/3.0.3/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
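+In practice, authentication happens over TLS: the client first sends `RpbStartTls` (message code 255 in the protocol overview's table), upgrades the socket, and only then sends the `RpbAuthReq` defined below (code 253), expecting an `RpbAuthResp` (254) on success. Here is a hedged sketch of that flow, hand-encoding the two fields; it assumes security is enabled on the node, and a real client would use protoc-generated classes and proper certificate verification:
+
+```python
+# Sketch of the PBC auth handshake. The 5-byte reads assume the
+# acknowledgement frames carry no payload; error handling is omitted.
+import socket
+import ssl
+import struct
+
+user, password = b"riakuser", b"s3cret"  # placeholder credentials
+auth_payload = (
+    b"\x0a" + bytes([len(user)]) + user            # field 1: user
+    + b"\x12" + bytes([len(password)]) + password  # field 2: password
+)
+
+raw = socket.create_connection(("127.0.0.1", 8087))
+raw.sendall(struct.pack(">IB", 1, 255))   # RpbStartTls
+raw.recv(5)                               # read the StartTls acknowledgement
+ctx = ssl.create_default_context()
+ctx.check_hostname = False                # sketch only; verify certs in production
+ctx.verify_mode = ssl.CERT_NONE
+tls = ctx.wrap_socket(raw)
+tls.sendall(struct.pack(">IB", len(auth_payload) + 1, 253) + auth_payload)
+tls.recv(5)                               # expect an RpbAuthResp (code 254) frame
+```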
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/3.0.3/using/security/basics). + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..333a907cf8 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,82 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/coverage-queries + - /riak/kv/3.0.3/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
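+As an illustration of how a client might consume a coverage plan, here is a hypothetical sketch that fans a secondary index query out in parallel, one request per coverage entry. `send_coverage_req` and `send_index_req` are stand-ins for real client plumbing (message framing plus protobuf encode/decode); they are not part of any published Riak client API:
+
+```python
+# Hypothetical sketch: run one 2i query per coverage entry in parallel.
+from concurrent.futures import ThreadPoolExecutor
+
+def query_slice(entry, bucket, index, key):
+    # Query the slice at its own ip:port, passing the opaque
+    # cover_context so that only this slice's keys are returned.
+    return send_index_req(entry.ip, entry.port, bucket, index, key,
+                          cover_context=entry.cover_context)
+
+plan = send_coverage_req("127.0.0.1", 8087, bucket=b"users")
+with ThreadPoolExecutor() as pool:
+    parts = pool.map(
+        lambda e: query_slice(e, b"users", b"age_int", b"42"), plan.entries)
+keys = [k for part in parts for k in part]
+```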
+ + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/delete-object.md new file mode 100644 index 0000000000..1d59a14697 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/delete-object.md @@ -0,0 +1,104 @@ +--- +title: "PBC Delete Object" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Delete Object" + identifier: "pbc_delete_object" + weight: 107 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/delete-object + - /riak/kv/3.0.3/dev/references/protocol-buffers/delete-object +--- + +Delete an object in the specified [bucket type]({{}}riak/kv/3.0.3/using/cluster-operations/bucket-types)/bucket/key location. + +## Request + +```protobuf +message RpbDelReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 rw = 3; + optional bytes vclock = 4; + optional uint32 r = 5; + optional uint32 w = 6; + optional uint32 pr = 7; + optional uint32 pw = 8; + optional uint32 dw = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + +#### Required Parameters + +Parameter | Description | +:---------|:------------| +`bucket` | The name of the bucket in which the object is stored +`key` | The key under which the object is stored + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description | +:---------|:------------| +`rw` | How many replicas to delete before returning a successful response +`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message. Used to prevent deletion of objects that have been modified since the last GET request (sent as a byte array) +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes to which the delete request will be sent +`type` | The bucket type associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter. + +## Response + +Only the message code is returned.
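+As a concrete check of the encoding rules, the request frame in the example below can be rebuilt by hand (a sketch; real clients use protoc-generated classes rather than manual field encoding):
+
+```python
+# Rebuild the RpbDelReq example frame (bucket "notabucket", key "k",
+# rw = 1) by hand and compare it with the hex dump in the example.
+import struct
+
+bucket, key = b"notabucket", b"k"
+payload = (
+    b"\x0a" + bytes([len(bucket)]) + bucket  # field 1 (bucket), length-delimited
+    + b"\x12" + bytes([len(key)]) + key      # field 2 (key), length-delimited
+    + b"\x18\x01"                            # field 3 (rw) = 1, varint
+)
+frame = struct.pack(">IB", len(payload) + 1, 13) + payload  # 13 = RpbDelReq
+assert frame.hex() == "000000120d0a0a6e6f74616275636b657412016b1801"
+```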
+ +## Example + +#### Request + +``` +Hex 00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65 + 74 12 01 6B 18 01 +Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>> + +RpbDelReq protoc decode: +bucket: "notabucket" +key: "k" +rw: 1 + +``` + +#### Response + +``` +Hex 00 00 00 01 0E +Erlang <<0,0,0,1,14>> + +RpbDelResp - only message code defined +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-counter-store.md new file mode 100644 index 0000000000..8bb2110ee5 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-counter-store.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Counter Store" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Data Type Counter Store" + identifier: "pbc_dt_counter_store" + weight: 117 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/dt-counter-store + - /riak/kv/3.0.3/dev/references/protocol-buffers/dt-counter-store +--- + +An operation to update a [counter]({{}}riak/kv/3.0.3/developing/data-types). + +## Request + +```protobuf +message CounterOp { + optional sint64 increment = 1; +} +``` + +The `increment` value specifies how much the counter will be incremented +or decremented, depending on whether the `increment` value is positive +or negative. This operation can be used to update counters that are +stored on their own in a key or [within a map]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-map-store). + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..ff4d3988a2 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,131 @@ +--- +title: "PBC Data Type Fetch" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Data Type Fetch" + identifier: "pbc_dt_fetch" + weight: 114 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/dt-fetch + - /riak/kv/3.0.3/dev/references/protocol-buffers/dt-fetch +--- + +The equivalent of [`RpbGetReq`]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/3.0.3/developing/data-types). This request results in a `DtFetchResp` +message (explained in the **Response** section below). + +## Request + +```protobuf +message DtFetchReq { + required bytes bucket = 1; + required bytes key = 2; + required bytes type = 3; + optional uint32 r = 4; + optional uint32 pr = 5; + optional bool basic_quorum = 6; + optional bool notfound_ok = 7; + optional uint32 timeout = 8; + optional bool sloppy_quorum = 9; + optional uint32 n_val = 10; + optional bool include_context = 11 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`key` | The key where the Data Type is stored +`type` | The [Using Bucket Types]({{}}riak/kv/3.0.3/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) + +#### Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. 
Please refer to the documentation on [setting bucket properties]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or equal +to N, _or_ a special value denoting `one` +(`4294967295-1`), `quorum` +(`4294967295-2`), `all` +(`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description +:---------|:----------- +`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object +`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object +`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error +`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes to which the fetch request will be sent +`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client + +## Response + +The response to a fetch request ([`DtFetchReq`]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message. + +```protobuf +message DtFetchResp { + enum DataType { + COUNTER = 1; + SET = 2; + MAP = 3; + } + + optional bytes context = 1; + required DataType type = 2; + optional DtValue value = 3; +} +``` + +If the `include_context` option is specified, an opaque "context" value +will be returned along with the user-readable data. When sending an +update request, the client should send this context as well, just as one +would send a [vclock]({{}}riak/kv/3.0.3/learn/glossary/#vector-clock) for standard KV updates. + +The type of the Data Type is specified in the `type` field, and must be +one of the three possible values of the `DataType` enum (`COUNTER`, +`SET`, or `MAP`). + +The current value of the Data Type is contained in the `value` field, +which itself contains a `DtValue` message. This message will have the +following structure: + +```protobuf +message DtValue { + optional sint64 counter_value = 1; + repeated bytes set_value = 2; + repeated MapEntry map_value = 3; +} +``` + +If the Data Type queried is a counter, it will return an integer value +for the counter; if a set, it will return the set's current value, in +bytes; if a map, it will return a `MapEntry` message.
`MapEntry` messages +are structured as follows: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-map-store.md new file mode 100644 index 0000000000..07a15edc9d --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-map-store.md @@ -0,0 +1,77 @@ +--- +title: "PBC Data Type Map Store" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Data Type Map Store" + identifier: "pbc_dt_map_store" + weight: 119 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/dt-map-store + - /riak/kv/3.0.3/dev/references/protocol-buffers/dt-map-store +--- + +An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc. + +## Request + +Operations on maps are requested using a `MapOp` message, which has the following structure: + +```protobuf +message MapOp { + repeated MapField adds = 1; + repeated MapField removes = 2; + repeated MapUpdate updates = 3; +} +``` + +In a `MapOp` message, you can either add or remove fields (sets, counters, or maps) to or from the map or update a field or multiple fields. You can include as many field additions or removals and/or field updates as you wish. + +Adding or removing a field involves including a `MapField` message in your `MapOp` operation: + +```protobuf +message MapField { + enum MapFieldType { + COUNTER = 1; + SET = 2; + REGISTER = 3; + FLAG = 4; + MAP = 5; + } + required bytes name = 1; + required MapFieldType type = 2; +} +``` + +The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated. + +If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure: + +```protobuf +message MapUpdate { + enum FlagOp { + ENABLE = 1; + DISABLE = 2; + } + required MapField field = 1; + optional CounterOp counter_op = 2; + optional SetOp set_op = 3; + optional bytes register_op = 4; + optional FlagOp flag_op = 5; + optional MapOp map_op = 6; +} +``` + +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-set-store). + +If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). + +Updating a register does not involve sending a special message type. 
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..59572168b1 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,36 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/dt-set-store + - /riak/kv/3.0.3/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..93e658788c --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,132 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/dt-store + - /riak/kv/3.0.3/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/3.0.3/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/3.0.3/using/cluster-operations/bucket-types). 
+ +Also required is a `DtOp` message that specifies which operation is to +be performed, depending on whether the Data Type being updated is a +[counter]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-counter-store), [set]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-set-store), or [map]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-map-store). + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description +:---------|:----------- +`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`. +`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{}}riak/kv/3.0.3/learn/glossary/#vector-clock) +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_body` | Whether to return the contents of the stored object. Defaults to `false`. +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored +`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be returned to the client when the `DtUpdateResp` is sent to the client. + +## Response + +The response to a Data Type update request is analogous to +[`RpbPutResp`]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/store-object) for KV operations. If the +`return_body` is set in the update request message (as explained above), +the message will include the opaque context of the Data Type (`context`) +and the new value of the Data Type _after_ the update has completed +(depending on whether the Data Type is a counter, set, or map). If no +key was specified in the update request, it will include the +Riak-assigned key (`key`). + +```protobuf +message DtUpdateResp { + optional bytes key = 1; + optional bytes context = 2; + optional sint64 counter_value = 3; + repeated bytes set_value = 4; + repeated MapEntry map_value = 5; +} +``` + +Assuming `return_body` is set to `true`: if a counter is updated, the +response will include an integer as the `counter_value`; if a set is +updated, a list of binaries will be return as the `set_value`; and if a +map is updated, the returned `map_value` will be a `MapEntry` message. 
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..51eda08e0f --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/dt-union + - /riak/kv/3.0.3/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/dt-store) message. + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..a2e8dc4131 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,185 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/fetch-object + - /riak/kv/3.0.3/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success basic_quorum=true would return an error +`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R +`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match +`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it +`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable + +## Response + +```protobuf +message RpbGetResp { + repeated RpbContent content = 1; + optional bytes vclock = 2; + optional bool unchanged = 3; +} +``` + +#### Values + +Value | Description +:-----|:----------- +`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty. +`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings +`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true` + +The content entries hold the object value and any metadata. +Below is the structure of a RpbContent message, which is +included in GET/PUT responses (`RpbGetResp` (above) and +[`RpbPutResp`]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/store-object), respectively): + +```protobuf +message RpbContent { + required bytes value = 1; + optional bytes content_type = 2; + optional bytes charset = 3; + optional bytes content_encoding = 4; + optional bytes vtag = 5; + repeated RpbLink links = 6; + optional uint32 last_mod = 7; + optional uint32 last_mod_usecs = 8; + repeated RpbPair usermeta = 9; + repeated RpbPair indexes = 10; + optional bool deleted = 11; +} +``` + +From the above, we can see that an `RpbContent` message will always +contain the binary `value` of the object. But it could also contain any +of the following optional parameters: + +* `content_type` - The content type of the object, e.g. `text/plain` + or `application/json` +* `charset` - The character encoding of the object, e.g. `utf-8` +* `content_encoding` - The content encoding of the object, e.g. + `video/mp4` +* `vtag` - The object's [vtag]({{}}riak/kv/3.0.3/learn/glossary/#vector-clock) +* `links` - This parameter is associated with the now-deprecated link + walking feature and should not be used by Riak clients +* `last_mod` - A timestamp for when the object was last modified, in + [ISO 8601 time](http://en.wikipedia.org/wiki/ISO_8601) +* `last_mod_usecs` - A timestamp for when the object was last modified, + in [Unix time](http://en.wikipedia.org/wiki/Unix_time) +* `usermeta` - This field stores user-specified key/value metadata + pairs to be associated with the object. `RpbPair` messages used to + send metadata of this sort are structured like this: + + ```protobuf + message RpbPair { + required bytes key = 1; + optional bytes value = 2; + } + ``` + Notice that both a key and value can be stored or just a key. + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/3.0.3/developing/usage/secondary-indexes) to objects (in the optional + `indexes` field). 
+* `deleted` - Whether the object has been deleted (i.e. whether a + tombstone for the object has been found under the specified key) + +{{% note title="Note on missing keys" %}} +Remember: if a key is not stored in Riak, an `RpbGetResp` response without the +`content` and `vclock` fields will be returned. This should be mapped to +whatever convention the client language uses to return not found. The Erlang +client, for example, returns the atom `{error, notfound}`. +{{% /note %}} + +## Example + +#### Request + +``` +Hex 00 00 00 07 09 0A 01 62 12 01 6B +Erlang <<0,0,0,7,9,10,1,98,18,1,107>> + +RpbGetReq protoc decode: +bucket: "b" +key: "k" +``` + +#### Response + +``` +Hex 00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44 + 6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B + 49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B + CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65 + 30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00 +Erlang <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122, + 56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64, + 224,185,6,18,31,107,206,97,96,96,96,204,96,226,82,44,172,194,91,63, + 101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>> + +RpbGetResp protoc decode: +content { + value: "v2" + vtag: "3SDlf4INKz8hNdhyImKIru" + last_mod: 1271442363 + last_mod_usecs: 105696 +} +vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000" +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-props.md new file mode 100644 index 0000000000..a639c73a31 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-props.md @@ -0,0 +1,114 @@ +--- +title: "PBC Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Get Bucket Properties" + identifier: "pbc_get_bucket_props" + weight: 102 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/get-bucket-props + - /riak/kv/3.0.3/dev/references/protocol-buffers/get-bucket-props +--- + +Fetch a bucket's properties. + +## Request + +```protobuf +message RpbGetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/3.0.3/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +the `default` bucket type will be used. 
+ +## Response + +When an `RpbGetBucketReq` message is sent to Riak, it will respond with +an `RpbGetBucketResp` message, which returns the bucket's properties: + +```protobuf +message RpbGetBucketResp { + required RpbBucketProps props = 1; +} +``` + +The `RpbBucketProps` value itself is structured as follows: + +```protobuf +message RpbBucketProps { + optional uint32 n_val = 1; + optional bool allow_mult = 2; + optional bool last_write_wins = 3; + repeated RpbCommitHook precommit = 4; + optional bool has_precommit = 5 [default = false]; + repeated RpbCommitHook postcommit = 6; + optional bool has_postcommit = 7 [default = false]; + optional RpbModFun chash_keyfun = 8; + optional RpbModFun linkfun = 9; + optional uint32 old_vclock = 10; + optional uint32 young_vclock = 11; + optional uint32 big_vclock = 12; + optional uint32 small_vclock = 13; + optional uint32 pr = 14; + optional uint32 r = 15; + optional uint32 w = 16; + optional uint32 pw = 17; + optional uint32 dw = 18; + optional uint32 rw = 19; + optional bool basic_quorum = 20; + optional bool notfound_ok = 21; + optional bytes backend = 22; + optional bool search = 23; + enum RpbReplMode { + FALSE = 0; + REALTIME = 1; + FULLSYNC = 2; + TRUE = 3; + } + optional RpbReplMode repl = 24; + optional bytes search_index = 25; + optional bytes datatype = 26; + optional bool consistent = 27; +} +``` + +#### Optional Response Values + +Each `RpbBucketProps` message returns all of the properties associated +with a particular bucket. Default values for bucket properties, as well +as descriptions of all of the above properties, can be found in the +[configuration file]({{}}riak/kv/3.0.3/configuring/reference/#default-bucket-properties) documentation. + +It should be noted that the value of an `RpbBucketProps` message may +include other message types, such as `RpbModFun` (specifying +module-function pairs for bucket properties that require them) and +`RpbCommitHook` (specifying the module-function pair and name of a +commit hook). Those message types are structured like this: + +```protobuf +message RpbModFun { + required bytes module = 1; + required bytes function = 2; +} + +message RpbCommitHook { + optional RpbModFun modfun = 1; + optional bytes name = 2; +} +``` + +{{% note title="Note on `RpbReplMode`" %}} +The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/) +{{% /note %}} + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-type.md new file mode 100644 index 0000000000..1469c50c22 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-type.md @@ -0,0 +1,37 @@ +--- +title: "PBC Get Bucket Type" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Get Bucket Type" + identifier: "pbc_get_bucket_type" + weight: 112 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/get-bucket-type + - /riak/kv/3.0.3/dev/references/protocol-buffers/get-bucket-type +--- + +Gets the bucket properties associated with a [bucket type]({{}}riak/kv/3.0.3/using/cluster-operations/bucket-types). + +## Request + +```protobuf +message RpbGetBucketTypeReq { + required bytes type = 1; +} +``` + +Only the name of the bucket type needs to be specified (under `name`). 
+ +## Response + +A bucket type's properties will be sent to the client as part of an +[`RpbBucketProps`]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-props) message. + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/get-client-id.md new file mode 100644 index 0000000000..5b19031cd5 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/get-client-id.md @@ -0,0 +1,65 @@ +--- +title: "PBC Get Client ID" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Get Client ID" + identifier: "pbc_get_client_id" + weight: 127 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/get-client-id + - /riak/kv/3.0.3/dev/references/protocol-buffers/get-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Get the client id used for this connection. Client ids are used for +conflict resolution and each unique actor in the system should be +assigned one. A client id is assigned randomly when the socket is +connected and can be changed using [Set Client ID]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/set-client-id). + +## Request + +Just the `RpbGetClientIdReq` message code. No request message defined. + +## Response + +```protobuf +// Get ClientId Request - no message defined, just send RpbGetClientIdReq +message code +message RpbGetClientIdResp { + required bytes client_id = 1; // Client id in use for this connection +} +``` + +## Example + +Request + +``` +Hex 00 00 00 01 03 +Erlang <<0,0,0,1,3>> +``` + + +Response + +``` +Hex 00 00 00 07 04 0A 04 01 65 01 B5 +Erlang <<0,0,0,7,4,10,4,1,101,1,181>> + +RpbGetClientIdResp protoc decode: +client_id: "001e001265" +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..af2e05a1bc --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,80 @@ +--- +title: "PBC List Buckets" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "List Buckets" + identifier: "pbc_list_buckets" + weight: 100 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/list-buckets + - /riak/kv/3.0.3/dev/references/protocol-buffers/list-buckets +--- + +List all of the bucket names available. + +{{% note title="Caution" %}} +This call can be expensive for the server. Do not use in performance-sensitive +code. +{{% /note %}} + + +## Request + +Only the message code is required. 
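+The reply payload is easy to unpack by hand as well. As a sketch, here is how the response payload from the example below can be decoded (it assumes single-byte varint lengths, which holds for short bucket names):
+
+```python
+# Unpack the RpbListBucketsResp payload from the example response
+# below. Each bucket name is a length-delimited field 1 entry (0x0A).
+def decode_buckets(payload):
+    buckets, i = [], 0
+    while i < len(payload):
+        assert payload[i] == 0x0A       # field 1, wire type 2
+        n = payload[i + 1]              # single-byte length
+        buckets.append(payload[i + 2 : i + 2 + n].decode())
+        i += 2 + n
+    return buckets
+
+# The example response frame, minus its 5-byte header (length + code 16)
+payload = bytes.fromhex(
+    "0a0262310a0262350a0262340a0262380a026233"
+    "0a036231300a0262390a0262320a0262360a026237")
+print(decode_buckets(payload))
+# ['b1', 'b5', 'b4', 'b8', 'b3', 'b10', 'b9', 'b2', 'b6', 'b7']
+```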
## Response

```protobuf
message RpbListBucketsResp {
    repeated bytes buckets = 1;
}
```

Values

* `buckets` - The buckets on the server

## Example

#### Request

```bash
Hex      00 00 00 01 0F
Erlang   <<0,0,0,1,15>>

RpbListBucketsReq - only message code defined
```

#### Response

```bash
Hex      00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62
         34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02
         62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37
Erlang   <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10,
           3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>>

RpbListBucketsResp protoc decode:
buckets: "b1"
buckets: "b5"
buckets: "b4"
buckets: "b8"
buckets: "b3"
buckets: "b10"
buckets: "b9"
buckets: "b2"
buckets: "b6"
buckets: "b7"
```




diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/list-keys.md
new file mode 100644
index 0000000000..833ac8076d
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/list-keys.md
---
title: "PBC List Keys"
description: ""
project: "riak_kv"
project_version: 3.0.3
menu:
  riak_kv-3.0.3:
    name: "List Keys"
    identifier: "pbc_list_keys"
    weight: 101
    parent: "apis_pbc"
toc: true
aliases:
  - /riak/3.0.3/dev/references/protocol-buffers/list-keys
  - /riak/kv/3.0.3/dev/references/protocol-buffers/list-keys
---

List all of the keys in a bucket. This is a streaming call, with
multiple response messages sent for each request.

{{% note title="Not for production use" %}}
This operation requires traversing all keys stored in the cluster and should
not be used in production.
{{% /note %}}

## Request

```protobuf
message RpbListKeysReq {
    required bytes bucket = 1;
}
```

Required Parameters

* `bucket` - The bucket from which to list keys

## Response

```protobuf
message RpbListKeysResp {
    repeated bytes keys = 1;
    optional bool done = 2;
}
```

#### Values

* **keys** - A batch of keys in the bucket
* **done** - Set to `true` on the last response packet

## Example

#### Request

```bash
Hex      00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73
Erlang   <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>>

RpbListKeysReq protoc decode:
bucket: "listkeys"

```

#### Response Packet 1

```bash
Hex      00 00 00 04 12 0A 01 34
Erlang   <<0,0,0,4,18,10,1,52>>

RpbListKeysResp protoc decode:
keys: "4"

```

#### Response Packet 2

```bash
Hex      00 00 00 08 12 0A 02 31 30 0A 01 33
Erlang   <<0,0,0,8,18,10,2,49,48,10,1,51>>

RpbListKeysResp protoc decode:
keys: "10"
keys: "3"
```

#### Response Packet 3

```bash
Hex      00 00 00 03 12 10 01
Erlang   <<0,0,0,3,18,16,1>>

RpbListKeysResp protoc decode:
done: true

```




diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/mapreduce.md
new file mode 100644
index 0000000000..56ab76b3fa
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/mapreduce.md
---
title: "PBC MapReduce"
description: ""
project: "riak_kv"
project_version: 3.0.3
menu:
  riak_kv-3.0.3:
    name: "MapReduce"
    identifier: "pbc_mapreduce"
    weight: 107
    parent: "apis_pbc"
toc: true
aliases:
  - /riak/3.0.3/dev/references/protocol-buffers/mapreduce
  - /riak/kv/3.0.3/dev/references/protocol-buffers/mapreduce
---

Execute a MapReduce job.
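For orientation, here is a sketch of the same kind of job submitted
through the official Erlang client, which handles the `RpbMapRedReq`
encoding described below. This assumes riakc is available and that
JavaScript MapReduce functions are enabled on the cluster; the bucket
name mirrors the worked example at the end of this page:

```erlang
%% Sketch only: a JavaScript map phase followed by a reduce phase.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
Query = [{map,    {jsfun, <<"Riak.mapValuesJson">>}, undefined, false},
         {reduce, {jsfun, <<"Riak.reduceSum">>},     undefined, true}],
{ok, Results} = riakc_pb_socket:mapred(Pid, <<"bucket_501653">>, Query).
```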
+ +## Request + + +```protobuf +message RpbMapRedReq { + required bytes request = 1; + required bytes content_type = 2; +} +``` + + +Required Parameters + +* `request` - MapReduce job +* `content_type` - Encoding for MapReduce job + +Mapreduce jobs can be encoded in two different ways + +* `application/json` - JSON-encoded MapReduce job +* `application/x-erlang-binary` - Erlang external term format + +The JSON encoding is the same as [REST API]({{}}riak/kv/3.0.3/developing/usage/mapreduce/#rest) and +the external term format is the same as the [local Erlang API]({{}}riak/kv/3.0.3/developing/app-guide/advanced-mapreduce/#erlang) + +## Response + +The results of the MapReduce job is returned for each phase that +generates a result, encoded in the same format the job was submitted in. +Multiple response messages will be returned followed by a final message +at the end of the job. + +```protobuf +message RpbMapRedResp { + optional uint32 phase = 1; + optional bytes response = 2; + optional bool done = 3; +} +``` + + +Values + +* `phase` - Phase number of the MapReduce job +* `response` - Response encoded with the content_type submitted +* `done` - Set `true` on the last response packet + +## Example + +Here is how submitting a JSON encoded job to sum up a bucket full of +JSON encoded values. + +``` +{"inputs": "bucket_501653", + "query": + [{"map": {"arg": null, + "name": "Riak.mapValuesJson", + "language": "javascript", + "keep": false}}, + {"reduce": {"arg": null, + "name": "Riak.reduceSum", + "language": "javascript", + "keep": true}}]}" +``` + +Request + +```bash +Hex 00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73 + 22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35 + 33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22 + 6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75 + 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61 + 6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22 + 2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A + 61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 + 70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72 + 65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20 + 6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 + 69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20 + 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76 + 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22 + 3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C + 69 63 61 74 69 6F 6E 2F 6A 73 6F 6E +Erlang <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98, + 117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114, + 121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32, + 110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46, + 109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97, + 110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112, + 116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125, + 44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58, + 32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107, + 46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117, + 97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32, + 34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97, + 112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>> + +RpbMapRedReq protoc decode: +request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null, +"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}}, + {"reduce": {"arg": null, "name": "Riak.reduceSum", "language": 
+"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/ping.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..df3d7707df --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/ping.md @@ -0,0 +1,46 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/ping + - /riak/kv/3.0.3/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..77f8f1d590 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,63 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/3.0.3/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/3.0.3/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
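Since the request has a single required field, it is easy to frame by
hand. Below is a minimal sketch, assuming a node on `127.0.0.1:8087`,
that mirrors the worked example that follows:

```erlang
%% Sketch only: reset the properties of bucket <<"friends">>.
%% Message code 29 = RpbResetBucketReq; 30 = RpbResetBucketResp.
{ok, Sock} = gen_tcp:connect("127.0.0.1", 8087,
                             [binary, {packet, 4}, {active, false}]),
Bucket = <<"friends">>,
%% Protobuf field 1 (bucket), wire type 2: tag byte 16#0A, length, bytes
Req = <<29, 16#0A, (byte_size(Bucket)), Bucket/binary>>,
ok = gen_tcp:send(Sock, Req),
{ok, <<30>>} = gen_tcp:recv(Sock, 0).
```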
## Example

Request to reset the properties for the bucket `friends`:

#### Request

```bash
Hex      00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73
Erlang   <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>>

RpbResetBucketReq protoc decode:
bucket: "friends"

```

#### Response

```bash
Hex      00 00 00 01 1E
Erlang   <<0,0,0,1,30>>

RpbResetBucketResp - only message code defined
```




diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/search.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/search.md
new file mode 100644
index 0000000000..64c0986755
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/search.md
---
title: "PBC Search"
description: ""
project: "riak_kv"
project_version: 3.0.3
menu:
  riak_kv-3.0.3:
    name: "Search"
    identifier: "pbc_search"
    weight: 109
    parent: "apis_pbc"
toc: true
aliases:
  - /riak/3.0.3/dev/references/protocol-buffers/search
  - /riak/kv/3.0.3/dev/references/protocol-buffers/search
---

Send a Search request to retrieve a list of documents, along with a few
stats.

## Request

```protobuf
message RpbSearchQueryReq {
    required bytes q = 1;
    required bytes index = 2;
    optional uint32 rows = 3;
    optional uint32 start = 4;
    optional bytes sort = 5;
    optional bytes filter = 6;
    optional bytes df = 7;
    optional bytes op = 8;
    repeated bytes fl = 9;
    optional bytes presort = 10;
}
```

Required Parameters

* `q` - The contents of the query
* `index` - The name of the index to search

Optional Parameters

* `rows` - The maximum number of rows to return
* `start` - A start offset, i.e. the number of keys to skip before
  returning values
* `sort` - How the search results are to be sorted
* `filter` - Filters search with additional query scoped to inline
  fields
* `df` - Override the `default_field` setting in the schema file
* `op` - `and` or `or`, to override the `default_op` operation setting
  in the schema file
* `fl` - Limits the fields returned for each document
* `presort` - Presort the results by `key` or `score`


## Response

The results of a search query are returned as a repeating list of 0 or
more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or
more key/value pairs (`RpbPair`) that match the given request
parameters. The response also includes the maximum search score and the
number of results.


```protobuf
// RpbPair is a generic key/value pair datatype used for
// other message types
message RpbPair {
    required bytes key = 1;
    optional bytes value = 2;
}

message RpbSearchDoc {
    repeated RpbPair fields = 1;
}

message RpbSearchQueryResp {
    repeated RpbSearchDoc docs = 1;
    optional float max_score = 2;
    optional uint32 num_found = 3;
}
```

Values

* `docs` - A list of docs that match the search request
* `max_score` - The top score returned
* `num_found` - The total number of values matched by this
  search


## Example

Request

Here we search for any animals that begin with the string `pig`. We want
only the first 100 results, sorted by the `name` field.
```bash
RpbSearchQueryReq protoc decode:
q: "pig*"
index: "animals"
rows: 100
start: 0
sort: "name"

Hex      00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E
         69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65
Erlang   <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110,
           105,109,97,108,115,24,100,32,0,42,4,110,97,
           109,101>>
```

Response

```bash
Hex      00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D
         61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65
         12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69
         6D 61 6C 12 06 70 69 67 65 6F 6E 18 02
Erlang   <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109,
           97,108,18,3,112,105,103,10,12,10,4,110,97,
           109,101,18,4,102,114,101,100,10,18,10,16,10,
           6,97,110,105,109,97,108,18,6,112,105,103,
           101,111,110,24,2>>

RpbSearchQueryResp protoc decode:
docs {
  fields {
    key: "animal"
    value: "pig"
  }
  fields {
    key: "name"
    value: "fred"
  }
}
docs {
  fields {
    key: "animal"
    value: "pigeon"
  }
}
num_found: 2
```




diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/secondary-indexes.md
new file mode 100644
index 0000000000..1fb409ada6
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/secondary-indexes.md
---
title: "PBC Secondary Indexes"
description: ""
project: "riak_kv"
project_version: 3.0.3
menu:
  riak_kv-3.0.3:
    name: "Secondary Indexes"
    identifier: "pbc_secondary_indexes"
    weight: 108
    parent: "apis_pbc"
toc: true
aliases:
  - /riak/3.0.3/dev/references/protocol-buffers/secondary-indexes
  - /riak/kv/3.0.3/dev/references/protocol-buffers/secondary-indexes
---

Request a set of keys that match a secondary index query.

## Request

```protobuf
message RpbIndexReq {
    enum IndexQueryType {
        eq = 0;
        range = 1;
    }
    required bytes bucket = 1;
    required bytes index = 2;
    required IndexQueryType qtype = 3;
    optional bytes key = 4;
    optional bytes range_min = 5;
    optional bytes range_max = 6;
    optional bool return_terms = 7;
    optional bool stream = 8;
    optional uint32 max_results = 9;
    optional bytes continuation = 10;
    optional uint32 timeout = 11;
    optional bytes type = 12;
    optional bytes term_regex = 13;
    optional bool pagination_sort = 14;
}
```

#### Required Parameters

Parameter | Description
:---------|:-----------
`bucket` | The name of the bucket in which the queried keys are stored
`index` | The name of the index to be queried
`qtype` | The type of index query to be performed. This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query

#### Optional Parameters

Parameter | Description
:---------|:-----------
`key` | The exact index value to match if `qtype` is set to `eq`
`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range`
`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only)
`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated
`max_results` | If pagination is turned on, the number of results to be returned to the client
`continuation` | The opaque continuation value returned in a previous paginated response, used to retrieve the next page of results
`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/3.0.3/developing/usage/bucket-types).
`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query
`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key

## Response

The results of a Secondary Index query are returned as a repeating list
of 0 or more keys that match the given request parameters.

```protobuf
message RpbIndexResp {
    repeated bytes keys = 1;
    repeated RpbPair results = 2;
    optional bytes continuation = 3;
    optional bool done = 4;
}
```

#### Values

Parameter | Description
:---------|:-----------
`keys` | A list of keys that match the index request
`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/fetch-object).
`continuation` | Used for paginated responses
`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results).

## Example

#### Request

Here we look for any exact matches of `chicken` on an `animal_bin` index
for a bucket named `farm`.
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/server-info.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..c304763172 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,62 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/server-info + - /riak/kv/3.0.3/dev/references/protocol-buffers/server-info +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..9b34d0ce33 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,72 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/3.0.3/dev/references/protocol-buffers/set-bucket-props +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/3.0.3/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
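If you'd rather not build the `RpbBucketProps` payload yourself, the
official Erlang client exposes this operation directly. A minimal
sketch (assumes riakc is available), making the same change as the
worked example below:

```erlang
%% Sketch only: set allow_mult to true on bucket <<"friends">>.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
```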
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..9a4f16e792 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,35 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/3.0.3/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/3.0.3/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/get-bucket-props). + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..e15daa4278 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,66 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/set-client-id + - /riak/kv/3.0.3/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
+ +## Example + +Request + +``` +Hex 00 00 00 07 05 0A 04 01 65 01 B6 +Erlang <<0,0,0,7,5,10,4,1,101,1,182>> + +RpbSetClientIdReq protoc decode: +client_id: "001e001266" + +``` + + +Response + +``` +Hex 00 00 00 01 06 +Erlang <<0,0,0,1,6>> + +RpbSetClientIdResp - only message code defined +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/store-object.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/store-object.md new file mode 100644 index 0000000000..3857318fbb --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/store-object.md @@ -0,0 +1,154 @@ +--- +title: "PBC Store Object" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Store Object" + identifier: "pbc_store_object" + weight: 106 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/store-object + - /riak/kv/3.0.3/dev/references/protocol-buffers/store-object +--- + +Stores an object under the specified location, as determined by the +intended [key]({{}}riak/kv/3.0.3/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/3.0.3/learn/concepts/buckets), and [bucket type]({{}}riak/kv/3.0.3/developing/usage/bucket-types). A bucket must always be specified (via +`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If +no key is specified, Riak will assign a random key to the object. If no +[bucket type]({{}}riak/kv/3.0.3/developing/usage/bucket-types) is assigned, Riak will assign +`default`, which means that the [default bucket configuration]({{}}riak/kv/3.0.3/configuring/reference/#default-bucket-properties) will be used. + +#### Request + +```protobuf +message RpbPutReq { + required bytes bucket = 1; + optional bytes key = 2; + optional bytes vclock = 3; + required RpbContent content = 4; + optional uint32 w = 5; + optional uint32 dw = 6; + optional bool return_body = 7; + optional uint32 pw = 8; + optional bool if_not_modified = 9; + optional bool if_none_match = 10; + optional bool return_head = 11; + optional uint32 timeout = 12; + optional bool asis = 13; + optional bool sloppy_quorum = 14; + optional uint32 n_val = 15; + optional bytes type = 16; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket, in bytes, in which the key/value is to reside +`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/fetch-object) + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description +:---------|:----------- +`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request. +`vclock` | Opaque vector clock provided by an earlier RpbGetResp message. Omit if this is a new key or if you deliberately want to create a sibling. 
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`. +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_head` | Return the metadata for the now-stored object without returning the value of the object +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored + +The `if_not_modified`, `if_none_match`, and `asis` parameters are set +only for messages sent between nodes in a Riak cluster and should not be +set by Riak clients. + +#### Response + +```bash +message RpbPutResp { + repeated RpbContent contents = 1; + optional bytes vclock = 2; + optional bytes key = 3; +} +``` + +If `return_body` is set to `true` on the PUT request, the `RpbPutResp` +will contain the current object after the PUT completes, in `contents`, +as well as the object's [causal context]({{}}riak/kv/3.0.3/learn/concepts/causal-context), in the `vclock` +field. The `key` will be sent only if the server generated a random key +for the object. + +If `return_body` is not set and no key is generated, the PUT response +will be empty. + +## Example + +#### Request + +``` +Hex 00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B + 22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01 +Erlang <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34, + 98,97,114,34,125,40,2,56,1>> + +RpbPutReq protoc decode: +bucket: "b" +key: "k" +content { + value: "{"foo":"bar"}" +} +w: 2 +return_body: true + +``` + +#### Response + +``` +Hex 00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A + 22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39 + 36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF + B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60 + CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC + 0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0 + 12 FA 20 89 2C 00 +Erlang <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125, + 42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86, + 106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96, + 96,96,202,96,226,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33, + 182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>> + +RpbPutResp protoc decode: +contents { + value: "{"foo":"bar"}" + vtag: "1caykOD96iNAhomyeVjOYC" + last_mod: 1271453743 + last_mod_usecs: 406416 +} +vclock: "k316a```312`312005R,,351014206031L211214y254014Z!266G371 +302l315I254rw|240022372 211,000" + +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-delete.md new file mode 100644 index 0000000000..0aace28681 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-delete.md @@ -0,0 +1,37 @@ +--- +title: "PBC Yokozuna Index Delete" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Yokozuna Index Delete" + identifier: "pbc_yz_index_delete" + weight: 122 + parent: "apis_pbc" +toc: true +aliases: + - 
/riak/3.0.3/dev/references/protocol-buffers/yz-index-delete + - /riak/kv/3.0.3/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..7ff7ba50f5 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,63 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/yz-index-get + - /riak/kv/3.0.3/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/3.0.3/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..54a8c98113 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,49 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/yz-index-put + - /riak/kv/3.0.3/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. 
+ +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/3.0.3/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-schema-get.md new file mode 100644 index 0000000000..02effa20bc --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-schema-get.md @@ -0,0 +1,52 @@ +--- +title: "PBC Yokozuna Schema Get" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Yokozuna Schema Get" + identifier: "pbc_yz_schema_get" + weight: 123 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/yz-schema-get + - /riak/kv/3.0.3/dev/references/protocol-buffers/yz-schema-get +--- + +Fetch a [search schema]({{}}riak/kv/3.0.3/developing/usage/search-schemas) from Riak Search. + +## Request + +In a request message, you only need to specify the name of the schema as +a binary (under `name`); + +```protobuf +message RpbYokozunaSchemaGetReq { + required bytes name = 1; // Schema name +} +``` + +## Response + +```protobuf +message RpbYokozunaSchemaGetResp { + required RpbYokozunaSchema schema = 1; +} +``` + +The response message will include a `RpbYokozunaSchema` structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message includes the schema `name` and its xml `content`. + + + + diff --git a/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-schema-put.md new file mode 100644 index 0000000000..6fdaec14dd --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/protocol-buffers/yz-schema-put.md @@ -0,0 +1,45 @@ +--- +title: "PBC Yokozuna Schema Put" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Yokozuna Schema Put" + identifier: "pbc_yz_schema_put" + weight: 124 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.3/dev/references/protocol-buffers/yz-schema-put + - /riak/kv/3.0.3/dev/references/protocol-buffers/yz-schema-put +--- + +Create a new Solr [search schema]({{}}riak/kv/3.0.3/developing/usage/search-schemas). + +## Request + +```protobuf +message RpbYokozunaSchemaPutReq { + required RpbYokozunaSchema schema = 1; +} +``` + +Each message must contain a `RpbYokozunaSchema` object structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/3.0.3/developing/usage/search-schemas) `content` as XML. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/3.0.3/developing/api/protocol-buffers/#message-codes) code with no data on success. 
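For reference, here is a sketch of creating a schema and then an index
that uses it through the official Erlang client, which wraps
`RpbYokozunaSchemaPutReq` for you. This assumes riakc is available; the
schema file, schema name, and index name are hypothetical:

```erlang
%% Sketch only: upload a Solr schema, then create an index that uses it.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
{ok, SchemaXml} = file:read_file("my_schema.xml"),
ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXml),
ok = riakc_pb_socket:create_search_index(Pid, <<"my_index">>, <<"my_schema">>, []).
```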
+ + + + diff --git a/content/riak/kv/3.0.3/developing/api/repl-hooks.md b/content/riak/kv/3.0.3/developing/api/repl-hooks.md new file mode 100644 index 0000000000..ec4e07bff9 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/api/repl-hooks.md @@ -0,0 +1,196 @@ +--- +title_supertext: "Riak Multi-Datacenter Replication:" +title: "Hooks API" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Multi-Datacenter REPL Hooks API" + identifier: "apis_repl_hooks" + weight: 100 + parent: "developing_apis" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.3/ops/mdc/v2/hooks + - /riak/kv/3.0.3/ops/mdc/v2/hooks +--- +[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl + +This document is a guide to developing extensions for Riak's +Multi-Datacenter Replication feature. + +## Replication Hooks + +Riak allows applications to register replication hooks to control +either of the following: + +* when extra objects need to be replicated along with the current object +* when an object should _not_ be replicated. + +To register a hook, you must call the following function in an +application-specific Erlang module, where `MyMod` is to be replaced +with the name of your custom module: + +```erlang +riak_core:register([{repl_helper, MyMod}]). +``` + +## Replication Hook API + +A replication hook must implement the following functions: + +### send_realtime/2 + +```erlang +(riak_object, RiakClient) -> ok | cancel | [riak_object] +``` + +This hook controls whether an [object][object] +replicated in realtime should be sent. To send this object, return `ok`; +to prevent the object from being sent, return `cancel`. You can also +return a list of Riak objects to be replicated immediately *before* the +current object. This is useful when you have an object that refers to +other objects, e.g. a chunked file, and want to ensure that all of the +dependency objects are replicated before the dependent object. + +### send/2 + +```erlang +(riak_object, RiakClient) -> ok | cancel | [riak_object] +``` + +This hook is used in fullsync replication. To send this +[object][object], +return `ok`; to prevent the object from being sent, return `cancel`. You +can also return a list of Riak objects to be replicated immediately +*before* the current object. This is useful for when you have an object +that refers to other objects, e.g. a chunked file, and want ensure that +all the dependency objects are replicated before the dependent object. + +### recv/1 + +```erlang +(riak_object) -> ok | cancel +``` + +When an [object][object] +is received by the client site, this hook is run. You can use it to +update metadata or to deny the object. + +## Implementing a Sample Replication Hook + +The following is a simple replication hook that will log when an object +is received via replication. For more information about the functions in +the sample, see the [Replication Hook API](#replication-hook-api) section below. + +Here is the relevant Erlang code: + +```erlang +%% Riak Enterprise MDC replication hook sample + +-module(riak_replication_hook_sample). +-export([register/0]). +-export([recv/1, send/2, send_realtime/2]). 
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + + + + + diff --git a/content/riak/kv/3.0.3/developing/app-guide.md b/content/riak/kv/3.0.3/developing/app-guide.md new file mode 100644 index 0000000000..07ba8eb1a1 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/app-guide.md @@ -0,0 +1,420 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/3.0.3/dev/using/application-guide/ + - /riak/kv/3.0.3/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/3.0.3/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/3.0.3/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/3.0.3/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/3.0.3/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/3.0.3/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/3.0.3/developing/key-value-modeling +[dev data types]: {{}}riak/kv/3.0.3/developing/data-types +[dev data types#counters]: {{}}riak/kv/3.0.3/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/3.0.3/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/3.0.3/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/3.0.3/developing/usage/creating-objects +[usage search]: {{}}riak/kv/3.0.3/developing/usage/search +[use ref search]: {{}}riak/kv/3.0.3/using/reference/search +[usage 2i]: {{}}riak/kv/3.0.3/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/3.0.3/developing/client-libraries +[concept crdts]: {{}}riak/kv/3.0.3/learn/concepts/crdts +[dev data model]: {{}}riak/kv/3.0.3/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/3.0.3/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/3.0.3/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/3.0.3/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/3.0.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/3.0.3/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/3.0.3/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/3.0.3/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/3.0.3/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/3.0.3/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/3.0.3/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/3.0.3/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/3.0.3/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/3.0.3/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/3.0.3/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/3.0.3/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/3.0.3/using/reference/strong-consistency 
+[cluster ops strong consistency]: {{}}riak/kv/3.0.3/using/cluster-operations/strong-consistency +[config strong consistency]: {{}}riak/kv/3.0.3/configuring/strong-consistency +[apps strong consistency]: {{}}riak/kv/3.0.3/developing/app-guide/strong-consistency +[usage update objects]: {{}}riak/kv/3.0.3/developing/usage/updating-objects +[apps replication properties]: {{}}riak/kv/3.0.3/developing/app-guide/replication-properties +[install index]: {{}}riak/kv/3.0.3/setup/installing +[getting started]: {{}}riak/kv/3.0.3/developing/getting-started +[usage index]: {{}}riak/kv/3.0.3/developing/usage +[glossary]: {{}}riak/kv/3.0.3/learn/glossary + +So you've decided to build an application using Riak as a data store. We +think that this is a wise choice for a broad variety of use cases. But +using Riak isn't always straightforward, especially if you're used to +developing with relational databases like like MySQL or PostgreSQL or +non-persistent key/value stores like Redis. So in this guide, we'll walk +you through a set of questions that should be asked about your use case +before getting started. The answer to those questions may inform +decisions about which Riak features you should use, what kind of +replication and conflict resolution strategies you should employ, and +perhaps even how parts of your application should be built. + +## What Kind of Data Are You Storing? + +This is an important initial question for two reasons: + +1. Not all data is a good fit for Riak. If your data isn't a good fit, +we would advise that you seek out a storage system that better suits +your needs. +2. The kinds of data that you're storing should guide your decision both +about _how_ to store and access your data in Riak and about which Riak +features would be helpful (and which ones might even be harmful). + +### Good Fits for Riak + +Riak tends to be an excellent choice if you're dealing with any of the +following: + +* **Immutable data** - While Riak provides several means of + [resolving conflicts][usage conflict resolution] between different replicas + of objects, those processes can lead to slower performance in some + cases. Storing immutable data means that you can avoid those processes + altogether and get the most out of Riak. +* **Small objects** - Riak was not built as a store for large objects + like video files or other + [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built + [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is + great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend + to run smaller than 1 MB. +* **Independent objects** - Objects that do not have interdependencies + on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature. +* **Objects with "natural" keys** - It is almost always advisable to + build keys for objects out of timestamps, [usernames][dev data model#user], + or other ["natural" markers][dev kv model] that distinguish + that object from other objects. Data that can be modeled this way fits + nicely with Riak because Riak emphasizes extremely fast object lookup. +* **Data compatible with [Riak Data Types][dev data types]** - If + you're working with mutable data, one option is to run basic CRUD + operations on that data in a standard key/value fashion and either + manage conflict resolution yourself or allow Riak to do so. 
But if + your data can be modeled as a [counter][dev data types#counters], + [set][dev data types#sets], or [map][dev data types#maps], you + should seriously consider using [Riak Data Types][dev data types], + which can speed application development and transfer a great deal of + complexity away from the application and to Riak itself. + +### Not-so-good Fits for Riak + +Riak may not such be a good choice if you use it to store: + +* **Objects that exceed 1-2MB in size** - If you will be + storing a lot of objects over that size, we would recommend checking + out [Riak CS]({{}}riak/cs/latest/) instead, as Riak + CS was built to solve this problem. Storing large objects in Riak will + typically lead to substandard performance. +* **Objects with complex interdependencies** - If your data cannot be + easily denormalized or if it requires that objects can be easily + assembled into and accessible as larger wholes---think columns or + tables---then you might want to consider a relational database + instead. + +### Conclusion + +If it sounds like Riak is a good choice for some or all of your +application's data needs, move on to the next sections, where you can +find out more about which Riak features are recommendable for your use +case, how you should model your data, and what kinds of data modeling +and development strategies we recommend. + +## Which Features Should You Consider? + +Basic CRUD key/value operations are almost always the most performant +operations when using Riak. If your needs can be served using CRUD +operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value +operations don't quite suffice for your use case, Riak offers a variety +of features that may be just what you're looking for. In the sections +immediately below, you can find brief descriptions of those features as +well as relevant links to Basho documentation. + +## Search + +Riak Search provides you with [Apache +Solr](http://lucene.apache.org/solr/)-powered full-text indexing and +querying on top of the scalability, fault tolerance, and operational +simplicity of Riak. Our motto for Riak Search: **Write it like Riak. +Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on +using the Solr API. + +* [Using Search][usage search] - Getting started with Riak Search +* [Search Details][use ref search] - A detailed overview of the concepts and design + consideration behind Riak Search +* [Search Schema][usage search schema] - How to create custom schemas for extracting data + from Riak Search + +### When to Use Search + +* **When you need a rich querying API** - Riak Search gives you access + to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely + broad API, which enables you to query on the basis of wildcards, + strings, booleans, geolocation, ranges, language-specific fulltext, + and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon). + +> **Search is preferred for querying** +> +> In general, you should consider Search to be the default choice for +nearly all querying needs that go beyond basic CRUD/KV operations. If +your use case demands some sort of querying mechanism and you're in +doubt about what to use, you should assume that Search is the right tool +for you. 
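To make "write it like Riak, query it like Solr" concrete, here is a
minimal sketch using the official Erlang client. It assumes riakc is
available and that a hypothetical Search index named `famous` has
already been created and associated with the bucket:

```erlang
%% Sketch only: store a JSON object as usual, then query it via Solr syntax.
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
Obj = riakc_obj:new(<<"animals">>, <<"lion1">>,
                    <<"{\"name_s\":\"Lion-o\"}">>, "application/json"),
ok = riakc_pb_socket:put(Pid, Obj),
{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>).
```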
+ +### When Not to Use Search + +* **When deep pagination is needed** - At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** - In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] - A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] - A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] - An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** - If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** - If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** - If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** - While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
  If you have data that requires a modeling solution that can't be covered, you should stick to standard K/V operations.
* **When object size is of significant concern** - Riak Data Types behave much like other Riak objects, but they tend to carry more metadata than normal Riak objects, especially maps. In most cases the metadata payload will be a small percentage of the object's total size, but if you want to keep objects as lean as possible, it may be better to stick to normal K/V operations.

## MapReduce

Riak's MapReduce feature enables you to perform batch processing jobs in a way that leverages Riak's distributed nature. When a MapReduce job is sent to Riak, Riak automatically distributes the processing work to where the target data lives, which can reduce network bandwidth. Riak comes equipped with a set of default MapReduce jobs that you can employ, or you can write and run your own MapReduce jobs in [Erlang](http://www.erlang.org/).

* [Using MapReduce][usage mapreduce] - A general guide to using MapReduce
* [Advanced MapReduce][apps mapreduce] - A more in-depth guide to MapReduce, including code samples and implementation details

### When to Use MapReduce

* **Batch processing only** - You should use MapReduce only when truly necessary. MapReduce jobs are very computationally expensive and can degrade performance in production clusters. You should restrict MapReduce usage to infrequent batch processing operations, preferably carried out at times when your cluster is experiencing load that is well below average.

### When Not to Use MapReduce

* **When another Riak feature will do** - Before even considering using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible solutions to your needs.

In general, you should not think of MapReduce as, for example, Hadoop within Riak. While it can be useful for certain types of non-primary-key-based queries, it is neither a "Big Data" processing tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should consider using Riak in conjunction with a more suitable data processing tool.

## Secondary Indexes (2i)

Using basic key/value operations in Riak sometimes leads to the following problem: how do I know which keys I should look for? Secondary indexes (2i) provide a solution to this problem, enabling you to tag objects with either binary or integer metadata and then query Riak for all of the keys that share specific tags. 2i is especially useful if you're storing binary data that is opaque to features like [Riak Search][usage search].

* [Using Secondary Indexes][usage 2i] - A general guide to using 2i, along with code samples and information on 2i features like pagination, streaming, and sorting
* [Advanced Secondary Indexes][use ref 2i] - Implementation details behind 2i

### When to Use Secondary Indexes

* **When you require deep pagination** - At the moment, 2i's deep pagination capabilities are more performant than those offered by Search if you require pagination of more than 3-5 pages. This will change, however, in the future, at which point we will recommend using Search instead.

### When Not to Use Secondary Indexes

* **For most querying purposes** - If your use case does not involve deep pagination, we recommend Search over 2i for _all_ querying purposes.
* **If you're using Bitcask** - 2i is available only in the [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.

## Mixed Approach

One thing to always bear in mind is that Riak enables you to mix and match a wide variety of approaches in a single cluster. You can use basic CRUD operations for some of your data, index some of your data to be queried by Riak Search, use Riak Data Types for another subset, etc. You are always free to use a wide array of Riak features---or you can use none at all and stick to key/value operations.

## How Should You Model Your Data?

It's difficult to offer universally applicable data modeling guidelines because data models differ so markedly from use case to use case. What works when storing [user data][dev data model#user], for example, might be a poor fit when working with [sensor data][dev data model#sensor]. Nonetheless, there's a variety of material in our documentation that might be helpful when thinking about data modeling:

* Object Modeling in Riak KV:
  - [Java][obj model java]
  - [Ruby][obj model ruby]
  - [Python][obj model python]
  - [C#][obj model csharp]
  - [NodeJS][obj model nodejs]
  - [Erlang][obj model erlang]
  - [Go][obj model golang]
* [Key/Value Modeling][dev kv model]

### Data Types

One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in accordance with one of the available Data Types---flags (similar to Booleans), registers (good for storing small binaries or text snippets), [counters][dev data types#counters], [sets][dev data types#sets], or [maps][dev data types#maps]---you might be able to streamline application development by using them as an alternative to key/value operations. In some cases, it might even be worthwhile to adapt your data modeling strategy to fit one of the available Data Types. To see if this feature might be a good fit for your application, we recommend checking out the following documentation:

* [Data Types][concept crdts]
* [Using Data Types][dev data types]
* [Data Modeling with Riak Data Types][dev data model]

## What are Your Consistency Requirements?

Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that favors availability and partition tolerance over data consistency. In Riak versions 2.0 and later, the option of applying strong consistency guarantees is available to developers that want to use Riak as a strict CP system. One of the advantages of Riak's approach to strong consistency is that you don't need to store all of your data in a strongly consistent fashion if you use this feature. Instead, you can mix and match a CP approach with an AP approach in a single cluster in any way you wish.

If you need some or all of your data to be subject to strong consistency requirements, we recommend checking out the following documentation:

* [Strong Consistency][use ref strong consistency]
* [Using Strong Consistency][apps strong consistency]
* [Managing Strong Consistency][cluster ops strong consistency]

## Are Your Objects Mutable?

Although Riak always performs best when storing and retrieving immutable data, Riak also handles mutable objects very ably using a variety of eventual consistency principles.
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] - Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] - A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] - How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] - A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] - A listing of frequently used terms in Riak's + documentation + + + + + diff --git a/content/riak/kv/3.0.3/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/3.0.3/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..3749227428 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,802 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/3.0.3/dev/advanced/mapreduce/ + - /riak/kv/3.0.3/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/3.0.3/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/3.0.3/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/3.0.3/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/3.0.3/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/3.0.3/learn/glossary/#vnode +[config reference]: {{}}riak/kv/3.0.3/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to +run MapReduce jobs using Erlang or JavaScript. + +{{% note title="Deprecation Warning" %}} +Javascript MapReduce is deprecated and will be removed in a future version. +{{% /note %}} + + +### Why Do We Use MapReduce for Querying Riak KV? + +Key/value stores like Riak KV generally do not offer the kinds of complex +querying capabilities found in other data storage systems, such as +relational databases. MapReduce enables you to perform powerful queries +over the data stored in Riak KV but should be used with caution. + +The main goal of MapReduce is to spread the processing of a query across +many systems to take advantage of parallel processing power. This is +generally done by dividing the query into several steps, i.e. dividing +the dataset into several chunks and then running those step/chunk pairs +on separate physical hosts. Riak KV's MapReduce has an additional goal: +increasing data locality. When processing a large dataset, it's often +much more efficient to take the computation to the data than it is to +bring the data to the computation. + +"Map" and "Reduce" are phases in the query process. Map functions take +one piece of data as input and produce zero or more results as output. +If you're familiar with [mapping over a list][mapping list] +in functional programming languages, you're already familiar with the +"Map" steps in a MapReduce query. + +## MapReduce caveats + +MapReduce should generally be treated as a fallback rather than a +standard part of an application. There are often ways to model data +such that dynamic queries become single key retrievals, which are +dramatically faster and more reliable in Riak KV, and tools such as Riak +search and 2i are simpler to use and may place less strain on a +cluster. + +### R=1 + +One consequence of Riak KV's processing model is that MapReduce queries +have an effective `R` value of 1. The queries are distributed +to a representative sample of the cluster where the data is expected to +be found, and if one server lacks a copy of data it's supposed to have, +a MapReduce job will not attempt to look for it elsewhere. + +For more on the value of `R`, see our documentation on [replication properties][apps replication properties]. + +### Key lists + +Asking Riak KV to generate a list of all keys in a production environment +is generally a bad idea. It's an expensive operation. + +Attempting to constrain that operation to a bucket (e.g., +`mapred_bucket` as used below) does not help because Riak KV must still +pull all keys from storage to determine which ones are in the +specified bucket. + +If at all possible, run MapReduce against a list of known keys. + +### Code distribution + +As we'll discuss in this document, the functions invoked from Erlang +MapReduce must be available on all servers in the cluster unless +using the client library from an Erlang shell. + +### Security restrictions + +If Riak's security functionality is enabled, there are two +restrictions on MapReduce that come into play: + +* The `riak_kv.mapreduce` permission must be granted to the user (or + via the user's groups) +* Other than the module `riak_kv_mapreduce`, any Erlang modules + distributed with Riak KV will **not** be accessible to custom MapReduce + code unless made available via the `add_path` mechanism documented + in [Installing Custom Code][use ref custom code]. + +## How Riak KV's MapReduce Queries Are Specified + +MapReduce queries in Riak KV have two components: (1) a list of inputs and +(2) a list of "steps," or "phases." 
Each element of the input list is an object location, as specified by [bucket type][usage bucket types], bucket, and key. This location may also be annotated with "key-data," which will be passed as an argument to a map function when evaluated on the object stored under that bucket-key pair.

Each element of the phases list is a description of a map function, a reduce function, or a link function. The description includes where to find the code for the phase function (for map and reduce phases), static data passed to the function every time it is executed during that phase, and a flag indicating whether or not to include the results of that phase in the final output of the query.

The phase list describes the chain of operations through which each input will flow. That is, the initial inputs will be fed to the first phase in the list and the output of that phase will be fed as input to the next phase in the list. This stream will continue through the final phase.

## How Phases Work

### Map Phase

The input list to a map phase must be a list of (possibly annotated) bucket-key pairs. For each pair, Riak KV will send the request to evaluate the map function to the partition that is responsible for storing the data for that bucket-key. The [vnode][glossary vnode] hosting that partition will look up the object stored under that bucket-key and evaluate the map function with the object as an argument. The other arguments to the function will be the annotation, if any is included, with the bucket-key, and the static data for the phase, as specified in the query.

{{% note title="Tombstones" %}}
Be aware that most Riak KV clusters will retain deleted objects for some period of time (3 seconds by default), and the MapReduce framework does not conceal these from submitted jobs. These tombstones can be recognized and filtered out by looking for `X-Riak-Deleted` in the object metadata with a value of `true`.
{{% /note %}}

### Reduce Phase

Reduce phases accept any list of data as input, and produce any list of data as output. They also receive a phase-static value, specified in the query definition.

The most important thing to understand is that the function defining the reduce phase may be evaluated multiple times, and the input of later evaluations will include the output of earlier evaluations.

For example, a reduce phase may implement the [`set-union`] function. In that case, the first set of inputs might be `[1,2,2,3]`, and the output would be `[1,2,3]`. When the phase receives more inputs, say `[3,4,5]`, the function will be called with the concatenation of the two lists: `[1,2,3,3,4,5]`.

Other systems refer to the second application of the reduce function as a "re-reduce." There are at least a few reduce-query implementation strategies that work with Riak KV's model.

One strategy is to implement the phase preceding the reduce phase such that its output is "the same shape" as the output of the reduce phase. This is how the examples in this document are written, and the way that we have found produces the cleanest code.

An alternative strategy is to make the output of a reduce phase recognizable such that it can be extracted from the input list on subsequent applications. For example, if inputs from the preceding phase are numbers, outputs from the reduce phase could be objects or strings. This would allow the function to find the previous result and apply new inputs to it.
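
As an illustration of the first strategy, here is a minimal sketch of a set-union reduce function whose output is already "the same shape" as its input, and is therefore safe to re-reduce:

```erlang
%% lists:usort/1 sorts and deduplicates, so feeding this function's
%% output back in as part of a later input list changes nothing.
fun(Values, _Arg) ->
    lists:usort(Values)
end.
```
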
+ +### How a Link Phase Works in Riak KV + +Link phases find links matching patterns specified in the query +definition. The patterns specify which buckets and tags links must have. + +"Following a link" means adding it to the output list of this phase. The +output of this phase is often most useful as input to a map phase or to +another reduce phase. + +## Invoking MapReduce + +To illustrate some key ideas, we'll define a simple module that +implements a map function to return the key value pairs contained in a +bucket and use it in a MapReduce query via Riak KV's HTTP API. + +Here is our example MapReduce function: + +```erlang +-module(mr_example). + +-export([get_keys/3]). + +% Returns bucket and key pairs from a map phase +get_keys(Value,_Keydata,_Arg) -> + [{riak_object:bucket(Value),riak_object:key(Value)}]. +``` + +Save this file as `mr_example.erl` and proceed to compiling the module. + +{{% note title="Note on the Erlang Compiler" %}} +You must use the Erlang compiler (`erlc`) associated with the +Riak KV installation or the version of Erlang used when compiling Riak KV from +source. +{{% /note %}} + +Compiling the module is a straightforward process: + +```bash +erlc mr_example.erl +``` + +Successful compilation will result in a new `.beam` file, `mr_example.beam`. + +Send this file to your operator, or read about [installing custom code][use ref custom code] +on your Riak KV nodes. Once your file has been installed, all that +remains is to try the custom function in a MapReduce query. For +example, let's return keys contained within a bucket named `messages` +(please pick a bucket which contains keys in your environment). + +```curl +curl -XPOST localhost:8098/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' +``` + +The result should be a JSON map of bucket and key names expressed as key/value pairs. + +{{% note %}} +Be sure to install the MapReduce function as described above on all of +the nodes in your cluster to ensure proper operation. +{{% /note %}} + + +## Phase functions + +MapReduce phase functions have the same properties, arguments, and +return values whether you write them in Javascript or Erlang. + +### Map phase functions + +Map functions take three arguments (in Erlang, arity-3 is required). +Those arguments are: + + 1. `Value`: the value found at a key. This will be a Riak object, which + in Erlang is defined and manipulated by the `riak_object` module. + In Javascript, a Riak object looks like this: + + ```javascript + { + "bucket_type" : BucketTypeAsString, + "bucket" : BucketAsString, + "key" : KeyAsString, + "vclock" : VclockAsString, + "values" : [ + { + "metadata" : { + "X-Riak-VTag":VtagAsString, + "X-Riak-Last-Modified":LastModAsString, + "Links":[...List of link objects], + // ...other metadata... + }, + "data" : ObjectData + }, + // ...other metadata/data values (siblings)... + ] + } + ``` + 2. *KeyData* : key data that was submitted with the inputs to the query or phase. + 3. *Arg* : a static argument for the entire phase that was submitted with the query. + +A map phase should produce a list of results. You will see errors if +the output of your map function is not a list. Return the empty list if +your map function chooses not to produce output. If your map phase is +followed by another map phase, the output of the function must be +compatible with the input to a map phase - a list of bucket-key pairs or +`bucket-key-keydata` triples. 
+ +#### Map function examples + +These map functions return the value (data) of the object being mapped: + +```erlang +fun(Value, _KeyData, _Arg) -> + [riak_object:get_value(Value)] +end. +``` + +These map functions filter their inputs based on the arg and return bucket-key pairs for a subsequent map phase: + +```erlang +fun(Value, _KeyData, Arg) -> + Key = riak_object:key(Value), + Bucket = riak_object:bucket(Value), + case erlang:byte_size(Key) of + L when L > Arg -> + [{Bucket,Key}]; + _ -> [] + end +end. +``` + +### Reduce phase functions + +Reduce functions take two arguments. Those arguments are: + +1. *ValueList*: the list of values produced by the preceding phase in the MapReduce query. +2. *Arg* : a static argument for the entire phase that was submitted with the query. + +A reduce function should produce a list of values, but it must also be +true that the function is commutative, associative, and idempotent. That +is, if the input list `[a,b,c,d]` is valid for a given F, then all of +the following must produce the same result: + + +```erlang + F([a,b,c,d]) + F([a,d] ++ F([c,b])) + F([F([a]),F([c]),F([b]),F([d])]) +``` + +#### Reduce function examples + +These reduce functions assume the values in the input are numbers and +sum them: + +```erlang +fun(Values, _Arg) -> + [lists:foldl(fun erlang:'+'/2, 0, Values)] +end. +``` + +These reduce functions sort their inputs: + +```erlang +fun(Values, _Arg) -> + lists:sort(Values) +end. +``` + +## MapReduce Examples + +Riak KV supports describing MapReduce queries in Erlang syntax through the +Protocol Buffers API. This section demonstrates how to do so using the +Erlang client. + +{{% note title="Distributing Erlang MapReduce Code" %}} +Any modules and functions you use in your Erlang MapReduce calls must be +available on all nodes in the cluster. Please read about +[installing custom code]({{}}riak/kv/3.0.3/using/reference/custom-code). +{{% /note %}} + +### Erlang Example + +Before running some MapReduce queries, let's create some objects to +run them on. Unlike the first example when we compiled +`mr_example.erl` and distributed it across the cluster, this time +we'll use the [Erlang client library][erlang client] and shell. + +```erlang +1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087). +2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>, + term_to_binary(["eggs", "bacon"])). +3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>, + term_to_binary(["bread", "bacon"])). +4> riakc_pb_socket:put(Client, Yours, [{w, 1}]). +5> riakc_pb_socket:put(Client, Mine, [{w, 1}]). +``` + +Now that we have a client and some data, let's run a query and count how +many occurrences of groceries. + +```erlang +6> Count = fun(G, undefined, none) -> + [dict:from_list([{I, 1} + || I <- binary_to_term(riak_object:get_value(G))])] + end. +7> Merge = fun(Gcounts, none) -> + [lists:foldl(fun(G, Acc) -> + dict:merge(fun(_, X, Y) -> X+Y end, + G, Acc) + end, + dict:new(), + Gcounts)] + end. +8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred( + Client, + [{<<"groceries">>, <<"mine">>}, + {<<"groceries">>, <<"yours">>}], + [{map, {qfun, Count}, none, false}, + {reduce, {qfun, Merge}, none, true}]). +9> L = dict:to_list(R). +``` + +{{% note title="Riak Object Representations" %}} +Note how the `riak_object` module is used in the MapReduce +function but the `riakc_obj` module is used on the client. +Riak objects are represented differently internally to the cluster than +they are externally. 
+{{% /note %}} + +Given the lists of groceries we created, the sequence of commands above +would result in L being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`. + +### Erlang Query Syntax + +`riakc_pb_socket:mapred/3` takes a client and two lists as arguments. +The first list contains bucket-key pairs. The second list contains +the phases of the query. + +`riakc_pb_socket:mapred_bucket/3` replaces the first list of +bucket-key pairs with the name of a bucket; see the warnings above +about using this in a production environment. + +#### Inputs + +The `mapred/3` input objects are given as a list of tuples in the +format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and +`Key` should be binaries, and `KeyData` can be any Erlang term. The +former form is equivalent to `{{Bucket,Key},undefined}`. + +#### Query + +The query is given as a list of map, reduce and link phases. Map and +reduce phases are each expressed as tuples in the following form: + + +```erlang +{Type, FunTerm, Arg, Keep} +``` + +`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument +(any Erlang term) to pass to each execution of the phase. `Keep` is +either `true` or `false` and determines whether results from the phase +will be included in the final value of the query. Riak KV assumes that the +final phase will return results. + +`FunTerm` is a reference to the function that the phase will execute and +takes any of the following forms: + +* `{modfun, Module, Function}` where `Module` and `Function` are atoms + that name an Erlang function in a specific module +* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous + function) +* `{jsfun,Name}` where `Name` is a binary that, when evaluated in + Javascript, points to a built-in Javascript function +* `{jsanon, Source}` where `Source` is a binary that, when evaluated in + Javascript is an anonymous function +* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains + the source for an anonymous Javascript function + +{{% note title="qfun Note" %}} +Using `qfun` in compiled applications can be a fragile +operation. Please keep the following points in mind: + +1. The module in which the function is defined must be present and +exactly the same version on both the client and Riak KV nodes. + +2. Any modules and functions used by this function (or any function in +the resulting call stack) must also be present on the Riak KV nodes. + +Errors about failures to ensure both 1 and 2 are often surprising, +usually seen as opaque missing-function or function-clause +errors. Especially in the case of differing module versions, this can be +difficult to diagnose without expecting the issue and knowing of +`Module:info/0`. + +When using the Erlang shell, anonymous MapReduce functions can be +defined and sent to Riak KV instead of deploying them to all servers in +advance, but condition #2 above still holds. +{{% /note %}} + +Link phases are expressed in the following form: + + +```erlang +{link, Bucket, Tag, Keep} +``` + + +`Bucket` is either a binary name of a bucket to match, or the atom `_`, +which matches any bucket. `Tag` is either a binary tag to match, or the +atom `_`, which matches any tag. `Keep` has the same meaning as in map +and reduce phases. + + +> There are a small group of prebuilt Erlang MapReduce functions available +with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl). 
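
For example, a query built from two of those prebuilt functions might look like the following sketch, reusing the `groceries` objects created above (`map_object_value` emits each object's value; `reduce_set_union` deduplicates the combined results):

```erlang
{ok, Results} =
    riakc_pb_socket:mapred(
      Client,
      [{<<"groceries">>, <<"mine">>}, {<<"groceries">>, <<"yours">>}],
      [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
       {reduce, {modfun, riak_kv_mapreduce, reduce_set_union}, none, true}]).
```
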
+ +## Bigger Data Examples + +### Loading Data + +This Erlang script will load historical stock-price data for Google +(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it. +Paste the code below into a file called `load_data.erl` inside the `dev` +directory (or download it below). + +```erlang +#!/usr/bin/env escript +%% -*- erlang -*- +main([]) -> + io:format("Requires one argument: filename with the CSV data~n"); +main([Filename]) -> + {ok, Data} = file:read_file(Filename), + Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])), + lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines). + +format_and_insert(Line) -> + JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line), + Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]), + io:format("Inserting: ~s~n", [hd(Line)]), + os:cmd(Command). +``` + +Make the script executable: + +```bash +chmod +x load_data.erl +``` + +Download the CSV file of stock data linked below and place it in the +`dev` directory where we've been working. + +* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) - Google historical stock data +* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) - Alternative script in Ruby to load the data +* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) - Erlang script to load data (as shown in snippet) + +Now load the data into Riak KV. + +```bash +./load_data.erl goog.csv +``` + + +### Map only: find the days on which the high was over $600.00 + +From the Erlang shell with the client library loaded, let's define a +function which will check each value in our `goog` bucket to see if +the stock's high for the day was above $600. + +```erlang +> HighFun = fun(O, _, LowVal) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> High = proplists:get_value(<<"High">>, Map, -1.0), +> case High > LowVal of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun +``` + +Now we'll use `mapred_bucket/3` to send that function to the cluster. + +```erlang +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]). + {ok,[{0, + [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>, + <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>, + <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>, + <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>, + <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>, + <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>, + <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>, + <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]} +``` + +#### Map only: find the days on which the close is lower than open + +This example is slightly more complicated: instead of comparing a +single field against a fixed value, we're looking for days when the +stock declined. + +```erlang +> CloseLowerFun = fun(O, _, _) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> Close = proplists:get_value(<<"Close">>, Map, -1.0), +> Open = proplists:get_value(<<"Open">>, Map, -2.0), +> case Close < Open of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun + +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]). 
+{ok,[{0, + [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>, + <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>, + <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>, + <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>, + <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>, + <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>, + <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>, + <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]} +``` + +#### Map and Reduce: find the maximum daily variance in price by month + +Here things start to get tricky. We'll use map to determine each day's +rise or fall, and our reduce phase will identify each month's largest +variance. + +```erlang +DailyMap = fun(O, _, _) -> + {struct, Map} = mochijson2:decode(riak_object:get_value(O)), + Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")), + High = proplists:get_value(<<"High">>, Map, 0.0), + Low = proplists:get_value(<<"Low">>, Map, 0.0), + Month = string:substr(Date, 1, 7), + [{Month, abs(High - Low)}] +end. + +MonthReduce = fun(List, _) -> + {Highs, _} = lists:foldl( + fun({Month, _Value}=Item, {Accum, PrevMonth}) -> + case Month of + PrevMonth -> + %% Highest value is always first in the list, so + %% skip over this one + {Accum, PrevMonth}; + _ -> + {[Item] ++ Accum, Month} + end + end, + {[], ""}, + List), + Highs + end. +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]). +{ok,[{1, + [{"2010-02",10.099999999999909}, + {"2006-02",11.420000000000016}, + {"2004-08",8.100000000000009}, + {"2008-08",14.490000000000009}, + {"2006-05",11.829999999999984}, + {"2005-10",4.539999999999964}, + {"2006-06",7.300000000000011}, + {"2008-06",9.690000000000055}, + {"2006-03",11.770000000000039}, + {"2006-12",4.880000000000052}, + {"2005-09",9.050000000000011}, + {"2008-03",15.829999999999984}, + {"2008-09",14.889999999999986}, + {"2010-04",9.149999999999977}, + {"2008-06",14.909999999999968}, + {"2008-05",13.960000000000036}, + {"2005-05",2.780000000000001}, + {"2005-07",6.680000000000007}, + {"2008-10",21.390000000000043}, + {"2009-09",4.180000000000007}, + {"2006-08",8.319999999999993}, + {"2007-08",5.990000000000009}, + {[...],...}, + {...}|...]}]} +``` + +#### A MapReduce Challenge + +Here is a scenario involving the data you already have loaded. + +MapReduce Challenge: Find the largest day for each month in terms of +dollars traded, and subsequently the largest overall day. + +*Hint*: You will need at least one each of map and reduce phases. + +## Streaming MapReduce + +Because Riak KV distributes the map phases across the cluster to increase +data locality, you can gain access to the results of those individual +computations as they finish via streaming. Streaming can be very +helpful when getting access to results from a high latency MapReduce job +that only contains map phases. Streaming of results from reduce phases +isn't as useful, but if your map phases return data (keep: true), they +will be returned to the client even if the reduce phases haven't +executed. This will let you use streaming with a reduce phase to collect +the results of the map phases while the jobs are run and then get the +result to the reduce phase at the end. + +### Streaming via the HTTP API + +You can enable streaming with MapReduce jobs submitted to the `/mapred` +resource by adding `?chunked=true` to the url. The response will be sent +using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`. 
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error

+Internal Server Error
+
+The server encountered an error while processing this request:
+
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, the information you need is typically buried deeper in the stack trace.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
+
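+A quick check of the corrected call from the shell:
+
+```erlang
+> string:substr("2009-06-10", 1, 7).
+"2009-06"
+```
+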
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/app-guide/cluster-metadata.md b/content/riak/kv/3.0.3/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..6f29b1b7db
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,72 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+Cluster metadata is a subsystem inside of Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster-wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/3.0.3/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/3.0.3/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/3.0.3/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
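+
+A minimal sketch of these operations, run from an attached Riak console (the prefix, key, and value here are hypothetical):
+
+```erlang
+%% Store, read, and delete a value under a {Prefix, SubPrefix} pair.
+FullPrefix = {<<"myapp">>, <<"settings">>},
+ok = riak_core_metadata:put(FullPrefix, <<"enabled">>, true),
+true = riak_core_metadata:get(FullPrefix, <<"enabled">>),
+ok = riak_core_metadata:delete(FullPrefix, <<"enabled">>).
+```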
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/app-guide/reference.md b/content/riak/kv/3.0.3/developing/app-guide/reference.md
new file mode 100644
index 0000000000..a1722fe509
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/app-guide/reference.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+#menu:
+#  riak_kv-3.0.3:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/app-guide/replication-properties.md b/content/riak/kv/3.0.3/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..5ad552c8ff
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/app-guide/replication-properties.md
@@ -0,0 +1,584 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/advanced/replication-properties
+  - /riak/kv/3.0.3/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/3.0.3/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/3.0.3/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/3.0.3/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/3.0.3/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/3.0.3/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/3.0.3/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/3.0.3/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties at the time that a read or write
+request is made; examples are given in the [section below]({{}}riak/kv/3.0.3/developing/app-guide/replication-properties#client-level-replication-settings).
+
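+For instance, with the Erlang client you can override `r` for a single read by passing it in the options list (a sketch; `Pid` and the object are assumed to already exist):
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"animal_facts">>, <<"chimpanzee">>,
+                                [{r, 1}]).
+```
+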
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/3.0.3/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
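+
+For example, the following write (a sketch mirroring the curl examples later in this page) would be stored with an `n_val` of 5 and require three vnode responses for reads and writes:
+
+```curl
+curl -XPUT http://localhost:8098/types/custom_props/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "this object uses the custom_props replication settings"
+```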
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/3.0.3/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
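+
+Any of these parameters can also be set permanently through a bucket type, following the same pattern shown above (a sketch; the type name is hypothetical):
+
+```bash
+riak-admin bucket-type create no_quick_notfound '{"props":{"notfound_ok":false,"basic_quorum":true}}'
+riak-admin bucket-type activate no_quick_notfound
+```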
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/3.0.3/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/3.0.3/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject('The species name of the giraffe is Giraffa camelopardalis',
+                ['Content-Type' => 'text/plain'])
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                       <<"giraffe">>,
+                       <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only
+if 3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/3.0.3/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
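+
+PR and PW can be passed on individual requests just like R and W. A
+minimal sketch with the official Python client (the bucket and key are
+our own illustration):
+
+```python
+import riak
+
+client = riak.RiakClient()
+bucket = client.bucket('animal_facts')
+
+# Strict-quorum read: at least 2 of the vnodes that answer must be
+# primaries, otherwise the request fails instead of falling back to
+# sloppy quorum
+obj = bucket.get('chimpanzee', pr=2)
+
+# Strict-quorum write: at least 2 primary vnodes must acknowledge it
+obj.content_type = 'text/plain'
+obj.data = 'Chimpanzees and bonobos make up the genus Pan'
+obj.store(pw=2)
+```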
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/3.0.3/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/3.0.3/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/3.0.3/setup/planning/backend/multi).
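+
+Like `w`, `dw` can be supplied on a per-write basis. A small sketch in
+the official Python client (bucket and key are illustrative):
+
+```python
+import riak
+
+client = riak.RiakClient()
+bucket = client.bucket('animal_facts')
+
+obj = riak.RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'Giraffes are the tallest living land animals'
+
+# Require 3 vnodes to acknowledge the write (w) and 2 of them to
+# confirm a durable write to their storage backend (dw)
+obj.store(w=3, dw=2)
+```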
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
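+
+The difference is easy to try per request. A sketch with the official
+Python client, which accepts `notfound_ok` as a fetch option (as in the
+per-request example later on this page); the key is illustrative:
+
+```python
+import riak
+
+client = riak.RiakClient()
+bucket = client.bucket('animal_facts')
+
+# Default behavior: the first "not found" vote is treated as
+# authoritative, so a miss returns quickly
+fast_miss = bucket.get('no_such_key', notfound_ok=True)
+
+# Stricter behavior: a single missing replica is not taken as
+# authoritative, so transient misses are less likely to surface
+thorough = bucket.get('no_such_key', notfound_ok=False)
+
+print(fast_miss.exists, thorough.exists)
+```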
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all N responsible vnodes (3 by default) for the
+value before returning `not found`, instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
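+
+Assuming your client version exposes `basic_quorum` as a fetch option
+(the official Python client passes protocol-level fetch options
+through), a read combining it with `notfound_ok=False` looks like this
+sketch; the arithmetic in the comment restates the formula above:
+
+```python
+import riak
+
+client = riak.RiakClient()
+bucket = client.bucket('animal_facts')
+
+# With N=5, a quorum is floor(5/2) + 1 = 3, so Riak may answer
+# "not found" after 3 vnode responses rather than waiting for all 5
+obj = bucket.get('possibly_missing_key',
+                 notfound_ok=False,
+                 basic_quorum=True)
+```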
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, i.e. floor(N/2) + 1. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
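+
+Symbolic names can generally be used wherever integers are accepted,
+including per-request options. A sketch with the official Python
+client, which accepts these values as strings (worth verifying for your
+client version):
+
+```python
+import riak
+
+client = riak.RiakClient()
+bucket = client.bucket('animal_facts')
+
+# 'one' is equivalent to r=1; 'all' would require every replica
+obj = bucket.get('chimpanzee', r='one')
+
+# 'quorum' resolves to floor(N/2) + 1 vnodes, i.e. 2 when N=3
+obj.store(w='quorum')
+```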
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj, [{w, 3}, {dw, 2}]).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
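+
+For completeness, here is the same write in the official Python client,
+mirroring the style of the fetch example above (the stats object is
+elided just as in the other snippets):
+
+```python
+bucket = client.bucket('nba_stats')
+obj = RiakObject(client, bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w=3, dw=2)
+```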
+
+All of Basho's [official Riak clients]({{}}riak/kv/3.0.3/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/3.0.3/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [PHP](http://basho.github.io/riak-php-client)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/3.0.3/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+
+Tuning CAP Controls in Riak from Basho Technologies on Vimeo.
+
+
diff --git a/content/riak/kv/3.0.3/developing/app-guide/strong-consistency.md b/content/riak/kv/3.0.3/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..2616be4a78
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/app-guide/strong-consistency.md
@@ -0,0 +1,261 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/advanced/strong-consistency
+  - /riak/kv/3.0.3/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/3.0.3/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/3.0.3/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/3.0.3/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/3.0.3/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/3.0.3/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/3.0.3/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/3.0.3/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/3.0.3/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/3.0.3/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/3.0.3/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/3.0.3/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/3.0.3/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/3.0.3/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/3.0.3/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/3.0.3/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/3.0.3/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/3.0.3/developing/client-libraries
+[getting started]: {{}}riak/kv/3.0.3/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/3.0.3/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent and partition
+tolerant) for all of the data in that bucket. You can store just some of
+your data in strongly consistent buckets or all of your data, depending
+on your use case. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability because a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable, or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
+
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function much differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations enable you to set values for a
+variety of [replication properties][apps replication properties] either on each request or at the
+bucket level, [using bucket types][usage bucket types], these settings are quietly ignored
+for strongly consistent operations. These settings include `r`, `pr`,
+`w`, `rw`, and others. Two replication properties that _can_ be set,
+however, are `n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients, but with important advantages over vector
+clocks.
+
+While we strongly recommend attaching context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---they are purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to make writes to non-existent keys without attaching a context,
+we recommend doing this only if you are certain that the key does not
+yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure, then you should default to supplying a context (see the sketch after this list).
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates.
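+
+In practice, the safest approach is a fetch-modify-store cycle that
+lets the client carry the causal context back to Riak. A minimal sketch
+with the official Python client; the bucket type mirrors the
+`strongly_consistent` example above, while the bucket and key are our
+own illustration:
+
+```python
+import riak
+
+client = riak.RiakClient()
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+# Fetch first so the object carries a causal context
+obj = bucket.get('user42')
+
+# Modify and store; the client sends the context back with the write.
+# Under strong consistency this can still fail on a concurrent update,
+# so real code should be prepared to re-fetch and retry.
+obj.data = {'balance': 100}
+obj.store()
+```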
+
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+non-strongly-consistent buckets. One important exception to this is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, and so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a non-empty key without including causal
+context, you will receive the following error:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+Precondition Failed
+
+mochiweb+webmachine web server
+```
+
+> **Getting Started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section.
+
+## Known Issue with Client Libraries
+
+All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no
+problems, since many error conditions are normal when using Riak.
+
+When working with strong consistency, however, operations like
+[conditional puts][config strong consistency#details] commonly
+produce errors that are difficult for clients to interpret. For example,
+it is expected behavior for conditional puts to fail in the case of
+concurrent updates to an object. At present, the official Riak clients
+will convert this failure into an exception that is no different from
+other error conditions, i.e. they will not indicate any
+strong-consistency-specific errors.
+
+The best solution to this problem at the moment is to catch these
+exceptions on the application side and parse server-side error messages
+to see if the error involved a conditional failure. If so, you should
+set up your application to retry any updates, perhaps a specified number
+of times or perhaps indefinitely, depending on the use case.
+
+If you do set up retry logic of this sort, however, it is necessary
+to retry the entire read/modify/put cycle, meaning that you will need
+to fetch the object, modify it, and then write. If you perform a simple
+put over and over again, without reading the object, the update will
+continue to fail.
+
+A future version of Riak will address these issues by modifying the
+server API to more accurately report errors specific to strongly
+consistent operations.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/app-guide/write-once.md b/content/riak/kv/3.0.3/developing/app-guide/write-once.md
new file mode 100644
index 0000000000..7589906991
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/app-guide/write-once.md
@@ -0,0 +1,159 @@
+---
+title: "Write Once"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Write Once"
+    identifier: "app_guide_write_once"
+    weight: 102
+    parent: "developing_app_guide"
+toc: true
+version_history:
+  in: "2.1.0+"
+aliases:
+  - /riak/3.0.3/dev/advanced/write-once
+  - /riak/kv/3.0.3/dev/advanced/write-once
+---
+
+[glossary vnode]: {{}}riak/kv/3.0.3/learn/glossary/#vnode
+[bucket type]: {{}}riak/kv/3.0.3/developing/usage/bucket-types
+[Riak data types]: {{}}riak/kv/3.0.3/developing/data-types
+[strong consistency]: {{}}riak/kv/3.0.3/developing/app-guide/strong-consistency
+
+Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution.
+
+{{% note %}}
+Write-once buckets do not support Riak commit hooks. Because Riak objects are
+inserted into the realtime queue using a postcommit hook, realtime replication
+is unavailable for write-once buckets.
+Fullsync replication will, however, replicate the data.
+{{% /note %}}
+
+## Configuration
+
+When the new `write_once` [bucket type][bucket type] parameter is set to
+`true`, buckets of this type will treat all key/value entries as
+semantically "write once;" once written, entries should not be modified
+or overwritten by the user.
+
+The `write_once` property is a boolean property applied to a bucket type and
+may only be set at bucket creation time. Once a bucket type has been set with
+this property and activated, the `write_once` property may not be modified.
+
+The `write_once` property is incompatible with [Riak data types][Riak data types]
+and [strong consistency][strong consistency]. This means that if you attempt
+to create a bucket type with the `write_once` property set to `true`, any
+attempt to set the `datatype` parameter or to set the `consistent` parameter
+to `true` will fail.
+
+The `write_once` property may not be set on the default bucket type, and may
+not be set on individual buckets. If you set the `lww` or `allow_mult`
+parameters on a write-once bucket type, those settings will be ignored, as
+sibling values are disallowed by default.
+
+The following example shows how to configure a bucket type with the
+`write_once` property:
+
+```bash
+riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}'
+# my-bucket-type created
+
+riak-admin bucket-type activate my-bucket-type
+# my-bucket-type has been activated
+
+riak-admin bucket-type status my-bucket-type
+# my-bucket-type is active
+...
+write_once: true
+...
+```
+
+## Quorum
+
+The write path used by write-once buckets supports the `w`, `pw`, and `dw`
+configuration values. However, if `dw` is specified, then the value of `w` is
+taken to be the maximum of the `w` and `dw` values. For example, for an `n_val`
+of 3, if `dw` is set to `all`, then `w` will be `3`.
+
+This write path additionally supports the `sloppy_quorum` property. If set to
+`false`, only primary nodes will be selected for calculation of write quorum
+nodes.
+
+## Runtime
+
+The write-once path circumvents the normal coordinated PUT code path, and
+instead sends write requests directly to all [vnodes][glossary vnode] (or
+vnode proxies) in the effective preference list for the write operation.
+
+In place of the `put_fsm` used in the normal path, we introduce a collection of
+new intermediate worker processes (implementing `gen_server` behavior). The
+role of these intermediate processes is to dispatch put requests to vnode or
+vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`,
+the write-once workers are long-lived for the lifecycle of the `riak_kv`
+application. They are therefore stateful and store request state in a
+state-local dictionary.
+
+The relationship between the `riak_client`, write-once workers, and vnode
+proxies is illustrated in the following diagram:
+
+![Write Once]({{}}images/write_once.png)
+
+## Client Impacts
+
+Since the write-once code path is optimized for writes of data that will not
+be updated and therefore may issue asynchronous writes, some
+client features might not work as expected. For example, PUT requests asking
+for the object to be returned will behave like requests that do not
+request the object to be returned when they are performed against write-once
+buckets.
+
+## Siblings
+
+As mentioned, entries in write-once buckets are intended to be written only
+once---users who are not abusing the semantics of the bucket type should not be
+updating or over-writing entries in buckets of this type. However, it is
+possible for users to misuse the API, accidentally or otherwise, which might
+result in incomparable entries for the same key.
+
+In the case of siblings, write-once buckets will resolve the conflict by
+choosing the "least" entry, where sibling ordering is based on a deterministic
+SHA-1 hash of the objects. While this algorithm is repeatable and deterministic
+at the database level, it will have the appearance to the user of "random write
+wins."
+
+{{% note %}}
+As mentioned in [Configuration](#configuration), write-once buckets and Riak
+Data Types are incompatible because of this.
+{{% /note %}}
+
+## Handoff
+
+The write-once path supports handoff scenarios, such that if a handoff occurs
+during PUTs in a write-once bucket, the values that have been written will be
+handed off to the newly added Riak node.
+
+## Asynchronous Writes
+
+For backends that support asynchronous writes, the write-once path will
+dispatch a write request to the backend and handle the response
+asynchronously. This behavior allows the vnode to free itself for other work
+instead of waiting on the write response from the backend.
+
+At the time of writing, the only backend that supports asynchronous writes is
+LevelDB. Riak will automatically fall back to synchronous writes with all other
+backends.
+
+{{% note title="Note on the `multi` backend" %}}
+The [Multi]({{}}riak/kv/3.0.3/setup/planning/backend/multi) backend does not
+support asynchronous writes. Therefore, if LevelDB is used with the Multi
+backend, it will be used in synchronous mode.
+{{% /note %}}
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/client-libraries.md b/content/riak/kv/3.0.3/developing/client-libraries.md
new file mode 100644
index 0000000000..b4d3fe8067
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/client-libraries.md
@@ -0,0 +1,294 @@
+---
+title: "Client Libraries"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Client Libraries"
+    identifier: "developing_client_libraries"
+    weight: 106
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/libraries
+  - /riak/kv/3.0.3/dev/using/libraries
+---
+
+## Basho-Supported Libraries
+
+Basho officially supports a number of open-source client libraries for a
+variety of programming languages and environments.
+
+Language | Source | Documentation | Download
+:--------|:-------|:--------------|:--------
+Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22)
+Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client)
+Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads)
+C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases)
+Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases)
+PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) |
+Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client)
+Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client)
+
+**Note**: All official clients use the integrated issue tracker on
+GitHub for bug reporting.
+
+In addition to the official clients, Basho provides some unofficial
+client libraries, listed below. There are also many client libraries and
+related [community projects]({{}}community/projects/).
+
+## Community Libraries
+
+The Riak Community is developing at a breakneck pace, and the number of
+community-contributed libraries and drivers is growing right alongside
+it. Here is a list of projects that may suit your programming needs or
+curiosities. If you know of something that needs to be added or are
+developing something that you wish to see added to this list, please
+fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs)
+and send us a pull request.
+
+{{% note title="Note on community-produced libraries" %}}
+All of these projects and libraries are at various stages of completeness and
+may not suit your application's needs based on their level of maturity and
+activity.
+{{% /note %}}
+
+### Client Libraries and Frameworks
+
+#### C/C++
+
+* [riak-cpp](https://github.com/ajtack/riak-cpp) - A C++ Riak client
+  library for use with C++11 compilers
+* [Riak C Driver](https://github.com/fenek/riak-c-driver) - A library
+  to communicate with Riak using cURL and Protocol Buffers
+* [Riack](https://github.com/trifork/riack) - A simple C client
+  library
+* [Riack++](https://github.com/TriKaspar/riack_cpp) - A C++ wrapper
+  around riack
+
+#### Clojure
+
+* [knockbox](https://github.com/reiddraper/knockbox) - An eventual
+  consistency toolbox for Clojure
+* [Welle](http://clojureriak.info) - An expressive Clojure client with
+  batteries included
+* [clj-riak](http://github.com/mmcgrana/clj-riak) - Clojure bindings
+  to the Riak Protocol Buffers API
+* [sumo](https://github.com/reiddraper/sumo) - A Protocol
+  Buffer-specific client for Riak with KV, 2i, and MapReduce support
+* [kria](https://github.com/bluemont/kria) - Riak 2.0 Asynchronous
+  (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer
+  API, Java 7.
+
+#### ColdFusion
+
+* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) - A Riak-backed cache extension for Railo/ColdFusion
+
+#### Common Lisp
+
+* [cl-riak (1)](https://github.com/whee/cl-riak)
+* [cl-riak (2)](https://github.com/eriknomitch/cl-riak)
+
+#### Dart
+
+* [riak-dart](https://github.com/agilord/riak_dart_client) - HTTP
+  client for Riak written in Dart
+
+#### Django (Python)
+
+* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) - Riak-based Session Backend for Django
+* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) - A Riak backend for Django
+
+#### Erlang
+
+* [Uriak Pool](https://github.com/unisontech/uriak_pool) - Erlang
+  connection pool library from the team at
+  [Unison](http://www.unison.com)
+* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) - Riak
+  Protocol Buffer Client pool application
+* [Pooly](https://github.com/aberman/pooly) - Riak Process Pool
+* [riakpool](https://github.com/dweldon/riakpool) - Application for
+  maintaining a dynamic pool of Protocol Buffer client connections to a
+  Riak database
+* [pooler](https://github.com/seth/pooler) - An OTP Process Pool
+  Application
+* [krc](https://github.com/klarna/krc) - A simple wrapper around the
+  official Riak client for Erlang
+* [riakc_pool](https://github.com/brb/riakc_pool) - A really simple
+  Riak client process pool based on poolboy
+
+#### Go
+
+* [riaken](https://github.com/riaken) - A fast and extendable Riak
+  Protocol Buffer Client
+* [goriakpbc](https://github.com/tpjg/goriakpbc) - A Golang Riak
+  client inspired by the Ruby riak-client from Basho and riakpbc from mrb
+* [riakpbc](https://github.com/mrb/riakpbc) - A Riak Protocol Buffer
+  client in Go
+* [goriak](https://github.com/zegl/goriak) - Go language driver for Riak KV
+
+#### Grails
+
+* [Grails ORM for Riak](http://www.grails.org/plugin/riak)
+
+#### Griffon
+
+* [Riak Plugin for
+  Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin)
+
+#### Groovy
+
+* [spring-riak](https://github.com/jbrisbin/spring-riak) - Riak
+  support from Groovy and/or Java
+
+#### Haskell
+
+* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) - A fast Haskell client library from the team at MailRank.
+
+#### Java
+
+* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) - Java Client Library for Riak based on the Protocol Buffers API
+* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) - Asynchronous, NIO-based Protocol Buffers client for Riak
+* [Riak Module for the Play
+  Framework](http://www.playframework.org/modules/riak-head/home)
+
+#### Lisp-flavored Erlang
+
+* [Gutenberg](https://github.com/dysinger/gutenberg/) - Riak MapReduce
+  examples written in LFE
+
+#### Node.js
+
+* [zukai](https://github.com/natural/zukai) - Riak ODM for Node.js
+  from Troy Melhase
+* [riak-pb](https://github.com/CrowdProcess/riak-pb) - Riak Protocol
+  Buffers client for Node.js from the team at
+  [CrowdProcess](http://crowdprocess.com)
+* [node_riak](https://github.com/mranney/node_riak) - Voxer's
+  production Node.js client for Riak.
+* [riakpbc](https://github.com/nlf/riakpbc) - A simple Riak Protocol
+  Buffer client library for Node.js
+* [nodiak](https://npmjs.org/package/nodiak) - Supports bulk
+  get/save/delete, sibling auto-resolution, MapReduce chaining, Search,
+  and 2i's
+* [resourceful-riak](https://github.com/admazely/resourceful-riak) - A
+  Riak engine to the
+  [resourceful](https://github.com/flatiron/resourceful/) model
+  framework from [flatiron](https://github.com/flatiron/)
+* [Connect-Riak](https://github.com/frank06/connect-riak) - Riak
+  session store for Connect backed by [Riak-js](http://riakjs.org/)
+* [Riak-js](http://riakjs.com) - Node.js client for Riak with support
+  for HTTP and Protocol Buffers
+* [Riakjs-model](https://github.com/dandean/riakjs-model) - a model
+  abstraction around riak-js
+* [Node-Riak](http://github.com/orlandov/node-riak) - A wrapper around
+  Node's HTTP facilities for communicating with Riak
+* [riak-dc](https://github.com/janearc/riak-dc) - A very thin, very small
+  http-based interface to Riak using promises intended to be used for small
+  tools like command-line applications; aims to have the
+  "most-synchronous-like" interface.
+* [Nori](https://github.com/sgonyea/nori) - Experimental Riak HTTP
+  library for Node.js modeled after Ripple
+* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) - Node-based server and database-frontend for Sproutcore
+* [Chinood](https://npmjs.org/package/chinood) - Object data mapper
+  for Riak built on Nodiak
+* [SimpleRiak](https://npmjs.org/package/simpleriak) - A very simple
+  Riak HTTP client
+
+#### OCaml
+
+* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) - Riak OCaml client
+* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) - A Protocol
+  Buffers client for Riak
+
+#### Perl
+
+* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) - A Perl
+  interface to Riak
+* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) - Non-blocking Riak adapter using anyevent
+* [riak-tiny](https://github.com/tempire/riak-tiny) - Perl interface
+  to Riak without Moose
+* [Riak::Light](https://metacpan.org/module/Riak::Light) - Fast and
+  lightweight Perl client for Riak (PBC only)
+
+#### PHP
+
+* [riak-client](https://github.com/php-riak/riak-client) - A Riak
+  2.0-compliant PHP client with support for Protocol Buffers by [Fabio
+  Silva](https://github.com/FabioBatSilva)
+* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) - A port of
+  Ripple to PHP
+* [riiak](https://bitbucket.org/intel352/riiak) - A Riak PHP client
+  library for the [Yii Framework](http://www.yiiframework.com/)
+* [riak-php](https://github.com/marksteele/riak-php) - A Riak PHP
+  client with support for Protocol Buffers
+* [RiakBundle](https://github.com/remialvado/RiakBundle) - [Symfony](http://symfony.com) Bundle designed to ease interaction
+  with Riak
+* [php_riak](https://github.com/TriKaspar/php_riak) - A PHP extension
+  written in C, Both Riak client and PHP session module
+
+#### Python
+
+* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) - Asyncio PBC Riak 2.0+ client library.
+  (Based on the official Basho Python client)
+* [Riakasaurus](https://github.com/calston/riakasaurus) - A Riak
+  client library for Twisted (based on txriak)
+* [RiakKit](http://shuhaowu.com/riakkit) - A small Python ORM that
+  sits on top of riak-python-client, similar to mongokit and couchdbkit
+* [riakalchemy](https://github.com/Linux2Go/riakalchemy) - Object
+  mapper for Riak written in Python
+* [riak_crdt](https://github.com/ericmoritz/riak_crdt) - A CRDT
+  (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT
+  API](https://github.com/ericmoritz/crdt)
+* [txriak](https://launchpad.net/txriak) - A Twisted module for
+  communicating with Riak via the HTTP interface
+* [txriakidx](https://github.com/williamsjj/txriakidx) - Riak client
+  for Twisted Python that implements transparent indexes
+
+#### Racket
+
+* [riak.rkt](https://github.com/shofetim/riak.rkt) - Racket API to
+  Riak
+* [Racket Riak](https://github.com/dkvasnicka/racket-riak) - Racket
+  1.3.x API to Riak
+
+#### Ruby
+
+* [Risky](https://github.com/aphyr/risky) - A lightweight Ruby ORM for
+  Riak
+* [riak_sessions](http://github.com/igorgue/riak_sessions) - Riak-backed session storage for Rack
+* [Riaktor](http://github.com/benmyles/riaktor) - Ruby client and
+  object mapper for Riak
+* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) - DataMapper adapter for Riak
+* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) - Riak
+  Protocol Buffer Client in Ruby
+* [Devise-Ripple](http://github.com/frank06/devise-ripple) - An ORM
+  strategy to use Devise with Riak
+* [ripple-anaf](http://github.com/bkaney/ripple-anaf) - Accepts nested
+  attributes support for Ripple
+* [Pabst](https://github.com/sgonyea/pabst) - Cross-platform Ruby
+  extension for Protocol Buffers written in both Objective-C and
+  Objective-C++
+
+#### Scala
+
+* [Riakka](http://github.com/timperrett/riakka) - Scala library for
+  talking to Riak
+* [Ryu](http://github.com/softprops/ryu) - A Tornado Whirlwind Kick
+  Scala client for the Riak raw HTTP interface
+* [Raiku](https://github.com/gideondk/Raiku) - An Akka IO- and
+  Sentinel-driven Riak Scala client
+
+#### Smalltalk
+
+* [Phriak](http://www.squeaksource.com/Phriak/) - A Riak client for
+  Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface
+* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) - A Pharo Smalltalk interface to Riak. There is also a blog post
+  with some additional info about the client
+  [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html).
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/data-modeling.md b/content/riak/kv/3.0.3/developing/data-modeling.md
new file mode 100644
index 0000000000..d0cbaaec65
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/data-modeling.md
@@ -0,0 +1,15 @@
+---
+layout: redirect
+target: "riak/kv/3.0.3/learn/use-cases/"
+aliases:
+---
+
+This page exists solely to redirect from the generated URL to the above `target`.
+
+We prefer to store these redirects as .html files in static/, but -- to maintain
+the git history of this (possibly malformed?) file -- we're going to start off
+by using this generated redirect.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/data-types.md b/content/riak/kv/3.0.3/developing/data-types.md
new file mode 100644
index 0000000000..4404fd2129
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/data-types.md
@@ -0,0 +1,279 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Data Types"
+    identifier: "developing_data_types"
+    weight: 102
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/data-types
+  - /riak/kv/3.0.3/dev/using/data-types
+  - /riak/3.0.3/dev/data-modeling/data-types
+  - /riak/kv/3.0.3/dev/data-modeling/data-types
+---
+
+[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others
+[concept crdt]: ../../learn/concepts/crdts
+[ops bucket type]: ../../using/cluster-operations/bucket-types
+
+Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types:
+
+- [Flags](./maps#flags)
+- [Registers](./maps#registers)
+- [Counters](./counters)
+- [Sets](./sets)
+- [GSets](./gsets)
+- [Maps](./maps)
+
+Riak KV also has one context-free data type that has similar usage but does not require contexts:
+
+- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places)
+
+Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps).
+
+For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt].
+
+## Getting Started with Riak Data Types
+
+The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types:
+
+1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type).
+2. [Confirm the bucket was properly configured](#confirm-bucket-configuration).
+3. [Activate the bucket type](#activate-bucket-type).
+
+### Creating a Bucket with a Riak Data Type
+
+First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to `counter`, `map`, `set`, `hll`, or `gset`.
+
+The following would create a separate bucket type for each of the five
+bucket-level data types:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The names `maps`, `sets`, `counters`, `hlls`, and `gsets` are not reserved
+terms. You are free to name bucket types whatever you like, with
+the exception of `default`.
+
+### Confirm Bucket Configuration
+
+Once you've created a bucket with a Riak data type, you can check
+to make sure that the bucket property configuration associated with that
+type is correct. This can be done through the `riak-admin` interface:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This will return a list of bucket properties and their associated values
+in the form of `property: value`.
+If our `maps` bucket type has been set
+properly, we should see the following pair in our console output:
+
+```
+datatype: map
+```
+
+### Activate Bucket Type
+
+If a bucket type has been properly constructed, it needs to be activated
+to be usable in Riak. This can also be done using the `bucket-type`
+command interface:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+To check whether activation has been successful, simply use the same
+`bucket-type status` command shown above.
+
+See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application.
+
+## Required Bucket Properties
+
+In order for Riak data types to work the bucket should have the following bucket properties:
+
+- `allow_mult = true`
+- `last_write_wins = false`
+
+These settings are set by default and should not be changed.
+
+## Data Types and Context
+
+Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence.
+
+If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client).
+
+> **Note**
+>
+> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter.
+
+In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map):
+
+```java
+// Using the "ahmedMap" Location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+System.out.println(ctx.getValue().toString());
+
+// An indecipherable string of Unicode characters should then appear
+```
+
+```ruby
+bucket = client.bucket('users')
+ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps')
+ahmed_map.instance_variable_get(:@context)
+
+# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j"
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```python
+bucket = client.bucket_type('maps').bucket('users')
+ahmed_map = Map(bucket, 'ahmed_info')
+ahmed_map.context
+
+# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Note: using a previous UpdateMap or FetchMap result
+Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context));
+
+// Output:
+// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("context: '%s'", rslt.context.toString('base64'));
+});
+
+// Output:
+// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo='
+```
+
+```erlang
+%% You cannot fetch a data type's context directly using the Erlang
+%% client. This is actually quite all right, as the client automatically
+%% manages contexts when making updates.
+```
+
+> **Context with the Ruby, Python, and Erlang clients**
+>
+> In the Ruby, Python, and Erlang clients, you will not need to manually
+handle context when making data type updates. The clients will do it all
+for you. The exceptions amongst the official clients are the Java and
+PHP clients. We'll explain how to use data type contexts with those
+clients directly below.
+
+### Context with the Java and PHP Clients
+
+With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations:
+
+* Disabling a flag within a map
+* Removing an item from a set (whether the set is on its own or within a
+  map)
+* Removing a field from a map
+
+Without context, these operations simply will not succeed due to the
+convergence logic driving Riak data types. The example below shows you
+how to fetch a data type's context and then pass it back to Riak. More
+specifically, we'll remove the `paid_account` flag from the map in the
+Java example, and remove an item from the embedded `interests` set in
+the PHP example:
+
+```java
+// This example uses our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate removePaidAccountField = new MapUpdate()
+    .removeFlag("paid_account");
+UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('opera');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+## Usage Examples
+
+- [Flags](./maps#flags)
+- [Registers](./maps#registers)
+- [Counters](./counters)
+- [Sets](./sets)
+- [Maps](./maps)
+- [GSets](./gsets)
+- [Hyperloglogs](./hyperloglogs)
+
+The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section.
+
+All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish.
+
+## Data Types and Search
+
+Riak data types can be searched like any other object, but with the
+added benefit that your data type is indexed as a different type by Solr,
+the search platform behind Riak Search.
+
+In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code
+samples from each of our official client libraries.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/data-types/counters.md b/content/riak/kv/3.0.3/developing/data-types/counters.md
new file mode 100644
index 0000000000..2dc77f4c26
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/data-types/counters.md
@@ -0,0 +1,635 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Counters"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Counters"
+    identifier: "data_types_counters"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/data-types/counters
+  - /riak/kv/3.0.3/dev/using/data-types/counters
+  - /riak/3.0.3/dev/data-modeling/data-types/counters
+  - /riak/kv/3.0.3/dev/data-modeling/data-types/counters
+---
+
+Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can only be a positive integer, negative integer, or zero.
+
+The examples in this section will show you how to use counters on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `counter`:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+```
+
+> **Note**
+>
+> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status counters
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `counters` bucket type has been set properly, we should see the following pair in our console output:
+
+```bash
+datatype: counter
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate counters
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status counters
+```
+
+After creating and activating our new `counters` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our counter.
+
+For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+Namespace countersBucket = new Namespace("counters", "counters"); +Location location = new Location(countersBucket, ""); +``` + +```ruby +bucket = client.bucket_type('counters').bucket('counters') +``` + +```php +$bucket = new \Basho\Riak\Bucket('counters', 'counters'); +``` + +```python +bucket = client.bucket_type('counters').bucket('counters') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// You can either use the appropriate Options class or the Builder + +// Options: +var options = new FetchCounterOptions("counters", "counters", ""); + +// Builder: +FetchCounter cmd = new FetchCounter.Builder() + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("") + .Build(); +``` + +```javascript +// The following can be passed as options to FetchCounter +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Buckets are simply named binaries in the Erlang client. See the +%% examples below for more information +``` + +```curl +curl http://localhost:8098/types/counters/buckets/counters/datatypes/ + +# Note that this differs from the URL structure for non-Data-Type +# requests, which end in /keys/ +``` + +## Create a Counter + +To create a counter, you need to specify a bucket/key pair to hold that +counter. Here is the general syntax for doing so: + +```java +// Here, we'll use the Namespace object that we created above and +// incorporate it into a Location object that includes the key (as yet +// unspecified) for our counter + +// Using the countersBucket Namespace object from above: +Location counter = new Location(countersBucket, ""); + +// Or we can specify the Location all at once: +Location counter = new Location(new Namespace("counters", "counters"), ""); +``` + +```ruby +counter = Riak::Crdt::Counter.new(bucket, key, bucket_type) + +# Or you can specify a bucket and bucket type all at once and pass that +# into the constructor +bucket = client.bucket_type(bucket_type).bucket(bucket) +counter = Riak::Crdt::Counter.new(bucket, key) +``` + +```php +# using the $bucket var created earlier +$location = new \Basho\Riak\Location('key', $bucket); +``` + +```python +# The client detects the bucket type's data type and automatically +# returns the right datatype for you, in this case a counter +counter = bucket.new(key) + +# This way is also acceptable: +from riak.datatypes import Counter + +counter = Counter(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var fetchCounterOptions = new FetchCounterOptions("counters", "counters", ""); +FetchCounter cmd = new FetchCounter(fetchCounterOptions); +RiakResult rslt = client.Execute(cmd); +CounterResponse response = cmd.Response; +``` + +```javascript +// The following can be passed as options to the *Counter methods on the +// Node.js Client object +var options = { + bucketType: 'counters', + bucket: 'counters', + key: '' +}; +``` + +```erlang +%% Counters are not encapsulated with the bucket/key in the Erlang +%% client. See the examples below for more information. +``` + +```curl +# This will create a counter with an initial value of 0 + +curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \ + -H "Content-Type: application/json" \ + -d '{"increment": 0}' +``` + +Let's say that we want to create a counter called `traffic_tickets` in +our `counters` bucket to keep track of our legal misbehavior. 
We can
+create this counter and ensure that the `counters` bucket will use our
+`counters` bucket type like this:
+
+```java
+// Using the countersBucket Namespace object from above:
+
+Location trafficTickets = new Location(countersBucket, "traffic_tickets");
+```
+
+```ruby
+bucket = client.bucket_type('counters').bucket('counters')
+counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all counter buckets to use the counters bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters'
+
+# This would enable us to create our counter without specifying a bucket type
+bucket = client.bucket('counters')
+counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')
+```
+
+```php
+# using the $bucket var created earlier
+$location = new \Basho\Riak\Location('traffic_tickets', $bucket);
+```
+
+```python
+bucket = client.bucket_type('counters').bucket('counters')
+counter = bucket.new('traffic_tickets')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
+FetchCounter cmd = new FetchCounter(fetchCounterOptions);
+RiakResult rslt = client.Execute(cmd);
+CounterResponse response = cmd.Response;
+```
+
+```javascript
+// Using the options from above:
+
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets'
+};
+```
+
+```erlang
+Counter = riakc_counter:new().
+
+%% Counters in the Erlang client are opaque data structures that collect
+%% operations as you mutate them. We will associate the data structure
+%% with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 0}'
+```
+
+## Increment a Counter
+
+Now that our client knows which bucket/key pairing to use for our
+counter, `traffic_tickets` will start out at 0 by default. If we happen
+to get a ticket that afternoon, we can increment the counter:
+
+```java
+// Using the "trafficTickets" Location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+counter.increment
+
+# This will increment the counter both on the application side and in
+# Riak
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(1)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+counter.increment()
+
+# Updates are staged locally and have to be explicitly sent to Riak
+# using the store() method.
+counter.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true) + .Build(); + +RiakResult rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value will be 1 +``` + +```javascript +// Using the options from above: + +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 1 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter1 = riakc_counter:increment(Counter). +``` + +```curl +curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \ + -H "Content-Type: application/json" \ + -d '{"increment": 1}' +``` + +## Increment a Counter by More Than 1 + +The default value of an increment operation is 1, but you can increment +by more than 1 (but always by an integer). + +Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day: + +```java +// Using the "trafficTickets" Location from above: +CounterUpdate cu = new CounterUpdate(5); +UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu) + .build(); +client.execute(update); +``` + +```ruby +counter.increment(5) +``` + +```php +(new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(5) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +counter.increment(5) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateCounter.Builder(5) + .WithBucketType("counters") + .WithBucket("counters") + .WithKey("traffic_tickets") + .WithReturnBody(true); + +UpdateCounter updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CounterResponse response = updateCmd.Response; +// response.Value is 5 more than before + +// To decrement: +// Modify the builder's increment, then construct a new command +builder.WithIncrement(-5); +updateCmd = builder.Build(); + +rslt = client.Execute(updateCmd); +CheckResult(rslt); + +response = updateCmd.Response; +// response.Value is 5 less than before +``` + +```javascript +var options = { + bucketType: 'counters', + bucket: 'counters', + key: 'traffic_tickets', + increment: 5 +}; +client.updateCounter(options, + function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +``` + +```erlang +Counter2 = riakc_counter:increment(5, Counter1). 
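+
+%% Like all mutations in the Erlang client, these increments are staged
+%% locally. The Decrement section below shows how to send the staged
+%% operations to Riak using riakc_counter:to_op/1 and
+%% riakc_pb_socket:update_type/4.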
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+```
+
+## Retrieve Counter Value
+
+We can retrieve the value of the counter and view how many tickets have accumulated:
+
+```java
+// Using the "trafficTickets" Location from above:
+FetchCounter fetch = new FetchCounter.Builder(trafficTickets)
+    .build();
+FetchCounter.Response response = client.execute(fetch);
+RiakCounter counter = response.getDatatype();
+Long ticketsCount = counter.view();
+```
+
+```ruby
+counter.value
+# Output will always be an integer
+```
+
+```php
+$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getCounter();
+
+$trafficTickets->getData(); # returns an integer
+```
+
+```python
+counter.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, whereas the call above would return
+# 6, the call below will return 0 since we started with an empty
+# counter:
+
+counter.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any changes to the counter that have not yet been
+# sent to Riak
+counter.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
+FetchCounter cmd = new FetchCounter(fetchCounterOptions);
+RiakResult rslt = client.Execute(cmd);
+CounterResponse response = cmd.Response;
+// response.Value has the counter value
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets'
+};
+client.fetchCounter(options,
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        if (rslt.notFound) {
+            logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND",
+                options.bucketType, options.bucket, options.key);
+        } else {
+            logger.info("bt: %s, b: %s, k: %s, counter: %d",
+                options.bucketType, options.bucket, options.key,
+                rslt.counterValue);
+        }
+    }
+);
+```
+
+```erlang
+riakc_counter:dirty_value(Counter2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, whereas the call above would return
+%% '6', the call below will return '0' since we started with an empty
+%% counter:
+
+riakc_counter:value(Counter2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, CounterX} = riakc_pb_socket:fetch_type(Pid,
+                                            {<<"counters">>, <<"counters">>},
+                                            <<"traffic_tickets">>).
+```
+
+```curl
+curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets
+
+# Response:
+{"type":"counter", "value": }
+```
+
+## Decrement a Counter
+
+Counters enable you to decrement values in addition to incrementing them as seen above.
For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record:
+
+```java
+// Using the "trafficTickets" Location from above:
+CounterUpdate cu = new CounterUpdate(-1);
+UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+counter.decrement
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(-3)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+counter.decrement()
+
+# Just like incrementing, you can also decrement by more than one, e.g.:
+counter.decrement(3)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var updateCmd = new UpdateCounter.Builder(-3)
+    .WithBucketType("counters")
+    .WithBucket("counters")
+    .WithKey("traffic_tickets")
+    .Build();
+
+rslt = client.Execute(updateCmd);
+response = updateCmd.Response;
+// response.Value is three less than before
+```
+
+```javascript
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -1
+};
+client.updateCounter(options,
+    function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+
+// As with incrementing, you can also decrement by more than one, e.g.:
+var options = {
+    bucketType: 'counters',
+    bucket: 'counters',
+    key: 'traffic_tickets',
+    increment: -3
+};
+```
+
+```erlang
+Counter3 = riakc_counter:decrement(Counter2).
+
+%% As with incrementing, you can also decrement by more than one:
+
+Counter4 = riakc_counter:decrement(3, Counter3).
+
+%% At some point, we'll want to send our local updates to the server
+%% so they get recorded and are visible to others. Extract the update
+%% using the to_op/1 function, then pass it to
+%% riakc_pb_socket:update_type/4,5.
+
+riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>},
+                            <<"traffic_tickets">>,
+                            riakc_counter:to_op(Counter4)).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
+  -H "Content-Type: application/json" \
+  -d '{"decrement": 3}'
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/data-types/gsets.md b/content/riak/kv/3.0.3/developing/data-types/gsets.md
new file mode 100644
index 0000000000..ce865aefb1
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/data-types/gsets.md
@@ -0,0 +1,631 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: GSets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "GSets"
+    identifier: "data_types_gsets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/data-types/gsets
+  - /riak/kv/3.0.3/dev/using/data-types/gsets
+  - /riak/3.0.3/dev/data-modeling/data-types/gsets
+  - /riak/kv/3.0.3/dev/data-modeling/data-types/gsets
+---
+
+GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps).
+
+GSets are collections of unique binary values (such as strings). All of the values in a gset are unique and are automatically sorted alphabetically irrespective of the order they were added.
+
+For example, if you attempt to add the element `shovel` to a gset that already contains `shovel`, the operation will be ignored by Riak KV.
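+
+As a minimal HTTP sketch (assuming a `gsets` bucket type like the one
+created below, and a hypothetical `tools` bucket with the key `demo`),
+repeating an add leaves the stored value unchanged:
+
+```curl
+# Add "shovel" twice; the second add is a no-op
+curl -XPOST http://localhost:8098/types/gsets/buckets/tools/datatypes/demo \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["shovel"]}'
+curl -XPOST http://localhost:8098/types/gsets/buckets/tools/datatypes/demo \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["shovel"]}'
+
+# Fetching still shows a single element:
+# {"type":"set","value":["shovel"]}
+curl http://localhost:8098/types/gsets/buckets/tools/datatypes/demo
+```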
+
+Unlike sets, elements can only be added; no element modification or deletion is possible.
+
+> **Known Issue**
+>
+> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `gset`:
+
+```bash
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `gsets` bucket type has been set properly, we should see the following pair in our console output:
+
+```
+datatype: gset
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate gsets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status gsets
+```
+
+After creating and activating our new `gsets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using gsets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a gset:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("", ""), "");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+gset = bucket.new('2019-11-17')
+
+# or
+
+from riak.datatypes import GSet
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with gsets
+// by building an Options object or using a Builder
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchGSet fetchGSetCommand = builder.Build();
+
+FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with gsets on the
+// basis of the gset's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types//buckets//datatypes/
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/
+```
+
+## Create a GSet
+
+For the following example, we will use a gset to store a list of transactions that occur for an account number on a specific date.
+Let's create a Riak gset stored in the key `2019-11-17` in the bucket `account-12345678` using the `gsets` bucket type created previously (the Java and Ruby examples below use a `travel` bucket with the key `cities` instead):
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("gsets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('gsets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type. The following would
+# set all set buckets to use the gsets bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'gsets'
+
+# This would enable us to create our set without specifying a bucket
+# type:
+travel = client.bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+```
+
+```php
+$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
+```
+
+```python
+bucket = client.bucket_type('gsets').bucket('account-12345678')
+
+# The client detects the bucket type's data type and automatically
+# returns the right data type for you, in this case a Riak set.
+gset = bucket.new('2019-11-17')
+
+# You can also create a reference to a set explicitly:
+from riak.datatypes import GSet
+
+gset = GSet('account-12345678', '2019-11-17')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Now we'll create a Builder object for the gset with which we want to
+// interact:
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+```
+
+```javascript
+// Now we'll create a options object for the gset with which we want to
+// interact:
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+```
+
+```erlang
+Gset20191117 = riakc_gset:new().
+
+%% GSets in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty gset through the HTTP interface. GSets can
+# only be created when an element is added to them, as in the examples
+# below.
+```
+
+Upon creation, our gset is empty.
We can verify that it is empty at any
+time:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+RiakSet set = response.getDatatype();
+boolean isEmpty = set.viewAsSet().isEmpty();
+```
+
+```ruby
+cities_set.empty?
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+count($gset->getData());
+```
+
+```python
+len(gset) == 0
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new FetchGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17");
+
+FetchGSet fetchGSetCommand = builder.Build();
+RiakResult rslt = client.Execute(fetchGSetCommand);
+GSetResponse response = fetchGSetCommand.Response;
+// response.Value will be null
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("gset '2019-11-17' is not found!");
+    }
+});
+```
+
+```erlang
+riakc_gset:size(Gset20191117) == 0.
+
+%% Query functions like size/1, is_element/2, and fold/3 operate over
+%% the immutable value fetched from the server. In the case of a new
+%% gset that was not fetched, this is an empty collection, so the size
+%% is 0.
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","error":"notfound"}
+```
+
+## Add to a GSet
+
+But let's say that a pair of transactions occurred today.
Let's add them to our `2019-11-17` gset:
+
+```java
+// Using our "cities" Location from above:
+
+GSetUpdate su = new GSetUpdate()
+        .add("Toronto")
+        .add("Montreal");
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .build();
+client.execute(update);
+```
+
+```ruby
+cities_set.add('Toronto')
+cities_set.add('Montreal')
+```
+
+```php
+# use $location from earlier
+$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('transaction a')
+  ->add('transaction b')
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+gset.add('transaction a')
+gset.add('transaction b')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var adds = new[] { "transaction a", "transaction b" };
+
+var builder = new UpdateGSet.Builder()
+    .WithBucketType("gsets")
+    .WithBucket("account-12345678")
+    .WithKey("2019-11-17")
+    .WithAdditions(adds);
+
+UpdateGSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+GSetResponse response = cmd.Response;
+Assert.Contains("transaction a", response.AsStrings.ToArray());
+Assert.Contains("transaction b", response.AsStrings.ToArray());
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
+    .withBucketType(options.bucketType)
+    .withBucket(options.bucket)
+    .withKey(options.key)
+    .withAdditions(['transaction a', 'transaction b'])
+    .withCallback(
+        function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        }
+    )
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+Gset20191117_1 = riakc_gset:add_element(<<"transaction a">>, Gset20191117),
+Gset20191117_2 = riakc_gset:add_element(<<"transaction b">>, Gset20191117_1).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["transaction a", "transaction b"]}'
+```
+
+## Remove from a GSet
+
+Removal from a GSet is not possible.
+
+## Retrieve a GSet
+
+Now, we can check which transactions are currently in our gset:
+
+```java
+// Using our "cities" Location from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+for (BinaryValue city : binarySet) {
+    System.out.println(city.toStringUtf8());
+}
+```
+
+```ruby
+cities_set.members
+
+#
+```
+
+```php
+# use $location from earlier
+$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getSet();
+
+var_dump($gset->getData());
+```
+
+```python
+gset.dirty_value
+
+# The value fetched from Riak is always immutable, whereas the "dirty
+# value" takes into account local modifications that have not been
+# sent to the server. For example, where the call above would return
+# frozenset(['transaction a', 'transaction b']), the call below would
+# return frozenset([]).
+
+gset.value
+
+# To fetch the value stored on the server, use the call below. Note
+# that this will clear any unsent additions.
gset.reload()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+foreach (var value in response.AsStrings)
+{
+    Console.WriteLine("2019-11-17 Transactions: {0}", value);
+}
+
+// Output:
+// 2019-11-17 Transactions: transaction a
+// 2019-11-17 Transactions: transaction b
+```
+
+```javascript
+var options = {
+    bucketType: 'gsets',
+    bucket: 'account-12345678',
+    key: '2019-11-17'
+};
+client.fetchSet(options, function(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("2019-11-17 gset values: '%s'",
+        rslt.values.join(', '));
+});
+
+// Output:
+// info: 2019-11-17 gset values: 'transaction a, transaction b'
+```
+
+```erlang
+riakc_gset:dirty_value(Gset20191117_2).
+
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server. For example, where the call above would return
+%% [<<"transaction a">>, <<"transaction b">>], the call below would
+%% return []. These are essentially ordsets:
+
+riakc_gset:value(Gset20191117_2).
+
+%% To fetch the value stored on the server, use the call below:
+
+{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
+                                        {<<"gsets">>,<<"account-12345678">>},
+                                        <<"2019-11-17">>).
+```
+
+```curl
+curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17
+
+# Response
+{"type":"set","value":["transaction a","transaction b"]}
+```
+
+## Find GSet Member
+
+Or we can see whether our gset includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+    .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Toronto")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Toronto'
+# true
+```
+
+```php
+in_array('transaction z', $gset->getData()); # false
+
+in_array('transaction a', $gset->getData()); # true
+```
+
+```python
+'transaction c' in gset
+# False
+
+'transaction a' in gset
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
+bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+// (using the "rslt" object from the fetch callback above)
+
+var gset20191117 = rslt.values;
+gset20191117.indexOf('transaction z'); // if present, index is >= 0
+gset20191117.indexOf('transaction a'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, Gset20191117_2 is the most "recent" gset from the
+%% standpoint of our application.
+
+riakc_gset:is_element(<<"transaction z">>, Gset20191117_2).
+riakc_gset:is_element(<<"transaction a">>, Gset20191117_2).
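+
+%% These two calls return false and true, respectively: <<"transaction z">>
+%% was never added to the gset, while <<"transaction a">> was.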
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of GSet
+
+We can also determine the size of the gset:
+
+```java
+// Using the "binarySet" fetched in the example above:
+
+int numberOfCities = binarySet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($gset->getData());
+```
+
+```python
+len(gset)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+response.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+// (using the "rslt" object from the fetch callback above)
+
+var gset20191117Size = rslt.values.length;
+```
+
+```erlang
+riakc_gset:size(Gset20191117_2).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/data-types/hyperloglogs.md b/content/riak/kv/3.0.3/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..cf81cdaa6c
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/data-types/hyperloglogs.md
@@ -0,0 +1,643 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: HyperLogLogs"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "HyperLogLogs"
+    identifier: "data_types_hyperloglogs"
+    weight: 100
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/data-types/hyperloglogs
+  - /riak/kv/3.0.3/dev/using/data-types/hyperloglogs
+  - /riak/3.0.3/dev/data-modeling/data-types/hyperloglogs
+  - /riak/kv/3.0.3/dev/data-modeling/data-types/hyperloglogs
+---
+
+HyperLogLogs are a Riak data type that provides a memory-efficient, probabilistic approximation of the cardinality (the number of distinct elements) of a collection. The examples in this section will show you how to use hyperloglogs on their own.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `hll`:
+
+```bash
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+```
+
+> **Note**
+>
+> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `hlls` bucket type has been set properly, we should see the following pair in our console output:
+
+```bash
+datatype: hll
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate hlls
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status hlls
+```
+
+After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key
+location that contains our hyperloglog.
+
+For this example we'll use the `hlls` bucket type created and activated above and a bucket called `my_hlls`:
+
+```erlang
+%% Buckets are simply named binaries in the Erlang client. See the
+%% examples below for more information
+```
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location hllLocation =
+  new Location(new Namespace("", ""), "");
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// Buckets and bucket types are simply strings in the Go client.
+
+// See the examples below for more information, or the full example at
+// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go
+
+// We will need the following imports to run the examples:
+import (
+    "fmt"
+    "os"
+    "time"
+
+    riak "github.com/basho/riak-go-client"
+    "errors"
+)
+```
+
+```csharp
+// In the C# client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```javascript
+// In the Node.js client, buckets are just string parameters to operations.
+// See the examples below for more information.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('', '', 'hlls')
+    ->build();
+```
+
+```ruby
+bucket = client.bucket_type('hlls').bucket('my_hlls')
+```
+
+```curl
+curl http://localhost:8098/types//buckets//datatypes/
+
+# Note that this differs from the URL structure for non-Data-Type
+# requests, which end in /keys/
+```
+
+
+## Create a HyperLogLog data type
+
+To create a hyperloglog data structure, you need to specify a bucket/key pair to
+hold that hyperloglog. Here is the general syntax for doing so:
+
+```erlang
+HLL = riakc_hll:new().
+
+%% Hyperloglogs in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location hllLocation =
+  new Location(new Namespace("hlls", "hello"), "darkness");
+
+// In the Java client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+hll = bucket.new(key)
+
+# or
+
+from riak.datatypes import Hll
+hll = Hll(bucket, key)
+```
+
+```go
+// In the Go client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```csharp
+// In the C# client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+```
+
+```javascript
+// In the Node.js client, there is no intermediate "empty" hyperloglog data type.
+// Hyperloglogs can be created when an element is added to them, as in the examples below.
+``` + +```php +// Note that "hlls" is just an example HLL bucket type name used +// in these examples + +$command = (new Command\Builder\UpdateHll($riak_client)) + ->add('gosabres poked you.') + ->add('phprocks viewed your profile.') + ->add('phprocks started following you.') + ->buildBucket('', 'hlls') + ->build(); + +$response = $command->execute(); +``` + +```ruby +key = "darkness" +hll = Riak::Crdt::HyperLogLog.new(bucket, key) +``` + +```curl +# You cannot create an empty hyperloglog data structure through the HTTP +# interface. +# Hyperloglogs can only be created when an element is added to them, as in the +# examples below. +``` + +Upon creation, our hyperloglog data structure is empty: + +```erlang +HLL. + +%% which will return: +%% {hll,0,[]} +``` + +```java +FetchHll fetch = new FetchHll.Builder(hllLocation) + .build(); +RiakHll hll = client.execute(fetch); +boolean isEmpty = hll.getCardinality() == 0; +``` + +```python +is_empty = hll.value == 0 +``` + +```go +var resp *riak.FetchHllResponse + +builder := riak.NewFetchHllCommandBuilder() +cmd, err := builder.WithBucketType("hlls"). + WithBucket("hello"). + WithKey("darkness"). + Build() +if err != nil { + return err +} +if err = cluster.Execute(cmd); err != nil { + return err +} +if fc, ok := cmd.(*riak.FetchHllCommand); ok { + if fc.Response == nil { + return errors.New("expected non-nil Response") + } + resp = fc.Response +} + +fmt.Println("Hyperloglog cardinality: ", resp.Cardinality) +fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound) +return nil +``` + +```javascript +var options = { + bucketType: 'hlls', + bucket: 'hello', + key: 'darkness' +}; + +client.fetchHll(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("Not Found"); + } +}); +// Prints "Not Found" to logger.info. +``` + +```csharp + var fetch = new FetchHll.Builder() + .WithBucketType("hlls") + .WithBucket("hello") + .WithKey("darkness") + .Build(); + +RiakResult rslt = client.Execute(fetch); +HllResponse response = fetch.Response; +if (response.NotFound) +{ + Console.WriteLine("Not Found"); +} +// Prints "Not Found" to the console. +``` + +```php +$command = (new Command\Builder\FetchHll($riak_client)) + ->buildLocation('darkness', 'hello', 'hlls') + ->build(); + +$response = $command->execute(); + +$response->getCode() == '404'; +``` + +```ruby +puts hll.cardinality +# Prints "0" +``` + +```curl +curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness + +# Response +{"type":"hll","error":"notfound"} +``` + +## Add elements to a HyperLogLog data type + +```erlang +HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL), +HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1), + +HLL2. 
+
+%% which will return:
+%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]}
+```
+
+```java
+HllUpdate hllUpdate = new HllUpdate()
+                        .add("Jokes")
+                        .add("Are")
+                        .addAll(Arrays.asList("Better", "Explained", "Jokes"));
+
+hllUpdate.getElementAdds();
+// Returns the set of ["Jokes", "Are", "Better", "Explained"]
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+// We will add values in the next example
+```
+
+```csharp
+// We will add values in the next example
+```
+
+```javascript
+// We will add values in the next example
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('Jokes')
+    ->add('Are')
+    ->add('Better')
+    ->add('Explained')
+    ->add('Jokes')
+    ->buildBucket('my_hlls', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+# We will add values in the next example
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \
+  -H "Content-Type: application/json" \
+  -d '{"add_all":["my", "old", "friend"]}'
+```
+
+However, when using a non-HTTP client, the approximate cardinality/value of our
+data structure will remain 0 locally until it's pushed to the server and then
+[fetched](#retrieve-a-hyperloglog-data-type) from the server.
+
+```erlang
+riakc_hll:value(HLL2) == 0.
+
+%% which will return:
+%% true
+
+Port = 8087,
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port),
+Key = <<"Holy Diver">>,
+BucketType = <<"hlls">>,
+Bucket = {BucketType, <<"rainbow in the dark">>},
+
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)).
+ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)).
+```
+
+```java
+// Using hllUpdate and hllLocation from above examples
+
+UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate)
+        .build();
+client.execute(update);
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = datatypes.Hll(bucket, 'hll_one')
+myhll.add('Jokes')
+myhll.add('Are')
+myhll.add('Better')
+myhll.add('Explained')
+myhll.add('Jokes')
+myhll.store()
+# myhll.value == 4
+```
+
+```go
+adds := [][]byte{
+    []byte("Jokes"),
+    []byte("Are"),
+    []byte("Better"),
+    []byte("Explained"),
+    []byte("Jokes"),
+}
+
+builder := riak.NewUpdateHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    WithAdditions(adds...).
Build()
+if err != nil {
+    return err
+}
+
+return cluster.Execute(cmd)
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness',
+    additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'],
+};
+
+client.updateHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```csharp
+var adds = new HashSet<string> { "Jokes", "Are", "Better", "Explained", "Jokes" };
+
+var update = new UpdateHll.Builder(adds)
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .WithReturnBody(true)
+    .Build();
+
+RiakResult rslt = client.Execute(update);
+```
+
+```php
+$command = (new Command\Builder\UpdateHll($riak_client))
+    ->add('Jokes')
+    ->add('Are')
+    ->add('Better')
+    ->add('Explained')
+    ->add('Jokes')
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+```
+
+```ruby
+hll.add('Jokes')
+hll.batch do |s|
+  s.add 'Are'
+  s.add 'Better'
+  s.add 'Explained'
+  s.add 'Jokes'
+end
+```
+
+## Retrieve a HyperLogLog data type
+
+Now, we can check the approximate count (i.e., the cardinality) of the
+elements added to our hyperloglog data structure:
+
+```erlang
+{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key),
+riakc_hll:value(HLL3) == 4.
+
+%% which would return:
+%% true
+
+%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the
+%% unique elements we've added to the data structure.
+```
+
+```java
+FetchHll hllFetchCmd = new FetchHll.Builder(hllLocation).build();
+RiakHll hll = client.execute(hllFetchCmd);
+hll.getCardinality();
+// Which returns 4
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```python
+bucket_type = client.bucket_type('hlls')
+bucket = bucket_type.bucket('my_hlls')
+myhll = bucket.get('hll_one')
+# myhll.value == 4
+```
+
+```go
+var resp *riak.FetchHllResponse
+
+builder := riak.NewFetchHllCommandBuilder()
+cmd, err := builder.WithBucketType("hlls").
+    WithBucket("hello").
+    WithKey("darkness").
+    Build()
+if err != nil {
+    return err
+}
+if err = cluster.Execute(cmd); err != nil {
+    return err
+}
+if fc, ok := cmd.(*riak.FetchHllCommand); ok {
+    if fc.Response == nil {
+        return errors.New("expected non-nil Response")
+    }
+    resp = fc.Response
+}
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
+return nil
+```
+
+```javascript
+var options = {
+    bucketType: 'hlls',
+    bucket: 'hello',
+    key: 'darkness'
+};
+
+client.fetchHll(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    if (rslt.notFound) {
+        logger.info("Not Found");
+    }
+    logger.info("Hyperloglog cardinality is: " + rslt.cardinality);
+});
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```csharp
+var fetch = new FetchHll.Builder()
+    .WithBucketType("hlls")
+    .WithBucket("hello")
+    .WithKey("darkness")
+    .Build();
+
+RiakResult rslt = client.Execute(fetch);
+Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);
+
+HllResponse response = fetch.Response;
+if (response.NotFound)
+{
+    Console.WriteLine("Not Found");
+}
+else
+{
+    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
+}
+
+// Prints "Hyperloglog cardinality is: 4"
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```php
+$command = (new Command\Builder\FetchHll($riak_client))
+    ->buildLocation('darkness', 'hello', 'hlls')
+    ->build();
+
+$response = $command->execute();
+
+// Note: as though we are in a PHP unit test
+$this->assertTrue(is_int($response->getHll()->getData()));
+$this->assertEquals(4, $response->getHll()->getData());
+
+// We added "Jokes" twice, but, remember, the algorithm only counts the
+// unique elements we've added to the data structure.
+```
+
+```ruby
+puts hll.cardinality
+# Prints "4"
+```
+
+```curl
+curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness
+
+# Response
+{"type":"hll","value":"4"}
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/data-types/maps.md b/content/riak/kv/3.0.3/developing/data-types/maps.md
new file mode 100644
index 0000000000..ed71fa9204
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/data-types/maps.md
@@ -0,0 +1,1885 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Maps"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Maps"
+    identifier: "data_types_maps"
+    weight: 102
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/data-types/maps
+  - /riak/kv/3.0.3/dev/using/data-types/maps
+  - /riak/3.0.3/dev/data-modeling/data-types/maps
+  - /riak/kv/3.0.3/dev/data-modeling/data-types/maps
+---
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Using counters, sets, and maps within maps is similar to working with those types at the bucket level.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter set to `map`:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+```
+
+> **Note**
+>
+> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm that the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
If our `maps` bucket type has been set properly, we should see the following pair in our console output:
+
+```bash
+datatype: map
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+After creating and activating our new `maps` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+First, we need to direct our client to the bucket type/bucket/key location that contains our map.
+
+The syntax for creating a map is analogous to the
+syntax for creating other data types:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location map =
+  new Location(new Namespace("", ""), "");
+```
+
+```ruby
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+map = Riak::Crdt::Map.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type');
+```
+
+```python
+# The client detects the bucket type's datatype and automatically
+# returns the right datatype for you, in this case a Map.
+map = bucket.new(key)
+
+# This way is also acceptable:
+from riak.datatypes import Map
+map = Map(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("")
+    .WithBucket("")
+    .WithKey("");
+```
+
+```javascript
+// Options to pass to the various map methods
+var options = {
+    bucketType: '',
+    bucket: '',
+    key: ''
+};
+```
+
+```erlang
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+curl http://localhost:8098/types//buckets//datatypes/
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/
+```
+
+## Create a Map
+
+For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket.
+
+We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type:
+
+```java
+// In the Java client, you specify the location of data types
+// before you perform operations on them:
+
+Location ahmedMap =
+  new Location(new Namespace("maps", "customers"), "ahmed_info");
+```
+
+```ruby
+customers = client.bucket_type('maps').bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+
+# Alternatively, the Ruby client enables you to set a bucket type as being
+# globally associated with a Riak data type.
The following would set all
+# map buckets to use the maps bucket type:
+
+Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'
+
+# This would enable us to create our map without specifying a bucket type:
+
+customers = client.bucket('customers')
+map = Riak::Crdt::Map.new(customers, 'ahmed_info')
+```
+
+```php
+$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
+```
+
+```python
+customers = client.bucket_type('maps').bucket('customers')
+map = customers.new('ahmed_info')
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var builder = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("ahmed_info");
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+```
+
+```erlang
+Map = riakc_map:new().
+
+%% Maps in the Erlang client are opaque data structures that
+%% collect operations as you mutate them. We will associate the data
+%% structure with a bucket type, bucket, and key later on.
+```
+
+```curl
+# You cannot create an empty map through the HTTP interface. Maps can only
+# be created when a field is added to them, as in the examples below.
+```
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in Riak maps.
+
+### Registers Within Maps
+
+Continuing with our previous `customers` example, let's store some information in our map.
+
+The first piece of information we want to store in our map is Ahmed's name and
+phone number, both of which are best stored as registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
+RegisterUpdate ru2 = new RegisterUpdate("5551234567");
+MapUpdate mu = new MapUpdate()
+        .update("first_name", ru1)
+        .update("phone_number", ru2);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+    .build();
+client.execute(update);
+```
+
+```ruby
+# The Ruby client enables you to batch operations together if you're
+# performing them on one data type.
+
+map.batch do |m|
+  m.registers['first_name'] = 'Ahmed'
+  m.registers['phone_number'] = '5551234567'
+end
+
+# Integers need to be stored as strings and then converted back when
+# the data is retrieved. The following would work as well:
+map.registers['phone_number'] = 5551234567.to_s
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Ahmed')
+  ->updateRegister('phone_number', '5551234567')
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.registers['first_name'].assign('Ahmed')
+map.registers['phone_number'].assign('5551234567')
+
+# Integers need to be stored as strings and then converted back when the
+# data is retrieved.
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
+He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+        .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('enterprise_customer', false)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//      "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//      "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"enterprise_customer">>, flag},
+                        fun(F) -> riakc_flag:disable(F) end,
+                        Map3).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "enterprise_customer_flag": "disable"
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+  }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"]
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map4).
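+
+%% dirty_value/1 shows locally staged changes. A minimal sketch of
+%% reading the value stored on the server instead, assuming Pid is a
+%% connected riakc_pb_socket process:
+%%
+%%   {ok, M} = riakc_pb_socket:fetch_type(Pid,
+%%                                        {<<"maps">>, <<"customers">>},
+%%                                        <<"ahmed_info">>),
+%%   riakc_map:value(M).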
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+        .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateCounter('page_visits', $updateCounter)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//      "Sets":{},
+//      "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//      "Flags":{"enterprise_customer":false},
+//      "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+                        fun(C) -> riakc_counter:increment(1, C) end,
+                        Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "page_visits_counter": 1
+    }
+  }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles.
+We'll store that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+        .add("robots")
+        .add("opera")
+        .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+        .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('robots')
+  ->add('opera')
+  ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//      "Sets":{"interests":["motorcycles","opera","robots"]},
+//      "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//      "Flags":{"enterprise_customer":false},
+//      "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+                        Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+                        Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "add_all": [
+          "robots",
+          "opera",
+          "motorcycles"
+        ]
+      }
+    }
+  }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include?
interest + end +end + +# This will return three Boolean values +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$sets = $map->getSet('interests'); +var_dump($sets->getData()); +``` + +```python +reloaded_map = map.reload() +for interest in ['robots', 'opera', 'motorcycles']: + interest in reloaded_map.sets['interests'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +Map ahmedMap = response.Value; + +// All of the following return true: +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("opera"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + assert(rslt.map.sets['interests'].indexOf('robots') !== -1); +}); +``` + +```erlang +riakc_map:dirty_value(Map6). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false +``` + +We learn from a recent purchasing decision that Ahmed actually doesn't +seem to like opera. He's much more keen on indie pop. Let's change the +`interests` set to reflect that: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate() + .remove("opera") + .add("indie pop"); +MapUpdate mu = new MapUpdate() + .update("interests", su); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.batch do |m| + m.sets['interests'].remove('opera') + m.sets['interests'].add('indie pop') +end +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('indie pop') + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +```python +map.sets['interests'].discard('opera') +map.sets['interests'].add('indie pop') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.AddToSet("interests", "indie pop"); +mapOperation.RemoveFromSet("interests", "opera"); + +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); + +MapResponse response = cmd.Response; +Map ahmedMap = response.Value; + +// This is false +ahmedMap.Sets.GetValue("interests").Contains("opera"); + +// These are true +ahmedMap.Sets.GetValue("interests").Contains("indie pop"); +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + mapOp.removeFromSet('interests', 'opera'); + mapOp.addToSet('interests', 'indie pop'); + + options.context = rslt.context; + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map7 = 
+    riakc_map:update({<<"interests">>, set},
+                     fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+                        fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+                        Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide variety of information---of several different
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("first_name", ru1)
+        .update("last_name", ru2)
+        .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Annika')
+  ->updateRegister('last_name', 'Weiss')
+  ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
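+// map() returns a nested map operation, so fields of the inner
+// 'annika_info' map can be set with chained calls, as below: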
+mapOp.map('annika_info') + .setRegister('first_name', 'Annika') + .setRegister('last_name', 'Weiss') + .setRegister('phone_number', '5559876543'); + +options.op = mapOp; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map12 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end, + Map11), +Map13 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"last_name">>, register}, + fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end, + Map12), +Map14 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end, + Map13). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "first_name_register": "Annika", + "last_name_register": "Weiss", + "phone_number_register": "5559876543" + } + } + } + } + ' +``` + +The value of a register in a map can be obtained without a special +method: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +String annikaFirstName = response.getDatatype() + .getMap("annika_info") + .getRegister("first_name") + .view() + .toString(); +``` + +```ruby +map.maps['annika_info'].registers['first_name'] + +# "Annika" +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getRegister('first_name'); // Annika +``` + +```python +map.reload().maps['annika_info'].registers['first_name'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Registers.GetValue("first_name"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var annikaFirstName = + rslt.map.maps['annika_info'].registers['first_name'].toString('utf8'); +}); +``` + +```erlang +riakc_map:dirty_value(Map14). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +Registers can also be removed: + +```java +// This example uses our "ahmedMap" location from above. 
+// Operations that remove fields from maps require that you first fetch
+// the opaque context attached to the map and then include the context
+// in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .removeRegister("first_name");
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('first_name')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->removeRegister('first_name');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withContext($map->getContext())
+  ->build()
+  ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['first_name']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("first_name");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('first_name');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+    fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+    Map14).
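+
+%% Note: the server only accepts field removals that are accompanied by
+%% the map's causal context. If Map14 was fetched from Riak rather than
+%% built locally with riakc_map:new/0, the opaque structure already
+%% carries that context, and it is included when the queued operations
+%% are eventually sent.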
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "remove": ["phone_number_register"]
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+  }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("enterprise_plan", new FlagUpdate(false))
+        .update("family_plan", new FlagUpdate(false))
+        .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('enterprise_plan', false)
+  ->updateFlag('family_plan', false)
+  ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"enterprise_plan">>, flag},
+        fun(F) -> riakc_flag:disable(F) end,
+        M) end,
+    Map15),
+Map17 = riakc_map:update(
{<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"family_plan">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + M) end, + Map16), +Map18 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"free_plan">>, flag}, + fun(F) -> riakc_flag:enable(F) end, + M) end, + Map17). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "enterprise_plan_flag": "disable", + "family_plan_flag": "disable", + "free_plan_flag": "enable" + } + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag==" + } + ' +``` + +The value of a flag can be retrieved at any time: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +boolean enterprisePlan = response.getDatatype() + .getMap("annika_info") + .getFlag("enterprise_plan") + .view(); +``` + +```ruby +map.maps['annika_info'].flags['enterprise_plan'] + +# false +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false +``` + +```python +map.reload().maps['annika_info'].flags['enterprise_plan'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Flags["enterprise_plan"]; +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var enterprisePlan = + rslt.map.maps.annika_info.flags.enterprise_plan; +}); +``` + +```erlang +riakc_map:dirty_value(Map18). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +It's also important to track the number of purchases that Annika has +made with our company. 
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
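+
+%% All of these MapN values only stage operations locally. A minimal
+%% sketch of pushing the accumulated operations to Riak, assuming Pid
+%% is a connected riakc_pb_socket process:
+%%
+%%   riakc_pb_socket:update_type(Pid,
+%%                               {<<"maps">>, <<"customers">>},
+%%                               <<"ahmed_info">>,
+%%                               riakc_map:to_op(Map19)).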
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "add": "tango dancing"
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+We can remove that interest in just the way that we would expect:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate().remove("tango dancing");
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("interests", su);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].sets['interests'].remove('tango dancing')
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->remove('tango dancing');
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withContext($response->getMap()->getContext())
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].sets['interests'].discard('tango dancing')
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing");
+
+// Note: using Context from previous response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+client.Execute(builder.Build());
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.removeFromSet('interests', 'tango dancing');
+
+    options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map21 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"interests">>, set},
+        fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end,
+        M) end,
+    Map20).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "remove": "tango dancing"
+          }
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+  }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('first_purchase', true)
+  ->updateRegister('amount', '1271')
+  ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map22 = riakc_map:update(
+    {<<"annika_info">>, map},
+    fun(M) -> riakc_map:update(
+        {<<"purchase">>, map},
+        fun(P) -> riakc_map:update(
+            {<<"first_purchase">>, flag},
+            fun(R) -> riakc_flag:enable(R) end,
+            P) end,
+        M) end,
+    Map21
+).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "purchase_map": {
+            "update": {
+              "first_purchase_flag": "enable",
+              "amount_register": "1271",
+              "items_set": {
+                "add": "large widget"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/data-types/sets.md b/content/riak/kv/3.0.3/developing/data-types/sets.md
new file mode 100644
index 0000000000..33d2fa1551
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/data-types/sets.md
@@ -0,0 +1,773 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Sets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Sets"
+    identifier: "data_types_sets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/data-types/sets
+  - /riak/kv/3.0.3/dev/using/data-types/sets
+  - /riak/3.0.3/dev/data-modeling/data-types/sets
+  - /riak/kv/3.0.3/dev/data-modeling/data-types/sets
+---
+
+Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps).
+
+Sets are collections of unique binary values (such as strings).
+
+For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `set`:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+```
+
+> **Note**
+>
+> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `sets` bucket type has been set properly we should see the following pair in our console output:
+
+```
+datatype: set
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate sets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+After creating and activating our new `sets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair.
+
+Here is the general syntax for creating a bucket type/bucket/key
+combination to handle a set:
+
+```java
+// In the Java client, a bucket/bucket type combination is specified
+// using a Namespace object. To specify bucket, bucket type, and key,
+// use a Location object that incorporates the Namespace object, as is
+// done below.
+
+Location set =
+  new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
+```
+
+```ruby
+# Note: both the Riak Ruby Client and Ruby the language have a class
+# called Set. Make sure that you refer to the Ruby version as ::Set and
+# the Riak client version as Riak::Crdt::Set
+
+bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
+set = Riak::Crdt::Set.new(bucket, key)
+```
+
+```php
+$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
+```
+
+```python
+# Note: The Python standard library `collections` module has an abstract
+# base class called Set, which the Riak Client version subclasses as
+# `riak.datatypes.Set`. These classes are not directly interchangeable.
+# In addition to the base methods, `riak.datatypes.Set` also
+# implements the `add` and `discard` methods from
+# `collections.MutableSet`, but does not implement the rest of its
+# API. Be careful when importing, or simply use the instances returned
+# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly
+# importing the class.
+
+set = bucket.new(key)
+
+# or
+
+from riak.datatypes import Set
+set = Set(bucket, key)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// As with counters, with the Riak .NET Client you interact with sets
+// by building an Options object or using a Builder
+var builder = new FetchSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("travel")
+    .WithKey("cities");
+
+// NB: builder.Options will only be set after Build() is called.
+FetchSet fetchSetCommand = builder.Build();
+
+FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities");
+
+// These two options objects are equal
+Assert.AreEqual(options, builder.Options);
+```
+
+```javascript
+// As with counters, with the Riak Node.js Client you interact with sets on the
+// basis of the set's location in Riak, as specified by an options object.
+// Below is an example:
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+```
+
+```erlang
+%% Like counters, sets are not encapsulated in a
+%% bucket/key in the Erlang client. See below for more
+%% information.
+```
+
+```curl
+curl http://localhost:8098/types/<bucket_type>/buckets/<bucket>/datatypes/<key>
+
+# Note that this differs from the URL structure for non-data type requests,
+# which end in /keys/<key>
+```
+
+## Create a Set
+
+For the following example, we will use a set to store a list of cities that we
+want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously:
+
+```java
+// In the Java client, you specify the location of Data Types
+// before you perform operations on them:
+
+Location citiesSet =
+  new Location(new Namespace("sets", "travel"), "cities");
+```
+
+```ruby
+travel = client.bucket_type('sets').bucket('travel')
+cities_set = Riak::Crdt::Set.new(travel, 'cities')
+
+# Alternatively, the Ruby client enables you to set a bucket type as
+# being globally associated with a Riak data type.
The following would +# set all set buckets to use the sets bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets' + +# This would enable us to create our set without specifying a bucket +# type: +travel = client.bucket('travel') +cities_set = Riak::Crdt::Set.new(travel, 'cities') +``` + +```php +$location = new \Basho\Riak\Location('cities', 'travel', 'sets'); +``` + +```python +travel = client.bucket_type('sets').bucket('travel') + +# The client detects the bucket type's data type and automatically +# returns the right data type for you, in this case a Riak set. +cities_set = travel.new('cities') + +# You can also create a reference to a set explicitly: +from riak.datatypes import Set + +cities_set = Set(travel, 'cities') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Now we'll create a Builder object for the set with which we want to +// interact: +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); +``` + +```javascript +// Now we'll create a options object for the set with which we want to +// interact: +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +``` + +```erlang +CitiesSet = riakc_set:new(). + +%% Sets in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +# You cannot create an empty set through the HTTP interface. Sets can +# only be created when an element is added to them, as in the examples +# below. +``` + +Upon creation, our set is empty. We can verify that it is empty at any +time: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +RiakSet set = response.getDatatype(); +boolean isEmpty = set.viewAsSet().isEmpty(); +``` + +```ruby +cities_set.empty? +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($set->getData()); +``` + +```python +len(cities_set) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); + +FetchSet fetchSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchSetCommand); +SetResponse response = fetchSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("set 'cities' is not found!"); + } +}); +``` + +```erlang +riakc_set:size(CitiesSet) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% set that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a Set + +But let's say that we read a travel brochure saying that Toronto and +Montreal are nice places to go. 
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+        .remove("Montreal")
+        .add("Hamilton")
+        .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('Hamilton')
+  ->add('Ottawa')
+  ->remove('Montreal')
+  ->atLocation($location)
+  ->withContext($response->getSet()->getContext())
+  ->build()
+  ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse response = cmd.Response;
+
+// using System.Linq
+var responseStrings = response.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4).
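+
+%% Note: removals require the set's causal context, which is only
+%% present on sets that were fetched from Riak. If CitiesSet2 was built
+%% locally with riakc_set:new/0 instead of being fetched, the
+%% del_element/2 call above may fail for lack of that context.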
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="} + +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}' +``` + +## Retrieve a Set + +Now, we can check on which cities are currently in our set: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); +for (BinaryValue city : binarySet) { + System.out.println(city.toStringUtf8()); +} +``` + +```ruby +cities_set.members + +# +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +var_dump($set->getData()); +``` + +```python +cities_set.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, where the call above would return +# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would +# return frozenset([]). + +cities_set.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any unsent additions or deletions. +cities_set.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +foreach (var value in setResponse.AsStrings) +{ + Console.WriteLine("Cities Set Value: {0}", value); +} + +// Output: +// Cities Set Value: Hamilton +// Cities Set Value: Ottawa +// Cities Set Value: Toronto +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function(err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("cities set values: '%s'", + rslt.values.join(', ')); +}); + +// Output: +// info: cities set values: 'Hamilton, Ottawa, Toronto' +``` + +```erlang +riakc_set:dirty_value(CitiesSet5). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, where the call above would return +%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would +%% return []. These are essentially ordsets: + +riakc_set:value(CitiesSet5). + +%% To fetch the value stored on the server, use the call below: + +{ok, SetX} = riakc_pb_socket:fetch_type(Pid, + {<<"sets">>,<<"travel">>}, + <<"cities">>). 
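+
+%% The fetched set's immutable value can then be read with:
+
+riakc_set:value(SetX).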
+```
+
+```curl
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities
+
+# Response
+{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="}
+
+# You can also fetch the value of the set without the context included:
+curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false
+
+# Response
+{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]}
+```
+
+## Find Set Member
+
+Or we can see whether our set includes a specific member:
+
+```java
+// Using our "citiesSet" from above:
+
+FetchSet fetch = new FetchSet.Builder(citiesSet)
+        .build();
+FetchSet.Response response = client.execute(fetch);
+Set<BinaryValue> binarySet = response.getDatatype().view();
+
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
+System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
+```
+
+```ruby
+cities_set.include? 'Vancouver'
+# false
+
+cities_set.include? 'Ottawa'
+# true
+```
+
+```php
+in_array('Vancouver', $set->getData()); # false
+
+in_array('Ottawa', $set->getData()); # true
+```
+
+```python
+'Vancouver' in cities_set
+# False
+
+'Ottawa' in cities_set
+# True
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver");
+bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa");
+```
+
+```javascript
+// Use standard javascript array method indexOf()
+
+var cities_set = result.values;
+cities_set.indexOf('Vancouver'); // if present, index is >= 0
+cities_set.indexOf('Ottawa'); // if present, index is >= 0
+```
+
+```erlang
+%% At this point, CitiesSet5 is the most "recent" set from the
+%% standpoint of our application.
+
+riakc_set:is_element(<<"Vancouver">>, CitiesSet5).
+riakc_set:is_element(<<"Ottawa">>, CitiesSet5).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+## Size of Set
+
+We can also determine the size of the set:
+
+```java
+// Using our "citiesSet" from above:
+
+int numberOfCities = citiesSet.size();
+```
+
+```ruby
+cities_set.members.length
+```
+
+```php
+count($set->getData());
+```
+
+```python
+len(cities_set)
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+using System.Linq;
+
+// Note: this enumerates the IEnumerable
+setResponse.Values.Count();
+```
+
+```javascript
+// Use standard javascript array property length
+
+var cities_set_size = result.values.length;
+```
+
+```erlang
+riakc_set:size(CitiesSet5).
+```
+
+```curl
+# With the HTTP interface, this can be determined from the output of
+# a fetch command like the one displayed in the example above
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/faq.md b/content/riak/kv/3.0.3/developing/faq.md
new file mode 100644
index 0000000000..1c6c9210cc
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/faq.md
@@ -0,0 +1,592 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Frequently Asked Questions"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Developing FAQ"
+    identifier: "developing_faq"
+    weight: 108
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/3.0.3/community/faqs/developing
+  - /riak/kv/3.0.3/community/faqs/developing
+---
+
+[Basho Bench]: {{}}riak/kv/3.0.3/using/performance/benchmarking
+[Bitcask]: {{}}riak/kv/3.0.3/setup/planning/backend/bitcask
+[Bucket Properties]: {{}}riak/kv/3.0.3/developing/usage
+[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js
+[commit hooks]: {{}}riak/kv/3.0.3/developing/usage/commit-hooks
+[Configuration Files]: {{}}riak/kv/3.0.3/configuring/reference
+[contrib.basho.com]: https://github.com/basho/riak_function_contrib
+[Erlang Riak Client]: {{}}riak/kv/3.0.3/developing/client-libraries
+[MapReduce]: {{}}riak/kv/3.0.3/developing/usage/mapreduce
+[Memory]: {{}}riak/kv/3.0.3/setup/planning/backend/memory
+[Riak CS]: {{}}riak/cs/2.1.1
+[System Planning]: {{}}riak/kv/3.0.3/setup/planning/start/#network-configuration-load-balancing
+[vector clocks]: {{}}riak/kv/3.0.3/learn/concepts/causal-context#vector-clocks
+
+
+## General
+
+
+**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?**
+
+**A:**
+  If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example:
+
+  ```erlang
+  {bitcask, [
+      {data_root, "data/bitcask"},
+      {expiry_secs, 86400} %% Expire after a day
+  ]},
+  ```
+
+  There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0.
+
+  You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM.
+
+
+**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?**
+
+
+**A:**
+  Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free.
+
+  If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact.
+
+
+**Q: Can I list buckets or keys in production?**
+
+
+**A:**
+  It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size.
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3.
+
+  For the sake of simplicity, we use small integers here instead of the actual 160-bit partition index values. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key, and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+```
+0-1-2    1-2-3    2-3-4    3-4-5
+4-5-6    5-6-7    6-7-8    7-8-9
+8-9-10   9-10-11  10-11-12 11-12-13
+12-13-14 13-14-15 14-15-0  15-0-1
+```
+ + Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up, in this case 6. There are 56 possible covering sets consisting of 6 vnodes: + + + + + + + + + + + + + + + + +
+```
+0-1-4-7-10-13   0-2-4-7-10-13   0-2-5-7-10-13   0-2-5-8-10-13
+0-2-5-8-11-13   0-2-5-8-11-14   0-3-4-7-10-13   0-3-5-7-10-13
+0-3-5-8-10-13   0-3-5-8-11-13   0-3-5-8-11-14   0-3-6-7-10-13
+0-3-6-8-10-13   0-3-6-8-11-13   0-3-6-8-11-14   0-3-6-9-10-13
+0-3-6-9-11-13   0-3-6-9-11-14   0-3-6-9-12-13   0-3-6-9-12-14
+0-3-6-9-12-15   1-2-5-8-11-14   1-3-5-8-11-14   1-3-6-8-11-14
+1-3-6-9-11-14   1-3-6-9-12-14   1-3-6-9-12-15   1-4-5-8-11-14
+1-4-6-8-11-14   1-4-6-9-11-14   1-4-6-9-12-14   1-4-6-9-12-15
+1-4-7-8-11-14   1-4-7-9-11-14   1-4-7-9-12-14   1-4-7-9-12-15
+1-4-7-10-11-14  1-4-7-10-12-14  1-4-7-10-12-15  1-4-7-10-13-14
+1-4-7-10-13-15  2-3-6-9-12-15   2-4-6-9-12-15   2-4-7-9-12-15
+2-4-7-10-12-15  2-4-7-10-13-15  2-5-6-9-12-15   2-5-7-9-12-15
+2-5-7-10-12-15  2-5-7-10-13-15  2-5-8-9-12-15   2-5-8-10-12-15
+2-5-8-10-13-15  2-5-8-11-12-15  2-5-8-11-13-15  2-5-8-11-14-15
+```
+
+  When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes, and the remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency.
+  A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for over half of them to respond. This results in consistent responses to GETs even when one of the vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may include only one member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+  Continuing with the above example, consider what happens if node C is force-removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+  In this new 4-node configuration, any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+  So, making a couple of assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+      ```
+      a - 0-1-2
+      b - 6-7-8
+      c - 10-11-12
+      ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys will vary depending on the nodes chosen for the coverage set. Of the 56 possible covering sets ...
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+```
+0-2-5-8-10-13  0-2-5-8-11-13  0-2-5-8-11-14  0-3-5-8-10-13
+0-3-5-8-11-13  0-3-5-8-11-14  0-3-6-8-10-13  0-3-6-8-11-13
+0-3-6-8-11-14  0-3-6-9-10-13  0-3-6-9-11-13  0-3-6-9-11-14
+1-2-5-8-11-14  1-3-5-8-11-14  1-3-6-8-11-14  1-3-6-9-11-14
+1-4-5-8-11-14  1-4-6-8-11-14  1-4-6-9-11-14  1-4-7-8-11-14
+```
+ + * 24 sets (42.9%) will return 2 of the 3 keys: + + + + + + + + + + + +
+```
+{a,b} (7 sets):
+0-3-6-9-12-13  0-3-6-9-12-14  0-3-6-9-12-15  1-3-6-9-12-14
+1-3-6-9-12-15  1-4-6-9-12-14  1-4-6-9-12-15
+
+{a,c} (12 sets):
+0-1-4-7-10-13  0-2-4-7-10-13  0-2-5-7-10-13  0-3-4-7-10-13
+0-3-5-7-10-13  0-3-6-7-10-13  1-4-7-10-11-14 1-4-7-10-12-14
+1-4-7-10-12-15 1-4-7-10-13-14 1-4-7-10-13-15 1-4-7-9-11-14
+
+{b,c} (5 sets):
+2-5-8-10-12-15 2-5-8-10-13-15 2-5-8-11-12-15 2-5-8-11-14-15
+2-5-8-11-13-15
+```
+ + * 10 sets (17.8%) will return only one of the 3 keys: + + + + + + + +
+```
+{a} (2 sets):
+1-4-7-9-12-14  1-4-7-9-12-15
+
+{b} (4 sets):
+2-3-6-9-12-15  2-4-6-9-12-15  2-5-6-9-12-15  2-5-8-9-12-15
+
+{c} (4 sets):
+2-4-7-10-12-15 2-4-7-10-13-15 2-5-7-10-12-15 2-5-7-10-13-15
+```
+ + * 2 sets (3.6%) will not return any of the 3 keys + + +
+```
+2-4-7-9-12-15  2-5-7-9-12-15
+```
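+
+  If you want to verify the combinatorics yourself, the following standalone Erlang sketch (a hypothetical helper module, not part of Riak) enumerates every 6-vnode subset of the 16 vnodes and counts those that intersect all 16 preflists; `coverage_check:count()` should evaluate to 56, matching the table above:
+
+  ```erlang
+  -module(coverage_check).
+  -export([count/0]).
+
+  %% The 16 preflists for ring size 16 and n_val 3.
+  preflists() ->
+      [[I, (I + 1) rem 16, (I + 2) rem 16] || I <- lists:seq(0, 15)].
+
+  %% All K-element subsets of a list.
+  subsets(_, 0) -> [[]];
+  subsets([], _) -> [];
+  subsets([H | T], K) ->
+      [[H | S] || S <- subsets(T, K - 1)] ++ subsets(T, K).
+
+  %% True if Set contains at least one vnode from every preflist.
+  covers(Set) ->
+      lists:all(fun(P) ->
+                    lists:any(fun(V) -> lists:member(V, Set) end, P)
+                end, preflists()).
+
+  count() ->
+      length([S || S <- subsets(lists:seq(0, 15), 6), covers(S)]).
+  ```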
+
+
+**Q: How do I load 3rd-party JavaScript libraries for use in MapReduce functions?**
+  Is it possible to load third-party JavaScript libraries (like Underscore.js) to be available in MapReduce functions?
+
+
+**A:**
+  Yes. For JavaScript, set `js_source_dir` in the `riak_kv` section of `app.config`:
+
+  ```erlang
+  {js_source_dir, "/etc/riak/javascript"},
+  ```
+
+  For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section:
+
+  ```erlang
+  {add_paths, "/etc/riak/erlang"},
+  ```
+
+  You can find more details in the [Configuration Files] document.
+
+**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?**
+  When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk?
+
+
+**A:**
+  Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following:
+
+  ```json
+  {
+    "inputs": {
+      "bucket": "test",
+      "key_filters": [
+        ["ends_with","1"]
+      ]
+    },
+    "query": [
+      {
+        "reduce": {
+          "language": "erlang",
+          "module": "riak_kv_mapreduce",
+          "function": "reduce_identity"
+        }
+      }
+    ]
+  }
+  ```
+
+  There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk:
+
+  ```json
+  {
+    "inputs": {
+      "bucket": "test",
+      "key_filters": [
+        [
+          "ends_with","1"
+        ]
+      ]
+    },
+    "query": [
+      {
+        "reduce": {
+          "language": "erlang",
+          "module": "riak_kv_mapreduce",
+          "function": "reduce_count_inputs"
+        }
+      }
+    ]
+  }
+  ```
+
+
+**Q: How can I observe object sizes and sibling counts?**
+
+
+**A:**
+  `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts.
+
+  ```
+  node_get_fsm_siblings_mean : 0
+  node_get_fsm_siblings_median : 0
+  node_get_fsm_siblings_95 : 0
+  node_get_fsm_siblings_99 : 0
+  node_get_fsm_siblings_100 : 0
+  node_get_fsm_objsize_mean : 0
+  node_get_fsm_objsize_median : 0
+  node_get_fsm_objsize_95 : 0
+  node_get_fsm_objsize_99 : 0
+  node_get_fsm_objsize_100 : 0
+  ```
+
+
+**Q: A node left the cluster before handing off all data. How can I resolve this?**
+
+
+**A:**
+  In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0.
+
+  If you encounter this issue, you can rely upon the read-repair mechanism to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed.
+
+  Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho who can ask for help if anything goes wrong.
+
+  **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster.
+  Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories.
+
+  ```erlang
+  ClusterNode = 'riak@127.0.0.1'.
+
+  application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+  {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+  Ring2 = setelement(2, Ring, node()).
+  riak_core_ring_manager:set_my_ring(Ring2).
+  riak_core_ring_manager:write_ringfile().
+  [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()].
+  ```
+
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+  There isn't a limit on object size, but we suggest you keep it to no more than 1-2 MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. If your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+  The storage per key is 40 bytes plus the key size and bucket name size.
+
+  Example:
+
+  Key size: 15 bytes.
+  Bucket name size: 10 bytes.
+
+  Total size = 40 + 15 + 10 = **65 bytes**.
+
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+  It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+  The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+  Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster.
+
+  The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+
+**Q: Are Riak keys / buckets case sensitive?**
+
+
+**A:**
+  Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+  We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+  1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+  2. You will be able to upgrade Riak and your application independently of one another.
+  3. When your application or Riak need more capacity, you can scale them separately to meet your production needs.
+
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+  Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+
+**Q: How do I spread requests across (i.e. load balance) a Riak cluster?**
+
+
+**A:**
+  There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+  For further information see [System Planning].
+
+
+
+**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?**
+  There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node, a series of merges is kicked off and the total size of the data directory shrinks. Why does this happen?
+
+
+**A:**
+  Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows:
+
+  1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`)
+  2. Remove the currently active file from the list; the active file is the one being actively written
+  3. Look up file stats for each data file; this includes percent fragmentation and number of dead bytes
+  4. If any of the stats exceed the defined triggers, the Bitcask directory is merged
+
+  The default triggers for a Bitcask directory:
+
+  * `{frag_merge_trigger, 60}, % >= 60% fragmentation`
+  * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB`
+
+  In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory.
+
+  If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default).
+
+
+**Q: When retrieving a list of siblings I am getting the same vtag multiple times.**
+  When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling?
+
+
+**A:**
+  The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata.
+
+  It is possible to get siblings with the same vtag during vector clock pruning and read repair.
+
+  See [vector clocks] for more information.
+
+
+
+**Q: How should I structure larger data objects?**
+  I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs?
+
+
+**A:**
+  The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include:
+
+  1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time?
+  2. How likely are the objects to be updated at the same time by multiple processes?
+
+  If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts.
+  Generally, you will want to add links to connect the objects for easy fetching and traversal.
+
+
+**Q: Is there any way in Riak to limit access to a user or a group of users?**
+
+
+**A:**
+  Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication.
+
+  If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it.
+
+
+**Q: Is there a way to enforce a schema on data in a given bucket?**
+  Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error.
+
+
+**A:**
+  Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. This document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in JavaScript that restricts non-JSON content.
+
+
+**Q: How does the Erlang Riak Client manage node failures?**
+  Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down?
+
+
+**A:**
+  The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function.
+
+  * `queue_if_disconnected` (default: `false`) - requests will be queued when the connection to the server is lost.
+  * `auto_reconnect` (default: `false`) - if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`.
+
+  If these options are both false, connection errors will be returned as `{error, Reason}` tuples to the processes making requests.
+
+
+**Q: Is there a limiting factor for the number of buckets in a cluster?**
+
+
+**A:**
+  As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node.
+
+  More on [Bucket Properties].
+
+
+**Q: Is it possible to configure a single bucket's properties in `app.config`?**
+
+
+**A:**
+  Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state.
+
+  You can read more on `app.config` in [Configuration Files].
+
+
+**Q: Is there a simple command to delete a bucket?**
+
+
+**A:**
+  There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work:
+
+  ```curl
+  curl -X DELETE http://your-host:8098/riak/your-bucket
+  ```
+
+
+**Q: Can Riak be configured to fail an update instead of generating a conflict?**
+
+
+**A:**
+  No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do.
+
+
+**Q: How can I limit the number of keys retrieved?**
+
+
+**A:**
+  You'll need to use a [MapReduce] job for this.
+
+  You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster. It will only reduce load on your client.
+
+
+**Q: How is the real hash value for replicas calculated based on the preflist?**
+
+
+**A:**
+  The hash is calculated first, and then the next *N* partitions are chosen for the preflist.
+
+
+**Q: Do client libraries support load balancing/round robin?**
+
+
+**A:**
+
+  * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred.
+  * The Java client is strictly round robin, but with retries built in.
+  * The Python client also follows round robin without retries.
+  * The Erlang client does not support any load balancing.
+
+## MapReduce
+
+
+**Q: Does the number of keys in a bucket affect the performance of MapReduce?**
+
+
+**A:**
+  Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run.
+
+
+**Q: How do I filter out `not_found` from MapReduce results?**
+  If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys.
+
+
+**A:**
+  There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list].
+
+
+**Q: Is it possible to call a reduce function at specific intervals during a map function?**
+  When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often.
+
+
+**A:**
+  Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce.
+
+
+**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?**
+
+
+**A:**
+  Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase.
+
+
+**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?**
+
+
+**A:**
+  This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com].
+
+
+**Q: Why does MapReduce return a JSON object on occasion instead of an array?**
+
+
+**A:**
+  `mochijson2` turns anything that looks like a proplist (a list of 2-tuples) into a JSON object:
+
+  ```erlang
+  list_to_binary(mochijson2:encode([{a, b}, {foo, bar}])).
+  <<"{\"a\":\"b\",\"foo\":\"bar\"}">>
+  ```
+
+  JSON has no "tuple" notion. For the time being, a recommended workaround is to use a list of length-2 lists.
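+
+  For instance, a minimal sketch of that workaround (output shown as a comment; exact formatting may vary with the `mochijson2` version):
+
+  ```erlang
+  %% Length-2 lists encode as nested JSON arrays rather than an object:
+  list_to_binary(mochijson2:encode([[a, b], [foo, bar]])).
+  %% <<"[[\"a\",\"b\"],[\"foo\",\"bar\"]]">>
+  ```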
+ + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started.md b/content/riak/kv/3.0.3/developing/getting-started.md new file mode 100644 index 0000000000..8e9b35e4ad --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started.md @@ -0,0 +1,51 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +aliases: +--- + +[install index]: {{}}riak/kv/3.0.3/setup/installing +[dev client libraries]: {{}}riak/kv/3.0.3/developing/client-libraries + +Welcome, new Riak developer! This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
    +
+* [Java]({{}}riak/kv/3.0.3/developing/getting-started/java)
+* [Ruby]({{}}riak/kv/3.0.3/developing/getting-started/ruby)
+* [Python]({{}}riak/kv/3.0.3/developing/getting-started/python)
+* [C#]({{}}riak/kv/3.0.3/developing/getting-started/csharp)
+* [Node.js]({{}}riak/kv/3.0.3/developing/getting-started/nodejs)
+* [Erlang]({{}}riak/kv/3.0.3/developing/getting-started/erlang)
+* [PHP]({{}}riak/kv/3.0.3/developing/getting-started/php)
+* [Go]({{}}riak/kv/3.0.3/developing/getting-started/golang)
+
+### Community-supported Client Libraries
+
+Please see our [client libraries][dev client libraries] page for a listing of
+community-supported clients.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/csharp.md b/content/riak/kv/3.0.3/developing/getting-started/csharp.md
new file mode 100644
index 0000000000..bbc9928ec4
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/csharp.md
@@ -0,0 +1,86 @@
+---
+title: "Getting Started with C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "C Sharp"
+    identifier: "getting_started_csharp"
+    weight: 103
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/taste-of-riak/csharp
+  - /riak/kv/3.0.3/dev/taste-of-riak/csharp
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.3/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of the .NET Framework or Mono is required.
+
+### Client Setup
+
+Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager.
+
+{{% note title="Configuring for a remote cluster" %}}
+By default, the Riak .NET Client will add a section to your `app.config` file
+for a four node local cluster. If you are using a remote cluster, open up
+`app.config` and change the `hostAddress` values to point to nodes in your
+remote cluster.
+{{% /note %}}
+
+### Connecting to Riak
+
+Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object.
+
+```csharp
+using System;
+using RiakClient;
+
+namespace TasteOfRiak
+{
+    class Program
+    {
+        static void Main(string[] args)
+        {
+            // don't worry, we'll use this string later
+            const string contributors = "contributors";
+            IRiakEndPoint cluster = RiakCluster.FromConfig("riakConfig");
+            IRiakClient client = cluster.CreateClient();
+        }
+    }
+}
+```
+
+This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndPoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak.
+
+Let's make sure the cluster is online. Add this to your `Main` method:
+
+```csharp
+var pingResult = client.Ping();
+
+if (pingResult.IsSuccess)
+{
+    Console.WriteLine("pong");
+}
+else
+{
+    Console.WriteLine("Are you sure Riak is running?");
+    Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage);
+}
+```
+
+This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes.
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.3/developing/getting-started/csharp/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/3.0.3/developing/getting-started/csharp/crud-operations.md
new file mode 100644
index 0000000000..dd3e7dccae
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/csharp/crud-operations.md
@@ -0,0 +1,148 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "CRUD Operations"
+    identifier: "getting_started_csharp_crud"
+    weight: 100
+    parent: "getting_started_csharp"
+toc: true
+aliases:
+---
+
+### Creating Objects In Riak
+
+Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak.
+
+The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At the most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak.
+
+Add the `RiakClient.Models` namespace to your using directive. Your usings should look like this:
+
+```csharp
+using System;
+using System.Collections.Generic;
+using RiakClient;
+using RiakClient.Models;
+```
+
+Add the `Person` class to the `TasteOfRiak` namespace:
+
+```csharp
+public class Person
+{
+    public string EmailAddress { get; set; }
+    public string FirstName { get; set; }
+    public string LastName { get; set; }
+}
+```
+
+Now let's create some people!
+
+```csharp
+var people = new[]
+{
+    new Person {
+        EmailAddress = "bashoman@basho.com",
+        FirstName = "Basho",
+        LastName = "Man"
+    },
+    new Person {
+        EmailAddress = "johndoe@gmail.com",
+        FirstName = "John",
+        LastName = "Doe"
+    }
+};
+
+foreach (var person in people)
+{
+    var o = new RiakObject(contributors, person.EmailAddress, person);
+    var putResult = client.Put(o);
+
+    if (putResult.IsSuccess)
+    {
+        Console.WriteLine("Successfully saved {0} to bucket {1}", o.Key, o.Bucket);
+    }
+    else
+    {
+        Console.WriteLine("Are you *really* sure Riak is running?");
+        Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage);
+    }
+}
+```
+
+In this sample, we create a collection of `Person` objects and then save each `Person` to Riak.
+
+Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`.
+
+Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be an error message displaying the result code and a helpful error message.
+
+### Reading from Riak
+
+Let's find a person!
+
+```csharp
+var result = client.Get(contributors, "bashoman@basho.com");
+if (result.IsSuccess)
+{
+    bashoman = result.Value.GetObject<Person>();
+    Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors);
+}
+else
+{
+    Console.WriteLine("Something went wrong!");
+    Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage);
+}
+```
+
+We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult<RiakObject>` which, like other RiakResults, helpfully encapsulates the communication with Riak.
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/3.0.3/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..eb2e01b50f --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,111 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/3.0.3/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br /> `marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting UTC datetime in an [ISO 8601][iso_8601]
+format. This combination gives us the pattern `<user_name>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, please refer to
+[this source code][2] for the repositories that we'll be using.
+
+[This console application][3] exercises the code that we've written.
+
+The repository pattern and `TimelineManager` help with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/csharp/querying.md b/content/riak/kv/3.0.3/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..37d07208bc --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/csharp/querying.md @@ -0,0 +1,214 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/querying-csharp + - /riak/kv/3.0.3/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+
+```csharp
+Console.WriteLine("Creating Data");
+Customer customer = CreateCustomer();
+IEnumerable<Order> orders = CreateOrders(customer);
+OrderSummary orderSummary = CreateOrderSummary(customer, orders);
+
+Console.WriteLine("Starting Client");
+using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig"))
+{
+    IRiakClient client = endpoint.CreateClient();
+
+    Console.WriteLine("Storing Data");
+
+    client.Put(ToRiakObject(customer));
+
+    foreach (Order order in orders)
+    {
+        // NB: this adds secondary index data as well
+        client.Put(ToRiakObject(order));
+    }
+
+    client.Put(ToRiakObject(orderSummary));
+
+    ...
+    ...
+    ...
+}
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```csharp
+Console.WriteLine("Fetching related data by shared key");
+string key = "1";
+
+var result = client.Get(customersBucketName, key);
+CheckResult(result);
+Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result));
+
+result = client.Get(orderSummariesBucketName, key);
+CheckResult(result);
+Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result));
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.3/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will make a note of where
+secondary index data is added to our model objects.
+
+```csharp
+private static RiakObject ToRiakObject(Order order)
+{
+    var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString());
+    var riakObject = new RiakObject(orderRiakObjectId, order);
+
+    IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName);
+    salesPersonIdIndex.Add(order.SalesPersonId.ToString());
+
+    BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName);
+    orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd"));
+
+    return riakObject;
+}
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's id of `9000`.
+
+```csharp
+// Query for order keys where the SalesPersonId index is set to 9000
+var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName);
+RiakResult<RiakIndexResult> indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here.
+CheckResult(indexRiakResult);
+RiakIndexResult indexResult = indexRiakResult.Value;
+Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
+```
+
+Which returns:
+
+```text
+Jane's orders (key values): 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID. Next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+```csharp
+// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
+riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName);
+indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here.
+CheckResult(indexRiakResult);
+indexResult = indexRiakResult.Value;
+Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key)));
+```
+
+Which returns:
+
+```text
+October orders (key values): 1, 2
+```
+
+We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
+[taste_of_riak]: https://github.com/basho/taste-of-riak
+[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/erlang.md b/content/riak/kv/3.0.3/developing/getting-started/erlang.md
new file mode 100644
index 0000000000..1f379517f1
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/erlang.md
@@ -0,0 +1,59 @@
+---
+title: "Getting Started with Erlang"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Erlang"
+    identifier: "getting_started_erlang"
+    weight: 105
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/taste-of-riak/erlang
+  - /riak/kv/3.0.3/dev/taste-of-riak/erlang
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.3/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Erlang is
+required. You can also use the `erts` Erlang installation that comes
+with Riak.
+
+## Client Setup
+
+Download the latest Erlang client from GitHub
+([zip](https://github.com/basho/riak-erlang-client/archive/master.zip),
+[GitHub repository](https://github.com/basho/riak-erlang-client/)) and
+extract it to your working directory.
+
+Next, open the Erlang console with the client library paths included.
+
+```bash
+erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin
+```
+
+Now let’s create a link to the Riak node. If you are using a single
+local Riak node, use the following to create the link:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you set up a local Riak cluster using the [Running A Cluster]({{}}riak/kv/3.0.3/using/running-a-cluster)
+instructions, use this code snippet instead:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017).
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.3/developing/getting-started/erlang/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/3.0.3/developing/getting-started/erlang/crud-operations.md
new file mode 100644
index 0000000000..a505ff8994
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/erlang/crud-operations.md
@@ -0,0 +1,172 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Erlang"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "CRUD Operations"
+    identifier: "getting_started_erlang_crud"
+    weight: 100
+    parent: "getting_started_erlang"
+toc: true
+aliases:
+---
+
+## Creating Objects In Riak
+
+First, let’s create a few Riak objects. For these examples we'll be
+using the bucket `test`.
+
+```erlang
+MyBucket = <<"test">>.
+
+Val1 = 1.
+Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1).
+riakc_pb_socket:put(Pid, Obj1).
+```
+
+In this first example, we have stored the integer 1 with the lookup key
+of `one`. Next, let’s store a simple string value of `two` with a
+matching key.
+
+```erlang
+Val2 = <<"two">>.
+Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2).
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+That was easy. Finally, let’s store something more complex, a tuple this
+time. You will probably recognize the pattern by now.
+
+```erlang
+Val3 = {value, 3}.
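+%% Non-binary Erlang terms like this tuple are serialized with
+%% term_to_binary/1 when stored, as we'll see when we read a complex
+%% value back below.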
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
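+%% With the record definition from rd(book, ...) still loaded in the
+%% shell, this should print the original record, along the lines of:
+%% #book{title = "Moby Dick",author = "Herman Melville",...}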
+```
+
+Next, let’s clean up our mess:
+
+```erlang
+riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>).
+riakc_pb_socket:stop(Pid).
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/3.0.3/developing/getting-started/erlang/object-modeling.md
new file mode 100644
index 0000000000..81e2f3bfd5
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/erlang/object-modeling.md
@@ -0,0 +1,342 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Erlang"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Object Modeling"
+    identifier: "getting_started_erlang_object"
+    weight: 102
+    parent: "getting_started_erlang"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/taste-of-riak/object-modeling-erlang
+  - /riak/kv/3.0.3/dev/taste-of-riak/object-modeling-erlang
+---
+
+To get started, let's create the records that we'll be using.
+
+{{% note title="Code Download" %}}
+You can also download the code for this chapter at
+[GitHub](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema).
+
+The GitHub version includes Erlang type specifications, which have been omitted
+here for brevity.
+{{% /note %}}
+
+
+```erlang
+%% msgy.hrl
+
+-define(USER_BUCKET, <<"Users">>).
+-define(MSG_BUCKET, <<"Msgs">>).
+-define(TIMELINE_BUCKET, <<"Timelines">>).
+-define(INBOX, "Inbox").
+-define(SENT, "Sent").
+
+-record(user, {user_name, full_name, email}).
+
+-record(msg, {sender, recipient, created, text}).
+
+-record(timeline, {owner, msg_type, msgs}).
+```
+
+We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/3.0.3/developing/usage/bucket-types) here, so we don't need to specify one.
+
+To use these records to store data, we will first have to create a user
+record. Then, when a user creates a message, we will append that message
+to one or more timelines. If it's a private message, we'll append it to
+the Recipient's `Inbox` timeline and to the User's own `Sent` timeline.
+If it's a group message, we'll append it to the Group's timeline, as
+well as to the User's `Sent` timeline.
+
+#### Buckets and keys revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```erlang
+%% user_repository.erl
+
+-module(user_repository).
+-export([save_user/2,
+         get_user/2]).
+-include("msgy.hrl").
+
+save_user(ClientPid, User) ->
+    RUser = riakc_obj:new(?USER_BUCKET,
+                          list_to_binary(User#user.user_name),
+                          User),
+    riakc_pb_socket:put(ClientPid, RUser).
+
+get_user(ClientPid, UserName) ->
+    {ok, RUser} = riakc_pb_socket:get(ClientPid,
+                                      ?USER_BUCKET,
+                                      list_to_binary(UserName)),
+    binary_to_term(riakc_obj:get_value(RUser)).
+```
+
+<br>
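+
+A quick sanity check from the Erlang shell might look like this (a
+hypothetical session; it assumes `Pid` is an open `riakc_pb_socket`
+connection and that the record definitions from `msgy.hrl` have been
+loaded into the shell with `rr/1`):
+
+```erlang
+%% Hypothetical shell session, not part of the repository modules.
+Joe = #user{user_name="joeuser",
+            full_name="Joe User",
+            email="joe.user@basho.com"}.
+user_repository:save_user(Pid, Joe).
+Joe =:= user_repository:get_user(Pid, "joeuser"). %% true
+```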
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
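+
+Note that `create_msg/3` only builds an in-memory record; persisting it
+is left to the timeline repository below, so the canonical copy and the
+timeline entries are written together. As a hypothetical illustration,
+the key a message will eventually be stored under combines sender and
+timestamp:
+
+```erlang
+%% Hypothetical shell snippet, assuming msgy.hrl records are loaded.
+Msg = msg_repository:create_msg("marleenmgr", "joeuser", "Welcome!").
+MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created.
+%% => "marleenmgr_2014-03-06T02:05:13.223556Z" (timestamp will vary)
+```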
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/erlang/querying.md b/content/riak/kv/3.0.3/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..f17f3c4855 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/erlang/querying.md @@ -0,0 +1,308 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/querying-erlang + - /riak/kv/3.0.3/dev/taste-of-riak/querying-erlang +--- + + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/3.0.3/developing/key-value-modeling). 
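+
+For instance, the "same key across multiple buckets" idea can be
+sketched in a few lines (a hypothetical fragment; the full worked
+example appears in the next section):
+
+```erlang
+%% Hypothetical sketch: one identity key relates data in several buckets.
+%% Assumes Pid is an open riakc_pb_socket connection.
+Key = <<"1">>.
+{ok, Customer} = riakc_pb_socket:get(Pid, <<"Customers">>, Key).
+{ok, Summary} = riakc_pb_socket:get(Pid, <<"OrderSummaries">>, Key).
```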
+ +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle= Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +## Remember to replace the ip and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). + +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj). 
+ +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.3/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
+
+AddIndicesToOrder = fun(OrderKey) ->
+    {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket,
+                                      list_to_binary(integer_to_list(OrderKey))),
+
+    OrderData = binary_to_term(riakc_obj:get_value(Order)),
+    OrderMetadata = riakc_obj:get_update_metadata(Order),
+
+    MD1 = riakc_obj:set_secondary_index(OrderMetadata,
+                                        [{{binary_index, "order_date"},
+                                          [FormatDate(OrderData#order.order_date)]}]),
+
+    MD2 = riakc_obj:set_secondary_index(MD1,
+                                        [{{integer_index, "salesperson_id"},
+                                          [OrderData#order.salesperson_id]}]),
+
+    Order2 = riakc_obj:update_metadata(Order,MD2),
+    riakc_pb_socket:put(Pid,Order2)
+end.
+
+lists:foreach(AddIndicesToOrder, [1,2,3]).
+
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we
+have to add entries to the indices at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```erlang
+riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"3">>],
+                      undefined,undefined}}
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's id. Next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`order_date_bin` index for entries between `20131001` and `20131031`.
+
+```erlang
+riakc_pb_socket:get_index_range(Pid, OrderBucket,
+                                {binary_index, "order_date"},
+                                <<"20131001">>, <<"20131031">>).
+```
+
+Which returns:
+
+```erlang
+{ok,{index_results_v1,[<<"1">>,<<"2">>],
+                      undefined,undefined}}
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So, to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indices can have either Integer or Binary (String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/golang.md b/content/riak/kv/3.0.3/developing/getting-started/golang.md
new file mode 100644
index 0000000000..32b4db7008
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/golang.md
@@ -0,0 +1,82 @@
+---
+title: "Getting Started with Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Go"
+    identifier: "getting_started_go"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/taste-of-riak/golang
+  - /riak/kv/3.0.3/dev/taste-of-riak/golang
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.3/using/running-a-cluster) first and ensure you have
+[a working installation of Go](http://golang.org/doc/install).
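+
+You can confirm that the Go toolchain is available before continuing:
+
+```bash
+go version
+```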
+
+## Client Setup
+
+First install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main
+
+import (
+    "encoding/binary"
+    "encoding/json"
+    "sync"
+
+    riak "github.com/basho/riak-go-client"
+    util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+    var err error
+
+    // un-comment-out to enable debug logging
+    // riak.EnableDebugLogging = true
+
+    o := &riak.NewClientOptions{
+        RemoteAddresses: []string{util.GetRiakAddress()},
+    }
+
+    var c *riak.Client
+    c, err = riak.NewClient(o)
+    if err != nil {
+        util.ErrExit(err)
+    }
+
+    defer func() {
+        if err := c.Stop(); err != nil {
+            util.ErrExit(err)
+        }
+    }()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.3/developing/getting-started/golang/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/golang/crud-operations.md b/content/riak/kv/3.0.3/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..9915425e27
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,376 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+aliases:
+---
+
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+    val1 := uint32(1)
+    val1buf := make([]byte, 4)
+    binary.LittleEndian.PutUint32(val1buf, val1)
+
+    val2 := "two"
+
+    val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+    var val3json []byte
+    val3json, err = json.Marshal(val3)
+    if err != nil {
+        util.ErrExit(err)
+    }
+
+    bucket := "test"
+
+    util.Log.Println("Creating Objects In Riak...")
+
+    objs := []*riak.Object{
+        {
+            Bucket:      bucket,
+            Key:         "one",
+            ContentType: "application/octet-stream",
+            Value:       val1buf,
+        },
+        {
+            Bucket:      bucket,
+            Key:         "two",
+            ContentType: "text/plain",
+            Value:       []byte(val2),
+        },
+        {
+            Bucket:      bucket,
+            Key:         "three",
+            ContentType: "application/json",
+            Value:       val3json,
+        },
+    }
+
+    var cmd riak.Command
+    wg := &sync.WaitGroup{}
+
+    for _, o := range objs {
+        cmd, err = riak.NewStoreValueCommandBuilder().
+            WithContent(o).
+ Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } + } + + wg.Wait() +``` + +In our first object, we have stored the integer 1 with the lookup key +of `one`: + +```golang +{ + Bucket: bucket, + Key: "one", + ContentType: "application/octet-stream", + Value: val1buf, +} +``` + +For our second object, we stored a simple string value of `two` with a +matching key: + +```golang +{ + Bucket: bucket, + Key: "two", + ContentType: "text/plain", + Value: []byte(val2), +} +``` + +Finally, the third object we stored was a bit of JSON: + +```golang +{ + Bucket: bucket, + Key: "three", + ContentType: "application/json", + Value: val3json, +} +``` + +## Reading Objects + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +Requesting the objects by key: + +```golang +var cmd riak.Command +wg := &sync.WaitGroup{} + +for _, o := range objs { + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(o). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() + +util.Log.Println("Reading Objects From Riak...") + +d := make(chan riak.Command, len(objs)) + +for _, o := range objs { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + Done: d, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +close(d) +``` + +Converting to JSON to compare a string key to a symbol +key: + +```golang +for done := range d { + f := done.(*riak.FetchValueCommand) + /* un-comment to dump fetched object as JSON + if json, jerr := json.MarshalIndent(f.Response, "", " "); err != nil { + util.ErrLog.Println(jerr) + } else { + util.Log.Println("fetched value: ", string(json)) + } + */ + obj := f.Response.Values[0] + switch obj.Key { + case "one": + if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "two": + if actual, expected := string(obj.Value), val2; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "three": + obj3 = obj + val3.MyValue = 0 + if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) + } else { + if actual, expected := val3.MyValue, int(3); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + } + default: + util.ErrLog.Printf("unrecognized key: %s", obj.Key) + } +} +``` + +## Updating Objects + +While some data may be static, other forms of data need to be +updated. + +Let’s update some values: + +```golang +util.Log.Println("Updating Object Three In Riak...") + +val3.MyValue = 42 +obj3.Value, err = json.Marshal(val3) +if err != nil { + util.ErrExit(err) +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj3). + WithReturnBody(true). 
+ Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} + +svcmd := cmd.(*riak.StoreValueCommand) +svrsp := svcmd.Response +obj3 = svrsp.Values[0] +val3.MyValue = 0 +if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) +} else { + if actual, expected := val3.MyValue, int(42); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected) + } +} +util.Log.Println("updated object key: ", obj3.Key) +util.Log.Println("updated object value: ", val3.MyValue) +``` + +## Deleting Objects + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called against either the bucket or the +object. + +```golang +for _, o := range objs { + cmd, err = riak.NewDeleteValueCommandBuilder(). + WithBucket(o.Bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. + +For example, this `struct` that represents some information about +a book: + +```golang +type Book struct { + ISBN string + Title string + Author string + Body string + CopiesOwned uint16 +} + +book := &Book{ + ISBN: "1111979723", + Title: "Moby Dick", + Author: "Herman Melville", + Body: "Call me Ishmael. Some years ago...", + CopiesOwned: 3, +} +``` + +We now have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```golang +var jbook []byte +jbook, err = json.Marshal(book) +if err != nil { + util.ErrExit(err) +} + +bookObj := &riak.Object{ + Bucket: "books", + Key: book.ISBN, + ContentType: "application/json", + Value: jbook, +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(bookObj). + WithReturnBody(false). + Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} +``` + +If we fetch our book back and print the data: + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket("books"). + WithKey(book.ISBN). + Build() +if err != nil { + util.ErrExit(err) +} +if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) +} + +fcmd := cmd.(*riak.FetchValueCommand) +bookObj = fcmd.Response.Values[0] +util.Log.Println(string(bookObj.Value)) +``` + +The result is: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +Now, let’s delete the book: + +```golang +... 
+``` + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/golang/object-modeling.md b/content/riak/kv/3.0.3/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..8faaf09ce1 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,552 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/object-modeling-golang + - /riak/kv/3.0.3/dev/taste-of-riak/object-modeling-golang +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/3.0.3/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+    "encoding/json"
+    "errors"
+
+    riak "github.com/basho/riak-go-client"
+    models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+    Get(key string, notFoundOk bool) (models.Model, error)
+    Save(models.Model) (models.Model, error)
+    getBucketName() string
+    getModel() models.Model
+    getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+    client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+    return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+    client := r.getClient()
+    bucket := r.getBucketName()
+    cmd, err := riak.NewFetchValueCommandBuilder().
+        WithBucket(bucket).
+        WithKey(key).
+        WithNotFoundOk(notFoundOk).
+        Build()
+    if err != nil {
+        return nil, err
+    }
+    if err = client.Execute(cmd); err != nil {
+        return nil, err
+    }
+
+    fcmd := cmd.(*riak.FetchValueCommand)
+
+    if notFoundOk && len(fcmd.Response.Values) == 0 {
+        return nil, nil
+    }
+
+    if len(fcmd.Response.Values) > 1 {
+        // Siblings present that need resolution
+        // Here we'll just return an unexpected error
+        return nil, ErrUnexpectedSiblings
+    } else {
+        return buildModel(r.getModel(), fcmd.Response.Values[0])
+    }
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+    client := r.getClient()
+    bucket := r.getBucketName()
+    key := m.GetId()
+
+    cmd, err := riak.NewFetchValueCommandBuilder().
+        WithBucket(bucket).
+        WithKey(key).
+        WithNotFoundOk(true).
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
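+
+Note the `notFoundOk` flag on `get`: when it is `true`, a missing key
+comes back as `(nil, nil)` rather than as an error, so callers need a
+nil check. A minimal sketch of hypothetical caller code (using one of
+the concrete repositories defined below):
+
+```golang
+// Hypothetical caller fragment; userRepo is a *UserRepository from below.
+m, err := userRepo.Get("nosuchuser", true)
+if err != nil {
+    // a real failure: network error, unexpected siblings, etc.
+}
+if m == nil {
+    // the key simply does not exist
+}
+```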
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
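+
+Because the repositories accept and return the `models.Model`
+interface, callers assert back to the concrete type, as the test
+program at the end of this chapter does. A hypothetical fragment:
+
+```golang
+// Save returns models.Model, so assert back to *models.User.
+m, err := userRepo.Save(models.NewUser("joeuser", "Joe User", "joe.user@basho.com"))
+if err != nil {
+    // handle the error
+}
+joe := m.(*models.User)
+```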
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
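+
+Message keys come from `Msg.GetId()`, which combines the sender with an
+ISO 8601 timestamp, so a saved message can be fetched later by
+recomputing that natural key. A hypothetical fragment:
+
+```golang
+// Store a message, then fetch it back by its natural key.
+msg := models.NewMsg("marleenmgr", "joeuser", "Welcome to the company!")
+saved, err := msgRepo.Save(msg)
+if err != nil {
+    // handle the error
+}
+fetched, err := msgRepo.Get(saved.GetId(), false)
+```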
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names. +* How to choose natural keys based on how we want to partition our data. + + + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/golang/querying.md b/content/riak/kv/3.0.3/developing/getting-started/golang/querying.md new file mode 100644 index 0000000000..abd0bcdaf5 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/golang/querying.md @@ -0,0 +1,580 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Go" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Querying" + identifier: "getting_started_go_query" + weight: 101 + parent: "getting_started_go" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/querying-golang + - /riak/kv/3.0.3/dev/taste-of-riak/querying-golang +--- + +## Go Version Setup + +For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix* OS. + +>A Quick Note on Querying and Schemas: +> +>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it. 
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
+ Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +doneChan := make(chan riak.Command) +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Bucket { + case customersBucket: + util.Log.Printf("Customer 1: %v", string(obj.Value)) + case orderSummariesBucket: + util.Log.Printf("OrderSummary 1: %v", string(obj.Value)) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} +``` + +Which returns our amalgamated objects: + +```sh +2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]} +2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z" +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.3/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: + +```golang +util.Log.Println("Adding Index Data") + +// fetch orders to add index data +cmds = cmds[:0] + +for _, order := range orders { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(ordersBucket). + WithKey(order.Id). 
+        Build()
+    if err != nil {
+        util.ErrExit(err)
+    }
+    cmds = append(cmds, cmd)
+}
+
+errored = false
+for _, cmd := range cmds {
+    a := &riak.Async{
+        Command: cmd,
+        Done:    doneChan,
+    }
+    if eerr := c.ExecuteAsync(a); eerr != nil {
+        errored = true
+        util.ErrLog.Println(eerr)
+    }
+}
+if errored {
+    util.ErrExit(errors.New("error, exiting!"))
+}
+
+errored = false
+for i := 0; i < len(cmds); i++ {
+    select {
+    case d := <-doneChan:
+        if fv, ok := d.(*riak.FetchValueCommand); ok {
+            obj := fv.Response.Values[0]
+            switch obj.Key {
+            case "1":
+                obj.AddToIntIndex("SalespersonId_int", 9000)
+                obj.AddToIndex("OrderDate_bin", "2013-10-01")
+            case "2":
+                obj.AddToIntIndex("SalespersonId_int", 9001)
+                obj.AddToIndex("OrderDate_bin", "2013-10-15")
+            case "3":
+                obj.AddToIntIndex("SalespersonId_int", 9000)
+                obj.AddToIndex("OrderDate_bin", "2013-11-03")
+            }
+            scmd, serr := riak.NewStoreValueCommandBuilder().
+                WithContent(obj).
+                Build()
+            if serr != nil {
+                util.ErrExit(serr)
+            }
+            a := &riak.Async{
+                Command: scmd,
+                Wait:    wg,
+            }
+            if eerr := c.ExecuteAsync(a); eerr != nil {
+                errored = true
+                util.ErrLog.Println(eerr)
+            }
+        } else {
+            util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d)))
+        }
+    case <-time.After(5 * time.Second):
+        util.ErrExit(errors.New("fetch operations took too long"))
+    }
+}
+
+if errored {
+    util.ErrExit(errors.New("error, exiting!"))
+}
+
+wg.Wait()
+close(doneChan)
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`:
+
+```golang
+util.Log.Println("Index Queries")
+
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucket(ordersBucket).
+    WithIndexName("SalespersonId_int").
+    WithIndexKey("9000").
+    Build()
+if err != nil {
+    util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+    util.ErrExit(eerr)
+}
+
+qcmd := cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+    util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 Jane's Orders, key: 3
+2015/12/29 09:44:10 Jane's Orders, key: 1
+```
+
+Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id. Next, let's use a *binary* index.
+
+Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`:
+
+```golang
+cmd, err = riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucket(ordersBucket).
+    WithIndexName("OrderDate_bin").
+    WithRange("2013-10-01", "2013-10-31").
+    Build()
+if err != nil {
+    util.ErrExit(err)
+}
+
+if eerr := c.Execute(cmd); eerr != nil {
+    util.ErrExit(eerr)
+}
+
+qcmd = cmd.(*riak.SecondaryIndexQueryCommand)
+for _, rslt := range qcmd.Response.Results {
+    util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey))
+}
+```
+
+Which returns:
+
+```sh
+2015/12/29 09:44:10 October's Orders, key: 1
+2015/12/29 09:44:10 October's Orders, key: 2
+```
+
+Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/java.md b/content/riak/kv/3.0.3/developing/getting-started/java.md
new file mode 100644
index 0000000000..9a5d14d73e
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/java.md
@@ -0,0 +1,93 @@
+---
+title: "Getting Started with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Java"
+    identifier: "getting_started_java"
+    weight: 100
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/taste-of-riak/java
+  - /riak/kv/3.0.3/dev/taste-of-riak/java
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.3/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Java is required.
+
+## Client Setup
+
+To include the Riak Java client in your project, add it to your
+project's dependencies. Here is a Maven example:
+
+```xml
+<dependency>
+    <groupId>com.basho.riak</groupId>
+    <artifactId>riak-client</artifactId>
+    <version>2.1.1</version>
+</dependency>
+```
+
+Next, download
+[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java)
+source code for this tutorial, and save it to your working directory.
+
+{{% note title="Configuring for a local cluster" %}}
+The `TasteOfRiak.java` file that you downloaded is set up to communicate with
+a 1-node Riak cluster listening on `localhost` port 10017. We recommend
+modifying the connection info directly within the `setUpCluster()` method.
+{{% /note %}}
+
+If you execute the `TasteOfRiak.java` file within your IDE, you should
+see the following:
+
+```
+Basic object created
+Location object created for quote object
+StoreValue operation created
+Client object successfully created
+Object storage operation successfully completed
+Success! The object we created and the object we fetched have the same value
+Quote object successfully deleted
+Book object created
+Moby Dick information now stored in Riak
+Book object successfully fetched
+Success! All of our tests check out
+```
+
+Since Java doesn’t have a REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting Up the Cluster
+
+The first step in using the Riak Java client is to create a cluster
+object to facilitate all interactions with Riak. You'll see this on line
+72:
+
+```java
+RiakCluster cluster = setUpCluster();
+```
+
+This calls the private `setUpCluster` method which begins on line 25.
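+For reference, a `setUpCluster` method along these lines does the job. This is
+a minimal sketch rather than the exact code from `TasteOfRiak.java`; the host
+and port are the local devrel defaults mentioned in the note above:
+
+```java
+// Sketch only: build and start a one-node cluster against a local devrel node.
+private static RiakCluster setUpCluster() throws UnknownHostException {
+    RiakNode node = new RiakNode.Builder()
+            .withRemoteAddress("127.0.0.1")
+            .withRemotePort(10017)
+            .build();
+    RiakCluster cluster = new RiakCluster.Builder(node).build();
+    // The cluster must be started before it can execute any operations
+    cluster.start();
+    return cluster;
+}
+```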
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.3/developing/getting-started/java/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/java/crud-operations.md b/content/riak/kv/3.0.3/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..05de08a11a
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,206 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+aliases:
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on down the line.
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Iceman");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see the [Updating Objects]({{}}riak/kv/3.0.3/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/3.0.3/developing/usage/conflict-resolution/)
+documentation.
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book.
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO just like we stored the simpler
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+
+To update the POJO, we would use `UpdateValue` by
+extending a new `BookUpdate` class as follows:
+
+```java
+public static class BookUpdate extends UpdateValue.Update<Book> {
+    private final Book update;
+
+    public BookUpdate(Book update){
+        this.update = update;
+    }
+
+    @Override
+    public Book apply(Book t) {
+        if(t == null) {
+            t = new Book();
+        }
+
+        t.author = update.author;
+        t.body = update.body;
+        t.copiesOwned = update.copiesOwned;
+        t.isbn = update.isbn;
+        t.title = update.title;
+
+        return t;
+    }
+}
+```
+
+Then using the `BookUpdate` class with our `mobyDick` object:
+
+```java
+mobyDick.copiesOwned = 5;
+BookUpdate updatedBook = new BookUpdate(mobyDick);
+
+UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation)
+        .withUpdate(updatedBook).build();
+UpdateValue.Response response = client.execute(updateValue);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see the [Updating Objects]({{}}riak/kv/3.0.3/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/3.0.3/developing/usage/conflict-resolution/)
+documentation.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/java/object-modeling.md b/content/riak/kv/3.0.3/developing/getting-started/java/object-modeling.md
new file mode 100644
index 0000000000..3732555051
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/java/object-modeling.md
@@ -0,0 +1,432 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Object Modeling"
+    identifier: "getting_started_java_object"
+    weight: 102
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/taste-of-riak/object-modeling-java
+  - /riak/kv/3.0.3/dev/taste-of-riak/object-modeling-java
+---
+
+To get started, let's create the models that we'll be using.
+
+```java
+package com.basho.msgy.Models;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class Msg {
+    public String Sender;
+    public String Recipient;
+    public String Created;
+    public String Text;
+
+    public static Msg createNew(String sender, String recipient, String text) {
+        Msg msg = new Msg();
+        msg.Sender = sender;
+        msg.Recipient = recipient;
+        msg.Text = text;
+        msg.Created = GetCurrentISO8601Timestamp();
+        return msg;
+    }
+
+    private static String GetCurrentISO8601Timestamp() {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        // Java Dates don't have microsecond resolution :(
+        // Pad out to microseconds to match other examples.
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.convert.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "msgs";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. With the
+Java client, we can use the `@RiakKey` annotation to tell the client
+that we want to use the `UserName` member as the key. It will
+automatically use that value in the future, instead of having to pass the
+key in as another parameter when storing a value.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<username>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```java
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.riak.client.IRiakClient;
+import com.basho.riak.client.RiakRetryFailedException;
+import com.basho.riak.client.bucket.Bucket;
+
+public class MsgRepository {
+
+    static final String BUCKET_NAME = "Msgs";
+    protected RiakClient client;
+
+    public MsgRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public Msg get(String msgKey) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        FetchValue.Response response = client.execute(fetch);
+        return response.getValue(Msg.class);
+    }
+
+    public String save(Msg msg) throws Exception {
+        StoreValue store = new StoreValue.Builder(msg).build();
+        client.execute(store);
+        return generateKey(msg);
+    }
+
+    private String generateKey(Msg msg) {
+        return msg.Sender + "_" + msg.Created;
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.riak.client.IRiakClient;
+import com.basho.riak.client.RiakRetryFailedException;
+import com.basho.riak.client.bucket.Bucket;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class TimelineRepository {
+
+    static final String BUCKET_NAME = "Timelines";
+    protected RiakClient client;
+    protected MsgRepository msgRepo;
+
+    public TimelineRepository(RiakClient client) {
+        this.client = client;
+        this.msgRepo = new MsgRepository(this.client);
+    }
+
+    public void postMsg(Msg msg) throws Exception {
+        String msgKey = msgRepo.save(msg);
+
+        // Post to recipient's Inbox timeline
+        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);
+
+        // Post to sender's Sent timeline
+        
addToTimeline(msg, Timeline.TimelineType.Sent, msgKey); + } + + + private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception { + String timelineKey = generateKeyFromMsg(msg, type); + + Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey); + FetchValue fetch = new FetchValue.Builder(loc).build(); + Timeline timeline = client.execute(fetch).getValue(Timeline.class); + + if (timeline != null) { + timeline = addToExistingTimeline(timeline,msgKey); + } else { + timeline = createNewTimeline(msg, type, msgKey); + } + + StoreValue store = new StoreValue.Builder(timeline).build(); + client.execute(store); + } + + public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) { + String owner = getOwner(msg, type); + + Timeline newTimeline = new Timeline(); + newTimeline.Owner = owner; + newTimeline.Type = type.toString(); + newTimeline.Msgs.add(msgKey); + + return newTimeline; + } + + public Timeline addToExistingTimeline(Timeline timeline, String msgKey) { + timeline.Msgs.add(msgKey); + return timeline; + } + + public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws RiakRetryFailedException { + String timelineKey = generateKey(ownerUsername, type, date); + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(timelineKey, Timeline.class).execute(); + } + + private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) { + String owner = getOwner(msg, type); + String dateString = msg.Created.substring(0, 10); + return generateKey(owner, type, dateString); + } + + private String getOwner(Msg msg, Timeline.TimelineType type) { + if(type == Timeline.TimelineType.Inbox) + return msg.Recipient; + else + return msg.Sender; + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) { + String dateString = getIso8601DateStringFromDate(date); + return generateKey(ownerUsername, type, dateString); + } + + private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) { + return ownerUsername + "_" + type.toString() + "_" + dateString; + } + + private String getIso8601DateStringFromDate(Date date) { + TimeZone tz = TimeZone.getTimeZone("UTC"); + DateFormat df = new SimpleDateFormat("yyyy-MM-dd"); + df.setTimeZone(tz); + return df.format(date); + } + + +} + +// ---------------------------------------------------------------------------- + +package com.basho.msgy.Repositories; + +import com.basho.msgy.Models.User; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakRetryFailedException; +import com.basho.riak.client.bucket.Bucket; + +public class UserRepository { + static final String BUCKET_NAME = "Users"; + protected IRiakClient client; + + public UserRepository(IRiakClient client) { + this.client = client; + } + + public void save(User user) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + bucket.store(user).execute(); + } + + public User get(String UserName) throws RiakRetryFailedException { + Bucket bucket = client.fetchBucket(BUCKET_NAME).execute(); + return bucket.fetch(UserName, User.class).execute(); + } +} + +``` + +Finally, let's test them: + +```java +package com.basho.msgy; + +import com.basho.msgy.Models.Msg; +import com.basho.msgy.Models.Timeline; +import com.basho.msgy.Models.User; +import com.basho.msgy.Repositories.MsgRepository; +import com.basho.msgy.Repositories.TimelineRepository; +import 
com.basho.msgy.Repositories.UserRepository; +import com.basho.riak.client.IRiakClient; +import com.basho.riak.client.RiakException; +import com.basho.riak.client.RiakFactory; + +import java.util.Date; + +public class MsgyMain { + + public static void main(String[] args) throws RiakException { + // Setup our repositories + IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017); + + UserRepository userRepo = new UserRepository(client); + MsgRepository msgRepo = new MsgRepository(client); + TimelineRepository timelineRepo = new TimelineRepository(client); + + // Create and save users + User marleen = new User("marleenmgr", + "Marleen Manager", + "marleen.manager@basho.com"); + + User joe = new User("joeuser", + "Joe User", + "joe.user@basho.com"); + + userRepo.save(marleen); + userRepo.save(joe); + + // Create new Msg, post to timelines + Msg msg = Msg.createNew(marleen.UserName, + joe.UserName, + "Welcome to the company!"); + + timelineRepo.postMsg(msg); + + + // Get Joe's inbox for today, get first message + Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName, + Timeline.TimelineType.Inbox, + new Date()); + + Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0)); + + System.out.println("From: " + joesFirstMsg.Sender); + System.out.println("Msg : " + joesFirstMsg.Text); + System.out.println(""); + + client.shutdown(); + } +} +``` + +As you can see, the repository pattern helps us with a few things: + + - It helps us to see if an object exists before creating a new one + - It keeps our buckets and key names consistent + - It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/java/querying.md b/content/riak/kv/3.0.3/developing/getting-started/java/querying.md new file mode 100644 index 0000000000..45634bb450 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/java/querying.md @@ -0,0 +1,280 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Java" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Querying" + identifier: "getting_started_java_query" + weight: 101 + parent: "getting_started_java" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/querying-java + - /riak/kv/3.0.3/dev/taste-of-riak/querying-java +--- + +## Java Version Setup + +For the Java version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`. 
You
+may import this code into your favorite editor, or just run it from the
+command line using the commands in `BuildAndRun.sh` if you are running
+on a *nix OS.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can be as simple
+as using the same key across multiple buckets for different types of
+data to having fields in your data that are related by name. These
+querying methods will introduce you to some ways of laying out your data
+in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled up info about orders such as total, etc. You can find the source
+for these POJOs in `Customer.java`, `Order.java` and
+`OrderSummaries.java`. Let's put some data into Riak so we can play
+with it.
+
+```java
+// From SipOfRiak.java
+
+private static Customer createCustomer() {
+    Customer customer = new Customer();
+    customer.CustomerId = 1;
+    customer.Name = "John Smith";
+    customer.Address = "123 Main Street";
+    customer.City = "Columbus";
+    customer.State = "Ohio";
+    customer.Zip = "43210";
+    customer.Phone = "+1-614-555-5555";
+    customer.CreatedDate = "2013-10-01 14:30:26";
+    return customer;
+}
+
+private static ArrayList<Order> createOrders() {
+    ArrayList<Order> orders = new ArrayList<Order>();
+
+    Order order1 = new Order();
+    order1.OrderId = 1;
+    order1.CustomerId = 1;
+    order1.SalespersonId = 9000;
+    order1.Items.add(
+            new Item("TCV37GIT4NJ",
+                    "USB 3.0 Coffee Warmer",
+                    15.99));
+    order1.Items.add(
+            new Item("PEG10BBF2PP",
+                    "eTablet Pro; 24GB; Grey",
+                    399.99));
+    order1.Total = 415.98;
+    order1.OrderDate = "2013-10-01 14:42:26";
+    orders.add(order1);
+
+    Order order2 = new Order();
+    order2.OrderId = 2;
+    order2.CustomerId = 1;
+    order2.SalespersonId = 9001;
+    order2.Items.add(
+            new Item("OAX19XWN0QP",
+                    "GoSlo Digital Camera",
+                    359.99));
+    order2.Total = 359.99;
+    order2.OrderDate = "2013-10-15 16:43:16";
+    orders.add(order2);
+
+    Order order3 = new Order();
+    order3.OrderId = 3;
+    order3.CustomerId = 1;
+    order3.SalespersonId = 9000;
+    order3.Items.add(
+            new Item("WYK12EPU5EZ",
+                    "Call of Battle: Goats - Gamesphere 4",
+                    69.99));
+    order3.Items.add(
+            new Item("TJB84HAA8OA",
+                    "Bricko Building Blocks",
+                    4.99));
+    order3.Total = 74.98;
+    order3.OrderDate = "2013-11-03 17:45:28";
+    orders.add(order3);
+    return orders;
+}
+
+private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
+    OrderSummary orderSummary = new OrderSummary();
+    orderSummary.CustomerId = 1;
+    for(Order order: orders)
+    {
+        orderSummary.Summaries.add(new OrderSummaryItem(order));
+    }
+    return orderSummary;
+}
+
+public static void main(String[] args) throws RiakException {
+
+    System.out.println("Creating Data");
+    Customer customer = createCustomer();
+    ArrayList<Order> orders = createOrders();
+    OrderSummary orderSummary = createOrderSummary(orders);
+
+    System.out.println("Starting Client");
+    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);
+
+    System.out.println("Creating Buckets");
+    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
+    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
+    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();
+
+    System.out.println("Storing Data");
+    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
+    for (Order order : orders) {
+        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
+    }
+    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```java
+    System.out.println("Fetching related data by shared key");
+    String key = "1";
+    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
+    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
+    System.out.format("Customer 1: %s\n", fetchedCust);
+    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.3/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```java
+    System.out.println("Adding Index Data");
+    IRiakObject riakObj = ordersBucket.fetch("1").execute();
+    riakObj.addIndex("SalespersonId", 9000);
+    riakObj.addIndex("OrderDate", "2013-10-01");
+    ordersBucket.store(riakObj).execute();
+
+    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
+    riakObj2.addIndex("SalespersonId", 9001);
+    riakObj2.addIndex("OrderDate", "2013-10-15");
+    ordersBucket.store(riakObj2).execute();
+
+    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
+    riakObj3.addIndex("SalespersonId", 9000);
+    riakObj3.addIndex("OrderDate", "2013-11-03");
+    ordersBucket.store(riakObj3).execute();
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's id of `9000`.
+
+```java
+    // Query for orders where the SalespersonId index is set to 9000
+    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
+            .withValue(9000).execute();
+
+    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
+```
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID. Next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```java + // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 + List octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate")) + .from("2013-10-01").to("2013-10-31").execute(); + + System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders)); +``` + +Which returns: + +```text +October's Orders: 1, 2 +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/nodejs.md b/content/riak/kv/3.0.3/developing/getting-started/nodejs.md new file mode 100644 index 0000000000..405c890c9f --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/nodejs.md @@ -0,0 +1,104 @@ +--- +title: "Getting Started with NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "NodeJS" + identifier: "getting_started_nodejs" + weight: 104 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/nodejs + - /riak/kv/3.0.3/dev/taste-of-riak/nodejs +--- + +[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js +[npm]: https://www.npmjs.com/package/basho-riak-client +[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.3/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Node.js 0.12 or later is +required. + +Code for these examples is available [here][introduction.js]. To run, follow +these directions: + +```bash +git clone git://github.com/basho/riak-nodejs-client-examples +cd riak-nodejs-client-examples +npm install +node ./app.js +``` + +### Client Setup + +Install [the Riak Node.js Client][node_js_installation] through [NPM][npm]. + +### Connecting to Riak + +Connecting to Riak with the Riak Node.js Client requires creating a new client +object and using the callback argument to know when the client is fully +initialized: + +```javascript +var Riak = require('basho-riak-client'); +var nodes = [ + 'riak-test:10017', + 'riak-test:10027', + 'riak-test:10037', + 'riak-test:10047' +]; +var client = new Riak.Client(nodes, function (err, c) { + // NB: at this point the client is fully initialized, and + // 'client' and 'c' are the same object +}); +``` + +This creates a new `Riak.Client` object which handles all the details of +tracking active nodes and also provides load balancing. The `Riak.Client` object +is used to send commands to Riak. 
When your application is completely done with +Riak communications, the following method can be used to gracefully shut the +client down and exit Node.js: + +```javascript +client.stop(function (err, rslt) { + // NB: you may wish to check err + process.exit(); +}); +``` + +Let's make sure the cluster is online with a `Ping` request: + +```javascript +var assert = require('assert'); + +client.ping(function (err, rslt) { + if (err) { + throw new Error(err); + } else { + // On success, ping returns true + assert(rslt === true); + } +}); +``` + +This is some simple code to test that a node in a Riak cluster is online - we +send a simple ping message. Even if the cluster isn't present, the Riak Node.js +Client will return a response message. In the callback it is important to check +that your activity was successful by checking the `err` variable. + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.3/developing/getting-started/nodejs/crud-operations) + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/3.0.3/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..494367418c --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,138 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "CRUD Operations" + identifier: "getting_started_nodejs_crud" + weight: 100 + parent: "getting_started_nodejs" +toc: true +aliases: +--- + +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + +### Creating Objects In Riak KV + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going +to want us to do productive work. Let's create some data to save in Riak. + +The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak +key/value objects. At the most basic, a `RiakObject` is responsible for +identifying your object and for translating it into a format that can be easily +saved to Riak. + +```javascript +var async = require('async'); + +var people = [ + { + emailAddress: "bashoman@basho.com", + firstName: "Basho", + lastName: "Man" + }, + { + emailAddress: "johndoe@gmail.com", + firstName: "John", + lastName: "Doe" + } +]; + +var storeFuncs = []; +people.forEach(function (person) { + // Create functions to execute in parallel to store people + storeFuncs.push(function (async_cb) { + client.storeValue({ + bucket: 'contributors', + key: person.emailAddress, + value: person + }, + function(err, rslt) { + async_cb(err, rslt); + } + ); + }); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +In this sample, we create a collection of `Person` objects and then save each +`Person` to Riak. Once again, we check the response from Riak. + +### Reading from Riak + +Let's find a person! + +```javascript +var logger = require('winston'); + +client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true }, + function (err, rslt) { + if (err) { + throw new Error(err); + } else { + var riakObj = rslt.values.shift(); + var bashoman = riakObj.value; + logger.info("I found %s in 'contributors'", bashoman.emailAddress); + } + } +); +``` + +We use `client.fetchValue` to retrieve an object from Riak. 
This returns an
+array of `RiakObject` objects which helpfully encapsulate the
+communication with Riak.
+
+After verifying that we've been able to communicate with Riak *and* that we have
+a successful result, we use the `value` property to get the object, which has
+already been converted to a JavaScript object due to the use of `convertToJs:
+true` in the options.
+
+### Modifying Existing Data
+
+Let's say that Basho Man has decided to be known as Riak Man:
+
+```javascript
+bashoman.firstName = "Riak";
+riakObj.setValue(bashoman);
+
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Updating an object involves modifying a `RiakObject` then using
+`client.storeValue` to save the existing object.
+
+### Deleting Data
+
+```javascript
+client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Just like other operations, we check the results that have come back from Riak
+to make sure the object was successfully deleted.
+
+The Riak Node.js Client has a lot of additional functionality that makes it easy
+to build rich, complex applications with Riak. Check out the
+[documentation][nodejs_wiki] to learn more about working with the Riak Node.js
+Client and Riak.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/3.0.3/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..385b62c6f0
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Object Modeling"
+    identifier: "getting_started_nodejs_object"
+    weight: 102
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/kv/3.0.3/dev/taste-of-riak/object-modeling-nodejs
+---
+
+To get started, let's create the models that we'll be using.
+
+* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
+* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
+* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br>`marketing_group_INBOX_2014-03-06` |
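+
+As a concrete illustration of these patterns, key construction is plain string
+assembly. Here is a minimal sketch; the helper name is ours, not from the
+linked models:
+
+```javascript
+// Sketch only: build a Timelines key like 'joeuser_SENT_2014-03-06'
+function makeTimelineKey(owner, type, date) {
+    // date is a JavaScript Date; keep only the YYYY-MM-DD part
+    var datePart = date.toISOString().substring(0, 10);
+    return owner + '_' + type.toUpperCase() + '_' + datePart;
+}
+
+console.log(makeTimelineKey('joeuser', 'sent', new Date('2014-03-06T02:05:13.556Z')));
+// joeuser_SENT_2014-03-06
+```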
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<username>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
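+
+To make the repository idea concrete without leaving this page, here is a
+minimal sketch of what a user repository might look like on top of the client
+shown earlier. The names here are illustrative, not the exact code from the
+linked examples:
+
+```javascript
+// Sketch only: a tiny repository that pins down our bucket and key conventions.
+function UserRepository(client) {
+    this.client = client;
+    this.bucketName = 'Users';
+}
+
+UserRepository.prototype.save = function (user, callback) {
+    // The userName is the natural key, as described above
+    this.client.storeValue({
+        bucket: this.bucketName,
+        key: user.userName,
+        value: user
+    }, callback);
+};
+
+UserRepository.prototype.get = function (userName, callback) {
+    this.client.fetchValue({
+        bucket: this.bucketName,
+        key: userName,
+        convertToJs: true
+    }, function (err, rslt) {
+        if (err) { return callback(err); }
+        var riakObj = rslt.values.shift();
+        callback(null, riakObj ? riakObj.value : null);
+    });
+};
+```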
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/nodejs/querying.md b/content/riak/kv/3.0.3/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..d611b5afa1 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/nodejs/querying.md @@ -0,0 +1,146 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/querying-nodejs + - /riak/kv/3.0.3/dev/taste-of-riak/querying-nodejs +--- + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. + +* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33) +* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262) + +While individual Customer and Order objects don't change much (or +shouldn't change), the "Order Summary" object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. 
If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)
+
+Which returns our amalgamated objects:
+
+```bash
+info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
+info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.3/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's id of `9000`.
+
+[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID. Next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
+2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175)
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys.
+* You can search for specific values, or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/php.md b/content/riak/kv/3.0.3/developing/getting-started/php.md
new file mode 100644
index 0000000000..3cb23d84b3
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/php.md
@@ -0,0 +1,80 @@
+---
+title: "Getting Started with PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "PHP"
+    identifier: "getting_started_php"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/taste-of-riak/php
+  - /riak/kv/3.0.3/dev/taste-of-riak/php
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.3/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of PHP is required, as well as [Composer](https://getcomposer.org/) to fetch the client library package.
+
+## Client Setup
+Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)).
+
+From the `taste-of-riak` directory, use composer to install the Riak PHP 2.0 client.
+
+```bash
+php path/to/your/composer.phar install
+
+# If you did a global install of composer, run this instead:
+composer install
+```
+
+If you set up a local Riak cluster using the [[five minute install]] method, change line 11 from `->onPort(8098)` to `->onPort(10018)`.
+
+Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:
+
+```text
+Reading Objects From Riak...
+Updating Objects In Riak...
+Deleting Objects From Riak...
+Working With Complex Objects...
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+Yay, success!
+
+Since we didn't use PHP's REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting up the PHP Client and connections
+
+```php
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+```
+
+This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
+Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.
+
+We are now ready to start interacting with Riak.
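+
+If you want a quick sanity check before moving on, a store-and-fetch round trip
+looks like this. This is a minimal sketch using the same command builders that
+the next chapter covers in detail; the bucket and key names are just examples:
+
+```php
+// Sketch only: store a value at a location, then read it back.
+$location = new Riak\Location('hello', new Riak\Bucket('test'));
+
+(new Command\Builder\StoreObject($riak))
+    ->buildObject('world')
+    ->atLocation($location)
+    ->build()
+    ->execute();
+
+$response = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute();
+
+echo $response->getObject()->getData(); // world
+```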
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.3/developing/getting-started/php/crud-operations) + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/php/crud-operations.md b/content/riak/kv/3.0.3/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..92a258543e --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/php/crud-operations.md @@ -0,0 +1,187 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with PHP" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "CRUD Operations" + identifier: "getting_started_php_crud" + weight: 100 + parent: "getting_started_php" +toc: true +aliases: +--- + +## Creating Objects In Riak +First, let’s create a few objects and a bucket to keep them in. + +```php +$bucket = new Riak\Bucket('testBucket'); + +$val1 = 1; +$location1 = new Riak\Location('one', $bucket); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val1) + ->atLocation($location1) + ->build(); +$storeCommand1->execute(); +``` + +In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key. + +```php +$val2 = 'two'; +$location2 = new Riak\Location('two', $bucket); + +$storeCommand2 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val2) + ->atLocation($location2) + ->build(); +$storeCommand2->execute(); +``` + +That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now. + +```php +$val3 = ['myValue' => 3]; +$location3 = new Riak\Location('three', $bucket); + +$storeCommand3 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($val3) + ->atLocation($location3) + ->build(); +$storeCommand3->execute(); +``` + +## Reading Objects From Riak +Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect. + +```php +$response1 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location1) + ->build() + ->execute(); + +$response2 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location2) + ->build() + ->execute(); + +$response3 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location3) + ->withDecodeAsAssociative() + ->build() + ->execute(); + +print_r($response1->getObject()->getData()); +print_r($response2->getObject()->getData()); +print_r($response3->getObject()->getData()); +``` + +That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html). +For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object. + +In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data. + +## Updating Objects In Riak +While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42. 
+ +```php +$object3 = $response3->getObject(); +$data3 = $object3->getData(); + +$data3['myValue'] = 42; +$object3 = $object3->setData(json_encode($data3)); + +$updateCommand = (new Command\Builder\StoreObject($riak)) + ->withObject($object3) + ->atLocation($location3) + ->build(); + +$updateCommand->execute(); +``` + +First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object. +To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object. + +## Deleting Objects From Riak +As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it. + +```php +(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute(); +(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute(); +``` + +### Working With Complex Objects +Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take for example, this plain old PHP object(POPO) that encapsulates some knowledge about a book. + +```php +class Book +{ + var $title; + var $author; + var $body; + var $isbn; + var $copiesOwned; +} + +$book = new Book(); +$book->isbn = '1111979723'; +$book->title = 'Moby Dick'; +$book->author = 'Herman Melville'; +$book->body = 'Call me Ishmael. Some years ago...'; +$book->copiesOwned = 3; +``` + +Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now: + +```php +$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books')); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($book) + ->atLocation($bookLocation) + ->build(); + +$storeCommand1->execute(); +``` + +Some of you may be thinking “But how does the Riak client encode/decode my object”? If we fetch the binary version of our book back and print it as a string, we shall know: + +```php +$fetchBookResponse = (new Command\Builder\FetchObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); + +print('Serialized Object:' . PHP_EOL); +print($fetchBookResponse->getBody() . PHP_EOL); +``` + +```json +Serialized Object: +{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3} +``` + +JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder. 
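+
+If you'd rather work with the decoded value than the raw JSON, the same `withDecodeAsAssociative()` option shown in the reading section applies here too. A short sketch, reusing the `$bookLocation` from above:
+
+```php
+$decodedBookResponse = (new Command\Builder\FetchObject($riak))
+    ->atLocation($bookLocation)
+    ->withDecodeAsAssociative()
+    ->build()
+    ->execute();
+
+// The JSON payload comes back as a plain PHP associative array.
+$bookData = $decodedBookResponse->getObject()->getData();
+print($bookData['title'] . ' by ' . $bookData['author'] . PHP_EOL);
+```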
+ +Now that we’ve ruined the magic of object encoding, let’s clean up our mess: + +```php +(new Command\Builder\DeleteObject($riak)) + ->atLocation($bookLocation) + ->build() + ->execute(); +``` + +## Next Steps + +More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/3.0.3/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents. + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/php/querying.md b/content/riak/kv/3.0.3/developing/getting-started/php/querying.md new file mode 100644 index 0000000000..59449fe929 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/php/querying.md @@ -0,0 +1,408 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with PHP" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Querying" + identifier: "getting_started_php_query" + weight: 101 + parent: "getting_started_php" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/querying-php + - /riak/kv/3.0.3/dev/taste-of-riak/querying-php +--- + +## A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +## Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. 
+```php
+<?php
+
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Command;
+use Basho\Riak\Location;
+use Basho\Riak\Node;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Class definitions for our models
+
+class Customer
+{
+    var $customerId;
+    var $name;
+    var $address;
+    var $city;
+    var $state;
+    var $zip;
+    var $phone;
+    var $createdDate;
+}
+
+class Order
+{
+    public function __construct()
+    {
+        $this->items = array();
+    }
+    var $orderId;
+    var $customerId;
+    var $salespersonId;
+    var $items;
+    var $total;
+    var $orderDate;
+}
+
+class Item
+{
+    public function __construct($itemId, $title, $price)
+    {
+        $this->itemId = $itemId;
+        $this->title = $title;
+        $this->price = $price;
+    }
+    var $itemId;
+    var $title;
+    var $price;
+}
+
+class OrderSummary
+{
+    public function __construct()
+    {
+        $this->summaries = array();
+    }
+    var $customerId;
+    var $summaries;
+}
+
+class OrderSummaryItem
+{
+    public function __construct(Order $order)
+    {
+        $this->orderId = $order->orderId;
+        $this->total = $order->total;
+        $this->orderDate = $order->orderDate;
+    }
+    var $orderId;
+    var $total;
+    var $orderDate;
+}
+
+
+// Creating Data
+$customer = new Customer();
+$customer->customerId = 1;
+$customer->name = 'John Smith';
+$customer->address = '123 Main Street';
+$customer->city = 'Columbus';
+$customer->state = 'Ohio';
+$customer->zip = '43210';
+$customer->phone = '+1-614-555-5555';
+$customer->createdDate = '2013-10-01 14:30:26';
+
+
+$orders = [];
+
+$order1 = new Order();
+$order1->orderId = 1;
+$order1->customerId = 1;
+$order1->salespersonId = 9000;
+$order1->items = [
+    new Item(
+        'TCV37GIT4NJ',
+        'USB 3.0 Coffee Warmer',
+        15.99
+    ),
+    new Item(
+        'PEG10BBF2PP',
+        'eTablet Pro; 24GB; Grey',
+        399.99
+    )
+];
+$order1->total = 415.98;
+$order1->orderDate = '2013-10-01 14:42:26';
+$orders[] = $order1;
+
+$order2 = new Order();
+$order2->orderId = 2;
+$order2->customerId = 1;
+$order2->salespersonId = 9001;
+$order2->items = [
+    new Item(
+        'OAX19XWN0QP',
+        'GoSlo Digital Camera',
+        359.99
+    )
+];
+$order2->total = 359.99;
+$order2->orderDate = '2013-10-15 16:43:16';
+$orders[] = $order2;
+
+$order3 = new Order();
+$order3->orderId = 3;
+$order3->customerId = 1;
+$order3->salespersonId = 9000;
+$order3->items = [
+    new Item(
+        'WYK12EPU5EZ',
+        'Call of Battle: Goats - Gamesphere 4',
+        69.99
+    ),
+    new Item(
+        'TJB84HAA8OA',
+        'Bricko Building Blocks',
+        4.99
+    )
+];
+$order3->total = 74.98;
+$order3->orderDate = '2013-11-03 17:45:28';
+$orders[] = $order3;
+
+
+$orderSummary = new OrderSummary();
+$orderSummary->customerId = 1;
+foreach ($orders as $order) {
+    $orderSummary->summaries[] = new OrderSummaryItem($order);
+}
+unset($order);
+
+
+
+// Starting Client
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Creating Buckets
+$customersBucket = new Riak\Bucket('Customers');
+$ordersBucket = new Riak\Bucket('Orders');
+$orderSummariesBucket = new Riak\Bucket('OrderSummaries');
+
+// Storing Data
+$storeCustomer = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($customer)
+    ->atLocation(new Location($customer->customerId, $customersBucket))
+    ->build();
+$storeCustomer->execute();
+
+foreach ($orders as $order) {
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($order)
+        ->atLocation(new Location($order->orderId, $ordersBucket))
+        ->build();
+    $storeOrder->execute();
+}
+unset($order);
+
+$storeSummary = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($orderSummary)
+    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
+    
->build(); +$storeSummary->execute(); +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```php +// Fetching related data by shared key +$fetched_customer = (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $customersBucket)) + ->build()->execute()->getObject()->getData(); + +$fetched_customer->orderSummary = + (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $orderSummariesBucket)) + ->build()->execute()->getObject()->getData(); + +print("Customer with OrderSummary data: \n"); +print_r($fetched_customer); +``` + +Which returns our amalgamated objects: + +```text +Customer with OrderSummary data: +stdClass Object +( + [customerId] => 1 + [name] => John Smith + [address] => 123 Main Street + [city] => Columbus + [state] => Ohio + [zip] => 43210 + [phone] => +1-614-555-5555 + [createdDate] => 2013-10-01 14:30:26 + [orderSummary] => stdClass Object + ( + [customerId] => 1 + [summaries] => Array + ( + [0] => stdClass Object + ( + [orderId] => 1 + [total] => 415.98 + [orderDate] => 2013-10-01 14:42:26 + ) + + [1] => stdClass Object + ( + [orderId] => 2 + [total] => 359.99 + [orderDate] => 2013-10-15 16:43:16 + ) + + [2] => stdClass Object + ( + [orderId] => 3 + [total] => 74.98 + [orderDate] => 2013-11-03 17:45:28 + ) + ) + ) +) +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.3/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```php +// Adding Index Data +$keys = array(1,2,3); +foreach ($keys as $key) { + $orderLocation = new Location($key, $ordersBucket); + $orderObject = (new Command\Builder\FetchObject($riak)) + ->atLocation($orderLocation) + ->build()->execute()->getObject(); + + $order = $orderObject->getData(); + + $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId); + $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate); + + $storeOrder = (new Command\Builder\StoreObject($riak)) + ->withObject($orderObject) + ->atLocation($orderLocation) + ->build(); + $storeOrder->execute(); +} +unset($key); + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. 
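+
+If you want to verify what was just written, you can fetch one of the orders again and inspect its index metadata before querying. A small sketch, assuming your client version exposes the stored entries through a `getIndexes()` accessor on the fetched object (check your client's API docs if this differs):
+
+```php
+$checkOrder = (new Command\Builder\FetchObject($riak))
+    ->atLocation(new Location('1', $ordersBucket))
+    ->build()->execute()->getObject();
+
+// Expect entries for both SalespersonId_int and OrderDate_bin.
+print_r($checkOrder->getIndexes());
+```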
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+?>
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query, we also used the `->withReturnTerms(true)` option, which, as you can see, returns the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/getting-started/python.md b/content/riak/kv/3.0.3/developing/getting-started/python.md
new file mode 100644
index 0000000000..406badc740
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/getting-started/python.md
@@ -0,0 +1,103 @@
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/taste-of-riak/python
+  - /riak/kv/3.0.3/dev/taste-of-riak/python
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.3/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`.
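+
+Whichever route you take, it's worth confirming that the interpreter and package manager you expect are actually first on your path before continuing; for example:
+
+```bash
+python --version
+pip --version
+```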
+ +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` - Header files and a static library for Python +* `libffi-dev` - Foreign function interface library +* `libssl-dev` - libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.3/developing/getting-started/python/crud-operations) + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/python/crud-operations.md b/content/riak/kv/3.0.3/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..fcd69a3da8 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/python/crud-operations.md @@ -0,0 +1,150 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/python/object-modeling.md b/content/riak/kv/3.0.3/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..b8fcf99206 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/python/object-modeling.md @@ -0,0 +1,264 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/object-modeling-python + - /riak/kv/3.0.3/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>
`marketing_group_Inbox_2014-03-06` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. For the +`Msgs` bucket, let's use a combination of the username and the posting +datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601) +format. This combination gives us the pattern `_`, +which produces keys like `joeuser_2014-03-05T23:20:28`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `__` for groups, which will look like +`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2MB. Objects larger than that can hurt +performance, especially if many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object model, let's write some +repositories to help create and work with these objects in Riak: + +```python +class UserRepository: + BUCKET = 'Users' + + def __init__(self, client): + self.client = client + + def save(self, user): + riak_obj = self.client.bucket(self.BUCKET).get(user['user_name']) + riak_obj.data = user + return riak_obj.store() + + def get(self, user_name): + riak_obj = self.client.bucket(self.BUCKET).get(user_name) + return riak_obj.data + + +class MsgRepository: + BUCKET = 'Msgs' + + def __init__(self, client): + self.client = client + + def save(self, msg): + msgs = self.client.bucket(self.BUCKET) + key = self._generate_key(msg) + + riak_obj = msgs.get(key) + + if not riak_obj.exists: + riak_obj.data = msg + riak_obj.store(if_none_match=True) + + return riak_obj + + def get(self, key): + riak_obj = self.client.bucket(self.BUCKET).get(key) + return riak_obj.data + + def _generate_key(self, msg): + return msg['sender'] + '_' + msg['created'] + + +class TimelineRepository: + BUCKET = 'Timelines' + SENT = 'Sent' + INBOX = 'Inbox' + + def __init__(self, client): + self.client = client + self.msg_repo = MsgRepository(client) + + def post_message(self, msg): + # Save the canonical copy + saved_message = self.msg_repo.save(msg) + msg_key = saved_message.key + + # Post to sender's Sent timeline + self._add_to_timeline(msg, self.SENT, msg_key) + + # Post to recipient's Inbox timeline + self._add_to_timeline(msg, self.INBOX, msg_key) + + def get_timeline(self, owner, msg_type, date): + key = self._generate_key(owner, msg_type, date) + riak_obj = self.client.bucket(self.BUCKET).get(key) + return riak_obj.data + + def _add_to_timeline(self, msg, msg_type, msg_key): + timeline_key = self._generate_key_from_msg(msg, msg_type) + riak_obj = self.client.bucket(self.BUCKET).get(timeline_key) + + if riak_obj.exists: + riak_obj = self._add_to_existing_timeline(riak_obj, + msg_key) + else: + riak_obj = self._create_new_timeline(riak_obj, + msg, msg_type, + msg_key) + + return riak_obj.store() + + def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key): + owner = self._get_owner(msg, msg_type) + new_timeline = {'owner': owner, + 'msg_type': msg_type, + 'msgs': [msg_key]} + + riak_obj.data = new_timeline + return riak_obj + + def 
_add_to_existing_timeline(self, riak_obj, msg_key): + riak_obj.data['msgs'].append(msg_key) + return riak_obj + + def _get_owner(self, msg, msg_type): + if msg_type == self.INBOX: + return msg['recipient'] + else: + return msg['sender'] + + def _generate_key_from_msg(self, msg, msg_type): + owner = self._get_owner(msg, msg_type) + return self._generate_key(owner, msg_type, msg['created']) + + def _generate_key(self, owner, msg_type, datetimestr): + dateString = string.split(datetimestr, 'T', 1)[0] + return owner + '_' + msg_type + '_' + dateString + +``` + +Finally, let's test them: + +```python +# Setup our repositories +client = riak.RiakClient(pb_port=10017, protocol='pbc') +userRepo = UserRepository(client) +msgsRepo = MsgRepository(client) +timelineRepo = TimelineRepository(client) + +# Save users +userRepo.save(marleen) +userRepo.save(joe) + +# Post msg to timelines +timelineRepo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timelineRepo.get_timeline( + joe['user_name'], + TimelineRepository.INBOX, + datetime.utcnow().isoformat()) + +joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0]) + +print 'From: {0}\nMsg : {1}\n\n'.format( + joes_first_message['sender'], + joes_first_message['text']) + +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/python/querying.md b/content/riak/kv/3.0.3/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..67c961b506 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/python/querying.md @@ -0,0 +1,240 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Python" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Querying" + identifier: "getting_started_python_query" + weight: 101 + parent: "getting_started_python" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/querying-python + - /riak/kv/3.0.3/dev/taste-of-riak/querying-python +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. 
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index for all customer orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```python +customer = customer_bucket.get('1').data +customer['order_summary'] = order_summary_bucket.get('1').data +customer +``` + +Which returns our amalgamated objects: + +```python +{ + u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210', + u'created_date': u'2013-10-01 14:30:26', + 'order_summary': { + u'customer_id': 1, u'summaries': [ + {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98}, + {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99}, + {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98} + ]}, + u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street', + u'customer_id': 1 +} +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.3/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.3/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.3/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```python +for i in range(1, 4): + order = order_bucket.get(str(i)) + # Initialize our secondary indices + order.add_index('salesperson_id_int', order.data['salesperson_id']) + order.add_index('order_date_bin', order.data['order_date']) + order.store() +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. +Now let's find all of Jane Appleseed's processed orders, we'll lookup the orders by searching the `saleperson_id_int` index for Jane's id of `9000`. + +```python +janes_orders = order_bucket.get_index("salesperson_id_int", 9000) +janes_orders.results +``` + +Which returns: + +```text +['1', '3'] +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id, next let's use a "binary" index. +Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`. + +```python +october_orders = order_bucket.get_index("order_date_bin", + "2013-10-01", "2013-10-31") +october_orders.results +``` + +Which returns: + +```text +['1', '2'] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/ruby.md b/content/riak/kv/3.0.3/developing/getting-started/ruby.md new file mode 100644 index 0000000000..6dc805d80c --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/ruby.md @@ -0,0 +1,68 @@ +--- +title: "Getting Started with Ruby" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Ruby" + identifier: "getting_started_ruby" + weight: 101 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/ruby + - /riak/kv/3.0.3/dev/taste-of-riak/ruby +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.3/using/running-a-cluster) first. To try this flavor +of Riak, a working installation of Ruby is required. + +## Client Setup + +First, install the Riak Ruby client via RubyGems. + +```bash +gem install riak-client +``` + +Start IRB, the Ruby REPL, and let’s get set up. Enter the following into +IRB: + +```ruby +require 'riak' +``` + +If you are using a single local Riak node, use the following to create a +new client instance, assuming that the node is running on `localhost` +port 8087: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087) + +# Since the Ruby Riak client uses the Protocol Buffers API by default, +# you can also just enter this: +client = Riak::Client.new(:pb_port => 8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```ruby +client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017) + +# For the reasons explain in the snippet above, this will also work: +client = Riak::Client.new(:pb_port => 10017) +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.3/developing/getting-started/ruby/crud-operations) + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/3.0.3/developing/getting-started/ruby/crud-operations.md new file mode 100644 index 0000000000..6c3b269964 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/ruby/crud-operations.md @@ -0,0 +1,151 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Ruby" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "CRUD Operations" + identifier: "getting_started_ruby_crud" + weight: 100 + parent: "getting_started_ruby" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```ruby +my_bucket = client.bucket("test") + +val1 = 1 +obj1 = my_bucket.new('one') +obj1.data = val1 +obj1.store() +``` + +In this first example we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```ruby +val2 = "two" +obj2 = my_bucket.new('two') +obj2.data = val2 +obj2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```ruby +val3 = { myValue: 3 } +obj3 = my_bucket.new('three') +obj3.data = val3 +obj3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. 
+ +```ruby +fetched1 = my_bucket.get('one') +fetched2 = my_bucket.get('two') +fetched3 = my_bucket.get('three') + +fetched1.data == val1 +fetched2.data == val2 +fetched3.data.to_json == val3.to_json +``` + +That was easy. we simply request the objects by key. in the last +example, we converted to JSON so we can compare a string key to a symbol +key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to 42. + +```ruby +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called either against the bucket or the +object. + +```ruby +my_bucket.delete('one') +obj2.delete() +obj3.delete() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this Ruby hash that encapsulates some knowledge about +a book. + +```ruby +book = { + :isbn => '1111979723', + :title => 'Moby Dick', + :author => 'Herman Melville', + :body => 'Call me Ishmael. Some years ago...', + :copies_owned => 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now. + +```ruby +books_bucket = client.bucket('books') +new_book = books_bucket.new(book[:isbn]) +new_book.data = book +new_book.store() +``` + +Some of you may be thinking, "But how does the Ruby Riak client +encode/decode my object?" If we fetch our book back and print the raw +data, we shall know: + +```ruby +fetched_book = books_bucket.get(book[:isbn]) +puts fetched_book.raw_data +``` + +Raw Data: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +JSON! The Ruby Riak client will serialize objects to JSON when it comes +across structured data like hashes. For more advanced control over +serialization you can use a library called +[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling +layer over the basic riak client. Ripple falls outside the scope of +this document but we shall visit it later. + +Now, let’s clean up our mess: + +```ruby +new_book.delete() +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/3.0.3/developing/getting-started/ruby/object-modeling.md new file mode 100644 index 0000000000..fec02ffcda --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/ruby/object-modeling.md @@ -0,0 +1,295 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Ruby" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Object Modeling" + identifier: "getting_started_ruby_object" + weight: 102 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/object-modeling-ruby + - /riak/kv/3.0.3/dev/taste-of-riak/object-modeling-ruby +--- + +To get started, let's create the models that we'll be using. Since the +[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses +hashes when converting to and from JSON, we'll use the library +[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically +coerce class properties to and from hashes. 
You can install this library
+with `gem install hashie`.
+
+```ruby
+# Encoding: utf-8
+
+require 'riak'
+require 'hashie'
+require 'time'
+
+class User < Hashie::Dash
+  property :user_name
+  property :full_name
+  property :email
+end
+
+class Msg < Hashie::Dash
+  property :from
+  property :to
+  property :created
+  property :text
+end
+
+class Timeline < Hashie::Dash
+  property :owner
+  property :type
+  property :msgs
+end
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>
`marketing_group_Inbox_2014-03-06Z` | + +For the `Users` bucket, we can be certain that we will want each +username to be unique, so let's use the `username` as the key. For the +`Msgs` bucket, let's use a combination of the username and the posting +datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601) +format. This combination gives us the pattern `_`, +which produces keys like `joeuser_2014-03-05T23:20:28`. + +Now for `Timelines`, we need to differentiate between `Inbox` and `Sent` +timelines, so we can simply add that type into the key name. We will +also want to partition each collection object into some time period, +that way the object doesn't grow too large (see note below). + +For `Timelines`, let's use the pattern `__` for +users, and `_Inbox_` for groups, which will look like +`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`, +respectively. + +{{% note title="Note" %}} +Riak performs best with objects under 1-2MB. Objects larger than that can hurt +performance, especially many siblings are being created. We will cover +siblings, sibling resolution, and sibling explosions in the next chapter. +{{% /note %}} + +#### Keeping our story straight with repositories + +Now that we've figured out our object models, let's write some +repositories to help create and work with these objects in Riak: + +```ruby +class UserRepository + BUCKET = 'Users' + + def initialize(client) + @client = client + end + + def save(user) + users = @client.bucket(BUCKET) + key = user.user_name + + riak_obj = users.get_or_new(key) + riak_obj.data = user + riak_obj.content_type = 'application/json' + riak_obj.store + end + + def get(user_name) + riak_obj = @client.bucket(BUCKET)[user_name] + User.new(riak_obj.data) + end +end + +class MsgRepository + BUCKET = 'Msgs' + + def initialize(client) + @client = client + end + + def save(msg) + msgs = @client.bucket(BUCKET) + key = generate_key(msg) + + return msgs.get(key) if msgs.exists?(key) + riak_obj = msgs.new(key) + riak_obj.data = msg + riak_obj.content_type = 'application/json' + riak_obj.prevent_stale_writes = true + riak_obj.store(returnbody: true) + end + + def get(key) + riak_obj = @client.bucket(BUCKET).get(key) + Msg.new(riak_obj.data) + end + + def generate_key(msg) + msg.from + '_' + msg.created.utc.iso8601(6) + end +end + +class TimelineRepository + BUCKET = 'Timelines' + SENT = 'Sent' + INBOX = 'Inbox' + + def initialize(client) + @client = client + @msg_repo = MsgRepository.new(client) + end + + def post_message(msg) + # Save the canonical copy + saved_message = @msg_repo.save(msg) + # Post to sender's Sent timeline + add_to_timeline(msg, SENT, saved_message.key) + # Post to recipient's Inbox timeline + add_to_timeline(msg, INBOX, saved_message.key) + end + + def get_timeline(owner, type, date) + riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date)) + Timeline.new(riak_obj.data) + end + + private + + def add_to_timeline(msg, type, msg_key) + timeline_key = generate_key_from_msg(msg, type) + riak_obj = nil + + if @client.bucket(BUCKET).exists?(timeline_key) + riak_obj = add_to_existing_timeline(timeline_key, msg_key) + else + riak_obj = create_new_timeline(timeline_key, msg, type, msg_key) + end + + riak_obj.store + end + + def create_new_timeline(key, msg, type, msg_key) + owner = get_owner(msg, type) + riak_obj = @client.bucket(BUCKET).new(key) + riak_obj.data = Timeline.new(owner: owner, + type: type, + msgs: [msg_key]) + riak_obj.content_type = 'application/json' + riak_obj + end + + def 
add_to_existing_timeline(key, msg_key) + riak_obj = @client.bucket(BUCKET).get(key) + timeline = Timeline.new(riak_obj.data) + timeline.msgs << msg_key + riak_obj.data = timeline + riak_obj + end + + def get_owner(msg, type) + type == INBOX ? msg.to : msg.from + end + + def generate_key_from_msg(msg, type) + owner = get_owner(msg, type) + generate_key(owner, type, msg.created) + end + + def generate_key(owner, type, date) + owner + '_' + type + '_' + date.utc.strftime('%F') + end +end +``` + +Finally, let's test them: + +```ruby +# Setup our repositories +client = Riak::Client.new(protocol: 'pbc', pb_port: 10017) +user_repo = UserRepository.new(client) +msgs_repo = MsgRepository.new(client) +timeline_repo = TimelineRepository.new(client) + +# Create and save users +marleen = User.new(user_name: 'marleenmgr', + full_name: 'Marleen Manager', + email: 'marleen.manager@basho.com') + +joe = User.new(user_name: 'joeuser', + full_name: 'Joe User', + email: 'joe.user@basho.com') + +user_repo.save(marleen) +user_repo.save(joe) + +# Create new Msg, post to timelines +msg = Msg.new(from: marleen.user_name, + to: joe.user_name, + created: Time.now, + text: 'Welcome to the company!') + +timeline_repo.post_message(msg) + +# Get Joe's inbox for today, get first message +joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now) +joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first) + +puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}" +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/3.0.3/developing/getting-started/ruby/querying.md b/content/riak/kv/3.0.3/developing/getting-started/ruby/querying.md new file mode 100644 index 0000000000..fc799984bc --- /dev/null +++ b/content/riak/kv/3.0.3/developing/getting-started/ruby/querying.md @@ -0,0 +1,256 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Ruby" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Querying" + identifier: "getting_started_ruby_query" + weight: 101 + parent: "getting_started_ruby" +toc: true +aliases: + - /riak/3.0.3/dev/taste-of-riak/querying-ruby + - /riak/kv/3.0.3/dev/taste-of-riak/querying-ruby +--- + +#### A Quick Note on Querying and Schemas +_Schemas_? Yes we said that correctly, S-C-H-E-M-A-S. It's not a dirty word. +Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. 
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
+
+While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `OrderSummaries` object will likely change often. It will do double duty by acting as an index for all of a customer's orders and also holding some relevant data, such as the order total. If we display this information in our application often, it's only one extra request to get all the info.
+
+```ruby
+shared_key = '1'
+customer = customer_bucket.get(shared_key).data
+customer[:order_summary] = order_summary_bucket.get(shared_key).data
+puts "Combined Customer and Order Summary: "
+pp customer
+```
+
+Which returns our amalgamated objects:
+
+```ruby
+# Combined Customer and Order Summary:
+{"customer_id"=>1,
+ "name"=>"John Smith",
+ "address"=>"123 Main Street",
+ "city"=>"Columbus",
+ "state"=>"Ohio",
+ "zip"=>"43210",
+ "phone"=>"+1-614-555-5555",
+ "created_date"=>"2013-10-01 14:30:26 -0400",
+ :order_summary=>
+  {"customer_id"=>1,
+   "summaries"=>
+    [{"order_id"=>1,
+      "total"=>415.98,
+      "order_date"=>"2013-10-01 14:42:26 -0400"},
+     {"order_id"=>2,
+      "total"=>359.99,
+      "order_date"=>"2013-10-15 16:43:16 -0400"},
+     {"order_id"=>3,
+      "total"=>74.98,
+      "order_date"=>"2013-11-03 17:45:28 -0500"}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/3.0.3/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+                                         .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "# Jane's Orders:"
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`.
+
+```ruby
+puts "# October's Orders:"
+pp order_bucket.get_index('order_date_bin', '20131001'..'20131031')
+```
+
+Which returns:
+
+```ruby
+# October's Orders:
+["1", "2"]
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and in doing so demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary (String) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/key-value-modeling.md b/content/riak/kv/3.0.3/developing/key-value-modeling.md
new file mode 100644
index 0000000000..099c9e6be3
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/key-value-modeling.md
@@ -0,0 +1,535 @@
+---
+title: "Riak KV Key/Value Modeling"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Key/Value Modeling"
+    identifier: "developing_kv_model"
+    weight: 104
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/data-modeling/key-value/
+  - /riak/kv/3.0.3/dev/data-modeling/key-value/
+---
+
+While Riak enables you to take advantage of a wide variety of features
+that can be useful in application development, such as [Search]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search), [secondary indexes (2i)]({{<baseurl>}}riak/kv/3.0.3/developing/usage/secondary-indexes/), and [Riak Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/), Riak almost always performs best when you
+build your application around basic CRUD operations (create, read,
+update, and delete) on objects, i.e. when you use Riak as a "pure"
+key/value store.
+
+In this tutorial, we'll suggest some strategies for naming and modeling
+key/value object interactions with Riak. If you'd like to use some
+of Riak's other features, we recommend checking out the documentation
+for each of them or consulting our guide to [building applications with Riak]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/) for a better sense of which features you might need.
+
+## Advantages of Key/Value Operations
+
+Riak's key/value architecture enables it to be more performant than
+relational databases in many scenarios because Riak doesn't need to
+perform lock, join, union, or other operations when working with
+objects. Instead, it interacts with objects on a one-by-one basis, using
+**primary key lookups**.
+
+Primary key lookups store and fetch objects in Riak on the basis of
+three basic locators:
+
+* The object's [key]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/keys-and-objects#keys), which can be anything you
+  want as long as it is [Unicode compliant](http://www.unicode.org/)
+* The [bucket]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets) which houses the object and its key (bucket
+  names are also Unicode compliant)
+* The [bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) that determines the bucket's
+  [replication]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties) and other properties
+
+It may be useful to think of this system as analogous to a nested
+key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you
+would find in most programming languages. Below is an example from
+[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html).
+The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{<baseurl>}}riak/kv/3.0.3/developing/api/http)):
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+The most important benefit of storing Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
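+
+To make that concrete, here is a short, purely illustrative Ruby sketch of how an application might assemble such structured keys; the prefixes and formats are application conventions assumed for the example, not anything Riak requires:
+
+```ruby
+require 'securerandom'
+require 'time'
+
+# Hypothetical structured key builders
+sensor_key = "sensor1_#{Time.now.utc.iso8601}"
+# => e.g. "sensor1_2013-11-05T13:15:30Z"
+
+user_key = "user_#{SecureRandom.uuid}"
+# => e.g. "user_9b1899b5-eb8c-47e4-83c9-2c62f0300596"
+```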
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist?
+
+One way to determine this is to [list all keys]({{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers/list-keys) in the
+bucket `users`. This approach, however, is _not_ recommended, because
+listing all keys in a bucket is a very expensive operation that should
+not be used in production. And so another strategy must be employed.
+
+A better possibility is to use [Riak sets]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets) to
+store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{<baseurl>}}riak/kv/3.0.3/developing/data-types) that enables you to store lists of binaries or strings in Riak.
+Unlike normal Riak objects, you can interact with Riak sets much like
+you interact with sets in most programming languages, i.e. you can add
+and remove elements at will.
+
+Going back to our user data example, instead of simply storing user
+records in our `users` bucket, we could set up our application to store
+each key in a set when a new record is created. We'll store this set in
+the bucket `user_info_sets` (we'll keep it simple) and in the key
+`usernames`. The following will also assume that we've [set up a bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called
+`sets`.
+
+We can interact with that set on the basis of its location:
+
+```java
+Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames");
+
+// With this Location, we can construct fetch operations like this:
+FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build();
+```
+
+```ruby
+require 'riak'
+
+set_bucket = client.bucket('user_info_sets')
+
+# We'll make this set global because we'll use it
+# inside of a function later on
+
+$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets')
+```
+
+```php
+$command = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+    ->buildLocation('usernames', 'user_info_sets', 'sets')
+    ->build();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('user_info_sets')
+user_id_set = Set(bucket, 'usernames')
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.3/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.3/developing/getting-started).
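+
+Before wiring the set into our record-creation path, here's a minimal sketch, assuming the `$user_id_set` object from the Ruby example above, of adding and removing elements directly (method names follow the riak-ruby-client CRDT API):
+
+```ruby
+# Mutate the set much like a native collection
+$user_id_set.add('bashoman')
+pp $user_id_set.members   # the usernames currently stored in the set
+$user_id_set.remove('bashoman')
+```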
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+  public String username;
+  public String info;
+
+  public User(String username, String info) {
+    this.username = username;
+    this.info = info;
+  }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+  // User records themselves will be stored in the bucket "users"
+  Location userObjectLocation =
+    new Location(new Namespace("users"), user.username);
+  RiakObject userObject = new RiakObject()
+      // We'll keep it simple and store User object data as plain text
+      .setContentType("text/plain")
+      .setValue(BinaryValue.create(user.info));
+  StoreValue store = new StoreValue.Builder(userObject)
+      .withLocation(userObjectLocation)
+      .build();
+  client.execute(store);
+
+  Location userIdSet =
+    new Location(new Namespace("sets", "user_info_sets"), "usernames");
+  SetUpdate su = new SetUpdate()
+      .add(BinaryValue.create(user.username));
+  UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+      .build();
+  client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new(client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the global set from above
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+    public $user_name;
+    public $info;
+
+    public function __construct($user_name, $info)
+    {
+        $this->user_name = $user_name;
+        $this->info = $info;
+    }
+}
+
+function store_user(User $user)
+{
+    (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->buildLocation($user->user_name, 'users')
+        ->buildJsonObject($user)
+        ->build()
+        ->execute();
+
+    (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->add($user->user_name)
+        ->build()
+        ->execute();
+}
+```
+
+```python
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    obj = RiakObject(client, client.bucket('users'), user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
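+
+A hypothetical invocation of the Ruby `store_record` function above would then look like this:
+
+```ruby
+# Illustrative usage: create a record and register its key in the set
+user = User.new
+user.username = 'bashoman'
+user.info = 'Riak enthusiast from Columbus, Ohio'
+store_record(user)
+```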
+
+Now, let's say that we want to be able to pull up all user records in
+the bucket at once. We could do so by iterating through the usernames
+stored in our set and then fetching the object corresponding to each
+username:
+
+```java
+public Set<User> fetchAllUserRecords() {
+    // Empty builder sets for usernames and User objects
+    Set<String> userIdSet = new HashSet<String>();
+    Set<User> userSet = new HashSet<User>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSetLocation =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSetLocation).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    set.viewAsSet().forEach((BinaryValue username) -> {
+        userIdSet.add(username.toString());
+    });
+
+    // Fetch User objects for each of the usernames stored in the set
+    userIdSet.forEach((String username) -> {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        User user = client.execute(fetch).getValue(User.class);
+        userSet.add(user);
+    });
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+    $users = [];
+
+    $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->build()
+        ->execute();
+
+    $user_names = $response->getSet()->getData();
+    foreach($user_names as $user_name) {
+        $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+            ->buildLocation($user_name, 'users')
+            ->build()
+            ->execute();
+
+        $users[$user_name] = $response->getObject()->getData();
+    }
+
+    return $users;
+}
+```
+
+```python
+# We'll create a generator object that will yield a list of Riak objects
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can retrieve that list of Riak objects later on
+list(fetch_all_user_records())
+```
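+
+With the Ruby version above, retrieving everything is then a one-liner (illustrative):
+
+```ruby
+# Print every user record we can discover via the set
+fetch_all_user_records.each { |record| pp record }
+```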
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`. If an
+object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) {
+    String usernameKey = String.format("user_%s", username);
+    Location loc = new Location(new Namespace("users"), usernameKey);
+    FetchValue fetchUser = new FetchValue.Builder(loc).build();
+    FetchValue.Response res = client.execute(fetchUser);
+    User userObject = res.getValue(User.class);
+    return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->buildLocation('user_' . $user_name, 'users')
+        ->build()
+        ->execute();
+
+    return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good' => {
+    'season X' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  },
+  'bad' => {
+    'season Y' => {
+      'episode 1' => '<title>',
+      # ...
+    }
+  }
+}
+```
+
+We can fetch the title of season 8, episode 6:
+
+```ruby
+# For the sake of example, we'll classify season 8 as good:
+
+simpsons['good']['season 8']['episode 6']
+
+# => "A Milhouse Divided"
+```
+
+If your data is best modeled as a three-layered hash, you may want to
+consider using bucket types in the way shown above.
+
+## Resources
+
+More on key/value modeling in Riak can be found in [this
+presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo)
+by Basho evangelist [Hector Castro](https://github.com/hectcastro), with
+the presentation slides available [on Speaker
+Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example).
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage.md b/content/riak/kv/3.0.3/developing/usage.md
new file mode 100644
index 0000000000..d11bb7d40f
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage.md
@@ -0,0 +1,138 @@
+---
+title: "Usage Overview"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Usage"
+    identifier: "developing_usage"
+    weight: 101
+    parent: "developing"
+toc: true
+aliases:
+---
+
+## In This Section
+
+#### [Creating Objects](./creating-objects)
+
+Creating and storing objects in Riak KV.
+
+[Learn More >>](./creating-objects)
+
+
+#### [Reading Objects](./reading-objects)
+
+Reading and fetching objects in Riak KV.
+
+[Learn More >>](./reading-objects)
+
+
+#### [Updating Objects](./updating-objects)
+
+Updating objects in Riak KV.
+
+[Learn More >>](./updating-objects)
+
+
+#### [Deleting Objects](./deleting-objects)
+
+Deleting objects in Riak KV.
+
+[Learn More >>](./deleting-objects)
+
+
+#### [Content Types](./content-types)
+
+Overview of content types and their usage.
+
+[Learn More >>](./content-types)
+
+
+#### [Using Search](./search)
+
+Tutorial on using search.
+
+[Learn More >>](./search)
+
+
+#### [Using MapReduce](./mapreduce)
+
+Guide to using MapReduce in applications.
+
+[Learn More >>](./mapreduce)
+
+
+#### [Using Secondary Indexes](./secondary-indexes)
+
+Overview and usage details of Secondary Indexes (2i).
+
+[Learn More >>](./secondary-indexes)
+
+
+#### [Bucket Types](./bucket-types)
+
+Describes how to use bucket properties.
+
+[Learn More >>](./bucket-types)
+
+
+#### [Using Commit Hooks](./commit-hooks)
+
+Tutorial on pre-commit and post-commit hook functions.
+
+[Learn More >>](./commit-hooks)
+
+
+#### [Creating Search Schemas](./search-schemas)
+
+Step-by-step guide on creating and using custom search schemas.
+
+[Learn More >>](./search-schemas)
+
+
+#### [Searching with Data Types](./searching-data-types)
+
+Guide on using search with Data Types.
+
+[Learn More >>](./searching-data-types)
+
+
+#### [Implementing a Document Store](./document-store)
+
+Tutorial on using Riak KV as a document store.
+
+[Learn More >>](./document-store)
+
+
+#### [Custom Extractors](./custom-extractors)
+
+Details on creating and registering custom extractors with Riak Search.
+
+[Learn More >>](./custom-extractors)
+
+
+#### [Client-side Security](./security)
+
+Overview of client-side security.
+
+[Learn More >>](./security)
+
+
+#### [Replication](./replication)
+
+Documentation on replication properties and their underlying implementation.
+
+[Learn More >>](./replication)
+
+
+#### [Conflict Resolution](./conflict-resolution)
+
+Guide to conflict resolution during object updates.
+ +[Learn More >>](./conflict-resolution) + + + + diff --git a/content/riak/kv/3.0.3/developing/usage/bucket-types.md b/content/riak/kv/3.0.3/developing/usage/bucket-types.md new file mode 100644 index 0000000000..b3fb9602d8 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/usage/bucket-types.md @@ -0,0 +1,102 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Bucket Types" + identifier: "usage_bucket_types" + weight: 108 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.3/dev/advanced/bucket-types + - /riak/kv/3.0.3/dev/advanced/bucket-types +--- + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +## Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +`default` bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/usage/commit-hooks.md b/content/riak/kv/3.0.3/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..d7580b7c05 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/usage/commit-hooks.md @@ -0,0 +1,243 @@ +--- +title: "Using Commit Hooks" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Using Commit Hooks" + identifier: "usage_commit_hooks" + weight: 109 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.3/dev/using/commit-hooks + - /riak/kv/3.0.3/dev/using/commit-hooks +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types + +Pre- and post-commit hooks are functions that are invoked before or +after an object has been written to Riak. 
To provide a few examples,
+commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/3.0.3/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions are allowed three possible return values:
+
+- A Riak object - This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written.
+- `fail` - The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/3.0.3/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` - The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
+
+Errors that occur when processing Erlang pre-commit hooks will be
+reported in the `sasl-error.log` file with lines that start with
+`problem invoking hook`.
+
+#### Object Size Example
+
+This Erlang pre-commit hook will limit object values to 5 MB or smaller:
+
+```erlang
+precommit_limit_size(Object) ->
+  case erlang:byte_size(riak_object:get_value(Object)) of
+    Size when Size > 5242880 -> {fail, "Object is larger than 5MB."};
+    _ -> Object
+  end.
+```
+
+The Erlang function `precommit_limit_size` takes the Riak object
+(`Object`) as its input and runs a pattern-matching operation on the
+object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1)
+function determines that the object's size (determined by the `riak_object:get_value`
+function) is greater than 5,242,880 (5 MB in bytes), then the commit
+will return failure along with the message `Object is larger than 5MB.`
+This will stop the write. If the object is not larger than 5 MB, Riak
+will return the object and allow the write to proceed.
+
+### Chaining
+
+The default value of the bucket type's `precommit` property is an empty
+list, meaning that no pre-commit hooks are specified by default. Adding
+one or more pre-commit hook functions to this list, as documented above,
+will cause Riak to start evaluating those hook functions when bucket
+entries are created, updated, or deleted. Riak stops evaluating
+pre-commit hooks when a hook function fails the commit.
+
+#### JSON Validation Example
+
+Pre-commit hooks can be used in many ways in Riak. One such way to use
+pre-commit hooks is to validate data before it is written to Riak.
+Below is an example that uses Erlang to validate a JSON object
+before it is written to Riak.
+
+Below is a sample JSON object that will be evaluated by the hook:
+
+```json
+{
+  "user_info": {
+    "name": "Mark Phillips",
+    "age": "25"
+  },
+  "session_info": {
+    "id": 3254425,
+    "items": [29, 37, 34]
+  }
+}
+```
+
+The following hook will validate the JSON object:
+
+```erlang
+validate(Object) ->
+  try
+    mochijson2:decode(riak_object:get_value(Object)),
+    Object
+  catch
+    throw:invalid_utf8 ->
+      {fail, "Invalid JSON: Illegal UTF-8 character"};
+    error:Error ->
+      {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))}
+  end.
+```
+
+**Note**: All pre-commit hook functions are executed for each create and update operation.
+
+## Post-Commit Hooks
+
+Post-commit hooks are run after a write has completed successfully. More
+specifically, the hook function is called immediately before the calling
+process is notified of the successful write.
+
+Hook functions must accept a single argument: the object instance just
+written. The return value of the function is ignored. As with pre-commit
+hooks, deletes are considered writes, so post-commit hook functions will
+need to inspect the object's metadata for the presence of `X-Riak-Deleted`
+to determine whether a delete has occurred. Likewise,
+errors that occur when processing post-commit hooks will be reported in
+the `sasl-error.log` file with lines that start with `problem invoking hook`.
+
+#### Example
+
+The following post-commit hook creates a secondary index on the `email`
+field of a JSON object:
+
+```erlang
+postcommit_index_on_email(Object) ->
+    %% Determine the target bucket name
+    Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]),
+
+    %% Decode the JSON body of the object
+    {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)),
+
+    %% Extract the email field
+    {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties),
+
+    %% Create a new object for the target bucket
+    %% NOTE: This doesn't handle the case where the
+    %% index object already exists!
+    IndexObj = riak_object:new(
+        Bucket, Key, <<>>, %% no object contents
+        dict:from_list(
+            [
+                {<<"content-type">>, "text/plain"},
+                {<<"Links">>,
+                    [
+                        {
+                            {riak_object:bucket(Object), riak_object:key(Object)},
+                            <<"indexed">>
+                        }]}
+            ]
+        )
+    ),
+
+    %% Get a riak client
+    {ok, C} = riak:local_client(),
+
+    %% Store the object
+    C:put(IndexObj).
+```
+
+
+### Chaining
+
+The default value of the bucket `postcommit` property is an empty list,
+meaning that no post-commit hooks are specified by default. Adding one
+or more post-commit hook functions to the list, as documented above,
+will cause Riak to start evaluating those hook functions immediately
+after data has been created, updated, or deleted. Each post-commit hook
+function runs in a separate process, so it's possible for several hook
+functions, triggered by the same update, to execute in parallel.
+
+**Note**: All post-commit hook functions are executed for each create,
+update, or delete.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/conflict-resolution.md b/content/riak/kv/3.0.3/developing/usage/conflict-resolution.md
new file mode 100644
index 0000000000..1b9c3671a1
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/conflict-resolution.md
@@ -0,0 +1,681 @@
+---
+title: "Conflict Resolution"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Conflict Resolution"
+    identifier: "usage_conflict_resolution"
+    weight: 116
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/conflict-resolution
+  - /riak/kv/3.0.3/dev/using/conflict-resolution
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types
+[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/reference/strong-consistency
+
+One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#node) is capable of receiving requests without requiring that
+every node participate in each request.
+
+If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes are
+unavoidable. Often, Riak can resolve these conflicts on its own
+internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings).
+
+{{% note title="Important note on terminology" %}}
+In versions of Riak prior to 2.0, vector clocks were the only causal context
+mechanism available in Riak, which changed with the introduction of dotted
+version vectors in 2.0. Please note that you may frequently find terminology in
+client library APIs, internal Basho documentation, and more that uses the term
+"vector clock" interchangeably with causal context in general. Riak's HTTP API
+still uses an `X-Riak-Vclock` header, for example, even if you are using dotted
+version vectors.
+{{% /note %}}
+
+But even when you use causal context, Riak cannot always decide which
+value is most causally recent, especially in cases involving concurrent
+updates to an object. So how does Riak behave when it can't decide on a
+single most-up-to-date value? **That is your choice**. A full listing of
+available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now,
+though, please bear in mind that we strongly recommend one of the
+following two options:
+
+1. If your data can be modeled as one of the currently available [Riak
+   Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types), we recommend using one of these types,
+   because all of them have conflict resolution _built in_, completely
+   relieving applications of the need to engage in conflict resolution.
+2. If your data cannot be modeled as one of the available Data Types,
+   we recommend allowing Riak to generate [siblings](#siblings) and to design your application to resolve
+   conflicts in a way that fits your use case. Developing your own
+   **conflict resolution strategy** can be tricky, but it has clear
+   advantages over other approaches.
+
+Because Riak allows for a mixed approach when storing and managing data,
+you can apply multiple conflict resolution strategies within a cluster.
+
+> **Note on strong consistency**
+>
+> In versions of Riak 2.0 and later, you have the option of using Riak in
+a strongly consistent fashion. This document pertains to usage of Riak
+as an _eventually_ consistent system. If you'd like to use Riak's
+strong consistency feature, please refer to the following documents:
+>
+> * [Using Strong Consistency]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/strong-consistency) - A guide for developers
+> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/3.0.3/configuring/strong-consistency) - A guide for operators
+> * [strong consistency][use ref strong consistency] - A more theoretical explication of strong
+  consistency
+
+## Client- and Server-side Conflict Resolution
+
+Riak's eventual consistency model is powerful because Riak is
+fundamentally non-opinionated about how data resolution takes place.
+While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties#available-parameters), there are a variety of general
+approaches to conflict resolution that are available. In Riak, you can
+mix and match conflict resolution strategies at the bucket level,
+[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets)
+to consider when reasoning about conflict resolution are the
+`allow_mult` and `last_write_wins` properties.
+
+These properties provide you with the following basic options:
+
+### Timestamp-based Resolution
+
+If the [`allow_mult`](#siblings) parameter is set to
+`false`, Riak resolves all object replica conflicts internally and does
+not return siblings to the client.
How Riak resolves those conflicts +depends on the value that you set for a different bucket property, +[`last_write_wins`]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets). If `last_write_wins` is set to `false`, +Riak will resolve all conflicts on the basis of +[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are +attached to all Riak objects as metadata. + +The problem with timestamps is that they are not a reliable resolution +mechanism in distributed systems, and they always bear the risk of data +loss. A better yet still-problematic option is to adopt a +last-write-wins strategy, described directly below. + +### Last-write-wins + +Another way to manage conflicts is to set `allow_mult` to `false`, as +with timestamp-based resolution, while also setting the +`last_write_wins` parameter to +`true`. This produces a so-called last-write-wins (LWW) strategy whereby +Riak foregoes the use of all internal conflict resolution strategies +when making writes, effectively disregarding all previous writes. + +The problem with LWW is that it will necessarily drop some writes in the +case of concurrent updates in the name of preventing sibling creation. +If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. For examples of client-side sibling resolution, see the following +client-library-specific docs: + +* [Java]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/nodejs) + +In Riak versions 2.0 and later, `allow_mult` is set to `true` by default +for any [bucket types]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) that you create. 
This means +that if you wish to avoid client-side sibling resolution, you have a few +options: + +* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) + that set `allow_mult` to `false` +* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/3.0.3/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/3.0.3/configuring/reference#default-bucket-properties) for your + cluster. If you set the `buckets.default.allow_mult` parameter to + `false`, all bucket types that you create will have `allow_mult` set + to `false` by default. + +## Causal Context + +When a value is stored in Riak, it is tagged with a piece of metadata +called a **causal context** which establishes the object's initial +version. Causal context comes in one of two possible forms, depending +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context#vector-clocks) will be used. + +Causal context essentially enables Riak to compare the different values +of objects stored in Riak and to determine a number of important things +about those values: + + * Whether one value is a direct descendant of the other + * Whether the values are direct descendants of a common parent + * Whether the values are unrelated in recent heritage + +Using the information provided by causal context, Riak is frequently, +though not always, able to resolve conflicts between values without +producing siblings. + +Both vector clocks and dotted version vectors are non human readable and +look something like this: + +``` +a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA= +``` + +If `allow_mult` is set to `true`, you should _always_ use causal context +when updating objects, _unless you are certain that no object exists +under that key_. Failing to use causal context with mutable data, +especially for objects that are frequently updated, can lead to +[sibling explosion]({{<baseurl>}}riak/kv/3.0.3/using/performance/latency-reduction#siblings), which can +produce a variety of problems in your cluster. Fortunately, much of the +work involved with using causal context is handled automatically by +Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.3/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{<baseurl>}}riak/kv/3.0.3/developing/usage/updating-objects) document. + +## Siblings + +A **sibling** is created when Riak is unable to resolve the canonical +version of an object being stored, i.e. when Riak is presented with +multiple possible values for an object and can't figure out which one is +most causally recent. The following scenarios can create sibling values +inside of a single object: + +1. **Concurrent writes** - If two writes occur simultaneously from +clients, Riak may not be able to choose a single value to store, in +which case the object will be given a sibling. These writes could happen +on the same node or on different nodes. +2. **Stale causal context** - Writes from any client using a stale +[causal context]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context). 
This is a less likely scenario if a client updates +the object by reading the object first, fetching the causal context +currently attached to the object, and then returning that causal context +to Riak when performing the update (fortunately, our client libraries +handle much of this automatically). However, even if a client follows +this protocol when performing updates, a situation may occur in which an +update happens from a different client while the read/write cycle is +taking place. This may cause the first client to issue the write with an +old causal context value and for a sibling to be created. A client is +"misbehaved" if it habitually updates objects with a stale or no context +object. +3. **Missing causal context** - If an object is updated with no causal +context attached, siblings are very likely to be created. This is an +unlikely scenario if you're using a Basho client library, but it _can_ +happen if you are manipulating objects using a client like `curl` and +forgetting to set the `X-Riak-Vclock` header. + +## Siblings in Action + +Let's have a more concrete look at how siblings work in Riak. First, +we'll create a bucket type called `siblings_allowed` with `allow_mult` +set to `true`: + +```bash +riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}' +riak-admin bucket-type activate siblings_allowed +riak-admin bucket-type status siblings_allowed +``` + +If the type has been activated, running the `status` command should +return `siblings_allowed is active`. Now, we'll create two objects and +write both of them to the same key without first fetching the object +(which obtains the causal context): + +```java +Location bestCharacterKey = + new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character"); + +RiakObject obj1 = new RiakObject() + .withContentType("text/plain") + .withValue(BinaryValue.create("Ren")); +RiakObject obj2 = new RiakObject() + .withContentType("text/plain") + .withValue(BinaryValue.create("Stimpy")); +StoreValue store1 = new StoreValue.Builder(obj1) + .withLocation(bestCharacterKey) + .build(); +StoreValue store2 = new StoreValue.Builder(obj2) + .withLocation(bestCharacterKey) + .build(); +client.execute(store1); +client.execute(store2); +``` + +```ruby +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj1 = Riak::RObject.new(bucket, 'best_character') +obj1.content_type = 'text/plain' +obj1.raw_data = 'Ren' +obj1.store + +obj2 = Riak::RObject.new(bucket, 'best_character') +obj2.content_type = 'text/plain' +obj2.raw_data = 'Stimpy' +obj2.store +``` + +```python +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj1 = RiakObject(client, bucket, 'best_character') +obj1.content_type = 'text/plain' +obj1.data = 'Ren' +obj1.store() + +obj2 = RiakObject(client, bucket, 'best_character') +obj2.content_type = 'text/plain' +obj2.data = 'Stimpy' +obj2.store() +``` + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); +``` + +```javascript +var obj1 = new Riak.Commands.KV.RiakObject(); +obj1.setContentType('text/plain'); +obj1.setBucketType('siblings_allowed'); +obj1.setBucket('nickolodeon'); +obj1.setKey('best_character'); +obj1.setValue('Ren'); + +var obj2 = new 
Riak.Commands.KV.RiakObject();
+obj2.setContentType('text/plain');
+obj2.setBucketType('siblings_allowed');
+obj2.setBucket('nickolodeon');
+obj2.setKey('best_character');
+obj2.setValue('Stimpy');
+
+var storeFuncs = [];
+[obj1, obj2].forEach(function (obj) {
+    storeFuncs.push(
+        function (async_cb) {
+            client.storeValue({ value: obj }, function (err, rslt) {
+                async_cb(err, rslt);
+            });
+        }
+    );
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Ren">>,
+                     <<"text/plain">>),
+Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>},
+                     <<"best_character">>,
+                     <<"Stimpy">>,
+                     <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj1),
+riakc_pb_socket:put(Pid, Obj2).
+```
+
+```curl
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Ren"
+
+curl -XPUT http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character \
+  -H "Content-Type: text/plain" \
+  -d "Stimpy"
+```
+
+> **Getting started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries]({{<baseurl>}}riak/kv/3.0.3/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.3/developing/getting-started) section.
+
+At this point, multiple objects have been stored in the same key without
+passing any causal context to Riak. Let's see what happens if we try to
+read the contents of the object:
+
+```java
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```ruby
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj
+```
+
+```python
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+obj.siblings
+```
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count);
+foreach (var sibling in obj.Siblings)
+{
+    Debug.WriteLine(
+        format: "    VTag: {0}",
+        args: sibling.VTag);
+}
+```
+
+```javascript
+client.fetchValue({
+    bucketType: 'siblings_allowed', bucket:
+        'nickolodeon', key: 'best_character'
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("nickolodeon/best_character has '%d' siblings",
+        rslt.values.length);
+});
+```
+
+```curl
+curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+```
+
+Uh-oh! Siblings have been found.
We should get this response: + +```java +com.basho.riak.client.cap.UnresolvedConflictException: Siblings found +``` + +```ruby +<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]> +``` + +```python +[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>] +``` + +```csharp +Sibling count: 2 + VTag: 1DSVo7VED8AC6llS8IcDE6 + VTag: 7EiwrlFAJI5VMLK87vU4tE +``` + +```javascript +info: nickolodeon/best_character has '2' siblings +``` + +```curl +Siblings: +175xDv0I3UFCfGRC7K7U9z +6zY2mUCFPEoL834vYCDmPe +``` + +As you can see, reading an object with sibling values will result in +some form of "multiple choices" response (e.g. `300 Multiple Choices` in +HTTP). If you're using the HTTP interface and want to view all sibling +values, you can attach an `Accept: multipart/mixed` header to your +request: + +```curl +curl -H "Accept: multipart/mixed" \ + http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character +``` + +Response (without headers): + +``` +ren +--WUnzXITIPJFwucNwfdaofMkEG7H + +stimpy +--WUnzXITIPJFwucNwfdaofMkEG7H-- +``` + +If you select the first of the two siblings and retrieve its value, you +should see `Ren` and not `Stimpy`. + +### Using Causal Context + +Once you are presented with multiple options for a single value, you +must determine the correct value. In an application, this can be done +either in an automatic fashion, using a use case-specific resolver, or +by presenting the conflicting objects to the end user. For more +information on application-side conflict resolution, see our +client-library-specific documentation for the following languages: + +* [Java]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/nodejs) + +We won't deal with conflict resolution in this section. Instead, we'll +focus on how to use causal context. + +After having written several objects to Riak in the section above, we +have values in our object: `Ren` and `Stimpy`. But let's say that we +decide that `Stimpy` is the correct value based on our application's use +case. In order to resolve the conflict, we need to do three things: + +1. Fetch the current object (which will return both siblings) +2. Modify the value of the object, i.e. make the value `Stimpy` +3. Write the object back to the `best_character` key + +What happens when we fetch the object first, prior to the update, is +that the object handled by the client has a causal context attached. At +that point, we can modify the object's value, and when we write the +object back to Riak, _the causal context will automatically be attached +to it_. 
Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object, which has the vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+        bucketType: 'siblings_allowed',
+        bucket: 'nickolodeon',
+        key: 'best_character'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue('Stimpy');
+        client.storeValue({ value: riakObj, returnBody: true },
+            function (err, rslt) {
+                if (err) {
+                    throw new Error(err);
+                }
+
+                assert(rslt.values.length === 1);
+            }
+        );
+    }
+);
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible for two clients to engage in
+conflict resolution simultaneously. To avoid a pathological divergence,
+you should be sure to limit the number of reconciliations and fail once
+that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/3.0.3/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant volume of updates is performed on a single
+object in a short period of time.
+While updating a single object _extremely_ frequently is not
+recommended, you can tune Riak's vector clock pruning to prevent vector
+clocks from growing too large too quickly. More on pruning can be found
+in the [section below](#vector-clock-pruning).
+
+### How does `last_write_wins` affect resolution?
+
+On the surface, it seems like setting `allow_mult` to `false`
+(the default) and `last_write_wins` to `true` would result in the same
+behavior, but there is a subtle distinction.
+
+Even though both settings return only one value to the client, setting
+`allow_mult` to `false` still uses vector clocks for resolution, whereas
+if `last_write_wins` is `true`, Riak reads the timestamp to determine
+the latest version. Deeper in the system, if `allow_mult` is `false`,
+Riak will still allow siblings to exist when they are created (via
+concurrent writes or network partitions), whereas setting
+`last_write_wins` to `true` means that Riak will overwrite the value
+with the one that has the later timestamp.
+
+When you don't care about sibling creation, setting `allow_mult` to
+`false` has the least surprising behavior: you get the latest value,
+but network partitions are handled gracefully. However, for cases in
+which keys are rewritten often (and quickly) and the new value isn't
+necessarily dependent on the old value, `last_write_wins` will provide
+better performance. Some use cases where you might want to use
+`last_write_wins` include caching, session storage, and insert-only
+(no updates).
+
+{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}}
+The combination of setting both the `allow_mult` and `last_write_wins`
+properties to `true` leads to undefined behavior and should not be used.
+{{% /note %}}
+
+## Vector Clock Pruning
+
+Riak regularly prunes vector clocks to prevent overgrowth based on four
+parameters, which can be set for any bucket type that you create:
+
+Parameter | Default value | Description
+:---------|:--------------|:-----------
+`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned
+`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned
+`young_vclock` | `20` | If a vector clock entry is younger than this value (in seconds), it will not be pruned
+`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in seconds), it will be pruned
+
+This diagram shows how the values of these parameters dictate the vector
+clock pruning process:
+
+![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)
+
+## More Information
+
+Additional background information on vector clocks:
+
+* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock)
+* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/)
+* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/)
+* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/csharp.md
new file mode 100644
index 0000000000..7a196dcfe3
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/csharp.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "C 
Sharp" + identifier: "usage_conflict_resolution_csharp" + weight: 103 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/3.0.3/dev/using/conflict-resolution/csharp + - /riak/kv/3.0.3/dev/using/conflict-resolution/csharp +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak .NET client][riak_dotnet_client]. + +## How the .NET Client Handles Conflict Resolution + +In the Riak .NET client, every Riak object has a `siblings` property that +provides access to a list of that object's sibling values. If there are no +siblings, that property will return an empty list. + +Here's an example of an object with siblings: + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +So what happens if the count of `obj.Siblings` is greater than 0, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `Siblings` list and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `Siblings` list and will +fetch, update and store the definitive value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Now, modify the object's value +obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain); + +// Then, store the object which has vector clock attached +var putRslt = client.Put(obj); +CheckResult(putRslt); + +obj = putRslt.Value; +// Voila, no more siblings! +Debug.Assert(obj.Siblings.Count == 0); +``` + +### Choosing a value from `Siblings` + +This example shows a basic sibling resolution strategy in which the first +sibling is chosen as the canonical value. 
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/conflict-resolution/golang.md b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..bec5b47228
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/conflict-resolution/golang
+  - /riak/kv/3.0.3/dev/using/conflict-resolution/golang
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, the result of a fetch may contain an array of
+sibling objects in its `Values` slice. If there are no siblings, that
+slice will contain a single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update, and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type.
+
+[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/conflict-resolution/java.md b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/java.md
new file mode 100644
index 0000000000..114761f151
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/java.md
@@ -0,0 +1,276 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Java"
+    identifier: "usage_conflict_resolution_java"
+    weight: 100
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/conflict-resolution/java
+  - /riak/kv/3.0.3/dev/using/conflict-resolution/java
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Java
+client](https://github.com/basho/riak-java-client).
+
+## How the Java Client Handles Conflict Resolution
+
+The official Riak Java client provides a `ConflictResolver` interface
+for handling sibling resolution. This interface requires that you
+implement a `resolve` method that takes a Java `List` of objects of a
+specific type that are stored in Riak and produces a single object of
+that type, i.e. converts a `List<T>` to a single `T`. Once that
+interface has been implemented, it can be registered as a singleton and
+thereby applied to all read operations on a specific data type. Below is
+an example resolver for the class `Foo`:
+
+```java
+import java.util.List;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class FooResolver implements ConflictResolver<Foo> {
+    @Override
+    public Foo resolve(List<Foo> siblings) {
+        // Insert your sibling resolution logic here
+    }
+}
+```
+
+What happens within the `resolve` method is up to you and will always
+depend on the use case at hand. You can implement a resolver that
+selects a random `Foo` from the list, chooses the `Foo` with the most
+recent timestamp (if you've set up the class `Foo` to have timestamps),
+etc. In this tutorial we'll provide a simple example to get you started.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? What criteria
+should be applied in making that decision? Should the lists be merged?
+Should we pick a `User` object at random?
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+possible values have `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+public class User {
+    public String username;
+    public Set<String> friends;
+
+    public User(String username, Set<String> friends) {
+        this.username = username;
+        this.friends = friends;
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
+need to implement that interface in a way that is specific to the need
+at hand, i.e. taking a list of `User` objects and returning the `User`
+object that has the longest `friends` list:
+
+```java
+import java.util.List;
+
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied on all reads that involve that object type.
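+
+With `UserResolver` registered for the `User` class, an ordinary fetch
+returns an already-resolved value. Below is a minimal sketch of what
+such a read could look like; the `siblings` bucket type, `users`
+bucket, and `bashobunny` key are simply the example names used in this
+tutorial:
+
+```java
+// A sketch of a fetch that triggers the registered resolver. The
+// location names are the example names from this tutorial.
+Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
+FetchValue fetch = new FetchValue.Builder(key).build();
+FetchValue.Response response = client.execute(fetch);
+
+// getValue(User.class) converts the fetched replicas to User objects
+// and, if siblings are present, passes them through UserResolver, so
+// a single User is returned
+User bashobunny = response.getValue(User.class);
+```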
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.3/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // siblings list is empty and returning the one sibling if
+        // there is only one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation.
+While that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets).
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/nodejs.md
new file mode 100644
index 0000000000..1dfed6d09a
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/nodejs.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "NodeJS"
+    identifier: "usage_conflict_resolution_nodejs"
+    weight: 104
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/conflict-resolution/nodejs
+  - /riak/kv/3.0.3/dev/using/conflict-resolution/nodejs
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Node.js client](https://github.com/basho/riak-nodejs-client).
+
+## How the Node.js Client Handles Conflict Resolution
+
+In the Riak Node.js client, the result of a fetch may contain an array
+of sibling objects in its `values` property. If there are no siblings,
+that property will contain a single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68)
+
+So what happens if the length of `rslt.values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `values` array and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `values` array and will
+fetch, update, and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/conflict-resolution/php.md b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..ae892fdbb2
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,244 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/conflict-resolution/php
+  - /riak/kv/3.0.3/dev/using/conflict-resolution/php
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides what is needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+  ->build()
+  ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application?
+What criteria should be applied in making that decision? Should the
+lists be merged? Should we pick a `User` object at random?
+
+This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+possible values have `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. In PHP we'll
+use a plain array for the `friends` property.
+
+```php
+class User {
+    public $username;
+    public $friends;
+
+    public function __construct($username, array $friends = [])
+    {
+        $this->username = $username;
+        $this->friends = $friends;
+    }
+
+    public function __toString()
+    {
+        return json_encode([
+            'username' => $this->username,
+            'friends' => $this->friends,
+            'friends_count' => count($this->friends)
+        ]);
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```php
+$bashobunny = new User('bashobunny', ['fred', 'barney']);
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('bashobunny', 'users', 'siblings')
+  ->build()
+  ->execute();
+
+echo $response->hasSiblings(); // 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `$response->getSiblings()` array down to one member.
+In our case, we need a function that takes a Riak response object as its argument,
+applies some logic to the list of values contained in the `siblings` property
+of the object, and returns a single value.
+For our example use case here, we'll return the sibling with the
+longest `friends` list:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $max_key = 0;
+    foreach ($siblings as $key => $sibling) {
+        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+            $max_key = $key;
+        }
+    }
+
+    return $siblings[$max_key];
+}
+```
+
+We can then embed this function into a more general function for fetching
+objects from the `users` bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+      ->buildLocation($username, 'users', 'siblings')
+      ->build()
+      ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.3/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object.
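+
+One possible shape for that merged resolver is sketched below, reusing
+the `User` class and the `users`/`siblings` names from above. The
+function name `merging_friends_list_resolver` is illustrative, and the
+sketch assumes the objects were stored as JSON, so that `getData()`
+returns an associative array:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+// Illustrative sketch only: merge the friends lists of all siblings,
+// store the merged object back, and return it
+function merging_friends_list_resolver(Riak $riak, Command\Object\Response $response)
+{
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+
+    // Merge the friends lists of all siblings, dropping duplicates
+    $friends = [];
+    foreach ($siblings as $sibling) {
+        $friends = array_merge($friends, $sibling->getData()['friends']);
+    }
+    $friends = array_values(array_unique($friends));
+
+    // Every sibling shares the same username, since the username is
+    // used as the key, so we can read it from the first sibling
+    $username = $siblings[0]->getData()['username'];
+    $resolved = new User($username, $friends);
+
+    // Write the resolved object back to Riak
+    (new Command\Builder\StoreObject($riak))
+      ->buildJsonObject($resolved)
+      ->buildLocation($username, 'users', 'siblings')
+      ->build()
+      ->execute();
+
+    return $resolved;
+}
+```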
+
+The drawback to this approach is that it's more or less inevitable that a user
+will remove a friend from their friends list, and then that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets).
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/conflict-resolution/python.md b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/python.md
new file mode 100644
index 0000000000..a7703b80ef
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/python.md
@@ -0,0 +1,258 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Python"
+    identifier: "usage_conflict_resolution_python"
+    weight: 102
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/conflict-resolution/python
+  - /riak/kv/3.0.3/dev/using/conflict-resolution/python
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Python
+client](https://github.com/basho/riak-python-client).
+
+## How the Python Client Handles Conflict Resolution
+
+In the official Python client, every object of the `RiakObject` class
+has a `siblings` property that provides access to a list of an object's
+sibling values. If there are no siblings, that property will return a
+list with only one item. Here's an example of an object with siblings:
+
+```python
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x106cc5290>]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above?
+The easiest way to resolve siblings automatically with the Python
+client is to create a conflict-resolving function that takes a list of
+sibling values and returns a single value. Such resolution functions
+can be registered either at the object level or the bucket level. A
+more complete explanation can be found in the section directly below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will
+be of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `friends` property that lists the usernames, as
+strings, of the user's friends. We will also create a `to_json` method,
+as we'll be storing each `User` object as JSON:
+
+```python
+class User(object):
+    def __init__(self, username, friends):
+        self.username = username
+        self.friends = friends
+
+    def to_json(self):
+        return vars(self)
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```python
+new_user = User('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json()
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing and Registering a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = bucket.get('bashobunny')
+
+print(len(obj.siblings) > 1)
+```
+
+If we get `True`, then there are siblings. So what do we do in that
+case? The Python client allows us to write a conflict resolution hook
+function that will be triggered any time siblings are found, i.e. any
+time `len(obj.siblings) > 1`. A hook function like this needs to take a
+single `RiakObject` object as its argument, apply some sort of logic to
+the list of values contained in the `siblings` property, and ultimately
+reduce the `siblings` property to a list with a single "correct" value.
+For our example case, we'll keep the value with the longest `friends`
+list:
+
+```python
+def longest_friends_list_resolver(riak_object):
+    # We'll specify a lambda function that operates on the length of
+    # each sibling's "friends" list:
+    lm = lambda sibling: len(sibling.data['friends'])
+    # Then we'll return a list that contains only the object with the
+    # maximum value for the length of the "friends" list:
+    riak_object.siblings = [max(riak_object.siblings, key=lm), ]
+```
+
+### Registering a Conflict Resolver Function
+
+In the Python client, resolver functions can be registered at the object
+level, as in this example:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.resolver = longest_friends_list_resolver
+
+# Now, when the object is loaded from Riak, it will resolve to a single
+# value instead of multiple values when both commands are executed:
+obj.reload()
+obj.store()
+```
+
+Alternatively, resolvers can be registered at the bucket level, so that
+the resolution is applied to all objects in the bucket:
+
+```python
+bucket = client.bucket_type('siblings').bucket('users')
+bucket.resolver = longest_friends_list_resolver
+
+obj = RiakObject(client, bucket, 'bashobunny')
+obj.reload()
+obj.store()
+
+# The resolver will also be applied if you perform operations using the
+# bucket object:
+
+bucket.get('bashobunny')
+bucket.get('some_other_user')
+```
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` object values and returns a single value. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including code examples
+from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.3/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friend
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists.
+We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```python
+def longest_friends_list_resolver(riak_object):
+    # We start with an empty set
+    friends_list = set()
+
+    # Then we add all the friends from all siblings to the set
+    for user in riak_object.siblings:
+        friends_list.update(user.data['friends'])
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list.
+    username = riak_object.siblings[0].data['username']
+    new_user = User(username, list(friends_list))
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json()
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets).
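+
+To give a sense of what that replacement looks like, here is a minimal
+sketch that stores a friends list as a native Riak set instead. It
+assumes a bucket type named `sets` has been created with its `datatype`
+property set to `set`; the bucket and key names are just the examples
+used in this tutorial:
+
+```python
+from riak.datatypes import Set
+
+# Riak merges concurrent set updates on its own, so no
+# application-side resolver is needed here
+bucket = client.bucket_type('sets').bucket('friends')
+friends = Set(bucket, 'bashobunny')
+friends.add('fred')
+friends.add('barney')
+friends.store()
+
+friends.reload()
+print(friends.value)  # a frozenset of usernames
+```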
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/ruby.md
new file mode 100644
index 0000000000..ded1c1662f
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/conflict-resolution/ruby.md
@@ -0,0 +1,254 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Ruby"
+    identifier: "usage_conflict_resolution_ruby"
+    weight: 101
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/conflict-resolution/ruby
+  - /riak/kv/3.0.3/dev/using/conflict-resolution/ruby
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Ruby
+client](https://github.com/basho/riak-ruby-client).
+
+## How the Ruby Client Handles Conflict Resolution
+
+In the official Ruby client, every Riak object has a `siblings` property
+that provides access to a list of that object's sibling values. If there
+are no siblings, that property will return an array with only one item.
+Here's an example of an object with siblings:
+
+```ruby
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[#<Riak::RContent [text/plain]: "Jim Mora">, #<Riak::RContent [text/plain]: "Pete Carroll">]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? In order to resolve siblings, you need to create a
+resolution function that takes a Riak object and reduces the `siblings`
+array down to a single value. An example is provided in the section
+below.
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends." Each user will be
+of the class `User`, which we'll create below. All of the data for our
+application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set
+to `true`, which means that Riak will generate siblings in certain
+cases---siblings that our application will need to be equipped to
+resolve when necessary.
+
+The question that we need to ask ourselves at this point is the
+following: if a given user has conflicting lists, which list should be
+deemed more "correct"? What criteria should be applied? Should the lists
+be merged? Should we pick a list at random and deem that list correct?
+We'll keep it simple here and say that the following criterion will
+hold: if multiple conflicting lists exist, _the longer list will be the
+one that our application deems correct_. While this might not make sense
+in real-world applications, it's a good jumping-off point.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` and a `friends` property that lists
+the usernames, as strings, of the user's friends.
+We will also create a `to_json` method, as we'll be storing each `User`
+object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# {:username => 'riakuser127', :friends => ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
+```
+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username, type: 'siblings')
+  longest_friends_list_resolver(user_object)
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolve** sibling conflicts if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.3/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be merged into one
+    # array
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+The merge operation that we built to handle conflict resolution is
+analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets).
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/content-types.md b/content/riak/kv/3.0.3/developing/usage/content-types.md
new file mode 100644
index 0000000000..f550da37cc
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/content-types.md
@@ -0,0 +1,192 @@
+---
+title: "Content Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Content Types"
+    identifier: "usage_content_types"
+    weight: 104
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+Riak KV is a fundamentally content-agnostic database. You can use it to
+store anything you want, from JSON to XML to HTML to binaries to images
+and beyond. It's important to note that _all_ objects stored in
+Riak need a specified content type. If you don't specify a
+content type, the reaction will vary based on your client library:
+
+```java
+// In the Java client, the response when storing an object without
+// specifying a content type will depend on what is being stored. If you
+// store a Java Map, for example, the client will automatically specify
+// that the object is "application/json"; if you store a String, the
+// client will specify "application/x-www-form-urlencoded"; POJOs are
+// stored as JSON by default, and so on.
+```
+
+```ruby
+# In the Ruby client, you must always specify a content type. If you
+# don't, you'll see the following error:
+ArgumentError: content_type is not defined!
+```
+
+```php
+# PHP will default to cURL's default content type for POST & PUT requests:
+# application/x-www-form-urlencoded
+
+# If you use the StoreObject::buildJsonObject() method when building your command,
+# it will store the item with application/json as the content-type
+```
+
+```python
+# In the Python client, the default content type is "application/json".
+# Because of this, you should always make sure to specify the content
+# type when storing other types of data.
+```
+
+```csharp
+// Using the Riak .NET Client, the response when storing an object without
+// specifying a content type will depend on what is being stored.
+// If you store a Dictionary, for example, the client will
+// automatically specify that the object is "application/json";
+// POCOs are stored as JSON by default, and so on.
+```
+
+```javascript
+// In the Node.js client, the default content type is "application/json".
+// Because of this, you should always make sure to specify the content
+// type when storing other types of data.
+```
+
+```erlang
+%% In the Erlang client, the response when storing an object without
+%% specifying a content type will depend on what is being stored. If
+%% you store a simple binary, for example, the client will automatically
+%% specify that the object is "application/octet-stream"; if you store a
+%% string, the client will specify "application/x-erlang-binary"; and so
+%% on.
+```
+
+```golang
+// In the Go client, you must always specify a content type.
+```
+
+Because content type negotiation varies so widely from client to client,
+we recommend consulting the documentation for your preferred client for
+more information.
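+
+One way to avoid surprises from these per-client defaults is to funnel
+all writes through a small application-side helper that always sets an
+explicit content type. The following is a minimal sketch for the Ruby
+client; the helper name and its JSON fallback are our own choices, not
+part of the client library:
+
+```ruby
+# Hypothetical helper: every write goes through here, so no object is
+# ever stored without an explicit content type.
+def store_with_content_type(bucket, key, payload, content_type = 'application/json')
+  obj = Riak::RObject.new(bucket, key)
+  obj.content_type = content_type
+  if content_type == 'application/json'
+    obj.data = payload       # the client serializes Ruby objects to JSON
+  else
+    obj.raw_data = payload   # anything else is stored as raw bytes
+  end
+  obj.store
+end
+```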
+
+## Specifying Content Type
+
+For all writes to Riak, you will need to specify a content type, for
+example `text/plain` or `application/json`.
+
+```java
+Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(wildeGeniusQuote)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = Riak::RObject.new(bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.raw_data = 'I have nothing to declare but my genius'
+obj.store
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->buildObject('I have nothing to declare but my genius', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+obj = RiakObject(client, bucket, 'genius')
+obj.content_type = 'text/plain'
+obj.data = 'I have nothing to declare but my genius'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
+var obj = new RiakObject(id, "I have nothing to declare but my genius",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('I have nothing to declare but my genius');
+client.storeValue({
+    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
+                       <<"genius">>,
+                       <<"I have nothing to declare but my genius">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("I have nothing to declare but my genius"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("quotes").
+    WithBucket("oscar_wilde").
+    WithKey("genius").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "I have nothing to declare but my genius" \
+  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
+
+# Please note that POST is also a valid method for writes, for the sake
+# of compatibility
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/creating-objects.md b/content/riak/kv/3.0.3/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..eb1dc1f651
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/creating-objects.md
@@ -0,0 +1,555 @@
+---
+title: "Creating Objects in Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Creating Objects"
+    identifier: "usage_creating_objects"
+    weight: 100
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+[usage content types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/content-types
+
+Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
+requests.
+Here is the basic form of writes:
+
+```
+PUT /types/<type>/buckets/<bucket>/keys/<key>
+
+# If you're using HTTP to interact with Riak, you can also use POST
+```
+
+As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/bucket-types).
+
+The object we're storing will be very simple, just a basic text snippet
+of something that Rufus might say. Let's build the object and then store
+it.
+
+``` java
+String quote = "WOOF!";
+Namespace bucket = new Namespace("animals", "dogs");
+Location rufusLocation = new Location(bucket, "rufus");
+RiakObject rufusObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create(quote));
+StoreValue storeOp = new StoreValue.Builder(rufusObject)
+        .withLocation(rufusLocation)
+        .build();
+client.execute(storeOp);
+```
+
+``` ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = Riak::RObject.new(bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store
+```
+
+``` php
+$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->buildObject('WOOF!', 'text/plain')
+  ->build()
+  ->execute();
+```
+
+``` python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = RiakObject(client, bucket, 'rufus')
+obj.content_type = 'text/plain'
+obj.data = 'WOOF!'
+obj.store()
+```
+
+``` csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var obj = new RiakObject(id, "WOOF!", "text/plain");
+var result = client.Put(obj);
+```
+
+``` javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('WOOF!');
+client.storeValue({
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    value: riakObj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+``` golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("WOOF!"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+```
+
+Notice that we specified both a value for the object, i.e. `WOOF!`, and
+a content type, `text/plain`. See [content types][usage content types] for more information.
+
+Now, you can run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/3.0.3/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/bucket-types).
+
+### Store an Object
+
+Your application will often have its own method of generating the keys
+for its data, e.g. on the basis of timestamps. If so, storing that data
+is easy. The basic request looks like this.
+
+```
+PUT /types/TYPE/buckets/BUCKET/keys/KEY
+
+# If you're using HTTP, POST can be used instead of PUT. The only
+# difference between POST and PUT is that you should POST in cases where
+# you want Riak to auto-generate a key.
+# More on this can be found in the examples below.
+```
+
+There is no need to intentionally create buckets in Riak. They pop into
+existence when keys are added to them, and disappear when all keys have
+been removed from them. If you don't specify a bucket's type, the type
+[`default`]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) will be applied.
+
+#### Write Parameters
+
+Write requests support the following parameters:
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`w` | `quorum` | How many replicas to write to before returning a successful response
+`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
+`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
+`returnbody` | `false` | Whether to return the contents of the stored object
+
+Here is an example of storing an object (another brief text snippet)
+under the key `viper` in the bucket `dodge`, which bears the type
+`cars`, with `w` set to `3`:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+%% The w value is supplied as an option to the put operation:
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [{w, 3}]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3
+```
+
+Again, the above will only work if the `cars` bucket type has been created and activated.
+
+#### Return Body
+
+If `returnbody` is set to `true`, any of the response headers expected
+from a read request may be present.
+As with a `GET` request, `300 Multiple
+Choices` may be returned if siblings existed or were created as part of
+the operation, and the response can be dealt with similarly.
+
+Normal HTTP status codes (responses will vary for client libraries):
+
+* `200 OK`
+* `204 No Content`
+* `300 Multiple Choices`
+
+For example, using the same object from above:
+
+```java
+Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
+BinaryValue text = BinaryValue.create("vroom");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(text);
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(viperKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.RETURN_BODY, true)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = Riak::RObject.new(bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.raw_data = 'vroom'
+obj.store(w: 3, returnbody: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('viper', 'dodge', 'cars')
+  ->buildObject('vroom', 'text/plain')
+  ->withParameter('w', 3)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('cars').bucket('dodge')
+obj = RiakObject(client, bucket, 'viper')
+obj.content_type = 'text/plain'
+obj.data = 'vroom'
+obj.store(w=3, return_body=True)
+```
+
+```csharp
+var id = new RiakObjectId("cars", "dodge", "viper");
+var obj = new RiakObject(id, "vroom", "text/plain");
+var options = new RiakPutOptions();
+options.SetW(new Quorum(3));
+options.SetReturnBody(true);
+var result = client.Put(obj, options);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('vroom');
+
+var options = {
+    bucketType: 'cars', bucket: 'dodge', key: 'viper',
+    w: 3, returnBody: true, value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    var riakObj = rslt.values.shift();
+    var viper = riakObj.value;
+    logger.info("dodge viper: %s", viper.toString('utf8'));
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
+                       <<"viper">>,
+                       <<"vroom">>,
+                       <<"text/plain">>).
+riakc_pb_socket:put(Pid, Object, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("vroom"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("cars").
+    WithBucket("dodge").
+    WithKey("viper").
+    WithW(3).
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "vroom" \
+  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
+```
+
+### Store a New Object and Assign a Random Key
+
+If your application would rather leave key generation up to Riak, issue
+a `POST` request to the bucket URL instead of a `PUT` to a bucket/key
+pair:
+
+```
+POST /types/TYPE/buckets/BUCKET/keys
+```
+
+If you don't pass Riak a `key` name after the bucket, it will know to
+create one for you.
+
+Supported headers are the same as for bucket/key write requests, though
+`X-Riak-Vclock` will never be relevant for these POST requests.
+Supported query parameters are also the same as for bucket/key PUT
+requests.
+ +Normal status codes: + +* `201 Created` + +This command will store an object in the bucket `random_user_keys`, +which bears the bucket type `users`. + +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
+ Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +svc := cmd.(*riak.StoreValueCommand) +rsp := svc.Response +fmt.Printf("Generated key: %v\n", rsp.GeneratedKey) + +// Output: +// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N +``` + +```curl +curl -i -XPOST \ + -H "Content-Type: text/plain" \ + -d "this is a test" \ + http://localhost:8098/types/users/buckets/random_user_keys/keys + +# In the output, you should see a Location header that will give you the +# location of the object in Riak, with the key at the end: + +Location: /buckets/test/keys/G7FYUXtTsEdru4NP32eijMIRK3o +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/usage/custom-extractors.md b/content/riak/kv/3.0.3/developing/usage/custom-extractors.md new file mode 100644 index 0000000000..0812b68c2b --- /dev/null +++ b/content/riak/kv/3.0.3/developing/usage/custom-extractors.md @@ -0,0 +1,424 @@ +--- +title: "Custom Extractors" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Custom Extractors" + identifier: "usage_custom_extractors" + weight: 113 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.3/dev/search/custom-extractors + - /riak/kv/3.0.3/dev/search/custom-extractors +--- + +Solr, and by extension Riak Search, has default extractors for a wide +variety of data types, including JSON, XML, and plaintext. Riak Search +ships with the following extractors: + +Content Type | Erlang Module +:------------|:------------- +`application/json` | `yz_json_extractor` +`application/xml` | `yz_xml_extractor` +`text/plain` | `yz_text_extractor` +`text/xml` | `yz_xml_extractor` +No specified type | `yz_noop_extractor` + +There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/usage/searching-data-types). + +If you're working with a data format that does not have a default Solr +extractor, you can create your own and register it with Riak Search. +We'll show you how to do so by way of example. + +## The Extractor Interface + +Creating a custom extract involves creating an Erlang interface that +implements two functions: + +* `extract/1` - Takes the contents of the object and calls `extract/2` + with the same contents and an empty list +* `extract/2` - Takes the contents of the object and returns an Erlang + [proplist](http://www.erlang.org/doc/man/proplists.html) with a + single field name and a single value associated with that name + +The following extractor shows how a pure text extractor implements those +two functions: + +```erlang +-module(search_test_extractor). +-include("yokozuna.hrl"). +-compile(export_all). + +extract(Value) -> + extract(Value, []). + +extract(Value, Opts) -> + FieldName = field_name(Opts), + [{FieldName, Value}]. + +-spec field_name(proplist()) -> any(). +field_name(Opts) -> + proplists:get_value(field_name, Opts, text). +``` + +This extractor takes the contents of a `Value` and returns a proplist +with a single field name (in this case `text`) and the single value. +This function can be run in the Erlang shell. Let's run it providing the +text `hello`: + +```erlang +> c(search_test_extractor). +%% {ok, search_test_extractor} + +> search_test_extractor:extract("hello"). + +%% Console output: +[{text, "hello"}] +``` + +Upon running this command, the value `hello` would be indexed in Solr +under the fieldname `text`. 
If you wanted to find all objects with a +`text` field that begins with `Fourscore`, you could use the +Solr query `text:Fourscore*`, to give just one example. + +## An Example Custom Extractor + +Let's say that we're storing HTTP header packet data in Riak. Here's an +example of such a packet: + +``` +GET http://www.google.com HTTP/1.1 +``` + +We want to register the following information in Solr: + +Field name | Value | Extracted value in this example +:----------|:------|:------------------------------- +`method` | The HTTP method | `GET` +`host` | The URL's host | `www.google.com` +`uri` | The URI, i.e. what comes after the host | `/` + +The example extractor below would provide the three desired +fields/values. It relies on the +[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3) +function from Erlang's standard library. + +```erlang +-module(yz_httpheader_extractor). +-compile(export_all). + +extract(Value) -> + extract(Value, []). + +%% In this example, we can ignore the Opts variable from the example +%% above, hence the underscore: +extract(Value, _Opts) -> + {ok, + {http_request, + Method, + {absoluteURI, http, Host, undefined, Uri}, + _Version}, + _Rest} = erlang:decode_packet(http, Value, []), + [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}]. +``` + +This file will be stored in a `yz_httpheader_extractor.erl` file (as +Erlang filenames must match the module name). Now that our extractor has +been written, it must be compiled and registered in Riak before it can +be used. + +## Registering Custom Extractors + +In order to use a custom extractor, you must create a compiled `.beam` +file out of your `.erl` extractor file and then tell Riak where that +file is located. Let's say that we have created a +`search_test_extractor.erl` file in the directory `/opt/beams`. First, +we need to compile that file: + +```bash +erlc search_test_extractor.erl +``` + +To instruct Riak where to find the resulting +`search_test_extractor.beam` file, we'll need to add a line to an +`advanced.config` file in the node's `/etc` directory (more information +can be found in our documentation on [advanced]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#advanced-configuration)). Here's an +example: + +```advancedconfig +[ + %% Other configs + {vm_args, [ + {"-pa /opt/beams", ""} + ]}, + %% Other configs +] +``` + +This will instruct the Erlang VM on which Riak runs to look for compiled +`.beam` files in the proper directory. You should re-start the node at +this point. Once the node has been re-started, you can use the node's +Erlang shell to register the `yz_httpheader_extractor`. First, attach to +the shell: + +```bash +riak attach +``` + +At this point, we need to choose a MIME type for our extractor. Let's +call it `application/httpheader`. Once you're in the shell: + +```erlang +> yz_extractor:register("application/httpheader", yz_httpheader_extractor). +``` + +If successful, this command will return a list of currently registered +extractors. 
+It should look like this:
+
+```erlang
+[{default,yz_noop_extractor},
+ {"application/httpheader",yz_httpheader_extractor},
+ {"application/json",yz_json_extractor},
+ {"application/riak_counter",yz_dt_extractor},
+ {"application/riak_map",yz_dt_extractor},
+ {"application/riak_set",yz_dt_extractor},
+ {"application/xml",yz_xml_extractor},
+ {"text/plain",yz_text_extractor},
+ {"text/xml",yz_xml_extractor}]
+```
+
+If the `application/httpheader` extractor is part of that list, then the
+extractor has been successfully registered.
+
+## Verifying Our Custom Extractor
+
+Now that Riak Search knows how to decode and extract HTTP header packet
+data, let's store some in Riak and then query it. We'll put the example
+packet data from above in a `google_packet.bin` file. Then, we'll `PUT`
+that binary to Riak's `/search/extract` endpoint:
+
+```curl
+# Note that we use our custom MIME type here
+curl -XPUT $RIAK_HOST/search/extract \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+That should return the following JSON:
+
+```json
+{
+  "method": "GET",
+  "host": "www.google.com",
+  "uri": "/"
+}
+```
+
+We can also verify this in the Erlang shell (whether in a Riak node's
+Erlang shell or otherwise):
+
+```erlang
+yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor).
+
+%% Console output:
+[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}]
+```
+
+## Indexing and Searching HTTP Header Packet Data
+
+Now that Solr knows how to extract HTTP header packet data, we need to
+create a schema that extends the [default schema]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added
+to `<fields>` in the schema, which we'll name `http_header_schema` and
+store in a `http_header_schema.xml` file:
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="http_header_schema" version="1.5">
+<fields>
+  <!-- other required fields here -->
+
+  <field name="method" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="host" type="string" indexed="true" stored="true" multiValued="false"/>
+  <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/>
+</fields>
+```
+
+Now, we can store the schema:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("http_header_schema.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_xml = File.read('http_header_schema.xml')
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```php
+$schema_string = file_get_contents('http_header_schema.xml');
+(new \Basho\Riak\Command\Builder\StoreSchema($riak))
+  ->withName('http_header_schema')
+  ->withSchemaString($schema_string)
+  ->build()
+  ->execute();
+```
+
+```python
+schema_xml = open('http_header_schema.xml').read()
+client.create_search_schema('http_header_schema', schema_xml)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/schema/http_header_schema \
+  -H 'Content-Type: application/xml' \
+  --data-binary @http_header_schema.xml
+```
+
+Riak now has our schema stored and ready for use.
+Let's create a search
+index called `header_data` that's associated with our new schema:
+
+```java
+YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
+StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreIndex($riak))
+  ->withName('header_data')
+  ->usingSchema('http_header_schema')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('header_data', 'http_header_schema')
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/header_data \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"http_header_schema"}'
+```
+
+Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types)
+for all of the HTTP header data that we plan to store. Any bucket that
+bears this type will be associated with our `header_data` search index.
+We'll call our bucket type `http_data_store`.
+
+```bash
+riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
+riak-admin bucket-type activate http_data_store
+```
+
+Let's use the same `google_packet.bin` file that we used previously and
+store it in a bucket with the `http_data_store` bucket type, making sure
+to use our custom `application/httpheader` MIME type:
+
+```java
+Location key = new Location(new Namespace("http_data_store", "packets"), "google");
+File packetData = new File("google_packet.bin");
+byte[] packetBinary = FileUtils.readFileToByteArray(packetData);
+
+RiakObject packetObject = new RiakObject()
+        .setContentType("application/httpheader")
+        .setValue(BinaryValue.create(packetBinary));
+
+StoreValue storeOp = new StoreValue.Builder(packetObject)
+        .withLocation(key)
+        .build();
+client.execute(storeOp);
+```
+
+```ruby
+packet_data = File.read('google_packet.bin')
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = Riak::RObject.new(bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.raw_data = packet_data
+obj.store
+```
+
+```php
+$object = new Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('google', 'packets', 'http_data_store')
+  ->withObject($object)
+  ->build()
+  ->execute();
+```
+
+```python
+packet_data = open('google_packet.bin').read()
+bucket = client.bucket_type('http_data_store').bucket('packets')
+obj = RiakObject(client, bucket, 'google')
+obj.content_type = 'application/httpheader'
+obj.data = packet_data
+obj.store()
+```
+
+```curl
+curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
+  -H 'Content-Type: application/httpheader' \
+  --data-binary @google_packet.bin
+```
+
+Now that we have some header packet data stored, we can query our
+`header_data` index on whatever basis we'd like.
+First, let's verify
+that we'll get one result if we query for objects that have the HTTP
+method `GET`:
+
+```java
+// Using the same method from above:
+String query = "method:GET";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withQuery('method:GET')
+  ->withIndexName('header_data')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('header_data', 'method:GET')
+results['num_found'] # 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"
+
+# This should return a fairly large JSON object with a "num_found" field
+# The value of that field should be 1
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/deleting-objects.md b/content/riak/kv/3.0.3/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..8ba898cc3f
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/deleting-objects.md
@@ -0,0 +1,157 @@
+---
+title: "Deleting Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Deleting Objects"
+    identifier: "usage_deleting_objects"
+    weight: 103
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+The delete command follows a predictable pattern and looks like this:
+
+```
+DELETE /types/TYPE/buckets/BUCKET/keys/KEY
+```
+
+The normal HTTP response codes for `DELETE` operations are `204 No
+Content` and `404 Not Found`. 404 responses are *normal*, in the sense
+that `DELETE` operations are idempotent and not finding the resource has
+the same effect as deleting it.
+
+Let's try to delete the `genius` key from the `oscar_wilde` bucket
+(which bears the type `quotes`):
+
+```java
+Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
+DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
+client.execute(delete);
+```
+
+```ruby
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\DeleteObject($riak))
+  ->buildLocation('genius', 'oscar_wilde', 'quotes')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('quotes').bucket('oscar_wilde')
+bucket.delete('genius')
+```
+
+```csharp
+var id = new RiakObjectId("users", "random_user_keys", null);
+var obj = new RiakObject(id, @"{'user':'data'}",
+    RiakConstants.ContentTypes.ApplicationJson);
+var rslt = client.Put(obj);
+string key = rslt.Value.Key;
+id = new RiakObjectId("users", "random_user_keys", key);
+var del_rslt = client.Delete(id);
+```
+
+```javascript
+// continuing from above example
+options = {
+    bucketType: 'users', bucket: 'random_user_keys',
+    key: generatedKey
+};
+client.deleteValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>)
+```
+
+```golang
+// Continuing from above example
+cmd, err = riak.NewDeleteValueCommandBuilder().
+    WithBucketType("users").
+    WithBucket("random_user_keys").
+    WithKey(rsp.GeneratedKey).
+ Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} +``` + +```curl +curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius +``` + +## Client Library Examples + +If you are updating an object that has been deleted---or if an update +might target a deleted object---we recommend that +you first fetch the [causal context]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context) of the object prior to updating. +This can be done by setting the `deletedvclock` parameter to `true` as +part of the [fetch operation]({{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers/fetch-object). This can also be done +with the official Riak clients for Ruby, Java, and Erlang, as in the +example below: + + +```ruby +object.delete +deleted_object = bucket.get('bucket', 'key', deletedvclock: true) +deleted_object.vclock +``` + +```python +# It is not currently possible to fetch the causal context for a deleted +# key in the Python client. +``` + +```java +Location loc = new Location("<bucket>") + .setBucketType("<bucket_type>") + .setKey("<key>"); +FetchValue fetch = new FetchValue.Builder(loc) + .withOption(Option.DELETED_VCLOCK, true) + .build(); +FetchValue.Response response = client.execute(fetch); +System.out.println(response.getVclock().asString()); +``` + +```erlang +{ok, Obj} = riakc_pb_socket:get(Pid, + {<<"bucket_type">>, <<"bucket">>}, + <<"key">>, + [{deleted_vclock}]). + +%% In the Erlang client, the vector clock is accessible using the Obj +%% object obtained above. +``` + +```php +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type') + ->build() + ->execute(); + +echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/usage/document-store.md b/content/riak/kv/3.0.3/developing/usage/document-store.md new file mode 100644 index 0000000000..c37ba10339 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/usage/document-store.md @@ -0,0 +1,617 @@ +--- +title: "Implementing a Document Store" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Implementing a Document Store" + identifier: "usage_document_store" + weight: 112 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.3/dev/search/document-store + - /riak/kv/3.0.3/dev/search/document-store +--- + +Although Riak wasn't explicitly created as a document store, two +features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/)---make it possible to use Riak as a +highly scalable document store with rich querying capabilities. In this +tutorial, we'll build a basic implementation of a document store using +[Riak maps]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#maps). + +## Basic Approach + +Riak Search enables you to implement a document store in Riak in a +variety of ways. You could, for example, store and query JSON objects or +XML and then retrieve them later via Solr queries. In this tutorial, +however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#maps), +index that data using Riak Search, and then run Solr queries against +those stored objects. + +You can think of these Search indexes as **collections**. 
Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/3.0.3/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. + +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). 
+Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @blog_post_schema.xml +``` + +With our schema uploaded, we can create an index called `blog_posts` and +associate that index with our schema: + +```java +YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema"); +StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('blog_posts') + ->usingSchema('blog_post_schema') + ->build() + ->execute(); +``` + +```python +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```csharp +var idx = new SearchIndex("blog_posts", "blog_post_schema"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: 'blog_post_schema', + indexName: 'blog_posts' +}; +client.storeIndex(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/blog_posts \ + -H 'Content-Type: application/json' \ + -d '{"schema": "blog_post_schema"}' +``` + +## How Collections will Work + +Collections are not a concept that is native to Riak but we can easily +mimic collections by thinking of a bucket type as a collection. When we +associate a bucket type with a Riak Search index, all of the objects +stored in any bucket of that bucket type will be queryable on the basis +of that one index. For this tutorial, we'll create a bucket type called +`cms` and think of that as a collection. 
+We could also restrict our
+`blog_posts` index to a single bucket just as easily and think of that
+as a queryable collection, but we will not do that in this tutorial.
+
+The advantage of the bucket-type-based approach is that we could store
+blog posts from different blogs in different buckets and query them all
+at once as part of the same index. It depends on the use case at
+hand. In this tutorial, we'll only be storing posts from one blog, which
+is called "Cat Pics Quarterly" and provides in-depth theoretical
+discussions of cat pics with a certain number of Reddit upvotes. All of
+the posts in this blog will be stored in the bucket
+`cat_pics_quarterly`.
+
+First, let's create our `cms` bucket type and associate it with the
+`blog_posts` index:
+
+```bash
+riak-admin bucket-type create cms \
+  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
+riak-admin bucket-type activate cms
+```
+
+Now, any object stored in any bucket of the type `cms` will be indexed
+as part of our "collection."
+
+## Storing Blog Posts as Maps
+
+Now that we know how each element of a blog post can be translated into
+one of the Riak Data Types, we can create an interface in our
+application to serve as that translation layer. Using the method
+described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-modeling), we can construct a
+class that looks like this:
+
+```java
+import java.util.Set;
+
+public class BlogPost {
+    private String title;
+    private String author;
+    private String content;
+    private Set<String> keywords;
+    private DateTime datePosted;
+    private Boolean published;
+    private static final String bucketType = "cms";
+
+    private Location location;
+
+    private RiakClient client;
+
+    public BlogPost(RiakClient client,
+                    String bucketName,
+                    String title,
+                    String author,
+                    String content,
+                    Set<String> keywords,
+                    DateTime datePosted,
+                    Boolean published) {
+        this.client = client;
+        this.location = new Location(new Namespace(bucketType, bucketName), null);
+        this.title = title;
+        this.author = author;
+        this.content = content;
+        this.keywords = keywords;
+        this.datePosted = datePosted;
+        this.published = published;
+    }
+
+    public void store() throws Exception {
+        RegisterUpdate titleUpdate = new RegisterUpdate(title);
+        RegisterUpdate authorUpdate = new RegisterUpdate(author);
+        RegisterUpdate contentUpdate = new RegisterUpdate(content);
+        SetUpdate keywordsUpdate = new SetUpdate();
+        for (String keyword : keywords) {
+            keywordsUpdate.add(keyword);
+        }
+        RegisterUpdate dateUpdate =
+            new RegisterUpdate(datePosted.toString("yyyy-MM-dd HH:mm"));
+        FlagUpdate publishedUpdate = new FlagUpdate(published);
+        MapUpdate mapUpdate = new MapUpdate()
+            .update("title", titleUpdate)
+            .update("author", authorUpdate)
+            .update("content", contentUpdate)
+            .update("keywords", keywordsUpdate)
+            .update("date", dateUpdate)
+            .update("published", publishedUpdate);
+        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
+            .build();
+        client.execute(storeBlogPost);
+    }
+}
+```
+
+```ruby
+class BlogPost
+  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
+    bucket = client.bucket_type('cms').bucket(bucket_name)
+    map = Riak::Crdt::Map.new(bucket, nil)
+    map.batch do |m|
+      m.registers['title'] = title
+      m.registers['author'] = author
+      m.registers['content'] = content
+      keywords.each do |k|
+        m.sets['keywords'].add(k)
+      end
+      m.registers['date'] = date_posted
+      if published
+        m.flags['published'] = true
+      end
+    end
+  end
+end
+```
+
+```php
+class BlogPost {
+    private $title = '';
+    private $author = '';
+    private $content = '';
+    private $keywords = [];
+    private $datePosted = '';
+    private $published = false;
+    private $bucketType = "cms";
+
+    private $bucket = null;
+
+    private $riak = null;
+
+    public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
+    {
+        $this->riak = $riak;
+        $this->bucket = new Bucket($bucket, $this->bucketType);
+        $this->title = $title;
+        $this->author = $author;
+        $this->content = $content;
+        $this->keywords = $keywords;
+        $this->datePosted = $date;
+        $this->published = $published;
+    }
+
+    public function store()
+    {
+        $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));
+
+        foreach($this->keywords as $keyword) {
+            $setBuilder->add($keyword);
+        }
+
+        (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
+            ->updateRegister('title', $this->title)
+            ->updateRegister('author', $this->author)
+            ->updateRegister('content', $this->content)
+            ->updateRegister('date', $this->datePosted)
+            ->updateFlag('published', $this->published)
+            ->updateSet('keywords', $setBuilder)
+            ->withBucket($this->bucket)
+            ->build()
+            ->execute();
+    }
+}
+```
+
+```python
+from riak.datatypes import Map
+
+class BlogPost:
+    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
+        bucket = client.bucket_type('cms').bucket(bucket_name)
+        self.map = Map(bucket, None)
+        self.map.registers['title'].assign(title)
+        self.map.registers['author'].assign(author)
+        self.map.registers['content'].assign(content)
+        for k in keywords:
+            self.map.sets['keywords'].add(k)
+        self.map.registers['date'].assign(date_posted)
+        if published:
+            self.map.flags['published'].enable()
+        self.map.store()
+```
+
+```csharp
+/*
+ * Please see the code in the RiakClientExamples project:
+ * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
+ */
+```
+
+```javascript
+/*
+ * Please see the code in the examples repository:
+ * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
+ */
+```
+
+Now, we can store some blog posts.
+We'll start with just one:
+
+```java
+Set<String> keywords = new HashSet<String>();
+keywords.add("adorbs");
+keywords.add("cheshire");
+
+BlogPost post1 = new BlogPost(client, // client object
+                              "cat_pics_quarterly", // bucket
+                              "This one is so lulz!", // title
+                              "Cat Stevens", // author
+                              "Please check out these cat pics!", // content
+                              keywords, // keywords
+                              new DateTime(), // date posted
+                              true); // published
+try {
+    post1.store();
+} catch (Exception e) {
+    System.out.println(e);
+}
+```
+
+```ruby
+keywords = ['adorbs', 'cheshire']
+date = Time.now.strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost.new('cat_pics_quarterly',
+                          'This one is so lulz!',
+                          'Cat Stevens',
+                          'Please check out these cat pics!',
+                          keywords,
+                          date,
+                          true)
+```
+
+```php
+$keywords = ['adorbs', 'cheshire'];
+$date = new \DateTime('now');
+
+$post1 = new BlogPost(
+    $riak, // client object
+    'cat_pics_quarterly', // bucket
+    'This one is so lulz!', // title
+    'Cat Stevens', // author
+    'Please check out these cat pics!', // content
+    $keywords, // keywords
+    $date, // date posted
+    true // published
+);
+```
+
+```python
+import datetime
+
+keywords = ['adorbs', 'cheshire']
+date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+blog_post1 = BlogPost('cat_pics_quarterly',
+                      'This one is so lulz!',
+                      'Cat Stevens',
+                      'Please check out these cat pics!',
+                      keywords,
+                      date,
+                      True)
+```
+
+```csharp
+var keywords = new HashSet<string> { "adorbs", "cheshire" };
+
+var post = new BlogPost(
+    "This one is so lulz!",
+    "Cat Stevens",
+    "Please check out these cat pics!",
+    keywords,
+    DateTime.Now,
+    true);
+
+var repo = new BlogPostRepository(client, "cat_pics_quarterly");
+string id = repo.Save(post);
+```
+
+```javascript
+var post = new BlogPost(
+    'This one is so lulz!',
+    'Cat Stevens',
+    'Please check out these cat pics!',
+    [ 'adorbs', 'cheshire' ],
+    new Date(),
+    true
+);
+
+var repo = new BlogPostRepository(client, 'cat_pics_quarterly');
+
+repo.save(post, function (err, rslt) {
+    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
+});
+```
+
+## Querying
+
+Now that we have some blog posts stored in our "collection," we can
+start querying for whatever we'd like. Let's say that we want to find
+all blog posts with the keyword `funny` (after all, some cat pics are
+quite serious, and we may not want those).
+ +```java +String index = "blog_posts"; +String query = "keywords_set:funny"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'keywords_set:funny') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('keywords_set:funny') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'keywords_set:funny') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('keywords_set:funny') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny" +``` + +Or we can find posts that contain the word `furry`: + +```java +String index = "blog_posts"; +String query = "content_register:furry"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'content_register:furry') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('content_register:furry') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'content_register:furry') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('content_register:furry') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry" +``` + +Here are some more possible queries: + +Info | Query +:----|:----- +Unpublished posts | `published_flag:false` +Titles that begin with `Loving*` | `title_register:Loving*` +Post bodies containing the words `furry` and `jumping` | `content_register:[furry AND jumping]` + + + + diff --git a/content/riak/kv/3.0.3/developing/usage/mapreduce.md b/content/riak/kv/3.0.3/developing/usage/mapreduce.md new file mode 100644 index 0000000000..a844933773 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/usage/mapreduce.md @@ -0,0 +1,246 @@ +--- +title: "Using MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Using MapReduce" + identifier: "usage_mapreduce" + weight: 106 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.3/dev/using/mapreduce + - /riak/kv/3.0.3/dev/using/mapreduce +--- + +[usage 2i]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/secondary-indexes +[usage search]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search +[usage types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types +[api http]: {{<baseurl>}}riak/kv/3.0.3/developing/api/http +[api pb]: {{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode +[guide mapreduce]: 
{{<baseurl>}}riak/kv/3.0.3/developing/app-guide/advanced-mapreduce + +{{% note title="Use MapReduce sparingly" %}} +In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive to the extent that they can degrade performance in +production clusters operating under load. Thus, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. +{{% /note %}} + +MapReduce (M/R) is a technique for dividing data processing work across +a distributed system. It takes advantage of the parallel processing +power of distributed systems and also reduces network bandwidth, as the +algorithm is passed around to where the data lives rather than +transferring a potentially huge dataset to a client algorithm. + +You can use MapReduce for things like: filtering documents by +tags, counting words in documents, and extracting links to related data. +In Riak KV, MapReduce is one method for querying that is not strictly based +on key querying, alongside [secondary indexes][usage 2i] +and [search][usage search]. MapReduce jobs can be submitted through the +[HTTP API][api http] or the [Protocol Buffers API][api pb], although we +strongly recommend using the Protocol Buffers API for performance +reasons. + +## Features + +* Map phases execute in parallel with data locality. +* Reduce phases execute in parallel on the node where the job was + submitted. +* MapReduce queries written in Erlang. + +## When to Use MapReduce + +* When you know the set of objects over which you want to MapReduce + (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key) +* When you want to return actual objects or pieces of objects and not + just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on + non-key-based queries, but they only return lists of keys and not + whole objects. +* When you need the utmost flexibility in querying your data. MapReduce + gives you full access to your object and lets you pick it apart any + way you want. + +## When Not to Use MapReduce + +* When you want to query data over an entire bucket. MapReduce uses a + list of keys, which can place a lot of demand on the cluster. +* When you want latency to be as predictable as possible. + +## How it Works + +The MapReduce framework helps developers divide a query into steps, +divide the dataset into chunks, and then run those step/chunk pairs in +separate physical hosts. + +There are two steps in a MapReduce query: + +* **Map** - The data collection phase, which breaks up large chunks of + work into smaller ones and then takes action on each chunk. Map + phases consist of a function and a list of objects on which the map + operation will operate. +* **Reduce** - The data collation or processing phase, which combines + the results from the map step into a single output. The reduce phase + is optional. + +Riak KV MapReduce queries have two components: + +* A list of inputs +* A list of phases + +The elements of the input list are object locations as specified by +[bucket type][usage types], bucket, and key. The elements of the +phases list are chunks of information related to a map, a reduce, or a +link function. + +A MapReduce query begins when a client makes the request to Riak KV. 
The
+node that the client contacts to make the request becomes the
+*coordinating node* responsible for the MapReduce job. As described
+above, each job consists of a list of phases, where each phase is either
+a map or a reduce phase. The coordinating node uses the list of phases
+to route the object keys and the function that will operate over the
+objects stored in those keys and instruct the proper [vnode][glossary vnode] to
+run that function over the right objects.
+
+After running the map function, the results are sent back to the
+coordinating node. This node then concatenates the list and passes that
+information over to a reduce phase on the same coordinating node,
+assuming that the next phase in the list is a reduce phase.
+
+The diagram below provides an illustration of how a coordinating vnode
+orchestrates a MapReduce job.
+
+![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png)
+
+## Example
+
+In this example, we'll create four objects with the text "caremad"
+repeated a varying number of times and store those objects in the bucket
+`training` (which does not bear a [bucket type][usage types]).
+An Erlang MapReduce function will be used to count the occurrences of
+the word "caremad."
+
+### Data object input commands
+
+For the sake of simplicity, we'll use [curl](http://curl.haxx.se/)
+in conjunction with Riak KV's [HTTP API][api http] to store the objects:
+
+```curl
+curl -XPUT http://localhost:8098/buckets/training/keys/foo \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad data goes here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bar \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad caremad'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/baz \
+  -H 'Content-Type: text/plain' \
+  -d 'nothing to see here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bam \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad'
+```
+
+### MapReduce invocation
+
+Invoking a MapReduce function from a compiled Erlang program requires
+that the function be compiled and distributed to all nodes.
+
+For interactive use, however, it's not necessary to do so; instead, we
+can invoke the client library from the
+[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define
+functions to send to Riak KV on the fly.
+
+First, we define the map function. For each object in the `training`
+bucket, it returns the object's key along with the number of occurrences
+of the text `caremad` in that object.
+
+We're going to generalize and optimize it a bit by supplying a
+compiled regular expression when we invoke MapReduce; our function
+will expect that as the third argument.
+
+```erlang
+ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of
+    {match, Matches} -> [{riak_object:key(O), length(Matches)}];
+    nomatch -> [{riak_object:key(O), 0}]
+end end.
+```
+
+Next, to call `ReFun` on all keys in the `training` bucket, we can do
+the following in the Erlang shell.
+
+{{% note title="Warning" %}}
+Do not use this in a production
+environment; listing all keys to identify those in the `training` bucket
+is a very expensive process.
+{{% /note %}}
+
+```erlang
+{ok, Re} = re:compile("caremad").
+```
+
+That will return output along the following lines, verifying that
+compilation has completed:
+
+```
+{ok,{re_pattern,0,0,
+      <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100,
+        ...>>}}
+```
+
+Then, we can create a socket link to our cluster:
+
+```erlang
+{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+                              [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
+
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/next-gen-replication.md b/content/riak/kv/3.0.3/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..1f0ca6f10a
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/next-gen-replication.md
@@ -0,0 +1,153 @@
+---
+title: "Next-Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Next Gen Replication"
+    identifier: "learn_concepts_next_gen_replication"
+    weight: 108
+    parent: "learn_concepts"
+version_history:
+  in: "2.9.1+"
+toc: true
+aliases:
+  - /riak-docs/riak/3.0.3/dev/using/nextgenreplication
+---
+[concept TicTac aae]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/tictac-active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/replication
+
+## Next Generation Replication - How it Works
+
+### Replication Actors
+
+Each node in `riak_kv` starts three processes that manage inter-cluster replication: a tictac AAE full-sync manager, a replication queue source manager, and a replication queue sink manager. All processes are started by default (whether or not replication is enabled), but will only play an active role should replication be configured. Further details on the processes involved:
+
+* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`
+
+  * There is a single actor on each node that manages the full-sync reconciliation workload configured for that node.
+
+  * Each node is configured with the details of a peer node at a remote cluster. Each manager is responsible for controlling cluster-wide hashtree exchanges between the local node and the peer node, and for prompting any repairs required across the cluster (not just on this node). The information is exchanged between the peers, but that information represents the data across the whole cluster. 
Necessary repairs are prompted through the replication queue source-side manager `riak_kv_replrtq_src`.
+
+  * Each node is configured with a schedule to determine how frequently this manager will run its reconcile and repair operations.
+
+  * It is an administrator responsibility to ensure the cluster AAE workload is distributed across nodes with sufficient diversity to ensure correct operation under failure. Work is not re-distributed between nodes in response to failure on either the local or remote cluster, so there must be other nodes already configured to share that workload to continue operation under failure conditions.
+
+  * Each node can only full-sync with one other cluster (via the one peer node). If the cluster needs to full-sync with more than one cluster, then the administrator should ensure different nodes have the different configurations necessary to achieve this.
+
+  * Scheduling of work to minimise concurrency of reconciliation operations is managed by this actor using a simple, coordination-free mechanism.
+
+  * The administrator may at run-time suspend or resume the regular running of full-sync operations on any given node via the `riak_kv_ttaaefs_manager`.
+
+* __Replication Queue Source-Side Manager__
+
+  * There is a single actor on each node that manages the queueing of replication object references to be consumed by other clusters. This actor runs a configurable number of queues, which contain pointers to data which is required to be consumed by different remote clusters.
+
+  * The general pattern is that each delta within a cluster will be published once via the `riak_kv_replrtq_src` on a node local to the discovery of the change. Each queue which is a source of updates will have multiple consumers spread across multiple sink nodes on the receiving cluster - where each sink-side node's consumers are being managed by a `riak_kv_replrtq_snk` process on that node.
+
+  * Queues may have data filtering rules to restrict what changes are distributed via that queue. The filters can restrict replication to a specific bucket, or bucket type, a bucket name prefix or allow for any change to be published to that queue.
+
+  * __Real-time replication__ changes (i.e. PUTs that have just been co-ordinated on this node within the cluster) are sent to the `riak_kv_replrtq_src` in one of the following formats:
+    * {Bucket, Key, Clock, {tombstone, Object}};
+    * {Bucket, Key, Clock, {object, Object}};
+    * {Bucket, Key, Clock, to_fetch}.
+
+  * Real-time replicated objects are the highest priority items to be queued, and are placed on __every queue whose data filtering rules are matched__ by the object. If the priority queue has grown beyond a limited number of items (the number being defined in `riak_kv.replrtq_srcobjectlimit`), then any {object, Object} reference is stripped and replaced with `to_fetch`. This is to help limit the memory consumed by the queue during failure conditions i.e. when a sink has stopped consuming from the source queue.
+
+  * Changes identified by __AAE full-sync replication__ processes run by the `riak_kv_ttaaefs` manager on the local node are sent to the `riak_kv_replrtq_src` as references, and queued as the second highest priority. These changes are queued only on __a single queue defined within the configuration__ of `riak_kv_ttaaefs_manager`. The changes queued are only references to the object (Bucket, Key and Clock) not the actual object. 
+ + * Changes identified by __AAE fold operations__ for administrator initiated transition or repair operations (e.g. fold over a bucket or key-range, or for a given range of modified dates), are sent to the `riak_kv_replrtq_src` to be queued as the lowest priority onto __a single queue defined by the administrator when initiating the AAE fold operation__. The changes queued are only references to the object (Bucket, Key and Clock) not the actual object - and are only the changes discovered through the fold running on vnodes local to this node. + + * Should the local node fail, all undelivered object references will be dropped. + + * Queues are bounded, with limits set separately for each priority. Items are consumed from the queue in strict priority order. So a backlog of non-real-time replication events cannot cause a backlog or failure in real-time events. + + * The queues are provided using the existing `riak_core_priority_queue` module in Riak. + + * The administrator may at run-time suspend or resume the publishing of data to specific queues via the `riak_kv_replrtq_src` process. + +* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk` + + * There is a single actor on each node that manages the process of consuming from queues on the `riak_kv_replrtq_src` on remote clusters. + + * The `riak_kv_replrtq_snk` can be configured to consume from multiple queues, across an open-ended number of peers. For instance if each node on Cluster A maintains a queue named `cluster_c_full`, and each node on Cluster B maintains a queue named `cluster_c_partial` - then `riak_kv_replrtq_snk` can be configured to consume from the `cluster_c_full` from every node in Cluster A and from `cluster_c_partial` from every node in Cluster B. + + * The `riak_kv_replrtq_snk` manages a finite number of workers for consuming from remote peers. The `riak_kv_replrtq_snk` tracks the results of work in order to back-off slightly from peers regularly not returning results to consume requests (in favour of those peers indicating a backlog by regularly returning results). The `riak_kv_replrtq_snk` also tracks the results of work in order to back-off severely from those peers returning errors (so as not to lock too many workers consuming from unreachable nodes). + + * The administrator may at run-time suspend or resume the consuming of data from specific queues or peers via the `riak_kv_replrtq_snk`. + +### Real-time Replication - Step by Step + +Previous replication implementations initiate replication through a post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm` after "enough" responses have been received from other vnodes (based on n, w, dw and pw values for the PUT). Without enough responses, the replication hook is not fired, although the client should receive an error and retry. This process of retrying may eventually fire the hook - although it is possible for a PUT to fail, the hook not to be fired, but a GET be locally successful (due to read-repair and anti-entropy) and there be no clue that the object has not been replicated. + +In implementing the new replication solution, the point of firing off replication has been changed to the point that the co-ordinated PUT is completed. So the replication of the PUT to the clusters may occur in parallel to the replication of the PUT to other nodes in the source cluster. This is the first opportunity where sufficient information is known (e.g. 
the updated vector clock), and reduces the size of the time-window of inconsistency between the clusters, and also reduces the window of opportunity for a PUT to succeed but not have replication triggered.
+
+Replication is fired within the `riak_kv_vnode` `actual_put/8`. On condition of the vnode being a co-ordinator of the put, and of `riak_kv.replrtq_enablesrc` being set to enabled (true), the following work is done:
+
+- The object reference to be replicated is determined; this is the type of reference to be placed on the replication queue.
+
+  - If the object is now a tombstone, the whole object is used as the replication reference. The whole object is used due to the small size of the object, and the need to avoid race conditions with reaping activity if `delete_mode` is not `keep` - the cluster may not be able to fetch the tombstone to replicate in the future. The whole object must be kept on the queue and not be filtered by the `riak_kv_replrtq_src` to be replaced with a `to_fetch` reference.
+
+  - If the object is below the `riak_kv.replrtq_srcobjectsize` (default 200KB) then the whole object will be sent to the `riak_kv_replrtq_src`, and it will be queued as a whole object as long as the current size of the priority real-time queue does not exceed the `riak_kv.replrtq_srcobjectlimit` (default 1000). If an object is over the size limit a `to_fetch` reference will be sent instead of the object, and if the queue is too large the `riak_kv_replrtq_src` will substitute a `to_fetch` reference before queueing.
+
+- The `{Bucket, Key, Clock, ObjectReference}` is cast to the `riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the priority queue.
+
+- The queue has a configurable absolute limit that is applied individually for each priority. The limit is configured via `riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5 minutes of traffic at 1,000 PUTs per second). When this limit is reached, new replication references are discarded on receipt rather than queued - these discarded references will need to eventually be re-replicated via full-sync.
+
+The reference now needs to be handled by the `riak_kv_replrtq_src`. The task list for this process is:
+
+- Assign a priority to the replication event depending on what prompted the replication (e.g. highest priority to real-time events received from co-ordinator vnodes).
+
+- Add the reference to the tail of __every__ matching queue based on priority. Each queue is configured to either match `any` replication event, no real-time events (using the configuration `block_rtq`), or a subset of events (using either a bucket `type` filter or a `bucket` filter).
+
+In order to replicate the object, it must now be fetched from the queue by a sink. A sink-side cluster should have multiple consumers, on multiple nodes, consuming from each node in the source-side cluster. These workers are handed work items by the `riak_kv_replrtq_snk`, with a Riak client configured to communicate to the remote node, and the worker will initiate a `fetch` from that node.
+
+On receipt of the `fetch` request the source node should:
+
+- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in place of `{Bucket, Key}`.
+
+- The GET FSM should go directly into the `queue_fetch` state, and try to fetch the next replication reference from the given queue name via the `riak_kv_replrtq_src`. 
+
+  - If the fetch from the queue returns `queue_empty` this is relayed back to the sink-side worker, and ultimately the `riak_kv_replrtq_snk`, which may then slow down the pace at which fetch requests are sent to this node/queue combination. To reduce the volume of individual requests when queues are mainly empty, the queue is only considered empty if it has reported empty 8 times from requests 4ms apart.
+
+  - If the fetch returns an actual object, this is relayed back to the sink worker.
+
+  - If the fetch returns a replication reference with the flag `to_fetch`, the `riak_kv_get_fsm` will continue down the standard path of states starting with `prepare`, and fetch the object, which will then be returned to the sink worker.
+
+- If a successful fetch is relayed back to the sink worker, it will replicate the PUT using a local `riak_client:push/4`. The push will complete a PUT of the object on the sink cluster - using a `riak_kv_put_fsm` with appropriate options (e.g. `asis`, `disable-hooks`).
+
+  - The code within the `riak_client:push/4` follows the behaviour of the existing `riak_repl` on receipt of a replicated object.
+
+- If the fetch and push request fails, the sink worker will report this back to the `riak_kv_replrtq_snk`, which should delay further requests to that node/queue so as to avoid rapidly tying up sink workers communicating with a failing node.
+
+
+### Full-Sync Reconciliation and Repair - Step by Step
+
+The `riak_kv_ttaaefs_manager` controls the full-sync replication activity of a node. Each node is configured with a single peer with which it is to run full-sync checks and repairs, assuming that across the cluster sufficient peers to sufficient clusters have been configured to complete the overall work necessary for that cluster. Ensuring there are sufficient peer relations is an administrator responsibility; there are no re-balancing or re-scaling scenarios during failure scenarios.
+
+The `riak_kv_ttaaefs_manager` is a source side process. It will not attempt to repair any discovered discrepancies where the remote cluster is ahead of the local cluster - the job of the process is to ensure that a remote cluster is up-to-date with the changes which have occurred in the local cluster. For mutual full-sync replication, there will be a need for an equivalent configuration on the peer cluster.
+
+The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the configuration. The schedule has wants, the number of times per day that it is desired that this manager will:
+
+- Reconcile changes across the whole cluster over all time;
+
+- Skip work for a schedule slot and do nothing;
+
+- Reconcile changes that have occurred in the past hour;
+
+- Reconcile changes that have occurred in the past day.
+
+On startup, the manager looks at these wants and provides a random distribution of work across slots. The day is divided into slots evenly distributed so there is a slot for each want in the schedule. It will run work for the slot at an offset from the start of the slot, based on the place this node has in the sorted list of currently active nodes. So if each node is configured with the same total number of wants, work will be synchronised to have limited overlapping work within the cluster.
+
+When, on a node, a scheduled piece of work comes due, the `riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work between the two clusters (using the peer configuration to reach the remote cluster). 
Once the work is finished, it will schedule the next piece of work - unless the start time for the next piece of work has already passed, in which case the next work is skipped. When all the work in the schedule is complete, a new schedule is calculated from the wants.
+
+When starting an `aae_exchange` the `riak_kv_ttaaefs_manager` must pass in a repair function. This function will compare clocks from identified discrepancies, and where the source cluster is ahead of the sink, send the `{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name on `riak_kv_replrtq_src`. These queued entries will then be replicated through being fetched by the `riak_kv_replrtq_snk` workers, although this will only occur when there is no higher priority work to replicate i.e. real-time replication events prompted by locally co-ordinated PUTs.
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/reading-objects.md b/content/riak/kv/3.0.3/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..899994cd96
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/reading-objects.md
@@ -0,0 +1,252 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/#siblings) \(more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Object Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this example to work, you must have first created the bucket-type `animals` as per the instructions on the [bucket type]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('rufus', 'dogs', 'animals')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object
+`pr` | `0` | How many primary [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+    .withOption(FetchOption.R, new Quorum(3))
+    .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('rufus', 'dogs', 'animals')
+    ->build()
+    ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/usage/replication.md b/content/riak/kv/3.0.3/developing/usage/replication.md new file mode 100644 index 0000000000..319bafdd86 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/usage/replication.md @@ -0,0 +1,592 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.3/dev/advanced/replication-properties + - /riak/kv/3.0.3/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/3.0.3/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/3.0.3/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties when making a read or write request;
+the [section below](#client-level-replication-settings) shows examples
+of this per-operation approach.
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props` those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/3.0.3/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/3.0.3/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. 
You can use [bucket types]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+    new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+    ->build()
+    ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+    new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+    .setContentType("text/plain")
+    .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(storyKey)
+    .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+This write will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. 
When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum`
+to `true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
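+
+To make that arithmetic concrete, here is a minimal sketch of the
+quorum calculation in the Erlang shell. This is only an illustration of
+the formula above, not an API that Riak exposes; Riak applies this
+calculation internally:
+
+```erlang
+%% floor(N/2) + 1, with integer division (`div`) providing the floor.
+Quorum = fun(N) -> (N div 2) + 1 end.
+
+%% Matches the figures above:
+%% Quorum(5) =:= 3, Quorum(6) =:= 4, Quorum(7) =:= 4, Quorum(8) =:= 5.
+```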
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+    new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+    .withOption(FetchOption.R, new Quorum(2))
+    .withOption(FetchOption.NOTFOUND_OK, true)
+    .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->buildLocation('john_stockton', 'nba_stats')
+    ->withParameter('r', 2)
+    ->withParameter('notfound_ok', true)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+    new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+    .setContentType("application/json")
+    .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+    .withLocation(michaelJordanKey)
+    .withOption(StoreOption.W, new Quorum(3))
+    .withOption(StoreOption.DW, new Quorum(2))
+    .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildJsonObject("{'stats':{ ... large stats object ... }}")
+    ->buildLocation('michael_jordan', 'nba_stats')
+    ->withParameter('w', 3)
+    ->withParameter('dw', 2)
+    ->build()
+    ->execute();
+```
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{'stats':{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2
+```
+
+All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/3.0.3/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/3.0.3/developing/getting-started)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/3.0.3/learn/concepts/causal-context#siblings">siblings</a>
+2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/3.0.3/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/search-schemas.md b/content/riak/kv/3.0.3/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..a91245f579
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/search-schemas.md
@@ -0,0 +1,511 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/advanced/search-schema
+  - /riak/kv/3.0.3/dev/advanced/search-schema
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/), and [more]({{<baseurl>}}riak/kv/3.0.3/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value.
+Sufficiently large objects can take up tremendous amounts of disk
+space in the index, so pay special attention to those fields.
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('cartoons.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+with open('cartoons.xml', 'r') as xml_file:
+    schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon ThunderCats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above object into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields.
+
+```
+name=Lion-o
+age=30
+leader=true
+aliases.name=León-O
+aliases.desc_es=Señor de los ThunderCats
+aliases.name=Starlion
+aliases.desc_fr=Le jeune seigneur des Cosmocats
+```
+
+This means that our schema should handle `name`, `age`, `leader`,
+`aliases.name` (a dot is a valid character in field names), and
+`aliases.desc_*`, where the suffix indicates the language of the
+description (Spanish or French).
+
+### Required Schema Fields
+
+Solr schemas can be very complex, containing many types and analyzers.
+Refer to the [Solr 4.7 reference
+guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf)
+for a complete list. You should be aware, however, that there are a few
+fields that are required by Riak Search in order to properly distribute
+an object across a [cluster][concept clusters]. These fields are all prefixed
+with `_yz`, which stands for
+[Yokozuna](https://github.com/basho/yokozuna), the original code name
+for Riak Search.
+
+Below is a bare-minimum skeleton Solr schema. It won't do much for you
+other than allow Riak Search to properly manage your stored objects.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+
+ <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+ </types>
+</schema>
+```
+
+If you're missing any of the above fields, Riak Search will reject your
+custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`.
+
+In the table below, you'll find a description of the various required
+fields. You'll rarely need to use any fields other than `_yz_rt` (bucket
+type), `_yz_rb` (bucket), and `_yz_rk` (Riak key). On occasion, `_yz_err`
+can be helpful if you suspect that your extractors are failing.
+Malformed JSON or XML will cause Riak Search to index a key and set
+`_yz_err` to 1, allowing you to reindex with proper values later.
+
+Field | Name | Description
+:-------|:-----|:-----------
+`_yz_id` | ID | Unique identifier of this Solr document
+`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy)
+`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes
+`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions
+`_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them
+`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to
+`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to
+`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to
+`_yz_err` | Error Flag | Indicates whether this doc is the product of a failed object extraction
+
+### Defining Fields
+
+With your required fields known and the skeleton schema elements in
+place, it's time to add your own fields. Since you know your object
+structure, you need to map the name and type of each field (a string,
+integer, boolean, etc.).
+
+When creating fields you can either create specific fields via the
+`field` element or an asterisk (`*`) wildcard field via `dynamicField`.
+A value that matches a specific field name will be indexed using that
+field; otherwise, Riak Search will attempt to match it against a dynamic
+field pattern.
+
+Besides a field `type`, you also must decide if a value is to be
+`indexed` (usually `true`) and `stored`. When a value is `stored`, you
+can get the value back as a result of a query, but it also doubles the
+storage of the field (once in Riak, once in Solr). If a single Riak
+object can have more than one copy of the same matching field, you also
+must set `multiValued` to `true`.
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="schedule" version="1.5">
+ <fields>
+   <field name="name" type="string" indexed="true" stored="true" />
+   <field name="age" type="int" indexed="true" stored="false" />
+   <field name="leader" type="boolean" indexed="true" stored="false" />
+   <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" />
+   <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" />
+
+   <!-- All of these fields are required by Riak Search -->
+   <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/>
+   <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+   <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/>
+   <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/>
+ </fields>
+
+ <uniqueKey>_yz_id</uniqueKey>
+```
+
+Next, take note of the types you used in the fields and ensure that each
+of the field types is defined as a `fieldType` under the `types`
+element.
+Basic types such as `string`, `boolean`, and `int` have matching
+Solr classes. There are dozens more types, including many kinds of
+numeric fields (`float`, `tdouble`, `random`), `date` fields, and even
+geolocation types.
+
+Besides simple field types, you can also customize analyzers for
+different languages. In our example, we mapped any field ending in
+`_es` to Spanish and any field ending in `_de` to German.
+
+```xml
+  <types>
+    <!-- YZ String: Used for non-analyzed fields -->
+    <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" />
+
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- Spanish -->
+    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+        <filter class="solr.SpanishLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+      </analyzer>
+    </fieldType>
+
+    <!-- German -->
+    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
+        <filter class="solr.GermanNormalizationFilterFactory"/>
+        <filter class="solr.GermanLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+      </analyzer>
+    </fieldType>
+  </types>
+</schema>
+```
+
+### "Catch-All" Field
+
+Without a catch-all field, an exception will be thrown if data is
+provided to index without a corresponding `<field>` element. The
+following is the catch-all field from the default Yokozuna schema and
+can be used in a custom schema as well.
+
+```xml
+<dynamicField name="*" type="ignored" />
+```
+
+The following is required to be a child of the `types` element in the
+schema:
+
+```xml
+<fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+```
+
+### Dates
+
+The format of strings that represent a date/time is important, as Solr
+only understands [ISO 8601 UTC date/time
+values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html).
+An example of a correctly formatted date/time string is
+`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted
+date/time value, an exception similar to this will be logged to
+`solr.log`:
+
+```log
+2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014'
+        at org.apache.solr.schema.DateField.parseMath(DateField.java:182)
+        at org.apache.solr.schema.TrieField.createField(TrieField.java:611)
+        at org.apache.solr.schema.TrieField.createFields(TrieField.java:650)
+        at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157)
+        at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
+        ...
+        ...
+        ...
+```
+
+### Uploading
+
+Once you have saved your custom schema as an `.xml` file, you can upload
+it to Riak KV as follows:
+
+```curl
+curl -v -XPUT $RIAK_HOST/search/schema/thundercats \
+  -H 'Content-Type:application/xml' \
+  --data-binary @thundercats_schema.xml
+```
+
+
+
+## Field Properties By Use Case
+
+Sometimes it can be tricky to decide whether a value should be `stored`,
+or whether `multiValued` is allowed. This handy table from the [Solr
+documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case)
+may help you pick field properties.
+
+An entry of `true` or `false` in the table indicates that the option
+must be set to the given value for the use case to function correctly.
+If no entry is provided, the setting of that attribute has no impact on
+the case.
+
+<table class="schemausecase">
+<thead>
+<tr>
+<th>Use Case</th>
+<th><code>indexed</code></th>
+<th><code>stored</code></th>
+<th><code>multiValued</code></th>
+<th><code>omitNorms</code></th>
+<th><code>termVectors</code></th>
+<th><code>termPositions</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>search within field</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>retrieve contents</td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use as unique key</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>sort on field</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td><code>true</code>[1](#notes)</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use field boosts[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>document boosts affect searches within field</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>highlighting</td>
+<td><code>true</code>[4](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td>[2](#notes)</td>
+<td><code>true</code>[3](#notes)</td>
+</tr>
+<tr>
+<td>faceting[5](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>add multiple values, maintaining order</td>
+<td></td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>field length affects doc score</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>MoreLikeThis[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>true</code>[6](#notes)</td>
+<td></td>
+</tr>
+</tbody></table>
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/search.md b/content/riak/kv/3.0.3/developing/usage/search.md
new file mode 100644
index 0000000000..7218c28b3f
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/search.md
@@ -0,0 +1,1455 @@
+---
+title: "Using Search"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Searching"
+    identifier: "usage_searching"
+    weight: 105
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/search
+  - /riak/kv/3.0.3/dev/using/search
+---
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak Search 2.0 is an integration of Solr (for indexing and querying)
+and Riak (for storage and distribution). There are a few points of
+interest that a user of Riak Search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak Search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak Search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak Search uses AAE is in the
+[Riak Search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you connect with to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Here we'll use `curl` to
+create an index named `famous` with the default schema.
+
+Both schema and index creation will be covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/3.0.3/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+  new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.3/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.3/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+  .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the bucket type's
+`search_index` property to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create a bucket type without
+any properties and set individual buckets to be indexed. 
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +### Associating an Index via Custom Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +default bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + +Once you have created the index association, any new data will be indexed on +ingest according to your schema. + +## Riak Search Security Setup + +[Security]({{<baseurl>}}riak/kv/3.0.3/using/security/) is a new feature as of +Riak 2.0 that lets an administrator limit access to certain resources. +In the case of search, your options are to limit administration of +schemas or indexes (the `search.admin` permission) to certain users, and +to limit querying (the `search.query` permission) to any index or to a +specific index. The example below shows the various options. 
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Lion-o, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python the content type is automatically set for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+  ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket))
+  ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket))
+  ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket))
+  ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]);
+
+$storeObjectBuilder->build()->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+
+cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True})
+cat.store()
+
+cat = bucket.new('cheetara', {'name_s': 'Cheetara', 'age_i': 28, 'leader_b': False})
+cat.store()
+
+cat = bucket.new('snarf', {'name_s': 'Snarf', 'age_i': 43})
+cat.store()
+
+cat = bucket.new('panthro', {'name_s': 'Panthro', 'age_i': 36})
+cat.store()
+```
+
+```csharp
+var lionoId = new RiakObjectId("animals", "cats", "liono");
+var lionoObj = new { name_s = "Lion-o", age_i = 30, leader_b = true };
+var lionoRiakObj = new RiakObject(lionoId, lionoObj);
+
+var cheetaraId = new RiakObjectId("animals", "cats", "cheetara");
+var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader_b = false };
+var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj);
+
+var snarfId = new RiakObjectId("animals", "cats", "snarf");
+var snarfObj = new { name_s = "Snarf", age_i = 43, leader_b = false };
+var snarfRiakObj = new RiakObject(snarfId, snarfObj);
+
+var panthroId = new RiakObjectId("animals", "cats", "panthro");
+var panthroObj = new { name_s = "Panthro", age_i = 36, leader_b = false };
+var panthroRiakObj = new RiakObject(panthroId, panthroObj);
+
+var rslts = client.Put(new[] {
+    lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj
+});
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var objs = [
+    [ 'liono', { name_s: 'Lion-o', age_i: 30, leader_b: true } ],
+    [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader_b: false } ],
+    [ 'snarf', { name_s: 'Snarf', age_i: 43, leader_b: false } ],
+    [ 'panthro', { name_s: 'Panthro', age_i: 36, leader_b: false } ],
+];
+
+var storeFuncs = [];
+objs.forEach(function (o) {
+    var storeFunc = function (async_cb) {
+        var key = o[0];
+        var value = o[1];
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('application/json');
+        riakObj.setBucketType('animals');
+        riakObj.setBucket('cats');
+        riakObj.setKey(key);
+        riakObj.setValue(value);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    };
+    storeFuncs.push(storeFunc);
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+    // NB: all objects stored and indexed...
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/3.0.3/developing/usage/custom-extractors). 
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/3.0.3/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated into a
+Solr document field. Solr will index any field that it recognizes, based
+on the index's schema. The default schema (`_yz_default`) uses the
+field's suffix to decide the field type (`_s` represents a string, `_i`
+an integer, `_b` a boolean, and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/3.0.3/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match.
+ +```java +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create("famous"), "name_s:Lion*") + .build(); +cluster.execute(searchOp); +// This will display the actual results as a List of Maps: +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +// This will display the number of results: +System.out.println(results); +``` + +```ruby +results = client.search("famous", "name_s:Lion*") +p results +p results['docs'] +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('famous') + ->withQuery('name_s:Lion*') + ->build() + ->execute(); + +$response->getNumFound(); // 1 + +var_dump($response->getDocs()); +``` + +```python +results = client.fulltext_search('famous', 'name_s:Lion*') +print results +print results['docs'] +``` + +```csharp +var search = new RiakSearchRequest +{ + Query = new RiakFluentSearch("famous", "name_s") + .Search("Lion*") + .Build() +}; + +var rslt = client.Search(search); +RiakSearchResult searchResult = rslt.Value; +foreach (RiakSearchResultDocument doc in searchResult.Documents) +{ + var args = new[] { + doc.BucketType, + doc.Bucket, + doc.Key, + string.Join(", ", doc.Fields.Select(f => f.Value).ToArray()) + }; + Debug.WriteLine( + format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}", + args: args); +} +``` + +```javascript +function search_cb(err, rslt) { + if (err) { + throw new Error(err); + } + logger.info("docs:", JSON.stringify(rslt.docs)); +} + +var search = new Riak.Commands.YZ.Search.Builder() + .withIndexName('famous') + .withQuery('name_s:Lion*') + .withCallback(search_cb) + .build(); +client.execute(search); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>), +io:fwrite("~p~n", [Results]), +Docs = Results#search_results.docs, +io:fwrite("~p~n", [Docs]). + +%% Please note that this example relies on an Erlang record definition +%% for the search_result record found here: +%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl +``` + +```golang +cmd, err := riak.NewSearchCommandBuilder(). + WithIndexName("famous"). + WithQuery("name_s:Lion*"). + Build(); +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} + +sc := cmd.(*riak.SearchCommand) +if json, jerr := json.MarshalIndent(sc.Response.Docs, "", " "); jerr != nil { + return jerr +} else { + fmt.Println(string(json)) +} +``` + +```curl +curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp +``` + +The response to a query will be an object containing details about the +response, such as a query's max score and a list of documents which +match the given query. It's worth noting two things: + +* The documents returned are Search documents (a set of Solr + field/values), not a Riak value +* The HTTP response is a direct Solr response, while the drivers use + Protocol Buffers and are encoded with different field names + +This is a common HTTP `response` value: + +```json +{ + "numFound": 1, + "start": 0, + "maxScore": 1.0, + "docs": [ + { + "leader_b": true, + "age_i": 30, + "name_s": "Lion-o", + "_yz_id": "default_cats_liono_37", + "_yz_rk": "liono", + "_yz_rt": "default", + "_yz_rb": "cats" + } + ] +} +``` + +The most important field returned is `docs`, which is the list of +objects that each contain fields about matching index documents. 
The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+  .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation($key, $bucket, $btype)
+  ->build()
+  ->execute()
+  ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType = proplists:get_value(<<"_yz_rt">>, Doc),  %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc), %% <<"cats">>
+Key = proplists:get_value(<<"_yz_rk">>, Doc),    %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO 30]`. If you
+wanted to find all cats 30 or older, you could include a glob as the top
+end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+  .Builder(BinaryValue.create(index), query)
+  .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('age_i:[30 TO *]')
+  ->build()
+  ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>).
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+  .Builder(BinaryValue.create(index), query)
+  .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('leader_b:true AND age_i:[30 TO *]')
+  ->build()
+  ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>).
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+  .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new Command\Builder\Search\DeleteIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that bucket's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_` before the index can be deleted.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents is returned in non-overlapping
+sequential subsets (in other words, *pages*). This is easy to do with
+the `start` and `rows` parameters, where `start` is the number of
+documents to skip over (the offset) and `rows` is the number of results
+to return in one go.
+
+For example, assuming we want two results per page, getting the second
+page is easy, where `start` is calculated as (rows per page) * (page
+number - 1).
+
+```java
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+SearchOperation searchOp = new SearchOperation
+  .Builder(BinaryValue.create("famous"), "*:*")
+  .withStart(start)
+  .withNumRows(rowsPerPage)
+  .build();
+client.execute(searchOp);
+SearchOperation.Response response = searchOp.get();
+```
+
+```ruby
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE})
+```
+
+```php
+$maxRows = 2;
+$page = 2;
+$start = $maxRows * ($page - 1);
+
+(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('*:*')
+  ->withMaxRows($maxRows)
+  ->withStartRow($start)
+  ->build()
+  ->execute();
+```
+
+```python
+ROWS_PER_PAGE=2
+page = 2
+start = ROWS_PER_PAGE * (page - 1)
+
+client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE)
+```
+
+```csharp
+int rowsPerPage = 2;
+int page = 2;
+int start = rowsPerPage * (page - 1);
+
+var search = new RiakSearchRequest
+{
+    Start = start,
+    Rows = rowsPerPage,
+    Query = new RiakFluentSearch("famous", "*")
+        .Search("*")
+        .Build(),
+};
+
+var rslt = client.Search(search);
+```
+
+```javascript
+var rowsPerPage = 2;
+var page = 2;
+var start = rowsPerPage * (page - 1);
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('*:*')
+    .withStart(start)
+    .withNumRows(rowsPerPage)
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+-define(ROWS_PER_PAGE, 2).
+
+Page = 2,
+Start = ?ROWS_PER_PAGE * (Page - 1),
+
+riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]),
+```
+
+```golang
+rowsPerPage := uint32(2)
+page := uint32(2)
+start := rowsPerPage * (page - uint32(1))
+
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("*:*").
+    WithStart(start).
+    WithNumRows(rowsPerPage).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+ROWS_PER_PAGE=2
+PAGE=2
+START=$(($ROWS_PER_PAGE * ($PAGE-1)))
+
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp
+```
+
+### Pagination Warning
+
+Distributed pagination in Riak Search cannot be used reliably when
+sorting on fields that can have different values per replica of the same
+object, namely `score` and `_yz_id`. In the case of sorting by these
+fields, you may receive redundant objects. In the case of `score`, the
+top-N can return different results over multiple runs.
+
+If you are paginating simply to get all keys that match and don't care
+about the score, then you can sort on type-bucket-key (e.g. `_yz_rt asc`,
+`_yz_rb asc`, `_yz_rk asc`) to get consistent results.
+
+If you want to sort by score without repeating results, then you must set
+`rows` >= `numFound`. This requires having some idea of how many rows
+will match before running the query.
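+To illustrate the type-bucket-key sort mentioned above, here is a sketch
+of a stable pagination query against the `famous` index. It assumes that
+the Solr `sort` parameter is passed through by the HTTP search endpoint,
+just like the `start` and `rows` parameters in the examples above:
+
+```curl
+# Fetch the first page (2 rows) in a stable order by sorting on
+# type/bucket/key rather than score
+curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&sort=_yz_rt+asc,_yz_rb+asc,_yz_rk+asc&start=0&rows=2" | json_pp
+```
+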
+ +[This issue](https://github.com/basho/yokozuna/issues/355) is caused by +the way Search must minimally distribute a query across multiple Solr +nodes (called a *coverage plan*) and then filter duplicate results to +retrieve a full result set. Since this plan is frequently recalculated, +successive page queries may use a different plan, and thus calculate +alternate `score`s or filter different `_yz_id` values. We have plans to +fix this shortcoming in a future version of Riak. + +### MapReduce + +Riak Search allows for piping search results as inputs for +[MapReduce]({{<baseurl>}}riak/kv/3.0.3/developing/usage/mapreduce/) jobs. This is a useful cross-section for +performing post-calculations of results or aggregations of ad-hoc +queries. The Riak Search MapReduce integration works similarly to +regular MapReduce, with the notable exception that your input is not a +bucket, but rather index and query arguments to the `yokozuna` module +and `mapred_search` function (an Erlang `module:function` pair that adds +the Riak Search hook to MapReduce). + +```json +{ + "inputs": { + "module": "yokozuna", + "function": "mapred_search", + "arg": ["famous","NOT leader_b:true"] + }, + "query": [ + { + "map": { + "language": "javascript", + "keep": false, + "source": "function(v) { return [1]; }" + } + }, + { + "reduce": { + "language": "javascript", + "keep": true, + "name": "Riak.reduceSum" + } + } + ] +} +``` + +In this example we're searching for all famous cats that are not +leaders and counting up the results using Javascript for both map and +reduce. It should return the reduced sum of `[3]`. + +```curl +curl -XPOST $RIAK_HOST/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}' +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/usage/searching-data-types.md b/content/riak/kv/3.0.3/developing/usage/searching-data-types.md new file mode 100644 index 0000000000..5d85ff420c --- /dev/null +++ b/content/riak/kv/3.0.3/developing/usage/searching-data-types.md @@ -0,0 +1,1687 @@ +--- +title: "Searching with Data Types" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Searching with Data Types" + identifier: "usage_search_data_types" + weight: 111 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.3/dev/search/search-data-types + - /riak/kv/3.0.3/dev/search/search-data-types +--- + +Although [Riak Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types) function differently from other +Riak objects in some respects, when you're using Search you can think of +them as normal Riak objects with special metadata attached (metadata +that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#maps) +can be indexed and have their contents searched just like other Riak +objects. + +## Data Type MIME Types + +Like all objects stored in Riak, Riak Data Types are assigned content +types. Unlike other Riak objects, this happens automatically. When you +store, say, a counter in Riak, it will automatically be assigned the +type `application/riak_counter`. 
The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types has its own default schema, with the exception of
+maps, which means that the `_yz_default` schema will automatically index
+Data Types on the basis of their assigned content type. This means that
+there is no extra work involved in indexing Riak Data Types. You can
+simply store them and begin querying, provided that they are properly
+indexed, which is covered in the [examples](#data-types-and-search-examples) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#maps), there are four schemas
+for embedded, aka dynamic, fields. Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued.
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings.
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Querying
+maps within maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('scores')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index.
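+
+To double-check that the association took effect, one quick way (a sketch,
+assuming `riak-admin` is available on your `PATH`) is to inspect the bucket
+type's properties:
+
+```bash
+riak-admin bucket-type status counters | grep search_index
+```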
+So let's start playing with some counters. All counters will be stored
+in the bucket `people`, while the key for each counter will be the
+username of each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu1 = new CounterUpdate(10);
+UpdateCounter update1 = new UpdateCounter.Builder(christopherHitchensCounter, cu1)
+        .build();
+client.execute(update1);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+CounterUpdate cu2 = new CounterUpdate(25);
+UpdateCounter update2 = new UpdateCounter.Builder(joanRiversCounter, cu2)
+        .build();
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10)
+    ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+    ->buildLocation('joan_rivers', 'people', 'counters')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+    {<<"counters">>, <<"people">>},
+    <<"chris_hitchens">>,
+    riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+    {<<"counters">>, <<"people">>},
+    <<"joan_rivers">>,
+    riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25.
+Let's query to see how many counters have a value greater
+than 20, just to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[20 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[20 TO *]" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over 20.
+
+To find out which counter that is, we can dig into our results:
+
+```java
+// Using the "results" object from above:
+int numberFound = results.numResults();
+Map<String, List<String>> foundObject = results.getAllResults().get(0);
+String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
+String bucket = foundObject.get("_yz_rb").get(0); // "people"
+String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
+```
+
+```ruby
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```php
+$doc = $response->getDocs()[0];
+
+// The key
+$doc->_yz_rk; // 'joan_rivers'
+
+// The bucket
+$doc->_yz_rb; // 'people'
+
+// The bucket type
+$doc->_yz_rt; // 'counters'
+```
+
+```python
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+var doc = rslt.docs[0];
+
+var key = doc['_yz_rk'];
+var bucket = doc['_yz_rb'];
+var bucketType = doc['_yz_rt'];
+```
+
+```erlang
+Doc = lists:nth(1, Docs),
+Key = proplists:get_value(<<"_yz_rk">>, Doc),
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
+BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
+```
+
+```curl
+# Use the JSON object from above to locate bucket, key, and bucket type
+# information
+```
+
+Alternatively, we can see how many counters have values below 15:
+
+```java
+String index = "scores";
+String query = "counter:[* TO 15]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("scores"), "counter:[* TO 15]")
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[* TO 15]')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[* TO 15]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[* TO 15]')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[* TO 15]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
+``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:[* TO 15]" | json_pp +``` + +Or we can see how many counters have a value of 17 exactly: + +```java +// Using the same method as above, just changing the query: +String query = "counter:17"; +``` + +```ruby +results = client.search('scores', 'counter:17') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('scores') + ->withQuery('counter:17') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('scores', 'counter:17') +``` + +```csharp +var search = new RiakSearchRequest("scores", "counter:17"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('scores') + .withQuery('counter:17') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>). +``` + +```curl +curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp +``` + +## Sets Example + +Let's say that we're storing information about the hobbies of a group of +people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#sets) simply called `sets`, +like so: + +```bash +riak-admin bucket-type create sets '{"props":{"datatype":"set"}}' +riak-admin bucket-type activate sets +``` + +Now, we'll create a Search index called `hobbies` that uses the default +schema (as in some of the examples above): + +```java +YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies"); +StoreIndex storeIndex = + new StoreIndex.Builder(hobbiesIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('hobbies', '_yz_default') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak)) + ->withName('hobbies') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('hobbies', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("hobbies", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'hobbies' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/hobbies \ + -H 'Content-Type: application/json' \ + -d '{"schema": "_yz_default"}' +``` + +Now, we can modify our `sets` bucket type to associate that bucket type +with our `hobbies` index: + +```bash +riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}' +``` + +Now, all of the sets that we store in any bucket with the bucket type +`sets` will be automatically indexed as a set. 
So let's say that we
+store two sets for two different people describing their respective
+hobbies, in the bucket `people`:
+
+```java
+Namespace peopleBucket = new Namespace("sets", "people");
+
+Location mikeDitkaSet = new Location(peopleBucket, "ditka");
+SetUpdate su1 = new SetUpdate()
+        .add("football")
+        .add("winning");
+UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();
+
+Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
+SetUpdate su2 = new SetUpdate()
+        .add("wailing")
+        .add("rocking")
+        .add("winning");
+UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();
+
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+
+ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('football')
+    ->add('winning')
+    ->buildLocation('ditka', 'people', 'sets');
+
+$builder->build()->execute();
+
+$builder->add('wailing')
+    ->add('rocking')
+    ->add('winning')
+    ->buildLocation('dio', 'people', 'sets')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('people')
+
+mike_ditka_set = Set(bucket, 'ditka')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+mike_ditka_set.store()
+
+ronnie_james_dio_set = Set(bucket, 'dio')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+ronnie_james_dio_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("ditka")
+    .WithAdditions(new[] { "football", "winning" })
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("dio")
+    .WithAdditions(new[] { "wailing", "rocking", "winning" })
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'ditka',
+            additions: ['football', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'dio',
+            additions: ['wailing', 'rocking', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+%% Note: riakc_set:add_element/2 returns the updated set, so each
+%% result must be bound rather than discarded.
+MikeDitkaSet0 = riakc_set:new(),
+MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet0),
+MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
+RonnieJamesDioSet0 = riakc_set:new(),
+RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet0),
+RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
+RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),
+
+riakc_pb_socket:update_type(Pid,
+    {<<"sets">>, <<"people">>},
+    <<"ditka">>,
+    riakc_set:to_op(MikeDitkaSet2)),
+riakc_pb_socket:update_type(Pid,
+    {<<"sets">>, <<"people">>},
+    <<"dio">>,
+    riakc_set:to_op(RonnieJamesDioSet3)).
+``` + +Now, we can query our `hobbies` index to see if anyone has the hobby +`football`: + +```java +// Using the same method explained above, just changing the query: +String query = "set:football"; +``` + +```ruby +results = client.search('hobbies', 'set:football') +# This should return a dict with fields like 'num_found' and 'docs' +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('hobbies') + ->withQuery('set:football') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('hobbies', 'set:football') +# This should return a dict with fields like 'num_found' and 'docs' +``` + +```csharp +var search = new RiakSearchRequest("hobbies", "set:football"); +var rslt = client.Search(search); + +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); + +var firstDoc = searchResult.Documents.First(); +Console.WriteLine("Key: {0} Bucket: {1} Type: {2}", + firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType); +``` + +```javascript +function search_cb(err, rslt) { + logger.info("sets numFound: '%d', docs: '%s'", + rslt.numFound, JSON.stringify(rslt.docs)); + + var doc = rslt.docs[0]; + var key = doc['_yz_rk']; + var bucket = doc['_yz_rb']; + var bucketType = doc['_yz_rt']; +} + +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('hobbies') + .withQuery('set:football') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>). +``` + +```curl +curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp +``` + +Let's see how many sets contain the element `football`: + +```java +// Using the same method explained above for getting search results: +int numberFound = results.numResults(); // 1 +``` + +```ruby +results['num_found'] +# 1 +``` + +```php +$response->getNumFound(); // 1 +``` + +```python +results['num_found'] +# 1 +``` + +```csharp +RiakSearchResult searchResult = rslt.Value; +Console.WriteLine("Num found: {0}", searchResult.NumFound); +``` + +```javascript +rslt.numFound; +// 1 +``` + +```erlang +NumberFound = Results#search_results.num_found. +%% 1 +``` + +```curl +``` + +Success! We stored two sets, only one of which contains the element +`football`. Now, let's see how many sets contain the element `winning`: + +```java +// Using the same method explained above, just changing the query: +String query = "set:winning"; + +// Again using the same method from above: +int numberFound = results.numResults(); // 2 +``` + +```ruby +results = client.search('hobbies', 'set:winning') +results['num_found'] +# 2 +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('hobbies') + ->withQuery('set:winning') + ->build() + ->execute(); + +$response->getNumFound(); // 2 +``` + +```python +results = client.fulltext_search('hobbies', 'set:winning') +results['num_found'] +# 2 +``` + +```csharp +var search = new RiakSearchRequest("hobbies", "set:winning"); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('hobbies') + .withQuery('set:winning') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```erlang +{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>). +NumberFound = Results#search_results.num_found. +%% 2 +``` + +Just as expected, both sets we stored contain the element `winning`. 
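+
+Though we don't recommend working with Riak Data Types via curl, the
+query itself can still be issued over HTTP - a sketch, assuming the same
+`RIAK_HOST` convention as the earlier curl examples:
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:winning" | json_pp
+```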
+ +## Maps Example + +This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types) +tutorial. That tutorial walks you through storing CMS-style user data in +Riak [maps]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/#maps), and we'd suggest that you +familiarize yourself with that tutorial first. More specifically, user +data is stored in the following fields in each user's map: + +* first name in a `first_name` register +* last name in a `last_name` register +* whether the user is an enterprise customer in an `enterprise_customer` + flag +* the number of times the user has visited the company page in a + `page_visits` counter +* a list of the user's interests in an `interests` set + +First, let's create and activate a bucket type simply called `maps` that +is set up to store Riak maps: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type activate maps +``` + +Now, let's create a search index called `customers` using the default +schema: + +```java +YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default"); +StoreIndex storeIndex = + new StoreIndex.Builder(customersIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('customers', '_yz_default') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('customers') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('customers', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("customers", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'customers' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>). 
+
```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Idris"))
+        .update("last_name", new RegisterUpdate("Elba"))
+        .update("enterprise_customer", new FlagUpdate(false))
+        .update("page_visits", new CounterUpdate(10))
+        .update("interests", new SetUpdate().add("acting", "being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+        .update("first_name", new RegisterUpdate("Joan"))
+        .update("last_name", new RegisterUpdate("Jett"))
+        // Joan Jett is not an enterprise customer, so we don't need to
+        // explicitly disable the "enterprise_customer" flag, as all
+        // flags are disabled by default
+        .update("page_visits", new CounterUpdate(25))
+        .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Idris')
+    ->updateRegister('last_name', 'Elba')
+    ->updateFlag('enterprise_customer', true)
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Joan')
+    ->updateRegister('last_name', 'Jett')
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+    ->buildLocation('joan_jett', 
'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, false)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters above 15.
+Unlike the counters example above, we have to specify _which_ counter
+we're querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('page_visits_counter:[15 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter
+above 15. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register; // 'Joan'
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we stored thus far had an `interests` set. First, let's
+see how many of our maps even _have_ sets called `interests` using a
+wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set.
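+
+For reference, the equivalent HTTP query, assuming the same `RIAK_HOST`
+convention as the curl examples earlier in this document:
+
+```curl
+curl "$RIAK_HOST/search/query/customers?wt=json&q=interests_set:*" | json_pp
+```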
+Now let's see how many maps have items in `interests` sets that begin
+with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:loving*')
+    ->build()
+    ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has one item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will have a first name only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+        .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+        .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate)
+        .build();
+client.execute(addSubMap);
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var 
mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + var alterEgoMap = mapOp.map('alter_ego'); + alterEgoMap.setRegister('name', 'John Luther'); + + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + throwIfErr(err); + async_cb(); + }); + }, + function (async_cb) { + var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'joan_jett' + }; + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + var alterEgoMap = mapOp.map('alter_ego'); + alterEgoMap.setRegister('name', 'Robert Plant'); + + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + throwIfErr(err); + async_cb(); + }); + } +]; + +async.parallel(funcs, function (err, rslts) { + throwIfErr(err); +}); +``` + +Querying maps within maps involves construct queries that separate the +different levels of depth with a single dot. Here's an example query for +finding maps that have a `name` register embedded within an `alter_ego` +map: + +```java +// Using the same method from above: +String query = "alter_ego_map.name_register:*"; + +// Again using the same method from above: +int numberFound = results.numResults(); // 2 +``` + +```ruby +results = client.search('customers', 'alter_ego_map.name_register:*') +results['num_found'] # 2 +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('customers') + ->withQuery('alter_ego_map.name_register:*') + ->build() + ->execute(); + +$response->getNumFound(); // 2 +``` + +```python +results = client.fulltext_search('customers', 'alter_ego_map.name_register:*') +results['num_found'] # 2 +``` + +```csharp +var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*"); +var rslt = client.Search(search); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('customers') + .withQuery('alter_ego_map.name_register:*') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +Once we know how to query embedded fields like this, we can query those +just like any other. 
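+
+As with the other queries in this document, the same thing can be done
+over HTTP - a sketch, assuming the `RIAK_HOST` convention used throughout
+(the dotted field name is passed to Solr as-is):
+
+```curl
+curl "$RIAK_HOST/search/query/customers?wt=json&q=alter_ego_map.name_register:*" | json_pp
+```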
+Let's find out which maps have an `alter_ego` sub-map that contains a
+`name` register that ends with `Plant`, and display that customer's
+first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*Plant')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/secondary-indexes.md b/content/riak/kv/3.0.3/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..ece4dce4e4
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/secondary-indexes.md
@@ -0,0 +1,2030 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/2i
+  - /riak/kv/3.0.3/dev/using/2i
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary indexes can be either a binary or string, such as
+`sensor_1_data` or `admin_user` or `click_event`, or an integer, such as
+`99` or `141121`.
+ +[Riak Search]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/3.0.3/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/3.0.3/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/3.0.3/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
Let's say that an +application would like add a Twitter handle and an email address to this +object as secondary indexes. + +```java +Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith"); + +// In the Java client (and all clients), if you do not specify a bucket type, +// the client will use the default type. And so the following store command +// would be equivalent to the one above: +Location johnSmithKey = new Location(new Namespace("users"), "john_smith"); + +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(BinaryValue.create("{'user_data':{ ... }}")); + +obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123"); +obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com"); + +StoreValue store = new StoreValue.Builder(obj) + .withLocation(johnSmithKey) + .build(); +client.execute(store); +``` + +```ruby +bucket = client.bucket_type('default').bucket('users') +obj = Riak::RObject.new(bucket, 'john_smith') +obj.content_type = 'application/json' +obj.raw_data = '{"user_data":{ ... }}' + +# String/binary indexes must be set as an array of strings +obj.indexes['twitter_bin'] = %w{ jsmith123 } +obj.indexes['email_bin'] = %w{ jsmith@basho.com } +obj.store + +# In the Ruby client (and all clients), if you do not specify a bucket +# type, the client will use the default type. And so the following set +# of commands would be equivalent to the one above: + +bucket = client.bucket('users') +# repeat the same commands for building the object +obj.store +``` + +```php +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 'jsmith123') + ->addValueToIndex('email_bin', 'jsmith@basho.com'); + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->buildLocation('john_smith', 'users', 'default') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('default').bucket('users') +# In the Python client (and all clients), if you do not specify a bucket type, +# the client will use the default type. And so the following store command +# would be equivalent to the one above: +bucket = client.bucket('users') + +obj = RiakObject(client, bucket, 'john_smith') +obj.content_type = 'text/plain' +obj.data = '...user data...' +obj.add_index('twitter_bin', 'jsmith123') +obj.add_index('email_bin', 'jsmith@basho.com') +obj.store() +``` + +```csharp +var id = new RiakObjectId("default", "users", "john_smith"); +var obj = new RiakObject(id, "...user data...", + RiakConstants.ContentTypes.TextPlain); +obj.BinIndex("twitter").Set("jsmith123"); +obj.BinIndex("email").Set"jsmith@basho.com"); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucket('users'); +riakObj.setKey('john_smith'); +riakObj.setValue('...user data...'); +riakObj.addToIndex('twitter_bin', 'jsmith123'); +riakObj.addToIndex('email_bin', 'jsmith@basho.com'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"default">>, <<"users">>}, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +%% In the Erlang client (and all clients), if you do not specify a bucket type, +%% the client will use the default type. 
And so the following object would be +%% equivalent to the one above: + +Obj = riakc_obj:new(<<"users">>, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index( + MD1, + [{{binary_index, "twitter"}, [<<"jsmith123">>]}, + {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]), +Obj2 = riakc_obj:update_metadata(Obj, MD2), +riakc_pb_socket:put(Pid, Obj2). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + BucketType: "indexes", + Bucket: "users", + Key: "john_smith", + Value: []byte("…user data…"), +} + +obj.AddToIndex("twitter_bin", "jsmith123") +obj.AddToIndex("email_bin", "jsmith@basho.com") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ + -H 'x-riak-index-twitter_bin: jsmith123' \ + -H 'x-riak-index-email_bin: jsmith@basho.com' \ + -H 'Content-Type: application/json' \ + -d '{"userData":"data"}' +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.3/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.3/developing/getting-started) section. + +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if 
(rslt.values.length > 0) {
+        Array.prototype.push.apply(query_keys,
+            rslt.values.map(function (value) {
+                return value.objectKey;
+            }));
+    }
+}
+
+var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucket('users')
+    .withIndexName('twitter_bin')
+    .withIndexKey('jsmith123')
+    .withCallback(query_cb)
+    .build();
+client.execute(cmd);
+```
+
+```erlang
+{ok, Results} =
+    riakc_pb_socket:get_index(Pid,
+                              <<"users">>, %% bucket
+                              {binary_index, "twitter"}, %% index name
+                              <<"jsmith123">>). %% index
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("users").
+    WithIndexName("twitter_bin").
+    WithIndexKey("jsmith123").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
+```
+
+The response:
+
+```java
+john_smith
+```
+
+```ruby
+["john_smith"]
+```
+
+```php
+['john_smith']
+```
+
+```python
+['john_smith']
+```
+
+```csharp
+john_smith
+```
+
+```javascript
+john_smith
+```
+
+```erlang
+{ok,{index_results_v1,[<<"john_smith">>],
+                      undefined,undefined}}.
+```
+
+```golang
+john_smith
+```
+
+```curl
+{
+  "keys": [
+    "john_smith"
+  ]
+}
+```
+
+## Examples
+
+To run the following examples, make sure that Riak is configured to use
+an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].
+
+## Indexing Objects
+
+The following example indexes four different objects. Notice that we're
+storing both integer and string (aka binary) fields, that field names are
+automatically lowercased, that some fields have multiple values, and that
+duplicate fields are automatically de-duplicated:
+
+```java
+Namespace peopleBucket = new Namespace("indexes", "people");
+
+RiakObject larry = new RiakObject()
+        .setValue(BinaryValue.create("My name is Larry"));
+larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
+larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
+StoreValue storeLarry = new StoreValue.Builder(larry)
+        .withLocation(new Location(peopleBucket, "larry"))
+        .build();
+client.execute(storeLarry);
+
+RiakObject moe = new RiakObject()
+        .setValue(BinaryValue.create("My name is Moe"));
+moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
+moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
+StoreValue storeMoe = new StoreValue.Builder(moe)
+        .withLocation(new Location(peopleBucket, "moe"))
+        .build();
+client.execute(storeMoe);
+
+RiakObject curly = new RiakObject()
+        .setValue(BinaryValue.create("My name is Curly"));
+curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
+curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
+StoreValue storeCurly = new StoreValue.Builder(curly)
+        .withLocation(new Location(peopleBucket, "curly"))
+        .build();
+client.execute(storeCurly);
+
+RiakObject veronica = new RiakObject()
+        .setValue(BinaryValue.create("My name is Veronica"));
+veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
+        .add("val4").add("val4");
+veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
+        .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
+StoreValue storeVeronica = new StoreValue.Builder(veronica)
+        .withLocation(new Location(peopleBucket, "veronica"))
+        .build();
+client.execute(storeVeronica);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = 
Riak::RObject.new(bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.raw_data = 'My name is Larry'
+obj1.indexes['field1_bin'] = %w{ val1 }
+# Like binary/string indexes, integer indexes must be set as an array,
+# even if you wish to add only a single index
+obj1.indexes['field2_int'] = [1001]
+obj1.store
+
+obj2 = Riak::RObject.new(bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.raw_data = 'My name is Moe'
+obj2.indexes['Field1_bin'] = %w{ val2 }
+obj2.indexes['Field2_int'] = [1002]
+obj2.store
+
+obj3 = Riak::RObject.new(bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.raw_data = 'My name is Curly'
+obj3.indexes['FIELD1_BIN'] = %w{ val3 }
+obj3.indexes['FIELD2_INT'] = [1003]
+obj3.store
+
+obj4 = Riak::RObject.new(bucket, 'veronica')
+obj4.content_type = 'text/plain'
+obj4.raw_data = 'My name is Veronica'
+obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
+# Hash assignment overwrites previous values for the same key, so all of
+# the integer values (duplicates included) must be set in a single array:
+obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1007]
+obj4.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('people', 'indexes');
+
+$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val1')
+    ->addValueToIndex('field2_int', 1001);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('larry', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('Field1_bin', 'val2')
+    ->addValueToIndex('Field2_int', 1002);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('moe', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('FIELD1_BIN', 'val3')
+    ->addValueToIndex('FIELD2_int', 1003);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('curly', $bucket))
+    ->build()
+    ->execute();
+
+$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4')
+    ->addValueToIndex('field1_bin', 'val4a')
+    ->addValueToIndex('field1_bin', 'val4b')
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1005)
+    ->addValueToIndex('field2_int', 1006)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1004)
+    ->addValueToIndex('field2_int', 1007);
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+
+obj1 = RiakObject(client, bucket, 'larry')
+obj1.content_type = 'text/plain'
+obj1.data = 'My name is Larry'
+obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
+obj1.store()
+
+obj2 = RiakObject(client, bucket, 'moe')
+obj2.content_type = 'text/plain'
+obj2.data = 'Moe'
+obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
+obj2.store()
+
+obj3 = RiakObject(client, bucket, 'curly')
+obj3.content_type = 'text/plain'
+obj3.data = 'Curly'
+obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
+obj3.store()
+
+obj4 = RiakObject(client, bucket, 
'veronica')
+obj4.content_type = 'text/plain'
+obj4.data = 'Veronica'
+# val4 and 1004 appear multiple times below; Riak deduplicates repeated
+# values within a single index
+obj4.add_index('field1_bin', 'val4') \
+    .add_index('field1_bin', 'val4a') \
+    .add_index('field1_bin', 'val4b') \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1005) \
+    .add_index('field2_int', 1006) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1004) \
+    .add_index('field2_int', 1007)
+obj4.store()
+```
+
+```csharp
+var larryId = new RiakObjectId("indexes", "people", "larry");
+var larry = new RiakObject(larryId, "My name is Larry",
+    RiakConstants.ContentTypes.TextPlain);
+
+larry.BinIndex("field1").Set("val1");
+larry.IntIndex("field2").Set(1001);
+
+client.Put(larry);
+
+var moeId = new RiakObjectId("indexes", "people", "moe");
+var moe = new RiakObject(moeId, "My name is Moe",
+    RiakConstants.ContentTypes.TextPlain);
+
+moe.BinIndex("Field1").Set("val2");
+moe.IntIndex("Field2").Set(1002);
+
+client.Put(moe);
+
+var curlyId = new RiakObjectId("indexes", "people", "curly");
+var curly = new RiakObject(curlyId, "My name is Curly",
+    RiakConstants.ContentTypes.TextPlain);
+
+curly.BinIndex("FIELD1").Set("val3");
+curly.IntIndex("FIELD2").Set(1003);
+
+client.Put(curly);
+
+var veronicaId = new RiakObjectId("indexes", "people", "veronica");
+var veronica = new RiakObject(veronicaId, "My name is Veronica",
+    RiakConstants.ContentTypes.TextPlain);
+
+veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" });
+veronica.IntIndex("FIELD2").Set(new BigInteger[] {
+    1004, 1005, 1006, 1004, 1004, 1007
+});
+
+client.Put(veronica);
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var storeFuncs = [
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('larry');
+        riakObj.setValue('My name is Larry');
+        riakObj.addToIndex('field1_bin', 'val1');
+        riakObj.addToIndex('field2_int', 1001);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('moe');
+        riakObj.setValue('My name is Moe');
+        riakObj.addToIndex('Field1_bin', 'val2');
+        riakObj.addToIndex('Field2_int', 1002);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('curly');
+        riakObj.setValue('My name is Curly');
+        riakObj.addToIndex('FIELD1_BIN', 'val3');
+        riakObj.addToIndex('FIELD2_INT', 1003);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    },
+    function (async_cb) {
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('text/plain');
+        riakObj.setBucketType('indexes');
+        riakObj.setBucket('people');
+        riakObj.setKey('veronica');
+        riakObj.setValue('My name is Veronica');
+        riakObj.addToIndex('FIELD1_bin', 'val4');
+        riakObj.addToIndex('FIELD1_bin', 'val4');
+        riakObj.addToIndex('FIELD1_bin', 'val4a');
+        riakObj.addToIndex('FIELD1_bin', 'val4b');
+        riakObj.addToIndex('FIELD2_int', 1004);
+        
riakObj.addToIndex('FIELD2_int', 1005); + riakObj.addToIndex('FIELD2_int', 1006); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1007); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + } +]; +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Larry = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"My name is Larry">>, + <<"text/plain">>), +LarryMetadata = riakc_obj:get_update_metadata(Larry), +LarryIndexes = riakc_obj:set_secondary_index( + LarryMetadata, + [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}] +), +LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes). + +Moe = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"moe">>, + <<"My name is Moe">>, + <<"text/plain">>), +MoeMetadata = riakc_obj:get_update_metadata(Moe), +MoeIndexes = riakc_obj:set_secondary_index( + MoeMetadata, + [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}] +), +MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes). + +Curly = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"curly">>, + <<"My name is Curly">>, + <<"text/plain">>), +CurlyMetadata = riakc_obj:get_update_metadata(Curly), +CurlyIndexes = riakc_obj:set_secondary_index( + CurlyMetadata, + [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}] +), +CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes). + +Veronica = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"veronica">>, + <<"My name is Veronica">>, + <<"text/plain">>), +VeronicaMetadata = riakc_obj:get_update_metadata(Veronica), +VeronicaIndexes = riakc_obj:set_secondary_index( + VeronicaMetadata, + [{{binary_index, "field1"}, [<<"val4">>]}, {{binary_index, "field1"}, [<<"val4">>]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1005]}, {{integer_index, "field2"}, [1006]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1007]}] +), +VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes). +``` + +```golang +o1 := &riak.Object{ + Key: "larry", + Value: []byte("My name is Larry"), +} +o1.AddToIndex("field1_bin", "val1") +o1.AddToIntIndex("field2_int", 1001) + +o2 := &riak.Object{ + Key: "moe", + Value: []byte("My name is Moe"), +} +o2.AddToIndex("Field1_bin", "val2") +o2.AddToIntIndex("Field2_int", 1002) + +o3 := &riak.Object{ + Key: "curly", + Value: []byte("My name is Curly"), +} +o3.AddToIndex("FIELD1_BIN", "val3") +o3.AddToIntIndex("FIELD2_INT", 1003) + +o4 := &riak.Object{ + Key: "veronica", + Value: []byte("My name is Veronica"), +} +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4a") +o4.AddToIndex("FIELD1_bin", "val4b") +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1005) +o4.AddToIntIndex("FIELD2_int", 1006) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1007) + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "text/plain" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithContent(obj). 
+ Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field1_bin: val1" \ + -H "x-riak-index-field2_int: 1001" \ + -d 'My name is Larry' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \ + -H "x-riak-index-Field1_bin: val2" \ + -H "x-riak-index-Field2_int: 1002" \ + -d 'My name is Moe' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \ + -H "X-RIAK-INDEX-FIELD1_BIN: val3" \ + -H "X-RIAK-INDEX-FIELD2_INT: 1003" \ + -d 'My name is Curly' + +curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \ + -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \ + -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1007" \ + -d 'My name is Veronica' +``` + +The above objects will end up having the following secondary indexes, +respectively: + +* `Larry` - Binary index `field1_bin` and integer index `field2_int` +* `Moe` - Binary index `field1_bin` and integer index `field2_int` + (note that the index names are set to lowercase by Riak) +* `Curly` - Binary index `field1_bin` and integer index `field2_int` + (note again that the index names are set to lowercase) +* `Veronica` - Binary index `field1_bin` with the values `val4`, + `val4a`, and `val4b` and integer index `field2_int` with the values + `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed) + +As these examples show, there are safeguards in Riak that both normalize +the names of indexes and prevent the accumulation of redundant indexes. + +## Invalid Field Names and Types + +The following examples demonstrate what happens when an index field is +specified with an invalid field name or type. The system responds with +`400 Bad Request` and a description of the error. + +Invalid field name: + +```java +// The Java client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_foo'] = [1001] + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter', 'jsmith123'); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_foo', 1001) + +# Result: +riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'." 
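+
+# A valid version of the same call, for contrast: index names must end in
+# '_bin' (binary/string values) or '_int' (integer values), e.g.
+#
+#   obj.add_index('field2_int', 1001)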
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
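+>
+> The `participate_in_coverage` setting discussed in the next paragraph is an
+> ordinary `riak.conf` entry; as a minimal sketch (using the values named
+> below), the relevant line looks like this:
+>
+> ```
+> participate_in_coverage = disabled
+> ```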
+>
+> To avoid issues such as the above, the `riak.conf` file provides an option that lets you enable or disable a node's participation in 2i queries. Setting `participate_in_coverage = disabled` prevents the node in question from participating. This feature is typically used to keep newly added nodes that have not yet received all of their data from participating in 2i queries and returning inconsistent results. Riak must be restarted on a node for a change to `participate_in_coverage` to take effect. The default setting is `enabled`.
+
+### Exact Match
+
+The following examples perform an exact match index query.
+
+Query a binary index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withScalarValue('val1')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val1')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+// Note: using a string argument indicates a binary index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withIndexKey('val1')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"},
+    <<"val1">>
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithIndexKey("val1").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
+```
+
+Query an integer index:
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withScalarValue(1001)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1001)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+// Note: using an integer argument indicates an int index query:
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withIndexKey(1001)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"},
+    1001
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntIndexKey(1001).
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
+```
+
+The following example performs an exact match query and pipes the
+results into a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field1_bin",
+    "key": "val3"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+### Range
+
+The following examples perform a range query.
+
+Query a binary index... 
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4")
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2'..'val4')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field1_bin')
+    ->withRangeValue('val2', 'val4')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field1_bin', 'val2', 'val4')
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field1");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4");
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field1_bin')
+    .withRange('val2', 'val4')
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "field1"}, %% index name
+    <<"val2">>, <<"val4">> %% range query for keys between "val2" and "val4"
+).
+```
+
+```golang
+c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field1_bin").
+    WithRange("val2", "val4").
+    Build()
+if err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4
+```
+
+Or query an integer index...
+
+```java
+Namespace myBucket = new Namespace("indexes", "people");
+IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L)
+        .build();
+IntIndexQuery.Response response = client.execute(iiq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002..1004)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('people', 'indexes')
+    ->withIndexName('field2_int')
+    ->withRangeValue(1002, 1004)
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('field2_int', 1002, 1004)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "people", "field2");
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('people')
+    .withIndexName('field2_int')
+    .withRange(1002, 1004)
+    .withCallback(query_cb)
+    .build();
+client.execute(intIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {integer_index, "field2"}, %% index name
+    1002, 1004 %% range query for values between 1002 and 1004
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("people").
+    WithIndexName("field2_int").
+    WithIntRange(1002, 1004).
+    Build()
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
+```
+
+The following example performs a range query and pipes the results into
+a MapReduce job:
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "field2_int",
+    "start": 1002,
+    "end": 1004
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_identity",
+        "keep": true
+      }
+    }
+  ]
+}
+EOF
+```
+
+#### Range with terms
+
+When performing a range query, it is possible to retrieve the matched
+index values alongside the Riak keys using `return_terms=true`. An
+example from a small sampling of Twitter data with indexed hash tags:
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('rock', 'rocl')
+    ->withReturnTerms()
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetReturnTerms(true);
+var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
+var indexResult = indexRiakResult.Value;
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('rock', 'rocl')
+    .withReturnKeyAndIndex(true)
+    .withCallback(query_cb)
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"rock">>, <<"rocl">>, %% range query for terms between "rock" and "rocl"
+    [{return_terms, true}] %% return matched terms alongside the keys
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("rock", "rocl").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
+```
+
+Response:
+
+```json
+{
+  "results": [
+    {
+      "rock": "349224101224787968"
+    },
+    {
+      "rocks": "349223639880699905"
+    }
+  ]
+}
+```
+
+### Pagination
+
+When asking for large result sets, it is often desirable to ask the
+servers to return chunks of results instead of a firehose. You can do so
+using `max_results=<n>`, where `n` is the number of results you'd like
+to receive.
+
+Assuming more keys are available, a `continuation` value will be
+included in the results to allow the client to request the next page.
+
+Here is an example of a range query with both `return_terms` and
+pagination against the same Twitter data set.
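+
+Before the client-specific examples, here is a rough end-to-end sketch of the
+paging loop in Python. It reuses only calls shown elsewhere on this page; the
+`continuation` attribute on the returned page object, and the `(term, key)`
+shape of `return_terms` results, are assumptions about the Python client
+rather than guarantees, so treat this as illustrative:
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+
+continuation = None
+while True:
+    # Fetch one page of at most 5 results, including the matched terms
+    page = bucket.get_index('hashtags_bin', 'ri', 'ru',
+                            max_results=5,
+                            return_terms=True,
+                            continuation=continuation)
+    for term, key in page.results:  # assumed (term, key) pairs
+        print(term, key)
+    # Assumed: the page carries the opaque continuation, None when exhausted
+    continuation = page.continuation
+    if continuation is None:
+        break
+```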
+ +```java +Namespace tweetsBucket = new Namespace("indexes", "tweets"); +BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru") + .withMaxResults(5) + .build(); +BinIndexQuery.Response response = client.execute(biq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('tweets') +bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5) +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('tweets', 'indexes') + ->withIndexName('hashtags') + ->withRangeValue('ri', 'ru') + ->withMaxResults(5) + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('tweets') +bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5) +``` + +```csharp +var idxId = new RiakIndexId("indexes", "tweets", "hashtags"); +var options = new RiakIndexGetOptions(); +options.SetMaxResults(5); +var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options); + +options.SetContinuation(rslt.Continuation); +rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options); +``` + +```javascript +function do_query(continuation) { + var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('tweets') + .withIndexName('hashtags_bin') + .withRange('ri', 'ru') + .withMaxResults(5) + .withCallback(pagination_cb); + + if (continuation) { + binIdxCmdBuilder.withContinuation(continuation); + } + + client.execute(binIdxCmdBuilder.build()); +} + +var query_keys = []; +function pagination_cb(err, rslt) { + if (err) { + logger.error("query_cb err: '%s'", err); + return; + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + query_keys = []; + + if (rslt.continuation) { + do_query(rslt.continuation); + } + } + + if (rslt.values.length > 0) { + Array.prototype.push.apply(query_keys, + rslt.values.map(function (value) { + return value.objectKey; + })); + } +} + +do_query(); +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name + {binary_index, "hashtags"}, %% index name + <<"ri">>, <<"ru">>, %% range query from "ri" to "ru" + {max_results, 5} +). +``` + +```golang +func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error { + builder := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("tweets"). + WithIndexName("hashtags_bin"). + WithRange("ri", "ru"). 
+        WithMaxResults(5)
+
+    if continuation != nil && len(continuation) > 0 {
+        builder.WithContinuation(continuation)
+    }
+
+    cmd, err := builder.Build()
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    printIndexQueryResults(cmd)
+
+    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
+    if sciq.Response == nil {
+        return errors.New("[DevUsing2i] expected response but did not get one")
+    }
+
+    rc := sciq.Response.Continuation
+    if rc != nil && len(rc) > 0 {
+        return doPaginatedQuery(cluster, sciq.Response.Continuation)
+    }
+
+    return nil
+}
+
+func queryingPagination(cluster *riak.Cluster) error {
+    return doPaginatedQuery(cluster, nil)
+}
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true"
+```
+
+Here is an example JSON response (your client-specific response may differ):
+
+```json
+{
+  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
+  "results": [
+    { "rice": "349222574510710785" },
+    { "rickross": "349222868095217664" },
+    { "ridelife": "349221819552763905" },
+    { "ripjake": "349220649341952001" },
+    { "ripjake": "349220687057129473" }
+  ]
+}
+```
+
+Take the continuation value from the previous result set and feed it
+back into the query.
+
+```java
+Namespace tweetsBucket = new Namespace("indexes", "tweets");
+BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
+        .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
+        .withMaxResults(5)
+        .withKeyAndIndex(true)
+        .build();
+BinIndexQuery.Response response = client.execute(biq);
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+  'hashtags_bin',
+  'ri'..'ru',
+  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+  max_results: 5,
+  return_terms: true
+)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\QueryIndex($riak))
+    ->buildBucket('tweets', 'indexes')
+    ->withIndexName('hashtags')
+    ->withRangeValue('ri', 'ru')
+    ->withMaxResults(5)
+    ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
+    ->build()
+    ->execute()
+    ->getResults();
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('tweets')
+bucket.get_index(
+    'hashtags_bin',
+    'ri', 'ru',
+    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
+    max_results=5,
+    return_terms=True
+)
+```
+
+```csharp
+// rslt is the previous 2i fetch result
+var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
+var options = new RiakIndexGetOptions();
+options.SetMaxResults(5);
+options.SetContinuation(rslt.Continuation);
+rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
+```
+
+```javascript
+// See above example
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:get_index_range(
+    Pid,
+    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
+    {binary_index, "hashtags"}, %% index name
+    <<"ri">>, <<"ru">>, %% range query from "ri" to "ru"
+    [
+        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
+        {max_results, 5},
+        {return_terms, true}
+    ]
+).
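+
+%% The continuation binary passed above is the value returned with the
+%% previous page of results; omit the option (as in the first pagination
+%% example) to fetch the initial page.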
+```
+
+```golang
+// See above example
+```
+
+```curl
+curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
+```
+
+The result:
+
+```json
+{
+  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
+  "results": [
+    {
+      "ripjake": "349221198774808579"
+    },
+    {
+      "ripped": "349224017347100672"
+    },
+    {
+      "roadtrip": "349221207155032066"
+    },
+    {
+      "roastietime": "349221370724491265"
+    },
+    {
+      "robaseria": "349223702765912065"
+    }
+  ]
+}
+```
+
+### Streaming
+
+It is also possible to stream results:
+
+```java
+// Available in Riak Java Client 2.1.0 and later
+int pollTimeoutMS = 200;
+Namespace ns = new Namespace("indexes", "tweets");
+String indexName = "hashtags";
+
+BinIndexQuery indexQuery =
+    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();
+
+final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
+    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);
+
+// For streaming commands, the future's value will be available before
+// the future is complete, so you may begin to pull results from the
+// provided iterator as soon as possible.
+final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();
+
+for (BinIndexQuery.Response.Entry e : streamingResponse)
+{
+    // Do something with key...
+}
+
+streamingFuture.await();
+Assert.assertTrue(streamingFuture.isDone());
+```
+
+```ruby
+bucket = client.bucket_type('indexes').bucket('people')
+bucket.get_index('myindex_bin', 'foo', stream: true)
+```
+
+```php
+/*
+  It is not currently possible to stream results using the PHP client
+*/
+```
+
+```python
+bucket = client.bucket_type('indexes').bucket('people')
+keys = []
+for key in bucket.stream_index('myindex_bin', 'foo'):
+    keys.append(key)
+```
+
+```csharp
+var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
+var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
+var indexResult = indexRiakResult.Value;
+foreach (var key in indexResult.IndexKeyTerms)
+{
+    // Do something with key...
+}
+```
+
+```javascript
+var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
+    .withBucketType('indexes')
+    .withBucket('tweets')
+    .withIndexName('hashtags_bin')
+    .withRange('ri', 'ru')
+    .withStreaming(true)
+    .withCallback(query_cb) // See query_cb in other examples
+    .build();
+client.execute(binIdxCmd);
+```
+
+```erlang
+{ok, KeyStream} = riakc_pb_socket:get_index_eq(
+    Pid,
+    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
+    {binary_index, "myindex"}, %% index name and type
+    <<"foo">>, %% value of the index
+    [{stream, true}] %% enable streaming
+).
+```
+
+```golang
+cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
+    WithBucketType("indexes").
+    WithBucket("tweets").
+    WithIndexName("hashtags_bin").
+    WithRange("ri", "ru").
+    WithStreaming(true).
+    WithCallback(streamingCallback).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
+```
+
+Streaming can also be combined with pagination and `return_terms`.
+
+### Sorting
+
+As of Riak 1.4, the result set is sorted on index values (when executing
+range queries) and object keys. 
See the pagination example above: hash
+tags (2i keys) are returned in ascending order, and the object keys
+(Twitter IDs) for the messages which contain the `ripjake` hash tag are
+also returned in ascending order.
+
+### Retrieve all Bucket Keys via the `$bucket` Index
+
+The following example retrieves the keys for all objects stored in the
+bucket `people` using an exact match on the special `$bucket` index.
+
+```curl
+curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
+```
+
+### Count Bucket Objects via the `$bucket` Index
+
+The following example performs a secondary index lookup on the `$bucket`
+index, as in the previous example, and pipes the results into a MapReduce
+job that counts the number of records in the `people` bucket. To improve
+efficiency, the batch size has been increased from the default of 20.
+
+```curl
+curl -XPOST localhost:8098/mapred \
+  -H "Content-Type: application/json" \
+  -d @-<<EOF
+{
+  "inputs": {
+    "bucket": "people",
+    "index": "\$bucket",
+    "key": "people"
+  },
+  "query": [
+    {
+      "reduce": {
+        "language": "erlang",
+        "module": "riak_kv_mapreduce",
+        "function": "reduce_count_inputs",
+        "arg": {
+          "reduce_phase_batch_size": 1000
+        }
+      }
+    }
+  ]
+}
+EOF
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/security.md b/content/riak/kv/3.0.3/developing/usage/security.md
new file mode 100644
index 0000000000..af9467b890
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/security.md
@@ -0,0 +1,103 @@
+---
+title: "Client Security"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Security"
+    identifier: "usage_security"
+    weight: 114
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/advanced/client-security
+  - /riak/kv/3.0.3/dev/advanced/client-security
+---
+
+Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/3.0.3/using/security/basics) that enables you to choose
+
+* which Riak users/clients are authorized to perform a wide variety of
+  Riak operations, and
+* how those users/clients are required to authenticate themselves.
+
+The following four authentication mechanisms, aka [security sources]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/), are available:
+
+* [Trust]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#trust-based-authentication)-based
+  authentication enables you to specify trusted
+  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
+  from which all clients will be authenticated by default
+* [Password]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#password-based-authentication)-based authentication requires
+  that clients provide a username and password
+* [Certificate]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#certificate-based-authentication)-based authentication
+  requires that clients present a CA-generated certificate
+* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#pam-based-authentication)-based authentication requires
+  clients to authenticate using the PAM service specified using the
+  [`riak-admin security`]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#managing-sources)
+  command line interface
+
+Riak's approach to security is highly flexible. If you choose to use
+Riak's security feature, you do not need to require that all clients
+authenticate via the same means. Instead, you can specify authentication
+sources on a client-by-client, i.e. user-by-user, basis. 
This means that +you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/3.0.3/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{<baseurl>}}riak/kv/3.0.3/developing/usage) have to use username and password. The approach +that you adopt will depend on your security needs. + +This document provides a general overview of how that works. For +managing security in Riak itself, see the following documents: + +* [Authentication and Authorization]({{<baseurl>}}riak/kv/3.0.3/using/security/basics) +* [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/) + +We also provide client-library-specific guides for the following +officially supported clients: + +* [Java]({{<baseurl>}}riak/kv/3.0.3/developing/usage/security/java) +* [Ruby]({{<baseurl>}}riak/kv/3.0.3/developing/usage/security/ruby) +* [PHP]({{<baseurl>}}riak/kv/3.0.3/developing/usage/security/php) +* [Python]({{<baseurl>}}riak/kv/3.0.3/developing/usage/security/python) +* [Erlang]({{<baseurl>}}riak/kv/3.0.3/developing/usage/security/erlang) + +## Certificates, Keys, and Authorities + +If Riak security is enabled, all client operations, regardless of the +security source you choose for those clients, must be over a secure SSL +connection. If you are using a self-generated Certificate Authority +(CA), Riak and connecting clients will need to share that CA. + +To use certificate-based auth, you will need to create a Public Key +Infrastructure (PKI) based on +[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central +foundation of your PKI should be a Certificate Authority (CA), created +inside of a secure environment, that can be used to sign certificates. +In addition to a CA, your client will need to have access to a private +key shared only by the client and Riak as well as a CA-generated +certificate. + +To prevent so-called [Man-in-the-Middle +attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private +keys should never be shared beyond Riak and connecting clients. + +> **HTTP not supported** +> +> Certificate-based authentication is available only through Riak's +[Protocol Buffers]({{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{<baseurl>}}riak/kv/3.0.3/developing/api/http). + +### Default Names + +In Riak's [configuration files]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#security), the +default certificate file names are as follows: + +Cert | Filename +:----|:------- +Certificate authority (CA) | `cacertfile.pem` +Private key | `key.pem` +CA-generated cert | `cert.pem` + +These filenames will be used in the client-library-specific tutorials. + + + + diff --git a/content/riak/kv/3.0.3/developing/usage/security/erlang.md b/content/riak/kv/3.0.3/developing/usage/security/erlang.md new file mode 100644 index 0000000000..8d1f952972 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/usage/security/erlang.md @@ -0,0 +1,118 @@ +--- +title_supertext: "Client Security:" +title: "Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Erlang" + identifier: "usage_security_erlang" + weight: 103 + parent: "usage_security" +toc: true +aliases: + - /riak/3.0.3/dev/advanced/client-security/erlang + - /riak/kv/3.0.3/dev/advanced/client-security/erlang +--- + +This tutorial shows you how to set up a Riak Erlang client to +authenticate itself when connecting to Riak. 
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Erlang Client Basics
+
+When connecting to Riak using an Erlang-based client, you typically use
+a process identifier to refer to the client connection. The following
+example creates a process identifier (we'll call it `Pid`) for a
+connection to `localhost` on port 8087:
+
+```erlang
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
+```
+
+If you are using Riak security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/) you
+choose. In addition, all clients should provide a username. The example
+above created a connection to Riak without specifying a username or CA.
+That information is specified as a list of options passed to the
+`start` function. We'll specify those options in a list called
+`SecurityOptions`.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", ""},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
+```
+
+Please note that you do not need to specify a password if you are not
+using password-based authentication. If you are using a different
+security source, Riak will ignore the password. You can enter an empty
+string (as in the example above) or anything you'd like.
+
+This client is not currently set up to use any of the available security
+sources, with the exception of trust-based authentication, provided that
+the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
+from which the client is connecting has been specified as trusted. More
+on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `SecurityOptions` list from
+above. We'll use the password `rosebud` here and in the rest of the
+examples.
+
+```erlang
+CertDir = "/ssl_dir",
+SecurityOptions = [
+    {credentials, "riakuser", "rosebud"},
+    {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
+],
+{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
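+
+%% Optional sanity check: once the authenticated connection is up,
+%% riakc_pb_socket:ping/1 should return pong:
+%% riakc_pb_socket:ping(Pid).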
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/3.0.3/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + + + + diff --git a/content/riak/kv/3.0.3/developing/usage/security/java.md b/content/riak/kv/3.0.3/developing/usage/security/java.md new file mode 100644 index 0000000000..ed7ef3a1f6 --- /dev/null +++ b/content/riak/kv/3.0.3/developing/usage/security/java.md @@ -0,0 +1,121 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/3.0.3/dev/advanced/client-security/java + - /riak/kv/3.0.3/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to
+create a cluster object (we'll call it `cluster`), which will in turn be
+used to create a `client` object. The setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.RiakCluster;
+import com.basho.riak.client.api.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        // This will specify a username but no password or keystore:
+        .withAuth("riakuser", null, null)
+        .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+        .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Generate an InputStream from the CA cert
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        .withAuth("riakuser", "rosebud", ks)
+        .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/security/php.md b/content/riak/kv/3.0.3/developing/usage/security/php.md
new file mode 100644
index 0000000000..c5314a2883
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/security/php.md
@@ -0,0 +1,122 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/advanced/client-security/php
+  - /riak/kv/3.0.3/dev/advanced/client-security/php
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics). 
[Certificate]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/security/python.md b/content/riak/kv/3.0.3/developing/usage/security/python.md
new file mode 100644
index 0000000000..e06003482a
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/security/python.md
@@ -0,0 +1,176 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/advanced/client-security/python
+  - /riak/kv/3.0.3/dev/advanced/client-security/python
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also
+specify a password for the client in the `creds` object from above.
+We'll use the password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/3.0.3/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a general CA (as with all security sources), a username, a
+CA-generated cert, and a private key. We'll assume that all certs are
+stored in `/ssl_dir`, as in the previous examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      cert_file='/ssl_dir/cert.pem',
+                      pkey_file='/ssl_dir/key.pem')
+```
+
+## Specifying a Certificate Revocation List
+
+If you are using a CA-generated Certificate Revocation List (CRL), you
+can specify its filepath using the `crl_file` parameter.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      crl_file='/ssl_dir/revocation.crl')
+```
+
+## Specifying Ciphers
+
+To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/3.0.3/using/security/basics/#security-ciphers), you can pass in a colon-delimited
+string to the `ciphers` parameter:
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC')
+```
+
+## Using OpenSSL Objects
+
+Whenever you specify certs, you have the option of either passing in
+file paths as strings (as in the examples above) or properly created
+OpenSSL objects, e.g. objects created using the
+[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If
+you generate OpenSSL objects this way, you should note that they must
+be specified differently when creating a `SecurityCreds` object. The
+table below lists the appropriate parameter names for each method, as
+well as the pyOpenSSL class to which each cert must belong if you create
+OpenSSL objects.
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `key_file` | `key` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify filepaths, the appropriate certs will be loaded and
+converted into the appropriate OpenSSL object. The functions used for
+this are `OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/security/ruby.md b/content/riak/kv/3.0.3/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..38dbf62eed
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/security/ruby.md
@@ -0,0 +1,162 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/advanced/client-security/ruby
+  - /riak/kv/3.0.3/dev/advanced/client-security/ruby
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`.
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we will
+specify a password for the client in the `authentication` hash. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/3.0.3/using/security/basics#user-management).
+
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/ssl_dir/client_cert.pem',
+    cert: '/ssl_dir/cert.pem',
+    key: '/ssl_dir/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL object.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object.
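+
+For example, a minimal sketch reusing the connection settings from above
+that turns CRL checking off:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Skip CRL checking when establishing the connection
+    crl: false
+  }
+)
+```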
+
+## Online Certificate Status Protocol
+
+If you create certificates with a specified Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol)),
+the OCSP endpoint will automatically be checked. If that endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/developing/usage/updating-objects.md b/content/riak/kv/3.0.3/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..bfdd257446
--- /dev/null
+++ b/content/riak/kv/3.0.3/developing/usage/updating-objects.md
@@ -0,0 +1,778 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.3/dev/using/updates
+  - /riak/kv/3.0.3/dev/using/updates
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context). Causal context objects track the causal history of Riak objects.
+They are attached to _all_ Riak objects as metadata, and they are not
+readable by humans. They may sound complex---and they are fairly complex
+behind the scenes---but using them in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value.
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->atLocation($location)
+    ->withObject($object)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but it
+%% will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+obj = rsp.Values[0]
+obj.Value = []byte("Harlem Globetrotters")
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc = cmd.(*riak.StoreValueCommand)
+rsp = svc.Response
+obj = rsp.Values[0]
+fmt.Printf("champion: %v", string(obj.Value))
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+```
+
+In the samples above, we didn't need to actually interact with the
+context object, as retaining and passing along the context object was
+accomplished automatically by the client. If, however, you do need
+access to an object's context, the clients enable you to fetch it from
+the object:
+
+```java
+// Using the RiakObject obj from above:
+
+Vclock vClock = obj.getVclock();
+System.out.println(vClock.asString());
+
+// The context object will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```ruby
+# Using the RObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```php
+# Using the $object from above:
+
+echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```python
+# Using the RiakObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```csharp
+// Using the RiakResult rslt from above:
+var vclock = rslt.Value.VectorClock;
+Console.WriteLine(Convert.ToBase64String(vclock));
+
+// The output will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```javascript
+// Using the RiakObject fetchedObj from above:
+var fetchedObj = rslt.values.shift();
+logger.info("vclock: %s", fetchedObj.getVClock().toString('base64'));
+
+// The output will look something like this:
+// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA=
+```
+
+```erlang
+%% Using the Obj object from above:
+
+riakc_obj:vclock(Obj).
+
+%% The context object will look something like this in the Erlang shell:
+%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126,
+%%   6,175,157,255,57,131,41,145,49,143,149,225,240,...>>
+```
+
+```golang
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Println(rsp.VClock)
+
+// Output:
+// X3hNXFq3ythUqvvrG9eJEGbUyLS
+```
+
+## The Object Update Cycle
+
+If you decide that your application requires mutable data in Riak, we
+recommend that you:
+
+* avoid high-frequency object updates to the same key (i.e. multiple
+  updates per second for long periods of time), as this will degrade
+  Riak performance; and that you
+* follow a read-modify-write cycle when performing updates.
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because this enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated).
This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+  * [Java]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/java)
+  * [Ruby]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/ruby)
+  * [Python]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/python)
+  * [C#]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/csharp)
+  * [Go]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+> [strong consistency documentation]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/3.0.3/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official Ruby,
+PHP, Python, .NET, Node.js, Erlang, and Go clients. Because updates with the
+official Java client function somewhat differently, those examples can be
+found in the [section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which will bear the bucket type
+`siblings`, which sets `allow_mult` to `true`. The key for each object
+is the name of the team, e.g. `giants`, `broncos`, etc. Each object will
+consist of the name of the coach in plain text. 
Here's an example of +creating and storing such an object: + +```ruby +bucket = client.bucket('coaches') +obj = bucket.get_or_new('seahawks', type: 'siblings') +obj.content_type = 'text/plain' +obj.raw_data = 'Pete Carroll' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + +if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); +} else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); +} + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('siblings').bucket('coaches') +obj = RiakObject(client, bucket, 'seahawks') +obj.content_type = 'text/plain' +obj.data = 'Pete Carroll' +obj.store() +``` + +```csharp +var id = new RiakObjectId("siblings", "coaches", "seahawks"); +var obj = new RiakObject(id, "Pete Carroll", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('siblings'); +riakObj.setBucket('coaches'); +riakObj.setKey('seahawks'); +riakObj.setValue('Pete Carroll'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } else { + logger.info('Stored Pete Carroll'); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>}, + <<"seahawks">>, + <<"Pete Carroll">>, + <<"text/plain">>). +riakc_pb_socket:put(Pid, Obj). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Pete Carroll"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey("seahawks"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +fmt.Println("Stored Pete Carroll") +``` + +Every once in a while, though, head coaches change in the NFL, which +means that our data would need to be updated. 
Below is an example
+function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.raw_data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($riak, $team, $coach) {
+    // The read phase
+    $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    // The modify phase
+    if ($response->isSuccess()) {
+        $object = $response->getObject();
+        $object->setData($coach);
+    } else {
+        $object = new \Basho\Riak\Object($coach, 'text/plain');
+    }
+
+    // The write phase
+    $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->withObject($object)
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    return $response->isSuccess();
+}
+
+echo update_coach($riak, 'packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Pid, Team, NewCoach) ->
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage
+update_coach(Pid, <<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak.
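+
+The same cycle applies over the HTTP API, as the `curl` sample earlier
+noted: the `X-Riak-Vclock` value returned by the read must accompany the
+write. As a sketch (the vclock value below is illustrative only):
+
+```curl
+# Read the object and note the X-Riak-Vclock header in the response
+curl -i http://localhost:8098/types/siblings/buckets/coaches/keys/packers
+
+# Write the modified value back, passing that header along
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -H "X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=" \
+  -d "Vince Lombardi" \
+  http://localhost:8098/types/siblings/buckets/coaches/keys/packers
+```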
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/#sensor-data)
+in Riak and using timestamps as keys, for example, then you can be sure
+that keys are not repeated. In that case, making writes to Riak without
+first reading the object is fine. If you're not certain, however, then
+we recommend always reading the object first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+public class User {
+  public String username;
+  public List<String> hobbies;
+
+  public User(String username, List<String> hobbies) {
+    this.username = username;
+    this.hobbies = hobbies;
+  }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+  @Override
+  public User apply(User original) {
+    // update logic goes here
+  }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` field:
+
+```java
+public class UpdateUserName extends Update<User> {
+  private String newUsername;
+
+  public UpdateUserName(String newUsername) {
+    this.newUsername = newUsername;
+  }
+
+  @Override
+  public User apply(User original) {
+    original.username = newUsername;
+    return original;
+  }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that replaces the existing object outright
+rather than changing one or more of its properties. 
Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above writes the object back to Riak without returning the
+object to the application. You could, however, use a no-operation update
+to _read_ an object as well if you set `return_body` to `true` in your
+request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withFetchOption(Option.RETURN_BODY, true)
+        .withUpdate(Update.noopUpdate())
+        .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/downloads.md b/content/riak/kv/3.0.3/downloads.md
new file mode 100644
index 0000000000..33af8fe305
--- /dev/null
+++ b/content/riak/kv/3.0.3/downloads.md
@@ -0,0 +1,27 @@
+---
+title: "Download for Riak KV 3.0.3"
+description: "Download packages for Riak KV 3.0.3."
+menu:
+  riak_kv-3.0.3:
+    name: "Download Riak KV"
+    identifier: "download_riak_kv"
+    weight: 101
+    pre: download-alt
+project: "riak_kv"
+project_version: 3.0.3
+toc: false
+layout: downloads
+listed_projects:
+  - project: "riak_kv"
+    version: 3.0.3
+    title: "Riak KV"
+    install_instructions_set: "setup/installing"
+aliases:
+  - /riak/3.0.3/downloads
+  - /riak/kv/3.0.3/downloads
+---
+
+
+
+
diff --git a/content/riak/kv/3.0.3/index.md b/content/riak/kv/3.0.3/index.md
new file mode 100644
index 0000000000..35f1534425
--- /dev/null
+++ b/content/riak/kv/3.0.3/index.md
@@ -0,0 +1,79 @@
+---
+title: "Riak KV 3.0.3"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Riak KV"
+    identifier: "index"
+    weight: 100
+    pre: riak
+toc: false
+aliases:
+  - /riak/3.0.3/
+---
+
+[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/
+[config index]: {{<baseurl>}}riak/kv/3.0.3/configuring
+[downloads]: {{<baseurl>}}riak/kv/3.0.3/downloads/
+[install index]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/
+[plan index]: {{<baseurl>}}riak/kv/3.0.3/setup/planning
+[perf open files]: {{<baseurl>}}riak/kv/3.0.3/using/performance/open-files-limit
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/debian-ubuntu
+[usage search]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search
+[getting started]: {{<baseurl>}}riak/kv/3.0.3/developing/getting-started
+[dev client libraries]: {{<baseurl>}}riak/kv/3.0.3/developing/client-libraries
+
+
+
+Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data.
+
+This release is tested with OTP 20, OTP 21, and OTP 22, but optimal performance is likely to be achieved when using OTP 22.
+
+## Supported Operating Systems
+
+- Amazon Linux 2016.09 (AWS)
+- Amazon Linux 2 (AWS)
+- CentOS 6
+- CentOS 7
+- CentOS 8
+- Debian 7.0 ("Wheezy")
+- Debian 8.0 ("Jessie")
+- Debian 9.0 ("Stretch")
+- Red Hat Enterprise Linux 6
+- Red Hat Enterprise Linux 7
+- Red Hat Enterprise Linux 8
+- Raspbian Buster
+- Ubuntu 12.04 ("Precise Pangolin")
+- Ubuntu 14.04 ("Trusty Tahr")
+- Ubuntu 16.04 ("Xenial Xerus")
+- Ubuntu 18.04 ("Bionic Beaver")
+- FreeBSD 10.4
+- FreeBSD 11.1
+- Mac OSX 10.11+ (development only)
+
+## Getting Started
+
+Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started:
+
+1. [Install Riak KV][install index]
+2. [Plan your Riak KV setup][plan index]
+3. [Configure Riak KV for your needs][config index]
+
+{{% note title="Developing with Riak KV" %}}
+If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/3.0.3/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more.
+{{% /note %}}
+
+## Popular Docs
+
+1. [Open Files Limit][perf open files]
+2. [Installing on Debian-Ubuntu][install debian & ubuntu]
+3. [Developing with Riak KV: Searching][usage search]
+4. [Developing with Riak KV: Getting Started][getting started]
+5. 
[Developing with Riak KV: Client Libraries][dev client libraries]
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn.md b/content/riak/kv/3.0.3/learn.md
new file mode 100644
index 0000000000..f97eb6ab62
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn.md
@@ -0,0 +1,53 @@
+---
+title: "Learn About Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Learning"
+    identifier: "learn"
+    weight: 400
+    pre: beaker
+toc: true
+aliases:
+---
+
+[learn why riak]: ./why-riak-kv/
+[learn use cases]: ./use-cases/
+[learn new nosql]: ./new-to-nosql/
+[glossary]: ./glossary/
+[concepts]: ./concepts/
+
+## In This Section
+
+#### [Why Riak KV?][learn why riak]
+
+An overview of Riak KV and when to use it.
+
+[Learn More >>][learn why riak]
+
+#### [Use Cases][learn use cases]
+
+Details use cases and applications in which Riak KV excels.
+
+[Learn More >>][learn use cases]
+
+
+
+#### [Glossary][glossary]
+
+A list of terms relating to Riak used throughout the documentation.
+
+[Learn More >>][glossary]
+
+#### [Concepts][concepts]
+
+Provides definitions for, insight into, and high-level information about the various parts of Riak KV.
+
+[Learn More >>][concepts]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn/concepts.md b/content/riak/kv/3.0.3/learn/concepts.md
new file mode 100644
index 0000000000..254b7d5cd6
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn/concepts.md
@@ -0,0 +1,49 @@
+---
+title: "Concepts"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Concepts"
+    identifier: "learn_concepts"
+    weight: 104
+    parent: "learn"
+toc: true
+aliases:
+---
+
+[concept aae]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets
+[concept cap neg]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/capability-negotiation
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters
+[concept crdts]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/crdts
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency
+[concept keys objects]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/reference/strong-consistency
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/vnodes
+[config index]: {{<baseurl>}}riak/kv/3.0.3/configuring
+[plan index]: {{<baseurl>}}riak/kv/3.0.3/setup/planning
+[use index]: {{<baseurl>}}riak/kv/3.0.3/using/
+
+
+Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high-level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak.
+
+Learn more about:
+
+* [Active Anti-Entropy (AAE)][concept aae]
+* [Buckets][concept buckets]
+* [Capability Negotiation][concept cap neg]
+* [Causal Context][concept causal context]
+* [Clusters][concept clusters]
+* [Convergent Replicated Data Types (CRDTs)][concept crdts]
+* [Eventual Consistency][concept eventual consistency]
+* [Keys and Objects][concept keys objects]
+* [Replication][concept replication]
+* [Virtual Nodes (vnodes)][concept vnodes]
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn/concepts/active-anti-entropy.md b/content/riak/kv/3.0.3/learn/concepts/active-anti-entropy.md
new file mode 100644
index 0000000000..8e2572055a
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn/concepts/active-anti-entropy.md
@@ -0,0 +1,111 @@
+---
+title: "Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Active Anti-Entropy"
+    identifier: "learn_concepts_aae"
+    weight: 100
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.3/theory/concepts/aae
+  - /riak/kv/3.0.3/theory/concepts/aae
+---
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter
+[cluster ops aae]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/active-anti-entropy
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency
+[config aae]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#active-anti-entropy
+[glossary read rep]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#read-repair
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode
+[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree
+[usage search]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search
+
+
+In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored
+on different nodes are an expected byproduct of node failure, concurrent
+client updates, physical data loss and corruption, and other events that
+distributed systems are built to handle. These conflicts occur when
+objects are either
+
+* **missing**, as when one node holds a replica of the object and
+  another node does not, or
+* **divergent**, as when the values of an existing object differ across
+  nodes.
+
+Riak KV offers two means of resolving object conflicts: read repair and
+active anti-entropy (AAE). Both of these conflict resolution mechanisms
+apply to normal key/value data in Riak as well as to
+[search indexes][usage search].
+
+
+## Read Repair vs. Active Anti-Entropy
+
+In versions of Riak prior to 1.3, replica conflicts were healed via
+[read repair][glossary read rep], a _passive_
+anti-entropy mechanism that heals object conflicts only when a read
+request reaches Riak from a client. Under read repair, if the
+[vnode][glossary vnode] coordinating the read request determines
+that different nodes hold divergent values for the object, the repair
+process will be set in motion.
+
+One advantage of using read repair alone is that it doesn't require any
+kind of background process to take effect, which can cut down on CPU
+resource usage. The drawback of the read repair-only approach, however,
+is that the healing process can only ever reach those objects that
+are read by clients. Any conflicts in objects that are not read by
+clients will go undetected.
+
+The _active_ anti-entropy (AAE) subsystem was added to Riak in
+version 1.3 to enable conflict resolution to run as a
+continuous background process, in contrast with read repair, which does
+not run continuously. AAE is most useful in clusters containing
+so-called "cold data" that may not be read for long periods of time, even
+months or years, and is thus not reachable by read repair.
+
+Although AAE is enabled by default, it can be turned off if necessary.
+See our documentation on [managing active anti-entropy][cluster ops aae]
+for information on how to enable and disable AAE, as well as on configuring
+and monitoring AAE.
+
+## Active Anti-Entropy and Hash Tree Exchange
+
+In order to compare object values between replicas without using more
+resources than necessary, Riak relies on [Merkle tree] hash exchanges
+between nodes.
+
+Using this type of exchange enables Riak to compare a balanced tree of
+Riak object hashes. Any difference at a higher level in the hierarchy
+means that at least one value has changed at a lower level. AAE
+recursively compares the tree, level by level, until it pinpoints exact
+values with a difference between nodes. The result is that AAE is able
+to run repair operations efficiently regardless of how many objects are
+stored in a cluster, since it need only repair specific objects instead
+of all objects.
+
+In contrast with related systems, Riak uses persistent, on-disk hash
+trees instead of in-memory hash trees. The advantages of this approach
+are twofold:
+
+* Riak can run AAE operations with a minimal impact on memory usage
+* Riak nodes can be restarted without needing to rebuild hash trees
+
+In addition, hash trees are updated in real time as new writes come in,
+which reduces the time that it takes to detect and repair missing or
+divergent replicas.
+
+As an additional fallback measure, Riak periodically clears and
+regenerates all hash trees from on-disk key/value data, which enables
+Riak to detect silent data corruption to on-disk data arising from disk
+failure, faulty hardware, and other sources. The default time period for
+this regeneration is one week, but this can be adjusted in each node's
+[configuration file][config aae].
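+
+As a sketch, the relevant settings in each node's `riak.conf` look like
+the following (names are from the legacy AAE subsystem and can vary by
+release, so consult the [configuration file][config aae] reference):
+
+```
+# Enable (or disable) the active anti-entropy subsystem
+anti_entropy = active
+
+# How often the on-disk hash trees are cleared and rebuilt from
+# key/value data (the one-week default mentioned above)
+anti_entropy.tree.expiry = 1w
+```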
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn/concepts/buckets.md b/content/riak/kv/3.0.3/learn/concepts/buckets.md
new file mode 100644
index 0000000000..a8d85549c1
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn/concepts/buckets.md
@@ -0,0 +1,217 @@
+---
+title: "Buckets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Buckets"
+    identifier: "learn_concepts_buckets"
+    weight: 101
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.3/theory/concepts/Buckets
+  - /riak/kv/3.0.3/theory/concepts/Buckets
+  - /riak/3.0.3/theory/concepts/buckets
+  - /riak/kv/3.0.3/theory/concepts/buckets
+---
+
+[apps cluster metadata]: {{<baseurl>}}riak/kv/3.0.3/developing/app-guide/cluster-metadata
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/bucket-types
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/strong-consistency
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context
+[concept causal context sib]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context/#siblings
+[concept replication]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/reference/strong-consistency
+[config basic]: {{<baseurl>}}riak/kv/3.0.3/configuring/basic
+[dev api http]: {{<baseurl>}}riak/kv/3.0.3/developing/api/http
+[dev data types]: {{<baseurl>}}riak/kv/3.0.3/developing/data-types
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#ring
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/multi
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/replication
+
+
+Buckets are used to define a virtual keyspace for storing Riak objects.
+They enable you to define non-default configurations over that keyspace
+concerning [replication properties][concept replication] and [other
+parameters][config basic].
+
+In certain respects, buckets can be compared to tables in relational
+databases or to folders in filesystems. From the standpoint
+of performance, buckets with default configurations are essentially
+"free," while non-default configurations, defined [using bucket
+types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.
+
+## Configuration
+
+Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables
+you to create and modify sets of configurations and apply them to as
+many buckets as you wish. With bucket types, you can configure the
+following bucket-level parameters, overriding the default values if you
+wish.
+
+#### allow_mult
+
+Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on
+the context. See the documentation on [`allow_mult`][usage bucket types] for more
+information.
+
+#### n_val
+
+Specifies the number of copies of each object to be stored in the
+cluster. See the documentation on [replication properties][usage replication]. Default:
+`3`.
+
+#### last_write_wins
+
+Indicates if an object's timestamp will be used to decide the canonical
+write in the case of a conflict. See the documentation on [vector
+clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default:
+`false`.
+
+#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum
+
+See the documentation on [replication properties][usage replication] for more information
+on all of these properties.
+
+#### precommit
+
+A list of Erlang functions to be executed before writing an object. See
+our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit
+hooks, i.e. an empty list.
+
+#### postcommit
+
+A list of Erlang functions to be executed after writing an object. See
+our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit
+hooks, i.e. an empty list.
+
+#### old_vclock, young_vclock, small_vclock, big_vclock
+
+These settings enable you to manage [vector clock pruning][concept causal context].
+
+#### backend
+
+If you are using the [Multi][plan backend multi] backend, this property enables you to
+determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using
+LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_
+buckets of all types will use the assigned backend.
+
+#### consistent
+
+If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets
+bearing a type, this setting must be set to `true`. The default is
+`false`. More information can be found in our documentation on [using
+strong consistency][cluster ops strong consistency].
+
+#### datatype
+
+If you are using [Riak data types][dev data types], this setting
+determines which data type will be used in
+buckets of this bucket type. Possible values: `counter`, `set`, or
+`map`.
+
+#### dvv_enabled
+
+Whether [dotted version vectors][concept causal context]
+will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.
+
+#### chash_keyfun, linkfun
+
+These settings involve features that have been deprecated. You will not
+need to adjust these values.
+
+## Fetching Bucket Properties
+
+If you'd like to see how a particular bucket has been configured, you
+can do so using our official client libraries or through Riak's [HTTP
+API][dev api http]. The following would fetch the properties for the bucket
+`animals` if that bucket had a default configuration, i.e. the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+  new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak's HTTP API is listening on "localhost" and port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+  new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals', 'my_custom_type')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn/concepts/capability-negotiation.md b/content/riak/kv/3.0.3/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..3c795d28f0
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn/concepts/capability-negotiation.md
@@ -0,0 +1,36 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.3/theory/concepts/capability-negotiation
+  - /riak/kv/3.0.3/theory/concepts/capability-negotiation
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/3.0.3/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/mapreduce
+
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
+
+Rolling upgrades no longer require you to disable and then re-enable features, thanks to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + + + + + diff --git a/content/riak/kv/3.0.3/learn/concepts/causal-context.md b/content/riak/kv/3.0.3/learn/concepts/causal-context.md new file mode 100644 index 0000000000..7462f4ed1d --- /dev/null +++ b/content/riak/kv/3.0.3/learn/concepts/causal-context.md @@ -0,0 +1,289 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.3/theory/concepts/context + - /riak/kv/3.0.3/theory/concepts/context +--- + + +[concept aae]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/3.0.3/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/3.0.3/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/3.0.3/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+    b. Riak creates sibling values, aka **siblings**, for the object
+
+    c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by settings. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 1.4, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+ * Whether one object is a direct descendant of the other
+ * Whether the objects are direct descendants of a common parent
+ * Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects.
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks][concept causal context]. In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object but they
+can't identify which value was associated with each update.
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#sibling-explosion) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates, they
+should generally be smaller than objects that use vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence of interactions with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`. And so if you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion.
At the extreme, having an enormous object in a
+node can cause reads of that object to crash the entire node. Other
+issues include [undue latency][perf latency reduc] and
+out-of-memory errors.
+
+To prevent sibling explosion, we recommend the following:
+
+1. Use [dotted version vectors](#dotted-version-vectors)
+instead of vector clocks for causal
+context.
+2. Always update mutable objects within a read/modify/write cycle. More
+information can be found in the [Object Updates][usage updating objects] doc.
+
+## Resources
+
+* [Evaluating Dotted Version Vectors in Riak]
+* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]
+* [Dotted Version Vector Sets]
+* [A History of Time in Riak]
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn/concepts/clusters.md b/content/riak/kv/3.0.3/learn/concepts/clusters.md
new file mode 100644
index 0000000000..b7c6b91aa3
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn/concepts/clusters.md
@@ -0,0 +1,117 @@
+---
+title: "Clusters"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Clusters"
+    identifier: "learn_concepts_clusters"
+    weight: 103
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.3/theory/concepts/Clusters
+  - /riak/kv/3.0.3/theory/concepts/Clusters
+  - /riak/3.0.3/theory/concepts/clusters
+  - /riak/kv/3.0.3/theory/concepts/clusters
+---
+
+
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets
+[concept keys objects]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication
+[glossary node]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode
+[learn dynamo]: {{<baseurl>}}riak/kv/3.0.3/learn/dynamo
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/replication
+
+
+Riak's default mode of operation is to work as a cluster consisting of
+multiple [nodes][glossary node], i.e. multiple well-connected data
+hosts.
+
+Each host in the cluster runs a single instance of Riak, referred to as
+a Riak node. Each Riak node manages a set of virtual nodes, or
+[vnodes][glossary vnode], that are responsible for storing a
+separate portion of the keys stored in the cluster.
+
+In contrast to some high-availability systems, Riak nodes are _not_
+clones of one another, and they do not all participate in fulfilling
+every request. Instead, you can configure, at runtime or at request
+time, the number of nodes on which data is to be replicated, as well as
+when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed.
+
+## The Ring
+
+Though much of the material in this section is covered in our annotated
+discussion of the Amazon [Dynamo paper][learn dynamo], it nonetheless
+provides a summary of how Riak implements the distribution of data
+throughout a cluster.
+
+Any client interface to Riak interacts with objects in terms of the
+[bucket][concept buckets] and [key][concept keys objects] in which a value is
+stored, as well as the [bucket type][usage bucket types] that is used
+to set the bucket's properties.
+
+Internally, Riak computes a 160-bit binary hash of each bucket/key pair
+and maps this value to a position on an ordered **ring** of all such
+values.
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes define a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![Riak Data Distribution]({{<baseurl>}}images/riak-data-distribution.png)
+
+When N is set to 3, the value `REM` is stored in the key `artist`. That
+key is assigned to 3 partitions out of 32 available partitions. When a
+read request is made to Riak, the ring state will be used to determine
+which partitions are responsible. From there, a variety of
+[configurable parameters][usage replication] determine how Riak
+will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates.
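+
+The ring and preference-list behavior described above can be sketched in a
+few lines of Python. This is a simplified model---Riak's real implementation
+lives in `riak_core`, uses a different hashing of the bucket/key pair, and
+defaults to 64 partitions rather than the 8 used here:
+
+```python
+import hashlib
+
+RING_SIZE = 8                          # partitions (for demonstration only)
+NODES = ["node1", "node2", "node3"]
+N_VAL = 3                              # replicas per object
+
+# Assign partitions to nodes round-robin, as in the ring diagram above.
+ring = [(i, NODES[i % len(NODES)]) for i in range(RING_SIZE)]
+
+def preflist(bucket, key, n_val=N_VAL):
+    """Hash a bucket/key pair onto the ring and return the n_val
+    partitions (and their nodes) preferred for that object."""
+    digest = hashlib.sha1(bucket + key).digest()
+    doc_idx = int.from_bytes(digest, "big")      # a 160-bit integer
+    first = doc_idx // (2 ** 160 // RING_SIZE)   # partition the hash lands in
+    return [ring[(first + i) % RING_SIZE] for i in range(n_val)]
+
+# The three partitions that would receive copies of this object:
+print(preflist(b"my_bucket", b"my_key"))
+```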
+ + + + diff --git a/content/riak/kv/3.0.3/learn/concepts/crdts.md b/content/riak/kv/3.0.3/learn/concepts/crdts.md new file mode 100644 index 0000000000..bc6af9873b --- /dev/null +++ b/content/riak/kv/3.0.3/learn/concepts/crdts.md @@ -0,0 +1,252 @@ +--- +title_supertext: "Concept" +title: "Data Types" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Data Types" + identifier: "learn_concepts_data_types" + weight: 104 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.3/theory/concepts/crdts + - /riak/kv/3.0.3/theory/concepts/crdts +--- + +[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[data types converg]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/crdts/#convergence +[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html +[data types impl]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/crdts/#implementation +[concept causal context dvv]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context/#dotted-version-vectors +[concept causal context sib]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context/#siblings +[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context/#vector-clocks +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency +[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/strong-consistency +[dev data types]: {{<baseurl>}}riak/kv/3.0.3/developing/data-types +[riak_dt]: https://github.com/basho/riak_dt +[dev data types context]: {{<baseurl>}}riak/kv/3.0.3/developing/data-types/#data-types-and-context +[glossary node]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#node +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution + +Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections: + +- Counters +- Flags +- HyperLogLogs +- Maps +- Registers +- Sets + +The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients. + +Instead of the usual create, read, update, and delete (CRUD) operations +performed on key/value pairs, data types enable you to perform +operations such as removing a register from a map, telling a counter to +increment itself by 5, or enabling a flag that was previously disabled. + +It's important to note that Riak Data Types are operations-based from the standpoint of connecting clients. Like CRDTs, the [convergence logic][data types converg] is state-based behind the scenes. + +Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below. + +For more articles on CRDTs, check out this [reading list][crdts reading list]. + + +## Counters + +Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used within a map. A counter’s value can only be a positive integer, negative integer, or zero. 
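+
+For a feel of the client-side interface, here is a minimal sketch using the
+official Python client. It assumes a bucket type named `counters` has
+already been created with `{"props":{"datatype":"counter"}}` and activated;
+the bucket and key names are illustrative:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(pb_port=8087)
+bucket = client.bucket_type('counters').bucket('player_scores')
+
+counter = bucket.new('player_14')   # a counter, per the bucket type's datatype
+counter.increment(50)               # queue an increment operation
+counter.store()                     # send the operation to Riak
+
+print(bucket.get('player_14').value)  # converged count, e.g. 50
+```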
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, counters should not be used, because uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags take the value `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, hyperloglogs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+  Counter: numberOfRetweets,
+  Register: username,
+  Register: tweetContent,
+  Flag: favorited?,
+  Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which includes
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+Registers can only have the binaries stored within them changed. They can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.
+
+
+## Sets
+
+Sets are collections of unique binary values, such as strings. All of
+the values in a set are unique. For example, if you attempt to add the
+element `shovel` to a set that already contains `shovel`, the operation
+will be ignored by Riak KV. Sets can be used either on their own or
+embedded in a map.
+ +Some examples of using sets: + +- Storing the UUIDs of a user's friends in a social network application +- Storing items in an e-commerce shopping cart + +### Operations + +Sets are subject to four basic operations: add an element, remove an +element, add multiple elements, or remove multiple elements. + + +## Advantages and Disadvantages of Data Types + +[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider. + +One of the core purposes behind data types is to relieve developers +using Riak KV of the burden of producing data convergence at the +application level by absorbing a great deal of that complexity into Riak KV +itself. Riak KV manages this complexity by building eventual consistency +into the data types themselves instead of requiring clients to do so. + +You can still build applications with Riak KV that treat it as a highly +available key/value store, and you will always have this choice. What +Riak Data Types provide is additional flexibility and a broader choice +palette. + +The trade-off that data types necessarily present is that they don't +allow you to produce your own convergence logic. If your use case +demands that you be able to create your own deterministic merge +functions, then Riak Data Types might not be a good fit. + + +## Implementation + +Conflicts between replicas are inevitable in a distributed system like +Riak KV. + +For example, if a map is stored in the key `my_map`, it is always +possible that the value of `my_map` will be different in nodes A and B. + +Without using data types, that conflict must be resolved using +timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt]. + + +## Convergence + +The benefit of data types is that Riak KV knows how to resolve value +conflicts by applying data type-specific rules. + +Riak KV does this by remembering the history of a value and broadcasting that +history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct. + +### Example + +Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`. While on another node the set has only one element, `apple`. + +What happens when the two nodes communicate and note the divergence? + +In this case Riak KV would declare the set with two elements the winner. +At that point, the node with the incorrect set would be told: "The set +`fruits` should have elements `apple` and `orange`." + +In general, convergence involves the following stages: + +1. Check for divergence. If the data types have the same value, Riak KV + does nothing. But if divergence is noted... +2. Riak KV applies data type-specific merge rules, like in the `fruits` + set example above, which will result in a "correct" value. +3. 
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. + + + + diff --git a/content/riak/kv/3.0.3/learn/concepts/eventual-consistency.md b/content/riak/kv/3.0.3/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..8124f72340 --- /dev/null +++ b/content/riak/kv/3.0.3/learn/concepts/eventual-consistency.md @@ -0,0 +1,202 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.3/theory/concepts/Eventual-Consistency + - /riak/kv/3.0.3/theory/concepts/Eventual-Consistency + - /riak/3.0.3/theory/concepts/eventual-consistency + - /riak/kv/3.0.3/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these
+systems need to rely on some form of conflict-resolution mechanism.
+
+One of the things that makes Riak's eventual consistency model powerful
+is that Riak does not dictate how data resolution takes place. While
+Riak does ship with a set of defaults regarding how data is
+[replicated](#replication-properties-and-request-tuning) and how
+[conflicts are resolved][usage conflict resolution], you can override these
+defaults if you want to employ a different strategy.
+
+Among those strategies, you can enable Riak to resolve object conflicts
+automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or
+special eventually consistent [Data Types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types/), or you can resolve those
+conflicts on the application side by employing a use case-specific logic
+of your choosing. More information on this can be found in our guide to
+[conflict resolution][usage conflict resolution].
+
+This variety of options enables you to manage Riak's eventually
+consistent behavior in accordance with your application's [data model
+or models]({{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/).
+
+## Replication Properties and Request Tuning
+
+In addition to providing you with different means of resolving conflicts,
+Riak also enables you to fine-tune **replication properties**, which
+determine things like the number of nodes on which data should be stored
+and the number of nodes that are required to respond to read, write, and
+other requests.
+
+An in-depth discussion of these behaviors and how they can be
+implemented on the application side can be found in our guides to
+[replication properties][concept replication] and [conflict resolution][usage conflict resolution].
+
+In addition to our official documentation, we also recommend checking
+out the [Understanding Riak's Configurable
+Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+series from [the Basho blog](https://riak.com/blog/).
+
+## A Simple Example of Eventual Consistency
+
+Let's assume for the moment that a sports news application is storing
+all of its data in Riak. One thing that the application always needs to
+be able to report to users is the identity of the current manager of
+Manchester United, which is stored in the key `manchester-manager` in
+the bucket `premier-league-managers`. This bucket has `allow_mult` set
+to `false`, which means that Riak will resolve all conflicts by itself.
+
+Now let's say that a node in this cluster has recently recovered from
+failure and has an old copy of the key `manchester-manager` stored in
+it, with the value `Alex Ferguson`. The problem is that Sir Alex
+Ferguson stepped down in 2013 and is no longer the manager. Fortunately,
+the other nodes in the cluster hold the value `David Moyes`, which is
+correct.
+
+Shortly after the recovered node comes back online, other cluster
+members recognize that it is available. Then, a read request for
+`manchester-manager` arrives from the application. Regardless of the
+order in which the responses arrive at the node that is coordinating
+this request, `David Moyes` will be returned as the value to the client,
+because `Alex Ferguson` is recognized as an older value.
+
+Why is this? How does Riak make this decision? Behind the scenes, after
+`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the
+older value on the node that just came back online.
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because 1 of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we considered the
+possibility of a node holding the `manchester-manager` key having
+failed. To be more precise, we've been talking about a *primary* node,
+one that, when the cluster is perfectly healthy, would bear
+responsibility for that key.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key, and when it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure.
+>
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3` the client indicated that 3 primary
+nodes must respond for the operation to be considered successful, which
+it wasn't, but there's no way to tell without performing another read
+whether the operation truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels, et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn/concepts/keys-and-objects.md b/content/riak/kv/3.0.3/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..8d7ac7b38d
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn/concepts/keys-and-objects.md
@@ -0,0 +1,53 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.3/theory/concepts/keys-and-values
+  - /riak/kv/3.0.3/theory/concepts/keys-and-values
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there is more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and arise when more than one actor
+updates an object, a network partition occurs, or a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
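+
+To see siblings from the application side, here is a sketch using the
+official Python client. It assumes a bucket type named `siblings_allowed`
+was created with `{"props":{"allow_mult":true}}` and activated; all bucket,
+type, and key names are illustrative:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(pb_port=8087)
+bucket = client.bucket_type('siblings_allowed').bucket('customers')
+
+# Two writes to the same key, neither of which fetched the other's causal
+# context first, so Riak keeps both values rather than discarding one.
+bucket.new('mariejohnston', data={'phone': '555-1337'}).store()
+bucket.new('mariejohnston', data={'phone': '555-1212'}).store()
+
+fetched = bucket.get('mariejohnston')
+print(len(fetched.siblings))  # 2 -- the application must now resolve them
+```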
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn/concepts/replication.md b/content/riak/kv/3.0.3/learn/concepts/replication.md
new file mode 100644
index 0000000000..568cccd22a
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn/concepts/replication.md
@@ -0,0 +1,323 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.3/theory/concepts/Replication
+  - /riak/kv/3.0.3/theory/concepts/Replication
+  - /riak/3.0.3/theory/concepts/replication
+  - /riak/kv/3.0.3/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing security
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter) capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak chooses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
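+
+The following sketch shows how these per-request parameters look from the
+official Python client; the bucket and key are borrowed from the eventual
+consistency examples elsewhere in these docs and are illustrative only:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(pb_port=8087)
+bucket = client.bucket('premier-league-managers')
+
+# Write: ask 2 of the N replicas to acknowledge, 1 of them durably.
+obj = bucket.new('manchester-manager', data={'name': 'David Moyes'})
+obj.store(w=2, dw=1)
+
+# r=1 returns as soon as one replica answers: fast, but it may surface a
+# stale value that read repair then fixes in the background.
+fast_read = bucket.get('manchester-manager', r=1)
+
+# pr=2 insists on two *primary* replicas, trading availability for
+# fresher data; it fails if too few primaries are reachable.
+strict_read = bucket.get('manchester-manager', pr=2)
+print(fast_read.data, strict_read.data)
+```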
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable, data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up changing.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/3.0.3/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/3.0.3/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy][cluster ops v3 mdc]
+
+
+## Read Repair
+
+Read repair occurs when a successful read occurs---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Alternatively, if you have [active
+anti-entropy][usage replication] enabled, your values will
+eventually be replicated as a background task.
+
+For each object that fails to read (or for the whole bucket, if you
+like), read the object using an R value less than or equal to the
+original number of replicas. For example, if your original `n_val` was 3
+and you increased it to 5, perform your read operations with R=3 or
+less. This will cause the nodes that do not have the object(s) yet to
+respond with `not found`, invoking read repair.
+
+## So what does N=3 really mean?
+
+N=3 simply means that three copies of each piece of data will be stored
+in the cluster. That is, three different partitions/vnodes will receive
+copies of the data. **There are no guarantees that the three replicas
+will go to three separate physical nodes**; however, the built-in
+functions for determining where replicas go attempt to distribute the
+data evenly.
+
+As nodes are added and removed from the cluster, the ownership of
+partitions changes and may result in an uneven distribution of the data.
+On some rare occasions, Riak will also aggressively reshuffle ownership
+of the partitions to achieve a more even balance.
+
+For cases where the number of nodes is less than the N value, data will
+likely be duplicated on some nodes. For example, with N=3 and 2 nodes in
+the cluster, one node will likely have one replica, and the other node
+will have two replicas.
+
+## Understanding replication by example
+
+To better understand how data is replicated in Riak, let's take a look at
+a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically,
+we'll focus on two parts of the request: routing an object to a set of
+partitions and storing an object on a partition.
+
+### Routing an object to a set of partitions
+
+ * Assume we have 3 nodes
+ * Assume we store 3 replicas per object (N=3)
+ * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8)
+
+**Note**: It is not recommended that you use such a small ring size.
+This is for demonstration purposes only.
+
+With only 8 partitions our ring will look approximately as follows
+(response from `riak_core_ring_manager:get_my_ring/0` truncated for
+clarity):
+
+```erlang
+(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
+[{0,'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
+```
+
+The node handling this request hashes the bucket/key combination:
+
+```erlang
+(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+```
+
+The DocIdx hash is a 160-bit integer:
+
+```erlang
+(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx.
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+(dev1@127.0.0.1)6> I.
+1045375627425331784151332358177649483819648417632
+```
+
+The node looks up the hashed key in the ring, which returns a list of
+_preferred_ partitions for the given key.
+
+```erlang
+(dev1@127.0.0.1)> Preflist = riak_core_ring:preflist(DocIdx, Ring).
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}, +{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}, +{0, 'dev1@127.0.0.1'}, +{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}, +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}, +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}, +{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'}, +{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}] +``` + +The node chooses the first N partitions from the list. The remaining +partitions of the "preferred" list are retained as fallbacks to use if +any of the target partitions are unavailable. + +```erlang +(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist). +{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}, +{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}, +{0,'dev1@127.0.0.1'}], +[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}, +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}, +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}, +{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'}, +{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]} +``` + +The partition information returned from the ring contains a partition +identifier and the parent node of that partition: + +```erlang +{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'} +``` + +The requesting node sends a message to each parent node with the object +and partition identifier (pseudocode for clarity): + +```erlang +'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232} +'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104} +'dev1@127.0.0.1' ! {put, Object, 0} +``` + +If any of the target partitions fail, the node sends the object to one +of the fallbacks. When the message is sent to the fallback node, the +message references the object and original partition identifier. For +example, if `dev2@127.0.0.1` were unavailable, the requesting node would +then try each of the fallbacks. The fallbacks in this example are: + +```erlang +{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'} +{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'} +{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'} +``` + +The next available fallback node would be `dev3@127.0.0.1`. The +requesting node would send a message to the fallback node with the +object and original partition identifier: + +```erlang +'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104} +``` + +Note that the partition identifier in the message is the same that was +originally sent to `dev2@127.0.0.1` only this time it is being sent to +`dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node of +that partition, it is smart enough to hold on to the object until +`dev2@127.0.0.1` returns to the cluster. + +## Processing partition requests + +Processing requests per partition is fairly simple. Each node runs a +single process (`riak_kv_vnode_master`) that distributes requests to +individual partition processes (`riak_kv_vnode`). The +`riak_kv_vnode_master` process maintains a list of partition identifiers +and corresponding partition processes. If a process does not exist for a +given partition identifier a new process is spawned to manage that +partition. 
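+
+The dispatch logic described above can be sketched in a few lines of
+Erlang. This is illustrative pseudocode only, not the actual
+`riak_kv_vnode_master` source; `manage_partition/1` is a hypothetical
+stand-in for the real partition process:
+
+```erlang
+%% Look up the process for a partition identifier, spawning a new
+%% process on demand if none manages that partition yet.
+get_vnode_pid(Partition, Vnodes) ->
+    case maps:find(Partition, Vnodes) of
+        {ok, Pid} ->
+            %% A process already manages this partition
+            {Pid, Vnodes};
+        error ->
+            %% No process yet: spawn one and record it
+            Pid = spawn(fun() -> manage_partition(Partition) end),
+            {Pid, Vnodes#{Partition => Pid}}
+    end.
+```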
+ +The `riak_kv_vnode_master` process treats all requests the same and +spawns partition processes as needed even when nodes receive requests +for partitions they do not own. When a partition's parent node is +unavailable, requests are sent to fallback nodes (handoff). The +`riak_kv_vnode_master` process on the fallback node spawns a process to +manage the partition even though the partition does not belong to the +fallback node. + +The individual partition processes perform hometests throughout the life +of the process. The hometest checks if the current node (`node/0`) +matches the parent node of the partition as defined in the ring. If the +process determines that the partition it is managing belongs on another +node (the parent node), it will attempt to contact that node. If that +parent node responds, the process will hand off any objects it has +processed for that partition and shut down. If that parent node does not +respond, the process will continue to manage that partition and check +the parent node again after a delay. The hometest is also run by +partition processes to account for changes in the ring, such as the +addition or removal of nodes to the cluster. + + + + + diff --git a/content/riak/kv/3.0.3/learn/concepts/strong-consistency.md b/content/riak/kv/3.0.3/learn/concepts/strong-consistency.md new file mode 100644 index 0000000000..09d4ca6411 --- /dev/null +++ b/content/riak/kv/3.0.3/learn/concepts/strong-consistency.md @@ -0,0 +1,105 @@ +--- +title: "Strong Consistency" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Strong Consistency" + identifier: "learn_concepts_strong_consistency" + weight: 109 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.3/theory/concepts/strong-consistency + - /riak/kv/3.0.3/theory/concepts/strong-consistency +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/3.0.3/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. 
Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. 
Nearly all reads to `k` now return `v2`, but a small number return
+   the outdated `v` (or even `not found`) because the newer value hasn't
+   yet been replicated to all nodes
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn/concepts/vnodes.md b/content/riak/kv/3.0.3/learn/concepts/vnodes.md
new file mode 100644
index 0000000000..98ca00b322
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn/concepts/vnodes.md
@@ -0,0 +1,160 @@
+---
+title: "Vnodes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Vnodes"
+    identifier: "learn_concepts_vnodes"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.3/theory/concepts/vnodes
+  - /riak/kv/3.0.3/theory/concepts/vnodes
+---
+
+
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context
+[concept clusters ring]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters/#the-ring
+[concept replication]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/strong-consistency
+[glossary node]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#ring
+[plan backend]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/cluster-capacity
+[use admin riak cli]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-cli
+
+
+Virtual nodes, more commonly referred to as **vnodes**, are processes
+that manage partitions in the Riak [ring][glossary ring]. Each data
+partition in a Riak cluster has a vnode that **claims** that partition.
+Vnodes perform a wide variety of operations, from K/V storage operations
+to guaranteeing [strong consistency][concept strong consistency] if you choose to use that
+feature.
+
+## The Number of Vnodes in a Cluster
+
+The term [node][glossary node] refers to a full instance of Riak,
+be it on its own physical machine or alongside others on a single
+machine, as in a development cluster on your laptop. Each Riak node
+contains multiple vnodes. The number per node is the [ring
+size][concept clusters ring] divided by the number of nodes in the cluster.
+
+This means that in some clusters different nodes will have different
+numbers of data partitions (and hence a different number of vnodes),
+because (ring size / number of nodes) will not always produce an
+integer. If the ring size of your cluster is 64 and you are running
+three nodes, two of your nodes will have 21 vnodes, while the third
+node holds 22 vnodes.
+
+The output of the [`riak-admin member-status`][use admin riak cli]
+command shows this:
+
+```
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      34.4%      --      'dev1@127.0.0.1'
+valid      32.8%      --      'dev2@127.0.0.1'
+valid      32.8%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+```
+
+In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of
+64 partitions, while the other two nodes account for 32.8%, i.e. 21 out
+of 64 partitions. This is normal and expected behavior in Riak.
+
+We strongly recommend setting the appropriate ring size, and by
+extension the number of vnodes, prior to building a cluster. A full
+guide can be found in our [cluster planning][plan cluster capacity] documentation.
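+
+As a quick illustration of the arithmetic above (the values mirror the
+example output), here is a minimal Erlang sketch of how 64 partitions
+spread across a 3-node cluster:
+
+```erlang
+%% Two nodes end up with 21 vnodes; the remainder goes to a third node.
+RingSize = 64,
+NumNodes = 3,
+BasePerNode = RingSize div NumNodes, %% 21
+Extra = RingSize rem NumNodes.       %% 1 extra vnode (21 + 1 = 22)
+```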
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, acting as members of [strong consistency
+ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1004782375664995756265033322492444576013453623296`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node.
The report for a
+specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+ 0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn/dynamo.md b/content/riak/kv/3.0.3/learn/dynamo.md
new file mode 100644
index 0000000000..63ed09141c
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn/dynamo.md
@@ -0,0 +1,1928 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/3.0.3/theory/dynamo
+  - /riak/kv/3.0.3/theory/dynamo
---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.

+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV,
+> Cassandra and Voldemort come to mind. You may also remember Dynomite (which
+> predates all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use.
+ +Categories and Subject Descriptors + +* D.4.2 [Operating Systems]: Storage Management; +* D.4.5 [Operating Systems]: Reliability; +* D.4.2 [Operating Systems]: Performance; + +General Terms + +Algorithms, Management, Measurement, Performance, Design, Reliability. + +## 1. Introduction + +Amazon runs a world-wide e-commerce platform that serves tens of millions +customers at peak times using tens of thousands of servers located in many data +centers around the world. There are strict operational requirements on Amazon’s +platform in terms of performance, reliability and efficiency, and to support +continuous growth the platform needs to be highly scalable. Reliability is one +of the most important requirements because even the slightest outage has +significant financial consequences and impacts customer trust. In addition, to +support continuous growth, the platform needs to be highly scalable. + +One of the lessons our organization has learned from operating Amazon’s platform +is that the reliability and scalability of a system is dependent on how its +application state is managed. Amazon uses a highly decentralized, loosely +coupled, service oriented architecture consisting of hundreds of services. In +this environment there is a particular need for storage technologies that are +always available. For example, customers should be able to view and add items to +their shopping cart even if disks are failing, network routes are flapping, or +data centers are being destroyed by tornados. Therefore, the service responsible +for managing shopping carts requires that it can always write to and read from +its data store, and that its data needs to be available across multiple data +centers. + +Dealing with failures in an infrastructure comprised of millions of components +is our standard mode of operation; there are always a small but significant +number of server and network components that are failing at any given time. As +such Amazon’s software systems need to be constructed in a manner that treats +failure handling as the normal case without impacting availability or +performance. + +To meet the reliability and scaling needs, Amazon has developed a number of +storage technologies, of which the Amazon Simple Storage Service (also available +outside of Amazon and known as Amazon S3), is probably the best known. This +paper presents the design and implementation of Dynamo, another highly available +and scalable distributed data store built for Amazon’s platform. Dynamo is used +to manage the state of services that have very high reliability requirements and +need tight control over the tradeoffs between availability, consistency, cost- +effectiveness and performance. Amazon’s platform has a very diverse set of +applications with different storage requirements. A select set of applications +requires a storage technology that is flexible enough to let application +designers configure their data store appropriately based on these tradeoffs to +achieve high availability and guaranteed performance in the most cost effective +manner. + +There are many services on Amazon’s platform that only need primary-key access +to a data store. For many services, such as those that provide best seller +lists, shopping carts, customer preferences, session management, sales rank, and +product catalog, the common pattern of using a relational database would lead to +inefficiencies and limit scale and availability. Dynamo provides a simple +primary-key only interface to meet the requirements of these applications. 
+ +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. It also provides insight into the +tuning of these techniques to meet the requirements of production systems with +very strict performance demands. + +The paper is structured as follows. Section 2 presents the background and +Section 3 presents the related work. Section 4 presents the system design and +Section 5 describes the implementation. Section 6 details the experiences and +insights gained by running Dynamo in production and Section 7 concludes the +paper. There are a number of places in this paper where additional information +may have been appropriate but where protecting Amazon’s business interests +require us to reduce some level of detail. For this reason, the intra- and +inter-datacenter latencies in section 6, the absolute request rates in section +6.2 and outage lengths and workloads in section 6.3 are provided through +aggregate measures instead of absolute details. + + +## 2. Background + +Amazon’s e-commerce platform is composed of hundreds of services that work in +concert to deliver functionality ranging from recommendations to order +fulfillment to fraud detection. Each service is exposed through a well defined +interface and is accessible over the network. 
These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale-out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated and diverged from
+> Dynamo's (as described in this paper).
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly available, is efficient in its resource
+> usage, and has a simple scale-out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
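+>
+> As a small illustration of two of these query methods over the HTTP
+> API: a sketch, assuming a local node on port 8098 and, for the
+> secondary index query, a backend with 2i support (such as LevelDB):
+>
+> ```bash
+> # 1. Standard key/value access
+> curl http://localhost:8098/buckets/users/keys/alice
+>
+> # 3. Secondary index query: keys whose twitter_bin index is "jsmith"
+> curl http://localhost:8098/buckets/users/index/twitter_bin/jsmith
+> ```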
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract.
+ +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. 
+ + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. On the other hand, since the application is aware of the +data schema it can decide on the conflict resolution method that is best suited +for its client’s experience. 
For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbor. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single points of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops.
+ +To reduce the additional latency introduced by multi-hop routing, some P2P +systems (e.g., [14]) employ O(1) routing where each peer maintains enough +routing information locally so that it can route requests (to access a data +item) to the appropriate peer within a constant number of hops. + +> Riak KV's gossip protocol communicates between nodes with O(1) routing, and +> maintains local routing information. + +Various storage systems, such as Oceanstore [9] and PAST [17] were built on top +of these routing overlays. Oceanstore provides a global, transactional, +persistent storage service that supports serialized updates on widely replicated +data. To allow for concurrent updates while avoiding many of the problems +inherent with wide-area locking, it uses an update model based on conflict +resolution. Conflict resolution was introduced in [21] to reduce the number of +transaction aborts. Oceanstore resolves conflicts by processing a series of +updates, choosing a total order among them, and then applying them atomically in +that order. It is built for an environment where the data is replicated on an +untrusted infrastructure. By comparison, PAST provides a simple abstraction +layer on top of Pastry for persistent and immutable objects. It assumes that the +application can build the necessary storage semantics (such as mutable files) on +top of it. + +### 3.2 Distributed File Systems and Databases + +Distributing data for performance, availability and durability has been widely +studied in the file system and database systems community. Compared to P2P +storage systems that only support flat namespaces, distributed file systems +typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19] +replicate files for high availability at the expense of consistency. Update +conflicts are typically managed using specialized conflict resolution +procedures. The Farsite system [1] is a distributed file system that does not +use any centralized server like NFS. Farsite achieves high availability and +scalability using replication. The Google File System [6] is another distributed +file system built for hosting the state of Google’s internal applications. GFS +uses a simple design with a single master server for hosting the entire metadata +and where the data is split into chunks and stored in chunkservers. Bayou is a +distributed relational database system that allows disconnected operations and +provides eventual data consistency [21]. + +Among these systems, Bayou, Coda and Ficus allow disconnected operations and are +resilient to issues such as network partitions and outages. These systems differ +on their conflict resolution procedures. For instance, Coda and Ficus perform +system level conflict resolution and Bayou allows application level resolution. +All of them, however, guarantee eventual consistency. + +Similar to these systems, Dynamo allows read and write operations to continue +even during network partitions and resolves updated conflicts using different +conflict resolution mechanisms. Distributed block storage systems like FAB [18] +split large size objects into smaller blocks and stores each block in a highly +available manner. In comparison to these systems, a key-value store is more +suitable in this case because: (a) it is intended to store relatively small +objects (size < 1M) and (b) key-value stores are easier to configure on a per- +application basis. Antiquity is a wide-area distributed storage system designed +to handle multiple server failures [23]. 
It uses a secure log to preserve data
+integrity, replicates each log on multiple servers for durability, and uses
+Byzantine fault tolerance protocols to ensure data consistency. In contrast to
+Antiquity, Dynamo does not focus on the problem of data integrity and security
+and is built for a trusted environment. Bigtable is a distributed storage system
+for managing structured data. It maintains a sparse, multi-dimensional sorted
+map and allows applications to access their data using multiple attributes [2].
+Compared to Bigtable, Dynamo targets applications that require only key/value
+access with primary focus on high availability where updates are not rejected
+even in the wake of network partitions or server failures.
+
+> This all applies to Riak KV, as well.
+
+Traditional replicated relational database systems focus on the problem of
+guaranteeing strong consistency to replicated data. Although strong consistency
+provides the application writer a convenient programming model, these systems
+are limited in scalability and availability [7]. These systems are not capable
+of handling network partitions because they typically provide strong consistency
+guarantees.
+
+### 3.3 Discussion
+
+Dynamo differs from the aforementioned decentralized storage systems in terms of
+its target requirements. First, Dynamo is targeted mainly at applications that
+need an “always writeable” data store where no updates are rejected due to
+failures or concurrent writes. This is a crucial requirement for many Amazon
+applications. Second, as noted earlier, Dynamo is built for an infrastructure
+within a single administrative domain where all nodes are assumed to be trusted.
+Third, applications that use Dynamo do not require support for hierarchical
+namespaces (a norm in many file systems) or complex relational schema (supported
+by traditional databases). Fourth, Dynamo is built for latency sensitive
+applications that require at least 99.9% of read and write operations to be
+performed within a few hundred milliseconds. To meet these stringent latency
+requirements, it was imperative for us to avoid routing requests through
+multiple nodes (which is the typical design adopted by several distributed hash
+table systems such as Chord and Pastry). This is because multi-hop routing
+increases variability in response times, thereby increasing the latency at
+higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each
+node maintains enough routing information locally to route a request to the
+appropriate node directly.
+
+
+## 4. System Architecture
+
+> This is truly the meat of the Dynamo paper. Stick around. It gets good.
+
+The architecture of a storage system that needs to operate in a production
+setting is complex. In addition to the actual data persistence component, the
+system needs to have scalable and robust solutions for load balancing,
+membership and failure detection, failure recovery, replica synchronization,
+overload handling, state transfer, concurrency and job scheduling, request
+marshalling, request routing, system monitoring and alarming, and configuration
+management. Describing the details of each of the solutions is not possible, so
+this paper focuses on the core distributed systems techniques used in Dynamo:
+partitioning, replication, versioning, membership, failure handling and scaling.
+<a href="#table-1">Table 1</a> presents a summary of the list of techniques
+Dynamo uses and their respective advantages.
+
+<table id="table-1">
+  <caption>
+    Table 1: Summary of techniques used in Dynamo and their advantages.
+  </caption>
+  <tr>
+    <th>Problem</th>
+    <th>Technique</th>
+    <th>Advantage</th>
+  </tr>
+  <tr>
+    <td>Partitioning</td>
+    <td>Consistent Hashing</td>
+    <td>Incremental Scalability</td>
+  </tr>
+  <tr>
+    <td>High Availability for writes</td>
+    <td>Vector clocks with reconciliation during reads</td>
+    <td>Version size is decoupled from update rates.</td>
+  </tr>
+  <tr>
+    <td>Handling temporary failures</td>
+    <td>Sloppy Quorum and hinted handoff</td>
+    <td>Provides high availability and durability guarantees when some of
+    the replicas are not available.</td>
+  </tr>
+  <tr>
+    <td>Recovering from permanent failures</td>
+    <td>Anti-entropy using Merkle trees</td>
+    <td>Synchronizes divergent replicas in the background.</td>
+  </tr>
+  <tr>
+    <td>Membership and failure detection</td>
+    <td>Gossip-based membership protocol and failure detection.</td>
+    <td>Preserves symmetry and avoids having a centralized registry for
+    storing membership and node liveness information.</td>
+  </tr>
+</table>
+
+### 4.1 System Interface
+
+Dynamo stores objects associated with a key through a simple interface; it
+exposes two operations: get() and put(). The get(key) operation locates the
+object replicas associated with the key in the storage system and returns a
+single object or a list of objects with conflicting versions along with a
+context. The put(key, context, object) operation determines where the
+replicas of the object should be placed based on the associated key, and
+writes the replicas to disk. The context encodes system metadata about the
+object that is opaque to the caller and includes information such as the
+version of the object. The context information is stored along with the
+object so that the system can verify the validity of the context object
+supplied in the put request.
+
+> Whereas Dynamo only has the concept of keys, we added a higher level of
+> organization called a "bucket." Keys are stored in buckets, and buckets are
+> the level at which several Riak KV properties can be configured (primarily
+> the "N" value, or the replication value). In addition to the bucket+key
+> identifier and value, Riak KV will also return the associated metadata for
+> a given object with each get or put.
+>
+> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API].
+
+[HTTP API]: {{<baseurl>}}riak/kv/3.0.3/developing/api/http/
+[Protocol Buffers API]: {{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers/
+
+Dynamo treats both the key and the object supplied by the caller as an opaque
+array of bytes. It applies an MD5 hash on the key to generate a 128-bit
+identifier, which is used to determine the storage nodes that are responsible
+for serving the key.
+
+> Riak KV concatenates the bucket with the key and runs it through the SHA1
+> hash to generate a 160-bit identifier, which is then used to determine
+> where in the database each datum is stored. Riak KV treats data as an
+> opaque binary, thus enabling users to store virtually anything.
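+
+To make the hashing step concrete, here is a minimal Python sketch of the
+idea described above: a bucket and key are joined and run through SHA1 to
+yield a position in a 2^160 key space. This is an illustration only; the
+function and names below are hypothetical, not Riak KV's internal API.
+
+```python
+import hashlib
+
+RING_TOP = 2 ** 160  # size of a SHA1-based hash space
+
+def ring_position(bucket: bytes, key: bytes) -> int:
+    # Hash bucket+key to an integer position on the ring (sketch only).
+    digest = hashlib.sha1(bucket + key).digest()  # 20 bytes = 160 bits
+    return int.from_bytes(digest, "big")
+
+# Two different keys land at (almost certainly) distant ring positions.
+print(ring_position(b"users", b"alice") < RING_TOP)  # True
+print(ring_position(b"users", b"bob") < RING_TOP)    # True
+```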
+
+### 4.2 Partitioning Algorithm
+
+One of the key design requirements for Dynamo is that it must scale
+incrementally. This requires a mechanism to dynamically partition the data
+over the set of nodes (i.e., storage hosts) in the system. Dynamo’s
+partitioning scheme relies on consistent hashing to distribute the load
+across multiple storage hosts. In consistent hashing [10], the output range
+of a hash function is treated as a fixed circular space or “ring” (i.e. the
+largest hash value wraps around to the smallest hash value). Each node in the
+system is assigned a random value within this space which represents its
+“position” on the ring. Each data item identified by a key is assigned to a
+node by hashing the data item’s key to yield its position on the ring, and
+then walking the ring clockwise to find the first node with a position
+larger than the item’s position. Thus, each node becomes responsible for the
+region in the ring between it and its predecessor node on the ring. The
+principal advantage of consistent hashing is that departure or arrival of a
+node only affects its immediate neighbors and other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data
+> around the ring to partitions responsible for storing data. The ring has a
+> maximum key space of 2^160. Each bucket+key (and its associated value) is
+> hashed to a location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number
+> is configured when a cluster is first built. Each node will be responsible
+> for storing the data hashed to a set number of partitions, and each
+> storage node will handle a roughly equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform
+data and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo
+uses a variant of consistent hashing (similar to the one used in [10, 20]):
+instead of mapping a node to a single point in the circle, each node gets
+assigned to multiple points in the ring. To this end, Dynamo uses the
+concept of “virtual nodes”. A virtual node looks like a single node in the
+system, but each node can be responsible for more than one virtual node.
+Effectively, when a new node is added to the system, it is assigned multiple
+positions (henceforth, “tokens”) in the ring. The process of fine-tuning
+Dynamo’s partitioning scheme is discussed in Section 6.
+
+> Riak KV also has the concept of virtual nodes, and they are used to the
+> same end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+* If a node becomes unavailable (due to failures or routine maintenance),
+  the load handled by this node is evenly dispersed across the remaining
+  available nodes.
+
+* When a node becomes available again, or a new node is added to the system,
+  the newly available node accepts a roughly equivalent amount of load from
+  each of the other available nodes.
+
+* The number of virtual nodes that a node is responsible for can be decided
+  based on its capacity, accounting for heterogeneity in the physical
+  infrastructure.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters/#the-ring
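+
+Building on the earlier hashing sketch, the following Python illustrates the
+clockwise ring walk and how virtual nodes interact with it: the walk must
+collect *distinct* physical nodes, since adjacent vnodes can share an owner.
+The ring layout and node names are invented for the example; this is not
+Riak KV's claim algorithm.
+
+```python
+import hashlib
+from bisect import bisect_right
+
+# Toy ring: 8 equal-sized partitions assigned round-robin to 3 nodes,
+# loosely mirroring Riak KV's style of partition claim (illustrative only).
+NODES = ["node1", "node2", "node3"]
+NUM_PARTITIONS = 8
+RING = [(i * (2 ** 160 // NUM_PARTITIONS), NODES[i % len(NODES)])
+        for i in range(NUM_PARTITIONS)]
+
+def preference_list(key: bytes, n: int = 3) -> list:
+    # Walk clockwise from the key's position, collecting the first n
+    # distinct physical nodes (virtual nodes may repeat owners).
+    pos = int.from_bytes(hashlib.sha1(key).digest(), "big")
+    positions = [p for p, _ in RING]
+    start = bisect_right(positions, pos) % len(RING)
+    owners = []
+    for i in range(len(RING)):
+        owner = RING[(start + i) % len(RING)][1]
+        if owner not in owners:
+            owners.append(owner)
+        if len(owners) == n:
+            break
+    return owners
+
+# With three nodes and n=3, the preflist is some rotation of all three.
+print(preference_list(b"users/alice"))
+```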
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts. Each data item is replicated at N hosts, where N is a
+parameter configured “per-instance”. Each key, k, is assigned to a
+coordinator node (described in the previous section). The coordinator is in
+charge of the replication of the data items that fall within its range. In
+addition to locally storing each key within its range, the coordinator
+replicates these keys at the N-1 clockwise successor nodes in the ring. This
+results in a system where each node is responsible for the region of the
+ring between it and its Nth predecessor. In <a href="#figure-2">Figure
+2</a>, node B replicates the key k at nodes C and D in addition to storing
+it locally. Node D will store the keys that fall in the ranges (A, B],
+(B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic.
+> Remember the concept of a bucket we covered above? In Riak KV, the
+> replication parameter, "N" (also called "n_val"), is configurable at the
+> bucket level. The default n_val in Riak KV is 3, meaning that out of the
+> box Riak KV will store three replicas of your data on three different
+> partitions on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason
+> why any node in the ring can coordinate a request. The node receives a
+> request, consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called
+the preference list. The system is designed, as will be explained in Section
+4.8, so that every node in the system can determine which nodes should be in
+this list for any particular key. To account for node failures, the
+preference list contains more than N nodes. Note that with the use of
+virtual nodes, it is possible that the first N successor positions for a
+particular key may be owned by less than N distinct physical nodes (i.e. a
+node may hold more than one of the first N positions). To address this, the
+preference list for a key is constructed by skipping positions in the ring
+to ensure that the list contains only distinct physical nodes.
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be
+propagated to all replicas asynchronously. A put() call may return to its
+caller before the update has been applied at all the replicas, which can
+result in scenarios where a subsequent get() operation may return an object
+that does not have the latest updates. If there are no failures, then there
+is a bound on the update propagation times. However, under certain failure
+scenarios (e.g., server outages or network partitions), updates may not
+arrive at all replicas for an extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously, which, as you would expect, could result in a datum being
+> returned to the client that is out of date. But don't worry: we built in
+> some mechanisms to address this.
+
+There is a category of applications in Amazon’s platform that can tolerate
+such inconsistencies and can be constructed to operate under these
+conditions. For example, the shopping cart application requires that an “Add
+to Cart” operation can never be forgotten or rejected.
+If the most recent state of the cart is unavailable, and a user makes
+changes to an older version of the cart, that change is still meaningful and
+should be preserved. But at the same time it shouldn’t supersede the
+currently unavailable state of the cart, which itself may contain changes
+that should be preserved. Note that both “add to cart” and “delete item from
+cart” operations are translated into put requests to Dynamo. When a customer
+wants to add an item to (or remove from) a shopping cart and the latest
+version is not available, the item is added to (or removed from) the older
+version and the divergent versions are reconciled later.
+
+> Much like Dynamo was suited to the design of the shopping cart, Riak KV,
+> and its tradeoffs, are appropriate for a certain set of use cases. We
+> happen to feel that _most_ use cases can tolerate some level of eventual
+> consistency.
+
+In order to provide this kind of guarantee, Dynamo treats the result of each
+modification as a new and immutable version of the data. It allows for
+multiple versions of an object to be present in the system at the same time.
+Most of the time, new versions subsume the previous version(s), and the
+system itself can determine the authoritative version (syntactic
+reconciliation). However, version branching may happen, in the presence of
+failures combined with concurrent updates, resulting in conflicting versions
+of an object. In these cases, the system cannot reconcile the multiple
+versions of the same object and the client must perform the reconciliation
+in order to collapse multiple branches of data evolution back into one
+(semantic reconciliation). A typical example of a collapse operation is
+“merging” different versions of a customer’s shopping cart. Using this
+reconciliation mechanism, an “add to cart” operation is never lost. However,
+deleted items can resurface.
+
+> The same holds true for Riak KV. If, by way of some failure and concurrent
+> update (rare but quite possible), there come to exist multiple versions of
+> the same object, Riak KV will push this decision down to the client (who
+> are we to tell you which is the authoritative object?). All that said, if
+> your application doesn't need this level of version control, we enable you
+> to turn the usage of vector clocks on and off at the bucket level.
+
+It is important to understand that certain failure modes can potentially
+result in the system having not just two but several versions of the same
+data. Updates in the presence of network partitions and node failures can
+potentially result in an object having distinct version sub-histories, which
+the system will need to reconcile in the future. This requires us to design
+applications that explicitly acknowledge the possibility of multiple
+versions of the same data (in order to never lose any updates).
+
+> Ditto.
+
+Dynamo uses vector clocks [12] in order to capture causality between
+different versions of the same object. A vector clock is effectively a list
+of (node, counter) pairs. One vector clock is associated with every version
+of every object. One can determine whether two versions of an object are on
+parallel branches or have a causal ordering by examining their vector
+clocks. If the counters on the first object’s clock are less-than-or-equal
+to all of the nodes in the second clock, then the first is an ancestor of
+the second and can be forgotten. Otherwise, the two changes are considered
+to be in conflict and require reconciliation.
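+
+The ancestor test just described is simple enough to sketch directly. The
+following Python is an illustration of the comparison rule only, not
+Dynamo's or Riak KV's implementation; the clock values anticipate the worked
+example below.
+
+```python
+# A vector clock modeled as a dict of node -> counter, e.g. {"sx": 2}.
+def descends(a: dict, b: dict) -> bool:
+    # True if clock `a` is equal to or a descendant of clock `b`,
+    # i.e. `b` is an ancestor and can be forgotten.
+    return all(a.get(node, 0) >= counter for node, counter in b.items())
+
+def relation(a: dict, b: dict) -> str:
+    if descends(a, b):
+        return "a descends from b"
+    if descends(b, a):
+        return "b descends from a"
+    return "conflict: semantic reconciliation needed"
+
+d2 = {"sx": 2}             # e.g. object D2 in the walkthrough below
+d3 = {"sx": 2, "sy": 1}    # D3, a later write coordinated by Sy
+d4 = {"sx": 2, "sz": 1}    # D4, a concurrent write coordinated by Sz
+print(relation(d3, d2))    # a descends from b
+print(relation(d3, d4))    # conflict: semantic reconciliation needed
+```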
+
+> As you may have already figured out, Riak KV uses vector clocks for object
+> versioning, too. Here are a whole host of resources to keep you busy for a
+> while:
+>
+> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vector-clock)
+>
+> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/)
+> |
+> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
+>
+> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
+>
+> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)
+
+In Dynamo, when a client wishes to update an object, it must specify which
+version it is updating. This is done by passing the context it obtained from
+an earlier read operation, which contains the vector clock information. Upon
+processing a read request, if Dynamo has access to multiple branches that
+cannot be syntactically reconciled, it will return all the objects at the
+leaves, with the corresponding version information in the context. An update
+using this context is considered to have reconciled the divergent versions
+and the branches are collapsed into a single new version.
+
+**<figure id="figure-3" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure3.png">
+  <figcaption>
+    Figure 3: Version evolution of an object over time.
+  </figcaption>
+</figure>**
+
+To illustrate the use of vector clocks, let us consider the example shown in
+<a href="#figure-3">Figure 3</a>. A client writes a new object. The node
+(say Sx) that handles the write for this key increases its sequence number
+and uses it to create the data's vector clock. The system now has the object
+D1 and its associated clock [(Sx, 1)]. The client updates the object. Assume
+the same node handles this request as well. The system now also has object
+D2 and its associated clock [(Sx, 2)]. D2 descends from D1 and therefore
+overwrites D1; however, there may be replicas of D1 lingering at nodes that
+have not yet seen D2. Let us assume that the same client updates the object
+again and a different server (say Sy) handles the request. The system now
+has data D3 and its associated clock [(Sx, 2), (Sy, 1)].
+
+Next assume a different client reads D2 and then tries to update it, and
+another node (say Sz) does the write. The system now has D4 (descendant of
+D2) whose version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or
+D2 could determine, upon receiving D4 and its clock, that D1 and D2 are
+overwritten by the new data and can be garbage collected. A node that is
+aware of D3 and receives D4 will find that there is no causal relation
+between them. In other words, there are changes in D3 and D4 that are not
+reflected in each other. Both versions of the data must be kept and
+presented to a client (upon a read) for semantic reconciliation.
+
+Now assume some client reads both D3 and D4 (the context will reflect that
+both values were found by the read). The read's context is a summary of the
+clocks of D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client
+performs the reconciliation and node Sx coordinates the write, Sx will
+update its sequence number in the clock. The new data D5 will have the
+following clock: [(Sx, 3), (Sy, 1), (Sz, 1)].
+
+A possible issue with vector clocks is that the size of vector clocks may
+grow if many servers coordinate the writes to an object.
+In practice, this is not likely because the writes are usually handled by
+one of the top N nodes in the preference list. In case of network partitions
+or multiple server failures, write requests may be handled by nodes that are
+not in the top N nodes in the preference list, causing the size of the
+vector clock to grow. In these scenarios, it is desirable to limit the size
+of the vector clock. To this end, Dynamo employs the following clock
+truncation scheme: Along with each (node, counter) pair, Dynamo stores a
+timestamp that indicates the last time the node updated the data item. When
+the number of (node, counter) pairs in the vector clock reaches a threshold
+(say 10), the oldest pair is removed from the clock. Clearly, this
+truncation scheme can lead to inefficiencies in reconciliation as the
+descendant relationships cannot be derived accurately. However, this problem
+has not surfaced in production and therefore this issue has not been
+thoroughly investigated.
+
+> Riak KV does a certain amount of vector clock pruning to keep their growth
+> under control.
+
+
+### 4.5 Execution of get () and put () operations
+
+Any storage node in Dynamo is eligible to receive client get and put
+operations for any key. In this section, for the sake of simplicity, we
+describe how these operations are performed in a failure-free environment,
+and in the subsequent section we describe how read and write operations are
+executed during failures.
+
+> Any node in the Riak KV ring can coordinate a request. The Riak KV
+> information in this section applies to a failure-free environment.
+
+Both get and put operations are invoked using Amazon’s
+infrastructure-specific request processing framework over HTTP. There are
+two strategies that a client can use to select a node: (1) route its request
+through a generic load balancer that will select a node based on load
+information, or (2) use a partition-aware client library that routes
+requests directly to the appropriate coordinator nodes. The advantage of the
+first approach is that the client does not have to link any code specific to
+Dynamo in its application, whereas the second strategy can achieve lower
+latency because it skips a potential forwarding step.
+
+A node handling a read or write operation is known as the coordinator.
+Typically, this is the first among the top N nodes in the preference list.
+If the requests are received through a load balancer, requests to access a
+key may be routed to any random node in the ring. In this scenario, the node
+that receives the request will not coordinate it if the node is not in the
+top N of the requested key’s preference list. Instead, that node will
+forward the request to the first among the top N nodes in the preference
+list.
+
+Read and write operations involve the first N healthy nodes in the
+preference list, skipping over those that are down or inaccessible. When all
+nodes are healthy, the top N nodes in a key’s preference list are accessed.
+When there are node failures or network partitions, nodes that are lower
+ranked in the preference list are accessed.
+
+To maintain consistency among its replicas, Dynamo uses a consistency
+protocol similar to those used in quorum systems. This protocol has two key
+configurable values: R and W. R is the minimum number of nodes that must
+participate in a successful read operation. W is the minimum number of nodes
+that must participate in a successful write operation. Setting R and W such
+that R + W > N yields a quorum-like system.
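+
+The quorum arithmetic is worth a quick illustration. The values below are
+invented for the example; in practice you would consult the N, R, and W
+configured for your own cluster or bucket.
+
+```python
+# R + W > N guarantees that every read quorum overlaps every write quorum
+# in at least one replica (a sketch of the rule, not a client API).
+def is_quorum(n: int, r: int, w: int) -> bool:
+    return r + w > n
+
+def min_overlap(n: int, r: int, w: int) -> int:
+    # Minimum number of replicas guaranteed to see both a write and a
+    # subsequent read.
+    return max(0, r + w - n)
+
+for n, r, w in [(3, 2, 2), (3, 1, 1), (3, 1, 3)]:
+    print((n, r, w), is_quorum(n, r, w), min_overlap(n, r, w))
+# (3, 2, 2) True 1   -- the classic quorum configuration
+# (3, 1, 1) False 0  -- fast, but reads may miss the latest write
+# (3, 1, 3) True 1   -- a "high performance read engine" setup
+```
+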
+In this model, the latency of a get (or put) operation is dictated by the
+slowest of the R (or W) replicas. For this reason, R and W are usually
+configured to be less than N, to provide better latency.
+
+> Riak KV makes use of the same values. But, thanks to our concept of
+> buckets, we made it a bit more customizable. The default R and W values
+> are set at the bucket level but can be configured at the request level if
+> the developer deems it necessary for certain data. "Quorum" as described
+> in Dynamo is the default setting in Riak KV.
+>
+> Some more resources on R and W:
+>
+> [REST API]({{<baseurl>}}riak/kv/3.0.3/developing/api/http/)
+>
+> [Writing Data]({{<baseurl>}}riak/kv/3.0.3/developing/usage/creating-objects/)
+>
+> [Reading Data]({{<baseurl>}}riak/kv/3.0.3/developing/usage/reading-objects/)
+
+Upon receiving a put() request for a key, the coordinator generates the
+vector clock for the new version and writes the new version locally. The
+coordinator then sends the new version (along with the new vector clock) to
+the N highest-ranked reachable nodes. If at least W-1 nodes respond then the
+write is considered successful.
+
+> In Riak KV a write is considered successful when the total number of
+> responding writes equals W. This need not be a durable write, which is a
+> separate value in Riak KV labeled DW.
+
+Similarly, for a get() request, the coordinator requests all existing
+versions of data for that key from the N highest-ranked reachable nodes in
+the preference list for that key, and then waits for R responses before
+returning the result to the client. If the coordinator ends up gathering
+multiple versions of the data, it returns all the versions it deems to be
+causally unrelated. The divergent versions are then reconciled, and the
+reconciled version superseding the current versions is written back.
+
+> Same for Riak KV. Reconciling divergent versions in Riak KV is called
+> [Read Repair]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication/#read-repair).
+
+
+### 4.6 Handling Failures: Hinted Handoff
+
+If Dynamo used a traditional quorum approach it would be unavailable during
+server failures and network partitions, and would have reduced durability
+even under the simplest of failure conditions. To remedy this, it does not
+enforce strict quorum membership; instead, it uses a “sloppy quorum”: all
+read and write operations are performed on the first N healthy nodes from
+the preference list, which may not always be the first N nodes encountered
+while walking the consistent hashing ring.
+
+> [Hinted handoff] is built into Riak KV's core.
+>
+> You can get a glimpse of Riak KV's preference list (or *preflist*)
+> calculation in the [Replication] walkthrough.
+
+[Hinted handoff]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#hinted-handoff
+[Replication]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/replication/
+
+Consider the example of Dynamo configuration given in
+<a href="#figure-2">Figure 2</a> with N=3. In this example, if node A is
+temporarily down or unreachable during a write operation then a replica that
+would normally have lived on A will now be sent to node D. This is done to
+maintain the desired availability and durability guarantees. The replica
+sent to D will have a hint in its metadata that suggests which node was the
+intended recipient of the replica (in this case A). Nodes that receive
+hinted replicas will keep them in a separate local database that is scanned
+periodically. Upon detecting that A has recovered, D will attempt to deliver
+the replica to A. Once the transfer succeeds, D may delete the object from
+its local store without decreasing the total number of replicas in the
+system.
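+
+A toy model of the hint mechanism may help. This sketch is illustrative
+only; in Riak KV, handoff is managed per-vnode by the runtime, and the class
+and function names here are invented.
+
+```python
+class Node:
+    def __init__(self, name: str):
+        self.name = name
+        self.up = True
+        self.store = {}    # replicas this node owns
+        self.hinted = {}   # replicas held on behalf of another node
+
+def write_replica(target: Node, fallback: Node, key, value):
+    # Write to the intended owner if healthy; otherwise park the replica
+    # on a fallback node with a hint naming the intended owner.
+    if target.up:
+        target.store[key] = value
+    else:
+        fallback.hinted[key] = (value, target.name)
+
+def handoff(fallback: Node, recovered: Node):
+    # When the owner recovers, deliver hinted replicas and drop them.
+    for key, (value, owner) in list(fallback.hinted.items()):
+        if owner == recovered.name and recovered.up:
+            recovered.store[key] = value
+            del fallback.hinted[key]
+
+a, d = Node("A"), Node("D")
+a.up = False
+write_replica(a, d, "k", "v")  # parked on D with a hint for A
+a.up = True
+handoff(d, a)
+print(a.store, d.hinted)       # {'k': 'v'} {}
+```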
+
+Using hinted handoff, Dynamo ensures that read and write operations do not
+fail due to temporary node or network failures. Applications that need the
+highest level of availability can set W to 1, which ensures that a write is
+accepted as long as a single node in the system has durably written the key
+to its local store. Thus, the write request is only rejected if all nodes in
+the system are unavailable. However, in practice, most Amazon services in
+production set a higher W to meet the desired level of durability. A more
+detailed discussion of configuring N, R and W follows in Section 6.
+
+> As mentioned previously, Riak KV does not require that a write be durable,
+> only that a vnode responds in the affirmative. If you require a durable
+> write in the way mentioned here, use DW.
+
+It is imperative that a highly available storage system be capable of
+handling the failure of an entire data center. Data center failures happen
+due to power outages, cooling failures, network failures, and natural
+disasters. Dynamo is configured such that each object is replicated across
+multiple data centers. In essence, the preference list of a key is
+constructed such that the storage nodes are spread across multiple data
+centers. These datacenters are connected through high-speed network links.
+This scheme of replicating across multiple datacenters allows us to handle
+entire data center failures without a data outage.
+
+> [Multi Datacenter Replication] was previously only implemented in the
+> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. Now
+> it is available in all versions from Riak KV 2.2.6 onwards.
+
+[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/3.0.3/using/reference/v3-multi-datacenter/architecture/
+[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
+
+
+### 4.7 Handling permanent failures: Replica synchronization
+
+Hinted handoff works best if the system membership churn is low and node
+failures are transient. There are scenarios under which hinted replicas
+become unavailable before they can be returned to the original replica node.
+To handle this and other threats to durability, Dynamo implements an
+anti-entropy (replica synchronization) protocol to keep the replicas
+synchronized.
+
+> Read repair, mentioned above, is the simplest form of anti-entropy. But it
+> is passive, not active as this section describes.
+
+To detect the inconsistencies between replicas faster and to minimize the
+amount of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is
+a hash tree where leaves are hashes of the values of individual keys. Parent
+nodes higher in the tree are hashes of their respective children. The
+principal advantage of a Merkle tree is that each branch of the tree can be
+checked independently without requiring nodes to download the entire tree or
+the entire data set. Moreover, Merkle trees help in reducing the amount of
+data that needs to be transferred while checking for inconsistencies among
+replicas. For instance, if the hash values of the root of two trees are
+equal, then the values of the leaf nodes in the tree are equal and the nodes
+require no synchronization. If not, it implies that the values of some
+replicas are different. In such cases, the nodes may exchange the hash
+values of children and the process continues until it reaches the leaves of
+the trees, at which point the hosts can identify the keys that are “out of
+sync”. Merkle trees minimize the amount of data that needs to be transferred
+for synchronization and reduce the number of disk reads performed during the
+anti-entropy process.
+
+> Riak KV implements a Merkle-tree-based Active Anti-Entropy (*AAE*).
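+
+A compressed sketch of the comparison follows. It is a minimal illustration
+of the root-first check, assuming hand-built trees over a fixed key set; it
+is not how Riak KV's AAE trees are actually structured.
+
+```python
+import hashlib
+
+def h(data: bytes) -> bytes:
+    return hashlib.sha1(data).digest()
+
+def levels(leaves: list) -> list:
+    # Build tree levels bottom-up: leaves first, root level last.
+    out = [leaves]
+    while len(out[-1]) > 1:
+        prev = out[-1]
+        pairs = [h(prev[i] + prev[i + 1]) for i in range(0, len(prev) - 1, 2)]
+        out.append(pairs + ([prev[-1]] if len(prev) % 2 else []))
+    return out
+
+def out_of_sync(keys: list, store_a: dict, store_b: dict) -> list:
+    la = [h(k.encode() + store_a[k].encode()) for k in keys]
+    lb = [h(k.encode() + store_b[k].encode()) for k in keys]
+    if levels(la)[-1] == levels(lb)[-1]:
+        return []  # equal roots: replicas in sync, nothing transferred
+    # Roots differ: descend (here simply compared at the leaves).
+    return [k for k, x, y in zip(keys, la, lb) if x != y]
+
+a = {"k1": "v1", "k2": "v2", "k3": "v3"}
+b = {"k1": "v1", "k2": "stale", "k3": "v3"}
+print(out_of_sync(sorted(a), a, b))  # ['k2']
+```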
+
+Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
+separate Merkle tree for each key range (the set of keys covered by a
+virtual node) it hosts. This allows nodes to compare whether the keys within
+a key range are up-to-date. In this scheme, two nodes exchange the root of
+the Merkle tree corresponding to the key ranges that they host in common.
+Subsequently, using the tree traversal scheme described above, the nodes
+determine if they have any differences and perform the appropriate
+synchronization action. The disadvantage with this scheme is that many key
+ranges change when a node joins or leaves the system, thereby requiring the
+tree(s) to be recalculated. This issue is addressed, however, by the refined
+partitioning scheme described in Section 6.2.
+
+
+### 4.8 Membership and Failure Detection
+
+> This section is well expressed in [Adding and Removing Nodes] and
+> [Failure Scenarios].
+
+[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency/
+
+#### 4.8.1 Ring Membership
+
+> Riak KV operators can trigger node management via the
+> [riak-admin command-line tool].
+
+[riak-admin command-line tool]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/
+
+In Amazon’s environment, node outages (due to failures and maintenance
+tasks) are often transient but may last for extended intervals. A node
+outage rarely signifies a permanent departure and therefore should not
+result in rebalancing of the partition assignment or repair of the
+unreachable replicas. Similarly, manual error could result in the
+unintentional startup of new Dynamo nodes. For these reasons, it was deemed
+appropriate to use an explicit mechanism to initiate the addition and
+removal of nodes from a Dynamo ring. An administrator uses a command line
+tool or a browser to connect to a Dynamo node and issue a membership change
+to join a node to a ring or remove a node from a ring. The node that serves
+the request writes the membership change and its time of issue to persistent
+store. The membership changes form a history because nodes can be removed
+and added back multiple times.
+
+> Nodes are manually added using the `riak-admin cluster join` command.
+>
+> When a node permanently departs, rebalancing is triggered using the
+> `riak-admin cluster leave` command.
+
+A gossip-based protocol propagates membership changes and maintains an
+eventually consistent view of membership. Each node contacts a peer chosen
+at random every second and the two nodes efficiently reconcile their
+persisted membership change histories.
+
+> Riak KV's ring state holds membership information, and is propagated via
+> [gossiping], with random reconciliation occurring once a minute by
+> default.
+
+[gossiping]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#gossiping
+
+When a node starts for the first time, it chooses its set of tokens (virtual
+nodes in the consistent hash space) and maps nodes to their respective token
+sets.
+The mapping is persisted on disk and initially contains only the local node
+and token set. The mappings stored at different Dynamo nodes are reconciled
+during the same communication exchange that reconciles the membership change
+histories. Therefore, partitioning and placement information also propagates
+via the gossip-based protocol and each storage node is aware of the token
+ranges handled by its peers. This allows each node to forward a key’s
+read/write operations to the right set of nodes directly.
+
+> These tokens are vnodes (virtual nodes) in Riak KV.
+
+
+#### 4.8.2 External Discovery
+
+The mechanism described above could temporarily result in a logically
+partitioned Dynamo ring. For example, the administrator could contact node A
+to join A to the ring, then contact node B to join B to the ring. In this
+scenario, nodes A and B would each consider itself a member of the ring, yet
+neither would be immediately aware of the other. To prevent logical
+partitions, some Dynamo nodes play the role of seeds. Seeds are nodes that
+are discovered via an external mechanism and are known to all nodes. Because
+all nodes eventually reconcile their membership with a seed, logical
+partitions are highly unlikely. Seeds can be obtained either from static
+configuration or from a configuration service. Typically seeds are fully
+functional nodes in the Dynamo ring.
+
+> To rectify these sorts of logical partitions, multiple Riak cluster
+> changes are configured as one batch. Any changes must first be viewed with
+> `riak-admin cluster plan`; the changes are then committed with
+> `riak-admin cluster commit`. The new ring state is gossiped.
+>
+> See _[The Node Join Process]_ for more.
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+
+
+#### 4.8.3 Failure Detection
+
+Failure detection in Dynamo is used to avoid attempts to communicate with
+unreachable peers during get() and put() operations and when transferring
+partitions and hinted replicas. For the purpose of avoiding failed attempts
+at communication, a purely local notion of failure detection is entirely
+sufficient: node A may consider node B failed if node B does not respond to
+node A’s messages (even if B is responsive to node C’s messages). In the
+presence of a steady rate of client requests generating inter-node
+communication in the Dynamo ring, a node A quickly discovers that a node B
+is unresponsive when B fails to respond to a message; node A then uses
+alternate nodes to service requests that map to B's partitions, and
+periodically retries B to check for the latter's recovery. In the absence of
+client requests to drive traffic between two nodes, neither node really
+needs to know whether the other is reachable and responsive.
+
+Decentralized failure detection protocols use a simple gossip-style protocol
+that enables each node in the system to learn about the arrival (or
+departure) of other nodes. For detailed information on decentralized failure
+detectors and the parameters affecting their accuracy, the interested reader
+is referred to [8]. Early designs of Dynamo used a decentralized failure
+detector to maintain a globally consistent view of failure state. Later it
+was determined that the explicit node join and leave methods obviate the
+need for a global view of failure state. This is because nodes are notified
+of permanent node additions and removals by the explicit node join and leave
+methods, and temporary node failures are detected by the individual nodes
+when they fail to communicate with others (while forwarding requests).
+
+> Riak KV follows the same mechanism, by manually triggering permanent ring
+> state changes, and gossiping the new state.
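+
+The purely local detector described above amounts to little more than a
+timeout and a retry timer. A toy sketch follows (invented names, not Riak
+KV's implementation):
+
+```python
+import time
+
+class LocalFailureDetector:
+    def __init__(self, retry_after: float = 5.0):
+        self.retry_after = retry_after
+        self.failed_at = {}  # peer name -> time it was marked failed
+
+    def record(self, peer: str, responded: bool):
+        if responded:
+            self.failed_at.pop(peer, None)      # peer is healthy again
+        else:
+            self.failed_at.setdefault(peer, time.monotonic())
+
+    def should_try(self, peer: str) -> bool:
+        # Route around a failed peer, but retry it periodically so this
+        # node notices when the peer recovers.
+        failed = self.failed_at.get(peer)
+        if failed is None:
+            return True
+        return time.monotonic() - failed >= self.retry_after
+
+fd = LocalFailureDetector()
+fd.record("node_b", responded=False)
+print(fd.should_try("node_b"))  # False: use alternate nodes for now
+```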
+
+
+### 4.9 Adding/Removing Storage Nodes
+
+When a new node (say X) is added into the system, it gets assigned a number
+of tokens that are randomly scattered on the ring. For every key range that
+is assigned to node X, there may be a number of nodes (less than or equal to
+N) that are currently in charge of handling keys that fall within its token
+range. Due to the allocation of key ranges to X, some existing nodes no
+longer have to store some of their keys, and these nodes transfer those keys
+to X. Let us consider a simple bootstrapping scenario where node X is added
+to the ring shown in <a href="#figure-2">Figure 2</a> between A and B. When
+X is added to the system, it is in charge of storing keys in the ranges
+(F, G], (G, A] and (A, X]. As a consequence, nodes B, C and D no longer have
+to store the keys in these respective ranges. Therefore, nodes B, C, and D
+will offer to, and upon confirmation from X, transfer the appropriate set of
+keys. When a node is removed from the system, the reallocation of keys
+happens in a reverse process.
+
+> Riak KV does not randomly assign vnodes, but rather iterates through the
+> list of partitions, assigning them to nodes in a round-robin style.
+
+Operational experience has shown that this approach distributes the load of
+key distribution uniformly across the storage nodes, which is important to
+meet the latency requirements and to ensure fast bootstrapping. Finally, by
+adding a confirmation round between the source and the destination, it is
+made sure that the destination node does not receive any duplicate transfers
+for a given key range.
+
+
+## 5. Implementation
+
+In Dynamo, each storage node has three main software components: request
+coordination, membership and failure detection, and a local persistence
+engine. All these components are implemented in Java.
+
+> Riak KV is implemented in Erlang. Request coordination and membership
+> behavior is defined by [riak_core] and implemented by [Riak KV].
+
+[riak_core]: http://github.com/basho/riak_core
+[Riak KV]: http://github.com/basho/riak_kv
+
+Dynamo’s local persistence component allows for different storage engines to
+be plugged in. Engines that are in use are Berkeley Database (BDB)
+Transactional Data Store, BDB Java Edition, MySQL, and an in-memory buffer
+with persistent backing store. The main reason for designing a pluggable
+persistence component is to choose the storage engine best suited for an
+application’s access patterns. For instance, BDB can handle objects
+typically in the order of tens of kilobytes whereas MySQL can handle objects
+of larger sizes. Applications choose Dynamo’s local persistence engine based
+on their object size distribution. The majority of Dynamo’s production
+instances use BDB Transactional Data Store.
+
+> Riak KV ships with various [backend options]. [Bitcask] is the default,
+> but [LevelDB] and [Memory] are also used heavily in production (in that
+> order). You can also use more than one backend in production via the
+> [Multi] backend configuration.
+>
+> Bitcask is a fast and reliable choice, but does have some limitations at
+> very large scales.
+> For larger clusters, you may want to choose LevelDB (which also supports
+> [secondary indexes]). The Memory backend is an excellent choice when speed
+> is important and durability is not. It also has TTL support.
+
+[backend options]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/
+[Bitcask]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask/
+[LevelDB]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb/
+[Memory]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/memory/
+[Multi]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/multi/
+[secondary indexes]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/secondary-indexes/
+
+The request coordination component is built on top of an event-driven
+messaging substrate where the message processing pipeline is split into
+multiple stages similar to the SEDA architecture [24]. All communications
+are implemented using Java NIO channels. The coordinator executes the read
+and write requests on behalf of clients by collecting data from one or more
+nodes (in the case of reads) or storing data at one or more nodes (for
+writes). Each client request results in the creation of a state machine on
+the node that received the client request. The state machine contains all
+the logic for identifying the nodes responsible for a key, sending the
+requests, waiting for responses, potentially doing retries, processing the
+replies and packaging the response to the client. Each state machine
+instance handles exactly one client request. For instance, a read operation
+implements the following state machine: (i) send read requests to the
+nodes, (ii) wait for the minimum number of required responses, (iii) if too
+few replies were received within a given time bound, fail the request, (iv)
+otherwise gather all the data versions and determine the ones to be
+returned, and (v) if versioning is enabled, perform syntactic reconciliation
+and generate an opaque write context that contains the vector clock that
+subsumes all the remaining versions. For the sake of brevity the failure
+handling and retry states are left out.
+
+> Request coordination in Riak KV uses Erlang message passing, but follows a
+> similar state machine.
+
+After the read response has been returned to the caller, the state machine
+waits for a small period of time to receive any outstanding responses. If
+stale versions were returned in any of the responses, the coordinator
+updates those nodes with the latest version. This process is called read
+repair because it repairs replicas that have missed a recent update at an
+opportunistic time and relieves the anti-entropy protocol from having to do
+it.
+
+> Riak KV implements [Read Repair].
+
+[Read Repair]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication/#read-repair
+
+As noted earlier, write requests are coordinated by one of the top N nodes
+in the preference list. Although it is desirable always to have the first
+node among the top N to coordinate the writes, thereby serializing all
+writes at a single location, this approach has led to uneven load
+distribution, resulting in SLA violations. This is because the request load
+is not uniformly distributed across objects. To counter this, any of the top
+N nodes in the preference list is allowed to coordinate the writes. In
+particular, since each write usually follows a read operation, the
+coordinator for a write is chosen to be the node that replied fastest to the
+previous read operation, which is stored in the context information of the
+request. This optimization enables us to pick the node that has the data
+that was read by the preceding read operation, thereby increasing the
+chances of getting “read-your-writes” consistency. It also reduces
+variability in the performance of the request handling, which improves
+performance at the 99.9th percentile.
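+
+The read path in steps (i)-(v) can be condensed into a few lines. The sketch
+below is runnable Python under invented names; it is not Dynamo's or Riak
+KV's actual code, and it folds the steps into one function for brevity.
+
+```python
+from dataclasses import dataclass
+
+@dataclass
+class Version:
+    value: str
+    clock: dict  # vector clock: node -> counter
+
+def descends(a: dict, b: dict) -> bool:
+    return all(a.get(n, 0) >= c for n, c in b.items())
+
+def coordinate_read(replies: list, r: int):
+    if len(replies) < r:                   # (iii) too few replies: fail
+        raise TimeoutError("read quorum not met")
+    survivors = []                         # (iv) syntactic reconciliation
+    for v in replies:
+        if any(descends(s.clock, v.clock) for s in survivors):
+            continue                       # v is an ancestor: drop it
+        survivors = [s for s in survivors if not descends(v.clock, s.clock)]
+        survivors.append(v)
+    context = {}                           # (v) clock subsuming survivors
+    for v in survivors:
+        for node, counter in v.clock.items():
+            context[node] = max(context.get(node, 0), counter)
+    return survivors, context
+
+d3 = Version("cart-a", {"sx": 2, "sy": 1})
+d4 = Version("cart-b", {"sx": 2, "sz": 1})
+print(coordinate_read([d3, d4], r=2))
+# Both concurrent versions survive; the context subsumes their clocks.
+```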
+
+
+## 6. Experiences & Lessons Learned
+
+> Much of this section relates to benchmarks run against Dynamo. You can run
+> [Basho Bench] against your own Riak cluster to discover your own optimal
+> values.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/3.0.3/using/performance/benchmarking/
+
+Dynamo is used by several services with different configurations. These
+instances differ in their version reconciliation logic and read/write quorum
+characteristics. The following are the main patterns in which Dynamo is
+used:
+
+* Business logic specific reconciliation: This is a popular use case for
+Dynamo. Each data object is replicated across multiple nodes. In case of
+divergent versions, the client application performs its own reconciliation
+logic. The shopping cart service discussed earlier is a prime example of
+this category. Its business logic reconciles objects by merging different
+versions of a customer’s shopping cart.
+
+> Riak KV currently supports simple conflict resolution by way of
+> read-repair, leaving more complex reconciliation to the client. There are
+> several tools to help simplify this task, such as [Statebox].
+>
+> Riak KV supports a simple reconciliation strategy, called [CRDTs
+> (Commutative Replicated Data Types)], for reconciling common data types
+> like sets and counters.
+
+[Statebox]: https://github.com/mochi/statebox_riak
+[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/3.0.3/developing/data-types/
+
+
+* Timestamp based reconciliation: This case differs from the previous one
+only in the reconciliation mechanism. In case of divergent versions, Dynamo
+performs simple timestamp based reconciliation logic of “last write wins”;
+i.e., the object with the largest physical timestamp value is chosen as the
+correct version. The service that maintains customer’s session information
+is a good example of a service that uses this mode.
+
+> Riak also supports this for high-performance cases where accuracy is less
+> important than speed.
+
+* High performance read engine: While Dynamo is built to be an “always
+writeable” data store, a few services are tuning its quorum characteristics
+and using it as a high performance read engine. Typically, these services
+have a high read request rate and only a small number of updates. In this
+configuration, typically R is set to be 1 and W to be N. For these services,
+Dynamo provides the ability to partition and replicate their data across
+multiple nodes, thereby offering incremental scalability. Some of these
+instances function as the authoritative persistence cache for data stored in
+more heavyweight backing stores. Services that maintain the product catalog
+and promotional items fit in this category.
+
+> Riak can be used in this manner.
+
+The main advantage of Dynamo is that its client applications can tune the
+values of N, R and W to achieve their desired levels of performance,
+availability and durability. For instance, the value of N determines the
+durability of each object. A typical value of N used by Dynamo’s users is 3.
+
+The values of W and R impact object availability, durability and
+consistency.
+
+For instance, if W is set to 1, then the system will never reject a write
+request as long as there is at least one node in the system that can
+successfully process a write request. However, low values of W and R can
+increase the risk of inconsistency, as write requests are deemed successful
+and returned to the clients even if they are not processed by a majority of
+the replicas. This also introduces a vulnerability window for durability
+when a write request is successfully returned to the client even though it
+has been persisted at only a small number of nodes.
+
+Traditional wisdom holds that durability and availability go hand-in-hand.
+However, this is not necessarily true here. For instance, the vulnerability
+window for durability can be decreased by increasing W. This may increase
+the probability of rejecting requests (thereby decreasing availability)
+because more storage hosts need to be alive to process a write request.
+
+The common (N,R,W) configuration used by several instances of Dynamo is
+(3,2,2). These values are chosen to meet the necessary levels of
+performance, durability, consistency, and availability SLAs.
+
+All the measurements presented in this section were taken on a live system
+operating with a configuration of (3,2,2) and running a couple hundred nodes
+with homogeneous hardware configurations. As mentioned earlier, each
+instance of Dynamo contains nodes that are located in multiple datacenters.
+These datacenters are typically connected through high-speed network links.
+Recall that to generate a successful get (or put) response, R (or W) nodes
+need to respond to the coordinator. Clearly, the network latencies between
+datacenters affect the response time, and the nodes (and their datacenter
+locations) are chosen such that the applications’ target SLAs are met.
+
+> Ditto for Riak.
+
+### 6.1 Balancing Performance and Durability
+
+While Dynamo’s principal design goal is to build a highly available data
+store, performance is an equally important criterion in Amazon’s platform.
+As noted earlier, to provide a consistent customer experience, Amazon’s
+services set their performance targets at higher percentiles (such as the
+99.9th or 99.99th percentiles). A typical SLA required of services that use
+Dynamo is that 99.9% of the read and write requests execute within 300ms.
+
+Since Dynamo is run on standard commodity hardware components that have far
+less I/O throughput than high-end enterprise servers, providing consistently
+high performance for read and write operations is a non-trivial task. The
+involvement of multiple storage nodes in read and write operations makes it
+even more challenging, since the performance of these operations is limited
+by the slowest of the R or W replicas. <a href="#figure-4">Figure 4</a>
+shows the average and 99.9th percentile latencies of Dynamo’s read and write
+operations during a period of 30 days. As seen in the figure, the latencies
+exhibit a clear diurnal pattern which is a result of the diurnal pattern in
+the incoming request rate (i.e., there is a significant difference in
+request rate between the daytime and night). Moreover, the write latencies
+are obviously higher than read latencies because write operations always
+result in disk access. Also, the 99.9th percentile latencies are around 200
+ms and are an order of magnitude higher than the averages.
+This is because the 99.9th percentile latencies are affected by several
+factors such as variability in request load, object sizes, and locality
+patterns.
+
+**<figure id="figure-4" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure4.png">
+  <figcaption>
+    Figure 4: Average and 99.9 percentiles of latencies for read and write
+    requests during our peak request season of December 2006. The intervals
+    between consecutive ticks in the x-axis correspond to 12 hours. Latencies
+    follow a diurnal pattern similar to the request rate and 99.9 percentile
+    latencies are an order of magnitude higher than averages.
+  </figcaption>
+</figure>**
+
+While this level of performance is acceptable for a number of services, a
+few customer-facing services required higher levels of performance. For
+these services, Dynamo provides the ability to trade off durability
+guarantees for performance. In this optimization, each storage node
+maintains an object buffer in its main memory. Each write operation is
+stored in the buffer and gets periodically written to storage by a writer
+thread. In this scheme, read operations first check if the requested key is
+present in the buffer. If so, the object is read from the buffer instead of
+the storage engine.
+
+> This is more similar to Riak's W value, since only DW requires a durable
+> write to respond as a success.
+
+This optimization has resulted in lowering the 99.9th percentile latency by
+a factor of 5 during peak traffic, even for a very small buffer of a
+thousand objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in
+the figure, write buffering smoothes out higher percentile latencies.
+Obviously, this scheme trades durability for performance. In this scheme, a
+server crash can result in missing writes that were queued up in the buffer.
+To reduce the durability risk, the write operation is refined to have the
+coordinator choose one out of the N replicas to perform a “durable write”.
+Since the coordinator waits only for W responses, the performance of the
+write operation is not affected by the performance of the durable write
+operation performed by a single replica.
+
+**<figure id="figure-5" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure5.png">
+  <figcaption>
+    Figure 5: Comparison of performance of 99.9th percentile latencies for
+    buffered vs. non-buffered writes over a period of 24 hours. The intervals
+    between consecutive ticks in the x-axis correspond to one hour.
+  </figcaption>
+</figure>**
+
+> Setting DW=1 will replicate this behavior.
+
+
+### 6.2 Ensuring Uniform Load Distribution
+
+Dynamo uses consistent hashing to partition its key space across its
+replicas and to ensure uniform load distribution. A uniform key distribution
+can help us achieve uniform load distribution, assuming the access
+distribution of keys is not highly skewed. In particular, Dynamo’s design
+assumes that even where there is a significant skew in the access
+distribution there are enough keys in the popular end of the distribution so
+that the load of handling popular keys can be spread across the nodes
+uniformly through partitioning. This section discusses the load imbalance
+seen in Dynamo and the impact of different partitioning strategies on load
+distribution.
+
+> Riak follows a SHA1-based consistent hashing scheme for [partitioning].
+
+[partitioning]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication/#understanding-replication-by-example
+
+To study the load imbalance and its correlation with request load, the total
+number of requests received by each node was measured for a period of 24
+hours, broken down into intervals of 30 minutes. In a given time window, a
+node is considered to be “in-balance” if the node’s request load deviates
+from the average load by less than a certain threshold (here 15%). Otherwise
+the node was deemed “out-of-balance”. <a href="#figure-6">Figure 6</a>
+presents the fraction of nodes that are “out-of-balance” (henceforth,
+“imbalance ratio”) during this time period. For reference, the corresponding
+request load received by the entire system during this time period is also
+plotted. As seen in the figure, the imbalance ratio decreases with
+increasing load. For instance, during low loads the imbalance ratio is as
+high as 20% and during high loads it is close to 10%. Intuitively, this can
+be explained by the fact that under high loads, a large number of popular
+keys are accessed and due to uniform distribution of keys the load is evenly
+distributed. However, during low loads (where load is 1/8th of the measured
+peak load), fewer popular keys are accessed, resulting in a higher load
+imbalance.
+
+**<figure id="figure-6" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure6.png">
+  <figcaption>
+    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
+    request load is above a certain threshold from the average system load)
+    and their corresponding request load. The interval between ticks in
+    x-axis corresponds to a time period of 30 minutes.
+  </figcaption>
+</figure>**
+
+<i>This section discusses how Dynamo’s partitioning scheme has evolved over
+time and its implications on load distribution.</i>
+
+<strong>Strategy 1:</strong> T random tokens per node and partition by token
+value: This was the initial strategy deployed in production (and described
+in Section 4.2). In this scheme, each node is assigned T tokens (chosen
+uniformly at random from the hash space). The tokens of all nodes are
+ordered according to their values in the hash space. Every two consecutive
+tokens define a range. The last token and the first token form a range that
+"wraps" around from the highest value to the lowest value in the hash space.
+Because the tokens are chosen randomly, the ranges vary in size. As nodes
+join and leave the system, the token set changes and consequently the ranges
+change. Note that the space needed to maintain the membership at each node
+increases linearly with the number of nodes in the system.
+
+> Riak uses equal-sized partitions with a round-robin distribution, not
+> variably-sized partitions that are randomly distributed.
+
+While using this strategy, the following problems were encountered. First,
+when a new node joins the system, it needs to “steal” its key ranges from
+other nodes. However, the nodes handing the key ranges off to the new node
+have to scan their local persistence store to retrieve the appropriate set
+of data items. Note that performing such a scan operation on a production
+node is tricky, as scans are highly resource intensive operations and they
+need to be executed in the background without affecting the customer
+performance. This requires us to run the bootstrapping task at the lowest
+priority.
+However, this significantly slows the bootstrapping process, and during the
+busy shopping season, when the nodes are handling millions of requests a
+day, the bootstrapping has taken almost a day to complete. Second, when a
+node joins/leaves the system, the key ranges handled by many nodes change
+and the Merkle trees for the new ranges need to be recalculated, which is a
+non-trivial operation to perform on a production system. Finally, there was
+no easy way to take a snapshot of the entire key space due to the randomness
+in key ranges, and this made the process of archival complicated. In this
+scheme, archiving the entire key space requires us to retrieve the keys from
+each node separately, which is highly inefficient.
+
+The fundamental issue with this strategy is that the schemes for data
+partitioning and data placement are intertwined. For instance, in some
+cases, it is preferred to add more nodes to the system in order to handle an
+increase in request load. However, in this scenario, it is not possible to
+add nodes without affecting data partitioning. Ideally, it is desirable to
+use independent schemes for partitioning and placement. To this end, the
+following strategies were evaluated:
+
+<strong>Strategy 2:</strong> T random tokens per node and equal sized
+partitions: In this strategy, the hash space is divided into Q equally sized
+partitions/ranges and each node is assigned T random tokens. Q is usually
+set such that Q >> N and Q >> S*T, where S is the number of nodes in the
+system. In this strategy, the tokens are only used to build the function
+that maps values in the hash space to the ordered lists of nodes and not to
+decide the partitioning. A partition is placed on the first N unique nodes
+that are encountered while walking the consistent hashing ring clockwise
+from the end of the partition. <a href="#figure-7">Figure 7</a> illustrates
+this strategy for N=3. In this example, nodes A, B, C are encountered while
+walking the ring from the end of the partition that contains key k1. The
+primary advantages of this strategy are: (i) decoupling of partitioning and
+partition placement, and (ii) enabling the possibility of changing the
+placement scheme at runtime.
+
+> As mentioned before, Riak uses equal-sized partitions, but not random
+> distribution.
+
+**<figure id="figure-7" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure7-small.png">
+  <figcaption>
+    Figure 7: Partitioning and placement of keys in the three strategies. A,
+    B, and C depict the three unique nodes that form the preference list for
+    the key k1 on the consistent hashing ring (N=3). The shaded area
+    indicates the key range for which nodes A, B, and C form the preference
+    list. Dark arrows indicate the token locations for various nodes.
+  </figcaption>
+</figure>**
+
+<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions:
+Similar to strategy 2, this strategy divides the hash space into Q equally
+sized partitions and the placement of partition is decoupled from the
+partitioning scheme. Moreover, each node is assigned Q/S tokens where S is
+the number of nodes in the system. When a node leaves the system, its tokens
+are randomly distributed to the remaining nodes such that these properties
+are preserved. Similarly, when a node joins the system it "steals" tokens
+from nodes in the system in a way that preserves these properties.
+
+> Riak most closely follows strategy 3.
+>
+> See [The Node Join Process] and [Replacing a Node].
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+[Replacing a Node]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/replacing-node/
+
+The efficiency of these three strategies is evaluated for a system with S=30 and
+N=3. However, comparing these different strategies in a fair manner is hard as
+different strategies have different configurations to tune their efficiency. For
+instance, the load distribution property of Strategy 1 depends on the number of
+tokens (i.e., T), while Strategy 3 depends on the number of partitions (i.e., Q).
+One fair way to compare these strategies is to evaluate the skew in their load
+distribution while all strategies use the same amount of space to maintain their
+membership information. For instance, in Strategy 1 each node needs to maintain
+the token positions of all the nodes in the ring, and in Strategy 3 each node
+needs to maintain the information regarding the partitions assigned to each
+node.
+
+In our next experiment, these strategies were evaluated by varying the relevant
+parameters (T and Q). The load balancing efficiency of each strategy was
+measured for different sizes of membership information that needs to be
+maintained at each node, where load balancing efficiency is defined as the ratio
+of the average number of requests served by each node to the maximum number of
+requests served by the hottest node.
+
+The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
+figure, Strategy 3 achieves the best load balancing efficiency and Strategy 2
+has the worst. For a brief time, Strategy 2 served as an interim setup during
+the process of migrating Dynamo instances from Strategy 1 to Strategy 3.
+Compared to Strategy 1, Strategy 3 achieves better efficiency and reduces the
+size of membership information maintained at each node by three orders of
+magnitude. While storage is not a major issue, the nodes gossip the membership
+information periodically, and as such it is desirable to keep this information
+as compact as possible. In addition, Strategy 3 is advantageous and simpler to
+deploy for the following reasons: (i) Faster bootstrapping/recovery: Since
+partition ranges are fixed, they can be stored in separate files, meaning a
+partition can be relocated as a unit by simply transferring the file (avoiding
+the random accesses needed to locate specific items). This simplifies the
+process of bootstrapping and recovery. (ii) Ease of archival: Periodic
+archiving of the dataset is a mandatory requirement for most Amazon storage
+services. Archiving the entire dataset stored by Dynamo is simpler in Strategy
+3 because the partition files can be archived separately. By contrast, in
+Strategy 1 the tokens are chosen randomly, so archiving the data stored in
+Dynamo requires retrieving the keys from individual nodes separately, which is
+usually inefficient and slow. The disadvantage of Strategy 3 is that changing
+the node membership requires coordination in order to preserve the properties
+required of the assignment.
+
+**<figure id="figure-8" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure8.png">
+  <figcaption>
+    Figure 8: Comparison of the load distribution efficiency of different
+    strategies for a system with 30 nodes and N=3 with an equal amount of
+    metadata maintained at each node.
+    The values of the system size and number of replicas are based on the
+    typical configuration deployed for the majority of our services.
+  </figcaption>
+</figure>**
+
+### 6.3 Divergent Versions: When and How Many?
+
+As noted earlier, Dynamo is designed to trade off consistency for availability.
+To understand the precise impact of different failures on consistency, detailed
+data is required on multiple factors: outage length, type of failure, component
+reliability, workload, etc. Presenting these numbers in detail is outside the
+scope of this paper. However, this section discusses a good summary metric: the
+number of divergent versions seen by the application in a live production
+environment.
+
+> This first statement should be read carefully. It's probably more correct to
+> say that Dynamo (and Riak) provides no consistency guarantees, and allows
+> users to trade availability for durability/latency.
+
+Divergent versions of a data item arise in two scenarios. The first is when the
+system is facing failure scenarios such as node failures, data center failures,
+and network partitions. The second is when the system is handling a large number
+of concurrent writers to a single data item and multiple nodes end up
+coordinating the updates concurrently. From both a usability and efficiency
+perspective, it is preferred to keep the number of divergent versions at any
+given time as low as possible. If the versions cannot be syntactically
+reconciled based on vector clocks alone, they have to be passed to the business
+logic for semantic reconciliation. Semantic reconciliation introduces additional
+load on services, so it is desirable to minimize the need for it.
+
+In our next experiment, the number of versions returned to the shopping cart
+service was profiled for a period of 24 hours. During this period, 99.94% of
+requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
+of requests saw 3 versions; and 0.00009% of requests saw 4 versions. This shows
+that divergent versions are created rarely.
+
+Experience shows that the increase in the number of divergent versions is
+caused not by failures but by an increase in the number of concurrent writers.
+The increase in the number of concurrent writes is usually triggered by busy
+robots (automated client programs) and rarely by humans. This issue is not
+discussed in detail due to the sensitive nature of the story.
+
+### 6.4 Client-driven or Server-driven Coordination
+
+As mentioned in Section 5, Dynamo has a request coordination component that uses
+a state machine to handle incoming requests. Client requests are uniformly
+assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
+coordinator for a read request. Write requests, on the other hand, will be
+coordinated by a node in the key’s current preference list. This restriction is
+due to the fact that these preferred nodes have the added responsibility of
+creating a new version stamp that causally subsumes the version that has been
+updated by the write request. Note that if Dynamo’s versioning scheme were based
+on physical timestamps, any node could coordinate a write request.
+
+> In Riak, a server-side load-balancer is an optional configuration. You
+> generally use either virtual IPs or reverse-proxies.
+>
+> See [Load Balancing] for more information.
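+>
+> As a toy illustration of client-side spreading (hypothetical addresses and a
+> hypothetical `do_get` helper; real Riak clients handle this for you, as the
+> note on client libraries below explains), round-robin with failover might
+> look like:
+>
+>     require 'timeout'
+>
+>     NODES = %w[10.0.0.1 10.0.0.2 10.0.0.3]  # hypothetical node addresses
+>     @cursor = 0
+>
+>     def next_node
+>       node = NODES[@cursor % NODES.size]
+>       @cursor += 1
+>       node
+>     end
+>
+>     # Try each node once before giving up, so a single down node
+>     # does not make the request fail.
+>     def with_failover
+>       NODES.size.times do
+>         begin
+>           return yield(next_node)
+>         rescue IOError, Timeout::Error
+>           next
+>         end
+>       end
+>       raise "no node responded"
+>     end
+>
+>     # with_failover { |host| do_get(host, 'users', 'user-1234') }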
+
+[Load Balancing]: {{<baseurl>}}riak/kv/3.0.3/configuring/load-balancing-proxy/
+
+An alternative approach to request coordination is to move the state machine to
+the client nodes. In this scheme, client applications use a library to perform
+request coordination locally. A client periodically picks a random Dynamo node
+and downloads its current view of Dynamo membership state. Using this
+information, the client can determine which set of nodes form the preference
+list for any given key. Read requests can be coordinated at the client node,
+thereby avoiding the extra network hop that is incurred if the request were
+assigned to a random Dynamo node by the load balancer. Writes will either be
+forwarded to a node in the key’s preference list or can be coordinated locally
+if Dynamo is using timestamp-based versioning.
+
+> Many [client libraries] provide built-in node request coordination.
+>
+> For example, using the Ruby driver, you could specify three nodes like this:
+>
+>     client = Riak::Client.new(nodes: [
+>       {host: '10.0.0.1'},
+>       {host: '10.0.0.2'},
+>       {host: '10.0.0.3'}
+>     ])
+>
+> Note that the Riak clients do not coordinate with Riak's preference list, but
+> simply round-robin requests, letting the Riak cluster handle routing.
+
+[client libraries]: {{<baseurl>}}riak/kv/3.0.3/developing/client-libraries/
+
+An important advantage of the client-driven coordination approach is that a load
+balancer is no longer required to uniformly distribute client load. Fair load
+distribution is implicitly guaranteed by the near-uniform assignment of keys to
+the storage nodes. Obviously, the efficiency of this scheme is dependent on how
+fresh the membership information is at the client. Currently, clients poll a
+random Dynamo node every 10 seconds for membership updates. A pull-based
+approach was chosen over a push-based one, as the former scales better with a
+large number of clients and requires very little state to be maintained at
+servers regarding clients. However, in the worst case the client can be exposed
+to stale membership for a duration of 10 seconds. If the client detects that its
+membership table is stale (for instance, when some members are unreachable), it
+will immediately refresh its membership information.
+
+<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
+percentile and averages that were observed for a period of 24 hours using
+client-driven coordination compared to the server-driven approach. As seen in
+the table, the client-driven coordination approach reduces the latencies by at
+least 30 milliseconds for 99.9th percentile latencies and decreases the average
+by 3 to 4 milliseconds. The latency improvement is because the client-driven
+approach eliminates the overhead of the load balancer and the extra network hop
+that may be incurred when a request is assigned to a random node. As seen in the
+table, average latencies tend to be significantly lower than latencies at the
+99.9th percentile. This is because Dynamo’s storage engine caches and write
+buffer have good hit ratios. Moreover, since the load balancers and network
+introduce additional variability to the response time, the gain in response time
+is higher for the 99.9th percentile than for the average.
+
+<table id="table-2">
+  <caption>
+    Table 2: Performance of client-driven and server-driven
+    coordination approaches.
+  </caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing Background vs. Foreground Tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations were not affected significantly. To this end, the
+background tasks were integrated with an admission control mechanism. Each of
+the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementing and maintaining Dynamo. Many Amazon internal services have used
+Dynamo for the past two years, and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of their requests, and no
+data loss event has occurred to date.
+
+Moreover, a primary advantage of Dynamo is that it provides the necessary
+knobs, in the form of the three parameters (N,R,W), to let applications tune
+their instances to their needs. Unlike popular commercial data stores, Dynamo
+exposes data consistency and reconciliation logic issues to the developers. At
+the outset, one may expect the application logic to become more complex.
+However, historically, Amazon’s platform is built for high availability and many
+applications are designed to handle different failure modes and inconsistencies
+that may arise.
+Hence, porting such applications to use Dynamo was a relatively
+simple task. For new applications that want to use Dynamo, some analysis is
+required during the initial stages of the development to pick the right conflict
+resolution mechanisms that meet the business case appropriately. Finally, Dynamo
+adopts a full membership model where each node is aware of the data hosted by
+its peers. To do this, each node actively gossips the full routing table with
+other nodes in the system. This model works well for a system that contains a
+couple of hundred nodes. However, scaling such a design to run with tens of
+thousands of nodes is not trivial because the overhead in maintaining the
+routing table increases with the system size. This limitation might be overcome
+by introducing hierarchical extensions to Dynamo. Also, note that this problem
+is actively addressed by O(1) DHT systems (e.g., [14]).
+
+> This is equally true for Riak. As mentioned above, consider running
+> [Basho Bench] to help discover your optimal setup. Nothing will give you
+> better numbers than real experimentation.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/3.0.3/using/performance/benchmarking/
+
+## 7. Conclusions
+
+> This paper was an overview of Riak from a Dynamo point-of-view. To get a
+> better sense of the Riak ecosystem, read our ever-expanding [documentation].
+
+[documentation]: {{<baseurl>}}
+
+This paper described Dynamo, a highly available and scalable data store, used
+for storing the state of a number of core services of Amazon.com’s e-commerce
+platform. Dynamo has provided the desired levels of availability and performance
+and has been successful in handling server failures, data center failures, and
+network partitions. Dynamo is incrementally scalable and allows service owners
+to scale up and down based on their current request load. Dynamo allows service
+owners to customize their storage system to meet their desired performance,
+durability, and consistency SLAs by allowing them to tune the parameters N, R,
+and W.
+
+The production use of Dynamo for the past year demonstrates that decentralized
+techniques can be combined to provide a single highly-available system. Its
+success in one of the most challenging application environments shows that an
+eventually-consistent storage system can be a building block for
+highly-available applications.
+ + + + diff --git a/content/riak/kv/3.0.3/learn/glossary.md b/content/riak/kv/3.0.3/learn/glossary.md new file mode 100644 index 0000000000..f377f63c79 --- /dev/null +++ b/content/riak/kv/3.0.3/learn/glossary.md @@ -0,0 +1,358 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +aliases: +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/3.0.3/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/3.0.3/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/3.0.3/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/3.0.3/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/3.0.3/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/3.0.3/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/3.0.3/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/3.0.3/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Hinted Handoff
+
+Hinted handoff is a technique for dealing with node failure in the Riak
+cluster in which neighboring nodes temporarily take over storage
+operations for the failed node. When the failed node returns to the
+cluster, the updates received by the neighboring nodes are handed off to
+it.
+
+Hinted handoff allows Riak to ensure database availability. When a node
+fails, Riak can continue to handle requests as if the node were still
+there.
+
+* [Recovering a Failed Node][repair recover failure recovery]
+
+## Key
+
+Keys are unique object identifiers in Riak and are scoped within buckets
+and bucket types.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Lager
+
+[Lager] is an Erlang/OTP framework that
+ships as Riak's default logger.
+
+## MapReduce
+
+Riak's MapReduce gives developers the capability to perform more
+powerful queries over the data stored in their key/value store.
+
+* [Using MapReduce][usage mapreduce]
+
+## Node
+
+A node is analogous to a physical server. Nodes run a certain number of
+vnodes, each of which claims a partition in the Riak Ring key space.
+
+* [Clusters][concept clusters]
+* [Adding and Removing Nodes][cluster ops add remove node]
+
+## Object
+
+An object is another name for a value.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+
+## Partition
+
+Partitions are the spaces into which a Riak cluster is divided. Each
+vnode in Riak is responsible for a partition. Data is stored on a set
+number of partitions determined by the `n_val` setting, with the target
+partitions chosen statically by applying consistent hashing to an
+object's key.
+
+* [Clusters][concept clusters]
+* [Eventual Consistency][concept eventual consistency]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Quorum
+
+Quorum in Riak has two meanings:
+
+* The quantity of replicas that must respond to a read or write request
+  before it is considered successful. This is defined as a bucket
+  property or as one of the relevant parameters to a single request
+  (R,W,DW,RW).
+* A symbolic quantity for the above, `quorum`, which is equivalent to
+  `n_val` / 2 + 1. The default setting is `2`.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Replication properties][apps replication properties]
+* [Understanding Riak's Configurable Behaviors]
+
+## Sloppy Quorum
+
+During failure scenarios, in which available nodes < total nodes, sloppy
+quorum is used to ensure that Riak is still available to take writes.
+When a primary node is unavailable, another node will accept its write
+requests. When the node returns, data is transferred to the primary node
+via the [Hinted Handoff](#hinted-handoff) process.
+
+## Read Repair
+
+Read repair is an anti-entropy mechanism that Riak uses to
+optimistically update stale replicas when they reply to a read request
+with stale data.
+
+* [More about Read Repair][concept replication]
+
+## Replica
+
+Replicas are copies of data stored in Riak. The number of replicas
+required for both successful reads and writes is configurable in Riak
+and should be set based on your application's consistency and
+availability requirements.
+
+* [Eventual Consistency][concept eventual consistency]
+* [Understanding Riak's Configurable Behaviors]
+
+## Riak Core
+
+Riak Core is the modular distributed systems framework that serves as
+the foundation for Riak's scalable architecture.
+
+* [Riak Core]
+* [Where To Start With Riak Core]
+
+## Riak KV
+
+Riak KV is the key/value datastore for Riak.
+
+* [Riak KV]
+
+## Riak Pipe
+
+Riak Pipe is the processing layer that powers Riak's MapReduce. It's
+best described as "UNIX pipes for Riak."
+
+* [Riak Pipe]
+* [Riak Pipe - the New MapReduce Power]
+* [Riak Pipe - Riak's Distributed Processing Framework]
+
+## Riak Search
+
+Riak Search is a distributed, scalable, failure-tolerant, realtime,
+full-text search engine integrating [Apache
+Solr](https://lucene.apache.org/solr/) with Riak KV.
+
+* [Using Search][usage search]
+
+## Ring
+
+The Riak Ring is a 160-bit integer space. This space is equally divided
+into partitions, each of which is claimed by a vnode, which themselves
+reside on actual physical server nodes.
+
+* [Clusters][concept clusters]
+* [Dynamo][learn dynamo]
+* [Cluster Capacity Planning][plan cluster capacity]
+
+## Secondary Indexing (2i)
+
+Secondary Indexing in Riak gives developers the ability to tag an object
+stored in Riak with one or more values which can then be queried.
+
+* [Using Secondary Indexes][usage secondary-indexes]
+* [Repairing Indexes][repair recover repairs]
+
+## Strong Consistency
+
+While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater
+enable you to apply strong consistency guarantees to some or all of your
+data, thus using Riak as a CP (consistent plus partition-tolerant)
+rather than AP (highly available plus partition-tolerant) system.
+
+* [Strong Consistency Concept][concept strong consistency]
+* [Using Strong Consistency][cluster ops strong consistency]
+
+## Value
+
+Riak is best described as a key/value store. In versions of Riak prior
+to 2.0, all "values" are opaque BLOBs (binary large objects) identified
+with a unique key. Values can be any type of data, including a string, a
+JSON object, a text document, etc. Modifying values involves fetching
+the value that exists in Riak and replacing it with a new value;
+operations on values are thus basic CRUD operations.
+
+[Riak Data Types][dev data types], added in version 2.0, are an important
+exception to this. While still considered values---because they are
+stored in bucket type/bucket/key locations, like anything in Riak---Riak
+Data Types are not BLOBs and are modified by Data Type-specific
+operations.
+
+* [Keys and Objects][concept keys objects]
+* [Key/Value Development][dev kv model]
+* [Data Types][dev data types]
+
+
+## Vector Clock
+
+Riak utilizes vector clocks (or _vclocks_) to handle version control.
+Since any node in a Riak cluster is able to handle a request, and not
+all nodes need to participate, data versioning is required to keep track
+of the current value. When a value is stored in Riak, it is tagged with
+a vector clock, establishing its initial version. When it is updated,
+the client provides the vector clock of the object being modified so
+that this vector clock can be extended to reflect the update. Riak can
+then compare vector clocks on different versions of the object and
+determine certain attributes of the data.
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + + + + diff --git a/content/riak/kv/3.0.3/learn/new-to-nosql.md b/content/riak/kv/3.0.3/learn/new-to-nosql.md new file mode 100644 index 0000000000..fc0c6e04e5 --- /dev/null +++ b/content/riak/kv/3.0.3/learn/new-to-nosql.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: 3.0.3 +#menu: +# riak_kv-3.0.3: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +aliases: +--- + +**TODO: Add content (not sure where this lives in existing docs)** + + + + diff --git a/content/riak/kv/3.0.3/learn/use-cases.md b/content/riak/kv/3.0.3/learn/use-cases.md new file mode 100644 index 0000000000..7aed4efd42 --- /dev/null +++ b/content/riak/kv/3.0.3/learn/use-cases.md @@ -0,0 +1,405 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/3.0.3/dev/data-modeling/ + - /riak/kv/3.0.3/dev/data-modeling/ +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/3.0.3/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/3.0.3/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. 
+This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships:
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was Sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+
+Riak's tunable [replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster used to perform more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+patterns of reading and writing data to Riak are very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache Logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value.
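+
+As a hedged sketch of that scheme--using the Ruby driver's classic object API,
+with a hypothetical device bucket and interval key--a single reading might be
+written and read back like this:
+
+```ruby
+require 'riak'  # the riak-client gem
+
+client = Riak::Client.new(nodes: [{host: '10.0.0.1'}])
+
+# One bucket per device; the reading's time interval is the key.
+bucket = client.bucket('sensor-42')
+
+reading = bucket.new('2021-01-14T10:30:00Z')
+reading.content_type = 'application/json'
+reading.data = { 'temperature' => 21.4, 'humidity' => 0.53 }
+reading.store
+
+# Fetching a known interval back is a single key lookup:
+bucket.get('2021-01-14T10:30:00Z').data
+```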
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors yet with very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, all of those updates
+could be read and parsed on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye out for the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
+      Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type of object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account or have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of modeling with
+user data. A common example would be storing data for assembling a social
+network timeline. To create a user timeline, you could use a `timeline` bucket
+in Riak and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs, which could then be
+used to retrieve the full information from another bucket, or perhaps the full
+status updates themselves. If you want to store additional data, such as a
+timestamp, category, or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+to the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client.
+      Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the post itself, though in a different bucket, so
+that the bucket/key combination remains unique. Another possibility would be to
+store each comment with its own ID. Loading the full view with comments would
+require your application to read from the posts and comments buckets to
+assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Linkfluence Case Study">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="ideeli Case Study">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
+
+
+
diff --git a/content/riak/kv/3.0.3/learn/why-riak-kv.md b/content/riak/kv/3.0.3/learn/why-riak-kv.md
new file mode 100644
index 0000000000..9a14a17426
--- /dev/null
+++ b/content/riak/kv/3.0.3/learn/why-riak-kv.md
@@ -0,0 +1,225 @@
+---
+title: "Why Riak KV?"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Why Riak KV?"
+    identifier: "learn_why_riak_kv"
+    weight: 100
+    parent: "learn"
+toc: true
+aliases:
+  - /riak/3.0.3/theory/why-riak/
+  - /riak/kv/3.0.3/theory/why-riak/
+---
+
+
+[apps replication properties]: {{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties
+[Basho Bench]: {{<baseurl>}}riak/kv/3.0.3/using/performance/benchmarking
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/strong-consistency
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency
+[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
+[Datomic]: http://www.datomic.com/overview.html
+[dev data types]: {{<baseurl>}}riak/kv/3.0.3/developing/data-types
+[glossary read rep]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#read-repair
+
+
+## What is Riak?
+
+Riak is a distributed database designed to deliver maximum data
+availability by distributing data across multiple servers. As long as
+your Riak client can reach *one* Riak server, it should be able to write
+data.
+
+Riak is an **eventually consistent** system, meaning that the data you want to
+read should remain available in most failure scenarios, even though it may not
+be the most up-to-date version of that data.
+
+
+### Basho's goals for Riak
+
+Goal | Description
+-------|-------
+**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself is experiencing failure conditions
+**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden
+**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity
+**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available
+
+### When Riak makes sense
+
+If your data does not fit on a single server and demands a distributed
+database architecture, you should take a close look at Riak as a
+potential solution to your data availability issues. Getting distributed
+databases right is **very** difficult, and Riak was built to address the
+problem of data availability with as few trade-offs and downsides as
+possible.
+
+Riak's focus on availability makes it a good fit whenever downtime is
+unacceptable. No one can promise 100% uptime, but Riak is designed to
+survive network partitions and hardware failures that would
+significantly disrupt most databases.
+
+A less-heralded feature of Riak is its predictable latency. Because its
+fundamental operations---read, write, and delete---do not involve
+complex data joins or locks, it services those requests promptly. Thanks
+to this capability, Riak is often selected as a data storage backend for
+data management software from a variety of paradigms, such as
+[Datomic].
+
+From the standpoint of the actual content of your data, Riak might also
+be a good choice if your data can be modeled as one of Riak's currently
+available [Data Types][dev data types]: flags, registers, counters,
+sets, or maps. These Data Types enable you to take advantage of Riak's
+high availability approach while simplifying application development.
+
+### When Riak is Less of a Good Fit
+
+We recommend running no fewer than 5 data servers in a cluster.
+This means that Riak can be overkill for small databases.
If you're not +already sure that you will need a distributed database, there's a good +chance that you won't need Riak. + +If explosive growth is a possibility, however, you are always highly +advised to prepare for that in advance. Scaling at Internet speeds is +sometimes compared to overhauling an airplane mid-flight. If you feel +that such a transition might be necessary in the future, then you might +want to consider Riak. + +Riak's simple data model, consisting of keys and values as its atomic +elements, means that your data must be denormalized if your system is to +be reasonably performant. For most applications this is not a serious +hurdle. But if your data simply cannot be effectively managed as keys +and values, Riak will most likely not be the best fit for you. + +Correspondingly, if your application demands a high query load by any +means other than key/value lookup---e.g. SQL-style `SELECT * FROM table` +operations---Riak will not be as efficient as other databases. If you +wish to compare Riak with other data technologies, Basho offers a tool +called [Basho Bench] to help measure its performance, so that you can +decide whether the availability and operational benefits of Riak +outweigh its disadvantages. + +## How Does a Riak Cluster Work? + +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. 
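+
+As a brief sketch--using the Ruby driver with property-setting calls assumed
+from the `riak-client` gem, and a hypothetical bucket name--raising `n_val`
+for one bucket might look like this:
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '10.0.0.1'}])
+bucket = client.bucket('important-data')
+
+# Raise this bucket's replication factor from the default of 3 to 5.
+bucket.props = { 'n_val' => 5 }
+
+bucket.props['n_val']  # => 5 once the change has propagated
+```
+
+Note that changing `n_val` on a bucket that already contains data has
+operational consequences, so it is best decided up front.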
+ +## When Things Go Wrong + +Riak retains fault tolerance, data integrity, and availability even in +failure conditions such as hardware failure and network partitions. Riak +has a number of means of addressing these scenarios and other bumps in +the road, like version conflicts in data. + +### Hinted Handoff + +Hinted handoff enables Riak to handle node failure. If a node goes down, +a neighboring node will take over its storage operations. When the +failed node returns, the updates received by the neighboring node are +handed back to it. This ensures that availability for writes and updates +is maintained automatically, minimizing the operational burden of +failure conditions. + +### Version Conflicts + +In any system that replicates data, conflicts can arise, for example +when two clients update the same object at the exact same time or when +not all updates have yet reached hardware that is experiencing lag. + +In Riak, replicas are [eventually consistent][concept eventual consistency], +meaning that while data is always available, not all replicas may have +the most recent update at the exact same time, causing brief +periods---generally on the order of milliseconds---of inconsistency +while all state changes are synchronized. + +Riak addresses data conflicts as follows: When you make a read request, +Riak looks up all replicas for that object. By default, Riak will return +the most recently updated version, determined by looking at the object's +vector clock. Vector clocks are metadata attached to each replica when +it is created. They are extended each time a replica is updated to keep +track of versions. You can also allow clients to resolve conflicts +themselves if that is a better fit for your use case. + +### Riak Data Types + +If you are not interested in dealing with version conflicts on the +application side, [Riak Data Types][dev data types] offer a powerful +yet easy-to-use means of storing certain types of data while allowing +Riak to handle merge conflicts. These conflicts are resolved +automatically by Riak using Data Type-specific algorithms inspired by +research into [convergent replicated data types]. + +### Read Repair + +When an outdated replica is returned as part of a read request, Riak +will automatically update the out-of-sync replica to make it consistent. +[Read repair][glossary read rep], a self-healing property of +the database, will even update a replica that returns a `not_found` in +the event that a node loses the data due to physical failure. + +### Reading and Writing Data in Failure Conditions + +In Riak, you can set an R value for reads and a W value for writes. +These values give you control over how many replicas must respond to a +request for it to succeed. + +Let's say that you have an N value of 3 (aka `n_val=3`) for a particular +key/value pair, but one of the physical nodes responsible for a replica +is down. With an `r=2` setting, only 2 replicas must return results for +read to be deemed successful. This allows Riak to provide read +availability even when nodes are down or laggy. The same applies for the +W in writes. If this value is not specified, Riak defaults to `quorum`, +according to which the majority of nodes must respond. + +There is more on [replication properties][apps replication properties] elsewhere in the +documentation. 
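+
+As a quick, concrete illustration of these values in practice, Riak's HTTP interface accepts `r` and `w` as query parameters on individual reads and writes. A minimal sketch, assuming a local node listening on the default HTTP port 8098 (the bucket and key names are just examples):
+
+```bash
+# Write an object, requiring 2 replicas to acknowledge the write (w=2):
+curl -XPUT -H "Content-Type: text/plain" -d "hello" \
+  "http://127.0.0.1:8098/buckets/test/keys/demo?w=2"
+
+# Read it back, requiring only 2 of the 3 replicas to respond (r=2):
+curl "http://127.0.0.1:8098/buckets/test/keys/demo?r=2"
+```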
+ + + + diff --git a/content/riak/kv/3.0.3/release-notes.md b/content/riak/kv/3.0.3/release-notes.md new file mode 100644 index 0000000000..465b0b6def --- /dev/null +++ b/content/riak/kv/3.0.3/release-notes.md @@ -0,0 +1,40 @@ +--- +title: "Riak KV 3.0.3 Release Notes" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Release Notes" + identifier: "index_release_notes" + weight: 101 + parent: index +toc: false +aliases: + - /riak/3.0.3/community/release-notes + - /riak/kv/3.0.3/intro-v20 + - /riak/3.0.3/intro-v20 + - /riak/kv/3.0.3/introduction +--- + +Released Jan 14, 2021. + + +## Overview + +There are two fixes provided in Release 3.0.3: + +A performance issue with OTP 22 and leveled has been corrected. This generally did not have a significant impact when running Riak, but there were some potential cases with Tictac AAE and AAE Folds where there could have been a noticeable slowdown. + +An issue with console commands for bucket types has now been fully corrected, having been partially mitigated in 3.0.2. + +This release is tested with OTP 20, OTP 21 and OTP 22; but optimal performance is likely to be achieved when using OTP 22. + +## Previous Release Notes + +Please see the KV 3.0.2 release notes [here]({{<baseurl>}}riak/kv/3.0.2/release-notes/). + + + + + diff --git a/content/riak/kv/3.0.3/setup.md b/content/riak/kv/3.0.3/setup.md new file mode 100644 index 0000000000..30656109b0 --- /dev/null +++ b/content/riak/kv/3.0.3/setup.md @@ -0,0 +1,51 @@ +--- +title: "Setup Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Setup" + identifier: "setup_index" + weight: 110 + pre: install +toc: false +aliases: +--- + +[plan index]: ../setup/planning +[install index]: ../setup/installing +[upgrade index]: ../setup/upgrading +[downgrade]: ../setup/downgrade + +## In This Section + +#### [Planning][plan index] + +Information on planning your Riak KV cluster including software & hardware recommendations. + +[Learn More >>][plan index] + +#### [Installing][install index] + +Step-by-step tutorials on installing Riak KV. + +[Learn More >>][install index] + +#### [Upgrading][upgrade index] + +Guides on upgrading your Riak KV cluster. + +[Learn More >>][upgrade index] + +#### [Downgrading][downgrade] + +A guide on downgrading your Riak KV cluster. + +[Learn More >>][downgrade] + + + + + + diff --git a/content/riak/kv/3.0.3/setup/downgrade.md b/content/riak/kv/3.0.3/setup/downgrade.md new file mode 100644 index 0000000000..551a056ab1 --- /dev/null +++ b/content/riak/kv/3.0.3/setup/downgrade.md @@ -0,0 +1,179 @@ +--- +title: "Downgrading" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Downgrading" + identifier: "downgrading" + weight: 103 + parent: "setup_index" +toc: true +aliases: + - /riak/3.0.3/ops/upgrading/rolling-downgrades/ + - /riak/kv/3.0.3/ops/upgrading/rolling-downgrades/ +--- + +[rolling upgrade]: {{<baseurl>}}riak/kv/3.0.3/setup/upgrading/cluster +[config ref]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference +[concept aae]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/ +[aae status]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#aae-status + +Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade]. 
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak search.
+7. Monitor the reindex of the data.
+8. Finalize process and restart Riak KV & Riak search.
+
+### Guidelines
+
+* Riak Control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:---|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ | | Can be opted out using a [capability](#aae_tree_capability). |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression and/or global expiration in LevelDB when you installed KV 3.0.3, you cannot downgrade.
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed-version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).
+
+This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}
+
+### Stop Riak KV and remove Riak search index & temporary data
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up your Riak KV `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Remove the Riak search index data and AAE data:
+
+ 1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+ ```bash
+ rm -rf /var/lib/riak/yz_temp/solr-webapp
+ ```
+ 2. Delete the Solr cores located in the yz directory. If you have custom `solrconfig.xml` files, you will need to restore the core from backup instead.
+
+ For example:
+
+ ```bash
+ rm -rf /var/lib/riak/yz/example_core1
+ rm -rf /var/lib/riak/yz/example_core2
+ ```
+
+### Prepare to Re-index Solr Cores
+
+5\. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency-sensitive application, you should adjust these settings with care.
+
+```riak.conf
+anti_entropy.concurrency_limit = 8
+anti_entropy.tree.build_limit.number = 4
+anti_entropy.tree.build_limit.per_timespan = 5m
+```
+
+### Start the node and disable Yokozuna
+
+6\. Start Riak KV:
+
+{{% note %}}
+Search results will be inconsistent until **Step 8.1** is complete.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7\. Wait for Riak search to start by running the following command:
+
+```bash
+riak-admin wait-for-service yokozuna
+```
+
+8\. Run `riak attach`.
+
+ 1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:
+
+ ```
+ riak_core_node_watcher:service_down(yokozuna).
+ ```
+
+ 2. Expire the Yokozuna AAE Trees:
+
+ ```
+ yz_entropy_mgr:expire_trees().
+ ```
+
+ 3. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+### Monitor the reindex of the data
+
+9\. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands.
+
+The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output.
+
+Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.
+
+### Finalize process and restart Yokozuna
+
+10\. If you raised the AAE concurrency settings in `riak.conf` during **Step 5**, stop the node and remove the increased AAE thresholds.
+
+11\. If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet:
+
+```erlang
+riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)).
+```
+
+12\. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+13\. Verify that transfers have completed:
+
+```bash
+riak-admin transfers
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/installing.md b/content/riak/kv/3.0.3/setup/installing.md
new file mode 100644
index 0000000000..14a431b153
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/installing.md
@@ -0,0 +1,61 @@
+---
+title: "Installing Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Installing"
+    identifier: "installing"
+    weight: 101
+    parent: "setup_index"
+    pre: cog
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/installing
+  - /riak/kv/3.0.3/ops/building/installing
+  - /riak/3.0.3/installing/
+  - /riak/kv/3.0.3/installing/
+---
+
+[install aws]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/amazon-web-services
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/debian-ubuntu
+[install freebsd]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/freebsd
+[install mac osx]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/mac-osx
+[install rhel & centos]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/rhel-centos
+[install suse]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/suse
+[install windows azure]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/windows-azure
+[install source index]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/source
+[community projects]: {{<baseurl>}}community/projects
+[upgrade index]: {{<baseurl>}}riak/kv/3.0.3/setup/upgrading
+
+## Supported Platforms
+
+Riak is supported on numerous popular operating systems and virtualized
+environments.
The following information will help you to +properly install or upgrade Riak in one of the supported environments: + + * [Amazon Web Services][install aws] + * [Debian & Ubuntu][install debian & ubuntu] + * [FreeBSD][install freebsd] + * [Mac OS X][install mac osx] + * [RHEL & CentOS][install rhel & centos] + * [SUSE][install suse] + * [Windows Azure][install windows azure] + +## Building from Source + +If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions. + +## Community Projects + +Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/). + +## Upgrading + +For information on upgrading an existing cluster see [Upgrading Riak KV][upgrade index]. + + + + + diff --git a/content/riak/kv/3.0.3/setup/installing/amazon-web-services.md b/content/riak/kv/3.0.3/setup/installing/amazon-web-services.md new file mode 100644 index 0000000000..f2cb52060c --- /dev/null +++ b/content/riak/kv/3.0.3/setup/installing/amazon-web-services.md @@ -0,0 +1,153 @@ +--- +title_supertext: "Installing on" +title: "Amazon Web Services" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Amazon Web Services" + identifier: "installing_amazon_web_services" + weight: 301 + parent: "installing" +toc: true +aliases: + - /riak/3.0.3/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/kv/3.0.3/ops/building/installing/Installing-on-AWS-Marketplace + - /riak/3.0.3/installing/amazon-web-services/ + - /riak/kv/3.0.3/installing/amazon-web-services/ +--- + + +## Launching Riak VMs via the AWS Marketplace + +{{% note title="Note" %}} +The AWS Marketplace does not always have the most recent versions of Riak available. To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section underneath. +{{% /note %}} + +In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account. + +1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account. + +2. Locate Riak in the **Databases & Caching** category or search for Riak from any page. + +3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair + + ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png) + +4. Click the **Accept Terms and Launch with 1-Click** button. + +### Security Group Settings + +Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak. + +1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM. + +2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports: + + * 22 (SSH) + * 8087 (Riak Protocol Buffers Interface) + * 8098 (Riak HTTP Interface) + +3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab). + + * Port range: 4369 + * Port range: 6000-7999 + * Port range: 8099 + +4. 
When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.
+
+    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)
+
+We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/3.0.3/using/security/).
+
+## Clustering Riak on AWS
+
+You will need to launch at least 3 instances to form a Riak cluster. When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the `ec2-user`.
+
+You can find more information on connecting to an instance on the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).
+
+{{% note title="Note" %}}
+The following clustering setup will _not_ be resilient to instance restarts
+unless deployed in Amazon VPC.
+{{% /note %}}
+
+{{% note title="Note on Package Based Installation" %}}
+If installing to AWS by package, you must first configure _riak.conf_ to set the node name and listening IP addresses for the steps below to function.
+{{% /note %}}
+
+1. On the first node, obtain the internal IP address:
+
+    ```bash
+    curl http://169.254.169.254/latest/meta-data/local-ipv4
+    ```
+
+2. For all other nodes, use the internal IP address of the first node:
+
+    ```bash
+    sudo riak-admin cluster join riak@<ip.of.first.node>
+    ```
+
+3. After all of the nodes are joined, execute the following:
+
+    ```bash
+    sudo riak-admin cluster plan
+    ```
+
+    If this looks good:
+
+    ```bash
+    sudo riak-admin cluster commit
+    ```
+
+    To check the status of clustering use:
+
+    ```bash
+    sudo riak-admin member_status
+    ```
+
+You now have a Riak cluster running on AWS.
+
+
+## Installing From Package
+
+#### AWS (2)
+
+You can install Riak on Amazon Linux 2 (AWS 2) using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3-1.amzn2x86_64.rpm
+sudo yum localinstall -y riak-3.0.3-1.amzn2x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3-1.amzn2x86_64.rpm
+sudo rpm -i riak-3.0.3-1.amzn2x86_64.rpm
+```
+
+
+#### AWS (2016.09)
+
+You can install Riak on Amazon Linux 2016.09 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3-1.amzn1x86_64.rpm
+sudo yum localinstall -y riak-3.0.3-1.amzn1x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3-1.amzn1x86_64.rpm
+sudo rpm -i riak-3.0.3-1.amzn1x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].
+
+[install verify]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/verify
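+
+As a final check, you can confirm from a shell on each instance that the node is actually up; a healthy node answers `pong`:
+
+```bash
+riak ping
+```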
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/installing/debian-ubuntu.md b/content/riak/kv/3.0.3/setup/installing/debian-ubuntu.md
new file mode 100644
index 0000000000..16661b5b0d
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/installing/debian-ubuntu.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "Installing on"
+title: "Debian and Ubuntu"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Debian & Ubuntu"
+    identifier: "installing_debian_ubuntu"
+    weight: 302
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/kv/3.0.3/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/3.0.3/installing/debian-ubuntu/
+  - /riak/kv/3.0.3/installing/debian-ubuntu/
+---
+
+[install source index]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/source/
+[security index]: {{<baseurl>}}riak/kv/3.0.3/using/security/
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/verify
+
+Riak KV can be installed on Debian or Ubuntu-based systems using a binary
+package or by compiling from source code.
+
+The following steps have been tested to work with Riak KV on:
+
+- Ubuntu 18.04
+- Ubuntu 16.04
+- Ubuntu 14.04
+- Ubuntu 12.04
+- Debian 9.2
+- Debian 8.6
+- Debian 7.6
+- Raspbian Buster
+
+> **Note on Debian 7**
+>
+> If you wish to install Riak on Debian 7, you may need to install
+[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
+later, which in turn requires upgrading your system to
+[sid](https://www.debian.org/releases/sid/). Installation instructions
+can be found
+[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
+>
+> Once sid has been installed, you can install libc6 with the following
+command:
+>
+>```bash
+apt-get -t sid install libc6 libc6-dev libc6-dbg
+```
+
+## Installing From Package
+
+If you wish to install the deb packages by hand, follow these
+instructions.
+
+### Installing on Non-LTS Ubuntu Releases
+
+Typically we only package Riak for LTS releases to keep our build and
+testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
+there are changes that affect how Riak is packaged, so we will release a
+separate package for that non-LTS release. In most other cases, however,
+if you are running a non-LTS release it is safe to follow the
+instructions below for the LTS release prior to yours. For example, if
+you are running Ubuntu 12.10, follow the installation instructions for
+Ubuntu 12.04.
+
+### PAM Library Requirement for Ubuntu
+
+One dependency that may be missing on your machine is the `libpam0g-dev`
+package used for Pluggable Authentication Module (PAM) authentication,
+associated with [Riak security][security index].
+
+To install:
+
+```bash
+sudo apt-get install libpam0g-dev
+```
+
+### Riak 64-bit Installation
+
+#### Ubuntu Bionic Beaver (18.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/bionic64/riak_3.0.3-1_amd64.deb
+sudo dpkg -i riak_3.0.3-1_amd64.deb
+```
+
+#### Ubuntu Xenial Xerus (16.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/xenial64/riak_3.0.3-1_amd64.deb
+sudo dpkg -i riak_3.0.3-1_amd64.deb
+```
+
+#### Ubuntu Trusty Tahr (14.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/trusty64/riak_3.0.3-1_amd64.deb
+sudo dpkg -i riak_3.0.3-1_amd64.deb
+```
+
+#### Ubuntu Precise Pangolin (12.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/precise64/riak_3.0.3-1_amd64.deb
+sudo dpkg -i riak_3.0.3-1_amd64.deb
+```
+
+#### Debian Stretch (9.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak_3.0.3-1_amd64.deb
+sudo dpkg -i riak_3.0.3-1_amd64.deb
+```
+
+#### Debian Jessie (8.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/8/riak_3.0.3-1_amd64.deb
+sudo dpkg -i riak_3.0.3-1_amd64.deb
+```
+
+#### Debian Wheezy (7.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/7/riak_3.0.3-1_amd64.deb
+sudo dpkg -i riak_3.0.3-1_amd64.deb
+```
+
+#### Raspbian Buster
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/raspbian/buster/riak_3.0.3-1_armhf.deb
+sudo dpkg -i riak_3.0.3-1_armhf.deb
+```
+
+
+## Installing From Source
+
+First, install Riak dependencies using apt:
+
+```bash
+sudo apt-get install build-essential libc6-dev-i386 git
+```
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/riak-3.0.3.tar.gz
+tar zxvf riak-3.0.3.tar.gz
+cd riak-3.0.3
+make rel
+```
+
+If the build was successful, a fresh build of Riak will exist in the
+`rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/installing/freebsd.md b/content/riak/kv/3.0.3/setup/installing/freebsd.md
new file mode 100644
index 0000000000..29926a7618
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/installing/freebsd.md
@@ -0,0 +1,133 @@
+---
+title_supertext: "Installing on"
+title: "FreeBSD"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "FreeBSD"
+    identifier: "installing_freebsd"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/installing/Installing-on-FreeBSD
+  - /riak/kv/3.0.3/ops/building/installing/Installing-on-FreeBSD
+  - /riak/3.0.3/installing/freebsd/
+  - /riak/kv/3.0.3/installing/freebsd/
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/3.0.3/downloads/
+[install verify]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/verify
+
+You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code.
+
+## Installing From Binary Package
+
+Installing Riak from a binary package is the simplest method, has the fewest required dependencies, and takes less time to complete than building from source.
+
+### Prerequisites and Dependencies
+
+Riak depends on `sudo` being installed if the Riak command line tools are to be executed by users other than the *riak* user.
Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package. + +### Installation + +You can install the Riak binary package on FreeBSD remotely using the +`pkg_add` remote option. For this example, we're installing `riak-3.0.3.txz`. + +### For FreeBSD 11.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/3.0/3.0.3/freebsd/11.1/riak-3.0.3.txz +``` + + +### For FreeBSD 10.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/3.0/3.0.3/freebsd/10.4/riak-3.0.3.txz +``` + +When Riak is installed, a message is displayed with information about the installation and available documentation. + +``` +Thank you for installing Riak. + +Riak has been installed in /usr/local owned by user:group riak:riak + +The primary directories are: + + {platform_bin_dir, "/usr/local/sbin"} + {platform_data_dir, "/var/db/riak"} + {platform_etc_dir, "/usr/local/etc/riak"} + {platform_lib_dir, "/usr/local/lib/riak"} + {platform_log_dir, "/var/log/riak"} + +These can be configured and changed in the platform_etc_dir/app.config. + +Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly. + +Man pages are available for riak(1) and riak-admin(1) +``` + +## Installing From Source + +Installing Riak from source on FreeBSD is a straightforward process which requires installation of more dependencies (such as Erlang) prior to building, and requires more time than a binary package installation. + +That said, installing from source provides for greater flexibility with respect to configuration, data root locations, and more fine grained control over specific dependency versions. + +### Prerequisites and Dependencies + +When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build. + +If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. + +* Erlang ([Installing Erlang][install source erlang]) +* Curl +* Git +* OpenSSL (version 1.0.0_7) +* Python +* sudo +* flex + +### Installation +First download the version you wish to install from the [downloads][downloads]. + +Next, unpack and build a release from source: + +```bash +tar zxf <riak-x.x.x> +cd riak-x.x.x +gmake rel +``` + +Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories: + +```bash +bin # Riak binaries +data # Riak data and metadata +erts-5.9.2 # Erlang Run-Time System +etc # Riak Configuration +lib # Third party libraries +log # Operational logs +releases # Release information +``` + +If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this: + +```bash +gmake devrel +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
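+
+If you built the `devrel` target, you can bring up the first of the generated nodes directly from the source tree. A minimal sketch, assuming the conventional `dev/dev1` through `dev/dev4` layout (the exact paths may vary; check the output of the build):
+
+```bash
+# Start and ping the first development node:
+dev/dev1/bin/riak start
+dev/dev1/bin/riak ping
+```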
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/installing/mac-osx.md b/content/riak/kv/3.0.3/setup/installing/mac-osx.md
new file mode 100644
index 0000000000..f39c51724c
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/installing/mac-osx.md
@@ -0,0 +1,121 @@
+---
+title_supertext: "Installing on"
+title: "Mac OS X"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Mac OS X"
+    identifier: "installing_macosx"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/kv/3.0.3/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/3.0.3/installing/mac-osx/
+  - /riak/kv/3.0.3/installing/mac-osx/
+---
+
+
+
+[perf open files]: {{<baseurl>}}riak/kv/3.0.3/using/performance/open-files-limit
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/verify
+
+The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball.
+
+> **`ulimit` on OS X**
+>
+> OS X gives you a very small limit on open file handles, so even with a
+backend that uses very few file handles, it's possible to run out. See
+[Open Files Limit][perf open files] for more information about changing the limit.
+
+
+## From Precompiled Tarballs
+
+To run Riak from our precompiled tarball, run these commands for the
+appropriate platform:
+
+### 64-bit
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/3.0/3.0.3/osx/10.11/riak-3.0.3-OSX-x86_64.tar.gz
+tar xzvf riak-3.0.3-OSX-x86_64.tar.gz
+```
+
+After the release is untarred, you will be able to `cd` into the `riak`
+directory and execute `bin/riak start` to start the Riak node.
+
+## Homebrew
+
+{{% note title="Warning: Homebrew not always up to date" %}}
+Homebrew's Riak recipe is community supported, and thus is not always up to
+date with the latest Riak package. Please ensure that the current recipe is
+using the latest supported code (and don't be afraid to update it if it's
+not).
+{{% /note %}}
+
+Installing Riak 3.0.3 with [Homebrew](http://brew.sh/) is easy:
+
+```bash
+brew install --devrel riak
+```
+
+By default, this will place a versioned Riak folder in
+`/usr/local/Cellar/riak`.
+
+Be aware that you will most likely see the following message after
+running `brew install`:
+
+```
+Error: The `brew link` step did not complete successfully
+The formula built, but is not symlinked into /usr/local
+
+You can try again using:
+  brew link riak
+```
+
+We do not recommend using `brew link` with Riak. Instead, we recommend
+either copying that directory to a desired location on your machine,
+aliasing the executables in the `/bin` directory, or interacting with
+the Riak installation directory via environment variables.
+
+**Note**: Homebrew will install Erlang if you don't have it already.
+
+## Installing From Source
+
+You must have Xcode tools installed from [Apple's Developer
+website](http://developer.apple.com/).
+
+{{% note title="Note on Clang" %}}
+Riak has had problems compiling with Clang in the past. As of Riak KV
+2.9.0p5 and Clang 902.0.39.1, Clang can build Riak.
+{{% /note %}}
+
+Riak requires [Erlang](http://www.erlang.org/) R16B02+.
+
+If you do not have Erlang already installed, see [Installing Erlang][install source erlang].
+
+Next, download and unpack the source distribution.
+ +```bash +curl -O https://files.tiot.jp/riak/kv/3.0/3.0.3/riak-3.0.3.tar.gz +tar zxvf riak-3.0.3.tar.gz +cd riak-3.0.3 +make rel +``` + +If you receive errors when building about "incompatible architecture," +please verify that you built Erlang with the same architecture as your +system (Snow Leopard and higher: 64bit). + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/3.0.3/setup/installing/rhel-centos.md b/content/riak/kv/3.0.3/setup/installing/rhel-centos.md new file mode 100644 index 0000000000..5b13e931b0 --- /dev/null +++ b/content/riak/kv/3.0.3/setup/installing/rhel-centos.md @@ -0,0 +1,134 @@ +--- +title_supertext: "Installing on" +title: "RHEL and CentOS" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "RHEL & CentOS" + identifier: "installing_rhel_centos" + weight: 304 + parent: "installing" +toc: true +aliases: + - /riak/3.0.3/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/kv/3.0.3/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/3.0.3/installing/rhel-centos/ + - /riak/kv/3.0.3/installing/rhel-centos/ +--- + + + +[install source index]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/source +[install source erlang]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/verify + +Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary +package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on +CentOS/RHEL 6.9, 7.5.1804 and 8.1.1911 . + +> **Note on SELinux** +> +> CentOS enables SELinux by default, so you may need to disable SELinux if +you encounter errors. + +## Installing From Package + +If you wish to install the RHEL/CentOS packages by hand, follow these +instructions. 
+
+### For CentOS 8 / RHEL 8
+
+Before installing Riak on CentOS 8/RHEL 8, we need to satisfy some Erlang dependencies
+from EPEL first by installing the EPEL repository:
+
+```bash
+sudo yum install -y epel-release
+```
+
+Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3-1.el8.x86_64.rpm
+sudo yum localinstall -y riak-3.0.3-1.el8.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3-1.el8.x86_64.rpm
+sudo rpm -Uvh riak-3.0.3-1.el8.x86_64.rpm
+```
+
+### For CentOS 7 / RHEL 7
+
+You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3-1.el7.x86_64.rpm
+sudo yum localinstall -y riak-3.0.3-1.el7.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3-1.el7.x86_64.rpm
+sudo rpm -Uvh riak-3.0.3-1.el7.x86_64.rpm
+```
+
+### For CentOS 6 / RHEL 6
+
+You can install Riak on CentOS 6/RHEL 6 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/6/riak-3.0.3-1.el6.x86_64.rpm
+sudo yum localinstall -y riak-3.0.3-1.el6.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/6/riak-3.0.3-1.el6.x86_64.rpm
+sudo rpm -Uvh riak-3.0.3-1.el6.x86_64.rpm
+```
+
+## Installing From Source
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+Building from source will require the following packages:
+
+* `gcc`
+* `gcc-c++`
+* `glibc-devel`
+* `make`
+* `pam-devel`
+
+You can install these with yum:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
+```
+
+Now we can download and install Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.3/riak-3.0.3.tar.gz
+tar zxvf riak-3.0.3.tar.gz
+cd riak-3.0.3
+make rel
+```
+
+You will now have a fresh build of Riak in the `rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/installing/smartos.md b/content/riak/kv/3.0.3/setup/installing/smartos.md
new file mode 100644
index 0000000000..33b73b1736
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/installing/smartos.md
@@ -0,0 +1,119 @@
+---
+title_supertext: "Installing on"
+title: "SmartOS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "SmartOS"
+    identifier: "installing_smartos"
+    weight: 305
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/installing/Installing-on-SmartOS
+  - /riak/kv/3.0.3/ops/building/installing/Installing-on-SmartOS
+  - /riak/3.0.3/installing/smartos/
+  - /riak/kv/3.0.3/installing/smartos/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/verify
+
+{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
+SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**.
They demonstrate installation of a Riak node on SmartOS as the root user. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open +files limit is at least 65536. Check the current limits to verify this: + +```bash +ulimit -a +``` + +To temporarily increase this limit *for the life of your session*, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`: + +```bash +set rlim_fd_max=65536 +``` + +## Choosing a Version + +SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is the way to determine which Riak package to use. + +The thing that really matters for Riak is what dataset was used to make the SmartOS VM. These datasets come from joyent and appear like this with the `dsadm` command: + +``` +fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4 +f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1 +``` + +This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use. + +For Joyent Cloud users who don't know what dataset was used, in the guest zone type: + +``` +cat /opt/local/etc/pkgin/repositories.conf +``` + +* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* you need to use the `1.8` download. +* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* you need to use the `1.6` download. + +## Download and Install + +Download your version of the Riak binary package for SmartOS: + +```bash +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz +``` + +Next, install the package: + +``` +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz +``` + +After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: + +```bash +svcadm -v enable -r riak +``` + +Finally, after enabling the services, check to see that they are online: + +``` +svcs -a | grep -E 'epmd|riak' +``` + +Output from the above command should resemble the following: + +``` +online 17:17:16 svc:/network/epmd:default +online 17:17:16 svc:/application/riak:default +``` + +Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed and configured Riak as service on SmartOS. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
+ diff --git a/content/riak/kv/3.0.3/setup/installing/solaris.md b/content/riak/kv/3.0.3/setup/installing/solaris.md new file mode 100644 index 0000000000..bfec02fcbb --- /dev/null +++ b/content/riak/kv/3.0.3/setup/installing/solaris.md @@ -0,0 +1,91 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/3.0.3/ops/building/installing/Installing-on-Solaris + - /riak/kv/3.0.3/ops/building/installing/Installing-on-Solaris + - /riak/3.0.3/installing/solaris/ + - /riak/kv/3.0.3/installing/solaris/ +--- + + + +[install verify]: {{<baseurl>}}riak/kv/2.9.0p5/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
+
diff --git a/content/riak/kv/3.0.3/setup/installing/source.md b/content/riak/kv/3.0.3/setup/installing/source.md
new file mode 100644
index 0000000000..17a330df57
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/installing/source.md
@@ -0,0 +1,110 @@
+---
+title_supertext: "Installing"
+title: "Riak KV From Source"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Installing From Source"
+    identifier: "installing_source"
+    weight: 310
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/Installing-Riak-from-Source
+  - /riak/kv/3.0.3/ops/building/Installing-Riak-from-Source
+  - /riak/3.0.3/installing/source/
+  - /riak/kv/3.0.3/installing/source/
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/3.0.3/downloads/
+[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/debian-ubuntu/#installing-from-source
+[install freebsd#source]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/freebsd/#installing-from-source
+[install mac osx#source]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/mac-osx/#installing-from-source
+[install rhel & centos#source]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/rhel-centos/#installing-from-source
+[install verify]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/verify
+
+Riak should be installed from source if you are building on a platform
+for which a package does not exist or if you are interested in
+contributing to Riak.
+
+## Dependencies
+
+### Erlang
+
+To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release.
+
+See [Installing Erlang][install source erlang] for instructions.
+
+### Git
+
+Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build.
+
+### GCC
+
+Riak will not compile with Clang. Please make sure your default C/C++
+compiler is [GCC](https://gcc.gnu.org/).
+
+## Installation
+
+The following instructions generate a complete, self-contained build of
+Riak in `$RIAK/rel/riak` where `$RIAK` is the location of the unpacked
+or cloned source.
+
+### Installing from source package
+
+Download the Riak source package from the [Download Center][downloads] and build:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/3.0/3.0.3/riak-3.0.3.tar.gz
+tar zxvf riak-3.0.3.tar.gz
+cd riak-3.0.3
+make locked-deps
+make rel
+```
+
+### Installing from GitHub
+
+The [Riak GitHub repository](http://github.com/basho/riak) has much
+more information on building and installing Riak from source. To clone
+and build Riak from source, follow the steps below.
+
+Clone the repository using [Git](http://git-scm.com) and build:
+
+```bash
+git clone git://github.com/basho/riak.git
+cd riak
+make locked-deps
+make rel
+```
+
+## Platform-Specific Instructions
+
+For instructions about specific platforms, see:
+
+  * [Debian & Ubuntu][install debian & ubuntu#source]
+  * [FreeBSD][install freebsd#source]
+  * [Mac OS X][install mac osx#source]
+  * [RHEL & CentOS][install rhel & centos#source]
+
+If you are running Riak on a platform not in the list above and need
+help getting it up and running, join the Riak Mailing List and inquire
+there. We are happy to help you get Riak up and running.
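+
+Whichever route you take, once the build completes you can smoke-test the result straight out of the release directory. A minimal sketch, assuming the default `rel/riak` output location described above:
+
+```bash
+rel/riak/bin/riak start
+rel/riak/bin/riak ping   # a healthy node answers "pong"
+```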
+
+### Windows
+
+Riak is not currently supported on Microsoft Windows.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/installing/source/erlang.md b/content/riak/kv/3.0.3/setup/installing/source/erlang.md
new file mode 100644
index 0000000000..bc1472ab4d
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/installing/source/erlang.md
@@ -0,0 +1,571 @@
+---
+title: "Installing Erlang"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Installing Erlang"
+    identifier: "installing_source_erlang"
+    weight: 301
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/installing/erlang
+  - /riak/kv/3.0.3/ops/building/installing/erlang
+  - /riak/3.0.3/installing/source/erlang/
+  - /riak/kv/3.0.3/installing/source/erlang/
+---
+
+[install index]: {{<baseurl>}}riak/kv/3.0.3/setup/installing
+[security basics]: {{<baseurl>}}riak/kv/3.0.3/using/security/basics
+
+Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].**
+
+> **Note on Official Support**
+>
+> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package.
+
+## Prerequisites
+
+#### Contents
+
+* [kerl](#kerl-prerequisites)
+* [Debian/Ubuntu](#debian-ubuntu-prerequisites)
+* [FreeBSD/Solaris](#freebsd-solaris-prerequisites)
+* [Mac OS X](#mac-os-x-prerequisites)
+* [RHEL/CentOS](#rhel-centos-prerequisites)
+
+To build and install Erlang you must have a GNU-compatible build system and these tools:
+
+**Unpacking**
+
+* [GNU unzip](http://www.gzip.org/) or a modern uncompressing utility.
+* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives.
+
+**Building**
+
+* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts.
+* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program.
+* [gcc](https://gcc.gnu.org/): for compiling C.
+* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces.
+* [OpenSSL](https://www.openssl.org/): toolkit that implements the SSL and TLS protocols.
+* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java.
+
+
+## kerl Prerequisites
+
+[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems.
+
+Install kerl by running the following commands:
+
+```bash
+curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl
+chmod a+x kerl
+```
+
+If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl.
+
+Otherwise, continue with [Installing with kerl](#installing-with-kerl).
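+
+Before configuring kerl for a specific platform, you can sanity-check that the script itself runs; per the kerl README, `list releases` queries the available OTP releases. Run it from the directory where you downloaded kerl:
+
+```bash
+./kerl list releases
+```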
+
+### Configuring kerl on FreeBSD/Solaris
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+                        --enable-kernel-poll --without-odbc"
+```
+
+Then check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf by running:
+
+```shell
+sudo pkg update
+sudo pkg install autoconf
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+### Configuring kerl on Mac OS X
+
+To compile Erlang as 64-bit on Mac OS X you need to instruct kerl to pass the correct flags to the `configure` command.
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+                        --enable-kernel-poll --without-odbc --enable-darwin-64bit"
+```
+
+On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).
+
+Check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf.
+
+With Homebrew:
+
+```shell
+brew install autoconf
+```
+
+Or with curl:
+
+```shell
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+
+## Debian/Ubuntu Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `apt-get` commands:
+
+```bash
+sudo apt-get update
+sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+> **Note on build output**
+>
+>These packages are not required for operation of a Riak node.
+Notes in the build output about missing support for wxWidgets can be
+safely ignored when installing Riak in a typical non-graphical server
+environment.
+
+To install packages for graphics support use the following `apt-get` command:
+
+```bash
+sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).
+
+
+
+## FreeBSD/Solaris Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `pkg` command:
+
+```bash
+sudo pkg update
+sudo pkg install gcc autoconf gmake flex
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support use the following `pkg` command:
+
+```bash
+sudo pkg install wx28-gtk2-2.8.12_4
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).
+
+
+
+## Mac OS X Prerequisites
+
+* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
+* [Homebrew](http://brew.sh/) (*optional*) - Package Manager.
First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X.
+
+We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is optional and is not required to install Erlang.
+
+Next, if you are running OS X 10.9 (Mavericks) or later, you may need to
+install [autoconf](https://www.gnu.org/software/autoconf/). To check for
+the presence of autoconf run:
+
+```bash
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf.
+
+With Homebrew:
+
+```bash
+brew install autoconf
+```
+
+Or with curl:
+
+```bash
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've installed the prerequisites, continue with [Installing on Mac OS X](#installing-on-mac-os-x).
+
+## RHEL/CentOS Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `yum` command:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support use the following `yum` command:
+
+```bash
+sudo yum install wxBase.x86_64
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos).
+
+
+
+## Installation
+
+* [Installing with kerl](#installing-with-kerl)
+* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu)
+* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris)
+* [Installing on Mac OS X](#installing-on-mac-os-x)
+* [Installing on RHEL/CentOS](#installing-on-rhel-centos)
+
+## Installing with kerl
+
+First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites).
+
+With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of
+Erlang [from GitHub](https://github.com/basho/otp) using the following
+command:
+
+```bash
+./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10
+```
+
+This builds the Erlang distribution and performs all of the steps
+required to manually install Erlang for you.
+
+After Erlang is successfully built, you can install the build as follows:
+
+```bash
+./kerl install R16B02-basho10 ~/erlang/R16B02-basho10
+. ~/erlang/R16B02-basho10/activate
+```
+
+The last line activates the Erlang build that was just installed into
+`~/erlang/R16B02-basho10`.
+
+> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands.
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
+## Installing on Debian/Ubuntu
+
+First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites).
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz).
+
+Using `wget`:
+
+```bash
+wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on FreeBSD/Solaris
+
+First make sure you have installed the necessary dependencies found in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites).
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz):
+
+```bash
+ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && gmake && sudo gmake install
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
+## Installing on Mac OS X
+
+First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites).
+
+You can install Erlang in several ways on OS X:
+
+* [From Source](#installing-on-mac-os-x-from-source)
+* [Homebrew](#installing-on-mac-os-x-with-homebrew)
+* [MacPorts](#installing-on-mac-os-x-with-macports)
+
+## Installing on Mac OS X from Source
+
+First download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz):
+
+```bash
+curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Follow the steps below to configure Erlang for your operating system.
+
+#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7)
+
+If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion
+(OS X 10.7), you can use LLVM (the default) or GCC to compile Erlang.
+
+Using LLVM:
+
+```bash
+CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+Or if you prefer GCC:
+
+```bash
+CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on Snow Leopard (OS X 10.6)
+
+If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an
+Intel processor:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on older versions of OS X
+
+If you're on a non-Intel processor or an older version of OS X:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll
+```
+
+After you've configured your system, `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with Homebrew
+
+To install Erlang with Homebrew, use this command:
+
+```bash
+brew install erlang
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with MacPorts
+
+Installing with MacPorts:
+
+```bash
+port install erlang +ssl
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on RHEL/CentOS
+
+First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites).
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz).
+
+Using `wget`:
+
+```bash
+wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+> **Note for RHEL6/CentOS6**
+>
+> In certain versions of RHEL 6 and CentOS 6, the `openssl-devel` package
+ships with Elliptic Curve Cryptography partially disabled. To
+communicate this to Erlang and prevent compile- and run-time errors, the
+environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to
+Erlang's `./configure` call.
+> +> The full `make` invocation then becomes +> +> ```bash +CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + + + + diff --git a/content/riak/kv/3.0.3/setup/installing/source/jvm.md b/content/riak/kv/3.0.3/setup/installing/source/jvm.md new file mode 100644 index 0000000000..e91ef7b23d --- /dev/null +++ b/content/riak/kv/3.0.3/setup/installing/source/jvm.md @@ -0,0 +1,55 @@ +--- +title: "Installing the JVM" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Installing the JVM" + identifier: "installing_source_jvm" + weight: 302 + parent: "installing_source" +toc: true +aliases: + - /riak/3.0.3/ops/building/installing/jvm + - /riak/kv/3.0.3/ops/building/installing/jvm + - /riak/3.0.3/ops/building/installing/Installing-the-JVM + - /riak/kv/3.0.3/ops/building/installing/Installing-the-JVM + - /riak/3.0.3/installing/source/jvm/ + - /riak/kv/3.0.3/installing/source/jvm/ +--- + +[usage search]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search + +If you are using [Riak Search 2.0][usage search], codename Yokozuna, +you will need to install **Java 1.6 or later** to run [Apache +Solr](https://lucene.apache.org/solr/), the search platform that powers +Riak Search. + +We recommend using Oracle's [JDK +7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html). +Installation packages can be found on the [Java SE 7 Downloads +page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR) +and instructions on the [documentation +page](http://www.oracle.com/technetwork/java/javase/documentation/index.html). + +## Installing Solr on OS X + +If you're using Riak Search on Mac OS X, you may see the following +error: + +```java +java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME> +``` + +If you encounter this error, we recommend manually setting the hostname +for `localhost` using +[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html). + +```bash +scutil --set HostName "localhost" +``` + + + + diff --git a/content/riak/kv/3.0.3/setup/installing/suse.md b/content/riak/kv/3.0.3/setup/installing/suse.md new file mode 100644 index 0000000000..a1b4369e76 --- /dev/null +++ b/content/riak/kv/3.0.3/setup/installing/suse.md @@ -0,0 +1,52 @@ +--- +title_supertext: "Installing on" +title: "SUSE" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "SUSE" + identifier: "installing_suse" + weight: 307 + parent: "installing" +toc: false +aliases: + - /riak/3.0.3/ops/building/installing/Installing-on-SUSE + - /riak/kv/3.0.3/ops/building/installing/Installing-on-SUSE + - /riak/3.0.3/installing/suse/ + - /riak/kv/3.0.3/installing/suse/ +--- + +[install verify]: {{<baseurl>}}riak/kv/3.0.3/setup/installing/verify + +{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}} +SUSE is no longer supported in Riak KV 2.2.3+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +Riak KV can be installed on OpenSuse and SLES systems using a binary package. 
The following steps have been tested to work with Riak on
+the following x86/x86_64 flavors of SuSE:
+
+* SLES11-SP1
+* SLES11-SP2
+* SLES11-SP3
+* SLES11-SP4
+* OpenSUSE 11.2
+* OpenSUSE 11.3
+* OpenSUSE 11.4
+
+## Installing with rpm
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm
+sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/installing/verify.md b/content/riak/kv/3.0.3/setup/installing/verify.md
new file mode 100644
index 0000000000..9adccba5c0
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/installing/verify.md
@@ -0,0 +1,169 @@
+---
+title: "Verifying a Riak KV Installation"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Verifying an Installation"
+    identifier: "installing_verify"
+    weight: 311
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/installing/Post-Installation
+  - /riak/kv/3.0.3/ops/installing/Post-Installation
+  - /riak/3.0.3/installing/verify-install/
+  - /riak/kv/3.0.3/installing/verify-install/
+---
+
+[client libraries]: {{<baseurl>}}riak/kv/3.0.3/developing/client-libraries
+[perf open files]: {{<baseurl>}}riak/kv/3.0.3/using/performance/open-files-limit
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/bucket-types
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/inspecting-node
+
+After you've installed Riak KV, we recommend checking the liveness of
+each node to ensure that requests are being properly served.
+
+In this document, we cover ways of verifying that your Riak nodes are operating
+correctly. After you've determined that your nodes are functioning and you're
+ready to put Riak KV to work, be sure to check out the resources in the
+**Now What?** section below.
+
+## Starting a Riak Node
+
+> **Note about source installations**
+>
+> To start a Riak KV node that was installed by compiling the source code, you
+can add the Riak KV binary directory from the installation directory you've
+chosen to your `PATH`.
+>
+> For example, if you compiled Riak KV from source in
+the `/home/riak` directory, then you can add the binary directory
+(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation.
+
+To start a Riak node, use the `riak start` command:
+
+```bash
+riak start
+```
+
+A successful start will return no output. If there is a problem starting the
+node, an error message is printed to standard error.
+
+To run Riak with an attached interactive Erlang console:
+
+```bash
+riak console
+```
+
+A Riak node is typically started in console mode as part of debugging or
+troubleshooting to gather more detailed information from the Riak startup
+sequence. Note that if you start a Riak node in this manner, it runs as
+a foreground process that will exit when the console is closed.
+
+You can close the console by issuing this command at the Erlang prompt:
+
+```erlang
+q().
+```
+
+Once your node has started, you can initially check that it is running with
+the `riak ping` command:
+
+```bash
+riak ping
+```
+
+The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not.
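+
+If you are checking several nodes, the exit status of `riak ping` can be scripted against. The sketch below is illustrative only: it assumes `riak` is on the `PATH` of each host, that you have SSH access, and that `riak ping` exits non-zero when the node is unreachable (worth confirming on your build):
+
+```bash
+#!/bin/sh
+# Hypothetical host list; substitute your own node addresses
+for host in riak1.example.com riak2.example.com riak3.example.com; do
+  if ssh "$host" riak ping >/dev/null 2>&1; then
+    echo "$host: up"
+  else
+    echo "$host: DOWN"
+  fi
+done
+```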
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the [bucket type][cluster ops bucket types] `test`:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /riak/test HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node!
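+
+As a quick recap, a minimal end-to-end check of a fresh node could look like the following, assuming a packaged install with default settings:
+
+```bash
+riak start        # start the node (no output on success)
+riak ping         # expect "pong"
+riak-admin test   # expect a successful read/write cycle
+riak-admin diag   # run the Riaknostic health checks
+```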
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/installing/windows-azure.md b/content/riak/kv/3.0.3/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..c7ad654b14
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/installing/windows-azure.md
@@ -0,0 +1,197 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/3.0.3/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/3.0.3/installing/windows-azure/
+  - /riak/kv/3.0.3/installing/windows-azure/
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported, and thus are not always up to
+date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Login to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+    - Provide a "Virtual Machine Name", such as "testlinuxvm".
+    - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+    - In the "New Password" box, type a strong password.
+    - In the "Confirm Password" box, retype the password.
+    - Select the appropriate "Size" from the drop down list.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+    - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list.
+    - In the "DNS Name" box, type a valid DNS address, e.g. "testlinuxvm".
+    - In the "Storage Account" box, select "Use Automatically Generated Storage Account".
+    - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration2.png)
+
+6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration3.png)
+
+7. Wait while Windows Azure prepares your virtual machine.
+
+### Configure Endpoints
+
+Once the virtual machine is created, you must configure endpoints in order to connect to it remotely.
+
+1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints".
+
+2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows:
+    - Name: https
+    - Protocol: leave set to 'TCP'
+    - Public Port: 443
+    - Private Port: 8069
+
+## Connect to CentOS VMs using PuTTY or SSH
+
+When the virtual machine has been provisioned and the endpoints configured, you can connect to it using SSH or PuTTY.
+
+### Connecting Using SSH
+
+**For Linux & Mac Users:**
+
+```bash
+ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180
+```
+
+Enter the user's password.
+
+**For Windows Users, use PuTTY:**
+
+If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html).
+
+1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe.
+
+2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port.
+
+    ![]({{<baseurl>}}images/putty.png)
+
+## Install Riak and configure using a shell script
+
+1. **On each node**, once you've connected using the steps above, execute:
+
+```bash
+sudo su -
+curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh
+```
+
+## Configure Riak using Riak Control
+
+You can either use Riak Control or the command line to add nodes to your Riak Cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line".
+
+1. Find the DNS name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For example:
+    - **dns:** basho-example.cloudapp.net
+    - **Deployment ID:** 7ea145743aeb4402a088da1234567890
+
+2. Visit https://dns-name.cloudapp.net/admin in your browser.
+
+3. Enter 'admin' as the username, and the "Deployment ID" as the password.
+
+4. Select 'Cluster' on the left.
+
+5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each VM, not the DNS name. For example:
+    - riak@basho-centos1
+
+You now have a Riak cluster on Azure.
+
+## Configure Riak using Command Line
+
+If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section.
+
+First, SSH into the second (and subsequent) nodes and execute:
+
+```bash
+riak-admin cluster join riak@yourhostnamehere
+```
+
+(Where 'yourhostnamehere' is the short name of the **first node** in your cluster)
+
+(NOTE: The host you choose can actually be any host that has already joined the cluster. The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak-admin cluster plan
+```
+
+Verify all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak-admin cluster commit
+```
+
+To check the status of clustering use:
+
+```bash
+riak-admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning.md b/content/riak/kv/3.0.3/setup/planning.md
new file mode 100644
index 0000000000..60f4100fe0
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning.md
@@ -0,0 +1,61 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster.
+
+[Learn More >>][plan best practices]
+
+
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/backend.md b/content/riak/kv/3.0.3/setup/planning/backend.md
new file mode 100644
index 0000000000..5eecd1a730
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/backend.md
@@ -0,0 +1,60 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/planning/backends/
+  - /riak/kv/3.0.3/ops/building/planning/backends/
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/multi
+[plan backend leveled]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveled
+[dev api backend]: {{<baseurl>}}riak/kv/3.0.3/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+* [Leveled][plan backend leveled]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic                       |Bitcask|LevelDB|Memory|
+:-----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend                         |✓      |       |      |
+Persistent                                      |✓      |✓      |      |
+Keyspace in RAM                                 |✓      |       |✓     |
+Keyspace can be greater than available RAM      |       |✓      |      |
+Keyspace loaded into RAM on startup<sup>1</sup> |✓      |       |      |
+Objects in RAM                                  |       |       |✓     |
+Object expiration                               |✓      |       |✓     |
+Secondary indexes                               |       |✓      |✓     |
+Tiered storage                                  |       |✓      |      |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces.
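+
+Whichever backend you choose, you can confirm which one a running node is actually using by inspecting its effective configuration. This is a minimal check using the `riak config effective` command that the backend-specific pages rely on:
+
+```bash
+# Shows the effective storage backend setting of the local node,
+# e.g. "storage_backend = bitcask"
+riak config effective | grep backend
+```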
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/backend/bitcask.md b/content/riak/kv/3.0.3/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..e8fc31cb39
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/backend/bitcask.md
@@ -0,0 +1,994 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/advanced/backends/bitcask/
+  - /riak/kv/3.0.3/ops/advanced/backends/bitcask/
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/3.0.3/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: (1) data that is
+  written to Bitcask doesn't need to be ordered on disk, and (2) the
+  log-structured design of Bitcask allows for minimal disk head
+  movement during writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append only and write once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes.
Recovery operations need to review only the last + record or two written and verify CRC data to ensure that the data is + consistent. + +* **Easy Backup** + + In most systems, backup can be very complicated. Bitcask simplifies + this process due to its append-only, write-once disk format. Any + utility that archives or copies files in disk-block order will + properly back up or copy a Bitcask database. + +## Weaknesses + +* Keys must fit in memory + + Bitcask keeps all keys in memory at all times, which means that your + system must have enough memory to contain your entire keyspace, plus + additional space for other operational components and operating- + system-resident filesystem buffer space. + +## Installing Bitcask + +Bitcask is the default storage engine for Riak. You can verify that +Bitcask is currently being used as the storage backend with the +[`riak`][use admin riak cli] command interface: + +```bash +riak config effective | grep backend +``` + +If this operation returns anything other than `bitcask`, read +the following section for instructions on switching the backend to Bitcask. + +## Enabling Bitcask + +You can set Bitcask as the storage engine using each node's +[configuration files][config reference]: + +```riakconf +storage_backend = bitcask +``` + +```appconfig +{riak_kv, [ + {storage_backend, riak_kv_bitcask_backend}, + %% Other riak_kv settings... + + ]}, +``` + +## Configuring Bitcask + +Bitcask enables you to configure a wide variety of its behaviors, from +filesystem sync strategy to merge settings and more. + +> **Note on configuration systems** +> +> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file. +> Instructions for both systems will be included below. Narrative +descriptions of the various settings will be tailored to the newer +configuration system, whereas instructions for the older system will +largely be contained in the code tabs. + +The default configuration values for Bitcask are as follows: + +```riakconf +bitcask.data_root = ./data/bitcask +bitcask.io_mode = erlang +``` + +```appconfig +{bitcask, [ + {data_root, "/var/lib/riak/bitcask"}, + {io_mode, erlang}, + + %% Other Bitcask-specific settings + ]} +``` + +All of the other available settings listed below can be added to your +configuration files. + +### Open Timeout + +The open timeout setting specifies the maximum time Bitcask will block +on startup while attempting to create or open the Bitcask data +directory. The default is 4 seconds. + +In general, you will not need to adjust this setting. If, however, you +begin to receive log messages of the form `Failed to start bitcask +backend: ...`, you may want to consider using a longer timeout. + +Open timeout is specified using the `bitcask.sync.open_timeout` +parameter, and can be set in terms of seconds, minutes, hours, etc. +The following example sets the parameter to 10 seconds: + +```riakconf +bitcask.sync.open_timeout = 10s +``` + +```appconfig +{bitcask, [ + ..., + {open_timeout, 10} %% This value must be expressed in seconds + ... + ]} +``` + +### Sync Strategy + +Bitcask enables you to configure the durability of writes by specifying +when to synchronize data to disk, i.e. by choosing a sync strategy. The +default setting (`none`) writes data into operating system buffers that +will be written to disk when those buffers are flushed by the operating +system. 
If the system fails before those buffers are flushed, e.g. due
+to power loss, that data is lost. This possibility holds for any
+database in which values are asynchronously flushed to disk.
+
+Thus, using the default setting of `none` protects against data loss in
+the event of application failure, i.e. process death, but leaves open a
+small window in which data could be lost in the event of a complete
+system failure, e.g. hardware or OS failure.
+
+This possibility can be prevented by choosing the `o_sync` sync
+strategy, which forces the operating system to flush to stable storage
+at write time for every write. The effect of flushing each write is
+better durability, although it should be noted that write throughput
+will suffer because each write will have to wait for the write to
+complete.
+
+The following sync strategies are available:
+
+  * `none` - lets the operating system manage syncing writes
+    (default)
+  * `o_sync` - uses the `O_SYNC` flag, which forces syncs on every
+    write
+  * Time interval - Riak will force Bitcask to sync at specified
+    intervals
+
+The following are possible configurations:
+
+
+```riakconf
+bitcask.sync.strategy = none
+bitcask.sync.strategy = o_sync
+
+bitcask.sync.strategy = interval
+bitcask.sync.interval = 65s
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {sync_strategy, none},
+    {sync_strategy, o_sync},
+    {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds
+    ...
+    ]}
+```
+
+> **Sync strategy interval limitations**
+>
+> Setting the sync interval to a value lower than or equal to
+  `riak_core.vnode_inactivity_timeout` (default: 60 seconds) will
+  prevent Riak from performing handoffs.
+>
+> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive.
+
+### Max File Size
+
+The `max_file_size` setting describes the maximum permitted size for any
+single data file in the Bitcask directory. If a write causes the current
+file to exceed this size threshold, then that file is closed, and a new
+file is opened for writes. The default is 2 GB.
+
+Increasing `max_file_size` will cause Bitcask to create fewer, larger
+files that are merged less frequently, while decreasing it will cause
+Bitcask to create more numerous, smaller files that are merged more
+frequently.
+
+To give an example, if your ring size is 16, your servers could see as
+much as 32 GB of data in the bitcask directories before the first merge
+is triggered, irrespective of your working set size. You should plan
+storage accordingly and be aware that it is possible to see disk data
+sizes that are larger than the working set.
+
+The `max_file_size` setting can be specified using kilobytes, megabytes,
+etc. The following example sets the max file size to 1 GB:
+
+```riakconf
+bitcask.max_file_size = 1GB
+```
+
+```appconfig
+%% The max_file_size setting must be expressed in bytes, as in the
+%% example below
+
+{bitcask, [
+    ...,
+    {max_file_size, 16#40000000}, %% 1 GB expressed in bytes
+    ...
+    ]}
+```
+
+### Hint File CRC Check
+
+During startup, Bitcask will read from `.hint` files in order to build
+its in-memory representation of the key space, falling back to `.data`
+files if necessary. This reduces the amount of data that must be read
+from the disk during startup, thereby also reducing the time required to
+start up.
+You can configure Bitcask to either disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) - Writes are made via Erlang's built-in file API
+* `nif` - Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif` and is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met, at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory.
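+
+Because disk usage can significantly exceed the live data set between merges, it can be useful to watch per-vnode disk consumption while tuning the settings below. A simple sketch, assuming the packaged default data root of `/var/lib/riak/bitcask` (adjust the path to your configured `data_root`):
+
+```bash
+# Show the ten largest per-vnode Bitcask directories
+du -sh /var/lib/riak/bitcask/* | sort -h | tail -n 10
+```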
+ +The merge process is affected by all of the settings described in the +sections below. In those sections, "dead" refers to keys that no longer +contain the most up-to-date values, while "live" refers to keys that do +contain the most up-to-date value and have not been deleted. + +### Merge Policy + +Bitcask enables you to select a merge policy, i.e. when during the day +merge operations are allowed to be triggered. The valid options are: + +* `always` - No restrictions on when merge operations can occur + (default) +* `never` - Merge will never be attempted +* `window` - Merge operations occur during specified hours + +If you are using the newer, `riak.conf`-based configuration system, you +can select a merge policy using the `merge.policy` setting. The +following example sets the merge policy to `never`: + +```riakconf +bitcask.merge.policy = never +``` + +```appconfig +{bitcask, [ + ..., + {merge_window, never}, + ... + ]} +``` + +If you opt to specify start and end hours for merge operations, you can +do so with the `merge.window.start` and `merge.window.end` +settings in addition to setting the merge policy to `window`. +Each setting is an integer between 0 and 23 for hours on a 24h clock, +with 0 meaning midnight and 23 standing for 11 pm. +The merge window runs from the first minute of the `merge.window.start` hour +to the last minute of the `merge.window.end` hour. +The following example enables merging between 3 am and 4:59 pm: + +```riakconf +bitcask.merge.policy = window +bitcask.merge.window.start = 3 +bitcask.merge.window.end = 17 +``` + +```appconfig +%% In the app.config-based system, you specify the merge window using +%% a tuple, as in the following example: + +{bitcask, [ + ..., + {merge_window, {3, 17}}, + ... + ]} +``` + +> **`merge_window` and the Multi backend** +> +>If you are using the older configuration system and using Bitcask with +the [Multi][plan backend multi] backend, please note that if you +wish to use a merge window, you _must_ set it in the global `bitcask` +section of your configuration file. `merge_window` settings +in per-backend sections are ignored. + +If merging has a significant impact on performance of your cluster, or +if your cluster has quiet periods in which little storage activity +occurs, you may want to change this setting from the default. + +A common way to limit the impact of merging is to create separate merge +windows for each node in the cluster and ensure that these windows do +not overlap. This ensures that at most one node at a time can be +affected by merging, leaving the remaining nodes to handle requests. +The main drawback of this approach is that merges will occur less +frequently, leading to increased disk space usage. + +### Merge Triggers + +Merge triggers determine the conditions under which merging will be +invoked. These conditions fall into two basic categories: + +* **Fragmentation** - This describes the ratio of dead keys to total + keys in a file that will trigger merging. The value of this setting is + an integer percentage (0-100). For example, if a data file contains 6 + dead keys and 4 live keys, a merge will be triggered by the default + setting (60%). Increasing this value will cause merging to occur less + often, whereas decreasing the value will cause merging to happen more + often. + +* **Dead Bytes** - This setting describes how much data stored for + dead keys in a single file will trigger merging. If a file meets or + exceeds the trigger value for dead bytes, a merge will be triggered. 
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+  When either of these constraints is met by any file in the directory,
+  Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** - This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in the merge. The
+  value of this setting is a percentage (0-100). For example, if a data
+  file contains 4 dead keys and 6 live keys, it will be included in the
+  merge at the default ratio (40%). Increasing the value will cause
+  fewer files to be merged, while decreasing the value will cause more
+  files to be merged.
+
+* **Dead Bytes** - This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be
+  included in the merge. Increasing this value will cause fewer files to
+  be merged, while decreasing this value will cause more files to be
+  merged. The default is 128 MB.
+
+* **Small File** - This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_.
If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +Fold keys thresholds will reuse the keydir (a) if another fold was +started less than a specified time interval ago and (b) there were fewer +than a specified number of updates. Otherwise, Bitcask will wait until +all current fold keys complete and then start. The default time interval +is 0, while the default number of updates is unlimited. Both thresholds +can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. Each threshold can be disabled by setting the value to +`unlimited`. 
The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value or if you need to purge data for space reasons, you can
+configure object expiration, aka expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting
+and either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time.
+Bitcask will defer triggering a merge solely for key expiry by the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting and
+in terms of minutes, hours, days, etc. The following example sets the
+grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If search queries returning
+expired keys are a problem for your use case, then we recommend not
+using automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can assist you in making Bitcask as stable and reliable as
+possible and to minimize latency and maximize throughput.
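+
+Before applying the tips below, it can help to confirm two environmental basics from a shell on each node: the open files limit and whether the data volume is mounted with `noatime`. Both checks use standard tools:
+
+```bash
+# Current per-process open files limit for this shell
+ulimit -n
+
+# List mounts that already use the noatime option
+mount | grep -w noatime
+```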
+
+### Tips & Tricks
+
+  * **Bitcask depends on filesystem caches**
+
+    Some data storage layers implement their own page/block buffer cache
+    in-memory, but Bitcask does not. Instead, it depends on the
+    filesystem's cache. Adjusting the caching characteristics of your
+    filesystem can impact performance.
+
+  * **Be aware of file handle limits**
+
+    Review the documentation on [open files limit][perf open files].
+
+  * **Avoid the overhead of updating file metadata (such as last access
+    time) on every read or write operation**
+
+    You can achieve a substantial speed boost by adding the `noatime`
+    mounting option to Linux's `/etc/fstab`. This will disable the
+    recording of the last accessed time for all files, which results
+    in fewer disk head seeks. If you need last access times but you'd
+    like some of the benefits of this optimization, you can try
+    `relatime`.
+
+    ```
+    /dev/sda5 /data ext3 noatime 1 1
+    /dev/sdb1 /data/inno-log ext3 noatime 1 2
+    ```
+
+  * **Small number of frequently changed keys**
+
+    When keys are changed frequently, fragmentation rapidly increases.
+    To counteract this, you should lower the fragmentation trigger and
+    threshold.
+
+  * **Limited disk space**
+
+    When disk space is limited, limiting the space occupied by dead keys
+    is of paramount importance. Lower the dead bytes threshold and
+    trigger to counteract wasted space.
+
+  * **Purging stale entries after a fixed period**
+
+    To automatically purge stale values, set the object expiry value to
+    the desired cutoff time. Keys that are not modified for a period
+    equal to or greater than this time interval will become
+    inaccessible.
+
+  * **High number of partitions per node**
+
+    Because each cluster has many partitions running, Bitcask will have
+    many [open files][perf open files]. To reduce the number of open
+    files, we suggest increasing the max file size so that larger files
+    will be written. You could also decrease the fragmentation and
+    dead-bytes settings and increase the small file threshold so that
+    merging will keep the number of open files small in number.
+
+  * **High daytime traffic, low nighttime traffic**
+
+    In order to cope with a high volume of writes without performance
+    degradation during the day, you might want to limit merging to
+    non-peak periods. Setting the merge window to hours of the day
+    when traffic is low will help.
+
+  * **Multi-cluster replication**
+
+    If you are using Riak with the replication feature enabled, your clusters might experience
+    higher rates of fragmentation and dead bytes. Additionally,
+    because the fullsync feature operates across entire partitions, it
+    will be made more efficient by accessing data as sequentially as
+    possible (across fewer files). Lowering both the fragmentation and
+    dead-bytes settings will improve performance.
+
+## FAQ
+
+  * [[Why does it seem that Bitcask merging is only triggered when a
+    Riak node is restarted?|Developing on Riak
+    FAQs#why-does-it-seem-that-bitc]]
+  * [[If the size of key index exceeds the amount of memory, how does
+    Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+  * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time.
The file being
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, and
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. Because Bitcask writes new
+values without touching the old ones, this data management strategy can
+use up a lot of space over time.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033323.0.344576013453623296
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+9819464125817003981681007469812.9.03831329677312
+```
+
+Note that on startup, a directory is created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 116463411724806326294356135107078803128832.9.0184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered.
+ +``` +bitcask/ +|-- 0-1317147619996589 +| |-- 1317147974.bitcask.data +| |-- 1317147974.bitcask.hint +| |-- 1317221578.bitcask.data +| |-- 1317221578.bitcask.hint +| |-- 1317221869.bitcask.data +| |-- 1317221869.bitcask.hint +| |-- 1317222847.bitcask.data +| |-- 1317222847.bitcask.hint +| |-- 1317222868.bitcask.data +| |-- 1317222868.bitcask.hint +| |-- 1317223014.bitcask.data +| `-- 1317223014.bitcask.hint +|-- 1004782375664995756265033323.0.344576013453623296-1317147628760580 +| |-- 1317147693.bitcask.data +| |-- 1317147693.bitcask.hint +| |-- 13172.9.05.bitcask.data +| |-- 13172.9.05.bitcask.hint +| |-- 1317222514.bitcask.data +| |-- 1317222514.bitcask.hint +| |-- 1317223035.bitcask.data +| |-- 1317223035.bitcask.hint +| |-- 1317223411.bitcask.data +| `-- 1317223411.bitcask.hint +|-- 1027618338748291114361965898003636498195577569280-1317223690337865 +|-- 1050454301831586472458898473514828420377701515264-1317223690151365 + +... etc ... + +``` + +This is normal operational behavior for Bitcask. + + + + diff --git a/content/riak/kv/3.0.3/setup/planning/backend/leveldb.md b/content/riak/kv/3.0.3/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..d08f8f1b52 --- /dev/null +++ b/content/riak/kv/3.0.3/setup/planning/backend/leveldb.md @@ -0,0 +1,506 @@ +--- +title: "LevelDB" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "LevelDB" + identifier: "planning_backend_leveldb" + weight: 101 + parent: "planning_choose_backend" +toc: true +aliases: + - /riak/3.0.3/ops/advanced/backends/leveldb/ + - /riak/kv/3.0.3/ops/advanced/backends/leveldb/ +--- + +[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}} +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode +[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference +[perf index]: {{<baseurl>}}riak/kv/3.0.3/using/performance +[config reference#aae]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#active-anti-entropy + +> **Note on upgrading to 2.0** +> +> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0, +and wish to keep using your old `app.config` file for configuration, +make sure to follow the steps for setting the +`total_leveldb_mem_percent` parameter in the +[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB]. + +[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application +that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an +open-source, on-disk key/value store created by Google Fellows Jeffrey +Dean and Sanjay Ghemawat. + +LevelDB is a relatively new entrant into the growing list of key/value +database libraries, but it has some very interesting qualities that we +believe make it an ideal candidate for use in Riak. LevelDB's storage +architecture is more like +[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable +model than it is like Bitcask. This design and implementation provide +the possibility of a storage engine without Bitcask's RAM limitation. + +> **Note:** Riak uses a fork of LevelDB. The code can be found +[on Github](https://github.com/basho/leveldb). + +A number of changes have been introduced in the LevelDB backend in Riak +2.0: + +* There is now only _one_ performance-related setting that Riak users + need to define---`leveldb.total_mem_percent`---as LevelDB now + dynamically sizes the file cache and block sizes based upon active + [vnodes][glossary vnode] assigned to the node. 
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model
+  for background compaction work on `.sst` table files. The new model
+  has increased throughput by at least 10% in all test scenarios.
+* Delete operations now receive priority handling in compaction
+  selection, which means more aggressive reclaiming of disk space than
+  in previous versions of Riak's LevelDB backend.
+* Nodes storing massive key datasets (e.g. in the billions of keys) now
+  receive increased throughput due to automatic management of LevelDB's
+  block size parameter. This parameter is slowly raised to increase the
+  number of files that can be open simultaneously, improving random read
+  performance.
+
+## Strengths
+
+1. **License** - The LevelDB and eLevelDB licenses are the [New BSD
+   License](http://www.opensource.org/licenses/bsd-license.php) and the
+   [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html),
+   respectively. We'd like to thank the authors of LevelDB at Google for
+   choosing a completely FLOSS license so that everyone can benefit from
+   this innovative storage engine.
+2. **Data compression** - LevelDB provides two compression algorithms
+   to reduce storage size and make more efficient use of storage
+   bandwidth:
+   * Google's [Snappy](https://code.google.com/p/snappy/) data compression
+   * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data
+     compression
+
+   Enabling compression means more CPU usage but less disk space. Compression
+   is especially good for text data, including raw text, Base64, JSON, etc.
+
+## Weaknesses
+
+1. Read access can be slow when there are many levels to search.
+2. LevelDB may have to do a few disk seeks to satisfy a read: one disk
+   seek per level. If 10% of the database fits in memory, only one seek
+   is needed, for the last level (since all of the earlier levels should
+   end up cached in the OS buffer cache for most filesystems), whereas
+   if 1% fits in memory, LevelDB will need two seeks.
+
+## Installing eLevelDB
+
+Riak ships with eLevelDB included within the distribution, so there is
+no separate installation required. However, Riak is configured to use
+the Bitcask storage engine by default. To switch to eLevelDB, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveldb`:
+
+```riakconf
+storage_backend = leveldb
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_eleveldb_backend},
+    %% ...
+    ]}
+```
+
+## Configuring eLevelDB
+
+eLevelDB's default behavior can be modified by adding or changing
+parameters in your [`riak.conf`][config reference]. The section below
+details the parameters you'll use to modify eLevelDB.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for eLevelDB are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.data_root` | LevelDB data root | `./data/leveldb`
+`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size. | `70`
+
+If you are using the older, `app.config`-based system, the equivalent
+of `leveldb.data_root` is the `data_root` setting, as in the following
+example:
+
+```appconfig
+{eleveldb, [
+    {data_root, "/path/to/leveldb"},
+
+    %% Other eleveldb-specific settings
+]}
+```
+
+The `leveldb.maximum_memory.percent` setting is only available in the
+newer configuration system.
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### ext4 Options
+
+The ext4 filesystem defaults include two options that increase integrity
+but slow performance. Because Riak's integrity is based on multiple
+nodes holding the same data, these two options can be changed to boost
+LevelDB's performance. We recommend setting `barrier=0` and
+`data=writeback`.
+
+#### CPU Throttling
+
+If CPU throttling is enabled, disabling it can boost LevelDB performance
+in some cases.
+
+#### No Entropy
+
+If you are using the HTTPS protocol, note that the 2.6 kernel is widely
+known for stalling programs waiting for SSL entropy bits. In that case,
+we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's boot
+line. The TSC clocksource has been identified as causing issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a
+Google-sponsored open source project that has been incorporated into an
+Erlang application and integrated into Riak for storage of key/value
+information on disk. The implementation of LevelDB is similar in spirit
+to the representation of a single Bigtable tablet (section 5.3 of the
+Bigtable paper).
+
+### How Levels Are Managed
+
+LevelDB uses a memtable/sstable design. The set of sorted tables is
+organized into a sequence of levels. Each level stores approximately ten
+times as much data as the level before it. The sorted table generated
+from a flush is placed in a special young level (also called level-0).
+When the number of young files exceeds a certain threshold (currently
+four), all of the young files are merged together with all of the
+overlapping level-1 files to produce a sequence of new level-1 files (a
+new level-1 file is created for every 2MB of data).
+
+Files in the young level may contain overlapping keys. However, files in
+other levels have distinct, non-overlapping key ranges. Consider level
+number L where L >= 1. When the combined size of files in level-L
+exceeds (10^L) MB (i.e. 10MB for level-1, 100MB for level-2, ...), one
+file in level-L, and all of the overlapping files in level-(L+1), are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-L+1). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 seconds.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes files / desired files instead.
+The level with the highest score is compacted.
+
+When compacting L0, the only special case to consider is that, after
+picking the primary L0 file to compact, LevelDB will check other L0
+files to determine the degree to which they overlap. Because this is an
+attempt to avoid some I/O, we can expect L0 compactions to usually, if
+not always, be "all L0 files".
+
+See the `PickCompaction` routine in
+[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level, since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition
+ring, which results in 64 separate directories, each with its own
+LevelDB database:
+
+```
+leveldb/
+|-- 0
+|   |-- 000003.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   `-- MANIFEST-000002
+|-- 1004782375664995756265033323.0.344576013453623296
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+
+... etc ...
+
+`-- 9819464125817003981681007469812.9.03831329677312
+    |-- 000005.log
+    |-- CURRENT
+    |-- LOCK
+    |-- LOG
+    |-- LOG.old
+    `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033323.0.344576013453623296
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files
+in a single database directory. In Riak 1.3, the original LevelDB code
+was modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in order to speed up
+database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write-intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels.
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays at lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from the faster
+array to the slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+shows the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows approximate sizes (in
+megabytes) for the amount of raw data stored in each level, the
+cumulative size of all levels up to the specified level, and the
+cumulative size including active anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above: the **Cumulative Size** column if you are
+  not using active anti-entropy, or the **Cumulative with AAE** column
+  if you are (i.e. if the `anti_entropy`
+  [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by each row of the column you
+  selected. The first result that exceeds your fast storage array
+  capacity will provide the level number that should be used for your
+  `leveldb.tiered` setting.
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/backend/leveled.md b/content/riak/kv/3.0.3/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..dcd51224ed
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/backend/leveled.md
@@ -0,0 +1,141 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/advanced/backends/leveled/
+  - /riak/kv/3.0.3/ops/advanced/backends/leveled/
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/3.0.3/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#active-anti-entropy
+
+[Leveled](https://github.com/martinsumner/leveled) is a simple key-value
+store based on the concept of log-structured merge trees, with the
+following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+    - Splits the storage of values between keys/metadata and body
+      (assuming some definition of metadata is provided);
+    - Allows the application to define what constitutes object metadata
+      and what constitutes the body (value part) of the object, and to
+      assign tags to objects to manage multiple object types with
+      different extraction rules;
+    - Stores keys/metadata in a merge tree and the full object in a
+      journal of CDB files, allowing for HEAD requests which have lower
+      overheads than GET requests; and
+    - Allows queries which traverse keys/metadata to be supported with
+      fewer side effects on the page cache than folds over keys/objects.
+- Support for tagging of object types and the implementation of
+  alternative store behaviour based on type:
+    - Allows for changes to extract specific information as metadata to
+      be returned from HEAD requests;
+    - Potentially usable for objects with special retention or merge
+      properties.
+- Support for low-cost clones without locking to provide for scanning
+  queries (e.g. secondary indexes), at low cost specifically where there
+  is a need to scan across keys and metadata (not values).
+- Written in Erlang as a message-passing system between actors.
+
+## Strengths
+
+1. Leveled was developed specifically as a potential backend for Riak,
+   with features such as:
+   * Support for secondary indexes
+   * Multiple fold types
+   * Auto expiry of objects
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning
+   queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and more likely to
+   suffer from edge case issues than Bitcask or LevelDB, simply because
+   they've been around longer and have been more thoroughly tested via
+   usage in customer environments.
+2. Leveled works better with medium-to-large objects. It works perfectly
+   well with small objects, but the additional disk space overhead may
+   render LevelDB a better choice if disk space is at a premium and all
+   of your data will be exclusively limited to a few KB or less. This
+   may change as Leveled matures, though.
+
+## Installing Leveled
+
+Leveled is included with Riak KV 3.0.3 and beyond, so there is no need
+to install anything further. To switch to Leveled, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_leveled_backend},
+    %% ...
+    ]}
+```
+
+## Configuring Leveled
+
+Leveled's default behavior can be modified by adding or changing
+parameters in your [`riak.conf`][config reference]. The section below
+details the parameters you'll use to modify Leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for Leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | Leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | Compression method. | `native`
+`leveled.compression_point` | Compression point - the point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | Log level - sets the minimum log level to be used within Leveled. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
+`leveled.max_run_length` | Max Journal files per compaction run. | `4`
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### No Entropy
+
+If you are using the HTTPS protocol, note that the 2.6 kernel is widely
+known for stalling programs waiting for SSL entropy bits. In that case,
+we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's boot
+line.
+The TSC clocksource has been identified as causing issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[Leveled](https://github.com/martinsumner/leveled) is an open source
+project that has been developed specifically as a backend option for
+Riak, rather than as a generic backend.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/backend/memory.md b/content/riak/kv/3.0.3/setup/planning/backend/memory.md
new file mode 100644
index 0000000000..fd3b8eb008
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/backend/memory.md
@@ -0,0 +1,147 @@
+---
+title: "Memory"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Memory"
+    identifier: "planning_backend_memory"
+    weight: 102
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/advanced/backends/memory/
+  - /riak/kv/3.0.3/ops/advanced/backends/memory/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/multi
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb
+
+The Memory storage backend uses in-memory tables to store all data.
+This data is never persisted to disk or to any other storage mechanism.
+The Memory storage engine is best used for testing Riak clusters or for
+storing small amounts of transient state in production systems.
+
+Internally, the Memory backend uses Erlang ETS tables to manage data.
+More information can be found in the
+[official Erlang documentation](http://www.erlang.org/doc/man/ets.html).
+
+## Enabling the Memory Backend
+
+To enable the memory backend, edit your [configuration files][config reference]
+for each Riak node and specify the Memory backend as shown in the following
+example:
+
+```riakconf
+storage_backend = memory
+```
+
+```appconfig
+{riak_kv, [
+    ...,
+    {storage_backend, riak_kv_memory_backend},
+    ...
+    ]}
+```
+
+**Note**: If you *replace* the existing specified backend by removing it
+or commenting it out, data belonging to the previously specified backend
+will still be preserved on the filesystem but will no longer be
+accessible through Riak unless the backend is enabled again.
+
+If you require multiple backends in your configuration, please consult
+the [Multi backend documentation][plan backend multi].
+
+## Configuring the Memory Backend
+
+The Memory backend enables you to configure two fundamental aspects of
+object storage: maximum memory usage per [vnode][glossary vnode]
+and object expiry.
+
+### Max Memory
+
+This setting specifies the maximum amount of memory consumed by the
+Memory backend. It's important to note that this setting acts on a
+*per-vnode basis*, not on a per-node or per-cluster basis. This should
+be taken into account when planning for memory usage with the Memory
+backend, as the total memory used will be the max memory times the
+number of vnodes in the cluster.
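+
+As a back-of-the-envelope sketch (the cluster shape here is
+hypothetical): with a ring size of 64 spread across 4 nodes, each node
+hosts roughly 64 / 4 = 16 vnodes. If you budget 8 GB of a node's RAM for
+the Memory backend, that works out to 8 GB / 16 = 512 MB per vnode:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 512MB
+```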
+
+When the threshold value that you set has been met in a particular
+vnode, Riak will begin discarding objects, beginning with the oldest
+object and proceeding until memory usage falls below the allowable
+threshold.
+
+You can configure maximum memory using the
+`memory_backend.max_memory_per_vnode` setting. You can specify
+`max_memory_per_vnode` however you'd like, using kilobytes, megabytes,
+or even gigabytes.
+
+The following are all possible settings:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 500KB
+memory_backend.max_memory_per_vnode = 10MB
+memory_backend.max_memory_per_vnode = 2GB
+```
+
+```appconfig
+%% In the app.config-based system, the equivalent setting is max_memory,
+%% which must be expressed in megabytes:
+
+{riak_kv, [
+    %% storage_backend specifies the Erlang module defining the storage
+    %% mechanism that will be used on this node.
+
+    {storage_backend, riak_kv_memory_backend},
+    {memory_backend, [
+        ...,
+        {max_memory, 4096}, %% 4GB in megabytes
+        ...
+    ]}
+]}
+```
+
+To determine an optimal max memory setting, we recommend consulting the
+documentation on [LevelDB cache size][plan backend leveldb].
+
+### TTL
+
+The time-to-live (TTL) parameter specifies the amount of time an object
+remains in memory before it expires. The minimum time is one second.
+
+In the newer, `riak.conf`-based configuration system, you can specify
+`ttl` in seconds, minutes, hours, days, etc. The following are all
+possible settings:
+
+```riakconf
+memory_backend.ttl = 1s
+memory_backend.ttl = 10m
+memory_backend.ttl = 3h
+```
+
+```appconfig
+%% In the app.config-based system, the ttl setting must be expressed in
+%% seconds:
+
+{memory_backend, [
+    %% other settings
+    {ttl, 86400}, %% Set to 1 day
+    %% other settings
+    ]}
+```
+
+> **Dynamically Changing `ttl`**
+>
+> There is currently no way to dynamically change the `ttl` setting for a
+bucket or bucket type. The current workaround would be to define
+multiple Memory backends using the Multi backend, each with different
+`ttl` values. For more information, consult the documentation on the
+[Multi][plan backend multi] backend.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/backend/multi.md b/content/riak/kv/3.0.3/setup/planning/backend/multi.md
new file mode 100644
index 0000000000..8b6f5f247d
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/backend/multi.md
@@ -0,0 +1,230 @@
+---
+title: "Multi-backend"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Multi-backend"
+    identifier: "planning_backend_multi"
+    weight: 103
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/advanced/backends/multi/
+  - /riak/kv/3.0.3/ops/advanced/backends/multi/
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/memory
+[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types
+[use admin riak-admin cli]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin
+
+Riak allows you to run multiple backends within a single Riak cluster.
+Selecting the Multi backend enables you to use different storage
+backends for different [buckets][concept buckets].
+Any combination of the three
+available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used.
+
+## Configuring Multiple Backends
+
+You can set up your cluster to use the Multi backend using Riak's
+[configuration files][config reference].
+
+```riakconf
+storage_backend = multi
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_multi_backend},
+    %% ...
+]},
+```
+
+Remember that you must stop and then restart each node when you change
+storage backends or modify any other configuration.
+
+## Using Multiple Backends
+
+In Riak 2.0 and later, we recommend using multiple backends by applying
+them to buckets [using bucket types][usage bucket types]. Assuming that
+the cluster has already been configured to use the `multi` backend, this
+process involves three steps:
+
+1. Creating a bucket type that enables buckets of that type to use the
+   desired backends
+2. Activating that bucket type
+3. Setting up your application to use that type
+
+Let's say that we've set up our cluster to use the Multi backend and we
+want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory]
+backend for different sets of data. First, we need to create two bucket
+types, one which sets the `backend` bucket property to `leveldb` and the
+other which sets that property to `memory`. All bucket type-related
+activity is performed through the [`riak-admin`][use admin riak-admin cli]
+command interface.
+
+We'll call our bucket types `leveldb_backend` and `memory_backend`, but
+you can use whichever names you wish.
+
+```bash
+riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}'
+riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}'
+```
+
+Then, we must activate those bucket types so that they can be used in
+our cluster:
+
+```bash
+riak-admin bucket-type activate leveldb_backend
+riak-admin bucket-type activate memory_backend
+```
+
+Once those types have been activated, any objects stored in buckets
+bearing the type `leveldb_backend` will be stored in LevelDB, whereas
+all objects stored in buckets of the type `memory_backend` will be
+stored in the Memory backend.
+
+More information can be found in our documentation on [using bucket types][usage bucket types].
+
+## Configuring Backends Individually
+
+Once you've set up your cluster to use multiple backends, you can
+configure each backend on its own. All configuration options for
+LevelDB, Bitcask, and Memory are available to you when using the
+Multi backend.
+
+#### Using the Newer Configuration System
+
+If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by
+prefacing each configuration with `multi_backend`.
+ +Here is an example of the general form for configuring multiple +backends: + +```riakconf +multi_backend.$name.$setting_name = setting +``` + +If you are using, for example, the LevelDB and Bitcask backends and wish +to set LevelDB's `bloomfilter` setting to `off` and the Bitcask +backend's `io_mode` setting to `nif`, you would do that as follows: + +```riakconf +multi_backend.leveldb.bloomfilter = off +multi_backend.bitcask.io_mode = nif +``` + +#### Using the Older Configuration System + +If you are using the older, `app.config`-based configuration system, +configuring multiple backends involves adding one or more backend- +specific sections to your `riak_kv` settings (in addition to setting +the `storage_backend` setting to `riak_kv_multi_backend`, as shown +above). + +> **Note**: If you are defining multiple file-based backends of the same +type, each of these must have a separate `data_root` directory defined. + +While all configuration parameters can be placed anywhere within the +`riak_kv` section of `app.config`, in general we recommend that you +place them in the section containing other backend-related settings to +keep the settings organized. + +Below is the general form for your `app.config` file: + +```appconfig +{riak_kv, [ + %% ... + {multi_backend_default, <<"bitcask_mult">>}, + {multi_backend, [ + %% Here's where you set the individual multiplexed backends + {<<"bitcask_mult">>, riak_kv_bitcask_backend, [ + %% bitcask configuration + {data_root, "/var/lib/riak/bitcask_mult/"}, + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [ + %% bitcask configuration + {data_root, "/var/lib/riak/bitcask_expiry_mult/"}, + {expiry_secs, 86400}, + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [ + %% eleveldb configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [ + %% eleveldb with a different configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]}, + {<<"memory_mult">>, riak_kv_memory_backend, [ + %% memory configuration + {config1, ConfigValue1}, + {config2, ConfigValue2} + ]} + ]}, + %% ... +]}, +``` + +Note that in each of the subsections of the `multi_backend` setting, the +name of each backend you wish to configure can be anything you would +like. Directly after naming the backend, you must specify which of the +backends corresponds to that name, i.e. `riak_kv_bitcask_backend`, +`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have +done that, the various configurations for each named backend can be set +as objects in an Erlang list. + +## Example Configuration + +Imagine that you are using both Bitcask and LevelDB in your cluster, and +you would like storage to default to Bitcask. The following +configuration would create two backend configurations, named +`bitcask_mult` and `leveldb_mult`, respectively, while also setting the +data directory for each backend and specifying that `bitcask_mult` is +the default. + +```riakconf +storage_backend = multi + +multi_backend.bitcask_mult.storage_backend = bitcask +multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult + +multi_backend.leveldb_mult.storage_backend = leveldb +multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult + +multi_backend.default = bitcask_mult +``` + +```appconfig +{riak_kv, [ + %% ... 
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            {data_root, "/var/lib/riak/bitcask"}
+        ]},
+        {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+            {data_root, "/var/lib/riak/leveldb"}
+        ]}
+    ]}
+    %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is 50%
+or less. For example, using three backends with each set to 50% memory
+usage will inevitably lead to memory problems.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/best-practices.md b/content/riak/kv/3.0.3/setup/planning/best-practices.md
new file mode 100644
index 0000000000..db162907f1
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/best-practices.md
@@ -0,0 +1,145 @@
+---
+title: "Scaling and Operating Riak Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Best Practices"
+    identifier: "planning_best_practices"
+    weight: 105
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/planning/best-practices
+  - /riak/kv/3.0.3/ops/building/planning/best-practices
+---
+
+[use ref handoff]: {{<baseurl>}}riak/kv/3.0.3/using/reference/handoff
+[config mapreduce]: {{<baseurl>}}riak/kv/3.0.3/configuring/mapreduce
+[glossary aae]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#active-anti-entropy-aae
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes
+
+Riak KV is a database designed for easy operation and scaling. Below are
+some best practices that will enable you to improve performance and
+reliability at all stages in the life of your Riak cluster.
+
+## Disk Capacity
+
+Filling up disks is a serious problem in Riak. In general, you should
+add capacity under the following conditions:
+
+* a disk becomes more than 80% full
+* you have fewer than 10 days of capacity remaining at current rates of
+  growth
+
+## RAID Levels
+
+Riak provides resilience through its built-in redundancy.
+
+* RAID0 can be used to increase performance at the expense of
+  single-node reliability
+* RAID5/6 can be used to increase reliability over RAID0 while still
+  offering higher performance than single disks
+* You should choose a RAID level (or no RAID) that you’re comfortable
+  with
+
+## Disk Leeway
+
+* Adding new nodes instantly increases the total capacity of the
+  cluster, but you should allow enough internal network capacity that
+  [handing off][use ref handoff] existing data outpaces the arrival of new
+  data.
+* Once you’ve reached a scale at which the amount of new data arriving
+  is a small fraction of the cluster's total capacity, you can add new
+  nodes when you need them. You should be aware, however, that adding
+  new nodes can actually _increase_ disk usage on existing nodes in the
+  short term as data is rebalanced within the cluster.
+* If you anticipate running out of capacity, we recommend allowing a
+  week or two of leeway so that you have plenty of time to add nodes
+  and for [handoff][use ref handoff] to occur before the disks reach
+  capacity
+* For large volumes of storage it's usually prudent to add more capacity
+  once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+  processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you’ll have spare
+  capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be “bursty,” i.e. it tends to vary both quite
+  a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times the amount of intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+  fill up
+* the current node's IO/CPU activity is higher than average for an
+  extended period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node has 1 TB of storage
+  but the rest have 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node].
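+
+As a sketch of what adding several nodes in one operation might look
+like (the node names here are hypothetical), you stage every join first
+and then commit the plan once:
+
+```bash
+# Stage all of the planned joins before committing anything
+riak-admin cluster join riak@node4.example.com
+riak-admin cluster join riak@node5.example.com
+riak-admin cluster join riak@node6.example.com
+
+# Review the proposed ring changes, then commit them in one operation
+riak-admin cluster plan
+riak-admin cluster commit
+```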
+
+## Scaling
+
+* All large-scale systems are bound by the availability of some
+  resources
+* From a stability point of view, the best state for a busy Riak cluster
+  to maintain is the following:
+  * New network connections are limited to ensure that existing network
+    connections consume most network bandwidth
+  * CPU at < 30%
+  * Disk IO at < 90%
+* You should use HAProxy or your application servers to limit new
+  network connections to keep network and IO below 90% and CPU below
+  30%.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/3.0.3/setup/planning/bitcask-capacity-calc.md
new file mode 100644
index 0000000000..b969ce3a1d
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/bitcask-capacity-calc.md
@@ -0,0 +1,104 @@
+---
+title: "Bitcask Capacity Calculator"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Bitcask Capacity Calculator"
+    identifier: "planning_cluster_bitcask_capacity"
+    weight: 104
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/planning/bitcask
+  - /riak/kv/3.0.3/ops/building/planning/bitcask
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask
+
+These calculators will assist you in sizing your cluster if you plan to
+use the default [Bitcask][plan backend bitcask] storage backend.
+
+This page is designed to give you a rough estimate when sizing your
+cluster. The calculations are a _best guess_, and they tend to be a bit
+on the conservative side. It's important to include a bit of headroom
+as well as room for unexpected growth so that if demand exceeds
+expectations you'll be able to add more nodes to the cluster and stay
+ahead of your requirements.
+
+<div id="node_info" class="calc_info"></div>
+<div class="calculator">
+  <ul>
+    <li>
+      <label for="n_total_keys">Total Number of Keys:</label>
+      <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input">
+      <span class="error_span" id="n_total_keys_error"></span>
+    </li>
+    <li>
+      <label for="n_bucket_size">Average Bucket Size (Bytes):</label>
+      <input id="n_bucket_size" type="text" size="7" name="n_bucket_size" value="" class="calc_input">
+      <span class="error_span" id="n_bucket_size_error"></span>
+    </li>
+    <li>
+      <label for="n_key_size">Average Key Size (Bytes):</label>
+      <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input">
+      <span class="error_span" id="n_key_size_error"></span>
+    </li>
+    <li>
+      <label for="n_record_size">Average Value Size (Bytes):</label>
+      <input id="n_record_size" type="text" size="7" name="n_record_size" value="" class="calc_input">
+      <span class="error_span" id="n_record_size_error"></span>
+    </li>
+    <li>
+      <label for="n_ram">RAM Per Node (in GB):</label>
+      <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input">
+      <span class="error_span" id="n_ram_error"></span>
+    </li>
+    <li>
+      <label for="n_nval"><i>N</i> (Number of Write Copies):</label>
+      <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input">
+      <span class="error_span" id="n_nval_error"></span>
+    </li>
+  </ul>
+</div>
+
+## Recommendations
+
+<span id="recommend"></span>
+
+## Details on Bitcask RAM Calculation
+
+With the above information in mind, the following variables will factor
+into your RAM calculation:
+
+Variable | Description
+:--------|:-----------
+Static Bitcask per-key overhead | 44.5 bytes per key
+Estimated average bucket-plus-key length | The combined number of characters your bucket + key names will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The Actual Equation
+
+Approximate RAM Needed for Bitcask = (static Bitcask per-key overhead +
+estimated average bucket-plus-key length in bytes) * estimated total
+number of keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+The amount of RAM you would need for Bitcask is about **10.4 GB across
+your entire cluster** ((44.5 + 30) bytes * 50,000,000 keys * 3
+replicas).
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/cluster-capacity.md b/content/riak/kv/3.0.3/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..64481e9806
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/cluster-capacity.md
@@ -0,0 +1,238 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/planning/cluster
+  - /riak/kv/3.0.3/ops/building/planning/cluster
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/3.0.3/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/3.0.3/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in
+mind when planning your Riak cluster. Your use case and environment
+variables will be specific to what you're building, but this document
+should set you on the right path when planning and launching a Riak
+cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most
+important resource when sizing your Riak cluster. Memory keeps data
+closer to your users and is essential for running complex MapReduce
+queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM without
+  degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory.
+The keydir is a hash table that maps each
+concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for
+each file contained within each Bitcask backend) to a fixed-size
+structure giving the file, offset, and size of the most recently written
+entry for that bucket + key on disk.
+
+To learn about Bitcask, see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.
+
+If your calculated RAM needs exceed your hardware resources (in other words, if you can't afford the RAM to use Bitcask), we recommend that you use LevelDB.
+
+Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.
+
+### LevelDB
+
+If RAM requirements for Bitcask are prohibitive, we recommend use of
+the LevelDB backend. While LevelDB doesn't require a large amount of RAM
+to operate, supplying it with the maximum amount of memory available leads to higher performance.
+
+For more information see [LevelDB][plan backend leveldb].
+
+## Disk
+
+Now that you have an idea of how much RAM you'll need, it's time to think about disk space. Disk space needs are much easier to calculate. Below is an equation to help you calculate disk space needs:
+
+#### Estimated Total Objects * Average Object Size * n_val
+
+For example:
+
+* 50,000,000 objects
+* an average object size of two kilobytes (2,048 bytes)
+* the default `n_val` of 3
+
+Then you would need just over **286 GB** of disk space in the entire cluster to accommodate your data.
+
+We believe that databases should be durable out of the box. When we
+built Riak, we did so in a way that you could write to disk while
+keeping response times below your users' expectations. So this
+calculation assumes that you'll be keeping the entire data set on disk.
+
+Many of the considerations taken when configuring a machine to serve a
+database apply to configuring a node for Riak as well. Mounting
+disks with `noatime` and having separate disks for your OS and Riak data
+lead to much better performance. See [Planning for a
+Riak System](../start) for more information.
+
+### Disk Space Planning and Ownership Handoff
+
+When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is that the remaining nodes use disk space more intensively; in rare cases this can fill the disk of one or more of those nodes.
+
+When making disk space planning decisions, we recommend that you:
+
+* assume that one or more nodes may be down at any time
+* monitor your disk space usage and add additional space when usage
+  exceeds 50-60% of available space.
+
+Another possibility worth considering is using Riak with a filesystem
+that allows for growth, for example
+[LVM],
+[RAID](http://en.wikipedia.org/wiki/RAID), or
+[ZFS](http://en.wikipedia.org/wiki/ZFS).
+
+## Read/Write Profile
+
+Read/write ratios, as well as the distribution of key access, should
+influence the configuration and design of your cluster. If your use case
+is write-heavy, you will need less RAM for caching, and if only a
+certain portion of keys is accessed regularly (following, say, a [Pareto
+distribution](http://en.wikipedia.org/wiki/Pareto_distribution)), you
+won't need as much RAM available to cache those keys' values.
+
+## Number of Nodes
+
+The number of nodes (i.e. physical servers) in your Riak cluster depends
+on the number of times data is [replicated][concept replication] across the
+cluster. To ensure that the cluster is always available to respond to
+read and write requests, we recommend a "sane default" of N=3
+replicas. This requirement can be met with a 3- or 4-node
+cluster.
+
+For production deployments, however, we recommend using no fewer than 5
+nodes, as node failures in smaller clusters can compromise the
+fault-tolerance of the system. Additionally, in clusters smaller than 5
+nodes, a high percentage of the nodes (75-100% of them) will need to
+respond to each request, putting undue load on the cluster that may
+degrade performance. For more details on this recommendation, see our
+blog post on [Why Your Riak Cluster Should Have at Least Five
+Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/).
+
+## Scaling
+
+Riak can be scaled in two ways: vertically, via improved hardware, and
+horizontally, by adding more nodes. Both ways can provide performance
+and capacity benefits, but should be used in different circumstances.
+The [riak-admin cluster command][use admin riak-admin#cluster] can
+assist scaling in both directions.
+
+#### Vertical Scaling
+
+Vertical scaling, or improving the capabilities of a node/server,
+provides greater capacity to the node but does not decrease the overall
+load on existing members of the cluster. That is, the ability of the
+improved node to handle existing load is increased but the load itself
+is unchanged. Reasons to scale vertically include increasing IOPS (I/O
+Operations Per Second), increasing CPU/RAM capacity, and increasing disk
+capacity.
+
+#### Horizontal Scaling
+
+Horizontal scaling, or increasing the number of nodes in the cluster,
+reduces the responsibilities of each member node by reducing the number
+of partitions each node manages and providing additional endpoints for
+client connections. That is, the capacity of each individual node does
+not change but its load is decreased. Reasons to scale horizontally
+include increasing I/O concurrency, reducing the load on existing nodes,
+and increasing disk capacity.
+
+> **Note on horizontal scaling**
+>
+> When scaling horizontally, it's best to add all planned nodes at once
+with multiple `riak-admin cluster join` commands followed by
+a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster.
+
+#### Reducing Horizontal Scale
+
+If a Riak cluster is over-provisioned, or in response to seasonal usage decreases, you can decrease the cluster's horizontal scale using the `riak-admin cluster leave` command.
+
+## Ring Size/Number of Partitions
+
+Ring size is the number of partitions that make up your Riak cluster. The ring size must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference].
+
+The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes, we recommend a larger ring size.
+
+The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes.
+
+There are no absolute rules for the ideal partitions-per-node ratio. This depends on your particular use case and what features the Riak cluster uses.
+We recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine, while a 10-node cluster should work well with a ring size of 128 or 256 (64 is too small, while 512 is likely too large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with
+more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the
+[Riak mailing
+list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will
+compensate for network partitions, they do cause increased load on the
+system. In addition, running in a virtualized environment that lacks
+low-latency IO access can drastically decrease performance. Before
+putting your Riak cluster in production, it is recommended that you gain
+a full understanding of your environment's behavior so that you know how
+your cluster performs under load for an extended period of time. Doing
+so will help you size your cluster for future growth and lead to optimal
+performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide
+reliable access to data. A Riak cluster can be deployed in many
+different network environments. We recommend keeping latency between
+nodes as low as possible, as high latency leads to sub-optimal
+performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in
+sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O
+available to it, especially in the case of write-heavy workloads. Riak
+functions much like any other database and the design of your disk
+access should take this into account. Because Riak is clustered and your
+data is stored on multiple physical nodes, you should consider forgoing
+a traditional RAID setup for redundancy and focus on providing the
+lowest latency possible using SATA drives or SSDs, for example.
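+
+To make the arithmetic above easy to repeat, here is a minimal sizing
+sketch that simply restates the disk formula and the
+[Bitcask][plan bitcask capacity] RAM formula from this page. All input
+values are example assumptions, so substitute your own estimates.
+
+```bash
+#!/bin/sh
+# Back-of-the-envelope sizing using the formulas in this document.
+KEYS=50000000      # estimated total objects (example value)
+BK_BYTES=30        # average bucket + key name length in bytes (example)
+OBJ_BYTES=2048     # average object size in bytes (example)
+N_VAL=3            # replication factor (the default)
+
+# Disk: estimated total objects * average object size * n_val
+awk -v k="$KEYS" -v o="$OBJ_BYTES" -v n="$N_VAL" \
+  'BEGIN { printf "Cluster disk estimate: %.1f GB\n", k * o * n / 1024^3 }'
+
+# Bitcask RAM: (44.5-byte static overhead + bucket+key bytes) * keys * n_val
+awk -v k="$KEYS" -v b="$BK_BYTES" -v n="$N_VAL" \
+  'BEGIN { printf "Bitcask keydir RAM estimate: %.1f GB\n", (44.5 + b) * k * n / 1024^3 }'
+```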
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/future.md b/content/riak/kv/3.0.3/setup/planning/future.md
new file mode 100644
index 0000000000..23c9c68de3
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/future.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+#menu:
+#  riak_kv-3.0.3:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/operating-system.md b/content/riak/kv/3.0.3/setup/planning/operating-system.md
new file mode 100644
index 0000000000..cade426738
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/operating-system.md
@@ -0,0 +1,30 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+aliases:
+---
+
+[downloads]: {{<baseurl>}}riak/kv/3.0.3/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system.
+Mainstream distributions have larger support communities, making
+solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/planning/start.md b/content/riak/kv/3.0.3/setup/planning/start.md
new file mode 100644
index 0000000000..83f69e0e40
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/planning/start.md
@@ -0,0 +1,61 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/planning/system-planning
+  - /riak/kv/3.0.3/ops/building/planning/system-planning
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring
+your Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have
+strengths and weaknesses, so if you are unsure of which backend you
+need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests
+across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP
+implementations.
+We don't recommend VRRP behavior for the VIP because
+you'll lose the benefit of spreading client query load to all nodes in a
+ring.
+
+For **reverse-proxy** configurations (HTTP interface), any one of the
+following should work adequately:
+
+* HAProxy
+* Squid
+* Varnish
+* Nginx
+* Lighttpd
+* Apache
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/search.md b/content/riak/kv/3.0.3/setup/search.md
new file mode 100644
index 0000000000..3f2ff2d6cc
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/search.md
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/upgrading.md b/content/riak/kv/3.0.3/setup/upgrading.md
new file mode 100644
index 0000000000..95c3024899
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/upgrading.md
@@ -0,0 +1,38 @@
+---
+title: "Upgrading Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Upgrading"
+    identifier: "upgrading"
+    weight: 102
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[upgrade checklist]: ./checklist
+[upgrade version]: ./version
+[upgrade cluster]: ./cluster
+[upgrade mdc]: ./multi-datacenter
+[upgrade search]: ./search
+
+## In This Section
+
+### [Production Checklist][upgrade checklist]
+
+An overview of what to consider before upgrading Riak KV in a production environment.
+
+[Learn More >>][upgrade checklist]
+
+### [Upgrading to Riak KV 3.0.3][upgrade version]
+
+A tutorial on updating to Riak KV 3.0.3.
+
+[Learn More >>][upgrade version]
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/upgrading/checklist.md b/content/riak/kv/3.0.3/setup/upgrading/checklist.md
new file mode 100644
index 0000000000..5512be0bc1
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/upgrading/checklist.md
@@ -0,0 +1,225 @@
+---
+title: "Production Checklist"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Production Checklist"
+    identifier: "upgrading_checklist"
+    weight: 100
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/upgrading/production-checklist/
+  - /riak/kv/3.0.3/ops/upgrading/production-checklist/
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/3.0.3/using/performance/open-files-limit
+[perf index]: {{<baseurl>}}riak/kv/3.0.3/using/performance
+[ntp]: http://www.ntp.org/
+[security basics]: {{<baseurl>}}riak/kv/3.0.3/using/security/basics
+[cluster ops load balance]: {{<baseurl>}}riak/kv/3.0.3/configuring/load-balancing-proxy
+[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference
+[config basic]: {{<baseurl>}}riak/kv/3.0.3/configuring/basic
+[config strong consistency]: {{<baseurl>}}riak/kv/3.0.3/configuring/strong-consistency
+[config backend]: {{<baseurl>}}riak/kv/3.0.3/configuring/backend
+[usage search]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency
+[apps replication properties]: {{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/reference/strong-consistency
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/bucket-types
+[use admin commands]: {{<baseurl>}}riak/kv/3.0.3/using/admin/commands
+[use admin riak control]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-control
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/inspecting-node
+[troubleshoot http]: {{<baseurl>}}riak/kv/3.0.3/using/troubleshooting/http-204
+[use admin riak-admin]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin
+[develop client libraries]: {{<baseurl>}}riak/kv/3.0.3/developing/client-libraries
+[SANs]: http://en.wikipedia.org/wiki/Storage_area_network
+
+Deploying Riak KV to a realtime production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in
+  terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all
+  of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to
+  synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e.
+  that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the
+  following (we suggest using `iperf` to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin
+  or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in
+    `riak.conf` (or in `app.config` if you're using the older
+    configuration files)?
+  - Are all [configurable settings][config reference] identical
+    across the cluster?
+  - Have all of the settings in your configuration file(s) that were
+    changed for debugging purposes been reverted back to production
+    settings?
+  - If you're using [multiple data backends][config backend], are all of your
+    bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in
+    the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines'
+    config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config basic] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in
+    place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [apps replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all
+    nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your
+    data:
+    * Does your cluster consist of at least three nodes? If it does
+      not, you will not be able to use this feature, and you are
+      advised against enabling it.
+    * If your cluster does consist of at least three nodes, has the
+      strong consistency subsystem been [enabled][config strong consistency] on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use
+    been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no
+    [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each
+    node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration
+    file(s)][config reference].
+* Do all nodes agree on the ring state? (A scripted version of these
+  checks appears at the end of this checklist.)
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on
+    the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e.
+    listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready:
+    true`), there should be no unreachable nodes (`All nodes are up and
+    reachable`), and there should be no pending changes to the ring
+    (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No
+    transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is
+  running?
+* Are you collecting [time series data][cluster ops inspect node] on
+  the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or
+    using [`riak-admin`][use admin riak-admin])
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median and 95th and 99th percentile latencies (as these tend to be
+    leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that
+  your configuration will meet your performance needs?
+* Are the [client libraries][develop client libraries] in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV
+  that you're deploying?
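+
+If you would rather script the node and ring checks above than run them
+by hand, a minimal sketch follows. The hostnames are hypothetical and
+SSH access to every node is assumed; it only uses commands already
+listed in this checklist.
+
+```bash
+#!/bin/sh
+# Pre-production health sweep. Hostnames are placeholders.
+NODES="riak1.example.com riak2.example.com riak3.example.com"
+
+for host in $NODES; do
+  echo "== $host =="
+  ssh "$host" 'riak ping'                         # expect: pong
+done
+
+# Ring-level checks only need to run on a single node.
+ssh riak1.example.com 'riak-admin ringready'      # all nodes agree on the ring
+ssh riak1.example.com 'riak-admin member-status'  # all nodes listed as valid
+ssh riak1.example.com 'riak-admin ring-status'    # Ring Ready: true, no pending changes
+ssh riak1.example.com 'riak-admin transfers'      # no transfers active
+```
+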
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that
+can be invoked by running `riak-admin diag <check>`, where `<check>` is
+one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all
+checks and report the findings. This is a good way of verifying that
+you've gotten at least some of the configurations mentioned above
+correct, that all nodes in your cluster are up, and that nothing is
+grossly misconfigured. Any warnings produced by `riak-admin diag` should
+be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open
+  support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement
+  (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production
+    systems
+  - High is for problems that impact production or soon-to-be-production
+    systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those
+    issues that are likely to turn into production outages very soon.
+    On-call engineers respond to urgent requests within 30 minutes,
+    24/7.
+* Does your team know how to gather `riak-debug` results from the whole
+  cluster when opening tickets? If not, that process goes something like
+  this (a scripted sketch appears at the end of this page):
+  - SSH into each machine, run `riak-debug`, and grab the resultant
+    `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open
+    a new High- or Urgent-priority ticket
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at
+the metrics gathered above. Based on the numbers you're seeing so far,
+configure alerting thresholds on your latencies, disk consumption, and
+memory. These are the places most likely to give you advance warning of
+trouble.
+
+When you go to increase capacity down the line, having historic metrics
+will give you very clear indicators of having resolved scaling problems,
+as well as metrics for understanding what to upgrade and when.
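+
+Finally, here is a minimal sketch of the `riak-debug` collection process
+described in the Troubleshooting and Support section above. The
+hostnames, archive name, and destination directory are all assumptions;
+adjust them for your environment.
+
+```bash
+#!/bin/sh
+# Collect riak-debug archives from every node before filing a ticket.
+NODES="riak1.example.com riak2.example.com riak3.example.com"
+
+for host in $NODES; do
+  ssh "$host" 'riak-debug'                  # produces a .tar.gz on the node
+  mkdir -p "debug/$host"
+  scp "$host:*riak-debug*.tar.gz" "debug/$host/"
+done
+```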
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/upgrading/cluster.md b/content/riak/kv/3.0.3/setup/upgrading/cluster.md
new file mode 100644
index 0000000000..acdda7fc54
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/upgrading/cluster.md
@@ -0,0 +1,303 @@
+---
+title: "Upgrading a Cluster"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Upgrading a Cluster"
+    identifier: "upgrading_cluster"
+    weight: 102
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/3.0.3/ops/upgrading/rolling-upgrades/
+  - /riak/kv/3.0.3/ops/upgrading/rolling-upgrades/
+---
+
+[production checklist]: {{<baseurl>}}riak/kv/3.0.3/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/3.0.3/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/secondary-indexes
+[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.3/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/3.0.3/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/3.0.3/using/reference/snmp
+
+{{% note title="Note on upgrading Riak KV from older versions" %}}
+Riak KV upgrades are tested and supported for two feature release versions.
+For example, upgrades from 1.1.x to 1.3.x are tested and supported,
+while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new
+version of Riak KV that is more than two feature releases ahead, we
+recommend first upgrading to an intermediate version. For example, in an
+upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x
+before upgrading to 1.4.x.
+
+If you run [Riak Control]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-control), you should disable it during the rolling upgrade process.
+{{% /note %}}
+
+Riak KV nodes negotiate with each other to determine supported
+operating modes. This allows clusters containing mixed versions of Riak KV
+to properly interoperate without special configuration, and simplifies
+rolling upgrades.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading.
+
+## Debian/Ubuntu
+
+The following example demonstrates upgrading a Riak KV node that has been
+installed with the Debian/Ubuntu packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up the Riak KV node's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo dpkg -i <riak_package_name>.deb
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
This data is transferred to the node when it becomes available. + +8\. Repeat the process for the remaining nodes in the cluster. + + +## RHEL/CentOS + +The following example demonstrates upgrading a Riak KV node that has been +installed with the RHEL/CentOS packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +2\. Back up Riak KV's `/etc` and `/data` directories: + +```bash +sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak +``` + +3\. Upgrade Riak KV: + +```bash +sudo rpm -Uvh <riak_package_name>.rpm +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +5\. Verify that Riak KV is running the new version: + +```bash +riak version +``` + + +6\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +* `»target_node«` is the node which you have just upgraded (e.g. +riak@192.168.1.11) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +* While the node was offline, other nodes may have accepted writes on its +behalf. This data is transferred to the node when it becomes available. + +8\. Repeat the process for the remaining nodes in the cluster. + + +## Solaris/OpenSolaris + +The following example demonstrates upgrading a Riak KV node that has been +installed with the Solaris/OpenSolaris packages provided by Basho. + +1\. Stop Riak KV: + +```bash +riak stop +``` + +{{% note %}} +If you are using the service management facility (SMF) to manage Riak KV, +you will have to stop Riak KV via `svcadm` instead of using `riak stop`: + +```bash +sudo svcadm disable riak +``` +{{% /note %}} + + +2\. Back up Riak KV's `/etc` and `/data` directories: + +```bash +sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc +``` + +3\. Uninstall Riak KV: + +```bash +sudo pkgrm BASHOriak +``` + +4\. Install the new version of Riak KV: + +```bash +sudo pkgadd -d <riak_package_name>.pkg +``` + +4\. Restart Riak KV: + +```bash +riak start +``` + +{{% note %}} +If you are using the service management facility (SMF) to manage Riak KV, +you will have to start Riak KV via `svcadm` instead of using `riak start`: + +```bash +sudo svcadm enable riak +``` +{{% /note %}} + +5\. Verify that Riak KV is running the new version: + +```bash +riak version +``` + +6\. Wait for the `riak_kv` service to start: + +```bash +riak-admin wait-for-service riak_kv »target_node« +``` + +`»target_node«` is the node which you have just upgraded (e.g. +`riak@192.168.1.11`) + +7\. Wait for any hinted handoff transfers to complete: + +```bash +riak-admin transfers +``` + +While the node was offline, other nodes may have accepted writes on its +behalf. This data is transferred to the node when it becomes available. + +8\. Repeat the process for the remaining nodes in the cluster. + + +## Rolling Upgrade to Enterprise + +If you would like to upgrade an existing Riak KV cluster to a commercially +supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps: + +1. Shut down the node you are going to upgrade. +2. Back up your `etc` (app.config and vm.args) and `data` +directories. +3. Uninstall your Riak KV package. +4. Install the `riak_ee` package. +5. A standard package uninstall should not have removed your data + directories. If it did, move your backup to where the data directory + should be. +6. Copy any customizations from your backed-up vm.args to the + `riak_ee` installed vm.args file, these files may be identical. +7. 
+7. The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+   * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+   * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+   * `riak_jmx` - See [JMX Monitoring][jmx monitor] for more information.
+   * `snmp` - See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should ensure that any custom patches contained in
+the `basho-patches` directory are examined to determine their
+application to the upgraded version. If you find that patches no longer
+apply to the upgraded version, you should remove them from the
+`basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general
+health of the Riak KV node after upgrading by using Riak KV's built-in
+diagnostic utility Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following
+command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal
+node operation.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/upgrading/multi-datacenter.md b/content/riak/kv/3.0.3/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..c48f2c940d
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,24 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+#menu:
+#  riak_kv-3.0.3:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+aliases:
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/upgrading/search.md b/content/riak/kv/3.0.3/setup/upgrading/search.md
new file mode 100644
index 0000000000..f7c27d94ff
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/upgrading/search.md
@@ -0,0 +1,281 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/3.0.3/ops/advanced/upgrading-search-2
+  - /riak/kv/3.0.3/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to
+1.4.x), you should follow these steps to migrate your search indexes
+from the legacy `merge_index` to the new Solr-backed ([Yokozuna](../../../using/reference/search)) indexes. The legacy version of Riak Search is now deprecated
+and does not support most new 2.0 features (i.e.
+no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy
+Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can
+reasonably be, but they do include some manual steps for safety. They
+are meant to be run on a live cluster, so there's no need to take all of
+your nodes down. Like all migration activities, you should undertake
+these steps at a time when your cluster is relatively light on traffic,
+i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak
+Search that parallel the existing ones in legacy. New writes add entries
+to both indexes while AAE adds entries in the new indexes for existing
+data.
+
+Parallel indexes mean more disk usage. How much more will depend on the
+schema, but tests have shown Solr to generally use less disk space. A
+prudent plan will expect new Search to use as much disk as legacy. You
+can also expect more CPU usage, as analysis will temporarily be performed
+by both systems. Finally, Solr runs on a JVM process requiring its own
+RAM. A good start is 2 GB, but more will be required for heavier
+workloads. That said, do not make the heap too large, as that could
+cause lengthy garbage collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will
+still be serviced by legacy Search. Once you have determined that the
+new indexes are consistent with KV, you can perform a live switch to the
+new system and turn off legacy Search. Finally, you can remove the old
+merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade.
+We don't recommend doing that if writes are being made to these buckets
+during upgrade. Once `search: false` is set on a bucket, all new KV
+data written will have missing indexes in the merge index and
+overwritten data will have inconsistent indexes. At this point, a
+downgrade requires a full re-index of the data, as legacy Search has no
+mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+>Migration requires that Riak's AAE subsystem be enabled. It's
+responsible for finding all the missing index entries for existing data
+and adding them. Technically speaking, the migration can be performed
+without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space, since merge index's GC
+algorithm is bad at getting rid of large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster).
+   As you upgrade, enable `yokozuna` (the new Riak Search library) on
+   each node. If you're still using `app.config`, the setting is called
+   `yokozuna`. If you've moved to the new `riak.conf` configuration
+   file, it's called `search`.
+
+   ```riakconf
+   search = on
+   ```
+   ```appconfig
+   {yokozuna, [
+     %% Other configs
+     {enabled, true},
+     %% Other configs
+   ]}
+   ```
+
+   <div class="note">
+   <div class="title">Upgrade First</div>
+   Don't proceed until all nodes have been upgraded to the newest
+   version. This way all nodes have new Search capabilities before
+   running the next steps which require them.
+   </div>
+
+2. For every schema in legacy Search, you must create a comparable
+schema in new Search. If you want to use the default schema named
+[_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+   To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas)
+   instructions to learn how to define your XML file. Once you've created
+   the file, you can upload it to the cluster.
+
+   ```curl
+   curl -XPUT http://localhost:8098/search/schema/my_schema \
+     -H 'Content-Type: application/xml' \
+     --data-binary @my_schema.xml
+   ```
+
+3. For every index in legacy Search, you must create a comparable index
+in new Search, setting the appropriate schema that you created in the
+previous step. This index can have the same name as your legacy Search
+index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+   ```curl
+   curl -XPUT http://localhost:8098/search/index/my_index \
+     -H 'Content-Type: application/json' \
+     -d '{"schema":"my_schema"}'
+   ```
+
+4. For each bucket which is indexed by legacy Search, you must add the
+`search_index` bucket property to point to the new Search index. This
+new index is what we are attempting to migrate all of our index data to.
+You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+   ```curl
+   curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+     -H 'Content-Type: application/json' \
+     -d '{"props":{"search_index":"my_index"}}'
+   ```
+
+   Once a bucket is associated with the new Search, all objects that are
+   written or modified in Riak will be indexed by **both** legacy and new
+   Search. However, the HTTP and client query interfaces will still
+   continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash
+trees must be manually cleared so that AAE will notice the missing
+indexes.
+
+   Attach to one of the Riak nodes by calling `riak attach-direct`. Paste
+   the following code into the shell. It clears the Search hash trees for
+   each node in the cluster.
+
+   ```erlang
+   riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+   ```
+
+   Press `Ctrl-D` to exit from the attached shell.
+
+   In the background, AAE will rebuild the hash trees and exchange them
+   with KV. These exchanges will notice objects are missing and index
+   them in new Search.
+
+   <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges
+have occurred on every node.
+
+   ```bash
+   riak-admin search aae-status
+   ```
+
+   First, you must wait until all trees are rebuilt. This may take a
+   while, as each node is configured, by default, to build a maximum of
+   one tree per hour. You can determine when a tree is built by looking
+   at the `Entropy Trees` section. When a tree is not built it will show
+   `--` under the `Built (ago)` column. Otherwise, it will list how long
+   ago the tree was built in a human-friendly format.
+   Here is an example of trees that are not built:
+
+   ```
+   ================================ Entropy Trees ================================
+   Index                                              Built (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   --
+   319703483166135013357056057156686910549735243776   --
+   ...
+   ```
+
+   Here is an example of built trees:
+
+   ```
+   ================================ Entropy Trees ================================
+   Index                                              Built (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   12.3 hr
+   319703483166135013357056057156686910549735243776   5.3 hr
+   ...
+   ```
+
+   After all the trees are built you then have to wait for a full
+   exchange round to occur for every partition on every node. That is,
+   the full exchange round must be **NEWER** than the time the tree was
+   built. That way you know the exchange was based on the latest tree.
+   The exchange information is found under the `Exchanges` section.
+   Under that section there are two columns: `Last (ago)` and `All
+   (ago)`. In this case, you want to wait until the `All (ago)` value is
+   newer than the value of `Built (ago)` in the `Entropy Trees` section.
+   For example, given the entropy tree output above, the following output
+   would indicate that both partitions have had a full exchange round
+   since the latest tree was built:
+
+   ```
+   ================================== Exchanges ==================================
+   Index                                              Last (ago)    All (ago)
+   -------------------------------------------------------------------------------
+   ...
+   296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+   319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+   ...
+   ```
+
+   Notice that `12.1 hr` is newer than `12.3 hr` and `5.2 hr` newer than
+   `5.3 hr`. Once the exchange is newer for every partition on every
+   node you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command that will give HTTP and PB query
+control to the new Riak Search.
+
+   ```bash
+   riak-admin search switch-to-new-search
+   ```
+
+   <div class="note">
+   <div class="title">Check Results Before Switching (Optional)</div>
+   Up until this point all incoming queries are serviced by the legacy
+   Search system. After the `switch-to-new-search` is run all queries
+   will be handled by new Search. If you first want to verify the
+   results of new Search before switching then you can use its dedicated
+   HTTP resource at `/search/query/<index>?q=...`.
+   </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed
+buckets. This deactivates legacy Search.
+
+   ```curl
+   curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+     -H 'Content-Type: application/json' \
+     -d '{"props":{"search": false}}'
+   ```
+
+9. Disable the Riak Search process on each node by setting `enabled` to
+`false` in the `riak_search` section.
+
+   ```appconfig
+   {riak_search, [
+     %% Other configs
+     {enabled, false},
+     %% Other configs
+   ]},
+   ```
+
+10. Perform a rolling restart. This is needed both to stop legacy
+Search as well as properly remove the legacy Search commit hooks. A bug
+in the 1.4.x series allowed bucket properties to leak into what Riak
+developers call the "raw ring". In the case of legacy Search it causes
+the commit hooks to persist even when legacy Search is disabled and the
+search property is set to false.
+    New Search has code to expunge the legacy hooks from the raw ring, but
+    this only occurs during start-up and requires that legacy Search be
+    disabled in the configuration. Thus, the easiest way to fix things is
+    to disable legacy Search (in step 9) and then perform a rolling
+    restart of the cluster.
+
+11. Finally, delete the merge index directories to reclaim disk space.
+
+For any questions, reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than during the middle of a migration.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/setup/upgrading/version.md b/content/riak/kv/3.0.3/setup/upgrading/version.md
new file mode 100644
index 0000000000..0f1f1eaa02
--- /dev/null
+++ b/content/riak/kv/3.0.3/setup/upgrading/version.md
@@ -0,0 +1,252 @@
+---
+title: "Upgrading to Riak KV 3.0.3"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Upgrading to 3.0.3"
+    identifier: "upgrading_version"
+    weight: 101
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/3.0.3/upgrade-v20/
+  - /riak/kv/3.0.3/ops/upgrading/rolling-upgrades/
+  - /riak/kv/3.0.3/setup/upgrading/cluster/
+
+---
+
+
+[production checklist]: {{<baseurl>}}riak/kv/3.0.3/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/3.0.3/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/secondary-indexes
+[release notes]: {{<baseurl>}}riak/kv/3.0.3/release-notes
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.3/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/3.0.3/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/3.0.3/using/reference/snmp
+[Release Notes]: {{<baseurl>}}riak/kv/3.0.3/release-notes
+
+
+## Overview
+
+You can upgrade one node or your whole cluster to Riak KV 3.0.3 by following the instructions below.
+
+{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades.
+{{% /note %}}
+
+
+### General Process
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+1. Back up the Riak /etc, /data, and /basho-patches directories.
+1. Remove your /basho-patches directory.
+1. Upgrade Riak KV.
+    * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading.
+1. (Optional) If you would like to potentially downgrade at some point, update your advanced.config file to opt out of the AAE updates.
+1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config.
+1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to override the default bucket properties for compatibility.
+1. Start Riak KV.
+1. Verify Riak KV is running the upgraded version.
+1. Wait for the `riak_kv` service to start.
+1. Wait for any hinted handoffs to complete.
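+
+As a quick reference, the sketch below condenses the general process
+for a single Debian/Ubuntu node with default paths. The package file
+name and node name are placeholders, and the platform-specific steps
+later in this page cover the variations in detail.
+
+```bash
+#!/bin/sh
+# Condensed single-node upgrade pass (Debian/Ubuntu, default paths).
+riak stop
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak \
+    /usr/lib/riak/lib/basho-patches
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+sudo dpkg -i riak_3.0.3-1_amd64.deb            # placeholder package name
+riak start
+riak version                                   # confirm the new version
+riak-admin wait-for-service riak_kv riak@192.168.1.11
+riak-admin transfers                           # wait for handoff to finish
+```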
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Transitioning to Leveled backend
+
+
+[Riak KV 2.9][release notes] introduced a new backend specifically for Riak, Leveled:
+
+The leveled backend is not compatible with other backends in terms of the serialized disk format. There is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+* First transition to 2.9 with the current backend in place, minimizing the time spent running mismatched versions in parallel;
+* Then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend.
+
+{{% note %}}
+[Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) is required to upgrade to Riak KV 3.0.3 only if you plan to use Riak search.
+{{% /note %}}
+
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features are either so important that we automatically opt users in on upgrade, or there is no way to provide direct backward compatibility for them. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade, data written in HLL format is unreadable. |
+
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new Leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+
+## Upgrading Process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories.
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+
+4\. Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package right now, before you can install the OSS version.
+{{% /note %}}
+
+
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+   ```advanced.config
+   {riak_kv, [
+     {override_capability, [
+       {object_hash_version, [{use, legacy}] }
+     ]}
+   ]}
+   ```
+
+5.b\. (**Optional**) If you would like to keep your leveldb compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+   ```riak.conf
+   leveldb.compression.algorithm=snappy
+   ```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS 2.2.3 or earlier, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+   ```advanced.config
+   {riak_repl, [
+     {override_capability, [
+       {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+     ]}
+   ]}
+   ```
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 3.0.3 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g.
+`riak@192.168.1.11`)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf.
+
+10\. Repeat the process for the remaining nodes in the cluster.
+
+
+### Basho Patches
+
+After upgrading, examine any custom patches in the `basho-patches` directory to determine whether they still apply to the upgraded version. You can find this information in the [Release Notes].
+
+If you find that patches no longer apply to the upgraded version, remove them from the `basho-patches` directory before operating the node in production.
+
+The following lists locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+### Riaknostic
+
+After upgrading, it is also a good idea to verify basic configuration and the general health of the Riak KV node using Riak KV's built-in diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal node operation.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using.md b/content/riak/kv/3.0.3/using.md
new file mode 100644
index 0000000000..f613f2132e
--- /dev/null
+++ b/content/riak/kv/3.0.3/using.md
@@ -0,0 +1,78 @@
+---
+title: "Using Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Using"
+    identifier: "managing"
+    weight: 201
+    pre: database
+toc: true
+aliases:
+---
+
+[use running cluster]: ../using/running-a-cluster
+[use admin index]: ../using/admin/
+[cluster ops index]: ../using/cluster-operations
+[repair recover index]: ../using/repair-recovery
+[security index]: ../using/security
+[perf index]: ../using/performance
+[troubleshoot index]: ../using/troubleshooting
+[use ref]: ../using/reference
+
+## In This Section
+
+#### [Running a Cluster][use running cluster]
+
+A guide on basic cluster setup.
+
+[Learn More >>][use running cluster]
+
+#### [Cluster Administration][use admin index]
+
+Tutorials and reference documentation on cluster administration commands as well as command-line tools.
+
+[Learn More >>][use admin index]
+
+#### [Cluster Operations][cluster ops index]
+
+Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and backups.
+
+[Learn More >>][cluster ops index]
+
+#### [Repair & Recovery][repair recover index]
+
+Contains documentation on repairing a cluster, recovering from failure, and common errors.
+
+[Learn More >>][repair recover index]
+
+#### [Security][security index]
+
+Information on securing your Riak KV cluster.
+
+[Learn More >>][security index]
+
+#### [Performance][perf index]
+
+Articles on benchmarking your Riak KV cluster and improving performance.
+
+[Learn More >>][perf index]
+
+#### [Troubleshooting][troubleshoot index]
+
+Guides on troubleshooting issues and current product advisories.
+
+[Learn More >>][troubleshoot index]
+
+#### [Reference][use ref]
+
+Articles providing background information and implementation details on topics such as logging, bucket types, and search.
+
+[Learn More >>][use ref]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/admin.md b/content/riak/kv/3.0.3/using/admin.md
new file mode 100644
index 0000000000..286d893ea3
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/admin.md
@@ -0,0 +1,51 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/running/cluster-admin
+  - /riak/kv/3.0.3/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/admin/commands.md b/content/riak/kv/3.0.3/using/admin/commands.md
new file mode 100644
index 0000000000..bc4aaa28fa
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/admin/commands.md
@@ -0,0 +1,378 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/running/cluster-admin
+  - /riak/kv/3.0.3/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, adding some nodes and removing
+others, etc.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command. This will execute the changes that
+have been staged and reviewed.
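+
+Concretely, a staged workflow might look like this (the nodenames are
+illustrative):
+
+```bash
+# Stage two changes: join one node and remove another.
+riak-admin cluster join riak2@192.168.1.12
+riak-admin cluster leave riak3@192.168.1.13
+
+# Review the resulting transition plan, then apply it.
+riak-admin cluster plan
+riak-admin cluster commit
+```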
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
interface are also available as self-standing commands. The `riak-admin
member-status` command is now the `riak-admin cluster status` command,
`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
deprecated commands. You will receive a deprecation warning if you use
the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+|        node        |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` - There are five possible values for status:
+  * `valid` - The node has begun participating in cluster operations
+  * `leaving` - The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` - The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` - The node is in the process of joining the cluster but
+    has not yet completed the join process
+  * `down` - The node is not currently responding
+* `avail` - There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` - What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` - The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant node][cluster ops add remove node] will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
run the `cluster join` command will take effect only after you have both
planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
run the `cluster leave` command will take effect only after you have
both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
run the `cluster force-remove` command will take effect only after you have
both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+The output of `riak-admin cluster plan` is complex and depends on the staged changes.
+
+* If a `leave` operation has been staged, `riak-admin cluster plan` will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* If a `cluster clear` operation is staged on a node that remains in the cluster, running `riak-admin cluster plan` will leave the node unaffected.
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
+ +If there is a staged change (or changes), however, you +will see a detailed listing of what will take place upon commit, what +the cluster will look like afterward, etc. + +For example, if a `cluster leave` operation is staged in a 3-node cluster the output will look something like this: + +``` +=============================== Staged Changes ================================ +Action Details(s) +------------------------------------------------------------------------------- +leave 'dev2@127.0.0.1' +------------------------------------------------------------------------------- + + +NOTE: Applying these changes will result in 2 cluster transitions + +############################################################################### + After cluster transition 1/2 +############################################################################### + +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +leaving 32.8% 0.0% 'dev2@127.0.0.1' +valid 34.4% 50.0% 'dev1@127.0.0.1' +valid 32.8% 50.0% 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0 + +WARNING: Not all replicas will be on distinct nodes + +Transfers resulting from cluster changes: 38 + 6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1' + 11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1' + 5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1' + 16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1' + +############################################################################### + After cluster transition 2/2 +############################################################################### + +================================= Membership ================================== +Status Ring Pending Node +------------------------------------------------------------------------------- +valid 50.0% -- 'dev1@127.0.0.1' +valid 50.0% -- 'dev3@127.0.0.1' +------------------------------------------------------------------------------- +Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + +WARNING: Not all replicas will be on distinct nodes +``` + +Notice that there are distinct sections of the output for each of the +transitions that the cluster will undergo, including warnings, planned +data transfers, etc. + +## commit + +Commits the currently staged cluster changes. Staged cluster changes +must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed. + +```bash +riak-admin cluster commit +``` + +## clear + +Clears the currently staged cluster changes. + +```bash +riak-admin cluster clear +``` + +## partitions + +Prints primary, secondary, and stopped partition indices and IDs either +for the current node or for another, specified node. 
The following
+prints that information for the current node:
+
+```bash
+riak-admin cluster partitions
+```
+
+This would print the partition information for a different node in the
+cluster:
+
+```bash
+riak-admin cluster partitions --node=<node>
+```
+
+Partition information is contained in a table like this:
+
+```
+Partitions owned by 'dev1@127.0.0.1':
++---------+-------------------------------------------------+--+
+|  type   |                      index                      |id|
++---------+-------------------------------------------------+--+
+| primary |                        0                        |0 |
+| primary | 91343852333181432387730302044767688728495783936 |4 |
+| primary |182687704666362864775460604089535377456991567872 |8 |
+|   ...   |                      ....                       |..|
+| primary |1438665674247607560106752257205091097473808596992|63|
+|secondary|                       --                        |--|
+| stopped |                       --                        |--|
++---------+-------------------------------------------------+--+
+```
+
+## partition-count
+
+Displays the current partition count either for the whole cluster or for
+a particular node. This would display the partition count for the
+cluster:
+
+```bash
+riak-admin cluster partition-count
+```
+
+This would display the count for a node:
+
+```bash
+riak-admin cluster partition-count --node=<node>
+```
+
+When retrieving the partition count for a node, you'll see a table like
+this:
+
+```
++--------------+----------+-----+
+|     node     |partitions| pct |
++--------------+----------+-----+
+|dev1@127.0.0.1|    22    | 34.4|
++--------------+----------+-----+
+```
+
+The `partitions` column displays the number of partitions claimed by the
+node, while the `pct` column displays the percentage of the ring claimed.
+
+## partition
+
+The `cluster partition` command enables you to convert partition IDs to
+indexes and vice versa using the `partition id` and `partition index`
+commands, respectively. Let's say that you run the `riak-admin cluster
+partitions` command and see that you have a variety of partitions, one
+of which has an index of
+`1004782375664995756265033322492444576013453623296`.
You can convert
+that index to an ID like this:
+
+```bash
+riak-admin cluster partition index=1004782375664995756265033322492444576013453623296
+```
+
+Conversely, if you have a partition with an ID of 20, you can retrieve
+the corresponding index:
+
+```bash
+riak-admin cluster partition id=20
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/admin/riak-admin.md b/content/riak/kv/3.0.3/using/admin/riak-admin.md
new file mode 100644
index 0000000000..caf5adc1b2
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/admin/riak-admin.md
@@ -0,0 +1,721 @@
+---
+title: "riak-admin Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "riak-admin CLI"
+    identifier: "cluster_admin_cli"
+    weight: 101
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/running/tools/riak-admin
+  - /riak/kv/3.0.3/ops/running/tools/riak-admin
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference
+[use admin commands]: {{<baseurl>}}riak/kv/3.0.3/using/admin/commands
+[use admin commands#join]: {{<baseurl>}}riak/kv/3.0.3/using/admin/commands/#join
+[use admin commands#leave]: {{<baseurl>}}riak/kv/3.0.3/using/admin/commands/#leave
+[cluster ops backup]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/backing-up
+[config reference#node-metadata]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#node-metadata
+[cluster ops change info]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/changing-cluster-info
+[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/mapreduce
+[usage commit hooks]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/commit-hooks
+[config reference#ring]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#ring
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/inspecting-node
+[use ref monitoring]: {{<baseurl>}}riak/kv/3.0.3/using/reference/statistics-monitoring
+[downgrade]: {{<baseurl>}}riak/kv/3.0.3/setup/downgrade
+[security index]: {{<baseurl>}}riak/kv/3.0.3/using/security/
+[security managing]: {{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/bucket-types
+[cluster ops 2i]: {{<baseurl>}}riak/kv/3.0.3/using/reference/secondary-indexes
+[repair recover index]: {{<baseurl>}}riak/kv/3.0.3/using/repair-recovery
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/strong-consistency
+[cluster ops handoff]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/handoff
+[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#stats
+
+## `riak-admin`
+
+The riak-admin command performs operations unrelated to node liveness, including
+node membership, backup, and basic status reporting. The node must be
+running for most of these commands to work. 
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. `reip` should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs related to reip have been fixed in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded Javascript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+**Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file. 
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Outputs system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10
+riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into the operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
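+
+Because `riak-admin aae-status` emits plain text, it can be folded into
+simple monitoring checks. The following is a minimal, illustrative sketch
+(not part of Riak itself) that assumes entries with no recorded AAE
+activity appear as a trailing `--` placeholder, as in the tables above:
+
+```bash
+#!/bin/sh
+# Hypothetical health check: warn if any aae-status row still shows a
+# "--" placeholder (e.g. unbuilt trees or never-exchanged partitions).
+# The leading space in the pattern avoids matching the all-dash
+# separator rows.
+if riak-admin aae-status | grep -qE ' --$'; then
+  echo "WARNING: some partitions report no AAE activity yet" >&2
+  exit 1
+fi
+echo "All partitions report AAE activity."
+```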
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+default values are recommended for most cases. You should only change
+the default values after testing the impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
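+
+As a quick illustration of the shape of these subcommands (the exact set
+and syntax are documented in the security guide linked above), a typical
+sequence for enabling security and creating a user with a hypothetical
+name and password might look like this:
+
+```bash
+# Illustrative only -- review the security guide before running these.
+riak-admin security enable
+riak-admin security add-user alice password=secret
+riak-admin security add-source alice 127.0.0.1/32 password
+riak-admin security grant riak_kv.get,riak_kv.put on any to alice
+```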
+
+## bucket-type
+
+Bucket types, introduced in Riak 2.0, are a means of managing bucket
+properties and provide an additional namespace in Riak on top of
+buckets and keys. This command enables you to create and modify bucket
+types, provide the status of currently available bucket types, and
+activate created bucket types.
+
+```bash
+riak-admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak-admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak-admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak-admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+        Total partitions: 64
+        Finished partitions: 44
+        Speed: 100
+        Total 2i items scanned: 0
+        Total tree objects: 0
+        Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak-admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak-admin repair-2i kill
+```
+
+## search
+
+The search command provides sub-commands for various administrative
+work related to the new Riak Search.
+
+```bash
+riak-admin search <command>
+```
+
+### aae-status
+
+```bash
+riak-admin search aae-status
+```
+
+Outputs active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default, trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtrees don't match for a particular key. The `Last` column is the
+number of keys repaired during the last exchange. 
The `Mean` column is +the average number of keys repaired for all exchange rounds since the +node has started. The `Max` column is the maximum number of keys +repaired for a given exchange round since the node has started. + +### switch-to-new-search + +{{% note title="Only For Legacy Migration" %}} +This is only needed when migrating from legacy riak search to the new Search +(Yokozuna). +{{% /note %}} + +```bash +riak-admin search switch-to-new-search +``` + +Switch handling of the HTTP `/solr/<index>/select` resource and +protocol buffer query messages from legacy Riak Search to new Search +(Yokozuna). + +## services + +Lists available services on the node (e.g. `riak_kv`). + +```bash +riak-admin services +``` + +## ensemble-status + +This command is used to provide insight into the current status of the +consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature. + +```bash +riak-admin ensemble-status +``` + +This command can also be used to check on the status of a specific +consensus group in your cluster: + +```bash +riak-admin ensemble-status <group id> +``` + +Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency]. + +## handoff + +Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff]. + +## set + +Enables you to change the value of one of Riak's configuration +parameters on the fly, without needing to stop and restart the node. + +```bash +riak-admin set <variable>=<value> +``` + +The set command can only be used for the following +parameters: + +* `transfer_limit` +* `handoff.outbound` +* `handoff.inbound` +* `search.dist_query=off` will disable distributed query for the node +* `search.dist_query=on` will enable distributed query for the node +* `search.dist_query` will get the status of distributed query for the node + +The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted. + + +## show + +Whereas the [`riak-admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak +cluster, the `show` command enables you to view only some of those +statistics. + +```bash +riak-admin show <variable> +``` + +## describe + +Provides a brief description of one of Riak's [configurable parameters][config reference]. 
+
+```bash
+riak-admin describe <variable>
+```
+
+If you want to know the meaning of the `nodename` parameter:
+
+```bash
+riak-admin describe nodename
+```
+
+That will produce the following output:
+
+```
+nodename:
+  Name of the Erlang node
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/admin/riak-cli.md b/content/riak/kv/3.0.3/using/admin/riak-cli.md
new file mode 100644
index 0000000000..ed83e093d4
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/admin/riak-cli.md
@@ -0,0 +1,204 @@
+---
+title: "riak Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "riak CLI"
+    identifier: "cluster_admin_riak_cli"
+    weight: 102
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/running/tools/riak
+  - /riak/kv/3.0.3/ops/running/tools/riak
+---
+
+[configuration file]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/
+[escript]: http://www.erlang.org/doc/man/escript.html
+[`riak-admin`]: {{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#top
+[configuration]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/
+
+## riak
+
+This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands:
+
+```bash
+Usage: riak «command»
+where «command» is one of the following:
+    { help | start | stop | restart | ping | console | attach
+      attach-direct | ertspath | chkconfig | escript | version | getpid
+      top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } |
+      config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+## help
+
+Provides a brief description of all available commands.
+
+## start
+
+Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given.
+
+```bash
+riak start
+```
+
+## stop
+
+Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak stop
+```
+
+## restart
+
+Stops and then starts the running Riak node without exiting the Erlang VM.
+Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak restart
+```
+
+## ping
+
+Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding.
+
+```bash
+riak ping
+```
+
+## console
+
+Starts the Riak node in the foreground, giving access to the Erlang shell and
+runtime messages. Prints `Node is already running - use 'riak attach' instead`
+when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice.
+
+```bash
+riak console
+```
+
+## attach
+
+Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached.
+
+```bash
+riak attach
+```
+
+## attach-direct
+
+Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**.
+
+```bash
+riak attach-direct
+```
+
+## ertspath
+
+Outputs the path of the Riak Erlang runtime environment:
+
+```bash
+riak ertspath
+```
+
+## chkconfig
+
+Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output.
+
+```bash
+riak chkconfig
+```
+
+## escript
+
+Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment:
+
+```bash
+riak escript <filename>
+```
+
+## version
+
+Outputs the Riak version identifier:
+
+```bash
+riak version
+```
+
+## getpid
+
+Outputs the process identifier for the currently-running instance of Riak:
+
+```bash
+riak getpid
+```
+
+## top
+
+The `riak top` command is the direct equivalent of `riak-admin top`:
+
+```bash
+riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N]
+```
+
+More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation.
+
+## config
+
+Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration.
+
+```bash
+riak config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however, `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files.
+  The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example:
+
+    ```
+    -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args
+    ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message:
+
+    ```
+    -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args
+    ```
+
+* `effective` prints the effective configuration in the following syntax:
+
+    ```
+    parameter1 = value1
+    parameter2 = value2
+    ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error:
+
+    ```
+    Disabling cuttlefish, legacy configuration files found:
+      /etc/riak/app.config
+      /etc/riak/vm.args
+    Effective config is only visible for cuttlefish conf files.
+    ```
+
+* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following:
+
+    ```
+    Documentation for storage_backend
+    Specifies the storage engine used for Riak's key-value data
+    and secondary indexes (if supported).
+
+    Valid Values:
+      - one of: bitcask, leveldb, memory, multi, prefix_multi
+    Default Value : bitcask
+    Set Value : bitcask
+    Internal key : riak_kv.storage_backend
+    ```
+
+Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem.
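+
+For example, a quick way to confirm a single effective setting without
+opening the configuration files (`ring_size` here is just one standard
+riak.conf parameter, chosen for illustration):
+
+```bash
+# Show the effective value of one parameter...
+riak config effective | grep ring_size
+
+# ...and read its documentation, type, and default.
+riak config describe ring_size
+```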
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/admin/riak-control.md b/content/riak/kv/3.0.3/using/admin/riak-control.md
new file mode 100644
index 0000000000..3050664de9
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/admin/riak-control.md
@@ -0,0 +1,237 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/advanced/riak-control
+  - /riak/kv/3.0.3/ops/advanced/riak-control
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+or
+
+listener.https.<name> = 127.0.0.1:8096
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+
+```
+
+```appconfig
+ {riak_api,
+  [
+    %% Other configs
+    ... if HTTP is configured ...
+    {http,[{"127.0.0.1",8098}]},
+    ... if HTTPS is configured ...
+    {https,[{"127.0.0.1",8069}]},
+    %% Other configs
+  ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found in the document below.
+{{% /note %}}
+
+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, false},
+    %% Other configs
+  ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, true},
+    %% Other configs
+  ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/3.0.3/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
+
+## Authentication
+
+Riak Control provides you the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {auth, userlist}, %% The only other available option is "none"
+    %% Other configs
+  ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {userlist, [
+        {"bob", "bob_is_the_coolest"},
+        {"polly", "h4x0r123"},
+        {"riakrocks", "cap_theorem_4_life"}
+      ]}
+    %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to https://ip_address_of_https_listener:https_port/admin.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[ ![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png) ] ({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+ +Staged changes to the cluster: + +[ ![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png) ] ({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[ ![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png) ] ({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[ ![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png) ] ({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[ ![Node Management]({{<baseurl>}}images/control_node_management.png) ] ({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode). + +[ ![Ring View]({{<baseurl>}}images/control_current_ring.png) ] ({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. + + + + diff --git a/content/riak/kv/3.0.3/using/cluster-operations.md b/content/riak/kv/3.0.3/using/cluster-operations.md new file mode 100644 index 0000000000..935f75eead --- /dev/null +++ b/content/riak/kv/3.0.3/using/cluster-operations.md @@ -0,0 +1,109 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +aliases: +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak-admin handoff` interface to enable and disable handoff. 
+ +[Learn More >>][ops handoff] + + +#### [Monitoring Strong Consistency][ops strong consistency] + +Overview of the various statistics used in monitoring strong consistency. + +[Learn More >>][ops strong consistency] + + +#### [V3 Multi-Datacenter][ops v3 mdc] + +Explains how to manage V3 replication with the `riak-repl` command. + +[Learn More >>][ops v3 mdc] + + +#### [V2 Multi-Datacenter][ops v2 mdc] + +Explains how to manage V2 replication with the `riak-repl` command. + +[Learn More >>][ops v2 mdc] + + + + diff --git a/content/riak/kv/3.0.3/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/3.0.3/using/cluster-operations/active-anti-entropy.md new file mode 100644 index 0000000000..ca789cd169 --- /dev/null +++ b/content/riak/kv/3.0.3/using/cluster-operations/active-anti-entropy.md @@ -0,0 +1,289 @@ +--- +title: "Managing Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Managing Active Anti-Entropy" + identifier: "cluster_operations_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +version_history: + in: "2.9.1+" +aliases: + - /riak/kv/3.0.3/ops/advanced/aae/ + - /riak/3.0.3/ops/advanced/aae/ +--- +[config search#throttledelay]: {{<baseurl>}}riak/kv/3.0.3/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{<baseurl>}}riak/kv/3.0.3/configuring/search/#search-anti-entropy-throttle + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak. + +## Enabling Active Anti-Entropy + +Whether AAE is currently enabled in a node is determined by the value of +the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/). + +In Riak versions 2.0 and later, AAE is turned on by default. + +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. 
+ +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak-admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... + +================================ Entropy Trees ================================ +Index Built (ago) +------------------------------------------------------------------------------- +0 5.7 d +22835963083295358096932575511191922182123945984 5.6 d +45671926166590716193865151022383844364247891968 5.5 d +68507889249886074290797726533575766546371837952 4.3 d +91343852333181432387730302044767688728495783936 4.8 d + +================================ Keys Repaired ================================ +Index Last Mean Max +------------------------------------------------------------------------------- +0 0 0 0 +22835963083295358096932575511191922182123945984 0 0 0 +45671926166590716193865151022383844364247891968 0 0 0 +68507889249886074290797726533575766546371837952 0 0 0 +91343852333181432387730302044767688728495783936 0 0 0 + +``` + +Each of these three tables contains information for each +[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories: + +Category | Measures | Description +:--------|:---------|:----------- +**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed + | `All` | How long it has been since a partition exchanged with all of its replicas +**Entropy Trees** | `Built` | When the hash trees for a given partition were created +**Keys Repaired** | `Last` | The number of keys repaired during all key exchanges since the last node restart + | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart + | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart + +All AAE status information obtainable using the `riak-admin aae-status` +command is stored in-memory and is reset when a node is restarted with +the exception of hash tree build information, which is persisted on disk +(because hash trees themselves are persisted on disk). + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and +off but also to fine-tune your cluster's use of AAE, e.g. how +much memory AAE processes should consume, how frequently specific +processes should be run, etc. 
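+
+The sections below walk through these parameters one at a time. As a
+quick reference, here is a hedged `riak.conf` sketch collecting the
+defaults discussed on this page (parameter names and values are taken
+from the sections that follow; treat it as a starting point, not a
+definitive configuration):
+
+```riakconf
+## Where AAE data lives (see Data Directory)
+anti_entropy.data_dir = ./data/anti_entropy
+
+## How often AAE looks for work to do (see Trigger Interval)
+anti_entropy.trigger_interval = 15s
+
+## Weekly hash tree expiry, at most one build per hour (see Hash Trees)
+anti_entropy.tree.expiry = 1w
+anti_entropy.tree.build_limit.number = 1
+anti_entropy.tree.build_limit.per_timespan = 1h
+
+## LevelDB write buffer used for hash tree generation (see Write Buffer Size)
+anti_entropy.write_buffer_size = 4MB
+
+## Concurrent exchanges/builds and open-files limit
+## (see Open Files and Concurrency Limits)
+anti_entropy.concurrency_limit = 2
+anti_entropy.max_open_files = 20
+```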
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach the length
+specified by the `search.anti_entropy.throttle.$tier.solrq_queue_length`
+parameter, with the length of the delay given by the
+[`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on
+both in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but
+entails a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data inconsistencies going undetected for
+longer.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built.
+You can set the frequency using the
+`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
+default is every hour (`1h`); the number of hash tree builds is
+specified by `anti_entropy.tree.build_limit.number`, for which the
+default is 1.
+
+### Write Buffer Size
+
+While you are free to choose the backend for data storage in Riak,
+background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
+write buffer used by LevelDB for hash tree generation using the
+`anti_entropy.write_buffer_size` parameter. The default is `4MB`.
+
+### Open Files and Concurrency Limits
+
+The `anti_entropy.concurrency_limit` parameter determines how many AAE
+cross-node information exchanges or hash tree builds can happen
+concurrently. The default is `2`.
+
+The `anti_entropy.max_open_files` parameter sets an open-files limit for
+AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.
+
+## AAE and Riak Search
+
+Riak's AAE subsystem works to repair object inconsistencies both for
+normal key/value objects and for data related to [Riak Search](../../../developing/usage/search). In particular, AAE acts on indexes stored in
+[Solr](http://lucene.apache.org/solr/), the search platform that drives
+Riak Search. Implementation details for AAE and Search can be found in
+the [Search Details](../../reference/search/#active-anti-entropy-aae)
+documentation.
+
+You can check on the status of Search-related AAE using the following
+command:
+
+```bash
+riak-admin search aae-status
+```
+
+The output from that command can be interpreted just like the output
+discussed in the section on [monitoring](#monitoring-aae) above.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes.md
new file mode 100644
index 0000000000..99a9cd7617
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes.md
@@ -0,0 +1,198 @@
+---
+title: "Adding / Removing Nodes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Adding/Removing Nodes"
+    identifier: "cluster_operations_add_remove_nodes"
+    weight: 100
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/running/nodes/adding-removing
+  - /riak/kv/3.0.3/ops/running/nodes/adding-removing
+---
+
+[use running cluster]: {{<baseurl>}}riak/kv/3.0.3/using/running-a-cluster
+
+This page describes the process of adding and removing nodes to and from
+a Riak KV cluster. For information on creating a cluster, check out
+[Running a Cluster][use running cluster].
+
+## Start the Node
+
+Just like the initial configuration steps, this step has to be repeated
+for every node in your cluster. Before a node can join an existing
+cluster, it needs to be started. Depending on your mode of installation,
+use either the init scripts installed by the Riak binary packages or
+simply the script [`riak`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-cli/):
+
+```bash
+/etc/init.d/riak start
+```
+
+or
+
+```bash
+bin/riak start
+```
+
+When the node starts, it will look for a cluster description, known as
+the **ring file**, in its data directory.
If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. This command +would stage a join to that cluster: + +```bash +bin/riak-admin cluster join riak@192.168.2.2 +``` + +If the join request is successful, you should see the following: + +``` +Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2' +``` + +If you have multiple nodes that you would like to join to an existing +cluster, repeat this process for each of them. + +## Joining Nodes to Form a Cluster + +The process of joining a cluster involves several steps, including +staging the proposed cluster nodes, reviewing the cluster plan, and +committing the changes. + +After staging each of the cluster nodes with `riak-admin cluster join` +commands, as in the section above, the next step in forming a cluster is +to review the proposed plan of changes. This can be done with the +`riak-admin cluster plan` command, which is shown in the example below. 
+
+```
+=============================== Staged Changes ================================
+Action         Nodes(s)
+-------------------------------------------------------------------------------
+join           'riak@192.168.2.3'
+join           'riak@192.168.2.4'
+join           'riak@192.168.2.5'
+join           'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+                         After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid     100.0%     20.3%    'riak@192.168.2.2'
+valid       0.0%     20.3%    'riak@192.168.2.3'
+valid       0.0%     20.3%    'riak@192.168.2.4'
+valid       0.0%     20.3%    'riak@192.168.2.5'
+valid       0.0%     18.8%    'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+  12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+  13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from the cluster in two ways. The first assumes
+that the node is being decommissioned, for example, because its added
+capacity is not needed anymore or because it's being explicitly replaced
+with a new one. The second is relevant for failure scenarios in which a
+node has crashed and is irrecoverable, and thus must be removed from the
+cluster from another node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster.
+
+Similarly to joining a node, after executing `riak-admin cluster leave`
+the cluster plan must be reviewed with `riak-admin cluster plan` and
+the changes committed with `riak-admin cluster commit`.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing. Running
+`riak-admin cluster leave` without an argument simply selects the
+current node for you automatically.
+
+As with `riak-admin cluster leave`, the plan to have a node leave the
+cluster must first be reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before any changes will
+actually take place.
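+
+Putting the above together, here is a hedged sketch of the full
+decommission workflow for a healthy node (the first command runs on the
+node that is leaving; the rest can be run from any node in the cluster):
+
+```bash
+# On the node being decommissioned: stage the leave
+riak-admin cluster leave
+
+# From any node: review the staged plan, then commit it
+riak-admin cluster plan
+riak-admin cluster commit
+
+# Watch handoff activity until the leaving node has handed off
+# all of its partitions
+riak-admin transfers
+```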
+ + +## Pausing a `join` or `leave` + +{{% note title="Warning" %}} +Pausing may impact cluster health and is not recommended for more than a short period of time. +{{% /note %}} + +To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0: + +```bash +riak-admin transfer-limit <node> 0 +``` + + + + diff --git a/content/riak/kv/3.0.3/using/cluster-operations/backend.md b/content/riak/kv/3.0.3/using/cluster-operations/backend.md new file mode 100644 index 0000000000..960bea2b83 --- /dev/null +++ b/content/riak/kv/3.0.3/using/cluster-operations/backend.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "Backend" +description: "" +project: "riak_kv" +project_version: 3.0.3 +#menu: +# riak_kv-3.0.3: +# name: "Backend" +# identifier: "cluster_operations_backend" +# weight: 112 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +**TODO: Add content** + + + + diff --git a/content/riak/kv/3.0.3/using/cluster-operations/backing-up.md b/content/riak/kv/3.0.3/using/cluster-operations/backing-up.md new file mode 100644 index 0000000000..8fe88caf44 --- /dev/null +++ b/content/riak/kv/3.0.3/using/cluster-operations/backing-up.md @@ -0,0 +1,271 @@ +--- +title: "Backing Up" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Backing Up" + identifier: "cluster_operations_backing_up" + weight: 106 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/3.0.3/ops/running/backups + - /riak/kv/3.0.3/ops/running/backups +--- + +[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters +[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference +[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/reference/strong-consistency +[concept aae]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/ +[aae read repair]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy + +Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios. + +Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios. + +This page covers how to perform backups of Riak KV data. + +## Overview + +Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar. + +Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes. + +The basic process for getting a backup of Riak KV from a node is as follows: + +1. Stop Riak KV with `riak stop`. +2. Backup the appropriate data, ring, and configuration directories. +3. Start Riak KV. + +Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting. + +{{% note title="Backups and eventual consistency" %}} +Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node. + +Data could exist on some nodes and not others at the exact time a backup is made. 
Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy). +{{% /note %}} + +## OS-Specific Directory Locations + +The default Riak KV data, ring, and configuration directories for each of the supported operating systems is as follows: + +#### Debian and Ubuntu + +Data | Directory +:----|:--------- +Bitcask | `/var/lib/riak/bitcask` +LevelDB | `/var/lib/riak/leveldb` +Ring | `/var/lib/riak/ring` +Configuration | `/etc/riak` +Cluster Metadata | `/var/lib/riak/cluster_meta` +Search | `/var/lib/riak/yz` +Strong consistency | `/var/lib/riak/ensembles` + +#### Fedora and RHEL + +Data | Directory +:----|:--------- +Bitcask | `/var/lib/riak/bitcask` +LevelDB | `/var/lib/riak/leveldb` +Ring | `/var/lib/riak/ring` +Configuration | `/etc/riak` +Cluster Metadata | `/var/lib/riak/cluster_meta` +Search | `/var/lib/riak/yz` +Strong consistency | `/var/lib/riak/ensembles` + +#### FreeBSD + +Data | Directory +:----|:--------- +Bitcask | `/var/db/riak/bitcask` +LevelDB | `/var/db/riak/leveldb` +Ring | `/var/db/riak/ring` +Configuration | `/usr/local/etc/riak` +Cluster Metadata | `/var/db/riak/cluster_meta` +Search | `/var/db/riak/yz` +Strong consistency | `/var/db/riak/ensembles` + +#### OS X + +Data | Directory +:----|:--------- +Bitcask | `./data/bitcask` +LevelDB | `./data/leveldb` +Ring | `./data/riak/ring` +Configuration | `./etc` +Cluster Metadata | `./data/riak/cluster_meta` +Search | `./data/riak/yz` +Strong consistency | `./data/ensembles` + +**Note**: OS X paths are relative to the directory in which the package +was extracted. + +#### SmartOS + +Data | Directory +:----|:--------- +Bitcask | `/var/db/riak/bitcask` +LevelDB | `/var/db/riak/leveldb` +Ring | `/var/db/riak/ring` +Configuration | `/opt/local/etc/riak` +Cluster Metadata | `/var/db/riak/cluster_meta` +Search | `/var/db/riak/yz` +Strong consistency | `/var/db/riak/ensembles` + +#### Solaris + +Data | Directory +:----|:--------- +Bitcask | `/opt/riak/data/bitcask` +LevelDB | `/opt/riak/data/leveldb` +Ring | `/opt/riak/ring` +Configuration | `/opt/riak/etc` +Cluster Metadata | `/opt/riak/cluster_meta` +Search | `/opt/riak/yz` +Strong consistency | `/opt/riak/data/ensembles` + +## Performing Backups + +{{% note title="Deprecation notice" %}} +In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#backup) command commonly used for +backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead. +{{% /note %}} + +Backups can be accomplished through a variety of common methods. Standard utilities such `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment. + +A simple shell command, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux +installation. + +The following examples use `tar`: + +{{% note %}} +Backups must be performed on while Riak KV is stopped to prevent data loss. 
+{{% /note %}} + +### Bitcask + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak +``` + +### LevelDB + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak +``` + +### Cluster Metadata + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/cluster_meta +``` + +### Search / Solr Data + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/yz +``` + +### Strong Consistency Data + +Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature +can be stored in an analogous fashion: + +```bash +tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \ + /var/lib/riak/ensembles +``` + +## Restoring a Node + +The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment. + +If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process: + +1. Install Riak on the new node. +2. Restore your old node's configuration files, data directory, and ring + directory. +3. Start the node and verify proper operation with `riak ping`, + `riak-admin status`, and other methods you use to check node health. + +If the node name of a restored node (`-name` argument in `vm.args` or +`nodename` parameter in `riak.conf`) is different than the name of the +node that the restored backup was taken from, you will need to +additionally: + +1. Mark the original instance down in the cluster using + [`riak-admin down <node>`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#down) +2. Join the restored node to the cluster using + [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster-join) +3. Replace the original instance with the renamed instance with + [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster-force-replace) +4. Plan the changes to the cluster with `riak-admin cluster plan` +5. Finally, commit the cluster changes with `riak-admin cluster commit` + +{{% note %}} +For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/3.0.3/using/admin/). +{{% /note %}} + +For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`. + +1. Join to any existing cluster node. + + ```bash + riak-admin cluster join riak@riak2.example.com + ``` + +2. Mark the old instance down. + + ```bash + riak-admin down riak@riak1.example.com + ``` + +3. Force-replace the original instance with the new one. + + ```bash + riak-admin cluster force-replace \ + riak@riak1.example.com riak@riak6.example.com + ``` + +4. Display and review the cluster change plan. + + ```bash + riak-admin cluster plan + ``` + +5. Commit the changes to the cluster. 
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+Your [configuration files][config reference] should also be changed to match the new name in addition to running the commands (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change, but the hostnames are used for the node names and the hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member-status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/3.0.3/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/cluster-operations/bucket-types.md b/content/riak/kv/3.0.3/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..6d51ca6d65
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/cluster-operations/bucket-types.md
@@ -0,0 +1,63 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional setup step on the
+command line.
+
+## Creating a Bucket Type
+
+When creating a new bucket type, you can create one without
+any properties and set individual buckets to be indexed. The step below
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + + + + diff --git a/content/riak/kv/3.0.3/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/3.0.3/using/cluster-operations/changing-cluster-info.md new file mode 100644 index 0000000000..77606d517d --- /dev/null +++ b/content/riak/kv/3.0.3/using/cluster-operations/changing-cluster-info.md @@ -0,0 +1,458 @@ +--- +title: "Changing Cluster Information" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Changing Cluster Info" + identifier: "cluster_operations_change_info" + weight: 101 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/3.0.3/ops/running/nodes/renaming + - /riak/kv/3.0.3/ops/running/nodes/renaming +--- + +[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference + +## Change the Node Name + +The node name is an important setting for the Erlang VM, especially when +you want to build a cluster of nodes, as the node name identifies both +the Erlang application and the host name on the network. All nodes in +the Riak cluster need these node names to communicate and coordinate +with each other. + +In your configuration files, the node name defaults to `riak@127.0.0.1`. +To change the node name, change the following line: + +```riakconf +nodename = riak@127.0.0.1 +``` + +```vmargs +-name riak@127.0.0.1 +``` + +Change it to something that corresponds to either the IP address or a +resolvable host name for this particular node, like so: + +```riakconf +nodename = riak@192.168.1.10 +``` + +```vmargs +-name riak@192.168.1.10 +``` + +## Change the HTTP and Protocol Buffers binding address + +By default, Riak's HTTP and Protocol Buffers services are bound to the +local interface, i.e. 127.0.0.1, and are therefore unable to serve +requests from the outside network. The relevant setting is in your +[configuration files][config reference]: + +```riakconf +# For HTTP +listener.http.internal = 127.0.0.1:8098 + +# For Protocol Buffers +listener.protobuf.internal = 127.0.0.1:8087 +``` + +```appconfig +% In the riak_api section + +% For HTTP +{http, [ {"127.0.0.1", 8098 } ]}, + +% For Protocol Buffers +{pb, [ {"127.0.0.1", 8087} ] }, +``` + +Either change it to use an IP address that corresponds to one of the +server's network interfaces, or 0.0.0.0 to allow access from all +interfaces and networks, e.g.: + +```riakconf +listener.http.internal = 0.0.0.0:8098 +``` + +```appconfig +% In the riak_core section +{http, [ {"0.0.0.0", 8098 } ]}, +``` + +The same configuration should be changed for the Protocol Buffers +interface if you intend on using it (which we recommend). 
Change the +following line: + +```riakconf +listener.protobuf.internal = 0.0.0.0:8087 +``` + +```appconfig +% In the riak_core section +{pb, [ {"0.0.0.0", 8087} ] }, +``` + +## Rename Single Node Clusters + +To rename a single-node development cluster: + +1. Stop the node with `riak stop`. + +2. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args` to the new name. + +3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`. + +4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`. + +5. Start Riak on the node with `riak start`. + + +## Rename Multi-Node Clusters + +For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node. + +Previous to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster wide downtime. + +There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario. + +The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method. + +### Example Scenario + +For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration: + +* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11 +* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12 +* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13 +* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14 +* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15 + +The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use. + +The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation. + +### Process + +This process can be accomplished in three phases. The details and steps required of each phase are presented in the following section. + +1. [Down the node to be reconfigured](#down) +2. [Reconfigure node to use new address](#reconfigure) +3. [Repeat previous steps on each node](#repeat) + + +<a id="down"></a> +#### Down the Node + +1. Stop Riak on `node1.localdomain`: + + ```bash + riak stop + ``` + + The output should look like this: + + ``` + Attempting to restart script through sudo -H -u riak + ok + ``` + +2. 
+From the `node2.localdomain` node, mark `riak@10.1.42.11` down:
+
+    ```bash
+    riak-admin down riak@10.1.42.11
+    ```
+
+    Successfully marking the node down should produce output like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: "riak@10.1.42.11" marked as down
+    ```
+
+    This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node.
+
+<a id="reconfigure"></a>
+#### Reconfigure Node to Use New Address
+
+Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps:
+
+1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example:
+
+    `riak.conf`: `nodename = riak@192.168.17.11`
+    `vm.args` : `-name riak@192.168.17.11`
+
+2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Rename Single Node Clusters](#rename-single-node-clusters).
+
+3. Rename the node's `ring` directory, the location of which is described in step 4 of [Rename Single Node Clusters](#rename-single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process.
+
+4. Start Riak on `node1.localdomain`.
+
+    ```bash
+    riak start
+    ```
+
+5. Join the node back into the cluster.
+
+    ```bash
+    riak-admin cluster join riak@10.1.42.12
+    ```
+
+    Successful staging of the join request should have output like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12'
+    ```
+
+6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`:
+
+    ```bash
+    riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11
+    ```
+
+    Successful force replacement staging output looks like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11'
+    ```
+
+7. 
Review the new changes with `riak-admin cluster plan:` + + ```bash + riak-admin cluster plan + ``` + + Example output: + + ```bash + Attempting to restart script through sudo -H -u riak + =========================== Staged Changes ============================ + Action Nodes(s) + ----------------------------------------------------------------------- + join 'riak@192.168.17.11' + force-replace 'riak@10.1.42.11' with 'riak@192.168.17.11' + ----------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.11' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ####################################################################### + After cluster transition 1/1 + ####################################################################### + + ============================= Membership ============================== + Status Ring Pending Node + ----------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@10.1.42.12' + valid 20.3% -- 'riak@10.1.42.13' + valid 20.3% -- 'riak@10.1.42.14' + valid 18.8% -- 'riak@10.1.42.15' + ----------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 13 + 13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11' + ``` + +8. Commit the new changes to the cluster with `riak-admin cluster commit`: + + ```bash + riak-admin cluster commit + ``` + + Output from the command should resemble this example: + + ```bash + Attempting to restart script through sudo -H -u riak + Cluster changes committed + ``` + +9. Check that the node is participating in the cluster and functioning as expected: + + ```bash + riak-admin member-status + ``` + + Output should resemble this example: + + ```bash + Attempting to restart script through sudo -H -u riak + ============================= Membership ============================== + Status Ring Pending Node + ----------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@10.1.42.12' + valid 20.3% -- 'riak@10.1.42.13' + valid 20.3% -- 'riak@10.1.42.14' + valid 18.8% -- 'riak@10.1.42.15' + ----------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +10. Monitor hinted handoff transfers to ensure they have finished with the `riak-admin transfers` command. + +11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed. + +{{% note title="Note" %}} +When using the `riak-admin force-replace` command, you will always get a +warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be +lost`. Since we didn't delete any data files and we are replacing the node +with itself under a new name, we will not lose any replicas. +{{% /note %}} + +<a id="repeat"></a> +#### Repeat previous steps on each node + +Repeat the steps above for each of the remaining nodes in the cluster. + +Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster. 
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the steps below can be used to rename nodes in a cluster that is being restored from backups. These steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (or `-name` in `vm.args`) from `riak@10.1.42.11` to the new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid       20.3%      --      'riak@192.168.17.11'
+    down        20.3%      --      'riak@10.1.42.12'
+    down        20.3%      --      'riak@10.1.42.13'
+    down        20.3%      --      'riak@10.1.42.14'
+    down        18.8%      --      'riak@10.1.42.15'
+    -------------------------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+
+    ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+    ```bash
+    ================================== Claimant ===================================
+    Claimant:  'riak@192.168.17.11'
+    Status:     up
+    Ring Ready: true
+
+    ============================== Ownership Handoff ==============================
+    No pending changes.
+
+    ============================== Unreachable Nodes ==============================
+    All nodes are up and reachable
+    ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf` (or `-name` in `vm.args`) as described above.
+
+2. Move aside the ring directory. As in [Rename Multi-Node Clusters](#rename-multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. 
Force replace each node with its old node name. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`. + +6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to below: + + ```bash + =============================== Staged Changes ================================ + Action Details(s) + ------------------------------------------------------------------------------- + force-replace 'riak@10.1.42.12' with 'riak@192.168.17.12' + force-replace 'riak@10.1.42.13' with 'riak@192.168.17.13' + force-replace 'riak@10.1.42.14' with 'riak@192.168.17.14' + force-replace 'riak@10.1.42.15' with 'riak@192.168.17.15' + join 'riak@192.168.17.12' + join 'riak@192.168.17.13' + join 'riak@192.168.17.14' + join 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + + WARNING: All of 'riak@10.1.42.12' replicas will be lost + WARNING: All of 'riak@10.1.42.13' replicas will be lost + WARNING: All of 'riak@10.1.42.14' replicas will be lost + WARNING: All of 'riak@10.1.42.15' replicas will be lost + + NOTE: Applying these changes will result in 1 cluster transition + + ############################################################################### + After cluster transition 1/1 + ############################################################################### + + ================================= Membership ================================== + Status Ring Pending Node + ------------------------------------------------------------------------------- + valid 20.3% -- 'riak@192.168.17.11' + valid 20.3% -- 'riak@192.168.17.12' + valid 20.3% -- 'riak@192.168.17.13' + valid 20.3% -- 'riak@192.168.17.14' + valid 18.8% -- 'riak@192.168.17.15' + ------------------------------------------------------------------------------- + Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + + Partitions reassigned from cluster changes: 51 + 13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12' + 13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13' + 13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14' + 12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15' + ``` + +7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`. + +8. 
+Once the cluster transition has completed, all node names should have
+changed and be marked as valid in `riak-admin member-status`, as shown
+below:
+
+    ```bash
+    ================================= Membership ==================================
+    Status     Ring    Pending    Node
+    -------------------------------------------------------------------------------
+    valid       20.3%      --      'riak@192.168.17.11'
+    valid       20.3%      --      'riak@192.168.17.12'
+    valid       20.3%      --      'riak@192.168.17.13'
+    valid       20.3%      --      'riak@192.168.17.14'
+    valid       18.8%      --      'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    ```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/cluster-operations/handoff.md b/content/riak/kv/3.0.3/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..4ef7cbda4e
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/cluster-operations/handoff.md
@@ -0,0 +1,120 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/running/handoff
+  - /riak/kv/3.0.3/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to edit your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command
+requires that you specify both a node or nodes to be targeted by the
+command and whether you'd like to disable inbound handoff, outbound
+handoff, or both. The `disable` command works just like `enable`. To
+give just one example, this command would disable all forms of handoff
+on all nodes:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster.
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table providing the following information about each
+node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, you will see details of each ongoing
+transfer.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the configuration
+reference linked above.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/cluster-operations/inspecting-node.md b/content/riak/kv/3.0.3/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..96392c3e6a
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,496 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/running/nodes/inspecting
+  - /riak/kv/3.0.3/ops/running/nodes/inspecting
+---
+
+When you need to inspect a Riak node to gather metrics on performance or
+diagnose potential issues, a number of tools are available to help. These
+are either included with Riak itself or made available through the Riak
+community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note, for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required for statistics to be generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact, as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node.
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET
+FSM Siblings are inextricably linked. These are one-minute stats.
+
+Stat | Description
+------------------------------|---------------------------------------------------
+`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute
+`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute
+
+### Total Stats
+
+Total Stats represent the total number of times a particular activity
+has occurred since this node was started.
+
+Stat | Description
+---------------------------------------|---------------------------------------------------
+`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes
+`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes
+`vnode_gets_total` | Total number of GETs coordinated by local vnodes
+`vnode_puts_total` | Total number of PUTs coordinated by local vnodes
+`read_repairs_total` | Total number of Read Repairs this node has coordinated
+`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination
+`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy
+`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads
+`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes
+`vnode_index_writes_postings_total` | Total number of individual secondary index values written
+`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes
+`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted
+`pbc_connects_total` | Total number of Protocol Buffers connections made
+`precommit_fail` | Total number of pre-commit hook failures
+`postcommit_fail` | Total number of post-commit hook failures
+`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection
+`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection
+`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas
+`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas
+`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas
+`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas
+
+### Timestamps
+
+Some of the Erlang applications that make up Riak contribute
+statistics to `riak-admin status`. The timestamps below record, in
+Epoch time, the last time statistics for that application were
+generated.
+
+Stat | Description
+--------------------|---------------------------------------------------
+`riak_kv_stat_ts`   | The last time Riak KV stats were generated.
+`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated.
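+
+For example, to confirm that stats are being refreshed, you can pull out
+just these timestamps and convert one to a readable date (a small sketch;
+the timestamp value is made up, and the `date -d` flag is GNU-specific):
+
+```bash
+# Extract the stat-generation timestamps from the full status output
+riak-admin status | grep _stat_ts
+
+# Convert an Epoch timestamp (illustrative value) to a human-readable date
+date -d @1609459200
+```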
+
+### Ring
+
+General ring information is reported in `riak-admin status`.
+
+Stat | Description
+---------------------|---------------------------------------------------
+`ring_members` | List of nodes that are members of the ring
+`ring_num_partitions`| The number of partitions in the ring
+`ring_ownership` | List of all nodes in the ring and their associated partition ownership
+`ring_creation_size` | Ring size this cluster was created with
+
+### CPU and Memory
+
+CPU statistics are taken directly from Erlang's `cpu_sup` module,
+documentation for which can be found at [ErlDocs: cpu_sup](http://erlang.org/doc/man/cpu_sup.html).
+
+Stat | Description
+-------------|---------------------------------------------------
+`cpu_nprocs` | Number of operating system processes
+`cpu_avg1` | The average number of active processes for the last 1 minute (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg5` | The average number of active processes for the last 5 minutes (equivalent to the top(1) command's load average when divided by 256)
+`cpu_avg15` | The average number of active processes for the last 15 minutes (equivalent to the top(1) command's load average when divided by 256)
+
+Memory statistics are taken directly from the Erlang virtual machine,
+documentation for which can be found at [ErlDocs: Memory](http://erlang.org/doc/man/erlang.html#memory-0).
+
+Stat | Description
+------------------------|---------------------------------------------------
+`memory_total` | Total allocated memory (sum of processes and system)
+`memory_processes` | Total amount of memory allocated for Erlang processes
+`memory_processes_used` | Total amount of memory used by Erlang processes
+`memory_system` | Total allocated memory that is not directly related to an Erlang process
+`memory_atom` | Total amount of memory currently allocated for atom storage
+`memory_atom_used` | Total amount of memory currently used for atom storage
+`memory_binary` | Total amount of memory used for binaries
+`memory_code` | Total amount of memory allocated for Erlang code
+`memory_ets` | Total memory allocated for Erlang Term Storage
+`mem_total` | Total available system memory
+`mem_allocated` | Total memory allocated for this node
+
+### Erlang VM
+
+The statistics below describe properties of the Erlang VM.
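+
+Several of these properties can also be read directly from a live node via
+`riak attach`, using standard OTP introspection calls (a sketch; the calls
+below correspond to the `sys_otp_release`, `sys_process_count`, and
+`sys_wordsize` rows in the table that follows):
+
+```erlang
+%% Run from the console opened by `riak attach`
+erlang:system_info(otp_release).    %% cf. sys_otp_release
+erlang:system_info(process_count).  %% cf. sys_process_count
+erlang:system_info(wordsize).       %% cf. sys_wordsize
+```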
+
+Stat | Description
+--------------------------|---------------------------------------------------
+`nodename` | The name this node uses to identify itself
+`connected_nodes` | A list of the nodes that this node is aware of at this time
+`sys_driver_version` | String representing the Erlang driver version in use by the runtime system
+`sys_global_heaps_size` | Current size of the shared global heap
+`sys_heap_type` | String representing the heap type in use (one of private, shared, hybrid)
+`sys_logical_processors` | Number of logical processors available on the system
+`sys_otp_release` | Erlang OTP release version in use on the node
+`sys_process_count` | Number of processes currently running in the Erlang VM
+`sys_smp_support` | Boolean value representing whether symmetric multi-processing (SMP) is available
+`sys_system_version` | Detailed Erlang version information
+`sys_system_architecture` | The node operating system and hardware architecture
+`sys_threads_enabled` | Boolean value representing whether threads are enabled
+`sys_thread_pool_size` | Number of threads in the asynchronous thread pool
+`sys_wordsize` | Size of Erlang term words in bytes as an integer; for example, 4 is returned on 32-bit architectures and 8 is returned on 64-bit architectures
+
+### Miscellaneous Information
+
+Miscellaneous Information provides additional details particular to this
+node.
+
+Stat | Description
+---------------------------|---------------------------------------------------
+`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as undefined if LevelDB is not being used.
+`disk` | Information about the disk, taken from Erlang's disksup module. Reported as `[{"ID",KBytes_Used,Percent_Util}]`.
+`storage_backend` | The storage backend currently in use.
+
+### Pipeline Metrics
+
+The following metrics from `riak_pipe` are generated during MapReduce
+operations.
+
+Stat | Description
+--------------------------------|---------------------------------------------------
+`pipeline_active` | The number of pipelines active in the last 60 seconds
+`pipeline_create_count` | The total number of pipelines created since the node was started
+`pipeline_create_error_count` | The total number of pipeline creation errors since the node was started
+`pipeline_create_error_one` | The number of pipeline creation errors in the last 60 seconds
+`pipeline_create_one` | The number of pipelines created in the last 60 seconds
+
+### Application and Subsystem Versions
+
+The specific version of each Erlang application and subsystem which
+makes up a Riak node is present in the `riak-admin status` output. Each
+application is linked below next to its version identifier.
+
+Stat | Description
+------------------------|---------------------------------------------------
+`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl)
+`riak_control_version` | [Riak Control](http://github.com/basho/riak_control)
+`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info)
+`riak_search_version` | [Riak Search](http://github.com/basho/riak_search)
+`merge_index_version` | [Merge Index](http://github.com/basho/merge_index)
+`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv)
+`sidejob_version` | [Sidejob](http://github.com/basho/sidejob)
+`riak_api_version` | [Riak API](http://github.com/basho/riak_api)
+`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe)
+`riak_core_version` | [Riak Core](http://github.com/basho/riak_core)
+`bitcask_version` | [Bitcask](http://github.com/basho/bitcask)
+`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats)
+`webmachine_version` | [Webmachine](http://github.com/basho/webmachine)
+`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb)
+`inets_version` | [inets](http://erlang.org/doc/apps/inets/)
+`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js)
+`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/)
+`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/)
+`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon)
+`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/)
+`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/)
+`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/)
+`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/)
+`lager_version` | [Lager](http://github.com/DeadZen/lager)
+`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush)
+`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/)
+`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/)
+`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/)
+`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/)
+
+### Riak Search Statistics
+
+The following statistics related to Riak Search message queues are
+available.
+
+Stat | Description
+-----------------------------|---------------------------------------------------
+`riak_search_vnodeq_max` | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_mean` | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_median` | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_min` | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute
+`riak_search_vnodeq_total` | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started
+`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem
+
+Note that under ideal operation, and with the exception of
+`riak_search_vnodes_running`, these statistics should contain low values
+(e.g., 0-10). The presence of higher values could be indicative of an issue.
+
+## `riak-debug`
+
+The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes.
+
+`riak-debug` also runs `riak-admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well.
+
+{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}}
+The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion.
+{{% /note %}}
+
+## Strong Consistency Stats
+
+Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The tables below list those stats.
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +## riak-admin diag + +Running `riak-admin diag` by itself will perform a check of all of the +data partitions in your cluster. 
It will return a listing of partitions
+that have been checked, each of which looks something like this:
+
+```
+{1392993748081016843912887106182707253109560705024,  % the partition checked
+ 'dev-rel@127.0.0.1'},                               % that partition's nodename
+```
+
+At the end of that (potentially very long) listing of checked
+partitions, it will print notices, warnings, and other pieces of
+information about issues that it has found, including date/time, message
+type, and a detailed description. Here's an example:
+
+```
+15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file.
+15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance.
+```
+
+Messages bear the following types (derived from
+[syslog](http://en.wikipedia.org/wiki/Syslog) security levels):
+
+* `debug`
+* `info`
+* `notice`
+* `warning`
+* `error`
+* `critical`
+* `alert`
+* `emergency`
+
+#### Command flags
+
+Attaching the `--help` flag will return a list of flags and commands
+that can be used with Riaknostic:
+
+```
+Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...]
+
+-h, --help            Display help/usage dialogue
+-d, --level           Minimum message severity level (default: notice)
+-l, --list            Describe available diagnostic tasks
+--export              Package system info in '/export.zip'
+check_name            A specific check to run
+```
+
+Running `riak-admin diag` with the `--list` flag will return a list of
+available diagnostic checks. The following checks are available:
+
+Check | Description
+:-----|:-----------
+`disk` | Data directory permissions and atime
+`dumps` | Find crash dumps
+`memory_use` | Measure memory usage
+`nodes_connected` | Cluster node liveness
+`ring_membership` | Cluster membership validity
+`ring_preflists` | Check if the ring satisfies `n_val`
+`ring_size` | Check if the ring size is valid
+`search` | Check whether Riak Search is enabled on all nodes
+
+The `--level` flag enables you to specify the log level and thus to
+filter messages based on type. You can pass in any of the message types
+listed above (`debug`, `info`, etc.).
+
+The `--level` flag can be used when running `riak-admin diag` with or
+without specifying a diagnostic check.
+
+#### Contributing
+
+Do you have an idea that would help us improve Riaknostic? If so, fork
+the [GitHub repository](https://github.com/basho/riaknostic) and send us
+a pull request with your changes. The code is documented with
+[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API
+Docs a read before you contribute.
+
+If you want to run the Riaknostic script while developing and you don't
+have it hooked up to your local Riak installation, you can invoke it
+directly like so:
+
+```bash
+./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options]
+```
+
+Those extra options are usually assigned by the `riak-admin` script for
+you, but here's how to set them:
+
+* `--etc` - The location of your Riak configuration directory (usually
+  `/etc`). In the example above, configuration is in the generated
+  directory of a source checkout of Riak.
+* `--base` - The "base" directory of Riak, usually the root of the
+  generated directory or `/usr/lib/riak` on Linux. Scan the
+  `riak-admin` script for how the `RUNNER_BASE_DIR` variable is
+  assigned on your platform.
+* `--user` - The user/UID as which the Riak node runs.
In a source
+  checkout, it's the current user; on most systems, it's `riak`.
+
+## Related Resources
+
+* [The riak-admin configuration management tool](../../admin/riak-admin/)
+* [Riaknostic](http://riaknostic.basho.com/)
+* [HTTP API Status](../../../developing/api/http/status/)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/cluster-operations/load-balancing.md b/content/riak/kv/3.0.3/using/cluster-operations/load-balancing.md
new file mode 100644
index 0000000000..9ea5bbc6ad
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/cluster-operations/load-balancing.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Load Balancing"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+#menu:
+#  riak_kv-3.0.3:
+#    name: "Load Balancing"
+#    identifier: "cluster_operations_load_balancing"
+#    weight: 111
+#    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+**TODO: Add content (not sure where this exists in docs)**
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/cluster-operations/logging.md b/content/riak/kv/3.0.3/using/cluster-operations/logging.md
new file mode 100644
index 0000000000..bb3aa08abc
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/cluster-operations/logging.md
@@ -0,0 +1,47 @@
+---
+title: "Enabling and Disabling Debug Logging"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Logging"
+    identifier: "cluster_operations_logging"
+    weight: 105
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+If you'd like to enable debug logging on the current node, i.e. set the
+console log level to `debug`, you can do so without restarting the node
+by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following:
+
+```erlang
+lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug).
+```
+
+You should replace the file location above (`/var/log/riak/console.log`)
+with your platform-specific location, e.g. `./log/console.log` for a
+source installation. This location is specified by the
+`log.console.file` configuration parameter.
+
+If you'd like to enable debug logging on _all_ nodes instead of just one
+node, you can open the Erlang console of any running node with `riak
+attach` and enter the following:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])).
+```
+
+As before, use the appropriate log file location for your cluster.
+
+At any time, you can set the log level back to `info`:
+
+```erlang
+rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])).
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/cluster-operations/replacing-node.md b/content/riak/kv/3.0.3/using/cluster-operations/replacing-node.md
new file mode 100644
index 0000000000..e7adbadc7d
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/cluster-operations/replacing-node.md
@@ -0,0 +1,100 @@
+---
+title: "Replacing a Node"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Replacing a Node"
+    identifier: "cluster_operations_replace_node"
+    weight: 102
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+At some point, for various reasons, you might need to replace a node in
+your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/3.0.3/using/repair-recovery)).
Here is the recommended way to go
+about replacing a node.
+
+1. Back up your data directory on the node in question. In this example
+scenario, we'll call the node `riak4`:
+
+    ```bash
+    sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+    ```
+
+    If you have any unforeseen issues at any point in the node
+    replacement process, you can restore the node's data from this
+    backup.
+
+2. Download and install Riak on the new node you wish to bring into the
+cluster and have it replace the `riak4` node. We'll call the new node
+`riak7` for the purpose of this example.
+
+3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-cli/#start):
+
+    ```bash
+    riak start
+    ```
+
+4. Plan the join of the new `riak7` node to an existing node already
+participating in the cluster (for example, `riak0`) with the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster) command executed on the new `riak7` node:
+
+    ```bash
+    riak-admin cluster join riak0
+    ```
+
+5. Plan the replacement of the existing `riak4` node with the new
+`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster replace riak4 riak7
+    ```
+
+    <div class=info>
+    <div class=title>Single Nodes</div>
+    If a node is started singly using default settings (as, for example,
+    you might do when you are building your first test environment), you
+    will need to remove the ring files from the data directory after you
+    edit `/etc/vm.args`. `riak-admin cluster replace` will not work, as
+    the node has not been joined to a cluster.
+    </div>
+
+6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster) command executed on the new
+`riak7` node:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+7. If the changes are correct, you can commit them with the
+[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster) command:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster):
+
+    ```bash
+    riak-admin cluster clear
+    ```
+
+Once you have successfully replaced the node, the old `riak4` node should
+begin leaving the cluster. You can check on ring readiness after replacing
+the node with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#ringready)
+and [`riak-admin member-status`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#member-status)
+commands.
+
+{{% note title="Ring Settling" %}}
+You'll need to make sure that no other ring changes occur between the time
+when you start the new node and the time when the ring settles with the new
+IP info.
+
+The ring is considered settled when the new node reports `true` when you run
+the `riak-admin ringready` command.
+{{% /note %}} + + + + diff --git a/content/riak/kv/3.0.3/using/cluster-operations/secondary-indexes.md b/content/riak/kv/3.0.3/using/cluster-operations/secondary-indexes.md new file mode 100644 index 0000000000..d118e085e3 --- /dev/null +++ b/content/riak/kv/3.0.3/using/cluster-operations/secondary-indexes.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 3.0.3 +#menu: +# riak_kv-3.0.3: +# name: "Secondary Indexes" +# identifier: "cluster_operations_2i" +# weight: 109 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + + diff --git a/content/riak/kv/3.0.3/using/cluster-operations/strong-consistency.md b/content/riak/kv/3.0.3/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..47e70b805c --- /dev/null +++ b/content/riak/kv/3.0.3/using/cluster-operations/strong-consistency.md @@ -0,0 +1,76 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +aliases: +--- + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats. 
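+
+Because these stats share the `consistent_` prefix, you can pull all of
+them from a node at once (an illustrative filter over the full status
+output; the stat names match the tables below):
+
+```bash
+riak-admin status | grep consistent_
+```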
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response + + + + diff --git a/content/riak/kv/3.0.3/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/3.0.3/using/cluster-operations/tictac-active-anti-entropy.md new file mode 100644 index 0000000000..b37a83836c --- /dev/null +++ b/content/riak/kv/3.0.3/using/cluster-operations/tictac-active-anti-entropy.md @@ -0,0 +1,34 @@ +--- +title: "TicTac Active Anti-Entropy" +description: "An Active Anti-Entropy library" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "TicTac Active Anti-Entropy" + 
    identifier: "TicTac_aae"
+    weight: 111
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/kv/3.0.3/ops/advanced/tictacaae/
+  - /riak/3.0.3/ops/advanced/ticktacaae/
+---
+
+
+
+Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning.
+
+## TicTac AAE
+
+The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to fully integrate the library into the KV 3.0 release.
+
+TicTac Active Anti-Entropy makes two changes to the way anti-entropy has previously worked in Riak. The first change is to the way Merkle trees are constructed, so that they are built incrementally. The second change allows the underlying anti-entropy key store to be key-ordered while still allowing faster access to keys via their Merkle tree location or the last modified date of the object.
+
+## Configuring AAE
+
+Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and
+off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements.
+
+
+
diff --git a/content/riak/kv/3.0.3/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/3.0.3/using/cluster-operations/v2-multi-datacenter.md
new file mode 100644
index 0000000000..1f273e5975
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/cluster-operations/v2-multi-datacenter.md
@@ -0,0 +1,263 @@
+---
+title_supertext: "V2 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "V2 Multi-Datacenter"
+    identifier: "cluster_operations_v2"
+    weight: 115
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.3/ops/mdc/v2/operations
+  - /riak/kv/3.0.3/ops/mdc/v2/operations
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+Riak's Multi-Datacenter Replication system is largely
+controlled by the `riak-repl` command. The sections below detail the
+available subcommands.
+
+## add-listener
+
+Adds a listener (primary) to the given node, IP address, and port.
+
+```bash
+riak-repl add-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-nat-listener
+
+Adds a NAT-aware listener (primary) to the given node, IP address, port,
+NAT IP, and NAT port. If a non-NAT listener already exists with the same
+internal IP and port, it is "upgraded" to a NAT listener.
+
+```bash
+riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010
+```
+
+## del-listener
+
+Removes and shuts down a listener (primary) on the given node, IP
+address, and port.
+
+```bash
+riak-repl del-listener <nodename> <listen_ip> <port>
+```
+
+Below is an example usage:
+
+```bash
+riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010
+```
+
+## add-site
+
+Adds a site (secondary) to the local node, connecting to the specified
+listener.
+ +```bash +riak-repl add-site <ipaddr> <portnum> <sitename> +``` + +Below is an example usage: + +```bash +riak-repl add-site 10.0.1.156 9010 newyork +``` + +## del-site + +Removes a site (secondary) from the local node by name. + +```bash +riak-repl del-site <sitename> +``` + +Below is an example usage: + +```bash +riak-repl del-site newyork +``` + +## status + +Obtains status information about replication. Reports counts on how much +data has been transmitted, transfer rates, message queue lengths of +clients and servers, number of fullsync operations, and connection +status. This command only displays useful information on the leader +node. + +```bash +riak-repl status +``` + +## start-fullsync + +Manually initiates a fullsync operation with connected sites. + +```bash +riak-repl start-fullsync +``` + +## cancel-fullsync + +Cancels any fullsync operations in progress. If a partition is in +progress, synchronization will stop after that partition completes. +During cancellation, `riak-repl status` will show `cancelled` in the +status. + +```bash +riak-repl cancel-fullsync +``` + +## pause-fullsync + +Pauses any fullsync operations in progress. If a partition is in +progress, synchronization will pause after that partition completes. +While paused, `riak-repl status` will show `paused` in the status +information. Fullsync may be cancelled while paused. + +```bash +riak-repl pause-fullsync +``` + +## resume-fullsync + +Resumes any fullsync operations that were paused. If a fullsync +operation was running at the time of the pause, the next partition will +be synchronized. If not, it will wait until the next `start-fullsync` +command or `fullsync_interval`. + +```bash +riak-repl resume-fullsync +``` + +## riak-repl Status Output + +The following definitions describe the output of the `riak-repl status` +command. Please note that many of these statistics will only appear on +the current leader node, and that all counts will be reset to 0 upon +restarting Riak. + +### Client + +Field | Description +:-----|:----------- +`client_stats` | See <a href="{{< baseurl >}}riak/kv/3.0.3/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a> +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected sites +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of site connections made to this node +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. 
+
+### Server
+
+Field | Description
+:-----|:-----------
+`server_bytes_recv` | The total number of bytes the server (listener) has received
+`server_bytes_sent` | The total number of bytes the server (listener) has sent
+`server_connect_errors` | The number of listener-to-site connection errors
+`server_connects` | The number of times the listener connects to the client site
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_stats` | See <a href="{{< baseurl >}}riak/kv/3.0.3/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a>
+
+### Elections and Objects
+
+Field | Description
+:-----|:-----------
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+
+### Other
+
+Field | Description
+:-----|:-----------
+`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>`
+`[sitename]_ips` | Defines a replication site
+`leader` | Which node is the current leader of the cluster
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the client (site) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The listeners currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>`connected` - The IP address and port of a connected client (site)</li><li>`cluster_name` - The name of the connected client (site)</li><li>`connecting` - The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used.
They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul>
+
+## Bounded Queue
+
+The bounded queue is responsible for holding objects that are waiting to
+participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/3.0.3/configuring/v2-multi-datacenter/) guide for more information.
+
+Field | Description
+------|------------
+`queue_pid` | The Erlang process ID of the bounded queue
+`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*.
+`queue_length` | The number of Riak objects currently in the bounded queue
+`queue_byte_size` | The size of all objects currently in the queue
+`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*.
+`queue_percentage` | The percentage of the queue that is full
+`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged
+`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more.
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node on which the server (listener) is running
+`site` | The connected site name configured with `riak-repl add-site`
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/3.0.3/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above
+`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul>
+`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server
+
+## Keylist Strategy
+
+The following fields appear under both the `keylist_server` and
+`keylist_client` fields. Any differences are described in the table.
+
+Field | Description
+------|------------
+`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
+`partition_start` | The number of elapsed seconds since replication has started on a given partition
+`stage_start` | The number of elapsed seconds since replication has started on a given stage
+`get_pool_size` | The number of Riak get finite state workers available to process requests
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter.md
new file mode 100644
index 0000000000..493f72a966
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter.md
@@ -0,0 +1,425 @@
+---
+title_supertext: "V3 Multi-Datacenter"
+title: "Replication Operations"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "V3 Multi-Datacenter"
+    identifier: "cluster_operations_v3"
+    weight: 114
+    parent: "managing_cluster_operations"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.3/ops/mdc/v3/operations
+  - /riak/kv/3.0.3/ops/mdc/v3/operations
+---
+
+[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.3/configuring/v3-multi-datacenter
+[config v3 nat]: {{<baseurl>}}riak/kv/3.0.3/configuring/v3-multi-datacenter/nat
+[config v3 quickstart]: {{<baseurl>}}riak/kv/3.0.3/configuring/v3-multi-datacenter/quick-start
+[config v3 ssl]: {{<baseurl>}}riak/kv/3.0.3/configuring/v3-multi-datacenter/ssl
+[ref v3 stats]: {{<baseurl>}}riak/kv/3.0.3/using/reference/multi-datacenter/statistics
+
+This document explains how to manage replication with the `riak-repl`
+command. The behavior of some of these commands can be altered by
+setting appropriate [configuration][config v3 mdc] values.
+
+All commands need to be run only once, on a single node of a cluster, for
+the changes to propagate to all other nodes. All changes will persist
+across node restarts and will automatically take effect when nodes are
+added to the cluster.
+
+## Cluster Connectivity
+
+#### clustername
+
+Sets the `clustername` for all nodes in a Riak cluster.
+
+* Without a parameter, returns the current name of the cluster
+* With a parameter, names the current cluster
+
+To **set** the `clustername`:
+
+* Syntax: `riak-repl clustername <clustername>`
+* Example: `riak-repl clustername Boston`
+
+To **get** the `clustername`:
+
+* Syntax: `riak-repl clustername`
+* Example: `riak-repl clustername`
+
+#### connect
+
+The `connect` command establishes communications from a source cluster
+to a sink cluster of the same ring size. The `host:port` of the sink
+cluster is used for this. The IP and port to connect to can be found in
+the `advanced.config` of the remote cluster, under `riak_core` and
+`cluster_mgr`.
+
+The `host` can be either an IP address...
+
+* Syntax: `riak-repl connect <ip>:<port>`
+* Example: `riak-repl connect 192.168.2.1:9080`
+
+...or a hostname that will resolve to an IP address.
+
+* Syntax: `riak-repl connect <host>:<port>`
+* Example: `riak-repl connect Austin:9080`
+
+#### disconnect
+
+Disconnects a source cluster from a sink cluster.
+
+You may define a `host:port` combination...
+
+* Syntax: `riak-repl disconnect <host>:<port>`
+* Example: `riak-repl disconnect 192.168.2.1:9080`
+
+...or use the *name* of the cluster.
+
+* Syntax: `riak-repl disconnect <sink_clustername>`
+* Example: `riak-repl disconnect Austin`
+
+#### connections
+
+Displays a list of connections between source and sink clusters.
+
+* Syntax: `riak-repl connections`
+* Example: `riak-repl connections`
+
+#### clusterstats
+
+Displays current cluster stats using an optional `ip:port` as well as an
+optional `protocol-id`.
+
+`protocol-id` can be one of the following:
+
+* `cluster_mgr`
+* `rt_repl`
+* `fs_repl`
+
+The `clusterstats` command in use:
+
+* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>`
+* Example: `riak-repl clusterstats 192.168.2.1:9080`
+* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl`
+
+
+## Realtime Replication Commands
+
+#### realtime enable
+
+Enable realtime replication from a source cluster to sink clusters.
+
+This will start queuing updates for replication. The cluster will still
+require an invocation of `realtime start` for replication to occur.
+
+* Syntax: `riak-repl realtime enable <sink_clustername>`
+* Example: `riak-repl realtime enable Austin`
+
+#### realtime disable
+
+Disable realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime disable <sink_clustername>`
+* Example: `riak-repl realtime disable Austin`
+
+
+#### realtime start
+
+Start realtime replication connections from a source cluster to sink
+clusters. See also `realtime enable` (above).
+
+* Syntax: `riak-repl realtime start <sink_clustername>`
+* Example: `riak-repl realtime start Austin`
+
+#### realtime stop
+
+Stop realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime stop <sink_clustername>`
+* Example: `riak-repl realtime stop Austin`
+
+
+## Fullsync Replication Commands
+
+These behaviors can be altered by using the `advanced.config`
+`fullsync_on_connect` parameter. See the [Configuration Guide][config v3 mdc] for more information.
+
+#### fullsync enable
+
+Enable fullsync replication from a source cluster to sink clusters. By
+default, a fullsync will begin as soon as a connection to the remote
+cluster is established.
+
+* Syntax: `riak-repl fullsync enable <sink_clustername>`
+* Example: `riak-repl fullsync enable Austin`
+
+#### fullsync disable
+
+Disables fullsync for a cluster.
+
+* Syntax: `riak-repl fullsync disable <sink_clustername>`
+* Example: `riak-repl fullsync disable Austin`
+
+#### fullsync start
+
+Starts a fullsync. If the application configuration
+`fullsync_on_connect` is set to `false`, a fullsync needs to be started
+manually. This is also used to trigger a periodic fullsync using a cron
+job. While a fullsync is in progress, a `start` command is ignored and a
+message is logged.
+
+* Syntax: `riak-repl fullsync start <sink_clustername>`
+* Example: `riak-repl fullsync start Austin`
+
+#### fullsync stop
+
+Stops a fullsync.
+
+* Syntax: `riak-repl fullsync stop <sink_clustername>`
+* Example: `riak-repl fullsync stop Austin`
+
+## Cascading Realtime Writes
+
+#### realtime cascades
+
+Shows the current cascading realtime setting.
+
+* Syntax: `realtime cascades`
+* Example: `riak-repl realtime cascades`
+
+#### realtime cascades always
+
+Enable realtime cascading writes.
+
+* Syntax: `realtime cascades always`
+* Example: `riak-repl realtime cascades always`
+
+#### realtime cascades never
+
+Disable realtime cascading writes.
+
+* Syntax: `realtime cascades never`
+* Example: `riak-repl realtime cascades never`
+
+
+## NAT
+
+**Note**: See [V3 Multi Data Center Replication With NAT][config v3 nat]
+for more information.
+
+#### nat-map show
+
+Show the current NAT mapping table.
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### `proxy-get enable`
+
+Enable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provide a redirect to the `<to-cluster>` for `proxy_get` if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+
+Show the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+
+Delete an existing redirect so that `proxy_get` requests are routed to
+the original provider cluster ID again.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Display this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+  ```bash
+  riak-repl show-local-cluster-id
+  ```
+
+  Possible output:
+
+  ```
+  local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+  ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster. This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster across all nodes on that cluster for a fullsync
+to a sink cluster. This means that if fullsync is configured for two
+different clusters, both with a `max_fssource_cluster` of 5, up to 10
+fullsync workers can be in progress. This only affects nodes on the
+source cluster on which this parameter is defined via the configuration
+file or the command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node's number of maximum connections. This only affects nodes on
+the sink cluster on which this parameter is defined via the
+configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
+(Version 3) separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+  ```bash
+  riak-repl modes mode_repl12 mode_repl13
+  ```
+
+  Possible output:
+
+  ```
+  Current replication modes: [mode_repl12,mode_repl13]
+  ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+  ```bash
+  riak-repl modes
+  ```
+
+  Possible output:
+
+  ```
+  Current replication modes: [mode_repl12,mode_repl13]
+  ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and realtime replication replicate data from source clusters to
+sink clusters, but some configurations and metadata (such as search
+indices and bucket properties) will not be replicated.
+
+The non-replication of certain configurations and metadata supports
+heterogeneous cluster configurations, but there are operational steps
+you can take when you want homogeneous cluster configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Bucket and bucket type properties on the source cluster
+will _not_ be replicated from source clusters to sink clusters.
+
+If you want the properties for buckets or bucket types
+present on the source cluster to be propagated to sink clusters,
+you should update this data for each cluster at the same
+time you change the source cluster.
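+
+As an illustration, the same bucket type definition can be applied by
+hand on each cluster. This is a minimal sketch; the `logs` type name and
+its properties are hypothetical:
+
+```bash
+# Run once on a node of the source cluster and once on a node of each sink
+riak-admin bucket-type create logs '{"props":{"n_val":5}}'
+riak-admin bucket-type activate logs
+```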
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/performance.md b/content/riak/kv/3.0.3/using/performance.md
new file mode 100644
index 0000000000..def9593982
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/performance.md
@@ -0,0 +1,268 @@
+---
+title: "Improving Performance"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Performance"
+    identifier: "managing_performance"
+    weight: 206
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/kv/3.0.3/ops/tuning/linux/
+  - /riak/3.0.3/ops/tuning/linux/
+---
+
+Many Unix-like operating systems and distributions are tuned for desktop
+or light use out of the box and not for a production database. This
+guide describes recommended system performance tunings for operators of
+new and existing Riak clusters. The tunings presented in this guide
+should be considered a starting point. It is important to note what
+changes are made, and when, in order to measure the impact of those
+changes.
+
+For performance and tuning recommendations specific to running Riak
+clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/3.0.3/using/performance/amazon-web-services).
+
+{{% note title="Note on other operating systems" %}}
+Unless otherwise specified, the tunings recommended below are for Linux
+distributions. Users implementing Riak on BSD and Solaris distributions can
+use these tuning recommendations to make analogous changes in those operating
+systems.
+{{% /note %}}
+
+## Storage and File System Tuning
+
+### Virtual Memory
+
+Due to the heavily I/O-focused profile of Riak, swap usage can result in
+the entire server becoming unresponsive. We recommend setting
+`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much
+as possible:
+
+```config
+vm.swappiness = 0
+```
+
+Ideally, you should disable swap to ensure that Riak's process pages are
+not swapped. Disabling swap will allow Riak to crash in situations where
+it runs out of memory. This will leave a crash dump file, named
+`erl_crash.dump`, in the `/var/log/riak` directory, which can be used to
+determine the cause of the memory usage.
+
+### Transparent Huge Pages (THP)
+
+Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately this operation is rather OS-specific. As many of our customers run Red Hat 6, we have included instructions for that platform below. If you are using a different operating system, please refer to the documentation for your OS.
+
+In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line:
+
+```
+transparent_hugepage=never
+```
+
+For the change to become effective, a server reboot is required.
+
+{{% note title="Note on Kernel Tuning Tools" %}}
+Some kernel tuning tools such as ktune specify that THP should be enabled. This can cause THP to seem to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the kernel tuning tool you are using as to how to disable THP.
+{{% /note %}}
+
+### Mounts
+
+Riak makes heavy use of disk I/O for its storage operations. It is
important that you mount volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
+touched when read. This flag can be set temporarily using the following
+command:
+
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` flag can be set in `/etc/fstab` to make the
+mount permanent.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is completely fair queuing
+or `cfq`, which is designed for desktop use. While it is a good
+general-purpose scheduler, it is not designed to provide the kind of
+throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* The `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* The `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+
+To check the scheduler depth for block device `sda`, use the following
+command:
+
+```bash
+cat /sys/block/sda/queue/nr_requests
+```
+
+To increase the scheduler depth to 1024, use the following command:
+
+```bash
+echo 1024 > /sys/block/sda/queue/nr_requests
+```
+
+### Filesystem
+
+Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and
+[XFS](http://xfs.org/index.php/Main_Page) are recommended on some
+operating systems for greater reliability and recoverability.
+
+At this time, Basho can recommend using ZFS on Solaris, SmartOS, and
+OmniOS. ZFS may work well with Riak on direct Solaris clones like
+IllumOS, but we cannot yet recommend this. [ZFS on
+Linux](http://zfsonlinux.org) is still too early in its project lifetime
+to be recommendable for production use due to concerns that have been
+raised about excessive memory use. ZFS on FreeBSD is more mature than
+ZFS on Linux, but Basho has not yet performed sufficient performance and
+reliability testing to recommend using ZFS and Riak on FreeBSD.
+
+In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and
+[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on
+operating systems on which ZFS or XFS are not available or recommended.
+
+The ext4 file system defaults include two options that increase
+integrity but slow performance. Because Riak's integrity is based on
+multiple nodes holding the same data, these two options can be changed
+to boost I/O performance. We recommend setting `barrier=0` and
+`data=writeback` when using the ext4 filesystem.
+
+Similarly, the XFS file system defaults can be optimized to improve
+performance. We recommend setting `nobarrier`, `logbufs=8`,
+`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem.
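+
+For example, a hypothetical `/etc/fstab` entry combining these options
+for an ext4 data volume might look like the following (the device name
+and mount point are placeholders):
+
+```config
+# /etc/fstab -- illustrative entry for a dedicated Riak ext4 data volume
+/dev/sdb1  /var/lib/riak  ext4  noatime,barrier=0,data=writeback  0 0
+```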
+
+As with the `noatime` setting, these settings should be added to
+`/etc/fstab` so that they are persisted across server restarts.
+
+## Kernel and Network Tuning
+
+The following settings are minimally sufficient to improve many aspects
+of Riak usage on Linux, and should be added or updated in
+`/etc/sysctl.conf`:
+
+```config
+net.ipv4.tcp_max_syn_backlog = 40000
+net.core.somaxconn = 40000
+net.core.wmem_default = 8388608
+net.core.rmem_default = 8388608
+net.ipv4.tcp_sack = 1
+net.ipv4.tcp_window_scaling = 1
+net.ipv4.tcp_fin_timeout = 15
+net.ipv4.tcp_keepalive_intvl = 30
+net.ipv4.tcp_tw_reuse = 1
+net.ipv4.tcp_moderate_rcvbuf = 1
+```
+
+{{% note title="Note on system default" %}}
+In general, these recommended values should be compared with the system
+defaults and only changed if benchmarks or other performance metrics indicate
+that networking is the bottleneck.
+{{% /note %}}
+
+The following settings are optional, but may improve performance on a
+10Gb network:
+
+```config
+net.core.rmem_max = 134217728
+net.core.wmem_max = 134217728
+net.ipv4.tcp_mem = 134217728 134217728 134217728
+net.ipv4.tcp_rmem = 4096 277750 134217728
+net.ipv4.tcp_wmem = 4096 277750 134217728
+net.core.netdev_max_backlog = 300000
+```
+
+Certain network interfaces ship with on-board features that have been
+shown to hinder Riak network performance. These features can be disabled
+via `ethtool`.
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Re-tuning will be required if these values are changed, as they affect all
+network operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho in nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, increasing the open
+files limit is necessary. See [Open Files Limit]({{<baseurl>}}riak/kv/3.0.3/using/performance/open-files-limit/) for more
+details.
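+
+To quickly verify the limit in effect for the user that runs Riak, a
+check along these lines can be used (the `riak` user name is an
+assumption; adjust for your installation):
+
+```bash
+# Show the maximum number of open file descriptors for the riak user
+sudo -u riak sh -c 'ulimit -n'
+```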
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/3.0.3/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/3.0.3/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/3.0.3/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/3.0.3/using/performance/open-files-limit/)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/performance/amazon-web-services.md b/content/riak/kv/3.0.3/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..94c373846b
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/performance/amazon-web-services.md
@@ -0,0 +1,247 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/tuning/aws
+  - /riak/kv/3.0.3/ops/tuning/aws
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types which encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are Disk I/O, RAM, and Network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used.
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-Optimized EC2 instances, which provide between 500 Megabits per
+second and 1,000 Megabits per second of throughput with [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available and recommended for use with Provisioned
+IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they
+would physical servers to compensate for the performance variability caused
+by shared, virtualized resources. Plan to have more EC2 instance-based nodes
+than physical server nodes when estimating cluster size with respect to node
+count.
+{{% /note %}}
+
+## Operating System
+
+### Clocks
+
+NTP is configured by default on Amazon EC2 Linux instances. Please
+refer to the [Set the Time for an
+Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html)
+section of the EC2 documentation for steps on verifying if NTP is
+working properly. If NTP is not working properly, significant clock
+drift can occur.
+
+### Mounts and Scheduler
+
+On EBS volumes, the **deadline** scheduler should be used. To check the
+scheduler in use for block device `xvdf`, for example, use the following
+command:
+
+```bash
+cat /sys/block/xvdf/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/xvdf/queue/scheduler
+```
+
+More information on the disk scheduler is available in [Improving Performance](../).
+
+### Virtual Memory Subsystem
+
+EBS volumes have considerably less bandwidth than hardware disks. To
+avoid saturating EBS bandwidth and inducing IO latency spikes, it is
+recommended to tune the Linux virtual memory subsystem to flush smaller
+amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings).
+
+### Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems and back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally
+and are not affected by a wider problem like an AWS service outage. Try
+to determine the cause of the problem from the data you have collected.
+If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or re-sold under Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk.
+
+Have your collected data ready when contacting TI Tokyo Client Services. A
+Client Services Engineer (CSE) might request log files, configuration
+files, or other information.
+
+## Data Loss
+
+Many failures either do not entail data loss or have minimal loss that
+can be repaired automatically, without intervention. Outage of a single
+node does not necessarily cause data loss, as other replicas of every
+key are available elsewhere in the cluster. Once the node is detected as
+down, other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called hinted handoff).
+
+The more severe data loss scenarios usually relate to hardware failure
+(in the case of AWS, service failure or instance termination). In the
+cases where data is lost, several options are available for restoring
+the data:
+
+1. Restore from backup. A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but can be used to partially restore data from
+   lost EBS volumes. If running in a RAID configuration, rebuilding the
+   array may also be possible.
+2. Restore from Multi-Datacenter Replication. If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the
+   `riak-repl` command.
+3. Restore using intra-cluster repair.
Riak versions 1.2 and greater
+   include a "repair" feature which will restore lost partitions with
+   data from other replicas. This currently has to be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho CSE.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho is strongly recommended.
+
+## Benchmarking
+
+Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that
+simulates application operations by constructing and communicating
+approximately-compatible data payloads with the Riak cluster directly.
+
+Benchmarking is critical to determining the appropriate EC2 instance
+types, and strongly recommended. More information is available on
+benchmarking Riak clusters with [Basho Bench](../benchmarking).
+
+Besides running Basho Bench, we also advise that you load test Riak with
+your own tests to ensure that the load imparted by MapReduce queries,
+full-text queries, and index queries is within the expected range.
+
+## Simulating Upgrades, Scaling, and Failure States
+
+In addition to simply measuring performance, it is also important to
+measure how performance degrades when the cluster is not in a
+steady state. While the cluster is under simulated live load, the
+following states might be simulated:
+
+1. Stop one or more nodes normally and restart them after a few moments
+   (simulates [rolling upgrade](../../../setup/upgrading/cluster)).
+2. Join two or more nodes to the cluster.
+3. Leave nodes from the cluster (after step #2).
+4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then
+   restart it.
+5. Hard-reboot a node's instance using the AWS console and then
+   restart it.
+6. Hard-stop and destroy a node's instance and build a new one from
+   backup.
+7. Via networking, e.g. firewall, partition one or more nodes from
+   the rest of the cluster and then restore the original
+   configuration.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. While the Riak node is out, other nodes may also
+be at risk if free capacity is low on the rest of the cluster, so
+monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to replace
+it.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart.
In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance, generally performs better, but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. Along with EBS you can optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/performance/benchmarking.md b/content/riak/kv/3.0.3/using/performance/benchmarking.md
new file mode 100644
index 0000000000..e02adf8b3f
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/performance/benchmarking.md
@@ -0,0 +1,602 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/benchmarking
+  - /riak/kv/3.0.3/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   `basho_bench`. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster. Do not run the `basho_bench` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download `basho_bench`
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/3.0.3/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building `basho_bench` from source. Later
  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the `erlang-crypto` package, which
+  is required by `basho_bench`.
+* Install `git` (to check out the `basho_bench` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory to generate the results into:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed `basho_bench` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common `~/` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+`/home/username/bench_results/current/`.
+
+If you built `basho_bench` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput - Operations per second over the duration of the test.
+* Latency at 99th percentile, 99.9th percentile and max latency for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running basho_bench, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
+
+#### Installing R on Ubuntu
+
+```bash
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed `basho_bench` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+`/home/username/bench_results/summary.png`.
+
+If you have built `basho_bench` from source, you can just use
+`make`. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the `basho_bench/README`.
+
+## How does it work?
+
+When Basho Bench starts (`basho_bench.erl`), it reads the
+configuration (`basho_bench_config.erl`), creates a new results
+directory, and then sets up the test (`basho_bench_app.erl` and
+`basho_bench_sup.erl`).
+
+During test setup, Basho Bench creates the following:
+
+* One **stats process** (`basho_bench_stats.erl`). This process
+  receives notifications when an operation completes, plus the
+  elapsed time of the operation, and stores it in a histogram. At
+  regular intervals, the histograms are dumped to `summary.csv` as
+  well as operation-specific latency CSVs (e.g. `put_latencies.csv`
+  for the PUT operation).
+* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting
+  (`basho_bench_worker.erl`). The worker process wraps a driver
+  module, specified by the [driver](#driver)
+  configuration setting. The driver is randomly invoked using the
+  distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the
+  driver invokes operations is governed by the [mode](#mode) setting.
+
+Once these processes have been created and initialized, Basho Bench
+sends a run command to all worker processes, causing them to begin the
+test. Each worker is initialized with a common seed value for random
+number generation to ensure that the generated workload is reproducible
+at a later date.
+
+During the test, the workers repeatedly call `driver:run/4`, passing in
+the next operation to run, a keygen function, a valuegen function, and
+the last state of the driver. The worker process times the operation,
+and reports this to the stats process when the operation has completed.
+
+Finally, once the test has been run for the duration specified in the
+config file, all workers and stats processes are terminated and the
+benchmark ends. The measured latency and throughput of the test can be
+found in `./tests/current/`. Previous results are in timestamped
+directories of the form `./tests/YYYYMMDD-HHMMSS/`.
+
+## Configuration
+
+Basho Bench ships with a number of sample configuration files, available
+in the `/examples` directory.
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`driver:run/4` function with a new operation. There are two possible
+values:
+
+* `max` - generate as many ops per second as possible
+* `{rate, N}` - generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e.: as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average.
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                        [{basho_bench_driver_null,run,
+                                             [asdfput,
+                                              #Fun<basho_bench_keygen.4.4674>,
+                                              #Fun<basho_bench_valgen.0.1334>,
+                                              undefined]},
+                                         {basho_bench_worker,
+                                             worker_next_op,1},
+                                         {basho_bench_worker,
+                                             max_worker_run_loop,1}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` - Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` - Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` - Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` - Directly invokes the Bitcask API
+* `basho_bench_driver_dets` - Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` - operation completed successfully
+* `{error, Reason, NewState}` - operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` - operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` - operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` - generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` - the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` - the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` - selects an integer from uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` - selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` - the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a key generator function.
The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` - takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` - takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` - generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` - generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` - generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed
+% starting at 1000 bytes and a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+Default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `/tests`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}.
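+%% Note: the name type used here (longnames) must match how the target
+%% Riak nodes were started; nodes started with -name require longnames.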
+```
+
+#### riakclient_replies
+
+This value is used for R-values during a get operation, and W-values
+during a put operation.
+
+```erlang
+% Expect 1 reply.
+{riakclient_replies, 1}.
+```
+
+#### riakclient_bucket
+
+The Riak bucket to use for reading and writing values. The default is
+`<<"test">>`.
+
+```erlang
+% Use the "bench" bucket.
+{riakclient_bucket, <<"bench">>}.
+```
+
+### basho_bench_driver_riakc_pb Settings
+
+#### riakc_pb_ips
+
+A list of IP addresses to connect the workers to. A random IP will be
+chosen for each worker.
+
+The default is `{riakc_pb_ips, [{127,0,0,1}]}`
+
+```erlang
+% Connect to a cluster of 3 machines
+{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]}
+```
+
+#### riakc_pb_port
+
+The port on which to connect to the PBC interface.
+
+The default is `{riakc_pb_port, 8087}`
+
+#### riakc_pb_bucket
+
+The bucket to use for testing.
+
+The default is `{riakc_pb_bucket, <<"test">>}`
+
+### basho_bench_driver_http_raw Settings
+
+#### http_raw_ips
+
+A list of IP addresses to connect the workers to. Each worker makes
+requests to each IP in a round-robin fashion.
+
+The default is `{http_raw_ips, ["127.0.0.1"]}`
+
+```erlang
+% Connect to a cluster of machines in the 10.x network
+{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}.
+```
+
+#### http_raw_port
+
+Select the default port to connect to for the HTTP server.
+
+The default is `{http_raw_port, 8098}`.
+
+```erlang
+% Connect on port 8090
+{http_raw_port, 8090}.
+```
+
+#### http_raw_path
+
+The base path to use for accessing Riak, usually `"/riak/<bucket>"`.
+
+The default is `{http_raw_path, "/riak/test"}`.
+
+```erlang
+% Place test data in another_bucket
+{http_raw_path, "/riak/another_bucket"}.
+```
+
+#### http_raw_params
+
+Additional parameters to add to the end of the URL. This can be used
+to set the `r`/`w`/`dw`/`rw` parameters as desired.
+
+The default is `{http_raw_params, ""}`.
+
+```erlang
+% Set R=1, W=1 for testing a system with n_val set to 1
+{http_raw_params, "?r=1&w=1"}.
+```
+
+#### http_raw_disconnect_frequency
+
+How often, in seconds or number of operations, the HTTP clients
+(workers) should forcibly disconnect from the server.
+
+The default is `{http_raw_disconnect_frequency, infinity}` (which
+means that Basho Bench should never forcibly disconnect).
+
+```erlang
+% Disconnect after 60 seconds
+{http_raw_disconnect_frequency, 60}.
+
+% Disconnect after 200 operations
+{http_raw_disconnect_frequency, {ops, 200}}.
+```
+
+## Custom Driver
+
+A custom driver must expose the following callbacks.
+
+```erlang
+% Create the worker
+% ID is an integer
+new(ID) -> {ok, State} or {error, Reason}.
+
+% Run an operation
+run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}.
+```
+
+See the [existing
+drivers](https://github.com/basho/basho_bench/tree/master/src) for
+more details.
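+
+As a minimal sketch, a config file wiring in a custom driver might look
+like the following (the module name `basho_bench_driver_example` and the
+`ebin` path are hypothetical):
+
+```erlang
+%% Load the compiled custom driver (illustrative path)
+{code_paths, ["/home/username/example_driver/ebin"]}.
+{driver, basho_bench_driver_example}.
+%% Only invoke operations the driver actually implements
+{operations, [{get, 1}]}.
+```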
+ + + + diff --git a/content/riak/kv/3.0.3/using/performance/erlang.md b/content/riak/kv/3.0.3/using/performance/erlang.md new file mode 100644 index 0000000000..441b78a894 --- /dev/null +++ b/content/riak/kv/3.0.3/using/performance/erlang.md @@ -0,0 +1,371 @@ +--- +title: "Erlang VM Tuning" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Erlang VM" + identifier: "performance_erlang" + weight: 105 + parent: "managing_performance" +toc: true +aliases: + - /riak/3.0.3/ops/tuning/erlang + - /riak/kv/3.0.3/ops/tuning/erlang +--- + +Riak was written almost exclusively in [Erlang](http://www.erlang.org) +and runs on an Erlang virtual machine (VM), which makes proper Erlang VM +tuning an important part of optimizing Riak performance. The Erlang VM +itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm). + +The table below lists some of the parameters that are available, showing +both their names as used in Erlang and their names as Riak parameters. + +Erlang parameter | Riak parameter +:----------------|:-------------- +[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads` +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K` +[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit` +[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports` +[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online` +[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W` +[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size` +[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables` +[`+scl`](http://www.erlang.org/doc/main/erl.html#+scl) | `erlang.schedulers.compaction_of_load` +[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval` +[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp` +[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing` +[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size` +[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime` +[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after` +[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump` +[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables` +`-name` | `nodename` + +{{% note title="Note on upgrading to 2.0" %}} +In versions of Riak prior to 2.0, Erlang VM-related parameters were specified +in a `vm.args` configuration file; in versions 2.0 and later, all +Erlang-VM-specific parameters are set in the `riak.conf` file. If you're +upgrading to 2.0 from an earlier version, you can still use your old `vm.args` +if you wish. Please note, however, that if you set one or more parameters in +both `vm.args` and in `riak.conf`, the settings in `vm.args` will override +those in `riak.conf`. 
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored.
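+
+For example, the following sketch (the values are illustrative, for a
+hypothetical machine with 8 configured processors) pins the total thread
+count while leaving two threads offline:
+
+```riakconf
+erlang.schedulers.total = 8
+erlang.schedulers.online = 6
+```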
+
+### Scheduler Wakeup Interval
+
+Scheduler wakeup is an optional process whereby Erlang VM schedulers are
+periodically scanned to determine whether they have "fallen asleep,"
+i.e. whether they have an empty [run
+queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which
+this process occurs can be set, in milliseconds, using the
+`erlang.schedulers.force_wakeup_interval` parameter, which corresponds
+to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by
+default, which disables scheduler wakeup.
+
+Erlang distributions like R15Bx have a tendency to put schedulers to
+sleep too often. If you are using a more recent distribution, i.e. if
+you are running Riak 2.0 or later, you most likely won't need to enable
+scheduler wakeup.
+
+### Scheduler Compaction and Balancing
+
+The Erlang scheduler offers two methods of distributing load across
+schedulers: **compaction of load** and **utilization balancing** of
+load.
+
+Compaction of load is used by default. When enabled, the Erlang VM will
+attempt to fully load as many scheduler threads as possible, i.e. it
+will attempt to ensure that scheduler threads do not run out of work. To
+that end, the VM will take into account the frequency with which
+schedulers run out of work when making decisions about which schedulers
+should be assigned work. You can disable compaction of load by setting
+the `erlang.schedulers.compaction_of_load` setting to `false` (in the
+older configuration system, set the `+scl` flag to `false`).
+
+The other option, utilization balancing, is disabled by default in favor
+of compaction of load. When utilization balancing is enabled instead, the
+Erlang VM will strive to balance scheduler utilization as equally as
+possible between schedulers, without taking into account the frequency
+at which schedulers run out of work. You can enable utilization
+balancing by setting the `erlang.schedulers.utilization_balancing`
+setting to `true` (or the `+sub` flag to `true` in the older
+configuration system).
+
+At any given time, only compaction of load _or_ utilization balancing
+can be used. If you set both parameters to `false`, Riak will default to
+using compaction of load; if both are set to `true`, Riak will enable
+whichever setting is listed first in `riak.conf` (or `vm.args` if you're
+using the older configuration system).
+
+## Port Settings
+
+Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang
+Port Mapper Daemon, for most inter-node communication. In this system,
+other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node
+identifiers to a TCP port. You can specify a port or range of ports for
+Riak nodes to listen on as well as the maximum number of concurrent
+ports/sockets.
+
+### Port Range
+
+By default, epmd binds to TCP port 4369 and listens on the wildcard
+interface. For inter-node communication itself, the Erlang VM uses an
+unpredictable port by default, binding to port 0, which means that it
+uses the first available port. This can make it difficult to configure [firewalls](../../security).
+
+To make configuring firewalls easier, you can instruct the Erlang VM to
+use either a limited range of TCP ports or a single TCP port. The
+minimum and maximum can be set using the
+`erlang.distribution.port_range.minimum` and
+`erlang.distribution.port_range.maximum` parameters, respectively.
The following would set the range to ports between 3000 and 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 3000
+erlang.distribution.port_range.maximum = 5000
+```
+
+```appconfig
+%% The older, app.config-based system uses different parameter names
+%% for specifying the minimum and maximum port
+
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 3000},
+          {inet_dist_listen_max, 5000}
+          % ...
+         ]}
+```
+
+You can set the Erlang VM to use a single port by setting the minimum to
+the desired port while setting no maximum. The following would set the
+port to 5000:
+
+```riakconf
+erlang.distribution.port_range.minimum = 5000
+```
+
+```appconfig
+{kernel, [
+          % ...
+          {inet_dist_listen_min, 5000},
+          % ...
+         ]}
+```
+
+If the minimum port is unset, the Erlang VM will listen on a random
+high-numbered port.
+
+### Maximum Ports
+
+You can set the maximum number of concurrent ports/sockets used by the
+Erlang VM using the `erlang.max_ports` setting. Possible values range
+from 1024 to 134217727. The default is 65536. In `vm.args` you can use
+either `+Q` or `-env ERL_MAX_PORTS`.
+
+## Asynchronous Thread Pool
+
+If thread support is available in your Erlang VM, you can set the number
+of asynchronous threads in the Erlang VM's asynchronous thread pool
+using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0
+to 1024. If thread support is available on your OS, the default is 64.
+Below is an example setting the number of async threads to 600:
+
+```riakconf
+erlang.async_threads = 600
+```
+
+```vmargs
++A 600
+```
+
+### Stack Size
+
+In addition to the number of asynchronous threads, you can set the
+memory allocated to each thread using the
+`erlang.async_threads.stack_size` parameter, which corresponds to the
+`+a` Erlang flag. You can specify that size in Riak using units such as
+KB, MB, or GB. The valid range is 16-8192 kilowords, which translates to
+64-32768 KB on 32-bit architectures. While there is no default, we
+suggest a stack size of 16 kilowords, which translates to 64 KB. We
+suggest such a small size because the number of asynchronous threads, as
+determined by `erlang.async_threads`, might be quite large in your
+Erlang VM. A 64 KB stack is enough for drivers delivered with
+Erlang/OTP but might not be large enough to accommodate drivers that use
+the `driver_async()` functionality, documented
+[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend
+setting higher values with caution, always keeping the number of
+available threads in mind.
+
+## Kernel Polling
+
+You can utilize kernel polling in your Erlang distribution if your OS
+supports it. Kernel polling can improve performance if many file
+descriptors are in use; the more file descriptors, the larger an effect
+kernel polling may have on performance. Kernel polling is enabled by
+default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`.
+This corresponds to the
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the
+Erlang VM. You can disable it by setting `erlang.K` to `off`.
+
+## Warning Messages
+
+Erlang's
+[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an
+event manager that registers error, warning, and info events from the
+Erlang runtime. By default, events from the `error_logger` are mapped as
+warnings, but you can also set messages to be mapped as errors or info
+reports using the `erlang.W` parameter (or `+W` in `vm.args`). The
+possible values are `w` (warnings), `e` (errors), or `i` (info reports).
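+
+For example, the following equivalent settings (shown as a sketch, not a
+recommendation) would map `error_logger` events to error reports instead
+of warnings:
+
+```riakconf
+erlang.W = e
+```
+
+```vmargs
++W e
+```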
+
+## Process Limit
+
+The `erlang.process_limit` parameter can be used to set the maximum
+number of simultaneously existing system processes (corresponding to
+Erlang's `+P` parameter). The valid range is 1024 to 134217727. The
+default is 256000.
+
+## Distribution Buffer
+
+You can set the size of the Erlang VM's distribution buffer busy limit
+(denoted by `+zdbbl` on the VM and in `vm.args`) by adding
+`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful
+on nodes with many `busy_dist_port` events, i.e. instances when the
+Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`),
+but this may be insufficient for some workloads. The maximum value is
+2097151 KB.
+
+A larger buffer limit will allow processes to buffer more outgoing
+messages. When the limit is reached, sending processes will be suspended
+until the buffer size has shrunk below the limit specified by
+`erlang.distribution_buffer_size`. Higher values will tend to produce
+lower latency and higher throughput but at the expense of higher RAM
+usage. You should evaluate your RAM resources prior to increasing this
+setting.
+
+## Erlang Built-in Storage
+
+Erlang uses a built-in database called
+[ets](http://www.erlang.org/doc/man/ets.html) \(Erlang Term Storage)
+for some processes that require fast access from memory in constant
+access time (rather than logarithmic access time). The maximum number
+of tables can be set using the `erlang.max_ets_tables` setting. The
+default is 256000, which is higher than the default limit of 1400 on the
+Erlang VM. The corresponding setting in `vm.args` is `+e`.
+
+Higher values for `erlang.max_ets_tables` will tend to provide more
+quick-access data storage but at the cost of higher RAM usage. Please
+note that the default values for `erlang.max_ets_tables` and
+`erlang.process_limit` (explained in the section [above](#process-limit)) are the same.
+
+## Crash Dumps
+
+By default, crash dumps from Riak's Erlang distribution are deposited in
+`./log/erl_crash.dump`. You can change this location using
+`erlang.crash_dump`. This is the equivalent of setting the
+[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables)
+environment variable for the Erlang VM.
+
+## Net Kernel Tick Time
+
+The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang
+system process that provides various forms of network monitoring. In a
+Riak cluster, one of the functions of the net kernel is to periodically
+check node liveness. **Tick time** is the frequency with which those
+checks happen. You can set that frequency using the
+`erlang.distribution.net_ticktime` parameter. The tick will occur every N seconds,
+where N is the value set. Thus, setting
+`erlang.distribution.net_ticktime` to `60` will make the tick occur once
+every minute. The corresponding flag in `vm.args` is `-kernel
+net_ticktime`.
+
+## Shutdown Time
+
+You can set how long the Erlang VM spends shutting down using the
+`erlang.shutdown_time` parameter. The default is `10s` (10 seconds).
+Once this duration elapses, all existing processes are killed.
+Decreasing shutdown time can be useful in situations in which you are
+frequently starting and stopping a cluster, e.g. in test clusters. In
+`vm.args` you can set the `-shutdown_time` flag in milliseconds.
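+
+As an illustrative sketch (pick a value appropriate to your
+environment), the following equivalent settings would shorten the
+shutdown window to 5 seconds:
+
+```riakconf
+erlang.shutdown_time = 5s
+```
+
+```vmargs
+-shutdown_time 5000
+```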
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/performance/latency-reduction.md b/content/riak/kv/3.0.3/using/performance/latency-reduction.md
new file mode 100644
index 0000000000..118c5d837e
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/performance/latency-reduction.md
@@ -0,0 +1,267 @@
+---
+title: "Latency Reduction Checklist"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Latency Reduction"
+    identifier: "performance_latency_reduction"
+    weight: 104
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/tuning/latency-reduction
+  - /riak/kv/3.0.3/ops/tuning/latency-reduction
+---
+
+Although latency is unavoidable in distributed systems like Riak, there
+are a number of actions that can be undertaken to reduce latency
+to the lowest levels possible within a cluster. In this guide, we'll
+list potential sources of high latency and what you can do about them.
+
+## Large Objects
+
+Riak always performs best with smaller objects. Large objects, which can
+be mistakenly inserted into Riak by your application or caused by
+siblings (see below), can often increase latency.
+
+We recommend keeping all objects stored in Riak smaller than 1-2 MB,
+preferably below 100 KB. Large objects lead to increased I/O activity
+and can put strain on memory resources. In some cases, just a few large
+objects can impact latency in a cluster, even for requests that are
+unrelated to those objects.
+
+If your use case requires large objects, we recommend checking out
+[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects.
+
+### Mitigation
+
+The best way to find out if large objects are impacting latency is to
+monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request
+to Riak's `/stats` endpoint, you will see the results for the following
+metrics related to object size, all of which are calculated only for
+`GET` operations (i.e. reads):
+
+Metric                        | Explanation
+:-----------------------------|:-----------
+`fsm_node_get_objsize_mean`   | The mean object size encountered by this node in the last minute
+`fsm_node_get_objsize_median` | The median object size encountered by this node in the last minute
+`fsm_node_get_objsize_95`     | The 95th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_99`     | The 99th-percentile object size encountered by this node in the last minute
+`fsm_node_get_objsize_100`    | The 100th-percentile object size encountered by this node in the last minute
+
+The `mean` and `median` measurements may not be good indicators,
+especially if you're storing billions of keys. Instead, you should be on
+the lookout for trends in the `95`, `99`, and `100` measures:
+
+* Is there an upward trend?
+* Do the metrics indicate that there are outliers?
+* Do these trends coincide with increased latency?
+
+If you suspect that large object size is impacting latency, try making
+the following changes to each node's [configuration](../../../configuring/reference):
+
+* If you are using the newer, `riak.conf`-based configuration system,
+the commented-out value for `erlang.distribution_buffer_size` is `32MB`.
+Uncomment this setting and re-start your node.
+* If you are using the older, `app.config`/`vm.args`-based configuration
+system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or
+higher (measured in kilobytes).
This increases the size of the +distributed Erlang buffer from its default of 1024 KB. Re-start your +node when configuration changes have been made. + +Large objects can also impact latency even if they're only present on +some nodes. If increased latency occurs only on N nodes, where N is your +[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes. + +If large objects are suspected, you should also audit the behavior of +siblings in your cluster, as explained in the [next section](#siblings). + +## Siblings + +In Riak, object conflicts are handled by keeping multiple versions of +the object in the cluster either until a client takes action to resolve +the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects). + +### Mitigation + +The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor +object size (or via an HTTP `GET` request to `/stats`). In the output of +`riak-admin status` in each node, you'll see the following +sibling-related statistics: + +Metric | Explanation +:------------------------------|:----------- +`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute + +Is there an upward trend in these statistics over time? Are there any +large outliers? Do these trends correspond to your observed latency +spikes? + +If you believe that sibling creation problems could be responsible for +latency issues in your cluster, you can start by checking the following: + +* If `allow_mult` is set to `true` for some or all of your buckets, be + sure that your application is correctly resolving siblings. Be sure to + read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate. + If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly. +* Application errors are a common source of problems with + siblings. Updating the same key over and over without passing a + [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. 
If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution)
+  strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) \(DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion.
+
+## Compaction and Merging
+
+The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through
+heavily I/O-intensive compaction phases during which they remove deleted
+data and reorganize data files on disk. During these phases, affected
+nodes may be slower to respond to requests than other nodes. If your
+cluster is using one or both of these backends, there are steps that can
+be taken to monitor and address latency issues.
+
+### Mitigation
+
+To determine whether compaction and merging cycles align with increased
+latency, keep an eye on your `console.log` files (and LevelDB `LOG`
+files if you're using LevelDB). Do Bitcask merging and/or LevelDB
+compaction events overlap with increased latencies?
+
+If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W are set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request.
+
+Beyond checking for `R=N` or `W=N` for requests, the recommended
+mitigation strategy depends on the backend:
+
+#### Bitcask
+
+With Bitcask, it's recommended that you:
+
+* Limit merging to off-peak hours to decrease the effect of merging
+cycles on node traffic
+* Stagger merge windows between nodes so that no more than one node is
+undergoing a merge phase at any given time
+
+Instructions on how to accomplish both can be found in our guide to
+[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask).
+
+It's also important that you adjust your maximum file size and merge
+threshold settings appropriately. The maximum file size setting is labeled
+`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system.
+
+Setting the maximum file size lower will cause Bitcask to merge more
+often (with less I/O churn), while setting it higher will induce less
+frequent merges with more I/O churn. To find settings that are ideal for
+your use case, we recommend checking out our guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask).
+
+#### LevelDB
+
+The more files you keep in memory, the faster LevelDB will perform in
+general. To make sure that you are using your system resources
+appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning).
+
+## OS Tuning
+
+While a number of latency-related problems can manifest themselves in
+development and testing environments, some performance limits only
+become clear in production environments.
+
+### Mitigation
+
+If you suspect that OS-level issues might be impacting latency, it might
+be worthwhile to revisit your OS-specific configurations. The following
+guides may be of help:
+
+* [Open files limit](../open-files-limit)
+* General [System performance tuning](../)
+* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/)
+
+## I/O and Network Bottlenecks
+
+Riak is a heavily I/O- and network-intensive system.
+Bottlenecks on either front can lead to undue latency in your cluster.
+We recommend an active monitoring strategy to detect problems
+immediately when they arise.
+
+### Mitigation
+
+To diagnose potential I/O bottlenecks, there are a number of Linux tools
+at your disposal, including
+[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/)
+and [netstat](http://en.wikipedia.org/wiki/Netstat).
+
+To diagnose potential overloads, Riak versions 1.3.2 and later come
+equipped with an overload protection feature designed to prevent
+cascading failures in overly busy nodes. This feature limits the number
+of GET and PUT finite state machines (FSMs) that can exist
+simultaneously on a single Riak node. Increased latency can result if a
+node is frequently running up against these maximums.
+
+* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an
+  idea of how many operations your nodes are coordinating. If you see
+  non-zero values in `node_get_fsm_rejected` or
+  `node_get_fsm_rejected_60s`, that means that some of your requests are
+  being discarded due to overload protection.
+* The FSM limits can be increased, but disabling overload protection
+  entirely is not recommended. More details on these settings are
+  available in the [release
+  notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for
+  Riak version 1.3.
+
+## Object Settings
+
+In versions 2.0 and later, Riak enables you to configure a variety of
+settings regarding Riak objects, including allowable object sizes, how
+many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings.
+
+A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail.
+
+> **Note on configuration files in 2.0**
+>
+> The object settings listed below are only available using the new system
+for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to
+these settings.
+
+### Object Size
+
+As stated above, we recommend _always_ keeping objects below 1-2 MB
+and preferably below 100 KB if possible. If you want to ensure that
+objects above a certain size do not get stored in Riak, you can do so by
+setting the `object.size.maximum` parameter lower than the default of
+`50MB`, which is far above the ideal object size. If you set this
+parameter to, say, `1MB` and attempt to store a 2 MB object, the write
+will fail and an error message will be returned to the client.
+
+You can also set an object size threshold past which a write will
+succeed but will register a warning in the logs by adjusting the
+`object.size.warning_threshold` parameter. The default is `5MB`.
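+
+As a brief sketch (the thresholds here are examples only), the following
+settings would reject writes above 1 MB and log a warning for any object
+larger than 200 KB:
+
+```riakconf
+object.size.maximum = 1MB
+object.size.warning_threshold = 200KB
+```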
+
+### Sibling Explosion Management
+
+In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing
+additional siblings when a specified sibling count is reached or set a
+warning threshold past which Riak logs an error (or both). This can be
+done using the `object.siblings.maximum` and
+`object.siblings.warning_threshold` settings. The default maximum is 100
+and the default warning threshold is 25.
+
+### Object Storage Format
+
+There are currently two possible binary representations for objects
+stored in Riak:
+
+* Erlang's native `term_to_binary` format, which tends to have a higher
+  space overhead
+* A newer, Riak-specific format developed for more compact storage of
+  smaller values
+
+You can set the object storage format using the `object.format`
+parameter: `0` selects Erlang's `term_to_binary` format while `1` (the
+default) selects the Riak-specific format.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/performance/multi-datacenter-tuning.md b/content/riak/kv/3.0.3/using/performance/multi-datacenter-tuning.md
new file mode 100644
index 0000000000..077fd7d495
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/performance/multi-datacenter-tuning.md
@@ -0,0 +1,47 @@
+---
+title_supertext: "Multi Data Center Replication:"
+title: "System Tuning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Multi-Datacenter Replication"
+    identifier: "performance_multi_datacenter_tuning"
+    weight: 110
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[perf index]: {{<baseurl>}}riak/kv/3.0.3/using/performance
+
+Depending on the size of your objects and your replication latency
+needs, you may need to configure your kernel settings to optimize
+throughput.
+
+## Linux
+
+Refer to the [System Performance Tuning][perf index] document.
+
+## Solaris
+
+On Solaris, the following settings are suggested:
+
+```bash
+/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000
+/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000
+/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000
+/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000
+/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/performance/open-files-limit.md b/content/riak/kv/3.0.3/using/performance/open-files-limit.md
new file mode 100644
index 0000000000..2d4e781aeb
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/performance/open-files-limit.md
@@ -0,0 +1,351 @@
+---
+title: "Open Files Limit"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Open Files Limit"
+    identifier: "performance_open_files_limit"
+    weight: 101
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/tuning/open-files-limit/
+  - /riak/kv/3.0.3/ops/tuning/open-files-limit/
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/
+[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files
+
+Riak KV can accumulate a large number of open file handles during operation.
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles.
+
+To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000.
+
+{{% note %}}
+Superuser or root access may be required to perform these steps.
+{{% /note %}}
+
+## Changing Limit For Current Session
+
+Most operating systems can change the open-files limit for the current shell session using the `ulimit -n` command:
+
+```bash
+ulimit -n 200000
+```
+
+## Debian & Ubuntu
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for Debian & Ubuntu
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/common-session and add the following line:
+
+```/etc/pam.d/common-session
+session required pam_limits.so
+```
+
+2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above.
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the file.
+
+5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line:
+
+```/etc/ssh/sshd_config
+#UseLogin no
+```
+
+And set its value to `yes` as shown here:
+
+```/etc/ssh/sshd_config
+UseLogin yes
+```
+
+6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## CentOS & Red Hat
+
+Start by checking the current open file limit values with:
+
+```bash
+ulimit -Hn # Hard limit
+ulimit -Sn # Soft limit
+```
+
+If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user:
+
+```/etc/security/limits.conf
+riak soft nofile 65536
+riak hard nofile 200000
+```
+
+If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit:
+
+```/etc/default/riak
+ulimit -n 200000
+```
+
+This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf.
+
+## Enable PAM-Based Limits for CentOS and Red Hat
+
+You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files.
+
+For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files.
+
+1\. Edit /etc/pam.d/login and add the following line:
+
+```/etc/pam.d/login
+session required pam_limits.so
+```
+
+2\. Save and close /etc/pam.d/login
+
+3\. Edit /etc/security/limits.conf and append the following lines to the file:
+
+```/etc/security/limits.conf
+* soft nofile 65536
+* hard nofile 200000
+```
+
+4\. Save and close the /etc/security/limits.conf file.
+
+5\. Restart the machine so that the limits take effect, and verify that
+the new limits are set with the following command:
+
+```bash
+ulimit -a
+```
+
+{{% note %}}
+In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the
+two asterisks (`*`) in the examples with `riak`.
+{{% /note %}}
+
+
+## Solaris
+
+To increase the open file limit on Solaris, add the following line to the /etc/system file:
+
+```/etc/system
+set rlim_fd_max=200000
+```
+
+[Reference][blog oracle]
+
+## macOS Sierra and High Sierra
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on macOS Sierra or High Sierra, perform the following steps:
+
+1\. Add the following line to your .bash\_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML:
+
+```
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+        "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+
+<plist version="1.0">
+  <dict>
+    <key>Label</key>
+    <string>limit.maxfiles</string>
+    <key>ProgramArguments</key>
+    <array>
+      <string>launchctl</string>
+      <string>limit</string>
+      <string>maxfiles</string>
+      <string>65536</string>
+      <string>200000</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>ServiceIPC</key>
+    <false/>
+  </dict>
+</plist>
+
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
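+
+If you would rather not restart, the following may work as a shortcut
+(assuming the plist was created at the path above): load the new daemon
+with `launchctl` and re-check the limits:
+
+```bash
+sudo launchctl load -w /Library/LaunchDaemons/limit.maxfiles.plist
+launchctl limit maxfiles
+```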
+
+## Mac OS X El Capitan
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X El Capitan, perform the following steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings:
+
+```/etc/sysctl.conf
+kern.maxfiles=200000
+kern.maxfilesperproc=200000
+```
+
+3\. Save and close the file.
+
+4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000.
+
+
+## Mac OS X Yosemite
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 65536 65536
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To change the open files limits on Mac OS X Yosemite, perform these steps:
+
+1\. Add the following line to your .bash_profile or analogous file:
+
+```bash
+ulimit -n 65536 200000
+```
+
+2\. Save and close the file. Next edit the /etc/launchd.conf file and add:
+
+```/etc/launchd.conf
+limit maxfiles 200000
+```
+
+3\. Save and close the file, then restart your computer for the new limits to take effect.
+
+4\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+## Mac OS X Older Versions
+
+Start by checking the current open file limit values with:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response should look something like this:
+
+```bash
+maxfiles 10240 10240
+```
+
+The first column is the soft limit and the last column is the hard limit.
+
+To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps:
+
+1\. Edit (or create) /etc/launchd.conf and increase the limits by adding:
+
+```bash
+limit maxfiles 65536 200000
+```
+
+2\. Save the file and restart the system for the new limits to take effect.
+
+3\. After restarting, verify the new limits by running:
+
+```bash
+launchctl limit maxfiles
+```
+
+The response output should look something like this:
+
+```bash
+maxfiles 65536 200000
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/3.0.3/using/performance/v2-scheduling-fullsync.md
new file mode 100644
index 0000000000..aca8eb0253
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/performance/v2-scheduling-fullsync.md
@@ -0,0 +1,50 @@
+---
+title: "V2 Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "V2 Scheduling Fullsync"
+    identifier: "performance_v2_scheduling_fullsync"
+    weight: 103
+    parent: "managing_performance"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead.
+{{% /note %}}
+
+
+With the `pause` and `resume` commands it is possible to limit the
+fullsync operation to off-peak times.
First, disable `fullsync_interval`
+and set `fullsync_on_connect` to `false`. Then, using cron or something
+similar, execute the commands below at the start of the sync window.
+In these examples, the commands are combined in a `.sh` or analogous
+file:
+
+```bash
+#!/bin/sh
+
+## Resume from where we left off
+riak-repl resume-fullsync
+
+## Start fullsync if nothing is running
+riak-repl start-fullsync
+```
+
+At the end of the sync window:
+
+```bash
+#!/bin/sh
+
+## Stop fullsync until start of next sync window
+riak-repl pause-fullsync
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference.md b/content/riak/kv/3.0.3/using/reference.md
new file mode 100644
index 0000000000..d4d4a8e7a6
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference.md
@@ -0,0 +1,135 @@
+---
+title: "Riak KV Usage Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Reference"
+    identifier: "managing_ref"
+    weight: 208
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[ref log]: ./logging
+[ref handoff]: ./handoff
+[ref bucket types]: ./bucket-types
+[ref obj del]: ./object-deletion/
+[ref runtime]: ./runtime-interaction/
+[ref monitoring]: ./statistics-monitoring
+[ref snmp]: ./snmp
+[ref jmx]: ./jmx
+[ref search]: ./search
+[ref 2i]: ./secondary-indexes
+[ref custom code]: ./custom-code
+[ref strong consistency]: ./strong-consistency
+[ref mdc]: ./multi-datacenter
+[ref v3 mdc]: ./v3-multi-datacenter
+[ref v2 mdc]: ./v2-multi-datacenter
+[ref arch]: ./architecture
+
+## In This Section
+
+#### [Logging Reference][ref log]
+
+Overview of logging in Riak KV.
+
+[Learn More >>][ref log]
+
+
+#### [Handoff Reference][ref handoff]
+
+Details Riak KV's handoff system.
+
+[Learn More >>][ref handoff]
+
+
+#### [Bucket Types Reference][ref bucket types]
+
+Explanation of bucket types in Riak KV.
+
+[Learn More >>][ref bucket types]
+
+
+#### [Object Deletion Reference][ref obj del]
+
+Information on object deletion scenarios and tombstones.
+
+[Learn More >>][ref obj del]
+
+
+#### [Runtime Interaction Reference][ref runtime]
+
+Describes how Riak interacts with distribution ports and operating system
+processes/garbage collection.
+
+[Learn More >>][ref runtime]
+
+
+#### [Statistics & Monitoring Reference][ref monitoring]
+
+Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics.
+
+[Learn More >>][ref monitoring]
+
+
+#### [Simple Network Management Protocol][ref snmp]
+
+Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics.
+
+[Learn More >>][ref snmp]
+
+
+#### [JMX Monitoring][ref jmx]
+
+Details Riak KV's deprecated JMX monitoring system.
+
+[Learn More >>][ref jmx]
+
+
+#### [Search Reference][ref search]
+
+Overview of search in Riak KV.
+
+[Learn More >>][ref search]
+
+
+#### [Secondary Indexes Reference][ref 2i]
+
+Implementation details for Riak KV's secondary indexes feature.
+
+[Learn More >>][ref 2i]
+
+
+#### [Installing Custom Code][ref custom code]
+
+Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations.
+
+[Learn More >>][ref custom code]
+
+
+#### [Strong Consistency Reference][ref strong consistency]
+
+Overview of strong consistency in Riak KV.
+
+[Learn More >>][ref strong consistency]
+
+
+#### [Multi-Datacenter Reference][ref mdc]
+
+Overview of Riak's Multi-Datacenter system.
+
+[Learn More >>][ref mdc]
+
+
+#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc]
+
+Details Riak's V3 Multi-Datacenter system.
+
+[Learn More >>][ref v3 mdc]
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/architecture.md b/content/riak/kv/3.0.3/using/reference/architecture.md
new file mode 100644
index 0000000000..e53b797639
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/architecture.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Architecture Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+#menu:
+#  riak_kv-3.0.3:
+#    name: "Architecture"
+#    identifier: "managing_ref_architecture"
+#    weight: 116
+#    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+<!-- TODO: Content -->
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/bucket-types.md b/content/riak/kv/3.0.3/using/reference/bucket-types.md
new file mode 100644
index 0000000000..9df0bf3ec3
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/bucket-types.md
@@ -0,0 +1,823 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Bucket Types"
+    identifier: "managing_ref_bucket_types"
+    weight: 102
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+Bucket types allow groups of buckets to share configuration details and
+enable Riak users to manage bucket properties more efficiently than in the
+older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types/#bucket-properties-and-operations).
+
+{{% note title="Important note on cluster downgrades" %}}
+If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the
+cluster to a pre-2.0 version _as long as you have not created and activated a
+bucket type in the cluster_. Once any bucket type has been created and
+activated, you can no longer downgrade the cluster to a pre-2.0 version.
+{{% /note %}}
+
+## How Bucket Types Work
+
+The older configuration system, based on bucket properties, involves
+setting bucket properties for specific buckets either through
+[HTTP]({{<baseurl>}}riak/kv/3.0.3/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and
+modify a wide range of properties, from `n_val` to `allow_mult` and far
+beyond.
+
+Using bucket *types* also involves dealing with bucket properties, but
+with a few crucial differences:
+
+* Bucket types enable you to create bucket configurations and assign
+  those configurations to as many buckets as you wish, whereas the
+  previous system required configuration to be set on a per-bucket basis
+* Nearly all bucket properties can be updated using bucket types, except the
+  `datatype` and `consistent` properties, related to
+  [Riak data types]({{<baseurl>}}riak/kv/3.0.3/developing/data-types) and [strong consistency]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/strong-consistency), respectively
+* Bucket types are more performant than bucket properties because
+  divergence from Riak's defaults doesn't have to be gossiped around the
+  cluster for every bucket, which means less computational overhead
+
+It is important to note that buckets are not assigned types in the same
+way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types/#bucket-properties-and-operations).
You cannot simply take a
+bucket `my_bucket` and assign it a type the way that you would, say,
+set `allow_mult` to `false` or `n_val` to `5`, because there is no
+`type` parameter contained within the bucket's properties (i.e.
+`props`).
+
+Instead, bucket types are applied to buckets _on the basis of how those
+buckets are queried_. Queries involving bucket types take the following
+form:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+In the older system, only bucket and key are specified in queries:
+
+```
+GET/PUT/DELETE /buckets/<bucket>/keys/<key>
+```
+
+## When to Use Bucket Types
+
+In many respects, bucket types are a major improvement over the older
+system of bucket configuration, including the following:
+
+* Bucket types are more flexible because they enable you to define a
+  bucket configuration and then change it if you need to.
+* Bucket types are more reliable because the buckets that bear a given
+  type only have their properties changed when the type is changed.
+  Previously, it was possible to change the properties of a bucket only
+  through client requests.
+* Whereas bucket properties can only be altered by clients interacting
+  with Riak, bucket types are more of an operational concept. The
+  `riak-admin bucket-type` interface (discussed in depth below) enables
+  you to manage bucket configurations on the operations side, without
+  recourse to Riak clients.
+
+For these reasons, we recommend _always_ using bucket types in versions
+of Riak 2.0 and later.
+
+## Managing Bucket Types Through the Command Line
+
+Bucket types are created, updated, activated, and more through the
+`riak-admin bucket-type` interface.
+
+Below is a full list of available sub-commands:
+
+Command | Action | Form |
+:-------|:-------|:-----|
+`create` | Create or modify a bucket type before activation | `create <type> <json>` |
+`activate` | Activate a bucket type | `activate <type>` |
+`list` | List all currently available bucket types and their activation status | `list` |
+`status` | Display the status and properties of a specific bucket type | `status <type>` |
+`update` | Update a bucket type after activation | `update <type> <json>` |
+
+### Creating a Bucket Type
+
+Creating new bucket types involves using the `create <type> <json>`
+command, where `<type>` is the name of the type and `<json>` is a JSON
+object of the following form:
+
+```json
+{
+  "props": {
+    "prop1": "val1",
+    "prop2": "val2",
+    ...
+  }
+}
+```
+
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.3/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.3/developing/getting-started) section.
+
+If creation is successful (for example, when creating a type named
+`type_using_defaults` whose `props` object is empty), you should see the
+following output:
+
+```
+type_using_defaults created
+```
+
+{{% note %}}
+The `create` command can be run multiple times prior to a bucket type being
+activated. Riak will persist only those properties contained in the final call
+of the command.
+{{% /note %}}
+
+Creating bucket types that assign properties _always_ involves passing
+stringified JSON to the `create` command. One way to do that is to pass
+a JSON string directly.
The following creates a bucket type
+`n_equals_1`, which sets `n_val` to 1:
+
+```bash
+riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}'
+```
+
+If you wish, you can also pass in a JSON string through a file, such as
+a `.json` file:
+
+```bash
+riak-admin bucket-type create from_json_file "$(cat props.json)"
+```
+
+Like all bucket types, this type needs to be activated to be usable
+within the cluster.
+
+### Activating a Bucket Type
+
+Activating a bucket type involves the `activate` command from the same
+`bucket-type` interface used before:
+
+```bash
+riak-admin bucket-type activate my_bucket_type
+```
+
+When activation has succeeded, you should see the following output:
+
+```
+my_bucket_type has been activated
+```
+
+A bucket type can be activated only when the type has been propagated to
+all running nodes. You can check on the type's readiness by running
+`riak-admin bucket-type status <type_name>`. The first line of output
+will indicate whether or not the type is ready.
+
+In a stable cluster, bucket types should propagate very quickly. If,
+however, a cluster is experiencing network partitions or other issues,
+you will need to resolve those issues before bucket types can be
+activated.
+
+### Listing Bucket Types
+
+You can list currently available bucket types using the `list` command:
+
+```bash
+riak-admin bucket-type list
+```
+
+This will return a simple list of types along with their current status
+(either `active` or `not active`). Here is an example response:
+
+```
+type1 (active)
+type2 (not active)
+type3 (active)
+```
+
+### Checking a Type's Status
+
+You can check on the status---i.e. the configuration details---of a
+bucket type using the `status <type>` command:
+
+```bash
+riak-admin bucket-type status my_bucket_type
+```
+
+The console will output two things if the type exists:
+
+1. Whether or not the type is active
+2. The bucket properties associated with the type
+
+If you check the status of a currently active type called
+`my_bucket_type` that simply bears a default bucket configuration, the
+output will be as follows:
+
+```bash
+my_bucket_type is active
+
+active: true
+allow_mult: true
+
+... other properties ...
+
+w: quorum
+young_vclock:20
+```
+
+### Updating a Bucket Type
+
+The `bucket-type update` command functions much like the `bucket-type
+create` command. It simply involves specifying the name of the bucket
+type that you wish to modify and a JSON object containing the properties
+of the type:
+
+```bash
+riak-admin bucket-type update type_to_update '{"props":{ ... }}'
+```
+
+{{% note title="Immutable Configurations" %}}
+Any bucket properties associated with a type can be modified after the type is
+created, with three important exceptions:
+
+* `consistent`
+* `datatype`
+* `write_once`
+
+If a bucket type entails strong consistency (requiring that `consistent` be
+set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a
+write-once bucket (requiring `write_once` be set to `true`), then these
+properties are fixed once the bucket type is created and cannot be changed
+later.
+
+If you need to change one of these properties, we recommend that you simply
+create and activate a new bucket type.
+{{% /note %}} + +## Buckets as Namespaces + +In versions of Riak prior to 2.0, all queries are made to a bucket/key +pair, as in the following example read request: + +```java +Location myKey = new Location(new Namespace("my_bucket"), "my_key"); +FetchValue fetch = new FetchValue.Builder(myKey).build(); +client.execute(fetch); +``` + +```ruby +bucket = client.bucket('my_bucket') +bucket.get('my_key') +``` + +```php +$location = new Location('my_key', new Bucket('my_bucket')); +(new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('my_bucket') +bucket.get('my_key') +``` + +```csharp +var id = new RiakObjectId("my_bucket", "my_key"); +client.Get(id); +``` + +```javascript +client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) { +}); +``` + +```erlang +{ok, Object} = riakc_pb_socket:get(Pid, + <<"my_bucket">>, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/buckets/my_bucket/keys/my_key +``` + +With the addition of bucket types in Riak 2.0, bucket types can be used +as _an additional namespace_ on top of buckets and keys. The same bucket +name can be associated with completely different data if it used in +accordance with a different type. Thus, the following two requests will +be made to _completely different objects_, even though the bucket and key +names are the same: + +```java +Location key1 = + new Location(new Namespace("type1", "my_bucket"), "my_key"); +Location key2 = + new Location(new Namespace("type2", "my_bucket"), "my_key"); +FetchValue fetch1 = new FetchValue.Builder(key1).build(); +FetchValue fetch2 = new FetchValue.Builder(key2).build(); +client.execute(fetch1); +client.execute(fetch2); +``` + +```ruby +bucket1 = client.bucket_type('type1').bucket('my_bucket') +bucket2 = client.bucket_type('type2').bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```php +$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1')); +$location2 = new Location('my_key', new Bucket('my_bucket', 'type2')); +$builder = new \Basho\Riak\Command\Builder\FetchObject($riak); +$builder->atLocation($location1) + ->build() + ->execute(); +$builder->atLocation($location2) + ->build() + ->execute(); +``` + +```python +bucket1 = client.bucket_type('type1').bucket('my_bucket') +bucket2 = client.bucket_type('type2').bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```csharp +var id1 = new RiakObjectId("type1", "my_bucket", "my_key"); +var id2 = new RiakObjectId("type2", "my_bucket", "my_key"); +var rslt1 = client.Get(id1); +var rslt2 = client.Get(id2); +``` + +```javascript +client.fetchValue({ + bucketType: 'type1', bucket: 'my_bucket', key: 'my_key' +}, function (err, rslt) { +}); + +client.fetchValue({ + bucketType: 'type2', bucket: 'my_bucket', key: 'my_key' +}, function (err, rslt) { +}); +``` + +```erlang +{ok, Obj1} = riakc_pb_socket:get(Pid, + {<<"type1">>, <<"my_bucket">>}, + <<"my_key">>), +{ok, Obj2} = riakc_pb_socket:get(Pid, + {<<"type2">>, <<"my_bucket">>}, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key +curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key +``` + +{{% note title="Note on object location" %}} +In Riak 2.x, _all requests_ must be made to a location specified by a bucket +type, bucket, and key rather than to a bucket/key pair, as in previous +versions. 
+{{% /note %}}
+
+If requests are made to a bucket/key pair without a specified bucket
+type, `default` will be used in place of a bucket type. The following
+queries are thus identical:
+
+```java
+Location withDefaultBucketType =
+  new Location(new Namespace("default", "my_bucket"), "my_key");
+Location noBucketType =
+  new Location(new Namespace("my_bucket"), "my_key");
+FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build();
+FetchValue fetch2 = new FetchValue.Builder(noBucketType).build();
+client.execute(fetch1);
+client.execute(fetch2);
+```
+
+```ruby
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```php
+$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default'));
+$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket'));
+$builder = new \Basho\Riak\Command\Builder\FetchObject($riak);
+$builder->atLocation($location1)
+  ->build()
+  ->execute();
+$builder->atLocation($location2)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket1 = client.bucket_type('default').bucket('my_bucket')
+bucket2 = client.bucket('my_bucket')
+bucket1.get('my_key')
+bucket2.get('my_key')
+```
+
+```csharp
+var id1 = new RiakObjectId("default", "my_bucket", "my_key");
+var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain);
+client.Put(obj1);
+
+var id2 = new RiakObjectId("my_bucket", "my_key");
+var getRslt = client.Get(id2);
+
+RiakObject obj2 = getRslt.Value;
+// Note: obj1.Value and obj2.Value are equal
+```
+
+```javascript
+var obj1 = new Riak.Commands.KV.RiakObject();
+obj1.setContentType('text/plain');
+obj1.setBucketType('default');
+obj1.setBucket('my_bucket');
+obj1.setKey('my_key');
+obj1.setValue('value');
+client.storeValue({ value: obj1 }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    client.fetchValue({
+        bucketType: 'default', bucket: 'my_bucket', key: 'my_key'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var obj2 = rslt.values.shift();
+        assert(obj1.value == obj2.value);
+    });
+});
+```
+
+```erlang
+{ok, Obj1} = riakc_pb_socket:get(Pid,
+                                 {<<"default">>, <<"my_bucket">>},
+                                 <<"my_key">>),
+{ok, Obj2} = riakc_pb_socket:get(Pid,
+                                 <<"my_bucket">>,
+                                 <<"my_key">>).
+```
+
+```curl
+curl http://localhost:8098/buckets/my_bucket/keys/my_key
+curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key
+```
+
+## Default Bucket Properties
+
+Below is a listing of the default bucket properties (i.e. `props`)
+associated with the `default` bucket type:
+
+```json
+{
+  "props": {
+    "allow_mult": false,
+    "basic_quorum": false,
+    "big_vclock": 50,
+    "chash_keyfun": {
+      "fun": "chash_std_keyfun",
+      "mod": "riak_core_util"
+    },
+    "dvv_enabled": false,
+    "dw": "quorum",
+    "last_write_wins": false,
+    "linkfun": {
+      "fun": "mapreduce_linkfun",
+      "mod": "riak_kv_wm_link_walker"
+    },
+    "n_val": 3,
+    "notfound_ok": true,
+    "old_vclock": 86400,
+    "postcommit": [],
+    "pr": 0,
+    "precommit": [],
+    "pw": 0,
+    "r": "quorum",
+    "rw": "quorum",
+    "small_vclock": 50,
+    "w": "quorum",
+    "young_vclock": 20
+  }
+}
+```
+
+## Bucket Types and the `allow_mult` Setting
+
+Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` was set to `true`. The default `allow_mult` setting was `false`.
+
+In version 2.0, this changed in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolution in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind whenever you create, activate, and use
+your own bucket types in Riak 2.0 and later. It is still possible to set
+`allow_mult` to `false` in any given bucket type, but it must be done
+explicitly. If we wanted to set `allow_mult` to `false` in our
+`n_val_of_2` bucket type from above, we would need to update the
+existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/3.0.3/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/3.0.3/developing/usage/commit-hooks/#Post-Commit-Hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again.
If it still does not
+    work, then there may be a network partition or other issues that
+    need to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key =
+  new Location(new Namespace("no_siblings", "sensitive_user_data"), "user19735");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(key)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject("{ ... user data ... }")
+  ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `current_memes` with the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket.
If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(allYourBaseKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+  ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type
+from above if we didn't want to deal with siblings, vclocks, and the
+like, and we could use a `siblings_allowed` bucket type (with all of the
+default properties except `allow_mult`, which is set to `true`). This
+would give us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
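+
+To illustrate the isolation, here is a minimal sketch using the HTTP API
+(the values written are hypothetical; it assumes both types have been
+created and activated as described above):
+
+```bash
+# Write a different value for the same bucket/key under each type
+curl -XPUT -H "Content-Type: text/plain" -d "meme A" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl -XPUT -H "Content-Type: text/plain" -d "meme B" \
+  http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+
+# Each read returns the value stored in that type's keyspace
+curl http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+```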
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/custom-code.md b/content/riak/kv/3.0.3/using/reference/custom-code.md
new file mode 100644
index 0000000000..eaa38ca314
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/custom-code.md
@@ -0,0 +1,135 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/advanced/install-custom-code/
+  - /riak/kv/3.0.3/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/3.0.3/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/3.0.3/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. Note that in Erlang, a file must
+have the same name as the module it contains. So if you are given a file
+named `validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+         on supported platforms
+
+Compiling the module is a straightforward process:
+
+```text
+erlc validate_json.erl
+```
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+Next, you'll need to choose a directory in which compiled modules will be
+stored and from which they will be loaded. For our example, we'll use a
+temporary directory, `/tmp/beams`, but you should choose a directory for
+production functions based on your own requirements, such that the modules
+will be available where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+### Configure
+
+Copy the `validate_json.beam` file into the `/tmp/beams` directory:
+
+```text
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+    %% ...
+    {add_paths, ["/tmp/beams/"]},
+    %% ...
+]}
+```
+
+After updating `app.config`, Riak must be restarted. In production, if you
+are applying configuration changes to multiple nodes, do so in a rolling
+fashion, taking time to ensure that the Riak key/value store has fully
+initialized and become available for use on each node before moving to
+the next.
+
+This is done with the `riak-admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure riak_kv is active before restarting the next
+node.
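+
+For example, a minimal rolling-restart sketch (the node name is a
+placeholder; substitute each node's actual name):
+
+```bash
+riak stop
+riak start
+riak-admin wait-for-service riak_kv riak@192.168.1.10
+```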
+{{% /note %}} + + + + diff --git a/content/riak/kv/3.0.3/using/reference/failure-recovery.md b/content/riak/kv/3.0.3/using/reference/failure-recovery.md new file mode 100644 index 0000000000..ad2284f8ed --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/failure-recovery.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + + diff --git a/content/riak/kv/3.0.3/using/reference/handoff.md b/content/riak/kv/3.0.3/using/reference/handoff.md new file mode 100644 index 0000000000..913b34c8fe --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/handoff.md @@ -0,0 +1,201 @@ +--- +title: "Handoff Reference" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Handoff" + identifier: "managing_ref_handoff" + weight: 101 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.3/ops/running/handoff/ + - /riak/kv/3.0.3/ops/running/handoff/ +--- + +[cluster ops handoff]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/handoff + +Riak is a distributed system built with two essential goals in mind: + +* **fault tolerance**, whereby a Riak cluster can withstand node + failure, network partitions, and other events in a way that does not + disrupt normal functioning, and +* **scalability**, whereby operators can gracefully add and remove nodes + to/from a Riak cluster + +Both of these goals demand that Riak is able to either temporarily or +permanently re-assign responsibility for portions of the keyspace. That +re-assigning is referred to as **intra-cluster handoff** (or simply +**handoff** in our documentation). + +## Types of Handoff + +Intra-cluster handoff typically takes one of two forms: **hinted +handoff** and **ownership transfer**. + +Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +up the slack, so to speak, assuming responsibility for node C's +operations. When node C comes back online, responsibility will be handed +back to the original vnodes. + +Ownership transfer is different because it is meant to be permanent. +It occurs when a [vnode]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +makeup of a cluster changes, e.g. when nodes are added or removed from +the cluster. In this case, responsibility for portions of the keyspace +needs to be fundamentally re-assigned. + +Both types of handoff are handled automatically by Riak. Operators do +have the option, however, of enabling and disabling handoff on +particular nodes or all nodes and of configuring key aspects of Riak's +handoff behavior. More information can be found below. + +## Configuring Handoff + +A full listing of configurable parameters can be found in our +[configuration files]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#intra-cluster-handoff) +document. The sections below provide a more narrative description of +handoff configuration. + +### SSL + +If you want to encrypt handoff behavior within a Riak cluster, you need +to provide each node with appropriate paths for an SSL certfile (and +potentially a keyfile). The configuration below would designate a +certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`: + +```riakconf +handoff.ssl.certfile = /ssl_dir/cert.pem +handoff.ssl.keyfile = /ssl_dir/key.pem +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_ssl_options, [ + {certfile, "/ssl_dir/cert.pem"}, + {keyfile, "/ssl_dir/key.pem"} + ]}, + %% Other configs +]} +``` + +### Port + +You can set the port used by Riak for handoff-related interactions using +the `handoff.port` parameter. 
The default is 8099. This would change the
+port to 9000:
+
+```riakconf
+handoff.port = 9000
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_port, 9000},
+    %% Other configs
+]}
+```
+
+### Background Manager
+
+Riak has an optional background manager that limits handoff activity in
+the name of saving resources. The manager can help prevent system
+response degradation during times of heavy load, when multiple
+background tasks may contend for the same system resources. The
+background manager is disabled by default. The following will enable it:
+
+```riakconf
+handoff.use_background_manager = on
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_use_background_manager, on},
+    %% Other configs
+]}
+```
+
+### Maximum Rejects
+
+If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search/),
+those subsystems can block handoff of primary key/value data, i.e. data
+that you interact with via normal reads and writes.
+
+The `handoff.max_rejects` setting enables you to set the maximum
+duration that a [vnode]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode) can be blocked by multiplying the
+`handoff.max_rejects` setting by the value of
+[`vnode_management_timer`]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#vnode_management_timer).
+Thus, if you set `handoff.max_rejects` to 10 and
+`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems
+can block K/V handoff for a maximum of 50 seconds. The default for
+`handoff.max_rejects` is 6, while the default for
+`vnode_management_timer` is `10s`. This would set `max_rejects` to 10:
+
+```riakconf
+handoff.max_rejects = 10
+```
+
+```appconfig
+{riak_kv, [
+    %% Other configs
+    {handoff_rejected_max, 10},
+    %% Other configs
+]}
+```
+
+### Transfer Limit
+
+You can adjust the number of node-to-node transfers (which include
+handoff) using the `transfer_limit` parameter. The default is 2. Setting
+this higher will allow more concurrent transfers, at the expense of
+greater resource consumption. This would set `transfer_limit` to 5:
+
+```riakconf
+transfer_limit = 5
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {handoff_concurrency, 5},
+    %% Other configs
+]}
+```
+
+## Enabling and Disabling Handoff
+
+Handoff can be enabled and disabled in two ways: via configuration or
+on the command line.
+
+### Enabling and Disabling via Configuration
+
+You can enable and disable both outbound and inbound handoff on a node
+using the `handoff.outbound` and `handoff.inbound` settings,
+respectively. Both are enabled by default. The following would disable
+both:
+
+```riakconf
+handoff.outbound = off
+handoff.inbound = off
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {disable_outbound_handoff, true},
+    {disable_inbound_handoff, true},
+    %% Other configs
+]}
+```
+
+### Enabling and Disabling Through the Command Line
+
+Check out [Cluster Operations: Handoff][cluster ops handoff] for steps on enabling and disabling handoff via the command line.
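+
+As a rough sketch (assuming the `riak-admin handoff` interface described
+on that page), disabling and re-enabling handoff across all nodes might
+look like this:
+
+```bash
+# Disable both inbound and outbound handoff on every node
+riak-admin handoff disable both --all
+
+# Re-enable both directions when maintenance is complete
+riak-admin handoff enable both --all
+```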
+ + + + diff --git a/content/riak/kv/3.0.3/using/reference/jmx.md b/content/riak/kv/3.0.3/using/reference/jmx.md new file mode 100644 index 0000000000..a65ff2fb5d --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/jmx.md @@ -0,0 +1,190 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.3/ops/running/monitoring/jmx + - /riak/kv/3.0.3/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td> + <td>float</td> + <td>Mean PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMedian</tt></td> + <td>float</td> + <td>Median PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePuts</tt></td> + <td>int</td> + <td>Number of PUTs in past minute</td> + </tr> + <tr> + <td><tt>NodePutsTotal</tt></td> + <td>int</td> + <td>Number of PUTs since node start</td> + </tr> + <tr> + <td><tt>PBCActive</tt></td> + <td>int</td> + <td>Number of active Protocol Buffers connections</td> + </tr> + <tr> + <td><tt>PBCConnects</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections in past minute</td> + </tr> + <tr> + <td><tt>PBCConnectsTotal</tt></td> + <td>int</td> + <td>Number of Protocol Buffers connections since node start</td> + </tr> + <tr> + <td><tt>RingCreationSize</tt></td> + <td>int</td> + <td>Number of partitions in Riak ring</td> + </tr> + <tr> + <td><tt>VnodeGets</tt></td> + <td>int</td> + <td>Number of vnode-level GETs in past minute</td> + </tr> + <tr> + <td><tt>VnodeGetsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level GETs since node start</td> + </tr> + <tr> + <td><tt>VnodePuts</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs in past minute</td> + </tr> + <tr> + <td><tt>VnodePutsTotal</tt></td> + <td>int</td> + <td>Number of vnode-level PUTs since node start</td> + </tr> +</table> + + + + diff --git a/content/riak/kv/3.0.3/using/reference/logging.md b/content/riak/kv/3.0.3/using/reference/logging.md new file mode 100644 index 0000000000..17d6c8ba57 --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/logging.md @@ -0,0 +1,301 @@ +--- +title: "Logging Reference" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Logging" + identifier: "managing_ref_logging" + weight: 100 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.3/ops/running/logging + - /riak/kv/3.0.3/ops/running/logging +--- + +[cluster ops log]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/logging + +Logging in Riak KV is handled by a Basho-produced logging framework for +[Erlang](http://www.erlang.org) called +[lager](https://github.com/basho/lager). + +lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document. + +## Log Directory + +Riak's log files are stored in a `/log` directory on each node. The +location of that directory differs from platform to platform. The table +below shows you where log files are stored on all supported operating +systems. + +OS | Directory +:--|:--------- +Ubuntu, Debian, CentOS, RHEL | `/var/log/riak` +Solaris, OpenSolaris | `/opt/riak/log` +Source install and Mac OS X | `./log` (where the `.` represents the root installation directory) + +## Log Files + +Below is a list of files that can be found in each node's `/log` +directory: + +File | Significance +:----|:------------ +`console.log` | Console log output +`crash.log` | Crash logs +`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs. +`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak. +`run_erl.log` | The log file for an Erlang process called `run_erl`. This file can typically be ignored. 
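+
+For a quick look at what a node is logging, you can follow the console
+log at the platform-specific path above; for example, on a packaged
+Ubuntu, Debian, CentOS, or RHEL install:
+
+```bash
+tail -f /var/log/riak/console.log
+```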
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 3.0.3, a few log messages may contain newline characters,
+which prevents external tools from reliably identifying the end of each
+message when ingesting log files.
+
+A known workaround is to ingest the logs enabled by the `log.syslog`
+configurable parameter (and processed by syslog) rather than those
+enabled by `log.console`, e.g. by using the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option of syslog-ng (see [this StackExchange
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474))
+or the equivalent in other syslog implementations.
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable it
+and disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+By default, Riak stores error messages in `./log/error.log`.
+You can change this using the `log.error.file` parameter.
Here is an
+example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated either when a certain size threshold is reached and/or at
+designated times.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` - Every night at midnight
+* `$D23` - Every day at 23:00 (11 pm)
+* `$W0D20` - Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` - On the first day of every month at midnight
+* `$M5D6` - On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, in any size denomination you
+wish, e.g. `KB` or `MB`. The default is 64 KB. The following would set
+the maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix to be prepended to
+each syslog message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from amongst the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages amongst the available levels, which are listed below. The
+default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither.
This is determined by the value that you give to the
+`log.console` parameter, which gives you one of four options:
+
+* `file` - Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/3.0.3/setup/installing/source), but will differ on platform-specific installations,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` - Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-cli/#attach-direct) command
+* `both` - Console logs will be emitted both to a file and to standard
+  output
+* `off` - Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log].
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/multi-datacenter.md b/content/riak/kv/3.0.3/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..c039e60fb2
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/multi-datacenter.md
@@ -0,0 +1,53 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling and disabling of per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][ref mdc comparison]
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/multi-datacenter/comparison.md b/content/riak/kv/3.0.3/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..601d47e8b5
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,100 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/mdc/comparison
+  - /riak/kv/3.0.3/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/3.0.3/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/3.0.3/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destination of replication data. Sites and listeners are
+  manually configured on each node in a cluster. This can be a burden to
+  the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity was reached in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters are exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes, and the sink has N nodes, there will be M
+  realtime connections.
Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/3.0.3/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..a872206ddc
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,168 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/mdc/monitoring
+  - /riak/kv/3.0.3/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identification and trending of issues or delays in realtime replication
+is important for identifying a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute.
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The size of `rt_dirty` can quantify the number of errors that have
+occurred and should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue backups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+increases to the maximum size of the queue (displayed in the
+`realtime_queue_stats` section of the replication status output as
+`max_bytes`) can be made to accommodate a usage pattern of expected high
+load.
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
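+
+Before turning to canary objects, here is a minimal sketch of polling the
+statistics discussed above (it assumes the default HTTP port 8098 and the
+`jq` utility; field names follow the `riak-repl status` output described
+in this document):
+
+```bash
+# Poll at most once per minute (see the note on time windows above)
+curl -s http://localhost:8098/riak-repl/stats | \
+  jq '{rt_dirty, realtime_queue_stats}'
+```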
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+1. Perform a GET for your canary object on both your source and sink
+   clusters, noting their states. The state of the object in each cluster
+   can be referred to as state `S0`, or the object's initial state.
+2. PUT an update for your canary object to the source cluster, updating
+   the state of the object to the next state, `S1`.
+3. Perform a GET for your canary on the sink cluster, comparing the state
+   of the object on the source cluster to the state of the object on the
+   sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+Getting a rough estimate of how long it takes an object PUT to a source
+cluster to be replicated to a sink cluster can be done by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
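+
+A minimal sketch of a single canary probe over HTTP (the host names and
+the bucket/key are hypothetical; adjust them for your clusters):
+
+```bash
+# Update the canary object on the source cluster (state S1)...
+curl -XPUT -H "Content-Type: text/plain" -d "S1" \
+  http://source-node:8098/buckets/repl_canary/keys/canary
+
+# ...then poll the sink cluster until the returned state matches S1
+# or the SLA window expires
+curl http://sink-node:8098/buckets/repl_canary/keys/canary
+```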
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/3.0.3/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..ecdac898c0
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,66 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/mdc/per-bucket
+  - /riak/kv/3.0.3/ops/mdc/per-bucket
+---
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+The available `repl` values changed between Riak Enterprise versions 1.1
+and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+ * `true` - Enable replication (realtime + fullsync)
+ * `false` - Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+ * `realtime` - Replication only occurs in realtime for this bucket
+ * `fullsync` - Replication only occurs during a fullsync operation
+ * `both` - Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this goes for properties such as `backend`.
+If the bucket doesn't exist in the destination cluster, Riak will create
+it with the default backend and _not_ with the backend used in the
+source cluster.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/multi-datacenter/statistics.md b/content/riak/kv/3.0.3/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..2ca496521d
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,244 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/mdc/statistics
+  - /riak/kv/3.0.3/ops/mdc/statistics
+---
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
+
+1. Many of these statistics will appear only on the current
+   leader node
+2. The counts for all statistics will be reset to 0 upon restarting
The counts for all statistics will be reset to 0 upon restarting Riak unless otherwise noted

Field | Description
:-----|:----------
`cluster_leader` | Which node is the current leader of the cluster
`connected_clusters` | A list of all sink clusters to which this source is connected

## Performance

The `riak-repl status` command should not be executed more than once a
minute, as statistics are recalculated every time the command is
executed, and some statistics require network communication between
nodes. This performance note also applies to the HTTP `/riak-repl/stats`
endpoint.

## Realtime Replication Statistics

Statistics for both the source and sink sides of realtime replication.
These values can be found under either `sources.source_stats` or
`sinks.sink_stats`.

Field | Description
------|------------
`realtime_enabled` | A list of all realtime sinks that are enabled
`realtime_started` | A list of all realtime sinks that are started
`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
`rt_sink_errors` | A sink error has been detected on the source node. This value will be reset to 0 after a node restarts.
`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
`rt_source_errors` | A source error has been detected on the source node. This value will be reset to 0 after a node restarts.

Field | Description
------|------------
`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
`connected` | If `true`, then the source is connected to a sink (or vice versa)
`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
`sent_seq` | The last realtime queue sequence number that has been transmitted
`acked_seq` | The last realtime queue sequence number that has been acknowledged
`expect_seq` | The next realtime queue sequence number that is expected
`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink


These values are under `realtime_queue_stats`.
Field | Description
------|------------
`bytes` | The size in bytes of all objects currently in the realtime queue
`consumers` | A list of source consumers of the realtime queue
`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
`errs` | The number of errors while pushing/popping from the realtime queue
`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
`pending` | The number of objects waiting to be sent to the sink cluster
`sinkclustername` | A consumer of the realtime queue
`unacked` | The number of objects waiting to be acknowledged by a queue consumer


## Fullsync Replication Statistics

Field | Description
------|------------
`fullsync_enabled` | A list of all sinks that are enabled
`fullsync_running` | A list of all sinks that are running
`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster
`fullsync_start_time` | The time the current fullsync to the specified cluster began
`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync

If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.

Those fields are described in the following tables.

Field | Description
------|------------
`cluster` | The name of the sink cluster
`queued` | The number of partitions that are waiting for an available process
`in_progress` | The number of partitions that are being synced
`starting` | The number of partitions connecting to the remote cluster
`successful_exits` | The number of partitions successfully synced. When fullsync completes, this will equal the total number of partitions in the ring.
`error_exits` | The number of partitions whose sync failed or was aborted. These partitions will be queued again and retried later.
`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below
`socket` | See [Socket Statistics](#socket-statistics)
`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes, at which point these values will be moved to `fullsync_suggested`.
`socket` | `{peername: <RemoteIP:Port>`, `sockname: <LocalIP:Port>}`

The `running_stats` field contains the following fields.

Field | Description
------|------------
`node` | The local cluster source node currently participating in fullsync replication
`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
`strategy` | The strategy that fulfills fullsync replication.
In previous versions of replication, different strategies could be configured depending on your replication needs.
`fullsync_worker` | The Erlang process ID of the fullsync worker
`socket` | See [Socket Statistics](#socket-statistics)
`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
`fullsync` | The partition that is currently being synchronized with the sink cluster
`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
`stage_start` | Elapsed time in seconds since the `state` started running on the source
`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync

## Socket Statistics

Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.

Field | Description
------|------------
`peername` | `<ip:port>` The address and port for the other end of a connection
`recv_avg` | The average size of packets in bytes received by the socket
`recv_cnt` | The number of packets received by the socket
`recv_dvi` | The average packet size deviation in bytes received by the socket
`recv_kbps` | Socket kilobits/second received
`recv_max` | Size of the largest packet in bytes received by the socket
`send_cnt` | Number of packets sent from the socket
`send_kbps` | Socket kilobits/second sent
`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
`sockname` | `<host:port>` The address and port for "this end" of the connection

## Version 2 Replication Statistics

The following definitions describe the output of `riak-repl status`.
Please note that many of these statistics will only appear on the
current leader node.

**Note**: All counts will be reset to 0 upon restarting Riak.

Field | Description
------|------------
`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
`[sitename]_ips` | Defines a replication sink
`client_bytes_recv` | The total number of bytes the client has received since the server has been started
`client_bytes_sent` | The total number of bytes sent to all connected secondaries
`client_connect_errors` | The number of TCP/IP connection errors
`client_connects` | A count of the number of sink connections made to this node
`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
`elections_leader_changed` | The number of times a Riak node has surrendered leadership
`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
`objects_sent` | The number of objects sent via realtime replication
`server_bytes_recv` | The total number of bytes the primary has received
`server_bytes_sent` | The total number of bytes the primary has sent
`server_connect_errors` | The number of primary to sink connection errors
`server_connects` | The number of times the primary connects to the client sink
`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
`leader` | Which node is the current leader of the cluster for Version 2 Replication
`local_leader_message_queue_len` | The length of the object queue on the leader
`local_leader_heap_size` | The amount of memory the leader is using
`client_stats` | See [Client Statistics](#client-statistics)
`server_stats` | See [Server Statistics](#server-statistics)

## Client Statistics

Field | Description
------|------------
`node` | A unique ID for the Riak node that the sink is running on
`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
`fullsync_worker` | The Erlang process ID of the fullsync worker
`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
`state` | The state shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>


## Server Statistics

Field | Description
------|------------
`node` | A unique ID for the Riak node that the source is running on
`site` | The name of the connected site (sink) this server is configured with. *Warning: This will be renamed in a future version of Riak*.
`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
`fullsync_worker` | The Erlang process ID of the fullsync worker
`bounded_queue` | See [Bounded Queue](#bounded-queue)
`state` | The state shows what the replication strategy is currently processing. The following definitions appear in the status output if the keylist strategy is being used.
They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul> +`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server + + +## Bounded Queue + +The bounded queue is responsible for holding objects that are waiting to +participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for +more information. + +Field | Description +------|------------ +`queue_pid` | The Erlang process ID of the bounded queue +`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*. +`queue_length` | The number of Riak objects currently in the bounded queue +`queue_byte_size` | The size of all objects currently in the queue +`queue_max_size `| The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*. +`queue_percentage` | The percentage of the queue that is full +`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged +`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more. + + +## Accessing Replication Web-Based Statistics + +These stats can be accessed via the command line with the following +command: + +```curl +curl -q http://127.0.0.1:8098/riak-repl/stats +``` + +A simple way to view formatted statistics is to use a command such as: + +```curl +curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp +``` + + + + diff --git a/content/riak/kv/3.0.3/using/reference/object-deletion.md b/content/riak/kv/3.0.3/using/reference/object-deletion.md new file mode 100644 index 0000000000..5d27e94ff8 --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/object-deletion.md @@ -0,0 +1,121 @@ +--- +title: "Object Deletion Reference" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Object Deletion" + identifier: "managing_ref_object_deletion" + weight: 103 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.3/ops/advanced/deletion +--- + +[concept eventual consistency]: ../../../learn/concepts/eventual-consistency +[concept clusters]: ../../../learn/concepts/clusters +[glossary vnode]: ../../../learn/glossary/#vnode +[usage delete objects]: ../../../developing/usage/deleting-objects +[developing keylist]: ../../../developing/api/http/list-keys +[developing mapreduce]: ../../../developing/usage/mapreduce +[cluster mdc]: ../../cluster-operations/v3-multi-datacenter +[config advanced]: ../../../configuring/reference/#advanced-configuration +[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum +[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings +[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction + +In single-server, non-clustered data storage systems, object deletion +is a trivial process. 
In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however, +object deletion is far less trivial because objects live on multiple +[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend. + +## Object Deletion Example + +The problem of object deletion in distributed systems can be illustrated more concretely using the following example: + +* An object is stored on nodes A, B, and C +* Node C suddenly goes offline due to a network failure +* A client sends a delete request to node A, which forwards that + request to node B, but it cannot reach node C +* On nodes A and B, the object is deleted +* Node C comes back online +* A client attempts to read the object, and the request hits node C +* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object. + +The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred: + +1. the object was deleted on A & B but not on C +2. the object was created on C but not on A & B + +To get around this problem, Riak uses *Tombstones*. + +## Tombstones + +Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it. + +This allows Riak to understand the difference between an object that has been deleted, and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*. + +The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**. + +After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to). + +## Configuring Object Deletion + +The `delete_mode` setting in a cluster's [configuration files][config advanced] will determine how long a tombstone will remain before being reaped. + +There are three possible settings: + +* `keep` - Disables tombstone removal +* `immediate` - The tombstone is removed as soon as the request is + received +* Custom time interval - How long to wait until the tombstone is + removed, expressed in milliseconds. The default is `3000`, i.e. to + wait 3 seconds + +In general, we recommend setting the `delete_mode` parameter to `keep` +if you plan to delete and recreate objects under the same key. This protects against failure scenario cases in which a deleted object may be resurrected. + +Setting `delete_mode` to `immediate` can be useful in situations in +which an aggressive space reclamation process is necessary, such as +when running [MapReduce jobs][developing mapreduce], but we do not recommend +this in general. + +Setting `delete_mode` to a longer time duration than the default can be +useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when +network connectivity is an issue. + +## Deletion from Backends + +When attempting to reclaim disk space, deleting data may seem like the obvious first step. 
However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion, when a Riak tombstone is created, and later, when that tombstone is reaped.

In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.

In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.

Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging]; for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.

Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping, and prior to garbage collection, delete operations will actually cause disk space usage to rise slightly.

## Tombstones & Reporting

When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:

* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain on the node until a further request finds it. At that time, a new reap timer will be initiated.
* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too.
* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.

## Client Library Examples

Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
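As a quick illustration of the visibility rules above, the sketch below uses the HTTP API directly (the node address, bucket, and key are hypothetical). After a successful DELETE, an ordinary GET reports `not_found` (HTTP 404), even though a Riak tombstone may still exist internally until it is reaped.

```python
# Hypothetical illustration of delete visibility over Riak's HTTP API.
import urllib.error
import urllib.request

URL = "http://127.0.0.1:8098/buckets/logs/keys/session-42"

# Write an object, then delete it.
put = urllib.request.Request(
    URL, data=b"some value",
    headers={"Content-Type": "text/plain"}, method="PUT")
urllib.request.urlopen(put)
urllib.request.urlopen(urllib.request.Request(URL, method="DELETE"))

# A subsequent GET sees the tombstone quorum as not_found.
try:
    urllib.request.urlopen(URL)
    print("object still visible (replicas may be catching up)")
except urllib.error.HTTPError as err:
    print("GET returned", err.code)  # expect 404
```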
+ +## Resources + +* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html) + + + + diff --git a/content/riak/kv/3.0.3/using/reference/runtime-interaction.md b/content/riak/kv/3.0.3/using/reference/runtime-interaction.md new file mode 100644 index 0000000000..8409062ff7 --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/runtime-interaction.md @@ -0,0 +1,70 @@ +--- +title: "Runtime Interaction Reference" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Runtime Interaction" + identifier: "managing_ref_runtime_interaction" + weight: 104 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.3/ops/advanced/runtime + - /riak/kv/3.0.3/ops/advanced/runtime +--- + +[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference +[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters + +Riak's [configuration files][config reference] provide a variety of parameters that +enable you to fine-tune how Riak interacts with two important elements +of the underlying operating system: distribution ports and OS +processes/garbage collection. + +## Ports + +Distribution ports connect Riak nodes within a [cluster][concept clusters]. The +following port-related parameters are available: + +* `runtime_health.triggers.distribution_port` - Whether distribution + ports with full input buffers will be counted as busy. + * Default: `on` +* `runtime_health.triggers.port` - Whether ports with full input + buffers will be counted as busy. Ports can represent open files or network sockets. + * Default: `on` +* `runtime_health.thresholds.busy_ports` - The threshold at which a + warning will be triggered about the number of ports that are overly + busy. Ports with full input buffers count toward this threshold. + * Default: `2` + +## Processes + +Riak will log warnings related to busy operating system processes and +garbage collection. You can specify the conditions in which warnings are +triggered using the following parameters: + +* `runtime_health.thresholds.busy_processes` - The threshold at which + a warning will be triggered about the number of processes that are + overly busy. Processes with large heaps or that take a long time to + garbage collect will count toward this threshold. + * Default: `30` +* `runtime_health.triggers.process.heap_size` - A process will be + marked as busy when its size exceeds this size (in bytes). + * Default: `160444000` +* `runtime_health.triggers.process.garbage_collection` - A process + will be marked as busy when it exceeds this amount of time doing + garbage collection. Enabling this setting can cause performance + problems on multi-core systems. + * Default: `off` + * Example when enabled: `50ms` +* `runtime_health.triggers.process.long_schedule` - A process will + become busy when it exceeds this length of time during a single + process scheduling and execution cycle. 
+ * Default: `off` + * Example when enabled: `20ms` + + + + diff --git a/content/riak/kv/3.0.3/using/reference/search.md b/content/riak/kv/3.0.3/using/reference/search.md new file mode 100644 index 0000000000..0047ad9acb --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/search.md @@ -0,0 +1,457 @@ +--- +title: "Search Reference" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Search" + identifier: "managing_ref_search" + weight: 109 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.3/dev/advanced/search + - /riak/kv/3.0.3/dev/advanced/search +--- + +[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters +[configuring search]: {{<baseurl>}}riak/kv/3.0.3/configuring/search + +> **Note on search 2.0 vs. legacy search** +> +> This document refers to Riak search 2.0 with +[Solr](http://lucene.apache.org/solr/) integration (codenamed +Yokozuna). + +The project that implements Riak search is codenamed Yokozuna. This is a +more detailed overview of the concepts and reasons behind the design of +Yokozuna, for those interested. If you're simply looking to use Riak +search, you should check out the [Using Search]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search) document. + +![Yokozuna]({{<baseurl>}}images/yokozuna.png) + +## Riak Search is Erlang + +In Erlang OTP, an "application" is a group of modules and Erlang +processes which together perform a specific task. The word application +is confusing because most people think of an application as an entire +program such as Emacs or Photoshop. But Riak Search is just a sub-system +in Riak itself. Erlang applications are often stand-alone, but Riak +Search is more like an appendage of Riak. It requires other subsystems +like Riak Core and KV, but also extends their functionality by providing +search capabilities for KV data. + +The purpose of Riak Search is to bring more sophisticated and robust +query and search support to Riak. Many people consider Lucene and +programs built on top of it, such as Solr, as the standard for +open-source search. There are many successful applications built on +Lucene/Solr, and it sets the standard for the feature set that +developers and users expect. Meanwhile, Riak has a great story as a +highly-available, distributed key/value store. Riak Search takes +advantage of the fact that Riak already knows how to do the distributed +bits, combining its feature set with that of Solr, taking advantage of +the strengths of each. + +Riak Search is a mediator between Riak and Solr. There is nothing +stopping a user from deploying these two programs separately, but this +would leave the user responsible for the glue between them. That glue +can be tricky to write. It requires dealing with monitoring, querying, +indexing, and dissemination of information. + +Unlike Solr by itself, Riak Search knows how to do all of the following: + +* Listen for changes in key/value (KV) data and to make the appropriate + changes to indexes that live in Solr. It also knows how to take a user + query on any node and convert it to a Solr distributed search, which + will correctly cover the entire index without overlap in replicas. +* Take index creation commands and disseminate that information across + the cluster. +* Communicate and monitor the Solr OS process. + +## Solr/JVM OS Process + +Every node in a Riak [cluster][concept clusters] has a corresponding operating +system (OS) process running a JVM which hosts Solr on the Jetty +application server. 
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
Because of this mismatch between KV and Solr, Riak Search must act as a
mediator between the two, meaning it must have a way to inspect a KV
object and create a structure which Solr can ingest for indexing. In
Solr this structure is called a **document**. This task of creating a
Solr document from a Riak object is the job of the **extractor**. To
perform this task, two things must be considered.

**Note**: This isn't quite the whole story: the fields created by the
extractor are only a subset of the fields in the final document. Special
fields needed for Yokozuna to properly query data, as well as tagging
fields, are also created. This call happens inside `yz_doc:make_doc`.

1. Does an extractor exist to map the content-type of the object to a
   Solr document?
2. If so, how is the object's value mapped from one to the other?
   For example, the value may be `application/json` which contains
   nested objects. This must somehow be transformed into a flat
   structure.

The first question is answered by the _extractor mapping_. By default
Yokozuna ships with extractors for several common data types. Below is a
table of this default mapping:

Content Type | Erlang Module
:------------|:-------------
`application/json` | `yz_json_extractor`
`application/xml` | `yz_xml_extractor`
`text/plain` | `yz_text_extractor`
`text/xml` | `yz_xml_extractor`
N/A | `yz_noop_extractor`

The answer to the second question is a function of the implementation
of the extractor module. Every extractor must conform to the
following Erlang specification:

```erlang
-type field_name() :: atom() | binary().
-type field_value() :: binary().
-type fields() :: [{field_name(), field_value()}].

-spec extract(ObjectValue :: binary(), Options :: proplist()) ->
          fields() | {error, term()}.
```

The value of the object is passed along with options specific to each
extractor. Assuming the extractor correctly parses the value, it will
return a list of fields, which are name-value pairs.

The text extractor is the simplest one. By default it will use the
object's value verbatim and associate it with the field name `text`.
For example, an object with the value "How much wood could a woodchuck
chuck if a woodchuck could chuck wood?" would result in the following
fields list.

```erlang
[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
```

An object with the content type `application/json` is a little trickier.
JSON can be nested arbitrarily. That is, the key of a top-level object
can have an object as a value, and this object can have another object
nested inside, and so on. Yokozuna's JSON extractor must have some method
of converting this arbitrary nesting into a flat list. It does this by
concatenating nested object fields with a separator. The default
separator is `.`. An example should make this clearer.

Below is JSON that represents a person, what city they are from, and what
cities they have traveled to.

```json
{
  "name": "ryan",
  "info": {
    "city": "Baltimore",
    "visited": ["Boston", "New York", "San Francisco"]
  }
}
```

Below is the field list that would be created by the JSON extractor.

```erlang
[{<<"info.visited">>,<<"San Francisco">>},
 {<<"info.visited">>,<<"New York">>},
 {<<"info.visited">>,<<"Boston">>},
 {<<"info.city">>,<<"Baltimore">>},
 {<<"name">>,<<"ryan">>}]
```

Some key points to notice:

* Nested objects have their field names concatenated to form a field
  name. The default field separator is `.`. This can be modified.
* Any array causes field names to repeat. This will require that your
  schema defines this field as multi-valued.

The XML extractor works in a very similar fashion to the JSON extractor,
except it also has element attributes to worry about. To see the
document created for an object, without actually writing the object, you
can use the extract HTTP endpoint. This will do a dry-run extraction and
return the document structure as `application/json`.

```curl
curl -XPUT http://localhost:8098/search/extract \
  -H 'Content-Type: application/json' \
  --data-binary @object.json
```

## Schemas

Every index must have a schema, which is a collection of field names and
types. For each document stored, every field must have a matching name
in the schema, used to determine the field's type, which in turn
determines how a field's value will be indexed.

Currently, Yokozuna makes no attempt to hide any details of the Solr
schema: a user creates a schema for Yokozuna just as she would for Solr.
Here is the general structure of a schema.


```xml
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="my-schema" version="1.5">
  <fields>
    <!-- field definitions go here -->
  </fields>

  <!-- DO NOT CHANGE THIS -->
  <uniqueKey>_yz_id</uniqueKey>

  <types>
    <!-- field type definitions go here -->
  </types>
</schema>
```

The `<fields>` element is where the field name, type, and overriding
options are declared. Here is an example of a field for indexing dates.

```xml
<field name="created" type="date" indexed="true" stored="true"/>
```

The corresponding date type is declared under `<types>` like so.

```xml
<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
```

You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search-schemas).

Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
called `_yz_default`. This is an extremely general schema which makes
heavy use of dynamic fields---it is intended for development and
testing. In production, a schema should be tailored to the data being
indexed.

## Active Anti-Entropy (AAE)

[Active Anti-Entropy]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
correcting entropy (divergence) between the data stored in Riak's
key-value backend and the indexes stored in Solr. The impetus for AAE is
that failures come in all shapes and sizes---disk failure, dropped
messages, network partitions, timeouts, overflowing queues, segmentation
faults, power outages, etc. Failures range from obvious to invisible.
Failure prevention is fraught with failure, as well. How do you prevent
your prevention system from failing? You don't. Code for detection, not
prevention. That is the purpose of AAE.

Constantly reading and re-indexing every object in Riak could be quite
expensive. To minimize the overall cost of detection, AAE makes use of
hashtrees. Every partition has a pair of hashtrees: one for KV and
another for Yokozuna. As data is written, the hashtrees are updated in
real time.

Each tree stores the hash of the object. Periodically a partition is
selected and the pair of hashtrees is _exchanged_. First the root hashes
are compared. If they are equal, then there is no more work to do.
You could have
millions of keys in one partition, and verifying that they **all** agree
takes the same time as comparing two hashes. If the roots don't match,
then the root's children are checked, and this process continues until the
individual discrepancies are found. If either side is missing a key, or
the hashes for a key do not match, then _repair_ is invoked on that key.
Repair converges the KV data and its indexes, removing the entropy.

Since failure is inevitable, and absolute prevention impossible, the
hashtrees themselves may contain some entropy. For example, what if the
root hashes agree but a divergence exists in the actual data? Simple:
you assume you can never fully trust the hashtrees, so periodically you
_expire_ them. When expired, a tree is completely destroyed and then
re-built from scratch. This requires folding all data for a partition,
which can be expensive and take some time. For this reason, by default,
expiration occurs after one week.

For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
[screencast](http://coffee.jtuple.com/video/AAE.html).


## Analysis & Analyzers

Analysis is the process of breaking apart (analyzing) text into a
stream of tokens. Solr allows many different methods of analysis,
an important fact because different field values may represent
different types of data. For data like unique identifiers, dates, and
categories you want to index the value verbatim---it shouldn't be
analyzed at all. For text like product summaries, or a blog post,
you want to split the value into individual words so that they may be
queried individually. You may also want to remove common words,
lowercase words, or perform stemming. This is the process of
_analysis_.

Solr provides many different field types which analyze data in different
ways, and custom analyzer chains may be built by stringing together XML
in the schema file, allowing custom analysis for each field. For more
information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search-schemas).

## Tagging

Tagging is the process of adding field-value pairs to be indexed via
Riak object metadata. It is useful in two scenarios.

1. The object being stored is opaque but your application has metadata
   about it that should be indexed, for example storing an image with
   location or category metadata.

2. The object being stored is not opaque, but additional indexes must
   be added _without_ modifying the object's value.

See
[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
for more information.

## Coverage

Yokozuna uses _doc-based partitioning_. This means that all index
entries for a given Riak Object are co-located on the same physical
machine. To query the entire index, all partitions must be contacted.
Adjacent partitions keep replicas of the same object. Replication allows
the entire index to be considered by contacting only a subset of the
partitions. The process of finding a covering set of partitions is known
as _coverage_.

Each partition in the coverage plan has an owning node. Thus a plan can
be thought of as a unique set of nodes along with a covering set of
partitions. Yokozuna treats the node list as physical hostnames and
passes them to Solr's distributed search via the `shards` parameter.
Partitions, on the other hand, are treated logically in Yokozuna. All
partitions for a given node are stored in the same index, unlike KV,
which uses _partition_ as a physical separation.
To properly filter out
overlapping replicas, the partition data from the cover plan is passed to
Solr via the filter query (`fq`) parameter.

Calculating a coverage plan is handled by Riak Core. It can be a very
expensive operation, as much computation is done symbolically, and the
process amounts to a knapsack problem. The larger the ring, the more
expensive the calculation. Yokozuna takes advantage of the fact that it
has no physical partitions by computing a coverage plan asynchronously
every few seconds, caching the plan for query use. In the case of node
failure or ownership change, this could mean a delay between cluster
state and the cached plan. This is, however, a good trade-off given the
performance benefits, especially since even without caching there is a
race, albeit one with a smaller window.

## Statistics

The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.


* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.

* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr, divided by the batch size.

* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr.

* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.

* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.

* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.

* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects to the configured high water mark.

* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.

* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.

* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.

* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time it takes to initiate a drain to the time the drain is completed.

* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees.
Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr. + +* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting. + +* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window. + +* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window. + +* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of querying latency, as measured from the time it takes to send a request to Solr to the time the response is received from Solr. + +* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window. + +* `search_index_bad_entry_count` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. + +* `search_index_bad_entry_one` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute. + +* `search_index_extract_fail_count` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak. + +* `search_index_extract_fail_one` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute. + +While most of the default values are sufficient, you may have to +increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start. + + + diff --git a/content/riak/kv/3.0.3/using/reference/secondary-indexes.md b/content/riak/kv/3.0.3/using/reference/secondary-indexes.md new file mode 100644 index 0000000000..204166d1bc --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/secondary-indexes.md @@ -0,0 +1,76 @@ +--- +title: "Secondary Indexes Reference" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Secondary Indexes" + identifier: "managing_ref_2i" + weight: 110 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.3/dev/advanced/2i + - /riak/kv/3.0.3/dev/advanced/2i +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types +[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.3/using/reference/strong-consistency + +> **Note: Riak Search preferred for querying** +> +> If you're interested in non-primary-key-based querying in Riak, i.e. if +you're looking to go beyond straightforward K/V operations, we now +recommend [Riak Search]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. 
+ +This document provides implementation and other details for Riak's +[secondary indexes]({{<baseurl>}}riak/kv/3.0.3/developing/usage/secondary-indexes/) \(2i) feature. + +## How It Works + +Secondary indexes use **document-based partitioning**, a system where +indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode). This +system is also a local index. Secondary indexes are a list of key/value +pairs that are similar to HTTP headers. At write time, objects are +tagged with index entries consisting of key/value metadata. This +metadata can be queried to retrieve the matching keys. + +![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png) + +Indexes reside on multiple machines. Since indexes for an object are +stored on the same partition as the object itself, query-time +performance issues might arise. When issuing a query, the system must +read from a "covering" set of partitions and then merge the results. +The system looks at how many replicas of data are stored---the N value +or `n_val`---and determines the minimum number of partitions that it +must examine (1 / `n_val`) to retrieve a full set of results, also +taking into account any offline nodes. + +An application can modify the indexes for an object by reading an +object, adding or removing index entries, and then writing the object. +Finally, an object is automatically removed from all indexes when it is +deleted. The object's value and its indexes should be thought of as a +single unit. There is no way to alter the indexes of an object +independently from the value of an object, and vice versa. Indexing is +atomic, and is updated in real time when writing an object. This means +that an object will be present in future index queries as soon as the +write operation completes. + +Riak stores 3 replicas of all objects by default, although this can be +changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties). The system is capable of generating a full set of results +from one third of the system’s partitions as long as it chooses the +right set of partitions. The query is sent to each partition, the index +data is read, and a list of keys is generated and then sent back to the +requesting node. + +> **Note on 2i and strong consistency** +> +> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. 
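To tie the mechanics above together, here is a minimal sketch of tagging an object with an index entry at write time and then querying that index over the HTTP API. The node address, bucket, key, and index name are hypothetical, and the cluster is assumed to be running a 2i-capable backend such as LevelDB.

```python
# Hypothetical 2i example over Riak's HTTP API (requires a 2i-capable
# backend, e.g. LevelDB).
import json
import urllib.request

BASE = "http://127.0.0.1:8098"

# Tag the object with a binary secondary index entry at write time.
put = urllib.request.Request(
    BASE + "/buckets/users/keys/user1",
    data=b'{"name": "ryan"}',
    headers={
        "Content-Type": "application/json",
        "x-riak-index-email_bin": "ryan@example.com",
    },
    method="PUT")
urllib.request.urlopen(put)

# Query the index: the coordinating node reads a covering set of
# partitions and merges the matching keys.
url = BASE + "/buckets/users/index/email_bin/ryan@example.com"
with urllib.request.urlopen(url) as resp:
    print(json.load(resp)["keys"])  # e.g. ["user1"]
```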
diff --git a/content/riak/kv/3.0.3/using/reference/snmp.md b/content/riak/kv/3.0.3/using/reference/snmp.md
new file mode 100644
index 0000000000..d24d670ac8
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/snmp.md
@@ -0,0 +1,166 @@
---
title: "Simple Network Management Protocol"
description: ""
project: "riak_kv"
project_version: 3.0.3
menu:
  riak_kv-3.0.3:
    name: "SNMP"
    identifier: "managing_ref_snmp"
    weight: 107
    parent: "managing_ref"
toc: true
commercial_offering: true
aliases:
  - /riak/3.0.3/ops/running/monitoring/snmp
  - /riak/kv/3.0.3/ops/running/monitoring/snmp
---

Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document covers only SNMP v2c, the last supported version. After the release of Riak KV 2.2.3 Enterprise Edition, support for SNMP was dropped. The configuration examples below are left for people analysing legacy settings and only work with the Enterprise Edition of Riak KV 2.2.3 or lower.

## Configuration

The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.

First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (e.g. `192.168.1.20`):

```erlang
{intAgentIpAddress, [192,168,1,20]}.
{intAgentUDPPort, 4000}.
{snmpEngineID, "agent's engine"}.
{snmpEngineMaxMessageSize, 484}.

%% Note: The commas in the IP are in the correct format
```

Next, edit the `community.conf` file if you would like to change your community from public to a different string.

Finally, edit the `standard.conf` file and update it with the proper information:

```erlang
{sysName, "Riak Node 1"}.
{sysDescr, "Riak Agent"}.
{sysContact, "syadmin@company.com"}.
{sysLocation, "System and Rack Location"}.
{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
{sysServices, 72}. %% don't change
```

Riak needs to be restarted for configuration changes to take effect.

**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.

To force Riak to reload SNMP configuration files on startup:

 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).

 2. Locate the SNMP term:

    ```erlang
    {snmp,
      [{agent,
        [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
    ```

 3. Add `{force_load, true}` to the `config` term:

    ```erlang
    {snmp,
      [{agent,
        [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
                   {force_load, true}]},
         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
    ```

 4. Save `app.config`

 5. Restart Riak

Once you have configured the SNMP settings, you can start your Riak node and use `snmpwalk` to verify that the setup is working:

```bash
$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
```

If you would like to query the OIDs associated with Riak, you will need to reference the MIB shipped with Riak.
For example, the x86_64 packages have the MIB in the following folder:

```bash
/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
```

This folder can be referenced in the snmpwalk command as follows:

```bash
$ snmpwalk -OS -c public -v 2c -m ALL \
  -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
  192.168.52.129:4000 RIAK
```


## SNMP Counters

**vnodeGets**
*Type:* Counter
Number of vnode-level GETs in past minute

**vnodePuts**
*Type:* Counter
Number of vnode-level PUTs in past minute

**nodeGets**
*Type:* Counter
Number of GETs in past minute

**nodePuts**
*Type:* Counter
Number of PUTs in past minute

**nodeGetTimeMean**
*Type:* Gauge
Mean GET time (microseconds)

**nodeGetTimeMedian**
*Type:* Gauge
Median GET time (microseconds)

**nodeGetTime95**
*Type:* Gauge
95th percentile GET time (microseconds)

**nodeGetTime99**
*Type:* Gauge
99th percentile GET time (microseconds)

**nodeGetTime100**
*Type:* Gauge
Maximum GET time (microseconds)

**nodePutTime95**
*Type:* Gauge
95th percentile PUT time (microseconds)

**nodePutTime99**
*Type:* Gauge
99th percentile PUT time (microseconds)

**nodePutTime100**
*Type:* Gauge
Maximum PUT time (microseconds)

**nodePutTimeMean**
*Type:* Gauge
Mean PUT time (microseconds)

**nodePutTimeMedian**
*Type:* Gauge
Median PUT time (microseconds)




diff --git a/content/riak/kv/3.0.3/using/reference/statistics-monitoring.md b/content/riak/kv/3.0.3/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..e5941f9d2c
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/statistics-monitoring.md
@@ -0,0 +1,395 @@
---
title: "Statistics & Monitoring Reference"
description: ""
project: "riak_kv"
project_version: 3.0.3
menu:
  riak_kv-3.0.3:
    name: "Monitoring"
    identifier: "managing_ref_monitoring"
    weight: 106
    parent: "managing_ref"
toc: true
aliases:
  - /riak/3.0.3/ops/running/stats-and-monitoring
  - /riak/kv/3.0.3/ops/running/stats-and-monitoring
---

Riak provides data related to current operating status, which includes
statistics in the form of counters and histograms. These statistics
are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/3.0.3/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.

This page presents the most commonly monitored and gathered
statistics, as well as numerous solutions for monitoring and gathering
statistics that our customers and community report using successfully
in Riak cluster environments. You can learn more about the specific
Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/3.0.3/developing/api/http/status) documentation.

## System Metrics To Graph

Graphing general system metrics of Riak nodes will help with
diagnostics and early warnings of potential problems, as well as help
guide provisioning and scaling decisions.

* CPU (user/system/wait/idle)
* Processor Load
* Available Memory
* Available disk space
* Used file descriptors
* Swap Usage
* IOWait
* Read operations
* Write operations
* Network throughput
* Network errors

We also recommend tracking your system's virtual memory activity and
writebacks. Things like massive flushes of dirty pages or steadily
climbing writeback volumes can indicate poor virtual memory tuning.
+More information can be found [here][sysctl_vm_txt] and in our +documentation on [system tuning]({{<baseurl>}}riak/kv/3.0.3/using/performance/#storage-and-file-system-tuning). + +## Riak Metrics to Graph +Riak metrics fall into several general categories: + +1. Throughput metrics +2. Latency metrics +3. Erlang resource usage metrics +4. General Riak load/health metrics + +If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/inspecting-node) is +not practical, you should pick a minimum relevant subset from these +categories. Some of the most helpful metrics are discussed below. + +### Throughput Metrics + +Graphing the throughput stats relevant to your use case is often +helpful for capacity planning and usage trend analysis. In addition, +it helps you establish an expected baseline -- that way, you can +investigate unexpected spikes or dips in the throughput. The +following stats are recorded for operations that happened *during the +last minute*. + +Metric | Relevance | Operations (for the last minute) +:--------|:--------|:-------------------------------- +```node_gets``` | K/V | Reads coordinated by this node +```node_puts``` | K/V | Writes coordinated by this node +```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes +```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes +```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes +```search_query_throughput_one``` | Search | Search queries on the node +```search_index_throughtput_one``` | Search | Documents indexed by Search +```consistent_gets``` | Strong Consistency | Consistent reads on this node +```consistent_puts``` | Strong Consistency | Consistent writes on this node +```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads + +Note that there are no separate stats for updates to Flags or +Registers, as these are included in ```vnode_map_update```. + +### Latency Metrics + +As with the throughput metrics, keeping an eye on average (and max) +latency times will help detect usage patterns, and provide advanced +warnings for potential problems. + +{{% note title="Note on FSM Time Stats" %}} +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time effectively +represents experienced latency. Mean, Median, and 95th-, 99th-, and +100th-percentile (Max) counters are displayed. These are one-minute stats. 
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute +```pbc_active``` | | Number of currently active protocol buffer connections +```pbc_connects``` | | Number of new protocol buffer connections established during the last minute +```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes) +```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0) +```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection +```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### General Riak Search Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Description +:------|:------------ +`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. +`search_index_bad_entry_one ` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute. +`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last start of Riak. +`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute. + + +## Command-line Interface + +The [`riak-admin`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/) tool provides two +interfaces for retrieving statistics and other information: `status` +and `stat`. + +### status + +Running the `riak-admin status` command will return all of the +currently available information from a running node. + +```bash +riak-admin status +``` + +This will return a list of over 300 key/value pairs, like this: + +``` +1-minute stats for 'dev1@127.0.0.1' +------------------------------------------- +connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1'] +consistent_get_objsize_100 : 0 +consistent_get_objsize_195 : 0 +... etc ... +``` + +A comprehensive list of available stats can be found in the +[Inspecting a Node]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/inspecting-node/#riak-admin-status) document. + +### stat + +The `riak-admin stat` command is related to the `riak-admin status` +command but provides a more fine-grained interface for interacting with +stats and information. Full documentation of this command can be found +in the [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#stat) document. + +## Statistics and Monitoring Tools + +There are many open source, self-hosted, and service-based solutions for +aggregating and analyzing statistics and log data for the purposes of +monitoring, alerting, and trend analysis on a Riak cluster. Some +solutions provide Riak-specific modules or plugins as noted. + +The following are solutions which customers and community members have +reported success with when used for monitoring the operational status of +their Riak clusters. Community and open source projects are presented +along with commercial and hosted services. 
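+
+Most of the tools below ultimately consume the same HTTP [`/stats`]({{<baseurl>}}riak/kv/3.0.3/developing/api/http/status) endpoint described above. As a minimal sketch of what that looks like (the host, port, and use of `jq` are assumptions; adjust for your cluster):
+
+```bash
+# Fetch the stats blob from a node (default HTTP port 8098 assumed)
+# and pull out a few commonly graphed metrics with jq.
+curl -s http://localhost:8098/stats | \
+  jq '{node_gets, node_puts, node_get_fsm_time_95, node_put_fsm_time_95}'
+```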
+
+{{% note title="Note on Riak 2.x Statistics Support" %}}
+Many of the tools below were created either by third parties or by Basho
+engineers for general usage, and have been passed to the community for further
+updates. As such, many of them only aggregate the statistics and messages
+that were output by Riak 1.4.x.
+
+Like all code under [Basho Labs](https://github.com/basho-labs/), the below
+tools are "best effort" and have no dedicated Basho support. We both
+appreciate and need your contribution to keep these tools stable and up to
+date. Please open up a GitHub issue on the repository if you'd like to be a
+maintainer.
+
+Look for banners calling out the tools we've verified support the latest
+Riak 2.x statistics!
+{{% /note %}}
+
+### Self-Hosted Monitoring Tools
+
+#### Riaknostic
+
+[Riaknostic](http://riaknostic.basho.com) is a growing suite of
+diagnostic checks that can be run against your Riak node to discover
+common problems and recommend how to resolve them. These checks are
+derived from the experience of the Basho Client Services Team as well as
+numerous public discussions on the mailing list, IRC room, and other
+online media.
+
+Riaknostic integrates into the `riak-admin` command via a `diag`
+subcommand, and is a great first step in the process of diagnosing and
+troubleshooting issues on Riak nodes.
+
+#### Riak Control
+
+[Riak Control]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak
+clusters. It is designed to give you quick insight into the health of
+your cluster and allow for easy management of nodes.
+
+While Riak Control does not currently offer specific monitoring and
+statistics aggregation or analysis functionality, it does offer features
+which provide immediate insight into overall cluster health, node
+status, and handoff operations.
+
+#### collectd
+
+[collectd](http://collectd.org) gathers statistics about the system it
+is running on and stores them. The statistics are then typically graphed
+to find current performance bottlenecks, predict system load, and
+analyze trends.
+
+#### Ganglia
+
+[Ganglia](http://ganglia.info) is a monitoring system specifically
+designed for large, high-performance groups of computers, such as
+clusters and grids. Customers and community members using Riak have
+reported success in using Ganglia to monitor Riak clusters.
+
+A [Riak Ganglia module][riak_ganglia] for collecting statistics from
+the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/3.0.3/developing/api/http/status) endpoint is also available.
+
+#### Nagios
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x.**
+{{% /note %}}
+
+[Nagios](http://www.nagios.org) is a monitoring and alerting solution
+that can provide information on the status of Riak cluster nodes, in
+addition to various types of alerting when particular events occur.
+Nagios also offers logging and reporting of events and can be used for
+identifying trends and capacity planning.
+
+A collection of [reusable Riak-specific scripts][riak_nagios] is
+available to the community for use with Nagios.
+
+#### OpenTSDB
+
+[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database
+(TSDB) used to store, index, and serve metrics from various sources. It can
+collect data at a large scale and graph these metrics on the fly.
+
+A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of
+the [tcollector framework][tcollector].
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream
+processing language to aggregate events from client agents running on
+Riak nodes, and can help track trends or report on events as they occur.
+Statistics can be gathered from your nodes and forwarded to a solution
+such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project
+consisting of small programs for sending data to Riemann provides a
+module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring,
+alerting, and graphing solution that can provide information on the state of
+Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started
+monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools that Basho customers have
+reported using successfully for statistics gathering and monitoring
+within their Riak clusters.
+
+#### Circonus
+[Circonus](http://circonus.com) provides organization-wide monitoring,
+trend analysis, alerting, notifications, and dashboards. It can be
+used to provide trend analysis and help with troubleshooting and
+capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform
+that can provide information on the current and past states of Riak nodes and
+visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/3.0.3/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or
+as a service, and provides tools for visualization of machine-generated
+data such as log files. It can be connected to Riak's HTTP statistics
+[`/stats`]({{<baseurl>}}riak/kv/3.0.3/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log
+files, including operating system and Riak-specific logs and Riak
+statistics data. These data are then available for real-time graphing,
+search, and other visualizations ideal for troubleshooting complex issues
+and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistical information which can be
+aggregated, monitored, analyzed, graphed, and reported on in a variety
+of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to
+include it (or would otherwise like to update the information on this
+page), feel free to fork the docs, add it in the appropriate section,
+and send a pull request to the [Riak
+Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ + + + + diff --git a/content/riak/kv/3.0.3/using/reference/strong-consistency.md b/content/riak/kv/3.0.3/using/reference/strong-consistency.md new file mode 100644 index 0000000000..6dd4dd0c75 --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/strong-consistency.md @@ -0,0 +1,150 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +aliases: +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/3.0.3/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + +## Making the Strong vs. Eventual Decision + +The first system described above may sound like the undisputed champion, +and the second system undesirable. However: + +1. Reads and writes on the first system will often be slower---if only + by a few milliseconds---because the system needs to manage reads and + writes more carefully. If performance is of primary concern, the + first system might not be worth the sacrifice. +2. Reads and writes on the first system may fail entirely if enough + servers are unavailable. If high availability is the top priority, + then the second system has a significant advantage. + +So when deciding whether to use strong consistency in Riak, the +following question needs to be asked: + +#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value? + +If the answer is yes, then you should seriously consider using Riak in a +strongly consistent way for the data that demands it, while bearing in +mind that other data can still be stored in Riak in an eventually +consistent way. 
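+
+As a sketch of what opting in looks like, strong consistency is enabled per bucket type via the `consistent` property (the type name `strongly_consistent` here is illustrative):
+
+```bash
+# Create a bucket type with strong consistency enabled, then activate it.
+# Only keys stored under this type receive the stronger guarantees.
+riak-admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'
+riak-admin bucket-type activate strongly_consistent
+```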
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable
+trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available
+than eventually consistent operations because they require a **quorum**
+of available object replicas to succeed. Quorum is defined as N / 2 + 1,
+or `n_val` / 2 + 1. If N is set to 7, at least 4 object replicas must be
+available; if N is set to 3, at least 2 must be available; and so on.
+
+If there is a network partition that leaves less than a quorum of object
+replicas available within an ensemble, strongly consistent operations
+against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault
+tolerance. Consistent operations can still succeed when a minority of
+replicas in each ensemble are offline, faulty, or unreachable. In
+other words, **strongly consistent operations will succeed as long as
+quorum is maintained**. A fuller discussion can be found in the
+[operations]({{<baseurl>}}riak/kv/3.0.3/configuring/strong-consistency/#fault-tolerance)
+documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong
+consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/3.0.3/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations,
+which can entail a performance hit of varying proportions, depending on
+a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/3.0.3/configuring/strong-consistency/#performance).
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/v2-multi-datacenter.md b/content/riak/kv/3.0.3/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..1b725bc106
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,40 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.3/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+ +[Learn More >>][v2 mdc fullsync] + + + + diff --git a/content/riak/kv/3.0.3/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/3.0.3/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..8b973e549e --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,130 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/3.0.3/ops/mdc/v2/architecture + - /riak/kv/3.0.3/ops/mdc/v2/architecture +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.3/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e. +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime mode are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the Figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. 
The site node in the secondary cluster initiates fullsync replication + with the primary node by sending a message to the listener node in + the primary cluster +3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode) in their respective clusters and compute a hash for + each key's object value. The site node on the secondary cluster sends + its complete list of key/hash pairs to the listener node in the + primary cluster. The listener node then sequentially compares its + key/hash pairs with the primary cluster's pairs, identifying any + missing objects or updates needed in the secondary cluster. +4. The listener node streams the missing objects/updates to the + secondary cluster. +5. The secondary cluster replicates the updates within the cluster to + achieve the new object values, completing the fullsync cycle + +<br> +![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png) +<br> + +## Realtime Replication + +Riak performs the following steps during realtime +replication, as illustrated in the Figure below. + +1. The secondary cluster establishes a TCP connection to the primary +2. Realtime replication of a key/object is initiated when an update is + sent from a client to the primary cluster +3. The primary cluster replicates the object locally +4. The listener node on the primary cluster streams an update to the + secondary cluster +5. The site node within the secondary cluster receives and replicates + the update + +<br> +![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png) +<br> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/3.0.3/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +source and sink cluster. + + + + diff --git a/content/riak/kv/3.0.3/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/3.0.3/using/reference/v2-multi-datacenter/scheduling-fullsync.md new file mode 100644 index 0000000000..5df2d7f830 --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -0,0 +1,53 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Scheduling Fullsync" + identifier: "managing_ref_v2_fullsync" + weight: 101 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/3.0.3/ops/mdc/v2/scheduling-fullsync + - /riak/kv/3.0.3/ops/mdc/v2/scheduling-fullsync +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.3/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. +{{% /note %}} + + +## Scheduling Fullsync Operation + +With the `pause` and `resume` commands it is possible to limit the +fullsync operation to off-peak times. First, disable `fullsync_interval` +and set `fullsync_on_connect` to `false`. Then, using cron or something +similar, execute the commands below at the start of the sync window. 
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` + + + + diff --git a/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter.md b/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..727106938d --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter.md @@ -0,0 +1,52 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +aliases: +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] + + + + diff --git a/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..6deb4fd0fe --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,129 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.3/ops/mdc/v3/aae + - /riak/kv/3.0.3/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak
+Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a
+technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in
+LevelDB, so if AAE is already active, there is no additional startup
+delay for enabling the `aae` fullsync strategy. AAE can also be enabled
+for the first time on a cluster, although some custom settings can
+enhance performance in this case to help AAE trees be built more
+quickly. See [Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak version 1.4.0 or later installed on source and sink clusters
+  (Riak Enterprise for versions up to and including 2.2.3)
+* Riak MDC Replication Version 3 enabled on source and sink
+  clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the
+  `advanced.config` configuration file must be set to `aae` on both
+  source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In
+  the event that an AAE tree is not built on both the source and sink,
+  fullsync will default to the `keylist` fullsync strategy for that
+  partition.
+
+## Configuration
+
+If you are using Riak version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file. For more information and for a list
+of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To enable [active anti-entropy][glossary aae] \(AAE), you must enable it on both source and sink clusters. If it is not
+enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all
+of the necessary hash trees because the default **build rate** of trees
+is to build 1 partition per hour, per node. With a
+[ring size][concept clusters] of 256 and 5 nodes, that is 2 days.
+
+Changing the rate of tree building can speed up this process, with the
+caveat that rebuilding a tree takes processing time from the cluster,
+and this should not be done without assessing the possible impact on
+get/put latencies for normal cluster operations. For a production
+cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a
+partition has not had its AAE tree built yet, it will default to using
+the `keylist` replication strategy. Instructions on these settings can
+be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting
+the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit`
+settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to use `aae` on
+both source and sink clusters. If not, the `keylist` replication
+strategy will be used.
+
+To enable AAE with Version 3 MDC Replication:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {fullsync_strategy, aae},
+             % ...
+            ]}
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..85470192ef
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/architecture.md
@@ -0,0 +1,186 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Architecture"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Architecture"
+    identifier: "managing_ref_v3_architecture"
+    weight: 100
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.3/ops/mdc/v3/architecture
+  - /riak/kv/3.0.3/ops/mdc/v3/architecture
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters
+
+## How Version 3 Replication Works
+
+In Multi-Datacenter (MDC) Replication, a cluster can act as either the
+
+* **source cluster**, which sends replication data to one or more
+* **sink clusters**, which are generally located in datacenters in other
+  regions or countries.
+
+Bidirectional replication can easily be established by making a cluster
+both a source and sink to other clusters. Riak
+Multi-Datacenter Replication is considered "masterless" in that all
+participating clusters will resolve replicated writes via the normal
+resolution methods available in Riak.
+
+In Multi-Datacenter Replication, there are two primary modes of
+operation:
+
+* **Fullsync** replication is a complete synchronization that occurs
+  between source and sink cluster(s), which can be performed upon
+  initial connection of a sink cluster if you wish
+* **Realtime** replication is a continual, incremental synchronization
+  triggered by successful writes of new updates on the source cluster
+
+Fullsync and realtime replication modes are described in detail below.
+
+## Concepts
+
+### Sources
+
+A source refers to a cluster that is the primary producer of replication
+data. A source can also refer to any node that is part of the source
+cluster. Source clusters push data to sink clusters.
+
+### Sinks
+
+A sink refers to a cluster that is the primary consumer of replication
+data. A sink can also refer to any node that is part of the sink
+cluster. Sink clusters receive data from source clusters.
+
+### Cluster Manager
+
+The cluster manager is a Riak service that provides
+information regarding nodes and protocols supported by the sink and
+source clusters. This information is primarily consumed by the
+`riak-repl connect` command.
+
+### Fullsync Coordinator
+
+In fullsync replication, a node on the source cluster is elected to be
+the *fullsync coordinator*. This node is responsible for starting and
+stopping replication to the sink cluster. It also communicates with the
+sink cluster to exchange key lists and ultimately transfer data across a
+TCP connection. If a fullsync coordinator is terminated as the result of
+an error, it will automatically restart on the current node. If the node
+becomes unresponsive, a leader election will take place within 5 seconds
+to select a new node from the cluster to become the coordinator. In the
+event of a coordinator restart, a fullsync will have to restart.
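+
+In practice, the cluster manager described above is what `riak-repl connect` contacts. As a hedged sketch of wiring a source to a sink (the cluster names and address are illustrative; 9080 is the commonly used default cluster manager port):
+
+```bash
+# Name this cluster (run the analogous command on the sink as well),
+# then connect to the sink's cluster manager and enable replication.
+riak-repl clustername source_cluster
+riak-repl connect sink.example.com:9080
+riak-repl realtime enable sink_cluster
+riak-repl fullsync enable sink_cluster
+```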
+ +## Fullsync Replication + +Fullsync replication scans through the list of partitions in a Riak +cluster and determines which objects in the sink cluster need to be +updated. A source partition is synchronized to a node on the sink +cluster containing the current partition. + +## Realtime Replication + +In realtime replication, a node in the source cluster will forward data +to the sink cluster. A node in the source cluster does not necessarily +connect to a node containing the same [vnode][glossary vnode] on +the sink cluster. This allows Riak to spread out realtime replication +across the entire cluster, thus improving throughput and making +replication more fault tolerant. + +### Initialization + +Before a source cluster can begin pushing realtime updates to a sink, +the following commands must be issued: + +1. `riak-repl realtime enable <sink_cluster>` + + After this command, the realtime queues (one for each Riak node) are + populated with updates to the source cluster, ready to be pushed to + the sink. + +2. `riak-repl realtime start <sink_cluster>` + + This instructs the Riak connection manager to contact the sink + cluster. + + <br /> + ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png) + <br /> + + At this point realtime replication commences. + +<ol start="3"> +<li>Nodes with queued updates establish connections to the sink cluster +and replication begins.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png) +<br /> + +### Realtime queueing and synchronization + +Once initialized, realtime replication continues to use the queues to +store data updates for synchronization. + +<ol start="4"> +<li>The client sends an object to store on the source cluster.</li> +<li>Riak writes N replicas on the source cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png) +<br /> + +<ol start="6"> +<li>The new object is stored in the realtime queue.</li> +<li>The object is copied to the sink cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png) +<br /> + +<ol start="8"> +<li>The destination node on the sink cluster writes the object to N +nodes.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png) +<br /> + +<ol start="9"> +<li>The successful write of the object to the sink cluster is +acknowledged and the object removed from the realtime queue.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png) +<br /> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size][concept clusters]; if you are using fullsync +replication, every bucket's `n_val` must be the same in both the +source and sink cluster. 
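+
+A quick way to verify the ring-size restriction is to compare `ring_num_partitions` from `riak-admin status` on a node in each cluster (a minimal sketch; the reported values must match before enabling replication):
+
+```bash
+# Run on a node in each cluster and compare the reported values.
+riak-admin status | grep ring_num_partitions
+```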
+ + +<script type="text/javascript"> +document.write(unescape("%3Cscript src='//munchkin.marketo.net/munchkin.js' type='text/javascript'%3E%3C/script%3E")); +</script> +<script>Munchkin.init('721-DGT-611');</script> + + + + diff --git a/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/cascading-writes.md new file mode 100644 index 0000000000..39f7ce8e1c --- /dev/null +++ b/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/cascading-writes.md @@ -0,0 +1,102 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Cascading Realtime Writes" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Cascading Writes" + identifier: "managing_ref_v3_cascading_writes" + weight: 102 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.3/ops/mdc/v3/cascading-writes + - /riak/kv/3.0.3/ops/mdc/v3/cascading-writes +--- + +## Introduction + +Riak includes a feature that cascades realtime writes across +multiple clusters. + +Cascading Realtime Writes is enabled by default on new clusters running +Riak. It will need to be manually enabled on existing clusters. + +Cascading realtime requires the `{riak_repl, rtq_meta}` capability to +function. + +{{% note title="Note on cascading tracking" %}} +Cascading tracking is a simple list of where an object has been written. This +works well for most common configurations. Larger installations, however, may +have writes cascade to clusters to which other clusters have already written. +{{% /note %}} + + +``` ++---+ +---+ +---+ +| A | <-> | B | <-> | C | ++---+ +---+ +---+ + ^ ^ + | | + V V ++---+ +---+ +---+ +| F | <-> | E | <-> | D | ++---+ +---+ +---+ +``` + +In the diagram above, a write at cluster A will begin two cascades. One +goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and +finally B. Each cascade will loop around to A again, sending a +replication request even if the same request has already occurred from +the opposite direction, creating 3 extra write requests. + +This can be mitigated by disabling cascading in a cluster. If cascading +were disabled on cluster D, a write at A would begin two cascades. One +would go through B, C, and D, the other through F, E, and D. This +reduces the number of extraneous write requests to 1. + +A different topology can also prevent extra write requests: + +``` ++---+ +---+ +| A | | E | ++---+ +---+ + ^ ^ ^ ^ + | \ +---+ +---+ / | + | > | C | <-> | D | < | + | / +---+ +---+ \ | + V V V V ++---+ +---+ +| B | | F | ++---+ +---+ +``` + +A write at A will cascade to C and B. B will not cascade to C because +A will have already added C to the list of clusters where the write has +occurred. C will then cascade to D. D then cascades to E and F. E and F +see that the other was sent a write request (by D), and so they do not +cascade. + +## Usage + +Riak Cascading Writes can be enabled and disabled using the +`riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/v3-multi-datacenter) for more information. 
+
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..88b0138be5
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,72 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.3/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/3.0.3/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak_repl`
+section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes,
+  between fullsyncs, _or_
+* a list of pairs of the form `[{"clustername", time_in_minutes},
+  {"clustername", time_in_minutes}, ...]`, one pair for each sink
+  participating in fullsync replication. Note the commas separating each
+  pair, and the `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {data_root, "/configured/repl/data/root"},
+             {fullsync_interval, 90} %% fullsync runs every 90 minutes
+             % ...
+            ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {data_root, "/configured/repl/data/root"},
+             % clusters sink_boston + sink_newyork have different intervals (in minutes)
+             {fullsync_interval, [
+                                  {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+                                  {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+
+            ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been
+  completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the
+  specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last
+  completed fullsync.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/repair-recovery.md b/content/riak/kv/3.0.3/using/repair-recovery.md
new file mode 100644
index 0000000000..45364316e8
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/repair-recovery.md
@@ -0,0 +1,53 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/repair-recovery/errors.md b/content/riak/kv/3.0.3/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..f25a9afb79
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/repair-recovery/errors.md
@@ -0,0 +1,366 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/running/recovery/errors
+  - /riak/kv/3.0.3/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may
+encounter -- screws fall out all of the time, the world is an imperfect
+place. This is an attempt at capturing the most common recent errors
+that users do encounter, as well as to describe some non-critical
+error atoms that you may find in the logs.
+
+Discovering the source of an error can take some detective work, since
+one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error
+messages may appear in. Depending upon your log configuration, some may
+appear more often (e.g., if you set the log level to debug), while others may
+output to your console (e.g., if you tee'd your output or started Riak as `riak
+console`).
+
+You can optionally customize your log message format via the
+`lager_default_formatter` field under `lager` in `app.config`. If you
+do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so that you can look up portions of a
+log message, since printing every variation would be a bit unwieldy. For
+example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+starts with a timestamp (`12:34:27.999`), followed by the log severity
+(`[error]`), with a message formatted by lager (found in the Lager table
+below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the Lager project, so it's good to note
+some of the more common message formats. In almost every case the
+reasons for the error are described as variables, such as `Reason` or
+`Mod` (meaning the Erlang module which is generally the source of the
+error).
+
+Riak does not format all error messages that it receives into
+human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in
+this table, where the Erlang `Mod` value is `riak_core_capability` and
+the reason was an Erlang error: `no function clause matching
+orddict:fetch('riak@192.168.2.81', []) line 72`.
+
+Error | Message
+------|--------
+ | `gen_server <Mod> terminated with reason: <Reason>`
+ | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>`
+ | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>`
+`badarg` | `bad argument in call to <Mod1> in <Mod2>`
+`badarith` | `bad arithmetic expression in <Mod>`
+`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>`
+`badmatch` | `no match of right hand value <Val> in <Mod>`
+`bad_return` | `bad return value <Value> from <Mod>`
+`bad_return_value` | `bad return value: <Val> in <Mod>`
+`badrecord` | `bad record <Record> in <Mod>`
+`case_clause` | `no case clause matching <Val> in <Mod>`
+`emfile` | `maximum number of file descriptors exhausted, check ulimit -n`
+`function_clause` | `no function clause matching <Mod>`
+`function not exported` | `call to undefined function <Func> from <Mod>`
+`if_clause` | `no true branch found while evaluating if expression in <Mod>`
+`noproc` | `no such process or port in call to <Mod>`
+`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded`
+`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded`
+`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded`
+`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded`
+`try_clause` | `no try clause matching <Val> in <Mod>`
+`undef` | `call to undefined function <Mod>`
+
+### Error Atoms
+
+Since Erlang programming favors a "happy path/fail fast" style, one
+of the more common error log strings you might encounter contains
+`{error,{badmatch,{...`. This is Erlang's way of telling you that an
+unexpected value was assigned, so these errors can prefix the more
+descriptive parts. In this case, `{error,{badmatch,{...` prefixes the
+more interesting `insufficient_vnodes_available` error, which can be
+found in the `riak_kv` table later on in this document.
+
+```log
+2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\
+{error,{error,{badmatch,{error,insufficient_vnodes_available}},\
+[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\
+{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\
+{webmachine_decision_core,handle_request,2},\
+{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}}
+```
+
+## Erlang Errors
+
+Although relatively rare once a Riak cluster is running in production,
+users new to Riak or Erlang occasionally encounter errors on initial
+installation. These spring from a setup Erlang does not expect,
+generally due to network, permission, or configuration problems.
+
+Error | Description | Resolution
+:-----|:------------|:----------
+`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or, if Riak is already running, check for `beam.smp`; or, if epmd thinks Riak is running, check for and kill stale epmd processes
+`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.3/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.3/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | A file/directory write was attempted on a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large an object or that you simply don't have enough RAM. The suggested minimum RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang atoms (and
+quite often wrapped within an `{error,{badmatch,{...` tuple, as
+described in the [Erlang Errors](#erlang-errors) section above). This
+table lays out those terse error codes and related log messages, if
+they exist.
+
+### Riak Core
+
+Riak Core is the underlying implementation for KV. These are errors
+originating from that framework, and can appear whether you use KV,
+Search, or any Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g. 
backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check `riak-admin ring-status` and ensure all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in the expected location | Ensure that the Erlang proc can write to `ring_state_dir` or has permission to create that dir
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallows more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections, and ensure that the Erlang cookie (`vm.args` `-setcookie`) is the same on all nodes
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use (e.g., `{riak_kv_stat, true}`)
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possible 
corrupted ring
+`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN` and the sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are correct
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are correct
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.3/using/repair-recovery/errors/#more"> Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read the associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read the associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read the associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive a `xfer_complete` status
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be
+Riak proper. 
This is the source of most of the code, and consequently,
+most of the error messages.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster are of `valid` Status
+`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
+`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that the coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.3/using/repair-recovery/errors/#more"> Step 1</a>.
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting `vm.args` `-setcookie`. See <a href="{{< baseurl >}}riak/kv/3.0.3/using/repair-recovery/errors/#more"> Step 1</a>.
+`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone
+`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value
+`{field_parsing_failed, {Field, Value}}` | `Could not parse field <Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example, a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since BADVAL should instead be an integer
+`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, following the message's exception and stacktrace to help debug
+`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend
+`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`.
+`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings
+`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe`
+`invalid_message` | | Unknown event sent to module | Ensure you're running the same version of Riak (and specifically poolboy) across all nodes
+`{invalid_range, Args}` | | Index range query has Start > End | Fix your query
+`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result
+`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files
+`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key
+`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk. 
If this happens repetitively, stop and restart the Riak node to force a memory reallocation
+`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N
+`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized
+`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe`
+`notfound` | | No value found | Value was deleted, or was not yet stored or replicated
+`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high
+`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value
+`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code
+`{pw_val_unsatisfied, PR, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high
+`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value
+`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high
+`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value
+`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called
+`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value
+`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false`
+`timeout` | | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.3/using/repair-recovery/errors/#more"> Step 1</a>. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production.
+`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format
+`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format
+`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value
+`too_many_results` | | Too many results are attempted to be returned | This is a protective error. 
Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000)
+`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin`
+`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high
+`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value
+ | `Invalid equality query <SKey>` | An equality value is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query
+ | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query
+ | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for the given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution
+
+### Backend Errors
+
+These errors tend to stem from server-based problems. Backends are
+sensitive to low or corrupt disk or memory resources, native code, and
+configuration differences between nodes. Conversely, a network issue is
+unlikely to affect a backend.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config
+`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where to store bitcask data, under the `bitcask` section
+`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list
+`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config`
+`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure
+`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config`
+`not_loaded` | | Native driver not loading | Ensure your native drivers exist (`.dll` or `.so` files under lib/`project`/priv, where `project` is most likely eleveldb)
+`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket
+`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production
+
+### JavaScript
+
+These are some errors related to JavaScript pre-commit functions,
+MapReduce functions, or simply the management of the pool of JavaScript
+VMs. If you do not use JavaScript, these should not be encountered. If
+they are, check your configuration for overly high `*js_vm*` values, or
+treat them as an epiphenomenon of a real issue, such as low resources.
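+
+The `*js_vm*` knobs live in the `riak_kv` section of `app.config`; the
+parameter names are the ones listed in the table below. A minimal sketch
+(the counts are illustrative only; tune them to your workload):
+
+```erlang
+{riak_kv, [
+  %% JavaScript VMs available for map phases, reduce phases, and
+  %% pre-/post-commit hooks, respectively.
+  {map_js_vm_count, 8},
+  {reduce_js_vm_count, 6},
+  {hook_js_vm_count, 2}
+]}
+```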
+
+Error | Message | Description | Resolution
+---------|---------|-------------|-------
+`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`)
+`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A UTF-8 character given was in a bad format | Only use correct UTF-8 characters for JavaScript code and arguments
+`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments
+ | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties
+`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | On OS X you may have compiled with `llvm-gcc` rather than `gcc`.
+
+### MapReduce
+
+These are possible errors logged by Riak's MapReduce implementation,
+both legacy as well as Pipe. If you never use or call MapReduce, you
+should not run across these.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it
+`bad_mapred_inputs` | | A bad value sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary
+`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce query code placed as a Riak value must first be stored before execution
+`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct
+`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (both issues may require a node restart)
+`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has invalid input field | Fix MapReduce fields
+`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks. 
Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite as Erlang functions +`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post MapReduce request with at least one +`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value +`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly +`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long running job hitting MapReduce timeout, upgrade to Pipe +`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has invalid query field | Fix MapReduce query +`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set reduce phase at all +`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted + | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation, most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects + | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen +  +## Specific messages + +Although you can put together many error causes with the tables above, +here are some common yet esoteric messages with known causes and +solutions. + + Message | Resolution +:--------|:---------- +gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The Node has been changed, either through change of IP or `vm.args` `-name` without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files `rm -rf /var/lib/riak/ring/*` and rejoin to the cluster +gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a ttl mem setting on one node's mem backend, and another without) +monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting zdbbl higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1msec). +<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args` +(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/3.0.3/using/repair-recovery/errors/#more">Step 2</a>. +enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core. 
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the address the starting process is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause and resolve this kind of error; check for the existence of stale `beam.smp` processes.
+exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify the HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range, as the `riak` user will not have access to such ports.
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/3.0.3/using/reference/snmp) configuration.
+RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
+
+
+### More
+
+1. <a name="f1"></a>Ensure node inter-communication
+  - Check `riak-admin member-status` and ensure the cluster is valid.
+  - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected.
+  - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node.
+  - The `vm.args` `-setcookie` value must be the same for every node in the cluster.
+  - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`).
+
+2. <a name="f2"></a>Run LevelDB compaction
+  1. `find . -name "LOG" -exec grep -il 'compaction error' {} \;` *(Finding one compaction error is interesting, more than one might be a strong indication of a hardware or OS bug)*
+  2. Stop Riak on the node: `riak stop`
+  3. Start an Erlang session (do not start Riak; we just want Erlang)
+  4. From the Erlang console, run the following command to open the LevelDB database
+
+      ```erlang
+      [application:set_env(eleveldb, Var, Val) || {Var, Val} <-
+          [{max_open_files, 2000},
+           {block_size, 1048576},
+           {cache_size, 20*1024*1024*1024},
+           {sync, false},
+           {data_root, "/var/db/riak/leveldb"}]].
+      ```
+  5. For each of the corrupted LevelDB databases (found by `find . -name "LOG" -exec grep -il 'compaction error' {} \;`) run this command, substituting in the proper vnode number (see the sketch after this list for a way to script this step).
+
+      ```erlang
+      eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []).
+      ```
+  6. When all have finished successfully, you may restart the node: `riak start`
+  7. Check for proper operation by looking at log files in `/var/log/riak` and in the `LOG` files in the affected LevelDB vnodes.
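+
+As referenced in step 5, the per-vnode repair calls can be scripted from the
+same Erlang session. A minimal sketch, assuming the environment set in step 4
+and illustrative `data_root` and vnode values (substitute your own):
+
+```erlang
+%% VNodeList holds the vnode IDs reported by the `find` command, as strings.
+DataRoot = "/var/db/riak/leveldb".
+VNodeList = ["442446784738847563128068650529343492278651453440"].
+[eleveldb:repair(DataRoot ++ "/" ++ V, []) || V <- VNodeList].
+```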
+ + + + diff --git a/content/riak/kv/3.0.3/using/repair-recovery/failed-node.md b/content/riak/kv/3.0.3/using/repair-recovery/failed-node.md new file mode 100644 index 0000000000..526591283a --- /dev/null +++ b/content/riak/kv/3.0.3/using/repair-recovery/failed-node.md @@ -0,0 +1,114 @@ +--- +title: "Recovering a Failed Node" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Recover a Failed Node" + identifier: "repair_recover_failed_node" + weight: 104 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.3/ops/running/recovery/failed-node + - /riak/kv/3.0.3/ops/running/recovery/failed-node +--- + +## General Recovery Notes + +A Riak node can fail for many reasons, but a handful of checks enable you to +uncover some of the most common problems that can lead to node failure, +such as checking for RAID and filesystem consistency or faulty memory and +ensuring that your network connections are fully functioning. + +When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster. + +During the recovery process, hinted handoff will kick in and update the data on +the recovered node with updates accepted from other nodes in the cluster. Your +cluster may temporarily return `not found` for objects that are currently +being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on +these scenarios, in particular how the system behaves while the failed node is +not part of the cluster). + +## Node Name Changed + +If you are recovering from a scenario in which node name changes are out of +your control, you'll want to notify the cluster of its *new* name using the +following steps: + +1. Stop the node you wish to rename: + + ```bash + riak stop + ``` + + +2. Mark the node down from another node in the cluster: + + ```bash + riak-admin down <previous_node_name> + ``` + +3. Update the node name in Riak's configuration files: + + ```riakconf + nodename = <updated_node_name> + ``` + + ```vmargs + -name <updated_node_name> + ``` + +4. Delete the ring state directory (usually `/var/lib/riak/ring`). + +5. Start the node again: + + ```bash + riak start + ``` + +6. Ensure that the node comes up as a single instance: + + ```bash + riak-admin member-status + ``` + + The output should look something like this: + + ``` + ========================= Membership ========================== +Status Ring Pending Node +--------------------------------------------------------------- +valid 100.0% -- 'dev-rel@127.0.0.1' +--------------------------------------------------------------- +Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +7. Join the node to the cluster: + + ```bash + riak-admin cluster join <node_name_of_a_member_of_the_cluster> + ``` + +8. Replace the old instance of the node with the new: + + ```bash + riak-admin cluster force-replace <previous_node_name> <new_node_name> + ``` + +9. 
Review the changes:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Finally, commit those changes:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/repair-recovery/failure-recovery.md b/content/riak/kv/3.0.3/using/repair-recovery/failure-recovery.md
new file mode 100644
index 0000000000..bfd1572cae
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/repair-recovery/failure-recovery.md
@@ -0,0 +1,129 @@
+---
+title: "Failure & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Failure & Recovery"
+    identifier: "repair_recover_failure"
+    weight: 100
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/running/recovery/failure-recovery
+  - /riak/kv/3.0.3/ops/running/recovery/failure-recovery
+---
+
+Riak was built to withstand---or at the very least reduce the severity
+of---many types of system failure. Nonetheless, bugs are a reality,
+hardware does break, and occasionally Riak itself will fail. Here, we'll
+list some steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+## Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, and back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally and
+are not affected by a wider problem like a virtualization or network outage.
+Try to determine the cause of the problem from the data you have collected.
+
+## Data Loss
+
+Many failures incur either no data loss, or minimal loss that can be
+repaired automatically, without intervention. Outage of a single node
+does not necessarily cause data loss, as other replicas of every key are
+available elsewhere in the cluster. Once the node is detected as down,
+other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/3.0.3/learn/glossary/#hinted-handoff)).
+
+More severe data loss scenarios usually relate to hardware failure.
+If data is lost, several options are available for restoring it.
+
+1. **Restore from backup** - A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but it can be used to partially restore data from
+   lost storage volumes. If running in a RAID configuration, rebuilding
+   the array may also be possible.
+2. **Restore from multi-cluster replication** - If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the `riak-repl`
+   command.
+3. **Restore using intra-cluster repair** - Riak versions 1.2 and greater
+   include a repair feature which will restore lost partitions with
+   data from other replicas. Currently, this must be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho Client Services Engineer.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho are strongly recommended.
+
+## Data Corruption
+
+Data at rest on disk can become corrupted by hardware failure or other
+events. 
Generally, the Riak storage backends are designed to handle
+cases of corruption in individual files or entries within files, and can
+repair them automatically or simply ignore the corrupted parts.
+Otherwise, clusters can recover from data corruption in roughly the same
+way that they recover from data loss.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully.
+
+Replacing the node with one that has greater RAM capacity may temporarily
+alleviate the problem, but out-of-memory (OOM) issues tend to be an indication
+that the cluster is under-provisioned.
+
+## High Latency / Request Timeout
+
+High latencies and timeouts can be caused by slow disks or networks or an
+overloaded node. Check `iostat` and `vmstat` or your monitoring system to
+determine the state of resource usage. If I/O utilization is high but
+throughput is low, this may indicate that the node is responsible for
+too much data and growing the cluster may be necessary. Additional RAM
+may also improve latency because more of the active dataset will be
+cached by the operating system.
+
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/3.0.3/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+the number of siblings, causing longer disk service times and slower
+network responses.
+
+Sibling explosion can be detected by examining the `node_get_fsm_siblings`
+and `node_get_fsm_objsize` statistics from the `riak-admin status` command.
+To recover from sibling explosion, the application should be throttled and
+the resolution policy might need to be invoked manually on offending keys.
+
+A Basho CSE can assist in manually finding large values, i.e. those that
+potentially have a sibling explosion problem, in the storage backend.
+
+MapReduce requests typically involve multiple I/O operations and are
+thus the most likely to time out. From the perspective of the client
+application, the success of MapReduce requests can be improved by reducing the
+number of inputs, supplying a longer request timeout, and reducing the usage
+of secondary indexes. Heavily loaded clusters may experience more MapReduce
+timeouts simply because many other requests are being serviced as well. Adding
+nodes to the cluster can reduce MapReduce failure in the long term by
+spreading load and increasing available CPU and IOPS.
+
+
+## Cluster Recovery From Backups
+
+See [Changing Cluster Information]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+
+{{% note title="Tip" %}}
+If you are a TI Tokyo Riak support customer and require assistance or
+further advice with a cluster recovery, please file a ticket with the
+<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>. 
+{{% /note %}} + + + + diff --git a/content/riak/kv/3.0.3/using/repair-recovery/repairs.md b/content/riak/kv/3.0.3/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..a6c38b455e --- /dev/null +++ b/content/riak/kv/3.0.3/using/repair-recovery/repairs.md @@ -0,0 +1,391 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.3/ops/running/recovery/repairing-indexes + - /riak/kv/3.0.3/ops/running/recovery/repairing-indexes + - /riak/3.0.3/ops/running/recovery/failed-node + - /riak/kv/3.0.3/ops/running/recovery/failed-node + - /riak/3.0.3/ops/running/recovery/repairing-leveldb + - /riak/kv/3.0.3/ops/running/recovery/repairing-leveldb + - /riak/3.0.3/ops/running/recovery/repairing-partitions + - /riak/kv/3.0.3/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/3.0.3/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/3.0.3/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/3.0.3/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`.
+
+Compaction error messages take the following form:
+
+```
+<timestamp> Compaction Error: Corruption: corrupted compressed block contents
+```
+
+To check whether your node has experienced such errors, you will need to run a script that searches each `LOG` file for `Compaction Error`. Here is an example script:
+
+```bash
+find . -name "LOG" -exec grep -il 'compaction error' {} \;
+```
+
+If there are compaction errors in any of your vnodes, those will be listed in the console. If any vnode has experienced such errors, you would see output like this:
+
+```
+./442446784738847563128068650529343492278651453440/LOG
+```
+
+
+{{% note %}}
+While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level.
+{{% /note %}}
+
+
+## Healing Corrupted LevelDBs
+
+When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not.
+
+Choose your setup below:
+
+1. [Just LevelDB](#leveldb)
+2. [LevelDB with tiered storage](#leveldb-with-tiered-storage)
+
+
+### LevelDB
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+3\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+4\. Then set `Options` equal to an empty list:
+
+```erlang
+Options = [].
+```
+
+5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+VNodeList should be a list of each corrupted LevelDB that you found using the [`find` command above](#checking-for-compaction-errors).
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in VNodeList.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+### LevelDB with Tiered Storage
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. Check your riak.conf file and make note of the following values:
+
+* `leveldb.tiered` (integer)
+* `leveldb.tiered.path.fast`
+* `leveldb.tiered.path.slow`
+
+3\. 
To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+4\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+5\. Then supply the information you noted in Step 2:
+
+```erlang
+Options = [
+    {tiered_slow_level, »leveldb.tiered value«},
+    {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"},
+    {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"}
+].
+```
+
+6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+VNodeList should be a list of the corrupted LevelDB partitions that you found using the [`find` command above](#checking-for-compaction-errors), provided in double quotes.
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in VNodeList.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+
+## Repairing Partitions
+
+If you have experienced a loss of object replicas in your cluster, you
+may need to perform a repair operation on one or more of your data
+[partitions]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+run in situations where partitions or whole nodes are lost due to
+corruption or hardware failure. In these cases, nodes or partitions are
+brought back online without any data, which means that the need to
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/) is enabled.
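+
+A quick way to check whether active anti-entropy is enabled and exchanging
+data on a node is the `riak-admin aae-status` command; a sketch of its use
+(output layout varies by version):
+
+```bash
+# Summarizes recent AAE exchanges, entropy tree builds, and keys repaired
+riak-admin aae-status
+```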
+
+You will need to run a repair if the following are both true:
+
+* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* You have both non-expiring data and keys that are not accessed
+  frequently (which means that they are not likely to be subject to
+  [read repair]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+
+You will most likely not need to run a repair operation if _any_ of the
+following is true:
+
+* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/3.0.3/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Your entire key set is accessed frequently, allowing passive read
+  repair to repair the partitions
+* Your data expires frequently
+
+In most cases, we recommend either using active anti-entropy or, if
+necessary and only when necessary, running a repair operation using the
+instructions below.
+
+### Running a Repair
+
+The Riak KV repair operation will repair objects from a node's adjacent
+partitions on the ring, consequently fixing the index. This is done as
+efficiently as possible by generating a hash range for all the buckets
+and thus avoiding a preflist calculation for each key. Only a hash of
+each key is done, its range determined from a bucket->range map, and
+then the hash is checked against the range.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make them
+mutually exclusive events. If you join or remove a node, all repairs
+across the entire cluster will be killed.
+
+### Repairing a Single Partition
+
+In the case of data loss in a single partition, only that partition can
+be repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit **Enter** again to get a console prompt.
+
+2. Execute the repair for a single partition using the below command:
+
+    ```erlang
+    riak_kv_vnode:repair(»Partition ID«).
+    ```
+
+    where `»Partition ID«` is replaced by the ID of the partition to
+    repair. For example:
+
+    ```erlang
+    riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
+    ```
+
+3. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Repairing All Partitions on a Node
+
+If a node is lost, all partitions currently owned by that node can be
+repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+2. Get a copy of the current Ring:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with ring record information.
+    You can safely ignore it.
+
+3. Get a list of partitions owned by the node that needs to be repaired.
+Replace `dev1@127.0.0.1` with the name of the node to be repaired. The
+name can be found in each node's `vm.args` file, specified as the
+`-name` parameter, if you are using the older configuration system; if
+you are using the newer, `riak.conf`-based system, the name is given by
+the `nodename` parameter.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    **Note**: The above is an [Erlang list
+    comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html)
+    that loops over each `{Partition, Node}` tuple in the ring and
+    extracts only the partitions that match the given node name, as a
+    list.
+
+
+4. 
Execute the repair on all the partitions. Executing the repairs all
+at once will cause a lot of `{shutdown, max_concurrency}` messages in
+the logs. These can be safely ignored, as it is just the transfers
+mechanism enforcing an upper limit on the number of concurrent
+transfers.
+
+    ```erlang
+    [riak_kv_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Monitoring Repairs
+
+The above repair commands can be monitored via the `riak-admin
+transfers` command.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This command can be executed from a `riak attach`
+session as below:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, and will
+look similar to:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Repairs on a node can also be killed remotely from another node in the
+cluster. From a `riak attach` session the below command can be used:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/repair-recovery/rolling-replaces.md b/content/riak/kv/3.0.3/using/repair-recovery/rolling-replaces.md
new file mode 100644
index 0000000000..4f78f716a6
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/repair-recovery/rolling-replaces.md
@@ -0,0 +1,76 @@
+---
+title: "Rolling Replaces"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Rolling Replaces"
+    identifier: "repair_recover_replace"
+    weight: 106
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+---
+
+[upgrade]: {{<baseurl>}}riak/kv/3.0.3/setup/upgrading/cluster/
+[rolling restarts]: {{<baseurl>}}riak/kv/3.0.3/using/repair-recovery/rolling-restart/
+[add node]: {{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes
+
+Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis.
+
+The following steps should be undertaken on each Riak KV node that you wish to replace:
+
+1\. Create a free node:
+
+  a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster.
+
+  b\. Or free up a node that is already in the cluster by having it leave:
+
+    ```bash
+    riak-admin cluster leave »nodename«
+    ```
+
+  After creating a node or leaving a node, wait for all transfers to complete:
+
+  ```bash
+  riak-admin transfers
+  ```
+
+2\. Join the free node to your cluster:
+
+```bash
+riak-admin cluster join »free_node«
+```
+
+3\. Next, replace the free node with an existing node:
+
+```bash
+riak-admin cluster replace »free_node« »nodename«
+```
+
+4\. Then review the cluster transition plan:
+
+```bash
+riak-admin cluster plan
+```
+
+5\. And commit the changes:
+
+```bash
+riak-admin cluster commit
+```
+
+6\. Wait for all transfers to complete (see the polling sketch below):
+
+```bash
+riak-admin transfers
+```
+
+7\. Repeat steps 2-6 above until each node has been replaced.
+
+8\. Join the replaced node back into the cluster or decommission the additional node that was created.
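+
+Rather than polling `riak-admin transfers` by hand between steps, you can
+reuse the wait loop from the [rolling restarts][rolling restarts] guide; a
+sketch:
+
+```bash
+while ! riak-admin transfers | grep -iqF 'No transfers active'
+do
+  echo 'Transfers in progress'
+  sleep 5
+done
+```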
+ + + + diff --git a/content/riak/kv/3.0.3/using/repair-recovery/rolling-restart.md b/content/riak/kv/3.0.3/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..95f5910af7 --- /dev/null +++ b/content/riak/kv/3.0.3/using/repair-recovery/rolling-restart.md @@ -0,0 +1,64 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.3/ops/running/recovery/rolling-restart + - /riak/kv/3.0.3/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/3.0.3/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. + + + + diff --git a/content/riak/kv/3.0.3/using/repair-recovery/secondary-indexes.md b/content/riak/kv/3.0.3/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..5d3e18423d --- /dev/null +++ b/content/riak/kv/3.0.3/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,142 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.3/ops/running/recovery/repairing-indexes + - /riak/kv/3.0.3/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
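+
+Since these scans add load, one low-effort way to keep them off-peak is a
+cron entry. A hypothetical example (the schedule, user, and log path are
+illustrative only; adjust to your environment):
+
+```bash
+# Run a full secondary-index repair at 03:00 on Sundays, outside peak load
+0 3 * * 0  riak-admin repair-2i >> /var/log/riak/repair-2i.log 2>&1
+```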
+
+### Running a Repair
+
+The secondary indexes of a single partition can be repaired by executing:
+
+```bash
+riak-admin repair-2i <Partition_ID>
+```
+
+The secondary indexes of every partition can be repaired by executing the same command, without a partition ID:
+
+```bash
+riak-admin repair-2i
+```
+
+### Monitoring a Repair
+
+Repairs can be monitored with the following command:
+
+```bash
+riak-admin repair-2i status
+```
+
+### Killing a Repair
+
+In the event the secondary index repair operation needs to be halted, all repairs can be killed with:
+
+```bash
+riak-admin repair-2i kill
+```
+
+----
+
+## Repairing Search Indexes
+
+Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned.
+
+### Running a Repair
+
+If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index.
+
+This is done as efficiently as possible by generating a hash range for all the buckets, which avoids a preflist calculation for each key. Only a hash of each key is computed, its range is determined from a bucket→range map, and the hash is then checked against the range.
+
+This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly.
+
+1. From a cluster node with Riak installed, attach to the Riak console:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit enter again to get a console prompt.
+
+2. Get a list of partitions owned by the node that needs repair:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with Ring record information. You can safely ignore it.
+
+3. Then run the following code to get a list of partitions. Replace 'dev1@127.0.0.1' with the name of the node you need to repair.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the ring and extracts, as a list, only the partitions owned by the given node._
+
+4. Execute the repair on all the partitions. Executing them all at once will cause a lot of `{shutdown,max_concurrency}` messages in the logs. These can be safely ignored; they are just the transfers mechanism enforcing an upper limit on the number of concurrent transfers.
+
+    ```erlang
+    [riak_search_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. When you're done, press `Ctrl-D` to disconnect the console. Do not run `q()`, as it will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service; it does not stop the code from running.
+
+
+### Monitoring a Repair
+
+The repair command above can be slow, so if you reattach to the console, you can run the `repair_status` function. You can use the `Partitions` variable defined above to get the status of every partition.
+
+```erlang
+[{P, riak_search_vnode:repair_status(P)} || P <- Partitions].
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This means you'll either have to be attached to
+that node's console or you can use the `rpc` module to make a remote
+call. Here is an example of killing all repairs targeting partitions
+on the local node.
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, something akin to this:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Here is an example of executing the call remotely.
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership changes entail moving partition data, it is safest to make
+them mutually exclusive events. If you join or remove a node, all
+repairs across the entire cluster will be killed.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/running-a-cluster.md b/content/riak/kv/3.0.3/using/running-a-cluster.md
new file mode 100644
index 0000000000..cc5c812fd4
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/running-a-cluster.md
@@ -0,0 +1,339 @@
+---
+title: "Running a Cluster"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Running a Cluster"
+    identifier: "managing_running_a_cluster"
+    weight: 200
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/building/basic-cluster-setup
+  - /riak/kv/3.0.3/ops/building/basic-cluster-setup
+---
+
+Configuring a Riak cluster involves instructing each node to listen on a
+non-local interface, i.e. not `127.0.0.1`, and then joining all of the
+nodes together to participate in the cluster.
+
+Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/) located in your `rel/riak/etc` directory (if
+you compiled from source) or `/etc` (if you used a binary install of
+Riak).
+
+The commands below presume that you are running from a source install,
+but if you have installed Riak with a binary install, you can substitute
+the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin`
+with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts
+are located in the `/bin` directory of your installation.
+
+> **Note on changing the `name` value**
+>
+> If possible, you should avoid starting Riak prior to editing the name of
+a node. This setting corresponds to the `nodename` parameter in the
+`riak.conf` file if you are using the newer configuration system, and to
+the `-name` parameter in `vm.args` (as described below) if you are using
+the older configuration system. If you have already started Riak with
+the default settings, you cannot change the `-name` setting and then
+successfully restart the node.
+>
+> If you cannot restart after changing the `-name` value, you have two
+options:
+>
+> * Discard the existing ring metadata by removing the contents of the
+`ring` directory. This will require rejoining all nodes into a
+cluster again.
+>
+> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
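+
+For the first option, a minimal sketch follows. The location of the
+`ring` directory varies by platform and install method, so the path
+below is an assumption; check the `platform_data_dir` setting for your
+install before removing anything.
+
+```bash
+# Stop the node, discard its ring metadata, then start it again.
+# /var/lib/riak/ring is an assumed path for package installs.
+riak stop
+rm -rf /var/lib/riak/ring/*
+riak start
+```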
+
+## Configure the First Node
+
+First, stop your Riak node if it is currently running:
+
+```bash
+riak stop
+```
+
+#### Select an IP address and port
+
+Let's say that the IP address for your cluster is 192.168.1.10 and that
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+interface due to performance gains), you should change your
+configuration file:
+
+```riakconf
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"127.0.0.1", 8087 },
+```
+
+becomes
+
+```riakconf
+listener.protobuf.internal = 192.168.1.10:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"192.168.1.10", 8087 },
+```
+
+{{% note title="Note on upgrading to 2.0" %}}
+If you are upgrading to Riak version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config`/`vm.args`
+configuration files or the newer `riak.conf` if you wish. If you have
+installed Riak 2.0 directly, you should use only `riak.conf`.
+
+Below, examples will be provided for both the old and new configuration
+systems. Bear in mind that you need to use either the older or the newer
+system, but never both simultaneously.
+
+More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference).
+{{% /note %}}
+
+If you're using the HTTP interface, you will need to alter your
+configuration in an analogous way:
+
+```riakconf
+listener.http.internal = 127.0.0.1:8098
+```
+
+```appconfig
+%% In the riak_core section:
+
+{http, [ {"127.0.0.1", 8098 } ]},
+```
+
+becomes
+
+```riakconf
+listener.http.internal = 192.168.1.10:8098
+```
+
+```appconfig
+{http, [ {"192.168.1.10", 8098 } ]},
+```
+
+#### Name your node
+
+Every node in Riak has a name associated with it. The default name is
+`riak@127.0.0.1`. Let's say that you want to change the name to
+`riak@192.168.1.10`:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+becomes
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+> **Node Names**
+>
+> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10`
+are both acceptable node naming schemes, but using the FQDN style is
+preferred.
+>
+> Once a node has been started, in order to change the name you must
+either remove ring files from the `/data/ring` directory or
+[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/3.0.3/using/admin/riak-admin/#cluster-force-replace) the node.
+
+#### Start the node
+
+Now that your node is properly configured, you can start it:
+
+```bash
+riak start
+```
+
+If the Riak node has been previously started, you must use the
+`riak-admin cluster replace` command to change the node name and update
+the node's ring file.
+
+```bash
+riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10
+```
+
+{{% note title="Note on single nodes" %}}
+If a node is started singly using default settings, as you might do when you
+are building your first test environment, you will need to remove the ring
+files from the data directory after you edit your configuration files.
+`riak-admin cluster replace` will not work since the node has not been joined
+to a cluster.
+{{% /note %}} + +As with all cluster changes, you need to view the planned changes by +running `riak-admin cluster plan` and then running `riak-admin cluster +commit` to finalize those changes. + +The node is now properly set up to join other nodes for cluster +participation. You can proceed to adding a second node to the cluster. + +## Add a Second Node to Your Cluster + +Repeat the above steps for a second host on the same network, providing +the second node with a host/port and node name. Once the second node has +started, use `riak-admin cluster join` to join the second node to the +first node, thereby creating an initial Riak cluster. Let's say that +we've named our second node `riak@192.168.1.11`. From the new node's +`/bin` directory: + +```bash +riak-admin cluster join riak@192.168.1.10 +``` + +Output from the above should resemble: + +``` +Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10` +``` + +Next, plan and commit the changes: + +```bash +riak-admin cluster plan +riak-admin cluster commit +``` + +After the last command, you should see: + +``` +Cluster changes committed +``` + +If your output was similar, then the second Riak node is now part of the +cluster and has begun syncing with the first node. Riak provides several +ways to determine the cluster's ring status. Here are two ways to +examine your Riak cluster's ring: + +1. Using the `riak-admin` command: + + ```bash + bin/riak-admin status | grep ring_members + ``` + + With output resembling the following: + + ```bash + ring_members : ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +2. Running the `riak attach` command. This will open up an Erlang shell, +into which you can type the following command: + + ```erlang + 1> {ok, R} = riak_core_ring_manager:get_my_ring(). + + %% Response: + + {ok,{chstate,'riak@192.168.1.10',......... + (riak@192.168.52.129)2> riak_core_ring:all_members(R). + ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +To join additional nodes to your cluster, repeat the above steps. You +can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/3.0.3/using/cluster-operations/adding-removing-nodes) from a cluster. + +> **Ring Creation Size** +> +> All nodes in the cluster +must have the same initial ring size setting in order to join, and +participate in cluster activity. This setting can be adjusted in your +configuration file using the `ring_creation_size` parameter if you're +using the older configuration system or `ring_size` in the new system. +> +> Check the value of all nodes if you receive a message like this: +> `Failed: riak@10.0.1.156 has a different ring_creation_size` + +## Running Multiple Nodes on One Host + +If you built Riak from source code, or if you are using the Mac OS X +pre-built package, then you can easily run multiple Riak nodes on the +same machine. The most common scenario for doing this is to experiment +with running a Riak cluster. + +**Note**: If you have installed the `.deb` or `.rpm` package, then you +will need to download and build Riak from source to follow the +directions below. + +To run multiple nodes, make copies of the `riak` directory. + +- If you ran `make all rel`, then this can be found in `./rel/riak` + under the Riak source root directory. +- If you are running Mac OS X, then this is the directory where you + unzipped the `.tar.gz` file. + +Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`, +`./rel/riak3`, and so on, you need to make two changes: + +1. 
Set your handoff port and your Protocol Buffers or HTTP port +(depending on which interface you are using) to different values on each +node. For example: + + ```riakconf + # For Protocol Buffers: + listener.protobuf.internal = 127.0.0.1:8187 + + # For HTTP: + listener.http.internal = 127.0.0.1:8198 + + # For either interface: + handoff.port = 8199 + ``` + + ```appconfig + %% In the pb section of riak_core: + {"127.0.0.1", 8187 } + + %% In the http section of riak_core: + {"127.0.0.1", 8198} + ``` + +2. Change the name of each node to a unique name. Now, start the nodes, +changing path names and nodes as appropriate: + +```bash +./rel/riak1/bin/riak start +./rel/riak2/bin/riak start +./rel/riak3/bin/riak start + +# etc +``` + +Next, join the nodes into a cluster: + +```bash +./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak2/bin/riak-admin cluster plan +./rel/riak2/bin/riak-admin cluster commit +``` + +## Multiple Clusters on One Host + +Using the above technique, it is possible to run multiple clusters on +one computer. If a node hasn’t joined an existing cluster, it will +behave just as a cluster would. Running multiple clusters on one +computer is simply a matter of having two or more distinct nodes or +groups of clustered nodes. + + + + diff --git a/content/riak/kv/3.0.3/using/security.md b/content/riak/kv/3.0.3/using/security.md new file mode 100644 index 0000000000..23fb5b9256 --- /dev/null +++ b/content/riak/kv/3.0.3/using/security.md @@ -0,0 +1,199 @@ +--- +title: "Security & Firewalls" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Security" + identifier: "managing_security" + weight: 205 + parent: "managing" +toc: true +aliases: + - /riak/3.0.3/ops/advanced/security + - /riak/kv/3.0.3/ops/advanced/security +--- + +[config reference search]: {{<baseurl>}}riak/kv/3.0.3/configuring/reference/#search +[config search enabling]: {{<baseurl>}}riak/kv/3.0.3/configuring/search/#enabling-riak-search +[config v3 ssl]: {{<baseurl>}}riak/kv/3.0.3/configuring/v3-multi-datacenter/ssl +[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html +[security basics]: {{<baseurl>}}riak/kv/3.0.3/using/security/basics +[security managing]: {{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/ +[Solr]: http://lucene.apache.org/solr/ +[usage search]: {{<baseurl>}}riak/kv/3.0.3/developing/usage/search + +> **Internal security** +> +> This document covers network-level security. For documentation on the +authentication and authorization features introduced in Riak 2.0, see +[Authentication and Authorization][security basics] and [Managing Security Sources][security managing] + +This article discusses standard configurations and port settings to use +when providing network security for a Riak Cluster. There are two +classes of access control for Riak: + +* Other Riak nodes participating in the cluster +* Clients making use of the Riak cluster + +The settings for both access groups are located in your cluster's +configuration settings. If you are using the newer configuration system, +you can set a host and port for each node in that node's `riak.conf` +file, setting `listener.protobuf` if you are using Riak's Protocol +Buffers interface or `listener.http` if you are using HTTP (or +`listener.https` if you are using SSL). 
If you are using the older +configuration system, adjust the settings of `pb`, `http`, or `https`, +depending on which client interface you are using. + +Make note of these configurations and set up your firewall to allow +incoming TCP access to those ports or IP address/port combinations. +Exceptions to this are the `handoff_ip` and `handoff_port` directives. +Those are for communication between Riak nodes only. + +## Inter-node Communication + +Riak uses the Erlang distribution mechanism for most inter-node +communication. Riak identifies other machines in the ring using Erlang +identifiers (`<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves +these node identifiers to a TCP port on a given machine via the Erlang +Port Mapper daemon (epmd) running on each cluster node. + +By default, epmd binds to TCP port 4369 and listens on the wildcard +interface. For inter-node communication, Erlang uses an unpredictable +port by default; it binds to port 0, which means the first available +port. + +For ease of firewall configuration, Riak can be configured +to instruct the Erlang interpreter to use a limited range +of ports. For example, to restrict the range of ports that Erlang will +use for inter-Erlang node communication to 6000-7999, add the following +lines to the configuration file on each Riak node: + +```riakconf +erlang.distribution.port_range.minimum = 6000 +erlang.distribution.port_range.maximum = 7999 +``` + +```appconfig +{ kernel, [ + {inet_dist_listen_min, 6000}, + {inet_dist_listen_max, 7999} + ]}, +``` + +The above lines should be added into the top level list in app.config, +at the same level as all the other applications (e.g. `riak_core`). +Then configure your firewall to allow incoming access to TCP ports 6000 +through 7999 from whichever network(s) contain your Riak nodes. + +### Riak Node Ports + +Riak nodes in a cluster need to be able to communicate freely with one +another on the following ports: + +* epmd listener: TCP:4369 +* handoff_port listener: TCP:8099 +* range of ports specified in `app.config` or `riak.conf` + +### Riak Client Ports + +Riak clients must be able to contact at least one machine in a Riak +cluster on the following TCP ports: + +Protocol | Port +:--------|:---- +<a href="../../developing/api/http">HTTP</a> | TCP port 8098 +<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087 + +### Riak Search Ports + +Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs +on each Riak node if security has been [enabled][config search enabling]. When +Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well +as some ephemeral ports. The well-known port is determined by the value of the +`search.solr.jmx_port` in each node's [Search configuration][config reference search]. +The default is 8985. + +In addition to JMX ports, Solr also binds to a well-known port of its +own, as determined by each node's `search.solr.port` setting, which is +also located in each node's Search configuration. The default is 8093. + +# Riak Security Community + +## Riak + +Riak is a powerful open-source distributed database focused on scaling +predictably and easily, while remaining highly available in the face of +server crashes, network partitions or other (inevitable) disasters. + +## Commitment + +Data security is an important and sensitive issue to many of our users. 
+
+A real-world approach to security allows us to balance appropriate
+levels of security and related overhead while creating a fast, scalable,
+and operationally straightforward database.
+
+### Continuous Improvement
+
+Though we make every effort to thwart security vulnerabilities whenever
+possible (including through independent reviews), no system is
+completely secure. We will never claim that Riak is 100% secure (and you
+should seriously doubt anyone who claims their solution is). What we can
+promise is that we openly accept all vulnerabilities from the community.
+When appropriate, we'll publish them and make every attempt to quickly
+address these concerns.
+
+### Balance
+
+More layers of security increase operational and administrative costs.
+Sometimes those costs are warranted, sometimes they are not. Our
+approach is to strike an appropriate balance between effort, cost, and
+security.
+
+For example, Riak does not have fine-grained role-based security. Though
+it can be an attractive bullet-point in a database comparison chart,
+you're usually better off finely controlling data access through your
+application or a service layer.
+
+### Notifying Basho
+
+If you discover a potential security issue, please email us at
+**security@basho.com**, and allow us 48 hours to reply.
+
+We prefer to be contacted first, rather than searching for blog posts
+over the Internet. This allows us to open a dialogue with the security
+community on how best to handle a possible exploit without putting any
+users at risk.
+
+## Security Best Practices
+
+### Authentication and Authorization
+
+For instructions on how to apply permissions and to require client
+authentication, please see our documentation on [Riak Security][security basics].
+
+### Network Configurations
+
+Being a distributed database means that much of Riak's security springs
+from how you configure your network. We have a few recommendations for
+[Security and Firewalls][security basics].
+
+### Client Auth
+
+All of the Riak client libraries support encrypted TCP communication
+as well as authentication and authorization. For instructions on how
+to apply permissions and to require client authentication, please see
+our documentation on [Riak Security][security basics].
+
+### Multi-Datacenter Replication
+
+For those versions of Riak that support Multi Data Center (MDC)
+Replication, you can configure Riak 1.2+ to communicate over SSL, to
+seamlessly encrypt the message traffic.
+
+See also: [Multi Data Center Replication: SSL][config v3 ssl]
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/security/basics.md b/content/riak/kv/3.0.3/using/security/basics.md
new file mode 100644
index 0000000000..3c7d687481
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/security/basics.md
@@ -0,0 +1,851 @@
+---
+title: "Security Basics"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Security Basics"
+    identifier: "security_basics"
+    weight: 100
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/3.0.3/ops/running/authz
+  - /riak/kv/3.0.3/ops/running/authz
+---
+
+> **Note on Network security**
+>
+> This document covers only the 2.0 authentication and authorization
+features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/3.0.3/using/security/).
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/3.0.3/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/3.0.3/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
+ +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak-admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak-admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak-admin security disable +``` + +While security is disabled, clients will need to be reconfigured to no +longer require TLS and send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak-admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics and granting users +selective access to Riak functionality (and also to revoke access). +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created. 
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak-admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak-admin security print-groups
+```
+
+Example output, assuming a user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups |       password       |           options            |
++----------+--------+----------------------+------------------------------+
+| riakuser |        |983e8ae1421574b8733824|              []              |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in encrypted form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and a `name` of
+`lucius`, the output would look like this:
+
+```
++----------+----------------+----------------------+---------------------+
+| username |     groups     |       password       |       options       |
++----------+----------------+----------------------+---------------------+
+| riakuser |      dev       |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` or `security print-group` (singular) commands
+can be used with a name as an argument to see the same information as
+above, but for only that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak-admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group  |   type   |  bucket  |                 grants                 |
++--------+----------+----------+----------------------------------------+
+| admin  |    *     |    *     |      riak_kv.get, riak_kv.delete,      |
+|        |          |          |              riak_kv.put               |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|    *     |      *      |      riak_kv.get, riak_kv.delete,      |
+|          |             |              riak_kv.put               |
+|   ANY    |shopping_list|              riak_kv.get               |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately.
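+
+For example, if `admin` named both a user and a group, the two could be
+inspected separately:
+
+```bash
+riak-admin security print-grants user/admin
+riak-admin security print-grants group/admin
+```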
+ +### Add Group + +For easier management of permissions across several users, it is +possible to create groups to be assigned to those users. + +```bash +riak-admin security add-group admin +``` + +### Add User + +To create a user with the username `riakuser`, we use the `add-user` +command: + +```bash +riak-admin security add-user riakuser +``` + +Using the command this way will create the user `riakuser` without _any_ +characteristics beyond a username, which is the only attribute that you +must assign upon user creation. + +Alternatively, a password---or other attributes---can be assigned to the +user upon creation. Here, we'll assign a password: + +```bash +riak-admin security add-user riakuser password=Test1234 +``` + +### Assigning a Password and Altering Existing User Characteristics + +While passwords and other characteristics can be set upon user creation, +it often makes sense to change user characteristics after the user has +already been created. Let's say that the user `riakuser` was created +without a password (or created _with_ a password that we'd like to +change). The `alter-user` command can be used to modify our `riakuser` +user: + +```bash +riak-admin security alter-user riakuser password=opensesame +``` + +When creating or altering a user, any number of `<option>=<value>` +pairs can be appended to the end of the command. Any non-standard +options will be stored and displayed via the `riak-admin security +print-users` command. + +```bash +riak-admin security alter-user riakuser name=bill age=47 fav_color=red +``` + +Now, the `print-users` command should return this: + +``` ++----------+--------+----------+--------------------------------------------------+ +| username | groups | password | options | ++----------+--------+----------+--------------------------------------------------+ +| riakuser | | |[{"fav_color","red"},{"age","47"},{"name","bill"}]| ++----------+--------+----------+--------------------------------------------------+ +``` + +**Note**: Usernames _cannot_ be changed using the `alter-user` command. +For example, running `riak-admin security alter-user riakuser +username=other-name`, will instead add the +`{"username","other-name"}` tuple to `riakuser`'s options. + +### Managing Groups for a User + +If we have a user `riakuser` and we'd like to assign her to the +`admin` group, we assign the value `admin` to the option `groups`: + +```bash +riak-admin security alter-user riakuser groups=admin +``` + +If we'd like to make the user `riakuser` both an `admin` and an +`archoverlord`: + +```bash +riak-admin security alter-user riakuser groups=admin,archoverlord +``` + +There is no way to incrementally add groups; even if `riakuser` was +already an `admin`, it is necessary to list it again when adding the +`archoverlord` group. Thus, to remove a group from a user, use +`alter-user` and list all *other* groups. + +If the user should be removed from all groups, use `groups=` with no +list: + +```bash +riak-admin security alter-user riakuser groups= +``` + +### Managing Groups for Groups + +Groups can be added to other groups for cascading permissions. 
+
+```bash
+riak-admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```bash
+riak-admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```bash
+riak-admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak-admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation |
+:----------|:----------|
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully.
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/3.0.3/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/3.0.3/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/3.0.3/configuring/search/) document. 
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak-admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak-admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Check the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticate against the given pluggable authentication module (PAM) service
+`certificate` | Authenticate using a client certificate
+
+### Example: Adding a Trusted Source
+
+Security sources can be added either to a specific user, multiple users,
+or all users (`all`).
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak-admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+|       users        |    cidr    |  source  | options  |
++--------------------+------------+----------+----------+
+|        all         |127.0.0.1/32|  trust   |    []    |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR.
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/3.0.3/setup/installing/source/erlang). 
This issue should
+not affect Erlang 17.0 and later.
+
+## Enabling SSL
+
+In order to use any authentication or authorization features, you must
+enable SSL for Riak. **SSL is disabled by default**, but you will need
+to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#client-interfaces) for the node
+as well as a [certificate configuration](#certificate-configuration).
+
+If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/3.0.3/developing/api/http) for Riak and would like to
+configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host
+and port. The following configuration would establish port 8088 on
+`localhost` as the HTTPS port:
+
+```riakconf
+listener.https.$name = 127.0.0.1:8088
+
+# By default, "internal" is used as the "name" setting
+```
+
+```appconfig
+{riak_core, [
+    %% Other configs
+    {https, [{"127.0.0.1", 8088}]},
+    %% Other configs
+]}
+```
+
+## TLS Settings
+
+When using Riak security, you can choose which versions of SSL/TLS are
+allowed. By default, only TLS 1.2 is allowed, but this version can be
+disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#security) to `on` or `off`:
+
+* `tls_protocols.tlsv1`
+* `tls_protocols.tlsv1.1`
+* `tls_protocols.tlsv1.2`
+* `tls_protocols.sslv3`
+
+Three things to note:
+
+* Among the four available options, only TLS version 1.2 is enabled by
+  default
+* You can enable more than one protocol at a time
+* We strongly recommend that you do _not_ use SSL version 3 unless
+  absolutely necessary
+
+## Certificate Configuration
+
+If you are using any of the available [security sources]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#trust-based-authentication), you will need to do so
+over a secure SSL connection. In order to establish a secure connection,
+you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#security) point to the proper paths for your
+generated certs. By default, Riak assumes that all certs are stored in
+each node's `/etc` directory.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can change the location of the `/etc` directory by modifying the
+`platform_etc_dir`. More information can be found in our documentation
+on [configuring directories]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/#directories).
+
+<table class="riak-conf">
+  <thead>
+    <tr>
+      <th>Type</th>
+      <th>Parameter</th>
+      <th>Default</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><strong>Signing authority</strong></td>
+      <td><code>ssl.cacertfile</code></td>
+      <td><code>#(platform_etc_dir)/cacertfile.pem</code></td>
+    </tr>
+    <tr>
+      <td><strong>Cert</strong></td>
+      <td><code>ssl.certfile</code></td>
+      <td><code>#(platform_etc_dir)/cert.pem</code></td>
+    </tr>
+    <tr>
+      <td><strong>Key file</strong></td>
+      <td><code>ssl.keyfile</code></td>
+      <td><code>#(platform_etc_dir)/key.pem</code></td>
+    </tr>
+  </tbody>
+</table>
+
+If you are using the older, `app.config`-based configuration system,
+these paths can be set in the `ssl` subsection of the `riak_core`
+section. The corresponding parameters are shown in the example below:
+
+```appconfig
+{riak_core, [
+    %% Other configs
+
+    {ssl, [
+        {certfile, "./etc/cert.pem"},
+        {keyfile, "./etc/key.pem"},
+        {cacertfile, "./etc/cacertfile.pem"}
+    ]},
+
+    %% Other configs
+]}
+```
+
+## Referer Checks and Certificate Revocation Lists
+
+In order to provide safeguards against
+[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting)
+(XSS) and
+[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery)
+attacks, Riak performs [secure referer
+checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those
+checks make it impossible to serve data directly from Riak. To disable
+those checks, set the `secure_referer_check` parameter to `off`.
+
+If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/3.0.3/using/security/managing-sources/#certificate-based-authentication), Riak will check connecting clients' certificates against the
+certificate revocation list (CRL) by default. To disable this behavior,
+set the `check_crl` parameter to `off`.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/security/best-practices.md b/content/riak/kv/3.0.3/using/security/best-practices.md
new file mode 100644
index 0000000000..62b7e248ca
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/security/best-practices.md
@@ -0,0 +1,85 @@
+---
+draft: true
+title: "Security Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "Best Practices"
+    identifier: "security_best_practices"
+    weight: 102
+    parent: "managing_security"
+toc: true
+aliases:
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro.
Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/3.0.3/using/security/managing-sources.md b/content/riak/kv/3.0.3/using/security/managing-sources.md new file mode 100644 index 0000000000..dc77f73604 --- /dev/null +++ b/content/riak/kv/3.0.3/using/security/managing-sources.md @@ -0,0 +1,273 @@ +--- +title: "Managing Security Sources" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Managing Security Sources" + identifier: "security_manage_sources" + weight: 101 + parent: "managing_security" +toc: true +aliases: + - /riak/3.0.3/ops/running/security-sources + - /riak/kv/3.0.3/ops/running/security-sources +--- + +If you're looking for more general information on Riak Security, it may +be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/3.0.3/using/security/basics). + +This document provides more granular information on the four available +authentication sources in Riak Security: trusted networks, password, +pluggable authentication modules (PAM), and certificates. These sources +correspond to `trust`, `password`, `pam`, and `certificate`, +respectively, in the `riak-admin security` interface. + +The examples below will assume that the network in question is +`127.0.0.1/32` and that a Riak user named `riakuser` has been +[created]({{<baseurl>}}riak/kv/3.0.3/using/security/basics/#user-management) and that +security has been [enabled]({{<baseurl>}}riak/kv/3.0.3/using/security/basics/#the-basics). + +{{% note title="Note on SSL connections" %}} +If you use _any_ of the aforementioned security sources, even `trust`, you +will need to do so via a secure SSL connection. 
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak-admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak-admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 password +riak-admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/3.0.3/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
+The default directory for certificates is `/etc`, though you can specify +a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/) by either uncommenting those lines if you choose to use the defaults or setting the paths yourself: + +```riakconf +ssl.certfile = /path/to/cert.pem +ssl.keyfile = /path/to/key.pem +ssl.cacertfile = /path/to/cacert.pem +``` + +In the client-side example above, the client's `CN` and Riak username +needed to match. On the server (i.e. Riak) side, the `CN` specified _on +each node_ must match the node's name as registered by Riak. You can +find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/3.0.3/configuring/reference/) under the parameter `nodename`. And so if the `nodename` for a cluster is +`riak-node-1`, you would need to generate your certificate with that in +mind, as in this OpenSSL example: + +```bash +openssl req -new ... '/CN=riak-node-1' +``` + +Once certificates have been properly generated and configured on all of +the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/3.0.3/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client +certificate that you generated for the user `riakuser`. + +How to use Riak clients in conjunction with OpenSSL and other +certificates varies from client library to client library. We strongly +recommend checking the documentation of your client library for further +information. + +## PAM-based Authentication + +This section assumes that you have set up a PAM service bearing the name +`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition +specifying `auth` and/or other PAM services set up to authenticate a +user named `riakuser`. As in the certificate-based authentication +example above, the user's name must be the same in both your +authentication module and in Riak Security. + +If we want the user `riakuser` to use this PAM service on `localhost`, +we need to add a `pam` security source in Riak and specify the name of +the service: + +```bash +riak-admin security add-source all 127.0.0.1/32 pam service=riak_pam +``` + +**Note**: If you do not specify a name for your PAM service, Riak will +use the default, which is `riak`. + +To verify that the source has been properly specified: + +```bash +riak-admin security print-sources +``` + +That command should output the following: + +``` ++--------------------+------------+----------+------------------------+ +| users | cidr | source | options | ++--------------------+------------+----------+------------------------+ +| riakuser |127.0.0.1/32| pam |[{"service","riak_pam"}]| ++--------------------+------------+----------+------------------------+ +``` + +You can test that setup most easily by using `curl`. A normal request to +Riak without specifying a user will return an `Unauthorized` message: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +Response: + +``` +<html><head><title>401 Unauthorized
</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></body></html>
+```
+
+If you identify yourself as `riakuser` and are successfully
+authenticated by your PAM service, you should get either `not found` or
+a Riak object if one is stored in the specified bucket type/bucket/key
+path:
+
+```curl
+curl -u riakuser: \
+  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
+```
+
+## How Sources Are Applied
+
+When managing security sources---any of the sources explained
+above---you always have the option of applying a source to either a
+single user, multiple users, or all users (`all`). If specific users and
+`all` have no sources in common, this presents no difficulty. But what
+happens if one source is applied to `all` and a different source is
+applied to a specific user?
+
+The short answer is that the more specifically assigned source---i.e. to
+the user---will be considered a user's security source. We'll illustrate
+that with the following example, in which the `certificate` source is
+assigned to `all`, but the `password` source is assigned to `riakuser`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 certificate
+riak-admin security add-source riakuser 127.0.0.1/32 password
+```
+
+If we run `riak-admin security print-sources`, we'll get the following
+output:
+
+```
++--------------------+------------+-----------+----------+
+|       users        |    cidr    |  source   | options  |
++--------------------+------------+-----------+----------+
+|      riakuser      |127.0.0.1/32| password  |    []    |
+|                    |127.0.0.1/32|certificate|    []    |
+|        all         |127.0.0.1/32|certificate|    []    |
++--------------------+------------+-----------+----------+
+```
+
+As we can see, `password` is set as the security source for `riakuser`,
+whereas everyone else will authenticate using `certificate`.
+
+
+
+
diff --git a/content/riak/kv/3.0.3/using/security/v2-v3-ssl-ca.md b/content/riak/kv/3.0.3/using/security/v2-v3-ssl-ca.md
new file mode 100644
index 0000000000..066eed0174
--- /dev/null
+++ b/content/riak/kv/3.0.3/using/security/v2-v3-ssl-ca.md
@@ -0,0 +1,85 @@
+---
+draft: true
+title: "V2 / V3 SSL & CA Validation"
+description: ""
+project: "riak_kv"
+project_version: 3.0.3
+menu:
+  riak_kv-3.0.3:
+    name: "V2/V3 SSL & CA Validation"
+    identifier: "security_validation"
+    weight: 103
+    parent: "managing_security"
+toc: true
+aliases:
+---
+
+## Hanc capellae
+
+Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
+Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
+Deae Saturnia levius viribus membra.
+
+## Iussorum ad fronti rutilasque tenuit cursu quae
+
+Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
+**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
+transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
+Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
+oblita **foedabis** orsa.
+
+- Fecere aliis postquam inviti caliginis ab inque
+- Voverat dividuae et tardus huc magna non
+- Sex barba ipsaque Caucason corpora sono ecce
+- Non esse
+- Sibi atris regna licuit Antium carituraque nubes
+
+## Omni levare gelidumque minanti
+
+Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
+incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
+magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
+mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
+crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**.
Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/3.0.3/using/troubleshooting.md b/content/riak/kv/3.0.3/using/troubleshooting.md new file mode 100644 index 0000000000..9fa931fd98 --- /dev/null +++ b/content/riak/kv/3.0.3/using/troubleshooting.md @@ -0,0 +1,28 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +aliases: +--- + +[http 204]: ./http-204 + +## In This Section + +#### [HTTP 204][http 204] + +About the HTTP 204 response. + +[Learn More >>][http 204] + + + + diff --git a/content/riak/kv/3.0.3/using/troubleshooting/http-204.md b/content/riak/kv/3.0.3/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..0ff4ccbdb2 --- /dev/null +++ b/content/riak/kv/3.0.3/using/troubleshooting/http-204.md @@ -0,0 +1,22 @@ +--- +title: "HTTP 204" +description: "" +project: "riak_kv" +project_version: 3.0.3 +menu: + riak_kv-3.0.3: + name: "HTTP 204" + identifier: "troubleshooting_http_204" + weight: 101 + parent: "managing_troubleshooting" +toc: true +aliases: +--- + +In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers. + +If you add `returnbody=true` in the `PUT` request, you will receive a `200 OK` and the content you just stored, otherwise you will receive a `204 No Content`. 
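+
+As a hedged illustration using the HTTP API (the bucket `test` and key `doc`
+are hypothetical; 8098 is the default HTTP port):
+
+```curl
+# Without returnbody, a successful store replies 204 No Content
+curl -i -XPUT -H 'Content-Type: text/plain' -d 'hello' \
+  'http://localhost:8098/buckets/test/keys/doc'
+
+# With returnbody=true, the same write replies 200 OK plus the stored value
+curl -i -XPUT -H 'Content-Type: text/plain' -d 'hello' \
+  'http://localhost:8098/buckets/test/keys/doc?returnbody=true'
+```
+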
+ + + + diff --git a/content/riak/kv/3.0.4/_reference-links.md b/content/riak/kv/3.0.4/_reference-links.md new file mode 100644 index 0000000000..e065a6b774 --- /dev/null +++ b/content/riak/kv/3.0.4/_reference-links.md @@ -0,0 +1,254 @@ + +# Riak KV 3.0.4 Reference Links List + + +## Common + +[downloads]: {{}}riak/kv/3.0.4/downloads/ +[install index]: {{}}riak/kv/3.0.4/setup/installing +[upgrade index]: {{}}riak/kv/3.0.4/upgrading +[plan index]: {{}}riak/kv/3.0.4/planning +[config index]: {{}}riak/kv/3.0.4/using/configuring/ +[config reference]: {{}}riak/kv/3.0.4/configuring/reference/ +[manage index]: {{}}riak/kv/3.0.4/using/managing +[performance index]: {{}}riak/kv/3.0.4/using/performance +[glossary vnode]: {{}}riak/kv/3.0.4/learn/glossary/#vnode +[contact basho]: https://www.tiot.jp/en/about-us/contact-us/ + + +## Planning + +[plan index]: {{}}riak/kv/3.0.4/setup/planning +[plan start]: {{}}riak/kv/3.0.4/setup/planning/start +[plan backend]: {{}}riak/kv/3.0.4/setup/planning/backend +[plan backend bitcask]: {{}}riak/kv/3.0.4/setup/planning/backend/bitcask +[plan backend leveldb]: {{}}riak/kv/3.0.4/setup/planning/backend/leveldb +[plan backend leveled]: {{}}riak/kv/3.0.4/setup/planning/backend/leveled +[plan backend memory]: {{}}riak/kv/3.0.4/setup/planning/backend/memory +[plan backend multi]: {{}}riak/kv/3.0.4/setup/planning/backend/multi +[plan cluster capacity]: {{}}riak/kv/3.0.4/setup/planning/cluster-capacity +[plan bitcask capacity]: {{}}riak/kv/3.0.4/setup/planning/bitcask-capacity-calc +[plan best practices]: {{}}riak/kv/3.0.4/setup/planning/best-practices +[plan future]: {{}}riak/kv/3.0.4/setup/planning/future + + +## Installing + +[install index]: {{}}riak/kv/3.0.4/setup/installing +[install aws]: {{}}riak/kv/3.0.4/setup/installing/amazon-web-services +[install debian & ubuntu]: {{}}riak/kv/3.0.4/setup/installing/debian-ubuntu +[install freebsd]: {{}}riak/kv/3.0.4/setup/installing/freebsd +[install mac osx]: {{}}riak/kv/3.0.4/setup/installing/mac-osx +[install rhel & centos]: {{}}riak/kv/3.0.4/setup/installing/rhel-centos +[install smartos]: {{}}riak/kv/3.0.4/setup/installing/smartos +[install solaris]: {{}}riak/kv/3.0.4/setup/installing/solaris +[install suse]: {{}}riak/kv/3.0.4/setup/installing/suse +[install windows azure]: {{}}riak/kv/3.0.4/setup/installing/windows-azure + +[install source index]: {{}}riak/kv/3.0.4/setup/installing/source +[install source erlang]: {{}}riak/kv/3.0.4/setup/installing/source/erlang +[install source jvm]: {{}}riak/kv/3.0.4/setup/installing/source/jvm + +[install verify]: {{}}riak/kv/3.0.4/setup/installing/verify + + +## Upgrading + +[upgrade index]: {{}}riak/kv/3.0.4/setup/upgrading +[upgrade checklist]: {{}}riak/kv/3.0.4/setup/upgrading/checklist +[upgrade version]: {{}}riak/kv/3.0.4/setup/upgrading/version +[upgrade cluster]: {{}}riak/kv/3.0.4/setup/upgrading/cluster +[upgrade mdc]: {{}}riak/kv/3.0.4/setup/upgrading/multi-datacenter +[upgrade downgrade]: {{}}riak/kv/3.0.4/setup/downgrade + + +## Configuring + +[config index]: {{}}riak/kv/3.0.4/configuring +[config basic]: {{}}riak/kv/3.0.4/configuring/basic +[config backend]: {{}}riak/kv/3.0.4/configuring/backend +[config manage]: {{}}riak/kv/3.0.4/configuring/managing +[config reference]: {{}}riak/kv/3.0.4/configuring/reference/ +[config strong consistency]: {{}}riak/kv/3.0.4/configuring/strong-consistency +[config load balance]: {{}}riak/kv/3.0.4/configuring/load-balancing-proxy +[config mapreduce]: {{}}riak/kv/3.0.4/configuring/mapreduce +[config search]: 
{{}}riak/kv/3.0.4/configuring/search/ + +[config v3 mdc]: {{}}riak/kv/3.0.4/configuring/v3-multi-datacenter +[config v3 nat]: {{}}riak/kv/3.0.4/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{}}riak/kv/3.0.4/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{}}riak/kv/3.0.4/configuring/v3-multi-datacenter/ssl + +[config v2 mdc]: {{}}riak/kv/3.0.4/configuring/v2-multi-datacenter +[config v2 nat]: {{}}riak/kv/3.0.4/configuring/v2-multi-datacenter/nat +[config v2 quickstart]: {{}}riak/kv/3.0.4/configuring/v2-multi-datacenter/quick-start +[config v2 ssl]: {{}}riak/kv/3.0.4/configuring/v2-multi-datacenter/ssl + + + +## Using + +[use index]: {{}}riak/kv/3.0.4/using/ +[use admin commands]: {{}}riak/kv/3.0.4/using/cluster-admin-commands +[use running cluster]: {{}}riak/kv/3.0.4/using/running-a-cluster + +### Reference + +[use ref custom code]: {{}}riak/kv/3.0.4/using/reference/custom-code +[use ref handoff]: {{}}riak/kv/3.0.4/using/reference/handoff +[use ref monitoring]: {{}}riak/kv/3.0.4/using/reference/statistics-monitoring +[use ref search]: {{}}riak/kv/3.0.4/using/reference/search +[use ref 2i]: {{}}riak/kv/3.0.4/using/reference/secondary-indexes +[use ref snmp]: {{}}riak/kv/3.0.4/using/reference/snmp +[use ref strong consistency]: {{}}riak/kv/3.0.4/using/reference/strong-consistency +[use ref jmx]: {{}}riak/kv/3.0.4/using/reference/jmx +[use ref obj del]: {{}}riak/kv/3.0.4/using/reference/object-deletion/ +[use ref v3 mdc]: {{}}riak/kv/3.0.4/using/reference/v3-multi-datacenter +[use ref v2 mdc]: {{}}riak/kv/3.0.4/using/reference/v2-multi-datacenter + +### Cluster Admin + +[use admin index]: {{}}riak/kv/3.0.4/using/admin/ +[use admin commands]: {{}}riak/kv/3.0.4/using/admin/commands/ +[use admin riak cli]: {{}}riak/kv/3.0.4/using/admin/riak-cli/ +[use admin riak-admin]: {{}}riak/kv/3.0.4/using/admin/riak-admin/ +[use admin riak control]: {{}}riak/kv/3.0.4/using/admin/riak-control/ + +### Cluster Operations + +[cluster ops add remove node]: {{}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes +[cluster ops inspect node]: {{}}riak/kv/3.0.4/using/cluster-operations/inspecting-node +[cluster ops change info]: {{}}riak/kv/3.0.4/using/cluster-operations/changing-cluster-info +[cluster ops load balance]: {{}}riak/kv/3.0.4/configuring/load-balancing-proxy +[cluster ops bucket types]: {{}}riak/kv/3.0.4/using/cluster-operations/bucket-types +[cluster ops handoff]: {{}}riak/kv/3.0.4/using/cluster-operations/handoff +[cluster ops log]: {{}}riak/kv/3.0.4/using/cluster-operations/logging +[cluster ops obj del]: {{}}riak/kv/3.0.4/using/reference/object-deletion +[cluster ops backup]: {{}}riak/kv/3.0.4/using/cluster-operations/backing-up +[cluster ops mdc]: {{}}riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter +[cluster ops strong consistency]: {{}}riak/kv/3.0.4/using/cluster-operations/strong-consistency +[cluster ops 2i]: {{}}riak/kv/3.0.4/using/reference/secondary-indexes +[cluster ops v3 mdc]: {{}}riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter +[cluster ops v2 mdc]: {{}}riak/kv/3.0.4/using/cluster-operations/v2-multi-datacenter + +### Repair/Recover + +[repair recover index]: {{}}riak/kv/3.0.4/using/repair-recovery +[repair recover index]: {{}}riak/kv/3.0.4/using/repair-recovery/failure-recovery/ + +### Security + +[security index]: {{}}riak/kv/3.0.4/using/security/ +[security basics]: {{}}riak/kv/3.0.4/using/security/basics +[security managing]: {{}}riak/kv/3.0.4/using/security/managing-sources/ + +### Performance + +[perf index]: 
{{}}riak/kv/3.0.4/using/performance/ +[perf benchmark]: {{}}riak/kv/3.0.4/using/performance/benchmarking +[perf open files]: {{}}riak/kv/3.0.4/using/performance/open-files-limit/ +[perf erlang]: {{}}riak/kv/3.0.4/using/performance/erlang +[perf aws]: {{}}riak/kv/3.0.4/using/performance/amazon-web-services +[perf latency checklist]: {{}}riak/kv/3.0.4/using/performance/latency-reduction + +### Troubleshooting + +[troubleshoot http]: {{}}riak/kv/3.0.4/using/troubleshooting/http-204 + + +## Developing + +[dev index]: {{}}riak/kv/3.0.4/developing +[dev client libraries]: {{}}riak/kv/3.0.4/developing/client-libraries +[dev data model]: {{}}riak/kv/3.0.4/developing/data-modeling +[dev data types]: {{}}riak/kv/3.0.4/developing/data-types +[dev kv model]: {{}}riak/kv/3.0.4/developing/key-value-modeling + +### Getting Started + +[getting started]: {{}}riak/kv/3.0.4/developing/getting-started +[getting started java]: {{}}riak/kv/3.0.4/developing/getting-started/java +[getting started ruby]: {{}}riak/kv/3.0.4/developing/getting-started/ruby +[getting started python]: {{}}riak/kv/3.0.4/developing/getting-started/python +[getting started php]: {{}}riak/kv/3.0.4/developing/getting-started/php +[getting started csharp]: {{}}riak/kv/3.0.4/developing/getting-started/csharp +[getting started nodejs]: {{}}riak/kv/3.0.4/developing/getting-started/nodejs +[getting started erlang]: {{}}riak/kv/3.0.4/developing/getting-started/erlang +[getting started golang]: {{}}riak/kv/3.0.4/developing/getting-started/golang + +[obj model java]: {{}}riak/kv/3.0.4/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/3.0.4/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/3.0.4/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/3.0.4/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/3.0.4/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/3.0.4/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/3.0.4/developing/getting-started/golang/object-modeling + +### Usage + +[usage index]: {{}}riak/kv/3.0.4/developing/usage +[usage bucket types]: {{}}riak/kv/3.0.4/developing/usage/bucket-types +[usage commit hooks]: {{}}riak/kv/3.0.4/developing/usage/commit-hooks +[usage conflict resolution]: {{}}riak/kv/3.0.4/developing/usage/conflict-resolution +[usage content types]: {{}}riak/kv/3.0.4/developing/usage/content-types +[usage create objects]: {{}}riak/kv/3.0.4/developing/usage/creating-objects +[usage custom extractors]: {{}}riak/kv/3.0.4/developing/usage/custom-extractors +[usage delete objects]: {{}}riak/kv/3.0.4/developing/usage/deleting-objects +[usage mapreduce]: {{}}riak/kv/3.0.4/developing/usage/mapreduce +[usage search]: {{}}riak/kv/3.0.4/developing/usage/search +[usage search schema]: {{}}riak/kv/3.0.4/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/3.0.4/developing/usage/searching-data-types +[usage 2i]: {{}}riak/kv/3.0.4/developing/usage/secondary-indexes +[usage update objects]: {{}}riak/kv/3.0.4/developing/usage/updating-objects + +### App Guide + +[apps mapreduce]: {{}}riak/kv/3.0.4/developing/app-guide/advanced-mapreduce +[apps replication properties]: {{}}riak/kv/3.0.4/developing/app-guide/replication-properties +[apps strong consistency]: {{}}riak/kv/3.0.4/developing/app-guide/strong-consistency + +### API + +[dev api backend]: {{}}riak/kv/3.0.4/developing/api/backend +[dev api http]: 
{{}}riak/kv/3.0.4/developing/api/http +[dev api http status]: {{}}riak/kv/3.0.4/developing/api/http/status +[dev api pbc]: {{}}riak/kv/3.0.4/developing/api/protocol-buffers/ + + +## Learn + +[learn new nosql]: {{}}riak/kv/learn/new-to-nosql +[learn use cases]: {{}}riak/kv/learn/use-cases +[learn why riak]: {{}}riak/kv/learn/why-riak-kv + +[glossary]: {{}}riak/kv/3.0.4/learn/glossary/ +[glossary aae]: {{}}riak/kv/3.0.4/learn/glossary/#active-anti-entropy-aae +[glossary read rep]: {{}}riak/kv/3.0.4/learn/glossary/#read-repair +[glossary vnode]: {{}}riak/kv/3.0.4/learn/glossary/#vnode + +[concept aae]: {{}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/ +[concept buckets]: {{}}riak/kv/3.0.4/learn/concepts/buckets +[concept cap neg]: {{}}riak/kv/3.0.4/learn/concepts/capability-negotiation +[concept causal context]: {{}}riak/kv/3.0.4/learn/concepts/causal-context +[concept clusters]: {{}}riak/kv/3.0.4/learn/concepts/clusters/ +[concept crdts]: {{}}riak/kv/3.0.4/learn/concepts/crdts +[concept eventual consistency]: {{}}riak/kv/3.0.4/learn/concepts/eventual-consistency +[concept keys objects]: {{}}riak/kv/3.0.4/learn/concepts/keys-and-objects +[concept replication]: {{}}riak/kv/3.0.4/learn/concepts/replication +[concept strong consistency]: {{}}riak/kv/3.0.4/using/reference/strong-consistency +[concept vnodes]: {{}}riak/kv/3.0.4/learn/concepts/vnodes + + + +## Community + +[community]: {{}}community +[community projects]: {{}}community/projects +[reporting bugs]: {{}}community/reporting-bugs +[taishi]: {{}}community/taishi + + + + + + diff --git a/content/riak/kv/3.0.4/add-ons.md b/content/riak/kv/3.0.4/add-ons.md new file mode 100644 index 0000000000..81396ca6fc --- /dev/null +++ b/content/riak/kv/3.0.4/add-ons.md @@ -0,0 +1,25 @@ +--- +title: "Add-ons" +description: "Add-on technology for Riak KV" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Add-ons" + identifier: "add-ons" + weight: 400 + pre: tools +toc: true +aliases: +--- + + + +In the days of Basho, integrations between Riak KV and other best-of-breed components were developed for your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. + +* [Riak Redis Add-on]({{}}riak/kv/3.0.4/add-ons/redis/) + + + + + diff --git a/content/riak/kv/3.0.4/add-ons/redis.md b/content/riak/kv/3.0.4/add-ons/redis.md new file mode 100644 index 0000000000..a6bc183d2a --- /dev/null +++ b/content/riak/kv/3.0.4/add-ons/redis.md @@ -0,0 +1,63 @@ +--- +title: "Riak Redis Add-on" +description: "Redis Add-on for Riak KV" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Redis Add-on" + identifier: "add-ons_redis" + weight: 101 + parent: "add-ons" +toc: true +commercial_offering: true +aliases: +--- + + +[addon redis develop]: ./developing-rra/ +[addon redis features]: ./redis-add-on-features/ +[addon redis setup]: ./set-up-rra/ +[addon redis use]: ./get-started-with-rra/ +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +{{% note title="Warning: No longer actively maintained" %}} +Since moving to Open Source, the Riak Redis Add-on is no longer actively maintained. As basic functionality has not changed, we expect the add-on to continue working with newer versions without incident but cannot guarantee this. The text below is left from the last known good version. +{{% /note %}} + +Riak Redis Add-on (RRA) is a distributed cache service that joins the power of Redis caching with the eventual consistency guarantees of Riak KV. 
+ +RRA enables you to reduce latency for Riak KV reads through the use of a distributed cache layer. This type of caching is most effective for keys that are immutable or have an infrequent change rate. + +Whether you are looking to build out a session, shopping cart, advertisement or other dynamically-rendered copy, RRA helps reduce read pressure on your persistent store (Riak KV). + +## Compatibility + +RRA is supported on the following platforms: + +* RHEL/CentOS 6 +* RHEL/CentOS 7 +* Ubuntu 12.04 LTS "Precise Pangolin" +* Ubuntu 14.04 LTS "Trusty Tahr" +* Debian 7 "Wheezy" +* Debian 8 "Jessie" + +RRA is compatible with the following services: + +* Riak KV Enterprise (2.1.4+) +* Riak TS Enterprise (1.4.0+) +* Redis 2.x and 3.x (in 3.x, not supporting Redis Cluster) + * Redis Cluster and RRA's consistent hash are at odds, which surface as errors + such as MOVED, ASK, and CROSSSLOT messages from Redis, see (WIP): + https://github.com/antirez/redis-rb-cluster + +## Get Started + +* [Set up RRA.][addon redis setup] +* [Use RRA with various clients.][addon redis use] +* [Develop with RRA.][addon redis develop] +* [Learn about RRA's features.][addon redis features] + + + + diff --git a/content/riak/kv/3.0.4/add-ons/redis/developing-rra.md b/content/riak/kv/3.0.4/add-ons/redis/developing-rra.md new file mode 100644 index 0000000000..9474b23705 --- /dev/null +++ b/content/riak/kv/3.0.4/add-ons/redis/developing-rra.md @@ -0,0 +1,330 @@ +--- +title: "Developing with Riak Redis Add-on" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Develop with Redis Add-on" + identifier: "add-ons_redis_develop" + weight: 403 + parent: "add-ons_redis" +toc: true +commercial_offering: true +aliases: +--- + +[redis-clients]: http://redis.io/clients +[usage bucket types]: {{}}riak/kv/3.0.4/developing/usage/bucket-types/ +[dev api http]: {{}}riak/kv/3.0.4/developing/api/http +[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ +[apps replication properties]: {{}}riak/kv/3.0.4/developing/app-guide/replication-properties +[usage commit hooks]: {{}}riak/kv/3.0.4/developing/usage/commit-hooks/ +[concept causal context]: {{}}riak/kv/3.0.4/learn/concepts/causal-context +[ee]: https://www.tiot.jp/en/about-us/contact-us/ + +This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. + +## Overview + +Riak Redis Add-on (RRA) packages a cache proxy service. The cache proxy service provides accessibility to Riak KV, as a persistent data store, with Redis, as a cache through the various Redis client libraries and command-line interface tool `redis-cli`. + +As with Riak KV, the cache proxy service almost always performs best and most +predictably when you use the basic CRUD operations -- Create, Read, Update, +Delete -- that you'd find in any key/value store. Learning these operations +is a great place to start when beginning to develop applications that use +RRA. + +The set of clients (including recommendations) for Redis are listed at +[Redis clients][redis-clients]. For brevity sake, examples provided here are +in: + +* Erlang (Eredis) +* Javascript (node_redis) +* Python (redis-py) +* Ruby (redis-rb) +* Scala (lettuce) +* Java, see the Scala examples. The code intentionally uses as few Scala tricks as possible to focus on the use of the Redis client. 
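+
+Before reaching for a client library, a minimal sketch of those CRUD
+operations with `redis-cli`, assuming the cache proxy is listening on its
+default port, 22122, and the `rra` bucket type created below:
+
+```bash
+# Create/update: SET writes through to Riak KV and invalidates the cached copy
+redis-cli -h 127.0.0.1 -p 22122 set rra:test:food apple
+
+# Read: GET is served from Redis on a cache hit, read through from Riak KV on a miss
+redis-cli -h 127.0.0.1 -p 22122 get rra:test:food
+
+# Delete: DEL removes the value from Riak KV and invalidates the cached copy
+redis-cli -h 127.0.0.1 -p 22122 del rra:test:food
+```
+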
+
+## Riak KV Setup
+
+While you can use Riak Redis Add-on with Riak KV configured so either `last_write_wins` is set to 'true' or `allow_mult` is set to 'true', we recommend using the `allow_mult` setting in order to provide client sibling resolution in the event of a network partition. The examples and instructions on this page will assume `allow_mult` is set to 'true'.
+
+The cache proxy service is tested under both configurations. However, due to the lack of support in the Redis protocol for returning multiple values for a single `GET`, effectively `last_write_wins` semantics apply.
+
+For a deeper explanation of Riak KV's configurable behaviors, see John Daily's
+blog series [part 4][config-behaviors].
+
+### Bucket Type Setup
+
+#### Create a Bucket Type
+
+If your application organizes data in a way that does not include a bucket-type
+and instead only uses buckets to organize its keyspace, the `default` bucket-type
+can be used by omitting the bucket-type portion of the colon-delimited
+hierarchical namespaced key. In other words, `test:food` is equivalent to
+`default:test:food`, where the bucket-type is `default`, the bucket is `test`,
+and the key is `food`. For examples here, we will use `rra:test:food` to clearly
+use a bucket-type.
+
+If your application organizes data including a bucket-type, ensure that the
+bucket-type is created in Riak without specifying the data type, so that values
+are effectively opaque, i.e. a `string`. The following command provides an
+example of creating the bucket-type `rra`:
+
+```sh
+if ! riak-admin bucket-type status rra >/dev/null 2>&1; then
+  riak-admin bucket-type create rra '{"props":{}}'
+  riak-admin bucket-type activate rra
+fi
+```
+
+#### Set Bucket Props
+
+The following is an example, using Riak KV's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false':
+
+```sh
+curl -XPUT -H 'Content-Type: application/json' \
+  -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \
+  'http://127.0.0.1:8098/types/rra/buckets/test/props'
+```
+
+For additional configuration options see [bucket properties][dev api http].
+
+## Object/Key Operations
+
+Riak KV organizes data into buckets, keys, and values, with
+[bucket types][usage bucket types] acting as an additional namespace in Riak KV
+versions 2.0 and greater. Values, which we'll refer to as objects, are
+identifiable by a unique key, and each key/value pair is stored in a bucket.
+
+Objects accessed via the cache proxy service in Riak Redis Add-on are restricted
+to plaintext format. This plaintext format may be a simple string, JSON, XML, or
+other plaintext representations that can be parsed in the client application
+(e.g. YAML).
+
+While buckets are a flat namespace in Riak KV and you can name them
+whatever you'd like (`bucket` or `a90bf521c` or `___`), within the cache proxy
+service, the Redis key bucket_type:bucket:key is mapped to the Riak KV path
+bucket_type/bucket/key, so bucket type and bucket names should not contain
+colons (`:`). When not specified, the bucket type defaults to "default".
+
+Outside of the above restriction, bucket names have no intrinsic significance
+beyond allowing you to store objects with the same key in different buckets.
+
+The same goes for naming keys: many objects can have the same key as long as
+they're in different buckets. There is no restriction on keys containing colons
+(`:`), and this practice of representing a nested namespace is common in
+applications using Redis.
+
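+As a hedged sketch of this mapping (key names are illustrative):
+
+```bash
+# Addresses bucket type "rra", bucket "test", key "food"
+redis-cli -h 127.0.0.1 -p 22122 get rra:test:food
+
+# Equivalent to default:test:food - bucket type "default", bucket "test", key "food"
+redis-cli -h 127.0.0.1 -p 22122 get test:food
+```
+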
+Riak KV [bucket types][usage bucket types] enable you to provide common
+configurations for buckets (as many buckets as you wish). This means you can
+easily enable buckets to share common configurations, i.e. identical
+[replication properties][apps replication properties] or
+[commit hooks][usage commit hooks].
+
+
+## Reading Objects
+
+Reads via the cache proxy service are analogous to a Redis `GET`, with the added
+benefit of reading through to Riak KV, which results in greater resilience
+through node outages and network partitions.
+
+To request a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, Value} = eredis:q(RedisClientPid, ["GET", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.get("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.get("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.get("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+var value = connection.get("rra:test:food")
+```
+
+### Get Configuration Parameters
+
+>**Note:** The cache proxy service read option (related to replication factor and
+consistency concern) may optionally be set within the nutcracker.conf. This will
+result in an override of the setting value at the bucket-level in Riak KV.
+
+The following configuration parameters apply to `GET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pr` | How many vnodes must respond for a read to be deemed successful. | `0` |
+|`r` | How many replicas need to agree when retrieving an existing object before responding. | `2` |
+|`basic_quorum` | Whether to return early in some failure cases, e.g. when `r`=1 and you get 2 errors and a success. | `0` (false) |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+|`notfound_ok` | Whether to treat notfounds as successful reads for the purpose of `r`. | `1` (true) |
+|`timeout` | The number of milliseconds to await a response. | `0` (server specified) |
+
+
+### Sibling Resolution
+
+As the Redis protocol does not provide a means to return multiple siblings,
+the cache proxy service must provide server-side sibling resolution. At present,
+only last-write-wins sibling resolution is available. The result is an effective
+last-write-wins configuration for access through the cache proxy service.
+
+
+## Writing Objects
+
+Writes via the cache proxy service are analogous to a Redis `SET`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache. As with HTTP PUT, `SET` semantically covers both create and update
+operations.
+
+To set a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["SET", "rra:test:food", "apple"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.set("rra:test:food", "apple", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.set("rra:test:food", "apple")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.set("rra:test:food", "apple")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.set("rra:test:food", "apple")
+```
+
+### Set Configuration Parameters
+
+>**Note:** The cache proxy service write option (related to replication factor and
+consistency concern) may optionally be set within the nutcracker.conf, resulting
+in an override of the setting value at the bucket-level in Riak KV.
+
+The following configuration parameters apply to `SET` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+### Sibling Explosion
+
+As noted in the section "Sibling Resolution" above, Riak KV provides for a line of
+descendency (known as the [causal context][concept causal context]) for a value
+stored at a key. Clients performing write operations provide this causal context
+by setting the vector clock (VClock) that they last read.
+
+If a client does not provide the causal context, Riak KV makes no assumptions and
+treats the write as a new causal context, semantically equivalent to a
+create. In the case that a value is already stored at the key, this would lead
+to a sibling.
+
+Since the Redis protocol does not provide a means to pass a VClock, the cache
+proxy service needs to perform a read-before-write to obtain the current VClock
+so the write can continue the causal context previously established and avoid
+"sibling explosion".
+
+Despite these efforts, in the event of a network partition, siblings will still
+be created as clients writing to nodes on either side of the network partition
+can create divergent lines of descendency. Sibling resolution remains the means
+to merge these lines of descent into a coherent causal context.
+
+## Deleting Objects
+
+Deletes via the cache proxy service are analogous to a Redis `DEL`, with the added
+benefit of writing to Riak KV followed by a `PEXPIRE` to Redis, invalidating
+cache.
+
+To delete a value at a bucket/key in Riak KV, issue the following:
+
+```erlang
+{ok, RedisClientPid} = eredis:start_link("127.0.0.1", 22122).
+{ok, KeysAffected} = eredis:q(RedisClientPid, ["DEL", "rra:test:food"]).
+```
+
+```javascript
+var redis = require("redis"),
+    client = redis.createClient(22122, "127.0.0.1");
+
+client.del("rra:test:food", redis.print);
+```
+
+```python
+import redis
+
+r = redis.StrictRedis(host="127.0.0.1", port=22122)
+
+r.delete("rra:test:food")
+```
+
+```ruby
+require "redis"
+
+redis = Redis.new(host: "127.0.0.1", port: 22122)
+
+redis.del("rra:test:food")
+```
+
+```scala
+import com.lambdaworks.redis._
+
+var client = RedisClient.create("redis://127.0.0.1:22122")
+var connection = client.connect()
+
+connection.del("rra:test:food")
+```
+
+### Delete Configuration Parameters
+
+The following configuration parameters apply to `DEL` and may be set within the
+RRA configuration file `/etc/cache_proxy/cache_proxy_22122.yml`:
+
+|Parameter |Description |Default|
+|----------------|-----------------|-------|
+|`n_val` | The number of replicas for objects in a bucket. The `n_val` should be an integer greater than 0 and less than or equal to the number of nodes in the cluster.<br><br>**NOTE**: If you change the `n_val` after keys have been added to the bucket it may result in failed reads, as the new value may not be replicated to all of the appropriate partitions. | `3` |
+|`pw` | How many vnodes must respond for a write to be deemed successful. | `0` |
+|`w` | How many replicas need to acknowledge the write before responding. | `2` |
+|`sloppy_quorum` | Whether to treat vnodes holding values for another vnode as acceptable within the quorum determination. | `0` (false) |
+
+
+
+
diff --git a/content/riak/kv/3.0.4/add-ons/redis/redis-add-on-features.md b/content/riak/kv/3.0.4/add-ons/redis/redis-add-on-features.md
new file mode 100644
index 0000000000..76c2b3aa3d
--- /dev/null
+++ b/content/riak/kv/3.0.4/add-ons/redis/redis-add-on-features.md
@@ -0,0 +1,136 @@
+---
+title: "Riak Redis Add-on Features"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Redis Add-on Features"
+    identifier: "add-ons_redis_features"
+    weight: 504
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[GET-sequence]: {{<baseurl>}}images/redis/GET_seq.msc.png
+[SET-sequence]: {{<baseurl>}}images/redis/SET_seq.msc.png
+[DEL-sequence]: {{<baseurl>}}images/redis/DEL_seq.msc.png
+[Object-lifetime]: {{<baseurl>}}images/redis/Object_lifetime.msc.png
+[redis docs]: http://redis.io/commands
+[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md
+
+## Overview
+
+The cache proxy service in Riak Redis Add-on (RRA) provides pre-sharding and
+connection aggregation as a service, which reduces latency and increases
+addressable cache memory space with lower-cost hardware.
+
+On this page, you will find detailed descriptions of cache proxy service
+components, including what each component does and how you implement it. The
+following components are available:
+
+* [Pre-sharding](#pre-sharding)
+* [Connection Aggregation](#connection-aggregation)
+* [Command Pipelining](#command-pipelining)
+* [Read-through Cache](#read-through-cache)
+* [Write-around Cache](#write-around-cache)
+* [Commands](#commands)
+* [Object Lifetime](#object-lifetime)
+
+## Pre-sharding
+
+Pre-sharding with consistent hashing dispatches object reads and writes based
+on a configurable hash function, spreading load across multiple cache servers.
+The cache proxy service uses pre-sharding to extend the total addressable cache
+memory space based on the number of Redis servers. Request keys are hashed, then
+requests are routed to the Redis server that handles that portion of the key
+range.
+
+Redis with no persistence is used as the frontend cache proxy service, and
+Redis as a data server holds all data in memory. The addressable memory of the
+cache proxy is limited. By employing pre-sharding, the total addressable cache
+memory space is extended by the number of Redis servers.
+
+## Connection Aggregation
+
+Redis client connections are a limited resource. Using the cache proxy service,
+connections may be spread across multiple Riak Redis Add-on (RRA) servers. This
+reduces the total required connections to the Redis server for the same key.
+
+Redis clients in various languages support specifying multiple servers, as well
+as implementing multiple methods of spreading load across those servers (i.e.
+round-robin load balancing or consistent hashing). Since the cache proxy service
+is providing consistent hashing, any Redis client method of supporting multiple
+servers will suffice.
+ +## Command Pipelining + +The cache proxy service increases performance by pipelining requests to Redis. While pipelining can be performed at the client, the cache proxy service is ideal due to connection aggregation. Pipelining reduces network roundtrips to Redis and +lowers CPU usage on Redis. + +## Read-Through Cache + +Implementing caching strategies in the cache proxy service reduces the cost of implementing cache strategies in client code in multiple applications and languages. The cache proxy service supports the read-through cache strategy, the most prevalent caching strategy used in distributed computing. + +The read-through cache strategy of the GET command is represented by the +following sequence diagram: + +![GET command sequence diagram]({{}}images/redis/GET_seq.msc.png) + + +The `CACHE_TTL` configuration option establishes how long the cache takes to +become consistent with the backend server during a write (DELETE or PUT) to the +backend server. + +A short `CACHE_TTL`, for example "15s", reduces a significant amount of read +pressure from Riak, increasing performance of the overall solution. + +## Write-Around Cache + +The read-through cache strategy requires a TTL to keep cache as coherent as possible given that writes to Riak KV can and will be issued without the cache proxy service being informed of the write. The effect is that the cache proxy service is eventually consistent with the underlying Riak KV data store, with the time to consistency equal to the TTL. + +The cache proxy service write-around cache strategy was introduced to provide a means to keep cache coherent with zero time to consistency with the underlying Riak KV data store for all writes that the cache proxy is informed of. For the Redis String (Value in KV) datatype, SET and DEL commands result in writes to the underlying Riak KV data store followed by a PEXPIRE to invalidate cache. + +Of the three write cache strategies, the write-around cache strategy is the least +prone to race condition, but least optimal for the read which immediately follows +the write. In the overwhelming majority of distributed application data access +patterns, the added certainty of cache coherency afforded by write-around over +write-through is well worth the single cache miss. By definition, a key that is +cached is expected to be accessed frequently, hence the single cache miss is +expected to be followed by several accurate cache hits. + +The write-around cache strategy of the SET command is represented by the +following sequence diagram: + +![SET command sequence diagram]({{}}images/redis/SET_seq.msc.png) + +The write-around cache strategy of the DEL command is represented by the +following sequence diagram: + +![DEL command sequence diagram]({{}}images/redis/DEL_seq.msc.png) + +## Commands + +For command details, refer to the Redis [documentation][redis docs]. + +The cache proxy service supports the following augmented Redis commands fully: + +* GET - get the value of a key from Redis or Riak KV utilizing the read-through + caching strategy with a TTL set at service configuration time. + +* SET - set the value of a key to Riak KV and invalidate cache, issue a PEXPIRE + to Redis. + +* DEL - delete the value of a key to Riak KV and invalidate cache, issue a + PEXPIRE to Redis. + +The cache proxy service also supports the set of Redis commands supported by Twemproxy, but only to the point of pre-sharding and command pipelining, issued only to Redis. Refer to the Twemproxy [documentation][twemproxy docs]. 
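+
+One practical corollary, sketched below with an illustrative key and TTL: a
+`PEXPIRE` with a short timeout evicts an entry from cache without touching the
+value persisted in Riak KV (see the note that follows on preferring this over
+`DEL`):
+
+```bash
+# Drop the cached copy after 1 ms; the object stored in Riak KV is left intact
+redis-cli -h 127.0.0.1 -p 22122 pexpire rra:test:food 1
+```
+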
+
+>**Important:** While the cache proxy service does support issuing DEL commands,
+PEXPIRE, with a small TTL, is suggested instead when the semantic intent is to
+remove an item from cache. With write-around, the DEL command will issue a
+delete to the Riak backend.
+
+## Object Lifetime
+
+With the combination of read-through and write-around cache strategies, the
+full object lifetime for a key-value is represented by the following
+sequence diagram:
+
+![Object lifetime sequence diagram]({{<baseurl>}}images/redis/Object_lifetime.msc.png)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/add-ons/redis/set-up-rra.md b/content/riak/kv/3.0.4/add-ons/redis/set-up-rra.md
new file mode 100644
index 0000000000..6d7a1d1752
--- /dev/null
+++ b/content/riak/kv/3.0.4/add-ons/redis/set-up-rra.md
@@ -0,0 +1,285 @@
+---
+title: "Setting Up Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Set Up Redis Add-on"
+    identifier: "add-ons_redis_setup"
+    weight: 201
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis use]: ../using-rra
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+[install index]: {{<baseurl>}}riak/kv/3.0.4/setup/installing
+[perf open files]: {{<baseurl>}}riak/kv/3.0.4/using/performance/open-files-limit/#changing-the-limit
+[lab ansible]: https://github.com/paegun/ansible-cache-proxy
+
+This page will walk you through the process of installing Riak Redis Add-on
+(RRA) and configuring it to run in your environment. Check the
+[prerequisites](#prerequisites) before you get started to make sure you have
+everything you need in order to successfully install and use RRA.
+
+## Prerequisites
+
+Before you begin installing Riak Redis Add-on (RRA), you will need to ensure
+that you have root or sudo access on the nodes where you plan to install RRA.
+You will also need to have Riak KV already [installed][install index].
+
+While this page assumes that Redis is not already installed, existing
+installations of Redis are supported. If you have an existing Redis
+installation, look for the *skip ahead* instructions as you go.
+
+This page assumes that Redis is (or will be) installed on separate hosts from
+Riak KV. You will need the list of Riak KV and Redis host:port combinations.
+RRA communicates with Riak KV via the protobuf port, and the host:port values
+are used to configure the cache proxy.
+
+## In the Lab
+
+An ansible setup for the Riak Redis Add-on (RRA) was developed to provide a
+runnable example of an installation, see [ansible cache proxy][lab ansible].
+The remainder of this setup guide lists the commands required to install and
+configure RRA manually.
+
+## Installing
+
+1. On all Redis and Riak Redis Add-on hosts, change the [open-files limit][perf open files].
+2. On all Redis hosts, install Redis. *Skip ahead* if you already have Redis installed.
+3. Install Riak Redis Add-on.
+
+### Change the open-files limit
+
+As with Riak KV, both the total open-files limit and the per-user open-files limit
+must be high enough to allow Redis and Riak Redis Add-on (RRA) to function.
+
+For a complete guide on changing the limit in Riak KV, see
+[Changing the limit][perf open files].
+
+#### Linux
+
+On most Linux distributions, the total limit for open files is controlled by
+`sysctl`:
+
+```bash
+# check the current system-wide limit
+sudo sysctl fs.file-max
+
+# add the line fs.file-max=65536 to /etc/sysctl.conf, then reload it
+sudo sysctl -p
+```
+
+To change the per-user file limit, you need to edit `/etc/security/limits.conf`.
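+
+A hedged sketch of doing so (the user name `riak` and the 65536 limit are
+illustrative; the Ubuntu section below shows the same entries in file form):
+
+```bash
+# Append per-user limits, then log in again for them to take effect
+echo "riak soft nofile 65536" | sudo tee -a /etc/security/limits.conf
+echo "riak hard nofile 65536" | sudo tee -a /etc/security/limits.conf
+
+# Verify the effective limit from the new shell
+ulimit -n
+```
+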
+ +#### CentOS + +On CentOS systems, set a proper limit for the user you're usually logging in with +to do any kind of work on the machine, including managing Riak KV, Redis, or RRA services. On CentOS, `sudo` properly inherits the values from the +executing user. + +#### Ubuntu + +On Ubuntu systems, the following settings are recommended: + +```config +»USERNAME« hard nofile 65536 +»USERNAME« soft nofile 65536 +root hard nofile 65536 +root soft nofile 65536 +``` + +>**Note:** You may need to log out of your shell and then log back in for these changes to take effect. + + +### Install Redis + +>**Note:** If you already have Redis installed, *skip ahead* to "Install Riak Redis Add-on". + +#### Install on Ubuntu + +If you are on Ubuntu, run the following to install Redis: + +```bash +# add the dotdeb repositories to your APT sources. +sudo bash -c "cat >> /etc/apt/sources.list.d/dotdeb.org.list" <**Notes:** ss is used here to support a minimal installed system, but netstat may be used as well. + +### Install Riak Redis Add-on (RRA) + +>**Note:** +>Riak Redis Add-on (RRA) is available to Enterprise customers for download in the usual Zendesk forums. + +If you are on CentOS, run the following to install RRA: + +```bash +sudo yum -y localinstall cache_proxy_ee_1.1.0_x86_64.rpm +``` + +If you are on Ubuntu, run the following to install RRA: + +```bash +sudo dpkg -i cache_proxy_ee_1.1.0_amd64.deb +``` + +## Configuring Riak Redis Add-on + +To configure Riak Redis Add-on (RRA), edit the configuration file: /etc/cache_proxy/cache_proxy_22122.yml. + +The RRA configuration file is in YAML format. An example configuration +file is provided in the install, and it contains all relevant configuration elements: + +```config +» XML node name« : + listen: 0.0.0.0:22122 + hash: fnv1a_64 + distribution: ketama + auto_eject_hosts: true + redis: true + server_retry_timeout: 2000 + server_failure_limit: 1 + server_ttl: 1h + servers: + - 127.0.0.1:6379:1 + backend_type: riak + backend_max_resend: 2 + backends: + - 127.0.0.1:8087 +``` + +Set the `listen` configuration value to set the RRA listen port. + +To set the time-to-live (TTL) for values stored in cache, set the `server_ttl` +configuration value. Human-readable time values can be specified, +with the most likely units being `s` for seconds or `ms` for milliseconds. + +Set the list of Redis servers by listing the servers, separated by `-`, under the `servers` configuration value in the format `»host«:»port«:»weight«` (weight is optional). + +Set the list of Riak KV servers by listing the servers, separated by `-`, under the `backends` configuration value in the format `»host«:»port«:»weight«` +(weight is optional). You will want to make sure to list the Riak KV protobuf (pb) port here. + +### Verify your configuration + +If you are on Ubuntu, run the following to start RRA: + +```bash +sudo service cache_proxy start +``` + +If you are on CentOS, run the following to restart Redis and ensure redis-server +is enabled to start on boot: + +```bash +systemctl start cache_proxy +``` + +To verify RRA is running and listening on the expected port, run the +following (using the loopback interface and the default RRA port 22122 +as an example): + +```bash +redis-cli -h 127.0.0.1 -p 22122 set test:redis-add-on SUCCESS +redis-cli -h 127.0.0.1 -p 22122 get test:redis-add-on SUCCESS +``` + +Redis should respond with `SUCCESS`. 
+
+If RRA is responding with the expected output, run the following to
+clean up and remove the test value:
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 del test:redis-add-on
+```
+
+If you did not get the expected output, run the following
+to verify that RRA is running on the expected port:
+
+```bash
+ss -nlp | grep [n]utcracker
+```
+
+>**Note:** ss is used here to support a minimal installed system, but netstat may be used as well.
+
+## Next Steps
+
+Get started with some [basic usage][addon redis use] or check out more info on [setting up for development (with examples)][addon redis develop].
+
+
+
+
diff --git a/content/riak/kv/3.0.4/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/3.0.4/add-ons/redis/set-up-rra/deployment-models.md
new file mode 100644
index 0000000000..a75c88eeee
--- /dev/null
+++ b/content/riak/kv/3.0.4/add-ons/redis/set-up-rra/deployment-models.md
@@ -0,0 +1,143 @@
+---
+title: "Riak Redis Add-on Deployment Models"
+description: "Explore the various models for deploying Riak Redis Add-on"
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Redis Add-on Deployment Models"
+    identifier: "add-ons_redis_deployment"
+    weight: 201
+    parent: "add-ons_redis_setup"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[Local-deployment]: {{<baseurl>}}images/redis/rra_deployment_local.png
+[Colocated-deployment]: {{<baseurl>}}images/redis/rra_deployment_colocated.png
+[Distributed-deployment]: {{<baseurl>}}images/redis/rra_deployment_distributed.png
+
+## Deployment Models
+
+### Local Cache Deployment
+
+In a local cache deployment, the RRA and Redis are deployed to the application
+server.
+
+![Local-deployment]({{<baseurl>}}images/redis/rra_deployment_local.png)
+
+Connections:
+
+* RRA: The connections between Application Service instances and the RRA Service
+  instance are local.
+* Redis: The connection between the RRA Service instance and the Redis Service
+  instance is local.
+* Riak: The connections between Application Servers and Riak Nodes are distributed
+  and bounded to equal the number of Riak nodes _multiplied_ by the number of
+  Application Servers, since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Cache hits are extremely fast
+
+Disadvantages:
+
+* Cache writes on one application server are *not* observed on other application
+  servers, so cache hit rates are likely lower unless some form of consistent
+  routing to the application server exists within the solution.
+* Redis competing for RAM with the application service may be problematic
+
+### Colocated Cache Deployment
+
+In a colocated cache deployment, the RRA may be deployed either to the
+application server (suggested) or to the Riak servers, and Redis is deployed to
+the Riak servers.
+
+In the case of deploying the RRA to the application servers, the RRA features
+of reducing connections from the relatively high number of application service
+instances to the fewer Redis (cache) and Riak (persistent) data service
+instances allow for the greatest scale at the expense of the deployment cost
+of pushing a service and its configuration.
+
+In the case of deploying the RRA to the colocated Redis and Riak data servers,
+the maximum scale for the solution is constrained by the number of network
+connections from the application services, while deployment costs remain a matter
+of pushing a service and its configuration. In either case, deployment should
+be automated, so deployment costs are not multiplied by the number of servers.
+
+![Colocated-deployment]({{<baseurl>}}images/redis/rra_deployment_colocated.png)
+
+Connections:
+
+* RRA: The connections between Application Service instances and the RRA Service
+  instance are distributed and bounded to equal the number of Riak nodes
+  _multiplied_ by the number of Application Service instances.
+* Redis: The connection between the RRA Service instance and Redis Service
+  instance is local.
+* Riak: The connections between RRA and Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _squared_.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so slightly increased latency compared to local.
+* Redis competing for RAM with Riak will likely be problematic. Redis should
+  be configured to ensure `maxmemory` and `maxmemory-policy` constrain Redis
+  to ensure Riak is allotted sufficient RAM to serve the more important
+  persistent data storage and retrieval services. See http://redis.io/topics/config
+* This model may seem to provide data locality, but in the case of faults in
+  either Redis or Riak services, the fault tolerance mechanisms of RRA and
+  Riak will not match exactly, as communicating the necessary information to
+  support such lock-step fault tolerance would lead to greater mean latencies,
+  and Riak provides superior 99th percentile latency performance in the face
+  of faults.
+
+### Distributed Cache Deployment
+
+In a distributed cache deployment, the RRA is deployed to the application server
+and Redis is deployed to standalone servers, separate from Riak cluster nodes.
+
+![Distributed-deployment]({{<baseurl>}}images/redis/rra_deployment_distributed.png)
+
+Connections:
+
+* RRA: The connections between Application Service instances and the RRA Service
+  instance are local.
+* Redis: The connections between the RRA Service instance and Redis Service
+  instances are distributed and bounded to equal the number of Application
+  Servers _multiplied_ by the number of Redis Servers.
+* Riak: The connections between RRA and Riak Nodes are distributed and bounded to
+  equal the number of Riak nodes _multiplied_ by the number of Application
+  Servers since they are aggregated at the RRA Service instance.
+
+Advantages:
+
+* Increases the cache hit rate, as a cache write from one application server
+  will lead to a cache hit by all other application servers.
+* Keeps RRA near the application, reducing network connections.
+* Moves Redis to distinct servers, allowing the cache more RAM and not
+  constraining the RAM of either application or persistent data services.
+
+Disadvantages:
+
+* Typically increased distance between the application service and the Redis and
+  Riak services, so increased latency compared to local.
+
+### Recommendation
+
+The relative advantages and disadvantages of the Distributed Cache Deployment,
+most notably the increased cache hit rate and reduced connection overhead,
+make it the standout choice for applications requiring the scale and
+operational simplicity of Riak. For this reason, we recommend the Distributed
+Cache Deployment.
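+
+To make the connection arithmetic above concrete, consider a hypothetical
+solution with 10 application servers, 5 Riak nodes, and 3 dedicated Redis
+servers (illustrative counts, not sizing guidance). In the distributed model,
+each application server connects to RRA locally, while RRA opens 10 × 3 = 30
+connections to Redis and 5 × 10 = 50 connections to Riak. The colocated model,
+by contrast, requires 5 × 10 = 50 connections just between the application
+servers and RRA, plus 5 × 5 = 25 connections between RRA and Riak.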
+
+
+
+
diff --git a/content/riak/kv/3.0.4/add-ons/redis/using-rra.md b/content/riak/kv/3.0.4/add-ons/redis/using-rra.md
new file mode 100644
index 0000000000..54ddb44b45
--- /dev/null
+++ b/content/riak/kv/3.0.4/add-ons/redis/using-rra.md
@@ -0,0 +1,246 @@
+---
+title: "Using Riak Redis Add-on"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Using Redis Addon"
+    identifier: "add-ons_redis_getstarted"
+    weight: 302
+    parent: "add-ons_redis"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/kv/3.0.4/add-ons/redis/get-started-with-rra
+---
+
+[addon redis develop]: ../developing-rra/
+[addon redis setup]: ../set-up-rra/
+[dev api http]: {{<baseurl>}}riak/kv/3.0.4/developing/api/http/
+[ee]: https://www.tiot.jp/en/about-us/contact-us/
+
+
+Now that you've [set up Riak Redis Add-on (RRA)][addon redis setup], you're ready to use RRA with any Redis client which supports `GET`, `SET`, and `DEL` operations.
+
+This page will walk you through using RRA.
+
+## Prerequisites
+
+We assume that the Redis client (`redis-cli`) is installed, either alongside the Redis server or on a test machine.
+
+You will need the list of Riak KV and Riak Redis Add-on host:port combinations. For testing, Riak KV values are obtained via the [HTTP API][dev api http].
+
+## Run the Read-Through Test
+
+Throughout this test example, the bucket "test" and key "foo" are used to
+demonstrate how to address the hierarchical namespace support in Riak KV
+through the flat Redis key. The bucket type is not specified in this example,
+so it is effectively the default bucket type, named "default". For additional
+information regarding key namespaces, see [developing with Riak Redis Add-on (RRA)][addon redis develop].
+
+The read-through test ensures that your configuration correctly tracks values obtained from Riak KV and Riak Redis Add-on (RRA). The main actions of the test are:
+
+* DELETE the Riak object at the `test` bucket with the key `foo`, which ensures that there are no siblings.
+* PUT a Riak object with the value 'bar' at the `test` bucket with the key `foo`.
+* GET the Riak object at the `test` bucket with the key `foo`.
+* GET the string-representation of the object from the cache proxy service using the key `test:foo`. (The cache proxy service should parse out the first portion of the Redis colon-separated key (namespace) to identify which Riak bucket to perform the backend read from.)
+* Assert that the value obtained from the previous cache proxy GET is 'bar'.
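+
+In other words, once the test passes, a Redis read through RRA and an HTTP read
+directly from Riak KV return the same value (a quick manual check, assuming the
+default ports on the loopback interface):
+
+```bash
+redis-cli -h 127.0.0.1 -p 22122 get test:foo
+curl http://127.0.0.1:8098/buckets/test/keys/foo
+```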
+
+First, create a file named `read_through_test.sh` with the following content:
+
+```bash
+#!/usr/bin/env bash
+
+# set test environment; ports and bucket may be passed as arguments
+CACHE_PROXY_PORT="${1:-22122}"
+RIAK_HTTP_PORT="${2:-8098}"
+RIAK_TEST_BUCKET="${3:-test}"
+RIAK_HTTP_IP="127.0.0.1"
+CACHE_PROXY_IP="127.0.0.1"
+CACHE_PROXY_STATISTICS_PORT="22123"
+KEY="foo"
+VALUE="bar"
+
+# DELETE Riak object, ensure no siblings
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# PUT Riak object
+curl -s -X PUT -d "$VALUE" "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# GET Riak object
+RIAK_VALUE=$(curl -s -X GET "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY")
+
+# GET Cache Proxy value
+CACHE_VALUE=$(redis-cli -h "$CACHE_PROXY_IP" -p "$CACHE_PROXY_PORT" get "$RIAK_TEST_BUCKET:$KEY")
+
+# DELETE Riak object, cleanup
+curl -s -X DELETE "http://$RIAK_HTTP_IP:$RIAK_HTTP_PORT/buckets/$RIAK_TEST_BUCKET/keys/$KEY"
+
+# Assert
+if [[ "$RIAK_VALUE" == "$CACHE_VALUE" ]]; then
+    RESULT="Success"
+else
+    RESULT="FAIL"
+fi
+echo "$RESULT - read $RIAK_VALUE from Riak and $CACHE_VALUE from Cache Proxy."
+```
+
+Then, once you've created the file, make it executable and run it:
+
+```bash
+chmod +x read_through_test.sh
+./read_through_test.sh 22122 8098 test
+```
+
+### Exceptions
+
+If the test does not pass, verify that both Redis and RRA are running. You can do this by running:
+
+```bash
+ps aux |grep [r]edis
+ps aux |grep [n]utcracker
+```
+
+The result should list `redis` and `nutcracker` respectively.
+
+Also, verify that Riak KV is started and listening on the protocol buffer port specified:
+
+```bash
+sudo riak config effective |grep proto
+```
+
+If RRA is misconfigured, [reconfigure][addon redis setup] it, and restart the service with the following:
+
+```bash
+sudo service cache_proxy restart
+```
+
+If RRA is configured correctly and all required services are running, you may want to restart each service from front to back as follows:
+
+1. Stop RRA.
+2. Stop Redis.
+3. *Optional* Restart Riak KV (this should only be necessary if Riak KV is not responding to protocol buffer requests).
+4. Start Redis.
+5. Start RRA.
+
+```bash
+sudo service cache_proxy stop
+sudo service redis stop
+
+# optional
+sudo riak restart
+
+sudo service redis start
+sudo service cache_proxy start
+```
+
+## Using Riak Redis Add-on
+
+Once you've successfully configured Riak Redis Add-on (RRA) and established a Riak KV and Redis client in the language of your choosing, you're ready to start using RRA.
+
+For objects that should not be cached, interact with Riak KV as usual: issue GET, PUT, and DELETE commands through the Riak client.
+
+For objects that should be cached, read from RRA: issue GET, SET, and DEL commands through the Redis client.
+
+### Monitoring
+
+#### RRA
+
+Since RRA is installed as a service, the system service monitoring daemon will automatically restart a service with the correct configuration in the event that the service's process was killed or terminated by other means.
+
+The log file for RRA is stored by default in `/var/log/cache_proxy.log`. RRA is logrotate friendly, responding to the signal to reopen the log file following a rotate.
+
+For additional monitoring, RRA provides statistics on service availability. The statistics provided are generally useful in monitoring the health of the RRA service.
+
+For example, run the following command (using the loopback interface and the default statistics port as an example):
+
+```bash
+telnet 127.0.0.1 22123
+```
+
+This returns statistics results such as:
+
+```json
+{
+  "bdp_cache_proxy": {
+    "192.168.50.2:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 216,
+      "requests": 9,
+      "response_bytes": 39,
+      "responses": 4,
+      "server_connections": 1,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "192.168.50.3:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 0,
+      "requests": 0,
+      "response_bytes": 0,
+      "responses": 0,
+      "server_connections": 0,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "192.168.50.4:6379": {
+      "in_queue": 0,
+      "in_queue_bytes": 0,
+      "out_queue": 0,
+      "out_queue_bytes": 0,
+      "request_bytes": 90,
+      "requests": 5,
+      "response_bytes": 258,
+      "responses": 2,
+      "server_connections": 0,
+      "server_ejected_at": 0,
+      "server_eof": 0,
+      "server_err": 0,
+      "server_timedout": 0
+    },
+    "client_connections": 0,
+    "client_eof": 6,
+    "client_err": 0,
+    "forward_error": 0,
+    "fragments": 0,
+    "server_ejects": 0
+  },
+  "curr_connections": 4,
+  "service": "nutcracker",
+  "source": "vagrant",
+  "timestamp": 1438301846,
+  "total_connections": 10,
+  "uptime": 7227,
+  "version": "0.4.0"
+}
+```
+
+Using the above results, you should be able to determine the metric changes that would flag a change in service health. With this information you can implement monitoring to help guarantee the overall health of the cache proxy service in RRA and of the custom software within your overall solution.
+
+While we do not endorse a specific monitoring solution, the open interface to statistics allows you to use the monitoring solution of your choice. The following is a brief listing of compatible monitoring solutions:
+
+* Custom - https://github.com/gfranxman/NutcrackerMonitor
+* NewRelic - http://newrelic.com/plugins/schoology/245
+* Nagios - https://github.com/schoology/twemproxy_nagios
+
+#### Redis
+
+Various Redis monitoring solutions exist in the market and, like monitoring RRA, these solutions make underlying calls to obtain Redis statistics, typically via the `info` command alone.
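+
+For example, you can pull the same kind of statistics from Redis yourself with
+the Redis client (a minimal sketch, assuming a Redis instance on the loopback
+interface and the default port; `memory` is one of the standard `INFO`
+sections):
+
+```bash
+redis-cli -h 127.0.0.1 -p 6379 info memory
+```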
+ +As with RRA, Redis statistics available on the Redis client port allow for monitoring via solutions such as the following: + +* Custom - http://volumelabs.net/redis_monitoring/ +* NewRelic - http://newrelic.com/plugins/poison-pen-llc/28 +* Nagios - https://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details + + + + diff --git a/content/riak/kv/3.0.4/configuring.md b/content/riak/kv/3.0.4/configuring.md new file mode 100644 index 0000000000..48e14270f6 --- /dev/null +++ b/content/riak/kv/3.0.4/configuring.md @@ -0,0 +1,88 @@ +--- +title: "Configuring Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Configuring" + identifier: "configuring" + weight: 200 + pre: cog +toc: true +aliases: +--- + +[config basic]: ../configuring/basic +[config backend]: ../configuring/backend +[config manage]: ../configuring/managing +[config reference]: ../configuring/reference +[config strong consistency]: ../configuring/strong-consistency +[config load balance]: ../configuring/load-balancing-proxy +[config mapreduce]: ../configuring/mapreduce +[config search]: ../configuring/search +[config v3 mdc]: ../configuring/v3-multi-datacenter +[config v2 mdc]: ../configuring/v2-multi-datacenter + +## In This Section + +#### [Basic Configuration][config basic] + +A guide covering commonly adjusted parameters when setting up a new cluster. + +[Learn More >>][config basic] + +#### [Backend Configuration][config backend] + +Information on backend-specific configuration parameters. + +[Learn More >>][config backend] + +#### [Managing Configuration][config manage] + +A small guide to retrieving, checking, and debugging your cluster configuration. + +[Learn More >>][config manage] + +#### [Configuration Reference][config reference] + +A detailed list of all possible configuration parameters. + +[Learn More >>][config reference] + +#### [Implementing Strong Consistency][config strong consistency] + +An article providing information on configuring and monitoring a Riak KV +cluster's optional strong consistency subsystem. + +[Learn More >>][config strong consistency] + +#### [Load Balancing & Proxy][config load balance] + +A brief guide on commonly used load-balancing and proxy solutions. + +[Learn More >>][config load balance] + +#### [MapReduce Settings][config mapreduce] + +Tutorial on configuring and tuning MapReduce for a cluster. + +[Learn More >>][config mapreduce] + +#### [Search Settings][config search] + +Information on configuring and using Riak KV Search from an operational perspective. 
+
+[Learn More >>][config search]
+
+#### [V3 Multi-Datacenter][config v3 mdc]
+
+A guide on configuring Riak's V3 Multi-Datacenter Replication.
+
+[Learn More >>][config v3 mdc]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/backend.md b/content/riak/kv/3.0.4/configuring/backend.md
new file mode 100644
index 0000000000..e77555f318
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/backend.md
@@ -0,0 +1,647 @@
+---
+title: "Backend Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Backend Configuration"
+    identifier: "configuring_backend"
+    weight: 110
+    parent: "configuring"
+toc: true
+aliases:
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb
+[plan backend leveled]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveled
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/multi
+
+## LevelDB
+
+Configurable parameters for Riak's [LevelDB][plan backend leveldb] storage backend.
+
+> **Note on upgrading to 2.0**
+>
+> If you are upgrading to Riak 2.0+ from a 1.x version, using LevelDB, and
+wish to use your old configuration files, i.e. `app.config` and
+`vm.args`, please note that you must set the `total_leveldb_mem_percent`
+setting in the `eleveldb` section of `app.config`. We recommend setting
+it to `70`. If you do not set this parameter, it will default to `15`,
+which can lead to problems in some clusters.
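+
+A minimal sketch of that `app.config` fragment (merge it into your existing
+`eleveldb` section rather than replacing it):
+
+```erlang
+{eleveldb, [
+    %% assign 70% of server memory to LevelDB, as recommended above
+    {total_leveldb_mem_percent, 70}
+]}
+```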
+| Config | Description | Default |
+|:-------|:------------|:--------|
+| `leveldb.block_cache_threshold` | This setting defines the limit past which block cache memory can no longer be released in favor of the page cache. This setting has no impact in favor of file cache. The value is set on a per-vnode basis. | `32MB` |
+| `leveldb.compaction.trigger.tombstone_count` | Controls when a background compaction initiates solely due to the number of delete tombstones within an individual `.sst` table file. A value of `off` disables the feature. | `1000` |
+| `leveldb.compression` | Enabling this setting (`on`), which is the default, saves disk space. Disabling it may reduce read latency but increase overall disk activity. This option can be changed at any time, but it will not impact data on disk until the next time a file requires compaction. | `on` |
+| `leveldb.compression.algorithm` | Selects which compression algorithm is used when `leveldb.compression` is `on`. In new `riak.conf` files, this is explicitly set to `lz4`; however, when this setting is not provided, `snappy` will be used for backward compatibility. When you determine that you will no longer need backward compatibility, setting this to `lz4` will cause future compactions to use the LZ4 algorithm for compression. | `lz4` in new `riak.conf` files<br>`snappy` when not provided |
+| `leveldb.data_root` | The directory in which LevelDB will store its data. | `./data/leveldb` |
+| `leveldb.fadvise_willneed` | Option to override LevelDB's use of `fadvise(DONTNEED)` with `fadvise(WILLNEED)` instead. `WILLNEED` can reduce disk activity on systems where physical memory exceeds the database size. | `false` |
+| `leveldb.maximum_memory` | This parameter defines the server memory (in bytes) to assign to LevelDB. Also see `leveldb.maximum_memory.percent` to set LevelDB memory as a percentage of system total. | `80` |
+| `leveldb.maximum_memory.percent` | This parameter defines the percentage of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes to stay within this size. The memory size can alternately be assigned as a byte count via `leveldb.maximum_memory` instead. | `70` |
+| `leveldb.threads` | The number of worker threads performing LevelDB operations. | `71` |
+| `leveldb.verify_checksums` | Enables or disables the verification of the data fetched from LevelDB against internal checksums. | `on` |
+| `leveldb.verify_compaction` | Enables or disables the verification of LevelDB data during compaction. | `on` |
+| `leveldb.block.size_steps` | Defines the number of incremental adjustments to attempt between the `block.size` value and the maximum `block.size` for an `.sst` table file. A value of zero disables the underlying dynamic `block_size` feature. | `16` |
+| `leveldb.block.restart_interval` | Defines the key count threshold for a new key entry in the key index for a block. Most deployments should leave this parameter alone. | `16` |
+| `leveldb.block.size` | Defines the size threshold for a block/chunk of data within one `.sst` table file. Each new block gets an index entry in the `.sst` table file's master index. | `4KB` |
+| `leveldb.bloomfilter` | Each database `.sst` table file can include an optional "bloom filter" that is highly effective in shortcutting data queries that are destined to not find the requested key. The bloom filter typically increases the size of an `.sst` table file by about 2%. | `on` |
+| `leveldb.write_buffer_size_min` | Each vnode first stores new key/value data in a memory-based write buffer. This write buffer is in parallel to the recovery log mentioned in the `sync` parameter. Riak creates each vnode with a randomly sized write buffer for performance reasons. The random size is somewhere between `write_buffer_size_min` and `write_buffer_size_max`. | `30MB` |
+| `leveldb.write_buffer_size_max` | See `leveldb.write_buffer_size_min` directly above. | `60MB` |
+| `leveldb.limited_developer_mem` | This is a Riak-specific option that is used when a developer is testing a high number of vnodes and/or several VMs on a machine with limited physical memory. Do not use this option if making performance measurements. This option overwrites values given to `write_buffer_size_min` and `write_buffer_size_max`. | `off` |
+| `leveldb.sync_on_write` | Whether LevelDB will flush after every write. Note: if you are familiar with `fsync`, this is analogous to calling `fsync` after every write. | `off` |
+| `leveldb.tiered` | The level number at which LevelDB data switches from the faster to the slower array. The default of `off` disables the feature. | `off` |
+| `leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`. | |
+| `leveldb.tiered.path.slow` | The path prefix for `.sst` files at or above the level set by `leveldb.tiered`. | |
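+
+For example, a `riak.conf` fragment that pins the data directory and opts into
+LZ4 compression (a sketch; the values shown are the defaults discussed above,
+apart from the explicit algorithm choice):
+
+```riakconf
+leveldb.data_root = ./data/leveldb
+leveldb.compression = on
+leveldb.compression.algorithm = lz4
+```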
+
+## Leveled
+
+Configurable parameters for Riak's [leveled][plan backend leveled] storage backend.
+| Config | Description | Default |
+|:-------|:------------|:--------|
+| `leveled.data_root` | A path under which leveled data files will be stored. | `$(platform_data_dir)/leveled` |
+| `leveled.sync_strategy` | Strategy for flushing data to disk. Can be set to `riak_sync`, `sync` (if OTP > 16) or `none`. Use `none`, and the OS will flush when most efficient. Use `riak_sync` or `sync` to flush after every PUT (not recommended without some hardware support, e.g. flash drives and/or flash-backed write caches). | `none` |
+| `leveled.compression_method` | Can be `lz4` or `native` (which will use the Erlang native zlib compression) within `term_to_binary`. | `native` |
+| `leveled.compression_point` | The point at which compression is applied to the Journal (the Ledger is always compressed). Use `on_receipt` or `on_compact`. `on_compact` is suitable when values are unlikely to yield much benefit from compression (compression is only attempted when compacting). | `on_receipt` |
+| `leveled.log_level` | Can be `debug`, `info`, `warn`, `error` or `critical`. Sets the minimum log level to be used within leveled. Leveled will log many lines to allow for stats to be extracted by those using log indexers such as Splunk. | `info` |
+| `leveled.journal_size` | The approximate size (in bytes) when a Journal file should be rolled. Normally keep this at around the size of o(100K) objects. | `1000000000` |
+| `leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. The higher the value, the more compaction runs, and the sooner space is recovered. But each run has a cost. | `24` |
+| `leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. Use a low hour of 0 and a top hour of 23 to have no compaction window (i.e. always compact regardless of time of day). | `0` |
+| `leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. If low hour > top hour, then compaction will work overnight between low hour and top hour (inclusive). Timings rely on the server's view of local time. | `23` |
+| `leveled.max_run_length` | In a single compaction run, the maximum number of consecutive files which may be compacted. | `4` |
+| `leveled_reload_recalc` | Enable the `recalc` compaction strategy within the leveled backend in riak. | `disabled` |
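+
+For instance, to flush via the operating system and keep journal compaction
+inside an overnight window in `riak.conf` (a sketch; the hours are
+illustrative, and low hour > top hour gives the overnight behavior described
+above):
+
+```riakconf
+leveled.sync_strategy = none
+leveled.compaction_low_hour = 22
+leveled.compaction_top_hour = 6
+```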
+
+## Bitcask
+
+Configurable parameters for Riak's [Bitcask][plan backend bitcask] storage backend.
+| Config | Description | Default |
+|:-------|:------------|:--------|
+| `bitcask.data_root` | The directory under which Bitcask will store its data. | `./data/bitcask` |
+| `bitcask.io_mode` | Configure how Bitcask writes data to disk. If set to `erlang`, writes are made via Erlang's built-in file API; if set to `nif`, writes are made via direct calls to the POSIX C API. The `nif` mode provides higher throughput for certain workloads, but has the potential to negatively impact the Erlang VM, leading to higher worst-case latencies and possible throughput collapse. | `erlang` |
+| `bitcask.expiry` | By default, Bitcask keeps all of your data. If your data has limited time value, or if you need to purge data for space reasons, you can set the `expiry` option. For example, if you need to purge data automatically after 1 day, set the value to `1d`. `off` disables automatic expiration. | `off` |
+| `bitcask.expiry.grace_time` | By default, Bitcask will trigger a merge whenever a data file contains an expired key. This may result in excessive merging under some usage patterns. To prevent this you can set the `bitcask.expiry.grace_time` option. Bitcask will defer triggering a merge solely for key expiry by the configured number of seconds. Setting this to `1h` effectively limits each cask to merging for expiry once per hour. | `0` |
+| `bitcask.hintfile_checksums` | Whether to allow the CRC to be present at the end of hintfiles. Setting this to `allow_missing` runs Bitcask in a backwards-compatible mode in which old hint files will still be accepted without CRC signatures. | `strict` |
+| `bitcask.fold.max_puts` | See the description for the `bitcask.fold.max_age` config directly below. | `0` |
+| `bitcask.fold.max_age` | Fold keys thresholds will reuse the keydir if another fold was started less than `fold.max_age` ago and there were fewer than `fold.max_puts` updates. Otherwise, it will wait until all current fold keys complete and then start. Set either option to `unlimited` to disable. | `unlimited` |
+| `bitcask.merge.thresholds.fragmentation` | Describes which ratio of dead keys to total keys in a file will cause it to be included in the merge. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 4 dead keys and 6 live keys, it will be included in the merge at the default ratio (which is 40). Increasing the value will cause fewer files to be merged, decreasing the value will cause more files to be merged. | `40` |
+| `bitcask.merge.thresholds.dead_bytes` | Describes the minimum amount of data occupied by dead keys in a file to cause it to be included in the merge. Increasing the value will cause fewer files to be merged, whereas decreasing the value will cause more files to be merged. | `128MB` |
+| `bitcask.merge.thresholds.small_file` | Describes the minimum size a file must have to be excluded from the merge. Files smaller than the threshold will be included. Increasing the value will cause more files to be merged, whereas decreasing the value will cause fewer files to be merged. | `10MB` |
+| `bitcask.merge.triggers.dead_bytes` | Describes how much data stored for dead keys in a single file will trigger merging. If a file meets or exceeds the trigger value for dead bytes, merge will be triggered. Increasing the value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. When either of these constraints are met by any file in the directory, Bitcask will attempt to merge files. | `512MB` |
+| `bitcask.merge.triggers.fragmentation` | Describes which ratio of dead keys to total keys in a file will trigger merging. The value of this setting is a percentage from 0 to 100. For example, if a data file contains 6 dead keys and 4 live keys, then merge will be triggered at the default setting. Increasing this value will cause merging to occur less often, whereas decreasing the value will cause merging to happen more often. | `60` |
+| `bitcask.merge.window.end` | See the description of the `bitcask.merge.policy` config below. | `23` |
+| `bitcask.merge.window.start` | See the description of the `bitcask.merge.policy` config below. | `0` |
+| `bitcask.merge.policy` | Lets you specify when during the day merge operations are allowed to be triggered. Valid options are: `always`, meaning no restrictions; `never`, meaning that merging will never be attempted; and `window`, specifying the hours during which merging is permitted, where `bitcask.merge.window.start` and `bitcask.merge.window.end` are integers between 0 and 23. If merging has a significant impact on performance of your cluster, or your cluster has quiet periods in which little storage activity occurs, you may want to change this setting from the default. | `always` |
+| `bitcask.merge_check_interval` | Bitcask periodically runs checks to determine whether merges are necessary. This parameter determines how often those checks take place. Expressed as a time unit, e.g. `10s` for 10 seconds, `5m` for 5 minutes, etc. | `3m` |
+| `bitcask.merge_check_jitter` | In order to prevent merge operations from taking place on different nodes at the same time, Riak can apply random variance to merge times, expressed as a percentage of `bitcask.merge_check_interval`. | `30%` |
+| `bitcask.max_merge_size` | Maximum amount of data to merge in one go in the Bitcask backend. | `100GB` |
+| `bitcask.max_file_size` | Describes the maximum permitted size for any single data file in the Bitcask directory. If a write causes the current file to exceed this size threshold then that file is closed, and a new file is opened for writes. | `2GB` |
+| `bitcask.sync.interval` | See the description of the `bitcask.sync.strategy` directly below. | |
+| `bitcask.sync.strategy` | Changes the durability of writes by specifying when to synchronize data to disk. The default setting protects against data loss in the event of application failure (process death) but leaves open a small window in which data could be lost in the event of complete system failure (e.g. hardware, OS, or power). The default mode, `none`, writes data into operating system buffers which will be written to the disks when those buffers are flushed by the operating system. If the system fails, e.g. due to power loss or crash, that data is lost before those buffers are flushed to stable storage. This is prevented by the setting `o_sync`, which forces the operating system to flush to stable storage at every write. The effect of flushing each write is better durability, however write throughput will suffer as each write will have to wait for the write to complete. Available sync strategies: `none`, which will let the operating system manage syncing writes; `o_sync`, which uses the `O_SYNC` flag to force syncs on every write; and `interval`, which will force Bitcask to sync every `bitcask.sync.interval` seconds. | `none` |
+| `bitcask.open_timeout` | Specifies the maximum time Bitcask will block on startup while attempting to create or open the data directory. You generally need not change this value. If for some reason the timeout is exceeded on open you'll see a log message of the form `Failed to start bitcask backend: ...`. Only then should you consider a longer timeout. | `4s` |
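+
+As an illustration, the following `riak.conf` fragment expires Bitcask data
+after a week and restricts merging to an overnight window (a sketch; the
+values are examples, and the setting names come from the table above):
+
+```riakconf
+bitcask.expiry = 7d
+bitcask.merge.policy = window
+bitcask.merge.window.start = 0
+bitcask.merge.window.end = 5
+```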
+
+## Memory Backend
+
+Configurable parameters for Riak's [Memory][plan backend memory] backend.
+
+| Config | Description | Default |
+|:-------|:------------|:--------|
+| `memory_backend.ttl` | Each value written will be written with this "time to live." Once that object's time is up, it will be deleted on the next read of its key. Minimum: `1s`. | |
+| `memory_backend.max_memory_per_vnode` | The maximum amount of memory consumed per vnode by the memory storage backend. Minimum: `1MB`. | |
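+
+For example, to cap each vnode's memory use and expire entries after a day (a
+sketch with illustrative values for the two settings described above):
+
+```riakconf
+memory_backend.ttl = 1d
+memory_backend.max_memory_per_vnode = 128MB
+```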
+
+## Multi Backend
+
+Configurable parameters for Riak's [Multi][plan backend multi] backend, which enables you to utilize multiple data backends in a single Riak cluster.
+
+If you are using multiple backends, you can configure the backends
+individually by prepending the setting with `multi_backend.$name`, where
+`$name` is the name of the backend. `$name` can be any valid
+configuration word, like `customer_data`, `my_data`, `foo_bar_backend`,
+etc.
+
+Below is the general form for setting multi-backend parameters:
+
+```riakconf
+multi_backend.$name.(existing_setting) = <setting_value>
+# or
+multi_backend.$name.$backend_type.(backend_specific_setting) = <setting_value>
+```
+
+Below is a listing of the available parameters:
+
+| Config | Description | Default |
+|:-------|:------------|:--------|
+| `multi_backend.$name.storage_backend` | This parameter specifies the Erlang module defining the storage mechanism that will be used on this node. | `bitcask` |
+| `multi_backend.default` | The default name of a backend when one is not specified. | |
+
+To give an example, if you have a LevelDB backend named
+`customer_backend` and wish to set the `data_root` parameter to
+`$(platform_data_dir)/leveldb_backends/customer_backend/`, you would
+do so as follows:
+
+```riakconf
+multi_backend.customer_backend.storage_backend = leveldb
+multi_backend.customer_backend.leveldb.data_root = $(platform_data_dir)/leveldb_backends/customer_backend
+multi_backend.customer_backend.leveldb.maximum_memory.percent = 50
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/basic.md b/content/riak/kv/3.0.4/configuring/basic.md
new file mode 100644
index 0000000000..f016efe9e1
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/basic.md
@@ -0,0 +1,239 @@
+---
+title: "Basic Riak KV Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Basic Configuration"
+    identifier: "configuring_basic"
+    weight: 100
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/configuration/
+  - /riak/kv/3.0.4/ops/building/configuration/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[use running cluster]: {{<baseurl>}}riak/kv/3.0.4/using/running-a-cluster
+[use admin riak-admin#member-status]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#member-status
+[perf erlang]: {{<baseurl>}}riak/kv/3.0.4/using/performance/erlang
+[plan start]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/start
+[plan best practices]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/best-practices
+[cluster ops backup]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/backing-up
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes
+[plan backend]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/multi
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types
+[apps replication properties]: {{<baseurl>}}riak/kv/3.0.4/developing/app-guide/replication-properties
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency
+[perf benchmark]: {{<baseurl>}}riak/kv/3.0.4/using/performance/benchmarking
+[perf open files]: {{<baseurl>}}riak/kv/3.0.4/using/performance/open-files-limit
+[perf index]: {{<baseurl>}}riak/kv/3.0.4/using/performance
+[perf aws]: {{<baseurl>}}riak/kv/3.0.4/using/performance/amazon-web-services
+[Cluster Capacity Planning]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/cluster-capacity/#ring-size-number-of-partitions
+
+This document covers the parameters that are commonly adjusted when
+setting up a new cluster. We recommend that you also review the detailed
+[Configuration Files][config reference] document before moving a cluster into
+production.
+
+All configuration values discussed here are managed via the
+configuration file on each node, and a node must be restarted for any
+changes to take effect.
+
+> **Note**
+>
+> If you are upgrading to Riak KV version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config` configuration file or
+the newer `riak.conf` if you wish.
+>
+> If you have installed Riak KV 2.0 directly, you should use only
+`riak.conf`.
+>
+> More on configuring Riak KV can be found in the [configuration files][config reference]
+doc.
+
+We advise that you make as many of the changes below as practical
+_before_ joining the nodes together into a cluster.
Once your configuration has been set on each node, follow the steps in
+[Basic Cluster Setup][use running cluster] to complete the clustering process.
+
+Use [`riak-admin member-status`][use admin riak-admin#member-status]
+to determine whether any given node is a member of a cluster.
+
+## Erlang VM Tunings
+
+Prior to building and starting a cluster, there are some
+Erlang-VM-related changes that you should make to your configuration
+files. If you are using the older, `vm.args`-based Erlang VM tunings,
+you should set the following:
+
+```vmargs
++sfwi 500
++scl false
+```
+
+If you are using the newer, `riak.conf`-based configuration system, we
+recommend the following settings:
+
+```riakconf
+erlang.schedulers.force_wakeup_interval = 500
+erlang.schedulers.compaction_of_load = false
+```
+
+More information can be found in [Erlang VM Tuning][perf erlang].
+
+## Ring Size
+
+The ring size, in Riak parlance, is the number of data partitions that
+comprise the cluster. This quantity impacts the scalability and
+performance of a cluster and, importantly, **it should be established
+before the cluster starts receiving data**.
+
+If the ring size is too large for the number of servers, disk I/O will
+be negatively impacted by the excessive number of concurrent databases
+running on each server; if the ring size is too small, the servers' other
+resources (primarily CPU and RAM) will go underutilized.
+
+See [Cluster Capacity Planning] for more details on choosing a ring size.
+
+The steps involved in changing the ring size depend on whether the
+servers (nodes) in the cluster have already been joined together.
+
+### Cluster joined, but no data needs to be preserved
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for the location of this file)
+4. Start all nodes
+5. Re-add each node to the cluster (see [Adding and Removing Nodes][cluster ops add remove node]) or finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### New servers, have not yet joined a cluster
+
+1. Change the ring creation size parameter by uncommenting it and then
+setting it to the desired value, for example 64:
+
+    ```riakconf
+    ring_size = 64
+    ```
+
+    ```appconfig
+    %% In the riak_core section:
+    {ring_creation_size, 64}
+    ```
+
+2. Stop all nodes
+3. Remove the ring data file on each node (see [Backing up Riak][cluster ops backup] for
+the location of this file)
+4. Finish reviewing this document and proceed to [Basic Cluster Setup][use running cluster]
+
+### Verifying ring size
+
+You can use the `riak-admin` command to verify the ring size:
+
+```bash
+riak-admin status | grep ring
+```
+
+Console output:
+
+```
+ring_members : ['riak@10.160.13.252']
+ring_num_partitions : 8
+ring_ownership : <<"[{'riak@10.160.13.252',8}]">>
+ring_creation_size : 8
+```
+
+If `ring_num_partitions` and `ring_creation_size` do not agree, that
+means that the `ring_creation_size` value was changed too late and that
+the proper steps were not taken to start over with a new ring.
+
+**Note**: Riak will not allow two nodes with different ring sizes to be
+joined into a cluster.
+
+## Backend
+
+Another critical decision to be made is the backend to use.
The choice of backend strongly influences the performance characteristics
+and feature set for a Riak environment.
+
+See [Choosing a Backend][plan backend] for a list of supported backends. Each
+referenced document includes the necessary configuration bits.
+
+As with ring size, changing the backend will result in all data being
+effectively lost, so spend the necessary time up front to evaluate and
+benchmark backends.
+
+If still in doubt, consider using the [Multi][plan backend multi] backend for future
+flexibility.
+
+If you do change backends from the default ([Bitcask][plan backend bitcask]), make sure you change it across all nodes. It is possible but generally unwise to use different backends on different nodes, as this would limit the
+effectiveness of backend-specific features.
+
+## Default Bucket Properties
+
+Bucket properties are also very important factors in Riak's performance
+and general behavior. The properties for any individual bucket can be
+configured dynamically [using bucket types][usage bucket types], but default values for those properties can be defined in your [configuration files][config reference].
+
+Below is an example of setting `last_write_wins` to `true` and `r` to 3.
+
+```riakconf
+buckets.default.last_write_wins = true
+buckets.default.r = 3
+```
+
+```appconfig
+{default_bucket_props, [
+    {last_write_wins,true},
+    {r,3},
+    ...
+    ]}
+```
+
+For more on bucket properties, we recommend reviewing our docs on
+[buckets][concept buckets], [bucket types][usage bucket types], [replication properties][apps replication properties], and [eventual consistency][concept eventual consistency], as well as Basho's five-part blog series, "Understanding Riak's Configurable Behaviors."
+
+* [Part 1](https://riak.com/understanding-riaks-configurable-behaviors-part-1/)
+* [Part 2](https://riak.com/riaks-config-behaviors-part-2/)
+* [Part 3](https://riak.com/riaks-config-behaviors-part-3/)
+* [Part 4](https://riak.com/riaks-config-behaviors-part-4/)
+* [Epilogue](https://riak.com/riaks-config-behaviors-epilogue/)
+
+If the default bucket properties are modified in your configuration
+files and the node is restarted, any existing buckets will **not** be
+directly impacted, although the mechanism described in [HTTP Reset Bucket Properties]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/reset-bucket-props) can be used to force them to pick up the new
+defaults.
+
+## System tuning
+
+Please review the following documents before conducting any
+[benchmarking][perf benchmark] and/or rolling out a live production
+cluster.
+
+* [Open Files Limit][perf open files]
+* [System Performance Tuning][perf index]
+* [AWS Performance Tuning][perf aws]
+* [Configuration Files][config reference]
+
+## Joining the nodes together
+
+Please see [Running A Cluster][use running cluster] for the cluster creation process.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/global-object-expiration.md b/content/riak/kv/3.0.4/configuring/global-object-expiration.md
new file mode 100644
index 0000000000..b6ddb4fa00
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/global-object-expiration.md
@@ -0,0 +1,90 @@
+---
+title: "Configure Global Object Expiration"
+description: "Enabling and configuring global object expiration for Riak KV."
+menu:
+  riak_kv-3.0.4:
+    name: "Global Object Expiration"
+    identifier: "config_expiry"
+    weight: 180
+    parent: "configuring"
+project: "riak_kv"
+project_version: 3.0.4
+toc: true
+aliases:
+---
+
+[ttl]: https://en.wikipedia.org/wiki/Time_to_live
+
+By default, LevelDB keeps all of your data. But Riak KV allows you to configure global object expiration (`expiry`) or [time to live (TTL)][ttl] for your data.
+
+Expiration is disabled by default, but enabling it lets you expire older objects to reclaim the space used or purge data with a limited time value.
+
+## Enabling Expiry
+
+To enable global object expiry, add the `leveldb.expiration` setting to your riak.conf file:
+
+```riak.conf
+leveldb.expiration = on
+```
+
+{{% note %}}
+Turning on global object expiration will not retroactively expire previous data. Only data created while expiration is on will be scheduled for expiration.
+{{% /note %}}
+
+## Setting Retention Time
+
+The `retention_time` setting is used to specify the time until objects expire.
+Durations are set using a combination of an integer and a shortcut for the supported units:
+
+- Milliseconds - `ms`
+- Seconds - `s`
+- Minutes - `m`
+- Hours - `h`
+- Days - `d`
+- Weeks - `w`
+- Fortnight - `f`
+
+The following example configures objects to expire after 5 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 5h
+```
+
+You can also combine durations. For example, let's say you wanted objects to expire after 8 days and 9 hours:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 8d9h
+```
+
+## Expiry Modes
+
+Global expiration supports two modes:
+
+- `whole_file` - the whole sorted string table (`.sst`) file is deleted when all of its objects are expired.
+- `normal` - individual objects are removed as part of the usual compaction process.
+
+We recommend using `whole_file` with time series data that has a similar lifespan, as it will be much more efficient.
+
+The following example configures objects to expire after 1 day:
+
+```riak.conf
+leveldb.expiration = on
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+## Disable Expiry
+
+To disable global object expiration, set `leveldb.expiration` to `off` in your riak.conf file. If expiration is disabled, the other two settings are ignored. For example:
+
+```riak.conf
+leveldb.expiration = off
+leveldb.expiration.retention_time = 1d
+leveldb.expiration.mode = whole_file
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/load-balancing-proxy.md b/content/riak/kv/3.0.4/configuring/load-balancing-proxy.md
new file mode 100644
index 0000000000..b1117866c2
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/load-balancing-proxy.md
@@ -0,0 +1,275 @@
+---
+title: "Load Balancing and Proxy Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Load Balancing & Proxy"
+    identifier: "configuring_load_balance"
+    weight: 150
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/advanced/configs/load-balanacing-proxy/
+  - /riak/kv/3.0.4/ops/advanced/configs/load-balanacing-proxy/
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/3.0.4/using/performance/open-files-limit
+
+The recommended best practice for operating Riak in production is to
+place Riak behind a load-balancing or proxy solution, either hardware-
+or software-based, while never directly exposing Riak to public network
+interfaces.
+
+Riak users have reported success in using Riak with a variety of load-
+balancing and proxy solutions. Common solutions include proprietary
+hardware-based load balancers, cloud-based load balancing options, such
+as Amazon's Elastic Load Balancer, and open-source software-based
+projects like HAProxy and Nginx.
+
+This guide briefly explores the commonly used open-source software-based
+solutions HAProxy and Nginx, and provides some configuration and
+operational tips gathered from community users and operations-oriented
+engineers at Basho.
+
+While it is by no means an exhaustive overview of the topic, this guide
+should provide a starting point for choosing and implementing your own
+solution.
+
+## HAProxy
+
+[HAProxy](http://haproxy.1wt.eu/) is a fast and reliable open-source
+solution for load balancing and proxying of HTTP- and TCP-based
+application traffic.
+
+Users have reported success in using HAProxy in combination with Riak in
+a number of configurations and scenarios. Much of the information and
+example configuration for this section is drawn from experiences of
+users in the Riak community in addition to suggestions from Basho
+engineering.
+
+### Example Configuration
+
+The following is an example starting-point configuration for HAProxy to
+act as a load balancer. The example cluster has 4 nodes and will be
+accessed by Riak clients using both the Protocol Buffers and HTTP
+interfaces.
+
+> **Note on open files limits**
+>
+> The operating system's open files limits need to be greater than 256000
+for the example configuration that follows. Consult the [Open Files Limit][perf open files] documentation for details on configuring the value for different operating systems.
+
+```config
+global
+    log 127.0.0.1 local0
+    log 127.0.0.1 local1 notice
+    maxconn 256000
+    chroot /var/lib/haproxy
+    user haproxy
+    group haproxy
+    spread-checks 5
+    daemon
+    quiet
+
+defaults
+    log global
+    option dontlognull
+    option redispatch
+    option allbackups
+    maxconn 256000
+    timeout connect 5000
+
+backend riak_rest_backend
+    mode http
+    balance roundrobin
+    option httpchk GET /ping
+    option httplog
+    server riak1 riak1.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak2 riak2.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak3 riak3.<fqdn>:8098 weight 1 maxconn 1024 check
+    server riak4 riak4.<fqdn>:8098 weight 1 maxconn 1024 check
+
+frontend riak_rest
+    bind 127.0.0.1:8098
+    # Example bind for SSL termination
+    # bind 127.0.0.1:8443 ssl crt /opt/local/haproxy/etc/data.pem
+    mode http
+    option contstats
+    default_backend riak_rest_backend
+
+
+backend riak_protocol_buffer_backend
+    balance leastconn
+    mode tcp
+    option tcpka
+    option srvtcpka
+    server riak1 riak1.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak2 riak2.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak3 riak3.<fqdn>:8087 weight 1 maxconn 1024 check
+    server riak4 riak4.<fqdn>:8087 weight 1 maxconn 1024 check
+
+
+frontend riak_protocol_buffer
+    bind 127.0.0.1:8087
+    mode tcp
+    option tcplog
+    option contstats
+    option tcpka
+    option srvtcpka
+    default_backend riak_protocol_buffer_backend
+```
+
+A specific configuration detail worth noting from the example is the
+commented option for SSL termination. HAProxy supports SSL directly as
+of version 1.5. Provided that your HAProxy instance was built with
+OpenSSL support, you can enable it by uncommenting the example line and
+modifying it to suit your environment. More information is available in
+the [HAProxy
+documentation](http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5-ssl).
+
+Also note that the above example is considered a starting point and is a
+work in progress based upon [this
+example](https://gist.github.com/1507077). You should carefully examine
+the configuration and change it according to your specific environment.
+
+### Maintaining Nodes Behind HAProxy
+
+When using HAProxy with Riak, you can instruct HAProxy to ping each node
+in the cluster and automatically remove nodes that do not respond.
+
+You can also specify a round-robin configuration in HAProxy and have
+your application handle connection failures by retrying after a timeout,
+thereby reaching a functioning node upon retrying the connection
+attempt.
+
+HAProxy also has a standby system you can use to remove a node from
+rotation while allowing existing requests to finish. You can remove
+nodes from HAProxy directly from the command line by interacting with
+the HAProxy stats socket with a utility such as
+[socat](http://www.dest-unreach.org/socat/):
+
+```bash
+echo "disable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+At this point, you can perform maintenance on the node, down the node,
+and so on. When you've finished working with the node and it is again
+available for requests, you can re-enable it:
+
+```bash
+echo "enable server <backend>/<server>" | socat stdio /etc/haproxy/haproxysock
+```
+
+Consult the following HAProxy documentation resources for more
+information on configuring HAProxy in your environment:
+
+* [HAProxy Documentation](http://developers.google.com/s/results/?q=haproxy)
+* [HAProxy Architecture](http://haproxy.1wt.eu/download/1.2/doc/architecture.txt)
+
+## Nginx
+
+Some users have reported success in using the [Nginx](http://nginx.org/)
+HTTP server to proxy requests for Riak clusters. An example that
+provides access to a Riak cluster *through GET requests only* is
+provided here for reference.
+
+### Example Configuration
+
+The following is an example starting point configuration for Nginx to
+act as a front-end proxy to a 5-node Riak cluster.
+
+This example forwards all GET requests to Riak nodes while rejecting all
+other HTTP operations.
+
+{{% note title="Nginx version notes" %}}
+This example configuration was verified on **Nginx version 1.2.3**. Please be
+aware that earlier versions of Nginx did not support any HTTP 1.1 semantics
+for upstream communication to backends. You should carefully examine this
+configuration and make changes appropriate to your specific environment before
+attempting to use it.
+{{% /note %}}
+
+Here is an example `nginx.conf` file:
+
+```config
+upstream riak_hosts {
+  server 10.0.1.10:8098;
+  server 10.0.1.11:8098;
+  server 10.0.1.12:8098;
+  server 10.0.1.13:8098;
+  server 10.0.1.14:8098;
+}
+
+server {
+  listen       80;
+  server_name  _;
+  access_log   /var/log/nginx/riak.access.log;
+
+  # your standard Nginx config for your site here...
+  location / {
+    root /var/www/nginx-default;
+  }
+
+  # Expose the /riak endpoint and allow queries for keys only
+  location /riak/ {
+      proxy_set_header Host $host;
+      proxy_redirect off;
+
+      client_max_body_size    10m;
+      client_body_buffer_size 128k;
+
+      proxy_connect_timeout   90;
+      proxy_send_timeout      90;
+      proxy_read_timeout      90;
+
+      proxy_buffer_size          64k;  # If set to a smaller value,
+                                       # nginx can complain with a
+                                       # "too large headers" error
+      proxy_buffers              4 64k;
+      proxy_busy_buffers_size    64k;
+      proxy_temp_file_write_size 64k;
+
+      if ($request_method != GET) {
+          return 405;
+      }
+
+      # Disallow any link with the MapReduce query format "bucket,tag,_"
+      if ($uri ~ "/riak/[^/]*/[^/]*/[^,]+,[^,]+," ) {
+          return 405;
+      }
+
+      if ($request_method = GET) {
+          proxy_pass http://riak_hosts;
+      }
+  }
+}
+```
+
+{{% note title="Note on access controls" %}}
+Even when filtering and limiting requests to GETs only as done in the example,
+you should strongly consider additional access controls beyond what Nginx can
+provide directly, such as specific firewall rules to limit inbound connections
+to trusted sources.
+{{% /note %}}
+
+### Querying Secondary Indexes Over HTTP
+
+When accessing Riak over HTTP and issuing Secondary Index queries, you
+can encounter an issue due to the default Nginx handling of HTTP header
+names containing underscore (`_`) characters.
+
+By default, Nginx will issue errors for such queries, but you can
+instruct Nginx to handle such header names when doing Secondary Index
+queries over HTTP by adding the following directive to the appropriate
+`server` section of `nginx.conf`:
+
+```
+underscores_in_headers on;
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/managing.md b/content/riak/kv/3.0.4/configuring/managing.md
new file mode 100644
index 0000000000..439abd6f44
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/managing.md
@@ -0,0 +1,121 @@
+---
+title: "Managing Your Configuration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Managing Configuration"
+    identifier: "configuring_managing"
+    weight: 130
+    parent: "configuring"
+toc: true
+aliases:
+---
+
+[use admin riak cli]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-cli
+[use admin riak cli#chkconfig]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-cli/#chkconfig
+[config reference#search]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#search
+
+## Retrieving a Configuration Listing
+
+At any time, you can get a snapshot of currently applied configurations
+through the command line. For a listing of *all* of the configs
+currently applied on the node:
+
+```bash
+riak config effective
+```
+
+This will output a long list of the following form:
+
+```
+anti_entropy = active
+anti_entropy.bloomfilter = on
+anti_entropy.concurrency_limit = 2
+# and so on
+```
+
+For detailed information about a particular configuration variable, use
+the `config describe <variable>` command. This command will output a
+description of what the parameter configures, which datatype you should
+use to set the parameter (integer, string, enum, etc.), the default
+value of the parameter, the currently set value in the node, and the
+name of the parameter in `app.config` in older versions of Riak (if
+applicable).
+
+For in-depth information about the `ring_size` variable, for example:
+
+```bash
+riak config describe ring_size
+```
+
+This will output the following:
+
+```
+Documentation for ring_size
+Number of partitions in the cluster (only valid when first
+creating the cluster).
Must be a power of 2, minimum 8 and maximum
+1024.
+
+   Datatype     : [integer]
+   Default Value: 64
+   Set Value    : undefined
+   app.config   : riak_core.ring_creation_size
+```
+
+## Checking Your Configuration
+
+The [`riak`][use admin riak cli] command line tool has a
+[`chkconfig`][use admin riak cli#chkconfig] command that enables you to
+determine whether the syntax in your configuration files is correct.
+
+```bash
+riak chkconfig
+```
+
+If your configuration files are syntactically sound, you should see the
+output `config is OK` followed by a listing of files that were checked.
+You can safely ignore this listing. If, however, something is
+syntactically awry, you'll see an error output that provides details
+about what is wrong. To give an example, the `search.solr.jmx_port`
+setting (in the [Search][config reference#search] section below) must be
+set as an integer. Imagine that we set it to something else:
+
+```riakconf
+search.solr.jmx_port = banana
+```
+
+If we run `riak chkconfig` now, we'll get an error:
+
+```
+[error] Error generating configuration in phase transform_datatypes
+[error] Error transforming datatype for: search.solr.jmx_port
+[error] "banana" can't be converted to an integer
+```
+
+The error message will specify which configurable parameters are
+syntactically unsound and attempt to provide an explanation why.
+
+Please note that the `chkconfig` command only checks for syntax. It will
+_not_ be able to discern if your configuration is otherwise unsound,
+e.g. if your configuration will cause problems on your operating system
+or doesn't activate subsystems that you would like to use.
+
+## Debugging Your Configuration
+
+If there is a problem with your configuration but you're having trouble
+identifying the problem, there is a command that you can use to debug
+your configuration:
+
+```bash
+riak config generate -l debug
+```
+
+If there are issues with your configuration, you will see detailed
+output that might provide a better sense of what has gone wrong in the
+config generation process.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/mapreduce.md b/content/riak/kv/3.0.4/configuring/mapreduce.md
new file mode 100644
index 0000000000..86e2f46e72
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/mapreduce.md
@@ -0,0 +1,200 @@
+---
+title: "MapReduce Settings"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "MapReduce Settings"
+    identifier: "configuring_mapreduce"
+    weight: 170
+    parent: "configuring"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/advanced/configs/mapreduce/
+  - /riak/kv/3.0.4/ops/advanced/configs/mapreduce/
+---
+
+[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce
+[config reference#appconfig]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#app-config
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes
+
+## Configuring MapReduce
+
+[MapReduce (M/R)][usage mapreduce] is always enabled, but is configurable
+through the [app.config][config reference#appconfig] file as follows,
+under `riak_kv`:
+
+```erlang
+{riak_kv, [
+```
+
+`mapred_name` is the URL directory used to submit M/R requests to Riak.
+The default is `mapred`, making the full path, for example,
+`http://localhost:8098/mapred`.
+
+```erlang
+    {mapred_name, "mapred"},
+```
+
+`mapred_2i_pipe` indicates whether [2i][usage secondary-indexes]
+MapReduce inputs are queued in parallel in their own pipe (`true`), or
+serially through a helper process (`false` or undefined).
+ +> **Note**: Set to `false` or leave undefined during an upgrade from 1.0. + +```erlang + {mapred_2i_pipe, true}, +``` + +Each of these entries control how many Javascript virtual machines are +available for executing map, reduce, pre- and post-commit hook +functions. + +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {map_js_vm_count, 8 }, + {reduce_js_vm_count, 6 }, + {hook_js_vm_count, 2 }, +``` + +`js_max_vm_mem` is the maximum amount of memory, in megabytes, allocated +to the Javascript VMs. If unset, the default is 8MB. + +This is largely relevant only if you are writing JavaScript M/R jobs. + +```erlang + {js_max_vm_mem, 8}, +``` + +`js_thread_stack` is the maximum amount of thread stack, in megabytes, +allocated to the Javascript VMs. If unset, the default is 16MB. + +> **Note**: This is not the same as the C thread stack. + +```erlang + {js_thread_stack, 16}, +``` + +`js_source_dir` should point to a directory containing Javascript source +files which will be loaded when Riak initializes Javascript VMs. + +```erlang + %{js_source_dir, "/tmp/js_source"}, +``` + + + +## Configuration Tuning for Javascript + +If you load larger JSON objects in your buckets there is a possibility you might encounter an error like the following: + +```json + {"lineno":465,"message":"InternalError: script stack space quota is exhausted","source":"unknown"} +``` + + +You can increase the amount of memory allocated to the Javascript VM stack by editing your app.config. The following will increase the stack size from 8MB to 32MB: + +```erlang +{js_thread_stack, 8} +``` + +becomes + +```erlang +{js_thread_stack, 32}, +``` + +In addition to increasing the amount of memory allocated to the stack you can increase the heap size as well by increasing the `js_max_vm_mem` from the default of 8MB. If you are collecting a large amount of results in a reduce phase you may need to increase this setting. + +## Configuration for Riak 1.0 + +Riak 1.0 is the first release including the new MapReduce subsystem known as Riak Pipe. By default, new Riak clusters will use Riak Pipe to power their MapReduce queries. Existing Riak clusters that are upgraded to Riak 1.0 will continue to use the legacy MapReduce system unless the following line is added to the riak_kv section of each node's app.config: + +```erlang +%% Use Riak Pipe to power MapReduce queries +{mapred_system, pipe}, +``` + +> **Warning:** +> +> Do not enable Riak Pipe for MapReduce processing until all nodes in the cluster are running Riak 1.0. + +Other than speed and stability of the cluster, the choice of MapReduce subsystem (Riak Pipe or legacy) should be invisible to your client. All queries should have the same syntax and return the same results on Riak 1.0 with Riak Pipe as they did on earlier versions with the legacy subsystem. If you should find a case where this is not true, you may revert to using the legacy subsystem by either removing the aforementioned line in your app.config or by changing it to read like this: + +```erlang +%% Use the legacy MapReduce system +{mapred_system, legacy}, +``` + +## Configuration Tuning for Reduce Phases + +If you are using Riak 1.0 and the Riak Pipe subsystem for MapReduce queries, you have additional options for tuning your reduce phases. + +### Batch Size + +By default, Riak will evaluate a reduce function every time its phase receives 20 new inputs. 
If your reduce phases would run more efficiently with more or fewer new inputs, you may change this default by adding the following to the riak_kv section of your app.config:
+
+```erlang
+%% Run reduce functions after 100 new inputs are received
+{mapred_reduce_phase_batch_size, 100},
+```
+
+You may also control this batching behavior on a per-query basis by using the static argument of the phase specification. When specifying phases over HTTP, the JSON configuration for evaluating the function after 150 new inputs looks like this:
+
+```json
+{"reduce":
+  {...language, etc. as usual...
+   "arg":{"reduce_phase_batch_size":150}}}
+```
+
+In Erlang, you may either specify a similar mochijson2 structure for the phase argument, or use the simpler proplist form:
+
+```erlang
+{reduce, FunSpec, [{reduce_phase_batch_size, 150}], Keep}
+```
+
+Finally, if you want your reduce function to be evaluated only once, after all inputs are received, use this argument instead:
+
+```json
+{"reduce":
+  {...language, etc. as usual...
+   "arg":{"reduce_phase_only_1":true}}}
+```
+
+Similarly, in Erlang:
+
+```erlang
+{reduce, FunSpec, [reduce_phase_only_1], Keep}
+```
+
+> **Warning:**
+>
+> A known bug in Riak 1.0.0 means that it is possible a reduce function may run more often than specified if handoff happens while the phase is accumulating inputs. This bug was fixed in 1.0.1.
+
+### Pre-Reduce
+
+If your reduce functions can benefit from parallel execution, it is possible to request that the outputs of a preceding map phase be reduced local to the partition that produced them, before being sent, as usual, to the final aggregate reduce.
+
+Pre-reduce is disabled by default. To enable it for all reduce phases by default, add the following to the riak_kv section of your app.config:
+
+```erlang
+%% Always pre-reduce between map and reduce phases
+{mapred_always_prereduce, true}
+```
+
+Pre-reduce may also be enabled or disabled on a per-phase basis via the Erlang API for map phases implemented in Erlang. To enable pre-reduce, for any map phase followed by a reduce phase, pass a proplist as its static phase argument and include the following flag:
+
+```erlang
+{map, FunSpec, [do_prereduce], Keep}
+```
+
+> **Warning:**
+>
+> A known bug in Riak 1.0.0 prevents per-phase pre-reduce from being enabled over HTTP. This bug also prevents per-phase pre-reduce from being enabled for Javascript phases. Use the global app.config flag for these cases. This bug was fixed in 1.0.1.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/next-gen-replication.md b/content/riak/kv/3.0.4/configuring/next-gen-replication.md
new file mode 100644
index 0000000000..fa44b7b41a
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/next-gen-replication.md
@@ -0,0 +1,63 @@
+---
+title_supertext: "Configuring:"
+title: "Next Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "3.0.4"
+menu:
+  riak_kv-3.0.4:
+    name: "Next Gen Replication"
+    identifier: "nextgen_rep"
+    weight: 200
+    parent: "configuring"
+version_history:
+  in: "2.9.1+"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+The configuration for Next Gen Replication is kept in
+the `riak.conf` configuration file.
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by
+running the `riak` command-line tool:
+
+```bash
+riak chkconfig
+```
+
+## riak.conf Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ttaaefs_scope` | `{disabled, all, bucket, type}` | **REQUIRED** | For Tictac full-sync does all data need to be sync'd, or should a specific bucket be sync'd (bucket), or a specific bucket type (type). Note that in most cases sync of all data is lower overhead than sync of a subset of data - as cached AAE trees will be used.
+`ttaaefs_queuename` | `text` | `q1_ttaaefs` | For Tictac full-sync what registered queue name on this cluster should be used for passing references to data which needs to be replicated for AAE full-sync. This queue name must be defined as a `riak_kv.replq_queuename`, but need not be exclusive to full-sync (i.e. a real-time replication queue may be used as well).
+`ttaaefs_maxresults` | `any` (integer) | `64` | For Tictac full-sync what is the maximum number of AAE segments to be compared per exchange. Reducing this will speed up clock compare queries, but will increase the number of exchanges required to complete a repair.
+`ttaaefs_rangeboost` | `any` (integer) | `8` | For Tictac full-sync, the multiplier applied when running a range_check query: the maximum number of AAE segments to be compared per exchange will be ttaaefs_maxresults * ttaaefs_rangeboost.
+`ttaaefs_bucketfilter_name` | `any` (text) | `` | For Tictac bucket full-sync which bucket should be sync'd by this node. Only ascii string bucket definitions supported (which will be converted using list_to_binary).
+`ttaaefs_bucketfilter_type` | `any` (text) | `default` | For Tictac bucket full-sync what is the bucket type of the bucket name. Only ascii string type bucket definitions supported (these definitions will be converted to binary using list_to_binary).
+`ttaaefs_localnval` | `any` (integer) | `3` | For Tictac all full-sync which NVAL should be sync'd by this node. This is the `local` nval, as the data in the remote cluster may have an alternative nval.
+`ttaaefs_remotenval` | `any` (integer) | `3` | For Tictac all full-sync which NVAL should be sync'd in the remote cluster.
+`ttaaefs_peerip` | `127.0.0.1` (text) | `` | The network address of the peer node in the cluster to which this node will connect for full-sync purposes. If this peer node is unavailable, then this local node will not perform any full-sync actions, so alternative peer addresses should be configured in other nodes.
+`ttaaefs_peerport` | `8898` (integer) | `` | The port to be used when connecting to the remote peer cluster.
+`ttaaefs_peerprotocol` | `http`, `pb` | `http` | The protocol to be used when connecting to the peer in the remote cluster. Could be http or pb (but only http currently being tested).
+`ttaaefs_allcheck` | `any` (integer) | `24` | How many times per 24-hour period should all the data be checked to confirm it is fully sync'd. When running a full (i.e. nval) sync this will check all the data under that nval between the clusters, and when the trees are out of alignment, will check across all data where the nval matches the specified nval.
+`ttaaefs_nocheck` | `any` (integer) | `0` | How many times per 24-hour period should no data be checked to confirm it is fully sync'd. Use nochecks to align the number of checks done by each node - if each node has the same number of slots, they will naturally space their checks within the period of the slot.
`ttaaefs_hourcheck` | `any` (integer) | `0` | How many times per 24-hour period should the last hour's data be checked to confirm it is fully sync'd.
+`ttaaefs_daycheck` | `any` (integer) | `0` | How many times per 24-hour period should the last 24 hours of data be checked to confirm it is fully sync'd.
+`ttaaefs_rangecheck` | `any` (integer) | `0` | How many times per 24-hour period should a range_check be run.
+`ttaaefs_logrepairs` | `enabled`, `disabled` | `enabled` | If Tictac AAE full-sync discovers keys to be repaired, should each key that is repaired be logged.
+`tictacaae_active` | `active`, `passive` | `passive` | Enable or disable tictacaae. Note that tictacaae_active is read only at startup - setting the environment variable at runtime will have no impact.
+`aae_tokenbucket` | `enabled`, `disabled` | `enabled` | To protect against unbounded queues developing and subsequent timeouts/crashes of the AAE process, back-pressure signalling is used to block the vnode should a backlog develop on the AAE process. This can be disabled.
+`tictacaae_dataroot` | `` | `"$platform_data_dir/tictac_aae"` | Set the path for storing tree caches and parallel key stores. Note that at startup folders may be created for every partition, and not removed when that partition hands off (although the contents should be cleared).
+`tictacaae_parallelstore` | `leveled_ko`, `leveled_so` | `leveled_so` | On startup, if tictacaae is enabled, then the vnode will detect if the vnode backend has the capability to be a "native" store. If not, then parallel mode will be entered, and a parallel AAE keystore will be started. There are two potential parallel store backends - leveled_ko, and leveled_so.
+`tictacaae_rebuildwait` | `` | `336` | This is the number of hours between rebuilds of the Tictac AAE system for each vnode. A rebuild will invoke a rebuild of the key store (which is a null operation when in native mode), and then a rebuild of the tree cache from the rebuilt store.
+`tictacaae_rebuilddelay` | `` | `345600` | Once the AAE system has expired (due to the rebuild wait), the rebuild will not be triggered until the rebuild delay has passed; the actual delay will be a random number of seconds up to this value.
+`tictacaae_storeheads` | `enabled`, `disabled` | `disabled` | By default when running a parallel keystore, only a small amount of metadata is required for AAE purposes, and with store heads disabled only that small amount of metadata is stored.
+`tictacaae_exchangetick` | `` | `240000` | Exchanges are prompted every exchange tick, on each vnode. By default there is a tick every 4 minutes. Exchanges will skip when previous exchanges have not completed, in order to prevent a backlog of fetch-clock scans developing.
+`tictacaae_rebuildtick` | `` | `3600000` | Rebuilds will be triggered depending on the riak_kv.tictacaae_rebuildwait, but they must also be prompted by a tick. The tick size can be modified at run-time by setting the environment variable via riak attach.
+`tictacaae_maxresults` | `` | `256` | The Merkle tree used has 4096 * 1024 leaves. When a large discrepancy is discovered, only part of the discrepancy will be resolved each exchange - active anti-entropy is intended to be a background process for repairing long-term loss of data; hinted handoff and read-repair are the short-term and immediate answers to entropy. How much of the tree is repaired each pass is defined by the tictacaae_maxresults.
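+
+As a rough sketch of how these settings fit together, the fragment below
+shows a `riak.conf` full-sync configuration for all data between two
+clusters. The peer address and port are illustrative placeholders, not
+defaults, and should point at a real node in the remote cluster:
+
+```riakconf
+# Hypothetical full-sync setup; peer details are placeholders, the
+# remaining values restate the defaults documented above.
+ttaaefs_scope = all
+ttaaefs_queuename = q1_ttaaefs
+ttaaefs_localnval = 3
+ttaaefs_remotenval = 3
+ttaaefs_peerip = 10.0.0.1
+ttaaefs_peerport = 8898
+ttaaefs_peerprotocol = http
+ttaaefs_allcheck = 24
+```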
+ diff --git a/content/riak/kv/3.0.4/configuring/reference.md b/content/riak/kv/3.0.4/configuring/reference.md new file mode 100644 index 0000000000..f7973ba319 --- /dev/null +++ b/content/riak/kv/3.0.4/configuring/reference.md @@ -0,0 +1,2034 @@ +--- +title: "Riak KV Configuration Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Configuration Reference" + identifier: "configuring_reference" + weight: 140 + parent: "configuring" +toc: true +aliases: + - /riak/3.0.4/ops/advanced/configs/configuration-files/ + - /riak/kv/3.0.4/ops/advanced/configs/configuration-files/ +--- + +[concept clusters]: ../../learn/concepts/clusters +[plan backend bitcask]: ../../setup/planning/backend/bitcask +[config backend bitcask]: ../../setup/planning/backend/bitcask/#configuring-bitcask +[plan backend leveldb]: ../../setup/planning/backend/leveldb +[config backend leveldb]: ../../setup/planning/backend/leveldb/#configuring-eleveldb +[plan backend leveled]: ../../setup/planning/backend/leveled +[config backend leveled]: ../../setup/planning/backend/leveled/#configuring-leveled +[plan backend memory]: ../../setup/planning/backend/memory +[config backend memory]: ../../setup/planning/backend/memory/#configuring-the-memory-backend +[plan backend multi]: ../../setup/planning/backend/multi +[config backend multi]: ../../setup/planning/backend/multi/#configuring-multiple-backends-1 +[use admin riak cli]: ../../using/admin/riak-cli +[use admin riak-admin]: ../../using/admin/riak-admin +[glossary aae]: ../../learn/glossary/#active-anti-entropy-aae +[use ref search 2i]: ../../using/reference/secondary-indexes +[cluster ops bucket types]: ../../using/cluster-operations/bucket-types +[usage conflict resolution]: ../../developing/usage/conflict-resolution +[concept causal context]: ../../learn/concepts/causal-context +[usage mapreduce]: ../../developing/usage/mapreduce +[security index]: ../../using/security/ +[cluster ops strong consistency]: ../../using/cluster-operations/strong-consistency +[glossary vnode]: ../../learn/glossary/#vnode +[cluster ops handoff]: ../../using/cluster-operations/handoff +[Search Settings]: ../search#search-config-settings + +Riak has a `riak.conf` configuration file located in `/etc` if you are +using a source install or in `/etc/riak` or `/usr/local/etc` if you used +a binary install. + +The `riak.conf` file is used to set a wide variety of attributes for the +node, from the storage backend that the node will use to store data to +the location of SSL-related files to sibling resolution parameters and +beyond. + +> **Note on upgrades to 2.0** +> +> If your cluster is currently running a version of Riak prior to 2.0 and +you'd like to upgrade to version 2.0 or later, you may continue to use +your old `app.config` and `vm.args` files. You may also use the newer +`riak.conf` alongside them, but please be aware that any settings in +`app.config` or `vm.args` will override settings in `riak.conf`. + +## The advanced.config file + +For most Riak installations, the `riak.conf` file should be sufficient +for configuration management. But some installations, particularly those +upgrading from an earlier version of Riak to version 2.0 or later, may +need to make use of an `advanced.config` file to control some settings +available only in versions prior to 2.0. If this applies to your +installation, please see the [Advanced Configuration](#advanced-configuration) section below. 
+ +## Node Metadata + +Every Riak node has a name and a cookie used to facilitate inter-node +communication. The following parameters enable you to customize the name +and cookie. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`distributed_cookie` | Cookie for distributed node communication within a Riak cluster. All nodes in the same cluster should use the same cookie or they will not be able to communicate. | `riak`
+`nodename` | The name of the Riak node. | `riak@127.0.0.1`
+`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
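+
+For example, a minimal sketch of these parameters in `riak.conf` (the
+node name is an illustrative value; the other two restate the defaults):
+
+```riakconf
+nodename = riak@192.168.1.10
+distributed_cookie = riak
+ring_size = 64
+```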
+ +## Ring + +Configurable parameters for your cluster's [ring][concept clusters]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`ring.state_dir` | Default location of ringstate. | `./data/ring`
+`ring_size` | Number of partitions in the cluster (only valid when first creating the cluster). Must be a power of 2. The minimum is 8 and the maximum is 1024. | `64`
+`transfer_limit` | Number of concurrent node-to-node transfers allowed. | `2`
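+
+As an illustrative sketch, raising the concurrent transfer limit in
+`riak.conf` (the value is an example, not a recommendation):
+
+```riakconf
+transfer_limit = 4
+```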
+ +## Storage Backend + +Riak enables you to choose from the following storage backends: + +* [Bitcask][plan backend bitcask] - [configuration][config backend bitcask] +* [LevelDB][plan backend leveldb] - [configuration][config backend leveldb] +* [Leveled][plan backend leveled] - [configuration][config backend leveled] +* [Memory][plan backend memory] - [configuration][config backend memory] +* [Multi][plan backend multi] - [configuration][config backend multi] + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`storage_backend` | Specifies the storage engine used for Riak's key-value data and secondary indexes (if supported). The available options are `bitcask` (the default), `leveldb`, `memory`, `leveled` and `multi`. | `bitcask`
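+
+For example, to select LevelDB rather than the default Bitcask backend,
+a minimal sketch would be:
+
+```riakconf
+storage_backend = leveldb
+```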
+ +## Directories + +The directories in which Riak stores data, logs, dependencies, +executables, and configuration files can be configured using the +parameters below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`platform_bin_dir` | The directory in which the `riak-admin`, `riak-debug`, and now-deprecated `search-cmd` executables are stored. | `./bin`
+`platform_data_dir` | The directory in which Riak stores its storage backend data, as well as active anti-entropy data, and cluster metadata. | `./data`
+`platform_etc_dir` | The directory in which Riak's configuration files are stored. | `./etc`
+`platform_lib_dir` | The directory in which Riak's dependencies are housed. | `./lib`
+`platform_log_dir` | The directory in which Riak's log files are stored, e.g. `console.log`, `erlang.log`, and `crash.log` files. | `./log`
+
+Each of these directory parameters can be used to construct values for
+other parameters by placing them within a `$(...)`. Thus,
+`platform_log_dir` becomes `$(platform_log_dir)` and so on.
+
+To give an example, you can select the directory used by Riak's [active anti-entropy](#active-anti-entropy) system using the
+`anti_entropy.data_dir` parameter. When setting that parameter, you can
+specify an absolute directory, as below:
+
+```riakconf
+anti_entropy.data_dir = /path/to/anti_entropy
+```
+
+Or you can use the value of `platform_data_dir`:
+
+```riakconf
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```
+
+## Search
+
+Configuration parameters for [Riak KV Search][use ref search 2i]. For a more detailed description of these parameters, check out [Search Settings].
+
+Field | Default | Valid values |
+:-----|:--------|:-------------|
+`search` | `off` | `on` or `off`
+`search.anti_entropy.data_dir` | `./data/yz_anti_entropy` | Directory
+`search.anti_entropy.throttle.$tier.delay` | No default | Non-negative integer
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | No default | Non-negative integer
+`search.dist_query` | `on` | `on` or `off`
+`search.index.error_threshold.failure_count` | `3` | Integer
+`search.index.error_threshold.failure_interval` | `5000` | Milliseconds
+`search.index.error_threshold.reset_interval` | `30000` | Milliseconds
+`search.queue.batch.flush_interval` | `1000` | `ms`, `s`, `m`, `h`
+`search.queue.batch.maximum` | `100` | Integer
+`search.queue.batch.minimum` | `1` | Integer
+`search.queue.high_watermark` | `10000` | Integer
+`search.queue.high_watermark.purge_strategy` | `purge_one` | `purge_one`, `purge_index`, or `off`
+`search.root_dir` | `./data/yz` | Directory
+`search.solr.jvm_options` | `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops` | Java command-line arguments
+`search.solr.jmx_port` | `8985` | Integer
+`search.solr.port` | `8093` | Integer
+`search.solr.start_timeout` | `30s` | Integer with time units (e.g. `2m`)
+`yokozuna.aae_throttle_enabled` | `on` | `on` or `off`
+
+
+## Riak Control
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters. The configurable parameters below enable you
+to turn the Riak Control subsystem on and off and to configure console
+authorization.
+
+Config | Description | Default
+:------|:------------|:-------
+`riak_control` | Set to `off` to disable the admin panel. | `off`
+`riak_control.auth.mode` | Authentication mode used for access to the admin panel. Options are `off` (which is the default) or `userlist`. | `off`
+`riak_control.auth.user.$username.password` | If Riak Control's authentication mode (`riak_control.auth.mode`) is set to `userlist`, this is the list of usernames and passwords for access to the admin panel. | 
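+
+A minimal sketch enabling Riak Control with userlist authentication; the
+username and password here are placeholders, not suggested values:
+
+```riakconf
+riak_control = on
+riak_control.auth.mode = userlist
+riak_control.auth.user.admin.password = examplepassword
+```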
+ +## Runtime Health + +Configurable parameters for interaction between Riak and the underlying +operating system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`runtime_health.triggers.distribution_port` | Whether distribution ports with full input buffers will be counted as busy. Distribution ports connect Riak nodes within a single cluster. | `on`
+`runtime_health.triggers.port` | Whether ports with full input buffers will be counted as busy. Ports can represent open files or network sockets. | `on`
+`runtime_health.triggers.process.heap_size` | A process will become busy when its heap exceeds this size (in bytes). | `160444000`
+`runtime_health.triggers.process.garbage_collection` | A process will become busy when it exceeds this amount of time doing garbage collection. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. Note: Enabling this setting can cause performance problems on multi-core systems. | `off`
+`runtime_health.triggers.process.long_schedule` | A process will become busy when it exceeds this amount of time during a single process scheduling and execution cycle. Set as an integer plus time unit, e.g. `50ms` for 50 milliseconds, `5s` for 5 seconds, etc. | `off`
+`runtime_health.thresholds.busy_ports` | The threshold at which a warning will be triggered about the number of ports that are overly busy. Ports with full input buffers count toward this threshold. | `2`
+`runtime_health.thresholds.busy_processes` | The threshold at which a warning will be triggered about the number of processes that are overly busy. Processes with large heaps or that take a long time to garbage collect will count toward this threshold. | `30`
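+
+As an illustrative sketch, enabling the garbage-collection trigger and
+raising the busy-process threshold (example values only):
+
+```riakconf
+runtime_health.triggers.process.garbage_collection = 50ms
+runtime_health.thresholds.busy_processes = 40
+```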
+ +## Default Bucket Properties + +When configuring buckets [using bucket types][cluster ops bucket types], the table below lists the bucket properties that are used when no bucket type is specified. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`buckets.default.allow_mult` | Whether or not siblings are allowed. Note: See Conflict Resolution for a discussion of siblings. | `false`
+`buckets.default.basic_quorum` | Whether not-founds will invoke the "basic quorum" optimization. This setting will short-circuit fetches where the majority of replicas report that the key is not found. Only used when `notfound_ok` is set to `false`. | `false`
+`buckets.default.dw` | The number of replicas which must reply to a write request indicating that the write was committed to durable storage for the write to be deemed successful. | `quorum`
+`buckets.default.last_write_wins` | Whether conflicting writes resolve via timestamp. | `false`
+`buckets.default.merge_strategy` | The strategy used when merging objects that potentially have conflicts. The default is `2` in Riak 2.0 for typed buckets and `1` for non-typed buckets. This setting reduces sibling creation through additional metadata on each sibling (also known as Dotted Version Vectors). Setting this to `1` is the default for Riak 1.4 and earlier, and may duplicate siblings that originated in the same write. | `1`
+`buckets.default.n_val` | The number of replicas stored in **non-typed** buckets. For typed buckets, the default is `3` unless changed explicitly for that bucket type. Note: See Replication Properties for further discussion. | `3`
+`buckets.default.notfound_ok` | Whether not-founds will count toward a quorum of reads. | `true`
+`buckets.default.postcommit` | A space-delimited list of functions that will be run after a value is stored. Only Erlang functions are allowed, using the `module:function` format. | 
+`buckets.default.precommit` | A space-delimited list of functions that will be run before a value is stored, and that can abort the write. Only Erlang functions are allowed, using the `module:function` format. | 
+`buckets.default.pr` | The number of primary, non-fallback replicas that must reply to a read request. | `0`
+`buckets.default.pw` | The number of primary, non-fallback replicas which must reply to a write request. | `0`
+`buckets.default.r` | The number of replicas which must reply to a read request. | `quorum`
+`buckets.default.w` | The number of replicas which must reply to a write request, indicating that the write was received. | `quorum`
+`buckets.default.rw` | The number of replicas which must reply to a delete request. | `quorum`
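+
+For instance, a sketch that keeps siblings enabled and requires two
+primary replicas on read (illustrative values, not recommendations):
+
+```riakconf
+buckets.default.allow_mult = true
+buckets.default.n_val = 3
+buckets.default.pr = 2
+```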
+ +## Object Settings + +Configurable parameters for [conflict resolution][usage conflict resolution] and dealing with [sibling explosion][concept causal context]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`object.format` | Controls which binary representation of a riak value is stored on disk. Options are `0`, which will use the original `erlang:term_to_binary` format but has a higher space overhead, or `1`, which will tell Riak to utilize a new format for more compact storage of small values. | `1`
+`object.siblings.maximum` | Writing an object with more than this number of siblings will send a failure to the client. | `100`
+`object.siblings.warning_threshold` | Writing an object with more than this number of siblings will generate a warning in the logs. | `25`
+`object.size.maximum` | Writing an object larger than this will send a failure to the client. | `50MB`
+`object.size.warning_threshold` | Reading or writing objects larger than this size will write a warning in the logs. | `5MB`
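+
+A sketch tightening the sibling and object-size warnings (example
+values, not recommendations):
+
+```riakconf
+object.siblings.warning_threshold = 10
+object.size.warning_threshold = 1MB
+object.size.maximum = 10MB
+```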
+ +## Erlang VM + +In the older configuration system, the Erlang VM in which Riak runs was +configured using a `vm.args` file. In the new, `riak.conf`-based +system, the Erlang VM can be configured using the parameters in the +table below. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`erlang.async_threads` | The number of threads in the Erlang VM's asynchronous thread pool. The valid range is 0-1024. If thread support is not available, this parameter will have no impact; if thread support is available, the default value is 64. This is the equivalent of the `+A` flag. | `64` (if thread support is available)
+`erlang.async_threads.stack_size` | If thread support is available in your Erlang VM, this parameter sets the amount of memory allocated to each asynchronous thread, which you can set as KB, MB, GB, etc. The valid range is 16-8192 kilowords, which translates to 64-32768 KB on 32-bit architectures. Although there is no default, we suggest a stack size of 16 kilowords, which translates to 64 KB. This small default size has been chosen because the number of asynchronous threads, set using the `erlang.async_threads` parameter explained above, might be quite large. The 64 KB default is enough for drivers delivered with Erlang/OTP but might not be large enough to accommodate drivers that use the `driver_async()` functionality. | 
+`erlang.distribution.net_ticktime` | The net kernel is an Erlang system process that provides various forms of network monitoring. In a Riak cluster, one of the functions of the net kernel is to periodically check node liveness. Tick time is the frequency with which those checks happen. If you set this parameter to `10`, for example, the tick will occur once every 10 seconds. | 
+`erlang.distribution.port_range.minimum` | For ease of firewall configuration, the Erlang distribution can be bound to a limited range of TCP ports. If this parameter is set, and `erlang.distribution.port_range.maximum` is not set, only this port will be used. If the minimum is unset, no restriction will be made on the port range. Instead, Erlang will listen on a random high-numbered port. | 
+`erlang.distribution.port_range.maximum` | See the description for `erlang.distribution.port_range.minimum` directly above. | 
+`erlang.schedulers.force_wakeup_interval` | Set the scheduler forced wakeup interval. All run queues will be scanned each time period specified (in milliseconds). While there are sleeping schedulers in the system, one scheduler will be woken for each non-empty run queue found. An interval of zero disables this feature, which is the default. This feature is a workaround for lengthy executing native code, and native code that does not properly bump reductions. | 
+`erlang.schedulers.compaction_of_load` | Enables or disables the Erlang scheduler's compaction of load. When enabled (which is the default), load balancing will strive to establish a load distribution that causes as many scheduler threads as possible to be fully loaded, i.e. not to run out of scheduled work. This is accomplished by migrating load, such as running processes, into a smaller set of schedulers when schedulers frequently run out of work. When disabled, the frequency at which schedulers run out of work will not be taken into account by the load balancing logic. | `true` (enabled)
+`erlang.schedulers.utilization_balancing` | Enables or disables the Erlang scheduler's balancing of load. By default, scheduler utilization balancing is disabled while scheduler compaction of load is enabled, i.e. `erlang.schedulers.compaction_of_load` is set to `true`. In this state, the Erlang VM will strive for a load distribution which causes as many scheduler threads as possible to be fully loaded, i.e. to not run out of work. When load balancing is enabled using this setting, the system will instead attempt to balance scheduler utilization equally between schedulers. | `false` (disabled)
+`erlang.distribution_buffer_size` | For nodes with many `busy_dist_port` events, Basho recommends raising the sender-side network distribution buffer size. 32MB may not be sufficient for some workloads and is a suggested starting point. Erlangers may know this as `zdbbl`. | `32MB`
+`erlang.process_limit` | Raises the default Erlang process limit. | `256000`
+`erlang.max_ets_tables` | Raises the ETS table limit. | `256000`
+`erlang.crash_dump` | Sets the location of crash dumps. | `./log/erl_crash.dump`
+`erlang.fullsweep_after` | A non-negative integer which indicates how many times generational garbage collections can be done without forcing a fullsweep collection. In low-memory systems (especially without virtual memory), setting the value to `0` can help to conserve memory. | `0`
+`erlang.max_ports` | The number of concurrent ports/sockets. The valid range is 1024 to 134217727. | `65536`
+`erlang.K` | Enables or disables the kernel poll functionality if the emulator supports it. If the emulator does not support kernel poll, and the `K` flag is passed to the emulator, a warning is issued at startup. | `on`
+`erlang.schedulers.total` | Sets the number of scheduler threads to create and scheduler threads to set online when `erlang.smp` support has been enabled. The maximum for both values is 1024. If the Erlang runtime system is able to determine the amount of logical processors configured and logical processors available, `schedulers.total` will default to logical processors configured, and `schedulers.online` will default to the number of logical processors available. Otherwise, the default values will be 1. Schedulers may be omitted if `schedulers.online` is not and vice versa. If `schedulers.total` or `schedulers.online` is specified as a negative number, the value is subtracted from the default number of logical processors configured or logical processors available, respectively. Specifying the value `0` for `Schedulers` or `SchedulersOnline` resets the number of scheduler threads or scheduler threads online, respectively, to its default value. This option is ignored if the emulator doesn't have SMP support enabled (see the `erlang.smp` flag). | 
+`erlang.schedulers.online` | See the description for `erlang.schedulers.total` directly above. | 
+`erlang.W` | Sets the mapping of warning messages for `error_logger`. Messages sent to the error logger using one of the warning routines can be mapped either to errors, warnings (`w`, which is the default), or info reports (`i`). | `w`
+`erlang.smp` | Starts the Erlang runtime system with SMP support enabled. This may fail if no runtime system with SMP support is available. The `auto` setting starts the Erlang runtime system with SMP support enabled if it is available and more than one logical processor is detected. A value of `disable` starts a runtime system without SMP support. Note: The runtime system with SMP support will not be available on all supported platforms. See also the `erlang.schedulers` settings. Some native extensions (NIFs) require use of the SMP emulator. | `enable`
+`erlang.shutdown_time` | Limits how long the Erlang VM spends shutting down. After the specified duration elapses, all existing processes are killed. | `10s`
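+
+For example, a sketch raising the distribution buffer on a node that
+logs frequent `busy_dist_port` events (tune against your own workload
+rather than copying these values):
+
+```riakconf
+erlang.distribution_buffer_size = 32MB
+erlang.process_limit = 256000
+```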
+ +## JavaScript MapReduce + +Configurable parameters for Riak's now-deprecated JavaScript +[MapReduce][usage mapreduce] system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`javascript.source_dir` | A directory containing the JavaScript source files which will be loaded by Riak when it initializes JavaScript VMs. | 
+`javascript.maximum_stack_size` | The maximum amount of thread stack memory to allocate to each JavaScript virtual machine. | `16MB`
+`javascript.maximum_heap_size` | The maximum amount of memory allocated to each JavaScript virtual machine. | `8MB`
+`javascript.hook_pool_size` | The number of JavaScript virtual machines available for executing pre-commit hook functions. | `2`
+`javascript.reduce_pool_size` | The number of JavaScript virtual machines available for executing reduce functions. | `6`
+`javascript.map_pool_size` | The number of JavaScript virtual machines available for executing map functions. | `8`
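+
+A sketch of these pool settings in `riak.conf` form, restating the
+defaults shown above:
+
+```riakconf
+javascript.map_pool_size = 8
+javascript.reduce_pool_size = 6
+javascript.hook_pool_size = 2
+javascript.maximum_heap_size = 8MB
+javascript.maximum_stack_size = 16MB
+```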
+ +## Security + +Configurable parameters for [Riak KV Security][security index]. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`ssl.cacertfile` | The default signing authority location for HTTPS. | `$(platform_etc_dir)/cacertfile.pem`
+`ssl.keyfile` | Default key location for HTTPS. | `$(platform_etc_dir)/key.pem`
+`ssl.certfile` | Default cert location for HTTPS. | `$(platform_etc_dir)/cert.pem`
+`secure_referer_check` | Measures were added to Riak 1.2 to counteract cross-site scripting and request-forgery attacks. Some reverse proxies cannot remove the `Referer` header and make serving data directly from Riak impossible. Turning this setting to `off` disables this security check. | `on`
+`check_crl` | Whether to check the certificate revocation list (CRL) of a client certificate. This defaults to `on` but some CAs may not maintain or define a CRL, so this can be disabled if no CRL is available. | `on`
+`tls_protocols.sslv3` | Determines which SSL/TLS versions are allowed. By default, only TLS 1.2 is allowed, but other versions can be enabled if clients don't support the latest TLS standard. It is strongly recommended that SSLv3 not be enabled unless absolutely necessary. More than one protocol can be enabled at once. The `tls_protocols` parameters below can be used to turn different versions on and off. | `off`
+`tls_protocols.tlsv1.2` | | `on`
+`tls_protocols.tlsv1.1` | | `off`
+`tls_protocols.tlsv1` | | `off`
+`honor_cipher_order` | Whether to prefer the order in which the server lists its ciphers. When set to `off`, the client's preferred cipher order dictates which cipher is chosen. | `on`
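+
+An illustrative fragment pointing the TLS material at the platform etc
+directory and keeping pre-TLS-1.2 protocols disabled (the paths restate
+the defaults above):
+
+```riakconf
+ssl.certfile = $(platform_etc_dir)/cert.pem
+ssl.keyfile = $(platform_etc_dir)/key.pem
+ssl.cacertfile = $(platform_etc_dir)/cacertfile.pem
+tls_protocols.tlsv1.2 = on
+tls_protocols.sslv3 = off
+```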
+ +## Client Interfaces + +Configurable parameters for clients connecting to Riak either through +Riak's Protocol Buffers or HTTP API. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`protobuf.nagle` | Turns off Nagle's algorithm for Protocol Buffers connections. This is equivalent to setting the `TCP_NODELAY` option on the socket. | `off`
+`protobuf.backlog` | The maximum length to which the queue of pending connections may grow. If set, it must be an integer greater than zero. If you anticipate a huge number of connections being initialized simultaneously, set this number higher. | `128`
+`listener.protobuf.$name` | This is the IP address and TCP port to which the Riak Protocol Buffers interface will bind. | `{"127.0.0.1",8087}`
+`listener.http.$name` | This is the IP address and TCP port to which the Riak HTTP interface will bind. | `{"127.0.0.1",8098}`
+`listener.https.$name` | This is the IP address and TCP port to which the Riak HTTPS interface will bind. | 
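+
+For example, binding the Protocol Buffers and HTTP listeners to a
+non-loopback interface might look like the sketch below; the address is
+a placeholder and `internal` is just an example listener name for
+`$name`:
+
+```riakconf
+listener.protobuf.internal = 10.0.0.5:8087
+listener.http.internal = 10.0.0.5:8098
+```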
+ +## Logging + +Configurable parameters for [lager](https://github.com/basho/lager), +Riak's logging system. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`log.console` | Where to emit the default log messages (typically at `info` severity). Possible values: `off`, which disables console log messages; `file`, which specifies that log messages will be output to the file specified by `log.console.file`; `console`, which outputs messages to standard output (seen when using `riak attach-direct`); or `both`, which outputs messages both to the file specified in `log.console.file` and to standard out. | `file`
+`log.console.file` | When `log.console` is set to `file` or `both`, this parameter determines the path of the file to which console messages will be logged. | `./log/console.log`
+`log.console.level` | The severity level of the console log. Possible values: `debug`, `info`, `warning`, `error`. | `info`
+`log.crash` | Whether to enable the crash log. | `on`
+`log.crash.file` | If the crash log is enabled, the file where its messages will be written. | `./log/crash.log`
+`log.crash.maximum_message_size` | Maximum size of individual messages in the crash log. | `64KB`
+`log.crash.rotation` | The schedule on which to rotate the crash log. | `$D0`
+`log.crash.rotation.keep` | The number of rotated crash logs to keep. When set to `current`, only the current open log file is kept. Otherwise, an integer can be specified. | `5`
+`log.crash.size` | Maximum size of the crash log before it is rotated. | `10MB`
+`log.error.file` | The file where error messages will be logged. | `./log/error.log`
+`log.error.messages_per_second` | Maximum number of `error_logger` messages to handle per second. | `100`
+`log.error.redirect` | Whether to redirect `error_logger` messages into lager. | `on`
+`log.syslog` | When set to `on`, enables log output to syslog. | `off`
+`log.syslog.facility` | Sets the facility level of syslog output if `log.syslog` is set to `on`. Possible values: `auth`, `authpriv`, `clock`, `cron`, `daemon`, `ftp`, `kern`, `lpr`, `mail`, `news`, `syslog`, `user`, `uucp`. In addition to these settings, you may also select `local0` through `local7`. | `daemon`
+`log.syslog.ident` | If `log.syslog` is set to `on`, this setting determines the prefix appended to each syslog message. | `riak`
+`log.syslog.level` | If `log.syslog` is set to `on`, this setting determines the log level of syslog output. Possible values: `alert`, `critical`, `debug`, `emergency`, `error`, `info`, `none`, `notice`, `warning`. | `info`
+`sasl` | Whether to enable `sasl`, Erlang's built-in error logger. | `off`
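+
+A sketch routing console output to both standard out and a file while
+enabling syslog (illustrative values):
+
+```riakconf
+log.console = both
+log.console.level = info
+log.syslog = on
+log.syslog.facility = daemon
+```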
+ +## Active Anti-Entropy + +Configurable parameters for Riak's active anti-entropy subsystem. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`anti_entropy` | How Riak will repair out-of-sync keys. If set to `active`, out-of-sync keys will be repaired in the background; if set to `passive`, out-of-sync keys are only repaired on read; and if set to `active-debug`, verbose debugging information will be output. | `active`
+`search.anti_entropy.throttle` | Whether the distributed throttle for Active Anti-Entropy is enabled. | `on`
+`search.anti_entropy.throttle.$tier.solrq_queue_length` | Sets the throttling tiers for Active Anti-Entropy. Each tier is a minimum vnode mailbox size and a time-delay that the throttle should observe at that size and above. For example, `anti_entropy.throttle.tier1.mailbox_size = 0`, `anti_entropy.throttle.tier1.delay = 0ms`, `anti_entropy.throttle.tier2.mailbox_size = 40`, `anti_entropy.throttle.tier2.delay = 5ms`, etc. If configured, there must be a tier which includes a mailbox size of 0. Both `.mailbox_size` and `.delay` must be set for each tier. | 
+`search.anti_entropy.throttle.$tier.delay` | See the description for `anti_entropy.throttle.$tier.mailbox_size` above. | 
+`anti_entropy.bloomfilter` | Bloom filters are highly effective in shortcutting data queries that are destined to not find the requested key, though they tend to entail a small performance cost. | `on`
+`anti_entropy.max_open_files` | | `20`
+`anti_entropy.write_buffer_size` | The LevelDB options used by Active Anti-Entropy to generate the LevelDB-backed on-disk hashtrees. | `4MB`
+`anti_entropy.data_dir` | The directory where AAE hash trees are stored. | `./data/anti_entropy`
+`anti_entropy.trigger_interval` | The tick determines how often the Active Anti-Entropy manager looks for work to do (building/expiring trees, triggering exchanges, etc). Lowering this value will speed up the rate at which all replicas are synced across the cluster. Increasing the value is not recommended. | `15s`
+`anti_entropy.concurrency_limit` | Limit how many Active Anti-Entropy exchanges or builds can happen concurrently. | `2`
+`anti_entropy.tree.expiry` | Determines how often hash trees are expired after being built. Periodically expiring a hash tree ensures that the on-disk hash tree data stays consistent with the actual K/V backend data. It also helps Riak identify silent disk failures and bit rot. However, expiration is not needed for normal active anti-entropy operations and should be infrequent for performance reasons. The time is specified in milliseconds. | `1w`
+`anti_entropy.tree.build_limit.per_timespan` | | `1h`
+`anti_entropy.tree.build_limit.number` | Restrict how fast AAE can build hash trees. Building the tree for a given partition requires a full scan over that partition's data. Once built, trees stay built until they are expired. `.number` is the number of builds; `.per_timespan` is the amount of time in which that number of builds occurs. | `1`
+`anti_entropy.use_background_manager` | Whether AAE is to use a background process to limit AAE tree rebuilds. If set to `on`, this will help to prevent system response degradation under times of heavy load from multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
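+
+For example, a sketch that keeps AAE active while restating the default
+concurrency limit and data directory (values are illustrative, not
+tuning advice):
+
+```riakconf
+anti_entropy = active
+anti_entropy.concurrency_limit = 2
+anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
+```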
+
+## TicTac Active Anti-Entropy
+
+Config | Description | Default
+:------|:------------|:-------
+`tictacaae_active` | Switches TicTac AAE between passive and active. If you want to run TicTac AAE alongside legacy AAE, set both to active. Can be `active` or `passive`. | `passive`
+`tictacaae_dataroot` | Path under which AAE data files will be stored. | `(platform_data_dir)/tictac_aae`
+`tictacaae_parallelstore` | When running in parallel mode, which will be the default if the backend does not support native TicTac AAE (i.e. is not leveled), what type of parallel key store should be kept - `leveled_ko` (leveled and key-ordered), or `leveled_so` (leveled and segment-ordered). When running in native mode, this setting is ignored. Acceptable values are `leveled_ko` or `leveled_so`. | `leveled_ko`
+`tictacaae_rebuildwait` | The minimum number of hours to wait between rebuilds. | `336`
+`tictacaae_rebuilddelay` | The number of seconds which represents the length of the period in which the next rebuild will be scheduled. So if all vnodes are scheduled to rebuild at the same time, they will actually rebuild randomly between 0 and this value (in seconds) after the rebuild time. | `345600`
+
+## Intra-Cluster Handoff
+
+Configurable parameters for intra-cluster, i.e. inter-node, [handoff][cluster ops handoff].
+
+Config | Description | Default
+:------|:------------|:-------
+`handoff.max_rejects` | The maximum number of times that a secondary system within Riak, such as Riak Search, can block handoff of primary key/value data. The approximate maximum duration that a vnode can be blocked can be determined by multiplying this setting by `vnode_management_timer`. If you want to prevent handoff from ever being blocked by a secondary system, set this parameter to `0`. | `6`
+`handoff.inbound` | Whether inbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.outbound` | Whether outbound handoff is enabled on the node. Possible values are `on` or `off`. | `on`
+`handoff.port` | Specifies the TCP port that Riak uses for intra-cluster data handoff. | `8099`
+`handoff.ssl.certfile` | To encrypt `riak_core` intra-cluster data handoff traffic, uncomment this line and edit its path to an appropriate certfile and keyfile. | 
+`handoff.ssl.keyfile` | The keyfile paired with the certfile specified in `.certfile`. | 
+`handoff.use_background_manager` | Whether Riak will use a background manager to limit K/V handoff. This can help to prevent system response degradation during times of heavy load caused by multiple background tasks that contend for the same system resources; setting this parameter to `off` can cut down on system resource usage. | `off`
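+
+An illustrative fragment restating the default handoff port and
+background-manager setting:
+
+```riakconf
+handoff.port = 8099
+handoff.use_background_manager = off
+```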
+ +## Riak Data Types + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`datatypes.compression_level` | Whether serialized Data Types will use compression and at what level. When set to an integer, the parameter refers to the aggressiveness of compression, on a scale from 0 to 9. `on` is equivalent to 6, whereas `off` is equivalent to 0. Higher values for compression tend to be more CPU intensive. | `1`
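+
+For example (an illustrative sketch; per the table above, `on` is
+equivalent to compression level 6):
+
+```riakconf
+datatypes.compression_level = on
+```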
+ +## SNMP + +Owing to lack of usage, SNMP support has been removed from Riak KV 2.9.0 and higher. + +## JMX + +Owing to lack of usage, JMX support has also been removed from Riak KV 2.9.0 and higher. + +## Strong Consistency + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. It suffers from known issues and we do not recommend its usage in any production environment. + +Riak's strong consistency feature has a variety of tunable parameters +that allow you to enable and disable strong consistency, modify the +behavior of leaders and followers, set various timeouts, and more. More +detailed information from an operations perspective can be found in our +documentation on [managing strong consistency][cluster ops strong consistency]. + +Strong consistency is disabled by default. The `strong_consistency` +parameter enables you to turn it on. This setting is available in each +node's `riak.conf` file. + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`strong_consistency` | Enables the consensus subsystem used for strongly consistent Riak operations if set to `on`. | `off`
+ +Unlike the `strong_consistency` setting, the settings listed below are +available only in `advanced.config`, in the `riak_ensemble` section of +that file. That section looks like this: + +```advancedconfig +{riak_ensemble, [ + {parameter1, value}, + {parameter2, value}, + %% Other setting + ]} +``` + +Further instructions on setting parameters in `advanced.config` can be +found in the [advanced configuration](#advanced-configuration) section below. + +Using these settings properly demands a firm understanding of the basic +architecture of Riak's implementation of strong consistency. We highly +recommend reading our documentation on the [implementation details][cluster ops strong consistency] behind +strong consistency before changing the defaults on these parameters. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Config | Description | Default
+:------|:------------|:-------
+`ensemble_tick` | The rate at which leaders perform their periodic duties, including refreshing the leader lease, in milliseconds. This setting must be lower than both the `lease_duration` and `follower_timeout` settings (both listed below). Lower values mean that leaders perform their duties more frequently, which can allow for faster convergence if a leader goes offline and then returns to the ensemble; higher values mean that leaders perform their duties less frequently, which can reduce network overhead. | `500`
+`lease_duration` | Determines how long a leader lease remains valid without being refreshed (in milliseconds). This should be set higher than the `ensemble_tick` setting (listed above) so that leaders have time to refresh their leases before they time out, and it must be set lower than the `follower_timeout` setting (listed below). | `ensemble_tick * 3/2`
+`follower_timeout` | Determines how long a follower waits to hear from a leader before it abandons the leader (in milliseconds). This must be set greater than the `lease_duration` setting. | `lease_duration * 4`
+`alive_tokens` | Determines the number of ticks the leader will wait to hear from its associated vnode before assuming that the vnode is unhealthy and stepping down as leader. If the vnode does not respond to the leader before `ensemble_tick * alive_tokens` milliseconds have elapsed, the leader will give up leadership. It may be necessary to raise this setting if your Riak vnodes are frequently stalling out on slow backend reads/writes. If this setting is too low, it may cause slow requests to time out earlier than the request timeout. | `2`
+`storage_delay` | Determines how long the consensus subsystem delays syncing to disk when performing certain metadata operations (in milliseconds). This delay allows multiple operations to be coalesced into a single disk write. We do not recommend that you change this setting. | `50`
+`storage_tick` | Determines how often the consensus subsystem writes data to disk that was requested to be written asynchronously (in milliseconds). We do not recommend that you change this setting. | `5000`
+`trust_lease` | Determines whether leader leases are used to optimize reads. When set to `true`, a leader with a valid lease will handle the read directly without contacting any followers; when set to `false`, the leader will always contact followers. For more information, see our internal documentation on leader leases. | `true`
+`peer_get_timeout` | Determines the timeout used internally for reading consistent data, in milliseconds. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_put_timeout` | Determines the timeout, in milliseconds, used internally for writing consistent data. This setting must be greater than the highest request timeout used by your application. | `60000` (1 minute)
+`peer_workers` | The number of concurrent workers used by the leader to service requests. Increasing this setting may boost performance depending on the workload. | `1`
+`tree_validation` | Determines whether Riak considers peer Merkle trees to be trusted after a node restart. When validation is enabled (the default), Riak does not trust peer trees after a restart, instead requiring the peer to sync with a trusted majority. This is the safest option, as it protects Riak against undetected corruption of the Merkle tree. However, this mode reduces Riak availability since it can sometimes require more than a simple majority of nodes to be online and reachable. | `true`
+`synchronous_tree_updates` | Determines whether the metadata updates to follower Merkle trees are handled synchronously or not. When set to `true`, Riak requires two quorum round trips to occur before replying back to the client, the first quorum request to write the actual object and the second to write the Merkle tree data. When set to `false`, Riak will respond back to the client after the first round trip, letting the metadata update happen asynchronously. It's important to note that the leader always updates its local Merkle tree before responding to the client. This setting only affects the metadata writes sent to followers. In principle, asynchronous updates are unsafe. If the leader crashes before sending the metadata updates and all followers that had acknowledged the object write somehow revert to the object value immediately prior to a write request, a future read could return the immediately preceding value without realizing that it was incorrect. Given that this scenario is unlikely, this setting defaults to `false` in the name of improved performance. | `false`
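+
+Putting a few of these together, an `advanced.config` fragment might
+look like the sketch below. The values restate the defaults and their
+documented relationships (`lease_duration` above `ensemble_tick`,
+`follower_timeout` above `lease_duration`):
+
+```advancedconfig
+{riak_ensemble, [
+    {ensemble_tick, 500},
+    {lease_duration, 750},
+    {follower_timeout, 3000}
+  ]}
+```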
+ + +## Miscellaneous + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ConfigDescriptionDefault
metadata_cache_sizeThis setting controls the size of the metadata cache for each vnode. +The cache can be disabled by setting it to off (this is the +default). Enabling the cache should not be necessary in disk-based +backends (i.e. LevelDB and Bitcask) but it can help performance in the +Memory backend. Note that this setting adjusts the size of the ETS table +rather than the actual data. Thus, more space may be used than the +simple size * number-of-vnodes calculation would imply. +

+Caution: This setting should not be changed without +extensive benchmarking.
off
max_concurrent_requestsThe maximum number of concurrent requests of each type (GET or PUT) +that is allowed. Setting this value to infinite disables +overload protection. The erlang.process_limit should be at +least 3 times this setting.50000
dtraceWhether DTrace is enabled. +Do not enable unless your Erlang/OTP runtime is compiled to support +DTrace, which is available in R15B01 (supported by the official source +package) and in R14B04 via a custom repository and branch.off
vnode_management_timerSets the frequency with which vnodes attempt to trigger handoff between +this node and other nodes in the cluster.10s (10 seconds)
retry_put_coordinator_failureWhen a PUT (i.e. write) request fails, Riak will retry the operation +if this setting is set to on, which is the default. Setting +it to off will speed response times on PUT requests in +general, but at the risk of potentially increasing the likelihood of +write failure.on
<tr>
<td><code>background_manager</code></td>
<td>Riak's background manager is a subsystem that coordinates access to
shared resources from other Riak subsystems. The background manager can
help to prevent system response degradation during times of heavy load
caused by multiple background tasks.</td>
<td><code>on</code></td>
</tr>
</tbody>
</table>
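For orientation, here is a minimal sketch of how these settings might
appear in `riak.conf`; every value shown below is simply the documented
default, so treat it as a template rather than a tuning recommendation:

```riak.conf
## All values below are the documented defaults.
metadata_cache_size = off
max_concurrent_requests = 50000
dtrace = off
vnode_management_timer = 10s
retry_put_coordinator_failure = on
background_manager = on
```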
## Advanced Configuration

The `advanced.config` file takes the same format as the `app.config`
file familiar to users of versions of Riak prior to 2.0. Here is an
example:

```advancedconfig
[
  {riak_core,
    [
      {cluster_mgr, {"127.0.0.1", 8098 } },
      %% more riak_core configs
    ]},

  {riak_repl,
    [
      {data_root, "/var/db/riak/riak_repl/"},
      %% more riak_repl configs
    ]
  }
].
```

The following settings are available in the `advanced.config` file:

#### `riak_repl` settings

Most settings that are configurable through `advanced.config` are
related to Riak's `riak_repl` subsystem.

<table>
<thead>
<tr>
<th>Config</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>data_root</code></td>
<td>Path (relative or absolute) to the working directory for the
replication process.</td>
<td><code>/var/db/riak/riak_repl/</code></td>
</tr>
<tr>
<td><code>max_fssource_cluster</code></td>
<td>The hard limit of fullsync workers that will be running on the
source side of a cluster across all nodes on that cluster for a fullsync
to a sink cluster. This means that if you have configured fullsync for
two different clusters, both with a <code>max_fssource_cluster</code> of
5, 10 fullsync workers can be in progress. This only affects nodes on
the source cluster on which this parameter is defined, either via the
configuration file or command line.</td>
<td><code>5</code></td>
</tr>
<tr>
<td><code>max_fssource_node</code></td>
<td>This setting limits the number of fullsync workers that will be
running on each individual node in a source cluster. This is a hard
limit for all fullsyncs enabled; additional fullsync configurations will
not increase the number of fullsync workers allowed to run on any node.
This only affects nodes on the source cluster on which this parameter is
defined, either via the configuration file or command line.</td>
<td><code>1</code></td>
</tr>
<tr>
<td><code>max_fssink_node</code></td>
<td>This setting limits the number of fullsync workers allowed to run on
each individual node in a sink cluster. This is a hard limit for all
fullsyncs enabled; additional fullsync configurations will not increase
the number of fullsync workers allowed to run on any node. This only
affects nodes on the sink cluster on which this parameter is defined,
either via the configuration file or command line.</td>
<td><code>1</code></td>
</tr>
<tr>
<td><code>fullsync_on_connect</code></td>
<td>Whether to initiate a fullsync on initial connection from the sink
cluster.</td>
<td><code>true</code></td>
</tr>
<tr>
<td><code>fullsync_interval</code></td>
<td>A single-integer value representing the duration to wait, in
minutes, between fullsyncs, or a list of <code>{clustername,
time_in_minutes}</code> pairs for each sink participating in fullsync
replication.</td>
<td><code>30</code></td>
</tr>
<tr>
<td><code>rtq_max_bytes</code></td>
<td>The maximum size, in bytes, to which the realtime replication queue
can grow before new objects are dropped. Dropped objects will need to be
replicated with a fullsync.</td>
<td><code>104857600</code></td>
</tr>
<tr>
<td><code>proxy_get</code></td>
<td>Whether to enable Riak CS <code>proxy_get</code> and block
filter.</td>
<td><code>disabled</code></td>
</tr>
<tr>
<td><code>rt_heartbeat_interval</code></td>
<td>A heartbeat message is sent from the source to the sink every
<code>rt_heartbeat_interval</code> seconds. Setting
<code>rt_heartbeat_interval</code> to <code>undefined</code> disables
the realtime heartbeat. This feature is available only in Riak KV
Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6
onwards.</td>
<td><code>15</code></td>
</tr>
<tr>
<td><code>rt_heartbeat_timeout</code></td>
<td>If a heartbeat response is not received within the time period
specified by this setting (in seconds), the source connection exits and
will be re-established. This feature is available only in Riak KV
Enterprise Edition 1.3.2 - 2.2.3 and then from Riak KV 2.2.6
onwards.</td>
<td><code>15</code></td>
</tr>
<tr>
<td><code>realtime_connection_rebalance_max_delay_secs</code></td>
<td>If a server on the source cluster is restarted, this is the amount
of time (in seconds) to wait before the realtime connections are
rebalanced by a change in the number of source nodes.</td>
<td><code>300</code></td>
</tr>
<tr>
<td><code>fullsync_use_background_manager</code></td>
<td>By default, fullsync replication will attempt to coordinate with
other Riak subsystems that may be contending for the same resources.
This will help to prevent system response degradation during times of
heavy load from multiple background tasks. To disable background
coordination, set this parameter to <code>false</code>. This feature is
available only in Riak KV Enterprise Edition 2.0 and later, as well as
Riak KV 2.2.6 onwards.</td>
<td><code>true</code></td>
</tr>
</tbody>
</table>
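To see how these parameters fit together, here is a minimal
`advanced.config` sketch of a `riak_repl` section that simply restates
the defaults from the table above; it is illustrative only, not a
tuning recommendation:

```advancedconfig
{riak_repl, [
    %% All values below are the documented defaults.
    {data_root, "/var/db/riak/riak_repl/"},
    {max_fssource_cluster, 5},
    {max_fssource_node, 1},
    {max_fssink_node, 1},
    {fullsync_on_connect, true},
    {fullsync_interval, 30},
    {rtq_max_bytes, 104857600}
]}
```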
#### Upgrading Riak Search with `advanced.config`

If you are upgrading to Riak 2.x and wish to upgrade to the new [Riak Search][use ref search]\(codename Yokozuna), you will need to enable
legacy Search while the upgrade is underway. You can add the following
snippet to your `advanced.config` configuration to do so:

```advancedconfig
[
  %% Other configs

  {riak_search, [ {enabled, true} ]},
  {merge_index, [
      {data_root, "/var/lib/riak/merge_index"},
      {buffer_rollover_size, 1048576},
      {max_compact_segments, 20}
  ]},

  %% Other configs
].
```

#### Other settings

There are four non-`riak_repl` settings available in
`advanced.config`.

<table>
<thead>
<tr>
<th>Config</th>
<th>Section</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>add_paths</code></td>
<td><code>riak_kv</code></td>
<td>If you are installing custom code for Riak, e.g. for the purpose of
running MapReduce jobs or commit hooks, this setting specifies the paths
to any compiled <code>.beam</code> files that you wish to use. This is
expressed as a list of absolute paths on the node's filesystem, e.g.
<code>[ "/tmp", "/other" ]</code>.</td>
<td></td>
</tr>
<tr>
<td><code>cluster_mgr</code></td>
<td><code>riak_core</code></td>
<td>The cluster manager listens for connections from remote clusters on
the specified IP and port. Every node runs one cluster manager, but only
the cluster manager running on the cluster leader will service requests.
This can change as nodes enter and leave the cluster.</td>
<td><code>{"127.0.0.1", 9080}</code></td>
</tr>
<tr>
<td><code>delete_mode</code></td>
<td><code>riak_kv</code></td>
<td>Specifies how Riak behaves after objects are marked for deletion
with a tombstone. There are three possible settings: <code>keep</code>
disables tombstone removal altogether; <code>immediate</code> removes
objects' tombstones as soon as the delete request is received; and
setting <code>delete_mode</code> to an integer value specifies the
number of milliseconds to wait before removing tombstones. More
information can be found in Object Deletion.</td>
<td><code>3000</code> (3 seconds)</td>
</tr>
<tr>
<td><code>target_n_val</code></td>
<td><code>riak_core</code></td>
<td>The highest <code>n_val</code> that you generally intend to use.
This setting affects how partitions are distributed within the cluster,
helping to ensure that "hot spots" don't occur, i.e. that data is never
stored more than once on the same physical node. You will need to change
this setting only in rare circumstances. Assuming that
<code>ring_size</code> is a power of 2, the ideal value for this setting
is both (a) greater than or equal to the largest <code>n_val</code> for
any bucket type and (b) an even divisor of the number of partitions in
the ring, i.e. <code>ring_size</code>. The default is <code>4</code>,
and the number of physical nodes in your cluster must be greater than
<code>target_n_val</code> for this setting to be effective at preventing
hot spots.</td>
<td><code>4</code></td>
</tr>
</tbody>
</table>
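Putting the four settings together, a minimal `advanced.config` sketch
using the defaults above might look as follows (the `add_paths` entries
are the illustrative paths from the table, not real code paths):

```advancedconfig
[
  {riak_kv, [
      %% Hypothetical example paths to compiled .beam files
      {add_paths, ["/tmp", "/other"]},
      %% Wait 3000 ms before removing tombstones (the default)
      {delete_mode, 3000}
  ]},
  {riak_core, [
      %% Documented defaults for the cluster manager and target_n_val
      {cluster_mgr, {"127.0.0.1", 9080}},
      {target_n_val, 4}
  ]}
].
```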
+ +## Cluster Job Controls + +{{% note title="Warning" %}} +Before changing `cluster.job` controls in a production environment, test your application to ensure it does not have any hidden dependencies on them. +{{% /note %}} + +The `cluster.job` switches control whether classes of jobs are enabled or disabled through the HTTP(S) and Protobuf interfaces. All jobs are enabled by default. + +Field | Default | Valid values | +:-----|:--------|:-------------| +`cluster.job.riak_kv.list_buckets`|`enabled`|`enabled` or `disabled` +`cluster.job.riak_kv.stream_list_buckets`|`enabled`|`enabled` or `disabled` +`cluster.job.riak_kv.list_keys`|`enabled`|`enabled` or `disabled` +`cluster.job.riak_kv.stream_list_keys`|`enabled`|`enabled` or `disabled` +`cluster.job.riak_kv.map_reduce`|`enabled`|`enabled` or `disabled` +`cluster.job.riak_kv.map_reduce_js`|`enabled`|`enabled` or `disabled` +`cluster.job.riak_kv.secondary_index`|`enabled`|`enabled` or `disabled` +`cluster.job.riak_search.query`|`enabled`|`enabled` or `disabled` +`cluster.job.yokozuna.query`|`enabled`|`enabled` or `disabled` + + + + diff --git a/content/riak/kv/3.0.4/configuring/search.md b/content/riak/kv/3.0.4/configuring/search.md new file mode 100644 index 0000000000..da6f0f614f --- /dev/null +++ b/content/riak/kv/3.0.4/configuring/search.md @@ -0,0 +1,278 @@ +--- +title: "Riak Search Settings" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Riak Search Settings" + identifier: "configuring_search" + weight: 160 + parent: "configuring" +toc: true +aliases: + - /riak/3.0.4/ops/advanced/configs/search/ + - /riak/kv/3.0.4/ops/advanced/configs/search/ +--- + +[usage search]: {{}}riak/kv/3.0.4/developing/usage/search +[usage search schema]: {{}}riak/kv/3.0.4/developing/usage/search-schemas +[usage search data types]: {{}}riak/kv/3.0.4/developing/usage/searching-data-types +[usage custom extractors]: {{}}riak/kv/3.0.4/developing/usage/custom-extractors +[cluster-ops aae throttle]: {{}}riak/kv/3.0.4/using/cluster-operations/active-anti-entropy/#throttling +[config reference]: {{}}riak/kv/3.0.4/configuring/reference +[config reference#search]: {{}}riak/kv/3.0.4/configuring/reference/#search +[glossary aae]: {{}}riak/kv/3.0.4/learn/glossary/#active-anti-entropy-aae +[security index]: {{}}riak/kv/3.0.4/using/security/ + +[java se downloads]: http://www.oracle.com/technetwork/java/javase/downloads +[java se docs]: http://www.oracle.com/technetwork/java/javase/documentation + +This page covers how to use Riak Search (with +[Solr](http://lucene.apache.org/solr/) integration). + +For a simple reference of the available configs and their defaults, see the [configuration reference][config reference#search]. + +If you are looking to develop on or with Riak Search, take a look at: + +* [Using Search][usage search] +* [Search Schema][usage search schema] +* [Custom Search Extractors][usage custom extractors] +* [Riak KV Data Types and Search][usage search data types] + +## Overview + +We'll be walking through: + +1. [Prequisites](#prerequisites) +2. [Enable Riak Search](#enabling-riak-search) +3. [Search Configuration Settings](#search-config-settings) +4. [Additional Solr Information](#more-on-solr) + +## Prerequisites + +Because Solr is a Java application, you will need to install **Java 7 +or later** on every node. Installation packages can be found on the [Java SE Downloads +page][java se downloads] and instructions in the [Java SE documentation site][java se docs]. 
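Before enabling Search, it is worth a quick sanity check that a
suitable JVM is installed on each node; a minimal sketch (the exact
version output will vary by distribution):

```bash
# Confirm that Java 7 or later is on the PATH of every node
java -version
```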
+ + +## Enabling Riak Search + +Riak Search is not enabled by default, so you must enable it in every +node's [configuration file][config reference] as follows: + +```riak.conf +search = on +``` + + +## Search Config Settings + +You will find all the Riak Search configuration settings in riak.conf. Setting `search` to `on` is required, but other search settings are optional. A handy reference list of these parameters can be found in our [configuration files][config reference#search] documentation. + +### `search` + +Enable or disable search; defaults to `off`. + +Valid values: `on` or `off` + +### `search.anti_entropy.data_dir` + +The directory in which Riak Search stores files related to [active anti-entropy][glossary aae]; defaults to `./data/yz_anti_entropy`. + +Valid values: a directory + +### `search.anti_entropy.throttle` + +Whether the throttle for Yokozuna active anti-entropy is enabled; defaults to `on`. + +Valid values: `on` or `off` + +You can read more about throttling [here][cluster-ops aae throttle]. + +### `search.anti_entropy.throttle.$tier.delay` + +Set the throttling tiers delay for [active anti-entropy][glossary aae]; no default. + +Each tier is a [minimum Solrq queue size](#search-anti-entropy-throttle-tier-solrq-queue-length) and a time-delay that the throttle should observe at that size and above. + +For example: + +``` +search.anti_entropy.throttle.tier1.solrq_queue_length = 0 +search.anti_entropy.throttle.tier1.delay = 0ms +search.anti_entropy.throttle.tier2.solrq_queue_length = 40 +search.anti_entropy.throttle.tier2.delay = 5ms +``` +will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a mailbox size of 0. Both [`.solrq_queue_length`](#search-anti-entropy-throttle-tier-solrq-queue-length) and `.delay` must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle). + +Valid values: Non-negative integer + +### `search.anti_entropy.throttle.$tier.solrq_queue_length` + +Set the throttling tiers for [active anti-entropy][glossary aae]; no default. + +Each tier is a minimum Solrq queue size and a [time-delay](#search-anti-entropy-throttle-tier-delay) that the throttle +should observe at that size and above. + +For example: + +``` +search.anti_entropy.throttle.tier1.solrq_queue_length = 0 +search.anti_entropy.throttle.tier1.delay = 0ms +search.anti_entropy.throttle.tier2.solrq_queue_length = 40 +search.anti_entropy.throttle.tier2.delay = 5ms +``` +will introduce a 5 millisecond sleep for any queues of length 40 or higher. If configured, there must be a tier which includes a mailbox size of 0. Both `.solrq_queue_length` and [`.delay`](#search-anti-entropy-throttle-tier-delay) must be set for each tier. There is no limit to the number of tiers that may be specified. See [`search.anti_entropy.throttle`](#search-anti-entropy-throttle). + +Valid values: Non-negative integer + +### `search.dist_query` + +Enable this node in distributed query plans; defaults to `on`. + +If enabled, this node will participate in distributed Solr queries. If disabled, the node will be excluded from Riak search cover plans, and will therefore never be consulted in a distributed query. Note that this node may still be used to execute a query. Use this flag if you have a long running administrative operation (e.g. 
reindexing) which requires that the node be removed from query plans, and which would otherwise result in inconsistent search results.

This setting can also be changed via `riak-admin` by issuing one of the following commands:

```
riak-admin set search.dist_query=off
```

or

```
riak-admin set search.dist_query=on
```

Setting this value in riak.conf is useful when you are restarting a node which was removed from search queries with the `riak-admin` feature. Setting `search.dist_query` in riak.conf will prevent the node from being included in search queries until it is fully spun up.

Valid values: `on` or `off`

### `search.index.error_threshold.failure_count`

The number of failures encountered while updating a search index within [`search.index.error_threshold.failure_interval`](#search-index-error-threshold-failure-interval) before Riak KV will skip updates to that index; defaults to `3`.

Valid values: Integer

### `search.index.error_threshold.failure_interval`

The window of time during which `search.index.error_threshold.failure_count` failures will cause Riak KV to skip updates to a search index; defaults to `5000`.

If [`search.index.error_threshold.failure_count`](#search-index-error-threshold-failure-count) errors have occurred within this interval on a given search index, then Riak will skip updates to that index until the [`search.index.error_threshold.reset_interval`](#search-index-error-threshold-reset-interval) has passed.

Valid values: Milliseconds

### `search.index.error_threshold.reset_interval`

The amount of time it takes for updates to a given search index to resume/refresh once Riak KV has started skipping update operations; defaults to `30000`.

Valid values: Milliseconds

### `search.queue.batch.flush_interval`

The maximum delay between notifications to flush batches to Solr; defaults to `1000` (milliseconds).

This setting is used to increase or decrease the frequency of batch delivery into Solr, specifically for relatively low-volume input into Riak KV. This setting ensures that data will be delivered into Solr in accordance with the `search.queue.batch.minimum` and `search.queue.batch.maximum` settings within the specified interval. Batches that are smaller than `search.queue.batch.minimum` will be delivered to Solr within this interval. This setting will generally have no effect on heavily loaded systems. You may use any time unit; the default is in milliseconds.

Valid values: `ms`, `s`, `m`, or `h`

### `search.queue.batch.maximum`

The maximum batch size, in number of Riak objects; defaults to `500`.

Any batches that are larger than this amount will be split: the first `search.queue.batch.maximum` objects will be flushed to Solr, and the remaining objects enqueued for that index will be retained until the next batch is delivered. This parameter ensures that at most `search.queue.batch.maximum` objects will be delivered into Solr in any given request.

Valid values: Integer

### `search.queue.batch.minimum`

The minimum batch size, in number of Riak objects; defaults to `10`.

Any batches that are smaller than this amount will not be immediately flushed to Solr, but are guaranteed to be flushed within the `search.queue.batch.flush_interval`.

Valid values: Integer

### `search.queue.high_watermark`

The queue high water mark; defaults to `1000`.
If the total number of queued messages in a Solrq worker instance exceeds this limit, then the calling vnode will be blocked until the total number falls below this limit. This parameter exercises flow control between Riak KV and the Riak Search batching subsystem if writes into Solr start to fall behind.

Valid values: Integer

### `search.queue.high_watermark.purge_strategy`

The strategy for how purging is handled when the `search.queue.high_watermark` is hit; defaults to `purge_one`.

Valid values: `purge_one`, `purge_index`, or `off`

* `purge_one` removes the oldest item on the queue from an erroring index (one whose internal fuse has blown) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
* `purge_index` removes all items associated with one random erroring index (one whose internal fuse has blown) in order to get below the [`search.queue.high_watermark`](#search-queue-high-watermark)
* `off` disables purging

### `search.root_dir`

The root directory in which index data and configuration is stored; defaults to `./data/yz`.

Valid values: a directory

### `search.solr.jvm_options`

The options to pass to the Solr JVM; defaults to `-d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops`.

Non-standard options (e.g. `-XX`) may not be portable across JVM implementations.

Valid values: Java command-line arguments

### `search.solr.jmx_port`

The port number to which Solr JMX binds (note: binds on every interface); defaults to `8985`.

Valid values: Integer

**Note:** JMX ceased being a Riak feature in Riak KV 2.9.0p5. This setting is left here for reference but no longer affects anything.

### `search.solr.port`

The port number to which Solr binds (note: binds on every interface); defaults to `8093`.

Valid values: Integer

### `search.solr.start_timeout`

How long Riak KV will wait for Solr to start (attempts twice before shutdown); defaults to `30s`.

Values lower than 1s will be rounded up to 1s.

Valid values: Integer with time units (e.g. 2m)

## More on Solr

### Solr JVM and Ports

Riak Search runs one Solr process per node to manage its indexing and search functionality. While the underlying project manages index distribution, node coverage for queries, active anti-entropy (AAE), and JVM process management, you should provide plenty of RAM and disk space for running both Riak and the JVM running Solr. We recommend a minimum of 6GB of RAM per node.

Concerning ports, be sure to take the necessary [security][security index] precautions to prevent exposing the extra Solr ports to the outside world.

### Solr for Operators

For further information on Solr monitoring, tuning, and performance, we recommend the following documents for getting started:

* [Solr Monitoring](https://wiki.apache.org/solr/SolrMonitoring)
* [Solr Performance Factors](https://wiki.apache.org/solr/SolrPerformanceFactors)
* [Solr Performance Problems](https://wiki.apache.org/solr/SolrPerformanceProblems)
* [JConsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html)

A wide variety of other documentation is available from the Solr OSS community.
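To recap the settings above, here is a minimal sketch of the
Search-related portion of a `riak.conf`; apart from `search = on`
(which must be set explicitly), the values shown are the documented
defaults, so treat this as a starting point only:

```riak.conf
## search must be enabled explicitly; the rest are the defaults.
search = on
search.root_dir = ./data/yz
search.queue.batch.minimum = 10
search.queue.batch.maximum = 500
search.queue.batch.flush_interval = 1000ms
search.queue.high_watermark = 1000
search.queue.high_watermark.purge_strategy = purge_one
search.solr.port = 8093
search.solr.start_timeout = 30s
```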
+ + + + diff --git a/content/riak/kv/3.0.4/configuring/strong-consistency.md b/content/riak/kv/3.0.4/configuring/strong-consistency.md new file mode 100644 index 0000000000..2d4d09c6e1 --- /dev/null +++ b/content/riak/kv/3.0.4/configuring/strong-consistency.md @@ -0,0 +1,702 @@ +--- +title: "Implementing Strong Consistency" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Implementing Strong Consistency" + identifier: "configuring_strong_consistency" + weight: 190 + parent: "configuring" +toc: true +--- + +[apps strong consistency]: {{}}riak/kv/3.0.4/developing/app-guide/strong-consistency +[concept strong consistency]: {{}}riak/kv/3.0.4/using/reference/strong-consistency +[cluster ops add remove node]: {{}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes +[config reference#strong-cons]: {{}}riak/kv/3.0.4/configuring/reference/#strong-consistency +[use admin riak cli]: {{}}riak/kv/3.0.4/using/admin/riak-cli +[concept eventual consistency]: {{}}riak/kv/3.0.4/learn/concepts/eventual-consistency +[plan backend bitcask]: {{}}riak/kv/3.0.4/setup/planning/backend/bitcask +[glossary vnode]: {{}}riak/kv/3.0.4/learn/glossary/#vnode +[concept buckets]: {{}}riak/kv/3.0.4/learn/concepts/buckets +[cluster ops bucket types]: {{}}riak/kv/3.0.4/using/cluster-operations/bucket-types +[use admin riak-admin#ensemble]: {{}}riak/kv/3.0.4/using/admin/riak-admin/#ensemble-status +[use admin riak-admin]: {{}}riak/kv/3.0.4/using/admin/riak-admin +[config reference#advanced]: {{}}riak/kv/3.0.4/configuring/reference/#advanced-configuration +[plan cluster capacity]: {{}}riak/kv/3.0.4/setup/planning/cluster-capacity +[cluster ops strong consistency]: {{}}riak/kv/3.0.4/using/cluster-operations/strong-consistency +[apps replication properties]: {{}}riak/kv/3.0.4/developing/app-guide/replication-properties +[concept causal context]: {{}}riak/kv/3.0.4/learn/concepts/causal-context +[dev data types]: {{}}riak/kv/3.0.4/developing/data-types +[glossary aae]: {{}}riak/kv/3.0.4/learn/glossary/#active-anti-entropy-aae +[cluster ops 2i]: {{}}riak/kv/3.0.4/using/reference/secondary-indexes +[usage commit hooks]: {{}}riak/kv/3.0.4/developing/usage/commit-hooks +[cluster ops obj del]: {{}}riak/kv/3.0.4/using/reference/object-deletion +[dev client libraries]: {{}}riak/kv/3.0.4/developing/client-libraries + +> **Please Note:** +> +> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment. + +This document provides information on configuring and monitoring a Riak +cluster's optional strong consistency subsystem. Documentation for +developers building applications using Riak's strong consistency feature +can be found in [Using Strong Consistency][apps strong consistency], while a more theoretical +treatment can be found in [Strong Consistency][concept strong consistency]. + +## Minimum Cluster Size + +In order to use strong consistency in Riak, **your cluster must consist +of at least three nodes**. If it does not, all strongly consistent +operations will fail. 
If your cluster is smaller than three nodes, you +will need to [add more nodes][cluster ops add remove node] and make sure +that strong consistency is [enabled](#enabling-strong-consistency) on all of them. + +Strongly consistent operations on a given key may also fail if a +majority of object replicas in a given ensemble are unavailable, whether +due to slowness, crashes, or network partitions. This means that you may +see strongly consistent operations fail even if the minimum cluster size +requirement has been met. More information on ensembles can be found in +[Implementation Details](#implementation-details). + +While strong consistency requires at least three nodes, we have a +variety of recommendations regarding cluster size, which can be found in +[Fault Tolerance](#fault-tolerance). + +## Enabling Strong Consistency + +Strong consistency in Riak is disabled by default. You can enable it in +each node's [configuration files][config reference#strong-cons]. + +```riakconf +strong_consistency = on +``` + +```appconfig +%% In the older, app.config-based system, the strong consistency +%% parameter is enable_consensus: + +{riak_core, [ + % ... + {enable_consensus, true}, + % ... + ]} +``` + +Remember that you must [restart your node][use admin riak cli] for +configuration changes to take effect. + +For strong consistency requirements to be applied to specific keys, +those keys must be in [buckets][concept buckets] bearing a bucket type with the +`consistent` property set to `true`. More information can be found in +[Using Bucket Types][cluster ops bucket types]. + +If you enable strong consistency on all nodes in a cluster with fewer +than three nodes, strong consistency will be **enabled** but not yet +**active**. Strongly consistent operations are not possible in this +state. Once at least three nodes with strong consistency enabled are +detected in the cluster, the system will be activated and ready for use. +You can check on the status of the strong consistency subsystem using +the [`riak-admin ensemble-status`][use admin riak-admin#ensemble] command. + +## Fault Tolerance + +Strongly consistent operations in Riak are necessarily less highly +available than [eventually consistent][concept eventual consistency] operations +because strongly consistent operations can only succeed if a **quorum** +of object replicas are currently reachable. A quorum can be expressed as +N / 2 + 1 (or `n_val` / 2 + 1), meaning that 3 replicas constitutes a +quorum if N=5, 4 replicas if N=7, etc. If N=7 and 4 replicas are +unavailable, for example, no strongly consistent operations on that +object can succeed. + +While Riak uses N=3 by default, bear in mind that **higher values of N +will allow for more fault tolerance**. The table below shows the number +of allowable missing replicas for assorted values of N: + +Replicas | Allowable missing replicas +:--------|:-------------------------- +3 | 1 +5 | 2 +7 | 3 +9 | 4 +15 | 7 + +Thus, we recommend setting `n_val` higher than the default of 3 for +strongly consistent operations. More on `n_val` in the section below. + +### n_val Recommendations + +Due to the quorum requirements explained above, we recommend that you +use _at least_ N=5 for strongly consistent data. You can set the value +of N, i.e. `n_val`, for buckets +[using bucket types][cluster ops bucket types]. 
For example, you +can create and activate a bucket type with N set to 5 and strong +consistency enabled---we'll call the bucket type +`consistent_and_fault_tolerant`---using the following series of +[commands][use admin riak-admin]: + +```bash +riak-admin bucket-type create consistent_and_fault_tolerant \ + '{"props": {"consistent":true,"n_val":5}}' +riak-admin bucket-type activate consistent_and_fault_tolerant +``` + +If the `activate` command outputs `consistent_and_fault_tolerant has +been activated`, the bucket type is now ready to provide strong +consistency guarantees. + +#### Setting the target_n_val parameter + +The `target_n_val` parameter sets the highest `n_val` that you intend to +use in an entire cluster. The purpose of this parameter is to ensure +that so-called "hot spots" don't occur, i.e. that data is never stored +more than once on the same physical node. This can happen when: + +* `target_n_val` is greater than the number of physical nodes, or +* the `n_val` for a bucket is greater than `target_n_val`. + +A problem to be aware of if you're using strong consistency is that the +default for `target_n_val` is 4, while our suggested minimum `n_val` for +strongly consistent bucket types is 5. This means that you will need to +raise `target_n_val` if you intend to use an `n_val` over 4 for _any_ +bucket type in your cluster. If you anticipate using an `n_val` of 7 as +the largest `n_val` within your cluster, for example, you will need to +set `target_n_val` to 7. + +This setting is not contained in `riak.conf`, and must instead be set in +the `advanced.config` file. For more information, see our documentation +on [advanced configuration][config reference#advanced]. + +If you are using strong consistency in a cluster that has already been +created with a `target_n_val` that is too low (remember that the default +is too low), you will need to raise it to the desired higher value and +restart each node. + +#### Note on Bucket Properties + +The `consistent` bucket property is one of two bucket properties, +alongside [`datatype`][cluster ops bucket types], that cannot be changed once a +bucket type has been created. + +Furthermore, if `consistent` is set to `true` for a bucket type, you +cannot change the `n_val` for the bucket type once it's been created. If +you attempt to do so, you'll see the following error: + +``` +Error updating bucket : +n_val cannot be modified for existing consistent type +``` + +If you've created a bucket type with a specific `n_val` and wish to +change it, you will need to create a new bucket type with the +appropriate `n_val` and use the new bucket type instead. + +### Fault Tolerance and Cluster Size + +From the standpoint of strongly consistent operations, larger clusters +tend to be more fault tolerant. Spreading ensembles across more nodes will decrease the number of ensembles active on each node and thus decrease the number of quorums affected when a node goes down. + +Imagine a 3-node cluster in which all ensembles are N=3 ensembles. If +two nodes go down, _all_ ensembles will lose quorum and will be unable +to function. Strongly consistent operations on the entire keyspace will +fail until at least one node is brought back online. And even when that +one node is brought back online, a significant portion of the keyspace +will continue to be unavailable for strongly consistent operations. + +For the sake of contrast, imagine a 50-node cluster in which all +ensembles are N=5 (i.e. all objects are replicated to five nodes). 
In +this cluster, each node is involved in only 10% of the total ensembles; +if a single node fails, that failure will thus impact only 10% of +ensembles. In addition, because N is set to 5, that will not impact +quorum for _any_ ensemble in the cluster; two additional node failures +would need to occur for quorum to be lost for _any_ ensemble. And even +in the case of three nodes failing, it is highly unlikely that that +failure would impact the same ensembles; if it did, only those ensembles +would become unavailable, affecting only 10% of the key space, as +opposed to 100% in the example of a 3-node cluster consisting of N=3 +ensembles. + +These examples illustrate why we recommend higher values for N---again, +at least N=5---as well as clusters with many nodes. The 50-node cluster +example above is used only to illustrate why larger clusters are more +fault tolerant. The definition of "many" nodes will vary according to your needs. +For recommendations regarding cluster size, see [Cluster Capacity Planning][plan cluster capacity]. + +### Offline Node Recommendations + +In general, strongly consistent Riak is more sensitive to the number of +nodes in the cluster than eventually consistent Riak, due to the quorum +requirements described above. While Riak is designed to withstand a +variety of failure scenarios that make nodes in the cluster unreachable, +such as hardware or network failure, **we nonetheless recommend that you +limit the number of nodes that you intentionally down or reboot**. +Having multiple nodes leave the cluster at once can threaten quorum and +thus affect the viability of some or all strongly consistent operations, +depending on the size of the cluster. + +If you're using strong consistency and you do need to reboot multiple +nodes, we recommend rebooting them very carefully. Rebooting nodes too +quickly in succession can force the cluster to lose quorum and thus be +unable to service strongly consistent operations. The best strategy is +to reboot nodes one at a time and wait for each node to rejoin existing +[ensembles][cluster ops strong consistency] before +continuing to the next node. At any point in time, the state of +currently existing ensembles can be checked using [`riak-admin ensemble-status`][admin riak-admin#ensemble]. + +## Performance + +If you run into performance issues, bear in mind that the key space in a +Riak cluster is spread across multiple [consensus groups][cluster ops strong consistency], each of which manages a portion of +that key space. Larger [ring sizes][concept clusters] allow more +independent consensus groups to exist in a cluster, which can provide +for more concurrency and higher throughput, and thus better performance. +The ideal ring size, however, will also depend on the number of nodes in +the cluster. General recommendations can be found in [Cluster Capacity Planning][plan cluster capacity]. + +Adding nodes to your cluster is another means of enhancing the +performance of strongly consistent operations. Instructions on doing so +can be found in [Adding and Removing Nodes][cluster ops add remove node]. + +Your cluster's configuration can also affect strong consistency +performance. See the section on [configuration][config reference#strong-cons] below. + +## riak-admin ensemble-status + +The [`riak-admin`][use admin riak-admin] interface +used for general node/cluster management has an `ensemble-status` +command that provides insight into the current status of the consensus +subsystem undergirding strong consistency. 
+ +Running the command by itself will provide the current state of the +subsystem: + +```bash +riak-admin ensemble-status +``` + +If strong consistency is not currently enabled, you will see `Note: The +consensus subsystem is not enabled.` in the output of the command; if +strong consistency is enabled, you will see output like this: + +``` +============================== Consensus System =============================== +Enabled: true +Active: true +Ring Ready: true +Validation: strong (trusted majority required) +Metadata: best-effort replication (asynchronous) + +================================== Ensembles ================================== + Ensemble Quorum Nodes Leader +------------------------------------------------------------------------------- + root 4 / 4 4 / 4 riak@riak1 + 2 3 / 3 3 / 3 riak@riak2 + 3 3 / 3 3 / 3 riak@riak4 + 4 3 / 3 3 / 3 riak@riak1 + 5 3 / 3 3 / 3 riak@riak2 + 6 3 / 3 3 / 3 riak@riak2 + 7 3 / 3 3 / 3 riak@riak4 + 8 3 / 3 3 / 3 riak@riak4 +``` + +### Interpreting ensemble-status Output + +The following table provides a guide to `ensemble-status` output: + +Item | Meaning +:----|:------- +`Enabled` | Whether the consensus subsystem is enabled on the current node, i.e. whether the `strong_consistency` parameter in [`riak.conf`][config reference#strong-cons] has been set to `on`. If this reads `off` and you wish to enable strong consistency, see our documentation on enabling strong consistency. +`Active` | Whether the consensus subsystem is active, i.e. whether there are enough nodes in the cluster to use strong consistency, which requires at least three nodes. +`Ring Ready` | If `true`, then all of the [vnodes][glossary vnode] in the cluster have seen the current ring, which means that the strong consistency subsystem can be used; if `false`, then the system is not yet ready. If you have recently added or removed one or more nodes to/from the cluster, it may take some time for `Ring Ready` to change. +`Validation` | This will display `strong` if the `tree_validation` setting in riak.conf has been set to `on` and `weak` if set to `off`. +`Metadata` | This depends on the value of the `synchronous_tree_updates` setting in riak.conf, which determines whether strong consistency-related Merkle trees are updated synchronously or asynchronously. If `best-effort replication (asynchronous)`, then `synchronous_tree_updates` is set to `false`; if `guaranteed replication (synchronous)` then `synchronous_tree_updates` is set to `true`. +`Ensembles` | This displays a list of all of the currently existing ensembles active in the cluster.
  * **Ensemble** --- The ID of the ensemble
  * **Quorum** --- The number of ensemble peers that are either leading or following
  * **Nodes** --- The number of nodes currently online
  * **Leader** --- The current leader node for the ensemble
+ +**Note**: The **root ensemble**, designated by `root` in the sample +output above, is a special ensemble that stores a list of nodes and +ensembles in the cluster. + +More in-depth information on ensembles can be found in our [internal +documentation](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md). + +### Inspecting Specific Ensembles + +The `ensemble-status` command also enables you to directly inspect the +status of specific ensembles in a cluster. The IDs for all current +ensembles are displayed in the `Ensembles` section of the +`ensemble-status` output described above. + +To inspect a specific ensemble, specify the ID: + +```bash +riak-admin ensemble-status +``` + +The following would inspect ensemble 2: + +```bash +riak-admin ensemble-status 2 +``` + +Below is sample output for a single ensemble: + +``` +================================= Ensemble #2 ================================= +Id: {kv,0,3} +Leader: riak@riak2 (2) +Leader ready: true + +==================================== Peers ==================================== + Peer Status Trusted Epoch Node +------------------------------------------------------------------------------- + 1 following yes 1 riak@riak1 + 2 leading yes 1 riak@riak2 + 3 following yes 1 riak@riak3 +``` + +The table below provides a guide to the output: + +Item | Meaning +:----|:------- +`Id` | The ID for the ensemble used internally by Riak, expressed as a 3-tuple. All ensembles are `kv`; the second element names the ring partition for which the ensemble is responsible; and the third element is the `n_val` for the keys for which the ensemble is responsible. +`Leader` | Identifies the ensemble's leader. In this case, the leader is on node `riak@riak2` and is identified as peer `2` in the ensemble. +`Leader ready` | States whether the ensemble's leader is ready to respond to requests. If not, requests to the ensemble will fail. +`Peers` | A list of peer [vnodes][glossary vnode] associated with the ensemble.
  * **Peer** --- The ID of the peer
  * **Status** --- Whether the peer is a leader or a follower
  * **Trusted** --- Whether the peer's Merkle tree is currently considered trusted or not
  * **Epoch** --- The current consensus epoch for the peer. The epoch is incremented each time the leader changes.
  * **Node** --- The node on which the peer resides.
+ +More information on leaders, peers, Merkle trees, and other details can +be found in [Implementation Details](#implementation-details) below. + +## Implementation Details + +Strong consistency in Riak is handled by a subsystem called +[`riak_ensemble`](https://github.com/basho/riak_ensemble/blob/develop/doc/Readme.md) +This system functions differently from other systems in Riak in a number +of ways, and many of these differences are important to bear in mind for +operators configuring their cluster's usage of strong consistency. + +### Basic Operations + +The first major difference is that strongly consistent Riak involves a +different set of operations from [eventually consistent][concept eventual consistency] Riak KV. In strongly consistent buckets, there are four types +of atomic operations on objects: + +* **Get** operations work just as they do against + non-strongly-consistent keys, but with two crucial differences: + 1. Connecting clients are guaranteed to return the most recently + written value (which makes those operations CP, i.e. consistent and + partition tolerant) + 2. Reads on strongly consistent keys *never* return siblings, hence + there is no need to develop any sort of [conflict resolution][usage conflict resolution] + strategy for those keys +* **Conditional put** operations write an object only if no object + currently exists in that key. The operation will fail if the key + already exists; if the key was never written or has been deleted, the + operation succeeds. +* **Conditional modify** operations are compare-and-swap (CAS) + operations that succeed only if the value of a key has not changed + since it was previously read. +* **Delete** operations work mostly like they do against + non-strongly-consistent keys, with the exception that + [tombstones][cluster ops obj deletion] are not harvested, which is + the equivalent of having `delete_mode` set to `keep`. + +**From the standpoint of clients connecting to Riak, there is little +difference between strongly and non-strongly consistent data**. The +operations performed on objects---reads, writes, deletes, etc.---are the +same, which means that the client API for strong consistency is +essentially the same as it is for eventually consistent operations, with +the important exception of error handling. + +### Ensembles + +The main actors in Riak's implementation of strong consistency are +**ensembles**, which are independent groups that watch over a portion of +a Riak cluster's key space and coordinate strongly consistent operations +across nodes. When watching over a given key space, ensembles must act +upon multiple replicas of a given object, the number of which is +specified by `n_val` (more on this in [Replication Properties][apps replication properties]). + +Eventually consistent Riak can service requests even when only a single +object replica is available, using mechanisms like [vector clocks][concept causal context] and [dotted version vectors][concept causal context]---or, in a different way, [Riak Data Types][dev data types])---to ensure eventual consistency between replicas. Strongly consistent Riak is different because it +requires that a **quorum** of object replicas be online and reachable, +where a quorum is defined as `n_val` / 2 + 1. **If a quorum is not +available for a key, all strongly consistent operations against that key +will fail**. + +More information can be found in the section on Fault Tolerance above. 
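To make the quorum arithmetic concrete, here is a small illustrative
calculation in an Erlang shell (plain arithmetic only, not a Riak API):

```erlang
%% A quorum requires n_val div 2 + 1 reachable replicas.
1> Quorum = fun(NVal) -> NVal div 2 + 1 end.
2> [{N, Quorum(N)} || N <- [3, 5, 7, 9]].
[{3,2},{5,3},{7,4},{9,5}]
```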
+ +### Peers, Leaders, Followers, and Workers + +All ensembles in strongly consistent Riak consist of agents called +**peers**. The number of peers in an ensemble is defined by the `n_val` +of that ensemble, i.e. the number of object replicas that the +ensemble watches over. Amongst the peers in the ensemble, there are two +basic actors: **leaders** and **followers**. + +Leaders and followers coordinate with one another on most requests. +While leaders and followers coordinate on all writes, i.e. all puts and +deletes, you can enable leaders to respond to gets without the need to +coordinate with followers. This is known as granting a **leader lease**. +Leader leases are enabled by default, and are disabled (or re-enabled) +at the cluster level. A more in-depth account of ensemble behavior can +be found in our [internal +documentation](https://github.com/basho/riak_ensemble/tree/develop/doc). + +In addition to leaders and followers, ensemble peers use lightweight +Erlang processes called **workers** to perform long-running K/V +operations, allowing peers to remain responsive to requests. The number +of workers assigned to each peer depends on your configuration. + +These terms should be borne in mind in the sections on configuration +below. + +### Integrity Checking + +An essential part of implementing a strong consistency subsystem in a +distributed system is **integrity checking**, which is a process that +guards against data corruption and inconsistency even in the face of +network partitions and other adverse events that Riak was built to +handle gracefully. + +Like Riak's [active anti-entropy][glossary aae] subsystem, strong consistency +integrity checking utilizes [Merkle +trees](http://en.wikipedia.org/wiki/Merkle_tree) that are persisted on +disk. All peers in an ensemble, i.e. all leaders and followers, maintain +their own Merkle trees and update those trees in the event of most +strongly consistent operations. Those updates can occur synchronously or +asynchronously from the standpoint of client operations, depending on +the configuration that you specify. + +While integrity checking takes place automatically in Riak, there are +important aspects of its behavior that you can configure. See the Merkle Tree settings section below for more +information on configurable parameters. + +## Configuring Strong Consistency + +The `riak_ensemble` subsystem provides a wide variety of tunable +parameters that you can adjust to fit the needs of your Riak cluster. +All `riak_ensemble`-specific parameters, with the exception of the +`strong_consistency` parameter used to [enable strong consistency](#enabling-strong-consistency), +must be set in each node's `advanced.config` file, _not_ in `riak.conf` +or `app.config`. + +Information on the syntax and usage of `advanced.config` can be found in +our documentation on [advanced configuration][config reference#advanced]. That same document also contains a full +listing of [strong-consistency-related configuration parameters][config reference#strong-cons]. + +Please note that the sections below require a basic understanding of the +following terms: + +* ensemble +* peer +* leader +* follower +* worker +* integrity checking +* Merkle tree + +For an explanation of these terms, see the [Implementation Details](#implementation-details) section +above. + +#### Leader Behavior + +The `trust_lease` setting determines whether leader leases are used to +optimize reads. 
When set to `true`, a leader with a valid lease can
handle reads directly without needing to contact any followers. When
`false`, the leader will always contact followers, which can lead to
degraded read performance. The default is `true`. We recommend leaving
leader leases enabled for performance reasons.

All leaders have periodic duties that they perform, including refreshing
the leader lease. You can determine how frequently this occurs, in
milliseconds, using the `ensemble_tick` setting. The default is 500
milliseconds. Please note that this setting must be lower than both
the `lease_duration` and `follower_timeout` settings (both explained
below).

If you set `trust_lease` to `true`, you can also specify how long a
leader lease remains valid without being refreshed using the
`lease_duration` setting, which is specified in milliseconds. This
setting should be higher than `ensemble_tick` to ensure that leaders
have time to refresh their leases before they time out, and it _must_
be lower than `follower_timeout`, explained in the section below. The
default is `ensemble_tick` * 3/2, i.e. if `ensemble_tick` is 400,
`lease_duration` will default to 600.

#### Worker Settings

You can choose how many workers are assigned to each peer using the
`peer_workers` setting. Workers are lightweight processes spawned by
leaders and followers. While increasing the number of workers will make
the strong consistency subsystem slightly more computationally
expensive, more workers can mean improved performance in some cases,
depending on the workload. The default is 1.

### Timeouts

You can establish timeouts for both reads and writes (puts and deletes)
using the `peer_get_timeout` and `peer_put_timeout` settings,
respectively. Both are expressed in milliseconds and default to 60000
(1 minute).

Longer timeouts will decrease the likelihood that read or write
operations will fail due to long computation times; shorter timeouts
entail shorter wait times for connecting clients, but at a higher risk
of failed operations under heavy load.

### Merkle Tree Settings

Leaders and followers in Riak's strong consistency system maintain
persistent [Merkle trees](http://en.wikipedia.org/wiki/Merkle_tree) for
all data stored by each peer. More information can be found in the
**Integrity Checking** section above. The two sections directly below
describe Merkle-tree-related parameters.

#### Tree Validation

The `tree_validation` parameter determines whether Riak considers Merkle
trees to be trusted after peers are restarted (for whatever reason).
When enabled, i.e. when `tree_validation` is set to `true` (the
default), Riak does not trust peer trees after a restart, instead
requiring the peer to sync with a trusted quorum. While this is the
safest mode because it protects Riak against silent corruption in Merkle
trees, it carries the drawback that it can reduce Riak availability by
requiring more than a simple majority of nodes to be online and
reachable when peers restart.

If you are using ensembles with N=3, we strongly recommend setting
`tree_validation` to `false`.

#### Synchronous vs. Asynchronous Tree Updates

Merkle tree updates can happen synchronously or asynchronously. This is
determined by the `synchronous_tree_updates` parameter.
When set to
`false`, which is the default, Riak responds to the client after the
first roundtrip that updates the followers' data but before the second
roundtrip required to update the followers' Merkle trees, allowing the
Merkle tree update to happen asynchronously in the background; when set
to `true`, Riak requires two quorum roundtrips to occur before replying
to the client, which can increase per-request latency.

Please note that this setting applies only to Merkle tree updates sent
to followers. Leaders _always_ update their local Merkle trees before
responding to the client. Asynchronous updates can be unsafe in certain
scenarios. For example, if a leader crashes before sending metadata
updates to followers _and_ all followers that had acknowledged the write
somehow revert the object value immediately prior to the write request,
a future read could hypothetically return the immediately preceding
value without realizing that the value was incorrect. Setting
`synchronous_tree_updates` to `false` does bear this possibility, but it
is highly unlikely.

## Strong Consistency and Active Anti-Entropy

Riak's [active anti-entropy][glossary aae] \(AAE) feature _can_ repair strongly
consistent data. Although it is not necessary to use active anti-entropy
if you are using strong consistency, we nonetheless recommend doing so.

Without AAE, all object conflicts are repaired via read repair.
Read repair, however, cannot repair conflicts in so-called "cold data,"
i.e. data that may not be read for long periods of time. While using AAE
does entail small performance losses, not using AAE can lead to problems
with silent on-disk corruption.

## Strong Consistency and Bitcask

One feature that is offered by Riak's optional [Bitcask][plan backend bitcask]
backend is object expiry. If you are using strong consistency and
Bitcask together, you should be aware that object metadata is often
updated by the strong consistency subsystem during leader changes, which
typically take place when nodes go down or during network partitions.
When these metadata updates take place, the time to live (TTL) of the
object is refreshed, which can lead to general unpredictability in
objects' TTL. Although leader changes will be rare in many clusters, we
nonetheless recommend that you use object expiry in strongly consistent
buckets only in situations where these occasional irregularities are
acceptable.

## Important Caveats

The following Riak features are not currently available in strongly
consistent buckets:

* [Secondary indexes][cluster ops 2i] --- If you do attach
  secondary index metadata to objects in strongly consistent buckets,
  strongly consistent operations can still proceed, but that metadata
  will be silently ignored.
* [Riak Data Types][dev data types] --- Data Types can currently be
  used only in an eventually consistent fashion.
* [Using commit hooks][usage commit hooks] --- Neither pre- nor
  post-commit hooks are supported in strongly consistent buckets. If you
  do associate a strongly consistent bucket with one or more commit
  hooks, strongly consistent operations can proceed as normal in that
  bucket, but all commit hooks will be silently ignored.

Furthermore, you should also be aware that strong consistency guarantees
are applied only at the level of single keys.
There is currently no +support within Riak for strongly consistent operations against multiple +keys, although it is always possible to incorporate client-side write +and read locks in applications that use strong consistency. + +## Known Issues + +There are a few known issues that you should be aware of when using the +latest version of strong consistency. + +* **Consistent reads of never-written keys create tombstones** +--- +A + [tombstone][cluster ops obj del] will be written if you perform a read + against a key that a majority of peers claims to not exist. This is + necessary for certain corner cases in which offline or unreachable + replicas containing partially written data need to be rolled back in + the future. +* **Consistent keys and key listing** +--- +In Riak, key listing + operations, such as listing all the keys in a bucket, do not filter + out tombstones. While this is rarely a problem for + non-strongly-consistent keys, it does present an issue for strong + consistency due to the tombstone issues mentioned above. +* **Secondary indexes not supported** +--- +Strongly consistent + operations do not support [secondary indexes][cluster ops 2i] \(2i) at this time. Furthermore, any other metadata + attached to objects, even if not related to 2i, will be silently + ignored by Riak in strongly consistent buckets. +* **Multi-Datacenter Replication not supported** +--- +At this time, + consistent keys are *not* replicated across clusters using Multi- + Datacenter Replication \(MDC). This is because MDC Replication currently supports only eventually consistent replication across clusters. Mixing strongly + consistent data within a cluster with eventually consistent data + between clusters is difficult to reason about from the perspective of + applications. In a future version of Riak, we will add support for + strongly consistent replication across multiple datacenters/clusters. +* **Client library exceptions** +--- +Basho's official [client + libraries][dev client libraries] convert errors returned by Riak into generic exceptions, + with a message derived from the returned server-side error message. + + diff --git a/content/riak/kv/3.0.4/configuring/v2-multi-datacenter.md b/content/riak/kv/3.0.4/configuring/v2-multi-datacenter.md new file mode 100644 index 0000000000..d6b9e67612 --- /dev/null +++ b/content/riak/kv/3.0.4/configuring/v2-multi-datacenter.md @@ -0,0 +1,160 @@ +--- +title_supertext: "Configuring:" +title: "V2 Multi-Datacenter Replication" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "V2 Multi-Datacenter" + identifier: "configuring_v2" + weight: 210 + parent: "configuring" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.4/ops/mdc/v2/configuration + - /riak/kv/3.0.4/ops/mdc/v2/configuration +--- + +[config v2 ssl]: {{}}riak/kv/3.0.4/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.4/configuring/v3-multi-datacenter/) instead. +{{% /note %}} + +Riak's Multi-Datacenter Replication capabilities offer a +variety of configurable parameters. + +## File + +The configuration for replication is kept in the `riak_repl` section of +each node's `advanced.config`. 
+
+```advancedconfig
+{riak_repl, [
+    {fullsync_on_connect, true},
+    {fullsync_interval, 360},
+    % Debian/Centos/RHEL:
+    {data_root, "/var/lib/riak/data/riak_repl"},
+    % Solaris:
+    % {data_root, "/opt/riak/data/riak_repl"},
+    % FreeBSD/SmartOS:
+    % {data_root, "/var/db/riak/riak_repl"},
+    {queue_size, 104857600},
+    {server_max_pending, 5},
+    {client_ack_frequency, 5}
+]}
+```
+
+## Usage
+
+These settings are configured using the standard Erlang config file
+syntax, i.e. `{Setting, Value}`. For example, if you wished to set
+`ssl_enabled` to `true`, you would insert the following line into the
+`riak_repl` section (appending a comma if you have more settings to
+follow):
+
+```advancedconfig
+{riak_repl, [
+    % Other configs
+    {ssl_enabled, true},
+    % Other configs
+]}
+```
+
+## Settings
+
+Once your configuration is set, you can verify its correctness by
+running the following command:
+
+```bash
+riak chkconfig
+```
+
+The output from this command will point you to syntactical and other
+errors in your configuration files.
+
+A full list of configurable parameters can be found in the sections
+below.
+
+## Fullsync Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`fullsync_on_connect` | `true`, `false` | `true` | Whether or not to initiate a fullsync on initial connection from the secondary cluster
+`fullsync_strategies` | `keylist` | `[keylist]` | A *list* of fullsync strategies to be used by replication.<br>**Note**: Please contact Basho support for more information.
+`fullsync_interval` | `mins` (integer), `disabled` | `360` | How often to initiate a fullsync of data, in minutes. This is measured from the completion of one fullsync operation to the initiation of the next. This setting only applies to the primary cluster (listener). To disable fullsync, set `fullsync_interval` to `disabled` and `fullsync_on_connect` to `false`.
+
+## SSL Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [4](#f4).
+
+## Queue, Object, and Batch Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`queue_size` | `bytes` (integer) | `104857600` (100 MiB) | The size of the replication queue in bytes before the replication leader will drop requests. If requests are dropped, a fullsync will be required. Information about dropped requests is available using the `riak-repl status` command
+`server_max_pending` | `max` (integer) | `5` | The maximum number of objects the leader will wait to get an acknowledgment from the remote location before queuing the request
+`vnode_gets` | `true`, `false` | `true` | If `true`, repl will do a direct get against the vnode, rather than use a `GET` finite state machine
+`shuffle_ring` | `true`, `false` | `true` | If `true`, the ring is shuffled randomly. If `false`, the ring is traversed in order. Useful when a sync is restarted to reduce the chance of syncing the same partitions.
+`diff_batch_size` | `objects` (integer) | `100` | Defines how many fullsync objects to send before waiting for an acknowledgment from the client site + +## Client Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`client_ack_frequency` | `freq` (integer) | `5` | The number of requests a leader will handle before sending an acknowledgment to the remote cluster +`client_connect_timeout` | `ms` (integer) | `15000` | The number of milliseconds to wait before a client connection timeout occurs +`client_retry_timeout` | `ms` (integer) | `30000` | The number of milliseconds to wait before trying to connect after a retry has occurred + +## Buffer Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`sndbuf` | `bytes` (integer) | OS dependent | The buffer size for the listener (server) socket measured in bytes +`recbuf` | `bytes` (integer) | OS dependent | The buffer size for the site (client) socket measured in bytes + +## Worker Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`max_get_workers` | `max` (integer) | `100` | The maximum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`max_put_workers` | `max` (integer) | `100` | The maximum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). +`min_get_workers` | `min` (integer) | `5` | The minimum number of get workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [2](#f2). +`min_put_workers` | `min` (integer) | `5` | The minimum number of put workers spawned for fullsync. Every time a replication difference is found, a `GET` will be performed to get the actual object to send. See [3](#f3). + + +1. SSL depth is the maximum number of non-self-issued + intermediate certificates that may follow the peer certificate in a valid + certificate chain. If depth is `0`, the PEER must be signed by the trusted + ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2` + then PEER, CA, CA, ROOT-CA and so on. + +2. Each get worker spawns 2 processes, one for the work and + one for the get FSM (an Erlang finite state machine implementation for `GET` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +3. Each put worker spawns 2 processes, one for the work, and + one for the put FSM (an Erlang finite state machine implementation for `PUT` + requests). Be sure that you don't run over the maximum number of allowed + processes in an Erlang VM (check `vm.args` for a `+P` property). + +4. If the ACL is specified and not the special value `*`, + peers presenting certificates not matching any of the patterns will not be + allowed to connect. + If no ACLs are configured, no checks on the common name are done, except + as described for [Identical Local and Peer Common Names][config v2 ssl]. 
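+
+When tuning the queue settings above, it can help to watch the
+replication statistics on the listener cluster. The following is a
+minimal sketch, assuming the stat names shown in the quick-start's
+sample `riak-repl status` output (exact fields may vary by version):
+
+```bash
+# Check for dropped realtime objects and current queue pressure; a
+# steadily growing dropped_count suggests raising {queue_size, ...}.
+riak-repl status | grep -E 'dropped_count|queue_length|queue_byte_size'
+
+# After editing advanced.config, confirm the file still parses:
+riak chkconfig
+```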
+ + + + diff --git a/content/riak/kv/3.0.4/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/3.0.4/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..eb2555d32b --- /dev/null +++ b/content/riak/kv/3.0.4/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,82 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "With NAT" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "With NAT" + identifier: "configuring_v2_replication_nat" + weight: 101 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.4/ops/mdc/v2/nat + - /riak/kv/3.0.4/ops/mdc/v2/nat +--- + +[config v2 ssl]: {{}}riak/kv/3.0.4/configuring/v2-multi-datacenter/ssl + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.4/configuring/v3-multi-datacenter/nat/) instead. +{{% /note %}} + +Riak supports replication of data on networks that use static +NAT. This capability can be used for replicating data over the internet +where servers have both internal and public IP addresses (see [Riak +REPL SSL][config v2 ssl] if you replicate data over a public network). + +## Requirements + +In order for Multi-Datacenter Replication to work on a server configured +with NAT, the NAT addresses must be configured statically. + +## Example + +Imagine the following scenario: + +* Server A is the source of replicated data +* Servers B and C would like to be clients of the replicated data + +Server A is set up with static NAT, configured for IP addresses: + + * `192.168.1.10` (internal) and `50.16.238.123` (public) + +Server A replication will listen on: + + * the internal IP address `192.168.1.10`, port `9010` + * the public IP address `50.16.238.123`, port `9011` + +Server B is set up with a single public IP address: `50.16.238.200` + + * Server B replication will connect as a client to the public IP + address `50.16.238.123`, port `9011` + +Server C is set up with a single internal IP address: `192.168.1.20` + + * Server C replication will connect as a client to the internal IP + address of `192.168.1.10`, port `9010` + +Configure a listener on Server A: + +```bash +riak-repl add-nat-listener riak@192.168.1.10 192.168.1.10 9010 50.16.238.123 9011 +``` + +Configure a site (client) on Server B: + +```bash +riak-repl add-site 50.16.238.123 9011 server_a_to_b +``` + +Configure a site (client) on Server C: + +```bash +riak-repl add-site 192.168.1.10 9010 server_a_to_c +``` + + + + diff --git a/content/riak/kv/3.0.4/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/3.0.4/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..05ba6f6158 --- /dev/null +++ b/content/riak/kv/3.0.4/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,371 @@ +--- +title_supertext: "V2 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Quickstart" + identifier: "configuring_v2_quickstart" + weight: 100 + parent: "configuring_v2" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.4/ops/mdc/v2/quick-start + - /riak/kv/3.0.4/ops/mdc/v2/quick-start +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{}}riak/kv/3.0.4/configuring/v3-multi-datacenter/quick-start/) instead. 
+{{% /note %}}
+
+The Riak Multi-Datacenter Replication Quick Start will walk you through
+the process of configuring Riak's version 2 Replication to perform
+replication between two sample Riak clusters in separate networks. This
+guide will also cover bidirectional replication, which is accomplished
+by setting up unidirectional replication in both directions between the
+clusters.
+
+## Prerequisites
+
+This guide assumes that you have completed the following steps:
+
+* [Installing Riak][install index]
+* [Performing System Performance Tuning][perf index]
+* [Reviewing Configuration][config v2 mdc]
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following
+3-node Riak clusters:
+
+#### Cluster 1
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node1` | `172.16.1.11` | `riak@172.16.1.11`
+`node2` | `172.16.1.12` | `riak@172.16.1.12`
+`node3` | `172.16.1.13` | `riak@172.16.1.13`
+
+#### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `192.168.1.21` | `riak@192.168.1.21`
+`node5` | `192.168.1.22` | `riak@192.168.1.22`
+`node6` | `192.168.1.23` | `riak@192.168.1.23`
+
+**Note**: The addresses used in these example clusters are contrived,
+non-routable addresses. In real-world applications, however, these
+addresses would need to be routable over the public Internet.
+
+## Set Up Cluster1 → Cluster2 Replication
+
+### Set Up the Listeners on Cluster1 (Source cluster)
+
+On a node in Cluster1, `node1` for example, identify the nodes that will
+be listening to connections from replication clients with `riak-repl
+add-listener <nodename> <listen_ip> <port>` for each node that will be
+listening for replication clients.
+
+```bash
+riak-repl add-listener riak@172.16.1.11 172.16.1.11 9010
+riak-repl add-listener riak@172.16.1.12 172.16.1.12 9010
+riak-repl add-listener riak@172.16.1.13 172.16.1.13 9010
+```
+
+### Set Up the Site on Cluster2 (Site cluster)
+
+On a node in Cluster2, `node4` for example, inform the replication
+clients where the Source Listeners are located with `riak-repl add-site
+<ipaddr> <portnum> <sitename>`. Use the IP address(es) and port(s) you
+configured in the earlier step. For `sitename` enter `Cluster1`.
+
+```bash
+riak-repl add-site 172.16.1.11 9010 Cluster1
+```
+
+**Note**: While a Listener needs to be added to each node, only a single
+Site needs to be added on the Site cluster. Once connected to the Source
+cluster, it will get the locations of the rest of the Listeners in the
+Source cluster.
+
+### Verify the Replication Configuration
+
+Verify the replication configuration using `riak-repl status` on both a
+Cluster1 node and a Cluster2 node. A full description of the `riak-repl
+status` command's output can be found in the documentation for
+`riak-repl`'s [status output][cluster ops v2 mdc#status].
+
+On the Cluster1 node, verify that there are `listener_<nodename>`s for
+each listening node, and that `leader` and `server_stats` are populated.
+They should look similar to the following:
+
+```
+listener_riak@172.16.1.11: "172.16.1.11:9010"
+listener_riak@172.16.1.12: "172.16.1.12:9010"
+listener_riak@172.16.1.13: "172.16.1.13:9010"
+leader: 'riak@172.16.1.11'
+server_stats: [{<8051.3939.0>,
+                {message_queue_len,0},
+                {status,[{site,"Cluster2"},
+                         {strategy,riak_repl_keylist_server},
+                         {fullsync_worker,<8051.3940.0>},
+                         {dropped_count,0},
+                         {queue_length,0},
+                         {queue_byte_size,0},
+                         {state,wait_for_partition}]}}]
+```
+
+On the Cluster2 node, verify that `Cluster1_ips`, `leader`, and
+`client_stats` are populated.
They should look similar to the following: + +``` +Cluster1_ips: "172.16.1.11:9010, 172.16.1.12:9010, 172.16.1.13:9010" +leader: 'riak@192.168.1.21' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"172.16.1.11",9010}, + {state,wait_for_fullsync}]}}] +``` + +### Testing Realtime Replication + +That's all there is to it! When `PUT` requests are coordinated by +Cluster1, these operations will be replicated to Cluster2. + +You can use the following example script to verify that `PUT` operations +sent to Cluster1 are being replicated to Cluster2: + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C1 to C2 consistent +``` + +## Set Up Cluster2 → Cluster1 Replication + +### About Bidirectional Replication + +Multi-Datacenter support can also be configured to replicate in both +directions, ensuring eventual consistency between your two datacenters. +Setting up bidirectional replication is as simple as repeating the steps +above in the other direction, i.e. from Cluster2 to Cluster1. + +### Set Up the Listeners on Cluster2 (Source cluster) + +On a node in Cluster2, `node4` for example, identify the nodes that will +be listening to connections from replication clients with `riak-repl +add-listener ` for each node that will be +listening for replication clients. + +```bash +riak-repl add-listener riak@192.168.1.21 192.168.1.21 9010 +riak-repl add-listener riak@192.168.1.22 192.168.1.22 9010 +riak-repl add-listener riak@192.168.1.23 192.168.1.23 9010 +``` + +### Set Up the Site on Cluster1 (Site cluster) + +On a node in Cluster1, `node1` for example, inform the replication +clients where the Source Listeners are with `riak-repl add-site + `. Use the IP address(es) and port(s) you configured in +the earlier step. For `sitename` enter **Cluster2**. + +```bash +riak-repl add-site 192.168.1.21 9010 Cluster2 +``` + +### Verify the Replication Configuration + +Verify the replication configuration using `riak-repl status` on a +Cluster1 node and a Cluster2 node. A full description of the `riak-repl +status` command's output can be found in the documentation for +`riak-repl`'s [status output][cluster ops v2 mdc#status]. + +On the Cluster1 node, verify that `Cluster2_ips`, `leader`, and +`client_stats` are populated. 
They should look similar to the following: + +``` +Cluster2_ips: "192.168.1.21:9010, 192.168.1.22:9010, 192.168.1.23:9010" +leader: 'riak@172.16.1.11' +client_stats: [{<8051.3902.0>, + {message_queue_len,0}, + {status,[{site,"Cluster2"}, + {strategy,riak_repl_keylist_client}, + {fullsync_worker,<8051.3909.0>}, + {put_pool_size,5}, + {connected,"192.168.1.21",9010}, + {state,wait_for_fullsync}]}}] +``` + +On the Cluster2 node, verify that there are listener entries for each +listening node, and that `leader` and `server_stats` are populated. They +should look similar to the following: + +``` +listener_riak@192.168.1.21: "192.168.1.21:9010" +listener_riak@192.168.1.22: "192.168.1.22:9010" +listener_riak@192.168.1.23: "192.168.1.23:9010" +leader: 'riak@192.168.1.21' +server_stats: [{<8051.3939.0>, + {message_queue_len,0}, + {status,[{site,"Cluster1"}, + {strategy,riak_repl_keylist_server}, + {fullsync_worker,<8051.3940.0>}, + {dropped_count,0}, + {queue_length,0}, + {queue_byte_size,0}, + {state,wait_for_partition}]}}] +``` + +### Testing Realtime Replication + +You can use the following script to perform `PUT`s and `GET`s on both +sides of the replication and verify that those changes are replicated to +the other side. + +```bash +#!/bin/bash + +VALUE=`date` +CLUSTER_1_IP=172.16.1.11 +CLUSTER_2_IP=192.168.1.21 + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_1_IP}:8098/riak/replCheck/c1 + +CHECKPUT_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c1` + +if [ "${VALUE}" = "${CHECKPUT_C1}" ]; then + echo "C1 PUT Successful" +else + echo "C1 PUT Failed" + exit 1 +fi + +curl -s -X PUT -d "${VALUE}" http://${CLUSTER_2_IP}:8098/riak/replCheck/c2 +CHECKPUT_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKPUT_C2}" ]; then + echo "C2 PUT Successful" +else + echo "C2 PUT Failed" + exit 1 +fi + +CHECKREPL_C1_TO_C2=`curl -s http://${CLUSTER_2_IP}:8098/riak/replCheck/c1` +CHECKREPL_C2_TO_C1=`curl -s http://${CLUSTER_1_IP}:8098/riak/replCheck/c2` + +if [ "${VALUE}" = "${CHECKREPL_C1_TO_C2}" ]; then + echo "C1 to C2 consistent" +else + echo "C1 to C2 inconsistent + C1:${CHECKPUT_C1} + C2:${CHECKREPL_C1_TO_C2}" + exit 1 +fi + +if [ "${VALUE}" = "${CHECKREPL_C2_TO_C1}" ]; then + echo "C2 to C1 consistent" +else + echo "C2 to C1 inconsistent + C2:${CHECKPUT_C2} + C1:${CHECKREPL_C2_TO_C1}" + exit 1 +fi + +exit 0 +``` + +You will have to change some of the above variables for your own +environment, such as IP addresses or ports. + +If you run this script and things are working as expected, you will get +the following output: + +``` +C1 PUT Successful +C2 PUT Successful +C1 to C2 consistent +C2 to C1 consistent +``` + +## Fullsync + +During realtime replication, operations coordinated by the Source +cluster will be replicated to the Site cluster. Riak Objects are placed +in a queue on the Source cluster and streamed to the Site cluster. When +the queue is full due to high traffic or a bulk loading operation, some +objects will be dropped from replication. These dropped objects can be +sent to the Site cluster by running a fullsync operation. The settings +for the realtime replication queue and their explanations are available +in the [configuration][config v2 mdc] documentation. + +### Initiating a fullsync + +To start a fullsync operation, issue the following command on your +leader node: + +```bash +riak-repl start-fullsync +``` + +A fullsync operation may also be cancelled. If a partition is in +progress, synchronization will stop after that partition completes. 
+During cancellation, `riak-repl status` will show 'cancelled' in the
+status.
+
+```bash
+riak-repl cancel-fullsync
+```
+
+Fullsync operations may also be paused, resumed, or scheduled for
+certain times using cron jobs. A complete list of fullsync commands is
+available in the [MDC Operations][cluster ops v2 mdc] documentation.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/3.0.4/configuring/v2-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..2e29f1dcbd
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/v2-multi-datacenter/ssl.md
@@ -0,0 +1,164 @@
+---
+title_supertext: "V2 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "SSL"
+    identifier: "configuring_v2_replication_ssl"
+    weight: 103
+    parent: "configuring_v2"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.4/ops/mdc/v2/ssl
+  - /riak/kv/3.0.4/ops/mdc/v2/ssl
+---
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.4/configuring/v3-multi-datacenter/ssl/) instead.
+{{% /note %}}
+
+## Features
+
+Riak REPL SSL support consists of the following items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings
+in the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {ssl_enabled, true},
+             {certfile, "/full/path/to/site1-cert.pem"},
+             {keyfile, "/full/path/to/site1-key.pem"},
+             {cacertdir, "/full/path/to/cacertsdir"}
+             % ...
+            ]}
+```
+
+The `cacertdir` is a directory containing all of the CA certificates
+needed to verify the CA chain back to the root.
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_repl` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+             % ...
+            ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+             % ...
+            ]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {peer_common_name_acl, "*"}
+             % ...
+            ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_repl` section of your `advanced.config`:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {ssl_depth, ...}
+             % ...
+            ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. By default,
+no more than one (1) intermediate certificate is allowed between the peer
+certificate and root CA. By definition, intermediate certificates cannot
+be self-signed.
+
+For example:
+
+  * A depth of 0 indicates that the certificate must be signed directly
+    by a root certificate authority (CA)
+  * A depth of 1 indicates that the certificate may be signed by at most
+    one intermediate CA, followed by a root CA
+  * A depth of 2 indicates that the certificate may be signed by at most
+    two intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL is ONLY available in Riak 1.2+.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+You can generate your own CA and keys by using [this
+guide](http://www.debian-administration.org/articles/618).
+
+Make sure that you remove the password protection from the keys you
+generate.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/v3-multi-datacenter.md b/content/riak/kv/3.0.4/configuring/v3-multi-datacenter.md
new file mode 100644
index 0000000000..3c59b9869e
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/v3-multi-datacenter.md
@@ -0,0 +1,161 @@
+---
+title_supertext: "Configuring:"
+title: "V3 Multi-Datacenter Replication"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "V3 Multi-Datacenter"
+    identifier: "configuring_v3"
+    weight: 200
+    parent: "configuring"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.4/ops/mdc/v3/configuration
+  - /riak/kv/3.0.4/ops/mdc/v3/configuration
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#advanced-configuration
+[config v3 ssl#verify-peer]: {{<baseurl>}}riak/kv/3.0.4/configuring/v3-multi-datacenter/ssl/#verifying-peer-certificates
+
+> **Note on the `cluster_mgr` setting**
+>
+> The `cluster_mgr` setting _must_ be set in order for version 3 replication to run.
+
+
+The configuration for Multi-Datacenter (MDC) Replication is kept in
+both the `riak_core` and `riak_repl` sections of the `app.config`
+configuration file.
+
+If you are using Riak KV version 2.0 or later, configuration is managed
+using the `advanced.config` files on
+each node. The semantics of the `advanced.config` file are similar to
+the formerly used `app.config` file.
For more information and for a list +of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced]. + +Here is a sample of the syntax: + +```advancedconfig +{riak_core, [ + %% Every *node* runs one cluster_mgr + {cluster_mgr, {"0.0.0.0", 9080 }}, + % ... +]}, +{riak_repl, [ + %% Pick the correct data_root for your platform + %% Debian/Centos/RHEL: + {data_root, "/var/lib/riak/data/riak_repl"}, + %% Solaris: + %% {data_root, "/opt/riak/data/riak_repl"}, + %% FreeBSD/SmartOS: + %% {data_root, "/var/db/riak/riak_repl"}, + {max_fssource_cluster, 5}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {fullsync_on_connect, false}, + % ... +]} +``` + +## Settings + +Riak MDC configuration is set using the standard Erlang config file +syntax `{Setting, Value}`. For example, if you wished to set +`fullsync_on_connect` to `false`, you would insert this line into the +`riak_repl` section (appending a comma if you have more settings to +follow): + +```advancedconfig +{fullsync_on_connect, false} +``` + +Once your configuration is set, you can verify its correctness by +running the `riak` command-line tool: + +```bash +riak chkconfig +``` + +## riak_repl Settings + +Setting | Options | Default | Description +:-------|:--------|:--------|:----------- +`cluster_mgr` | `{ip_address, port}` | **REQUIRED** | The cluster manager will listen for connections from remote clusters on this `ip_address` and `port`. Every node runs one cluster manager, but only the cluster manager running on the `cluster_leader` will service requests. This can change as nodes enter and leave the cluster. The value is a combination of an IP address (**not hostname**) followed by a port number. +`max_fssource_cluster` | `nodes` (integer) | `5` | The hard limit on the number of workers which will participate in the source cluster during a fullsync replication. This means that if one has configured fullsync for two different clusters, both with a `max_fssource_cluster` of 5, 10 fullsync workers can be in progress. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssource_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers that will be running on each individual node in a source cluster. This is a hard limit for all fullsyncs enabled; additional fullsync configurations will not increase the number of fullsync workers allowed to run on any node. Only affects nodes on the source cluster on which this parameter is defined via the configuration file or command line. +`max_fssink_node` | `nodes` (integer) | `1` | Limits the number of fullsync workers allowed to run on each individual node in a sink cluster. This is a hard limit for all fullsync sources interacting with the sink cluster. Thus, multiple simultaneous source connections to the sink cluster will have to share the sink nodes number of maximum connections. Only affects nodes on the sink cluster on which this parameter is defined via the configuration file or command line. 
+`fullsync_on_connect` | `true`, `false` | `true` | Whether to initiate a fullsync on initial connection from the secondary cluster
+`data_root` | `path` (string) | `data/riak_repl` | Path (relative or absolute) to the working directory for the replication process
+`fullsync_interval` | `minutes` (integer) OR `[{sink_cluster, minutes(integer)}, ...]` | `360` | A single integer value representing the duration to wait in minutes between fullsyncs, or a list of `{"clustername", time_in_minutes}` pairs for each sink participating in fullsync replication.
+`rtq_overload_threshold` | `length` (integer) | `2000` | The maximum length to which the realtime replication queue can grow before new objects are dropped. Dropped objects will need to be replicated with a fullsync.
+`rtq_overload_recover` | `length` (integer) | `1000` | The length to which the realtime replication queue, in an overload mode, must shrink before new objects are replicated again.
+`rtq_max_bytes` | `bytes` (integer) | `104857600` | The maximum size to which the realtime replication queue can grow before new objects are dropped. Defaults to 100 MB. Dropped objects will need to be replicated with a fullsync.
+`proxy_get` | `enabled`, `disabled` | `disabled` | Enable Riak CS `proxy_get` and block filter.
+`rt_heartbeat_interval` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+`rt_heartbeat_timeout` | `seconds` (integer) | `15` | A full explanation can be found [below](#heartbeat-settings).
+
+
+## riak_core Settings
+
+Setting | Options | Default | Description
+:-------|:--------|:--------|:-----------
+`keyfile` | `path` (string) | `undefined` | Fully qualified path to an SSL `.pem` key file
+`cacertdir` | `path` (string) | `undefined` | The `cacertdir` is a fully-qualified directory containing all the CA certificates needed to verify the CA chain back to the root
+`certfile` | `path` (string) | `undefined` | Fully qualified path to a `.pem` cert file
+`ssl_depth` | `depth` (integer) | `1` | Set the depth to check for SSL CA certs. See [1](#f1).
+`ssl_enabled` | `true`, `false` | `false` | Enable SSL communications
+`peer_common_name_acl` | `cert` (string) | `"*"` | Verify an SSL peer’s certificate common name. You can provide an ACL as a list of common name *patterns*, and you can wildcard the leftmost part of any of the patterns, so `*.basho.com` would match `site3.basho.com` but not `foo.bar.basho.com` or `basho.com`. See [2](#f2).
+
+
+## Heartbeat Settings
+
+There are two settings in the `riak_repl` section of `advanced.config`
+for the periodic "heartbeat" that is sent during realtime replication
+from the source to the sink cluster to verify the sink cluster's
+liveness. The `rt_heartbeat_interval` setting determines how often the
+heartbeat is sent (in seconds). If a heartbeat is sent and a response is
+not received, Riak will wait `rt_heartbeat_timeout` seconds before
+attempting to re-connect to the sink; if any data is received from the
+sink, even if it is not heartbeat data, the timer will be reset. Setting
+`rt_heartbeat_interval` to `undefined` will disable the heartbeat.
+
+One of the consequences of lowering the timeout threshold arises when
+connections are working properly but are slow to respond (perhaps due to
+heavy load). In this case, shortening the timeout means that Riak may
+attempt to re-connect more often than it needs to. On the other hand,
+lengthening the timeout will make Riak less sensitive to cases in which
+the connection really has been compromised.
+
+1. SSL depth is the maximum number of non-self-issued
+   intermediate certificates that may follow the peer certificate in a valid
+   certificate chain. If depth is `0`, the PEER must be signed by the trusted
+   ROOT-CA directly; if `1` the path can be PEER, CA, ROOT-CA; if depth is `2`
+   then PEER, CA, CA, ROOT-CA; and so on.
+
+2. If the ACL is specified and not the special value `*`,
+   peers presenting certificates not matching any of the patterns will not be
+   allowed to connect.
+   If no ACLs are configured, no checks on the common name are done, except
+   as described for [Identical Local and Peer Common Names][config v3 ssl#verify-peer].
+
+## Default Bucket Properties
+
+Riak KV version 2.2.0 changed the values of the default bucket properties hash. This will cause an issue replicating between Riak KV clusters with versions 2.2.0 or greater and Riak KV clusters with versions less than 2.2.0.
+
+To replicate between Riak KV versions 2.2.0 or greater and Riak KV clusters less than version 2.2.0, add the necessary override in the `advanced.config` file:
+
+```advancedconfig
+{riak_repl, [
+    {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}]}
+    ]}
+]}
+```
+
+If all of the Replication clusters are running Riak KV 2.2.0 or greater, this override is no longer necessary and should be removed.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/3.0.4/configuring/v3-multi-datacenter/nat.md
new file mode 100644
index 0000000000..b7d48c3734
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/v3-multi-datacenter/nat.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "With NAT"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "With NAT"
+    identifier: "configuring_v3_replication_nat"
+    weight: 101
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.4/ops/mdc/v3/nat
+  - /riak/kv/3.0.4/ops/mdc/v3/nat
+---
+
+[config v3 ssl]: {{<baseurl>}}riak/kv/3.0.4/configuring/v3-multi-datacenter/ssl
+
+Riak's Version 3 Replication supports replication of data on
+networks that use static NAT.
+
+This can be used for replicating data over the internet where servers
+have both internal and public IP addresses (see the [Replication SSL docs][config v3 ssl] if you replicate data over a public network).
+
+### Requirements
+
+In order for Replication to work on a server configured with NAT, the
+NAT addresses must be configured *statically*.
+
+## Configuration
+
+NAT rules can be configured at runtime, from the command line.
+
+* `riak-repl nat-map show`
+
+    Shows the current NAT mapping table
+
+* `riak-repl nat-map add <external_ip>[:port] <internal_ip>`
+
+    Adds a NAT map from the external IP, with an optional port, to an
+    internal IP. The port number refers to a port that is automatically
+    mapped to the internal `cluster_mgr` port number.
+
+* `riak-repl nat-map del <external_ip>[:port] <internal_ip>`
+
+    Deletes a specific NAT map entry.
+
+### Applying Changes at Runtime
+
+* Realtime NAT replication changes will be applied once realtime is
+  stopped and started using the following commands:
+
+    * `riak-repl realtime stop <clustername>`
+    * `riak-repl realtime start <clustername>`
+
+* Fullsync NAT replication changes will be applied on the next run of a
+  fullsync, or you can stop and start the current fullsync:
+
+    * `riak-repl fullsync stop <clustername>`
+    * `riak-repl fullsync start <clustername>`
+
+
+## Example
+
+* Cluster_A is the **source** of replicated data.
+* Cluster_B and Cluster_C are the **sinks** of the replicated data.
+
+### Cluster_A Setup
+
+Cluster_A is set up with nodes using the following **internal** IP
+addresses:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.1.20` | -
+`192.168.1.21` | -
+`192.168.1.22` | -
+`192.168.1.23` | -
+`192.168.1.24` | -
+
+### Cluster_B Setup
+
+The nodes in Cluster_B will be configured as follows:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.2.40` | `50.16.238.120:5555`
+`192.168.2.41` | `50.16.238.121:5555`
+`192.168.2.42` | `50.16.238.122:5555`
+`192.168.2.43` | `50.16.238.123:5555`
+`192.168.2.44` | `50.16.238.124:5555`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT port listens on `5555`.
+
+### Cluster_C Setup
+
+The nodes in Cluster_C are set up with **static NAT**, configured with the
+following IP addresses:
+
+Internal IP | Public IP
+---------------|-------------------
+`192.168.3.60` | `50.16.238.200:5550`
+`192.168.3.61` | `50.16.238.200:5551`
+`192.168.3.62` | `50.16.238.200:5552`
+`192.168.3.63` | `50.16.238.200:5553`
+`192.168.3.64` | `50.16.238.200:5554`
+
+In this example, the `cluster_mgr` port number is the default of `9080`,
+while the configured NAT ports listen on `5550` through `5554` on the
+single public address `50.16.238.200`.
+
+```bash
+# on any node of Cluster_A
+riak-repl clustername Server_A
+
+# on any node of Cluster_B
+riak-repl clustername Server_B
+
+# on any node of Cluster_C
+riak-repl clustername Server_C
+
+# on 50.16.238.120 of Cluster_B
+riak-repl nat-map add 50.16.238.120:5555 192.168.2.40
+# on 50.16.238.121 of Cluster_B
+riak-repl nat-map add 50.16.238.121:5555 192.168.2.41
+# on 50.16.238.122 of Cluster_B
+riak-repl nat-map add 50.16.238.122:5555 192.168.2.42
+# on 50.16.238.123 of Cluster_B
+riak-repl nat-map add 50.16.238.123:5555 192.168.2.43
+# on 50.16.238.124 of Cluster_B
+riak-repl nat-map add 50.16.238.124:5555 192.168.2.44
+
+# on 192.168.3.60 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5550 192.168.3.60
+# on 192.168.3.61 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5551 192.168.3.61
+# on 192.168.3.62 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5552 192.168.3.62
+# on 192.168.3.63 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5553 192.168.3.63
+# on 192.168.3.64 of Cluster_C
+riak-repl nat-map add 50.16.238.200:5554 192.168.3.64
+
+
+# Connect replication from Cluster_A to Cluster_B:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.120:5555
+# You can connect to any node in Cluster_B with NAT-mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
+
+# Connect replication from Cluster_A to Cluster_C:
+# on any node of Cluster_A
+riak-repl connect 50.16.238.200:5550
+# You can connect to any node in Cluster_C with NAT-mapped IPs/ports.
+# This command only needs to be run *once* for a cluster.
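+
+# Optional sanity check before enabling realtime replication (a sketch;
+# the output columns may vary by version): confirm both sinks appear.
+riak-repl connections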
+ + +# on any node from Cluster_A +riak-repl realtime enable Cluster_B +riak-repl realtime enable Cluster_C + +riak-repl realtime start Cluster_B +riak-repl realtime start Cluster_C +``` + + + + diff --git a/content/riak/kv/3.0.4/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/3.0.4/configuring/v3-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..af0db2e355 --- /dev/null +++ b/content/riak/kv/3.0.4/configuring/v3-multi-datacenter/quick-start.md @@ -0,0 +1,172 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication:" +title: "Quickstart" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Quickstart" + identifier: "configuring_v3_quickstart" + weight: 100 + parent: "configuring_v3" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.4/ops/mdc/v3/quick-start + - /riak/kv/3.0.4/ops/mdc/v3/quick-start +--- + +[perf index]: {{}}riak/kv/3.0.4/using/performance +[config v3 mdc]: {{}}riak/kv/3.0.4/configuring/v3-multi-datacenter +[cluster ops v3 mdc]: {{}}riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter + +This guide will walk you through the process of configuring Riak's v3 +Replication to perform replication between two sample Riak clusters on +separate networks. This guide will also cover bidirectional replication, +which is accomplished by setting up unidirectional replication in both +directions between the clusters. It is important to note that both +clusters must have the same ring size, but can have a different number +of nodes. + +## Prerequisites + +This guide assumes that you have completed the following steps: + +* Install [Riak][install index] +* Perform [System Tuning][perf index] +* Review [Configuration][config v3 mdc] + +## About v3 Replication in 1.3 and higher + +In Riak's v3 Replication from Riak KV version 1.3 onwards, the nomenclature for Source and Site +clusters has changed. To more accurately reflect the behavior of each of +the clusters, "listeners" and "sites" are now known as "sources" and +"sinks." Data transfer now originates at the "source" and replicates to +the "sink;" initiation is always from the primary (source) to the backup +(sink) data center. + +Additionally, knowledge of the state of each cluster is now managed by a +**cluster manager** process, which greatly simplifies the setup and +maintenance of Multi-Datacenter replication. 
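+
+In practice this means that most v3 operations are driven by a small set
+of `riak-repl` commands run against cluster names rather than per-node
+listener/site pairs. A hedged sketch of the overall flow, using only
+commands that appear in the scenario below (details follow there):
+
+```bash
+# Name each cluster once; the cluster manager propagates it:
+riak-repl clustername Cluster1        # run on any Cluster1 node
+
+# Point the source at a sink's cluster manager (ip:port), then
+# enable and start realtime replication by cluster name:
+riak-repl connect 10.60.77.10:9080    # example sink address from below
+riak-repl realtime enable Cluster2
+riak-repl realtime start Cluster2
+```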
+
+## Scenario
+
+Configure Riak MDC to perform replication, given the following two
+Riak clusters, each of which consists of three nodes:
+
+### Cluster 1
+
+Name | IP | Node name
+:-----|:-------------|:-----------------
+`node1` | `10.60.67.149` | `riak@10.60.67.149`
+`node2` | `10.60.83.39` | `riak@10.60.83.39`
+`node3` | `10.60.90.252` | `riak@10.60.90.252`
+
+### Cluster 2
+
+Name | IP | Node name
+:-----|:------------|:----------------
+`node4` | `10.60.77.10` | `riak@10.60.77.10`
+`node5` | `10.60.84.41` | `riak@10.60.84.41`
+`node6` | `10.60.92.44` | `riak@10.60.92.44`
+
+
+### Set up Cluster1 → Cluster2 Connection
+
+#### Set up the Source on Cluster1
+
+On a node in Cluster1, `node1` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster1
+```
+
+#### Set up the Sink on Cluster2
+
+On a node in Cluster2, `node4` for example, initiate and name this
+cluster with `riak-repl clustername <clustername>`:
+
+```bash
+riak-repl clustername Cluster2
+```
+
+#### Connect the Source to the Sink
+
+From Cluster1, connect to the IP and port of Cluster2 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.77.10:9080
+```
+
+> The port can be found in the `riak_core` section of the `advanced.config`
+> under `cluster_mgr`.
+
+#### View Your Active Connections
+
+From Cluster1, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name         <Ctrl-Pid>      [Members]
+----             ------------         ----------      ---------
+Cluster2         Cluster2             <0.7985.0>      ["10.60.77.10:9080"] (via 10.60.77.10:9080)
+```
+
+### Set up Cluster2 → Cluster1 Connection (if bidirectional replication is desired)
+
+#### Connect the Source to the Sink
+
+From Cluster2, connect to the IP and port of Cluster1 with `riak-repl
+connect <ip>:<port>`:
+
+```bash
+riak-repl connect 10.60.67.149:9080
+```
+
+#### View Your Active Connections
+
+From Cluster2, view your active connections with `riak-repl
+connections`:
+
+```
+Sink             Cluster Name         <Ctrl-Pid>      [Members]
+----             ------------         ----------      ---------
+Cluster1         Cluster1             <0.4456.0>      ["10.60.67.149:9080"] (via 10.60.67.149:9080)
+```
+
+{{% note title="Note on connections" %}}
+At this point, if you do not have connections, replication will not work.
+Check your IP bindings by running `netstat -a` on all nodes. You should see
+`*:9080 LISTENING`. If not, you have configuration problems.
+{{% /note %}}
+
+### Enable Realtime Replication
+
+From Cluster1, run `riak-repl realtime enable <clustername>` to start
+queuing updates on Cluster1 for replication:
+
+```bash
+riak-repl realtime enable Cluster2
+```
+
+Also on Cluster1, run `riak-repl realtime start <clustername>` to
+establish connectivity from Cluster1 to Cluster2 to push queued updates:
+
+```bash
+riak-repl realtime start Cluster2
+```
+
+To enable bidirectional replication, do the reverse from Cluster2.
+Once this is done, bidirectional replication should be operating.
+
+## More information
+
+For a full list of commands, you may enter `riak-repl` to see full
+instructions on usage, or check the [Operations][cluster ops v3 mdc] documentation.
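+
+As a quick smoke test of realtime replication, you can adapt the v2
+quick-start scripts to this scenario. The sketch below is illustrative
+only: it assumes the example IPs above, Riak's default HTTP port
+(`8098`), and a throwaway bucket named `replCheck`.
+
+```bash
+#!/bin/bash
+# Write to the source (Cluster1), then read back from the sink (Cluster2).
+VALUE=`date`
+SOURCE_IP=10.60.67.149   # node1 in Cluster1
+SINK_IP=10.60.77.10      # node4 in Cluster2
+
+curl -s -X PUT -d "${VALUE}" http://${SOURCE_IP}:8098/riak/replCheck/v3
+sleep 2   # give realtime replication a moment to deliver the object
+
+REPLICATED=`curl -s http://${SINK_IP}:8098/riak/replCheck/v3`
+if [ "${VALUE}" = "${REPLICATED}" ]; then
+  echo "Cluster1 to Cluster2 consistent"
+else
+  echo "Cluster1 to Cluster2 inconsistent"
+  exit 1
+fi
+```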
+
+
+
+
diff --git a/content/riak/kv/3.0.4/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/3.0.4/configuring/v3-multi-datacenter/ssl.md
new file mode 100644
index 0000000000..0002fb195a
--- /dev/null
+++ b/content/riak/kv/3.0.4/configuring/v3-multi-datacenter/ssl.md
@@ -0,0 +1,174 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication:"
+title: "SSL"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "SSL"
+    identifier: "configuring_v3_replication_ssl"
+    weight: 103
+    parent: "configuring_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.4/ops/mdc/v3/ssl
+  - /riak/kv/3.0.4/ops/mdc/v3/ssl
+---
+
+[config reference#advanced.config]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#the-advanced-config-file
+
+## Features
+
+Riak Multi-Datacenter (MDC) Replication SSL consists of the following
+items:
+
+  * Encryption of replication data
+  * SSL certificate chain validation
+  * SSL common name whitelisting support
+
+> **Note on cross-internet traffic**
+>
+> As an alternative to Riak's built-in SSL capabilities, we
+recommend using [stunnel](https://www.stunnel.org/index.html) or a
+virtual private network (VPN) for inter-datacenter connections.
+
+## SSL Configuration
+
+To configure SSL, you will need to include the following four settings in
+the `riak_core` section of [`advanced.config`][config reference#advanced.config]:
+
+```advancedconfig
+{riak_core, [
+             % ...
+             {ssl_enabled, true},
+             {certfile, "/full/path/to/site1-cert.pem"},
+             {keyfile, "/full/path/to/site1-key.pem"},
+             {cacertdir, "/full/path/to/cacertsdir"}
+             % ...
+            ]}
+```
+
+The `cacertdir` is a directory containing all the CA certificates
+needed to verify the CA chain back to the root.
+
+{{% note title="Note on configuration" %}}
+In Version 3 replication, the SSL settings need to be placed in the
+`riak_core` section of `advanced.config` as opposed to the `riak_repl` section
+used by Version 2 replication.
+{{% /note %}}
+
+## Verifying Peer Certificates
+
+Verification of a peer's certificate common name *(CN)* is enabled by using
+the `peer_common_name_acl` property in the `riak_core` section of your
+`advanced.config` to specify an Access Control List *(ACL)*.
+
+The ACL is a list of one or more *patterns*, separated by commas. Each
+pattern may be either the exact CN of a certificate to allow, or a
+wildcard in the form `*.some.domain.name`. Pattern comparison is
+case-insensitive, and a CN matching any of the patterns is allowed to connect.
+
+For example, `["*.corp.com"]` would match `site3.corp.com` but not
+`foo.bar.corp.com` or `corp.com`. If the ACL were
+`["*.corp.com", "foo.bar.corp.com"]`, `site3.corp.com` and `foo.bar.corp.com`
+would be allowed to connect, but `corp.com` still would not.
+
+If no ACL (or only the special value `"*"`) is specified, no CN filtering
+is performed, except as described below.
+
+{{% note title="Identical Local and Peer Common Names" %}}
+As a special case supporting the view that a host's CN is a fully-qualified
+domain name that uniquely identifies a single network device, if the CNs of
+the local and peer certificates are the same, the nodes will *NOT* be allowed
+to connect.
+
+This evaluation supersedes ACL checks, so it cannot be overridden with any
+setting of the `peer_common_name_acl` property.
+{{% /note %}}
+
+### Examples
+
+The following example will only allow connections from peer certificate
+names like `db.bashosamplecorp.com` and `security.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+             % ...
+             {peer_common_name_acl, ["db.bashosamplecorp.com", "security.bashosamplecorp.com"]}
+             % ...
+            ]}
+```
+
+The following example will allow connections from peer certificate names
+like `foo.bashosamplecorp.com` or `db.bashosamplecorp.com`, but not a
+peer certificate name like `db.backup.bashosamplecorp.com`:
+
+```advancedconfig
+{riak_core, [
+             % ...
+             {peer_common_name_acl, ["*.bashosamplecorp.com"]}
+             % ...
+            ]}
+```
+
+This example will match any peer certificate name (and is the default):
+
+```advancedconfig
+{riak_core, [
+             % ...
+             {peer_common_name_acl, "*"}
+             % ...
+            ]}
+```
+
+## SSL CA Validation
+
+You can adjust the way CA certificates are validated by adding the
+following to the `riak_core` section of `advanced.config`:
+
+```advancedconfig
+{riak_core, [
+             % ...
+             {ssl_depth, 3} % Sets the depth to 3
+             % ...
+            ]}
+```
+
+**Note**: `ssl_depth` takes an integer parameter.
+
+The depth specifies the maximum number of intermediate certificates that
+may follow the peer certificate in a valid certification path. The
+intermediate certificates must not be self-signed.
+
+The following example depths illustrate this:
+
+  * a depth of `0` indicates that the certificate must be signed
+    directly by a root certificate authority (CA)
+  * a depth of `1` indicates that the certificate may be signed by at
+    most one intermediate CA, followed by a root CA
+  * a depth of `2` indicates that the certificate may be signed by at
+    most two intermediate CAs, followed by a root CA
+
+## Compatibility
+
+Replication SSL for *Version 3* is available in *Riak 1.4+*.
+
+If SSL is enabled and a connection is made to a Riak Enterprise 1.0 or
+1.1 node, the connection will be denied and an error will be logged.
+
+### Self-Signed Certificates
+
+Read how to [generate your own CA and
+keys](http://www.debian-administration.org/articles/618). Ensure that
+you remove the password protection from the keys you generate.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing.md b/content/riak/kv/3.0.4/developing.md
new file mode 100644
index 0000000000..391d6ead56
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing.md
@@ -0,0 +1,79 @@
+---
+title: "Developing with Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Developing"
+    identifier: "developing"
+    weight: 300
+    pre: lambda
+toc: true
+aliases:
+---
+
+[getting started]: ../developing/getting-started
+[usage index]: ../developing/usage
+[client libraries]: ../developing/client-libraries
+[dev data types]: ../developing/data-types
+[dev data modeling]: ../developing/data-modeling
+[apps index]: ../developing/app-guide
+[dev api index]: ../developing/api
+[dev faq]: ../developing/faq
+
+## In This Section
+
+#### [Getting Started][getting started]
+
+Step-by-step guide for getting started developing with Riak KV.
+
+[Learn More >>][getting started]
+
+#### [Usage][usage index]
+
+A set of tutorials covering common development tasks such as performing CRUD operations, working with search, and using bucket types.
+
+[Learn More >>][usage index]
+
+#### [Client Libraries][client libraries]
+
+Overview of client libraries for a variety of programming languages and environments.
+ +[Learn More >>][client libraries] + +#### [Data Types][dev data types] + +Overview and guide to working with data types in Riak KV. + +[Learn More >>][dev data types] + +#### [Data Modeling][dev data modeling] + +Information on use cases and data models that are a good fit for Riak KV. + +[Learn More >>][dev data modeling] + +#### [Application Guide][apps index] + +A guide that will walk you through questions to ask about your use case before getting started developing applications with Riak KV. + +[Learn More >>][apps index] + +#### [APIs Reference][dev api index] + +Information and reference material on Riak KV APIs. + +[Learn More >>][dev api index] + +#### [FAQ][dev faq] + +Frequently asked questions when developing applications with Riak KV. + +[Learn More >>][dev faq] + + + + + + diff --git a/content/riak/kv/3.0.4/developing/api.md b/content/riak/kv/3.0.4/developing/api.md new file mode 100644 index 0000000000..3b85133bcb --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api.md @@ -0,0 +1,42 @@ +--- +title: "APIs" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "APIs" + identifier: "developing_apis" + weight: 107 + parent: "developing" +toc: true +aliases: +--- + +[dev api http]: ./http +[dev api backend]: ./backend +[dev api pbc]: ./protocol-buffers/ + +## In This Section + +#### [HTTP APIs][dev api http] + +Documentation on Riak KV's HTTP API. + +[Learn More >>][dev api http] + +#### [Protocol Buffers][dev api pbc] + +Information on Riak KV's Protocol Buffer Client API + +[Learn More >>][dev api pbc] + +#### [Backend API][dev api backend] + +Overview of Riak KV's storage backend API. + +[Learn More >>][dev api backend] + + + + diff --git a/content/riak/kv/3.0.4/developing/api/backend.md b/content/riak/kv/3.0.4/developing/api/backend.md new file mode 100644 index 0000000000..6d6a12011d --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/backend.md @@ -0,0 +1,118 @@ +--- +title: "Backend API" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Backend API" + identifier: "apis_backend" + weight: 101 + parent: "developing_apis" +toc: true +aliases: + - /riak/3.0.4/dev/references/backend-api + - /riak/kv/3.0.4/dev/references/backend-api +--- + +[plan backend]: {{}}riak/kv/3.0.4/setup/planning/backend + +Riak's storage API uniformly applies to all of the +[supported backends][plan backend]. This page presents the details of +the storage backend API in the form of +[Erlang type specifications](http://www.erlang.org/doc/reference_manual/typespec.html) +(specs). + +Specs are used by [dialyzer](http://www.erlang.org/doc/man/dialyzer.html), +an Erlang static analysis tool. We recommend copying these specs into any +custom backend modules and use them as a guide for development to +avoid errors and ensure full compatibility with Riak. + +Also included below is the function export list that can be pasted directly +into a custom storage backend module. + +```erlang +%% Riak Storage Backend API +-export([api_version/0, + start/2, + stop/1, + get/3, + put/5, + delete/4, + drop/1, + fold_buckets/4, + fold_keys/4, + fold_objects/4, + is_empty/1, + status/1, + callback/3]). + +%% =================================================================== +%% Public API +%% =================================================================== + +%% @doc Return the major version of the +%% current API and a capabilities list. +%% The current valid capabilities are async_fold +%% and indexes. 
+-spec api_version() -> {integer(), [atom()]}. + +%% @doc Start the backend +-spec start(integer(), config()) -> {ok, state()} | {error, term()}. + +%% @doc Stop the backend +-spec stop(state()) -> ok. + +%% @doc Retrieve an object from the backend +-spec get(riak_object:bucket(), riak_object:key(), state()) -> + {ok, any(), state()} | + {ok, not_found, state()} | + {error, term(), state()}. + +%% @doc Insert an object into the backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Delete an object from the backend +-spec delete(riak_object:bucket(), riak_object:key(), [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. + +%% @doc Fold over all the buckets +-spec fold_buckets(riak_kv_backend:fold_buckets_fun(), + any(), + [], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Fold over all the keys for one or all buckets. +-spec fold_keys(riak_kv_backend:fold_keys_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, term()} | {async, fun()}. + +%% @doc Fold over all the objects for one or all buckets. +-spec fold_objects(riak_kv_backend:fold_objects_fun(), + any(), + [{atom(), term()}], + state()) -> {ok, any()} | {async, fun()}. + +%% @doc Delete all objects from this backend +%% and return a fresh reference. +-spec drop(state()) -> {ok, state()} | {error, term(), state()}. + +%% @doc Returns true if this backend contains any +%% non-tombstone values; otherwise returns false. +-spec is_empty(state()) -> boolean() | {error, term()}. + +%% @doc Get the status information for this backend +-spec status(state()) -> [{atom(), term()}]. + +%% @doc Register an asynchronous callback +-spec callback(reference(), any(), state()) -> {ok, state()}. +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http.md b/content/riak/kv/3.0.4/developing/api/http.md new file mode 100644 index 0000000000..a89e86895e --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http.md @@ -0,0 +1,93 @@ +--- +title: "HTTP API" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "HTTP API" + identifier: "apis_http" + weight: 102 + parent: "developing_apis" +toc: true +aliases: + - /riak/3.0.4/dev/references/http + - /riak/kv/3.0.4/dev/references/http +--- + +Riak has a rich, full-featured HTTP 1.1 API. This is an overview of the +operations you can perform via HTTP and can be used as a guide for +developing a compliant client. All URLs assume the default configuration +values where applicable. All examples use `curl` to interact with Riak. + +> **URL Escaping** +> +> Buckets, keys, and link specifications may not contain unescaped +slashes. Use a URL-escaping library or replace slashes with `%2F`. 
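+
+For example, fetching a key that contains a slash requires escaping it
+in the request path. This is a sketch only; it assumes a hypothetical
+bucket `files` and key `docs/readme` under the `default` bucket type,
+with Riak's HTTP interface on `localhost:8098`:
+
+```bash
+# The key "docs/readme" becomes "docs%2Freadme" in the URL:
+curl http://localhost:8098/types/default/buckets/files/keys/docs%2Freadme
+```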
+
+## Bucket-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/props` | [HTTP Get Bucket Properties]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/get-bucket-props)
+`PUT` | `/types/<type>/buckets/<bucket>/props` | [HTTP Set Bucket Properties]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/set-bucket-props)
+`DELETE` | `/types/<type>/buckets/<bucket>/props` | [HTTP Reset Bucket Properties]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/reset-bucket-props)
+`GET` | `/types/<type>/buckets?buckets=true` | [HTTP List Buckets]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/list-buckets)
+`GET` | `/types/<type>/buckets/<bucket>/keys?keys=true` | [HTTP List Keys]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/list-keys)
+
+## Object-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Fetch Object]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/fetch-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys` | [HTTP Store Object]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/store-object)
+`PUT` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/store-object)
+`POST` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Store Object]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/store-object)
+`DELETE` | `/types/<type>/buckets/<bucket>/keys/<key>` | [HTTP Delete Object]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/delete-object)
+
+## Riak-Data-Type-related Operations
+
+Method | URL
+:------|:----
+`GET` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes`
+`POST` | `/types/<type>/buckets/<bucket>/datatypes/<key>`
+
+For documentation on the HTTP API for [Riak Data Types]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/crdts),
+see the `curl` examples in [Using Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#usage-examples)
+and its subpages, e.g. [sets]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/sets).
+
+Advanced users may consult the technical documentation inside the Riak
+KV internal module `riak_kv_wm_crdt`.
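+
+As a minimal sketch, assuming a bucket type named `counters` with the
+`datatype = counter` property has already been created and activated
+(the type, bucket, and key names here are illustrative):
+
+```curl
+# Increment a counter data type by 5
+curl -XPOST http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_key \
+  -H "Content-Type: application/json" \
+  -d '{"increment": 5}'
+
+# Fetch the current value of the counter
+curl http://localhost:8098/types/counters/buckets/my_bucket/datatypes/my_key
+```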
+
+## Query-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`POST` | `/mapred` | [HTTP MapReduce]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/mapreduce)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<value>` | [HTTP Secondary Indexes]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/secondary-indexes)
+`GET` | `/types/<type>/buckets/<bucket>/index/<index>/<start>/<end>` | [HTTP Secondary Indexes]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/secondary-indexes)
+
+## Server-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/ping` | [HTTP Ping]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/ping)
+`GET` | `/stats` | [HTTP Status]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/status)
+`GET` | `/` | [HTTP List Resources]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/list-resources)
+
+## Search-related Operations
+
+Method | URL | Doc
+:------|:----|:---
+`GET` | `/search/query/<index_name>` | [HTTP Search Query]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/search-query)
+`GET` | `/search/index` | [HTTP Search Index Info]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/search-index-info)
+`GET` | `/search/index/<index_name>` | [HTTP Fetch Search Index]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/fetch-search-index)
+`PUT` | `/search/index/<index_name>` | [HTTP Store Search Index]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/store-search-index)
+`DELETE` | `/search/index/<index_name>` | [HTTP Delete Search Index]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/delete-search-index)
+`GET` | `/search/schema/<schema_name>` | [HTTP Fetch Search Schema]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/fetch-search-schema)
+`PUT` | `/search/schema/<schema_name>` | [HTTP Store Search Schema]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/store-search-schema)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/http/counters.md b/content/riak/kv/3.0.4/developing/api/http/counters.md
new file mode 100644
index 0000000000..3dac8681ae
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/http/counters.md
@@ -0,0 +1,82 @@
+---
+title: "HTTP Counters"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Counters"
+    identifier: "http_counters"
+    weight: 118
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/http/counters
+  - /riak/kv/3.0.4/dev/references/http/counters
+---
+
+Riak counters are a CRDT (convergent replicated data type) that (eventually)
+converges to the correct total. You simply increment the counter by some
+integer, and any potential conflicts will be resolved automatically by Riak.
+
+## Setup
+
+Riak counters can only be used if the bucket has the `allow_mult` property
+set to `true`:
+
+```
+curl -XPUT localhost:8098/buckets/BUCKET/props \
+  -H "Content-Type: application/json" \
+  -d "{\"props\" : {\"allow_mult\": true}}"
+```
+
+If you attempt to use counters without setting the above, you'll get this
+message:
+
+```
+Counters require bucket property 'allow_mult=true'
+```
+
+## Request
+
+To increment a counter, POST an integer value using the `/counters` resource. This
+increments the keyed value by the given amount:
+
+```
+POST /buckets/BUCKET/counters/KEY
+```
+
+To retrieve the current value, issue a GET using `/counters`:
+
+```
+GET /buckets/BUCKET/counters/KEY
+```
+
+## Response
+
+The regular POST/PUT ([HTTP Store Object]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/store-object)) and GET ([HTTP Fetch Object]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/fetch-object)) responses apply here.
+
+Caveats: counters have no support for secondary indexes (2i), links, or custom HTTP metadata.
+
+## Example
+
+The body must be an integer (positive or negative).
+ +``` +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "1" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +1 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "100" + +curl http://localhost:8098/buckets/my_bucket/counters/my_key +101 + +curl -XPOST http://localhost:8098/buckets/my_bucket/counters/my_key -d "-1" +100 +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/delete-object.md b/content/riak/kv/3.0.4/developing/api/http/delete-object.md new file mode 100644 index 0000000000..3475faae3b --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/delete-object.md @@ -0,0 +1,79 @@ +--- +title: "HTTP Delete Object" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Delete Object" + identifier: "http_delete_object" + weight: 107 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/delete-object + - /riak/kv/3.0.4/dev/references/http/delete-object +--- + +Deletes an object from the specified bucket / key. + +## Request + +``` +DELETE /types/type/buckets/bucket/keys/key +DELETE /buckets/bucket/keys/key +``` + +Optional query parameters: + +* `rw` - quorum for both operations (get and put) involved in deleting an +object (default is set at the bucket level) +* `r` - (read quorum) how many replicas need to agree when retrieving the object +* `pr` - (primary read quorum) works like `r` but requires that the nodes +read from are not fallback nodes +* `w` - (write quorum) how many replicas must confirm receiving writes before returning a successful response +* `dw` - (durable write quorum) how many replicas to commit to durable storage +before returning a successful response +* `pw` - (primary write quorum) how many replicas to commit to primary nodes +before returning a successful response + +## Response + +Normal response codes: + +* `204 No Content` +* `404 Not Found` + +Typical error codes: + +* `400 Bad Request` - e.g. when rw parameter is invalid (> N) + +`404` responses are "normal" in the sense that DELETE operations are idempotent +and not finding the resource has the same effect as deleting it. + +## Example + +```curl +$ curl -v -X DELETE http://127.0.0.1:8098/buckets/test/keys/test2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> DELETE /buckets/test/keys/test2 HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 204 No Content
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/http/delete-search-index.md b/content/riak/kv/3.0.4/developing/api/http/delete-search-index.md
new file mode 100644
index 0000000000..2f4526a4b1
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/http/delete-search-index.md
@@ -0,0 +1,38 @@
+---
+title: "HTTP Delete Search Index"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Delete Search Index"
+    identifier: "http_delete_search_index"
+    weight: 116
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/http/delete-search-index
+  - /riak/kv/3.0.4/dev/references/http/delete-search-index
+---
+
+Deletes a Riak Search index.
+
+## Request
+
+```
+DELETE /search/index/<index_name>
+```
+
+## Normal Response Codes
+
+* `204 No Content` - The index was successfully deleted (also returned
+  if the index did not exist to begin with)
+
+## Typical Error Codes
+
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/http/fetch-object.md b/content/riak/kv/3.0.4/developing/api/http/fetch-object.md
new file mode 100644
index 0000000000..2a90fd1a9d
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/http/fetch-object.md
@@ -0,0 +1,246 @@
+---
+title: "HTTP Fetch Object"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Fetch Object"
+    identifier: "http_fetch_object"
+    weight: 105
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/http/fetch-object
+  - /riak/kv/3.0.4/dev/references/http/fetch-object
+---
+
+Reads an object from the specified bucket/key.
+
+## Request
+
+```bash
+GET /types/type/buckets/bucket/keys/key
+GET /buckets/bucket/keys/key
+```
+
+Important headers:
+
+* `Accept` - When `multipart/mixed` is the preferred content-type, objects with
+siblings will return all siblings in a single request. See [Siblings examples](#siblings-examples). See
+also RFC 2616 - [Accept header definition](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1).
+
+Optional headers:
+
+* `If-None-Match` and `If-Modified-Since` invoke conditional request semantics,
+matching on the `ETag` and `Last-Modified` of the object, respectively. If the
+object fails one of the tests (that is, if the ETag is equal or the object is
+unmodified since the supplied timestamp), Riak will return a `304 Not Modified`
+response. See also RFC 2616 - [304 Not Modified](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5).
+
+Optional query parameters:
+
+* `r` - (read quorum) how many replicas need to agree when retrieving the
+object ([default is defined by the bucket]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/set-bucket-props))
+* `pr` - how many primary replicas need to be online when doing the read
+([default is defined by the bucket]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/set-bucket-props))
+* `basic_quorum` - whether to return early in some failure cases (e.g.
when r=1 +and you get 2 errors and a success `basic_quorum=true` would return an error) +([default is defined by the bucket]({{}}riak/kv/3.0.4/developing/api/http/set-bucket-props)) +* `notfound_ok` - whether to treat notfounds as successful reads for the +purposes of R ([default is defined by the bucket]({{}}riak/kv/3.0.4/developing/api/http/set-bucket-props)) +* `vtag` - when accessing an object with siblings, which sibling to retrieve. +Scroll down to the [Manually requesting siblings](#manually-requesting-siblings) example for more information. + +## Response + +Normal response codes: + +* `200 OK` +* `300 Multiple Choices` +* `304 Not Modified` (when using conditional request semantics) + +Typical error codes: + +* `400 Bad Request` - e.g. when r parameter is invalid (> N) +* `404 Not Found` - the object could not be found on enough partitions +* `503 Service Unavailable` - the request timed out internally + +Important headers: + +* `Content-Type` - the media type/format +* `X-Riak-Vclock` - the opaque vector clock for the object +* `X-Riak-Meta-*` - any user-defined metadata defined when storing the object +* `ETag` - the entity tag for the object, useful for conditional GET operations +and validation-based caching +* `Last-Modified` - a timestamp for when the object was last written, in HTTP +datetime format +* `Link` - user- and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/3.0.4/learn/glossary/#links) + +The body of the response will be the contents of the object except when siblings +are present. + +{{% note title="Siblings" %}} +When `allow_mult` is set to true in the bucket properties, concurrent updates +are allowed to create "sibling" objects, meaning that the object has any +number of different values that are related to one another by the vector +clock. This allows your application to use its own conflict resolution +technique. + +An object with multiple sibling values will result in a `300 Multiple Choices` +response. If the `Accept` header prefers `multipart/mixed`, all siblings will +be returned in a single request as sections of the `multipart/mixed` response +body. Otherwise, a list of "vtags" will be given in a simple text format. You +can request individual siblings by adding the `vtag` query parameter. Scroll +down to the 'manually requesting siblings' example below for more information. + +To resolve the conflict, store the resolved version with the `X-Riak-Vclock` +given in the response. +{{% /note %}} + +## Simple Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc2 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc2 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT +< ETag: 6dQBm9oYA1mxRSH0e96l5W +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"foo":"bar"} +``` + +## Siblings examples + +### Manually requesting siblings + +Simple call to fetch an object that has siblings: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 102 +< +Siblings: +16vic4eU9ny46o4KPiDz1f +4v5xOg4bVwUYZdMkqf0d6I +6nr5tDTmhxnwuAFJDd2s6G +6zRSZFUJlHXZ15o9CG0BYl +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +Now request one of the siblings directly: + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc?vtag=16vic4eU9ny46o4KPiDz1f HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Link: ; rel="up" +< Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT +< ETag: 16vic4eU9ny46o4KPiDz1f +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/x-www-form-urlencoded +< Content-Length: 13 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"bar":"baz"} +``` + +### Get all siblings in one request + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys/doc -H "Accept: multipart/mixed" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/keys/doc HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: multipart/mixed +> +< HTTP/1.1 300 Multiple Choices +< X-Riak-Vclock: a85hYGDgyGDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt5HlsgCAA== +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=YinLMzyUR9feB17okMytgKsylvh +< Content-Length: 766 +< + +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/x-www-form-urlencoded +Link: ; rel="up" +Etag: 16vic4eU9ny46o4KPiDz1f +Last-Modified: Wed, 10 Mar 2010 18:01:06 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 4v5xOg4bVwUYZdMkqf0d6I +Last-Modified: Wed, 10 Mar 2010 18:00:04 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6nr5tDTmhxnwuAFJDd2s6G +Last-Modified: Wed, 10 Mar 2010 17:58:08 GMT + +{"bar":"baz"} +--YinLMzyUR9feB17okMytgKsylvh +Content-Type: application/json +Link: ; rel="up" +Etag: 6zRSZFUJlHXZ15o9CG0BYl +Last-Modified: Wed, 10 Mar 2010 17:55:03 GMT + +{"foo":"bar"} +--YinLMzyUR9feB17okMytgKsylvh-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/fetch-search-index.md b/content/riak/kv/3.0.4/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..d423bec916 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/fetch-search-index.md @@ -0,0 +1,52 @@ +--- +title: "HTTP Fetch Search Index" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Fetch Search Index" + identifier: "http_fetch_search_index" + weight: 115 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/fetch-search-index + - /riak/kv/3.0.4/dev/references/http/fetch-search-index +--- + +Retrieves information about a Riak Search [index]({{}}riak/kv/3.0.4/developing/usage/search/#simple-setup). + +## Request + +``` +GET /search/index/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` - No Search index with that name is currently + available +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the index is found, Riak will output a JSON object describing the +index, including its name, the [`n_val`]({{}}riak/kv/3.0.4/developing/app-guide/replication-properties/#a-primer-on-n-r-and-w) associated with it, and the [search schema]({{}}riak/kv/3.0.4/developing/usage/search-schemas) used by the index. 
Here is an example: + +```json +{ + "name": "my_index", + "n_val": 3, + "schema": "_yz_default" +} +``` + + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/fetch-search-schema.md b/content/riak/kv/3.0.4/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..96bbb526c4 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/fetch-search-schema.md @@ -0,0 +1,42 @@ +--- +title: "HTTP Fetch Search Schema" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Fetch Search Schema" + identifier: "http_fetch_search_schema" + weight: 116 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/fetch-search-schema + - /riak/kv/3.0.4/dev/references/http/fetch-search-schema +--- + +Retrieves a Riak KV [search schema]({{}}riak/kv/3.0.4/developing/usage/search-schemas). + +## Request + +``` +GET /search/schema/ +``` + +## Normal Response Codes + +* `200 OK` + +## Typical Error Codes + +* `404 Object Not Found` +* `503 Service Unavailable` - The request timed out internally + +## Response + +If the schema is found, Riak will return the contents of the schema as +XML (all Riak Search schemas are XML). + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/get-bucket-props.md b/content/riak/kv/3.0.4/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..197ada0ef7 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/get-bucket-props.md @@ -0,0 +1,86 @@ +--- +title: "HTTP Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Get Bucket Properties" + identifier: "http_get_bucket_props" + weight: 100 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/get-bucket-props + - /riak/kv/3.0.4/dev/references/http/get-bucket-props +--- + +Reads the bucket or bucket type properties. + +## Request + +```bash +GET /buckets/bucket/props +``` + +Or, to read bucket properties from a bucket in a bucket type: + +```bash +GET /types/type/buckets/bucket/props +``` + +Optional query parameters (only valid for the old format): + +* `props` - whether to return the bucket properties (`true` is the default) +* `keys` - whether to return the keys stored in the bucket. (`false` is the +default). See also [HTTP List Keys]({{}}riak/kv/3.0.4/developing/api/http/list-keys). + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` + +The JSON object in the response will contain up to two entries, `"props"` and +`"keys"`, which are present or missing, according to the optional query +parameters. The default is for only `"props"` to be present. + +See [HTTP Set Bucket Properties]({{}}riak/kv/3.0.4/developing/api/http/set-bucket-props) for more information about the available +bucket properties. See [Managing Bucket Types Through the Command Line]({{< baseurl >}}riak/kv/3.0.4/using/reference/bucket-types/#managing-bucket-types-through-the-command-line) for more details about reading bucket types using the `riak-admin bucket-type` interface. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/props +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 368 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +{"props":{"name":"test","n_val":3,"allow_mult":false,"last_write_wins":false," +precommit":[],"postcommit":[],"chash_keyfun":{"mod":"riak_core_util","fun":" +chash_std_keyfun"},"linkfun":{"mod":"riak_kv_wm_link_walker","fun":" +mapreduce_linkfun"},"old_vclock":86400,"young_vclock":20,"big_vclock":50," +small_vclock":10,"r":"quorum","w":"quorum","dw":"quorum","rw":"quorum"}} +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/link-walking.md b/content/riak/kv/3.0.4/developing/api/http/link-walking.md new file mode 100644 index 0000000000..051e6e5699 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/link-walking.md @@ -0,0 +1,129 @@ +--- +title: "HTTP Link Walking" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Link Walking" + identifier: "http_link_walking" + weight: 118 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/link-walking + - /riak/kv/3.0.4/dev/references/http/link-walking +--- + +{{% note title="Deprecation Warning" %}} +This feature is deprecated and will be removed in a future version. +{{% /note %}} + +Link walking (traversal) finds and returns objects by following links attached +to them, starting from the object specified by the bucket and key portion. It +is a special case of [MapReduce]({{}}riak/kv/3.0.4/developing/usage/mapreduce), and can be expressed more verbosely as such. +[Read more about Links]({{}}riak/kv/3.0.4/learn/glossary/#links). + +## Request + +```bash +GET /buckets/bucket/keys/key/[bucket],[tag],[keep] +``` + +{{% note title="Link filters" %}} +A link filter within the request URL is made of three parts, separated by +commas: + +* Bucket - a bucket name to limit the links to +* Tag - a "riaktag" to limit the links to +* Keep - 0 or 1, whether to return results from this phase + +Any of the three parts may be replaced with `_` (underscore), signifying that +any value is valid. Multiple phases of links can be followed by adding +additional path segments to the URL, separating the link filters by slashes. +The final phase in the link-walking query implicitly returns its results. +{{% /note %}} + +## Response + +Normal status codes: + +* `200 OK` + +Typical error codes: + +* `400 Bad Request` - if the format of the query in the URL is invalid +* `404 Not Found` - if the origin object of the walk was missing + +Important headers: + +* `Content-Type` - always `multipart/mixed`, with a boundary specified + +> **Understanding the response body** +> +> The response body will always be `multipart/mixed`, with each +chunk representing a single phase of the link-walking query. Each phase will +also be encoded in `multipart/mixed`, with each chunk representing a +single object that was found. If no objects were found or "keep" was not set on +the phase, no chunks will be present in that phase. Objects inside phase +results will include `Location` headers that can be used to determine +bucket and key. 
In fact, you can treat each object-chunk similarly to a complete +response from [fetching the object]({{}}riak/kv/3.0.4/developing/api/http/fetch-object), without the status +code. + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/riak/test/doc3/test,_,1/_,next,1 +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /riak/test/doc3/test,_,1/_,next,1 HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Expires: Wed, 10 Mar 2010 20:24:49 GMT +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: multipart/mixed; boundary=JZi8W8pB0Z3nO3odw11GUB4LQCN +< Content-Length: 970 +< + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=OjZ8Km9J5vbsmxtcn1p48J91cJP + +--OjZ8Km9J5vbsmxtcn1p48J91cJP +X-Riak-Vclock: a85hYGDgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKf0cIszUnMTBzHYVKbIhEUl+VK4spDFTPxhHzFyqhEoVQz7wkSAGLMGuz6FSocFIUijE3pt7HlGBhnqejARXmq0QyZnnxE6jwVJBwFgA= +Location: /riak/test/doc +Content-Type: application/json +Link: ; rel="up", ; riaktag="next" +Etag: 3pvmY35coyWPxh8mh4uBQC +Last-Modified: Wed, 10 Mar 2010 20:14:13 GMT + +{"riak":"CAP"} +--OjZ8Km9J5vbsmxtcn1p48J91cJP-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN +Content-Type: multipart/mixed; boundary=RJKFlAs9PrdBNfd74HANycvbA8C + +--RJKFlAs9PrdBNfd74HANycvbA8C +X-Riak-Vclock: a85hYGBgzGDKBVIsbLvm1WYwJTLmsTLcjeE5ypcFAA== +Location: /riak/test/doc2 +Content-Type: application/json +Link: ; rel="up" +Etag: 6dQBm9oYA1mxRSH0e96l5W +Last-Modified: Wed, 10 Mar 2010 18:11:41 GMT + +{"foo":"bar"} +--RJKFlAs9PrdBNfd74HANycvbA8C-- + +--JZi8W8pB0Z3nO3odw11GUB4LQCN-- +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/list-buckets.md b/content/riak/kv/3.0.4/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..2f421ce616 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/list-buckets.md @@ -0,0 +1,68 @@ +--- +title: "HTTP List Buckets" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "List Buckets" + identifier: "http_list_buckets" + weight: 103 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/list-buckets + - /riak/kv/3.0.4/dev/references/http/list-buckets +--- + +Lists all known buckets (ones that have keys stored in them). + +{{% note title="Not for production use" %}} +Similar to the list keys operation, this requires traversing all keys stored +in the cluster and should not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets?buckets=true + +# Using a non-default bucket type +GET /types//buckets?buckets=true +``` + +Required query parameter: + +* **buckets=true** - required to invoke the list-buckets functionality + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Content-Type - application/json` + +The JSON object in the response will contain a single entry, "buckets", which +will be an array of bucket names. 
+ +## Example + +```curl +$ curl -i http://localhost:8098/buckets?buckets=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 21 + +{"buckets":["files"]} +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/list-keys.md b/content/riak/kv/3.0.4/developing/api/http/list-keys.md new file mode 100644 index 0000000000..e9043eab8f --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/list-keys.md @@ -0,0 +1,80 @@ +--- +title: "HTTP List Keys" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "List Keys" + identifier: "http_list_keys" + weight: 104 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/list-keys + - /riak/kv/3.0.4/dev/references/http/list-keys +--- + +Lists keys in a bucket. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```bash +# Using the default bucket type +GET /buckets/bucket/keys?keys=true # List all keys +GET /buckets/bucket/keys?keys=stream # Stream keys to the client + +# Using a non-default bucket type +GET /types//buckets/bucket/keys?keys=true +GET /types//buckets/bucket/keys?keys=stream +``` + +Required query parameters: + +* `keys` - defaults to `false`. When set to `true` all keys will be returned in +a single payload. When set to `stream`, keys will be returned in +chunked-encoding. + +## Response + +Normal response codes: + +* `200 OK` + +Important headers: + +* `Content-Type` - `application/json` +* `Transfer-Encoding` - `chunked` when the `keys` query parameter is set to +`stream`. + +The JSON object in the response will contain up to two entries, +`"props"` and `"keys"` which are present or missing according to the +query parameters and format used. If `keys=stream` in the query +parameters, multiple JSON objects in chunked-encoding will be returned +containing `"keys"` entries. + +## Example + +```curl +$ curl -i http://localhost:8098/buckets/jsconf/keys?keys=true +HTTP/1.1 200 OK +Vary: Accept-Encoding +Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +Date: Fri, 30 Sep 2011 15:24:35 GMT +Content-Type: application/json +Content-Length: 239 + +{"keys":["challenge.jpg","puddi.png","basho.gif","puddikid.jpg","yay.png"," +thinking.png","victory.gif","slides","joyent.png","seancribbs-small.jpg"," +trollface.jpg","riak_logo_animated1.gif","victory.jpg","challenge.png"," +team_cribbs.png"]} +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/list-resources.md b/content/riak/kv/3.0.4/developing/api/http/list-resources.md new file mode 100644 index 0000000000..c5c934726c --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/list-resources.md @@ -0,0 +1,84 @@ +--- +title: "HTTP List Resources" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "List Resources" + identifier: "http_list_resources" + weight: 112 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/list-resources + - /riak/kv/3.0.4/dev/references/http/list-resources +--- + +List available HTTP resources for the Riak node. This can be used by clients to +automatically recognize the location of the resources for specific operations. 
+ +The standard resources are: + +* `riak_kv_wm_buckets` - [Bucket Operations]({{}}riak/kv/3.0.4/developing/api/http/#bucket-operations) +* `riak_kv_wm_index` - [HTTP Secondary Indexes]({{}}riak/kv/3.0.4/developing/api/http/secondary-indexes) +* `riak_kv_wm_link_walker` - [HTTP Link Walking]({{}}riak/kv/3.0.4/developing/api/http/link-walking) +* `riak_kv_wm_mapred` - [HTTP MapReduce]({{}}riak/kv/3.0.4/developing/api/http/mapreduce) +* `riak_kv_wm_object`- [Object/Key Operations]({{}}riak/kv/3.0.4/developing/api/http/#object-key-operations) +* `riak_kv_wm_ping` - [HTTP Ping]({{}}riak/kv/3.0.4/developing/api/http/ping) +* `riak_kv_wm_props` - [HTTP Set Bucket Properties]({{}}riak/kv/3.0.4/developing/api/http/set-bucket-props) +* `riak_kv_wm_stats` - [HTTP Status]({{}}riak/kv/3.0.4/developing/api/http/status) + +## Request + +```bash +GET / +``` + +Headers: + +* `Accept` - `application/json` or `text/html` + +## Response + +Normal status codes: + +* `200 OK` + +Important headers: + +* `Link` - all resources that are described in the response body, but in Link +form + +## Example + +Request JSON response + +```curl +$ curl -i http://localhost:8098 -H "Accept: application/json" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:18:31 GMT +Content-Type: application/json +Content-Length: 398 + +{"riak_kv_wm_buckets":"/buckets","riak_kv_wm_buckets":"/riak","riak_kv_wm_counter":"/buckets","riak_kv_wm_index":"/buckets","riak_kv_wm_keylist":"/buckets","riak_kv_wm_link_walker":"/buckets","riak_kv_wm_link_walker":"/riak","riak_kv_wm_mapred":"/mapred","riak_kv_wm_object":"/buckets","riak_kv_wm_object":"/riak","riak_kv_wm_ping":"/ping","riak_kv_wm_props":"/buckets","riak_kv_wm_stats":"/stats"} + +# Request HTML response +curl -i http://localhost:8098 -H "Accept: text/html" +HTTP/1.1 200 OK +Vary: Accept +Server: MochiWeb/1.1 WebMachine/1.10.0 (never breaks eye contact) +Link: ; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_buckets",; rel="riak_kv_wm_counter",; rel="riak_kv_wm_index",; rel="riak_kv_wm_keylist",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_link_walker",; rel="riak_kv_wm_mapred",; rel="riak_kv_wm_object",; rel="riak_kv_wm_object",; rel="riak_kv_wm_ping",; rel="riak_kv_wm_props",; rel="riak_kv_wm_stats" +Date: Wed, 27 Nov 2013 20:20:05 GMT +Content-Type: text/html +Content-Length: 666 + + +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/mapreduce.md b/content/riak/kv/3.0.4/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..2cd921b39b --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/mapreduce.md @@ -0,0 +1,74 @@ +--- +title: "HTTP MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "MapReduce" + identifier: "http_mapreduce" + weight: 108 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/mapreduce + - /riak/kv/3.0.4/dev/references/http/mapreduce +--- + +[MapReduce]({{}}riak/kv/3.0.4/developing/usage/mapreduce) is a generic way to query Riak by specifying inputs and constructing a set of map, reduce, and link phases through which data will 
flow.
+
+## Request
+
+```bash
+POST /mapred
+```
+
+Important headers:
+* `Content-Type` - must always be `application/json`. The format of the request body is described in detail on the [MapReduce]({{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce) page.
+
+Optional query parameters:
+* `chunked` - when set to `true`, results will be returned as they are received in `multipart/mixed` format using chunked-encoding.
+
+**This request must include an entity (body), which is the JSON form of the MapReduce query.**
+
+## Response
+
+Normal status codes:
+* `200 OK`
+
+Typical error codes:
+* `400 Bad Request` - if an invalid job is submitted
+* `500 Internal Server Error` - if there was an error in processing a map or reduce function
+* `503 Service Unavailable` - if the job timed out before it could complete
+
+Important headers:
+* `Content-Type` - `application/json` when `chunked` is not true, otherwise `multipart/mixed` with `application/json` sections.
+
+## Example
+
+```curl
+$ curl -v -d '{"inputs":"test", "query":[{"link":{"bucket":"test"}},{"map":{"language":"javascript","name":"Riak.mapValuesJson"}}]}' -H "Content-Type: application/json" http://127.0.0.1:8098/mapred
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /mapred HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> Content-Length: 117
+>
+< HTTP/1.1 200 OK
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 30
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+[{"foo":"bar"},{"riak":"CAP"}]
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/http/ping.md b/content/riak/kv/3.0.4/developing/api/http/ping.md
new file mode 100644
index 0000000000..0f30cb01a1
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/http/ping.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Ping"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Ping"
+    identifier: "http_ping"
+    weight: 110
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/http/ping
+  - /riak/kv/3.0.4/dev/references/http/ping
+---
+
+Checks if the server is alive. This is useful for monitoring tools, load balancers, and automated scripts.
+
+## Request
+
+```bash
+GET /ping
+```
+
+## Response
+
+Normal status codes:
+
+* `200 OK`
+
+## Example
+
+```curl
+$ curl -v http://127.0.0.1:8098/ping
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /ping HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/html +< Content-Length: 2 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +OK +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/reset-bucket-props.md b/content/riak/kv/3.0.4/developing/api/http/reset-bucket-props.md new file mode 100644 index 0000000000..49c5967214 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/reset-bucket-props.md @@ -0,0 +1,61 @@ +--- +title: "HTTP Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Reset Bucket Properties" + identifier: "http_reset_bucket_props" + weight: 102 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/reset-bucket-props + - /riak/kv/3.0.4/dev/references/http/reset-bucket-props +--- + +Resets bucket properties like `n_val` and `allow_mult` back to the +default settings. + +## Request + +```bash +DELETE /buckets/bucket/props +``` + +Resetting bucket properties is not available via the old API format. + +## Response + +Normal status codes: + +* `204 No Content` + +## Example + +```curl +$ curl -XDELETE -v localhost:8098/buckets/bucket/props {13:47} +* About to connect() to localhost port 8098 (#0) +* Trying 127.0.0.1... +* connected +* Connected to localhost (127.0.0.1) port 8098 (#0) +> DELETE /buckets/bucket/props HTTP/1.1 +> User-Agent: curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5 +> Host: localhost:8098 +> Accept: */* +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.2 (someone had painted it blue) +< Date: Tue, 06 Nov 2012 21:56:17 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host localhost left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/search-index-info.md b/content/riak/kv/3.0.4/developing/api/http/search-index-info.md new file mode 100644 index 0000000000..97dbbfea05 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/search-index-info.md @@ -0,0 +1,56 @@ +--- +title: "HTTP Search Index Info" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Search Index Info" + identifier: "http_search_index_info" + weight: 114 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/search-index-info + - /riak/kv/3.0.4/dev/references/http/search-index-info +--- + +Retrieves information about all currently available [Search indexes]({{}}riak/kv/3.0.4/developing/usage/search) in JSON format. + +## Request + +``` +GET /search/index +``` + +## Response + +If there are no currently available Search indexes, a `200 OK` will be +returned but with an empty list as the response value. 
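+
+That is, the body of an empty result is simply:
+
+```json
+[]
+```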
+
+Below is the example output if there is one Search index, called
+`test_index`, currently available:
+
+```json
+[
+  {
+    "n_val": 3,
+    "name": "test_index",
+    "schema": "_yz_default"
+  }
+]
+```
+
+#### Normal Response Codes
+
+* `200 OK`
+
+#### Typical Error Codes
+
+* `404 Object Not Found` - Typically returned if Riak Search is not
+  currently enabled on the node
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/http/search-query.md b/content/riak/kv/3.0.4/developing/api/http/search-query.md
new file mode 100644
index 0000000000..bfe0c4cfa9
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/http/search-query.md
@@ -0,0 +1,73 @@
+---
+title: "HTTP Search Query"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Search Query"
+    identifier: "http_search_query"
+    weight: 113
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/http/search-query
+  - /riak/kv/3.0.4/dev/references/http/search-query
+---
+
+Performs a [Riak KV Search]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search) query.
+
+## Request
+
+```
+GET /search/query/<index_name>
+```
+
+## Optional Query Parameters
+
+* `wt` - The [response
+  writer](https://cwiki.apache.org/confluence/display/solr/Response+Writers)
+  to be used when returning the Search payload. The currently
+  available options are `json` and `xml`. The default is `xml`.
+* `q` - The actual Search query itself. Examples can be found in
+  [Using Search]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search). If a query is not specified, Riak will return
+  information about the index itself, e.g. the number of documents
+  indexed.
+
+## Normal Response Codes
+
+* `200 OK`
+
+## Typical Error Codes
+
+* `400 Bad Request` - Returned when, for example, a malformed query is
+  supplied
+* `404 Object Not Found` - Returned if the Search index you are
+  attempting to query does not exist
+* `503 Service Unavailable` - The request timed out internally
+
+## Response
+
+If a `200 OK` is returned, then the Search query has been successful.
+Below is an example JSON response from querying an index that currently
+has no documents associated with it:
+
+```json
+{
+  "response": {
+    "docs": [],
+    "maxScore": 0.0,
+    "numFound": 0,
+    "start": 0
+  },
+  "responseHeader": {
+    "status": 0,
+    "QTime": 10,
+    "params": { /* internal info from the query */ }
+  }
+}
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/http/secondary-indexes.md b/content/riak/kv/3.0.4/developing/api/http/secondary-indexes.md
new file mode 100644
index 0000000000..9390d1915e
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/http/secondary-indexes.md
@@ -0,0 +1,95 @@
+---
+title: "HTTP Secondary Indexes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Secondary Indexes"
+    identifier: "http_2i"
+    weight: 109
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/http/secondary-indexes
+  - /riak/kv/3.0.4/dev/references/http/secondary-indexes
+---
+
+[Secondary Indexes]({{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes) allow an application to tag a Riak object with one or more field/value pairs. The object is indexed under these field/value pairs, and the application can later query the index to retrieve a list of matching keys.
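+
+Objects are tagged at write time by adding one or more `x-riak-index-*`
+headers to a store request. A minimal sketch, matching the query example
+at the end of this page (host, bucket, key, and field names are
+illustrative):
+
+```curl
+# Store an object tagged with the binary secondary index field1_bin = val1
+curl -XPUT http://localhost:8098/buckets/mybucket/keys/mykey1 \
+  -H "x-riak-index-field1_bin: val1" \
+  -H "Content-Type: text/plain" \
+  -d 'some data'
+```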
+ +## Request + +### Exact Match + +```bash +GET /buckets/mybucket/index/myindex_bin/value +``` + +### Range Query + +``` +GET /buckets/mybucket/index/myindex_bin/start/end +``` + +#### Range query with terms + +To see the index values matched by the range, use `return_terms=true`. + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true +``` + +### Pagination + +Add the parameter `max_results` for pagination. This will limit the results and provide for the next request a `continuation` value. + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500 +GET /buckets/mybucket/index/myindex_bin/start/end?return_terms=true&max_results=500&continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM= +``` + +### Streaming + +``` +GET /buckets/mybucket/index/myindex_bin/start/end?stream=true +``` + +## Response + +Normal status codes: + ++ `200 OK` + +Typical error codes: + ++ `400 Bad Request` - if the index name or index value is invalid. ++ `500 Internal Server Error` - if there was an error in processing a map or reduce function, or if indexing is not supported by the system. ++ `503 Service Unavailable` - if the job timed out before it could complete + +## Example + +```curl +$ curl -v http://localhost:8098/buckets/mybucket/index/field1_bin/val1 +* About to connect() to localhost port 8098 (#0) +* Trying 127.0.0.1... connected +* Connected to localhost (127.0.0.1) port 8098 (#0) +> GET /buckets/mybucket/index/field1_bin/val1 HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8r zlib/1.2.3 +> Host: localhost:8098 +> Accept: */* +> +< HTTP/1.1 200 OK +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 19 +< +* Connection #0 to host localhost left intact +* Closing connection #0 +{"keys":["mykey1"]}% +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/set-bucket-props.md b/content/riak/kv/3.0.4/developing/api/http/set-bucket-props.md new file mode 100644 index 0000000000..590d689406 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/set-bucket-props.md @@ -0,0 +1,116 @@ +--- +title: "HTTP Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Set Bucket Properties" + identifier: "http_set_bucket_props" + weight: 101 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/set-bucket-props + - /riak/kv/3.0.4/dev/references/http/set-bucket-props +--- + +Sets bucket properties like "n_val" and "allow_mult". + +## Request + +```bash +PUT /buckets/bucket/props +``` + +Important headers: + +* `Content-Type` - `application/json` + +The body of the request should be a JSON object with a single entry "props". +Unmodified bucket properties may be omitted. + +Available properties: + +* `n_val` (integer > 0) - the number of replicas for objects in this bucket +* `allow_mult` (true or false) - whether to allow sibling objects to be created +(concurrent updates) +* `last_write_wins` (true or false) - whether to ignore object history (vector +clock) when writing +* `precommit` - [precommit hooks]({{}}riak/kv/3.0.4/developing/usage/commit-hooks) +* `postcommit` - [postcommit hooks]({{}}riak/kv/3.0.4/developing/usage/commit-hooks) +* `r, w, dw, rw` - default quorum values for operations on keys in the bucket. 
+Valid values are:
+  * `"all"` - all nodes must respond
+  * `"quorum"` - (n_val/2) + 1 nodes must respond. *This is the default.*
+  * `"one"` - equivalent to 1
+  * *Any integer* - must be less than or equal to n_val
+* `backend` - when using `riak_kv_multi_backend`, which named backend to use for
+the bucket
+* `node_confirms` - declares the number of distinct physical node acks required for a write
+to be successful
+
+Other properties do exist but are not commonly modified.
+
+{{% note title="Property types" %}}
+Make sure you use the proper types for attributes like **n_val** and
+**allow_mult**. If you use strings instead of integers and booleans
+respectively, you may see some odd errors in your logs, saying something like
+`"{badarith,[{riak_kv_util,normalize_rw_value,2},]}"`.
+{{% /note %}}
+
+{{% note title="Node Confirms" %}}
+`node_confirms` is a tunable for durability. When operating in a failure state, Riak will store replicas in fallback vnodes, and in some cases multiple fallbacks may be on the same physical node. `node_confirms` specifies how many distinct physical nodes must acknowledge a write for it to be considered successful.
+
+When Riak receives a put, it starts a `riak_kv_put_fsm` (finite state machine). This prepares and validates the options, then calls any precommit hooks, before executing a put to the local vnode in the preflist, which becomes the coordinating node. It then waits for the local vnode response before executing the put request remotely on the two remaining nodes in the preflist.
+
+The FSM then waits for the remote vnode responses and, as responses arrive, adds the results and checks whether enough have been collected to satisfy the bucket properties such as `dw` and `pw`. When analysing the responses, Riak counts the number of different nodes from which results have been returned. The finite state machine can thus be required to wait for a minimum number of confirmations from different nodes, whilst also ensuring all other configured options are satisfied.
+
+Once all options are satisfied, the response is returned, postcommit hooks are called, and the FSM finishes.
+{{% /note %}}
+
+## Response
+
+Normal status codes:
+
+* `204 No Content`
+
+Typical error codes:
+
+* `400 Bad Request` - if the submitted JSON is invalid
+* `415 Unsupported Media Type` - if the Content-Type was not set to
+application/json in the request
+
+If successful, no content will be returned in the response body.
+
+## Example
+
+```curl
+$ curl -v -XPUT http://127.0.0.1:8098/buckets/test/props \
+  -H "Content-Type: application/json" -d '{"props":{"n_val":5}}'
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1...
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> PUT /buckets/test/props HTTP/1.1 +> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 +OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: */* +> Content-Type: application/json +> Content-Length: 21 +> +< HTTP/1.1 204 No Content +< Vary: Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: application/json +< Content-Length: 0 +< +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/http/status.md b/content/riak/kv/3.0.4/developing/api/http/status.md new file mode 100644 index 0000000000..f5578c57f9 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/http/status.md @@ -0,0 +1,173 @@ +--- +title: "HTTP Status" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Status" + identifier: "http_status" + weight: 111 + parent: "apis_http" +toc: true +aliases: + - /riak/3.0.4/dev/references/http/status + - /riak/kv/3.0.4/dev/references/http/status +--- + +Reports about the performance and configuration of the Riak node to which it was requested. You must have the `{riak_kv_stat,true}` configuration setting in app.config for this endpoint to be active. + +## Performance + +Repeated requests to the `/stats` endpoint do not have a negative +performance impact as the statistics are cached internally in Riak. + +## Request + +```bash +GET /stats +``` + +Important headers: + +* `Accept` - determines whether the response will be formatted in `application/json` or `text/plain`. + +## Response + +Normal status codes: +* `200 OK` + +Typical error codes: +* `404 Not Found` - if `riak_kv_stat` is not enabled + +Important headers: +* `Content-Type` - `application/json` or `text/plain` (JSON with added line-breaks) + +## Example + +```curl +$ curl -v http://127.0.0.1:8098/stats -H "Accept: text/plain" +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected +* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0) +> GET /stats HTTP/1.1 +> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3 +> Host: 127.0.0.1:8098 +> Accept: text/plain +> +< HTTP/1.1 200 OK +< Vary: Accept, Accept-Encoding +< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic) +< Date: Fri, 30 Sep 2011 15:24:35 GMT +< Content-Type: text/plain +< Content-Length: 2102 +< +{ + "vnode_gets": 0, + "vnode_puts": 0, + "read_repairs": 0, + "vnode_gets_total": 0, + "vnode_puts_total": 0, + "node_gets": 0, + "node_gets_total": 0, + "node_get_fsm_time_mean": "undefined", + "node_get_fsm_time_median": "undefined", + "node_get_fsm_time_95": "undefined", + "node_get_fsm_time_99": "undefined", + "node_get_fsm_time_100": "undefined", + "node_puts": 0, + "node_puts_total": 0, + "node_put_fsm_time_mean": "undefined", + "node_put_fsm_time_median": "undefined", + "node_put_fsm_time_95": "undefined", + "node_put_fsm_time_99": "undefined", + "node_put_fsm_time_100": "undefined", + "read_repairs_total": 0, + "cpu_nprocs": 84, + "cpu_avg1": 251, + "cpu_avg5": 174, + "cpu_avg15": 110, + "mem_total": 7946684000.0, + "mem_allocated": 4340880000.0, + "nodename": "riak@127.0.0.1", + "connected_nodes": [ + + ], + "sys_driver_version": "1.5", + "sys_global_heaps_size": 0, + "sys_heap_type": "private", + "sys_logical_processors": 2, + "sys_otp_release": "R13B04", + "sys_process_count": 189, + "sys_smp_support": true, + "sys_system_version": "Erlang R13B04 (erts-5.7.5) [[source]] [[64-bit]] [[smp:2:2]] [[rq:2]] [[async-threads:5]] [[hipe]] [[kernel-poll:true]]", + "sys_system_architecture": "i386-apple-darwin10.3.0", + "sys_threads_enabled": true, + "sys_thread_pool_size": 5, + "sys_wordsize": 8, + "ring_members": [ + "riak@127.0.0.1" + ], + "ring_num_partitions": 64, + "ring_ownership": "[{'riak@127.0.0.1',64}]", + "ring_creation_size": 64, + "storage_backend": "riak_kv_bitcask_backend", + "pbc_connects_total": 0, + "pbc_connects": 0, + "pbc_active": 0, + "riak_kv_version": "0.11.0", + "riak_core_version": "0.11.0", + "bitcask_version": "1.0.1", + "luke_version": "0.1", + "webmachine_version": "1.7.1", + "mochiweb_version": "1.7.1", + "erlang_js_version": "0.4", + "runtime_tools_version": "1.8.3", + "crypto_version": "1.6.4", + "os_mon_version": "2.9.1", + "sasl_version": "2.1.9", + "stdlib_version": "1.16.5", + "kernel_version": "2.13.5" +} +* Connection #0 to host 127.0.0.1 left intact +* Closing connection #0 +``` + +## Output Explanation + +The output of `/stats` contains the output of `riak-admin status` detailed in the [Inspecting a Node]({{}}riak/kv/3.0.4/using/cluster-operations/inspecting-node) doc, plus the below stats generated by the Riak Core application. 
+
+Stat | Description
+------------------------------|---------------------------------------------------
+riak_core_stat_ts | The last time (in Epoch time) Riak Core stats were generated
+ignored_gossip_total | Total number of ignored gossip messages since node was started
+rings_reconciled_total | Total number of ring reconciliation operations since node was started
+rings_reconciled | Number of ring reconciliation operations in the last minute
+gossip_received | Number of gossip messages received in the last minute
+rejected_handoffs | Total number of ownership handoff operations rejected by the node since it was started
+handoff_timeouts | Total number of handoff timeouts encountered by this node since it was started
+dropped_vnode_requests_total | Total number of requests dropped by local vnodes since the node was started
+converge_delay_min | Minimum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_max | Maximum time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_mean | Mean time in milliseconds describing time taken for the ring to converge after ring changes
+converge_delay_last | Last observed histogram value in milliseconds describing time taken for the ring to converge after ring changes
+rebalance_delay_min | Minimum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_max | Maximum time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_mean | Mean time in milliseconds taken to calculate partition rebalance during a cluster membership change
+rebalance_delay_last | Last observed histogram value in milliseconds taken to calculate partition rebalance during a cluster membership change
+riak_kv_vnodes_running | Number of local Riak KV virtual nodes running
+riak_kv_vnodeq_min | Minimum queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_median | Median queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_mean | Mean queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_max | Max queue size of all local Riak KV virtual nodes in the last minute
+riak_kv_vnodeq_total | Total queue size of all local Riak KV virtual nodes in the last minute
+riak_pipe_vnodes_running | Number of local Riak Pipe virtual nodes running
+riak_pipe_vnodeq_min | Minimum queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_median | Median queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_mean | Mean queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_max | Max queue size of local Riak Pipe virtual nodes in the last minute
+riak_pipe_vnodeq_total | Total queue size of all local Riak Pipe virtual nodes in the last minute
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/http/store-object.md b/content/riak/kv/3.0.4/developing/api/http/store-object.md
new file mode 100644
index 0000000000..5342d559a8
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/http/store-object.md
@@ -0,0 +1,150 @@
+---
+title: "HTTP Store Object"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Store Object"
+    identifier: "http_store_object"
+    weight: 106
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/http/store-object
+  - /riak/kv/3.0.4/dev/references/http/store-object
+---
+
+Stores
an object under the specified bucket / key. Storing an object comes in +two forms, depending on whether you want to use a key of your choosing, or let +Riak assign a key to a new object. + +## Request + +```bash +POST /types/type/buckets/bucket/keys # Riak-defined key +PUT /types/type/buckets/bucket/keys/key # User-defined key +POST /buckets/bucket/keys # Riak-defined key +PUT /buckets/bucket/keys/key # User-defined key +``` + +For the sake of compatibility with older clients, `POST` is also acceptable in +the form where the key is specified. + +Important headers: + +* `Content-Type` must be set for the stored object. Set what you expect to +receive back when next requesting it. +* `X-Riak-Vclock` if the object already exists, the vector clock attached to the +object when read. +* `X-Riak-Meta-*` - any additional metadata headers that should be stored with +the object. +* `X-Riak-Index-*` - index entries under which this object should be indexed. +[Read more about Secondary Indexing]({{}}riak/kv/3.0.4/developing/api/http/secondary-indexes) +* `Link` - user and system-defined links to other resources. [Read more about Links.]({{}}riak/kv/3.0.4/developing/api/http/link-walking) + +Optional headers (only valid on `PUT`): + +* `If-None-Match`, `If-Match`, `If-Modified-Since`, and `If-Unmodified-Since` +invoke conditional request semantics, matching on the `ETag` and `Last-Modified` +of the existing object. These can be used to prevent overwriting a modified +object. If the test fails, you will receive a `412 Precondition Failed` +response. This does not prevent concurrent writes; it is possible for the +condition to evaluate to true for multiple requests if the requests occur at the +same time. + +Optional query parameters: + +* `w` (write quorum) how many replicas to write to before returning a successful +response (default is defined by the bucket level) +* `dw` (durable write quorum) how many replicas to commit to durable storage +before returning a successful response (default is defined at the bucket level) +* `pw` how many primary replicas must be online to attempt a write (default is +defined at the bucket level) +* `returnbody=[true|false]` whether to return the contents of the stored object. + +*This request must include a body (entity).* + +## Response + +Normal status codes: + +* `201 Created` (when submitting without a key) +* `200 OK` +* `204 No Content` +* `300 Multiple Choices` + +Typical error codes: + +* `400 Bad Request` - e.g. when r, w, or dw parameters are invalid (> N) +* `412 Precondition Failed` if one of the conditional request headers failed to +match (see above) + +Important headers: + +* `Location` a relative URL to the newly-created object (when submitting without +a key) + +If `returnbody=true`, any of the response headers expected from [HTTP Fetch Object]({{}}riak/kv/3.0.4/developing/api/http/fetch-object) may be present. Like when fetching the object, `300 Multiple Choices` +may be returned if siblings existed or were created as part of the operation, +and the response can be dealt with similarly. + +## Example: Storing Without Key + +```curl +$ curl -v http://127.0.0.1:8098/buckets/test/keys \ + -H "Content-Type: text/plain" -d 'this is a test' +* About to connect() to 127.0.0.1 port 8098 (#0) +* Trying 127.0.0.1... 
connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> POST /buckets/test/keys HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: text/plain
+> Content-Length: 14
+>
+< HTTP/1.1 201 Created
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Location: /buckets/test/keys/bzPygTesROPtGGVUKfyvp2RR49
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 0
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+```
+
+## Example: Storing With Key
+
+```curl
+$ curl -v -XPUT -d '{"bar":"baz"}' -H "Content-Type: application/json" -H "X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==" http://127.0.0.1:8098/buckets/test/keys/doc?returnbody=true
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> PUT /buckets/test/keys/doc?returnbody=true HTTP/1.1
+> User-Agent: curl/7.19.4 (universal-apple-darwin10.0) libcurl/7.19.4 OpenSSL/0.9.8l zlib/1.2.3
+> Host: 127.0.0.1:8098
+> Accept: */*
+> Content-Type: application/json
+> X-Riak-Vclock: a85hYGBgzGDKBVIszMk55zKYEhnzWBlKIniO8mUBAA==
+> Content-Length: 13
+>
+< HTTP/1.1 200 OK
+< X-Riak-Vclock: a85hYGBgymDKBVIszMk55zKYEhnzWBlKIniO8kGF2TyvHYIKfwcJZwEA
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (participate in the frantic)
+< Link: </buckets/test>; rel="up"
+< Date: Fri, 30 Sep 2011 15:24:35 GMT
+< Content-Type: application/json
+< Content-Length: 13
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"bar":"baz"}
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/http/store-search-index.md b/content/riak/kv/3.0.4/developing/api/http/store-search-index.md
new file mode 100644
index 0000000000..b1dc8407d9
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/http/store-search-index.md
@@ -0,0 +1,57 @@
+---
+title: "HTTP Store Search Index"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Store Search Index"
+    identifier: "http_store_search_index"
+    weight: 115
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/http/store-search-index
+  - /riak/kv/3.0.4/dev/references/http/store-search-index
+---
+
+Creates a new Riak Search [index]({{< baseurl >}}riak/kv/3.0.4/developing/usage/search/#simple-setup).
+
+## Request
+
+```
+PUT /search/index/<index_name>
+```
+
+## Optional Request Body
+
+If you run a `PUT` request to this endpoint without a request body, Riak
+will create a new Search index that uses the [default Search schema]({{< baseurl >}}riak/kv/3.0.4/developing/usage/search-schemas/#the-default-schema), i.e. `_yz_default`.
+
+To specify a different schema, however, you must pass Riak a JSON object
+as the request body in which the `schema` field specifies the name of
+the schema to use. If you've [stored a schema]({{< baseurl >}}riak/kv/3.0.4/developing/usage/search-schemas/#custom-schemas) called `my_custom_schema`, the following `PUT`
+request would create an index called `my_index` that uses that schema:
+
+```curl
+curl -XPUT http://localhost:8098/search/index/my_index \
+  -H "Content-Type: application/json" \
+  -d '{"schema": "my_custom_schema"}'
+```
+
+More information can be found in [Using Search]({{< baseurl >}}riak/kv/3.0.4/developing/usage/search).
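+
+For illustration only, here is a rough Python equivalent of the `curl` request above, using the third-party `requests` library. This is a sketch rather than an official client example; the host, index name, and schema name are simply the values from the example above:
+
+```python
+import requests
+
+# PUT the index, naming the custom schema in a JSON body. Omitting the
+# body (or the "schema" field) would create the index with _yz_default.
+resp = requests.put(
+    "http://localhost:8098/search/index/my_index",
+    json={"schema": "my_custom_schema"},  # also sets Content-Type: application/json
+)
+resp.raise_for_status()  # expect 204 No Content (see the response codes below)
+```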
+
+## Normal Response Codes
+
+* `204 No Content` - The index has been successfully created
+
+## Typical Error Codes
+
+* `409 Conflict` - The index cannot be created because there is
+  already an index with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/http/store-search-schema.md b/content/riak/kv/3.0.4/developing/api/http/store-search-schema.md
new file mode 100644
index 0000000000..2ef066694b
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/http/store-search-schema.md
@@ -0,0 +1,54 @@
+---
+title: "HTTP Store Search Schema"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Store Search Schema"
+    identifier: "http_store_search_schema"
+    weight: 117
+    parent: "apis_http"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/http/store-search-schema
+  - /riak/kv/3.0.4/dev/references/http/store-search-schema
+---
+
+Creates a new Riak [Search schema]({{< baseurl >}}riak/kv/3.0.4/developing/usage/search-schemas).
+
+## Request
+
+```
+PUT /search/schema/<schema_name>
+```
+
+## Required Form Data
+
+In order to create a new Search schema, you must pass Riak a properly
+formed XML schema. More information can be found in the [Search Schema]({{< baseurl >}}riak/kv/3.0.4/developing/usage/search-schemas) document. If you've created a schema and stored it in the file
+`my_schema.xml` and would like to create a new schema called
+`my_custom_schema`, you would use the following HTTP request:
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/my_custom_schema \
+  -H "Content-Type: application/xml" \
+  --data-binary @my_schema.xml
+```
+
+## Normal Response Codes
+
+* `204 No Content` - The schema has been successfully created
+
+## Typical Error Codes
+
+* `400 Bad Request` - The schema cannot be created because there is
+  something wrong with the schema itself, e.g. an XML formatting error
+  that makes Riak Search unable to parse the schema
+* `409 Conflict` - The schema cannot be created because there is
+  already a schema with that name
+* `503 Service Unavailable` - The request timed out internally
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers.md
new file mode 100644
index 0000000000..4694e2a488
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers.md
@@ -0,0 +1,189 @@
+---
+title: "Protocol Buffers Client API"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Protocol Buffers API"
+    identifier: "apis_pbc"
+    weight: 103
+    parent: "developing_apis"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/protocol-buffers
+  - /riak/kv/3.0.4/dev/references/protocol-buffers
+---
+
+This is an overview of the operations you can perform using the
+[Protocol Buffers](https://code.google.com/p/protobuf/) Client (PBC)
+interface to Riak, and can be used as a guide for developing a
+PBC-compliant Riak client.
+
+## Protocol
+
+Riak listens on a TCP port (8087 by default) for incoming connections.
+Once connected, the client can send a stream of requests on the same
+connection.
+
+Each operation consists of a [request message](https://developers.google.com/protocol-buffers/docs/encoding) and one or more response messages.
Messages are all encoded the same way, consisting of:
+
+* 32-bit length of message code + Protocol Buffers message in network
+  order
+* 8-bit message code to identify the Protocol Buffers message
+* N bytes of Protocol Buffers-encoded message
+
+### Example
+
+```
+00 00 00 07 09 0A 01 62 12 01 6B
+|----Len---|MC|----Message-----|
+
+Len = 0x07
+Message Code (MC) = 0x09 = RpbGetReq
+RpbGetReq Message = 0x0A 0x01 0x62 0x12 0x01 0x6B
+
+Decoded Message:
+bucket: "b"
+key: "k"
+```
+
+## Message Codes
+
+Code | Message |
+:----|:--------|
+0 | `RpbErrorResp` |
+1 | `RpbPingReq` |
+2 | `RpbPingResp` |
+3 | `RpbGetClientIdReq` |
+4 | `RpbGetClientIdResp` |
+5 | `RpbSetClientIdReq` |
+6 | `RpbSetClientIdResp` |
+7 | `RpbGetServerInfoReq` |
+8 | `RpbGetServerInfoResp` |
+9 | `RpbGetReq` |
+10 | `RpbGetResp` |
+11 | `RpbPutReq` |
+12 | `RpbPutResp` |
+13 | `RpbDelReq` |
+14 | `RpbDelResp` |
+15 | `RpbListBucketsReq` |
+16 | `RpbListBucketsResp` |
+17 | `RpbListKeysReq` |
+18 | `RpbListKeysResp` |
+19 | `RpbGetBucketReq` |
+20 | `RpbGetBucketResp` |
+21 | `RpbSetBucketReq` |
+22 | `RpbSetBucketResp` |
+23 | `RpbMapRedReq` |
+24 | `RpbMapRedResp` |
+25 | `RpbIndexReq` |
+26 | `RpbIndexResp` |
+27 | `RpbSearchQueryReq` |
+28 | `RpbSearchQueryResp` |
+29 | `RpbResetBucketReq` |
+30 | `RpbResetBucketResp` |
+31 | `RpbGetBucketTypeReq` |
+32 | `RpbSetBucketTypeReq` |
+40 | `RpbCSBucketReq` |
+41 | `RpbCSUpdateReq` |
+50 | `RpbCounterUpdateReq` |
+51 | `RpbCounterUpdateResp` |
+52 | `RpbCounterGetReq` |
+53 | `RpbCounterGetResp` |
+54 | `RpbYokozunaIndexGetReq` |
+55 | `RpbYokozunaIndexGetResp` |
+56 | `RpbYokozunaIndexPutReq` |
+57 | `RpbYokozunaIndexPutResp` |
+58 | `RpbYokozunaSchemaGetReq` |
+59 | `RpbYokozunaSchemaGetResp` |
+60 | `RpbYokozunaSchemaPutReq` |
+80 | `DtFetchReq` |
+81 | `DtFetchResp` |
+82 | `DtUpdateReq` |
+83 | `DtUpdateResp` |
+253 | `RpbAuthReq` |
+254 | `RpbAuthResp` |
+255 | `RpbStartTls` |
+
+{{% note title="Message Definitions" %}}
+All Protocol Buffers messages are defined in the `riak.proto` and other
+`.proto` files in the `/src` directory of the
+[RiakPB](https://github.com/basho/riak_pb) project.
+{{% /note %}}
+
+### Error Response
+
+If the request does not result in an error, Riak will return one of a
+variety of response messages, e.g. `RpbGetResp` or `RpbPutResp`,
+depending on which request message is sent.
+
+If the server experiences an error processing a request, however, it
+will return an `RpbErrorResp` message instead of the response expected
+for the given request (e.g. `RpbGetResp` is the expected response to
+`RpbGetReq`). Error messages contain an error string and an error code,
+like this:
+
+```protobuf
+message RpbErrorResp {
+  required bytes errmsg = 1;
+  required uint32 errcode = 2;
+}
+```
+
+### Values
+
+* `errmsg` - A string representation of what went wrong
+* `errcode` - A numeric code. Currently, only `RIAKC_ERR_GENERAL=1`
+  is defined.
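+
+As a concrete sketch of the framing rules above, the following Python snippet (an illustration, not part of any official client) builds and parses the worked `RpbGetReq` example byte for byte. The payload bytes are copied from the example; only the length prefix and message code are computed:
+
+```python
+import struct
+
+MSG_CODE_GET_REQ = 9  # RpbGetReq, per the message code table above
+
+def frame(msg_code: int, pb_payload: bytes) -> bytes:
+    # The 32-bit network-order length counts the 1-byte message code
+    # plus the Protocol Buffers payload.
+    return struct.pack("!IB", len(pb_payload) + 1, msg_code) + pb_payload
+
+def unframe(data: bytes) -> tuple:
+    length, msg_code = struct.unpack("!IB", data[:5])
+    return msg_code, data[5:4 + length]
+
+# Encoded RpbGetReq{bucket: "b", key: "k"}, from the example above
+payload = bytes([0x0A, 0x01, 0x62, 0x12, 0x01, 0x6B])
+assert frame(MSG_CODE_GET_REQ, payload).hex() == "00000007090a016212016b"
+assert unframe(frame(MSG_CODE_GET_REQ, payload)) == (MSG_CODE_GET_REQ, payload)
+```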
+ +## Bucket Operations + +* [PBC List Buckets]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/list-buckets) +* [PBC List Keys]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/list-keys) +* [PBC Get Bucket Properties]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-props) +* [PBC Set Bucket Properties]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-props) +* [PBC Reset Bucket Properties]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/reset-bucket-props) + +## Object/Key Operations + +* [PBC Fetch Object]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/fetch-object) +* [PBC Store Object]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/store-object) +* [PBC Delete Object]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/delete-object) + +## Query Operations + +* [PBC MapReduce]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/mapreduce) +* [PBC Secondary Indexes]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/secondary-indexes) +* [PBC Search]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/search) + +## Server Operations + +* [PBC Ping]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/ping) +* [PBC Server Info]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/server-info) + +## Bucket Type Operations + +* [PBC Get Bucket Type]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-type) +* [PBC Set Bucket Type]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-type) + +## Data Type Operations + +* [PBC Data Type Fetch]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-fetch) +* [PBC Data Type Union]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-union) +* [PBC Data Type Store]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-store) +* [PBC Data Type Counter Store]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-counter-store) +* [PBC Data Type Set Store]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-set-store) +* [PBC Data Type Map Store]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-map-store) + +## Yokozuna Operations + +* [PBC Yokozuna Index Get]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-get) +* [PBC Yokozuna Index Put]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-put) +* [PBC Yokozuna Index Delete]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-delete) +* [PBC Yokozuna Schema Get]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/yz-schema-get) +* [PBC Yokozuna Schema Put]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/yz-schema-put) + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..b60b42a75a --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,34 @@ +--- +title: "PBC Auth Request" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Auth Request" + identifier: "pbc_auth_request" + weight: 125 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/auth-req + - /riak/kv/3.0.4/dev/references/protocol-buffers/auth-req +--- + +Sends a username (`user`) and password (`password`) to Riak as part of +an authentication request. Both values are sent as binaries. 
+ +## Request + +```protobuf +message RpbAuthReq { + required bytes user = 1; + required bytes password = 2; +} +``` + +For more on authentication, see our documentation on [Authentication and Authorization]({{}}riak/kv/3.0.4/using/security/basics). + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..105d0f9e22 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,82 @@ +--- +title: "PBC Coverage Queries" +description: "" +project: "riak_kv" +project_version: "3.0.4" +menu: + riak_kv-3.0.4: + name: "Coverage Queries" + identifier: "pbc_coverage_queries" + weight: 108 + parent: "apis_pbc" +version_history: + in: "2.1.4+" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/coverage-queries + - /riak/kv/3.0.4/dev/references/protocol-buffers/coverage-queries +--- + +Prepare for parallelizable +[secondary index queries](../secondary-indexes/) by requesting a +coverage plan. The response will be multiple slices of the cluster, as +identified by a TCP endpoint and an opaque binary to be included with +each 2i query. + +## Request + +```protobuf +message RpbCoverageReq { + optional bytes type = 1; + required bytes bucket = 2; + optional uint32 min_partitions = 3; + optional bytes replace_cover = 4; + repeated bytes unavailable_cover = 5; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the data is stored + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`type` | The name of the bucket type, if this bucket is not in the default (pre-2.0) bucket type. +`min_partitions` | The minimum number of cluster slices. `undefined` results in a direct map of the internal coverage plan, which targets the minimum number of nodes necessary to retrieve all data. An integer will be rounded up to the nearest power of 2 greater than or equal to the ring size. +`replace_cover` | If a client cannot reach the server designated by a previous coverage response, the opaque binary can be sent with a new coverage request via this parameter and a new plan component will be calculated and returned. +`unavailable_cover` | List of opaque binaries representing other unreachable endpoints to help Riak determine what servers the client cannot currently use. + +## Response + +The results of a coverage query are returned as a list of endpoints +with opaque binaries to be included with secondary index queries. + +```protobuf +message RpbCoverageResp { + repeated RpbCoverageEntry entries = 1; +} + +message RpbCoverageEntry { + required bytes ip = 1; + required uint32 port = 2; + optional bytes keyspace_desc = 3; + required bytes cover_context = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`ip` | The IP address of the server containing a subset of the data. Depending on the environment, this address may require local translation to deal with routing or firewall constraints. +`port` | The port to contact on the server. +`keyspace_desc` | A human-readable description of the keyspace. Not intended to be used programmatically except potentially for logging. +`cover_context` | The opaque binary to be used in secondary index queries (and possibly future coverage queries to indicate that this server appears offline or otherwise non-functional to the client). 
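+
+As a sketch of how a client might use these fields, the snippet below (hypothetical bookkeeping only, not a real client API) shows the `RpbCoverageReq` fields a client could send after one slice's endpoint turns out to be unreachable, using `replace_cover` and `unavailable_cover` as described above:
+
+```python
+def replacement_request(bucket: bytes, failed_entry: dict, unavailable: list) -> dict:
+    # Ask Riak for a substitute for the failed slice, and report every
+    # cover_context whose endpoint the client currently cannot reach.
+    return {
+        "bucket": bucket,
+        "replace_cover": failed_entry["cover_context"],
+        "unavailable_cover": unavailable + [failed_entry["cover_context"]],
+    }
+
+# Example values; the cover_context is opaque to the client.
+entry = {"ip": b"10.0.0.5", "port": 8087, "cover_context": b"opaque-slice-1"}
+print(replacement_request(b"users", entry, []))
+```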
+ + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/delete-object.md new file mode 100644 index 0000000000..f54f22800f --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/delete-object.md @@ -0,0 +1,104 @@ +--- +title: "PBC Delete Object" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Delete Object" + identifier: "pbc_delete_object" + weight: 107 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/delete-object + - /riak/kv/3.0.4/dev/references/protocol-buffers/delete-object +--- + +Delete an object in the specified [bucket type]({{}}riak/kv/3.0.4/using/cluster-operations/bucket-types)/bucket/key location. + +## Request + +```protobuf +message RpbDelReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 rw = 3; + optional bytes vclock = 4; + optional uint32 r = 5; + optional uint32 w = 6; + optional uint32 pr = 7; + optional uint32 pw = 8; + optional uint32 dw = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + +#### Required Parameters + +Parameter | Description | +:---------|:------------| +`bucket` | The name of the bucket in which the object is stored +`key` | The key under which the object is stored + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description | +:---------|:------------| +`rw` | How many replicas to delete before returning a successful response +`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object +`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`vclock` | Opaque vector clock provided by an earlier `RpbGetResp` message Used to prevent deleting of objects that have been modified since the last GET request (sent as a byte array) +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes to which the delete request will be sent +`type` | The bucket types associated with the object. If the bucket type is not specified, the `default` bucket type will be used, as is the case for all messages sent to Riak that have the bucket type as an optional parameter. + +## Response + +Only the message code is returned. 
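+
+To make the wire format concrete, here is an illustrative Python snippet (not an official client) that hand-encodes the exact `RpbDelReq` shown in the Example below, then frames it as described in the Protocol Buffers overview. The two helpers assume field values shorter than 128 bytes, which holds for this example:
+
+```python
+import struct
+
+MSG_CODE_DEL_REQ = 13  # RpbDelReq
+
+def ld_field(field_no: int, value: bytes) -> bytes:
+    # Length-delimited protobuf field (wire type 2); single-byte length only
+    return bytes([field_no << 3 | 2, len(value)]) + value
+
+def varint_field(field_no: int, value: int) -> bytes:
+    # Varint protobuf field (wire type 0); single-byte varint only
+    return bytes([field_no << 3, value])
+
+payload = (
+    ld_field(1, b"notabucket")  # required bucket
+    + ld_field(2, b"k")         # required key
+    + varint_field(3, 1)        # optional rw = 1
+)
+frame = struct.pack("!IB", len(payload) + 1, MSG_CODE_DEL_REQ) + payload
+assert frame.hex().upper() == "000000120D0A0A6E6F74616275636B657412016B1801"
+```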
+ +## Example + +#### Request + +``` +Hex 00 00 00 12 0D 0A 0A 6E 6F 74 61 62 75 63 6B 65 + 74 12 01 6B 18 01 +Erlang <<0,0,0,18,13,10,10,110,111,116,97,98,117,99,107,101,116,18,1,107,24,1>> + +RpbDelReq protoc decode: +bucket: "notabucket" +key: "k" +rw: 1 + +``` + +#### Response + +``` +Hex 00 00 00 01 0E +Erlang <<0,0,0,1,14>> + +RpbDelResp - only message code defined +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-counter-store.md new file mode 100644 index 0000000000..90c34085cc --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-counter-store.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Counter Store" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Data Type Counter Store" + identifier: "pbc_dt_counter_store" + weight: 117 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/dt-counter-store + - /riak/kv/3.0.4/dev/references/protocol-buffers/dt-counter-store +--- + +An operation to update a [counter]({{}}riak/kv/3.0.4/developing/data-types). + +## Request + +```protobuf +message CounterOp { + optional sint64 increment = 1; +} +``` + +The `increment` value specifies how much the counter will be incremented +or decremented, depending on whether the `increment` value is positive +or negative. This operation can be used to update counters that are +stored on their own in a key or [within a map]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-map-store). + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..958fa7c064 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,131 @@ +--- +title: "PBC Data Type Fetch" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Data Type Fetch" + identifier: "pbc_dt_fetch" + weight: 114 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/dt-fetch + - /riak/kv/3.0.4/dev/references/protocol-buffers/dt-fetch +--- + +The equivalent of [`RpbGetReq`]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/fetch-object) for [Riak Data Types]({{}}riak/kv/3.0.4/developing/data-types). This request results in a `DtFetchResp` +message (explained in the **Response** section below). + +## Request + +```protobuf +message DtFetchReq { + required bytes bucket = 1; + required bytes key = 2; + required bytes type = 3; + optional uint32 r = 4; + optional uint32 pr = 5; + optional bool basic_quorum = 6; + optional bool notfound_ok = 7; + optional uint32 timeout = 8; + optional bool sloppy_quorum = 9; + optional uint32 n_val = 10; + optional bool include_context = 11 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`key` | The key where the Data Type is stored +`type` | The [Using Bucket Types]({{}}riak/kv/3.0.4/using/cluster-operations/bucket-types) of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map) + +#### Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. 
Please refer to the documentation on [setting bucket properties]({{< baseurl >}}riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-props) for more information.
+>
+> Furthermore, you can assign an integer value to the `r` and
+> `pr` parameters, provided that that integer value is less than or equal
+> to N, _or_ a special value denoting `one`
+> (`4294967295-1`), `quorum`
+> (`4294967295-2`), `all`
+> (`4294967295-3`), or `default`
+> (`4294967295-4`).
+
+Parameter | Description
+:---------|:-----------
+`r` | Read quorum, i.e. how many replicas need to agree when retrieving the object
+`pr` | Primary read quorum, i.e. how many primary replicas need to be available when retrieving the object
+`basic_quorum` | Whether to return early in some failure cases, e.g. when `r=1` and you get 2 errors and a success, `basic_quorum=true` would return an error
+`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes to which the fetch request will be sent
+`include_context` | If this parameter is set to `true`, the Data Type's opaque "context" will be returned to the client
+
+## Response
+
+The response to a fetch request ([`DtFetchReq`]({{< baseurl >}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-fetch)) is a `DtFetchResp` message.
+
+```protobuf
+message DtFetchResp {
+    enum DataType {
+        COUNTER = 1;
+        SET = 2;
+        MAP = 3;
+    }
+
+    optional bytes context = 1;
+    required DataType type = 2;
+    optional DtValue value = 3;
+}
+```
+
+If the `include_context` option is specified, an opaque "context" value
+will be returned along with the user-readable data. When sending an
+update request, the client should send this context as well, just as one
+would send a [vclock]({{< baseurl >}}riak/kv/3.0.4/learn/glossary/#vector-clock) for standard KV updates.
+
+The type of the Data Type is specified in the `type` field, and must be
+one of the three possible values of the `DataType` enum (`COUNTER`,
+`SET`, or `MAP`).
+
+The current value of the Data Type is contained in the `value` field,
+which itself contains a `DtValue` message. This message will have the
+following structure:
+
+```protobuf
+message DtValue {
+    optional sint64 counter_value = 1;
+    repeated bytes set_value = 2;
+    repeated MapEntry map_value = 3;
+}
+```
+
+If the Data Type queried is a counter, it will return an integer value
+for the counter; if it is a set, it will return the set's current value, as
+a list of binaries; and if it is a map, it will return a `MapEntry` message.
`MapEntry` messages +are structured as follows: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-map-store.md new file mode 100644 index 0000000000..93e2622c84 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-map-store.md @@ -0,0 +1,77 @@ +--- +title: "PBC Data Type Map Store" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Data Type Map Store" + identifier: "pbc_dt_map_store" + weight: 119 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/dt-map-store + - /riak/kv/3.0.4/dev/references/protocol-buffers/dt-map-store +--- + +An operation to be applied to a value stored in a map (the contents of an update operation). The operation field that is present depends on the type of the field to which it is applied. All operations apply to individual fields nested in the map, i.e. counter-specific operations apply to specified counters in the map, set-specific operations to sets, etc. + +## Request + +Operations on maps are requested using a `MapOp` message, which has the following structure: + +```protobuf +message MapOp { + repeated MapField adds = 1; + repeated MapField removes = 2; + repeated MapUpdate updates = 3; +} +``` + +In a `MapOp` message, you can either add or remove fields (sets, counters, or maps) to or from the map or update a field or multiple fields. You can include as many field additions or removals and/or field updates as you wish. + +Adding or removing a field involves including a `MapField` message in your `MapOp` operation: + +```protobuf +message MapField { + enum MapFieldType { + COUNTER = 1; + SET = 2; + REGISTER = 3; + FLAG = 4; + MAP = 5; + } + required bytes name = 1; + required MapFieldType type = 2; +} +``` + +The `MapFieldType` specifies which type of field is being updated, and must be one of the possible values of the `MapFieldType` enum (either `COUNTER`, `SET`, `REGISTER`, `FLAG`, or `MAP`). The `name` parameter specifies the name of the field that will be updated. + +If you wish to update a map field, you can do so using a `MapUpdate` message, which has the following structure: + +```protobuf +message MapUpdate { + enum FlagOp { + ENABLE = 1; + DISABLE = 2; + } + required MapField field = 1; + optional CounterOp counter_op = 2; + optional SetOp set_op = 3; + optional bytes register_op = 4; + optional FlagOp flag_op = 5; + optional MapOp map_op = 6; +} +``` + +The `MapField` parameter is explained above. The operations used to update fields depend on the Data Type in that field, i.e. `CounterOp` messages to update counters, `SetOp` messages to update sets, etc. Updating counters is covered in [PBC Data Type Counter Store]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-counter-store) while updating sets is covered in [PBC Data Type Set Store]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-set-store). + +If you are updating a flag, you do so by including a `FlagOp` message. As shown in the `MapUpdate` message above, this operation takes one of two values: `ENABLE` and `DISABLE` (`1` and `2`, respectively). + +Updating a register does not involve sending a special message type. 
Instead, you must set the register to a desired value by specifying a binary for the `register_op` parameter. + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..4a281bf5d8 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,36 @@ +--- +title: "PBC Data Type Set Store" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Data Type Set Store" + identifier: "pbc_dt_set_store" + weight: 118 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/dt-set-store + - /riak/kv/3.0.4/dev/references/protocol-buffers/dt-set-store +--- + +An operation to update a set, either on its own (at the bucket/key +level) or [inside of a map]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-map-store). + +## Request + +```protobuf +message SetOp { + repeated bytes adds = 1; + repeated bytes removes = 2; +} +``` + +Set members are binary values that can only be added (`adds`) or removed +(`removes`) from a set. You can add and/or remove as many members of a +set in a single message as you would like. + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..35a49600ed --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,132 @@ +--- +title: "PBC Data Type Store" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Data Type Store" + identifier: "pbc_dt_store" + weight: 116 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/dt-store + - /riak/kv/3.0.4/dev/references/protocol-buffers/dt-store +--- + +A request to update the value of a [Riak Data Type]({{}}riak/kv/3.0.4/developing/data-types). + +## Request + +A `DtUpdateReq` message requires that you specify the location of the +Data Type in Riak, which operations are to be performed, and whether the +Data Type's opaque context should be returned in the resulting +`DtUpdateResp`. + +The `DtOp` value specifies which Data Type-specific operation is being +performed. More on that in the [PBC Data Type Union]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-union) document. + +```protobuf +message DtUpdateReq { + required bytes bucket = 1; + optional bytes key = 2; + required bytes type = 3; + optional bytes context = 4; + required DtOp op = 5; + optional uint32 w = 6; + optional uint32 dw = 7; + optional uint32 pw = 8; + optional bool return_body = 9 [default=false]; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bool include_context = 13 [default=true]; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`type` | The bucket type of the bucket in which the Data Type is stored, _not_ the type of Data Type (i.e. counter, set, or map). Learn more about [using bucket types]({{}}riak/kv/3.0.4/using/cluster-operations/bucket-types). 
+
+Also required is a `DtOp` message that specifies which operation is to
+be performed, depending on whether the Data Type being updated is a
+[counter]({{< baseurl >}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-counter-store), [set]({{< baseurl >}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-set-store), or [map]({{< baseurl >}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-map-store).
+
+```protobuf
+message DtOp {
+    optional CounterOp counter_op = 1;
+    optional SetOp set_op = 2;
+    optional MapOp map_op = 3;
+}
+```
+
+#### Optional Parameters
+
+{{% note title="Note on defaults and special values" %}}
+All of the optional parameters below have default values determined on a
+per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information.
+
+Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and
+`pw` parameters, provided that that integer value is less than or equal to N, _or_
+a special value denoting `one` (`4294967295-1`), `quorum`
+(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`).
+{{% /note %}}
+
+Parameter | Description
+:---------|:-----------
+`key` | The key where the Data Type is stored. If not specified, Riak will assign a random key and return that key to the client if `return_body` is set to `true`.
+`context` | The opaque binary "context" that informs Riak which version of a data type the client has seen, analogous to [vector clocks]({{< baseurl >}}riak/kv/3.0.4/learn/glossary/#vector-clock)
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response
+`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response
+`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted
+`return_body` | Whether to return the contents of the stored object. Defaults to `false`.
+`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message
+`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable
+`n_val` | The number of nodes on which the value is to be stored
+`include_context` | If `return_body` is set to `true`, the Data Type's opaque "context" will be returned to the client when the `DtUpdateResp` is sent to the client.
+
+## Response
+
+The response to a Data Type update request is analogous to
+[`RpbPutResp`]({{< baseurl >}}riak/kv/3.0.4/developing/api/protocol-buffers/store-object) for KV operations. If
+`return_body` is set in the update request message (as explained above),
+the message will include the opaque context of the Data Type (`context`)
+and the new value of the Data Type _after_ the update has completed
+(depending on whether the Data Type is a counter, set, or map). If no
+key was specified in the update request, it will include the
+Riak-assigned key (`key`).
+
+```protobuf
+message DtUpdateResp {
+    optional bytes key = 1;
+    optional bytes context = 2;
+    optional sint64 counter_value = 3;
+    repeated bytes set_value = 4;
+    repeated MapEntry map_value = 5;
+}
+```
+
+Assuming `return_body` is set to `true`: if a counter is updated, the
+response will include an integer as the `counter_value`; if a set is
+updated, a list of binaries will be returned as the `set_value`; and if a
+map is updated, the returned `map_value` will be a `MapEntry` message.
+That message takes the following form: + +```protobuf +message MapEntry { + required MapField field = 1; + optional sint64 counter_value = 2; + repeated bytes set_value = 3; + optional bytes register_value = 4; + optional bool flag_value = 5; + repeated MapEntry map_value = 6; +} +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..57a23bbf7e --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,35 @@ +--- +title: "PBC Data Type Union" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Data Type Union" + identifier: "pbc_dt_union" + weight: 115 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/dt-union + - /riak/kv/3.0.4/dev/references/protocol-buffers/dt-union +--- + +A "union" type for update operations. + +## Request + +```protobuf +message DtOp { + optional CounterOp counter_op = 1; + optional SetOp set_op = 2; + optional MapOp map_op = 3; +} +``` + +The included operation depends on the Data Type that is being updated. +`DtOp` messages are sent only as part of a [`DtUpdateReq`]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/dt-store) message. + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..9bc251552b --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,185 @@ +--- +title: "PBC Fetch Object" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Fetch Object" + identifier: "pbc_fetch_object" + weight: 105 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/fetch-object + - /riak/kv/3.0.4/dev/references/protocol-buffers/fetch-object +--- + +Fetch an object from the specified bucket type/bucket/key location +(specified by `bucket`, `type`, and `key`, respectively). If the bucket +type is not specified, the `default` bucket type will be used, as is the +case for all messages sent to Riak that have the bucket type as an +optional parameter. + +## Request + +```protobuf +message RpbGetReq { + required bytes bucket = 1; + required bytes key = 2; + optional uint32 r = 3; + optional uint32 pr = 4; + optional bool basic_quorum = 5; + optional bool notfound_ok = 6; + optional bytes if_modified = 7; + optional bool head = 8; + optional bool deletedvclock = 9; + optional uint32 timeout = 10; + optional bool sloppy_quorum = 11; + optional uint32 n_val = 12; + optional bytes type = 13; +} +``` + + +## Optional Parameters + +> **Note on defaults and special values** +> +> All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-props) for more information. +> +> Furthermore, you can assign an integer value to the `r` and +`pr` parameters, provided that that integer value is less than or +equal to N, or a special value denoting `one` +(`4294967295-1`), `quorum` (`4294967295-2`), +`all` (`4294967295-3`), or `default` +(`4294967295-4`). + +Parameter | Description | +:---------|:------------| +`basic_quorum` | Whether to return early in some failure cases, e.g. 
when `r=1` and you get 2 errors and a success basic_quorum=true would return an error +`notfound_ok` | Whether to treat `not found` responses as successful reads for the purposes of R +`if_modified` | When a vclock is supplied as this option, the response will only return the object if the vclocks don't match +`head` | If set to `true`, Riak will return the object with the value(s) set as empty, which allows you to get the metadata without a potentially large value accompanying it +`deletedvclock` | If set to `true`, Riak will return the tombstone's vclock, if applicable +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable + +## Response + +```protobuf +message RpbGetResp { + repeated RpbContent content = 1; + optional bytes vclock = 2; + optional bool unchanged = 3; +} +``` + +#### Values + +Value | Description +:-----|:----------- +`content` | The value plus metadata entries for the object. If there are siblings, there will be more than one entry. If the key is not found, the content will be empty. +`vclock` | The opaque vector clock that must be included in the `RpbPutReq` to resolve the siblings +`unchanged` | If `if_modified` was specified in the GET request but the object has not been modified, this will be set to `true` + +The content entries hold the object value and any metadata. +Below is the structure of a RpbContent message, which is +included in GET/PUT responses (`RpbGetResp` (above) and +[`RpbPutResp`]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/store-object), respectively): + +```protobuf +message RpbContent { + required bytes value = 1; + optional bytes content_type = 2; + optional bytes charset = 3; + optional bytes content_encoding = 4; + optional bytes vtag = 5; + repeated RpbLink links = 6; + optional uint32 last_mod = 7; + optional uint32 last_mod_usecs = 8; + repeated RpbPair usermeta = 9; + repeated RpbPair indexes = 10; + optional bool deleted = 11; +} +``` + +From the above, we can see that an `RpbContent` message will always +contain the binary `value` of the object. But it could also contain any +of the following optional parameters: + +* `content_type` - The content type of the object, e.g. `text/plain` + or `application/json` +* `charset` - The character encoding of the object, e.g. `utf-8` +* `content_encoding` - The content encoding of the object, e.g. + `video/mp4` +* `vtag` - The object's [vtag]({{}}riak/kv/3.0.4/learn/glossary/#vector-clock) +* `links` - This parameter is associated with the now-deprecated link + walking feature and should not be used by Riak clients +* `last_mod` - A timestamp for when the object was last modified, in + [ISO 8601 time](http://en.wikipedia.org/wiki/ISO_8601) +* `last_mod_usecs` - A timestamp for when the object was last modified, + in [Unix time](http://en.wikipedia.org/wiki/Unix_time) +* `usermeta` - This field stores user-specified key/value metadata + pairs to be associated with the object. `RpbPair` messages used to + send metadata of this sort are structured like this: + + ```protobuf + message RpbPair { + required bytes key = 1; + optional bytes value = 2; + } + ``` + Notice that both a key and value can be stored or just a key. + `RpbPair` messages are also used to attach [secondary indexes]({{}}riak/kv/3.0.4/developing/usage/secondary-indexes) to objects (in the optional + `indexes` field). 
+* `deleted` - Whether the object has been deleted (i.e. whether a + tombstone for the object has been found under the specified key) + +{{% note title="Note on missing keys" %}} +Remember: if a key is not stored in Riak, an `RpbGetResp` response without the +`content` and `vclock` fields will be returned. This should be mapped to +whatever convention the client language uses to return not found. The Erlang +client, for example, returns the atom `{error, notfound}`. +{{% /note %}} + +## Example + +#### Request + +``` +Hex 00 00 00 07 09 0A 01 62 12 01 6B +Erlang <<0,0,0,7,9,10,1,98,18,1,107>> + +RpbGetReq protoc decode: +bucket: "b" +key: "k" +``` + +#### Response + +``` +Hex 00 00 00 4A 0A 0A 26 0A 02 76 32 2A 16 33 53 44 + 6C 66 34 49 4E 4B 7A 38 68 4E 64 68 79 49 6D 4B + 49 72 75 38 BB D7 A2 DE 04 40 E0 B9 06 12 1F 6B + CE 61 60 60 60 CC 60 CA 05 52 2C AC C2 5B 3F 65 + 30 25 32 E5 B1 32 EC 56 B7 3D CA 97 05 00 +Erlang <<0,0,0,74,10,10,38,10,2,118,50,42,22,51,83,68,108,102,52,73,78,75,122, + 56,104,78,100,104,121,73,109,75,73,114,117,56,187,215,162,222,4,64, + 224,185,6,18,31,107,206,97,96,96,96,204,96,226,82,44,172,194,91,63, + 101,48,37,50,229,177,50,236,86,183,61,202,151,5,0>> + +RpbGetResp protoc decode: +content { + value: "v2" + vtag: "3SDlf4INKz8hNdhyImKIru" + last_mod: 1271442363 + last_mod_usecs: 105696 +} +vclock: "k316a```314`312005R,254302[?e0%23452612354V267=312227005000" +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-props.md new file mode 100644 index 0000000000..9969de65c5 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-props.md @@ -0,0 +1,114 @@ +--- +title: "PBC Get Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Get Bucket Properties" + identifier: "pbc_get_bucket_props" + weight: 102 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/get-bucket-props + - /riak/kv/3.0.4/dev/references/protocol-buffers/get-bucket-props +--- + +Fetch a bucket's properties. + +## Request + +```protobuf +message RpbGetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +The bucket's name (`bucket`) must be specified. The [bucket type]({{}}riak/kv/3.0.4/using/cluster-operations/bucket-types) parameter (`type`) is optional. If it is not specified, +the `default` bucket type will be used. 
+
+## Response
+
+When an `RpbGetBucketReq` message is sent to Riak, it will respond with
+an `RpbGetBucketResp` message, which returns the bucket's properties:
+
+```protobuf
+message RpbGetBucketResp {
+    required RpbBucketProps props = 1;
+}
+```
+
+The `RpbBucketProps` value itself is structured as follows:
+
+```protobuf
+message RpbBucketProps {
+    optional uint32 n_val = 1;
+    optional bool allow_mult = 2;
+    optional bool last_write_wins = 3;
+    repeated RpbCommitHook precommit = 4;
+    optional bool has_precommit = 5 [default = false];
+    repeated RpbCommitHook postcommit = 6;
+    optional bool has_postcommit = 7 [default = false];
+    optional RpbModFun chash_keyfun = 8;
+    optional RpbModFun linkfun = 9;
+    optional uint32 old_vclock = 10;
+    optional uint32 young_vclock = 11;
+    optional uint32 big_vclock = 12;
+    optional uint32 small_vclock = 13;
+    optional uint32 pr = 14;
+    optional uint32 r = 15;
+    optional uint32 w = 16;
+    optional uint32 pw = 17;
+    optional uint32 dw = 18;
+    optional uint32 rw = 19;
+    optional bool basic_quorum = 20;
+    optional bool notfound_ok = 21;
+    optional bytes backend = 22;
+    optional bool search = 23;
+    enum RpbReplMode {
+        FALSE = 0;
+        REALTIME = 1;
+        FULLSYNC = 2;
+        TRUE = 3;
+    }
+    optional RpbReplMode repl = 24;
+    optional bytes search_index = 25;
+    optional bytes datatype = 26;
+    optional bool consistent = 27;
+}
+```
+
+#### Optional Response Values
+
+Each `RpbBucketProps` message returns all of the properties associated
+with a particular bucket. Default values for bucket properties, as well
+as descriptions of all of the above properties, can be found in the
+[configuration file]({{< baseurl >}}riak/kv/3.0.4/configuring/reference/#default-bucket-properties) documentation.
+
+It should be noted that the value of an `RpbBucketProps` message may
+include other message types, such as `RpbModFun` (specifying
+module-function pairs for bucket properties that require them) and
+`RpbCommitHook` (specifying the module-function pair and name of a
+commit hook). Those message types are structured like this:
+
+```protobuf
+message RpbModFun {
+    required bytes module = 1;
+    required bytes function = 2;
+}
+
+message RpbCommitHook {
+    optional RpbModFun modfun = 1;
+    optional bytes name = 2;
+}
+```
+
+{{% note title="Note on `RpbReplMode`" %}}
+The `RpbReplMode` is of use only to users of Riak CS's [Multi-Datacenter Replication capabilities]({{< baseurl >}}riak/cs/2.1.1/cookbooks/multi-datacenter-overview/)
+{{% /note %}}
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-type.md
new file mode 100644
index 0000000000..51fb2676a7
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-type.md
@@ -0,0 +1,37 @@
+---
+title: "PBC Get Bucket Type"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Get Bucket Type"
+    identifier: "pbc_get_bucket_type"
+    weight: 112
+    parent: "apis_pbc"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/references/protocol-buffers/get-bucket-type
+  - /riak/kv/3.0.4/dev/references/protocol-buffers/get-bucket-type
+---
+
+Gets the bucket properties associated with a [bucket type]({{< baseurl >}}riak/kv/3.0.4/using/cluster-operations/bucket-types).
+
+## Request
+
+```protobuf
+message RpbGetBucketTypeReq {
+    required bytes type = 1;
+}
+```
+
+Only the name of the bucket type needs to be specified (under `type`).
+ +## Response + +A bucket type's properties will be sent to the client as part of an +[`RpbBucketProps`]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-props) message. + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/get-client-id.md new file mode 100644 index 0000000000..1b56aec02e --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/get-client-id.md @@ -0,0 +1,65 @@ +--- +title: "PBC Get Client ID" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Get Client ID" + identifier: "pbc_get_client_id" + weight: 127 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/get-client-id + - /riak/kv/3.0.4/dev/references/protocol-buffers/get-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Get the client id used for this connection. Client ids are used for +conflict resolution and each unique actor in the system should be +assigned one. A client id is assigned randomly when the socket is +connected and can be changed using [Set Client ID]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/set-client-id). + +## Request + +Just the `RpbGetClientIdReq` message code. No request message defined. + +## Response + +```protobuf +// Get ClientId Request - no message defined, just send RpbGetClientIdReq +message code +message RpbGetClientIdResp { + required bytes client_id = 1; // Client id in use for this connection +} +``` + +## Example + +Request + +``` +Hex 00 00 00 01 03 +Erlang <<0,0,0,1,3>> +``` + + +Response + +``` +Hex 00 00 00 07 04 0A 04 01 65 01 B5 +Erlang <<0,0,0,7,4,10,4,1,101,1,181>> + +RpbGetClientIdResp protoc decode: +client_id: "001e001265" +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..9cf0e6014c --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,80 @@ +--- +title: "PBC List Buckets" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "List Buckets" + identifier: "pbc_list_buckets" + weight: 100 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/list-buckets + - /riak/kv/3.0.4/dev/references/protocol-buffers/list-buckets +--- + +List all of the bucket names available. + +{{% note title="Caution" %}} +This call can be expensive for the server. Do not use in performance-sensitive +code. +{{% /note %}} + + +## Request + +Only the message code is required. 
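+
+Because the request is only a message code, the frame is five bytes: a network-order length of 1 followed by code 15 (`RpbListBucketsReq`). A minimal Python sketch, reproducing the request bytes shown in the Example below:
+
+```python
+import struct
+
+MSG_CODE_LIST_BUCKETS_REQ = 15  # RpbListBucketsReq
+
+# Length 1 covers just the message code; there is no protobuf payload.
+frame = struct.pack("!IB", 1, MSG_CODE_LIST_BUCKETS_REQ)
+assert frame == bytes([0x00, 0x00, 0x00, 0x01, 0x0F])
+```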
+ +## Response + + +```protobuf +message RpbListBucketsResp { + repeated bytes buckets = 1; +} +``` + + +Values + +* `buckets` - Buckets on the server + +## Example + +#### Request + +```bash +Hex 00 00 00 01 0F +Erlang <<0,0,0,1,15>> + +RpbListBucketsReq - only message code defined +``` + + +#### Response + +```bash +Hex 00 00 00 2A 10 0A 02 62 31 0A 02 62 35 0A 02 62 + 34 0A 02 62 38 0A 02 62 33 0A 03 62 31 30 0A 02 + 62 39 0A 02 62 32 0A 02 62 36 0A 02 62 37 +Erlang <<0,0,0,42,16,10,2,98,49,10,2,98,53,10,2,98,52,10,2,98,56,10,2,98,51,10, + 3,98,49,48,10,2,98,57,10,2,98,50,10,2,98,54,10,2,98,55>> + +RpbListBucketsResp protoc decode: +buckets: "b1" +buckets: "b5" +buckets: "b4" +buckets: "b8" +buckets: "b3" +buckets: "b10" +buckets: "b9" +buckets: "b2" +buckets: "b6" +buckets: "b7" +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/list-keys.md new file mode 100644 index 0000000000..c16b36cc69 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/list-keys.md @@ -0,0 +1,101 @@ +--- +title: "PBC List Keys" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "List Keys" + identifier: "pbc_list_keys" + weight: 101 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/list-keys + - /riak/kv/3.0.4/dev/references/protocol-buffers/list-keys +--- + +List all of the keys in a bucket. This is a streaming call, with +multiple response messages sent for each request. + +{{% note title="Not for production use" %}} +This operation requires traversing all keys stored in the cluster and should +not be used in production. +{{% /note %}} + +## Request + +```protobuf +message RpbListKeysReq { + required bytes bucket = 1; +} +``` + +Optional Parameters + +* `bucket` - bucket to get keys from + +## Response + +```protobuf +message RpbListKeysResp { + repeated bytes keys = 1; + optional bool done = 2; +} +``` + +#### Values + +* **keys** - batch of keys in the bucket. +* **done** - set true on the last response packet + +## Example + +#### Request + +```bash +Hex 00 00 00 0B 11 0A 08 6C 69 73 74 6B 65 79 73 +Erlang <<0,0,0,11,17,10,8,108,105,115,116,107,101,121,115>> + +RpbListKeysReq protoc decode: +bucket: "listkeys" + +``` + +#### Response Packet 1 + +```bash +Hex 00 00 00 04 12 0A 01 34 +Erlang <<0,0,0,4,18,10,1,52>> + +RpbListKeysResp protoc decode: +keys: "4" + +``` + +#### Response Packet 2 + +```bash +Hex 00 00 00 08 12 0A 02 31 30 0A 01 33 +Erlang <<0,0,0,8,18,10,2,49,48,10,1,51>> + +RpbListKeysResp protoc decode: +keys: "10" +keys: "3" +``` + + +#### Response Packet 3 + +```bash +Hex 00 00 00 03 12 10 01 +Erlang <<0,0,0,3,18,16,1>> + +RpbListKeysResp protoc decode: +done: true + +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/mapreduce.md new file mode 100644 index 0000000000..62117a3dc6 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/mapreduce.md @@ -0,0 +1,153 @@ +--- +title: "PBC MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "MapReduce" + identifier: "pbc_mapreduce" + weight: 107 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/mapreduce + - /riak/kv/3.0.4/dev/references/protocol-buffers/mapreduce +--- + +Execute a MapReduce job. 
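Before looking at the wire format, it may help to see how a job like the one in the example further down this page is typically submitted from the official Erlang client. A sketch, assuming `riakc` is available and `Pid` is an open client connection:

```erlang
%% Sketch: submitting the JavaScript map/reduce job from the example
%% below via riakc. Each phase is a {Type, FunTerm, Arg, Keep} tuple.
Query = [{map,    {jsfun, <<"Riak.mapValuesJson">>}, undefined, false},
         {reduce, {jsfun, <<"Riak.reduceSum">>},     undefined, true}],
{ok, Results} = riakc_pb_socket:mapred(Pid, <<"bucket_501653">>, Query).
```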
## Request

```protobuf
message RpbMapRedReq {
    required bytes request = 1;
    required bytes content_type = 2;
}
```

Required Parameters

* `request` - MapReduce job
* `content_type` - Encoding for the MapReduce job

MapReduce jobs can be encoded in two different ways:

* `application/json` - JSON-encoded MapReduce job
* `application/x-erlang-binary` - Erlang external term format

The JSON encoding is the same as the [REST API]({{}}riak/kv/3.0.4/developing/usage/mapreduce/#rest) and
the external term format is the same as the [local Erlang API]({{}}riak/kv/3.0.4/developing/app-guide/advanced-mapreduce/#erlang).

## Response

The results of the MapReduce job are returned for each phase that
generates a result, encoded in the same format the job was submitted in.
Multiple response messages will be returned, followed by a final message
at the end of the job.

```protobuf
message RpbMapRedResp {
    optional uint32 phase = 1;
    optional bytes response = 2;
    optional bool done = 3;
}
```

Values

* `phase` - Phase number of the MapReduce job
* `response` - Response encoded with the `content_type` submitted
* `done` - Set `true` on the last response packet

## Example

Here is an example of submitting a JSON-encoded job that sums up a
bucket full of JSON-encoded values.

```
{"inputs": "bucket_501653",
 "query":
    [{"map": {"arg": null,
              "name": "Riak.mapValuesJson",
              "language": "javascript",
              "keep": false}},
     {"reduce": {"arg": null,
                 "name": "Riak.reduceSum",
                 "language": "javascript",
                 "keep": true}}]}
```

Request

```bash
Hex      00 00 00 F8 17 0A E2 01 7B 22 69 6E 70 75 74 73
         22 3A 20 22 62 75 63 6B 65 74 5F 35 30 31 36 35
         33 22 2C 20 22 71 75 65 72 79 22 3A 20 5B 7B 22
         6D 61 70 22 3A 20 7B 22 61 72 67 22 3A 20 6E 75
         6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52 69 61
         6B 2E 6D 61 70 56 61 6C 75 65 73 4A 73 6F 6E 22
         2C 20 22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A
         61 76 61 73 63 72 69 70 74 22 2C 20 22 6B 65 65
         70 22 3A 20 66 61 6C 73 65 7D 7D 2C 20 7B 22 72
         65 64 75 63 65 22 3A 20 7B 22 61 72 67 22 3A 20
         6E 75 6C 6C 2C 20 22 6E 61 6D 65 22 3A 20 22 52
         69 61 6B 2E 72 65 64 75 63 65 53 75 6D 22 2C 20
         22 6C 61 6E 67 75 61 67 65 22 3A 20 22 6A 61 76
         61 73 63 72 69 70 74 22 2C 20 22 6B 65 65 70 22
         3A 20 74 72 75 65 7D 7D 5D 7D 12 10 61 70 70 6C
         69 63 61 74 69 6F 6E 2F 6A 73 6F 6E
Erlang   <<0,0,0,248,23,10,226,1,123,34,105,110,112,117,116,115,34,58,32,34,98,
           117,99,107,101,116,95,53,48,49,54,53,51,34,44,32,34,113,117,101,114,
           121,34,58,32,91,123,34,109,97,112,34,58,32,123,34,97,114,103,34,58,32,
           110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,46,
           109,97,112,86,97,108,117,101,115,74,115,111,110,34,44,32,34,108,97,
           110,103,117,97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,
           116,34,44,32,34,107,101,101,112,34,58,32,102,97,108,115,101,125,125,
           44,32,123,34,114,101,100,117,99,101,34,58,32,123,34,97,114,103,34,58,
           32,110,117,108,108,44,32,34,110,97,109,101,34,58,32,34,82,105,97,107,
           46,114,101,100,117,99,101,83,117,109,34,44,32,34,108,97,110,103,117,
           97,103,101,34,58,32,34,106,97,118,97,115,99,114,105,112,116,34,44,32,
           34,107,101,101,112,34,58,32,116,114,117,101,125,125,93,125,18,16,97,
           112,112,108,105,99,97,116,105,111,110,47,106,115,111,110>>

RpbMapRedReq protoc decode:
request: "{"inputs": "bucket_501653", "query": [{"map": {"arg": null,
"name": "Riak.mapValuesJson", "language": "javascript", "keep": false}},
 {"reduce": {"arg": null, "name": "Riak.reduceSum", "language":
+"javascript", "keep": true}}]}" +content_type: "application/json" + +``` + + +Response 1 - result from phase 1 + +```bash +Hex 00 00 00 08 18 08 01 12 03 5B 39 5D +Erlang <<0,0,0,8,24,8,1,18,3,91,57,93>> + +RpbMapRedResp protoc decode: +phase: 1 +response: "[[9]]" + +``` + + +Response 2 - end of MapReduce job + +```bash +Hex 00 00 00 03 18 18 01 +Erlang <<0,0,0,3,24,24,1>> + +RpbMapRedResp protoc decode: +done: true + +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/ping.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..7265705bcb --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/ping.md @@ -0,0 +1,46 @@ +--- +title: "PBC Ping" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Ping" + identifier: "pbc_ping" + weight: 110 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/ping + - /riak/kv/3.0.4/dev/references/protocol-buffers/ping +--- + +Check if the server is alive + +## Request + +Just the `RpbPingReq` message code. No request message defined. + +## Response + +Just the `RpbPingResp` message code. No response message defined. + +## Example + +Request + +```bash +Hex 00 00 00 01 01 +Erlang <<0,0,0,1,1>> +``` + +Response + +```bash +Hex 00 00 00 01 02 +Erlang <<0,0,0,1,2>> +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..f39c13b310 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,63 @@ +--- +title: "PBC Reset Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Reset Bucket Properties" + identifier: "pbc_reset_bucket_props" + weight: 104 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/reset-bucket-props + - /riak/kv/3.0.4/dev/references/protocol-buffers/reset-bucket-props +--- + +Request to reset the properties of a given bucket or bucket type. + +## Request + +```protobuf +message RpbResetBucketReq { + required bytes bucket = 1; + optional bytes type = 2; +} +``` + +You must specify the name of the bucket (`bucket`) and optionally a +[bucket type]({{}}riak/kv/3.0.4/developing/usage/bucket-types) using the `type` value. If you do not +specify a bucket type, the `default` bucket type will be used by Riak. + +## Response + +Only the message code is returned. 
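Because the acknowledgement is a bare message code, it is easy to verify by hand. A sketch, assuming `Sock` is a `gen_tcp` socket opened with `{packet, 4}` (which strips the 32-bit length prefix for you), and with `30` (`16#1E`) as the expected `RpbResetBucketResp` code, per the example below:

```erlang
%% Sketch: reading a code-only reply. With {packet, 4}, gen_tcp strips
%% the length prefix, so the reply is the single response-code byte
%% (30 = 16#1E for RpbResetBucketResp, as in the example below).
{ok, <<30>>} = gen_tcp:recv(Sock, 0).
```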
+ +## Example + +Request to reset the properties for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0A 1D 0A 07 66 72 69 65 6E 64 73 +Erlang <<0,0,0,10,29,10,7,102,114,105,101,110,100,115>> + +RpbResetBucketReq protoc decode: +bucket: "friends" + +``` + +#### Response + +```bash +Hex 00 00 00 01 1E +Erlang <<0,0,0,1,30>> + +RpbResetBucketResp - only message code defined +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/search.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/search.md new file mode 100644 index 0000000000..ca5b5a07b1 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/search.md @@ -0,0 +1,152 @@ +--- +title: "PBC Search" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Search" + identifier: "pbc_search" + weight: 109 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/search + - /riak/kv/3.0.4/dev/references/protocol-buffers/search +--- + +Send a Search request to retrieve a list of documents, along with a few +stats. + +## Request + + +```protobuf +message RpbSearchQueryReq { + required bytes q = 1; + required bytes index = 2; + optional uint32 rows = 3; + optional uint32 start = 4; + optional bytes sort = 5; + optional bytes filter = 6; + optional bytes df = 7; + optional bytes op = 8; + repeated bytes fl = 9; + optional bytes presort = 10; +} +``` + +Required Parameters + +* `q` - The contents of the query +* `index` - The name of the index to search + +Optional Parameters + +* `rows` - The maximum number of rows to return +* `start` - A start offset, i.e. the number of keys to skip before + returning values +* `sort` - How the search results are to be sorted +* `filter` - Filters search with additional query scoped to inline + fields +* `df` - Override the `default_field` setting in the schema file +* `op` - `and` or `or`, to override the `default_op` operation setting + in the schema file +* `fl` - Return the fields limit +* `presort` - Presort. The options are `key` or `score` + + +## Response + +The results of a search query are returned as a repeating list of 0 or +more `RpbSearchDoc`s. `RpbSearchDoc`s themselves are composed of 0 or +more key/value pairs (`RpbPair`) that match the given request +parameters. It also returns the maximum search score and the number of +results. + + +```protobuf +// RbpPair is a generic key/value pair datatype used for +// other message types +message RpbPair { + required bytes key = 1; + optional bytes value = 2; +} + +message RpbSearchDoc { + repeated RpbPair fields = 1; +} + +message RpbSearchQueryResp { + repeated RpbSearchDoc docs = 1; + optional float max_score = 2; + optional uint32 num_found = 3; +} +``` + +Values + +* `docs` - A list of docs that match the search request +* `max_score` - The top score returned +* `num_found` - Returns the total number of values matched by this + search + + +## Example + +Request + +Here we search for any animals that being with the string `pig`. We only +want the first 100, and sort the values by a `name` field. 
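Through the official Erlang client, the same query might look like the sketch below (`riakc` assumed; the options mirror the `rows`, `start`, and `sort` fields in the raw request that follows):

```erlang
%% Sketch: the same search query via riakc. The options correspond to
%% the rows, start, and sort request fields shown below.
{ok, Results} = riakc_pb_socket:search(Pid, <<"animals">>, <<"pig*">>,
                                       [{rows, 100}, {start, 0},
                                        {sort, <<"name">>}]).
```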
+ +```bash +RpbSearchQueryReq protoc decode: +q: "pig*" +index: "animals" +rows: 100 +start: 0 +sort: "name" + +Hex 00 00 00 1A 1B 0A 04 70 69 67 2A 12 07 61 6E + 69 6D 61 6C 73 18 64 20 00 2A 04 6E 61 6D 65 +Erlang <<0,0,0,26,27,10,4,112,105,103,42,18,7,97,110, + 105,109,97,108,115,24,100,32,0,42,4,110,97, + 109,101>> +``` + +Response + +```bash +Hex 00 00 00 36 1B 0A 1D 0A 0D 0A 06 61 6E 69 6D + 61 6C 12 03 70 69 67 0A 0C 0A 04 6E 61 6D 65 + 12 04 66 72 65 64 0A 12 0A 10 0A 06 61 6E 69 + 6D 61 6C 12 06 70 69 67 65 6F 6E 18 02 +Erlang <<0,0,0,54,27,10,29,10,13,10,6,97,110,105,109, + 97,108,18,3,112,105,103,10,12,10,4,110,97, + 109,101,18,4,102,114,101,100,10,18,10,16,10, + 6,97,110,105,109,97,108,18,6,112,105,103, + 101,111,110,24,2>> + +RpbSearchQueryResp protoc decode: +docs { + fields { + key: "animal" + value: "pig" + } + fields { + key: "name" + value: "fred" + } +} +docs { + fields { + key: "animal" + value: "pigeon" + } +} +num_found: 2 +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/secondary-indexes.md new file mode 100644 index 0000000000..f71382ea9e --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/secondary-indexes.md @@ -0,0 +1,125 @@ +--- +title: "PBC Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Secondary Indexes" + identifier: "pbc_secondary_indexes" + weight: 108 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/secondary-indexes + - /riak/kv/3.0.4/dev/references/protocol-buffers/secondary-indexes +--- + +Request a set of keys that match a secondary index query. + +## Request + +```protobuf +message RpbIndexReq { + enum IndexQueryType { + eq = 0; + range = 1; + } + required bytes bucket = 1; + required bytes index = 2; + required IndexQueryType qtype = 3; + optional bytes key = 4; + optional bytes range_min = 5; + optional bytes range_max = 6; + optional bool return_terms = 7; + optional bool stream = 8; + optional uint32 max_results = 9; + optional bytes continuation = 10; + optional uint32 timeout = 11; + optional bytes type = 12; + optional bytes term_regex = 13; + optional bool pagination_sort = 14; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket in which the Data Type is stored +`index` | The name of the index to be queried +`qtype` | The type of index query to be performed. 
This can take either of the two possible values of the `IndexQueryType` enum: `eq` for an exact index match for the given `key` or `range` for a range query + +#### Optional Parameters + +Parameter | Description +:---------|:----------- +`key` | The name of the index to be queried if `qtype` is set to `eq` +`range_min` and `range_max` | The minimum and maximum values for a range query if `qtype` is set to `range` +`return_terms` | If set to `true`, the response will include matched indexed values (for range queries only) +`stream` | If set to `true`, keys matching the index query will be streamed to the client instead of waiting for `max_results` or the full result to be tabulated +`max_results` | If pagination is turned on, the number of results to be returned to the client +`continuation` | If set to `true`, values are returned in a paginated response +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`type` | The bucket type of the bucket that is being queried. If not set, the bucket type `default` will be used. Learn more about [using bucket types]({{}}riak/kv/3.0.4/developing/usage/bucket-types). +`term_regex` | If set to a regular expression (as a binary), a term filter will be applied to the index query +`pagination_sort` | If set to `true`, paginated results will be sorted, first by index value, then by key + +## Response + +The results of a Secondary Index query are returned as a repeating list +of 0 or more keys that match the given request parameters. + +```protobuf +message RpbIndexResp { + repeated bytes keys = 1; + repeated RpbPair results = 2; + optional bytes continuation = 3; + optional bool done = 4; +} +``` + +#### Values + +Parameter | Description +:---------|:----------- +`keys` | A list of keys that match the index request +`results` | If `return_terms` is specified with range queries, used to return matched index values as key/value pairs in `RpbPair` messages. More on `RpbPair` messages can be found in [PBC Fetch Object]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/fetch-object). +`continuation` | Used for paginated responses +`done` | Used for streaming. The value will be `true` when the current stream is done (either `max_results` has been reached or there are no more results). + +## Example + +#### Request + +Here we look for any exact matches of `chicken` on an `animal_bin` index +for a bucket named `farm`. 
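For comparison, here is a sketch of the same equality query issued through the official Erlang client (`riakc` assumed); the client's `{binary_index, "animal"}` tuple expands to the `animal_bin` index name used in the raw request below:

```erlang
%% Sketch: the same 2i equality query via riakc. {binary_index, "animal"}
%% expands to the animal_bin index name.
{ok, Results} = riakc_pb_socket:get_index_eq(Pid, <<"farm">>,
                                             {binary_index, "animal"},
                                             <<"chicken">>).
```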
+ +```bash +RpbIndexReq protoc decode: +bucket: "farm" +index: "animal_bin" +qtype: 0 +key: "chicken" + +Hex 00 00 00 1E 19 0A 04 66 61 72 6D 12 0A 61 6E 69 + 6D 61 6C 5F 62 69 6E 18 00 22 07 63 68 69 63 6B 65 6E +Erlang <<0,0,0,30,25,10,10,4,102,97,114,109,18,10,97,110,105, + 109,97,108,95,98,105,110,24,0,34,7,99,104,105,99,107, + 101,110>> +``` + +#### Response + +```bash +Hex 00 00 00 0F 1A 0A 03 68 65 6E 0A 07 72 6F 6F 73 74 65 72 +Erlang <<0,0,0,15,26,10,3,104,101,110,10,7,114,111,111,115,116,101,114>> + +RpbIndexResp protoc decode: +keys: "hen" +keys: "rooster" +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/server-info.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..d19ed02aee --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,62 @@ +--- +title: "PBC Server Info" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Server Info" + identifier: "pbc_server_info" + weight: 111 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/server-info + - /riak/kv/3.0.4/dev/references/protocol-buffers/server-info +--- + +A message from Riak that contains two pieces of information about the +server: the name of the node and the version of Riak in use on that +node. + +## Request + +A request consists only of the `RpbGetServerInfoReq` message code. No +request message is defined. + +## Response + +```protobuf +message RpbGetServerInfoResp { + optional bytes node = 1; + optional bytes server_version = 2; +} +``` + +## Example + +#### Request + +```bash +Hex 00 00 00 01 07 +Erlang <<0,0,0,1,7>> + +RpbGetServerInfoReq - only message code defined +``` + +#### Response + +```bash +Hex 00 00 00 17 08 0A 0E 72 69 61 6B 40 31 32 37 2E + 30 2E 30 2E 31 12 04 30 2E 31 30 +Erlang <<0,0,0,23,8,10,14,114,105,97,107,64,49,50,55,46,48,46,48,46,49,18,4,48, + 46,49,48>> + +RpbGetServerInfoResp protoc decode: +node: "riak@127.0.0.1" +server_version: "0.10" +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..eac95a9255 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,72 @@ +--- +title: "PBC Set Bucket Properties" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Set Bucket Properties" + identifier: "pbc_set_bucket_props" + weight: 103 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/set-bucket-props + - /riak/kv/3.0.4/dev/references/protocol-buffers/set-bucket-props +--- + +Sets the properties for a bucket. + +## Request + +```protobuf +message RpbSetBucketReq { + required bytes bucket = 1; + required RpbBucketProps props = 2; + optional bytes type = 3; +} +``` + +You must specify the name of the bucket (`bucket`) and include an +`RpbBucketProps` message. More on that message type can be found in the +[PBC Get Bucket Properties]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-props) documentation. + +You can also specify a [bucket type]({{}}riak/kv/3.0.4/developing/usage/bucket-types) using the +`type` value. If you do not specify a bucket type, the `default` bucket +type will be used by Riak. + +## Response + +Only the message code is returned. 
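From the official Erlang client, the property change in the example below is a single call. A sketch, assuming `riakc` is available:

```erlang
%% Sketch: setting allow_mult on the friends bucket via riakc, matching
%% the example below. set_bucket/3 returns ok on success.
ok = riakc_pb_socket:set_bucket(Pid, <<"friends">>, [{allow_mult, true}]).
```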
+ +## Example + +Change `allow_mult` to true for the bucket `friends`: + +#### Request + +```bash +Hex 00 00 00 0E 15 0A 07 66 72 69 65 6E 64 73 12 02 + 10 01 +Erlang <<0,0,0,14,21,10,7,102,114,105,101,110,100,115,18,2,16,1>> + +RpbSetBucketReq protoc decode: +bucket: "friends" +props { + allow_mult: true +} + +``` + +#### Response + +```bash +Hex 00 00 00 01 16 +Erlang <<0,0,0,1,22>> + +RpbSetBucketResp - only message code defined +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..98c9608a46 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,35 @@ +--- +title: "PBC Set Bucket Type" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Set Bucket Type" + identifier: "pbc_set_bucket_type" + weight: 113 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/set-bucket-type + - /riak/kv/3.0.4/dev/references/protocol-buffers/set-bucket-type +--- + +Assigns a set of [bucket properties]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-props) to a +[bucket type]({{}}riak/kv/3.0.4/developing/usage/bucket-types). + +## Request + +```protobuf +message RpbSetBucketTypeReq { + required bytes type = 1; + required RpbBucketProps props = 2; +} +``` + +The `type` field specifies the name of the bucket type as a binary. The +`props` field contains an [`RpbBucketProps`]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/get-bucket-props). + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..b176faab84 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,66 @@ +--- +title: "PBC Set Client ID" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Set Client ID" + identifier: "pbc_set_client_id" + weight: 126 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/set-client-id + - /riak/kv/3.0.4/dev/references/protocol-buffers/set-client-id +--- + +{{% note title="Deprecation notice" %}} +The use of client IDs in conflict resolution is now deprecated in Riak. If you +are building or maintaining a Riak client that is intended to be compatible +with Riak 1.4 or later, you can safely ignore client IDs. +{{% /note %}} + +Set the client ID for this connection. A library may want to set the +client ID if it has a good way to uniquely identify actors across +reconnects. This will reduce vector clock bloat. + +## Request + +```protobuf +message RpbSetClientIdReq { + required bytes client_id = 1; // Client id to use for this connection +} +``` + + +## Response + +Just the `RpbSetClientIdResp` message code. 
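Framing this request by hand is simple, since the message has a single length-delimited field. A sketch (the protobuf key for field 1 is the byte `16#0A`; message code `5` matches the example that follows, and the guard keeps the length a valid one-byte varint):

```erlang
%% Sketch: hand-framing RpbSetClientIdReq (message code 5). 16#0A is the
%% protobuf key for field 1; a single length byte is a valid varint for
%% IDs shorter than 128 bytes.
set_client_id_frame(ClientId) when byte_size(ClientId) < 128 ->
    Body = <<16#0A, (byte_size(ClientId)):8, ClientId/binary>>,
    <<(byte_size(Body) + 1):32, 5:8, Body/binary>>.
```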
+ +## Example + +Request + +``` +Hex 00 00 00 07 05 0A 04 01 65 01 B6 +Erlang <<0,0,0,7,5,10,4,1,101,1,182>> + +RpbSetClientIdReq protoc decode: +client_id: "001e001266" + +``` + + +Response + +``` +Hex 00 00 00 01 06 +Erlang <<0,0,0,1,6>> + +RpbSetClientIdResp - only message code defined +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/store-object.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/store-object.md new file mode 100644 index 0000000000..4ba12522a2 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/store-object.md @@ -0,0 +1,154 @@ +--- +title: "PBC Store Object" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Store Object" + identifier: "pbc_store_object" + weight: 106 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/store-object + - /riak/kv/3.0.4/dev/references/protocol-buffers/store-object +--- + +Stores an object under the specified location, as determined by the +intended [key]({{}}riak/kv/3.0.4/learn/concepts/keys-and-objects), [bucket]({{}}riak/kv/3.0.4/learn/concepts/buckets), and [bucket type]({{}}riak/kv/3.0.4/developing/usage/bucket-types). A bucket must always be specified (via +`bucket`), whereas key (`key`) and bucket type (`type`) are optional. If +no key is specified, Riak will assign a random key to the object. If no +[bucket type]({{}}riak/kv/3.0.4/developing/usage/bucket-types) is assigned, Riak will assign +`default`, which means that the [default bucket configuration]({{}}riak/kv/3.0.4/configuring/reference/#default-bucket-properties) will be used. + +#### Request + +```protobuf +message RpbPutReq { + required bytes bucket = 1; + optional bytes key = 2; + optional bytes vclock = 3; + required RpbContent content = 4; + optional uint32 w = 5; + optional uint32 dw = 6; + optional bool return_body = 7; + optional uint32 pw = 8; + optional bool if_not_modified = 9; + optional bool if_none_match = 10; + optional bool return_head = 11; + optional uint32 timeout = 12; + optional bool asis = 13; + optional bool sloppy_quorum = 14; + optional uint32 n_val = 15; + optional bytes type = 16; +} +``` + +#### Required Parameters + +Parameter | Description +:---------|:----------- +`bucket` | The name of the bucket, in bytes, in which the key/value is to reside +`content` | The new or updated contented of the object. Uses the same `RpbContent` message returned as part of an `RpbGetResp` message, documented in [PBC Fetch Object]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/fetch-object) + +#### Optional Parameters + +{{% note title="Note on defaults and special values" %}} +All of the optional parameters below have default values determined on a +per-bucket basis. Please refer to the documentation on [setting bucket properties](../set-bucket-props) for more information. + +Furthermore, you can assign an integer value to the `w`, `dw`, `pr`, and +`pw`, provided that that integer value is less than or equal to N, _or_ +a special value denoting `one` (`4294967295-1`), `quorum` +(`4294967295-2`), `all` (`4294967295-3`), or `default` (`4294967295-4`). +{{% /note %}} + +Parameter | Description +:---------|:----------- +`key` | The key to create/update. If not specified, Riak will generate a random key and return that key as part of the response to that request. +`vclock` | Opaque vector clock provided by an earlier RpbGetResp message. Omit if this is a new key or if you deliberately want to create a sibling. 
+`w` | Write quorum, i.e. how many replicas to write to before returning a successful response +`dw` | Durable write quorum, i.e. how many replicas to commit to durable storage before returning a successful response +`return_body` | Whether to return the contents of the now-stored object. Defaults to `false`. +`pw` | Primary write quorum, i.e. how many primary nodes must be up when the write is attempted +`return_head` | Return the metadata for the now-stored object without returning the value of the object +`timeout` | The timeout duration, in milliseconds, after which Riak will return an error message +`sloppy_quorum` | If this parameter is set to `true`, the next available node in the ring will accept requests if any primary node is unavailable +`n_val` | The number of nodes on which the value is to be stored + +The `if_not_modified`, `if_none_match`, and `asis` parameters are set +only for messages sent between nodes in a Riak cluster and should not be +set by Riak clients. + +#### Response + +```bash +message RpbPutResp { + repeated RpbContent contents = 1; + optional bytes vclock = 2; + optional bytes key = 3; +} +``` + +If `return_body` is set to `true` on the PUT request, the `RpbPutResp` +will contain the current object after the PUT completes, in `contents`, +as well as the object's [causal context]({{}}riak/kv/3.0.4/learn/concepts/causal-context), in the `vclock` +field. The `key` will be sent only if the server generated a random key +for the object. + +If `return_body` is not set and no key is generated, the PUT response +will be empty. + +## Example + +#### Request + +``` +Hex 00 00 00 1C 0B 0A 01 62 12 01 6B 22 0F 0A 0D 7B + 22 66 6F 6F 22 3A 22 62 61 72 22 7D 28 02 38 01 +Erlang <<0,0,0,28,11,10,1,98,18,1,107,34,15,10,13,123,34,102,111,111,34,58,34, + 98,97,114,34,125,40,2,56,1>> + +RpbPutReq protoc decode: +bucket: "b" +key: "k" +content { + value: "{"foo":"bar"}" +} +w: 2 +return_body: true + +``` + +#### Response + +``` +Hex 00 00 00 62 0C 0A 31 0A 0D 7B 22 66 6F 6F 22 3A + 22 62 61 72 22 7D 2A 16 31 63 61 79 6B 4F 44 39 + 36 69 4E 41 68 6F 6D 79 65 56 6A 4F 59 43 38 AF + B0 A3 DE 04 40 90 E7 18 12 2C 6B CE 61 60 60 60 + CA 60 CA 05 52 2C 2C E9 0C 86 19 4C 89 8C 79 AC + 0C 5A 21 B6 47 F9 20 C2 6C CD 49 AC 0D 77 7C A0 + 12 FA 20 89 2C 00 +Erlang <<0,0,0,98,12,10,49,10,13,123,34,102,111,111,34,58,34,98,97,114,34,125, + 42,22,49,99,97,121,107,79,68,57,54,105,78,65,104,111,109,121,101,86, + 106,79,89,67,56,175,176,163,222,4,64,144,231,24,18,44,107,206,97,96, + 96,96,202,96,226,82,44,44,233,12,134,25,76,137,140,121,172,12,90,33, + 182,71,249,32,194,108,205,73,172,13,119,124,160,18,250,32,137,44,0>> + +RpbPutResp protoc decode: +contents { + value: "{"foo":"bar"}" + vtag: "1caykOD96iNAhomyeVjOYC" + last_mod: 1271453743 + last_mod_usecs: 406416 +} +vclock: "k316a```312`312005R,,351014206031L211214y254014Z!266G371 +302l315I254rw|240022372 211,000" + +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-delete.md new file mode 100644 index 0000000000..ec77b2b9a2 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-delete.md @@ -0,0 +1,37 @@ +--- +title: "PBC Yokozuna Index Delete" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Yokozuna Index Delete" + identifier: "pbc_yz_index_delete" + weight: 122 + parent: "apis_pbc" +toc: true +aliases: + - 
/riak/3.0.4/dev/references/protocol-buffers/yz-index-delete + - /riak/kv/3.0.4/dev/references/protocol-buffers/yz-index-delete +--- + +Delete a search index. + +## Request + +The `name` parameter is the name of the index to delete, as a binary. + +```protobuf +message RpbYokozunaIndexDeleteReq { + required bytes name = 1; +} +``` + +## Response + +Returns a [RpbDelResp]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..5dfee651c8 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,63 @@ +--- +title: "PBC Yokozuna Index Get" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Yokozuna Index Get" + identifier: "pbc_yz_index_get" + weight: 120 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/yz-index-get + - /riak/kv/3.0.4/dev/references/protocol-buffers/yz-index-get +--- + +Retrieve a search index from Riak Search. + +## Request + +The `name` parameter is the name of the index to fetch as a binary. + +```protobuf +message RpbYokozunaIndexGetReq { + optional bytes name = 1; +} +``` + +## Response + +If a `name` is passed through the `RpbYokozunaIndexGetReq` request, zero +or one `index` objects are returned. If `name` is empty, then a list of +all indexes will be returned. + +Both requests will return a response of this form. + +```protobuf +message RpbYokozunaIndexGetResp { + repeated RpbYokozunaIndex index = 1; +} +``` + +This message will contain any number of `RpbYokozunaIndex` messages, +depending on how many indexes are returned. + +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/3.0.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the +index is stored (for GET requests) or on which you wish the index to be +stored (for PUT requests). An index's `n_val` must match the associated +bucket's `n_val`. + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..94ea37c539 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,49 @@ +--- +title: "PBC Yokozuna Index Put" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Yokozuna Index Put" + identifier: "pbc_yz_index_put" + weight: 121 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/yz-index-put + - /riak/kv/3.0.4/dev/references/protocol-buffers/yz-index-put +--- + +Create a new index or modify an existing index. + +## Request + +```protobuf +message RpbYokozunaIndexPutReq { + required RpbYokozunaIndex index = 1; +} +``` + +Each message must contain a `RpbYokozunaIndex` message providing +information about the index being stored. 
+ +```protobuf +message RpbYokozunaIndex { + required bytes name = 1; + optional bytes schema = 2; + optional uint32 n_val = 3; +} +``` + +Each message specifying an index must include the index's name as a +binary (as `name`). Optionally, you can specify a [`schema`]({{}}riak/kv/3.0.4/developing/usage/search-schemas) name and/or an `n_val`, i.e. the number of nodes on which the index is stored (for GET requests) or on which you wish the index to be stored (for PUT requests). An index's `n_val` must match the associated bucket's `n_val`. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/#message-codes) code with no data on success. + + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-schema-get.md new file mode 100644 index 0000000000..e8c9c35407 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-schema-get.md @@ -0,0 +1,52 @@ +--- +title: "PBC Yokozuna Schema Get" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Yokozuna Schema Get" + identifier: "pbc_yz_schema_get" + weight: 123 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/yz-schema-get + - /riak/kv/3.0.4/dev/references/protocol-buffers/yz-schema-get +--- + +Fetch a [search schema]({{}}riak/kv/3.0.4/developing/usage/search-schemas) from Riak Search. + +## Request + +In a request message, you only need to specify the name of the schema as +a binary (under `name`); + +```protobuf +message RpbYokozunaSchemaGetReq { + required bytes name = 1; // Schema name +} +``` + +## Response + +```protobuf +message RpbYokozunaSchemaGetResp { + required RpbYokozunaSchema schema = 1; +} +``` + +The response message will include a `RpbYokozunaSchema` structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message includes the schema `name` and its xml `content`. + + + + diff --git a/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-schema-put.md new file mode 100644 index 0000000000..253b9f7fb0 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/protocol-buffers/yz-schema-put.md @@ -0,0 +1,45 @@ +--- +title: "PBC Yokozuna Schema Put" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Yokozuna Schema Put" + identifier: "pbc_yz_schema_put" + weight: 124 + parent: "apis_pbc" +toc: true +aliases: + - /riak/3.0.4/dev/references/protocol-buffers/yz-schema-put + - /riak/kv/3.0.4/dev/references/protocol-buffers/yz-schema-put +--- + +Create a new Solr [search schema]({{}}riak/kv/3.0.4/developing/usage/search-schemas). + +## Request + +```protobuf +message RpbYokozunaSchemaPutReq { + required RpbYokozunaSchema schema = 1; +} +``` + +Each message must contain a `RpbYokozunaSchema` object structure. + +```protobuf +message RpbYokozunaSchema { + required bytes name = 1; + optional bytes content = 2; +} +``` + +This message *must* include both the schema `name` and its Solr [search schema]({{}}riak/kv/3.0.4/developing/usage/search-schemas) `content` as XML. + +## Response + +Returns a [RpbPutResp]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/#message-codes) code with no data on success. 
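Putting the schema pieces together from the official Erlang client might look like the sketch below (`riakc` assumed; the file, schema, and index names are placeholders):

```erlang
%% Sketch: uploading a schema and creating an index that uses it, via
%% riakc. The file, schema, and index names are placeholders.
{ok, SchemaXml} = file:read_file("my_schema.xml"),
ok = riakc_pb_socket:create_search_schema(Pid, <<"my_schema">>, SchemaXml),
ok = riakc_pb_socket:create_search_index(Pid, <<"my_index">>, <<"my_schema">>, []).
```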
+ + + + diff --git a/content/riak/kv/3.0.4/developing/api/repl-hooks.md b/content/riak/kv/3.0.4/developing/api/repl-hooks.md new file mode 100644 index 0000000000..6d6b824a81 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/api/repl-hooks.md @@ -0,0 +1,196 @@ +--- +title_supertext: "Riak Multi-Datacenter Replication:" +title: "Hooks API" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Multi-Datacenter REPL Hooks API" + identifier: "apis_repl_hooks" + weight: 100 + parent: "developing_apis" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.4/ops/mdc/v2/hooks + - /riak/kv/3.0.4/ops/mdc/v2/hooks +--- +[object]: https://github.com/basho/riak_kv/blob/master/src/riak_object.erl + +This document is a guide to developing extensions for Riak's +Multi-Datacenter Replication feature. + +## Replication Hooks + +Riak allows applications to register replication hooks to control +either of the following: + +* when extra objects need to be replicated along with the current object +* when an object should _not_ be replicated. + +To register a hook, you must call the following function in an +application-specific Erlang module, where `MyMod` is to be replaced +with the name of your custom module: + +```erlang +riak_core:register([{repl_helper, MyMod}]). +``` + +## Replication Hook API + +A replication hook must implement the following functions: + +### send_realtime/2 + +```erlang +(riak_object, RiakClient) -> ok | cancel | [riak_object] +``` + +This hook controls whether an [object][object] +replicated in realtime should be sent. To send this object, return `ok`; +to prevent the object from being sent, return `cancel`. You can also +return a list of Riak objects to be replicated immediately *before* the +current object. This is useful when you have an object that refers to +other objects, e.g. a chunked file, and want to ensure that all of the +dependency objects are replicated before the dependent object. + +### send/2 + +```erlang +(riak_object, RiakClient) -> ok | cancel | [riak_object] +``` + +This hook is used in fullsync replication. To send this +[object][object], +return `ok`; to prevent the object from being sent, return `cancel`. You +can also return a list of Riak objects to be replicated immediately +*before* the current object. This is useful for when you have an object +that refers to other objects, e.g. a chunked file, and want ensure that +all the dependency objects are replicated before the dependent object. + +### recv/1 + +```erlang +(riak_object) -> ok | cancel +``` + +When an [object][object] +is received by the client site, this hook is run. You can use it to +update metadata or to deny the object. + +## Implementing a Sample Replication Hook + +The following is a simple replication hook that will log when an object +is received via replication. For more information about the functions in +the sample, see the [Replication Hook API](#replication-hook-api) section below. + +Here is the relevant Erlang code: + +```erlang +%% Riak Enterprise MDC replication hook sample + +-module(riak_replication_hook_sample). +-export([register/0]). +-export([recv/1, send/2, send_realtime/2]). 
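%% register/0 below wires this module into riak_core as a repl_helper;
%% recv/1, send/2, and send_realtime/2 are the hook callbacks described
%% in the Replication Hook API section above.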
+ +register() -> + riak_core:wait_for_service(riak_repl), + lager:log(info, self(), + "Automatically registering ~p hook with riak_core", + [?MODULE_STRING]), + riak_core:register([{repl_helper, ?MODULE}]), + case lists:member({undefined,?MODULE}, + app_helper:get_env(riak_core,repl_helper, [])) of + true -> + lager:log(info, self(), + "Successfully registered ~p hook with riak_core", + [?MODULE_STRING]); + false -> + lager:log(info, self(), + "Failed to register ~p hook with riak_core", + [?MODULE_STRING]) + end, + ok. + +recv(Object) -> + % This is a BLOCKING function. + % Longer-running processes should be handled asynchronously. + lager:log(info, self(), "Called recv(~p)", [riak_object:key(Object)]), + ok. + +send_realtime(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. + +send(_Object, _RiakClient) -> + % Do Nothing function -- These hooks are called in predictable + % but complex ways especially as the number of replication + % sites (Version 2 Replication) or sinks (Version 3 Replication) + % increase. + ok. +``` + +Save the above code as `riak_replication_hook_sample.erl`. + +To install the sample hook, compile `riak_replication_hook_sample.erl`. + +{{% note title="Note on the Erlang compiler" %}} + +[erlc]: http://erlang.org/doc/man/erlc.html +You must use the Erlang compiler [`erlc`][erlc] +associated with the Riak installation or the version of Erlang used when +compiling Riak from source. For packaged Riak installations, you can +consult **Table 1** (below) for the default location of +Riak’s `erlc` for each supported platform. If you compiled +from source, use the `erlc` from the Erlang version you used +to compile Riak. +{{% /note %}} + +Distribution | Path +:------------|:---- +CentOS & RHEL Linux | `/usr/lib64/riak/erts-5.10.3/bin/erlc` | +Debian & Ubuntu Linux | `/usr/lib/riak/erts-5.10.3/bin/erlc` | +FreeBSD | `/usr/local/lib/riak/erts-5.10.3/bin/erlc` | +SmartOS | `/opt/local/lib/riak/erts-5.10.3/bin/erlc` +Solaris 10 | `/opt/riak/lib/erts-5.10.3/bin/erlc` + +**Table 1**: Erlang compiler executable location for packaged Riak +installations on supported platforms + +Once you have determined the location of the Erlang compiler, e.g. on +Ubuntu, compiling is as simple as: + +```bash +/usr/lib/riak/erts-5.10.3/bin/erlc riak_replication_hook_sample.erl +``` + +This will create a `riak_replication_hook_sample.beam` file in the same +directory as the corresponding `.erl` file. 
Copy this `.beam` file into +the subdirectory where you want to store the custom hook: + +```bash +cp riak_replication_hook_sample.beam /path/to/replication/hook +``` + +Add a `-pa` argument to your `vm.args` file to specify the path where +your compiled `.beam` file lives: + +```bash +-pa /path/to/replication/hook +``` + +Finally, add a `-run` argument to your `vm.args` file to register the +hook: + +```bash +-run riak_replication_hook_sample register +``` + + + + + diff --git a/content/riak/kv/3.0.4/developing/app-guide.md b/content/riak/kv/3.0.4/developing/app-guide.md new file mode 100644 index 0000000000..fe9492889f --- /dev/null +++ b/content/riak/kv/3.0.4/developing/app-guide.md @@ -0,0 +1,420 @@ +--- +title: "Riak KV Application Guide" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Application Guide" + identifier: "developing_app_guide" + weight: 105 + parent: "developing" +toc: true +aliases: + - /riak/3.0.4/dev/using/application-guide/ + - /riak/kv/3.0.4/dev/using/application-guide/ +--- + +[usage conflict resolution]: {{}}riak/kv/3.0.4/developing/usage/conflict-resolution +[dev data model#log]: {{}}riak/kv/3.0.4/developing/data-modeling/#log-data +[dev data model#sensor]: {{}}riak/kv/3.0.4/developing/data-modeling/#sensor-data +[concept eventual consistency]: {{}}riak/kv/3.0.4/learn/concepts/eventual-consistency +[dev data model#user]: {{}}riak/kv/3.0.4/developing/data-modeling/#user-data +[dev kv model]: {{}}riak/kv/3.0.4/developing/key-value-modeling +[dev data types]: {{}}riak/kv/3.0.4/developing/data-types +[dev data types#counters]: {{}}riak/kv/3.0.4/developing/data-types/#counters +[dev data types#sets]: {{}}riak/kv/3.0.4/developing/data-types/#sets +[dev data types#maps]: {{}}riak/kv/3.0.4/developing/data-types/#maps +[usage create objects]: {{}}riak/kv/3.0.4/developing/usage/creating-objects +[usage search]: {{}}riak/kv/3.0.4/developing/usage/search +[use ref search]: {{}}riak/kv/3.0.4/using/reference/search +[usage 2i]: {{}}riak/kv/3.0.4/developing/usage/secondary-indexes +[dev client libraries]: {{}}riak/kv/3.0.4/developing/client-libraries +[concept crdts]: {{}}riak/kv/3.0.4/learn/concepts/crdts +[dev data model]: {{}}riak/kv/3.0.4/developing/data-modeling +[usage mapreduce]: {{}}riak/kv/3.0.4/developing/usage/mapreduce +[apps mapreduce]: {{}}riak/kv/3.0.4/developing/app-guide/advanced-mapreduce +[use ref 2i]: {{}}riak/kv/3.0.4/using/reference/secondary-indexes +[plan backend leveldb]: {{}}riak/kv/3.0.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{}}riak/kv/3.0.4/setup/planning/backend/bitcask +[plan backend memory]: {{}}riak/kv/3.0.4/setup/planning/backend/memory +[plan backend leveled]: {{}}riak/kv/3.0.4/setup/planning/backend/leveled +[obj model java]: {{}}riak/kv/3.0.4/developing/getting-started/java/object-modeling +[obj model ruby]: {{}}riak/kv/3.0.4/developing/getting-started/ruby/object-modeling +[obj model python]: {{}}riak/kv/3.0.4/developing/getting-started/python/object-modeling +[obj model csharp]: {{}}riak/kv/3.0.4/developing/getting-started/csharp/object-modeling +[obj model nodejs]: {{}}riak/kv/3.0.4/developing/getting-started/nodejs/object-modeling +[obj model erlang]: {{}}riak/kv/3.0.4/developing/getting-started/erlang/object-modeling +[obj model golang]: {{}}riak/kv/3.0.4/developing/getting-started/golang/object-modeling +[concept strong consistency]: {{}}riak/kv/3.0.4/using/reference/strong-consistency +[use ref strong consistency]: {{}}riak/kv/3.0.4/using/reference/strong-consistency 
[cluster ops strong consistency]: {{}}riak/kv/3.0.4/using/cluster-operations/strong-consistency
[config strong consistency]: {{}}riak/kv/3.0.4/configuring/strong-consistency
[apps strong consistency]: {{}}riak/kv/3.0.4/developing/app-guide/strong-consistency
[usage update objects]: {{}}riak/kv/3.0.4/developing/usage/updating-objects
[apps replication properties]: {{}}riak/kv/3.0.4/developing/app-guide/replication-properties
[install index]: {{}}riak/kv/3.0.4/setup/installing
[getting started]: {{}}riak/kv/3.0.4/developing/getting-started
[usage index]: {{}}riak/kv/3.0.4/developing/usage
[glossary]: {{}}riak/kv/3.0.4/learn/glossary

So you've decided to build an application using Riak as a data store. We
think that this is a wise choice for a broad variety of use cases. But
using Riak isn't always straightforward, especially if you're used to
developing with relational databases like MySQL or PostgreSQL or
non-persistent key/value stores like Redis. So in this guide, we'll walk
you through a set of questions that should be asked about your use case
before getting started. The answers to those questions may inform
decisions about which Riak features you should use, what kind of
replication and conflict resolution strategies you should employ, and
perhaps even how parts of your application should be built.

## What Kind of Data Are You Storing?

This is an important initial question for two reasons:

1. Not all data is a good fit for Riak. If your data isn't a good fit,
we would advise that you seek out a storage system that better suits
your needs.
2. The kinds of data that you're storing should guide your decision both
about _how_ to store and access your data in Riak and about which Riak
features would be helpful (and which ones might even be harmful).

### Good Fits for Riak

Riak tends to be an excellent choice if you're dealing with any of the
following:

* **Immutable data** - While Riak provides several means of
  [resolving conflicts][usage conflict resolution] between different replicas
  of objects, those processes can lead to slower performance in some
  cases. Storing immutable data means that you can avoid those processes
  altogether and get the most out of Riak.
* **Small objects** - Riak was not built as a store for large objects
  like video files or other
  [BLOB](http://en.wikipedia.org/wiki/Binary_large_object)s. We built
  [Riak CS](https://riak.com/riak-cloud-storage/) for that. Riak is
  great, however, for JSON, [log files][dev data model#log], [sensor data][dev data model#sensor], HTML files, and other objects that tend
  to run smaller than 1 MB.
* **Independent objects** - Objects that do not have interdependencies
  on other objects are a good fit for Riak's [eventually consistent][concept eventual consistency] nature.
* **Objects with "natural" keys** - It is almost always advisable to
  build keys for objects out of timestamps, [usernames][dev data model#user],
  or other ["natural" markers][dev kv model] that distinguish
  that object from other objects. Data that can be modeled this way fits
  nicely with Riak because Riak emphasizes extremely fast object lookup.
* **Data compatible with [Riak Data Types][dev data types]** - If
  you're working with mutable data, one option is to run basic CRUD
  operations on that data in a standard key/value fashion and either
  manage conflict resolution yourself or allow Riak to do so.
But if
  your data can be modeled as a [counter][dev data types#counters],
  [set][dev data types#sets], or [map][dev data types#maps], you
  should seriously consider using [Riak Data Types][dev data types],
  which can speed application development and transfer a great deal of
  complexity away from the application and to Riak itself.

### Not-so-good Fits for Riak

Riak may not be such a good choice if you're using it to store:

* **Objects that exceed 1-2MB in size** - If you will be
  storing a lot of objects over that size, we would recommend checking
  out [Riak CS]({{}}riak/cs/latest/) instead, as Riak
  CS was built to solve this problem. Storing large objects in Riak will
  typically lead to substandard performance.
* **Objects with complex interdependencies** - If your data cannot be
  easily denormalized or if it requires that objects can be easily
  assembled into and accessible as larger wholes---think columns or
  tables---then you might want to consider a relational database
  instead.

### Conclusion

If it sounds like Riak is a good choice for some or all of your
application's data needs, move on to the next sections, where you can
find out more about which Riak features are recommended for your use
case, how you should model your data, and what kinds of data modeling
and development strategies we recommend.

## Which Features Should You Consider?

Basic CRUD key/value operations are almost always the most performant
operations when using Riak. If your needs can be served using CRUD
operations, we recommend checking out our tutorial on [key/value modeling][dev kv model] for some basic guidelines. But if basic CRUD key/value
operations don't quite suffice for your use case, Riak offers a variety
of features that may be just what you're looking for. In the sections
immediately below, you can find brief descriptions of those features as
well as relevant links to Basho documentation.

## Search

Riak Search provides you with [Apache
Solr](http://lucene.apache.org/solr/)-powered full-text indexing and
querying on top of the scalability, fault tolerance, and operational
simplicity of Riak. Our motto for Riak Search: **Write it like Riak.
Query it like Solr**. That is, you can store objects in Riak [like normal][usage create objects] and run full-text queries on those objects later on
using the Solr API.

* [Using Search][usage search] - Getting started with Riak Search
* [Search Details][use ref search] - A detailed overview of the concepts and design
  considerations behind Riak Search
* [Search Schema][usage search schema] - How to create custom schemas for extracting data
  from Riak Search

### When to Use Search

* **When you need a rich querying API** - Riak Search gives you access
  to the entirety of [Solr](http://lucene.apache.org/solr/)'s extremely
  broad API, which enables you to query on the basis of wildcards,
  strings, booleans, geolocation, ranges, language-specific fulltext,
  and far more. You can even use Search in conjunction with [Riak Data Types][dev data types] \(documentation coming soon).

> **Search is preferred for querying**
>
> In general, you should consider Search to be the default choice for
> nearly all querying needs that go beyond basic CRUD/KV operations. If
> your use case demands some sort of querying mechanism and you're in
> doubt about what to use, you should assume that Search is the right tool
> for you.
+ +### When Not to Use Search + +* **When deep pagination is needed** - At the moment, you should + consider [secondary indexes][usage 2i] instead of + Search if your use case requires deep pagination. This will be + changed, however, in a future release of Riak, at which point you + should consider Search the default choice for _all_ querying needs. +* **In large clusters** - In clusters larger than 8-10 nodes, you may + experience slower performance when using Search. In clusters of that + size, we would recommend using Search in a limited fashion, setting + up a separate, dedicated cluster for Search data, or finding another + solution. + +## Riak Data Types + +When performing basic K/V operations, Riak is agnostic toward the actual +data stored within objects. Beginning with Riak 2.0, however, you now +have access to operations-based objects based on academic research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf). Riak +Data Types enable you to update and read [counters][dev data types#counters], +[sets][dev data types#sets], and [maps][dev data types#maps] directly in Riak, as well as [registers][dev data types#maps] and [flags][dev data types#maps] inside of Riak maps. + +The beauty of Riak Data Types is that all convergence logic is handled +by Riak itself according to deterministic, Data Type-specific rules, +which means that your application doesn't need to reason about +[siblings][usage conflict resolution]. In many cases, this can +unburden applications of the need to handle object convergence on their +own. + +* [Using Data Types][dev data types] - A guide to setting up Riak to use Data Types, + including a variety of code samples for all of the Basho's official + [client libraries][dev client libraries] +* [Data Types][concept crdts] - A theoretical treatment of Riak Data Types, along + with implementation details +* [Data Modeling with Riak Data Types][dev data model] - An object modeling example that relies on Riak Data Types. + +> **Note**: +> +> Riak Data Types can be used in conjunction with Riak Search, +meaning that the data stored in counters, sets, and maps can be indexed +and searched just like any other data in Riak. Documentation on Data +Types and Search is coming soon. + +### When to Use Riak Data Types + +* **When your data fits** - If the data that you're storing can be + modeled as one of the five available types, Riak Data Types could be a + very good option. Please note that in many cases there may not be a + 1:1 correspondence between the five available types and the data that + you'd like to store, but there may be workarounds to close the gap. + Most things that can be stored as JSON, for example, can be stored as + maps (though with modifications). +* **When you don't need to reason about siblings** - If your use case + doesn't require that your application have access to siblings and + allows for sibling convergence logic to take place at the Riak level + rather than at the application level, then Riak Data Types are well + worth exploring. + +### When Not to Use Riak Data Types + +* **When you need to provide your own convergence logic** - If your + application needs to have access to all sibling values, then Riak Data + Types are not a good choice because they by definition do not produce + siblings. +* **When your data just doesn't fit** - While the five existing Data + Types allow for a great deal of flexibility and a wide range of use + cases, they don't cover all use cases. 
If you have data that requires
+  a modeling solution that can't be covered, you should stick to
+  standard K/V operations.
+* **When object size is of significant concern** - Riak Data Types
+  behave much like other Riak objects, but they tend to carry more
+  metadata than normal Riak objects, especially maps. In most cases the
+  metadata payload will be a small percentage of the object's total
+  size, but if you want to keep objects as lean as possible, it may be
+  better to stick to normal K/V operations.
+
+## MapReduce
+
+Riak's MapReduce feature enables you to perform batch processing jobs in
+a way that leverages Riak's distributed nature. When a MapReduce job is
+sent to Riak, Riak automatically distributes the processing work to
+where the target data lives, which can reduce network bandwidth. Riak
+comes equipped with a set of default MapReduce jobs that you can employ,
+or you can write and run your own MapReduce jobs in
+[Erlang](http://www.erlang.org/).
+
+* [Using MapReduce][usage mapreduce] - A general guide to using MapReduce
+* [Advanced MapReduce][apps mapreduce] - A more in-depth guide to MapReduce,
+  including code samples and implementation details
+
+### When to Use MapReduce
+
+* **Batch processing only** - You should use MapReduce only when truly
+  necessary. MapReduce jobs are very computationally expensive and
+  can degrade performance in production clusters. You should restrict
+  MapReduce usage to infrequent batch processing operations, preferably
+  carried out at times when your cluster is experiencing load that is
+  well below average.
+
+### When Not to Use MapReduce
+
+* **When another Riak feature will do** - Before even considering
+  using MapReduce, you should thoroughly investigate [Riak Search][usage search] or [secondary indexes][usage 2i] as possible
+  solutions to your needs.
+
+In general, you should not think of MapReduce as, for example, Hadoop
+within Riak. While it can be useful for certain types of
+non-primary-key-based queries, it is neither a "Big Data" processing
+tool nor an indexing mechanism nor a replacement for [Riak Search][usage search]. If you do need a tool like Hadoop or Apache Spark, you should
+consider using Riak in conjunction with a more suitable data processing
+tool.
+
+## Secondary Indexes (2i)
+
+Using basic key/value operations in Riak sometimes leads to the
+following problem: how do I know which keys I should look for? Secondary
+indexes (2i) provide a solution to this problem, enabling you to tag
+objects with either binary or integer metadata and then query Riak for
+all of the keys that share specific tags. 2i is especially useful if
+you're storing binary data that is opaque to features like [Riak Search][usage search].
+
+* [Using Secondary Indexes][usage 2i] - A general guide to using 2i, along
+  with code samples and information on 2i features like pagination,
+  streaming, and sorting
+* [Advanced Secondary Indexes][use ref 2i] - Implementation details behind 2i
+
+### When to Use Secondary Indexes
+
+* **When you require deep pagination** - At the moment, 2i's
+  deep pagination capabilities are more performant than those offered
+  by Search if you require pagination of more than 3-5 pages. This
+  will change, however, in the future, at which point we will
+  recommend using Search instead.
+
+### When Not to Use Secondary Indexes
+
+* **For most querying purposes** - If your use case does not
+  involve deep pagination, we recommend Search over 2i for _all_
+  querying purposes.
+* **If you're using Bitcask** - 2i is available only in the
+  [LevelDB][plan backend leveldb] backend. If you'd like to use [Bitcask][plan backend bitcask] or the [Memory][plan backend memory] backend, you will not be able to use 2i.
+
+## Mixed Approach
+
+One thing to always bear in mind is that Riak enables you to mix and
+match a wide variety of approaches in a single cluster. You can use
+basic CRUD operations for some of your data, index some of your data to
+be queried by Riak Search, use Riak Data Types for another subset, etc.
+You are always free to use a wide array of Riak features---or you can
+use none at all and stick to key/value operations.
+
+## How Should You Model Your Data?
+
+It's difficult to offer universally applicable data modeling guidelines
+because data models differ so markedly from use case to use case. What
+works when storing [user data][dev data model#user], for example, might
+be a poor fit when working with [sensor data][dev data model#sensor].
+Nonetheless, there's a variety of material in our documentation that
+might be helpful when thinking about data modeling:
+
+* Object Modeling in Riak KV:
+  - [Java][obj model java]
+  - [Ruby][obj model ruby]
+  - [Python][obj model python]
+  - [C#][obj model csharp]
+  - [NodeJS][obj model nodejs]
+  - [Erlang][obj model erlang]
+  - [Go][obj model golang]
+* [Key/Value Modeling][dev kv model]
+
+### Data Types
+
+One feature to always bear in mind when using Riak is [Riak Data Types][dev data types]. If some or all of your data can be modeled in
+accordance with one of the available Data Types---flags (similar to
+Booleans), registers (good for storing small binaries or text snippets),
+[counters][dev data types#counters], [sets][dev data types#sets],
+or [maps][dev data types#maps]---you might be able to streamline
+application development by using them as an alternative to key/value
+operations. In some cases, it might even be worthwhile to transform your
+data modeling strategy in accordance with the available Data Types. To
+see if this feature might be a good fit for your application, we
+recommend checking out the following documentation:
+
+* [Data Types][concept crdts]
+* [Using Data Types][dev data types]
+* [Data Modeling with Riak Data Types][dev data model]
+
+## What are Your Consistency Requirements?
+
+Riak has traditionally been thought of as an [eventually consistent][concept eventual consistency], AP system, i.e. as a system that
+favors availability and partition tolerance over data consistency. In
+Riak versions 2.0 and later, the option of applying strong consistency
+guarantees is available to developers that want to use Riak as a strict
+CP system. One of the advantages of Riak's approach to strong
+consistency is that you don't need to store all of your data in a
+strongly consistent fashion if you use this feature. Instead, you can
+mix and match a CP approach with an AP approach in a single cluster in
+any way you wish.
+
+If you need some or all of your data to be subject to strong consistency
+requirements, we recommend checking out the following documentation:
+
+* [Strong Consistency][use ref strong consistency]
+* [Using Strong Consistency][apps strong consistency]
+* [Managing Strong Consistency][cluster ops strong consistency]
+
+## Are Your Objects Mutable?
+
+Although Riak always performs best when storing and retrieving immutable
+data, Riak also handles mutable objects very ably using a variety of
+eventual consistency principles. 
Storing mutable data in Riak, however, +can get tricky because it requires you to choose and implement a +conflict resolution strategy for when object conflicts arise, which is a +normal occurrence in Riak. For more implementation details, we recommend +checking out the following docs: + +* [Conflict Resolution][usage conflict resolution] +* [Object Updates][usage update objects] +* [Replication Properties][apps replication properties] + +## Getting Started + +If you have a good sense of how you will be using Riak for your +application (or if you just want to experiment), the following guides +will help you get up and running: + +* [Installing Riak KV][install index] - Install Riak KV and start up a 5-node Riak + cluster +* [Client Libraries][dev client libraries] - A listing of official and non-official client + libraries for building applications with Riak +* [Getting Started with Client Libraries][getting started] - How to + get up and going with one of Basho's official client libraries (Java, + Ruby, Python, and Erlang) +* [Developing with Riak KV: Usage][usage index] - A guide to basic key/value operations and other common tasks in Riak KV. +* [Riak KV Glossary][glossary] - A listing of frequently used terms in Riak's + documentation + + + + + diff --git a/content/riak/kv/3.0.4/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/3.0.4/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..fd915f50b4 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,802 @@ +--- +title: "Advanced MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Advanced MapReduce" + identifier: "app_guide_mapreduce" + weight: 103 + parent: "developing_app_guide" +toc: true +aliases: + - /riak/3.0.4/dev/advanced/mapreduce/ + - /riak/kv/3.0.4/dev/advanced/mapreduce/ +--- + +[usage 2i]: {{}}riak/kv/3.0.4/developing/usage/secondary-indexes +[apps replication properties]: {{}}riak/kv/3.0.4/developing/app-guide/replication-properties +[use ref custom code]: {{}}riak/kv/3.0.4/using/reference/custom-code +[usage bucket types]: {{}}riak/kv/3.0.4/developing/usage/bucket-types +[glossary vnode]: {{}}riak/kv/3.0.4/learn/glossary/#vnode +[config reference]: {{}}riak/kv/3.0.4/configuring/reference +[google mr]: http://research.google.com/archive/mapreduce.html +[mapping list]: http://hackage.haskell.org/package/base-4.7.0.0/docs/Prelude.html#v:map +[function contrib]: https://github.com/basho/riak_function_contrib +[erlang client]: https://github.com/basho/riak-erlang-client +[`set-union`]: http://en.wikipedia.org/wiki/Union_(set_theory)#Definition + + +> **Use MapReduce sparingly** +> +> In Riak KV, MapReduce is the primary method for non-primary-key-based +querying. Although useful for tasks such as batch +processing jobs, MapReduce operations can be very computationally +expensive, to the extent that they can degrade performance in +production clusters operating under load. Because of this potential for performance degradation, we recommend running +MapReduce operations in a controlled, rate-limited fashion and never for +realtime querying purposes. + +MapReduce, the data processing paradigm popularized by +[Google][google mr], is provided by Riak KV to aggregate +results as background batch processes. + +## MapReduce + +In Riak KV, MapReduce is one of the primary methods for +non-primary-key-based querying alongside +[secondary indexes][usage 2i]. 
Riak KV allows you to +run MapReduce jobs using Erlang or JavaScript. + +{{% note title="Deprecation Warning" %}} +Javascript MapReduce is deprecated and will be removed in a future version. +{{% /note %}} + + +### Why Do We Use MapReduce for Querying Riak KV? + +Key/value stores like Riak KV generally do not offer the kinds of complex +querying capabilities found in other data storage systems, such as +relational databases. MapReduce enables you to perform powerful queries +over the data stored in Riak KV but should be used with caution. + +The main goal of MapReduce is to spread the processing of a query across +many systems to take advantage of parallel processing power. This is +generally done by dividing the query into several steps, i.e. dividing +the dataset into several chunks and then running those step/chunk pairs +on separate physical hosts. Riak KV's MapReduce has an additional goal: +increasing data locality. When processing a large dataset, it's often +much more efficient to take the computation to the data than it is to +bring the data to the computation. + +"Map" and "Reduce" are phases in the query process. Map functions take +one piece of data as input and produce zero or more results as output. +If you're familiar with [mapping over a list][mapping list] +in functional programming languages, you're already familiar with the +"Map" steps in a MapReduce query. + +## MapReduce caveats + +MapReduce should generally be treated as a fallback rather than a +standard part of an application. There are often ways to model data +such that dynamic queries become single key retrievals, which are +dramatically faster and more reliable in Riak KV, and tools such as Riak +search and 2i are simpler to use and may place less strain on a +cluster. + +### R=1 + +One consequence of Riak KV's processing model is that MapReduce queries +have an effective `R` value of 1. The queries are distributed +to a representative sample of the cluster where the data is expected to +be found, and if one server lacks a copy of data it's supposed to have, +a MapReduce job will not attempt to look for it elsewhere. + +For more on the value of `R`, see our documentation on [replication properties][apps replication properties]. + +### Key lists + +Asking Riak KV to generate a list of all keys in a production environment +is generally a bad idea. It's an expensive operation. + +Attempting to constrain that operation to a bucket (e.g., +`mapred_bucket` as used below) does not help because Riak KV must still +pull all keys from storage to determine which ones are in the +specified bucket. + +If at all possible, run MapReduce against a list of known keys. + +### Code distribution + +As we'll discuss in this document, the functions invoked from Erlang +MapReduce must be available on all servers in the cluster unless +using the client library from an Erlang shell. + +### Security restrictions + +If Riak's security functionality is enabled, there are two +restrictions on MapReduce that come into play: + +* The `riak_kv.mapreduce` permission must be granted to the user (or + via the user's groups) +* Other than the module `riak_kv_mapreduce`, any Erlang modules + distributed with Riak KV will **not** be accessible to custom MapReduce + code unless made available via the `add_path` mechanism documented + in [Installing Custom Code][use ref custom code]. + +## How Riak KV's MapReduce Queries Are Specified + +MapReduce queries in Riak KV have two components: (1) a list of inputs and +(2) a list of "steps," or "phases." 
+
+Each element of the input list is an object location, as specified by
+[bucket type][usage bucket types], bucket, and key. This location may
+also be annotated with "key-data," which will be passed as an
+argument to a map function when evaluated on the object stored under
+that bucket-key pair.
+
+Each element of the phases list is a description of a map function, a
+reduce function, or a link function. The description includes where to
+find the code for the phase function (for map and reduce phases), static
+data passed to the function every time it is executed during that phase,
+and a flag indicating whether or not to include the results of that
+phase in the final output of the query.
+
+The phase list describes the chain of operations through which each
+input will flow. That is, the initial inputs will be fed to the first
+phase in the list and the output of that phase will be fed as input to
+the next phase in the list. This stream will continue through the final
+phase.
+
+## How Phases Work
+
+### Map Phase
+
+The input list to a map phase must be a list of (possibly annotated)
+bucket-key pairs. For each pair, Riak KV will send the request to evaluate
+the map function to the partition that is responsible for storing the
+data for that bucket-key. The [vnode][glossary vnode] hosting that partition
+will look up the object stored under that bucket-key and evaluate the
+map function with the object as an argument. The other arguments to the
+function will be the annotation, if any is included, with the
+bucket-key, and the static data for the phase, as specified in the
+query.
+
+{{% note title="Tombstones" %}}
+Be aware that most Riak KV clusters will retain deleted objects for some
+period of time (3 seconds by default), and the MapReduce framework does
+not conceal these from submitted jobs. These tombstones can be
+recognized and filtered out by looking for `X-Riak-Deleted`
+in the object metadata with a value of `true`.
+{{% /note %}}
+
+### Reduce Phase
+
+Reduce phases accept any list of data as input, and produce any list of
+data as output. They also receive a phase-static value, specified in the
+query definition.
+
+The most important thing to understand is that the function defining the
+reduce phase may be evaluated multiple times, and the input of later
+evaluations will include the output of earlier evaluations.
+
+For example, a reduce phase may implement the
+[`set-union`] function. In that case, the first set of inputs might be `[1,2,2,3]`,
+and the output would be `[1,2,3]`. When the phase receives more inputs,
+say `[3,4,5]`, the function will be called with the concatenation of the
+two lists: `[1,2,3,3,4,5]`.
+
+Other systems refer to the second application of the reduce function as
+a "re-reduce." There are at least a few reduce-query implementation
+strategies that work with Riak KV's model.
+
+One strategy is to implement the phase preceding the reduce phase such
+that its output is "the same shape" as the output of the reduce phase.
+This is how the examples in this document are written, and the way that
+we have found produces the cleanest code.
+
+An alternative strategy is to make the output of a reduce phase
+recognizable such that it can be extracted from the input list on
+subsequent applications. For example, if inputs from the preceding phase
+are numbers, outputs from the reduce phase could be objects or strings.
+This would allow the function to find the previous result and apply new
+inputs to it.
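+
+As a hedged illustration of that second strategy (not taken from the
+Riak KV source), a summing reduce function can tag its own output so a
+later invocation can recognize it:
+
+```erlang
+%% Hedged sketch: previous reduce results are tagged {sum, N}, so a
+%% re-reduce can separate them from the plain numbers emitted by the
+%% preceding map phase.
+fun(Values, _Arg) ->
+    {Prev, New} = lists:partition(fun({sum, _}) -> true;
+                                     (_)        -> false
+                                  end, Values),
+    Base = lists:sum([S || {sum, S} <- Prev]),
+    [{sum, Base + lists:sum(New)}]
+end.
+```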
+ +### How a Link Phase Works in Riak KV + +Link phases find links matching patterns specified in the query +definition. The patterns specify which buckets and tags links must have. + +"Following a link" means adding it to the output list of this phase. The +output of this phase is often most useful as input to a map phase or to +another reduce phase. + +## Invoking MapReduce + +To illustrate some key ideas, we'll define a simple module that +implements a map function to return the key value pairs contained in a +bucket and use it in a MapReduce query via Riak KV's HTTP API. + +Here is our example MapReduce function: + +```erlang +-module(mr_example). + +-export([get_keys/3]). + +% Returns bucket and key pairs from a map phase +get_keys(Value,_Keydata,_Arg) -> + [{riak_object:bucket(Value),riak_object:key(Value)}]. +``` + +Save this file as `mr_example.erl` and proceed to compiling the module. + +{{% note title="Note on the Erlang Compiler" %}} +You must use the Erlang compiler (`erlc`) associated with the +Riak KV installation or the version of Erlang used when compiling Riak KV from +source. +{{% /note %}} + +Compiling the module is a straightforward process: + +```bash +erlc mr_example.erl +``` + +Successful compilation will result in a new `.beam` file, `mr_example.beam`. + +Send this file to your operator, or read about [installing custom code][use ref custom code] +on your Riak KV nodes. Once your file has been installed, all that +remains is to try the custom function in a MapReduce query. For +example, let's return keys contained within a bucket named `messages` +(please pick a bucket which contains keys in your environment). + +```curl +curl -XPOST localhost:8098/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' +``` + +The result should be a JSON map of bucket and key names expressed as key/value pairs. + +{{% note %}} +Be sure to install the MapReduce function as described above on all of +the nodes in your cluster to ensure proper operation. +{{% /note %}} + + +## Phase functions + +MapReduce phase functions have the same properties, arguments, and +return values whether you write them in Javascript or Erlang. + +### Map phase functions + +Map functions take three arguments (in Erlang, arity-3 is required). +Those arguments are: + + 1. `Value`: the value found at a key. This will be a Riak object, which + in Erlang is defined and manipulated by the `riak_object` module. + In Javascript, a Riak object looks like this: + + ```javascript + { + "bucket_type" : BucketTypeAsString, + "bucket" : BucketAsString, + "key" : KeyAsString, + "vclock" : VclockAsString, + "values" : [ + { + "metadata" : { + "X-Riak-VTag":VtagAsString, + "X-Riak-Last-Modified":LastModAsString, + "Links":[...List of link objects], + // ...other metadata... + }, + "data" : ObjectData + }, + // ...other metadata/data values (siblings)... + ] + } + ``` + 2. *KeyData* : key data that was submitted with the inputs to the query or phase. + 3. *Arg* : a static argument for the entire phase that was submitted with the query. + +A map phase should produce a list of results. You will see errors if +the output of your map function is not a list. Return the empty list if +your map function chooses not to produce output. If your map phase is +followed by another map phase, the output of the function must be +compatible with the input to a map phase - a list of bucket-key pairs or +`bucket-key-keydata` triples. 
+ +#### Map function examples + +These map functions return the value (data) of the object being mapped: + +```erlang +fun(Value, _KeyData, _Arg) -> + [riak_object:get_value(Value)] +end. +``` + +These map functions filter their inputs based on the arg and return bucket-key pairs for a subsequent map phase: + +```erlang +fun(Value, _KeyData, Arg) -> + Key = riak_object:key(Value), + Bucket = riak_object:bucket(Value), + case erlang:byte_size(Key) of + L when L > Arg -> + [{Bucket,Key}]; + _ -> [] + end +end. +``` + +### Reduce phase functions + +Reduce functions take two arguments. Those arguments are: + +1. *ValueList*: the list of values produced by the preceding phase in the MapReduce query. +2. *Arg* : a static argument for the entire phase that was submitted with the query. + +A reduce function should produce a list of values, but it must also be +true that the function is commutative, associative, and idempotent. That +is, if the input list `[a,b,c,d]` is valid for a given F, then all of +the following must produce the same result: + + +```erlang + F([a,b,c,d]) + F([a,d] ++ F([c,b])) + F([F([a]),F([c]),F([b]),F([d])]) +``` + +#### Reduce function examples + +These reduce functions assume the values in the input are numbers and +sum them: + +```erlang +fun(Values, _Arg) -> + [lists:foldl(fun erlang:'+'/2, 0, Values)] +end. +``` + +These reduce functions sort their inputs: + +```erlang +fun(Values, _Arg) -> + lists:sort(Values) +end. +``` + +## MapReduce Examples + +Riak KV supports describing MapReduce queries in Erlang syntax through the +Protocol Buffers API. This section demonstrates how to do so using the +Erlang client. + +{{% note title="Distributing Erlang MapReduce Code" %}} +Any modules and functions you use in your Erlang MapReduce calls must be +available on all nodes in the cluster. Please read about +[installing custom code]({{}}riak/kv/3.0.4/using/reference/custom-code). +{{% /note %}} + +### Erlang Example + +Before running some MapReduce queries, let's create some objects to +run them on. Unlike the first example when we compiled +`mr_example.erl` and distributed it across the cluster, this time +we'll use the [Erlang client library][erlang client] and shell. + +```erlang +1> {ok, Client} = riakc_pb_socket:start("127.0.0.1", 8087). +2> Mine = riakc_obj:new(<<"groceries">>, <<"mine">>, + term_to_binary(["eggs", "bacon"])). +3> Yours = riakc_obj:new(<<"groceries">>, <<"yours">>, + term_to_binary(["bread", "bacon"])). +4> riakc_pb_socket:put(Client, Yours, [{w, 1}]). +5> riakc_pb_socket:put(Client, Mine, [{w, 1}]). +``` + +Now that we have a client and some data, let's run a query and count how +many occurrences of groceries. + +```erlang +6> Count = fun(G, undefined, none) -> + [dict:from_list([{I, 1} + || I <- binary_to_term(riak_object:get_value(G))])] + end. +7> Merge = fun(Gcounts, none) -> + [lists:foldl(fun(G, Acc) -> + dict:merge(fun(_, X, Y) -> X+Y end, + G, Acc) + end, + dict:new(), + Gcounts)] + end. +8> {ok, [{1, [R]}]} = riakc_pb_socket:mapred( + Client, + [{<<"groceries">>, <<"mine">>}, + {<<"groceries">>, <<"yours">>}], + [{map, {qfun, Count}, none, false}, + {reduce, {qfun, Merge}, none, true}]). +9> L = dict:to_list(R). +``` + +{{% note title="Riak Object Representations" %}} +Note how the `riak_object` module is used in the MapReduce +function but the `riakc_obj` module is used on the client. +Riak objects are represented differently internally to the cluster than +they are externally. 
+{{% /note %}} + +Given the lists of groceries we created, the sequence of commands above +would result in L being bound to `[{"bread",1},{"eggs",1},{"bacon",2}]`. + +### Erlang Query Syntax + +`riakc_pb_socket:mapred/3` takes a client and two lists as arguments. +The first list contains bucket-key pairs. The second list contains +the phases of the query. + +`riakc_pb_socket:mapred_bucket/3` replaces the first list of +bucket-key pairs with the name of a bucket; see the warnings above +about using this in a production environment. + +#### Inputs + +The `mapred/3` input objects are given as a list of tuples in the +format `{Bucket, Key}` or `{{Bucket, Key}, KeyData}`. `Bucket` and +`Key` should be binaries, and `KeyData` can be any Erlang term. The +former form is equivalent to `{{Bucket,Key},undefined}`. + +#### Query + +The query is given as a list of map, reduce and link phases. Map and +reduce phases are each expressed as tuples in the following form: + + +```erlang +{Type, FunTerm, Arg, Keep} +``` + +`Type` is an atom, either `map` or `reduce`. `Arg` is a static argument +(any Erlang term) to pass to each execution of the phase. `Keep` is +either `true` or `false` and determines whether results from the phase +will be included in the final value of the query. Riak KV assumes that the +final phase will return results. + +`FunTerm` is a reference to the function that the phase will execute and +takes any of the following forms: + +* `{modfun, Module, Function}` where `Module` and `Function` are atoms + that name an Erlang function in a specific module +* `{qfun,Fun}` where `Fun` is a callable fun term (closure or anonymous + function) +* `{jsfun,Name}` where `Name` is a binary that, when evaluated in + Javascript, points to a built-in Javascript function +* `{jsanon, Source}` where `Source` is a binary that, when evaluated in + Javascript is an anonymous function +* `{jsanon, {Bucket, Key}}` where the object at `{Bucket, Key}` contains + the source for an anonymous Javascript function + +{{% note title="qfun Note" %}} +Using `qfun` in compiled applications can be a fragile +operation. Please keep the following points in mind: + +1. The module in which the function is defined must be present and +exactly the same version on both the client and Riak KV nodes. + +2. Any modules and functions used by this function (or any function in +the resulting call stack) must also be present on the Riak KV nodes. + +Errors about failures to ensure both 1 and 2 are often surprising, +usually seen as opaque missing-function or function-clause +errors. Especially in the case of differing module versions, this can be +difficult to diagnose without expecting the issue and knowing of +`Module:info/0`. + +When using the Erlang shell, anonymous MapReduce functions can be +defined and sent to Riak KV instead of deploying them to all servers in +advance, but condition #2 above still holds. +{{% /note %}} + +Link phases are expressed in the following form: + + +```erlang +{link, Bucket, Tag, Keep} +``` + + +`Bucket` is either a binary name of a bucket to match, or the atom `_`, +which matches any bucket. `Tag` is either a binary tag to match, or the +atom `_`, which matches any tag. `Keep` has the same meaning as in map +and reduce phases. + + +> There are a small group of prebuilt Erlang MapReduce functions available +with Riak KV. Check them out [on GitHub](https://github.com/basho/riak_kv/blob/master/src/riak_kv_mapreduce.erl). 
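+
+For instance, here is a hedged sketch that reuses the grocery objects
+stored earlier; both phases are `{modfun, ...}` references to functions
+that ship with Riak KV, so nothing needs to be compiled or deployed:
+
+```erlang
+%% Hedged sketch: map_object_value returns each object's stored binary
+%% value, and reduce_sort sorts the collected values.
+{ok, [{1, Sorted}]} =
+    riakc_pb_socket:mapred(
+        Client,
+        [{<<"groceries">>, <<"mine">>},
+         {<<"groceries">>, <<"yours">>}],
+        [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
+         {reduce, {modfun, riak_kv_mapreduce, reduce_sort}, none, true}]).
+```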
+ +## Bigger Data Examples + +### Loading Data + +This Erlang script will load historical stock-price data for Google +(ticker symbol "GOOG") into your existing Riak KV cluster so we can use it. +Paste the code below into a file called `load_data.erl` inside the `dev` +directory (or download it below). + +```erlang +#!/usr/bin/env escript +%% -*- erlang -*- +main([]) -> + io:format("Requires one argument: filename with the CSV data~n"); +main([Filename]) -> + {ok, Data} = file:read_file(Filename), + Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])), + lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, Lines). + +format_and_insert(Line) -> + JSON = io_lib:format("{\"Date\":\"~s\",\"Open\":~s,\"High\":~s,\"Low\":~s,\"Close\":~s,\"Volume\":~s,\"Adj. Close\":~s}", Line), + Command = io_lib:format("curl -XPUT http://127.0.0.1:8098/buckets/goog/keys/~s -d '~s' -H 'content-type: application/json'", [hd(Line),JSON]), + io:format("Inserting: ~s~n", [hd(Line)]), + os:cmd(Command). +``` + +Make the script executable: + +```bash +chmod +x load_data.erl +``` + +Download the CSV file of stock data linked below and place it in the +`dev` directory where we've been working. + +* [goog.csv](https://github.com/basho/basho_docs/raw/master/extras/data/goog.csv) - Google historical stock data +* [load_stocks.rb](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_stocks.rb) - Alternative script in Ruby to load the data +* [load_data.erl](https://github.com/basho/basho_docs/raw/master/extras/code-examples/load_data.erl) - Erlang script to load data (as shown in snippet) + +Now load the data into Riak KV. + +```bash +./load_data.erl goog.csv +``` + + +### Map only: find the days on which the high was over $600.00 + +From the Erlang shell with the client library loaded, let's define a +function which will check each value in our `goog` bucket to see if +the stock's high for the day was above $600. + +```erlang +> HighFun = fun(O, _, LowVal) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> High = proplists:get_value(<<"High">>, Map, -1.0), +> case High > LowVal of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun +``` + +Now we'll use `mapred_bucket/3` to send that function to the cluster. + +```erlang +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, HighFun}, 600, true}]). + {ok,[{0, + [<<"2007-11-29">>,<<"2008-01-02">>,<<"2008-01-17">>, + <<"2010-01-08">>,<<"2007-12-05">>,<<"2007-10-24">>, + <<"2007-10-26">>,<<"2007-10-11">>,<<"2007-11-09">>, + <<"2007-12-06">>,<<"2007-12-19">>,<<"2007-11-01">>, + <<"2007-11-07">>,<<"2007-11-16">>,<<"2009-12-28">>, + <<"2007-12-26">>,<<"2007-11-05">>,<<"2008-01-16">>, + <<"2007-11-13">>,<<"2007-11-08">>,<<"2007-12-07">>, + <<"2008-01-"...>>,<<"2007"...>>,<<...>>|...]}]} +``` + +#### Map only: find the days on which the close is lower than open + +This example is slightly more complicated: instead of comparing a +single field against a fixed value, we're looking for days when the +stock declined. + +```erlang +> CloseLowerFun = fun(O, _, _) -> +> {struct, Map} = mochijson2:decode(riak_object:get_value(O)), +> Close = proplists:get_value(<<"Close">>, Map, -1.0), +> Open = proplists:get_value(<<"Open">>, Map, -2.0), +> case Close < Open of +> true -> [riak_object:key(O)]; +> false -> [] +> end end. +#Fun + +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, CloseLowerFun}, none, true}]). 
+{ok,[{0, + [<<"2008-05-13">>,<<"2008-12-19">>,<<"2009-06-10">>, + <<"2006-07-06">>,<<"2006-07-07">>,<<"2009-02-25">>, + <<"2009-07-17">>,<<"2005-10-05">>,<<"2006-08-18">>, + <<"2008-10-30">>,<<"2009-06-18">>,<<"2006-10-26">>, + <<"2008-01-17">>,<<"2010-04-16">>,<<"2007-06-29">>, + <<"2005-12-12">>,<<"2008-08-20">>,<<"2007-03-30">>, + <<"2006-07-20">>,<<"2006-10-24">>,<<"2006-05-26">>, + <<"2007-02-"...>>,<<"2008"...>>,<<...>>|...]}]} +``` + +#### Map and Reduce: find the maximum daily variance in price by month + +Here things start to get tricky. We'll use map to determine each day's +rise or fall, and our reduce phase will identify each month's largest +variance. + +```erlang +DailyMap = fun(O, _, _) -> + {struct, Map} = mochijson2:decode(riak_object:get_value(O)), + Date = binary_to_list(proplists:get_value(<<"Date">>, Map, "0000-00-00")), + High = proplists:get_value(<<"High">>, Map, 0.0), + Low = proplists:get_value(<<"Low">>, Map, 0.0), + Month = string:substr(Date, 1, 7), + [{Month, abs(High - Low)}] +end. + +MonthReduce = fun(List, _) -> + {Highs, _} = lists:foldl( + fun({Month, _Value}=Item, {Accum, PrevMonth}) -> + case Month of + PrevMonth -> + %% Highest value is always first in the list, so + %% skip over this one + {Accum, PrevMonth}; + _ -> + {[Item] ++ Accum, Month} + end + end, + {[], ""}, + List), + Highs + end. +> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyMap}, none, false}, {reduce, {qfun, MonthReduce}, none, true}]). +{ok,[{1, + [{"2010-02",10.099999999999909}, + {"2006-02",11.420000000000016}, + {"2004-08",8.100000000000009}, + {"2008-08",14.490000000000009}, + {"2006-05",11.829999999999984}, + {"2005-10",4.539999999999964}, + {"2006-06",7.300000000000011}, + {"2008-06",9.690000000000055}, + {"2006-03",11.770000000000039}, + {"2006-12",4.880000000000052}, + {"2005-09",9.050000000000011}, + {"2008-03",15.829999999999984}, + {"2008-09",14.889999999999986}, + {"2010-04",9.149999999999977}, + {"2008-06",14.909999999999968}, + {"2008-05",13.960000000000036}, + {"2005-05",2.780000000000001}, + {"2005-07",6.680000000000007}, + {"2008-10",21.390000000000043}, + {"2009-09",4.180000000000007}, + {"2006-08",8.319999999999993}, + {"2007-08",5.990000000000009}, + {[...],...}, + {...}|...]}]} +``` + +#### A MapReduce Challenge + +Here is a scenario involving the data you already have loaded. + +MapReduce Challenge: Find the largest day for each month in terms of +dollars traded, and subsequently the largest overall day. + +*Hint*: You will need at least one each of map and reduce phases. + +## Streaming MapReduce + +Because Riak KV distributes the map phases across the cluster to increase +data locality, you can gain access to the results of those individual +computations as they finish via streaming. Streaming can be very +helpful when getting access to results from a high latency MapReduce job +that only contains map phases. Streaming of results from reduce phases +isn't as useful, but if your map phases return data (keep: true), they +will be returned to the client even if the reduce phases haven't +executed. This will let you use streaming with a reduce phase to collect +the results of the map phases while the jobs are run and then get the +result to the reduce phase at the end. + +### Streaming via the HTTP API + +You can enable streaming with MapReduce jobs submitted to the `/mapred` +resource by adding `?chunked=true` to the url. The response will be sent +using HTTP 1.1 chunked transfer encoding with `Content-Type: multipart/mixed`. 
+Be aware that if you are streaming a set of serialized objects (like +JSON objects), the chunks are not guaranteed to be separated along the +same boundaries that your serialized objects are. For example, a chunk +may end in the middle of a string representing a JSON object, so you +will need to decode and parse your responses appropriately in the +client. + +### Streaming via the Erlang API + +You can use streaming with Erlang via the Riak KV local client or the +Erlang Protocol Buffers API. In either case, you will provide the call +to `mapred_stream` with a `Pid` that will receive the streaming results. + +For examples, see [MapReduce pbstream.erl]({{}}data/MapReduceExamples/pbstream.erl) + + +## Troubleshooting MapReduce, illustrated + +The most important advice: when developing Erlang MapReduce against +Riak KV, prototype against a development environment using the Erlang +shell. The shell allows for rapid feedback and iteration; once code +needs to be deployed to a server for production use, changing it is +more time-consuming. + +### Module not in path + +```bash +$ curl -XPOST localhost:8098/mapred \ +> -H 'Content-Type: application/json' \ +> -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +{"phase":0,"error":"invalid module named in PhaseSpec function:\n must be a valid module name (failed to load mr_example: nofile)"} +``` + +### Node in process of starting + +```bash +$ curl -XPOST localhost:8098/mapred -H 'Content-Type: application/json' -d '{"inputs":"messages","query":[{"map":{"language":"erlang","module":"mr_example","function":"get_keys"}}]}' + +500 Internal Server Error
+
+Internal Server Error
+
+The server encountered an error while processing this request:
+{error,{error,function_clause,
+              [{chashbin,itr_value,
+                         [done],
+                         [{file,"src/chashbin.erl"},{line,139}]},
+               {chashbin,itr_next_while,2,
+                         [{file,"src/chashbin.erl"},{line,183}]},
+...
+```
+
+### Erlang errors
+
+```erlang
+> riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+```
+
+The Erlang shell truncates error messages; when using MapReduce, typically the information you need is buried more deeply within the stack.
+
+We can get a longer error message this way:
+
+```erlang
+> {error, ErrorMsg} = riakc_pb_socket:mapred_bucket(Riak, <<"goog">>, [{map, {qfun, DailyFun}, none, true}]).
+{error,<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dic"...>>}
+
+> io:format("~p~n", [ErrorMsg]).
+<<"{\"phase\":0,\"error\":\"function_clause\",\"input\":\"{ok,{r_object,<<\\\"goog\\\">>,<<\\\"2009-06-10\\\">>,[{r_content,{dict,6,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[[<<\\\"Links\\\">>]],[],[],[],[],[],[],[],[[<<\\\"content-type\\\">>,97,112,112,108,105,99,97,116,105,111,110,47,106,115,111,110],[<<\\\"X-Riak-VTag\\\">>,55,87,101,79,53,120,65,121,50,67,49,77,72,104,54,100,89,65,67,74,55,70]],[[<<\\\"index\\\">>]],[],[[<<\\\"X-Riak-Last-Modified\\\">>|{1405,709865,48668}]],[],[[<<\\\"X-Riak-Meta\\\">>]]}}},<<\\\"{\\\\\\\"Date\\\\\\\":\\\\\\\"2009-06-10\\\\\\\",\\\\\\\"Open\\\\\\\":436.23,\\\\\\\"High\\\\\\\":437.89,\\\\\\\"L...\\\">>}],...},...}\",\"type\":\"error\",\"stack\":\"[{string,substr,[\\\"2009-06-10\\\",0,7],[{file,\\\"string.erl\\\"},{line,207}]},{erl_eval,do_apply,6,[{file,\\\"erl_eval.erl\\\"},{line,573}]},{erl_eval,expr,5,[{file,\\\"erl_eval.erl\\\"},{line,364}]},{erl_eval,exprs,5,[{file,\\\"erl_eval.erl\\\"},{line,118}]},{riak_kv_mrc_map,map,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,172}]},{riak_kv_mrc_map,process,3,[{file,\\\"src/riak_kv_mrc_map.erl\\\"},{line,144}]},{riak_pipe_vnode_worker,process_input,3,[{file,\\\"src/riak_pipe_vnode_worker.erl\\\"},{line,446}]},{riak_pipe_vnode_worker,wait_for_input,...}]\"}">>
+```
+
+Still truncated, but this provides enough context to see the problem:
+`string,substr,[\\\"2009-06-10\\\",0,7]`. Erlang's `string:substr`
+function starts indexing strings at 1, not 0.
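+
+A hypothetical corrected version of the offending map function looks
+like this (the JSON decoding mirrors the `DailyMap` example above):
+
+```erlang
+%% Hypothetical fix: Erlang strings are 1-indexed, so the "YYYY-MM"
+%% month prefix starts at position 1, not 0.
+DailyFun = fun(O, _, _) ->
+    {struct, Map} = mochijson2:decode(riak_object:get_value(O)),
+    Date = binary_to_list(proplists:get_value(<<"Date">>, Map, <<"0000-00-00">>)),
+    [string:substr(Date, 1, 7)]
+end.
+```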
+
+### Exceptional tip
+
+When experimenting with MapReduce from the Erlang shell, it is helpful
+to avoid breaking the connection to Riak KV when an exception is trapped
+by the shell. Use `catch_exception`:
+
+```erlang
+> catch_exception(true).
+false
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/app-guide/cluster-metadata.md b/content/riak/kv/3.0.4/developing/app-guide/cluster-metadata.md
new file mode 100644
index 0000000000..446e501293
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/app-guide/cluster-metadata.md
@@ -0,0 +1,72 @@
+---
+title: "Cluster Metadata"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Cluster Metadata"
+    identifier: "app_guide_cluster_metadata"
+    weight: 104
+    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+Cluster metadata is a subsystem inside of Riak that enables systems
+built on top of
+[`riak_core`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+to work with information that is stored cluster wide and can be read
+without blocking on communication over the network.
+
+One notable example of a subsystem of Riak relying on cluster metadata
+is Riak's [bucket types]({{}}riak/kv/3.0.4/using/reference/bucket-types) feature. This feature
+requires that a particular form of key/value pairs, namely bucket type
+names (the key) and their associated bucket properties (the value), be
+asynchronously broadcast to all nodes in a Riak cluster.
+
+Though it is different in crucial respects,
+[etcd](https://coreos.com/docs/cluster-management/setup/getting-started-with-etcd/)
+is a roughly analogous cluster metadata key/value store developed for
+use in [CoreOS](https://coreos.com/) clusters.
+
+## How Cluster Metadata Works
+
+Cluster metadata is different from other Riak data in two essential
+respects:
+
+1. Cluster metadata is intended only for internal Riak applications that
+   require metadata shared on a system-wide basis. Regular stored data,
+   on the other hand, is intended for use outside of Riak.
+2. Because it is intended for use only by applications internal to Riak,
+   cluster metadata can be accessed only internally, via the Erlang
+   interface provided by the
+   [`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+   module; it cannot be accessed externally via HTTP or Protocol Buffers.
+
+The storage system backing cluster metadata is a simple key/value store
+that is capable of asynchronously replicating information to all nodes
+in a cluster when it is stored or modified. Writes require
+acknowledgment from only a single node (equivalent to `w=1` in normal
+Riak), while reads return values only from the local node (equivalent to
+`r=1`). All updates are eventually consistent and propagated to all
+nodes, including nodes that join the cluster after the update has
+already reached all nodes in the previous set of members.
+
+All cluster metadata is eventually stored both in memory and on disk,
+but it should be noted that reads are only from memory, while writes are
+made both to memory and to disk. Logical clocks, namely [dotted version vectors]({{}}riak/kv/3.0.4/learn/concepts/causal-context/#dotted-version-vectors), are used in place of [vector clocks]({{}}riak/kv/3.0.4/learn/concepts/causal-context/#vector-clocks) or timestamps to resolve value conflicts. Values stored as cluster metadata are opaque Erlang
+terms addressed by both a prefix and a key.
+
+## Erlang Code Interface
+
+If you'd like to use cluster metadata for an internal Riak application,
+the Erlang interface is defined in the
+[`riak_core_metadata`](https://github.com/basho/riak_core/blob/develop/src/riak_core_metadata.erl)
+module, which allows you to perform a variety of cluster metadata
+operations, including retrieving, modifying, and deleting metadata and
+iterating through metadata keys.
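+
+As a hedged sketch (runnable only from an attached Erlang node, since
+cluster metadata has no HTTP or Protocol Buffers interface), a put and
+a get might look like the following; the prefix and key names are
+purely illustrative:
+
+```erlang
+%% Hedged sketch: a "full prefix" is a {Prefix, SubPrefix} pair that
+%% namespaces the keys; all names below are made up for illustration.
+FullPrefix = {<<"my_app">>, <<"settings">>},
+ok = riak_core_metadata:put(FullPrefix, <<"feature_x">>, enabled),
+enabled = riak_core_metadata:get(FullPrefix, <<"feature_x">>).
+```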
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/app-guide/reference.md b/content/riak/kv/3.0.4/developing/app-guide/reference.md
new file mode 100644
index 0000000000..40c18b7086
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/app-guide/reference.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+#menu:
+#  riak_kv-3.0.4:
+#    name: "Reference"
+#    identifier: "app_guide_reference"
+#    weight: 104
+#    parent: "developing_app_guide"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/app-guide/replication-properties.md b/content/riak/kv/3.0.4/developing/app-guide/replication-properties.md
new file mode 100644
index 0000000000..c4b7482aba
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/app-guide/replication-properties.md
@@ -0,0 +1,584 @@
+---
+title: "Replication Properties"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Replication Properties"
+    identifier: "app_guide_replication_properties"
+    weight: 100
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/advanced/replication-properties
+  - /riak/kv/3.0.4/dev/advanced/replication-properties
+---
+
+[usage bucket types]: {{}}riak/kv/3.0.4/developing/usage/bucket-types
+[concept eventual consistency]: {{}}riak/kv/3.0.4/learn/concepts/eventual-consistency
+[use ref strong consistency]: {{}}riak/kv/3.0.4/using/reference/strong-consistency
+[concept clusters]: {{}}riak/kv/3.0.4/learn/concepts/clusters
+
+Riak was built to act as a multi-node [cluster][concept clusters].  It
+distributes data across multiple physical servers, which enables it to
+provide strong availability guarantees and fault tolerance.
+
+The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which
+undergirds many of the design decisions behind Riak's architecture,
+defines distributed systems in terms of three desired properties:
+consistency, availability, and partition (i.e. failure) tolerance. Riak
+can be used either as an AP, i.e. available/partition-tolerant, system
+or as a CP, i.e. consistent/partition-tolerant, system. The former
+relies on an [eventual consistency][concept eventual consistency] model, while the latter relies on
+a special [strong consistency][use ref strong consistency] subsystem.
+
+Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem)
+dictates that there is a necessary trade-off between data consistency
+and availability, if you are using Riak in an eventually consistent
+manner, you can fine-tune that trade-off. The ability to make these
+kinds of fundamental choices has immense value for your applications and
+is one of the features that differentiates Riak from other databases.
+
+At the bottom of the page, you'll find a [screencast]({{}}riak/kv/3.0.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your
+replication levels to match your application and business needs.
+
+> **Note on strong consistency**
+>
+> An option introduced in Riak version 2.0 is to use Riak as a [strongly consistent]({{}}riak/kv/3.0.4/using/reference/strong-consistency/) system for data in specified buckets. Using Riak in this way is fundamentally different from adjusting replication properties and fine-tuning the availability/consistency trade-off, as it sacrifices
+_all_ availability guarantees when necessary. Therefore, you
+should consult the [Using Strong Consistency]({{}}riak/kv/3.0.4/developing/app-guide/strong-consistency) documentation, as this option will not be covered
+in this tutorial.
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, as part of
+each individual read or write.
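+
+As a hedged sketch with the Erlang client (bucket and key names are
+illustrative), per-request properties are passed as an options list:
+
+```erlang
+%% Hedged sketch: these options override the bucket's replication
+%% properties for this one request only.
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"animal_facts">>,
+                                <<"chimpanzee">>, [{r, 1}]),
+ok = riakc_pb_socket:put(Pid, riakc_obj:update_value(Obj, <<"updated">>),
+                         [{w, 3}]).
+```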
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{}}riak/kv/3.0.4/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props`, those properties will apply to it.
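+
+A hedged sketch of such a write with the Erlang client (bucket and key
+names are illustrative):
+
+```erlang
+%% Hedged sketch: the Erlang client addresses a typed bucket as a
+%% {Type, Bucket} tuple, so this write inherits n_val=5, r=3, w=3.
+Obj = riakc_obj:new({<<"custom_props">>, <<"my_bucket">>},
+                    <<"my_key">>, <<"hello">>, <<"text/plain">>),
+ok = riakc_pb_socket:put(Pid, Obj).
+```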
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below]({{}}riak/kv/3.0.4/developing/app-guide/replication-properties#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary vnodes that must respond to a read request
+`pw` | PW | `0` | The number of primary vnodes that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent to setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
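+
+Most of these parameters can also be supplied per request. As a hedged
+sketch with the Erlang client, the following read disables
+`notfound_ok` and enables `basic_quorum` for a single fetch:
+
+```erlang
+%% Hedged sketch: ask Riak to keep looking on other nodes instead of
+%% returning not found from the first empty response, but settle for a
+%% quorum of negative answers.
+riakc_pb_socket:get(Pid, <<"animal_facts">>, <<"chimpanzee">>,
+                    [{notfound_ok, false}, {basic_quorum, true}]).
+```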
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level. You can use [bucket types]({{}}riak/kv/3.0.4/developing/usage/bucket-types)
+to set up bucket `A` to use a particular set of replication properties
+and bucket `B` to use entirely different properties.
+
+At the bucket level, you can choose how many copies of data you want to
+store in your cluster (N, or `n_val`), how many copies you wish to read
+from at one time (R, or `r`), and how many copies must be written to be
+considered a success (W, or `w`).
+
+In addition to the bucket level, you can also specify replication
+properties on the client side for any given read or write. The examples
+immediately below will deal with bucket-level replication settings, but
+check out the [section below]({{}}riak/kv/3.0.4/developing/app-guide/replication-properties#client-level-replication-settings)
+for more information on setting properties on a per-operation basis.
+
+The most general trade-off to be aware of when setting these values is
+the trade-off between **data accuracy** and **client responsiveness**.
+Choosing higher values for N, R, and W will mean higher accuracy because
+more nodes are checked for the correct value on read and data is written
+to more nodes upon write; but higher values will also entail degraded
+responsiveness, especially if one or more nodes is failing, because Riak
+has to wait for responses from more nodes.
+
+## N Value and Replication
+
+All data stored in Riak will be replicated to the number of nodes in the
+cluster specified by a bucket's N value (`n_val`). The default `n_val`
+in Riak is 3, which means that data stored in a bucket with the default
+N will be replicated to three different nodes, thus storing three
+**replicas** of the object.
+
+In order for this to be effective, you need at least three nodes in your
+cluster. The merits of this system, however, can be demonstrated using
+your local environment.
+
+Let's create a bucket type that sets the `n_val` for any bucket with
+that type to 2. To do so, you must create and activate a bucket type
+that sets this property:
+
+```bash
+riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_equals_2
+```
+
+Now, all buckets that bear the type `n_val_equals_2` will have `n_val`
+set to 2. Here's an example write:
+
+```curl
+curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \
+  -H "Content-Type: text/plain" \
+  -d "the n_val on this write is 2"
+```
+
+Now, whenever we write to a bucket of this type, Riak will write a
+replica of the object to two different nodes.
+
+{{% note title="A Word on Setting the N Value" %}}
+`n_val` must be greater than 0 and less than or equal to the number of actual
+nodes in your cluster to get all the benefits of replication. We advise
+against modifying the `n_val` of a bucket after its initial creation as this
+may result in failed reads because the new value may not be replicated to all
+the appropriate partitions.
+{{% /note %}}
+
+## R Value and Read Failure Tolerance
+
+Read requests to Riak are sent to all N nodes that are known to be
+currently responsible for the data. The R value (`r`) enables you to
+specify how many of those nodes have to return a result on a given read
+for the read to be considered successful. This allows Riak to provide
+read availability even when nodes are down or laggy.
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+  new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+  new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                       <<"giraffe">>,
+                       <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing our giraffe fact will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{}}riak/kv/3.0.4/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
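+
+PR can be supplied on a per-request basis just like R. Here's a hedged
+sketch using the Python client (the parameter name follows the pattern of
+the other per-request options shown later in this document):
+
+```python
+bucket = client.bucket('animal_facts')
+
+# Require at least 2 *primary* vnodes to respond. If fewer than 2
+# primaries are reachable, the read fails rather than falling back to
+# the sloppy quorum.
+obj = bucket.get('chimpanzee', pr=2)
+```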
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak. When the
+failed vnode returns to service, it will receive the new copy of the data via
+either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask]({{}}riak/kv/3.0.4/setup/planning/backend/bitcask), [LevelDB]({{}}riak/kv/3.0.4/setup/planning/backend/leveldb), and [multiple backends]({{}}riak/kv/3.0.4/setup/planning/backend/multi).
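+
+Like W, DW can also be supplied on individual writes. A minimal Python
+sketch, reusing the `w_equals_3` type and `animal_facts` bucket from
+above (the `dw` keyword argument is an assumption based on the client's
+other quorum parameters):
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+
+# Succeed only once 2 vnodes report that the write has reached durable
+# storage in the backend, not merely that they received the write.
+obj.store(dw=2)
+```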
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum` to
+`true`, which is discussed in the next section.
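+
+As a brief sketch in Python, `notfound_ok` can be passed on a single
+read (the same parameter appears in the client-level examples later in
+this document):
+
+```python
+bucket = client.bucket('animal_facts')
+
+# Treat the first vnode's "not found" as inconclusive rather than
+# authoritative; Riak waits to hear from other vnodes.
+obj = bucket.get('chimpanzee', notfound_ok=False)
+```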
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
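+
+Here's a hedged Python sketch combining the two settings so that reads
+of missing keys fail fast without waiting on all N vnodes:
+
+```python
+bucket = client.bucket('animal_facts')
+
+# With notfound_ok=False and basic_quorum=True, a read of a missing key
+# returns "not found" once floor(N/2) + 1 vnodes agree (2 of 3 for the
+# default N value) instead of waiting for all N.
+obj = bucket.get('nonexistent_key', notfound_ok=False, basic_quorum=True)
+```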
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
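+
+Most clients accept these symbolic names anywhere an integer value would
+go. For example, in Python (a sketch; exact support may vary by client
+version):
+
+```python
+bucket = client.bucket('animal_facts')
+
+# 'all' is equivalent to setting r equal to N for this request
+obj = bucket.get('chimpanzee', r='all')
+
+# 'quorum' is equivalent to floor(N/2) + 1, i.e. 2 when N is 3
+obj.store(w='quorum')
+```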
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+  new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which means we don't need to specify a bucket type on the
+write. Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+  new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{\"stats\":{ ... large stats object ... }}"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(michaelJordanKey)
+        .withOption(StoreOption.W, new Quorum(3))
+        .withOption(StoreOption.DW, new Quorum(2))
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildJsonObject('{"stats":{ ... large stats object ... }}')
+  ->buildLocation('michael_jordan', 'nba_stats')
+  ->withParameter('w', 3)
+  ->withParameter('dw', 2)
+  ->build()
+  ->execute();
+```
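+
+```python
+# A hedged sketch following the pattern of the other clients; the
+# Python client accepts w and dw as keyword arguments to store()
+bucket = client.bucket('nba_stats')
+obj = RiakObject(client, bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w=3, dw=2)
+```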
+
+```erlang
+Obj = riakc_obj:new(<<"nba_stats">>,
+                    <<"michael_jordan">>,
+                    <<"{\"stats\":{ ... large stats object ... }}">>,
+                    <<"application/json">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d '{"stats":{ ... large stats object ... }}' \
+  "http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2"
+```
+
+All of Basho's [official Riak clients]({{}}riak/kv/3.0.4/developing/client-libraries) enable you to
+set replication properties this way. For more detailed information,
+refer to the section on [development usage with Riak KV]({{}}riak/kv/3.0.4/developing/usage)
+or to client-specific documentation:
+
+* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md)
+* [Java](http://basho.github.io/riak-java-client/2.0.0/)
+* [Python](http://basho.github.io/riak-python-client/)
+* [Erlang](http://basho.github.io/riak-erlang-client/)
+
+## Illustrative Scenarios
+
+In case the above explanations were a bit too abstract for your tastes,
+the following table lays out a number of possible scenarios for reads
+and writes in Riak and how Riak is likely to respond. Some of these
+scenarios involve issues surrounding conflict resolution, vector clocks,
+and siblings, so we recommend reading the [Vector Clocks]({{}}riak/kv/3.0.4/learn/concepts/causal-context#vector-clocks) documentation for more information.
+
+#### Read Scenarios
+
+These scenarios assume that a read request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client
+2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or siblings
+2 conflicting values reach the coordinating node and vector clocks allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes
+2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes
+2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made)
+
+#### Write Scenarios
+
+These scenarios assume that a write request is sent to all 3 primary
+vnodes responsible for an object.
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+_Tuning CAP Controls in Riak_, from Basho Technologies on Vimeo.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/app-guide/strong-consistency.md b/content/riak/kv/3.0.4/developing/app-guide/strong-consistency.md
new file mode 100644
index 0000000000..e84e2482ac
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/app-guide/strong-consistency.md
@@ -0,0 +1,261 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Strong Consistency"
+    identifier: "app_guide_strong_consistency"
+    weight: 101
+    parent: "developing_app_guide"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/advanced/strong-consistency
+  - /riak/kv/3.0.4/dev/advanced/strong-consistency
+---
+
+[use ref strong consistency]: {{}}riak/kv/3.0.4/using/reference/strong-consistency
+[concept eventual consistency]: {{}}riak/kv/3.0.4/learn/concepts/eventual-consistency
+[use ref strong consistency#trade-offs]: {{}}riak/kv/3.0.4/using/reference/strong-consistency/#trade-offs
+[glossary vnode]: {{}}riak/kv/3.0.4/learn/glossary/#vnode
+[config strong consistency#enable]: {{}}riak/kv/3.0.4/configuring/strong-consistency/#enabling-strong-consistency
+[usage bucket types]: {{}}riak/kv/3.0.4/developing/usage/bucket-types
+[cluster ops bucket types]: {{}}riak/kv/3.0.4/using/cluster-operations/bucket-types
+[apps replication properties]: {{}}riak/kv/3.0.4/developing/app-guide/replication-properties
+[config strong consistency]: {{}}riak/kv/3.0.4/configuring/strong-consistency
+[config strong consistency#fault]: {{}}riak/kv/3.0.4/configuring/strong-consistency/#fault-tolerance
+[concept causal context]: {{}}riak/kv/3.0.4/learn/concepts/causal-context
+[concept causal context#vector]: {{}}riak/kv/3.0.4/learn/concepts/causal-context/#vector-clocks
+[concept version vector]: {{}}riak/kv/3.0.4/learn/concepts/causal-context/#dotted-version-vectors
+[usage conflict resolution]: {{}}riak/kv/3.0.4/developing/usage/conflict-resolution
+[usage update objects]: {{}}riak/kv/3.0.4/developing/usage/updating-objects
+[use ref strong consistency#vs]: {{}}riak/kv/3.0.4/using/reference/strong-consistency/#strong-vs.-eventual-consistency
+[dev client libraries]: {{}}riak/kv/3.0.4/developing/client-libraries
+[getting started]: {{}}riak/kv/3.0.4/developing/getting-started
+[config strong consistency#details]: {{}}riak/kv/3.0.4/configuring/strong-consistency/#implementation-details
+
+> **Please Note:**
+>
+> Riak KV's strong consistency is an experimental feature and may be removed from the product in the future. Strong consistency is not commercially supported or production-ready. Strong consistency is incompatible with Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its usage in any production environment.
+
+In versions 2.0 and later, Riak allows you to create buckets that
+provide [strong consistency][use ref strong consistency] guarantees for the data stored within
+them, enabling you to use Riak as a CP system (consistent and
+partition-tolerant) for all of the data in those buckets. You can store
+just some of
+your data in strongly consistent buckets or all of your data, depending
+on your use case. Strong consistency was added to complement Riak's
+standard [eventually consistent][concept eventual consistency], high
+availability mode.
+
+## Tradeoffs
+
+When data is stored in a bucket with strong consistency guarantees, a
+value is guaranteed readable by any client _immediately_ after a
+successful write has occurred to a given key. In this sense, single-key
+strongly consistent operations are atomic, and operations on a given key
+are [linearizable](http://en.wikipedia.org/wiki/Linearizability). This
+behavior comes at the expense of availability because a [quorum][use ref strong consistency#trade-offs] of primary [vnodes][glossary vnode] responsible for the key must be online and reachable or the request will
+fail.
+
+This trade-off is unavoidable for strongly consistent data, but the
+[choice is now yours](http://en.wikipedia.org/wiki/CAP_theorem) to make.
+
+## Enabling Strong Consistency
+
+Complete instructions on enabling strong consistency can be found in
+our documentation on [configuring strong consistency][config strong consistency#enable].
+
+## Creating Consistent Bucket Types
+
+[Strong Consistency][use ref strong consistency] requirements in Riak are applied on a bucket-by-bucket basis, meaning that you can use some buckets in an eventually consistent fashion and others in a strongly consistent
+fashion, depending on your use case.
+
+To apply strong consistency to a bucket, you must create a [bucket type][usage bucket types] that sets the `consistent` bucket property to
+`true`, activate that type, and then apply that type to specific
+bucket/key pairs.
+
+To give an example, we'll create a bucket type called
+`strongly_consistent` with the `consistent` bucket property set to
+`true`:
+
+```bash
+riak-admin bucket-type create strongly_consistent \
+    '{"props":{"consistent":true}}'
+```
+
+> **Note on bucket type names**
+>
+> You can name [bucket types][usage bucket types] whatever you wish, with
+the exception of `default`, which is a reserved term (a full listing of
+the properties associated with the `default` bucket type can be found in
+the documentation on [bucket properties and operations][cluster ops bucket types]).
+
+Once the `strongly_consistent` bucket type has been created, we can
+check the status of the type to ensure that it has propagated through
+all nodes and is thus ready to be activated:
+
+```bash
+riak-admin bucket-type status strongly_consistent
+```
+
+If the console outputs `strongly_consistent has been created and may be
+activated` and the properties listing shows that `consistent` has been
+set to `true`, then you may proceed with activation:
+
+```bash
+riak-admin bucket-type activate strongly_consistent
+```
+
+When activation is successful, the console will return the following:
+
+```bash
+strongly_consistent has been activated
+```
+
+Now, any bucket that bears the type `strongly_consistent`---or whatever
+you wish to name it---will provide strong consistency guarantees.
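+
+From the client's perspective, a strongly consistent bucket is addressed
+like any other, via its bucket type. A minimal Python sketch (the bucket
+and key names here are illustrative):
+
+```python
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+obj = RiakObject(client, bucket, 'user42')
+obj.content_type = 'text/plain'
+obj.data = 'initial value'
+obj.store()  # creating a brand-new key requires no causal context
+```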
+
+Elsewhere in the Riak docs, you can find more information on [using bucket types][usage bucket types], on the concept of [strong consistency][use ref strong consistency], and on strong
+consistency [for operators][config strong consistency].
+
+## Replication Properties
+
+Strongly consistent operations in Riak function quite differently from
+their [eventually consistent][concept eventual consistency] counterparts.
+Whereas eventually consistent operations enable you to set values for a
+variety of [replication properties][apps replication properties] either on each request or at the
+bucket level, [using bucket types][usage bucket types], these settings are quietly ignored
+for strongly consistent operations. These settings include `r`, `pr`,
+`w`, `rw`, and others. Two replication properties that _can_ be set,
+however, are `n_val` and `return_body`.
+
+The `n_val` property is extremely important for two reasons:
+
+1. It dictates how fault tolerant a strongly consistent bucket is. More
+   information can be found in [our recommendations for operators][config strong consistency#fault].
+2. Once the `n_val` property is set for a given bucket type, it cannot
+   be changed. If you wish to change the `n_val` for one or more
+   strongly consistent buckets [using bucket types][usage bucket types], you will need to
+   create a new bucket type with the desired `n_val`.
+
+We also recommend setting the `n_val` on strongly consistent buckets to
+at least 5. More on why we make this recommendation can be found in
+[Fault Tolerance][config strong consistency#fault].
+
+## Causal Context
+
+Riak uses [causal context][concept causal context] to determine the causal history of objects.
+In versions of Riak KV prior to 2.0, [vector clocks][concept causal context#vector] were used to provide objects with causal context
+metadata. In Riak versions 2.0 and later there is an option to use
+[dotted version vectors][concept version vector], which function much like vector clocks from
+the standpoint of clients, but with important advantages over vector
+clocks.
+
+While we strongly recommend attaching context to objects for all
+updates---whether traditional vector clocks or the newer dotted version
+vectors---they are purely [optional][usage conflict resolution] for all
+eventually consistent operations in Riak. This is not the case for
+strongly consistent operations. **When modifying strongly consistent
+objects in Riak, you _must_ attach a causal context**.
+
+If you attempt to modify a strongly consistent object without attaching
+a context to the request, the request will always fail. And while it is
+possible to make writes to non-existing keys without attaching context,
+we recommend doing this only if you are certain that the key does not
+yet exist.
+
+Instructions on using causal context can be found in our documentation
+on [object updates][usage update objects].
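+
+To illustrate, here is a hedged Python sketch of a read/modify/write
+cycle; the official Python client carries the causal context from the
+fetch through to the store automatically (bucket and key names are
+illustrative):
+
+```python
+bucket = client.bucket_type('strongly_consistent').bucket('accounts')
+
+obj = bucket.get('user42')   # the fetch brings the causal context along
+obj.data = 'updated value'
+obj.store()                  # the context is sent back with the write
+```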
+
+## Strongly Consistent Writes
+
+Writing to strongly consistent keys involves some of the same best
+practices that we advise when writing to eventually consistent keys. We
+recommend bearing the following in mind:
+
+1. If you _know_ that a key does not yet exist, you can write to that
+   key without supplying a context with the object. If you are unsure, then you should default to supplying a context object.
+2. If an object already exists under a key, strong consistency demands
+   that you supply a [causal context](#causal-context). If you do not supply one, the update
+   will necessarily fail.
+3. Because strongly consistent writes must occasionally
+   [sacrifice availability][use ref strong consistency#vs] for the sake of
+   consistency, **strongly consistent updates can fail even under normal
+   conditions**, particularly in the event of concurrent updates (a
+   retry sketch follows this list).
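+
+Because of point 3 above, applications typically wrap the whole
+read/modify/write cycle in a retry loop, re-fetching the object (and
+thus a fresh causal context) before each attempt. A sketch in Python;
+the exception handling here is illustrative, since clients surface
+these failures as generic errors (see the known issue with client
+libraries below):
+
+```python
+from riak import RiakError
+
+def update_with_retries(bucket, key, modify, retries=5):
+    """Fetch, modify, and store a strongly consistent object,
+    retrying the whole cycle if a concurrent update wins."""
+    for _ in range(retries):
+        obj = bucket.get(key)        # re-fetch to get a fresh context
+        obj.data = modify(obj.data)
+        try:
+            obj.store()
+            return obj
+        except RiakError:
+            continue                 # another writer got there first; retry
+    raise RuntimeError('update failed after %d attempts' % retries)
+```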
+
+## Error Messages
+
+For the most part, performing reads, writes, and deletes on data in
+strongly consistent buckets works much like it does in
+non-strongly-consistent buckets. One important exception to this is how
+writes are performed. Strongly consistent buckets cannot allow siblings
+by definition, and so all writes to existing keys must include a context
+with the object.
+
+If you attempt a write to a non-empty key without including causal
+context, you will receive the following error:
+
+```ruby
+Riak::Conflict: The object is in conflict (has siblings) and cannot be treated singly or saved:
+```
+
+```java
+java.lang.IllegalArgumentException: VClock cannot be null.
+```
+
+```php
+$response->isSuccess();  // false
+$response->getStatusCode(); // 412
+```
+
+```python
+riak.RiakError: 'failed'
+```
+
+```erlang
+{error,<<"failed">>}
+```
+
+```curl
+412 Precondition Failed
+
+Precondition Failed
+
+Precondition Failed
+
+
+mochiweb+webmachine web server
+```
+
+> **Getting Started with Riak KV clients**
+>
+> If you are connecting to Riak using one of Basho's official
+[client libraries][dev client libraries], you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started][getting started] section.
+
+## Known Issue with Client Libraries
+
+All of Basho's official [client libraries][dev client libraries] currently convert errors returned by Riak into generic exceptions, with a message derived from the error message returned by Riak. In many cases this presents no
+problems, since many error conditions are normal when using Riak.
+
+When working with strong consistency, however, operations like
+[conditional puts][config strong consistency#details] commonly
+produce errors that are difficult for clients to interpret. For example,
+it is expected behavior for conditional puts to fail in the case of
+concurrent updates to an object. At present, the official Riak clients
+will convert this failure into an exception that is no different from
+other error conditions, i.e. they will not indicate any
+strong-consistency-specific errors.
+
+The best solution to this problem at the moment is to catch these
+exceptions on the application side and parse server-side error messages
+to see if the error involved a conditional failure. If so, you should
+set up your application to retry any updates, perhaps a specified number
+of times or perhaps indefinitely, depending on the use case.
+
+If you do set up a retry logic of this sort, however, it is necessary
+to retry the entire read/modify/put cycle, meaning that you will need
+to fetch the object, modify it, and then write. If you perform a simple
+put over and over again, without reading the object, the update will
+continue to fail.
+
+A future version of Riak will address these issues by modifying the
+server API to more accurately report errors specific to strongly
+consistent operations.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/app-guide/write-once.md b/content/riak/kv/3.0.4/developing/app-guide/write-once.md
new file mode 100644
index 0000000000..a60744c93e
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/app-guide/write-once.md
@@ -0,0 +1,159 @@
+---
+title: "Write Once"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Write Once"
+    identifier: "app_guide_write_once"
+    weight: 102
+    parent: "developing_app_guide"
+toc: true
+version_history:
+  in: "2.1.0+"
+aliases:
+  - /riak/3.0.4/dev/advanced/write-once
+  - /riak/kv/3.0.4/dev/advanced/write-once
+---
+
+[glossary vnode]: {{}}riak/kv/3.0.4/learn/glossary/#vnode
+[bucket type]: {{}}riak/kv/3.0.4/developing/usage/bucket-types
+[Riak data types]: {{}}riak/kv/3.0.4/developing/data-types
+[strong consistency]: {{}}riak/kv/3.0.4/developing/app-guide/strong-consistency
+
+Write-once buckets are buckets whose entries are intended to be written exactly once and never updated or overwritten. Buckets of this type circumvent the normal "coordinated PUT" path, which would otherwise result in a read on the coordinating vnode before the write. Avoiding coordinated PUTs results in higher throughput and lower PUT latency, though at the cost of different semantics in the degenerate case of sibling resolution.
+
+{{% note %}}
+Write-once buckets do not support Riak commit hooks. Because Riak objects are
+inserted into the realtime queue using a postcommit hook, realtime replication
+is unavailable for write-once buckets.
+Fullsync replication will, however, replicate the data.
+{{% /note %}}
+
+## Configuration
+
+When the new `write_once` [bucket type][bucket type] parameter is set to
+`true`, buckets of this type will treat all key/value entries as semantically
+"write once"; once written, entries should not be modified or overwritten by
+the user.
+
+The `write_once` property is a boolean property applied to a bucket type and
+may only be set at bucket creation time. Once a bucket type has been set with
+this property and activated, the `write_once` property may not be modified.
+
+The `write_once` property is incompatible with [Riak data types][Riak data types]
+and [strong consistency][strong consistency]. This means that if you attempt
+to create a bucket type with the `write_once` property set to `true`, any
+attempt to set the `datatype` parameter or to set the `consistent` parameter
+to `true` will fail.
+
+The `write_once` property may not be set on the default bucket type, and may
+not be set on individual buckets. If you set the `lww` or `allow_mult`
+parameters on a write-once bucket type, those settings will be ignored, as
+sibling values are disallowed by default.
+
+The following example shows how to configure a bucket type with the
+`write_once` property:
+
+```bash
+riak-admin bucket-type create my-bucket-type '{"props": {"write_once": true}}'
+# my-bucket-type created
+
+riak-admin bucket-type activate my-bucket-type
+# my-bucket-type has been activated
+
+riak-admin bucket-type status my-bucket-type
+# my-bucket-type is active
+...
+write_once: true
+...
+```
+
+## Quorum
+
+The write path used by write-once buckets supports the `w`, `pw`, and `dw`
+configuration values. However, if `dw` is specified, then the value of `w` is
+taken to be the maximum of the `w` and `dw` values. For example, for an `n_val`
+of 3, if `dw` is set to `all`, then `w` will be `3`.
+
+The write-once path additionally supports the `sloppy_quorum` property. If set
+to `false`, only primary nodes will be selected for calculation of write
+quorum nodes.
+
+## Runtime
+
+The write-once path circumvents the normal coordinated PUT code path, and
+instead sends write requests directly to all [vnodes][glossary vnode] (or
+vnode proxies) in the effective preference list for the write operation.
+
+In place of the `put_fsm` used in the normal path, we introduce a collection of
+new intermediate worker processes (implementing `gen_server` behavior). The
+role of these intermediate processes is to dispatch put requests to vnode or
+vnode proxies in the preflist and to aggregate replies. Unlike the `put_fsm`,
+the write-once workers are long-lived for the lifecycle of the `riak_kv`
+application. They are therefore stateful and store request state in a
+state-local dictionary.
+
+The relationship between the `riak_client`, write-once workers, and vnode
+proxies is illustrated in the following diagram:
+
+![Write Once]({{}}images/write_once.png)
+
+## Client Impacts
+
+Since the write-once code path is optimized for writes of data that will not
+be updated and therefore may potentially issue asynchronous writes, some
+client features might not work as expected. For example, when performed
+against write-once buckets, PUT requests asking for the object to be
+returned will behave like requests that do not request the object to be
+returned.
+
+## Siblings
+
+As mentioned, entries in write-once buckets are intended to be written only
+once---users who are not abusing the semantics of the bucket type should not be
+updating or over-writing entries in buckets of this type. However, it is
+possible for users to misuse the API, accidentally or otherwise, which might
+result in incomparable entries for the same key.
+
+In the case of siblings, write-once buckets will resolve the conflict by
+choosing the "least" entry, where sibling ordering is based on a deterministic
+SHA-1 hash of the objects. While this algorithm is repeatable and deterministic
+at the database level, it will have the appearance to the user of "random write
+wins."
+
+{{% note %}}
+As mentioned in [Configuration](#configuration), write-once buckets and Riak
+Data Types are incompatible because of this.
+{{% /note %}}
+
+## Handoff
+
+The write-once path supports handoff scenarios, such that if a handoff occurs
+during PUTs in a write-once bucket, the values that have been written will be
+handed off to the newly added Riak node.
+
+## Asynchronous Writes
+
+For backends that support asynchronous writes, the write-once path will
+dispatch a write request to the backend and handle the response
+asynchronously. This behavior allows the vnode to free itself for other work
+instead of waiting on the write response from the backend.
+
+At the time of writing, the only backend that supports asynchronous writes is
+LevelDB. Riak will automatically fall back to synchronous writes with all other
+backends.
+
+{{% note title="Note on the `multi` backend" %}}
+The [Multi]({{}}riak/kv/3.0.4/setup/planning/backend/multi) backend does not
+support asynchronous writes. Therefore, if LevelDB is used with the Multi
+backend, it will be used in synchronous mode.
+{{% /note %}}
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/client-libraries.md b/content/riak/kv/3.0.4/developing/client-libraries.md
new file mode 100644
index 0000000000..be0c72ec39
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/client-libraries.md
@@ -0,0 +1,294 @@
+---
+title: "Client Libraries"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Client Libraries"
+    identifier: "developing_client_libraries"
+    weight: 106
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/using/libraries
+  - /riak/kv/3.0.4/dev/using/libraries
+---
+
+## Basho-Supported Libraries
+
+Basho officially supports a number of open-source client libraries for a
+variety of programming languages and environments.
+
+Language | Source | Documentation | Download
+:--------|:-------|:--------------|:--------
+Java | [riak-java-client](https://github.com/basho/riak-java-client) | [javadoc](http://basho.github.com/riak-java-client), [wiki](https://github.com/basho/riak-java-client/wiki) | [Maven Central](http://search.maven.org/?#search%7Cgav%7C1%7Cg%3A%22com.basho.riak%22%20AND%20a%3A%22riak-client%22)
+Ruby | [riak-ruby-client](https://github.com/basho/riak-ruby-client) | [GitHub Pages](http://basho.github.io/riak-ruby-client/) | [RubyGems](https://rubygems.org/gems/riak-client)
+Python | [riak-python-client](https://github.com/basho/riak-python-client) | [sphinx](http://basho.github.com/riak-python-client) | [PyPI](http://pypi.python.org/pypi?:action=display&name=riak#downloads)
+C# | [riak-dotnet-client](https://github.com/basho/riak-dotnet-client) | [api docs](http://basho.github.io/riak-dotnet-client-api/), [wiki](https://github.com/basho/riak-dotnet-client/wiki) | [NuGet package](http://www.nuget.org/List/Packages/RiakClient), [GitHub Releases](https://github.com/basho/riak-dotnet-client/releases)
+Node.js | [riak-nodejs-client](https://github.com/basho/riak-nodejs-client) | [api docs](http://basho.github.com/riak-nodejs-client/), [wiki](https://github.com/basho/riak-nodejs-client/wiki) | [NPM](https://www.npmjs.com/package/basho-riak-client), [GitHub Releases](https://github.com/basho/riak-nodejs-client/releases)
+PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client)
+Erlang | [riak-erlang-client (riakc)](https://github.com/basho/riak-erlang-client) | [edoc](http://basho.github.com/riak-erlang-client/) | [GitHub](https://github.com/basho/riak-erlang-client)
+Go | [riak-go-client](https://github.com/basho/riak-go-client) | [GoDoc](https://godoc.org/github.com/basho/riak-go-client) | [GitHub](https://github.com/basho/riak-go-client)
+
+**Note**: All official clients use the integrated issue tracker on
+GitHub for bug reporting.
+
+In addition to the official clients, Basho provides some unofficial
+client libraries, listed below. There are also many client libraries and
+related [community projects]({{}}community/projects/).
+
+## Community Libraries
+
+The Riak Community is developing at a break-neck pace, and the number of
+community-contributed libraries and drivers is growing right alongside
+it. Here is a list of projects that may suit your programming needs or
+curiosities. If you know of something that needs to be added or are
+developing something that you wish to see added to this list, please
+fork the [Riak Docs repo on GitHub](https://github.com/basho/basho_docs)
+and send us a pull request.
+
+{{% note title="Note on community-produced libraries" %}}
+All of these projects and libraries are at various stages of completeness and
+may not suit your application's needs based on their level of maturity and
+activity.
+{{% /note %}}
+
+### Client Libraries and Frameworks
+
+#### C/C++
+
+* [riak-cpp](https://github.com/ajtack/riak-cpp) - A C++ Riak client
+  library for use with C++11 compilers
+* [Riak C Driver](https://github.com/fenek/riak-c-driver) - A library
+  to communicate with Riak using cURL and Protocol Buffers
+* [Riack](https://github.com/trifork/riack) - A simple C client
+  library
+* [Riack++](https://github.com/TriKaspar/riack_cpp) - A C++ wrapper
+  around riack
+
+#### Clojure
+
+* [knockbox](https://github.com/reiddraper/knockbox) - An eventual
+  consistency toolbox for Clojure
+* [Welle](http://clojureriak.info) - An expressive Clojure client with
+  batteries included
+* [clj-riak](http://github.com/mmcgrana/clj-riak) - Clojure bindings
+  to the Riak Protocol Buffers API
+* [sumo](https://github.com/reiddraper/sumo) - A Protocol
+  Buffer-specific client for Riak with KV, 2i, and MapReduce support
+* [kria](https://github.com/bluemont/kria) - Riak 2.0 Asynchronous
+  (NIO.2) Clojure client. Callback driven, low level, Protocol Buffer
+  API, Java 7.
+
+#### ColdFusion
+
+* [Riak-Cache-Extension](https://github.com/getrailo/Riak-Cache-Extension) - A Riak-backed cache extension for Railo/ColdFusion
+
+#### Common Lisp
+
+* [cl-riak (1)](https://github.com/whee/cl-riak)
+* [cl-riak (2)](https://github.com/eriknomitch/cl-riak)
+
+#### Dart
+
+* [riak-dart](https://github.com/agilord/riak_dart_client) - HTTP
+  client for Riak written in Dart
+
+#### Django (Python)
+
+* [django-riak-sessions](https://github.com/flashingpumpkin/django-riak-sessions) - Riak-based Session Backend for Django
+* [Django Riak Engine](https://github.com/oubiwann/django-riak-engine) - A Riak backend for Django
+
+#### Erlang
+
+* [Uriak Pool](https://github.com/unisontech/uriak_pool) - Erlang
+  connection pool library from the team at
+  [Unison](http://www.unison.com)
+* [Riak PBC Pool](https://github.com/snoopaloop/Riak-PBC-Pool) - Riak
+  Protocol Buffer Client pool application
+* [Pooly](https://github.com/aberman/pooly) - Riak Process Pool
+* [riakpool](https://github.com/dweldon/riakpool) - Application for
+  maintaining a dynamic pool of Protocol Buffer client connections to a
+  Riak database
+* [pooler](https://github.com/seth/pooler) - An OTP Process Pool
+  Application
+* [krc](https://github.com/klarna/krc) - A simple wrapper around the
+  official Riak client for Erlang
+* [riakc_pool](https://github.com/brb/riakc_pool) - A really simple
+  Riak client process pool based on poolboy
+
+#### Go
+
+* [riaken](https://github.com/riaken) - A fast and extendable Riak
+  Protocol Buffer Client
+* [goriakpbc](https://github.com/tpjg/goriakpbc) - A Golang Riak
+  client inspired by the Ruby riak-client from Basho and riakpbc from mrb
+* [riakpbc](https://github.com/mrb/riakpbc) - A Riak Protocol Buffer
+  client in Go
+* [goriak](https://github.com/zegl/goriak) - Go language driver for Riak KV
+
+#### Grails
+
+* [Grails ORM for Riak](http://www.grails.org/plugin/riak)
+
+#### Griffon
+
+* [Riak Plugin for
+  Griffon](http://docs.codehaus.org/display/GRIFFON/Riak+Plugin)
+
+#### Groovy
+
+* [spring-riak](https://github.com/jbrisbin/spring-riak) - Riak
+  support from Groovy and/or Java
+
+#### Haskell
+
+* [Riak Haskell Client](https://github.com/markhibberd/riak-haskell-client) - A fast Haskell client library from the team at MailRank.
+
+#### Java
+
+* [Riak-Java-PB-Client](http://github.com/krestenkrab/riak-java-pb-client) - Java Client Library for Riak based on the Protocol Buffers API
+* [Asynchronous Riak Java Client](https://github.com/jbrisbin/riak-async-java-client) - Asynchronous, NIO-based Protocol Buffers client for Riak
+* [Riak Module for the Play
+  Framework](http://www.playframework.org/modules/riak-head/home)
+
+#### Lisp-flavored Erlang
+
+* [Gutenberg](https://github.com/dysinger/gutenberg/) - Riak MapReduce
+  examples written in LFE
+
+#### Node.js
+
+* [zukai](https://github.com/natural/zukai) - Riak ODM for Node.js
+  from Troy Melhase
+* [riak-pb](https://github.com/CrowdProcess/riak-pb) - Riak Protocol
+  Buffers client for Node.js from the team at
+  [CrowdProcess](http://crowdprocess.com)
+* [node_riak](https://github.com/mranney/node_riak) - Voxer's
+  production Node.js client for Riak.
+* [riakpbc](https://github.com/nlf/riakpbc) - A simple Riak Protocol
+  Buffer client library for Node.js
+* [nodiak](https://npmjs.org/package/nodiak) - Supports bulk
+  get/save/delete, sibling auto-resolution, MapReduce chaining, Search,
+  and 2i's
+* [resourceful-riak](https://github.com/admazely/resourceful-riak) - A
+  Riak engine to the
+  [resourceful](https://github.com/flatiron/resourceful/) model
+  framework from [flatiron](https://github.com/flatiron/)
+* [Connect-Riak](https://github.com/frank06/connect-riak) - Riak
+  session store for Connect backed by [Riak-js](http://riakjs.org/)
+* [Riak-js](http://riakjs.com) - Node.js client for Riak with support
+  for HTTP and Protocol Buffers
+* [Riakjs-model](https://github.com/dandean/riakjs-model) - a model
+  abstraction around riak-js
+* [Node-Riak](http://github.com/orlandov/node-riak) - A wrapper around
+  Node's HTTP facilities for communicating with Riak
+* [riak-dc](https://github.com/janearc/riak-dc) - A very thin, very small
+  http-based interface to Riak using promises intended to be used for small
+  tools like command-line applications; aims to have the
+  "most-synchronous-like" interface.
+* [Nori](https://github.com/sgonyea/nori) - Experimental Riak HTTP
+  library for Node.js modeled after Ripple
+* [OrionNodeRiak](http://github.com/mauritslamers/OrionNodeRiak) - Node-based server and database-frontend for Sproutcore
+* [Chinood](https://npmjs.org/package/chinood) - Object data mapper
+  for Riak built on Nodiak
+* [SimpleRiak](https://npmjs.org/package/simpleriak) - A very simple
+  Riak HTTP client
+
+#### OCaml
+
+* [Riak OCaml Client](http://metadave.github.com/riak-ocaml-client/) - Riak OCaml client
+* [OCaml Riakc](https://github.com/orbitz/ocaml-riakc) - A Protocol
+  Buffers client for Riak
+
+#### Perl
+
+* [Net::Riak](http://search.cpan.org/~franckc/Net-Riak/) - A Perl
+  interface to Riak
+* [AnyEvent-Riak adapter](http://github.com/franckcuny/anyevent-riak) - Non-blocking Riak adapter using anyevent
+* [riak-tiny](https://github.com/tempire/riak-tiny) - Perl interface
+  to Riak without Moose
+* [Riak::Light](https://metacpan.org/module/Riak::Light) - Fast and
+  lightweight Perl client for Riak (PBC only)
+
+#### PHP
+
+* [riak-client](https://github.com/php-riak/riak-client) - A Riak
+  2.0-compliant PHP client with support for Protocol Buffers by [Fabio
+  Silva](https://github.com/FabioBatSilva)
+* [Ripple-PHP](https://github.com/KevBurnsJr/ripple-php) - A port of
+  Ripple to PHP
+* [riiak](https://bitbucket.org/intel352/riiak) - A Riak PHP client
+  library for the [Yii Framework](http://www.yiiframework.com/)
+* [riak-php](https://github.com/marksteele/riak-php) - A Riak PHP
+  client with support for Protocol Buffers
+* [RiakBundle](https://github.com/remialvado/RiakBundle) - [Symfony](http://symfony.com) Bundle designed to ease interaction
+  with Riak
+* [php_riak](https://github.com/TriKaspar/php_riak) - A PHP extension
+  written in C, Both Riak client and PHP session module
+
+#### Python
+
+* [Aioriak](https://github.com/rambler-digital-solutions/aioriak) - Asyncio PBC Riak 2.0+ client library.
+  (Based on official Basho python client)
+* [Riakasaurus](https://github.com/calston/riakasaurus) - A Riak
+  client library for Twisted (based on txriak)
+* [RiakKit](http://shuhaowu.com/riakkit) - A small Python ORM that
+  sits on top of riak-python-client, similar to mongokit and couchdbkit
+* [riakalchemy](https://github.com/Linux2Go/riakalchemy) - Object
+  mapper for Riak written in Python
+* [riak_crdt](https://github.com/ericmoritz/riak_crdt) - A CRDT
+  (Conflict-Free Replicated Data Type) loader for Riak using the [CRDT
+  API](https://github.com/ericmoritz/crdt)
+* [txriak](https://launchpad.net/txriak) - A Twisted module for
+  communicating with Riak via the HTTP interface
+* [txriakidx](https://github.com/williamsjj/txriakidx) - Riak client
+  for Twisted Python that implements transparent indexes
+
+#### Racket
+
+* [riak.rkt](https://github.com/shofetim/riak.rkt) - Racket API to
+  Riak
+* [Racket Riak](https://github.com/dkvasnicka/racket-riak) - Racket
+  1.3.x API to Riak
+
+#### Ruby
+
+* [Risky](https://github.com/aphyr/risky) - A lightweight Ruby ORM for
+  Riak
+* [riak_sessions](http://github.com/igorgue/riak_sessions) - Riak-backed session storage for Rack
+* [Riaktor](http://github.com/benmyles/riaktor) - Ruby client and
+  object mapper for Riak
+* [dm-riak-adapter](http://github.com/mikeric/dm-riak-adapter) - DataMapper adapter for Riak
+* [Riak PB Client](https://github.com/sgonyea/riak-pbclient) - Riak
+  Protocol Buffer Client in Ruby
+* [Devise-Ripple](http://github.com/frank06/devise-ripple) - An ORM
+  strategy to use Devise with Riak
+* [ripple-anaf](http://github.com/bkaney/ripple-anaf) - Accepts nested
+  attributes support for Ripple
+* [Pabst](https://github.com/sgonyea/pabst) - Cross-platform Ruby
+  extension for Protocol Buffers written in both Objective-C and
+  Objective-C++
+
+#### Scala
+
+* [Riakka](http://github.com/timperrett/riakka) - Scala library for
+  talking to Riak
+* [Ryu](http://github.com/softprops/ryu) - A Tornado Whirlwind Kick
+  Scala client for the Riak raw HTTP interface
+* [Raiku](https://github.com/gideondk/Raiku) - An Akka IO- and
+  Sentinel-driven Riak Scala client
+
+#### Smalltalk
+
+* [Phriak](http://www.squeaksource.com/Phriak/) - A Riak client for
+  Pharo Smalltalk based on Runar Jordahl's EpigentRiakInterface
+* [EpigentRiakInterface](http://www.squeaksource.com/EpigentRiakInterface/) - A Pharo Smalltalk interface to Riak. There is also a blog post
+  with some additional info about the client
+  [here](http://blog.epigent.com/2011/03/riak-interface-for-pharo-smalltalk.html).
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/data-modeling.md b/content/riak/kv/3.0.4/developing/data-modeling.md
new file mode 100644
index 0000000000..5b27ead77f
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/data-modeling.md
@@ -0,0 +1,15 @@
+---
+layout: redirect
+target: "riak/kv/3.0.4/learn/use-cases/"
+aliases:
+---
+
+This page exists solely to redirect from the generated URL to the above `target`.
+
+We prefer to store these redirects as .html files in static/, but -- to maintain
+the git history of this (possibly malformed?) file -- we're going to start off
+by using this generated redirect.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/data-types.md b/content/riak/kv/3.0.4/developing/data-types.md
new file mode 100644
index 0000000000..c97ef11791
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/data-types.md
@@ -0,0 +1,279 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Data Types"
+    identifier: "developing_data_types"
+    weight: 102
+    parent: "developing"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/using/data-types
+  - /riak/kv/3.0.4/dev/using/data-types
+  - /riak/3.0.4/dev/data-modeling/data-types
+  - /riak/kv/3.0.4/dev/data-modeling/data-types
+---
+
+[wiki crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#Others
+[concept crdt]: ../../learn/concepts/crdts
+[ops bucket type]: ../../using/cluster-operations/bucket-types
+
+Riak KV has Riak-specific data types based on [convergent replicated data types (CRDTs)][wiki crdt]. While Riak KV was built as a data-agnostic key/value store, Riak data types enable you to use Riak KV as a data-aware system and perform transactions on 6 CRDT-inspired data types:
+
+- [Flags](./maps#flags)
+- [Registers](./maps#registers)
+- [Counters](./counters)
+- [Sets](./sets)
+- [GSets](./gsets)
+- [Maps](./maps)
+
+Riak KV also has 1 context-free data type that has similar usage but does not require contexts:
+
+- [HyperLogLogs](./hyperloglogs) (abbreviated `hll` in many places)
+
+Counters, sets, gsets, maps, and hyperloglogs can be used as bucket-level data types or types that you interact with directly. Flags and registers must be [embedded in maps](./maps).
+
+For more information on how CRDTs work in Riak KV see [Concepts: Data Types][concept crdt].
+
+## Getting Started with Riak Data Types
+
+The following section explains how to set up a bucket that uses Riak data types. To get started using Riak data types:
+
+1. [Create a bucket with the `datatype` parameter set](#creating-a-bucket-with-a-riak-data-type).
+2. [Confirm the bucket was properly configured](#confirm-bucket-configuration).
+3. [Activate the bucket type](#activate-bucket-type).
+
+### Creating a Bucket with a Riak Data Type
+
+First create a [bucket type][ops bucket type] that sets the `datatype` bucket parameter to `counter`, `map`, `set`, `hll`, or `gset`.
+
+The following would create a separate bucket type for each of the five
+bucket-level data types:
+
+```bash
+riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
+riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
+```
+
+> **Note**
+>
+> The names `maps`, `sets`, `counters`, `hlls` and `gsets` are not reserved
+terms. You are free to name bucket types whatever you like, with
+the exception of `default`.
+
+### Confirm Bucket Configuration
+
+Once you've created a bucket with a Riak data type, you can check
+to make sure that the bucket property configuration associated with that
+type is correct. This can be done through the `riak-admin` interface:
+
+```bash
+riak-admin bucket-type status maps
+```
+
+This will return a list of bucket properties and their associated values
+in the form of `property: value`. If our `maps` bucket type has been set
+properly, we should see the following pair in our console output:
+
+```
+datatype: map
+```
+
+### Activate Bucket Type
+
+If a bucket type has been properly constructed, it needs to be activated
+to be usable in Riak. This can also be done using the `bucket-type`
+command interface:
+
+```bash
+riak-admin bucket-type activate maps
+```
+
+To check whether activation has been successful, simply use the same
+`bucket-type status` command shown above.
+
+See the [Usage Examples](#usage-examples) section for further information on using Riak data types in the context of an application.
+
+## Required Bucket Properties
+
+In order for Riak data types to work, the bucket should have the following bucket properties:
+
+- `allow_mult = true`
+- `last_write_wins = false`
+
+These settings are set by default and should not be changed.
+
+## Data Types and Context
+
+Data type context is similar to [causal context](../../learn/concepts/causal-context): it tells Riak KV which version of the data type a client is attempting to modify. Context is required by Riak KV when making decisions about convergence.
+
+If no context is given when attempting a remove or remove-like operation, the operation may fail (removing a field that is not present) or succeed and remove more than intended (removing updates unseen by the client).
+
+> **Note**
+>
+> The counter data type does not use context; Riak KV will return an empty value when the context is requested from a counter.
+
+In the example below we'll fetch the context [from a user data map created for Ahmed](./maps#create-a-map):
+
+```java
+// Using the "ahmedMap" Location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+System.out.println(ctx.getValue().toString());
+
+// An indecipherable string of Unicode characters should then appear
+```
+
+```ruby
+bucket = client.bucket('users')
+ahmed_map = Riak::Crdt::Map.new(bucket, 'ahmed_info', 'maps')
+ahmed_map.instance_variable_get(:@context)
+
+# => "\x83l\x00\x00\x00\x01h\x02m\x00\x00\x00\b#\t\xFE\xF9S\x95\xBD3a\x01j"
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+echo $map->getContext(); // g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```python
+bucket = client.bucket_type('maps').bucket('users')
+ahmed_map = Map(bucket, 'ahmed_info')
+ahmed_map.context
+
+# g2wAAAABaAJtAAAACCMJ/vlTlb0zYQFq
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Note: using a previous UpdateMap or FetchMap result
+Console.WriteLine(format: "Context: {0}", args: Convert.ToBase64String(result.Context));
+
+// Output:
+// Context: g2wAAAACaAJtAAAACLQFHUkv4m2IYQdoAm0AAAAIxVKxCy5pjMdhCWo=
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    logger.info("context: '%s'", rslt.context.toString('base64'));
+});
+
+// Output:
+// context: 'g2wAAAACaAJtAAAACLQFHUmjDf4EYTBoAm0AAAAIxVKxC6F1L2dhSWo='
+```
+
+```erlang
+%% You cannot fetch a data type's context directly using the Erlang
+%% client. This is actually quite all right, as the client automatically
+%% manages contexts when making updates.
+```
+
+> **Context with the Ruby, Python, and Erlang clients**
+>
+> In the Ruby, Python, and Erlang clients, you will not need to manually
+handle context when making data type updates. The clients will do it all
+for you. The exceptions amongst the official clients are the Java and
+PHP clients. We'll explain how to use data type contexts with those
+clients directly below.
+
+### Context with the Java and PHP Clients
+
+With the Java and PHP clients, you'll need to manually fetch and return data type contexts for the following operations:
+
+* Disabling a flag within a map
+* Removing an item from a set (whether the set is on its own or within a
+  map)
+* Removing a field from a map
+
+Without context, these operations simply will not succeed due to the
+convergence logic driving Riak data types. The example below shows you
+how to fetch a data type's context and then pass it back to Riak. More
+specifically, we'll remove the `paid_account` flag from the map:
+
+```java
+// This example uses our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate removePaidAccountField = new MapUpdate()
+    .removeFlag("paid_account");
+UpdateMap update = new UpdateMap.Builder(ahmedMap, removePaidAccountField)
+    .withContext(ctx)
+    .build();
+client.execute(update);
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+    ->atLocation($location)
+    ->build()
+    ->execute()
+    ->getMap();
+
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->remove('opera');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateSet('interests', $updateSet)
+    ->atLocation($location)
+    ->withContext($map->getContext())
+    ->build()
+    ->execute();
+```
+
+## Usage Examples
+
+- [Flags](./maps#flags)
+- [Registers](./maps#registers)
+- [Counters](./counters)
+- [Sets](./sets)
+- [Maps](./maps)
+- [GSets](./gsets)
+- [Hyperloglogs](./hyperloglogs)
+
+The pages listed above detail using Riak data types at the application level using Basho's [officially supported Riak KV clients](../client-libraries). For more on getting started with client libraries check out the [Developing with Riak KV: Getting Started](../getting-started) section.
+
+All the examples use the bucket type names from above (`counters`, `sets`, and `maps`). You're free to substitute your own bucket type names if you wish.
+
+## Data Types and Search
+
+Riak data types can be searched like any other object, but with the
+added benefit that your data type is indexed as a different type by Solr,
+the search platform behind Riak Search.
+
+In our Search documentation we offer a [full tutorial](../usage/searching-data-types) as well as a variety of [examples](../usage/searching-data-types/), including code
+samples from each of our official client libraries.
diff --git a/content/riak/kv/3.0.4/developing/data-types/counters.md b/content/riak/kv/3.0.4/developing/data-types/counters.md
new file mode 100644
index 0000000000..e4ff3a480b
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/data-types/counters.md
@@ -0,0 +1,635 @@
---
title_supertext: "Developing with Riak KV"
title: "Data Types: Counters"
description: ""
project: "riak_kv"
project_version: 3.0.4
menu:
  riak_kv-3.0.4:
    name: "Counters"
    identifier: "data_types_counters"
    weight: 100
    parent: "developing_data_types"
toc: true
aliases:
  - /riak/3.0.4/dev/using/data-types/counters
  - /riak/kv/3.0.4/dev/using/data-types/counters
  - /riak/3.0.4/dev/data-modeling/data-types/counters
  - /riak/kv/3.0.4/dev/data-modeling/data-types/counters
---

Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#counters-within-maps). A counter's value can be any integer: positive, negative, or zero.

The examples in this section will show you how to use counters on their own.

## Set Up a Bucket Type

> If you've already created and activated a bucket type with the `datatype` parameter set to `counter`, skip to the [next section](#client-setup).

Start by creating a bucket type with the `datatype` parameter set to `counter`:

```bash
riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
```

> **Note**
>
> The `counters` bucket type name provided above is an example and is not required to be `counters`. You are free to name bucket types whatever you like, with the exception of `default`.

After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:

```bash
riak-admin bucket-type status counters
```

This returns a list of bucket properties and their values in the form of `property: value`.

If our `counters` bucket type has been set properly, we should see the following pair in our console output:

```bash
datatype: counter
```

Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:

```bash
riak-admin bucket-type activate counters
```

We can check if activation has been successful by using the same `bucket-type status` command shown above:

```bash
riak-admin bucket-type status counters
```

After creating and activating our new `counters` bucket type, we can set up our client to start using the bucket type as detailed in the next section.

## Client Setup

First, we need to direct our client to the bucket type/bucket/key location that contains our counter.

For this example we'll use the `counters` bucket type created and activated above and a bucket called `counters`:

```java
// In the Java client, a bucket/bucket type combination is specified
// using a Namespace object. To specify bucket, bucket type, and key,
// use a Location object that incorporates the Namespace object, as is
// done below.
Namespace countersBucket = new Namespace("counters", "counters");
Location location = new Location(countersBucket, "");
```

```ruby
bucket = client.bucket_type('counters').bucket('counters')
```

```php
$bucket = new \Basho\Riak\Bucket('counters', 'counters');
```

```python
bucket = client.bucket_type('counters').bucket('counters')
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

// You can either use the appropriate Options class or the Builder

// Options:
var options = new FetchCounterOptions("counters", "counters", "");

// Builder:
FetchCounter cmd = new FetchCounter.Builder()
    .WithBucketType("counters")
    .WithBucket("counters")
    .WithKey("")
    .Build();
```

```javascript
// The following can be passed as options to FetchCounter
var options = {
    bucketType: 'counters',
    bucket: 'counters',
    key: ''
};
```

```erlang
%% Buckets are simply named binaries in the Erlang client. See the
%% examples below for more information
```

```curl
curl http://localhost:8098/types/counters/buckets/counters/datatypes/

# Note that this differs from the URL structure for non-Data-Type
# requests, which end in /keys/
```

## Create a Counter

To create a counter, you need to specify a bucket/key pair to hold that counter. Here is the general syntax for doing so:

```java
// Here, we'll use the Namespace object that we created above and
// incorporate it into a Location object that includes the key (as yet
// unspecified) for our counter

// Using the countersBucket Namespace object from above:
Location counter = new Location(countersBucket, "");

// Or we can specify the Location all at once:
Location counter = new Location(new Namespace("counters", "counters"), "");
```

```ruby
counter = Riak::Crdt::Counter.new(bucket, key, bucket_type)

# Or you can specify a bucket and bucket type all at once and pass that
# into the constructor
bucket = client.bucket_type(bucket_type).bucket(bucket)
counter = Riak::Crdt::Counter.new(bucket, key)
```

```php
# using the $bucket var created earlier
$location = new \Basho\Riak\Location('key', $bucket);
```

```python
# The client detects the bucket type's data type and automatically
# returns the right datatype for you, in this case a counter
counter = bucket.new(key)

# This way is also acceptable:
from riak.datatypes import Counter

counter = Counter(bucket, key)
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "");
FetchCounter cmd = new FetchCounter(fetchCounterOptions);
RiakResult rslt = client.Execute(cmd);
CounterResponse response = cmd.Response;
```

```javascript
// The following can be passed as options to the *Counter methods on the
// Node.js Client object
var options = {
    bucketType: 'counters',
    bucket: 'counters',
    key: ''
};
```

```erlang
%% Counters are not encapsulated with the bucket/key in the Erlang
%% client. See the examples below for more information.
```

```curl
# This will create a counter with an initial value of 0

curl -XPOST http://localhost:8098/types/counters/buckets//datatypes/ \
  -H "Content-Type: application/json" \
  -d '{"increment": 0}'
```

Let's say that we want to create a counter called `traffic_tickets` in our `counters` bucket to keep track of our legal misbehavior.
We can create this counter and ensure that the `counters` bucket will use our `counters` bucket type like this:

```java
// Using the countersBucket Namespace object from above:

Location trafficTickets = new Location(countersBucket, "traffic_tickets");
```

```ruby
bucket = client.bucket_type('counters').bucket('counters')
counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')

# Alternatively, the Ruby client enables you to set a bucket type as
# being globally associated with a Riak data type. The following would
# set all counter buckets to use the counters bucket type:

Riak::Crdt::DEFAULT_BUCKET_TYPES[:counter] = 'counters'

# This would enable us to create our counter without specifying a bucket type
bucket = client.bucket('counters')
counter = Riak::Crdt::Counter.new(bucket, 'traffic_tickets')
```

```php
# using the $bucket var created earlier
$location = new \Basho\Riak\Location('traffic_tickets', $bucket);
```

```python
bucket = client.bucket_type('counters').bucket('counters')
counter = bucket.new('traffic_tickets')
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
FetchCounter cmd = new FetchCounter(fetchCounterOptions);
RiakResult rslt = client.Execute(cmd);
CounterResponse response = cmd.Response;
```

```javascript
// Using the options from above:

var options = {
    bucketType: 'counters',
    bucket: 'counters',
    key: 'traffic_tickets'
};
```

```erlang
Counter = riakc_counter:new().

%% Counters in the Erlang client are opaque data structures that collect
%% operations as you mutate them. We will associate the data structure
%% with a bucket type, bucket, and key later on.
```

```curl
curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
  -H "Content-Type: application/json" \
  -d '{"increment": 0}'
```

## Increment a Counter

Now that our client knows which bucket/key pairing to use for our counter, `traffic_tickets` will start out at 0 by default. If we happen to get a ticket that afternoon, we can increment the counter:

```java
// Using the "trafficTickets" Location from above:

CounterUpdate cu = new CounterUpdate(1);
UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
    .build();
client.execute(update);
```

```ruby
counter.increment

# This will increment the counter both on the application side and in
# Riak
```

```php
(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
    ->withIncrement(1)
    ->atLocation($location)
    ->build()
    ->execute();
```

```python
counter.increment()

# Updates are staged locally and have to be explicitly sent to Riak
# using the store() method.
counter.store()
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

UpdateCounter updateCmd = new UpdateCounter.Builder(increment: 1)
    .WithBucketType("counters")
    .WithBucket("counters")
    .WithKey("traffic_tickets")
    .WithReturnBody(true)
    .Build();

RiakResult rslt = client.Execute(updateCmd);
CounterResponse response = updateCmd.Response;
// response.Value will be 1
```

```javascript
// Using the options from above:

var options = {
    bucketType: 'counters',
    bucket: 'counters',
    key: 'traffic_tickets',
    increment: 1
};
client.updateCounter(options,
    function (err, rslt) {
        if (err) {
            throw new Error(err);
        }
    });
```

```erlang
Counter1 = riakc_counter:increment(Counter).
```

```curl
curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
  -H "Content-Type: application/json" \
  -d '{"increment": 1}'
```

## Increment a Counter by More Than 1

The default value of an increment operation is 1, but you can increment by more than 1 (but always by an integer).

Continuing with our `traffic_tickets` example, let's say we receive 5 tickets in a single day:

```java
// Using the "trafficTickets" Location from above:
CounterUpdate cu = new CounterUpdate(5);
UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
    .build();
client.execute(update);
```

```ruby
counter.increment(5)
```

```php
(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
    ->withIncrement(5)
    ->atLocation($location)
    ->build()
    ->execute();
```

```python
counter.increment(5)
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

var builder = new UpdateCounter.Builder(5)
    .WithBucketType("counters")
    .WithBucket("counters")
    .WithKey("traffic_tickets")
    .WithReturnBody(true);

UpdateCounter updateCmd = builder.Build();

rslt = client.Execute(updateCmd);
CounterResponse response = updateCmd.Response;
// response.Value is 5 more than before

// To decrement:
// Modify the builder's increment, then construct a new command
builder.WithIncrement(-5);
updateCmd = builder.Build();

rslt = client.Execute(updateCmd);
CheckResult(rslt);

response = updateCmd.Response;
// response.Value is 5 less than before
```

```javascript
var options = {
    bucketType: 'counters',
    bucket: 'counters',
    key: 'traffic_tickets',
    increment: 5
};
client.updateCounter(options,
    function (err, rslt) {
        if (err) {
            throw new Error(err);
        }
    });
```

```erlang
Counter2 = riakc_counter:increment(5, Counter1).
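%% Note that the amount comes first in riakc_counter:increment/2.
%% As with the other Erlang-client data types, this only stages the
%% operation locally; see the Decrement section below for sending
%% staged updates to Riak with riakc_pb_socket:update_type/4.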
```

```curl
curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
  -H "Content-Type: application/json" \
  -d '{"increment": 5}'
```

## Retrieve Counter Value

We can retrieve the value of the counter and view how many tickets have accumulated:

```java
// Using the "trafficTickets" Location from above:
FetchCounter fetch = new FetchCounter.Builder(trafficTickets)
    .build();
FetchCounter.Response response = client.execute(fetch);
RiakCounter counter = response.getDatatype();
Long ticketsCount = counter.view();
```

```ruby
counter.value
# Output will always be an integer
```

```php
$trafficTickets = (new \Basho\Riak\Command\Builder\FetchCounter($riak))
    ->atLocation($location)
    ->build()
    ->execute()
    ->getCounter();

$trafficTickets->getData(); # returns an integer
```

```python
counter.dirty_value

# The value fetched from Riak is always immutable, whereas the "dirty
# value" takes into account local modifications that have not been
# sent to the server. For example, whereas the call above would return
# 6, the call below will return 0 since we started with an empty
# counter:

counter.value

# To fetch the value stored on the server, use the call below. Note
# that this will clear any changes to the counter that have not yet been
# sent to Riak
counter.reload()
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

var fetchCounterOptions = new FetchCounterOptions("counters", "counters", "traffic_tickets");
FetchCounter cmd = new FetchCounter(fetchCounterOptions);
RiakResult rslt = client.Execute(cmd);
CounterResponse response = cmd.Response;
// response.Value has the counter value
```

```javascript
var options = {
    bucketType: 'counters',
    bucket: 'counters',
    key: 'traffic_tickets'
};
client.fetchCounter(options,
    function (err, rslt) {
        if (err) {
            throw new Error(err);
        }

        if (rslt.notFound) {
            logger.error("bt: %s, b: %s, k: %s, counter: NOT FOUND",
                options.bucketType, options.bucket, options.key);
        } else {
            logger.info("bt: %s, b: %s, k: %s, counter: %d",
                options.bucketType, options.bucket, options.key,
                rslt.counterValue);
        }
    }
);
```

```erlang
riakc_counter:dirty_value(Counter2).

%% The value fetched from Riak is always immutable, whereas the "dirty
%% value" takes into account local modifications that have not been
%% sent to the server. For example, whereas the call above would return
%% '6', the call below will return '0' since we started with an empty
%% counter:

riakc_counter:value(Counter2).

%% To fetch the value stored on the server, use the call below:

{ok, CounterX} = riakc_pb_socket:fetch_type(Pid,
                                            {<<"counters">>, <<"counters">>},
                                            <<"traffic_tickets">>).
```

```curl
curl http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets

# Response:
{"type":"counter", "value": }
```

## Decrement a Counter

Counters enable you to decrement values in addition to incrementing them as seen above.
For example, let's say we hire an expert lawyer who gets one of the traffic tickets stricken from our record:

```java
// Using the "trafficTickets" Location from above:
CounterUpdate cu = new CounterUpdate(-1);
UpdateCounter update = new UpdateCounter.Builder(trafficTickets, cu)
    .build();
client.execute(update);
```

```ruby
counter.decrement

# Just like incrementing, you can also decrement by more than one, e.g.:
counter.decrement(3)
```

```php
(new \Basho\Riak\Command\Builder\IncrementCounter($riak))
    ->withIncrement(-3)
    ->atLocation($location)
    ->build()
    ->execute();
```

```python
counter.decrement()

# Just like incrementing, you can also decrement by more than one, e.g.:
counter.decrement(3)
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

var updateCmd = new UpdateCounter.Builder(-3)
    .WithBucketType("counters")
    .WithBucket("counters")
    .WithKey("traffic_tickets")
    .Build();

rslt = client.Execute(updateCmd);
response = updateCmd.Response;
// response.Value is three less than before
```

```javascript
var options = {
    bucketType: 'counters',
    bucket: 'counters',
    key: 'traffic_tickets',
    increment: -1
};

// As with incrementing, you can also decrement by more than one, e.g.:
var options = {
    bucketType: 'counters',
    bucket: 'counters',
    key: 'traffic_tickets',
    increment: -3
};
```

```erlang
Counter3 = riakc_counter:decrement(Counter2).

%% As with incrementing, you can also decrement by more than one:

Counter4 = riakc_counter:decrement(3, Counter3).

%% At some point, we'll want to send our local updates to the server
%% so they get recorded and are visible to others. Extract the update
%% using the to_op/1 function, then pass it to
%% riakc_pb_socket:update_type/4,5.

riakc_pb_socket:update_type(Pid, {<<"counters">>,<<"counters">>},
                            <<"traffic_tickets">>,
                            riakc_counter:to_op(Counter4)).
```

```curl
curl -XPOST http://localhost:8098/types/counters/buckets/counters/datatypes/traffic_tickets \
  -H "Content-Type: application/json" \
  -d '{"decrement": 3}'
```

diff --git a/content/riak/kv/3.0.4/developing/data-types/gsets.md b/content/riak/kv/3.0.4/developing/data-types/gsets.md
new file mode 100644
index 0000000000..6fbee62757
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/data-types/gsets.md
@@ -0,0 +1,631 @@
---
title_supertext: "Developing with Riak KV"
title: "Data Types: GSets"
description: ""
project: "riak_kv"
project_version: 3.0.4
menu:
  riak_kv-3.0.4:
    name: "GSets"
    identifier: "data_types_gsets"
    weight: 101
    parent: "developing_data_types"
toc: true
aliases:
  - /riak/3.0.4/dev/using/data-types/gsets
  - /riak/kv/3.0.4/dev/using/data-types/gsets
  - /riak/3.0.4/dev/data-modeling/data-types/gsets
  - /riak/kv/3.0.4/dev/data-modeling/data-types/gsets
---

GSets are a bucket-level Riak data type that can be used by themselves or associated with a bucket/key pair. They do not yet have the ability to be used [within a map like regular sets](../maps#sets-within-maps).

GSets are collections of unique binary values (such as strings). All of the values in a gset are unique and are automatically sorted alphabetically irrespective of the order they were added.

For example, if you attempt to add the element `shovel` to a gset that already contains `shovel`, the operation will be ignored by Riak KV.
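In other words, adds are idempotent. Here is a minimal sketch of that behavior, using the Python client API that appears in the examples below (the bucket type, bucket, and key mirror those examples):

```python
bucket = client.bucket_type('gsets').bucket('account-12345678')
gset = bucket.new('2019-11-17')

gset.add('shovel')
gset.add('shovel')  # duplicate add; ignored by the server
gset.store()

# 'shovel' is counted once
assert len(gset.reload().value) == 1
```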
Unlike sets, elements can only be added; no element modification or deletion is possible.

> **Known Issue**
>
> Unlike other data types, gsets require other data to be present in the cluster before they can be created. If you are unable to create a gset on a new cluster, please try [creating a set](../sets#set-up-a-bucket-type) first and then retrying with your gset. Please see [issue #950](https://github.com/basho/riak_core/issues/950) for details.

## Set Up a Bucket Type

> If you've already created and activated a bucket type with `gset` as the `datatype` parameter, skip to the [next section](#client-setup).

Start by creating a bucket type with the `datatype` parameter `gset`:

```bash
riak-admin bucket-type create gsets '{"props":{"datatype":"gset"}}'
```

> **Note**
>
> The `gsets` bucket type name provided above is an example and is not required to be `gsets`. You are free to name bucket types whatever you like, with the exception of `default`.

After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:

```bash
riak-admin bucket-type status gsets
```

This returns a list of bucket properties and their values in the form of `property: value`.

If our `gsets` bucket type has been set properly, we should see the following pair in our console output:

```
datatype: gset
```

Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:

```bash
riak-admin bucket-type activate gsets
```

We can check if activation has been successful by using the same `bucket-type status` command shown above:

```bash
riak-admin bucket-type status gsets
```

After creating and activating our new `gsets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.

## Client Setup

Using gsets involves creating a bucket/key pair to house a gset and running gset-specific operations on that pair.

Here is the general syntax for creating a bucket type/bucket/key combination to handle a gset:

```java
// In the Java client, a bucket/bucket type combination is specified
// using a Namespace object. To specify bucket, bucket type, and key,
// use a Location object that incorporates the Namespace object, as is
// done below.

Location set =
  new Location(new Namespace("", ""), "");
```

```ruby
# Note: both the Riak Ruby Client and Ruby the language have a class
# called Set. Make sure that you refer to the Ruby version as ::Set and
# the Riak client version as Riak::Crdt::Set

bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
set = Riak::Crdt::Set.new(bucket, key)
```

```php
$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type'));
```

```python
gset = bucket.new('2019-11-17')

# or

from riak.datatypes import GSet
gset = GSet('account-12345678', '2019-11-17')
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

// As with counters, with the Riak .NET Client you interact with gsets
// by building an Options object or using a Builder
var builder = new FetchGSet.Builder()
    .WithBucketType("gsets")
    .WithBucket("account-12345678")
    .WithKey("2019-11-17");

// NB: builder.Options will only be set after Build() is called.
FetchGSet fetchGSetCommand = builder.Build();

FetchGSetOptions options = new FetchGSetOptions("gsets", "account-12345678", "2019-11-17");

// These two options objects are equal
Assert.AreEqual(options, builder.Options);
```

```javascript
// As with counters, with the Riak Node.js Client you interact with gsets on the
// basis of the gset's location in Riak, as specified by an options object.
// Below is an example:
var options = {
    bucketType: 'gsets',
    bucket: 'account-12345678',
    key: '2019-11-17'
};
```

```erlang
%% Like counters, sets are not encapsulated in a
%% bucket/key in the Erlang client. See below for more
%% information.
```

```curl
curl http://localhost:8098/types//buckets//datatypes/

# Note that this differs from the URL structure for non-data type requests,
# which end in /keys/
```

## Create a GSet

For the following example, we will use a gset to store a list of transactions that occur for an account number on a specific date. The Java and Ruby examples below store the gset in the key `cities` in the bucket `travel`, while the other clients use the key `2019-11-17` in the bucket `account-12345678`; all of them use the `gsets` bucket type created previously:

```java
// In the Java client, you specify the location of Data Types
// before you perform operations on them:

Location citiesSet =
  new Location(new Namespace("gsets", "travel"), "cities");
```

```ruby
travel = client.bucket_type('gsets').bucket('travel')
cities_set = Riak::Crdt::Set.new(travel, 'cities')

# Alternatively, the Ruby client enables you to set a bucket type as
# being globally associated with a Riak data type. The following would
# set all set buckets to use the gsets bucket type:

Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'gsets'

# This would enable us to create our set without specifying a bucket
# type:
travel = client.bucket('travel')
cities_set = Riak::Crdt::Set.new(travel, 'cities')
```

```php
$location = new \Basho\Riak\Location('2019-11-17', 'account-12345678', 'gsets');
```

```python
bucket = client.bucket_type('gsets').bucket('account-12345678')

# The client detects the bucket type's data type and automatically
# returns the right data type for you, in this case a Riak set.
gset = bucket.new('2019-11-17')

# You can also create a reference to a set explicitly:
from riak.datatypes import GSet

gset = GSet('account-12345678', '2019-11-17')
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

// Now we'll create a Builder object for the gset with which we want to
// interact:
var builder = new FetchGSet.Builder()
    .WithBucketType("gsets")
    .WithBucket("account-12345678")
    .WithKey("2019-11-17");
```

```javascript
// Now we'll create a options object for the gset with which we want to
// interact:
var options = {
    bucketType: 'gsets',
    bucket: 'account-12345678',
    key: '2019-11-17'
};
```

```erlang
GSet = riakc_gset:new().

%% GSets in the Erlang client are opaque data structures that
%% collect operations as you mutate them. We will associate the data
%% structure with a bucket type, bucket, and key later on.
```

```curl
# You cannot create an empty gset through the HTTP interface. GSets can
# only be created when an element is added to them, as in the examples
# below.
```

Upon creation, our gset is empty.
We can verify that it is empty at any time:

```java
// Using our "cities" Location from above:

FetchSet fetch = new FetchSet.Builder(citiesSet)
    .build();
FetchSet.Response response = client.execute(fetch);
RiakSet set = response.getDatatype();
boolean isEmpty = set.viewAsSet().isEmpty();
```

```ruby
cities_set.empty?
```

```php
# use $location from earlier
$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
    ->atLocation($location)
    ->build()
    ->execute()
    ->getSet();

count($gset->getData());
```

```python
len(gset) == 0
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

var builder = new FetchGSet.Builder()
    .WithBucketType("gsets")
    .WithBucket("account-12345678")
    .WithKey("2019-11-17");

FetchGSet fetchGSetCommand = builder.Build();
RiakResult rslt = client.Execute(fetchGSetCommand);
GSetResponse response = fetchGSetCommand.Response;
// response.Value will be null
```

```javascript
var options = {
    bucketType: 'gsets',
    bucket: 'account-12345678',
    key: '2019-11-17'
};
client.fetchSet(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }

    if (rslt.notFound) {
        logger.info("gset '2019-11-17' is not found!");
    }
});
```

```erlang
riakc_gset:size(GSet) == 0.

%% Query functions like size/1, is_element/2, and fold/3 operate over
%% the immutable value fetched from the server. In the case of a new
%% gset that was not fetched, this is an empty collection, so the size
%% is 0.
```

```curl
curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17

# Response
{"type":"set","error":"notfound"}
```

## Add to a GSet

But let's say that a pair of transactions occurred today.
Let's add them to our `2019-11-17` gset:

```java
// Using our "cities" Location from above:

GSetUpdate su = new GSetUpdate()
        .add("Toronto")
        .add("Montreal");
UpdateSet update = new UpdateSet.Builder(citiesSet, su)
        .build();
client.execute(update);
```

```ruby
cities_set.add('Toronto')
cities_set.add('Montreal')
```

```php
# use $location from earlier
$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
    ->add('transaction a')
    ->add('transaction b')
    ->atLocation($location)
    ->withParameter('returnbody', 'true')
    ->build()
    ->execute();
```

```python
gset.add('transaction a')
gset.add('transaction b')
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

var adds = new[] { "transaction a", "transaction b" };

var builder = new UpdateGSet.Builder()
    .WithBucketType("gsets")
    .WithBucket("account-12345678")
    .WithKey("2019-11-17")
    .WithAdditions(adds);

UpdateGSet cmd = builder.Build();
RiakResult rslt = client.Execute(cmd);
GSetResponse response = cmd.Response;
Assert.Contains("transaction a", response.AsStrings.ToArray());
Assert.Contains("transaction b", response.AsStrings.ToArray());
```

```javascript
var options = {
    bucketType: 'gsets',
    bucket: 'account-12345678',
    key: '2019-11-17'
};
var cmd = new Riak.Commands.CRDT.UpdateGSet.Builder()
    .withBucketType(options.bucketType)
    .withBucket(options.bucket)
    .withKey(options.key)
    .withAdditions(['transaction a', 'transaction b'])
    .withCallback(
        function (err, rslt) {
            if (err) {
                throw new Error(err);
            }
        }
    )
    .build();
client.execute(cmd);
```

```erlang
GSet1 = riakc_gset:add_element(<<"transaction a">>, GSet),
GSet2 = riakc_gset:add_element(<<"transaction b">>, GSet1).
```

```curl
curl -XPOST http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17 \
  -H "Content-Type: application/json" \
  -d '{"add_all":["transaction a", "transaction b"]}'
```

## Remove from a GSet

Removal from a GSet is not possible.

## Retrieve a GSet

Now, we can check on which transactions are currently in our gset:

```java
// Using our "cities" Location from above:

FetchSet fetch = new FetchSet.Builder(citiesSet)
    .build();
FetchSet.Response response = client.execute(fetch);
Set binarySet = response.getDatatype().view();
for (BinaryValue city : binarySet) {
  System.out.println(city.toStringUtf8());
}
```

```ruby
cities_set.members

#
```

```php
# use $location from earlier
$gset = (new \Basho\Riak\Command\Builder\FetchSet($riak))
    ->atLocation($location)
    ->build()
    ->execute()
    ->getSet();

var_dump($gset->getData());
```

```python
gset.dirty_value

# The value fetched from Riak is always immutable, whereas the "dirty
# value" takes into account local modifications that have not been
# sent to the server. For example, where the call above would return
# frozenset(['transaction a', 'transaction b']), the call below would
# return frozenset([]).

gset.value

# To fetch the value stored on the server, use the call below. Note
# that this will clear any unsent additions.
gset.reload()
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

foreach (var value in response.AsStrings)
{
    Console.WriteLine("2019-11-17 Transactions: {0}", value);
}

// Output:
// 2019-11-17 Transactions: transaction a
// 2019-11-17 Transactions: transaction b
```

```javascript
var options = {
    bucketType: 'gsets',
    bucket: 'account-12345678',
    key: '2019-11-17'
};
client.fetchSet(options, function(err, rslt) {
    if (err) {
        throw new Error(err);
    }

    logger.info("2019-11-17 gset values: '%s'",
        rslt.values.join(', '));
});

// Output:
// info: 2019-11-17 gset values: 'transaction a, transaction b'
```

```erlang
riakc_gset:dirty_value(GSet2).

%% The value fetched from Riak is always immutable, whereas the "dirty
%% value" takes into account local modifications that have not been
%% sent to the server. For example, where the call above would return
%% [<<"transaction a">>, <<"transaction b">>], the call below would
%% return []. These are essentially ordsets:

riakc_gset:value(GSet2).

%% To fetch the value stored on the server, use the call below:

{ok, SetX} = riakc_pb_socket:fetch_type(Pid,
                                        {<<"gsets">>,<<"account-12345678">>},
                                        <<"2019-11-17">>).
```

```curl
curl http://localhost:8098/types/gsets/buckets/account-12345678/datatypes/2019-11-17

# Response
{"type":"set","value":["transaction a","transaction b"]}
```

## Find GSet Member

Or we can see whether our gset includes a specific member:

```java
// Using our "citiesSet" from above:

FetchSet fetch = new FetchSet.Builder(citiesSet)
    .build();
FetchSet.Response response = client.execute(fetch);
Set binarySet = response.getDatatype().view();

System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver")));
System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa")));
```

```ruby
cities_set.include? 'Vancouver'
# false

cities_set.include? 'Toronto'
# true
```

```php
in_array('transaction z', $gset->getData()); # false

in_array('transaction a', $gset->getData()); # true
```

```python
'transaction c' in gset
# False

'transaction a' in gset
# True
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

using System.Linq;

bool includesTransactionZ = response.AsStrings.Any(v => v == "transaction z");
bool includesTransactionA = response.AsStrings.Any(v => v == "transaction a");
```

```javascript
// Use standard javascript array method indexOf()

var gset20191117 = rslt.values;
gset20191117.indexOf('transaction z'); // if present, index is >= 0
gset20191117.indexOf('transaction a'); // if present, index is >= 0
```

```erlang
%% At this point, GSet2 is the most "recent" gset from the standpoint
%% of our application.

riakc_gset:is_element(<<"transaction z">>, GSet2).
riakc_gset:is_element(<<"transaction a">>, GSet2).
```

```curl
# With the HTTP interface, this can be determined from the output of
# a fetch command like the one displayed in the example above
```

## Size of GSet

We can also determine the size of the gset:

```java
// Using our "binarySet" from above:

int numberOfCities = binarySet.size();
```

```ruby
cities_set.members.length
```

```php
count($gset->getData());
```

```python
len(gset)
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

using System.Linq;

// Note: this enumerates the IEnumerable
gsetResponse.Values.Count();
```

```javascript
// Use standard javascript array property length

var gsetSize = rslt.values.length;
```

```erlang
riakc_gset:size(GSet2).
```

```curl
# With the HTTP interface, this can be determined from the output of
# a fetch command like the one displayed in the example above
```

diff --git a/content/riak/kv/3.0.4/developing/data-types/hyperloglogs.md b/content/riak/kv/3.0.4/developing/data-types/hyperloglogs.md
new file mode 100644
index 0000000000..750ea1d53b
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/data-types/hyperloglogs.md
@@ -0,0 +1,643 @@
---
title_supertext: "Developing with Riak KV"
title: "Data Types: HyperLogLogs"
description: ""
project: "riak_kv"
project_version: 3.0.4
menu:
  riak_kv-3.0.4:
    name: "HyperLogLogs"
    identifier: "data_types_hyperloglogs"
    weight: 100
    parent: "developing_data_types"
toc: true
aliases:
  - /riak/3.0.4/dev/using/data-types/hyperloglogs
  - /riak/kv/3.0.4/dev/using/data-types/hyperloglogs
  - /riak/3.0.4/dev/data-modeling/data-types/hyperloglogs
  - /riak/kv/3.0.4/dev/data-modeling/data-types/hyperloglogs
---

The examples in this section will show you how to use hyperloglogs on their own.

## Set Up a Bucket Type

> If you've already created and activated a bucket type with the `datatype` parameter set to `hll`, skip to the [next section](#client-setup).

Start by creating a bucket type with the `datatype` parameter set to `hll`:

```bash
riak-admin bucket-type create hlls '{"props":{"datatype":"hll"}}'
```

> **Note**
>
> The `hlls` bucket type name provided above is an example and is not required to be `hlls`. You are free to name bucket types whatever you like, with the exception of `default`.

After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:

```bash
riak-admin bucket-type status hlls
```

This returns a list of bucket properties and their values in the form of `property: value`.

If our `hlls` bucket type has been set properly, we should see the following pair in our console output:

```bash
datatype: hll
```

Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:

```bash
riak-admin bucket-type activate hlls
```

We can check if activation has been successful by using the same `bucket-type status` command shown above:

```bash
riak-admin bucket-type status hlls
```

After creating and activating our new `hlls` bucket type, we can set up our client to start using the bucket type as detailed in the next section.

## Client Setup

First, we need to direct our client to the bucket type/bucket/key location that contains our hyperloglog.
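Before we do, a quick sanity check on the data type's behavior (a sketch using the Python client API shown in the examples below; the bucket and key are placeholders): a hyperloglog keeps only an approximate count of the distinct elements added to it, so repeated additions of the same value leave the cardinality unchanged.

```python
bucket = client.bucket_type('hlls').bucket('my_hlls')
hll = bucket.new('unique_visitors')

for visitor in ['alice', 'bob', 'alice', 'alice']:
    hll.add(visitor)
hll.store()

# Approximate number of *distinct* elements added, here 2
print(hll.value)
```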
For this example we'll use the `hlls` bucket type created and activated above and a bucket called `hlls`:

```erlang
%% Buckets are simply named binaries in the Erlang client. See the
%% examples below for more information
```

```java
// In the Java client, a bucket/bucket type combination is specified
// using a Namespace object. To specify bucket, bucket type, and key,
// use a Location object that incorporates the Namespace object, as is
// done below.

Location hllLocation =
  new Location(new Namespace("", ""), "");
```

```python
bucket_type = client.bucket_type('hlls')
bucket = bucket_type.bucket('my_hlls')
hll = bucket.new(key)

# or

from riak.datatypes import Hll
hll = Hll(bucket, key)
```

```go
// Buckets and bucket types are simply strings in the Go client.

// See the examples below for more information, or the full example at
// https://github.com/basho/riak-go-client/blob/master/examples/dev/using/data-types/hyperloglog.go

// We will need the following imports to run the examples:
import (
    "fmt"
    "os"
    "time"

    riak "github.com/basho/riak-go-client"
    "errors"
)
```

```csharp
// In the C# client, buckets are just string parameters to operations.
// See the examples below for more information.
```

```javascript
// In the Node.js client, buckets are just string parameters to operations.
// See the examples below for more information.
```

```php
$command = (new Command\Builder\FetchHll($riak_client))
    ->buildLocation('', '', 'hlls')
    ->build();
```

```ruby
bucket = client.bucket_type('hlls').bucket('my_hlls')
```

```curl
curl http://localhost:8098/types//buckets//datatypes/

# Note that this differs from the URL structure for non-Data-Type
# requests, which end in /keys/
```


## Create a HyperLogLog data type

To create a hyperloglog data structure, you need to specify a bucket/key pair to hold that hyperloglog. Here is the general syntax for doing so:

```erlang
HLL = riakc_hll:new().

%% Hyperloglogs in the Erlang client are opaque data structures that
%% collect operations as you mutate them. We will associate the data
%% structure with a bucket type, bucket, and key later on.
```

```java
// In the Java client, you specify the location of Data Types
// before you perform operations on them:

Location hllLocation =
  new Location(new Namespace("hlls", "hello"), "darkness");

// In the Java client, there is no intermediate "empty" hyperloglog data type.
// Hyperloglogs can be created when an element is added to them, as in the examples below.
```

```python
bucket_type = client.bucket_type('hlls')
bucket = bucket_type.bucket('my_hlls')
hll = bucket.new(key)

# or

from riak.datatypes import Hll
hll = Hll(bucket, key)
```

```go
// In the Go client, there is no intermediate "empty" hyperloglog data type.
// Hyperloglogs can be created when an element is added to them, as in the examples below.
```

```csharp
// In the C# client, there is no intermediate "empty" hyperloglog data type.
// Hyperloglogs can be created when an element is added to them, as in the examples below.
```

```javascript
// In the Node.js client, there is no intermediate "empty" hyperloglog data type.
// Hyperloglogs can be created when an element is added to them, as in the examples below.
```

```php
// Note that "hlls" is just an example HLL bucket type name used
// in these examples

$command = (new Command\Builder\UpdateHll($riak_client))
    ->add('gosabres poked you.')
    ->add('phprocks viewed your profile.')
    ->add('phprocks started following you.')
    ->buildBucket('', 'hlls')
    ->build();

$response = $command->execute();
```

```ruby
key = "darkness"
hll = Riak::Crdt::HyperLogLog.new(bucket, key)
```

```curl
# You cannot create an empty hyperloglog data structure through the HTTP
# interface.
# Hyperloglogs can only be created when an element is added to them, as in the
# examples below.
```

Upon creation, our hyperloglog data structure is empty:

```erlang
HLL.

%% which will return:
%% {hll,0,[]}
```

```java
FetchHll fetch = new FetchHll.Builder(hllLocation)
    .build();
RiakHll hll = client.execute(fetch);
boolean isEmpty = hll.getCardinality() == 0;
```

```python
is_empty = hll.value == 0
```

```go
var resp *riak.FetchHllResponse

builder := riak.NewFetchHllCommandBuilder()
cmd, err := builder.WithBucketType("hlls").
    WithBucket("hello").
    WithKey("darkness").
    Build()
if err != nil {
    return err
}
if err = cluster.Execute(cmd); err != nil {
    return err
}
if fc, ok := cmd.(*riak.FetchHllCommand); ok {
    if fc.Response == nil {
        return errors.New("expected non-nil Response")
    }
    resp = fc.Response
}

fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
fmt.Println("Hyperloglog isNotFound: ", resp.IsNotFound)
return nil
```

```javascript
var options = {
    bucketType: 'hlls',
    bucket: 'hello',
    key: 'darkness'
};

client.fetchHll(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }

    if (rslt.notFound) {
        logger.info("Not Found");
    }
});
// Prints "Not Found" to logger.info.
```

```csharp
var fetch = new FetchHll.Builder()
    .WithBucketType("hlls")
    .WithBucket("hello")
    .WithKey("darkness")
    .Build();

RiakResult rslt = client.Execute(fetch);
HllResponse response = fetch.Response;
if (response.NotFound)
{
    Console.WriteLine("Not Found");
}
// Prints "Not Found" to the console.
```

```php
$command = (new Command\Builder\FetchHll($riak_client))
    ->buildLocation('darkness', 'hello', 'hlls')
    ->build();

$response = $command->execute();

$response->getCode() == '404';
```

```ruby
puts hll.cardinality
# Prints "0"
```

```curl
curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness

# Response
{"type":"hll","error":"notfound"}
```

## Add elements to a HyperLogLog data type

```erlang
HLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
RepeatHLL1 = riakc_hll:add_element(<<"Jokes">>, HLL),
HLL2 = riakc_hll:add_elements([<<"Are">>, <<"Better">>, <<"Explained">>], HLL1),

HLL2.
%% which will return:
%% {hll,0,[<<"Are">>,<<"Better">>,<<"Explained">>, <<"Jokes">>]}
```

```java
HllUpdate hllUpdate = new HllUpdate()
    .add("Jokes")
    .add("Are")
    .addAll(Arrays.asList("Better", "Explained", "Jokes"));

hllUpdate.getElementAdds();
// Returns the set of ["Jokes", "Are", "Better", "Explained"]
```

```python
bucket_type = client.bucket_type('hlls')
bucket = bucket_type.bucket('my_hlls')
myhll = datatypes.Hll(bucket, 'hll_one')
myhll.add('Jokes')
myhll.add('Are')
myhll.add('Better')
myhll.add('Explained')
myhll.add('Jokes')
myhll.store()
# myhll.value == 4
```

```go
// We will add values in the next example
```

```csharp
// We will add values in the next example
```

```javascript
// We will add values in the next example
```

```php
$command = (new Command\Builder\UpdateHll($riak_client))
    ->add('Jokes')
    ->add('Are')
    ->add('Better')
    ->add('Explained')
    ->add('Jokes')
    ->buildBucket('my_hlls', 'hlls')
    ->build();

$response = $command->execute();
```

```ruby
```

```curl
curl -XPOST http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness \
  -H "Content-Type: application/json" \
  -d '{"add_all":["my", "old", "friend"]}'
```

However, when using a non-HTTP client, the approximate cardinality/value of our data structure will be 0, locally, until it's pushed to the server and then [fetched](#retrieve-a-hyperloglog-data-type) from the server.

```erlang
riakc_hll:value(HLL2) == 0.

%% which will return:
%% true

Port = 8087,
{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", Port),
Key = <<"Holy Diver">>,
BucketType = <<"hlls">>,
Bucket = {BucketType, <<"rainbow in the dark">>},

ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(HLL2)).
ok = riakc_pb_socket:update_type(Pid, Bucket, Key, riakc_hll:to_op(RepeatHLL1)).
```

```java
// Using hllUpdate and hllLocation from above examples

UpdateHll update = new UpdateHll.Builder(hllLocation, hllUpdate)
    .build();
client.execute(update);
```

```python
bucket_type = client.bucket_type('hlls')
bucket = bucket_type.bucket('my_hlls')
myhll = datatypes.Hll(bucket, 'hll_one')
myhll.add('Jokes')
myhll.add('Are')
myhll.add('Better')
myhll.add('Explained')
myhll.add('Jokes')
myhll.store()
# myhll.value == 4
```

```go
adds := [][]byte{
    []byte("Jokes"),
    []byte("Are"),
    []byte("Better"),
    []byte("Explained"),
    []byte("Jokes"),
}

builder := riak.NewUpdateHllCommandBuilder()
cmd, err := builder.WithBucketType("hlls").
    WithBucket("hello").
    WithKey("darkness").
    WithAdditions(adds...).
    Build()
if err != nil {
    return err
}

return cluster.Execute(cmd)
```

```javascript
var options = {
    bucketType: 'hlls',
    bucket: 'hello',
    key: 'darkness',
    additions: ['Jokes', 'Are', 'Better', 'Explained', 'Jokes'],
};

client.updateHll(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```csharp
var adds = new HashSet<string> { "Jokes", "Are", "Better", "Explained", "Jokes" };

var update = new UpdateHll.Builder(adds)
    .WithBucketType("hlls")
    .WithBucket("hello")
    .WithKey("darkness")
    .WithReturnBody(true)
    .Build();

RiakResult rslt = client.Execute(update);
```

```php
$command = (new Command\Builder\UpdateHll($riak_client))
    ->add('Jokes')
    ->add('Are')
    ->add('Better')
    ->add('Explained')
    ->add('Jokes')
    ->buildLocation('darkness', 'hello', 'hlls')
    ->build();

$response = $command->execute();
```

```ruby
hll.add('Jokes')
hll.batch do |s|
  s.add 'Are'
  s.add 'Better'
  s.add 'Explained'
  s.add 'Jokes'
end
```

## Retrieve a HyperLogLog data type

Now we can check the approximate cardinality of our hyperloglog data structure, i.e. the number of unique elements added to it:

```erlang
{ok, HLL3} = riakc_pb_socket:fetch_type(Pid, Bucket, Key),
riakc_hll:value(HLL3) == 4.

%% which would return:
%% true

%% We added <<"Jokes">> twice, but, remember, the algorithm only counts the
%% unique elements we've added to the data structure.
```

```java
FetchHll hllFetchCmd = new FetchHll.Builder(hllLocation).build();
RiakHll hll = client.execute(hllFetchCmd);
hll.getCardinality();
// Which returns 4

// We added "Jokes" twice, but, remember, the algorithm only counts the
// unique elements we've added to the data structure.
```

```python
bucket_type = client.bucket_type('hlls')
bucket = bucket_type.bucket('my_hlls')
myhll = bucket.get('hll_one')
# myhll.value == 4
```

```go
var resp *riak.FetchHllResponse

builder := riak.NewFetchHllCommandBuilder()
cmd, err := builder.WithBucketType("hlls").
    WithBucket("hello").
    WithKey("darkness").
    Build()
if err != nil {
    return err
}
if err = cluster.Execute(cmd); err != nil {
    return err
}
if fc, ok := cmd.(*riak.FetchHllCommand); ok {
    if fc.Response == nil {
        return errors.New("expected non-nil Response")
    }
    resp = fc.Response
}

// We added "Jokes" twice, but, remember, the algorithm only counts the
// unique elements we've added to the data structure.
fmt.Println("Hyperloglog cardinality: ", resp.Cardinality)
return nil
```

```javascript
var options = {
    bucketType: 'hlls',
    bucket: 'hello',
    key: 'darkness'
};

client.fetchHll(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }

    if (rslt.notFound) {
        logger.info("Not Found");
    }
    logger.info("Hyperloglog cardinality is: " + rslt.cardinality);
});
// Prints "Hyperloglog cardinality is: 4"
// We added "Jokes" twice, but, remember, the algorithm only counts the
// unique elements we've added to the data structure.
```

```csharp
var fetch = new FetchHll.Builder()
    .WithBucketType("hlls")
    .WithBucket("hello")
    .WithKey("darkness")
    .Build();

RiakResult rslt = client.Execute(fetch);
Assert.IsTrue(rslt.IsSuccess, rslt.ErrorMessage);

HllResponse response = fetch.Response;
if (response.NotFound)
{
    Console.WriteLine("Not Found");
}
else
{
    Console.WriteLine("Hyperloglog cardinality is: " + response.Cardinality);
}

// Prints "Hyperloglog cardinality is: 4"
// We added "Jokes" twice, but, remember, the algorithm only counts the
// unique elements we've added to the data structure.
```

```php
$command = (new Command\Builder\FetchHll($riak_client))
    ->buildLocation('darkness', 'hello', 'hlls')
    ->build();

$response = $command->execute();

// Note: as though we are in a PHP unit test
$this->assertTrue(is_int($response->getHll()->getData()));
$this->assertEquals(4, $response->getHll()->getData());

// We added "Jokes" twice, but, remember, the algorithm only counts the
// unique elements we've added to the data structure.
```

```ruby
puts hll.cardinality
# Prints "4"
```

```curl
curl http://localhost:8098/types/hlls/buckets/hello/datatypes/darkness

# Response
{"type":"hll","value":"4"}
```

diff --git a/content/riak/kv/3.0.4/developing/data-types/maps.md b/content/riak/kv/3.0.4/developing/data-types/maps.md
new file mode 100644
index 0000000000..3a0d42b476
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/data-types/maps.md
@@ -0,0 +1,1885 @@
---
title_supertext: "Developing with Riak KV"
title: "Data Types: Maps"
description: ""
project: "riak_kv"
project_version: 3.0.4
menu:
  riak_kv-3.0.4:
    name: "Maps"
    identifier: "data_types_maps"
    weight: 102
    parent: "developing_data_types"
toc: true
aliases:
  - /riak/3.0.4/dev/using/data-types/maps
  - /riak/kv/3.0.4/dev/using/data-types/maps
  - /riak/3.0.4/dev/data-modeling/data-types/maps
  - /riak/kv/3.0.4/dev/data-modeling/data-types/maps
---

Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.

Using counters, sets, and maps within maps is similar to working with those types at the bucket level.

## Set Up a Bucket Type

> If you've already created and activated a bucket type with the `datatype` parameter set to `map`, skip to the [next section](#client-setup).

Start by creating a bucket type with the `datatype` parameter set to `map`:

```bash
riak-admin bucket-type create maps '{"props":{"datatype":"map"}}'
```

> **Note**
>
> The `maps` bucket type name provided above is an example and is not required to be `maps`. You are free to name bucket types whatever you like, with the exception of `default`.

After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:

```bash
riak-admin bucket-type status maps
```

This returns a list of bucket properties and their values in the form of `property: value`.
If our `maps` bucket type has been set properly, we should see the following pair in our console output:

```bash
datatype: map
```

Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:

```bash
riak-admin bucket-type activate maps
```

We can check if activation has been successful by using the same `bucket-type status` command shown above:

```bash
riak-admin bucket-type status maps
```

After creating and activating our new `maps` bucket type, we can set up our client to start using the bucket type as detailed in the next section.

## Client Setup

First, we need to direct our client to the bucket type/bucket/key location that contains our map.

The syntax for creating a map is analogous to the syntax for creating other data types:

```java
// In the Java client, a bucket/bucket type combination is specified
// using a Namespace object. To specify bucket, bucket type, and key,
// use a Location object that incorporates the Namespace object, as is
// done below.

Location map =
  new Location(new Namespace("", ""), "");
```

```ruby
bucket = client.bucket_type('bucket_type_name').bucket('bucket_name')
map = Riak::Crdt::Map.new(bucket, key)
```

```php
$location = new \Basho\Riak\Location('key', 'bucket', 'bucket_type');
```

```python
# The client detects the bucket type's datatype and automatically
# returns the right datatype for you, in this case a Map.
map = bucket.new(key)

# This way is also acceptable:
from riak.datatypes import Map
map = Map(bucket, key)
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

var builder = new UpdateMap.Builder()
    .WithBucketType("")
    .WithBucket("")
    .WithKey("");
```

```javascript
// Options to pass to the various map methods
var options = {
    bucketType: '',
    bucket: '',
    key: ''
};
```

```erlang
%% Maps in the Erlang client are opaque data structures that
%% collect operations as you mutate them. We will associate the data
%% structure with a bucket type, bucket, and key later on.
```

```curl
curl http://localhost:8098/types//buckets//datatypes/

# Note that this differs from the URL structure for non-data type requests,
# which end in /keys/
```

## Create a Map

For this example, say we want to use Riak KV to store information about our company's customers. We'll use the `maps` bucket type created and activated previously and a bucket called `customers`. Each customer's data will be contained in its own key in the `customers` bucket.

We can create a map for the user Ahmed (`ahmed_info`) using the `maps` bucket type:

```java
// In the Java client, you specify the location of data types
// before you perform operations on them:

Location ahmedMap =
  new Location(new Namespace("maps", "customers"), "ahmed_info");
```

```ruby
customers = client.bucket_type('maps').bucket('customers')
map = Riak::Crdt::Map.new(customers, 'ahmed_info')

# Alternatively, the Ruby client enables you to set a bucket type as being
# globally associated with a Riak data type.
# The following would set all
# map buckets to use the maps bucket type:

Riak::Crdt::DEFAULT_BUCKET_TYPES[:map] = 'maps'

# This would enable us to create our map without specifying a bucket type:

customers = client.bucket('customers')
map = Riak::Crdt::Map.new(customers, 'ahmed_info')
```

```php
$location = new \Basho\Riak\Location('ahmed_info', 'customers', 'maps');
```

```python
customers = client.bucket_type('maps').bucket('customers')
map = customers.new('ahmed_info')
```

```csharp
// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs

var builder = new UpdateMap.Builder()
    .WithBucketType("maps")
    .WithBucket("customers")
    .WithKey("ahmed_info");
```

```javascript
var options = {
    bucketType: 'maps',
    bucket: 'customers',
    key: 'ahmed_info'
};
```

```erlang
Map = riakc_map:new().

%% Maps in the Erlang client are opaque data structures that
%% collect operations as you mutate them. We will associate the data
%% structure with a bucket type, bucket, and key later on.
```

```curl
# You cannot create an empty map through the HTTP interface. Maps can only
# be created when a field is added to them, as in the examples below.
```

## Registers

Registers are essentially named binaries (like strings). Any binary value can act as the value of a register. Like flags, registers cannot be used on their own and must be embedded in Riak maps.

### Registers Within Maps

Continuing with our previous `customers` example, let's store some information in our map.

The first piece of information we want to store in our map is Ahmed's name and phone number, both of which are best stored as registers:

```java
// Using our "ahmedMap" location from above:

RegisterUpdate ru1 = new RegisterUpdate("Ahmed");
RegisterUpdate ru2 = new RegisterUpdate("5551234567");
MapUpdate mu = new MapUpdate()
        .update("first_name", ru1)
        .update("phone_number", ru2);
UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
    .build();
client.execute(update);
```

```ruby
# The Ruby client enables you to batch operations together if you're
# performing them on one data type.

map.batch do |m|
  m.registers['first_name'] = 'Ahmed'
  m.registers['phone_number'] = '5551234567'
end

# Integers need to be stored as strings and then converted back when
# the data is retrieved. The following would work as well:
map.registers['phone_number'] = 5551234567.to_s
```

```php
(new \Basho\Riak\Command\Builder\UpdateMap($riak))
    ->updateRegister('first_name', 'Ahmed')
    ->updateRegister('phone_number', '5551234567')
    ->atLocation($location)
    ->build()
    ->execute();
```

```python
map.registers['first_name'].assign('Ahmed')
map.registers['phone_number'].assign('5551234567')

# Integers need to be stored as strings and then converted back when the
# data is retrieved.
The following would work as well: +map.registers['phone_number'].assign(str(5551234567)) + +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new UpdateMap.Builder() + .WithBucketType("maps") + .WithBucket("customers") + .WithKey("ahmed_info"); + +var mapOperation = new UpdateMap.MapOperation(); + +// Ahmed's first name +mapOperation.SetRegister("first_name", "Ahmed"); + +// Ahmed's phone number +mapOperation.SetRegister("phone_number", "5551234567"); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +MapResponse response = cmd.Response; +PrintMap(response.Value); +// Output as JSON: +// Map: {"Counters":{},"Sets":{},"Registers":{"first_name":"Ahmed","phone_number":"5551234567"},"Flags":{},"Maps":{}} +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.setRegister('first_name', new Buffer('Ahmed')); +mapOp.setRegister('phone_number', new Buffer('5551234567')); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map1 = riakc_map:update({<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Ahmed">>, R) end, + Map), +Map2 = riakc_map:update({<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5551234567">>, R) end, + Map1). +``` + +```curl +# Updates can be performed all at once. The following will create two new +# registers in the map and also set the value of those registers to the +# desired values + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "first_name_register": "Ahmed", + "phone_number_register": "5551234567" + } + }' +``` + +If a register did not previously exist, Riak KV will create that register for you. + +## Flags + +Flags behave much like Boolean values, except that instead of `true` or +`false` flags have the values `enable` or `disable`. + +Flags cannot be used on their own, i.e. a flag cannot be stored in a bucket/key by itself. Instead, flags can only be stored within maps. + +To disable an existing flag, you have to read it or provide [a context](../#data-types-and-context). + +### Flags Within Maps + +Now let's say that we add an Enterprise plan to our pricing model. We'll +create an `enterprise_customer` flag to track whether Ahmed has signed +up for the new plan. 
He hasn't yet, so we'll set it to `false`:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate mu = new MapUpdate()
+        .update("enterprise_customer", new FlagUpdate(false));
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.flags['enterprise_customer'] = false
+```
+
+```php
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('enterprise_customer', false)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.flags['enterprise_customer'].disable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+// Using our builder from above:
+
+mapOperation = new UpdateMap.MapOperation();
+mapOperation.SetFlag("enterprise_customer", false);
+
+builder.WithMapOperation(mapOperation);
+cmd = builder.Build();
+rslt = client.Execute(cmd);
+
+response = cmd.Response;
+
+// response.Value as JSON:
+// Map: {"Counters":{},"Sets":{},
+//   "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//   "Flags":{"enterprise_customer":false},"Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.setFlag('enterprise_customer', false);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"enterprise_customer">>, flag},
+    fun(F) -> riakc_flag:disable(F) end,
+    Map3).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"first_name_register":"Ahmed","phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "enterprise_customer_flag": "disable"
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEBag=="
+  }'
+```
+
+We can retrieve the value of that flag at any time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+System.out.println(map.getFlag("enterprise_customer").view());
+```
+
+```ruby
+map.flags['enterprise_customer']
+
+# false
+```
+
+```php
+$map = (new \Basho\Riak\Command\Builder\FetchMap($riak))
+  ->atLocation($location)
+  ->build()
+  ->execute()
+  ->getMap();
+
+echo $map->getFlag('enterprise_customer'); // false
+```
+
+```python
+map.reload().flags['enterprise_customer'].value
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+Map ahmedMap = response.Value;
+ahmedMap.Flags["enterprise_customer"]
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    console.log("fetched map: %s", JSON.stringify(rslt));
+});
+```
+
+```erlang
+%% The value fetched from Riak is always immutable, whereas the "dirty
+%% value" takes into account local modifications that have not been
+%% sent to the server.
+
+riakc_map:dirty_value(Map4).
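+
+%% So far these operations exist only in the local Map4 structure. As a
+%% sketch (assuming a connected client process Pid, a hypothetical
+%% variable here), the queued operations could be written to Riak with:
+%%
+%%   riakc_pb_socket:update_type(Pid,
+%%                               {<<"maps">>, <<"customers">>},
+%%                               <<"ahmed_info">>,
+%%                               riakc_map:to_op(Map4)).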
+
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+```
+
+## Counters Within Maps
+
+We also want to know how many times Ahmed has visited our website. We'll
+use a `page_visits` counter for that and run the following operation
+when Ahmed visits our page for the first time:
+
+```java
+// Using our "ahmedMap" location from above:
+
+CounterUpdate cu = new CounterUpdate(1);
+MapUpdate mu = new MapUpdate()
+        .update("page_visits", cu);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.counters['page_visits'].increment
+
+# This operation may return false even if successful
+```
+
+```php
+$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+  ->withIncrement(1);
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateCounter('page_visits', $updateCounter)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+map.counters['page_visits'].increment()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.IncrementCounter("page_visits", 1);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+
+MapResponse response = cmd.Response;
+// Map: {"Counters":{"page_visits":3},
+//   "Sets":{},
+//   "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//   "Flags":{"enterprise_customer":false},
+//   "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.incrementCounter('page_visits', 1);
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map3 = riakc_map:update({<<"page_visits">>, counter},
+    fun(C) -> riakc_counter:increment(1, C) end,
+    Map2).
+```
+
+```curl
+# The following will create a new counter and increment it by 1
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "page_visits_counter": 1
+    }
+  }'
+```
+
+Even though the `page_visits` counter did not exist previously, the
+above operation will create it (with a default starting point of 0) and
+the increment operation will bump the counter up to 1.
+
+## Sets Within Maps
+
+We'd also like to know what Ahmed's interests are so that we can better
+design a user experience for him. Through his purchasing decisions, we
+find out that Ahmed likes robots, opera, and motorcycles. 
We'll store
+that information in a set inside of our map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+SetUpdate su = new SetUpdate()
+        .add("robots")
+        .add("opera")
+        .add("motorcycles");
+MapUpdate mu = new MapUpdate()
+        .update("interests", su);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, mu)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('robots')
+  ->add('opera')
+  ->add('motorcycles');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateSet('interests', $updateSet)
+  ->atLocation($location)
+  ->build()
+  ->execute();
+```
+
+```python
+for interest in ['robots', 'opera', 'motorcycles']:
+    map.sets['interests'].add(interest)
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var interestsAdds = new[] { "robots", "opera", "motorcycles" };
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.AddToSet("interests", interestsAdds);
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+MapResponse response = cmd.Response;
+
+// Map: {"Counters":{"page_visits":3},
+//   "Sets":{"interests":["motorcycles","opera","robots"]},
+//   "Registers":{"first_name":"Ahmed","phone_number":"5551234567"},
+//   "Flags":{"enterprise_customer":false},
+//   "Maps":{}}
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+mapOp.addToSet('interests', 'robots');
+mapOp.addToSet('interests', 'opera');
+mapOp.addToSet('interests', 'motorcycles');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Map4 = riakc_map:update({<<"interests">>, set},
+    fun(S) -> riakc_set:add_element(<<"robots">>, S) end, Map3),
+Map5 = riakc_map:update({<<"interests">>, set},
+    fun(S) -> riakc_set:add_element(<<"opera">>, S) end,
+    Map4),
+Map6 = riakc_map:update({<<"interests">>, set},
+    fun(S) -> riakc_set:add_element(<<"motorcycles">>, S) end,
+    Map5).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "add_all": [
+          "robots",
+          "opera",
+          "motorcycles"
+        ]
+      }
+    }
+  }'
+```
+
+We can then verify that the `interests` set includes these three
+interests:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+RiakMap map = response.getDatatype();
+RiakSet interestSet = map.getSet("interests");
+Set<BinaryValue> interests = interestSet.view();
+System.out.println(interests.contains(BinaryValue.create("robots")));
+
+// Checking for "opera" and "motorcycles" works the same way
+```
+
+```ruby
+map.batch do |m|
+  %w{ robots opera motorcycles }.each do |interest|
+    m.sets['interests'].include? 
interest + end +end + +# This will return three Boolean values +``` + +```php +$map = (new \Basho\Riak\Command\Builder\FetchMap($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getMap(); + +$sets = $map->getSet('interests'); +var_dump($sets->getData()); +``` + +```python +reloaded_map = map.reload() +for interest in ['robots', 'opera', 'motorcycles']: + interest in reloaded_map.sets['interests'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +Map ahmedMap = response.Value; + +// All of the following return true: +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("opera"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + assert(rslt.map.sets['interests'].indexOf('robots') !== -1); +}); +``` + +```erlang +riakc_map:dirty_value(Map6). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info?include_context=false +``` + +We learn from a recent purchasing decision that Ahmed actually doesn't +seem to like opera. He's much more keen on indie pop. Let's change the +`interests` set to reflect that: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate() + .remove("opera") + .add("indie pop"); +MapUpdate mu = new MapUpdate() + .update("interests", su); +UpdateMap update = new UpdateMap.Builder(ahmedMap, mu) + .build(); +client.execute(update); +``` + +```ruby +map.batch do |m| + m.sets['interests'].remove('opera') + m.sets['interests'].add('indie pop') +end +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('indie pop') + ->remove('opera'); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet) + ->atLocation($location) + ->withContext($map->getContext()) + ->build() + ->execute(); +``` + +```python +map.sets['interests'].discard('opera') +map.sets['interests'].add('indie pop') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.AddToSet("interests", "indie pop"); +mapOperation.RemoveFromSet("interests", "opera"); + +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); + +UpdateMap cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); + +MapResponse response = cmd.Response; +Map ahmedMap = response.Value; + +// This is false +ahmedMap.Sets.GetValue("interests").Contains("opera"); + +// These are true +ahmedMap.Sets.GetValue("interests").Contains("indie pop"); +ahmedMap.Sets.GetValue("interests").Contains("robots"); +ahmedMap.Sets.GetValue("interests").Contains("motorcycles"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + mapOp.removeFromSet('interests', 'opera'); + mapOp.addToSet('interests', 'indie pop'); + + options.context = rslt.context; + options.op = mapOp; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map7 = 
riakc_map:update({<<"interests">>, set},
+          fun(S) -> riakc_set:del_element(<<"opera">>, S) end, Map6),
+Map8 = riakc_map:update({<<"interests">>, set},
+          fun(S) -> riakc_set:add_element(<<"indie pop">>, S) end,
+          Map7).
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["motorcycles","opera","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "interests_set": {
+        "remove": "opera",
+        "add": "indie pop"
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEEag=="
+  }
+  '
+```
+
+## Maps Within Maps
+
+We've stored a wide range of information---of a wide variety of
+types---within the `ahmed_info` map thus far, but we have yet to explore
+recursively storing maps within maps (which can be nested as deeply as
+you wish).
+
+Our company is doing well and we have lots of useful information about
+Ahmed, but now we want to store information about Ahmed's contacts as
+well. We'll start with storing some information about Ahmed's colleague
+Annika inside of a map called `annika_info`.
+
+First, we'll store Annika's first name, last name, and phone number in
+registers:
+
+```java
+// Using our "ahmedMap" location from above:
+
+RegisterUpdate ru1 = new RegisterUpdate("Annika");
+RegisterUpdate ru2 = new RegisterUpdate("Weiss");
+RegisterUpdate ru3 = new RegisterUpdate("5559876543");
+
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("first_name", ru1)
+        .update("last_name", ru2)
+        .update("phone_number", ru3);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.registers['first_name'] = 'Annika'
+  m.registers['last_name'] = 'Weiss'
+  m.registers['phone_number'] = 5559876543.to_s
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateRegister('first_name', 'Annika')
+  ->updateRegister('last_name', 'Weiss')
+  ->updateRegister('phone_number', '5559876543');
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].registers['first_name'].assign('Annika')
+map.maps['annika_info'].registers['last_name'].assign('Weiss')
+map.maps['annika_info'].registers['phone_number'].assign(str(5559876543))
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+
+var annikaInfoOperation = mapOperation.Map("annika_info");
+annikaInfoOperation.SetRegister("first_name", "Annika");
+annikaInfoOperation.SetRegister("last_name", "Weiss");
+annikaInfoOperation.SetRegister("phone_number", "5559876543");
+
+builder.WithMapOperation(mapOperation);
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
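+// map('annika_info') returns a handle to the nested map operation, so
+// the chained setRegister calls below write registers inside the
+// embedded 'annika_info' map rather than on the top-level map.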
+mapOp.map('annika_info') + .setRegister('first_name', 'Annika') + .setRegister('last_name', 'Weiss') + .setRegister('phone_number', '5559876543'); + +options.op = mapOp; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map12 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"first_name">>, register}, + fun(R) -> riakc_register:set(<<"Annika">>, R) end, M) end, + Map11), +Map13 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"last_name">>, register}, + fun(R) -> riakc_register:set(<<"Weiss">>, R) end, M) end, + Map12), +Map14 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"phone_number">>, register}, + fun(R) -> riakc_register:set(<<"5559876543">>, R) end, M) end, + Map13). +``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "first_name_register": "Annika", + "last_name_register": "Weiss", + "phone_number_register": "5559876543" + } + } + } + } + ' +``` + +The value of a register in a map can be obtained without a special +method: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +String annikaFirstName = response.getDatatype() + .getMap("annika_info") + .getRegister("first_name") + .view() + .toString(); +``` + +```ruby +map.maps['annika_info'].registers['first_name'] + +# "Annika" +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getRegister('first_name'); // Annika +``` + +```python +map.reload().maps['annika_info'].registers['first_name'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Registers.GetValue("first_name"); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var annikaFirstName = + rslt.map.maps['annika_info'].registers['first_name'].toString('utf8'); +}); +``` + +```erlang +riakc_map:dirty_value(Map14). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +Registers can also be removed: + +```java +// This example uses our "ahmedMap" location from above. 
Operations that
+// remove fields from maps require that you first fetch the opaque context
+// attached to the map and then include the context in the update operation:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap)
+    .build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .removeRegister("first_name");
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].registers.remove('first_name')
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->removeRegister('first_name');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withContext($map->getContext())
+  ->build()
+  ->execute();
+```
+
+```python
+del map.maps['annika_info'].registers['first_name']
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").RemoveRegister("first_name");
+
+// Note: using Context from last response
+builder
+    .WithMapOperation(mapOperation)
+    .WithContext(response.Context);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    mapOp.map('annika_info').removeRegister('first_name');
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map15 = riakc_map:update({<<"annika_info">>, map},
+    fun(M) -> riakc_map:erase({<<"phone_number">>, register}, M) end,
+    Map14).
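+
+%% riakc_map:erase/2 queues removal of the given {Key, Type} field from
+%% the embedded map. As with the other clients, removals are only safe
+%% to submit together with the causal context from a previous fetch.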
+
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss","phone_number_register":"5559876543"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "remove": ["phone_number_register"]
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEGag=="
+  }
+  '
+```
+
+Now, we'll store whether Annika is subscribed to a variety of plans
+within the company as well:
+
+```java
+// Using our "ahmedMap" location from above:
+
+FetchMap fetch = new FetchMap.Builder(ahmedMap).build();
+FetchMap.Response response = client.execute(fetch);
+Context ctx = response.getContext();
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("enterprise_plan", new FlagUpdate(false))
+        .update("family_plan", new FlagUpdate(false))
+        .update("free_plan", new FlagUpdate(true));
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].batch do |m|
+  m.flags['enterprise_plan'] = false
+  m.flags['family_plan'] = false
+  m.flags['free_plan'] = true
+end
+```
+
+```php
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('enterprise_plan', false)
+  ->updateFlag('family_plan', false)
+  ->updateFlag('free_plan', true);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].flags['enterprise_plan'].disable()
+map.maps['annika_info'].flags['family_plan'].disable()
+map.maps['annika_info'].flags['free_plan'].enable()
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info")
+    .SetFlag("enterprise_plan", false)
+    .SetFlag("family_plan", false)
+    .SetFlag("free_plan", true);
+
+builder.WithMapOperation(mapOperation);
+
+UpdateMap cmd = builder.Build();
+client.Execute(cmd);
+```
+
+```javascript
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info'
+};
+
+client.fetchMap(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+    var annika_map = mapOp.map('annika_info');
+    annika_map.setFlag('enterprise_plan', false);
+    annika_map.setFlag('family_plan', false);
+    annika_map.setFlag('free_plan', true);
+
+    var options = {
+        bucketType: 'maps',
+        bucket: 'customers',
+        key: 'ahmed_info',
+        op: mapOp,
+        context: rslt.context,
+    };
+
+    client.updateMap(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+Map16 = riakc_map:update(
+  {<<"annika_info">>, map},
+  fun(M) -> riakc_map:update(
+    {<<"enterprise_plan">>, flag},
+    fun(F) -> riakc_flag:disable(F) end,
+    M) end,
+  Map15),
+Map17 = riakc_map:update(
+  
{<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"family_plan">>, flag}, + fun(F) -> riakc_flag:disable(F) end, + M) end, + Map16), +Map18 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"free_plan">>, flag}, + fun(F) -> riakc_flag:enable(F) end, + M) end, + Map17). +``` + +```curl +curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info + +# Response +{"type":"map","value":{"annika_info_map":{"first_name_register":"Annika","last_name_register":"Weiss"},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag=="} + +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "enterprise_plan_flag": "disable", + "family_plan_flag": "disable", + "free_plan_flag": "enable" + } + } + }, + "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEHag==" + } + ' +``` + +The value of a flag can be retrieved at any time: + +```java +// Using our "ahmedMap" location from above: + +FetchMap fetch = new FetchMap.Builder(ahmedMap).build(); +FetchMap.Response response = client.execute(fetch); +boolean enterprisePlan = response.getDatatype() + .getMap("annika_info") + .getFlag("enterprise_plan") + .view(); +``` + +```ruby +map.maps['annika_info'].flags['enterprise_plan'] + +# false +``` + +```php +# with param 'returnbody' = 'true', we can fetch the map from our last response +$map->getMap(); + +echo $map->getMap('annika_info')->getFlag('enterprise_plan'); // false +``` + +```python +map.reload().maps['annika_info'].flags['enterprise_plan'].value +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +ahmedMap = response.Value; +ahmedMap.Maps["annika_info"].Flags["enterprise_plan"]; +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + var enterprisePlan = + rslt.map.maps.annika_info.flags.enterprise_plan; +}); +``` + +```erlang +riakc_map:dirty_value(Map18). +``` + +```curl +# Specific values for fields inside of maps (or maps within maps, for that +# matter), cannot be obtained directly through the HTTP interface. +``` + +It's also important to track the number of purchases that Annika has +made with our company. 
Annika just made her first widget purchase: + +```java +// Using our "ahmedMap" location from above: + +MapUpdate annikaUpdate = new MapUpdate() + .update("widget_purchases", new CounterUpdate(1)); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].counters['widget_purchases'].increment +``` + +```php +$updateCounter = (new \Basho\Riak\Command\Builder\IncrementCounter($riak)) + ->withIncrement(1); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateCounter('widget_purchases', $updateCounter); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].counters['widget_purchases'].increment() +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").IncrementCounter("widget_purchases", 1); + +builder.WithMapOperation(mapOperation); + +UpdateMap cmd = builder.Build(); +client.Execute(cmd); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +mapOp.map('annika_info').incrementCounter('widget_purchases', 1); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map19 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"widget_purchases">>, counter}, + fun(C) -> riakc_counter:increment(1, C) end, + M) end, + Map18). 
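+
+%% The outer riakc_map:update/3 selects the embedded 'annika_info' map;
+%% the inner update then increments the 'widget_purchases' counter
+%% inside it. This is the general pattern for updating nested fields.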
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "widget_purchases_counter": 1 + } + } + } + } + ' +``` + +Now let's store Annika's interests in a set: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().add("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].add('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].add('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").AddToSet("interests", "tango dancing"); + +builder.WithMapOperation(mapOperation); +client.Execute(builder.Build()); +``` + +```javascript +var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); +var annika_map = mapOp.map('annika_info'); +annika_map.addToSet('interests', 'tango dancing'); + +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp +}; + +client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Map20 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:add_element(<<"tango dancing">>, S) end, + M) end, + Map19). 
+``` + +```curl +curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \ + -H "Content-Type: application/json" \ + -d ' + { + "update": { + "annika_info_map": { + "update": { + "interests_set": { + "add": "tango dancing" + } + } + } + } + } + ' +``` + +We can remove that interest in just the way that we would expect: + +```java +// Using our "ahmedMap" location from above: + +SetUpdate su = new SetUpdate().remove("tango dancing"); +MapUpdate annikaUpdate = new MapUpdate() + .update("interests", su); +MapUpdate ahmedUpdate = new MapUpdate() + .update("annika_info", annikaUpdate); +UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate) + .withUpdate(ahmedUpdate) + .build(); +client.execute(update); +``` + +```ruby +map.maps['annika_info'].sets['interests'].remove('tango dancing') +``` + +```php +$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->remove('tango dancing'); + +$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateSet('interests', $updateSet); + +(new \Basho\Riak\Command\Builder\UpdateMap($riak)) + ->updateMap('annika_info', $annikaMap) + ->atLocation($location) + ->withContext($response->getMap()->getContext()) + ->build() + ->execute(); +``` + +```python +map.maps['annika_info'].sets['interests'].discard('tango dancing') +map.store() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var mapOperation = new UpdateMap.MapOperation(); +mapOperation.Map("annika_info").RemoveFromSet("interests", "tango dancing"); + +// Note: using Context from previous response +builder + .WithMapOperation(mapOperation) + .WithContext(response.Context); +client.Execute(builder.Build()); +``` + +```javascript +var options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info' +}; + +client.fetchMap(options, function (err, rslt) { + var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation(); + var annika_map = mapOp.map('annika_info'); + annika_map.removeFromSet('interests', 'tango dancing'); + + options = { + bucketType: 'maps', + bucket: 'customers', + key: 'ahmed_info', + op: mapOp, + context: rslt.context + }; + + client.updateMap(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + }); +}); +``` + +```erlang +Map21 = riakc_map:update( + {<<"annika_info">>, map}, + fun(M) -> riakc_map:update( + {<<"interests">>, set}, + fun(S) -> riakc_set:del_element(<<"tango dancing">>, S) end, + M) end, + Map20). 
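+
+%% As with top-level sets, riakc_set:del_element/2 only queues the
+%% removal locally; when these operations are sent to Riak, the removal
+%% needs the causal context obtained by fetching the map first.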
+
+```
+
+```curl
+curl http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info
+
+# Response
+{"type":"map","value":{"annika_info_map":{"enterprise_plan_flag":false,"family_plan_flag":false,"first_name_register":"Annika","free_plan_flag":true,"interests_set":["tango dancing"],"last_name_register":"Weiss","widget_purchases_counter":1},"enterprise_customer_flag":false,"first_name_register":"Ahmed","interests_set":["indie pop","motorcycles","robots"],"page_visits_counter":1,"phone_number_register":"5551234567"},"context":"g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="}
+
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "interests_set": {
+            "remove": "tango dancing"
+          }
+        }
+      }
+    },
+    "context" : "g2wAAAABaAJtAAAADCMJ/vn2jOEXAAAAAWEKag=="
+  }
+  '
+```
+
+If we wanted to store information about one of Annika's specific
+purchases, we could do so within a map:
+
+```java
+// Using our "ahmedMap" location from above:
+
+MapUpdate purchaseUpdate = new MapUpdate()
+        .update("first_purchase", new FlagUpdate(true))
+        .update("amount", new RegisterUpdate("1271"))
+        .update("items", new SetUpdate().add("large widget"));
+MapUpdate annikaUpdate = new MapUpdate()
+        .update("purchase", purchaseUpdate);
+MapUpdate ahmedUpdate = new MapUpdate()
+        .update("annika_info", annikaUpdate);
+UpdateMap update = new UpdateMap.Builder(ahmedMap, ahmedUpdate)
+        .withUpdate(ahmedUpdate)
+        .build();
+client.execute(update);
+```
+
+```ruby
+map.maps['annika_info'].maps['purchase'].batch do |m|
+  m.flags['first_purchase'] = true
+  m.registers['amount'] = 1271.to_s
+  m.sets['items'].add('large widget')
+end
+```
+
+```php
+$updateSet = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('large widget');
+
+$purchaseMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateFlag('first_purchase', true)
+  ->updateRegister('amount', '1271')
+  ->updateSet('items', $updateSet);
+
+$annikaMap = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('purchase', $purchaseMap);
+
+$response = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+  ->updateMap('annika_info', $annikaMap)
+  ->atLocation($location)
+  ->withParameter('returnbody', 'true')
+  ->build()
+  ->execute();
+```
+
+```python
+map.maps['annika_info'].maps['purchase'].flags['first_purchase'].enable()
+map.maps['annika_info'].maps['purchase'].registers['amount'].assign(str(1271))
+map.maps['annika_info'].maps['purchase'].sets['items'].add('large widget')
+# and so on
+map.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var mapOperation = new UpdateMap.MapOperation();
+mapOperation.Map("annika_info").Map("purchase")
+    .SetFlag("first_purchase", true)
+    .SetRegister("amount", "1271")
+    .AddToSet("items", "large widget");
+
+builder.WithMapOperation(mapOperation);
+client.Execute(builder.Build());
+```
+
+```javascript
+var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+var annika_map = mapOp.map('annika_info');
+var annika_purchase_map = annika_map.map('purchase');
+annika_purchase_map.setFlag('first_purchase', true);
+annika_purchase_map.setRegister('amount', '1271');
+annika_purchase_map.addToSet('items', 'large widget');
+
+var options = {
+    bucketType: 'maps',
+    bucket: 'customers',
+    key: 'ahmed_info',
+    op: mapOp
+};
+
+client.updateMap(options, function (err, rslt) {
+    if (err) {
+        throw new 
Error(err);
+    }
+});
+```
+
+```erlang
+Map22 = riakc_map:update(
+  {<<"annika_info">>, map},
+  fun(M) -> riakc_map:update(
+    {<<"purchase">>, map},
+    fun(M) -> riakc_map:update(
+      {<<"first_purchase">>, flag},
+      fun(R) -> riakc_flag:enable(R) end,
+      M) end,
+    M) end,
+  Map21
+).
+```
+
+```curl
+curl -XPOST http://localhost:8098/types/maps/buckets/customers/datatypes/ahmed_info \
+  -H "Content-Type: application/json" \
+  -d '
+  {
+    "update": {
+      "annika_info_map": {
+        "update": {
+          "purchase_map": {
+            "update": {
+              "first_purchase_flag": "enable",
+              "amount_register": "1271",
+              "items_set": {
+                "add": "large widget"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  '
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/data-types/sets.md b/content/riak/kv/3.0.4/developing/data-types/sets.md
new file mode 100644
index 0000000000..338f240654
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/data-types/sets.md
@@ -0,0 +1,773 @@
+---
+title_supertext: "Developing with Riak KV"
+title: "Data Types: Sets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Sets"
+    identifier: "data_types_sets"
+    weight: 101
+    parent: "developing_data_types"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/using/data-types/sets
+  - /riak/kv/3.0.4/dev/using/data-types/sets
+  - /riak/3.0.4/dev/data-modeling/data-types/sets
+  - /riak/kv/3.0.4/dev/data-modeling/data-types/sets
+---
+
+Sets are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used [within a map](../maps#sets-within-maps).
+
+Sets are collections of unique binary values (such as strings).
+
+For example, if you attempt to add the element `shovel` to a set that already contains `shovel`, the operation will be ignored by Riak KV.
+
+## Set Up a Bucket Type
+
+> If you've already created and activated a bucket type with `set` as the `datatype` parameter, skip to the [next section](#client-setup).
+
+Start by creating a bucket type with the `datatype` parameter `set`:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+```
+
+> **Note**
+>
+> The `sets` bucket type name provided above is an example and is not required to be `sets`. You are free to name bucket types whatever you like, with the exception of `default`.
+
+After creating a bucket type with a Riak data type, confirm the bucket property configuration associated with that type is correct:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+This returns a list of bucket properties and their values
+in the form of `property: value`.
+
+If our `sets` bucket type has been set properly, we should see the following pair in our console output:
+
+```bash
+datatype: set
+```
+
+Once we have confirmed the bucket type is properly configured, we can activate the bucket type to be used in Riak KV:
+
+```bash
+riak-admin bucket-type activate sets
+```
+
+We can check if activation has been successful by using the same `bucket-type status` command shown above:
+
+```bash
+riak-admin bucket-type status sets
+```
+
+After creating and activating our new `sets` bucket type, we can set up our client to start using the bucket type as detailed in the next section.
+
+## Client Setup
+
+Using sets involves creating a bucket/key pair to house a set and running set-specific operations on that pair. 
+ +Here is the general syntax for creating a bucket type/bucket/key +combination to handle a set: + +```java +// In the Java client, a bucket/bucket type combination is specified +// using a Namespace object. To specify bucket, bucket type, and key, +// use a Location object that incorporates the Namespace object, as is +// done below. + +Location set = + new Location(new Namespace("", ""), ""); +``` + +```ruby +# Note: both the Riak Ruby Client and Ruby the language have a class +# called Set. Make sure that you refer to the Ruby version as ::Set and +# the Riak client version as Riak::Crdt::Set + +bucket = client.bucket_type('bucket_type_name').bucket('bucket_name') +set = Riak::Crdt::Set.new(bucket, key) +``` + +```php +$location = new \Basho\Riak\Location('key', new \Basho\Riak\Bucket('bucket_name', 'bucket_type')); +``` + +```python +# Note: The Python standard library `collections` module has an abstract +# base class called Set, which the Riak Client version subclasses as +# `riak.datatypes.Set`. These classes are not directly interchangeable. +# In addition to the base methods, `riak.datatypes.Set` also +# implements the `add` and `discard` methods from +# `collections.MutableSet`, but does not implement the rest of its +# API. Be careful when importing, or simply use the instances returned +# by `RiakBucket.get()` and `RiakBucket.new()` instead of directly +# importing the class. + +set = bucket.new(key) + +# or + +from riak.datatypes import Set +set = Set(bucket, key) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// As with counters, with the Riak .NET Client you interact with sets +// by building an Options object or using a Builder +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); + +// NB: builder.Options will only be set after Build() is called. +FetchSet fetchSetCommand = builder.Build(); + +FetchSetOptions options = new FetchSetOptions("sets", "travel", "cities"); + +// These two options objects are equal +Assert.AreEqual(options, builder.Options); +``` + +```javascript +// As with counters, with the Riak Node.js Client you interact with sets on the +// basis of the set's location in Riak, as specified by an options object. +// Below is an example: +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +``` + +```erlang +%% Like counters, sets are not encapsulated in a +%% bucket/key in the Erlang client. See below for more +%% information. +``` + +```curl +curl http://localhost:8098/types//buckets//datatypes/ + +# Note that this differs from the URL structure for non-data type requests, +# which end in /keys/ +``` + +## Create a Set + +For the following example, we will use a set to store a list of cities that we +want to visit. Let's create a Riak set stored in the key `cities` in the bucket `travel` using the `sets` bucket type created previously: + +```java +// In the Java client, you specify the location of Data Types +// before you perform operations on them: + +Location citiesSet = + new Location(new Namespace("sets", "travel"), "cities"); +``` + +```ruby +travel = client.bucket_type('sets').bucket('travel') +cities_set = Riak::Crdt::Set.new(travel, 'cities') + +# Alternatively, the Ruby client enables you to set a bucket type as +# being globally associated with a Riak data type. 
The following would +# set all set buckets to use the sets bucket type: + +Riak::Crdt::DEFAULT_BUCKET_TYPES[:set] = 'sets' + +# This would enable us to create our set without specifying a bucket +# type: +travel = client.bucket('travel') +cities_set = Riak::Crdt::Set.new(travel, 'cities') +``` + +```php +$location = new \Basho\Riak\Location('cities', 'travel', 'sets'); +``` + +```python +travel = client.bucket_type('sets').bucket('travel') + +# The client detects the bucket type's data type and automatically +# returns the right data type for you, in this case a Riak set. +cities_set = travel.new('cities') + +# You can also create a reference to a set explicitly: +from riak.datatypes import Set + +cities_set = Set(travel, 'cities') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +// Now we'll create a Builder object for the set with which we want to +// interact: +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); +``` + +```javascript +// Now we'll create a options object for the set with which we want to +// interact: +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +``` + +```erlang +CitiesSet = riakc_set:new(). + +%% Sets in the Erlang client are opaque data structures that +%% collect operations as you mutate them. We will associate the data +%% structure with a bucket type, bucket, and key later on. +``` + +```curl +# You cannot create an empty set through the HTTP interface. Sets can +# only be created when an element is added to them, as in the examples +# below. +``` + +Upon creation, our set is empty. We can verify that it is empty at any +time: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +RiakSet set = response.getDatatype(); +boolean isEmpty = set.viewAsSet().isEmpty(); +``` + +```ruby +cities_set.empty? +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +count($set->getData()); +``` + +```python +len(cities_set) == 0 +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var builder = new FetchSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities"); + +FetchSet fetchSetCommand = builder.Build(); +RiakResult rslt = client.Execute(fetchSetCommand); +SetResponse response = fetchSetCommand.Response; +// response.Value will be null +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.notFound) { + logger.info("set 'cities' is not found!"); + } +}); +``` + +```erlang +riakc_set:size(CitiesSet) == 0. + +%% Query functions like size/1, is_element/2, and fold/3 operate over +%% the immutable value fetched from the server. In the case of a new +%% set that was not fetched, this is an empty collection, so the size +%% is 0. +``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","error":"notfound"} +``` + +## Add to a Set + +But let's say that we read a travel brochure saying that Toronto and +Montreal are nice places to go. 
Let's add them to our `cities` set: + +```java +// Using our "cities" Location from above: + +SetUpdate su = new SetUpdate() + .add("Toronto") + .add("Montreal"); +UpdateSet update = new UpdateSet.Builder(citiesSet, su) + .build(); +client.execute(update); +``` + +```ruby +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```php +# use $location from earlier +$response = (new \Basho\Riak\Command\Builder\UpdateSet($riak)) + ->add('Toronto') + ->add('Montreal') + ->atLocation($location) + ->withParameter('returnbody', 'true') + ->build() + ->execute(); +``` + +```python +cities_set.add('Toronto') +cities_set.add('Montreal') +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +var adds = new[] { "Toronto", "Montreal" }; + +var builder = new UpdateSet.Builder() + .WithBucketType("sets") + .WithBucket("travel") + .WithKey("cities") + .WithAdditions(adds); + +UpdateSet cmd = builder.Build(); +RiakResult rslt = client.Execute(cmd); +SetResponse response = cmd.Response; + +Assert.Contains("Toronto", response.AsStrings.ToArray()); +Assert.Contains("Montreal", response.AsStrings.ToArray()); +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +var cmd = new Riak.Commands.CRDT.UpdateSet.Builder() + .withBucketType(options.bucketType) + .withBucket(options.bucket) + .withKey(options.key) + .withAdditions(['Toronto', 'Montreal']) + .withCallback( + function (err, rslt) { + if (err) { + throw new Error(err); + } + } + ) + .build(); +client.execute(cmd); +``` + +```erlang +CitiesSet1 = riakc_set:add_element(<<"Toronto">>, CitiesSet), +CitiesSet2 = riakc_set:add_element(<<"Montreal">>, CitiesSet1). +``` + +```curl +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"add_all":["Toronto", "Montreal"]}' +``` + +## Remove from a Set + +Later on, we hear that Hamilton and Ottawa are nice cities to visit in +Canada, but if we visit them, we won't have time to visit Montreal, so +we need to remove it from the list. + +Note that removing an element from a set is trickier than adding elements. In +order to remove an item (or multiple items), we need to first fetch the +set, which provides our client access to the set's [causal context](../../../learn/concepts/causal-context). 
+
+Once we've fetched the set, we can remove the element `Montreal` and
+store the set:
+
+```java
+// Using our "citiesSet" Location from above
+
+// First, we get a response
+FetchSet fetch = new FetchSet.Builder(citiesSet).build();
+FetchSet.Response response = client.execute(fetch);
+
+// Then we can fetch the set's causal context
+Context ctx = response.getContext();
+
+// Now we build a SetUpdate operation
+SetUpdate su = new SetUpdate()
+        .remove("Montreal")
+        .add("Hamilton")
+        .add("Ottawa");
+
+// Finally, we update the set, specifying the context
+UpdateSet update = new UpdateSet.Builder(citiesSet, su)
+        .withContext(ctx)
+        .build();
+client.execute(update);
+
+// More information on using causal context with the Java client can be
+// found at the bottom of this document
+```
+
+```ruby
+cities_set.remove('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+```
+
+```php
+# use $location & $response from earlier
+(new \Basho\Riak\Command\Builder\UpdateSet($riak))
+  ->add('Hamilton')
+  ->add('Ottawa')
+  ->remove('Montreal')
+  ->atLocation($location)
+  ->withContext($response->getSet()->getContext())
+  ->build()
+  ->execute();
+```
+
+```python
+cities_set.discard('Montreal')
+cities_set.add('Hamilton')
+cities_set.add('Ottawa')
+cities_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs
+
+var removes = new[] { "Montreal" };
+var adds = new[] { "Hamilton", "Ottawa" };
+
+// Note:
+// using the builder from above
+// using the Context member from the above response
+builder
+    .WithAdditions(adds)
+    .WithRemovals(removes)
+    .WithContext(response.Context);
+
+UpdateSet cmd = builder.Build();
+RiakResult rslt = client.Execute(cmd);
+SetResponse response = cmd.Response;
+
+// using System.Linq
+var responseStrings = response.AsStrings.ToArray();
+
+Assert.Contains("Toronto", responseStrings);
+Assert.Contains("Hamilton", responseStrings);
+Assert.Contains("Ottawa", responseStrings);
+```
+
+```javascript
+var options = {
+    bucketType: 'sets',
+    bucket: 'travel',
+    key: 'cities'
+};
+client.fetchSet(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    // NB: clone package https://www.npmjs.com/package/clone
+    var update_opts = clone(options);
+    update_opts.context = rslt.context;
+    update_opts.additions = ['Hamilton', 'Ottawa'];
+    update_opts.removals = ['Montreal'];
+
+    client.updateSet(update_opts, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+CitiesSet3 = riakc_set:del_element(<<"Montreal">>, CitiesSet2),
+CitiesSet4 = riakc_set:add_element(<<"Hamilton">>, CitiesSet3),
+CitiesSet5 = riakc_set:add_element(<<"Ottawa">>, CitiesSet4).
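+
+%% A sketch of submitting the accumulated additions and removals
+%% (assuming a connected client process Pid, a hypothetical variable
+%% here):
+%%
+%%   riakc_pb_socket:update_type(Pid,
+%%                               {<<"sets">>, <<"travel">>},
+%%                               <<"cities">>,
+%%                               riakc_set:to_op(CitiesSet5)).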
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Montreal","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="} + +curl -XPOST http://localhost:8098/types/sets/buckets/travel/datatypes/cities \ + -H "Content-Type: application/json" \ + -d '{"remove": "Montreal","add_all":["Hamilton", "Ottawa"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWECag=="}' +``` + +## Retrieve a Set + +Now, we can check on which cities are currently in our set: + +```java +// Using our "cities" Location from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); +for (BinaryValue city : binarySet) { + System.out.println(city.toStringUtf8()); +} +``` + +```ruby +cities_set.members + +# +``` + +```php +# use $location from earlier +$set = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->atLocation($location) + ->build() + ->execute() + ->getSet(); + +var_dump($set->getData()); +``` + +```python +cities_set.dirty_value + +# The value fetched from Riak is always immutable, whereas the "dirty +# value" takes into account local modifications that have not been +# sent to the server. For example, where the call above would return +# frozenset(['Toronto', 'Hamilton', 'Ottawa']), the call below would +# return frozenset([]). + +cities_set.value + +# To fetch the value stored on the server, use the call below. Note +# that this will clear any unsent additions or deletions. +cities_set.reload() +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +foreach (var value in setResponse.AsStrings) +{ + Console.WriteLine("Cities Set Value: {0}", value); +} + +// Output: +// Cities Set Value: Hamilton +// Cities Set Value: Ottawa +// Cities Set Value: Toronto +``` + +```javascript +var options = { + bucketType: 'sets', + bucket: 'travel', + key: 'cities' +}; +client.fetchSet(options, function(err, rslt) { + if (err) { + throw new Error(err); + } + + logger.info("cities set values: '%s'", + rslt.values.join(', ')); +}); + +// Output: +// info: cities set values: 'Hamilton, Ottawa, Toronto' +``` + +```erlang +riakc_set:dirty_value(CitiesSet5). + +%% The value fetched from Riak is always immutable, whereas the "dirty +%% value" takes into account local modifications that have not been +%% sent to the server. For example, where the call above would return +%% [<<"Hamilton">>, <<"Ottawa">>, <<"Toronto">>], the call below would +%% return []. These are essentially ordsets: + +riakc_set:value(CitiesSet5). + +%% To fetch the value stored on the server, use the call below: + +{ok, SetX} = riakc_pb_socket:fetch_type(Pid, + {<<"sets">>,<<"travel">>}, + <<"cities">>). 
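+
+%% SetX now reflects the server-side value; riakc_set:value(SetX)
+%% returns its members, with no locally queued operations attached.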
+``` + +```curl +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities + +# Response +{"type":"set","value":["Hamilton","Ottawa","Toronto"],"context":"g2wAAAABaAJtAAAADCMJ/vn7tg36AAAAAWEEag=="} + +# You can also fetch the value of the set without the context included: +curl http://localhost:8098/types/sets/buckets/travel/datatypes/cities?include_context=false + +# Response +{"type":"set","value":["Hamilton", "Ottawa", "Toronto"]} +``` + +## Find Set Member + +Or we can see whether our set includes a specific member: + +```java +// Using our "citiesSet" from above: + +FetchSet fetch = new FetchSet.Builder(citiesSet) + .build(); +FetchSet.Response response = client.execute(fetch); +Set binarySet = response.getDatatype().view(); + +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Vancouver"))); +System.out.println(binarySet.contains(BinaryValue.createFromUtf8("Ottawa"))); +``` + +```ruby +cities_set.include? 'Vancouver' +# false + +cities_set.include? 'Ottawa' +# true +``` + +```php +in_array('Vancouver', $set->getData()); # false + +in_array('Ottawa', $set->getData()); # true +``` + +```python +'Vancouver' in cities_set +# False + +'Ottawa' in cities_set +# True +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +using System.Linq; + +bool includesVancouver = response.AsStrings.Any(v => v == "Vancouver"); +bool includesOttawa = response.AsStrings.Any(v => v == "Ottawa"); +``` + +```javascript +// Use standard javascript array method indexOf() + +var cities_set = result.values; +cities_set.indexOf('Vancouver'); // if present, index is >= 0 +cities_set.indexOf('Ottawa'); // if present, index is >= 0 +``` + +```erlang +%% At this point, Set5 is the most "recent" set from the standpoint +%% of our application. + +riakc_set:is_element(<<"Vancouver">>, CitiesSet5). +riakc_set:is_element(<<"Ottawa">>, CitiesSet5). +``` + +```curl +# With the HTTP interface, this can be determined from the output of +# a fetch command like the one displayed in the example above +``` + +## Size of Set + +We can also determine the size of the set: + +```java +// Using our "citiesSet" from above: + +int numberOfCities = citiesSet.size(); +``` + +```ruby +cities_set.members.length +``` + +```php +count($set->getData()); +``` + +```python +len(cities_set) +``` + +```csharp +// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Using/DataTypes.cs + +using System.Linq; + +// Note: this enumerates the IEnumerable +setResponse.Values.Count(); +``` + +```javascript +// Use standard javascript array property length + +var cities_set_size = result.values.length; +``` + +```erlang +riakc_set:size(CitiesSet5). 
+``` + +```curl +# With the HTTP interface, this can be determined from the output of +# a fetch command like the one displayed in the example above +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/faq.md b/content/riak/kv/3.0.4/developing/faq.md new file mode 100644 index 0000000000..f11bb0d3d8 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/faq.md @@ -0,0 +1,592 @@ +--- +title_supertext: "Developing with Riak KV" +title: "Frequently Asked Questions" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Developing FAQ" + identifier: "developing_faq" + weight: 108 + parent: "developing" +toc: true +aliases: + - /riak/3.0.4/community/faqs/developing + - /riak/kv/3.0.4/community/faqs/developing +--- + +[[Basho Bench]: {{}}riak/kv/3.0.4/using/performance/benchmarking +[Bitcask]: {{}}riak/kv/3.0.4/setup/planning/backend/bitcask +[Bucket Properties]: {{}}riak/kv/3.0.4/developing/usage +[built-in functions list]: https://github.com/basho/riak_kv/blob/master/priv/mapred_builtins.js +[commit hooks]: {{}}riak/kv/3.0.4/developing/usage/commit-hooks +[Configuration Files]: {{}}riak/kv/3.0.4/configuring/reference +[contrib.basho.com]: https://github.com/basho/riak_function_contrib +[Erlang Riak Client]: {{}}riak/kv/3.0.4/developing/client-libraries +[MapReduce]: {{}}riak/kv/3.0.4/developing/usage/mapreduce +[Memory]: {{}}riak/kv/3.0.4/setup/planning/backend/memory +[Riak CS]: {{}}riak/cs/2.1.1 +[System Planning]: {{}}riak/kv/3.0.4/setup/planning/start/#network-configuration-load-balancing +[vector clocks]: {{}}riak/kv/3.0.4/learn/concepts/causal-context#vector-clocks + + +## General + + +**Q: How can I automatically expire a key from Riak? I want to regularly purge items from Riak that are older than a certain timestamp, but MapReduce times out on large numbers of items. Can I expire data automatically?** + +**A:** + If you're using [Bitcask], the default storage backend, and you want items to expire at a consistent interval (assuming that they are not updated), set the `expiry_secs` option in `app.config`. Items that persist past this threshold will not be returned on get/fetch operations and will eventually be removed from disk by Bitcask's merging process. For example: + + ```erlang + {bitcask, [ + {data_root, "data/bitcask"}, + {expiry_secs, 86400} %% Expire after a day + ]}, + ``` + + There is no limit on how large or small the `expiry_secs` setting can be as long as it is greater than 0. + + You can also set auto-expire using the [Memory] storage backend, but it will be limited by RAM. + + +**Q: Is there better performance for a few objects in many buckets, or many objects in a few buckets?** + + +**A:** + Generally speaking, it does not matter if you have many buckets with a small number of objects or a small number of buckets with a large number of objects. Buckets that use the cluster's default bucket properties (which can be set in your `app.config` file) are essentially free. + + If the buckets require different bucket properties, however, those custom properties incur some cost because changes in bucket properties must be gossiped around the cluster. If you create many, many buckets with custom properties, the cost can indeed have an impact. + + +**Q: Can I list buckets or keys in production?** + + +**A:** + It is *not* recommended that you list the buckets in production because it is a costly operation irrespective of the bucket's size. 
+
+  Buckets are not like directories on a file system or tables in a database; rather, they are logical properties applied to objects, i.e. there is no *actual* separation of objects by bucket.
+
+  A filter must be applied to all of the objects in the system in order to find those residing in a particular bucket. Buckets are intended for configuration purposes (e.g. replication properties) rather than for general queries.
+
+  To keep track of groups of objects there are several options with various trade-offs: secondary indexes, search, or a list using links.
+
+
+**Q: Why do secondary indexes (2i) return inconsistent results after using `force-remove` to drop a node from the cluster?**
+
+
+**A:**
+  The Riak key/value store distributes values across all of the partitions in the ring. In order to minimize synchronization issues with secondary indexes, Riak stores index information in the same partition as the data values.
+
+  When a node fails or is taken out of the cluster without using `riak-admin leave`, all of the data held by that node is lost to the cluster. This leaves N - 1 consistent replicas of the data. If `riak-admin force-remove` is used to remove the downed node, the remaining nodes will claim the partitions the failed node previously held. The data in the newly claimed vnodes will be made consistent one key at a time through the read-repair mechanism as each key is accessed, or through Active Anti-Entropy (AAE) if enabled.
+
+  As a simplistic example, consider this hypothetical cluster:
+
+  * 5 nodes (nodes A-E)
+  * ring size = 16
+  * `n_val` = 3
+
+  For the sake of simplicity, this example uses small integers instead of the actual 160-bit partition index values. The partitions are assigned to the nodes as follows:
+
+```
+A: 0-5-10-15
+B: 1-6-11
+C: 2-7-12
+D: 3-8-13
+E: 4-9-14
+```
+  When a value is stored in Riak, the `{bucket, key}` is hashed to determine its first primary partition, and the value is stored in that partition and the next `n_val` - 1 partitions in the ring.
+  A preflist consists of the vnode which owns the key and the next `n_val` - 1 vnodes in the ring, in order. In this scenario there are 16 preflists:
+
+<table>
+<tr><td>0-1-2</td><td>1-2-3</td><td>2-3-4</td><td>3-4-5</td></tr>
+<tr><td>4-5-6</td><td>5-6-7</td><td>6-7-8</td><td>7-8-9</td></tr>
+<tr><td>8-9-10</td><td>9-10-11</td><td>10-11-12</td><td>11-12-13</td></tr>
+<tr><td>12-13-14</td><td>13-14-15</td><td>14-15-0</td><td>15-0-1</td></tr>
+</table>
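+
+  For illustration, the preflist layout above can be reproduced with a few lines of code. The following is a Python sketch of the simplified ring model used in this example (not Riak code or any client API):
+
+```python
+# Sketch of the simplified example ring above: 16 partitions, n_val = 3.
+# Not Riak code; for illustration only.
+RING_SIZE = 16
+N_VAL = 3
+
+def preflist(owner):
+    """A preflist is the owning vnode plus the next n_val - 1 vnodes."""
+    return [(owner + i) % RING_SIZE for i in range(N_VAL)]
+
+for p in range(RING_SIZE):
+    print(preflist(p))  # [0, 1, 2], [1, 2, 3], ..., [15, 0, 1]
+```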
+
+  Index information for each partition is co-located with the value data. In order to get a full result set for a secondary index query, Riak will need to consult a "covering set" of vnodes that includes at least one member of each preflist. This will require a minimum of 1/`n_val` of the vnodes, rounded up; in this case, 6. There are 56 possible covering sets consisting of 6 vnodes:
+
+<table>
+<tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-2-5-8-10-13</td></tr>
+<tr><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-4-7-10-13</td><td>0-3-5-7-10-13</td></tr>
+<tr><td>0-3-5-8-10-13</td><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-7-10-13</td></tr>
+<tr><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td></tr>
+<tr><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td></tr>
+<tr><td>0-3-6-9-12-15</td><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td></tr>
+<tr><td>1-3-6-9-11-14</td><td>1-3-6-9-12-14</td><td>1-3-6-9-12-15</td><td>1-4-5-8-11-14</td></tr>
+<tr><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td></tr>
+<tr><td>1-4-7-8-11-14</td><td>1-4-7-9-11-14</td><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td></tr>
+<tr><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td></tr>
+<tr><td>1-4-7-10-13-15</td><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-4-7-9-12-15</td></tr>
+<tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-6-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+<tr><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td><td>2-5-8-9-12-15</td><td>2-5-8-10-12-15</td></tr>
+<tr><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-13-15</td><td>2-5-8-11-14-15</td></tr>
+</table>
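+
+  The count of 56 can be checked by brute force: a covering set here is any choice of 6 vnodes that intersects every one of the 16 preflists. Below is a Python sketch of that enumeration (again, illustration only, not Riak code):
+
+```python
+# Brute-force the covering sets for the simplified ring above.
+# Not Riak code; for illustration only.
+import math
+from itertools import combinations
+
+RING_SIZE = 16
+N_VAL = 3
+COVER_SIZE = math.ceil(RING_SIZE / N_VAL)  # 1/n_val of the vnodes, rounded up: 6
+
+preflists = [{(p + i) % RING_SIZE for i in range(N_VAL)} for p in range(RING_SIZE)]
+
+covering_sets = [set(c) for c in combinations(range(RING_SIZE), COVER_SIZE)
+                 if all(pl & set(c) for pl in preflists)]
+print(len(covering_sets))  # => 56
+```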
+
+  When a node fails or is marked down, its vnodes will not be considered for coverage queries. Fallback vnodes will be created on other nodes so that PUT and GET operations can be handled, but only primary vnodes are considered for secondary index coverage queries. If a covering set cannot be found, `{error, insufficient_vnodes}` will be returned. Thus, the reply will either be complete or an error.
+
+  When a node is `force-remove`d, it is dropped from the cluster without transferring its data to other nodes. The remaining nodes then claim the unowned partitions, designating new primary replicas to comply with `n_val`, but they do not immediately populate the data or indexes.
+
+  Read repair, triggered by GETs or PUTs on the individual keys, and/or Active Anti-Entropy, will eventually repopulate the data, restoring consistency.
+  A GET operation for a key will request the data from all of the vnodes in its preflist, by default waiting for over half of them to respond. This results in consistent responses to GET requests even when one of the vnodes in the preflist has been compromised.
+
+  Secondary index queries, however, consult a covering set which may include only 1 member of the preflist. If that vnode is empty due to the `force-remove` operation, none of the keys from that preflist will be returned.
+
+  Continuing with the above example, consider what happens if node C is force-removed.
+  This is one possible configuration after rebalancing:
+
+```
+A: 0-5-10-15
+B: 1-6-11-2*
+D: 3-8-13-7*
+E: 4-9-14-12*
+```
+
+  Vnodes 2, 7, and 12 (marked with `*`) are newly created primary partitions that do not contain any values or index information.
+
+  In this new 4-node configuration, any coverage set that includes vnodes 2, 7, or 12 will return incomplete results until consistency is restored via read repair or AAE, because not all vnodes will contain the data that would otherwise be present.
+
+  So, making a couple of assumptions for demonstration purposes:
+
+  1. The keys `a`, `b`, and `c` are stored in the following preflists:
+
+      ```
+      a - 0-1-2
+      b - 6-7-8
+      c - 10-11-12
+      ```
+
+  2. The cluster is not loaded, so no GET/PUT or other coverage queries are being performed
+
+  3. AAE is not enabled
+
+  The coordinating node (the one that receives the request from the client) will attempt to spread the load by not using the same partitions for successive coverage queries.
+
+  The results from secondary index queries that should return all 3 keys will vary depending on the nodes chosen for the coverage set. Of the 56 possible covering sets ...
+
+  * 20 sets (35.7% of sets) will return all 3 keys `{a,b,c}`:
+
+<table>
+<tr><td>0-2-5-8-10-13</td><td>0-2-5-8-11-13</td><td>0-2-5-8-11-14</td><td>0-3-5-8-10-13</td></tr>
+<tr><td>0-3-5-8-11-13</td><td>0-3-5-8-11-14</td><td>0-3-6-8-10-13</td><td>0-3-6-8-11-13</td></tr>
+<tr><td>0-3-6-8-11-14</td><td>0-3-6-9-10-13</td><td>0-3-6-9-11-13</td><td>0-3-6-9-11-14</td></tr>
+<tr><td>1-2-5-8-11-14</td><td>1-3-5-8-11-14</td><td>1-3-6-8-11-14</td><td>1-3-6-9-11-14</td></tr>
+<tr><td>1-4-5-8-11-14</td><td>1-4-6-8-11-14</td><td>1-4-6-9-11-14</td><td>1-4-7-8-11-14</td></tr>
+</table>
+
+  * 24 sets (42.9%) will return 2 of the 3 keys:
+
+<table>
+<tr><th colspan="4"><code>{a,b}</code> (7 sets)</th></tr>
+<tr><td>0-3-6-9-12-13</td><td>0-3-6-9-12-14</td><td>0-3-6-9-12-15</td><td>1-3-6-9-12-14</td></tr>
+<tr><td>1-3-6-9-12-15</td><td>1-4-6-9-12-14</td><td>1-4-6-9-12-15</td><td> </td></tr>
+<tr><th colspan="4"><code>{a,c}</code> (12 sets)</th></tr>
+<tr><td>0-1-4-7-10-13</td><td>0-2-4-7-10-13</td><td>0-2-5-7-10-13</td><td>0-3-4-7-10-13</td></tr>
+<tr><td>0-3-5-7-10-13</td><td>0-3-6-7-10-13</td><td>1-4-7-10-11-14</td><td>1-4-7-10-12-14</td></tr>
+<tr><td>1-4-7-10-12-15</td><td>1-4-7-10-13-14</td><td>1-4-7-10-13-15</td><td>1-4-7-9-11-14</td></tr>
+<tr><th colspan="4"><code>{b,c}</code> (5 sets)</th></tr>
+<tr><td>2-5-8-10-12-15</td><td>2-5-8-10-13-15</td><td>2-5-8-11-12-15</td><td>2-5-8-11-14-15</td></tr>
+<tr><td>2-5-8-11-13-15</td><td> </td><td> </td><td> </td></tr>
+</table>
+
+  * 10 sets (17.8%) will return only one of the 3 keys:
+
+<table>
+<tr><th colspan="4"><code>{a}</code> (2 sets)</th></tr>
+<tr><td>1-4-7-9-12-14</td><td>1-4-7-9-12-15</td><td> </td><td> </td></tr>
+<tr><th colspan="4"><code>{b}</code> (4 sets)</th></tr>
+<tr><td>2-3-6-9-12-15</td><td>2-4-6-9-12-15</td><td>2-5-6-9-12-15</td><td>2-5-8-9-12-15</td></tr>
+<tr><th colspan="4"><code>{c}</code> (4 sets)</th></tr>
+<tr><td>2-4-7-10-12-15</td><td>2-4-7-10-13-15</td><td>2-5-7-10-12-15</td><td>2-5-7-10-13-15</td></tr>
+</table>
+
+  * 2 sets (3.6%) will not return any of the 3 keys:
+
+<table>
+<tr><td>2-4-7-9-12-15</td><td>2-5-7-9-12-15</td></tr>
+</table>
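+
+  These counts can be reproduced with a small simulation. The Python sketch below (illustration only, not Riak code) classifies every covering set by which of the three keys it can still see, given that vnodes 2, 7, and 12 are empty:
+
+```python
+# Classify covering sets by the keys they return when vnodes 2, 7,
+# and 12 are empty. Not Riak code; for illustration only.
+from collections import Counter
+from itertools import combinations
+
+preflists = [{(p + i) % 16 for i in range(3)} for p in range(16)]
+covering_sets = [set(c) for c in combinations(range(16), 6)
+                 if all(pl & set(c) for pl in preflists)]
+
+empty = {2, 7, 12}
+key_preflists = {'a': {0, 1, 2}, 'b': {6, 7, 8}, 'c': {10, 11, 12}}
+
+def visible(cover):
+    # A key is returned only if the cover contains a non-empty vnode
+    # from that key's preflist.
+    return frozenset(k for k, pl in key_preflists.items() if (pl - empty) & cover)
+
+counts = Counter(visible(c) for c in covering_sets)
+for keys in map(frozenset, ['abc', 'ab', 'ac', 'bc', 'a', 'b', 'c', '']):
+    print(''.join(sorted(keys)) or '(none)', counts[keys])
+# abc 20 / ab 7 / ac 12 / bc 5 / a 2 / b 4 / c 4 / (none) 2
+```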
+ +**Q: How do I load 3rd-party Javascript libraries for use in MapReduce functions?** + Is it possible to load third-party javascript libraries (like Underscore.js) to be available in MapReduce functions? + + +**A:** + Yes. For JavaScript, this can be done in `app.config` in `js_source_dir` in the `riak_kv` settings: + + ```erlang + {js_source_dir, "/etc/riak/javascript"}, + ``` + + For Erlang code (please note that you need compiled modules in this dir), set `add_paths` in the `riak_kv` section: + + ```erlang + {add_paths, "/etc/riak/erlang"}, + ``` + + You can find more details in the [Configuration Files] document. + +**Q: Is it possible to use key filtering to just return a list of keys that match a particular pattern without performing a MapReduce on it?** + When running a MapReduce query, a map phase results in Riak pulling an object off of disk. Some queries are only interested in the keys of an object and not the value. Is it possible to run a MapReduce query that does not have to pull objects off of disk? + + +**A:** + Yes. Specifying a MapReduce query with just a reduce phase will avoid any need to pull data off of disk. To return the results of a key filtering query you can do the following: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + ["ends_with","1"] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_identity" + } + } + ] + } + ``` + + There is also a reduce function for counting inputs. This function can be used to count keys in a bucket without reading objects from disk: + + ```json + { + "inputs": { + "bucket": "test", + "key_filters": [ + [ + "ends_with","1" + ] + ] + }, + "query": [ + { + "reduce": { + "language": "erlang", + "module": "riak_kv_mapreduce", + "function": "reduce_count_inputs" + } + } + ] + } + ``` + + +**Q: How can I observe object sizes and sibling counts?** + + +**A:** + `riak-admin status` will return the following stats, which give the mean and median along with the 95th, 99th, and 100th percentile object size and sibling counts. + + ``` + node_get_fsm_siblings_mean : 0 + node_get_fsm_siblings_median : 0 + node_get_fsm_siblings_95 : 0 + node_get_fsm_siblings_99 : 0 + node_get_fsm_siblings_100 : 0 + node_get_fsm_objsize_mean : 0 + node_get_fsm_objsize_median : 0 + node_get_fsm_objsize_95 : 0 + node_get_fsm_objsize_99 : 0 + node_get_fsm_objsize_100 : 0 + ``` + + +**Q: A node left the cluster before handing off all data. How can I resolve this?** + + +**A:** + In versions of Riak earlier than Riak 1.0, there are cases in which a node that is leaving the cluster will shut down before handing off all of its data. This has been resolved in Riak 1.0. + + If you encounter this issue, you can rely upon the `read-repair` command to restore your lost replicas. Simply send a `HEAD` request for each key in your data set and Riak will restore replicas as needed. + + Alternatively, if the node that left prematurely is still installed/available, you can manually re-initiate handoff using the following sequence. This approach requires entering code directly into the Erlang console of a running Riak node, and is therefore most appropriate for users with a support contract with Basho that can ask for help if anything goes wrong. + + **Manual approach**: Restart the node that prematurely left by using `riak console`. Then copy/paste the following sequence, changing the first line to point to a node still in your cluster. 
Handoff should then restart, but there may be no visual indicator. Simply leave the node running for a while. It should eventually hand off all data and then shut down. Verify handoff by once again checking the size of your data directories.
+
+  ```erlang
+  ClusterNode = 'riak@127.0.0.1'.
+
+  application:set_env(riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}).
+  {ok, Ring} = rpc:call(ClusterNode, riak_core_ring_manager, get_my_ring, []).
+  Ring2 = setelement(2, Ring, node()).
+  riak_core_ring_manager:set_my_ring(Ring2).
+  riak_core_ring_manager:write_ringfile().
+  [gen_server:cast(riak_core_node_watcher, {up, Node, [riak_kv]}) || Node <- nodes()].
+  ```
+
+
+**Q: Is there a limit on the size of files that can be stored on Riak?**
+
+
+**A:**
+  There isn't a limit on object size, but we suggest you keep it to no more than 1-2MB for performance reasons. Variables such as network speed can directly affect the maximum usable object size for a given cluster. You should use a tool like [Basho Bench] to determine the performance of your cluster with a given object size before moving to production use. Or if your use case demands storing many large objects, you may want to consider the [Riak CS] object storage system, which is designed for precisely that purpose.
+
+
+**Q: Does the bucket name impact key storage size?**
+
+
+**A:**
+  The storage per key is 40 bytes plus the key size and bucket name size.
+
+  Example:
+
+  Key size: 15 bytes.
+  Bucket name size: 10 bytes.
+
+  Total size = 40 + 15 + 10 = **65 bytes**.
+
+
+
+**Q: Are Riak-generated keys unique within a bucket?**
+
+
+**A:**
+  It's not guaranteed, but you are extremely unlikely to get collisions. Riak generates keys using an Erlang-generated unique ID and a timestamp hashed with SHA-1 and base-62 encoded for URL safety.
+
+
+**Q: Where are bucket properties stored?**
+
+
+**A:**
+  The bucket properties for the default bucket type are stored in the *ring* (metadata stored in each node about the cluster). Rings are gossiped as a single unit, so if possible you should limit your creation of custom buckets under the default bucket type.
+  Bucket properties for non-default bucket types are stored in the cluster metadata system. The cluster metadata system is a more efficient way of replicating this information around a Riak cluster.
+
+  The bucket properties stay in the ring and cluster metadata even if the bucket is empty.
+
+**Q: Are Riak keys / buckets case sensitive?**
+
+
+**A:**
+  Yes, they are case sensitive and treated as binaries (byte buffers). Thus, `mykey` is not equal to `MyKey`.
+
+
+**Q: Can I run my own Erlang applications in the same VM as Riak?**
+
+
+**A:**
+  We do not recommend running your application inside the same virtual machine as Riak for several reasons. If they are kept separate, the following will hold:
+
+  1. Your application and Riak will not compete for the same resources and are thus less likely to affect each other's performance and availability.
+  2. You will be able to upgrade Riak and your application independently of one another.
+  3. When your application or Riak needs more capacity, you can scale them separately to meet your production needs.
+
+
+**Q: Is there a simple way to reload an Erlang module for MapReduce across a cluster?**
+
+
+**A:**
+  Assuming that the module is in your code path, you can run `c:nl(ModName)` from the Erlang console.
+
+
+
+**Q: How do I spread requests across---i.e. 
load balance---a Riak cluster?** + + +**A:** + There are at least two acceptable strategies for load balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**. + + For further information see [System Planning]. + + + +**Q: Why does it seem that Bitcask merging is only triggered when a Riak node is restarted?** + There have been situations where the data directory for a Riak node (e.g. `data/bitcask`) grows continually and does not seem to merge. After restarting the node a series of merges are kicked off and the total size of the data directory shrinks. Why does this happen? + + +**A:** + Riak and Bitcask are operating normally. Bitcask's merge behavior is as follows: + + 1. List all of the data files in the Bitcask directory; it should be noted that a Bitcask directory exists for every vnode (e.g. `data/bitcask/0`) + 2. Remove the currently active file from the list; the active file is the one being actively written + 3. Lookup file stats for each data file; this includes percent fragmentation and number of dead bytes + 4. If any of the stats exceed the defined triggers, the Bitcask directory is merged + + The default triggers for a Bitcask directory: + + * `{frag_merge_trigger, 60}, % >= 60% fragmentation` + * `{dead_bytes_merge_trigger, 536870912}, % Dead bytes > 512 MB` + + In the described scenario, merging has not occurred because none of the data files have triggered the merge. After restarting the node, however, the previously active file is now included in the merge trigger analysis and triggers a merge on the Bitcask directory. + + If Riak was never restarted, the merge would eventually happen when writes roll over to a new data file. Bitcask rolls writes over to a new data file once the currently active file has exceeded a certain size (2 GB by default). + + +**Q: When retrieving a list of siblings I am getting the same vtag multiple times.** + When retrieving a list of siblings via the REST interface, I am seeing the same vtag appear multiple times. Is this normal? I thought vtags were unique. Are they referring to the same sibling? + + +**A:** + The vtag is calculated on a `PUT` based on the vclock and is stored as part of the object's metadata. + + It is possible to get siblings with the same vtag during vector clock pruning and read/repair. + + See [vector clocks] for more information. + + + +**Q: How should I structure larger data objects?** + I have a data object that is denormalized, with multiple child data objects, and stored as a nested JSON hash. However, retrieving and storing this object becomes increasingly costly as my application modifies and adds pieces to the object. Would breaking the object into smaller pieces improve performance? What are the tradeoffs? + + +**A:** + The factors involved in deciding whether or not to break this large object into multiple pieces are more concerned with conceptual structure than performance, although performance will be affected. Those factors include: + + 1. How tightly coupled are the child objects to the parent? That is, are they frequently updated at the same time? + 2. How likely are the objects to be updated at the same time by multiple processes? + + If the parent and child objects are not too tightly coupled (or the children are updated much more frequently), then splitting them along conceptual boundaries will improve performance in your application by decreasing payload size and reducing update conflicts. 
Generally, you will want to add links to connect the objects for easy fetching and traversal. + + +**Q: Is there any way in Riak to limit access to a user or a group of users?** + + +**A:** + Allowing multiple users, also known as multitenancy, is not built into Riak (though it is built into [Riak CS]). Riak has no built-in authentication. + + If you need to restrict access, consider putting an authenticating reverse-proxy server in front of it. + + +**Q: Is there a way to enforce a schema on data in a given bucket?** + Suppose I'd like to set up a bucket to store data adhering to a particular schema. Is there any way to set this up with Riak? This way, when my application attempts to store data in a particular bucket, it will check with this schema first before storing it. Otherwise, it will produce an error. + + +**A:** + Riak does not implement any form of schema validation. A pre-commit hook can be used in this scenario but would need to be written by your development team. You can read more about [commit hooks] in the docs. This document provides two pre-commit hook examples, one in Erlang that restricts objects that are too large and one in Javascript that restricts non-JSON content. + + +**Q: How does the Erlang Riak Client manage node failures?** + Does the Erlang Riak Client manage its own reconnect logic? What should a client do to maintain the connection or reconnect in case of nodes going down? + + +**A:** + The [Erlang Riak Client] gives you several options for how to manage connections. You can set these when starting a `riakc_pb_socket` process or by using the `set_options` function. + + * `queue_if_disconnected` (default: `false`) - requests will be queued when the connection to the server is lost. + * `auto_reconnect` (default: `false`) - if the connection is lost, `riakc_pb_socket` will attempt to reconnect automatically. This is set to `true` if `queue_if_disconnected` is set to `true`. + + If these options are both false, connection errors will be returned to the process-making requests as `{error, Reason}` tuples. + + +**Q: Is there a limiting factor for the number of buckets in a cluster?** + + +**A:** + As long as you use the default bucket properties, buckets consume no resources. Each bucket with non-default bucket properties is stored in the gossiped ring state, so the more buckets with custom properties, the more ring data must be handed off to every node. + + More on [Bucket Properties]. + + +**Q: Is it possible to configure a single bucket's properties in `app.config`?** + + +**A:** + Not a specific bucket, only the defaults. However, you should only need to change them once, since after that the settings will be reflected in the ring state. + + You can read more on `app.config` in [Configuration Files]. + + +**Q: Is there a simple command to delete a bucket?** + + +**A:** + There is no straightforward command to delete an entire bucket. You must delete all of the key/value objects individually. Thus, the following will not work: + + ```curl + curl -X DELETE http://your-host:8098/riak/your-bucket + ``` + + +**Q: Can Riak be configured to fail an update instead of generating a conflict?** + + +**A:** + No. The closest thing would be to use the `If-None-Match` header, but that is only supported in the HTTP interface and probably won't accomplish what you're trying to do. + + +**Q: How can I limit the number of keys retrieved?** + + +**A:** + You'll need to use a [MapReduce] job for this. 
+ + You could also run `keys=stream` and close the connection when you have the designated number. This will not, however, reduce load on the Riak cluster. It will only reduce load on your client. + + +**Q: How is the real hash value for replicas calculated based on the preflist?** + + +**A:** + The hash is calculated first and then the next subsequent *N* partitions are chosen for the preflist. + + +**Q: Do client libraries support load balancing/round robin?** + + +**A:** + + * The Riak Ruby client has failure-aware load balancing. It will round-robin unless there are network errors, in which case other nodes will be preferred. + * The Java client is strictly round robin, but with retries built in. + * The Python client also follows round robin without retries. + * The Erlang client does not support any load balancing. + +## MapReduce + + +**Q: Does the number of keys in a bucket affect the performance of MapReduce?** + + +**A:** + Yes. In general, the smaller the number of keys a bucket holds, the faster MapReduce operations will run. + + +**Q: How do I filter out `not_found` from MapReduce results?** + If I want to filter out the `not_found` in my MapReduce, should I do it in the reduce phase? I have a MapReduce job that returns what I'm looking for, but I want to filter out the `not_found` entries so that I only get a list back with the keys. + + +**A:** + There is a built-in function for this that ships with Riak. Check out `Riak.filterNotFound` from the [built-in functions list]. + + +**Q: Is it possible to call a reduce function at specific intervals during a map function?** + When doing the map step on a whole bucket, can I choose how many keys to map before calling the reduce? I am generating a lot of data in memory and it could be reduced if I could call the following reduce step more often. + + +**A:** + Not currently. The reduce function is run occasionally as the bucket is processed and MapReduce doesn't wait for the whole map process to finish before running the reduce. + + +**Q: When searching over a bucket using MapReduce, is it recommended to perform the search during the map phase or the reduce phase?** + + +**A:** + Aside from the performance considerations of doing a full-bucket [MapReduce], searching is a form of filtering, which should be done in the map phase. + + +**Q: Is it possible to delete data from Riak with a JavaScript MapReduce job?** + + +**A:** + This is not currently possible. If you want to delete objects from MapReduce, use an Erlang reduce phase like the one on [contrib.basho.com]. + + +**Q: Why does MapReduce return a JSON object on occasion instead of an array?** + + +**A:** + `mochijson2` assumes that anything that looks like a proplist---a list of 2-tuples---is turned into a hash: + + ```erlang + list_to_binary(mochijson2:encode([{a , b}, {foo, bar}])). + <<"{\"a\":\"b\",\"foo\":\"bar\"}">> + ``` + + JSON has no "tuple" notion. For the time being, a recommended workaround would be to use a list of length-2 lists. 
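+
+  As a quick illustration of the shape difference (shown in Python here, though any JSON library behaves the same way), a mapping loses the pair structure that a list of length-2 lists preserves:
+
+```python
+# Illustration of the two JSON shapes discussed above; not mochijson2.
+import json
+
+print(json.dumps({"a": "b", "foo": "bar"}))      # {"a": "b", "foo": "bar"}
+print(json.dumps([["a", "b"], ["foo", "bar"]]))  # [["a", "b"], ["foo", "bar"]]
+```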
+ + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started.md b/content/riak/kv/3.0.4/developing/getting-started.md new file mode 100644 index 0000000000..ded8a9eb74 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started.md @@ -0,0 +1,51 @@ +--- +title: "Getting Started Overview" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Getting Started" + identifier: "developing_getting_started" + weight: 100 + parent: "developing" +toc: true +aliases: +--- + +[install index]: {{}}riak/kv/3.0.4/setup/installing +[dev client libraries]: {{}}riak/kv/3.0.4/developing/client-libraries + +Welcome, new Riak developer! This guide will get you started developing +against Riak KV with minimal fuss. + +## Installing Riak KV + +The easiest way to get started with Riak KV is to complete the +[installation][install index] process. + +## Choose Your Programming Language + +Basho officially supports a number of open-source [client libraries][dev client libraries] +for various programming languages and environments. Please select the +language with which you'd like to proceed: + +
    +
+* [Java]({{}}riak/kv/3.0.4/developing/getting-started/java)
+* [Ruby]({{}}riak/kv/3.0.4/developing/getting-started/ruby)
+* [Python]({{}}riak/kv/3.0.4/developing/getting-started/python)
+* [C Sharp]({{}}riak/kv/3.0.4/developing/getting-started/csharp)
+* [Node.js]({{}}riak/kv/3.0.4/developing/getting-started/nodejs)
+* [Erlang]({{}}riak/kv/3.0.4/developing/getting-started/erlang)
+* [PHP]({{}}riak/kv/3.0.4/developing/getting-started/php)
+* [Go]({{}}riak/kv/3.0.4/developing/getting-started/golang)
+ +### Community-supported Client Libraries + +Please see our [client libraries][dev client libraries] page for a listing of +community-supported clients. + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/csharp.md b/content/riak/kv/3.0.4/developing/getting-started/csharp.md new file mode 100644 index 0000000000..1fcafc4f0e --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/csharp.md @@ -0,0 +1,86 @@ +--- +title: "Getting Started with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "C Sharp" + identifier: "getting_started_csharp" + weight: 103 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.4/dev/taste-of-riak/csharp + - /riak/kv/3.0.4/dev/taste-of-riak/csharp +--- + + + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.4/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of the .NET Framework or Mono is required. + +### Client Setup + +Install [the Riak .NET Client](https://github.com/basho/riak-dotnet-client/wiki/Installation) through [NuGet](http://nuget.org/packages/RiakClient) or the Visual Studio NuGet package manager. + +{{% note title="Configuring for a remote cluster" %}} +By default, the Riak .NET Client will add a section to your `app.config` file +for a four node local cluster. If you are using a remote cluster, open up +`app.config` and change the `hostAddress` values to point to nodes in your +remote cluster. +{{% /note %}} + +### Connecting to Riak + +Connecting to Riak with the Riak .NET Client requires creating a cluster object and then creating a new client object. + +```csharp +using System; +using RiakClient; + +namespace TasteOfRiak +{ + class Program + { + static void Main(string[] args) + { + // don't worry, we'll use this string later + const string contributors = "contributors"; + IRiakEndpoint cluster = RiakCluster.FromConfig("riakConfig"); + IRiakClient client = cluster.CreateClient(); + } + } +} +``` + +This creates a new `RiakCluster` which is used to create a new `RiakClient`. A `RiakCluster` object handles all the details of tracking active nodes and also provides load balancing. The `RiakClient` is used to send commands to Riak. *Note:* the `IRiakEndpoint` object implements `IDisposable` and should be correctly disposed when you're done communicating with Riak. + +Let's make sure the cluster is online. Add this to your `Main` method: + +```csharp +var pingResult = client.Ping(); + +if (pingResult.IsSuccess) +{ + Console.WriteLine("pong"); +} +else +{ + Console.WriteLine("Are you sure Riak is running?"); + Console.WriteLine("{0}: {1}", pingResult.ResultCode, pingResult.ErrorMessage); +} +``` + +This is some simple code to test that a node in a Riak cluster is online - we send a simple ping message. Even if the cluster isn't present, the Riak .NET Client will return a response message. It's important to check that your activity was successful by using the `IsSuccess` property and then checking any errors and result codes. + +We are now ready to start interacting with Riak. 
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.4/developing/getting-started/csharp/crud-operations) + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/3.0.4/developing/getting-started/csharp/crud-operations.md new file mode 100644 index 0000000000..43351721e2 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/csharp/crud-operations.md @@ -0,0 +1,148 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "CRUD Operations" + identifier: "getting_started_csharp_crud" + weight: 100 + parent: "getting_started_csharp" +toc: true +aliases: +--- + +### Creating Objects In Riak + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going to want us to do productive work. Let's create a class to represent some data and save some objects into Riak. + +The Riak .NET Client makes use of a `RiakObject` class to encapsulate Riak key/value objects. At the most basic, a `RiakObject` is responsible for identifying your object and for translating it into a format that can be easily saved to Riak. + +Add the `RiakClient.Models` namespace to your using directive. Your usings should look like this: + +```csharp +using System; +using System.Collections.Generic; +using RiakClient; +using RiakClient.Models; +``` + +Add the `Person` class to the `TasteOfRiak` namespace: + +```csharp +public class Person +{ + public string EmailAddress { get; set; } + public string FirstName { get; set; } + public string LastName { get; set; } +} +``` + +Now let's create some people! + +```csharp +var people = new[] +{ + new Person { + EmailAddress = "bashoman@basho.com", + FirstName = "Basho", + LastName = "Man" + }, + new Person { + EmailAddress = "johndoe@gmail.com", + FirstName = "John", + LastName = "Doe" + } +}; + +foreach (var person in people) +{ + var o = new RiakObject(contributors, person.EmailAddress, person); + var putResult = client.Put(o); + + if (putResult.IsSuccess) + { + Console.WriteLine("Successfully saved {1} to bucket {0}", o.Key, o.Bucket); + } + else + { + Console.WriteLine("Are you *really* sure Riak is running?"); + Console.WriteLine("{0}: {1}", putResult.ResultCode, putResult.ErrorMessage); + } +} +``` + +In this sample, we create a collection of `Person` objects and then save each `Person` to Riak. + +Before saving, we need to create a `RiakObject` that encapsulates the bucket, key, and object to be saved. Once we've created a `RiakObject` from our `Person` object, we can save it to Riak using `Client.Put()`. + +Once again, we check the response from Riak. If things are successful, you'll see a helpful message letting you know that your object has been saved to Riak. If things didn't go as planned, there will be an error message displaying the result code and a helpful error message. + +### Reading from Riak + +Let's find a person! + +```csharp +var result = client.Get(contributors, "bashoman@basho.com"); +if (result.IsSuccess) +{ + bashoman = result.Value.GetObject(); + Console.WriteLine("I found {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", result.ResultCode, result.ErrorMessage); +} +``` + +We use `RiakClient.Get` to retrieve an object from Riak. This returns a `RiakResult` which, like other RiakResults, helpfully encapsulates the communication with Riak. 
+ +After verifying that we've been able to communicate with Riak *and* that we have a successful result, we use `GetObject` to deserialize our object. + +### Modifying Existing Data + +Let's say that Basho Man has decided to be known as Riak Man: + +```csharp +bashoman.FirstName = "Riak"; + +var o = new RiakObject(contributors, bashoman.EmailAddress, bashoman); +var updateResult = client.Put(o); +if (updateResult.IsSuccess) +{ + Console.WriteLine("Successfully updated {0} in {1}", bashoman.EmailAddress, contributors); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", updateResult.ResultCode, updateResult.ErrorMessage); +} +``` + +Updating an object involves creating a new `RiakObject` then using `RiakClient.Put` to save the existing object. + +### Deleting Data + +```csharp +var deleteResult = client.Delete(contributors, "johndoe@gmail.com"); +if (deleteResult.IsSuccess) +{ + Console.WriteLine("Successfully got rid of John Doe"); +} +else +{ + Console.WriteLine("Something went wrong!"); + Console.WriteLine("{0}: {1}", deleteResult.ResultCode, deleteResult.ErrorMessage); +} +``` + +Just like other operations, we check the results that have come back from Riak to make sure the object was successfully deleted. + +The Riak .NET Client has a lot of additional functionality that makes it easy to build rich, complex applications with Riak. Check out the [documentation](https://github.com/basho/riak-dotnet-client/wiki) to learn more about working with the Riak .NET Client and Riak. + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/3.0.4/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..268cbfc915 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,111 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Object Modeling" + identifier: "getting_started_csharp_object" + weight: 102 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/3.0.4/dev/taste-of-riak/object-modeling-csharp + - /riak/kv/3.0.4/dev/taste-of-riak/object-modeling-csharp +--- + +To get started, refer to [this source code][1] for the models that we'll +be using. + +To use these classes to store data, we will first have to create a user. +Then, when a user creates a message, we will append that message to one +or more timelines. If it's a private message, we'll append it to the +Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's +a group message, we'll append it to the Group's timeline, as well as to +the User's `Sent` timeline. + +#### Buckets and Keys Revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little more tricky. In past +examples we've used sequential integers, but this presents a problem: we +would need a secondary service to hand out these IDs. This service could +easily be a future bottleneck in the system, so let's use a natural key. +Natural keys are a great fit for key/value systems because both humans +and computers can easily construct them when needed, and most of the +time they can be made unique enough for a KV store. 
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<username>` | `joeuser`
+| `Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>
`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting UTC datetime in an [ISO 8601][iso_8601]
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+so that the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially when many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, please refer to
+[this source code][2] for the repositories that we'll be using.
+
+[This console application][3] exercises the code that we've written.
+
+The repository pattern and `TimelineManager` help with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
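+
+As an aside, natural keys like these are cheap to compute in any language. Here is a quick Python sketch (hypothetical helpers, not part of the C# sample code) that follows the patterns above:
+
+```python
+# Hypothetical helpers that build the natural keys described above;
+# not part of the sample application.
+from datetime import datetime
+
+def msg_key(username, created):
+    return "{0}_{1}".format(username, created.strftime("%Y-%m-%dT%H:%M:%S"))
+
+def timeline_key(owner, timeline_type, day):
+    return "{0}_{1}_{2}".format(owner, timeline_type, day.strftime("%Y-%m-%d"))
+
+print(msg_key("joeuser", datetime(2014, 3, 6, 2, 5, 13)))     # joeuser_2014-03-06T02:05:13
+print(timeline_key("joeuser", "Sent", datetime(2014, 3, 6)))  # joeuser_Sent_2014-03-06
+```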
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + +[1]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Models +[2]: https://github.com/basho/taste-of-riak/tree/master/csharp/Ch03-Msgy-Schema/Repositories +[3]: https://github.com/basho/taste-of-riak/blob/master/csharp/Ch03-Msgy-Schema/Program.cs +[iso_8601]: http://en.wikipedia.org/wiki/ISO_8601 + + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/csharp/querying.md b/content/riak/kv/3.0.4/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..a0acb8a266 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/csharp/querying.md @@ -0,0 +1,214 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with C Sharp" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Querying" + identifier: "getting_started_csharp_query" + weight: 101 + parent: "getting_started_csharp" +toc: true +aliases: + - /riak/3.0.4/dev/taste-of-riak/querying-csharp + - /riak/kv/3.0.4/dev/taste-of-riak/querying-csharp +--- + +## C Sharp Version Setup + +For the C# version, please download the source from GitHub by either +[cloning][taste_of_riak] the source code repository or downloading the +[current zip of the master branch][master_zip]. The code for this +chapter is in `/csharp`. Open up `TasteOfRiak.sln` in Visual Studio or +your IDE of choice. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. You can find the source +for these POCOs in `Customer.cs`, `Order.cs` and +`OrderSummaries.cs`. Let's put some data into Riak so we can play +with it. 
+ +```csharp +Console.WriteLine("Creating Data"); +Customer customer = CreateCustomer(); +IEnumerable orders = CreateOrders(customer); +OrderSummary orderSummary = CreateOrderSummary(customer, orders); + +Console.WriteLine("Starting Client"); +using (IRiakEndPoint endpoint = RiakCluster.FromConfig("riakConfig")) +{ + IRiakClient client = endpoint.CreateClient(); + + Console.WriteLine("Storing Data"); + + client.Put(ToRiakObject(customer)); + + foreach (Order order in orders) + { + // NB: this adds secondary index data as well + client.Put(ToRiakObject(order)); + } + + client.Put(ToRiakObject(orderSummary)); + + ... + ... + ... +} +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```csharp +Console.WriteLine("Fetching related data by shared key"); +string key = "1"; + +var result = client.Get(customersBucketName, key); +CheckResult(result); +Console.WriteLine("Customer 1: {0}\n", GetValueAsString(result)); + +result = client.Get(orderSummariesBucketName, key); +CheckResult(result); +Console.WriteLine("OrderSummary 1: {0}\n", GetValueAsString(result)); +``` + +Which returns our amalgamated objects: + +```bash +Fetching related data by shared key +Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"} +OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.4/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will make a note of where +secondary index data is added to our model objects. 
+ +```csharp +private static RiakObject ToRiakObject(Order order) +{ + var orderRiakObjectId = new RiakObjectId(ordersBucketName, order.Id.ToString()); + var riakObject = new RiakObject(orderRiakObjectId, order); + + IntIndex salesPersonIdIndex = riakObject.IntIndex(ordersSalesPersonIdIndexName); + salesPersonIdIndex.Add(order.SalesPersonId.ToString()); + + BinIndex orderDateIndex = riakObject.BinIndex(ordersOrderDateIndexName); + orderDateIndex.Add(order.OrderDate.ToString("yyyy-MM-dd")); + + return riakObject; +} +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we +have to add entries to the indexes at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll look up the orders +by searching the `SalespersonId` integer index for Jane's id of `9000`. + +```csharp +// Query for order keys where the SalesPersonId index is set to 9000 +var riakIndexId = new RiakIndexId(ordersBucketName, ordersSalesPersonIdIndexName); +RiakResult indexRiakResult = client.GetSecondaryIndex(riakIndexId, 9000); // NB: *must* use 9000 as integer here. +CheckResult(indexRiakResult); +RiakIndexResult indexResult = indexRiakResult.Value; +Console.WriteLine("Jane's orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +Jane's orders (key values): 1, 3 +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's ID, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`OrderDate` binary index for entries between `2013-10-01` and +`2013-10-31`. + +```csharp +// Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31 +riakIndexId = new RiakIndexId(ordersBucketName, ordersOrderDateIndexName); +indexRiakResult = client.GetSecondaryIndex(riakIndexId, "2013-10-01", "2013-10-31"); // NB: *must* use strings here. +CheckResult(indexRiakResult); +indexResult = indexRiakResult.Value; +Console.WriteLine("October orders (key values): {0}", string.Join(", ", indexResult.IndexKeyTerms.Select(ikt => ikt.Key))); +``` + +Which returns: + +```text +October orders (key values): 1, 2 +``` + +We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly look up an object based on a + secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + +[taste_of_riak]: https://github.com/basho/taste-of-riak +[master_zip]: https://github.com/basho/taste-of-riak/archive/master.zip + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/erlang.md b/content/riak/kv/3.0.4/developing/getting-started/erlang.md new file mode 100644 index 0000000000..9604bc5f64 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/erlang.md @@ -0,0 +1,59 @@ +--- +title: "Getting Started with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Erlang" + identifier: "getting_started_erlang" + weight: 105 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.4/dev/taste-of-riak/erlang + - /riak/kv/3.0.4/dev/taste-of-riak/erlang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.4/using/running-a-cluster) first. + +To try this flavor of Riak, a working installation of Erlang is +required. You can also use the `erts` Erlang installation that comes +with Riak. + +## Client Setup + +Download the latest Erlang client from GitHub +([zip](https://github.com/basho/riak-erlang-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-erlang-client/)) and +extract it to your working directory. + +Next, open the Erlang console with the client library paths included. + +```bash +erl -pa CLIENT_LIBRARY_PATH/ebin/ CLIENT_LIBRARY_PATH/deps/*/ebin +``` + +Now let’s create a link to the Riak node. If you are using a single +local Riak node, use the following to create the link: + +```erlang +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087). +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```erlang +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.4/developing/getting-started/erlang/crud-operations) + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/3.0.4/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..c87e6643cb --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,172 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "CRUD Operations" + identifier: "getting_started_erlang_crud" + weight: 100 + parent: "getting_started_erlang" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few Riak objects. For these examples we'll be +using the bucket `test`. + +```erlang +MyBucket = <<"test">>. + +Val1 = 1. +Obj1 = riakc_obj:new(MyBucket, <<"one">>, Val1). +riakc_pb_socket:put(Pid, Obj1). +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next, let’s store a simple string value of `two` with a +matching key. + +```erlang +Val2 = <<"two">>. +Obj2 = riakc_obj:new(MyBucket, <<"two">>, Val2). +riakc_pb_socket:put(Pid, Obj2). +``` + +That was easy. Finally, let’s store something more complex, a tuple this +time. You will probably recognize the pattern by now. + +```erlang +Val3 = {value, 3}. 
+Obj3 = riakc_obj:new(MyBucket, <<"three">>, Val3). +riakc_pb_socket:put(Pid, Obj3). +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```erlang +{ok, Fetched1} = riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{ok, Fetched2} = riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{ok, Fetched3} = riakc_pb_socket:get(Pid, MyBucket, <<"three">>). + +Val1 =:= binary_to_term(riakc_obj:get_value(Fetched1)). %% true +Val2 =:= riakc_obj:get_value(Fetched2). %% true +Val3 =:= binary_to_term(riakc_obj:get_value(Fetched3)). %% true +``` + +That was easy. We simply request the objects by bucket and key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to do. Let’s update the value in the third +example to 42, update the Riak object, and then save it. + +```erlang +NewVal3 = setelement(2, Val3, 42). +UpdatedObj3 = riakc_obj:update_value(Fetched3, NewVal3). +{ok, NewestObj3} = riakc_pb_socket:put(Pid, UpdatedObj3, [return_body]). +``` + +We can verify that our new value was saved by looking at the value +returned. + +```erlang +rp(binary_to_term(riakc_obj:get_value(NewestObj3))). +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete, as they say. Fortunately, that's +easy too. + +```erlang +riakc_pb_socket:delete(Pid, MyBucket, <<"one">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"two">>). +riakc_pb_socket:delete(Pid, MyBucket, <<"three">>). +``` + +Now we can verify that the objects have been removed from Riak. + +```erlang +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"one">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"two">>). +{error,notfound} =:= riakc_pb_socket:get(Pid, MyBucket, <<"three">>). +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take, for example, this record that encapsulates some information about +a book. + +```erlang +rd(book, {title, author, body, isbn, copies_owned}). + +MobyDickBook = #book{title="Moby Dick", + isbn="1111979723", + author="Herman Melville", + body="Call me Ishmael. Some years ago...", + copies_owned=3}. +``` + +So we have some information about our Moby Dick collection that we want +to save. Storing this to Riak should look familiar by now: + +```erlang +MobyObj = riakc_obj:new(<<"books">>, + list_to_binary(MobyDickBook#book.isbn), + MobyDickBook). + +riakc_pb_socket:put(Pid, MobyObj). +``` + +Some of you may be thinking: "How does the Erlang Riak client +encode/decode my object?" If we fetch our book back and print the value, +we shall know: + +```erlang +{ok, FetchedBook} = riakc_pb_socket:get(Pid, + <<"books">>, + <<"1111979723">>). + +rp(riakc_obj:get_value(FetchedBook)). +``` + +The response: + +``` +<<131,104,6,100,0,4,98,111,111,107,107,0,9,77,111,98,121, + 32,68,105,99,107,107,0,15,72,101,114,109,97,110,32,77, + 101,108,118,105,108,108,101,107,0,34,67,97,108,108,32, + 109,101,32,73,115,104,109,97,101,108,46,32,83,111,109, + 101,32,121,101,97,114,115,32,97,103,111,46,46,46,107,0, + 10,49,49,49,49,57,55,57,55,50,51,97,3>> +``` + +Erlang binaries! The Riak Erlang client library encodes everything as +binaries. If we wanted to get a `book` object back we could use +`binary_to_term/1` to get our original object back: + +```erlang +rp(binary_to_term(riakc_obj:get_value(FetchedBook))). 
+```
+
+Next let’s clean up our mess:
+
+```erlang
+riakc_pb_socket:delete(Pid, <<"books">>, <<"1111979723">>).
+riakc_pb_socket:stop(Pid).
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/3.0.4/developing/getting-started/erlang/object-modeling.md
new file mode 100644
index 0000000000..5d574a26c6
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/erlang/object-modeling.md
@@ -0,0 +1,342 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Erlang"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Object Modeling"
+    identifier: "getting_started_erlang_object"
+    weight: 102
+    parent: "getting_started_erlang"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/object-modeling-erlang
+  - /riak/kv/3.0.4/dev/taste-of-riak/object-modeling-erlang
+---
+
+To get started, let's create the records that we'll be using.
+
+{{% note title="Code Download" %}}
+You can also download the code for this chapter at
+[Github](https://github.com/basho/taste-of-riak/tree/am-dem-erlang-modules/erlang/Ch03-Msgy-Schema).
+
+The Github version includes Erlang type specifications which have been omitted
+here for brevity.
+{{% /note %}}
+
+
+```erlang
+%% msgy.hrl
+
+-define(USER_BUCKET, <<"Users">>).
+-define(MSG_BUCKET, <<"Msgs">>).
+-define(TIMELINE_BUCKET, <<"Timelines">>).
+-define(INBOX, "Inbox").
+-define(SENT, "Sent").
+
+-record(user, {user_name, full_name, email}).
+
+-record(msg, {sender, recipient, created, text}).
+
+-record(timeline, {owner, msg_type, msgs}).
+```
+
+We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/3.0.4/developing/usage/bucket-types) here, so we don't need to specify one.
+
+To use these records to store data, we will first have to create a user
+record. Then, when a user creates a message, we will append that message
+to one or more timelines. If it's a private message, we'll append it to
+the Recipient's `Inbox` timeline and to the User's own `Sent` timeline.
+If it's a group message, we'll append it to the Group's timeline, as
+well as to the User's `Sent` timeline.
+
+#### Buckets and keys revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little trickier. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<username>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z` <br /> `marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```erlang
+%% user_repository.erl
+
+-module(user_repository).
+-export([save_user/2,
+         get_user/2]).
+-include("msgy.hrl").
+
+save_user(ClientPid, User) ->
+    RUser = riakc_obj:new(?USER_BUCKET,
+                          list_to_binary(User#user.user_name),
+                          User),
+    riakc_pb_socket:put(ClientPid, RUser).
+
+get_user(ClientPid, UserName) ->
+    {ok, RUser} = riakc_pb_socket:get(ClientPid,
+                                      ?USER_BUCKET,
+                                      list_to_binary(UserName)),
+    binary_to_term(riakc_obj:get_value(RUser)).
+```
+
+<br/>
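+To sanity-check the module, here is a minimal sketch of exercising it from
+the Erlang shell. It assumes the `msgy.hrl` records have been loaded with
+`rr/1` and that the host and port (`127.0.0.1:8087` below) match your
+cluster:
+
+```erlang
+%% Minimal usage sketch; compile first with c(user_repository). if needed.
+rr("msgy.hrl").
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+user_repository:save_user(Pid, #user{user_name="joeuser",
+                                     full_name="Joe User",
+                                     email="joe.user@basho.com"}).
+user_repository:get_user(Pid, "joeuser").
+%% => #user{user_name = "joeuser", ...}
+```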
+ +```erlang +%% msg_repository.erl + +-module(msg_repository). +-export([create_msg/3, + get_msg/2]). +-include("msgy.hrl"). + +-spec create_msg(user_name(), user_name(), text()) -> msg(). +create_msg(Sender, Recipient, Text) -> + #msg{sender=Sender, + recipient=Recipient, + created=get_current_iso_timestamp(), + text = Text}. + +-spec get_msg(pid(), riakc_obj:key()) -> msg(). +get_msg(ClientPid, MsgKey) -> + {ok, RMsg} = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + MsgKey), + binary_to_term(riakc_obj:get_value(RMsg)). + +%% @private +-spec get_current_iso_timestamp() -> datetimestamp(). +get_current_iso_timestamp() -> + {_,_,MicroSec} = DateTime = erlang:now(), + {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(DateTime), + lists:flatten( + io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B", + [Year, Month, Day, Hour, Min, Sec, MicroSec])). + +``` + +
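+One caveat with the timestamp helper above: `erlang:now/0` has been
+deprecated since OTP 18. A drop-in sketch of the same function built on
+`os:timestamp/0`, which returns the same `{MegaSecs, Secs, MicroSecs}`
+tuple, might look like this:
+
+```erlang
+%% Alternative get_current_iso_timestamp/0 without the deprecated
+%% erlang:now/0; the output format is unchanged.
+get_current_iso_timestamp() ->
+    {_, _, MicroSec} = Now = os:timestamp(),
+    {{Year,Month,Day},{Hour,Min,Sec}} = calendar:now_to_universal_time(Now),
+    lists:flatten(
+      io_lib:format("~4..0B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0B.~6..0B",
+                    [Year, Month, Day, Hour, Min, Sec, MicroSec])).
+```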
+ +```erlang +%% timeline_repository.erl + +-module(timeline_repository). +-export([post_msg/2, + get_timeline/4]). +-include("msgy.hrl"). + +post_msg(ClientPid, Msg) -> + %% Save the canonical copy + SavedMsg = save_msg(ClientPid, Msg), + MsgKey = binary_to_list(riakc_obj:key(SavedMsg)), + + %% Post to sender's Sent timeline + add_to_timeline(ClientPid, Msg, sent, MsgKey), + + %% Post to recipient's Inbox timeline + add_to_timeline(ClientPid, Msg, inbox, MsgKey), + ok. + +get_timeline(ClientPid, Owner, MsgType, Date) -> + TimelineKey = generate_key(Owner, MsgType, Date), + {ok, RTimeline} = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + binary_to_term(riakc_obj:get_value(RTimeline)). + +%% -------------------------------------------------------------------- + +%% @private +save_msg(ClientPid, Msg) -> + MsgKey = Msg#msg.sender ++ "_" ++ Msg#msg.created, + ExistingMsg = riakc_pb_socket:get(ClientPid, + ?MSG_BUCKET, + list_to_binary(MsgKey)), + SavedMsg = case ExistingMsg of + {error, notfound} -> + NewMsg = riakc_obj:new(?MSG_BUCKET, list_to_binary(MsgKey), Msg), + {ok, NewSaved} = riakc_pb_socket:put(ClientPid, + NewMsg, + [if_none_match, return_body]), + NewSaved; + {ok, Existing} -> Existing + end, + SavedMsg. + +%% @private +add_to_timeline(ClientPid, Msg, MsgType, MsgKey) -> + TimelineKey = generate_key_from_msg(Msg, MsgType), + ExistingTimeline = riakc_pb_socket:get(ClientPid, + ?TIMELINE_BUCKET, + list_to_binary(TimelineKey)), + UpdatedTimeline = case ExistingTimeline of + {error, notfound} -> + create_new_timeline(Msg, MsgType, MsgKey, TimelineKey); + {ok, Existing} -> + add_to_existing_timeline(Existing, MsgKey) + end, + + {ok, SavedTimeline} = riakc_pb_socket:put(ClientPid, + UpdatedTimeline, + [return_body]), + SavedTimeline. + +%% @private +create_new_timeline(Msg, MsgType, MsgKey, TimelineKey) -> + Owner = get_owner(Msg, MsgType), + Timeline = #timeline{owner=Owner, + msg_type=MsgType, + msgs=[MsgKey]}, + riakc_obj:new(?TIMELINE_BUCKET, list_to_binary(TimelineKey), Timeline). + +%% @private +add_to_existing_timeline(ExistingRiakObj, MsgKey) -> + ExistingTimeline = binary_to_term(riakc_obj:get_value(ExistingRiakObj)), + ExistingMsgList = ExistingTimeline#timeline.msgs, + UpdatedTimeline = ExistingTimeline#timeline{msgs=[MsgKey|ExistingMsgList]}, + riakc_obj:update_value(ExistingRiakObj, UpdatedTimeline). + +%% @private +get_owner(Msg, inbox) -> Msg#msg.recipient; +get_owner(Msg, sent) -> Msg#msg.sender. + +%% @private +generate_key_from_msg(Msg, MsgType) -> + Owner = get_owner(Msg, MsgType), + generate_key(Owner, MsgType, Msg#msg.created). + +%% @private +generate_key(Owner, MsgType, Date) when is_tuple(Date) -> + DateString = get_iso_datestamp_from_date(Date), + generate_key(Owner, MsgType, DateString); + +generate_key(Owner, MsgType, Datetimestamp) -> + DateString = get_iso_datestamp_from_iso_timestamp(Datetimestamp), + MsgTypeString = case MsgType of + inbox -> ?INBOX; + sent -> ?SENT + end, + Owner ++ "_" ++ MsgTypeString ++ "_" ++ DateString. + +%% @private +get_iso_datestamp_from_date(Date) -> + {Year,Month,Day} = Date, + lists:flatten(io_lib:format("~4..0B-~2..0B-~2..0B", [Year, Month, Day])). + +%% @private +get_iso_datestamp_from_iso_timestamp(CreatedString) -> + {Date, _} = lists:split(10,CreatedString), + Date. + +``` + +Finally, let's test them: + +```erlang +%% msgy.erl + +-module(msgy). +-export([main/0]). +-include("msgy.hrl"). 
+ +main() -> + %% Setup our repositories + {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017), + + %% Create and save users + Joe = #user{user_name="joeuser", + full_name="Joe User", + email="joe.user@basho.com"}, + + Marleen = #user{user_name="marleenmgr", + full_name="Marleen Manager", + email="marleen.manager@basho.com"}, + + user_repository:save_user(Pid, Joe), + user_repository:save_user(Pid, Marleen), + + %% Create new Msg, post to timelines + Msg = msg_repository:create_msg(Marleen#user.user_name, Joe#user.user_name, "Welcome to the company!"), + timeline_repository:post_msg(Pid, Msg), + + + %% Get Joe's inbox for today, get first message + {TodaysDate,_} = calendar:now_to_universal_time(erlang:now()), + JoesInboxToday = timeline_repository:get_timeline(Pid, Joe#user.user_name, inbox, TodaysDate), + + JoesFirstMessage = msg_repository:get_msg(Pid, hd(JoesInboxToday#timeline.msgs)), + + io:format("From: ~s~nMsg : ~s~n~n", [JoesFirstMessage#msg.sender, JoesFirstMessage#msg.text]), + ok. +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one +* It keeps our buckets and key names consistent +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +We can also easily "compute" key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application dependent. If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data. + + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/erlang/querying.md b/content/riak/kv/3.0.4/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..2f677600e8 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/erlang/querying.md @@ -0,0 +1,308 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Querying" + identifier: "getting_started_erlang_query" + weight: 101 + parent: "getting_started_erlang" +toc: true +aliases: + - /riak/3.0.4/dev/taste-of-riak/querying-erlang + - /riak/kv/3.0.4/dev/taste-of-riak/querying-erlang +--- + + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +A more comprehensive discussion can be found in [Key/Value Modeling]({{}}riak/kv/3.0.4/developing/key-value-modeling). 
+ +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, you +can denormalize all that associated data into a single object and store +it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size limits (objects greater than 1MB) +* Shared/referential Data (data that the object doesn't "own") +* Differences in access patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. + +Let's put some data into Riak so we can play with it. Fire up your +Erlang REPL with the client library in the path, and enter in the +following: + +```erlang +rd(customer, {customer_id, name, address, city, state, zip, phone, created_date}). +rd(item, {item_id, title, price}). +rd(order, {order_id, customer_id, salesperson_id, items, total, order_date}). +rd(order_summary_entry, {order_id, total, order_date}). +rd(order_summary, {customer_id, summaries}). + + +Customer = #customer{ customer_id= 1, + name= "John Smith", + address= "123 Main Street", + city= "Columbus", + state= "Ohio", + zip= "43210", + phone= "+1-614-555-5555", + created_date= {{2013,10,1},{14,30,26}}}. + +Orders = [ #order{ + order_id= 1, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "TCV37GIT4NJ", + title= "USB 3.0 Coffee Warmer", + price= 15.99 }, + #item{ + item_id= "PEG10BBF2PP", + title= "eTablet Pro, 24GB, Grey", + price= 399.99 }], + total= 415.98, + order_date= {{2013,10,1},{14,42,26}}}, + + #order{ + order_id= 2, + customer_id= 1, + salesperson_id= 9001, + items= [ + #item{ + item_id= "OAX19XWN0QP", + title= "GoSlo Digital Camera", + price= 359.99 }], + total= 359.99, + order_date= {{2013,10,15},{16,43,16}}}, + + #order { + order_id= 3, + customer_id= 1, + salesperson_id= 9000, + items= [ + #item{ + item_id= "WYK12EPU5EZ", + title= "Call of Battle= Goats - Gamesphere 4", + price= 69.99 }, + #item{ + item_id= "TJB84HAA8OA", + title= "Bricko Building Blocks", + price= 4.99 }], + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}} + ]. + +OrderSummary = #order_summary{ + customer_id= 1, + summaries= [ + #order_summary_entry{ + order_id= 1, + total= 415.98, + order_date= {{2013,10,1},{14,42,26}} + }, + #order_summary_entry{ + order_id= 2, + total= 359.99, + order_date= {{2013,10,15},{16,43,16}} + }, + #order_summary_entry{ + order_id= 3, + total= 74.98, + order_date= {{2013,11,3},{17,45,28}}}]}. + +## Remember to replace the ip and port parameters with those that match your cluster. +{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 10017). + +CustomerBucket = <<"Customers">>. +OrderBucket = <<"Orders">>. +OrderSummariesBucket = <<"OrderSummaries">>. + +CustObj = riakc_obj:new(CustomerBucket, + list_to_binary( + integer_to_list( + Customer#customer.customer_id)), + Customer). + +riakc_pb_socket:put(Pid, CustObj). 
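+
+%% Riak keys must be binaries, which is why the integer ids above and
+%% below are run through integer_to_list/1 and then list_to_binary/1.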
+ +StoreOrder = fun(Order) -> + OrderObj = riakc_obj:new(OrderBucket, + list_to_binary( + integer_to_list( + Order#order.order_id)), + Order), + riakc_pb_socket:put(Pid, OrderObj) +end. + +lists:foreach(StoreOrder, Orders). + + +OrderSummaryObj = riakc_obj:new(OrderSummariesBucket, + list_to_binary( + integer_to_list( + OrderSummary#order_summary.customer_id)), + OrderSummary). + +riakc_pb_socket:put(Pid, OrderSummaryObj). + +``` + +While individual `Customer` and `Order` objects don't change much (or +shouldn't change), the `OrderSummaries` object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. If we showed this information in our application often, it's only +one extra request to get all the info. + +```erlang +{ok, FetchedCustomer} = riakc_pb_socket:get(Pid, + CustomerBucket, + <<"1">>). +{ok, FetchedSummary} = riakc_pb_socket:get(Pid, + OrderSummariesBucket, + <<"1">>). +rp({binary_to_term(riakc_obj:get_value(FetchedCustomer)), + binary_to_term(riakc_obj:get_value(FetchedSummary))}). +``` + +Which returns our amalgamated objects: + +```erlang +{#customer{customer_id = 1,name = "John Smith", + address = "123 Main Street",city = "Columbus", + state = "Ohio",zip = "43210",phone = "+1-614-555-5555", + created_date = {{2013,10,1},{14,30,26}}}, + #order_summary{customer_id = 1, + summaries = [#order_summary_entry{order_id = 1, + total = 415.98, + order_date = {{2013,10,1},{14,42,26}}}, + #order_summary_entry{order_id = 2,total = 359.99, + order_date = {{2013,10,15},{16,43,16}}}, + #order_summary_entry{order_id = 3,total = 74.98, + order_date = {{2013,11,3},{17,45,28}}}]}} +``` + +While this pattern is very easy and extremely fast with respect to +queries and complexity, it's up to the application to know about these +intrinsic relationships. + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.4/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from an SQL world, Secondary Indexes (2i) are a lot +like SQL indexes. They are a way to quickly look up objects based on a +secondary key, without scanning through the whole dataset. This makes it +very easy to find groups of related data by values, or even ranges of +values. To properly show this off, we will now add some more data to our +application, and add some secondary index entries at the same time. + +```erlang +FormatDate = fun(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + lists:concat([Year,Month,Day,Hour,Min,Sec]) +end. 
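+
+%% Caution: lists:concat/1 does no zero-padding, so {2013,10,1} yields
+%% "2013101..." rather than "20131001...". The sample dates happen to
+%% fall inside the range query used below; for real data, prefer a
+%% zero-padded io_lib:format/2 date format.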
+ +AddIndicesToOrder = fun(OrderKey) -> + {ok, Order} = riakc_pb_socket:get(Pid, OrderBucket, + list_to_binary(integer_to_list(OrderKey))), + + OrderData = binary_to_term(riakc_obj:get_value(Order)), + OrderMetadata = riakc_obj:get_update_metadata(Order), + + MD1 = riakc_obj:set_secondary_index(OrderMetadata, + [{{binary_index, "order_date"}, + [FormatDate(OrderData#order.order_date)]}]), + + MD2 = riakc_obj:set_secondary_index(MD1, + [{{integer_index, "salesperson_id"}, + [OrderData#order.salesperson_id]}]), + + Order2 = riakc_obj:update_metadata(Order,MD2), + riakc_pb_socket:put(Pid,Order2) +end. + +lists:foreach(AddIndicesToOrder, [1,2,3]). + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we +have to add entries to the indices at the application level. Now let's +find all of Jane Appleseed's processed orders, we'll lookup the orders +by searching the `saleperson_id_int` index for Jane's id of `9000`. + +```erlang +riakc_pb_socket:get_index_eq(Pid, OrderBucket, {integer_index, "salesperson_id"}, 9000). +``` + +Which returns: + +```erlang +{ok,{index_results_v1,[<<"1">>,<<"3">>], + undefined,undefined}} +``` + +Jane processed orders 1 and 3. We used an "integer" index to reference +Jane's id, next let's use a "binary" index. Now, let's say that the VP +of Sales wants to know how many orders came in during October 2013. In +this case, we can exploit 2i's range queries. Let's search the +`order_date_bin` index for entries between `20131001` and `20131031`. + +```erlang +riakc_pb_socket:get_index_range(Pid, OrderBucket, + {binary_index, "order_date"}, + <<"20131001">>, <<"20131031">>). +``` + +Which returns: + +```erlang +{ok,{index_results_v1,[<<"1">>,<<"2">>], + undefined,undefined}} +``` + +Boom! Easy-peasy. We used 2i's range feature to search for a range of +values, and demonstrated binary indexes. + +So, to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a + secondary id other than the object's key. +* Indices can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/golang.md b/content/riak/kv/3.0.4/developing/getting-started/golang.md new file mode 100644 index 0000000000..ff6d1eca2b --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/golang.md @@ -0,0 +1,82 @@ +--- +title: "Getting Started with Go" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Go" + identifier: "getting_started_go" + weight: 107 + parent: "developing_getting_started" +toc: true +aliases: + - /riak/3.0.4/dev/taste-of-riak/golang + - /riak/kv/3.0.4/dev/taste-of-riak/golang +--- + +If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.4/using/running-a-cluster) first and ensure you have +[a working installation of Go](http://golang.org/doc/install). 
+
+## Client Setup
+
+First, install the [Riak Go client](https://github.com/basho/riak-go-client):
+
+```bash
+go get github.com/basho/riak-go-client
+```
+
+Next, download the [Taste of Riak - Go](https://github.com/basho/taste-of-riak/tree/master/go) utilities:
+
+```bash
+go get github.com/basho/taste-of-riak/go/util
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance:
+
+```golang
+package main

+import (
+	"encoding/binary"
+	"encoding/json"
+	"sync"
+
+	riak "github.com/basho/riak-go-client"
+	util "github.com/basho/taste-of-riak/go/util"
+)
+
+func main() {
+	var err error
+
+	// un-comment-out to enable debug logging
+	// riak.EnableDebugLogging = true
+
+	o := &riak.NewClientOptions{
+		RemoteAddresses: []string{util.GetRiakAddress()},
+	}
+
+	var c *riak.Client
+	c, err = riak.NewClient(o)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	defer func() {
+		if err := c.Stop(); err != nil {
+			util.ErrExit(err)
+		}
+	}()
+}
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.4/developing/getting-started/golang/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/golang/crud-operations.md b/content/riak/kv/3.0.4/developing/getting-started/golang/crud-operations.md
new file mode 100644
index 0000000000..0271498274
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/golang/crud-operations.md
@@ -0,0 +1,376 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "CRUD Operations"
+    identifier: "getting_started_go_crud"
+    weight: 100
+    parent: "getting_started_go"
+toc: true
+aliases:
+---
+
+
+## Creating Objects
+
+First let’s create a few objects and a bucket to keep them in:
+
+```golang
+	val1 := uint32(1)
+	val1buf := make([]byte, 4)
+	binary.LittleEndian.PutUint32(val1buf, val1)
+
+	val2 := "two"
+
+	val3 := struct{ MyValue int }{3} // NB: ensure that members are exported (i.e. capitalized)
+	var val3json []byte
+	val3json, err = json.Marshal(val3)
+	if err != nil {
+		util.ErrExit(err)
+	}
+
+	bucket := "test"
+
+	util.Log.Println("Creating Objects In Riak...")
+
+	objs := []*riak.Object{
+		{
+			Bucket:      bucket,
+			Key:         "one",
+			ContentType: "application/octet-stream",
+			Value:       val1buf,
+		},
+		{
+			Bucket:      bucket,
+			Key:         "two",
+			ContentType: "text/plain",
+			Value:       []byte(val2),
+		},
+		{
+			Bucket:      bucket,
+			Key:         "three",
+			ContentType: "application/json",
+			Value:       val3json,
+		},
+	}
+
+	var cmd riak.Command
+	wg := &sync.WaitGroup{}
+
+	for _, o := range objs {
+		cmd, err = riak.NewStoreValueCommandBuilder().
+			WithContent(o).
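+			// Build() returns an error for a misconfigured command rather
+			// than panicking, so each iteration checks err before executing.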
+ Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } + } + + wg.Wait() +``` + +In our first object, we have stored the integer 1 with the lookup key +of `one`: + +```golang +{ + Bucket: bucket, + Key: "one", + ContentType: "application/octet-stream", + Value: val1buf, +} +``` + +For our second object, we stored a simple string value of `two` with a +matching key: + +```golang +{ + Bucket: bucket, + Key: "two", + ContentType: "text/plain", + Value: []byte(val2), +} +``` + +Finally, the third object we stored was a bit of JSON: + +```golang +{ + Bucket: bucket, + Key: "three", + ContentType: "application/json", + Value: val3json, +} +``` + +## Reading Objects + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +Requesting the objects by key: + +```golang +var cmd riak.Command +wg := &sync.WaitGroup{} + +for _, o := range objs { + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(o). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() + +util.Log.Println("Reading Objects From Riak...") + +d := make(chan riak.Command, len(objs)) + +for _, o := range objs { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + Done: d, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +close(d) +``` + +Converting to JSON to compare a string key to a symbol +key: + +```golang +for done := range d { + f := done.(*riak.FetchValueCommand) + /* un-comment to dump fetched object as JSON + if json, jerr := json.MarshalIndent(f.Response, "", " "); err != nil { + util.ErrLog.Println(jerr) + } else { + util.Log.Println("fetched value: ", string(json)) + } + */ + obj := f.Response.Values[0] + switch obj.Key { + case "one": + if actual, expected := binary.LittleEndian.Uint32(obj.Value), val1; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "two": + if actual, expected := string(obj.Value), val2; actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + case "three": + obj3 = obj + val3.MyValue = 0 + if jerr := json.Unmarshal(obj.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) + } else { + if actual, expected := val3.MyValue, int(3); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj.Key, actual, expected) + } + } + default: + util.ErrLog.Printf("unrecognized key: %s", obj.Key) + } +} +``` + +## Updating Objects + +While some data may be static, other forms of data need to be +updated. + +Let’s update some values: + +```golang +util.Log.Println("Updating Object Three In Riak...") + +val3.MyValue = 42 +obj3.Value, err = json.Marshal(val3) +if err != nil { + util.ErrExit(err) +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj3). + WithReturnBody(true). 
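+	// WithReturnBody(true) asks Riak to send the stored object back, so
+	// the updated value can be verified below without a second fetch.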
+ Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} + +svcmd := cmd.(*riak.StoreValueCommand) +svrsp := svcmd.Response +obj3 = svrsp.Values[0] +val3.MyValue = 0 +if jerr := json.Unmarshal(obj3.Value, &val3); jerr != nil { + util.ErrLog.Println(jerr) +} else { + if actual, expected := val3.MyValue, int(42); actual != expected { + util.ErrLog.Printf("key: %s, actual %v, expected %v", obj3.Key, actual, expected) + } +} +util.Log.Println("updated object key: ", obj3.Key) +util.Log.Println("updated object value: ", val3.MyValue) +``` + +## Deleting Objects + +As a last step, we’ll demonstrate how to delete data. You’ll see that +the delete message can be called against either the bucket or the +object. + +```golang +for _, o := range objs { + cmd, err = riak.NewDeleteValueCommandBuilder(). + WithBucket(o.Bucket). + WithKey(o.Key). + Build() + if err != nil { + util.ErrLog.Println(err) + continue + } + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := c.ExecuteAsync(a); err != nil { + util.ErrLog.Println(err) + } +} + +wg.Wait() +``` + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. + +For example, this `struct` that represents some information about +a book: + +```golang +type Book struct { + ISBN string + Title string + Author string + Body string + CopiesOwned uint16 +} + +book := &Book{ + ISBN: "1111979723", + Title: "Moby Dick", + Author: "Herman Melville", + Body: "Call me Ishmael. Some years ago...", + CopiesOwned: 3, +} +``` + +We now have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```golang +var jbook []byte +jbook, err = json.Marshal(book) +if err != nil { + util.ErrExit(err) +} + +bookObj := &riak.Object{ + Bucket: "books", + Key: book.ISBN, + ContentType: "application/json", + Value: jbook, +} + +cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(bookObj). + WithReturnBody(false). + Build() +if err != nil { + util.ErrLog.Println(err) +} else { + if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) + } +} +``` + +If we fetch our book back and print the data: + +```golang +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket("books"). + WithKey(book.ISBN). + Build() +if err != nil { + util.ErrExit(err) +} +if err := c.Execute(cmd); err != nil { + util.ErrLog.Println(err) +} + +fcmd := cmd.(*riak.FetchValueCommand) +bookObj = fcmd.Response.Values[0] +util.Log.Println(string(bookObj.Value)) +``` + +The result is: + +```json +{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville", +"body":"Call me Ishmael. Some years ago...","copies_owned":3} +``` + +Now, let’s delete the book: + +```golang +... 
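+// (Delete elided in the original. It mirrors the Deleting Objects
+// section above: riak.NewDeleteValueCommandBuilder() scoped to the
+// "books" bucket and the book's ISBN key, executed via c.Execute.)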
+``` + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/golang/object-modeling.md b/content/riak/kv/3.0.4/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..7924e62a0f --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,552 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Go" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Object Modeling" + identifier: "getting_started_go_object" + weight: 102 + parent: "getting_started_go" +toc: true +aliases: + - /riak/3.0.4/dev/taste-of-riak/object-modeling-golang + - /riak/kv/3.0.4/dev/taste-of-riak/object-modeling-golang +--- + +{{% note title="Code Download" %}} +You can download the code for this chapter at +[Github](https://github.com/basho/taste-of-riak/tree/master/go/ch03/models). +{{% /note %}} + +To get started, let's create the models that we'll be using: + +```model.go +package models + +type Model interface { + GetId() string + SetId(id string) +} + +type modelImpl struct { + id string +} + +func (m *modelImpl) SetId(id string) { + m.id = id +} +``` + +Our user model: + +```user.go +package models + +type User struct { + modelImpl + UserName string + FullName string + Email string +} + +func NewUser(userName, fullName, email string) *User { + u := &User{ + UserName: userName, + FullName: fullName, + Email: email, + } + u.SetId(userName) + return u +} + +func (u *User) GetId() string { + return u.UserName +} +``` + +And our message model: + +```msg.go +package models + +import ( + "fmt" + "time" + + util "github.com/basho/taste-of-riak/go/util" +) + +type Msg struct { + modelImpl + Sender string + Recipient string + Text string + Created time.Time +} + +func NewMsg(sender, recipient, text string) *Msg { + m := &Msg{ + Sender: sender, + Recipient: recipient, + Text: text, + Created: time.Now(), + } + m.SetId(m.GetId()) + return m +} + +func (m *Msg) GetId() string { + return fmt.Sprintf("%s_%v", m.Sender, util.Iso8601(m.Created)) +} +``` + +Our timeline model: + +```timeline.go +package models + +type Timeline struct { + modelImpl + MsgKeys []string +} + +type TimelineType byte + +const ( + TimelineType_INBOX TimelineType = iota + TimelineType_SENT +) + +func NewTimeline(id string) *Timeline { + t := &Timeline{} + t.id = id + return t +} + +func (t *Timeline) AddMsg(msgKey string) { + t.MsgKeys = append(t.MsgKeys, msgKey) +} + +func (t *Timeline) GetId() string { + return t.id +} +```` + +We'll be using the bucket `Users` to store our data. We won't be [using bucket types]({{}}riak/kv/3.0.4/developing/usage/bucket-types) here, so we don't need to specify one. + +To use these records to store data, we will first have to create a user +record. Then, when a user creates a message, we will append that message +to one or more timelines. If it's a private message, we'll append it to +the Recipient's `Inbox` timeline and to the User's own `Sent` timeline. +If it's a group message, we'll append it to the Group's timeline, as +well as to the User's `Sent` timeline. + +#### Buckets and keys revisited + +Now that we've worked out how we will differentiate data in the system, +let's figure out our bucket and key names. + +The bucket names are straightforward. We can use `Users`, `Msgs`, and +`Timelines`. The key names, however, are a little trickier. 
In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<username>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z` <br /> `marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2 MB. Objects larger than that can
+hurt performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some modules to
+act as repositories that will help us create and work with these records
+in Riak:
+
+```repository.go
+package repositories
+
+import (
+	"encoding/json"
+	"errors"
+
+	riak "github.com/basho/riak-go-client"
+	models "github.com/basho/taste-of-riak/go/ch03/models"
+)
+
+var ErrUnexpectedSiblings = errors.New("Unexpected siblings in response!")
+
+type Repository interface {
+	Get(key string, notFoundOk bool) (models.Model, error)
+	Save(models.Model) (models.Model, error)
+	getBucketName() string
+	getModel() models.Model
+	getClient() *riak.Client
+}
+
+type repositoryImpl struct {
+	client *riak.Client
+}
+
+func (ri *repositoryImpl) getClient() *riak.Client {
+	return ri.client
+}
+
+func get(r Repository, key string, notFoundOk bool) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(notFoundOk).
+		Build()
+	if err != nil {
+		return nil, err
+	}
+	if err = client.Execute(cmd); err != nil {
+		return nil, err
+	}
+
+	fcmd := cmd.(*riak.FetchValueCommand)
+
+	if notFoundOk && len(fcmd.Response.Values) == 0 {
+		return nil, nil
+	}
+
+	if len(fcmd.Response.Values) > 1 {
+		// Siblings present that need resolution
+		// Here we'll just return an unexpected error
+		return nil, ErrUnexpectedSiblings
+	} else {
+		return buildModel(r.getModel(), fcmd.Response.Values[0])
+	}
+}
+
+func save(r Repository, m models.Model) (models.Model, error) {
+	client := r.getClient()
+	bucket := r.getBucketName()
+	key := m.GetId()
+
+	cmd, err := riak.NewFetchValueCommandBuilder().
+		WithBucket(bucket).
+		WithKey(key).
+		WithNotFoundOk(true).
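+		// save() fetches first so that an existing object (and its causal
+		// context) is reused on the store below, and siblings show up here.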
+ Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + modelJson, err := json.Marshal(m) + if err != nil { + return nil, err + } + + var objToInsertOrUpdate *riak.Object + fcmd := cmd.(*riak.FetchValueCommand) + if len(fcmd.Response.Values) > 1 { + // Siblings present that need resolution + // Here we'll just assume the first sibling is the "correct" one + // with which to update with the new Model data + // A conflict resolver can also be part of the options to fetchValue above + objToInsertOrUpdate = fcmd.Response.Values[0] + objToInsertOrUpdate.Value = modelJson + } else { + objToInsertOrUpdate = &riak.Object{ + Bucket: bucket, + Key: key, + ContentType: "application/json", + Charset: "utf8", + Value: modelJson, + } + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(objToInsertOrUpdate). + WithReturnBody(true). + Build() + if err != nil { + return nil, err + } + if err = client.Execute(cmd); err != nil { + return nil, err + } + + scmd := cmd.(*riak.StoreValueCommand) + if len(scmd.Response.Values) > 1 { + return nil, ErrUnexpectedSiblings + } + obj := scmd.Response.Values[0] + return buildModel(r.getModel(), obj) +} + +func buildModel(m models.Model, obj *riak.Object) (models.Model, error) { + err := json.Unmarshal(obj.Value, m) + m.SetId(obj.Key) + return m, err +} +``` + +
+ +```user-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type UserRepository struct { + repositoryImpl +} + +func NewUserRepository(c *riak.Client) *UserRepository { + r := &UserRepository{} + r.client = c + return r +} + +func (u *UserRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(u, key, notFoundOk) +} + +func (u *UserRepository) Save(m models.Model) (models.Model, error) { + return save(u, m) +} + +func (u *UserRepository) getBucketName() string { + return "Users" +} + +func (u *UserRepository) getModel() models.Model { + return &models.User{} +} +``` + +
+ +```msg-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type MsgRepository struct { + repositoryImpl +} + +func NewMsgRepository(c *riak.Client) *MsgRepository { + m := &MsgRepository{} + m.client = c + return m +} + +func (m *MsgRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(m, key, notFoundOk) +} + +func (m *MsgRepository) Save(model models.Model) (models.Model, error) { + return save(m, model) +} + +func (m *MsgRepository) getBucketName() string { + return "Msgs" +} + +func (m *MsgRepository) getModel() models.Model { + return &models.Msg{} +} +``` + +
+ +```timeline-repository.go +package repositories + +import ( + riak "github.com/basho/riak-go-client" + models "github.com/basho/taste-of-riak/go/ch03/models" +) + +type TimelineRepository struct { + repositoryImpl +} + +func NewTimelineRepository(c *riak.Client) *TimelineRepository { + t := &TimelineRepository{} + t.client = c + return t +} + +func (t *TimelineRepository) Get(key string, notFoundOk bool) (models.Model, error) { + return get(t, key, notFoundOk) +} + +func (t *TimelineRepository) Save(m models.Model) (models.Model, error) { + return save(t, m) +} + +func (t *TimelineRepository) getBucketName() string { + return "Timelines" +} + +func (t *TimelineRepository) getModel() models.Model { + return &models.Timeline{} +} +``` + +Finally, let's test them: + +```golang +package main + +import ( + "time" + + mgrs "github.com/basho/taste-of-riak/go/ch03/managers" + models "github.com/basho/taste-of-riak/go/ch03/models" + repos "github.com/basho/taste-of-riak/go/ch03/repositories" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +func main() { + var err error + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + util.Log.Println("Starting Client") + + o := &riak.NewClientOptions{ + RemoteAddresses: util.GetRiakAddresses(), + } + + var client *riak.Client + client, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := client.Stop(); err != nil { + util.ErrExit(err) + } + }() + + userRepo := repos.NewUserRepository(client) + msgRepo := repos.NewMsgRepository(client) + timelineRepo := repos.NewTimelineRepository(client) + timelineMgr := mgrs.NewTimelineManager(timelineRepo, msgRepo) + + util.Log.Println("Creating and saving users") + + marleen := models.NewUser("marleenmgr", "Marleen Manager", "marleen.manager@basho.com") + joe := models.NewUser("joeuser", "Joe User", "joe.user@basho.com") + + var m models.Model + m, err = userRepo.Save(marleen) + if err != nil { + util.ErrExit(err) + } + marleen = m.(*models.User) + + m, err = userRepo.Save(joe) + if err != nil { + util.ErrExit(err) + } + joe = m.(*models.User) + + util.Log.Println("Posting message") + + msg := models.NewMsg(marleen.UserName, joe.UserName, "Welcome to the company!") + if terr := timelineMgr.PostMsg(msg); terr != nil { + util.ErrExit(terr) + } + + util.Log.Println("Getting Joe's inbox for today") + + // Get Joe's inbox for today, get first message + now := time.Now() + joe_tl, terr := timelineMgr.GetTimeline(joe.UserName, models.TimelineType_INBOX, now) + if terr != nil { + util.ErrExit(terr) + } + + for _, msgKey := range joe_tl.MsgKeys { + m, merr := msgRepo.Get(msgKey, false) + if merr != nil { + util.ErrExit(merr) + } + tl_msg := m.(*models.Msg) + util.Log.Println("From: ", tl_msg.Sender) + util.Log.Println("Msg: ", tl_msg.Text) + } +} +``` + +As you can see, the repository pattern helps us with a few things: + +* It helps us to see if an object exists before creating a new one. +* It keeps our buckets and key names consistent. +* It provides us with a consistent interface to work with. + +While this set of repositories solves many of our problems, it is very +minimal and doesn't cover all the edge cases. For instance, what happens +if two different people try to create a user with the same username? + +Also, we can easily compute key names now, but how do we quickly look +up the last 10 messages a user sent? Many of these answers will be +application-dependent. 
If your application shows the last 10 messages in +reverse order, for example, you may want to store that set of data in +another collection object to make lookup faster. There are drawbacks to +every solution, but we recommend seeking out the key/value-based +solution first, as it will likely be the quickest. + +So to recap, in this chapter we learned: + +* How to choose bucket names. +* How to choose natural keys based on how we want to partition our data. + + + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/golang/querying.md b/content/riak/kv/3.0.4/developing/getting-started/golang/querying.md new file mode 100644 index 0000000000..533850c697 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/golang/querying.md @@ -0,0 +1,580 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with Go" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Querying" + identifier: "getting_started_go_query" + weight: 101 + parent: "getting_started_go" +toc: true +aliases: + - /riak/3.0.4/dev/taste-of-riak/querying-golang + - /riak/kv/3.0.4/dev/taste-of-riak/querying-golang +--- + +## Go Version Setup + +For the Go version, please download the source from GitHub by either [cloning](https://github.com/basho/taste-of-riak) the source code repository or downloading the [current zip of the master branch](https://github.com/basho/taste-of-riak/archive/master.zip). Ensure that the source is located in your `GOPATH`. The code for this chapter is in `go/ch02/ch02.go`. You may import this code into your favorite editor, or just run it from the command line using the `Makefile` if you are running on a *nix* OS. + +>A Quick Note on Querying and Schemas: +> +>Even with a key/value store, you will still have a logical database schema of how all the data relates to one another. This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application started with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak KV, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak KV so we can play with it. 
+ +```golang +package main + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "time" + + riak "github.com/basho/riak-go-client" + util "github.com/basho/taste-of-riak/go/util" +) + +const ( + timeFmt = "2006-01-02 15:04:05" + customersBucket = "Customers" + ordersBucket = "Orders" + orderSummariesBucket = "OrderSummaries" +) + +type Customer struct { + Name string + Address string + City string + State string + Zip string + Phone string + CreatedDate time.Time +} + +type Order struct { + Id string + CustomerId string + SalespersonId string + Items []*OrderItem + Total float32 + Date time.Time +} + +type OrderItem struct { + Id string + Title string + Price float32 +} + +type OrderSummary struct { + CustomerId string + Summaries []*OrderSummaryItem +} + +type OrderSummaryItem struct { + Id string + Total float32 + Date time.Time +} + +func main() { + var err error + var customerId string + + util.Log.Println("Creating Data") + + var cd time.Time + cd, err = time.Parse(timeFmt, "2013-10-01 14:30:26") + if err != nil { + util.ErrExit(err) + } + + customer := &Customer{ + Name: "John Smith", + Address: "123 Main Street", + City: "Columbus", + State: "Ohio", + Zip: "43210", + Phone: "+1-614-555-5555", + CreatedDate: cd, + } + + util.Log.Printf("customer: %v", customer) + + util.Log.Println("Starting Client") + + // un-comment-out to enable debug logging + // riak.EnableDebugLogging = true + + o := &riak.NewClientOptions{ + RemoteAddresses: []string{util.GetRiakAddress()}, + } + + var c *riak.Client + c, err = riak.NewClient(o) + if err != nil { + util.ErrExit(err) + } + + defer func() { + if err := c.Stop(); err != nil { + util.ErrExit(err) + } + }() + + util.Log.Println("Storing Customer") + + var cmd riak.Command + var customerJson []byte + + customerJson, err = json.Marshal(customer) + if err != nil { + util.ErrExit(err) + } + + obj := &riak.Object{ + Bucket: customersBucket, + ContentType: "application/json", + Value: customerJson, + } + + cmd, err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + WithReturnBody(true). + Build() + if err != nil { + util.ErrExit(err) + } + if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) + } + + svc := cmd.(*riak.StoreValueCommand) + customerId = svc.Response.GeneratedKey + if customerId == "" { + util.ErrExit(errors.New("expected generated customer Id")) + } else { + util.Log.Println("Customer ID:", customerId) + } + + util.Log.Println("Storing Data") + + var orders []*Order + orders, err = createOrders(customerId) + if err != nil { + util.ErrExit(err) + } + + var orderSummary *OrderSummary + var orderSummaryJson []byte + orderSummary = createOrderSummary(customerId, orders) + + ccmds := 1 + len(orders) + cmds := make([]riak.Command, ccmds) + + // command to store OrderSummary + orderSummaryJson, err = json.Marshal(orderSummary) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: orderSummariesBucket, + Key: customerId, + ContentType: "application/json", + Value: orderSummaryJson, + } + cmds[0], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if err != nil { + util.ErrExit(err) + } + + for i, order := range orders { + // command to store Order + var orderJson []byte + orderJson, err = json.Marshal(order) + if err != nil { + util.ErrExit(err) + } + obj = &riak.Object{ + Bucket: ordersBucket, + Key: order.Id, + ContentType: "application/json", + Value: orderJson, + } + cmds[i+1], err = riak.NewStoreValueCommandBuilder(). + WithContent(obj). 
+ Build() + if err != nil { + util.ErrExit(err) + } + } + + errored := false + wg := &sync.WaitGroup{} + for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } + wg.Wait() + if errored { + util.ErrExit(errors.New("error, exiting!")) + } +} + +func createOrders(customerId string) ([]*Order, error) { + o := make([]*Order, 3) + + d, err := time.Parse(timeFmt, "2013-10-01 14:42:26") + if err != nil { + return nil, err + } + o[0] = &Order{ + Id: "1", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "TCV37GIT4NJ", + Title: "USB 3.0 Coffee Warmer", + Price: 15.99, + }, + { + Id: "PEG10BBF2PP", + Title: "eTablet Pro, 24GB; Grey", + Price: 399.99, + }, + }, + Total: 415.98, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-10-15 16:43:16") + if err != nil { + return nil, err + } + o[1] = &Order{ + Id: "2", + CustomerId: customerId, + SalespersonId: "9001", + Items: []*OrderItem{ + { + Id: "OAX19XWN0QP", + Title: "GoSlo Digital Camera", + Price: 359.99, + }, + }, + Total: 359.99, + Date: d, + } + + d, err = time.Parse(timeFmt, "2013-11-03 17:45:28") + if err != nil { + return nil, err + } + o[2] = &Order{ + Id: "3", + CustomerId: customerId, + SalespersonId: "9000", + Items: []*OrderItem{ + { + Id: "WYK12EPU5EZ", + Title: "Call of Battle : Goats - Gamesphere 4", + Price: 69.99, + }, + { + Id: "TJB84HAA8OA", + Title: "Bricko Building Blocks", + Price: 4.99, + }, + }, + Total: 74.98, + Date: d, + } + + return o, nil +} + +func createOrderSummary(customerId string, orders []*Order) *OrderSummary { + + s := &OrderSummary{ + CustomerId: customerId, + Summaries: make([]*OrderSummaryItem, len(orders)), + } + + for i, o := range orders { + s.Summaries[i] = &OrderSummaryItem{ + Id: o.Id, + Total: o.Total, + Date: o.Date, + } + } + + return s +} +``` + +While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders and also holding some relevant data, such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```golang +util.Log.Println("Fetching related data by shared key") + +cmds = cmds[:0] + +// fetch customer +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(customersBucket). + WithKey(customerId). + Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +// fetch OrderSummary +cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(orderSummariesBucket). + WithKey(customerId). 
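+	// The summary shares the customer's key; only the bucket differs.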
+ Build() +if err != nil { + util.ErrExit(err) +} +cmds = append(cmds, cmd) + +doneChan := make(chan riak.Command) +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Bucket { + case customersBucket: + util.Log.Printf("Customer 1: %v", string(obj.Value)) + case orderSummariesBucket: + util.Log.Printf("OrderSummary 1: %v", string(obj.Value)) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} +``` + +Which returns our amalgamated objects: + +```sh +2015/12/29 09:44:10 OrderSummary 1: {"CustomerId":"I4R9AdTpJ7RL13qj14ED9Qjzbyy","Summaries":[{"Id":"1","Total":415.98,"Date":"2013-10-01T14:42:26Z"},{"Id":"2","Total":359.99,"Date":"2013-10-15T16:43:16Z"},{"Id":"3","Total":74.98,"Date":"2013-11-03T17:45:28Z"}]} +2015/12/29 09:44:10 Customer 1: {"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01T14:30:26Z" +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +### Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.4/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values or ranges of values. To properly show this off, we will add some more data to our application, and add some secondary index entries at the same time: + +```golang +util.Log.Println("Adding Index Data") + +// fetch orders to add index data +cmds = cmds[:0] + +for _, order := range orders { + cmd, err = riak.NewFetchValueCommandBuilder(). + WithBucket(ordersBucket). + WithKey(order.Id). 
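+		// 2i entries live in each object's metadata, so every order is
+		// fetched, given its index entries, and written back below.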
+ Build() + if err != nil { + util.ErrExit(err) + } + cmds = append(cmds, cmd) +} + +errored = false +for _, cmd := range cmds { + a := &riak.Async{ + Command: cmd, + Done: doneChan, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } +} +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +errored = false +for i := 0; i < len(cmds); i++ { + select { + case d := <-doneChan: + if fv, ok := d.(*riak.FetchValueCommand); ok { + obj := fv.Response.Values[0] + switch obj.Key { + case "1": + obj.AddToIntIndex("SalespersonId_int", 9000) + obj.AddToIndex("OrderDate_bin", "2013-10-01") + case "2": + obj.AddToIntIndex("SalespersonId_int", 9001) + obj.AddToIndex("OrderDate_bin", "2013-10-15") + case "3": + obj.AddToIntIndex("SalespersonId_int", 9000) + obj.AddToIndex("OrderDate_bin", "2013-11-03") + } + scmd, serr := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() + if serr != nil { + util.ErrExit(serr) + } + a := &riak.Async{ + Command: scmd, + Wait: wg, + } + if eerr := c.ExecuteAsync(a); eerr != nil { + errored = true + util.ErrLog.Println(eerr) + } + } else { + util.ErrExit(fmt.Errorf("unknown response command type: %v", reflect.TypeOf(d))) + } + case <-time.After(5 * time.Second): + util.ErrExit(errors.New("fetch operations took too long")) + } +} + +if errored { + util.ErrExit(errors.New("error, exiting!")) +} + +wg.Wait() +close(doneChan) +``` + +As you may have noticed, ordinary key/value data is opaque to 2i, so we have to add entries to the indexes at the application level. + +Now let's find all of Jane Appleseed's processed orders. We'll lookup the orders by searching the `saleperson_id_int` index for Jane's id of `9000`: + +```golang +util.Log.Println("Index Queries") + +cmd, err = riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucket(ordersBucket). + WithIndexName("SalespersonId_int"). + WithIndexKey("9000"). + Build() +if err != nil { + util.ErrExit(err) +} + +if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) +} + +qcmd := cmd.(*riak.SecondaryIndexQueryCommand) +for _, rslt := range qcmd.Response.Results { + util.Log.Println("Jane's Orders, key: ", string(rslt.ObjectKey)) +} +``` + +Which returns: + +```sh +2015/12/29 09:44:10 Jane's Orders, key: 3 +2015/12/29 09:44:10 Jane's Orders, key: 1 +``` + +Jane processed orders 1 and 3. We used an *integer* index to reference Jane's id, next let's use a *binary* index. + +Let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `20131001` and `20131031`: + +```golang +cmd, err = riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucket(ordersBucket). + WithIndexName("OrderDate_bin"). + WithRange("2013-10-01", "2013-10-31"). + Build() +if err != nil { + util.ErrExit(err) +} + +if eerr := c.Execute(cmd); eerr != nil { + util.ErrExit(eerr) +} + +qcmd = cmd.(*riak.SecondaryIndexQueryCommand) +for _, rslt := range qcmd.Response.Results { + util.Log.Println("October's Orders, key: ", string(rslt.ObjectKey)) +} +``` + +Which returns: + +```sh +2015/12/29 09:44:10 October's Orders, key: 1 +2015/12/29 09:44:10 October's Orders, key: 2 +``` + +Easy! We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. 
+* Indexes can have either Integer or Binary(String) keys.
+* You can search for specific values or a range of values.
+* Riak will return a list of keys that match the index query.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/java.md b/content/riak/kv/3.0.4/developing/getting-started/java.md
new file mode 100644
index 0000000000..aa968777f2
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/java.md
@@ -0,0 +1,93 @@
+---
+title: "Getting Started with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Java"
+    identifier: "getting_started_java"
+    weight: 100
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/java
+  - /riak/kv/3.0.4/dev/taste-of-riak/java
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.4/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Java is required.
+
+## Client Setup
+
+To include the Riak Java client in your project, add it to your
+project's dependencies. Here is a Maven example:
+
+```xml
+<dependency>
+  <groupId>com.basho.riak</groupId>
+  <artifactId>riak-client</artifactId>
+  <version>2.1.1</version>
+</dependency>
+```
+
+Next, download
+[`TasteOfRiak.java`](https://github.com/basho/basho_docs/raw/master/extras/code-examples/TasteOfRiak.java)
+source code for this tutorial, and save it to your working directory.
+
+{{% note title="Configuring for a local cluster" %}}
+The `TasteOfRiak.java` file that you downloaded is set up to communicate with
+a 1-node Riak cluster listening on `localhost` port 10017. We recommend
+modifying the connection info directly within the `setUpCluster()` method.
+{{% /note %}}
+
+If you execute the `TasteOfRiak.java` file within your IDE, you should
+see the following:
+
+```
+Basic object created
+Location object created for quote object
+StoreValue operation created
+Client object successfully created
+Object storage operation successfully completed
+Success! The object we created and the object we fetched have the same value
+Quote object successfully deleted
+Book object created
+Moby Dick information now stored in Riak
+Book object successfully fetched
+Success! All of our tests check out
+```
+
+Since Java doesn’t have a REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting Up the Cluster
+
+The first step in using the Riak Java client is to create a cluster
+object to facilitate all interactions with Riak. You'll see this on line
+72:
+
+```java
+RiakCluster cluster = setUpCluster();
+```
+
+This calls the private `setUpCluster` method which begins on line 25.
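+For reference, a cluster setup along these lines looks like the following (a
+minimal sketch, assuming the standard riak-java-client 2.x builders; the host,
+port, and error handling in the real `TasteOfRiak.java` may differ):
+
+```java
+// A sketch of a setUpCluster()-style method; adjust the address and
+// port to match your own cluster.
+private static RiakCluster setUpCluster() throws UnknownHostException {
+    // Describe the node to connect to
+    RiakNode node = new RiakNode.Builder()
+            .withRemoteAddress("127.0.0.1")
+            .withRemotePort(10017)
+            .build();
+
+    // Build a cluster around that node and start it up
+    RiakCluster cluster = new RiakCluster.Builder(node).build();
+    cluster.start();
+
+    return cluster;
+}
+```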
+Using that `cluster` object, we can instantiate a client object which
+will execute all Riak interactions:
+
+```java
+RiakClient client = new RiakClient(cluster);
+```
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.4/developing/getting-started/java/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/java/crud-operations.md b/content/riak/kv/3.0.4/developing/getting-started/java/crud-operations.md
new file mode 100644
index 0000000000..a4a99e5ef7
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/java/crud-operations.md
@@ -0,0 +1,206 @@
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "CRUD Operations"
+    identifier: "getting_started_java_crud"
+    weight: 100
+    parent: "getting_started_java"
+toc: true
+aliases:
+---
+
+## Creating Objects in Riak
+
+The first object that we create is a very basic object with a content
+type of `text/plain`. Once that object is created, we create a
+`StoreValue` operation that will store the object later on down the line:
+
+```java
+RiakObject quoteObject = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("You're dangerous, Maverick"));
+Namespace quotesBucket = new Namespace("quotes");
+Location quoteObjectLocation = new Location(quotesBucket, "Iceman");
+StoreValue storeOp = new StoreValue.Builder(quoteObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+```
+
+We then use our `client` object to execute the storage operation:
+
+```java
+StoreValue.Response response = client.execute(storeOp);
+```
+
+## Reading Objects from Riak
+
+After that, we check to make sure that the stored object has the same
+value as the object that we created. This requires us to fetch the
+object by way of a `FetchValue` operation:
+
+```java
+FetchValue fetchOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject fetchedObject = client.execute(fetchOp).getValue(RiakObject.class);
+assert(fetchedObject.getValue().equals(quoteObject.getValue()));
+```
+
+If the values are equal, as they should be, the Java client will say
+`Success! The object we created and the object we fetched have the same
+value`. If not, then the client will throw an exception.
+
+## Updating Objects
+
+Once we've read the object back in from Riak, we can update the object
+and store it back as we did before with the `StoreValue` object:
+
+```java
+fetchedObject.setValue(BinaryValue.create("You can be my wingman any time."));
+StoreValue updateOp = new StoreValue.Builder(fetchedObject)
+        .withLocation(quoteObjectLocation)
+        .build();
+StoreValue.Response updateOpResp = client.execute(updateOp);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{}}riak/kv/3.0.4/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/3.0.4/developing/usage/conflict-resolution/)
+documentation.
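+To double-check the update, we can re-fetch the object and compare values (a
+quick sketch that reuses the fetch pattern from above):
+
+```java
+// Re-fetch the object and confirm the stored value matches what we wrote
+FetchValue verifyOp = new FetchValue.Builder(quoteObjectLocation)
+        .build();
+RiakObject updatedObject = client.execute(verifyOp).getValue(RiakObject.class);
+assert(updatedObject.getValue().equals(
+        BinaryValue.create("You can be my wingman any time.")));
+```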
+
+## Deleting Objects
+
+Now that we've stored and then fetched the object, we can delete it by
+creating and executing a `DeleteValue` operation:
+
+```java
+DeleteValue deleteOp = new DeleteValue.Builder(quoteObjectLocation)
+        .build();
+client.execute(deleteOp);
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this plain old Java object (POJO) that encapsulates
+some knowledge about a book.
+
+```java
+public class Book {
+    public String title;
+    public String author;
+    public String body;
+    public String isbn;
+    public Integer copiesOwned;
+}
+```
+
+By default, the Java Riak client serializes POJOs as JSON. Let's create
+a new `Book` object to store:
+
+```java
+Book mobyDick = new Book();
+mobyDick.title = "Moby Dick";
+mobyDick.author = "Herman Melville";
+mobyDick.body = "Call me Ishmael. Some years ago...";
+mobyDick.isbn = "1111979723";
+mobyDick.copiesOwned = 3;
+```
+
+Now we can store that POJO object just like we stored the simpler
+object earlier:
+
+```java
+Namespace booksBucket = new Namespace("books");
+Location mobyDickLocation = new Location(booksBucket, "moby_dick");
+StoreValue storeBookOp = new StoreValue.Builder(mobyDick)
+        .withLocation(mobyDickLocation)
+        .build();
+client.execute(storeBookOp);
+```
+
+If we fetch the object (using the same method we showed above and in
+`TasteOfRiak.java`), we should get the following:
+
+```json
+{
+  "title": "Moby Dick",
+  "author": "Herman Melville",
+  "body": "Call me Ishmael. Some years ago...",
+  "isbn": "1111979723",
+  "copiesOwned": 3
+}
+```
+
+Since we really like Moby Dick, let's buy a couple more copies
+and update the POJO.
+
+To update the POJO, we would use `UpdateValue` by
+extending a new `BookUpdate` class as follows:
+
+```java
+public static class BookUpdate extends UpdateValue.Update<Book> {
+    private final Book update;
+    public BookUpdate(Book update){
+        this.update = update;
+    }
+
+    @Override
+    public Book apply(Book t) {
+        if(t == null) {
+            t = new Book();
+        }
+
+        t.author = update.author;
+        t.body = update.body;
+        t.copiesOwned = update.copiesOwned;
+        t.isbn = update.isbn;
+        t.title = update.title;
+
+        return t;
+    }
+}
+```
+
+Then using the `BookUpdate` class with our `mobyDick` object:
+
+```java
+mobyDick.copiesOwned = 5;
+BookUpdate updatedBook = new BookUpdate(mobyDick);
+
+UpdateValue updateValue = new UpdateValue.Builder(mobyDickLocation)
+        .withUpdate(updatedBook).build();
+UpdateValue.Response response = client.execute(updateValue);
+```
+
+For more in-depth information on updating objects and sibling resolution in
+Riak, see [Updating Objects]({{}}riak/kv/3.0.4/developing/usage/updating-objects/)
+and [Conflict Resolution]({{}}riak/kv/3.0.4/developing/usage/conflict-resolution/)
+documentation.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/java/object-modeling.md b/content/riak/kv/3.0.4/developing/getting-started/java/object-modeling.md
new file mode 100644
index 0000000000..848cda8fb0
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/java/object-modeling.md
@@ -0,0 +1,432 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Object Modeling"
+    identifier: "getting_started_java_object"
+    weight: 102
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/object-modeling-java
+  - /riak/kv/3.0.4/dev/taste-of-riak/object-modeling-java
+---
+
+To get started, let's create the models that we'll be using.
+
+```java
+package com.basho.msgy.Models;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class Msg {
+    public String Sender;
+    public String Recipient;
+    public String Created;
+    public String Text;
+
+    public static Msg createNew(String sender, String recipient, String text) {
+        Msg msg = new Msg();
+        msg.Sender = sender;
+        msg.Recipient = recipient;
+        msg.Text = text;
+        msg.Created = GetCurrentISO8601Timestamp();
+        return msg;
+    }
+
+    private static String GetCurrentISO8601Timestamp() {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        // Java Dates don't have microsecond resolution :(
+        // Pad out to microseconds to match other examples.
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'000'");
+        df.setTimeZone(tz);
+        return df.format(new Date());
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import java.util.ArrayList;
+
+public class Timeline {
+
+    public enum TimelineType
+    {
+        Inbox,
+        Sent;
+
+        @Override
+        public String toString() {
+            if(this == Inbox)
+                return "Inbox";
+            else
+                return "Sent";
+        }
+    }
+
+    public Timeline() {
+        Msgs = new ArrayList<String>();
+    }
+
+    public String Owner;
+    public String Type;
+    public ArrayList<String> Msgs;
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Models;
+
+import com.basho.riak.client.api.annotations.RiakBucketName;
+import com.basho.riak.client.api.annotations.RiakKey;
+
+public class User {
+    @RiakKey
+    public String UserName;
+
+    @RiakBucketName
+    final String bucketName = "Users";
+
+    public String FullName;
+    public String Email;
+
+    public User() {}
+
+    public User(String userName, String fullName, String email) {
+        this.UserName = userName;
+        this.FullName = fullName;
+        this.Email = email;
+    }
+}
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br/>
+`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. With the
+Java client, we can use the `@RiakKey` annotation to tell the client
+that we want to use the `UserName` member as the key. It will
+automatically use that value in the future, instead of having to pass the
+key in as another parameter when storing a value.
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```java
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class MsgRepository {
+
+    static final String BUCKET_NAME = "Msgs";
+    protected RiakClient client;
+
+    public MsgRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public Msg get(String msgKey) throws Exception {
+        Location key = new Location(new Namespace(BUCKET_NAME), msgKey);
+        FetchValue fetch = new FetchValue.Builder(key).build();
+        FetchValue.Response response = client.execute(fetch);
+        return response.getValue(Msg.class);
+    }
+
+    public String save(Msg msg) throws Exception {
+        String msgKey = generateKey(msg);
+        // Msg carries no key annotations, so store at an explicit location
+        Location loc = new Location(new Namespace(BUCKET_NAME), msgKey);
+        StoreValue store = new StoreValue.Builder(msg).withLocation(loc).build();
+        client.execute(store);
+        return msgKey;
+    }
+
+    private String generateKey(Msg msg) {
+        return msg.Sender + "_" + msg.Created;
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+public class TimelineRepository {
+
+    static final String BUCKET_NAME = "Timelines";
+    protected RiakClient client;
+    protected MsgRepository msgRepo;
+
+    public TimelineRepository(RiakClient client) {
+        this.client = client;
+        this.msgRepo = new MsgRepository(this.client);
+    }
+
+    public void postMsg(Msg msg) throws Exception {
+        String msgKey = msgRepo.save(msg);
+
+        // Post to recipient's Inbox timeline
+        addToTimeline(msg, Timeline.TimelineType.Inbox, msgKey);
+
+        // Post to sender's Sent timeline
+        addToTimeline(msg, Timeline.TimelineType.Sent, msgKey);
+    }
+
+    private void addToTimeline(Msg msg, Timeline.TimelineType type, String msgKey) throws Exception {
+        String timelineKey = generateKeyFromMsg(msg, type);
+
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        Timeline timeline = client.execute(fetch).getValue(Timeline.class);
+
+        if (timeline != null) {
+            timeline = addToExistingTimeline(timeline, msgKey);
+        } else {
+            timeline = createNewTimeline(msg, type, msgKey);
+        }
+
+        // Store the timeline back at the location it was fetched from
+        StoreValue store = new StoreValue.Builder(timeline).withLocation(loc).build();
+        client.execute(store);
+    }
+
+    public Timeline createNewTimeline(Msg msg, Timeline.TimelineType type, String msgKey) {
+        String owner = getOwner(msg, type);
+
+        Timeline newTimeline = new Timeline();
+        newTimeline.Owner = owner;
+        newTimeline.Type = type.toString();
+        newTimeline.Msgs.add(msgKey);
+
+        return newTimeline;
+    }
+
+    public Timeline addToExistingTimeline(Timeline timeline, String msgKey) {
+        timeline.Msgs.add(msgKey);
+        return timeline;
+    }
+
+    public Timeline getTimeline(String ownerUsername, Timeline.TimelineType type, Date date) throws Exception {
+        String timelineKey = generateKey(ownerUsername, type, date);
+        Location loc = new Location(new Namespace(BUCKET_NAME), timelineKey);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        return client.execute(fetch).getValue(Timeline.class);
+    }
+
+    private String generateKeyFromMsg(Msg msg, Timeline.TimelineType type) {
+        String owner = getOwner(msg, type);
+        String dateString = msg.Created.substring(0, 10);
+        return generateKey(owner, type, dateString);
+    }
+
+    private String getOwner(Msg msg, Timeline.TimelineType type) {
+        if(type == Timeline.TimelineType.Inbox)
+            return msg.Recipient;
+        else
+            return msg.Sender;
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, Date date) {
+        String dateString = getIso8601DateStringFromDate(date);
+        return generateKey(ownerUsername, type, dateString);
+    }
+
+    private String generateKey(String ownerUsername, Timeline.TimelineType type, String dateString) {
+        return ownerUsername + "_" + type.toString() + "_" + dateString;
+    }
+
+    private String getIso8601DateStringFromDate(Date date) {
+        TimeZone tz = TimeZone.getTimeZone("UTC");
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
+        df.setTimeZone(tz);
+        return df.format(date);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+package com.basho.msgy.Repositories;
+
+import com.basho.msgy.Models.User;
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.commands.kv.FetchValue;
+import com.basho.riak.client.api.commands.kv.StoreValue;
+import com.basho.riak.client.core.query.Location;
+import com.basho.riak.client.core.query.Namespace;
+
+public class UserRepository {
+    static final String BUCKET_NAME = "Users";
+    protected RiakClient client;
+
+    public UserRepository(RiakClient client) {
+        this.client = client;
+    }
+
+    public void save(User user) throws Exception {
+        // The @RiakBucketName and @RiakKey annotations on User
+        // supply the bucket and key for us
+        StoreValue store = new StoreValue.Builder(user).build();
+        client.execute(store);
+    }
+
+    public User get(String userName) throws Exception {
+        Location loc = new Location(new Namespace(BUCKET_NAME), userName);
+        FetchValue fetch = new FetchValue.Builder(loc).build();
+        return client.execute(fetch).getValue(User.class);
+    }
+}
+
+```
+
+Finally, let's test them:
+
+```java
+package com.basho.msgy;
+
+import com.basho.msgy.Models.Msg;
+import com.basho.msgy.Models.Timeline;
+import com.basho.msgy.Models.User;
+import com.basho.msgy.Repositories.MsgRepository;
+import com.basho.msgy.Repositories.TimelineRepository;
+import com.basho.msgy.Repositories.UserRepository;
+import com.basho.riak.client.api.RiakClient;
+
+import java.util.Date;
+
+public class MsgyMain {
+
+    public static void main(String[] args) throws Exception {
+        // Set up our repositories
+        RiakClient client = RiakClient.newClient(10017, "127.0.0.1");
+
+        UserRepository userRepo = new UserRepository(client);
+        MsgRepository msgRepo = new MsgRepository(client);
+        TimelineRepository timelineRepo = new TimelineRepository(client);
+
+        // Create and save users
+        User marleen = new User("marleenmgr",
+                "Marleen Manager",
+                "marleen.manager@basho.com");
+
+        User joe = new User("joeuser",
+                "Joe User",
+                "joe.user@basho.com");
+
+        userRepo.save(marleen);
+        userRepo.save(joe);
+
+        // Create new Msg, post to timelines
+        Msg msg = Msg.createNew(marleen.UserName,
+                joe.UserName,
+                "Welcome to the company!");
+
+        timelineRepo.postMsg(msg);
+
+        // Get Joe's inbox for today, get first message
+        Timeline joesInboxToday = timelineRepo.getTimeline(joe.UserName,
+                Timeline.TimelineType.Inbox,
+                new Date());
+
+        Msg joesFirstMsg = msgRepo.get(joesInboxToday.Msgs.get(0));
+
+        System.out.println("From: " + joesFirstMsg.Sender);
+        System.out.println("Msg : " + joesFirstMsg.Text);
+        System.out.println("");
+
+        client.shutdown();
+    }
+}
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application-dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/java/querying.md b/content/riak/kv/3.0.4/developing/getting-started/java/querying.md
new file mode 100644
index 0000000000..112b189407
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/java/querying.md
@@ -0,0 +1,280 @@
+---
+title_supertext: "Getting Started:"
+title: "Querying with Java"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Querying"
+    identifier: "getting_started_java_query"
+    weight: 101
+    parent: "getting_started_java"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/querying-java
+  - /riak/kv/3.0.4/dev/taste-of-riak/querying-java
+---
+
+## Java Version Setup
+
+For the Java version, please download the source from GitHub by either
+[cloning](https://github.com/basho/taste-of-riak) the source code
+repository or downloading the [current zip of the master
+branch](https://github.com/basho/taste-of-riak/archive/master.zip).
+The code for this chapter is in `/java/Ch02-Schemas-and-Indexes`. You
+may import this code into your favorite editor, or just run it from the
+command line using the commands in `BuildAndRun.sh` if you are running
+on a *nix OS.
+
+## A Quick Note on Querying and Schemas
+
+_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty
+word. Even in a key/value store, you will still have a logical database
+schema of how all the data relates to other data. This can be as simple
+as using the same key across multiple buckets for different types of
+data to having fields in your data that are related by name. These
+querying methods will introduce you to some ways of laying out your data
+in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your
+application's feet wet with NoSQL is to denormalize your data into
+related chunks. For example, with a customer database, you might have
+separate tables for customers, addresses, preferences, etc. In Riak,
+you can denormalize all that associated data into a single object and
+store it into a `Customer` bucket. You can keep pulling in associated
+data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs.
+  often)
+
+At one of these points we will have to split the model.
+
+## Same Keys, Different Buckets
+
+The simplest way to split up data would be to use the same identity key
+across different buckets. A good example of this would be a `Customer`
+object, an `Order` object, and an `OrderSummaries` object that keeps
+rolled up info about orders such as total, etc. You can find the source
+for these POJOs in `Customer.java`, `Order.java`, and
+`OrderSummaries.java`. Let's put some data into Riak so we can play
+with it.
+
+```java
+// From SipOfRiak.java
+
+private static Customer createCustomer() {
+    Customer customer = new Customer();
+    customer.CustomerId = 1;
+    customer.Name = "John Smith";
+    customer.Address = "123 Main Street";
+    customer.City = "Columbus";
+    customer.State = "Ohio";
+    customer.Zip = "43210";
+    customer.Phone = "+1-614-555-5555";
+    customer.CreatedDate = "2013-10-01 14:30:26";
+    return customer;
+}
+
+private static ArrayList<Order> createOrders() {
+    ArrayList<Order> orders = new ArrayList<Order>();
+
+    Order order1 = new Order();
+    order1.OrderId = 1;
+    order1.CustomerId = 1;
+    order1.SalespersonId = 9000;
+    order1.Items.add(
+        new Item("TCV37GIT4NJ",
+                 "USB 3.0 Coffee Warmer",
+                 15.99));
+    order1.Items.add(
+        new Item("PEG10BBF2PP",
+                 "eTablet Pro; 24GB; Grey",
+                 399.99));
+    order1.Total = 415.98;
+    order1.OrderDate = "2013-10-01 14:42:26";
+    orders.add(order1);
+
+    Order order2 = new Order();
+    order2.OrderId = 2;
+    order2.CustomerId = 1;
+    order2.SalespersonId = 9001;
+    order2.Items.add(
+        new Item("OAX19XWN0QP",
+                 "GoSlo Digital Camera",
+                 359.99));
+    order2.Total = 359.99;
+    order2.OrderDate = "2013-10-15 16:43:16";
+    orders.add(order2);
+
+    Order order3 = new Order();
+    order3.OrderId = 3;
+    order3.CustomerId = 1;
+    order3.SalespersonId = 9000;
+    order3.Items.add(
+        new Item("WYK12EPU5EZ",
+                 "Call of Battle: Goats - Gamesphere 4",
+                 69.99));
+    order3.Items.add(
+        new Item("TJB84HAA8OA",
+                 "Bricko Building Blocks",
+                 4.99));
+    order3.Total = 74.98;
+    order3.OrderDate = "2013-11-03 17:45:28";
+    orders.add(order3);
+    return orders;
+}
+
+private static OrderSummary createOrderSummary(ArrayList<Order> orders) {
+    OrderSummary orderSummary = new OrderSummary();
+    orderSummary.CustomerId = 1;
+    for(Order order: orders)
+    {
+        orderSummary.Summaries.add(new OrderSummaryItem(order));
+    }
+    return orderSummary;
+}
+
+public static void main(String[] args) throws RiakException {
+
+    System.out.println("Creating Data");
+    Customer customer = createCustomer();
+    ArrayList<Order> orders = createOrders();
+    OrderSummary orderSummary = createOrderSummary(orders);
+
+    System.out.println("Starting Client");
+    IRiakClient client = RiakFactory.pbcClient("127.0.0.1", 10017);
+
+    System.out.println("Creating Buckets");
+    Bucket customersBucket = client.fetchBucket("Customers").lazyLoadBucketProperties().execute();
+    Bucket ordersBucket = client.fetchBucket("Orders").lazyLoadBucketProperties().execute();
+    Bucket orderSummariesBucket = client.fetchBucket("OrderSummaries").lazyLoadBucketProperties().execute();
+
+    System.out.println("Storing Data");
+    customersBucket.store(String.valueOf(customer.CustomerId), customer).execute();
+    for (Order order : orders) {
+        ordersBucket.store(String.valueOf(order.OrderId), order).execute();
+    }
+    orderSummariesBucket.store(String.valueOf(orderSummary.CustomerId), orderSummary).execute();
+```
+
+While individual `Customer` and `Order` objects don't change much (or
+shouldn't change), the `OrderSummaries` object will likely change often.
+It will do double duty by acting as an index for all a customer's
+orders, and also holding some relevant data such as the order total,
+etc. If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+```java
+    System.out.println("Fetching related data by shared key");
+    String key = "1";
+    String fetchedCust = customersBucket.fetch(key).execute().getValueAsString();
+    String fetchedOrdSum = orderSummariesBucket.fetch(key).execute().getValueAsString();
+    System.out.format("Customer 1: %s\n", fetchedCust);
+    System.out.format("OrderSummary 1: %s\n", fetchedOrdSum);
+```
+
+Which returns our amalgamated objects:
+
+```bash
+Fetching related data by shared key
+Customer 1: {"CustomerId":1,"Name":"John Smith","Address":"123 Main Street","City":"Columbus","State":"Ohio","Zip":"43210","Phone":"+1-614-555-5555","CreatedDate":"2013-10-01 14:30:26"}
+OrderSummary 1: {"CustomerId":1,"Summaries":[{"OrderId":1,"Total":415.98,"OrderDate":"2013-10-01 14:42:26"},{"OrderId":2,"Total":359.99,"OrderDate":"2013-10-15 16:43:16"},{"OrderId":3,"Total":74.98,"OrderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.4/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+```java
+    System.out.println("Adding Index Data");
+    IRiakObject riakObj = ordersBucket.fetch("1").execute();
+    riakObj.addIndex("SalespersonId", 9000);
+    riakObj.addIndex("OrderDate", "2013-10-01");
+    ordersBucket.store(riakObj).execute();
+
+    IRiakObject riakObj2 = ordersBucket.fetch("2").execute();
+    riakObj2.addIndex("SalespersonId", 9001);
+    riakObj2.addIndex("OrderDate", "2013-10-15");
+    ordersBucket.store(riakObj2).execute();
+
+    IRiakObject riakObj3 = ordersBucket.fetch("3").execute();
+    riakObj3.addIndex("SalespersonId", 9000);
+    riakObj3.addIndex("OrderDate", "2013-11-03");
+    ordersBucket.store(riakObj3).execute();
+```
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+```java
+    // Query for orders where the SalespersonId index is set to 9000
+    List<String> janesOrders = ordersBucket.fetchIndex(IntIndex.named("SalespersonId"))
+                                           .withValue(9000).execute();
+
+    System.out.format("Jane's Orders: %s\n", StringUtil.Join(", ", janesOrders));
+```
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID. Next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries.
Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+```java
+    // Query for orders where the OrderDate index is between 2013-10-01 and 2013-10-31
+    List<String> octoberOrders = ordersBucket.fetchIndex(BinIndex.named("OrderDate"))
+                                             .from("2013-10-01").to("2013-10-31").execute();
+
+    System.out.format("October's Orders: %s\n", StringUtil.Join(", ", octoberOrders));
+```
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/nodejs.md b/content/riak/kv/3.0.4/developing/getting-started/nodejs.md
new file mode 100644
index 0000000000..31c4aa69fb
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/nodejs.md
@@ -0,0 +1,104 @@
+---
+title: "Getting Started with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "NodeJS"
+    identifier: "getting_started_nodejs"
+    weight: 104
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/nodejs
+  - /riak/kv/3.0.4/dev/taste-of-riak/nodejs
+---
+
+[introduction.js]: https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/taste-of-riak/introduction.js
+[npm]: https://www.npmjs.com/package/basho-riak-client
+[node_js_installation]: https://github.com/basho/riak-nodejs-client/wiki/Installation
+[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.4/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Node.js 0.12 or later is
+required.
+
+Code for these examples is available [here][introduction.js]. To run, follow
+these directions:
+
+```bash
+git clone git://github.com/basho/riak-nodejs-client-examples
+cd riak-nodejs-client-examples
+npm install
+node ./app.js
+```
+
+### Client Setup
+
+Install [the Riak Node.js Client][node_js_installation] through [NPM][npm].
+
+### Connecting to Riak
+
+Connecting to Riak with the Riak Node.js Client requires creating a new client
+object and using the callback argument to know when the client is fully
+initialized:
+
+```javascript
+var Riak = require('basho-riak-client');
+var nodes = [
+    'riak-test:10017',
+    'riak-test:10027',
+    'riak-test:10037',
+    'riak-test:10047'
+];
+var client = new Riak.Client(nodes, function (err, c) {
+    // NB: at this point the client is fully initialized, and
+    // 'client' and 'c' are the same object
+});
+```
+
+This creates a new `Riak.Client` object which handles all the details of
+tracking active nodes and also provides load balancing. The `Riak.Client` object
+is used to send commands to Riak.
When your application is completely done with +Riak communications, the following method can be used to gracefully shut the +client down and exit Node.js: + +```javascript +client.stop(function (err, rslt) { + // NB: you may wish to check err + process.exit(); +}); +``` + +Let's make sure the cluster is online with a `Ping` request: + +```javascript +var assert = require('assert'); + +client.ping(function (err, rslt) { + if (err) { + throw new Error(err); + } else { + // On success, ping returns true + assert(rslt === true); + } +}); +``` + +This is some simple code to test that a node in a Riak cluster is online - we +send a simple ping message. Even if the cluster isn't present, the Riak Node.js +Client will return a response message. In the callback it is important to check +that your activity was successful by checking the `err` variable. + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.4/developing/getting-started/nodejs/crud-operations) + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/3.0.4/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..7599dcf15c --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,138 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "CRUD Operations" + identifier: "getting_started_nodejs_crud" + weight: 100 + parent: "getting_started_nodejs" +toc: true +aliases: +--- + +[nodejs_wiki]: https://github.com/basho/riak-nodejs-client/wiki + +### Creating Objects In Riak KV + +Pinging a Riak cluster sounds like a lot of fun, but eventually someone is going +to want us to do productive work. Let's create some data to save in Riak. + +The Riak Node.js Client makes use of a `RiakObject` class to encapsulate Riak +key/value objects. At the most basic, a `RiakObject` is responsible for +identifying your object and for translating it into a format that can be easily +saved to Riak. + +```javascript +var async = require('async'); + +var people = [ + { + emailAddress: "bashoman@basho.com", + firstName: "Basho", + lastName: "Man" + }, + { + emailAddress: "johndoe@gmail.com", + firstName: "John", + lastName: "Doe" + } +]; + +var storeFuncs = []; +people.forEach(function (person) { + // Create functions to execute in parallel to store people + storeFuncs.push(function (async_cb) { + client.storeValue({ + bucket: 'contributors', + key: person.emailAddress, + value: person + }, + function(err, rslt) { + async_cb(err, rslt); + } + ); + }); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +In this sample, we create a collection of `Person` objects and then save each +`Person` to Riak. Once again, we check the response from Riak. + +### Reading from Riak + +Let's find a person! + +```javascript +var logger = require('winston'); + +client.fetchValue({ bucket: 'contributors', key: 'bashoman@basho.com', convertToJs: true }, + function (err, rslt) { + if (err) { + throw new Error(err); + } else { + var riakObj = rslt.values.shift(); + var bashoman = riakObj.value; + logger.info("I found %s in 'contributors'", bashoman.emailAddress); + } + } +); +``` + +We use `client.fetchValue` to retrieve an object from Riak. 
This returns an
+array of `RiakObject` objects, which helpfully encapsulate the communication
+with Riak.
+
+After verifying that we've been able to communicate with Riak *and* that we have
+a successful result, we use the `value` property to get the object, which has
+already been converted to a JavaScript object due to the use of `convertToJs:
+true` in the options.
+
+### Modifying Existing Data
+
+Let's say that Basho Man has decided to be known as Riak Man:
+
+```javascript
+bashoman.firstName = "Riak";
+riakObj.setValue(bashoman);
+
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Updating an object involves modifying a `RiakObject`, then using
+`client.storeValue` to save the existing object.
+
+### Deleting Data
+
+```javascript
+client.deleteValue({ bucket: 'contributors', key: 'johndoe@gmail.com' }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+Just like other operations, we check the results that have come back from Riak
+to make sure the object was successfully deleted.
+
+The Riak Node.js Client has a lot of additional functionality that makes it easy
+to build rich, complex applications with Riak. Check out the
+[documentation][nodejs_wiki] to learn more about working with the Riak Node.js
+Client and Riak.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/3.0.4/developing/getting-started/nodejs/object-modeling.md
new file mode 100644
index 0000000000..ebe428d975
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/nodejs/object-modeling.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with NodeJS"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Object Modeling"
+    identifier: "getting_started_nodejs_object"
+    weight: 102
+    parent: "getting_started_nodejs"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/object-modeling-nodejs
+  - /riak/kv/3.0.4/dev/taste-of-riak/object-modeling-nodejs
+---
+
+To get started, let's create the models that we'll be using.
+
+* [`Msg`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js)
+* [`Timeline`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/timeline.js)
+* [`User`](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js)
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+| Bucket | Key Pattern | Example Key
+|:-------|:------------|:-----------
+| `Users` | `<user_name>` | `joeuser`
+| `Msgs` | `<user_name>_<datetime>` | `joeuser_2014-03-06T02:05:13.556Z`
+| `Timelines` | `<owner>_<type>_<date>` | `joeuser_SENT_2014-03-06`<br/>
+`marketing_group_INBOX_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `userName` as the key.
+
+[*Example:* `userName` as key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/user.js#L19-L20)
+
+For the `Msgs` bucket, let's use a combination of the username and the
+posting datetime in an [ISO 8601
+Long](http://en.wikipedia.org/wiki/ISO_8601) format. This combination
+gives us the pattern `<user_name>_<datetime>`, which produces keys like
+`joeuser_2014-03-05T23:20:28Z`.
+
+[*Example:* `Msg` key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/models/msg.js#L25-L27)
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<owner>_<type>_<date>` for
+users, and `<group_name>_Inbox_<date>` for groups, which will look like
+`joeuser_SENT_2014-03-06` or `marketing_group_INBOX_2014-03-05`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+* [Base `Repository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/repository.js)
+* [`UserRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/user-repository.js)
+* [`MsgRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/msg-repository.js)
+* [`TimelineRepository` class](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/repositories/timeline-repository.js)
+* [`TimelineManager` class that manages `Msg` and `Timeline` objects](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/timeline-manager.js)
+
+Finally, let's test them:
+
+[*Example:* Putting it all together](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch03-Msgy-Schema/app.js)
+
+As you can see, the repository pattern helps us with a few things:
+
+ - It helps us to see if an object exists before creating a new one
+ - It keeps our buckets and key names consistent
+ - It provides us with a consistent interface to work with
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application-dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
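+For a feel of what those repository classes do, here is a minimal sketch of a
+`save` method, assuming the `basho-riak-client` API shown in the CRUD chapter
+(the linked repository classes above are the authoritative versions):
+
+```javascript
+// A repository-style wrapper around client.storeValue; the real
+// classes linked above also handle fetches and key generation.
+function UserRepository(client) {
+    this.client = client;
+    this.bucketName = 'Users';
+}
+
+UserRepository.prototype.save = function (user, callback) {
+    // The user's name doubles as the natural key
+    this.client.storeValue({
+        bucket: this.bucketName,
+        key: user.userName,
+        value: user
+    }, callback);
+};
+```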
+ +So to recap, in this chapter we learned: + +* How to choose bucket names +* How to choose natural keys based on how we want to partition our data + + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/nodejs/querying.md b/content/riak/kv/3.0.4/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..7e3d1240a8 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/nodejs/querying.md @@ -0,0 +1,146 @@ +--- +title_supertext: "Getting Started:" +title: "Querying with NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Querying" + identifier: "getting_started_nodejs_query" + weight: 101 + parent: "getting_started_nodejs" +toc: true +aliases: + - /riak/3.0.4/dev/taste-of-riak/querying-nodejs + - /riak/kv/3.0.4/dev/taste-of-riak/querying-nodejs +--- + +## Node.js Version Setup + +For the Node.js version, please download the source from GitHub by either +[cloning](https://github.com/basho/taste-of-riak) the source code +repository or downloading the [current zip of the master +branch](https://github.com/basho/taste-of-riak/archive/master.zip). +The code for this chapter is in `nodejs/Ch02-Schemas-and-Indexes`. Be +sure to run `npm install` in this directory prior to running `node +./app.js` to run the code. + +## A Quick Note on Querying and Schemas + +_Schemas_? Yes, we said that correctly: S-C-H-E-M-A-S. It's not a dirty +word. Even in a key/value store, you will still have a logical database +schema of how all the data relates to other data. This can be as simple +as using the same key across multiple buckets for different types of +data to having fields in your data that are related by name. These +querying methods will introduce you to some ways of laying out your data +in Riak, along with how to query it back. + +## Denormalization + +If you're coming from a relational database, the easiest way to get your +application's feet wet with NoSQL is to denormalize your data into +related chunks. For example, with a customer database, you might have +separate tables for customers, addresses, preferences, etc. In Riak, +you can denormalize all that associated data into a single object and +store it into a `Customer` bucket. You can keep pulling in associated +data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. + often) + +At one of these points we will have to split the model. + +## Same Keys, Different Buckets + +The simplest way to split up data would be to use the same identity key +across different buckets. A good example of this would be a `Customer` +object, an `Order` object, and an `OrderSummaries` object that keeps +rolled up info about orders such as total, etc. Let's put some data into +Riak so we can play with it. + +* [*Example:* Creating a customer](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L24-L33) +* [*Example:* Creating orders and order summaries](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L193-L262) + +While individual Customer and Order objects don't change much (or +shouldn't change), the "Order Summary" object will likely change often. +It will do double duty by acting as an index for all a customer's +orders, and also holding some relevant data such as the order total, +etc. 
If we showed this information in our application often, it's only
+one extra request to get all the info.
+
+[*Example:* Fetching by shared key](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L78-L96)
+
+Which returns our amalgamated objects:
+
+```bash
+info: Customer 1: {"id":"1","name":"John Smith","address":"123 Main Street","city":"Columbus","state":"Ohio","zip":"43210","phone":"+1-614-555-5555","createdDate":"2013-10-01 14:30:26"}
+info: OrderSummary 1: {"customerId":"1","summaries":[{"orderId":"1","total":415.98,"orderDate":"2013-10-01 14:42:26"},{"orderId":"2","total":359.99,"orderDate":"2013-10-15 16:43:16"},{"orderId":"3","total":74.98,"orderDate":"2013-11-03 17:45:28"}]}
+```
+
+While this pattern is very easy and extremely fast with respect to
+queries and complexity, it's up to the application to know about these
+intrinsic relationships.
+
+## Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.4/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from an SQL world, Secondary Indexes (2i) are a lot
+like SQL indexes. They are a way to quickly look up objects based on a
+secondary key, without scanning through the whole dataset. This makes it
+very easy to find groups of related data by values, or even ranges of
+values. To properly show this off, we will now add some more data to our
+application, and add some secondary index entries at the same time.
+
+[*Example:* Adding index data](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L98-L141)
+
+As you may have noticed, ordinary key/value data is opaque to 2i, so we
+have to add entries to the indexes at the application level. Now let's
+find all of Jane Appleseed's processed orders. We'll look up the orders
+by searching the `SalespersonId` integer index for Jane's ID of `9000`.
+
+[*Example:* Query for orders where the SalespersonId index is set to 9000](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L143-L159)
+
+Which returns:
+
+```text
+Jane's Orders: 1, 3
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference
+Jane's ID. Next, let's use a "binary" index. Now, let's say that the VP
+of Sales wants to know how many orders came in during October 2013. In
+this case, we can exploit 2i's range queries. Let's search the
+`OrderDate` binary index for entries between `2013-10-01` and
+`2013-10-31`.
+
+[*Example:* Query for orders where the OrderDate index is between 2013-10-01 and
+2013-10-31](https://github.com/basho/taste-of-riak/blob/master/nodejs/Ch02-Schemas-and-Indexes/app.js#L161-175)
+
+Which returns:
+
+```text
+October's Orders: 1, 2
+```
+
+Boom! Easy-peasy. We used 2i's range feature to search for a range of
+values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a
+  secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/php.md b/content/riak/kv/3.0.4/developing/getting-started/php.md
new file mode 100644
index 0000000000..9f98932efd
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/php.md
@@ -0,0 +1,80 @@
+---
+title: "Getting Started with PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "PHP"
+    identifier: "getting_started_php"
+    weight: 107
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/php
+  - /riak/kv/3.0.4/dev/taste-of-riak/php
+---
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.4/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of PHP is required, and [Composer](https://getcomposer.org/) must be installed to fetch the client library package.
+
+## Client Setup
+Download and unzip, or clone the Taste of Riak Sample Code Repository from GitHub ([zip](https://github.com/basho/taste-of-riak/archive/master.zip), [github repository](https://github.com/basho/taste-of-riak)).
+
+From the `taste-of-riak` directory, use Composer to install the Riak PHP 2.0 client.
+
+```bash
+php path/to/your/composer.phar install
+
+# If you did a global install of composer, run this instead:
+composer install
+```
+
+If you set up a local Riak cluster using the [five-minute install]({{}}riak/kv/3.0.4/using/running-a-cluster) method, change line 11 from `->onPort(8098)` to `->onPort(10018)`.
+
+Next, run `php Ch01-CRUD/taste-of-riak.php` to run this chapter's example code. It should output:
+
+```json
+Reading Objects From Riak...
+Updating Objects In Riak...
+Deleting Objects From Riak...
+Working With Complex Objects...
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+Yay, success!
+
+Since we didn’t use PHP's REPL environment, let's walk through the code
+to see what it actually did at each step.
+
+## Setting up the PHP Client and connections
+
+```php
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+```
+
+This code will load the library, declare the necessary `use` statements for our code, and then initialize and configure a [Node Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.Builder.html).
+Once we call `build()` on the builder, it will return to us a [Node](http://basho.github.io/riak-php-client/class-Basho.Riak.Node.html) object, which we use when building our Riak commands.
+
+We are now ready to start interacting with Riak.
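+As a quick sanity check before moving on, you can round-trip a throwaway value
+(a minimal sketch; it uses the store and fetch builders covered in the next
+chapter and assumes the client's `Response::isSuccess()` helper):
+
+```php
+// Store a trivial value and read it back to confirm the node is reachable
+$location = new Riak\Location('hello', new Riak\Bucket('test'));
+
+$store = (new Command\Builder\StoreObject($riak))
+    ->buildObject('world')
+    ->atLocation($location)
+    ->build();
+
+$fetch = (new Command\Builder\FetchObject($riak))
+    ->atLocation($location)
+    ->build();
+
+if ($store->execute()->isSuccess() && $fetch->execute()->isSuccess()) {
+    echo 'Riak is up and responding' . PHP_EOL;
+}
+```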
+ +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.4/developing/getting-started/php/crud-operations) + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/php/crud-operations.md b/content/riak/kv/3.0.4/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..bbdfc3a204 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/php/crud-operations.md @@ -0,0 +1,187 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with PHP" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "CRUD Operations" + identifier: "getting_started_php_crud" + weight: 100 + parent: "getting_started_php" +toc: true +aliases: +--- + +## Creating Objects In Riak +First, let’s create a few objects and a bucket to keep them in. + +```php +$bucket = new Riak\Bucket('testBucket'); + +$val1 = 1; +$location1 = new Riak\Location('one', $bucket); + +$storeCommand1 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val1) + ->atLocation($location1) + ->build(); +$storeCommand1->execute(); +``` + +In this first example we have stored the integer 1 with the lookup key of ‘one’. Next let’s store a simple string value of “two” with a matching key. + +```php +$val2 = 'two'; +$location2 = new Riak\Location('two', $bucket); + +$storeCommand2 = (new Command\Builder\StoreObject($riak)) + ->buildObject($val2) + ->atLocation($location2) + ->build(); +$storeCommand2->execute(); +``` + +That was easy. Finally, let’s store an associative array. You will probably recognize the pattern by now. + +```php +$val3 = ['myValue' => 3]; +$location3 = new Riak\Location('three', $bucket); + +$storeCommand3 = (new Command\Builder\StoreObject($riak)) + ->buildJsonObject($val3) + ->atLocation($location3) + ->build(); +$storeCommand3->execute(); +``` + +## Reading Objects From Riak +Now that we have a few objects stored, let’s retrieve them and make sure they contain the values we expect. + +```php +$response1 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location1) + ->build() + ->execute(); + +$response2 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location2) + ->build() + ->execute(); + +$response3 = (new Command\Builder\FetchObject($riak)) + ->atLocation($location3) + ->withDecodeAsAssociative() + ->build() + ->execute(); + +print_r($response1->getObject()->getData()); +print_r($response2->getObject()->getData()); +print_r($response3->getObject()->getData()); +``` + +That was easy. We create a [Fetch Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Fetch.html) from a [FetchObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html). +For our object that is an associative array, we also add [`withDecodeAsAssociative()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.FetchObject.html#_withDecodeAsAssociative) to the builder so it returns the object as an associative array instead of an stdClass object. + +In either case, we'll get a [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html) object back, which holds information about the operation, and the result data. + +## Updating Objects In Riak +While some data may be static, other forms of data may need to be updated. This is also easy to accomplish. Let’s update the value of myValue in the 3rd example to 42. 
+
+```php
+$object3 = $response3->getObject();
+$data3 = $object3->getData();
+
+$data3['myValue'] = 42;
+$object3 = $object3->setData(json_encode($data3));
+
+$updateCommand = (new Command\Builder\StoreObject($riak))
+    ->withObject($object3)
+    ->atLocation($location3)
+    ->build();
+
+$updateCommand->execute();
+```
+
+First we get the Riak [Object](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html) from the [Response](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Response.html), then we get the stored data with [`getData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_getData). We update the data to our liking, then use [`setData()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Object.html#_setData) to set the new data back to the Riak Object.
+To store it we use the same pattern as before, but this time we use the [`withObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_withObject) method to tell it to store our updated Riak Object.
+
+## Deleting Objects From Riak
+As a last step, we’ll demonstrate how to delete data. We just build a [Delete Command](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Object.Delete.html) from a [DeleteObject Builder](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.DeleteObject.html), and execute it.
+
+```php
+(new Command\Builder\DeleteObject($riak))->atLocation($location1)->build()->execute();
+(new Command\Builder\DeleteObject($riak))->atLocation($location2)->build()->execute();
+(new Command\Builder\DeleteObject($riak))->atLocation($location3)->build()->execute();
+```
+
+## Working With Complex Objects
+Since the world is a little more complicated than simple integers and bits of strings, let’s see how we can work with more complex objects. Take, for example, this plain old PHP object (POPO) that encapsulates some knowledge about a book.
+
+```php
+class Book
+{
+    var $title;
+    var $author;
+    var $body;
+    var $isbn;
+    var $copiesOwned;
+}
+
+$book = new Book();
+$book->isbn = '1111979723';
+$book->title = 'Moby Dick';
+$book->author = 'Herman Melville';
+$book->body = 'Call me Ishmael. Some years ago...';
+$book->copiesOwned = 3;
+```
+
+Ok, so we have some information about our Moby Dick collection that we want to save. Storing this to Riak should look familiar by now:
+
+```php
+$bookLocation = new Riak\Location($book->isbn, new Riak\Bucket('books'));
+
+$storeCommand1 = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($book)
+    ->atLocation($bookLocation)
+    ->build();
+
+$storeCommand1->execute();
+```
+
+Some of you may be thinking, “But how does the Riak client encode/decode my object?” If we fetch the binary version of our book back and print it as a string, we shall know:
+
+```php
+$fetchBookResponse = (new Command\Builder\FetchObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute();
+
+print('Serialized Object:' . PHP_EOL);
+print($fetchBookResponse->getBody() . PHP_EOL);
+```
+
+```json
+Serialized Object:
+{"title":"Moby Dick","author":"Herman Melville","body":"Call me Ishmael. Some years ago...","isbn":"1111979723","copiesOwned":3}
+```
+
+JSON! The library encodes PHP objects as JSON strings when you use the [`buildJsonObject()`](http://basho.github.io/riak-php-client/class-Basho.Riak.Command.Builder.ObjectTrait.html#_buildJsonObject) method on the StoreObject builder.
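+
+Going the other way is just as mechanical. If you would rather have a `Book` instance back than raw JSON, one approach (a sketch, not part of the sample code) is to fetch the object normally and copy the decoded fields over, since `getData()` returns a `stdClass` by default:
+
+```php
+// Sketch: rebuild a Book from the decoded response data.
+$data = $fetchBookResponse->getObject()->getData();
+
+$fetchedBook = new Book();
+$fetchedBook->isbn = $data->isbn;
+$fetchedBook->title = $data->title;
+$fetchedBook->author = $data->author;
+$fetchedBook->body = $data->body;
+$fetchedBook->copiesOwned = $data->copiesOwned;
+```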
+
+Now that we’ve ruined the magic of object encoding, let’s clean up our mess:
+
+```php
+(new Command\Builder\DeleteObject($riak))
+    ->atLocation($bookLocation)
+    ->build()
+    ->execute();
+```
+
+## Next Steps
+
+More complex use cases can be composed from these initial create, read, update, and delete (CRUD) operations. [In the next chapter]({{}}riak/kv/3.0.4/developing/getting-started/php/querying) we will look at how to store and query more complicated and interconnected data, such as documents.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/php/querying.md b/content/riak/kv/3.0.4/developing/getting-started/php/querying.md
new file mode 100644
index 0000000000..c995b0d303
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/php/querying.md
+---
+title_supertext: "Getting Started:"
+title: "Querying with PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Querying"
+    identifier: "getting_started_php_query"
+    weight: 101
+    parent: "getting_started_php"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/querying-php
+  - /riak/kv/3.0.4/dev/taste-of-riak/querying-php
+---
+
+## A Quick Note on Querying and Schemas
+_Schemas_? Yes, we said that correctly, S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can range from something as simple as using the same key across multiple buckets for different types of data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
+
+## Denormalization
+
+If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example, with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls:
+
+* Size Limits (objects greater than 1MB)
+* Shared/Referential Data (data that the object doesn't "own")
+* Differences in Access Patterns (objects that get read/written once vs. often)
+
+At one of these points we will have to split the model.
+
+## Same Keys - Different Buckets
+
+The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled-up info about orders such as Total, etc. Let's put some data into Riak so we can play with it.
+```php
+<?php
+
+include_once 'vendor/autoload.php';
+
+use Basho\Riak;
+use Basho\Riak\Node;
+use Basho\Riak\Command;
+use Basho\Riak\Location;
+
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Class definitions for our models
+
+class Customer
+{
+    var $customerId;
+    var $name;
+    var $address;
+    var $city;
+    var $state;
+    var $zip;
+    var $phone;
+    var $createdDate;
+}
+
+class Order
+{
+    public function __construct()
+    {
+        $this->items = array();
+    }
+    var $orderId;
+    var $customerId;
+    var $salespersonId;
+    var $items;
+    var $total;
+    var $orderDate;
+}
+
+class Item
+{
+    public function __construct($itemId, $title, $price)
+    {
+        $this->itemId = $itemId;
+        $this->title = $title;
+        $this->price = $price;
+    }
+    var $itemId;
+    var $title;
+    var $price;
+}
+
+class OrderSummary
+{
+    public function __construct()
+    {
+        $this->summaries = array();
+    }
+    var $customerId;
+    var $summaries;
+}
+
+class OrderSummaryItem
+{
+    public function __construct(Order $order)
+    {
+        $this->orderId = $order->orderId;
+        $this->total = $order->total;
+        $this->orderDate = $order->orderDate;
+    }
+    var $orderId;
+    var $total;
+    var $orderDate;
+}
+
+
+// Creating Data
+$customer = new Customer();
+$customer->customerId = 1;
+$customer->name = 'John Smith';
+$customer->address = '123 Main Street';
+$customer->city = 'Columbus';
+$customer->state = 'Ohio';
+$customer->zip = '43210';
+$customer->phone = '+1-614-555-5555';
+$customer->createdDate = '2013-10-01 14:30:26';
+
+
+$orders = [];
+
+$order1 = new Order();
+$order1->orderId = 1;
+$order1->customerId = 1;
+$order1->salespersonId = 9000;
+$order1->items = [
+    new Item(
+        'TCV37GIT4NJ',
+        'USB 3.0 Coffee Warmer',
+        15.99
+    ),
+    new Item(
+        'PEG10BBF2PP',
+        'eTablet Pro; 24GB; Grey',
+        399.99
+    )
+];
+$order1->total = 415.98;
+$order1->orderDate = '2013-10-01 14:42:26';
+$orders[] = $order1;
+
+$order2 = new Order();
+$order2->orderId = 2;
+$order2->customerId = 1;
+$order2->salespersonId = 9001;
+$order2->items = [
+    new Item(
+        'OAX19XWN0QP',
+        'GoSlo Digital Camera',
+        359.99
+    )
+];
+$order2->total = 359.99;
+$order2->orderDate = '2013-10-15 16:43:16';
+$orders[] = $order2;
+
+$order3 = new Order();
+$order3->orderId = 3;
+$order3->customerId = 1;
+$order3->salespersonId = 9000;
+$order3->items = [
+    new Item(
+        'WYK12EPU5EZ',
+        'Call of Battle: Goats - Gamesphere 4',
+        69.99
+    ),
+    new Item(
+        'TJB84HAA8OA',
+        'Bricko Building Blocks',
+        4.99
+    )
+];
+$order3->total = 74.98;
+$order3->orderDate = '2013-11-03 17:45:28';
+$orders[] = $order3;
+
+
+$orderSummary = new OrderSummary();
+$orderSummary->customerId = 1;
+foreach ($orders as $order) {
+    $orderSummary->summaries[] = new OrderSummaryItem($order);
+}
+unset($order);
+
+
+
+// Starting Client
+$node = (new Node\Builder)
+    ->atHost('127.0.0.1')
+    ->onPort(8098)
+    ->build();
+
+$riak = new Riak([$node]);
+
+// Creating Buckets
+$customersBucket = new Riak\Bucket('Customers');
+$ordersBucket = new Riak\Bucket('Orders');
+$orderSummariesBucket = new Riak\Bucket('OrderSummaries');
+
+// Storing Data
+$storeCustomer = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($customer)
+    ->atLocation(new Location($customer->customerId, $customersBucket))
+    ->build();
+$storeCustomer->execute();
+
+foreach ($orders as $order) {
+    $storeOrder = (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($order)
+        ->atLocation(new Location($order->orderId, $ordersBucket))
+        ->build();
+    $storeOrder->execute();
+}
+unset($order);
+
+$storeSummary = (new Command\Builder\StoreObject($riak))
+    ->buildJsonObject($orderSummary)
+    ->atLocation(new Location($orderSummary->customerId, $orderSummariesBucket))
->build(); +$storeSummary->execute(); +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. It will do double duty by acting as an index for all a customer's orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info. + +```php +// Fetching related data by shared key +$fetched_customer = (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $customersBucket)) + ->build()->execute()->getObject()->getData(); + +$fetched_customer->orderSummary = + (new Command\Builder\FetchObject($riak)) + ->atLocation(new Location('1', $orderSummariesBucket)) + ->build()->execute()->getObject()->getData(); + +print("Customer with OrderSummary data: \n"); +print_r($fetched_customer); +``` + +Which returns our amalgamated objects: + +```text +Customer with OrderSummary data: +stdClass Object +( + [customerId] => 1 + [name] => John Smith + [address] => 123 Main Street + [city] => Columbus + [state] => Ohio + [zip] => 43210 + [phone] => +1-614-555-5555 + [createdDate] => 2013-10-01 14:30:26 + [orderSummary] => stdClass Object + ( + [customerId] => 1 + [summaries] => Array + ( + [0] => stdClass Object + ( + [orderId] => 1 + [total] => 415.98 + [orderDate] => 2013-10-01 14:42:26 + ) + + [1] => stdClass Object + ( + [orderId] => 2 + [total] => 359.99 + [orderDate] => 2013-10-15 16:43:16 + ) + + [2] => stdClass Object + ( + [orderId] => 3 + [total] => 74.98 + [orderDate] => 2013-11-03 17:45:28 + ) + ) + ) +) +``` + +While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships. + + +## Secondary Indexes + +{{% note %}} +Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.4/setup/planning/backend/bitcask) does not support secondary indexes. + +See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes. +{{% /note %}} + +If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly lookup objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time. + +```php +// Adding Index Data +$keys = array(1,2,3); +foreach ($keys as $key) { + $orderLocation = new Location($key, $ordersBucket); + $orderObject = (new Command\Builder\FetchObject($riak)) + ->atLocation($orderLocation) + ->build()->execute()->getObject(); + + $order = $orderObject->getData(); + + $orderObject->addValueToIndex('SalespersonId_int', $order->salespersonId); + $orderObject->addValueToIndex('OrderDate_bin', $order->orderDate); + + $storeOrder = (new Command\Builder\StoreObject($riak)) + ->withObject($orderObject) + ->atLocation($orderLocation) + ->build(); + $storeOrder->execute(); +} +unset($key); + +``` + +As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level. 
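+
+Before querying them, it's worth pausing on the index names themselves. The `_int` and `_bin` suffixes are not decorative: they tell Riak whether an index holds integer or binary (string) values, which determines how equality and range comparisons behave. A small illustration using the same method as above:
+
+```php
+// 2i index names carry a type suffix:
+//   *_int -> integer index, compared numerically
+//   *_bin -> binary (string) index, compared lexicographically
+// (Illustration only; the loop above already adds these entries.)
+$orderObject->addValueToIndex('SalespersonId_int', 9000);
+$orderObject->addValueToIndex('OrderDate_bin', '2013-10-01');
+```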
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `SalespersonId_int` index for Jane's id of `9000`.
+
+```php
+// Query for orders where the SalespersonId int index is set to 9000
+$fetchIndex = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('SalespersonId_int')
+    ->withScalarValue(9000)->build();
+$janes_orders = $fetchIndex->execute()->getResults();
+
+print("\n\nJane's Orders: \n");
+print_r($janes_orders);
+```
+
+Which returns:
+
+```text
+Jane's Orders:
+Array
+(
+    [0] => 3
+    [1] => 1
+)
+
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `OrderDate_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```php
+// Query for orders where the OrderDate bin index is
+// between 2013-10-01 and 2013-10-31
+$fetchOctoberOrders = (new Command\Builder\QueryIndex($riak))
+    ->inBucket($ordersBucket)
+    ->withIndexName('OrderDate_bin')
+    ->withRangeValue('2013-10-01','2013-10-31')
+    ->withReturnTerms(true)
+    ->build();
+
+$octobers_orders = $fetchOctoberOrders->execute()->getResults();
+
+print("\n\nOctober's Orders: \n");
+print_r($octobers_orders);
+?>
+```
+
+Which returns:
+
+```text
+October's Orders:
+Array
+(
+    [0] => Array
+        (
+            [2013-10-01 14:42:26] => 1
+        )
+
+    [1] => Array
+        (
+            [2013-10-15 16:43:16] => 2
+        )
+)
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. With the October's Orders query we also used the `->withReturnTerms(true)` option, which, as you can see, will return the values of the matching 2i terms.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys (and terms if needed) that match the index query
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/python.md b/content/riak/kv/3.0.4/developing/getting-started/python.md
new file mode 100644
index 0000000000..b34b555cbd
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/python.md
+---
+title: "Getting Started with Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Python"
+    identifier: "getting_started_python"
+    weight: 102
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/python
+  - /riak/kv/3.0.4/dev/taste-of-riak/python
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.4/using/running-a-cluster) first.
+
+To try this flavor of Riak, a working installation of Python is
+required, with Python 2.7 preferred. One of the Python package managers,
+e.g. `setuptools` or `pip`, is also required to install the client
+package.
+
+You may install `setuptools` on OS X through MacPorts by running `sudo
+port install py-distribute`. `setuptools` and `pip` are included in the
+Homebrew formula for Python on OS X as well. Just run `brew install
+python`.
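+
+Before installing anything, it may be worth a quick (and entirely optional) check of what is already on your system, since the client targets Python 2.7:
+
+```bash
+python --version   # ideally reports 2.7.x
+pip --version      # or: easy_install --version
+```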
+ +## Prerequisites + +First, you must install some packages needed by the Riak Python client: + +* `python-dev` - Header files and a static library for Python +* `libffi-dev` - Foreign function interface library +* `libssl-dev` - libssl and libcrypto development libraries + +### Ubuntu (12.04 & 14.04) + +```bash +sudo apt-get install python-dev libffi-dev libssl-dev +``` + +## Client Setup + +The easiest way to install the client is with `easy_install` or `pip`. +Either of the commands below will ensure that the client and all its +dependencies are installed and on the load path. Depending on where your +Python libraries are held, these may require `sudo`. + +```bash +easy_install riak +pip install riak +``` + +To install from source, download the latest Python client from GitHub +([zip](https://github.com/basho/riak-python-client/archive/master.zip), +[GitHub repository](https://github.com/basho/riak-python-client)), and +extract it to your working directory. + +Now, let's build the client. + +```bash +python setup.py install +``` + +## Connecting to Riak + +Now, let's start the Python REPL and get set up. Enter the following +into the Python REPL: + +```python +import riak +``` +If you are using a single local Riak node, use the following to create a +new client instance: + +```python +myClient = riak.RiakClient(pb_port=8087, protocol='pbc') + +# Because the Python client uses the Protocol Buffers interface by +# default, the following will work the same: +myClient = riak.RiakClient(pb_port=8087) +``` + +If you set up a local Riak cluster using the [[five-minute install]] +method, use this code snippet instead: + +```python +myClient = riak.RiakClient(pb_port=10017, protocol='pbc') +``` + +We are now ready to start interacting with Riak. + +## Next Steps + +[CRUD Operations]({{}}riak/kv/3.0.4/developing/getting-started/python/crud-operations) + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/python/crud-operations.md b/content/riak/kv/3.0.4/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..29cedc9729 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/python/crud-operations.md @@ -0,0 +1,150 @@ +--- +title_supertext: "Getting Started:" +title: "CRUD Operations with Python" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "CRUD Operations" + identifier: "getting_started_python_crud" + weight: 100 + parent: "getting_started_python" +toc: true +aliases: +--- + +## Creating Objects In Riak + +First, let’s create a few objects and a bucket to keep them in. + +```python +myBucket = myClient.bucket('test') + +val1 = 1 +key1 = myBucket.new('one', data=val1) +key1.store() +``` + +In this first example, we have stored the integer 1 with the lookup key +of `one`. Next let’s store a simple string value of `two` with a +matching key. + +```python +val2 = "two" +key2 = myBucket.new('two', data=val2) +key2.store() +``` + +That was easy. Finally, let’s store a bit of JSON. You will probably +recognize the pattern by now. + +```python +val3 = {"myValue": 3} +key3 = myBucket.new('three', data=val3) +key3.store() +``` + +## Reading Objects From Riak + +Now that we have a few objects stored, let’s retrieve them and make sure +they contain the values we expect. + +```python +fetched1 = myBucket.get('one') +fetched2 = myBucket.get('two') +fetched3 = myBucket.get('three') + +assert val1 == fetched1.data +assert val2 == fetched2.data +assert val3 == fetched3.data +``` + +That was easy. 
We simply request the objects by key. + +## Updating Objects In Riak + +While some data may be static, other forms of data may need to be +updated. This is also easy to accomplish. Let’s update the value of +myValue in the 3rd example to `42`. + +```python +fetched3.data["myValue"] = 42 +fetched3.store() +``` + +## Deleting Objects From Riak + +Nothing is complete without a delete. Fortunately, that's easy too. + +```python +fetched1.delete() +fetched2.delete() +fetched3.delete() +``` + +Now we can verify that the objects have been removed from Riak. + +```python +assert myBucket.get('one').exists == False +assert myBucket.get('two').exists == False +assert myBucket.get('three').exists == False +``` + + +## Working With Complex Objects + +Since the world is a little more complicated than simple integers and +bits of strings, let’s see how we can work with more complex objects. +Take for example, this object that encapsulates some knowledge about a +book. + +```python +book = { + 'isbn': "1111979723", + 'title': "Moby Dick", + 'author': "Herman Melville", + 'body': "Call me Ishmael. Some years ago...", + 'copies_owned': 3 +} +``` + +All right, so we have some information about our Moby Dick collection +that we want to save. Storing this to Riak should look familiar by now: + +```python +booksBucket = myClient.bucket('books') +newBook = booksBucket.new(book['isbn'], data=book) +newBook.store() +``` + +Some of you may be thinking, "But how does the Python Riak client +encode/decode my object?" If we fetch our book back and print the raw +encoded data, we shall know: + +```python +fetchedBook = booksBucket.get(book['isbn']) + +print(fetchedBook.encoded_data) +``` + +JSON! The Riak Python client library encodes things as JSON when it can. + +```json +{"body": "Call me Ishmael. Some years ago...", +"author": "Herman Melville", "isbn": "1111979723", +"copies_owned": 3, "title": "Moby Dick"} +``` + +If we wanted to get a deserialized object back we would just use the +regular `fetchedBook.data` method. + +Finally, let’s clean up our mess: + +```python +fetchedBook.delete() +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/getting-started/python/object-modeling.md b/content/riak/kv/3.0.4/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..6d659e5d70 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/getting-started/python/object-modeling.md @@ -0,0 +1,264 @@ +--- +title_supertext: "Getting Started:" +title: "Object Modeling with Python" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Object Modeling" + identifier: "getting_started_python_object" + weight: 102 + parent: "getting_started_python" +toc: true +aliases: + - /riak/3.0.4/dev/taste-of-riak/object-modeling-python + - /riak/kv/3.0.4/dev/taste-of-riak/object-modeling-python +--- + +To get started, let's create the data structures that we'll be using. + +```python +from datetime import datetime +import string +import riak + + +marleen = {'user_name': 'marleenmgr', + 'full_name': 'Marleen Manager', + 'email': 'marleen.manager@basho.com'} + +joe = {'user_name': 'joeuser', + 'full_name': 'Joe User', + 'email': 'joe.user@basho.com'} + +msg = {'sender': marleen['user_name'], + 'recipient': joe['user_name'], + 'created': datetime.utcnow().isoformat(), + 'text': 'Welcome to the company!'} +``` + +As you can see, we first create a user, and then we can use that user to +create a message. To send this message we can append it to one or more +`Timeline`s. 
If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<username>_<type>_<date>` | `joeuser_Sent_2014-03-06`<br>`marketing_group_Inbox_2014-03-06` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_<type>_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06` or `marketing_group_Inbox_2014-03-06`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object model, let's write some
+repositories to help create and work with these objects in Riak:
+
+```python
+class UserRepository:
+    BUCKET = 'Users'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, user):
+        riak_obj = self.client.bucket(self.BUCKET).get(user['user_name'])
+        riak_obj.data = user
+        return riak_obj.store()
+
+    def get(self, user_name):
+        riak_obj = self.client.bucket(self.BUCKET).get(user_name)
+        return riak_obj.data
+
+
+class MsgRepository:
+    BUCKET = 'Msgs'
+
+    def __init__(self, client):
+        self.client = client
+
+    def save(self, msg):
+        msgs = self.client.bucket(self.BUCKET)
+        key = self._generate_key(msg)
+
+        riak_obj = msgs.get(key)
+
+        if not riak_obj.exists:
+            riak_obj.data = msg
+            riak_obj.store(if_none_match=True)
+
+        return riak_obj
+
+    def get(self, key):
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _generate_key(self, msg):
+        return msg['sender'] + '_' + msg['created']
+
+
+class TimelineRepository:
+    BUCKET = 'Timelines'
+    SENT = 'Sent'
+    INBOX = 'Inbox'
+
+    def __init__(self, client):
+        self.client = client
+        self.msg_repo = MsgRepository(client)
+
+    def post_message(self, msg):
+        # Save the canonical copy
+        saved_message = self.msg_repo.save(msg)
+        msg_key = saved_message.key
+
+        # Post to sender's Sent timeline
+        self._add_to_timeline(msg, self.SENT, msg_key)
+
+        # Post to recipient's Inbox timeline
+        self._add_to_timeline(msg, self.INBOX, msg_key)
+
+    def get_timeline(self, owner, msg_type, date):
+        key = self._generate_key(owner, msg_type, date)
+        riak_obj = self.client.bucket(self.BUCKET).get(key)
+        return riak_obj.data
+
+    def _add_to_timeline(self, msg, msg_type, msg_key):
+        timeline_key = self._generate_key_from_msg(msg, msg_type)
+        riak_obj = self.client.bucket(self.BUCKET).get(timeline_key)
+
+        if riak_obj.exists:
+            riak_obj = self._add_to_existing_timeline(riak_obj,
+                                                      msg_key)
+        else:
+            riak_obj = self._create_new_timeline(riak_obj,
+                                                 msg, msg_type,
+                                                 msg_key)
+
+        return riak_obj.store()
+
+    def _create_new_timeline(self, riak_obj, msg, msg_type, msg_key):
+        owner = self._get_owner(msg, msg_type)
+        new_timeline = {'owner': owner,
+                        'msg_type': msg_type,
+                        'msgs': [msg_key]}
+
+        riak_obj.data = new_timeline
+        return riak_obj
+
+    def _add_to_existing_timeline(self, riak_obj, msg_key):
+        riak_obj.data['msgs'].append(msg_key)
+        return riak_obj
+
+    def _get_owner(self, msg, msg_type):
+        if msg_type == self.INBOX:
+            return msg['recipient']
+        else:
+            return msg['sender']
+
+    def _generate_key_from_msg(self, msg, msg_type):
+        owner = self._get_owner(msg, msg_type)
+        return self._generate_key(owner, msg_type, msg['created'])
+
+    def _generate_key(self, owner, msg_type, datetimestr):
+        dateString = string.split(datetimestr, 'T', 1)[0]
+        return owner + '_' + msg_type + '_' + dateString
+
+```
+
+Finally, let's test them:
+
+```python
+# Setup our repositories
+client = riak.RiakClient(pb_port=10017, protocol='pbc')
+userRepo = UserRepository(client)
+msgsRepo = MsgRepository(client)
+timelineRepo = TimelineRepository(client)
+
+# Save users
+userRepo.save(marleen)
+userRepo.save(joe)
+
+# Post msg to timelines
+timelineRepo.post_message(msg)
+
+# Get Joe's inbox for today, get first message
+joes_inbox_today = timelineRepo.get_timeline(
+    joe['user_name'],
+    TimelineRepository.INBOX,
+    datetime.utcnow().isoformat())
+
+joes_first_message = msgsRepo.get(joes_inbox_today['msgs'][0])
+
+print 'From: {0}\nMsg : {1}\n\n'.format(
+    joes_first_message['sender'],
+    joes_first_message['text'])
+
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+* It helps us to see if an object exists before creating a new one
+* It keeps our buckets and key names consistent
+* It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/python/querying.md b/content/riak/kv/3.0.4/developing/getting-started/python/querying.md
new file mode 100644
index 0000000000..177ee8a9a9
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/python/querying.md
+---
+title_supertext: "Getting Started:"
+title: "Querying with Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Querying"
+    identifier: "getting_started_python_query"
+    weight: 101
+    parent: "getting_started_python"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/querying-python
+  - /riak/kv/3.0.4/dev/taste-of-riak/querying-python
+---
+
+#### A Quick Note on Querying and Schemas
+_Schemas_? Yes, we said that correctly, S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can range from something as simple as using the same key across multiple buckets for different types of data to having fields in your data that are related by name.
These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```python +import riak + +# Creating Data + +customer = { + 'customer_id': 1, + 'name': "John Smith", + 'address': "123 Main Street", + 'city': "Columbus", + 'state': "Ohio", + 'zip': "43210", + 'phone': "+1-614-555-5555", + 'created_date': "2013-10-01 14:30:26" +} + +orders = [ + { + 'order_id': 1, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "TCV37GIT4NJ", + 'title': "USB 3.0 Coffee Warmer", + 'price': 15.99 + }, + { + 'item_id': "PEG10BBF2PP", + 'title': "eTablet Pro, 24GB, Grey", + 'price': 399.99 + } + ], + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'customer_id': 1, + 'salesperson_id': 9001, + 'items': [ + { + 'item_id': "OAX19XWN0QP", + 'title': "GoSlo Digital Camera", + 'price': 359.99 + } + ], + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'customer_id': 1, + 'salesperson_id': 9000, + 'items': [ + { + 'item_id': "WYK12EPU5EZ", + 'title': "Call of Battle: Goats - Gamesphere 4", + 'price': 69.99 + }, + { + 'item_id': "TJB84HAA8OA", + 'title': "Bricko Building Blocks", + 'price': 4.99 + } + ], + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + }] + +order_summary = { + 'customer_id': 1, + 'summaries': [ + { + 'order_id': 1, + 'total': 415.98, + 'order_date': "2013-10-01 14:42:26" + }, + { + 'order_id': 2, + 'total': 359.99, + 'order_date': "2013-10-15 16:43:16" + }, + { + 'order_id': 3, + 'total': 74.98, + 'order_date': "2013-11-03 17:45:28" + } + ] +} + + +# Starting Client +client = riak.RiakClient(pb_port=10017, protocol='pbc') + +# Creating Buckets +customer_bucket = client.bucket('Customers') +order_bucket = client.bucket('Orders') +order_summary_bucket = client.bucket('OrderSummaries') + + +# Storing Data +cr = customer_bucket.new(str(customer['customer_id']), + data=customer) +cr.store() + +for order in orders: + order_riak = order_bucket.new(str(order['order_id']), + data=order) + order_riak.store() + +os = order_summary_bucket.new(str(order_summary['customer_id']), + data=order_summary) +os.store() +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't change), the `Order Summaries` object will likely change often. 
It will do double duty by acting as an index for all customer orders, and also holding some relevant data such as the order total, etc. If we showed this information in our application often, it's only one extra request to get all the info.
+
+```python
+customer = customer_bucket.get('1').data
+customer['order_summary'] = order_summary_bucket.get('1').data
+customer
+```
+
+Which returns our amalgamated objects:
+
+```python
+{
+  u'city': u'Columbus', u'name': u'John Smith', u'zip': u'43210',
+  u'created_date': u'2013-10-01 14:30:26',
+  'order_summary': {
+    u'customer_id': 1, u'summaries': [
+      {u'order_id': 1, u'order_date': u'2013-10-01 14:42:26', u'total': 415.98},
+      {u'order_id': 2, u'order_date': u'2013-10-15 16:43:16', u'total': 359.99},
+      {u'order_id': 3, u'order_date': u'2013-11-03 17:45:28', u'total': 74.98}
+    ]},
+  u'phone': u'+1-614-555-5555', u'state': u'Ohio', u'address': u'123 Main Street',
+  u'customer_id': 1
+}
+```
+
+While this pattern is very easy and extremely fast with respect to queries and complexity, it's up to the application to know about these intrinsic relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{}}riak/kv/3.0.4/setup/planning/backend/memory) or [LevelDB]({{}}riak/kv/3.0.4/setup/planning/backend/leveldb). [Bitcask]({{}}riak/kv/3.0.4/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{}}riak/kv/3.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL indexes. They are a way to quickly look up objects based on a secondary key, without scanning through the whole dataset. This makes it very easy to find groups of related data by values, or even ranges of values. To properly show this off, we will now add some more data to our application, and add some secondary index entries at the same time.
+
+```python
+for i in range(1, 4):
+    order = order_bucket.get(str(i))
+    # Initialize our secondary indices
+    order.add_index('salesperson_id_int', order.data['salesperson_id'])
+    order.add_index('order_date_bin', order.data['order_date'])
+    order.store()
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```python
+janes_orders = order_bucket.get_index("salesperson_id_int", 9000)
+janes_orders.results
+```
+
+Which returns:
+
+```text
+['1', '3']
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in during October 2013. In this case, we can exploit 2i's range queries. Let's search the `order_date_bin` index for entries between `2013-10-01` and `2013-10-31`.
+
+```python
+october_orders = order_bucket.get_index("order_date_bin",
+                                        "2013-10-01", "2013-10-31")
+october_orders.results
+```
+
+Which returns:
+
+```text
+['1', '2']
+```
+
+Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes.
+
+So to recap:
+
+* You can use Secondary Indexes to quickly look up an object based on a secondary id other than the object's key.
+* Indexes can have either Integer or Binary(String) keys
+* You can search for specific values, or a range of values
+* Riak will return a list of keys that match the index query
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/ruby.md b/content/riak/kv/3.0.4/developing/getting-started/ruby.md
new file mode 100644
index 0000000000..c8ab14403c
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/ruby.md
+---
+title: "Getting Started with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Ruby"
+    identifier: "getting_started_ruby"
+    weight: 101
+    parent: "developing_getting_started"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/ruby
+  - /riak/kv/3.0.4/dev/taste-of-riak/ruby
+---
+
+
+
+If you haven't set up a Riak Node and started it, please visit [Running A Cluster]({{}}riak/kv/3.0.4/using/running-a-cluster) first. To try this flavor
+of Riak, a working installation of Ruby is required.
+
+## Client Setup
+
+First, install the Riak Ruby client via RubyGems.
+
+```bash
+gem install riak-client
+```
+
+Start IRB, the Ruby REPL, and let’s get set up. Enter the following into
+IRB:
+
+```ruby
+require 'riak'
+```
+
+If you are using a single local Riak node, use the following to create a
+new client instance, assuming that the node is running on `localhost`
+port 8087:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 8087)
+
+# Since the Ruby Riak client uses the Protocol Buffers API by default,
+# you can also just enter this:
+client = Riak::Client.new(:pb_port => 8087)
+```
+
+If you set up a local Riak cluster using the [[five-minute install]]
+method, use this code snippet instead:
+
+```ruby
+client = Riak::Client.new(:protocol => "pbc", :pb_port => 10017)
+
+# For the reasons explained in the snippet above, this will also work:
+client = Riak::Client.new(:pb_port => 10017)
+```
+
+We are now ready to start interacting with Riak.
+
+## Next Steps
+
+[CRUD Operations]({{}}riak/kv/3.0.4/developing/getting-started/ruby/crud-operations)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/3.0.4/developing/getting-started/ruby/crud-operations.md
new file mode 100644
index 0000000000..271f57afd2
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/ruby/crud-operations.md
+---
+title_supertext: "Getting Started:"
+title: "CRUD Operations with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "CRUD Operations"
+    identifier: "getting_started_ruby_crud"
+    weight: 100
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+---
+
+## Creating Objects In Riak
+
+First, let’s create a few objects and a bucket to keep them in.
+
+```ruby
+my_bucket = client.bucket("test")
+
+val1 = 1
+obj1 = my_bucket.new('one')
+obj1.data = val1
+obj1.store()
+```
+
+In this first example we have stored the integer 1 with the lookup key
+of `one`. Next, let’s store a simple string value of `two` with a
+matching key.
+
+```ruby
+val2 = "two"
+obj2 = my_bucket.new('two')
+obj2.data = val2
+obj2.store()
+```
+
+That was easy. Finally, let’s store a bit of JSON. You will probably
+recognize the pattern by now.
+
+```ruby
+val3 = { myValue: 3 }
+obj3 = my_bucket.new('three')
+obj3.data = val3
+obj3.store()
+```
+
+## Reading Objects From Riak
+
+Now that we have a few objects stored, let’s retrieve them and make sure
+they contain the values we expect.
+
+```ruby
+fetched1 = my_bucket.get('one')
+fetched2 = my_bucket.get('two')
+fetched3 = my_bucket.get('three')
+
+fetched1.data == val1
+fetched2.data == val2
+fetched3.data.to_json == val3.to_json
+```
+
+That was easy. We simply request the objects by key. In the last
+example, we converted to JSON so we can compare a string key to a symbol
+key.
+
+## Updating Objects In Riak
+
+While some data may be static, other forms of data may need to be
+updated. This is also easy to accomplish. Let’s update the value of
+myValue in the 3rd example to 42.
+
+```ruby
+fetched3.data["myValue"] = 42
+fetched3.store()
+```
+
+## Deleting Objects From Riak
+
+As a last step, we’ll demonstrate how to delete data. You’ll see that
+the delete message can be called either against the bucket or the
+object.
+
+```ruby
+my_bucket.delete('one')
+obj2.delete()
+obj3.delete()
+```
+
+## Working With Complex Objects
+
+Since the world is a little more complicated than simple integers and
+bits of strings, let’s see how we can work with more complex objects.
+Take, for example, this Ruby hash that encapsulates some knowledge about
+a book.
+
+```ruby
+book = {
+  :isbn => '1111979723',
+  :title => 'Moby Dick',
+  :author => 'Herman Melville',
+  :body => 'Call me Ishmael. Some years ago...',
+  :copies_owned => 3
+}
+```
+
+All right, so we have some information about our Moby Dick collection
+that we want to save. Storing this to Riak should look familiar by now.
+
+```ruby
+books_bucket = client.bucket('books')
+new_book = books_bucket.new(book[:isbn])
+new_book.data = book
+new_book.store()
+```
+
+Some of you may be thinking, "But how does the Ruby Riak client
+encode/decode my object?" If we fetch our book back and print the raw
+data, we shall know:
+
+```ruby
+fetched_book = books_bucket.get(book[:isbn])
+puts fetched_book.raw_data
+```
+
+Raw Data:
+
+```json
+{"isbn":"1111979723","title":"Moby Dick","author":"Herman Melville",
+"body":"Call me Ishmael. Some years ago...","copies_owned":3}
+```
+
+JSON! The Ruby Riak client will serialize objects to JSON when it comes
+across structured data like hashes. For more advanced control over
+serialization you can use a library called
+[Ripple](https://github.com/basho/ripple), which is a rich Ruby modeling
+layer over the basic Riak client. Ripple falls outside the scope of
+this document, but we shall visit it later.
+
+Now, let’s clean up our mess:
+
+```ruby
+new_book.delete()
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/3.0.4/developing/getting-started/ruby/object-modeling.md
new file mode 100644
index 0000000000..ccd7b9ba39
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/ruby/object-modeling.md
+---
+title_supertext: "Getting Started:"
+title: "Object Modeling with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Object Modeling"
+    identifier: "getting_started_ruby_object"
+    weight: 102
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/object-modeling-ruby
+  - /riak/kv/3.0.4/dev/taste-of-riak/object-modeling-ruby
+---
+
+To get started, let's create the models that we'll be using. Since the
+[Ruby Riak Client](https://github.com/basho/riak-ruby-client) uses
+hashes when converting to and from JSON, we'll use the library
+[Hashie](http://rdoc.info/github/intridea/hashie) to help automatically
+coerce class properties to and from hashes. You can install this library
+with `gem install hashie`.
+
+```ruby
+# Encoding: utf-8
+
+require 'riak'
+require 'hashie'
+require 'time'
+
+class User < Hashie::Dash
+  property :user_name
+  property :full_name
+  property :email
+end
+
+class Msg < Hashie::Dash
+  property :from
+  property :to
+  property :created
+  property :text
+end
+
+class Timeline < Hashie::Dash
+  property :owner
+  property :type
+  property :msgs
+end
+```
+
+To use these classes to store data, we will first have to create a user.
+Then, when a user creates a message, we will append that message to one
+or more timelines. If it's a private message, we'll append it to the
+Recipient's `Inbox` timeline and the User's own `Sent` timeline. If it's
+a group message, we'll append it to the Group's timeline, as well as to
+the User's `Sent` timeline.
+
+#### Buckets and Keys Revisited
+
+Now that we've worked out how we will differentiate data in the system,
+let's figure out our bucket and key names.
+
+The bucket names are straightforward. We can use `Users`, `Msgs`, and
+`Timelines`. The key names, however, are a little more tricky. In past
+examples we've used sequential integers, but this presents a problem: we
+would need a secondary service to hand out these IDs. This service could
+easily be a future bottleneck in the system, so let's use a natural key.
+Natural keys are a great fit for key/value systems because both humans
+and computers can easily construct them when needed, and most of the
+time they can be made unique enough for a KV store.
+
+Bucket | Key Pattern | Example Key
+:------|:------------|:-----------
+`Users` | `<user_name>` | `joeuser`
+`Msgs` | `<username>_<datetime>` | `joeuser_2014-03-06T02:05:13.223556Z`
+`Timelines` | `<username>_<type>_<date>` | `joeuser_Sent_2014-03-06Z`<br>`marketing_group_Inbox_2014-03-06Z` |
+
+For the `Users` bucket, we can be certain that we will want each
+username to be unique, so let's use the `username` as the key. For the
+`Msgs` bucket, let's use a combination of the username and the posting
+datetime in an [ISO 8601 Long](http://en.wikipedia.org/wiki/ISO_8601)
+format. This combination gives us the pattern `<username>_<datetime>`,
+which produces keys like `joeuser_2014-03-05T23:20:28`.
+
+Now for `Timelines`, we need to differentiate between `Inbox` and `Sent`
+timelines, so we can simply add that type into the key name. We will
+also want to partition each collection object into some time period,
+that way the object doesn't grow too large (see note below).
+
+For `Timelines`, let's use the pattern `<username>_<type>_<date>` for
+users, and `<groupname>_Inbox_<date>` for groups, which will look like
+`joeuser_Sent_2014-03-06Z` or `marketing_group_Inbox_2014-03-05Z`,
+respectively.
+
+{{% note title="Note" %}}
+Riak performs best with objects under 1-2MB. Objects larger than that can hurt
+performance, especially if many siblings are being created. We will cover
+siblings, sibling resolution, and sibling explosions in the next chapter.
+{{% /note %}}
+
+#### Keeping our story straight with repositories
+
+Now that we've figured out our object models, let's write some
+repositories to help create and work with these objects in Riak:
+
+```ruby
+class UserRepository
+  BUCKET = 'Users'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(user)
+    users = @client.bucket(BUCKET)
+    key = user.user_name
+
+    riak_obj = users.get_or_new(key)
+    riak_obj.data = user
+    riak_obj.content_type = 'application/json'
+    riak_obj.store
+  end
+
+  def get(user_name)
+    riak_obj = @client.bucket(BUCKET)[user_name]
+    User.new(riak_obj.data)
+  end
+end
+
+class MsgRepository
+  BUCKET = 'Msgs'
+
+  def initialize(client)
+    @client = client
+  end
+
+  def save(msg)
+    msgs = @client.bucket(BUCKET)
+    key = generate_key(msg)
+
+    return msgs.get(key) if msgs.exists?(key)
+    riak_obj = msgs.new(key)
+    riak_obj.data = msg
+    riak_obj.content_type = 'application/json'
+    riak_obj.prevent_stale_writes = true
+    riak_obj.store(returnbody: true)
+  end
+
+  def get(key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    Msg.new(riak_obj.data)
+  end
+
+  def generate_key(msg)
+    msg.from + '_' + msg.created.utc.iso8601(6)
+  end
+end
+
+class TimelineRepository
+  BUCKET = 'Timelines'
+  SENT = 'Sent'
+  INBOX = 'Inbox'
+
+  def initialize(client)
+    @client = client
+    @msg_repo = MsgRepository.new(client)
+  end
+
+  def post_message(msg)
+    # Save the canonical copy
+    saved_message = @msg_repo.save(msg)
+    # Post to sender's Sent timeline
+    add_to_timeline(msg, SENT, saved_message.key)
+    # Post to recipient's Inbox timeline
+    add_to_timeline(msg, INBOX, saved_message.key)
+  end
+
+  def get_timeline(owner, type, date)
+    riak_obj = @client.bucket(BUCKET).get(generate_key(owner, type, date))
+    Timeline.new(riak_obj.data)
+  end
+
+  private
+
+  def add_to_timeline(msg, type, msg_key)
+    timeline_key = generate_key_from_msg(msg, type)
+    riak_obj = nil
+
+    if @client.bucket(BUCKET).exists?(timeline_key)
+      riak_obj = add_to_existing_timeline(timeline_key, msg_key)
+    else
+      riak_obj = create_new_timeline(timeline_key, msg, type, msg_key)
+    end
+
+    riak_obj.store
+  end
+
+  def create_new_timeline(key, msg, type, msg_key)
+    owner = get_owner(msg, type)
+    riak_obj = @client.bucket(BUCKET).new(key)
+    riak_obj.data = Timeline.new(owner: owner,
+                                 type: type,
+                                 msgs: [msg_key])
+    riak_obj.content_type = 'application/json'
+    riak_obj
+  end
+
+  def add_to_existing_timeline(key, msg_key)
+    riak_obj = @client.bucket(BUCKET).get(key)
+    timeline = Timeline.new(riak_obj.data)
+    timeline.msgs << msg_key
+    riak_obj.data = timeline
+    riak_obj
+  end
+
+  def get_owner(msg, type)
+    type == INBOX ? msg.to : msg.from
+  end
+
+  def generate_key_from_msg(msg, type)
+    owner = get_owner(msg, type)
+    generate_key(owner, type, msg.created)
+  end
+
+  def generate_key(owner, type, date)
+    owner + '_' + type + '_' + date.utc.strftime('%F')
+  end
+end
+```
+
+Finally, let's test them:
+
+```ruby
+# Setup our repositories
+client = Riak::Client.new(protocol: 'pbc', pb_port: 10017)
+user_repo = UserRepository.new(client)
+msgs_repo = MsgRepository.new(client)
+timeline_repo = TimelineRepository.new(client)
+
+# Create and save users
+marleen = User.new(user_name: 'marleenmgr',
+                   full_name: 'Marleen Manager',
+                   email: 'marleen.manager@basho.com')
+
+joe = User.new(user_name: 'joeuser',
+               full_name: 'Joe User',
+               email: 'joe.user@basho.com')
+
+user_repo.save(marleen)
+user_repo.save(joe)
+
+# Create new Msg, post to timelines
+msg = Msg.new(from: marleen.user_name,
+              to: joe.user_name,
+              created: Time.now,
+              text: 'Welcome to the company!')
+
+timeline_repo.post_message(msg)
+
+# Get Joe's inbox for today, get first message
+joes_inbox_today = timeline_repo.get_timeline(joe.user_name, 'Inbox', Time.now)
+joes_first_message = msgs_repo.get(joes_inbox_today.msgs.first)
+
+puts "From: #{joes_first_message.from}\nMsg : #{joes_first_message.text}"
+```
+
+As you can see, the repository pattern helps us with a few things:
+
+* It helps us to see if an object exists before creating a new one
+* It keeps our buckets and key names consistent
+* It provides us with a consistent interface to work with.
+
+While this set of repositories solves many of our problems, it is very
+minimal and doesn't cover all the edge cases. For instance, what happens
+if two different people try to create a user with the same username?
+
+We can also easily "compute" key names now, but how do we quickly look
+up the last 10 messages a user sent? Many of these answers will be
+application dependent. If your application shows the last 10 messages in
+reverse order, for example, you may want to store that set of data in
+another collection object to make lookup faster. There are drawbacks to
+every solution, but we recommend seeking out the key/value-based
+solution first, as it will likely be the quickest.
+
+So to recap, in this chapter we learned:
+
+* How to choose bucket names
+* How to choose natural keys based on how we want to partition our data.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/getting-started/ruby/querying.md b/content/riak/kv/3.0.4/developing/getting-started/ruby/querying.md
new file mode 100644
index 0000000000..f5b02a14bd
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/getting-started/ruby/querying.md
+---
+title_supertext: "Getting Started:"
+title: "Querying with Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Querying"
+    identifier: "getting_started_ruby_query"
+    weight: 101
+    parent: "getting_started_ruby"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/taste-of-riak/querying-ruby
+  - /riak/kv/3.0.4/dev/taste-of-riak/querying-ruby
+---
+
+#### A Quick Note on Querying and Schemas
+_Schemas_? Yes, we said that correctly, S-C-H-E-M-A-S. It's not a dirty word.
+Even with a Key/Value store, you will still have a logical database schema of how all the data relates to one another. This can range from something as simple as using the same key across multiple buckets for different types of data to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back.
This can be as simple as using the same key across multiple buckets for different types of data, to having fields in your data that are related by name. These querying methods will introduce you to some ways of laying out your data in Riak, along with how to query it back. + +### Denormalization + +If you're coming from a relational database, the easiest way to get your application's feet wet with NoSQL is to denormalize your data into related chunks. For example with a customer database, you might have separate tables for Customers, Addresses, Preferences, etc. In Riak, you can denormalize all that associated data into a single object and store it into a `Customer` bucket. You can keep pulling in associated data until you hit one of the big denormalization walls: + +* Size Limits (objects greater than 1MB) +* Shared/Referential Data (data that the object doesn't "own") +* Differences in Access Patterns (objects that get read/written once vs. often) + +At one of these points we will have to split the model. + +### Same Keys - Different Buckets + +The simplest way to split up data would be to use the same identity key across different buckets. A good example of this would be a `Customer` object, an `Order` object, and an `OrderSummaries` object that keeps rolled up info about orders such as Total, etc. Let's put some data into Riak so we can play with it. + +```ruby +# Encoding: utf-8 + +require 'riak' +require 'pp' + +# Starting Client +client = Riak::Client.new protocol: 'pbc', pb_port: 10017 + +# Creating Data +customer = { + customer_id: 1, + name: 'John Smith', + address: '123 Main Street', + city: 'Columbus', + state: 'Ohio', + zip: '43210', + phone: '+1-614-555-5555', + created_date: Time.parse('2013-10-1 14:30:26') +} + +orders = [ + { + order_id: 1, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'TCV37GIT4NJ', + title: 'USB 3.0 Coffee Warmer', + price: 15.99 + }, + { + item_id: 'PEG10BBF2PP', + title: 'eTablet Pro, 24GB, Grey', + price: 399.99 + } + ], + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + customer_id: 1, + salesperson_id: 9001, + items: [ + { + item_id: 'OAX19XWN0QP', + title: 'GoSlo Digital Camera', + price: 359.99 + } + ], + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + customer_id: 1, + salesperson_id: 9000, + items: [ + { + item_id: 'WYK12EPU5EZ', + title: 'Call of Battle: Goats - Gamesphere 4', + price: 69.99 + }, + { + item_id: 'TJB84HAA8OA', + title: 'Bricko Building Blocks', + price: 4.99 + } + ], + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + }] + +order_summary = { + customer_id: 1, + summaries: [ + { + order_id: 1, + total: 415.98, + order_date: Time.parse('2013-10-1 14:42:26') + }, + { + order_id: 2, + total: 359.99, + order_date: Time.parse('2013-10-15 16:43:16') + }, + { + order_id: 3, + total: 74.98, + order_date: Time.parse('2013-11-3 17:45:28') + } + ] +} + +# Creating Buckets and Storing Data +customer_bucket = client.bucket('Customers') +cr = customer_bucket.new(customer[:customer_id].to_s) +cr.data = customer +cr.store + +order_bucket = client.bucket('Orders') +orders.each do |order| + order_riak = order_bucket.new(order[:order_id].to_s) + order_riak.data = order + order_riak.store +end + +order_summary_bucket = client.bucket('OrderSummaries') +os = order_summary_bucket.new(order_summary[:customer_id].to_s) +os.data = order_summary +os.store +``` + + While individual `Customer` and `Order` objects don't change much (or shouldn't 
change), the `OrderSummaries` object will likely change often. It will do
+double duty by acting as an index of all of a customer's orders while also
+holding some relevant data such as the order total, etc. If we show this
+information in our application often, it's only one extra request to get all
+the info.
+
+```ruby
+shared_key = '1'
+customer = customer_bucket.get(shared_key).data
+customer[:order_summary] = order_summary_bucket.get(shared_key).data
+puts "Combined Customer and Order Summary: "
+pp customer
+```
+
+Which returns our amalgamated objects:
+
+```ruby
+# Combined Customer and Order Summary:
+{"customer_id"=>1,
+ "name"=>"John Smith",
+ "address"=>"123 Main Street",
+ "city"=>"Columbus",
+ "state"=>"Ohio",
+ "zip"=>"43210",
+ "phone"=>"+1-614-555-5555",
+ "created_date"=>"2013-10-01 14:30:26 -0400",
+ :order_summary=>
+  {"customer_id"=>1,
+   "summaries"=>
+    [{"order_id"=>1,
+      "total"=>415.98,
+      "order_date"=>"2013-10-01 14:42:26 -0400"},
+     {"order_id"=>2,
+      "total"=>359.99,
+      "order_date"=>"2013-10-15 16:43:16 -0400"},
+     {"order_id"=>3,
+      "total"=>74.98,
+      "order_date"=>"2013-11-03 17:45:28 -0500"}]}}
+```
+
+While this pattern is very easy and extremely fast with respect to queries
+and complexity, it's up to the application to know about these intrinsic
+relationships.
+
+
+### Secondary Indexes
+
+{{% note %}}
+Secondary indexes in Riak KV require a sorted backend: [Memory]({{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/memory) or [LevelDB]({{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb). [Bitcask]({{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask) does not support secondary indexes.
+
+See [Using Secondary Indexes (2i)]({{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes) for more information on developing with secondary indexes.
+{{% /note %}}
+
+If you're coming from a SQL world, Secondary Indexes (2i) are a lot like SQL
+indexes. They are a way to quickly look up objects based on a secondary key,
+without scanning through the whole dataset. This makes it very easy to find
+groups of related data by values, or even ranges of values. To properly show
+this off, we will now add some more data to our application, and add some
+secondary index entries at the same time.
+
+```ruby
+(1..3).each do |i|
+  order = order_bucket.get(i.to_s)
+  # Initialize our secondary indices
+  order.indexes['salesperson_id_int'] = []
+  order.indexes['order_date_bin'] = []
+
+  order.indexes['salesperson_id_int'] << order.data['salesperson_id']
+  order.indexes['order_date_bin'] << Time.parse(order.data['order_date'])
+                                         .strftime('%Y%m%d')
+  order.store
+end
+```
+
+As you may have noticed, ordinary Key/Value data is opaque to 2i, so we have
+to add entries to the indexes at the application level.
+Now let's find all of Jane Appleseed's processed orders. We'll look up the
+orders by searching the `salesperson_id_int` index for Jane's id of `9000`.
+
+```ruby
+puts "# Jane's Orders: "
+pp order_bucket.get_index('salesperson_id_int', 9000)
+```
+
+Which returns:
+
+```ruby
+# Jane's Orders:
+["1", "3"]
+```
+
+Jane processed orders 1 and 3. We used an "integer" index to reference Jane's
+id; next, let's use a "binary" index.
+Now, let's say that the VP of Sales wants to know how many orders came in
+during October 2013. In this case, we can exploit 2i's range queries. Let's
+search the `order_date_bin` index for entries between `20131001` and `20131031`.
+ +```ruby +puts "#October's Orders: " +pp order_bucket.get_index('order_date_bin', '20131001'..'20131031') +``` + +Which returns: + +```ruby +# October's Orders: +["1", "2"] +``` + +Boom, easy-peasy. We used 2i's range feature to search for a range of values, and demonstrated binary indexes. + +So to recap: + +* You can use Secondary Indexes to quickly lookup an object based on a secondary id other than the object's key. +* Indexes can have either Integer or Binary(String) keys +* You can search for specific values, or a range of values +* Riak will return a list of keys that match the index query + + + + diff --git a/content/riak/kv/3.0.4/developing/key-value-modeling.md b/content/riak/kv/3.0.4/developing/key-value-modeling.md new file mode 100644 index 0000000000..47695c322f --- /dev/null +++ b/content/riak/kv/3.0.4/developing/key-value-modeling.md @@ -0,0 +1,535 @@ +--- +title: "Riak KV Key/Value Modeling" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Key/Value Modeling" + identifier: "developing_kv_model" + weight: 104 + parent: "developing" +toc: true +aliases: + - /riak/3.0.4/dev/data-modeling/key-value/ + - /riak/kv/3.0.4/dev/data-modeling/key-value/ +--- + +While Riak enables you to take advantage of a wide variety of features +that can be useful in application development, such as [Search]({{}}riak/kv/3.0.4/developing/usage/search), [secondary indexes (2i)]({{}}riak/kv/3.0.4/developing/usage/secondary-indexes/), and [Riak Data Types]({{}}riak/kv/3.0.4/developing/data-types/), Riak almost always performs best when you +build your application around basic CRUD operations (create, read, +update, and delete) on objects, i.e. when you use Riak as a "pure" +key/value store. + +In this tutorial, we'll suggest some strategies for naming and modeling +for key/value object interactions with Riak. If you'd like to use some +of Riak's other features, we recommend checking out the documentation +for each of them or consulting our guide to [building applications with Riak]({{}}riak/kv/3.0.4/developing/app-guide/) for a better sense of which features you might need. + +## Advantages of Key/Value Operations + +Riak's key/value architecture enables it to be more performant than +relational databases in many scenarios because Riak doesn't need to +perform lock, join, union, or other operations when working with +objects. Instead, it interacts with objects on a one-by-one basis, using +**primary key lookups**. + +Primary key lookups store and fetch objects in Riak on the basis of +three basic locators: + +* The object's [key]({{}}riak/kv/3.0.4/learn/concepts/keys-and-objects#keys), which can be anything you + want as long as it is [Unicode compliant](http://www.unicode.org/) +* The [bucket]({{}}riak/kv/3.0.4/learn/concepts/buckets) which houses the object and its key (bucket + names are also Unicode compliant) +* The [bucket type]({{}}riak/kv/3.0.4/developing/usage/bucket-types) that determines the bucket's + [replication]({{}}riak/kv/3.0.4/developing/app-guide/replication-properties) and other properties + +It may be useful to think of this system as analogous to a nested +key/value [hash](http://en.wikipedia.org/wiki/Hash_function) as you +would find in most programming languages. Below is an example from +[Ruby](http://www.ruby-doc.org/core-2.1.2/Hash.html). 
The hash
+`simpsons` contains keys for all of the available seasons, while each
+key houses a hash for each episode of that season:
+
+```ruby
+simpsons = {
+  'season 1' => {
+    'episode 1' => 'Simpsons Roasting on an Open Fire',
+    'episode 2' => 'Bart the Genius',
+    # ...
+  },
+  'season 2' => {
+    'episode 1' => 'Bart Gets an "F"',
+    # ...
+  },
+  # ...
+}
+```
+
+If we want to find out the title of an episode, we can retrieve it based
+on hash keys:
+
+```ruby
+simpsons['season 4']['episode 12']
+
+# => "Marge vs. the Monorail"
+```
+
+Storing data in Riak is a lot like this. Let's say that we want to store
+JSON objects with a variety of information about every episode of the
+Simpsons. We could store each season in its own bucket and each episode
+in its own key within that bucket. Here's what the URL structure would
+look like (for the [HTTP API]({{<baseurl>}}riak/kv/3.0.4/developing/api/http)):
+
+```
+GET/PUT/DELETE /bucket/<bucket>/keys/<key>
+```
+
+The most important benefit of storing Riak objects this way is that
+these types of lookup operations are extremely fast. Riak doesn't need
+to search through columns or tables to find an object. If it knows the
+bucket/key "address" of the object, so to speak, it can locate that
+object just about as quickly with billions of objects in a cluster as
+when the cluster holds only a handful of objects.
+
+## Overcoming the Limitations of Key/Value Operations
+
+Using any key/value store can be tricky at first, especially if you're
+used to relational databases. The central difficulty is that your
+application cannot run arbitrary selection queries like `SELECT * FROM
+table`, and so it needs to know where to look for objects in advance.
+
+One of the best ways to enable applications to discover objects in Riak
+more easily is to provide **structured bucket and key names** for
+objects. This approach often involves wrapping information about the
+object _in the object's location data itself_.
+
+Here are some example sources for bucket or key names:
+
+* Timestamps, e.g. `2013-11-05T08:15:30-05:00`
+* [UUID](http://en.wikipedia.org/wiki/Universally_unique_identifier)s,
+  e.g. `9b1899b5-eb8c-47e4-83c9-2c62f0300596`
+* Geographical coordinates, e.g. `40.172N-21.273E`
+
+We could use these markers by themselves or in combination with other
+markers. For example, sensor data keys could be prefaced by `sensor_` or
+`temp_sensor1_` followed by a timestamp (e.g.
+`sensor1_2013-11-05T08:15:30-05:00`), or user data keys could be
+prefaced with `user_` followed by a UUID (e.g.
+`user_9b1899b5-eb8c-47e4-83c9-2c62f0300596`).
+
+Any of the above suggestions could apply to bucket names as well as key
+names. If you were building Twitter using Riak, for example, you could
+store tweets from each user in a different bucket and then construct key
+names using a combination of the prefix `tweet_` and then a timestamp.
+In that case, all the tweets from the user BashoWhisperer123 could be
+housed in a bucket named `BashoWhisperer123`, and keys for tweets would
+look like `tweet_<timestamp>`.
+
+The possibilities are essentially endless and, as always, defined by the
+use case at hand.
+
+## Object Discovery with Riak Sets
+
+Let's say that we've created a solid bucket/key naming scheme for a user
+information store that enables your application to easily fetch user
+records, which are all stored in the bucket `users` with each user's
+username acting as the key. The problem at this point is this: how can
+Riak know which user records actually exist?
+ +One way to determine this is to [list all keys]({{}}riak/kv/3.0.4/developing/api/protocol-buffers/list-keys) in the +bucket `users`. This approach, however, is _not_ recommended, because +listing all keys in a bucket is a very expensive operation that should +not be used in production. And so another strategy must be employed. + +A better possibility is to use [Riak sets]({{}}riak/kv/3.0.4/developing/data-types/#sets) to +store lists of keys in a bucket. Riak sets are a [Riak Data Type]({{}}riak/kv/3.0.4/developing/data-types) that enable you to store lists of binaries or strings in Riak. +Unlike normal Riak objects, you can interact with Riak sets much like +you interact with sets in most programming languages, i.e. you can add +and remove elements at will. + +Going back to our user data example, instead of simply storing user +records in our `users` bucket, we could set up our application to store +each key in a set when a new record is created. We'll store this set in +the bucket `user_info_sets` (we'll keep it simple) and in the key +`usernames`. The following will also assume that we've [set up a bucket type]({{}}riak/kv/3.0.4/developing/data-types/#setting-up-buckets-to-use-riak-data-types) called +`sets`. + +We can interact with that set on the basis of its location: + +```java +Location userIdSet = new Location(new Namespace("sets", "user_info_sets"), "usernames"); + +// With this Location, we can construct fetch operations like this: +FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build(); +``` + +```ruby +require 'riak' + +set_bucket = client.bucket('user_info_sets') + +# We'll make this set global because we'll use it +# inside of a function later on + +$user_id_set = Riak::Crdt::Set.new(set_bucket, 'usernames', 'sets') +``` + +```php +$command = (new \Basho\Riak\Command\Builder\FetchSet($riak)) + ->buildLocation('usernames', 'user_info_sets', 'sets') + ->build(); +``` + +```python +from riak.datatypes import Set + +bucket = client.bucket_type('sets').bucket('user_info_sets') +user_id_set = Set(bucket, 'usernames') +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{}}riak/kv/3.0.4/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{}}riak/kv/3.0.4/developing/getting-started). 
+
+Then, we can create a function that stores a user record's key in that
+set every time a record is created:
+
+```java
+// A User class for constructing user records
+class User {
+    public String username;
+    public String info;
+
+    public User(String username, String info) {
+        this.username = username;
+        this.info = info;
+    }
+}
+
+// A function for storing a user record that has been created
+public void storeUserRecord(User user) throws Exception {
+    // User records themselves will be stored in the bucket "users"
+    Location userObjectLocation =
+        new Location(new Namespace("users"), user.username);
+    RiakObject userObject = new RiakObject()
+        // We'll keep it simple and store User object data as plain text
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create(user.info));
+    StoreValue store = new StoreValue.Builder(userObject)
+        .withLocation(userObjectLocation)
+        .build();
+    client.execute(store);
+
+    Location userIdSet =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    SetUpdate su = new SetUpdate()
+        .add(BinaryValue.create(user.username));
+    UpdateSet update = new UpdateSet.Builder(userIdSet, su)
+        .build();
+    client.execute(update);
+}
+```
+
+```ruby
+class User
+  attr_accessor :username, :info
+end
+
+def store_record(user)
+  # First we create an empty object and specify its bucket and key
+  obj = Riak::RObject.new(client.bucket('users'), user.username)
+
+  # We'll keep it simple by storing plain text for each user's info
+  obj.content_type = 'text/plain'
+  obj.raw_data = user.info
+  obj.store
+
+  # Finally, we'll add the user's username to the global set from above
+  $user_id_set.add(user.username)
+end
+```
+
+```php
+class User
+{
+    public $user_name;
+    public $info;
+
+    public function __construct($user_name, $info)
+    {
+        $this->user_name = $user_name;
+        $this->info = $info;
+    }
+}
+
+function store_user(User $user)
+{
+    global $riak;
+
+    (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->buildLocation($user->user_name, 'users')
+        ->buildJsonObject($user)
+        ->build()
+        ->execute();
+
+    (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->add($user->user_name)
+        ->build()
+        ->execute();
+}
+```
+
+```python
+class User:
+    def __init__(self, username, info):
+        self.username = username
+        self.info = info
+
+# Using the "user_id_set" object from above
+def store_record(user):
+    # First we create an empty object and specify its bucket and key
+    obj = RiakObject(client, client.bucket('users'), user.username)
+
+    # We'll keep it simple by storing plain text for each user's info
+    obj.content_type = 'text/plain'
+    obj.data = user.info
+    obj.store()
+
+    # Finally, we'll add the user's username to the set
+    user_id_set.add(user.username)
+    user_id_set.store()
+```
+
+Now, let's say that we want to be able to pull up all user records in
+the bucket at once.
We could do so by iterating through the usernames
+stored in our set and then fetching the object corresponding to each
+username:
+
+```java
+public Set<User> fetchAllUserRecords() throws Exception {
+    // Empty builder sets for usernames and User objects
+    Set<String> userIds = new HashSet<>();
+    Set<User> userSet = new HashSet<>();
+
+    // Turn the Riak username set into a set of Strings
+    Location userIdSet =
+        new Location(new Namespace("sets", "user_info_sets"), "usernames");
+    FetchSet fetchUserIdSet = new FetchSet.Builder(userIdSet).build();
+    RiakSet set = client.execute(fetchUserIdSet).getDatatype();
+    set.viewAsSet().forEach((BinaryValue username) -> {
+        userIds.add(username.toString());
+    });
+
+    // Fetch User objects for each of the usernames stored in the set
+    for (String username : userIds) {
+        Location userLocation = new Location(new Namespace("users"), username);
+        FetchValue fetch = new FetchValue.Builder(userLocation).build();
+        User user = client.execute(fetch).getValue(User.class);
+        userSet.add(user);
+    }
+    return userSet;
+}
+```
+
+```ruby
+# Using the "user_id_set" set from above
+
+def fetch_all_user_records
+  users_bucket = $client.bucket('users')
+  user_records = Array.new
+  $user_id_set.members.each do |user_id|
+    user_record = users_bucket.get(user_id).data
+    user_records.push(user_record)
+  end
+  user_records
+end
+```
+
+```php
+function fetch_users()
+{
+    global $riak;
+    $users = [];
+
+    $response = (new \Basho\Riak\Command\Builder\FetchSet($riak))
+        ->buildLocation('usernames', 'user_info_sets', 'sets')
+        ->build()
+        ->execute();
+
+    $user_names = $response->getSet()->getData();
+    foreach($user_names as $user_name) {
+        $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+            ->buildLocation($user_name, 'users')
+            ->build()
+            ->execute();
+
+        $users[$user_name] = $response->getObject()->getData();
+    }
+
+    return $users;
+}
+```
+
+```python
+# We'll create a generator that yields Riak objects one at a time
+def fetch_all_user_records():
+    users_bucket = client.bucket('users')
+    user_id_list = list(user_id_set.reload().value)
+    for user_id in user_id_list:
+        yield users_bucket.get(user_id)
+
+# We can materialize the full list of Riak objects later on
+list(fetch_all_user_records())
+```
+
+## Naming and Object Verification
+
+Another advantage of structured naming is that you can prevent queries
+for objects that don't exist or that don't conform to how your
+application has named them. For example, you could store all user data
+in the bucket `users` with keys beginning with the fragment `user_`
+followed by a username, e.g. `user_coderoshi` or `user_macintux`.
If an
+object with an inappropriate key is stored in that bucket, it won't even
+be seen by your application because it will only ever query keys that
+begin with `user_`:
+
+```java
+// Assuming that we've created a class User:
+
+public User getUserByUsername(String username) throws Exception {
+    String usernameKey = String.format("user_%s", username);
+    Location loc = new Location(new Namespace("users"), usernameKey);
+    FetchValue fetchUser = new FetchValue.Builder(loc).build();
+    FetchValue.Response res = client.execute(fetchUser);
+    User userObject = res.getValue(User.class);
+    return userObject;
+}
+```
+
+```ruby
+def get_user_by_username(username)
+  bucket = client.bucket('users')
+  obj = bucket.get("user_#{username}")
+  return obj.raw_data
+end
+```
+
+```php
+function fetchUser($user_name)
+{
+    global $riak;
+
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->buildLocation('user_' . $user_name, 'users')
+        ->build()
+        ->execute();
+
+    return $response->getObject()->getData();
+}
+```
+
+```python
+def get_user_by_username(username):
+    bucket = client.bucket('users')
+    obj = bucket.get('user_{}'.format(username))
+    return obj.data
+```
+
+## Bucket Types as Additional Namespaces
+
+Riak [bucket types]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) have two essential functions:
+they enable you to manage [bucket configurations]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets) in an
+efficient and streamlined way and, more importantly for our purposes
+here, they act as a third namespace in Riak in addition to buckets and
+keys. Thus, in Riak versions 2.0 and later you have access to a third
+layer of information for locating objects if you wish.
+
+While bucket types are typically used to assign different bucket
+properties to groups of buckets, you can also create named bucket types
+that simply extend Riak's [defaults]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types/#bucket-types-as-namespaces) or multiple bucket types that have
+the same configuration but have different names.
+
+Here's an example of creating four bucket types that only extend Riak's
+defaults:
+
+```bash
+riak-admin bucket-type create john
+riak-admin bucket-type create robert
+riak-admin bucket-type create jimmy
+riak-admin bucket-type create john-paul
+```
+
+Or you can create five different bucket types that all set `n_val` to 2
+but have different names:
+
+```bash
+riak-admin bucket-type create earth '{"props":{"n_val":2}}'
+riak-admin bucket-type create fire '{"props":{"n_val":2}}'
+riak-admin bucket-type create wind '{"props":{"n_val":2}}'
+riak-admin bucket-type create water '{"props":{"n_val":2}}'
+riak-admin bucket-type create heart '{"props":{"n_val":2}}'
+```
+
+### Bucket Types Example
+
+To extend our Simpsons example from above, imagine that we become
+dissatisfied with our storage scheme because we want to separate the
+seasons into good seasons and bad seasons (we'll leave it up to you to
+make that determination).
+
+One way to improve our scheme might be to change our bucket naming
+system and preface each bucket name with `good` or `bad`, but a more
+elegant way would be to use bucket types instead. So instead of this URL
+structure...
+
+```
+GET/PUT/DELETE /bucket/<bucket>/keys/<key>
+```
+
+...we can use this structure:
+
+```
+GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+That adds an additional layer of namespacing and enables us to think
+about our data in terms of a deeper hash than in the example above:
+
+```ruby
+simpsons = {
+  'good': {
+    'season X': {
+      { 'episode 1': '<title>' },
+      # ...
+ } + }, + 'bad': { + 'season Y': { + { 'episode 1': '<title>' }, + # ... + } + } +} +``` + +We can fetch the title of season 8, episode 6: + +```ruby +# For the sake of example, we'll classify season 8 as good: + +simpsons['good']['season 8']['episode 6'] + +# => "A Milhouse Divided" +``` + +If your data is best modeled as a three-layered hash, you may want to +consider using bucket types in the way shown above. + +## Resources + +More on key/value modeling in Riak can be found in [this +presentation](http://www.youtube.com/watch?v=-_3Us7Ystyg#aid=P-4heI_bFwo) +by Basho evangelist [Hector Castro](https://github.com/hectcastro), with +the presentation slides available [on Speaker +Deck](https://speakerdeck.com/hectcastro/throw-some-keys-on-it-data-modeling-for-key-value-data-stores-by-example). + + + + diff --git a/content/riak/kv/3.0.4/developing/usage.md b/content/riak/kv/3.0.4/developing/usage.md new file mode 100644 index 0000000000..6f75db0094 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage.md @@ -0,0 +1,138 @@ +--- +title: "Usage Overview" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Usage" + identifier: "developing_usage" + weight: 101 + parent: "developing" +toc: true +aliases: +--- + +## In This Section + +#### [Creating Objects](./creating-objects) + +Creating and storing objects in Riak KV. + +[Learn More >>](./creating-objects) + + +#### [Reading Objects](./reading-objects) + +Reading and fetching objects in Riak KV. + +[Learn More >>](./reading-objects) + + +#### [Updating Objects](./updating-objects) + +Updating objects in Riak KV. + +[Learn More >>](./updating-objects) + + +#### [Deleting Objects](./deleting-objects) + +Deleting objects in Riak KV. + +[Learn More >>](./deleting-objects) + + +#### [Content Types](./content-types) + +Overview of content types and their usage. + +[Learn More >>](./content-types) + + +#### [Using Search](./search) + +Tutorial on using search. + +[Learn More >>](./search) + + +#### [Using MapReduce](./mapreduce) + +Guide to using MapReduce in applications. + +[Learn More >>](./mapreduce) + + +#### [Using Secondary Indexes](./secondary-indexes) + +Overview and usage details of Secondary Indexes (2i). + +[Learn More >>](./secondary-indexes) + + +#### [Bucket Types](./bucket-types) + +Describes how to use bucket properties. + +[Learn More >>](./bucket-types) + + +#### [Using Commit Hooks](./commit-hooks) + +Tutorial on pre-commit and post-commit hook functions. + +[Learn More >>](./commit-hooks) + + +#### [Creating Search Schemas](./search-schemas) + +Step-by-step guide on creating and using custom search schemas. + +[Learn More >>](./search-schemas) + + +#### [Searching with Data Types](./searching-data-types) + +Guide on using search with Data Types. + +[Learn More >>](./searching-data-types) + + +#### [Implementing a Document Store](./document-store) + +Tutorial on using Riak KV as a document store. + +[Learn More >>](./document-store) + + +#### [Custom Extractors](./custom-extractors) + +Details on creating and registering custom extractors with Riak Search. + +[Learn More >>](./custom-extractors) + + +#### [Client-side Security](./security) + +Overview of client-side security. + +[Learn More >>](./security) + + +#### [Replication](./replication) + +Documentation on replication properties and their underlying implementation. + +[Learn More >>](./replication) + + +#### [Conflict Resolution](./conflict-resolution) + +Guide to conflict resolution during object updates. 
+ +[Learn More >>](./conflict-resolution) + + + + diff --git a/content/riak/kv/3.0.4/developing/usage/bucket-types.md b/content/riak/kv/3.0.4/developing/usage/bucket-types.md new file mode 100644 index 0000000000..12669026f1 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/bucket-types.md @@ -0,0 +1,102 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Bucket Types" + identifier: "usage_bucket_types" + weight: 108 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.4/dev/advanced/bucket-types + - /riak/kv/3.0.4/dev/advanced/bucket-types +--- + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +## Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +`default` bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/usage/commit-hooks.md b/content/riak/kv/3.0.4/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..886b6ff731 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/commit-hooks.md @@ -0,0 +1,243 @@ +--- +title: "Using Commit Hooks" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Using Commit Hooks" + identifier: "usage_commit_hooks" + weight: 109 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.4/dev/using/commit-hooks + - /riak/kv/3.0.4/dev/using/commit-hooks +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types + +Pre- and post-commit hooks are functions that are invoked before or +after an object has been written to Riak. 
To provide a few examples,
+commit hooks can:
+
+- allow a write to occur with an unmodified object
+- modify an object
+- fail an update and prevent any modifications to the object
+
+Post-commit hooks are notified _after the fact_ and should not modify
+the object directly. Updating Riak objects while post-commit hooks are
+invoked can cause nasty feedback loops which will wedge the hook into an
+infinite cycle unless the hook functions are carefully written to detect
+and short-circuit such cycles.
+
+Pre- and post-commit hooks are applied at the [bucket]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets) level,
+[using bucket types][usage bucket types]. They are run once per successful response to the
+client.
+
+Both pre- and post-commit hooks are named [Erlang](http://learnyousomeerlang.com/)
+functions.
+
+## Setting Commit Hooks Using Bucket Types
+
+Because hooks are defined at the bucket level, you can create [bucket types]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types)
+that associate one or more hooks with any bucket that bears that type.
+Let's create a bucket type called `with_post_commit` that adds a
+post-commit hook to operations on any bucket that bears the
+`with_post_commit` type.
+
+The format for specifying commit hooks is to identify the module (`mod`)
+and then the name of the function (`fun`) as a JSON object. The
+following specifies a commit hook called `my_custom_hook` in the module
+`commit_hooks_module`:
+
+```json
+{
+  "mod": "commit_hooks_module",
+  "fun": "my_custom_hook"
+}
+```
+
+When we create our `with_post_commit` bucket type, we add that object
+to either the `precommit` or `postcommit` list in the bucket type's
+properties. Pre- and post-commit hooks are stored in lists named
+`precommit` and `postcommit`, respectively. Let's add the hook we
+specified above to the `postcommit` property when we create our bucket
+type:
+
+```bash
+riak-admin bucket-type create with_post_commit \
+  '{"props":{"postcommit":[{"mod":"commit_hooks_module","fun":"my_custom_hook"}]}}'
+```
+
+Once our bucket type has been created, we must activate it so that it
+will be usable through our Riak cluster:
+
+```bash
+riak-admin bucket-type activate with_post_commit
+```
+
+If the response is `with_post_commit has been activated`, then the
+bucket type is ready for use.
+
+## Pre-Commit Hooks
+
+Pre-commit hook Erlang functions should take a single argument, the
+Riak object being modified. Remember that deletes are also considered
+"writes," and so pre-commit hooks will be fired when a delete occurs in
+the bucket as well. This means that hook functions will need to inspect
+the object for the `X-Riak-Deleted` metadata entry (more on this in our
+documentation on [object deletion]({{<baseurl>}}riak/kv/3.0.4/using/reference/object-deletion)) to determine whether a delete is
+occurring.
+
+Erlang pre-commit functions are allowed three possible return values,
+each of which is shown in the sketch below this list:
+
+- A Riak object - This can either be the same object passed to the function or an updated version of the object. This allows hooks to modify the object before they are written.
+- `fail` - The atom `fail` will cause Riak to fail the write and send a 403 Forbidden error response (in the [HTTP API]({{<baseurl>}}riak/kv/3.0.4/developing/api/http)) along with a generic error message about why the write was blocked.
+- `{fail, Reason}` - The tuple `{fail, Reason}` will cause the same behavior as in the case above, but with the addition of `Reason` used as the error text.
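+
+As a concrete illustration of those three return paths, here is a minimal
+sketch of a pre-commit hook. The function name and the module-level rules
+(rejecting empty values and values prefixed with `tmp_`) are hypothetical,
+chosen only to exercise each return value:
+
+```erlang
+%% A hypothetical pre-commit hook exercising all three return values.
+%% Riak passes in the object being written.
+precommit_demo(Object) ->
+    case riak_object:get_value(Object) of
+        %% Fail the write with custom error text
+        <<>> ->
+            {fail, "Empty values are not allowed"};
+        %% Fail the write with Riak's generic error message
+        <<"tmp_", _/binary>> ->
+            fail;
+        %% Otherwise, pass the object through unmodified
+        _ ->
+            Object
+    end.
+```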
+ +Errors that occur when processing Erlang pre-commit hooks will be +reported in the `sasl-error.log` file with lines that start with +`problem invoking hook`. + +#### Object Size Example + +This Erlang pre-commit hook will limit object values to 5 MB or smaller: + +```erlang +precommit_limit_size(Object) -> + case erlang:byte_size(riak_object:get_value(Object)) of + Size when Size > 5242880 -> {fail, "Object is larger than 5MB."}; + _ -> Object + end. +``` + +The Erlang function `precommit_limit_size` takes the Riak object +(`Object`) as its input and runs a pattern-matching operation on the +object. If the [`erlang:byte_size`](http://www.erlang.org/doc/man/erlang.html#byte_size-1) +function determines that the object's size (determined by the `riak_object:get_value` +function) is greater than 5,242,880 (5 MB in bytes), then the commit +will return failure and the message `Object size is larger than 5 MB`. +This will stop the write. If the object is not larger than 5 MB, Riak +will return the object and allow the write to proceed. + +### Chaining + +The default value of the bucket type's `precommit` property is an empty +list, meaning that no pre-commit hooks are specified by default. Adding +one or more pre-commit hook functions to this list, as documented above, +will cause Riak to start evaluating those hook functions when bucket +entries are created, updated, or deleted. Riak stops evaluating +pre-commit hooks when a hook function fails the commit. + +#### JSON Validation Example + +Pre-commit hooks can be used in many ways in Riak. One such way to use +pre-commmit hooks is to validate data before it is written to Riak. +Below is an example that uses Javascript to validate a JSON object +before it is written to Riak. + +Below is a sample JSON object that will be evaluated by the hook: + +```json +{ + "user_info": { + "name": "Mark Phillips", + "age": "25" + }, + "session_info": { + "id": 3254425, + "items": [29, 37, 34] + } +} +``` + +The following hook will validate the JSON object: + +```erlang +validate(Object) -> + try + mochijson2:decode(riak_object:get_value(Object)), + Object + catch + throw:invalid_utf8 -> + {fail, "Invalid JSON: Illegal UTF-8 character"}; + error:Error -> + {fail, lists:flatten(io_lib:format("Invalid JSON: ~p",[Error]))} + end. +``` + +**Note**: All pre-commit hook functions are executed for each create and update operation. + +## Post-Commit Hooks + +Post-commit hooks are run after a write has completed successfully. More +specifically, the hook function is called immediately before the calling +process is notified of the successful write. + +Hook functions must accept a single argument: the object instance just +written. The return value of the function is ignored. As with pre-commit +hooks, deletes are considered writes, so post-commit hook functions will +need to inspect the object's metadata for the presence of `X-Riak-Deleted` +to determine whether a delete has occurred. As with pre-commit hooks, +errors that occur when processing post-commit hooks will be reported in +the `sasl-error.log` file with lines that start with `problem invoking hook`. 
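+
+Since deletes fire hooks too, a post-commit hook will usually begin with the
+`X-Riak-Deleted` check described above. A minimal sketch, with a hypothetical
+function name, might look like this:
+
+```erlang
+%% A hypothetical post-commit hook that ignores deletes. The object's
+%% metadata is a dict, and the X-Riak-Deleted entry is present when the
+%% "write" is actually a tombstone. The return value is ignored.
+postcommit_skip_deletes(Object) ->
+    Metadata = riak_object:get_metadata(Object),
+    case dict:find(<<"X-Riak-Deleted">>, Metadata) of
+        {ok, _} ->
+            ok; %% a delete; nothing to do
+        error ->
+            %% handle the newly stored object here
+            ok
+    end.
+```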
+ +#### Example + +The following post-commit hook creates a secondary index on the `email` +field of a JSON object: + +```erlang +postcommit_index_on_email(Object) -> + %% Determine the target bucket name + Bucket = erlang:iolist_to_binary([riak_object:bucket(Object),"_by_email"]), + + %% Decode the JSON body of the object + {struct, Properties} = mochijson2:decode(riak_object:get_value(Object)), + + %% Extract the email field + {<<"email">>,Key} = lists:keyfind(<<"email">>,1,Properties), + + %% Create a new object for the target bucket + %% NOTE: This doesn't handle the case where the + %% index object already exists! + IndexObj = riak_object:new( + Bucket, Key, <<>>, %% no object contents + dict:from_list( + [ + {<<"content-type">>, "text/plain"}, + {<<"Links">>, + [ + { + {riak_object:bucket(Object), riak_object:key(Object)}, + <<"indexed">> + }]} + ] + ) + ), + + %% Get a riak client + {ok, C} = riak:local_client(), + + %% Store the object + C:put(IndexObj). +``` + + +### Chaining + +The default value of the bucket `postcommit` property is an empty list, +meaning that no post-commit hooks are specified by default. Adding one +or more post-commit hook functions to the list, as documented above, +will cause Riak to start evaluating those hook functions immediately +after data has been created, updated, or deleted. Each post-commit hook +function runs in a separate process so it's possible for several hook +functions, triggered by the same update, to execute in parallel. + +**Note**: All post-commit hook functions are executed for each create, +update, or delete. + + + + diff --git a/content/riak/kv/3.0.4/developing/usage/conflict-resolution.md b/content/riak/kv/3.0.4/developing/usage/conflict-resolution.md new file mode 100644 index 0000000000..eef9d04d78 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/conflict-resolution.md @@ -0,0 +1,681 @@ +--- +title: "Conflict Resolution" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Conflict Resolution" + identifier: "usage_conflict_resolution" + weight: 116 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.4/dev/using/conflict-resolution + - /riak/kv/3.0.4/dev/using/conflict-resolution +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types +[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/reference/strong-consistency + +One of Riak's [central goals](../../../learn/why-riak-kv) is high availability. It was built as a [clustered]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters) system in which any [node]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#node) is capable of receiving requests without requiring that +every node participate in each request. + +If you are using Riak in an [eventually consistent]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency) way, conflicts between object values on different nodes is +unavoidable. Often, Riak can resolve these conflicts on its own +internally if you use causal context, i.e. [vector clocks]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context#vector-clocks) or [dotted version vectors]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context#dotted-version-vectors), when updating objects. Instructions on this can be found in the section [below](#siblings). 
+ +{{% note title="Important note on terminology" %}} +In versions of Riak prior to 2.0, vector clocks were the only causal context +mechanism available in Riak, which changed with the introduction of dotted +version vectors in 2.0. Please note that you may frequent find terminology in +client library APIs, internal Basho documentation, and more that uses the term +"vector clock" interchangeably with causal context in general. Riak's HTTP API +still uses a `X-Riak-Vclock` header, for example, even if you are using dotted +version vectors. +{{% /note %}} + +But even when you use causal context, Riak cannot always decide which +value is most causally recent, especially in cases involving concurrent +updates to an object. So how does Riak behave when it can't decide on a +single most-up-to-date value? **That is your choice**. A full listing of +available options can be found in the [section below](#client-and-server-side-conflict-resolution). For now, +though, please bear in mind that we strongly recommend one of the +following two options: + +1. If your data can be modeled as one of the currently available [Riak + Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types), we recommend using one of these types, + because all of them have conflict resolution _built in_, completely + relieving applications of the need to engage in conflict resolution. +2. If your data cannot be modeled as one of the available Data Types, + we recommend allowing Riak to generate [siblings](#siblings) and to design your application to resolve + conflicts in a way that fits your use case. Developing your own + **conflict resolution strategy** can be tricky, but it has clear + advantages over other approaches. + +Because Riak allows for a mixed approach when storing and managing data, +you can apply multiple conflict resolution strategies within a cluster. + +> **Note on strong consistency** +> +> In versions of Riak 2.0 and later, you have the option of using Riak in +a strongly consistent fashion. This document pertains to usage of Riak +as an _eventually_ consistent system. If you'd like to use Riak's +strong consistency feature, please refer to the following documents: +> +> * [Using Strong Consistency]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/strong-consistency) - A guide for developers +> * [Managing Strong Consistency]({{<baseurl>}}riak/kv/3.0.4/configuring/strong-consistency) - A guide for operators +> * [strong consistency][use ref strong consistency] - A more theoretical explication of strong + consistency + +## Client- and Server-side Conflict Resolution + +Riak's eventual consistency model is powerful because Riak is +fundamentally non-opinionated about how data resolution takes place. +While Riak _does_ have a set of [defaults]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/replication-properties#available-parameters), there are a variety of general +approaches to conflict resolution that are available. In Riak, you can +mix and match conflict resolution strategies at the bucket level, +[using bucket types][usage bucket types]. The most important [bucket properties]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets) +to consider when reasoning about conflict resolution are the +`allow_mult` and `last_write_wins` properties. + +These properties provide you with the following basic options: + +### Timestamp-based Resolution + +If the [`allow_mult`](#siblings) parameter is set to +`false`, Riak resolves all object replica conflicts internally and does +not return siblings to the client. 
How Riak resolves those conflicts +depends on the value that you set for a different bucket property, +[`last_write_wins`]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets). If `last_write_wins` is set to `false`, +Riak will resolve all conflicts on the basis of +[timestamps](http://en.wikipedia.org/wiki/Timestamp), which are +attached to all Riak objects as metadata. + +The problem with timestamps is that they are not a reliable resolution +mechanism in distributed systems, and they always bear the risk of data +loss. A better yet still-problematic option is to adopt a +last-write-wins strategy, described directly below. + +### Last-write-wins + +Another way to manage conflicts is to set `allow_mult` to `false`, as +with timestamp-based resolution, while also setting the +`last_write_wins` parameter to +`true`. This produces a so-called last-write-wins (LWW) strategy whereby +Riak foregoes the use of all internal conflict resolution strategies +when making writes, effectively disregarding all previous writes. + +The problem with LWW is that it will necessarily drop some writes in the +case of concurrent updates in the name of preventing sibling creation. +If your use case requires that your application be able to reason about +differing values produced in the case of concurrent updates, then we +advise against LWW as a general conflict resolution strategy. + +However, LWW can be useful---and safe---if you are certain that there +will be no concurrent updates. If you are storing immutable data in +which each object is guaranteed to have its own key or engaging in +operations related to bulk loading, you should consider LWW. + +{{% note title="Undefined behavior warning" %}} +Setting both `allow_mult` and `last_write_wins` to `true` necessarily leads to +unpredictable behavior and should always be avoided. +{{% /note %}} + +### Resolve Conflicts on the Application Side + +While setting `allow_mult` to `false` unburdens applications from having +to reason about siblings, delegating that responsibility to Riak itself, +it bears all of the drawbacks explained above. On the other hand, +setting `allow_mult` to `true` has the following benefits: + +* Riak will retain writes even in the case of concurrent updates to a + key, which enables you to capture the benefits of high availability + with a far lower risk of data loss +* If your application encounters siblings, it can apply its own + use-case-specific conflict resolution logic + +Conflict resolution in Riak can be a complex business, but the presence +of this variety of options means that requests to Riak can always be +made in accordance with your data model(s), business needs, and use +cases. For examples of client-side sibling resolution, see the following +client-library-specific docs: + +* [Java]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/nodejs) + +In Riak versions 2.0 and later, `allow_mult` is set to `true` by default +for any [bucket types]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) that you create. 
This means +that if you wish to avoid client-side sibling resolution, you have a few +options: + +* Explicitly create and activate [bucket types]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) + that set `allow_mult` to `false` +* Use Riak's [Configuration Files]({{<baseurl>}}riak/kv/3.0.4/configuring/reference) to change the [default bucket properties]({{<baseurl>}}riak/kv/3.0.4/configuring/reference#default-bucket-properties) for your + cluster. If you set the `buckets.default.allow_mult` parameter to + `false`, all bucket types that you create will have `allow_mult` set + to `false` by default. + +## Causal Context + +When a value is stored in Riak, it is tagged with a piece of metadata +called a **causal context** which establishes the object's initial +version. Causal context comes in one of two possible forms, depending +on what value you set for `dvv_enabled`. If set to `true`, [dotted version vectors]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context#dotted-version-vectors) will be used; if set to `false` (the default), [vector clocks]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context#vector-clocks) will be used. + +Causal context essentially enables Riak to compare the different values +of objects stored in Riak and to determine a number of important things +about those values: + + * Whether one value is a direct descendant of the other + * Whether the values are direct descendants of a common parent + * Whether the values are unrelated in recent heritage + +Using the information provided by causal context, Riak is frequently, +though not always, able to resolve conflicts between values without +producing siblings. + +Both vector clocks and dotted version vectors are non human readable and +look something like this: + +``` +a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA= +``` + +If `allow_mult` is set to `true`, you should _always_ use causal context +when updating objects, _unless you are certain that no object exists +under that key_. Failing to use causal context with mutable data, +especially for objects that are frequently updated, can lead to +[sibling explosion]({{<baseurl>}}riak/kv/3.0.4/using/performance/latency-reduction#siblings), which can +produce a variety of problems in your cluster. Fortunately, much of the +work involved with using causal context is handled automatically by +Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.4/developing/client-libraries). Examples can be found for each +client library in the [Object Updates]({{<baseurl>}}riak/kv/3.0.4/developing/usage/updating-objects) document. + +## Siblings + +A **sibling** is created when Riak is unable to resolve the canonical +version of an object being stored, i.e. when Riak is presented with +multiple possible values for an object and can't figure out which one is +most causally recent. The following scenarios can create sibling values +inside of a single object: + +1. **Concurrent writes** - If two writes occur simultaneously from +clients, Riak may not be able to choose a single value to store, in +which case the object will be given a sibling. These writes could happen +on the same node or on different nodes. +2. **Stale causal context** - Writes from any client using a stale +[causal context]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context). 
This is a less likely scenario if a client updates +the object by reading the object first, fetching the causal context +currently attached to the object, and then returning that causal context +to Riak when performing the update (fortunately, our client libraries +handle much of this automatically). However, even if a client follows +this protocol when performing updates, a situation may occur in which an +update happens from a different client while the read/write cycle is +taking place. This may cause the first client to issue the write with an +old causal context value and for a sibling to be created. A client is +"misbehaved" if it habitually updates objects with a stale or no context +object. +3. **Missing causal context** - If an object is updated with no causal +context attached, siblings are very likely to be created. This is an +unlikely scenario if you're using a Basho client library, but it _can_ +happen if you are manipulating objects using a client like `curl` and +forgetting to set the `X-Riak-Vclock` header. + +## Siblings in Action + +Let's have a more concrete look at how siblings work in Riak. First, +we'll create a bucket type called `siblings_allowed` with `allow_mult` +set to `true`: + +```bash +riak-admin bucket-type create siblings_allowed '{"props":{"allow_mult":true}}' +riak-admin bucket-type activate siblings_allowed +riak-admin bucket-type status siblings_allowed +``` + +If the type has been activated, running the `status` command should +return `siblings_allowed is active`. Now, we'll create two objects and +write both of them to the same key without first fetching the object +(which obtains the causal context): + +```java +Location bestCharacterKey = + new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character"); + +RiakObject obj1 = new RiakObject() + .withContentType("text/plain") + .withValue(BinaryValue.create("Ren")); +RiakObject obj2 = new RiakObject() + .withContentType("text/plain") + .withValue(BinaryValue.create("Stimpy")); +StoreValue store1 = new StoreValue.Builder(obj1) + .withLocation(bestCharacterKey) + .build(); +StoreValue store2 = new StoreValue.Builder(obj2) + .withLocation(bestCharacterKey) + .build(); +client.execute(store1); +client.execute(store2); +``` + +```ruby +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj1 = Riak::RObject.new(bucket, 'best_character') +obj1.content_type = 'text/plain' +obj1.raw_data = 'Ren' +obj1.store + +obj2 = Riak::RObject.new(bucket, 'best_character') +obj2.content_type = 'text/plain' +obj2.raw_data = 'Stimpy' +obj2.store +``` + +```python +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj1 = RiakObject(client, bucket, 'best_character') +obj1.content_type = 'text/plain' +obj1.data = 'Ren' +obj1.store() + +obj2 = RiakObject(client, bucket, 'best_character') +obj2.content_type = 'text/plain' +obj2.data = 'Stimpy' +obj2.store() +``` + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); +``` + +```javascript +var obj1 = new Riak.Commands.KV.RiakObject(); +obj1.setContentType('text/plain'); +obj1.setBucketType('siblings_allowed'); +obj1.setBucket('nickolodeon'); +obj1.setKey('best_character'); +obj1.setValue('Ren'); + +var obj2 = new 
Riak.Commands.KV.RiakObject(); +obj2.setContentType('text/plain'); +obj2.setBucketType('siblings_allowed'); +obj2.setBucket('nickolodeon'); +obj2.setKey('best_character'); +obj2.setValue('Ren'); + +var storeFuncs = []; +[obj1, obj2].forEach(function (obj) { + storeFuncs.push( + function (async_cb) { + client.storeValue({ value: obj }, function (err, rslt) { + async_cb(err, rslt); + }); + } + ); +}); + +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Obj1 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>}, + <<"best_character">>, + <<"Ren">>, + <<"text/plain">>), +Obj2 = riakc_obj:new({<<"siblings_allowed">>, <<"nickolodeon">>}, + <<"best_character">>, + <<"Stimpy">>, + <<"text/plain">>), +riakc_pb_socket:put(Pid, Obj1), +riakc_pb_socket:put(Pid, Obj2). +``` + +```curl +curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/keys/best_character \ + -H "Content-Type: text/plain" \ + -d "Ren" + +curl -XPUT http://localhost:8098/types/siblings_allowed/nickolodeon/whatever/keys/best_character \ + -H "Content-Type: text/plain" \ + -d "Stimpy" +``` + +> **Getting started with Riak KV clients** +> +> If you are connecting to Riak using one of Basho's official +[client libraries]({{<baseurl>}}riak/kv/3.0.4/developing/client-libraries), you can find more information about getting started with your client in [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.4/developing/getting-started) section. + +At this point, multiple objects have been stored in the same key without +passing any causal context to Riak. Let's see what happens if we try to +read contents of the object: + +```java +Location bestCharacterKey = + new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character"); + +FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build(); +FetchValue.Response response = client.execute(fetch); +RiakObject obj = response.getValue(RiakObject.class); +System.out.println(obj.getValue().toString()); +``` + +```ruby +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj = bucket.get('best_character') +obj +``` + +```python +bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon') +obj = bucket.get('best_character') +obj.siblings +``` + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +```javascript +client.fetchValue({ + bucketType: 'siblings_allowed', bucket: + 'nickolodeon', key: 'best_character' +}, function (err, rslt) { + if (err) { + throw new Error(err); + } + logger.info("nickolodeon/best_character has '%d' siblings", + rslt.values.length); +}); +``` + +```curl +curl http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character +``` + +Uh-oh! Siblings have been found. 
We should get this response: + +```java +com.basho.riak.client.cap.UnresolvedConflictException: Siblings found +``` + +```ruby +<Riak::RObject {nickolodeon,best_character} [#<Riak::RContent [text/plain]:"Ren">, #<Riak::RContent [text/plain]:"Stimpy">]> +``` + +```python +[<riak.content.RiakContent object at 0x10a00eb90>, <riak.content.RiakContent object at 0x10a00ebd0>] +``` + +```csharp +Sibling count: 2 + VTag: 1DSVo7VED8AC6llS8IcDE6 + VTag: 7EiwrlFAJI5VMLK87vU4tE +``` + +```javascript +info: nickolodeon/best_character has '2' siblings +``` + +```curl +Siblings: +175xDv0I3UFCfGRC7K7U9z +6zY2mUCFPEoL834vYCDmPe +``` + +As you can see, reading an object with sibling values will result in +some form of "multiple choices" response (e.g. `300 Multiple Choices` in +HTTP). If you're using the HTTP interface and want to view all sibling +values, you can attach an `Accept: multipart/mixed` header to your +request: + +```curl +curl -H "Accept: multipart/mixed" \ + http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character +``` + +Response (without headers): + +``` +ren +--WUnzXITIPJFwucNwfdaofMkEG7H + +stimpy +--WUnzXITIPJFwucNwfdaofMkEG7H-- +``` + +If you select the first of the two siblings and retrieve its value, you +should see `Ren` and not `Stimpy`. + +### Using Causal Context + +Once you are presented with multiple options for a single value, you +must determine the correct value. In an application, this can be done +either in an automatic fashion, using a use case-specific resolver, or +by presenting the conflicting objects to the end user. For more +information on application-side conflict resolution, see our +client-library-specific documentation for the following languages: + +* [Java]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/java) +* [Ruby]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/ruby) +* [Python]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/python) +* [C#]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/csharp) +* [Node.js]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/nodejs) + +We won't deal with conflict resolution in this section. Instead, we'll +focus on how to use causal context. + +After having written several objects to Riak in the section above, we +have values in our object: `Ren` and `Stimpy`. But let's say that we +decide that `Stimpy` is the correct value based on our application's use +case. In order to resolve the conflict, we need to do three things: + +1. Fetch the current object (which will return both siblings) +2. Modify the value of the object, i.e. make the value `Stimpy` +3. Write the object back to the `best_character` key + +What happens when we fetch the object first, prior to the update, is +that the object handled by the client has a causal context attached. At +that point, we can modify the object's value, and when we write the +object back to Riak, _the causal context will automatically be attached +to it_. 
Let's see what that looks like in practice:
+
+```java
+// First, we fetch the object
+Location bestCharacterKey =
+  new Location(new Namespace("siblings_allowed", "nickolodeon"), "best_character");
+FetchValue fetch = new FetchValue.Builder(bestCharacterKey).build();
+FetchValue.Response res = client.execute(fetch);
+RiakObject obj = res.getValue(RiakObject.class);
+
+// Then we modify the object's value
+obj.setValue(BinaryValue.create("Stimpy"));
+
+// Then we store the object, which has the vector clock already attached
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(bestCharacterKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+# First, we fetch the object
+bucket = client.bucket('nickolodeon')
+obj = bucket.get('best_character', type: 'siblings_allowed')
+
+# Then we modify the object's value
+obj.raw_data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store
+```
+
+```python
+# First, we fetch the object
+bucket = client.bucket_type('siblings_allowed').bucket('nickolodeon')
+obj = bucket.get('best_character')
+
+# Then we modify the object's value
+obj.data = 'Stimpy'
+
+# Then we store the object, which has the vector clock already attached
+obj.store()
+```
+
+```csharp
+// First, fetch the object
+var getResult = client.Get(id);
+
+// Then, modify the object's value
+RiakObject obj = getResult.Value;
+obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+// Then, store the object which has vector clock attached
+var putRslt = client.Put(obj);
+CheckResult(putRslt);
+
+obj = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(obj.Siblings.Count == 0);
+```
+
+```javascript
+client.fetchValue({
+        bucketType: 'siblings_allowed',
+        bucket: 'nickolodeon',
+        key: 'best_character'
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue('Stimpy');
+        client.storeValue({ value: riakObj, returnBody: true },
+            function (err, rslt) {
+                if (err) {
+                    throw new Error(err);
+                }
+
+                assert(rslt.values.length === 1);
+            }
+        );
+    }
+);
+```
+
+```curl
+curl -i http://localhost:8098/types/siblings_allowed/buckets/nickolodeon/keys/best_character
+
+# In the HTTP interface, the causal context can be found in the
+# "X-Riak-Vclock" header. That will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the vector clock
+```
+
+{{% note title="Concurrent conflict resolution" %}}
+It should be noted that it is possible to have two clients that are
+simultaneously engaging in conflict resolution. To avoid a pathological
+divergence, you should be sure to limit the number of reconciliations and fail
+once that limit has been exceeded.
+{{% /note %}}
+
+### Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings
+without being reconciled. This can lead to myriad issues. Having an
+enormous object in your node can cause reads of that object to crash
+the entire node. Other issues include [increased cluster latency]({{<baseurl>}}riak/kv/3.0.4/using/performance/latency-reduction) as the object is replicated and out-of-memory errors.
+
+### Vector Clock Explosion
+
+Besides sibling explosion, the vector clock itself can grow extremely
+large when a significant number of updates are performed on a single
+object in a small period of time. 
While updating a single object
+_extremely_ frequently is not recommended, you can tune Riak's vector
+clock pruning to prevent vector clocks from growing too large too
+quickly. More on pruning in the [section below](#vector-clock-pruning).
+
+### How does `last_write_wins` affect resolution?
+
+On the surface, it seems like setting `allow_mult` to `false`
+(the default) and `last_write_wins` to `true` would result in the same
+behavior, but there is a subtle distinction.
+
+Even though both settings return only one value to the client, setting
+`allow_mult` to `false` still uses vector clocks for resolution, whereas
+if `last_write_wins` is `true`, Riak reads the timestamp to determine
+the latest version. Deeper in the system, if `allow_mult` is `false`,
+Riak will still allow siblings to exist when they are created (via
+concurrent writes or network partitions), whereas setting
+`last_write_wins` to `true` means that Riak will overwrite the value
+with the one that has the later timestamp.
+
+When you don't care about sibling creation, setting `allow_mult` to
+`false` has the least surprising behavior: you get the latest value,
+but network partitions are handled gracefully. However, for cases in
+which keys are rewritten often (and quickly) and the new value isn't
+necessarily dependent on the old value, `last_write_wins` will provide
+better performance. Some use cases where you might want to use
+`last_write_wins` include caching, session storage, and insert-only
+(no updates).
+
+{{% note title="Note on combining `allow_mult` and `last_write_wins`" %}}
+The combination of setting both the `allow_mult` and `last_write_wins`
+properties to `true` leads to undefined behavior and should not be used.
+{{% /note %}}
+
+## Vector Clock Pruning
+
+Riak regularly prunes vector clocks to prevent overgrowth based on four
+parameters which can be set for any bucket type that you create:
+
+Parameter | Default value | Description
+:---------|:--------------|:-----------
+`small_vclock` | `50` | If the length of the vector clock list is smaller than this value, the list's entries will not be pruned
+`big_vclock` | `50` | If the length of the vector clock list is larger than this value, the list will be pruned
+`young_vclock` | `20` | If a vector clock entry is younger than this value (in seconds), it will not be pruned
+`old_vclock` | `86400` (one day) | If a vector clock entry is older than this value (in seconds), it will be pruned
+
+This diagram shows how the values of these parameters dictate the vector
+clock pruning process:
+
+![Vclock Pruning]({{<baseurl>}}images/vclock-pruning.png)
+
+## More Information
+
+Additional background information on vector clocks:
+
+* [Vector Clocks on Wikipedia](http://en.wikipedia.org/wiki/Vector_clock)
+* [Why Vector Clocks are Easy](http://basho.com/why-vector-clocks-are-easy/)
+* [Why Vector Clocks are Hard](http://basho.com/why-vector-clocks-are-hard/)
+* The vector clocks used in Riak are based on the [work of Leslie Lamport](http://portal.acm.org/citation.cfm?id=359563)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/csharp.md
new file mode 100644
index 0000000000..364ed47ff0
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/csharp.md
@@ -0,0 +1,123 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "C Sharp"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "C 
Sharp" + identifier: "usage_conflict_resolution_csharp" + weight: 103 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/3.0.4/dev/using/conflict-resolution/csharp + - /riak/kv/3.0.4/dev/using/conflict-resolution/csharp +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak .NET client][riak_dotnet_client]. + +## How the .NET Client Handles Conflict Resolution + +In the Riak .NET client, every Riak object has a `siblings` property that +provides access to a list of that object's sibling values. If there are no +siblings, that property will return an empty list. + +Here's an example of an object with siblings: + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.WriteLine(format: "Sibling count: {0}", args: obj.Siblings.Count); +foreach (var sibling in obj.Siblings) +{ + Debug.WriteLine( + format: " VTag: {0}", + args: sibling.VTag); +} +``` + +So what happens if the count of `obj.Siblings` is greater than 0, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `Siblings` list and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `Siblings` list and will +fetch, update and store the definitive value. + +```csharp +var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character"); + +var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain); +var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain); + +var renResult = client.Put(renObj); +var stimpyResult = client.Put(stimpyObj); + +var getResult = client.Get(id); +RiakObject obj = getResult.Value; +Debug.Assert(obj.Siblings.Count == 2); + +// Now, modify the object's value +obj.SetObject<string>("Stimpy", RiakConstants.ContentTypes.TextPlain); + +// Then, store the object which has vector clock attached +var putRslt = client.Put(obj); +CheckResult(putRslt); + +obj = putRslt.Value; +// Voila, no more siblings! +Debug.Assert(obj.Siblings.Count == 0); +``` + +### Choosing a value from `Siblings` + +This example shows a basic sibling resolution strategy in which the first +sibling is chosen as the canonical value. 
+
+```csharp
+var id = new RiakObjectId("siblings_allowed", "nickolodeon", "best_character");
+
+var renObj = new RiakObject(id, "Ren", RiakConstants.ContentTypes.TextPlain);
+var stimpyObj = new RiakObject(id, "Stimpy", RiakConstants.ContentTypes.TextPlain);
+
+var renResult = client.Put(renObj);
+var stimpyResult = client.Put(stimpyObj);
+
+var getResult = client.Get(id);
+RiakObject obj = getResult.Value;
+Debug.Assert(obj.Siblings.Count == 2);
+
+// Pick the first sibling
+RiakObject chosenSibling = getResult.Value.Siblings.First();
+
+// Then, store the chosen object
+var putRslt = client.Put(chosenSibling);
+CheckResult(putRslt);
+
+RiakObject updatedObject = putRslt.Value;
+// Voila, no more siblings!
+Debug.Assert(updatedObject.Siblings.Count == 0);
+```
+
+
+[riak_dotnet_client]: https://github.com/basho/riak-dotnet-client
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/conflict-resolution/golang.md b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/golang.md
new file mode 100644
index 0000000000..4dae3f6d88
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/golang.md
@@ -0,0 +1,62 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Go"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Go"
+    identifier: "usage_conflict_resolution_golang"
+    weight: 106
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/using/conflict-resolution/golang
+  - /riak/kv/3.0.4/dev/using/conflict-resolution/golang
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that
+requires applications to resolve siblings according to use-case-specific
+criteria. Here, we'll provide a brief guide to conflict resolution using the
+official [Riak Go client](https://github.com/basho/riak-go-client).
+
+## How the Go Client Handles Conflict Resolution
+
+In the Riak Go client, the result of a fetch may contain an array of
+sibling objects in its `Values` slice. If there are no siblings, the
+`Values` slice will contain a single value.
+
+[*Example:* creating object with siblings](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L68-L70)
+
+So what happens if the length of `Values` is greater than 1, as in the case
+above?
+
+In order to resolve siblings, you need to either fetch, update, and store a
+canonical value, or choose a sibling from the `Values` slice and store that as
+the canonical value.
+
+## Basic Conflict Resolution Example
+
+In this example, you will ignore the contents of the `Values` slice and will
+fetch, update and store the definitive value.
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go)
+
+### Choosing a value from `Values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings using the first value](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L148-L167)
+
+### Using `ConflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution type. 
+ +[*Example:* resolving siblings via `ConflictResolver`](https://github.com/basho/riak-go-client/blob/master/examples/dev/using/conflict-resolution/main.go#L169-L210) + + + + diff --git a/content/riak/kv/3.0.4/developing/usage/conflict-resolution/java.md b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/java.md new file mode 100644 index 0000000000..31ba2f3765 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/java.md @@ -0,0 +1,276 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Java" + identifier: "usage_conflict_resolution_java" + weight: 100 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/3.0.4/dev/using/conflict-resolution/java + - /riak/kv/3.0.4/dev/using/conflict-resolution/java +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Java +client](https://github.com/basho/riak-java-client). + +## How the Java Client Handles Conflict Resolution + +The official Riak Java client provides a `ConflictResolver` interface +for handling sibling resolution. This interface requires that you +implement a `resolve` method that takes a Java `List` of objects of a +specific type that are stored in Riak and produces a single object of +that type, i.e. converts a `List<T>` to a single `T`. Once that +interface has been implemented, it can be registered as a singleton and +thereby applied to all read operations on a specific data type. Below is +an example resolver for the class `Foo`: + +```java +import com.basho.riak.client.api.cap.ConflictResolver; + +public class FooResolver implements ConflictResolver<Foo> { + @Override + public Foo resolve(List<Foo> siblings) { + // Insert your sibling resolution logic here + } +} +``` + +What happens within the `resolve` method is up to you and will always +depend on the use case at hand. You can implement a resolver that +selects a random `Foo` from the list, chooses the `Foo` with the most +recent timestamp (if you've set up the class `Foo` to have timestamps), +etc. In this tutorial we'll provide a simple example to get you started. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends" in the network. +Each user will bear the class `User`, which we'll create below. All of +the data for our application will be stored in buckets that bear the +[bucket type]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type +`allow_mult` is set to `true`, which means that Riak will generate +siblings in certain cases---siblings that our application will need to +be equipped to resolve when they arise. + +The question that we need to ask ourselves now is this: if a given user +has sibling values, i.e. if there are multiple `friends` lists and Riak +can't decide which one is most causally recent, which list should be +deemed "correct" from the standpoint of the application? What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? 
This decision will always be yours to make. Here, though, we'll keep it
+simple and say that the following criterion will hold: if conflicting
+lists exist, _the longer list will be the one that our application deems
+correct_. So if the user `user1234` has a sibling conflict where the
+possible values have `friends` lists with 100, 75, and 10 friends,
+respectively, the list of 100 friends will win out. While this might
+not make sense in real-world applications, it's a good jumping-off
+point. We'll explore the drawbacks of this approach, as well as a better
+alternative, in this document as well.
+
+### Creating Our Data Class
+
+We'll start by creating a `User` class for each user's data. Each `User`
+object will consist of a `username` as well as a `friends` property that
+lists the usernames, as strings, of the user's friends. We'll use a
+`Set` for the `friends` property to avoid duplicates.
+
+```java
+public class User {
+    public String username;
+    public Set<String> friends;
+
+    public User(String username, Set<String> friends) {
+        this.username = username;
+        this.friends = friends;
+    }
+}
+```
+
+Here's an example of instantiating a new `User` object:
+
+```java
+Set<String> friends = new HashSet<String>();
+friends.add("fred");
+friends.add("barney");
+User bashobunny = new User("bashobunny", friends);
+```
+
+### Implementing a Conflict Resolution Interface
+
+So what happens if siblings are present and the user `bashobunny` has
+different friend lists in different object replicas? For that we can
+implement the `ConflictResolver` interface described [above](#how-the-java-client-handles-conflict-resolution). We
+need to implement that interface in a way that is specific to the need
+at hand, i.e. taking a list of `User` objects and returning the `User`
+object that has the longest `friends` list:
+
+```java
+import com.basho.riak.client.api.cap.ConflictResolver;
+
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // If there are no objects present, return null
+        if (siblings.size() == 0) {
+            return null;
+        // If there is only one User object present, return that object
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        // And if there are multiple User objects, return the object
+        // with the longest list
+        } else {
+            int longestList = 0;
+            User userWithLongestList = null;
+
+            // Iterate through the User objects to check for the longest
+            // list
+            for (User user : siblings) {
+                if (user.friends.size() > longestList) {
+                    userWithLongestList = user;
+                    longestList = user.friends.size();
+                }
+            }
+            // If all sibling User objects have a friends list with a length
+            // of 0, it doesn't matter which sibling is selected, so we'll
+            // simply select the first one in the list:
+            return userWithLongestList == null ? siblings.get(0) : userWithLongestList;
+        }
+    }
+}
+```
+
+### Registering a Conflict Resolver Class
+
+To use a conflict resolver, we must register it:
+
+```java
+ConflictResolverFactory factory = ConflictResolverFactory.getInstance();
+factory.registerConflictResolver(User.class, new UserResolver());
+```
+
+With the resolver registered, the resolution logic that we have created
+will resolve siblings automatically upon read. Registering a custom
+conflict resolver can occur at any point in the application's lifecycle
+and will be applied on all reads that involve that object type. 
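+
+With the resolver in place, a read can look something like the following
+sketch. It assumes that the users are stored in a bucket named `users` of
+the `siblings` bucket type, and that the default JSON converter can map
+stored values onto the `User` class:
+
+```java
+// Fetch the key as usual; nothing resolver-specific is needed here
+Location key = new Location(new Namespace("siblings", "users"), "bashobunny");
+FetchValue fetch = new FetchValue.Builder(key).build();
+FetchValue.Response response = client.execute(fetch);
+
+// Because a ConflictResolver is registered for the User class, any
+// siblings are passed through UserResolver before a value is returned,
+// so this is already a single, resolved User
+User bashobunny = response.getValue(User.class);
+```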
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolving sibling conflicts** if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Java client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.4/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the
+longest `friends` list has the benefit of being simple but it's probably
+not a good resolution strategy for our social networking application
+because it means that unwanted data loss is inevitable. If one friends
+list contains `A`, `B`, and `C` and the other contains `D` and `E`, the
+list containing `A`, `B`, and `C` will be chosen. So what about friends
+`D` and `E`? Those usernames are essentially lost. In the sections
+below, we'll implement some other conflict resolution strategies as
+examples.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy may be to merge the
+lists. We can modify our original `resolve` function in our
+`UserResolver` to accomplish precisely that:
+
+```java
+public class UserResolver implements ConflictResolver<User> {
+    @Override
+    public User resolve(List<User> siblings) {
+        // We apply the same logic as before, returning null if the
+        // key is empty and returning the one sibling if there is only
+        // one User in the siblings list
+        if (siblings.size() == 0) {
+            return null;
+        } else if (siblings.size() == 1) {
+            return siblings.get(0);
+        } else {
+            // We begin with an empty Set
+            Set<String> setBuilder = new HashSet<String>();
+
+            // We know that all User objects in the List will have the
+            // same username, since we used the username for the key, so
+            // we can fetch the username of any User in the list:
+            String username = siblings.get(0).username;
+
+            // Now for each User object in the list we add the friends
+            // list to our empty Set
+            for (User user : siblings) {
+                setBuilder.addAll(user.friends);
+            }
+
+            // Then we return a new User object that takes the Set we
+            // built as the friends list
+            return new User(username, setBuilder);
+        }
+    }
+}
+```
+
+Since the `friends` list is a Java `Set`, we don't need to worry about
+duplicate usernames.
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. 
While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#sets). + + + + diff --git a/content/riak/kv/3.0.4/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/nodejs.md new file mode 100644 index 0000000000..16c47bffbd --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/nodejs.md @@ -0,0 +1,62 @@ +--- +title_supertext: "Conflict Resolution:" +title: "NodeJS" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "NodeJS" + identifier: "usage_conflict_resolution_nodejs" + weight: 104 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/3.0.4/dev/using/conflict-resolution/nodejs + - /riak/kv/3.0.4/dev/using/conflict-resolution/nodejs +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a conflict resolution strategy that +requires applications to resolve siblings according to use-case-specific +criteria. Here, we'll provide a brief guide to conflict resolution using the +official [Riak Node.js client](https://github.com/basho/riak-nodejs-client). + +## How the Node.js Client Handles Conflict Resolution + +In the Riak Node.js client, the result of a fetch can possibly return an array +of sibling objects. If there are no siblings, that property will return an +array with one value in it. + +[*Example:* creating object with siblings](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L21-L68) + +So what happens if the length of `rslt.values` is greater than 1, as in the case +above? + +In order to resolve siblings, you need to either fetch, update and store a +canonical value, or choose a sibling from the `values` array and store that as +the canonical value. + +## Basic Conflict Resolution Example + +In this example, you will ignore the contents of the `values` array and will +fetch, update and store the definitive value. 
+
+[*Example:* resolving siblings via store](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L91-L111)
+
+### Choosing a value from `rslt.values`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value.
+
+[*Example:* resolving siblings via first](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L113-L133)
+
+### Using `conflictResolver`
+
+This example shows a basic sibling resolution strategy in which the first
+sibling is chosen as the canonical value via a conflict resolution function.
+
+[*Example:* resolving siblings via `conflictResolver`](https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/using/conflict-resolution.js#L135-L170)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/conflict-resolution/php.md b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/php.md
new file mode 100644
index 0000000000..afb8eb3fe1
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/php.md
@@ -0,0 +1,244 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "PHP"
+    identifier: "usage_conflict_resolution_php"
+    weight: 105
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/using/conflict-resolution/php
+  - /riak/kv/3.0.4/dev/using/conflict-resolution/php
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak PHP
+client](https://github.com/basho/riak-php-client).
+
+## How the PHP Client Handles Conflict Resolution
+
+Every `\Basho\Riak\Object` command returns a `\Basho\Riak\Command\Object\Response`
+object, which provides what is needed to handle object conflicts. If siblings exist
+and have been returned from the server within the response body, they will be
+available within the response object. See below:
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('conflicted_key', 'bucket_name', 'bucket_type')
+  ->build()
+  ->execute();
+
+echo $response->getStatusCode(); // 300
+echo $response->hasSiblings(); // 1
+echo $response->getSiblings(); // \Basho\Riak\Object[]
+```
+
+## Basic Conflict Resolution Example
+
+Let's say that we're building a social network application and storing
+lists of usernames representing each user's "friends" in the network.
+Each user will bear the class `User`, which we'll create below. All of
+the data for our application will be stored in buckets that bear the
+[bucket type]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type
+`allow_mult` is set to `true`, which means that Riak will generate
+siblings in certain cases---siblings that our application will need to
+be equipped to resolve when they arise.
+
+The question that we need to ask ourselves now is this: if a given user
+has sibling values, i.e. if there are multiple `friends` lists and Riak
+can't decide which one is most causally recent, which list should be
+deemed "correct" from the standpoint of the application? 
What criteria +should be applied in making that decision? Should the lists be merged? +Should we pick a `User` object at random? + +This decision will always be yours to make. Here, though, we'll keep it +simple and say that the following criterion will hold: if conflicting +lists exist, _the longer list will be the one that our application deems +correct_. So if the user `user1234` has a sibling conflict where one +possible value has `friends` lists with 100, 75, and 10 friends, +respectively, the list of 100 friends will win out. While this might +not make sense in real-world applications, it's a good jumping-off +point. We'll explore the drawbacks of this approach, as well as a better +alternative, in this document as well. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` as well as a `friends` property that +lists the usernames, as strings, of the user's friends. We'll use a +`Set` for the `friends` property to avoid duplicates. + +```php +class User { + public $username; + public $friends; + + public function __construct($username, array $friends = []) + { + $this->username = $username; + $this->friends = $friends; + } + + public function __toString() + { + return json_encode([ + 'username' => $this->username, + 'friends' => $this->friends, + 'friends_count' => count($this->friends) + ]); + } +} +``` + +Here's an example of instantiating a new `User` object: + +```php +$bashobunny = new User('bashobunny', ['fred', 'barney']); +``` + +### Implementing a Conflict Resolution Function + +Let's say that we've stored a bunch of `User` objects in Riak and that a +few concurrent writes have led to siblings. How is our application going +to deal with that? First, let's say that there's a `User` object stored +in the bucket `users` (which is of the bucket type `siblings`, as +explained above) under the key `bashobunny`. We can fetch the object +that is stored there and see if it has siblings: + +```php +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->buildLocation('bashobunny', 'users', 'siblings') + ->build() + ->execute(); + +echo $response->hasSiblings(); // 1 +``` + +If we get `true`, then there are siblings. So what do we do in that +case? At this point, we need to write a function that resolves the list +of siblings, i.e. reduces the `$response->getSiblings()` array down to one member. +In our case, we need a function that takes a Riak response object as its argument, +applies some logic to the list of values contained in the `siblings` property +of the object, and returns a single value. 
For our example use case here, we'll
+return the sibling with the longest `friends` list:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function longest_friends_list_resolver(Command\Object\Response $response)
+{
+    // If there are no siblings, there is nothing to resolve
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    $siblings = $response->getSiblings();
+    $max_key = 0;
+    foreach ($siblings as $key => $sibling) {
+        if ($sibling->getData()['friends_count'] > $siblings[$max_key]->getData()['friends_count']) {
+            $max_key = $key;
+        }
+    }
+
+    return $siblings[$max_key];
+}
+```
+
+We can then embed this function into a more general function for fetching
+objects from the `users` bucket:
+
+```php
+function fetch_user_by_username($username, Riak $riak)
+{
+    $response = (new Command\Builder\FetchObject($riak))
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return longest_friends_list_resolver($response);
+}
+
+$bashobunny = fetch_user_by_username('bashobunny', $riak);
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. **Resolving sibling conflicts** if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official PHP client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.4/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object.
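+
+A sketch of that merged-list resolver follows. It assumes the same `User`
+class and bucket layout as above, unions the `friends` arrays from all
+siblings, and writes the result back with `StoreObject::buildJsonObject()`:
+
+```php
+use \Basho\Riak;
+use \Basho\Riak\Command;
+
+function merged_friends_list_resolver(Command\Object\Response $response, Riak $riak)
+{
+    // Nothing to merge if there are no siblings
+    if (!$response->hasSiblings()) {
+        return $response->getObject();
+    }
+
+    // Union the "friends" arrays from all siblings, dropping duplicates
+    $siblings = $response->getSiblings();
+    $friends = [];
+    foreach ($siblings as $sibling) {
+        $friends = array_merge($friends, $sibling->getData()['friends']);
+    }
+    $username = $siblings[0]->getData()['username'];
+    $user = new User($username, array_values(array_unique($friends)));
+
+    // Store the merged value back under the same key; a production version
+    // would also attach the causal context returned by the fetch
+    (new Command\Builder\StoreObject($riak))
+        ->buildJsonObject($user)
+        ->buildLocation($username, 'users', 'siblings')
+        ->build()
+        ->execute();
+
+    return $user;
+}
+```
+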
The drawback to this approach is that it's more or less inevitable that a user
+will remove a friend from their friends list, and then that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User`
+object. The merge operation that we built to handle conflict resolution
+is analogous to the resolution logic that is built into Riak sets. For
+more information on how you could potentially replace the client-side
+resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#sets).
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/conflict-resolution/python.md b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/python.md
new file mode 100644
index 0000000000..0ca0dac8a7
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/python.md
@@ -0,0 +1,258 @@
+---
+title_supertext: "Conflict Resolution:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Python"
+    identifier: "usage_conflict_resolution_python"
+    weight: 102
+    parent: "usage_conflict_resolution"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/using/conflict-resolution/python
+  - /riak/kv/3.0.4/dev/using/conflict-resolution/python
+---
+
+For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a
+conflict resolution strategy that requires applications to resolve
+siblings according to use-case-specific criteria. Here, we'll provide a
+brief guide to conflict resolution using the official [Riak Python
+client](https://github.com/basho/riak-python-client).
+
+## How the Python Client Handles Conflict Resolution
+
+In the official Python client, every object of the `RiakObject` class
+has a `siblings` property that provides access to a list of an object's
+sibling values. If there are no siblings, that property will return a
+list with only one item. Here's an example of an object with siblings:
+
+```python
+bucket = client.bucket('seahawks')
+obj = bucket.get('coach')
+obj.siblings
+
+# The output:
+[<riak.content.RiakContent object at 0x106cc51d0>, <riak.content.RiakContent object at 0x1081da62c1>]
+```
+
+So what happens if the length of `obj.siblings` is greater than 1, as in
+the case above? 
The easiest way to resolve siblings automatically with +the Python client is to create a conflict-resolving function that takes +a list of sibling values and returns a single value. Such resolution +functions can be registered either at the object level or the bucket +level. A more complete explanation can be found in the section directly +below. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends." Each user will +be of the class `User`, which we'll create below. All of the data for our +application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +to `true`, which means that Riak will generate siblings in certain +cases---siblings that our application will need to be equipped to +resolve when necessary. + +The question that we need to ask ourselves at this point is the +following: if a given user has conflicting lists, which list should be +deemed more "correct?" What criteria should be applied? Should the lists +be merged? Should we pick a list at random and deem that list correct? +We'll keep it simple here and say that the following criterion will +hold: if multiple conflict lists exist, _the longer list will be the one +that our application deems correct_. While this might not make sense in +real-world applications, it's a good jumping-off point. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `friends` property that lists the usernames, as +strings, of the user's friends. We will also create a `to_json` method, +as we'll be storing each `User` object as JSON: + +```python +class User(object): + def __init__(self, username, friends): + self.username = username + self.friends = friends + + def to_json(self): + return vars(self) +``` + +Now, we can create `User` objects and see what they look like as JSON: + +```python +new_user = User('riakuser127', ['captheorem', 'siblingsrule572']) + +new_user.to_json() +# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']} +``` + +### Implementing and Registering a Conflict Resolution Function + +Let's say that we've stored a bunch of `User` objects in Riak and that a +few concurrent writes have led to siblings. How is our application going +to deal with that? First, let's say that there's a `User` object stored +in the bucket `users` (which is of the bucket type `siblings`, as +explained above) under the key `bashobunny`. We can fetch the object +that is stored there and see if it has siblings: + +```python +bucket = client.bucket_type('siblings').bucket('users') +obj = bucket.get('bashobunny') + +print len(obj.siblings) > 1 +``` + +If we get `True`, then there are siblings. So what do we do in that +case? The Python client allows us to write a conflict resolution hook +function that will be triggered any time siblings are found, i.e. any +time `len(obj.siblings) > 1`. A hook function like this needs to take a +single `RiakObject` object as its argument, apply some sort of logic to +the list of values contained in the `siblings` property, and ultimately +return a list with a single "correct" value. 
For our example case, we'll +return the value with the longest `friends` list: + +```python +def longest_friends_list_resolver(riak_object): + # We'll specify a lambda function that operates on the length of + # each sibling's "friends" list: + lm = lambda sibling: len(sibling.data['friends']) + # Then we'll return a list that contains only the object with the + # maximum value for the length of the "friends" list: + riak_object.siblings = [max(riak_object.siblings, key=lm), ] +``` + +### Registering a Conflict Resolver Function + +In the Python client, resolver functions can be registered at the object +level, as in this example: + +```python +bucket = client.bucket_type('siblings').bucket('users') +obj = RiakObject(client, bucket, 'bashobunny') +obj.resolver = longest_friends_list_resolver + +# Now, when the object is loaded from Riak, it will resolve to a single +# value instead of multiple values when both commands are executed: +obj.reload() +obj.store() +``` + +Alternatively, resolvers can be registered at the bucket level, so that +the resolution is applied to all objects in the bucket: + +```python +bucket = client.bucket_type('siblings').bucket('users') +bucket.resolver = longest_friends_list_resolver + +obj = RiakObject(client, bucket, 'bashobunny') +obj.reload() +obj.store() + +# The resolver will also be applied if you perform operations using the +# bucket object: + +bucket.get('bashobunny') +bucket.get('some_other_user') +``` + +## Conflict Resolution and Writes + +In the above example, we created a conflict resolver that resolves a +list of discrepant `User` object values and returns a single value. It's +important to note, however, that this resolver will only provide the +application with a single "correct" value; it will _not_ write that +value back to Riak. That requires a separate step. When this step should +be undertaken depends on your application. In general, though, we +recommend writing objects to Riak only when the application is ready to +commit them, i.e. when all of the changes that need to be made to the +object have been made and the application is ready to persist the state +of the object in Riak. + +Correspondingly, we recommend that updates to objects in Riak follow +these steps: + +1. **Read** the object from Riak +2. **Resolving sibling conflicts** if they exist, allowing the +application to reason about one "correct" value for the object (this +step is the subject of this tutorial) +3. **Modify** the object +4. **Write** the object to Riak once the necessary changes have been +made + +You can find more on writing objects to Riak, including code examples +from the official Python client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.4/developing/usage) section. + +## More Advanced Example + +Resolving sibling `User` values on the basis of which user has the +longest `friends` list has the benefit of being simple but it's probably +not a good resolution strategy for our social networking application +because it means that unwanted data loss is inevitable. If one friend +list contains `A`, `B`, and `C` and the other contains `D` and `E`, the +list containing `A`, `B`, and `C` will be chosen. So what about friends +`D` and `E`? Those usernames are essentially lost. In the sections +below, we'll implement an alternative strategy as an example. + +### Merging the Lists + +To avoid losing data like this, a better strategy would be to merge the +lists. 
We can modify our original resolver function to accomplish +precisely that and will also store the resulting `User` object: + +```python +from riak.content import RiakContent + +def longest_friends_list_resolver(riak_object): + # We start with an empty set + friends_list = set() + + # Then we add all the friends from all siblings to the set + for user in riak_object.siblings: + friends_list.update(user.data['friends']) + + # Then we make a new User object. First, we fetch the username from + # any one of the siblings, then we pass in our new friends list. + username = riak_object.siblings[0].data['username'] + new_user = User(username, list(friends_list)) + + # Now we reuse the first sibling as a container for the merged data + riak_object.siblings[0].data = new_user.to_json() + + # And finally we set the siblings property to include just the + # single, resolved sibling + riak_object.siblings = [riak_object.siblings[0]] +``` + +The drawback to this approach is the following: with a conflict +resolution strategy like this, it's more or less inevitable that a user +will remove a friend from their friends list, and that that friend will +end up back on the list during a conflict resolution operation. While +that's certainly not desirable, that is likely better than the +alternative proposed in the first example, which entails usernames being +simply dropped from friends lists. Sibling resolution strategies almost +always carry potential drawbacks of this sort. + +## Riak Data Types + +An important thing to always bear in mind when working with conflict +resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/) that have +specific conflict resolution mechanics built in. If you have data that +can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#maps), then you should seriously +consider using those Data Types instead of creating your own +application-side resolution logic. + +In the example above, we were dealing with conflict resolution within a +set, in particular the `friends` list associated with each `User` +object. The merge operation that we built to handle conflict resolution +is analogous to the resolution logic that is built into Riak sets. For +more information on how you could potentially replace the client-side +resolution that we implemented above, see our [tutorial on Riak sets]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#sets). 
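+
+To make that concrete, here is a brief sketch of what the friends list
+looks like when modeled as a Riak set instead. It assumes a bucket type
+named `sets` has been created with its datatype property set to `set`:
+
+```python
+from riak import RiakClient
+from riak.datatypes import Set
+
+client = RiakClient()
+
+# Sets must live in a bucket type whose datatype property is "set"
+bucket = client.bucket_type('sets').bucket('friends')
+
+friends = Set(bucket, 'bashobunny')
+friends.add('fred')
+friends.add('barney')
+friends.store()
+
+# Concurrent adds and removes from different clients are merged by Riak
+# itself, so reads never return siblings for the application to resolve
+```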
+ + + + diff --git a/content/riak/kv/3.0.4/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/ruby.md new file mode 100644 index 0000000000..26a07aecfc --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/conflict-resolution/ruby.md @@ -0,0 +1,254 @@ +--- +title_supertext: "Conflict Resolution:" +title: "Ruby" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Ruby" + identifier: "usage_conflict_resolution_ruby" + weight: 101 + parent: "usage_conflict_resolution" +toc: true +aliases: + - /riak/3.0.4/dev/using/conflict-resolution/ruby + - /riak/kv/3.0.4/dev/using/conflict-resolution/ruby +--- + +For reasons explained in the [Introduction to conflict resolution]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution), we strongly recommend adopting a +conflict resolution strategy that requires applications to resolve +siblings according to use-case-specific criteria. Here, we'll provide a +brief guide to conflict resolution using the official [Riak Ruby +client](https://github.com/basho/riak-ruby-client). + +## How the Ruby Client Handles Conflict Resolution + +In the official Ruby client, every Riak object has a `siblings` property +that provides access to a list of that object's sibling values. If there +are no siblings, that property will return an array with only one item. +Here's an example of an object with siblings: + +```ruby +bucket = client.bucket('seahawks') +obj = bucket.get('coach') +obj.siblings + +# The output: +[#<Riak::RContent [content/type]: "Jim Mora">, #<Riak::RContent [content/type]: "Pete Carroll">] +``` + +So what happens if the length of `obj.siblings` is greater than 1, as in +the case above? In order to resolve siblings, you need to create a +resolution function that takes a Riak object and reduces the `siblings` +array down to a single value. An example is provided in the section +below. + +## Basic Conflict Resolution Example + +Let's say that we're building a social network application and storing +lists of usernames representing each user's "friends." Each user will be +of the class `User`, which we'll create below. All of the data for our +application will be stored in buckets that bear the [bucket type]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) `siblings`, and for this bucket type `allow_mult` is set +to `true`, which means that Riak will generate siblings in certain +cases---siblings that our application will need to be equipped to +resolve when necessary. + +The question that we need to ask ourselves at this point is the +following: if a given user has conflicting lists, which list should be +deemed more "correct?" What criteria should be applied? Should the lists +be merged? Should we pick a list at random and deem that list correct? +We'll keep it simple here and say that the following criterion will +hold: if multiple conflict lists exist, _the longer list will be the one +that our application deems correct_. While this might not make sense in +real-world applications, it's a good jumping-off point. + +### Creating Our Data Class + +We'll start by creating a `User` class for each user's data. Each `User` +object will consist of a `username` and a `friends` property that lists +the usernames, as strings, of the user's friends. 
We will also create a
+`to_json` method, as we'll be storing each `User` object as JSON:
+
+```ruby
+class User
+  def initialize(username, friends)
+    @username = username
+    @friends = friends
+  end
+
+  def to_json
+    { :username => @username, :friends => @friends }
+  end
+end
+```
+
+Now, we can create `User` objects and see what they look like as JSON:
+
+```ruby
+new_user = User.new('riakuser127', ['captheorem238', 'siblingsrule572'])
+
+new_user.to_json
+# {'username': 'riakuser127', 'friends': ['captheorem238', 'siblingsrule572']}
+```
+
+### Implementing a Conflict Resolution Function
+
+Let's say that we've stored a bunch of `User` objects in Riak and that a
+few concurrent writes have led to siblings. How is our application going
+to deal with that? First, let's say that there's a `User` object stored
+in the bucket `users` (which is of the bucket type `siblings`, as
+explained above) under the key `bashobunny`. We can fetch the object
+that is stored there and see if it has siblings:
+
+```ruby
+bucket = client.bucket('users')
+obj = bucket.get('bashobunny', type: 'siblings')
+p obj.siblings.length > 1
+```
+
+If we get `true`, then there are siblings. So what do we do in that
+case? At this point, we need to write a function that resolves the list
+of siblings, i.e. reduces the `obj.siblings` array down to one member.
+In our case, we need a function that takes a single Riak object (or
+`RObject` in the Ruby client) as its argument, applies some logic to the
+list of values contained in the `siblings` property of the object, and
+returns a single value. For our example use case here, we'll return the
+sibling with the longest `friends` list:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # The "conflict?" method is built into the Ruby client
+  if riak_object.conflict?
+    # The "max_by" method enables us to select the sibling with the
+    # longest "friends" list
+    riak_object.siblings.max_by{ |user| user.data['friends'].length }
+  else
+    # If there are no siblings, we can simply return the object's
+    # "content" as is
+    riak_object.content
+  end
+end
+```
+
+We can then embed this function into a more general function for
+fetching objects from the `users` bucket:
+
+```ruby
+def fetch_user_by_username(username)
+  bucket = client.bucket('users')
+  user_object = bucket.get(username, type: 'siblings')
+  longest_friends_list_resolver(user_object)
+end
+
+bashobunny = fetch_user_by_username('bashobunny')
+```
+
+Now, when a `User` object is fetched (assuming that the username acts as
+a key for the object), a single value is returned for the `friends`
+list. This means that our application can now use a "correct" value
+instead of having to deal with multiple values.
+
+## Conflict Resolution and Writes
+
+In the above example, we created a conflict resolver that resolves a
+list of discrepant `User` objects and returns a single `User`. It's
+important to note, however, that this resolver will only provide the
+application with a single "correct" value; it will _not_ write that
+value back to Riak. That requires a separate step. When this step should
+be undertaken depends on your application. In general, though, we
+recommend writing objects to Riak only when the application is ready to
+commit them, i.e. when all of the changes that need to be made to the
+object have been made and the application is ready to persist the state
+of the object in Riak.
+
+Correspondingly, we recommend that updates to objects in Riak follow
+these steps:
+
+1. **Read** the object from Riak
+2. 
**Resolving sibling conflicts** if they exist, allowing the
+application to reason about one "correct" value for the object (this
+step is the subject of this tutorial)
+3. **Modify** the object
+4. **Write** the object to Riak once the necessary changes have been
+made
+
+You can find more on writing objects to Riak, including examples from
+the official Ruby client library, in the [Developing with Riak KV: Usage]({{<baseurl>}}riak/kv/3.0.4/developing/usage) section.
+
+## More Advanced Example
+
+Resolving sibling `User` values on the basis of which user has the longest
+friends list has the benefit of being simple but it's probably not a
+good resolution strategy for our social networking application because
+it means that unwanted data loss is inevitable. If one friend list
+contains `A`, `B`, and `C` and the other contains `D` and `E`, the list
+containing `A`, `B`, and `C` will be chosen. So what about friends `D`
+and `E`? Those usernames are essentially lost. In the sections below,
+we'll implement an alternative strategy as an example.
+
+### Merging the Lists
+
+To avoid losing data like this, a better strategy would be to merge the
+lists. We can modify our original resolver function to accomplish
+precisely that and will also store the resulting `User` object:
+
+```ruby
+def longest_friends_list_resolver(riak_object)
+  # An empty array for use later on
+  friends_list = []
+  if riak_object.conflict?
+    # The "friends" arrays for all siblings will be merged into one
+    # array
+    riak_object.siblings.each do |sibling|
+      friends_list.concat(sibling.data['friends'])
+    end
+
+    # Then we make a new User object. First, we fetch the username from
+    # any one of the siblings, then we pass in our new friends list,
+    # calling the "uniq" method to eliminate duplicate usernames.
+    username = riak_object.siblings[0].data['username']
+    new_user = User.new(username, friends_list.uniq)
+
+    # Now we reuse the first sibling as a container for the merged data
+    riak_object.siblings[0].data = new_user.to_json
+
+    # And finally we set the siblings property to include just the
+    # single, resolved sibling
+    riak_object.siblings = [riak_object.siblings[0]]
+  else
+    riak_object.content
+  end
+end
+```
+
+The drawback to this approach is the following: with a conflict
+resolution strategy like this, it's more or less inevitable that a user
+will remove a friend from their friends list, and that that friend will
+end up back on the list during a conflict resolution operation. While
+that's certainly not desirable, that is likely better than the
+alternative proposed in the first example, which entails usernames being
+simply dropped from friends lists. Sibling resolution strategies almost
+always carry potential drawbacks of this sort.
+
+## Riak Data Types
+
+An important thing to always bear in mind when working with conflict
+resolution is that Riak offers a variety of [Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/) that have
+specific conflict resolution mechanics built in. If you have data that
+can be modeled as a [counter]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#counters), [set]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#sets), or [map]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#maps), then you should seriously
+consider using those Data Types instead of creating your own
+application-side resolution logic.
+
+In the example above, we were dealing with conflict resolution within a
+set, in particular the `friends` list associated with each `User` 
diff --git a/content/riak/kv/3.0.4/developing/usage/content-types.md b/content/riak/kv/3.0.4/developing/usage/content-types.md
new file mode 100644
index 0000000000..cf19d1b7bf
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/content-types.md
@@ -0,0 +1,192 @@
---
title: "Content Types"
description: ""
project: "riak_kv"
project_version: 3.0.4
menu:
  riak_kv-3.0.4:
    name: "Content Types"
    identifier: "usage_content_types"
    weight: 104
    parent: "developing_usage"
toc: true
aliases:
---

Riak KV is a fundamentally content-agnostic database. You can use it to
store anything you want, from JSON to XML to HTML to binaries to images
and beyond. It's important to note that _all_ objects stored in
Riak need a specified content type. If you don't specify a
content type, the reaction will vary based on your client library:

```java
// In the Java client, the response when storing an object without
// specifying a content type will depend on what is being stored. If you
// store a Java Map, for example, the client will automatically specify
// that the object is "application/json"; if you store a String, the
// client will specify "application/x-www-form-urlencoded"; POJOs are
// stored as JSON by default, and so on.
```

```ruby
# In the Ruby client, you must always specify a content type. If you
# don't, you'll see the following error:
ArgumentError: content_type is not defined!
```

```php
# PHP will default to cURL's default content type for POST & PUT requests:
# application/x-www-form-urlencoded

# If you use the StoreObject::buildJsonObject() method when building your command,
# it will store the item with application/json as the content-type
```

```python
# In the Python client, the default content type is "application/json".
# Because of this, you should always make sure to specify the content
# type when storing other types of data.
```

```csharp
// Using the Riak .NET Client, the response when storing an object without
// specifying a content type will depend on what is being stored.
// If you store a Dictionary, for example, the client will
// automatically specify that the object is "application/json";
// POCOs are stored as JSON by default, and so on.
```

```javascript
// In the Node.js client, the default content type is "application/json".
// Because of this, you should always make sure to specify the content
// type when storing other types of data.
```

```erlang
%% In the Erlang client, the response when storing an object without
%% specifying a content type will depend on what is being stored. If
%% you store a simple binary, for example, the client will automatically
%% specify that the object is "application/octet-stream"; if you store a
%% string, the client will specify "application/x-erlang-binary"; and so
%% on.
```

```golang
// In the Go client, you must always specify a content type.
```

Because content type negotiation varies so widely from client to client,
we recommend consulting the documentation for your preferred client for
more information.
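One quick way to see which content type Riak has recorded for a stored
object is to inspect the response headers over HTTP. This sketch
assumes the `genius` object stored in the next section already exists:

```curl
# A HEAD request returns the object's metadata, including the
# Content-Type recorded at write time (e.g. "Content-Type: text/plain")
curl -I http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
```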
## Specifying Content Type

For all writes to Riak, you will need to specify a content type, for
example `text/plain` or `application/json`.

```java
Location wildeGeniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
BinaryValue text = BinaryValue.create("I have nothing to declare but my genius");
RiakObject obj = new RiakObject()
        .setContentType("text/plain")
        .setValue(text);
StoreValue store = new StoreValue.Builder(obj)
        .withLocation(wildeGeniusQuote)
        .build();
client.execute(store);
```

```ruby
bucket = client.bucket_type('quotes').bucket('oscar_wilde')
obj = Riak::RObject.new(bucket, 'genius')
obj.content_type = 'text/plain'
obj.raw_data = 'I have nothing to declare but my genius'
obj.store
```

```php
$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('genius', 'oscar_wilde', 'quotes')
  ->buildObject('I have nothing to declare but my genius!', 'text/plain')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('quotes').bucket('oscar_wilde')
obj = RiakObject(client, bucket, 'genius')
obj.content_type = 'text/plain'
obj.data = 'I have nothing to declare but my genius'
obj.store()
```

```csharp
var id = new RiakObjectId("quotes", "oscar_wilde", "genius");
var obj = new RiakObject(id, "I have nothing to declare but my genius",
    RiakConstants.ContentTypes.TextPlain);
var rslt = client.Put(obj);
```

```javascript
var riakObj = new Riak.Commands.KV.RiakObject();
riakObj.setContentType('text/plain');
riakObj.setValue('I have nothing to declare but my genius');
client.storeValue({
    bucketType: 'quotes', bucket: 'oscar_wilde', key: 'genius',
    value: riakObj
}, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
Object = riakc_obj:new({<<"quotes">>, <<"oscar_wilde">>},
                       <<"genius">>,
                       <<"I have nothing to declare but my genius">>,
                       <<"text/plain">>).
riakc_pb_socket:put(Pid, Object).
```

```golang
obj := &riak.Object{
    ContentType:     "text/plain",
    Charset:         "utf-8",
    ContentEncoding: "utf-8",
    Value:           []byte("I have nothing to declare but my genius"),
}

cmd, err := riak.NewStoreValueCommandBuilder().
    WithBucketType("quotes").
    WithBucket("oscar_wilde").
    WithKey("genius").
    WithContent(obj).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}

svc := cmd.(*riak.StoreValueCommand)
rsp := svc.Response
```

```curl
curl -XPUT \
  -H "Content-Type: text/plain" \
  -d "I have nothing to declare but my genius" \
  http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius

# Please note that POST is also a valid method for writes, for the sake
# of compatibility
```




diff --git a/content/riak/kv/3.0.4/developing/usage/creating-objects.md b/content/riak/kv/3.0.4/developing/usage/creating-objects.md
new file mode 100644
index 0000000000..dd5e0ddf35
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/creating-objects.md
@@ -0,0 +1,555 @@
---
title: "Creating Objects in Riak KV"
description: ""
project: "riak_kv"
project_version: 3.0.4
menu:
  riak_kv-3.0.4:
    name: "Creating Objects"
    identifier: "usage_creating_objects"
    weight: 100
    parent: "developing_usage"
toc: true
aliases:
---

[usage content types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/content-types

Writes in Riak KV (storing or modifying objects) are like HTTP `PUT`
requests.
Here is the basic form of writes:

```
PUT /types/<type>/buckets/<bucket>/keys/<key>

# If you're using HTTP to interact with Riak, you can also use POST
```

As an example, let's store an object containing information about a dog named Rufus. We'll store that object in the key `rufus` in the bucket `dogs`, which bears the `animals` [bucket type]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/bucket-types).

The object we're storing will be very simple, just a basic text snippet
of something that Rufus might say. Let's build the object and then store
it.

```java
String quote = "WOOF!";
Namespace bucket = new Namespace("animals", "dogs");
Location rufusLocation = new Location(bucket, "rufus");
RiakObject rufusObject = new RiakObject()
        .setContentType("text/plain")
        .setValue(BinaryValue.create(quote));
StoreValue storeOp = new StoreValue.Builder(rufusObject)
        .withLocation(rufusLocation)
        .build();
client.execute(storeOp);
```

```ruby
bucket = client.bucket_type('animals').bucket('dogs')
obj = Riak::RObject.new(bucket, 'rufus')
obj.content_type = 'text/plain'
obj.data = 'WOOF!'
obj.store
```

```php
$response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('rufus', 'dogs', 'animals')
  ->buildObject('WOOF!', 'text/plain')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('animals').bucket('dogs')
obj = RiakObject(client, bucket, 'rufus')
obj.content_type = 'text/plain'
obj.data = 'WOOF!'
obj.store()
```

```csharp
var id = new RiakObjectId("animals", "dogs", "rufus");
var obj = new RiakObject(id, "WOOF!", "text/plain");
var result = client.Put(obj);
```

```javascript
var riakObj = new Riak.Commands.KV.RiakObject();
riakObj.setContentType('text/plain');
riakObj.setValue('WOOF!');
client.storeValue({
    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
    value: riakObj
}, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```golang
obj := &riak.Object{
    ContentType:     "text/plain",
    Charset:         "utf-8",
    ContentEncoding: "utf-8",
    Value:           []byte("WOOF!"),
}

cmd, err := riak.NewStoreValueCommandBuilder().
    WithBucketType("animals").
    WithBucket("dogs").
    WithKey("rufus").
    WithContent(obj).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}

svc := cmd.(*riak.StoreValueCommand)
rsp := svc.Response
```

Notice that we specified both a value for the object, i.e. `WOOF!`, and
a content type, `text/plain`. See [content types][usage content types] for more information.

Now, run the same read operation as in [Reading Objects]({{<baseurl>}}riak/kv/3.0.4/developing/usage/reading-objects). If the write operation was successful, you should be able to successfully read the object. Please note that the operation will fail if you don't first create the bucket type `animals` as per the page on [bucket types]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/bucket-types).

### Store an Object

Your application will often have its own method of generating the keys
for its data, e.g. on the basis of timestamps. If so, storing that data
is easy. The basic request looks like this.

```
PUT /types/TYPE/buckets/BUCKET/keys/KEY

# If you're using HTTP, POST can be used instead of PUT. The only
# difference between POST and PUT is that you should POST in cases where
# you want Riak to auto-generate a key. More on this can be found in the
# examples below.
```

There is no need to intentionally create buckets in Riak. They pop into
existence when keys are added to them, and disappear when all keys have
been removed from them. If you don't specify a bucket's type, the type
[`default`]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) will be applied.
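As a quick sketch of that behavior, the `rufus` object from above could
be written to an untyped bucket, in which case Riak applies the
`default` type and no prior bucket type setup is needed:

```curl
# Writing to a bucket with no explicit bucket type; Riak files it under
# the "default" type automatically
curl -XPUT \
  -H "Content-Type: text/plain" \
  -d "WOOF!" \
  http://localhost:8098/buckets/dogs/keys/rufus
```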
#### Write Parameters

Write requests support the following parameters:

Parameter | Default | Description
:---------|:--------|:-----------
`w` | `quorum` | How many replicas to write to before returning a successful response
`pw` | `0` | How many primary vnodes must respond for a write to be deemed successful
`dw` | `quorum` | How many replicas to commit to durable storage before returning a successful response
`returnbody` | `false` | Whether to return the contents of the stored object

Here is an example of storing an object (another brief text snippet)
under the key `viper` in the bucket `dodge`, which bears the type
`cars`, with `w` set to `3`:

```java
Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
BinaryValue text = BinaryValue.create("vroom");
RiakObject obj = new RiakObject()
        .setContentType("text/plain")
        .setValue(text);
StoreValue store = new StoreValue.Builder(obj)
        .withLocation(viperKey)
        .withOption(StoreOption.W, new Quorum(3))
        .build();
client.execute(store);
```

```ruby
bucket = client.bucket_type('cars').bucket('dodge')
obj = Riak::RObject.new(bucket, 'viper')
obj.content_type = 'text/plain'
obj.raw_data = 'vroom'
obj.store(w: 3)
```

```php
(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('viper', 'dodge', 'cars')
  ->buildObject('vroom', 'text/plain')
  ->withParameter('w', 3)
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('cars').bucket('dodge')
obj = RiakObject(client, bucket, 'viper')
obj.content_type = 'text/plain'
obj.data = 'vroom'
obj.store(w=3)
```

```csharp
var id = new RiakObjectId("cars", "dodge", "viper");
var obj = new RiakObject(id, "vroom", "text/plain");
var options = new RiakPutOptions();
options.SetW(new Quorum(3));
var result = client.Put(obj, options);
```

```javascript
var riakObj = new Riak.Commands.KV.RiakObject();
riakObj.setContentType('text/plain');
riakObj.setValue('vroom');

var options = {
    bucketType: 'cars', bucket: 'dodge', key: 'viper',
    w: 3, value: riakObj
};
client.storeValue(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
                       <<"viper">>,
                       <<"vroom">>,
                       <<"text/plain">>).
riakc_pb_socket:put(Pid, Object, [{w, 3}]).
```

```golang
obj := &riak.Object{
    ContentType:     "text/plain",
    Charset:         "utf-8",
    ContentEncoding: "utf-8",
    Value:           []byte("vroom"),
}

cmd, err := riak.NewStoreValueCommandBuilder().
    WithBucketType("cars").
    WithBucket("dodge").
    WithKey("viper").
    WithW(3).
    WithContent(obj).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}
```

```curl
curl -XPUT \
  -H "Content-Type: text/plain" \
  -d "vroom" \
  http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3
```

Again, the above will only work if the `cars` bucket type has been created and activated.
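If you haven't created that type yet, the setup looks something like
this (a sketch; add any props your application needs):

```bash
# Create and activate the "cars" bucket type used in the example above
riak-admin bucket-type create cars '{"props":{}}'
riak-admin bucket-type activate cars
```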
#### Return Body

If `returnbody` is set to `true`, any of the response headers expected
from a read request may be present. Like a `GET` request, `300 Multiple
Choices` may be returned if siblings existed or were created as part of
the operation, and the response can be dealt with similarly.

Normal HTTP status codes (responses will vary for client libraries):

* `200 OK`
* `204 No Content`
* `300 Multiple Choices`

For example, using the same object from above:

```java
Location viperKey = new Location(new Namespace("cars", "dodge"), "viper");
BinaryValue text = BinaryValue.create("vroom");
RiakObject obj = new RiakObject()
        .setContentType("text/plain")
        .setValue(text);
StoreValue store = new StoreValue.Builder(obj)
        .withLocation(viperKey)
        .withOption(StoreOption.W, new Quorum(3))
        .withOption(StoreOption.RETURN_BODY, true)
        .build();
client.execute(store);
```

```ruby
bucket = client.bucket_type('cars').bucket('dodge')
obj = Riak::RObject.new(bucket, 'viper')
obj.content_type = 'text/plain'
obj.raw_data = 'vroom'
obj.store(w: 3, returnbody: true)
```

```php
(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('viper', 'dodge', 'cars')
  ->buildObject('vroom', 'text/plain')
  ->withParameter('w', 3)
  ->withParameter('returnbody', 'true')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('cars').bucket('dodge')
obj = RiakObject(client, bucket, 'viper')
obj.content_type = 'text/plain'
obj.data = 'vroom'
obj.store(w=3, return_body=True)
```

```csharp
var id = new RiakObjectId("cars", "dodge", "viper");
var obj = new RiakObject(id, "vroom", "text/plain");
var options = new RiakPutOptions();
options.SetW(new Quorum(3));
options.SetReturnBody(true);
var result = client.Put(obj, options);
```

```javascript
var riakObj = new Riak.Commands.KV.RiakObject();
riakObj.setContentType('text/plain');
riakObj.setValue('vroom');

var options = {
    bucketType: 'cars', bucket: 'dodge', key: 'viper',
    w: 3, returnBody: true, value: riakObj
};
client.storeValue(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
    var riakObj = rslt.values.shift();
    var viper = riakObj.value;
    logger.info("dodge viper: %s", viper.toString('utf8'));
});
```

```erlang
Object = riakc_obj:new({<<"cars">>, <<"dodge">>},
                       <<"viper">>,
                       <<"vroom">>,
                       <<"text/plain">>).
riakc_pb_socket:put(Pid, Object, [return_body]).
```

```golang
obj := &riak.Object{
    ContentType:     "text/plain",
    Charset:         "utf-8",
    ContentEncoding: "utf-8",
    Value:           []byte("vroom"),
}

cmd, err := riak.NewStoreValueCommandBuilder().
    WithBucketType("cars").
    WithBucket("dodge").
    WithKey("viper").
    WithW(3).
    WithContent(obj).
    WithReturnBody(true).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}
```

```curl
curl -XPUT \
  -H "Content-Type: text/plain" \
  -d "vroom" \
  "http://localhost:8098/types/cars/buckets/dodge/keys/viper?w=3&returnbody=true"
```

### Store a New Object and Assign a Random Key

If your application would rather leave key generation up to Riak, issue
a `POST` request to the bucket URL instead of a PUT to a bucket/key
pair:

```
POST /types/TYPE/buckets/BUCKET/keys
```

If you don't pass Riak a `key` name after the bucket, it will know to
create one for you.

Supported headers are the same as for bucket/key write requests, though
`X-Riak-Vclock` will never be relevant for these POST requests.
Supported query parameters are also the same as for bucket/key PUT
requests.
+ +Normal status codes: + +* `201 Created` + +This command will store an object in the bucket `random_user_keys`, +which bears the bucket type `users`. + +```java +Namespace locationWithoutKey = new Namespace("users", "random_user_keys"); +BinaryValue text = BinaryValue.create("{'user':'data'}"); +RiakObject obj = new RiakObject() + .setContentType("application/json") + .setValue(text); +StoreValue store = new StoreValue.Builder(locationWithoutKey, obj) + .build(); +String key = client.execute(store).getLocation().getKeyAsString(); + +// The Java client will assign a random key along the following lines: +"ZPFF18PUqGW9efVou7EHhfE6h8a" +``` + +```ruby +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = Riak::RObject.new(bucket) +obj.content_type = 'application/json' +obj.raw_data = '{"user":"data"}' + +obj.store + +# The client will assign a key like the following: +obj.key +"GB8fW6DDZtXogK19OLmaJf247DN" +``` + +```php +$response = (new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildBucket('random_user_keys', 'users') + ->buildJsonObject(['user'=>'data']) + ->build() + ->execute(); + +echo $response->getLocation()->getKey(); // GB8fW6DDZtXogK19OLmaJf247DN +``` + +```python +bucket = client.bucket_type('users').bucket('random_user_keys') +obj = RiakObject(client, bucket) +obj.content_type = 'application/json' +obj.data = '{"user":"data"}' +obj.store() + +obj.key + +# The Python client will assign a random key along the following lines: +'ZPFF18PUqGW9efVou7EHhfE6h8a' +``` + +```csharp +var id = new RiakObjectId("users", "random_user_keys", null); +var obj = new RiakObject(id, @"{'user':'data'}", + RiakConstants.ContentTypes.ApplicationJson); +var rslt = client.Put(obj); +Debug.WriteLine(format: "Generated key: {0}", args: rslt.Value.Key); + +// The .NET client will output a random key similar to this: +// Generated key: DWDsnpYSqOU363c0Bqe8hCwAM7Q +``` + +```javascript +var user = { + user: 'data' +}; +var options = { + bucketType: 'users', bucket: 'random_user_keys', + returnBody: true, value: user +}; +client.storeValue(options, function (err, rslt) { + if (err) { + throw new Error(err); + } + var riakObj = rslt.values.shift(); + var generatedKey = riakObj.getKey(); + logger.info("Generated key: %s", generatedKey); +}); + +// The Node.js client will output a random key similar to this: +// info: Generated key: VBAMoX0OOucymVCxeQEYzLzzAh2 +``` + +```erlang +Object = riakc_obj:new({<<"users">>, <<"random_user_keys">>}, undefined, <<"{'user':'data'}">>, <<"application/json">>). +riakc_pb_socket:put(Pid, Object). + +%% The key can be retrieved from the output of the above call. +%% It will look something like this: + +{ok,{riakc_obj,{<<"users">>,<<"random_user_keys">>}, + <<"EZ7pp4bpdfpZw0fPUdTUafveQjO">>,undefined,[],undefined, + undefined}} +``` + +```golang +obj := &riak.Object{ + ContentType: "application/json", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("{'user':'data'}"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("users"). + WithBucket("random_user_keys"). + WithContent(obj). 
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}

svc := cmd.(*riak.StoreValueCommand)
rsp := svc.Response
fmt.Printf("Generated key: %v\n", rsp.GeneratedKey)

// Output:
// Generated key: QSHkZjFdWwfrxtKl3wtUhL2gz7N
```

```curl
curl -i -XPOST \
  -H "Content-Type: text/plain" \
  -d "this is a test" \
  http://localhost:8098/types/users/buckets/random_user_keys/keys

# In the output, you should see a Location header that will give you the
# location of the object in Riak, with the key at the end:

Location: /buckets/random_user_keys/keys/G7FYUXtTsEdru4NP32eijMIRK3o
```




diff --git a/content/riak/kv/3.0.4/developing/usage/custom-extractors.md b/content/riak/kv/3.0.4/developing/usage/custom-extractors.md
new file mode 100644
index 0000000000..233eafe5f4
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/custom-extractors.md
@@ -0,0 +1,424 @@
---
title: "Custom Extractors"
description: ""
project: "riak_kv"
project_version: 3.0.4
menu:
  riak_kv-3.0.4:
    name: "Custom Extractors"
    identifier: "usage_custom_extractors"
    weight: 113
    parent: "developing_usage"
toc: true
aliases:
  - /riak/3.0.4/dev/search/custom-extractors
  - /riak/kv/3.0.4/dev/search/custom-extractors
---

Solr, and by extension Riak Search, has default extractors for a wide
variety of data types, including JSON, XML, and plaintext. Riak Search
ships with the following extractors:

Content Type | Erlang Module
:------------|:-------------
`application/json` | `yz_json_extractor`
`application/xml` | `yz_xml_extractor`
`text/plain` | `yz_text_extractor`
`text/xml` | `yz_xml_extractor`
No specified type | `yz_noop_extractor`

There are also built-in extractors for [Riak Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/usage/searching-data-types).

If you're working with a data format that does not have a default Solr
extractor, you can create your own and register it with Riak Search.
We'll show you how to do so by way of example.

## The Extractor Interface

Creating a custom extractor involves creating an Erlang module that
implements two functions:

* `extract/1` - Takes the contents of the object and calls `extract/2`
  with the same contents and an empty list
* `extract/2` - Takes the contents of the object and returns an Erlang
  [proplist](http://www.erlang.org/doc/man/proplists.html) with a
  single field name and a single value associated with that name

The following extractor shows how a pure text extractor implements those
two functions:

```erlang
-module(search_test_extractor).
-include("yokozuna.hrl").
-compile(export_all).

extract(Value) ->
    extract(Value, []).

extract(Value, Opts) ->
    FieldName = field_name(Opts),
    [{FieldName, Value}].

-spec field_name(proplist()) -> any().
field_name(Opts) ->
    proplists:get_value(field_name, Opts, text).
```

This extractor takes the contents of a `Value` and returns a proplist
with a single field name (in this case `text`) and the single value.
This function can be run in the Erlang shell. Let's run it providing the
text `hello`:

```erlang
> c(search_test_extractor).
%% {ok, search_test_extractor}

> search_test_extractor:extract("hello").

%% Console output:
[{text, "hello"}]
```

Upon running this command, the value `hello` would be indexed in Solr
under the fieldname `text`.
If you wanted to find all objects with a +`text` field that begins with `Fourscore`, you could use the +Solr query `text:Fourscore*`, to give just one example. + +## An Example Custom Extractor + +Let's say that we're storing HTTP header packet data in Riak. Here's an +example of such a packet: + +``` +GET http://www.google.com HTTP/1.1 +``` + +We want to register the following information in Solr: + +Field name | Value | Extracted value in this example +:----------|:------|:------------------------------- +`method` | The HTTP method | `GET` +`host` | The URL's host | `www.google.com` +`uri` | The URI, i.e. what comes after the host | `/` + +The example extractor below would provide the three desired +fields/values. It relies on the +[`decode_packet`](http://www.erlang.org/doc/man/erlang.html#decode_packet-3) +function from Erlang's standard library. + +```erlang +-module(yz_httpheader_extractor). +-compile(export_all). + +extract(Value) -> + extract(Value, []). + +%% In this example, we can ignore the Opts variable from the example +%% above, hence the underscore: +extract(Value, _Opts) -> + {ok, + {http_request, + Method, + {absoluteURI, http, Host, undefined, Uri}, + _Version}, + _Rest} = erlang:decode_packet(http, Value, []), + [{method, Method}, {host, list_to_binary(Host)}, {uri, list_to_binary(Uri)}]. +``` + +This file will be stored in a `yz_httpheader_extractor.erl` file (as +Erlang filenames must match the module name). Now that our extractor has +been written, it must be compiled and registered in Riak before it can +be used. + +## Registering Custom Extractors + +In order to use a custom extractor, you must create a compiled `.beam` +file out of your `.erl` extractor file and then tell Riak where that +file is located. Let's say that we have created a +`search_test_extractor.erl` file in the directory `/opt/beams`. First, +we need to compile that file: + +```bash +erlc search_test_extractor.erl +``` + +To instruct Riak where to find the resulting +`search_test_extractor.beam` file, we'll need to add a line to an +`advanced.config` file in the node's `/etc` directory (more information +can be found in our documentation on [advanced]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#advanced-configuration)). Here's an +example: + +```advancedconfig +[ + %% Other configs + {vm_args, [ + {"-pa /opt/beams", ""} + ]}, + %% Other configs +] +``` + +This will instruct the Erlang VM on which Riak runs to look for compiled +`.beam` files in the proper directory. You should re-start the node at +this point. Once the node has been re-started, you can use the node's +Erlang shell to register the `yz_httpheader_extractor`. First, attach to +the shell: + +```bash +riak attach +``` + +At this point, we need to choose a MIME type for our extractor. Let's +call it `application/httpheader`. Once you're in the shell: + +```erlang +> yz_extractor:register("application/httpheader", yz_httpheader_extractor). +``` + +If successful, this command will return a list of currently registered +extractors. 
It should look like this: + +```erlang +[{default,yz_noop_extractor}, + {"application/httpheader",yz_httpheader_extractor}, + {"application/json",yz_json_extractor}, + {"application/riak_counter",yz_dt_extractor}, + {"application/riak_map",yz_dt_extractor}, + {"application/riak_set",yz_dt_extractor}, + {"application/xml",yz_xml_extractor}, + {"text/plain",yz_text_extractor}, + {"text/xml",yz_xml_extractor}] +``` + +If the `application/httpheader` extractor is part of that list, then the +extractor has been successfully registered. + +## Verifying Our Custom Extractor + +Now that Riak Search knows how to decode and extract HTTP header packet +data, let's store some in Riak and then query it. We'll put the example +packet data from above in a `google_packet.bin` file. Then, we'll `PUT` +that binary to Riak's `/search/extract` endpoint: + +```curl +curl -XPUT $RIAK_HOST/search/extract \ + -H 'Content-Type: application/httpheader' \ # Note that we used our custom MIME type + --data-binary @google_packet.bin +``` + +That should return the following JSON: + +```json +{ + "method": "GET", + "host": "www.google.com", + "uri": "/" +} +``` + +We can also verify this in the Erlang shell (whether in a Riak node's +Erlang shell or otherwise): + +```erlang +yz_extractor:run(<<"GET http://www.google.com HTTP/1.1\n">>, yz_httpheader_extractor). + +%% Console output: +[{method,'GET'},{host,<<"www.google.com">>},{uri,<<"/">>}] +``` + +## Indexing and Searching HTTP Header Packet Data + +Now that Solr knows how to extract HTTP header packet data, we need to +create a schema that extends the [default schema]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search-schemas/#creating-a-custom-schema). The following fields should be added +to `<fields>` in the schema, which we'll name `http_header_schema` and +store in a `http_header_schema.xml` file: + +```xml +<?xml version="1.0" encoding="UTF-8" ?> +<schema name="http_header_schema" version="1.5"> +<fields> + <!-- other required fields here --> + + <field name="method" type="string" indexed="true" stored="true" multiValued="false"/> + <field name="host" type="string" indexed="true" stored="true" multiValued="false"/> + <field name="uri" type="string" indexed="true" stored="true" multiValued="false"/> +</fields> +``` + +Now, we can store the schema: + +```java +import org.apache.commons.io.FileUtils + +File xml = new File("http_header_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("http_header_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_xml = File.read('http_header_schema.xml') +client.create_search_schema('http_header_schema', schema_xml) +``` + +```php +$schema_string = file_get_contents('http_header_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('http_header_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +import io + +schema_xml = open('http_header_schema.xml').read() +client.create_search_schema('http_header_schema', schema_xml) +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/http_header_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @http_header_schema.xml +``` + +Riak now has our schema stored and ready for use. 
Let's create a search
index called `header_data` that's associated with our new schema:

```java
YokozunaIndex headerDataIndex = new YokozunaIndex("header_data", "http_header_schema");
StoreSearchIndex storeIndex = new StoreSearchIndex.Builder(headerDataIndex)
        .build();
client.execute(storeIndex);
```

```ruby
client.create_search_index('header_data', 'http_header_schema')
```

```php
(new \Basho\Riak\Command\Builder\StoreIndex($riak))
  ->withName('header_data')
  ->usingSchema('http_header_schema')
  ->build()
  ->execute();
```

```python
client.create_search_index('header_data', 'http_header_schema')
```

```curl
curl -XPUT $RIAK_HOST/search/index/header_data \
  -H 'Content-Type: application/json' \
  -d '{"schema":"http_header_schema"}'
```

Now, we can create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types)
for all of the HTTP header data that we plan to store. Any bucket that
bears this type will be associated with our `header_data` search index.
We'll call our bucket type `http_data_store`.

```bash
riak-admin bucket-type create http_data_store '{"props":{"search_index":"header_data"}}'
riak-admin bucket-type activate http_data_store
```

Let's use the same `google_packet.bin` file that we used previously and
store it in a bucket with the `http_data_store` bucket type, making sure
to use our custom `application/httpheader` MIME type:

```java
Location key = new Location(new Namespace("http_data_store", "packets"), "google");
File packetData = new File("google_packet.bin");
byte[] packetBinary = FileUtils.readFileToByteArray(packetData);

RiakObject packetObject = new RiakObject()
        .setContentType("application/httpheader")
        .setValue(BinaryValue.create(packetBinary));

StoreValue storeOp = new StoreValue.Builder(packetObject)
        .setLocation(key)
        .build();
client.execute(storeOp);
```

```ruby
packet_data = File.read('google_packet.bin')
bucket = client.bucket_type('http_data_store').bucket('packets')
obj = Riak::RObject.new(bucket, 'google')
obj.content_type = 'application/httpheader'
obj.raw_data = packet_data
obj.store
```

```php
$object = new Object(file_get_contents("google_packet.bin"), ['Content-Type' => 'application/httpheader']);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
  ->buildLocation('google', 'packets', 'http_data_store')
  ->withObject($object)
  ->build()
  ->execute();
```

```python
packet_data = open('google_packet.bin').read()
bucket = client.bucket_type('http_data_store').bucket('packets')
obj = RiakObject(client, bucket, 'google')
obj.content_type = 'application/httpheader'
obj.data = packet_data
obj.store()
```

```curl
curl -XPUT $RIAK_HOST/types/http_data_store/buckets/packets/keys/google \
  -H 'Content-Type: application/httpheader' \
  --data-binary @google_packet.bin
```

Now that we have some header packet data stored, we can query our
`header_data` index on whatever basis we'd like.
First, let's verify
that we'll get one result if we query for objects that have the HTTP
method `GET`:

```java
// Using the same method from above:
String query = "method:GET";

// Again using the same method from above:
int numberFound = results.numResults(); // 1
```

```ruby
results = client.search('header_data', 'method:GET')
results['num_found'] # 1
```

```php
$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
  ->withQuery('method:GET')
  ->withIndexName('header_data')
  ->build()
  ->execute();

$response->getNumFound();
```

```python
results = client.fulltext_search('header_data', 'method:GET')
results['num_found'] # 1
```

```curl
curl "$RIAK_HOST/search/query/header_data?wt=json&q=method:GET"

# This should return a fairly large JSON object with a "num_found" field
# The value of that field should be 1
```




diff --git a/content/riak/kv/3.0.4/developing/usage/deleting-objects.md b/content/riak/kv/3.0.4/developing/usage/deleting-objects.md
new file mode 100644
index 0000000000..e2ab906e10
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/deleting-objects.md
@@ -0,0 +1,157 @@
---
title: "Deleting Objects"
description: ""
project: "riak_kv"
project_version: 3.0.4
menu:
  riak_kv-3.0.4:
    name: "Deleting Objects"
    identifier: "usage_deleting_objects"
    weight: 103
    parent: "developing_usage"
toc: true
aliases:
---

The delete command follows a predictable pattern and looks like this:

```
DELETE /types/TYPE/buckets/BUCKET/keys/KEY
```

The normal HTTP response codes for `DELETE` operations are `204 No
Content` and `404 Not Found`. 404 responses are *normal*, in the sense
that `DELETE` operations are idempotent and not finding the resource has
the same effect as deleting it.

Let's try to delete the `genius` key from the `oscar_wilde` bucket
(which bears the type `quotes`):

```java
Location geniusQuote = new Location(new Namespace("quotes", "oscar_wilde"), "genius");
DeleteValue delete = new DeleteValue.Builder(geniusQuote).build();
client.execute(delete);
```

```ruby
bucket = client.bucket_type('quotes').bucket('oscar_wilde')
bucket.delete('genius')
```

```php
(new \Basho\Riak\Command\Builder\DeleteObject($riak))
  ->buildLocation('genius', 'oscar_wilde', 'quotes')
  ->build()
  ->execute();
```

```python
bucket = client.bucket_type('quotes').bucket('oscar_wilde')
bucket.delete('genius')
```

```csharp
var id = new RiakObjectId("users", "random_user_keys", null);
var obj = new RiakObject(id, @"{'user':'data'}",
    RiakConstants.ContentTypes.ApplicationJson);
var rslt = client.Put(obj);
string key = rslt.Value.Key;
id = new RiakObjectId("users", "random_user_keys", key);
var del_rslt = client.Delete(id);
```

```javascript
// continuing from above example
options = {
    bucketType: 'users', bucket: 'random_user_keys',
    key: generatedKey
};
client.deleteValue(options, function (err, rslt) {
    if (err) {
        throw new Error(err);
    }
});
```

```erlang
riakc_pb_socket:delete(Pid, {<<"quotes">>, <<"oscar_wilde">>}, <<"genius">>)
```

```golang
// Continuing from above example
cmd, err = riak.NewDeleteValueCommandBuilder().
    WithBucketType("users").
    WithBucket("random_user_keys").
    WithKey(rsp.GeneratedKey).
    Build()

if err != nil {
    fmt.Println(err.Error())
    return
}

if err := cluster.Execute(cmd); err != nil {
    fmt.Println(err.Error())
    return
}
```

```curl
curl -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
```
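Because deletes are idempotent, repeating the same `DELETE` is
harmless; a second pass simply yields a 404. A quick sketch of that,
with the responses shown as comments for illustration:

```curl
# The first DELETE removes the object
curl -i -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
# HTTP/1.1 204 No Content

# Repeating the DELETE returns 404, which is expected and harmless
curl -i -XDELETE http://localhost:8098/types/quotes/buckets/oscar_wilde/keys/genius
# HTTP/1.1 404 Not Found
```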
## Client Library Examples

If you are updating an object that has been deleted---or if an update
might target a deleted object---we recommend that
you first fetch the [causal context]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context) of the object prior to updating.
This can be done by setting the `deletedvclock` parameter to `true` as
part of the [fetch operation]({{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers/fetch-object). This can also be done
with the official Riak clients for Ruby, Java, Erlang, and PHP, as in the
examples below:


```ruby
object.delete
deleted_object = bucket.get('key', deletedvclock: true)
deleted_object.vclock
```

```python
# It is not currently possible to fetch the causal context for a deleted
# key in the Python client.
```

```java
Location loc = new Location(new Namespace("<bucket_type>", "<bucket>"), "<key>");
FetchValue fetch = new FetchValue.Builder(loc)
        .withOption(Option.DELETED_VCLOCK, true)
        .build();
FetchValue.Response response = client.execute(fetch);
System.out.println(response.getVclock().asString());
```

```erlang
{ok, Obj} = riakc_pb_socket:get(Pid,
                                {<<"bucket_type">>, <<"bucket">>},
                                <<"key">>,
                                [{deleted_vclock}]).

%% In the Erlang client, the vector clock is accessible using the Obj
%% object obtained above.
```

```php
$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
  ->buildLocation('deleted_key', 'in_some_bucket', 'of_a_certain_type')
  ->build()
  ->execute();

echo $response->getVclock(); // a85hYGBgzGDKBVI8m9WOeb835ZRhYCg1zGBKZM5jZdhnceAcXxYA
```




diff --git a/content/riak/kv/3.0.4/developing/usage/document-store.md b/content/riak/kv/3.0.4/developing/usage/document-store.md
new file mode 100644
index 0000000000..f25b061682
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/document-store.md
@@ -0,0 +1,617 @@
---
title: "Implementing a Document Store"
description: ""
project: "riak_kv"
project_version: 3.0.4
menu:
  riak_kv-3.0.4:
    name: "Implementing a Document Store"
    identifier: "usage_document_store"
    weight: 112
    parent: "developing_usage"
toc: true
aliases:
  - /riak/3.0.4/dev/search/document-store
  - /riak/kv/3.0.4/dev/search/document-store
---

Although Riak wasn't explicitly created as a document store, two
features recently added to Riak---[Riak Search]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search/) and [Riak Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/)---make it possible to use Riak as a
highly scalable document store with rich querying capabilities. In this
tutorial, we'll build a basic implementation of a document store using
[Riak maps]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#maps).

## Basic Approach

Riak Search enables you to implement a document store in Riak in a
variety of ways. You could, for example, store and query JSON objects or
XML and then retrieve them later via Solr queries. In this tutorial,
however, we will store data in [Riak maps]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#maps),
index that data using Riak Search, and then run Solr queries against
those stored objects.

You can think of these Search indexes as **collections**.
Each indexed +document will have an ID generated automatically by Search, and because +we're not interested in running normal [key/value queries]({{<baseurl>}}riak/kv/3.0.4/developing/key-value-modeling) on these objects, we'll allow Riak to assign [keys]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/keys-and-objects) automatically. This means that all we have to do is worry about the bucket type and/or bucket when storing objects. + +## Use Case + +Let's say that we're building a WordPress-style CMS and storing blog +posts in Riak. We will be storing the following information about each +post: + +* Title +* Author +* Content (the body of the post) +* Keywords associated with the post +* Date posted +* Whether the post has been published on the site + +For each of those pieces of information, we'll need to decide on (a) +which Riak Data Type most directly corresponds and (b) which Solr type +we want to associate with the info. It's important to bear in mind that +Riak Data Types can be indexed as a wide variety of things, e.g. +registers as Solr text fields, sets as multi-valued datetimes, etc. The +table below shows which Riak Data Type and Solr type we'll be using for +each field in our Riak maps. + +Info | Riak Data Type | Solr type +:----|:---------------|:--------- +Post title | Register | String +Post author | Register | String +Post content | Register | Text +Keywords | Set | Multi-valued string +Date posted | Register | Datetime +Whether the post is currently in draft form | Flag | Boolean + +Before we start actually creating and storing blog posts, let's set up +Riak Search with an appropriate index and schema. + +## Creating a Schema and Index + +In the documentation on [search schemas]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search-schemas), you'll find a +baseline schema to be used for creating custom schemas. We'll use that +baseline schema here and add the following fields to the `<fields>` +list: + +```xml +<field name="title_register" type="string" indexed="true" stored="true" /> +<field name="author_register" type="string" indexed="true" stored="true" /> +<field name="content_register" type="text" indexed="true" stored="true" /> +<field name="keywords_set" type="string" indexed="true" stored="true" multiValued="true" /> +<field name="date_register" type="datetime" indexed="true" stored="true" /> +<field name="published_flag" type="boolean" indexed="true" stored="true" /> +``` + +You can see the full schema [on +GitHub](https://github.com/basho/basho_docs/raw/master/extras/data/blog_post_schema.xml). 
+Let's store that schema in a file called `blog_post_schema.xml` and +upload that schema to Riak: + +```java +import org.apache.commons.io.FileUtils; + +File xml = new File("blog_post_schema.xml"); +String xmlString = FileUtils.readFileToString(xml); +YokozunaSchema schema = new YokozunaSchema("blog_post_schema", xmlString); +StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build(); +client.execute(storeSchemaOp); +``` + +```ruby +schema_data = File.read('blog_post_schema.xml') +client.create_search_schema('blog_post_schema', schema_data) +``` + +```php +$schema_string = file_get_contents('blog_post_schema.xml'); +(new \Basho\Riak\Command\Builder\StoreSchema($riak)) + ->withName('blog_post_schema') + ->withSchemaString($schema_string) + ->build() + ->execute(); +``` + +```python +xml_file = open('blog_post_schema.xml', 'r') +schema_data = xml_file.read() +client.create_search_schema('blog_post_schema', schema_data) +xml_file.close() +``` + +```csharp +var schemaXml = File.ReadAllText("blog_post_schema.xml"); +var schema = new SearchSchema("blog_post_schema", schemaXml); +var rslt = client.PutSearchSchema(schema); +``` + +```javascript +/* + * Full example here: + * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/document-store.js + * + */ +var options = { + schemaName: 'blog_post_schema', + schema: schemaXml +}; +client.storeSchema(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +{ok, SchemaData} = file:read_file("blog_post_schema.xml"), +riakc_pb_socket:create_search_schema(Pid, <<"blog_post_schema">>, SchemaData). +``` + +```curl +curl -XPUT $RIAK_HOST/search/schema/blog_post_schema \ + -H 'Content-Type: application/xml' \ + --data-binary @blog_post_schema.xml +``` + +With our schema uploaded, we can create an index called `blog_posts` and +associate that index with our schema: + +```java +YokozunaIndex blogPostIndex = new YokozunaIndex("blog_posts", "blog_post_schema"); +StoreIndex storeIndex = new StoreIndex.Builder(blogPostIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('blog_posts') + ->usingSchema('blog_post_schema') + ->build() + ->execute(); +``` + +```python +client.create_search_index('blog_posts', 'blog_post_schema') +``` + +```csharp +var idx = new SearchIndex("blog_posts", "blog_post_schema"); +var rslt = client.PutSearchIndex(idx); +``` + +```javascript +var options = { + schemaName: 'blog_post_schema', + indexName: 'blog_posts' +}; +client.storeIndex(options, function (err, rslt) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"blog_posts">>, <<"blog_post_schema">>, []). +``` + +```curl +curl -XPUT $RIAK_HOST/search/index/blog_posts \ + -H 'Content-Type: application/json' \ + -d '{"schema": "blog_post_schema"}' +``` + +## How Collections will Work + +Collections are not a concept that is native to Riak but we can easily +mimic collections by thinking of a bucket type as a collection. When we +associate a bucket type with a Riak Search index, all of the objects +stored in any bucket of that bucket type will be queryable on the basis +of that one index. For this tutorial, we'll create a bucket type called +`cms` and think of that as a collection. 
We could also restrict our
`blog_posts` index to a single bucket just as easily and think of that
as a queryable collection, but we will not do that in this tutorial.

The advantage of the bucket-type-based approach is that we could store
blog posts from different blogs in different buckets and query them
all at once as part of the same index. It depends on the use case at
hand. In this tutorial, we'll only be storing posts from one blog, which
is called "Cat Pics Quarterly" and provides in-depth theoretical
discussions of cat pics with a certain number of Reddit upvotes. All of
the posts in this blog will be stored in the bucket
`cat_pics_quarterly`.

First, let's create our `cms` bucket type and associate it with the
`blog_posts` index:

```bash
riak-admin bucket-type create cms \
  '{"props":{"datatype":"map","search_index":"blog_posts"}}'
riak-admin bucket-type activate cms
```

Now, any object stored in any bucket of the type `cms` will be indexed
as part of our "collection."

## Storing Blog Posts as Maps

Now that we know how each element of a blog post can be translated into
one of the Riak Data Types, we can create an interface in our
application to serve as that translation layer. Using the method
described in [Data Modeling with Riak Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-modeling), we can construct a
class that looks like this:

```java
import java.util.Set;

public class BlogPost {
    private String title;
    private String author;
    private String content;
    private Set<String> keywords;
    private DateTime datePosted;
    private Boolean published;
    private static final String bucketType = "cms";

    private Location location;

    private RiakClient client;

    public BlogPost(RiakClient client,
                    String bucketName,
                    String title,
                    String author,
                    String content,
                    Set<String> keywords,
                    DateTime datePosted,
                    Boolean published) {
        this.client = client;
        this.location = new Location(new Namespace(bucketType, bucketName), null);
        this.title = title;
        this.author = author;
        this.content = content;
        this.keywords = keywords;
        this.datePosted = datePosted;
        this.published = published;
    }

    public void store() throws Exception {
        RegisterUpdate titleUpdate = new RegisterUpdate(title);
        RegisterUpdate authorUpdate = new RegisterUpdate(author);
        RegisterUpdate contentUpdate = new RegisterUpdate(content);
        SetUpdate keywordsUpdate = new SetUpdate();
        for (String keyword : keywords) {
            keywordsUpdate.add(keyword);
        }
        RegisterUpdate dateUpdate =
            new RegisterUpdate(datePosted.toString("YYYY-MM-DD HH:MM"));
        FlagUpdate publishedUpdate = new FlagUpdate(published);
        MapUpdate mapUpdate = new MapUpdate()
            .update("title", titleUpdate)
            .update("author", authorUpdate)
            .update("content", contentUpdate)
            .update("keywords", keywordsUpdate)
            .update("date", dateUpdate)
            .update("published", publishedUpdate);
        UpdateMap storeBlogPost = new UpdateMap.Builder(location, mapUpdate)
            .build();
        client.execute(storeBlogPost);
    }
}
```

```ruby
class BlogPost
  def initialize(bucket_name, title, author, content, keywords, date_posted, published)
    bucket = client.bucket_type('cms').bucket(bucket_name)
    map = Riak::Crdt::Map.new(bucket, nil)
    map.batch do |m|
      m.registers['title'] = title
      m.registers['author'] = author
      m.registers['content'] = content
      keywords.each do |k|
        m.sets['keywords'].add(k)
      end
      m.registers['date'] = date_posted
      if published
        m.flags['published'] = true
      end
    end
  end
end
```

```php
class BlogPost {
    private $title = '';
    private $author = '';
    private $content = '';
    private $keywords = [];
    private $datePosted = '';
    private $published = false;
    private $bucketType = "cms";

    private $bucket = null;

    private $riak = null;

    public function __construct(\Basho\Riak $riak, $bucket, $title, $author, $content, array $keywords, $date, $published)
    {
        $this->riak = $riak;
        $this->bucket = new Bucket($bucket, $this->bucketType);
        $this->title = $title;
        $this->author = $author;
        $this->content = $content;
        $this->keywords = $keywords;
        $this->datePosted = $date;
        $this->published = $published;
    }

    public function store()
    {
        $setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($this->riak));

        foreach($this->keywords as $keyword) {
            $setBuilder->add($keyword);
        }

        (new \Basho\Riak\Command\Builder\UpdateMap($this->riak))
            ->updateRegister('title', $this->title)
            ->updateRegister('author', $this->author)
            ->updateRegister('content', $this->content)
            ->updateRegister('date', $this->datePosted)
            ->updateFlag('published', $this->published)
            ->updateSet('keywords', $setBuilder)
            ->withBucket($this->bucket)
            ->build()
            ->execute();
    }
}
```

```python
from riak.datatypes import Map

class BlogPost:
    def __init__(self, bucket_name, title, author, content, keywords, date_posted, published):
        bucket = client.bucket_type('cms').bucket(bucket_name)
        self.map = Map(bucket, None)
        self.map.registers['title'].assign(title)
        self.map.registers['author'].assign(author)
        self.map.registers['content'].assign(content)
        for k in keywords:
            self.map.sets['keywords'].add(k)
        self.map.registers['date'].assign(date_posted)
        if published:
            self.map.flags['published'].enable()
        self.map.store()
```

```csharp
/*
 * Please see the code in the RiakClientExamples project:
 * https://github.com/basho/riak-dotnet-client/tree/develop/src/RiakClientExamples/Dev/Search
 */
```

```javascript
/*
 * Please see the code in the examples repository:
 * https://github.com/basho/riak-nodejs-client-examples/blob/master/dev/search/
 */
```

Now, we can store some blog posts.
We'll start with just one:

```java
Set<String> keywords = new HashSet<String>();
keywords.add("adorbs");
keywords.add("cheshire");

BlogPost post1 = new BlogPost(client, // client object
                              "cat_pics_quarterly", // bucket
                              "This one is so lulz!", // title
                              "Cat Stevens", // author
                              "Please check out these cat pics!", // content
                              keywords, // keywords
                              new DateTime(), // date posted
                              true); // published
try {
    post1.store();
} catch (Exception e) {
    System.out.println(e);
}
```

```ruby
keywords = ['adorbs', 'cheshire']
date = Time.now.strftime('%Y-%m-%d %H:%M')
blog_post1 = BlogPost.new('cat_pics_quarterly',
                          'This one is so lulz!',
                          'Cat Stevens',
                          'Please check out these cat pics!',
                          keywords,
                          date,
                          true)
```

```php
$keywords = ['adorbs', 'cheshire'];
$date = new \DateTime('now');

$post1 = new BlogPost(
    $riak, // client object
    'cat_pics_quarterly', // bucket
    'This one is so lulz!', // title
    'Cat Stevens', // author
    'Please check out these cat pics!', // content
    $keywords, // keywords
    $date, // date posted
    true // published
);
```

```python
import datetime

keywords = ['adorbs', 'cheshire']
date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
blog_post1 = BlogPost('cat_pics_quarterly',
                      'This one is so lulz!',
                      'Cat Stevens',
                      'Please check out these cat pics!',
                      keywords,
                      date,
                      True)
```

```csharp
var keywords = new HashSet<string> { "adorbs", "cheshire" };

var post = new BlogPost(
    "This one is so lulz!",
    "Cat Stevens",
    "Please check out these cat pics!",
    keywords,
    DateTime.Now,
    true);

var repo = new BlogPostRepository(client, "cat_pics_quarterly");
string id = repo.Save(post);
```

```javascript
var post = new BlogPost(
    'This one is so lulz!',
    'Cat Stevens',
    'Please check out these cat pics!',
    [ 'adorbs', 'cheshire' ],
    new Date(),
    true
);

var repo = new BlogPostRepository(client, 'cat_pics_quarterly');

repo.save(post, function (err, rslt) {
    logger.info("key: '%s', model: '%s'", rslt.key, JSON.stringify(rslt.model));
});
```

## Querying

Now that we have some blog posts stored in our "collection," we can
start querying for whatever we'd like. Let's say that we want to find
all blog posts with the keyword `funny` (after all, some cat pics are
quite serious, and we may not want those).
+ +```java +String index = "blog_posts"; +String query = "keywords_set:funny"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'keywords_set:funny') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('keywords_set:funny') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'keywords_set:funny') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "keywords_set:funny"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('keywords_set:funny') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=keywords_set:funny" +``` + +Or we can find posts that contain the word `furry`: + +```java +String index = "blog_posts"; +String query = "content_register:furry"; + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create(index), query) + .build(); +cluster.execute(searchOp); +List<Map<String, List<String>>> results = searchOp.get().getAllResults(); +``` + +```ruby +results = client.search('blog_posts', 'content_register:furry') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('blog_posts') + ->withQuery('content_register:furry') + ->build() + ->execute(); +``` + +```python +results = client.fulltext_search('blog_posts', 'content_register:furry') +``` + +```csharp +var searchRequest = new RiakSearchRequest("blog_posts", "content_register:furry"); +var rslt = client.Search(searchRequest); +``` + +```javascript +var searchCmd = new Riak.Commands.YZ.Search.Builder() + .withIndexName('blog_posts') + .withQuery('content_register:furry') + .withCallback(search_cb) + .build(); + +client.execute(searchCmd); +``` + +```curl +curl "$RIAK_HOST/search/query/blog_posts?wt=json&q=content_register:furry" +``` + +Here are some more possible queries: + +Info | Query +:----|:----- +Unpublished posts | `published_flag:false` +Titles that begin with `Loving*` | `title_register:Loving*` +Post bodies containing the words `furry` and `jumping` | `content_register:[furry AND jumping]` + + + + diff --git a/content/riak/kv/3.0.4/developing/usage/mapreduce.md b/content/riak/kv/3.0.4/developing/usage/mapreduce.md new file mode 100644 index 0000000000..753e0cd310 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/mapreduce.md @@ -0,0 +1,246 @@ +--- +title: "Using MapReduce" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Using MapReduce" + identifier: "usage_mapreduce" + weight: 106 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.4/dev/using/mapreduce + - /riak/kv/3.0.4/dev/using/mapreduce +--- + +[usage 2i]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes +[usage search]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/search +[usage types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types +[api http]: {{<baseurl>}}riak/kv/3.0.4/developing/api/http +[api pb]: {{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode +[guide mapreduce]: 
+
+{{% note title="Use MapReduce sparingly" %}}
+In Riak KV, MapReduce is the primary method for non-primary-key-based
+querying. Although useful for tasks such as batch
+processing jobs, MapReduce operations can be very computationally
+expensive to the extent that they can degrade performance in
+production clusters operating under load. Thus, we recommend running
+MapReduce operations in a controlled, rate-limited fashion and never for
+realtime querying purposes.
+{{% /note %}}
+
+MapReduce (M/R) is a technique for dividing data processing work across
+a distributed system. It takes advantage of the parallel processing
+power of distributed systems and also reduces network bandwidth, as the
+algorithm is passed around to where the data lives rather than
+transferring a potentially huge dataset to a client algorithm.
+
+You can use MapReduce for things like: filtering documents by
+tags, counting words in documents, and extracting links to related data.
+In Riak KV, MapReduce is one method for querying that is not strictly based
+on key querying, alongside [secondary indexes][usage 2i]
+and [search][usage search]. MapReduce jobs can be submitted through the
+[HTTP API][api http] or the [Protocol Buffers API][api pb], although we
+strongly recommend using the Protocol Buffers API for performance
+reasons.
+
+## Features
+
+* Map phases execute in parallel with data locality.
+* Reduce phases execute in parallel on the node where the job was
+  submitted.
+* MapReduce queries are written in Erlang.
+
+## When to Use MapReduce
+
+* When you know the set of objects over which you want to MapReduce
+  (i.e. the locations of the objects, as specified by [bucket type][usage types], bucket, and key)
+* When you want to return actual objects or pieces of objects and not
+  just the keys. [Search][usage search] and [secondary indexes][usage 2i] are other means of returning objects based on
+  non-key-based queries, but they only return lists of keys and not
+  whole objects.
+* When you need the utmost flexibility in querying your data. MapReduce
+  gives you full access to your object and lets you pick it apart any
+  way you want.
+
+## When Not to Use MapReduce
+
+* When you want to query data over an entire bucket. MapReduce uses a
+  list of keys, which can place a lot of demand on the cluster.
+* When you want latency to be as predictable as possible.
+
+## How it Works
+
+The MapReduce framework helps developers divide a query into steps,
+divide the dataset into chunks, and then run those step/chunk pairs on
+separate physical hosts.
+
+There are two steps in a MapReduce query:
+
+* **Map** - The data collection phase, which breaks up large chunks of
+  work into smaller ones and then takes action on each chunk. Map
+  phases consist of a function and a list of objects on which the map
+  operation will operate.
+* **Reduce** - The data collation or processing phase, which combines
+  the results from the map step into a single output. The reduce phase
+  is optional.
+
+Riak KV MapReduce queries have two components:
+
+* A list of inputs
+* A list of phases
+
+The elements of the input list are object locations as specified by
+[bucket type][usage types], bucket, and key. The elements of the
+phases list are chunks of information related to a map, a reduce, or a
+link function.
+
+A MapReduce query begins when a client makes the request to Riak KV.
+The node that the client contacts to make the request becomes the
+*coordinating node* responsible for the MapReduce job. As described
+above, each job consists of a list of phases, where each phase is either
+a map or a reduce phase. The coordinating node uses the list of phases
+to route the object keys and the function that will operate over the
+objects stored in those keys, and instructs the proper [vnode][glossary vnode] to
+run that function over the right objects.
+
+After running the map function, the results are sent back to the
+coordinating node. This node then concatenates the list and passes that
+information over to a reduce phase on the same coordinating node,
+assuming that the next phase in the list is a reduce phase.
+
+The diagram below provides an illustration of how a coordinating vnode
+orchestrates a MapReduce job.
+
+![MapReduce Diagram]({{<baseurl>}}images/MapReduce-diagram.png)
+
+## Example
+
+In this example, we'll create four objects with the text "caremad"
+repeated a varying number of times and store those objects in the bucket
+`training` (which does not bear a [bucket type][usage types]).
+An Erlang MapReduce function will be used to count the occurrences of
+the word "caremad."
+
+### Data object input commands
+
+For the sake of simplicity, we'll use [curl](http://curl.haxx.se/)
+in conjunction with Riak KV's [HTTP API][api http] to store the objects:
+
+```curl
+curl -XPUT http://localhost:8098/buckets/training/keys/foo \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad data goes here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bar \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad caremad'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/baz \
+  -H 'Content-Type: text/plain' \
+  -d 'nothing to see here'
+
+curl -XPUT http://localhost:8098/buckets/training/keys/bam \
+  -H 'Content-Type: text/plain' \
+  -d 'caremad caremad caremad'
+```
+
+### MapReduce invocation
+
+Invoking a MapReduce function from a compiled Erlang program requires
+that the function be compiled and distributed to all nodes.
+
+For interactive use, however, it's not necessary to do so; instead, we
+can invoke the client library from the
+[Erlang shell](http://www.erlang.org/doc/man/shell.html) and define
+functions to send to Riak KV on the fly.
+
+First, we define the map function, which specifies that we want to get
+the key for each object in the bucket `training` that contains the text
+`caremad`.
+
+We're going to generalize and optimize it a bit by supplying a
+compiled regular expression when we invoke MapReduce; our function
+will expect that as the third argument.
+
+```erlang
+ReFun = fun(O, _, Re) -> case re:run(riak_object:get_value(O), Re, [global]) of
+    {match, Matches} -> [{riak_object:key(O), length(Matches)}];
+    nomatch -> [{riak_object:key(O), 0}]
+end end.
+```
+
+Next, to call `ReFun` on all keys in the `training` bucket, we can do
+the following in the Erlang shell.
+
+{{% note title="Warning" %}}
+Do not use this in a production
+environment; listing all keys to identify those in the `training` bucket
+is a very expensive process.
+{{% /note %}}
+
+```erlang
+{ok, Re} = re:compile("caremad").
+```
+
+That will return output along the following lines, verifying that
+compilation has completed:
+
+```
+{ok,{re_pattern,0,0,
+    <<69,82,67,80,69,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,99,0,100,
+    ...>>}}
+```
+
+Then, we can create a socket link to our cluster:
+
+```erlang
+{ok, Riak} = riakc_pb_socket:start_link("127.0.0.1", 8087).
+
+%% This should return a process ID:
+%% {ok,<0.34.0>}
+```
+
+Then we can run the compiled MapReduce job on the `training` bucket:
+
+```erlang
+riakc_pb_socket:mapred_bucket(Riak, <<"training">>,
+    [{map, {qfun, ReFun}, Re, true}]).
+```
+
+If your bucket is part of a bucket type, you would use the following:
+
+```erlang
+B = {<<"my_bucket_type">>, <<"training">>},
+Args = [{map, {qfun, ReFun}, Re, true}],
+riakc_pb_socket:mapred_bucket(Riak, B, Args).
+```
+
+That will return a list of tuples. The first element in each tuple is
+the key for each object in the bucket, while the second element displays
+the number of instances of the word "caremad" in the object:
+
+```
+{ok,[{0,
+      [{<<"foo">>,1},{<<"bam">>,3},{<<"baz">>,0},{<<"bar">>,4}]}]}
+```
+
+### Recap
+
+In this tutorial, we ran an Erlang MapReduce function against a total of
+four objects in the `training` bucket. This job took each key/value
+object in the bucket and searched the text for the word "caremad,"
+counting the number of instances of the word.
+
+## Advanced MapReduce Queries
+
+For more detailed information on MapReduce queries in Riak KV, we recommend
+checking out our [Advanced MapReduce][guide mapreduce] guide.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/next-gen-replication.md b/content/riak/kv/3.0.4/developing/usage/next-gen-replication.md
new file mode 100644
index 0000000000..cb915691c9
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/next-gen-replication.md
@@ -0,0 +1,153 @@
+---
+title: "Next-Gen Replication"
+description: ""
+project: "riak_kv"
+project_version: "3.0.4"
+menu:
+  riak_kv-3.0.4:
+    name: "Next Gen Replication"
+    identifier: "learn_concepts_next_gen_replication"
+    weight: 108
+    parent: "learn_concepts"
+version_history:
+  in: "2.9.1+"
+toc: true
+aliases:
+  - /riak-docs/riak/3.0.4/dev/using/nextgenreplication
+---
+[concept TicTac aae]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/tictac-active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/replication
+
+## Next Generation Replication - How it Works
+
+### Replication Actors
+
+Each node in `riak_kv` starts three processes that manage inter-cluster
+replication: a tictac AAE full-sync manager, a replication queue source
+manager, and a replication queue sink manager. All processes are started
+by default (whether or not replication is enabled), but will only play an
+active role should replication be configured. Further details on the
+processes involved:
+
+* __Tictac AAE Full-Sync Manager__ - `riak_kv_ttaaefs_manager`
+
+  * There is a single actor on each node that manages the full-sync
+  reconciliation workload configured for that node.
+
+  * Each node is configured with the details of a peer node at a remote
+  cluster. Each manager is responsible for controlling cluster-wide
+  hashtree exchanges between the local node and the peer node, and for
+  prompting any repairs required across the cluster (not just on this
+  node). The information is exchanged between the peers, but that
+  information represents the data across the whole cluster. Necessary
+  repairs are prompted through the replication queue source-side manager
+  `riak_kv_replrtq_src`.
+
+  * Each node is configured with a schedule to determine how frequently
+  this manager will run its reconcile and repair operations.
+
+  * It is an administrator responsibility to ensure the cluster AAE
+  workload is distributed across nodes with sufficient diversity to ensure
+  correct operation under failure. Work is not re-distributed between
+  nodes in response to failure on either the local or remote cluster, so
+  there must be other nodes already configured to share that workload to
+  continue operation under failure conditions.
+
+  * Each node can only full-sync with one other cluster (via the one peer
+  node). If the cluster needs to full-sync with more than one cluster,
+  then the administrator should ensure different nodes have the different
+  configurations necessary to achieve this.
+
+  * Scheduling of work to minimise concurrency of reconciliation
+  operations is managed by this actor using a simple, coordination-free
+  mechanism.
+
+  * The administrator may at run-time suspend or resume the regular
+  running of full-sync operations on any given node via the
+  `riak_kv_ttaaefs_manager`.
+
+* __Replication Queue Source-Side Manager__
+
+  * There is a single actor on each node that manages the queueing of
+  replication object references to be consumed by other clusters. This
+  actor runs a configurable number of queues, which contain pointers to
+  data which is required to be consumed by different remote clusters.
+
+  * The general pattern is that each delta within a cluster will be
+  published once via the `riak_kv_replrtq_src` on a node local to the
+  discovery of the change. Each queue which is a source of updates will
+  have multiple consumers spread across multiple sink nodes on the
+  receiving cluster - where each sink-side node's consumers are managed by
+  a `riak_kv_replrtq_snk` process on that node.
+
+  * Queues may have data filtering rules to restrict what changes are
+  distributed via that queue. The filters can restrict replication to a
+  specific bucket or bucket type, or to a bucket name prefix, or can allow
+  any change to be published to that queue.
+
+  * __Real-time replication__ changes (i.e. PUTs that have just been
+  co-ordinated on this node within the cluster) are sent to the
+  `riak_kv_replrtq_src` in one of the following formats:
+    * {Bucket, Key, Clock, {tombstone, Object}};
+    * {Bucket, Key, Clock, {object, Object}};
+    * {Bucket, Key, Clock, to_fetch}.
+
+  * Real-time replicated objects are the highest priority items to be
+  queued, and are placed on __every queue whose data filtering rules are
+  matched__ by the object. If the priority queue has grown beyond a
+  limited number of items (the number being defined in
+  `riak_kv.replrtq_srcobjectlimit`), then any {object, Object} references
+  are stripped and replaced with `to_fetch`. This is to help limit the
+  memory consumed by the queue during failure conditions i.e. when a sink
+  has stopped consuming from the source queue.
+
+  * Changes identified by __AAE full-sync replication__ processes run by
+  the `riak_kv_ttaaefs` manager on the local node are sent to the
+  `riak_kv_replrtq_src` as references, and queued as the second highest
+  priority. These changes are queued only on __a single queue defined
+  within the configuration__ of `riak_kv_ttaaefs_manager`. The changes
+  queued are only references to the object (Bucket, Key and Clock), not
+  the actual object.
+
+  * Changes identified by __AAE fold operations__ for
+  administrator-initiated transition or repair operations (e.g. fold over
+  a bucket or key-range, or for a given range of modified dates) are sent
+  to the `riak_kv_replrtq_src` to be queued as the lowest priority onto
+  __a single queue defined by the administrator when initiating the AAE
+  fold operation__. The changes queued are only references to the object
+  (Bucket, Key and Clock), not the actual object - and are only the
+  changes discovered through the fold running on vnodes local to this
+  node.
+
+  * Should the local node fail, all undelivered object references will be
+  dropped.
+
+  * Queues are bounded, with limits set separately for each priority.
+  Items are consumed from the queue in strict priority order. So a backlog
+  of non-real-time replication events cannot cause a backlog or failure in
+  real-time events.
+
+  * The queues are provided using the existing `riak_core_priority_queue`
+  module in Riak.
+
+  * The administrator may at run-time suspend or resume the publishing of
+  data to specific queues via the `riak_kv_replrtq_src` process.
+
+* __Replication Queue Sink-Side Manager__ `riak_kv_replrtq_snk`
+
+  * There is a single actor on each node that manages the process of
+  consuming from queues on the `riak_kv_replrtq_src` on remote clusters.
+
+  * The `riak_kv_replrtq_snk` can be configured to consume from multiple
+  queues, across an open-ended number of peers. For instance, if each node
+  on Cluster A maintains a queue named `cluster_c_full`, and each node on
+  Cluster B maintains a queue named `cluster_c_partial` - then
+  `riak_kv_replrtq_snk` can be configured to consume from the
+  `cluster_c_full` queue on every node in Cluster A and from the
+  `cluster_c_partial` queue on every node in Cluster B.
+
+  * The `riak_kv_replrtq_snk` manages a finite number of workers for
+  consuming from remote peers. The `riak_kv_replrtq_snk` tracks the
+  results of work in order to back-off slightly from peers regularly not
+  returning results to consume requests (in favour of those peers
+  indicating a backlog by regularly returning results). The
+  `riak_kv_replrtq_snk` also tracks the results of work in order to
+  back-off severely from those peers returning errors (so as not to lock
+  too many workers consuming from unreachable nodes).
+
+  * The administrator may at run-time suspend or resume the consuming of
+  data from specific queues or peers via the `riak_kv_replrtq_snk`.
+
+### Real-time Replication - Step by Step
+
+Previous replication implementations initiated replication through a
+post-commit hook. Post-commit hooks are fired from the `riak_kv_put_fsm`
+after "enough" responses have been received from other vnodes (based on n,
+w, dw and pw values for the PUT). Without enough responses, the
+replication hook is not fired, although the client should receive an error
+and retry. This process of retrying may eventually fire the hook. However,
+it is possible for a PUT to fail, for the hook not to be fired, and yet
+for a GET to succeed locally (due to read-repair and anti-entropy),
+leaving no clue that the object has not been replicated.
+
+In implementing the new replication solution, the point of firing off
+replication has been changed to the point that the co-ordinated PUT is
+completed. So the replication of the PUT to the remote clusters may occur
+in parallel to the replication of the PUT to other nodes in the source
+cluster. This is the first opportunity where sufficient information is
+known (e.g.
+the updated vector clock), and it reduces the size of the time-window of
+inconsistency between the clusters, as well as the window of opportunity
+for a PUT to succeed without replication being triggered.
+
+Replication is fired within the `riak_kv_vnode` `actual_put/8`. On
+condition of the vnode being a co-ordinator of the put, and of
+`riak_kv.replrtq_enablesrc` being set to enabled (true), the following
+work is done:
+
+- The object reference to be replicated is determined; this is the type of
+reference to be placed on the replication queue.
+
+  - If the object is now a tombstone, the whole object is used as the
+  replication reference. The whole object is used due to the small size of
+  the object, and the need to avoid race conditions with reaping activity
+  if `delete_mode` is not `keep` - the cluster may not be able to fetch
+  the tombstone to replicate in the future. The whole object must be kept
+  on the queue and not be filtered by the `riak_kv_replrtq_src` to be
+  replaced with a `to_fetch` reference.
+
+  - If the object is below the `riak_kv.replrtq_srcobjectsize` (default
+  200KB) then the whole object will be sent to the `riak_kv_replrtq_src`,
+  and it will be queued as a whole object as long as the current size of
+  the priority real-time queue does not exceed the
+  `riak_kv.replrtq_srcobjectlimit` (default 1000). If an object is over
+  the size limit a `to_fetch` reference will be sent instead of the
+  object, and if the queue is too large the `riak_kv_replrtq_src` will
+  substitute a `to_fetch` reference before queueing.
+
+- The `{Bucket, Key, Clock, ObjectReference}` is cast to the
+`riak_kv_replrtq_src` and placed by the `riak_kv_replrtq_src` on the
+priority queue.
+
+- The queue has a configurable absolute limit that is applied individually
+for each priority. The limit is configured via
+`riak_kv.replrtq_srcqueuelimit` and defaults to 300,000 references (5
+minutes of traffic at 1,000 PUTs per second). When this limit is reached,
+new replication references are discarded on receipt rather than queued -
+these discarded references will need to eventually be re-replicated via
+full-sync.
+
+The reference now needs to be handled by the `riak_kv_replrtq_src`. The
+task list for this process is:
+
+- Assign a priority to the replication event depending on what prompted
+the replication (e.g. highest priority to real-time events received from
+co-ordinator vnodes).
+
+- Add the reference to the tail of __every__ matching queue based on
+priority. Each queue is configured to either match `any` replication
+event, no real-time events (using the configuration `block_rtq`), or a
+subset of events (using either a bucket `type` filter or a `bucket`
+filter).
+
+In order to replicate the object, it must now be fetched from the queue by
+a sink. A sink-side cluster should have multiple consumers, on multiple
+nodes, consuming from each node in the source-side cluster. These workers
+are handed work items by the `riak_kv_replrtq_snk`, with a Riak client
+configured to communicate to the remote node, and the worker will initiate
+a `fetch` from that node.
+
+On receipt of the `fetch` request the source node should:
+
+- Initiate a `riak_kv_get_fsm`, passing `{queuename, QueueName}` in place
+of `{Bucket, Key}`.
+
+- The GET FSM should go directly into the `queue_fetch` state, and try to
+fetch the next replication reference from the given queue name via the
+`riak_kv_replrtq_src`.
+
+  - If the fetch from the queue returns `queue_empty`, this is relayed
+  back to the sink-side worker, and ultimately the `riak_kv_replrtq_snk`,
+  which may then slow down the pace at which fetch requests are sent to
+  this node/queue combination. To reduce the volume of individual requests
+  when queues are mainly empty, the queue is only considered empty if it
+  has reported empty 8 times from requests 4ms apart.
+
+  - If the fetch returns an actual object, this is relayed back to the
+  sink worker.
+
+  - If the fetch returns a replication reference with the flag `to_fetch`,
+  the `riak_kv_get_fsm` will continue down the standard path of states
+  starting with `prepare`, and fetch the object, which will then be
+  returned to the sink worker.
+
+- If a successful fetch is relayed back to the sink worker it will
+replicate the PUT using a local `riak_client:push/4`. The push will
+complete a PUT of the object on the sink cluster - using a
+`riak_kv_put_fsm` with appropriate options (e.g. `asis`,
+`disable-hooks`).
+
+  - The code within the `riak_client:push/4` follows the behaviour of the
+  existing `riak_repl` on receipt of a replicated object.
+
+- If the fetch and push request fails, the sink worker will report this
+back to the `riak_kv_replrtq_snk`, which should delay further requests to
+that node/queue so as to avoid rapidly tying up sink workers communicating
+with a failing node.
+
+
+### Full-Sync Reconciliation and Repair - Step by Step
+
+The `riak_kv_ttaaefs_manager` controls the full-sync replication activity
+of a node. Each node is configured with a single peer with which it is to
+run full-sync checks and repairs, assuming that across the cluster
+sufficient peers to sufficient clusters have been configured to complete
+the overall work necessary for that cluster. Ensuring there are sufficient
+peer relations is an administrator responsibility; there is no
+re-balancing or re-scaling of this work during failure.
+
+The `riak_kv_ttaaefs_manager` is a source-side process. It will not
+attempt to repair any discovered discrepancies where the remote cluster is
+ahead of the local cluster - the job of the process is to ensure that a
+remote cluster is up-to-date with the changes which have occurred in the
+local cluster. For mutual full-sync replication, there will be a need for
+an equivalent configuration on the peer cluster.
+
+The `riak_kv_ttaaefs_manager` has a schedule of work obtained from the
+configuration. The schedule has wants, the number of times per day that it
+is desired that this manager will:
+
+- Reconcile changes across the whole cluster over all time;
+
+- Skip work for a schedule slot and do nothing;
+
+- Reconcile changes that have occurred in the past hour;
+
+- Reconcile changes that have occurred in the past day.
+
+On startup, the manager looks at these wants and provides a random
+distribution of work across slots. The day is divided into slots evenly
+distributed so there is a slot for each want in the schedule. It will run
+work for the slot at an offset from the start of the slot, based on the
+place this node has in the sorted list of currently active nodes. So if
+each node is configured with the same total number of wants, work will be
+synchronised to have limited overlapping work within the cluster.
+
+When, on a node, a scheduled piece of work comes due, the
+`riak_kv_ttaaefs_manager` will start an `aae_exchange` to run the work
+between the two clusters (using the peer configuration to reach the remote
+cluster).
+Once the work is finished, it will schedule the next piece of work -
+unless the start time for the next piece of work has already passed, in
+which case the next work is skipped. When all the work in the schedule is
+complete, a new schedule is calculated from the wants.
+
+When starting an `aae_exchange` the `riak_kv_ttaaefs_manager` must pass in
+a repair function. This function will compare clocks from identified
+discrepancies, and where the source cluster is ahead of the sink, send the
+`{Bucket, Key, Clock, to_fetch}` tuple to a configured queue name on
+`riak_kv_replrtq_src`. These queued entries will then be replicated
+through being fetched by the `riak_kv_replrtq_snk` workers, although this
+will only occur when there is no higher priority work to replicate i.e.
+real-time replication events prompted by locally co-ordinated PUTs.
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/reading-objects.md b/content/riak/kv/3.0.4/developing/usage/reading-objects.md
new file mode 100644
index 0000000000..0046760232
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/reading-objects.md
@@ -0,0 +1,252 @@
+---
+title: "Reading Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Reading Objects"
+    identifier: "usage_reading_objects"
+    weight: 101
+    parent: "developing_usage"
+toc: true
+aliases:
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+
+You can think of reads in Riak as analogous to HTTP `GET` requests. You
+specify a bucket type, bucket, and key, and Riak either returns the
+object that's stored there---including its [siblings]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/#siblings) (more on that later)---or it returns `not found` (the
+equivalent of an HTTP `404 Not Found`).
+
+Here is the basic command form for retrieving a specific key from a
+bucket:
+
+```
+GET /types/<type>/buckets/<bucket>/keys/<key>
+```
+
+Here is an example of a read performed on the key `rufus` in the bucket
+`dogs`, which bears the bucket type `animals`. Please note that for this
+example to work, you must have first created the bucket type `animals` as
+per the instructions on the [bucket type]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/bucket-types) page.
+
+```java
+// In the Java client, it is best to specify a bucket type/bucket/key
+// Location object that can be used as a reference for further
+// operations, as in the example below:
+Location myKey = new Location(new Namespace("animals", "dogs"), "rufus");
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus')
+```
+
+```csharp
+// Using the Riak .NET Client it is best to specify a bucket type/bucket/key
+// RiakObjectId object that can be used as a reference for further
+// operations
+var id = new RiakObjectId("animals", "dogs", "rufus");
+```
+
+```javascript
+client.fetchValue({ bucketType: 'animals', bucket: 'dogs', key: 'rufus' }, function (err, rslt) {
+    assert(rslt.isNotFound);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    Build()
+if err != nil {
+    // error occurred
+}
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus
+```
+
+## Read Parameters
+
+Parameter | Default | Description
+:---------|:--------|:-----------
+`r` | `quorum` | How many replicas need to agree when retrieving an existing object
+`pr` | `0` | How many primary [vnodes][glossary vnode] must respond for a read to be deemed successful
+`notfound_ok` | `true` | If set to `true`, if the first vnode to respond doesn't have a copy of the object, Riak will deem the failure authoritative and immediately return a `notfound` error to the client
+
+Riak also accepts many query parameters, including `r` for setting the
+R-value for GET requests (R values describe how many replicas need to
+agree when retrieving an existing object in order to return a successful
+response).
+
+Here is an example of attempting a read with `r` set to `3`:
+
+```java
+// Using the "myKey" location specified above:
+FetchValue fetch = new FetchValue.Builder(myKey)
+        .withOption(FetchOption.R, new Quorum(3))
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue());
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r: 3)
+p obj.data
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('rufus', 'dogs', 'animals')
+  ->withParameter('r', 3)
+  ->build()
+  ->execute();
+
+var_dump($response->getObject()->getData());
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('dogs')
+obj = bucket.get('rufus', r=3)
+print obj.data
+```
+
+```csharp
+var id = new RiakObjectId("animals", "dogs", "rufus");
+var opts = new RiakGetOptions();
+opts.SetR(3);
+var rslt = client.Get(id, opts);
+Debug.WriteLine(Encoding.UTF8.GetString(rslt.Value.Value));
+```
+
+```javascript
+var fetchOptions = {
+    bucketType: 'animals', bucket: 'dogs', key: 'rufus',
+    r: 3
+};
+client.fetchValue(fetchOptions, function (err, rslt) {
+    var riakObj = rslt.values.shift();
+    var rufusValue = riakObj.value.toString("utf8");
+    logger.info("rufus: %s", rufusValue);
+});
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"animals">>, <<"dogs">>},
+                                <<"rufus">>,
+                                [{r, 3}]).
+```
+
+```golang
+cmd, err := riak.NewFetchValueCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("dogs").
+    WithKey("rufus").
+    WithR(3).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+fvc := cmd.(*riak.FetchValueCommand)
+rsp := fvc.Response
+```
+
+```curl
+curl http://localhost:8098/types/animals/buckets/dogs/keys/rufus?r=3
+```
+
+If you're using HTTP, you will most often see the following response
+codes:
+
+* `200 OK`
+* `300 Multiple Choices`
+* `304 Not Modified`
+
+The most common error code:
+
+* `404 Not Found`
+
+{{% note title="Note" %}}
+If you're using a Riak client instead of HTTP, these responses will vary a
+great deal, so make sure to check the documentation for your specific client.
+{{% /note %}}
+
+## Not Found
+
+If there's no object stored in the location where you attempt a read, you'll get the following response:
+
+```java
+java.lang.NullPointerException
+```
+
+```ruby
+Riak::ProtobuffsFailedRequest: Expected success from Riak but received not_found. The requested object was not found.
+``` + +```php +$response->getStatusCode(); // 404 +$response->isSuccess(); // false +``` + +```python +riak.RiakError: 'no_type' +``` + +```csharp +result.IsSuccess == false +result.ResultCode == ResultCode.NotFound +``` + +```javascript +rslt.isNotFound === true; +``` + +```erlang +{error,notfound} +``` + +```golang +fvc := cmd.(*riak.FetchValueCommand) +rsp := fvc.Response +rsp.IsNotFound // Will be true +``` + +```curl +not found +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/usage/replication.md b/content/riak/kv/3.0.4/developing/usage/replication.md new file mode 100644 index 0000000000..d2b1d605d0 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/replication.md @@ -0,0 +1,592 @@ +--- +title: "Replication" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Replication" + identifier: "usage_replication" + weight: 115 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.4/dev/advanced/replication-properties + - /riak/kv/3.0.4/dev/advanced/replication-properties +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency +[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask +[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/reference/strong-consistency +[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters + +Riak was built to act as a multi-node [cluster][concept clusters]. It +distributes data across multiple physical servers, which enables it to +provide strong availability guarantees and fault tolerance. + +The [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem), which +undergirds many of the design decisions behind Riak's architecture, +defines distributed systems in terms of three desired properties: +consistency, availability, and partition (i.e. failure) tolerance. Riak +can be used either as an AP, i.e. available/partition-tolerant, system +or as a CP, i.e. consistent/partition-tolerant, system. The former +relies on an [Eventual Consistency][concept eventual consistency] model, while the latter relies on +a special [strong consistency][use ref strong consistency] subsystem. + +Although the [CAP theorem](http://en.wikipedia.org/wiki/CAP_theorem) +dictates that there is a necessary trade-off between data consistency +and availability, if you are using Riak in an eventually consistent +manner, you can fine-tune that trade-off. The ability to make these +kinds of fundamental choices has immense value for your applications and +is one of the features that differentiates Riak from other databases. + +At the bottom of the page, you'll find a [screencast]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/replication-properties#screencast) that briefly explains how to adjust your +replication levels to match your application and business needs. + +{{% note title="Note on strong consistency" %}} +An option introduced in Riak version 2.0 is to use Riak as a +<a href="{{< baseurl >}}riak/kv/3.0.4/using/reference/strong-consistency/">strongly +consistent</a> system for data in specified buckets. Using Riak in this way is +fundamentally different from adjusting replication properties and fine-tuning +the availability/consistency trade-off, as it sacrifices _all_ availability +guarantees when necessary. 
Therefore, you should consult the
+<a href="{{< baseurl >}}riak/kv/3.0.4/developing/app-guide/strong-consistency/">Using
+Strong Consistency</a> documentation, as this option will not be covered in
+this tutorial.
+{{% /note %}}
+
+## How Replication Properties Work
+
+When using Riak, there are two ways of choosing replication properties:
+
+1. On a per-request basis
+2. In a more programmatic fashion, [using bucket types][usage bucket types]
+
+### Per-request Replication Properties
+
+The simplest way to apply replication properties to objects stored in
+Riak is to specify those properties on a per-request basis, as shown in
+the [section on client-level settings below](#client-level-replication-settings).
+
+### Replication Properties Through Bucket Types
+
+Let's say, for example, that you want to apply an `n_val` of 5, an `r`
+of 3, and a `w` of 3 to all of the data in some of the [buckets]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets) that
+you're using. In order to set those replication properties, you should
+create a bucket type that sets those properties. Below is an example:
+
+```bash
+riak-admin bucket-type create custom_props '{"props":{"n_val":5,"r":3,"w":3}}'
+riak-admin bucket-type activate custom_props
+```
+
+Now, any time you store an object in a bucket with the type
+`custom_props` those properties will apply to it.
+
+## Available Parameters
+
+The table below lists the most frequently used replication parameters
+that are available in Riak. Symbolic values like `quorum` are discussed
+[below](#symbolic-consistency-names). Each
+parameter will be explained in more detail in later sections:
+
+Parameter | Common name | Default value | Description
+:---------|:------------|:--------------|:-----------
+`n_val` | N | `3` | Replication factor, i.e. the number of nodes in the cluster on which an object is to be stored
+`r` | R | `quorum` | The number of servers that must respond to a read request
+`w` | W | `quorum` | The number of servers that must respond to a write request
+`pr` | PR | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/3.0.4/learn/concepts/vnodes/">vnodes</a> that must respond to a read request
+`pw` | PW | `0` | The number of primary <a href="{{< baseurl >}}riak/kv/3.0.4/learn/concepts/vnodes/">vnodes</a> that must respond to a write request
+`dw` | DW | `quorum` | The number of servers that must report that a write has been successfully written to disk
+`rw` | RW | `quorum` | If R and W are undefined, this parameter will substitute for both R and W during object deletes. It is extremely unlikely that you will need to adjust this parameter.
+`notfound_ok` | | `true` | This parameter determines how Riak responds if a read fails on a node. Setting to `true` (the default) is the equivalent of setting R to 1: if the first node to respond doesn't have a copy of the object, Riak will immediately return a `not found` error. If set to `false`, Riak will continue to look for the object on the number of nodes specified by N (aka `n_val`).
+`basic_quorum` | | `false` | If `notfound_ok` is set to `false`, Riak will be more thorough in looking for an object on multiple nodes. Setting `basic_quorum` to `true` in this case will instruct Riak to wait for only a `quorum` of responses to return a `notfound` error instead of N responses.
+
+## A Primer on N, R, and W
+
+The most important thing to note about Riak's replication controls is
+that they can be set at the bucket level.
You can use [bucket types]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) +to set up bucket `A` to use a particular set of replication properties +and bucket `B` to use entirely different properties. + +At the bucket level, you can choose how many copies of data you want to +store in your cluster (N, or `n_val`), how many copies you wish to read +from at one time (R, or `r`), and how many copies must be written to be +considered a success (W, or `w`). + +In addition to the bucket level, you can also specify replication +properties on the client side for any given read or write. The examples +immediately below will deal with bucket-level replication settings, but +check out the [section below](#client-level-replication-settings) +for more information on setting properties on a per-operation basis. + +The most general trade-off to be aware of when setting these values is +the trade-off between **data accuracy** and **client responsiveness**. +Choosing higher values for N, R, and W will mean higher accuracy because +more nodes are checked for the correct value on read and data is written +to more nodes upon write; but higher values will also entail degraded +responsiveness, especially if one or more nodes is failing, because Riak +has to wait for responses from more nodes. + +## N Value and Replication + +All data stored in Riak will be replicated to the number of nodes in the +cluster specified by a bucket's N value (`n_val`). The default `n_val` +in Riak is 3, which means that data stored in a bucket with the default +N will be replicated to three different nodes, thus storing three +**replicas** of the object. + +In order for this to be effective, you need at least three nodes in your +cluster. The merits of this system, however, can be demonstrated using +your local environment. + +Let's create a bucket type that sets the `n_val` for any bucket with +that type to 2. To do so, you must create and activate a bucket type +that sets this property: + +```bash +riak-admin bucket-type create n_val_equals_2 '{"props":{"n_val":2}}' +riak-admin bucket-type activate n_val_equals_2 +``` + +Now, all buckets that bear the type `n_val_equals_2` will have `n_val` +set to 2. Here's an example write: + +```curl +curl -XPUT http://localhost:8098/types/n_val_equals_2/buckets/test_bucket/keys/test_key \ + -H "Content-Type: text/plain" \ + -d "the n_val on this write is 2" +``` + +Now, whenever we write to a bucket of this type, Riak will write a +replica of the object to two different nodes. + +{{% note title="A Word on Setting the N Value" %}} +`n_val` must be greater than 0 and less than or equal to the number of actual +nodes in your cluster to get all the benefits of replication. We advise +against modifying the `n_val` of a bucket after its initial creation as this +may result in failed reads because the new value may not be replicated to all +the appropriate partitions. +{{% /note %}} + +## R Value and Read Failure Tolerance + +Read requests to Riak are sent to all N nodes that are known to be +currently responsible for the data. The R value (`r`) enables you to +specify how many of those nodes have to return a result on a given read +for the read to be considered successful. This allows Riak to provide +read availability even when nodes are down or laggy. 
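+
+To make the trade-off concrete: a read issued with a given R can still
+succeed as long as at most N - R of the responsible vnodes fail to
+respond. The sketch below is just this arithmetic, not a Riak API call,
+shown in the Erlang shell:
+
+```erlang
+%% Illustrative only: how many unresponsive vnodes a read can tolerate.
+MaxReadFailures = fun(NVal, R) -> NVal - R end.
+%% With the defaults (N = 3, R = quorum = 2):
+%% MaxReadFailures(3, 2) =:= 1, so one unreachable replica is tolerated.
+```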
+
+You can set R anywhere from 1 to N; lower values mean faster response
+time but a higher likelihood of Riak not finding the object you're
+looking for, while higher values mean that Riak is more likely to find
+the object but takes longer to look.
+
+As an example, let's create and activate a bucket type with `r` set to
+`1`. All reads performed on data in buckets with this type require a
+result from only one node.
+
+```bash
+riak-admin bucket-type create r_equals_1 '{"props":{"r":1}}'
+riak-admin bucket-type activate r_equals_1
+```
+
+Here's an example read request using the `r_equals_1` bucket type:
+
+```ruby
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+obj = bucket.get('chimpanzee')
+```
+
+```java
+Location chimpanzeeFact =
+    new Location(new Namespace("r_equals_1", "animal_facts"), "chimpanzee");
+FetchValue fetch = new FetchValue.Builder(chimpanzeeFact).build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+System.out.println(obj.getValue().toString());
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('chimpanzee', 'animal_facts', 'r_equals_1')
+  ->build()
+  ->execute();
+
+echo $response->getObject()->getData();
+```
+
+```python
+bucket = client.bucket_type('r_equals_1').bucket('animal_facts')
+bucket.get('chimpanzee')
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"r_equals_1">>, <<"animal_facts">>},
+                                <<"chimpanzee">>).
+```
+
+```curl
+curl http://localhost:8098/types/r_equals_1/buckets/animal_facts/keys/chimpanzee
+```
+
+As explained above, reads to buckets with the `r_equals_1` type will
+typically be completed more quickly, but if the first node to respond
+to a read request has yet to receive a replica of the object, Riak will
+return a `not found` response (which may happen even if the object lives
+on one or more other nodes). Setting `r` to a higher value will mitigate
+this risk.
+
+## W Value and Write Fault Tolerance
+
+As with read requests, writes to Riak are sent to all N nodes that are
+known to be currently responsible for the data. The W value (`w`) enables
+you to specify how many nodes must complete a write to be considered
+successful---a direct analogy to R. This allows Riak to provide write
+availability even when nodes are down or laggy.
+
+As with R, you can set W to any value between 1 and N. The same
+performance vs. fault tolerance trade-offs that apply to R apply to W.
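+
+For instance, assuming an open `riakc_pb_socket` connection in `Pid`, a
+per-request W override might look like the following sketch (the bucket,
+key, and value here are purely illustrative):
+
+```erlang
+%% A minimal sketch: consider the write successful once a single vnode
+%% responds, trading write-safety assurance for responsiveness.
+Obj = riakc_obj:new(<<"animal_facts">>, <<"wombat">>,
+                    <<"Wombat droppings are cube-shaped">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj, [{w, 1}]).
+```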
+
+As an example, let's create and activate a bucket type with `w` set to
+`3`:
+
+```bash
+riak-admin bucket-type create w_equals_3 '{"props":{"w":3}}'
+riak-admin bucket-type activate w_equals_3
+```
+
+Now, we can attempt a write to a bucket bearing the type `w_equals_3`:
+
+```ruby
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = Riak::RObject.new(bucket, 'giraffe')
+obj.raw_data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.content_type = 'text/plain'
+obj.store
+```
+
+```java
+Location storyKey =
+    new Location(new Namespace("w_equals_3", "animal_facts"), "giraffe");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("The species name of the giraffe is Giraffa camelopardalis"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(storyKey)
+        .build();
+client.execute(store);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->buildObject('The species name of the giraffe is Giraffa camelopardalis')
+  ->buildLocation('giraffe', 'animal_facts', 'w_equals_3')
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket_type('w_equals_3').bucket('animal_facts')
+obj = RiakObject(client, bucket, 'giraffe')
+obj.content_type = 'text/plain'
+obj.data = 'The species name of the giraffe is Giraffa camelopardalis'
+obj.store()
+```
+
+```erlang
+Obj = riakc_obj:new({<<"w_equals_3">>, <<"animal_facts">>},
+                    <<"giraffe">>,
+                    <<"The species name of the giraffe is Giraffa camelopardalis">>,
+                    <<"text/plain">>),
+riakc_pb_socket:put(Pid, Obj).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "The species name of the giraffe is Giraffa camelopardalis" \
+  http://localhost:8098/types/w_equals_3/buckets/animal_facts/keys/giraffe
+```
+
+Writing this object will return a success response from Riak only if
+3 nodes respond that the write was successful. Setting `w` to 1, for
+example, would mean that Riak would return a response more quickly, but
+with a higher risk that the write will fail because the first node it
+seeks to write the object to is unavailable.
+
+## Primary Reads and Writes with PR and PW
+
+In Riak's replication model, there are N [vnodes]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode),
+called _primary vnodes_, that hold primary responsibility for any given
+key. Riak will attempt reads and writes to primary vnodes first, but in
+case of failure, those operations will go to failover nodes in order to
+comply with the R and W values that you have set. This failover option
+is called _sloppy quorum_.
+
+In addition to R and W, you can also set integer values for the _primary
+read_ (PR) and _primary write_ (PW) parameters that specify how many
+primary nodes must respond to a request in order to report success to
+the client. The default for both values is zero.
+
+Setting PR and/or PW to non-zero values produces a mode of operation
+called _strict quorum_. This mode has the advantage that the client is
+more likely to receive the most up-to-date values, but at the cost of a
+higher probability that reads or writes will fail because primary vnodes
+are unavailable.
+
+{{% note title="Note on PW" %}}
+If PW is set to a non-zero value, there is a higher risk (usually very small)
+that failure will be reported to the client upon write. But this does not
+necessarily mean that the write has failed completely. If there are reachable
+primary vnodes, those vnodes will still write the new data to Riak.
+When the failed vnode returns to service, it will receive the new copy of
+the data via either read repair or active anti-entropy.
+{{% /note %}}
+
+## Durable Writes with DW
+
+The W and PW parameters specify how many vnodes must _respond_ to a
+write in order for it to be deemed successful. What they do not specify
+is whether data has actually been written to disk in the storage backend.
+The DW parameter enables you to specify a number of vnodes between 1
+and N that must write the data to disk before the request is deemed
+successful. The default value is `quorum` (more on symbolic names below).
+
+How quickly and robustly data is written to disk depends on the
+configuration of your backend or backends. For more details, see the
+documentation on [Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [multiple backends]({{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/multi).
+
+## Delete Quorum with RW
+
+{{% note title="Deprecation notice" %}}
+It is no longer necessary to specify an RW value when making delete requests.
+We explain its meaning here, however, because RW still shows up as a property
+of Riak buckets (as `rw`) for the sake of backwards compatibility. Feel free
+to skip this explanation unless you are curious about the meaning of RW.
+{{% /note %}}
+
+Deleting an object requires successfully reading an object and then
+writing a tombstone to the object's key that specifies that an object
+once resided there. In the course of their operation, all deletes must
+comply with any R, W, PR, and PW values that apply along the way.
+
+If R and W are undefined, however, the RW (`rw`) value will substitute
+for both R and W during object deletes. In recent versions of Riak, it
+is nearly impossible to make reads or writes that do not somehow specify
+both R and W, and so you will never need to worry about RW.
+
+## The Implications of `notfound_ok`
+
+The `notfound_ok` parameter is a bucket property that determines how
+Riak responds if a read fails on a node. If `notfound_ok` is set to
+`true` (the default value) and the first vnode to respond doesn't have a
+copy of the object, Riak will assume that the missing value is
+authoritative and immediately return a `not found` result to the client.
+This will generally lead to faster response times.
+
+On the other hand, setting `notfound_ok` to `false` means that the
+responding vnode will wait for something other than a `not found` error
+before reporting a value to the client. If an object doesn't exist under
+a key, the coordinating vnode will wait for N vnodes to respond with
+`not found` before it reports `not found` to the client. This setting
+makes Riak search more thoroughly for objects but at the cost of slower
+response times, a problem that can be mitigated by setting `basic_quorum`
+to `true`, which is discussed in the next section.
+
+## Early Failure Return with `basic_quorum`
+
+Setting `notfound_ok` to `false` on a request (or as a bucket property)
+is likely to introduce additional latency. If you read a non-existent
+key, Riak will check all 3 responsible vnodes for the value before
+returning `not found` instead of checking just one.
+
+This latency problem can be mitigated by setting `basic_quorum` to
+`true`, which will instruct Riak to query a quorum of nodes instead of N
+nodes. A quorum of nodes is calculated as floor(N/2) + 1, meaning that 5
+nodes will produce a quorum of 3, 6 nodes a quorum of 4, 7 nodes a
+quorum of 4, 8 nodes a quorum of 5, etc.
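+
+That calculation can be sketched in the Erlang shell (this is just the
+arithmetic described above, not a Riak API call):
+
+```erlang
+%% quorum = floor(N/2) + 1
+Quorum = fun(N) -> (N div 2) + 1 end.
+%% Quorum(5) =:= 3, Quorum(6) =:= 4, Quorum(7) =:= 4, Quorum(8) =:= 5.
+```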
+
+The default for `basic_quorum` is `false`, so you will need to
+explicitly set it to `true` on reads or in a bucket's properties. While
+the scope of this setting is fairly narrow, it can reduce latency in
+read-heavy use cases.
+
+## Symbolic Consistency Names
+
+Riak provides a number of "symbolic" consistency options for R, W, PR,
+RW, and DW that are often easier to use and understand than specifying
+integer values. The following symbolic names are available:
+
+* `all` - All replicas must reply. This is the same as setting R, W, PR, RW, or DW equal to N.
+* `one` - This is the same as setting 1 as the value for R, W, PR, RW, or DW.
+* `quorum` - A majority of the replicas must respond, that is, half plus one. For the default N value of 3, this calculates to 2, an N value of 5 calculates to 3, and so on.
+* `default` - Uses whatever the per-bucket consistency property is for R, W, PR, RW, or DW, which may be any of the above symbolic values or an integer.
+
+Not submitting a value for R, W, PR, RW, or DW is the same as using
+`default`.
+
+## Client-level Replication Settings
+
+Adjusting replication properties at the bucket level by [using bucket types][usage bucket types]
+is how you set default properties for _all_ of a bucket's reads and
+writes. But you can also set replication properties for specific reads
+and writes without setting those properties at the bucket level, instead
+specifying them on a per-operation basis.
+
+Let's say that you want to set `r` to 2 and `notfound_ok` to `true` for
+just one read. We'll fetch [John Stockton](http://en.wikipedia.org/wiki/John_Stockton)'s
+statistics from the `nba_stats` bucket.
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r: 2, notfound_ok: true)
+```
+
+```java
+Location johnStocktonStats =
+    new Location(new Namespace("nba_stats"), "john_stockton");
+FetchValue fetch = new FetchValue.Builder(johnStocktonStats)
+        .withOption(FetchOption.R, new Quorum(2))
+        .withOption(FetchOption.NOTFOUND_OK, true)
+        .build();
+client.execute(fetch);
+```
+
+```php
+(new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation('john_stockton', 'nba_stats')
+  ->withParameter('r', 2)
+  ->withParameter('notfound_ok', true)
+  ->build()
+  ->execute();
+```
+
+```python
+bucket = client.bucket('nba_stats')
+obj = bucket.get('john_stockton', r=2, notfound_ok=True)
+```
+
+```erlang
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                <<"nba_stats">>,
+                                <<"john_stockton">>,
+                                [{r, 2}, {notfound_ok, true}]).
+```
+
+```curl
+curl "http://localhost:8098/buckets/nba_stats/keys/john_stockton?r=2&notfound_ok=true"
+```
+
+Now, let's say that you want to attempt a write with `w` set to 3 and
+`dw` set to 2. As in the previous example, we'll be using the `default`
+bucket type, which enables us to not specify a bucket type upon write.
+Here's what that would look like:
+
+```ruby
+bucket = client.bucket('nba_stats')
+obj = Riak::RObject.new(bucket, 'michael_jordan')
+obj.content_type = 'application/json'
+obj.data = '{"stats":{ ... large stats object ... }}'
+obj.store(w: 3, dw: 2)
+```
+
+```java
+Location michaelJordanKey =
+    new Location(new Namespace("nba_stats"), "michael_jordan");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'stats':{ ... large stats object ... }}"));
}}")); +StoreValue store = new StoreValue.Builder(obj) + .withLocation(michaelJordanKey) + .withOption(StoreOption.W, new Quorum(3)) + .withOption(StoreOption.DW, new Quorum(2)) + .build(); +client.execute(store); +``` + +```php +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->buildJsonObject('{'stats':{ ... large stats object ... }}') + ->buildLocation('john_stockton', 'nba_stats') + ->withParameter('w', 3) + ->withParameter('dw', 2) + ->build() + ->execute(); +``` + +```erlang +Obj = riakc_obj:new(<<"nba_stats">>, + <<"michael_jordan">>, + <<"{'stats':{ ... large stats object ... }}">>, + <<"application/json">>), +riakc_pb_socket:put(Pid, Obj). +``` + +```curl +curl -XPUT \ + -H "Content-Type: application/json" \ + -d '{"stats":{ ... large stats object ... }}' \ + http://localhost:8098/buckets/nba_stats/keys/michael_jordan?w=3&dw=2 +``` + +All of Basho's [official Riak clients]({{<baseurl>}}riak/kv/3.0.4/developing/client-libraries) enable you to +set replication properties this way. For more detailed information, +refer to the tutorial on [basic key/value operations in Riak KV]({{<baseurl>}}riak/kv/3.0.4/developing/getting-started) +or to client-specific documentation: + +* [Ruby](https://github.com/basho/riak-ruby-client/blob/master/README.md) +* [Java](http://basho.github.io/riak-java-client/2.0.0/) +* [Python](http://basho.github.io/riak-python-client/) +* [Erlang](http://basho.github.io/riak-erlang-client/) + +## Illustrative Scenarios + +In case the above explanations were a bit too abstract for your tastes, +the following table lays out a number of possible scenarios for reads +and writes in Riak and how Riak is likely to respond. Some of these +scenarios involve issues surrounding conflict resolution, vector clocks, +and siblings, so we recommend reading the [Vector Clocks]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context#vector-clocks) documentation for more information. + +#### Read Scenarios + +These scenarios assume that a read request is sent to all 3 primary +vnodes responsible for an object. + +Scenario | What happens in Riak +:--------|:-------------------- +All 3 vnodes agree on the value | Once the first 2 vnodes return the value, that value is returned to the client +2 of 3 vnodes agree on the value, and those 2 are the first to reach the coordinating node | The value is returned to the client. Read repair will deal with the conflict per the later scenarios, which means that a future read may return a different value or <a href="{{< baseurl >}}riak/kv/3.0.4/learn/concepts/causal-context#siblings">siblings</a> +2 conflicting values reach the coordinating node and <a href="{{< baseurl >}}riak/kv/3.0.4/learn/concepts/causal-context#vector-clocks">vector clocks</a> allow for resolution | The vector clocks are used to resolve the conflict and return a single value, which is propagated via read repair to the relevant vnodes +2 conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `false` | The object with the most recent timestamp is returned and propagated via read repair to the relevant vnodes +2 siblings or conflicting values reach the coordinating node, vector clocks indicate a fork in the object history, and `allow_mult` is set to `true` | All keys are returned as siblings, optionally with associated values (depending on how the request is made) + +#### Write Scenarios + +These scenarios assume that a write request is sent to all 3 primary +vnodes responsible for an object. 
+
+Scenario | What happens in Riak
+:--------|:--------------------
+A vector clock is included with the write request, and is newer than the vclock attached to the existing object | The new value is written and success is indicated as soon as 2 vnodes acknowledge the write
+A vector clock is included with the write request but conflicts with the vclock attached to the existing object, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is included with the write request but conflicts with (or is older than) the vclock attached to the existing object, with `allow_mult` set to `false` | Riak will decide which object "wins" on the basis of timestamps; no sibling will be created
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `true` | The new value is created as a sibling for future reads
+A vector clock is not included with the write request and an object already exists, with `allow_mult` set to `false` | The new value overwrites the existing value
+
+## Screencast
+
+Here is a brief screencast that shows just how the N, R, and W values
+function in our running 3-node Riak cluster:
+
+<div style="display:none" class="iframe-video"
+id="http://player.vimeo.com/video/11172656"></div>
+
+<a href="http://vimeo.com/11172656">Tuning CAP Controls in Riak</a> from
+<a href="http://vimeo.com/bashotech">Basho Technologies</a> on <a
+href="http://vimeo.com">Vimeo</a>.
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/search-schemas.md b/content/riak/kv/3.0.4/developing/usage/search-schemas.md
new file mode 100644
index 0000000000..7380336ad8
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/search-schemas.md
@@ -0,0 +1,511 @@
+---
+title: "Creating Search Schemas"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Creating Search Schemas"
+    identifier: "usage_search_schemas"
+    weight: 110
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/advanced/search-schema
+  - /riak/kv/3.0.4/dev/advanced/search-schema
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters
+
+> **Note on Search 2.0 vs. Legacy Search**
+>
+> This document refers to the new Riak Search 2.0 with
+[Solr](http://lucene.apache.org/solr/) integration (codenamed
+Yokozuna).
+
+Riak Search is built for ease of use, allowing you to write values into
+Riak and query for values using Solr. Riak Search does a lot of work
+under the hood to convert your values---plain text, JSON, XML, [Riak Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/), and [more]({{<baseurl>}}riak/kv/3.0.4/developing/usage/custom-extractors)---into something that can be indexed and searched later.
+Nonetheless, you must still instruct Riak/Solr how to index a value. Are
+you providing an array of strings? An integer? A date? Is your text in
+English or Russian? You can provide such instructions to Riak Search by
+defining a Solr **schema**.
+
+## The Default Schema
+
+Riak Search comes bundled with a default schema named `_yz_default`. The
+default schema covers a wide range of possible field types. You can find
+the default schema [on GitHub](https://raw.github.com/basho/yokozuna/develop/priv/default_schema.xml).
+While using the default schema provides an easy path to starting
+development, we recommend that you define your own schema in production.
+Take note of `dynamicField name="*"`, which is a catch-all index for any
+value. Sufficiently sized objects can potentially take up tremendous
+amounts of disk space, so pay special attention to those indexes.
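+
+If you'd rather inspect the default schema on a running node than on
+GitHub, it can typically be fetched over HTTP as well; a sketch,
+assuming the standard schema resource on a local node:
+
+```curl
+# Returns the XML for the named schema
+curl http://localhost:8098/search/schema/_yz_default
+```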
+
+## Custom Schemas
+
+We'll show you how you can create custom schemas by way of example.
+Let's say that you have already created a schema named `cartoons` in a
+file named `cartoons.xml`. This would register the custom schema in Riak
+Search:
+
+```java
+import org.apache.commons.io.FileUtils;
+
+File xml = new File("cartoons.xml");
+String xmlString = FileUtils.readFileToString(xml);
+YokozunaSchema schema = new YokozunaSchema("cartoons", xmlString);
+StoreSchema storeSchemaOp = new StoreSchema.Builder(schema).build();
+client.execute(storeSchemaOp);
+```
+
+```ruby
+schema_data = File.read("cartoons.xml")
+client.create_search_schema("cartoons", schema_data)
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\StoreSchema($riak))
+  ->withName('cartoons')
+  ->withSchemaFile('cartoons.xml')
+  ->build()
+  ->execute();
+```
+
+```python
+xml_file = open('cartoons.xml', 'r')
+schema_data = xml_file.read()
+client.create_search_schema('cartoons', schema_data)
+xml_file.close()
+```
+
+```csharp
+var xml = File.ReadAllText("cartoons.xml");
+var schema = new SearchSchema("cartoons", xml);
+var rslt = client.PutSearchSchema(schema);
+```
+
+```javascript
+var fs = require('fs');
+
+fs.readFile('cartoons.xml', function (err, data) {
+    if (err) {
+        throw new Error(err);
+    }
+
+    var schemaXml = data.toString('utf8');
+
+    var options = {
+        schemaName: 'cartoons',
+        schema: schemaXml
+    };
+
+    client.storeSchema(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+    });
+});
+```
+
+```erlang
+{ok, SchemaData} = file:read_file("cartoons.xml"),
+riakc_pb_socket:create_search_schema(Pid, <<"cartoons">>, SchemaData).
+```
+
+```curl
+curl -XPUT http://localhost:8098/search/schema/cartoons \
+  -H 'Content-Type:application/xml' \
+  --data-binary @cartoons.xml
+```
+
+## Creating a Custom Schema
+
+The first step in creating a custom schema is to define exactly what
+fields you must index. Part of that step is understanding how Riak
+Search extractors function.
+
+### Extractors
+
+In Riak Search, extractors are modules responsible for pulling out a
+list of fields and values from a Riak object. How this is achieved
+depends on the object's content type, but the two common cases are JSON
+and XML, which operate similarly. Our examples here will use JSON.
+
+The following JSON object represents the character
+[Lion-o](http://en.wikipedia.org/wiki/List_of_ThunderCats_characters#Lion-O)
+from the cartoon ThunderCats. He has a name and age, he's the team
+leader, and he has a list of aliases in other languages.
+
+```json
+{
+  "name":"Lion-o",
+  "age":30,
+  "leader":true,
+  "aliases":[
+    {"name":"León-O", "desc_es":"Señor de los ThunderCats"},
+    {"name":"Starlion", "desc_fr":"Le jeune seigneur des Cosmocats"}
+  ]
+}
+```
+
+The extractor will flatten the above object into a list of field/value
+pairs. Nested objects will be separated with a dot (`.`) and arrays will
+simply repeat the fields. The above object will be extracted to the
+following list of Solr document fields. 
+ +``` +name=Lion-o +age=30 +leader=true +aliases.name=León-O +aliases.desc_es=Señor de los ThunderCats +aliases.name=Starlion +aliases.desc_fr=Le jeune seigneur des Cosmocats +``` + +This means that our schema should handle `name`, `age`, `leader`, +`aliases.name` (a `dot` is a valid field character), and +`aliases.desc_*` which is a description in the given language of the +suffix (Spanish and French). + +### Required Schema Fields + +Solr schemas can be very complex, containing many types and analyzers. +Refer to the [Solr 4.7 reference +guide](http://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf) +for a complete list. You should be aware, however, that there are a few +fields that are required by Riak Search in order to properly distribute +an object across a [cluster][concept clusters]. These fields are all prefixed +with `_yz`, which stands for +[Yokozuna](https://github.com/basho/yokozuna), the original code name +for Riak Search. + +Below is a bare minimum skeleton Solr Schema. It won't do much for you +other than allow Riak Search to properly manage your stored objects. + +```xml +<?xml version="1.0" encoding="UTF-8" ?> +<schema name="schedule" version="1.5"> + <fields> + + <!-- All of these fields are required by Riak Search --> + <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/> + <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + </fields> + + <uniqueKey>_yz_id</uniqueKey> + + <types> + <!-- YZ String: Used for non-analyzed fields --> + <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" /> + </types> +</schema> +``` + +If you're missing any of the above fields, Riak Search will reject your +custom schema. The value for `<uniqueKey>` _must_ be `_yz_id`. + +In the table below, you'll find a description of the various required +fields. You'll rarely need to use any fields other than `_yz_rt` (bucket +type), `_yz_rb` (bucket) and `_yz_rk` (Riak key). On occasion, `_yz_err` +can be helpful if you suspect that your extractors are failing. +Malformed JSON or XML will cause Riak Search to index a key and set +`_yz_err` to 1, allowing you to reindex with proper values later. 
+ +Field | Name | Description +:-------|:-----|:----------- +`_yz_id` | ID | Unique identifier of this Solr document +`_yz_ed` | Entropy Data | Data related to [active anti-entropy]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy) +`_yz_pn` | Partition Number | Used as a filter query parameter to remove duplicate replicas across nodes +`_yz_fpn` | First Partition Number | The first partition in this doc's preflist, used for further filtering on overlapping partitions +`_yz_vtag`| VTag | If there is a sibling, use vtag to differentiate them +`_yz_rk` | Riak Key | The key of the Riak object this doc corresponds to +`_yz_rt` | Riak Bucket Type | The bucket type of the Riak object this doc corresponds to +`_yz_rb` | Riak Bucket | The bucket of the Riak object this doc corresponds to +`_yz_err` | Error Flag | indicating if this doc is the product of a failed object extraction + +### Defining Fields + +With your required fields known and the skeleton schema elements in +place, it's time to add your own fields. Since you know your object +structure, you need to map the name and type of each field (a string, +integer, boolean, etc). + +When creating fields you can either create specific fields via the +`field` element or an asterisk (`*`) wildcard field via `dynamicField`. +Any field that matches a specific field name will win, and if not, it +will attempt to match a dynamic field pattern. + +Besides a field `type`, you also must decide if a value is to be +`indexed` (usually `true`) and `stored`. When a value is `stored` that +means that you can get the value back as a result of a query, but it +also doubles the storage of the field (once in Riak, again in Solr). If +a single Riak object can have more than one copy of the same matching +field, you also must set `multiValued` to `true`. + +```xml +<?xml version="1.0" encoding="UTF-8" ?> +<schema name="schedule" version="1.0"> + <fields> + <field name="name" type="string" indexed="true" stored="true" /> + <field name="age" type="int" indexed="true" stored="false" /> + <field name="leader" type="boolean" indexed="true" stored="false" /> + <field name="aliases.name" type="string" indexed="true" stored="true" multiValued="true" /> + <dynamicField name="*_es" type="text_es" indexed="true" stored="true" multiValued="true" /> + <dynamicField name="*_de" type="text_de" indexed="true" stored="true" multiValued="true" /> + + <!-- All of these fields are required by Riak Search --> + <field name="_yz_id" type="_yz_str" indexed="true" stored="true" multiValued="false" required="true"/> + <field name="_yz_ed" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_pn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + <field name="_yz_rk" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_rt" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_rb" type="_yz_str" indexed="true" stored="true" multiValued="false"/> + <field name="_yz_err" type="_yz_str" indexed="true" stored="false" multiValued="false"/> + </fields> + + <uniqueKey>_yz_id</uniqueKey> +``` + +Next, take note of the types you used in the fields and ensure that each +of the field types are defined as a `fieldType` under the `types` +element. 
Basic types such as `string`, `boolean`, `int` have matching +Solr classes. There are dozens more types, including many kinds of +number (`float`, `tdouble`, `random`), `date` fields, and even +geolocation types. + +Besides simple field types, you can also customize analyzers for +different languages. In our example, we mapped any field that ends with +`*_es` to Spanish, and `*_de` to German. + +```xml + <types> + <!-- YZ String: Used for non-analyzed fields --> + <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true" /> + + <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> + <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> + <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/> + + <!-- Spanish --> + <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> + <analyzer> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> + <filter class="solr.SpanishLightStemFilterFactory"/> + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> + </analyzer> + </fieldType> + + <!-- German --> + <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> + <analyzer> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> + <filter class="solr.GermanNormalizationFilterFactory"/> + <filter class="solr.GermanLightStemFilterFactory"/> + <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> + <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> + </analyzer> + </fieldType> + </types> +</schema> +``` + +### "Catch-All" Field + +Without a catch-all field, an exception will be thrown if data is +provided to index without a corresponding `<field>` element. The +following is the catch-all field from the default Yokozuna schema and +can be used in a custom schema as well. + +```xml +<dynamicField name="*" type="ignored" /> +``` + +The following is required to be a child of the `types` element in the +schema: + +```xml +<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> +``` + +### Dates + +The format of strings that represents a date/time is important as Solr +only understands [ISO8601 UTC date/time +values](http://lucene.apache.org/solr/4_6_1/solr-core/org/apache/solr/schema/DateField.html). +An example of a correctly formatted date/time string is +`1995-12-31T23:59:59Z`. If you provide an incorrectly formatted +date/time value, an exception similar to this will be logged to +`solr.log`: + +```log +2014-02-27 21:30:00,372 [ERROR] <qtp1481681868-421>@SolrException.java:108 org.apache.solr.common.SolrException: Invalid Date String:'Thu Feb 27 21:29:59 +0000 2014' + at org.apache.solr.schema.DateField.parseMath(DateField.java:182) + at org.apache.solr.schema.TrieField.createField(TrieField.java:611) + at org.apache.solr.schema.TrieField.createFields(TrieField.java:650) + at org.apache.solr.schema.TrieDateField.createFields(TrieDateField.java:157) + at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47) + ... + ... + ... 
+```
+
+### Uploading
+
+Once you have decided on the format of your custom schema as an .xml file, it can be uploaded to Riak KV as follows:
+
+```curl
+curl -v -XPUT $RIAK_HOST/search/schema/thundercats \
+  -H 'Content-Type:application/xml' \
+  --data-binary @thundercats_schema.xml
+```
+
+
+
+## Field Properties By Use Case
+
+Sometimes it can be tricky to decide whether a value should be `stored`,
+or whether `multiValued` is allowed. This handy table from the [Solr
+documentation](https://cwiki.apache.org/confluence/display/solr/Field+Properties+by+Use+Case)
+may help you pick field properties.
+
+An entry of `true` or `false` in the table indicates that the option
+must be set to the given value for the use case to function correctly.
+If no entry is provided, the setting of that attribute has no impact on
+the case.
+
+<table class="schemausecase">
+<thead>
+<tr>
+<th>Use Case</th>
+<th><code>indexed</code></th>
+<th><code>stored</code></th>
+<th><code>multiValued</code></th>
+<th><code>omitNorms</code></th>
+<th><code>termVectors</code></th>
+<th><code>termPositions</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>search within field</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>retrieve contents</td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use as unique key</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>sort on field</td>
+<td><code>true</code></td>
+<td></td>
+<td><code>false</code></td>
+<td><code>true</code>[1](#notes)</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>use field boosts[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>document boosts affect searches within field</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>highlighting</td>
+<td><code>true</code>[4](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td>[2](#notes)</td>
+<td><code>true</code>[3](#notes)</td>
+</tr>
+<tr>
+<td>faceting[5](#notes)</td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>add multiple values, maintaining order</td>
+<td></td>
+<td></td>
+<td><code>true</code></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>field length affects doc score</td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>false</code></td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>MoreLikeThis[5](#notes)</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td><code>true</code>[6](#notes)</td>
+<td></td>
+</tr>
+</tbody></table>
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/search.md b/content/riak/kv/3.0.4/developing/usage/search.md
new file mode 100644
index 0000000000..3b02eb4bb2
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/search.md
@@ -0,0 +1,1455 @@
+---
+title: "Using Search"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Searching"
+    identifier: "usage_searching"
+    weight: 105
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/using/search
+  - /riak/kv/3.0.4/dev/using/search
+---
+
+[usage search schema]: ../search-schemas/
+[bucket types]: ../bucket-types/
+
+## Setup
+
+Riak search 2.0 is an integration of Solr (for indexing and querying)
+and Riak (for storage and distribution). There are a few points of
+interest that a user of Riak search will have to keep in mind in order
+to properly store and later query for values.
+
+1. **Schemas** explain to Solr how to index fields
+2. **Indexes** are named Solr indexes against which you will query
+3. **Bucket-index association** signals to Riak *when* to index values
+   (this also includes bucket type-index association)
+
+{{% note %}}
+Riak search uses active anti-entropy (AAE) to ensure that the data is
+consistent between the Riak backends and the Solr indexes. When using
+Riak search, you should not disable AAE without understanding the risks
+of divergence between the data in the Riak backends and the Solr indexes
+and how that can impact your application. More information about how
+Riak search uses AAE is in the
+[Riak search reference](../../../using/reference/search/#active-anti-entropy-aae).
+{{% /note %}}
+
+Riak Search must first be configured with a Solr schema so that Solr
+knows how to index value fields. If you don't define one, you're
+provided with a default schema named `_yz_default`, which can be found
+[on
+GitHub](https://raw.githubusercontent.com/basho/yokozuna/develop/priv/default_schema.xml).
+
+The examples in this document will presume the default. You can read
+more about creating custom schemas in [Search Schema][usage search schema], which you'll likely want to use in a production environment.
+
+Next, you must create a named Solr index through Riak Search. This index
+represents a collection of similar data that you connect with to perform
+queries. When creating an index, you can optionally provide a schema. If
+you do not, the default schema will be used. Here we'll create an
+index named `famous` with the default schema.
+
+Both schema and index creation are covered immediately below.
+
+{{% note title="Note on index names" %}}
+Note that index names may only be
+[ASCII](http://en.wikipedia.org/wiki/ASCII) values from 32-127 (spaces,
+standard punctuation, digits, and word characters). This may change in
+the future to allow full [Unicode](http://en.wikipedia.org/wiki/Unicode)
+support.
+{{% /note %}}
+
+All `curl` examples in this document assume that you have set an
+environment variable named `RIAK_HOST`, which points to a Riak base URL,
+such as `http://localhost:8098`. The appropriate value for `RIAK_HOST`
+will depend on your [configuration]({{<baseurl>}}riak/kv/3.0.4/configuring/reference#client-interfaces).
+
+## Creating an Index
+
+Let's start by creating an index called `famous` that uses the default
+schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(famousIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('famous')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous')
+```
+
+```csharp
+var idx = new SearchIndex("famous");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var storeIndex_cb = function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (!rslt) {
+        // error...
+    }
+};
+
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+export RIAK_HOST="http://localhost:8098"
+
+curl -XPUT $RIAK_HOST/search/index/famous
+```
+
+> **Getting started with Riak clients**
+>
+> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.4/developing/client-libraries), you can find more information about getting started with your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.4/developing/getting-started) section.
+
+
+Note that the above command is exactly the same as the following, which
+explicitly defines the default schema.
+
+```java
+YokozunaIndex famousIndex = new YokozunaIndex("famous", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(famousIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index("famous", "_yz_default")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+  ->withName('famous')
+  ->usingSchema('_yz_default')
+  ->build()
+  ->execute();
+```
+
+```python
+client.create_search_index('famous', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("famous", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var store = new Riak.Commands.YZ.StoreIndex.Builder()
+    .withIndexName("famous")
+    .withSchemaName("_yz_default")
+    .withCallback(storeIndex_cb)
+    .build();
+
+client.execute(store);
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"famous">>, <<"_yz_default">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreIndexCommandBuilder().
+    WithIndexName("famous").
+    WithSchemaName("_yz_default").
+    WithTimeout(time.Second * 30).
+    Build()
+if err != nil {
+    return err
+}
+
+err = cluster.Execute(cmd)
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/famous \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
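+
+If you want to confirm that the index exists and see which schema it
+uses, the same index resource can be read back over HTTP. This is a
+sketch using the `RIAK_HOST` convention above; the exact response
+fields may vary by version:
+
+```curl
+curl $RIAK_HOST/search/index/famous
+
+# Approximate response:
+# {"name":"famous","n_val":3,"schema":"_yz_default"}
+```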
+
+## Associating an Index
+
+The last set-up item that you need to perform is to associate your Solr index
+with either a [bucket type][bucket types] or a custom bucket. You
+only need to do this once per bucket type, and all buckets within that type
+will use the same Solr index. For example, to associate a bucket type
+named `animals` with the `famous` index, you can set the `animals`
+bucket type's `search_index` property to `famous`. If a Solr index is to be used by
+only *one* Riak bucket, you can set the `search_index` property on that
+bucket. If more than one bucket is to share a Solr index, a bucket type
+should be used. More on bucket types in the section directly below.
+
+### Associating via Bucket Type
+
+We suggest that you use [bucket
+types][bucket types] to namespace and configure all buckets you
+use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional set-up step on the
+command line.
+
+When creating a new bucket type, you can create a bucket type without
+any properties and set individual buckets to be indexed. 
The step below +creates and activates the bucket type: + +```bash +riak-admin bucket-type create animals '{"props":{}}' +riak-admin bucket-type activate animals +``` + +And this step applies the index to the `cats` bucket, which bears the +`animals` bucket type we just created and activated: + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \ + -H 'Content-Type: application/json' \ + -d '{"props":{"search_index":"famous"}}' +``` + +Another possibility is to set the `search_index` as a default property +of the bucket type. This means _any_ bucket under that type will +inherit that setting and have its values indexed. + +```bash +riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}' +riak-admin bucket-type activate animals +``` + +If you ever need to turn off indexing for a bucket, set the +`search_index` property to the `_dont_index_` sentinel value. + +### Associating an Index via Custom Bucket Properties + +Although we recommend that you use all new buckets under a bucket type, +if you have existing data with a type-free bucket (i.e. under the +default bucket type) you can set the `search_index` property for a +specific bucket. + +```java +Namespace catsBucket = new Namespace("cats"); +StoreBucketPropsOperation storePropsOp = new StoreBucketPropsOperation.Builder(catsBucket) + .withSearchIndex("famous") + .build(); +client.execute(storePropsOp); +``` + +```ruby +bucket = client.bucket('cats') +bucket.properties = {'search_index' => 'famous'} +``` + +```php +(new \Basho\Riak\Command\Builder\Search\AssociateIndex($riak)) + ->withName('famous') + ->buildBucket('cats') + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('cats') +bucket.set_properties({'search_index': 'famous'}) +``` + +```csharp +var properties = new RiakBucketProperties(); +properties.SetSearchIndex("famous"); +var rslt = client.SetBucketProperties("cats", properties); +``` + +```javascript +var bucketProps_cb = function (err, rslt) { + if (err) { + throw new Error(err); + } + // success +}; + +var store = new Riak.Commands.KV.StoreBucketProps.Builder() + .withBucket("cats") + .withSearchIndex("famous") + .withCallback(bucketProps_cb) + .build(); + +client.execute(store); +``` + +```erlang +riakc_pb_socket:set_search_index(Pid, <<"cats">>, <<"famous">>). +``` + +```golang +cmd, err := riak.NewStoreBucketPropsCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithSearchIndex("famous"). + Build() +if err != nil { + return err +} + +err = cluster.Execute(cmd) +``` + +```curl +curl -XPUT $RIAK_HOST/buckets/cats/props \ + -H'content-type:application/json' \ + -d'{"props":{"search_index":"famous"}}' +``` + +Once you have created the index association, any new data will be indexed on +ingest according to your schema. + +## Riak Search Security Setup + +[Security]({{<baseurl>}}riak/kv/3.0.4/using/security/) is a new feature as of +Riak 2.0 that lets an administrator limit access to certain resources. +In the case of search, your options are to limit administration of +schemas or indexes (the `search.admin` permission) to certain users, and +to limit querying (the `search.query` permission) to any index or to a +specific index. The example below shows the various options. 
+
+```bash
+riak-admin security grant search.admin on schema to username
+riak-admin security grant search.admin on index to username
+riak-admin security grant search.query on index to username
+riak-admin security grant search.query on index famous to username
+```
+
+Those permissions can also be revoked:
+
+```bash
+riak-admin security revoke search.admin on schema from username
+riak-admin security revoke search.admin on index from username
+riak-admin security revoke search.query on index from username
+riak-admin security revoke search.query on index famous from username
+```
+
+## Indexing Values
+
+> **Note on indexing and lag times**
+>
+> There is typically a one-second delay between storing an object in Riak
+and that object being available in Search queries. You should take this
+into account when writing Riak client tests, benchmarking, and so on.
+More information can be found in the [Solr
+documentation](http://wiki.apache.org/solr/SolrPerformanceFactors).
+
+With a Solr schema, index, and association in place (and possibly a
+security setup as well), we're ready to start using Riak Search. First,
+populate the `cats` bucket with values, in this case information about
+four cats: Lion-o, Cheetara, Snarf, and Panthro.
+
+Depending on the driver you use, you may have to specify the content
+type, which for this example is `application/json`. In the case of Ruby
+and Python the content type is automatically set for you based on the
+object given.
+
+```java
+Namespace animalsBucket = new Namespace("animals", "cats");
+String json = "application/json";
+
+RiakObject liono = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"));
+RiakObject cheetara = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"));
+RiakObject snarf = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"));
+RiakObject panthro = new RiakObject()
+        .setContentType(json)
+        .setValue(BinaryValue.create("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"));
+Location lionoLoc = new Location(animalsBucket, "liono");
+Location cheetaraLoc = new Location(animalsBucket, "cheetara");
+Location snarfLoc = new Location(animalsBucket, "snarf");
+Location panthroLoc = new Location(animalsBucket, "panthro");
+
+StoreValue lionoStore = new StoreValue.Builder(liono).withLocation(lionoLoc).build();
+// The other StoreValue operations can be built the same way
+
+client.execute(lionoStore);
+// The other storage operations can be performed the same way
+```
+
+```ruby
+bucket = client.bucket_type('animals').bucket("cats")
+
+cat = bucket.get_or_new("liono")
+cat.data = {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+cat.store
+
+cat = bucket.get_or_new("cheetara")
+cat.data = {"name_s" => "Cheetara", "age_i" => 28, "leader_b" => false}
+cat.store
+
+cat = bucket.get_or_new("snarf")
+cat.data = {"name_s" => "Snarf", "age_i" => 43}
+cat.store
+
+cat = bucket.get_or_new("panthro")
+cat.data = {"name_s" => "Panthro", "age_i" => 36}
+cat.store
+```
+
+```php
+$bucket = new \Basho\Riak\Bucket('cats', 'animals');
+
+$storeObjectBuilder = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+  ->withLocation(new \Basho\Riak\Location('liono', $bucket))
+  ->buildJsonObject(['name_s' => 'Lion-o', 'age_i' => 30, 'leader_b' => true]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('cheetara', $bucket))
+  ->buildJsonObject(['name_s' => 'Cheetara', 'age_i' => 28, 'leader_b' => false]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('snarf', $bucket))
+  ->buildJsonObject(['name_s' => 'Snarf', 'age_i' => 43]);
+
+$storeObjectBuilder->build()->execute();
+
+$storeObjectBuilder->withLocation(new \Basho\Riak\Location('panthro', $bucket))
+  ->buildJsonObject(['name_s' => 'Panthro', 'age_i' => 36]);
+
+$storeObjectBuilder->build()->execute();
+```
+
+```python
+bucket = client.bucket_type('animals').bucket('cats')
+
+cat = bucket.new('liono', {'name_s': 'Lion-o', 'age_i': 30, 'leader_b': True})
+cat.store()
+
+cat = bucket.new('cheetara', {'name_s':'Cheetara', 'age_i':28, 'leader_b': False})
+cat.store()
+
+cat = bucket.new('snarf', {'name_s':'Snarf', 'age_i':43})
+cat.store()
+
+cat = bucket.new('panthro', {'name_s':'Panthro', 'age_i':36})
+cat.store()
+```
+
+```csharp
+var lionoId = new RiakObjectId("animals", "cats", "liono");
+var lionoObj = new { name_s = "Lion-o", age_i = 30, leader_b = true };
+var lionoRiakObj = new RiakObject(lionoId, lionoObj);
+
+var cheetaraId = new RiakObjectId("animals", "cats", "cheetara");
+var cheetaraObj = new { name_s = "Cheetara", age_i = 30, leader_b = false };
+var cheetaraRiakObj = new RiakObject(cheetaraId, cheetaraObj);
+
+var snarfId = new RiakObjectId("animals", "cats", "snarf");
+var snarfObj = new { name_s = "Snarf", age_i = 43, leader_b = false };
+var snarfRiakObj = new RiakObject(snarfId, snarfObj);
+
+var panthroId = new RiakObjectId("animals", "cats", "panthro");
+var panthroObj = new { name_s = "Panthro", age_i = 36, leader_b = false };
+var panthroRiakObj = new RiakObject(panthroId, panthroObj);
+
+var rslts = client.Put(new[] {
+    lionoRiakObj, cheetaraRiakObj, snarfRiakObj, panthroRiakObj
+});
+```
+
+```javascript
+function store_cb(err, rslt, async_cb) {
+    if (err) {
+        throw new Error(err);
+    }
+    async_cb(null, rslt);
+}
+
+var objs = [
+    [ 'liono', { name_s: 'Lion-o', age_i: 30, leader_b: true } ],
+    [ 'cheetara', { name_s: 'Cheetara', age_i: 30, leader_b: false } ],
+    [ 'snarf', { name_s: 'Snarf', age_i: 43, leader_b: false } ],
+    [ 'panthro', { name_s: 'Panthro', age_i: 36, leader_b: false } ],
+];
+
+var storeFuncs = [];
+objs.forEach(function (o) {
+    var storeFunc = function (async_cb) {
+        var key = o[0];
+        var value = o[1];
+        var riakObj = new Riak.Commands.KV.RiakObject();
+        riakObj.setContentType('application/json');
+        riakObj.setBucketType('animals');
+        riakObj.setBucket('cats');
+        riakObj.setKey(key);
+        riakObj.setValue(value);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            store_cb(err, rslt, async_cb);
+        });
+    };
+    storeFuncs.push(storeFunc);
+});
+
+async.parallel(storeFuncs, function (err, rslts) {
+    if (err) {
+        throw new Error(err);
+    }
+    // NB: all objects stored and indexed... 
+}); +``` + +```erlang +CO = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"liono">>, + <<"{\"name_s\":\"Lion-o\", \"age_i\":30, \"leader_b\":true}">>, + "application/json"), +riakc_pb_socket:put(Pid, CO), + +C1 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"cheetara">>, + <<"{\"name_s\":\"Cheetara\", \"age_i\":28, \"leader_b\":false}">>, + "application/json"), +riakc_pb_socket:put(Pid, C1), + +C2 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"snarf">>, + <<"{\"name_s\":\"Snarf\", \"age_i\":43}">>, + "application/json"), +riakc_pb_socket:put(Pid, C2), + +C3 = riakc_obj:new({<<"animals">>, <<"cats">>}, <<"panthro">>, + <<"{\"name_s\":\"Panthro\", \"age_i\":36}">>, + "application/json"), +riakc_pb_socket:put(Pid, C3), +``` + +```golang +o1 := &riak.Object{ + Key: "liono", + Value: []byte("{\"name_s\":\"Lion-o\",\"age_i\":30,\"leader_b\":true}"), +} +o2 := &riak.Object{ + Key: "cheetara", + Value: []byte("{\"name_s\":\"Cheetara\",\"age_i\":30,\"leader_b\":false}"), +} +o3 := &riak.Object{ + Key: "snarf", + Value: []byte("{\"name_s\":\"Snarf\",\"age_i\":43,\"leader_b\":false}"), +} +o4 := &riak.Object{ + Key: "panthro", + Value: []byte("{\"name_s\":\"Panthro\",\"age_i\":36,\"leader_b\":false}"), +} + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "application/json" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("animals"). + WithBucket("cats"). + WithContent(obj). + Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/liono \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Lion-o", "age_i":30, "leader_b":true}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/cheetara \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Cheetara", "age_i":28, "leader_b":false}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/snarf \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Snarf", "age_i":43}' + +curl -XPUT $RIAK_HOST/types/animals/buckets/cats/keys/panthro \ + -H 'Content-Type: application/json' \ + -d '{"name_s":"Panthro", "age_i":36}' +``` + +If you've used Riak before, you may have noticed that this is no +different from storing values without Riak Search. That's because we +designed Riak Search with the following design goal in mind: + +#### Write it like Riak, query it like Solr + +But how does Riak Search know how to index values, given that you can +store opaque values in Riak? For that, we employ extractors. + +## Extractors + +Extractors are modules in Riak that accept a Riak value with a certain +content type and convert it into a list of fields that can be indexed by +Solr. This is done transparently and automatically as part of the +indexing process. You can even create your own [custom extractors]({{<baseurl>}}riak/kv/3.0.4/developing/usage/custom-extractors). 
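+
+If you're unsure what an extractor will produce for a given value,
+recent versions of Riak Search expose an extract resource that returns
+the extracted field/value pairs without storing anything. This is a
+sketch, assuming your version supports the `/search/extract` endpoint
+and that `testdata.json` is a local file you want to inspect:
+
+```curl
+# Returns the field/value pairs the extractor would index
+curl -XPUT $RIAK_HOST/search/extract \
+  -H 'Content-Type: application/json' \
+  --data-binary @testdata.json
+```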
+
+Our current example uses the JSON extractor, but Riak Search also
+extracts indexable fields from the following content types:
+
+* JSON (`application/json`)
+* XML (`application/xml`, `text/xml`)
+* Plain text (`text/plain`)
+* [Riak Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/)
+  * counter (`application/riak_counter`)
+  * map (`application/riak_map`)
+  * set (`application/riak_set`)
+* noop (unknown content type)
+
+More on Riak Data Types can be found in [Riak Data Types and Search]({{<baseurl>}}riak/kv/3.0.4/developing/usage/searching-data-types).
+
+In the examples we've seen, the JSON field `name_s` is translated into a
+Solr index document field. Solr will index any field that it
+recognizes, based on the index's schema. The default schema
+(`_yz_default`) uses the suffix to decide the field type (`_s`
+represents a string, `_i` is an integer, `_b` is a boolean, and so on).
+
+If the content type allows for nested values (e.g. JSON and XML), the
+extractors will flatten each field, separated by dots. For example, if
+you have this XML:
+
+```xml
+<person>
+  <pets>
+    <pet>
+      <name_s>Spot</name_s>
+    </pet>
+  </pets>
+</person>
+```
+
+The extractor will convert it to the Solr field `person.pets.pet.name_s`
+with value `Spot`. Lists of values are assumed to be Solr multi-valued
+fields.
+
+```json
+{"people_ss":["Ryan", "Eric", "Brett"]}
+```
+
+The above JSON will insert a list of three values into Solr to be
+indexed: `people_ss=Ryan`, `people_ss=Eric`, `people_ss=Brett`.
+
+You can also create your own custom extractors if your data doesn't fit
+one of the default types. A full tutorial can be found in [Custom Search Extractors]({{<baseurl>}}riak/kv/3.0.4/developing/usage/custom-extractors).
+
+### Automatic Fields
+
+When a Riak object is indexed, Riak Search automatically inserts a few
+extra fields as well. These are necessary for a variety of technical
+reasons, and for the most part you don't need to think about them.
+However, there are a few fields which you may find useful:
+
+- `_yz_rk` (Riak key)
+- `_yz_rt` (Riak bucket type)
+- `_yz_rb` (Riak bucket)
+- `_yz_err` (extraction error)
+
+You can query on the basis of these fields, just like any other normal
+Solr fields. Most of the time, however, you'll use `_yz_rk` as a query
+result, which tells you the Riak key that matches the query you just
+ran. Let's see this in detail by running some queries in the next
+section.
+
+## Querying
+
+After the schema, index, association, and population/extraction/indexing
+are taken care of, you can get down to the fun part of querying your
+data.
+
+### Simple Query
+
+The basic query parameter is `q` via HTTP, or the first parameter of
+your chosen driver's `search` function (there are examples from all of
+our client libraries below). All distributed Solr queries are supported,
+which actually includes most of the single-node Solr queries. This
+example searches for all documents in which the `name_s` value begins
+with `Lion` by means of a glob (wildcard) match. 
+
+```java
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create("famous"), "name_s:Lion*")
+        .build();
+cluster.execute(searchOp);
+// This will display the actual results as a List of Maps:
+List<Map<String, List<String>>> results = searchOp.get().getAllResults();
+// This will display the results:
+System.out.println(results);
+```
+
+```ruby
+results = client.search("famous", "name_s:Lion*")
+p results
+p results['docs']
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('name_s:Lion*')
+  ->build()
+  ->execute();
+
+$response->getNumFound(); // 1
+
+var_dump($response->getDocs());
+```
+
+```python
+results = client.fulltext_search('famous', 'name_s:Lion*')
+print results
+print results['docs']
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "name_s")
+        .Search("Lion*")
+        .Build()
+};
+
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+foreach (RiakSearchResultDocument doc in searchResult.Documents)
+{
+    var args = new[] {
+        doc.BucketType,
+        doc.Bucket,
+        doc.Key,
+        string.Join(", ", doc.Fields.Select(f => f.Value).ToArray())
+    };
+    Debug.WriteLine(
+        format: "BucketType: {0} Bucket: {1} Key: {2} Values: {3}",
+        args: args);
+}
+```
+
+```javascript
+function search_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info("docs:", JSON.stringify(rslt.docs));
+}
+
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('name_s:Lion*')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"famous">>, <<"name_s:Lion*">>),
+io:fwrite("~p~n", [Results]),
+Docs = Results#search_results.docs,
+io:fwrite("~p~n", [Docs]).
+
+%% Please note that this example relies on an Erlang record definition
+%% for the search_result record found here:
+%% https://github.com/basho/riak-erlang-client/blob/master/include/riakc.hrl
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("name_s:Lion*").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+sc := cmd.(*riak.SearchCommand)
+if json, jerr := json.MarshalIndent(sc.Response.Docs, "", "  "); jerr != nil {
+    return jerr
+} else {
+    fmt.Println(string(json))
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=name_s:Lion*" | json_pp
+```
+
+The response to a query will be an object containing details about the
+response, such as a query's max score and a list of documents which
+match the given query. It's worth noting two things:
+
+* The documents returned are Search documents (a set of Solr
+  field/values), not a Riak value
+* The HTTP response is a direct Solr response, while the drivers use
+  Protocol Buffers and are encoded with different field names
+
+Here is a typical HTTP `response` value:
+
+```json
+{
+  "numFound": 1,
+  "start": 0,
+  "maxScore": 1.0,
+  "docs": [
+    {
+      "leader_b": true,
+      "age_i": 30,
+      "name_s": "Lion-o",
+      "_yz_id": "animals_cats_liono_37",
+      "_yz_rk": "liono",
+      "_yz_rt": "animals",
+      "_yz_rb": "cats"
+    }
+  ]
+}
+```
+
+The most important field returned is `docs`, which is the list of
+objects that each contain fields about matching index documents. 
The
+values you'll use most often are `_yz_rt` (Riak bucket type), `_yz_rb`
+(Riak bucket), `_yz_rk` (Riak key), and `score`, which represents the
+similarity of the matching doc to the query via [Lucene
+scoring](https://lucene.apache.org/core/4_6_0/core/org/apache/lucene/search/package-summary.html#scoring).
+
+In this example the query fields are returned because they're stored in
+Solr. This depends on your schema. If they are not stored, you'll have
+to perform a separate Riak GET operation to retrieve the value using the
+`_yz_rk` value.
+
+```java
+// Using the results object from above
+Map<String, List<String>> doc = results.get(0);
+String bucketType = doc.get("_yz_rt").get(0);
+String bucket = doc.get("_yz_rb").get(0);
+String key = doc.get("_yz_rk").get(0);
+Namespace namespace = new Namespace(bucketType, bucket);
+Location objectLocation = new Location(namespace, key);
+FetchValue fetchOp = new FetchValue.Builder(objectLocation)
+        .build();
+RiakObject obj = client.execute(fetchOp).getValue(RiakObject.class);
+System.out.println(obj.getValue());
+
+// {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```ruby
+doc = results['docs'].first
+btype = Riak::BucketType.new(client, doc["_yz_rt"]) # animals
+bucket = Riak::Bucket.new(client, doc["_yz_rb"]) # cats
+object = bucket.get( doc["_yz_rk"] ) # liono
+p object.data
+
+# {"name_s" => "Lion-o", "age_i" => 30, "leader_b" => true}
+```
+
+```php
+$doc = $response->getDocs()[0];
+$btype = $doc->_yz_rt; // animals
+$bucket = $doc->_yz_rb; // cats
+$key = $doc->_yz_rk; // liono
+$name = $doc->name_s; // Lion-o
+
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+  ->buildLocation($key, $bucket, $btype)
+  ->build()
+  ->execute()
+  ->getObject();
+
+var_dump($object->getData());
+```
+
+```python
+doc = results['docs'][0]
+bucket = client.bucket_type(doc['_yz_rt']).bucket(doc['_yz_rb']) # animals/cats
+object = bucket.get(doc['_yz_rk']) # liono
+print object.data
+
+# {"name_s": "Lion-o", "age_i": 30, "leader_b": true}
+```
+
+```csharp
+RiakSearchResult searchResult = searchRslt.Value;
+
+RiakSearchResultDocument doc = searchResult.Documents.First();
+var id = new RiakObjectId(doc.BucketType, doc.Bucket, doc.Key);
+var rslt = client.Get(id);
+
+RiakObject obj = rslt.Value;
+Debug.WriteLine(Encoding.UTF8.GetString(obj.Value));
+
+// {"name_s":"Lion-o","age_i":30,"leader_b":true}
+```
+
+```javascript
+var doc = rslt.docs.pop();
+var args = {
+    bucketType: doc._yz_rt,
+    bucket: doc._yz_rb,
+    key: doc._yz_rk,
+    convertToJs: true
+};
+client.fetchValue(args, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    logger.info(rslt.values[0].value);
+});
+```
+
+```erlang
+[{Index,Doc}|_] = Docs,
+BType  = proplists:get_value(<<"_yz_rt">>, Doc),  %% <<"animals">>
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),  %% <<"cats">>
+Key    = proplists:get_value(<<"_yz_rk">>, Doc),  %% <<"liono">>
+{ok, Obj} = riakc_pb_socket:get(Pid, {BType, Bucket}, Key),
+Val = riakc_obj:get_value(Obj),
+io:fwrite("~s~n", [Val]).
+
+%% {"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+```golang
+doc := sc.Response.Docs[0] // NB: SearchDoc struct type
+
+cmd, err = riak.NewFetchValueCommandBuilder().
+    WithBucketType(doc.BucketType).
+    WithBucket(doc.Bucket).
+    WithKey(doc.Key).
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl $RIAK_HOST/types/animals/buckets/cats/keys/liono
+
+# Response:
+
+{"name_s":"Lion-o", "age_i":30, "leader_b":true}
+```
+
+This was one simple glob query example. There are many query options, a
+more complete list of which can be found by digging into [searching
+Solr](https://cwiki.apache.org/confluence/display/solr/Searching). Let's
+look at a few others.
+
+### Range Queries
+
+Range queries are searches within a
+[range](https://cwiki.apache.org/confluence/display/solr/The+Standard+Query+Parser#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser)
+of numerical or
+date/[datemath](http://lucene.apache.org/solr/4_6_0/solr-core/org/apache/solr/util/DateMathParser.html)
+values.
+
+To find all famous cats who are 30 or younger: `age_i:[0 TO
+30]`. If you wanted to find all cats 30 or older, you could include a
+glob as a top end of the range: `age_i:[30 TO *]`.
+
+```java
+String index = "famous";
+String query = "age_i:[30 TO *]";
+SearchOperation searchOp = new SearchOperation
+        .Builder(BinaryValue.create(index), query)
+        .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('age_i:[30 TO *]')
+  ->build()
+  ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest("famous", "age_i:[30 TO *]");
+
+/*
+ * Fluent interface:
+ *
+ * var search = new RiakSearchRequest
+ * {
+ *     Query = new RiakFluentSearch("famous", "age_i")
+ *         .Between("30", "*")
+ *         .Build()
+ * };
+ */
+var rslt = client.Search(search);
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+<!-- TODO: pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY] -->
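+
+The TODO comment above hints at date-math ranges. As an illustrative
+sketch only: the `famous` index has no date field, but for a schema that
+defines a hypothetical `pubdate_dt` field as a date type, a query for
+documents dated within the last year could look like this
+(URL-encoded):
+
+```curl
+# q=pubdate_dt:[NOW-1YEAR/DAY TO NOW/DAY+1DAY]
+curl "$RIAK_HOST/search/query/famous?wt=json&q=pubdate_dt:%5BNOW-1YEAR%2FDAY%20TO%20NOW%2FDAY%2B1DAY%5D"
+```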
+
+### Boolean
+
+You can perform logical conjunctive, disjunctive, and negative
+operations on query elements as, respectively, `AND`, `OR`, and `NOT`.
+Let's say we want to see who is capable of being a US Senator (at least
+30 years old, and a leader). It requires a conjunctive query:
+`leader_b:true AND age_i:[30 TO *]`. 
+
+```java
+String index = "famous";
+String query = "leader_b:true AND age_i:[30 TO *]";
+Search searchOp = new Search.Builder(index, query).build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+client.search("famous", "leader_b:true AND age_i:[30 TO *]")
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+  ->withIndexName('famous')
+  ->withQuery('leader_b:true AND age_i:[30 TO *]')
+  ->build()
+  ->execute();
+```
+
+```python
+client.fulltext_search('famous', 'leader_b:true AND age_i:[30 TO *]')
+```
+
+```csharp
+var search = new RiakSearchRequest
+{
+    Query = new RiakFluentSearch("famous", "leader_b")
+        .Search("true").AndBetween("age_i", "30", "*")
+        .Build()
+};
+```
+
+```javascript
+var search = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('famous')
+    .withQuery('leader_b:true AND age_i:[30 TO *]')
+    .withCallback(search_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:search(Pid, <<"famous">>, <<"leader_b:true AND age_i:[30 TO *]">>),
+```
+
+```golang
+cmd, err := riak.NewSearchCommandBuilder().
+    WithIndexName("famous").
+    WithQuery("leader_b:true AND age_i:[30 TO *]").
+    Build();
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl "$RIAK_HOST/search/query/famous?wt=json&q=leader_b:true%20AND%20age_i:%5B30%20TO%20*%5D" | json_pp
+```
+
+### Deleting Indexes
+
+Indexes may be deleted if they have no buckets associated with them:
+
+```java
+String index = "famous";
+YzDeleteIndexOperation deleteOp = new YzDeleteIndexOperation.Builder(index)
+        .build();
+cluster.execute(deleteOp);
+```
+
+```ruby
+client.delete_search_index('famous')
+```
+
+```php
+(new \Basho\Riak\Command\Builder\Search\DeleteIndex($riak))
+  ->withName('famous')
+  ->build()
+  ->execute();
+```
+
+```python
+client.delete_search_index('famous')
+```
+
+```csharp
+var rslt = client.DeleteSearchIndex("famous");
+```
+
+```javascript
+function delete_cb(err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    if (rslt === true) {
+        // success
+    } else {
+        // error
+    }
+}
+
+// NB: first make sure that no bucket types or buckets are using the index
+var search = new Riak.Commands.YZ.DeleteIndex.Builder()
+    .withIndexName('famous')
+    .withCallback(delete_cb)
+    .build();
+client.execute(search);
+```
+
+```erlang
+riakc_pb_socket:delete_search_index(Pid, <<"famous">>, []).
+```
+
+```golang
+cmd, err := riak.NewStoreBucketPropsCommandBuilder().
+    WithBucketType("animals").
+    WithBucket("cats").
+    WithSearchIndex("_dont_index_").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+
+cmd, err = riak.NewDeleteIndexCommandBuilder().
+    WithIndexName("famous").
+    Build()
+if err != nil {
+    return err
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    return err
+}
+```
+
+```curl
+curl -XDELETE $RIAK_HOST/search/index/famous
+```
+
+If an index does have a bucket associated with it, then that bucket's
+`search_index` property must be changed to either a different index name
+or to the sentinel value `_dont_index_`.
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"_dont_index_"}}'
+```
+
+### Pagination
+
+A common requirement you may face is paginating searches, where an
+ordered set of matching documents is returned in non-overlapping
+sequential subsets (in other words, *pages*). This is easy to do with 
This is easy to do with +the `start` and `rows` parameters, where `start` is the number of +documents to skip over (the offset) and `rows` are the number of results +to return in one go. + +For example, assuming we want two results per page, getting the second +page is easy, where `start` is calculated as (rows per page) * (page +number - 1). + +```java +int rowsPerPage = 2; +int page = 2; +int start = rowsPerPage * (page - 1); + +SearchOperation searchOp = new SearchOperation + .Builder(BinaryValue.create("famous"), "*:*") + .withStart(start) + .withNumRows(rowsPerPage) + .build(); +client.execute(searchOp); +StoreOperation.Response response = searchOp.get(); +``` + +```ruby +ROWS_PER_PAGE=2 +page = 2 +start = ROWS_PER_PAGE * (page - 1) + +client.search("famous", "*:*", {:start => start, :rows => ROWS_PER_PAGE}) +``` + +```php +$maxRows = 2; +$page = 2; +$start = $rowsPerPAge * (page - 1); + +(new \Basho\Riak\Command\Builder\Search\FetchObjects($riak)) + ->withIndexName('famous') + ->withQuery('*:*') + ->withMaxRows($maxRows) + ->withStartRow($start) + ->build() + ->execute(); +``` + +```python +ROWS_PER_PAGE=2 +page = 2 +start = ROWS_PER_PAGE * (page - 1) + +client.fulltext_search('famous', '*:*', start=start, rows=ROWS_PER_PAGE) +``` + +```csharp +int rowsPerPage = 2; +int page = 2; +int start = rowsPerPage * (page - 1); + +var search = new RiakSearchRequest +{ + Start = start, + Rows = rowsPerPage, + Query = new RiakFluentSearch("famous", "*") + .Search("*") + .Build(), +}; + +var rslt = client.Search(search); +``` + +```javascript +var rowsPerPage = 2; +var page = 2; +var start = rowsPerPage * (page - 1); + +var search = new Riak.Commands.YZ.Search.Builder() + .withIndexName('famous') + .withQuery('*:*') + .withStart(start) + .withNumRows(rowsPerPage) + .withCallback(search_cb) + .build(); +client.execute(search); +``` + +```erlang +-define(ROWS_PER_PAGE, 2). + +Page = 2, +Start = ?ROWS_PER_PAGE * (Page - 1), + +riakc_pb_socket:search(Pid, <<"famous">>, <<"*:*">>, [{start, Start},{rows, ?ROWS_PER_PAGE}]), +``` + +```golang +rowsPerPage := uint32(2) +page := uint32(2) +start := rowsPerPage * (page - uint32(1)) + +cmd, err := riak.NewSearchCommandBuilder(). + WithIndexName("famous"). + WithQuery("*:*"). + WithStart(start). + WithNumRows(rowsPerPage). + Build(); +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +ROWS_PER_PAGE=2 +PAGE=2 +START=$(($ROWS_PER_PAGE * ($PAGE-1))) + +curl +curl "$RIAK_HOST/search/query/famous?wt=json&q=*:*&start=$START&rows=$ROWS_PER_PAGE" | json_pp +``` + +### Pagination Warning + +Distributed pagination in Riak Search cannot be used reliably when +sorting on fields that can have different values per replica of the same +object, namely `score` and `_yz_id`. In the case of sorting by these +fields, you may receive redundant objects. In the case of `score`, the +top-N can return different results over multiple runs. + +If you are paginating simply to get all keys that match and don't care +about the score, then you can sort on type-bucket-key (eg. `_yz_rt asc`, +`_yz_rb asc`, `_yz_rk asc`) to get consistent results. + +If you want to sort by score without repeating results then you must set +`rows` >= `numFound`. This requires having some idea of how many rows +will match before running the query. 
+ +[This issue](https://github.com/basho/yokozuna/issues/355) is caused by +the way Search must minimally distribute a query across multiple Solr +nodes (called a *coverage plan*) and then filter duplicate results to +retrieve a full result set. Since this plan is frequently recalculated, +successive page queries may use a different plan, and thus calculate +alternate `score`s or filter different `_yz_id` values. We have plans to +fix this shortcoming in a future version of Riak. + +### MapReduce + +Riak Search allows for piping search results as inputs for +[MapReduce]({{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce/) jobs. This is a useful cross-section for +performing post-calculations of results or aggregations of ad-hoc +queries. The Riak Search MapReduce integration works similarly to +regular MapReduce, with the notable exception that your input is not a +bucket, but rather index and query arguments to the `yokozuna` module +and `mapred_search` function (an Erlang `module:function` pair that adds +the Riak Search hook to MapReduce). + +```json +{ + "inputs": { + "module": "yokozuna", + "function": "mapred_search", + "arg": ["famous","NOT leader_b:true"] + }, + "query": [ + { + "map": { + "language": "javascript", + "keep": false, + "source": "function(v) { return [1]; }" + } + }, + { + "reduce": { + "language": "javascript", + "keep": true, + "name": "Riak.reduceSum" + } + } + ] +} +``` + +In this example we're searching for all famous cats that are not +leaders and counting up the results using Javascript for both map and +reduce. It should return the reduced sum of `[3]`. + +```curl +curl -XPOST $RIAK_HOST/mapred \ + -H 'Content-Type: application/json' \ + -d '{"inputs":{"module":"yokozuna","function":"mapred_search","arg":["famous","NOT leader_b:true"]},"query":[{"map":{"language":"javascript","keep":false,"source":"function(v) { return [1]; }"}},{"reduce":{"language":"javascript","keep":true,"name":"Riak.reduceSum"}}]}' +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/usage/searching-data-types.md b/content/riak/kv/3.0.4/developing/usage/searching-data-types.md new file mode 100644 index 0000000000..9dc0a23a17 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/searching-data-types.md @@ -0,0 +1,1687 @@ +--- +title: "Searching with Data Types" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Searching with Data Types" + identifier: "usage_search_data_types" + weight: 111 + parent: "developing_usage" +toc: true +aliases: + - /riak/3.0.4/dev/search/search-data-types + - /riak/kv/3.0.4/dev/search/search-data-types +--- + +Although [Riak Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types) function differently from other +Riak objects in some respects, when you're using Search you can think of +them as normal Riak objects with special metadata attached (metadata +that you don't need to worry about as a user). Riak's [counters]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#counters), [sets]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#sets), and [maps]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#maps) +can be indexed and have their contents searched just like other Riak +objects. + +## Data Type MIME Types + +Like all objects stored in Riak, Riak Data Types are assigned content +types. Unlike other Riak objects, this happens automatically. When you +store, say, a counter in Riak, it will automatically be assigned the +type `application/riak_counter`. 
The table below provides the full list
+of content types:
+
+Data Type | Content Type
+:---------|:------------
+Counters | `application/riak_counter`
+Sets | `application/riak_set`
+Maps | `application/riak_map`
+
+When using Search, you won't need to worry about this, as Riak Data
+Types are automatically indexed on the basis of these content types.
+
+## Data Type Schemas
+
+There are two types of schemas related to Riak Data Types:
+
+* **Top-level schemas** relate to Data Types that are stored at the key
+  level (counters and sets)
+* **Embedded schemas** relate to Data Types nested inside of maps
+  (flags, counters, registers, and sets)
+
+As you can see from the [default Search
+schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml#L96),
+each of the Data Types, with the exception of maps, has its own default
+field definition, which means that the `_yz_default` schema will
+automatically index Data Types on the basis of their assigned content
+type. This means that there is no extra work involved in indexing Riak
+Data Types. You can simply store them and begin querying, provided that
+they are properly indexed, which is covered in the
+[examples](#data-types-and-search-examples) section below.
+
+As mentioned above, there are no default schemas available for maps.
+This is because maps are essentially carriers for the other Data Types.
+Even when maps are embedded within other maps, all of the data that you
+might wish to index and search is contained in counters, sets,
+registers, and flags.
+
+The sections immediately below provide the default schemas for each Riak
+Data Type. Because you will not need to manipulate these default schemas
+to search Data Types, they are provided only for reference.
+
+### Top-level Schemas
+
+The default schema for [counters]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#counters) indexes each
+counter as an integer.
+
+```xml
+<field name="counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Constructing queries for counters involves prefacing the query with
+`counter`. Below are some examples:
+
+Query | Syntax
+:-----|:------
+Counters with a value over 10 | `counter:[10 TO *]`
+Counters with a value below 10 or above 50 | `counter:[* TO 10] OR counter:[50 TO *]`
+Counters with a value of 15 | `counter:15`
+All counters within the index | `counter:*`
+
+The schema for [sets]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#sets) indexes each element of a set as
+a string and indexes the set itself as multi-valued.
+
+```xml
+<field name="set" type="string" indexed="true" stored="false" multiValued="true" />
+```
+
+To query sets, preface the query with `set`. The table below shows some
+examples:
+
+Query | Syntax
+:-----|:------
+Sets that contain the value `apple` | `set:apple`
+Sets that contain an item beginning with `level` | `set:level*`
+Sets that contain both `apple` and `orange` | `set:apple AND set:orange`
+All sets within the index | `set:*`
+
+### Embedded Schemas
+
+For searching within [maps]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#maps), there are four schemas
+for embedded, aka dynamic, fields. 
Flags are indexed as booleans:
+
+```xml
+<dynamicField name="*_flag" type="boolean" indexed="true" stored="true" multiValued="false" />
+```
+
+Counters, like their top-level counterparts, are indexed as integers:
+
+```xml
+<dynamicField name="*_counter" type="int" indexed="true" stored="true" multiValued="false" />
+```
+
+Registers are indexed as strings, but unlike sets they are not
+multi-valued:
+
+```xml
+<dynamicField name="*_register" type="string" indexed="true" stored="true" multiValued="false" />
+```
+
+Finally, sets at the embedded level are indexed as multi-valued strings:
+
+```xml
+<dynamicField name="*_set" type="string" indexed="true" stored="true" multiValued="true" />
+```
+
+To query embedded fields, you must provide the name of the field. The
+table below provides some examples:
+
+Query | Syntax
+:-----|:------
+Maps containing a set called `hobbies` | `hobbies_set:*`
+Maps containing a `score` counter over 50 | `score_counter:[50 TO *]`
+Maps containing disabled `advanced` flags | `advanced_flag:false`
+Maps containing enabled `advanced` flags and `score` counters under 10 | `advanced_flag:true AND score_counter:[* TO 10]`
+
+You can also query maps within maps, which is covered in the **Searching
+Maps Within Maps** section below.
+
+## Data Types and Search Examples
+
+In this section, we'll start with two simple examples, one involving
+counters and the other involving sets. Later on, we'll introduce a
+slightly more complex map example.
+
+## Counters Example
+
+Let's say that we're storing scores in a multiplayer online game in
+Riak. The game is called Boulderdash and it involves smashing digital
+boulders armed with nothing but witty retorts and arcane trivia
+knowledge. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) for [storing counters]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#counters) simply called
+`counters`, like so:
+
+```bash
+riak-admin bucket-type create counters '{"props":{"datatype":"counter"}}'
+riak-admin bucket-type activate counters
+```
+
+Now, we'll create a search index called `scores` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex scoresIndex = new YokozunaIndex("scores", "_yz_default");
+StoreIndex storeIndex = new StoreIndex.Builder(scoresIndex)
+        .build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('scores', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('scores')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('scores', '_yz_default')
+```
+
+```csharp
+var idx = new SearchIndex("scores", "_yz_default");
+var rslt = client.PutSearchIndex(idx);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'scores'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"scores">>, <<"_yz_default">>, []).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/scores \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+Now, we can modify our `counters` bucket type to associate that bucket
+type with our `scores` index:
+
+```bash
+riak-admin bucket-type update counters '{"props":{"search_index":"scores"}}'
+```
+
+At this point, all of the counters that we stored in any bucket with the
+bucket type `counters` will be indexed in our `scores` index. 
So let's
+start playing with some counters. All counters will be stored in the
+bucket `people`, while the key for each counter will be the username of
+each player:
+
+```java
+Namespace peopleBucket = new Namespace("counters", "people");
+
+Location christopherHitchensCounter = new Location(peopleBucket, "chris_hitchens");
+CounterUpdate cu1 = new CounterUpdate(10);
+UpdateCounter update1 = new UpdateCounter.Builder(christopherHitchensCounter, cu1)
+        .build();
+client.execute(update1);
+
+Location joanRiversCounter = new Location(peopleBucket, "joan_rivers");
+CounterUpdate cu2 = new CounterUpdate(25);
+UpdateCounter update2 = new UpdateCounter.Builder(joanRiversCounter, cu2)
+        .build();
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+christopher_hitchens_counter = Riak::Crdt::Counter.new(bucket, 'chris_hitchens', 'counters')
+christopher_hitchens_counter.increment(10)
+
+joan_rivers_counter = Riak::Crdt::Counter.new(bucket, 'joan_rivers', 'counters')
+joan_rivers_counter.increment(25)
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10)
+    ->buildLocation('chris_hitchens', 'people', 'counters');
+
+$builder->build()->execute();
+
+$builder->withIncrement(25)
+    ->buildLocation('joan_rivers', 'people', 'counters')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Counter
+
+bucket = client.bucket_type('counters').bucket('people')
+
+christopher_hitchens_counter = Counter(bucket, 'chris_hitchens')
+christopher_hitchens_counter.increment(10)
+christopher_hitchens_counter.store()
+
+joan_rivers_counter = Counter(bucket, 'joan_rivers')
+joan_rivers_counter.increment(25)
+joan_rivers_counter.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("chris_hitchens")
+    .WithIncrement(10)
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateCounter.Builder()
+    .WithBucketType("counters")
+    .WithBucket("people")
+    .WithKey("joan_rivers")
+    .WithIncrement(25)
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'chris_hitchens',
+            increment: 10
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'counters',
+            bucket: 'people',
+            key: 'joan_rivers',
+            increment: 25
+        };
+
+        client.updateCounter(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+ChristopherHitchensCounter = riakc_counter:new(),
+HitchensCounter1 = riakc_counter:increment(10, ChristopherHitchensCounter),
+JoanRiversCounter = riakc_counter:new(),
+RiversCounter1 = riakc_counter:increment(25, JoanRiversCounter),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"chris_hitchens">>,
+                            riakc_counter:to_op(HitchensCounter1)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"counters">>, <<"people">>},
+                            <<"joan_rivers">>,
+                            riakc_counter:to_op(RiversCounter1)).
+```
+
+```curl
+# We do not recommend working with Riak Data Types via curl. Try using
+# one of our client libraries instead.
+```
+
+So now we have two counters, one with a value of 10 and the other with a
+value of 25. 
Let's query to see how many counters have a value greater
+than 20, just to be sure:
+
+```java
+String index = "scores";
+String query = "counter:[20 TO *]";
+SearchOperation searchOp = new SearchOperation.Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[20 TO *]')
+# This should return a Hash with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[20 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[20 TO *]')
+# This should return a dict with fields like 'num_found' and 'docs'
+
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("counter numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[20 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[20 TO *]">>),
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:%5B20%20TO%20*%5D" | json_pp
+```
+
+And there we are: only one of our two stored counters has a value over 20. 
+To find out which counter that is, we can dig into our results:
+
+```java
+// Using the "results" object from above:
+int numberFound = results.numResults();
+Map<String, List<String>> foundObject = results.getAllResults().get(0);
+String key = foundObject.get("_yz_rk").get(0); // "joan_rivers"
+String bucket = foundObject.get("_yz_rb").get(0); // "people"
+String bucketType = foundObject.get("_yz_rt").get(0); // "counters"
+```
+
+```ruby
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```php
+$doc = $response->getDocs()[0];
+
+# The key
+$doc->_yz_rk; # 'joan_rivers'
+
+# The bucket
+$doc->_yz_rb; # 'people'
+
+# The bucket type
+$doc->_yz_rt; # 'counters'
+```
+
+```python
+doc = results['docs'][0]
+
+# The key
+doc['_yz_rk'] # 'joan_rivers'
+
+# The bucket
+doc['_yz_rb'] # 'people'
+
+# The bucket type
+doc['_yz_rt'] # 'counters'
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[20 TO *]");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+var doc = rslt.docs[0];
+
+var key = doc['_yz_rk'];
+var bucket = doc['_yz_rb'];
+var bucketType = doc['_yz_rt'];
+```
+
+```erlang
+Docs = Results#search_results.docs,
+Doc = lists:nth(1, Docs),
+Key = proplists:get_value(<<"_yz_rk">>, Doc),
+Bucket = proplists:get_value(<<"_yz_rb">>, Doc),
+BucketType = proplists:get_value(<<"_yz_rt">>, Doc).
+```
+
+```curl
+# Use the JSON object from above to locate bucket, key, and bucket type
+# information
+```
+
+Alternatively, we can see how many counters have values below 15:
+
+```java
+String index = "scores";
+String query = "counter:[* TO 15]";
+SearchOperation searchOp = new SearchOperation
+    .Builder(BinaryValue.create(index), query)
+    .build();
+cluster.execute(searchOp);
+SearchOperation.Response results = searchOp.get();
+```
+
+```ruby
+results = client.search('scores', 'counter:[* TO 15]')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:[* TO 15]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:[* TO 15]')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:[* TO 15]");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:[* TO 15]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:[* TO 15]">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:%5B*%20TO%2015%5D" | json_pp
+```
+
+Or we can see how many counters have a value of 17 exactly:
+
+```java
+// Using the same method as above, just changing the query:
+String query = "counter:17";
+```
+
+```ruby
+results = client.search('scores', 'counter:17')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('scores')
+    ->withQuery('counter:17')
+    ->build()
+    ->execute();
+```
+
+```python
+results = client.fulltext_search('scores', 'counter:17')
+```
+
+```csharp
+var search = new RiakSearchRequest("scores", "counter:17");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('scores')
+    .withQuery('counter:17')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"scores">>, <<"counter:17">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/scores?wt=json&q=counter:17" | json_pp
+```
+
+## Sets Example
+
+Let's say that we're storing information about the hobbies of a group of
+people in sets. We'll create and activate a [bucket type]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) for [storing sets]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#sets) simply called `sets`,
+like so:
+
+```bash
+riak-admin bucket-type create sets '{"props":{"datatype":"set"}}'
+riak-admin bucket-type activate sets
+```
+
+Now, we'll create a Search index called `hobbies` that uses the default
+schema (as in some of the examples above):
+
+```java
+YokozunaIndex hobbiesIndex = new YokozunaIndex("hobbies", "_yz_default");
+StoreIndex storeIndex =
+    new StoreIndex.Builder(hobbiesIndex).build();
+client.execute(storeIndex);
+```
+
+```ruby
+client.create_search_index('hobbies', '_yz_default')
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\StoreIndex($riak))
+    ->withName('hobbies')
+    ->usingSchema('_yz_default')
+    ->build()
+    ->execute();
+```
+
+```python
+client.create_search_index('hobbies', '_yz_default')
+```
+
+```csharp
+var searchIndex = new SearchIndex("hobbies", "_yz_default");
+var rslt = client.PutSearchIndex(searchIndex);
+```
+
+```javascript
+var options = {
+    schemaName: '_yz_default',
+    indexName: 'hobbies'
+};
+client.storeIndex(options, function (err, rslt) {
+});
+```
+
+```erlang
+riakc_pb_socket:create_search_index(Pid, <<"hobbies">>, <<"_yz_default">>).
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/hobbies \
+  -H 'Content-Type: application/json' \
+  -d '{"schema": "_yz_default"}'
+```
+
+Now, we can modify our `sets` bucket type to associate that bucket type
+with our `hobbies` index:
+
+```bash
+riak-admin bucket-type update sets '{"props":{"search_index":"hobbies"}}'
+```
+
+Now, all of the sets that we store in any bucket with the bucket type
+`sets` will be automatically indexed in our `hobbies` index. 
So let's say that we
+store two sets for two different people describing their respective
+hobbies, in the bucket `people`:
+
+```java
+Namespace peopleBucket = new Namespace("sets", "people");
+
+Location mikeDitkaSet = new Location(peopleBucket, "ditka");
+SetUpdate su1 = new SetUpdate()
+    .add("football")
+    .add("winning");
+UpdateSet update1 = new UpdateSet.Builder(mikeDitkaSet, su1).build();
+
+Location ronnieJamesDioSet = new Location(peopleBucket, "dio");
+SetUpdate su2 = new SetUpdate()
+    .add("wailing")
+    .add("rocking")
+    .add("winning");
+UpdateSet update2 = new UpdateSet.Builder(ronnieJamesDioSet, su2).build();
+
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('people')
+
+mike_ditka_set = Riak::Crdt::Set.new(bucket, 'ditka', 'sets')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+
+ronnie_james_dio_set = Riak::Crdt::Set.new(bucket, 'dio', 'sets')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+```
+
+```php
+$builder = (new \Basho\Riak\Command\Builder\UpdateSet($riak))
+    ->add('football')
+    ->add('winning')
+    ->buildLocation('ditka', 'people', 'sets');
+
+$builder->build()->execute();
+
+$builder->add('wailing')
+    ->add('rocking')
+    ->add('winning')
+    ->buildLocation('dio', 'people', 'sets')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Set
+
+bucket = client.bucket_type('sets').bucket('people')
+
+mike_ditka_set = Set(bucket, 'ditka')
+mike_ditka_set.add('football')
+mike_ditka_set.add('winning')
+mike_ditka_set.store()
+
+ronnie_james_dio_set = Set(bucket, 'dio')
+ronnie_james_dio_set.add('wailing')
+ronnie_james_dio_set.add('rocking')
+ronnie_james_dio_set.add('winning')
+ronnie_james_dio_set.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+var cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("ditka")
+    .WithAdditions(new[] { "football", "winning" })
+    .Build();
+RiakResult rslt = client.Execute(cmd);
+
+cmd = new UpdateSet.Builder()
+    .WithBucketType("sets")
+    .WithBucket("people")
+    .WithKey("dio")
+    .WithAdditions(new[] { "wailing", "rocking", "winning" })
+    .Build();
+rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'ditka',
+            additions: ['football', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'sets',
+            bucket: 'people',
+            key: 'dio',
+            additions: ['wailing', 'rocking', 'winning']
+        };
+
+        client.updateSet(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+```erlang
+MikeDitkaSet = riakc_set:new(),
+MikeDitkaSet1 = riakc_set:add_element(<<"football">>, MikeDitkaSet),
+MikeDitkaSet2 = riakc_set:add_element(<<"winning">>, MikeDitkaSet1),
+RonnieJamesDioSet = riakc_set:new(),
+RonnieJamesDioSet1 = riakc_set:add_element(<<"wailing">>, RonnieJamesDioSet),
+RonnieJamesDioSet2 = riakc_set:add_element(<<"rocking">>, RonnieJamesDioSet1),
+RonnieJamesDioSet3 = riakc_set:add_element(<<"winning">>, RonnieJamesDioSet2),
+
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"ditka">>,
+                            riakc_set:to_op(MikeDitkaSet2)),
+riakc_pb_socket:update_type(Pid,
+                            {<<"sets">>, <<"people">>},
+                            <<"dio">>,
+                            riakc_set:to_op(RonnieJamesDioSet3)). 
+```
+
+Now, we can query our `hobbies` index to see if anyone has the hobby
+`football`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:football";
+```
+
+```ruby
+results = client.search('hobbies', 'set:football')
+# This should return a Hash with fields like 'num_found' and 'docs'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('hobbies')
+    ->withQuery('set:football')
+    ->build()
+    ->execute();
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:football')
+# This should return a dict with fields like 'num_found' and 'docs'
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:football");
+var rslt = client.Search(search);
+
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+
+var firstDoc = searchResult.Documents.First();
+Console.WriteLine("Key: {0} Bucket: {1} Type: {2}",
+    firstDoc.Key, firstDoc.Bucket, firstDoc.BucketType);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("sets numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:football')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:football">>).
+```
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:football" | json_pp
+```
+
+Let's see how many sets contain the element `football`:
+
+```java
+// Using the same method explained above for getting search results:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results['num_found']
+# 1
+```
+
+```php
+$response->getNumFound(); // 1
+```
+
+```python
+results['num_found']
+# 1
+```
+
+```csharp
+RiakSearchResult searchResult = rslt.Value;
+Console.WriteLine("Num found: {0}", searchResult.NumFound);
+```
+
+```javascript
+rslt.numFound;
+// 1
+```
+
+```erlang
+NumberFound = Results#search_results.num_found.
+%% 1
+```
+
+```curl
+# Check the "num_found" field in the JSON response above; it should be 1
+```
+
+Success! We stored two sets, only one of which contains the element
+`football`. Now, let's see how many sets contain the element `winning`:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "set:winning";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('hobbies')
+    ->withQuery('set:winning')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('hobbies', 'set:winning')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("hobbies", "set:winning");
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('hobbies')
+    .withQuery('set:winning')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+```erlang
+{ok, Results} = riakc_pb_socket:search(Pid, <<"hobbies">>, <<"set:winning">>),
+NumberFound = Results#search_results.num_found.
+%% 2
+```
+
+Just as expected, both sets we stored contain the element `winning`.
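+
+If you're working over HTTP, the same `winning` query can be run against
+the `hobbies` index like this (a quick sketch using the `$RIAK_HOST`
+variable from the examples above):
+
+```curl
+curl "$RIAK_HOST/search/query/hobbies?wt=json&q=set:winning" | json_pp
+```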
+ +## Maps Example + +This example will build on the example in the [Using Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types) +tutorial. That tutorial walks you through storing CMS-style user data in +Riak [maps]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/#maps), and we'd suggest that you +familiarize yourself with that tutorial first. More specifically, user +data is stored in the following fields in each user's map: + +* first name in a `first_name` register +* last name in a `last_name` register +* whether the user is an enterprise customer in an `enterprise_customer` + flag +* the number of times the user has visited the company page in a + `page_visits` counter +* a list of the user's interests in an `interests` set + +First, let's create and activate a bucket type simply called `maps` that +is set up to store Riak maps: + +```bash +riak-admin bucket-type create maps '{"props":{"datatype":"map"}}' +riak-admin bucket-type activate maps +``` + +Now, let's create a search index called `customers` using the default +schema: + +```java +YokozunaIndex customersIndex = new YokozunaIndex("customers", "_yz_default"); +StoreIndex storeIndex = + new StoreIndex.Builder(customersIndex).build(); +client.execute(storeIndex); +``` + +```ruby +client.create_search_index('customers', '_yz_default') +``` + +```php +(new Command\Builder\Search\StoreIndex($riak)) + ->withName('customers') + ->usingSchema('_yz_default') + ->build() + ->execute(); +``` + +```python +client.create_search_index('customers', '_yz_default') +``` + +```csharp +var searchIndex = new SearchIndex("customers", "_yz_default"); +var rslt = client.PutSearchIndex(searchIndex); +``` + +```javascript +var options = { + schemaName: '_yz_default', + indexName: 'customers' +}; +client.storeIndex(options, function (err, rslt) { +}); +``` + +```erlang +riakc_pb_socket:create_search_index(Pid, <<"customers">>, <<"_yz_default">>). 
+```
+
+```curl
+curl -XPUT $RIAK_HOST/search/index/customers \
+  -H 'Content-Type: application/json' \
+  -d '{"schema":"_yz_default"}'
+```
+
+With our index created, we can associate our new `customers` index with
+our `maps` bucket type:
+
+```bash
+riak-admin bucket-type update maps '{"props":{"search_index":"customers"}}'
+```
+
+Now we can create some maps along the lines suggested above:
+
+```java
+Namespace customersBucket = new Namespace("maps", "customers");
+
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate mu1 = new MapUpdate()
+    .update("first_name", new RegisterUpdate("Idris"))
+    .update("last_name", new RegisterUpdate("Elba"))
+    .update("enterprise_customer", new FlagUpdate(false))
+    .update("page_visits", new CounterUpdate(10))
+    .update("interests", new SetUpdate().add("acting").add("being Stringer Bell"));
+
+Location joanJettMap = new Location(customersBucket, "joan_jett");
+MapUpdate mu2 = new MapUpdate()
+    .update("first_name", new RegisterUpdate("Joan"))
+    .update("last_name", new RegisterUpdate("Jett"))
+    // Joan Jett is not an enterprise customer, so we don't need to
+    // explicitly disable the "enterprise_customer" flag, as all
+    // flags are disabled by default
+    .update("page_visits", new CounterUpdate(25))
+    .update("interests", new SetUpdate().add("loving rock and roll").add("being in the Blackhearts"));
+
+UpdateMap update1 = new UpdateMap.Builder(idrisElbaMap, mu1).build();
+UpdateMap update2 = new UpdateMap.Builder(joanJettMap, mu2).build();
+client.execute(update1);
+client.execute(update2);
+```
+
+```ruby
+bucket = client.bucket('customers')
+
+idris_elba = Riak::Crdt::Map.new(bucket, 'idris_elba', 'maps')
+
+idris_elba.batch do |ie|
+  ie.registers['first_name'] = 'Idris'
+  ie.registers['last_name'] = 'Elba'
+  ie.flags['enterprise_customer'] = true
+  ie.counters['page_visits'].increment(10)
+  ['acting', 'being Stringer Bell'].each do |interest|
+    ie.sets['interests'].add(interest)
+  end
+end
+
+joan_jett = Riak::Crdt::Map.new(bucket, 'joan_jett', 'maps')
+joan_jett.batch do |jj|
+  jj.registers['first_name'] = 'Joan'
+  jj.registers['last_name'] = 'Jett'
+  ## Joan Jett is not an enterprise customer, so we don't need to
+  ## explicitly disable this flag, as all flags are disabled by default
+  jj.counters['page_visits'].increment(25)
+  ['loving rock and roll', 'being in the Blackhearts'].each do |interest|
+    jj.sets['interests'].add(interest)
+  end
+end
+```
+
+```php
+$counterBuilder = (new \Basho\Riak\Command\Builder\IncrementCounter($riak))
+    ->withIncrement(10);
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['acting', 'being Stringer Bell'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Idris')
+    ->updateRegister('last_name', 'Elba')
+    ->updateFlag('enterprise_customer', true)
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$setBuilder = (new \Basho\Riak\Command\Builder\UpdateSet($riak));
+
+foreach(['loving rock and roll', 'being in the Blackhearts'] as $interest) {
+    $setBuilder->add($interest);
+}
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('first_name', 'Joan')
+    ->updateRegister('last_name', 'Jett')
+    ->updateSet('interests', $setBuilder)
+    ->updateCounter('page_visits', $counterBuilder->withIncrement(25))
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+from riak.datatypes import Map
+
+bucket = client.bucket_type('maps').bucket('customers')
+
+idris_elba = Map(bucket, 'idris_elba')
+idris_elba.registers['first_name'].assign('Idris')
+idris_elba.registers['last_name'].assign('Elba')
+idris_elba.flags['enterprise_customer'].enable()
+idris_elba.counters['page_visits'].increment(10)
+for interest in ['acting', 'being Stringer Bell']:
+    idris_elba.sets['interests'].add(interest)
+idris_elba.store()
+
+joan_jett = Map(bucket, 'joan_jett')
+joan_jett.registers['first_name'].assign('Joan')
+joan_jett.registers['last_name'].assign('Jett')
+# Joan Jett is not an enterprise customer, so we don't need to
+# explicitly disable this flag, as all flags are disabled by default
+joan_jett.counters['page_visits'].increment(25)
+for interest in ['loving rock and roll', 'being in the Blackhearts']:
+    joan_jett.sets['interests'].add(interest)
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+// Note: similar code for Joan Jett
+
+const string firstNameRegister = "first_name";
+const string lastNameRegister = "last_name";
+const string enterpriseCustomerFlag = "enterprise_customer";
+const string pageVisitsCounter = "page_visits";
+const string interestsSet = "interests";
+
+var idrisAdds = new[] { "acting", "being Stringer Bell" };
+
+var mapOp = new UpdateMap.MapOperation()
+    .SetRegister(firstNameRegister, "Idris")
+    .SetRegister(lastNameRegister, "Elba")
+    .SetFlag(enterpriseCustomerFlag, false)
+    .IncrementCounter(pageVisitsCounter, 10)
+    .AddToSet(interestsSet, idrisAdds);
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Idris');
+        mapOp.setRegister('last_name', 'Elba');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 10);
+        mapOp.addToSet('interests', 'acting');
+        mapOp.addToSet('interests', 'being Stringer Bell');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        mapOp.setRegister('first_name', 'Joan');
+        mapOp.setRegister('last_name', 'Jett');
+        mapOp.setFlag('enterprise_customer', false);
+        mapOp.incrementCounter('page_visits', 25);
+        mapOp.addToSet('interests', 'loving rock and roll');
+        mapOp.addToSet('interests', 'being in the Blackhearts');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+### Searching Counters Within Maps
+
+We now have two maps stored in Riak that we can query. Let's query to
+see how many users have page visit counters above 15. 
Unlike the
+counters example above, we have to specify _which_ counter we're
+querying:
+
+```java
+// Using the same method explained above, just changing the query:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+```
+
+```ruby
+results = client.search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('page_visits_counter:[15 TO *]')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+```
+
+```python
+results = client.fulltext_search('customers', 'page_visits_counter:[15 TO *]')
+results['num_found']
+# 1
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+```
+
+```javascript
+function search_cb(err, rslt) {
+    logger.info("numFound: '%d', docs: '%s'",
+        rslt.numFound, JSON.stringify(rslt.docs));
+
+    var doc = rslt.docs[0];
+    var key = doc['_yz_rk'];
+    var bucket = doc['_yz_rb'];
+    var bucketType = doc['_yz_rt'];
+}
+
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('page_visits_counter:[15 TO *]')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, one of our two stored maps has a `page_visits` counter
+above 15. Let's make sure that we have the right result:
+
+```java
+// Using the same method from above:
+String query = "page_visits_counter:[15 TO *]";
+
+// Again using the same method from above:
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results['docs'][0]['first_name_register']
+# 'Joan'
+```
+
+```php
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results['docs'][0]['first_name_register']
+# u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "page_visits_counter:[15 TO *]");
+var rslt = client.Search(search);
+RiakSearchResult searchResult = rslt.Value;
+var firstDoc = searchResult.Documents.First();
+```
+
+```javascript
+var doc = rslt.docs[0];
+doc.first_name_register; // 'Joan'
+```
+
+Success! Now we can test out searching sets.
+
+### Searching Sets Within Maps
+
+Each of the maps we stored thus far has an `interests` set. First, let's
+see how many of our maps even _have_ sets called `interests` using a
+wildcard query:
+
+```java
+// Using the same method from above:
+String query = "interests_set:*";
+```
+
+```ruby
+results = client.search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:*')
+results['num_found']
+# 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, both stored maps have an `interests` set.
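+
+If you're following along over HTTP, a roughly equivalent wildcard query
+against the `customers` index would look like this (a minimal sketch,
+assuming the `$RIAK_HOST` variable from the earlier examples):
+
+```curl
+curl "$RIAK_HOST/search/query/customers?wt=json&q=interests_set:*" | json_pp
+```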
+
+Now let's see how
+many maps have items in `interests` sets that begin with `loving`:
+
+```java
+// Using the same method from above:
+String query = "interests_set:loving*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('interests_set:loving*')
+    ->build()
+    ->execute();
+
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'interests_set:loving*')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "interests_set:loving*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('interests_set:loving*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+As expected, only our Joan Jett map has one item in its `interests` set
+that starts with `loving`.
+
+### Searching Maps Within Maps
+
+Before we can try to search maps within maps, we need to actually store
+some. Let's add an `alter_ego` map to both of the maps we've stored thus
+far. Each person's alter ego will have a `name` register only.
+
+```java
+Location idrisElbaMap = new Location(customersBucket, "idris_elba");
+MapUpdate alterEgoUpdateName = new MapUpdate()
+    .update("name", new RegisterUpdate("John Luther"));
+MapUpdate alterEgoUpdate = new MapUpdate()
+    .update("alter_ego", alterEgoUpdateName);
+UpdateMap addSubMap = new UpdateMap.Builder(idrisElbaMap, alterEgoUpdate)
+    .build();
+client.execute(addSubMap);
+```
+
+```ruby
+idris_elba.maps['alter_ego'].registers['name'] = 'John Luther'
+
+joan_jett.maps['alter_ego'].registers['name'] = 'Robert Plant'
+```
+
+```php
+$mapBuilder = (new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateRegister('name', 'John Luther');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('idris_elba', 'customers', 'maps')
+    ->build()
+    ->execute();
+
+$mapBuilder->updateRegister('name', 'Robert Plant');
+
+(new \Basho\Riak\Command\Builder\UpdateMap($riak))
+    ->updateMap('alter_ego', $mapBuilder)
+    ->buildLocation('joan_jett', 'customers', 'maps')
+    ->build()
+    ->execute();
+```
+
+```python
+idris_elba.maps['alter_ego'].registers['name'].assign('John Luther')
+idris_elba.store()
+
+joan_jett.maps['alter_ego'].registers['name'].assign('Robert Plant')
+joan_jett.store()
+```
+
+```csharp
+// https://github.com/basho/riak-dotnet-client/blob/develop/src/RiakClientExamples/Dev/Search/SearchDataTypes.cs
+
+const string nameRegister = "name";
+const string alterEgoMap = "alter_ego";
+
+var mapOp = new UpdateMap.MapOperation();
+mapOp.Map(alterEgoMap).SetRegister(nameRegister, "John Luther");
+
+var cmd = new UpdateMap.Builder()
+    .WithBucketType("maps")
+    .WithBucket("customers")
+    .WithKey("idris_elba")
+    .WithMapOperation(mapOp)
+    .Build();
+
+RiakResult rslt = client.Execute(cmd);
+```
+
+```javascript
+var funcs = [
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'idris_elba'
+        };
+
+        var 
mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'John Luther');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    },
+    function (async_cb) {
+        var options = {
+            bucketType: 'maps',
+            bucket: 'customers',
+            key: 'joan_jett'
+        };
+
+        var mapOp = new Riak.Commands.CRDT.UpdateMap.MapOperation();
+        var alterEgoMap = mapOp.map('alter_ego');
+        alterEgoMap.setRegister('name', 'Robert Plant');
+
+        options.op = mapOp;
+
+        client.updateMap(options, function (err, rslt) {
+            throwIfErr(err);
+            async_cb();
+        });
+    }
+];
+
+async.parallel(funcs, function (err, rslts) {
+    throwIfErr(err);
+});
+```
+
+Querying maps within maps involves constructing queries that separate
+the different levels of depth with a single dot. Here's an example query
+for finding maps that have a `name` register embedded within an
+`alter_ego` map:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 2
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 2
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*')
+results['num_found'] # 2
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Once we know how to query embedded fields like this, we can query those
+just like any other. 
Let's find out which maps have an `alter_ego`
+sub-map that contains a `name` register that ends with `Plant`, and
+display that customer's first name:
+
+```java
+// Using the same method from above:
+String query = "alter_ego_map.name_register:*Plant";
+
+// Again using the same method from above:
+int numberFound = results.numResults(); // 1
+String registerValue =
+    results.getAllResults().get(0).get("first_name_register").get(0); // Joan
+```
+
+```ruby
+results = client.search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # 'Joan'
+```
+
+```php
+$response = (new \Basho\Riak\Command\Builder\Search\FetchObjects($riak))
+    ->withIndexName('customers')
+    ->withQuery('alter_ego_map.name_register:*Plant')
+    ->build()
+    ->execute();
+
+$response->getNumFound(); // 1
+$response->getDocs()[0]->first_name_register; // Joan
+```
+
+```python
+results = client.fulltext_search('customers', 'alter_ego_map.name_register:*Plant')
+results['num_found'] # 1
+results['docs'][0]['first_name_register'] # u'Joan'
+```
+
+```csharp
+var search = new RiakSearchRequest("customers", "alter_ego_map.name_register:*Plant");
+var rslt = client.Search(search);
+```
+
+```javascript
+var searchCmd = new Riak.Commands.YZ.Search.Builder()
+    .withIndexName('customers')
+    .withQuery('alter_ego_map.name_register:*Plant')
+    .withCallback(search_cb)
+    .build();
+
+client.execute(searchCmd);
+```
+
+Success! We've now queried not just maps but also maps within maps.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/secondary-indexes.md b/content/riak/kv/3.0.4/developing/usage/secondary-indexes.md
new file mode 100644
index 0000000000..2a94040e9d
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/secondary-indexes.md
@@ -0,0 +1,2030 @@
+---
+title: "Using Secondary Indexes (2i)"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Using Secondary Indexes"
+    identifier: "usage_2i"
+    weight: 107
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/using/2i
+  - /riak/kv/3.0.4/dev/using/2i
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/memory
+[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/reference/strong-consistency
+
+> **Note: Riak Search preferred for querying**
+>
+> If you're interested in non-primary-key-based querying in Riak, i.e. if
+you're looking to go beyond straightforward K/V operations, we now
+recommend [Riak Search]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search/) rather than secondary indexes for
+a variety of reasons. Most importantly, Riak Search has a far more
+capacious querying API and can be used with all of Riak's storage
+backends.
+
+Secondary indexes (2i) in Riak enable you to tag objects stored in Riak,
+at write time, with one or more queryable values. Those values can then
+be used to find multiple objects in Riak. If you're storing [user data]({{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/#user-accounts), for example, you could tag each object
+associated with that user with a username or other unique marker. Once
+tagged, you could find all objects in a Riak bucket sharing that tag.
+Secondary indexes can be either a binary or string, such as
+`sensor_1_data` or `admin_user` or `click_event`, or an integer, such as
+`99` or `141121`.
+ +[Riak Search]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search/) serves analogous purposes but is quite +different because it parses key/value data itself and builds indexes on +the basis of Solr schemas. + +Please note that 2i can be used only with the [LevelDB][plan backend leveldb] and [Memory][plan backend memory] +backends. + +## Features + +* Allows two types of secondary attributes: integers and strings (aka + binaries) +* Allows querying by exact match or range on one index +* Allows pagination of results +* Allows streaming of results +* Query results can be used as input to a [MapReduce]({{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce/) + query + +> **Note on 2i and strong consistency** +Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] +feature introduced in Riak version 2.0. If you store objects in +[strongly consistent buckets]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach +secondary index metadata to those objects, you can still perform +strongly consistent operations on those objects but the secondary +indexes will be ignored. + +## When to Use Secondary Indexes + +Secondary indexes are useful when you want to find data on the basis of +something other than objects' bucket type, bucket, and key, i.e. when +you want objects to be discoverable based on more than their location +alone. + +2i works best for objects whose value is stored in an opaque blob, like +a binary file, because those objects don't offer any clues that enable +you to discover them later. Indexing enables you to tag those objects +and find all objects with the same tag in a specified bucket later on. + +2i is thus recommended when your use case requires an easy-to-use search +mechanism that does not require a schema (as does [Riak Search]({{<baseurl>}}riak/kv/3.0.4/using/reference/search/#schemas)) and a basic query interface, i.e. an interface that +enables an application to tell Riak things like "fetch all objects +tagged with the string `Milwaukee_Bucks`" or "fetch all objects tagged +with numbers between 1500 and 1509." + +2i is also recommended if your use case requires anti-entropy. Since +secondary indexes are just metadata attached to key/value objects, 2i +piggybacks off of read-repair. + +## When Not to Use Secondary Indexes + +* If your ring size exceeds 512 partitions, 2i can cause performance + issues in large clusters. +* When you need more than the exact match and range searches that 2i + supports. If that's the case, we recommend checking out [Riak Search]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search/). +* When you want to use composite queries. A query like + `last_name=zezeski AND state=MD` would have to be split into two + queries and the results merged (or it would need to involve + [MapReduce]({{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce/)). + +## Query Interfaces and Examples + +Typically, the result set from a 2i query is a list of object keys from +the specified bucket that include the index values in question. As we'll +see below, when executing range queries in Riak 1.4 or higher, it is +possible to retrieve the index values along with the object keys. + +### Inserting Objects with Secondary Indexes + +In this example, the key `john_smith` is used to store user data in the +bucket `users`, which bears the `default` bucket type. 
Let's say that an
+application would like to add a Twitter handle and an email address to
+this object as secondary indexes.
+
+```java
+Location johnSmithKey = new Location(new Namespace("default", "users"), "john_smith");
+
+// In the Java client (and all clients), if you do not specify a bucket type,
+// the client will use the default type. And so the following location would
+// be equivalent to the one above:
+johnSmithKey = new Location(new Namespace("users"), "john_smith");
+
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{'user_data':{ ... }}"));
+
+obj.getIndexes().getIndex(StringBinIndex.named("twitter")).add("jsmith123");
+obj.getIndexes().getIndex(StringBinIndex.named("email")).add("jsmith@basho.com");
+
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(johnSmithKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('default').bucket('users')
+obj = Riak::RObject.new(bucket, 'john_smith')
+obj.content_type = 'application/json'
+obj.raw_data = '{"user_data":{ ... }}'
+
+# String/binary indexes must be set as an array of strings
+obj.indexes['twitter_bin'] = %w{ jsmith123 }
+obj.indexes['email_bin'] = %w{ jsmith@basho.com }
+obj.store
+
+# In the Ruby client (and all clients), if you do not specify a bucket
+# type, the client will use the default type. And so the following set
+# of commands would be equivalent to the one above:
+
+bucket = client.bucket('users')
+# repeat the same commands for building the object
+obj.store
+```
+
+```php
+$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json']))
+    ->addValueToIndex('twitter_bin', 'jsmith123')
+    ->addValueToIndex('email_bin', 'jsmith@basho.com');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withObject($object)
+    ->buildLocation('john_smith', 'users', 'default')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('default').bucket('users')
+# In the Python client (and all clients), if you do not specify a bucket type,
+# the client will use the default type. And so the following store command
+# would be equivalent to the one above:
+bucket = client.bucket('users')
+
+obj = RiakObject(client, bucket, 'john_smith')
+obj.content_type = 'text/plain'
+obj.data = '...user data...'
+obj.add_index('twitter_bin', 'jsmith123')
+obj.add_index('email_bin', 'jsmith@basho.com')
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("default", "users", "john_smith");
+var obj = new RiakObject(id, "...user data...",
+    RiakConstants.ContentTypes.TextPlain);
+obj.BinIndex("twitter").Set("jsmith123");
+obj.BinIndex("email").Set("jsmith@basho.com");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setBucket('users');
+riakObj.setKey('john_smith');
+riakObj.setValue('...user data...');
+riakObj.addToIndex('twitter_bin', 'jsmith123');
+riakObj.addToIndex('email_bin', 'jsmith@basho.com');
+client.storeValue({ value: riakObj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Obj = riakc_obj:new({<<"default">>, <<"users">>},
+                    <<"john_smith">>,
+                    <<"...user data...">>,
+                    <<"text/plain">>),
+%% In the Erlang client (and all clients), if you do not specify a bucket type,
+%% the client will use the default type. 
And so the following object would be +%% equivalent to the one above: + +Obj = riakc_obj:new(<<"users">>, + <<"john_smith">>, + <<"...user data...">>, + <<"text/plain">>), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index( + MD1, + [{{binary_index, "twitter"}, [<<"jsmith123">>]}, + {{binary_index, "email"}, [<<"jsmith@basho.com">>]}]), +Obj2 = riakc_obj:update_metadata(Obj, MD2), +riakc_pb_socket:put(Pid, Obj2). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + BucketType: "indexes", + Bucket: "users", + Key: "john_smith", + Value: []byte("…user data…"), +} + +obj.AddToIndex("twitter_bin", "jsmith123") +obj.AddToIndex("email_bin", "jsmith@basho.com") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + return err +} +``` + +```curl +curl -XPOST localhost:8098/types/default/buckets/users/keys/john_smith \ + -H 'x-riak-index-twitter_bin: jsmith123' \ + -H 'x-riak-index-email_bin: jsmith@basho.com' \ + -H 'Content-Type: application/json' \ + -d '{"userData":"data"}' +``` + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.4/developing/client-libraries), you can find more information about getting started with +your client in the [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.4/developing/getting-started) section. + +This has accomplished the following: + +* The object has been stored with a primary bucket/key of + `users`/`john_smith` +* The object now has a secondary index called `twitter_bin` with a value + of `jsmith123` +* The object now has a secondary index called `email_bin` with a value + of `jsmith@basho.com` + +### Querying Objects with Secondary Indexes + +Let's query the `users` bucket on the basis of Twitter handle to make +sure that we can find our stored object: + +```java +Namespace usersBucket = new Namespace("users"); +BinIndexQuery biq = new BinIndexQuery.Builder(usersBucket, "twitter", "jsmith123") + .build(); +BinIndexQuery.Response response = client.execute(biq); +List<BinIndexQuery.Response.Entry> entries = response.getEntries(); +for (BinIndexQuery.Response.Entry entry : entries) { + System.out.println(entry.getRiakObjectLocation().getKey()); +} +``` + +```ruby +bucket = client.bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') + +# This is equivalent to the following: +bucket = client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123') +``` + +```php +$response = (new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('users') + ->withIndexName('twitter_bin') + ->withScalarValue('jsmith123') + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket('users') # equivalent to client.bucket_type('default').bucket('users') +bucket.get_index('twitter_bin', 'jsmith123').results +``` + +```csharp +var idxId = new RiakIndexId("default", "users", "twitter"); +var rslt = client.GetSecondaryIndex(idxId, "jsmith123"); +var idxRslt = rslt.Value; +foreach (var keyTerm in idxRslt.IndexKeyTerms) +{ + Debug.WriteLine(keyTerm.Key); +} +``` + +```javascript +var query_keys = []; +function query_cb(err, rslt) { + if (err) { + throw new Error(err); + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + } + + if 
(rslt.values.length > 0) {
        Array.prototype.push.apply(query_keys,
            rslt.values.map(function (value) {
                return value.objectKey;
            }));
    }
}

var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
    .withBucket('users')
    .withIndexName('twitter_bin')
    .withIndexKey('jsmith123')
    .withCallback(query_cb)
    .build();
client.execute(cmd);
```

```erlang
{ok, Results} =
    riakc_pb_socket:get_index(Pid,
                              <<"users">>,               %% bucket
                              {binary_index, "twitter"}, %% index name
                              <<"jsmith123">>).          %% index value
```

```golang
cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucketType("indexes").
    WithBucket("users").
    WithIndexName("twitter_bin").
    WithIndexKey("jsmith123").
    Build()
if err != nil {
    return err
}

if err := cluster.Execute(cmd); err != nil {
    return err
}
```

```curl
curl localhost:8098/buckets/users/index/twitter_bin/jsmith123
```

The response:

```java
john_smith
```

```ruby
["john_smith"]
```

```php
['john_smith']
```

```python
['john_smith']
```

```csharp
john_smith
```

```javascript
john_smith
```

```erlang
{ok,{index_results_v1,[<<"john_smith">>],
                      undefined,undefined}}.
```

```golang
john_smith
```

```curl
{
  "keys": [
    "john_smith"
  ]
}
```

## Examples

To run the following examples, make sure that Riak is configured to use
an index-capable storage backend, such as [LevelDB][plan backend leveldb] or [Memory][plan backend memory].

## Indexing Objects

The following example indexes four different objects. Notice that we're
storing both integer and string (aka binary) fields. Field names are
automatically lowercased, some fields have multiple values, and
duplicate fields are automatically de-duplicated:

```java
Namespace peopleBucket = new Namespace("indexes", "people");

RiakObject larry = new RiakObject()
        .setValue(BinaryValue.create("My name is Larry"));
larry.getIndexes().getIndex(StringBinIndex.named("field1")).add("val1");
larry.getIndexes().getIndex(LongIntIndex.named("field2")).add(1001L);
StoreValue storeLarry = new StoreValue.Builder(larry)
        .withLocation(peopleBucket.setKey("larry"))
        .build();
client.execute(storeLarry);

RiakObject moe = new RiakObject()
        .setValue(BinaryValue.create("My name is Moe"));
moe.getIndexes().getIndex(StringBinIndex.named("Field1")).add("val2");
moe.getIndexes().getIndex(LongIntIndex.named("Field2")).add(1002L);
StoreValue storeMoe = new StoreValue.Builder(moe)
        .withLocation(peopleBucket.setKey("moe"))
        .build();
client.execute(storeMoe);

RiakObject curly = new RiakObject()
        .setValue(BinaryValue.create("My name is Curly"));
curly.getIndexes().getIndex(StringBinIndex.named("FIELD1")).add("val3");
curly.getIndexes().getIndex(LongIntIndex.named("FIELD2")).add(1003L);
StoreValue storeCurly = new StoreValue.Builder(curly)
        .withLocation(peopleBucket.setKey("curly"))
        .build();
client.execute(storeCurly);

RiakObject veronica = new RiakObject()
        .setValue(BinaryValue.create("My name is Veronica"));
veronica.getIndexes().getIndex(StringBinIndex.named("field1"))
        .add("val4").add("val4");
veronica.getIndexes().getIndex(LongIntIndex.named("field2"))
        .add(1004L).add(1005L).add(1006L).add(1004L).add(1004L).add(1007L);
StoreValue storeVeronica = new StoreValue.Builder(veronica)
        .withLocation(peopleBucket.setKey("veronica"))
        .build();
client.execute(storeVeronica);
```

```ruby
bucket = client.bucket_type('indexes').bucket('people')

obj1 = Riak::RObject.new(bucket, 'larry')
obj1.content_type = 'text/plain'
obj1.raw_data = 'My name is Larry'
obj1.indexes['field1_bin'] = %w{ val1 }
# Like binary/string indexes, integer indexes must be set as an array,
# even if you wish to add only a single index
obj1.indexes['field2_int'] = [1001]
obj1.store

obj2 = Riak::RObject.new(bucket, 'moe')
obj2.content_type = 'text/plain'
obj2.raw_data = 'My name is Moe'
obj2.indexes['Field1_bin'] = %w{ val2 }
obj2.indexes['Field2_int'] = [1002]
obj2.store

obj3 = Riak::RObject.new(bucket, 'curly')
obj3.content_type = 'text/plain'
obj3.raw_data = 'My name is Curly'
obj3.indexes['FIELD1_BIN'] = %w{ val3 }
obj3.indexes['FIELD2_INT'] = [1003]
obj3.store

obj4 = Riak::RObject.new(bucket, 'veronica')
obj4.content_type = 'text/plain'
obj4.raw_data = 'My name is Veronica'
obj4.indexes['field1_bin'] = %w{ val4 val4 val4a val4b }
# Assigning to an index replaces any previous values, so all of the
# integer values are set in a single array; Riak deduplicates the
# repeated 1004s on write
obj4.indexes['field2_int'] = [1004, 1004, 1005, 1006, 1004, 1004, 1004, 1007]
obj4.store
```

```php
$bucket = new \Basho\Riak\Bucket('people', 'indexes');

$object = (new \Basho\Riak\Object('My name is Larry', ['Content-type' => 'text/plain']))
    ->addValueToIndex('field1_bin', 'val1')
    ->addValueToIndex('field2_int', 1001);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
    ->withObject($object)
    ->withLocation(new \Basho\Riak\Location('larry', $bucket))
    ->build()
    ->execute();

$object = (new \Basho\Riak\Object('My name is Moe', ['Content-type' => 'text/plain']))
    ->addValueToIndex('Field1_bin', 'val2')
    ->addValueToIndex('Field2_int', 1002);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
    ->withObject($object)
    ->withLocation(new \Basho\Riak\Location('moe', $bucket))
    ->build()
    ->execute();

$object = (new \Basho\Riak\Object('My name is Curly', ['Content-type' => 'text/plain']))
    ->addValueToIndex('FIELD1_BIN', 'val3')
    ->addValueToIndex('FIELD2_int', 1003);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
    ->withObject($object)
    ->withLocation(new \Basho\Riak\Location('curly', $bucket))
    ->build()
    ->execute();

$object = (new \Basho\Riak\Object('My name is Veronica', ['Content-type' => 'text/plain']))
    ->addValueToIndex('field1_bin', 'val4')
    ->addValueToIndex('field1_bin', 'val4')
    ->addValueToIndex('field1_bin', 'val4a')
    ->addValueToIndex('field1_bin', 'val4b')
    ->addValueToIndex('field2_int', 1004)
    ->addValueToIndex('field2_int', 1005)
    ->addValueToIndex('field2_int', 1006)
    ->addValueToIndex('field2_int', 1004)
    ->addValueToIndex('field2_int', 1004)
    ->addValueToIndex('field2_int', 1007);

(new \Basho\Riak\Command\Builder\StoreObject($riak))
    ->withObject($object)
    ->withLocation(new \Basho\Riak\Location('veronica', $bucket))
    ->build()
    ->execute();
```

```python
bucket = client.bucket_type('indexes').bucket('people')

obj1 = RiakObject(client, bucket, 'larry')
obj1.content_type = 'text/plain'
obj1.data = 'My name is Larry'
obj1.add_index('field1_bin', 'val1').add_index('field2_int', 1001)
obj1.store()

obj2 = RiakObject(client, bucket, 'moe')
obj2.content_type = 'text/plain'
obj2.data = 'Moe'
obj2.add_index('Field1_bin', 'val2').add_index('Field2_int', 1002)
obj2.store()

obj3 = RiakObject(client, bucket, 'curly')
obj3.content_type = 'text/plain'
obj3.data = 'Curly'
obj3.add_index('FIELD1_BIN', 'val3').add_index('FIELD2_INT', 1003)
obj3.store()

obj4 = RiakObject(client, bucket, 
'veronica') +obj4.content_type = 'text/plain' +obj4.data = 'Veronica' +obj4.add_index('field1_bin', 'val4').add_index('field1_bin', 'val4a').add_index('field1_bin', 'val4b').add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1005).add_index('field2_int', 1006).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1004).add_index('field2_int', 1007) +obj4.store() +``` + +```csharp +var larryId = new RiakObjectId("indexes", "people", "larry"); +var larry = new RiakObject(larryId, "My name is Larry", + RiakConstants.ContentTypes.TextPlain); + +larry.BinIndex("field1").Set("val1"); +larry.IntIndex("field2").Set(1001); + +client.Put(larry); + +var moeId = new RiakObjectId("indexes", "people", "moe"); +var moe = new RiakObject(moeId, "My name is Moe", + RiakConstants.ContentTypes.TextPlain); + +moe.BinIndex("Field1").Set("val2"); +moe.IntIndex("Field2").Set(1002); + +client.Put(moe); + +var curlyId = new RiakObjectId("indexes", "people", "curly"); +var curly = new RiakObject(curlyId, "My name is Curly", + RiakConstants.ContentTypes.TextPlain); + +curly.BinIndex("FIELD1").Set("val3"); +curly.IntIndex("FIELD2").Set(1003); + +client.Put(curly); + +var veronicaId = new RiakObjectId("indexes", "people", "veronica"); +var veronica = new RiakObject(veronicaId, "My name is Veronica", + RiakConstants.ContentTypes.TextPlain); + +veronica.BinIndex("FIELD1").Set(new string[] { "val4", "val4a", "val4b" }); +veronica.IntIndex("FIELD2").Set(new BigInteger[] { + 1004, 1005, 1006, 1004, 1004, 1007 +}); + +client.Put(veronica); +``` + +```javascript +function store_cb(err, rslt, async_cb) { + if (err) { + throw new Error(err); + } + async_cb(null, rslt); +} + +var storeFuncs = [ + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('larry'); + riakObj.setValue('My name is Larry'); + riakObj.addToIndex('field1_bin', 'val1'); + riakObj.addToIndex('field2_int', 1001); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('moe'); + riakObj.setValue('My name is Moe'); + riakObj.addToIndex('Field1_bin', 'val2'); + riakObj.addToIndex('Field2_int', 1002); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('curly'); + riakObj.setValue('My name is Curly'); + riakObj.addToIndex('FIELD1_BIN', 'val3'); + riakObj.addToIndex('FIELD2_INT', 1003); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + }, + function (async_cb) { + var riakObj = new Riak.Commands.KV.RiakObject(); + riakObj.setContentType('text/plain'); + riakObj.setBucketType('indexes'); + riakObj.setBucket('people'); + riakObj.setKey('veronica'); + riakObj.setValue('My name is Veronica'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4'); + riakObj.addToIndex('FIELD1_bin', 'val4a'); + riakObj.addToIndex('FIELD1_bin', 'val4b'); + riakObj.addToIndex('FIELD2_int', 1004); + 
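// Riak stores each index value only once; the repeated 1004s below are deduplicated on write
        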
riakObj.addToIndex('FIELD2_int', 1005); + riakObj.addToIndex('FIELD2_int', 1006); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1004); + riakObj.addToIndex('FIELD2_int', 1007); + client.storeValue({ value: riakObj }, function (err, rslt) { + store_cb(err, rslt, async_cb); + }); + } +]; +async.parallel(storeFuncs, function (err, rslts) { + if (err) { + throw new Error(err); + } +}); +``` + +```erlang +Larry = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"My name is Larry">>, + <<"text/plain">>), +LarryMetadata = riakc_obj:get_update_metadata(Larry), +LarryIndexes = riakc_obj:set_secondary_index( + LarryMetadata, + [{{binary_index, "field1"}, [<<"val1">>]}, {{integer_index, "field2"}, [1001]}] +), +LarryWithIndexes = riakc_obj:update_metadata(Larry, LarryIndexes). + +Moe = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"moe">>, + <<"My name is Moe">>, + <<"text/plain">>), +MoeMetadata = riakc_obj:get_update_metadata(Moe), +MoeIndexes = riakc_obj:set_secondary_index( + MoeMetadata, + [{{binary_index, "Field1"}, [<<"val2">>]}, {{integer_index, "Field2"}, [1002]}] +), +MoeWithIndexes = riakc_obj:update_metadata(Moe, MoeIndexes). + +Curly = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"curly">>, + <<"My name is Curly">>, + <<"text/plain">>), +CurlyMetadata = riakc_obj:get_update_metadata(Curly), +CurlyIndexes = riakc_obj:set_secondary_index( + CurlyMetadata, + [{{binary_index, "FIELD1"}, [<<"val3">>]}, {{integer_index, "FIELD2"}, [1003]}] +), +CurlyWithIndexes = riakc_obj:update_metadata(Curly, CurlyIndexes). + +Veronica = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"veronica">>, + <<"My name is Veronica">>, + <<"text/plain">>), +VeronicaMetadata = riakc_obj:get_update_metadata(Veronica), +VeronicaIndexes = riakc_obj:set_secondary_index( + VeronicaMetadata, + [{{binary_index, "field1"}, [<<"val4">>]}, {{binary_index, "field1"}, [<<"val4">>]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1005]}, {{integer_index, "field2"}, [1006]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1004]}, {{integer_index, "field2"}, [1007]}] +), +VeronicaWithIndexes = riakc_obj:update_metadata(Veronica, VeronicaIndexes). +``` + +```golang +o1 := &riak.Object{ + Key: "larry", + Value: []byte("My name is Larry"), +} +o1.AddToIndex("field1_bin", "val1") +o1.AddToIntIndex("field2_int", 1001) + +o2 := &riak.Object{ + Key: "moe", + Value: []byte("My name is Moe"), +} +o2.AddToIndex("Field1_bin", "val2") +o2.AddToIntIndex("Field2_int", 1002) + +o3 := &riak.Object{ + Key: "curly", + Value: []byte("My name is Curly"), +} +o3.AddToIndex("FIELD1_BIN", "val3") +o3.AddToIntIndex("FIELD2_INT", 1003) + +o4 := &riak.Object{ + Key: "veronica", + Value: []byte("My name is Veronica"), +} +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4") +o4.AddToIndex("FIELD1_bin", "val4a") +o4.AddToIndex("FIELD1_bin", "val4b") +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1005) +o4.AddToIntIndex("FIELD2_int", 1006) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1004) +o4.AddToIntIndex("FIELD2_int", 1007) + +objs := [...]*riak.Object{o1, o2, o3, o4} + +wg := &sync.WaitGroup{} +for _, obj := range objs { + obj.ContentType = "text/plain" + obj.Charset = "utf-8" + obj.ContentEncoding = "utf-8" + + cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("indexes"). + WithBucket("people"). + WithContent(obj). 
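
            // One store command is built per object; ExecuteAsync below runs all four concurrently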
+ Build() + if err != nil { + return err + } + + args := &riak.Async{ + Command: cmd, + Wait: wg, + } + if err := cluster.ExecuteAsync(args); err != nil { + return err + } +} + +wg.Wait() +``` + +```curl +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field1_bin: val1" \ + -H "x-riak-index-field2_int: 1001" \ + -d 'My name is Larry' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/moe \ + -H "x-riak-index-Field1_bin: val2" \ + -H "x-riak-index-Field2_int: 1002" \ + -d 'My name is Moe' + +curl -v -XPUT localhost:8098/types/indexes/buckets/people/keys/curly \ + -H "X-RIAK-INDEX-FIELD1_BIN: val3" \ + -H "X-RIAK-INDEX-FIELD2_INT: 1003" \ + -d 'My name is Curly' + +curl -v -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/veronica \ + -H "x-riak-index-field1_bin: val4, val4, val4a, val4b" \ + -H "x-riak-index-field2_int: 1004, 1004, 1005, 1006" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1004" \ + -H "x-riak-index-field2_int: 1007" \ + -d 'My name is Veronica' +``` + +The above objects will end up having the following secondary indexes, +respectively: + +* `Larry` - Binary index `field1_bin` and integer index `field2_int` +* `Moe` - Binary index `field1_bin` and integer index `field2_int` + (note that the index names are set to lowercase by Riak) +* `Curly` - Binary index `field1_bin` and integer index `field2_int` + (note again that the index names are set to lowercase) +* `Veronica` - Binary index `field1_bin` with the values `val4`, + `val4a`, and `val4b` and integer index `field2_int` with the values + `1004`, `1005`, `1006`, and `1007` (note that redundancies have been removed) + +As these examples show, there are safeguards in Riak that both normalize +the names of indexes and prevent the accumulation of redundant indexes. + +## Invalid Field Names and Types + +The following examples demonstrate what happens when an index field is +specified with an invalid field name or type. The system responds with +`400 Bad Request` and a description of the error. + +Invalid field name: + +```java +// The Java client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_foo'] = [1001] + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter', 'jsmith123'); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_foo', 1001) + +# Result: +riak.RiakError: "Riak 2i fields must end with either '_bin' or '_int'." 
+``` + +```csharp +// The Riak .NET Client will not allow you to provide invalid index names, +// because you are not required to add "_bin" or "_int" to the end of +// those names +``` + +```javascript +var cmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('people') + .withIndexName('field2_foo') + .withIndexKey('jsmith123') + .withCallback(query_cb) + .build(); +client.execute(cmd); + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{foo_index, "field2"}, [1001]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: no function clause matching + riakc_obj:set_secondary_index( ... ). +``` + +```golang +cmd, err := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("users"). + WithIndexName("field2_foo"). + WithIndexKey("jsmith123"). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] field name error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// Produces the following stack trace (truncated): +error: query_cb err: 'Error processing incoming message: error:function_clause:[{riak_api_pb_server, + send_error, + [{unknown_field_type, + <<"field2_foo">>}, + {state, + {gen_tcp,inet}, + #Port<0.68338>, + undefined, + ... + ... + ... +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_foo: 1001" \ + -d 'data1' + +# Response +Unknown field type for field: 'field2_foo'. +``` + +Incorrect data type: + +```java +Location key = new Location(new Namespace("people"), "larry"); +RiakObject obj = new RiakObject(); +obj.getIndexes().getIndex(LongIntIndex.named("field2")).add("bar"); + +// The Java client will return a response indicating a type mismatch. +// The output may look something like this: + +Error:(46, 68) java: no suitable method found for add(java.lang.String) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.lang.Long) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.lang.Long) + method com.basho.riak.client.query.indexes.RiakIndex.add(java.util.Collection<java.lang.Long>) is not applicable + (argument mismatch; java.lang.String cannot be converted to java.util.Collection<java.lang.Long>) +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('people') +obj = Riak::RObject.new(bucket, 'larry') +obj.indexes['field2_int'] = %w{ bar } + +# The Ruby client will let you get away with this...at first. But when +# you attempt to store the object, you will get an error response such +# as this: + +NoMethodError: undefined method 'map' for 1001:Fixnum +``` + +```php +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', 'not_an_int'); + +// throws \InvalidArgumentException +$object = (new \Basho\Riak\Object('{"user_data":{ ... 
}}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_int', ['arrays', 'are', 'not', 'strings']); + +// does not throw an exception, it will just write ints as a string +// only requirement is that value is scalar (int, float, string, bool) +$object = (new \Basho\Riak\Object('{"user_data":{ ... }}', ['Content-type' => 'application/json'])) + ->addValueToIndex('twitter_bin', 12); +``` + +```python +bucket = client.bucket_type('indexes').bucket('people') +obj = RiakObject(client, bucket, 'larry') +obj.add_index('field2_int', 'bar') + +# The Python client will let you get away with this...at first. But when you +# attempt to store the object, you will get an error response such as this: +riak.RiakError: '{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]}' +``` + +```csharp +var id = new RiakObjectId("indexes", "people", "larry"); +var obj = new RiakObject(id, "test value", "text/plain"); +var intIdx = obj.IntIndex("test-int-idx"); +intIdx.Add("invalid-value"); + +// The .NET client will throw a FormatException at this point +// The output may look something like this: + +The value could not be parsed. +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('indexes'); +riakObj.setBucket('people'); +riakObj.setKey('larry'); +riakObj.addToIndex('field2_int', 'bar'); +try { + client.storeValue({ value: riakObj }, function (err, rslt) { + logger.error("incorrect_data_type err: '%s'", err); + }); +} catch (e) { + logger.error("incorrect_data_type err: '%s'", e); +} + +// Output: +buffer.js:67 + throw new TypeError('must start with number, buffer, array or string'); + ^ +TypeError: must start with number, buffer, array or string + at new Buffer (buffer.js:67:11) +``` + +```erlang +Obj = riakc_obj:new( + {<<"indexes">>, <<"people">>}, + <<"larry">>, + <<"some data">>, + <<"text/plain">> +), +MD1 = riakc_obj:get_update_metadata(Obj), +MD2 = riakc_obj:set_secondary_index(MD1, [{{integer_index, "field2"}, [<<"bar">>]}]). + +%% The Erlang client will return an error message along these lines: +** exception error: bad argument + in function integer_to_list/1 + called as integer_to_list(<<"bar">>) ... +``` + +```golang +obj := &riak.Object{ + BucketType: "indexes", + Bucket: "people", + Key: "larry", + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("My name is Larry"), +} +obj.AddToIndex("field2_int", "bar") + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithContent(obj). + Build() +if err != nil { + return err +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println("[DevUsing2i] index data type error:", err) +} else { + return errors.New("[DevUsing2i] expected an error!") +} + +// The riak.Error object will contain: +{precommit_fail,[{field_parsing_failed,{<<"field2_int">>,<<"bar">>}}]} +``` + +```curl +curl -XPUT 127.0.0.1:8098/types/indexes/buckets/people/keys/larry \ + -H "x-riak-index-field2_int: bar" \ + -d 'data1' + +# Response +HTTP/1.1 400 Bad Request + +Could not parse field 'field2_int', value 'bar'. +``` + +## Querying + +> **Note on 2i queries and the R parameter** +> +> For all 2i queries, the [R]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/replication-properties#r-value-and-read-failure-tolerance) parameter is set to 1, +which means that queries that are run while [handoffs]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#hinted-handoff) and related operations are underway may not +return all keys as expected. 
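
>
> For example, a node can be excluded from 2i coverage queries with the following `riak.conf` setting (a minimal sketch; the option is explained in the next paragraph):
>
> ```riakconf
> participate_in_coverage = disabled
> ```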

>
> To avoid such issues, a `riak.conf` option lets you enable or disable a node's
participation in 2i queries: `participate_in_coverage = disabled` prevents the node
in question from participating. The recommended use of this feature is to keep nodes
that were newly added to the cluster, and that have yet to receive all of their data,
from taking part in 2i queries and returning inconsistent results. Changing the
`participate_in_coverage` setting requires Riak to be restarted on that node before
the change takes effect. The default setting is `enabled`.

### Exact Match

The following examples perform an exact match index query.

Query a binary index:

```java
Namespace myBucket = new Namespace("indexes", "people");
BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val1").build();
BinIndexQuery.Response response = client.execute(biq);
```

```ruby
bucket = client.bucket_type('indexes').bucket('people')
bucket.get_index('field1_bin', 'val1')
```

```php
(new \Basho\Riak\Command\Builder\QueryIndex($riak))
    ->buildBucket('people', 'indexes')
    ->withIndexName('field1_bin')
    ->withScalarValue('val1')
    ->build()
    ->execute()
    ->getResults();
```

```python
bucket = client.bucket_type('indexes').bucket('people')
bucket.get_index('field1_bin', 'val1')
```

```csharp
var riakIndexId = new RiakIndexId("indexes", "people", "field1");
// Note: using a string argument indicates a binary index query:
var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val1");
var indexResult = indexRiakResult.Value;
```

```javascript
var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
    .withBucketType('indexes')
    .withBucket('people')
    .withIndexName('field1_bin')
    .withIndexKey('val1')
    .withCallback(query_cb)
    .build();
client.execute(binIdxCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:get_index(
    Pid,
    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
    {binary_index, "field1"},
    <<"val1">>
).
```

```golang
c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucketType("indexes").
    WithBucket("people").
    WithIndexName("field1_bin").
    WithIndexKey("val1").
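
    // Build() returns the command; pass it to cluster.Execute as in the earlier examples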
    Build()
if err != nil {
    return err
}
```

```curl
curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val1
```

Query an integer index:

```java
Namespace myBucket = new Namespace("indexes", "people");
IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1001L)
        .build();
IntIndexQuery.Response response = client.execute(iiq);
```

```ruby
bucket = client.bucket_type('indexes').bucket('people')
bucket.get_index('field2_int', 1001)
```

```php
(new \Basho\Riak\Command\Builder\QueryIndex($riak))
    ->buildBucket('people', 'indexes')
    ->withIndexName('field2_int')
    ->withScalarValue(1001)
    ->build()
    ->execute()
    ->getResults();
```

```python
bucket = client.bucket_type('indexes').bucket('people')
bucket.get_index('field2_int', 1001)
```

```csharp
var riakIndexId = new RiakIndexId("indexes", "people", "field2");
// Note: using an integer argument indicates an int index query:
var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1001);
var indexResult = indexRiakResult.Value;
```

```javascript
var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
    .withBucketType('indexes')
    .withBucket('people')
    .withIndexName('field2_int')
    .withIndexKey(1001)
    .withCallback(query_cb)
    .build();
client.execute(intIdxCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:get_index(
    Pid,
    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
    {integer_index, "field2"},
    1001
).
```

```golang
cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucketType("indexes").
    WithBucket("people").
    WithIndexName("field2_int").
    WithIntIndexKey(1001).
    Build()
if err != nil {
    return err
}
```

```curl
curl localhost:8098/types/indexes/buckets/people/index/field2_int/1001
```

The following example performs an exact match query and pipes the
results into a MapReduce job (note that `val3` was stored under the
binary index, which is normalized to `field1_bin`):

```curl
curl -XPOST localhost:8098/mapred \
  -H "Content-Type: application/json" \
  -d @-<<EOF
{
  "inputs": {
    "bucket": "people",
    "index": "field1_bin",
    "key": "val3"
  },
  "query": [
    {
      "reduce": {
        "language": "erlang",
        "module": "riak_kv_mapreduce",
        "function": "reduce_identity",
        "keep": true
      }
    }
  ]
}
EOF
```

### Range

The following examples perform a range query.

Query a binary index:

```java
Namespace myBucket = new Namespace("indexes", "people");
BinIndexQuery biq = new BinIndexQuery.Builder(myBucket, "field1", "val2", "val4")
        .build();
BinIndexQuery.Response response = client.execute(biq);
```

```ruby
bucket = client.bucket_type('indexes').bucket('people')
bucket.get_index('field1_bin', 'val2'..'val4')
```

```php
(new \Basho\Riak\Command\Builder\QueryIndex($riak))
    ->buildBucket('people', 'indexes')
    ->withIndexName('field1_bin')
    ->withRangeValue('val2', 'val4')
    ->build()
    ->execute()
    ->getResults();
```

```python
bucket = client.bucket_type('indexes').bucket('people')
bucket.get_index('field1_bin', 'val2', 'val4')
```

```csharp
var riakIndexId = new RiakIndexId("indexes", "people", "field1");
var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "val2", "val4");
var indexResult = indexRiakResult.Value;
```

```javascript
var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
    .withBucketType('indexes')
    .withBucket('people')
    .withIndexName('field1_bin')
    .withRange('val2', 'val4')
    .withCallback(query_cb)
    .build();
client.execute(binIdxCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:get_index_range(
    Pid,
    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
    {binary_index, "field1"},     %% index name
    <<"val2">>, <<"val4">>        %% range query for keys between "val2" and "val4"
).
```

```golang
c1, err := riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucketType("indexes").
    WithBucket("people").
    WithIndexName("field1_bin").
    WithRange("val2", "val4").
    Build()
if err != nil {
    return err
}
```

```curl
curl localhost:8098/types/indexes/buckets/people/index/field1_bin/val2/val4
```

Or query an integer index:

```java
Namespace myBucket = new Namespace("indexes", "people");
IntIndexQuery iiq = new IntIndexQuery.Builder(myBucket, "field2", 1002L, 1004L)
        .build();
IntIndexQuery.Response response = client.execute(iiq);
```

```ruby
bucket = client.bucket_type('indexes').bucket('people')
bucket.get_index('field2_int', 1002..1004)
```

```php
(new \Basho\Riak\Command\Builder\QueryIndex($riak))
    ->buildBucket('people', 'indexes')
    ->withIndexName('field2_int')
    ->withRangeValue(1002, 1004)
    ->build()
    ->execute()
    ->getResults();
```

```python
bucket = client.bucket_type('indexes').bucket('people')
bucket.get_index('field2_int', 1002, 1004)
```

```csharp
var riakIndexId = new RiakIndexId("indexes", "people", "field2");
var indexRiakResult = client.GetSecondaryIndex(riakIndexId, 1002, 1004);
var indexResult = indexRiakResult.Value;
```

```javascript
var intIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
    .withBucketType('indexes')
    .withBucket('people')
    .withIndexName('field2_int')
    .withRange(1002, 1004)
    .withCallback(query_cb)
    .build();
client.execute(intIdxCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:get_index_range(
    Pid,
    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
    {integer_index, "field2"},    %% index name
    1002, 1004                    %% range query for keys between 1002 and 1004
).
```

```golang
cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucketType("indexes").
    WithBucket("people").
    WithIndexName("field2_int").
    WithIntRange(1002, 1004).
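
    // Riak range queries are inclusive: values 1002, 1003, and 1004 all match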
    Build()
```

```curl
curl localhost:8098/types/indexes/buckets/people/index/field2_int/1002/1004
```

The following example performs a range query over the integer index and
pipes the results into a MapReduce job:

```curl
curl -XPOST localhost:8098/mapred \
  -H "Content-Type: application/json" \
  -d @-<<EOF
{
  "inputs": {
    "bucket": "people",
    "index": "field2_int",
    "start": 1002,
    "end": 1004
  },
  "query": [
    {
      "reduce": {
        "language": "erlang",
        "module": "riak_kv_mapreduce",
        "function": "reduce_identity",
        "keep": true
      }
    }
  ]
}
EOF
```

#### Range with terms

When performing a range query, it is possible to retrieve the matched
index values alongside the Riak keys using `return_terms=true`. An
example from a small sampling of Twitter data with indexed hash tags:

```java
Namespace tweetsBucket = new Namespace("indexes", "tweets");
BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "rock", "rocl")
        .withKeyAndIndex(true)
        .build();
BinIndexQuery.Response response = client.execute(biq);
```

```ruby
bucket = client.bucket_type('indexes').bucket('tweets')
bucket.get_index('hashtags_bin', 'rock'..'rocl', return_terms: true)
```

```php
(new \Basho\Riak\Command\Builder\QueryIndex($riak))
    ->buildBucket('tweets', 'indexes')
    ->withIndexName('hashtags')
    ->withRangeValue('rock', 'rocl')
    ->withReturnTerms()
    ->build()
    ->execute()
    ->getResults();
```

```python
bucket = client.bucket_type('indexes').bucket('tweets')
bucket.get_index('hashtags_bin', 'rock', 'rocl', return_terms=True)
```

```csharp
var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
var options = new RiakIndexGetOptions();
options.SetReturnTerms(true);
var indexRiakResult = client.GetSecondaryIndex(riakIndexId, "rock", "rocl", options);
var indexResult = indexRiakResult.Value;
```

```javascript
var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
    .withBucketType('indexes')
    .withBucket('tweets')
    .withIndexName('hashtags_bin')
    .withRange('rock', 'rocl')
    .withReturnKeyAndIndex(true)
    .withCallback(query_cb)
    .build();
client.execute(binIdxCmd);
```

```erlang
{ok, Results} = riakc_pb_socket:get_index_range(
    Pid,
    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
    {binary_index, "hashtags"},   %% index name
    <<"rock">>, <<"rocl">>        %% range query for terms between "rock" and "rocl"
).
```

```golang
cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucketType("indexes").
    WithBucket("tweets").
    WithIndexName("hashtags_bin").
    WithRange("rock", "rocl").
    Build()
if err != nil {
    return err
}

if err := cluster.Execute(cmd); err != nil {
    return err
}
```

```curl
curl localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/rock/rocl?return_terms=true
```

Response:

```json
{
  "results": [
    {
      "rock": "349224101224787968"
    },
    {
      "rocks": "349223639880699905"
    }
  ]
}
```

### Pagination

When asking for large result sets, it is often desirable to ask the
servers to return chunks of results instead of a firehose. You can do so
using `max_results=<n>`, where `n` is the number of results you'd like
to receive.

Assuming more keys are available, a `continuation` value will be
included in the results to allow the client to request the next page.

Here is an example of a range query with both `return_terms` and
pagination against the same Twitter data set. 
+ +```java +Namespace tweetsBucket = new Namespace("indexes", "tweets"); +BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru") + .withMaxResults(5) + .build(); +BinIndexQuery.Response response = client.execute(biq); +``` + +```ruby +bucket = client.bucket_type('indexes').bucket('tweets') +bucket.get_index('hashtags_bin', 'ri'..'ru', max_results: 5) +``` + +```php +(new \Basho\Riak\Command\Builder\QueryIndex($riak)) + ->buildBucket('tweets', 'indexes') + ->withIndexName('hashtags') + ->withRangeValue('ri', 'ru') + ->withMaxResults(5) + ->build() + ->execute() + ->getResults(); +``` + +```python +bucket = client.bucket_type('indexes').bucket('tweets') +bucket.get_index('hashtags_bin', 'ri', 'ru', max_results=5) +``` + +```csharp +var idxId = new RiakIndexId("indexes", "tweets", "hashtags"); +var options = new RiakIndexGetOptions(); +options.SetMaxResults(5); +var rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options); + +options.SetContinuation(rslt.Continuation); +rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options); +``` + +```javascript +function do_query(continuation) { + var binIdxCmdBuilder = new Riak.Commands.KV.SecondaryIndexQuery.Builder() + .withBucketType('indexes') + .withBucket('tweets') + .withIndexName('hashtags_bin') + .withRange('ri', 'ru') + .withMaxResults(5) + .withCallback(pagination_cb); + + if (continuation) { + binIdxCmdBuilder.withContinuation(continuation); + } + + client.execute(binIdxCmdBuilder.build()); +} + +var query_keys = []; +function pagination_cb(err, rslt) { + if (err) { + logger.error("query_cb err: '%s'", err); + return; + } + + if (rslt.done) { + query_keys.forEach(function (key) { + logger.info("2i query key: '%s'", key); + }); + query_keys = []; + + if (rslt.continuation) { + do_query(rslt.continuation); + } + } + + if (rslt.values.length > 0) { + Array.prototype.push.apply(query_keys, + rslt.values.map(function (value) { + return value.objectKey; + })); + } +} + +do_query(); +``` + +```erlang +{ok, Results} = riakc_pb_socket:get_index_range( + Pid, + {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name + {binary_index, "hashtags"}, %% index name + <<"ri">>, <<"ru">>, %% range query from "ri" to "ru" + {max_results, 5} +). +``` + +```golang +func doPaginatedQuery(cluster *riak.Cluster, continuation []byte) error { + builder := riak.NewSecondaryIndexQueryCommandBuilder(). + WithBucketType("indexes"). + WithBucket("tweets"). + WithIndexName("hashtags_bin"). + WithRange("ri", "ru"). 
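
        // Limit each page to five results; a continuation is returned while more remain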
        WithMaxResults(5)

    if continuation != nil && len(continuation) > 0 {
        builder.WithContinuation(continuation)
    }

    cmd, err := builder.Build()
    if err != nil {
        return err
    }

    if err := cluster.Execute(cmd); err != nil {
        return err
    }

    printIndexQueryResults(cmd)

    sciq := cmd.(*riak.SecondaryIndexQueryCommand)
    if sciq.Response == nil {
        return errors.New("[DevUsing2i] expected response but did not get one")
    }

    rc := sciq.Response.Continuation
    if rc != nil && len(rc) > 0 {
        return doPaginatedQuery(cluster, sciq.Response.Continuation)
    }

    return nil
}

func queryingPagination(cluster *riak.Cluster) error {
    return doPaginatedQuery(cluster, nil)
}
```

```curl
curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?max_results=5&return_terms=true"
```

Here is an example JSON response (your client-specific response may differ):

```json
{
  "continuation": "g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=",
  "results": [
    { "rice": "349222574510710785" },
    { "rickross": "349222868095217664" },
    { "ridelife": "349221819552763905" },
    { "ripjake": "349220649341952001" },
    { "ripjake": "349220687057129473" }
  ]
}
```

Take the continuation value from the previous result set and feed it
back into the query.

```java
Namespace tweetsBucket = new Namespace("indexes", "tweets");
BinIndexQuery biq = new BinIndexQuery.Builder(tweetsBucket, "hashtags", "ri", "ru")
        .withContinuation(BinaryValue.create("g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM"))
        .withMaxResults(5)
        .withKeyAndIndex(true)
        .build();
BinIndexQuery.Response response = client.execute(biq);
```

```ruby
bucket = client.bucket_type('indexes').bucket('tweets')
bucket.get_index(
  'hashtags_bin',
  'ri'..'ru',
  continuation: 'g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
  max_results: 5,
  return_terms: true
)
```

```php
(new \Basho\Riak\Command\Builder\QueryIndex($riak))
    ->buildBucket('tweets', 'indexes')
    ->withIndexName('hashtags')
    ->withRangeValue('ri', 'ru')
    ->withMaxResults(5)
    ->withContinuation('g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM')
    ->build()
    ->execute()
    ->getResults();
```

```python
bucket = client.bucket_type('indexes').bucket('tweets')
bucket.get_index(
    'hashtags_bin',
    'ri', 'ru',
    continuation='g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM',
    max_results=5,
    return_terms=True
)
```

```csharp
// rslt is the previous 2i fetch result
var idxId = new RiakIndexId("indexes", "tweets", "hashtags");
var options = new RiakIndexGetOptions();
options.SetMaxResults(5);
options.SetContinuation(rslt.Continuation);
rslt = client.GetSecondaryIndex(idxId, "ri", "ru", options);
```

```javascript
// See above example
```

```erlang
{ok, Results} = riakc_pb_socket:get_index_range(
    Pid,
    {<<"indexes">>, <<"tweets">>}, %% bucket type and bucket name
    {binary_index, "hashtags"},   %% index name
    <<"ri">>, <<"ru">>,           %% range query from "ri" to "ru"
    [
        {continuation, <<"g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM">>},
        {max_results, 5},
        {return_terms, true}
    ]
).

```

```golang
// See above example
```

```curl
curl "localhost:8098/types/indexes/buckets/tweets/index/hashtags_bin/ri/ru?continuation=g2gCbQAAAAdyaXBqYWtlbQAAABIzNDkyMjA2ODcwNTcxMjk0NzM=&max_results=5&return_terms=true"
```

The result:

```json
{
  "continuation": "g2gCbQAAAAlyb2Jhc2VyaWFtAAAAEjM0OTIyMzcwMjc2NTkxMjA2NQ==",
  "results": [
    {
      "ripjake": "349221198774808579"
    },
    {
      "ripped": "349224017347100672"
    },
    {
      "roadtrip": "349221207155032066"
    },
    {
      "roastietime": "349221370724491265"
    },
    {
      "robaseria": "349223702765912065"
    }
  ]
}
```

### Streaming

It is also possible to stream results:

```java
// Available in Riak Java Client 2.1.0 and later
int pollTimeoutMS = 200;
Namespace ns = new Namespace("indexes", "tweets");
String indexName = "hashtags";

BinIndexQuery indexQuery =
    new BinIndexQuery.Builder(ns, indexName, "ri", "ru").build();

final RiakFuture<BinIndexQuery.StreamingResponse, BinIndexQuery> streamingFuture =
    client.executeAsyncStreaming(indexQuery, pollTimeoutMS);

// For streaming commands, the future's value will be available before
// the future is complete, so you may begin to pull results from the
// provided iterator as soon as possible.
final BinIndexQuery.StreamingResponse streamingResponse = streamingFuture.get();

for (BinIndexQuery.Response.Entry e : streamingResponse)
{
    // Do something with key...
}

streamingFuture.await();
Assert.assertTrue(streamingFuture.isDone());
```

```ruby
bucket = client.bucket_type('indexes').bucket('people')
bucket.get_index('myindex_bin', 'foo', stream: true)
```

```php
/*
  It is not currently possible to stream results using the PHP client
*/
```

```python
bucket = client.bucket_type('indexes').bucket('people')
keys = []
for key in bucket.stream_index('myindex_bin', 'foo'):
    keys.append(key)
```

```csharp
var riakIndexId = new RiakIndexId("indexes", "tweets", "hashtags");
var indexRiakResult = client.StreamGetSecondaryIndex(riakIndexId, "ri", "ru");
var indexResult = indexRiakResult.Value;
foreach (var key in indexResult.IndexKeyTerms)
{
    // Do something with key...
}
```

```javascript
var binIdxCmd = new Riak.Commands.KV.SecondaryIndexQuery.Builder()
    .withBucketType('indexes')
    .withBucket('tweets')
    .withIndexName('hashtags_bin')
    .withRange('ri', 'ru')
    .withStreaming(true)
    .withCallback(query_cb) // See query_cb in other examples
    .build();
client.execute(binIdxCmd);
```

```erlang
{ok, KeyStream} = riakc_pb_socket:get_index_eq(
    Pid,
    {<<"indexes">>, <<"people">>}, %% bucket type and bucket name
    {binary_index, "myindex"},    %% index name and type
    <<"foo">>,                    %% value of the index
    [{stream, true}]              %% enable streaming
).
```

```golang
cmd, err := riak.NewSecondaryIndexQueryCommandBuilder().
    WithBucketType("indexes").
    WithBucket("tweets").
    WithIndexName("hashtags_bin").
    WithRange("ri", "ru").
    WithStreaming(true).
    WithCallback(streamingCallback).
    Build()
if err != nil {
    return err
}

if err := cluster.Execute(cmd); err != nil {
    return err
}
```

```curl
curl localhost:8098/types/indexes/buckets/people/index/myindex_bin/foo?stream=true
```

Streaming can also be combined with pagination (`max_results`) and `return_terms`.

### Sorting

As of Riak 1.4, the result set is sorted on index values (when executing
range queries) and object keys. 
See the pagination example above: hash
tags (2i keys) are returned in ascending order, and the object keys
(Twitter IDs) for the messages which contain the `ripjake` hash tag are
also returned in ascending order.

### Retrieve all Bucket Keys via the `$bucket` Index

The following example retrieves the keys for all objects stored in the
bucket `people` using an exact match on the special `$bucket` index.

```curl
curl localhost:8098/types/indexes/buckets/people/index/\$bucket/_
```

### Count Bucket Objects via the `$bucket` Index

The following example performs a secondary index lookup on the `$bucket`
index, as in the previous example, and pipes the results into a MapReduce
job that counts the number of records in the `people` bucket. To improve
efficiency, the batch size has been increased from the default of 20.

```curl
curl -XPOST localhost:8098/mapred \
  -H "Content-Type: application/json" \
  -d @-<<EOF
{
  "inputs": {
    "bucket": "people",
    "index": "\$bucket",
    "key": "people"
  },
  "query": [
    {
      "reduce": {
        "language": "erlang",
        "module": "riak_kv_mapreduce",
        "function": "reduce_count_inputs",
        "arg": {
          "reduce_phase_batch_size": 1000
        }
      }
    }
  ]
}
EOF
```

diff --git a/content/riak/kv/3.0.4/developing/usage/security.md b/content/riak/kv/3.0.4/developing/usage/security.md
new file mode 100644
index 0000000000..0830015fee
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/security.md
@@ -0,0 +1,103 @@
---
title: "Client Security"
description: ""
project: "riak_kv"
project_version: 3.0.4
menu:
  riak_kv-3.0.4:
    name: "Security"
    identifier: "usage_security"
    weight: 114
    parent: "developing_usage"
toc: true
aliases:
  - /riak/3.0.4/dev/advanced/client-security
  - /riak/kv/3.0.4/dev/advanced/client-security
---

Versions of Riak 2.0 and later come equipped with a [security subsystem]({{<baseurl>}}riak/kv/3.0.4/using/security/basics) that enables you to choose

* which Riak users/clients are authorized to perform a wide variety of
  Riak operations, and
* how those users/clients are required to authenticate themselves.

The following four authentication mechanisms, aka [security sources]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/), are available:

* [Trust]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#trust-based-authentication)-based
  authentication enables you to specify trusted
  [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)s
  from which all clients will be authenticated by default
* [Password]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#password-based-authentication)-based authentication requires
  that clients provide a username and password
* [Certificate]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication
  requires that clients present a CA-generated certificate and its
  associated private key
* [Pluggable authentication module (PAM)]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication requires
  clients to authenticate using the PAM service specified using the
  [`riak-admin security`]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#managing-sources)
  command line interface

Riak's approach to security is highly flexible. If you choose to use
Riak's security feature, you do not need to require that all clients
authenticate via the same means. Instead, you can specify authentication
sources on a client-by-client, i.e. user-by-user, basis. 
This means that +you can require clients performing, say, [MapReduce]({{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce/) +operations to use certificate auth, while clients performing [K/V Operations]({{<baseurl>}}riak/kv/3.0.4/developing/usage) have to use username and password. The approach +that you adopt will depend on your security needs. + +This document provides a general overview of how that works. For +managing security in Riak itself, see the following documents: + +* [Authentication and Authorization]({{<baseurl>}}riak/kv/3.0.4/using/security/basics) +* [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/) + +We also provide client-library-specific guides for the following +officially supported clients: + +* [Java]({{<baseurl>}}riak/kv/3.0.4/developing/usage/security/java) +* [Ruby]({{<baseurl>}}riak/kv/3.0.4/developing/usage/security/ruby) +* [PHP]({{<baseurl>}}riak/kv/3.0.4/developing/usage/security/php) +* [Python]({{<baseurl>}}riak/kv/3.0.4/developing/usage/security/python) +* [Erlang]({{<baseurl>}}riak/kv/3.0.4/developing/usage/security/erlang) + +## Certificates, Keys, and Authorities + +If Riak security is enabled, all client operations, regardless of the +security source you choose for those clients, must be over a secure SSL +connection. If you are using a self-generated Certificate Authority +(CA), Riak and connecting clients will need to share that CA. + +To use certificate-based auth, you will need to create a Public Key +Infrastructure (PKI) based on +[x.509](http://en.wikipedia.org/wiki/X.509) certificates. The central +foundation of your PKI should be a Certificate Authority (CA), created +inside of a secure environment, that can be used to sign certificates. +In addition to a CA, your client will need to have access to a private +key shared only by the client and Riak as well as a CA-generated +certificate. + +To prevent so-called [Man-in-the-Middle +attacks](http://en.wikipedia.org/wiki/Man-in-the-middle_attack), private +keys should never be shared beyond Riak and connecting clients. + +> **HTTP not supported** +> +> Certificate-based authentication is available only through Riak's +[Protocol Buffers]({{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers/) interface. It is not available through the +[HTTP API]({{<baseurl>}}riak/kv/3.0.4/developing/api/http). + +### Default Names + +In Riak's [configuration files]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#security), the +default certificate file names are as follows: + +Cert | Filename +:----|:------- +Certificate authority (CA) | `cacertfile.pem` +Private key | `key.pem` +CA-generated cert | `cert.pem` + +These filenames will be used in the client-library-specific tutorials. + + + + diff --git a/content/riak/kv/3.0.4/developing/usage/security/erlang.md b/content/riak/kv/3.0.4/developing/usage/security/erlang.md new file mode 100644 index 0000000000..58a619e380 --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/security/erlang.md @@ -0,0 +1,118 @@ +--- +title_supertext: "Client Security:" +title: "Erlang" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Erlang" + identifier: "usage_security_erlang" + weight: 103 + parent: "usage_security" +toc: true +aliases: + - /riak/3.0.4/dev/advanced/client-security/erlang + - /riak/kv/3.0.4/dev/advanced/client-security/erlang +--- + +This tutorial shows you how to set up a Riak Erlang client to +authenticate itself when connecting to Riak. 
If you are using [trust-]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
security setup described [below](#erlang-client-basics). [Password]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered
in a [later section](#password-based-authentication). If you are using
[certificate]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
the instructions in the [section below](#certificate-based-authentication).

{{% note title="Note on certificate generation" %}}
This tutorial does not cover certificate generation. It assumes that all
necessary certificates have already been created and are stored in a directory
called `/ssl_dir`. This directory name is used only for example purposes.
{{% /note %}}

## Erlang Client Basics

When connecting to Riak using an Erlang-based client, you typically use
a process identifier to refer to the client connection. The following
example creates a process identifier (we'll call it `Pid`) for a
connection to `localhost` on port 8087:

```erlang
{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087).
```

If you are using Riak security, _all_ connecting clients should have
access to the same Certificate Authority (CA) used on the server side,
regardless of which [security source]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/) you
choose. In addition, all clients should provide a username. The example
above created a connection to Riak without specifying a username or CA.
That information is specified as a list of options passed to the
`start` function. We'll specify those options in a list called
`SecurityOptions`.

```erlang
CertDir = "/ssl_dir",
SecurityOptions = [
                   {credentials, "riakuser", ""},
                   {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
                  ],
{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
```

Please note that you do not need to specify a password if you are not
using password-based authentication. If you are using a different
security source, Riak will ignore the password. You can enter an empty
string (as in the example above) or anything you'd like.

This client is not currently set up to use any of the available security
sources, with the exception of trust-based authentication, provided that
the [CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)
from which the client is connecting has been specified as trusted. More
on specifying trusted CIDRs can be found in [Trust-based Authentication]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#trust-based-authentication).

## Password-based Authentication

To enable our client to use password-based auth, we can use most of the
information from the example above, with the exception that we'll also
specify a password for the client in the `SecurityOptions` list from
above. We'll use the password `rosebud` here and in the rest of the
examples.

```erlang
CertDir = "/ssl_dir",
SecurityOptions = [
                   {credentials, "riakuser", "rosebud"},
                   {cacertfile, filename:join([CertDir, "cacertfile.pem"])}
                  ],
{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions).
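
%% Pid is now an authenticated connection; use it as usual, e.g.
%% riakc_pb_socket:ping(Pid).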
+``` + +## PAM-based Authentication + +If you have specified that a specific client be authenticated using +[PAM]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#pam-based-authentication), you will +need to provide a CA as well as the username and password that you +specified when creating the user in Riak. For more, see our +documentation on [User Management]({{<baseurl>}}riak/kv/3.0.4/using/security/basics/#user-management). + +## Certificate-based Authentication + +Using certificate-based authentication requires us to specify the +location of a general CA (as with all security sources), a username, a +CA-generated cert, and a private key. We'll assume that all certs are +stored in `/ssl_dir`, as in the previous examples. + +```erlang +CertDir = "/ssl_dir", +SecurityOptions = [ + {credentials, "riakuser", "rosebud"}, + {cacertfile, filename:join([CertDir, "cacertfile.pem"])}, + {certfile, filename:join([CertDir, "cert.pem"])}, + {keyfile, filename:join([CertDir, "key.pem"])} + ], +{ok, Pid} = riakc_pb_socket:start("127.0.0.1", 8087, SecurityOptions). +``` + + + + diff --git a/content/riak/kv/3.0.4/developing/usage/security/java.md b/content/riak/kv/3.0.4/developing/usage/security/java.md new file mode 100644 index 0000000000..368ded55aa --- /dev/null +++ b/content/riak/kv/3.0.4/developing/usage/security/java.md @@ -0,0 +1,121 @@ +--- +title_supertext: "Client Security:" +title: "Java" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Java" + identifier: "usage_security_java" + weight: 100 + parent: "usage_security" +toc: true +aliases: + - /riak/3.0.4/dev/advanced/client-security/java + - /riak/kv/3.0.4/dev/advanced/client-security/java +--- + +This tutorial shows you how to set up a Riak Java client to authenticate +itself when connecting to Riak. + +If you are using [trust-]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the +security setup described [below](#java-client-basics). [Certificate]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the Java client. + +{{% note title="Note on certificate generation" %}} +This tutorial does not cover certificate generation. It assumes that all +necessary certificates have already been created and are stored in a directory +called `/ssl_dir`. This directory name is used only for example purposes. +{{% /note %}} + +## Java Client Basics + +When connecting to Riak using a Java-based client, you typically do so +by instantiating separate `RiakNode` objects for each node in your +cluster, a `RiakCluster` object registering those `RiakNode` objects, +and finally a `RiakClient` object that registers the general cluster +configuration. In this document, we will be working with only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/) you +choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. 
That object will be used to
+create a cluster object (we'll call it `cluster`), which will in turn be
+used to create a `client` object. The setup below does not specify a CA:
+
+```java
+import com.basho.riak.client.api.RiakClient;
+import com.basho.riak.client.api.RiakCluster;
+import com.basho.riak.client.api.RiakNode;
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        // This will specify a username but no password or keystore:
+        .withAuth("riakuser", null, null)
+        .build();
+
+RiakCluster cluster = new RiakCluster.Builder(node)
+        .build();
+
+RiakClient client = new RiakClient(cluster);
+```
+
+This client object is not currently set up to use any of the available
+security sources. This will change in the sections below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+setup from the example above, with the exception that we will specify a
+password for the client in the `withAuth` method in the `node` object's
+constructor rather than leaving it as `null`. We will also pass a
+`KeyStore` object into that method.
+
+```java
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.security.KeyStore;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+
+// Open an InputStream on the CA cert file
+InputStream inputStream = new FileInputStream("/ssl_dir/cacertfile.pem");
+
+// Generate an X509Certificate from the InputStream and close the stream
+CertificateFactory certFactory = CertificateFactory.getInstance("X.509");
+X509Certificate caCert = (X509Certificate) certFactory.generateCertificate(inputStream);
+inputStream.close();
+
+// Generate a KeyStore object
+KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ks.load(null, "password".toCharArray());
+ks.setCertificateEntry("cacert", caCert);
+
+RiakNode node = new RiakNode.Builder()
+        .withRemoteAddress("127.0.0.1")
+        .withRemotePort(8087)
+        .withAuth("riakuser", "rosebud", ks)
+        .build();
+
+// Construct the cluster and client object in the same fashion as above
+```
+
+## PAM- and Trust-based Authentication
+
+If you are using PAM- or trust-based authentication, the only difference
+from password-based authentication is that you do not need to specify a
+password.
+
+## Certificate-based Authentication
+
+Certificate-based authentication is not currently supported in the
+official Riak Java client.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/security/php.md b/content/riak/kv/3.0.4/developing/usage/security/php.md
new file mode 100644
index 0000000000..fb7ced8844
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/security/php.md
@@ -0,0 +1,122 @@
+---
+title_supertext: "Client Security:"
+title: "PHP"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "PHP"
+    identifier: "usage_security_php"
+    weight: 104
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/advanced/client-security/php
+  - /riak/kv/3.0.4/dev/advanced/client-security/php
+---
+
+This tutorial shows you how to set up a Riak PHP client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#trust-based-authentication) or [PAM]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the
+security setup described [below](#php-client-basics). 
[Certificate]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication is not +yet supported in the PHP client due to limitations of the HTTP interface of Riak. + +## PHP Client Basics + +When connecting to Riak using a PHP-based client, you typically do so +by instantiating separate `\Basho\Riak\Node` objects for each node in your +cluster and passing those `\Basho\Riak\Node` objects as an array to a +`\Basho\Riak` object as a dependency. In this document, we will be working with +only one node. + +If you are using Riak security, _all_ connecting clients should have +access to the same Certificate Authority (CA) used on the server side, +regardless of which [security source]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/) you choose. All clients should also provide a username, regardless of +security source. The example below sets up a single node object (we'll +simply call it `node`) that connects to Riak on `localhost` and on port +8087 and specifies `riakuser` as a username. That object will be used to +create a Riak object. The setup below does not specify a CA and will throw +an `\Basho\Riak\Node\Builder\Exception`: + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +This client object is not currently set up to use any of the available +security sources. This will change in the sections below. + +## Password-based Authentication + +To enable our client to use password-based auth, we can use most of the +setup from the example above, with the exception that we will specify a +password for the client in the `usingPasswordAuthentication` method in +the `node` object's builder rather than omitting it. We will also +pass the path of the CA file relative to the current working directory into +the `withCertificateAuthorityFile` method. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPasswordAuthentication('riakuser', 'rosebud') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## PAM- and Trust-based Authentication + +If you are using PAM- or trust-based authentication, the only difference +from password-based authentication is that you do not need to specify a +password. There are helper methods that handle this for you, +`usingPamAuthentication` and `usingTrustAuthentication`. + +```php +use \Basho\Riak; +use \Basho\Riak\Node; + +// PAM Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingPamAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// Trust Example +$node = (new Node\Builder()) + ->atHost('127.0.0.1') + ->onPort('8087') + ->usingTrustAuthentication('riakuser') + ->withCertificateAuthorityFile(getcwd() . '/ssl_dir/cacertfile.pem') + ->build(); + +// since we are using a single node, it needs to be wrapped in array brackets +$riak = new Riak([$node]); +``` + +## Certificate-based Authentication + +Certificate-based authentication is not currently supported in the +official Riak PHP client due to limitations in the HTTP interface. 
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/security/python.md b/content/riak/kv/3.0.4/developing/usage/security/python.md
new file mode 100644
index 0000000000..dc65a60341
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/security/python.md
@@ -0,0 +1,176 @@
+---
+title_supertext: "Client Security:"
+title: "Python"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Python"
+    identifier: "usage_security_python"
+    weight: 102
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/advanced/client-security/python
+  - /riak/kv/3.0.4/dev/advanced/client-security/python
+---
+
+This tutorial shows you how to set up a Riak Python client to
+authenticate itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you can use the security
+setup described [below](#python-client-basics). [Password]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication). If you are using
+[certificate]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication, follow
+the instructions in the [section below](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## OpenSSL Versions
+
+The Riak Python client requires that you install OpenSSL 1.0.1g or
+later. If you have an earlier version installed, you will receive a
+warning along the following lines:
+
+```
+Found OpenSSL 0.9.8za 5 Jun 2014 version, but expected at least OpenSSL 1.0.1g. Security may not support TLS 1.2.
+```
+
+## Python Client Basics
+
+When connecting to Riak using a Python-based client, you typically
+instantiate an object from the `RiakClient` class that then handles all
+interactions with Riak. All authentication-related information that
+needs to be used by the client object can be passed to the object upon
+instantiation by creating a `SecurityCreds` object.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/) you
+choose. All clients should also provide a username. The example below
+sets up a client object (we'll simply call it `client`) that connects to
+Riak on `localhost` and on port 8087 without any security credentials:
+
+```python
+from riak import RiakClient
+
+client = RiakClient(host='127.0.0.1', pb_port=8087)
+```
+
+To provide security credentials, we'll create an object called `creds`
+and specify `riakuser` as the username. We'll also point the client to a
+CA stored at `/ssl_dir/cacertfile.pem`.
+
+```python
+from riak.security import SecurityCreds
+
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem')
+```
+
+Now we can specify those credentials when we create our `client` object.
+
+```python
+client = RiakClient(host='127.0.0.1', pb_port=8087, credentials=creds)
+```
+
+This client object is not currently set up to use any of the
+available security sources with the exception of trust-based auth,
+provided that the
+[CIDR](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) from
+which the client is connecting has been specified as trusted. More on
+specifying trusted CIDRs can be found in [Trust-based
+Authentication]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#trust-based-authentication).
+
+**Note**: The examples in the following sections specify certs on the
+basis of their filepaths, e.g. `/ssl_dir/cacertfile.pem`. In addition to
+specifying certs by location, you can also provide OpenSSL objects
+instead. You can find out how to do so in [Using OpenSSL Objects](#using-openssl-objects) below.
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we'll also specify a
+password for the client in the `creds` object from above. We'll use the
+password `rosebud` here and in the rest of the examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      password='rosebud')
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/3.0.4/using/security/basics/#user-management).
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a general CA (as with all security sources), a username, a
+CA-generated cert, and a private key. We'll assume that all certs are
+stored in `/ssl_dir`, as in the previous examples.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      cacert_file='/ssl_dir/cacertfile.pem',
+                      cert_file='/ssl_dir/cert.pem',
+                      pkey_file='/ssl_dir/key.pem')
+```
+
+## Specifying a Certificate Revocation List
+
+If you are using a CA-generated Certificate Revocation List (CRL), you
+can specify its filepath using the `crl_file` parameter.
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      crl_file='/ssl_dir/revocation.crl')
+```
+
+## Specifying Ciphers
+
+To specify a list of preferred [security ciphers]({{<baseurl>}}riak/kv/3.0.4/using/security/basics/#security-ciphers), you can pass in a colon-delimited
+string to the `ciphers` parameter:
+
+```python
+creds = SecurityCreds(username='riakuser',
+                      # Using the cert information from above
+                      ciphers='X-CIPHER-1:X-CIPHER-2:X-CIPHER-3:ETC')
+```
+
+## Using OpenSSL Objects
+
+Whenever you specify certs, you have the option of either passing in
+file paths as strings (as in the examples above) or properly created
+OpenSSL objects, e.g. objects created using the
+[pyOpenSSL](https://pyopenssl.readthedocs.org/en/latest/) library. If
+you generate OpenSSL objects this way, you should note that they must
+be specified differently when creating a `SecurityCreds` object. The
+table below lists the appropriate parameter names for each method, as
+well as the pyOpenSSL class to which each cert must belong if you create
+OpenSSL objects. 
+
+Cert | File path | OpenSSL object | Class
+:----|:----------|:---------------|:-----
+Certificate Authority (CA) | `cacert_file` | `cacert` | `OpenSSL.crypto.X509`
+Private key | `key_file` | `key` | `OpenSSL.crypto.PKey`
+CA-generated cert | `cert_file` | `cert` | `OpenSSL.crypto.X509`
+CRL | `crl_file` | `crl` | `OpenSSL.crypto.CRL`
+
+If you specify filepaths, the appropriate certs will be loaded and
+converted into the appropriate OpenSSL object. The functions used for
+this are `OpenSSL.crypto.load_privatekey()` for the private key and
+`OpenSSL.crypto.load_certificate()` for the cert and CA cert.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/security/ruby.md b/content/riak/kv/3.0.4/developing/usage/security/ruby.md
new file mode 100644
index 0000000000..76859f8d45
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/security/ruby.md
@@ -0,0 +1,162 @@
+---
+title_supertext: "Client Security:"
+title: "Ruby"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Ruby"
+    identifier: "usage_security_ruby"
+    weight: 101
+    parent: "usage_security"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/advanced/client-security/ruby
+  - /riak/kv/3.0.4/dev/advanced/client-security/ruby
+---
+
+This tutorial shows you how to set up a Riak Ruby client to authenticate
+itself when connecting to Riak.
+
+If you are using [trust-]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/) or [PAM]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#pam-based-authentication)-based authentication, you
+can use the security setup described in the [Ruby Client Basics](#ruby-client-basics) section.
+[Password]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#password-based-authentication)-based authentication is covered
+in a [later section](#password-based-authentication), while [certificate]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#certificate-based-authentication)-based authentication
+is covered [further down](#certificate-based-authentication).
+
+{{% note title="Note on certificate generation" %}}
+This tutorial does not cover certificate generation. It assumes that all
+necessary certificates have already been created and are stored in a directory
+called `/ssl_dir`. This directory name is used only for example purposes.
+{{% /note %}}
+
+## Ruby Client Basics
+
+When connecting to Riak using a Ruby-based client, you must instantiate
+an object from the `Riak::Client` class that then handles interactions
+with Riak (you may have more than one client object active in an
+application if you wish). All authentication-related information that
+needs to be used can be passed to the object upon instantiation in an
+`authentication` hash.
+
+If you are using Riak Security, _all_ connecting clients should have
+access to the same Certificate Authority (CA) used on the server side,
+regardless of which [security source]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/) you choose. All clients should also provide a username. The example below sets up a client object (we'll simply call it `client`) that connects
+to Riak on `localhost` and on port 8087, specifies `riakuser` as a
+username, and points the client to a CA located at
+`/ssl_dir/cacertfile.pem`. 
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(
+  host: '127.0.0.1',
+  pb_port: 8087,
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser'
+  }
+)
+```
+
+This client object is currently not set up to use any of the available
+security sources, except trust-based auth, provided that the CIDR from
+which the client is connecting has been specified as trusted. More on
+this in [Trust-based Authentication]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#trust-based-authentication).
+
+## Password-based Authentication
+
+To enable our client to use password-based auth, we can use most of the
+information from the example above, with the exception that we will
+specify a password for the client in the `authentication` hash. We'll
+use the password `rosebud` here and in the rest of the examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    password: 'rosebud'
+  }
+)
+```
+
+## PAM-based Authentication
+
+If you have specified that a specific client be authenticated using
+[PAM]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#pam-based-authentication), you will
+need to provide a CA as well as the username and password that you
+specified when creating the user in Riak. For more, see our
+documentation on [User Management]({{<baseurl>}}riak/kv/3.0.4/using/security/basics/#user-management).
+
+
+## Certificate-based Authentication
+
+Using certificate-based authentication requires us to specify the
+location of a CA (as with all security sources), a username, a
+client-specific CA, a CA-generated cert, and a private key. We'll assume
+that all certs are stored in `/ssl_dir`, as in the previous examples.
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    client_ca: '/ssl_dir/client_cert.pem',
+    cert: '/ssl_dir/cert.pem',
+    key: '/ssl_dir/key.pem'
+  }
+)
+```
+
+The `client_ca` must be specified if you intend to use a CA that is
+different from the CA used by Riak, e.g. if you are integrating with
+an existing single sign-on (SSO) system. If the client and server CA are
+the same, you don't need to specify `client_ca`. The client cert and
+key, however, must always be specified.
+
+The `client_ca`, `cert`, and `key` fields are all flexible in their
+usage. You can use a string specifying a filename (as in the example
+above), or you can pass in an appropriate OpenSSL object, e.g. an SSL
+object created using the
+[OpenSSL](http://ruby-doc.org/stdlib-2.0/libdoc/openssl/rdoc/OpenSSL.html)
+gem. If you specify filenames, those files will be loaded and
+converted into the appropriate OpenSSL object.
+
+## Specifying a Certificate Revocation List
+
+If you create certificates specifying a CA-signed Certificate Revocation
+List (CRL), those certs will be checked against the CRLs specified. You
+can specify the location of the list in the `authentication` hash:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Using the cert paths from above
+    crl_file: '/ssl_dir/revocation.crl'
+  }
+)
+```
+
+CRL checking can sometimes be a slow process. To disable it, you can set
+`crl` to `false` in the `authentication` hash when instantiating your
+client object. 
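+
+For example, a minimal sketch along the lines of the examples above (the
+`crl` flag is the setting described in this section; everything else is
+unchanged) might look like this:
+
+```ruby
+client = Riak::Client.new(
+  # Using the host and pb_port from above
+  authentication: {
+    ca_file: '/ssl_dir/cacertfile.pem',
+    user: 'riakuser',
+    # Skip CRL checking entirely for this client
+    crl: false
+  }
+)
+```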
+
+## Online Certificate Status Protocol
+
+If you create certificates that specify an Online Certificate Status
+Protocol
+([OCSP](http://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol))
+endpoint, that endpoint will automatically be checked. If the endpoint is not
+available or if checking is running slowly, you can disable OCSP
+checking by setting `ocsp` to `false` in the `authentication` hash.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/developing/usage/updating-objects.md b/content/riak/kv/3.0.4/developing/usage/updating-objects.md
new file mode 100644
index 0000000000..7f4a340b74
--- /dev/null
+++ b/content/riak/kv/3.0.4/developing/usage/updating-objects.md
@@ -0,0 +1,778 @@
+---
+title: "Updating Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Updating Objects"
+    identifier: "usage_updating_objects"
+    weight: 102
+    parent: "developing_usage"
+toc: true
+aliases:
+  - /riak/3.0.4/dev/using/updates
+  - /riak/kv/3.0.4/dev/using/updates
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+
+## Using Causal Context
+
+If an object already exists under a certain key and you want to write a
+new object to that key, Riak needs to know what to do, especially if
+multiple writes are happening at the same time. Which of the objects
+being written should be deemed correct? These kinds of scenarios can
+arise quite frequently in distributed, [eventually consistent]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency) systems.
+
+Riak decides which object to choose in case of conflict using [causal context]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context). Causal context objects track the causal history of objects.
+They are attached to _all_ Riak objects as metadata, and they are not
+readable by humans. They may sound complex---and they are fairly complex
+behind the scenes---but using them in your application is very simple.
+
+Using causal context in an update involves the following steps:
+
+1. Fetch the object
+2. Modify the object's value (without modifying the fetched [context object]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context))
+3. Write the new object to Riak
+
+Step 2 is the most important here. All of Basho's official Riak clients
+enable you to modify an object's value without modifying its [causal context]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context). Although a more detailed tutorial on context objects and
+object updates can be found in [Conflict Resolution]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution), we'll walk you
+through a basic example here.
+
+Let's say that the current NBA champion is the Washington Generals.
+We've stored that data in Riak under the key `champion` in the bucket
+`nba`, which bears the bucket type `sports`. The value of the object is
+a simple text snippet that says `Washington Generals`.
+
+But one day the Harlem Globetrotters enter the league and dethrone the
+hapless Generals (forever, as it turns out). Because we want our Riak
+database to reflect this new development in the league, we want to make
+a new write to the `champion` key. Let's read the object stored there
+and modify the value. 
+
+```java
+Location currentChampion = new Location(new Namespace("sports", "nba"), "champion");
+FetchValue fetch = new FetchValue.Builder(currentChampion)
+        .build();
+FetchValue.Response response = client.execute(fetch);
+RiakObject obj = response.getValue(RiakObject.class);
+obj.setValue(BinaryValue.create("Harlem Globetrotters"));
+```
+
+```ruby
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.raw_data = 'Harlem Globetrotters'
+obj.store
+```
+
+```php
+$location = new \Basho\Riak\Location('champion', new \Basho\Riak\Bucket('nba', 'sports'));
+$object = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+    ->withLocation($location)
+    ->build()
+    ->execute()
+    ->getObject();
+
+$object->setData('Harlem Globetrotters');
+
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->withLocation($location)
+    ->withObject($object)
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('sports').bucket('nba')
+obj = bucket.get('champion')
+obj.data = 'Harlem Globetrotters'
+```
+
+```csharp
+var id = new RiakObjectId("sports", "nba", "champion");
+var obj = new RiakObject(id, "Washington Generals",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+
+rslt = client.Get(id);
+obj = rslt.Value;
+obj.SetObject("Harlem Globetrotters",
+    RiakConstants.ContentTypes.TextPlain);
+rslt = client.Put(obj);
+```
+
+```javascript
+var riakObj = new Riak.Commands.KV.RiakObject();
+riakObj.setContentType('text/plain');
+riakObj.setValue('Washington Generals');
+
+var options = {
+    bucketType: 'sports', bucket: 'nba', key: 'champion',
+    value: riakObj
+};
+client.storeValue(options, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+    delete options.value;
+    client.fetchValue(options, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+        var fetchedObj = rslt.values.shift();
+        fetchedObj.setValue('Harlem Globetrotters');
+        options.value = fetchedObj;
+        options.returnBody = true;
+        client.storeValue(options, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+            var updatedObj = rslt.values.shift();
+            logger.info("champion: %s", updatedObj.value.toString('utf8'));
+        });
+    });
+});
+```
+
+```erlang
+%% In the Erlang client, you cannot view a context object directly, but it
+%% will be included in the output when you fetch an object:
+
+{ok, Obj} = riakc_pb_socket:get(Pid,
+                                {<<"sports">>, <<"nba">>},
+                                <<"champion">>),
+UpdatedObj = riakc_obj:update_value(Obj, <<"Harlem Globetrotters">>),
+{ok, NewestObj} = riakc_pb_socket:put(Pid, UpdatedObj, [return_body]).
+```
+
+```golang
+obj := &riak.Object{
+    ContentType:     "text/plain",
+    Charset:         "utf-8",
+    ContentEncoding: "utf-8",
+    Value:           []byte("Washington Generals"),
+}
+
+cmd, err := riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+obj = rsp.Values[0]
+obj.Value = []byte("Harlem Globetrotters")
+
+cmd, err = riak.NewStoreValueCommandBuilder().
+    WithBucketType("sports").
+    WithBucket("nba").
+    WithKey("champion").
+    WithContent(obj).
+    WithReturnBody(true).
+    Build()
+
+if err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+if err := cluster.Execute(cmd); err != nil {
+    fmt.Println(err.Error())
+    return
+}
+
+svc = cmd.(*riak.StoreValueCommand)
+rsp = svc.Response
+obj = rsp.Values[0]
+fmt.Printf("champion: %v", string(obj.Value))
+```
+
+```curl
+# When using curl, the context object is attached to the X-Riak-Vclock header
+
+curl -i http://localhost:8098/types/sports/buckets/nba/keys/champion
+
+# In the resulting output, the header will look something like this:
+
+X-Riak-Vclock: a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+
+# When performing a write to the same key, that same header needs to
+# accompany the write for Riak to be able to use the context object
+```
+
+In the samples above, we didn't need to actually interact with the
+context object, as retaining and passing along the context object was
+accomplished automatically by the client. If, however, you do need
+access to an object's context, the clients enable you to fetch it from
+the object:
+
+```java
+// Using the RiakObject obj from above:
+
+Vclock vClock = obj.getVclock();
+System.out.println(vClock.asString());
+
+// The context object will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```ruby
+# Using the RObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```php
+# Using the $object from above:
+
+echo $object->getVclock(); // a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```python
+# Using the RiakObject obj from above:
+
+obj.vclock
+
+# The context object will look something like this:
+# a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```csharp
+// Using the rslt object from above:
+var vclock = rslt.Value.VectorClock;
+Console.WriteLine(Convert.ToBase64String(vclock));
+
+// The output will look something like this:
+// a85hYGBgzGDKBVIcWu/1S4OVPaIymBIZ81gZbskuOMOXBQA=
+```
+
+```javascript
+// Using the RiakObject fetchedObj from above:
+var fetchedObj = rslt.values.shift();
+logger.info("vclock: %s", fetchedObj.getVClock().toString('base64'));
+
+// The output will look something like this:
+// vclock: a85hYGBgymDKBVIcR4M2cov1HeHKYEpkymNlsE2cfo4PKjXXjuOU+FHdWqAUM1CqECSVBQA=
+```
+
+```erlang
+%% Using the Obj object from above:
+
+riakc_obj:vclock(Obj).
+
+%% The context object will look something like this in the Erlang shell:
+%% <<107,206,97,96,96,96,204,96,226,82,28,202,156,255,126,
+%%   6,175,157,255,57,131,41,145,49,143,149,225,240,...>>
+```
+
+```golang
+svc := cmd.(*riak.StoreValueCommand)
+rsp := svc.Response
+fmt.Println(rsp.VClock)
+
+// Output:
+// X3hNXFq3ythUqvvrG9eJEGbUyLS
+```
+
+## The Object Update Cycle
+
+If you decide that your application requires mutable data in Riak, we
+recommend that you:
+
+* avoid high-frequency object updates to the same key (i.e. multiple
+  updates per second for long periods of time), as this will degrade
+  Riak performance; and that you
+* follow a read-modify-write cycle when performing updates.
+
+That cycle looks something like this:
+
+1. **Read** the object from Riak. This step is important for updates
+because it enables you to fetch the object's [causal context]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context), which
+is the information that Riak uses to make decisions about which object
+values are most recent (this is especially useful for objects that are
+frequently updated). 
This context object needs to be passed back to Riak
+when you update the object. This step is handled for you by Basho's
+client libraries as long as you perform a read prior to an update. In
+addition, if you have chosen to allow Riak to generate
+[siblings]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/#siblings) (which we recommend), you
+should **resolve sibling conflicts** upon read if they exist. For more
+on this, please see our documentation on [conflict resolution]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution), along
+with examples from our official client libraries:
+  * [Java]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/java)
+  * [Ruby]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/ruby)
+  * [Python]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/python)
+  * [C#]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/csharp)
+  * [Go]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution/golang)
+2. **Modify the object** on the application side.
+3. **Write** the new, modified object to Riak. Because you read the
+object first, Riak will receive the object's causal context metadata.
+Remember that this happens automatically.
+
+In general, you should read an object before modifying it. Think of it
+as performing a `GET` prior to any `PUT` when interacting with a REST
+API.
+
+> **Note on strong consistency**
+>
+> If you are using Riak's [strong consistency]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/strong-consistency/) feature, it is not only desirable but also necessary to use the read/modify/write cycle explained in the section above. If you attempt to update an object without fetching the object first, your update operation will necessarily fail. More information can be found in the
+[strong consistency documentation]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/strong-consistency/#strongly-consistent-writes).
+
+### Updating Deleted Objects
+
+You should use the read-modify-write cycle explained above at all times,
+_even if you're updating deleted objects_. The reasons for that can be
+found in our documentation on [tombstones]({{<baseurl>}}riak/kv/3.0.4/using/reference/object-deletion/#tombstones).
+
+There are some modifications that you may need to make if you are
+updating objects that may have been deleted previously. If you are using
+the Java client, an explanation and examples are given in the
+[Java-specific section below](#java-client-example). If
+you are using the Python or Erlang clients, causal context for deleted
+objects will be handled automatically. If you are using the Ruby client,
+you will need to explicitly set the `deletedvclock` parameter to `true`
+when reading an object, like so:
+
+```ruby
+bucket = client.bucket('fruits')
+obj = bucket.get('banana', deletedvclock: true)
+```
+
+## Example Update
+
+In this section, we'll provide an update example for Basho's official Ruby,
+PHP, Python, .NET, Node.js, Erlang, and Go clients. Because updates with the official
+Java client function somewhat differently, those examples can be found in the
+[section below](#java-client-example).
+
+For our example, imagine that you are storing information about NFL head
+coaches in the bucket `coaches`, which will bear the bucket type
+`siblings`, a type that sets `allow_mult` to `true`. The key for each object
+is the name of the team, e.g. `giants`, `broncos`, etc. Each object will
+consist of the name of the coach in plain text. 
Here's an example of +creating and storing such an object: + +```ruby +bucket = client.bucket('coaches') +obj = bucket.get_or_new('seahawks', type: 'siblings') +obj.content_type = 'text/plain' +obj.raw_data = 'Pete Carroll' +obj.store +``` + +```php +$location = new \Basho\Riak\Location('seahawks', new \Basho\Riak\Bucket('coaches', 'siblings')); +$response = (new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); + +if ($response->isSuccess()) { + $object = $response->getObject(); + $object->setData('Pete Carroll'); +} else { + $object = new \Basho\Riak\Object('Pete Carroll', 'text/plain'); +} + +(new \Basho\Riak\Command\Builder\StoreObject($riak)) + ->withObject($object) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket_type('siblings').bucket('coaches') +obj = RiakObject(client, bucket, 'seahawks') +obj.content_type = 'text/plain' +obj.data = 'Pete Carroll' +obj.store() +``` + +```csharp +var id = new RiakObjectId("siblings", "coaches", "seahawks"); +var obj = new RiakObject(id, "Pete Carroll", + RiakConstants.ContentTypes.TextPlain); +var rslt = client.Put(obj); +``` + +```javascript +var riakObj = new Riak.Commands.KV.RiakObject(); +riakObj.setContentType('text/plain'); +riakObj.setBucketType('siblings'); +riakObj.setBucket('coaches'); +riakObj.setKey('seahawks'); +riakObj.setValue('Pete Carroll'); +client.storeValue({ value: riakObj }, function (err, rslt) { + if (err) { + throw new Error(err); + } else { + logger.info('Stored Pete Carroll'); + } +}); +``` + +```erlang +Obj = riakc_obj:new({<<"siblings">>, <<"coaches">>}, + <<"seahawks">>, + <<"Pete Carroll">>, + <<"text/plain">>). +riakc_pb_socket:put(Pid, Obj). +``` + +```golang +obj := &riak.Object{ + ContentType: "text/plain", + Charset: "utf-8", + ContentEncoding: "utf-8", + Value: []byte("Pete Carroll"), +} + +cmd, err := riak.NewStoreValueCommandBuilder(). + WithBucketType("siblings"). + WithBucket("coaches"). + WithKey("seahawks"). + WithContent(obj). + Build() + +if err != nil { + fmt.Println(err.Error()) + return +} + +if err := cluster.Execute(cmd); err != nil { + fmt.Println(err.Error()) + return +} + +fmt.Println("Stored Pete Carroll") +``` + +Every once in a while, though, head coaches change in the NFL, which +means that our data would need to be updated. 
Below is an example
+function for updating such objects:
+
+```ruby
+def update_coach(team, new_coach)
+  bucket = client.bucket('coaches')
+  # The read phase
+  obj = bucket.get_or_new(team, type: 'siblings')
+  # The modify phase
+  obj.data = new_coach
+  # The write phase
+  obj.store
+end
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```php
+function update_coach($riak, $team, $coach) {
+    // The read phase
+    $location = new \Basho\Riak\Location($team, new \Basho\Riak\Bucket('coaches', 'siblings'));
+    $response = (new \Basho\Riak\Command\Builder\FetchObject($riak))
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    // The modify phase
+    if ($response->isSuccess()) {
+        $object = $response->getObject();
+        $object->setData($coach);
+    } else {
+        $object = new \Basho\Riak\Object($coach, 'text/plain');
+    }
+
+    // The write phase
+    $response = (new \Basho\Riak\Command\Builder\StoreObject($riak))
+        ->withObject($object)
+        ->atLocation($location)
+        ->build()
+        ->execute();
+
+    return $response->isSuccess();
+}
+
+echo update_coach($riak, 'packers', 'Vince Lombardi'); // true
+```
+
+```python
+def update_coach(team, new_coach):
+    bucket = client.bucket_type('siblings').bucket('coaches')
+    # The read phase
+    obj = bucket.get(team)
+    # The modify phase
+    obj.data = new_coach
+    # The write phase
+    obj.store()
+
+# Example usage
+update_coach('packers', 'Vince Lombardi')
+```
+
+```csharp
+private void UpdateCoach(string team, string newCoach)
+{
+    var id = new RiakObjectId("siblings", "coaches", team);
+    var getResult = client.Get(id);
+
+    RiakObject obj = getResult.Value;
+    obj.SetObject<string>(newCoach, RiakConstants.ContentTypes.TextPlain);
+    client.Put(obj);
+}
+```
+
+```javascript
+function update_coach(team, newCoach) {
+    client.fetchValue({
+        bucketType: 'siblings', bucket: 'coaches', key: team
+    }, function (err, rslt) {
+        if (err) {
+            throw new Error(err);
+        }
+
+        var riakObj = rslt.values.shift();
+        riakObj.setValue(newCoach);
+        client.storeValue({ value: riakObj }, function (err, rslt) {
+            if (err) {
+                throw new Error(err);
+            }
+        });
+    });
+}
+```
+
+```erlang
+update_coach(Pid, Team, NewCoach) ->
+    {ok, Obj} = riakc_pb_socket:get(Pid,
+                                    {<<"siblings">>, <<"coaches">>},
+                                    Team),
+    ModifiedObj = riakc_obj:update_value(Obj, NewCoach),
+    riakc_pb_socket:put(Pid, ModifiedObj).
+
+%% Example usage
+update_coach(Pid, <<"packers">>, <<"Vince Lombardi">>).
+```
+
+```golang
+func updateCoach(cluster *riak.Cluster, team, newCoach string) error {
+    var cmd riak.Command
+    var err error
+
+    cmd, err = riak.NewFetchValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    fvc := cmd.(*riak.FetchValueCommand)
+    obj := fvc.Response.Values[0]
+    obj.Value = []byte(newCoach)
+
+    cmd, err = riak.NewStoreValueCommandBuilder().
+        WithBucketType("siblings").
+        WithBucket("coaches").
+        WithKey(team).
+        WithContent(obj).
+        Build()
+
+    if err != nil {
+        return err
+    }
+
+    if err := cluster.Execute(cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+```
+
+In the example above, you can see the three steps in action: first, the
+object is read, which automatically fetches the object's causal context;
+then the object is modified, i.e. the object's value is set to the name
+of the new coach; and finally the object is written back to Riak. 
+
+## Object Update Anti-patterns
+
+The most important thing to bear in mind when updating objects is this:
+you should always read an object prior to updating it _unless_ you are
+certain that no object is stored there. If you are storing [sensor data]({{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/#sensor-data) in Riak and using timestamps as keys, for example, then you can be sure that keys are not repeated. In that case, making writes to Riak without first reading the object is fine. If
+you're not certain, however, then we recommend always reading the object
+first.
+
+## Java Client Example
+
+As with the other official clients, object updates using the Java client
+will automatically fetch the object's causal context metadata, modify
+the object, and then write the modified value back to Riak. You can
+update object values by creating your own `UpdateValue` operations that
+extend the abstract class `Update<T>`. An `UpdateValue` operation must
+have an `apply` method that returns a new `T`. In our case, the data
+class that we're dealing with is `User`. First, let's create a very
+basic `User` class:
+
+```java
+import java.util.List;
+
+public class User {
+    public String username;
+    public List<String> hobbies;
+
+    public User(String username, List<String> hobbies) {
+        this.username = username;
+        this.hobbies = hobbies;
+    }
+}
+```
+
+In the example below, we'll create an update value operation called
+`UpdateUserName`:
+
+```java
+import com.basho.riak.client.api.commands.kv.UpdateValue.Update;
+
+public class UpdateUserName extends Update<User> {
+    @Override
+    public User apply(User original) {
+        // update logic goes here
+    }
+}
+```
+
+In the example above, we didn't specify any actual update logic. Let's
+change that by creating an `UpdateValue` operation that changes a `User`
+object's `username` field:
+
+```java
+public class UpdateUserName extends Update<User> {
+    private String newUsername;
+
+    public UpdateUserName(String newUsername) {
+        this.newUsername = newUsername;
+    }
+
+    @Override
+    public User apply(User original) {
+        original.username = newUsername;
+        return original;
+    }
+}
+```
+
+Now, let's put our `UpdateUserName` operation into effect. In the
+example below, we'll change a `User` object's `username` from whatever
+it currently is to `cliffhuxtable1986`:
+
+```java
+import com.basho.riak.client.api.commands.kv.FetchValue;
+
+Location location = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(new UpdateUserName("cliffhuxtable1986"))
+        .build();
+client.execute(updateOp);
+```
+
+You may notice that a fetch option was added to our `UpdateValue`
+operation: `FetchValue.Option.DELETED_VCLOCK` was set to `true`.
+Remember from the section above that you should always read an object
+before modifying and writing it, _even if the object has been deleted_.
+Setting this option to `true` ensures that the causal context is fetched
+from Riak if the object has been deleted. We recommend always setting
+this option to `true` when constructing `UpdateValue` operations.
+
+### Clobber Updates
+
+If you'd like to update an object by simply replacing it with an
+entirely new value of the same type (unlike in the section above, where
+only one property of the object was updated), the Java client provides
+you with a "clobber" update that replaces the existing object with a
+new object of the same type rather than changing one or
+more properties of the object. 
Imagine that there is a `User` object
+stored in the bucket `users` in the key `cliffhuxtable1986`, as in the
+example above, and we simply want to replace the object with a brand new
+object:
+
+```java
+Location location = new Location(new Namespace("users"), "cliffhuxtable1986");
+User brandNewUser = new User(/* new user info */);
+UpdateValue updateOp = new UpdateValue.Builder(location)
+        // As before, we set this option to true
+        .withFetchOption(FetchValue.Option.DELETED_VCLOCK, true)
+        .withUpdate(Update.clobberUpdate(brandNewUser))
+        .build();
+client.execute(updateOp);
+```
+
+### No-operation Updates in Java
+
+The Java client also enables you to construct **no-operation updates**
+that don't actually modify the object and simply write the original
+value back to Riak. What is the use of that, given that it isn't
+changing the value of the object at all? No-operation updates can be
+useful because they can help Riak resolve [sibling conflicts]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution#siblings). If you have an object---or many objects, for that
+matter---with siblings, a no-operation update will fetch the object _and
+its causal context_ and write the object back to Riak with the same,
+fetched context. This has the effect of telling Riak that you deem this
+value to be most current. Riak can then use this information in internal
+sibling resolution operations.
+
+Below is an example:
+
+```java
+Location loc = new Location(...);
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withUpdate(Update.noopUpdate())
+        .build();
+client.execute(updateOp);
+```
+
+The example above would update the object without fetching it. You
+could, however, use a no-operation update to _read_ an object as well if
+you set `return_body` to `true` in your request:
+
+```java
+// Using the Location object "loc" from above:
+UpdateValue updateOp = new UpdateValue.Builder(loc)
+        .withFetchOption(Option.RETURN_BODY, true)
+        .withUpdate(Update.noopUpdate())
+        .build();
+UpdateValue.Response response = client.execute(updateOp);
+RiakObject object = response.getValue(RiakObject.class);
+
+// Or to continue the User example from above:
+User user = response.getValue(User.class);
+```
+
+In general, you should use no-operation updates only on keys that you
+suspect may have accumulated siblings or on keys that are frequently
+updated (and thus bear the possibility of accumulating siblings).
+Otherwise, you're better off performing normal reads.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/downloads.md b/content/riak/kv/3.0.4/downloads.md
new file mode 100644
index 0000000000..8aa7596823
--- /dev/null
+++ b/content/riak/kv/3.0.4/downloads.md
@@ -0,0 +1,27 @@
+---
+title: "Download for Riak KV 3.0.4"
+description: "Download some stuff!" 
+menu:
+  riak_kv-3.0.4:
+    name: "Download Riak KV"
+    identifier: "download_riak_kv"
+    weight: 101
+    pre: download-alt
+project: "riak_kv"
+project_version: 3.0.4
+toc: false
+layout: downloads
+listed_projects:
+  - project: "riak_kv"
+    version: 3.0.4
+    title: "Riak KV"
+    install_instructions_set: "setup/installing"
+aliases:
+  - /riak/3.0.4/downloads
+  - /riak/kv/3.0.4/downloads
+---
+
+
+
+
diff --git a/content/riak/kv/3.0.4/index.md b/content/riak/kv/3.0.4/index.md
new file mode 100644
index 0000000000..9d76e67876
--- /dev/null
+++ b/content/riak/kv/3.0.4/index.md
@@ -0,0 +1,81 @@
+---
+title: "Riak KV 3.0.4"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Riak KV"
+    identifier: "index"
+    weight: 100
+    pre: riak
+toc: false
+aliases:
+  - /riak/3.0.4/
+---
+
+
+[aboutenterprise]: https://www.tiot.jp/en/about-us/contact-us/
+[config index]: {{<baseurl>}}riak/kv/3.0.4/configuring
+[downloads]: {{<baseurl>}}riak/kv/3.0.4/downloads/
+[install index]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/
+[plan index]: {{<baseurl>}}riak/kv/3.0.4/setup/planning
+[perf open files]: {{<baseurl>}}riak/kv/3.0.4/using/performance/open-files-limit
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/debian-ubuntu
+[usage search]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/search
+[getting started]: {{<baseurl>}}riak/kv/3.0.4/developing/getting-started
+[dev client libraries]: {{<baseurl>}}riak/kv/3.0.4/developing/client-libraries
+
+
+
+Riak KV is a distributed NoSQL database designed to deliver maximum data availability by distributing data across multiple servers. As long as your Riak KV client can reach one Riak server, it should be able to write data.
+
+This release is tested with OTP 20, OTP 21, and OTP 22, but optimal performance is likely to be achieved when using OTP 22.
+
+## Supported Operating Systems
+
+- Amazon Linux 2016.09 (AWS)
+- Amazon Linux 2 (AWS)
+- CentOS 6
+- CentOS 7
+- CentOS 8
+- Debian 7.0 ("Wheezy")
+- Debian 8.0 ("Jessie")
+- Debian 9.0 ("Stretch")
+- Red Hat Enterprise Linux 6
+- Red Hat Enterprise Linux 7
+- Red Hat Enterprise Linux 8
+- Raspbian Buster
+- Ubuntu 12.04 ("Precise Pangolin")
+- Ubuntu 14.04 ("Trusty Tahr")
+- Ubuntu 16.04 ("Xenial Xerus")
+- Ubuntu 18.04 ("Bionic Beaver")
+- FreeBSD 10.4
+- FreeBSD 11.1
+- Mac OSX 10.11+ (development only)
+
+## Getting Started
+
+Are you brand new to Riak KV? Start by [downloading][downloads] Riak KV, and then follow the pages below to get started:
+
+1. [Install Riak KV][install index]
+2. [Plan your Riak KV setup][plan index]
+3. [Configure Riak KV for your needs][config index]
+
+{{% note title="Developing with Riak KV" %}}
+If you are looking to integrate Riak KV with your existing tools, check out the [Developing with Riak KV]({{<baseurl>}}riak/kv/3.0.4/developing) docs. They provide instructions and examples for languages such as: Java, Ruby, Python, Go, Haskell, NodeJS, Erlang, and more.
+{{% /note %}}
+
+## Popular Docs
+
+1. [Open Files Limit][perf open files]
+2. [Installing on Debian-Ubuntu][install debian & ubuntu]
+3. [Developing with Riak KV: Searching][usage search]
+4. [Developing with Riak KV: Getting Started][getting started]
+5. 
[Developing with Riak KV: Client Libraries][dev client libraries]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn.md b/content/riak/kv/3.0.4/learn.md
new file mode 100644
index 0000000000..35ce57c1b5
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn.md
@@ -0,0 +1,53 @@
+---
+title: "Learn About Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Learning"
+    identifier: "learn"
+    weight: 400
+    pre: beaker
+toc: true
+aliases:
+---
+
+[learn why riak]: ./why-riak-kv/
+[learn use cases]: ./use-cases/
+[learn new nosql]: ./new-to-nosql/
+[glossary]: ./glossary/
+[concepts]: ./concepts/
+
+## In This Section
+
+#### [Why Riak KV?][learn why riak]
+
+An overview of Riak KV and when to use it.
+
+[Learn More >>][learn why riak]
+
+#### [Use Cases][learn use cases]
+
+Details use cases and applications in which Riak KV excels.
+
+[Learn More >>][learn use cases]
+
+
+
+#### [Glossary][glossary]
+
+A list of terms relating to Riak used throughout the documentation.
+
+[Learn More >>][glossary]
+
+#### [Concepts][concepts]
+
+Provides definitions for, insight into, and high-level information about the various parts of Riak KV.
+
+[Learn More >>][concepts]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/concepts.md b/content/riak/kv/3.0.4/learn/concepts.md
new file mode 100644
index 0000000000..9d2ded27c9
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/concepts.md
@@ -0,0 +1,49 @@
+---
+title: "Concepts"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Concepts"
+    identifier: "learn_concepts"
+    weight: 104
+    parent: "learn"
+toc: true
+aliases:
+---
+
+[concept aae]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets
+[concept cap neg]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/capability-negotiation
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters
+[concept crdts]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/crdts
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency
+[concept keys objects]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/reference/strong-consistency
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/vnodes
+[config index]: {{<baseurl>}}riak/kv/3.0.4/configuring
+[plan index]: {{<baseurl>}}riak/kv/3.0.4/setup/planning
+[use index]: {{<baseurl>}}riak/kv/3.0.4/using/
+
+
+Riak KV has many great features, functions, and guiding principles that inform how the product works. This section provides definitions for, insight into, and high-level information about the various parts of Riak KV you will encounter as you [plan][plan index], [configure][config index], and [use][use index] Riak. 
+
+Learn more about:
+
+* [Active Anti-Entropy (AAE)][concept aae]
+* [Buckets][concept buckets]
+* [Capability Negotiation][concept cap neg]
+* [Causal Context][concept causal context]
+* [Clusters][concept clusters]
+* [Convergent Replicated Data Types (CRDTs)][concept crdts]
+* [Eventual Consistency][concept eventual consistency]
+* [Keys and Objects][concept keys objects]
+* [Replication][concept replication]
+* [Virtual Nodes (vnodes)][concept vnodes]
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/concepts/active-anti-entropy.md b/content/riak/kv/3.0.4/learn/concepts/active-anti-entropy.md
new file mode 100644
index 0000000000..b13abd8d65
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/concepts/active-anti-entropy.md
@@ -0,0 +1,111 @@
+---
+title: "Active Anti-Entropy"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Active Anti-Entropy"
+    identifier: "learn_concepts_aae"
+    weight: 100
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.4/theory/concepts/aae
+  - /riak/kv/3.0.4/theory/concepts/aae
+---
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter
+[cluster ops aae]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/active-anti-entropy
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency
+[config aae]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#active-anti-entropy
+[glossary read rep]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#read-repair
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+[Merkle tree]: http://en.wikipedia.org/wiki/Merkle_tree
+[usage search]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/search
+
+
+In a [clustered][concept clusters], [eventually consistent][concept eventual consistency] system like Riak, conflicts between object replicas stored
+on different nodes are an expected byproduct of node failure, concurrent
+client updates, physical data loss and corruption, and other events that
+distributed systems are built to handle. These conflicts occur when
+objects are either
+
+* **missing**, as when one node holds a replica of the object and
+  another node does not, or
+* **divergent**, as when the values of an existing object differ across
+  nodes.
+
+Riak KV offers two means of resolving object conflicts: read repair and
+active anti-entropy (AAE). Both of these conflict resolution mechanisms
+apply to normal key/value data in Riak as well as to
+[search indexes][usage search].
+
+
+## Read Repair vs. Active Anti-Entropy
+
+In versions of Riak prior to 1.3, replica conflicts were healed via
+[read repair][glossary read rep], which is a _passive_
+anti-entropy mechanism that heals object conflicts only when a read
+request reaches Riak from a client. Under read repair, if the
+[vnode][glossary vnode] coordinating the read request determines
+that different nodes hold divergent values for the object, the repair
+process will be set in motion.
+
+One advantage of using read repair alone is that it doesn't require any
+kind of background process to take effect, which can cut down on CPU
+resource usage. The drawback of the read repair-only approach, however,
+is that the healing process can only ever reach those objects that
+are read by clients. Any conflicts in objects that are not read by
+clients will go undetected. 
+
+The _active_ anti-entropy (AAE) subsystem was added to Riak in
+versions 1.3 and later to enable conflict resolution to run as a
+continuous background process, in contrast with read repair, which does
+not run continuously. AAE is most useful in clusters containing
+so-called "cold data" that may not be read for long periods of time, even
+months or years, and is thus not reachable by read repair.
+
+Although AAE is enabled by default, it can be turned off if necessary.
+See our documentation on [managing active anti-entropy][cluster ops aae]
+for information on how to enable and disable AAE, as well as on configuring
+and monitoring AAE.
+
+## Active Anti-Entropy and Hash Tree Exchange
+
+In order to compare object values between replicas without using more
+resources than necessary, Riak relies on [Merkle
+tree] hash exchanges between
+nodes.
+
+Using this type of exchange enables Riak to compare a balanced tree of
+Riak object hashes. Any difference at a higher level in the hierarchy
+means that at least one value has changed at a lower level. AAE
+recursively compares the tree, level by level, until it pinpoints exact
+values with a difference between nodes. The result is that AAE is able
+to run repair operations efficiently regardless of how many objects are
+stored in a cluster, since it need only repair specific objects instead
+of all objects.
+
+In contrast with related systems, Riak uses persistent, on-disk hash
+trees instead of in-memory hash trees. The advantages of this approach
+are twofold:
+
+* Riak can run AAE operations with a minimal impact on memory usage
+* Riak nodes can be restarted without needing to rebuild hash trees
+
+In addition, hash trees are updated in real time as new writes come in,
+which reduces the time that it takes to detect and repair missing or
+divergent replicas.
+
+As an additional fallback measure, Riak periodically clears and
+regenerates all hash trees from on-disk key/value data, which enables
+Riak to detect silent data corruption to on-disk data arising from disk
+failure, faulty hardware, and other sources. The default time period for
+this regeneration is one week, but this can be adjusted in each node's
+[configuration file][config aae]. 
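+
+As a rough sketch, the relevant settings in `riak.conf` would look
+something like the following (the setting names here are taken from the
+riak_kv configuration schema; verify them against your own node's schema
+before relying on them):
+
+```riakconf
+## Enable the AAE subsystem
+anti_entropy = active
+
+## How long a hash tree lives before it is cleared and rebuilt
+## from on-disk data (the default is one week)
+anti_entropy.tree.expiry = 1w
+```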
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/concepts/buckets.md b/content/riak/kv/3.0.4/learn/concepts/buckets.md
new file mode 100644
index 0000000000..e78f2a88b1
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/concepts/buckets.md
@@ -0,0 +1,217 @@
+---
+title: "Buckets"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Buckets"
+    identifier: "learn_concepts_buckets"
+    weight: 101
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.4/theory/concepts/Buckets
+  - /riak/kv/3.0.4/theory/concepts/Buckets
+  - /riak/3.0.4/theory/concepts/buckets
+  - /riak/kv/3.0.4/theory/concepts/buckets
+---
+
+[apps cluster metadata]: {{<baseurl>}}riak/kv/3.0.4/developing/app-guide/cluster-metadata
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/bucket-types
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/strong-consistency
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context
+[concept causal context sib]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context/#siblings
+[concept replication]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/reference/strong-consistency
+[config basic]: {{<baseurl>}}riak/kv/3.0.4/configuring/basic
+[dev api http]: {{<baseurl>}}riak/kv/3.0.4/developing/api/http
+[dev data types]: {{<baseurl>}}riak/kv/3.0.4/developing/data-types
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#ring
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/multi
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types
+[usage commit hooks]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/commit-hooks
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/replication
+
+
+Buckets are used to define a virtual keyspace for storing Riak objects.
+They enable you to define non-default configurations over that keyspace
+concerning [replication properties][concept replication] and [other
+parameters][config basic].
+
+In certain respects, buckets can be compared to tables in relational
+databases or to folders in filesystems. From the standpoint
+of performance, buckets with default configurations are essentially
+"free," while non-default configurations, defined [using bucket
+types][cluster ops bucket types], will be gossiped around [the ring][glossary ring] using Riak's [cluster metadata][apps cluster metadata] subsystem.
+
+## Configuration
+
+Bucket configurations are defined [using bucket types][cluster ops bucket types], which enables
+you to create and modify sets of configurations and apply them to as
+many buckets as you wish. With bucket types, you can configure the
+following bucket-level parameters, overriding the default values if you
+wish.
+
+#### allow_mult
+
+Determines whether sibling values can be created. See [siblings][concept causal context sib]. The default can be `true` or `false` depending on
+the context. See the documentation on [`allow_mult`][usage bucket types] for more
+information.
+
+#### n_val
+
+Specifies the number of copies of each object to be stored in the
+cluster. See the documentation on [replication properties][usage replication]. Default:
+`3`.
+
+#### last_write_wins
+
+Indicates if an object's timestamp will be used to decide the canonical
+write in the case of a conflict. See the documentation on [vector
+clocks][concept causal context] and on [conflict resolution][usage conflict resolution] for more information. Default:
+`false`.
+
+#### r, pr, w, dw, pw, rw, notfound_ok, basic_quorum
+
+See the documentation on [replication properties][usage replication] for more information
+on all of these properties.
+
+#### precommit
+
+A list of Erlang functions to be executed before writing an object. See
+our documentation on [pre-commit hooks][usage commit hooks] for more information. Default: no pre-commit
+hooks, i.e. an empty list.
+
+#### postcommit
+
+A list of Erlang functions to be executed after writing an object. See
+our documentation on [post-commit hooks][usage commit hooks] for more information. Default: no post-commit
+hooks, i.e. an empty list.
+
+#### old_vclock, young_vclock, small_vclock, big_vclock
+
+These settings enable you to manage [vector clock pruning][concept causal context].
+
+#### backend
+
+If you are using the [Multi][plan backend multi] backend, this property enables you to
+determine which of Riak's available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], or [Memory][plan backend memory]---will be used in buckets of this type. If you are using
+LevelDB, Bitcask, or the Memory backend at a cluster-wide level, _all_
+buckets of all types will use the assigned backend.
+
+#### consistent
+
+If you are using Riak's experimental [strong consistency][concept strong consistency] feature for buckets
+bearing a type, this setting must be set to `true`. The default is
+`false`. More information can be found in our documentation on [using
+strong consistency][cluster ops strong consistency].
+
+#### datatype
+
+If you are using [Riak data types][dev data types], this setting
+determines which data type will be used in
+buckets of this bucket type. Possible values: `counter`, `set`, or
+`map`.
+
+#### dvv_enabled
+
+Whether [dotted version vectors][concept causal context]
+will be used instead of traditional vector clocks for [conflict resolution][usage conflict resolution]. Default: `false`.
+
+#### chash_keyfun, linkfun
+
+These settings involve features that have been deprecated. You will not
+need to adjust these values.
+
+## Fetching Bucket Properties
+
+If you'd like to see how a particular bucket has been configured, you
+can do so using our official client libraries or through Riak's [HTTP
+API][dev api http]. The following would fetch the properties for the bucket
+`animals` if that bucket had a default configuration, i.e. 
the `default`
+bucket type:
+
+```java
+Namespace animalsBucket = new Namespace("animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(animalsBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, <<"animals">>).
+```
+
+```curl
+# Assuming that Riak's HTTP API is listening on "localhost" and port 8098:
+
+curl http://localhost:8098/types/default/buckets/animals/props
+```
+
+If the bucket `animals` had a different type that you had created and
+activated, e.g. `my_custom_type`, you could fetch the bucket properties
+like so:
+
+```java
+Namespace customTypedBucket = new Namespace("my_custom_type", "animals");
+FetchBucketProperties fetchProps =
+    new FetchBucketProperties.Builder(customTypedBucket).build();
+FetchBucketProperties.Response response = client.execute(fetchProps);
+BucketProperties props = response.getProperties();
+```
+
+```ruby
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.properties
+```
+
+```php
+$bucketProperties = (new \Basho\Riak\Command\Builder\FetchBucketProperties($riak))
+  ->buildBucket('animals', 'my_custom_type')
+  ->build()
+  ->execute()
+  ->getBucket()
+  ->getProperties();
+```
+
+```python
+bucket = client.bucket_type('my_custom_type').bucket('animals')
+bucket.get_properties()
+```
+
+```erlang
+{ok, Props} = riakc_pb_socket:get_bucket(Pid, {<<"my_custom_type">>, <<"animals">>}).
+```
+
+```curl
+curl http://localhost:8098/types/my_custom_type/buckets/animals/props
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/concepts/capability-negotiation.md b/content/riak/kv/3.0.4/learn/concepts/capability-negotiation.md
new file mode 100644
index 0000000000..d10736df72
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/concepts/capability-negotiation.md
@@ -0,0 +1,36 @@
+---
+title: "Capability Negotiation"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Capability Negotiation"
+    identifier: "learn_concepts_cap_negot"
+    weight: 102
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.4/theory/concepts/capability-negotiation
+  - /riak/kv/3.0.4/theory/concepts/capability-negotiation
+---
+
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+[upgrade cluster]: {{<baseurl>}}riak/kv/3.0.4/setup/upgrading/cluster
+[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce
+
+
+In early versions of Riak KV, [rolling upgrades][upgrade cluster] from an older version to a newer one involved (a) disabling all new features associated with the newer version, and then (b) re-enabling those features once all nodes in the cluster were upgraded.
+
+Rolling upgrades no longer require you to disable and then re-enable features, thanks to the *capability negotiation* subsystem that automatically manages the addition of new features. Using this subsystem, nodes negotiate with each other to automatically determine which versions are supported on which nodes, which allows clusters to maintain normal operations even when divergent versions of Riak KV are present in the cluster.
+ +{{% note title="Note on Mixed Versions" %}} +The capability negotiation subsystem is used to manage mixed versions of Riak KV within a cluster ONLY during rolling upgrades. We strongly recommend not running mixed versions during normal operations. +{{% /note %}} + + + + + + diff --git a/content/riak/kv/3.0.4/learn/concepts/causal-context.md b/content/riak/kv/3.0.4/learn/concepts/causal-context.md new file mode 100644 index 0000000000..3f5bb0b0d3 --- /dev/null +++ b/content/riak/kv/3.0.4/learn/concepts/causal-context.md @@ -0,0 +1,289 @@ +--- +title: "Causal Context" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Causal Context" + identifier: "learn_concepts_causal_context" + weight: 103 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.4/theory/concepts/context + - /riak/kv/3.0.4/theory/concepts/context +--- + + +[concept aae]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy +[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency +[CRM]: http://en.wikipedia.org/wiki/Customer_relationship_management +[dev api http]: {{<baseurl>}}riak/kv/3.0.4/developing/api/http +[dev key value]: {{<baseurl>}}riak/kv/3.0.4/developing/key-value-modeling +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#read-repair +[perf latency reduc]: {{<baseurl>}}riak/kv/3.0.4/using/performance/latency-reduction +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution +[usage protocol buffers]: {{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers +[usage updating objects]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/updating-objects +[Vector Clocks on Wikipedia]: http://en.wikipedia.org/wiki/Vector_clock +[Why Vector Clocks are Easy]: http://basho.com/posts/technical/why-vector-clocks-are-easy/ +[Why Vector Clocks are Hard]: http://basho.com/posts/technical/why-vector-clocks-are-hard/ +[work of Leslie Lamport]: http://portal.acm.org/citation.cfm?id=359563 +[Evaluating Dotted Version Vectors in Riak]: http://asc.di.fct.unl.pt/~nmp/pubs/inforum-2011-2.pdf +[Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]: http://paginas.fe.up.pt/~prodei/dsie12/papers/paper_19.pdf +[Dotted Version Vector Sets]: https://github.com/ricardobcl/Dotted-Version-Vectors +[A History of Time in Riak]: https://www.youtube.com/watch?v=3SWSw3mKApM + + +Because Riak is an [eventually consistent][concept eventual consistency], +[clustered][concept clusters] database, [conflicts][usage conflict resolution] between +object replicas stored on different nodes are inevitable, particularly +when multiple clients update an object simultaneously. + +## The Problem of Conflicting Values + +To illustrate this problem, imagine that you're building a +[CRM] +application and storing customer information in Riak. Now imagine that +information about a particular user is being stored in the [key][dev key value] `mariejohnston` in the [bucket][usage bucket types] `customers`. +What happens if Marie has two browser windows open and changes her phone +number to 555-1337 in one window and saves it, and then also changes it +to 555-1212 in another window and saves it? + +This means that two different values are sent into Riak. So what +happens at that point? There are several possible outcomes: + +1. 
Riak is able to discern that one object is more causally recent than the other (in this case 555-1212) and chooses to store that value as the "correct" value.
+2. The two operations hit the database at roughly the same time, i.e. two **concurrent
+updates** have been completed, and Riak is unable to determine which
+value "wins." In this scenario, one of three things can happen:
+
+    a. The object is a CRDT, so Riak is able to resolve conflicting values by type-specific rules
+
+    b. Riak creates sibling values, aka **siblings**, for the object
+
+    c. Riak resolves the values on the basis of timestamps
+
+In the case of outcome 1 above, Riak uses **causal context** metadata to
+make that decision. This metadata is attached to every object in Riak.
+Causal context comes in two forms in Riak: **vector clocks** and
+**dotted version vectors**. More information on both can be found in the
+sections below.
+
+In the case of outcome 2, the choice between **a**, **b**, and **c** is determined by bucket properties. If you set the `allow_mult` parameter to `true` for a [bucket type]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types), all non-CRDT writes to that bucket type will create siblings in the case of concurrent writes (and occasionally under other
+scenarios, e.g. healed network partitions).
+
+If, however, `allow_mult` is set to `false`, then Riak will not generate
+siblings, instead relying on simple timestamp resolution to decide which value
+"wins." In general, we recommend _always_ setting `allow_mult` to
+`true`. A more complete discussion can be found in our documentation on
+[conflict resolution][usage conflict resolution].
+
+## Vector Clocks
+
+In versions of Riak prior to 1.4, Riak used vector clocks as the sole
+means of tracking the history of object updates. In Riak versions 2.0
+and later, we recommend using [dotted version vectors](#dotted-version-vectors) instead, for reasons that are explained
+in that section.
+
+Like dotted version vectors, vector clocks are a means of tracking
+events in distributed systems. Unlike normal clocks, vector clocks have
+no sense of chronological time, i.e. they don't care if something
+happened at 6 pm today or back in 1972. They care only about sequences
+of events. More specifically, they keep track of who---i.e. which actor
+in the system---has modified an object and how many times they've done
+so.
+
+In a distributed system like Riak, multiple replicas of each object are
+active in the cluster all the time. Because it's inevitable that objects
+will have conflicting values due to events like concurrent updates and
+healed network partitions, Riak needs a mechanism to keep track of which
+replica of an object is more current than another. In versions of Riak
+prior to 2.0, vector clocks were the means employed by Riak to do
+precisely that.
+
+A number of important aspects of the relationship between object
+replicas can be determined using vector clocks:
+
+  * Whether one object is a direct descendant of the other
+  * Whether the objects are direct descendants of a common parent
+  * Whether the objects are unrelated in recent heritage
+
+Behind the scenes, Riak uses vector clocks as an essential element of
+its [active anti-entropy][concept aae] subsystem and of its automatic read
+repair capabilities.
+
+
+Vector clocks are non-human-readable metadata attached to all Riak
+objects. 
They look something like this:
+
+```
+a85hYGBgzGDKBVIcR4M2cgczH7HPYEpkzGNlsP/VfYYvCwA=
+```
+
+While vector clocks quite often resolve object conflicts without
+trouble, there are times when they can't, i.e. when it's unclear which
+value of an object is most current. When that happens, Riak, if
+configured to do so, will create **siblings**.
+
+## More Information on Vector Clocks
+
+Additional information on vector clocks:
+
+* [Conflict Resolution][usage conflict resolution] in Riak KV
+* [Vector Clocks on Wikipedia]
+* [Why Vector Clocks are Easy]
+* [Why Vector Clocks are Hard]
+* The vector clocks used in Riak are based on the [work of Leslie Lamport].
+
+## Siblings
+
+It is possible, though not recommended, to [configure Riak][usage conflict resolution] to ensure that only one copy of an object ever exists in a
+specific location. This will ensure that _at most_ one value is returned
+when a read is performed on a bucket type/bucket/key location (and no
+value if Riak returns `not found`).
+
+It's also possible, however, to configure Riak to store multiple objects
+in a single key if necessary, i.e. for an object to have different
+values on different nodes. Objects stored this way have what are called
+sibling values. You can instruct Riak to allow for sibling creation by
+setting the `allow_mult` bucket property to `true` for a specific
+bucket, preferably [using bucket types][usage bucket types].
+
+From the standpoint of application development, the difficulty with
+siblings is that they _by definition_ conflict with one another. When an
+application attempts to read an object that has siblings, multiple
+replicas will be stored in the location where the application is
+looking. This means that the application will need to develop a
+strategy for [conflict resolution][usage conflict resolution], i.e. the application will need to
+decide which value is more correct depending on the use case.
+
+## Dotted Version Vectors
+
+In versions of Riak prior to 2.0, all causality-based conflict
+resolution, whether on the client side or in Riak, was achieved using
+[vector clocks][concept causal context]. In version 2.0,
+Riak added the option of using **dotted version vectors** (DVVs)
+instead.
+
+Like vector clocks, dotted version vectors are a mechanism for tracking
+object update causality in terms of **logical time** rather than
+chronological time (as with timestamps), enabling Riak to make decisions
+about which objects are more current than others in cases of conflict.
+
+>**Note: DVVs Recommended Over Vector Clocks**
+>
+>If you are using Riak version 2.0 or later, we strongly recommend using
+dotted version vectors instead of vector clocks, as DVVs are far better
+at limiting the number of siblings produced in a cluster, which can
+prevent a wide variety of potential issues.
+
+
+## DVVs Versus Vector Clocks
+
+The role that DVVs play in Riak is directly analogous to that of
+vector clocks, as both are used
+to resolve object conflicts, whether during background operations like
+[active anti-entropy][concept aae] or [read repair][glossary read rep], or
+when applications engage in client-side [conflict resolution][usage conflict resolution]. The
+crucial difference between them, however, lies in the way that they
+handle concurrent updates.
+
+Vector clocks can detect concurrent updates to the same object, but they
+can't identify which value was associated with each update. 
If an object
+stored in the bucket `frequent_updates` with the key `update_me` is
+updated by five different clients concurrently and tagged with the same
+vector clock, then five values should be created as siblings. However,
+depending on the order of delivery of those updates to the different
+replicas, sibling values may be duplicated, which can in turn lead to
+[sibling explosion](#sibling-explosion) and thus undue
+[latency][perf latency reduc].
+
+DVVs, on the other hand, identify each value with the update that
+created it. If five clients concurrently update the object above (in the
+bucket `frequent_updates`, with the key `update_me`), each of these
+updates will be marked with a _dot_ (a minimal vector clock) that indicates the specific event that introduced it. This
+means that duplicate values can always be identified and removed,
+reducing the likelihood of sibling explosion. Rather than being potentially unbounded, the
+number of sibling values will be proportional to the number of
+concurrent updates.
+
+In terms of performance, the difference between vector clocks and DVVs
+should be minimal in most cases. Because DVVs de-duplicate updates, they
+should generally be smaller than objects that use vector clocks.
+
+## Usage
+
+From an application's perspective, vector clocks and DVVs function in
+exactly the same fashion. Object updates using DVVs involve the same
+sequence of interactions with Riak:
+
+* fetch an object from Riak,
+* fetch the object's metadata, which will contain an opaque context
+  object (e.g. `a85hYGBgzGDKBVIcWu/1S4Pjin9lMCWy5bEycN1/cYYvCwA=`) for
+  the vector clock or DVV attached to that version of the object, and
+  finally
+* pass that opaque context object back to Riak when you update the
+  object.
+
+You will not need to modify your application code when switching from
+vector clocks to DVVs, even if you choose to switch all Riak objects in
+your cluster to DVVs. You should make sure, however, that the right
+bucket types and buckets are being targeted by your application after
+the `dvv_enabled` parameter has been changed.
+
+For compatibility's sake, DVVs contained in Riak objects' metadata are
+still labeled `X-Riak-Vclock` if you're using the [HTTP API][dev api http] and
+`vclock` if using the [Protocol Buffers interface][usage protocol buffers].
+
+More on using vector clocks and DVVs on the application side can be
+found in our documentation on [conflict resolution][usage conflict resolution].
+
+>**Note on DVVs and bucket types**
+>
+>The choice between vector clocks and DVVs can be made at the bucket
+level, [using bucket types][usage bucket types]. This enables you to employ a mixed
+conflict resolution strategy in your Riak cluster, using DVVs in some
+buckets and vector clocks in others if you wish. DVVs can be enabled by
+setting the `dvv_enabled` bucket property to
+`true` for one or more bucket types.
+>
+>Vector clocks remain the default if you are not using bucket types.
+However, any bucket type that you create and activate will have
+`dvv_enabled` set to `true`, so if you wish to
+create a bucket type that uses traditional vector clocks, you will need
+to explicitly set `dvv_enabled` to `false` for
+that bucket type.
+
+
+## Sibling Explosion
+
+Sibling explosion occurs when an object rapidly collects siblings that
+are not reconciled. This can lead to a variety of problems, including
+degraded performance, especially if many objects in a cluster suffer
+from sibling explosion. 
At the extreme, having an enormous object in a
+node can cause reads of that object to crash the entire node. Other
+issues include [undue latency][perf latency reduc] and
+out-of-memory errors.
+
+To prevent sibling explosion, we recommend the following:
+
+1. Use [dotted version vectors](#dotted-version-vectors)
+instead of vector clocks for causal
+context.
+2. Always update mutable objects within a read/modify/write cycle. More
+information can be found in the [Object Updates][usage updating objects] doc.
+
+## Resources
+
+* [Evaluating Dotted Version Vectors in Riak]
+* [Improving Logical Clocks in Riak with Dotted Version Vectors: A Case Study]
+* [Dotted Version Vector Sets]
+* [A History of Time in Riak]
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/concepts/clusters.md b/content/riak/kv/3.0.4/learn/concepts/clusters.md
new file mode 100644
index 0000000000..da1ce7ce70
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/concepts/clusters.md
@@ -0,0 +1,117 @@
+---
+title: "Clusters"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Clusters"
+    identifier: "learn_concepts_clusters"
+    weight: 103
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.4/theory/concepts/Clusters
+  - /riak/kv/3.0.4/theory/concepts/Clusters
+  - /riak/3.0.4/theory/concepts/clusters
+  - /riak/kv/3.0.4/theory/concepts/clusters
+---
+
+
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets
+[concept keys objects]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/keys-and-objects
+[concept replication]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication
+[glossary node]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+[learn dynamo]: {{<baseurl>}}riak/kv/3.0.4/learn/dynamo
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution
+[usage replication]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/replication
+
+
+Riak's default mode of operation is to work as a cluster consisting of
+multiple [nodes][glossary node], i.e. multiple well-connected data
+hosts.
+
+Each host in the cluster runs a single instance of Riak, referred to as
+a Riak node. Each Riak node manages a set of virtual nodes, or
+[vnodes][glossary vnode], that are responsible for storing a
+separate portion of the keys stored in the cluster.
+
+In contrast to some high-availability systems, Riak nodes are _not_
+clones of one another, and they do not all participate in fulfilling
+every request. Instead, you can configure, at runtime or at request
+time, the number of nodes on which data is to be replicated, as well as
+when [replication][concept replication] occurs and which [merge strategy][usage conflict resolution] and failure model are to be followed.
+
+## The Ring
+
+Though much of this material is covered in our annotated version of the
+Amazon [Dynamo paper][learn dynamo], this section nonetheless provides a summary of
+how Riak implements the distribution of data throughout a cluster.
+
+Any client interface to Riak interacts with objects in terms of the
+[bucket][concept buckets] and [key][concept keys objects] in which a value is
+stored, as well as the [bucket type][usage bucket types] that is used
+to set the bucket's properties.
+
+Internally, Riak computes a 160-bit binary hash of each bucket/key pair
+and maps this value to a position on an ordered **ring** of all such
+values. 
This ring is divided into partitions, with each Riak vnode
+responsible for one of these partitions (we say that each vnode
+_claims_ that partition).
+
+Below is a visual representation of a Riak ring:
+
+![A Riak Ring]({{<baseurl>}}images/riak-ring.png)
+
+The nodes of a Riak cluster each attempt to run a roughly equal number
+of vnodes at any given time. In the general case, this means that each
+node in the cluster is responsible for 1/(number of nodes) of the ring,
+or (number of partitions)/(number of nodes) vnodes.
+
+If two nodes define a 16-partition cluster, for example, then each node
+will run 8 vnodes. Nodes attempt to claim their partitions at intervals
+around the ring such that there is an even distribution amongst the
+member nodes and that no node is responsible for more than one replica
+of a key.
+
+## Intelligent Replication
+
+When an object is being stored in the cluster, any node may participate
+as the **coordinating node** for the request. The coordinating node
+consults the ring state to determine which vnode owns the partition in
+which the value's key belongs, then sends the write request to that
+vnode as well as to the vnodes responsible for the next N-1 partitions
+in the ring (where N is a [configurable parameter][usage replication] that describes how many copies of the value to store). The
+write request may also specify that at least W (=< N) of those vnodes
+reply with success, and that DW (=< W) reply with success only after
+durably storing the value.
+
+A read, or GET, request operates similarly, sending requests to the
+vnode that "claims" the partition in which the key resides, as well as
+to the next N-1 partitions. The request also specifies R (=< N), the
+number of vnodes that must reply before a response is returned.
+
+Here is an illustration of this process:
+
+![A Riak Ring]({{<baseurl>}}images/riak-data-distribution.png)
+
+When N is set to 3, the value `REM` is stored in the key `artist`. That
+key is assigned to 3 partitions out of 32 available partitions. When a
+read request is made to Riak, the ring state will be used to determine
+which partitions are responsible. From there, a variety of
+[configurable parameters][usage replication] determine how Riak
+will behave in case the value is not immediately found.
+
+## Gossiping
+
+The ring state is shared around the cluster by means of a "gossip
+protocol." Whenever a node changes its claim on the ring, it announces,
+i.e. "gossips," this change to other nodes so that the other nodes can
+respond appropriately. Nodes also periodically re-announce what they
+know about the ring in case any nodes happened to miss previous updates.
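+
+To make the ring arithmetic above concrete, here is a rough, hedged
+sketch of how a bucket/key pair maps to a ring position. It mirrors the
+idea (a SHA-1 hash of the bucket/key term, divided into equal ring
+segments) but glosses over the exact ownership boundary rules
+implemented in `riak_core`:
+
+```erlang
+%% Illustrative only -- the real logic lives in riak_core/chash.
+Q = 64,                                   %% assumed ring_creation_size
+RingIncrement = (1 bsl 160) div Q,        %% size of each ring segment
+BKey = term_to_binary({<<"my_bucket">>, <<"my_key">>}),
+<<HashInt:160/integer>> = crypto:hash(sha, BKey),
+PartitionIndex = HashInt div RingIncrement,    %% which of the Q segments
+RingPosition = PartitionIndex * RingIncrement. %% position, as seen in ring output
+```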
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/concepts/crdts.md b/content/riak/kv/3.0.4/learn/concepts/crdts.md
new file mode 100644
index 0000000000..a3ef4d1982
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/concepts/crdts.md
@@ -0,0 +1,252 @@
+---
+title_supertext: "Concept"
+title: "Data Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Data Types"
+    identifier: "learn_concepts_data_types"
+    weight: 104
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.4/theory/concepts/crdts
+  - /riak/kv/3.0.4/theory/concepts/crdts
+---
+
+[crdts pdf]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf
+[data types converg]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/crdts/#convergence
+[crdts reading list]: http://christophermeiklejohn.com/crdt/2014/07/22/readings-in-crdts.html
+[data types impl]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/crdts/#implementation
+[concept causal context dvv]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context/#dotted-version-vectors
+[concept causal context sib]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context/#siblings
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context/#vector-clocks
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/strong-consistency
+[dev data types]: {{<baseurl>}}riak/kv/3.0.4/developing/data-types
+[riak_dt]: https://github.com/basho/riak_dt
+[dev data types context]: {{<baseurl>}}riak/kv/3.0.4/developing/data-types/#data-types-and-context
+[glossary node]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#node
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution
+
+Riak Data Types are convergent replicated data types (CRDTs), inspired by the work of [Marc Shapiro, Nuno Preguiça, Carlos Baquero, and Marek Zawirski][crdts pdf]. Riak KV supports the following eventually-convergent data types, described in later sections:
+
+- Counters
+- Flags
+- HyperLogLogs
+- Maps
+- Registers
+- Sets
+
+The difference between Riak Data Types and typical key/value data stored in Riak KV is that Riak Data Types are operations-based from the standpoint of Riak KV clients.
+
+Instead of the usual create, read, update, and delete (CRUD) operations
+performed on key/value pairs, data types enable you to perform
+operations such as removing a register from a map, telling a counter to
+increment itself by 5, or enabling a flag that was previously disabled.
+
+It's important to note that Riak Data Types are operations-based only from the standpoint of connecting clients. Like CRDTs, the [convergence logic][data types converg] is state-based behind the scenes.
+
+Riak Data Types enable applications to use CRDTs through a simple interface, without being exposed to the complex state-based logic underneath. More on Data Types and state can be found in the section on [implementation][data types impl] below.
+
+For more articles on CRDTs, check out this [reading list][crdts reading list].
+
+
+## Counters
+
+Counters are a bucket-level Riak data type that can be used by themselves, associated with a bucket/key pair, or used within a map. A counter's value can only be a positive integer, negative integer, or zero. 
+
+Counters are useful when a count is needed, for example:
+
+- Counting the number of people following someone on Twitter
+- Counting the number of likes on a Facebook post
+- Counting the points scored by a player in a game
+
+If you require unique, ordered IDs, counters should not be used, because uniqueness cannot be guaranteed.
+
+### Operations
+
+Counters are subject to two operations: increment and decrement.
+
+
+## Flags
+
+Flags are similar to Boolean values, but instead of `true` or
+`false`, flags take the values `enable` or `disable`. Flags can only be stored within maps; they cannot be stored in a bucket/key on their own.
+
+Some examples of using flags:
+
+- Showing if a tweet has been retweeted
+- Showing if a user has signed up for a specific pricing plan
+
+### Operations
+
+Flags support only two operations: `enable` and `disable`. Flags can be
+added to or removed from a map, but those operations are performed on
+the map and not on the flag directly.
+
+
+## HyperLogLogs
+
+HyperLogLogs (HLLs) are a data type used to count unique elements within a data set or stream.
+
+For example, hyperloglogs can be used for:
+
+- Counting the number of unique visitors to your website
+- Counting the number of unique searches users performed
+
+### Operations
+
+HyperLogLogs support two operations: adding elements and retrieving the count.
+
+
+## Maps
+
+Maps are the most versatile of the Riak data types because all other data types can be embedded within them, _including maps themselves_. This enables the creation of complex, custom data types from a few basic building blocks.
+
+Maps are best suited for complex, multi-faceted data. The following
+JSON-inspired pseudocode shows how a tweet might be structured as a map:
+
+```
+Map tweet {
+    Counter: numberOfRetweets,
+    Register: username,
+    Register: tweetContent,
+    Flag: favorited?,
+    Map: userInfo
+}
+```
+
+### Operations
+
+You can perform two types of operations on maps:
+
+1. Operations performed directly on the map itself, which includes
+   adding fields to and removing fields from the map (e.g. adding a flag
+   or removing a counter).
+2. Operations performed on the Data Types nested in the map, e.g.
+   incrementing a counter in the map or setting a flag to `enable`.
+   Those operations behave just like the operations specific to that
+   Data Type.
+
+
+## Registers
+
+Registers are essentially named binaries (like strings). Any binary
+value can act as the value of a register. Like flags, registers cannot
+be used on their own and must be embedded in maps.
+
+Some examples of using registers:
+
+- Storing the name `Cassius` in the register `first_name` in a map called `user14325_info`
+- Storing the title of a blog post in a map called `2010-03-01_blog_post`
+
+### Operations
+
+Registers can only have the binaries stored within them changed. They can be added to and removed from maps, but those operations take place on the map in which the register is nested, and not on the register itself.
+
+
+## Sets
+
+Sets are collections of unique binary values, such as strings. For example, if you attempt to add the
+element `shovel` to a set that already contains `shovel`, the operation
+will be ignored by Riak KV. Sets can be used either on their own or
+embedded in a map. 
+
+Some examples of using sets:
+
+- Storing the UUIDs of a user's friends in a social network application
+- Storing items in an e-commerce shopping cart
+
+### Operations
+
+Sets are subject to four basic operations: add an element, remove an
+element, add multiple elements, or remove multiple elements.
+
+
+## Advantages and Disadvantages of Data Types
+
+[Conflict resolution][usage conflict resolution] in Riak KV can be difficult because it involves reasoning about concurrency, [eventual consistency][concept eventual consistency], [siblings][concept causal context sib], and other issues that many other databases don't require you to consider.
+
+One of the core purposes behind data types is to relieve developers
+using Riak KV of the burden of producing data convergence at the
+application level by absorbing a great deal of that complexity into Riak KV
+itself. Riak KV manages this complexity by building eventual consistency
+into the data types themselves instead of requiring clients to do so.
+
+You can still build applications with Riak KV that treat it as a highly
+available key/value store, and you will always have this choice. What
+Riak Data Types provide is additional flexibility and a broader choice
+palette.
+
+The trade-off that data types necessarily present is that they don't
+allow you to produce your own convergence logic. If your use case
+demands that you be able to create your own deterministic merge
+functions, then Riak Data Types might not be a good fit.
+
+
+## Implementation
+
+Conflicts between replicas are inevitable in a distributed system like
+Riak KV.
+
+For example, if a map is stored in the key `my_map`, it is always
+possible that the value of `my_map` will be different in nodes A and B.
+
+Without using data types, that conflict must be resolved using
+timestamps, [vector clocks][concept causal context vc], [dotted version vectors][concept causal context dvv], or some other means. With data types, conflicts are resolved by Riak KV itself, using a subsystem called [`riak_dt`][riak_dt].
+
+
+## Convergence
+
+The benefit of data types is that Riak KV knows how to resolve value
+conflicts by applying data type-specific rules.
+
+Riak KV does this by remembering the history of a value and broadcasting that
+history along with the current value in the form of a [context object][dev data types context] that is similar to a [vector clock][concept causal context vc] or [dotted version vectors][concept causal context dvv]. Riak KV uses the history of each data type to make deterministic judgments about which value should be deemed correct.
+
+### Example
+
+Imagine a set stored in the key `fruits`. On one [node][glossary node] the set `fruits` has two elements, `apple` and `orange`, while on another node the set has only one element, `apple`.
+
+What happens when the two nodes communicate and note the divergence?
+
+In this case Riak KV would declare the set with two elements the winner.
+At that point, the node with the incorrect set would be told: "The set
+`fruits` should have elements `apple` and `orange`."
+
+In general, convergence involves the following stages:
+
+1. Check for divergence. If the data types have the same value, Riak KV
+   does nothing. But if divergence is noted...
+2. Riak KV applies data type-specific merge rules, like in the `fruits`
+   set example above, which will result in a "correct" value.
+3. 
After the merge logic is applied and the correct value is determined, + the relevant [vnodes][glossary vnode] are notified and act to + correct the divergence. + +## Convergence Rules + +Convergence means that data type conflicts are weighted in a certain direction. Riak's Data Types have their own internal weights that dictate what happens in case of conflict: + +Data Type | Convergence rule +:--------|:------------ +Flags | `enable` wins over `disable` +Registers | The most chronologically recent value wins, based on timestamps +Counters | Implemented as a PN-Counter ([paper][crdts pdf]), so all increments and decrements by all actors are eventually applied. Every actor wins. +Sets | If an element is concurrently added and removed, the add will win +Maps | If a field is concurrently added or updated and removed, the add/update will win + +In a production Riak KV cluster being hit by lots and lots of concurrent +writes, value conflicts are inevitable. Riak Data Types are not perfect, particularly because they do not guarantee [strong consistency][concept strong consistency] and you cannot specify the rules yourself. But the +rules that dictate the convergence logic behind the Riak Data Types +were carefully chosen to minimize the potential downsides associated +with value conflicts. + + + + diff --git a/content/riak/kv/3.0.4/learn/concepts/eventual-consistency.md b/content/riak/kv/3.0.4/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..f2bbdf79d1 --- /dev/null +++ b/content/riak/kv/3.0.4/learn/concepts/eventual-consistency.md @@ -0,0 +1,202 @@ +--- +title: "Eventual Consistency" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Eventual Consistency" + identifier: "learn_concepts_eventual_cons" + weight: 105 + parent: "learn_concepts" +toc: true +aliases: + - /riak/3.0.4/theory/concepts/Eventual-Consistency + - /riak/kv/3.0.4/theory/concepts/Eventual-Consistency + - /riak/3.0.4/theory/concepts/eventual-consistency + - /riak/kv/3.0.4/theory/concepts/eventual-consistency +--- + + +[concept buckets]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters +[concept replication]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication +[glossary node]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#node +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#read-repair +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types +[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution + + +In a distributed and fault-tolerant system like Riak, server and network +failures are expected. Riak is designed to respond to requests even when +[nodes][glossary node] are offline or the cluster is experiencing +a network partition. + +Riak handles this problem by enabling conflicting copies of data stored +in the same location, as specified by [bucket type][concept buckets], bucket, and key, to exist at the same time in the cluster. This +gives rise to the problem of **data inconsistency**. + +## Data Inconsistency + +Conflicts between replicas of an object are inevitable in +highly-available, [clustered][concept clusters] systems like Riak because there +is nothing in those systems to guarantee so-called [ACID +transactions](http://en.wikipedia.org/wiki/ACID). 
Because of this, these
+systems need to rely on some form of conflict-resolution mechanism.
+
+One of the things that makes Riak's eventual consistency model powerful
+is that Riak does not dictate how data resolution takes place. While
+Riak does ship with a set of defaults regarding how data is
+[replicated](#replication-properties-and-request-tuning) and how
+[conflicts are resolved][usage conflict resolution], you can override these
+defaults if you want to employ a different strategy.
+
+Among those strategies, you can enable Riak to resolve object conflicts
+automatically, whether via internal [vector clocks][concept causal context vc], timestamps, or
+special eventually consistent [Data Types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types/), or you can resolve those
+conflicts on the application side by employing use case-specific logic
+of your choosing. More information on this can be found in our guide to
+[conflict resolution][usage conflict resolution].
+
+This variety of options enables you to manage Riak's eventually
+consistent behavior in accordance with your application's [data model
+or models]({{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/).
+
+## Replication Properties and Request Tuning
+
+In addition to providing you different means of resolving conflicts,
+Riak also enables you to fine-tune **replication properties**, which
+determine things like the number of nodes on which data should be stored
+and the number of nodes that are required to respond to read, write, and
+other requests.
+
+An in-depth discussion of these behaviors and how they can be
+implemented on the application side can be found in our guides to
+[replication properties][concept replication] and [conflict resolution][usage conflict resolution].
+
+In addition to our official documentation, we also recommend checking
+out the [Understanding Riak's Configurable
+Behaviors](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+series from [the Basho blog](https://riak.com/blog/).
+
+## A Simple Example of Eventual Consistency
+
+Let's assume for the moment that a sports news application is storing
+all of its data in Riak. One thing that the application always needs to
+be able to report to users is the identity of the current manager of
+Manchester United, which is stored in the key `manchester-manager` in
+the bucket `premier-league-managers`. This bucket has `allow_mult` set
+to `false`, which means that Riak will resolve all conflicts by itself.
+
+Now let's say that a node in this cluster has recently recovered from
+failure and has an old copy of the key `manchester-manager` stored in
+it, with the value `Alex Ferguson`. The problem is that Sir Alex
+stepped down in 2013 and is no longer the manager. Fortunately, the
+other nodes in the cluster hold the value `David Moyes`, which is
+correct.
+
+Shortly after the recovered node comes back online, other cluster
+members recognize that it is available. Then, a read request for
+`manchester-manager` arrives from the application. Regardless of which
+order the responses arrive to the node that is coordinating this
+request, `David Moyes` will be returned as the value to the client,
+because `Alex Ferguson` is recognized as an older value.
+
+Why is this? How does Riak make this decision? Behind the scenes, after
+`David Moyes` is sent to the client, a [read repair][glossary read rep] mechanism will occur on the cluster to fix the
+older value on the node that just came back online. 
Because Riak tags
+all objects with versioning information, it can make these kinds of
+decisions on its own, if you wish.
+
+### R=1
+
+Let's say that you keep the above scenario the same, except you tweak
+the request and set R to 1, perhaps because you want faster responses to
+the client. In this case, it _is_ possible that the client will receive
+the outdated value `Alex Ferguson` because it is only waiting for a
+response from one node.
+
+However, the read repair mechanism will kick in and fix the value, so
+the next time someone asks for the value of `manchester-manager`, `David
+Moyes` will indeed be the answer.
+
+### R=1, sloppy quorum
+
+Let's take the scenario back in time to the point at which our unlucky
+node originally failed. At that point, all 3 nodes had `Alex Ferguson`
+as the value for `manchester-manager`.
+
+When a node fails, Riak's *sloppy quorum* feature kicks in and another
+node takes responsibility for serving its requests.
+
+The first time we issue a read request after the failure, if `R` is set
+to 1, we run a significant risk of receiving a `not found` response from
+Riak. The node that has assumed responsibility for that data won't have
+a copy of `manchester-manager` yet, and it's much faster to verify a
+missing key than to pull a copy of the value from disk, so that node
+will likely respond fastest.
+
+If `R` is left to its default value of 2, there wouldn't be a problem
+because 1 of the nodes that still had a copy of `Alex Ferguson` would
+also respond before the client got its result. In either case, read
+repair will step in after the request has been completed and make
+certain that the value is propagated to all the nodes that need it.
+
+### PR, PW, sloppy quorum
+
+Thus far, we've discussed settings that permit sloppy quorums in the
+interest of allowing Riak to maintain as high a level of availability as
+possible in the presence of node or network failure.
+
+It is possible to configure requests to ignore sloppy quorums in order
+to limit the possibility of older data being returned to a client. The
+tradeoff, of course, is that there is an increased risk of request
+failures if failover nodes are not permitted to serve requests.
+
+In the scenario we've been discussing, for example, we've considered the
+possibility of a node responsible for the `manchester-manager` key
+having failed. To be more precise, though, we've been talking about a
+*primary* node, one that when the cluster is perfectly healthy would
+bear responsibility for that key.
+
+When that node failed, using `R=2` as we've discussed or even `R=3` for
+a read request would still work properly: a failover node (sloppy quorum
+again) would be tasked to take responsibility for that key. When it
+receives a request for it, it would reply that it doesn't have any such
+key, but the two surviving primary nodes still know who the
+`manchester-manager` is.
+
+However, if the PR (primary read) value is specified, only the two
+surviving primary nodes are considered valid sources for that data.
+
+So, setting PR to 2 works fine, because there are still 2 such nodes,
+but a read request with PR=3 would fail because the 3rd primary node is
+offline, and no failover node can take its place *as a primary*.
+
+The same is true of writes: W=2 or W=3 will work fine with the primary
+node offline, as will PW=2 (primary write), but PW=3 will result in an
+error.
+
+>**Note: Errors and Failures**
+>
+>It is important to understand the difference between an error and a
+failure. 
+>
+>The `PW=3` request in this scenario will result in an error,
+but the value will still be written to the two surviving primary
+nodes.
+>
+>By specifying `PW=3`, the client indicated that 3 primary
+nodes must respond for the operation to be considered successful. In
+this case the operation wasn't successful, but there's no way to tell,
+without performing another read, whether the operation truly failed.
+
+
+## Further Reading
+
+* [Understanding Riak's Configurable Behaviors blog series](http://basho.com/understanding-riaks-configurable-behaviors-part-1/)
+* Werner Vogels, et al.: [Eventually Consistent - Revisited](http://www.allthingsdistributed.com/2008/12/eventually_consistent.html)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/concepts/keys-and-objects.md b/content/riak/kv/3.0.4/learn/concepts/keys-and-objects.md
new file mode 100644
index 0000000000..c88d4a3789
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/concepts/keys-and-objects.md
@@ -0,0 +1,53 @@
+---
+title: "Keys and Objects"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Keys and Objects"
+    identifier: "learn_concepts_keys_objects"
+    weight: 106
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.4/theory/concepts/keys-and-values
+  - /riak/kv/3.0.4/theory/concepts/keys-and-values
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context/#vector-clocks
+
+In an RDBMS, data is organized by tables that are individually
+identifiable entities. Within those tables exist rows of data
+organized into columns. It is possible to retrieve or update entire
+tables, individual rows, or a group of columns within a set of
+rows. In contrast, Riak has a simpler data model in which the Object
+(explained below) is both the largest and smallest data element. When
+performing any fetch or update operation in Riak, the entire Riak
+Object must be retrieved or modified; there are no partial fetches or
+updates.
+
+## Keys
+
+Keys in Riak are simply binary values (or strings) used to identify
+Objects. From the perspective of a client interacting with Riak,
+each bucket appears to represent a separate keyspace. It is important
+to understand that Riak treats the bucket-key pair as a single entity
+when performing fetch and store operations (see: [Buckets][concept buckets]).
+
+## Objects
+
+Objects are the only unit of data storage in Riak. Riak Objects are
+essentially structs identified by bucket and key and composed of the
+following parts: a bucket, key, vector clock, and a list of
+metadata-value pairs. Normally, objects have only one metadata-value
+pair, but when there is more than one, the object is said to have
+"siblings". These siblings may occur both within a single node and
+across multiple nodes, and occur when more than one actor updates an
+object, when a network partition occurs, or when a stale vector clock
+is submitted when updating an object (see: [Vector Clocks][concept causal context vc]).
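+
+A quick, hedged sketch of this anatomy using the official Erlang client
+(`riakc`); the bucket and key names are hypothetical:
+
+```erlang
+%% Illustrative only. A fetched object carries its bucket, key, causal
+%% context, and one value per sibling.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"users">>, <<"user14325">>),
+Bucket  = riakc_obj:bucket(Obj),
+Key     = riakc_obj:key(Obj),
+Context = riakc_obj:vclock(Obj),      %% opaque causal context
+Values  = riakc_obj:get_values(Obj).  %% more than one element means siblings
+```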
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/concepts/replication.md b/content/riak/kv/3.0.4/learn/concepts/replication.md
new file mode 100644
index 0000000000..77f5dcf2a5
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/concepts/replication.md
@@ -0,0 +1,323 @@
+---
+title: "Replication"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Replication"
+    identifier: "learn_concepts_replication"
+    weight: 108
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.4/theory/concepts/Replication
+  - /riak/kv/3.0.4/theory/concepts/Replication
+  - /riak/3.0.4/theory/concepts/replication
+  - /riak/kv/3.0.4/theory/concepts/replication
+---
+
+
+[cluster ops v3 mdc]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter
+[concept aae]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy
+[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context/#vector-clocks
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters
+[concept vnodes]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/vnodes
+[glossary node]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#ring
+[usage replication]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/replication
+
+
+Data replication is a core feature of Riak's basic architecture. Riak
+was designed to operate as a [clustered][concept clusters] system containing
+multiple Riak [nodes][glossary node], which allows data to live
+on multiple machines at once in case a node in the cluster goes down.
+
+Replication is fundamental and automatic in Riak, providing security
+that your data will still be there if a node in your Riak cluster goes
+down. All data stored in Riak will be replicated to a number of nodes in
+the cluster according to the N value (`n_val`) property set in a
+bucket's [bucket type]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types).
+
+>**Note: Replication across clusters**
+>
+>If you're interested in replication not just within a cluster but across
+multiple clusters, we recommend checking out our documentation on Riak's
+[Multi-Datacenter Replication][cluster ops v3 mdc] capabilities.
+
+## Selecting an N value (`n_val`)
+
+By default, Riak chooses an `n_val` of 3. This means that data
+stored in any bucket will be replicated to 3 different nodes. For this
+to be effective, you need at least 3 nodes in your cluster.
+
+The ideal value for N depends largely on your application and the shape
+of your data. If your data is highly transient and can be reconstructed
+easily by the application, choosing a lower N value will provide greater
+performance. However, if you need high assurance that data is available
+even after node failure, increasing the N value will help protect
+against loss. How many nodes do you expect will fail at any one time?
+Choose an N value larger than that and your data will still be
+accessible when they go down.
+
+The N value also affects the behavior of read (GET) and write (PUT)
+requests. The tunable parameters you can submit with requests are bound
+by the N value. For example, if N=3, the maximum read quorum (known as
+"R") you can request is also 3. If some nodes containing the data you
+are requesting are down, an R value larger than the number of available
+nodes with the data will cause the read to fail.
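+
+A hedged sketch of request-time tuning with the Erlang client (the
+bucket, key, and value are hypothetical). With N=3, `r`, `w`, and `dw`
+may each be set anywhere from 1 to 3:
+
+```erlang
+%% Illustrative only: per-request quorum options are capped by n_val.
+{ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+%% Fast, possibly stale read: wait for a single vnode's reply.
+{ok, Obj} = riakc_pb_socket:get(Pid, <<"my_bucket">>, <<"my_key">>, [{r, 1}]),
+%% Stricter write: two vnodes must accept it, one of them durably.
+NewObj = riakc_obj:update_value(Obj, <<"new value">>),
+ok = riakc_pb_socket:put(Pid, NewObj, [{w, 2}, {dw, 1}]).
+```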
+
+## Setting the N value (`n_val`)
+
+To change the N value for a bucket, you need to create a [bucket
+type]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) with `n_val` set to your desired value and
+then make sure that the bucket bears that type.
+
+In this example, we'll set N to 2. First, we'll create the bucket type
+and call it `n_val_of_2` and then activate that type:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+riak-admin bucket-type activate n_val_of_2
+```
+
+Now, any bucket that bears the type `n_val_of_2` will propagate objects
+to 2 nodes.
+
+>**Note on changing the value of N**
+>
+>Changing the N value after a bucket has data in it is *not
+recommended*. If you do change the value, especially if you
+increase it, you might need to force read repair (more on that below).
+Overwritten objects and newly stored objects will automatically be
+replicated to the correct number of nodes.
+
+## Changing the N value (`n_val`)
+
+While raising the value of N for a bucket or object shouldn't cause
+problems, it's important that you never lower N. If you do so, you can
+wind up with dead, i.e. unreachable, data. This can happen because
+objects' preflists, i.e. lists of [vnodes][concept vnodes] responsible for the object,
+can end up changing, leaving replicas stored on vnodes that are no
+longer part of the object's preflist.
+
+Unreachable data is a problem because it can negatively impact coverage
+queries, e.g. [secondary index]({{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes/) and
+[MapReduce]({{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce/) queries. Lowering an object or bucket's
+`n_val` will likely mean that objects that you would expect to
+be returned from those queries will no longer be returned.
+
+## Active Anti-Entropy
+
+Riak's active anti-entropy (AAE) subsystem is a continuous background
+process that compares and repairs any divergent or missing object
+replicas. For more information on AAE, see the following documents:
+
+* [Active Anti-Entropy][concept aae]
+* [Managing Active Anti-Entropy][cluster ops v3 mdc]
+
+
+## Read Repair
+
+Read repair occurs when a read succeeds---i.e. when the target
+number of nodes have responded, as determined by R---but not all
+replicas of the object agree on the value. There are two possibilities
+here for the errant nodes:
+
+1. The node responded with a `not found` for the object, meaning that
+   it doesn't have a copy.
+2. The node responded with a [vector clock][concept causal context vc] that is an
+   ancestor of the vector clock of the successful read.
+
+When this situation occurs, Riak will force the errant nodes to update
+the object's value based on the value of the successful read.
+
+### Forcing Read Repair
+
+When you increase the `n_val` of a bucket, you may start to see failed
+read operations, especially if the R value you use is larger than the
+number of replicas that originally stored the object. Forcing read
+repair will solve this issue. Or if you have [active
+anti-entropy][usage replication] enabled, your values will
+eventually replicate as a background task.
+
+For each object that fails the read (or for the whole bucket, if you like), read
+the object using an R value less than or equal to the original number of
+replicas. For example, if your original `n_val` was 3 and you increased
+it to 5, perform your read operations with R=3 or less. This will cause
+the nodes that do not have the object(s) yet to respond with `not
+found`, invoking read repair.
+
+## So what does N=3 really mean?
+
+N=3 simply means that three copies of each piece of data will be stored
+in the cluster. That is, three different partitions/vnodes will receive
+copies of the data. **There are no guarantees that the three replicas
+will go to three separate physical nodes**; however, the built-in
+functions for determining where replicas go attempt to distribute the
+data evenly.
+
+As nodes are added and removed from the cluster, the ownership of
+partitions changes and may result in an uneven distribution of the data.
+On some rare occasions, Riak will also aggressively reshuffle ownership
+of the partitions to achieve a more even balance.
+
+For cases where the number of nodes is less than the N value, data will
+likely be duplicated on some nodes. For example, with N=3 and 2 nodes in
+the cluster, one node will likely have one replica, and the other node
+will have two replicas.
+
+## Understanding replication by example
+
+To better understand how data is replicated in Riak, let's take a look at
+a put request for the bucket/key pair `my_bucket`/`my_key`. Specifically
+we'll focus on two parts of the request: routing an object to a set of
+partitions and storing an object on a partition.
+
+### Routing an object to a set of partitions
+
+ * Assume we have 3 nodes
+ * Assume we store 3 replicas per object (N=3)
+ * Assume we have 8 partitions in our [ring][glossary ring] \(ring_creation_size=8)
+
+**Note**: It is not recommended that you use such a small ring size.
+This is for demonstration purposes only.
+
+With only 8 partitions our ring will look approximately as follows
+(response from `riak_core_ring_manager:get_my_ring/0` truncated for
+clarity):
+
+```erlang
+(dev1@127.0.0.1)3> {ok,Ring} = riak_core_ring_manager:get_my_ring().
+[{0,'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'},
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'}]
+```
+
+The node handling this request hashes the bucket/key combination:
+
+```erlang
+(dev1@127.0.0.1)4> DocIdx = riak_core_util:chash_key({<<"my_bucket">>, <<"my_key">>}).
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+```
+
+The DocIdx hash is a 160-bit integer:
+
+```erlang
+(dev1@127.0.0.1)5> <<I:160/integer>> = DocIdx.
+<<183,28,67,173,80,128,26,94,190,198,65,15,27,243,135,127,121,101,255,96>>
+(dev1@127.0.0.1)6> I.
+1045375627425331784151332358177649483819648417632
+```
+
+The node looks up the hashed key in the ring, which returns a list of
+_preferred_ partitions for the given key.
+
+```erlang
+(dev1@127.0.0.1)7> Preflist = riak_core_ring:preflist(DocIdx, Ring).
+[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0, 'dev1@127.0.0.1'},
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]
+```
+
+The node chooses the first N partitions from the list. The remaining
+partitions of the "preferred" list are retained as fallbacks to use if
+any of the target partitions are unavailable.
+
+```erlang
+(dev1@127.0.0.1)9> {Targets, Fallbacks} = lists:split(N, Preflist).
+{[{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'},
+{1278813932664540053428224228626747642198940975104, 'dev2@127.0.0.1'},
+{0,'dev1@127.0.0.1'}],
+[{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'},
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'},
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'},
+{730750818665451459101842416358141509827966271488, 'dev2@127.0.0.1'},
+{913438523331814323877303020447676887284957839360, 'dev3@127.0.0.1'}]}
+```
+
+The partition information returned from the ring contains a partition
+identifier and the parent node of that partition:
+
+```erlang
+{1096126227998177188652763624537212264741949407232, 'dev1@127.0.0.1'}
+```
+
+The requesting node sends a message to each parent node with the object
+and partition identifier (pseudocode for clarity):
+
+```erlang
+'dev1@127.0.0.1' ! {put, Object, 1096126227998177188652763624537212264741949407232}
+'dev2@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+'dev1@127.0.0.1' ! {put, Object, 0}
+```
+
+If any of the target partitions fail, the node sends the object to one
+of the fallbacks. When the message is sent to the fallback node, the
+message references the object and original partition identifier. For
+example, if `dev2@127.0.0.1` were unavailable, the requesting node would
+then try each of the fallbacks. The fallbacks in this example are:
+
+```erlang
+{182687704666362864775460604089535377456991567872, 'dev2@127.0.0.1'}
+{365375409332725729550921208179070754913983135744, 'dev3@127.0.0.1'}
+{548063113999088594326381812268606132370974703616, 'dev1@127.0.0.1'}
+```
+
+The next available fallback node would be `dev3@127.0.0.1`. The
+requesting node would send a message to the fallback node with the
+object and original partition identifier:
+
+```erlang
+'dev3@127.0.0.1' ! {put, Object, 1278813932664540053428224228626747642198940975104}
+```
+
+Note that the partition identifier in the message is the same one that was
+originally sent to `dev2@127.0.0.1`, only this time it is being sent to
+`dev3@127.0.0.1`. Even though `dev3@127.0.0.1` is not the parent node of
+that partition, it is smart enough to hold on to the object until
+`dev2@127.0.0.1` returns to the cluster.
+
+## Processing partition requests
+
+Processing requests per partition is fairly simple. Each node runs a
+single process (`riak_kv_vnode_master`) that distributes requests to
+individual partition processes (`riak_kv_vnode`). The
+`riak_kv_vnode_master` process maintains a list of partition identifiers
+and corresponding partition processes. If a process does not exist for a
+given partition identifier a new process is spawned to manage that
+partition.
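+
+The general shape of this pattern---a master that spawns a partition
+process on first use---can be sketched in a few lines of Erlang. This is
+a toy illustration only, not Riak's actual implementation:
+
+```erlang
+-module(toy_vnode_master).
+-export([start/0, loop/1]).
+
+%% Toy sketch of the master/worker pattern described above. The real
+%% riak_kv_vnode_master is far more involved; this only shows the
+%% "spawn a partition process on first use" behavior.
+start() ->
+    spawn(?MODULE, loop, [#{}]).
+
+loop(Workers) ->
+    receive
+        {put, Object, Partition} ->
+            {Pid, Workers1} = worker_for(Partition, Workers),
+            Pid ! {put, Object},
+            loop(Workers1)
+    end.
+
+%% Find the process managing a partition, spawning one if none exists.
+worker_for(Partition, Workers) ->
+    case maps:find(Partition, Workers) of
+        {ok, Pid} -> {Pid, Workers};
+        error ->
+            Pid = spawn(fun() -> worker(Partition) end),
+            {Pid, maps:put(Partition, Pid, Workers)}
+    end.
+
+worker(Partition) ->
+    receive
+        {put, _Object} ->
+            %% A real riak_kv_vnode would hand the object to its
+            %% storage backend here.
+            worker(Partition)
+    end.
+```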
+
+The `riak_kv_vnode_master` process treats all requests the same and
+spawns partition processes as needed even when nodes receive requests
+for partitions they do not own. When a partition's parent node is
+unavailable, requests are sent to fallback nodes (handoff). The
+`riak_kv_vnode_master` process on the fallback node spawns a process to
+manage the partition even though the partition does not belong to the
+fallback node.
+
+The individual partition processes perform hometests throughout the life
+of the process. The hometest checks if the current node (`node/0`)
+matches the parent node of the partition as defined in the ring. If the
+process determines that the partition it is managing belongs on another
+node (the parent node), it will attempt to contact that node. If that
+parent node responds, the process will hand off any objects it has
+processed for that partition and shut down. If that parent node does not
+respond, the process will continue to manage that partition and check
+the parent node again after a delay. The hometest is also run by
+partition processes to account for changes in the ring, such as the
+addition of nodes to or removal of nodes from the cluster.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/concepts/strong-consistency.md b/content/riak/kv/3.0.4/learn/concepts/strong-consistency.md
new file mode 100644
index 0000000000..c1d7f0d457
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/concepts/strong-consistency.md
@@ -0,0 +1,105 @@
+---
+title: "Strong Consistency"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Strong Consistency"
+    identifier: "learn_concepts_strong_consistency"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.4/theory/concepts/strong-consistency
+  - /riak/kv/3.0.4/theory/concepts/strong-consistency
+---
+
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency
+
+{{% note title="Please Note:" %}}
+Riak KV's strong consistency is an experimental feature and may be removed
+from the product in the future. Strong consistency is not commercially
+supported or production-ready. Strong consistency is incompatible with
+Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB
+Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its
+usage in any production environment.
+{{% /note %}}
+
+Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition
+(i.e. fault) tolerance and high read and write availability.
+
+While this focus on high availability is a great fit for many data
+storage needs, there are also many use cases for which strong data
+consistency is more important than availability. Basho introduced a new
+strong consistency option in version 2.0 to address these use cases.
+In Riak, strong consistency is applied [using bucket types][usage bucket types], which
+enables developers to apply strong consistency guarantees on a per-key
+basis.
+
+Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/3.0.4/configuring/strong-consistency) looking to manage,
+configure, and monitor strong consistency.
+
+## Strong vs.
Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. 
Nearly all reads to `k` now return `v2`, but a small number return
+   the outdated `v` (or even `not found`) because the newer value hasn't
+   yet been replicated to all nodes
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/concepts/vnodes.md b/content/riak/kv/3.0.4/learn/concepts/vnodes.md
new file mode 100644
index 0000000000..cc7ef9e66f
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/concepts/vnodes.md
@@ -0,0 +1,160 @@
+---
+title: "Vnodes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Vnodes"
+    identifier: "learn_concepts_vnodes"
+    weight: 109
+    parent: "learn_concepts"
+toc: true
+aliases:
+  - /riak/3.0.4/theory/concepts/vnodes
+  - /riak/kv/3.0.4/theory/concepts/vnodes
+---
+
+
+[concept causal context]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context
+[concept clusters ring]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters/#the-ring
+[concept replication]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/strong-consistency
+[glossary node]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#node
+[glossary ring]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#ring
+[plan backend]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/cluster-capacity
+[use admin riak cli]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-cli
+
+
+Virtual nodes, more commonly referred to as **vnodes**, are processes
+that manage partitions in the Riak [ring][glossary ring]. Each data
+partition in a Riak cluster has a vnode that **claims** that partition.
+Vnodes perform a wide variety of operations, from K/V storage operations
+to guaranteeing [strong consistency][concept strong consistency] if you choose to use that
+feature.
+
+## The Number of Vnodes in a Cluster
+
+The term [node][glossary node] refers to a full instance of Riak,
+be it on its own physical machine or alongside others on a single
+machine, as in a development cluster on your laptop. Each Riak node
+contains multiple vnodes. The number per node is the [ring
+size][concept clusters ring] divided by the number of nodes in the cluster.
+
+This means that in some clusters different nodes will have different
+numbers of data partitions (and hence a different number of vnodes),
+because (ring size / number of nodes) will not always produce an integer.
+If the ring size of your cluster is 64 and you are running three nodes,
+two of your nodes will have 21 vnodes, while the third node holds 22
+vnodes.
+
+The output of the [`riak-admin member-status`][use admin riak cli]
+command shows this:
+
+```
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      34.4%      --      'dev1@127.0.0.1'
+valid      32.8%      --      'dev2@127.0.0.1'
+valid      32.8%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid: 3 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+```
+
+In this cluster, one node accounts for 34.4% of the ring, i.e. 22 out of
+64 partitions, while the other two nodes account for 32.8%, i.e. 21 out
+of 64 partitions. This is normal and expected behavior in Riak.
+
+We strongly recommend setting the appropriate ring size, and by
+extension the number of vnodes, prior to building a cluster. A full
+guide can be found in our [cluster planning][plan cluster capacity] documentation.
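+
+Incidentally, the arithmetic behind that uneven split is just integer
+division with a remainder, e.g. in an Erlang shell:
+
+```erlang
+%% 64 partitions across 3 nodes: every node claims at least
+%% 64 div 3 vnodes, and 64 rem 3 of the nodes carry one extra.
+1> 64 div 3.
+21
+2> 64 rem 3.
+1
+%% => two nodes hold 21 vnodes each and one node holds 22, matching
+%%    the 32.8% / 32.8% / 34.4% split shown above.
+```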
+
+## The Role of Vnodes
+
+Vnodes essentially watch over a designated subset of a cluster's key
+space. Riak computes a 160-bit binary hash of each bucket/key pair and
+maps this value to a position on an ordered [ring][concept clusters ring]
+of all such values. The illustration below provides a visual
+representation of the Riak ring:
+
+![The Riak
+Ring]({{<baseurl>}}images/shared/riak-ring.png)
+
+You can think of vnodes as managers, responsible for handling incoming
+requests from other nodes/vnodes, storing objects in the appropriate
+storage backend, fetching objects from backends, interpreting [causal
+context][concept causal context] metadata for objects, acting as [strong consistency
+ensembles][concept strong consistency], and much
+more. At the system level, vnodes are Erlang processes built on top of
+the [`gen_fsm`](http://www.erlang.org/doc/design_principles/fsm.html)
+abstraction in Erlang, i.e. you can think of vnodes as **finite state
+machines** that are constantly at work ensuring that Riak's key
+goals---high availability, fault tolerance, etc.---are guaranteed for
+their allotted portion of the cluster's key space. Whereas nodes are
+essentially passive containers for a wide variety of Riak processes,
+vnodes are the true workhorses of Riak.
+
+While each vnode has a main Erlang process undergirding it, vnodes may
+also spawn new worker processes (i.e. new Erlang actors) to perform
+asynchronous tasks on behalf of the vnode.
+
+If you're navigating through the file system of a Riak node, you'll
+notice that each node's `/data` directory holds a variety of
+subdirectories. If you're using, say, [Bitcask]({{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask) as a backend, navigate
+into the `/bitcask` directory (you'll also see a `/ring` directory and
+several others). If you open up the `/bitcask` directory, you'll see a
+wide assortment of directories with numbers as names, e.g. `0` or
+`1278813932664540053428224228626747642198940975104`. These directories
+each house the data from a particular partition.
+
+## Vnodes and Replication Properties
+
+In our documentation on [replication properties][concept replication], we make frequent
+mention of users' ability to choose how many nodes store copies of
+data, how many nodes must respond for a read request to succeed, and so
+on. This is slightly misleading, as the fundamental units of replication
+are not nodes but rather vnodes.
+
+This can be illustrated by way of a potential user error. If you store
+an object and set N=5, this means that you want the object to be stored
+on 5 different nodes. But imagine that your cluster only has 3 nodes.
+Setting N=5 on a 3-node cluster is actually just fine. The data will be
+managed by 5 vnodes, but some of that data may end up being stored more
+than once on different nodes. A likely scenario is that two nodes will
+store two copies of the data apiece, while the third node will store
+only one. Absent such an error, however, nodes will not contain multiple
+vnodes responsible for the same partition.
+
+## Vnode Status
+
+You can check the current status of all vnodes in your cluster using the
+[`riak-admin vnode-status`][use admin riak cli]
+command. When you run that command, you will see a series of reports on
+each of the vnodes active on the local node.
The report for a
+specific vnode should look something like this:
+
+```
+VNode: 1278813932664540053428224228626747642198940975104
+Backend: riak_kv_bitcask_backend
+Status:
+[{key_count, 275},
+ {status,[{"./data/bitcask/1278813932664540053428224228626747642198940975104/2.bitcask.data",
+           0,0,335}]}]
+Status:
+{vnodeid,<<"ÅR±\vi80\f">>}
+```
+
+The meaning of each field is given in the table below.
+
+Field | Description
+:-----|:-----------
+`VNode` | The ID of the vnode in question
+`Backend` | The storage [backend][plan backend] utilized by the vnode
+`Status` | The number of keys managed by the vnode and the file where the vnode stores its data. The other information can be ignored.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/dynamo.md b/content/riak/kv/3.0.4/learn/dynamo.md
new file mode 100644
index 0000000000..2221c06817
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/dynamo.md
@@ -0,0 +1,1928 @@
+---
+title: "Dynamo: Amazon’s Highly Available Key-value Store"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Dynamo"
+    identifier: "learn_dynamo"
+    weight: 110
+    parent: "learn"
+toc: false
+aliases:
+  - /riak/3.0.4/theory/dynamo
+  - /riak/kv/3.0.4/theory/dynamo
+---
+
+<div style="text-align:center;font-style:italic">
+  Giuseppe DeCandia, Deniz Hastorun, Madan Jampani, Gunavardhan Kakulapati,
+  Avinash Lakshman, Alex Pilchin, Swaminathan Sivasubramanian, Peter Vosshall
+  and Werner Vogels
+  <br>
+  Amazon.com
+</div>
+
+<br>
+
+> *Dynamo: Amazon's Highly Available Key-value Store* is reprinted here in its
+> entirety, images and all.
+>
+> Throughout the paper you will find notes containing Riak KV-specifics that
+> relate to a given section of the paper; anything from links to the docs, to
+> code references, to explanations of why and how we did what we did.
+
+<!-- Random comment to add some padding between blockquotes -->
+
+> This paper was first released in 2007 and was popularized on the blog of
+> Werner Vogels. Since then, several databases inspired (either entirely or
+> partially) by this paper have appeared. In addition to Riak KV,
+> Cassandra and Voldemort come to mind. You may also remember Dynomite (which
+> predates all of these). There are probably more.
+>
+> Also note that this paper has little to do with Amazon's DynamoDB service.
+> They have not published the inner workings of that implementation.
+
+
+## Abstract
+
+Reliability at massive scale is one of the biggest challenges we face at
+Amazon.com, one of the largest e-commerce operations in the world; even the
+slightest outage has significant financial consequences and impacts customer
+trust. The Amazon.com platform, which provides services for many web sites
+worldwide, is implemented on top of an infrastructure of tens of thousands of
+servers and network components located in many datacenters around the world. At
+this scale, small and large components fail continuously and the way persistent
+state is managed in the face of these failures drives the reliability and
+scalability of the software systems.
+
+This paper presents the design and implementation of Dynamo, a highly available
+key-value storage system that some of Amazon’s core services use to provide an
+“always-on” experience. To achieve this level of availability, Dynamo sacrifices
+consistency under certain failure scenarios. It makes extensive use of object
+versioning and application-assisted conflict resolution in a manner that
+provides a novel interface for developers to use.
+ +Categories and Subject Descriptors + +* D.4.2 [Operating Systems]: Storage Management; +* D.4.5 [Operating Systems]: Reliability; +* D.4.2 [Operating Systems]: Performance; + +General Terms + +Algorithms, Management, Measurement, Performance, Design, Reliability. + +## 1. Introduction + +Amazon runs a world-wide e-commerce platform that serves tens of millions +customers at peak times using tens of thousands of servers located in many data +centers around the world. There are strict operational requirements on Amazon’s +platform in terms of performance, reliability and efficiency, and to support +continuous growth the platform needs to be highly scalable. Reliability is one +of the most important requirements because even the slightest outage has +significant financial consequences and impacts customer trust. In addition, to +support continuous growth, the platform needs to be highly scalable. + +One of the lessons our organization has learned from operating Amazon’s platform +is that the reliability and scalability of a system is dependent on how its +application state is managed. Amazon uses a highly decentralized, loosely +coupled, service oriented architecture consisting of hundreds of services. In +this environment there is a particular need for storage technologies that are +always available. For example, customers should be able to view and add items to +their shopping cart even if disks are failing, network routes are flapping, or +data centers are being destroyed by tornados. Therefore, the service responsible +for managing shopping carts requires that it can always write to and read from +its data store, and that its data needs to be available across multiple data +centers. + +Dealing with failures in an infrastructure comprised of millions of components +is our standard mode of operation; there are always a small but significant +number of server and network components that are failing at any given time. As +such Amazon’s software systems need to be constructed in a manner that treats +failure handling as the normal case without impacting availability or +performance. + +To meet the reliability and scaling needs, Amazon has developed a number of +storage technologies, of which the Amazon Simple Storage Service (also available +outside of Amazon and known as Amazon S3), is probably the best known. This +paper presents the design and implementation of Dynamo, another highly available +and scalable distributed data store built for Amazon’s platform. Dynamo is used +to manage the state of services that have very high reliability requirements and +need tight control over the tradeoffs between availability, consistency, cost- +effectiveness and performance. Amazon’s platform has a very diverse set of +applications with different storage requirements. A select set of applications +requires a storage technology that is flexible enough to let application +designers configure their data store appropriately based on these tradeoffs to +achieve high availability and guaranteed performance in the most cost effective +manner. + +There are many services on Amazon’s platform that only need primary-key access +to a data store. For many services, such as those that provide best seller +lists, shopping carts, customer preferences, session management, sales rank, and +product catalog, the common pattern of using a relational database would lead to +inefficiencies and limit scale and availability. Dynamo provides a simple +primary-key only interface to meet the requirements of these applications. 
+ +Dynamo uses a synthesis of well known techniques to achieve scalability and +availability: Data is partitioned and replicated using consistent hashing [10], +and consistency is facilitated by object versioning [12]. The consistency among +replicas during updates is maintained by a quorum-like technique and a +decentralized replica synchronization protocol. Dynamo employs a gossip based +distributed failure detection and membership protocol. Dynamo is a completely +decentralized system with minimal need for manual administration. Storage nodes +can be added and removed from Dynamo without requiring any manual partitioning +or redistribution. + +> Like Dynamo, Riak KV employs consistent hashing to partition and replicate +> data around the ring. For the consistent hashing that takes place in +> riak_core, Basho chose the SHA1 hash. See [Consistent Hashing] in our docs. +> +> Riak KV uses vector clocks for object versioning. Scroll down to section 4.4 +> to read up on this in depth. +> +> Riak KV makes use of gossiping in the same way that Dynamo does: to +> communicate ring state and node membership. See [Gossip Protocol] in our docs. +> +> And, nodes can be added and removed from your Riak cluster as needed. + +[Consistent Hashing]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#consistent-hashing +[Gossip Protocol]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#gossiping + +In the past year, Dynamo has been the underlying storage technology for a number +of the core services in Amazon’s e-commerce platform. It was able to scale to +extreme peak loads efficiently without any downtime during the busy holiday +shopping season. For example, the service that maintains shopping cart (Shopping +Cart Service) served tens of millions requests that resulted in well over 3 +million checkouts in a single day and the service that manages session state +handled hundreds of thousands of concurrently active sessions. + +The main contribution of this work for the research community is the evaluation +of how different techniques can be combined to provide a single highly-available +system. It demonstrates that an eventually-consistent storage system can be used +in production with demanding applications. It also provides insight into the +tuning of these techniques to meet the requirements of production systems with +very strict performance demands. + +The paper is structured as follows. Section 2 presents the background and +Section 3 presents the related work. Section 4 presents the system design and +Section 5 describes the implementation. Section 6 details the experiences and +insights gained by running Dynamo in production and Section 7 concludes the +paper. There are a number of places in this paper where additional information +may have been appropriate but where protecting Amazon’s business interests +require us to reduce some level of detail. For this reason, the intra- and +inter-datacenter latencies in section 6, the absolute request rates in section +6.2 and outage lengths and workloads in section 6.3 are provided through +aggregate measures instead of absolute details. + + +## 2. Background + +Amazon’s e-commerce platform is composed of hundreds of services that work in +concert to deliver functionality ranging from recommendations to order +fulfillment to fraud detection. Each service is exposed through a well defined +interface and is accessible over the network. 
These services are hosted in an
+infrastructure that consists of tens of thousands of servers located across many
+data centers world-wide. Some of these services are stateless (i.e., services
+which aggregate responses from other services) and some are stateful (i.e., a
+service that generates its response by executing business logic on its state
+stored in persistent store).
+
+> **Brief Background on Riak KV**
+>
+> Basho Technologies started to develop Riak KV back in 2007 to solve an
+> internal problem. We were, at the time, building a web application that would
+> require a database layer that afforded higher availability and scale out
+> properties than any technology we knew of. So, we rolled our own.
+>
+> After using Riak KV in production for several successful applications that
+> generated revenue, we decided to open source it and share our creation with
+> the world.
+
+Traditionally production systems store their state in relational databases. For
+many of the more common usage patterns of state persistence, however, a
+relational database is a solution that is far from ideal. Most of these services
+only store and retrieve data by primary key and do not require the complex
+querying and management functionality offered by an RDBMS. This excess
+functionality requires expensive hardware and highly skilled personnel for its
+operation, making it a very inefficient solution. In addition, the available
+replication technologies are limited and typically choose consistency over
+availability. Although many advances have been made in the recent years, it is
+still not easy to scale-out databases or use smart partitioning schemes for load
+balancing.
+
+This paper describes Dynamo, a highly available data storage technology that
+addresses the needs of these important classes of services. Dynamo has a simple
+key/value interface, is highly available with a clearly defined consistency
+window, is efficient in its resource usage, and has a simple scale out scheme to
+address growth in data set size or request rates. Each service that uses Dynamo
+runs its own Dynamo instances.
+
+> Riak KV is a highly available, scalable, open source key/value database. These
+> notes describe where Riak KV's design decisions emulated and diverged from
+> Dynamo's (as described in this paper).
+>
+> Riak KV offers several query methods in addition to the standard key/value
+> interface, is made to be highly-available, is efficient in its resource usage,
+> and has a simple scale out story to accompany data and traffic growth.
+
+
+### 2.1 System Assumptions and Requirements
+
+The storage system for this class of services has the following requirements:
+
+
+* Query Model: simple read and write operations to a data item that is uniquely
+identified by a key. State is stored as binary objects (i.e., blobs) identified
+by unique keys. No operations span multiple data items and there is no need for
+relational schema. This requirement is based on the observation that a
+significant portion of Amazon’s services can work with this simple query model
+and do not need any relational schema. Dynamo targets applications that need to
+store objects that are relatively small (usually less than 1 MB).
+
+> **Riak KV's Query Model**
+>
+> We've extended Dynamo's proposed query model in several ways. Currently Riak
+> KV offers:
+>
+> 1. Standard key/value access (GET, PUT, DELETE)
+> 2. MapReduce querying
+> 3. Secondary Indexing
+> 4. Full-text Search
+>
+> Riak KV's realistic object size limit is around 5MB.
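+>
+> As a rough sketch of methods 1 and 3 above via the Erlang client
+> (`riakc`); the bucket, index, and values here are invented for the
+> example, and secondary indexes assume a backend that supports them
+> (e.g. LevelDB):
+>
+> ```erlang
+> %% Illustrative only; assumes a node on the default PB port.
+> {ok, Pid} = riakc_pb_socket:start_link("127.0.0.1", 8087),
+> Obj0 = riakc_obj:new(<<"users">>, <<"jsmith123">>, <<"user data">>),
+> MD0  = riakc_obj:get_update_metadata(Obj0),
+> %% Attach a secondary index, then store the object.
+> MD1  = riakc_obj:set_secondary_index(
+>          MD0, [{{binary_index, "twitter"}, [<<"jsmith123">>]}]),
+> ok   = riakc_pb_socket:put(Pid, riakc_obj:update_metadata(Obj0, MD1)),
+> %% Query the index for exact matches.
+> {ok, Results} =
+>     riakc_pb_socket:get_index_eq(Pid, <<"users">>,
+>                                  {binary_index, "twitter"},
+>                                  <<"jsmith123">>).
+> ```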
+
+* ACID Properties: ACID (Atomicity, Consistency, Isolation, Durability) is a set
+of properties that guarantee that database transactions are processed reliably.
+In the context of databases, a single logical operation on the data is called a
+transaction. Experience at Amazon has shown that data stores that provide ACID
+guarantees tend to have poor availability. This has been widely acknowledged by
+both the industry and academia [5]. Dynamo targets applications that operate
+with weaker consistency (the “C” in ACID) if this results in high availability.
+Dynamo does not provide any isolation guarantees and permits only single key
+updates.
+
+> **ACID?**
+>
+> Riak KV offers no traditional "ACID" semantics around transactions. Instead,
+> it's built to be "eventually consistent." We did this because we were of the
+> opinion (and our users proved this out) that most applications don't require
+> heavy transactions. (Even ATMs are eventually consistent.)
+
+* Efficiency: The system needs to function on a commodity hardware
+infrastructure. In Amazon’s platform, services have stringent latency
+requirements which are in general measured at the 99.9th percentile of the
+distribution. Given that state access plays a crucial role in service operation
+the storage system must be capable of meeting such stringent SLAs (see Section
+2.2 below). Services must be able to configure Dynamo such that they
+consistently achieve their latency and throughput requirements. The tradeoffs
+are in performance, cost efficiency, availability, and durability guarantees.
+
+> **Efficiency**
+>
+> Agreed. Riak KV is made to (and will!) scale linearly on commodity hardware
+> (often called "pizza boxes").
+
+* Other Assumptions: Dynamo is used only by Amazon’s internal services. Its
+operation environment is assumed to be non-hostile and there are no security
+related requirements such as authentication and authorization. Moreover, since
+each service uses its distinct instance of Dynamo, its initial design targets a
+scale of up to hundreds of storage hosts. We will discuss the scalability
+limitations of Dynamo and possible scalability related extensions in later
+sections.
+
+
+### 2.2 Service Level Agreements (SLA)
+
+To guarantee that the application can deliver its functionality in a bounded
+time, each and every dependency in the platform needs to deliver its
+functionality with even tighter bounds. Clients and services engage in a Service
+Level Agreement (SLA), a formally negotiated contract where a client and a
+service agree on several system-related characteristics, which most prominently
+include the client’s expected request rate distribution for a particular API and
+the expected service latency under those conditions. An example of a simple SLA
+is a service guaranteeing that it will provide a response within 300ms for 99.9%
+of its requests for a peak client load of 500 requests per second.
+
+In Amazon’s decentralized service oriented infrastructure, SLAs play an
+important role. For example a page request to one of the e-commerce sites
+typically requires the rendering engine to construct its response by sending
+requests to over 150 services. These services often have multiple dependencies,
+which frequently are other services, and as such it is not uncommon for the call
+graph of an application to have more than one level. To ensure that the page
+rendering engine can maintain a clear bound on page delivery each service within
+the call chain must obey its performance contract.
+ +> **Riak KV Loves SLAs** +> +> Much like Amazon built Dynamo to guarantee their applications were always +> available to retail shoppers, the design decisions in Riak KV were taken to +> ensure that developers could sleep well knowing that their database would +> always be available to serve requests. +> +> Many of our clients and open source users have explicit uptime agreements +> related to their applications and services built on Riak KV. This was not an +> accident. + + +<a href="#figure-1">Figure 1</a> shows an abstract view of the architecture of +Amazon’s platform, where dynamic web content is generated by page rendering +components which in turn query many other services. A service can use different +data stores to manage its state and these data stores are only accessible within +its service boundaries. Some services act as aggregators by using several other +services to produce a composite response. Typically, the aggregator services are +stateless, although they use extensive caching. + +**<figure id="figure-1" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure1.png"> + <figcaption> + Figure 1: Service-oriented architecture of Amazon’s platform. + </figcaption> +</figure>** + +A common approach in the industry for forming a performance oriented SLA is to +describe it using average, median and expected variance. At Amazon we have found +that these metrics are not good enough if the goal is to build a system where +all customers have a good experience, rather than just the majority. For example +if extensive personalization techniques are used then customers with longer +histories require more processing which impacts performance at the high-end of +the distribution. An SLA stated in terms of mean or median response times will +not address the performance of this important customer segment. To address this +issue, at Amazon, SLAs are expressed and measured at the 99.9th percentile of +the distribution. The choice for 99.9% over an even higher percentile has been +made based on a cost-benefit analysis which demonstrated a significant increase +in cost to improve performance that much. Experiences with Amazon’s production +systems have shown that this approach provides a better overall experience +compared to those systems that meet SLAs defined based on the mean or median. + +In this paper there are many references to this 99.9th percentile of +distributions, which reflects Amazon engineers’ relentless focus on performance +from the perspective of the customers’ experience. Many papers report on +averages, so these are included where it makes sense for comparison purposes. +Nevertheless, Amazon’s engineering and optimization efforts are not focused on +averages. Several techniques, such as the load balanced selection of write +coordinators, are purely targeted at controlling performance at the 99.9th +percentile. + +Storage systems often play an important role in establishing a service’s SLA, +especially if the business logic is relatively lightweight, as is the case for +many Amazon services. State management then becomes the main component of a +service’s SLA. One of the main design considerations for Dynamo is to give +services control over their system properties, such as durability and +consistency, and to let services make their own tradeoffs between functionality, +performance and cost-effectiveness. 
+ + +### 2.3 Design Considerations + +Data replication algorithms used in commercial systems traditionally perform +synchronous replica coordination in order to provide a strongly consistent data +access interface. To achieve this level of consistency, these algorithms are +forced to tradeoff the availability of the data under certain failure scenarios. +For instance, rather than dealing with the uncertainty of the correctness of an +answer, the data is made unavailable until it is absolutely certain that it is +correct. From the very early replicated database works, it is well known that +when dealing with the possibility of network failures, strong consistency and +high data availability cannot be achieved simultaneously [2, 11]. As such +systems and applications need to be aware which properties can be achieved under +which conditions. + +> **Riak KV's Design Considerations** +> +> Availability under any circumstances was something we stressed when designing +> Riak KV, too. Most databases didn't enable developers to do this in a simple +> way so we set out to change this. + +For systems prone to server and network failures, availability can be increased +by using optimistic replication techniques, where changes are allowed to +propagate to replicas in the background, and concurrent, disconnected work is +tolerated. The challenge with this approach is that it can lead to conflicting +changes which must be detected and resolved. This process of conflict resolution +introduces two problems: when to resolve them and who resolves them. Dynamo is +designed to be an eventually consistent data store; that is all updates reach +all replicas eventually. + +> Remember Eventual Consistency? We followed Dynamo's lead here and made sure +> that Riak KV could withstand network, server and other failures by sacrificing +> absolute consistency and building in mechanisms to rectify object conflicts. + +An important design consideration is to decide when to perform the process of +resolving update conflicts, i.e., whether conflicts should be resolved during +reads or writes. Many traditional data stores execute conflict resolution during +writes and keep the read complexity simple [7]. In such systems, writes may be +rejected if the data store cannot reach all (or a majority of) the replicas at a +given time. On the other hand, Dynamo targets the design space of an “always +writeable” data store (i.e., a data store that is highly available for writes). +For a number of Amazon services, rejecting customer updates could result in a +poor customer experience. For instance, the shopping cart service must allow +customers to add and remove items from their shopping cart even amidst network +and server failures. This requirement forces us to push the complexity of +conflict resolution to the reads in order to ensure that writes are never +rejected. + +> Ditto! + +The next design choice is who performs the process of conflict resolution. This +can be done by the data store or the application. If conflict resolution is done +by the data store, its choices are rather limited. In such cases, the data store +can only use simple policies, such as “last write wins” [22], to resolve +conflicting updates. On the other hand, since the application is aware of the +data schema it can decide on the conflict resolution method that is best suited +for its client’s experience. 
For instance, the application that maintains
+customer shopping carts can choose to “merge” the conflicting versions and
+return a single unified shopping cart. Despite this flexibility, some
+application developers may not want to write their own conflict resolution
+mechanisms and choose to push it down to the data store, which in turn chooses a
+simple policy such as “last write wins”.
+
+> No conflict here (pun intended). Riak KV also follows this approach to
+> conflict resolution.
+
+Other key principles embraced in the design are:
+
+Incremental scalability: Dynamo should be able to scale out one storage host
+(henceforth, referred to as “node”) at a time, with minimal impact on both
+operators of the system and the system itself.
+
+> We refer to hosts as "nodes", too. Riak KV provides a simple set of commands
+> to start and join nodes to a running cluster. With proper capacity planning,
+> this process should be painless for the ops team and devs, and imperceptible
+> to the client.
+
+Symmetry: Every node in Dynamo should have the same set of responsibilities as
+its peers; there should be no distinguished node or nodes that take special
+roles or extra set of responsibilities. In our experience, symmetry simplifies
+the process of system provisioning and maintenance.
+
+> Again, we agree. Each storage node is the same as its neighbor. Any node can
+> coordinate a request and, in the event that a node goes down, its neighbors
+> can cover for it until it's restarted or decommissioned.
+
+Decentralization: An extension of symmetry, the design should favor
+decentralized peer-to-peer techniques over centralized control. In the past,
+centralized control has resulted in outages and the goal is to avoid it as much
+as possible. This leads to a simpler, more scalable, and more available system.
+
+> A Riak cluster is completely decentralized. No single node is special and this
+> leads to no single point of failure.
+
+Heterogeneity: The system needs to be able to exploit heterogeneity in the
+infrastructure it runs on. e.g. the work distribution must be proportional to
+the capabilities of the individual servers. This is essential in adding new
+nodes with higher capacity without having to upgrade all hosts at once.
+
+> Riak KV agrees.
+
+
+## 3. Related Work
+
+> This section is not strictly necessary to read for an understanding of how a
+> Dynamo distributed database functions, especially Riak KV. It's still an
+> excellent study of other distributed systems, in some cases ones that helped
+> inspire Dynamo. When you have time, we highly recommend you read this section.
+
+
+### 3.1 Peer to Peer Systems
+
+There are several peer-to-peer (P2P) systems that have looked at the problem of
+data storage and distribution. The first generation of P2P systems, such as
+Freenet and Gnutella, were predominantly used as file sharing systems. These
+were examples of unstructured P2P networks where the overlay links between peers
+were established arbitrarily. In these networks, a search query is usually
+flooded through the network to find as many peers as possible that share the
+data. P2P systems evolved to the next generation into what is widely known as
+structured P2P networks. These networks employ a globally consistent protocol to
+ensure that any node can efficiently route a search query to some peer that has
+the desired data. Systems like Pastry [16] and Chord [20] use routing mechanisms
+to ensure that queries can be answered within a bounded number of hops.
+ +To reduce the additional latency introduced by multi-hop routing, some P2P +systems (e.g., [14]) employ O(1) routing where each peer maintains enough +routing information locally so that it can route requests (to access a data +item) to the appropriate peer within a constant number of hops. + +> Riak KV's gossip protocol communicates between nodes with O(1) routing, and +> maintains local routing information. + +Various storage systems, such as Oceanstore [9] and PAST [17] were built on top +of these routing overlays. Oceanstore provides a global, transactional, +persistent storage service that supports serialized updates on widely replicated +data. To allow for concurrent updates while avoiding many of the problems +inherent with wide-area locking, it uses an update model based on conflict +resolution. Conflict resolution was introduced in [21] to reduce the number of +transaction aborts. Oceanstore resolves conflicts by processing a series of +updates, choosing a total order among them, and then applying them atomically in +that order. It is built for an environment where the data is replicated on an +untrusted infrastructure. By comparison, PAST provides a simple abstraction +layer on top of Pastry for persistent and immutable objects. It assumes that the +application can build the necessary storage semantics (such as mutable files) on +top of it. + +### 3.2 Distributed File Systems and Databases + +Distributing data for performance, availability and durability has been widely +studied in the file system and database systems community. Compared to P2P +storage systems that only support flat namespaces, distributed file systems +typically support hierarchical namespaces. Systems like Ficus [15] and Coda [19] +replicate files for high availability at the expense of consistency. Update +conflicts are typically managed using specialized conflict resolution +procedures. The Farsite system [1] is a distributed file system that does not +use any centralized server like NFS. Farsite achieves high availability and +scalability using replication. The Google File System [6] is another distributed +file system built for hosting the state of Google’s internal applications. GFS +uses a simple design with a single master server for hosting the entire metadata +and where the data is split into chunks and stored in chunkservers. Bayou is a +distributed relational database system that allows disconnected operations and +provides eventual data consistency [21]. + +Among these systems, Bayou, Coda and Ficus allow disconnected operations and are +resilient to issues such as network partitions and outages. These systems differ +on their conflict resolution procedures. For instance, Coda and Ficus perform +system level conflict resolution and Bayou allows application level resolution. +All of them, however, guarantee eventual consistency. + +Similar to these systems, Dynamo allows read and write operations to continue +even during network partitions and resolves updated conflicts using different +conflict resolution mechanisms. Distributed block storage systems like FAB [18] +split large size objects into smaller blocks and stores each block in a highly +available manner. In comparison to these systems, a key-value store is more +suitable in this case because: (a) it is intended to store relatively small +objects (size < 1M) and (b) key-value stores are easier to configure on a per- +application basis. Antiquity is a wide-area distributed storage system designed +to handle multiple server failures [23]. 
It uses a secure log to preserve data +integrity, replicates each log on multiple servers for durability, and uses +Byzantine fault tolerance protocols to ensure data consistency. In contrast to +Antiquity, Dynamo does not focus on the problem of data integrity and security +and is built for a trusted environment. Bigtable is a distributed storage system +for managing structured data. It maintains a sparse, multi-dimensional sorted +map and allows applications to access their data using multiple attributes [2]. +Compared to Bigtable, Dynamo targets applications that require only key/value +access with primary focus on high availability where updates are not rejected +even in the wake of network partitions or server failures. + +> This all applies to Riak KV, as well. + +Traditional replicated relational database systems focus on the problem of +guaranteeing strong consistency to replicated data. Although strong consistency +provides the application writer a convenient programming model, these systems +are limited in scalability and availability [7]. These systems are not capable +of handling network partitions because they typically provide strong consistency +guarantees. + +### 3.3 Discussion + +Dynamo differs from the aforementioned decentralized storage systems in terms of +its target requirements. First, Dynamo is targeted mainly at applications that +need an “always writeable” data store where no updates are rejected due to +failures or concurrent writes. This is a crucial requirement for many Amazon +applications. Second, as noted earlier, Dynamo is built for an infrastructure +within a single administrative domain where all nodes are assumed to be trusted. +Third, applications that use Dynamo do not require support for hierarchical +namespaces (a norm in many file systems) or complex relational schema (supported +by traditional databases). Fourth, Dynamo is built for latency sensitive +applications that require at least 99.9% of read and write operations to be +performed within a few hundred milliseconds. To meet these stringent latency +requirements, it was imperative for us to avoid routing requests through +multiple nodes (which is the typical design adopted by several distributed hash +table systems such as Chord and Pastry). This is because multi-hop routing +increases variability in response times, thereby increasing the latency at +higher percentiles. Dynamo can be characterized as a zero-hop DHT, where each +node maintains enough routing information locally to route a request to the +appropriate node directly. + + +## 4.System Architecture + +> This is truly the meat of the Dynamo paper. Stick around. It gets good. + +The architecture of a storage system that needs to operate in a production +setting is complex. In addition to the actual data persistence component, the +system needs to have scalable and robust solutions for load balancing, +membership and failure detection, failure recovery, replica synchronization, +overload handling, state transfer, concurrency and job scheduling, request +marshalling, request routing, system monitoring and alarming, and configuration +management. Describing the details of each of the solutions is not possible, so +this paper focuses on the core distributed systems techniques used in Dynamo: +partitioning, replication, versioning, membership, failure handling and scaling. +<a href="#table-1">Table 1</a> presents a summary of the list of techniques +Dynamo uses and their respective advantages. 
+
+<table id="table-1">
+  <caption>
+    Table 1: Summary of techniques used in Dynamo and their advantages.
+  </caption>
+  <tr>
+    <th>Problem</th>
+    <th>Technique</th>
+    <th>Advantage</th>
+  </tr>
+  <tr>
+    <td>Partitioning</td>
+    <td>Consistent hashing</td>
+    <td>Incremental scalability</td>
+  </tr>
+  <tr>
+    <td>High availability for writes</td>
+    <td>Vector clocks with reconciliation during reads</td>
+    <td>Version size is decoupled from update rates.</td>
+  </tr>
+  <tr>
+    <td>Handling temporary failures</td>
+    <td>Sloppy quorum and hinted handoff</td>
+    <td>Provides high availability and durability guarantees when some of the
+    replicas are not available.</td>
+  </tr>
+  <tr>
+    <td>Recovering from permanent failures</td>
+    <td>Anti-entropy using Merkle trees</td>
+    <td>Synchronizes divergent replicas in the background.</td>
+  </tr>
+  <tr>
+    <td>Membership and failure detection</td>
+    <td>Gossip-based membership protocol and failure detection</td>
+    <td>Preserves symmetry and avoids having a centralized registry for storing
+    membership and node liveness information.</td>
+  </tr>
+</table>
+
+### 4.1 System Interface
+
+Dynamo stores objects associated with a key through a simple interface; it
+exposes two operations: get() and put(). The get(key) operation locates the
+object replicas associated with the key in the storage system and returns a
+single object or a list of objects with conflicting versions along with a
+context. The put(key, context, object) operation determines where the replicas
+of the object should be placed based on the associated key, and writes the
+replicas to disk. The context encodes system metadata about the object that is
+opaque to the caller and includes information such as the version of the
+object. The context information is stored along with the object so that the
+system can verify the validity of the context object supplied in the put
+request.
+
+> Whereas Dynamo only has the concept of keys, we added a higher level of
+> organization called a "bucket." Keys are stored in buckets, and buckets are
+> the level at which several Riak KV properties can be configured (primarily
+> the "N" value, or the replication value). In addition to the bucket+key
+> identifier and value, Riak KV will also return the associated metadata for a
+> given object with each get or put.
+>
+> Riak KV has two APIs: an [HTTP API] and a [Protocol Buffers API].
+
+[HTTP API]: {{<baseurl>}}riak/kv/3.0.4/developing/api/http/
+[Protocol Buffers API]: {{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers/
+
+Dynamo treats both the key and the object supplied by the caller as an opaque
+array of bytes. It applies an MD5 hash on the key to generate a 128-bit
+identifier, which is used to determine the storage nodes that are responsible
+for serving the key.
+
+> Riak KV concatenates the bucket with the key and runs it through the SHA-1
+> hash to generate a 160-bit identifier, which is then used to determine where
+> in the database each datum is stored. Riak KV treats data as an opaque
+> binary, thus enabling users to store virtually anything.
+
+
+### 4.2 Partitioning Algorithm
+
+One of the key design requirements for Dynamo is that it must scale
+incrementally. This requires a mechanism to dynamically partition the data
+over the set of nodes (i.e., storage hosts) in the system. Dynamo's
+partitioning scheme relies on consistent hashing to distribute the load across
+multiple storage hosts.
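+
+> To make the hashing step concrete, here is a minimal sketch in Python (not
+> Riak's actual Erlang implementation; the bucket and key are invented). Riak
+> KV hashes the bucket/key pair with SHA-1 onto a 2^160 ring, while Dynamo
+> applies MD5 to the key alone; the idea is the same:
+>
+> ```python
+> import hashlib
+>
+> RING_SIZE = 2 ** 160  # the SHA-1 output space used by Riak KV
+>
+> def ring_position(bucket: bytes, key: bytes) -> int:
+>     """Hash a bucket/key pair to a position on the ring (sketch only)."""
+>     return int.from_bytes(hashlib.sha1(bucket + key).digest(), "big")
+>
+> pos = ring_position(b"carts", b"customer-42")
+> print(f"{pos / RING_SIZE:.4%} of the way around the ring")
+> ```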
+In consistent hashing [10], the output range of a hash function is treated as
+a fixed circular space or "ring" (i.e., the largest hash value wraps around to
+the smallest hash value). Each node in the system is assigned a random value
+within this space which represents its "position" on the ring. Each data item
+identified by a key is assigned to a node by hashing the data item's key to
+yield its position on the ring, and then walking the ring clockwise to find
+the first node with a position larger than the item's position. Thus, each
+node becomes responsible for the region in the ring between it and its
+predecessor node on the ring. The principal advantage of consistent hashing is
+that departure or arrival of a node only affects its immediate neighbors and
+other nodes remain unaffected.
+
+> **Partitioning in Riak KV**
+>
+> As mentioned above, Riak KV uses consistent hashing to distribute data
+> around the ring to partitions responsible for storing data. The ring has a
+> maximum key space of 2^160. Each bucket+key (and its associated value) is
+> hashed to a location on the ring.
+>
+> Riak KV also breaks the ring into a set number of partitions. This number is
+> configured when a cluster is first built. Each node will be responsible for
+> storing the data hashed to a set number of partitions. Each storage node
+> will optimistically handle an equal number of partitions.
+
+The basic consistent hashing algorithm presents some challenges. First, the
+random position assignment of each node on the ring leads to non-uniform data
+and load distribution. Second, the basic algorithm is oblivious to the
+heterogeneity in the performance of nodes. To address these issues, Dynamo
+uses a variant of consistent hashing (similar to the one used in [10, 20]):
+instead of mapping a node to a single point in the circle, each node gets
+assigned to multiple points in the ring. To this end, Dynamo uses the concept
+of "virtual nodes". A virtual node looks like a single node in the system, but
+each node can be responsible for more than one virtual node. Effectively, when
+a new node is added to the system, it is assigned multiple positions
+(henceforth, "tokens") in the ring. The process of fine-tuning Dynamo's
+partitioning scheme is discussed in Section 6.
+
+> Riak KV also has the concept of virtual nodes, and they are used to the same
+> end as they are in Dynamo. Physical storage nodes are responsible for
+> partitions, and each partition is managed by a vnode.
+
+Using virtual nodes has the following advantages:
+
+* If a node becomes unavailable (due to failures or routine maintenance), the
+  load handled by this node is evenly dispersed across the remaining available
+  nodes.
+
+* When a node becomes available again, or a new node is added to the system,
+  the newly available node accepts a roughly equivalent amount of load from
+  each of the other available nodes.
+
+* The number of virtual nodes that a node is responsible for can be decided
+  based on its capacity, accounting for heterogeneity in the physical
+  infrastructure.
+
+> All of these properties for vnodes in Dynamo hold true for Riak KV, too.
+>
+> [Further Reading on Partitioning in Riak KV] and [All about the Riak KV Ring].
+
+[Further Reading on Partitioning in Riak KV]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters/
+[All about the Riak KV Ring]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters/#the-ring
+
+### 4.3 Replication
+
+To achieve high availability and durability, Dynamo replicates its data on
+multiple hosts.
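+
+> The clockwise walk is mechanical enough to sketch. This toy Python snippet
+> (illustrative only; the node names and key are made up) gives each node one
+> token and collects the N distinct nodes that would hold a key's replicas,
+> anticipating the preference list described below:
+>
+> ```python
+> import bisect
+> import hashlib
+>
+> def ring_pos(name: bytes) -> int:
+>     return int.from_bytes(hashlib.sha1(name).digest(), "big")
+>
+> # Hypothetical nodes, each placed at the hash of its own name.
+> nodes = {ring_pos(n.encode()): n for n in ["A", "B", "C", "D", "E"]}
+> positions = sorted(nodes)
+>
+> def replicas(key: bytes, n: int = 3) -> list:
+>     """Walk clockwise from the key's position, collecting n distinct nodes."""
+>     start = bisect.bisect_right(positions, ring_pos(key))
+>     picked = []
+>     for i in range(len(positions)):
+>         node = nodes[positions[(start + i) % len(positions)]]
+>         if node not in picked:   # a real preference list skips vnodes
+>             picked.append(node)  # that share a physical node
+>         if len(picked) == n:
+>             break
+>     return picked
+>
+> print(replicas(b"carts/customer-42"))  # three distinct node names
+> ```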
+Each data item is replicated at N hosts, where N is a parameter configured
+"per-instance". Each key, k, is assigned to a coordinator node (described in
+the previous section). The coordinator is in charge of the replication of the
+data items that fall within its range. In addition to locally storing each key
+within its range, the coordinator replicates these keys at the N-1 clockwise
+successor nodes in the ring. This results in a system where each node is
+responsible for the region of the ring between it and its Nth predecessor. In
+<a href="#figure-2">Figure 2</a>, node B replicates the key k at nodes C and D
+in addition to storing it locally. Node D will store the keys that fall in the
+ranges (A, B], (B, C], and (C, D].
+
+**<figure id="figure-2" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure2.png">
+  <figcaption>
+    Figure 2: Partitioning and replication of keys in Dynamo ring.
+  </figcaption>
+</figure>**
+
+> Replication in Riak KV, like in Dynamo, is fundamental and automatic.
+> Remember the concept of a bucket we covered above? In Riak KV, the
+> replication parameter, "N" (also called "n_val"), is configurable at the
+> bucket level. The default n_val in Riak KV is 3, meaning that out of the box
+> Riak KV will store three replicas of your data on three different partitions
+> on the ring.
+>
+> The diagram is applicable to Riak KV and the manner in which it replicates
+> data. The preference list is present in Riak KV, too, and is the reason why
+> any node in the ring can coordinate a request. The node receives a request,
+> consults the preference list, and routes the request accordingly.
+
+The list of nodes that is responsible for storing a particular key is called
+the preference list. The system is designed, as will be explained in Section
+4.8, so that every node in the system can determine which nodes should be in
+this list for any particular key. To account for node failures, the preference
+list contains more than N nodes. Note that with the use of virtual nodes, it
+is possible that the first N successor positions for a particular key may be
+owned by fewer than N distinct physical nodes (i.e., a node may hold more than
+one of the first N positions). To address this, the preference list for a key
+is constructed by skipping positions in the ring to ensure that the list
+contains only distinct physical nodes.
+
+
+### 4.4 Data Versioning
+
+Dynamo provides eventual consistency, which allows for updates to be
+propagated to all replicas asynchronously. A put() call may return to its
+caller before the update has been applied at all the replicas, which can
+result in scenarios where a subsequent get() operation may return an object
+that does not have the latest updates. If there are no failures, then there is
+a bound on the update propagation times. However, under certain failure
+scenarios (e.g., server outages or network partitions), updates may not arrive
+at all replicas for an extended period of time.
+
+> Riak KV is an "eventually consistent" database. All replication is done
+> asynchronously, which, as you would expect, could result in a datum being
+> returned to the client that is out of date. But don't worry. We built in
+> some mechanisms to address this.
+
+There is a category of applications in Amazon's platform that can tolerate
+such inconsistencies and can be constructed to operate under these conditions.
+For example, the shopping cart application requires that an "Add to Cart"
+operation can never be forgotten or rejected.
+If the most recent state of the cart is unavailable, and a user makes changes
+to an older version of the cart, that change is still meaningful and should be
+preserved. But at the same time it shouldn't supersede the currently
+unavailable state of the cart, which itself may contain changes that should be
+preserved. Note that both "add to cart" and "delete item from cart" operations
+are translated into put requests to Dynamo. When a customer wants to add an
+item to (or remove from) a shopping cart and the latest version is not
+available, the item is added to (or removed from) the older version and the
+divergent versions are reconciled later.
+
+> Much like Dynamo was suited to the design of the shopping cart, Riak KV, and
+> its tradeoffs, are appropriate for a certain set of use cases. We happen to
+> feel that _most_ use cases can tolerate some level of eventual consistency.
+
+In order to provide this kind of guarantee, Dynamo treats the result of each
+modification as a new and immutable version of the data. It allows for
+multiple versions of an object to be present in the system at the same time.
+Most of the time, new versions subsume the previous version(s), and the system
+itself can determine the authoritative version (syntactic reconciliation).
+However, version branching may happen in the presence of failures combined
+with concurrent updates, resulting in conflicting versions of an object. In
+these cases, the system cannot reconcile the multiple versions of the same
+object and the client must perform the reconciliation in order to collapse
+multiple branches of data evolution back into one (semantic reconciliation). A
+typical example of a collapse operation is "merging" different versions of a
+customer's shopping cart. Using this reconciliation mechanism, an "add to
+cart" operation is never lost. However, deleted items can resurface.
+
+> The same holds true for Riak KV. If, by way of some failure and concurrent
+> update (rare but quite possible), there come to exist multiple versions of
+> the same object, Riak KV will push this decision down to the client (who are
+> we to tell you which is the authoritative object?). All that said, if your
+> application doesn't need this level of version control, we enable you to
+> turn the usage of vector clocks on and off at the bucket level.
+
+It is important to understand that certain failure modes can potentially
+result in the system having not just two but several versions of the same
+data. Updates in the presence of network partitions and node failures can
+potentially result in an object having distinct version sub-histories, which
+the system will need to reconcile in the future. This requires us to design
+applications that explicitly acknowledge the possibility of multiple versions
+of the same data (in order to never lose any updates).
+
+> Ditto.
+
+Dynamo uses vector clocks [12] in order to capture causality between different
+versions of the same object. A vector clock is effectively a list of (node,
+counter) pairs. One vector clock is associated with every version of every
+object. One can determine whether two versions of an object are on parallel
+branches or have a causal ordering by examining their vector clocks. If the
+counters on the first object's clock are less-than-or-equal to all of the
+counters in the second clock, then the first is an ancestor of the second and
+can be forgotten. Otherwise, the two changes are considered to be in conflict
+and require reconciliation.
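+
+> That ancestor test takes only a few lines to sketch in Python (illustrative
+> only; Riak's real vector clocks live in its Erlang internals). Clocks here
+> are plain dicts of node -> counter, borrowing the D3/D4/D5 values from the
+> Figure 3 walk-through below:
+>
+> ```python
+> def descends(a: dict, b: dict) -> bool:
+>     """True if clock `a` dominates clock `b`, i.e. `b` is an ancestor."""
+>     return all(a.get(node, 0) >= counter for node, counter in b.items())
+>
+> d3 = {"Sx": 2, "Sy": 1}
+> d4 = {"Sx": 2, "Sz": 1}
+> print(descends(d3, d4), descends(d4, d3))  # False False -> conflict
+>
+> # A client reconciles, and coordinator Sx commits the merged version:
+> d5 = {n: max(d3.get(n, 0), d4.get(n, 0)) for n in d3.keys() | d4.keys()}
+> d5["Sx"] += 1
+> print(d5)  # Sx=3, Sy=1, Sz=1, matching D5 below
+> ```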
+
+> As you may have already figured out, Riak KV uses vector clocks for object
+> versioning, too. Here are a whole host of resources to keep you busy for a
+> while:
+>
+> [Vector Clock on Riak KV Glossary]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vector-clock)
+>
+> [Why Vector Clocks are Easy](http://basho.com/posts/technical/why-vector-clocks-are-easy/)
+> |
+> [Why Vector Clocks are Hard](http://basho.com/posts/technical/why-vector-clocks-are-hard/)
+>
+> [Vector Clocks Revisited](http://basho.com/posts/technical/vector-clocks-revisited/)
+>
+> [Vector Clocks on Wikipedia](https://en.wikipedia.org/wiki/Vector_clock)
+
+In Dynamo, when a client wishes to update an object, it must specify which
+version it is updating. This is done by passing the context it obtained from
+an earlier read operation, which contains the vector clock information. Upon
+processing a read request, if Dynamo has access to multiple branches that
+cannot be syntactically reconciled, it will return all the objects at the
+leaves, with the corresponding version information in the context. An update
+using this context is considered to have reconciled the divergent versions and
+the branches are collapsed into a single new version.
+
+**<figure id="figure-3" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure3.png">
+  <figcaption>
+    Figure 3: Version evolution of an object over time.
+  </figcaption>
+</figure>**
+
+To illustrate the use of vector clocks, let us consider the example shown in
+<a href="#figure-3">Figure 3</a>. A client writes a new object. The node (say
+Sx) that handles the write for this key increases its sequence number and uses
+it to create the data's vector clock. The system now has the object D1 and its
+associated clock [(Sx, 1)]. The client updates the object. Assume the same
+node handles this request as well. The system now also has object D2 and its
+associated clock [(Sx, 2)]. D2 descends from D1 and therefore overwrites D1;
+however, there may be replicas of D1 lingering at nodes that have not yet seen
+D2. Let us assume that the same client updates the object again and a
+different server (say Sy) handles the request. The system now has data D3 and
+its associated clock [(Sx, 2), (Sy, 1)].
+
+Next, assume a different client reads D2 and then tries to update it, and
+another node (say Sz) does the write. The system now has D4 (descendant of D2)
+whose version clock is [(Sx, 2), (Sz, 1)]. A node that is aware of D1 or D2
+could determine, upon receiving D4 and its clock, that D1 and D2 are
+overwritten by the new data and can be garbage collected. A node that is aware
+of D3 and receives D4 will find that there is no causal relation between them.
+In other words, there are changes in D3 and D4 that are not reflected in each
+other. Both versions of the data must be kept and presented to a client (upon
+a read) for semantic reconciliation.
+
+Now assume some client reads both D3 and D4 (the context will reflect that
+both values were found by the read). The read's context is a summary of the
+clocks of D3 and D4, namely [(Sx, 2), (Sy, 1), (Sz, 1)]. If the client
+performs the reconciliation and node Sx coordinates the write, Sx will update
+its sequence number in the clock. The new data D5 will have the following
+clock: [(Sx, 3), (Sy, 1), (Sz, 1)].
+
+A possible issue with vector clocks is that their size may grow if many
+servers coordinate the writes to an object.
+In practice, this is not likely because writes are usually handled by one of
+the top N nodes in the preference list. In the case of network partitions or
+multiple server failures, write requests may be handled by nodes that are not
+in the top N nodes in the preference list, causing the size of the vector
+clock to grow. In these scenarios, it is desirable to limit the size of the
+vector clock. To this end, Dynamo employs the following clock truncation
+scheme: along with each (node, counter) pair, Dynamo stores a timestamp that
+indicates the last time the node updated the data item. When the number of
+(node, counter) pairs in the vector clock reaches a threshold (say 10), the
+oldest pair is removed from the clock. Clearly, this truncation scheme can
+lead to inefficiencies in reconciliation, as the descendant relationships
+cannot be derived accurately. However, this problem has not surfaced in
+production and therefore this issue has not been thoroughly investigated.
+
+> Riak KV prunes vector clocks to keep their growth under control.
+
+
+### 4.5 Execution of get() and put() operations
+
+Any storage node in Dynamo is eligible to receive client get and put
+operations for any key. In this section, for the sake of simplicity, we
+describe how these operations are performed in a failure-free environment, and
+in the subsequent section we describe how read and write operations are
+executed during failures.
+
+> Any node in the Riak KV ring can coordinate a request. The Riak KV
+> information in this section applies to a failure-free environment.
+
+Both get and put operations are invoked using Amazon's infrastructure-specific
+request processing framework over HTTP. There are two strategies that a client
+can use to select a node: (1) route its request through a generic load
+balancer that will select a node based on load information, or (2) use a
+partition-aware client library that routes requests directly to the
+appropriate coordinator nodes. The advantage of the first approach is that the
+client does not have to link any code specific to Dynamo in its application,
+whereas the second strategy can achieve lower latency because it skips a
+potential forwarding step.
+
+A node handling a read or write operation is known as the coordinator.
+Typically, this is the first among the top N nodes in the preference list. If
+the requests are received through a load balancer, requests to access a key
+may be routed to any random node in the ring. In this scenario, the node that
+receives the request will not coordinate it if the node is not in the top N of
+the requested key's preference list. Instead, that node will forward the
+request to the first among the top N nodes in the preference list.
+
+Read and write operations involve the first N healthy nodes in the preference
+list, skipping over those that are down or inaccessible. When all nodes are
+healthy, the top N nodes in a key's preference list are accessed. When there
+are node failures or network partitions, nodes that are lower ranked in the
+preference list are accessed.
+
+To maintain consistency among its replicas, Dynamo uses a consistency protocol
+similar to those used in quorum systems. This protocol has two key
+configurable values: R and W. R is the minimum number of nodes that must
+participate in a successful read operation. W is the minimum number of nodes
+that must participate in a successful write operation. Setting R and W such
+that R + W > N yields a quorum-like system.
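+
+> The quorum arithmetic is worth a quick check: any read set of R replicas and
+> any write set of W replicas drawn from the same N must share at least
+> R + W - N members (a sketch, not Riak code):
+>
+> ```python
+> def min_overlap(n: int, r: int, w: int) -> int:
+>     """Fewest replicas guaranteed common to any read set and write set."""
+>     return max(0, r + w - n)
+>
+> for n, r, w in [(3, 2, 2), (3, 1, 1), (3, 1, 3)]:
+>     o = min_overlap(n, r, w)
+>     verdict = "reads see the latest write" if o else "stale reads possible"
+>     print(f"N={n} R={r} W={w}: overlap >= {o}, {verdict}")
+> ```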
+In this model, the latency of a get (or put) operation is dictated by the
+slowest of the R (or W) replicas. For this reason, R and W are usually
+configured to be less than N to provide better latency.
+
+> Riak KV makes use of the same values. But, thanks to our concept of buckets,
+> we made it a bit more customizable. The default R and W values are set at
+> the bucket level but can be configured at the request level if the developer
+> deems it necessary for certain data. "Quorum" as described in Dynamo is the
+> default setting in Riak KV.
+>
+> Some more resources on R and W:
+>
+> [REST API]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/)
+>
+> [Writing Data]({{<baseurl>}}riak/kv/3.0.4/developing/usage/creating-objects/)
+>
+> [Reading Data]({{<baseurl>}}riak/kv/3.0.4/developing/usage/reading-objects/)
+
+Upon receiving a put() request for a key, the coordinator generates the vector
+clock for the new version and writes the new version locally. The coordinator
+then sends the new version (along with the new vector clock) to the N
+highest-ranked reachable nodes. If at least W-1 nodes respond, then the write
+is considered successful.
+
+> In Riak KV a write is considered successful when the number of nodes
+> responding successfully to the write equals W. This need not be a durable
+> write, which is a separate value in Riak KV labeled DW.
+
+Similarly, for a get() request, the coordinator requests all existing versions
+of data for that key from the N highest-ranked reachable nodes in the
+preference list for that key, and then waits for R responses before returning
+the result to the client. If the coordinator ends up gathering multiple
+versions of the data, it returns all the versions it deems to be causally
+unrelated. The divergent versions are then reconciled, and the reconciled
+version superseding the current versions is written back.
+
+> Same for Riak KV. Reconciling divergent versions in Riak KV is called
+> [Read Repair]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication/#read-repair).
+
+
+### 4.6 Handling Failures: Hinted Handoff
+
+If Dynamo used a traditional quorum approach, it would be unavailable during
+server failures and network partitions, and would have reduced durability even
+under the simplest of failure conditions. To remedy this, it does not enforce
+strict quorum membership; instead, it uses a "sloppy quorum": all read and
+write operations are performed on the first N healthy nodes from the
+preference list, which may not always be the first N nodes encountered while
+walking the consistent hashing ring.
+
+> [Hinted handoff] is built into Riak KV's core.
+>
+> You can get a glimpse of Riak KV's preference list (or *preflist*)
+> calculation in the [Replication] walkthrough.
+
+[Hinted handoff]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#hinted-handoff
+[Replication]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/replication/
+
+Consider the example Dynamo configuration given in <a href="#figure-2">Figure
+2</a> with N=3. In this example, if node A is temporarily down or unreachable
+during a write operation, then a replica that would normally have lived on A
+will now be sent to node D. This is done to maintain the desired availability
+and durability guarantees. The replica sent to D will have a hint in its
+metadata that suggests which node was the intended recipient of the replica
+(in this case A). Nodes that receive hinted replicas will keep them in a
+separate local database that is scanned periodically.
+Upon detecting that A has recovered, D will attempt to deliver the replica to
+A. Once the transfer succeeds, D may delete the object from its local store
+without decreasing the total number of replicas in the system.
+
+Using hinted handoff, Dynamo ensures that read and write operations do not
+fail due to temporary node or network failures. Applications that need the
+highest level of availability can set W to 1, which ensures that a write is
+accepted as long as a single node in the system has durably written the key to
+its local store. Thus, the write request is only rejected if all nodes in the
+system are unavailable. However, in practice, most Amazon services in
+production set a higher W to meet the desired level of durability. A more
+detailed discussion of configuring N, R and W follows in Section 6.
+
+> As mentioned previously, Riak KV does not require that a write be durable,
+> only that a vnode responds in the affirmative. If you require a durable
+> write in the way mentioned here, use DW.
+
+It is imperative that a highly available storage system be capable of handling
+the failure of an entire data center. Data center failures happen due to power
+outages, cooling failures, network failures, and natural disasters. Dynamo is
+configured such that each object is replicated across multiple data centers.
+In essence, the preference list of a key is constructed such that the storage
+nodes are spread across multiple data centers. These data centers are
+connected through high-speed network links. This scheme of replicating across
+multiple data centers allows us to handle entire data center failures without
+a data outage.
+
+> [Multi Datacenter Replication] was previously only implemented in the
+> commercial extension to Riak KV, called [Riak KV Enterprise Edition]. Now it
+> is available in all versions from Riak KV 2.2.6 onwards.
+
+[Multi Datacenter Replication]: {{<baseurl>}}riak/kv/3.0.4/using/reference/v3-multi-datacenter/architecture/
+[Riak KV Enterprise Edition]: http://basho.com/products/riak-kv/
+
+
+### 4.7 Handling Permanent Failures: Replica Synchronization
+
+Hinted handoff works best if the system membership churn is low and node
+failures are transient. There are scenarios under which hinted replicas become
+unavailable before they can be returned to the original replica node. To
+handle this and other threats to durability, Dynamo implements an anti-entropy
+(replica synchronization) protocol to keep the replicas synchronized.
+
+> Read repair, mentioned above, is the simplest form of anti-entropy. But it
+> is passive, not active as this section describes.
+
+To detect the inconsistencies between replicas faster and to minimize the
+amount of transferred data, Dynamo uses Merkle trees [13]. A Merkle tree is a
+hash tree where leaves are hashes of the values of individual keys. Parent
+nodes higher in the tree are hashes of their respective children. The
+principal advantage of a Merkle tree is that each branch of the tree can be
+checked independently without requiring nodes to download the entire tree or
+the entire data set. Moreover, Merkle trees help in reducing the amount of
+data that needs to be transferred while checking for inconsistencies among
+replicas. For instance, if the hash values of the roots of two trees are
+equal, then the values of the leaf nodes in the tree are equal and the nodes
+require no synchronization. If not, it implies that the values of some
+replicas are different.
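+
+> A toy Merkle root comparison in Python (illustrative only; Riak's hash trees
+> are built in Erlang over real object data, and these keys and values are
+> invented):
+>
+> ```python
+> import hashlib
+>
+> def h(data: bytes) -> bytes:
+>     return hashlib.sha256(data).digest()
+>
+> def merkle_root(leaves: list) -> bytes:
+>     """Fold leaf hashes pairwise up to a single root hash."""
+>     level = [h(v) for v in leaves]
+>     while len(level) > 1:
+>         if len(level) % 2:  # duplicate the last hash on odd-sized levels
+>             level.append(level[-1])
+>         level = [h(a + b) for a, b in zip(level[::2], level[1::2])]
+>     return level[0]
+>
+> replica_a = [b"k1=v1", b"k2=v2", b"k3=v3", b"k4=v4"]
+> replica_b = [b"k1=v1", b"k2=v2", b"k3=stale", b"k4=v4"]
+> print(merkle_root(replica_a) == merkle_root(replica_b))  # False -> descend
+> ```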
+In such cases, the nodes may exchange the hash values of children and the
+process continues until it reaches the leaves of the trees, at which point the
+hosts can identify the keys that are "out of sync". Merkle trees minimize the
+amount of data that needs to be transferred for synchronization and reduce the
+number of disk reads performed during the anti-entropy process.
+
+> Riak KV implements a Merkle-tree-based Active Anti-Entropy (*AAE*).
+
+Dynamo uses Merkle trees for anti-entropy as follows: Each node maintains a
+separate Merkle tree for each key range (the set of keys covered by a virtual
+node) it hosts. This allows nodes to compare whether the keys within a key
+range are up-to-date. In this scheme, two nodes exchange the root of the
+Merkle tree corresponding to the key ranges that they host in common.
+Subsequently, using the tree traversal scheme described above, the nodes
+determine if they have any differences and perform the appropriate
+synchronization action. The disadvantage of this scheme is that many key
+ranges change when a node joins or leaves the system, thereby requiring the
+tree(s) to be recalculated. This issue is addressed, however, by the refined
+partitioning scheme described in Section 6.2.
+
+
+### 4.8 Membership and Failure Detection
+
+> This section is well expressed in [Adding and Removing Nodes] and
+> [Failure Scenarios].
+
+[Adding and Removing Nodes]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes/
+[Failure Scenarios]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency/
+
+#### 4.8.1 Ring Membership
+
+> Riak KV operators can trigger node management via the
+> [riak-admin command-line tool].
+
+[riak-admin command-line tool]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/
+
+In Amazon's environment node outages (due to failures and maintenance tasks)
+are often transient but may last for extended intervals. A node outage rarely
+signifies a permanent departure and therefore should not result in rebalancing
+of the partition assignment or repair of the unreachable replicas. Similarly,
+manual error could result in the unintentional startup of new Dynamo nodes.
+For these reasons, it was deemed appropriate to use an explicit mechanism to
+initiate the addition and removal of nodes from a Dynamo ring. An
+administrator uses a command line tool or a browser to connect to a Dynamo
+node and issue a membership change to join a node to a ring or remove a node
+from a ring. The node that serves the request writes the membership change and
+its time of issue to persistent store. The membership changes form a history
+because nodes can be removed and added back multiple times.
+
+> Nodes are manually added using the `riak-admin cluster join` command.
+>
+> When a node permanently departs, rebalancing is triggered using the
+> `riak-admin cluster leave` command.
+
+A gossip-based protocol propagates membership changes and maintains an
+eventually consistent view of membership. Each node contacts a peer chosen at
+random every second and the two nodes efficiently reconcile their persisted
+membership change histories.
+
+> Riak KV's ring state holds membership information, and is propagated via
+> [gossiping], with random reconciliation defaulting to once a minute.
+
+[gossiping]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#gossiping
+
+When a node starts for the first time, it chooses its set of tokens (virtual
+nodes in the consistent hash space) and maps nodes to their respective token
+sets.
+The mapping is persisted on disk and initially contains only the local node
+and token set. The mappings stored at different Dynamo nodes are reconciled
+during the same communication exchange that reconciles the membership change
+histories. Therefore, partitioning and placement information also propagates
+via the gossip-based protocol and each storage node is aware of the token
+ranges handled by its peers. This allows each node to forward a key's
+read/write operations to the right set of nodes directly.
+
+> These tokens are vnodes (virtual nodes) in Riak KV.
+
+
+#### 4.8.2 External Discovery
+
+The mechanism described above could temporarily result in a logically
+partitioned Dynamo ring. For example, the administrator could contact node A
+to join A to the ring, then contact node B to join B to the ring. In this
+scenario, nodes A and B would each consider itself a member of the ring, yet
+neither would be immediately aware of the other. To prevent logical
+partitions, some Dynamo nodes play the role of seeds. Seeds are nodes that are
+discovered via an external mechanism and are known to all nodes. Because all
+nodes eventually reconcile their membership with a seed, logical partitions
+are highly unlikely. Seeds can be obtained either from static configuration or
+from a configuration service. Typically seeds are fully functional nodes in
+the Dynamo ring.
+
+> To rectify these sorts of logical partitions, multiple Riak cluster changes
+> are configured as one batch. Any changes must first be viewed with
+> `riak-admin cluster plan`; then the changes are committed with `riak-admin
+> cluster commit`. The new ring state is gossiped.
+>
+> See _[The Node Join Process]_ for more.
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+
+
+#### 4.8.3 Failure Detection
+
+Failure detection in Dynamo is used to avoid attempts to communicate with
+unreachable peers during get() and put() operations and when transferring
+partitions and hinted replicas. For the purpose of avoiding failed attempts at
+communication, a purely local notion of failure detection is entirely
+sufficient: node A may consider node B failed if node B does not respond to
+node A's messages (even if B is responsive to node C's messages). In the
+presence of a steady rate of client requests generating inter-node
+communication in the Dynamo ring, a node A quickly discovers that a node B is
+unresponsive when B fails to respond to a message; node A then uses alternate
+nodes to service requests that map to B's partitions; A periodically retries B
+to check for the latter's recovery. In the absence of client requests to drive
+traffic between two nodes, neither node really needs to know whether the other
+is reachable and responsive.
+
+Decentralized failure detection protocols use a simple gossip-style protocol
+that enables each node in the system to learn about the arrival (or departure)
+of other nodes. For detailed information on decentralized failure detectors
+and the parameters affecting their accuracy, the interested reader is referred
+to [8]. Early designs of Dynamo used a decentralized failure detector to
+maintain a globally consistent view of failure state. Later it was determined
+that the explicit node join and leave methods obviate the need for a global
+view of failure state.
+This is because nodes are notified of permanent node additions and removals by
+the explicit node join and leave methods, while temporary node failures are
+detected by the individual nodes when they fail to communicate with others
+(while forwarding requests).
+
+> Riak KV follows the same mechanism, by manually triggering permanent ring
+> state changes, and gossiping the new state.
+
+
+### 4.9 Adding/Removing Storage Nodes
+
+When a new node (say X) is added into the system, it gets assigned a number of
+tokens that are randomly scattered on the ring. For every key range that is
+assigned to node X, there may be a number of nodes (less than or equal to N)
+that are currently in charge of handling keys that fall within its token
+range. Due to the allocation of key ranges to X, some existing nodes no longer
+have to store some of their keys, and these nodes transfer those keys to X.
+Let us consider a simple bootstrapping scenario where node X is added to the
+ring shown in <a href="#figure-2">Figure 2</a> between A and B. When X is
+added to the system, it is in charge of storing keys in the ranges (F, G],
+(G, A] and (A, X]. As a consequence, nodes B, C and D no longer have to store
+the keys in these respective ranges. Therefore, nodes B, C, and D will offer
+to, and upon confirmation from X, transfer the appropriate set of keys. When a
+node is removed from the system, the reallocation of keys happens in a reverse
+process.
+
+> Riak KV does not randomly assign vnodes, but rather iterates through the
+> list of partitions, assigning them to nodes in a round-robin style.
+
+Operational experience has shown that this approach distributes the load of
+key distribution uniformly across the storage nodes, which is important to
+meet the latency requirements and to ensure fast bootstrapping. Finally,
+adding a confirmation round between the source and the destination ensures
+that the destination node does not receive any duplicate transfers for a given
+key range.
+
+
+## 5. Implementation
+
+In Dynamo, each storage node has three main software components: request
+coordination, membership and failure detection, and a local persistence
+engine. All these components are implemented in Java.
+
+> Riak KV is implemented in Erlang. Request coordination and membership
+> behavior is defined by [riak_core] and implemented by [Riak KV].
+
+[riak_core]: http://github.com/basho/riak_core
+[Riak KV]: http://github.com/basho/riak_kv
+
+Dynamo's local persistence component allows for different storage engines to
+be plugged in. Engines that are in use are Berkeley Database (BDB)
+Transactional Data Store, BDB Java Edition, MySQL, and an in-memory buffer
+with persistent backing store. The main reason for designing a pluggable
+persistence component is to choose the storage engine best suited for an
+application's access patterns. For instance, BDB can handle objects typically
+in the order of tens of kilobytes whereas MySQL can handle objects of larger
+sizes. Applications choose Dynamo's local persistence engine based on their
+object size distribution. The majority of Dynamo's production instances use
+BDB Transactional Data Store.
+
+> Riak KV ships with various [backend options]. [Bitcask] is the default, but
+> [LevelDB] and the main [Memory] backend are also used heavily in production
+> (in that order). You can also use more than one backend in production via
+> the [Multi] backend configuration.
+>
+> Bitcask is a fast and reliable choice, but does have some limitations at
+> very large scales.
+> For larger clusters, you may want to choose LevelDB (which also supports
+> [secondary indexes]). The Memory backend is an excellent choice when speed
+> is important and durability is not. It also has TTL support.
+
+[backend options]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/
+[Bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask/
+[LevelDB]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb/
+[Memory]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/memory/
+[Multi]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/multi/
+[secondary indexes]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes/
+
+The request coordination component is built on top of an event-driven
+messaging substrate where the message processing pipeline is split into
+multiple stages similar to the SEDA architecture [24]. All communications are
+implemented using Java NIO channels. The coordinator executes the read and
+write requests on behalf of clients by collecting data from one or more nodes
+(in the case of reads) or storing data at one or more nodes (for writes). Each
+client request results in the creation of a state machine on the node that
+received the client request. The state machine contains all the logic for
+identifying the nodes responsible for a key, sending the requests, waiting for
+responses, potentially doing retries, processing the replies and packaging the
+response to the client. Each state machine instance handles exactly one client
+request. For instance, a read operation implements the following state
+machine: (i) send read requests to the nodes, (ii) wait for the minimum number
+of required responses, (iii) if too few replies were received within a given
+time bound, fail the request, (iv) otherwise gather all the data versions and
+determine the ones to be returned, and (v) if versioning is enabled, perform
+syntactic reconciliation and generate an opaque write context that contains
+the vector clock that subsumes all the remaining versions. For the sake of
+brevity the failure handling and retry states are left out.
+
+> Request coordination in Riak KV uses Erlang message passing, but follows a
+> similar state machine.
+
+After the read response has been returned to the caller, the state machine
+waits for a small period of time to receive any outstanding responses. If
+stale versions were returned in any of the responses, the coordinator updates
+those nodes with the latest version. This process is called read repair
+because it repairs replicas that have missed a recent update at an
+opportunistic time and relieves the anti-entropy protocol from having to do
+it.
+
+> Riak KV implements [Read Repair].
+
+[Read Repair]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication/#read-repair
+
+As noted earlier, write requests are coordinated by one of the top N nodes in
+the preference list. Although it is desirable always to have the first node
+among the top N coordinate the writes, thereby serializing all writes at a
+single location, this approach has led to uneven load distribution, resulting
+in SLA violations. This is because the request load is not uniformly
+distributed across objects. To counter this, any of the top N nodes in the
+preference list is allowed to coordinate the writes. In particular, since each
+write usually follows a read operation, the coordinator for a write is chosen
+to be the node that replied fastest to the previous read operation, which is
+stored in the context information of the request.
+This optimization enables us to pick the node that has the data that was read
+by the preceding read operation, thereby increasing the chances of getting
+"read-your-writes" consistency. It also reduces variability in the performance
+of the request handling, which improves the performance at the 99.9th
+percentile.
+
+
+## 6. Experiences & Lessons Learned
+
+> Much of this section relates to benchmarks run against Dynamo. You can run
+> [Basho Bench] against your own Riak cluster to discover your own
+> optimal values.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/3.0.4/using/performance/benchmarking/
+
+Dynamo is used by several services with different configurations. These
+instances differ in their version reconciliation logic and read/write quorum
+characteristics. The following are the main patterns in which Dynamo is used:
+
+* Business logic specific reconciliation: This is a popular use case for
+Dynamo. Each data object is replicated across multiple nodes. In case of
+divergent versions, the client application performs its own reconciliation
+logic. The shopping cart service discussed earlier is a prime example of this
+category. Its business logic reconciles objects by merging different versions
+of a customer's shopping cart.
+
+> Riak KV currently supports simple conflict resolution by way of read repair,
+> leaving more complex reconciliation to the client. There are several tools
+> to help simplify this task, such as [Statebox].
+>
+> Riak KV supports a simple reconciliation strategy, called [CRDTs (Commutative
+> Replicated Data Types)], for reconciling common data types like sets and
+> counters.
+
+[Statebox]: https://github.com/mochi/statebox_riak
+[CRDTs (Commutative Replicated Data Types)]: {{<baseurl>}}riak/kv/3.0.4/developing/data-types/
+
+
+* Timestamp-based reconciliation: This case differs from the previous one only
+in the reconciliation mechanism. In case of divergent versions, Dynamo
+performs simple timestamp-based reconciliation logic of "last write wins";
+i.e., the object with the largest physical timestamp value is chosen as the
+correct version. The service that maintains customer session information is a
+good example of a service that uses this mode.
+
+> Riak also supports this for high-performance cases where accuracy is less
+> important than speed.
+
+* High performance read engine: While Dynamo is built to be an "always
+writeable" data store, a few services are tuning its quorum characteristics
+and using it as a high performance read engine. Typically, these services have
+a high read request rate and only a small number of updates. In this
+configuration, typically R is set to be 1 and W to be N. For these services,
+Dynamo provides the ability to partition and replicate their data across
+multiple nodes, thereby offering incremental scalability. Some of these
+instances function as the authoritative persistence cache for data stored in
+more heavyweight backing stores. Services that maintain product catalogs and
+promotional items fit in this category.
+
+> Riak can be used in this manner.
+
+The main advantage of Dynamo is that its client applications can tune the
+values of N, R and W to achieve their desired levels of performance,
+availability and durability. For instance, the value of N determines the
+durability of each object. A typical value of N used by Dynamo's users is 3.
+
+The values of W and R impact object availability, durability and consistency.
+For instance, if W is set to 1, then the system will never reject a write
+request as long as there is at least one node in the system that can
+successfully process a write request. However, low values of W and R can
+increase the risk of inconsistency, as write requests are deemed successful
+and returned to the clients even if they are not processed by a majority of
+the replicas. This also introduces a vulnerability window for durability when
+a write request is successfully returned to the client even though it has
+been persisted at only a small number of nodes.
+
+Traditional wisdom holds that durability and availability go hand-in-hand.
+However, this is not necessarily true here. For instance, the vulnerability
+window for durability can be decreased by increasing W. This may increase the
+probability of rejecting requests (thereby decreasing availability) because
+more storage hosts need to be alive to process a write request.
+
+The common (N,R,W) configuration used by several instances of Dynamo is
+(3,2,2). These values are chosen to meet the necessary levels of performance,
+durability, consistency, and availability SLAs.
+
+All the measurements presented in this section were taken on a live system
+operating with a configuration of (3,2,2) and running a couple hundred nodes
+with homogeneous hardware configurations. As mentioned earlier, each instance
+of Dynamo contains nodes that are located in multiple datacenters. These
+datacenters are typically connected through high-speed network links. Recall
+that to generate a successful get (or put) response, R (or W) nodes need to
+respond to the coordinator. Clearly, the network latencies between datacenters
+affect the response time, and the nodes (and their datacenter locations) are
+chosen such that the applications' target SLAs are met.
+
+> Ditto for Riak.
+
+### 6.1 Balancing Performance and Durability
+
+While Dynamo's principal design goal is to build a highly available data
+store, performance is an equally important criterion in Amazon's platform. As
+noted earlier, to provide a consistent customer experience, Amazon's services
+set their performance targets at higher percentiles (such as the 99.9th or
+99.99th percentiles). A typical SLA required of services that use Dynamo is
+that 99.9% of the read and write requests execute within 300ms.
+
+Since Dynamo is run on standard commodity hardware components that have far
+less I/O throughput than high-end enterprise servers, providing consistently
+high performance for read and write operations is a non-trivial task. The
+involvement of multiple storage nodes in read and write operations makes it
+even more challenging, since the performance of these operations is limited by
+the slowest of the R or W replicas. <a href="#figure-4">Figure 4</a> shows the
+average and 99.9th percentile latencies of Dynamo's read and write operations
+during a period of 30 days. As seen in the figure, the latencies exhibit a
+clear diurnal pattern, which is a result of the diurnal pattern in the
+incoming request rate (i.e., there is a significant difference in request rate
+between the daytime and night). Moreover, the write latencies are higher than
+read latencies, obviously because write operations always result in disk
+access. Also, the 99.9th percentile latencies are around 200 ms and are an
+order of magnitude higher than the averages.
+This is because the 99.9th percentile latencies are affected by several
+factors such as variability in request load, object sizes, and locality
+patterns.
+
+**<figure id="figure-4" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure4.png">
+  <figcaption>
+    Figure 4: Average and 99.9 percentiles of latencies for read and write
+    requests during our peak request season of December 2006. The intervals
+    between consecutive ticks in the x-axis correspond to 12 hours. Latencies
+    follow a diurnal pattern similar to the request rate and 99.9 percentile
+    latencies are an order of magnitude higher than averages.
+  </figcaption>
+</figure>**
+
+While this level of performance is acceptable for a number of services, a few
+customer-facing services required higher levels of performance. For these
+services, Dynamo provides the ability to trade off durability guarantees for
+performance. In this optimization, each storage node maintains an object
+buffer in its main memory. Each write operation is stored in the buffer and
+gets periodically written to storage by a writer thread. In this scheme, read
+operations first check if the requested key is present in the buffer. If so,
+the object is read from the buffer instead of the storage engine.
+
+> This is more similar to Riak's W value, since only DW requires a durable
+> write to respond as a success.
+
+This optimization has resulted in lowering the 99.9th percentile latency by a
+factor of 5 during peak traffic, even for a very small buffer of a thousand
+objects (see <a href="#figure-5">Figure 5</a>). Also, as seen in the figure,
+write buffering smoothes out higher percentile latencies. Obviously, this
+scheme trades durability for performance. In this scheme, a server crash can
+result in missing writes that were queued up in the buffer. To reduce the
+durability risk, the write operation is refined to have the coordinator choose
+one out of the N replicas to perform a "durable write". Since the coordinator
+waits only for W responses, the performance of the write operation is not
+affected by the performance of the durable write operation performed by a
+single replica.
+
+**<figure id="figure-5" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure5.png">
+  <figcaption>
+    Figure 5: Comparison of performance of 99.9th percentile latencies for
+    buffered vs. non-buffered writes over a period of 24 hours. The intervals
+    between consecutive ticks in the x-axis correspond to one hour.
+  </figcaption>
+</figure>**
+
+> Setting DW=1 will replicate this behavior.
+
+
+### 6.2 Ensuring Uniform Load Distribution
+
+Dynamo uses consistent hashing to partition its key space across its replicas
+and to ensure uniform load distribution. A uniform key distribution can help
+us achieve uniform load distribution assuming the access distribution of keys
+is not highly skewed. In particular, Dynamo's design assumes that even where
+there is a significant skew in the access distribution, there are enough keys
+in the popular end of the distribution so that the load of handling popular
+keys can be spread across the nodes uniformly through partitioning. This
+section discusses the load imbalance seen in Dynamo and the impact of
+different partitioning strategies on load distribution.
+
+> Riak follows SHA-1-based consistent hashing for [partitioning].
+
+[partitioning]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication/#understanding-replication-by-example
+
+To study the load imbalance and its correlation with request load, the total
+number of requests received by each node was measured for a period of 24
+hours, broken down into intervals of 30 minutes. In a given time window, a
+node is considered to be "in-balance" if the node's request load deviates from
+the average load by less than a certain threshold (here 15%). Otherwise the
+node was deemed "out-of-balance". <a href="#figure-6">Figure 6</a> presents
+the fraction of nodes that are "out-of-balance" (henceforth, "imbalance
+ratio") during this time period. For reference, the corresponding request load
+received by the entire system during this time period is also plotted. As seen
+in the figure, the imbalance ratio decreases with increasing load. For
+instance, during low loads the imbalance ratio is as high as 20% and during
+high loads it is close to 10%. Intuitively, this can be explained by the fact
+that under high loads, a large number of popular keys are accessed and, due to
+uniform distribution of keys, the load is evenly distributed. However, during
+low loads (where load is 1/8th of the measured peak load), fewer popular keys
+are accessed, resulting in a higher load imbalance.
+
+**<figure id="figure-6" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure6.png">
+  <figcaption>
+    Figure 6: Fraction of nodes that are out-of-balance (i.e., nodes whose
+    request load is above a certain threshold from the average system load) and
+    their corresponding request load. The interval between ticks in x-axis
+    corresponds to a time period of 30 minutes.
+  </figcaption>
+</figure>**
+
+<i>This section discusses how Dynamo's partitioning scheme has evolved over
+time and its implications on load distribution.</i>
+
+<strong>Strategy 1:</strong> T random tokens per node and partition by token
+value: This was the initial strategy deployed in production (and described in
+Section 4.2). In this scheme, each node is assigned T tokens (chosen uniformly
+at random from the hash space). The tokens of all nodes are ordered according
+to their values in the hash space. Every two consecutive tokens define a
+range. The last token and the first token form a range that "wraps" around
+from the highest value to the lowest value in the hash space. Because the
+tokens are chosen randomly, the ranges vary in size. As nodes join and leave
+the system, the token set changes and consequently the ranges change. Note
+that the space needed to maintain the membership at each node increases
+linearly with the number of nodes in the system.
+
+> Riak uses equal-sized partitions with a round-robin distribution, not
+> variably-sized partitions that are randomly distributed.
+
+While using this strategy, the following problems were encountered. First,
+when a new node joins the system, it needs to "steal" its key ranges from
+other nodes. However, the nodes handing the key ranges off to the new node
+have to scan their local persistence store to retrieve the appropriate set of
+data items. Note that performing such a scan operation on a production node is
+tricky as scans are highly resource intensive operations and they need to be
+executed in the background without affecting the customer performance. This
+requires us to run the bootstrapping task at the lowest priority.
However, this +significantly slows the bootstrapping process and during busy shopping season, +when the nodes are handling millions of requests a day, the bootstrapping has +taken almost a day to complete. Second, when a node joins/leaves the system, the +key ranges handled by many nodes change and the Merkle trees for the new ranges +need to be recalculated, which is a non-trivial operation to perform on a +production system. Finally, there was no easy way to take a snapshot of the +entire key space due to the randomness in key ranges, and this made the process +of archival complicated. In this scheme, archiving the entire key space requires +us to retrieve the keys from each node separately, which is highly inefficient. + +The fundamental issue with this strategy is that the schemes for data +partitioning and data placement are intertwined. For instance, in some cases, it +is preferred to add more nodes to the system in order to handle an increase in +request load. However, in this scenario, it is not possible to add nodes without +affecting data partitioning. Ideally, it is desirable to use independent schemes +for partitioning and placement. To this end, following strategies were +evaluated: + +<strong>Strategy 2:</strong> T random tokens per node and equal sized +partitions: In this strategy, the hash space is divided into Q equally sized +partitions/ranges and each node is assigned T random tokens. Q is usually set +such that Q >> N and Q >> S*T, where S is the number of nodes in the system. In +this strategy, the tokens are only used to build the function that maps values +in the hash space to the ordered lists of nodes and not to decide the +partitioning. A partition is placed on the first N unique nodes that are +encountered while walking the consistent hashing ring clockwise from the end of +the partition. <a href="#figure-7">Figure 7</a> illustrates this strategy for +N=3. In this example, nodes A, B, C are encountered while walking the ring from +the end of the partition that contains key k1. The primary advantages of this +strategy are: (i) decoupling of partitioning and partition placement, and (ii) +enabling the possibility of changing the placement scheme at runtime. + +> As before mentioned, Riak uses equal sized partitions, but not +> random distribution. + +**<figure id="figure-7" style="text-align:center;"> + <img src="/riak-docs/images/dynamo/figure7-small.png"> + <figcaption> + Figure 7: Partitioning and placement of keys in the three strategies. A, B, + and C depict the three unique nodes that form the preference list for the + key k1 on the consistent hashing ring (N=3). The shaded area indicates the + key range for which nodes A, B, and C form the preference list. Dark arrows + indicate the token locations for various nodes. + </figcaption> +</figure>** + +<strong>Strategy 3:</strong> Q/S tokens per node, equal-sized partitions: +Similar to strategy 2, this strategy divides the hash space into Q equally sized +partitions and the placement of partition is decoupled from the partitioning +scheme. Moreover, each node is assigned Q/S tokens where S is the number of +nodes in the system. When a node leaves the system, its tokens are randomly +distributed to the remaining nodes such that these properties are preserved. +Similarly, when a node joins the system it "steals" tokens from nodes in the +system in a way that preserves these properties. + +> Riak most closely follows strategy 3. +> +> See [The Node Join Process] and [Replacing a Node]. 
+
+[The Node Join Process]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes/#joining-nodes-to-form-a-cluster
+[Replacing a Node]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/replacing-node/
+
+The efficiency of these three strategies is evaluated for a system with S=30 and
+N=3. However, comparing these different strategies in a fair manner is hard as
+different strategies have different configurations to tune their efficiency. For
+instance, the load distribution property of strategy 1 depends on the number of
+tokens (i.e., T) while strategy 3 depends on the number of partitions (i.e., Q).
+One fair way to compare these strategies is to evaluate the skew in their load
+distribution while all strategies use the same amount of space to maintain their
+membership information. For instance, in strategy 1 each node needs to maintain
+the token positions of all the nodes in the ring, and in strategy 3 each node
+needs to maintain the information regarding the partitions assigned to each
+node.
+
+In our next experiment, these strategies were evaluated by varying the relevant
+parameters (T and Q). The load balancing efficiency of each strategy was
+measured for different sizes of membership information that needs to be
+maintained at each node, where load balancing efficiency is defined as the ratio
+of the average number of requests served by each node to the maximum number of
+requests served by the hottest node.
+
+The results are given in <a href="#figure-8">Figure 8</a>. As seen in the
+figure, strategy 3 achieves the best load balancing efficiency and strategy 2
+has the worst load balancing efficiency. For a brief time, Strategy 2 served as
+an interim setup during the process of migrating Dynamo instances from using
+Strategy 1 to Strategy 3. Compared to Strategy 1, Strategy 3 achieves better
+efficiency and reduces the size of membership information maintained at each
+node by three orders of magnitude. While storage is not a major issue, the nodes
+gossip the membership information periodically, and as such it is desirable to
+keep this information as compact as possible. In addition to this, strategy 3 is
+advantageous and simpler to deploy for the following reasons: (i) Faster
+bootstrapping/recovery: Since partition ranges are fixed, they can be stored in
+separate files, meaning a partition can be relocated as a unit by simply
+transferring the file (avoiding random accesses needed to locate specific
+items). This simplifies the process of bootstrapping and recovery. (ii) Ease of
+archival: Periodic archiving of the dataset is a mandatory requirement for most
+of Amazon's storage services. Archiving the entire dataset stored by Dynamo is
+simpler in strategy 3 because the partition files can be archived separately.
+By contrast, in Strategy 1, the tokens are chosen randomly, and archiving the
+data stored in Dynamo requires retrieving the keys from individual nodes
+separately, which is usually inefficient and slow. The disadvantage of strategy
+3 is that changing the node membership requires coordination in order to
+preserve the properties required of the assignment.
+
+**<figure id="figure-8" style="text-align:center;">
+  <img src="/riak-docs/images/dynamo/figure8.png">
+  <figcaption>
+    Figure 8: Comparison of the load distribution efficiency of different
+    strategies for a system with 30 nodes and N=3 with an equal amount of
+    metadata maintained at each node.
The values of the system size and number of
+    replicas are based on the typical configuration deployed for the majority
+    of our services.
+  </figcaption>
+</figure>**
+
+### 6.3 Divergent Versions: When and How Many?
+
+As noted earlier, Dynamo is designed to trade off consistency for availability.
+To understand the precise impact of different failures on consistency, detailed
+data is required on multiple factors: outage length, type of failure, component
+reliability, workload, etc. Presenting these numbers in detail is outside of the
+scope of this paper. However, this section discusses a good summary metric: the
+number of divergent versions seen by the application in a live production
+environment.
+
+> This first statement should be read carefully. It's probably more correct to
+> say that Dynamo (and Riak) provides no consistency guarantees, and allows
+> users to trade availability for durability/latency.
+
+Divergent versions of a data item arise in two scenarios. The first is when the
+system is facing failure scenarios such as node failures, data center failures,
+and network partitions. The second is when the system is handling a large number
+of concurrent writers to a single data item and multiple nodes end up
+coordinating the updates concurrently. From both a usability and efficiency
+perspective, it is preferred to keep the number of divergent versions at any
+given time as low as possible. If the versions cannot be syntactically
+reconciled based on vector clocks alone, they have to be passed to the business
+logic for semantic reconciliation. Semantic reconciliation introduces additional
+load on services, so it is desirable to minimize the need for it.
+
+In our next experiment, the number of versions returned to the shopping cart
+service was profiled for a period of 24 hours. During this period, 99.94% of
+requests saw exactly one version; 0.00057% of requests saw 2 versions; 0.00047%
+of requests saw 3 versions; and 0.00009% of requests saw 4 versions. This shows
+that divergent versions are created rarely.
+
+Experience shows that the increase in the number of divergent versions is
+caused not by failures but by an increase in the number of concurrent writers.
+The increase in the number of concurrent writes is usually triggered by busy
+robots (automated client programs) and rarely by humans. This issue is not
+discussed in detail due to the sensitive nature of the story.
+
+### 6.4 Client-driven or Server-driven Coordination
+
+As mentioned in Section 5, Dynamo has a request coordination component that uses
+a state machine to handle incoming requests. Client requests are uniformly
+assigned to nodes in the ring by a load balancer. Any Dynamo node can act as a
+coordinator for a read request. Write requests, on the other hand, will be
+coordinated by a node in the key’s current preference list. This restriction is
+due to the fact that these preferred nodes have the added responsibility of
+creating a new version stamp that causally subsumes the version that has been
+updated by the write request. Note that if Dynamo’s versioning scheme is based
+on physical timestamps, any node can coordinate a write request.
+
+> In Riak, a server-side load balancer is an optional configuration. You
+> generally use either virtual IPs or reverse proxies.
+>
+> See [Load Balancing] for more information.
+
+[Load Balancing]: {{<baseurl>}}riak/kv/3.0.4/configuring/load-balancing-proxy/
+
+An alternative approach to request coordination is to move the state machine to
+the client nodes. In this scheme, client applications use a library to perform
+request coordination locally. A client periodically picks a random Dynamo node
+and downloads its current view of Dynamo membership state. Using this
+information, the client can determine which set of nodes form the preference
+list for any given key. Read requests can be coordinated at the client node,
+thereby avoiding the extra network hop that is incurred if the request were
+assigned to a random Dynamo node by the load balancer. Writes will either be
+forwarded to a node in the key’s preference list or can be coordinated locally
+if Dynamo is using timestamp-based versioning.
+
+> Many [client libraries] provide built-in node request coordination.
+>
+> For example, using the Ruby driver, you could specify three nodes like this:
+>
+>     client = Riak::Client.new(nodes: [
+>       {host: '10.0.0.1'},
+>       {host: '10.0.0.2'},
+>       {host: '10.0.0.3'}
+>     ])
+>
+> Note that the Riak clients do not coordinate with Riak's preference list, but
+> simply round-robin requests, letting the Riak cluster handle routing.
+
+[client libraries]: {{<baseurl>}}riak/kv/3.0.4/developing/client-libraries/
+
+An important advantage of the client-driven coordination approach is that a load
+balancer is no longer required to uniformly distribute client load. Fair load
+distribution is implicitly guaranteed by the near-uniform assignment of keys to
+the storage nodes. Obviously, the efficiency of this scheme is dependent on how
+fresh the membership information is at the client. Currently, clients poll a
+random Dynamo node every 10 seconds for membership updates. A pull-based
+approach was chosen over a push-based one, as the former scales better with a
+large number of clients and requires very little state to be maintained at
+servers regarding clients. However, in the worst case the client can be exposed
+to stale membership for a duration of 10 seconds. If the client detects that its
+membership table is stale (for instance, when some members are unreachable), it
+will immediately refresh its membership information.
+
+<a href="#table-2">Table 2</a> shows the latency improvements at the 99.9th
+percentile and averages that were observed for a period of 24 hours using
+client-driven coordination compared to the server-driven approach. As seen in
+the table, the client-driven coordination approach reduces the latencies by at
+least 30 milliseconds for 99.9th percentile latencies and decreases the average
+by 3 to 4 milliseconds. The latency improvement is because the client-driven
+approach eliminates the overhead of the load balancer and the extra network hop
+that may be incurred when a request is assigned to a random node. As seen in the
+table, average latencies tend to be significantly lower than latencies at the
+99.9th percentile. This is because Dynamo’s storage engine caches and write
+buffer have good hit ratios. Moreover, since the load balancers and network
+introduce additional variability to the response time, the gain in response time
+is higher for the 99.9th percentile than the average.
+
+<table id="table-2">
+  <caption>
+    Table 2: Performance of client-driven and server-driven
+    coordination approaches.
</caption>
+  <tr>
+    <th></th>
+    <th>99.9th percentile read latency (ms)</th>
+    <th>99.9th percentile write latency (ms)</th>
+    <th>Average read latency (ms)</th>
+    <th>Average write latency (ms)</th>
+  </tr>
+  <tr>
+    <th>Server-driven</th>
+    <td>68.9</td>
+    <td>68.5</td>
+    <td>3.9</td>
+    <td>4.02</td>
+  </tr>
+  <tr>
+    <th>Client-driven</th>
+    <td>30.4</td>
+    <td>30.4</td>
+    <td>1.55</td>
+    <td>1.9</td>
+  </tr>
+</table>
+
+### 6.5 Balancing background vs. foreground tasks
+
+Each node performs different kinds of background tasks for replica
+synchronization and data handoff (either due to hinting or adding/removing
+nodes) in addition to its normal foreground put/get operations. In early
+production settings, these background tasks triggered the problem of resource
+contention and affected the performance of the regular put and get operations.
+Hence, it became necessary to ensure that background tasks ran only when the
+regular critical operations would not be affected significantly. To this end,
+the background tasks were integrated with an admission control mechanism. Each
+of the background tasks uses this controller to reserve runtime slices of the
+resource (e.g. database), shared across all background tasks. A feedback
+mechanism based on the monitored performance of the foreground tasks is employed
+to change the number of slices that are available to the background tasks.
+
+> Riak does this, too. For example, hinted handoff runs in the background at a
+> low level, so as not to overwhelm a cluster when nodes are added/removed.
+
+The admission controller constantly monitors the behavior of resource accesses
+while executing a "foreground" put/get operation. Monitored aspects include
+latencies for disk operations, failed database accesses due to lock contention
+and transaction timeouts, and request queue wait times. This information is used
+to check whether the percentiles of latencies (or failures) in a given trailing
+time window are close to a desired threshold. For example, the background
+controller checks to see how close the 99th percentile database read latency
+(over the last 60 seconds) is to a preset threshold (say 50ms). The controller
+uses such comparisons to assess the resource availability for the foreground
+operations. Subsequently, it decides on how many time slices will be available
+to background tasks, thereby using the feedback loop to limit the intrusiveness
+of the background activities. Note that a similar problem of managing background
+tasks has been studied in [4].
+
+### 6.6 Discussion
+
+This section summarizes some of the experiences gained during the process of
+implementation and maintenance of Dynamo. Many Amazon internal services have
+used Dynamo for the past two years, and it has provided significant levels of
+availability to its applications. In particular, applications have received
+successful responses (without timing out) for 99.9995% of their requests, and no
+data loss event has occurred to date.
+
+Moreover, the primary advantage of Dynamo is that it provides applications with
+the necessary knobs, in the form of the three parameters (N,R,W), to tune their
+instance based on their needs. Unlike popular commercial data stores, Dynamo
+exposes data consistency and reconciliation logic issues to the developers. At
+the outset, one may expect the application logic to become more complex.
+However, historically, Amazon’s platform is built for high availability and many
+applications are designed to handle different failure modes and inconsistencies
+that may arise.
Hence, porting such applications to use Dynamo was a relatively
+simple task. For new applications that want to use Dynamo, some analysis is
+required during the initial stages of the development to pick the right conflict
+resolution mechanisms that meet the business case appropriately. Finally, Dynamo
+adopts a full membership model where each node is aware of the data hosted by
+its peers. To do this, each node actively gossips the full routing table with
+other nodes in the system. This model works well for a system that contains a
+couple of hundred nodes. However, scaling such a design to run with tens of
+thousands of nodes is not trivial because the overhead in maintaining the
+routing table increases with the system size. This limitation might be overcome
+by introducing hierarchical extensions to Dynamo. Also, note that this problem
+is actively addressed by O(1) DHT systems (e.g., [14]).
+
+> This is equally true for Riak. As mentioned above, consider running
+> [Basho Bench] to help discover your optimal setup. Nothing will give you
+> better numbers than real experimentation.
+
+[Basho Bench]: {{<baseurl>}}riak/kv/3.0.4/using/performance/benchmarking/
+
+## 7. Conclusions
+
+> This paper was an overview of Riak from a Dynamo point of view. To get a
+> better sense of the Riak ecosystem, read our ever-expanding [documentation].
+
+[documentation]: {{<baseurl>}}
+
+This paper described Dynamo, a highly available and scalable data store, used
+for storing the state of a number of core services of Amazon.com’s e-commerce
+platform. Dynamo has provided the desired levels of availability and performance
+and has been successful in handling server failures, data center failures, and
+network partitions. Dynamo is incrementally scalable and allows service owners
+to scale up and down based on their current request load. Dynamo allows service
+owners to customize their storage system to meet their desired performance,
+durability, and consistency SLAs by allowing them to tune the parameters N, R,
+and W.
+
+The production use of Dynamo for the past year demonstrates that decentralized
+techniques can be combined to provide a single highly-available system. Its
+success in one of the most challenging application environments shows that an
+eventually consistent storage system can be a building block for
+highly-available applications.
+ + + + diff --git a/content/riak/kv/3.0.4/learn/glossary.md b/content/riak/kv/3.0.4/learn/glossary.md new file mode 100644 index 0000000000..cbcb7a80f8 --- /dev/null +++ b/content/riak/kv/3.0.4/learn/glossary.md @@ -0,0 +1,358 @@ +--- +title: "Riak KV Glossary" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Riak KV Glossary" + identifier: "learn_glossary" + weight: 103 + parent: "learn" +toc: true +aliases: +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/3.0.4/developing/app-guide/replication-properties +[basho bench GH]: http://github.com/basho/basho_bench/ +[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/strong-consistency +[concept buckets]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets +[concept causal context vc]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context/#vector-clocks +[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters +[concept crdts]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/crdts +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency +[concept keys objects]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/keys-and-objects +[concept replication]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication +[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/strong-consistency +[dev kv model]: {{<baseurl>}}riak/kv/3.0.4/developing/key-value-modeling +[concept replication aae]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication/#active-anti-entropy +[dev api http]: {{<baseurl>}}riak/kv/3.0.4/developing/api/http +[dev data model]: {{<baseurl>}}riak/kv/3.0.4/developing/data-modeling +[dev data types]: {{<baseurl>}}riak/kv/3.0.4/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#read-repair +[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode +[Lager]: https://github.com/basho/lager +[learn dynamo]: {{<baseurl>}}riak/kv/3.0.4/learn/dynamo +[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/cluster-capacity +[repair recover failure recovery]: {{<baseurl>}}riak/kv/3.0.4/using/repair-recovery/failure-recovery +[repair recover repairs]: {{<baseurl>}}riak/kv/3.0.4/using/repair-recovery/repairs +[Riak Core]: https://github.com/basho/riak_core +[Riak KV]: https://github.com/basho/riak_kv +[Riak Pipe]: https://github.com/basho/riak_pipe +[Riak Pipe - the New MapReduce Power]: http://basho.com/riak-pipe-the-new-mapreduce-power/ +[Riak Pipe - Riak's Distributed Processing Framework]: http://vimeo.com/53910999 +[Understanding Riak's Configurable Behaviors]: http://basho.com/riaks-config-behaviors-part-2/ +[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes +[Where To Start With Riak Core]: http://basho.com/where-to-start-with-riak-core/ +[Wikipedia:Consistent Hashing]: http://en.wikipedia.org/wiki/Consistent_hashing + + +Below is a list of terms that you may run into frequently in the +documentation for Riak, along with links to more in-depth treatments. + +## Active Anti-Entropy (AAE) + +A continuous background process that compares and repairs any divergent, +missing, or corrupted replicas. 
Unlike [read +repair][glossary read rep], which is only triggered when data is +read, the Active Anti-Entropy system ensures the integrity of all data +stored in Riak. This is particularly useful in clusters containing “cold +data,” i.e. data that may not be read for long periods of time, +potentially years. Furthermore, unlike the repair command, Active +Anti-Entropy is an automatic process requiring no user intervention. It +is enabled by default in Riak 1.3 and greater. + +* [Replication][concept replication aae] + +## Basho Bench + +Basho Bench is a benchmarking tool created to conduct accurate and +repeatable performance tests and stress tests and to produce performance +graphs. + +* [Basho Bench]({{<baseurl>}}riak/kv/3.0.4/using/performance/benchmarking) +* [GitHub repository][basho bench GH] + +## Bucket + +A bucket is a namespace for data stored in Riak, with a set of common +properties for its contents, e.g. the number of replicas (`n_val`), +whether siblings are returned on reads (`allow_mult`), etc. Buckets' +properties are determined by their bucket type (see below). + +* [Buckets][concept buckets] +* [HTTP Bucket Operations][dev api http] + +## Bucket Type + +Bucket types enable you to create and manage sets of bucket properties +that, when applied to buckets, dictate those buckets' behavior. They +also act as a third namespace in Riak in addition to buckets and keys. + +* [Bucket Types]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) + +## Cluster + +A Riak cluster is a 160-bit integer space which is divided into +equally-sized partitions. Each vnode in the Riak Ring is responsible for +one of these partitions. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + +## Consistent Hashing + +Consistent hashing is a technique used to limit the reshuffling of keys +when a hash-table data structure is rebalanced (i.e. when slots are +added or removed). Riak uses consistent hashing to organize its data +storage and replication. Specifically, the vnodes in the Riak Ring +responsible for storing each object are determined using the consistent +hashing technique. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Wikipedia:Consistent Hashing] + +## Data Types + +Riak Data Types are data objects inspired by research on +[CRDTs](http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf) that use +certain rules of convergence to dictate how conflicts between replicas +are resolved in Riak's eventually consistent system. There are five Riak +Data Types in total: flags, registers, counters, sets, and maps. + +* [Data Types Concept][concept crdts] +* [Using Data Types][dev data types] +* [Data Modeling with Riak Data Types][dev data model] + +## Eventual Consistency + +A consistency model that informally guarantees that if no new updates +are made to a given data item, all reads on that item will eventually +return the last updated value. Details about what this means in Riak can +be found in the document below. + +* [Eventual Consistency][concept eventual consistency] + +## Gossiping + +Riak uses a "gossip protocol" to share and communicate ring state and +bucket properties around the cluster. Whenever a node changes its claim +on the ring, it announces its change via this protocol. Each node also +periodically sends its current view of the ring state to a randomly +selected peer in case any nodes missed previous updates. 
+ +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Hinted Handoff + +Hinted handoff is a technique for dealing with node failure in the Riak +cluster in which neighboring nodes temporarily take over storage +operations for the failed node. When the failed node returns to the +cluster, the updates received by the neighboring nodes are handed off to +it. + +Hinted handoff allows Riak to ensure database availability. When a node +fails, Riak can continue to handle requests as if the node were still +there. + +* [Recovering a Failed Node][repair recover failure recovery] + +## Key + +Keys are unique object identifiers in Riak and are scoped within buckets +and bucket types. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Lager + +[Lager] is an Erlang/OTP framework that +ships as Riak's default logger. + +## MapReduce + +Riak's MapReduce gives developers the capability to perform more +powerful queries over the data stored in their key/value data. + +* [Using MapReduce][usage mapreduce] + +## Node + +A node is analogous to a physical server. Nodes run a certain number of +vnodes, each of which claims a partition in the Riak Ring key space. + +* [Clusters][concept clusters] +* [Adding and Removing Nodes][cluster ops add remove node] + +## Object + +An object is another name for a value. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] + +## Partition + +Partitions are the spaces into which a Riak cluster is divided. Each +vnode in Riak is responsible for a partition. Data is stored on a set +number of partitions determined by the `n_val` setting, with the target +partitions chosen statically by applying consistent hashing to an +object's key. + +* [Clusters][concept clusters] +* [Eventual Consistency][concept eventual consistency] +* [Cluster Capacity Planning][plan cluster capacity] + +## Quorum + +Quorum in Riak has two meanings: + +* The quantity of replicas that must respond to a read or write request + before it is considered successful. This is defined as a bucket + property or as one of the relevant parameters to a single request + (R,W,DW,RW). +* A symbolic quantity for the above, `quorum`, which is equivalent to + `n_val` / 2 + 1. The default setting is `2`. + +* [Eventual Consistency][concept eventual consistency] +* [Replication properties][apps replication properties] +* [Understanding Riak's Configurable Behaviors] + +## Sloppy Quorum + +During failure scenarios, in which available nodes < total nodes, sloppy +quorum is used to ensure that Riak is still available to take writes. +When a primary node is unavailable, another node will accept its write +requests. When the node returns, data is transferred to the primary node +via the [Hinted Handoff](#hinted-handoff) process. + +## Read Repair + +Read repair is an anti-entropy mechanism that Riak uses to +optimistically update stale replicas when they reply to a read request +with stale data. + +* [More about Read Repair][concept replication] + +## Replica + +Replicas are copies of data stored in Riak. The number of replicas +required for both successful reads and writes is configurable in Riak +and should be set based on your application's consistency and +availability requirements. 
+ +* [Eventual Consistency][concept eventual consistency] +* [Understanding Riak's Configurable Behaviors] + +## Riak Core + +Riak Core is the modular distributed systems framework that serves as +the foundation for Riak's scalable architecture. + +* [Riak Core] +* [Where To Start With Riak Core] + +## Riak KV + +Riak KV is the key/value datastore for Riak. + +* [Riak KV] + +## Riak Pipe + +Riak Pipe is the processing layer that powers Riak's MapReduce. It's +best described as "UNIX pipes for Riak." + +* [Riak Pipe] +* [Riak Pipe - the New MapReduce Power] +* [Riak Pipe - Riak's Distributed Processing Framework] + +## Riak Search + +Riak Search is a distributed, scalable, failure-tolerant, realtime, +full-text search engine integrating [Apache +Solr](https://lucene.apache.org/solr/) with Riak KV. + +* [Using Search][usage search] + +## Ring + +The Riak Ring is a 160-bit integer space. This space is equally divided +into partitions, each of which is claimed by a vnode, which themselves +reside on actual physical server nodes. + +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] +* [Cluster Capacity Planning][plan cluster capacity] + +## Secondary Indexing (2i) + +Secondary Indexing in Riak gives developers the ability to tag an object +stored in Riak with one or more values which can then be queried. + +* [Using Secondary Indexes][usage secondary-indexes] +* [Repairing Indexes][repair recover repairs] + +## Strong Consistency + +While Riak is most well known as an [eventually consistent][concept eventual consistency] data storage system, versions of Riak 2.0 and greater +enable you to apply strong consistency guarantees to some or all of your +data, thus using Riak as a CP (consistent plus partition-tolerant) +rather than AP (highly available plus partition-tolerant) system. + +* [Strong Consistency Concept][concept strong consistency] +* [Using Strong Consistency][cluster ops strong consistency] + +## Value + +Riak is best described as a key/value store. In versions of Riak prior +to 2.0, all "values" are opaque BLOBs (binary large objects) identified +with a unique key. Values can be any type of data, including a string, a +JSON object, a text document, etc. Modifying values involves fetching +the value that exists in Riak and substituting it for a new value; +operations on values are thus basic CRUD operations. + +[Riak Data Types][dev data types], added in version 2.0, are an important +exception to this. While still considered values---because they are +stored in bucket type/bucket/key locations, like anything in Riak---Riak +Data Types are not BLOBs and are modified by Data Type-specific +operations. + +* [Keys and Objects][concept keys objects] +* [Key/Value Development][dev kv model] +* [Data Types][dev data types] + + +## Vector Clock + +Riak utilizes vector clocks (or _vclocks_) to handle version control. +Since any node in a Riak cluster is able to handle a request, and not +all nodes need to participate, data versioning is required to keep track +of a current value. When a value is stored in Riak, it is tagged with a +vector clock and establishes the initial version. When it is updated, +the client provides the vector clock of the object being modified so +that this vector clock can be extended to reflect the update. Riak can +then compare vector clocks on different versions of the object and +determine certain attributes of the data. 
+ +* [Vector clocks][concept causal context vc] + +## Vnode + +Vnodes, or "virtual nodes," are responsible for claiming a partition in +the Riak Ring, and they coordinate requests for these partitions. + +* [vnodes][glossary vnode] +* [Clusters][concept clusters] +* [Dynamo][learn dynamo] + + + + diff --git a/content/riak/kv/3.0.4/learn/new-to-nosql.md b/content/riak/kv/3.0.4/learn/new-to-nosql.md new file mode 100644 index 0000000000..00e84391fd --- /dev/null +++ b/content/riak/kv/3.0.4/learn/new-to-nosql.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "New to NoSQL?" +description: "" +project: "riak_kv" +project_version: 3.0.4 +#menu: +# riak_kv-3.0.4: +# name: "New to NoSQL?" +# identifier: "learn_new_nosql" +# weight: 102 +# parent: "learn" +toc: true +aliases: +--- + +**TODO: Add content (not sure where this lives in existing docs)** + + + + diff --git a/content/riak/kv/3.0.4/learn/use-cases.md b/content/riak/kv/3.0.4/learn/use-cases.md new file mode 100644 index 0000000000..8fcd8def10 --- /dev/null +++ b/content/riak/kv/3.0.4/learn/use-cases.md @@ -0,0 +1,405 @@ +--- +title: "Use Cases For Riak KV" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Use Cases" + identifier: "learn_use_cases" + weight: 101 + parent: "learn" +toc: true +aliases: + - /riak/3.0.4/dev/data-modeling/ + - /riak/kv/3.0.4/dev/data-modeling/ +--- + + +[dev data model articles etc]: {{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/#articles-blog-posts-and-other-content +[dev data model log data]: {{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/#log-data +[dev data model sensor data]: {{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/#sensor-data +[dev data model serve advertisements]: {{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/#serving-advertisements +[dev data model sess storage]: {{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/#session-storage +[dev data model user acct]: {{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/#user-accounts +[dev data model user events]: {{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/#user-events-and-timelines +[dev data model user settings]: {{<baseurl>}}riak/kv/3.0.4/developing/data-modeling/#user-settings-and-preferences +[dev data types]: {{<baseurl>}}riak/kv/3.0.4/developing/data-types +[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask +[replication properties]: {{<baseurl>}}riak/kv/3.0.4/developing/app-guide/replication-properties +[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce +[usage search]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/search +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes + +Riak is a flexible data storage technology capable of addressing a wide variety +of problems in a scalable way. In this guide, we'll list a number of use cases +and data models that are a good fit for Riak. All of these use cases are already +being used in production for projects large and small. We'll also suggest +possibilities for implementation and provide links to videos and documentation +for further exploration. + +How you structure your application to run on Riak should take into account the +unique needs of your use case, including access patterns such as read/write +distribution, latency differences between various operations, use of Riak +features including [Data Types][dev data types], [MapReduce][usage mapreduce], +[Search][usage search], [secondary indexes (2i)][usage secondary-indexes], and +more. 
This guide is intended to be illustrative only.
+
+## High Read/Write, Simple Applications
+
+The following are examples of Riak use cases that require high read/write
+performance without necessarily utilizing complex data structures:
+
+* [Session Storage][dev data model sess storage]
+* [Serving Advertisements][dev data model serve advertisements]
+* [Log Data][dev data model log data]
+* [Sensor Data][dev data model sensor data]
+
+## Content Management, Social Applications
+
+The following application types require more subtle relationships between
+objects, e.g. one-to-many and many-to-many relationships.
+
+* [User Accounts][dev data model user acct]
+* [User Settings and Preferences][dev data model user settings]
+* [User Events and Timelines][dev data model user events]
+* [Articles, Blog Posts, and Other Content][dev data model articles etc]
+
+## Session Storage
+
+Riak was originally created to serve as a highly scalable session store. This is
+an ideal use case for Riak, which is always most performant and predictable when
+used as a key/value store. Since user and session IDs are usually stored in
+cookies or otherwise known at lookup time, Riak is able to serve these requests
+with predictably low latency. Riak's content-type agnosticism also imposes no
+restrictions on the value, so session data can be encoded in many ways and can
+evolve without administrative changes to schemas.
+
+### Complex Session Storage Case
+
+Riak has features that allow for more complex session storage use cases. The
+[Bitcask][plan backend bitcask] storage backend, for example, supports automatic
+expiry of keys, which frees application developers from implementing manual
+session expiry. Riak's [MapReduce][usage mapreduce] system can also be used to
+perform batch processing analysis on large bodies of session data, for example
+to compute the average number of active users. If sessions must be retrieved
+using multiple keys (e.g. a UUID or email address),
+[using secondary indexes][usage secondary-indexes] can provide an easy solution.
+
+### Session Storage Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">
+        <img src="http://b.vimeocdn.com/ts/296/624/29662.9.0_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/42744689" target="_blank" title="Scaling Riak at Kiip">Scaling Riak at Kiip</a>
+      <br>
+      In this talk, recorded at the May 2012 San Francisco Riak Meetup, Armon
+      Dadgar and Mitchell Hashimoto of Kiip give an overview of how and why they
+      are using Riak in production, and the road they took to get there. One of
+      the first subsystems they switched over to Riak was sessions. You can also
+      read the blog post and catch the slides <a
+      href="http://basho.com/posts/business/Scaling-Riak-At-Kiip/" class="riak"
+      target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Serving Advertisements
+
+Riak is often a good choice for serving advertising content to many different
+web and mobile users simultaneously with low latency. Content of this sort, e.g.
+images or text, can be stored in Riak using unique keys generated either by the
+application or by Riak. Keys can be created based on, for example, a campaign or
+company ID for easy retrieval.
+
+### Serving Advertisements Complex Case
+
+In the advertising industry, being able to serve ads quickly to many users and
+platforms is often the most important factor in selecting and tuning a database.
+Riak's tunable [replication properties] can be set
+to favor fast read performance. By setting R to 1, only one of N replicas will
+need to be returned to complete a read operation, yielding lower read latency
+than an R value equal to the number of replicas (i.e. R=N). This is ideal for
+advertising traffic, which primarily involves serving reads.
+
+### Serving Advertisements Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">
+        <img src="http://b.vimeocdn.com/ts/343/417/343417336_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/49775483" target="_blank" title="Riak at OpenX">Riak at OpenX</a>
+      <br>
+      Los Angeles-based OpenX serves trillions of ads a year. In this talk,
+      Anthony Molinaro, Engineer at OpenX, goes in depth on their architecture,
+      how they've built their system, and why/how they're switching to Riak for
+      data storage after using databases like CouchDB and Cassandra in
+      production.
+    </td>
+  </tr>
+</table>
+
+## Log Data
+
+A common use case for Riak is storing large amounts of log data, either for
+analysis [using MapReduce][usage mapreduce] or as a storage system used
+in conjunction with a secondary analytics cluster that performs more advanced
+analytics tasks. To store log data, you can use a bucket called `logs` (just to
+give an example) and use a unique value, such as a date, for the key. Log files
+would then be the values associated with each unique key.
+
+For storing log data from different systems, you could use unique buckets for
+each system (e.g. `system1_log_data`, `system2_log_data`, etc.) and write
+associated logs to the corresponding buckets. To analyze that data, you could
+use Riak's MapReduce system for aggregation tasks, such as summing the counts of
+records for a date, or Riak Search for more robust, text-based queries.
+
+### Log Data Complex Case
+
+For storing a large amount of log data that is frequently written to Riak, some
+users might consider doing primary storage of logs in a Riak cluster and then
+replicating data to a secondary cluster to run heavy analytics jobs, either over
+another Riak cluster or another solution such as Hadoop. Because the access
+patterns of reading and writing data to Riak are very different from the access
+pattern of something like a MapReduce job, which iterates over many keys,
+separating the write workload from the analytics workload will let you maintain
+higher performance and yield more predictable latency.
+
+### Log Data Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank" title="Analyzing Apache logs with Riak">
+        <img src="/riak-docs/images/simon-analyzing-logs.png"/>
+      </a>
+    </td>
+    <td>
+      Simon Buckle on <a href="http://www.simonbuckle.com/2011/08/27/analyzing-apache-logs-with-riak/" target="_blank">analyzing Apache logs with Riak.</a>
+    </td>
+  </tr>
+</table>
+
+## Sensor Data
+
+Riak's scalable design makes it useful for data sets, like sensor data, that
+scale rapidly and are subject to heavy read/write loads. Many sensors collect
+and send data at a given interval. One way to model this in Riak is to use a
+bucket for each sensor device and use the time interval as a unique key (i.e. a
+date or combination of date and time), and then store update data as the value.
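+
+For illustration, here is a minimal sketch of that pattern using the Ruby
+client (the device bucket, key format, and reading fields are hypothetical,
+and the API usage assumes the official `riak-client` gem):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+
+# One bucket per device; one key per reading interval.
+bucket = client.bucket('sensor-ab42')
+
+reading = bucket.new('2021-04-02T10:30:00Z')
+reading.content_type = 'application/json'
+reading.data = {'temperature_c' => 21.4, 'humidity' => 0.53}
+reading.store
+```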
+
+That data could then be queried on the basis of the interval. Alternatively, a
+timestamp could be attached to each object as a
+[secondary index][usage secondary-indexes], which would allow you to perform
+queries on specific time interval ranges or to perform
+[MapReduce][usage mapreduce] queries against the indexes.
+
+### Sensor Data Complex Case
+
+If you are dealing with thousands or millions of sensors but very small
+data sets, storing all of a single device's updates as unique keys may be
+cumbersome when it comes to reading that device's data. Retrieving it all would
+mean fetching a large number of keys.
+
+Instead, you could store all of a device's updates in a document with a unique
+key to identify the device. Stored as a JSON document, you could read and parse
+all of those updates on the client side. Riak, however, doesn't allow you to
+append data to a document without reading the object and writing it back to the
+key. This strategy would mean more simplicity and performance on the read side
+as a tradeoff for slightly more work at write time and on the client side.
+
+It's also important to keep an eye out for the total size of documents as they
+grow, as we tend to recommend that Riak objects stay smaller than 1-2 MB and
+preferably below 100 KB. Otherwise, performance problems in the cluster are
+likely.
+
+## User Accounts
+
+User accounts tend to rely on fairly straightforward data models. One way of
+storing user account data in Riak would be to store each user's data as a JSON
+object in a bucket called `users` (or whatever you wish). Keys for user data
+objects could be constructed using application-specific considerations. If your
+application involves user logins, for example, the simplest and most
+read-efficient strategy would be to use the login username as the object key.
+The username could be extracted upon login, and a read request could be
+performed on the corresponding key.
+
+There are, however, several drawbacks to this approach. What happens if a user
+wants to change their username later on? The most common solution would be to
+use a UUID-type key for the user and store the user's username as a
+[secondary index][usage secondary-indexes] for efficient lookup.
+
+### User Accounts Complex Case
+
+For simple retrieval of a specific account, a user ID (plus perhaps a secondary
+index on a username or email) is enough. If you foresee the need to make queries
+on additional user attributes (e.g. creation time, user type, or region), plan
+ahead and either set up additional secondary indexes or consider using
+[Riak Search][usage search] to index the JSON contents of the user account.
+
+### User Accounts Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">
+        <img class="vid_img" src="http://b.vimeocdn.com/ts/329/711/329711886_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="https://player.vimeo.com/video/47535803" target="_blank" title="Riak at Braintree">Riak at Braintree</a>
+      <br>
+      Ben Mills, a developer at Braintree, discusses how their backend team came
+      to find and begin to integrate Riak into their production environment.
+      They also cover their model and repository framework for Ruby, Curator.
Check out more details and slides on the
+      <a href="http://basho.com/posts/business/riak-at-braintree/" target="_blank">Riak blog.</a>
+    </td>
+  </tr>
+</table>
+
+## User Settings and Preferences
+
+For user account-related data that is simple and frequently read but rarely
+changed (such as a privacy setting or theme preference), consider storing it in
+the user object itself. Another common pattern is to create a companion user
+settings-type of object, with keys based on user ID for easy one-read retrieval.
+
+### User Settings and Preferences Complex Case
+
+If you find your application frequently writing to the user account or have
+dynamically growing user-related data such as bookmarks, subscriptions, or
+multiple notifications, then a more advanced data model may be called for.
+
+## User Events and Timelines
+
+Sometimes you may want to do more complex or specific kinds of user data
+modeling. A common example would be storing data for assembling a social network
+timeline. To create a user timeline, you could use a `timeline` bucket in Riak
+and form keys on the basis of a unique user ID. You would store timeline
+information as the value, e.g. a list of status update IDs, which could then be
+used to retrieve the full information from another bucket, or perhaps the full
+status updates themselves. If you want to store additional data, such as a
+timestamp, category, or list of properties, you can turn the list into an array
+of hashes containing this additional information.
+
+Note that in Riak you cannot append information to an object, so adding events
+to the timeline would necessarily involve reading the full object, modifying it,
+and writing back the new value.
+
+### User Events and Timelines Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">
+        <img src="http://b.vimeocdn.com/ts/139/033/139033664_640.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/21598799" target="_blank" title="Riak at Yammer">Riak at Yammer</a>
+      <br>
+      This video was recorded at the March 2012 San Francisco Riak Meetup and is
+      worth every minute of your time. Coda Hale and Ryan Kennedy of Yammer give
+      an excellent and in-depth look into how they built “Streamie”, user
+      notifications, why Riak was the right choice, and the lessons learned in
+      the process. Read more and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-And-Scala-At-Yammer/" target="_blank">here.</a>
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">
+        <img src="http://b.vimeocdn.com/ts/309/154/309154350_960.jpg"/>
+      </a>
+    </td>
+    <td>
+      <a href="http://player.vimeo.com/video/44498491" target="_blank" title="Riak at Voxer">Riak at Voxer</a>
+      <br>
+      The team at Voxer has long relied on Riak as their primary data store for
+      various production services. They have put Riak through its paces and have
+      served as one of our more exciting customers and use cases: Riak was in
+      place when they shot to the top of the App Store at the end of 2011. We
+      also love them because they open-sourced their Node.js client.
Read more
+      and get the slides in the Riak blog
+      <a href="http://basho.com/posts/business/Riak-in-Production-at-Voxer/" target="_blank">here.</a>
+    </td>
+  </tr>
+</table>
+
+## Articles, Blog Posts, and Other Content
+
+The simplest way to model blog posts, articles, or similar content is to use a
+bucket in Riak with some unique attribute for logical division of content, such
+as `blogs` or `articles`. Keys could be constructed out of unique identifiers
+for posts, perhaps the title of each article, a combination of the title and
+date/time, an integer that can be used as part of a URL string, etc.
+
+In Riak, you can store content of any kind, from HTML files to plain text to
+JSON or XML or another document type entirely. Keep in mind that data in Riak is
+opaque, with the exception of [Riak Data Types][dev data types], and so Riak
+won't "know" about the object unless it is indexed
+[using Riak Search][usage search] or
+[using secondary indexes][usage secondary-indexes].
+
+### Articles et al Complex Case
+
+Setting up a data model for content becomes more complex based on the querying
+and search requirements of your application. For example, you may have different
+kinds of content that you want to generate in a view, e.g. not just a post but
+also comments, user profile information, etc.
+
+For many Riak developers, it will make sense to divide content into different
+buckets, e.g. a bucket for comments that would be stored in the Riak cluster
+along with the posts bucket. Comments for a given post could be stored as a
+document with the same key as the content post, though with a different
+bucket/key combination. Another possibility would be to store each comment with
+its own ID. Loading the full view with comments would require your application
+to fetch from both the posts and comments buckets to assemble the view.
+
+Other possible cases may involve performing operations on content beyond
+key/value pairs. [Riak Search][usage search] is recommended for use cases
+involving full-text search. For lighter-weight querying,
+[using secondary indexes][usage secondary-indexes] \(2i) enables you to add
+metadata to objects to either query for exact matches or to perform range
+queries. 2i also enables you to tag posts with dates, timestamps, topic areas,
+or other pieces of information useful for later retrieval.
+
+### Articles et al Community Examples
+
+<table class="use-cases__image-links">
+  <tr>
+    <td>
+      <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/linkfluence-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      Linkfluence case study on using Riak to <a href="http://media.basho.com/pdf/Linkfluence-Case-Study-v2-1.pdf" target="_blank">store social web content</a>.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" class="vid_img" link target="_blank">
+        <img src="/riak-docs/images/ideeli-case-study.png" title="Milking Performance">
+      </a>
+    </td>
+    <td>
+      ideeli case study on <a href="http://basho.com/assets/Basho-Case-Study-ideeli.pdf" target="_blank">serving web pages with Riak</a>.
+    </td>
+  </tr>
+</table>
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/learn/why-riak-kv.md b/content/riak/kv/3.0.4/learn/why-riak-kv.md
new file mode 100644
index 0000000000..af78a15021
--- /dev/null
+++ b/content/riak/kv/3.0.4/learn/why-riak-kv.md
@@ -0,0 +1,225 @@
+---
+title: "Why Riak KV?"
+description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Why Riak KV?" + identifier: "learn_why_riak_kv" + weight: 100 + parent: "learn" +toc: true +aliases: + - /riak/3.0.4/theory/why-riak/ + - /riak/kv/3.0.4/theory/why-riak/ +--- + + +[apps replication properties]: {{<baseurl>}}riak/kv/3.0.4/developing/app-guide/replication-properties +[Basho Bench]: {{<baseurl>}}riak/kv/3.0.4/using/performance/benchmarking +[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/strong-consistency +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency +[convergent replicated data types]: http://hal.upmc.fr/docs/00/55/55/88/PDF/techreport.pdf +[Datomic]: http://www.datomic.com/overview.html +[dev data types]: {{<baseurl>}}riak/kv/3.0.4/developing/data-types +[glossary read rep]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#read-repair + + +## What is Riak? + +Riak is a distributed database designed to deliver maximum data +availability by distributing data across multiple servers. As long as +your Riak client can reach *one* Riak server, it should be able to write +data. + +Riak is used as an **eventually consistent** system in that the data you want to read should remain available in most failure scenarios, although it may not be the most up-to-date version of that data. + + +### Basho's goals for Riak + +Goal | Description +-------|------- +**Availability** | Riak writes to and reads from multiple servers to offer data availability even when hardware or the network itself are experiencing failure conditions +**Operational simplicity** | Easily add new machines to your Riak cluster without incurring a larger operational burden +**Scalability** | Riak automatically distributes data around the cluster and yields a near-linear performance increase as you add capacity +**Masterless** | Your requests are not held hostage to a specific server in the cluster that may or may not be available + +### When Riak makes sense + +If your data does not fit on a single server and demands a distributed +database architecture, you should take a close look at Riak as a +potential solution to your data availability issues. Getting distributed +databases right is **very** difficult, and Riak was built to address the +problem of data availability with as few trade-offs and downsides as +possible. + +Riak's focus on availability makes it a good fit whenever downtime is +unacceptable. No one can promise 100% uptime, but Riak is designed to +survive network partitions and hardware failures that would +significantly disrupt most databases. + +A less-heralded feature of Riak is its predictable latency. Because its +fundamental operations---read, write, and delete---do not involve +complex data joins or locks, it services those requests promptly. Thanks +to this capability, Riak is often selected as a data storage backend for +data management software from a variety of paradigms, such as +[Datomic]. + +From the standpoint of the actual content of your data, Riak might also +be a good choice if your data can be modeled as one of Riak's currently +available [Data Types][dev data types]: flags, registers, counters, +sets, or maps. These Data Types enable you to take advantage of Riak's +high availability approach while simplifying application development. + +### When Riak is Less of a Good Fit + +We recommend running no fewer than 5 data servers in a cluster. +This means that Riak can be overkill for small databases. 
If you're not +already sure that you will need a distributed database, there's a good +chance that you won't need Riak. + +If explosive growth is a possibility, however, you are always highly +advised to prepare for that in advance. Scaling at Internet speeds is +sometimes compared to overhauling an airplane mid-flight. If you feel +that such a transition might be necessary in the future, then you might +want to consider Riak. + +Riak's simple data model, consisting of keys and values as its atomic +elements, means that your data must be denormalized if your system is to +be reasonably performant. For most applications this is not a serious +hurdle. But if your data simply cannot be effectively managed as keys +and values, Riak will most likely not be the best fit for you. + +Correspondingly, if your application demands a high query load by any +means other than key/value lookup---e.g. SQL-style `SELECT * FROM table` +operations---Riak will not be as efficient as other databases. If you +wish to compare Riak with other data technologies, Basho offers a tool +called [Basho Bench] to help measure its performance, so that you can +decide whether the availability and operational benefits of Riak +outweigh its disadvantages. + +## How Does a Riak Cluster Work? + +A Riak cluster is a group of **nodes** that are in constant +communication to ensure data availability and partition tolerance. + +### What is a Riak Node? + +A Riak node is not quite the same as a server, but in a production +environment the two should be equivalent. A developer may run multiple +nodes on a single laptop, but this would never be advisable in a real +production cluster. + +Each node in a Riak cluster is equivalent, containing a complete, +independent copy of the whole Riak package. There is no "master" node; +no node has more responsibilities than others; and no node has special +tasks not performed by other nodes. This uniformity provides the basis +for Riak's fault tolerance and scalability. + +Each node is responsible for multiple data partitions, as discussed +below: + +### Riak Automatically Re-Distributes Data When Capacity is Added + +When you add (or remove) machines, data is rebalanced automatically with +no downtime. New machines claim data until ownership is equally spread +around the cluster, with the resulting cluster status updates shared to +every node via a gossip protocol and used to route requests. This is +what makes it possible for any node in the cluster to receive requests. +The end result is that developers don't need to deal with the underlying +complexity of where data lives. + +### Consistent Hashing + +Data is distributed across nodes using consistent hashing. Consistent +hashing ensures that data is evenly distributed around the cluster and +makes possible the automatic redistribution of data as the cluster +scales. + +### Intelligent Replication + +Riak's replication scheme ensures that you can still read, write, and +update data if nodes go down. Riak allows you to set a replication +variable, N (also known as the `n_val`), that specifies the number of +nodes on which a value will be replicated. + +An `n_val` value of 3 (the default) means that each object is replicated +3 times. When an object's key is mapped onto a given node, Riak will +continue on and automatically replicate the data onto two more nodes. +This parameter enables you to replicate values to 7 nodes in a 10-node +cluster, 10 nodes in a 15-node cluster, and so on. 
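+
+As a minimal illustration (a sketch only, assuming the Ruby client's
+bucket-properties setter; the bucket name here is hypothetical):
+
+```ruby
+require 'riak'
+
+client = Riak::Client.new(nodes: [{host: '127.0.0.1'}])
+bucket = client.bucket('users')
+
+# Raise this bucket's replication factor from the default n_val of 3 to 5.
+bucket.props = {'n_val' => 5}
+```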
+ +## When Things Go Wrong + +Riak retains fault tolerance, data integrity, and availability even in +failure conditions such as hardware failure and network partitions. Riak +has a number of means of addressing these scenarios and other bumps in +the road, like version conflicts in data. + +### Hinted Handoff + +Hinted handoff enables Riak to handle node failure. If a node goes down, +a neighboring node will take over its storage operations. When the +failed node returns, the updates received by the neighboring node are +handed back to it. This ensures that availability for writes and updates +is maintained automatically, minimizing the operational burden of +failure conditions. + +### Version Conflicts + +In any system that replicates data, conflicts can arise, for example +when two clients update the same object at the exact same time or when +not all updates have yet reached hardware that is experiencing lag. + +In Riak, replicas are [eventually consistent][concept eventual consistency], +meaning that while data is always available, not all replicas may have +the most recent update at the exact same time, causing brief +periods---generally on the order of milliseconds---of inconsistency +while all state changes are synchronized. + +Riak addresses data conflicts as follows: When you make a read request, +Riak looks up all replicas for that object. By default, Riak will return +the most recently updated version, determined by looking at the object's +vector clock. Vector clocks are metadata attached to each replica when +it is created. They are extended each time a replica is updated to keep +track of versions. You can also allow clients to resolve conflicts +themselves if that is a better fit for your use case. + +### Riak Data Types + +If you are not interested in dealing with version conflicts on the +application side, [Riak Data Types][dev data types] offer a powerful +yet easy-to-use means of storing certain types of data while allowing +Riak to handle merge conflicts. These conflicts are resolved +automatically by Riak using Data Type-specific algorithms inspired by +research into [convergent replicated data types]. + +### Read Repair + +When an outdated replica is returned as part of a read request, Riak +will automatically update the out-of-sync replica to make it consistent. +[Read repair][glossary read rep], a self-healing property of +the database, will even update a replica that returns a `not_found` in +the event that a node loses the data due to physical failure. + +### Reading and Writing Data in Failure Conditions + +In Riak, you can set an R value for reads and a W value for writes. +These values give you control over how many replicas must respond to a +request for it to succeed. + +Let's say that you have an N value of 3 (aka `n_val=3`) for a particular +key/value pair, but one of the physical nodes responsible for a replica +is down. With an `r=2` setting, only 2 replicas must return results for +read to be deemed successful. This allows Riak to provide read +availability even when nodes are down or laggy. The same applies for the +W in writes. If this value is not specified, Riak defaults to `quorum`, +according to which the majority of nodes must respond. + +There is more on [replication properties][apps replication properties] elsewhere in the +documentation. 
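+
+To make the `r` value above concrete, here is how such a read could be
+issued through Riak's HTTP interface. This is an illustrative sketch,
+assuming a local node on the default port 8098 and a hypothetical
+`mybucket`/`mykey` pair.
+
+```bash
+# With n_val=3, this read succeeds as soon as 2 replicas respond
+curl http://localhost:8098/buckets/mybucket/keys/mykey?r=2
+```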
+
+
+
+
diff --git a/content/riak/kv/3.0.4/release-notes.md b/content/riak/kv/3.0.4/release-notes.md
new file mode 100644
index 0000000000..6f48ca5c47
--- /dev/null
+++ b/content/riak/kv/3.0.4/release-notes.md
@@ -0,0 +1,40 @@
+---
+title: "Riak KV 3.0.4 Release Notes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Release Notes"
+    identifier: "index_release_notes"
+    weight: 101
+    parent: index
+toc: false
+aliases:
+  - /riak/3.0.4/community/release-notes
+  - /riak/kv/3.0.4/intro-v20
+  - /riak/3.0.4/intro-v20
+  - /riak/kv/3.0.4/introduction
+---
+
+Released April 2nd, 2021.
+
+
+## Overview
+
+There are two fixes provided in Release 3.0.4:
+
+An issue with leveled application dependencies has been resolved, so lz4 can once again be used as the compression method.
+
+The Riak clients are now compatible with systems that require semantic versioning.
+
+This release is tested with OTP 20, OTP 21 and OTP 22, but optimal performance is likely to be achieved when using OTP 22.
+
+## Previous Release Notes
+
+Please see the KV 3.0.3 release notes [here]({{<baseurl>}}riak/kv/3.0.3/release-notes/).
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup.md b/content/riak/kv/3.0.4/setup.md
new file mode 100644
index 0000000000..a3d074d6e3
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup.md
@@ -0,0 +1,51 @@
+---
+title: "Setup Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Setup"
+    identifier: "setup_index"
+    weight: 110
+    pre: install
+toc: false
+aliases:
+---
+
+[plan index]: ../setup/planning
+[install index]: ../setup/installing
+[upgrade index]: ../setup/upgrading
+[downgrade]: ../setup/downgrade
+
+## In This Section
+
+#### [Planning][plan index]
+
+Information on planning your Riak KV cluster, including software & hardware recommendations.
+
+[Learn More >>][plan index]
+
+#### [Installing][install index]
+
+Step-by-step tutorials on installing Riak KV.
+
+[Learn More >>][install index]
+
+#### [Upgrading][upgrade index]
+
+Guides on upgrading your Riak KV cluster.
+
+[Learn More >>][upgrade index]
+
+#### [Downgrading][downgrade]
+
+A guide on downgrading your Riak KV cluster.
+
+[Learn More >>][downgrade]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/downgrade.md b/content/riak/kv/3.0.4/setup/downgrade.md
new file mode 100644
index 0000000000..a7f4a9f416
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/downgrade.md
@@ -0,0 +1,179 @@
+---
+title: "Downgrading"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Downgrading"
+    identifier: "downgrading"
+    weight: 103
+    parent: "setup_index"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/upgrading/rolling-downgrades/
+  - /riak/kv/3.0.4/ops/upgrading/rolling-downgrades/
+---
+
+[rolling upgrade]: {{<baseurl>}}riak/kv/3.0.4/setup/upgrading/cluster
+[config ref]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[concept aae]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/
+[aae status]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#aae-status
+
+Downgrades of Riak KV are tested and supported for two feature release versions, with the general procedure being similar to that of a [rolling upgrade][rolling upgrade].
+
+Depending on the versions involved in the downgrade, there are additional steps to be performed before, during, and after the downgrade on each node. These steps are related to changes or new features that are not present in the downgraded version.
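+
+Before working through the steps below on each node, it can be worth
+confirming that the cluster is stable and that no handoffs are in
+flight. This optional pre-flight check is a sketch that reuses admin
+commands shown later in this guide:
+
+```bash
+riak-admin transfers   # should report no active partition transfers
+riak-admin aae-status  # snapshot of AAE exchange state before you begin
+```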
+
+## Overview
+
+For every node in the cluster:
+
+1. Stop Riak KV.
+2. Back up Riak's `etc` and `data` directories.
+3. Downgrade Riak KV.
+4. Remove Riak search index and temporary data.
+5. Reconfigure Solr cores.
+6. Start Riak KV and disable Riak search.
+7. Monitor the reindex of the data.
+8. Finalize process and restart Riak KV & Riak search.
+
+### Guidelines
+
+* Riak Control should be disabled throughout the rolling downgrade process.
+* [Configuration Files][config ref] must be replaced with those of the version being downgraded to.
+
+
+### Components That Complicate Downgrades
+
+| Feature | Automatic | Required | Notes |
+|:---|:---:|:---:|:---|
+| Migration to Solr 4.10.4 | ✔ | ✔ | Applies to all clusters using Riak Search. |
+| Active Anti-Entropy file format changes | ✔ | | Can be opted out using a [capability](#aae_tree_capability). |
+
+
+### When Downgrading is No Longer an Option
+
+If you enabled LZ4 compression in LevelDB and/or enabled global expiration in LevelDB when you installed KV 3.0.4, you cannot downgrade.
+
+
+## General Process
+
+{{% note %}}
+While the cluster contains mixed version members, if you have not set the cluster to use the legacy AAE tree format, you will see the `bad_version` error emitted to the log any time nodes with differing versions attempt to exchange AAE data (including AAE fullsync).
+
+This is benign and similar to the `not_built` and `already_locked` errors which can be seen during normal AAE operation. These events will stop once the downgrade is complete.
+{{% /note %}}
+
+### Stop Riak KV and remove Riak search index & temporary data
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+2\. Back up your Riak KV `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Downgrade Riak KV:
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+4\. Remove the Riak search index data and AAE data:
+
+    1. Remove the cached Solr web application from the yz_temp folder. For the default package paths, this would be `/var/lib/riak/yz_temp/solr-webapp`.
+
+    ```bash
+    rm -rf /var/lib/riak/yz_temp/solr-webapp
+    ```
+    2. Delete the Solr cores located in the yz directory. If you have custom `solrconfig.xml` files, you will need to restore the core from backup instead.
+
+    For example:
+
+    ```bash
+    rm -rf /var/lib/riak/yz/example_core1
+    rm -rf /var/lib/riak/yz/example_core2
+    ```
+
+### Prepare to Re-index Solr Cores
+
+5\. (**Optional**) You can increase the AAE operation concurrency and increase the number of build operations while lowering the build limit's interval. This will increase the speed at which the AAE trees are rebuilt and the search indexes are repopulated. However, if you have a latency-sensitive application, you should adjust these settings with care.
+
+```riak.conf
+anti_entropy.concurrency_limit = 8
+anti_entropy.tree.build_limit.number = 4
+anti_entropy.tree.build_limit.per_timespan = 5m
+```
+
+### Start the node and disable Yokozuna
+
+6\. Start Riak KV:
+{{% note %}}
+Search results will be inconsistent until **Step 8.1** is complete.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7\. Wait for Riak search to start by running the following command:
+
+```bash
+riak-admin wait-for-service yokozuna
+```
+
+8\. Run `riak attach`.
+
+    1. Run the following snippet to prevent this node from participating in distributed Riak Search queries:
+
+    ```
+    riak_core_node_watcher:service_down(yokozuna).
+    ```
+
+    2. Expire the Yokozuna AAE Trees:
+
+    ```
+    yz_entropy_mgr:expire_trees().
+    ```
+
+    3. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+### Monitor the reindex of the data
+
+9\. Monitor the build and exchange progress using the `riak-admin aae-status` and `riak-admin search aae-status` commands.
+
+The **All** column shows how long it has been since a partition exchanged with all of its sibling replicas. Consult the [`riak-admin aae-status` documentation][aae status] for more information about the AAE status output.
+
+Once both `riak-admin aae-status` and `riak-admin search aae-status` show values in the **All** column, the node will have successfully rebuilt all of the indexed data.
+
+### Finalize process and restart Yokozuna
+
+
+10\. If you raised the AAE concurrency settings in `riak.conf` during **Step 5**, stop the node and remove the increased AAE thresholds.
+
+11\. If you chose not to increase the AAE concurrency via configuration and want to start Yokozuna without restarting the node, run `riak attach` and enter the following snippet:
+
+```erlang
+riak_core_node_watcher:service_up(yokozuna,whereis(yz_solr_proc)).
+```
+
+12\. Exit the attach session by pressing **Ctrl-G** then **q**.
+
+13\. Verify that transfers have completed:
+
+```bash
+riak-admin transfers
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing.md b/content/riak/kv/3.0.4/setup/installing.md
new file mode 100644
index 0000000000..4da022ddf0
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing.md
@@ -0,0 +1,61 @@
+---
+title: "Installing Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Installing"
+    identifier: "installing"
+    weight: 101
+    parent: "setup_index"
+    pre: cog
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/installing
+  - /riak/kv/3.0.4/ops/building/installing
+  - /riak/3.0.4/installing/
+  - /riak/kv/3.0.4/installing/
+---
+
+[install aws]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/amazon-web-services
+[install debian & ubuntu]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/debian-ubuntu
+[install freebsd]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/freebsd
+[install mac osx]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/mac-osx
+[install rhel & centos]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/rhel-centos
+[install suse]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/suse
+[install windows azure]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/windows-azure
+[install source index]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/source
+[community projects]: {{<baseurl>}}community/projects
+[upgrade index]: {{<baseurl>}}riak/kv/3.0.4/setup/upgrading
+
+## Supported Platforms
+
+Riak is supported on numerous popular operating systems and virtualized
+environments. The following information will help you to
+properly install or upgrade Riak in one of the supported environments:
+
+  * [Amazon Web Services][install aws]
+  * [Debian & Ubuntu][install debian & ubuntu]
+  * [FreeBSD][install freebsd]
+  * [Mac OS X][install mac osx]
+  * [RHEL & CentOS][install rhel & centos]
+  * [SUSE][install suse]
+  * [Windows Azure][install windows azure]
+
+## Building from Source
+
+If your platform isn’t listed above, you may be able to build Riak from source. See [Installing Riak from Source][install source index] for instructions.
+
+## Community Projects
+
+Check out [Community Projects][community projects] for installing with tools such as [Chef](https://www.chef.io/chef/), [Ansible](http://www.ansible.com/), or [Cloudsoft](http://www.cloudsoftcorp.com/).
+
+## Upgrading
+
+For information on upgrading an existing cluster, see [Upgrading Riak KV][upgrade index].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing/amazon-web-services.md b/content/riak/kv/3.0.4/setup/installing/amazon-web-services.md
new file mode 100644
index 0000000000..f97437b4f6
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/amazon-web-services.md
@@ -0,0 +1,153 @@
+---
+title_supertext: "Installing on"
+title: "Amazon Web Services"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Amazon Web Services"
+    identifier: "installing_amazon_web_services"
+    weight: 301
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/installing/Installing-on-AWS-Marketplace
+  - /riak/kv/3.0.4/ops/building/installing/Installing-on-AWS-Marketplace
+  - /riak/3.0.4/installing/amazon-web-services/
+  - /riak/kv/3.0.4/installing/amazon-web-services/
+---
+
+
+## Launching Riak VMs via the AWS Marketplace
+
+{{% note title="Note" %}}
+The AWS Marketplace does not always have the most recent versions of Riak available. To use the latest version of Riak, please refer to the [Installing from Package](#installing-from-package) section below.
+{{% /note %}}
+
+In order to launch a Riak virtual machine via the AWS Marketplace, you will first need to sign up for an [Amazon Web Services](http://aws.amazon.com) account.
+
+1. Navigate to [https://aws.amazon.com/marketplace/](https://aws.amazon.com/marketplace/) and sign in with your Amazon Web Services account.
+
+2. Locate Riak in the **Databases & Caching** category or search for Riak from any page.
+
+3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair.
+
+    ![AWS Marketplace Instance Settings]({{<baseurl>}}images/aws-marketplace-settings.png)
+
+4. Click the **Accept Terms and Launch with 1-Click** button.
+
+### Security Group Settings
+
+Once the virtual machine is created, you should verify that your selected EC2 security group is properly configured for Riak.
+
+1. In the AWS EC2 Management Console, click **Security Groups**, then click the name of the security group for your Riak VM.
+
+2. Click on the **Inbound** tab in the lower pane. Your security group should include the following open ports:
+
+    * 22 (SSH)
+    * 8087 (Riak Protocol Buffers Interface)
+    * 8098 (Riak HTTP Interface)
+
+3. You will need to add additional rules within this security group to allow your Riak instances to communicate. For each port range below, create a new **Custom TCP rule** with the source set to the current security group ID (found on the **Details** tab).
+
+    * Port range: 4369
+    * Port range: 6000-7999
+    * Port range: 8099
+
+4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button.
+
+    ![EC2 Security Group Settings]({{<baseurl>}}images/aws-marketplace-security-group.png)
+
+We also recommend that you read more about Riak's [Security and Firewalls]({{<baseurl>}}riak/kv/3.0.4/using/security/).
+
+## Clustering Riak on AWS
+
+You will need to launch at least 3 instances to form a Riak cluster.
When the instances have been provisioned and the security group is configured, you can connect to them using SSH or PuTTY as the ec2-user.
+
+You can find more information on connecting to an instance in the official [Amazon EC2 instance guide](http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html).
+
+{{% note title="Note" %}}
+The following clustering setup will _not_ be resilient to instance restarts
+unless deployed in Amazon VPC.
+{{% /note %}}
+
+{{% note title="Note on Package Based Installation" %}}
+If installing to AWS by package, further configuration of _riak.conf_ to set the node name and listening IP addresses is necessary for the below steps to function.
+{{% /note %}}
+
+1. On the first node, obtain the internal IP address:
+
+    ```bash
+    curl http://169.254.169.254/latest/meta-data/local-ipv4
+    ```
+
+2. For all other nodes, use the internal IP address of the first node:
+
+    ```bash
+    sudo riak-admin cluster join riak@<ip.of.first.node>
+    ```
+
+3. After all of the nodes are joined, execute the following:
+
+    ```bash
+    sudo riak-admin cluster plan
+    ```
+
+    If this looks good:
+
+    ```bash
+    sudo riak-admin cluster commit
+    ```
+
+    To check the status of clustering use:
+
+    ```bash
+    sudo riak-admin member_status
+    ```
+
+You now have a Riak cluster running on AWS.
+
+
+## Installing From Package
+
+#### AWS (2)
+
+You can install on Amazon Linux 2 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4-1.amzn2x86_64.rpm
+sudo yum localinstall -y riak-3.0.4-1.amzn2x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4-1.amzn2x86_64.rpm
+sudo rpm -i riak-3.0.4-1.amzn2x86_64.rpm
+```
+
+
+#### AWS (2016.09)
+
+You can install on Amazon Linux AMI 2016.09 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4-1.amzn1x86_64.rpm
+sudo yum localinstall -y riak-3.0.4-1.amzn1x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4-1.amzn1x86_64.rpm
+sudo rpm -i riak-3.0.4-1.amzn1x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed and you have set the [Security Group Settings](#security-group-settings), check out [Verifying a Riak Installation][install verify].
+
+[install verify]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/verify
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing/debian-ubuntu.md b/content/riak/kv/3.0.4/setup/installing/debian-ubuntu.md
new file mode 100644
index 0000000000..cdc1ec7d8b
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/debian-ubuntu.md
@@ -0,0 +1,171 @@
+---
+title_supertext: "Installing on"
+title: "Debian and Ubuntu"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Debian & Ubuntu"
+    identifier: "installing_debian_ubuntu"
+    weight: 302
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/kv/3.0.4/ops/building/installing/Installing-on-Debian-and-Ubuntu
+  - /riak/3.0.4/installing/debian-ubuntu/
+  - /riak/kv/3.0.4/installing/debian-ubuntu/
+---
+
+[install source index]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/source/
+[security index]: {{<baseurl>}}riak/kv/3.0.4/using/security/
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/verify
+
+Riak KV can be installed on Debian or Ubuntu-based systems using a binary
+package or by compiling from source code.
+
+The following steps have been tested to work with Riak KV on:
+
+- Ubuntu 18.04
+- Ubuntu 16.04
+- Ubuntu 14.04
+- Ubuntu 12.04
+- Debian 9.2
+- Debian 8.6
+- Debian 7.6
+- Raspbian Buster
+
+> **Note on Debian 7**
+>
+> If you wish to install Riak on Debian 7, you may need to install
+[libc6](https://packages.debian.org/search?keywords=libc6) version 2.15 or
+later, which in turn requires upgrading your system to
+[sid](https://www.debian.org/releases/sid/). Installation instructions
+can be found
+[here](https://wiki.debian.org/DebianUnstable#How_do_I_install_Sid.3F).
+>
+> Once sid has been installed, you can install libc6 with the following
+command:
+>
+>```bash
+apt-get -t sid install libc6 libc6-dev libc6-dbg
+```
+
+## Installing From Package
+
+If you wish to install the deb packages by hand, follow these
+instructions.
+
+### Installing on Non-LTS Ubuntu Releases
+
+Typically we only package Riak for LTS releases to keep our build and
+testing matrix focused. In some cases, such as the historic Ubuntu 11.04 (Natty),
+there are changes that affect how Riak is packaged, so we will release a
+separate package for that non-LTS release. In most other cases, however,
+if you are running a non-LTS release, it is safe to follow the
+instructions below for the LTS release prior to yours. For example, on
+Ubuntu 12.10 you would follow the installation instructions for
+Ubuntu 12.04.
+
+### PAM Library Requirement for Ubuntu
+
+One dependency that may be missing on your machine is the `libpam0g-dev`
+package used for Pluggable Authentication Module (PAM) authentication,
+associated with [Riak security][security index].
+
+To install:
+
+```bash
+sudo apt-get install libpam0g-dev
+```
+
+### Riak 64-bit Installation
+
+#### Ubuntu Bionic Beaver (18.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/bionic64/riak_3.0.4-1_amd64.deb
+sudo dpkg -i riak_3.0.4-1_amd64.deb
+```
+
+#### Ubuntu Xenial Xerus (16.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.4-1_amd64.deb
+sudo dpkg -i riak_3.0.4-1_amd64.deb
+```
+
+#### Ubuntu Trusty Tahr (14.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/trusty64/riak_3.0.4-1_amd64.deb
+sudo dpkg -i riak_3.0.4-1_amd64.deb
+```
+
+#### Ubuntu Precise Pangolin (12.04)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/precise64/riak_3.0.4-1_amd64.deb
+sudo dpkg -i riak_3.0.4-1_amd64.deb
+```
+
+#### Debian Stretch (9.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak_3.0.4-1_amd64.deb
+sudo dpkg -i riak_3.0.4-1_amd64.deb
+```
+
+#### Debian Jessie (8.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/8/riak_3.0.4-1_amd64.deb
+sudo dpkg -i riak_3.0.4-1_amd64.deb
+```
+
+#### Debian Wheezy (7.0)
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/7/riak_3.0.4-1_amd64.deb
+sudo dpkg -i riak_3.0.4-1_amd64.deb
+```
+
+#### Raspbian Buster
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/raspbian/buster/riak_3.0.4-1_armhf.deb
+sudo dpkg -i riak_3.0.4-1_armhf.deb
+```
+
+
+## Installing From Source
+
+First, install Riak dependencies using apt:
+
+```bash
+sudo apt-get install build-essential libc6-dev-i386 git
+```
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/riak-3.0.4.tar.gz
+tar zxvf riak-3.0.4.tar.gz
+cd riak-3.0.4
+make rel
+```
+
+If the build was successful, a fresh build of Riak will exist in the
+`rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing/freebsd.md b/content/riak/kv/3.0.4/setup/installing/freebsd.md
new file mode 100644
index 0000000000..2f8c31f9fb
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/freebsd.md
@@ -0,0 +1,133 @@
+---
+title_supertext: "Installing on"
+title: "FreeBSD"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "FreeBSD"
+    identifier: "installing_freebsd"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/installing/Installing-on-FreeBSD
+  - /riak/kv/3.0.4/ops/building/installing/Installing-on-FreeBSD
+  - /riak/3.0.4/installing/freebsd/
+  - /riak/kv/3.0.4/installing/freebsd/
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/3.0.4/downloads/
+[install verify]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/verify
+
+You can install Riak on FreeBSD for the AMD64 architecture with a binary package or by building from source code.
+
+## Installing From Binary Package
+
+Installing Riak from a binary package is the simplest method, with the fewest required dependencies, and it takes less time to complete than building from source.
+
+### Prerequisites and Dependencies
+
+Riak requires `sudo` to be installed if the Riak command line tools are to be executed by users other than the *riak* user.
Please ensure that `sudo` is installed via packages or the ports collection prior to installing the Riak package. + +### Installation + +You can install the Riak binary package on FreeBSD remotely using the +`pkg_add` remote option. For this example, we're installing `riak-3.0.4.txz`. + +### For FreeBSD 11.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/3.0/3.0.4/freebsd/11.1/riak-3.0.4.txz +``` + + +### For FreeBSD 10.x + +```bash +sudo pkg_add -r https://files.tiot.jp/riak/kv/3.0/3.0.4/freebsd/10.4/riak-3.0.4.txz +``` + +When Riak is installed, a message is displayed with information about the installation and available documentation. + +``` +Thank you for installing Riak. + +Riak has been installed in /usr/local owned by user:group riak:riak + +The primary directories are: + + {platform_bin_dir, "/usr/local/sbin"} + {platform_data_dir, "/var/db/riak"} + {platform_etc_dir, "/usr/local/etc/riak"} + {platform_lib_dir, "/usr/local/lib/riak"} + {platform_log_dir, "/var/log/riak"} + +These can be configured and changed in the platform_etc_dir/app.config. + +Add /usr/local/sbin to your path to run the riak and riak-admin scripts directly. + +Man pages are available for riak(1) and riak-admin(1) +``` + +## Installing From Source + +Installing Riak from source on FreeBSD is a straightforward process which requires installation of more dependencies (such as Erlang) prior to building, and requires more time than a binary package installation. + +That said, installing from source provides for greater flexibility with respect to configuration, data root locations, and more fine grained control over specific dependency versions. + +### Prerequisites and Dependencies + +When building and installing Riak from source, you might be required to install some prerequisite software before proceeding with the build. + +If you do not currently have the following software installed, please install it with packages or the ports collection before proceeding. + +* Erlang ([Installing Erlang][install source erlang]) +* Curl +* Git +* OpenSSL (version 1.0.0_7) +* Python +* sudo +* flex + +### Installation +First download the version you wish to install from the [downloads][downloads]. + +Next, unpack and build a release from source: + +```bash +tar zxf <riak-x.x.x> +cd riak-x.x.x +gmake rel +``` + +Upon conclusion of the build, the `rel/riak` directory will contain a full Riak node environment, including configuration, data, and log directories: + +```bash +bin # Riak binaries +data # Riak data and metadata +erts-5.9.2 # Erlang Run-Time System +etc # Riak Configuration +lib # Third party libraries +log # Operational logs +releases # Release information +``` + +If you'd prefer to build a development environment consisting of 4 nodes which can be run as a cluster on one machine, specify the `devrel` target instead of the `rel` target, like this: + +```bash +gmake devrel +``` + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing/mac-osx.md b/content/riak/kv/3.0.4/setup/installing/mac-osx.md
new file mode 100644
index 0000000000..da86bdf26e
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/mac-osx.md
@@ -0,0 +1,121 @@
+---
+title_supertext: "Installing on"
+title: "Mac OS X"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Mac OS X"
+    identifier: "installing_macosx"
+    weight: 303
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/kv/3.0.4/ops/building/installing/Installing-on-Mac-OS-X
+  - /riak/3.0.4/installing/mac-osx/
+  - /riak/kv/3.0.4/installing/mac-osx/
+---
+
+
+
+[perf open files]: {{<baseurl>}}riak/kv/3.0.4/using/performance/open-files-limit
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/source/erlang
+[install verify]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/verify
+
+The following steps are known to work with Mac OS X 10.8 and higher. You can install from source or download a precompiled tarball.
+
+> **`ulimit` on OS X**
+>
+> OS X gives you a very small limit on open file handles, so even with a
+backend that uses very few file handles, it's possible to run out. See
+[Open Files Limit][perf open files] for more information about changing the limit.
+
+
+## From Precompiled Tarballs
+
+To run Riak from our precompiled tarball, run these commands for the
+appropriate platform:
+
+### 64-bit
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/3.0/3.0.4/osx/10.11/riak-3.0.4-OSX-x86_64.tar.gz
+tar xzvf riak-3.0.4-OSX-x86_64.tar.gz
+```
+
+After the release is untarred, you will be able to `cd` into the `riak`
+directory and execute `bin/riak start` to start the Riak node.
+
+## Homebrew
+
+{{% note title="Warning: Homebrew not always up to date" %}}
+Homebrew's Riak recipe is community supported, and thus is not always up to
+date with the latest Riak package. Please ensure that the current recipe is
+using the latest supported code (and don't be afraid to update it if it's
+not).
+{{% /note %}}
+
+Installing Riak 3.0.4 with [Homebrew](http://brew.sh/) is easy:
+
+```bash
+brew install --devrel riak
+```
+
+By default, this will place a `3.0.4` folder in
+`/usr/local/Cellar/riak`.
+
+Be aware that you will most likely see the following message after
+running `brew install`:
+
+```
+Error: The `brew link` step did not complete successfully
+The formula built, but is not symlinked into /usr/local
+
+You can try again using:
+  brew link riak
+```
+
+We do not recommend using `brew link` with Riak. Instead, we recommend
+either copying that directory to a desired location on your machine,
+aliasing the executables in the `/bin` directory, or interacting with
+the Riak installation directory via environment variables.
+
+**Note**: Homebrew will install Erlang if you don't have it already.
+
+## Installing From Source
+
+You must have Xcode tools installed from [Apple's Developer
+website](http://developer.apple.com/).
+
+{{% note title="Note on Clang" %}}
+Riak has had problems compiling with Clang in the past. As of Riak KV
+2.9.0p5 and Clang 902.0.39.1, Clang can build Riak.
+{{% /note %}}
+
+Riak requires [Erlang](http://www.erlang.org/) R16B02+.
+
+If you do not have Erlang already installed, see [Installing Erlang][install source erlang].
+
+Next, download and unpack the source distribution.
+ +```bash +curl -O https://files.tiot.jp/riak/kv/3.0/3.0.4/riak-3.0.4.tar.gz +tar zxvf riak-3.0.4.tar.gz +cd riak-3.0.4 +make rel +``` + +If you receive errors when building about "incompatible architecture," +please verify that you built Erlang with the same architecture as your +system (Snow Leopard and higher: 64bit). + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. + + + + + diff --git a/content/riak/kv/3.0.4/setup/installing/rhel-centos.md b/content/riak/kv/3.0.4/setup/installing/rhel-centos.md new file mode 100644 index 0000000000..edc4c08ce3 --- /dev/null +++ b/content/riak/kv/3.0.4/setup/installing/rhel-centos.md @@ -0,0 +1,134 @@ +--- +title_supertext: "Installing on" +title: "RHEL and CentOS" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "RHEL & CentOS" + identifier: "installing_rhel_centos" + weight: 304 + parent: "installing" +toc: true +aliases: + - /riak/3.0.4/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/kv/3.0.4/ops/building/installing/Installing-on-RHEL-and-CentOS + - /riak/3.0.4/installing/rhel-centos/ + - /riak/kv/3.0.4/installing/rhel-centos/ +--- + + + +[install source index]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/source +[install source erlang]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/source/erlang +[install verify]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/verify + +Riak KV can be installed on CentOS- or Red-Hat-based systems using a binary +package or by [compiling Riak from source code][install source index]. The following steps have been tested to work with Riak on +CentOS/RHEL 6.9, 7.5.1804 and 8.1.1911 . + +> **Note on SELinux** +> +> CentOS enables SELinux by default, so you may need to disable SELinux if +you encounter errors. + +## Installing From Package + +If you wish to install the RHEL/CentOS packages by hand, follow these +instructions. 
+
+### For CentOS 8 / RHEL 8
+
+Before installing Riak on CentOS 8/RHEL 8, we need to satisfy some Erlang dependencies
+from EPEL first by installing the EPEL repository:
+
+```bash
+sudo yum install -y epel-release
+```
+
+Once EPEL has been installed, you can install Riak on CentOS 8/RHEL 8 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4-1.el8.x86_64.rpm
+sudo yum localinstall -y riak-3.0.4-1.el8.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4-1.el8.x86_64.rpm
+sudo rpm -Uvh riak-3.0.4-1.el8.x86_64.rpm
+```
+
+### For CentOS 7 / RHEL 7
+
+You can install Riak on CentOS 7/RHEL 7 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4-1.el7.x86_64.rpm
+sudo yum localinstall -y riak-3.0.4-1.el7.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4-1.el7.x86_64.rpm
+sudo rpm -Uvh riak-3.0.4-1.el7.x86_64.rpm
+```
+
+### For CentOS 6 / RHEL 6
+
+You can install Riak on CentOS 6/RHEL 6 using yum, which we recommend:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/6/riak-3.0.4-1.el6.x86_64.rpm
+sudo yum localinstall -y riak-3.0.4-1.el6.x86_64.rpm
+```
+
+Or you can install the `.rpm` package manually:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/6/riak-3.0.4-1.el6.x86_64.rpm
+sudo rpm -Uvh riak-3.0.4-1.el6.x86_64.rpm
+```
+
+## Installing From Source
+
+Riak requires an [Erlang](http://www.erlang.org/) installation.
+Instructions can be found in [Installing Erlang][install source erlang].
+
+Building from source will require the following packages:
+
+* `gcc`
+* `gcc-c++`
+* `glibc-devel`
+* `make`
+* `pam-devel`
+
+You can install these with yum:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make git pam-devel
+```
+
+Now we can download and install Riak:
+
+```bash
+wget https://files.tiot.jp/riak/kv/3.0/3.0.4/riak-3.0.4.tar.gz
+tar zxvf riak-3.0.4.tar.gz
+cd riak-3.0.4
+make rel
+```
+
+You will now have a fresh build of Riak in the `rel/riak` directory.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing/smartos.md b/content/riak/kv/3.0.4/setup/installing/smartos.md
new file mode 100644
index 0000000000..adb566e7ba
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/smartos.md
@@ -0,0 +1,119 @@
+---
+title_supertext: "Installing on"
+title: "SmartOS"
+description: ""
+project: "riak_kv"
+project_version: "3.0.4"
+menu:
+  riak_kv-3.0.4:
+    name: "SmartOS"
+    identifier: "installing_smartos"
+    weight: 305
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/installing/Installing-on-SmartOS
+  - /riak/kv/3.0.4/ops/building/installing/Installing-on-SmartOS
+  - /riak/3.0.4/installing/smartos/
+  - /riak/kv/3.0.4/installing/smartos/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/verify
+
+{{% note title="SmartOS End of Life (EOL) for Riak KV 2.0.0" %}}
+SmartOS is no longer supported in Riak KV 2.0.0+. If you are interested in using Riak KV on SmartOS, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+The following steps have been tested to work with Riak version 1.2 on SmartOS version **joyent_20120614T184600Z**.
They demonstrate installation of a Riak node on SmartOS as the root user. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open +files limit is at least 65536. Check the current limits to verify this: + +```bash +ulimit -a +``` + +To temporarily increase this limit *for the life of your session*, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to `/etc/system`: + +```bash +set rlim_fd_max=65536 +``` + +## Choosing a Version + +SmartOS, albeit powerful, can make some easy tasks (like figuring out a "version" of SmartOS) difficult. Defining the correct version is a combination of the Global Zone snapshot version and the pkgsrc version in the guest zones. Here is the way to determine which Riak package to use. + +The thing that really matters for Riak is what dataset was used to make the SmartOS VM. These datasets come from joyent and appear like this with the `dsadm` command: + +``` +fdea06b0-3f24-11e2-ac50-0b645575ce9d smartos 2012-12-05 sdc:sdc:base64:1.8.4 +f4c23828-7981-11e1-912f-8b6d67c68076 smartos 2012-03-29 sdc:sdc:smartos64:1.6.1 +``` + +This is where the `1.6` and `1.8` versions come from in the package naming. It isn't perfect, but if you know what dataset you used to make your SmartOS VM, you will know which package to use. + +For Joyent Cloud users who don't know what dataset was used, in the guest zone type: + +``` +cat /opt/local/etc/pkgin/repositories.conf +``` + +* If this returns `http://pkgsrc.joyent.com/sdc6/2012Q2/x86_64/All` or any other *2012Q2* you need to use the `1.8` download. +* If this returns `http://pkgsrc.joyent.com/sdc6/2011Q4/x86_64/All` or any other *2011* you need to use the `1.6` download. + +## Download and Install + +Download your version of the Riak binary package for SmartOS: + +```bash +curl -o /tmp/riak-2.1.4-SmartOS-x86_64.tgz https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz +``` + +Next, install the package: + +``` +pkg_add /tmp/riak-2.1.4-SmartOS-x86_64.tgz +``` + +After installing the package, enable the Riak and Erlang Port Mapper Daemon (epmd) services: + +```bash +svcadm -v enable -r riak +``` + +Finally, after enabling the services, check to see that they are online: + +``` +svcs -a | grep -E 'epmd|riak' +``` + +Output from the above command should resemble the following: + +``` +online 17:17:16 svc:/network/epmd:default +online 17:17:16 svc:/application/riak:default +``` + +Finally, and provided that the services are shown to be in an **online** state, go ahead and ping Riak: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed and configured Riak as service on SmartOS. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
+ diff --git a/content/riak/kv/3.0.4/setup/installing/solaris.md b/content/riak/kv/3.0.4/setup/installing/solaris.md new file mode 100644 index 0000000000..45a64f7916 --- /dev/null +++ b/content/riak/kv/3.0.4/setup/installing/solaris.md @@ -0,0 +1,91 @@ +--- +title_supertext: "Installing on" +title: "Solaris" +description: "" +project: "riak_kv" +project_version: "3.0.4" +menu: + riak_kv-3.0.4: + name: "Solaris" + identifier: "installing_solaris" + weight: 306 + parent: "installing" +toc: true +aliases: + - /riak/3.0.4/ops/building/installing/Installing-on-Solaris + - /riak/kv/3.0.4/ops/building/installing/Installing-on-Solaris + - /riak/3.0.4/installing/solaris/ + - /riak/kv/3.0.4/installing/solaris/ +--- + + + +[install verify]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/verify + +{{% note title="Solaris End of Life (EOL) for Riak KV 2.2.0" %}} +Solaris is no longer supported in Riak KV 2.2.1+. If you are interested in using Riak KV on Solaris, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained. +{{% /note %}} + +The following steps have been tested to work with Riak version 1.3.1 on Solaris 10 i386. They demonstrate installation of a Riak node on Solaris as the root user. + +> **Note:** Before installing Riak on Solaris, be sure that you've installed `sudo` as Riak's scripts require it for proper operation. + +## Open Files Limit + +Before proceeding with installation, you should ensure that the system's open files limit is at least 65536 by verifying the current value of `nofiles(descriptors)`. Check the current value with the `ulimit` command: + +```bash +ulimit -a +``` + +To temporarily increase this limit for the life of your session, use the following command: + +```bash +ulimit -n 65536 +``` + +To increase this value in a persistent manner that will be enforced after restarting the system, add the following to the `/etc/system` file: + +``` +set rlim_fd_max=65536 +set rlim_fd_cur=65536 +``` + +Note that you must restart to have the above settings take effect. + +## Download and Install + +Download your version of the Riak binary package for Solaris 10: + +```bash +curl -o /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz +``` + +Next, install the package: + +```bash +gunzip /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg.gz +pkgadd /tmp/BASHOriak-2.2.3-Solaris10-i386.pkg +``` + +After installing the package, be sure to include `/opt/riak/bin` in the +appropriate user's `PATH`. After doing so, you can then start Riak: + +```bash +riak start +``` + +Finally, go ahead and ping Riak to ensure it is running: + +```bash +riak ping +``` + +Pinging Riak will result in a `pong` response if the node is up and reachable. If the node is not up and reachable, a `Node <nodename> not responding to pings` error will result instead. + +If all responses indicate that riak is up and running, then you have successfully installed Riak on Solaris 10. + +## Next Steps + +Now that Riak is installed, check out [Verifying a Riak Installation][install verify]. 
+
diff --git a/content/riak/kv/3.0.4/setup/installing/source.md b/content/riak/kv/3.0.4/setup/installing/source.md
new file mode 100644
index 0000000000..dff46ce9b1
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/source.md
@@ -0,0 +1,110 @@
+---
+title_supertext: "Installing"
+title: "Riak KV From Source"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Installing From Source"
+    identifier: "installing_source"
+    weight: 310
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/Installing-Riak-from-Source
+  - /riak/kv/3.0.4/ops/building/Installing-Riak-from-Source
+  - /riak/3.0.4/installing/source/
+  - /riak/kv/3.0.4/installing/source/
+---
+
+
+
+[install source erlang]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/source/erlang
+[downloads]: {{<baseurl>}}riak/kv/3.0.4/downloads/
+[install debian & ubuntu#source]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/debian-ubuntu/#installing-from-source
+[install freebsd#source]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/freebsd/#installing-from-source
+[install mac osx#source]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/mac-osx/#installing-from-source
+[install rhel & centos#source]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/rhel-centos/#installing-from-source
+[install verify]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/verify
+
+Riak should be installed from source if you are building on a platform
+for which a package does not exist or if you are interested in
+contributing to Riak.
+
+## Dependencies
+
+### Erlang
+
+To install Riak, you will need to have [Erlang](http://www.erlang.org/) installed. We strongly recommend using Basho's patched version of Erlang to install Riak 2.0+. All of the patches in this version have been incorporated into later versions of the official Erlang/OTP release.
+
+See [Installing Erlang][install source erlang] for instructions.
+
+### Git
+
+Riak depends on source code located in multiple Git repositories. Install [Git](https://git-scm.com/) on the target system before attempting the build.
+
+### GCC
+
+Riak will not compile with Clang. Please make sure your default C/C++
+compiler is [GCC](https://gcc.gnu.org/).
+
+## Installation
+
+The following instructions generate a complete, self-contained build of
+Riak in `$RIAK/rel/riak` where `$RIAK` is the location of the unpacked
+or cloned source.
+
+### Installing from source package
+
+Download the Riak source package from the [Download Center][downloads] and build:
+
+```bash
+curl -O https://files.tiot.jp/riak/kv/3.0/3.0.4/riak-3.0.4.tar.gz
+tar zxvf riak-3.0.4.tar.gz
+cd riak-3.0.4
+make locked-deps
+make rel
+```
+
+### Installing from GitHub
+
+The [Riak GitHub repository](http://github.com/basho/riak) has much
+more information on building and installing Riak from source. To clone
+and build Riak from source, follow the steps below.
+
+Clone the repository using [Git](http://git-scm.com) and build:
+
+```bash
+git clone git://github.com/basho/riak.git
+cd riak
+make locked-deps
+make rel
+```
+
+## Platform-Specific Instructions
+
+For instructions about specific platforms, see:
+
+  * [Debian & Ubuntu][install debian & ubuntu#source]
+  * [FreeBSD][install freebsd#source]
+  * [Mac OS X][install mac osx#source]
+  * [RHEL & CentOS][install rhel & centos#source]
+
+If you are running Riak on a platform not in the list above and need
+some help getting it up and running, join The Riak Mailing List and
+inquire about it there. We are happy to help you get up and running with
+Riak.
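+
+Once `make rel` completes, a quick way to confirm the build works is to
+start the self-contained release and ping it. This is a sketch, assuming
+the build landed in `rel/riak` as described above:
+
+```bash
+cd rel/riak
+bin/riak start
+bin/riak ping   # a healthy node answers "pong"
+```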
+
+### Windows
+
+Riak is not currently supported on Microsoft Windows.
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing/source/erlang.md b/content/riak/kv/3.0.4/setup/installing/source/erlang.md
new file mode 100644
index 0000000000..4b3b18e4f9
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/source/erlang.md
@@ -0,0 +1,571 @@
+---
+title: "Installing Erlang"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Installing Erlang"
+    identifier: "installing_source_erlang"
+    weight: 301
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/installing/erlang
+  - /riak/kv/3.0.4/ops/building/installing/erlang
+  - /riak/3.0.4/installing/source/erlang/
+  - /riak/kv/3.0.4/installing/source/erlang/
+---
+
+[install index]: {{<baseurl>}}riak/kv/3.0.4/setup/installing
+[security basics]: {{<baseurl>}}riak/kv/3.0.4/using/security/basics
+
+Pre-packaged versions of Riak include an Erlang installation. If you are building Riak from source, you will need to install [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz). **If you do not use this version of Erlang, you will not be able to use Riak's [security features][security basics].**
+
+> **Note on Official Support**
+>
+> Please note that only packaged Riak KV installs are officially supported. Visit [Installing Riak KV][install index] for installing a supported Riak package.
+
+## Prerequisites
+
+#### Contents
+
+* [kerl](#kerl-prerequisites)
+* [Debian/Ubuntu](#debian-ubuntu-prerequisites)
+* [FreeBSD/Solaris](#freebsd-solaris-prerequisites)
+* [Mac OS X](#mac-os-x-prerequisites)
+* [RHEL/CentOS](#rhel-centos-prerequisites)
+
+To build and install Erlang you must have a GNU-compatible build system and these tools:
+
+**Unpacking**
+
+* [GNU unzip](http://www.gzip.org/) or a modern uncompressing utility.
+* [GNU Tar](http://www.gnu.org/software/tar/) for working with GNU TAR archives.
+
+**Building**
+
+* [autoconf](http://www.gnu.org/software/autoconf/autoconf.html): generates configure scripts.
+* [make](http://www.gnu.org/software/make/): generates executables and other non-source files of a program.
+* [gcc](https://gcc.gnu.org/): for compiling C.
+* [ncurses](http://www.gnu.org/software/ncurses/): for terminal-based interfaces.
+* [OpenSSL](https://www.openssl.org/): toolkit that implements the SSL and TLS protocols.
+* [Java SE JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html): platform for deploying Java.
+
+
+## kerl Prerequisites
+
+[kerl](https://github.com/yrashk/kerl) is the quickest way to install different versions of Erlang on most systems.
+
+Install kerl by running the following commands:
+
+```bash
+curl -O https://raw.githubusercontent.com/spawngrid/kerl/master/kerl
+chmod a+x kerl
+```
+
+If you are using Mac OS X, FreeBSD, or Solaris, see the following sections for additional requirements before building with kerl.
+
+Otherwise, continue with [Installing with kerl](#installing-with-kerl).
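+
+Once kerl is downloaded and made executable, you can sanity-check it by
+asking which OTP releases it knows about. This optional sketch assumes
+kerl sits in the current directory, as installed above:
+
+```bash
+./kerl update releases   # refresh kerl's list of known OTP releases
+./kerl list releases     # show the releases kerl can build
+```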
+
+### Configuring kerl on FreeBSD/Solaris
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+                        --enable-kernel-poll --without-odbc"
+```
+
+Then check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf by running:
+
+```shell
+sudo pkg update
+sudo pkg install autoconf
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+### Configuring kerl on Mac OS X
+
+To compile Erlang as 64-bit on Mac OS X you need to instruct kerl to pass the correct flags to the `configure` command.
+
+Start by creating a `~/.kerlrc` file:
+
+```bash
+touch ~/.kerlrc
+```
+
+Next add the following contents to your `~/.kerlrc` file:
+
+```shell
+KERL_CONFIGURE_OPTIONS="--disable-hipe --enable-smp-support --enable-threads
+                        --enable-kernel-poll --without-odbc --enable-darwin-64bit"
+```
+
+On OS X 10.9 (Mavericks) or later, you may need to install [autoconf](https://www.gnu.org/software/autoconf/).
+
+Check for the presence of autoconf by running:
+
+```shell
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf.
+
+With Homebrew:
+
+```shell
+brew install autoconf
+```
+
+Or with curl:
+
+```shell
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've configured kerl and installed autoconf, continue with [Installing with kerl](#installing-with-kerl).
+
+
+
+## Debian/Ubuntu Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `apt-get` commands:
+
+```bash
+sudo apt-get update
+sudo apt-get install build-essential autoconf libncurses5-dev openssl libssl-dev fop xsltproc unixodbc-dev git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+> **Note on build output**
+>
+>These packages are not required for operation of a Riak node.
+Notes in the build output about missing support for wxWidgets can be
+safely ignored when installing Riak in a typical non-graphical server
+environment.
+
+To install packages for graphics support use the following `apt-get` command:
+
+```bash
+sudo apt-get install libwxbase2.8 libwxgtk2.8-dev libqt4-opengl-dev
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu).
+
+
+
+## FreeBSD/Solaris Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `pkg` command:
+
+```bash
+sudo pkg update
+sudo pkg install gcc autoconf gmake flex
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support use the following `pkg` command:
+
+```bash
+sudo pkg install wx28-gtk2-2.8.12_4
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris).
+
+
+
+## Mac OS X Prerequisites
+
+* [XCode Developer Tools](http://developer.apple.com/) - Apple Software Development Tools.
+* [Homebrew](http://brew.sh/) (*optional*) - Package Manager.
+
+First install [XCode Developer Tools](http://developer.apple.com/). XCode is a set of software development tools for developing on OS X.
+
+We also recommend installing [Homebrew](http://brew.sh/), a package manager for OS X. Homebrew is not required to install Erlang and is optional.
+
+Next, if you are running OS X 10.9 (Mavericks) or later, you may need to
+install [autoconf](https://www.gnu.org/software/autoconf/). To check for
+the presence of autoconf run:
+
+```bash
+which autoconf
+```
+
+If this returns `autoconf not found`, install autoconf.
+
+With Homebrew:
+
+```bash
+brew install autoconf
+```
+
+Or with curl:
+
+```bash
+curl -O http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz
+tar zxvf autoconf-2.69.tar.gz
+cd autoconf-2.69
+./configure && make && sudo make install
+```
+
+Once you've installed the prerequisites continue with [Installing on Mac OS X](#installing-on-mac-os-x).
+
+## RHEL/CentOS Prerequisites
+
+### Dependencies
+
+To install the required dependencies run the following `yum` command:
+
+```bash
+sudo yum install gcc gcc-c++ glibc-devel make ncurses-devel openssl-devel autoconf java-1.8.0-openjdk-devel git
+```
+
+### GUI Dependencies
+
+If you're using a graphical environment and want to use Erlang's GUI utilities, you will need to install additional dependencies.
+
+To install packages for graphics support use the following `yum` command:
+
+```bash
+sudo yum install wxBase.x86_64
+```
+
+### Next Steps
+
+Once you've installed the prerequisites, continue with [Installing on RHEL/CentOS](#installing-on-rhel-centos).
+
+
+
+## Installation
+
+* [Installing with kerl](#installing-with-kerl)
+* [Installing on Debian/Ubuntu](#installing-on-debian-ubuntu)
+* [Installing on FreeBSD/Solaris](#installing-on-freebsd-solaris)
+* [Installing on Mac OS X](#installing-on-mac-os-x)
+* [Installing on RHEL/CentOS](#installing-on-rhel-centos)
+
+## Installing with kerl
+
+First make sure you have installed the necessary dependencies and prerequisites found in [kerl Prerequisites](#kerl-prerequisites).
+
+With [kerl](https://github.com/yrashk/kerl) installed, you can install Basho's recommended version of
+Erlang [from GitHub](https://github.com/basho/otp) using the following
+command:
+
+```bash
+./kerl build git git://github.com/basho/otp.git OTP_R16B02_basho10 R16B02-basho10
+```
+
+This builds the Erlang distribution and performs all of the steps
+required to manually install Erlang for you.
+
+After Erlang is successfully built, you can install the build as follows:
+
+```bash
+./kerl install R16B02-basho10 ~/erlang/R16B02-basho10
+. ~/erlang/R16B02-basho10/activate
+```
+
+The last line activates the Erlang build that was just installed into
+`~/erlang/R16B02-basho10`.
+
+> See the kerl [README](https://github.com/yrashk/kerl) for more details on the available commands.
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
+## Installing on Debian/Ubuntu
+
+First make sure you have installed the necessary dependencies found in [Debian/Ubuntu Prerequisites](#debian-ubuntu-prerequisites).
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz).
+ +Using `wget`: + +```bash +wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && make && sudo make install +``` + +Confirm Erlang installed to the correct location: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + +## Installing on FreeBSD/Solaris + +First make sure you installed the necessary dependencies in [FreeBSD/Solaris Prerequisites](#freebsd-solaris-prerequisites). + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +ftp https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Next `cd` into the unpacked directory, build and install Erlang with: + +```bash +cd OTP_R16B02_basho10 +./otp_build autoconf +./configure && gmake && sudo gmake install +``` + +Confirm Erlang installed to the correct location by running: + +```bash +which erl +``` + +And start Erlang from your terminal with: + +```bash +erl +``` + + +## Installing on Mac OS X + +First make sure you have installed the necessary dependencies found in [Mac OS X Prerequisites](#mac-os-x-prerequisites). + +You can install Erlang in several ways on OS X: + +* [From Source](#installing-on-mac-os-x-from-source) +* [Homebrew](#installing-on-mac-os-x-with-homebrew) +* [MacPorts](#installing-on-mac-os-x-with-macports) + +## Installing on Mac OS X from Source + +Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz): + +```bash +curl -O https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz +``` + +Then unpack the download with: + +```bash +tar zxvf otp_src_R16B02-basho10.tar.gz +``` + +Follow the steps below to configure Erlang for your operating system. + +#### Configuring Erlang on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), and Lion (OS X 10.7) + +If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion +(OS X 10.7) you can use LLVM (the default) or GCC to compile Erlang. 
+
+Using LLVM:
+
+```bash
+CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+Or if you prefer GCC:
+
+```bash
+CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on Snow Leopard (OS X 10.6)
+
+If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an
+Intel processor:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll --enable-darwin-64bit
+```
+
+#### Configuring Erlang on older versions of OS X
+
+If you're on a non-Intel processor or an older version of OS X:
+
+```bash
+./configure --disable-hipe --enable-smp-support --enable-threads \
+--enable-kernel-poll
+```
+
+After you've configured your system, `cd` into the unpacked directory, then build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with Homebrew
+
+To install Erlang with Homebrew, use this command:
+
+```bash
+brew install erlang
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on Mac OS X with MacPorts
+
+Installing with MacPorts:
+
+```bash
+port install erlang +ssl
+```
+
+Confirm Erlang installed to the correct location by running:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+## Installing on RHEL/CentOS
+
+First make sure you have installed the necessary dependencies and prerequisites found in [RHEL/CentOS Prerequisites](#rhel-centos-prerequisites).
+
+Next download [Basho's patched version of Erlang](https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz).
+
+Using `wget`:
+
+```bash
+wget https://files.tiot.jp/riak/erlang/otp_src_R16B02-basho10.tar.gz
+```
+
+Then unpack the download with:
+
+```bash
+tar zxvf otp_src_R16B02-basho10.tar.gz
+```
+
+Next `cd` into the unpacked directory, build and install Erlang with:
+
+```bash
+cd OTP_R16B02_basho10
+./otp_build autoconf
+./configure && make && sudo make install
+```
+
+> **Note for RHEL6/CentOS6**
+>
+> In certain versions of RHEL 6 and CentOS 6, the `openssl-devel` package
+ships with Elliptic Curve Cryptography partially disabled. To
+communicate this to Erlang and prevent compile- and run-time errors, the
+environment variable `CFLAGS="-DOPENSSL_NO_EC=1"` needs to be added to
+Erlang's `./configure` call.
+>
+> The full `make` invocation then becomes:
+>
+> ```bash
+CFLAGS="-DOPENSSL_NO_EC=1" ./configure && make && sudo make install
+```
+
+Confirm Erlang installed to the correct location:
+
+```bash
+which erl
+```
+
+And start Erlang from your terminal with:
+
+```bash
+erl
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing/source/jvm.md b/content/riak/kv/3.0.4/setup/installing/source/jvm.md
new file mode 100644
index 0000000000..217895f5ef
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/source/jvm.md
@@ -0,0 +1,55 @@
+---
+title: "Installing the JVM"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Installing the JVM"
+    identifier: "installing_source_jvm"
+    weight: 302
+    parent: "installing_source"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/installing/jvm
+  - /riak/kv/3.0.4/ops/building/installing/jvm
+  - /riak/3.0.4/ops/building/installing/Installing-the-JVM
+  - /riak/kv/3.0.4/ops/building/installing/Installing-the-JVM
+  - /riak/3.0.4/installing/source/jvm/
+  - /riak/kv/3.0.4/installing/source/jvm/
+---
+
+[usage search]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/search
+
+If you are using [Riak Search 2.0][usage search], codename Yokozuna,
+you will need to install **Java 1.6 or later** to run [Apache
+Solr](https://lucene.apache.org/solr/), the search platform that powers
+Riak Search.
+
+We recommend using Oracle's [JDK
+7u25](http://www.oracle.com/technetwork/java/javase/7u25-relnotes-1955741.html).
+Installation packages can be found on the [Java SE 7 Downloads
+page](http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html#jdk-7u25-oth-JPR)
+and instructions on the [documentation
+page](http://www.oracle.com/technetwork/java/javase/documentation/index.html).
+
+## Installing Solr on OS X
+
+If you're using Riak Search on Mac OS X, you may see the following
+error:
+
+```java
+java.net.MalformedURLException: Local host name unknown: <YOUR_HOST_NAME>
+```
+
+If you encounter this error, we recommend manually setting the hostname
+for `localhost` using
+[scutil](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man8/scutil.8.html).
+
+```bash
+scutil --set HostName "localhost"
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing/suse.md b/content/riak/kv/3.0.4/setup/installing/suse.md
new file mode 100644
index 0000000000..1005338d5e
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/suse.md
@@ -0,0 +1,52 @@
+---
+title_supertext: "Installing on"
+title: "SUSE"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "SUSE"
+    identifier: "installing_suse"
+    weight: 307
+    parent: "installing"
+toc: false
+aliases:
+  - /riak/3.0.4/ops/building/installing/Installing-on-SUSE
+  - /riak/kv/3.0.4/ops/building/installing/Installing-on-SUSE
+  - /riak/3.0.4/installing/suse/
+  - /riak/kv/3.0.4/installing/suse/
+---
+
+[install verify]: {{<baseurl>}}riak/kv/3.0.4/setup/installing/verify
+
+{{% note title="SUSE End of Life (EOL) for Riak KV 2.2.3" %}}
+SUSE is no longer supported in Riak KV 2.2.3+. If you are interested in using Riak KV on SUSE, you can still [build from source](../source). The steps below have been left here for reference only and are no longer maintained.
+{{% /note %}}
+
+Riak KV can be installed on OpenSUSE and SLES systems using a binary package.
+These steps have been tested with Riak on
+the following x86/x86_64 flavors of SUSE:
+
+* SLES11-SP1
+* SLES11-SP2
+* SLES11-SP3
+* SLES11-SP4
+* OpenSUSE 11.2
+* OpenSUSE 11.3
+* OpenSUSE 11.4
+
+## Installing with rpm
+
+```bash
+wget https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm
+sudo rpm -Uvh riak-2.2.3-1.SLES11.x86_64.rpm
+```
+
+## Next Steps
+
+Now that Riak is installed, check out [Verifying a Riak Installation][install verify].
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing/verify.md b/content/riak/kv/3.0.4/setup/installing/verify.md
new file mode 100644
index 0000000000..2a2e135f12
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/verify.md
@@ -0,0 +1,169 @@
+---
+title: "Verifying a Riak KV Installation"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Verifying an Installation"
+    identifier: "installing_verify"
+    weight: 311
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/installing/Post-Installation
+  - /riak/kv/3.0.4/ops/installing/Post-Installation
+  - /riak/3.0.4/installing/verify-install/
+  - /riak/kv/3.0.4/installing/verify-install/
+---
+
+[client libraries]: {{<baseurl>}}riak/kv/3.0.4/developing/client-libraries
+[perf open files]: {{<baseurl>}}riak/kv/3.0.4/using/performance/open-files-limit
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/bucket-types
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/inspecting-node
+
+After you've installed Riak KV, we recommend checking the liveness of
+each node to ensure that requests are being properly served.
+
+In this document, we cover ways of verifying that your Riak nodes are operating
+correctly. After you've determined that your nodes are functioning and you're
+ready to put Riak KV to work, be sure to check out the resources in the
+**Now What?** section below.
+
+## Starting a Riak Node
+
+> **Note about source installations**
+>
+> To start a Riak KV node that was installed by compiling the source code, you
+can add the Riak KV binary directory from the installation directory you've
+chosen to your `PATH`.
+>
+> For example, if you compiled Riak KV from source in
+the `/home/riak` directory, then you can add the binary directory
+(`/home/riak/rel/riak/bin`) to your `PATH` so that Riak KV commands can be used in the same manner as with a packaged installation.
+
+To start a Riak node, use the `riak start` command:
+
+```bash
+riak start
+```
+
+A successful start will return no output. If there is a problem starting the
+node, an error message is printed to standard error.
+
+To run Riak with an attached interactive Erlang console:
+
+```bash
+riak console
+```
+
+A Riak node is typically started in console mode as part of debugging or
+troubleshooting to gather more detailed information from the Riak startup
+sequence. Note that if you start a Riak node in this manner, it is running as
+a foreground process that will exit when the console is closed.
+
+You can close the console by issuing this command at the Erlang prompt:
+
+```erlang
+q().
+```
+
+Once your node has started, you can initially check that it is running with
+the `riak ping` command:
+
+```bash
+riak ping
+```
+
+The command will respond with `pong` if the node is running or `Node <nodename> not responding to pings` if it is not.
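+
+For example, a minimal liveness check you might script around this
+behavior (a sketch only; it assumes `riak` is on the `PATH` and simply
+tests for the `pong` response):
+
+```bash
+# Hypothetical check: exit non-zero if the local node is not responding
+if riak ping | grep -q pong; then
+  echo "Riak node is up"
+else
+  echo "Riak node is not responding" >&2
+  exit 1
+fi
+```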
+
+> **Open Files Limit**
+>
+> As you may have noticed, if you haven't adjusted your open files limit (`ulimit -n`), Riak will warn you at startup. You're advised
+to increase the operating system default open files limit when running Riak.
+You can read more about why in the [Open Files Limit][perf open files] documentation.
+
+## Does it work?
+
+One convenient means of testing the readiness of an individual Riak node and
+its ability to read and write data is with the `riak-admin test` command:
+
+```bash
+riak-admin test
+```
+
+Successful output from `riak-admin test` looks like this:
+
+```text
+Attempting to restart script through sudo -H -u riak
+Successfully completed 1 read/write cycle to '<nodename>'
+```
+
+You can also test whether Riak is working by using the `curl` command-line
+tool. When you have Riak running on a node, try this command to retrieve
+the properties associated with the `default` [bucket type][cluster ops bucket types]:
+
+```bash
+curl -v http://127.0.0.1:8098/types/default/props
+```
+
+Replace `127.0.0.1` in the example above with your Riak node's IP address or
+fully qualified domain name, and you should get a response that looks like this:
+
+```
+* About to connect() to 127.0.0.1 port 8098 (#0)
+* Trying 127.0.0.1... connected
+* Connected to 127.0.0.1 (127.0.0.1) port 8098 (#0)
+> GET /types/default/props HTTP/1.1
+> User-Agent: curl/7.21.6 (x86_64-pc-linux-gnu)
+> Host: 127.0.0.1:8098
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Vary: Accept-Encoding
+< Server: MochiWeb/1.1 WebMachine/1.9.0 (someone had painted it blue)
+< Date: Wed, 26 Dec 2012 15:50:20 GMT
+< Content-Type: application/json
+< Content-Length: 422
+<
+* Connection #0 to host 127.0.0.1 left intact
+* Closing connection #0
+{"props":{"name":"test","allow_mult":false,"basic_quorum":false,
+  "big_vclock":50,"chash_keyfun":{"mod":"riak_core_util",
+  "fun":"chash_std_keyfun"},"dw":"quorum","last_write_wins":false,
+  "linkfun":{"mod":"riak_kv_wm_link_walker","fun":"mapreduce_linkfun"},
+  "n_val":3,"notfound_ok":true,"old_vclock":86400,"postcommit":[],"pr":0,
+  "precommit":[],"pw":0,"r":"quorum","rw":"quorum","small_vclock":50,
+  "w":"quorum","young_vclock":20}}
+```
+
+The output above shows a successful response (`HTTP 200 OK`) and additional
+details from the verbose option. The response also contains the bucket
+properties for the `default` bucket type.
+
+## Riaknostic
+
+It is a good idea to verify some basic configuration and general health
+of the Riak node after installation by using Riak's built-in diagnostic
+utility [Riaknostic](http://riaknostic.basho.com/).
+
+To start up Riaknostic, ensure that Riak is running on the node and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+More extensive documentation for Riaknostic can be found in the [Inspecting a Node][cluster ops inspect node] guide.
+
+## Now what?
+
+You have a working Riak node!
+
+From here you might want to check out the following resources:
+
+* [Client Libraries][client libraries] to use Riak with your favorite programming language
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/installing/windows-azure.md b/content/riak/kv/3.0.4/setup/installing/windows-azure.md
new file mode 100644
index 0000000000..4eb6d50884
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/installing/windows-azure.md
@@ -0,0 +1,197 @@
+---
+title_supertext: "Installing on"
+title: "Windows Azure"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Windows Azure"
+    identifier: "installing_windows_azure"
+    weight: 308
+    parent: "installing"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/kv/3.0.4/ops/building/installing/Installing-on-Windows-Azure
+  - /riak/3.0.4/installing/windows-azure/
+  - /riak/kv/3.0.4/installing/windows-azure/
+---
+
+## Creating CentOS VMs
+
+{{% note title="Warning: Azure not always up to date" %}}
+Azure's available Riak VMs are community supported, and thus are not always up to
+date with the latest Riak package. Please ensure that the current VM is
+using the latest supported code (and don't be afraid to update it if it's
+not). Alternatively, you can follow the install instructions for [CentOS/RHEL](../rhel-centos/) once you have created your Azure VM(s).
+{{% /note %}}
+
+You will need to sign up for the Windows Azure Virtual Machines preview feature in order to create a virtual machine. You can also sign up for a free trial account if you do not have a Windows Azure account.
+
+1. Navigate to [https://account.windowsazure.com](https://account.windowsazure.com/) and sign in with your Windows Azure account.
+
+2. Click "preview features" to view the available previews.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-01.png)
+
+3. Scroll down to Virtual Machines & Virtual Networks and click "try it now".
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-02.png)
+
+4. Select your subscription and click the check.
+
+    ![]({{<baseurl>}}images/antares-iaas-preview-04.png)
+
+### Create a virtual machine running CentOS Linux
+
+1. Log in to the Windows Azure (Preview) Management Portal using your Windows Azure account.
+
+2. In the Management Portal, at the bottom left of the web page, click "+New", click "Virtual Machine", and then click "From Gallery".
+
+    ![]({{<baseurl>}}images/createvm_small.png)
+
+3. Select a CentOS virtual machine image from "Platform Images", and then click the next arrow at the bottom right of the page.
+
+    ![]({{<baseurl>}}images/vmconfiguration0.png)
+
+4. On the VM Configuration page, provide the following information:
+    - Provide a "Virtual Machine Name", such as "testlinuxvm".
+    - Specify a "New User Name", such as "newuser", which will be added to the Sudoers list file. **Do NOT** use the username "riak", as it may conflict with the installation package.
+    - In the "New Password" box, type a strong password.
+    - In the "Confirm Password" box, retype the password.
+    - Select the appropriate "Size" from the drop down list.
+    - Click the next arrow to continue.
+
+    ![]({{<baseurl>}}images/vmconfiguration1.png)
+
+5. On the VM Mode page, provide the following information:
+    - **If this is the first node**, select the "STANDALONE VIRTUAL MACHINE" radio button. **Otherwise**, select the "CONNECT TO EXISTING VIRTUAL MACHINE" radio button, and select the first node in the drop down list.
+ - In the "DNS Name" box, type a valid DNS address, e.g "testlinuxvm". + - In the "Storage Account" box, select "Use Automatically Generated Storage Account". + - In the "Region/Affinity Group/Virtual Network" box, select a region where this virtual image will be hosted. + - Click the next arrow to continue. + + ![]({{<baseurl>}}images/vmconfiguration2.png) + +6. On the VM Options page, select "(none)" in the "Availability Set" box. Click the check mark to continue. + + ![]({{<baseurl>}}images/vmconfiguration3.png) + +7. Wait while Windows Azure prepares your virtual machine. + +### Configure Endpoints + +Once the virtual machine is created you must configure endpoints in order to remotely connect. + +1. In the Management Portal, click "Virtual Machines", then click the name of your new VM, then click "Endpoints". + +2. **If this is the first node**, click "Add Endpoint", leave "Add Endpoint" checked, hit the right arrow and fill out the next form as follows: + - Name: https + - Protocol: leave set to 'TCP' + - Public Port: 443 + - private Port: 8069 + +## Connect to CentOS VMs using PuTTY or SSH + +When the virtual machine has been provisioned and the endpoints configured you can connect to it using SSH or PuTTY. + +### Connecting Using SSH + +**For Linux & Mac Users:** + +```bash +ssh newuser@testlinuxvm.cloudapp.net -o ServerAliveInterval=180 +``` +Enter the user's password. + +**For Windows Users, use PuTTY:** + +If you are using a Windows computer, connect to the VM using PuTTY. PuTTY can be downloaded from the [PuTTY Download Page](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html). + +1. Download and save putty.exe to a directory on your computer. Open a command prompt, navigate to that folder, and execute putty.exe. + +2. Enter the SSH DETAILS as found on the Node's Dashboard, i.e., "testlinuxvm.cloudapp.net" for the Host Name and "22" for the Port. + + ![]({{<baseurl>}}images/putty.png) + +## Install Riak and configure using a shell script + +1. **On each node**, once you've connected using the steps above, execute: + +```bash +sudo su - +curl -s https://raw.githubusercontent.com/basho/riak_on_azure/1.0/azure_install_riak.sh | sh +``` + +## Configure Riak using Riak Control + +You can either use Riak Control or the command line to add nodes to your Riak Cluster. If you wish to add nodes via the command line, skip down to the section entitled "Configure Riak using Command Line" + +1. Find the dns name and "Deployment ID" in the virtual machine dashboard of the VM you created the https endpoint for. For Example: + - **dns:** basho-example.cloudapp.net + - **Deployment ID:** 7ea145743aeb4402a088da1234567890 + +2. Visit https://dns-name.cloudapp.net/admin in your browser + +3. Enter 'admin' as the username, and the "Deployment ID" as the password. + +4. Select 'Cluster' on the left. + +5. Add VMs which also have the Riak software installed and configured by entering riak@yourhostnamehere in the input box, and clicking 'Add Node'. Use the short name of each vm, not the DNS name. For Example: + - riak@basho-centos1 + +You now have a Riak cluster on Azure + +## Configure Riak using Command Line + +If you have already followed the instructions in the section "Configure Riak using Riak Control", skip this section. 
+
+First, SSH into the second (and subsequent) nodes and execute:
+
+```bash
+riak-admin cluster join riak@yourhostnamehere
+```
+
+(Where 'yourhostnamehere' is the short name of the **first node** in your cluster.)
+
+(NOTE: The host you choose can actually be any host that has already joined the cluster. The first host has no special significance, but it's important not to attempt to join to a node that hasn't joined a cluster yet. Doing this would create a second cluster; thus we use the first node for these instructions.)
+
+After all the nodes have been joined to the first node via the previous command, connect to any of the nodes via SSH or PuTTY and execute the following:
+
+```bash
+riak-admin cluster plan
+```
+
+Verify that all the nodes are listed as expected. If the cluster plan looks good:
+
+```bash
+riak-admin cluster commit
+```
+
+To check the status of clustering use:
+
+```bash
+riak-admin member-status
+```
+
+You now have a Riak cluster on Azure.
+
+## Load Test Data
+
+Execute on any one of the nodes:
+
+```bash
+curl -s http://rekon.basho.com | sh
+```
+
+Visit the DNS address listed on the dashboard, at the port we opened as an endpoint:
+
+```
+http://testlinuxvm.cloudapp.net:8098/riak/rekon/go
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning.md b/content/riak/kv/3.0.4/setup/planning.md
new file mode 100644
index 0000000000..29e07cb4ab
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning.md
@@ -0,0 +1,61 @@
+---
+title: "Planning Overview"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Planning"
+    identifier: "planning"
+    weight: 100
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[plan start]: ./start
+[plan backend]: ./backend
+[plan cluster capacity]: ./cluster-capacity
+[plan bitcask capacity]: ./bitcask-capacity-calc
+[plan backend bitcask]: ./backend/bitcask
+[plan best practices]: ./best-practices
+[plan future]: ./future
+
+## In This Section
+
+### [Start Planning][plan start]
+
+Steps and recommendations for designing and configuring a Riak KV cluster.
+
+[Learn More >>][plan start]
+
+### [Choosing a Backend][plan backend]
+
+Information on choosing the right storage backend for your Riak KV cluster.
+
+[Learn More >>][plan backend]
+
+### [Cluster Capacity Planning][plan cluster capacity]
+
+Outlines variables (such as memory requirements) to keep in mind when planning your Riak KV cluster.
+
+[Learn More >>][plan cluster capacity]
+
+### [Bitcask Capacity Calculator][plan bitcask capacity]
+
+A calculator that will assist you in sizing your cluster if you plan to use the default ([Bitcask][plan backend bitcask]) storage backend.
+
+[Learn More >>][plan bitcask capacity]
+
+### [Scaling & Operating Best Practices][plan best practices]
+
+A set of best practices that will enable you to improve performance and reliability at all stages in the life of your Riak KV cluster.
+
+[Learn More >>][plan best practices]
+
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/backend.md b/content/riak/kv/3.0.4/setup/planning/backend.md
new file mode 100644
index 0000000000..15e4ad5fb3
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/backend.md
@@ -0,0 +1,60 @@
+---
+title: "Choosing a Backend"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Choosing a Backend"
+    identifier: "planning_choose_backend"
+    weight: 102
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/planning/backends/
+  - /riak/kv/3.0.4/ops/building/planning/backends/
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/memory
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/multi
+[plan backend leveled]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveled
+[dev api backend]: {{<baseurl>}}riak/kv/3.0.4/developing/api/backend
+
+Pluggable storage backends are a key feature of Riak KV. They enable you to
+choose a low-level storage engine that suits specific operational needs.
+For example, if your use case requires maximum throughput, data
+persistence, and a bounded keyspace, then Bitcask is a good choice. On
+the other hand, if you need to store a large number of keys or to use
+secondary indexes, LevelDB is likely a better choice.
+
+The following backends are supported:
+
+* [Bitcask][plan backend bitcask]
+* [LevelDB][plan backend leveldb]
+* [Memory][plan backend memory]
+* [Multi][plan backend multi]
+* [Leveled][plan backend leveled]
+
+Riak KV supports the use of custom storage backends as well. See the
+storage [Backend API][dev api backend] for more details.
+
+Feature or Characteristic                      |Bitcask|LevelDB|Memory|
+:----------------------------------------------|:-----:|:-----:|:----:|
+Default Riak KV backend                        |✓      |       |      |
+Persistent                                     |✓      |✓      |      |
+Keyspace in RAM                                |✓      |       |✓     |
+Keyspace can be greater than available RAM     |       |✓      |      |
+Keyspace loaded into RAM on startup<sup>1</sup>|✓      |       |      |
+Objects in RAM                                 |       |       |✓     |
+Object expiration                              |✓      |       |✓     |
+Secondary indexes                              |       |✓      |✓     |
+Tiered storage                                 |       |✓      |      |
+
+<sup>1</sup> Noted here since this can affect Riak start times for large
+keyspaces.
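+
+If you want to try a non-default backend, the switch itself is a single
+setting in `riak.conf` (shown below as a sketch; the per-backend pages
+linked above cover the tuning that should accompany such a change):
+
+```riakconf
+storage_backend = leveldb
+```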
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/backend/bitcask.md b/content/riak/kv/3.0.4/setup/planning/backend/bitcask.md
new file mode 100644
index 0000000000..c2c9a7d154
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/backend/bitcask.md
@@ -0,0 +1,994 @@
+---
+title: "Bitcask"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Bitcask"
+    identifier: "planning_backend_bitcask"
+    weight: 100
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/advanced/backends/bitcask/
+  - /riak/kv/3.0.4/ops/advanced/backends/bitcask/
+---
+
+[github bitcask]: https://github.com/basho/bitcask
+[bitcask design pdf]: http://basho.com/assets/bitcask-intro.pdf
+[use admin riak cli]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-cli
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+[learn clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/multi
+[usage search]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/search
+
+[glossary aae]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#active-anti-entropy-aae
+[perf open files]: {{<baseurl>}}riak/kv/3.0.4/using/performance/open-files-limit
+
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/bitcask-capacity-calc
+[usage delete objects]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/deleting-objects
+
+[Bitcask][github bitcask] is an Erlang application that provides an API for storing and retrieving key/value data using log-structured hash tables that provide very fast access. The [design][bitcask design pdf] of Bitcask was inspired, in part, by log-structured filesystems and log file merging.
+
+## Bitcask's Strengths
+
+* **Low latency per item read or written**
+
+  This is due to the write-once, append-only nature of Bitcask
+  database files.
+
+* **High throughput, especially when writing an incoming stream of
+  random items**
+
+  Write operations to Bitcask generally saturate I/O and disk
+  bandwidth, which is a good thing from a performance perspective.
+  This saturation occurs for two reasons: (1) data that is
+  written to Bitcask doesn't need to be ordered on disk, and (2) the
+  log-structured design of Bitcask allows for minimal disk head
+  movement during writes.
+
+* **Ability to handle datasets larger than RAM without degradation**
+
+  Access to data in Bitcask involves direct lookup from an in-memory
+  hash table. This makes finding data very efficient, even when
+  datasets are very large.
+
+* **Single seek to retrieve any value**
+
+  Bitcask's in-memory hash table of keys points directly to locations
+  on disk where the data lives. Bitcask never uses more than one disk
+  seek to read a value, and sometimes even that isn't necessary due to
+  filesystem caching done by the operating system.
+
+* **Predictable lookup _and_ insert performance**
+
+  For the reasons listed above, read operations from Bitcask have
+  fixed, predictable behavior. This is also true of writes to Bitcask
+  because write operations require, at most, one seek to the end of
+  the current open file followed by an append to that file.
+
+* **Fast, bounded crash recovery**
+
+  Crash recovery is easy and fast with Bitcask because Bitcask files
+  are append-only and write-once. The only items that may be lost are
+  partially written records at the tail of the last file that was
+  opened for writes.
+  Recovery operations need to review only the last
+  record or two written and verify CRC data to ensure that the data is
+  consistent.
+
+* **Easy Backup**
+
+  In most systems, backup can be very complicated. Bitcask simplifies
+  this process due to its append-only, write-once disk format. Any
+  utility that archives or copies files in disk-block order will
+  properly back up or copy a Bitcask database.
+
+## Weaknesses
+
+* **Keys must fit in memory**
+
+  Bitcask keeps all keys in memory at all times, which means that your
+  system must have enough memory to contain your entire keyspace, plus
+  additional space for other operational components and
+  operating-system-resident filesystem buffer space.
+
+## Installing Bitcask
+
+Bitcask is the default storage engine for Riak. You can verify that
+Bitcask is currently being used as the storage backend with the
+[`riak`][use admin riak cli] command interface:
+
+```bash
+riak config effective | grep backend
+```
+
+If this operation returns anything other than `bitcask`, read
+the following section for instructions on switching the backend to Bitcask.
+
+## Enabling Bitcask
+
+You can set Bitcask as the storage engine using each node's
+[configuration files][config reference]:
+
+```riakconf
+storage_backend = bitcask
+```
+
+```appconfig
+{riak_kv, [
+    {storage_backend, riak_kv_bitcask_backend},
+    %% Other riak_kv settings...
+
+    ]},
+```
+
+## Configuring Bitcask
+
+Bitcask enables you to configure a wide variety of its behaviors, from
+filesystem sync strategy to merge settings and more.
+
+> **Note on configuration systems**
+>
+> Riak 2.0 enables you to use either the newer [configuration system][config reference] based on a single `riak.conf` file or the older system, based on an `app.config` configuration file.
+> Instructions for both systems will be included below. Narrative
+descriptions of the various settings will be tailored to the newer
+configuration system, whereas instructions for the older system will
+largely be contained in the code tabs.
+
+The default configuration values for Bitcask are as follows:
+
+```riakconf
+bitcask.data_root = ./data/bitcask
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    {data_root, "/var/lib/riak/bitcask"},
+    {io_mode, erlang},
+
+    %% Other Bitcask-specific settings
+    ]}
+```
+
+All of the other available settings listed below can be added to your
+configuration files.
+
+### Open Timeout
+
+The open timeout setting specifies the maximum time Bitcask will block
+on startup while attempting to create or open the Bitcask data
+directory. The default is 4 seconds.
+
+In general, you will not need to adjust this setting. If, however, you
+begin to receive log messages of the form `Failed to start bitcask
+backend: ...`, you may want to consider using a longer timeout.
+
+Open timeout is specified using the `bitcask.sync.open_timeout`
+parameter, and can be set in terms of seconds, minutes, hours, etc.
+The following example sets the parameter to 10 seconds:
+
+```riakconf
+bitcask.sync.open_timeout = 10s
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {open_timeout, 10} %% This value must be expressed in seconds
+    ...
+    ]}
+```
+
+### Sync Strategy
+
+Bitcask enables you to configure the durability of writes by specifying
+when to synchronize data to disk, i.e. by choosing a sync strategy. The
+default setting (`none`) writes data into operating system buffers that
+will be written to disk when those buffers are flushed by the operating
+system.
+If the system fails before those buffers are flushed, e.g. due
+to power loss, that data is lost. This possibility holds for any
+database in which values are asynchronously flushed to disk.
+
+Thus, using the default setting of `none` protects against data loss in
+the event of application failure, i.e. process death, but leaves open a
+small window in which data could be lost in the event of a complete
+system failure, e.g. hardware or OS failure.
+
+This possibility can be prevented by choosing the `o_sync` sync
+strategy, which forces the operating system to flush to stable storage
+at write time for every write. The effect of flushing each write is
+better durability, although it should be noted that write throughput
+will suffer because each write will have to wait for the write to
+complete.
+
+The following sync strategies are available:
+
+  * `none` - lets the operating system manage syncing writes
+    (default)
+  * `o_sync` - uses the `O_SYNC` flag, which forces syncs on every
+    write
+  * Time interval - Riak will force Bitcask to sync at specified
+    intervals
+
+The following are possible configurations:
+
+
+```riakconf
+bitcask.sync.strategy = none
+bitcask.sync.strategy = o_sync
+
+bitcask.sync.strategy = interval
+bitcask.sync.interval = 65s
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {sync_strategy, none},
+    {sync_strategy, o_sync},
+    {sync_strategy, {seconds, 10}}, %% The time interval must be specified in seconds
+    ...
+    ]}
+```
+
+> **Sync strategy interval limitations**
+>
+> Setting the sync interval to a value lower than or equal to
+  `riak_core.vnode_inactivity_timeout` (default: 60 seconds) will
+  prevent Riak from performing handoffs.
+>
+> A vnode must be inactive (not receive any messages) for a certain amount of time before the handoff process can start. The sync mechanism causes a message to be sent to the vnode for every sync, thus preventing the vnode from ever becoming inactive.
+
+### Max File Size
+
+The `max_file_size` setting describes the maximum permitted size for any
+single data file in the Bitcask directory. If a write causes the current
+file to exceed this size threshold, then that file is closed, and a new
+file is opened for writes. The default is 2 GB.
+
+Increasing `max_file_size` will cause Bitcask to create fewer, larger
+files that are merged less frequently, while decreasing it will cause
+Bitcask to create more numerous, smaller files that are merged more
+frequently.
+
+To give an example, if your ring size is 16, your servers could see as
+much as 32 GB of data in the bitcask directories before the first merge
+is triggered, irrespective of your working set size. You should plan
+storage accordingly and be aware that it is possible to see disk data
+sizes that are larger than the working set.
+
+The `max_file_size` setting can be specified using kilobytes, megabytes,
+etc. The following example sets the max file size to 1 GB:
+
+```riakconf
+bitcask.max_file_size = 1GB
+```
+
+```appconfig
+%% The max_file_size setting must be expressed in bytes, as in the
+%% example below
+
+{bitcask, [
+    ...,
+    {max_file_size, 16#40000000}, %% 1 GB expressed in bytes
+    ...
+    ]}
+```
+
+### Hint File CRC Check
+
+During startup, Bitcask will read from `.hint` files in order to build
+its in-memory representation of the key space, falling back to `.data`
+files if necessary. This reduces the amount of data that must be read
+from the disk during startup, thereby also reducing the time required to
+start up.
+You can configure Bitcask to either disregard `.hint` files
+that don't contain a CRC value or to use them anyway.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can instruct Bitcask to disregard `.hint` files that do not contain a
+CRC value by setting the `hintfile_checksums` setting to `strict` (the
+default). To use Bitcask in a backward-compatible mode that allows for
+`.hint` files without CRC signatures, change the setting to
+`allow_missing`.
+
+The following example sets the parameter to `strict`:
+
+```riakconf
+bitcask.hintfile_checksums = strict
+```
+
+```appconfig
+%% In the app.config-based system, substitute "require_hint_crc" for
+%% "hintfile_checksums", "true" for "strict", and "false" for
+%% "allow_missing"
+
+{bitcask, [
+    ...,
+    {require_hint_crc, true},
+    ...
+    ]}
+```
+
+### I/O Mode
+
+The `io_mode` setting specifies which code module Bitcask should use for
+file access. The available settings are:
+
+* `erlang` (default) - Writes are made via Erlang's built-in file API
+* `nif` - Writes are made via direct calls to the POSIX C API
+
+The following example sets `io_mode` to `erlang`:
+
+```riakconf
+bitcask.io_mode = erlang
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {io_mode, erlang},
+    ...
+    ]}
+```
+
+In general, the `nif` IO mode provides higher throughput for certain
+workloads, but it has the potential to negatively impact the Erlang VM,
+leading to higher worst-case latencies and possible throughput collapse.
+
+### `O_SYNC` on Linux
+
+Synchronous file I/O via
+[`o_sync`](http://linux.about.com/od/commands/l/blcmdl2_open.htm) is
+supported in Bitcask if `io_mode` is set to `nif` and is not supported
+in the `erlang` mode.
+
+If you enable `o_sync` by setting `io_mode` to `nif`, however, you will
+still get an incorrect warning along the following lines:
+
+```log
+[warning] <0.445.0>@riak_kv_bitcask_backend:check_fcntl:429 {sync_strategy,o_sync} not implemented on Linux
+```
+
+If you are using the older, `app.config`-based configuration system, you
+can disable the check that generates this warning by adding the
+following to the `riak_kv` section of your `app.config`:
+
+```appconfig
+{riak_kv, [
+    ...,
+    {o_sync_warning_logged, false},
+    ...
+    ]}
+```
+
+### Disk Usage and Merging Settings
+
+Riak KV stores each [vnode][glossary vnode] of the
+[ring][learn clusters] as a separate Bitcask directory within the
+configured Bitcask data directory.
+
+Each of these directories will contain multiple files with key/value
+data, one or more "hint" files that record where the various keys exist
+within the data files, and a write lock file. The design of Bitcask
+allows for recovery even when data isn't fully synchronized to disk
+(partial writes). This is accomplished by maintaining data files that
+are append-only (i.e. never modified in-place) and are never reopened
+for modification (i.e. they are only for reading).
+
+This data management strategy trades disk space for operational
+efficiency. There can be a significant storage overhead that is
+unrelated to your working data set but can be tuned in a way that best
+fits your use case. In short, disk space is used until a threshold is
+met at which point unused space is reclaimed through a process of
+merging. The merge process traverses data files and reclaims space by
+eliminating out-of-date or deleted key/value pairs, writing only the
+current key/value pairs to a new set of files within the directory.
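+
+One way to observe this trade-off on a live node is to watch the
+on-disk size of the per-vnode Bitcask directories before and after
+merges run (a sketch; it assumes the `/var/lib/riak/bitcask` data root
+from the example above, so substitute your own `bitcask.data_root`):
+
+```bash
+# Show the five largest per-vnode Bitcask directories;
+# rerun after a merge window to see reclaimed space
+du -sh /var/lib/riak/bitcask/* | sort -rh | head -5
+```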
+
+The merge process is affected by all of the settings described in the
+sections below. In those sections, "dead" refers to keys that no longer
+contain the most up-to-date values, while "live" refers to keys that do
+contain the most up-to-date value and have not been deleted.
+
+### Merge Policy
+
+Bitcask enables you to select a merge policy, i.e., the times of day
+during which merge operations are allowed to occur. The valid options are:
+
+* `always` - No restrictions on when merge operations can occur
+  (default)
+* `never` - Merge will never be attempted
+* `window` - Merge operations occur during specified hours
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can select a merge policy using the `merge.policy` setting. The
+following example sets the merge policy to `never`:
+
+```riakconf
+bitcask.merge.policy = never
+```
+
+```appconfig
+{bitcask, [
+    ...,
+    {merge_window, never},
+    ...
+    ]}
+```
+
+If you opt to specify start and end hours for merge operations, you can
+do so with the `merge.window.start` and `merge.window.end`
+settings in addition to setting the merge policy to `window`.
+Each setting is an integer between 0 and 23 for hours on a 24h clock,
+with 0 meaning midnight and 23 standing for 11 pm.
+The merge window runs from the first minute of the `merge.window.start` hour
+to the last minute of the `merge.window.end` hour.
+The following example enables merging between 3 am and 5:59 pm:
+
+```riakconf
+bitcask.merge.policy = window
+bitcask.merge.window.start = 3
+bitcask.merge.window.end = 17
+```
+
+```appconfig
+%% In the app.config-based system, you specify the merge window using
+%% a tuple, as in the following example:
+
+{bitcask, [
+    ...,
+    {merge_window, {3, 17}},
+    ...
+    ]}
+```
+
+> **`merge_window` and the Multi backend**
+>
+> If you are using the older configuration system and using Bitcask with
+the [Multi][plan backend multi] backend, please note that if you
+wish to use a merge window, you _must_ set it in the global `bitcask`
+section of your configuration file. `merge_window` settings
+in per-backend sections are ignored.
+
+If merging has a significant impact on performance of your cluster, or
+if your cluster has quiet periods in which little storage activity
+occurs, you may want to change this setting from the default.
+
+A common way to limit the impact of merging is to create separate merge
+windows for each node in the cluster and ensure that these windows do
+not overlap. This ensures that at most one node at a time can be
+affected by merging, leaving the remaining nodes to handle requests.
+The main drawback of this approach is that merges will occur less
+frequently, leading to increased disk space usage.
+
+### Merge Triggers
+
+Merge triggers determine the conditions under which merging will be
+invoked. These conditions fall into two basic categories:
+
+* **Fragmentation** - This describes the ratio of dead keys to total
+  keys in a file that will trigger merging. The value of this setting is
+  an integer percentage (0-100). For example, if a data file contains 6
+  dead keys and 4 live keys, a merge will be triggered by the default
+  setting (60%). Increasing this value will cause merging to occur less
+  often, whereas decreasing the value will cause merging to happen more
+  often.
+
+* **Dead Bytes** - This setting describes how much data stored for
+  dead keys in a single file will trigger merging. If a file meets or
+  exceeds the trigger value for dead bytes, a merge will be triggered.
+  Increasing the value will cause merging to occur less often, whereas
+  decreasing the value will cause merging to happen more often. The
+  default is 512 MB.
+
+  When either of these constraints is met by any file in the directory,
+  Bitcask will attempt to merge files.
+
+You can set the triggers described above using
+`merge.triggers.fragmentation` and `merge.triggers.dead_bytes`,
+respectively. The former is expressed as an integer between 0 and 100,
+whereas the latter can be expressed in terms of kilobytes, megabytes,
+gigabytes, etc. The following example sets the fragmentation trigger to
+55% and the dead bytes trigger to 1 GB:
+
+```riakconf
+bitcask.merge.triggers.fragmentation = 55
+bitcask.merge.triggers.dead_bytes = 1GB
+```
+
+```appconfig
+%% The equivalent settings in the app.config-based system are
+%% frag_merge_trigger and dead_bytes_merge_trigger, respectively. The
+%% latter must be expressed in bytes.
+
+{bitcask, [
+    ...,
+    {frag_merge_trigger, 55},
+    {dead_bytes_merge_trigger, 1073741824},
+    ...
+    ]}
+```
+
+### Merge Thresholds
+
+Merge thresholds determine which files will be chosen for inclusion in
+a merge operation.
+
+* **Fragmentation** - This setting describes the ratio of dead keys
+  to total keys in a file that will cause it to be included in a merge.
+  The value of this setting is a percentage (0-100). For example, if a data
+  file contains 4 dead keys and 6 live keys, it will be included in the
+  merge at the default ratio (40%). Increasing the value will cause
+  fewer files to be merged, while decreasing the value will cause more
+  files to be merged.
+
+* **Dead Bytes** - This setting describes the minimum amount of data
+  occupied by dead keys in a file that will cause it to be included in
+  a merge. Increasing this value will cause fewer files to
+  be merged, while decreasing this value will cause more files to be
+  merged. The default is 128 MB.
+
+* **Small File** - This setting describes the minimum size a file must
+  be to be _excluded_ from the merge. Files smaller than the threshold
+  will be included. Increasing the value will cause more files to be
+  merged, while decreasing the value will cause fewer files to be merged.
+  The default is 10 MB.
+
+You can set the thresholds described above using the
+`merge.thresholds.fragmentation`, `merge.thresholds.dead_bytes`, and
+`merge.thresholds.small_file` settings, respectively.
+
+The `fragmentation` setting is expressed as an integer
+between 0 and 100, and the `dead_bytes` and `small_file` settings can be
+expressed in terms of kilobytes, megabytes, gigabytes, etc. The
+following example sets the fragmentation threshold to 45%, the
+dead bytes threshold to 200 MB, and the small file threshold to 25 MB:
+
+```riakconf
+bitcask.merge.thresholds.fragmentation = 45
+bitcask.merge.thresholds.dead_bytes = 200MB
+bitcask.merge.thresholds.small_file = 25MB
+```
+
+```appconfig
+%% In the app.config-based system, the settings corresponding to those
+%% listed above are frag_threshold, dead_bytes_threshold, and
+%% small_file_threshold, respectively. The latter two settings must be
+%% expressed in bytes:
+
+{bitcask, [
+    ...,
+    {frag_threshold, 45},
+    {dead_bytes_threshold, 209715200},
+    {small_file_threshold, 26214400},
+    ...
+    ]}
+```
+
+> **Note on choosing threshold values**
+>
+> The values for the fragmentation and dead bytes thresholds _must be
+equal to or less than their corresponding trigger values_.
If they are +set higher, Bitcask will trigger merges in cases where no files meet the +threshold, which means that Bitcask will never resolve the conditions +that triggered merging in the first place. + +### Merge Interval + +Bitcask periodically runs checks to determine whether merges are +necessary. You can determine how often those checks take place using +the `bitcask.merge_check_interval` parameter. The default is 3 minutes. + +```riakconf +bitcask.merge_check_interval = 3m +``` + +```appconfig +%% In the app.config-based system, this setting is expressed in +%% milliseconds and found in the riak_kv section rather than the bitcask +%% section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_interval, 180000}, + + %% Other configs + ]} +``` + +If merge check operations happen at the same time on different +[vnodes][glossary vnode] on the same node, this can produce spikes +in I/O usage and undue latency. Bitcask makes it less likely that merge +check operations will occur at the same time on different vnodes by +applying a **jitter** to those operations. A jitter is a random +variation applied to merge times that you can alter using the +`bitcask.merge_check_jitter` parameter. This parameter is expressed as a +percentage of `bitcask.merge_check_interval`. The default is 30%. + +```riakconf +bitcask.merge_check_jitter = 30% +``` + +```appconfig +%% In the app.config-based system, this setting is expressed as a float +%% and found in the riak_kv section rather than the bitcask section: + +{riak_kv, [ + %% Other configs + + {bitcask_merge_check_jitter, 0.3}, + + %% Other configs + ]} +``` + +For example, if you set the merge check interval to 4 minutes and the +jitter to 25%, merge checks will occur at intervals between 3 and 5 +minutes. With the default of 3 minutes and 30%, checks will occur at +intervals between roughly 2 and 4 minutes. + +### Log Needs Merge + +If you are using the older, `app.config`-based configuration system, you +can use the `log_needs_merge` setting to tune and troubleshoot Bitcask +merge settings. When set to `true` (as in the example below), each time +a merge trigger is met, the partition/vnode ID and mergeable files will +be logged. + +```appconfig +{bitcask, [ + ..., + {log_needs_merge, true}, + ... + ]} +``` + +> **Note on `log_needs_merge` and the Multi backend** +> +>If you are using Bitcask with the [Multi][plan backend multi] backend in conjunction with the older, `app.config`-based configuration system, please +note that `log_needs_merge` _must_ be set in the global `bitcask` section of your `app.config`. All `log_needs_merge` settings in per-backend sections are ignored. + +### Fold Keys Threshold + +Fold keys thresholds will reuse the keydir (a) if another fold was +started less than a specified time interval ago and (b) there were fewer +than a specified number of updates. Otherwise, Bitcask will wait until +all current fold keys complete and then start. The default time interval +is 0, while the default number of updates is unlimited. Both thresholds +can be disabled. + +The conditions described above can be set using the `fold.max_age` and +`fold.max_puts` parameters, respectively. The former can be expressed in +terms of minutes, hours, days, etc., while the latter is expressed as an +integer. Each threshold can be disabled by setting the value to +`unlimited`. 
+The following example sets the `max_age` to 1/2 second and
+the `max_puts` to 1000:
+
+```riakconf
+bitcask.fold.max_age = 0.5s
+bitcask.fold.max_puts = 1000
+```
+
+```appconfig
+%% In the app.config-based system, the corresponding parameters are
+%% max_fold_age and max_fold_puts, respectively. The former must be
+%% expressed in milliseconds, while the latter must be an integer:
+
+{bitcask, [
+    ...,
+    {max_fold_age, 500},
+    {max_fold_puts, 1000},
+    ...
+    ]}
+
+%% Each of these thresholds can be disabled by setting the value to -1
+```
+
+<a name="Automatic-Expiration"></a>
+### Automatic Expiration
+
+By default, Bitcask keeps all of your data. But if your data has limited
+time value or if you need to purge data for space reasons, you can
+configure object expiration, aka expiry. This feature is disabled by
+default.
+
+You can enable and configure object expiry using the `expiry` setting
+and either specifying a time interval in seconds, minutes, hours, etc.,
+or turning expiry off (`off`). The following example configures objects
+to expire after 1 day:
+
+```riakconf
+bitcask.expiry = 1d
+```
+
+```appconfig
+%% In the app.config-based system, expiry is expressed in terms of
+%% seconds:
+
+{bitcask, [
+    ...,
+    {expiry_secs, 86400}, %% Sets the duration to 1 day
+    ...
+    ]}
+
+%% Expiry can be turned off by setting this value to -1
+```
+
+> **Note on stale data**
+>
+> Space occupied by stale data _may not be reclaimed immediately_,
+but the data will become immediately inaccessible to client requests.
+Writing to a key will set a new modification timestamp on the value
+and prevent it from being expired.
+
+By default, Bitcask will trigger a merge whenever a data file contains
+an expired key. This may result in excessive merging under some usage
+patterns. You can prevent this by configuring an expiry grace time.
+Bitcask will defer a merge triggered solely by key expiry for the
+configured amount of time. The default is 0, signifying no grace time.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can set an expiry grace time using the `expiry.grace_time` setting,
+expressed in terms of minutes, hours, days, etc. The following example
+sets the grace period to 1 hour:
+
+```riakconf
+bitcask.expiry.grace_time = 1h
+```
+
+```appconfig
+%% The equivalent setting in the app.config-based system is
+%% expiry_grace_time. This must be expressed in seconds:
+
+{bitcask, [
+    ...,
+    {expiry_grace_time, 3600}, %% Sets the grace period to 1 hour
+    ...
+    ]}
+```
+
+#### Automatic expiration and Riak Search
+
+If you are using [Riak Search][usage search] in conjunction with
+Bitcask, please be aware that automatic expiry does not apply to [Search Indexes](../../../../developing/usage/search). If objects are indexed using Search,
+those objects can be expired by Bitcask yet still registered in Search
+indexes, which means that Search queries may return keys that no longer
+exist. Riak's [active anti-entropy (AAE)][glossary aae] subsystem will eventually
+catch this discrepancy, but this depends on AAE being enabled (which is
+the default) and could take some time. If search queries returning
+expired keys is a problem for your use case, then we would recommend not
+using automatic expiration.
+
+## Tuning Bitcask
+
+When tuning your environment, there are a number of things to bear in
+mind that can assist you in making Bitcask as stable and reliable as
+possible while minimizing latency and maximizing throughput.
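+
+Before applying any of the tunings below, it can help to confirm which
+values a node is actually running with, using the same `riak config
+effective` command shown earlier:
+
+```bash
+# List the effective Bitcask settings on this node
+riak config effective | grep bitcask
+```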
+
+### Tips & Tricks
+
+  * **Bitcask depends on filesystem caches**
+
+    Some data storage layers implement their own page/block buffer cache
+    in-memory, but Bitcask does not. Instead, it depends on the
+    filesystem's cache. Adjusting the caching characteristics of your
+    filesystem can impact performance.
+
+  * **Be aware of file handle limits**
+
+    Review the documentation on [open files limit][perf open files].
+
+  * **Avoid the overhead of updating file metadata (such as last access
+    time) on every read or write operation**
+
+    You can achieve a substantial speed boost by adding the `noatime`
+    mounting option to Linux's `/etc/fstab`. This will disable the
+    recording of the last accessed time for all files, which results
+    in fewer disk head seeks. If you need last access times but you'd
+    like some of the benefits of this optimization, you can try
+    `relatime`.
+
+    ```
+    /dev/sda5 /data ext3 noatime 1 1
+    /dev/sdb1 /data/inno-log ext3 noatime 1 2
+    ```
+
+  * **Small number of frequently changed keys**
+
+    When keys are changed frequently, fragmentation rapidly increases.
+    To counteract this, you should lower the fragmentation trigger and
+    threshold.
+
+  * **Limited disk space**
+
+    When disk space is limited, limiting the space occupied by dead keys
+    is of paramount importance. Lower the dead bytes threshold and
+    trigger to counteract wasted space.
+
+  * **Purging stale entries after a fixed period**
+
+    To automatically purge stale values, set the object expiry value to
+    the desired cutoff time. Keys that are not modified for a period
+    equal to or greater than this time interval will become
+    inaccessible.
+
+  * **High number of partitions per node**
+
+    Because each node hosts many running partitions, Bitcask will have
+    many [open files][perf open files]. To reduce the number of open
+    files, we suggest increasing the max file size so that larger files
+    will be written. You could also decrease the fragmentation and
+    dead-bytes settings and increase the small file threshold so that
+    merging will keep the number of open files small.
+
+  * **High daytime traffic, low nighttime traffic**
+
+    In order to cope with a high volume of writes without performance
+    degradation during the day, you might want to limit merging to
+    non-peak periods. Setting the merge window to hours of the day
+    when traffic is low will help.
+
+  * **Multi-cluster replication**
+
+    If you are using Riak with the replication feature enabled, your clusters might produce
+    more fragmentation and dead bytes. Additionally,
+    because the fullsync feature operates across entire partitions, it
+    will be made more efficient by accessing data as sequentially as
+    possible (across fewer files). Lowering both the fragmentation and
+    dead-bytes settings will improve performance.
+
+## FAQ
+
+  * [[Why does it seem that Bitcask merging is only triggered when a
+    Riak node is restarted?|Developing on Riak
+    FAQs#why-does-it-seem-that-bitc]]
+  * [[If the size of key index exceeds the amount of memory, how does
+    Bitcask handle it?|Operating Riak FAQs#if-the-size-of-key-index-e]]
+  * [Bitcask Capacity Planning][plan bitcask capacity]
+
+## Bitcask Implementation Details
+
+Riak will create a Bitcask database directory for each [vnode][glossary vnode]
+in a [cluster][learn clusters]. In each of those directories, at most one
+database file will be open for writing at any given time.
+written to will grow until it exceeds a specified size threshold, at
+which time it is closed and a new file is created for additional writes.
+Once a file is closed, whether purposely or due to server exit, it is
+considered immutable and will never again be opened for writing.
+
+The file currently open for writes is only written by appending, which
+means that sequential writes do not require disk seeking, which can
+dramatically speed up disk I/O. Note that this effect can be hampered if
+you have `atime` enabled on your filesystem, because the disk head will
+have to move to update both the data blocks _and_ the file and directory
+metadata blocks. The primary speed advantage of a log-based database
+stems from its ability to minimize disk head seeks.
+
+Deleting a value from Bitcask is a two-step process: first, a
+[tombstone][usage delete objects] is recorded in the open file for writes,
+which indicates that a value was marked for deletion at that time, while
+references to that key are removed from the in-memory "keydir"
+information; later, during a merge operation, non-active data files are
+scanned, and only those values without tombstones are merged into the
+active data file. This effectively removes the obsolete data and
+reclaims disk space associated with it. This data management strategy
+may use up a lot of space over time, since Bitcask writes new values
+without touching the old ones.
+
+The compaction process referred to as "merging" solves this
+problem. The merge process iterates over all non-active (i.e. immutable)
+files in a Bitcask database and produces as output a set of data files
+containing only the "live" or latest versions of each present key.
+
+### Bitcask Database Files
+
+Below are two directory listings showing what you should expect to find
+on disk when using Bitcask. In this example, we use a 64-partition
+[ring][learn clusters], which results in 64 separate directories,
+each holding its own Bitcask database.
+
+```bash
+ls ./data/bitcask
+```
+
+The result:
+
+```
+0
+1004782375664995756265033323087618185977059737600
+1027618338748291114361965898003636498195577569280
+
+... etc ...
+
+981946412581700398168100746981252653831329677312
+```
+
+Note that at startup, a directory is created for each
+[vnode][glossary vnode] partition's data. At this point, however, there are not
+yet any Bitcask-specific files.
+
+After performing one PUT (write) into the Riak cluster running Bitcask:
+
+```bash
+curl -XPUT http://localhost:8098/types/default/buckets/test/keys/test \
+  -H "Content-Type: text/plain" \
+  -d "hello"
+```
+
+The "N" value for this cluster is 3 (the default), so you'll see that
+the three vnode partitions responsible for this data now have Bitcask
+database files:
+
+```
+bitcask/
+
+... etc ...
+
+|-- 1118962191081472546749696200048404186924073353216-1316787078245894
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1141798154164767904846628775559596109106197299200-1316787078249065
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+
+|-- 1164634117248063262943561351070788031288321245184-1316787078254833
+|   |-- 1316787252.bitcask.data
+|   |-- 1316787252.bitcask.hint
+|   `-- bitcask.write.lock
+
+... etc ...
+
+```
+
+As more data is written to the cluster, more Bitcask files are created
+until merges are triggered.
+
+```
+bitcask/
+|-- 0-1317147619996589
+|   |-- 1317147974.bitcask.data
+|   |-- 1317147974.bitcask.hint
+|   |-- 1317221578.bitcask.data
+|   |-- 1317221578.bitcask.hint
+|   |-- 1317221869.bitcask.data
+|   |-- 1317221869.bitcask.hint
+|   |-- 1317222847.bitcask.data
+|   |-- 1317222847.bitcask.hint
+|   |-- 1317222868.bitcask.data
+|   |-- 1317222868.bitcask.hint
+|   |-- 1317223014.bitcask.data
+|   `-- 1317223014.bitcask.hint
+|-- 1004782375664995756265033323087618185977059737600-1317147628760580
+|   |-- 1317147693.bitcask.data
+|   |-- 1317147693.bitcask.hint
+|   |-- 1317220905.bitcask.data
+|   |-- 1317220905.bitcask.hint
+|   |-- 1317222514.bitcask.data
+|   |-- 1317222514.bitcask.hint
+|   |-- 1317223035.bitcask.data
+|   |-- 1317223035.bitcask.hint
+|   |-- 1317223411.bitcask.data
+|   `-- 1317223411.bitcask.hint
+|-- 1027618338748291114361965898003636498195577569280-1317223690337865
+|-- 1050454301831586472458898473514828420377701515264-1317223690151365
+
+... etc ...
+
+```
+
+This is normal operational behavior for Bitcask.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/backend/leveldb.md b/content/riak/kv/3.0.4/setup/planning/backend/leveldb.md
new file mode 100644
index 0000000000..e093e76848
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/backend/leveldb.md
@@ -0,0 +1,506 @@
+---
+title: "LevelDB"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "LevelDB"
+    identifier: "planning_backend_leveldb"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/advanced/backends/leveldb/
+  - /riak/kv/3.0.4/ops/advanced/backends/leveldb/
+---
+
+[upgrade 2.0#upgrading-leveldB]: {{<baseurl>}}
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/3.0.4/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#active-anti-entropy
+
+> **Note on upgrading to 2.0**
+>
+> If you are using LevelDB in a 1.x version of Riak, are upgrading to 2.0,
+and wish to keep using your old `app.config` file for configuration,
+make sure to follow the steps for setting the
+`total_leveldb_mem_percent` parameter in the
+[2.0 upgrade guide][upgrade 2.0#upgrading-leveldB].
+
+[eLevelDB](https://github.com/basho/eleveldb) is an Erlang application
+that encapsulates [LevelDB](http://code.google.com/p/leveldb/), an
+open-source, on-disk key/value store created by Google Fellows Jeffrey
+Dean and Sanjay Ghemawat.
+
+LevelDB is a relatively new entrant into the growing list of key/value
+database libraries, but it has some very interesting qualities that we
+believe make it an ideal candidate for use in Riak. LevelDB's storage
+architecture is more like
+[BigTable's](http://en.wikipedia.org/wiki/BigTable) memtable/sstable
+model than it is like Bitcask. This design and implementation provide
+the possibility of a storage engine without Bitcask's RAM limitation.
+
+> **Note:** Riak uses a fork of LevelDB. The code can be found
+[on Github](https://github.com/basho/leveldb).
+
+A number of changes have been introduced in the LevelDB backend in Riak
+2.0:
+
+* There is now only _one_ performance-related setting that Riak users
+  need to define---`leveldb.maximum_memory.percent`---as LevelDB now
+  dynamically sizes the file cache and block sizes based upon active
+  [vnodes][glossary vnode] assigned to the node.
+* The LevelDB backend in Riak 2.0 utilizes a new, faster threading model
+  for background compaction work on `.sst` table files. The new model
+  has increased throughput by at least 10% in all test scenarios.
+* Delete operations now receive priority handling in compaction
+  selection, which means more aggressive reclaiming of disk space than
+  in previous versions of Riak's LevelDB backend.
+* Nodes storing massive key datasets (e.g. in the billions of keys) now
+  receive increased throughput due to automatic management of LevelDB's
+  block size parameter. This parameter is slowly raised to increase the
+  number of files that can be open simultaneously, improving random read
+  performance.
+
+## Strengths
+
+1. **License** - The LevelDB and eLevelDB licenses are the [New BSD
+   License](http://www.opensource.org/licenses/bsd-license.php) and the
+   [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0.html),
+   respectively. We'd like to thank Google and the authors of LevelDB at
+   Google for choosing a completely FLOSS license so that everyone can
+   benefit from this innovative storage engine.
+2. **Data compression** - LevelDB provides two compression algorithms
+   to reduce storage size and make more efficient use of storage bandwidth:
+   * Google's [Snappy](https://code.google.com/p/snappy/) data compression
+   * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) data
+     compression
+
+   Enabling compression means more CPU usage but less disk space. Compression
+   is especially good for text data, including raw text, Base64, JSON, etc.
+
+## Weaknesses
+
+1. Read access can be slow when there are many levels to search.
+2. LevelDB may have to do a few disk seeks to satisfy a read: one disk
+   seek per level. If 10% of the database fits in memory, only one seek
+   is needed, for the last level, since all of the earlier levels should
+   end up cached in the OS buffer cache for most filesystems; if only 1%
+   fits in memory, LevelDB will need two seeks.
+
+## Installing eLevelDB
+
+Riak ships with eLevelDB included within the distribution, so there is
+no separate installation required. However, Riak is configured to use
+the Bitcask storage engine by default. To switch to eLevelDB, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveldb`:
+
+```riakconf
+storage_backend = leveldb
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_eleveldb_backend},
+    %% ...
+    ]}
+```
+
+## Configuring eLevelDB
+
+eLevelDB's default behavior can be modified by adding/changing
+parameters in the `eleveldb` section of [`riak.conf`][config reference]. The section below details the parameters you'll use to modify eLevelDB.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for eLevelDB are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveldb.data_root` | LevelDB data root | `./data/leveldb`
+`leveldb.maximum_memory.percent` | Defines the percentage (between 1 and 100) of total server memory to assign to LevelDB. LevelDB will dynamically adjust its internal cache sizes as Riak activates/inactivates [vnodes][glossary vnode] on this server to stay within this size. | `70`
+
+If you are using the older, `app.config`-based system, the equivalent of
+`leveldb.data_root` is the `data_root` setting, as in the following
+example:
+
+```appconfig
+{eleveldb, [
+    {data_root, "/path/to/leveldb"},
+
+    %% Other eleveldb-specific settings
+]}
+```
+
+The `leveldb.maximum_memory.percent` setting is only available in the
+newer configuration system.
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### ext4 Options
+
+The ext4 filesystem defaults include two options that increase integrity
+but slow performance. Because Riak's integrity is based on multiple
+nodes holding the same data, these two options can be changed to boost
+LevelDB's performance. We recommend setting `barrier=0` and
+`data=writeback`.
+
+#### CPU Throttling
+
+If CPU throttling is enabled, disabling it can boost LevelDB performance
+in some cases.
+
+#### No Entropy
+
+If you are using the HTTPS protocol, note that the 2.6 kernel is widely
+known for stalling programs waiting for SSL entropy bits. In that case,
+we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line. The TSC clocksource has been identified as causing issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[LevelDB](http://leveldb.googlecode.com/svn/trunk/doc/impl.html) is a
+Google-sponsored open source project that has been incorporated into an
+Erlang application and integrated into Riak for storage of key/value
+information on disk. The implementation of LevelDB is similar in spirit
+to the representation of a single Bigtable tablet (section 5.3).
+
+### How Levels Are Managed
+
+LevelDB is a memtable/sstable design. The set of sorted tables is
+organized into a sequence of levels. Each level stores approximately ten
+times as much data as the level before it. The sorted table generated
+from a flush is placed in a special young level (also called level-0).
+When the number of young files exceeds a certain threshold (currently
+four), all of the young files are merged together with all of the
+overlapping level-1 files to produce a sequence of new level-1 files (a
+new level-1 file is created for every 2MB of data).
+
+Files in the young level may contain overlapping keys. However, files in
+other levels have distinct, non-overlapping key ranges. Consider level
+number L where L >= 1. When the combined size of files in level-L
+exceeds (10^L) MB (i.e. 10MB for level-1, 100MB for level-2, ...), one
+file in level-L, and all of the overlapping files in level-(L+1), are
+merged to form a set of new files for level-(L+1). These merges have the
+effect of gradually migrating new updates from the young level to the
+largest level using only bulk reads and writes (i.e., minimizing
+expensive disk seeks).
+
+When the size of level L exceeds its limit, LevelDB will compact it in a
+background thread. The compaction picks a file from level L and all
+overlapping files from the next level L+1. Note that if a level-L file
+overlaps only part of a level-(L+1) file, the entire file at level-(L+1)
+is used as an input to the compaction and will be discarded after the
+compaction. Compactions from level-0 to level-1 are treated specially
+because level-0 is special (files in it may overlap each other). A
+level-0 compaction may pick more than one level-0 file in case some of
+these files overlap each other.
+
+A compaction merges the contents of the picked files to produce a
+sequence of level-(L+1) files. LevelDB will switch to producing a new
+level-(L+1) file after the current output file has reached the target
+file size (2MB). LevelDB will also switch to a new output file when the
+key range of the current output file has grown enough to overlap more
+than ten level-(L+2) files. This last rule ensures that a later
+compaction of a level-(L+1) file will not pick up too much data from
+level-(L+2).
+
+Compactions for a particular level rotate through the key space. In more
+detail, for each level L, LevelDB remembers the ending key of the last
+compaction at level L. The next compaction for level L will pick the
+first file that starts after this key (wrapping around to the beginning
+of the key space if there is no such file).
+
+Level-0 compactions will read up to four 1MB files from level-0, and at
+worst all the level-1 files (10MB) (i.e., LevelDB will read 14MB and
+write 14MB in that case).
+
+Other than the special level-0 compactions, LevelDB will pick one 2MB
+file from level L. In the worst case, this will overlap with
+approximately 12 files from level L+1 (10 because level-(L+1) is ten
+times the size of level-L, and another two at the boundaries since the
+file ranges at level-L will usually not be aligned with the file ranges
+at level-L+1). The compaction will therefore read 26MB and write 26MB.
+Assuming a disk IO rate of 100MB/s, the worst compaction cost will be
+approximately 0.5 seconds.
+
+If we throttle the background writing to a reasonably slow rate, for
+instance 10% of the full 100MB/s speed, a compaction may take up to 5
+seconds. If the user is writing at 10MB/s, LevelDB might build up lots
+of level-0 files (~50 to hold the 5*10MB). This may significantly
+increase the cost of reads due to the overhead of merging more files
+together on every read.
+
+### Compaction
+
+Levels are compacted into ordered data files over time. Compaction first
+computes a score for each level as the ratio of bytes in that level to
+desired bytes. For level 0, it computes the ratio of files to desired
+files instead. The level with the highest score is compacted.
+
+When compacting L0, the only special case to consider is that, after
+picking the primary L0 file to compact, LevelDB will check other L0
+files to determine the degree to which they overlap. This is an attempt
+to avoid some I/O; in practice, we can expect L0 compactions to usually,
+if not always, involve all of the L0 files.
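+
+To make the selection rule concrete, here is a minimal Erlang sketch of
+the scoring described above. It is illustrative only; LevelDB itself is
+written in C++, and the module and function names here are invented for
+this example:
+
+```erlang
+%% Illustrative sketch only; not LevelDB's actual code.
+%% Pick the level to compact: level 0 is scored by file count against a
+%% desired count of four files; level N >= 1 is scored by total bytes
+%% against a desired size of 10^N MB.
+-module(compaction_sketch).
+-export([pick_level/1]).
+
+%% Levels is a list of {LevelNumber, FileCount, Bytes} tuples.
+pick_level(Levels) ->
+    Scored = [{score(Level), N} || {N, _, _} = Level <- Levels],
+    {_BestScore, BestLevel} = lists:max(Scored),
+    BestLevel.
+
+score({0, FileCount, _Bytes}) ->
+    FileCount / 4;
+score({N, _FileCount, Bytes}) ->
+    Bytes / (math:pow(10, N) * 1024 * 1024).
+```
+
+For example, `compaction_sketch:pick_level([{0, 5, 3000000}, {1, 10, 12000000}])`
+returns `0`, because five level-0 files score higher than 12MB of
+level-1 data.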
+
+See the `PickCompaction` routine in
+[version_set.cc](https://github.com/basho/leveldb/blob/develop/db/version_set.cc)
+for all the details.
+
+### Comparison of eLevelDB and Bitcask
+
+LevelDB is a persistent ordered map; Bitcask is a persistent hash table
+(no ordered iteration). Bitcask stores keys in memory, so for databases
+with a large number of keys it may exhaust available physical memory and
+then swap into virtual memory, causing a severe slowdown in performance.
+Bitcask guarantees at most one disk seek per look-up. LevelDB may have
+to do a small number of disk seeks. For instance, a read needs one disk
+seek per level. If 10% of the database fits in memory, LevelDB will need
+to do one seek (for the last level, since all of the earlier levels
+should end up cached in the OS buffer cache). If 1% fits in memory,
+LevelDB will need two seeks.
+
+## Recovery
+
+LevelDB never writes in place: it always appends to a log file, or
+merges existing files together to produce new ones. So an OS crash will
+cause a partially written log record (or a few partially written log
+records). LevelDB recovery code uses checksums to detect this and will
+skip the incomplete records.
+
+### eLevelDB Database Files
+
+Below are two directory listings showing what you would expect to find
+on disk when using eLevelDB. In this example, we use a 64-partition
+ring, which results in 64 separate directories, each with its own
+LevelDB database:
+
+```bash
+leveldb/
+|-- 0
+|   |-- 000003.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   `-- MANIFEST-000002
+|-- 1004782375664995756265033323087618185977059737600
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+|-- 1027618338748291114361965898003636498195577569280
+|   |-- 000005.log
+|   |-- CURRENT
+|   |-- LOCK
+|   |-- LOG
+|   |-- LOG.old
+|   `-- MANIFEST-000004
+
+... etc ...
+
+`-- 981946412581700398168100746981252653831329677312
+    |-- 000005.log
+    |-- CURRENT
+    |-- LOCK
+    |-- LOG
+    |-- LOG.old
+    `-- MANIFEST-000004
+
+64 directories, 378 files
+```
+
+After performing a large number of PUT (write) operations, the Riak
+cluster running eLevelDB will look something like this:
+
+```bash
+tree leveldb
+```
+
+The result should look something like this:
+
+```
+├── 0
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+├── 1004782375664995756265033323087618185977059737600
+│   ├── 000003.log
+│   ├── CURRENT
+│   ├── LOCK
+│   ├── LOG
+│   ├── MANIFEST-000002
+│   ├── sst_0
+│   ├── sst_1
+│   ├── sst_2
+│   ├── sst_3
+│   ├── sst_4
+│   ├── sst_5
+│   └── sst_6
+
+... etc ...
+```
+
+## Tiered Storage
+
+Google's original LevelDB implementation stored all `.sst` table files
+in a single database directory. In Riak 1.3, the original LevelDB code
+was modified to store `.sst` files in subdirectories representing each
+"level" of the file, e.g. `sst_0` or `sst_1`, in order to speed up
+database repair operations.
+
+An additional advantage of this approach is that it enables Riak
+operators to mount alternative storage devices at each level of a
+LevelDB database. This can be an effective strategy because LevelDB is
+write intensive in lower levels, with the write intensity declining as
+the level number increases. This is due to LevelDB's storage strategy,
+which places more frequently updated data in lower levels.
+
+Because write intensity differs by level, performance can be improved by
+mounting faster, more expensive storage arrays in lower levels and
+slower, less expensive arrays at higher levels. Tiered storage enables
+you to configure the level at which LevelDB switches from a faster array
+to a slower array.
+
+> **Note on write throttling**
+>
+> High-volume, sustained write operations can occasionally fill the
+higher-speed storage arrays before LevelDB has had the opportunity to
+move data to the low-speed arrays. LevelDB's write throttle will slow
+incoming write operations to allow compactions to catch up, as would be
+the case when using a single storage array.
+
+### Configuring Tiered Storage
+
+If you are using the newer, `riak.conf`-based configuration system, the
+following parameters can be used to configure LevelDB tiered storage:
+
+Parameter | Description
+:---------|:-----------
+`leveldb.tiered` | The level number at which data should switch to the slower array. The default is `0`, which disables the feature.
+`leveldb.tiered.path.fast` | The path prefix for `.sst` files below the level set by `leveldb.tiered`
+`leveldb.tiered.path.slow` | The path prefix for `.sst` files at and above the level set by `leveldb.tiered`
+
+If you are using the older, `app.config`-based system, the example below
+will show you the equivalents of the settings listed in the table above.
+
+#### Example
+
+The following example LevelDB tiered storage
+[configuration][config reference] for Riak 2.0 sets the level for
+switching storage arrays to 4 and the file path prefix to `fast_raid`
+for the faster array and `slow_raid` for the slower array:
+
+```riakconf
+leveldb.tiered = 4
+leveldb.tiered.path.fast = /mnt/fast_raid
+leveldb.tiered.path.slow = /mnt/slow_raid
+```
+
+```appconfig
+{eleveldb, [
+    {tiered_slow_level, 4},
+    {tiered_fast_prefix, "/mnt/fast_raid"},
+    {tiered_slow_prefix, "/mnt/slow_raid"}
+]}
+```
+
+With this configuration, level directories `sst_0` through `sst_3` will
+be stored in `/mnt/fast_raid`, while directories `sst_4` through `sst_6`
+will be stored in `/mnt/slow_raid`.
+
+### Selecting a Level
+
+LevelDB will perform optimally when as much data as possible is stored
+in the faster array. The amount of data that can be stored in the faster
+array depends on the size of your array and the total number of LevelDB
+databases (i.e. the total number of Riak [vnodes][glossary vnode])
+in your cluster. The following table shows approximate sizes (in
+megabytes) for three quantities at each level: the amount of raw data
+stored in the level, the cumulative size of all levels up to and
+including the specified level, and the cumulative size including active
+anti-entropy data.
+
+Level | Level Size | Cumulative Size | Cumulative with AAE
+:-----|:-----------|:----------------|:-------------------
+0 | 360 | 360 | 720
+1 | 2,160 | 2,520 | 5,040
+2 | 2,940 | 5,460 | 10,920
+3 | 6,144 | 11,604 | 23,208
+4 | 122,880 | 134,484 | 268,968
+5 | 2,362,232 | 2,496,716 | 4,993,432
+6 | not limited | not limited | not limited
+
+To select the appropriate value for `leveldb.tiered`, use the following
+steps:
+
+* Determine the value of (ring size) / (N - 1), where ring size is the
+  value of the `ring_size` configuration parameter and N is the number
+  of nodes in the cluster. For a `ring_size` of 128 and a cluster with
+  10 nodes, the value would be 14.
+* Select either the **Cumulative Size** or **Cumulative with AAE**
+  column from the table above. Select the third column if you are not
+  using active anti-entropy or the fourth column if you are (i.e. if the
+  `anti_entropy` [configuration parameter][config reference#aae] is set to `active`).
+* Multiply the value from the first step by the cumulative column in
+  each row in the table. The first result that exceeds your fast storage
+  array capacity will provide the level number that should be used for
+  your `leveldb.tiered` setting.
+
+### Migrating from One Configuration to Another
+
+If you want to use tiered storage in a new Riak installation, you don't
+need to take any steps beyond setting configuration. The rest is
+automated.
+
+But if you'd like to use tiered storage in an existing installation that
+is not currently using it, you will need to manually move your
+installation's `.sst` files from one configuration to another.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/backend/leveled.md b/content/riak/kv/3.0.4/setup/planning/backend/leveled.md
new file mode 100644
index 0000000000..e7e950ec15
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/backend/leveled.md
@@ -0,0 +1,141 @@
+---
+title: "Leveled"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Leveled"
+    identifier: "planning_backend_leveled"
+    weight: 101
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/advanced/backends/leveled/
+  - /riak/kv/3.0.4/ops/advanced/backends/leveled/
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[perf index]: {{<baseurl>}}riak/kv/3.0.4/using/performance
+[config reference#aae]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#active-anti-entropy
+
+[Leveled](https://github.com/martinsumner/leveled) is a simple Key-Value
+store based on the concept of Log-Structured Merge Trees, with the
+following characteristics:
+
+- Optimised for workloads with larger values (e.g. > 4KB).
+- Explicitly supports HEAD requests in addition to GET requests:
+  - Splits the storage of the value between keys/metadata and body
+    (assuming some definition of metadata is provided);
+  - Allows the application to define what constitutes object metadata
+    and what constitutes the body (value-part) of the object, and to
+    assign tags to objects to manage multiple object types with
+    different extraction rules;
+  - Stores keys/metadata in a merge tree and the full object in a
+    journal of CDB files, allowing for HEAD requests with lower
+    overheads than GET requests, and for queries that traverse
+    keys/metadata with fewer side effects on the page cache than folds
+    over keys/objects.
+- Support for tagging of object types and the implementation of
+  alternative store behaviour based on type:
+  - Allows changes to extract specific information as metadata to be
+    returned from HEAD requests;
+  - Potentially usable for objects with special retention or merge
+    properties.
+- Support for low-cost clones without locking to provide for scanning
+  queries (e.g. secondary indexes), specifically where there is a need
+  to scan across keys and metadata (not values).
+- Written in Erlang as a message-passing system between actors.
+
+
+## Strengths
+
+1. Leveled was developed specifically as a potential backend for Riak, with features such as:
+   * Support for secondary indexes
+   * Multiple fold types
+   * Auto expiry of objects
+2. Optimised for workloads with larger values (e.g. > 4KB).
+3. Explicitly supports HEAD requests in addition to GET requests.
+4. Support for low-cost clones without locking to provide for scanning queries (e.g. secondary indexes).
+
+## Weaknesses
+
+1. Leveled is still a comparatively new technology and more likely to suffer from edge case issues than Bitcask or LevelDB, simply because they've been around longer and have been more thoroughly tested via usage in customer environments.
+2. Leveled works better with medium to larger sized objects. It works perfectly well with small objects, but the additional disk space overhead may render LevelDB a better choice if disk space is at a premium and all of your data will be exclusively limited to a few KB or less. This may change as Leveled matures, though.
+
+## Installing leveled
+
+Leveled is included with Riak KV 3.0.4 and beyond, so there is no need
+to install anything further. To switch to leveled, set the
+`storage_backend` variable in [`riak.conf`][config reference] to
+`leveled`:
+
+```riakconf
+storage_backend = leveled
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_leveled_backend},
+    %% ...
+    ]}
+```
+
+## Configuring leveled
+
+Leveled's default behavior can be modified by adding/changing
+parameters in the `leveled` section of the [`riak.conf`][config reference]. The section below details the parameters you'll use to modify leveled.
+
+The configuration values that can be set in your
+[`riak.conf`][config reference] for leveled are as follows:
+
+Config | Description | Default
+:------|:------------|:-------
+`leveled.data_root` | leveled data root. | `./data/leveled`
+`leveled.sync_strategy` | Strategy for flushing data to disk. | `none`
+`leveled.compression_method` | Compression method. | `native`
+`leveled.compression_point` | Compression point - the point at which compression is applied to the Journal. | `on_receipt`
+`leveled.log_level` | Log level - sets the minimum log level to be used within leveled. | `info`
+`leveled.journal_size` | The approximate size (in bytes) at which a Journal file should be rolled. | `1000000000`
+`leveled.compaction_runs_perday` | The number of journal compactions per vnode per day. | `24`
+`leveled.compaction_low_hour` | The hour of the day in which journal compaction can start. | `0`
+`leveled.compaction_top_hour` | The hour of the day after which journal compaction should stop. | `23`
+`leveled.max_run_length` | Max journal files per compaction run. | `4`
+
+### Recommended Settings
+
+Below are **general** configuration recommendations for Linux
+distributions. Individual users may need to tailor these settings for
+their application.
+
+#### sysctl
+
+For production environments, please see [System Performance Tuning][perf index]
+for the recommended `/etc/sysctl.conf` settings.
+
+#### Block Device Scheduler
+
+Beginning with the 2.6 kernel, Linux gives you a choice of four I/O
+[elevator models](http://www.gnutoolbox.com/linux-io-elevator/). We
+recommend using the NOOP elevator. You can do this by changing the
+scheduler on the Linux boot line: `elevator=noop`.
+
+#### No Entropy
+
+If you are using the HTTPS protocol, note that the 2.6 kernel is widely
+known for stalling programs waiting for SSL entropy bits. In that case,
+we recommend installing the
+[HAVEGE](http://www.irisa.fr/caps/projects/hipsor/) package for
+pseudorandom number generation.
+
+#### clocksource
+
+We recommend setting `clocksource=hpet` on your Linux kernel's `boot`
+line. The TSC clocksource has been identified as causing issues on
+machines with multiple physical processors and/or CPU throttling.
+
+#### swappiness
+
+We recommend setting `vm.swappiness=0` in `/etc/sysctl.conf`. The
+`vm.swappiness` default is 60, which is aimed toward laptop users with
+application windows. This was a key change for MySQL servers and is
+often referenced in database performance literature.
+
+## Implementation Details
+
+[Leveled](https://github.com/martinsumner/leveled) is an open source project that has been developed specifically as a backend option for Riak, rather than as a generic backend.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/backend/memory.md b/content/riak/kv/3.0.4/setup/planning/backend/memory.md
new file mode 100644
index 0000000000..f39309b4c8
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/backend/memory.md
@@ -0,0 +1,147 @@
+---
+title: "Memory"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Memory"
+    identifier: "planning_backend_memory"
+    weight: 102
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/advanced/backends/memory/
+  - /riak/kv/3.0.4/ops/advanced/backends/memory/
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[plan backend multi]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/multi
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb
+
+The Memory storage backend uses in-memory tables to store all data.
+This data is never persisted to disk or to any other storage mechanism.
+The Memory storage engine is best used for testing Riak clusters or for
+storing small amounts of transient state in production systems.
+
+Internally, the Memory backend uses Erlang ETS tables to manage data.
+More information can be found in the
+[official Erlang documentation](http://www.erlang.org/doc/man/ets.html).
+
+## Enabling the Memory Backend
+
+To enable the memory backend, edit your [configuration files][config reference]
+for each Riak node and specify the Memory backend as shown in the following
+example:
+
+```riakconf
+storage_backend = memory
+```
+
+```appconfig
+{riak_kv, [
+    ...,
+    {storage_backend, riak_kv_memory_backend},
+    ...
+    ]}
+```
+
+**Note**: If you *replace* a previously specified backend (by removing
+it or commenting it out) with the Memory backend as shown above, data
+belonging to the previously specified backend will still be preserved
+on the filesystem but will no longer be accessible through Riak unless
+that backend is enabled again.
+
+If you require multiple backends in your configuration, please consult
+the [Multi backend documentation][plan backend multi].
+
+## Configuring the Memory Backend
+
+The Memory backend enables you to configure two fundamental aspects of
+object storage: maximum memory usage per [vnode][glossary vnode]
+and object expiry.
+
+### Max Memory
+
+This setting specifies the maximum amount of memory consumed by the
+Memory backend. It's important to note that this setting acts on a
+*per-vnode basis*, not on a per-node or per-cluster basis. This should
+be taken into account when planning for memory usage with the Memory
+backend, as the total memory used will be the max memory times the
+number of vnodes in the cluster. For example, with a ring size of 64
+and a 1GB maximum per vnode, the Memory backend could consume up to
+64GB across the cluster (8GB on each of 8 nodes).
+
+When the threshold value that you set has been met in a particular
+vnode, Riak will begin discarding objects, beginning with the oldest
+object and proceeding until memory usage returns below the allowable
+threshold.
+
+You can configure maximum memory using the
+`memory_backend.max_memory_per_vnode` setting. You can specify
+`max_memory_per_vnode` however you'd like, using kilobytes, megabytes,
+or even gigabytes.
+
+The following are all possible settings:
+
+```riakconf
+memory_backend.max_memory_per_vnode = 500KB
+memory_backend.max_memory_per_vnode = 10MB
+memory_backend.max_memory_per_vnode = 2GB
+```
+
+```appconfig
+%% In the app.config-based system, the equivalent setting is max_memory,
+%% which must be expressed in megabytes:
+
+{riak_kv, [
+    %% storage_backend specifies the Erlang module defining the storage
+    %% mechanism that will be used on this node.
+
+    {storage_backend, riak_kv_memory_backend},
+    {memory_backend, [
+        ...,
+        {max_memory, 4096}, %% 4GB in megabytes
+        ...
+    ]}
+]}
+```
+
+To determine an optimal max memory setting, we recommend consulting the
+documentation on [LevelDB cache size][plan backend leveldb].
+
+### TTL
+
+The time-to-live (TTL) parameter specifies the amount of time an object
+remains in memory before it expires. The minimum time is one second.
+
+In the newer, `riak.conf`-based configuration system, you can specify
+`ttl` in seconds, minutes, hours, days, etc. The following are all
+possible settings:
+
+```riakconf
+memory_backend.ttl = 1s
+memory_backend.ttl = 10m
+memory_backend.ttl = 3h
+```
+
+```appconfig
+%% In the app.config-based system, the ttl setting must be expressed in
+%% seconds:
+
+{memory_backend, [
+    %% other settings
+    {ttl, 86400}, %% Set to 1 day
+    %% other settings
+    ]}
+```
+
+> **Dynamically Changing `ttl`**
+>
+> There is currently no way to dynamically change the `ttl` setting for a
+bucket or bucket type. The current workaround would be to define
+multiple Memory backends using the Multi backend, each with a different
+`ttl` value. For more information, consult the documentation on the
+[Multi][plan backend multi] backend.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/backend/multi.md b/content/riak/kv/3.0.4/setup/planning/backend/multi.md
new file mode 100644
index 0000000000..9e92759f14
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/backend/multi.md
@@ -0,0 +1,230 @@
+---
+title: "Multi-backend"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Multi-backend"
+    identifier: "planning_backend_multi"
+    weight: 103
+    parent: "planning_choose_backend"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/advanced/backends/multi/
+  - /riak/kv/3.0.4/ops/advanced/backends/multi/
+---
+
+[concept buckets]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb
+[plan backend memory]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/memory
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types
+[use admin riak-admin cli]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin
+
+Riak allows you to run multiple backends within a single Riak cluster.
+Selecting the Multi backend enables you to use different storage
+backends for different [buckets][concept buckets]. Any combination of the three
+available backends---[Bitcask][plan backend bitcask], [LevelDB][plan backend leveldb], and [Memory][plan backend memory]---can be used.
+
+## Configuring Multiple Backends
+
+You can set up your cluster to use the Multi backend using Riak's
+[configuration files][config reference].
+
+```riakconf
+storage_backend = multi
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {storage_backend, riak_kv_multi_backend},
+    %% ...
+]},
+```
+
+Remember that you must stop and then restart each node when you change
+storage backends or modify any other configuration.
+
+## Using Multiple Backends
+
+In Riak 2.0 and later, we recommend using multiple backends by applying
+them to buckets [using bucket types][usage bucket types]. Assuming that the cluster has already been configured to use the `multi` backend, this process
+involves three steps:
+
+1. Creating a bucket type that enables buckets of that type to use the
+   desired backends
+2. Activating that bucket type
+3. Setting up your application to use that type
+
+Let's say that we've set up our cluster to use the Multi backend and we
+want to use [LevelDB][plan backend leveldb] and the [Memory][plan backend memory] backend for different sets of data. First, we need to create two bucket types, one which sets the `backend` bucket property to `leveldb` and the other which sets that property to `memory`. All bucket type-related activity is performed through the [`riak-admin`][use admin riak-admin cli] command interface.
+
+We'll call our bucket types `leveldb_backend` and `memory_backend`, but
+you can use whichever names you wish.
+
+```bash
+riak-admin bucket-type create leveldb_backend '{"props":{"backend":"leveldb"}}'
+riak-admin bucket-type create memory_backend '{"props":{"backend":"memory"}}'
+```
+
+Then, we must activate those bucket types so that they can be used in
+our cluster:
+
+```bash
+riak-admin bucket-type activate leveldb_backend
+riak-admin bucket-type activate memory_backend
+```
+
+Once those types have been activated, any objects stored in buckets
+bearing the type `leveldb_backend` will be stored in LevelDB, whereas
+all objects stored in buckets of the type `memory_backend` will be
+stored in the Memory backend.
+
+More information can be found in our documentation on [using bucket types][usage bucket types].
+
+## Configuring Multiple Backends
+
+Once you've set up your cluster to use multiple backends, you can
+configure each backend on its own. All configuration options available
+for LevelDB, Bitcask, and Memory are available to you when using the
+Multi backend.
+
+#### Using the Newer Configuration System
+
+If you are using the newer, `riak.conf`-based [configuration system][config reference], you can configure the backends by
+prefacing each configuration with `multi_backend`.
+
+Here is an example of the general form for configuring multiple
+backends:
+
+```riakconf
+multi_backend.$name.$setting_name = setting
+```
+
+If you are using, for example, the LevelDB and Bitcask backends and wish
+to set LevelDB's `bloomfilter` setting to `off` and the Bitcask
+backend's `io_mode` setting to `nif`, you would do that as follows:
+
+```riakconf
+multi_backend.leveldb.bloomfilter = off
+multi_backend.bitcask.io_mode = nif
+```
+
+#### Using the Older Configuration System
+
+If you are using the older, `app.config`-based configuration system,
+configuring multiple backends involves adding one or more
+backend-specific sections to your `riak_kv` settings (in addition to
+setting the `storage_backend` setting to `riak_kv_multi_backend`, as
+shown above).
+
+> **Note**: If you are defining multiple file-based backends of the same
+type, each of these must have a separate `data_root` directory defined.
+
+While all configuration parameters can be placed anywhere within the
+`riak_kv` section of `app.config`, in general we recommend that you
+place them in the section containing other backend-related settings to
+keep the settings organized.
+
+Below is the general form for your `app.config` file:
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        %% Here's where you set the individual multiplexed backends
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_mult/"},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"bitcask_expiry_mult">>, riak_kv_bitcask_backend, [
+            %% bitcask configuration
+            {data_root, "/var/lib/riak/bitcask_expiry_mult/"},
+            {expiry_secs, 86400},
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"second_eleveldb_mult">>, riak_kv_eleveldb_backend, [
+            %% eleveldb with a different configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]},
+        {<<"memory_mult">>, riak_kv_memory_backend, [
+            %% memory configuration
+            {config1, ConfigValue1},
+            {config2, ConfigValue2}
+        ]}
+    ]},
+    %% ...
+]},
+```
+
+Note that in each of the subsections of the `multi_backend` setting, the
+name of each backend you wish to configure can be anything you would
+like. Directly after naming the backend, you must specify which of the
+backends corresponds to that name, i.e. `riak_kv_bitcask_backend`,
+`riak_kv_eleveldb_backend`, or `riak_kv_memory_backend`. Once you have
+done that, the various configurations for each named backend can be set
+as objects in an Erlang list.
+
+## Example Configuration
+
+Imagine that you are using both Bitcask and LevelDB in your cluster, and
+you would like storage to default to Bitcask. The following
+configuration would create two backend configurations, named
+`bitcask_mult` and `leveldb_mult`, respectively, while also setting the
+data directory for each backend and specifying that `bitcask_mult` is
+the default.
+
+```riakconf
+storage_backend = multi
+
+multi_backend.bitcask_mult.storage_backend = bitcask
+multi_backend.bitcask_mult.bitcask.data_root = /var/lib/riak/bitcask_mult
+
+multi_backend.leveldb_mult.storage_backend = leveldb
+multi_backend.leveldb_mult.leveldb.data_root = /var/lib/riak/leveldb_mult
+
+multi_backend.default = bitcask_mult
+```
+
+```appconfig
+{riak_kv, [
+    %% ...
+    {multi_backend_default, <<"bitcask_mult">>},
+    {multi_backend, [
+        {<<"bitcask_mult">>, riak_kv_bitcask_backend, [
+            {data_root, "/var/lib/riak/bitcask"}
+        ]},
+        {<<"leveldb_mult">>, riak_kv_eleveldb_backend, [
+            {data_root, "/var/lib/riak/leveldb"}
+        ]}
+    ]}
+    %% ...
+]}
+```
+
+## Multi Backend Memory Use
+
+Each Riak storage backend has settings for configuring how much memory
+the backend can use, e.g. caching for LevelDB or for the entire set of
+data for the Memory backend. Each of these backends suggests allocating
+up to 50% of available memory for this purpose. When using the Multi
+backend, make sure that the sum of all backend memory use is at or
+below 50%. For example, using three backends with each set to 50%
+memory usage will inevitably lead to memory problems.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/best-practices.md b/content/riak/kv/3.0.4/setup/planning/best-practices.md
new file mode 100644
index 0000000000..ddf6cc64a6
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/best-practices.md
@@ -0,0 +1,145 @@
+---
+title: "Scaling and Operating Riak Best Practices"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Best Practices"
+    identifier: "planning_best_practices"
+    weight: 105
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/planning/best-practices
+  - /riak/kv/3.0.4/ops/building/planning/best-practices
+---
+
+[use ref handoff]: {{<baseurl>}}riak/kv/3.0.4/using/reference/handoff
+[config mapreduce]: {{<baseurl>}}riak/kv/3.0.4/configuring/mapreduce
+[glossary aae]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#active-anti-entropy-aae
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes
+
+Riak KV is a database designed for easy operation and scaling. Below are some best practices that will enable you to improve performance and reliability at all stages in the life of your Riak cluster.
+
+## Disk Capacity
+
+Filling up disks is a serious problem in Riak. In general, you should
+add capacity under the following conditions:
+
+* a disk becomes more than 80% full
+* you have fewer than 10 days of capacity remaining at current rates of
+  growth
+
+## RAID Levels
+
+Riak provides resilience through its built-in redundancy.
+
+* RAID0 can be used to increase the performance at the expense of
+  single-node reliability
+* RAID5/6 can be used to increase the reliability over RAID0 but still
+  offers higher performance than single disks
+* You should choose a RAID level (or no RAID) that you’re comfortable
+  with
+
+## Disk Leeway
+
+* Adding new nodes instantly increases the total capacity of the
+  cluster, but you should allow enough internal network capacity that
+  [handing off][use ref handoff] existing data outpaces the arrival of new
+  data.
+* Once you’ve reached a scale at which the amount of new data arriving
+  is a small fraction of the cluster's total capacity, you can add new
+  nodes when you need them. You should be aware, however, that adding
+  new nodes can actually _increase_ disk usage on existing nodes in the
+  short term as data is rebalanced within the cluster.
+* If you are certain that you are likely to run out of capacity, we
+  recommend allowing a week or two of leeway so that you have plenty of
+  time to add nodes and for [handoff][use ref handoff] to occur before the disks reach
+  capacity
+* For large volumes of storage it's usually prudent to add more capacity
+  once a disk is 80% full
+
+## CPU Capacity Leeway
+
+* In a steady state, your peak CPU utilization, ignoring other
+  processes, should be less than 30%
+* If you provide sufficient CPU capacity leeway, you’ll have spare
+  capacity to handle other processes, such as backups, [handoff][use ref handoff], and [active anti-entropy][glossary aae]
+
+## Network Capacity Leeway
+
+* Network traffic tends to be “bursty,” i.e. it tends to vary both quite
+  a bit and quickly
+* Your normal load, as averaged over a 10-minute period, should be no
+  more than 20% of maximum capacity
+* Riak generates 3-5 times the amount of intra-node traffic as inbound
+  traffic, so you should allow for this in your network design
+
+## When to Add Nodes
+
+You should add more nodes in the following scenarios:
+
+* you have reached 80% of storage capacity
+* you have less than 10 days of leeway before you expect the cluster to
+  fill up
+* the current node's IO/CPU activity is higher than average for an
+  extended period of time, especially for [MapReduce][config mapreduce]
+  operations
+
+An alternative to adding more nodes is to add more storage to existing
+nodes. However, you should do this only if:
+
+* you’re confident that there is plenty of spare network and CPU
+  capacity, _and_
+* you can upgrade storage _equally across all nodes_. If storage is
+  added in an unbalanced fashion, Riak will continue storing data
+  equally across nodes, and the node with the smallest available storage
+  space is likely to fail first. Thus, if one node has 1 TB of storage
+  but the rest have 1.5 TB, Riak will overload the 1 TB node first.
+
+The recommendations above should be taken only as general guidelines
+because the specifics of your cluster will matter a great deal when
+making capacity decisions. The following considerations are worth
+bearing in mind:
+
+* If your disks are 90% full but only filling up 1% per month, this
+  might be a perfectly "safe" scenario. In cases like this, the velocity
+  of adding new data is more important than any raw total.
+* The burstiness of your write load is also an important consideration.
+  If writes tend to come in large batches that are unpredictably timed,
+  it can be more difficult to estimate when disks will become full,
+  which means that you should probably over-provision storage as a
+  precaution.
+* If Riak shares disks with other processes or is on the system root
+  mount point, i.e. `/`, we recommend leaving a little extra disk space
+  in addition to the estimates discussed above, as other system
+  processes might use disk space unexpectedly.
+
+## How to Add Nodes
+
+* You should add as many additional nodes as you require in one
+  operation (see the sketch below)
+* Don’t add nodes one at a time if you’re adding multiple nodes
+* You can limit the transfer rate so that priority is given to live
+  customer traffic
+
+This process is explored in more detail in [Adding and Removing Nodes][cluster ops add remove node].
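+
+As a quick sketch of that process (the node names here are
+hypothetical), staging several joins and committing them as a single
+operation looks like this:
+
+```bash
+# On each new node, stage a join against a node already in the cluster:
+riak-admin cluster join riak@node1.example.com
+
+# Then, from any node, review the staged changes and commit them all at
+# once, so that data is rebalanced in a single operation:
+riak-admin cluster plan
+riak-admin cluster commit
+```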
+
+## Scaling
+
+* All large-scale systems are bound by the availability of some
+  resources
+* From a stability point of view, the best state for a busy Riak cluster
+  to maintain is the following:
+  * New network connections are limited to ensure that existing network
+    connections consume most network bandwidth
+  * CPU at < 30%
+  * Disk IO at < 90%
+* You should use HAProxy or your application servers to limit new
+  network connections to keep network and IO below 90% and CPU below
+  30%.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/3.0.4/setup/planning/bitcask-capacity-calc.md
new file mode 100644
index 0000000000..09878de153
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/bitcask-capacity-calc.md
@@ -0,0 +1,104 @@
+---
+title: "Bitcask Capacity Calculator"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Bitcask Capacity Calculator"
+    identifier: "planning_cluster_bitcask_capacity"
+    weight: 104
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/planning/bitcask
+  - /riak/kv/3.0.4/ops/building/planning/bitcask
+---
+
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask
+
+These calculators will assist you in sizing your cluster if you plan to
+use the default [Bitcask][plan backend bitcask] storage backend.
+
+This page is designed to give you a rough estimate when sizing your
+cluster. The calculations are a _best guess_, and they tend to be a bit
+on the conservative side. It's important to include a bit of headroom
+as well as room for unexpected growth so that if demand exceeds
+expectations you'll be able to add more nodes to the cluster and stay
+ahead of your requirements.
+
+<div id="node_info" class="calc_info"></div>
+<div class="calculator">
+  <ul>
+    <li>
+      <label for="n_total_keys">Total Number of Keys:</label>
+      <input id="n_total_keys" type="text" size="12" name="n_total_keys" value="" class="calc_input">
+      <span class="error_span" id="n_total_keys_error"></span>
+    </li>
+    <li>
+      <label for="n_bucket_size">Average Bucket Size (Bytes):</label>
+      <input id="n_bucket_size" type="text" size="7" name="n_bucket_size" value="" class="calc_input">
+      <span class="error_span" id="n_bucket_size_error"></span>
+    </li>
+    <li>
+      <label for="n_key_size">Average Key Size (Bytes):</label>
+      <input type="text" size="2" name="n_key_size" id="n_key_size" value="" class="calc_input">
+      <span class="error_span" id="n_key_size_error"></span>
+    </li>
+    <li>
+      <label for="n_record_size">Average Value Size (Bytes):</label>
+      <input id="n_record_size" type="text" size="7" name="n_record_size" value="" class="calc_input">
+      <span class="error_span" id="n_record_size_error"></span>
+    </li>
+    <li>
+      <label for="n_ram">RAM Per Node (in GB):</label>
+      <input type="text" size="4" name="n_ram" id="n_ram" value="" class="calc_input">
+      <span class="error_span" id="n_ram_error"></span>
+    </li>
+    <li>
+      <label for="n_nval"><i>N</i> (Number of Write Copies):</label>
+      <input type="text" size="2" name="n_nval" id="n_nval" value="" class="calc_input">
+      <span class="error_span" id="n_nval_error"></span>
+    </li>
+</ul>
+</div>
+
+## Recommendations
+
+<span id="recommend"></span>
+
+## Details on Bitcask RAM Calculation
+
+With the above information in mind, the following variables will factor
+into your RAM calculation:
+
+Variable | Description
+:--------|:-----------
+Static Bitcask per-key overhead | 44.5 bytes per key
+Estimated average bucket-plus-key length | The combined number of characters your bucket + keynames will require (on average). We'll assume 1 byte per character.
+Estimated total objects | The total number of key/value pairs your cluster will have when started
+Replication Value (`n_val`) | The number of times each key will be replicated when written to Riak (the default is 3)
+
+## The actual equation
+
+Approximate RAM Needed for Bitcask = (static bitcask per-key overhead +
+estimated average bucket+key length in bytes) * estimated total number of
+keys * `n_val`
+
+Example:
+
+* 50,000,000 keys in your cluster to start
+* approximately 30 bytes for each bucket+key name
+* default `n_val` of 3
+
+The amount of RAM you would need for Bitcask is about **10.4 GB** across
+your entire cluster: (44.5 + 30) bytes * 50,000,000 keys * 3 replicas is
+roughly 11.2 billion bytes.
+
+Additionally, Bitcask relies on your operating system's filesystem cache
+to deliver high performance reads. So when sizing your cluster, take
+this into account and plan on having several more gigabytes of RAM
+available for your filesystem cache.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/cluster-capacity.md b/content/riak/kv/3.0.4/setup/planning/cluster-capacity.md
new file mode 100644
index 0000000000..a92744b14c
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/cluster-capacity.md
@@ -0,0 +1,238 @@
+---
+title: "Cluster Capacity Planning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Cluster Capacity"
+    identifier: "planning_cluster_capacity"
+    weight: 103
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/planning/cluster
+  - /riak/kv/3.0.4/ops/building/planning/cluster
+---
+
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/bitcask-capacity-calc
+[plan index]: {{<baseurl>}}riak/kv/3.0.4/setup/planning
+[concept replication]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/replication
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[perf benchmark]: {{<baseurl>}}riak/kv/3.0.4/using/performance/benchmarking
+[LVM]: http://en.wikipedia.org/wiki/Logical_Volume_Manager_(Linux)
+
+
+This document outlines the various elements and variables to keep in mind when planning your Riak cluster. Your use case and environment variables will be specific to what you're building, but this document should set you on the right path when planning and launching a Riak cluster.
+
+## RAM
+
+[RAM](http://en.wikipedia.org/wiki/Random-access_memory) is the most important resource when sizing your Riak cluster. Memory keeps data closer to your users. Memory is essential for running complex MapReduce queries or caching data to provide low-latency request times.
+
+### Bitcask and Memory Requirements
+
+Your choice of local storage backend for Riak impacts your RAM
+needs. Though Riak has pluggable backend storage, Bitcask is the
+default. Why? Because it's built for:
+
+* low-latency request times
+* high throughput
+* the ability to handle data sets much larger than RAM without degradation
+
+Bitcask's one major requirement, however, is that it must keep the
+entire **keydir** in memory. The keydir is a hash table that maps each
+The keydir is a hash table that maps each concatenated bucket + key name in a Bitcask (“a Bitcask” is the name for each file contained within each Bitcask backend) to a fixed-size structure giving the file, offset, and size of the most recently written entry for that bucket + key on disk.
+
+To learn about Bitcask, see [Hello Bitcask](http://basho.com/hello-bitcask/) on the Basho blog as well as the [Introduction to Bitcask](http://basho.com/assets/bitcask-intro.pdf) paper.
+
+If your calculated RAM needs exceed your hardware resources (in other words, if you can't afford the RAM required to use Bitcask), we recommend that you use LevelDB.
+
+Check out [Bitcask Capacity Planning][plan bitcask capacity] for more details on designing a Bitcask-backed cluster.
+
+### LevelDB
+
+If RAM requirements for Bitcask are prohibitive, we recommend use of the LevelDB backend. While LevelDB doesn't require a large amount of RAM to operate, supplying it with the maximum amount of memory available leads to higher performance.
+
+For more information see [LevelDB][plan backend leveldb].
+
+## Disk
+
+Now that you have an idea of how much RAM you'll need, it's time to think about disk space, which is much easier to calculate. Below is an equation to help you calculate your disk space needs:
+
+#### Estimated Total Objects * Average Object Size * n_val
+
+For example, given:
+
+* 50,000,000 objects
+* an average object size of two kilobytes (2,048 bytes)
+* the default `n_val` of 3
+
+you would need approximately **286 GB** of disk space in the entire cluster to accommodate your data.
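+As with the RAM estimate, this is simple to script. A minimal sketch, assuming the example figures above:
+
+```bash
+# Approximate on-disk footprint, per the equation above.
+TOTAL_OBJECTS=50000000  # estimated total objects
+AVG_OBJECT_SIZE=2048    # average object size, in bytes
+N_VAL=3                 # replication value
+
+echo "$TOTAL_OBJECTS $AVG_OBJECT_SIZE $N_VAL" | \
+  awk '{printf "Approx. disk needed: %.0f GB (cluster-wide)\n", $1*$2*$3/1024/1024/1024}'
+```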
+We believe that databases should be durable out of the box. When we built Riak, we did so in a way that you could write to disk while keeping response times below your users' expectations. So this calculation assumes that you'll be keeping the entire data set on disk.
+
+Many of the considerations taken when configuring a machine to serve a database apply to configuring a node for Riak as well. Mounting disks with `noatime` and having separate disks for your OS and Riak data lead to much better performance. See [Planning for a Riak System](../start) for more information.
+
+### Disk Space Planning and Ownership Handoff
+
+When Riak nodes fail or leave the cluster, other nodes in the cluster start the **ownership handoff** process. Ownership handoff is when remaining nodes take ownership of the data partitions handled by an absent node. One side effect of this process is that the other nodes require more intensive disk space usage; in rare cases it can fill the disk of one or more of those nodes.
+
+When making disk space planning decisions, we recommend that you:
+
+* assume that one or more nodes may be down at any time
+* monitor your disk space usage and add additional space when usage exceeds 50-60% of available space
+
+Another possibility worth considering is using Riak with a filesystem that allows for growth, for example [LVM], [RAID](http://en.wikipedia.org/wiki/RAID), or [ZFS](http://en.wikipedia.org/wiki/ZFS).
+
+## Read/Write Profile
+
+Read/write ratios, as well as the distribution of key access, should influence the configuration and design of your cluster. If your use case is write heavy, you will need less RAM for caching, and if only a certain portion of keys is accessed regularly, such as in a [Pareto distribution](http://en.wikipedia.org/wiki/Pareto_distribution), you won't need as much RAM available to cache those keys' values.
+
+## Number of Nodes
+
+The number of nodes (i.e. physical servers) in your Riak cluster depends on the number of times data is [replicated][concept replication] across the cluster. To ensure that the cluster is always available to respond to read and write requests, we recommend a "sane default" of N=3 replicas. This requirement can be met with a 3- or 4-node cluster.
+
+For production deployments, however, we recommend using no fewer than 5 nodes, as node failures in smaller clusters can compromise the fault-tolerance of the system. Additionally, in clusters smaller than 5 nodes, a high percentage of the nodes (75-100% of them) will need to respond to each request, putting undue load on the cluster that may degrade performance. For more details on this recommendation, see our blog post on [Why Your Riak Cluster Should Have at Least Five Nodes](http://basho.com/posts/technical/Why-Your-Riak-Cluster-Should-Have-At-Least-Five-Nodes/).
+
+## Scaling
+
+Riak can be scaled in two ways: vertically, via improved hardware, and horizontally, by adding more nodes. Both ways can provide performance and capacity benefits, but should be used in different circumstances. The [riak-admin cluster command][use admin riak-admin#cluster] can assist scaling in both directions.
+
+#### Vertical Scaling
+
+Vertical scaling, or improving the capabilities of a node/server, provides greater capacity to the node but does not decrease the overall load on existing members of the cluster. That is, the ability of the improved node to handle existing load is increased but the load itself is unchanged. Reasons to scale vertically include increasing IOPS (I/O Operations Per Second), increasing CPU/RAM capacity, and increasing disk capacity.
+
+#### Horizontal Scaling
+
+Horizontal scaling, or increasing the number of nodes in the cluster, reduces the responsibilities of each member node by reducing the number of partitions per node and providing additional endpoints for client connections. That is, the capacity of each individual node does not change but its load is decreased. Reasons to scale horizontally include increasing I/O concurrency, reducing the load on existing nodes, and increasing disk capacity.
+
+> **Note on horizontal scaling**
+>
+> When scaling horizontally, it's best to add all planned nodes at once with multiple `riak-admin cluster join` commands followed by a `riak-admin cluster plan` and `riak-admin cluster commit`. This will help reduce the amount of data transferred between nodes in the cluster.
+
+#### Reducing Horizontal Scale
+
+If a Riak cluster is over-provisioned, or in response to seasonal usage decreases, the horizontal scale of a Riak cluster can be decreased using the `riak-admin cluster leave` command.
+
+## Ring Size/Number of Partitions
+
+Ring size is the number of partitions that make up your Riak cluster. Ring sizes must be a power of 2. Ring size is configured before your cluster is started, and is set in your [configuration files][config reference].
+
+The default number of partitions in a Riak cluster is 64. This works for smaller clusters, but if you plan to grow your cluster past 5 nodes, we recommend a larger ring size.
+
+The minimum number of partitions recommended per node is 10. You can determine the number of partitions allocated per node by dividing the number of partitions by the number of nodes.
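+For example, for a hypothetical 8-node cluster left on the default ring size:
+
+```bash
+RING_SIZE=64  # number of partitions (must be a power of 2)
+NODES=8       # number of nodes in the cluster
+
+# => 8 partitions per node, below the recommended minimum of 10
+echo $(( RING_SIZE / NODES ))
+```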
+There are no absolute rules for the ideal partitions-per-node ratio; it depends on your particular use case and on which features the Riak cluster uses. As a general guideline, we recommend between 10 and 50 data partitions per node.
+
+So if you're running a 3-node development cluster, a ring size of 64 or 128 should work just fine, while a 10-node cluster should work well with a ring size of 128 or 256 (64 is too small, while 512 is likely too large).
+
+The table below provides some suggested combinations:
+
+Number of nodes | Number of data partitions
+:---------------|:-------------------------
+3, 4, 5 | 64, 128
+6 | 64, 128, 256
+7, 8, 9, 10 | 128, 256
+11, 12 | 128, 256, 512
+
+By extension, a ring size of 1024 is advisable only in clusters with more than 20 nodes, 2048 in clusters with more than 40 nodes, etc.
+
+If you're unsure about the best number of partitions to use, consult the [Riak mailing list](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com) for suggestions from the Riak community.
+
+## Other Factors
+
+Riak is built to run in a clustered environment, and while it will compensate for network partitions, they do cause increased load on the system. In addition, running in a virtualized environment that lacks low-latency IO access can drastically decrease performance. Before putting your Riak cluster in production, it is recommended that you gain a full understanding of your environment's behavior so that you know how your cluster performs under load for an extended period of time. Doing so will help you size your cluster for future growth and lead to optimal performance.
+
+We recommend using [Basho Bench][perf benchmark] for benchmarking the performance of your cluster.
+
+### Bandwidth
+
+Riak uses Erlang's built-in distribution capabilities to provide reliable access to data. A Riak cluster can be deployed in many different network environments. We recommend that you keep latency between nodes as low as possible, as high latency leads to sub-optimal performance.
+
+Deploying a single Riak cluster across two datacenters is not recommended. If your use case requires this capability, Riak offers a [Multi Data Center Replication: Architecture](../../../using/reference/v3-multi-datacenter/architecture) option that is built to keep multiple Riak clusters in sync across several geographically diverse deployments.
+
+### I/O
+
+In general, the biggest bottleneck for Riak will be the amount of I/O available to it, especially in the case of write-heavy workloads. Riak functions much like any other database, and the design of your disk access should take this into account. Because Riak is clustered and your data is stored on multiple physical nodes, you should consider forgoing a traditional RAID setup for redundancy and focus on providing the lowest latency possible, using SATA drives or SSDs, for example.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/future.md b/content/riak/kv/3.0.4/setup/planning/future.md
new file mode 100644
index 0000000000..a9d27a664b
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/future.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Planning for the Future"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+#menu:
+#  riak_kv-3.0.4:
+#    name: "Planning for the Future"
+#    identifier: "planning_future"
+#    weight: 106
+#    parent: "planning"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/operating-system.md b/content/riak/kv/3.0.4/setup/planning/operating-system.md
new file mode 100644
index 0000000000..b221e44df3
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/operating-system.md
@@ -0,0 +1,30 @@
+---
+title: "Operating System Support"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "OS Support"
+    identifier: "planning_os"
+    weight: 101
+    parent: "planning"
+toc: true
+aliases:
+---
+
+[downloads]: {{<baseurl>}}riak/kv/3.0.4/downloads/
+
+We recommend deploying Riak KV on a mainstream Unix-like operating system. Mainstream distributions have larger support communities, making solutions to common problems easier to find.
+
+Basho provides [binary packages][downloads] of Riak KV for the following distributions:
+
+* **Red Hat based:** Red Hat Enterprise Linux, CentOS, Fedora Core
+* **Debian based:** Debian, Ubuntu
+* **Solaris based:** Sun Solaris, OpenSolaris
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/planning/start.md b/content/riak/kv/3.0.4/setup/planning/start.md
new file mode 100644
index 0000000000..13485354b7
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/planning/start.md
@@ -0,0 +1,61 @@
+---
+title: "Start Planning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Start Planning"
+    identifier: "planning_start"
+    weight: 100
+    parent: "planning"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/planning/system-planning
+  - /riak/kv/3.0.4/ops/building/planning/system-planning
+---
+
+[plan backend]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend
+[plan cluster capacity]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/cluster-capacity
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask
+[plan bitcask capacity]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/bitcask-capacity-calc
+
+Here are some steps and recommendations for designing and configuring your Riak cluster.
+
+## Backend
+
+Backends are what Riak KV uses to persist data. Different backends have strengths and weaknesses, so if you are unsure of which backend you need, read through the [Choosing a Backend][plan backend] tutorial.
+
+## Capacity
+
+[Cluster Capacity Planning][plan cluster capacity] outlines the various elements and variables that should be considered when planning your Riak cluster.
+
+If you have chosen [Bitcask][plan backend bitcask] as your backend, you will also want to run through [Bitcask Capacity Planning][plan bitcask capacity] to help you calculate a reasonable capacity.
+
+## Network Configuration / Load Balancing
+
+There are at least two acceptable strategies for load-balancing requests across your Riak cluster: **virtual IPs** and **reverse-proxy**.
+
+For **virtual IPs**, we recommend using any of the various VIP implementations. We don't recommend VRRP behavior for the VIP, because you'll lose the benefit of spreading client query load across all nodes in the ring.
+
+For **reverse-proxy** configurations (HTTP interface), any one of the following should work adequately:
+
+* haproxy
+* squid
+* varnish
+* nginx
+* lighttpd
+* Apache
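+Whichever proxy you choose, you can confirm that it is forwarding requests to the cluster with a quick check against Riak's HTTP `/ping` endpoint. A minimal sketch, assuming a hypothetical load balancer at `lb.example.com` forwarding to the nodes' default HTTP port (8098):
+
+```bash
+# Should return HTTP 200 with body "OK" if the proxy reaches a Riak node
+curl -i http://lb.example.com:8098/ping
+```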
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/search.md b/content/riak/kv/3.0.4/setup/search.md
new file mode 100644
index 0000000000..3f2ff2d6cc
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/search.md
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/upgrading.md b/content/riak/kv/3.0.4/setup/upgrading.md
new file mode 100644
index 0000000000..9748a24bca
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/upgrading.md
@@ -0,0 +1,38 @@
+---
+title: "Upgrading Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Upgrading"
+    identifier: "upgrading"
+    weight: 102
+    parent: "setup_index"
+toc: true
+aliases:
+---
+
+[upgrade checklist]: ./checklist
+[upgrade version]: ./version
+[upgrade cluster]: ./cluster
+[upgrade mdc]: ./multi-datacenter
+[upgrade search]: ./search
+
+## In This Section
+
+### [Production Checklist][upgrade checklist]
+
+An overview of what to consider before upgrading Riak KV in a production environment.
+
+[Learn More >>][upgrade checklist]
+
+### [Upgrading to Riak KV 3.0.4][upgrade version]
+
+A tutorial on updating to Riak KV 3.0.4.
+
+[Learn More >>][upgrade version]
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/upgrading/checklist.md b/content/riak/kv/3.0.4/setup/upgrading/checklist.md
new file mode 100644
index 0000000000..ef745eb1e6
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/upgrading/checklist.md
@@ -0,0 +1,225 @@
+---
+title: "Production Checklist"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Production Checklist"
+    identifier: "upgrading_checklist"
+    weight: 100
+    parent: "upgrading"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/upgrading/production-checklist/
+  - /riak/kv/3.0.4/ops/upgrading/production-checklist/
+---
+
+[perf open files]: {{<baseurl>}}riak/kv/3.0.4/using/performance/open-files-limit
+[perf index]: {{<baseurl>}}riak/kv/3.0.4/using/performance
+[ntp]: http://www.ntp.org/
+[security basics]: {{<baseurl>}}riak/kv/3.0.4/using/security/basics
+[cluster ops load balance]: {{<baseurl>}}riak/kv/3.0.4/configuring/load-balancing-proxy
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[config backend]: {{<baseurl>}}riak/kv/3.0.4/configuring/backend
+[usage search]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/search
+[usage conflict resolution]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution
+[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency
+[apps replication properties]: {{<baseurl>}}riak/kv/3.0.4/developing/app-guide/replication-properties
+[concept strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/reference/strong-consistency
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/bucket-types
+[use admin commands]: {{<baseurl>}}riak/kv/3.0.4/using/admin/commands
+[use admin riak control]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-control
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/inspecting-node
+[troubleshoot http]: {{<baseurl>}}riak/kv/3.0.4/using/troubleshooting/http-204
+[use admin riak-admin]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin
+[SANs]: http://en.wikipedia.org/wiki/Storage_area_network
+
+Deploying Riak KV to a live production environment from a development or testing environment can be a complex process. While the specifics of that process will always depend on your environment and practices, there are some basics for you to consider and a few questions that you will want to ask while making this transition.
+
+We've compiled these considerations and questions into separate categories for you to look over.
+
+## System
+
+* Are all systems in your cluster as close to identical as possible in terms of both hardware and software?
+* Have you set appropriate [open files limits][perf open files] on all of your systems?
+* Have you applied the [Riak KV performance improvement recommendations][perf index]?
+
+## Network
+
+* Are all systems using the same [NTP servers][ntp] to synchronize clocks?
+* Are you sure that your NTP clients' configuration is monotonic (i.e. that your clocks will not roll back)?
+* Is DNS correctly configured for all systems' production deployments?
+* Are connections correctly routed between all Riak nodes?
+* Are connections correctly set up in your load balancer?
+* Are your [firewalls][security basics] correctly configured?
+* Check that network latency and throughput are as expected for all of the following (we suggest using `iperf` to verify):
+  - between nodes in the cluster
+  - between the load balancer and all nodes in the cluster
+  - between application servers and the load balancer
+* Do all Riak nodes appear in the load balancer's rotation?
+* Is the load balancer configured to balance connections with round-robin or a similarly random [distribution scheme][cluster ops load balance]?
+
+## Riak KV
+
+* Check [configuration files][config reference]:
+  - Does each machine have the correct name and IP settings in `riak.conf` (or in `app.config` if you're using the older configuration files)?
+  - Are all [configurable settings][config reference] identical across the cluster?
+  - Have all of the settings in your configuration file(s) that were changed for debugging purposes been reverted back to production settings?
+  - If you're using [multiple data backends][config backend], are all of your bucket types configured to use the correct backend?
+  - If you are using Riak Security, have you checked off all items in the [security checklist][security basics] and turned on security?
+  - If you're using [multiple data backends][config backend], do all machines' config files agree on their configuration?
+  - Do all nodes agree on the value of the [`allow_mult`][config reference] setting?
+  - Do you have a [sibling resolution][usage conflict resolution] strategy in place if `allow_mult` is set to `true`?
+  - Have you carefully weighed the [consistency trade-offs][concept eventual consistency] that must be made if `allow_mult` is set to `false`?
+  - Are all of your [replication properties][apps replication properties] configured correctly and uniformly across the cluster?
+  - If you are using [Riak Search][usage search], is it enabled on all nodes? If you are not, has it been disabled on all nodes?
+  - If you are using [strong consistency][concept strong consistency] for some or all of your data:
+    * Does your cluster consist of at least three nodes? If it does not, you will not be able to use this feature, and you are advised against enabling it.
+    * If your cluster does consist of at least three nodes, has the strong consistency subsystem been enabled on all nodes?
+    * Is the [`target_n_val`][config reference] that is set on each node higher than any `n_val` that you intend to use for strongly consistent bucket types (or any bucket types, for that matter)? The default is 4, which will likely need to be raised if you are using strong consistency.
+  - Have all [bucket types][cluster ops bucket types] that you intend to use been created and successfully activated?
+  - If you are using [`riak_control`][use admin riak control], is it enabled on the node(s) from which you intend to use it?
+* Check data mount points:
+  - Is `/var/lib/riak` mounted?
+  - Can you grow that disk later when it starts filling up?
+  - Do all nodes have their own storage systems (i.e. no [SANs]), or do you have a plan in place for switching to that configuration later?
+* Are all Riak KV nodes up?
+  - Run `riak ping` on all nodes. You should get `pong` as a response.
+  - Run `riak-admin wait-for-service riak_kv <node_name>@<IP>` on each node. You should get `riak_kv is up` as a response.
+
+    The `<node_name>@<IP>` string should come from your [configuration file(s)][config reference].
+* Do all nodes agree on the ring state?
+  - Run `riak-admin ringready`. You should get `TRUE ALL nodes agree on the ring [list_of_nodes]`.
+  - Run `riak-admin member-status`. All nodes should be valid (i.e. listed as `Valid: 1`), and all nodes should appear in the list.
+  - Run `riak-admin ring-status`. The ring should be ready (`Ring Ready: true`), there should be no unreachable nodes (`All nodes are up and reachable`), and there should be no pending changes to the ring (`No pending changes`).
+  - Run `riak-admin transfers`. There should be no active transfers (`No transfers active`).
+
+## Operations
+
+* Does your monitoring system ensure that [NTP][ntp] is running?
+* Are you collecting [time series data][cluster ops inspect node] on the whole cluster?
+  - System metrics
+    + CPU load
+    + Memory used
+    + Network throughput
+    + Disk space used/available
+    + Disk input/output operations per second (IOPS)
+  - Riak metrics (from the [`/stats`][troubleshoot http] HTTP endpoint or using [`riak-admin`][use admin riak-admin])
+    + Latencies: `GET` and `PUT` (mean/median/95th/99th/100th)
+    + Vnode stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Node stats: `GET`s, `PUT`s, `GET` totals, `PUT` totals
+    + Finite state machine (FSM) stats:
+      * `GET`/`PUT` FSM `objsize` (99th and 100th percentile)
+      * `GET`/`PUT` FSM `times` (mean/median/95th/99th/100th)
+    + Protocol buffer connection stats
+      * `pbc_connects`
+      * `pbc_active`
+      * `pbc_connects_total`
+* Are the following being graphed (at least the key metrics)?
+  - Basic system status
+  - Median, 95th, and 99th percentile latencies (as these tend to be leading indicators of trouble)
+
+## Application and Load
+
+* Have you benchmarked your cluster with simulated load to confirm that your configuration will meet your performance needs?
+* Are the client libraries in use in your application up to date?
+* Do the client libraries that you're using support the version of Riak KV that you're deploying?
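+Several of the ring-health checks above are easy to script. A minimal sketch, using only commands that appear in this checklist (run it on any Riak node):
+
+```bash
+#!/usr/bin/env bash
+# Quick cluster sanity check before going to production.
+riak ping                  # expect: pong
+riak-admin ringready       # expect: TRUE ALL nodes agree on the ring
+riak-admin member-status   # expect: all nodes listed as valid
+riak-admin ring-status     # expect: Ring Ready: true, no pending changes
+riak-admin transfers       # expect: No transfers active
+```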
+
+## Confirming Configuration with Riaknostic
+
+Recent versions of Riak KV ship with Riaknostic, a diagnostic utility that can be invoked by running `riak-admin diag <check>`, where `check` is one of the following:
+
+* `disk`
+* `dumps`
+* `memory_use`
+* `nodes_connected`
+* `ring_membership`
+* `ring_preflists`
+* `ring_size`
+* `search`
+* `sysctl`
+
+Running `riak-admin diag` with no additional arguments will run all checks and report the findings. This is a good way of verifying that you've gotten at least some of the configurations mentioned above correct, that all nodes in your cluster are up, and that nothing is grossly misconfigured. Any warnings produced by `riak-admin diag` should be addressed before going to production.
+
+## Troubleshooting and Support
+
+* Does your team, including development and operations, know how to open support requests with Basho?
+* Is your team familiar with Basho Support's Service-Level Agreement (SLA) levels?
+  - Normal and Low are for issues not immediately impacting production systems
+  - High is for problems that impact production or soon-to-be-production systems, but where stability is not currently compromised
+  - Urgent is for problems causing production outages or for those issues that are likely to turn into production outages very soon. On-call engineers respond to urgent requests within 30 minutes, 24/7.
+* Does your team know how to gather `riak-debug` results from the whole cluster when opening tickets? If not, that process goes something like this (sketched below):
+  - SSH into each machine, run `riak-debug`, and grab the resultant `.tar.gz` file
+  - Attach all debug tarballs from the whole cluster each time you open a new High- or Urgent-priority ticket
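+A minimal sketch of that gathering process, assuming hypothetical hostnames and passwordless SSH access to every node:
+
+```bash
+# Run riak-debug on each node and pull the archive back locally.
+for host in riak1.example.com riak2.example.com riak3.example.com; do
+  ssh "$host" riak-debug                          # writes a .tar.gz on the node
+  scp "$host:*riak-debug*.tar.gz" "./$host-debug.tar.gz"
+done
+```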
+
+## The Final Step: Taking it to Production
+
+Once you've been running in production for a month or so, look back at the metrics gathered above. Based on the numbers you're seeing so far, configure alerting thresholds on your latencies, disk consumption, and memory. These are the places most likely to give you advance warning of trouble.
+
+When you go to increase capacity down the line, historical metrics will give you very clear indicators that scaling problems have been resolved, and will help you understand what to upgrade and when.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/upgrading/cluster.md b/content/riak/kv/3.0.4/setup/upgrading/cluster.md
new file mode 100644
index 0000000000..76a21fb542
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/upgrading/cluster.md
@@ -0,0 +1,303 @@
+---
+title: "Upgrading a Cluster"
+description: ""
+project: "riak_kv"
+project_version: "3.0.4"
+menu:
+  riak_kv-3.0.4:
+    name: "Upgrading a Cluster"
+    identifier: "upgrading_cluster"
+    weight: 102
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/3.0.4/ops/upgrading/rolling-upgrades/
+  - /riak/kv/3.0.4/ops/upgrading/rolling-upgrades/
+---
+
+[production checklist]: {{<baseurl>}}riak/kv/3.0.4/setup/upgrading/checklist
+[use admin riak control]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-control
+[use admin commands]: {{<baseurl>}}riak/kv/3.0.4/using/admin/commands
+[use admin riak-admin]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin
+[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes
+[release notes]: https://github.com/basho/riak/blob/master/RELEASE-NOTES.md
+[riak enterprise]: http://basho.com/products/riak-kv/
+[cluster ops mdc]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter
+[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.4/configuring/v3-multi-datacenter
+[jmx monitor]: {{<baseurl>}}riak/kv/3.0.4/using/reference/jmx
+[snmp]: {{<baseurl>}}riak/kv/3.0.4/using/reference/snmp
+
+{{% note title="Note on upgrading Riak KV from older versions" %}}
+Riak KV upgrades are tested and supported for two feature release versions. For example, upgrades from 1.1.x to 1.3.x are tested and supported, while upgrades from 1.1.x to 1.4.x are not. When upgrading to a new version of Riak KV that is more than two feature releases ahead, we recommend first upgrading to an intermediate version. For example, in an upgrade from 1.1.x to 1.4.x, we recommend upgrading from 1.1.x to 1.3.x before upgrading to 1.4.x.
+
+If you run [Riak Control]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-control), you should disable it during the rolling upgrade process.
+{{% /note %}}
+
+Riak KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed versions of Riak KV to properly interoperate without special configuration, and simplifies rolling upgrades.
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist] page, which covers details and questions to consider while upgrading.
+
+## Debian/Ubuntu
+
+The following example demonstrates upgrading a Riak KV node that has been installed with the Debian/Ubuntu packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up the Riak KV node's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo dpkg -i <riak_package_name>.deb
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g. `riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf.
+This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+## RHEL/CentOS
+
+The following example demonstrates upgrading a Riak KV node that has been installed with the RHEL/CentOS packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak
+```
+
+3\. Upgrade Riak KV:
+
+```bash
+sudo rpm -Uvh <riak_package_name>.rpm
+```
+
+4\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+5\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+6\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g. `riak@192.168.1.11`)
+
+7\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its behalf. This data is transferred to the node when it becomes available.
+
+8\. Repeat the process for the remaining nodes in the cluster.
+
+## Solaris/OpenSolaris
+
+The following example demonstrates upgrading a Riak KV node that has been installed with the Solaris/OpenSolaris packages provided by Basho.
+
+1\. Stop Riak KV:
+
+```bash
+riak stop
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV, you will have to stop Riak KV via `svcadm` instead of using `riak stop`:
+
+```bash
+sudo svcadm disable riak
+```
+{{% /note %}}
+
+2\. Back up Riak KV's `/etc` and `/data` directories:
+
+```bash
+sudo gtar -czf riak_backup.tar.gz /opt/riak/data /opt/riak/etc
+```
+
+3\. Uninstall Riak KV:
+
+```bash
+sudo pkgrm BASHOriak
+```
+
+4\. Install the new version of Riak KV:
+
+```bash
+sudo pkgadd -d <riak_package_name>.pkg
+```
+
+5\. Restart Riak KV:
+
+```bash
+riak start
+```
+
+{{% note %}}
+If you are using the service management facility (SMF) to manage Riak KV, you will have to start Riak KV via `svcadm` instead of using `riak start`:
+
+```bash
+sudo svcadm enable riak
+```
+{{% /note %}}
+
+6\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+7\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+`»target_node«` is the node which you have just upgraded (e.g. `riak@192.168.1.11`)
+
+8\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+While the node was offline, other nodes may have accepted writes on its behalf. This data is transferred to the node when it becomes available.
+
+9\. Repeat the process for the remaining nodes in the cluster.
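+The per-node procedure is the same on every platform; only the packaging commands differ. A condensed sketch for a single Debian/Ubuntu node, using only the commands shown above (the package filename is a placeholder):
+
+```bash
+riak stop
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak  # back up data and config
+sudo dpkg -i <riak_package_name>.deb                      # upgrade the package
+riak start
+riak version                                              # confirm the new version
+riak-admin wait-for-service riak_kv riak@192.168.1.11     # use your node's name
+riak-admin transfers                                      # wait for handoff to finish
+```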
+
+## Rolling Upgrade to Enterprise
+
+If you would like to upgrade an existing Riak KV cluster to a commercially supported [Riak KV Enterprise][riak enterprise] cluster with [multi-datacenter replication][cluster ops mdc], undertake the following steps:
+
+1. Shut down the node you are going to upgrade.
+2. Back up your `etc` (app.config and vm.args) and `data` directories.
+3. Uninstall your Riak KV package.
+4. Install the `riak_ee` package.
+5. A standard package uninstall should not have removed your data directories. If it did, move your backup to where the data directory should be.
+6. Copy any customizations from your backed-up vm.args to the `riak_ee` installed vm.args file (these files may be identical).
+7. The app.config file from `riak_ee` will be significantly different from your backed-up file. While it will contain all of the same sections as your original, it will have many new ones. Copy the customizations from your original app.config file into the appropriate sections in the new one. Ensure that the following sections are present in app.config:
+  * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_jmx` - See [JMX Monitoring][jmx monitor] for more information.
+  * `snmp` - See [SNMP][snmp] for more information.
+8. Start Riak KV on the upgraded node.
+
+## Basho Patches
+
+After upgrading, you should ensure that any custom patches contained in the `basho-patches` directory are examined to determine their application to the upgraded version. If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production.
+
+The following lists locations of the `basho-patches` directory for each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+## Riaknostic
+
+It is a good idea to also verify some basic configuration and general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal node operation.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/upgrading/multi-datacenter.md b/content/riak/kv/3.0.4/setup/upgrading/multi-datacenter.md
new file mode 100644
index 0000000000..c56c1f9fa3
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/upgrading/multi-datacenter.md
@@ -0,0 +1,24 @@
+---
+draft: true
+title: "Upgrading Multi-Datacenter"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+#menu:
+#  riak_kv-3.0.4:
+#    name: "Upgrading Multi-Datacenter"
+#    identifier: "upgrading_multi_datacenter"
+#    weight: 103
+#    parent: "upgrading"
+toc: true
+aliases:
+---
+
+## TODO
+
+How to update to a new version with multi-datacenter.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/setup/upgrading/search.md b/content/riak/kv/3.0.4/setup/upgrading/search.md
new file mode 100644
index 0000000000..2ee1d049d1
--- /dev/null
+++ b/content/riak/kv/3.0.4/setup/upgrading/search.md
@@ -0,0 +1,281 @@
+---
+title: "Upgrading Search from 1.x to 2.x"
+description: ""
+project: "riak_kv"
+project_version: "3.0.4"
+menu:
+  riak_kv-3.0.4:
+    name: "Upgrading Search 1.x to 2.x"
+    identifier: "upgrading_search"
+    weight: 104
+    parent: "upgrading"
+toc: true
+version_history:
+  in: "2.0.0-2.99.999"
+aliases:
+  - /riak/3.0.4/ops/advanced/upgrading-search-2
+  - /riak/kv/3.0.4/ops/advanced/upgrading-search-2
+---
+
+If you're using Search in a version of Riak prior to 2.0 (1.3.0 to 1.4.x), you should follow these steps to migrate your search indexes from the legacy `merge_index` to the new Solr-backed [Yokozuna](../../../using/reference/search) indexes. The legacy version of Riak Search is now deprecated and does not support most new 2.0 features (i.e.
+no [Riak Data Types](../../../developing/data-types), [bucket types](../../../using/reference/bucket-types), [strong consistency](../../../using/reference/strong-consistency), or [security](../../../using/security/)), so we highly recommend that you migrate.
+
+Please note that the legacy `merge_index`-based search (aka legacy Search) will be removed in a future release of Riak.
+
+## Overview of an Upgrade
+
+The migration steps explained here are as automated as they can reasonably be, but they do include some manual steps for safety. They are meant to be run on a live cluster, so there's no need to take all of your nodes down. Like all migration activities, you should undertake these steps at a time when your cluster is relatively light on traffic, i.e. _not_ the week before Christmas.
+
+The main goal of a live migration is to stand up indexes in the new Riak Search that parallel the existing ones in legacy. New writes add entries to both indexes while AAE adds entries in the new indexes for existing data.
+
+Parallel indexes mean more disk usage. How much more will depend on the schema, but tests have shown Solr to generally use less disk space than legacy Search. A prudent plan will nonetheless expect new Search to use as much disk as legacy. You can also expect more CPU usage, as analysis will temporarily be performed by both systems. Finally, Solr runs in a JVM process requiring its own RAM. A good start is 2 GB, but more will be required for heavier workloads. That said, do not make the heap too large, as that could cause lengthy garbage-collection pauses.
+
+As the new search indexes catch up with the old, incoming queries will still be serviced by legacy Search. Once you have determined that the new indexes are consistent with KV, you can perform a live switch to the new system and turn off legacy Search. Finally, you can remove the old merge index directories to reclaim disk space.
+
+> **Downgrading and Merge Index**
+>
+> It may be tempting to keep the merge index files in case of a downgrade. We don't recommend doing that if writes are being made to these buckets during upgrade. Once `search: false` is set on a bucket, all new KV data written will have missing indexes in the merge index, and overwritten data will have inconsistent indexes. At this point, a downgrade requires a full re-index of the data, as legacy Search has no mechanism to cope with inconsistency (such as [active anti-entropy](../../../learn/glossary/#active-anti-entropy-aae) in the new Search).
+
+> **Active Anti-Entropy (AAE) Required**
+>
+> Migration requires that Riak's AAE subsystem be enabled. It's responsible for finding all the missing index entries for existing data and adding them. Technically speaking, the migration can be performed without AAE, but it will require a key listing or [MapReduce](../../../developing/usage/mapreduce) job that re-indexes every object. This method will use more CPU, network, and especially disk space from merge index, as its GC algorithm is poor at reclaiming large index files.
+
+## Steps to Upgrading
+
+1. First, you'll perform a normal [rolling upgrade](../cluster). As you upgrade, enable the new Riak Search library on each node. If you're still using `app.config`, the setting is called `yokozuna`; if you've moved to the new `riak.conf`, the option is called `search`.
+
+    ```riakconf
+    search = on
+    ```
+    ```appconfig
+    {yokozuna, [
+        %% Other configs
+        {enabled, true},
+        %% Other configs
+    ]}
+    ```
+
+    <div class="note">
+    <div class="title">Upgrade First</div>
+    Don't proceed until all nodes have been upgraded to the newest version. This way all nodes have new Search capabilities before running the next steps, which require them.
+    </div>
+
+2. For every schema in legacy Search, you must create a comparable schema in new Search. If you want to use the default schema named [_yz_default](../../../developing/usage/search-schemas), you can skip this step, but we highly recommend you create your own custom schema.
+
+    To create a schema, you can follow the Solr [search schema](../../../developing/usage/search-schemas) instructions to learn how to define your XML file. Once you've created the file, you can upload it to the cluster.
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/schema/my_schema \
+      -H 'Content-Type: application/xml' \
+      --data-binary @my_schema.xml
+    ```
+
+3. For every index in legacy Search, you must create a comparable index in new Search, setting the appropriate schema that you created in the previous step. This index can have the same name as your legacy Search index. You can find more details about index creation under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/search/index/my_index \
+      -H 'Content-Type: application/json' \
+      -d '{"schema":"my_schema"}'
+    ```
+
+4. For each bucket which is indexed by legacy Search, you must add the `search_index` bucket property to point to the new Search index. This new index is what we are attempting to migrate all of our index data to. You can find more details about this step under [Using Search](../../../developing/usage/search/#setup).
+
+    ```curl
+    curl -XPUT http://localhost:8098/buckets/my_bucket/props \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search_index":"my_index"}}'
+    ```
+
+    Once a bucket is associated with the new Search, all objects that are written or modified in Riak will be indexed by **both** legacy and new Search. However, the HTTP and client query interfaces will still continue to use the legacy Search.
+
+5. The new Search [AAE](../../../learn/glossary/#active-anti-entropy-aae) hash trees must be manually cleared so that AAE will notice the missing indexes.
+
+    Attach to one of the Riak nodes by calling `riak attach-direct`. Paste the following code into the shell. It clears the Search hash trees for each node in the cluster.
+
+    ```erlang
+    riak_core_util:rpc_every_member_ann(yz_entropy_mgr, clear_trees, [], infinity).
+    ```
+
+    Press `Ctrl-D` to exit from the attached shell.
+
+    In the background, AAE will rebuild the hash trees and exchange them with KV. These exchanges will notice that objects are missing and index them in new Search.
+
+    <!-- no re-index command currently exists -->
+
+6. Monitor the AAE status of every node until a full round of exchanges has occurred on every node.
+
+    ```bash
+    riak-admin search aae-status
+    ```
+
+    First, you must wait until all trees are rebuilt. This may take a while, as each node is configured, by default, to build a maximum of one tree per hour. You can determine when a tree is built by looking at the `Entropy Trees` section. When a tree is not built it will show `--` under the `Built (ago)` column. Otherwise, it will list how long ago the tree was built in a human-friendly format.
+    Here is an example of trees that are not built:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   --
+    319703483166135013357056057156686910549735243776   --
+    ...
+    ```
+
+    Here is an example of built trees:
+
+    ```
+    ================================ Entropy Trees ================================
+    Index                                              Built (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.3 hr
+    319703483166135013357056057156686910549735243776   5.3 hr
+    ...
+    ```
+
+    After all the trees are built, you then have to wait for a full exchange round to occur for every partition on every node. That is, the full exchange round must be **NEWER** than the time the tree was built. That way you know the exchange was based on the latest tree. The exchange information is found under the `Exchanges` section. Under that section there are two columns: `Last (ago)` and `All (ago)`. Here, you want to wait until the `All (ago)` value is newer than the value of `Built (ago)` in the `Entropy Trees` section. For example, given the entropy tree output above, this output would indicate that both partitions have had a full exchange round since the latest tree was built:
+
+    ```
+    ================================== Exchanges ==================================
+    Index                                              Last (ago)    All (ago)
+    -------------------------------------------------------------------------------
+    ...
+    296867520082839655260123481645494988367611297792   12.1 hr       12.1 hr
+    319703483166135013357056057156686910549735243776   5.1 hr        5.2 hr
+    ...
+    ```
+
+    Notice that `12.1 hr` is newer than `12.3 hr`, and `5.2 hr` newer than `5.3 hr`. Once the exchange is newer for every partition on every node, you know that AAE has brought all new indexes up to date.
+
+7. Next, call the following command to give HTTP and PB query control to the new Riak Search.
+
+    ```bash
+    riak-admin search switch-to-new-search
+    ```
+
+    <div class="note">
+    <div class="title">Check Results Before Switching (Optional)</div>
+    Up until this point all incoming queries are serviced by the legacy Search system. After `switch-to-new-search` is run, all queries will be handled by new Search. If you first want to verify the results of new Search before switching, you can use its dedicated HTTP resource at `/search/query/<index>?q=...`.
+    </div>
+
+8. Set the `search` bucket property to `false` for all legacy indexed buckets. This deactivates legacy Search.
+
+    ```curl
+    curl -XPUT "http://localhost:8098/buckets/my_bucket/props" \
+      -H 'Content-Type: application/json' \
+      -d '{"props":{"search": false}}'
+    ```
+
+9. Disable the Riak Search process on each node by setting `riak_search` `enabled` to `false`.
+
+    ```appconfig
+    {riak_search, [
+        %% Other configs
+        {enabled, false},
+        %% Other configs
+    ]},
+    ```
+
+10. Perform a rolling restart. This is needed both to stop legacy Search and to properly remove the legacy Search commit hooks. A bug in the 1.4.x series allowed bucket properties to leak into what Riak developers call the "raw ring". In the case of legacy Search, it causes the commit hooks to persist even when legacy Search is disabled and the search property is set to false.
+ + New Search has code to expunge the legacy hooks from the raw ring but + it only occurs during start-up and requires that legacy Search be + disabled in the configuration. Thus, the easiest way to fix things is + to disable legacy Search (in step 9) and then perform a rolling + restart of the cluster. + +11. Finally, delete the merge index directories to reclaim disk space. + +For any questions reach out to the [Riak community]({{<baseurl>}}community). Preferably, ask your questions up front rather than during the middle of a migration. + + + + + diff --git a/content/riak/kv/3.0.4/setup/upgrading/version.md b/content/riak/kv/3.0.4/setup/upgrading/version.md new file mode 100644 index 0000000000..120cc8a643 --- /dev/null +++ b/content/riak/kv/3.0.4/setup/upgrading/version.md @@ -0,0 +1,252 @@ +--- +title: "Upgrading to Riak KV 3.0.4" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Upgrading to 3.0.4" + identifier: "upgrading_version" + weight: 101 + parent: "upgrading" +toc: true +aliases: + - /riak/3.0.4/upgrade-v20/ + - /riak/kv/3.0.4/ops/upgrading/rolling-upgrades/ + - /riak/kv/3.0.4/ops/upgrading/rolling-upgrades/ + - /riak/kv/3.0.4/setup/upgrading/cluster/ + +--- + + +[production checklist]: {{<baseurl>}}riak/kv/3.0.4/setup/upgrading/checklist +[use admin riak control]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-control +[use admin commands]: {{<baseurl>}}riak/kv/3.0.4/using/admin/commands +[use admin riak-admin]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin +[usage secondary-indexes]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes +[release notes]: {{<baseurl>}}riak/kv/3.0.4/release-notes +[riak enterprise]: http://basho.com/products/riak-kv/ +[cluster ops mdc]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.4/configuring/v3-multi-datacenter +[jmx monitor]: {{<baseurl>}}riak/kv/3.0.4/using/reference/jmx +[snmp]: {{<baseurl>}}riak/kv/3.0.4/using/reference/snmp +[Release Notes]: {{<baseurl>}}riak/kv/3.0.4/release-notes + + +## Overview + +You can upgrade one node or your whole cluster to Riak KV 3.0.4 by following the instructions below. + +{{% note title="Tip" %}} KV nodes negotiate with each other to determine supported operating modes. This allows clusters containing mixed-versions of Riak KV to interoperate without special configuration, and simplifies rolling upgrades. +{{% /note %}} + + +### General Process + +For every node in the cluster: + +1. Stop Riak KV. +1. Back up the Riak /etc, /data, and /basho-patches directories. +1. Remove your /basho-patches directory. +1. Upgrade Riak KV. + * If you are upgrading from EE to OSS, uninstall your EE KV package before upgrading. +1. (Optional) If you would like to potentially downgrade at some point, update your advanced.config file to opt-out of the AAE updates. +1. If you're upgrading from EE to OSS, apply your customized settings to vm.args/riak.conf and app.config/advanced.config +1. If you're using MDC replication to clusters with versions less than 2.2.0, update your advanced.config file to over-ride the default bucket properties for compatibility. +1. Start Riak KV. +1. Verify Riak KV is running the upgraded version. +1. Wait for the `riak_kv` service to start. +1. Wait for any hinted handoffs to complete. 
+
+Before starting the rolling upgrade process on your cluster, check out the [Upgrading Riak KV: Production Checklist][production checklist], which covers details and questions to consider before upgrading.
+
+
+## Transitioning to the Leveled Backend
+
+[Riak KV 2.9][release notes] introduced a new backend built specifically for Riak, Leveled.
+
+The leveled backend is not compatible with the other backends in terms of the serialized disk format, so there is no in-place transition possible from bitcask/eleveldb/hanoidb to leveled. Transitioning requires a node replace operation. It is recommended to:
+
+* first transition to 2.9 with the current backend in place, minimizing the time spent running mismatched versions in parallel;
+* then, as a second phase, run a rolling series of node transfers to replace the nodes running the previous backend with nodes running the leveled backend, as sketched below.
+
+{{% note %}}
+You only need [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) to upgrade to Riak KV 3.0.4 if you plan to use Riak search.
+{{% /note %}}
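+A minimal sketch of one such node transfer, assuming hypothetical node names (the staged `riak-admin cluster` workflow is covered under Cluster Administration):
+
+```bash
+# On the new node (configured with the leveled backend), stage a join
+# to the cluster via any existing member:
+riak-admin cluster join riak@existing-node.example.com
+
+# Then stage the replacement of the old node by the new one,
+# review the plan, and commit it:
+riak-admin cluster replace riak@old-node.example.com riak@new-node.example.com
+riak-admin cluster plan
+riak-admin cluster commit
+```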
+
+### Components That Complicate Downgrades
+
+We do our best to make all features that change data formats on disk opt-in; however, some features may be introduced that we either believe are so important that users are automatically opted in on upgrade, or for which there is no way to provide direct backward compatibility. Downgrading environments with these features can require more effort or might not be possible.
+
+* **Automatic** features alter the data format on disk, but are considered important enough for users to be automatically opted in.
+* **Required** features must be accepted as a part of the upgrade. Internal Solr version upgrades that change the data format on disk are an example of a required feature upgrade.
+* **One Way** features, when enabled, will make a clean downgrade of a cluster impossible.
+
+| Feature | Automatic | Required | One Way | Notes |
+|:---|:---:|:---:|:---:|:--- |
+| Migration to Solr 4.10.4 | ✔ | ✔ | | Applies to all clusters using Riak search. |
+| Active anti-entropy file format changes | ✔ | | | Can opt out using a capability. |
+| LZ4 compression in LevelDB | | | ✔ | |
+| Global expiration in LevelDB | | | ✔ | |
+| HyperLogLog data type | | | ✔ | On downgrade, data written in HLL format is unreadable. |
+
+### When Downgrading is No Longer an Option
+
+If you decide to upgrade, you can still downgrade your cluster to an earlier version of Riak KV if you wish, unless you transfer all of your nodes to the new Leveled backend.
+
+If you use other new features, you can still downgrade your cluster, but you will no longer be able to use those features after the downgrade.
+
+## Upgrading Process
+
+1\. Stop Riak KV on the node you are going to upgrade:
+
+```bash
+riak stop
+```
+
+2\. Back up your /etc (app.config and vm.args), /data, and /basho-patches directories.
+
+```RHEL/CentOS
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib64/riak/lib/basho-patches
+```
+
+```Ubuntu
+sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak /usr/lib/riak/lib/basho-patches
+```
+
+3\. Remove your /basho-patches directory:
+
+```RHEL/CentOS
+sudo rm -rf /usr/lib64/riak/lib/basho-patches/*
+```
+
+```Ubuntu
+sudo rm -rf /usr/lib/riak/lib/basho-patches/*
+```
+
+4\. Upgrade Riak KV:
+
+{{% note title="Upgrading from KV Enterprise Edition" %}}
+If you are upgrading from Riak KV EE to Riak KV OSS, you must uninstall your Riak KV EE package right now, before you can install the OSS version.
+{{% /note %}}
+
+```RHEL/CentOS
+sudo rpm -Uvh »riak_package_name«.rpm
+```
+
+```Ubuntu
+sudo dpkg -i »riak_package_name«.deb
+```
+
+5.a\. (**Optional**) If you would like to keep your AAE trees in a format that will facilitate downgrading, the capability override should be in the `riak_kv` proplist of the advanced.config file:
+
+```advanced.config
+{riak_kv, [
+    {override_capability, [
+        {object_hash_version, [{use, legacy}] }
+    ]}
+]}
+```
+
+5.b\. (**Optional**) If you would like to keep your LevelDB compression in a format that will facilitate downgrading, the capability override should be in riak.conf:
+
+```riak.conf
+leveldb.compression.algorithm=snappy
+```
+
+5.c\. (**OSS Only**) If you are upgrading from Riak KV OSS 2.2.3 or earlier, you must perform the following steps before moving on:
+
+* A standard package uninstall should not have removed your data directories, but if it did, move your backup to where the data directory should be.
+* Then copy any customizations from your backed-up vm.args/riak.conf to the newly installed vm.args/riak.conf file (these files may be identical).
+* The advanced.config file from the newly installed version will be significantly different from your backed-up file. It will have many new sections along with the original ones. Copy the customizations from your original advanced.config file into the appropriate sections in the new one. Ensure that the following sections are present in advanced.config:
+  * `riak_core` - the `cluster_mgr` setting must be present. See [MDC v3 Configuration][config v3 mdc] for more information.
+  * `riak_repl` - See [MDC v3 Configuration][config v3 mdc] for more information.
+  * There is a sample configuration included at the end of the [Release Notes][release notes] for reference purposes.
+
+5.d\. (**EE Only with MDC**) If you need to replicate to EE clusters with versions less than 2.2.0, the capability override for bucket properties should be in the `riak_repl` proplist of the advanced.config file:
+
+```advanced.config
+{riak_repl, [
+    {override_capability, [
+        {default_bucket_props_hash, [{use, [consistent, datatype, n_val, allow_mult, last_write_wins]}] }
+    ]}
+]}
+```
+
+Once all of the clusters have been upgraded to version 2.2.0 or greater, this override should be removed.
+
+5.e\. (**EE Only**) JMX and SNMP are no longer present in Riak KV. You must remove or comment out all references to them in your riak.conf/advanced.config files for Riak to start successfully post-upgrade.
+
+6\. Restart Riak KV:
+
+{{% note %}}
+You must have [Java version 7 or higher](http://www.oracle.com/technetwork/java/javase/downloads/index.html) in order to upgrade to Riak KV 3.0.4 if you wish to use Riak search. If you do not have it installed, please install it now.
+{{% /note %}}
+
+```bash
+riak start
+```
+
+7\. Verify that Riak KV is running the new version:
+
+```bash
+riak version
+```
+
+8\. Wait for the `riak_kv` service to start:
+
+```bash
+riak-admin wait-for-service riak_kv »target_node«
+```
+
+* `»target_node«` is the node which you have just upgraded (e.g. `riak@192.168.1.11`)
+
+9\. Wait for any hinted handoff transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+* While the node was offline, other nodes may have accepted writes on its
+10\. Repeat the process for the remaining nodes in the cluster.
+
+
+### Basho Patches
+
+After upgrading, you should examine any custom patches contained in the `basho-patches` directory to determine whether they still apply to the upgraded version. You can find this information in the [Release Notes].
+
+If you find that patches no longer apply to the upgraded version, you should remove them from the `basho-patches` directory prior to operating the node in production.
+
+The following lists the locations of the `basho-patches` directory for
+each supported operating system:
+
+- CentOS & RHEL Linux: `/usr/lib64/riak/lib/basho-patches`
+- Debian & Ubuntu Linux: `/usr/lib/riak/lib/basho-patches`
+- FreeBSD: `/usr/local/lib/riak/lib/basho-patches`
+- SmartOS: `/opt/local/lib/riak/lib/basho-patches`
+- Solaris 10: `/opt/riak/lib/basho-patches`
+
+### Riaknostic
+
+It is also a good idea to verify some basic configuration and the general health of the Riak KV node after upgrading by using Riak KV's built-in diagnostic utility, Riaknostic.
+
+Ensure that Riak KV is running on the node, and issue the following command:
+
+```bash
+riak-admin diag
+```
+
+Make the recommended changes from the command output to ensure optimal node operation.
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using.md b/content/riak/kv/3.0.4/using.md
new file mode 100644
index 0000000000..90a886a6fd
--- /dev/null
+++ b/content/riak/kv/3.0.4/using.md
@@ -0,0 +1,78 @@
+---
+title: "Using Riak KV"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Using"
+    identifier: "managing"
+    weight: 201
+    pre: database
+toc: true
+aliases:
+---
+
+[use running cluster]: ../using/running-a-cluster
+[use admin index]: ../using/admin/
+[cluster ops index]: ../using/cluster-operations
+[repair recover index]: ../using/repair-recovery
+[security index]: ../using/security
+[perf index]: ../using/performance
+[troubleshoot index]: ../using/troubleshooting
+[use ref]: ../using/reference
+
+## In This Section
+
+#### [Running a Cluster][use running cluster]
+
+A guide on basic cluster setup.
+
+[Learn More >>][use running cluster]
+
+#### [Cluster Administration][use admin index]
+
+Tutorials and reference documentation on cluster administration commands as well as command-line tools.
+
+[Learn More >>][use admin index]
+
+#### [Cluster Operations][cluster ops index]
+
+Step-by-step tutorials on a range of cluster operations, such as adding & removing nodes, renaming nodes, and back-ups.
+
+[Learn More >>][cluster ops index]
+
+#### [Repair & Recovery][repair recover index]
+
+Contains documentation on repairing a cluster, recovering from failure, and common errors.
+
+[Learn More >>][repair recover index]
+
+#### [Security][security index]
+
+Information on securing your Riak KV cluster.
+
+[Learn More >>][security index]
+
+#### [Performance][perf index]
+
+Articles on benchmarking your Riak KV cluster and improving performance.
+
+[Learn More >>][perf index]
+
+#### [Troubleshooting][troubleshoot index]
+
+Guides on troubleshooting issues and current product advisories.
+
+[Learn More >>][troubleshoot index]
+
+#### [Reference][use ref]
+
+Articles providing background information and implementation details on topics such as logging, bucket types, and search.
+
+[Learn More >>][use ref]
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/admin.md b/content/riak/kv/3.0.4/using/admin.md
new file mode 100644
index 0000000000..13fc5a70ac
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/admin.md
@@ -0,0 +1,51 @@
+---
+title: "Cluster Administration"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Cluster Administration"
+    identifier: "managing_cluster_admin"
+    weight: 202
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/cluster-admin
+  - /riak/kv/3.0.4/ops/running/cluster-admin
+---
+
+[use admin commands]: ./commands/
+[use admin riak cli]: ./riak-cli/
+[use admin riak-admin]: ./riak-admin/
+[use admin riak control]: ./riak-control/
+
+## In This Section
+
+#### [Cluster Admin Commands][use admin commands]
+
+Explains usage of the `riak-admin cluster` interface, which enables you to perform a wide variety of cluster-level actions.
+
+[Learn More >>][use admin commands]
+
+#### [riak-admin Command Line Interface][use admin riak-admin]
+
+Details the `riak-admin` interface.
+
+[Learn More >>][use admin riak-admin]
+
+#### [riak Command Line Interface][use admin riak cli]
+
+Covers the `riak` interface, which enables control of the processes associated with a Riak node.
+
+[Learn More >>][use admin riak cli]
+
+#### [Riak Control][use admin riak control]
+
+Overview of Riak Control, a web-based administrative console for Riak clusters.
+
+[Learn More >>][use admin riak control]
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/admin/commands.md b/content/riak/kv/3.0.4/using/admin/commands.md
new file mode 100644
index 0000000000..42662c7e70
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/admin/commands.md
@@ -0,0 +1,378 @@
+---
+title: "Cluster Administration Commands"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Cluster Admin Commands"
+    identifier: "cluster_admin_commands"
+    weight: 100
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/cluster-admin
+  - /riak/kv/3.0.4/ops/running/cluster-admin
+---
+
+[use admin riak-admin#cluster]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters
+[cluster ops add remove node]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes
+[use admin riak-admin#cluster-plan]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster-plan
+[use admin riak-admin#cluster-commit]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster-commit
+
+
+This document explains usage of the [`riak-admin cluster`][use admin riak-admin#cluster] interface, which enables you to perform a wide
+variety of cluster-level actions.
+
+## How Cluster Administration Works
+
+Riak provides a multi-phased approach to cluster administration that
+enables you to stage and review cluster-level changes prior to
+committing them. This allows you to group multiple changes together,
+such as adding multiple nodes at once, or adding some nodes while
+removing others.
+
+Enacting cluster-level changes typically follows this set of steps:
+
+1. Choose an action or set of actions, such as adding a node, removing
+multiple nodes, etc. These actions will be **staged** rather than
+executed immediately.
+1. **Plan** the changes using the [`cluster plan`](#plan) command. This will return a list of staged
+commands that you can review.
+1. **Commit** the changes using the [`cluster commit`](#commit) command. This will execute the changes that
+have been staged and reviewed.
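+
+A typical staged run of this workflow looks like the following sketch (the nodename is illustrative):
+
+```bash
+# Stage a change, e.g. joining the current node to an existing member:
+riak-admin cluster join riak1@127.0.0.1
+
+# Review the staged changes and the resulting cluster transitions:
+riak-admin cluster plan
+
+# Apply the staged changes:
+riak-admin cluster commit
+```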
+
+> **Note on command names**
+>
+> Many of the commands available through the `riak-admin cluster`
+interface are also available as self-standing commands. The `riak-admin
+member-status` command is now the `riak-admin cluster status` command,
+`riak-admin join` is now `riak-admin cluster join`, etc.
+>
+> We recommend using the `riak-admin cluster` interface over the older,
+deprecated commands. You will receive a deprecation warning if you use
+the older commands.
+
+## status
+
+Displays a variety of information about the cluster.
+
+```bash
+riak-admin cluster status
+```
+
+This will return output like the following in a 3-node cluster:
+
+```
+---- Cluster Status ----
+Ring ready: true
+
++--------------------+------+-------+-----+-------+
+|        node        |status| avail |ring |pending|
++--------------------+------+-------+-----+-------+
+| (C) dev1@127.0.0.1 |valid |  up   | 34.4|  --   |
+|     dev2@127.0.0.1 |valid |  up   | 32.8|  --   |
+|     dev3@127.0.0.1 |valid |  up   | 32.8|  --   |
++--------------------+------+-------+-----+-------+
+```
+
+In the above output, `Ring ready` denotes whether or not the cluster
+agrees on [the ring][concept clusters], i.e. whether the cluster is
+ready to begin taking requests.
+
+The following information is then displayed for each node, by nodename
+(in this case `dev1@127.0.0.1`, etc.):
+
+* `status` - There are five possible values for status:
+  * `valid` - The node has begun participating in cluster operations
+  * `leaving` - The node is currently unloading ownership of its
+    [data partitions][concept clusters] to other nodes
+  * `exiting` - The node's ownership transfers are complete and it is
+    currently shutting down
+  * `joining` - The node is in the process of joining the cluster but
+    has not yet completed the join process
+  * `down` - The node is not currently responding
+* `avail` - There are two possible values: `up` if the node is
+  available and taking requests and `down!` if the node is unavailable
+* `ring` - What percentage of the Riak [ring][concept clusters] the
+  node is responsible for
+* `pending` - The number of pending transfers to or from the node
+
+In addition, the cluster's [claimant node][cluster ops add remove node] will have a `(C)` next
+to it.
+
+## join
+
+Joins the current node to another node in the cluster.
+
+```bash
+riak-admin cluster join <node>
+```
+
+You _must_ specify a node to join to by nodename. You can join to any
+node in the cluster. The following would join the current node to
+`riak1@127.0.0.1`:
+
+```bash
+riak-admin cluster join riak1@127.0.0.1
+```
+
+Once a node joins, all of the operations necessary to establish
+communication with all other nodes proceed automatically.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster join` command will take effect only after you have both
+planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes by running
+[`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple joins before planning/committing.
+
+## leave
+
+Instructs the current node to hand off its
+[data partitions][concept clusters], leave the cluster, and shut down.
+
+```bash
+riak-admin cluster leave
+```
+
+You can also instruct another node (by nodename) to leave the cluster:
+
+```bash
+riak-admin cluster leave <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster leave` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit].
+You can stage multiple leave commands before planning/committing.
+
+## force-remove
+
+Removes another node from the cluster (by nodename) _without_ first
+handing off its [data partitions][concept clusters]. This command is
+designed for crashed, unrecoverable nodes and should be used with
+caution.
+
+```bash
+riak-admin cluster force-remove <node>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-remove` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-remove actions
+before planning/committing.
+
+## replace
+
+Instructs a node to transfer all of its [data partitions][concept clusters] to another node and then to leave the
+cluster and shut down.
+
+```bash
+riak-admin cluster replace <node1> <node2>
+```
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple replace actions before
+planning/committing.
+
+## force-replace
+
+Reassigns all [data partitions][concept clusters] owned by one node to
+another node _without_ first handing off data.
+
+```bash
+riak-admin cluster force-replace <node_being_replaced> <replacement_node>
+```
+
+Once the data partitions have been reassigned, the node that is being
+replaced will be removed from the cluster.
+
+> **Note**: As with all cluster-level actions, the changes made when you
+run the `cluster force-replace` command will take effect only after you have
+both planned the changes by running [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] and committed the changes
+by running [`riak-admin cluster commit`][use admin riak-admin#cluster-commit]. You can stage multiple force-replace actions
+before planning/committing.
+
+## plan
+
+Displays the currently staged cluster changes.
+
+```bash
+riak-admin cluster plan
+```
+
+If there is no current cluster plan, the output will be `There are no
+staged changes`.
+
+If there is a staged change (or changes), however, you
+will see a detailed listing of what will take place upon commit, what
+the cluster will look like afterward, etc.
+
+For example, if a `cluster leave` operation is staged in a 3-node cluster, the output will look something like this:
+
+```
+=============================== Staged Changes ================================
+Action         Details(s)
+-------------------------------------------------------------------------------
+leave          'dev2@127.0.0.1'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 2 cluster transitions
+
+###############################################################################
+                         After cluster transition 1/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+leaving    32.8%      0.0%    'dev2@127.0.0.1'
+valid      34.4%     50.0%    'dev1@127.0.0.1'
+valid      32.8%     50.0%    'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:1 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+
+Transfers resulting from cluster changes: 38
+  6 transfers from 'dev1@127.0.0.1' to 'dev3@127.0.0.1'
+  11 transfers from 'dev3@127.0.0.1' to 'dev1@127.0.0.1'
+  5 transfers from 'dev2@127.0.0.1' to 'dev1@127.0.0.1'
+  16 transfers from 'dev2@127.0.0.1' to 'dev3@127.0.0.1'
+
+###############################################################################
+                         After cluster transition 2/2
+###############################################################################
+
+================================= Membership ==================================
+Status     Ring    Pending    Node
+-------------------------------------------------------------------------------
+valid      50.0%      --      'dev1@127.0.0.1'
+valid      50.0%      --      'dev3@127.0.0.1'
+-------------------------------------------------------------------------------
+Valid:2 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+WARNING: Not all replicas will be on distinct nodes
+```
+
+Notice that there are distinct sections of the output for each of the
+transitions that the cluster will undergo, including warnings, planned
+data transfers, etc.
+
+## commit
+
+Commits the currently staged cluster changes. Staged cluster changes
+must be reviewed using [`riak-admin cluster plan`][use admin riak-admin#cluster-plan] prior to being committed.
+
+```bash
+riak-admin cluster commit
+```
+
+## clear
+
+Clears the currently staged cluster changes.
+
+```bash
+riak-admin cluster clear
+```
+
+What `riak-admin cluster clear` does depends on the changes being cleared:
+
+* If a `leave` operation has been staged, clearing it will undo the staged change and no node will be stopped.
+* If a `join` operation has been staged, the joining node will be shut down after its ring has been cleared. When this node restarts, it will behave like a fresh unjoined node and can be joined again.
+* Clearing staged changes on a node that remains in the cluster will leave that node unaffected.
+
+## partitions
+
+Prints primary, secondary, and stopped partition indices and IDs either
+for the current node or for another, specified node.
The following
+prints that information for the current node:
+
+```bash
+riak-admin cluster partitions
+```
+
+This would print the partition information for a different node in the
+cluster:
+
+```bash
+riak-admin cluster partitions --node=<node>
+```
+
+Partition information is contained in a table like this:
+
+```
+Partitions owned by 'dev1@127.0.0.1':
++---------+-------------------------------------------------+--+
+|  type   |                      index                      |id|
++---------+-------------------------------------------------+--+
+| primary |                        0                        |0 |
+| primary | 91343852333181432387730302044767688728495783936 |4 |
+| primary |182687704666362864775460604089535377456991567872 |8 |
+|   ...   |                      ....                       |..|
+| primary |1438665674247607560106752257205091097473808596992|63|
+|secondary|                       --                        |--|
+| stopped |                       --                        |--|
++---------+-------------------------------------------------+--+
+```
+
+## partition-count
+
+Displays the current partition count either for the whole cluster or for
+a particular node. This would display the partition count for the
+cluster:
+
+```bash
+riak-admin cluster partition-count
+```
+
+This would display the count for a node:
+
+```bash
+riak-admin cluster partition-count --node=<node>
+```
+
+When retrieving the partition count for a node, you'll see a table like
+this:
+
+```
++--------------+----------+-----+
+|     node     |partitions| pct |
++--------------+----------+-----+
+|dev1@127.0.0.1|    22    | 34.4|
++--------------+----------+-----+
+```
+
+The `partitions` column displays the number of partitions claimed by the
+node, while the `pct` column displays the percentage of the ring claimed.
+
+## partition
+
+The `cluster partition` command enables you to convert partition IDs to
+indexes and vice versa using the `partition id` and `partition index`
+commands, respectively. Let's say that you run the `riak-admin cluster
+partitions` command and see that you have a variety of partitions, one
+of which has an index of
+`1004782375664995756265033322492444576013453623296`.
You can convert
+that index to an ID like this:
+
+```bash
+riak-admin cluster partition index=1004782375664995756265033322492444576013453623296
+```
+
+Conversely, if you have a partition with an ID of 20, you can retrieve
+the corresponding index:
+
+```bash
+riak-admin cluster partition id=20
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/admin/riak-admin.md b/content/riak/kv/3.0.4/using/admin/riak-admin.md
new file mode 100644
index 0000000000..2bb6ce7a19
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/admin/riak-admin.md
@@ -0,0 +1,721 @@
+---
+title: "riak-admin Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "riak-admin CLI"
+    identifier: "cluster_admin_cli"
+    weight: 101
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/tools/riak-admin
+  - /riak/kv/3.0.4/ops/running/tools/riak-admin
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[use admin commands]: {{<baseurl>}}riak/kv/3.0.4/using/admin/commands
+[use admin commands#join]: {{<baseurl>}}riak/kv/3.0.4/using/admin/commands/#join
+[use admin commands#leave]: {{<baseurl>}}riak/kv/3.0.4/using/admin/commands/#leave
+[cluster ops backup]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/backing-up
+[config reference#node-metadata]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#node-metadata
+[cluster ops change info]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/changing-cluster-info
+[usage mapreduce]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce
+[usage commit hooks]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/commit-hooks
+[config reference#ring]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#ring
+[cluster ops inspect node]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/inspecting-node
+[use ref monitoring]: {{<baseurl>}}riak/kv/3.0.4/using/reference/statistics-monitoring
+[downgrade]: {{<baseurl>}}riak/kv/3.0.4/setup/downgrade
+[security index]: {{<baseurl>}}riak/kv/3.0.4/using/security/
+[security managing]: {{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources
+[cluster ops bucket types]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/bucket-types
+[cluster ops 2i]: {{<baseurl>}}riak/kv/3.0.4/using/reference/secondary-indexes
+[repair recover index]: {{<baseurl>}}riak/kv/3.0.4/using/repair-recovery
+[cluster ops strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/strong-consistency
+[cluster ops handoff]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/handoff
+[use admin riak-admin#stats]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#stats
+
+## `riak-admin`
+
+The riak-admin command performs operations unrelated to node liveness, including
+node membership, backup, and basic status reporting. The node must be
+running for most of these commands to work.
Running `riak-admin` by itself will output a list of available commands: + +``` +Usage: riak-admin { cluster | join | leave | backup | restore | test | + reip | js-reload | erl-reload | wait-for-service | + ringready | transfers | force-remove | down | + cluster-info | member-status | ring-status | vnode-status | + aae-status | diag | stat | status | transfer-limit | reformat-indexes | + top [-interval N] [-sort reductions|memory|msg_q] [-lines N] | + downgrade-objects | security | bucket-type | repair-2i | + search | services | ensemble-status | handoff | set | + show | describe } +``` + +## Node Naming + +An important thing to bear in mind is that all Riak nodes have unique +names within the cluster that are used for a wide variety of operations. +The name for each node can be set and changed in each node's +[configuration files][config reference]. The examples below set the name of a node to +`riak_node_1@199.99.99.01` in the `riak.conf` file if you are using the +newer configuration system and in `vm.args` if you are using the older +system: + +```riakconf +nodename = riak_node_1@199.99.99.01 +``` + +```vmargs +-name riak_node_1@199.99.99.01 +``` + +The name prior to the `@` symbol can be whatever you'd like, e.g. +`riak1`, `dev`, `cluster1_node1`, or `spaghetti`. After the `@` you must +use a resolvable IP address or hostname. In general, we recommend using +hostnames over IP addresses when possible because this enables the node +to potentially live on different machines over the course of its +existence. + +## cluster + +Documentation for the `riak-admin cluster` command interface can be +found in [Cluster Administration][use admin commands]. + +## join + +> **Deprecation Notice** +> +>As of Riak version 1.2, the `riak-admin join` command has +been deprecated in favor of the [`riak-admin cluster join`][use admin commands#join] command. However, this command can still be +used by providing a `-f` option (which forces the command). + +Joins the running node to another running node so that they participate +in the same cluster. `<node>` is the other node to connect to. + +```bash +riak-admin join -f <node> +``` + +## leave + +> **Deprecation Notice** +> +> As of Riak version 1.2, the `riak-admin leave` command has +been deprecated in favor of the new [`riak-admin cluster leave`][use admin commands#leave] command. However, this +command can still be used by providing a `-f` option (which +forces the command). + +Causes the node to leave the cluster in which it participates. After +this is run, the node in question will hand-off all its replicas to +other nodes in the cluster before it completely exits. + +```bash +riak-admin leave -f +``` + +## backup + +> **Deprecation notice** +The `riak-admin backup` command has been deprecated. We recommend using +backend-specific backup procedures instead. Documentation can be found +in [Backing up Riak KV][cluster ops backup]. + +Backs up the data from the node or entire cluster into a file. + +```bash +riak-admin backup <node> <cookie> <filename> [node|all] +``` + +* `<node>` is the node from which to perform the backup. +* `<cookie>` is the Erlang cookie/shared secret used to connect to the + node. This is `riak` in the [default configuration][config reference#node-metadata]. +* `<filename>` is the file where the backup will be stored. _This should + be the full path to the file_. 
+
+* `[node|all]` specifies whether the data on this node or the entire
+  cluster will be backed up, respectively.
+
+## restore
+
+> **Deprecation notice**
+>
+> The `riak-admin restore` command has been deprecated. It was originally
+intended to be used in conjunction with backups performed using the
+`riak-admin backup` command, which is also deprecated. We recommend
+using the backup and restore methods described in [Backing up Riak KV][cluster ops backup].
+
+Restores data to the node or cluster from a previous backup.
+
+```bash
+riak-admin restore <node> <cookie> <filename>
+```
+
+* `<node>` is the node which will perform the restore.
+* `<cookie>` is the Erlang cookie/shared secret used to connect to the
+  node. This is `riak` in the [default configuration][config reference#node-metadata].
+* `<filename>` is the file where the backup is stored. _This should be
+  the full path to the file_.
+
+## test
+
+Runs a test of a few standard Riak operations against the running node.
+
+```bash
+riak-admin test
+```
+
+If the test is successful, you should see output like the following:
+
+```
+Successfully completed 1 read/write cycle to 'dev1@127.0.0.1'
+```
+
+## reip
+
+Renames a node. This process backs up and edits the Riak ring, and
+**must** be run while the node is stopped. Reip should only be run in
+cases where `riak-admin cluster force-replace` cannot be used to
+rename the nodes of a cluster. For more information, visit the
+[Changing Cluster Information][cluster ops change info] document.
+
+```bash
+riak-admin reip <old nodename> <new nodename>
+```
+
+{{% note title="Note about reip prior to Riak 2.0" %}}
+Several bugs related to reip have been fixed in Riak 2.0. We recommend against
+using reip prior to 2.0, if possible.
+{{% /note %}}
+
+
+## js-reload
+
+Forces the embedded Javascript virtual machines to be restarted. This is
+useful when deploying custom built-in [MapReduce][usage mapreduce]
+functions.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin js-reload
+```
+
+## erl-reload
+
+Reloads the Erlang `.beam` files used for [MapReduce][usage mapreduce]
+jobs, [pre- and post-commit hooks][usage commit hooks], and other
+purposes.
+
+> **Note**: This needs to be run on _all nodes_ in the cluster.
+
+```bash
+riak-admin erl-reload
+```
+
+## wait-for-service
+
+Waits on a specific watchable service to be available (typically
+`riak_kv`). This is useful when (re-)starting a node while the cluster
+is under load. Use `riak-admin services` to see which services are
+available on a running node.
+
+```bash
+riak-admin wait-for-service <service> <nodename>
+```
+
+## ringready
+
+Checks whether all nodes in the cluster agree on the ring state.
+Prints `FALSE` if the nodes do not agree. This is useful after changing
+cluster membership to make sure that the ring state has settled.
+
+```bash
+riak-admin ringready
+```
+
+## transfers
+
+Identifies nodes that are awaiting transfer of one or more partitions.
+This usually occurs when partition ownership has changed (after adding
+or removing a node) or after node recovery.
+
+```bash
+riak-admin transfers
+```
+
+## transfer-limit
+
+Changes the `handoff_concurrency` limit. The value set by running this
+command will only persist while the node is running. If the node is
+restarted, the `transfer-limit` will return to the default of `2` or the
+value specified in the [`transfer_limit`][config reference#ring] setting in the `riak.conf` configuration file.
+
+Running this command with no arguments will display the current
+transfer-limit for each node in the cluster.
+
+```bash
+riak-admin transfer-limit <node> <limit>
+```
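+
+For example, to raise the limit to 4 on a single node (the nodename and value here are illustrative):
+
+```bash
+# Temporarily allow up to 4 concurrent handoffs on this node.
+riak-admin transfer-limit riak@192.168.1.10 4
+```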
+
+## down
+
+Marks a node as down so that ring transitions can be performed before
+the node is brought back online.
+
+```bash
+riak-admin down <node>
+```
+
+## cluster-info
+
+Outputs system information from a Riak cluster. This command will collect
+information from all nodes or a subset of nodes and output the data to a
+single text file.
+
+```bash
+riak-admin cluster-info <output file> [<node list>]
+```
+
+The following information is collected:
+
+ * Current time and date
+ * VM statistics
+ * `erlang:memory()` summary
+ * Top 50 process memory hogs
+ * Registered process names
+ * Registered process name via `regs()`
+ * Non-zero mailbox sizes
+ * Ports
+ * Applications
+ * Timer status
+ * ETS summary
+ * Nodes summary
+ * `net_kernel` summary
+ * `inet_db` summary
+ * Alarm summary
+ * Global summary
+ * `erlang:system_info()` summary
+ * Loaded modules
+ * Riak Core config files
+ * Riak Core vnode modules
+ * Riak Core ring
+ * Riak Core latest ring file
+ * Riak Core active partitions
+ * Riak KV status
+ * Riak KV ringready
+ * Riak KV transfers
+
+#### Examples
+
+Output information from all nodes to `/tmp/cluster_info.txt`:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt
+```
+
+Output information from the current node:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt local
+```
+
+Output information from a subset of nodes:
+
+```bash
+riak-admin cluster_info /tmp/cluster_info.txt riak@192.168.1.10
+riak@192.168.1.11
+```
+
+## member-status
+
+Prints the current status of all cluster members.
+
+```bash
+riak-admin member-status
+```
+
+## ring-status
+
+Outputs the current claimant, its status, ringready, pending ownership
+handoffs, and a list of unreachable nodes.
+
+```bash
+riak-admin ring-status
+```
+
+## vnode-status
+
+Outputs the status of all vnodes that are running on the local node.
+
+```bash
+riak-admin vnode-status
+```
+
+## aae-status
+
+This command provides insight into the operation of Riak's Active
+Anti-Entropy (AAE) feature.
+
+```bash
+riak-admin aae-status
+```
+
+The output contains information on AAE key/value partition exchanges,
+entropy tree building, and key repairs which were triggered by AAE.
+
+* **Exchanges**
+  * The *Last* column lists when the most recent exchange between a
+    partition and one of its sibling replicas was performed.
+  * The *All* column shows how long it has been since a partition
+    exchanged with all of its sibling replicas.
+
+* **Entropy Trees**
+  * The *Built* column shows when the hash trees for a given partition
+    were created.
+
+* **Keys Repaired**
+  * The *Last* column shows the number of keys repaired during the most
+    recent key exchange.
+  * The *Mean* column shows the mean number of keys repaired during all
+    key exchanges since the last node restart.
+  * The *Max* column shows the maximum number of keys repaired during all
+    key exchanges since the last node restart.
+
+{{% note title="Note on AAE status information" %}}
+All AAE status information is in-memory and is reset across a node restart.
+Only tree build times are persistent (since trees themselves are persistent).
+{{% /note %}}
+
+More details on the `aae-status` command are available in the [Riak
+version 1.3 release notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md#active-anti-entropy).
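+
+If you only want one part of the report, standard shell tools work well. This sketch prints just the Entropy Trees table, keyed on the section names described above (it prints up to and including the next section header):
+
+```bash
+# Print the Entropy Trees section of the AAE status report.
+riak-admin aae-status | sed -n '/Entropy Trees/,/Keys Repaired/p'
+```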
+
+## diag
+
+The `diag` command invokes the [Riaknostic](http://riaknostic.basho.com/)
+diagnostic system.
+
+```bash
+riak-admin diag
+```
+
+This command allows you to specify which diagnostic checks you would
+like to run, which types of diagnostic messages you wish to see, and so
+on. More comprehensive information can be found in the documentation on
+[inspecting a node][cluster ops inspect node].
+
+## stat
+
+Provides an interface for interacting with a variety of cluster-level
+metrics and information.
+
+```bash
+riak-admin stat
+```
+
+Full documentation of this command can be found in [Statistics and Monitoring][use ref monitoring].
+
+## status
+
+Prints status information, including performance statistics, system
+health information, and version numbers. Further information about the
+output is available in the documentation on [inspecting a node][cluster ops inspect node].
+
+```bash
+riak-admin status
+```
+
+## reformat-indexes
+
+This command reformats integer indexes in Secondary Index data for
+versions of Riak prior to 1.3.1 so that range queries over the indexes
+will return correct results.
+
+```
+riak-admin reformat-indexes [<concurrency>] [<batch size>] --downgrade
+```
+
+The `concurrency` option defaults to `2` and controls how many
+partitions are concurrently reformatted.
+
+The `batch size` option controls the number of simultaneous key
+operations and defaults to `100`.
+
+This command can be executed while the node is serving requests, and
+the default values are recommended for most cases. You should only change
+the default values after testing the impact on cluster performance.
+
+Information is written to `console.log` upon completion of the process.
+
+A `--downgrade` switch can be specified when downgrading a node to a version
+of Riak prior to version 1.3.1.
+
+Additional details are available in the [Riak 1.3.1 release
+notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md).
+
+## top
+
+Top uses Erlang's etop to provide information about what the Erlang
+processes inside of Riak are doing. Top reports process reductions (an
+indicator of CPU utilization), memory used, and message queue sizes.
+
+```bash
+riak-admin top [-interval N] [-sort reductions|memory|msg_q] [-lines N]
+```
+
+Options:
+
+* `interval` specifies the number of seconds between each update of the
+  top output and defaults to `5`
+* `sort` determines on which category `riak-admin top` sorts and
+  defaults to `reductions`
+* `lines` specifies the number of processes to display in the top output
+  and defaults to `10`
+
+More information about Erlang's etop can be found in the [etop
+documentation](http://www.erlang.org/doc/man/etop.html).
+
+## downgrade-objects
+
+This command is used when changing the format of Riak objects, usually
+as part of a version downgrade.
+
+```bash
+riak-admin downgrade-objects <kill-handoffs> [<concurrency>]
+```
+
+More detailed information can be found in [Rolling Downgrades][downgrade].
+
+## security
+
+This command enables you to manage Riak users, choose sources
+of authentication, assign and revoke permissions to/from users and
+groups, enable and disable Riak Security, and more.
+
+```bash
+riak-admin security <command>
+```
+
+More comprehensive information on user management can be found in
+the [Authentication and Authorization][security index] guide. Detailed information on authentication sources can be found in [Managing Security Sources][security managing].
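+
+As a starting point, you can check whether security is enabled and list the currently defined users; both sub-commands are covered in the guides linked above:
+
+```bash
+# Show whether Riak security is currently enabled:
+riak-admin security status
+
+# List all users known to the cluster:
+riak-admin security print-users
+```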
+
+## bucket-type
+
+Bucket types, introduced in Riak 2.0, are a means of managing bucket
+properties, and they provide an additional namespace in Riak alongside
+buckets and keys. This command enables you to create and modify bucket
+types, provide the status of currently available bucket types, and
+activate created bucket types.
+
+```bash
+riak-admin bucket-type <command>
+```
+
+More on bucket types can be found in [Using Bucket Types][cluster ops bucket types].
+
+## repair-2i
+
+This command repairs [secondary indexes][cluster ops 2i] in a
+specific partition or on a cluster-wide basis. Implementation details
+can be found in [Repairing Indexes][repair recover index].
+
+To repair secondary indexes throughout the entire cluster, run the
+`repair-2i` command by itself, without a subcommand:
+
+```bash
+riak-admin repair-2i
+```
+
+This will initiate the repair process. When you run this command, you
+should see something like the following (where `<ring_size>` is the
+number of partitions in your Riak cluster):
+
+```
+Will repair 2i data on <ring_size> partitions
+Watch the logs for 2i repair progress reports
+```
+
+To repair secondary indexes in a specific partition, provide the ID of
+the partition along with the `repair-2i` command:
+
+```bash
+riak-admin repair-2i 593735040165679310520246963290989976735222595584
+```
+
+You can check on the status of the repair process at any time:
+
+```bash
+riak-admin repair-2i status
+```
+
+If the repair is already finished, the console will return `2i repair is
+not running`. If the repair is still in progress, the console will
+return a series of statistics like this:
+
+```
+2i repair status is running:
+        Total partitions: 64
+        Finished partitions: 44
+        Speed: 100
+        Total 2i items scanned: 0
+        Total tree objects: 0
+        Total objects fixed: 0
+```
+
+If you're concerned about the computational resources required to repair
+secondary indexes, you can set the speed of the process to an integer
+between 1 and 100 (with 100 being the fastest). This command would set
+the speed to 90:
+
+```bash
+riak-admin repair-2i --speed 90
+```
+
+The repair process can be stopped at any moment using the `kill`
+command:
+
+```bash
+riak-admin repair-2i kill
+```
+
+## search
+
+The search command provides sub-commands for various administrative
+tasks related to the new Riak Search.
+
+```bash
+riak-admin search <command>
+```
+
+### aae-status
+
+```bash
+riak-admin search aae-status
+```
+
+Outputs active anti-entropy (AAE) statistics for search. There are
+three sections. Each section contains statistics for a specific aspect
+of AAE for every partition owned by the local node.
+
+The first section provides information on exchanges. Exchange is the
+process of comparing hash trees to determine divergences between KV
+data and search indexes. The `Index` column contains the partition
+number. The `Last (ago)` column is the amount of time that has passed
+since the last exchange. The `All (ago)` column is the amount of time
+that has passed since all preflists for that partition have been
+exchanged.
+
+The second section lists how much time has passed since the hashtree
+for that partition has been built from scratch. By default, trees
+expire after 1 week and are rebuilt from scratch.
+
+The third section presents statistics on repair operations that have
+occurred. Repair is performed when AAE notices that the KV and search
+hashtrees don't match for a particular key. The `Last` column is the
+number of keys repaired during the last exchange.
The `Mean` column is +the average number of keys repaired for all exchange rounds since the +node has started. The `Max` column is the maximum number of keys +repaired for a given exchange round since the node has started. + +### switch-to-new-search + +{{% note title="Only For Legacy Migration" %}} +This is only needed when migrating from legacy riak search to the new Search +(Yokozuna). +{{% /note %}} + +```bash +riak-admin search switch-to-new-search +``` + +Switch handling of the HTTP `/solr/<index>/select` resource and +protocol buffer query messages from legacy Riak Search to new Search +(Yokozuna). + +## services + +Lists available services on the node (e.g. `riak_kv`). + +```bash +riak-admin services +``` + +## ensemble-status + +This command is used to provide insight into the current status of the +consensus subsystem undergirding Riak's [strong consistency][cluster ops strong consistency] feature. + +```bash +riak-admin ensemble-status +``` + +This command can also be used to check on the status of a specific +consensus group in your cluster: + +```bash +riak-admin ensemble-status <group id> +``` + +Complete documentation of this command can be found in [Managing Strong Consistency][cluster ops strong consistency]. + +## handoff + +Documentation for the `handoff` command can be found in [Handoff][cluster ops handoff]. + +## set + +Enables you to change the value of one of Riak's configuration +parameters on the fly, without needing to stop and restart the node. + +```bash +riak-admin set <variable>=<value> +``` + +The set command can only be used for the following +parameters: + +* `transfer_limit` +* `handoff.outbound` +* `handoff.inbound` +* `search.dist_query=off` will disable distributed query for the node +* `search.dist_query=on` will enable distributed query for the node +* `search.dist_query` will get the status of distributed query for the node + +The `search.dist_query` commands above are non-persistent. Any settings you have defined in your riak.conf configuration file will be used when Riak KV is restarted. + + +## show + +Whereas the [`riak-admin status`][use admin riak-admin#stats] command will display all currently available statistics for your Riak +cluster, the `show` command enables you to view only some of those +statistics. + +```bash +riak-admin show <variable> +``` + +## describe + +Provides a brief description of one of Riak's [configurable parameters][config reference]. 
+
+```bash
+riak-admin describe <variable>
+```
+
+If you want to know the meaning of the `nodename` parameter:
+
+```bash
+riak-admin describe nodename
+```
+
+That will produce the following output:
+
+```
+nodename:
+  Name of the Erlang node
+```
+
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/admin/riak-cli.md b/content/riak/kv/3.0.4/using/admin/riak-cli.md
new file mode 100644
index 0000000000..6c87c3dee5
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/admin/riak-cli.md
@@ -0,0 +1,204 @@
+---
+title: "riak Command Line Interface"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "riak CLI"
+    identifier: "cluster_admin_riak_cli"
+    weight: 102
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/tools/riak
+  - /riak/kv/3.0.4/ops/running/tools/riak
+---
+
+[configuration file]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/
+[escript]: http://www.erlang.org/doc/man/escript.html
+[`riak-admin`]: {{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#top
+[configuration]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/
+
+## riak
+
+This is the primary script for controlling the processes associated with a Riak node. Running the `riak` command by itself will output a listing of available commands:
+
+```bash
+Usage: riak «command»
+where «command» is one of the following:
+    { help | start | stop | restart | ping | console | attach
+      attach-direct | ertspath | chkconfig | escript | version | getpid
+      top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N] } |
+      config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+## help
+
+Provides a brief description of all available commands.
+
+## start
+
+Starts the Riak node in the background. If the node is already started, you will receive the message `Node is already running!` If the node is not already running, no output will be given.
+
+```bash
+riak start
+```
+
+## stop
+
+Stops the running Riak node. Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak stop
+```
+
+## restart
+
+Stops and then starts the running Riak node without exiting the Erlang VM.
+Prints `ok` when successful or `Node <nodename> not responding to pings.` when the node is already stopped or not responding.
+
+```bash
+riak restart
+```
+
+## ping
+
+Checks that the Riak node is running. Prints `pong` when successful or `Node <nodename> not responding to pings.` when the node is stopped or not responding.
+
+```bash
+riak ping
+```
+
+## console
+
+Starts the Riak node in the foreground, giving access to the Erlang shell and
+runtime messages. Prints `Node is already running - use 'riak attach' instead`
+when the node is running in the background. You can exit the shell by pressing **Ctrl-C** twice.
+
+```bash
+riak console
+```
+
+## attach
+
+Attaches to the console of a Riak node running in the background, giving access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached.
+
+```bash
+riak attach
+```
+
+## attach-direct
+
+Attaches to the console of a Riak node running in the background using a directly-connected first-in-first-out (FIFO), providing access to the Erlang shell and runtime messages. Prints `Node is not running!` when the node cannot be reached. You can exit the shell by pressing **Ctrl-D**.
+
+```bash
+riak attach-direct
+```
+
+## ertspath
+
+Outputs the path of the Riak Erlang runtime environment:
+
+```bash
+riak ertspath
+```
+
+## chkconfig
+
+Checks whether the [configuration file][configuration file] is valid. If so, `config is OK` will be included in the output.
+
+```bash
+riak chkconfig
+```
+
+## escript
+
+Provides a means of calling [escript][escript] scripts using the Riak Erlang runtime environment:
+
+```bash
+riak escript <filename>
+```
+
+## version
+
+Outputs the Riak version identifier:
+
+```bash
+riak version
+```
+
+## getpid
+
+Outputs the process identifier for the currently-running instance of Riak:
+
+```bash
+riak getpid
+```
+
+## top
+
+The `riak top` command is the direct equivalent of `riak-admin top`:
+
+```bash
+riak top [-interval N] [-sort { reductions | memory | msg_q }] [-lines N]
+```
+
+More detailed information can be found in the [`riak-admin`][`riak-admin`] documentation.
+
+## config
+
+Provides information about the current [configuration][configuration] of a Riak node, i.e. the parameters and values in the node's riak.conf configuration.
+
+```bash
+riak config { generate | effective | describe VARIABLE } [-l debug]
+```
+
+* `generate` will cause the configuration files to be re-processed. This behavior happens automatically at node startup; however, `riak config generate` can be used to test for configuration errors that would prevent the node from starting after modifying the riak.conf or advanced.config files.
+  The output of a successful run will show the paths to the newly generated configuration files. These configuration files will contain a timestamp to indicate when they were generated. For example:
+
+  ```
+  -config /var/lib/riak/generated.configs/app.2016.12.02.17.47.32.config -args_file /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args -vm_args /var/lib/riak/generated.configs/vm.2016.12.02.17.47.32.args
+  ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following message:
+
+  ```
+  -config /etc/riak/app.config -args_file /etc/riak/vm.args -vm_args /etc/riak/vm.args
+  ```
+
+* `effective` prints the effective configuration in the following syntax:
+
+  ```
+  parameter1 = value1
+  parameter2 = value2
+  ```
+
+  If you are using the legacy configuration file format (app.config/vm.args), you will receive the following error:
+
+  ```
+  Disabling cuttlefish, legacy configuration files found:
+    /etc/riak/app.config
+    /etc/riak/vm.args
+  Effective config is only visible for cuttlefish conf files.
+  ```
+
+* `describe VARIABLE` prints the setting specified by `VARIABLE`, along with documentation and other useful information, such as the affected location in the configuration file, the data type of the value, the default value, and the effective value. For example, running `riak config describe storage_backend` will return the following:
+
+  ```
+  Documentation for storage_backend
+  Specifies the storage engine used for Riak's key-value data
+  and secondary indexes (if supported).
+
+  Valid Values:
+    - one of: bitcask, leveldb, memory, multi, prefix_multi
+  Default Value : bitcask
+  Set Value     : bitcask
+  Internal key  : riak_kv.storage_backend
+  ```
+
+Adding the `-l debug` flag to any `riak config` command will produce additional debugging information that can be used in advanced troubleshooting of "cuttlefish", Riak's configuration subsystem.
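+
+For instance, to check a single effective setting without scanning the whole listing, you can filter the output; this is a small sketch, and `ring_size` is just an example parameter:
+
+```bash
+# Print only the ring_size line from the effective configuration.
+riak config effective | grep ring_size
+```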
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/admin/riak-control.md b/content/riak/kv/3.0.4/using/admin/riak-control.md
new file mode 100644
index 0000000000..b3e05d52bd
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/admin/riak-control.md
@@ -0,0 +1,237 @@
+---
+title: "Riak Control"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Riak Control"
+    identifier: "cluster_admin_riak_control"
+    weight: 103
+    parent: "managing_cluster_admin"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/advanced/riak-control
+  - /riak/kv/3.0.4/ops/advanced/riak-control
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+
+Riak Control is a web-based administrative console for inspecting and
+manipulating Riak clusters.
+
+## Requirements
+
+Though Riak Control [is maintained as a separate application](https://github.com/basho/riak_control), the necessary code for it ships with versions of Riak 1.1 and above and requires no additional installation steps.
+
+Before getting started, you should know the address and port of the HTTP (or
+HTTPS) listeners for the cluster member(s) running Riak Control. You can obtain
+this information from the configuration files as indicated here:
+
+```riakconf
+listener.http.<name> = 127.0.0.1:8098
+
+## or
+
+listener.https.<name> = 127.0.0.1:8069
+
+## *** The default listeners in the riak.conf file are
+## named `internal`, so you would consult the value of
+## `listener.http.internal` in your configuration.
+```
+
+```appconfig
+ {riak_api,
+  [
+    %% Other configs
+    ... if HTTP is configured ...
+    {http,[{"127.0.0.1",8098}]},
+    ... if HTTPS is configured ...
+    {https,[{"127.0.0.1",8069}]},
+    %% Other configs
+  ]},
+
+%% *** This is a truncated configuration to illustrate the
+%% pertinent items -- the `http` and `https` tuples within
+%% the `riak_api` tuple's value list.
+```
+
+{{% note title="Note on SSL" %}}
+We strongly recommend that you enable SSL for Riak Control. It is disabled by
+default, and if you wish to enable it you must do so explicitly. More
+information can be found in the document below.
+{{% /note %}}
+
+## Enabling and Disabling Riak Control
+
+Riak Control is disabled by default, meaning that you should see the
+following in your [configuration files][config reference]:
+
+```riakconf
+riak_control = off
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, false},
+    %% Other configs
+  ]}
+```
+
+Enabling Riak Control is simple:
+
+```riakconf
+riak_control = on
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {enabled, true},
+    %% Other configs
+  ]}
+```
+
+Make sure to restart the node once you have enabled Riak Control for the
+change to take effect.
+
+After restarting the node, you should be able to access it by going
+to `http://ip_address_of_listener:port/admin`. In the case of a development
+cluster using the default configuration, you would access Riak Control at
+<http://127.0.0.1:8098/admin>.
+
+If you enabled authentication for Riak Control while performing the above
+configuration, you will be unable to access Riak Control until you have enabled
+and configured SSL and HTTPS.
+
+## Enabling SSL and HTTPS
+
+In order to use SSL in conjunction with Riak Control, SSL must be
+enabled on each Riak node. For more information, see our [security documentation]({{<baseurl>}}riak/kv/3.0.4/using/security/basics#enabling-ssl). Once SSL is enabled, you can proceed to setting up [authentication](#authentication) for Riak Control.
+
+Please note that Riak Control will not work if you have enabled
+authentication but SSL is not set up properly.
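+
+Once SSL is in place, a quick way to sanity-check the endpoint is with curl. This is a sketch; `-k` skips certificate verification, which you may need with self-signed certificates, and the port matches the default HTTPS listener shown above:
+
+```bash
+# Expect an HTTP response (or a 401 if authentication is enabled).
+curl -k -I https://127.0.0.1:8069/admin
+```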
+
+## Authentication
+
+Riak Control provides you the option of requiring authentication (via
+HTTP basic auth) for users of the web interface. It is disabled by
+default. To enable authentication:
+
+```riakconf
+riak_control.auth.mode = userlist
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {auth, userlist}, %% The only other available option is "none"
+    %% Other configs
+  ]}
+```
+
+When authentication is enabled, you can specify as many
+username/password pairs as you wish. The default pair is the username
+`user` and the password `pass`. We strongly recommend selecting
+different credentials. The example below would set up three user-defined
+pairs:
+
+```riakconf
+riak_control.auth.user.bob.password = bob_is_the_coolest
+riak_control.auth.user.polly.password = h4x0r123
+riak_control.auth.user.riakrocks.password = cap_theorem_4_life
+```
+
+```appconfig
+{riak_control, [
+    %% Other configs
+    {userlist, [
+        {"bob", "bob_is_the_coolest"},
+        {"polly", "h4x0r123"},
+        {"riakrocks", "cap_theorem_4_life"}
+    ]}
+    %% Other configs
+]}
+```
+
+## User Interface
+
+To begin using Riak Control, navigate to `https://ip_address_of_https_listener:https_port/admin`.
+For a default configuration, this will be <https://localhost:8069/admin>.
+
+If your browser warns you that it cannot authenticate the page, this may
+be because you are using self-signed certificates. If you have
+authentication enabled in your configuration, you will next be asked to
+authenticate. Enter an appropriate username and password now.
+
+{{% note title="Note on browser TLS" %}}
+Your browser needs to support TLS v1.2 to use Riak Control over HTTPS. A
+list of browsers that support TLS v1.2 can be found
+[here](https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers).
+TLS v1.2 may be disabled by default on your browser, for example if you are
+using Firefox versions earlier than 27, Safari versions earlier than 7, Chrome
+versions earlier than 30, or Internet Explorer versions earlier than 11. To
+enable it, follow browser-specific instructions.
+{{% /note %}}
+
+### Snapshot View
+
+When you first navigate to Riak Control, you will land on the Snapshot
+view:
+
+[ ![Snapshot View]({{<baseurl>}}images/control_current_snapshot.png) ] ({{<baseurl>}}images/control_current_snapshot.png)
+
+In this interface, the health of your cluster is made immediately
+obvious. In the event that something isn't quite right (or has the
+potential to cause problems in the near future), the green check mark
+will turn into a red `X`. The red `X` is accompanied by a list of
+reasons for concern. Each item in the list links to a page where you can
+get more information about the issue.
+
+### Cluster Management View
+
+On the top right side of the admin panel are navigation tabs. If you
+click the **Cluster** tab, you will be taken to the cluster management
+page.
+
+On this page, you can see all of the nodes in your cluster, along with
+their status, the percentage of the ring owned by that node, and memory
+consumption. You can also stage and commit changes to the cluster, such
+as adding, removing, and marking nodes as down.
+ +Staged changes to the cluster: + +[ ![Cluster Management Staged]({{<baseurl>}}images/control_cluster_management_staged.png) ] ({{<baseurl>}}images/control_cluster_management_staged.png) + +Changes committed; transfers active: + +[ ![Cluster Management Transfers]({{<baseurl>}}images/control_cluster_management_transfers.png) ] ({{<baseurl>}}images/control_cluster_management_transfers.png) + +Cluster stabilizes after changes: + +[ ![Cluster Management Stable]({{<baseurl>}}images/control_cluster_management_stable.png) ] ({{<baseurl>}}images/control_cluster_management_stable.png) + +### Node Management View + +The node management view allows you to operate against the individual +nodes in the cluster. + +[ ![Node Management]({{<baseurl>}}images/control_node_management.png) ] ({{<baseurl>}}images/control_node_management.png) + +### Ring View + +One level deeper than the cluster view is the ring view. This is where you can +see the health of each [vnode]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode). + +[ ![Ring View]({{<baseurl>}}images/control_current_ring.png) ] ({{<baseurl>}}images/control_current_ring.png) + +Most of the time, your ring will be too large to effectively manage from +the ring view. That said, with filters you can easily identify partition +ownership, unreachable primaries, and in-progress handoffs. + + + + diff --git a/content/riak/kv/3.0.4/using/cluster-operations.md b/content/riak/kv/3.0.4/using/cluster-operations.md new file mode 100644 index 0000000000..45a70c10ae --- /dev/null +++ b/content/riak/kv/3.0.4/using/cluster-operations.md @@ -0,0 +1,109 @@ +--- +title: "Cluster Operations" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Cluster Operations" + identifier: "managing_cluster_operations" + weight: 203 + parent: "managing" +toc: true +aliases: +--- + +[ops add remove node]: ./adding-removing-nodes +[ops change info]: ./changing-cluster-info +[ops replace node]: ./replacing-node +[ops inspect node]: ./inspecting-node +[ops bucket types]: ./bucket-types +[ops log]: ./logging +[ops backup]: ./backing-up +[ops handoff]: ./handoff +[ops strong consistency]: ./strong-consistency +[ops v3 mdc]: ./v3-multi-datacenter +[ops v2 mdc]: ./v2-multi-datacenter + +## In This Section + + +#### [Adding / Removing Nodes][ops add remove node] + +Describes the process of adding and removing nodes to and from a Riak KV cluster. + +[Learn More >>][ops add remove node] + + +#### [Changing Cluster Information][ops change info] + +Details how to rename nodes and change the HTTP & Protocol Buffers binding address. + +[Learn More >>][ops change info] + + +#### [Replacing a Node][ops replace node] + +Tutorial on replacing a node in a Riak KV cluster. + +[Learn More >>][ops replace node] + + +#### [Inspecting a Node][ops inspect node] + +Guide on some of the available tools for inspecting a Riak KV node. + +[Learn More >>][ops inspect node] + + +#### [Bucket Types][ops bucket types] + +Brief tutorial on creating bucket types. + +[Learn More >>][ops bucket types] + + +#### [Enabling and Disabling Debug Logging][ops log] + +Details toggling debug logging on a single node or all nodes in a cluster. + +[Learn More >>][ops log] + + +#### [Backing Up][ops backup] + +Covers backing up Riak KV data. + +[Learn More >>][ops backup] + + +#### [Enabling and Disabling Handoff][ops handoff] + +Information on using the `riak-admin handoff` interface to enable and disable handoff. 
+ +[Learn More >>][ops handoff] + + +#### [Monitoring Strong Consistency][ops strong consistency] + +Overview of the various statistics used in monitoring strong consistency. + +[Learn More >>][ops strong consistency] + + +#### [V3 Multi-Datacenter][ops v3 mdc] + +Explains how to manage V3 replication with the `riak-repl` command. + +[Learn More >>][ops v3 mdc] + + +#### [V2 Multi-Datacenter][ops v2 mdc] + +Explains how to manage V2 replication with the `riak-repl` command. + +[Learn More >>][ops v2 mdc] + + + + diff --git a/content/riak/kv/3.0.4/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/3.0.4/using/cluster-operations/active-anti-entropy.md new file mode 100644 index 0000000000..1f561b17cb --- /dev/null +++ b/content/riak/kv/3.0.4/using/cluster-operations/active-anti-entropy.md @@ -0,0 +1,289 @@ +--- +title: "Managing Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Managing Active Anti-Entropy" + identifier: "cluster_operations_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +version_history: + in: "2.9.1+" +aliases: + - /riak/kv/3.0.4/ops/advanced/aae/ + - /riak/3.0.4/ops/advanced/aae/ +--- +[config search#throttledelay]: {{<baseurl>}}riak/kv/3.0.4/configuring/search/#search-anti-entropy-throttle-tier-delay +[config search#throttle]: {{<baseurl>}}riak/kv/3.0.4/configuring/search/#search-anti-entropy-throttle + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) \(AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +In Riak versions 2.9.1 and later, [TicTac AAE]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/tictac-active-anti-entropy/) is included with releases as an option to be used in addition to or instead of traditional AAE in Riak. + +## Enabling Active Anti-Entropy + +Whether AAE is currently enabled in a node is determined by the value of +the `anti_entropy` parameter in the node's [configuration files](../../../configuring/reference/). + +In Riak versions 2.0 and later, AAE is turned on by default. + +```riakconf +anti_entropy = active +``` + +```appconfig +{riak_kv, [ + + {anti_entropy, {on, []}}, + + %% More riak_kv settings... +]} +``` + +For monitoring purposes, you can also activate AAE debugging, which +provides verbose debugging message output: + +```riakconf +anti_entropy = active-debug +``` + +```appconfig +{riak_kv, [ + + %% With debugging + {anti_entropy, {on, [debug]}}, + + %% More riak_kv settings... +]} +``` + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +## Disabling Active Anti-Entropy + +Alternatively, AAE can be switched off if you would like to repair +object inconsistencies using [read repair](../../../learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy) alone: + +```riakconf +anti_entropy = passive +``` + +```appconfig +{riak_kv, [ + + %% AAE turned off + {anti_entropy, {off, []}}, + + %% More riak_kv settings... +]} +``` + +If you would like to reclaim the disk space used by AAE operations, you +must manually delete the directory in which AAE-related data is stored +in each node. 
+ +```bash +rm -Rf <path_to_riak_node>/data/anti_entropy/* +``` + +The default directory for AAE data is `./data/anti_entropy`, as in the +example above, but this can be changed. See the section below titled +**Data Directory**. + +Remember that you will need to [restart the node](../../admin/riak-cli/#restart) for any configuration-related changes to take effect. + +The directory deletion method above can also be used to force a +rebuilding of hash trees. + +## Monitoring AAE + +Riak's command-line interface includes a command that provides insight +into AAE-related processes and performance: + +```bash +riak-admin aae-status +``` + +When you run this command in a node, the output will look like this +(shortened for the sake of brevity): + +``` +================================== Exchanges ================================== +Index Last (ago) All (ago) +------------------------------------------------------------------------------- +0 19.0 min 20.3 min +22835963083295358096932575511191922182123945984 18.0 min 20.3 min +45671926166590716193865151022383844364247891968 17.3 min 19.8 min +68507889249886074290797726533575766546371837952 16.5 min 18.3 min +91343852333181432387730302044767688728495783936 15.8 min 17.3 min +... + +================================ Entropy Trees ================================ +Index Built (ago) +------------------------------------------------------------------------------- +0 5.7 d +22835963083295358096932575511191922182123945984 5.6 d +45671926166590716193865151022383844364247891968 5.5 d +68507889249886074290797726533575766546371837952 4.3 d +91343852333181432387730302044767688728495783936 4.8 d + +================================ Keys Repaired ================================ +Index Last Mean Max +------------------------------------------------------------------------------- +0 0 0 0 +22835963083295358096932575511191922182123945984 0 0 0 +45671926166590716193865151022383844364247891968 0 0 0 +68507889249886074290797726533575766546371837952 0 0 0 +91343852333181432387730302044767688728495783936 0 0 0 + +``` + +Each of these three tables contains information for each +[vnode](../../../learn/concepts/vnodes) in your cluster in these three categories: + +Category | Measures | Description +:--------|:---------|:----------- +**Exchanges** | `Last` | When the most recent exchange between a data partition and one of its replicas was performed + | `All` | How long it has been since a partition exchanged with all of its replicas +**Entropy Trees** | `Built` | When the hash trees for a given partition were created +**Keys Repaired** | `Last` | The number of keys repaired during all key exchanges since the last node restart + | `Mean` | The mean number of keys repaired during all key exchanges since the last node restart + | `Max` | The maximum number of keys repaired during all key exchanges since the last node restart + +All AAE status information obtainable using the `riak-admin aae-status` +command is stored in-memory and is reset when a node is restarted with +the exception of hash tree build information, which is persisted on disk +(because hash trees themselves are persisted on disk). + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn AAE on and +off but also to fine-tune your cluster's use of AAE, e.g. how +much memory AAE processes should consume, how frequently specific +processes should be run, etc. 
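+
+As a quick reference, the sketch below shows what such tuning might look
+like in `riak.conf`, using parameters covered in the subsections that
+follow (the values shown are simply the defaults mentioned below, not
+recommendations):
+
+```riakconf
+anti_entropy = active
+anti_entropy.data_dir = ./data/anti_entropy
+anti_entropy.trigger_interval = 15s
+anti_entropy.tree.expiry = 1w
+anti_entropy.concurrency_limit = 2
+anti_entropy.max_open_files = 20
+anti_entropy.write_buffer_size = 4MB
+```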
+
+### Data Directory
+
+By default, data related to AAE operations is stored in the
+`./data/anti_entropy` directory in each Riak node. This can be changed
+by setting the `anti_entropy.data_dir` parameter to a different value.
+
+### Throttling
+
+AAE has a built-in throttling mechanism that can insert delays between
+AAE repair operations when [vnode](../../../learn/concepts/vnodes) mailboxes reach a specified
+length, with the delay for each tier set by the [`search.anti_entropy.throttle.$tier.delay`][config search#throttledelay] parameter (more on
+that in the section below). Throttling can be switched on and off using
+the [`search.anti_entropy.throttle`][config search#throttle] parameter. The default is `on`.
+
+#### Throttling Tiers
+
+If you activate AAE throttling, you can use *tiered throttling* to
+establish a series of vnode mailbox-size thresholds past which a
+user-specified time delay should be observed. This enables you to
+establish, for example, that a delay of 10 milliseconds should be
+observed if the mailbox of any vnode reaches 50 messages.
+
+The general form for setting tiered throttling is as follows:
+
+```riakconf
+search.anti_entropy.throttle.$tier.delay
+search.anti_entropy.throttle.$tier.solrq_queue_length
+```
+
+In the above example, `$tier` should be replaced with the desired
+name for that tier (e.g. `tier1`, `large_mailbox_tier`, etc). If you
+choose to set throttling tiers, you will need to set the mailbox size
+for one of the tiers to 0. Both the `.solrq_queue_length` and `.delay`
+parameters must be set for each tier.
+
+Below is an example configuration for three tiers, with mailbox sizes of
+0, 50, and 100 and time delays of 5, 10, and 15 milliseconds,
+respectively:
+
+```riakconf
+search.anti_entropy.throttle.tier1.solrq_queue_length = 0
+search.anti_entropy.throttle.tier1.delay = 5ms
+search.anti_entropy.throttle.tier2.solrq_queue_length = 50
+search.anti_entropy.throttle.tier2.delay = 10ms
+search.anti_entropy.throttle.tier3.solrq_queue_length = 100
+search.anti_entropy.throttle.tier3.delay = 15ms
+```
+
+### Bloom Filters
+
+Bloom filters are mechanisms used to prevent reads that are destined to
+fail because no object exists in the location that they're querying.
+Using bloom filters can improve reaction time for some queries, but
+entails a small general performance cost. You can switch bloom filters
+on and off using the `anti_entropy.bloomfilter` parameter.
+
+### Trigger Interval
+
+The `anti_entropy.trigger_interval` setting determines how often Riak's
+AAE subsystem looks for work to do, e.g. building or expiring hash
+trees, triggering information exchanges between nodes, etc. The default
+is every 15 seconds (`15s`). Raising this value may save resources, but
+at a slightly higher risk of data corruption.
+
+### Hash Trees
+
+As a fallback measure in addition to the normal operation of AAE on-disk
+hash trees, Riak periodically clears and regenerates all hash trees
+stored on disk to ensure that hash trees correspond to the key/value
+data stored in Riak. This enables Riak to detect silent data corruption
+resulting from disk failure or faulty hardware. The
+`anti_entropy.tree.expiry` setting enables you to determine how often
+that takes place. The default is once a week (`1w`). You can set up this
+process to run once a day (`1d`), twice a day (`12h`), once a month
+(`4w`), and so on.
+
+In addition to specifying how often Riak expires hash trees after they
+are built, you can also specify how quickly and how many hash trees are
+built. 
You can set the frequency using the
+`anti_entropy.tree.build_limit.per_timespan` parameter, for which the
+default is every hour (`1h`); the number of hash tree builds is
+specified by `anti_entropy.tree.build_limit.number`, for which the
+default is 1.
+
+### Write Buffer Size
+
+While you are free to choose the backend for data storage in Riak,
+background AAE processes use [LevelDB](../../../setup/planning/backend/leveldb). You can adjust the size of the
+write buffer used by LevelDB for hash tree generation using the
+`anti_entropy.write_buffer_size` parameter. The default is `4MB`.
+
+### Open Files and Concurrency Limits
+
+The `anti_entropy.concurrency_limit` parameter determines how many AAE
+cross-node information exchanges or hash tree builds can happen
+concurrently. The default is `2`.
+
+The `anti_entropy.max_open_files` parameter sets an open-files limit for
+AAE-related background tasks, analogous to [open files limit](../../performance/open-files-limit) settings used in operating systems. The default is `20`.
+
+## AAE and Riak Search
+
+Riak's AAE subsystem works to repair object inconsistencies both for
+normal key/value objects and for data related to [Riak Search](../../../developing/usage/search). In particular, AAE acts on indexes stored in
+[Solr](http://lucene.apache.org/solr/), the search platform that drives
+Riak Search. Implementation details for AAE and Search can be found in
+the [Search Details](../../reference/search/#active-anti-entropy-aae)
+documentation.
+
+You can check on the status of Search-related AAE using the following
+command:
+
+```bash
+riak-admin search aae-status
+```
+
+The output from that command can be interpreted just like the output
+discussed in the section on [monitoring](#monitoring-aae) above.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes.md
new file mode 100644
index 0000000000..9cb4519f24
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes.md
@@ -0,0 +1,198 @@
+---
+title: "Adding / Removing Nodes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Adding/Removing Nodes"
+    identifier: "cluster_operations_add_remove_nodes"
+    weight: 100
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/nodes/adding-removing
+  - /riak/kv/3.0.4/ops/running/nodes/adding-removing
+---
+
+[use running cluster]: {{<baseurl>}}riak/kv/3.0.4/using/running-a-cluster
+
+This page describes the process of adding and removing nodes to and from
+a Riak KV cluster. For information on creating a cluster, check out [Running a Cluster][use running cluster].
+
+## Start the Node
+
+Just like the initial configuration steps, this step has to be repeated
+for every node in your cluster. Before a node can join an existing
+cluster it needs to be started. Depending on your mode of installation,
+use either the init scripts installed by the Riak binary packages or
+simply the script [`riak`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-cli/):
+
+```bash
+/etc/init.d/riak start
+```
+
+or
+
+```bash
+bin/riak start
+```
+
+When the node starts, it will look for a cluster description, known as
+the **ring file**, in its data directory. 
If a ring file does not exist, +it will create a new ring file based on the initially configured +`ring_size` (or `ring_creation_size` if you're using the older, +`app.config`-based configuration system), claiming all partitions for +itself. Once this process completes, the node will be ready to serve +requests. + +## Add a Node to an Existing Cluster + +Once the node is running, it can be added to an existing cluster. Note +that this step isn't necessary for the first node; it's necessary only +for nodes that you want to add later. + +To join the node to an existing cluster, use the `cluster join` command: + +```bash +bin/riak-admin cluster join <node_in_cluster> +``` + +The `<node_in_cluster>` in the example above can be _any_ node in the +cluster you want to join to. So if the existing cluster consists of +nodes `A`, `B`, and `C`, any of the following commands would join the +new node: + +```bash +bin/riak-admin cluster join A +bin/riak-admin cluster join B +bin/riak-admin cluster join C +``` + +To give a more realistic example, let's say that you have an isolated +node named `riak@192.168.2.5` and you want to join it to an existing +cluster that contains a node named `riak@192.168.2.2`. This command +would stage a join to that cluster: + +```bash +bin/riak-admin cluster join riak@192.168.2.2 +``` + +If the join request is successful, you should see the following: + +``` +Success: staged join request for 'riak@192.168.2.5' to 'riak@192.168.2.2' +``` + +If you have multiple nodes that you would like to join to an existing +cluster, repeat this process for each of them. + +## Joining Nodes to Form a Cluster + +The process of joining a cluster involves several steps, including +staging the proposed cluster nodes, reviewing the cluster plan, and +committing the changes. + +After staging each of the cluster nodes with `riak-admin cluster join` +commands, as in the section above, the next step in forming a cluster is +to review the proposed plan of changes. This can be done with the +`riak-admin cluster plan` command, which is shown in the example below. 
+
+```
+=============================== Staged Changes ================================
+Action Nodes(s)
+-------------------------------------------------------------------------------
+join 'riak@192.168.2.3'
+join 'riak@192.168.2.4'
+join 'riak@192.168.2.5'
+join 'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+
+
+NOTE: Applying these changes will result in 1 cluster transition
+
+###############################################################################
+ After cluster transition 1/1
+###############################################################################
+
+================================= Membership ==================================
+Status Ring Pending Node
+-------------------------------------------------------------------------------
+valid 100.0% 20.3% 'riak@192.168.2.2'
+valid 0.0% 20.3% 'riak@192.168.2.3'
+valid 0.0% 20.3% 'riak@192.168.2.4'
+valid 0.0% 20.3% 'riak@192.168.2.5'
+valid 0.0% 18.8% 'riak@192.168.2.6'
+-------------------------------------------------------------------------------
+Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+Transfers resulting from cluster changes: 51
+ 12 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.3'
+ 13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.4'
+ 13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.5'
+ 13 transfers from 'riak@192.168.2.2' to 'riak@192.168.2.6'
+```
+
+If the plan is to your liking, submit the changes by running `riak-admin
+cluster commit`.
+
+{{% note title="Note on ring changes" %}}
+The algorithm that distributes partitions across the cluster during membership
+changes is non-deterministic. As a result, there is no optimal ring. In the
+event that a plan results in a slightly uneven distribution of partitions, the
+plan can be cleared. Clearing a cluster plan with `riak-admin cluster clear`
+and running `riak-admin cluster plan` again will produce a slightly different
+ring.
+{{% /note %}}
+
+## Removing a Node From a Cluster
+
+A node can be removed from the cluster in two ways. One assumes that a
+node is decommissioned, for example, because its added capacity is not
+needed anymore or because it's explicitly replaced with a new one. The
+second is relevant for failure scenarios in which a node has crashed and
+is irrecoverable and thus must be removed from the cluster from another
+node.
+
+The command to remove a running node is `riak-admin cluster leave`. This
+command must be executed on the node that you intend to remove from the
+cluster.
+
+Similarly to joining a node, after executing `riak-admin cluster leave`
+the cluster plan must be reviewed with `riak-admin cluster plan` and
+the changes committed with `riak-admin cluster commit`.
+
+The other command is `riak-admin cluster leave <node>`, where `<node>`
+is the node name as specified in the node's configuration files:
+
+```bash
+riak-admin cluster leave riak@192.168.2.1
+```
+
+This command can be run from any other node in the cluster.
+
+Under the hood, both commands do basically the same thing. Running
+`riak-admin cluster leave` without a node argument simply selects the
+current node for you automatically.
+
+As with `riak-admin cluster leave`, the plan to have a node leave the
+cluster must first be reviewed with `riak-admin cluster plan` and
+committed with `riak-admin cluster commit` before any changes will
+actually take place. 
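+
+Putting those pieces together, a minimal sketch of a typical leave
+operation looks like this (run the first command on the node that is
+leaving, or add a node name to run it from elsewhere):
+
+```bash
+# On the departing node: stage the leave
+riak-admin cluster leave
+
+# Review the proposed changes, then commit them
+riak-admin cluster plan
+riak-admin cluster commit
+```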
+
+
+## Pausing a `join` or `leave`
+
+{{% note title="Warning" %}}
+Pausing may impact cluster health and is not recommended for more than a short period of time.
+{{% /note %}}
+
+To pause during `riak-admin cluster join` or `riak-admin cluster leave`, set the node's transfer-limit to 0:
+
+```bash
+riak-admin transfer-limit <node> 0
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/cluster-operations/backend.md b/content/riak/kv/3.0.4/using/cluster-operations/backend.md
new file mode 100644
index 0000000000..f9912235ea
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/cluster-operations/backend.md
@@ -0,0 +1,21 @@
+---
+draft: true
+title: "Backend"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+#menu:
+#  riak_kv-3.0.4:
+#    name: "Backend"
+#    identifier: "cluster_operations_backend"
+#    weight: 112
+#    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+**TODO: Add content**
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/cluster-operations/backing-up.md b/content/riak/kv/3.0.4/using/cluster-operations/backing-up.md
new file mode 100644
index 0000000000..cfc7055901
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/cluster-operations/backing-up.md
@@ -0,0 +1,271 @@
+---
+title: "Backing Up"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Backing Up"
+    identifier: "cluster_operations_backing_up"
+    weight: 106
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/backups
+  - /riak/kv/3.0.4/ops/running/backups
+---
+
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+[plan backend leveldb]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb
+[plan backend bitcask]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/bitcask
+[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/reference/strong-consistency
+[concept aae]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/
+[aae read repair]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy
+
+Riak KV is a [clustered][concept clusters] system built to survive a wide range of failure scenarios, including the loss of nodes due to network or hardware failure. Although this is one of Riak KV's core strengths, it cannot withstand all failure scenarios.
+
+Backing up data (duplicating the database on a different long-term storage system) is a common approach to mitigating potential failure scenarios.
+
+This page covers how to perform backups of Riak KV data.
+
+## Overview
+
+Riak KV backups can be performed using operating system features or filesystems that support snapshots, such as LVM or ZFS, or by using tools like rsync or tar.
+
+Choosing your Riak KV backup strategy will depend on your already-established backup methodologies and the backend configuration of your nodes.
+
+The basic process for getting a backup of Riak KV from a node is as follows:
+
+1. Stop Riak KV with `riak stop`.
+2. Back up the appropriate data, ring, and configuration directories.
+3. Start Riak KV.
+
+Downtime of a node can be significantly reduced by using an OS feature or filesystem that supports snapshotting.
+
+{{% note title="Backups and eventual consistency" %}}
+Due to Riak KV's eventually consistent nature, backups can become slightly inconsistent from node to node.
+
+Data could exist on some nodes and not others at the exact time a backup is made. 
Any inconsistency will be corrected once a backup is restored, either by Riak's [active anti-entropy]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/) processes or when the object is read, via [read repair]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy).
+{{% /note %}}
+
+## OS-Specific Directory Locations
+
+The default Riak KV data, ring, and configuration directories for each of the supported operating systems are as follows:
+
+#### Debian and Ubuntu
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### Fedora and RHEL
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/lib/riak/bitcask`
+LevelDB | `/var/lib/riak/leveldb`
+Ring | `/var/lib/riak/ring`
+Configuration | `/etc/riak`
+Cluster Metadata | `/var/lib/riak/cluster_meta`
+Search | `/var/lib/riak/yz`
+Strong consistency | `/var/lib/riak/ensembles`
+
+#### FreeBSD
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/usr/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### OS X
+
+Data | Directory
+:----|:---------
+Bitcask | `./data/bitcask`
+LevelDB | `./data/leveldb`
+Ring | `./data/riak/ring`
+Configuration | `./etc`
+Cluster Metadata | `./data/riak/cluster_meta`
+Search | `./data/riak/yz`
+Strong consistency | `./data/ensembles`
+
+**Note**: OS X paths are relative to the directory in which the package
+was extracted.
+
+#### SmartOS
+
+Data | Directory
+:----|:---------
+Bitcask | `/var/db/riak/bitcask`
+LevelDB | `/var/db/riak/leveldb`
+Ring | `/var/db/riak/ring`
+Configuration | `/opt/local/etc/riak`
+Cluster Metadata | `/var/db/riak/cluster_meta`
+Search | `/var/db/riak/yz`
+Strong consistency | `/var/db/riak/ensembles`
+
+#### Solaris
+
+Data | Directory
+:----|:---------
+Bitcask | `/opt/riak/data/bitcask`
+LevelDB | `/opt/riak/data/leveldb`
+Ring | `/opt/riak/ring`
+Configuration | `/opt/riak/etc`
+Cluster Metadata | `/opt/riak/cluster_meta`
+Search | `/opt/riak/yz`
+Strong consistency | `/opt/riak/data/ensembles`
+
+## Performing Backups
+
+{{% note title="Deprecation notice" %}}
+In previous versions of Riak KV, there was a [`riak-admin backup`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#backup) command commonly used for
+backups. This functionality is now deprecated. We strongly recommend using the backup procedure documented below instead.
+{{% /note %}}
+
+Backups can be accomplished through a variety of common methods. Standard utilities such as `cp`, `rsync`, and `tar` can be used, as well as any backup system already in place in your environment.
+
+Simple shell commands, like those in the following examples, are sufficient for creating a backup of your Bitcask or LevelDB data, ring, and Riak KV configuration directories for a binary package-based Riak KV Linux
+installation.
+
+The following examples use `tar`:
+
+{{% note %}}
+Backups must be performed while Riak KV is stopped to prevent data loss. 
+
+{{% /note %}}
+
+### Bitcask
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/bitcask /var/lib/riak/ring /etc/riak
+```
+
+### LevelDB
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/leveldb /var/lib/riak/ring /etc/riak
+```
+
+### Cluster Metadata
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/cluster_meta
+```
+
+### Search / Solr Data
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/yz
+```
+
+### Strong Consistency Data
+
+Persistently stored data used by Riak's [strong consistency][use ref strong consistency] feature
+can be backed up in an analogous fashion:
+
+```bash
+tar -czf /mnt/riak_backups/riak_data_`date +%Y%m%d_%H%M`.tar.gz \
+  /var/lib/riak/ensembles
+```
+
+## Restoring a Node
+
+The method you use to restore a node will differ depending on a combination of factors, including node name changes and your network environment.
+
+If you are replacing a node with a new node that has the same node name (typically a fully qualified domain name or IP address), then restoring the node is a simple process:
+
+1. Install Riak on the new node.
+2. Restore your old node's configuration files, data directory, and ring
+   directory.
+3. Start the node and verify proper operation with `riak ping`,
+   `riak-admin status`, and other methods you use to check node health.
+
+If the node name of a restored node (`-name` argument in `vm.args` or
+`nodename` parameter in `riak.conf`) is different from the name of the
+node that the restored backup was taken from, you will need to
+additionally:
+
+1. Mark the original instance down in the cluster using
+   [`riak-admin down <node>`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#down)
+2. Join the restored node to the cluster using
+   [`riak-admin cluster join <node>`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster-join)
+3. Replace the original instance with the renamed instance with
+   [`riak-admin cluster force-replace <node1> <node2>`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster-force-replace)
+4. Plan the changes to the cluster with `riak-admin cluster plan`
+5. Finally, commit the cluster changes with `riak-admin cluster commit`
+
+{{% note %}}
+For more information on the `riak-admin cluster` commands, refer to our documentation on [cluster administration]({{<baseurl>}}riak/kv/3.0.4/using/admin/).
+{{% /note %}}
+
+For example, if there are five nodes in the cluster with the original node names `riak1.example.com` through `riak5.example.com` and you wish to restore `riak1.example.com` as `riak6.example.com`, you would execute the following commands on `riak6.example.com`.
+
+1. Join to any existing cluster node.
+
+    ```bash
+    riak-admin cluster join riak@riak2.example.com
+    ```
+
+2. Mark the old instance down.
+
+    ```bash
+    riak-admin down riak@riak1.example.com
+    ```
+
+3. Force-replace the original instance with the new one.
+
+    ```bash
+    riak-admin cluster force-replace \
+        riak@riak1.example.com riak@riak6.example.com
+    ```
+
+4. Display and review the cluster change plan.
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+5. Commit the changes to the cluster. 
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+Your [configuration files][config reference] should also be changed to match the new name in addition to running the commands (the `-name` setting in `vm.args` in the older config system, and the `nodename` setting in `riak.conf` in the newer system).
+
+If the IP address of any node has changed, verify that the changes are reflected in your configuration files to ensure that the HTTP and Protocol Buffers interfaces are binding to the correct addresses.
+
+A robust DNS configuration can simplify the restore process if the IP addresses of the nodes change, but the hostnames are used for the node names and the hostnames stay the same. Additionally, if the HTTP and Protocol Buffers interface settings are configured to bind to all IP interfaces (0.0.0.0), then no changes will need to be made to your configuration files.
+
+When performing restore operations involving `riak-admin cluster force-replace`, we recommend that you start only one node at a time and verify that each node that is started has the correct name for itself
+and for any other nodes whose names have changed:
+
+1. Verify that the correct name is present in your configuration file.
+2. Once the node is started, run `riak attach` to connect to the node. The prompt obtained should contain the correct node name.
+    - (It may be necessary to enter an Erlang atom by typing `x.` and pressing Enter)
+3. Disconnect from the attached session with **Ctrl-G + q**.
+4. Finally, run `riak-admin member-status` to list all of the nodes and verify that all nodes listed have the correct names.
+
+## Restoring a Cluster
+
+Restoring a cluster from backups is documented [on its own page]({{<baseurl>}}riak/kv/3.0.4/using/repair-recovery/failure-recovery/#cluster-recovery-from-backups).
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/cluster-operations/bucket-types.md b/content/riak/kv/3.0.4/using/cluster-operations/bucket-types.md
new file mode 100644
index 0000000000..b282deae86
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/cluster-operations/bucket-types.md
@@ -0,0 +1,63 @@
+---
+title: "Bucket Types"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Bucket Types"
+    identifier: "cluster_operations_bucket_types"
+    weight: 104
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+---
+
+Buckets are essentially a flat namespace in Riak. They allow the same
+key name to exist in multiple buckets and enable you to apply
+configurations across keys.
+
+{{% note title="How Many Buckets Can I Have?" %}}
+Buckets come with virtually no cost _except for when you modify the default
+bucket properties_. Modified bucket properties are gossiped around the cluster
+and therefore add to the amount of data sent around the network. In other
+words, buckets using the `default` bucket type are free. More on that in the
+next section.
+{{% /note %}}
+
+In Riak versions 2.0 and later, Basho suggests that you [use bucket types]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) to namespace and configure all buckets you use. Bucket types have a lower overhead within the cluster than the
+default bucket namespace but require an additional setup step on the
+command line.
+
+## Creating a Bucket Type
+
+When creating a new bucket type, you can create one without
+any properties and set individual buckets to be indexed. 
The step below
+creates and activates the bucket type:
+
+```bash
+riak-admin bucket-type create animals '{"props":{}}'
+riak-admin bucket-type activate animals
+```
+
+And this step applies the index to the `cats` bucket, which bears the
+`animals` bucket type we just created and activated:
+
+```curl
+curl -XPUT $RIAK_HOST/types/animals/buckets/cats/props \
+  -H 'Content-Type: application/json' \
+  -d '{"props":{"search_index":"famous"}}'
+```
+
+Another possibility is to set the `search_index` as a default property
+of the bucket type. This means _any_ bucket under that type will
+inherit that setting and have its values indexed.
+
+```bash
+riak-admin bucket-type create animals '{"props":{"search_index":"famous"}}'
+riak-admin bucket-type activate animals
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/3.0.4/using/cluster-operations/changing-cluster-info.md
new file mode 100644
index 0000000000..269a58892a
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/cluster-operations/changing-cluster-info.md
@@ -0,0 +1,458 @@
+---
+title: "Changing Cluster Information"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Changing Cluster Info"
+    identifier: "cluster_operations_change_info"
+    weight: 101
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/nodes/renaming
+  - /riak/kv/3.0.4/ops/running/nodes/renaming
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+
+## Change the Node Name
+
+The node name is an important setting for the Erlang VM, especially when
+you want to build a cluster of nodes, as the node name identifies both
+the Erlang application and the host name on the network. All nodes in
+the Riak cluster need these node names to communicate and coordinate
+with each other.
+
+In your configuration files, the node name defaults to `riak@127.0.0.1`.
+To change the node name, change the following line:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+Change it to something that corresponds to either the IP address or a
+resolvable host name for this particular node, like so:
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+## Change the HTTP and Protocol Buffers binding address
+
+By default, Riak's HTTP and Protocol Buffers services are bound to the
+local interface, i.e. 127.0.0.1, and are therefore unable to serve
+requests from the outside network. The relevant setting is in your
+[configuration files][config reference]:
+
+```riakconf
+# For HTTP
+listener.http.internal = 127.0.0.1:8098
+
+# For Protocol Buffers
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+% In the riak_api section
+
+% For HTTP
+{http, [ {"127.0.0.1", 8098 } ]},
+
+% For Protocol Buffers
+{pb, [ {"127.0.0.1", 8087} ] },
+```
+
+Either change it to use an IP address that corresponds to one of the
+server's network interfaces, or 0.0.0.0 to allow access from all
+interfaces and networks, e.g.:
+
+```riakconf
+listener.http.internal = 0.0.0.0:8098
+```
+
+```appconfig
+% In the riak_core section
+{http, [ {"0.0.0.0", 8098 } ]},
+```
+
+The same configuration should be changed for the Protocol Buffers
+interface if you intend to use it (which we recommend). 
Change the
+following line:
+
+```riakconf
+listener.protobuf.internal = 0.0.0.0:8087
+```
+
+```appconfig
+% In the riak_core section
+{pb, [ {"0.0.0.0", 8087} ] },
+```
+
+## Rename Single Node Clusters
+
+To rename a single-node development cluster:
+
+1. Stop the node with `riak stop`.
+
+2. Change the node's `nodename` parameter in `riak.conf`, or the `-name` parameter in `vm.args`, to the new name.
+
+3. Change any IP addresses in `riak.conf` or `app.config` if necessary. Specifically: `listener.protobuf.$name`, `listener.http.$name`, and `listener.https.$name` in `riak.conf`, and `pb_ip`, `http`, `https`, and `cluster_mgr` in `app.config`.
+
+4. Delete the contents of the node's `ring` directory. The location of the ring directory is the value for the `ring.state_dir` in `riak.conf`, or `ring_state_dir` in `app.config`.
+
+5. Start Riak on the node with `riak start`.
+
+
+## Rename Multi-Node Clusters
+
+For multi-node clusters, a rename is a slightly more complex procedure; however, it is very similar to the process for renaming a single node.
+
+Prior to Riak version 1.2, a cluster node's name could only be changed with the [`riak-admin reip`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#reip) command, which involves downtime for the entire cluster. As of Riak version 1.2, that method has been superseded by [`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster-force-replace), which is safer and does not require cluster-wide downtime.
+
+There still exist scenarios that require nodes to be renamed while stopped, such as seeding a cluster with backups from another cluster that does not share the same node names. Please see the [Clusters from Backups](#clusters-from-backups) section for more details on renaming in this scenario.
+
+The following example describes reconfiguring node names with the new `riak-admin cluster force-replace` method.
+
+### Example Scenario
+
+For this example scenario, Riak is operating in a cluster of 5 nodes with the following network configuration:
+
+* `riak@10.1.42.11` on `node1.localdomain` → IP address changing to 192.168.17.11
+* `riak@10.1.42.12` on `node2.localdomain` → IP address changing to 192.168.17.12
+* `riak@10.1.42.13` on `node3.localdomain` → IP address changing to 192.168.17.13
+* `riak@10.1.42.14` on `node4.localdomain` → IP address changing to 192.168.17.14
+* `riak@10.1.42.15` on `node5.localdomain` → IP address changing to 192.168.17.15
+
+The above list shows the network configuration details for our 5 nodes, including the Erlang node name value, the node's fully qualified domain name, and the new IP address each node will be configured to use.
+
+The nodes in our example cluster are currently configured to use the *10.1.42.* private subnetwork range. Our goal for this example will be to configure the nodes to instead use the *192.168.17.* private subnetwork range and do so in a rolling fashion without interrupting cluster operation.
+
+### Process
+
+This process can be accomplished in three phases. The details and steps required for each phase are presented in the following section.
+
+1. [Down the node to be reconfigured](#down)
+2. [Reconfigure node to use new address](#reconfigure)
+3. [Repeat previous steps on each node](#repeat)
+
+
+<a id="down"></a>
+#### Down the Node
+
+1. Stop Riak on `node1.localdomain`:
+
+    ```bash
+    riak stop
+    ```
+
+    The output should look like this:
+
+    ```
+    Attempting to restart script through sudo -H -u riak
+    ok
+    ```
+
+2. 
From the `node2.localdomain` node, mark `riak@10.1.42.11` down:
+
+    ```bash
+    riak-admin down riak@10.1.42.11
+    ```
+
+    Successfully marking the node down should produce output like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: "riak@10.1.42.11" marked as down
+    ```
+
+    This step informs the cluster that `riak@10.1.42.11` is offline and ring-state transitions should be allowed. While we're executing the `riak-admin down` command from `node2.localdomain` in this example, the command can be executed from any currently running node.
+
+<a id="reconfigure"></a>
+#### Reconfigure Node to Use New Address
+
+Reconfigure `node1.localdomain` to listen on the new private IP address *192.168.17.11* by following these steps:
+
+1. Change the node's `nodename` parameter in `riak.conf`, or `-name` parameter in `vm.args`, to reflect the new node name. For example:
+
+    `riak.conf`: `nodename = riak@192.168.17.11`
+    `vm.args`: `-name riak@192.168.17.11`
+
+2. Change any IP addresses to *192.168.17.11* in `riak.conf` or `app.config` as previously described in step 3 of [Rename Single Node Clusters](#rename-single-node-clusters).
+
+3. Rename the node's `ring` directory, the location of which is described in step 4 of [Rename Single Node Clusters](#rename-single-node-clusters). You may rename it to whatever you like, as it will only be used as a backup during the node renaming process.
+
+4. Start Riak on `node1.localdomain`.
+
+    ```bash
+    riak start
+    ```
+
+5. Join the node back into the cluster.
+
+    ```bash
+    riak-admin cluster join riak@10.1.42.12
+    ```
+
+    Successful staging of the join request should have output like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: staged join request for 'riak@192.168.17.11' to 'riak@10.1.42.12'
+    ```
+
+6. Use `riak-admin cluster force-replace` to change all ownership references from `riak@10.1.42.11` to `riak@192.168.17.11`:
+
+    ```bash
+    riak-admin cluster force-replace riak@10.1.42.11 riak@192.168.17.11
+    ```
+
+    Successful force replacement staging output looks like this:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Success: staged forced replacement of 'riak@10.1.42.11' with 'riak@192.168.17.11'
+    ```
+
+7. 
Review the new changes with `riak-admin cluster plan`:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Example output:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    =========================== Staged Changes ============================
+    Action Nodes(s)
+    -----------------------------------------------------------------------
+    join 'riak@192.168.17.11'
+    force-replace 'riak@10.1.42.11' with 'riak@192.168.17.11'
+    -----------------------------------------------------------------------
+
+    WARNING: All of 'riak@10.1.42.11' replicas will be lost
+
+    NOTE: Applying these changes will result in 1 cluster transition
+
+    #######################################################################
+    After cluster transition 1/1
+    #######################################################################
+
+    ============================= Membership ==============================
+    Status Ring Pending Node
+    -----------------------------------------------------------------------
+    valid 20.3% -- 'riak@192.168.17.11'
+    valid 20.3% -- 'riak@10.1.42.12'
+    valid 20.3% -- 'riak@10.1.42.13'
+    valid 20.3% -- 'riak@10.1.42.14'
+    valid 18.8% -- 'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    Partitions reassigned from cluster changes: 13
+    13 reassigned from 'riak@10.1.42.11' to 'riak@192.168.17.11'
+    ```
+
+8. Commit the new changes to the cluster with `riak-admin cluster commit`:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+    Output from the command should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    Cluster changes committed
+    ```
+
+9. Check that the node is participating in the cluster and functioning as expected:
+
+    ```bash
+    riak-admin member-status
+    ```
+
+    Output should resemble this example:
+
+    ```bash
+    Attempting to restart script through sudo -H -u riak
+    ============================= Membership ==============================
+    Status Ring Pending Node
+    -----------------------------------------------------------------------
+    valid 20.3% -- 'riak@192.168.17.11'
+    valid 20.3% -- 'riak@10.1.42.12'
+    valid 20.3% -- 'riak@10.1.42.13'
+    valid 20.3% -- 'riak@10.1.42.14'
+    valid 18.8% -- 'riak@10.1.42.15'
+    -----------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+10. Monitor hinted handoff transfers with the `riak-admin transfers` command to ensure they have finished.
+
+11. Clean up by deleting the renamed `ring` directory once all previous steps have been successfully completed.
+
+{{% note title="Note" %}}
+When using the `riak-admin cluster force-replace` command, you will always get a
+warning message like: `WARNING: All of 'riak@10.1.42.11' replicas will be
+lost`. Since we didn't delete any data files and we are replacing the node
+with itself under a new name, we will not lose any replicas.
+{{% /note %}}
+
+<a id="repeat"></a>
+#### Repeat previous steps on each node
+
+Repeat the steps above for each of the remaining nodes in the cluster.
+
+Use *riak@192.168.17.11* as the target node for further `riak-admin cluster join` commands issued from subsequently reconfigured nodes to join those nodes to the cluster. 
+
+```bash
+riak-admin cluster join riak@192.168.17.11
+```
+
+A successful join request staging produces output similar to this example:
+
+```bash
+Attempting to restart script through sudo -H -u riak
+Success: staged join request for 'riak@192.168.17.12' to 'riak@192.168.17.11'
+```
+
+## Clusters from Backups
+
+The above steps describe a process for renaming nodes in a running cluster. When seeding a new cluster with backups where the nodes must have new names, typically done as a secondary cluster or in a disaster recovery scenario, a slightly different process must be used. This is because the node names must resolve to the new hosts in order for the nodes to start and communicate with each other.
+
+Expanding on the Example Scenario above, the steps below can be used to rename nodes in a cluster that is being restored from backups. These steps assume every node is offline, and they will indicate when to bring each node online.
+
+#### Bringing Up the First Node
+
+In order to bring our first node online, we'll first need to use the `riak-admin reip` command on a single node. In this example, we'll use `riak@10.1.42.11` as our first node.
+
+1. In `riak.conf`, change `nodename` (or `-name` in `vm.args`) from `riak@10.1.42.11` to your new node name, `riak@192.168.17.11`.
+
+2. On `node1.localdomain` run `riak-admin reip riak@10.1.42.11 riak@192.168.17.11`. This will change the name of `riak@10.1.42.11` to `riak@192.168.17.11` in the Riak ring.
+
+3. Start Riak on `node1.localdomain`.
+
+4. Once Riak is started on `node1.localdomain`, mark the rest of the nodes in the cluster down, using `riak-admin down`. For example, we would down `riak@10.1.42.12` with `riak-admin down riak@10.1.42.12`.
+
+5. Confirm every other node in the cluster is marked down by running `riak-admin member-status` on `node1.localdomain`:
+
+    ```bash
+    ================================= Membership ==================================
+    Status Ring Pending Node
+    -------------------------------------------------------------------------------
+    valid 20.3% -- 'riak@192.168.17.11'
+    down 20.3% -- 'riak@10.1.42.12'
+    down 20.3% -- 'riak@10.1.42.13'
+    down 20.3% -- 'riak@10.1.42.14'
+    down 18.8% -- 'riak@10.1.42.15'
+    -------------------------------------------------------------------------------
+    Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:4
+    ```
+
+6. Ensure `riak@192.168.17.11` is listed as the claimant by running `riak-admin ring-status` on `node1.localdomain`:
+
+    ```bash
+    ================================== Claimant ===================================
+    Claimant: 'riak@192.168.17.11'
+    Status: up
+    Ring Ready: true
+
+    ============================== Ownership Handoff ==============================
+    No pending changes.
+
+    ============================== Unreachable Nodes ==============================
+    All nodes are up and reachable
+    ```
+
+Once all nodes are marked as down and our first node is listed as the claimant, we can proceed with the rest of the nodes.
+
+#### Bringing Up the Remaining Nodes
+
+1. On each of the remaining nodes, change `nodename` in `riak.conf` (or `-name` in `vm.args`) as described above.
+
+2. Move aside the ring directory. As in [Rename Multi-Node Clusters](#rename-multi-node-clusters), we will save this ring directory as a backup until we're finished.
+
+3. Start each node. They will start as if they are each a member of their own cluster, but will retain their restored data.
+
+4. Join each node to our first node using `riak-admin cluster join riak@192.168.17.11`.
+
+5. 
Force-replace each old node name with its new one. For example, `riak-admin cluster force-replace riak@10.1.42.12 riak@192.168.17.12`.
+
+6. Once the above is complete for each node, run `riak-admin cluster plan` on any node. The output should look similar to below:
+
+    ```bash
+    =============================== Staged Changes ================================
+    Action Details(s)
+    -------------------------------------------------------------------------------
+    force-replace 'riak@10.1.42.12' with 'riak@192.168.17.12'
+    force-replace 'riak@10.1.42.13' with 'riak@192.168.17.13'
+    force-replace 'riak@10.1.42.14' with 'riak@192.168.17.14'
+    force-replace 'riak@10.1.42.15' with 'riak@192.168.17.15'
+    join 'riak@192.168.17.12'
+    join 'riak@192.168.17.13'
+    join 'riak@192.168.17.14'
+    join 'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+
+    WARNING: All of 'riak@10.1.42.12' replicas will be lost
+    WARNING: All of 'riak@10.1.42.13' replicas will be lost
+    WARNING: All of 'riak@10.1.42.14' replicas will be lost
+    WARNING: All of 'riak@10.1.42.15' replicas will be lost
+
+    NOTE: Applying these changes will result in 1 cluster transition
+
+    ###############################################################################
+    After cluster transition 1/1
+    ###############################################################################
+
+    ================================= Membership ==================================
+    Status Ring Pending Node
+    -------------------------------------------------------------------------------
+    valid 20.3% -- 'riak@192.168.17.11'
+    valid 20.3% -- 'riak@192.168.17.12'
+    valid 20.3% -- 'riak@192.168.17.13'
+    valid 20.3% -- 'riak@192.168.17.14'
+    valid 18.8% -- 'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+
+    Partitions reassigned from cluster changes: 51
+    13 reassigned from 'riak@10.1.42.12' to 'riak@192.168.17.12'
+    13 reassigned from 'riak@10.1.42.13' to 'riak@192.168.17.13'
+    13 reassigned from 'riak@10.1.42.14' to 'riak@192.168.17.14'
+    12 reassigned from 'riak@10.1.42.15' to 'riak@192.168.17.15'
+    ```
+
+7. If the above plan looks correct, commit the cluster changes with `riak-admin cluster commit`.
+
+8. 
Once the cluster transition has completed, all node names should be changed and marked as valid in `riak-admin member-status`, as shown below:
+
+    ```bash
+    ================================= Membership ==================================
+    Status Ring Pending Node
+    -------------------------------------------------------------------------------
+    valid 20.3% -- 'riak@192.168.17.11'
+    valid 20.3% -- 'riak@192.168.17.12'
+    valid 20.3% -- 'riak@192.168.17.13'
+    valid 20.3% -- 'riak@192.168.17.14'
+    valid 18.8% -- 'riak@192.168.17.15'
+    -------------------------------------------------------------------------------
+    Valid:5 / Leaving:0 / Exiting:0 / Joining:0 / Down:0
+    ```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/cluster-operations/handoff.md b/content/riak/kv/3.0.4/using/cluster-operations/handoff.md
new file mode 100644
index 0000000000..b2e458d0f3
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/cluster-operations/handoff.md
@@ -0,0 +1,120 @@
+---
+title: "Enabling and Disabling Handoff"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Handoff"
+    identifier: "cluster_operations_handoff"
+    weight: 107
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/handoff
+  - /riak/kv/3.0.4/ops/running/handoff
+---
+
+Riak KV provides a command-line interface for enabling and disabling handoff on the fly, without needing to change your configuration and restart the node. To
+enable handoff:
+
+```bash
+riak-admin handoff enable <inbound|outbound|both> <nodename>
+```
+
+You must specify two things when enabling handoff:
+
+* whether you'd like to enable inbound handoff, outbound handoff, or
+  both
+* the node to be targeted by the command (or all nodes)
+
+You can select a target node using either the `--node` or the `-n` flag.
+You can select a direction by specifying `inbound`, `outbound`, or
+`both`. The following equivalent commands would enable outbound handoff
+on the node `riak3@100.0.0.1`:
+
+```bash
+riak-admin handoff enable outbound --node riak3@100.0.0.1
+riak-admin handoff enable outbound -n riak3@100.0.0.1
+```
+
+These two equivalent commands would enable inbound handoff on the node
+`riak5@100.0.0.1`:
+
+```bash
+riak-admin handoff enable inbound --node riak5@100.0.0.1
+riak-admin handoff enable inbound -n riak5@100.0.0.1
+```
+
+Alternatively, you can enable handoff on all nodes at the same time
+using either the `-a` or `--all` flag. This command would enable both
+inbound and outbound handoff on all nodes:
+
+```bash
+riak-admin handoff enable both --all
+```
+
+As with enabling handoff, the `riak-admin handoff disable` command requires that
+you specify both a node (or nodes) to be targeted by the command and
+whether you'd like to disable inbound handoff, outbound handoff, or
+both. The `disable` command works just like `enable`. This command
+would disable all forms of handoff on all nodes, to give just one
+example:
+
+```bash
+riak-admin handoff disable both --all
+```
+
+## Other Command-line Tools
+
+In addition to enabling and disabling handoff, the
+[`riak-admin`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/) interface enables you to
+retrieve a summary of handoff-related activity and other information.
+
+### summary
+
+The `summary` command provides high-level information about active
+handoffs in a cluster. 
+
+```bash
+riak-admin handoff summary
+```
+
+This will return a table that will provide the following information
+about each node in your cluster:
+
+Header | Description
+:------|:-----------
+`Node` | The name of the node
+`Total` | Total number of active transfers throughout the entire cluster
+`Ownership` | Total number of ownership exchanges
+`Resize` | Total handoffs related to ring resizing operations (This should always be 0, as the Resize Ring feature has been deprecated)
+`Hinted` | Total number of [hinted handoffs](../../reference/handoff#types-of-handoff)
+`Repair` | Total repair-related handoffs. More information can be found [here](https://github.com/basho/riak_core/commit/036e409eb83903315dd43a37c7a93c9256863807).
+
+### details
+
+This command provides information only about active transfers.
+
+```bash
+riak-admin handoff details
+```
+
+If no transfers are currently underway, this command will output `No
+ongoing transfers`. Otherwise, you will see details of each ongoing transfer.
+
+### config
+
+This command displays the values for handoff-specific [configurable parameters]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#intra-cluster-handoff) on each node in
+the cluster, including:
+
+* `transfer_limit`
+* `handoff.outbound`
+* `handoff.inbound`
+* `handoff.port`
+
+Descriptions of those parameters can be found in the sections above.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/cluster-operations/inspecting-node.md b/content/riak/kv/3.0.4/using/cluster-operations/inspecting-node.md
new file mode 100644
index 0000000000..431a39697f
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/cluster-operations/inspecting-node.md
@@ -0,0 +1,496 @@
+---
+title: "Inspecting a Node"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Inspecting a Node"
+    identifier: "cluster_operations_inspecting_node"
+    weight: 103
+    parent: "managing_cluster_operations"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/nodes/inspecting
+  - /riak/kv/3.0.4/ops/running/nodes/inspecting
+---
+
+When you want to inspect a Riak node to gather metrics on performance or
+potential issues, a number of tools are available to help; they are
+either included with Riak itself or made available through the
+Riak community.
+
+This guide provides starting points and details on some of the available
+tools for inspecting a Riak node.
+
+## riak-admin status
+
+`riak-admin status` is a subcommand of the `riak-admin` command that is
+included with every installation of Riak. The `status` subcommand
+provides data related to the current operating status for a node. The
+output of `riak-admin status` is categorized and detailed below.
+
+Please note, for some counters, such as `node_get_fsm_objsize`, a
+minimum of 5 transactions is required for statistics to be generated.
+
+#### Performance
+
+We recommend checking stats every 90-120 seconds for best performance.
+
+Repeated runs of the `riak-admin status` command should not have a
+negative performance impact as the statistics are cached internally in
+Riak.
+
+### Active Stats
+
+Active Stats represent current activity on the node. 
+ +Stat | Description +------------------------|--------------------------------------------------- +`pbc_active` | Number of active Protocol Buffers connections +`node_get_fsm_active` | Number of active GET FSMs +`node_put_fsm_active` | Number of active PUT FSMs +`index_fsm_active` | Number of active Secondary Index FSMs +`list_fsm_active` | Number of active Keylisting FSMs +`node_get_fsm_rejected` | Number of GET FSMs actively being rejected by Sidejob's overload protection +`node_put_fsm_rejected` | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### Average Stats + +Average Stats represent an average calculated as (total occurrences / +number of samples) since this node was started. In the below stats the +sample time is 1s, giving us a per-second average. Currently, the only +Average Stats are reported by Sidejob - an Erlang library that +implements a parallel, capacity-limited request pool. + +Stat | Description +------------------------|--------------------------------------------------- +`node_get_fsm_in_rate` | Average number of GET FSMs enqueued by Sidejob +`node_get_fsm_out_rate` | Average number of GET FSMs dequeued by Sidejob +`node_put_fsm_in_rate` | Average number of PUT FSMs enqueued by Sidejob +`node_put_fsm_out_rate` | Average number of PUT FSMs dequeued by Sidejob + +### One-Minute Stats + +One-Minute Stats represent the number of times a particular activity has +occurred within the last minute on this node. + +#### General One-Minute Stats + +Stat | Description +--------------------------------------|--------------------------------------------------- +`node_gets` | Number of GETs coordinated by this node, including GETs to non-local vnodes in the last minute +`node_puts` | Number of PUTs coordinated by this node, where a PUT is sent to a local vnode in the last minute +`vnode_gets` | Number of GET operations coordinated by local vnodes on this node in the last minute +`vnode_puts` | Number of PUT operations coordinated by local vnodes on this node in the last minute +`vnode_index_refreshes` | Number of secondary indexes refreshed on this node during secondary index anti-entropy in the last minute +`vnode_index_reads` | Number of local replicas participating in secondary index reads in the last minute +`vnode_index_writes` | Number of local replicas participating in secondary index writes in the last minute +`vnode_index_writes_postings` | Number of individual secondary index values written in the last minute +`vnode_index_deletes` | Number of local replicas participating in secondary index deletes in the last minute +`vnode_index_deletes_postings` | Number of individual secondary index values deleted in the last minute +`pbc_connects` | Number of Protocol Buffers connections made in the last minute +`node_get_fsm_active_60s` | Number of GET FSMs active in the last minute +`node_put_fsm_active_60s` | Number of PUT FSMs active in the last minute +`node_get_fsm_rejected_60s` | Number of GET FSMs rejected by Sidejob's overload protection in the last minute +`node_put_fsm_rejected_60s` | Number of PUT FSMs rejected by Sidejob's overload protection in the last minute +`index_fsm_create` | Number of Secondary Index query FSMs created in the last minute +`index_fsm_create_error` | Number of Secondary Index query FSM creation errors in the last minute +`list_fsm_create` | Number of Keylisting FSMs created in the last minute +`list_fsm_create_error` | Number of Keylisting FSM creation errors in the last minute +`read_repairs` | Number of read 
repair operations this node has coordinated in the last minute +`read_repairs_primary_outofdate_one` | Number of read repair operations performed on primary vnodes in the last minute due to stale replicas +`read_repairs_primary_notfound_one` | Number of read repair operations performed on primary vnodes in the last minute due to missing replicas +`read_repairs_fallback_outofdate_one` | Number of read repair operations performed on fallback vnodes in the last minute due to stale replicas +`read_repairs_fallback_notfound_one` | Number of read repair operations performed on fallback vnodes in the last minute due to missing replicas + +#### FSM Time + +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time +effectively represents experienced latency. Mean, Median, and 95th-, +99th-, and 100th-percentile (Max) counters are displayed. These are +one-minute stats. + +Stat | Description +---------------------------|--------------------------------------------------- +`node_get_fsm_time_mean` | Mean time between reception of client GET request and subsequent response to client +`node_get_fsm_time_median` | Median time between reception of client GET request and subsequent response to client +`node_get_fsm_time_95` | 95th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_99` | 99th percentile time between reception of client GET request and subsequent response to client +`node_get_fsm_time_100` | 100th percentile time between reception of client GET request and subsequent response to client +`node_put_fsm_time_mean` | Mean time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_median` | Median time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_95` | 95th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_99` | 99th percentile time between reception of client PUT request and subsequent response to client +`node_put_fsm_time_100` | 100th percentile time between reception of client PUT request and subsequent response to client + +#### GET FSM Siblings + +GET FSM Sibling Stats offer a count of the number of siblings +encountered by this node on the occasion of a GET request. These are +one-minute stats. + +Stat | Description +-------------------------------|--------------------------------------------------- +`node_get_fsm_siblings_mean` | Mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | Median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | 95th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | 99th percentile of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | 100th percentile of siblings encountered during all GET operations by this node within the last minute + +#### GET FSM Objsize + +GET FSM Objsize Stats represent a view of the sizes of objects flowing +through this node's GET FSMs. The size of an object is obtained by +summing the length of the bucket name, key, serialized vector clock, +value, and serialized metadata of each sibling. 
GET FSM Objsize and GET +FSM Siblings are inextricably linked. These are one-minute stats. + +Stat | Description +------------------------------|--------------------------------------------------- +`node_get_fsm_objsize_mean` | Mean object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_median` | Median object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_95` | 95th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_99` | 99th percentile object size (bytes) encountered by this node within the last minute +`node_get_fsm_objsize_100` | 100th percentile object size (bytes) encountered by this node within the last minute + +### Total Stats + +Total Stats represent the total number of times a particular activity +has occurred since this node was started. + +Stat | Description +---------------------------------------|--------------------------------------------------- +`node_gets_total` | Total number of GETs coordinated by this node, including GETs to non-local vnodes +`node_puts_total` | Total number of PUTs coordinated by this node, including PUTs to non-local vnodes +`vnode_gets_total` | Total number of GETs coordinated by local vnodes +`vnode_puts_total` | Total number of PUTs coordinated by local vnodes +`read_repairs_total` | Total number of Read Repairs this node has coordinated +`coord_redirs_total` | Total number of requests this node has redirected to other nodes for coordination +`vnode_index_refreshes_total` | Total number of indexes refreshed during secondary index anti-entropy +`vnode_index_reads_total` | Total number of local replicas participating in secondary index reads +`vnode_index_writes_total` | Total number of local replicas participating in secondary index writes +`vnode_index_writes_postings_total` | Total number of individual secondary index values written +`vnode_index_deletes_total` | Total number of local replicas participating in secondary index deletes +`vnode_index_deletes_postings_total` | Total number of individual secondary index values deleted +`pbc_connects_total` | Total number of Protocol Buffers connections made +`precommit_fail` | Total number of pre-commit hook failures +`postcommit_fail` | Total number of post-commit hook failures +`node_get_fsm_rejected_total` | Total number of GET FSMs rejected by Sidejob's overload protection +`node_put_fsm_rejected_total` | Total number of PUT FSMs rejected by Sidejob's overload protection +`read_repairs_primary_outofdate_count` | Total number of read repair operations performed on primary vnodes due to stale replicas +`read_repairs_primary_notfound_count` | Total number of read repair operations performed on primary vnodes due to missing replicas +`read_repairs_fallback_outofdate_count`| Total number of read repair operations performed on fallback vnodes due to stale replicas +`read_repairs_fallback_notfound_count` | Total number of read repair operations performed on fallback vnodes due to missing replicas + +### Timestamps + +Some of the Erlang applications that make up Riak contribute +statistics to `riak-admin status`. The timestamps below record, in +Epoch time, the last time statistics for that application were +generated. + +Stat | Description +--------------------|--------------------------------------------------- +`riak_kv_stat_ts` | The last time Riak KV stats were generated. +`riak_pipe_stat_ts` | The last time Riak Pipe stats were generated.
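+ +For example, to spot-check a few of the counters documented above without reading the full listing, you can filter the output of `riak-admin status` with standard shell tools. This is a minimal sketch, assuming the default `name : value` output format: + +```bash +# Show only the total GET/PUT counters for this node +riak-admin status | grep -E 'node_(gets|puts)_total' +```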
+ +### Ring + +General ring information is reported in `riak-admin status`. + +Stat | Description +---------------------|--------------------------------------------------- +`ring_members` | List of nodes that are members of the ring +`ring_num_partitions`| The number of partitions in the ring +`ring_ownership` | List of all nodes in the ring and their associated partition ownership +`ring_creation_size` | Ring size this cluster was created with + +### CPU and Memory + +CPU statistics are taken directly from Erlang's cpu_sup module; +documentation can be found at [ErlDocs: +cpu_sup](http://erlang.org/doc/man/cpu_sup.html). + +Stat | Description +-------------|--------------------------------------------------- +`cpu_nprocs` | Number of operating system processes +`cpu_avg1` | The average number of active processes for the last 1 minute (equivalent to the top(1) command's load average when divided by 256) +`cpu_avg5` | The average number of active processes for the last 5 minutes (equivalent to the top(1) command's load average when divided by 256) +`cpu_avg15` | The average number of active processes for the last 15 minutes (equivalent to the top(1) command's load average when divided by 256) + +Memory statistics are taken directly from the Erlang virtual machine; +documentation can be found at [ErlDocs: +Memory](http://erlang.org/doc/man/erlang.html#memory-0). + +Stat | Description +------------------------|--------------------------------------------------- +`memory_total` | Total allocated memory (sum of processes and system) +`memory_processes` | Total amount of memory allocated for Erlang processes +`memory_processes_used` | Total amount of memory used by Erlang processes +`memory_system` | Total allocated memory that is not directly related to an Erlang process +`memory_atom` | Total amount of memory currently allocated for atom storage +`memory_atom_used` | Total amount of memory currently used for atom storage +`memory_binary` | Total amount of memory used for binaries +`memory_code` | Total amount of memory allocated for Erlang code +`memory_ets` | Total memory allocated for Erlang Term Storage +`mem_total` | Total available system memory +`mem_allocated` | Total memory allocated for this node + +### Erlang VM + +The below statistics describe properties of the Erlang VM.
+ +Stat | Description +--------------------------|--------------------------------------------------- +`nodename` | The name this node uses to identify itself +`connected_nodes` | A list of the nodes that this node is aware of at this time +`sys_driver_version` | String representing the Erlang driver version in use by the runtime system +`sys_global_heaps_size` | Current size of the shared global heap +`sys_heap_type` | String representing the heap type in use (one of private, shared, hybrid) +`sys_logical_processors` | Number of logical processors available on the system +`sys_otp_release` | Erlang OTP release version in use on the node +`sys_process_count` | Number of processes currently running in the Erlang VM +`sys_smp_support` | Boolean value representing whether symmetric multi-processing (SMP) is available +`sys_system_version` | Detailed Erlang version information +`sys_system_architecture` | The node operating system and hardware architecture +`sys_threads_enabled` | Boolean value representing whether threads are enabled +`sys_thread_pool_size` | Number of threads in the asynchronous thread pool +`sys_wordsize` | Size of Erlang term words in bytes as an integer; for example, 4 is returned on 32-bit architectures and 8 is returned on 64-bit architectures + +### Miscellaneous Information + +Miscellaneous Information provides additional details particular to this +node. + +Stat | Description +---------------------------|--------------------------------------------------- +`leveldb_read_block_error` | The number of LevelDB read block errors. Will read as undefined if LevelDB is not being used. +`disk` | Information about the disk, taken from Erlang's disksup module. Reported as [{"ID",KBytes_Used,Percent_Util}]. +`storage_backend` | The storage backend currently in use. + +### Pipeline Metrics + +The following metrics from riak_pipe are generated during MapReduce +operations. + +Stat | Description +--------------------------------|--------------------------------------------------- +`pipeline_active` | The number of pipelines active in the last 60 seconds +`pipeline_create_count` | The total number of pipelines created since the node was started +`pipeline_create_error_count` | The total number of pipeline creation errors since the node was started +`pipeline_create_error_one` | The number of pipeline creation errors in the last 60 seconds +`pipeline_create_one` | The number of pipelines created in the last 60 seconds + +### Application and Subsystem Versions + +The specific version of each Erlang application and subsystem which +makes up a Riak node is present in the `riak-admin status` output. Each +application is linked below next to its version identifier.
+ +Stat | Description +------------------------|--------------------------------------------------- +`erlydtl_version` | [ErlyDTL](http://github.com/erlydtl/erlydtl) +`riak_control_version` | [Riak Control](http://github.com/basho/riak_control) +`cluster_info_version` | [Cluster Information](http://github.com/basho/cluster_info) +`riak_search_version` | [Riak Search](http://github.com/basho/riak_search) +`merge_index_version` | [Merge Index](http://github.com/basho/merge_index) +`riak_kv_version` | [Riak KV](http://github.com/basho/riak_kv) +`sidejob_version` | [Sidejob](http://github.com/basho/sidejob) +`riak_api_version` | [Riak API](http://github.com/basho/riak_api) +`riak_pipe_version` | [Riak Pipe](http://github.com/basho/riak_pipe) +`riak_core_version` | [Riak Core](http://github.com/basho/riak_core) +`bitcask_version` | [Bitcask](http://github.com/basho/bitcask) +`basho_stats_version` | [Basho Stats](http://github.com/basho/basho_stats) + `webmachine_version` | [Webmachine](http://github.com/basho/webmachine) +`mochiweb_version` | [MochiWeb](http://github.com/basho/mochiweb) +`inets_version` | [inets](http://erlang.org/doc/apps/inets/) +`erlang_js_version` | [Erlang JS](http://github.com/basho/erlang_js) +`runtime_tools_version` | [Erlang Runtime Tools](http://erlang.org/doc/apps/runtime_tools/) +`os_mon_version` | [Erlang Operating System Monitor](http://erlang.org/doc/apps/os_mon/) +`riak_sysmon_version` | [Riak System Monitor](http://github.com/basho/riak_sysmon) +`ssl_version` | [Erlang Secure Sockets Layer (SSL)](http://erlang.org/doc/apps/ssl/) +`public_key_version` | [Erlang Public Key](http://erlang.org/doc/apps/public_key/) +`crypto_version` | [Erlang crypto](http://erlang.org/doc/apps/crypto/) +`sasl_version` | [SASL](http://erlang.org/doc/apps/sasl/) +`lager_version` | [Lager](http://github.com/DeadZen/lager) +`goldrush_version` | [Goldrush](http://github.com/DeadZen/goldrush) +`compiler_version` | [Erlang Compiler](http://erlang.org/doc/apps/compiler/) +`syntax_tools_version` | [Erlang Syntax Tools](http://www.erlang.org/doc/apps/syntax_tools/) +`stdlib_version` | [Standard Library](http://erlang.org/doc/apps/stdlib/) +`kernel_version` | [Kernel](http://erlang.org/doc/apps/kernel/) + +### Riak Search Statistics + +The following statistics related to Riak Search message queues are +available. 
+ +Stat | Description +-----------------------------|--------------------------------------------------- +`riak_search_vnodeq_max` | Maximum number of unprocessed messages all virtual node (vnode) message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_mean` | Mean number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_median` | Median number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_min` | Minimum number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node in the last minute +`riak_search_vnodeq_total` | Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started +`riak_search_vnodes_running` | Total number of vnodes currently running in the Riak Search subsystem + +Note that, under ideal operation and with the exception of +`riak_search_vnodes_running`, these statistics should contain low values +(e.g., 0-10). Higher values could be indicative of an issue. + +## `riak-debug` + +The `riak-debug` command is used to identify and diagnose common problems with your Riak KV nodes. + +`riak-debug` also runs `riak-admin diag`, which runs a small suite of diagnostic checks against a Riak KV node to discover common problems. It often offers recommendations about how to resolve those problems as well. + +{{% note title="Warning about `riak-debug` and `riak-admin diag` usage" %}} +The `riak-debug` and `riak-admin diag` commands should only be used after a new installation or configuration change. They should not be used as part of regular monitoring. Overuse of `riak-debug` or `riak-admin diag` can eventually cause the node to crash from atom table exhaustion. +{{% /note %}} + +## Strong Consistency Stats + +Riak tabulates a variety of stats related to Riak's optional [strong consistency](../../reference/strong-consistency) feature. The tables below list those stats.
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size (bytes) for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size (bytes) for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response + +## riak-admin diag + +Running `riak-admin diag` by itself will perform a check of all of the +data partitions in your cluster.
It will return a listing of partitions +that have been checked, each of which looks something like this: + +``` +{1393.0.448081016843912887106182707253109560705024, % the partition checked + 'dev-rel@127.0.0.1'}, % that partition's nodename +``` + +At the end of that (potentially very long) listing of checked +partitions, it will print notices, warnings, and other pieces of +information about issues that it has found, including date/time, message +type, and a detailed description. Here's an example: + +``` +15:34:52.736 [warning] Riak crashed at Wed, 07 Dec 2011 21:47:50 GMT, leaving crash dump in /srv/riak/log/erl_crash.dump. Please inspect or remove the file. +15:34:52.736 [notice] Data directory /srv/riak/data/bitcask is not mounted with 'noatime'. Please remount its disk with the 'noatime' flag to improve performance. +``` + +Messages bear the following types (derived from +[syslog](http://en.wikipedia.org/wiki/Syslog) severity levels): + +* `debug` +* `info` +* `notice` +* `warning` +* `error` +* `critical` +* `alert` +* `emergency` + +#### Command flags + +Attaching the `--help` flag will return a list of flags and commands +that can be used with Riaknostic: + +``` +Usage: riak-admin diag [-d <level>] [-l] [-h] [--export] [check_name ...] + +-h, --help Display help/usage dialogue +-d, --level Minimum message severity level (default: notice) +-l, --list Describe available diagnostic tasks +--export Package system info in '/export.zip' +check_name A specific check to run +``` + +Running `riak-admin diag` with the `--list` flag will return a list of +available diagnostic checks. The following checks are available: + +Check | Description +:-----|:----------- +`disk` | Data directory permissions and atime +`dumps` | Find crash dumps +`memory_use` | Measure memory usage +`nodes_connected` | Cluster node liveness +`ring_membership` | Cluster membership validity +`ring_preflists` | Check if the ring satisfies `n_val` +`ring_size` | Check if the ring size is valid +`search` | Check whether Riak Search is enabled on all nodes + +The `--level` flag enables you to specify the log level and thus to +filter messages based on type. You can pass in any of the message types +listed above (`debug`, `info`, etc.). + +The `--level` flag can be used when running `riak-admin diag` with or +without specifying a diagnostic check. + +#### Contributing + +Do you have an idea that would help us improve Riaknostic? If so, fork +the [GitHub repository](https://github.com/basho/riaknostic) and send us +a pull request with your changes. The code is documented with +[edoc](http://riaknostic.basho.com/edoc/index.html), so give the API +Docs a read before you contribute. + +If you want to run the Riaknostic script while developing and you don't +have it hooked up to your local Riak installation, you can invoke it +directly like so: + +```bash +./riaknostic --etc ~/code/riak/rel/riak/etc --base ~/code/riak/rel/riak --user `whoami` [other options] +``` + +Those extra options are usually assigned by the `riak-admin` script for +you, but here's how to set them: + +* `--etc` - The location of your Riak configuration directory (usually + `/etc`). In the example above, configuration is in the generated + directory of a source checkout of Riak. +* `--base` - The "base" directory of Riak, usually the root of the + generated directory or `/usr/lib/riak` on Linux. Scan the + `riak-admin` script for how the `RUNNER_BASE_DIR` variable is + assigned on your platform. +* `--user` - The user/UID as which the Riak node runs.
In a source + checkout, it's the current user; on most systems, it's `riak`. + +## Related Resources + +* [The riak-admin configuration management tool](../../admin/riak-admin/) +* [Riaknostic](http://riaknostic.basho.com/) +* [HTTP API Status](../../../developing/api/http/status/) + + + + diff --git a/content/riak/kv/3.0.4/using/cluster-operations/load-balancing.md b/content/riak/kv/3.0.4/using/cluster-operations/load-balancing.md new file mode 100644 index 0000000000..f2102fa7a6 --- /dev/null +++ b/content/riak/kv/3.0.4/using/cluster-operations/load-balancing.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "Load Balancing" +description: "" +project: "riak_kv" +project_version: 3.0.4 +#menu: +# riak_kv-3.0.4: +# name: "Load Balancing" +# identifier: "cluster_operations_load_balancing" +# weight: 111 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +**TODO: Add content (not sure where this exists in docs)** + + + + diff --git a/content/riak/kv/3.0.4/using/cluster-operations/logging.md b/content/riak/kv/3.0.4/using/cluster-operations/logging.md new file mode 100644 index 0000000000..40baee26a4 --- /dev/null +++ b/content/riak/kv/3.0.4/using/cluster-operations/logging.md @@ -0,0 +1,47 @@ +--- +title: "Enabling and Disabling Debug Logging" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Logging" + identifier: "cluster_operations_logging" + weight: 105 + parent: "managing_cluster_operations" +toc: true +aliases: +--- + +If you'd like to enable debug logging on the current node, i.e. set the +console log level to `debug`, you can do so without restarting the node +by accessing the Erlang console directly using the [`riak attach`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-cli/#attach) command. Once you run this command and drop into the console, enter the following: + +```erlang +lager:set_loglevel(lager_file_backend, "/var/log/riak/console.log", debug). +``` + +You should replace the file location above (`/var/log/riak/console.log`) +with your platform-specific location, e.g. `./log/console.log` for a +source installation. This location is specified by the +`log.console.file` configuration parameter. + +If you'd like to enable debug logging on _all_ nodes instead of just one +node, you can enter the Erlang console of any running node by running `riak +attach` and entering the following: + +```erlang +rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", debug])). +``` + +As before, use the appropriate log file location for your cluster. + +At any time, you can set the log level back to `info`: + +```erlang +rp(rpc:multicall(lager, set_loglevel, [lager_file_backend, "/var/log/riak/console.log", info])). +``` + + + + diff --git a/content/riak/kv/3.0.4/using/cluster-operations/replacing-node.md b/content/riak/kv/3.0.4/using/cluster-operations/replacing-node.md new file mode 100644 index 0000000000..956a275952 --- /dev/null +++ b/content/riak/kv/3.0.4/using/cluster-operations/replacing-node.md @@ -0,0 +1,100 @@ +--- +title: "Replacing a Node" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Replacing a Node" + identifier: "cluster_operations_replace_node" + weight: 102 + parent: "managing_cluster_operations" +toc: true +aliases: +--- + +At some point, for various reasons, you might need to replace a node in +your Riak cluster (which is different from [recovering a failed node]({{<baseurl>}}riak/kv/3.0.4/using/repair-recovery)).
Here is the recommended way to go +about replacing a node. + +1. Back up your data directory on the node in question. In this example +scenario, we'll call the node `riak4`: + + ```bash + sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak + ``` + + If you have any unforeseen issues at any point in the node + replacement process, you can restore the node's data from this + backup. + +2. Download and install Riak on the new node you wish to bring into the +cluster and have it replace the `riak4` node. We'll call the new node +`riak7` for the purpose of this example. + +3. Start the new `riak7` node with [`riak start`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-cli/#start): + + ```bash + riak start + ``` + +4. Plan the join of the new `riak7` node to an existing node already +participating in the cluster; for example `riak0` with the [`riak-admin cluster join`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster) command executed on the new `riak7` node: + + ```bash + riak-admin cluster join riak0 + ``` + +5. Plan the replacement of the existing `riak4` node with the new +`riak7` node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster) command: + + ```bash + riak-admin cluster replace riak4 riak7 + ``` + + <div class=info> + <div class=title>Single Nodes</div> + If a node is started singly using default settings (as, for example, + you might do when you are building your first test environment), you + will need to remove the ring files from the data directory after you + edit `/etc/vm.args`. `riak-admin cluster replace` will not work as + the node has not been joined to a cluster. + </div> + +6. Examine the proposed cluster changes with the [`riak-admin cluster plan`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster) command executed on the new +`riak7` node: + + ```bash + riak-admin cluster plan + ``` + +7. If the changes are correct, you can commit them with the +[`riak-admin cluster commit`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster) command: + + ```bash + riak-admin cluster commit + ``` + + If you need to clear the proposed plan and start over, use [`riak-admin cluster clear`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster): + + ```bash + riak-admin cluster clear + ``` + +Once you have successfully replaced the node, it should begin leaving +the cluster. You can check on ring readiness after replacing the node +with the [`riak-admin ringready`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#ringready) +and [`riak-admin member-status`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#member-status) +commands. + +{{% note title="Ring Settling" %}} +You'll need to make sure that no other ring changes occur between the time +when you start the new node and the ring settles with the new IP info. + +The ring is considered settled when the new node reports `true` when you run +the `riak-admin ringready` command. 
+{{% /note %}} + + + + diff --git a/content/riak/kv/3.0.4/using/cluster-operations/secondary-indexes.md b/content/riak/kv/3.0.4/using/cluster-operations/secondary-indexes.md new file mode 100644 index 0000000000..a224b1ee60 --- /dev/null +++ b/content/riak/kv/3.0.4/using/cluster-operations/secondary-indexes.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 3.0.4 +#menu: +# riak_kv-3.0.4: +# name: "Secondary Indexes" +# identifier: "cluster_operations_2i" +# weight: 109 +# parent: "managing_cluster_operations" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + + diff --git a/content/riak/kv/3.0.4/using/cluster-operations/strong-consistency.md b/content/riak/kv/3.0.4/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..35bf275578 --- /dev/null +++ b/content/riak/kv/3.0.4/using/cluster-operations/strong-consistency.md @@ -0,0 +1,76 @@ +--- +title: "Monitoring Strong Consistency" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Monitoring Strong Consistency" + identifier: "cluster_operations_strong_consistency" + weight: 110 + parent: "managing_cluster_operations" +toc: true +aliases: +--- + +{{% note title="Please Note:" %}} +Riak KV's strong consistency is an experimental feature and may be removed +from the product in the future. Strong consistency is not commercially +supported or production-ready. Strong consistency is incompatible with +Multi-Datacenter Replication, Riak Search, Bitcask Expiration, LevelDB +Secondary Indexes, Riak Data Types and Commit Hooks. We do not recommend its +usage in any production environment. +{{% /note %}} + +## Monitoring Strong Consistency + +Riak provides a wide variety of data related to the current operating +status of a node. This data is available by running the [`riak-admin status`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#status) command. That data now +includes statistics specific to strongly consistent operations. + +A full listing of these stats is available in [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/inspecting-node). +All strong consistency-related stats are prefixed with `consistent_`, +e.g. `consistent_gets`, `consistent_puts`, etc. Many of these stats are +so-called "one-minute stats," meaning that they reflect node activity in +the last minute. + +Strong consistency stats fall into two categories: GET-related and +PUT-related stats. 
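+ +Because every strong consistency stat is prefixed with `consistent_`, one quick way to inspect them all at once is to filter the full stats listing. This is a minimal sketch using standard shell tools, not a dedicated subcommand: + +```bash +# List all strong-consistency-related stats on this node +riak-admin status | grep consistent_ +```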
+ +### GET-related stats + +Stat | Description +:----|:----------- +`consistent_gets` | Number of strongly consistent GETs coordinated by this node in the last minute +`consistent_gets_total` | Total number of strongly consistent GETs coordinated by this node +`consistent_get_objsize_mean` | Mean object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_median` | Median object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_95` | 95th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_99` | 99th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_objsize_100` | 100th-percentile object size for strongly consistent GETs on this node in the last minute +`consistent_get_time_mean` | Mean time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_median` | Median time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_95` | 95th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_99` | 99th-percentile time between reception of client GETs to strongly consistent keys and subsequent response +`consistent_get_time_100` | 100th-percentile time between reception of client GETs to strongly consistent keys and subsequent response + +### PUT-related stats + +Stat | Description +:----|:----------- +`consistent_puts` | Number of strongly consistent PUTs coordinated by this node in the last minute +`consistent_puts_total` | Total number of strongly consistent PUTs coordinated by this node +`consistent_put_objsize_mean` | Mean object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_median` | Median object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_95` | 95th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_99` | 99th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_objsize_100` | 100th-percentile object size for strongly consistent PUTs on this node in the last minute +`consistent_put_time_mean` | Mean time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_median` | Median time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_95` | 95th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_99` | 99th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response +`consistent_put_time_100` | 100th-percentile time between reception of client PUTs to strongly consistent keys and subsequent response + + + + diff --git a/content/riak/kv/3.0.4/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/3.0.4/using/cluster-operations/tictac-active-anti-entropy.md new file mode 100644 index 0000000000..d82d003bc5 --- /dev/null +++ b/content/riak/kv/3.0.4/using/cluster-operations/tictac-active-anti-entropy.md @@ -0,0 +1,34 @@ +--- +title: "TicTac Active Anti-Entropy" +description: "An Active Anti-Entropy library" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "TicTac Active Anti-Entropy" + 
identifier: "TicTac_aae" + weight: 111 + parent: "managing_cluster_operations" +toc: true +aliases: + - /riak/kv/3.0.4/ops/advanced/tictacaae/ + - /riak/3.0.4/ops/advanced/ticktacaae/ +--- + + + +Riak's [active anti-entropy](../../../learn/concepts/active-anti-entropy/) (AAE) subsystem is a set of background processes that repair object inconsistencies stemming from missing or divergent object values across nodes. Riak operators can turn AAE on and off and configure and monitor its functioning. + +## TicTac AAE + +The version of TicTac AAE included in 2.9 releases is a working prototype with limited testing. The intention is to fully integrate the library into the KV 3.0 release. + +TicTac Active Anti-Entropy makes two changes to the way Anti-Entropy has previously worked in Riak. The first change is to the way Merkle trees are constructed, so that they are built incrementally. The second change allows the underlying Anti-entropy key store to be key-ordered while still allowing faster access to keys via their Merkle tree location or the last modified date of the object. + +## Configuring AAE + +Riak's [configuration files](../../../configuring/reference/) enable you not just to turn TicTac AAE on and +off but also to fine-tune your cluster's use of TicTac AAE to suit your requirements. + + + diff --git a/content/riak/kv/3.0.4/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/3.0.4/using/cluster-operations/v2-multi-datacenter.md new file mode 100644 index 0000000000..c7cca8d496 --- /dev/null +++ b/content/riak/kv/3.0.4/using/cluster-operations/v2-multi-datacenter.md @@ -0,0 +1,263 @@ +--- +title_supertext: "V2 Multi-Datacenter" +title: "Replication Operations" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "V2 Multi-Datacenter" + identifier: "cluster_operations_v2" + weight: 115 + parent: "managing_cluster_operations" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.4/ops/mdc/v2/operations + - /riak/kv/3.0.4/ops/mdc/v2/operations +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter/) instead. +{{% /note %}} + +Riak's Multi-Datacenter Replication system is largely +controlled by the `riak-repl` command. The sections below detail the +available subcommands. + +## add-listener + +Adds a listener (primary) to the given node, IP address, and port. + +```bash +riak-repl add-listener <nodename> <listen_ip> <port> +``` + +Below is an example usage:
+ +```bash +riak-repl add-listener riak@10.0.1.156 10.0.1.156 9010 +``` + +## add-nat-listener + +Adds a NAT-aware listener (primary) to the given node, IP address, port, +NAT IP, and NAT port. If a non-NAT listener already exists with the same +internal IP and port, it is "upgraded" to a NAT Listener. + +```bash +riak-repl add-nat-listener <nodename> <internal_ip> <internal_port> <nat_ip> <nat_port> +``` + +Below is an example usage: + +```bash +riak-repl add-nat-listener riak@10.0.1.156 10.0.1.156 9010 50.16.238.123 9010 +``` + +## del-listener + +Removes and shuts down a listener (primary) on the given node, IP +address, and port. + +```bash +riak-repl del-listener <nodename> <listen_ip> <port> +``` + +Below is an example usage: + +```bash +riak-repl del-listener riak@10.0.1.156 10.0.1.156 9010 +``` + +## add-site + +Adds a site (secondary) to the local node, connecting to the specified +listener. + +```bash +riak-repl add-site <ipaddr> <portnum> <sitename> +``` + +Below is an example usage: + +```bash +riak-repl add-site 10.0.1.156 9010 newyork +``` + +## del-site + +Removes a site (secondary) from the local node by name. + +```bash +riak-repl del-site <sitename> +``` + +Below is an example usage: + +```bash +riak-repl del-site newyork +``` + +## status + +Obtains status information about replication. Reports counts on how much +data has been transmitted, transfer rates, message queue lengths of +clients and servers, number of fullsync operations, and connection +status. This command only displays useful information on the leader +node. + +```bash +riak-repl status +``` + +## start-fullsync + +Manually initiates a fullsync operation with connected sites. + +```bash +riak-repl start-fullsync +``` + +## cancel-fullsync + +Cancels any fullsync operations in progress. If a partition is in +progress, synchronization will stop after that partition completes. +During cancellation, `riak-repl status` will show `cancelled` in the +status. + +```bash +riak-repl cancel-fullsync +``` + +## pause-fullsync + +Pauses any fullsync operations in progress. If a partition is in +progress, synchronization will pause after that partition completes. +While paused, `riak-repl status` will show `paused` in the status +information. Fullsync may be cancelled while paused. + +```bash +riak-repl pause-fullsync +``` + +## resume-fullsync + +Resumes any fullsync operations that were paused. If a fullsync +operation was running at the time of the pause, the next partition will +be synchronized. If not, it will wait until the next `start-fullsync` +command or `fullsync_interval`. + +```bash +riak-repl resume-fullsync +``` + +## riak-repl Status Output + +The following definitions describe the output of the `riak-repl status` +command. Please note that many of these statistics will only appear on +the current leader node, and that all counts will be reset to 0 upon +restarting Riak. + +### Client + +Field | Description +:-----|:----------- +`client_stats` | See <a href="{{< baseurl >}}riak/kv/3.0.4/using/reference/multi-datacenter/statistics/#client-statistics">Client Statistics</a> +`client_bytes_recv` | The total number of bytes the client has received since the server has been started +`client_bytes_sent` | The total number of bytes sent to all connected sites +`client_connect_errors` | The number of TCP/IP connection errors +`client_connects` | A count of the number of site connections made to this node +`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node +`client_rx_kbps` | A snapshot of the client (site)-received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`client_tx_kbps` | A snapshot of the client (site)-sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
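+ +Since the client-side fields above all begin with `client_`, one way to pull just those fields out of the full status listing is to filter it with standard shell tools. This is a sketch rather than a dedicated subcommand, and it is only meaningful on the current leader node: + +```bash +# On the current leader, show only client (site) statistics +riak-repl status | grep client_ +```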
+ +### Server + +Field | Description +:-----|:----------- +`server_bytes_recv` | The total number of bytes the server (listener) has received +`server_bytes_sent` | The total number of bytes the server (listener) has sent +`server_connect_errors` | The number of listener to site connection errors +`server_connects` | The number of times the listener connects to the client site +`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started +`server_rx_kbps` | A snapshot of the server (listener) received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`server_tx_kbps` | A snapshot of the server (listener) sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list. +`server_stats` | See <a href="{{< baseurl >}}riak/kv/3.0.4/using/reference/multi-datacenter/statistics/#server-statistics">Server Statistics</a> + +### Elections and Objects + +Field | Description +:-----|:----------- +`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected +`elections_leader_changed` | The number of times a Riak node has surrendered leadership +`objects_dropped_no_clients` | If the realtime replication work queue is full and there aren't any clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation. +`objects_dropped_no_leader` | If a client (site) cannot connect to a leader, objects will be dropped during realtime replication +`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*. +`objects_sent` | The number of objects sent via realtime replication + +### Other + +Field | Description +:-----|:----------- +`listener_<nodeid>` | Defines a replication listener that is running on node `<nodeid>` +`[sitename]_ips` | Defines a replication site +`leader` | Which node is the current leader of the cluster +`local_leader_message_queue_len` | The length of the object queue on the leader +`local_leader_heap_size` | The amount of memory the leader is using + +## Client Statistics + +Field | Description +------|------------ +`node` | A unique ID for the Riak node on which the client (site) is running +`site` | The connected site name configured with `riak-repl add-site` +`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1` +`fullsync_worker` | The Erlang process ID of the fullsync worker +`waiting_to_retry` | The listeners currently waiting to retry replication after a failure +`connected` | A list of connected clients<ul><li>`connected` - The IP address and port of a connected client (site)</li><li>`cluster_name` - The name of the connected client (site)</li><li>`connecting` - The PID, IP address, and port of a client currently establishing a connection</li></ul> +`state` | State shows what the current replication strategy is currently processing. The following definitions appear in the status output if keylist strategy is being used.
They can be used by Basho support to identify replication issues.<ul><li>`request_partition`</li><li>`wait_for_fullsync`</li><li>`send_keylist`</li><li>`wait_ack`</li></ul> + +## Bounded Queue + +The bounded queue is responsible for holding objects that are waiting to +participate in realtime replication. Please see the [Riak MDC Replication Configuration]({{<baseurl>}}riak/kv/3.0.4/configuring/v2-multi-datacenter/) guide for more information. + +Field | Description +------|------------ +`queue_pid` | The Erlang process ID of the bounded queue +`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*. +`queue_length` | The number of Riak objects currently in the bounded queue +`queue_byte_size` | The size of all objects currently in the queue +`queue_max_size` | The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*. +`queue_percentage` | The percentage of the queue that is full +`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged +`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more. + +## Server Statistics + +Field | Description +------|------------ +`node` | A unique ID for the Riak node on which the server (listener) is running +`site` | The connected site name configured with `riak-repl add-site` +`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist` or `syncv1`. +`fullsync_worker` | The Erlang process ID of the fullsync worker +`bounded_queue` | See the <a href="{{< baseurl >}}riak/kv/3.0.4/using/cluster-operations/v2-multi-datacenter/#bounded-queue">Bounded Queue</a> section above +`state` | State shows what the current replication strategy is processing. The following definitions appear in the status output if the keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>`wait_for_partition`</li><li>`build_keylist`</li><li>`wait_keylist`</li><li>`diff_bloom`</li><li>`diff_keylist`</li></ul> +`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server + +## Keylist Strategy + +The following fields appear under both the `keylist_server` and +`keylist_client` fields. Any differences are described in the table. + +Field | Description +------|------------ +`fullsync` | On the client, the number of partitions that remain to be processed. On the server, the partition currently being processed by fullsync replication.
+`partition_start` | The number of elapsed seconds since replication has started on a given partition +`stage_start` | The number of elapsed seconds since replication has started on a given stage +`get_pool_size` | The number of Riak get finite state workers available to process requests + + + + diff --git a/content/riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter.md new file mode 100644 index 0000000000..89f11f7cca --- /dev/null +++ b/content/riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter.md @@ -0,0 +1,425 @@ +--- +title_supertext: "V3 Multi-Datacenter" +title: "Replication Operations" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "V3 Multi-Datacenter" + identifier: "cluster_operations_v3" + weight: 114 + parent: "managing_cluster_operations" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.4/ops/mdc/v3/operations + - /riak/kv/3.0.4/ops/mdc/v3/operations +--- + +[config v3 mdc]: {{<baseurl>}}riak/kv/3.0.4/configuring/v3-multi-datacenter +[config v3 nat]: {{<baseurl>}}riak/kv/3.0.4/configuring/v3-multi-datacenter/nat +[config v3 quickstart]: {{<baseurl>}}riak/kv/3.0.4/configuring/v3-multi-datacenter/quick-start +[config v3 ssl]: {{<baseurl>}}riak/kv/3.0.4/configuring/v3-multi-datacenter/ssl +[ref v3 stats]: {{<baseurl>}}riak/kv/3.0.4/using/reference/multi-datacenter/statistics + +This document explains how to manage replication with the `riak-repl` +command. The behavior of some of these commands can be altered by +setting the appropriate [configuration][config v3 mdc] values. + +All commands need to be run only once on a single node of a cluster for +the changes to propagate to all other nodes. All changes will persist +across node restarts and will automatically take effect when nodes are +added to the cluster. + +## Cluster Connectivity + +#### clustername + +Set the `clustername` for all nodes in a Riak cluster. + +* Without a parameter, returns the current name of the cluster +* With a parameter, names the current cluster + +To **set** the `clustername`: + +* Syntax: `riak-repl clustername <clustername>` +* Example: `riak-repl clustername Boston` + +To **get** the `clustername`: + +* Syntax: `riak-repl clustername` +* Example: `riak-repl clustername` + +#### connect + +The `connect` command establishes communications from a source cluster +to a sink cluster of the same ring size. The `host:port` of the sink +cluster is used for this. The IP and port to connect to can be found in +the `advanced.config` of the remote cluster, under `riak_core` and +`cluster_mgr`. + +The `host` can be either an IP address + +* Syntax: `riak-repl connect <ip>:<port>` +* Example: `riak-repl connect 192.168.2.1:9080` + +...or a hostname that will resolve to an IP address. + +* Syntax: `riak-repl connect <host>:<port>` +* Example: `riak-repl connect Austin:9080` + +#### disconnect + +Disconnects a source cluster from a sink cluster. + +You may define a `host:port` combination + +* Syntax: `riak-repl disconnect <host>:<port>` +* Example: `riak-repl disconnect 192.168.2.1:9080` + +...or use the *name* of the cluster. + +* Syntax: `riak-repl disconnect <sink_clustername>` +* Example: `riak-repl disconnect Austin` + +#### connections + +Display a list of connections between source and sink clusters.
+
+* Syntax: `riak-repl connections`
+* Example: `riak-repl connections`
+
+#### clusterstats
+
+Displays current cluster stats using an optional `ip:port` as well as an
+optional `protocol-id`.
+
+`protocol-id` can be one of the following:
+
+* `cluster_mgr`
+* `rt_repl`
+* `fs_repl`
+
+The `clusterstats` command in use:
+
+* Syntax: `riak-repl clusterstats <host>:<port> <protocol-id>`
+* Example: `riak-repl clusterstats 192.168.2.1:9080`
+* Example: `riak-repl clusterstats 192.168.2.1:9080 fs_repl`
+
+
+## Realtime Replication Commands
+
+#### realtime enable
+
+Enable realtime replication from a source cluster to sink clusters.
+
+This will start queuing updates for replication. The cluster will still
+require an invocation of `realtime start` for replication to occur.
+
+* Syntax: `riak-repl realtime enable <sink_clustername>`
+* Example: `riak-repl realtime enable Austin`
+
+#### realtime disable
+
+Disable realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime disable <sink_clustername>`
+* Example: `riak-repl realtime disable Austin`
+
+
+#### realtime start
+
+Start realtime replication connections from a source cluster to sink
+clusters. See also `realtime enable` (above).
+
+* Syntax: `riak-repl realtime start <sink_clustername>`
+* Example: `riak-repl realtime start Austin`
+
+#### realtime stop
+
+Stop realtime replication from a source cluster to sink clusters.
+
+* Syntax: `riak-repl realtime stop <sink_clustername>`
+* Example: `riak-repl realtime stop Austin`
+
+
+## Fullsync Replication Commands
+
+The behavior of these commands can be altered via the
+`fullsync_on_connect` parameter in `advanced.config`. See the
+[Configuration Guide][config v3 mdc] for more information.
+
+#### fullsync enable
+
+Enable fullsync replication from a source cluster to sink clusters. By
+default, a fullsync will begin as soon as a connection to the remote
+cluster is established.
+
+* Syntax: `riak-repl fullsync enable <sink_clustername>`
+* Example: `riak-repl fullsync enable Austin`
+
+#### fullsync disable
+
+Disable fullsync for a cluster.
+
+* Syntax: `riak-repl fullsync disable <sink_clustername>`
+* Example: `riak-repl fullsync disable Austin`
+
+#### fullsync start
+
+Starts a fullsync. If the application configuration
+`fullsync_on_connect` is set to `false`, a fullsync needs to be started
+manually. This is also used to trigger a periodic fullsync using a cron
+job. While a fullsync is in progress, a `start` command is ignored and a
+message is logged.
+
+* Syntax: `riak-repl fullsync start <sink_clustername>`
+* Example: `riak-repl fullsync start Austin`
+
+#### fullsync stop
+
+Stops a fullsync.
+
+* Syntax: `riak-repl fullsync stop <sink_clustername>`
+* Example: `riak-repl fullsync stop Austin`
+
+## Cascading Realtime Writes
+
+#### realtime cascades
+
+Shows the current cascading realtime setting.
+
+* Syntax: `riak-repl realtime cascades`
+* Example: `riak-repl realtime cascades`
+
+#### realtime cascades always
+
+Enable realtime cascading writes.
+
+* Syntax: `riak-repl realtime cascades always`
+* Example: `riak-repl realtime cascades always`
+
+#### realtime cascades never
+
+Disable realtime cascading writes.
+
+* Syntax: `riak-repl realtime cascades never`
+* Example: `riak-repl realtime cascades never`
+
+
+## NAT
+
+**Note**: See [V3 Multi Data Center Replication With NAT][config v3 nat]
+for more information.
+
+#### nat-map show
+
+Show the current NAT mapping table. 
+
+* Syntax: `nat-map show`
+* Example: `riak-repl nat-map show`
+
+#### nat-map add
+
+Adds a NAT map from the external IP, with an optional port, to an
+internal IP.
+
+* Syntax: `nat-map add <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map add 128.205.106.1:5555 192.168.1.2`
+
+#### nat-map del
+
+Deletes a specific NAT map entry.
+
+* Syntax: `nat-map del <externalip>[:port] <internalip>`
+* Example: `riak-repl nat-map del 128.205.106.1:5555 192.168.1.2`
+
+NAT changes will be applied once fullsync and/or realtime replication
+has been stopped and started.
+
+
+## Riak CS MDC Gets
+
+#### `proxy-get enable`
+
+Enable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get enable <sink_clustername>`
+* Example: `riak-repl proxy-get enable newyorkbackup`
+
+#### `proxy-get disable`
+
+Disable Riak CS `proxy_get` requests from a **sink** cluster (if
+`proxy_get` has been enabled in `advanced.config`).
+
+* Syntax: `proxy-get disable <sink_clustername>`
+* Example: `riak-repl proxy-get disable newyorkbackup`
+
+#### `add-block-provider-redirect`
+
+Provide a redirect to the `<to-cluster>` for `proxy_get` requests if the
+`<from-cluster>` is going to be decommissioned.
+
+* Syntax: `riak-repl add-block-provider-redirect <from-cluster> <to-cluster>`
+* Example: `riak-repl add-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}" "{'dev3@127.0.0.1',{1299,512501,511032}}"`
+
+#### `show-block-provider-redirect`
+
+Show the mapping for a given cluster-id redirect.
+
+* Syntax: `riak-repl show-block-provider-redirect <from-cluster>`
+* Example: `riak-repl show-block-provider-redirect "{'dev1@127.0.0.1',{1391,544501,519016}}"`
+
+#### `delete-block-provider-redirect`
+
+Delete an existing redirect so that `proxy_get` requests are routed
+again to the original provider cluster ID.
+
+* Syntax: `riak-repl delete-block-provider-redirect <from-cluster>`
+* Example: `riak-repl delete-block-provider-redirect "{'dev1@127.0.0.1', {1391,544501,519016}}"`
+
+#### `show-local-cluster-id`
+
+Display this cluster's cluster-id tuple, for use with the
+`*-block-provider-redirect` commands.
+
+**Note**: A cluster-id is surrounded by double quotes, which need to be
+included when passed to `*-block-provider-redirect`.
+
+* Syntax: `riak-repl show-local-cluster-id`
+* Example:
+
+  ```bash
+  riak-repl show-local-cluster-id
+  ```
+
+  Possible output:
+
+  ```
+  local cluster id: "{'dev1@127.0.0.1',{1391,544501,519016}}"
+  ```
+
+## `riak-repl` Status Output
+
+Details about the `riak-repl status` command can be found under
+[Statistics][ref v3 stats].
+
+
+## Tuning
+
+These tuning values may also be set via the node's `advanced.config` file.
+See the [Configuration Guide][config v3 mdc] for more information.
+
+#### `fullsync max_fssource_node`
+
+This limits the number of fullsync workers that will be running on each
+individual node in a source cluster. This is a hard limit for *all*
+fullsyncs that are enabled. Additional fullsync configurations will
+*not* increase the number of fullsync workers allowed to run on any
+node. This only affects nodes on the source cluster on which this
+parameter is defined via the configuration file or command line. 
+
+* Syntax: `riak-repl fullsync max_fssource_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssource_node 2`
+
+#### `fullsync max_fssource_cluster`
+
+This is the hard limit of fullsync workers that will be running on the
+source side of a cluster across all nodes on that cluster for a fullsync
+to a sink cluster. This means that if you have configured fullsync for
+two different clusters, both with a `max_fssource_cluster` of 5, up to
+10 fullsync workers can be in progress. This only affects nodes on the
+source cluster on which this parameter is defined via the configuration
+file or the command line.
+
+* Syntax: `riak-repl fullsync max_fssource_cluster <value>`
+* Default: `5`
+* Example: `riak-repl fullsync max_fssource_cluster 5`
+
+
+#### `fullsync max_fssink_node`
+
+This limits the number of fullsync workers allowed to run on each
+individual node in a sink cluster. This is a hard limit for each
+fullsync source node interacting with a sink node. Thus, multiple
+simultaneous source connections to a sink node will have to share the
+sink node's maximum number of connections. This only affects nodes on
+the sink cluster on which this parameter is defined via the
+configuration file or command line.
+
+* Syntax: `riak-repl fullsync max_fssink_node <value>`
+* Default: `1`
+* Example: `riak-repl fullsync max_fssink_node 5`
+
+
+## Mixing Version 2 Replication with Version 3 Replication
+
+Riak Version 2 Replication and Version 3 Replication can be safely used
+at the same time. If you choose to move to Version 3 Replication
+completely, we recommend disabling Version 2 realtime
+replication bucket hooks with the `riak-repl modes` command.
+
+#### `riak-repl modes`
+
+`modelist` is one or both of `mode_repl12` (Version 2) or `mode_repl13`
+(Version 3) separated by spaces (without commas).
+
+* Syntax: `riak-repl modes <modelist>`
+* Example:
+
+  ```bash
+  riak-repl modes mode_repl12 mode_repl13
+  ```
+
+  Possible output:
+
+  ```
+  Current replication modes: [mode_repl12,mode_repl13]
+  ```
+
+To check the current replication modes:
+
+* Syntax: `riak-repl modes`
+* Example:
+
+  ```bash
+  riak-repl modes
+  ```
+
+  Possible output:
+
+  ```
+  Current replication modes: [mode_repl12,mode_repl13]
+  ```
+
+## Configurations and Metadata in Replication
+
+Fullsync and realtime replication replicate data from source clusters to
+sink clusters, but some configurations and metadata (such as search
+indices and bucket properties) will not be replicated.
+
+Non-replication of certain configurations and metadata supports
+heterogeneous cluster configurations in replication, but there are
+operational steps you can take when you want homogeneous cluster
+configurations.
+
+### Search Indices in Replication
+
+Any search index that is created on a source cluster will _not_ be
+created on sink clusters as part of replication.
+
+If you want search indices on a source cluster to be present on the
+sink clusters, you should update this data for each
+cluster at the same time you would change the source cluster.
+
+### Buckets and Bucket Types in Replication
+
+Bucket and bucket type properties on the source cluster
+will _not_ be replicated from source clusters to sink clusters.
+
+If you want the properties for buckets or bucket types
+present on the source cluster to be propagated to sink clusters,
+you should update this data for each cluster at the same
+time you would change the source cluster. 
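+
+As a closing example, the following hypothetical session strings together
+the commands described above to connect a local source cluster to a sink
+cluster named `Austin` and enable both replication modes (the cluster
+names and the `192.168.2.1:9080` address are illustrative placeholders,
+not values from your environment):
+
+```bash
+riak-repl clustername Boston        # name the local (source) cluster
+riak-repl connect 192.168.2.1:9080  # connect to the sink's cluster_mgr address
+riak-repl realtime enable Austin    # begin queuing objects for realtime replication
+riak-repl realtime start Austin     # open the realtime connections
+riak-repl fullsync enable Austin    # fullsync begins on connect by default
+```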
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/performance.md b/content/riak/kv/3.0.4/using/performance.md
new file mode 100644
index 0000000000..027f5e3cd1
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/performance.md
@@ -0,0 +1,268 @@
+---
+title: "Improving Performance"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Performance"
+    identifier: "managing_performance"
+    weight: 206
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/kv/3.0.4/ops/tuning/linux/
+  - /riak/3.0.4/ops/tuning/linux/
+---
+
+Many Unix-like operating systems and distributions are tuned for desktop
+or light use out of the box and not for a production database. This
+guide describes recommended system performance tunings for operators of
+new and existing Riak clusters. The tunings presented in this guide
+should be considered a starting point. It is important to make note of
+what changes are made, and when, in order to measure the impact of those
+changes.
+
+For performance and tuning recommendations specific to running Riak
+clusters on the Amazon Web Services EC2 environment, see [AWS Performance Tuning]({{<baseurl>}}riak/kv/3.0.4/using/performance/amazon-web-services).
+
+{{% note title="Note on other operating systems" %}}
+Unless otherwise specified, the tunings recommended below are for Linux
+distributions. Users implementing Riak on BSD and Solaris distributions can
+use these tuning recommendations to make analogous changes in those operating
+systems.
+{{% /note %}}
+
+## Storage and File System Tuning
+
+### Virtual Memory
+
+Due to the heavily I/O-focused profile of Riak, swap usage can result in
+the entire server becoming unresponsive. We recommend setting
+`vm.swappiness` to 0 in `/etc/sysctl.conf` to prevent swapping as much
+as possible:
+
+```config
+vm.swappiness = 0
+```
+
+Ideally, you should disable swap to ensure that Riak's process pages are
+not swapped. Disabling swap will allow Riak to crash in situations where
+it runs out of memory. This will leave a crash dump file, named
+`erl_crash.dump`, in the `/var/log/riak` directory, which can be used to
+determine the cause of the memory usage.
+
+### Transparent Huge Pages (THP)
+
+Owing to the way that THP handles memory usage, disproportionately large amounts of memory can become held up in any large database application. We recommend disabling THP at boot time. Unfortunately, this operation is rather OS-specific. As many of our customers run Red Hat 6, we have included instructions for that platform below. If you are using a different operating system, please refer to the documentation for your OS.
+
+In Red Hat 6, you can disable THP by editing `grub.conf` and adding the following line:
+
+```
+transparent_hugepage=never
+```
+
+For the change to become effective, a server reboot is required.
+
+{{% note title="Note on Kernel Tuning Tools" %}}
+Some kernel tuning tools, such as ktune, specify that THP should be enabled. This can cause THP to appear to be enabled even though `transparent_hugepage=never` has already been added to `grub.conf` and the system rebooted. Should this occur, please refer to the documentation for the kernel tuning tool you are using for instructions on how to disable THP.
+{{% /note %}}
+
+### Mounts
+
+Riak makes heavy use of disk I/O for its storage operations. 
It is
+important that you mount volumes that Riak will be using for data
+storage with the `noatime` flag, meaning that filesystem
+[inodes](http://en.wikipedia.org/wiki/Inode) on the volume will not be
+touched when read. This flag can be set temporarily using the following
+command:
+
+
+```bash
+mount -o remount,noatime <riak_data_volume>
+```
+
+Replace `<riak_data_volume>` in the above example with your actual Riak
+data volume. The `noatime` flag can be set in `/etc/fstab` to make the
+mount setting permanent.
+
+### Schedulers
+
+I/O or disk scheduling is a blanket term used to describe the method by
+which an operating system chooses how to order input and output
+operations to and from storage.
+
+The default I/O scheduler (elevator) on Linux is completely fair queuing
+or `cfq`, which is designed for desktop use. While `cfq` is a good
+general-purpose scheduler, it is not designed to provide the kind of
+throughput expected in production database deployments.
+
+Scheduler recommendations:
+
+* The `noop` scheduler when deploying on iSCSI over HBAs, or any
+  hardware-based RAID.
+* The `deadline` scheduler when using SSD-based storage.
+
+To check the scheduler in use for block device `sda`, for example, use
+the following command:
+
+```bash
+cat /sys/block/sda/queue/scheduler
+```
+
+To set the scheduler to `deadline`, use the following command:
+
+```bash
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+The default I/O scheduler queue size is 128. The scheduler queue sorts
+writes in an attempt to optimize for sequential I/O and reduce seek
+time. Changing the depth of the scheduler queue to 1024 can increase the
+proportion of sequential I/O that disks perform and improve overall
+throughput.
+
+To check the scheduler depth for block device `sda`, use the following
+command:
+
+```bash
+cat /sys/block/sda/queue/nr_requests
+```
+
+To increase the scheduler depth to 1024, use the following command:
+
+```bash
+echo 1024 > /sys/block/sda/queue/nr_requests
+```
+
+### Filesystem
+
+Advanced journaling filesystems like [ZFS](http://zfsonlinux.org/) and
+[XFS](http://xfs.org/index.php/Main_Page) are recommended on some
+operating systems for greater reliability and recoverability.
+
+At this time, Basho can recommend using ZFS on Solaris, SmartOS, and
+OmniOS. ZFS may work well with Riak on direct Solaris clones like
+IllumOS, but we cannot yet recommend this. [ZFS on
+Linux](http://zfsonlinux.org) is still too early in its project lifetime
+to be recommendable for production use due to concerns that have been
+raised about excessive memory use. ZFS on FreeBSD is more mature than
+ZFS on Linux, but Basho has not yet performed sufficient performance and
+reliability testing to recommend using ZFS and Riak on FreeBSD.
+
+In the meantime, the [ext3](http://en.wikipedia.org/wiki/Ext3) and
+[ext4](http://en.wikipedia.org/wiki/Ext4) filesystems are sufficient on
+operating systems on which ZFS or XFS are not available or recommended.
+
+The ext4 file system defaults include two options that increase
+integrity but slow performance. Because Riak's integrity is based on
+multiple nodes holding the same data, these two options can be changed
+to boost I/O performance. We recommend setting `barrier=0` and
+`data=writeback` when using the ext4 filesystem.
+
+Similarly, the XFS file system defaults can be optimized to improve
+performance. We recommend setting `nobarrier`, `logbufs=8`,
+`logbsize=256k`, and `allocsize=2M` when using the XFS filesystem. 
+
+As with the `noatime` setting, these settings should be added to
+`/etc/fstab` so that they are persisted across server restarts.
+
+## Kernel and Network Tuning
+
+The following settings are minimally sufficient to improve many aspects
+of Riak usage on Linux, and should be added or updated in
+`/etc/sysctl.conf`:
+
+```config
+net.ipv4.tcp_max_syn_backlog = 40000
+net.core.somaxconn = 40000
+net.core.wmem_default = 8388608
+net.core.rmem_default = 8388608
+net.ipv4.tcp_sack = 1
+net.ipv4.tcp_window_scaling = 1
+net.ipv4.tcp_fin_timeout = 15
+net.ipv4.tcp_keepalive_intvl = 30
+net.ipv4.tcp_tw_reuse = 1
+net.ipv4.tcp_moderate_rcvbuf = 1
+```
+
+{{% note title="Note on system default" %}}
+In general, these recommended values should be compared with the system
+defaults and only changed if benchmarks or other performance metrics indicate
+that networking is the bottleneck.
+{{% /note %}}
+
+The following settings are optional, but may improve performance on a
+10Gb network:
+
+```config
+net.core.rmem_max = 134217728
+net.core.wmem_max = 134217728
+net.ipv4.tcp_mem = 134217728 134217728 134217728
+net.ipv4.tcp_rmem = 4096 277750 134217728
+net.ipv4.tcp_wmem = 4096 277750 134217728
+net.core.netdev_max_backlog = 300000
+```
+
+Certain network interfaces ship with on-board features that have been
+shown to hinder Riak network performance. These features can be disabled
+via `ethtool`.
+
+For an Intel chipset NIC using the
+[ixgbe](http://www.intel.com/support/network/adapter/pro100/sb/CS-032530.htm)
+driver running as `eth0`, for example, run the following command:
+
+```bash
+ethtool -K eth0 lro off
+```
+
+For a Broadcom chipset NIC using the `bnx` or `bnx2` driver, run:
+
+```bash
+ethtool -K eth0 tso off
+```
+
+`ethtool` settings can be persisted across reboots by adding the above
+command to the `/etc/rc.local` script.
+
+{{% note title="Pro tip" %}}
+Take care when tuning these values, as they affect all network
+operations.
+{{% /note %}}
+
+## Optional I/O Settings
+
+If your cluster is experiencing excessive I/O blocking, the following
+settings may help prevent disks from being overwhelmed during periods of
+high write activity, at the expense of peak performance for spiky
+workloads:
+
+```config
+vm.dirty_background_ratio = 0
+vm.dirty_background_bytes = 209715200
+vm.dirty_ratio = 40
+vm.dirty_bytes = 0
+vm.dirty_writeback_centisecs = 100
+vm.dirty_expire_centisecs = 200
+```
+
+These settings have been tested and benchmarked by Basho on nodes with
+16 GB of RAM.
+
+## Open Files Limit
+
+Riak and supporting tools can consume a large number of open file
+handles during normal operation. For stability, increasing the open
+files limit is necessary. See [Open Files Limit]({{<baseurl>}}riak/kv/3.0.4/using/performance/open-files-limit/) for more
+details. 
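+
+As a quick check (a sketch only; see the guide linked above for making
+the change persistent and service-specific), you can inspect and raise
+the limit for the current shell session:
+
+```bash
+# Show the current per-process open files limit
+ulimit -n
+
+# Raise the limit for this session (requires a sufficiently high hard limit)
+ulimit -n 65536
+```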
+
+## Other Tuning Docs
+
+* [AWS Performance Tuning]({{<baseurl>}}riak/kv/3.0.4/using/performance/amazon-web-services)
+* [Erlang VM Tuning]({{<baseurl>}}riak/kv/3.0.4/using/performance/erlang)
+* [Latency Reduction]({{<baseurl>}}riak/kv/3.0.4/using/performance/latency-reduction)
+* [Open Files Limit]({{<baseurl>}}riak/kv/3.0.4/using/performance/open-files-limit/)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/performance/amazon-web-services.md b/content/riak/kv/3.0.4/using/performance/amazon-web-services.md
new file mode 100644
index 0000000000..d1ea3c7729
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/performance/amazon-web-services.md
@@ -0,0 +1,247 @@
+---
+title: "Amazon Web Services Performance Tuning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Amazon Web Services"
+    identifier: "performance_aws"
+    weight: 106
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/tuning/aws
+  - /riak/kv/3.0.4/ops/tuning/aws
+---
+
+This guide introduces best practices for tuning Riak cluster performance
+in the Amazon Web Services (AWS) Elastic Compute Cloud (EC2) environment.
+
+> **Note:**
+>
+> The following guide is supplementary. Be sure to check out [Improving Performance](../) for general performance and tuning recommendations before continuing with this guide.
+
+## EC2 Instances
+
+EC2 instances are available as predefined types which encapsulate a
+fixed amount of computing resources. For Riak, the most important of
+these resources are Disk I/O, RAM, and Network I/O, followed by CPU
+cores. With this in mind, Riak users have reported success with large,
+extra large, and cluster compute instance types for use as cluster nodes
+in the AWS EC2 environment.
+
+The most commonly used [instance types](http://aws.amazon.com/ec2/instance-types/) for Riak cluster nodes are the `large` and `xlarge` `m` class (General Purpose), such as `m4.xlarge`. In cases where 10-gigabit Ethernet networking is desired, the Cluster Compute class of EC2 instances, such as `cc2.8xlarge`, can be used.
+
+Amazon also offers a High I/O Quadruple Extra Large instance
+(`hi1.4xlarge`) that is backed by solid state drives (SSD) and features
+very high I/O performance.
+
+EBS-optimized EC2 instances, which provide between 500 and 1,000
+megabits per second of throughput to [Provisioned
+IOPS](http://aws.amazon.com/about-aws/whats-new/2012/07/31/announcing-provisioned-iops-for-amazon-ebs/)
+EBS volumes, are also available and are recommended for use with
+Provisioned IOPS EBS volumes.
+
+Riak's primary bottleneck will be disk and network I/O, meaning that in
+most cases, standard EBS will incur too much latency and iowait. Riak's
+I/O pattern tends to operate on small blobs from many places on the
+disk, whereas EBS is best at bulk reads and writes. The negative effects
+of this pattern can be mitigated by adding RAID over multiple volumes,
+using Provisioned IOPS, and/or choosing the Bitcask backend if secondary
+indexes are not needed for the application.
+
+In any case, proper benchmarking and tuning are needed to achieve the
+desired performance.
+
+{{% note title="Tip" %}}
+Most successful AWS cluster deployments use more EC2 instances than they would
+physical servers to compensate for the performance variability caused by
+shared, virtualized resources. Plan to have more EC2 instance-based nodes than
+physical server nodes when estimating cluster size with respect to node count. 
+{{% /note %}} + +## Operating System + +### Clocks + +NTP is configured by default on Amazon EC2 Linux instances. Please +refer to the [Set the Time for an +Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html) +section of the EC2 documentation for steps on verifying if NTP is +working properly. If NTP is not working properly, significant clock +drift can occur. + +### Mounts and Scheduler + +On EBS volumes, the **deadline** scheduler should be used. To check the +scheduler in use for block device xvdf, for example, use the following +command: + +```bash +cat /sys/block/xvdf/queue/scheduler +``` + +To set the scheduler to deadline, use the following command: + +```bash +echo deadline > /sys/block/xvdf/queue/scheduler +``` + +More information on the disk scheduler is available in [Improving Performance](../). + +### Virtual Memory Subsystem + +EBS volumes have considerably less bandwidth than hardware disks. To +avoid saturating EBS bandwidth and inducing IO latency spikes, it is +recommended to tune the Linux virtual memory subsystem to flush smaller +amounts of data more often. To do so, please see [Linux system performance tuning](../#optional-i-o-settings). + +### Forensics + +When a failure occurs, collect as much information as possible. Check +monitoring systems, back up log and configuration files if they are +available, including system logs like `dmesg` and `syslog`. Make sure +that the other nodes in the Riak cluster are still operating normally +and are not affected by a wider problem like an AWS service outage. Try +to determine the cause of the problem from the data you have collected. +If you are paying for [TI Tokyo support services](https://www.tiot.jp/en/solutions/riak/), either directly or re-sold under Erlang Solutions, and the failure comes from Riak or is not immediately obvious, you may open a ticket on the TI Tokyo Client Services help desk. + +Have your collected data ready when contacting TI Tokyo Client Services. A +Client Services Engineer (CSE) might request log files, configuration +files, or other information. + +## Data Loss + +Many failures either do not entail data loss or have minimal loss that +can be repaired automatically, without intervention. Outage of a single +node does not necessarily cause data loss, as other replicas of every +key are available elsewhere in the cluster. Once the node is detected as +down, other nodes in the cluster will take over its responsibilities +temporarily and transmit the updated data to it when it eventually +returns to service (also called hinted handoff). + +The more severe data loss scenarios usually relate to hardware failure +(in the case of AWS, service failure or instance termination). In the +cases where data is lost, several options are available for restoring +the data: + +1. Restore from backup. A daily backup of Riak nodes can be helpful. + The data in this backup may be stale depending on the time at which + the node failed, but can be used to partially restore data from + lost EBS volumes. If running in a RAID configuration, rebuilding the + array may also be possible. +2. Restore from Multi-Datacenter Replication. If replication is enabled + between two or more clusters, the missing data will gradually be + restored via realtime replication and fullsync replication. A + fullsync operation can also be triggered manually via the + `riak-repl` command. +3. Restore using intra-cluster repair. 
Riak versions 1.2 and greater
+   include a "repair" feature which will restore lost partitions with
+   data from other replicas. This currently has to be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho CSE.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho is strongly recommended.
+
+## Benchmarking
+
+Using a tool such as [Basho Bench](https://github.com/basho/basho_bench), you can generate load that
+simulates application operations by constructing and communicating
+approximately-compatible data payloads with the Riak cluster directly.
+
+Benchmarking is critical to determining the appropriate EC2 instance
+types, and strongly recommended. More information is available on
+benchmarking Riak clusters with [Basho Bench](../benchmarking).
+
+Besides running Basho Bench, we also advise that you load test Riak with
+your own tests to ensure that the load imparted by MapReduce queries,
+full-text queries, and index queries is within the expected range.
+
+## Simulating Upgrades, Scaling, and Failure States
+
+In addition to simply measuring performance, it is also important to
+measure how performance degrades when the cluster is not in
+steady-state. While under a simulation of live load, the following
+states might be simulated:
+
+1. Stop one or more nodes normally and restart them after a few moments
+   (simulates a [rolling upgrade](../../../setup/upgrading/cluster)).
+2. Join two or more nodes to the cluster.
+3. Remove nodes from the cluster (after step #2).
+4. Hard-kill the Riak `beam.smp` process (i.e., `kill -9`) and then
+   restart it.
+5. Hard-reboot a node's instance using the AWS console and then
+   restart it.
+6. Hard-stop and destroy a node's instance and build a new one from
+   backup.
+7. Via networking, e.g. firewall, partition one or more nodes from
+   the rest of the cluster and then restore the original
+   configuration.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. While the Riak node is out, other nodes may also
+be at risk if free capacity is low on the rest of the cluster, so
+monitor carefully.
+
+Replacing the EC2 instance type with one that has greater RAM capacity
+may temporarily alleviate the problem, but out of memory (OOM) tends to
+be an indication that the cluster is underprovisioned.
+
+Software bugs (memory leaks) could also be a cause of OOM, so we
+recommend that Riak users with paid support contact TI Tokyo Client
+Services if this problem occurs.
+
+## Dealing with IP addresses
+
+EC2 instances that are not provisioned inside a VPC can change the
+following attributes after a restart:
+
+* Private IP address
+* Public IP address
+* Private DNS
+* Public DNS
+
+Because these parameters play a role in a Riak instance's node name,
+ensure that you follow the steps outlined in the [Node Name Changed](../../repair-recovery/failed-node/#node-name-changed) section to replace
+it.
+
+To avoid this inconvenience, you can deploy Riak inside a
+[VPC](http://aws.amazon.com/vpc/). Instances inside the VPC do not
+change their private IP address on restart. 
In addition, you get the
+following benefits:
+
+* Access control lists can be defined at multiple levels
+* The instance is not automatically open to the internet
+* Amazon VPC is [free](http://aws.amazon.com/vpc/pricing/)
+
+## Choice of Storage
+
+EC2 instances support ephemeral and EBS storage. Ephemeral storage is
+local to the instance and generally performs better, but disappears when
+instances go down.
+
+On the other hand, EBS is effectively network attached storage that
+persists after instances go down. Along with EBS, you can optionally
+enable [Provisioned
+IOPS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIOPS.html)
+(PIOPS) to provide more stable performance.
+
+For more information on EC2 storage options, please see their
+[documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Storage.html).
+
+## References
+
+* [Improving Performance](../)
+* [Failure and Recovery](../../repair-recovery)
+* [Basho Client Services Help Desk](https://help.basho.com)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/performance/benchmarking.md b/content/riak/kv/3.0.4/using/performance/benchmarking.md
new file mode 100644
index 0000000000..1380945276
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/performance/benchmarking.md
@@ -0,0 +1,602 @@
+---
+title: "Benchmarking"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Benchmarking"
+    identifier: "performance_benchmarking"
+    weight: 100
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/benchmarking
+  - /riak/kv/3.0.4/ops/building/benchmarking
+---
+
+Basho Bench is a benchmarking tool created to conduct accurate and
+repeatable performance tests and stress tests, and to produce
+performance graphs.
+
+Basho Bench exposes a pluggable driver interface and has been extended
+to serve as a benchmarking tool against a variety of projects. New
+drivers can be written in Erlang and are generally less than 200 lines
+of code.
+
+## Installation
+
+You will need:
+
+1. One or more load-generating machines on which to install
+   ```basho_bench```. Especially when testing larger clusters, a
+   single machine cannot generate enough load to properly exercise
+   the cluster. Do not run the ```basho_bench``` instances on the
+   Riak nodes themselves, since the load generation will compete with
+   Riak for resources.
+2. The [R statistics language](http://www.r-project.org/) must be
+   installed (somewhere available to you) if you wish to generate
+   graphs (see the [Generating Benchmark Graphs](#generating-benchmark-graphs) section, below).
+
+### Download ```basho_bench```
+
+You can download the pre-built packages below, or build it from source.
+
+* **Ubuntu 14.04 LTS:**
+  [basho-bench_0.10.0.53-1_amd64.deb](http://ps-tools.s3.amazonaws.com/basho-bench_0.10.0.53.g0e15158-ubuntu14.04LTS-1_amd64.deb)
+* **CentOS 7:**
+  [basho-bench-0.10.0.53-1.el7.centos.x86_64.rpm](http://ps-tools.s3.amazonaws.com/basho-bench-0.10.0.53.g0e15158-1.el7.centos.x86_64.rpm)
+
+### Building from Source
+
+#### Prerequisites
+
+* Erlang must be installed. See [Installing Erlang]({{<baseurl>}}riak/kv/3.0.4/setup/installing/source/erlang) for instructions
+  and versioning requirements. Note: Unless you're an experienced
+  Erlang developer, we recommend that you use Ubuntu 14.04 LTS (and
+  not CentOS) when building ```basho_bench``` from source. 
Later
+  versions of CentOS (6 and 7) have difficulty with installing and
+  enabling certain parts of the ```erlang-crypto``` package, which
+  is required by ```basho_bench```.
+* Install ```git``` (to check out the ```basho_bench``` code)
+
+#### Compiling
+
+```bash
+git clone git://github.com/basho/basho_bench.git
+cd basho_bench
+make
+```
+
+## Usage
+
+Run the `basho_bench` script, passing in the config file and the
+directory in which to generate the results:
+
+```bash
+basho_bench --results-dir <results dir> <config file>
+```
+
+If you've installed ```basho_bench``` from a pre-built package, you
+must specify full paths for the test results directory and config
+file. (Also, don't use the common ```~/``` shell notation; specify the
+user's home directory explicitly.)
+
+```bash
+basho_bench --results-dir /home/username/bench_results/ /etc/basho_bench/riakc_pb.config
+```
+
+The example above will generate results in
+```/home/username/bench_results/current/```.
+
+If you built ```basho_bench``` from source, you can get away with
+relative paths (and the results directory will be created in the
+current directory):
+
+```bash
+./basho_bench myconfig.config
+```
+
+This will generate results in `tests/current/`. You will need to
+create a configuration file. The recommended approach is to start from
+a file in the `examples` directory and modify settings using the
+[Configuration](#configuration) section below for
+reference.
+
+## Generating Benchmark Graphs
+
+The output from running the `basho_bench` script can be used to
+create graphs showing the following:
+
+* Throughput - Operations per second over the duration of the test.
+* Latency at the 99th percentile, 99.9th percentile, and maximum for
+  the selected operations.
+* Median latency, mean latency, and 95th percentile latency for the
+  selected operations.
+
+### Prerequisites
+
+The R statistics language is needed to generate graphs. Note: If
+necessary, R can be installed on a different machine than the one
+running basho_bench, and the performance data can be copied (via
+rsync, for example) from the load testing machine to the one that will
+be generating and viewing the graphs (such as a desktop).
+
+#### Installing R on Ubuntu
+
+```
+sudo apt-get install r-base
+```
+
+#### Installing R on Other Platforms
+
+- [More information](http://www.r-project.org/)
+- [Download R](http://cran.r-project.org/mirrors.html)
+
+Follow the instructions for your platform to install R.
+
+### Generating Graphs
+
+If you have installed ```basho_bench``` from a pre-built package, and
+you also have R installed on the same machine, you can generate the
+current result graph with the following:
+
+```bash
+Rscript --vanilla /usr/lib/basho_bench/lib/basho_bench*/priv/summary.r -i /home/username/bench_results/current/
+```
+
+This will create a results file in
+```/home/username/bench_results/summary.png```.
+
+If you have built ```basho_bench``` from source, you can just use
+```make```. To generate a benchmark graph against the current
+results, run:
+
+```bash
+make results
+```
+
+This will create a results file in `tests/current/summary.png`.
+
+You can also run this manually:
+
+```bash
+priv/summary.r -i tests/current
+```
+
+### Troubleshooting Graph Generation
+
+For additional help, see the [Troubleshooting Graph Generation](https://github.com/basho/basho_bench#troubleshooting-graph-generation)
+section of the ```basho_bench/README```.
+
+## How does it work? 
+
+When Basho Bench starts (`basho_bench.erl`), it reads the
+configuration (`basho_bench_config.erl`), creates a new results
+directory, and then sets up the test (`basho_bench_app.erl` and
+`basho_bench_sup.erl`).
+
+During test setup, Basho Bench creates the following:
+
+* One **stats process** (`basho_bench_stats.erl`). This process
+  receives notifications when an operation completes, plus the
+  elapsed time of the operation, and stores it in a histogram. At
+  regular intervals, the histograms are dumped to `summary.csv` as
+  well as operation-specific latency CSVs (e.g. `put_latencies.csv`
+  for the PUT operation).
+* N **workers**, where N is specified by the [concurrent](#concurrent) configuration setting
+  (`basho_bench_worker.erl`). The worker process wraps a driver
+  module, specified by the [driver](#driver)
+  configuration setting. The driver is randomly invoked using the
+  distribution of operations as specified by the [operations](#operations) configuration setting. The rate at which the
+  driver invokes operations is governed by the [mode](#mode) setting.
+
+Once these processes have been created and initialized, Basho Bench
+sends a run command to all worker processes, causing them to begin the
+test. Each worker is initialized with a common seed value for random
+number generation to ensure that the generated workload is reproducible
+at a later date.
+
+During the test, the workers repeatedly call `driver:run/4`, passing in
+the next operation to run, a keygen function, a valuegen function, and
+the last state of the driver. The worker process times the operation,
+and reports this to the stats process when the operation has completed.
+
+Finally, once the test has been run for the duration specified in the
+config file, all workers and stats processes are terminated and the
+benchmark ends. The measured latency and throughput of the test can be
+found in `./tests/current/`. Previous results are in timestamped
+directories of the form `./tests/YYYYMMDD-HHMMSS/`.
+
+## Configuration
+
+Basho Bench ships with a number of sample configuration files, available
+in the `/examples` directory.
+
+### Global Config Settings
+
+#### mode
+
+The `mode` setting controls the rate at which workers invoke the
+`driver:run/4` function with a new operation. There are two possible
+values:
+
+* `max` - generate as many ops per second as possible
+* `{rate, N}` - generate N ops per second, with exponentially distributed interarrival times
+
+Note that this setting is applied to each driver independently. For
+example, if `{rate, 5}` is used with 3 concurrent workers, Basho Bench
+will be generating 15 (i.e. 5 * 3) operations per second.
+
+```erlang
+% Run at max, i.e. as quickly as possible
+{mode, max}
+
+% Run 15 operations per second per worker
+{mode, {rate, 15}}
+```
+
+#### concurrent
+
+The number of concurrent worker processes. The default is 3 worker
+processes. This determines the number of concurrent clients running
+requests on the API under test.
+
+```erlang
+% Run 10 concurrent processes
+{concurrent, 10}
+```
+
+#### duration
+
+The duration of the test, in minutes. The default is 5 minutes.
+
+```erlang
+% Run the test for one hour
+{duration, 60}
+```
+
+#### operations
+
+The possible operations that the driver will run, plus their "weight,"
+or likelihood of being run. The default is `[{get,4},{put,4},{delete,
+1}]`, which means that out of every 9 operations, GET will be called
+four times, PUT will be called four times, and DELETE will be called
+once, on average. 
+
+```erlang
+{operations, [{get, 4}, {put, 1}]}.
+```
+
+Operations are defined on a **per-driver** basis. Not all drivers will
+implement the GET/PUT operations discussed above. Consult the driver
+source to determine the valid operations. If you're testing the HTTP
+interface, for example, the corresponding operations are GET and
+UPDATE, respectively.
+
+If a driver does not support a specified operation (`asdfput` in this
+example), you may see errors like this:
+
+```log
+DEBUG:Driver basho_bench_driver_null crashed: {function_clause,
+                                              [{{{basho_bench_driver_null,run,
+                                                 [asdfput,
+                                                  #Fun<basho_bench_keygen.4.4674>,
+                                                  #Fun<basho_bench_valgen.0.1334>,
+                                                  undefined]}}},
+                                               {{{basho_bench_worker,
+                                                 worker_next_op,1}}},
+                                               {{{basho_bench_worker,
+                                                 max_worker_run_loop,1}}}]}
+```
+
+#### driver
+
+The module name of the driver that Basho Bench will use to generate
+load. A driver may simply invoke code in-process (such as when
+measuring the performance of DETS) or may open network connections and
+generate load on a remote system (such as when testing a Riak
+server/cluster).
+
+Available drivers include:
+
+* `basho_bench_driver_http_raw` - Uses Riak's HTTP interface to
+  get/update/insert data on a Riak server
+* `basho_bench_driver_riakc_pb` - Uses Riak's Protocol Buffers
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_riakclient` - Uses Riak's Distributed Erlang
+  interface to get/put/update/delete data on a Riak server
+* `basho_bench_driver_bitcask` - Directly invokes the Bitcask API
+* `basho_bench_driver_dets` - Directly invokes the DETS API
+
+On invocation of the `driver:run/4` method, the driver may return one of
+the following results:
+
+* `{ok, NewState}` - operation completed successfully
+* `{error, Reason, NewState}` - operation failed but the driver can
+  continue processing (i.e. recoverable error)
+* `{stop, Reason}` - operation failed; driver can't/won't continue
+  processing
+* `{'EXIT', Reason}` - operation failed; driver crashed
+
+#### code_paths
+
+Some drivers need additional Erlang code in order to run. Specify the
+paths to this code using the `code_paths` configuration setting.
+
+#### key_generator
+
+The generator function to use for creating keys. Generators are defined
+in `basho_bench_keygen.erl`. Available generators include:
+
+* `{sequential_int, MaxKey}` - generates integers from 0..`MaxKey`
+  in order and then stops the system. Note that each instance of
+  this keygen is specific to a worker.
+* `{partitioned_sequential_int, MaxKey}` - the same as
+  `{sequential_int}`, but splits the keyspace evenly among the
+  worker processes. This is useful for pre-loading a large dataset.
+* `{partitioned_sequential_int, StartKey, NumKeys}` - the same as
+  `partitioned_sequential_int`, but starting at the defined
+  `StartKey` and going up to `StartKey + NumKeys`.
+* `{uniform_int, MaxKey}` - selects an integer from a uniform
+  distribution of 0..`MaxKey`, i.e. all integers are equally probable.
+* `{pareto_int, MaxKey}` - selects an integer from a Pareto
+  distribution, such that 20% of the available keys get selected 80%
+  of the time. Note that the current implementation of this
+  generator _may_ yield values larger than `MaxKey` due to the
+  mathematical properties of the Pareto distribution.
+* `{truncated_pareto_int, MaxKey}` - the same as `{pareto_int}`, but
+  will _not_ yield values above `MaxKey`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a key generator function. 
The worker
+  `Id` will be prepended to `Args` when the function is called.
+* `{int_to_bin, Generator}` - takes any of the above `_int`
+  generators and converts the number to a 32-bit binary. This is
+  needed for some drivers that require a binary key.
+* `{int_to_str, Generator}` - takes any of the above `_int`
+  generators and converts the number to a string. This is needed for
+  some drivers that require a string key.
+
+The default key generator is `{uniform_int, 100000}`.
+
+Examples:
+
+```erlang
+% Use a randomly selected integer between 1 and 10,000
+{key_generator, {uniform_int, 10000}}.
+
+% Use a randomly selected integer between 1 and 10,000, as binary.
+{key_generator, {int_to_bin, {uniform_int, 10000}}}.
+
+% Use a pareto distributed integer between 1 and 10,000; values < 2000
+% will be returned 80% of the time.
+{key_generator, {pareto_int, 10000}}.
+```
+
+#### value_generator
+
+The generator function to use for creating values. Generators are
+defined in `basho_bench_valgen.erl`. Available generators include:
+
+* `{fixed_bin, Size}` - generates a random binary of `Size`
+  bytes. Every binary is the same size, but varies in content.
+* `{exponential_bin, MinSize, Mean}` - generates a random binary
+  which has an exponentially distributed size. Most values will be
+  approximately `MinSize` + `Mean` bytes in size, with a long tail
+  of larger values.
+* `{uniform_bin, MinSize, MaxSize}` - generates a random binary
+  which has an evenly distributed size between `MinSize` and
+  `MaxSize`.
+* `{function, Module, Function, Args}` - specifies an external
+  function that should return a value generator function. The worker
+  `Id` will be prepended to `Args` when the function is called.
+
+The default value generator is `{value_generator, {fixed_bin, 100}}`.
+
+Examples:
+
+```erlang
+% Generate a fixed size random binary of 512 bytes
+{value_generator, {fixed_bin, 512}}.
+
+% Generate a random binary whose size is exponentially distributed
+% starting at 1000 bytes and a mean of 2000 bytes
+{value_generator, {exponential_bin, 1000, 2000}}.
+```
+
+#### rng_seed
+
+The initial random seed to use. This is explicitly seeded, rather than
+seeded from the current time, so that a test can be run in a
+predictable, repeatable fashion.
+
+Default is `{rng_seed, {42, 23, 12}}`.
+
+```erlang
+% Seed to {12, 34, 56}
+{rng_seed, {12, 34, 56}}.
+```
+
+#### log_level
+
+The `log_level` setting determines which messages Basho Bench will log
+to the console and to disk.
+
+The default level is `debug`.
+
+| Valid levels
+|:------------
+| `debug`
+| `info`
+| `warning`
+| `error`
+
+#### report_interval
+
+How often, in seconds, the stats process should write histogram data
+to disk. The default is 10 seconds.
+
+#### test_dir
+
+The directory in which result data is written. The default is `tests/`.
+
+### basho_bench_driver_riakclient Settings
+
+These configuration settings apply to the
+`basho_bench_driver_riakclient` driver.
+
+#### riakclient_nodes
+
+List of Riak nodes to use for testing.
+
+```erlang
+{riakclient_nodes, ['riak1@127.0.0.1', 'riak2@127.0.0.1']}.
+```
+
+#### riakclient_cookie
+
+The Erlang cookie to use to connect to Riak clients. The default is `riak`.
+
+```erlang
+{riakclient_cookie, riak}.
+```
+
+#### riakclient_mynode
+
+The name of the local node. This is passed into
+[net_kernel:start/1](http://erlang.org/doc/man/net_kernel.html).
+
+```erlang
+{riakclient_mynode, ['basho_bench@127.0.0.1', longnames]}. 
+``` + +#### riakclient_replies + +This value is used for R-values during a get operation, and W-values +during a put operation. + +```erlang +% Expect 1 reply. +{riakclient_replies, 1}. +``` + +#### riakclient_bucket + +The Riak bucket to use for reading and writing values. The Default is +`<<"test">>`. + +```erlang +% Use the "bench" bucket. +{riakclient_bucket, <<"bench">>}. +``` + +### basho_bench_driver_riakc_pb Settings + +#### riakc_pb_ips + +A list of IP addresses to connect the workers to. A random IP will be +chosen for each worker. + +The default is `{riakc_pb_ips, [{127,0,0,1}]}` + +```erlang +% Connect to a cluster of 3 machines +{riakc_pb_ips, [{10,0,0,1},{10,0,0,2},{10,0,0,3}]} +``` + +#### riakc_pb_port + +The port on which to connect to the PBC interface. + +The default is `{riakc_pb_port, 8087}` + +#### riakc_pb_bucket + +The bucket to use for testing. + +The default is `{riakc_pb_bucket, <<"test">>}` + +### basho_bench_driver_http_raw Settings + +#### http_raw_ips + +A list of IP addresses to connect the workers to. Each worker makes +requests to each IP in a round-robin fashion. + +The default is `{http_raw_ips, ["127.0.0.1"]}` + +```erlang +% Connect to a cluster of machines in the 10.x network +{http_raw_ips, ["10.0.0.1", "10.0.0.2", "10.0.0.3"]}. +``` + +#### http_raw_port + +Select the default port to connect to for the HTTP server. + +The default is `{http_raw_port, 8098}`. + +```erlang +% Connect on port 8090 +{http_raw_port, 8090}. +``` + +#### http_raw_path + +The base path to use for accessing Riak, usually `"/riak/<bucket>"`. + +The default is `{http_raw_path, "/riak/test"}`. + +```erlang +% Place test data in another_bucket +{http_raw_path, "/riak/another_bucket"}. +``` + +#### http_raw_params + +Additional parameters to add to the end of the URL. This can be used +to set the `r`/`w`/`dw`/`rw` parameters as desired. + +The default is `{http_raw_params, ""}`. + +```erlang +% Set R=1, W=1 for testing a system with n_val set to 1 +{http_raw_params, "?r=1&w=1"}. +``` + +#### http_raw_disconnect_frequency + +How often, in seconds or number of operations, the HTTP clients +(workers) should forcibly disconnect from the server. + +The default is `{http_raw_disconnect_frequency, infinity}` (which +means that Basho Bench should never forcibly disconnect). + +```erlang +% Disconnect after 60 seconds +{http_raw_disconnect_frequency, 60}. + +% Disconnect after 200 operations +{http_raw_disconnect_frequency, {ops, 200}}. +``` + +## Custom Driver + +A custom driver must expose the following callbacks. + +```erlang +% Create the worker +% ID is an integer +new(ID) -> {ok, State} or {error, Reason}. + +% Run an operation +run(Op, KeyGen, ValueGen, State) -> {ok, NewState} or {error, Reason, NewState}. +``` + +See the [existing +drivers](https://github.com/basho/basho_bench/tree/master/src) for +more details. 
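+
+For illustration, here is a minimal no-op driver sketch that satisfies
+these callbacks (the module name `basho_bench_driver_noop` and the
+`noop` operation are hypothetical; a real driver would perform actual
+work in `run/4`):
+
+```erlang
+-module(basho_bench_driver_noop).
+-export([new/1, run/4]).
+
+%% Called once per worker; Id is the worker's integer ID
+new(_Id) ->
+    {ok, undefined}.
+
+%% Called for each operation listed in the {operations, ...} setting
+run(noop, _KeyGen, _ValueGen, State) ->
+    {ok, State}.
+```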
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/performance/erlang.md b/content/riak/kv/3.0.4/using/performance/erlang.md
new file mode 100644
index 0000000000..bfeeebc421
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/performance/erlang.md
@@ -0,0 +1,371 @@
+---
+title: "Erlang VM Tuning"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Erlang VM"
+    identifier: "performance_erlang"
+    weight: 105
+    parent: "managing_performance"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/tuning/erlang
+  - /riak/kv/3.0.4/ops/tuning/erlang
+---
+
+Riak was written almost exclusively in [Erlang](http://www.erlang.org)
+and runs on an Erlang virtual machine (VM), which makes proper Erlang VM
+tuning an important part of optimizing Riak performance. The Erlang VM
+itself provides a wide variety of [configurable parameters](http://erlang.org/doc/man/erl.html) that you can use to tune its performance; Riak enables you to tune a subset of those parameters in each node's [configuration files](../../../configuring/reference/#erlang-vm).
+
+The table below lists some of the parameters that are available, showing
+both their names as used in Erlang and their names as Riak parameters.
+
+Erlang parameter | Riak parameter
+:----------------|:--------------
+[`+A`](http://erlang.org/doc/man/erl.html#async_thread_pool_size) | `erlang.async_threads`
+[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.K`
+[`+P`](http://erlang.org/doc/man/erl.html#+P) | `erlang.process_limit`
+[`+Q`](http://erlang.org/doc/man/erl.html#+Q) | `erlang.max_ports`
+[`+S`](http://erlang.org/doc/man/erl.html#+S) | `erlang.schedulers.total`, `erlang.schedulers.online`
+[`+W`](http://erlang.org/doc/man/erl.html#emu_flags) | `erlang.W`
+[`+a`](http://erlang.org/doc/man/erl.html#async_thread_stack_size) | `erlang.async_threads.stack_size`
+[`+e`](http://www.erlang.org/doc/man/ets.html#+e) | `erlang.max_ets_tables`
+[`+scl`](http://www.erlang.org/doc/man/erl.html#+scl) | `erlang.schedulers.compaction_of_load`
+[`+sfwi`](http://www.erlang.org/doc/man/erl.html#+sfwi) | `erlang.schedulers.force_wakeup_interval`
+[`-smp`](http://erlang.org/doc/man/erl.html#smp) | `erlang.smp`
+[`+sub`](http://www.erlang.org/doc/man/erl.html#+sub) | `erlang.schedulers.utilization_balancing`
+[`+zdbbl`](http://erlang.org/doc/man/erl.html#+zdbbl) | `erlang.distribution_buffer_size`
+[`-kernel net_ticktime`](http://www.erlang.org/doc/man/kernel_app.html#net_ticktime) | `erlang.distribution.net_ticktime`
+[`-env FULLSWEEP_AFTER`](http://www.erlang.org/doc/man/erlang.html#system_flag-2) | `erlang.fullsweep_after`
+[`-env ERL_CRASH_DUMP`](http://www.erlang.org/doc/apps/erts/crash_dump.html) | `erlang.crash_dump`
+[`-env ERL_MAX_ETS_TABLES`](http://learnyousomeerlang.com/ets) | `erlang.max_ets_tables`
+`-name` | `nodename`
+
+{{% note title="Note on upgrading to 2.0" %}}
+In versions of Riak prior to 2.0, Erlang VM-related parameters were specified
+in a `vm.args` configuration file; in versions 2.0 and later, all
+Erlang-VM-specific parameters are set in the `riak.conf` file. If you're
+upgrading to 2.0 from an earlier version, you can still use your old `vm.args`
+if you wish. Please note, however, that if you set one or more parameters in
+both `vm.args` and in `riak.conf`, the settings in `vm.args` will override
+those in `riak.conf`. 
+{{% /note %}}
+
+## SMP
+
+Some operating systems provide Erlang VMs with Symmetric Multiprocessing
+capabilities
+([SMP](http://en.wikipedia.org/wiki/Symmetric_multiprocessing)) for
+taking advantage of multi-processor hardware architectures. SMP support
+can be turned on or off by setting the `erlang.smp` parameter to
+`enable` or `disable`. It is enabled by default. The following would
+disable SMP support:
+
+```riakconf
+erlang.smp = disable
+```
+
+Because Riak is supported on some operating systems that do not provide
+SMP support, make sure that your OS supports SMP before enabling it for
+use by Riak's Erlang VM. If it does not, you should set `erlang.smp` to
+`disable` prior to starting up your cluster.
+
+Another safe option is to set `erlang.smp` to `auto`. This will instruct
+the Erlang VM to start up with SMP support enabled if (a) SMP support is
+available on the current OS and (b) more than one logical processor is
+detected. If neither of these conditions is met, the Erlang VM will
+start up with SMP disabled.
+
+## Schedulers
+
+> **Note on missing scheduler flags**
+>
+> We recommend that _all_ users set the `+sfwi` flag to `500` (milliseconds)
+and the `+scl` flag to `false` if using the older, `vm.args`-based
+configuration system. If you are using the new, `riak.conf`-based
+configuration system, the corresponding parameters are
+`erlang.schedulers.force_wakeup_interval` and
+`erlang.schedulers.compaction_of_load`.
+>
+> Please note that you will need to uncomment the appropriate lines in
+your `riak.conf` for this configuration to take effect.
+
+If [SMP support](#smp) has been enabled on your Erlang
+VM, i.e. if `erlang.smp` is set to `enable` or `auto` on a machine
+providing SMP support _and_ more than one logical processor, you can
+configure the number of logical processors, or [scheduler
+threads](http://www.erlang.org/doc/man/erl.html#+S), that are created
+when starting Riak, as well as the number of threads that are set
+online.
+
+The total number of threads can be set using the
+`erlang.schedulers.total` parameter, whereas the number of threads set
+online can be set using `erlang.schedulers.online`. These parameters map
+directly onto `Schedulers` and `SchedulersOnline`, both of which are
+used by [`erl`](http://www.erlang.org/doc/man/erl.html#+S).
+
+While the maximum for both parameters is 1024, there is no universal
+default for either. Instead, the Erlang VM will attempt to determine the
+number of configured processors, as well as the number of available
+processors, on its own. If the Erlang VM _can_ make that determination,
+`schedulers.total` will default to the total number of configured
+processors while `schedulers.online` will default to the number of
+processors available; if the Erlang VM can't make that determination,
+both values will default to 1.
+
+If either parameter is set to a negative integer, that value will be
+subtracted from the default number of processors that are configured or
+available, depending on the parameter. For example, if there are 100
+configured processors and `schedulers.total` is set to `-50`, then the
+calculated value for `schedulers.total` will be 50. Setting either
+parameter to 0, on the other hand, will reset both values to their
+defaults.
+
+If SMP support is not enabled, i.e. if `erlang.smp` is set to `disable`
+(or set to `auto` on a machine without SMP support or with only one
+logical processor), then the values of `schedulers.total` and
+`schedulers.online` will be ignored. 
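+
+For example, on a 12-core machine you might create one scheduler thread
+per core but bring only 10 online, leaving headroom for other processes
+(the values below are illustrative, not recommendations):
+
+```riakconf
+erlang.schedulers.total = 12
+erlang.schedulers.online = 10
+```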
+ +### Scheduler Wakeup Interval + +Scheduler wakeup is an optional process whereby Erlang VM schedulers are +periodically scanned to determine whether they have "fallen asleep," +i.e. whether they have an empty [run +queue](http://en.wikipedia.org/wiki/Run_queue). The interval at which +this process occurs can be set, in milliseconds, using the +`erlang.schedulers.force_wakeup_interval` parameter, which corresponds +to the Erlang VM's `+sfwi` flag. This parameter is set to `0` by +default, which disables scheduler wakeup. + +Erlang distributions like R15Bx have a tendency to put schedulers to +sleep too often. If you are using a more recent distribution, i.e. if +you are running Riak 2.0 or later, you most likely won't need to enable +scheduler wakeup. + +### Scheduler Compaction and Balancing + +The Erlang scheduler offers two methods of distributing load across +schedulers: **compaction of load** and **utilization balancing** of +load. + +Compaction of load is used by default. When enabled, the Erlang VM will +attempt to fully load as many scheduler threads as possible, i.e. it +will attempt to ensure that scheduler threads do not run out of work. To +that end, the VM will take into account the frequency with which +schedulers run out of work when making decisions about which schedulers +should be assigned work. You can disable compaction of load by setting +the `erlang.schedulers.compaction_of_load` setting to `false` (in the +older configuration system, set `+scl` to `false`). + +The other option, utilization balancing, is disabled by default in favor +of compaction of load. When utilization balancing is enabled instead, the +Erlang VM will strive to balance scheduler utilization as equally as +possible between schedulers, without taking into account the frequency +at which schedulers run out of work. You can enable utilization +balancing by setting the `erlang.schedulers.utilization_balancing` +setting to `true` (or the `+sub` parameter to `true` in the older +configuration system). + +At any given time, only compaction of load _or_ utilization balancing +can be used. If you set both parameters to `false`, Riak will default to +using compaction of load; if both are set to `true`, Riak will enable +whichever setting is listed first in `riak.conf` (or `vm.args` if you're +using the older configuration system). + +## Port Settings + +Riak uses [epmd](http://www.erlang.org/doc/man/epmd.html), the Erlang +Port Mapper Daemon, for most inter-node communication. In this system, +other nodes in the [cluster](../../../learn/concepts/clusters) use the Erlang identifiers specified by the `nodename` parameter (or `-name` in `vm.args`), for example `riak@10.9.8.7`. On each node, the daemon resolves these node +identifiers to a TCP port. You can specify a port or range of ports for +Riak nodes to listen on as well as the maximum number of concurrent +ports/sockets. + +### Port Range + +By default, epmd binds to TCP port 4369 and listens on the wildcard +interface. epmd uses an unpredictable port for inter-node communication +by default, binding to port 0, which means that it uses the first +available port. This can make it difficult to configure [firewalls](../../security). + +To make configuring firewalls easier, you can instruct the Erlang VM to +use either a limited range of TCP ports or a single TCP port. The +minimum and maximum can be set using the +`erlang.distribution.port_range.minimum` and +`erlang.distribution.port_range.maximum` parameters, respectively. 
The +following would set the range to ports between 3000 and 5000: + +```riakconf +erlang.distribution.port_range.minimum = 3000 +erlang.distribution.port_range.maximum = 5000 +``` + +```appconfig +%% The older, app.config-based system uses different parameter names +%% for specifying the minimum and maximum port + +{kernel, [ + % ... + {inet_dist_listen_min, 3000}, + {inet_dist_listen_max, 5000} + % ... + ]} +``` + +You can set the Erlang VM to use a single port by setting the minimum to +the desired port while setting no maximum. The following would set the +port to 5000: + +```riakconf +erlang.distribution.port_range.minimum = 5000 +``` + +```appconfig +{kernel, [ + % ... + {inet_dist_listen_min, 5000} + % ... + ]} +``` + +If the minimum port is unset, the Erlang VM will listen on a random +high-numbered port. + +### Maximum Ports + +You can set the maximum number of concurrent ports/sockets used by the +Erlang VM using the `erlang.max_ports` setting. Possible values range +from 1024 to 134217727. The default is 65536. In `vm.args` you can use +either `+Q` or `-env ERL_MAX_PORTS`. + +## Asynchronous Thread Pool + +If thread support is available in your Erlang VM, you can set the number +of asynchronous threads in the Erlang VM's asynchronous thread pool +using `erlang.async_threads` (`+A` in `vm.args`). The valid range is 0 +to 1024. If thread support is available on your OS, the default is 64. +Below is an example setting the number of async threads to 600: + +```riakconf +erlang.async_threads = 600 +``` + +```vmargs ++A 600 +``` + +### Stack Size + +In addition to the number of asynchronous threads, you can set the +memory allocated to each thread using the +`erlang.async_threads.stack_size` parameter, which corresponds to the +`+a` Erlang flag. You can express that size in Riak using KB, MB, GB, +etc. The valid range is 16-8192 kilowords, which translates to 64-32768 +KB on 32-bit architectures. While there is no default, we suggest a +stack size of 16 kilowords, which translates to 64 KB. We suggest such a +small size because the number of asynchronous threads, as determined by +`erlang.async_threads`, might be quite large in your Erlang VM. A 64 KB +stack is enough for drivers delivered with Erlang/OTP but might not be +large enough to accommodate drivers that use the `driver_async()` +functionality, documented +[here](http://www.erlang.org/doc/man/erl_driver.html). We recommend +setting higher values with caution, always keeping the number of +available threads in mind. + +## Kernel Polling + +You can utilize kernel polling in your Erlang distribution if your OS +supports it. Kernel polling can improve performance if many file +descriptors are in use; the more file descriptors, the larger an effect +kernel polling may have on performance. Kernel polling is enabled by +default on Riak's Erlang VM, i.e. the default for `erlang.K` is `on`. +This corresponds to the +[`+K`](http://erlang.org/doc/man/erl.html#emu_flags) setting on the +Erlang VM. You can disable it by setting `erlang.K` to `off`. + +## Warning Messages + +Erlang's +[`error_logger`](http://www.erlang.org/doc/man/error_logger.html) is an +event manager that registers error, warning, and info events from the +Erlang runtime. By default, events from the `error_logger` are mapped as +warnings, but you can also set messages to be mapped as errors or info +reports using the `erlang.W` parameter (or `+W` in `vm.args`). The +possible values are `w` (warnings), `e` (errors), or `i` (info reports). 
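+ +As a minimal sketch, the following would map emulator messages to errors +rather than warnings (assuming the same single-letter values are accepted +in `riak.conf` as by the `+W` flag; the choice of `e` here is illustrative): + +```riakconf +erlang.W = e +``` + +```vmargs ++W e +``` 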
+ +## Process Limit + +The `erlang.process_limit` parameter can be used to set the maximum +number of simultaneously existing system processes (corresponding to +Erlang's `+P` parameter). The valid range is 1024 to 134217727. The +default is 256000. + +## Distribution Buffer + +You can set the size of the Erlang VM's distribution buffer busy limit +(denoted by `+zdbbl` on the VM and in `vm.args`) by adding +`erlang.distribution_buffer_size` to `riak.conf`. Modifying this setting can be useful +on nodes with many `busy_dist_port` events, i.e. instances when the +Erlang distribution is overloaded. The default is 32 MB (i.e. `32MB`), +but this may be insufficient for some workloads. The maximum value is +2097151 KB. + +A larger buffer limit will allow processes to buffer more outgoing +messages. When the limit is reached, sending processes will be suspended +until the buffer size has shrunk below the limit specified by +`erlang.distribution_buffer_size`. Higher values will tend to produce +lower latency and higher throughput but at the expense of higher RAM +usage. You should evaluate your RAM resources prior to increasing this +setting. + +## Erlang Built-in Storage + +Erlang uses a built-in database called +[ets](http://www.erlang.org/doc/man/ets.html) (Erlang Term Storage) +for some processes that require fast access from memory in constant +access time (rather than logarithmic access time). The maximum number +of tables can be set using the `erlang.max_ets_tables` setting. The +default is 256000, which is higher than the default limit of 1400 on the +Erlang VM. The corresponding setting in `vm.args` is `+e`. + +Higher values for `erlang.max_ets_tables` will tend to provide more +quick-access data storage but at the cost of higher RAM usage. Please +note that the default values for `erlang.max_ets_tables` and +`erlang.process_limit` (explained in the section [above](#process-limit)) are the same. + +## Crash Dumps + +By default, crash dumps from Riak's Erlang distribution are deposited in +`./log/erl_crash.dump`. You can change this location using +`erlang.crash_dump`. This is the equivalent of setting the +[`ERL_CRASH_DUMP`](http://www.erlang.org/doc/man/erl.html#environment_variables) +environment variable for the Erlang VM. + +## Net Kernel Tick Time + +The [net kernel](http://erlang.org/doc/man/net_kernel.html) is an Erlang +system process that provides various forms of network monitoring. In a +Riak cluster, one of the functions of the net kernel is to periodically +check node liveness. **Tick time** is the frequency with which those +checks happen. You can determine that frequency using the +`erlang.distribution.net_ticktime` parameter. The tick will occur every N seconds, +where N is the value set. Thus, setting +`erlang.distribution.net_ticktime` to `60` will make the tick occur once +every minute. The corresponding flag in `vm.args` is `-kernel +net_ticktime`. + +## Shutdown Time + +You can determine how long the Erlang VM spends shutting down using the +`erlang.shutdown_time` parameter. The default is `10s` (10 seconds). +Once this duration elapses, all existing processes are killed. +Decreasing shutdown time can be useful in situations in which you are +frequently starting and stopping a cluster, e.g. in test clusters. In +`vm.args` you can set the `-shutdown_time` flag in milliseconds. 
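+ +As a sketch, the following equivalent settings would shorten the shutdown +window to 5 seconds (the value is illustrative, and the `5s` duration +notation assumes the same format as the `10s` default): + +```riakconf +erlang.shutdown_time = 5s +``` + +```vmargs +-shutdown_time 5000 +``` 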
+ + + + diff --git a/content/riak/kv/3.0.4/using/performance/latency-reduction.md b/content/riak/kv/3.0.4/using/performance/latency-reduction.md new file mode 100644 index 0000000000..099fc17cff --- /dev/null +++ b/content/riak/kv/3.0.4/using/performance/latency-reduction.md @@ -0,0 +1,267 @@ +--- +title: "Latency Reduction Checklist" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Latency Reduction" + identifier: "performance_latency_reduction" + weight: 104 + parent: "managing_performance" +toc: true +aliases: + - /riak/3.0.4/ops/tuning/latency-reduction + - /riak/kv/3.0.4/ops/tuning/latency-reduction +--- + +Although latency is unavoidable in distributed systems like Riak, there +are a number of actions that can be undertaken to reduce latency +to the lowest levels possible within a cluster. In this guide, we'll +list potential sources of high latency and what you can do about them. + +## Large Objects + +Riak always performs best with smaller objects. Large objects, which can +be mistakenly inserted into Riak by your application or caused by +siblings (see below), can often increase latency. + +We recommend keeping all objects stored in Riak smaller than 1-2 MB, +preferably below 100 KB. Large objects lead to increased I/O activity +and can put strain on memory resources. In some cases, just a few large +objects can impact latency in a cluster, even for requests that are +unrelated to those objects. + +If your use case requires large objects, we recommend checking out +[Riak CS]({{<baseurl>}}riak/cs/latest/), which is intended as a storage system for large objects. + +### Mitigation + +The best way to find out if large objects are impacting latency is to +monitor each node's object size stats. If you run [`riak-admin status`](../../admin/riak-admin/#status) or make an HTTP `GET` request +to Riak's `/stats` endpoint, you will see the results for the following +metrics related to object size, all of which are calculated only for +`GET` operations (i.e. reads): + +Metric | Explanation +:-----------------------------|:----------- +`fsm_node_get_objsize_mean` | The mean object size encountered by this node in the last minute +`fsm_node_get_objsize_median` | The median object size encountered by this node in the last minute +`fsm_node_get_objsize_95` | The 95th-percentile object size encountered by this node in the last minute +`fsm_node_get_objsize_99` | The 99th-percentile object size encountered by this node in the last minute +`fsm_node_get_objsize_100` | The 100th-percentile object size encountered by this node in the last minute + +The `mean` and `median` measurements may not be good indicators, +especially if you're storing billions of keys. Instead, you should be on +the lookout for trends in the `95`, `99`, and `100` measures: + +* Is there an upward trend? +* Do the metrics indicate that there are outliers? +* Do these trends coincide with increased latency? + +If you suspect that large object size is impacting latency, try making +the following changes to each node's [configuration](../../../configuring/reference): + +* If you are using the newer, `riak.conf`-based configuration system, +the commented-out value for `erlang.distribution_buffer_size` is `32MB`. +Uncomment this setting and re-start your node. +* If you are using the older, `app.config`/`vm.args`-based configuration +system, try increasing the `+zdbbl` setting in `vm.args` to `32768` or +higher (measured in kilobytes). 
This increases the size of the +distributed Erlang buffer from its default of 1024 KB. Re-start your +node when configuration changes have been made. + +Large objects can also impact latency even if they're only present on +some nodes. If increased latency occurs only on N nodes, where N is your +[replication factor](../../../developing/app-guide/replication-properties/#n-value-and-replication), also known as `n_val`, this could indicate that a single large object and its replicas are slowing down _all_ requests on those nodes. + +If large objects are suspected, you should also audit the behavior of +siblings in your cluster, as explained in the [next section](#siblings). + +## Siblings + +In Riak, object conflicts are handled by keeping multiple versions of +the object in the cluster either until a client takes action to resolve +the conflict or until [active anti-entropy](../../../learn/glossary/#active-anti-entropy) resolves the conflict without client intervention. While sibling production is normal, [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion) is a problem that can come about if many siblings of an object are produced. The negative effects are the same as those associated with [large objects](#large-objects). + +### Mitigation + +The best way to monitor siblings is through the same [`riak-admin status`](../../admin/riak-admin/#status) interface used to monitor +object size (or via an HTTP `GET` request to `/stats`). In the output of +`riak-admin status` in each node, you'll see the following +sibling-related statistics: + +Metric | Explanation +:------------------------------|:----------- +`node_get_fsm_siblings_mean` | The mean number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_median` | The median number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_95` | The 95th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_99` | The 99th percentile of the number of siblings encountered during all GET operations by this node within the last minute +`node_get_fsm_siblings_100` | The 100th percentile of the number of siblings encountered during all GET operations by this node within the last minute + +Is there an upward trend in these statistics over time? Are there any +large outliers? Do these trends correspond to your observed latency +spikes? + +If you believe that sibling creation problems could be responsible for +latency issues in your cluster, you can start by checking the following: + +* If `allow_mult` is set to `true` for some or all of your buckets, be + sure that your application is correctly resolving siblings. Be sure to + read our documentation on [conflict resolution](../../../developing/usage/conflict-resolution) for a fuller picture of how this can be done. **Note**: In Riak versions 2.0 and later, `allow_mult` is set to `true` by default for all bucket types that you create and activate. + If you wish to set `allow_mult` to `false` on a bucket type, you will have to do so explicitly. +* Application errors are a common source of problems with + siblings. Updating the same key over and over without passing a + [causal context](../../../learn/concepts/causal-context) to Riak can cause sibling explosion. 
If this seems to be the issue, modify your application's [conflict resolution](../../../developing/usage/conflict-resolution) + strategy. Another possibility worth exploring is using [dotted version vectors](../../../learn/concepts/causal-context/#dotted-version-vectors) (DVVs) in place of traditional vector clocks. DVVs can be enabled [using bucket types](../../../developing/usage/bucket-types) by setting the `dvv_enabled` parameter to `true` for buckets that seem to be experiencing sibling explosion. + +## Compaction and Merging + +The [Bitcask](../../../setup/planning/backend/bitcask) and [LevelDB](../../../setup/planning/backend/leveldb) storage backends occasionally go through +heavily I/O-intensive compaction phases during which they remove deleted +data and reorganize data files on disk. During these phases, affected +nodes may be slower to respond to requests than other nodes. If your +cluster is using one or both of these backends, there are steps that can +be taken to monitor and address latency issues. + +### Mitigation + +To determine whether compaction and merging cycles align with increased +latency, keep an eye on your `console.log` files (and LevelDB `LOG` +files if you're using LevelDB). Do Bitcask merging and/or LevelDB +compaction events overlap with increased latencies? + +If so, our first recommendation is to examine your [replication properties](../../../developing/app-guide/replication-properties/) to make sure that neither R nor W is set to N, i.e. that you're not requiring that reads or writes go to all nodes in the cluster. The problem with setting `R=N` or `W=N` is that any request will only respond as quickly as the slowest node amongst the N nodes involved in the request. + +Beyond checking for `R=N` or `W=N` for requests, the recommended +mitigation strategy depends on the backend: + +#### Bitcask + +With Bitcask, it's recommended that you: + +* Limit merging to off-peak hours to decrease the effect of merging +cycles on node traffic +* Stagger merge windows between nodes so that no more than one node is +undergoing a merge phase at any given time + +Instructions on how to accomplish both can be found in our guide to +[tuning Bitcask](../../../setup/planning/backend/bitcask/#tuning-bitcask). + +It's also important that you adjust your maximum file size and merge +threshold settings appropriately. The maximum file size setting is labeled +`bitcask.max_file_size` in the newer, `riak.conf`-based [configuration files](../../../configuring/reference) and `max_file_size` in the older, `app.config`-based system. + +Setting the maximum file size lower will cause Bitcask to merge more +often (with less I/O churn), while setting it higher will induce less +frequent merges with more I/O churn. To find settings that are ideal for +your use case, we recommend checking out our guide to [configuring Bitcask](../../../setup/planning/backend/bitcask/#configuring-bitcask). + +#### LevelDB + +The more files you keep in memory, the faster LevelDB will perform in +general. To make sure that you are using your system resources +appropriately with LevelDB, check out our guide to [LevelDB parameter planning](../../../setup/planning/backend/leveldb/#parameter-planning). + +## OS Tuning + +While a number of latency-related problems can manifest themselves in +development and testing environments, some performance limits only +become clear in production environments. 
+ +### Mitigation + +If you suspect that OS-level issues might be impacting latency, it might +be worthwhile to revisit your OS-specific configurations. The following +guides may be of help: + +* [Open files limit](../open-files-limit) +* General [System performance tuning](../) +* [AWS performance tuning](../amazon-web-services) if you're running Riak on [Amazon Web Services](http://aws.amazon.com/) + +## I/O and Network Bottlenecks + +Riak is a heavily I/O- and network resource-intensive system. +Bottlenecks on either front can lead to undue latency in your cluster. +We recommend an active monitoring strategy to detect problems +immediately when they arise. + +### Mitigation + +To diagnose potential I/O bottlenecks, there are a number of Linux tools +at your disposal, including +[iowait](http://www.linuxquestions.org/questions/linux-newbie-8/what-is-iowait-415961/) +and [netstat](http://en.wikipedia.org/wiki/Netstat). + +To diagnose potential overloads, Riak versions 1.3.2 and later come +equipped with an overload protection feature designed to prevent +cascading failures in overly busy nodes. This feature limits the number +of GET and PUT finite state machines (FSMs) that can exist +simultaneously on a single Riak node. Increased latency can result if a +node is frequently running up against these maximums. + +* Monitor `node_get_fsm_active` and `node_get_fsm_active_60s` to get an + idea of how many operations your nodes are coordinating. If you see + non-zero values in `node_get_fsm_rejected` or + `node_get_fsm_rejected_60s`, that means that some of your requests are + being discarded due to overload protection. +* The FSM limits can be increased, but disabling overload protection + entirely is not recommended. More details on these settings are + available in the [release + notes](https://github.com/basho/riak/blob/1.3/RELEASE-NOTES.md) for + Riak version 1.3. + +## Object Settings + +In versions 2.0 and later, Riak enables you to configure a variety of +settings regarding Riak objects, including allowable object sizes, how +many [siblings](../../../learn/concepts/causal-context/#siblings) to allow, and so on. If you suspect that undue latency in your cluster stems from object size or related factors, you may consider adjusting these settings. + +A concise listing of object-related settings can be found in the [Riak configuration](../../../configuring/reference/#object-settings) documentation. The sections below explain these settings in detail. + +> **Note on configuration files in 2.0** +> +> The object settings listed below are only available using the new system +for [configuration files](../../../configuring/reference/) in Riak 2.0. If you are using the older, `app.config`-based system, you will not have access to +these settings. + +### Object Size + +As stated above, we recommend _always_ keeping objects below 1-2 MB +and preferably below 100 KB if possible. If you want to ensure that +objects above a certain size do not get stored in Riak, you can do so by +setting the `object.size.maximum` parameter lower than the default of +`50MB`, which is far above the ideal object size. If you set this +parameter to, say, `1MB` and attempt to store a 2 MB object, the write +will fail and an error message will be returned to the client. + +To set an object size threshold past which a write will +succeed but will register a warning in the logs, adjust the +`object.size.warning_threshold` parameter. The default is `5MB`. 
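+ +As a minimal sketch, the following would reject writes above 1 MB and log +a warning for writes above 512 KB (both values are illustrative +assumptions, not recommendations, and the `KB` unit notation assumes the +same size format used by the defaults above): + +```riakconf +object.size.warning_threshold = 512KB +object.size.maximum = 1MB +``` 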
+ +### Sibling Explosion Management + +In order to prevent or cut down on [sibling explosion](../../../learn/concepts/causal-context/#sibling-explosion), you can either prevent Riak from storing +additional siblings when a specified sibling count is reached or set a +warning threshold past which Riak logs an error (or both). This can be +done using the `object.siblings.maximum` and +`object.siblings.warning_threshold` settings. The default maximum is 100 +and the default warning threshold is 25. + +### Object Storage Format + +There are currently two possible binary representations for objects +stored in Riak: + +* Erlang's native `term_to_binary` format, which tends to have a higher + space overhead +* A newer, Riak-specific format developed for more compact storage of + smaller values + +You can set the object storage format using the `object.format` +parameter: `0` selects Erlang's `term_to_binary` format while `1` (the +default) selects the Riak-specific format. + + + + diff --git a/content/riak/kv/3.0.4/using/performance/multi-datacenter-tuning.md b/content/riak/kv/3.0.4/using/performance/multi-datacenter-tuning.md new file mode 100644 index 0000000000..254ba13ae0 --- /dev/null +++ b/content/riak/kv/3.0.4/using/performance/multi-datacenter-tuning.md @@ -0,0 +1,47 @@ +--- +title_supertext: "Multi Data Center Replication:" +title: "System Tuning" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Multi-Datacenter Replication" + identifier: "performance_multi_datacenter_tuning" + weight: 110 + parent: "managing_performance" +toc: true +commercial_offering: true +aliases: +--- + +[perf index]: {{<baseurl>}}riak/kv/3.0.4/using/performance + +Depending on the size of your objects and your replication latency +needs, you may need to configure your kernel settings to optimize +throughput. + +## Linux + +Refer to the [System Performance Tuning][perf index] document. + +## Solaris + +On Solaris, the following settings are suggested: + +```bash +/usr/sbin/ndd -set /dev/tcp tcp_ip_abort_interval 60000 +/usr/sbin/ndd -set /dev/tcp tcp_keepalive_interval 900000 +/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_initial 3000 +/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_max 10000 +/usr/sbin/ndd -set /dev/tcp tcp_rexmit_interval_min 3000 +/usr/sbin/ndd -set /dev/tcp tcp_time_wait_interval 60000 +/usr/sbin/ndd -set /dev/tcp tcp_max_buf 4000000 +/usr/sbin/ndd -set /dev/tcp tcp_cwnd_max 4000000 +/usr/sbin/ndd -set /dev/tcp tcp_xmit_hiwat 4000000 +/usr/sbin/ndd -set /dev/tcp tcp_recv_hiwat 4000000 +``` + + + + diff --git a/content/riak/kv/3.0.4/using/performance/open-files-limit.md b/content/riak/kv/3.0.4/using/performance/open-files-limit.md new file mode 100644 index 0000000000..1736674fea --- /dev/null +++ b/content/riak/kv/3.0.4/using/performance/open-files-limit.md @@ -0,0 +1,351 @@ +--- +title: "Open Files Limit" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Open Files Limit" + identifier: "performance_open_files_limit" + weight: 101 + parent: "managing_performance" +toc: true +aliases: + - /riak/3.0.4/ops/tuning/open-files-limit/ + - /riak/kv/3.0.4/ops/tuning/open-files-limit/ +--- + +[plan backend]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/ +[blog oracle]: http://blogs.oracle.com/elving/entry/too_many_open_files + +Riak KV can accumulate a large number of open file handles during operation. 
The creation of numerous data files is normal, and the [backend][plan backend] performs periodic merges of data file collections to avoid accumulating file handles. + +To accommodate this, you should increase the open files limit on your system. We recommend setting a soft limit of 65536 and a hard limit of 200000. + +{{% note %}} +Superuser or root access may be required to perform these steps. +{{% /note %}} + +## Changing Limit For Current Session + +On most operating systems, you can change the open files limit for the current shell session using the `ulimit -n` command: + +```bash +ulimit -n 200000 +``` + +## Debian & Ubuntu + +Start by checking the current open file limit values with: + +```bash +ulimit -Hn # Hard limit +ulimit -Sn # Soft limit +``` + +If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user: + +```/etc/security/limits.conf +riak soft nofile 65536 +riak hard nofile 200000 +``` + +If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit: + +```/etc/default/riak +ulimit -n 200000 +``` + +This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf. + +## Enable PAM-Based Limits for Debian & Ubuntu + +You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files. + +For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files. + +1\. Edit /etc/pam.d/common-session and add the following line: + +```/etc/pam.d/common-session +session required pam_limits.so +``` + +2\. Save and close the file. If /etc/pam.d/common-session-noninteractive exists, append the same line as above. + +3\. Edit /etc/security/limits.conf and append the following lines to the file: + +```/etc/security/limits.conf +* soft nofile 65536 +* hard nofile 200000 +``` + +4\. Save and close the file. + +5\. (**Optional**) If you will be accessing the Riak KV nodes via secure shell (SSH), you should also edit /etc/ssh/sshd_config and uncomment the following line: + +```/etc/ssh/sshd_config +#UseLogin no +``` + +And set its value to `yes` as shown here: + +```/etc/ssh/sshd_config +UseLogin yes +``` + +6\. Restart the machine so the limits take effect and verify that the new limits are set with the following command: + +```bash +ulimit -a +``` + +{{% note %}} +In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the +two asterisks (`*`) in the examples with `riak`. 
+{{% /note %}} + + +## CentOS & Red Hat + +Start by checking the current open file limit values with: + +```bash +ulimit -Hn # Hard limit +ulimit -Sn # Soft limit +``` + +If you installed Riak KV from a binary package, you will need to add the following settings to the /etc/security/limits.conf file for the `riak` user: + +```/etc/security/limits.conf +riak soft nofile 65536 +riak hard nofile 200000 +``` + +If you use initialization scripts to start Riak KV, you can create a /etc/default/riak file and add the following to specify a limit: + +```/etc/default/riak +ulimit -n 200000 +``` + +This file is automatically sourced from the initialization script, and the Riak KV process will inherit this setting. Since initialization scripts are always run as the root user, there’s no need to set limits in /etc/security/limits.conf. + +## Enable PAM-Based Limits for CentOS and Red Hat + +You can enable PAM-based user limits so that non-root users, such as the `riak` user, may specify a higher value for maximum open files. + +For example, follow these steps to enable PAM-based limits for all users to allow a maximum of 200000 open files. + +1\. Edit /etc/pam.d/login and add the following line: + +```/etc/pam.d/login +session required pam_limits.so +``` + +2\. Save and close /etc/pam.d/login + +3\. Edit /etc/security/limits.conf and append the following lines to the file: + +```/etc/security/limits.conf +* soft nofile 65536 +* hard nofile 200000 +``` + +4\. Save and close the /etc/security/limits.conf file. + +5\. Restart the machine so that the limits take effect, and verify that +the new limits are set with the following command: + +```bash +ulimit -a +``` + +{{% note %}} +In the above examples, the open files limit is raised for all users of the system. The limit can be specified for the `riak` user only by substituting the +two asterisks (`*`) in the examples with `riak`. +{{% /note %}} + + +## Solaris + +To increase the open file limit on Solaris, add the following line to the /etc/system file: + +```/etc/system +set rlim_fd_max=200000 +``` + +[Reference][blog oracle] + +## macOS Sierra and High Sierra + +Start by checking the current open file limit values with: + +```bash +launchctl limit maxfiles +``` + +The response should look something like this: + +```bash +maxfiles 65536 65536 +``` +The first column is the soft limit and the last column is the hard limit. + +To change the open files limits on macOS Sierra or High Sierra, perform the following steps: + +1\. Add the following line to your .bash\_profile or analogous file: + +```bash +ulimit -n 65536 200000 +``` + +2\. Save and close the file. Next create the file /Library/LaunchDaemons/limit.maxfiles.plist (owned by `root` in the group `wheel` with the mode `0644`). In it place the following XML: + +``` +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" + "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> + +<plist version="1.0"> + <dict> + <key>Label</key> + <string>limit.maxfiles</string> + <key>ProgramArguments</key> + <array> + <string>launchctl</string> + <string>limit</string> + <string>maxfiles</string> + <string>65536</string> + <string>200000</string> + </array> + <key>RunAtLoad</key> + <true/> + <key>ServiceIPC</key> + <false/> + </dict> +</plist> + +``` + +3\. Save and close the file. + +4\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000. 
+ +## Mac OS X El Capitan + +Start by checking the current open file limit values with: + +```bash +launchctl limit maxfiles +``` + +The response should look something like this: + +```bash +maxfiles 65536 65536 +``` + +The first column is the soft limit and the last column is the hard limit. + +To change the open files limits on Mac OS X El Capitan, perform the following steps: + +1\. Add the following line to your .bash_profile or analogous file: + +```bash +ulimit -n 65536 200000 +``` + +2\. Save and close the file. Next open /etc/sysctl.conf (or create it if it doesn't already exist) and add the following settings: + +```/etc/sysctl.conf +kern.maxfiles=200000 +kern.maxfilesperproc=200000 +``` + +3\. Restart your computer and enter `ulimit -n` into your terminal. If your system is configured correctly, you should see that `maxfiles` has been set to 200000. + + +## Mac OS X Yosemite + +Start by checking the current open file limit values with: + +```bash +launchctl limit maxfiles +``` + +The response should look something like this: + +```bash +maxfiles 65536 65536 +``` + +The first column is the soft limit and the last column is the hard limit. + +To change the open files limits on Mac OS X Yosemite, perform these steps: + +1\. Add the following line to your .bash_profile or analogous file: + +```bash +ulimit -n 65536 200000 +``` + +2\. Save and close the file. Next edit the /etc/launchd.conf file and add: + +```/etc/launchd.conf +limit maxfiles 200000 +``` + +3\. Save and close the file. + +4\. After restarting, verify the new limits by running: + +```bash +launchctl limit maxfiles +``` + +The response output should look something like this: + +```bash +maxfiles 65536 200000 +``` + +## Mac OS X Older Versions + +Start by checking the current open file limit values with: + +```bash +launchctl limit maxfiles +``` + +The response should look something like this: + +```bash +maxfiles 10240 10240 +``` + +The first column is the soft limit and the last column is the hard limit. + +To adjust the maximum open file limits in OS X 10.7 (Lion) up to but not including OS X Yosemite, perform the following steps: + +1\. Edit (or create) /etc/launchd.conf and increase the limits by adding: + +```bash +limit maxfiles 65536 200000 +``` + +2\. Save the file and restart the system for the new limits to take effect. + +3\. After restarting, verify the new limits by running: + +```bash +launchctl limit maxfiles +``` + +The response output should look something like this: + +```bash +maxfiles 65536 200000 +``` + + + + diff --git a/content/riak/kv/3.0.4/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/3.0.4/using/performance/v2-scheduling-fullsync.md new file mode 100644 index 0000000000..0b5c51da7a --- /dev/null +++ b/content/riak/kv/3.0.4/using/performance/v2-scheduling-fullsync.md @@ -0,0 +1,50 @@ +--- +title: "V2 Scheduling Fullsync" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "V2 Scheduling Fullsync" + identifier: "performance_v2_scheduling_fullsync" + weight: 103 + parent: "managing_performance" +toc: true +commercial_offering: true +aliases: +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter/#fullsync-replication-commands/) instead. +{{% /note %}} + + +With the `pause` and `resume` commands it is possible to limit the +fullsync operation to off-peak times. 
First, disable `fullsync_interval` +and set `fullsync_on_connect` to `false`. Then, using cron or something +similar, execute the commands below at the start of the sync window. +In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` + + + + diff --git a/content/riak/kv/3.0.4/using/reference.md b/content/riak/kv/3.0.4/using/reference.md new file mode 100644 index 0000000000..51464463b3 --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference.md @@ -0,0 +1,135 @@ +--- +title: "Riak KV Usage Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Reference" + identifier: "managing_ref" + weight: 208 + parent: "managing" +toc: true +aliases: +--- + +[ref log]: ./logging +[ref handoff]: ./handoff +[ref bucket types]: ./bucket-types +[ref obj del]: ./object-deletion/ +[ref runtime]: ./runtime-interaction/ +[ref monitoring]: ./statistics-monitoring +[ref snmp]: ./snmp +[ref jmx]: ./jmx +[ref search]: ./search +[ref 2i]: ./secondary-indexes +[ref custom code]: ./custom-code +[ref strong consistency]: ./strong-consistency +[ref mdc]: ./multi-datacenter +[ref v3 mdc]: ./v3-multi-datacenter +[ref v2 mdc]: ./v2-multi-datacenter +[ref arch]: ./architecture + +## In This Section + +#### [Logging Reference][ref log] + +Overview of logging in Riak KV. + +[Learn More >>][ref log] + + +#### [Handoff Reference][ref handoff] + +Details Riak KV's handoff system. + +[Learn More >>][ref handoff] + + +#### [Bucket Types Reference][ref bucket types] + +Explanation of bucket types in Riak KV. + +[Learn More >>][ref bucket types] + + +#### [Object Deletion Reference][ref obj del] + +Information on object deletion scenarios and tombstones. + +[Learn More >>][ref obj del] + + +#### [Runtime Interaction Reference][ref runtime] + +Describes how Riak interacts with distribution ports and operating system +processes/garbage collection. + +[Learn More >>][ref runtime] + + +#### [Statistics & Monitoring Reference][ref monitoring] + +Presents commonly monitored & gathered statistics, as well as solutions for monitoring and gathering statistics. + +[Learn More >>][ref monitoring] + + +#### [Simple Network Management Protocol][ref snmp] + +Covers Riak Enterprise's deprecated SNMP server, used to allow an external system to query nodes for statistics. + +[Learn More >>][ref snmp] + + +#### [JMX Monitoring][ref jmx] + +Details Riak KV's deprecated JMX monitoring system. + +[Learn More >>][ref jmx] + + +#### [Search Reference][ref search] + +Overview of search in Riak KV. + +[Learn More >>][ref search] + + +#### [Secondary Indexes Reference][ref 2i] + +Implementation details for Riak KV's secondary indexes feature. + +[Learn More >>][ref 2i] + + +#### [Installing Custom Code][ref custom code] + +Steps for installing custom code modules for pre/post-commit hooks and MapReduce operations. + +[Learn More >>][ref custom code] + + +#### [Strong Consistency Reference][ref strong consistency] + +Overview of strong consistency in Riak KV. + +[Learn More >>][ref strong consistency] + + +#### [Multi-Datacenter Reference][ref mdc] + +Overview of Riak's Multi-Datacenter system. 
+ +[Learn More >>][ref mdc] + + +#### [V3 Multi-Datacenter Replication Reference][ref v3 mdc] + +Details Riak's V3 Multi-Datacenter system. + +[Learn More >>][ref v3 mdc] + + + + diff --git a/content/riak/kv/3.0.4/using/reference/architecture.md b/content/riak/kv/3.0.4/using/reference/architecture.md new file mode 100644 index 0000000000..0bc91f3942 --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/architecture.md @@ -0,0 +1,21 @@ +--- +draft: true +title: "Architecture Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +#menu: +# riak_kv-3.0.4: +# name: "Architecture" +# identifier: "managing_ref_architecture" +# weight: 116 +# parent: "managing_ref" +toc: true +aliases: +--- + +<!-- TODO: Content --> + + + + diff --git a/content/riak/kv/3.0.4/using/reference/bucket-types.md b/content/riak/kv/3.0.4/using/reference/bucket-types.md new file mode 100644 index 0000000000..1566463d18 --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/bucket-types.md @@ -0,0 +1,823 @@ +--- +title: "Bucket Types" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Bucket Types" + identifier: "managing_ref_bucket_types" + weight: 102 + parent: "managing_ref" +toc: true +aliases: +--- + +Bucket types allow groups of buckets to share configuration details and +for Riak users to manage bucket properties more efficiently than in the +older configuration system based on [bucket properties]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types/#bucket-properties-and-operations). + +{{% note title="Important note on cluster downgrades" %}} +If you upgrade a Riak cluster to version 2.0 or later, you can still downgrade the +cluster to a pre-2.0 version _as long as you have not created and activated a +bucket type in the cluster_. Once any bucket type has been created and +activated, you can no longer downgrade the cluster to a pre-2.0 version. +{{% /note %}} + +## How Bucket Types Work + +The older configuration system, based on bucket properties, involves +setting bucket properties for specific buckets either through +[HTTP]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/set-bucket-props) or [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers/set-bucket-props). With this approach, you can take any given bucket and +modify a wide range of properties, from `n_val` to `allow_mult` and far +beyond. + +Using bucket *types* also involves dealing with bucket properties, but +with a few crucial differences: + +* Bucket types enable you to create bucket configurations and assign + those configurations to as many buckets as you wish, whereas the + previous system required configuration to be set on a per-bucket basis +* Nearly all bucket properties can be updated using bucket types, except the + `datatype` and `consistent` properties, related to + [Riak data types]({{<baseurl>}}riak/kv/3.0.4/developing/data-types), and [strong consistency]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/strong-consistency) respectively +* Bucket types are more performant than bucket properties because + divergence from Riak's defaults doesn't have to be gossiped around the + cluster for every bucket, which means less computational overhead + +It is important to note that buckets are not assigned types in the same +way that they are configured when using [bucket properties]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types/#bucket-properties-and-operations). 
You cannot simply take a +bucket `my_bucket` and assign it a type the way that you would, say, +set `allow_mult` to `false` or `n_val` to `5`, because there is no +`type` parameter contained within the bucket's properties (i.e. +`props`). + +Instead, bucket types are applied to buckets _on the basis of how those +buckets are queried_. Queries involving bucket types take the following +form: + +``` +GET/PUT/DELETE /types/<type>/buckets/<bucket>/keys/<key> +``` + +In the older system, only bucket and key are specified in queries: + +``` +GET/PUT/DELETE /buckets/<bucket>/keys/<key> +``` + +## When to Use Bucket Types + +In many respects, bucket types are a major improvement over the older +system of bucket configuration, including the following: + +* Bucket types are more flexible because they enable you to define a + bucket configuration and then change it if you need to. +* Bucket types are more reliable because the buckets that bear a given + type only have their properties changed when the type is changed. + Previously, it was possible to change the properties of a bucket only + through client requests. +* Whereas bucket properties can only be altered by clients interacting + with Riak, bucket types are more of an operational concept. The + `riak-admin bucket-type` interface (discussed in depth below) enables + you to manage bucket configurations on the operations side, without + recourse to Riak clients. + +For these reasons, we recommend _always_ using bucket types in versions +of Riak 2.0 and later. + +## Managing Bucket Types Through the Command Line + +Bucket types are created, updated, activated, and more through the +`riak-admin bucket-type` interface. + +Below is a full list of available sub-commands: + +Command | Action | Form | +:-------|:-------|:-----| +`create` | Create or modify a bucket type before activation | `create <type> <json>` | +`activate` | Activate a bucket type | `activate <type>` | +`list` | List all currently available bucket types and their activation status | `list` | +`status` | Display the status and properties of a specific bucket type | `status <type>` | +`update` | Update a bucket type after activation | `update <type> <json>` | + +### Creating a Bucket Type + +Creating new bucket types involves using the `create <type> <json>` +command, where `<type>` is the name of the type and `<json>` is a JSON +object of the following form: + +```json +{ + "props": { + "prop1": "val1", + "prop2": "val2", + ... + } +} +``` + + +> **Getting started with Riak clients** +> +> If you are connecting to Riak using one of Basho's official [client libraries]({{<baseurl>}}riak/kv/3.0.4/developing/client-libraries), you can find more information about getting started with your client in our [Developing with Riak KV: Getting Started]({{<baseurl>}}riak/kv/3.0.4/developing/getting-started) section. + +If creation is successful, you should see the following output: + +``` +type_using_defaults created +``` + +{{% note %}} +The `create` command can be run multiple times prior to a bucket type being +activated. Riak will persist only those properties contained in the final call +of the command. +{{% /note %}} + +Creating bucket types that assign properties _always_ involves passing +stringified JSON to the `create` command. One way to do that is to pass +a JSON string directly. 
The following creates a bucket type +`n_equals_1`, which sets `n_val` to 1: + +```bash +riak-admin bucket-type create n_equals_1 '{"props":{"n_val":1}}' +``` + +If you wish, you can also pass in a JSON string through a file, such as +a `.json` file: + +```bash +riak-admin bucket-type create from_json_file "$(cat props.json)" +``` + +Like all bucket types, this type needs to be activated to be usable +within the cluster. + +### Activating a Bucket Type + +Activating a bucket type involves the `activate` command from the same +`bucket-type` interface used before: + +```bash +riak-admin bucket-type activate my_bucket_type +``` + +When activation has succeeded, you should see the following output: + +``` +my_bucket_type has been activated +``` + +A bucket type can be activated only when the type has been propagated to +all running nodes. You can check on the type's readiness by running +`riak-admin bucket-type status <type_name>`. The first line of output +will indicate whether or not the type is ready. + +In a stable cluster, bucket types should propagate very quickly. If, +however, a cluster is experiencing network partitions or other issues, +you will need to resolve those issues before bucket types can be +activated. + +### Listing Bucket Types + +You can list currently available bucket types using the `list` command: + +```bash +riak-admin bucket-type list +``` + +This will return a simple list of types along with their current status +(either `active` or `not active`). An example response: + +``` +type1 (active) +type2 (not active) +type3 (active) +``` + +### Checking a Type's Status + +You can check on the status---i.e. the configuration details---of a +bucket type using the `status <type>` command: + +```bash +riak-admin bucket-type status my_bucket_type +``` + +The console will output two things if the type exists: + +1. Whether or not the type is active +2. The bucket properties associated with the type + +If you check the status of a currently active type called +`my_bucket_type` that simply bears a default bucket configuration, the +output will be as follows: + +```bash +my_bucket_type is active + +active: true +allow_mult: true + +... other properties ... + +w: quorum +young_vclock: 20 +``` + +### Updating a Bucket Type + +The `bucket-type update` command functions much like the `bucket-type +create` command. It simply involves specifying the name of the bucket +type that you wish to modify and a JSON object containing the properties +of the type: + +```bash +riak-admin bucket-type update type_to_update '{"props":{ ... }}' +``` + +{{% note title="Immutable Configurations" %}} +Any bucket properties associated with a type can be modified after a bucket is +created, with three important exceptions: + +* `consistent` +* `datatype` +* `write_once` + +If a bucket type entails strong consistency (requiring that `consistent` be +set to `true`), is set up as a `map`, `set`, or `counter`, or is defined as a +write-once bucket (requiring `write_once` be set to `true`), then those +properties are fixed for the life of the bucket type. + +If you need to change one of these properties, we recommend that you simply +create and activate a new bucket type. 
+{{% /note %}} + +## Buckets as Namespaces + +In versions of Riak prior to 2.0, all queries are made to a bucket/key +pair, as in the following example read request: + +```java +Location myKey = new Location(new Namespace("my_bucket"), "my_key"); +FetchValue fetch = new FetchValue.Builder(myKey).build(); +client.execute(fetch); +``` + +```ruby +bucket = client.bucket('my_bucket') +bucket.get('my_key') +``` + +```php +$location = new Location('my_key', new Bucket('my_bucket')); +(new \Basho\Riak\Command\Builder\FetchObject($riak)) + ->atLocation($location) + ->build() + ->execute(); +``` + +```python +bucket = client.bucket('my_bucket') +bucket.get('my_key') +``` + +```csharp +var id = new RiakObjectId("my_bucket", "my_key"); +client.Get(id); +``` + +```javascript +client.fetchValue({ bucket: 'my_bucket', key: 'my_key' }, function (err, rslt) { +}); +``` + +```erlang +{ok, Object} = riakc_pb_socket:get(Pid, + <<"my_bucket">>, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/buckets/my_bucket/keys/my_key +``` + +With the addition of bucket types in Riak 2.0, bucket types can be used +as _an additional namespace_ on top of buckets and keys. The same bucket +name can be associated with completely different data if it is used in +accordance with a different type. Thus, the following two requests will +be made to _completely different objects_, even though the bucket and key +names are the same: + +```java +Location key1 = + new Location(new Namespace("type1", "my_bucket"), "my_key"); +Location key2 = + new Location(new Namespace("type2", "my_bucket"), "my_key"); +FetchValue fetch1 = new FetchValue.Builder(key1).build(); +FetchValue fetch2 = new FetchValue.Builder(key2).build(); +client.execute(fetch1); +client.execute(fetch2); +``` + +```ruby +bucket1 = client.bucket_type('type1').bucket('my_bucket') +bucket2 = client.bucket_type('type2').bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```php +$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'type1')); +$location2 = new Location('my_key', new Bucket('my_bucket', 'type2')); +$builder = new \Basho\Riak\Command\Builder\FetchObject($riak); +$builder->atLocation($location1) + ->build() + ->execute(); +$builder->atLocation($location2) + ->build() + ->execute(); +``` + +```python +bucket1 = client.bucket_type('type1').bucket('my_bucket') +bucket2 = client.bucket_type('type2').bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```csharp +var id1 = new RiakObjectId("type1", "my_bucket", "my_key"); +var id2 = new RiakObjectId("type2", "my_bucket", "my_key"); +var rslt1 = client.Get(id1); +var rslt2 = client.Get(id2); +``` + +```javascript +client.fetchValue({ + bucketType: 'type1', bucket: 'my_bucket', key: 'my_key' +}, function (err, rslt) { +}); + +client.fetchValue({ + bucketType: 'type2', bucket: 'my_bucket', key: 'my_key' +}, function (err, rslt) { +}); +``` + +```erlang +{ok, Obj1} = riakc_pb_socket:get(Pid, + {<<"type1">>, <<"my_bucket">>}, + <<"my_key">>), +{ok, Obj2} = riakc_pb_socket:get(Pid, + {<<"type2">>, <<"my_bucket">>}, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/types/type1/buckets/my_bucket/keys/my_key +curl http://localhost:8098/types/type2/buckets/my_bucket/keys/my_key +``` + +{{% note title="Note on object location" %}} +In Riak 2.x, _all requests_ must be made to a location specified by a bucket +type, bucket, and key rather than to a bucket/key pair, as in previous +versions. 
+{{% /note %}} + +If requests are made to a bucket/key pair without a specified bucket +type, `default` will be used in place of a bucket type. The following +queries are thus identical: + +```java +Location withDefaultBucketType = + new Location(new Namespace("default", "my_bucket"), "my_key"); +Location noBucketType = + new Location(new Namespace("my_bucket"), "my_key"); +FetchValue fetch1 = new FetchValue.Builder(withDefaultBucketType).build(); +FetchValue fetch2 = new FetchValue.Builder(noBucketType).build(); +client.execute(fetch1); +client.execute(fetch2); +``` + +```ruby +bucket1 = client.bucket_type('default').bucket('my_bucket') +bucket2 = client.bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```php +$location1 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket', 'default')); +$location2 = new \Basho\Riak\Location('my_key', new Bucket('my_bucket')); +$builder = new \Basho\Riak\Command\Builder\FetchObject($riak); +$builder->atLocation($location1) + ->build() + ->execute(); +$builder->atLocation($location2) + ->build() + ->execute(); +``` + +```python +bucket1 = client.bucket_type('default').bucket('my_bucket') +bucket2 = client.bucket('my_bucket') +bucket1.get('my_key') +bucket2.get('my_key') +``` + +```csharp +var id1 = new RiakObjectId("default", "my_bucket", "my_key"); +var obj1 = new RiakObject(id1, "value", RiakConstants.ContentTypes.TextPlain); +client.Put(obj1); + +var id2 = new RiakObjectId("my_bucket", "my_key"); +var getRslt = client.Get(id2); + +RiakObject obj2 = getRslt.Value; +// Note: obj1.Value and obj2.Value are equal +``` + +```javascript +var obj1 = new Riak.Commands.KV.RiakObject(); +obj1.setContentType('text/plain'); +obj1.setBucketType('default'); +obj1.setBucket('my_bucket'); +obj1.setKey('my_key'); +obj1.setValue('value'); +client.storeValue({ value: obj1 }, function (err, rslt) { + if (err) { + throw new Error(err); + } + + client.fetchValue({ + bucketType: 'default', bucket: 'my_bucket', key: 'my_key' + }, function (err, rslt) { + if (err) { + throw new Error(err); + } + var obj2 = rslt.values.shift(); + assert(obj1.value == obj2.value); + }); +}); +``` + +```erlang +{ok, Obj1} = riakc_pb_socket:get(Pid, + {<<"default">>, <<"my_bucket">>}, + <<"my_key">>), +{ok, Obj2} = riakc_pb_socket:get(Pid, + <<"my_bucket">>, + <<"my_key">>). +``` + +```curl +curl http://localhost:8098/buckets/my_bucket/keys/my_key +curl http://localhost:8098/types/default/buckets/my_bucket/keys/my_key +``` + +## Default Bucket Properties + +Below is a listing of the default bucket properties (i.e. `props`) +associated with the `default` bucket type: + +```json +{ + "props": { + "allow_mult": false, + "basic_quorum": false, + "big_vclock": 50, + "chash_keyfun": { + "fun": "chash_std_keyfun", + "mod": "riak_core_util" + }, + "dvv_enabled": false, + "dw": "quorum", + "last_write_wins": false, + "linkfun": { + "fun": "mapreduce_linkfun", + "mod": "riak_kv_wm_link_walker" + }, + "n_val": 3, + "notfound_ok": true, + "old_vclock": 86400, + "postcommit": [], + "pr": 0, + "precommit": [], + "pw": 0, + "r": "quorum", + "rw": "quorum", + "small_vclock": 50, + "w": "quorum", + "young_vclock": 20 + } +} +``` + +## Bucket Types and the `allow_mult` Setting + +Prior to Riak 2.0, Riak created [siblings]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/causal-context/#siblings) in the case of conflicting updates only when explicitly instructed to do so, i.e. when `allow_mult` is set to `true`. The default `allow_mult` setting was `false`. 
+
+In version 2.0, this changed in a subtle way. Now, there are two
+different default settings for `allow_mult` in play:
+
+* For the `default` bucket type, `allow_mult` is set to `false` by
+  default, as in previous versions of Riak
+* For all newly-created bucket types, the default is now `true`. It is
+  possible to set `allow_mult` to `false` if you wish to avoid resolving
+  sibling conflicts, but this needs to be done explicitly.
+
+The consequence is that applications that have previously ignored
+conflict resolution in certain buckets (or all buckets) can continue to
+do so. New applications, however, are encouraged to retain and [resolve siblings]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution) with the appropriate application-side business logic.
+
+To give an example, let's have a look at the properties associated with
+the `default` bucket type:
+
+```bash
+riak-admin bucket-type status default | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: false
+```
+
+Now, let's create a new bucket type called `n_val_of_2`, which sets the
+`n_val` to 2 but doesn't explicitly set `allow_mult`:
+
+```bash
+riak-admin bucket-type create n_val_of_2 '{"props":{"n_val":2}}'
+```
+
+When specifying this bucket type's properties as above, the `allow_mult`
+parameter was not changed. However, if we view the bucket type's
+properties, we can see in the console output that `allow_mult` is set to
+`true`:
+
+```bash
+riak-admin bucket-type status n_val_of_2 | grep allow_mult
+```
+
+The output:
+
+```
+allow_mult: true
+```
+
+This is important to bear in mind in Riak 2.0 and later whenever you
+create, activate, and use your own bucket types. It is still possible to
+set `allow_mult` to `false` in any given bucket type, but it must be
+done explicitly. If we wanted to set `allow_mult` to `false` in our
+`n_val_of_2` bucket type from above, we would need to update the
+existing type as follows:
+
+```bash
+riak-admin bucket-type update n_val_of_2 '{"props":{"allow_mult":false}}'
+```
+
+## Bucket Type Example
+
+Let's say that you'd like to create a bucket type called
+`user_account_bucket` with a [pre-commit hook]({{<baseurl>}}riak/kv/3.0.4/developing/usage/commit-hooks/#pre-commit-hooks) called `syntax_check` and two [post-commit
+hooks]({{<baseurl>}}riak/kv/3.0.4/developing/usage/commit-hooks/#post-commit-hooks) called `welcome_email` and `update_registry`. This would involve four steps:
+
+1. Creating a JSON object containing the appropriate `props`
+   settings:
+
+    ```json
+    {
+      "props": {
+        "precommit": ["syntax_check"],
+        "postcommit": ["welcome_email", "update_registry"]
+      }
+    }
+    ```
+
+2. Passing that JSON to the `bucket-type create` command:
+
+    ```bash
+    riak-admin bucket-type create user_account_bucket '{"props":{"precommit": ["syntax_check"], ... }}'
+    ```
+
+    If creation is successful, the console will return
+    `user_account_bucket created`.
+
+3. Verifying that the type is ready to be activated:
+
+    Once the type is created, you can check whether your new type is
+    ready to be activated by running:
+
+    ```bash
+    riak-admin bucket-type status user_account_bucket
+    ```
+
+    If the first line reads `user_account_bucket has been created and
+    may be activated`, then you can proceed to the next step. If it
+    reads `user_account_bucket has been created and is not ready to
+    activate`, then wait a moment and try again. 
If it still does not
+    work, then there may be a network partition or other issues that need
+    to be addressed in your cluster.
+
+4. Activating the new bucket type:
+
+    ```bash
+    riak-admin bucket-type activate user_account_bucket
+    ```
+
+    If activation is successful, the console will return
+    `user_account_bucket has been activated`. The bucket type is now
+    ready to be used.
+
+## Client Usage Example
+
+If you have created the bucket type `no_siblings` (with the property
+`allow_mult` set to `false`) and would like that type to be applied to
+the bucket `sensitive_user_data`, you would need to run operations on
+that bucket in accordance with the format above. Here is an example
+write:
+
+```java
+Location key =
+  new Location(new Namespace("no_siblings", "sensitive_user_data"), "user19735");
+RiakObject obj = new RiakObject()
+        .setContentType("application/json")
+        .setValue(BinaryValue.create("{ ... user data ... }"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(key)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = Riak::RObject.new(bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.raw_data = '{ ... user data ... }'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildJsonObject("{ ... user data ... }")
+    ->buildLocation('user19735', 'sensitive_user_data', 'no_siblings')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('sensitive_user_data')
+obj = RiakObject(client, bucket, 'user19735')
+obj.content_type = 'application/json'
+obj.data = '{ ... user data ... }'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "sensitive_user_data", "user19735");
+var obj = new RiakObject(id, "{\"name\":\"Bob\"}");
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = { name: 'Bob' };
+client.storeValue({
+    bucketType: 'no_siblings', bucket: 'sensitive_user_data',
+    key: 'user19735', value: obj
+}, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"sensitive_user_data">>},
+                       <<"user19735">>,
+                       <<"{ ... user data ... }">>,
+                       <<"application/json">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: application/json" \
+  -d "{ ... user data ... }" \
+  http://localhost:8098/types/no_siblings/buckets/sensitive_user_data/keys/user19735
+```
+
+In this example, the bucket `sensitive_user_data` bears the
+configuration established by the `no_siblings` bucket type, and it bears
+that configuration _on the basis of the query's structure_. This is
+because bucket types act as a [separate namespace](#buckets-as-namespaces) in Riak, in addition to [buckets]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/buckets) and [keys]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/keys-and-objects).
+
+Let's say that we're using Riak to store internet memes. We've been
+using a bucket called `new_memes` with the bucket type
+`no_siblings` (from above). At a certain point, we decide that our
+application needs to use a new bucket called `old_memes` to store memes
+that have gone woefully out of fashion, but that bucket also needs to
+bear the type `no_siblings`.
+
+The following request seeks to add the meme "all your base are belong to
+us" to the `old_memes` bucket. 
If the bucket type `no_siblings` has been
+created and activated, the request will ensure that the `old_memes`
+bucket inherits all of the properties from the type `no_siblings`:
+
+```java
+Location allYourBaseKey =
+  new Location(new Namespace("no_siblings", "old_memes"), "all_your_base");
+RiakObject obj = new RiakObject()
+        .setContentType("text/plain")
+        .setValue(BinaryValue.create("all your base are belong to us"));
+StoreValue store = new StoreValue.Builder(obj)
+        .withLocation(allYourBaseKey)
+        .build();
+client.execute(store);
+```
+
+```ruby
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = Riak::RObject.new(bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.raw_data = 'all your base are belong to us'
+obj.store
+```
+
+```php
+(new \Basho\Riak\Command\Builder\StoreObject($riak))
+    ->buildObject("all your base are belong to us", ['Content-Type' => 'text/plain'])
+    ->buildLocation('all_your_base', 'old_memes', 'no_siblings')
+    ->build()
+    ->execute();
+```
+
+```python
+bucket = client.bucket_type('no_siblings').bucket('old_memes')
+obj = RiakObject(client, bucket, 'all_your_base')
+obj.content_type = 'text/plain'
+obj.data = 'all your base are belong to us'
+obj.store()
+```
+
+```csharp
+var id = new RiakObjectId("no_siblings", "old_memes", "all_your_base");
+var obj = new RiakObject(id, "all your base are belong to us",
+    RiakConstants.ContentTypes.TextPlain);
+var rslt = client.Put(obj);
+```
+
+```javascript
+var obj = new Riak.Commands.KV.RiakObject();
+obj.setContentType('text/plain');
+obj.setBucketType('no_siblings');
+obj.setBucket('old_memes');
+obj.setKey('all_your_base');
+obj.setValue('all your base are belong to us');
+client.storeValue({ value: obj }, function (err, rslt) {
+    if (err) {
+        throw new Error(err);
+    }
+});
+```
+
+```erlang
+Object = riakc_obj:new({<<"no_siblings">>, <<"old_memes">>},
+                       <<"all_your_base">>,
+                       <<"all your base are belong to us">>,
+                       <<"text/plain">>),
+riakc_pb_socket:put(Pid, Object).
+```
+
+```curl
+curl -XPUT \
+  -H "Content-Type: text/plain" \
+  -d "all your base are belong to us" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/all_your_base
+```
+
+This query would both create the bucket `old_memes` and ensure that the
+configuration contained in the `no_siblings` bucket type is applied to
+the bucket all at once.
+
+If we wished, we could also store both old and new memes in
+buckets with different types. We could use the `no_siblings` bucket type
+from above if we didn't want to deal with siblings, vclocks, and the like,
+and we could use a `siblings_allowed` bucket type (with all of the
+default properties, including `allow_mult` set to `true`). This would give
+us four bucket type/bucket pairs:
+
+* `no_siblings` / `old_memes`
+* `no_siblings` / `new_memes`
+* `siblings_allowed` / `old_memes`
+* `siblings_allowed` / `new_memes`
+
+All four of these pairs are isolated keyspaces. The key `favorite_meme`
+could hold different values in all four bucket type/bucket spaces.
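+
+As a quick illustration of that isolation, the following sketch (HTTP
+interface, hypothetical values) writes a different value for
+`favorite_meme` into each of the four keyspaces and reads two of them
+back independently:
+
+```bash
+# Four writes to the same bucket/key under different bucket types
+curl -XPUT -H "Content-Type: text/plain" -d "doge" \
+  http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl -XPUT -H "Content-Type: text/plain" -d "nyan cat" \
+  http://localhost:8098/types/no_siblings/buckets/new_memes/keys/favorite_meme
+curl -XPUT -H "Content-Type: text/plain" -d "all your base" \
+  http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+curl -XPUT -H "Content-Type: text/plain" -d "success kid" \
+  http://localhost:8098/types/siblings_allowed/buckets/new_memes/keys/favorite_meme
+
+# Each read returns only the value stored in that keyspace
+curl http://localhost:8098/types/no_siblings/buckets/old_memes/keys/favorite_meme
+curl http://localhost:8098/types/siblings_allowed/buckets/old_memes/keys/favorite_meme
+```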
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/custom-code.md b/content/riak/kv/3.0.4/using/reference/custom-code.md
new file mode 100644
index 0000000000..1fdd70ca27
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/custom-code.md
@@ -0,0 +1,135 @@
+---
+title: "Installing Custom Code"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Installing Custom Code"
+    identifier: "managing_ref_custom_code"
+    weight: 111
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/advanced/install-custom-code/
+  - /riak/kv/3.0.4/ops/advanced/install-custom-code/
+---
+
+Riak supports the use of Erlang named functions in compiled modules for
+[pre/post-commit hooks]({{<baseurl>}}riak/kv/3.0.4/developing/usage/commit-hooks) and MapReduce operations. This
+doc contains installation steps with simple examples for each use case.
+
+Your developers can compile [custom Erlang code]({{<baseurl>}}riak/kv/3.0.4/developing/usage/commit-hooks), which
+they can send to you as a *beam* file. Note that in Erlang, a file must
+have the same name as the module it contains. So if you are given a file
+named `validate_json.beam`, do not rename it.
+
+> *Note: The [Configure](#configure) step (`add_paths`) also applies to installing JavaScript files.*
+
+### Compiling
+
+If you have been given Erlang code and are expected to compile it for
+your developers, keep the following notes in mind.
+
+{{% note title="Note on the Erlang Compiler" %}}
+You must use the Erlang compiler (`erlc`) associated with the Riak
+installation or the version of Erlang used when compiling Riak from source.
+For packaged Riak installations, you can consult Table 1 below for the default
+location of Riak's `erlc` for each supported platform. If you compiled from
+source, use the `erlc` from the Erlang version you used to compile Riak.
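+
+For example, on a Debian or Ubuntu package installation (path taken from
+Table 1 below), compiling the module from this doc would look like:
+
+```bash
+/usr/lib/riak/erts-5.9.1/bin/erlc validate_json.erl
+```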
+{{% /note %}}
+
+<table style="width: 100%; border-spacing: 0px;">
+<tbody>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>CentOS & RHEL Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib64/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Debian & Ubuntu Linux</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>FreeBSD</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/usr/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>SmartOS</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/local/lib/riak/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+<tr align="left" valign="top">
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;"><strong>Solaris 10</strong></td>
+<td style="padding: 15px; margin: 15px; border-width: 1px 0 1px 0; border-style: solid;">
+<p><tt>/opt/riak/lib/erts-5.9.1/bin/erlc</tt></p>
+</td>
+</tr>
+</tbody>
+</table>
+
+Table 1: Erlang compiler executable location for packaged Riak installations
+         on supported platforms
+
+Compiling the module is a straightforward process.
+
+```bash
+erlc validate_json.erl
+```
+
+Successful compilation will result in a new `.beam` file,
+`validate_json.beam`.
+
+Next, you'll need to define a path in which compiled modules can be stored
+and from which they can be loaded. For our example, we'll use a temporary
+directory `/tmp/beams`, but you should choose a directory for production
+functions based on your own requirements such that they will be available
+where and when needed.
+
+{{% note %}}
+Ensure that the directory chosen above can be read by the `riak` user.
+{{% /note %}}
+
+### Configure
+
+Take the `validate_json.beam` file and copy it to the `/tmp/beams`
+directory.
+
+```bash
+cp validate_json.beam /tmp/beams/
+```
+
+After copying the compiled module into `/tmp/beams/`, you must update
+`app.config` and configure Riak to allow loading of compiled modules from
+the directory where they're stored (again in our example case, `/tmp/beams`).
+
+Edit `app.config` and insert an `add_paths` setting into the `riak_kv`
+section as shown:
+
+```erlang
+{riak_kv, [
+  %% ...
+  {add_paths, ["/tmp/beams/"]},
+  %% ...
+```
+
+After updating `app.config`, Riak must be restarted. In production, if you
+are rolling out configuration changes to multiple nodes, do so in a rolling
+fashion, taking time to ensure that the Riak key/value store has fully
+initialized and become available for use on each node before moving on to
+the next.
+
+This is done with the `riak-admin wait-for-service` command as detailed
+in the [Commands documentation]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#wait-for-service).
+
+{{% note %}}
+It is important that you ensure `riak_kv` is active before restarting the
+next node.
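+
+A minimal sketch of that check, with a placeholder node name:
+
+```bash
+riak-admin wait-for-service riak_kv riak@192.168.1.10
+```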
+{{% /note %}} + + + + diff --git a/content/riak/kv/3.0.4/using/reference/failure-recovery.md b/content/riak/kv/3.0.4/using/reference/failure-recovery.md new file mode 100644 index 0000000000..393f59bbbb --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/failure-recovery.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Failure & Recovery Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Failure & Recovery" + identifier: "managing_ref_failure_recovery" + weight: 105 + parent: "managing_ref" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. 
+``` + + + + diff --git a/content/riak/kv/3.0.4/using/reference/handoff.md b/content/riak/kv/3.0.4/using/reference/handoff.md new file mode 100644 index 0000000000..dc132f42a1 --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/handoff.md @@ -0,0 +1,201 @@ +--- +title: "Handoff Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Handoff" + identifier: "managing_ref_handoff" + weight: 101 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.4/ops/running/handoff/ + - /riak/kv/3.0.4/ops/running/handoff/ +--- + +[cluster ops handoff]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/handoff + +Riak is a distributed system built with two essential goals in mind: + +* **fault tolerance**, whereby a Riak cluster can withstand node + failure, network partitions, and other events in a way that does not + disrupt normal functioning, and +* **scalability**, whereby operators can gracefully add and remove nodes + to/from a Riak cluster + +Both of these goals demand that Riak is able to either temporarily or +permanently re-assign responsibility for portions of the keyspace. That +re-assigning is referred to as **intra-cluster handoff** (or simply +**handoff** in our documentation). + +## Types of Handoff + +Intra-cluster handoff typically takes one of two forms: **hinted +handoff** and **ownership transfer**. + +Hinted handoff occurs when a [vnode]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode) temporarily takes over responsibility for some data and then returns that data to its original "owner." Imagine a 3-node cluster with nodes A, B, and C. If node C goes offline, e.g. during a network partition, nodes A and B will pick +up the slack, so to speak, assuming responsibility for node C's +operations. When node C comes back online, responsibility will be handed +back to the original vnodes. + +Ownership transfer is different because it is meant to be permanent. +It occurs when a [vnode]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode) no longer belongs to the node on which it's running. This typically happens when the very +makeup of a cluster changes, e.g. when nodes are added or removed from +the cluster. In this case, responsibility for portions of the keyspace +needs to be fundamentally re-assigned. + +Both types of handoff are handled automatically by Riak. Operators do +have the option, however, of enabling and disabling handoff on +particular nodes or all nodes and of configuring key aspects of Riak's +handoff behavior. More information can be found below. + +## Configuring Handoff + +A full listing of configurable parameters can be found in our +[configuration files]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#intra-cluster-handoff) +document. The sections below provide a more narrative description of +handoff configuration. + +### SSL + +If you want to encrypt handoff behavior within a Riak cluster, you need +to provide each node with appropriate paths for an SSL certfile (and +potentially a keyfile). The configuration below would designate a +certfile at `/ssl_dir/cert.pem` and a keyfile at `/ssl_dir/key.pem`: + +```riakconf +handoff.ssl.certfile = /ssl_dir/cert.pem +handoff.ssl.keyfile = /ssl_dir/key.pem +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_ssl_options, [ + {certfile, "/ssl_dir/cert.pem"}, + {keyfile, "/ssl_dir/key.pem"} + ]}, + %% Other configs +]} +``` + +### Port + +You can set the port used by Riak for handoff-related interactions using +the `handoff.port` parameter. 
The default is 8099. This would change the +port to 9000: + +```riakconf +handoff.port = 9000 +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_port, 9000}, + %% Other configs +]} +``` + +### Background Manager + +Riak has an optional background manager that limits handoff activity in +the name of saving resources. The manager can help prevent system +response degradation during times of heavy load, when multiple +background tasks may contend for the same system resources. The +background manager is disabled by default. The following will enable it: + +```riakconf +handoff.use_background_manager = on +``` + +```appconfig +{riak_kv, [ + %% Other configs + {handoff_use_background_manager, on}, + %% Other configs +]} +``` + +### Maximum Rejects + +If you're using Riak features such as [Riak Search]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search/), +those subsystems can block handoff of primary key/value data, i.e. data +that you interact with via normal reads and writes. + +The `handoff.max_rejects` setting enables you to set the maximum +duration that a [vnode]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode) can be blocked by multiplying the +`handoff.max_rejects` setting by the value of +[`vnode_management_timer`]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#vnode_management_timer). +Thus, if you set `handoff.max_rejects` to 10 and +`vnode_management_timer` to 5 seconds (i.e. `5s`), non-K/V subsystems +can block K/V handoff for a maximum of 50 seconds. The default for +`handoff.max_rejects` is 6, while the default for +`vnode_management_timer` is `10s`. This would set `max_rejects` to 10: + +```riakconf +handoff.max_rejects = 10 +``` + +```appconfig +{riak_kv, [ + %% Other configs + {handoff_rejected_max, 10}, + %% Other configs +]} +``` + +### Transfer Limit + +You can adjust the number of node-to-node transfers (which includes +handoff) using the `transfer_limit` parameter. The default is 2. Setting +this higher will increase node-to-node communication but at the expense +of higher resource intensity. This would set `transfer_limit` to 5: + +```riakconf +transfer_limit = 5 +``` + +```appconfig +{riak_core, [ + %% Other configs + {handoff_concurrency, 5}, + %% Other configs +]} +``` + +## Enabling and Disabling Handoff + +Handoff can be enabled and disabled in two ways: via configuration or +on the command line. + +### Enabling and Disabling via Configuration + +You can enable and disable both outbound and inbound handoff on a node +using the `handoff.outbound` and `handoff.inbound` settings, +respectively. Both are enabled by default. The following would disable +both: + +```riakconf +handoff.outbound = off +handoff.inbound = off +``` + +```appconfig +{riak_core, [ + %% Other configs + {disable_outbound_handoff, true}, + {disable_inbound_handoff, true}, + %% Other configs +]} +``` + +### Enabling and Disabling Through the Command Line + +Check out the [Cluster Operations: Handoff][cluster ops handoff] for steps on enabling and disabling handoff via the command line. 
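+
+To verify the effect of these settings on a running node, the
+`riak-admin` handoff and transfer interfaces can be consulted (treat
+this as a sketch; output format varies by version):
+
+```bash
+# Summarize handoff activity and whether transfers are enabled
+riak-admin handoff summary
+
+# Show ongoing and queued partition transfers across the cluster
+riak-admin transfers
+```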
+ + + + diff --git a/content/riak/kv/3.0.4/using/reference/jmx.md b/content/riak/kv/3.0.4/using/reference/jmx.md new file mode 100644 index 0000000000..fd6fccfb13 --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/jmx.md @@ -0,0 +1,190 @@ +--- +title: "JMX Monitoring" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "JMX Monitoring" + identifier: "managing_ref_jmx" + weight: 108 + parent: "managing_ref" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.4/ops/running/monitoring/jmx + - /riak/kv/3.0.4/ops/running/monitoring/jmx +--- + +Riak exposes monitoring data via JMX. To enable JMX monitoring, edit the [`app.config`]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#app-config) associated with your Riak installation and set the `enabled` property of the `riak_jmx` section to `true` as shown below. The TCP port on which the JMX provider listens is also configurable in this section (the default JMX port is `41110`). + +```erlang + {riak_jmx, [ + {enabled, true}, + {port, 41110} + ]} +``` + +To view JMX data---assuming that you have the Sun JDK installed---launch JConsole as follows: + +```bash +$ jconsole <hostname_to_monitor>:<jmx_port> +``` + +Once connected, click on the **MBeans** tab, expand the **com.basho.riak** tree view, and select **Attributes**. The attributes listed in the table below will be displayed. + +Riak JMX has been tested with the Sun JRE 1.6.0_12 and 1.6.0_20. Some older/non-Sun JREs do not work (e.g. the default java-gcj JRE installed on Debian lenny). If you have problems with JMX or see the message below, please try upgrading to the Sun JRE: + +```log + =INFO REPORT==== 9-Jun-2010::08:14:57 === + JMX server monitor <pid> exited with code <non-zero>. +``` + +## Exported JMX Attributes +<br> +<table> + <tr> + <th WIDTH="30%">Attribute</th> + <th WIDTH="15%">Type</th> + <th WIDTH="55%">Description</th> + </tr> + <tr> + <td><tt>CPUNProcs</tt></td> + <td>int</td> + <td>Number of running processes</td> + </tr> + <tr> + <td><tt>CpuAvg1</tt></td> + <td>int</td> + <td>1 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg5</tt></td> + <td>int</td> + <td>5 minute load average</td> + </tr> + <tr> + <td><tt>CpuAvg15</tt></td> + <td>int</td> + <td>15 minute load average</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime95</tt></td> + <td>float</td> + <td>95th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTime99</tt></td> + <td>float</td> + <td>99th percentile GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMean</tt></td> + <td>float</td> + <td>Mean GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGetFsmTimeMedian</tt></td> + <td>float</td> + <td>Median GET time (microseconds)</td> + </tr> + <tr> + <td><tt>NodeGets</tt></td> + <td>int</td> + <td>Number of GETs in past minute</td> + </tr> + <tr> + <td><tt>NodeGetsTotal</tt></td> + <td>int</td> + <td>Number of GETs since node start</td> + </tr> + <tr> + <td><tt>NodeName</tt></td> + <td>string</td> + <td>Node name</td> + </tr> + <tr> + <td><tt>NodePutFsmTime95</tt></td> + <td>float</td> + <td>95th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTime99</tt></td> + <td>float</td> + <td>99th percentile PUT time (microseconds)</td> + </tr> + <tr> + <td><tt>NodePutFsmTimeMax</tt></td> + <td>float</td> + <td>Maximum PUT time (microseconds)</td> + </tr> + <tr> + 
<td><tt>NodePutFsmTimeMean</tt></td>
+    <td>float</td>
+    <td>Mean PUT time (microseconds)</td>
+  </tr>
+  <tr>
+    <td><tt>NodePutFsmTimeMedian</tt></td>
+    <td>float</td>
+    <td>Median PUT time (microseconds)</td>
+  </tr>
+  <tr>
+    <td><tt>NodePuts</tt></td>
+    <td>int</td>
+    <td>Number of PUTs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>NodePutsTotal</tt></td>
+    <td>int</td>
+    <td>Number of PUTs since node start</td>
+  </tr>
+  <tr>
+    <td><tt>PBCActive</tt></td>
+    <td>int</td>
+    <td>Number of active Protocol Buffers connections</td>
+  </tr>
+  <tr>
+    <td><tt>PBCConnects</tt></td>
+    <td>int</td>
+    <td>Number of Protocol Buffers connections in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>PBCConnectsTotal</tt></td>
+    <td>int</td>
+    <td>Number of Protocol Buffers connections since node start</td>
+  </tr>
+  <tr>
+    <td><tt>RingCreationSize</tt></td>
+    <td>int</td>
+    <td>Number of partitions in Riak ring</td>
+  </tr>
+  <tr>
+    <td><tt>VnodeGets</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level GETs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>VnodeGetsTotal</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level GETs since node start</td>
+  </tr>
+  <tr>
+    <td><tt>VnodePuts</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level PUTs in past minute</td>
+  </tr>
+  <tr>
+    <td><tt>VnodePutsTotal</tt></td>
+    <td>int</td>
+    <td>Number of vnode-level PUTs since node start</td>
+  </tr>
+</table>
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/logging.md b/content/riak/kv/3.0.4/using/reference/logging.md
new file mode 100644
index 0000000000..42d4e23fbc
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/logging.md
@@ -0,0 +1,301 @@
+---
+title: "Logging Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Logging"
+    identifier: "managing_ref_logging"
+    weight: 100
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/logging
+  - /riak/kv/3.0.4/ops/running/logging
+---
+
+[cluster ops log]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/logging
+
+Logging in Riak KV is handled by a Basho-produced logging framework for
+[Erlang](http://www.erlang.org) called
+[lager](https://github.com/basho/lager).
+
+lager provides a number of configuration options that you can use to fine-tune your Riak cluster's logging output. A compact listing of parameters can be found in our [configuration files]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#logging) documentation. A more thorough explanation of these options can be found in this document.
+
+## Log Directory
+
+Riak's log files are stored in a `/log` directory on each node. The
+location of that directory differs from platform to platform. The table
+below shows you where log files are stored on all supported operating
+systems.
+
+OS | Directory
+:--|:---------
+Ubuntu, Debian, CentOS, RHEL | `/var/log/riak`
+Solaris, OpenSolaris | `/opt/riak/log`
+Source install and Mac OS X | `./log` (where the `.` represents the root installation directory)
+
+## Log Files
+
+Below is a list of files that can be found in each node's `/log`
+directory:
+
+File | Significance
+:----|:------------
+`console.log` | Console log output: general messages from all Riak subsystems
+`crash.log` | Crash logs: catastrophic events, such as node failures, running out of disk space, etc.
+`erlang.log` | Logs emitted by the [Erlang VM](../../performance/erlang) on which Riak runs.
+`error.log` | [Common errors](../../repair-recovery/errors) emitted by Riak.
+`run_erl.log` | The log file for an Erlang process called `run_erl`, which records the command-line arguments used when starting Riak. This file can typically be ignored. 
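+
+On a packaged install, listing the log directory might therefore look
+something like the following (a sketch; the rotated `.0`/`.1` suffixes
+are explained under Log File Rotation below):
+
+```bash
+ls /var/log/riak
+# console.log  console.log.0  crash.log  erlang.log.1  error.log  run_erl.log
+```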
+
+## Log Syntax
+
+Riak logs tend to be structured like this:
+
+```log
+<date> <time> [<level>] <PID> <prefix>: <message>
+```
+
+The `date` segment is structured `YYYY-MM-DD`, `time` is structured
+`hh:mm:ss.sss`, `level` depends on which log levels are available in the
+file you are looking at (consult the sections below), the `PID` is the
+Erlang process identifier for the process in which the event occurred,
+and the message `prefix` will often identify the Riak subsystem
+involved, e.g. `riak_ensemble_peer` or `alarm_handler` (amongst many
+other possibilities).
+
+{{% note title="Warning: Log messages may contain newline characters" %}}
+As of Riak KV 3.0.4, a few of the log messages may contain newline
+characters, preventing external tools from reliably identifying the end
+of each log message during ingestion.
+
+A known workaround is to ingest the logs enabled by the `log.syslog`
+configurable parameter (rather than those enabled by `log.console`) and
+have syslog rewrite the messages, e.g. via the
+[`no-multi-line`](https://www.balabit.com/documents/syslog-ng-ose-3.5-guides/en/syslog-ng-ose-guide-admin/html-single/index.html)
+option of syslog-ng (see [this StackExchange
+answer](https://unix.stackexchange.com/questions/317422/is-there-a-way-to-rewrite-parts-of-a-message-globally-instead-of-inserting-rewri/317474#317474))
+or an equivalent option in other syslog implementations.
+{{% /note %}}
+
+The exception to this syntax is in crash logs (stored in `crash.log`
+files). For crash logs, the syntax tends to be along the following
+lines:
+
+```log
+<date> <time> =<report title>====
+<message>
+```
+
+Here is an example crash report:
+
+```log
+2014-10-17 15:56:38 =ERROR REPORT====
+Error in process <0.4330.323> on node 'dev1@127.0.0.1' with exit value: ...
+```
+
+## Log File Rotation
+
+Riak maintains multiple separate files for `console.log`, `crash.log`,
+`erlang.log`, and `error.log`, which are rotated as each file reaches
+its maximum capacity of 100 KB. In each node's `/log` directory, you may
+see, for example, files named `console.log`, `console.log.0`,
+`console.log.1`, and so on. Riak's log rotation is somewhat
+non-traditional, as it does not always log to `*.1` (e.g. `erlang.log.1`)
+but rather to the oldest log file.
+
+After, say, `erlang.log.1` is filled up, the logging system will begin
+writing to `erlang.log.2`, then `erlang.log.3`, and so on. When
+`erlang.log.5` is filled up, it will loop back to `erlang.log.1`.
+
+## SASL
+
+[SASL](http://www.erlang.org/doc/man/sasl_app.html) (System Architecture
+Support Libraries) is Erlang's built-in error logger. You can enable and
+disable it using the `sasl` parameter (which can be set to `on` or
+`off`). It is disabled by default. The following would enable it:
+
+```riakconf
+sasl = on
+```
+
+## Error Messages
+
+Riak stores error messages in `./log/error.log` by default. You can
+change this using the `log.error.file` parameter. 
Here is an
+example, which uses the default:
+
+```riakconf
+log.error.file = ./log/error.log
+```
+
+By default, error messages are redirected into lager, i.e. the
+`log.error.redirect` parameter is set to `on`. The following would
+disable the redirect:
+
+```riakconf
+log.error.redirect = off
+```
+
+You can also throttle the number of error messages that are handled per
+second. The default is 100.
+
+```riakconf
+log.error.messages_per_second = 100
+```
+
+## Crash Logs
+
+Riak crash logs are stored in `./log/crash.log` by default. You can
+change this using the `log.crash.file` parameter. This example uses the
+default:
+
+```riakconf
+log.crash.file = ./log/crash.log
+```
+
+While crash logs are kept by default, i.e. the `log.crash` parameter is
+set to `on`, you can disable crash logs like this:
+
+```riakconf
+log.crash = off
+```
+
+### Crash Log Rotation
+
+Like other Riak logs, crash logs are rotated. You can set the crash logs
+to be rotated when a certain size threshold is reached, at designated
+times, or both.
+
+You can set the rotation time using the `log.crash.rotation` parameter.
+The default is `$D0`, which rotates the logs every day at midnight. You
+can also set the rotation to occur weekly, on specific days of the
+month, etc. Complete documentation of the syntax can be found
+[here](https://github.com/basho/lager/blob/master/README.md#internal-log-rotation).
+Below are some examples:
+
+* `$D0` - Every night at midnight
+* `$D23` - Every day at 23:00 (11 pm)
+* `$W0D20` - Every week on Sunday at 20:00 (8 pm)
+* `$M1D0` - On the first day of every month at midnight
+* `$M5D6` - On the fifth day of the month at 6:00 (6 am)
+
+To set the maximum size of the crash log before it is rotated, use the
+`log.crash.size` parameter. You can specify the size in KB, MB, etc. The
+default is `10MB`.
+
+### Other Crash Log Settings
+
+The maximum size of individual crash log messages can be set using the
+`log.crash.maximum_message_size` parameter, using any size denomination
+you wish, e.g. `KB` or `MB`. The default is 64 KB. The following would
+set that maximum message size to 1 MB:
+
+```riakconf
+log.crash.maximum_message_size = 1MB
+```
+
+## Syslog
+
+Riak log output does not go to syslog by default, i.e. the `log.syslog`
+setting is set to `off` by default. To enable syslog output:
+
+```riakconf
+log.syslog = on
+```
+
+If syslog output is enabled, you can choose a prefix to be prepended to
+each syslog message. The prefix is `riak` by default.
+
+```riakconf
+log.syslog.ident = riak
+```
+
+### Syslog Level and Facility Level
+
+If syslog is enabled, i.e. if `log.syslog` is set to `on`, you can
+select the log level of syslog output from among the available levels,
+which are listed below. The default is `info`.
+
+* `alert`
+* `critical`
+* `debug`
+* `emergency`
+* `error`
+* `info`
+* `none`
+* `notice`
+* `warning`
+
+In addition to a log level, you must also select a [facility
+level](https://en.wikipedia.org/wiki/Syslog#Facility) for syslog
+messages from among the available levels, which are listed below. The
+default is `daemon`.
+
+* `auth`
+* `authpriv`
+* `clock`
+* `cron`
+* `daemon`
+* `ftp`
+* `kern`
+* `lpr`
+* `mail`
+* `news`
+* `syslog`
+* `user`
+* `uucp`
+
+In addition to these options, you may also choose one of `local0`
+through `local7`.
+
+## Console Logs
+
+Riak console logs can be emitted to one of three places: to a log file
+(you can choose the name and location of that file), to standard output,
+or to neither. 
This is determined by the value that you give to the
+`log.console` parameter, which gives you one of four options:
+
+* `file` - Console logs will be emitted to a file. This is Riak's
+  default behavior. The location of that file is determined by the
+  `log.console.file` parameter. The default location is
+  `./log/console.log` on an installation from [source]({{<baseurl>}}riak/kv/3.0.4/setup/installing/source), but will differ on platform-specific installation,
+  e.g. `/var/log/riak` on Ubuntu, Debian, CentOS, and RHEL or
+  `/opt/riak/log` on Solaris-based platforms.
+* `console` - Console logs will be emitted to standard output, which
+  can be viewed by running the [`riak attach-direct`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-cli/#attach-direct) command
+* `both` - Console logs will be emitted both to a file and to standard
+  output
+* `off` - Console log messages will be disabled
+
+In addition to the placement of console logs, you can also choose
+the severity of those messages using the `log.console.level` parameter.
+The following four options are available:
+
+* `info` (the default)
+* `debug`
+* `warning`
+* `error`
+
+## Enabling and Disabling Debug Logging
+
+Check out [Cluster Operations: Enabling and Disabling Debug Logging][cluster ops log].
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/multi-datacenter.md b/content/riak/kv/3.0.4/using/reference/multi-datacenter.md
new file mode 100644
index 0000000000..c833e2eeb3
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/multi-datacenter.md
@@ -0,0 +1,53 @@
+---
+title: "Multi-Datacenter Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Multi-Datacenter"
+    identifier: "managing_ref_mdc"
+    weight: 113
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+---
+
+[ref mdc stats]: ./statistics
+[ref mdc per bucket]: ./per-bucket-replication
+[ref mdc monitor]: ./monitoring
+[ref mdc comparison]: ./comparison
+
+## In This Section
+
+#### [Multi-Datacenter Replication Reference: Statistics][ref mdc stats]
+
+Describes the output of the `riak-repl status` interface.
+
+[Learn More >>][ref mdc stats]
+
+
+#### [Multi-Datacenter Replication Reference: Per Bucket][ref mdc per bucket]
+
+Details enabling & disabling of per-bucket replication.
+
+[Learn More >>][ref mdc per bucket]
+
+
+#### [Multi-Datacenter Replication Reference: Monitoring][ref mdc monitor]
+
+Overview of monitoring in a Multi-Datacenter environment.
+
+[Learn More >>][ref mdc monitor]
+
+
+#### [Multi-Datacenter Replication Reference: Comparison][ref mdc comparison]
+
+Compares Version 2 and Version 3 of Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][ref mdc comparison]
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/multi-datacenter/comparison.md b/content/riak/kv/3.0.4/using/reference/multi-datacenter/comparison.md
new file mode 100644
index 0000000000..f9d4861b36
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/multi-datacenter/comparison.md
@@ -0,0 +1,100 @@
+---
+title: "Multi-Datacenter Replication Reference: Comparison"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Comparison"
+    identifier: "managing_ref_mdc_comparison"
+    weight: 103
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/mdc/comparison
+  - /riak/kv/3.0.4/ops/mdc/comparison
+---
+
+This document is a systematic comparison of [Version 2]({{<baseurl>}}riak/kv/3.0.4/using/reference/v2-multi-datacenter) and [Version 3]({{<baseurl>}}riak/kv/3.0.4/using/reference/v3-multi-datacenter) of Riak's Multi-Datacenter
+Replication capabilities.
+
+{{% note title="Important note on mixing versions" %}}
+If you are installing Riak anew, you should use version 3
+replication. Under no circumstances should you mix version 2 and version 3
+replication. This comparison is meant only to list improvements introduced in
+version 3.
+{{% /note %}}
+
+## Version 2
+
+* Version 2 replication relies upon the twin concepts of **listeners**
+  and **sites**. Listeners are the sources of replication data, while
+  sites are the destination of replication data. Sites and listeners are
+  manually configured on each node in a cluster. This can be a burden to
+  the administrator as clusters become larger.
+* A single connection tied to the **cluster leader** manages all
+  replication communications. This can cause performance problems on the
+  leader and is a bottleneck for realtime and fullsync replication data.
+* Connections are established from site to listener. This can be
+  confusing for firewall administrators.
+* The realtime replication queue will be lost if the replication
+  connection breaks, even if it's re-established. Reconciling data in
+  this situation would require manual intervention using either of the
+  following:
+  * a fullsync
+  * another Riak write to the key/value on the listener, thus
+    re-queueing the object
+* Riak CS MDC `proxy_get` connections can only request data from a
+  single leader node
+
+### When to use version 2 replication
+
+* If you are running clusters below version 1.3.0 of Riak Enterprise,
+  version 2 replication is the only method of replication available.
+* In the Riak 1.3 series, version 3 replication was provided as a
+  technology preview and did not have feature parity with version 2.
+  Feature parity was achieved in the Riak 1.4 series.
+
+## Version 3
+
+* Version 3 replication uses the twin concepts of **sources** and
+  **sinks**. A source is considered the primary provider of replication
+  data, whereas a sink is the destination of replication data.
+* Establishing replication connections between clusters has been
+  greatly simplified. A single `riak-repl connect` command needs to be
+  issued from a source cluster to a sink cluster. IP and port
+  information of all nodes that can participate in replication on both
+  source and sink clusters are exchanged by the **replication cluster
+  manager**. The replication cluster manager also tracks nodes joining
+  and leaving the cluster dynamically.
+* If the source has M nodes, and the sink has N nodes, there will be M
+  realtime connections. 
Connections aren't tied to a leader node as they
+  are with version 2 replication.
+* Communications for realtime, fullsync, and `proxy_get` operations are
+  multiplexed over the same connection for each node participating in
+  replication. This reduces the amount of firewall configuration on both
+  sources and sinks.
+* A fullsync coordinator runs on a leader of the source cluster. The
+  coordinator assigns work across nodes in the source cluster in an
+  optimized fashion.
+* Realtime replication establishes a bounded queue on each source node
+  that is shared between *all* sinks. This queue requires consumers to
+  acknowledge objects when they have been replicated. Dropped TCP
+  connections won't drop objects from the queue.
+* If a node in the source cluster is shut down via the command line, its
+  realtime replication queue is migrated to other running nodes in the
+  source cluster.
+* Network statistics are kept per socket.
+* Fullsyncs between clusters can be tuned to control the maximum number
+  of workers that will run on a source node, a sink node, and across the
+  entire source cluster. This allows for limiting impact on the cluster
+  and dialing in fullsync performance.
+* Version 3 is able to take advantage of [Active Anti-Entropy]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/) \(AAE)
+  technology, which can greatly improve fullsync performance.
+* Riak CS MDC `proxy_get` connections will be distributed across the
+  source cluster (as CS blocks are requested from the sink cluster in
+  this scenario).
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/3.0.4/using/reference/multi-datacenter/monitoring.md
new file mode 100644
index 0000000000..66b9579874
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/multi-datacenter/monitoring.md
@@ -0,0 +1,170 @@
+---
+title: "Multi-Datacenter Replication Reference: Monitoring"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Monitoring"
+    identifier: "managing_ref_mdc_monitor"
+    weight: 102
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/mdc/monitoring
+  - /riak/kv/3.0.4/ops/mdc/monitoring
+---
+
+Monitoring Riak's realtime replication allows you to identify trends and
+to receive alerts during times when replication is halted or delayed.
+Issues or delays in replication can be caused by:
+
+* Sudden increases or spikes in write traffic
+* Network connectivity issues or outages
+* Errors experienced in Riak
+
+Identification and trending of issues or delays in realtime replication
+are important for identifying a root cause, while alerting is important
+for addressing any SLA-impacting issues or delays. We recommend
+combining the two approaches below when monitoring Riak's realtime
+replication:
+
+* Monitor Riak's replication status output, from either `riak-repl
+  status` or the HTTP `/riak-repl/stats` endpoint
+* Use canary (test) objects to test replication and establish trip times
+  from source to sink clusters
+
+{{% note title="Note on querying and time windows" %}}
+Riak's statistics are calculated over a sliding 60-second window. Each time
+you query the stats interface, each sliding statistic shown is a sum or
+histogram value calculated from the previous 60 seconds of data. Because of
+this, the stats interface should not be queried more than once per minute. 
+{{% /note %}}
+
+## Statistics
+
+The following questions can be answered through the monitoring and
+graphing of realtime replication statistics:
+
+* Is the realtime replication queue backed up?
+* Have any errors occurred on either the source or sink cluster?
+* Have any objects been dropped from the realtime queue?
+
+#### Is the realtime replication queue backed up?
+
+Identifying times when the realtime replication queue experiences
+increases in the number of `pending` objects can help identify problems
+with realtime replication or identify times when replication becomes
+overloaded due to increases in traffic. The `pending` statistic, found
+under the `realtime_queue_stats` section of the replication status
+output, should be monitored and graphed. Graphing this statistic allows
+you to identify trends in the number of `pending` objects. Any repeating
+or predictable trend in this statistic can be used to help identify a
+need for tuning and capacity changes, while unexpected variation in this
+statistic may indicate either sudden changes in load or errors at the
+network, system, or Riak level.
+
+#### Have any errors occurred on either the source or sink cluster?
+
+Errors experienced on either the source or sink cluster can result in
+failure to replicate object(s) via realtime replication. The top-level
+`rt_dirty` statistic in `riak-repl status` indicates whether such an
+error has occurred and how many times. This statistic only tracks
+errors and does not definitively indicate that an object was not
+successfully replicated. For this reason, a fullsync should be performed
+any time `rt_dirty` is non-zero. `rt_dirty` is then reset to zero once a
+fullsync successfully completes.
+
+The value of `rt_dirty` quantifies the number of errors that have
+occurred, and it should be graphed. Since any non-zero value indicates an
+error, an alert should be set so that a fullsync can be performed (if
+not regularly scheduled). Like realtime queue back-ups, trends in
+`rt_dirty` can reveal problems with the network, system, or Riak.
+
+#### Have any objects been dropped from the realtime queue?
+
+The realtime replication queue will drop objects when the queue is full,
+with the dropped object(s) being the last (oldest) in the queue. Each
+time an object is dropped, the `drops` statistic, which can be found
+under the `realtime_queue_stats` section of the replication status
+output, is incremented. An object dropped from the queue has not been
+replicated successfully, and a fullsync should be performed when a drop
+occurs. A dropped object can indicate a halt or delay in replication or
+indicate that the realtime queue is overloaded. In cases of high load,
+increases to the maximum size of the queue (displayed in the
+`realtime_queue_stats` section of the replication status output as
+`max_bytes`) can be made to accommodate a usage pattern of expected high
+load.
+
+Although the above statistics have been highlighted to answer specific
+questions, other statistics can also be helpful in diagnosing issues
+with realtime replication. We recommend graphing any statistic that is
+reported as a number. While their values and trends may not answer
+common questions or those we've highlighted here, they may nonetheless
+be important when investigating issues in the future. Other questions
+that cannot be answered through statistics alone may be addressed
+through the use of canary objects.
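+
+As a starting point for that kind of graphing and alerting, here is a
+minimal polling sketch over the statistics discussed above (field names
+as described in the Statistics reference; remember the
+one-query-per-minute guidance):
+
+```bash
+# Sample the realtime replication statistics, at most once per minute
+riak-repl status | grep -E 'rt_dirty|pending|drops'
+```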
+
+### Canary Objects
+
+Canary object testing is a technique that uses a test object stored in
+your environment with your production data but not used or modified by
+your application. This allows the test object to have predictable states
+and to be used to answer questions about the functionality and duration
+of realtime replication.
+
+The general process for using canary objects to test realtime replication is:
+
+1. Perform a GET for your canary object on both your source and sink
+   clusters, noting their states. The state of the object in each cluster
+   can be referred to as state `S0`, or the object's initial state.
+2. PUT an update for your canary object to the source cluster, updating
+   the state of the object to the next state, `S1`.
+3. Perform a GET for your canary on the sink cluster, comparing the state
+   of the object on the source cluster to the state of the object on the
+   sink cluster.
+
+By expanding upon the general process above, the following questions can
+be answered:
+
+* Is a backed-up realtime replication queue still replicating objects
+  within a defined SLA?
+* How long is it taking for objects to be replicated from the source
+  cluster to the sink cluster?
+
+#### Is a backed-up realtime replication queue still replicating objects within a defined SLA?
+
+Building on the final step of the general process, we can determine if
+our objects are being replicated from the source cluster to the sink
+cluster within a certain SLA time period by adding the following steps:
+
+- If the state of the object on the source cluster is not equal to the
+  state of the object on the sink cluster, repeat step 3 until an SLA
+  time threshold is exceeded.
+- If the SLA time threshold is exceeded, alert that replication is not
+  meeting the necessary SLA.
+
+#### How long is it taking for objects to be replicated from the source cluster to the sink cluster?
+
+Getting a rough estimate of how long it takes an object PUT to a source
+cluster to be replicated to a sink cluster can be done by either:
+
+* Comparing the time the object was PUT to the source with the time the
+  states of the object in the source and sink were equivalent
+* Comparing the timestamps of the object on the source and sink when the
+  states are equivalent
+
+These are rough estimates, as neither method is 100% accurate. The first
+method relies on a timestamp for a GET and subsequent successful
+comparison, which means that the object was replicated prior to that
+timestamp; the second method relies on the system clocks of two
+different machines, which may not be in sync.
+
+It's important to note that each node in a cluster has its own realtime
+replication queue. The general process needs to be applied to every
+node in the source cluster, with a variety of canary objects and states,
+to get a complete picture of realtime replication between two clusters.
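+
+Pulling the steps above together, here is a rough shell sketch of a
+single canary check. The cluster addresses, bucket, key, and SLA window
+are all hypothetical, and the URLs follow the bucket type/bucket/key
+HTTP scheme used elsewhere in these docs:
+
+```bash
+SOURCE=http://source-node:8098
+SINK=http://sink-node:8098
+KEY=types/default/buckets/canary/keys/repl_canary
+SLA_SECONDS=30
+
+# Step 2: write the next canary state (S1) to the source cluster
+curl -XPUT -H "Content-Type: text/plain" -d "S1" "$SOURCE/$KEY"
+
+# Step 3: poll the sink until it reports S1 or the SLA window expires
+for i in $(seq 1 "$SLA_SECONDS"); do
+  if [ "$(curl -s "$SINK/$KEY")" = "S1" ]; then
+    echo "canary replicated after ~${i}s"
+    exit 0
+  fi
+  sleep 1
+done
+echo "SLA exceeded: canary not replicated within ${SLA_SECONDS}s" >&2
+exit 1
+```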
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/3.0.4/using/reference/multi-datacenter/per-bucket-replication.md
new file mode 100644
index 0000000000..6b70bd96e0
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/multi-datacenter/per-bucket-replication.md
@@ -0,0 +1,66 @@
+---
+title: "Multi-Datacenter Replication Reference: Per Bucket"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Per Bucket"
+    identifier: "managing_ref_mdc_per_bucket"
+    weight: 101
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/mdc/per-bucket
+  - /riak/kv/3.0.4/ops/mdc/per-bucket
+---
+
+To enable or disable replication per bucket, you can use the `repl`
+bucket property.
+
+The set of valid `repl` values changed between Riak Enterprise versions
+1.1 and 1.2.
+
+These `repl` values are available in Riak Enterprise version 1.1 and
+above:
+
+ * `true` - Enable replication (realtime + fullsync)
+ * `false` - Disable replication (realtime + fullsync)
+
+These option values are only available in Riak Enterprise version 1.2
+and above:
+
+ * `realtime` - Replication only occurs in realtime for this bucket
+ * `fullsync` - Replication only occurs during a fullsync operation
+ * `both` - Replication occurs in realtime and during fullsync
+
+### Example of Disabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket/props \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":false}}'
+```
+
+### Example of Enabling
+
+```curl
+curl -v -XPUT http://127.0.0.1:8098/buckets/my_bucket/props \
+  -H "Content-Type: application/json" \
+  -d '{"props":{"repl":true}}'
+```
+
+## How Bucket Properties Work in Riak KV
+
+When using Multi-Datacenter Replication, each bucket's write properties
+are derived from the bucket's properties in the destination cluster. If
+the bucket doesn't exist, the default properties of the destination
+cluster are used.
+
+It's important to note that this goes for properties such as `backend`.
+If the bucket doesn't exist in the destination cluster, Riak will create
+it with the default backend and _not_ with the backend used in the
+source cluster.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/multi-datacenter/statistics.md b/content/riak/kv/3.0.4/using/reference/multi-datacenter/statistics.md
new file mode 100644
index 0000000000..a8921226a7
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/multi-datacenter/statistics.md
@@ -0,0 +1,244 @@
+---
+title: "Multi-Datacenter Replication Reference: Statistics"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Statistics"
+    identifier: "managing_ref_mdc_stats"
+    weight: 100
+    parent: "managing_ref_mdc"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/mdc/statistics
+  - /riak/kv/3.0.4/ops/mdc/statistics
+---
+
+The following definitions describe the output of `riak-repl status`.
+Both Version 2 and Version 3 Replication statistics can be obtained
+using the `riak-repl status` command.
+
+There are two things that you should note:
+
+1. Many of these statistics will appear only on the current
+   leader node
+2. 
The counts for all statistics will be reset to 0 upon restarting Riak
+   unless otherwise noted
+
+Field | Description
+:-----|:----------
+`cluster_leader` | Which node is the current leader of the cluster
+`connected_clusters` | A list of all sink clusters to which this source is connected
+
+## Performance
+
+The `riak-repl status` command should not be executed more than once a
+minute, as statistics are recalculated every time the command is
+executed, and some statistics require network communication between
+nodes. This performance note also applies to the HTTP `/riak-repl/stats`
+endpoint.
+
+## Realtime Replication Statistics
+
+Statistics for both the source and sink sides of realtime replication.
+These values can be found under either `sources.source_stats` or
+`sinks.sink_stats`.
+
+Field | Description
+------|------------
+`realtime_enabled` | A list of all realtime sinks that are enabled
+`realtime_started` | A list of all realtime sinks that are started
+`rt_dirty` | The number of errors detected that can prevent objects from being replicated via realtime. These include errors on the source or sink connection, or realtime queue overload resulting in objects being dropped from the queue. *This value will persist across restarts until a fullsync is complete.*
+`rt_sink_errors` | The number of sink errors detected on the source node. This value will be reset to 0 after a node restarts.
+`rt_sink_connected_to.source_drops` | The number of dropped put transfers from the perspective of the sink cluster
+`rt_source_errors` | The number of source errors detected on the source node. This value will be reset to 0 after a node restarts.
+
+Field | Description
+------|------------
+`rt_source_connected_to` | The name of the sink cluster to which the source cluster is connected
+`rt_sink_connected_to` | The name of the source cluster to which the sink cluster is connected
+`connected` | If `true`, then the source is connected to a sink (or vice versa)
+`objects` | The number of realtime replication objects that have been successfully transmitted to the sink cluster
+`sent_seq` | The last realtime queue sequence number that has been transmitted
+`acked_seq` | The last realtime queue sequence number that has been acknowledged
+`expect_seq` | The next realtime queue sequence number that is expected
+`hb_rtt` | Realtime replication heartbeat round-trip time in milliseconds, recorded on the replication source
+`hb_last` | `{MegaSeconds, Seconds, MicroSeconds}` since a heartbeat message was received on the realtime sink
+
+
+These values are under `realtime_queue_stats`.
+
+Field | Description
+------|------------
+`bytes` | The size in bytes of all objects currently in the realtime queue
+`consumers` | A list of source consumers of the realtime queue
+`consumers.<clustername>.drops` | The number of dropped realtime sync put transfers per sink cluster, from the perspective of the source cluster ("dropped" in this context meaning either that the outgoing data queue was full or that there was a connection error)
+`drops` | The number of objects dropped from the realtime queue as the result of the queue being full or other errors
+`errs` | The number of errors while pushing/popping from the realtime queue
+`overload_drops` | The number of put transfers that have been dropped due to an overload of the message queue of the Erlang process responsible for processing outgoing transfers
+`pending` | The number of objects waiting to be sent to the sink cluster
+`sinkclustername` | A consumer of the realtime queue
+`unacked` | The number of objects waiting to be acknowledged by a queue consumer
+
+
+## Fullsync Replication Statistics
+
+Field | Description
+------|------------
+`fullsync_enabled` | A list of all sinks that are enabled
+`fullsync_running` | A list of all sinks that are running
+`server_fullsyncs` | The number of fullsync operations that have occurred since the server was started
+`fullsyncs_completed` | The number of fullsyncs that have been completed to the specified sink cluster.
+`fullsync_start_time` | The time the current fullsync to the specified cluster began.
+`last_fullsync_duration` | The duration (in seconds) of the last completed fullsync.
+
+If this cluster is acting as a **source**, the `fullsync_coordinator` field returns a list of `{<sink_clustername>:<fullsync_stats>}`. If this cluster is acting as a **sink**, the `fullsync_coordinator_srv` field returns a list of `{<LocalIP:Port>:<fullsync_coordinator_srv_stats>}`.
+
+Those fields are described in the following tables.
+
+Field | Description
+------|------------
+`cluster` | The name of the sink cluster
+`queued` | The number of partitions that are waiting for an available process
+`in_progress` | The number of partitions that are being synced
+`starting` | The number of partitions connecting to the remote cluster
+`successful_exits` | The number of partitions successfully synced. When fullsync completes, this will equal the total number of partitions in the ring.
+`error_exits` | The number of partitions whose sync failed or was aborted. These partitions will be re-queued and synced again later.
+`running_stats` | `[{<PID>, <stats>},…]` Any running sync processes are listed here, and described in the table below
+`socket` | See [Socket Statistics](#socket-statistics)
+`fullsync_suggested` | Realtime replication errors occurred on these nodes; a fullsync is suggested
+`fullsync_suggested_during_fs` | Realtime replication errors occurred on these nodes while a fullsync was already in progress. A fullsync is suggested after the current fullsync completes. These values will be moved to the `fullsync_suggested` value when the current fullsync completes.
+`socket` | `{peername: <RemoteIP:Port>, sockname: <LocalIP:Port>}`
+
+The `running_stats` field contains the following fields.
+
+Field | Description
+------|------------
+`node` | The local cluster source node currently participating in fullsync replication
+`site` | The name of the sink cluster. *Warning: This will be renamed in future versions of Riak*.
+`strategy` | The strategy used to fulfill fullsync replication. In previous versions of replication, different strategies could be configured, so this value could change depending on your replication needs.
+`fullsync_worker` | The Erlang process ID of the fullsync worker.
+`socket` | See [Socket Statistics](#socket-statistics)
+`state` | The current state of fullsync replication. This can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul>
+`fullsync` | The partition that is currently being synchronized with the sink cluster
+`partition_start` | Elapsed time in seconds since the *fullsync* partition started replication to a sink
+`stage_start` | Elapsed time in seconds since the `state` started running on the source
+`get_pool_size` | The number of workers that are used to read data from Riak during a fullsync
+
+## Socket Statistics
+
+Many sections of the status output include a `socket` section. A reading is taken once every 10 seconds, and the last 7 readings are stored.
+
+Field | Description
+------|------------
+`peername` | `<ip:port>` The address and port for the other end of a connection
+`recv_avg` | The average size of packets in bytes received by the socket
+`recv_cnt` | The number of packets received by the socket
+`recv_dvi` | The average packet size deviation in bytes received by the socket
+`recv_kbps` | Socket kilobits/second received
+`recv_max` | Size of the largest packet in bytes received by the socket
+`send_cnt` | Number of packets sent from the socket
+`send_kbps` | Socket kilobits/second sent
+`send_pend` | The number of bytes in the Erlang VM to be sent over the socket
+`sockname` | `<host:port>` The address and port for "this end" of the connection
+
+## Version 2 Replication Statistics
+
+The following definitions describe the output of `riak-repl status`.
+Please note that many of these statistics will only appear on the
+current leader node.
+
+**Note**: All counts will be reset to 0 upon restarting Riak.
+
+Field | Description
+------|------------
+`listener_[nodeid]` | Defines a replication listener (primary) that is running on node `[nodeid]`
+`[sitename]_ips` | Defines a replication site
+`client_bytes_recv` | The total number of bytes the client has received since the server has been started
+`client_bytes_sent` | The total number of bytes sent to all connected secondaries
+`client_connect_errors` | The number of TCP/IP connection errors
+`client_connects` | A count of the number of sink connections made to this node.
+`client_redirect` | If a client connects to a non-leader node, it will be redirected to a leader node
+`client_rx_kbps` | A snapshot of the sink received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`client_tx_kbps` | A snapshot of the sink sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`elections_elected` | If the replication leader node becomes unresponsive or unavailable, a new leader node in the cluster will be elected
+`elections_leader_changed` | The number of times a Riak node has surrendered leadership
+`objects_dropped_no_clients` | If the realtime replication work queue is full and there are no clients to receive objects, then objects will be dropped from the queue. These objects will be synchronized during a fullsync operation.
+`objects_dropped_no_leader` | If a sink cannot connect to a leader, objects will be dropped during realtime replication
+`objects_forwarded` | The number of Riak objects forwarded to the leader to participate in replication. *Please note that this value will only be accurate on a non-leader node*.
+`objects_sent` | The number of objects sent via realtime replication
+`server_bytes_recv` | The total number of bytes the primary has received
+`server_bytes_sent` | The total number of bytes the primary has sent
+`server_connect_errors` | The number of primary to sink connection errors
+`server_connects` | The number of times the primary connects to the client sink
+`server_rx_kbps` | A snapshot of the primary received kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`server_tx_kbps` | A snapshot of the primary sent kilobits/second taken once a minute. The past 8 snapshots are stored in this list. Newest snapshots appear on the left side of the list.
+`leader` | Which node is the current leader of the cluster for Version 2 Replication
+`local_leader_message_queue_len` | The length of the object queue on the leader
+`local_leader_heap_size` | The amount of memory the leader is using
+`client_stats` | See [Client Statistics](#client-statistics)
+`server_stats` | See [Server Statistics](#server-statistics)
+
+## Client Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the sink is running on
+`site` | The connected site (sink) name. **Warning**: This will be renamed in a future version of Riak.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`waiting_to_retry` | The primaries currently waiting to retry replication after a failure
+`connected` | A list of connected clients<ul><li>**`connected`** The IP address and port of a connected sink</li><li>**`cluster_name`** The name of the connected sink</li><li>**`connecting`** The PID, IP address, and port of a client currently establishing a connection</li></ul>
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if keylist strategy is being used. They can be used by Basho support to identify replication issues.<ul><li>**`request_partition`**</li><li>**`wait_for_fullsync`**</li><li>**`send_keylist`**</li><li>**`wait_ack`**</li></ul>
+
+
+## Server Statistics
+
+Field | Description
+------|------------
+`node` | A unique ID for the Riak node that the source is running on
+`site` | The connected site (sink) name this source is configured with. *Warning: This will be renamed in a future version of Riak*.
+`strategy` | A replication strategy defines an implementation of the Riak Replication protocol. Valid values: `keylist`, `syncv1`.
+`fullsync_worker` | The Erlang process ID of the fullsync worker
+`bounded_queue` | See [Bounded Queue](#bounded-queue)
+`state` | Shows what the replication strategy is currently processing. The following definitions appear in the status output if keylist strategy is being used. 
They can be used by Basho support to identify replication issues.<ul><li>**`wait_for_partition`**</li><li>**`build_keylist`**</li><li>**`wait_keylist`**</li><li>**`diff_bloom`**</li><li>**`diff_keylist`**</li></ul> +`message_queue_len` | The number of Erlang messages that are waiting to be processed by the server + + +## Bounded Queue + +The bounded queue is responsible for holding objects that are waiting to +participate in realtime replication. Please see the [Riak V2 MDC Replication Configuration][config v2 mdc] or [Riak V3 MDC Replication Configuration][config v3 mdc] guides for +more information. + +Field | Description +------|------------ +`queue_pid` | The Erlang process ID of the bounded queue +`dropped_count` | The number of objects that failed to be enqueued in the bounded queue due to the queue being full. *These objects will be replicated during the next fullsync operation*. +`queue_length` | The number of Riak objects currently in the bounded queue +`queue_byte_size` | The size of all objects currently in the queue +`queue_max_size `| The number of bytes the queue can hold before objects are dropped. *These objects will be replicated during the next fullsync operation*. +`queue_percentage` | The percentage of the queue that is full +`queue_pending` | The current count of "in-flight" objects we've sent that the client has not acknowledged +`queue_max_pending` | The maximum number of objects that can be "in flight" before we refuse to send any more. + + +## Accessing Replication Web-Based Statistics + +These stats can be accessed via the command line with the following +command: + +```curl +curl -q http://127.0.0.1:8098/riak-repl/stats +``` + +A simple way to view formatted statistics is to use a command such as: + +```curl +curl -q http://127.0.0.1:8098/riak-repl/stats | json_pp +``` + + + + diff --git a/content/riak/kv/3.0.4/using/reference/object-deletion.md b/content/riak/kv/3.0.4/using/reference/object-deletion.md new file mode 100644 index 0000000000..6abd918fbe --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/object-deletion.md @@ -0,0 +1,121 @@ +--- +title: "Object Deletion Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Object Deletion" + identifier: "managing_ref_object_deletion" + weight: 103 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.4/ops/advanced/deletion +--- + +[concept eventual consistency]: ../../../learn/concepts/eventual-consistency +[concept clusters]: ../../../learn/concepts/clusters +[glossary vnode]: ../../../learn/glossary/#vnode +[usage delete objects]: ../../../developing/usage/deleting-objects +[developing keylist]: ../../../developing/api/http/list-keys +[developing mapreduce]: ../../../developing/usage/mapreduce +[cluster mdc]: ../../cluster-operations/v3-multi-datacenter +[config advanced]: ../../../configuring/reference/#advanced-configuration +[glossary sloppy quorum]: ../../../learn/glossary/#sloppy-quorum +[bitcask merging]: ../../../setup/planning/backend/bitcask/#disk-usage-and-merging-settings +[leveldb compaction]: ../../../setup/planning/backend/leveldb/#compaction + +In single-server, non-clustered data storage systems, object deletion +is a trivial process. 
In an [eventually consistent][concept eventual consistency], [clustered][concept clusters] system like Riak, however, +object deletion is far less trivial because objects live on multiple +[nodes](../../../learn/glossary/#nodes), which means that a deletion process must be chosen to determine when an object can be removed from the storage backend. + +## Object Deletion Example + +The problem of object deletion in distributed systems can be illustrated more concretely using the following example: + +* An object is stored on nodes A, B, and C +* Node C suddenly goes offline due to a network failure +* A client sends a delete request to node A, which forwards that + request to node B, but it cannot reach node C +* On nodes A and B, the object is deleted +* Node C comes back online +* A client attempts to read the object, and the request hits node C +* Node C asks nodes A and B for the object, but they return `not_found`. Node C, on the other hand, still has the object. + +The question here is: how should node C respond to the client? Given only the above information, it isn't possible to determine which of two possible scenarios actually occurred: + +1. the object was deleted on A & B but not on C +2. the object was created on C but not on A & B + +To get around this problem, Riak uses *Tombstones*. + +## Tombstones + +Riak addresses the problem of deletion in distributed systems by replacing the deleted object with a special object called a **tombstone** rather than just removing it. + +This allows Riak to understand the difference between an object that has been deleted, and one that was never written in the first place. A tombstone specifically has `X-Riak-Deleted` = `true` in the metadata and a value of `<<>>` (the Erlang empty binary) in its contents, whereas an unwritten object has *no entry at all*. + +The problem with tombstones is that they take up space, albeit not very much. For this reason, Riak can be configured to automatically remove tombstones after a set period of time. This process is called **reaping**. + +After being reaped, a tombstone is completely removed, and the object entry ceases to exist entirely (as if it had never been written to). + +## Configuring Object Deletion + +The `delete_mode` setting in a cluster's [configuration files][config advanced] will determine how long a tombstone will remain before being reaped. + +There are three possible settings: + +* `keep` - Disables tombstone removal +* `immediate` - The tombstone is removed as soon as the request is + received +* Custom time interval - How long to wait until the tombstone is + removed, expressed in milliseconds. The default is `3000`, i.e. to + wait 3 seconds + +In general, we recommend setting the `delete_mode` parameter to `keep` +if you plan to delete and recreate objects under the same key. This protects against failure scenario cases in which a deleted object may be resurrected. + +Setting `delete_mode` to `immediate` can be useful in situations in +which an aggressive space reclamation process is necessary, such as +when running [MapReduce jobs][developing mapreduce], but we do not recommend +this in general. + +Setting `delete_mode` to a longer time duration than the default can be +useful in certain cases involving [Multi-Datacenter Replication][cluster mdc], e.g. when +network connectivity is an issue. + +## Deletion from Backends + +When attempting to reclaim disk space, deleting data may seem like the obvious first step. 
However, in Riak this is not necessarily the best thing to do if the disk is nearly full. This is because Riak's disk-based backends don't remove data immediately. This is true both for the initial deletion when a Riak tombstone is created, and later when that tombstone is reaped.
+
+In the case of Bitcask, a new entry is written in the log with either the Riak tombstone or, after reaping, a Bitcask tombstone. The in-memory key-pointer is then updated to point to this new value.
+
+In LevelDB, a newly written value obscures the earlier value. Again, this is either the Riak tombstone or, after reaping, a LevelDB tombstone.
+
+Some time later, the backends will perform their regular garbage collection procedures. For Bitcask this is [merging][bitcask merging]; for LevelDB it is [compaction][leveldb compaction]. At this time, stale entries containing the original objects will be purged from disk, along with any Bitcask or LevelDB tombstones. Riak tombstones will *not* be purged, because the backends treat them like regular objects.
+
+Thus, reclaiming disk space is not immediate with respect to delete operations, nor even with respect to reaping. Prior to garbage collection, delete operations will actually cause disk space usage to rise slightly.
+
+## Tombstones & Reporting
+
+When designing applications and operating a live Riak cluster, it is important to know how to interpret Riak's responses to requests. With respect to deletion and tombstones, please note the following:
+
+* A delete request is considered a special case of an update. It will fail if the `W` and `PW` values are not satisfied. However, due to [Sloppy Quorum][glossary sloppy quorum], deletes will usually succeed. This does not mean that tombstones have been written over *all* copies of the object, but rather that tombstones have been written on at least `W` nodes, of which at least `PW` are primaries.
+* Successful delete requests do not guarantee successful reaping. If a node fails before its reap timer expires, the reap timer will not automatically recommence upon restart. Rather, the tombstone will remain on the node until a further request finds it. At this time, a new reap timer will be initiated.
+* A GET request that sees a quorum of Riak tombstones will return a `not_found` response to the client, even though internally Riak knows there used to be an object there.
+* A GET request will never see backend tombstones, because the backends report these as `not_found`. To Riak KV, such answers will appear as if the object has never been written. A `not_found` will be sent up to the client in this case, too.
+* A [Keylist][developing keylist] or [MapReduce][developing mapreduce] operation *will* return Riak tombstones, but *not* backend tombstones. This is because these operations fold over the backends directly, and make no attempt to filter Riak tombstones out of the fold by default.
+
+## Client Library Examples
+
+Check out [Deleting Objects][usage delete objects] in the Developing section for examples of deleting objects client-side.
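+
+As a minimal illustration (assuming the HTTP API on its default port and
+a hypothetical bucket and key), a delete issued via `curl` looks like
+this; internally it writes the tombstone described above:
+
+```curl
+curl -XDELETE http://127.0.0.1:8098/buckets/my_bucket/keys/my_key
+```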
+ +## Resources + +* [Discussion on the Riak mailing list](http://lists.basho.com/pipermail/riak-users_lists.basho.com/2011-October/006048.html) + + + + diff --git a/content/riak/kv/3.0.4/using/reference/runtime-interaction.md b/content/riak/kv/3.0.4/using/reference/runtime-interaction.md new file mode 100644 index 0000000000..4569e913a9 --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/runtime-interaction.md @@ -0,0 +1,70 @@ +--- +title: "Runtime Interaction Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Runtime Interaction" + identifier: "managing_ref_runtime_interaction" + weight: 104 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.4/ops/advanced/runtime + - /riak/kv/3.0.4/ops/advanced/runtime +--- + +[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference +[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters + +Riak's [configuration files][config reference] provide a variety of parameters that +enable you to fine-tune how Riak interacts with two important elements +of the underlying operating system: distribution ports and OS +processes/garbage collection. + +## Ports + +Distribution ports connect Riak nodes within a [cluster][concept clusters]. The +following port-related parameters are available: + +* `runtime_health.triggers.distribution_port` - Whether distribution + ports with full input buffers will be counted as busy. + * Default: `on` +* `runtime_health.triggers.port` - Whether ports with full input + buffers will be counted as busy. Ports can represent open files or network sockets. + * Default: `on` +* `runtime_health.thresholds.busy_ports` - The threshold at which a + warning will be triggered about the number of ports that are overly + busy. Ports with full input buffers count toward this threshold. + * Default: `2` + +## Processes + +Riak will log warnings related to busy operating system processes and +garbage collection. You can specify the conditions in which warnings are +triggered using the following parameters: + +* `runtime_health.thresholds.busy_processes` - The threshold at which + a warning will be triggered about the number of processes that are + overly busy. Processes with large heaps or that take a long time to + garbage collect will count toward this threshold. + * Default: `30` +* `runtime_health.triggers.process.heap_size` - A process will be + marked as busy when its size exceeds this size (in bytes). + * Default: `160444000` +* `runtime_health.triggers.process.garbage_collection` - A process + will be marked as busy when it exceeds this amount of time doing + garbage collection. Enabling this setting can cause performance + problems on multi-core systems. + * Default: `off` + * Example when enabled: `50ms` +* `runtime_health.triggers.process.long_schedule` - A process will + become busy when it exceeds this length of time during a single + process scheduling and execution cycle. 
+ * Default: `off` + * Example when enabled: `20ms` + + + + diff --git a/content/riak/kv/3.0.4/using/reference/search.md b/content/riak/kv/3.0.4/using/reference/search.md new file mode 100644 index 0000000000..e0a5bd86bd --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/search.md @@ -0,0 +1,457 @@ +--- +title: "Search Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Search" + identifier: "managing_ref_search" + weight: 109 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.4/dev/advanced/search + - /riak/kv/3.0.4/dev/advanced/search +--- + +[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters +[configuring search]: {{<baseurl>}}riak/kv/3.0.4/configuring/search + +> **Note on search 2.0 vs. legacy search** +> +> This document refers to Riak search 2.0 with +[Solr](http://lucene.apache.org/solr/) integration (codenamed +Yokozuna). + +The project that implements Riak search is codenamed Yokozuna. This is a +more detailed overview of the concepts and reasons behind the design of +Yokozuna, for those interested. If you're simply looking to use Riak +search, you should check out the [Using Search]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search) document. + +![Yokozuna]({{<baseurl>}}images/yokozuna.png) + +## Riak Search is Erlang + +In Erlang OTP, an "application" is a group of modules and Erlang +processes which together perform a specific task. The word application +is confusing because most people think of an application as an entire +program such as Emacs or Photoshop. But Riak Search is just a sub-system +in Riak itself. Erlang applications are often stand-alone, but Riak +Search is more like an appendage of Riak. It requires other subsystems +like Riak Core and KV, but also extends their functionality by providing +search capabilities for KV data. + +The purpose of Riak Search is to bring more sophisticated and robust +query and search support to Riak. Many people consider Lucene and +programs built on top of it, such as Solr, as the standard for +open-source search. There are many successful applications built on +Lucene/Solr, and it sets the standard for the feature set that +developers and users expect. Meanwhile, Riak has a great story as a +highly-available, distributed key/value store. Riak Search takes +advantage of the fact that Riak already knows how to do the distributed +bits, combining its feature set with that of Solr, taking advantage of +the strengths of each. + +Riak Search is a mediator between Riak and Solr. There is nothing +stopping a user from deploying these two programs separately, but this +would leave the user responsible for the glue between them. That glue +can be tricky to write. It requires dealing with monitoring, querying, +indexing, and dissemination of information. + +Unlike Solr by itself, Riak Search knows how to do all of the following: + +* Listen for changes in key/value (KV) data and to make the appropriate + changes to indexes that live in Solr. It also knows how to take a user + query on any node and convert it to a Solr distributed search, which + will correctly cover the entire index without overlap in replicas. +* Take index creation commands and disseminate that information across + the cluster. +* Communicate and monitor the Solr OS process. + +## Solr/JVM OS Process + +Every node in a Riak [cluster][concept clusters] has a corresponding operating +system (OS) process running a JVM which hosts Solr on the Jetty +application server. 
This OS process is a child of the Erlang OS process +running Riak. + +Riak Search has a `gen_server` process which monitors the JVM OS +process. The code for this server is in `yz_solr_proc`. When the JVM +process crashes, this server crashes, causing its supervisor to restart +it. + +If there is more than 1 restart in 45 seconds, the entire Riak node will +be shut down. If Riak Search is enabled and Solr cannot function for +some reason, the Riak node needs to go down so that the user will notice +and take corrective action. + +Conversely, the JVM process monitors the Riak process. If for any reason +Riak goes down hard (e.g. a segfault) the JVM process will also exit. +This double monitoring along with the crash semantics means that neither +process may exist without the other. They are either both up or both +down. + +All other communication between Riak Search and Solr is performed via +HTTP, including querying, indexing, and administration commands. The +ibrowse Erlang HTTP client is used to manage these communications as +both it and the Jetty container hosting Solr pool HTTP connections, +allowing for reuse. Moreover, since there is no `gen_server` involved in +this communication, there's no serialization point to bottleneck. + +## Indexes + +An index, stored as a set of files on disk, is a logical namespace that +contains index entries for objects. Each such index maintains its own +set of files on disk---a critical difference from Riak KV, in which a +bucket is a purely logical entity and not physically disjoint at all. A +Solr index requires significantly less disk space than the corresponding +legacy Riak Search index, depending on the Solr schema used. + +Indexes may be associated with zero or more buckets. At creation time, +however, each index has no associated buckets---unlike the legacy Riak +Search, indexes in the new Riak Search do not implicitly create bucket +associations, meaning that this must be done as a separate configuration +step. + +To associate a bucket with an index, the bucket property `search_index` must +be set to the name of the index you wish to associate. Conversely, in +order to disassociate a bucket you use the sentinel value +`_dont_index_`. + +Many buckets can be associated with the same index. This is useful for +logically partitioning data into different KV buckets which are of the +same type of data, for example if a user wanted to store event objects +but logically partition them in KV by using a date as the bucket name. + +A bucket _cannot_ be associated with many indexes---the `search_index` +property must be a single name, not a list. + +See the [main Search documentation]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search/#simple-setup) for details on creating an index. + +## Extractors + +There is a tension between Riak KV and Solr when it comes to data. Riak +KV treats object values as mostly opaque, and while KV does maintain an +associated content type, it is simply treated as metadata to be returned +to the user to provide context for interpreting the returned object. +Otherwise, the user wouldn't know what type of data it is! + +Solr, on the other hand, wants semi-structured data, more specifically a +flat collection of field-value pairs. "Flat" here means that a field's +value cannot be a nested structure of field-value pairs; the values are +treated as-is (non-composite is another way to say it). 
+
+Because of this mismatch between KV and Solr, Riak Search must act as a
+mediator between the two, meaning it must have a way to inspect a KV
+object and create a structure which Solr can ingest for indexing. In
+Solr this structure is called a **document**. This task of creating a
+Solr document from a Riak object is the job of the **extractor**. To
+perform this task two things must be considered.
+
+**Note**: This isn't the whole story: the fields created by the
+extractor are only a subset of the fields in the final document. Special
+fields needed for Yokozuna to properly query data, as well as tagging
+fields, are also created. This call happens inside `yz_doc:make_doc`.
+
+1. Does an extractor exist to map the content-type of the object to a
+   Solr document?
+2. If so, how is the object's value mapped from one to the other?
+   For example, the value may be `application/json` which contains
+   nested objects. This must somehow be transformed into a flat
+   structure.
+
+The first question is answered by the _extractor mapping_. By default
+Yokozuna ships with extractors for several common data types. Below is a
+table of this default mapping:
+
+Content Type | Erlang Module
+:------------|:-------------
+`application/json` | `yz_json_extractor`
+`application/xml` | `yz_xml_extractor`
+`text/plain` | `yz_text_extractor`
+`text/xml` | `yz_xml_extractor`
+N/A | `yz_noop_extractor`
+
+The answer to the second question is a function of the implementation
+of the extractor module. Every extractor must conform to the
+following Erlang specification:
+
+```erlang
+-type field_name() :: atom() | binary().
+-type field_value() :: binary().
+-type fields() :: [{field_name(), field_value()}].
+
+-spec extract(ObjectValue :: binary(), Options :: proplists:proplist()) ->
+          fields() | {error, term()}.
+```
+
+The value of the object is passed along with options specific to each
+extractor. Assuming the extractor correctly parses the value, it will
+return a list of fields, which are name-value pairs.
+
+The text extractor is the simplest one. By default it will use the
+object's value verbatim and associate it with the field name `text`.
+For example, an object with the value "How much wood could a woodchuck
+chuck if a woodchuck could chuck wood?" would result in the following
+fields list.
+
+```erlang
+[{text, <<"How much wood could a woodchuck chuck if a woodchuck could chuck wood?">>}]
+```
+
+An object with the content type `application/json` is a little trickier.
+JSON can be nested arbitrarily. That is, the key of a top-level object
+can have an object as a value, and this object can have another object
+nested inside, and so on. Yokozuna's JSON extractor must have some method
+of converting this arbitrary nesting into a flat list. It does this by
+concatenating nested object fields with a separator. The default
+separator is `.`. An example should make this clearer.
+
+Below is JSON that represents a person, what city they are from, and what
+cities they have traveled to.
+
+```json
+{
+  "name": "ryan",
+  "info": {
+    "city": "Baltimore",
+    "visited": ["Boston", "New York", "San Francisco"]
+  }
+}
+```
+
+Below is the field list that would be created by the JSON extractor.
+
+```erlang
+[{<<"info.visited">>,<<"San Francisco">>},
+ {<<"info.visited">>,<<"New York">>},
+ {<<"info.visited">>,<<"Boston">>},
+ {<<"info.city">>,<<"Baltimore">>},
+ {<<"name">>,<<"ryan">>}]
+```
+
+Some key points to notice:
+
+* Nested objects have their field names concatenated to form a field
+  name. The default field separator is `.`. This can be modified.
+* Any array causes field names to repeat. This will require that your
+  schema defines this field as multi-valued.
+
+The XML extractor works in a very similar fashion to the JSON extractor
+except it also has element attributes to worry about. To see the
+document created for an object, without actually writing the object, you
+can use the extract HTTP endpoint. This will do a dry-run extraction and
+return the document structure as `application/json`.
+
+```curl
+curl -XPUT http://localhost:8098/search/extract \
+     -H 'Content-Type: application/json' \
+     --data-binary @object.json
+```
+
+## Schemas
+
+Every index must have a schema, which is a collection of field names and
+types. For each document stored, every field must have a matching name
+in the schema, used to determine the field's type, which in turn
+determines how a field's value will be indexed.
+
+Currently, Yokozuna makes no attempts to hide any details of the Solr
+schema: a user creates a schema for Yokozuna just as she would for Solr.
+Here is the general structure of a schema.
+
+
+```xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<schema name="my-schema" version="1.5">
+  <fields>
+    <!-- field definitions go here -->
+  </fields>
+
+  <!-- DO NOT CHANGE THIS -->
+  <uniqueKey>_yz_id</uniqueKey>
+
+  <types>
+    <!-- field type definitions go here -->
+  </types>
+</schema>
+```
+
+The `<fields>` element is where the field name, type, and overriding
+options are declared. Here is an example of a field for indexing dates.
+
+```xml
+<field name="created" type="date" indexed="true" stored="true"/>
+```
+
+The corresponding date type is declared under `<types>` like so.
+
+```xml
+<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+```
+
+You can also find more information on how to customize your own [search schema]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search-schemas).
+
+Yokozuna comes bundled with a [default schema](https://github.com/basho/yokozuna/blob/develop/priv/default_schema.xml)
+called `_yz_default`. This is an extremely general schema which makes
+heavy use of dynamic fields---it is intended for development and
+testing. In production, a schema should be tailored to the data being
+indexed.
+
+## Active Anti-Entropy (AAE)
+
+[Active Anti-Entropy]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/) \(AAE) is the process of discovering and
+correcting entropy (divergence) between the data stored in Riak's
+key-value backend and the indexes stored in Solr. The impetus for AAE is
+that failures come in all shapes and sizes---disk failure, dropped
+messages, network partitions, timeouts, overflowing queues, segment
+faults, power outages, etc. Failures range from obvious to invisible.
+Failure prevention is fraught with failure, as well. How do you prevent
+your prevention system from failing? You don't. Code for detection, not
+prevention. That is the purpose of AAE.
+
+Constantly reading and re-indexing every object in Riak could be quite
+expensive. To minimize the overall cost of detection, AAE makes use of
+hashtrees. Every partition has a pair of hashtrees; one for KV and
+another for Yokozuna. As data is written the hashtrees are updated in
+real-time.
+
+Each tree stores the hash of the object. Periodically a partition is
+selected and the pair of hashtrees is _exchanged_. First the root hashes
+are compared. If equal then there is no more work to do. You could have
+millions of keys in one partition and verifying they **all** agree takes
+the same time as comparing two hashes. If they don't match then the
+root's children are checked and this process continues until the
+individual discrepancies are found. If either side is missing a key or
+the hashes for a key do not match then _repair_ is invoked on that key.
+Repair converges the KV data and its indexes, removing the entropy.
+
+Since failure is inevitable, and absolute prevention impossible, the
+hashtrees themselves may contain some entropy. For example, what if the
+root hashes agree but a divergence exists in the actual data? Simple:
+you assume you can never fully trust the hashtrees, so periodically you
+_expire_ them. When expired, a tree is completely destroyed and then
+re-built from scratch. This requires folding all data for a partition,
+which can be expensive and take some time. For this reason, by default,
+expiration occurs after one week.
+
+For an in-depth look at Riak's AAE process, watch Joseph Blomstedt's
+[screencast](http://coffee.jtuple.com/video/AAE.html).
+
+
+## Analysis & Analyzers
+
+Analysis is the process of breaking apart (analyzing) text into a
+stream of tokens. Solr allows many different methods of analysis,
+an important fact because different field values may represent
+different types of data. For data like unique identifiers, dates, and
+categories you want to index the value verbatim---it shouldn't be
+analyzed at all. For text like product summaries, or a blog post,
+you want to split the value into individual words so that they may be
+queried individually. You may also want to remove common words,
+lowercase words, or perform stemming. This is the process of
+_analysis_.
+
+Solr provides many different field types which analyze data in different
+ways, and custom analyzer chains may be built by stringing together XML
+in the schema file, allowing custom analysis for each field. For more
+information on analysis, see [Search Schema]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search-schemas).
+
+## Tagging
+
+Tagging is the process of adding field-value pairs to be indexed via
+Riak object metadata. It is useful in two scenarios.
+
+1. The object being stored is opaque but your application has metadata
+   about it that should be indexed, for example storing an image with
+   location or category metadata.
+
+2. The object being stored is not opaque, but additional indexes must
+   be added _without_ modifying the object's value.
+
+See
+[Tagging](https://github.com/basho/yokozuna/blob/develop/docs/TAGGING.md)
+for more information.
+
+## Coverage
+
+Yokozuna uses _doc-based partitioning_. This means that all index
+entries for a given Riak Object are co-located on the same physical
+machine. To query the entire index all partitions must be contacted.
+Adjacent partitions keep replicas of the same object. Replication allows
+the entire index to be considered by only contacting a subset of the
+partitions. The process of finding a covering set of partitions is known
+as _coverage_.
+
+Each partition in the coverage plan has an owning node. Thus a plan can
+be thought of as a unique set of nodes along with a covering set of
+partitions. Yokozuna treats the node list as physical hostnames and
+passes them to Solr's distributed search via the `shards` parameter.
+Partitions, on the other hand, are treated logically in Yokozuna. All
+partitions for a given node are stored in the same index; unlike KV,
+which uses _partition_ as a physical separation. To properly filter out
+overlapping replicas, the partition data from the cover plan is passed to
+Solr via the filter query (`fq`) parameter.
+
+Calculating a coverage plan is handled by Riak Core. It can be a very
+expensive operation, as much computation is done symbolically, and the
+process amounts to a knapsack problem. The larger the ring, the more
+expensive. Yokozuna takes advantage of the fact that it has no physical
+partitions by computing a coverage plan asynchronously every few
+seconds, caching the plan for query use. In the case of node failure or
+ownership change this could mean a delay between cluster state and the
+cached plan. This is, however, a good trade-off given the performance
+benefits, especially since even without caching there is a race, albeit
+one with a smaller window.
+
+## Statistics
+
+The Riak Search batching subsystem provides statistics on run-time characteristics of search system components. These statistics are accessible via the standard Riak KV stats interfaces and can be monitored through standard enterprise management tools.
+
+
+* `search_index_throughput_(count|one)` - The total count of objects that have been indexed, per Riak node, and the count of objects that have been indexed within the metric measurement window.
+
+* `search_index_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of indexing latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr, divided by the batch size.
+
+* `search_queue_batch_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of batch latency, as measured from the time it takes to send a batch to Solr to the time the response is received from Solr.
+
+* `search_queue_batch_throughput_(count|one)` - The total number of batches delivered into Solr, per Riak node, and the number of batches that have been indexed within the metric measurement window.
+
+* `search_queue_batchsize_(min|mean|max|median)` - The minimum, mean, maximum, and median measurements of the batch size across all indices and Solrq worker processes.
+
+* `search_queue_hwm_purged_(count|one)` - The total number of purged objects, and the number of purged objects within the metric measurement window.
+
+* `search_queue_capacity` - The capacity of the existing queues, expressed as an integral percentage value between 0 and 100. This measurement is based on the ratio of enqueued objects and the configured high water mark.
+
+* `search_queue_drain_(count|one)` - The total number of drain operations, and the number of drain operations within the metric measurement window.
+
+* `search_queue_drain_fail_(count|one)` - The total number of drain failures, and the number of drain failures within the metric measurement window.
+
+* `search_queue_drain_timeout_(count|one)` - The total number of drain timeouts, and the number of drain timeouts within the metric measurement window.
+
+* `search_queue_drain_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of drain latency, as measured from the time it takes to initiate a drain to the time the drain is completed.
+
+* `search_detected_repairs_count` - The total number of AAE repairs that have been detected when comparing YZ and Riak/KV AAE trees. 
Note that this statistic is a measurement of the differences found in the AAE trees; there may be some latency between the time the trees are compared and the time that the repair is written to Solr. + +* `search_blockedvnode_(count|one)` - The total count of vnodes that have been blocked, per Riak node, and the count of blocked vnodes within the metric measurement window. Vnodes are blocked when a Solrq worker exceeds its high water mark, as defined by the [`search.queue.high_watermark`][configuring search] configuration setting. + +* `search_index_fail_(count|one)` - The total count of failed attempts to index, per Riak node, and the count of index failures within the metric measurement window. + +* `search_query_throughput_(count|one)` - The total count of queries, per Riak node, and the count of queries within the metric measurement window. + +* `search_query_latency_(min|mean|max|median|95|99|999)` - The minimum, mean, maximum, median, 95th percentile, 99th percentile, and 99.9th percentile measurements of querying latency, as measured from the time it takes to send a request to Solr to the time the response is received from Solr. + +* `search_query_fail_(count|one)` - The total count of failed queries, per Riak node, and the count of query failures within the metric measurement window. + +* `search_index_bad_entry_count` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. + +* `search_index_bad_entry_one` - the number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) within the past minute. + +* `search_index_extract_fail_count` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) since the last start of Riak. + +* `search_index_extract_fail_one` - the number of failures that have occurred extracting data into a format suitable for Solr (e.g., badly formatted JSON) within the past minute. + +While most of the default values are sufficient, you may have to +increase [`search.solr.start_timeout`][configuring search] as more data is indexed, which may cause Solr to require more time to start. + + + diff --git a/content/riak/kv/3.0.4/using/reference/secondary-indexes.md b/content/riak/kv/3.0.4/using/reference/secondary-indexes.md new file mode 100644 index 0000000000..508f23020d --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/secondary-indexes.md @@ -0,0 +1,76 @@ +--- +title: "Secondary Indexes Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Secondary Indexes" + identifier: "managing_ref_2i" + weight: 110 + parent: "managing_ref" +toc: true +aliases: + - /riak/3.0.4/dev/advanced/2i + - /riak/kv/3.0.4/dev/advanced/2i +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types +[use ref strong consistency]: {{<baseurl>}}riak/kv/3.0.4/using/reference/strong-consistency + +> **Note: Riak Search preferred for querying** +> +> If you're interested in non-primary-key-based querying in Riak, i.e. if +you're looking to go beyond straightforward K/V operations, we now +recommend [Riak Search]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search/) rather than secondary indexes for a variety of reasons. Riak Search has a far more capacious querying API and can be used with all of Riak's storage backends. 
+
+This document provides implementation and other details for Riak's
+[secondary indexes]({{<baseurl>}}riak/kv/3.0.4/developing/usage/secondary-indexes/) \(2i) feature.
+
+## How It Works
+
+Secondary indexes use **document-based partitioning**, a system where
+indexes reside with each document, local to the [vnode]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode). This
+system is also known as a local index. Secondary indexes are a list of
+key/value pairs that are similar to HTTP headers. At write time, objects
+are tagged with index entries consisting of key/value metadata. This
+metadata can be queried to retrieve the matching keys (a brief example
+follows at the end of this page).
+
+![Secondary Index]({{<baseurl>}}images/Secondary-index-example.png)
+
+Indexes reside on multiple machines. Since indexes for an object are
+stored on the same partition as the object itself, query-time
+performance issues might arise. When issuing a query, the system must
+read from a "covering" set of partitions and then merge the results.
+The system looks at how many replicas of data are stored---the N value
+or `n_val`---and determines the minimum number of partitions that it
+must examine (1 / `n_val`) to retrieve a full set of results, also
+taking into account any offline nodes.
+
+An application can modify the indexes for an object by reading an
+object, adding or removing index entries, and then writing the object.
+Finally, an object is automatically removed from all indexes when it is
+deleted. The object's value and its indexes should be thought of as a
+single unit. There is no way to alter the indexes of an object
+independently from the value of an object, and vice versa. Indexing is
+atomic, and is updated in real time when writing an object. This means
+that an object will be present in future index queries as soon as the
+write operation completes.
+
+Riak stores 3 replicas of all objects by default, although this can be
+changed [using bucket types][usage bucket types], which manage buckets' [replication properties]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/replication-properties). The system is capable of generating a full set of results
+from one third of the system’s partitions as long as it chooses the
+right set of partitions. The query is sent to each partition, the index
+data is read, and a list of keys is generated and then sent back to the
+requesting node.
+
+> **Note on 2i and strong consistency**
+>
+> Secondary indexes do not currently work with the [strong consistency][use ref strong consistency] feature introduced in Riak version 2.0. If you store objects in [strongly consistent buckets]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/strong-consistency/#creating-a-strongly-consistent-bucket-type) and attach
+secondary index metadata to those objects, you can still perform
+strongly consistent operations on those objects but the secondary
+indexes will be ignored.
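+
+To make the write-time tagging concrete, below is a minimal sketch using
+the HTTP API. It assumes the default port, a backend that supports 2i
+(LevelDB or Memory), and hypothetical bucket, key, and index names:
+
+```curl
+# Write an object tagged with a binary secondary index entry
+curl -XPOST http://127.0.0.1:8098/buckets/users/keys/john_smith \
+  -H 'x-riak-index-twitter_bin: jsmith123' \
+  -H 'Content-Type: application/json' \
+  -d '{"name": "John Smith"}'
+
+# Query the index to retrieve the matching keys
+curl http://127.0.0.1:8098/buckets/users/index/twitter_bin/jsmith123
+```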
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/snmp.md b/content/riak/kv/3.0.4/using/reference/snmp.md
new file mode 100644
index 0000000000..f17d130076
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/snmp.md
@@ -0,0 +1,166 @@
+---
+title: "Simple Network Management Protocol"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "SNMP"
+    identifier: "managing_ref_snmp"
+    weight: 107
+    parent: "managing_ref"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.4/ops/running/monitoring/snmp
+  - /riak/kv/3.0.4/ops/running/monitoring/snmp
+---
+
+Riak Enterprise provided a built-in SNMP server that allowed an external system, such as Hyperic, to query the Riak node for statistics such as the average get and put times as well as the number of puts and gets. This document covers only SNMP v2c, the last supported version. Support for SNMP was dropped after the release of Riak KV 2.2.3 Enterprise Edition. The configuration examples below are left for those analyzing legacy settings and work only with the Enterprise Edition of Riak KV 2.2.3 or lower.
+
+## Configuration
+
+The first step in configuring your SNMP setup is to edit the appropriate files in the Riak node's `etc/snmp/agent/conf/` directory.
+
+First, edit the `agent.conf` file and set the appropriate IP on which the SNMP server should listen (e.g., `192.168.1.20`):
+
+```erlang
+{intAgentIpAddress, [192,168,1,20]}.
+{intAgentUDPPort, 4000}.
+{snmpEngineID, "agent's engine"}.
+{snmpEngineMaxMessageSize, 484}.
+
+%% Note: The commas in the IP are in the correct format
+```
+
+Next, edit the `community.conf` file if you would like to change your community from public to a different string.
+
+Finally, edit the `standard.conf` file and update it with the proper information:
+
+```erlang
+{sysName, "Riak Node 1"}.
+{sysDescr, "Riak Agent"}.
+{sysContact, "syadmin@company.com"}.
+{sysLocation, "System and Rack Location"}.
+{sysObjectID, [3,6,1,4,1,193,19]}. %% {ericsson otp} - don't change
+{sysServices, 72}. %% don't change
+```
+
+Riak needs to be restarted for configuration changes to take effect.
+
+**Note**: Prior to Riak Enterprise 0.13, SNMP configuration values were not reloaded during a restart.
+
+To force Riak to reload SNMP configuration files on startup:
+
+ 1. Open `app.config` (most package installs place this file in `/etc/riak/`; Solaris package installs place this file in `/opt/riak/etc/`).
+
+ 2. Locate the SNMP term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 3. Add `{force_load, true}` to the `config` term:
+
+    ```erlang
+    {snmp,
+      [{agent,
+        [{config, [{dir, "/etc/riak/snmp/agent/conf/"},
+                   {force_load, true}]},
+         {db_dir, "/var/lib/riak/snmp/agent/db/"}]}]}
+    ```
+
+ 4. Save `app.config`
+
+ 5. Restart Riak
+
+Once you have configured the SNMP settings you can start your Riak node and will be able to snmpwalk the node to verify that the setup is working:
+
+```bash
+$ snmpwalk -OS -c public -v2c -m all 192.168.52.129:4000 .
+```
+
+If you would like to query the OIDs associated with Riak you will need to reference the MIB shipped with Riak. For example, the x86_64 packages have the MIB in the following folder:
+
+```bash
+/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs
+```
+
+This folder can be referenced in the snmpwalk command as follows:
+
+```bash
+$ snmpwalk -OS -c public -v 2c -m ALL \
+    -M +/usr/lib64/riak/lib/riak_snmp-0.2/priv/mibs \
+    192.168.52.129:4000 RIAK
+```
+
+
+## SNMP Counters
+
+**vnodeGets**
+*Type:* Counter
+Number of vnode-level GETs in past minute
+
+**vnodePuts**
+*Type:* Counter
+Number of vnode-level PUTs in past minute
+
+**nodeGets**
+*Type:* Counter
+Number of GETs in past minute
+
+**nodePuts**
+*Type:* Counter
+Number of PUTs in past minute
+
+**nodeGetTimeMean**
+*Type:* Gauge
+Mean GET time (microseconds)
+
+**nodeGetTimeMedian**
+*Type:* Gauge
+Median GET time (microseconds)
+
+**nodeGetTime95**
+*Type:* Gauge
+95th percentile GET time (microseconds)
+
+**nodeGetTime99**
+*Type:* Gauge
+99th percentile GET time (microseconds)
+
+**nodeGetTime100**
+*Type:* Gauge
+Maximum GET time (microseconds)
+
+**nodePutTime95**
+*Type:* Gauge
+95th percentile PUT time (microseconds)
+
+**nodePutTime99**
+*Type:* Gauge
+99th percentile PUT time (microseconds)
+
+**nodePutTime100**
+*Type:* Gauge
+Maximum PUT time (microseconds)
+
+**nodePutTimeMean**
+*Type:* Gauge
+Mean PUT time (microseconds)
+
+**nodePutTimeMedian**
+*Type:* Gauge
+Median PUT time (microseconds)
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/statistics-monitoring.md b/content/riak/kv/3.0.4/using/reference/statistics-monitoring.md
new file mode 100644
index 0000000000..fd0d54f186
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/statistics-monitoring.md
@@ -0,0 +1,395 @@
+---
+title: "Statistics & Monitoring Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Monitoring"
+    identifier: "managing_ref_monitoring"
+    weight: 106
+    parent: "managing_ref"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/stats-and-monitoring
+  - /riak/kv/3.0.4/ops/running/stats-and-monitoring
+---
+
+Riak provides data related to current operating status, which includes
+statistics in the form of counters and histograms. These statistics
+are made available through the HTTP API via the [`/stats`]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/status) endpoint, or through the [`riak-admin`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/) interface, in particular the `stat` and `status` commands.
+
+This page presents the most commonly monitored and gathered
+statistics, as well as numerous solutions for monitoring and gathering
+statistics that our customers and community report using successfully
+in Riak cluster environments. You can learn more about the specific
+Riak statistics provided in the [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/inspecting-node) and [HTTP Status]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/status) documentation.
+
+## System Metrics To Graph
+
+Graphing general system metrics of Riak nodes will help with
+diagnostics and early warnings of potential problems, as well as help
+guide provisioning and scaling decisions.
+
+* CPU (user/system/wait/idle)
+* Processor Load
+* Available Memory
+* Available disk space
+* Used file descriptors
+* Swap Usage
+* IOWait
+* Read operations
+* Write operations
+* Network throughput
+* Network errors
+
+We also recommend tracking your system's virtual memory subsystem and
+writebacks. Things like massive flushes of dirty pages or steadily
+climbing writeback volumes can indicate poor virtual memory tuning.
+More information can be found [here][sysctl_vm_txt] and in our +documentation on [system tuning]({{<baseurl>}}riak/kv/3.0.4/using/performance/#storage-and-file-system-tuning). + +## Riak Metrics to Graph +Riak metrics fall into several general categories: + +1. Throughput metrics +2. Latency metrics +3. Erlang resource usage metrics +4. General Riak load/health metrics + +If graphing all of the [available Riak metrics]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/inspecting-node) is +not practical, you should pick a minimum relevant subset from these +categories. Some of the most helpful metrics are discussed below. + +### Throughput Metrics + +Graphing the throughput stats relevant to your use case is often +helpful for capacity planning and usage trend analysis. In addition, +it helps you establish an expected baseline -- that way, you can +investigate unexpected spikes or dips in the throughput. The +following stats are recorded for operations that happened *during the +last minute*. + +Metric | Relevance | Operations (for the last minute) +:--------|:--------|:-------------------------------- +```node_gets``` | K/V | Reads coordinated by this node +```node_puts``` | K/V | Writes coordinated by this node +```vnode_counter_update``` | Data Types | Update [Counters][data_types_counters] operations coordinated by local vnodes +```vnode_set_update``` | Data Types | Update [Sets][data_types_sets] operations coordinated by local vnodes +```vnode_map_update``` | Data Types | Update [Maps][data_types_maps] operations coordinated by local vnodes +```search_query_throughput_one``` | Search | Search queries on the node +```search_index_throughtput_one``` | Search | Documents indexed by Search +```consistent_gets``` | Strong Consistency | Consistent reads on this node +```consistent_puts``` | Strong Consistency | Consistent writes on this node +```vnode_index_reads``` | Secondary Indexes | Number of local replicas participating in secondary index reads + +Note that there are no separate stats for updates to Flags or +Registers, as these are included in ```vnode_map_update```. + +### Latency Metrics + +As with the throughput metrics, keeping an eye on average (and max) +latency times will help detect usage patterns, and provide advanced +warnings for potential problems. + +{{% note title="Note on FSM Time Stats" %}} +FSM Time Stats represent the amount of time in microseconds required to +traverse the GET or PUT Finite State Machine code, offering a picture of +general node health. From your application's perspective, FSM Time effectively +represents experienced latency. Mean, Median, and 95th-, 99th-, and +100th-percentile (Max) counters are displayed. These are one-minute stats. 
+{{% /note %}} + +Metric | Also | Relevance | Latency (in microseconds) +:------|:-----|:----------|:------------------------- +```node_get_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client read request and subsequent response to client +```node_put_fsm_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | K/V | Time between reception of client write request and subsequent response to client +```object_counter_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Counter operation +```object_set_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Set operation +```object_map_merge_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Data Types | Time it takes to perform an Update Map operation +```search_query_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Search query latency +```search_index_latency_median``` | ```_min```, ```_95```, ```_99```, ```_999```, ```_max``` | Search | Time it takes Search to index a new document +```consistent_get_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent read latency +```consistent_put_time_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Strong Consistency | Strongly consistent write latency + +### Erlang Resource Usage Metrics + +These are system metrics from the perspective of the Erlang VM, +measuring resources allocated and used by Erlang. + +Metric | Notes +:------|:------------------------- +```sys_process_count``` | Number of processes currently running in the Erlang VM +```memory_processes``` | Total amount of memory allocated for Erlang processes (in bytes) +```memory_processes_used``` | Total amount of memory used by Erlang processes (in bytes) + +### General Riak Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Also | Notes +:------|:-----|:------------------ +```node_get_fsm_siblings_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of siblings encountered during all GET operations by this node within the last minute. Watch for abnormally high sibling counts, especially max ones. +```node_get_fsm_objsize_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Object size encountered by this node within the last minute. Abnormally large objects (especially paired with high sibling counts) can indicate sibling explosion. +```riak_search_vnodeq_mean``` | ```_median```, ```_95```, ```_99```, ```_100``` | Number of unprocessed messages in the vnode message queues of the Riak Search subsystem on this node in the last minute. The queues give you an idea of how backed up Solr is getting. 
+```search_index_fail_one``` | | Number of "Failed to index document" errors Search encountered for the last minute +```pbc_active``` | | Number of currently active protocol buffer connections +```pbc_connects``` | | Number of new protocol buffer connections established during the last minute +```read_repairs``` | | Number of read repair operations this node has coordinated in the last minute (determine baseline, watch for abnormal spikes) +```list_fsm_active``` | | Number of List Keys FSMs currently active (should be 0) +```node_get_fsm_rejected``` | | Number of GET FSMs actively being rejected by Sidejob's overload protection +```node_put_fsm_rejected``` | | Number of PUT FSMs actively being rejected by Sidejob's overload protection + +### General Riak Search Load/Health Metrics + +These various stats give a picture of the general level of activity or +load on the Riak node at any given moment. + +Metric | Description +:------|:------------ +`search_index_bad_entry_count` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) since the last restart of Riak. +`search_index_bad_entry_one ` | Number of writes to Solr that have resulted in an error due to the format of the data (e.g., non-unicode data) in the past minute. +`search_index_extract_fail_count` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) since the last start of Riak. +`search_index_extract_fail_one` | Number of failures that have occurred extracting data into a format suitable to insert into Solr (e.g., badly formatted JSON) in the past minute. + + +## Command-line Interface + +The [`riak-admin`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/) tool provides two +interfaces for retrieving statistics and other information: `status` +and `stat`. + +### status + +Running the `riak-admin status` command will return all of the +currently available information from a running node. + +```bash +riak-admin status +``` + +This will return a list of over 300 key/value pairs, like this: + +``` +1-minute stats for 'dev1@127.0.0.1' +------------------------------------------- +connected_nodes : ['dev2@127.0.0.1','dev3@127.0.0.1'] +consistent_get_objsize_100 : 0 +consistent_get_objsize_195 : 0 +... etc ... +``` + +A comprehensive list of available stats can be found in the +[Inspecting a Node]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/inspecting-node/#riak-admin-status) document. + +### stat + +The `riak-admin stat` command is related to the `riak-admin status` +command but provides a more fine-grained interface for interacting with +stats and information. Full documentation of this command can be found +in the [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#stat) document. + +## Statistics and Monitoring Tools + +There are many open source, self-hosted, and service-based solutions for +aggregating and analyzing statistics and log data for the purposes of +monitoring, alerting, and trend analysis on a Riak cluster. Some +solutions provide Riak-specific modules or plugins as noted. + +The following are solutions which customers and community members have +reported success with when used for monitoring the operational status of +their Riak clusters. Community and open source projects are presented +along with commercial and hosted services. 
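+Most of the tools below ultimately consume the same HTTP `/stats` output described above, which you can also inspect by hand. A minimal sketch (assumes the default HTTP listener on port 8098 and `jq` installed; the node address is illustrative):
+
+```bash
+# Fetch the full stats blob from one node and pull out a few of
+# the throughput and latency metrics discussed earlier.
+curl -s http://127.0.0.1:8098/stats | jq '{
+  node_gets,
+  node_puts,
+  node_get_fsm_time_mean,
+  node_put_fsm_time_mean,
+  pbc_active
+}'
+```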
+ +{{% note title="Note on Riak 2.x Statistics Support" %}} +Many of the below tools were either created by third-parties or Basho +engineers for general usage, and have been passed to the community for further +updates. As such, many of the below only aggregate the statistics and messages +that were output by Riak 1.4.x. + +Like all code under [Basho Labs](https://github.com/basho-labs/), the below +tools are "best effort" and have no dedicated Basho support. We both +appreciate and need your contribution to keep these tools stable and up to +date. Please open up a GitHub issue on the repository if you'd like to be a +maintainer. + +Look for banners calling out the tools we've verified that support the latest +Riak 2.x statistics! +{{% /note %}} + +### Self-Hosted Monitoring Tools + +#### Riaknostic + +[Riaknostic](http://riaknostic.basho.com) is a growing suite of +diagnostic checks that can be run against your Riak node to discover +common problems and recommend how to resolve them. These checks are +derived from the experience of the Basho Client Services Team as well as +numerous public discussions on the mailing list, IRC room, and other +online media. + +Riaknostic integrates into the `riak-admin` command via a `diag` +subcommand, and is a great first step in the process of diagnosing and +troubleshooting issues on Riak nodes. + +#### Riak Control + +[Riak Control]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-control/) is Basho's REST-driven user-interface for managing Riak +clusters. It is designed to give you quick insight into the health of +your cluster and allow for easy management of nodes. + +While Riak Control does not currently offer specific monitoring and +statistics aggregation or analysis functionality, it does offer features +which provide immediate insight into overall cluster health, node +status, and handoff operations. + +#### collectd + +[collectd](http://collectd.org) gathers statistics about the system it +is running on and stores them. The statistics are then typically graphed +to find current performance bottlenecks, predict system load, and +analyze trends. + +#### Ganglia + +[Ganglia](http://ganglia.info) is a monitoring system specifically +designed for large, high-performance groups of computers, such as +clusters and grids. Customers and community members using Riak have +reported success in using Ganglia to monitor Riak clusters. + +A [Riak Ganglia module][riak_ganglia] for collecting statistics from +the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/status) endpoint is also available. + +#### Nagios + +{{% note %}} +**Tested and Verified Support for Riak 2.x.** +{{% /note %}} + +[Nagios](http://www.nagios.org) is a monitoring and alerting solution +that can provide information on the status of Riak cluster nodes, in +addition to various types of alerting when particular events occur. +Nagios also offers logging and reporting of events and can be used for +identifying trends and capacity planning. + +A collection of [reusable Riak-specific scripts][riak_nagios] are +available to the community for use with Nagios. + +#### OpenTSDB + +[OpenTSDB](http://opentsdb.net) is a distributed, scalable Time Series Database +(TSDB) used to store, index, and serve metrics from various sources. It can +collect data at a large scale and graph these metrics on the fly. + +A [Riak collector for OpenTSDB][tcollector_riak_plugin] is available as part of +the [tcollector framework][tcollector]. 
+
+#### Riemann
+
+[Riemann](http://github.com/riemann/riemann/) uses a powerful stream processing language to aggregate events from client agents running on Riak nodes, and can help track trends or report on events as they occur. Statistics can be gathered from your nodes and forwarded to a solution such as Graphite for producing related graphs.
+
+A [Riemann Tools](https://github.com/aphyr/riemann.git) project consisting of small programs for sending data to Riemann provides a module specifically designed to read Riak statistics.
+
+#### Zabbix
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[Zabbix](http://www.zabbix.com) is an open-source performance monitoring, alerting, and graphing solution that can provide information on the state of Riak cluster nodes.
+
+A [Zabbix plugin for Riak][riak_zabbix] is available to get you started monitoring Riak using Zabbix.
+
+
+### Hosted Service Monitoring Tools
+
+The following are some commercial tools which Basho customers have reported successfully using for statistics gathering and monitoring within their Riak clusters.
+
+#### Circonus
+
+[Circonus](http://circonus.com) provides organization-wide monitoring, trend analysis, alerting, notifications, and dashboards. It can be used to provide trend analysis and help with troubleshooting and capacity planning in a Riak cluster environment.
+
+#### New Relic
+
+{{% note %}}
+**Tested and Verified Support for Riak 2.x Stats.**
+{{% /note %}}
+
+[New Relic](http://newrelic.com) is a data analytics and visualization platform that can provide information on the current and past states of Riak nodes and visualizations of machine-generated data such as log files.
+
+A [Riak New Relic Agent][riak_new_relic] for collecting statistics from the Riak HTTP [`/stats`]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/status) endpoint is also available.
+
+#### Splunk
+
+[Splunk](http://www.splunk.com) is available as downloadable software or as a service, and provides tools for visualization of machine-generated data such as log files. It can be connected to Riak's HTTP statistics [`/stats`]({{<baseurl>}}riak/kv/3.0.4/developing/api/http/status) endpoint.
+
+Splunk can be used to aggregate all Riak cluster node operational log files, including operating system and Riak-specific logs and Riak statistics data. These data are then available for real-time graphing, search, and other visualization ideal for troubleshooting complex issues and spotting trends.
+
+## Summary
+
+Riak exposes numerous forms of vital statistic information which can be aggregated, monitored, analyzed, graphed, and reported on in a variety of ways using numerous open source and commercial solutions.
+
+If you use a solution not listed here with Riak and would like to include it (or would otherwise like to update the information on this page), feel free to fork the docs, add it in the appropriate section, and send a pull request to the [Riak Docs](https://github.com/basho/basho_docs).
+ +## References + +* [Inspecting a Node]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/inspecting-node) +* [Riaknostic](http://riaknostic.basho.com) +* [Riak Control]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-control/) +* [collectd](http://collectd.org) +* [Ganglia](http://ganglia.info) +* [Nagios](http://www.nagios.org) +* [OpenTSDB](http://opentsdb.net) +* [tcollector framework][tcollector] +* [Riemann](http://github.com/riemann/riemann/) +* [Riemann Github](https://github.com/aphyr/riemann) +* [Zabbix](http://www.zabbix.com) +* [Circonus](http://circonus.com) +* [New Relic](http://newrelic.com) +* [Splunk](http://www.splunk.com) +* [Riak Docs on Github](https://github.com/basho/basho_docs) + + +[sysctl_vm_txt]: https://www.kernel.org/doc/Documentation/sysctl/vm.txt +[data_types_counters]: {{< baseurl >}}riak/kv/latest/developing/data-types/counters/ +[data_types_sets]: {{< baseurl >}}riak/kv/latest/developing/data-types/sets/ +[data_types_maps]: {{< baseurl >}}riak/kv/latest/developing/data-types/maps/ +[riak_nagios]: https://github.com/basho/riak_nagios +[tcollector]: https://github.com/stumbleupon/tcollector +[tcollector_riak_plugin]: https://github.com/stumbleupon/tcollector/blob/master/collectors/0/riak.py +[riak_zabbix]: https://github.com/basho/riak-zabbix +[riak_new_relic]: https://github.com/basho/riak_newrelic +[riak_ganglia]: https://github.com/jnewland/gmond_python_modules/tree/master/riak/ + + + + diff --git a/content/riak/kv/3.0.4/using/reference/strong-consistency.md b/content/riak/kv/3.0.4/using/reference/strong-consistency.md new file mode 100644 index 0000000000..36d6e471e6 --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/strong-consistency.md @@ -0,0 +1,150 @@ +--- +title: "Strong Consistency Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Strong Consistency" + identifier: "managing_ref_strong_consistency" + weight: 112 + parent: "managing_ref" +toc: true +aliases: +--- + +[usage bucket types]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types +[concept eventual consistency]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency + +Riak was originally designed as an [eventually consistent]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/eventual-consistency) system, fundamentally geared toward providing partition +(i.e. fault) tolerance and high read and write availability. + +While this focus on high availability is a great fit for many data +storage needs, there are also many use cases for which strong data +consistency is more important than availability. Basho introduced a new +strong consistency option in version 2.0 to address these use cases. +In Riak, strong consistency is applied [using bucket types][usage bucket types], which +enables developers to apply strong consistency guarantees on a per-key +basis. + +Elsewhere in the documentation there are instructions for [enabling and using]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/strong-consistency/) strong consistency, as well as a [guide for operators]({{<baseurl>}}riak/kv/3.0.4/configuring/strong-consistency) looking to manage, +configure, and monitor strong consistency. + +## Strong vs. Eventual Consistency + +If you successfully write a value to a key in a strongly consistent +system, the next successful read of that key is guaranteed to show that +write. A client will never see out-of-date values. The drawback is that +some operations may fail if an insufficient number of object replicas +are available. 
More on this in the section on [trade-offs](#trade-offs). + +In an eventually consistent system, on the other hand, a read may return +an out-of-date value, particularly during system or network failures. +The advantage of this approach is that reads and writes can succeed even +when a cluster is experiencing significant service degradation. + +### Example + +Building on the example presented in the [eventual consistency][concept eventual consistency] doc, +imagine that information about who manages Manchester United is stored +in Riak, in the key `manchester-manager`. In the eventual consistency +example, the value associated with this key was originally +`David Moyes`, meaning that that was the first successful write to that +key. But then `Louis van Gaal` became Man U's manager, and a write was +executed to change the value of `manchester-manager`. + +Now imagine that this write failed on one node in a multi-node cluster. +Thus, all nodes report that the value of `manchester-manager` is `Louis +van Gaal` except for one. On the errant node, the value of the +`manchester-manager` key is still `David Moyes`. An eventually +consistent system is one in which a get request will most likely return +`Louis van Gaal` but could return the outdated value `David Moyes`. + +In a strongly consistent system, conversely, any successful read on +`manchester-manager` will return `Louis van Gaal` and never `David Moyes`. +Reads will return `Louis van Gaal` every single time until Man U gets a new +manager and someone performs a successful write to `manchester-manager` +to change its value. + +It might also be useful to imagine it a bit more abstractly. The +following causal sequence would characterize a strongly consistent +system: + +1. The value of the key `k` is set to `v` +2. All successful reads on `k` return `v` +3. The value of `k` is changed to `v2` +4. All successful reads on `k` return `v2` +5. And so forth + +At no point in time does this system return an out-of-date value. + +The following sequence could characterize an eventually consistent +system: + +1. A write is made that sets the value of the key `k` to `v` +2. Nearly all reads to `k` return `v`, but a small percentage return + `not found` +3. A write to `k` changes the value to `v2` +4. Nearly all reads to `k` now return `v2`, but a small number return + the outdated `v` (or even `not found`) because the newer value hasn't + yet been replicated to all nodes + +## Making the Strong vs. Eventual Decision + +The first system described above may sound like the undisputed champion, +and the second system undesirable. However: + +1. Reads and writes on the first system will often be slower---if only + by a few milliseconds---because the system needs to manage reads and + writes more carefully. If performance is of primary concern, the + first system might not be worth the sacrifice. +2. Reads and writes on the first system may fail entirely if enough + servers are unavailable. If high availability is the top priority, + then the second system has a significant advantage. + +So when deciding whether to use strong consistency in Riak, the +following question needs to be asked: + +#### For the specific use case at hand, is it better for reads to fail than to return a potentially out-of-date value? + +If the answer is yes, then you should seriously consider using Riak in a +strongly consistent way for the data that demands it, while bearing in +mind that other data can still be stored in Riak in an eventually +consistent way. 
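+
+Because strong consistency is opted into per bucket type, consistent and eventually consistent data can live side by side in the same cluster. A sketch of the operator workflow (the type name `strongly_consistent` is illustrative):
+
+```bash
+# Create a bucket type whose buckets use strong consistency,
+# then activate it so clients can use it.
+riak-admin bucket-type create strongly_consistent '{"props":{"consistent":true}}'
+riak-admin bucket-type activate strongly_consistent
+```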
+
+## Trade-offs
+
+Using Riak in a strongly consistent fashion comes with two unavoidable trade-offs:
+
+1. Less availability
+2. Slightly slower performance
+
+Strongly consistent operations are necessarily less highly available than eventually consistent operations because they require a **quorum** of available object replicas to succeed. Quorum is defined as N / 2 + 1, or `n_val` / 2 + 1. If N is set to 7, at least 4 object replicas must be available; if N is set to 3, at least 2 must be available; and so on.
+
+If there is a network partition that leaves less than a quorum of object replicas available within an ensemble, strongly consistent operations against the keys managed by that ensemble will fail.
+
+Nonetheless, consistent operations do provide a great deal of fault tolerance. Consistent operations can still succeed when a minority of replicas in each ensemble are offline, faulty, or unreachable. In other words, **strongly consistent operations will succeed as long as quorum is maintained**. A fuller discussion can be found in the [operations]({{<baseurl>}}riak/kv/3.0.4/configuring/strong-consistency/#fault-tolerance) documentation.
+
+The second trade-off concerns performance. Riak's implementation of strong consistency involves a complex [consensus subsystem]({{<baseurl>}}riak/kv/3.0.4/using/reference/strong-consistency/#implementation-details) that typically requires more communication between Riak nodes than eventually consistent operations, which can entail a performance hit of varying proportions, depending on a variety of factors.
+
+Ways to address this issue can be found in [strong consistency and performance]({{<baseurl>}}riak/kv/3.0.4/configuring/strong-consistency/#performance).
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/v2-multi-datacenter.md b/content/riak/kv/3.0.4/using/reference/v2-multi-datacenter.md
new file mode 100644
index 0000000000..66f3d7f6ee
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/v2-multi-datacenter.md
@@ -0,0 +1,40 @@
+---
+title: "V2 Multi-Datacenter Replication Reference"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "V2 Replication"
+    identifier: "managing_ref_v2"
+    weight: 115
+    parent: "managing_ref"
+toc: true
+aliases:
+---
+
+[v2 mdc arch]: ./architecture
+[v2 mdc fullsync]: ./scheduling-fullsync
+
+{{% note title="Deprecation Warning" %}}
+v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.4/using/reference/v3-multi-datacenter/) instead.
+{{% /note %}}
+
+
+## In This Section
+
+#### [V2 Multi-Datacenter Replication Reference: Architecture][v2 mdc arch]
+
+Overview of the architecture undergirding Riak's Multi-Datacenter Replication capabilities.
+
+[Learn More >>][v2 mdc arch]
+
+#### [V2 Multi-Datacenter Replication Reference: Scheduling Fullsync][v2 mdc fullsync]
+
+Brief tutorial on scheduling fullsync operations.
+ +[Learn More >>][v2 mdc fullsync] + + + + diff --git a/content/riak/kv/3.0.4/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/3.0.4/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..a0bbace314 --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,130 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Architecture" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Architecture" + identifier: "managing_ref_v2_architecture" + weight: 100 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/3.0.4/ops/mdc/v2/architecture + - /riak/kv/3.0.4/ops/mdc/v2/architecture +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.4/using/reference/v3-multi-datacenter/architecture/) instead. +{{% /note %}} + + +This document provides a basic overview of the architecture undergirding +Riak's Multi-Datacenter Replication capabilities. + +## How Replication Works + +When Multi-Datacenter Replication is implemented, one Riak cluster acts +as a **primary cluster**. The primary cluster handles replication +requests from one or more **secondary clusters** (generally located in +datacenters in other regions or countries). If the datacenter with the +primary cluster goes down, a secondary cluster can take over as the +primary cluster. In this sense, Riak's multi-datacenter capabilities are +masterless. + +Multi-Datacenter Replication has two primary modes of operation: +**fullsync** and **realtime**. In fullsync mode, a complete +synchronization occurs between primary and secondary cluster(s); in +realtime mode, continual, incremental synchronization occurs, i.e. +replication is triggered by new updates. + +Fullsync is performed upon initial connection of a secondary cluster, +and then periodically thereafter (every 360 minutes is the default, but +this can be modified). Fullsync is also triggered if the TCP connection +between primary and secondary cluster is severed and then recovered. + +Both fullsync and realtime mode are described in detail below. +But first, a few key concepts. + +## Concepts + +### Listener Nodes + +Listeners, also called **servers**, are Riak nodes in the primary +cluster that listen on an external IP address for replication requests. +Any node in a Riak cluster can participate as a listener. Adding more +nodes will increase the fault tolerance of the replication process in +the event of individual node failures. If a listener node goes down, +another node can take its place. + +### Site Nodes + +Site nodes, also called **clients**, are Riak nodes on a secondary +cluster that connect to listener nodes and send replication initiation +requests. Site nodes are paired with a listener node when started. + +### Leadership + +Only one node in each cluster will serve as the lead site (client) or +listener (server) node. Riak replication uses a leadership-election +protocol to determine which node in the cluster will participate in +replication. If a site connects to a node in the primary cluster that is +not the leader, it will be redirected to the listener node that is +currently the leader. + +## Fullsync Replication + +Riak performs the following steps during fullsync +replication, as illustrated in the Figure below. + +1. A TCP connection is established between the primary and secondary + clusters +2. 
The site node in the secondary cluster initiates fullsync replication + with the primary node by sending a message to the listener node in + the primary cluster +3. The site and listener nodes iterate through each [vnode]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode) in their respective clusters and compute a hash for + each key's object value. The site node on the secondary cluster sends + its complete list of key/hash pairs to the listener node in the + primary cluster. The listener node then sequentially compares its + key/hash pairs with the primary cluster's pairs, identifying any + missing objects or updates needed in the secondary cluster. +4. The listener node streams the missing objects/updates to the + secondary cluster. +5. The secondary cluster replicates the updates within the cluster to + achieve the new object values, completing the fullsync cycle + +<br> +![MDC Fullsync]({{<baseurl>}}images/MDC_Full-sync-small.png) +<br> + +## Realtime Replication + +Riak performs the following steps during realtime +replication, as illustrated in the Figure below. + +1. The secondary cluster establishes a TCP connection to the primary +2. Realtime replication of a key/object is initiated when an update is + sent from a client to the primary cluster +3. The primary cluster replicates the object locally +4. The listener node on the primary cluster streams an update to the + secondary cluster +5. The site node within the secondary cluster receives and replicates + the update + +<br> +![MDC Realtime]({{<baseurl>}}images/MDC-real-time-sync-small.png) +<br> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters/#the-ring); if you are using fullsync +replication, every bucket's [`n_val`]({{<baseurl>}}riak/kv/3.0.4/developing/app-guide/replication-properties#n-value-and-replication) must be the same in both the +source and sink cluster. + + + + diff --git a/content/riak/kv/3.0.4/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/3.0.4/using/reference/v2-multi-datacenter/scheduling-fullsync.md new file mode 100644 index 0000000000..33e31733f0 --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -0,0 +1,53 @@ +--- +title: "V2 Multi-Datacenter Replication Reference: Scheduling Fullsync" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Scheduling Fullsync" + identifier: "managing_ref_v2_fullsync" + weight: 101 + parent: "managing_ref_v2" +toc: true +aliases: + - /riak/3.0.4/ops/mdc/v2/scheduling-fullsync + - /riak/kv/3.0.4/ops/mdc/v2/scheduling-fullsync +--- + +{{% note title="Deprecation Warning" %}} +v2 Multi-Datacenter Replication is deprecated and will be removed in a future version. Please use [v3]({{<baseurl>}}riak/kv/3.0.4/using/reference/v3-multi-datacenter/scheduling-fullsync/) instead. +{{% /note %}} + + +## Scheduling Fullsync Operation + +With the `pause` and `resume` commands it is possible to limit the +fullsync operation to off-peak times. First, disable `fullsync_interval` +and set `fullsync_on_connect` to `false`. Then, using cron or something +similar, execute the commands below at the start of the sync window. 
+In these examples, the commands are combined in a `.sh` or analogous +file: + +```bash +#!/bin/sh + +## Resume from where we left off +riak-repl resume-fullsync + +## Start fullsync if nothing is running +riak-repl start-fullsync +``` + +At the end of the sync window: + +```bash +#!/bin/sh + +## Stop fullsync until start of next sync window +riak-repl pause-fullsync +``` + + + + diff --git a/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter.md b/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..c835f5b9c7 --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter.md @@ -0,0 +1,52 @@ +--- +title: "V3 Multi-Datacenter Replication Reference" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "V3 Replication" + identifier: "managing_ref_v3" + weight: 114 + parent: "managing_ref" +toc: true +aliases: +--- + +[v3 mdc arch]: ./architecture +[v3 mdc aae]: ./aae +[v3 mdc cascade]: ./cascading-writes +[v3 mdc fullsync]: ./scheduling-fullsync + +## In This Section + +#### [V3 Multi-Datacenter Replication Reference: Architecture][v3 mdc arch] + +Overview of the architecture undergirding Riak's Version 3 Multi-Datacenter Replication capabilities. + +[Learn More >>][v3 mdc arch] + + +#### [V3 Multi-Datacenter Replication Reference: With Active Anti-Entropy][v3 mdc aae] + +Overview of using Riak KV's active anti-entropy (AAE) subsystem with Multi-Datacenter. + +[Learn More >>][v3 mdc aae] + + +#### [V3 Multi-Datacenter Replication Reference: Cascading Realtime Writes][v3 mdc cascade] + +Details the cascading realtime writes feature. + +[Learn More >>][v3 mdc cascade] + + +#### [V3 Multi-Datacenter Replication Reference: Scheduling Fullsync][v3 mdc fullsync] + +Brief tutorial on scheduling fullsync operations. + +[Learn More >>][v3 mdc fullsync] + + + + diff --git a/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..10955082bb --- /dev/null +++ b/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,129 @@ +--- +title_supertext: "V3 Multi-Datacenter Replication Reference:" +title: "Fullsync via Active Anti-Entropy" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Active Anti-Entropy" + identifier: "managing_ref_v3_aae" + weight: 101 + parent: "managing_ref_v3" +toc: true +commercial_offering: true +aliases: + - /riak/3.0.4/ops/mdc/v3/aae + - /riak/kv/3.0.4/ops/mdc/v3/aae +--- + +[glossary aae]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#active-anti-entropy-aae +[config reference#advanced]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#advanced-configuration +[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters + +> **Note: Technical preview** +> +> The active anti-entropy fullsync strategy, as it pertains to +replication, is currently in **technical preview** mode. This means that +it hasn't been tested at large scale and that there may be issues that +Basho must address prior to a general release. Please don't use this +feature on a production system without professional services or customer +service engineering support. 
+
+## Overview
+
+Riak Multi-Datacenter (MDC) Replication version 3 (originally limited to Riak Enterprise versions 1.4.0 - 2.2.3 and now available to all versions post 2.2.3) can take advantage of Riak's [active anti-entropy][glossary aae] \(AAE) subsystem, which was first introduced as a technology preview in Riak 1.3.0.
+
+AAE plus Replication uses existing Riak AAE hash trees stored in LevelDB, so if AAE is already active, there is no additional startup delay for enabling the `aae` fullsync strategy. AAE can also be enabled for the first time on a cluster, although some custom settings can enhance performance in this case to help AAE trees be built more quickly. See [Configuration/AAE Tree Build Optimization](#aae-tree-build-optimization).
+
+## Requirements
+
+* Riak Enterprise version 1.4.0 or later installed on source and sink clusters
+* Riak MDC Replication Version 3 enabled on source and sink clusters
+* Both source and sink clusters must be of the same ring size
+* AAE must be enabled on both source and sink clusters
+* `fullsync_strategy` in the `riak_repl` section of the `advanced.config` configuration file must be set to `aae` on both source and sink clusters
+* AAE trees must have been built on both source and sink clusters. In the event that an AAE tree is not built on both the source and sink, fullsync will default to the `keylist` fullsync strategy for that partition.
+
+## Configuration
+
+In Riak version 2.0 and later, configuration is managed using the `advanced.config` files on each node. The semantics of the `advanced.config` file are similar to the formerly used `app.config` file. For more information and for a list of configurable parameters, see our documentation on [Advanced Configuration][config reference#advanced].
+
+## Enable Active Anti-Entropy
+
+To use the `aae` fullsync strategy, [active anti-entropy][glossary aae] \(AAE) must be enabled on both the source and sink clusters. If it is not enabled, the `keylist` strategy will be used.
+
+To enable AAE in Riak KV:
+
+```riakconf
+anti_entropy = active
+```
+
+By default, it could take a couple of days for the cluster to build all of the necessary hash trees because the default **build rate** of trees is to build 1 partition per hour, per node. With a [ring size][concept clusters] of 256 and 5 nodes, that is 2 days.
+
+Changing the rate of tree building can speed up this process, with the caveat that rebuilding a tree takes processing time from the cluster, and this should not be done without assessing the possible impact on get/put latencies for normal cluster operations. For a production cluster, we recommend leaving the default in place.
+
+For a test cluster, the build rate can be changed in `riak.conf`. If a partition has not had its AAE tree built yet, it will default to using the `keylist` replication strategy. Instructions on these settings can be found in the section directly below.
+
+<div id="aae-tree-build-optimization"></div>
+
+### AAE Tree Build Optimization
+
+You can speed up the build rate for AAE-related hash trees by adjusting the `anti_entropy.tree.build_limit.*` and `anti_entropy.concurrency_limit` settings.
+
+```riakconf
+anti_entropy.tree.build_limit.number = 10
+anti_entropy.tree.build_limit.per_timespan = 1h
+anti_entropy.concurrency_limit = 10
+```
+
+### Enable AAE Fullsync Replication Strategy
+
+Finally, the replication fullsync strategy must be set to `aae` on both source and sink clusters.
+If not, the `keylist` replication strategy will be used.
+
+To enable AAE with Version 3 MDC Replication:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {fullsync_strategy, aae},
+             % ...
+            ]}
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/architecture.md
new file mode 100644
index 0000000000..9188625bd2
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/architecture.md
@@ -0,0 +1,186 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Architecture"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Architecture"
+    identifier: "managing_ref_v3_architecture"
+    weight: 100
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.4/ops/mdc/v3/architecture
+  - /riak/kv/3.0.4/ops/mdc/v3/architecture
+---
+
+[glossary vnode]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#vnode
+[concept clusters]: {{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters
+
+## How Version 3 Replication Works
+
+In Multi-Datacenter (MDC) Replication, a cluster can act as either the
+
+* **source cluster**, which sends replication data to one or more
+* **sink clusters**, which are generally located in datacenters in other regions or countries.
+
+Bidirectional replication can easily be established by making a cluster both a source and sink to other clusters. Riak Multi-Datacenter Replication is considered "masterless" in that all clusters participating will resolve replicated writes via the normal resolution methods available in Riak.
+
+In Multi-Datacenter Replication, there are two primary modes of operation:
+
+* **Fullsync** replication is a complete synchronization that occurs between source and sink cluster(s), which can be performed upon initial connection of a sink cluster if you wish
+* **Realtime** replication is a continual, incremental synchronization triggered by successful writing of new updates on the source cluster
+
+Fullsync and realtime replication modes are described in detail below.
+
+## Concepts
+
+### Sources
+
+A source refers to a cluster that is the primary producer of replication data. A source can also refer to any node that is part of the source cluster. Source clusters push data to sink clusters.
+
+### Sinks
+
+A sink refers to a cluster that is the primary consumer of replication data. A sink can also refer to any node that is part of the sink cluster. Sink clusters receive data from source clusters.
+
+### Cluster Manager
+
+The cluster manager is a Riak service that provides information regarding nodes and protocols supported by the sink and source clusters. This information is primarily consumed by the `riak-repl connect` command.
+
+### Fullsync Coordinator
+
+In fullsync replication, a node on the source cluster is elected to be the *fullsync coordinator*. This node is responsible for starting and stopping replication to the sink cluster. It also communicates with the sink cluster to exchange key lists and ultimately transfer data across a TCP connection. If a fullsync coordinator is terminated as the result of an error, it will automatically restart on the current node. If the node becomes unresponsive, a leader election will take place within 5 seconds to select a new node from the cluster to become the coordinator. In the event of a coordinator restart, a fullsync will have to restart.
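+
+In practice, the cluster manager is the endpoint an operator points a source cluster at when wiring two clusters together. A sketch of the v3 connection commands, run on the source cluster (the cluster names, sink address, and default cluster-manager port 9080 are illustrative):
+
+```bash
+# Name this cluster, then connect it to the sink's cluster manager.
+riak-repl clustername boston
+riak-repl connect 10.0.1.10:9080
+
+# Enable and start realtime replication to the named sink cluster.
+riak-repl realtime enable newyork
+riak-repl realtime start newyork
+```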
+ +## Fullsync Replication + +Fullsync replication scans through the list of partitions in a Riak +cluster and determines which objects in the sink cluster need to be +updated. A source partition is synchronized to a node on the sink +cluster containing the current partition. + +## Realtime Replication + +In realtime replication, a node in the source cluster will forward data +to the sink cluster. A node in the source cluster does not necessarily +connect to a node containing the same [vnode][glossary vnode] on +the sink cluster. This allows Riak to spread out realtime replication +across the entire cluster, thus improving throughput and making +replication more fault tolerant. + +### Initialization + +Before a source cluster can begin pushing realtime updates to a sink, +the following commands must be issued: + +1. `riak-repl realtime enable <sink_cluster>` + + After this command, the realtime queues (one for each Riak node) are + populated with updates to the source cluster, ready to be pushed to + the sink. + +2. `riak-repl realtime start <sink_cluster>` + + This instructs the Riak connection manager to contact the sink + cluster. + + <br /> + ![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime1.png) + <br /> + + At this point realtime replication commences. + +<ol start="3"> +<li>Nodes with queued updates establish connections to the sink cluster +and replication begins.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime2.png) +<br /> + +### Realtime queueing and synchronization + +Once initialized, realtime replication continues to use the queues to +store data updates for synchronization. + +<ol start="4"> +<li>The client sends an object to store on the source cluster.</li> +<li>Riak writes N replicas on the source cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime3.png) +<br /> + +<ol start="6"> +<li>The new object is stored in the realtime queue.</li> +<li>The object is copied to the sink cluster.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime4.png) +<br /> + +<ol start="8"> +<li>The destination node on the sink cluster writes the object to N +nodes.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime5.png) +<br /> + +<ol start="9"> +<li>The successful write of the object to the sink cluster is +acknowledged and the object removed from the realtime queue.</li> +</ol> + +<br /> +![MDC fullsync]({{<baseurl>}}images/MDC-v3-realtime6.png) +<br /> + +## Restrictions + +It is important to note that both clusters must have certain attributes +in common for Multi-Datacenter Replication to work. If you are using +either fullsync or realtime replication, both clusters must have the +same [ring size][concept clusters]; if you are using fullsync +replication, every bucket's `n_val` must be the same in both the +source and sink cluster. 
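+
+Before enabling replication, you can confirm these attributes match on both clusters. A sketch of one way to check (the node address and bucket name are illustrative):
+
+```bash
+# Ring size: compare ring_num_partitions on a node in each cluster.
+riak-admin status | grep ring_num_partitions
+
+# n_val: inspect a bucket's properties over HTTP (default port 8098)
+# and compare the "n_val" field across clusters.
+curl -s http://127.0.0.1:8098/buckets/my_bucket/props
+```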
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/cascading-writes.md
new file mode 100644
index 0000000000..25ee66dd04
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/cascading-writes.md
@@ -0,0 +1,102 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Cascading Realtime Writes"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Cascading Writes"
+    identifier: "managing_ref_v3_cascading_writes"
+    weight: 102
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.4/ops/mdc/v3/cascading-writes
+  - /riak/kv/3.0.4/ops/mdc/v3/cascading-writes
+---
+
+## Introduction
+
+Riak includes a feature that cascades realtime writes across multiple clusters.
+
+Cascading Realtime Writes is enabled by default on new clusters running Riak. It will need to be manually enabled on existing clusters.
+
+Cascading realtime requires the `{riak_repl, rtq_meta}` capability to function.
+
+{{% note title="Note on cascading tracking" %}}
+Cascading tracking is a simple list of where an object has been written. This
+works well for most common configurations. Larger installations, however, may
+have writes cascade to clusters to which other clusters have already written.
+{{% /note %}}
+
+
+```
++---+     +---+     +---+
+| A | <-> | B | <-> | C |
++---+     +---+     +---+
+  ^                   ^
+  |                   |
+  V                   V
++---+     +---+     +---+
+| F | <-> | E | <-> | D |
++---+     +---+     +---+
+```
+
+In the diagram above, a write at cluster A will begin two cascades. One goes to B, C, D, E, and finally F; the other goes to F, E, D, C, and finally B. Each cascade will loop around to A again, sending a replication request even if the same request has already occurred from the opposite direction, creating 3 extra write requests.
+
+This can be mitigated by disabling cascading in a cluster. If cascading were disabled on cluster D, a write at A would begin two cascades. One would go through B, C, and D, the other through F, E, and D. This reduces the number of extraneous write requests to 1.
+
+A different topology can also prevent extra write requests:
+
+```
++---+                   +---+
+| A |                   | E |
++---+                   +---+
+ ^  ^                   ^  ^
+ |   \  +---+     +---+  /  |
+ |    > | C | <-> | D | <   |
+ |   /  +---+     +---+  \  |
+ V  V                   V  V
++---+                   +---+
+| B |                   | F |
++---+                   +---+
+```
+
+A write at A will cascade to C and B. B will not cascade to C because A will have already added C to the list of clusters where the write has occurred. C will then cascade to D. D then cascades to E and F. E and F see that the other was sent a write request (by D), and so they do not cascade.
+
+## Usage
+
+Riak Cascading Writes can be enabled and disabled using the `riak-repl` command. Please see the [Version 3 Operations guide]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/v3-multi-datacenter) for more information.
+To show the current settings:
+
+`riak-repl realtime cascades`
+
+To enable cascading:
+
+`riak-repl realtime cascades always`
+
+To disable cascading:
+
+`riak-repl realtime cascades never`
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md
new file mode 100644
index 0000000000..aea437c59d
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/reference/v3-multi-datacenter/scheduling-fullsync.md
@@ -0,0 +1,72 @@
+---
+title_supertext: "V3 Multi-Datacenter Replication Reference:"
+title: "Scheduling Fullsync"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Scheduling Fullsync"
+    identifier: "managing_ref_v3_fullsync"
+    weight: 103
+    parent: "managing_ref_v3"
+toc: true
+commercial_offering: true
+aliases:
+  - /riak/3.0.4/ops/mdc/v3/scheduling-fullsync
+  - /riak/kv/3.0.4/ops/mdc/v3/scheduling-fullsync
+---
+
+[config reference#advanced]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#advanced-configuration
+
+The `fullsync_interval` parameter can be configured in the `riak-repl` section of [`advanced.config`][config reference#advanced] with either:
+
+* a single integer value representing the duration to wait, in minutes, between fullsyncs, _or_
+* a list of pairs of the form `[{"clustername", time_in_minutes}, {"clustername", time_in_minutes}, ...]`, one pair for each sink participating in fullsync replication. Note the commas separating each pair, and the `[ ]` surrounding the entire list.
+
+## Examples
+
+Sharing a fullsync time (in minutes) for all sinks:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {data_root, "/configured/repl/data/root"},
+             {fullsync_interval, 90} %% fullsync runs every 90 minutes
+             % ...
+            ]}
+```
+
+List of multiple sinks with separate times in minutes:
+
+```advancedconfig
+{riak_repl, [
+             % ...
+             {data_root, "/configured/repl/data/root"},
+             % the sink_boston and sink_newyork clusters have different intervals (in minutes)
+             {fullsync_interval, [
+                                  {"sink_boston", 120},  %% fullsync to sink_boston will run every 120 minutes
+                                  {"sink_newyork", 90}]} %% fullsync to sink_newyork will run every 90 minutes
+            ]}
+```
+
+## Additional Fullsync Stats
+
+Additional fullsync stats per sink have been added in Riak.
+
+* `fullsyncs_completed` — The number of fullsyncs that have been completed to the specified sink cluster.
+* `fullsync_start_time` — The time the current fullsync to the specified cluster began.
+* `last_fullsync_duration` — The duration (in seconds) of the last completed fullsync.
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/repair-recovery.md b/content/riak/kv/3.0.4/using/repair-recovery.md
new file mode 100644
index 0000000000..2fd6646f1b
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/repair-recovery.md
@@ -0,0 +1,53 @@
+---
+title: "Repair & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Repair & Recovery"
+    identifier: "managing_repair_recover"
+    weight: 204
+    parent: "managing"
+toc: true
+aliases:
+---
+
+[repair recover fail]: ./failure-recovery/
+[repair recover errors]: ./errors/
+[repair recover repairs]: ./repairs/
+[repair recover restart]: ./rolling-restart/
+
+## In This Section
+
+#### [Failure & Recovery][repair recover fail]
+
+Lists steps that can be taken to minimize the harm caused by a general cluster failure.
+
+[Learn More >>][repair recover fail]
+
+
+#### [Errors & Messages][repair recover errors]
+
+Details most common errors & messages.
+
+[Learn More >>][repair recover errors]
+
+
+#### [Repairs][repair recover repairs]
+
+Tutorials on running various repair operations.
+
+[Learn More >>][repair recover repairs]
+
+
+#### [Rolling Restarts][repair recover restart]
+
+Brief guide on performing node-by-node restarts.
+
+[Learn More >>][repair recover restart]
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/repair-recovery/errors.md b/content/riak/kv/3.0.4/using/repair-recovery/errors.md
new file mode 100644
index 0000000000..ce6d3e9748
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/repair-recovery/errors.md
@@ -0,0 +1,366 @@
+---
+title: "Errors & Messages"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Errors"
+    identifier: "repair_recover_errors"
+    weight: 101
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/recovery/errors
+  - /riak/kv/3.0.4/ops/running/recovery/errors
+---
+
+[config reference]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference
+
+This is not a comprehensive listing of every error that Riak may encounter -- screws fall out all of the time, the world is an imperfect place. This is an attempt at capturing the most common recent errors that users encounter, as well as giving some description of the non-critical error atoms which you may find in the logs.
+
+Discovering the source of an error can take some detective work, since one error can cause a cascade of errors.
+
+The tables in this document do not specify which logs these error messages may appear in. Depending upon your log configuration some may appear more often (i.e., if you set the log to debug), while others may output to your console (e.g., if you tee'd your output or started as `riak console`).
+
+You can optionally customize your log message format via the `lager_default_formatter` field under `lager` in `app.config`. If you do, your messages will look different from those shown in this document.
+
+Finally, this document is organized so you can look up portions of a log message, since printing every variation would be a bit unwieldy. For example, this message:
+
+```
+12:34:27.999 [error] gen_server riak_core_capability terminated with reason:\
+no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72
+```
+
+starts with a date (`12:34:27.999`), followed by the log severity (`[error]`), with a message formatted by lager (found in the Lager table below as *gen_server `Mod` terminated with reason: `Reason`*).
+
+### Lager Formats
+
+Riak's main logging mechanism is the project Lager, so it's good to note some of the more common message formats. In almost every case the reasons for the error are described as variables, such as `Reason` or `Mod` (meaning the Erlang module which is generally the source of the error).
+
+Riak does not format all error messages that it receives into human-readable sentences. However, it does output errors as objects.
+
+The above example error message corresponds with the first message in this table, where the Erlang `Mod` value is `riak_core_capability` and the reason was an Erlang error: `no function clause matching orddict:fetch('riak@192.168.2.81', []) line 72`.
+ +Error | Message +------|-------- + | `gen_server <Mod> terminated with reason: <Reason>` + | `gen_fsm <Mod> in state <State> terminated with reason: <Reason>` + | `gen_event <ID> installed in <Mod> terminated with reason: <Reason>` +`badarg` | `bad argument in call to <Mod1> in <Mod2>` +`badarith` | `bad arithmetic expression in <Mod>` +`badarity` | `fun called with wrong arity of <Ar1> instead of <Ar2> in <Mod>` +`badmatch` | `no match of right hand value <Val> in <Mod>` +`bad_return` | `bad return value <Value> from <Mod>` +`bad_return_value` | `bad return value: <Val> in <Mod>` +`badrecord` | `bad record <Record> in <Mod>` +`case_clause` | `no case clause matching <Val> in <Mod>` +`emfile` | `maximum number of file descriptors exhausted, check ulimit -n` +`function_clause` | `no function clause matching <Mod>` +`function not exported` | `call to undefined function <Func> from <Mod>` +`if_clause` | `no true branch found while evaluating if expression in <Mod>` +`noproc` | `no such process or port in call to <Mod>` +`{system_limit, {erlang, open_port}}` | `maximum number of ports exceeded` +`{system_limit, {erlang, spawn}}` | `maximum number of processes exceeded` +`{system_limit, {erlang, spawn_opt}}` | `maximum number of processes exceeded` +`{system_limit, {erlang, list_to_atom}}` | `tried to create an atom larger than 255, or maximum atom count exceeded` +`{system_limit, {ets, new}}` | `maximum number of Erlang Term Storage (ETS) tables exceeded` +`try_clause` | `no try clause matching <Val> in <Mod>` +`undef` | `call to undefined function <Mod>` + +### Error Atoms + +Since Erlang programming support is a "happy path/fail fast" style, one +of the more common error log strings you might encounter contain +`{error,{badmatch,{...`. This is Erlang's way of telling you that an +unexpected value was assigned, so these errors can prefix the more +descriptive parts. In this case, `{error,{badmatch,{...` prefixes the +more interesting `insufficient_vnodes_available` error, which can be +found in the `riak_kv` table later on in this document. + +```log +2012-01-13 02:30:37.015 [error] <0.116.0> webmachine error: path="/riak-docs/riak/contexts"\ +{error,{error,{badmatch,{error,insufficient_vnodes_available}},\ +[{riak_kv_wm_keylist,produce_bucket_body,2},{webmachine_resource,resource_call,3},\ +{webmachine_resour,resource_call,1},{webmachine_decision_core,decision,1},\ +{webmachine_decision_core,handle_request,2},\ +{webmachine_mochiweb,loop,1},{mochiweb_http,headers,5}]}} +``` + +## Erlang Errors + +Although relatively rare once a Riak cluster is running in production, +users new to Riak or Erlang occasionally encounter errors on initial +installation. These spring from a setup Erlang does not expect, +generally due to network, permission, or configuration problems. + +Error | Description | Resolution +:-----|:------------|:---------- +`{error,duplicate_name}` | You are trying to start a new Erlang node, but another node with the same name is already running | You might be attempting to start multiple nodes on the same machine with the same `vm.args` `-name` value; or if Riak is already running, check for `beam.smp`; or epmd thinks Riak is running, check/kill epmd +`{error,econnrefused}` | Remote Erlang node connection refused | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.4/using/repair-recovery/errors/#more">Step 1</a>. 
+`{error,ehostunreach}` | Remote node cannot be connected to | Ensure that nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.4/using/repair-recovery/errors/#more">Step 1</a>.
+`{error,eacces}` | Cannot write a given file | Ensure the Riak beam process has permission to write to all `*_dir` values in `app.config`, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,enoent}` | Missing an expected file or directory | Ensure all `*_dir` values in `app.config` exist, for example, `ring_state_dir`, `platform_data_dir`, and others
+`{error,erofs}` | An attempt was made to write a file or directory to a read-only filesystem | Only set Riak directories to read/write filesystems
+`system_memory_high_watermark` | Often a sign that an <a href="http://www.erlang.org/doc/man/ets.html">ETS table</a> has grown too large | Check that you are using a backend appropriate for your needs (LevelDB for very large key counts) and that your vnode count is reasonable (measured in dozens per node rather than hundreds)
+`temp_alloc` | Erlang attempting to allocate memory | Often associated with `Cannot allocate X bytes of memory`, which means that you're either creating too large an object or that you simply don't have enough RAM. The suggested minimum RAM per node is 4GB.
+
+## Riak Errors and Messages
+
+Many KV errors have prescriptive messages. For such cases we leave it to
+Riak to explain the correct course of action. For example, the
+`map/reduce` `parse_input` phase will respond like this when it
+encounters an invalid input:
+
+{{% note title="Note on inputs" %}}
+Inputs must be a binary bucket, a tuple of bucket and key-filters, a list of
+target tuples, a search index, or modfun tuple: `INPUT`.
+{{% /note %}}
+
+The remaining common error codes are often marked by Erlang atoms (and
+quite often wrapped within an `{error,{badmatch,{...` tuple, as
+described in the [Error Atoms](#error-atoms) section above). This table
+lays out those terse error codes and related log messages, if they
+exist.
+
+### Riak Core
+
+Riak Core is the underlying implementation for KV. These are errors
+originating from that framework, and can appear whether you use KV,
+Search, or any other Core implementation.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`behavior` | | Attempting to execute an unknown behavior | Ensure that your configuration file choices (e.g.
backends) support the behaviors you're attempting to use, such as configuring LevelDB to use secondary indexes
+`already_leaving` | `Node is already in the process of leaving the cluster` | An error marking a node to leave when it is already leaving | No need to duplicate the `leave` command
+`already_replacement` | | This node is already in the replacements request list | You cannot replace the same node twice
+`{different_owners, N1, N2}` | | Two nodes list different partition owners, meaning the ring is not ready | When the ring is ready, the status should be ok
+`different_ring_sizes` | | The joining ring is a different size from the existing cluster ring | Don't join a node already joined to a cluster
+`insufficient_vnodes_available` | | When creating a query coverage plan, not enough vnodes are available | Check `riak-admin ring-status` and ensure that all of your nodes are healthy and connected
+`invalid_replacement` | | A new node is currently joining from a previous operation, so a replacement request is invalid until it is no longer joining | Wait until the node is finished joining
+`invalid_ring_state_dir` | `Ring state directory <RingDir> does not exist, and could not be created: <Reason>` | The ring directory does not exist and no new dir can be created in expected location | Ensure that the Erlang process can write to `ring_state_dir` or has permission to create that directory
+`is_claimant` | | A node cannot be the claimant of its own remove request | Remove/replace nodes from another node
+`is_up` | | Node is expected to be down but is up | When a node is downed, it should be down
+`legacy` | | Attempting to stage a plan against a legacy ring | Staging is a feature only of Riak versions 1.2.0+
+`max_concurrency` | `Handoff receiver for partition <Partition> exited abnormally after processing <Count> objects: <Reason>` | Disallow more handoff processes than the `riak_core` `handoff_concurrency` setting (defaults to 2) | If this routinely kills vnodes, this issue has been linked to LevelDB compactions which can build up and block writing, which will also be accompanied by LevelDB logs saying `Waiting...` or `Compacting`
+`{nodes_down, Down}` | | All nodes must be up to check |
+`not_member` | | This node is not a member of the ring | Cannot leave/remove/down when this is not a ring member
+`not_reachable` | | Cannot join unreachable node | Check your network connections and ensure that the Erlang cookie setting (`vm.args` `-setcookie`) matches on all nodes
+`{not_registered, App}` | | Attempting to use an unregistered process | Ensure that your `app.config` choices contain the app you're attempting to use, e.g., `{riak_kv_stat, true}`
+`not_single_node` | | There are no other members to join | Join with at least one other node
+`nothing_planned` | | Cannot commit a plan without changes | Ensure at least one ring change is planned before running commit
+`only_member` | | This is the only member of the ring | Cannot leave/remove/down when this is the only member of the ring
+`ring_not_ready` | | Ring not ready to perform command | Attempting to plan a ring change before the ring is ready to do so
+`self_join` | | Cannot join node with itself | Join another node to form a valid cluster
+`timeout` | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of TCP recv timeout` | | Ensure that ports chosen in your configuration files do not overlap with ports being used by your system, or with each other
+`unable_to_get_join_ring` | | Cannot access cluster ring to join | Possible
corrupted ring
+`{unknown_capability, Capability}` | | Attempting to use a capability unsupported by this implementation | Ensure that your configuration choices support the capability you're attempting to use, such as Pipe MapReduce (setting a `mapred_2i_pipe` value in `app.config`)
+`vnode_exiting` | `<Mod> failed to store handoff obj: <Err>` | | A vnode fails to hand off data because the handoff state is deleted
+`vnode_shutdown` | | The vnode worker pool is shutting down | Various reasons can cause a shutdown, check other log messages
+ | `Bucket validation failed <Detail>` | | Only set valid bucket properties
+ | `set_recv_data called for non-existing receiver` | Cannot connect to receiver during handoff | Ensure receiver node is still up and running, and that the standard
+ | `An <Dir> handoff of partition <M> was terminated because the vnode died` | Handoff stopped because the vnode was `DOWN` and the sender must be killed | An expected message if a vnode dies during handoff. Check the logs for other causes.
+ | `status_update for non-existing handoff <Target>` | Cannot get the status of a handoff `Target` module that doesn't exist | An expected message. Check the logs for other causes.
+ | `SSL handoff config error: property <FailProp>: <BadMat>.` | The receiver may reject the sender's attempt to start a handoff | Ensure your SSL settings and certificates are proper
+ | `Failure processing SSL handoff config <Props>:<X>:<Y>` | | Ensure your SSL settings and certificates are proper
+ | `<Type> transfer of <Module> from <SrcNode> <SrcPartition> to <TargetNode> <TargetPartition> failed because of <Reason>` | Nodes cannot hand off data | Ensure that your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.4/using/repair-recovery/errors/#more"> Step 1</a>.
+ | `Failed to start application: <App>` | Expected application cannot load | This relates to an Erlang application, and not necessarily the Riak application in general. The app may fail to load for many reasons, such as a missing native library. Read other log messages for clues
+ | `Failed to read ring file: <Reason>` | Gives a reason why the ring file cannot be read on startup | The reason given explains the problem, such as `eacces` meaning the Erlang process does not have permission to read
+ | `Failed to load ring file: <Reason>` | Gives a reason why the ring file cannot be loaded on startup | The reason given explains the problem, such as `enoent` meaning the expected file cannot be found
+ | `ring_trans: invalid return value: <Other>` | Transferring ring data between nodes received an invalid value | Often associated with ring corruption, or an unexpected exit from the transferring node
+ | `Error while running bucket fixup module <Fixup> from application <App> on bucket <BucketName>: <Reason>` | | Various sources for a fixup error, read associated errors
+ | `Crash while running bucket fixup module <Fixup> from application <App> on bucket <BucketName> : <What>:<Why>` | | Various sources for a fixup error, read associated errors
+ | `<Index> <Mod> worker pool crashed <Reason>` | | Various reasons can be the source of a worker pool crash, read associated errors
+ | `Received xfer_complete for non-existing repair: <ModPartition>` | Unexpected repair message | Not much to do here, but a node did not expect to receive a `xfer_complete` status
+
+### Riak KV
+
+Riak KV is the key/value implementation, generally just considered to be
+Riak proper.
This is the source of most of the code, and consequently,
+most of the error messages.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`all_nodes_down` | | No nodes are available | Check `riak-admin member-status` and ensure that all expected nodes in the cluster have a `valid` status
+`{bad_qterm, QueryTerm}` | | Bad query when performing MapReduce | Fix your MapReduce query
+`{coord_handoff_failed, Reason}` | `Unable to forward put for <Key> to <CoordNode> - <Reason>` | Vnodes unable to communicate | Check that the coordinating vnode is not down. Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.4/using/repair-recovery/errors/#more"> Step 1</a>.
+`{could_not_reach_node, Node}` | | Erlang process was not reachable | Check network settings; ensure remote nodes are running and reachable; ensure all nodes have the same Erlang cookie setting (`vm.args` `-setcookie`). See <a href="{{< baseurl >}}riak/kv/3.0.4/using/repair-recovery/errors/#more"> Step 1</a>.
+`{deleted, Vclock}` | | The value was already deleted, includes the current vector clock | Riak will eventually clean up this tombstone
+`{dw_val_violation, DW}` | | Same as `w_val_violation` but concerning durable writes | Set a valid DW value
+`{field_parsing_failed, {Field, Value}}` | `Could not parse field <Field>, value <Value>.` | Could not parse an index field | Most commonly an `_int` field which cannot be parsed. For example a query like this is invalid: `/buckets/X/index/Y_int/BADVAL`, since `BADVAL` should instead be an integer
+`{hook_crashed, {Mod, Fun, Class, Exception}}` | `Problem invoking pre-commit hook` | Precommit process exited due to some failure | Fix the precommit function code, following the message's exception and stacktrace to help debug
+`{indexes_not_supported, Mod}` | | The chosen backend does not support indexes (only LevelDB currently supports secondary indexes) | Set your configuration to use the LevelDB backend
+`{insufficient_vnodes, NumVnodes, need, R}` | | R was set greater than the total vnodes | Set a proper R value; or too many nodes are down; or too many nodes are unavailable due to crash or network partition. Ensure all nodes are available by running `riak-admin ring-status`.
+`{invalid_hook_def, HookDef}` | `Invalid post-commit hook definition <Def>` | No Erlang module and function or JavaScript function name | Define the hook with the correct settings
+`{invalid_inputdef, InputDef}` | | Bad inputs definitions when running MapReduce | Fix inputs settings; set `mapred_system` from `legacy` to `pipe`
+`invalid_message` | | Unknown event sent to module | Ensure you're running the same versions of Riak (and specifically poolboy) across all nodes
+`{invalid_range, Args}` | | Index range query has Start > End | Fix your query
+`{invalid_return, {Mod, Fun, Result}}` | `Problem invoking pre-commit hook <Mod>:<Fun>, invalid return <Result>` | The given precommit function gave an invalid return for the given `Result` | Ensure your pre-commit functions return a valid result
+`invalid_storage_backend` | `storage_backend <Backend> is non-loadable.` | Invalid backend choice when starting up Riak | Set a valid backend in your configuration files
+`key_too_large` | | The key was larger than 65536 bytes | Use a smaller key
+`local_put_failed` | | A local vnode PUT operation failed | This has been linked to a LevelDB issue related to restricted memory usage and inability to flush a write to disk.
If this happens repetitively, stop/start the Riak node, forcing a memory realloc
+`{n_val_violation, N}` | | (W > N) or (DW > N) or (PW > N) or (R > N) or (PR > N) | No W or R values may be greater than N
+`{nodes_not_synchronized, Members}` | | Rings of all members are not synchronized | Backups will fail if nodes are not synchronized
+`{not_supported, mapred_index, FlowPid}` | | Index lookups for MapReduce are only supported with Pipe | Set `mapred_system` from `legacy` to `pipe`
+`notfound` | | No value found | Value was deleted, or was not yet stored or replicated
+`{pr_val_unsatisfied, PR, Primaries}` | | Same as `r_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PR` value was set too high
+`{pr_val_violation, R}` | | Same as `r_val_violation` but concerning `Primary` reads | Set a valid `PR` value
+`precommit_fail` | `Pre-commit hook <Mod>:<Fun> failed with reason <Reason>` | The given precommit function failed for the given `Reason` | Fix the precommit function code
+`{pw_val_unsatisfied, PR, Primaries}` | | Same as `w_val_unsatisfied` but only counts `Primary` node replies | Too many primary nodes are down or the `PW` value was set too high
+`{pw_val_violation, PW}` | | Same as `w_val_violation` but concerning primary writes | Set a valid `PW` value
+`{r_val_unsatisfied, R, Replies}` | | Not enough nodes replied to satisfy the `R` value, contains the given `R` value and the actual number of `Replies` | Too many nodes are down or the R value was set too high
+`{r_val_violation, R}` | | The given R value was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid R value
+`receiver_down` | | Remote process failed to acknowledge request | Can occur when listkeys is called
+`{rw_val_violation, RW}` | | The given `RW` property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid `RW` value
+`{siblings_not_allowed, Object}` | `Siblings not allowed: <Object>` | The hook to index cannot abide siblings | Set the bucket's `allow_mult` property to `false`
+`timeout`| | The given action took too long to reply | Ensure your cluster is up and nodes are able to communicate with each other. See <a href="{{< baseurl >}}riak/kv/3.0.4/using/repair-recovery/errors/#more"> Step 1</a>. Or check you have a reasonable `ulimit` size. Note that listkeys commands can easily time out and shouldn't be used in production.
+`{too_few_arguments, Args}` | | Index query requires at least one argument | Fix your query format
+`{too_many_arguments, Args}` | | Index query is malformed with more than 1 (exact) or 2 (range) values | Fix your query format
+`too_many_fails` | | Too many write failures to satisfy W or DW | Try writing again. Or ensure your nodes/network is healthy. Or set a lower W or DW value
+`too_many_results` | | Too many results are attempted to be returned | This is a protective error.
Either change your query to return fewer results, or change your `max_search_results` setting in `app.config` (it defaults to 100,000)
+`{unknown_field_type, Field}` | `Unknown field type for field: <Field>.` | Unknown index field extension (begins with underscore) | The only valid field types are `_int` and `_bin`
+`{w_val_unsatisfied, RepliesW, RepliesDW, W, DW}` | | Not enough nodes replied to satisfy the W value, contains the given W value and the actual number of `Replies*` for either `W` or `DW` | Too many nodes are down or the `W` or `DW` value was set too high
+`{w_val_violation, W}` | | The given W property was non-numeric and not a valid setting (`one`, `all`, `quorum`) | Set a valid W value
+ | `Invalid equality query <SKey>` | Equality query is required and must be binary for an index call | Pass in an equality value when performing a 2i equality query
+ | `Invalid range query: <Min> -> <Max>` | Both range query values are required and must be binary for an index call | Pass in both range values when performing a 2i range query
+ | `Failed to start <Mod> <Reason>:<Reason>` | Riak KV failed to start for given `Reason` | Several possible reasons for failure, read the attached reason for insight into resolution
+
+### Backend Errors
+
+These errors tend to stem from server-based problems. Backends are
+sensitive to low or corrupt disk or memory resources, native code, and
+configuration differences between nodes. Conversely, a network issue is
+unlikely to affect a backend.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`data_root_not_set` | | Same as `data_root_unset` | Set the `data_root` directory in config
+`data_root_unset` | `Failed to create bitcask dir: data_root is not set` | The `data_root` config setting is required | Set `data_root` as the base directory where to store bitcask data, under the `bitcask` section
+`{invalid_config_setting, multi_backend, list_expected}` | | Multi backend configuration requires a list | Wrap `multi_backend` config value in a list
+`{invalid_config_setting, multi_backend, list_is_empty}` | | Multi backend configuration requires a value | Configure at least one backend under `multi_backend` in `app.config`
+`{invalid_config_setting, multi_backend_default, backend_not_found}` | | | Must choose a valid backend type to configure
+`multi_backend_config_unset` | | No configuration for Multi backend | Configure at least one backend under `multi_backend` in `app.config`
+`not_loaded` | | Native driver not loading | Ensure your native drivers (`.dll` or `.so` files) exist under `lib/<project>/priv`, where `<project>` is most likely eleveldb
+`{riak_kv_multi_backend, undefined_backend, BackendName}` | | Backend defined for a bucket is invalid | Define a valid backend before using this bucket
+`reset_disabled` | | Attempted to reset a Memory backend in production | Don't use this in production
+
+### JavaScript
+
+These are some errors related to JavaScript pre-commit functions,
+MapReduce functions, or simply the management of the pool of JavaScript
+VMs. If you do not use JavaScript, these should not be encountered. If
+they are, check your configuration for overly high `*js_vm*` values, or
+treat them as a symptom of a deeper issue, such as low resources.
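+
+For reference, here is a minimal sketch of where those JavaScript VM pool
+settings live in the `riak_kv` section of `app.config` (the counts shown
+here are illustrative, not recommendations):
+
+```erlang
+%% app.config -- riak_kv section (illustrative values only)
+{riak_kv, [
+    %% VMs reserved for map phases, reduce phases, and pre/post-commit hooks
+    {map_js_vm_count, 24},
+    {reduce_js_vm_count, 18},
+    {hook_js_vm_count, 2}
+]}
+```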
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`no_vms` | `JS call failed: All VMs are busy.` | All JavaScript VMs are in use | Wait and run again; increase JavaScript VMs in `app.config` (`map_js_vm_count`, `reduce_js_vm_count`, or `hook_js_vm_count`)
+`bad_utf8_character_code` | `Error JSON encoding arguments: <Args>` | A badly formatted UTF-8 character code was given | Only use correct UTF-8 characters for JavaScript code and arguments
+`bad_json` | | Bad JSON formatting | Only use correctly formatted JSON for JavaScript command arguments
+ | `Invalid bucket properties: <Details>` | Listing bucket properties will fail if invalid | Fix bucket properties
+`{load_error, "Failed to load spidermonkey_drv.so"}` | | The JavaScript driver is corrupted or missing | In OS X you may have compiled with `llvm-gcc` rather than `gcc`.
+
+### MapReduce
+
+These are possible errors logged by Riak's MapReduce implementation,
+both legacy as well as Pipe. If you never use or call MapReduce, you
+should not run across these.
+
+Error | Message | Description | Resolution
+:-----|:--------|:------------|:----------
+`bad_mapper_props_no_keys` | | At least one property should be found by default. *Unused in Riak 1.3+* | Set mapper properties, or don't use it
+`bad_mapred_inputs` | | A bad value sent to MapReduce. *Unused in Riak 1.3+* | When using the Erlang client interface, ensure all MapReduce and search queries are correctly binary
+`bad_fetch` | | An expected local query was not retrievable. *Unused in Riak 1.3+* | JavaScript MapReduce query code placed in a Riak value must be stored before execution
+`{bad_filter, <Filter>}` | | An invalid keyfilter was used | Ensure your MapReduce keyfilter is correct
+`{dead_mapper, <Stacktrace>, <MapperData>}` | | Getting a reply from a mapper for a job that has already exited. *Unused in Riak 1.3+* | Check for a stuck Erlang process, or if using legacy MR ensure `map_cache_size` is set (both issues may require a node restart)
+`{inputs, Reason}` | `An error occurred parsing the "inputs" field.` | MapReduce request has invalid input field | Fix MapReduce fields
+`{invalid_json, Message}` | `The POST body was not valid JSON. The error from the parser was: <Message>` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`javascript_reduce_timeout` | | JavaScript reduce function taking too long | For large numbers of objects, your JavaScript functions may become bottlenecks.
Decrease the quantity of values being passed to and returned from the reduce functions, or rewrite them as Erlang functions
+`missing_field` | `The post body was missing the "inputs" or "query" field.` | Either an inputs or query field is required | Post MapReduce requests with at least one
+`{error,notfound}` | | Used in place of a RiakObject in the mapping phase | Your custom Erlang map function should deal with this type of value
+`not_json` | `The POST body was not a JSON object.` | Posting a MapReduce command requires correct JSON | Format MapReduce requests correctly
+`{no_candidate_nodes, exhausted_prefist, <Stacktrace>, <MapperData>}` | | Some map phase workers died | Possibly a long-running job hitting the MapReduce timeout; upgrade to Pipe
+`{<query>, Reason}` | `An error occurred parsing the "query" field.` | MapReduce request has invalid query field | Fix the MapReduce query
+`{unhandled_entry, Other}` | `Unhandled entry: <Other>` | The `reduce_identity` function is unused | If you don't need `reduce_identity`, just don't set a reduce phase at all
+`{unknown_content_type, ContentType}` | | Bad content type for MapReduce query | Only `application/json` and `application/x-erlang-binary` are accepted
+ | `Phase <Fitting>: <Reason>` | A general error when something happens using the Pipe MapReduce implementation with a bad argument or configuration | Can happen with a bad map or reduce implementation; the most recent known gotcha is when a JavaScript function improperly deals with tombstoned objects
+ | `riak_kv_w_reduce requires a function as argument, not a <Type>` | Reduce requires a function object, not any other type | This shouldn't happen
+
+## Specific Messages
+
+Although you can put together many error causes with the tables above,
+here are some common yet esoteric messages with known causes and
+solutions.
+
+ Message | Resolution
+:--------|:----------
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('`Node`', []) | The Node has been changed, either through a change of IP or of the `vm.args` `-name` value, without notifying the ring. Either use the `riak-admin cluster replace` command, or remove the corrupted ring files (`rm -rf /var/lib/riak/ring/*`) and rejoin the cluster
+gen_server <`PID`> terminated with reason: no function clause matching riak_core_pb:encode(`Args`) line 40 | Ensure you do not have different settings on different nodes (for example, a `ttl` setting on one node's memory backend, and another without)
+monitor `busy_dist_port` `Pid` [...{almost_current_function,...] | This message means distributed Erlang buffers are filling up. Try setting `+zdbbl` higher in `vm.args`, such as `+zdbbl 16384`. Or check that your network is not slow. Or ensure you are not slinging large values. If a high bandwidth network is congested, try setting RTO_min down to 0 msec (or 1 msec).
+<`PID`>@riak_core_sysmon___handler:handle_event:89 Monitor got {suppressed,port_events,1} | Logged as info, you can add `+swt very_low` to your `vm.args`
+(in LevelDB LOG files) Compaction error | Turn off the node and run repair on the LevelDB partition. See <a href="{{< baseurl >}}riak/kv/3.0.4/using/repair-recovery/errors/#more">Step 2</a>.
+enif_send: env==NULL on non-SMP VM/usr/lib/riak/lib/os_mon-2.2.9/priv/bin/memsup: Erlang has closed. | Riak's Erlang VM is built with SMP support and if Riak is started on a non-SMP system, an error like this one is logged. This is commonly seen in virtualized environments configured for only one CPU core.
+exit with reason bad return value: {error,eaddrinuse} in context start_error | An error like this example can occur when another process is already bound to the same address as the process being started is attempting to bind to. Use operating system tools like `netstat`, `ps`, and `lsof` to determine the root cause of this kind of error; check for the existence of stale `beam.smp` processes.
+exited with reason: eaddrnotavail in gen_server:init_it/6 line 320 | An error like this example can result when Riak cannot bind to the addresses specified in the configuration. In this case, you should verify the HTTP and Protocol Buffers addresses in `app.config` and ensure that the ports being used are not in the privileged (1-1024) range, as the `riak` user will not have access to such ports.
+gen_server riak_core_capability terminated with reason: no function clause matching orddict:fetch('riak@192.168.2.2', []) line 72 | Error output like this example can indicate that a previously running Riak node with an original `-name` value in `vm.args` has been modified by simply changing the value in `vm.args` and not properly through `riak-admin cluster replace`.
+** Configuration error: [FRAMEWORK-MIB]: missing context.conf file => generating a default file | This error is commonly encountered when starting Riak Enterprise without prior [SNMP]({{<baseurl>}}riak/kv/3.0.4/using/reference/snmp) configuration.
+RPC to 'node@example.com' failed: {'EXIT', {badarg, [{ets,lookup, [schema_table,<<"search-example">>], []} {riak_search_config,get_schema,1, [{file,"src/riak_search_config.erl"}, {line,69}]} ...| This error can be caused when attempting to use Riak Search without first enabling it in each node's `app.config`. See the [configuration files][config reference] documentation for more information on enabling Riak Search.
+
+
+### More
+
+1. <a name="f1"></a>Ensure node inter-communication
+    - Check `riak-admin member-status` and ensure the cluster is valid.
+    - Check `riak-admin ring-status` and ensure the ring and vnodes are communicating as expected.
+    - Ensure your machine does not have a firewall or other issue that prevents traffic to the remote node.
+    - Your remote `vm.args` `-setcookie` must be the same value for every node in the cluster.
+    - The `vm.args` `-name` value must not change after joining the node (unless you use `riak-admin cluster replace`).
+
+2. <a name="f2"></a>Run LevelDB compaction
+    1. `find . -name "LOG" -exec grep -l 'Compaction error' {} \;` *(Finding one compaction error is interesting; more than one might be a strong indication of a hardware or OS bug)*
+    2. Stop Riak on the node: `riak stop`
+    3. Start an Erlang session (do not start Riak, we just want Erlang)
+    4. From the Erlang console, run the following command to set up the LevelDB environment:
+
+        ```erlang
+        [application:set_env(eleveldb, Var, Val) || {Var, Val} <-
+            [{max_open_files, 2000},
+             {block_size, 1048576},
+             {cache_size, 20*1024*1024*1024},
+             {sync, false},
+             {data_root, "/var/db/riak/leveldb"}]].
+        ```
+    5. For each of the corrupted LevelDB databases (found by `find . -name "LOG" -exec grep -l 'Compaction error' {} \;`) run this command, substituting in the proper vnode number (the shell sketch after this list can help collect these).
+
+        ```erlang
+        eleveldb:repair("/var/db/riak/leveldb/442446784738847563128068650529343492278651453440", []).
+        ```
+    6. When all have finished successfully you may restart the node: `riak start`
+    7. Check for proper operation by looking at the log files in `/var/log/riak` and at the `LOG` files in the affected LevelDB vnodes.
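+
+If many vnodes are affected, the corrupted directories from step 1 can be
+collected with a small shell sketch before opening the Erlang session (this
+assumes the LevelDB data root used in the example above,
+`/var/db/riak/leveldb`; adjust it to match your `platform_data_dir`):
+
+```bash
+# Print the vnode directories whose LevelDB LOG reports compaction errors
+cd /var/db/riak/leveldb
+for log in $(find . -name "LOG" -exec grep -l 'Compaction error' {} \;); do
+    dirname "$log"
+done
+```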
+ + + + diff --git a/content/riak/kv/3.0.4/using/repair-recovery/failed-node.md b/content/riak/kv/3.0.4/using/repair-recovery/failed-node.md new file mode 100644 index 0000000000..131af30106 --- /dev/null +++ b/content/riak/kv/3.0.4/using/repair-recovery/failed-node.md @@ -0,0 +1,114 @@ +--- +title: "Recovering a Failed Node" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Recover a Failed Node" + identifier: "repair_recover_failed_node" + weight: 104 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.4/ops/running/recovery/failed-node + - /riak/kv/3.0.4/ops/running/recovery/failed-node +--- + +## General Recovery Notes + +A Riak node can fail for many reasons, but a handful of checks enable you to +uncover some of the most common problems that can lead to node failure, +such as checking for RAID and filesystem consistency or faulty memory and +ensuring that your network connections are fully functioning. + +When a node fails and is then brought back into the cluster, make sure that it has the same node name that it did before it crashed. If the name has changed, the cluster will assume that the node is entirely new and that the crashed node is still part of the cluster. + +During the recovery process, hinted handoff will kick in and update the data on +the recovered node with updates accepted from other nodes in the cluster. Your +cluster may temporarily return `not found` for objects that are currently +being handed off (see our page on [Eventual Consistency](../../../learn/concepts/eventual-consistency) for more details on +these scenarios, in particular how the system behaves while the failed node is +not part of the cluster). + +## Node Name Changed + +If you are recovering from a scenario in which node name changes are out of +your control, you'll want to notify the cluster of its *new* name using the +following steps: + +1. Stop the node you wish to rename: + + ```bash + riak stop + ``` + + +2. Mark the node down from another node in the cluster: + + ```bash + riak-admin down <previous_node_name> + ``` + +3. Update the node name in Riak's configuration files: + + ```riakconf + nodename = <updated_node_name> + ``` + + ```vmargs + -name <updated_node_name> + ``` + +4. Delete the ring state directory (usually `/var/lib/riak/ring`). + +5. Start the node again: + + ```bash + riak start + ``` + +6. Ensure that the node comes up as a single instance: + + ```bash + riak-admin member-status + ``` + + The output should look something like this: + + ``` + ========================= Membership ========================== +Status Ring Pending Node +--------------------------------------------------------------- +valid 100.0% -- 'dev-rel@127.0.0.1' +--------------------------------------------------------------- +Valid:1 / Leaving:0 / Exiting:0 / Joining:0 / Down:0 + ``` + +7. Join the node to the cluster: + + ```bash + riak-admin cluster join <node_name_of_a_member_of_the_cluster> + ``` + +8. Replace the old instance of the node with the new: + + ```bash + riak-admin cluster force-replace <previous_node_name> <new_node_name> + ``` + +9. 
Review the changes:
+
+    ```bash
+    riak-admin cluster plan
+    ```
+
+    Finally, commit those changes:
+
+    ```bash
+    riak-admin cluster commit
+    ```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/repair-recovery/failure-recovery.md b/content/riak/kv/3.0.4/using/repair-recovery/failure-recovery.md
new file mode 100644
index 0000000000..88f4b725c6
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/repair-recovery/failure-recovery.md
@@ -0,0 +1,129 @@
+---
+title: "Failure & Recovery"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Failure & Recovery"
+    identifier: "repair_recover_failure"
+    weight: 100
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/recovery/failure-recovery
+  - /riak/kv/3.0.4/ops/running/recovery/failure-recovery
+---
+
+Riak was built to withstand---or at the very least reduce the severity
+of---many types of system failure. Nonetheless, bugs are a reality,
+hardware does break, and occasionally Riak itself will fail. Here, we'll
+list some steps that can be taken to minimize the harm caused by a general
+cluster failure.
+
+## Forensics
+
+When a failure occurs, collect as much information as possible. Check
+monitoring systems, and back up log and configuration files if they are
+available, including system logs like `dmesg` and `syslog`. Make sure
+that the other nodes in the Riak cluster are still operating normally and
+are not affected by a wider problem like a virtualization or network outage.
+Try to determine the cause of the problem from the data you have collected.
+
+## Data Loss
+
+Many failures incur no data loss, or only minimal loss that can be
+repaired automatically, without intervention. Outage of a single node
+does not necessarily cause data loss, as other replicas of every key are
+available elsewhere in the cluster. Once the node is detected as down,
+other nodes in the cluster will take over its responsibilities
+temporarily and transmit the updated data to it when it eventually
+returns to service (also called [hinted handoff]({{<baseurl>}}riak/kv/3.0.4/learn/glossary/#hinted-handoff)).
+
+More severe data loss scenarios usually relate to hardware failure.
+If data is lost, several options are available for restoring it.
+
+1. **Restore from backup** - A daily backup of Riak nodes can be helpful.
+   The data in this backup may be stale depending on the time at which
+   the node failed, but it can be used to partially restore data from
+   lost storage volumes. If running in a RAID configuration, rebuilding
+   the array may also be possible.
+2. **Restore from multi-cluster replication** - If replication is enabled
+   between two or more clusters, the missing data will gradually be
+   restored via realtime replication and fullsync replication. A
+   fullsync operation can also be triggered manually via the `riak-repl`
+   command.
+3. **Restore using intra-cluster repair** - Riak versions 1.2 and greater
+   include a repair feature which will restore lost partitions with
+   data from other replicas. Currently, this must be invoked manually
+   using the Riak console and should be performed with guidance from a
+   Basho Client Services Engineer.
+
+Once data has been restored, normal operations should continue. If
+multiple nodes completely lose their data, consultation and assistance
+from Basho are strongly recommended.
+
+## Data Corruption
+
+Data at rest on disk can become corrupted by hardware failure or other
+events.
Generally, the Riak storage backends are designed to handle
+cases of corruption in individual files or entries within files, and can
+repair them automatically or simply ignore the corrupted parts.
+Otherwise, clusters can recover from data corruption in roughly the same
+way that they recover from data loss.
+
+## Out-of-Memory
+
+Sometimes, Riak will exit when it runs out of available RAM. While this
+does not necessarily cause data loss, it may indicate that the cluster
+needs to be scaled out. If free capacity is low on the rest of the cluster while the node is out, other nodes may also be at risk, so monitor carefully.
+
+Replacing the node with one that has greater RAM capacity may temporarily
+alleviate the problem, but out-of-memory (OOM) issues tend to be an indication
+that the cluster is under-provisioned.
+
+## High Latency / Request Timeout
+
+High latencies and timeouts can be caused by slow disks or networks or an
+overloaded node. Check `iostat` and `vmstat` or your monitoring system to
+determine the state of resource usage. If I/O utilization is high but
+throughput is low, this may indicate that the node is responsible for
+too much data and growing the cluster may be necessary. Additional RAM
+may also improve latency because more of the active dataset will be
+cached by the operating system.
+
+Sometimes extreme latency spikes can be caused by [sibling explosion]({{<baseurl>}}riak/kv/3.0.4/developing/usage/conflict-resolution#siblings). This condition occurs when the client application does not resolve conflicts properly or in a timely fashion. In that scenario, the size of the value on disk grows in proportion to
+the number of siblings, causing longer disk service times and slower
+network responses.
+
+Sibling explosion can be detected by examining the `node_get_fsm_siblings`
+and `node_get_fsm_objsize` statistics from the `riak-admin status` command.
+To recover from sibling explosion, the application should be throttled and
+the resolution policy might need to be invoked manually on offending keys.
+
+A Basho CSE can assist in manually finding large values, i.e., those that
+potentially have a sibling explosion problem, in the storage backend.
+
+MapReduce requests typically involve multiple I/O operations and are
+thus the most likely to time out. From the perspective of the client
+application, the success of MapReduce requests can be improved by reducing the
+number of inputs, supplying a longer request timeout, and reducing the usage
+of secondary indexes. Heavily loaded clusters may experience more MapReduce
+timeouts simply because many other requests are being serviced as well. Adding
+nodes to the cluster can reduce MapReduce failure in the long term by
+spreading load and increasing available CPU and IOPS.
+
+
+## Cluster Recovery From Backups
+
+See [Changing Cluster Information]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/changing-cluster-info/#clusters-from-backups) for instructions on cluster recovery.
+
+{{% note title="Tip" %}}
+If you are a TI Tokyo Riak support customer and require assistance or
+further advice with a cluster recovery, please file a ticket with the
+<a href="https://support.tiot.jp">TI Tokyo Helpdesk</a>.
+{{% /note %}} + + + + diff --git a/content/riak/kv/3.0.4/using/repair-recovery/repairs.md b/content/riak/kv/3.0.4/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..1ba2428fba --- /dev/null +++ b/content/riak/kv/3.0.4/using/repair-recovery/repairs.md @@ -0,0 +1,391 @@ +--- +title: "Repairs" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Repairs" + identifier: "repair_recover_repairs" + weight: 102 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.4/ops/running/recovery/repairing-indexes + - /riak/kv/3.0.4/ops/running/recovery/repairing-indexes + - /riak/3.0.4/ops/running/recovery/failed-node + - /riak/kv/3.0.4/ops/running/recovery/failed-node + - /riak/3.0.4/ops/running/recovery/repairing-leveldb + - /riak/kv/3.0.4/ops/running/recovery/repairing-leveldb + - /riak/3.0.4/ops/running/recovery/repairing-partitions + - /riak/kv/3.0.4/ops/running/recovery/repairing-partitions +--- + +[cluster ops aae]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/active-anti-entropy/ +[config ref]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/ +[Erlang shell]: http://learnyousomeerlang.com/starting-out +[glossary AAE]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#active-anti-entropy-aae +[glossary readrep]: {{<baseurl>}}riak/kv/3.0.4/learn/glossary/#read-repair +[search config]: {{<baseurl>}}riak/kv/3.0.4/configuring/search/#search-config-settings +[tiered storage]: {{<baseurl>}}riak/kv/3.0.4/setup/planning/backend/leveldb/#tiered-storage + + + +## Repairing Search Indexes + +Riak search indexes are repaired whenever objects are corrected by [read repair][glossary readrep]. + +[Active anti-entropy (AAE)][glossary AAE] is provided for Riak search. + +Riak KV's [configuration for AAE][cluster ops aae] will be used for Riak search's AAE hashtrees by default. + +Riak search can be provided its own AAE settings in the [search config settings][search config]. + +## Repairing Secondary Indexes + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. + +### Running a Repair + +The secondary indexes of a single partition can be repaired by executing: + +```bash +riak-admin repair-2i »Partition ID« +``` + +The secondary indexes of every partition can be repaired by executing the same command, without a partition ID: + +```bash +riak-admin repair-2i +``` + +### Monitoring a Repair + +Repairs can be monitored using the below command: + +```bash +riak-admin repair-2i status +``` + +### Killing a Repair + +In the event the secondary index repair operation needs to be halted, all repairs can be killed with: + +```bash +riak-admin repair-2i kill +``` + +## Repairing LevelDB + +In the event of major hardware or filesystem problems, LevelDB can become corrupted. These failures are uncommon, but they could happen, as heavy loads can push I/O limits. + +### Checking for Compaction Errors + +Any time there is a compaction error, it will be noted in the LevelDB logs. Those logs are located in a `LOG` file in each instance of LevelDB in a Riak node, specifically in `#(platform_data_dir)/leveldb/<vnode>/LOG`. The `platform_data_dir` can be specified in the [`riak.conf`][config ref] configuration file. 
The default is `./data`.
+
+Compaction error messages take the following form:
+
+```
+<timestamp> Compaction Error: Corruption: corrupted compressed block contents
+```
+
+To check whether your node has experienced such errors, you will need to run a script that searches for `Compaction Error` in each `LOG` file. Here is an example script:
+
+```bash
+find . -name "LOG" -exec grep -l 'Compaction error' {} \;
+```
+
+If there are compaction errors in any of your vnodes, those will be listed in the console. If any vnode has experienced such errors, you would see output like this:
+
+```
+./442446784738847563128068650529343492278651453440/LOG
+```
+
+
+{{% note %}}
+While corruption on one vnode is not uncommon, corruption in several vnodes very likely means that there is a deeper problem that needs to be addressed, perhaps on the OS or hardware level.
+{{% /note %}}
+
+
+## Healing Corrupted LevelDBs
+
+When you have discovered corruption in your LevelDB backend, the steps you take to resolve it will depend on whether you are using [tiered storage] or not.
+
+Choose your setup below:
+
+1. [Just LevelDB](#leveldb)
+2. [LevelDB with tiered storage](#leveldb-with-tiered-storage)
+
+
+### LevelDB
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+3\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+4\. Then set `Options` equal to an empty list:
+
+```erlang
+Options = [].
+```
+
+5\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB that you found using the [`find` command above](#checking-for-compaction-errors).
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+6\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+7\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+### LevelDB with Tiered Storage
+
+Follow the steps below to heal your corrupted LevelDB.
+
+1\. Stop the node:
+
+```bash
+riak stop
+```
+
+2\. Check your `riak.conf` file and make note of the following values:
+
+* leveldb.tiered (integer)
+* leveldb.tiered.path.fast
+* leveldb.tiered.path.slow
+
+3\.
To repair the corrupted LevelDB through the [Erlang shell], you will run the `riak ertspath` command to output the path to Riak's internal Erlang runtime, and the `erl` command to start the Erlang shell. You can run them in a single command:
+
+```bash
+`riak ertspath`/erl
+```
+
+{{% note title="Erlang version" %}}
+Note, you must start up the Erlang shell using the same version of Erlang packaged with Riak. The above command will make sure you do so. If you choose not to use the above command please pay close attention to the version and location you use with the `erl` command.
+{{% /note %}}
+
+4\. Once in the shell, run the following command:
+
+```erlang
+application:set_env(eleveldb, data_root, "").
+```
+
+5\. Then supply the information you noted in Step 2:
+
+```erlang
+Options = [
+  {tiered_slow_level, »leveldb.tiered value«},
+  {tiered_fast_prefix, "»leveldb.tiered.path.fast value«"},
+  {tiered_slow_prefix, "»leveldb.tiered.path.slow value«"}
+].
+```
+
+6\. Set some supportive variables for the repair process. These will be custom to your environment and specific repair needs.
+`VNodeList` should be a list of each corrupted LevelDB partition that you found using the [`find` command above](#checking-for-compaction-errors), provided in double quotes.
+
+```erlang
+DataRoot = "»path to your data root«".
+VNodeList = ["»vnode id you want to repair«", ...].
+```
+
+7\. Run the following commands, which will parse the information you provided and run `eleveldb:repair` over all of the vnode IDs that you listed in `VNodeList`.
+
+```erlang
+RepairPath = fun(DataRoot, VNodeNumber) -> Path = lists:flatten(DataRoot ++ "/" ++ VNodeNumber), io:format("Repairing ~s.~n",[Path]), Path end.
+[eleveldb:repair(RepairPath(DataRoot, VNodeNumber), Options) || VNodeNumber <- VNodeList].
+```
+
+8\. This process may take several minutes. When it has completed successfully, you can restart the node and continue as usual.
+
+```bash
+riak start
+```
+
+
+## Repairing Partitions
+
+If you have experienced a loss of object replicas in your cluster, you
+may need to perform a repair operation on one or more of your data
+[partitions]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/clusters/#the-ring). Repairs of Riak KV data are typically
+run in situations where partitions or whole nodes are lost due to
+corruption or hardware failure. In these cases, nodes or partitions are
+brought back online without any data, which means that the need to
+repair data will depend mainly on your use case and on whether [active anti-entropy]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/) is enabled.
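+
+If you are unsure whether AAE is currently doing that work on a node, a
+quick check is the `riak-admin aae-status` command, which summarizes recent
+exchange, entropy tree, and key repair activity:
+
+```bash
+riak-admin aae-status
+```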
+
+You will need to run a repair if the following are both true:
+
+* Active anti-entropy is [disabled]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/#disabling-active-anti-entropy)
+* You have both non-expiring data and keys that are not accessed
+  frequently (which means that they are not likely to be subject to
+  [read repair]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/#read-repair-vs-active-anti-entropy))
+
+You will most likely not need to run a repair operation if _any_ of the
+following is true:
+
+* Active anti-entropy is [enabled]({{<baseurl>}}riak/kv/3.0.4/learn/concepts/active-anti-entropy/#enabling-active-anti-entropy)
+* Your entire key set is accessed frequently, allowing passive read
+  repair to repair the partitions
+* Your data expires frequently
+
+In most cases, we recommend either using active anti-entropy or, if
+necessary and only when necessary, running a repair operation using the
+instructions below.
+
+### Running a Repair
+
+The Riak KV repair operation will repair objects from a node's adjacent
+partitions on the ring, consequently fixing the index. This is done as
+efficiently as possible by generating a hash range for all the buckets
+and thus avoiding a preflist calculation for each key. Only a hash of
+each key is computed; its range is determined from a bucket-to-range map,
+and then the hash is checked against that range.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make them
+mutually exclusive events. If you join or remove a node, all repairs
+across the entire cluster will be killed.
+
+### Repairing a Single Partition
+
+In the case of data loss in a single partition, only that partition can
+be repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit **Enter** again to get a console prompt.
+
+2. Execute the repair for a single partition using the below command:
+
+    ```erlang
+    riak_kv_vnode:repair(»Partition ID«).
+    ```
+
+    where `»Partition ID«` is replaced by the ID of the partition to
+    repair. For example:
+
+    ```erlang
+    riak_kv_vnode:repair(251195593916248939066258330623111144003363405824).
+    ```
+
+3. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Repairing All Partitions on a Node
+
+If a node is lost, all partitions currently owned by that node can be
+repaired.
+
+1. From any node in the cluster, attach to Riak's Erlang shell:
+
+    ```bash
+    riak attach
+    ```
+
+2. Get a copy of the current Ring:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with ring record information.
+    You can safely ignore it.
+
+3. Get a list of partitions owned by the node that needs to be repaired.
+Replace `dev1@127.0.0.1` with the name of the node to be repaired. The
+name can be found in each node's `vm.args` file, specified as the
+`-name` parameter, if you are using the older configuration system; if
+you are using the newer, `riak.conf`-based system, the name is given by
+the `nodename` parameter.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    **Note**: The above is an [Erlang list
+    comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html)
+    that loops over each `{Partition, Node}` tuple in the ring and
+    extracts only the partitions that match the given node name, as a
+    list.
+
+
+4.
Execute the repair on all the partitions. Executing the repairs all
+at once will cause a lot of `{shutdown, max_concurrency}` messages in
+the logs. These can be safely ignored, as it is just the transfers
+mechanism enforcing an upper limit on the number of concurrent
+transfers.
+
+    ```erlang
+    [riak_kv_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. Once the command has been executed, detach from Riak using
+`Control-C`.
+
+### Monitoring Repairs
+
+The above repair commands can be monitored via the `riak-admin
+transfers` command.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node. This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This command can be executed from a `riak attach`
+session like below:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, and will
+look similar to:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Repairs on a node can also be killed remotely from another node in the
+cluster. From a `riak attach` session the below command can be used:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+
+
+
diff --git a/content/riak/kv/3.0.4/using/repair-recovery/rolling-replaces.md b/content/riak/kv/3.0.4/using/repair-recovery/rolling-replaces.md
new file mode 100644
index 0000000000..b835b6e7e8
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/repair-recovery/rolling-replaces.md
@@ -0,0 +1,76 @@
+---
+title: "Rolling Replaces"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Rolling Replaces"
+    identifier: "repair_recover_replace"
+    weight: 106
+    parent: "managing_repair_recover"
+toc: true
+aliases:
+---
+
+[upgrade]: {{<baseurl>}}riak/kv/3.0.4/setup/upgrading/cluster/
+[rolling restarts]: {{<baseurl>}}riak/kv/3.0.4/using/repair-recovery/rolling-restart/
+[add node]: {{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes
+
+Riak KV functions as a multi-node system, so cluster-level [version upgrades][upgrade] and [restarts][rolling restarts] can be performed on a node-by-node or *rolling* basis.
+
+The following steps should be undertaken on each Riak KV node that you wish to replace:
+
+1\. Create a free node:
+
+  a\. [Create an additional node][add node] with similar specifications to the other nodes in the cluster.
+
+  b\. Or free up a node that is currently in the cluster by having it leave:
+
+  ```bash
+  riak-admin cluster leave »nodename«
+  ```
+
+  After creating a node or leaving a node, wait for all transfers to complete:
+
+  ```bash
+  riak-admin transfers
+  ```
+
+2\. Join the free node to your cluster:
+
+```bash
+riak-admin cluster join »free_node«
+```
+
+3\. Next, replace an existing node with the free node:
+
+```bash
+riak-admin cluster replace »nodename« »free_node«
+```
+
+4\. Then review the cluster transition plan:
+
+```bash
+riak-admin cluster plan
+```
+
+5\. And commit the changes:
+
+```bash
+riak-admin cluster commit
+```
+
+6\. Wait for all transfers to complete:
+
+```bash
+riak-admin transfers
+```
+
+7\. Repeat steps 2-6 above until each node has been replaced.
+
+8\. Join the replaced node back into the cluster or decommission the additional node that was created; a sketch of the decommissioning commands follows below.
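+
+A minimal sketch of that decommissioning, reusing the leave/plan/commit
+sequence from this guide (»free_node« here is the extra node created in
+step 1 that is no longer needed):
+
+```bash
+riak-admin cluster leave »free_node«
+riak-admin cluster plan
+riak-admin cluster commit
+```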
+ + + + diff --git a/content/riak/kv/3.0.4/using/repair-recovery/rolling-restart.md b/content/riak/kv/3.0.4/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..88542bee81 --- /dev/null +++ b/content/riak/kv/3.0.4/using/repair-recovery/rolling-restart.md @@ -0,0 +1,64 @@ +--- +title: "Rolling Restarts" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Rolling Restarts" + identifier: "repair_recover_restart" + weight: 103 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.4/ops/running/recovery/rolling-restart + - /riak/kv/3.0.4/ops/running/recovery/rolling-restart +--- + +Because Riak functions as a multi-node system, cluster-level [Riak version upgrades]({{<baseurl>}}riak/kv/3.0.4/setup/upgrading/cluster) and restarts can be performed on a node-by-node, "rolling" basis. + +The following steps should be undertaken on each Riak node that you wish to restart: + +1\. Stop Riak + +```bash +riak stop +``` + +2\. Perform any necessary maintenance, upgrade, or other work in your cluster. + +3\. Start Riak again + +```bash +riak start +``` + +4\. Verify that the `riak_kv` service is once again available on the target node + +```bash +riak-admin wait-for-service riak_kv <nodename> +``` + +If this responds with `riak_kv is up`, then the service is available and you can move on to the next step. Otherwise, the console will periodically return `riak_kv is not up` until the service is available. + +5\. Verify that all in-progress handoffs have been completed + +```bash +riak-admin transfers +``` + +If this responds with `No transfers active`, then all handoffs are complete. You can either run this command periodically until no more transfers are active or run the following script, which will run the `riak-admin transfers` command every 5 seconds until the transfers are complete: + +```bash +while ! riak-admin transfers | grep -iqF 'No transfers active' +do + echo 'Transfers in progress' + sleep 5 +done +``` + +6\. Repeat the above process for any other nodes that need to be restarted. + + + + diff --git a/content/riak/kv/3.0.4/using/repair-recovery/secondary-indexes.md b/content/riak/kv/3.0.4/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..b3853c5076 --- /dev/null +++ b/content/riak/kv/3.0.4/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,142 @@ +--- +title: "Repairing Secondary Indexes" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Repair Secondary Indexes" + identifier: "repair_recover_2i" + weight: 105 + parent: "managing_repair_recover" +toc: true +aliases: + - /riak/3.0.4/ops/running/recovery/repairing-indexes + - /riak/kv/3.0.4/ops/running/recovery/repairing-indexes +--- + +The `riak-admin repair-2i` command can be used to repair any stale or missing secondary indexes. This command scans and repairs any mismatches between the secondary index data used for querying and the secondary index data stored in the Riak objects. It can be run on all partitions of a node or on a subset of them. We recommend scheduling these repairs outside of peak load time. 
+
+### Running a Repair
+
+The secondary indexes of a single partition can be repaired by executing:
+
+```bash
+riak-admin repair-2i <Partition_ID>
+```
+
+The secondary indexes of every partition can be repaired by executing the same command, without a partition ID:
+
+```bash
+riak-admin repair-2i
+```
+
+### Monitoring a Repair
+
+Repairs can be monitored using the below command:
+
+```bash
+riak-admin repair-2i status
+```
+
+### Killing a Repair
+
+In the event the secondary index repair operation needs to be halted, all repairs can be killed with:
+
+```bash
+riak-admin repair-2i kill
+```
+
+----
+
+## Repairing Search Indexes
+
+Riak Search indexes currently have no form of anti-entropy (such as read-repair). Furthermore, for performance and load balancing reasons, Search reads from one random node. This means that when a replica loss has occurred, inconsistent results may be returned.
+
+### Running a Repair
+
+If a replica loss has occurred, you need to run the repair command. This command repairs objects from a node's adjacent partitions on the ring, consequently fixing the search index.
+
+This is done as efficiently as possible by generating a hash range for all the buckets, thus avoiding a preflist calculation for each key. Only a hash of each key is computed, its range is determined from a bucket→range map, and then the hash is checked against the range.
+
+This code will force all keys in each partition on a node to be reread, thus rebuilding the search index properly.
+
+1. From a cluster node with Riak installed, attach to the Riak console:
+
+    ```bash
+    riak attach
+    ```
+
+    You may have to hit enter again to get a console prompt.
+
+2. Get a list of partitions owned by the node that needs repair:
+
+    ```erlang
+    {ok, Ring} = riak_core_ring_manager:get_my_ring().
+    ```
+
+    You will get a lot of output with Ring record information. You can safely ignore it.
+
+3. Then run the following code to get a list of partitions. Replace 'dev1@127.0.0.1' with the name of the node you need to repair.
+
+    ```erlang
+    Partitions = [P || {P, 'dev1@127.0.0.1'} <- riak_core_ring:all_owners(Ring)].
+    ```
+
+    _Note: The above is an [Erlang list comprehension](http://www.erlang.org/doc/programming_examples/list_comprehensions.html) that loops over each `{Partition, Node}` tuple in the Ring and extracts, as a list, only the partitions that match the given node name._
+
+4. Execute the repair on all the partitions. Executing them all at once like this will cause a lot of `{shutdown,max_concurrency}` messages in the logs, but these are nothing to worry about. That is just the transfers mechanism enforcing an upper limit on the number of concurrent transfers.
+
+    ```erlang
+    [riak_search_vnode:repair(P) || P <- Partitions].
+    ```
+
+5. When you're done, press `Ctrl-D` to disconnect the console. DO NOT RUN `q()`, which will cause the running Riak node to quit. Note that `Ctrl-D` merely disconnects the console from the service; it does not stop the code from running.
+
+
+### Monitoring a Repair
+
+The above repair command can be slow, so if you reattach to the console, you can run the `repair_status` function. You can use the `Partitions` variable defined above to get the status of every partition.
+
+```erlang
+[{P, riak_search_vnode:repair_status(P)} || P <- Partitions].
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+### Killing a Repair
+
+Currently there is no easy way to kill an individual repair. The only
+option is to kill all repairs targeting a given node.
This is done by
+running `riak_core_vnode_manager:kill_repairs(Reason)` on the node
+undergoing repair. This means you'll either have to be attached to
+that node's console or you can use the `rpc` module to make a remote
+call. Here is an example of killing all repairs targeting partitions
+on the local node:
+
+```erlang
+riak_core_vnode_manager:kill_repairs(killed_by_user).
+```
+
+Log entries will reflect that repairs were killed manually, something akin to this:
+
+```
+2012-08-10 10:14:50.529 [warning] <0.154.0>@riak_core_vnode_manager:handle_cast:395 Killing all repairs: killed_by_user
+```
+
+Here is an example of executing the call remotely:
+
+```erlang
+rpc:call('dev1@127.0.0.1', riak_core_vnode_manager, kill_repairs, [killed_by_user]).
+```
+
+When you're done, press `Ctrl-D` to disconnect the console.
+
+Repairs are not allowed to occur during ownership changes. Since
+ownership entails the moving of partition data, it is safest to make
+them mutually exclusive events. If you join or remove a node, all
+repairs across the entire cluster will be killed.
+
+
+

diff --git a/content/riak/kv/3.0.4/using/running-a-cluster.md b/content/riak/kv/3.0.4/using/running-a-cluster.md
new file mode 100644
index 0000000000..0648c8f8c3
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/running-a-cluster.md
@@ -0,0 +1,339 @@
+---
+title: "Running a Cluster"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Running a Cluster"
+    identifier: "managing_running_a_cluster"
+    weight: 200
+    parent: "managing"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/building/basic-cluster-setup
+  - /riak/kv/3.0.4/ops/building/basic-cluster-setup
+---
+
+Configuring a Riak cluster involves instructing each node to listen on a
+non-local interface, i.e. not `127.0.0.1`, and then joining all of the
+nodes together to participate in the cluster.
+
+Most configuration changes will be applied to the [configuration file]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/) located in your `rel/riak/etc` directory (if
+you compiled from source) or `/etc` (if you used a binary install of
+Riak).
+
+The commands below presume that you are running from a source install,
+but if you have installed Riak with a binary install, you can substitute
+the usage of `bin/riak` with `sudo /usr/sbin/riak` and `bin/riak-admin`
+with `sudo /usr/sbin/riak-admin`. The `riak` and `riak-admin` scripts
+are located in the `/bin` directory of your installation.
+
+> **Note on changing the `name` value**
+>
+> If possible, you should avoid starting Riak prior to editing the name of
+a node. This setting corresponds to the `nodename` parameter in the
+`riak.conf` file if you are using the newer configuration system, and to
+the `-name` parameter in `vm.args` (as described below) if you are using
+the older configuration system. If you have already started Riak with
+the default settings, you cannot change the `-name` setting and then
+successfully restart the node.
+>
+> If you cannot restart after changing the `-name` value, you have two
+options:
+>
+> * Discard the existing ring metadata by removing the contents of the
+`ring` directory. This will require rejoining all nodes into a
+cluster again.
+>
+> * Rename the node using the [`riak-admin cluster replace`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster-replace) command. This will not work if you have previously only started Riak with a single node.
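+For the first option, the `ring` directory lives under the node's data directory. A minimal sketch, assuming a default package install where ring data sits in `/var/lib/riak/ring` (your `platform_data_dir` may point elsewhere):
+
+```bash
+riak stop
+# Discard the ring metadata so the node can come back up under its new name.
+rm -rf /var/lib/riak/ring/*
+riak start
+```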
+
+## Configure the First Node
+
+First, stop your Riak node if it is currently running:
+
+```bash
+riak stop
+```
+
+#### Select an IP address and port
+
+Let's say that the IP address for your cluster is 192.168.1.10 and that
+you'll be using the default port (8087). If you're using the [Protocol Buffers interface]({{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers/) to Riak (which we recommend over the HTTP
+interface due to performance gains), you should change your
+configuration file:
+
+```riakconf
+listener.protobuf.internal = 127.0.0.1:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"127.0.0.1", 8087 },
+```
+
+becomes
+
+```riakconf
+listener.protobuf.internal = 192.168.1.10:8087
+```
+
+```appconfig
+%% In the pb section of riak_core:
+
+{"192.168.1.10", 8087 },
+```
+
+{{% note title="Note on upgrading to 2.0" %}}
+If you are upgrading to Riak version 2.0 or later from a pre-2.0
+release, you can use either your old `app.config`/`vm.args`
+configuration files or the newer `riak.conf` if you wish. If you have
+installed Riak 2.0 directly, you should use only `riak.conf`.
+
+Below, examples will be provided for both the old and new configuration
+systems. Bear in mind that you need to use either the older or the newer
+system, but never both simultaneously.
+
+More on configuring Riak can be found in the [Configuration documentation](../../configuring/reference).
+{{% /note %}}
+
+If you're using the HTTP interface, you will need to alter your
+configuration in an analogous way:
+
+```riakconf
+listener.http.internal = 127.0.0.1:8098
+```
+
+```appconfig
+%% In the riak_core section:
+
+{http, [ {"127.0.0.1", 8098 } ]},
+```
+
+becomes
+
+```riakconf
+listener.http.internal = 192.168.1.10:8098
+```
+
+```appconfig
+{http, [ {"192.168.1.10", 8098 } ]},
+```
+
+#### Name your node
+
+Every node in Riak has a name associated with it. The default name is
+`riak@127.0.0.1`. Let's say that you want to change the name to
+`riak@192.168.1.10`:
+
+```riakconf
+nodename = riak@127.0.0.1
+```
+
+```vmargs
+-name riak@127.0.0.1
+```
+
+becomes
+
+```riakconf
+nodename = riak@192.168.1.10
+```
+
+```vmargs
+-name riak@192.168.1.10
+```
+
+> **Node Names**
+>
+> Use fully qualified domain names ([FQDNs](http://en.wikipedia.org/wiki/Fully_qualified_domain_name)) rather than IP addresses for the cluster member node names. For example, `riak@cluster.example.com` and `riak@192.168.1.10`
+are both acceptable node naming schemes, but using the FQDN style is
+preferred.
+>
+> Once a node has been started, in order to change the name you must
+either remove ring files from the `/data/ring` directory or
+[`riak-admin cluster force-replace`]({{<baseurl>}}riak/kv/3.0.4/using/admin/riak-admin/#cluster-force-replace) the node.
+
+#### Start the node
+
+Now that your node is properly configured, you can start it:
+
+```bash
+riak start
+```
+
+If the Riak node has been previously started, you must use the
+`riak-admin cluster replace` command to change the node name and update
+the node's ring file.
+
+```bash
+riak-admin cluster replace riak@127.0.0.1 riak@192.168.1.10
+```
+
+{{% note title="Note on single nodes" %}}
+If a node is started singly using default settings, as you might do when you
+are building your first test environment, you will need to remove the ring
+files from the data directory after you edit your configuration files.
+`riak-admin cluster replace` will not work since the node has not been joined
+to a cluster.
+{{% /note %}} + +As with all cluster changes, you need to view the planned changes by +running `riak-admin cluster plan` and then running `riak-admin cluster +commit` to finalize those changes. + +The node is now properly set up to join other nodes for cluster +participation. You can proceed to adding a second node to the cluster. + +## Add a Second Node to Your Cluster + +Repeat the above steps for a second host on the same network, providing +the second node with a host/port and node name. Once the second node has +started, use `riak-admin cluster join` to join the second node to the +first node, thereby creating an initial Riak cluster. Let's say that +we've named our second node `riak@192.168.1.11`. From the new node's +`/bin` directory: + +```bash +riak-admin cluster join riak@192.168.1.10 +``` + +Output from the above should resemble: + +``` +Success: staged join request for `riak@192.168.1.11` to `riak@192.168.1.10` +``` + +Next, plan and commit the changes: + +```bash +riak-admin cluster plan +riak-admin cluster commit +``` + +After the last command, you should see: + +``` +Cluster changes committed +``` + +If your output was similar, then the second Riak node is now part of the +cluster and has begun syncing with the first node. Riak provides several +ways to determine the cluster's ring status. Here are two ways to +examine your Riak cluster's ring: + +1. Using the `riak-admin` command: + + ```bash + bin/riak-admin status | grep ring_members + ``` + + With output resembling the following: + + ```bash + ring_members : ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +2. Running the `riak attach` command. This will open up an Erlang shell, +into which you can type the following command: + + ```erlang + 1> {ok, R} = riak_core_ring_manager:get_my_ring(). + + %% Response: + + {ok,{chstate,'riak@192.168.1.10',......... + (riak@192.168.52.129)2> riak_core_ring:all_members(R). + ['riak@192.168.1.10','riak@192.168.1.11'] + ``` + +To join additional nodes to your cluster, repeat the above steps. You +can also find more detailed instructions about [adding and removing nodes]({{<baseurl>}}riak/kv/3.0.4/using/cluster-operations/adding-removing-nodes) from a cluster. + +> **Ring Creation Size** +> +> All nodes in the cluster +must have the same initial ring size setting in order to join, and +participate in cluster activity. This setting can be adjusted in your +configuration file using the `ring_creation_size` parameter if you're +using the older configuration system or `ring_size` in the new system. +> +> Check the value of all nodes if you receive a message like this: +> `Failed: riak@10.0.1.156 has a different ring_creation_size` + +## Running Multiple Nodes on One Host + +If you built Riak from source code, or if you are using the Mac OS X +pre-built package, then you can easily run multiple Riak nodes on the +same machine. The most common scenario for doing this is to experiment +with running a Riak cluster. + +**Note**: If you have installed the `.deb` or `.rpm` package, then you +will need to download and build Riak from source to follow the +directions below. + +To run multiple nodes, make copies of the `riak` directory. + +- If you ran `make all rel`, then this can be found in `./rel/riak` + under the Riak source root directory. +- If you are running Mac OS X, then this is the directory where you + unzipped the `.tar.gz` file. + +Presuming that you copied `./rel/riak` into `./rel/riak1`, `./rel/riak2`, +`./rel/riak3`, and so on, you need to make two changes: + +1. 
Set your handoff port and your Protocol Buffers or HTTP port +(depending on which interface you are using) to different values on each +node. For example: + + ```riakconf + # For Protocol Buffers: + listener.protobuf.internal = 127.0.0.1:8187 + + # For HTTP: + listener.http.internal = 127.0.0.1:8198 + + # For either interface: + handoff.port = 8199 + ``` + + ```appconfig + %% In the pb section of riak_core: + {"127.0.0.1", 8187 } + + %% In the http section of riak_core: + {"127.0.0.1", 8198} + ``` + +2. Change the name of each node to a unique name. Now, start the nodes, +changing path names and nodes as appropriate: + +```bash +./rel/riak1/bin/riak start +./rel/riak2/bin/riak start +./rel/riak3/bin/riak start + +# etc +``` + +Next, join the nodes into a cluster: + +```bash +./rel/riak2/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak3/bin/riak-admin cluster join riak1@127.0.0.1 +./rel/riak2/bin/riak-admin cluster plan +./rel/riak2/bin/riak-admin cluster commit +``` + +## Multiple Clusters on One Host + +Using the above technique, it is possible to run multiple clusters on +one computer. If a node hasn’t joined an existing cluster, it will +behave just as a cluster would. Running multiple clusters on one +computer is simply a matter of having two or more distinct nodes or +groups of clustered nodes. + + + + diff --git a/content/riak/kv/3.0.4/using/security.md b/content/riak/kv/3.0.4/using/security.md new file mode 100644 index 0000000000..a82226b426 --- /dev/null +++ b/content/riak/kv/3.0.4/using/security.md @@ -0,0 +1,199 @@ +--- +title: "Security & Firewalls" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Security" + identifier: "managing_security" + weight: 205 + parent: "managing" +toc: true +aliases: + - /riak/3.0.4/ops/advanced/security + - /riak/kv/3.0.4/ops/advanced/security +--- + +[config reference search]: {{<baseurl>}}riak/kv/3.0.4/configuring/reference/#search +[config search enabling]: {{<baseurl>}}riak/kv/3.0.4/configuring/search/#enabling-riak-search +[config v3 ssl]: {{<baseurl>}}riak/kv/3.0.4/configuring/v3-multi-datacenter/ssl +[JMX]: http://www.oracle.com/technetwork/java/javase/tech/javamanagement-140525.html +[security basics]: {{<baseurl>}}riak/kv/3.0.4/using/security/basics +[security managing]: {{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/ +[Solr]: http://lucene.apache.org/solr/ +[usage search]: {{<baseurl>}}riak/kv/3.0.4/developing/usage/search + +> **Internal security** +> +> This document covers network-level security. For documentation on the +authentication and authorization features introduced in Riak 2.0, see +[Authentication and Authorization][security basics] and [Managing Security Sources][security managing] + +This article discusses standard configurations and port settings to use +when providing network security for a Riak Cluster. There are two +classes of access control for Riak: + +* Other Riak nodes participating in the cluster +* Clients making use of the Riak cluster + +The settings for both access groups are located in your cluster's +configuration settings. If you are using the newer configuration system, +you can set a host and port for each node in that node's `riak.conf` +file, setting `listener.protobuf` if you are using Riak's Protocol +Buffers interface or `listener.http` if you are using HTTP (or +`listener.https` if you are using SSL). 
If you are using the older +configuration system, adjust the settings of `pb`, `http`, or `https`, +depending on which client interface you are using. + +Make note of these configurations and set up your firewall to allow +incoming TCP access to those ports or IP address/port combinations. +Exceptions to this are the `handoff_ip` and `handoff_port` directives. +Those are for communication between Riak nodes only. + +## Inter-node Communication + +Riak uses the Erlang distribution mechanism for most inter-node +communication. Riak identifies other machines in the ring using Erlang +identifiers (`<hostname or IP>`, e.g. `riak@10.9.8.7`). Erlang resolves +these node identifiers to a TCP port on a given machine via the Erlang +Port Mapper daemon (epmd) running on each cluster node. + +By default, epmd binds to TCP port 4369 and listens on the wildcard +interface. For inter-node communication, Erlang uses an unpredictable +port by default; it binds to port 0, which means the first available +port. + +For ease of firewall configuration, Riak can be configured +to instruct the Erlang interpreter to use a limited range +of ports. For example, to restrict the range of ports that Erlang will +use for inter-Erlang node communication to 6000-7999, add the following +lines to the configuration file on each Riak node: + +```riakconf +erlang.distribution.port_range.minimum = 6000 +erlang.distribution.port_range.maximum = 7999 +``` + +```appconfig +{ kernel, [ + {inet_dist_listen_min, 6000}, + {inet_dist_listen_max, 7999} + ]}, +``` + +The above lines should be added into the top level list in app.config, +at the same level as all the other applications (e.g. `riak_core`). +Then configure your firewall to allow incoming access to TCP ports 6000 +through 7999 from whichever network(s) contain your Riak nodes. + +### Riak Node Ports + +Riak nodes in a cluster need to be able to communicate freely with one +another on the following ports: + +* epmd listener: TCP:4369 +* handoff_port listener: TCP:8099 +* range of ports specified in `app.config` or `riak.conf` + +### Riak Client Ports + +Riak clients must be able to contact at least one machine in a Riak +cluster on the following TCP ports: + +Protocol | Port +:--------|:---- +<a href="../../developing/api/http">HTTP</a> | TCP port 8098 +<a href="../../developing/api/protocol-buffers">Protocol Buffers</a> | TCP port 8087 + +### Riak Search Ports + +Riak's [search][usage search] feature relies on [Apache Solr][Solr], which runs +on each Riak node if security has been [enabled][config search enabling]. When +Riak's Search subsystem starts up, [JMX][JMX] opens a well-known port as well +as some ephemeral ports. The well-known port is determined by the value of the +`search.solr.jmx_port` in each node's [Search configuration][config reference search]. +The default is 8985. + +In addition to JMX ports, Solr also binds to a well-known port of its +own, as determined by each node's `search.solr.port` setting, which is +also located in each node's Search configuration. The default is 8093. + +# Riak Security Community + +## Riak + +Riak is a powerful open-source distributed database focused on scaling +predictably and easily, while remaining highly available in the face of +server crashes, network partitions or other (inevitable) disasters. + +## Commitment + +Data security is an important and sensitive issue to many of our users. 
+A real-world approach to security allows us to balance appropriate
+levels of security and related overhead while creating a fast, scalable,
+and operationally straightforward database.
+
+### Continuous Improvement
+
+Though we make every effort to thwart security vulnerabilities whenever
+possible (including through independent reviews), no system is
+completely secure. We will never claim that Riak is 100% secure (and you
+should seriously doubt anyone who claims their solution is). What we can
+promise is that we openly accept all vulnerabilities from the community.
+When appropriate, we'll publish and make every attempt to quickly
+address these concerns.
+
+### Balance
+
+More layers of security increase operational and administrative costs.
+Sometimes those costs are warranted, sometimes they are not. Our
+approach is to strike an appropriate balance between effort, cost, and
+security.
+
+For example, Riak does not have fine-grained role-based security. Though
+it can be an attractive bullet point in a database comparison chart,
+you're usually better off finely controlling data access through your
+application or a service layer.
+
+### Notifying Basho
+
+If you discover a potential security issue, please email us at
+**security@basho.com**, and allow us 48 hours to reply.
+
+We prefer to be contacted first, rather than finding out about an
+exploit from blog posts on the Internet. This allows us to open a
+dialogue with the security community on how best to handle a possible
+exploit without putting any users at risk.
+
+## Security Best Practices
+
+### Authentication and Authorization
+
+For instructions on how to apply permissions and to require client
+authentication, please see our documentation on [Riak Security][security basics].
+
+### Network Configurations
+
+Being a distributed database means that much of Riak's security springs
+from how you configure your network. We have a few recommendations for
+[Security and Firewalls][security basics].
+
+### Client Auth
+
+All of the Riak client libraries support encrypted TCP communication
+as well as authentication and authorization. For instructions on how
+to apply permissions and to require client authentication, please see
+our documentation on [Riak Security][security basics].
+
+### Multi-Datacenter Replication
+
+For those versions of Riak that support Multi Data Center (MDC)
+Replication, you can configure Riak 1.2+ to communicate over SSL to
+seamlessly encrypt the message traffic.
+
+See also: [Multi Data Center Replication: SSL][config v3 ssl]
+
+
+

diff --git a/content/riak/kv/3.0.4/using/security/basics.md b/content/riak/kv/3.0.4/using/security/basics.md
new file mode 100644
index 0000000000..90a5a62391
--- /dev/null
+++ b/content/riak/kv/3.0.4/using/security/basics.md
@@ -0,0 +1,851 @@
+---
+title: "Security Basics"
+description: ""
+project: "riak_kv"
+project_version: 3.0.4
+menu:
+  riak_kv-3.0.4:
+    name: "Security Basics"
+    identifier: "security_basics"
+    weight: 100
+    parent: "managing_security"
+toc: true
+aliases:
+  - /riak/3.0.4/ops/running/authz
+  - /riak/kv/3.0.4/ops/running/authz
+---
+
+> **Note on Network security**
+>
+> This document covers only the 2.0 authentication and authorization
+features. For a look at network security in Riak, see [Security and Firewalls]({{<baseurl>}}riak/kv/3.0.4/using/security/).
+ +As of version 2.0, Riak administrators can selectively apportion +access to a wide variety of Riak's functionality, including accessing, +modifying, and deleting objects, changing bucket properties, and +running MapReduce jobs. + +## Terminology + +* **Authentication** is the process of identifying a user. +* **Authorization** is verifying whether a user has access to perform + the requested operation. +* **Groups** can have permissions assigned to them, but cannot be + authenticated. +* **Users** can be authenticated and authorized; permissions + (authorization) may be granted directly or via group membership. +* **Sources** are used to define authentication mechanisms. A user + cannot be authenticated to Riak until a source is defined. + +## Security Checklist + +There are a few key steps that all applications will need to undertake +when turning on Riak security. Missing one of these steps will almost +certainly break your application, so make sure that you have done each +of the following **before** enabling security: + +1. Make certain that the original Riak Search (version 1) and link + walking are not required. Enabling security will break this + functionality. If you wish to use security and Search together, you + will need to use the [new Search feature]({{<baseurl>}}riak/kv/3.0.4/developing/usage/search/). +1. Because Riak security requires a secure SSL connection, you will need + to generate appropriate SSL certs, [enable SSL](#enabling-ssl) and establish a [certificate configuration](#certificate-configuration) on each node. **If you + enable security without having established a functioning SSL + connection, all requests to Riak will fail**. +1. Define [users](#user-management) + and, optionally, [groups](#add-group) +1. Define an [authentication source](#managing-sources) for each user +1. Grant the necessary [permissions](#managing-permissions) to each user (and/or group) +1. Check any Erlang MapReduce code for invocations of Riak modules other + than `riak_kv_mapreduce`. Enabling security will prevent those from + succeeding unless those modules are available via the `add_path` + mechanism documented in [Installing Custom Code]({{<baseurl>}}riak/kv/3.0.4/using/reference/custom-code). +1. Make sure that your client software will work properly: + * It must pass authentication information with each request + * It must support HTTPS or encrypted [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers/) + traffic + * If using HTTPS, the proper port (presumably 443) is open from + client to server + * Code that uses Riak's deprecated link walking feature **will + not work** with security enabled +1. If you have applications that rely on an already existing Riak + cluster, make sure that those applications are prepared to gracefully + transition into using Riak security once security is enabled. + +Security should be enabled only after all of the above steps have been +performed and your security setup has been properly vetted. + +Clients that use [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers/) will typically have to be +reconfigured/restarted with the proper credentials once security is +enabled. + +## Security Basics + +Riak security may be checked, enabled, or disabled by an administrator +through the command line. This allows an administrator to change +security settings for the whole cluster quickly without needing to +change settings on a node-by-node basis. 
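+For example, a typical first-time setup stages users, sources, and grants---all covered in the sections below---before flipping security on. A minimal sketch, using a hypothetical `appuser` and client network `10.0.0.0/24`:
+
+```bash
+# Define the user and how it authenticates (password auth from 10.0.0.0/24).
+riak-admin security add-user appuser password=ExamplePassword1
+riak-admin security add-source appuser 10.0.0.0/24 password
+
+# Grant only the permissions the application actually needs.
+riak-admin security grant riak_kv.get,riak_kv.put on any to appuser
+
+# Only after SSL/certificates are in place and clients are ready:
+riak-admin security enable
+riak-admin security status
+```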
+ +**Note**: Currently, Riak security commands can be run only through +the command line, using the `riak-admin security` command. In future +versions of Riak, administrators may have the option of issuing +those commands through the Protocol Buffers and HTTP interfaces. + +### Enabling Security + +> **Warning: Enable security with caution** +> +> Enabling security will change the way your client libraries and +your applications interact with Riak. +> +> Once security is enabled, all client connections must be encrypted and all permissions will be denied by default. Do not enable this in production until you have worked through the [security checklist](#security-checklist) above and tested everything in a non-production environment. + +Riak security is disabled by default. To enable it: + +```bash +riak-admin security enable +``` + +**As per the warning above, do not enable security in production without +taking the appropriate precautions.** + +All users, groups, authentication sources, and permissions can be +configured while security is disabled, allowing you to create a +security configuration of any level of complexity without prematurely +impacting the service. This should be borne in mind when you are +[managing users](#user-management) and [managing sources](#managing-sources). + +### Disabling Security + +If you disable security, this means that you have disabled all of the +various permissions checks that take place when executing operations +against Riak. Users, groups, and other security attributes remain +available for configuration while security is disabled, and will be +applied if and when security is re-enabled. + +```bash +riak-admin security disable +``` + +While security is disabled, clients will need to be reconfigured to no +longer require TLS and send credentials. + +### Checking Security Status + +To check whether security is currently enabled for the cluster, use the +`status` command: + +```bash +riak-admin security status +``` + +This command will usually return `Enabled` or `Disabled`, but if +security is enabled on a mixed-mode cluster (running a combination of +Riak 2.0 and older versions) it will indicate that security is enabled +but not yet available. + +## User Management + +Riak security enables you to control _authorization_ by creating, +modifying, and deleting user characteristics and granting users +selective access to Riak functionality (and also to revoke access). +Users can be assigned one or more of the following characteristics: + +* `username` +* `groups` +* `password` + +You may also assign users characteristics beyond those listed +above---e.g., listing email addresses or other information---but those +values will carry no special significance for Riak. + +**Note**: The `username` is the one user characteristic that cannot be +changed once a user has been created. 
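+A user's standard and custom characteristics can also be supplied together at creation time. An illustrative example (`email` is an arbitrary custom option with no special meaning to Riak):
+
+```bash
+riak-admin security add-user riakuser password=Test1234 groups=dev email=dev@example.com
+```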
+
+### Retrieve a Current User or Group List
+
+A list of currently existing users can be accessed at any time:
+
+```bash
+riak-admin security print-users
+```
+
+The same goes for groups:
+
+```bash
+riak-admin security print-groups
+```
+
+Example output, assuming a user named `riakuser` with an assigned
+password:
+
+```
++----------+--------+----------------------+------------------------------+
+| username | groups |       password       |           options            |
++----------+--------+----------------------+------------------------------+
+| riakuser |        |983e8ae1421574b8733824|              []              |
++----------+--------+----------------------+------------------------------+
+```
+
+**Note**: All passwords are displayed in encrypted form in console
+output.
+
+If the user `riakuser` were assigned to the group `dev` and a `name` of
+`lucius`, the output would look like this:
+
+```
++----------+----------------+----------------------+---------------------+
+| username |     groups     |       password       |       options       |
++----------+----------------+----------------------+---------------------+
+| riakuser |      dev       |983e8ae1421574b8733824| [{"name","lucius"}] |
++----------+----------------+----------------------+---------------------+
+```
+
+If you'd like to see which permissions have been assigned to
+`riakuser`, you would need to use the `print-grants` command, detailed
+below.
+
+The `security print-user` or `security print-group` (singular) commands
+can be used with a name as an argument to see the same information as
+above, except for only that user or group.
+
+### Permissions Grants For a Single User or Group
+
+You can retrieve authorization information about a specific user or
+group using the `print-grants` command, which takes the form of
+`riak-admin security print-grants <username>`.
+
+The output will look like this if the user `riakuser` has been
+explicitly granted a `riak_kv.get` permission on the bucket
+`shopping_list` and inherits a set of permissions from the `admin`
+group:
+
+```
+Inherited permissions (user/riakuser)
+
++--------+----------+----------+----------------------------------------+
+| group  |   type   |  bucket  |                 grants                 |
++--------+----------+----------+----------------------------------------+
+| admin  |    *     |    *     | riak_kv.get, riak_kv.delete,           |
+|        |          |          | riak_kv.put                            |
++--------+----------+----------+----------------------------------------+
+
+Dedicated permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|   ANY    |shopping_list| riak_kv.get                            |
++----------+-------------+----------------------------------------+
+
+Cumulative permissions (user/riakuser)
+
++----------+-------------+----------------------------------------+
+|   type   |   bucket    |                 grants                 |
++----------+-------------+----------------------------------------+
+|    *     |      *      | riak_kv.get, riak_kv.delete,           |
+|          |             | riak_kv.put                            |
+|   ANY    |shopping_list| riak_kv.get                            |
++----------+-------------+----------------------------------------+
+```
+
+**Note**: The term `admin` is not a reserved term in Riak security. It
+is used here only for illustrative purposes.
+
+Because the same name can represent both a user and a group, a prefix
+(`user/` or `group/`) can be used before the name (e.g., `print-grants
+user/admin`). If a name collides and no prefix is supplied, grants for
+both will be listed separately.
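+For example, to inspect a single user or group by name using the commands just described:
+
+```bash
+riak-admin security print-user riakuser
+riak-admin security print-group dev
+riak-admin security print-grants user/riakuser
+```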
+ +### Add Group + +For easier management of permissions across several users, it is +possible to create groups to be assigned to those users. + +```bash +riak-admin security add-group admin +``` + +### Add User + +To create a user with the username `riakuser`, we use the `add-user` +command: + +```bash +riak-admin security add-user riakuser +``` + +Using the command this way will create the user `riakuser` without _any_ +characteristics beyond a username, which is the only attribute that you +must assign upon user creation. + +Alternatively, a password---or other attributes---can be assigned to the +user upon creation. Here, we'll assign a password: + +```bash +riak-admin security add-user riakuser password=Test1234 +``` + +### Assigning a Password and Altering Existing User Characteristics + +While passwords and other characteristics can be set upon user creation, +it often makes sense to change user characteristics after the user has +already been created. Let's say that the user `riakuser` was created +without a password (or created _with_ a password that we'd like to +change). The `alter-user` command can be used to modify our `riakuser` +user: + +```bash +riak-admin security alter-user riakuser password=opensesame +``` + +When creating or altering a user, any number of `<option>=<value>` +pairs can be appended to the end of the command. Any non-standard +options will be stored and displayed via the `riak-admin security +print-users` command. + +```bash +riak-admin security alter-user riakuser name=bill age=47 fav_color=red +``` + +Now, the `print-users` command should return this: + +``` ++----------+--------+----------+--------------------------------------------------+ +| username | groups | password | options | ++----------+--------+----------+--------------------------------------------------+ +| riakuser | | |[{"fav_color","red"},{"age","47"},{"name","bill"}]| ++----------+--------+----------+--------------------------------------------------+ +``` + +**Note**: Usernames _cannot_ be changed using the `alter-user` command. +For example, running `riak-admin security alter-user riakuser +username=other-name`, will instead add the +`{"username","other-name"}` tuple to `riakuser`'s options. + +### Managing Groups for a User + +If we have a user `riakuser` and we'd like to assign her to the +`admin` group, we assign the value `admin` to the option `groups`: + +```bash +riak-admin security alter-user riakuser groups=admin +``` + +If we'd like to make the user `riakuser` both an `admin` and an +`archoverlord`: + +```bash +riak-admin security alter-user riakuser groups=admin,archoverlord +``` + +There is no way to incrementally add groups; even if `riakuser` was +already an `admin`, it is necessary to list it again when adding the +`archoverlord` group. Thus, to remove a group from a user, use +`alter-user` and list all *other* groups. + +If the user should be removed from all groups, use `groups=` with no +list: + +```bash +riak-admin security alter-user riakuser groups= +``` + +### Managing Groups for Groups + +Groups can be added to other groups for cascading permissions. 
+
+```bash
+riak-admin security alter-group admin groups=dev
+```
+
+### Deleting a User or Group
+
+If you'd like to remove a user, use the `del-user` command:
+
+```bash
+riak-admin security del-user riakuser
+```
+
+For groups, use the `del-group` command:
+
+```bash
+riak-admin security del-group admin
+```
+
+### Adding or Deleting Multiple Users
+
+The `riak-admin security` command does not currently allow you to
+add or delete multiple users using a single command. Instead, they must
+be added or deleted one by one.
+
+## Managing Permissions
+
+Permission to perform a wide variety of operations against Riak can be
+granted to---or revoked from---users via the `grant` and `revoke`
+commands.
+
+### Basic Form
+
+The `grant` command takes one of the following forms:
+
+```bash
+riak-admin security grant <permissions> on any to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> to all|{<user>|<group>[,...]}
+riak-admin security grant <permissions> on <bucket-type> <bucket> to all|{<user>|<group>[,...]}
+```
+
+The `revoke` command is essentially the same, except that `to` is
+replaced with `from`:
+
+```bash
+riak-admin security revoke <permissions> on any from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> from all|{<user>|<group>[,...]}
+riak-admin security revoke <permissions> on <bucket-type> <bucket> from all|{<user>|<group>[,...]}
+```
+
+If you select `any`, this means that the permission (or set of
+permissions) is granted/revoked for all buckets and [bucket types]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types). If you specify a bucket type only, then the permission
+is granted/revoked for all buckets of that type. If you specify a bucket
+type _and_ a bucket, the permission is granted/revoked only for that
+bucket type/bucket combination.
+
+**Note**: You cannot grant/revoke permissions with respect to a bucket
+alone. You must specify either a bucket type by itself or a bucket type
+and bucket. This limitation reflects the naming structure underlying
+buckets and bucket types.
+
+Selecting `all` grants or revokes a permission (or set of permissions)
+for all users in all groups. When specifying the user(s)/group(s) to
+which you want to apply a permission (or set of permissions), you may
+list any number of users or groups comma-separated with no whitespace.
+Here is an example of granting multiple permissions across all buckets
+and bucket types to multiple users:
+
+```bash
+riak-admin security grant riak_kv.get,riak_search.query on any to jane,ahmed
+```
+
+If the same name is used for both a user and a group, the `grant`
+command will ask for the name to be prefixed with `user/` or `group/`
+to disambiguate.
+
+### Key/Value Permissions
+
+Permissions that can be granted for basic key/value access
+functionality:
+
+Permission | Operation
+:----------|:----------
+`riak_kv.get` | Retrieve objects
+`riak_kv.put` | Create or update objects
+`riak_kv.delete` | Delete objects
+`riak_kv.index` | Index objects using secondary indexes (2i)
+`riak_kv.list_keys` | List all of the keys in a bucket
+`riak_kv.list_buckets` | List all buckets
+
+{{% note title="Note on Listing Keys and Buckets" %}}
+`riak_kv.list_keys` and `riak_kv.list_buckets` are both very expensive
+operations that should be performed very rarely and never in production.
+Access to this functionality should be granted very carefully.
+{{% /note %}} + +If you'd like to create, for example, a `client` account that is +allowed only to run `GET` and `PUT` requests on all buckets: + +```bash +riak-admin security add-user client +riak-admin security grant riak_kv.get,riak_kv.put on any to client +``` + +### MapReduce Permissions + +Permission to perform [MapReduce]({{<baseurl>}}riak/kv/3.0.4/developing/usage/mapreduce/) jobs can be assigned +using `riak_kv.mapreduce`. The following example grants MapReduce +permissions to the user `mapreduce-power-user` for all buckets and +bucket types: + +```bash +riak-admin security grant riak_kv.mapreduce on any to mapreduce-power-user +``` + +### Bucket Type Permissions + +In versions 2.0 and later, Riak users can manage [bucket types]({{<baseurl>}}riak/kv/3.0.4/developing/usage/bucket-types) in addition to setting bucket properties. `riak-admin +security` allows you to manage the following bucket type-related +permissions: + +Permission | Operation +:----------|:--------- +`riak_core.get_bucket` | Retrieve the `props` associated with a bucket +`riak_core.set_bucket` | Modify the `props` associated with a bucket +`riak_core.get_bucket_type` | Retrieve the set of `props` associated with a bucket type +`riak_core.set_bucket_type` | Modify the set of `props` associated with a bucket type + +### Search Query Permission (Riak Search version 1) + +Security is incompatible with the original (and now deprecated) Riak +Search. Riak Search version 1 will stop working if security is enabled. + +### Search Query Permissions (Riak Search version 2, aka Yokozuna) + +If you are using the new Riak Search, i.e. the Solr-compatible search +capabilities included with Riak versions 2.0 and greater, the following +search-related permissions can be granted/revoked: + +Permission | Operation +:----------|:--------- +`search.admin` | The ability to perform search admin-related tasks, such as creating and deleting indexes and adding and modifying search schemas +`search.query` | The ability to query an index + +> **Note on Search Permissions** +> +> Search must be enabled in order to successfully grant/revoke Search +permissions. If you attempt to grant/revoke permissions while Search is +disabled, you will get the following error: +> +> `{error,{unknown_permission,"search.query"}}` +> +> More information on Riak Search and how to enable it can be found in the +[Riak Search Settings]({{<baseurl>}}riak/kv/3.0.4/configuring/search/) document. 
+
+#### Usage Examples
+
+To grant the user `riakuser` the ability to query all indexes:
+
+```bash
+riak-admin security grant search.query on index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on index from riakuser
+```
+
+To grant the user `riakuser` the ability to query all schemas:
+
+```bash
+riak-admin security grant search.query on schema to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query on schema from riakuser
+```
+
+To grant the user `riakuser` admin privileges only on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.admin on index riakusers_index from riakuser
+```
+
+To grant `riakuser` querying and admin permissions on the index
+`riakusers_index`:
+
+```bash
+riak-admin security grant search.query,search.admin on index riakusers_index to riakuser
+
+# To revoke:
+# riak-admin security revoke search.query,search.admin on index riakusers_index from riakuser
+```
+
+## Managing Sources
+
+While user management enables you to control _authorization_ with regard
+to users, security **sources** provide you with an interface for
+managing means of _authentication_. If you create users and grant them
+access to some or all of Riak's functionality as described in the [User Management](#user-management) section,
+you will then need to define security sources required for
+authentication.
+
+A more in-depth tutorial can be found in [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/).
+
+### Add Source
+
+Riak security sources may be applied to a specific user, multiple users,
+or all users (`all`).
+
+#### Available Sources
+
+Source | Description
+:--------|:-----------
+`trust` | Always authenticates successfully if access has been granted to a user or all users on the specified CIDR range
+`password` | Check the user's password against the [PBKDF2](http://en.wikipedia.org/wiki/PBKDF2)-hashed password stored in Riak
+`pam` | Authenticate against the given pluggable authentication module (PAM) service
+`certificate` | Authenticate using a client certificate
+
+### Example: Adding a Trusted Source
+
+In general, the `add-source` command takes the following form:
+
+```bash
+riak-admin security add-source all|<users> <CIDR> <source> [<option>=<value>[...]]
+```
+
+Using `all` indicates that the authentication source can be added to
+all users. A source can be added to a specific user, e.g. `add-source
+superuser`, or to a list of users separated by commas, e.g. `add-source
+jane,bill,admin`.
+
+Let's say that we want to give all users trusted access to securables
+(without a password) when requests come from `localhost`:
+
+```bash
+riak-admin security add-source all 127.0.0.1/32 trust
+```
+
+At that point, the `riak-admin security print-sources` command would
+print the following:
+
+```
++--------------------+------------+----------+----------+
+|       users        |    cidr    |  source  | options  |
++--------------------+------------+----------+----------+
+|        all         |127.0.0.1/32|  trust   |    []    |
++--------------------+------------+----------+----------+
+```
+
+### Deleting Sources
+
+If we wish to remove the `trust` source that we granted to `all` in the
+example above, we can simply use the `del-source` command and specify
+the CIDR.
+ +```bash +riak-admin security del-source all 127.0.0.1/32 +``` + +Note that this does not require that you specify which type of source is +being deleted. You only need to specify the user(s) or `all`, because +only one source can be applied to a user or `all` at any given time. + +The following command would remove the source for `riakuser` on +`localhost`, regardless of which source is being used: + +```bash +riak-admin security del-source riakuser 127.0.0.1/32 +``` + +{{% note title="Note on Removing Sources" %}} +If you apply a security source both to `all` and to specific users and then +wish to remove that source, you will need to do so in separate steps. The +`riak-admin security del-source all ...` command by itself is not sufficient. + +For example, if you have assigned the source `password` to both `all` and to +the user `riakuser` on the network `127.0.0.1/32`, the following two-step +process would be required to fully remove the source: + +```bash +riak-admin security del-source all 127.0.0.1/32 password +riak-admin security del-source riakuser 127.0.0.1/32 password +``` +{{% /note %}} + +### More Usage Examples + +This section provides only a very brief overview of the syntax for +working with sources. For more information on using the `trust`, +`password`, `pam`, and `certificate` sources, please see our [Managing Security Sources]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/) document. + +## Security Ciphers + +To view a list of currently available security ciphers or change Riak's +preferences, use the `ciphers` command: + +```bash +riak-admin security ciphers +``` + +That command by itself will return a large list of available ciphers: + +``` +Configured ciphers + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... + +Valid ciphers(35) + +ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256: ... + +Unknown/Unsupported ciphers(32) + +ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256: ... +``` + +To alter the list, i.e. to constrain it and/or to set preferred ciphers +higher in the list: + +```bash +riak-admin security ciphers DHE-RSA-AES256-SHA:AES128-GCM-SHA256 +``` + +The list of configured ciphers should now look like this: + +``` +Configured ciphers + +DHE-RSA-AES256-SHA:AES128-GCM-SHA256 + +Valid ciphers(1) + +DHE-RSA-AES256-SHA + +Unknown/Unsupported ciphers(1) + +AES128-GCM-SHA256 +``` + +A list of available ciphers on a server can be obtained using the +`openssl` command: + +```bash +openssl ciphers +``` + +That should return a list structured like this: + +``` +DHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:AES256-SHA:EDH-RSA-DES-CBC3-SHA: # and so on +``` + +Riak's cipher preferences were taken from [Mozilla's Server-Side TLS +documentation](https://wiki.mozilla.org/Security/Server_Side_TLS). + +### Client vs. Server Cipher Order + +By default, Riak prefers the cipher order that you set on the server, +i.e. the [`honor_cipher_order`]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#security) setting is set to `on`. If you prefer, however, that clients' preferred cipher +order dictate which cipher is chosen, set `honor_cipher_order` to `off`. + +> **Note on Erlang versions** +> +> Riak's default cipher order behavior has the potential to crash Erlang +VMs that do not support it. Erlang VMs that are known to support it +include Basho's patched version of Erlang R16. Instructions on +installing it can be found in [Installing Erlang]({{<baseurl>}}riak/kv/3.0.4/setup/installing/source/erlang). 
This issue should
+not affect Erlang 17.0 and later.
+
+## Enabling SSL
+
+In order to use any authentication or authorization features, you must
+enable SSL for Riak. **SSL is disabled by default**, but you will need
+to enable it prior to enabling security. If you are using [Protocol Buffers]({{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers/) as a transport protocol for Riak (which we strongly recommend), enabling SSL on a given node requires only that you specify a [host and port]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#client-interfaces) for the node
+as well as a [certificate configuration](#certificate-configuration).
+
+If, however, you are using the [HTTP API]({{<baseurl>}}riak/kv/3.0.4/developing/api/http) for Riak and would like to
+configure HTTPS, you will need to not only establish a [certificate configuration](#certificate-configuration) but also specify an HTTPS host
+and port. The following configuration would establish port 8088 on
+`localhost` as the HTTPS port:
+
+```riakconf
+listener.https.$name = 127.0.0.1:8088
+
+# By default, "internal" is used as the "name" setting
+```
+
+```appconfig
+{riak_core, [
+             %% Other configs
+             {https, [{"127.0.0.1", 8088}]},
+             %% Other configs
+            ]}
+```
+
+## TLS Settings
+
+When using Riak security, you can choose which versions of SSL/TLS are
+allowed. By default, only TLS 1.2 is allowed, but this version can be
+disabled and others enabled by setting the following [configurable parameters]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#security) to `on` or `off`:
+
+* `tls_protocols.tlsv1`
+* `tls_protocols.tlsv1.1`
+* `tls_protocols.tlsv1.2`
+* `tls_protocols.sslv3`
+
+Three things to note:
+
+* Among the four available options, only TLS version 1.2 is enabled by
+  default
+* You can enable more than one protocol at a time
+* We strongly recommend that you do _not_ use SSL version 3 unless
+  absolutely necessary
+
+## Certificate Configuration
+
+If you are using any of the available [security sources]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/), including [trust-based authentication]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#trust-based-authentication), you will need to do so
+over a secure SSL connection. In order to establish a secure connection,
+you will need to ensure that each Riak node's [configuration files]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#security) point to the proper paths for your
+generated certs. By default, Riak assumes that all certs are stored in
+each node's `/etc` directory.
+
+If you are using the newer, `riak.conf`-based configuration system, you
+can change the location of the `/etc` directory by modifying the
+`platform_etc_dir`. More information can be found in our documentation
+on [configuring directories]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/#directories).
+ +<table class="riak-conf"> + <thead> + <tr> + <th>Type</th> + <th>Parameter</th> + <th>Default</th> + </tr> + </thead> + <tbody> + <tr> + <td><strong>Signing authority</strong></td> + <td><code>ssl.cacertfile</code></td> + <td><code>#(platform_etc_dir)/cacertfile.pem</code></td> + </tr> + <tr> + <td><strong>Cert</strong></td> + <td><code>ssl.certfile</code></td> + <td><code>#(platform_etc_dir)/cert.pem</code></td> + </tr> + <tr> + <td><strong>Key file</strong></td> + <td><code>ssl.keyfile</code></td> + <td><code>#(platform_etc_dir)/key.pem</code></td> + </tr> + </tbody> +</table> + +If you are using the older, `app.config`-based configuration system, +these paths can be set in the `ssl` subsection of the `riak_core` +section. The corresponding parameters are shown in the example below: + +```appconfig +{riak_core, [ + %% Other configs + + {ssl, [ + {certfile, "./etc/cert.pem"}, + {keyfile, "./etc/key.pem"}, + {cacertfile, "./etc/cacertfile.pem"} + ]}, + + %% Other configs +]} +``` + +## Referer Checks and Certificate Revocation Lists + +In order to provide safeguards against +[cross-site-scripting](http://en.wikipedia.org/wiki/Cross-site_scripting) +(XSS) and +[request-forgery](http://en.wikipedia.org/wiki/Cross-site_request_forgery) +attacks, Riak performs [secure referer +checks](http://en.wikipedia.org/wiki/HTTP_referer) by default. Those +checks make it impossible to serve data directly from Riak. To disable +those checks, set the `secure_referer_check` parameter to `off`. + +If you are using [certificate-based authentication]({{<baseurl>}}riak/kv/3.0.4/using/security/managing-sources/#certificate-based-authentication), Riak will check the certificate revocation list (CRL) of connecting clients' certificate by +default. To disable this behavior, set the `check_crl` parameter to +`off`. + + + + diff --git a/content/riak/kv/3.0.4/using/security/best-practices.md b/content/riak/kv/3.0.4/using/security/best-practices.md new file mode 100644 index 0000000000..0fd0347757 --- /dev/null +++ b/content/riak/kv/3.0.4/using/security/best-practices.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "Security Best Practices" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Best Practices" + identifier: "security_best_practices" + weight: 102 + parent: "managing_security" +toc: true +aliases: +--- + +## Hanc capellae + +Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli. +Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse. +Deae Saturnia levius viribus membra. + +## Iussorum ad fronti rutilasque tenuit cursu quae + +Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus +**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M) +transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia. +Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus +oblita **foedabis** orsa. + +- Fecere aliis postquam inviti caliginis ab inque +- Voverat dividuae et tardus huc magna non +- Sex barba ipsaque Caucason corpora sono ecce +- Non esse +- Sibi atris regna licuit Antium carituraque nubes + +## Omni levare gelidumque minanti + +Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena +incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi +magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. 
Ac +mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione, +crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor +regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris +coniugis. + +## Troiana quoque + +Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe +sanctique meum*; est. [Gente inimica +premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret +tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen +ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret +quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim +suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis +nimium in miserata? + +1. `In naribus aequos aberant` +2. Naturae murmura te rimas suarum vulnus quod +3. Socios leto loquor timide +4. Ergo sub +5. Patrias mihi consumite breve + +## Ruit huic movit luminibus excubias arma + +> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex +vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat +*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens +cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec. + +1. Hic causam et dilecte nudae nec corpus +2. Cor Si nive +3. Petis equos perosa tu perterrita exitus non +4. Per et et ire geminos parte +5. Aqua coniunx cecidisse sonum + +``` +Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum +potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et +nec rubebant pietas, ipsa. +``` + + + + diff --git a/content/riak/kv/3.0.4/using/security/managing-sources.md b/content/riak/kv/3.0.4/using/security/managing-sources.md new file mode 100644 index 0000000000..397b3b4595 --- /dev/null +++ b/content/riak/kv/3.0.4/using/security/managing-sources.md @@ -0,0 +1,273 @@ +--- +title: "Managing Security Sources" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Managing Security Sources" + identifier: "security_manage_sources" + weight: 101 + parent: "managing_security" +toc: true +aliases: + - /riak/3.0.4/ops/running/security-sources + - /riak/kv/3.0.4/ops/running/security-sources +--- + +If you're looking for more general information on Riak Security, it may +be best to start with our general guide to [authentication and authorization]({{<baseurl>}}riak/kv/3.0.4/using/security/basics). + +This document provides more granular information on the four available +authentication sources in Riak Security: trusted networks, password, +pluggable authentication modules (PAM), and certificates. These sources +correspond to `trust`, `password`, `pam`, and `certificate`, +respectively, in the `riak-admin security` interface. + +The examples below will assume that the network in question is +`127.0.0.1/32` and that a Riak user named `riakuser` has been +[created]({{<baseurl>}}riak/kv/3.0.4/using/security/basics/#user-management) and that +security has been [enabled]({{<baseurl>}}riak/kv/3.0.4/using/security/basics/#the-basics). + +{{% note title="Note on SSL connections" %}} +If you use _any_ of the aforementioned security sources, even `trust`, you +will need to do so via a secure SSL connection. 
+{{% /note %}} + +## Trust-based Authentication + +This form of authentication enables you to specify trusted +[CIDRs](http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) +from which all clients will be authenticated by default. + +```bash +riak-admin security add-source all 127.0.0.1/32 trust +``` + +Here, we have specified that anyone connecting to Riak from the +designated CIDR (in this case `localhost`) will be successfully +authenticated: + +```curl +curl https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +If this request returns `not found` or a Riak object, then things have +been set up appropriately. You can specify any number of trusted +networks in the same fashion. + +You can also specify users as trusted users, as in the following +example: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 trust +``` + +Now, `riakuser` can interact with Riak without providing credentials. +Here's an example in which only the username is passed to Riak: + +```curl +curl -u riakuser: \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Password-based Authentication + +Authenticating via the `password` source requires that our `riakuser` be +given a password. `riakuser` can be assigned a password upon creation, +as in this example: + +```bash +riak-admin security add-user riakuser password=captheorem4life +``` + +Or a password can be assigned to an already existing user by modifying +that user's characteristics: + +```bash +riak-admin security alter-user riakuser password=captheorem4life +``` + +You can specify that _all_ users must authenticate themselves via +password when connecting to Riak from `localhost`: + +```bash +riak-admin security add-source all 127.0.0.1/32 password +``` + +Or you can specify that any number of specific users must do so: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 password +riak-admin security add-source otheruser 127.0.0.1/32 password + +# etc +``` + +Now, our `riakuser` must enter a username and password to have any +access to Riak whatsoever: + +```curl +curl -u riakuser:captheorem4life \ + https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key> +``` + +## Certificate-based Authentication + +This form of authentication (`certificate`) requires that Riak and a +specified client---or clients---interacting with Riak bear certificates +signed by the same [Root Certificate +Authority](http://en.wikipedia.org/wiki/Root_certificate). + +> **Note** +> +> At this time, client certificates are not supported in Riak's HTTP +interface, and can be used only through the [protocol buffers interface]({{<baseurl>}}riak/kv/3.0.4/developing/api/protocol-buffers/). + +Let's specify that our user `riakuser` is going to be authenticated +using a certificate on `localhost`: + +```bash +riak-admin security add-source riakuser 127.0.0.1/32 certificate +``` + +When the `certificate` source is used, `riakuser` must also be entered +as the common name, aka `CN`, that you specified when you generated your +certificate, as in the following OpenSSL example: + +```bash +openssl req -new ... '/CN=riakuser' +``` + +You can add a `certificate` source to any number of clients, as long as +their `CN` and Riak username match. + +On the server side, you need to configure Riak by specifying a path to +your certificates. First, copy all relevant files to your Riak cluster. 
+
The default directory for certificates is `/etc`, though you can specify
a different directory in your [`riak.conf`]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/), either by uncommenting those lines (if you choose to use the defaults) or by setting the paths yourself:

```riakconf
ssl.certfile = /path/to/cert.pem
ssl.keyfile = /path/to/key.pem
ssl.cacertfile = /path/to/cacert.pem
```

In the client-side example above, the client's `CN` and Riak username
needed to match. On the server (i.e. Riak) side, the `CN` specified _on
each node_ must match the node's name as registered by Riak. You can
find the node's name in [`riak.conf`]({{<baseurl>}}riak/kv/3.0.4/configuring/reference/) under the parameter `nodename`. So if the `nodename` for a cluster is
`riak-node-1`, you would need to generate your certificate with that in
mind, as in this OpenSSL example:

```bash
openssl req -new ... '/CN=riak-node-1'
```

Once certificates have been properly generated and configured on all of
the nodes in your Riak cluster, you need to perform a [rolling restart]({{<baseurl>}}riak/kv/3.0.4/using/repair-recovery/rolling-restart/). Once that process is complete, you can use the client
certificate that you generated for the user `riakuser`.

How to use Riak clients in conjunction with OpenSSL and other
certificates varies from client library to client library. We strongly
recommend checking the documentation of your client library for further
information.

## PAM-based Authentication

This section assumes that you have set up a PAM service bearing the name
`riak_pam`, e.g. by creating a `pam.d/riak_pam` service definition
specifying `auth` and/or other PAM services that are set up to
authenticate a user named `riakuser`. As in the certificate-based
authentication example above, the user's name must be the same in both
your authentication module and in Riak Security.

If we want the user `riakuser` to use this PAM service on `localhost`,
we need to add a `pam` security source in Riak and specify the name of
the service:

```bash
riak-admin security add-source riakuser 127.0.0.1/32 pam service=riak_pam
```

**Note**: If you do not specify a name for your PAM service, Riak will
use the default, which is `riak`.

To verify that the source has been properly specified:

```bash
riak-admin security print-sources
```

That command should output the following:

```
+--------------------+------------+----------+------------------------+
|       users        |    cidr    |  source  |        options         |
+--------------------+------------+----------+------------------------+
|      riakuser      |127.0.0.1/32|   pam    |[{"service","riak_pam"}]|
+--------------------+------------+----------+------------------------+
```

You can test that setup most easily by using `curl`. A normal request to
Riak without specifying a user will return an `Unauthorized` message:

```curl
curl -u riakuser: \
  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
```

Response:

```
<html><head><title>401 Unauthorized</title></head><body><h1>Unauthorized</h1>Unauthorized<p><hr><address>mochiweb+webmachine web server</address></p></body></html>
+```

If you identify yourself as `riakuser` and are successfully
authenticated by your PAM service, you should get either `not found` or
a Riak object if one is stored in the specified bucket type/bucket/key
path:

```curl
curl -u riakuser: \
  https://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>
```

## How Sources Are Applied

When managing security sources---any of the sources explained
above---you always have the option of applying a source to either a
single user, multiple users, or all users (`all`). If specific users and
`all` have no sources in common, this presents no difficulty. But what
happens if one source is applied to `all` and a different source is
applied to a specific user?

The short answer is that the more specifically assigned source---i.e.
the one assigned to the user---will be considered that user's security
source. We'll illustrate that with the following example, in which the
`certificate` source is assigned to `all`, but the `password` source is
assigned to `riakuser`:

```bash
riak-admin security add-source all 127.0.0.1/32 certificate
riak-admin security add-source riakuser 127.0.0.1/32 password
```

If we run `riak-admin security print-sources`, we'll get the following
output:

```
+--------------------+------------+-----------+----------+
|       users        |    cidr    |  source   | options  |
+--------------------+------------+-----------+----------+
|      riakuser      |127.0.0.1/32| password  |    []    |
|                    |127.0.0.1/32|certificate|    []    |
|        all         |127.0.0.1/32|certificate|    []    |
+--------------------+------------+-----------+----------+
```

As we can see, `password` is set as the security source for `riakuser`,
whereas everyone else will authenticate using `certificate`.




diff --git a/content/riak/kv/3.0.4/using/security/v2-v3-ssl-ca.md b/content/riak/kv/3.0.4/using/security/v2-v3-ssl-ca.md new file mode 100644 index 0000000000..3f853ca2bf --- /dev/null +++ b/content/riak/kv/3.0.4/using/security/v2-v3-ssl-ca.md @@ -0,0 +1,85 @@ +--- +draft: true +title: "V2 / V3 SSL & CA Validation" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "V2/V3 SSL & CA Validation" + identifier: "security_validation" + weight: 103 + parent: "managing_security" +toc: true +aliases: +---

## Hanc capellae

Lorem markdownum Byblida. Modo **etiam** litora mittat vellera infelix caeli.
Studiosius forte, potuit pectore. Puer undas dignior iam turpe sorores abesse.
Deae Saturnia levius viribus membra.

## Iussorum ad fronti rutilasque tenuit cursu quae

Nostros vovistis artes. **Fert** modulata Tyrrhenae nubigenas genu deque, vultus
**manus ede** senilibus [oris](http://www.youtube.com/watch?v=MghiBW3r65M)
transcurrere quem rarissima. Viderunt nutu quod, tumidaque, mihi mihi sacer pia.
Summis rediit pavidus tersere et at prosiluit natus Phaethon noxa. Singultibus
oblita **foedabis** orsa.

- Fecere aliis postquam inviti caliginis ab inque
- Voverat dividuae et tardus huc magna non
- Sex barba ipsaque Caucason corpora sono ecce
- Non esse
- Sibi atris regna licuit Antium carituraque nubes

## Omni levare gelidumque minanti

Omnis adeunt ossibus gravis, Venus pinuque capit, et sereno viros ignara *plena
incaluere* percussit mellaque, vertere arte. Ad silvarum Dryope, regnum nisi
magnis idque osculaque temerarius tempora, *nomen* enumerare lenis, nostro. Ac
mutabit [arma](http://www.thesecretofinvisibility.com/) operiri saxum ratione,
crudelior feram, est usu tamen quod, hasta. Equos **sonant et deum**. Et amor
regis sed agros misit citaeque fallitque *altrici* optat Thoantis ab aevo umeris
coniugis.

## Troiana quoque

Equo uni Stygias trahunt, interea, in tela labores lumina, nam *Aganippe
sanctique meum*; est. [Gente inimica
premeret](http://en.wikipedia.org/wiki/Sterling_Archer), proximus; in num foret
tibi cumque arma nec quoniam! Contribuere mollis, tu dum parem viscera, tamen
ante. Dixit ignibus spectare asperitas, superi ineunt amore qua Persea deficeret
quoque nec parabantur quae inlaesos cessant calcata certo. Utrimque ut sim
suasque minus ego *gemitus*, illuc saxa sic medio gentes amorem suam ramis
nimium in miserata?

1. `In naribus aequos aberant`
2. Naturae murmura te rimas suarum vulnus quod
3. Socios leto loquor timide
4. Ergo sub
5. Patrias mihi consumite breve

## Ruit huic movit luminibus excubias arma

> Loco humo tecum gurgite timui. Peragant tu regia ut umbras premit condit. Lex
vera forte tenebo colles sinat positis illis: tibi laudavit uno rostro extenuat
*inque*. Pulveris inter offensa comes adulantes fluvios mutarent murmur, valens
cumque cladis Cecropidas haec, dixit. Lucus cognomine **Achilles**: pastor nec.

1. Hic causam et dilecte nudae nec corpus
2. Cor Si nive
3. Petis equos perosa tu perterrita exitus non
4. Per et et ire geminos parte
5. Aqua coniunx cecidisse sonum

```
Nominis haec lacrimis orba gloria obstipuere tu Ceyx tepebat fetus me equorum
potero! Iampridem illi; deducit [reor orbem](http://heeeeeeeey.com/), comes, et
nec rubebant pietas, ipsa.
```




diff --git a/content/riak/kv/3.0.4/using/troubleshooting.md b/content/riak/kv/3.0.4/using/troubleshooting.md new file mode 100644 index 0000000000..9a7b5d333d --- /dev/null +++ b/content/riak/kv/3.0.4/using/troubleshooting.md @@ -0,0 +1,28 @@ +--- +title: "Troubleshooting" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "Troubleshooting" + identifier: "managing_troubleshooting" + weight: 207 + parent: "managing" +toc: true +aliases: +---

[http 204]: ./http-204

## In This Section

#### [HTTP 204][http 204]

About the HTTP 204 response.

[Learn More >>][http 204]




diff --git a/content/riak/kv/3.0.4/using/troubleshooting/http-204.md b/content/riak/kv/3.0.4/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..372c1d94bd --- /dev/null +++ b/content/riak/kv/3.0.4/using/troubleshooting/http-204.md @@ -0,0 +1,22 @@ +--- +title: "HTTP 204" +description: "" +project: "riak_kv" +project_version: 3.0.4 +menu: + riak_kv-3.0.4: + name: "HTTP 204" + identifier: "troubleshooting_http_204" + weight: 101 + parent: "managing_troubleshooting" +toc: true +aliases: +---

In the HTTP standard, a `204 No Content` is returned when the request was successful but there is nothing to return other than HTTP headers.

If you add `returnbody=true` to the `PUT` request, you will receive a `200 OK` and the content you just stored; otherwise you will receive a `204 No Content`.
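You can see both behaviors with `curl` (the `-i` flag prints the response
status line and headers; the bucket type, bucket, and key below are
placeholders, as in the examples elsewhere in these docs):

```curl
# Without returnbody, a successful store returns 204 No Content
curl -i -XPUT -H "Content-Type: text/plain" \
  -d "some value" \
  "http://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>"

# With returnbody=true, the same store returns 200 OK plus the stored content
curl -i -XPUT -H "Content-Type: text/plain" \
  -d "some value" \
  "http://localhost:8098/types/<type>/buckets/<bucket>/keys/<key>?returnbody=true"
```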
+ + + + diff --git a/content/riak/kv/latest/_reference-links.md b/content/riak/kv/latest/_reference-links.md new file mode 100644 index 0000000000..57364a5e5f --- /dev/null +++ b/content/riak/kv/latest/_reference-links.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/_reference-links/" + - "/riakkv/latest/_reference-links/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/add-ons.md b/content/riak/kv/latest/add-ons.md new file mode 100644 index 0000000000..41656e950f --- /dev/null +++ b/content/riak/kv/latest/add-ons.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/add-ons/" + - "/riakkv/latest/add-ons/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/add-ons/redis.md b/content/riak/kv/latest/add-ons/redis.md new file mode 100644 index 0000000000..28098e46de --- /dev/null +++ b/content/riak/kv/latest/add-ons/redis.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/add-ons/redis/" + - "/riakkv/latest/add-ons/redis/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/add-ons/redis/developing-rra.md b/content/riak/kv/latest/add-ons/redis/developing-rra.md new file mode 100644 index 0000000000..b6f06a7434 --- /dev/null +++ b/content/riak/kv/latest/add-ons/redis/developing-rra.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/add-ons/redis/developing-rra/" + - "/riakkv/latest/add-ons/redis/developing-rra/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
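The `latest_redirect` layout these stub pages reference is not part of this
diff. Purely as a hypothetical sketch of the behavior described above (the
template file path, the assumption that `project_descriptions` lives in the
site params, and the logic here are illustrative guesses, not the repo's
actual implementation), such a layout could emit a meta-refresh like this:

```html
<!-- layouts/latest_redirect/single.html (hypothetical sketch, not the repo's real layout) -->
<!-- Look up the latest version for this project, e.g.
     project_descriptions.riak_kv.latest = "3.0.4" (assumed to live in site params) -->
{{ $latest := index .Site.Params.project_descriptions .Params.project "latest" }}
<!-- Swap the "latest" path segment (the page's replace_text) for the real version -->
{{ $target := replace .RelPermalink .Params.replace_text $latest }}
<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="refresh" content="0; url={{ $target }}">
    <link rel="canonical" href="{{ $target }}">
  </head>
</html>
```

Whatever the real implementation looks like, the key point from the stub text
is that the version substitution happens at build time from the project
metadata rather than being hard-coded into each page.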
+ + diff --git a/content/riak/kv/latest/add-ons/redis/redis-add-on-features.md b/content/riak/kv/latest/add-ons/redis/redis-add-on-features.md new file mode 100644 index 0000000000..d45311382c --- /dev/null +++ b/content/riak/kv/latest/add-ons/redis/redis-add-on-features.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/add-ons/redis/redis-add-on-features/" + - "/riakkv/latest/add-ons/redis/redis-add-on-features/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/add-ons/redis/set-up-rra.md b/content/riak/kv/latest/add-ons/redis/set-up-rra.md new file mode 100644 index 0000000000..6385481569 --- /dev/null +++ b/content/riak/kv/latest/add-ons/redis/set-up-rra.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/add-ons/redis/set-up-rra/" + - "/riakkv/latest/add-ons/redis/set-up-rra/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/kv/latest/add-ons/redis/set-up-rra/deployment-models.md new file mode 100644 index 0000000000..5c45d5da5c --- /dev/null +++ b/content/riak/kv/latest/add-ons/redis/set-up-rra/deployment-models.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/add-ons/redis/set-up-rra/deployment-models/" + - "/riakkv/latest/add-ons/redis/set-up-rra/deployment-models/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/add-ons/redis/using-rra.md b/content/riak/kv/latest/add-ons/redis/using-rra.md new file mode 100644 index 0000000000..51134f9cf1 --- /dev/null +++ b/content/riak/kv/latest/add-ons/redis/using-rra.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/add-ons/redis/using-rra/" + - "/riakkv/latest/add-ons/redis/using-rra/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/configuring.md b/content/riak/kv/latest/configuring.md new file mode 100644 index 0000000000..94ee05e9e4 --- /dev/null +++ b/content/riak/kv/latest/configuring.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/" + - "/riakkv/latest/configuring/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/backend.md b/content/riak/kv/latest/configuring/backend.md new file mode 100644 index 0000000000..8349f0ed54 --- /dev/null +++ b/content/riak/kv/latest/configuring/backend.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/backend/" + - "/riakkv/latest/configuring/backend/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/basic.md b/content/riak/kv/latest/configuring/basic.md new file mode 100644 index 0000000000..d29fa471ac --- /dev/null +++ b/content/riak/kv/latest/configuring/basic.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/basic/" + - "/riakkv/latest/configuring/basic/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/global-object-expiration.md b/content/riak/kv/latest/configuring/global-object-expiration.md new file mode 100644 index 0000000000..87ee338964 --- /dev/null +++ b/content/riak/kv/latest/configuring/global-object-expiration.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/global-object-expiration/" + - "/riakkv/latest/configuring/global-object-expiration/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/configuring/load-balancing-proxy.md b/content/riak/kv/latest/configuring/load-balancing-proxy.md new file mode 100644 index 0000000000..0ac52fd6ec --- /dev/null +++ b/content/riak/kv/latest/configuring/load-balancing-proxy.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/load-balancing-proxy/" + - "/riakkv/latest/configuring/load-balancing-proxy/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/managing.md b/content/riak/kv/latest/configuring/managing.md new file mode 100644 index 0000000000..9e96d861ab --- /dev/null +++ b/content/riak/kv/latest/configuring/managing.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/managing/" + - "/riakkv/latest/configuring/managing/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/mapreduce.md b/content/riak/kv/latest/configuring/mapreduce.md new file mode 100644 index 0000000000..3ac2eb0a5a --- /dev/null +++ b/content/riak/kv/latest/configuring/mapreduce.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/mapreduce/" + - "/riakkv/latest/configuring/mapreduce/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/next-gen-replication.md b/content/riak/kv/latest/configuring/next-gen-replication.md new file mode 100644 index 0000000000..fd2f6b94cb --- /dev/null +++ b/content/riak/kv/latest/configuring/next-gen-replication.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/next-gen-replication/" + - "/riakkv/latest/configuring/next-gen-replication/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/configuring/reference.md b/content/riak/kv/latest/configuring/reference.md new file mode 100644 index 0000000000..c0e1c6ab30 --- /dev/null +++ b/content/riak/kv/latest/configuring/reference.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/reference/" + - "/riakkv/latest/configuring/reference/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/search.md b/content/riak/kv/latest/configuring/search.md new file mode 100644 index 0000000000..f9460ec410 --- /dev/null +++ b/content/riak/kv/latest/configuring/search.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/search/" + - "/riakkv/latest/configuring/search/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/strong-consistency.md b/content/riak/kv/latest/configuring/strong-consistency.md new file mode 100644 index 0000000000..cae887af6f --- /dev/null +++ b/content/riak/kv/latest/configuring/strong-consistency.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/strong-consistency/" + - "/riakkv/latest/configuring/strong-consistency/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/v2-multi-datacenter.md b/content/riak/kv/latest/configuring/v2-multi-datacenter.md new file mode 100644 index 0000000000..f1068acf87 --- /dev/null +++ b/content/riak/kv/latest/configuring/v2-multi-datacenter.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/v2-multi-datacenter/" + - "/riakkv/latest/configuring/v2-multi-datacenter/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/configuring/v2-multi-datacenter/nat.md b/content/riak/kv/latest/configuring/v2-multi-datacenter/nat.md new file mode 100644 index 0000000000..8b7dc0d70b --- /dev/null +++ b/content/riak/kv/latest/configuring/v2-multi-datacenter/nat.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/v2-multi-datacenter/nat/" + - "/riakkv/latest/configuring/v2-multi-datacenter/nat/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/v2-multi-datacenter/quick-start.md b/content/riak/kv/latest/configuring/v2-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..96db5d32f2 --- /dev/null +++ b/content/riak/kv/latest/configuring/v2-multi-datacenter/quick-start.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/v2-multi-datacenter/quick-start/" + - "/riakkv/latest/configuring/v2-multi-datacenter/quick-start/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/v2-multi-datacenter/ssl.md b/content/riak/kv/latest/configuring/v2-multi-datacenter/ssl.md new file mode 100644 index 0000000000..6340cd23a7 --- /dev/null +++ b/content/riak/kv/latest/configuring/v2-multi-datacenter/ssl.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/v2-multi-datacenter/ssl/" + - "/riakkv/latest/configuring/v2-multi-datacenter/ssl/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/v3-multi-datacenter.md b/content/riak/kv/latest/configuring/v3-multi-datacenter.md new file mode 100644 index 0000000000..45a61b5f64 --- /dev/null +++ b/content/riak/kv/latest/configuring/v3-multi-datacenter.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/v3-multi-datacenter/" + - "/riakkv/latest/configuring/v3-multi-datacenter/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/configuring/v3-multi-datacenter/nat.md b/content/riak/kv/latest/configuring/v3-multi-datacenter/nat.md new file mode 100644 index 0000000000..ce1571a85f --- /dev/null +++ b/content/riak/kv/latest/configuring/v3-multi-datacenter/nat.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/v3-multi-datacenter/nat/" + - "/riakkv/latest/configuring/v3-multi-datacenter/nat/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/v3-multi-datacenter/quick-start.md b/content/riak/kv/latest/configuring/v3-multi-datacenter/quick-start.md new file mode 100644 index 0000000000..1c9c7a9946 --- /dev/null +++ b/content/riak/kv/latest/configuring/v3-multi-datacenter/quick-start.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/v3-multi-datacenter/quick-start/" + - "/riakkv/latest/configuring/v3-multi-datacenter/quick-start/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/configuring/v3-multi-datacenter/ssl.md b/content/riak/kv/latest/configuring/v3-multi-datacenter/ssl.md new file mode 100644 index 0000000000..e8a7f2c674 --- /dev/null +++ b/content/riak/kv/latest/configuring/v3-multi-datacenter/ssl.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/configuring/v3-multi-datacenter/ssl/" + - "/riakkv/latest/configuring/v3-multi-datacenter/ssl/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing.md b/content/riak/kv/latest/developing.md new file mode 100644 index 0000000000..2f688b60d3 --- /dev/null +++ b/content/riak/kv/latest/developing.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/" + - "/riakkv/latest/developing/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api.md b/content/riak/kv/latest/developing/api.md new file mode 100644 index 0000000000..caeff0ea24 --- /dev/null +++ b/content/riak/kv/latest/developing/api.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/" + - "/riakkv/latest/developing/api/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/backend.md b/content/riak/kv/latest/developing/api/backend.md new file mode 100644 index 0000000000..9a13ab8929 --- /dev/null +++ b/content/riak/kv/latest/developing/api/backend.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/backend/" + - "/riakkv/latest/developing/api/backend/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http.md b/content/riak/kv/latest/developing/api/http.md new file mode 100644 index 0000000000..2bccaff62d --- /dev/null +++ b/content/riak/kv/latest/developing/api/http.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/" + - "/riakkv/latest/developing/api/http/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/counters.md b/content/riak/kv/latest/developing/api/http/counters.md new file mode 100644 index 0000000000..26bee76d02 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/counters.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/counters/" + - "/riakkv/latest/developing/api/http/counters/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/http/delete-object.md b/content/riak/kv/latest/developing/api/http/delete-object.md new file mode 100644 index 0000000000..4f82669e0a --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/delete-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/delete-object/" + - "/riakkv/latest/developing/api/http/delete-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/delete-search-index.md b/content/riak/kv/latest/developing/api/http/delete-search-index.md new file mode 100644 index 0000000000..c372a2a033 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/delete-search-index.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/delete-search-index/" + - "/riakkv/latest/developing/api/http/delete-search-index/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/fetch-object.md b/content/riak/kv/latest/developing/api/http/fetch-object.md new file mode 100644 index 0000000000..673416781a --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/fetch-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/fetch-object/" + - "/riakkv/latest/developing/api/http/fetch-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/fetch-search-index.md b/content/riak/kv/latest/developing/api/http/fetch-search-index.md new file mode 100644 index 0000000000..34945e4506 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/fetch-search-index.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/fetch-search-index/" + - "/riakkv/latest/developing/api/http/fetch-search-index/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/http/fetch-search-schema.md b/content/riak/kv/latest/developing/api/http/fetch-search-schema.md new file mode 100644 index 0000000000..1cbabc68df --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/fetch-search-schema.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/fetch-search-schema/" + - "/riakkv/latest/developing/api/http/fetch-search-schema/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/get-bucket-props.md b/content/riak/kv/latest/developing/api/http/get-bucket-props.md new file mode 100644 index 0000000000..e0435656a2 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/get-bucket-props.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/get-bucket-props/" + - "/riakkv/latest/developing/api/http/get-bucket-props/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/link-walking.md b/content/riak/kv/latest/developing/api/http/link-walking.md new file mode 100644 index 0000000000..f34c1d302b --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/link-walking.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/link-walking/" + - "/riakkv/latest/developing/api/http/link-walking/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/list-buckets.md b/content/riak/kv/latest/developing/api/http/list-buckets.md new file mode 100644 index 0000000000..c31defcbd9 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/list-buckets.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/list-buckets/" + - "/riakkv/latest/developing/api/http/list-buckets/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/http/list-keys.md b/content/riak/kv/latest/developing/api/http/list-keys.md new file mode 100644 index 0000000000..02c3586bbf --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/list-keys.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/list-keys/" + - "/riakkv/latest/developing/api/http/list-keys/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/list-resources.md b/content/riak/kv/latest/developing/api/http/list-resources.md new file mode 100644 index 0000000000..770b1777c7 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/list-resources.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/list-resources/" + - "/riakkv/latest/developing/api/http/list-resources/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/mapreduce.md b/content/riak/kv/latest/developing/api/http/mapreduce.md new file mode 100644 index 0000000000..640066da77 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/mapreduce.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/mapreduce/" + - "/riakkv/latest/developing/api/http/mapreduce/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/ping.md b/content/riak/kv/latest/developing/api/http/ping.md new file mode 100644 index 0000000000..9afb95c2a4 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/ping.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/ping/" + - "/riakkv/latest/developing/api/http/ping/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/http/reset-bucket-props.md b/content/riak/kv/latest/developing/api/http/reset-bucket-props.md new file mode 100644 index 0000000000..b7fb46ee83 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/reset-bucket-props.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/reset-bucket-props/" + - "/riakkv/latest/developing/api/http/reset-bucket-props/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/search-index-info.md b/content/riak/kv/latest/developing/api/http/search-index-info.md new file mode 100644 index 0000000000..e708cced3e --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/search-index-info.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/search-index-info/" + - "/riakkv/latest/developing/api/http/search-index-info/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/search-query.md b/content/riak/kv/latest/developing/api/http/search-query.md new file mode 100644 index 0000000000..b5acece012 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/search-query.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/search-query/" + - "/riakkv/latest/developing/api/http/search-query/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/secondary-indexes.md b/content/riak/kv/latest/developing/api/http/secondary-indexes.md new file mode 100644 index 0000000000..29d67643b6 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/secondary-indexes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/secondary-indexes/" + - "/riakkv/latest/developing/api/http/secondary-indexes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/http/set-bucket-props.md b/content/riak/kv/latest/developing/api/http/set-bucket-props.md new file mode 100644 index 0000000000..85bf85906b --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/set-bucket-props.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/set-bucket-props/" + - "/riakkv/latest/developing/api/http/set-bucket-props/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/status.md b/content/riak/kv/latest/developing/api/http/status.md new file mode 100644 index 0000000000..b7fed8dec9 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/status.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/status/" + - "/riakkv/latest/developing/api/http/status/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/store-object.md b/content/riak/kv/latest/developing/api/http/store-object.md new file mode 100644 index 0000000000..3e74f52150 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/store-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/store-object/" + - "/riakkv/latest/developing/api/http/store-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/http/store-search-index.md b/content/riak/kv/latest/developing/api/http/store-search-index.md new file mode 100644 index 0000000000..283808e53b --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/store-search-index.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/store-search-index/" + - "/riakkv/latest/developing/api/http/store-search-index/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/http/store-search-schema.md b/content/riak/kv/latest/developing/api/http/store-search-schema.md new file mode 100644 index 0000000000..8ad8860492 --- /dev/null +++ b/content/riak/kv/latest/developing/api/http/store-search-schema.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/http/store-search-schema/" + - "/riakkv/latest/developing/api/http/store-search-schema/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers.md b/content/riak/kv/latest/developing/api/protocol-buffers.md new file mode 100644 index 0000000000..3f1f56a473 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/" + - "/riakkv/latest/developing/api/protocol-buffers/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/auth-req.md b/content/riak/kv/latest/developing/api/protocol-buffers/auth-req.md new file mode 100644 index 0000000000..3f9feea7da --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/auth-req.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/auth-req/" + - "/riakkv/latest/developing/api/protocol-buffers/auth-req/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/coverage-queries.md b/content/riak/kv/latest/developing/api/protocol-buffers/coverage-queries.md new file mode 100644 index 0000000000..886640d857 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/coverage-queries.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/coverage-queries/" + - "/riakkv/latest/developing/api/protocol-buffers/coverage-queries/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/delete-object.md b/content/riak/kv/latest/developing/api/protocol-buffers/delete-object.md new file mode 100644 index 0000000000..121c1c7fef --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/delete-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/delete-object/" + - "/riakkv/latest/developing/api/protocol-buffers/delete-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/dt-counter-store.md b/content/riak/kv/latest/developing/api/protocol-buffers/dt-counter-store.md new file mode 100644 index 0000000000..c83fc44bf1 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/dt-counter-store.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/dt-counter-store/" + - "/riakkv/latest/developing/api/protocol-buffers/dt-counter-store/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/dt-fetch.md b/content/riak/kv/latest/developing/api/protocol-buffers/dt-fetch.md new file mode 100644 index 0000000000..1868826667 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/dt-fetch.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/dt-fetch/" + - "/riakkv/latest/developing/api/protocol-buffers/dt-fetch/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/dt-map-store.md b/content/riak/kv/latest/developing/api/protocol-buffers/dt-map-store.md new file mode 100644 index 0000000000..3eca7d4d63 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/dt-map-store.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/dt-map-store/" + - "/riakkv/latest/developing/api/protocol-buffers/dt-map-store/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/dt-set-store.md b/content/riak/kv/latest/developing/api/protocol-buffers/dt-set-store.md new file mode 100644 index 0000000000..f87d46fd9f --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/dt-set-store.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/dt-set-store/" + - "/riakkv/latest/developing/api/protocol-buffers/dt-set-store/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/dt-store.md b/content/riak/kv/latest/developing/api/protocol-buffers/dt-store.md new file mode 100644 index 0000000000..59e1265bd2 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/dt-store.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/dt-store/" + - "/riakkv/latest/developing/api/protocol-buffers/dt-store/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/dt-union.md b/content/riak/kv/latest/developing/api/protocol-buffers/dt-union.md new file mode 100644 index 0000000000..d736fb5147 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/dt-union.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/dt-union/" + - "/riakkv/latest/developing/api/protocol-buffers/dt-union/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/fetch-object.md b/content/riak/kv/latest/developing/api/protocol-buffers/fetch-object.md new file mode 100644 index 0000000000..37d664613d --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/fetch-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/fetch-object/" + - "/riakkv/latest/developing/api/protocol-buffers/fetch-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/get-bucket-props.md b/content/riak/kv/latest/developing/api/protocol-buffers/get-bucket-props.md new file mode 100644 index 0000000000..1569a68e13 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/get-bucket-props.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/get-bucket-props/" + - "/riakkv/latest/developing/api/protocol-buffers/get-bucket-props/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/get-bucket-type.md b/content/riak/kv/latest/developing/api/protocol-buffers/get-bucket-type.md new file mode 100644 index 0000000000..c9cd3b696f --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/get-bucket-type.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/get-bucket-type/" + - "/riakkv/latest/developing/api/protocol-buffers/get-bucket-type/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/get-client-id.md b/content/riak/kv/latest/developing/api/protocol-buffers/get-client-id.md new file mode 100644 index 0000000000..e86a32b38d --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/get-client-id.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/get-client-id/" + - "/riakkv/latest/developing/api/protocol-buffers/get-client-id/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/list-buckets.md b/content/riak/kv/latest/developing/api/protocol-buffers/list-buckets.md new file mode 100644 index 0000000000..c42d91ec2d --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/list-buckets.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/list-buckets/" + - "/riakkv/latest/developing/api/protocol-buffers/list-buckets/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/list-keys.md b/content/riak/kv/latest/developing/api/protocol-buffers/list-keys.md new file mode 100644 index 0000000000..60d6ac124d --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/list-keys.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/list-keys/" + - "/riakkv/latest/developing/api/protocol-buffers/list-keys/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/mapreduce.md b/content/riak/kv/latest/developing/api/protocol-buffers/mapreduce.md new file mode 100644 index 0000000000..f590723bc8 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/mapreduce.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/mapreduce/" + - "/riakkv/latest/developing/api/protocol-buffers/mapreduce/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/ping.md b/content/riak/kv/latest/developing/api/protocol-buffers/ping.md new file mode 100644 index 0000000000..73a95bdc82 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/ping.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/ping/" + - "/riakkv/latest/developing/api/protocol-buffers/ping/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/reset-bucket-props.md b/content/riak/kv/latest/developing/api/protocol-buffers/reset-bucket-props.md new file mode 100644 index 0000000000..3ae1a95930 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/reset-bucket-props.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/reset-bucket-props/" + - "/riakkv/latest/developing/api/protocol-buffers/reset-bucket-props/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/search.md b/content/riak/kv/latest/developing/api/protocol-buffers/search.md new file mode 100644 index 0000000000..aa7a1b400f --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/search.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/search/" + - "/riakkv/latest/developing/api/protocol-buffers/search/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/secondary-indexes.md b/content/riak/kv/latest/developing/api/protocol-buffers/secondary-indexes.md new file mode 100644 index 0000000000..56a33b8443 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/secondary-indexes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/secondary-indexes/" + - "/riakkv/latest/developing/api/protocol-buffers/secondary-indexes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/server-info.md b/content/riak/kv/latest/developing/api/protocol-buffers/server-info.md new file mode 100644 index 0000000000..856354125b --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/server-info.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/server-info/" + - "/riakkv/latest/developing/api/protocol-buffers/server-info/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/set-bucket-props.md b/content/riak/kv/latest/developing/api/protocol-buffers/set-bucket-props.md new file mode 100644 index 0000000000..b7f9f11cac --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/set-bucket-props.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/set-bucket-props/" + - "/riakkv/latest/developing/api/protocol-buffers/set-bucket-props/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/set-bucket-type.md b/content/riak/kv/latest/developing/api/protocol-buffers/set-bucket-type.md new file mode 100644 index 0000000000..af35b4a563 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/set-bucket-type.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/set-bucket-type/" + - "/riakkv/latest/developing/api/protocol-buffers/set-bucket-type/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/set-client-id.md b/content/riak/kv/latest/developing/api/protocol-buffers/set-client-id.md new file mode 100644 index 0000000000..028789a940 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/set-client-id.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/set-client-id/" + - "/riakkv/latest/developing/api/protocol-buffers/set-client-id/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/store-object.md b/content/riak/kv/latest/developing/api/protocol-buffers/store-object.md new file mode 100644 index 0000000000..08115b71db --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/store-object.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/store-object/" + - "/riakkv/latest/developing/api/protocol-buffers/store-object/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/yz-index-delete.md b/content/riak/kv/latest/developing/api/protocol-buffers/yz-index-delete.md new file mode 100644 index 0000000000..080f2f9763 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/yz-index-delete.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/yz-index-delete/" + - "/riakkv/latest/developing/api/protocol-buffers/yz-index-delete/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/yz-index-get.md b/content/riak/kv/latest/developing/api/protocol-buffers/yz-index-get.md new file mode 100644 index 0000000000..943d06affd --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/yz-index-get.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/yz-index-get/" + - "/riakkv/latest/developing/api/protocol-buffers/yz-index-get/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/yz-index-put.md b/content/riak/kv/latest/developing/api/protocol-buffers/yz-index-put.md new file mode 100644 index 0000000000..1c44335646 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/yz-index-put.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/yz-index-put/" + - "/riakkv/latest/developing/api/protocol-buffers/yz-index-put/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/yz-schema-get.md b/content/riak/kv/latest/developing/api/protocol-buffers/yz-schema-get.md new file mode 100644 index 0000000000..bb279b91d9 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/yz-schema-get.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/yz-schema-get/" + - "/riakkv/latest/developing/api/protocol-buffers/yz-schema-get/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/api/protocol-buffers/yz-schema-put.md b/content/riak/kv/latest/developing/api/protocol-buffers/yz-schema-put.md new file mode 100644 index 0000000000..aeda46f3b0 --- /dev/null +++ b/content/riak/kv/latest/developing/api/protocol-buffers/yz-schema-put.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/protocol-buffers/yz-schema-put/" + - "/riakkv/latest/developing/api/protocol-buffers/yz-schema-put/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/api/repl-hooks.md b/content/riak/kv/latest/developing/api/repl-hooks.md new file mode 100644 index 0000000000..26d52a42eb --- /dev/null +++ b/content/riak/kv/latest/developing/api/repl-hooks.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/api/repl-hooks/" + - "/riakkv/latest/developing/api/repl-hooks/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/app-guide.md b/content/riak/kv/latest/developing/app-guide.md new file mode 100644 index 0000000000..0861aba345 --- /dev/null +++ b/content/riak/kv/latest/developing/app-guide.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/app-guide/" + - "/riakkv/latest/developing/app-guide/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/app-guide/advanced-mapreduce.md b/content/riak/kv/latest/developing/app-guide/advanced-mapreduce.md new file mode 100644 index 0000000000..610ee5d866 --- /dev/null +++ b/content/riak/kv/latest/developing/app-guide/advanced-mapreduce.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/app-guide/advanced-mapreduce/" + - "/riakkv/latest/developing/app-guide/advanced-mapreduce/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/app-guide/cluster-metadata.md b/content/riak/kv/latest/developing/app-guide/cluster-metadata.md new file mode 100644 index 0000000000..97e561a222 --- /dev/null +++ b/content/riak/kv/latest/developing/app-guide/cluster-metadata.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/app-guide/cluster-metadata/" + - "/riakkv/latest/developing/app-guide/cluster-metadata/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/app-guide/reference.md b/content/riak/kv/latest/developing/app-guide/reference.md new file mode 100644 index 0000000000..e3959b1650 --- /dev/null +++ b/content/riak/kv/latest/developing/app-guide/reference.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/app-guide/reference/" + - "/riakkv/latest/developing/app-guide/reference/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/app-guide/replication-properties.md b/content/riak/kv/latest/developing/app-guide/replication-properties.md new file mode 100644 index 0000000000..a8081fa5cc --- /dev/null +++ b/content/riak/kv/latest/developing/app-guide/replication-properties.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/app-guide/replication-properties/" + - "/riakkv/latest/developing/app-guide/replication-properties/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/app-guide/strong-consistency.md b/content/riak/kv/latest/developing/app-guide/strong-consistency.md new file mode 100644 index 0000000000..990a6270cd --- /dev/null +++ b/content/riak/kv/latest/developing/app-guide/strong-consistency.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/app-guide/strong-consistency/" + - "/riakkv/latest/developing/app-guide/strong-consistency/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/app-guide/write-once.md b/content/riak/kv/latest/developing/app-guide/write-once.md new file mode 100644 index 0000000000..4c39e059a8 --- /dev/null +++ b/content/riak/kv/latest/developing/app-guide/write-once.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/app-guide/write-once/" + - "/riakkv/latest/developing/app-guide/write-once/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/client-libraries.md b/content/riak/kv/latest/developing/client-libraries.md new file mode 100644 index 0000000000..a5107e27c0 --- /dev/null +++ b/content/riak/kv/latest/developing/client-libraries.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/client-libraries/" + - "/riakkv/latest/developing/client-libraries/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/data-modeling.md b/content/riak/kv/latest/developing/data-modeling.md new file mode 100644 index 0000000000..9f4f6c33ca --- /dev/null +++ b/content/riak/kv/latest/developing/data-modeling.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/data-modeling/" + - "/riakkv/latest/developing/data-modeling/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/data-types.md b/content/riak/kv/latest/developing/data-types.md new file mode 100644 index 0000000000..b327f4b3da --- /dev/null +++ b/content/riak/kv/latest/developing/data-types.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/data-types/" + - "/riakkv/latest/developing/data-types/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/data-types/counters.md b/content/riak/kv/latest/developing/data-types/counters.md new file mode 100644 index 0000000000..9fea9aa514 --- /dev/null +++ b/content/riak/kv/latest/developing/data-types/counters.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/data-types/counters/" + - "/riakkv/latest/developing/data-types/counters/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/data-types/gsets.md b/content/riak/kv/latest/developing/data-types/gsets.md new file mode 100644 index 0000000000..e35e632109 --- /dev/null +++ b/content/riak/kv/latest/developing/data-types/gsets.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/data-types/gsets/" + - "/riakkv/latest/developing/data-types/gsets/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/data-types/hyperloglogs.md b/content/riak/kv/latest/developing/data-types/hyperloglogs.md new file mode 100644 index 0000000000..4c7c2ee492 --- /dev/null +++ b/content/riak/kv/latest/developing/data-types/hyperloglogs.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/data-types/hyperloglogs/" + - "/riakkv/latest/developing/data-types/hyperloglogs/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/data-types/maps.md b/content/riak/kv/latest/developing/data-types/maps.md new file mode 100644 index 0000000000..3c444feda4 --- /dev/null +++ b/content/riak/kv/latest/developing/data-types/maps.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/data-types/maps/" + - "/riakkv/latest/developing/data-types/maps/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/data-types/sets.md b/content/riak/kv/latest/developing/data-types/sets.md new file mode 100644 index 0000000000..656e5f6f65 --- /dev/null +++ b/content/riak/kv/latest/developing/data-types/sets.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/data-types/sets/" + - "/riakkv/latest/developing/data-types/sets/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/faq.md b/content/riak/kv/latest/developing/faq.md new file mode 100644 index 0000000000..9fefca2432 --- /dev/null +++ b/content/riak/kv/latest/developing/faq.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/faq/" + - "/riakkv/latest/developing/faq/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started.md b/content/riak/kv/latest/developing/getting-started.md new file mode 100644 index 0000000000..7f00c87be1 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/" + - "/riakkv/latest/developing/getting-started/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/csharp.md b/content/riak/kv/latest/developing/getting-started/csharp.md new file mode 100644 index 0000000000..7750b60d85 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/csharp.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/csharp/" + - "/riakkv/latest/developing/getting-started/csharp/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/csharp/crud-operations.md b/content/riak/kv/latest/developing/getting-started/csharp/crud-operations.md new file mode 100644 index 0000000000..ce7b2b1582 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/csharp/crud-operations.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/csharp/crud-operations/" + - "/riakkv/latest/developing/getting-started/csharp/crud-operations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/getting-started/csharp/object-modeling.md b/content/riak/kv/latest/developing/getting-started/csharp/object-modeling.md new file mode 100644 index 0000000000..12c362bd23 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/csharp/object-modeling.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/csharp/object-modeling/" + - "/riakkv/latest/developing/getting-started/csharp/object-modeling/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/csharp/querying.md b/content/riak/kv/latest/developing/getting-started/csharp/querying.md new file mode 100644 index 0000000000..36bb56243f --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/csharp/querying.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/csharp/querying/" + - "/riakkv/latest/developing/getting-started/csharp/querying/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/erlang.md b/content/riak/kv/latest/developing/getting-started/erlang.md new file mode 100644 index 0000000000..047f39fbf7 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/erlang.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/erlang/" + - "/riakkv/latest/developing/getting-started/erlang/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/erlang/crud-operations.md b/content/riak/kv/latest/developing/getting-started/erlang/crud-operations.md new file mode 100644 index 0000000000..d572b79a21 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/erlang/crud-operations.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/erlang/crud-operations/" + - "/riakkv/latest/developing/getting-started/erlang/crud-operations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/getting-started/erlang/object-modeling.md b/content/riak/kv/latest/developing/getting-started/erlang/object-modeling.md new file mode 100644 index 0000000000..6f083431d9 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/erlang/object-modeling.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/erlang/object-modeling/" + - "/riakkv/latest/developing/getting-started/erlang/object-modeling/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/erlang/querying.md b/content/riak/kv/latest/developing/getting-started/erlang/querying.md new file mode 100644 index 0000000000..1c0fb8038e --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/erlang/querying.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/erlang/querying/" + - "/riakkv/latest/developing/getting-started/erlang/querying/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/golang.md b/content/riak/kv/latest/developing/getting-started/golang.md new file mode 100644 index 0000000000..7bf62455cb --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/golang.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/golang/" + - "/riakkv/latest/developing/getting-started/golang/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/golang/crud-operations.md b/content/riak/kv/latest/developing/getting-started/golang/crud-operations.md new file mode 100644 index 0000000000..1a2e4df3eb --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/golang/crud-operations.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/golang/crud-operations/" + - "/riakkv/latest/developing/getting-started/golang/crud-operations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/getting-started/golang/object-modeling.md b/content/riak/kv/latest/developing/getting-started/golang/object-modeling.md new file mode 100644 index 0000000000..72b45f5044 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/golang/object-modeling.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/golang/object-modeling/" + - "/riakkv/latest/developing/getting-started/golang/object-modeling/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/golang/querying.md b/content/riak/kv/latest/developing/getting-started/golang/querying.md new file mode 100644 index 0000000000..d81df998ed --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/golang/querying.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/golang/querying/" + - "/riakkv/latest/developing/getting-started/golang/querying/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/java.md b/content/riak/kv/latest/developing/getting-started/java.md new file mode 100644 index 0000000000..577a450be0 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/java.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/java/" + - "/riakkv/latest/developing/getting-started/java/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/java/crud-operations.md b/content/riak/kv/latest/developing/getting-started/java/crud-operations.md new file mode 100644 index 0000000000..91bba87667 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/java/crud-operations.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/java/crud-operations/" + - "/riakkv/latest/developing/getting-started/java/crud-operations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/getting-started/java/object-modeling.md b/content/riak/kv/latest/developing/getting-started/java/object-modeling.md new file mode 100644 index 0000000000..47c2e46c1f --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/java/object-modeling.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/java/object-modeling/" + - "/riakkv/latest/developing/getting-started/java/object-modeling/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/java/querying.md b/content/riak/kv/latest/developing/getting-started/java/querying.md new file mode 100644 index 0000000000..a19141f668 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/java/querying.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/java/querying/" + - "/riakkv/latest/developing/getting-started/java/querying/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/nodejs.md b/content/riak/kv/latest/developing/getting-started/nodejs.md new file mode 100644 index 0000000000..7650b78d68 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/nodejs.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/nodejs/" + - "/riakkv/latest/developing/getting-started/nodejs/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/nodejs/crud-operations.md b/content/riak/kv/latest/developing/getting-started/nodejs/crud-operations.md new file mode 100644 index 0000000000..93dc5cce95 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/nodejs/crud-operations.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/nodejs/crud-operations/" + - "/riakkv/latest/developing/getting-started/nodejs/crud-operations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/getting-started/nodejs/object-modeling.md b/content/riak/kv/latest/developing/getting-started/nodejs/object-modeling.md new file mode 100644 index 0000000000..3052b11016 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/nodejs/object-modeling.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/nodejs/object-modeling/" + - "/riakkv/latest/developing/getting-started/nodejs/object-modeling/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/nodejs/querying.md b/content/riak/kv/latest/developing/getting-started/nodejs/querying.md new file mode 100644 index 0000000000..9770c37e1e --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/nodejs/querying.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/nodejs/querying/" + - "/riakkv/latest/developing/getting-started/nodejs/querying/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/php.md b/content/riak/kv/latest/developing/getting-started/php.md new file mode 100644 index 0000000000..85d80f8e3f --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/php.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/php/" + - "/riakkv/latest/developing/getting-started/php/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/php/crud-operations.md b/content/riak/kv/latest/developing/getting-started/php/crud-operations.md new file mode 100644 index 0000000000..21bdfd8ec0 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/php/crud-operations.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/php/crud-operations/" + - "/riakkv/latest/developing/getting-started/php/crud-operations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/getting-started/php/querying.md b/content/riak/kv/latest/developing/getting-started/php/querying.md new file mode 100644 index 0000000000..8c7cdc65d4 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/php/querying.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/php/querying/" + - "/riakkv/latest/developing/getting-started/php/querying/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/python.md b/content/riak/kv/latest/developing/getting-started/python.md new file mode 100644 index 0000000000..7af80739f7 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/python.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/python/" + - "/riakkv/latest/developing/getting-started/python/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/python/crud-operations.md b/content/riak/kv/latest/developing/getting-started/python/crud-operations.md new file mode 100644 index 0000000000..b9999ff5dc --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/python/crud-operations.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/python/crud-operations/" + - "/riakkv/latest/developing/getting-started/python/crud-operations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/python/object-modeling.md b/content/riak/kv/latest/developing/getting-started/python/object-modeling.md new file mode 100644 index 0000000000..53bb90ebcc --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/python/object-modeling.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/python/object-modeling/" + - "/riakkv/latest/developing/getting-started/python/object-modeling/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/getting-started/python/querying.md b/content/riak/kv/latest/developing/getting-started/python/querying.md new file mode 100644 index 0000000000..edff809efd --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/python/querying.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/python/querying/" + - "/riakkv/latest/developing/getting-started/python/querying/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/ruby.md b/content/riak/kv/latest/developing/getting-started/ruby.md new file mode 100644 index 0000000000..eff53418fc --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/ruby.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/ruby/" + - "/riakkv/latest/developing/getting-started/ruby/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/ruby/crud-operations.md b/content/riak/kv/latest/developing/getting-started/ruby/crud-operations.md new file mode 100644 index 0000000000..ecc0ccd3e0 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/ruby/crud-operations.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/ruby/crud-operations/" + - "/riakkv/latest/developing/getting-started/ruby/crud-operations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/getting-started/ruby/object-modeling.md b/content/riak/kv/latest/developing/getting-started/ruby/object-modeling.md new file mode 100644 index 0000000000..b9270074ed --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/ruby/object-modeling.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/ruby/object-modeling/" + - "/riakkv/latest/developing/getting-started/ruby/object-modeling/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/getting-started/ruby/querying.md b/content/riak/kv/latest/developing/getting-started/ruby/querying.md new file mode 100644 index 0000000000..58d1a36866 --- /dev/null +++ b/content/riak/kv/latest/developing/getting-started/ruby/querying.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/getting-started/ruby/querying/" + - "/riakkv/latest/developing/getting-started/ruby/querying/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/key-value-modeling.md b/content/riak/kv/latest/developing/key-value-modeling.md new file mode 100644 index 0000000000..9c2c623a8a --- /dev/null +++ b/content/riak/kv/latest/developing/key-value-modeling.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/key-value-modeling/" + - "/riakkv/latest/developing/key-value-modeling/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage.md b/content/riak/kv/latest/developing/usage.md new file mode 100644 index 0000000000..de958fcc69 --- /dev/null +++ b/content/riak/kv/latest/developing/usage.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/" + - "/riakkv/latest/developing/usage/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/bucket-types.md b/content/riak/kv/latest/developing/usage/bucket-types.md new file mode 100644 index 0000000000..16c2f76ba0 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/bucket-types.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/bucket-types/" + - "/riakkv/latest/developing/usage/bucket-types/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/usage/commit-hooks.md b/content/riak/kv/latest/developing/usage/commit-hooks.md new file mode 100644 index 0000000000..8dbe25a746 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/commit-hooks.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/commit-hooks/" + - "/riakkv/latest/developing/usage/commit-hooks/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/conflict-resolution.md b/content/riak/kv/latest/developing/usage/conflict-resolution.md new file mode 100644 index 0000000000..e6fec2c501 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/conflict-resolution.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/conflict-resolution/" + - "/riakkv/latest/developing/usage/conflict-resolution/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/conflict-resolution/csharp.md b/content/riak/kv/latest/developing/usage/conflict-resolution/csharp.md new file mode 100644 index 0000000000..3dbf16ba82 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/conflict-resolution/csharp.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/conflict-resolution/csharp/" + - "/riakkv/latest/developing/usage/conflict-resolution/csharp/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/conflict-resolution/golang.md b/content/riak/kv/latest/developing/usage/conflict-resolution/golang.md new file mode 100644 index 0000000000..bcc37f5704 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/conflict-resolution/golang.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/conflict-resolution/golang/" + - "/riakkv/latest/developing/usage/conflict-resolution/golang/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/usage/conflict-resolution/java.md b/content/riak/kv/latest/developing/usage/conflict-resolution/java.md new file mode 100644 index 0000000000..f9a1fad1c1 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/conflict-resolution/java.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/conflict-resolution/java/" + - "/riakkv/latest/developing/usage/conflict-resolution/java/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/conflict-resolution/nodejs.md b/content/riak/kv/latest/developing/usage/conflict-resolution/nodejs.md new file mode 100644 index 0000000000..b5a6e69fc5 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/conflict-resolution/nodejs.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/conflict-resolution/nodejs/" + - "/riakkv/latest/developing/usage/conflict-resolution/nodejs/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/conflict-resolution/php.md b/content/riak/kv/latest/developing/usage/conflict-resolution/php.md new file mode 100644 index 0000000000..4dc47e2e2f --- /dev/null +++ b/content/riak/kv/latest/developing/usage/conflict-resolution/php.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/conflict-resolution/php/" + - "/riakkv/latest/developing/usage/conflict-resolution/php/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/conflict-resolution/python.md b/content/riak/kv/latest/developing/usage/conflict-resolution/python.md new file mode 100644 index 0000000000..98be08206d --- /dev/null +++ b/content/riak/kv/latest/developing/usage/conflict-resolution/python.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/conflict-resolution/python/" + - "/riakkv/latest/developing/usage/conflict-resolution/python/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/usage/conflict-resolution/ruby.md b/content/riak/kv/latest/developing/usage/conflict-resolution/ruby.md new file mode 100644 index 0000000000..299877aceb --- /dev/null +++ b/content/riak/kv/latest/developing/usage/conflict-resolution/ruby.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/conflict-resolution/ruby/" + - "/riakkv/latest/developing/usage/conflict-resolution/ruby/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/content-types.md b/content/riak/kv/latest/developing/usage/content-types.md new file mode 100644 index 0000000000..b9bb39c5d3 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/content-types.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/content-types/" + - "/riakkv/latest/developing/usage/content-types/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/creating-objects.md b/content/riak/kv/latest/developing/usage/creating-objects.md new file mode 100644 index 0000000000..91a975c007 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/creating-objects.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/creating-objects/" + - "/riakkv/latest/developing/usage/creating-objects/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/custom-extractors.md b/content/riak/kv/latest/developing/usage/custom-extractors.md new file mode 100644 index 0000000000..c3ffb3d674 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/custom-extractors.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/custom-extractors/" + - "/riakkv/latest/developing/usage/custom-extractors/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/usage/deleting-objects.md b/content/riak/kv/latest/developing/usage/deleting-objects.md new file mode 100644 index 0000000000..56529e2fcb --- /dev/null +++ b/content/riak/kv/latest/developing/usage/deleting-objects.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/deleting-objects/" + - "/riakkv/latest/developing/usage/deleting-objects/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/document-store.md b/content/riak/kv/latest/developing/usage/document-store.md new file mode 100644 index 0000000000..c4a477b4a5 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/document-store.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/document-store/" + - "/riakkv/latest/developing/usage/document-store/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/mapreduce.md b/content/riak/kv/latest/developing/usage/mapreduce.md new file mode 100644 index 0000000000..e543588852 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/mapreduce.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/mapreduce/" + - "/riakkv/latest/developing/usage/mapreduce/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/next-gen-replication.md b/content/riak/kv/latest/developing/usage/next-gen-replication.md new file mode 100644 index 0000000000..bb5f354167 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/next-gen-replication.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/next-gen-replication/" + - "/riakkv/latest/developing/usage/next-gen-replication/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/usage/reading-objects.md b/content/riak/kv/latest/developing/usage/reading-objects.md new file mode 100644 index 0000000000..bf8c380f5e --- /dev/null +++ b/content/riak/kv/latest/developing/usage/reading-objects.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/reading-objects/" + - "/riakkv/latest/developing/usage/reading-objects/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/replication.md b/content/riak/kv/latest/developing/usage/replication.md new file mode 100644 index 0000000000..56c271e7e6 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/replication.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/replication/" + - "/riakkv/latest/developing/usage/replication/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/search-schemas.md b/content/riak/kv/latest/developing/usage/search-schemas.md new file mode 100644 index 0000000000..642b559cb8 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/search-schemas.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/search-schemas/" + - "/riakkv/latest/developing/usage/search-schemas/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/search.md b/content/riak/kv/latest/developing/usage/search.md new file mode 100644 index 0000000000..4de4faefcd --- /dev/null +++ b/content/riak/kv/latest/developing/usage/search.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/search/" + - "/riakkv/latest/developing/usage/search/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/usage/searching-data-types.md b/content/riak/kv/latest/developing/usage/searching-data-types.md new file mode 100644 index 0000000000..eab941b722 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/searching-data-types.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/searching-data-types/" + - "/riakkv/latest/developing/usage/searching-data-types/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/secondary-indexes.md b/content/riak/kv/latest/developing/usage/secondary-indexes.md new file mode 100644 index 0000000000..1463b5a0e3 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/secondary-indexes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/secondary-indexes/" + - "/riakkv/latest/developing/usage/secondary-indexes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/security.md b/content/riak/kv/latest/developing/usage/security.md new file mode 100644 index 0000000000..f6bd1f33aa --- /dev/null +++ b/content/riak/kv/latest/developing/usage/security.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/security/" + - "/riakkv/latest/developing/usage/security/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/security/erlang.md b/content/riak/kv/latest/developing/usage/security/erlang.md new file mode 100644 index 0000000000..5bd48eb1b7 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/security/erlang.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/security/erlang/" + - "/riakkv/latest/developing/usage/security/erlang/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/usage/security/java.md b/content/riak/kv/latest/developing/usage/security/java.md new file mode 100644 index 0000000000..7a1882796a --- /dev/null +++ b/content/riak/kv/latest/developing/usage/security/java.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/security/java/" + - "/riakkv/latest/developing/usage/security/java/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/security/php.md b/content/riak/kv/latest/developing/usage/security/php.md new file mode 100644 index 0000000000..7ed8c885af --- /dev/null +++ b/content/riak/kv/latest/developing/usage/security/php.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/security/php/" + - "/riakkv/latest/developing/usage/security/php/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/security/python.md b/content/riak/kv/latest/developing/usage/security/python.md new file mode 100644 index 0000000000..90ef95cb68 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/security/python.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/security/python/" + - "/riakkv/latest/developing/usage/security/python/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/developing/usage/security/ruby.md b/content/riak/kv/latest/developing/usage/security/ruby.md new file mode 100644 index 0000000000..ac854e77c8 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/security/ruby.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/security/ruby/" + - "/riakkv/latest/developing/usage/security/ruby/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/developing/usage/updating-objects.md b/content/riak/kv/latest/developing/usage/updating-objects.md new file mode 100644 index 0000000000..b4c411d7c7 --- /dev/null +++ b/content/riak/kv/latest/developing/usage/updating-objects.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/developing/usage/updating-objects/" + - "/riakkv/latest/developing/usage/updating-objects/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/downloads.md b/content/riak/kv/latest/downloads.md new file mode 100644 index 0000000000..d292b7fc53 --- /dev/null +++ b/content/riak/kv/latest/downloads.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/downloads/" + - "/riakkv/latest/downloads/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/index.md b/content/riak/kv/latest/index.md new file mode 100644 index 0000000000..0e4eb4ef83 --- /dev/null +++ b/content/riak/kv/latest/index.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/" + - "/riakkv/latest/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn.md b/content/riak/kv/latest/learn.md new file mode 100644 index 0000000000..7f6b855046 --- /dev/null +++ b/content/riak/kv/latest/learn.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/" + - "/riakkv/latest/learn/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/concepts.md b/content/riak/kv/latest/learn/concepts.md new file mode 100644 index 0000000000..4ef49988ab --- /dev/null +++ b/content/riak/kv/latest/learn/concepts.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/" + - "/riakkv/latest/learn/concepts/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/learn/concepts/active-anti-entropy.md b/content/riak/kv/latest/learn/concepts/active-anti-entropy.md new file mode 100644 index 0000000000..ab3f09b522 --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/active-anti-entropy.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/active-anti-entropy/" + - "/riakkv/latest/learn/concepts/active-anti-entropy/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/concepts/buckets.md b/content/riak/kv/latest/learn/concepts/buckets.md new file mode 100644 index 0000000000..ecba14f4fb --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/buckets.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/buckets/" + - "/riakkv/latest/learn/concepts/buckets/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/concepts/capability-negotiation.md b/content/riak/kv/latest/learn/concepts/capability-negotiation.md new file mode 100644 index 0000000000..4fd251cf40 --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/capability-negotiation.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/capability-negotiation/" + - "/riakkv/latest/learn/concepts/capability-negotiation/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/concepts/causal-context.md b/content/riak/kv/latest/learn/concepts/causal-context.md new file mode 100644 index 0000000000..b8cf7cc6da --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/causal-context.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/causal-context/" + - "/riakkv/latest/learn/concepts/causal-context/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/learn/concepts/clusters.md b/content/riak/kv/latest/learn/concepts/clusters.md new file mode 100644 index 0000000000..c142825cf3 --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/clusters.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/clusters/" + - "/riakkv/latest/learn/concepts/clusters/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/concepts/crdts.md b/content/riak/kv/latest/learn/concepts/crdts.md new file mode 100644 index 0000000000..cd1113e241 --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/crdts.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/crdts/" + - "/riakkv/latest/learn/concepts/crdts/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/concepts/eventual-consistency.md b/content/riak/kv/latest/learn/concepts/eventual-consistency.md new file mode 100644 index 0000000000..26938873da --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/eventual-consistency.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/eventual-consistency/" + - "/riakkv/latest/learn/concepts/eventual-consistency/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/concepts/keys-and-objects.md b/content/riak/kv/latest/learn/concepts/keys-and-objects.md new file mode 100644 index 0000000000..3e9570fd4c --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/keys-and-objects.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/keys-and-objects/" + - "/riakkv/latest/learn/concepts/keys-and-objects/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/learn/concepts/replication.md b/content/riak/kv/latest/learn/concepts/replication.md new file mode 100644 index 0000000000..62908f0b8a --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/replication.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/replication/" + - "/riakkv/latest/learn/concepts/replication/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/concepts/strong-consistency.md b/content/riak/kv/latest/learn/concepts/strong-consistency.md new file mode 100644 index 0000000000..869e986e64 --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/strong-consistency.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/strong-consistency/" + - "/riakkv/latest/learn/concepts/strong-consistency/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/concepts/vnodes.md b/content/riak/kv/latest/learn/concepts/vnodes.md new file mode 100644 index 0000000000..192482a65a --- /dev/null +++ b/content/riak/kv/latest/learn/concepts/vnodes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/concepts/vnodes/" + - "/riakkv/latest/learn/concepts/vnodes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/dynamo.md b/content/riak/kv/latest/learn/dynamo.md new file mode 100644 index 0000000000..19d54700ad --- /dev/null +++ b/content/riak/kv/latest/learn/dynamo.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/dynamo/" + - "/riakkv/latest/learn/dynamo/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/learn/glossary.md b/content/riak/kv/latest/learn/glossary.md new file mode 100644 index 0000000000..23f4a8851f --- /dev/null +++ b/content/riak/kv/latest/learn/glossary.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/glossary/" + - "/riakkv/latest/learn/glossary/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/new-to-nosql.md b/content/riak/kv/latest/learn/new-to-nosql.md new file mode 100644 index 0000000000..23e544af79 --- /dev/null +++ b/content/riak/kv/latest/learn/new-to-nosql.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/new-to-nosql/" + - "/riakkv/latest/learn/new-to-nosql/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/use-cases.md b/content/riak/kv/latest/learn/use-cases.md new file mode 100644 index 0000000000..61bb187d5e --- /dev/null +++ b/content/riak/kv/latest/learn/use-cases.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/use-cases/" + - "/riakkv/latest/learn/use-cases/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/learn/why-riak-kv.md b/content/riak/kv/latest/learn/why-riak-kv.md new file mode 100644 index 0000000000..a6093b520f --- /dev/null +++ b/content/riak/kv/latest/learn/why-riak-kv.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/learn/why-riak-kv/" + - "/riakkv/latest/learn/why-riak-kv/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/release-notes.md b/content/riak/kv/latest/release-notes.md new file mode 100644 index 0000000000..4af21e9f2e --- /dev/null +++ b/content/riak/kv/latest/release-notes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/release-notes/" + - "/riakkv/latest/release-notes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. 
+ +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup.md b/content/riak/kv/latest/setup.md new file mode 100644 index 0000000000..362a1ba510 --- /dev/null +++ b/content/riak/kv/latest/setup.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/" + - "/riakkv/latest/setup/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/downgrade.md b/content/riak/kv/latest/setup/downgrade.md new file mode 100644 index 0000000000..530a90d865 --- /dev/null +++ b/content/riak/kv/latest/setup/downgrade.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/downgrade/" + - "/riakkv/latest/setup/downgrade/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing.md b/content/riak/kv/latest/setup/installing.md new file mode 100644 index 0000000000..d512386fc7 --- /dev/null +++ b/content/riak/kv/latest/setup/installing.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/" + - "/riakkv/latest/setup/installing/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing/amazon-web-services.md b/content/riak/kv/latest/setup/installing/amazon-web-services.md new file mode 100644 index 0000000000..460826a23e --- /dev/null +++ b/content/riak/kv/latest/setup/installing/amazon-web-services.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/amazon-web-services/" + - "/riakkv/latest/setup/installing/amazon-web-services/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/setup/installing/debian-ubuntu.md b/content/riak/kv/latest/setup/installing/debian-ubuntu.md new file mode 100644 index 0000000000..a0ac3ba46c --- /dev/null +++ b/content/riak/kv/latest/setup/installing/debian-ubuntu.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/debian-ubuntu/" + - "/riakkv/latest/setup/installing/debian-ubuntu/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing/freebsd.md b/content/riak/kv/latest/setup/installing/freebsd.md new file mode 100644 index 0000000000..7aa668ee09 --- /dev/null +++ b/content/riak/kv/latest/setup/installing/freebsd.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/freebsd/" + - "/riakkv/latest/setup/installing/freebsd/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing/mac-osx.md b/content/riak/kv/latest/setup/installing/mac-osx.md new file mode 100644 index 0000000000..5815ed4845 --- /dev/null +++ b/content/riak/kv/latest/setup/installing/mac-osx.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/mac-osx/" + - "/riakkv/latest/setup/installing/mac-osx/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing/rhel-centos.md b/content/riak/kv/latest/setup/installing/rhel-centos.md new file mode 100644 index 0000000000..75ba8133a0 --- /dev/null +++ b/content/riak/kv/latest/setup/installing/rhel-centos.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/rhel-centos/" + - "/riakkv/latest/setup/installing/rhel-centos/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/setup/installing/smartos.md b/content/riak/kv/latest/setup/installing/smartos.md new file mode 100644 index 0000000000..f9a935f266 --- /dev/null +++ b/content/riak/kv/latest/setup/installing/smartos.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/smartos/" + - "/riakkv/latest/setup/installing/smartos/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing/solaris.md b/content/riak/kv/latest/setup/installing/solaris.md new file mode 100644 index 0000000000..b975a62d47 --- /dev/null +++ b/content/riak/kv/latest/setup/installing/solaris.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/solaris/" + - "/riakkv/latest/setup/installing/solaris/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing/source.md b/content/riak/kv/latest/setup/installing/source.md new file mode 100644 index 0000000000..0d66f52a21 --- /dev/null +++ b/content/riak/kv/latest/setup/installing/source.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/source/" + - "/riakkv/latest/setup/installing/source/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing/source/erlang.md b/content/riak/kv/latest/setup/installing/source/erlang.md new file mode 100644 index 0000000000..fa492472ce --- /dev/null +++ b/content/riak/kv/latest/setup/installing/source/erlang.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/source/erlang/" + - "/riakkv/latest/setup/installing/source/erlang/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/setup/installing/source/jvm.md b/content/riak/kv/latest/setup/installing/source/jvm.md new file mode 100644 index 0000000000..8691cbb562 --- /dev/null +++ b/content/riak/kv/latest/setup/installing/source/jvm.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/source/jvm/" + - "/riakkv/latest/setup/installing/source/jvm/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing/suse.md b/content/riak/kv/latest/setup/installing/suse.md new file mode 100644 index 0000000000..c67f559388 --- /dev/null +++ b/content/riak/kv/latest/setup/installing/suse.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/suse/" + - "/riakkv/latest/setup/installing/suse/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing/verify.md b/content/riak/kv/latest/setup/installing/verify.md new file mode 100644 index 0000000000..08867b0ac1 --- /dev/null +++ b/content/riak/kv/latest/setup/installing/verify.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/verify/" + - "/riakkv/latest/setup/installing/verify/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/installing/windows-azure.md b/content/riak/kv/latest/setup/installing/windows-azure.md new file mode 100644 index 0000000000..9413409e7d --- /dev/null +++ b/content/riak/kv/latest/setup/installing/windows-azure.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/installing/windows-azure/" + - "/riakkv/latest/setup/installing/windows-azure/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/setup/planning.md b/content/riak/kv/latest/setup/planning.md new file mode 100644 index 0000000000..b020eea8bc --- /dev/null +++ b/content/riak/kv/latest/setup/planning.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/" + - "/riakkv/latest/setup/planning/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/planning/backend.md b/content/riak/kv/latest/setup/planning/backend.md new file mode 100644 index 0000000000..0355c53b96 --- /dev/null +++ b/content/riak/kv/latest/setup/planning/backend.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/backend/" + - "/riakkv/latest/setup/planning/backend/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/planning/backend/bitcask.md b/content/riak/kv/latest/setup/planning/backend/bitcask.md new file mode 100644 index 0000000000..7709de9fac --- /dev/null +++ b/content/riak/kv/latest/setup/planning/backend/bitcask.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/backend/bitcask/" + - "/riakkv/latest/setup/planning/backend/bitcask/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/planning/backend/leveldb.md b/content/riak/kv/latest/setup/planning/backend/leveldb.md new file mode 100644 index 0000000000..dab2935ddc --- /dev/null +++ b/content/riak/kv/latest/setup/planning/backend/leveldb.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/backend/leveldb/" + - "/riakkv/latest/setup/planning/backend/leveldb/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/setup/planning/backend/leveled.md b/content/riak/kv/latest/setup/planning/backend/leveled.md new file mode 100644 index 0000000000..0871b7940d --- /dev/null +++ b/content/riak/kv/latest/setup/planning/backend/leveled.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/backend/leveled/" + - "/riakkv/latest/setup/planning/backend/leveled/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/planning/backend/memory.md b/content/riak/kv/latest/setup/planning/backend/memory.md new file mode 100644 index 0000000000..a710df3e20 --- /dev/null +++ b/content/riak/kv/latest/setup/planning/backend/memory.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/backend/memory/" + - "/riakkv/latest/setup/planning/backend/memory/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/planning/backend/multi.md b/content/riak/kv/latest/setup/planning/backend/multi.md new file mode 100644 index 0000000000..35a6513923 --- /dev/null +++ b/content/riak/kv/latest/setup/planning/backend/multi.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/backend/multi/" + - "/riakkv/latest/setup/planning/backend/multi/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/planning/best-practices.md b/content/riak/kv/latest/setup/planning/best-practices.md new file mode 100644 index 0000000000..f8539b0605 --- /dev/null +++ b/content/riak/kv/latest/setup/planning/best-practices.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/best-practices/" + - "/riakkv/latest/setup/planning/best-practices/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/setup/planning/bitcask-capacity-calc.md b/content/riak/kv/latest/setup/planning/bitcask-capacity-calc.md new file mode 100644 index 0000000000..f759871df9 --- /dev/null +++ b/content/riak/kv/latest/setup/planning/bitcask-capacity-calc.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/bitcask-capacity-calc/" + - "/riakkv/latest/setup/planning/bitcask-capacity-calc/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/planning/cluster-capacity.md b/content/riak/kv/latest/setup/planning/cluster-capacity.md new file mode 100644 index 0000000000..f69d99aa0f --- /dev/null +++ b/content/riak/kv/latest/setup/planning/cluster-capacity.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/cluster-capacity/" + - "/riakkv/latest/setup/planning/cluster-capacity/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/planning/future.md b/content/riak/kv/latest/setup/planning/future.md new file mode 100644 index 0000000000..498beb630d --- /dev/null +++ b/content/riak/kv/latest/setup/planning/future.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/future/" + - "/riakkv/latest/setup/planning/future/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/planning/operating-system.md b/content/riak/kv/latest/setup/planning/operating-system.md new file mode 100644 index 0000000000..d8c649556d --- /dev/null +++ b/content/riak/kv/latest/setup/planning/operating-system.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/operating-system/" + - "/riakkv/latest/setup/planning/operating-system/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/setup/planning/start.md b/content/riak/kv/latest/setup/planning/start.md new file mode 100644 index 0000000000..40600be3f4 --- /dev/null +++ b/content/riak/kv/latest/setup/planning/start.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/planning/start/" + - "/riakkv/latest/setup/planning/start/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/search.md b/content/riak/kv/latest/setup/search.md new file mode 100644 index 0000000000..7e279e1c48 --- /dev/null +++ b/content/riak/kv/latest/setup/search.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/search/" + - "/riakkv/latest/setup/search/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/upgrading.md b/content/riak/kv/latest/setup/upgrading.md new file mode 100644 index 0000000000..1f8063f2f0 --- /dev/null +++ b/content/riak/kv/latest/setup/upgrading.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/upgrading/" + - "/riakkv/latest/setup/upgrading/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/upgrading/checklist.md b/content/riak/kv/latest/setup/upgrading/checklist.md new file mode 100644 index 0000000000..f2c5c67fa0 --- /dev/null +++ b/content/riak/kv/latest/setup/upgrading/checklist.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/upgrading/checklist/" + - "/riakkv/latest/setup/upgrading/checklist/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/setup/upgrading/cluster.md b/content/riak/kv/latest/setup/upgrading/cluster.md new file mode 100644 index 0000000000..386ef60b7d --- /dev/null +++ b/content/riak/kv/latest/setup/upgrading/cluster.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/upgrading/cluster/" + - "/riakkv/latest/setup/upgrading/cluster/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/upgrading/multi-datacenter.md b/content/riak/kv/latest/setup/upgrading/multi-datacenter.md new file mode 100644 index 0000000000..3f6394c0ce --- /dev/null +++ b/content/riak/kv/latest/setup/upgrading/multi-datacenter.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/upgrading/multi-datacenter/" + - "/riakkv/latest/setup/upgrading/multi-datacenter/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/upgrading/search.md b/content/riak/kv/latest/setup/upgrading/search.md new file mode 100644 index 0000000000..123e748ebc --- /dev/null +++ b/content/riak/kv/latest/setup/upgrading/search.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/upgrading/search/" + - "/riakkv/latest/setup/upgrading/search/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/setup/upgrading/version.md b/content/riak/kv/latest/setup/upgrading/version.md new file mode 100644 index 0000000000..c12e7518a8 --- /dev/null +++ b/content/riak/kv/latest/setup/upgrading/version.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/setup/upgrading/version/" + - "/riakkv/latest/setup/upgrading/version/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using.md b/content/riak/kv/latest/using.md new file mode 100644 index 0000000000..5c13603eaf --- /dev/null +++ b/content/riak/kv/latest/using.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/" + - "/riakkv/latest/using/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/admin.md b/content/riak/kv/latest/using/admin.md new file mode 100644 index 0000000000..63d37b675e --- /dev/null +++ b/content/riak/kv/latest/using/admin.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/admin/" + - "/riakkv/latest/using/admin/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/admin/commands.md b/content/riak/kv/latest/using/admin/commands.md new file mode 100644 index 0000000000..e9b3b596f8 --- /dev/null +++ b/content/riak/kv/latest/using/admin/commands.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/admin/commands/" + - "/riakkv/latest/using/admin/commands/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/admin/riak-admin.md b/content/riak/kv/latest/using/admin/riak-admin.md new file mode 100644 index 0000000000..d41ee8b3a4 --- /dev/null +++ b/content/riak/kv/latest/using/admin/riak-admin.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/admin/riak-admin/" + - "/riakkv/latest/using/admin/riak-admin/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/admin/riak-cli.md b/content/riak/kv/latest/using/admin/riak-cli.md new file mode 100644 index 0000000000..1a9bd96cdf --- /dev/null +++ b/content/riak/kv/latest/using/admin/riak-cli.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/admin/riak-cli/" + - "/riakkv/latest/using/admin/riak-cli/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. 
+ +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/admin/riak-control.md b/content/riak/kv/latest/using/admin/riak-control.md new file mode 100644 index 0000000000..c454acdb30 --- /dev/null +++ b/content/riak/kv/latest/using/admin/riak-control.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/admin/riak-control/" + - "/riakkv/latest/using/admin/riak-control/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations.md b/content/riak/kv/latest/using/cluster-operations.md new file mode 100644 index 0000000000..2c1a333881 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/" + - "/riakkv/latest/using/cluster-operations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/active-anti-entropy.md b/content/riak/kv/latest/using/cluster-operations/active-anti-entropy.md new file mode 100644 index 0000000000..ddeeeb7d10 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/active-anti-entropy.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/active-anti-entropy/" + - "/riakkv/latest/using/cluster-operations/active-anti-entropy/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/adding-removing-nodes.md b/content/riak/kv/latest/using/cluster-operations/adding-removing-nodes.md new file mode 100644 index 0000000000..b481f36728 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/adding-removing-nodes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/adding-removing-nodes/" + - "/riakkv/latest/using/cluster-operations/adding-removing-nodes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/cluster-operations/backend.md b/content/riak/kv/latest/using/cluster-operations/backend.md new file mode 100644 index 0000000000..6c2a66563f --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/backend.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/backend/" + - "/riakkv/latest/using/cluster-operations/backend/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/backing-up.md b/content/riak/kv/latest/using/cluster-operations/backing-up.md new file mode 100644 index 0000000000..bc36b7083c --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/backing-up.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/backing-up/" + - "/riakkv/latest/using/cluster-operations/backing-up/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/bucket-types.md b/content/riak/kv/latest/using/cluster-operations/bucket-types.md new file mode 100644 index 0000000000..8d342b6674 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/bucket-types.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/bucket-types/" + - "/riakkv/latest/using/cluster-operations/bucket-types/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/changing-cluster-info.md b/content/riak/kv/latest/using/cluster-operations/changing-cluster-info.md new file mode 100644 index 0000000000..ca28c94886 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/changing-cluster-info.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/changing-cluster-info/" + - "/riakkv/latest/using/cluster-operations/changing-cluster-info/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/cluster-operations/handoff.md b/content/riak/kv/latest/using/cluster-operations/handoff.md new file mode 100644 index 0000000000..f4e233e1a3 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/handoff.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/handoff/" + - "/riakkv/latest/using/cluster-operations/handoff/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/inspecting-node.md b/content/riak/kv/latest/using/cluster-operations/inspecting-node.md new file mode 100644 index 0000000000..c27a2c81ac --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/inspecting-node.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/inspecting-node/" + - "/riakkv/latest/using/cluster-operations/inspecting-node/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/load-balancing.md b/content/riak/kv/latest/using/cluster-operations/load-balancing.md new file mode 100644 index 0000000000..92fb32ffdd --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/load-balancing.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/load-balancing/" + - "/riakkv/latest/using/cluster-operations/load-balancing/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/logging.md b/content/riak/kv/latest/using/cluster-operations/logging.md new file mode 100644 index 0000000000..cb121e7e80 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/logging.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/logging/" + - "/riakkv/latest/using/cluster-operations/logging/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/cluster-operations/replacing-node.md b/content/riak/kv/latest/using/cluster-operations/replacing-node.md new file mode 100644 index 0000000000..baaae78fc5 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/replacing-node.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/replacing-node/" + - "/riakkv/latest/using/cluster-operations/replacing-node/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/secondary-indexes.md b/content/riak/kv/latest/using/cluster-operations/secondary-indexes.md new file mode 100644 index 0000000000..0a3a39cdac --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/secondary-indexes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/secondary-indexes/" + - "/riakkv/latest/using/cluster-operations/secondary-indexes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/strong-consistency.md b/content/riak/kv/latest/using/cluster-operations/strong-consistency.md new file mode 100644 index 0000000000..3b95f092d8 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/strong-consistency.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/strong-consistency/" + - "/riakkv/latest/using/cluster-operations/strong-consistency/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/tictac-active-anti-entropy.md b/content/riak/kv/latest/using/cluster-operations/tictac-active-anti-entropy.md new file mode 100644 index 0000000000..1362b5708b --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/tictac-active-anti-entropy.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/tictac-active-anti-entropy/" + - "/riakkv/latest/using/cluster-operations/tictac-active-anti-entropy/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/cluster-operations/v2-multi-datacenter.md b/content/riak/kv/latest/using/cluster-operations/v2-multi-datacenter.md new file mode 100644 index 0000000000..7fe74a14b5 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/v2-multi-datacenter.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/v2-multi-datacenter/" + - "/riakkv/latest/using/cluster-operations/v2-multi-datacenter/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/cluster-operations/v3-multi-datacenter.md b/content/riak/kv/latest/using/cluster-operations/v3-multi-datacenter.md new file mode 100644 index 0000000000..b48e84efe1 --- /dev/null +++ b/content/riak/kv/latest/using/cluster-operations/v3-multi-datacenter.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/cluster-operations/v3-multi-datacenter/" + - "/riakkv/latest/using/cluster-operations/v3-multi-datacenter/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/performance.md b/content/riak/kv/latest/using/performance.md new file mode 100644 index 0000000000..f9167c66b0 --- /dev/null +++ b/content/riak/kv/latest/using/performance.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/performance/" + - "/riakkv/latest/using/performance/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/performance/amazon-web-services.md b/content/riak/kv/latest/using/performance/amazon-web-services.md new file mode 100644 index 0000000000..e55409d6b6 --- /dev/null +++ b/content/riak/kv/latest/using/performance/amazon-web-services.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/performance/amazon-web-services/" + - "/riakkv/latest/using/performance/amazon-web-services/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/performance/benchmarking.md b/content/riak/kv/latest/using/performance/benchmarking.md new file mode 100644 index 0000000000..fa45f7e143 --- /dev/null +++ b/content/riak/kv/latest/using/performance/benchmarking.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/performance/benchmarking/" + - "/riakkv/latest/using/performance/benchmarking/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/performance/erlang.md b/content/riak/kv/latest/using/performance/erlang.md new file mode 100644 index 0000000000..5617b451a7 --- /dev/null +++ b/content/riak/kv/latest/using/performance/erlang.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/performance/erlang/" + - "/riakkv/latest/using/performance/erlang/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/performance/latency-reduction.md b/content/riak/kv/latest/using/performance/latency-reduction.md new file mode 100644 index 0000000000..6683621b7c --- /dev/null +++ b/content/riak/kv/latest/using/performance/latency-reduction.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/performance/latency-reduction/" + - "/riakkv/latest/using/performance/latency-reduction/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/performance/multi-datacenter-tuning.md b/content/riak/kv/latest/using/performance/multi-datacenter-tuning.md new file mode 100644 index 0000000000..781b8d378f --- /dev/null +++ b/content/riak/kv/latest/using/performance/multi-datacenter-tuning.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/performance/multi-datacenter-tuning/" + - "/riakkv/latest/using/performance/multi-datacenter-tuning/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/performance/open-files-limit.md b/content/riak/kv/latest/using/performance/open-files-limit.md new file mode 100644 index 0000000000..766381ad69 --- /dev/null +++ b/content/riak/kv/latest/using/performance/open-files-limit.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/performance/open-files-limit/" + - "/riakkv/latest/using/performance/open-files-limit/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/performance/v2-scheduling-fullsync.md b/content/riak/kv/latest/using/performance/v2-scheduling-fullsync.md new file mode 100644 index 0000000000..8ec21267d9 --- /dev/null +++ b/content/riak/kv/latest/using/performance/v2-scheduling-fullsync.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/performance/v2-scheduling-fullsync/" + - "/riakkv/latest/using/performance/v2-scheduling-fullsync/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference.md b/content/riak/kv/latest/using/reference.md new file mode 100644 index 0000000000..1e9a6e0388 --- /dev/null +++ b/content/riak/kv/latest/using/reference.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/" + - "/riakkv/latest/using/reference/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/architecture.md b/content/riak/kv/latest/using/reference/architecture.md new file mode 100644 index 0000000000..3c57fa453b --- /dev/null +++ b/content/riak/kv/latest/using/reference/architecture.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/architecture/" + - "/riakkv/latest/using/reference/architecture/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/reference/bucket-types.md b/content/riak/kv/latest/using/reference/bucket-types.md new file mode 100644 index 0000000000..000e5d3d61 --- /dev/null +++ b/content/riak/kv/latest/using/reference/bucket-types.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/bucket-types/" + - "/riakkv/latest/using/reference/bucket-types/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/custom-code.md b/content/riak/kv/latest/using/reference/custom-code.md new file mode 100644 index 0000000000..54b7d7fd48 --- /dev/null +++ b/content/riak/kv/latest/using/reference/custom-code.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/custom-code/" + - "/riakkv/latest/using/reference/custom-code/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/failure-recovery.md b/content/riak/kv/latest/using/reference/failure-recovery.md new file mode 100644 index 0000000000..cff5c46eb9 --- /dev/null +++ b/content/riak/kv/latest/using/reference/failure-recovery.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/failure-recovery/" + - "/riakkv/latest/using/reference/failure-recovery/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/handoff.md b/content/riak/kv/latest/using/reference/handoff.md new file mode 100644 index 0000000000..70720257aa --- /dev/null +++ b/content/riak/kv/latest/using/reference/handoff.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/handoff/" + - "/riakkv/latest/using/reference/handoff/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/reference/jmx.md b/content/riak/kv/latest/using/reference/jmx.md new file mode 100644 index 0000000000..da16822336 --- /dev/null +++ b/content/riak/kv/latest/using/reference/jmx.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/jmx/" + - "/riakkv/latest/using/reference/jmx/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/logging.md b/content/riak/kv/latest/using/reference/logging.md new file mode 100644 index 0000000000..5b8563fb04 --- /dev/null +++ b/content/riak/kv/latest/using/reference/logging.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/logging/" + - "/riakkv/latest/using/reference/logging/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/multi-datacenter.md b/content/riak/kv/latest/using/reference/multi-datacenter.md new file mode 100644 index 0000000000..f471c2a17b --- /dev/null +++ b/content/riak/kv/latest/using/reference/multi-datacenter.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/multi-datacenter/" + - "/riakkv/latest/using/reference/multi-datacenter/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/multi-datacenter/comparison.md b/content/riak/kv/latest/using/reference/multi-datacenter/comparison.md new file mode 100644 index 0000000000..2e8654a108 --- /dev/null +++ b/content/riak/kv/latest/using/reference/multi-datacenter/comparison.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/multi-datacenter/comparison/" + - "/riakkv/latest/using/reference/multi-datacenter/comparison/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/reference/multi-datacenter/monitoring.md b/content/riak/kv/latest/using/reference/multi-datacenter/monitoring.md new file mode 100644 index 0000000000..ee24ff77b9 --- /dev/null +++ b/content/riak/kv/latest/using/reference/multi-datacenter/monitoring.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/multi-datacenter/monitoring/" + - "/riakkv/latest/using/reference/multi-datacenter/monitoring/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/multi-datacenter/per-bucket-replication.md b/content/riak/kv/latest/using/reference/multi-datacenter/per-bucket-replication.md new file mode 100644 index 0000000000..e1de8d72a1 --- /dev/null +++ b/content/riak/kv/latest/using/reference/multi-datacenter/per-bucket-replication.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/multi-datacenter/per-bucket-replication/" + - "/riakkv/latest/using/reference/multi-datacenter/per-bucket-replication/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/multi-datacenter/statistics.md b/content/riak/kv/latest/using/reference/multi-datacenter/statistics.md new file mode 100644 index 0000000000..b6c7dc23fc --- /dev/null +++ b/content/riak/kv/latest/using/reference/multi-datacenter/statistics.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/multi-datacenter/statistics/" + - "/riakkv/latest/using/reference/multi-datacenter/statistics/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/object-deletion.md b/content/riak/kv/latest/using/reference/object-deletion.md new file mode 100644 index 0000000000..12c1f78a0b --- /dev/null +++ b/content/riak/kv/latest/using/reference/object-deletion.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/object-deletion/" + - "/riakkv/latest/using/reference/object-deletion/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/reference/runtime-interaction.md b/content/riak/kv/latest/using/reference/runtime-interaction.md new file mode 100644 index 0000000000..9040e42944 --- /dev/null +++ b/content/riak/kv/latest/using/reference/runtime-interaction.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/runtime-interaction/" + - "/riakkv/latest/using/reference/runtime-interaction/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/search.md b/content/riak/kv/latest/using/reference/search.md new file mode 100644 index 0000000000..4c117f024a --- /dev/null +++ b/content/riak/kv/latest/using/reference/search.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/search/" + - "/riakkv/latest/using/reference/search/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/secondary-indexes.md b/content/riak/kv/latest/using/reference/secondary-indexes.md new file mode 100644 index 0000000000..8457c54c14 --- /dev/null +++ b/content/riak/kv/latest/using/reference/secondary-indexes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/secondary-indexes/" + - "/riakkv/latest/using/reference/secondary-indexes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/snmp.md b/content/riak/kv/latest/using/reference/snmp.md new file mode 100644 index 0000000000..ad3de9ff0b --- /dev/null +++ b/content/riak/kv/latest/using/reference/snmp.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/snmp/" + - "/riakkv/latest/using/reference/snmp/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/reference/statistics-monitoring.md b/content/riak/kv/latest/using/reference/statistics-monitoring.md new file mode 100644 index 0000000000..60b15c2003 --- /dev/null +++ b/content/riak/kv/latest/using/reference/statistics-monitoring.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/statistics-monitoring/" + - "/riakkv/latest/using/reference/statistics-monitoring/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/strong-consistency.md b/content/riak/kv/latest/using/reference/strong-consistency.md new file mode 100644 index 0000000000..0733be3b80 --- /dev/null +++ b/content/riak/kv/latest/using/reference/strong-consistency.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/strong-consistency/" + - "/riakkv/latest/using/reference/strong-consistency/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/v2-multi-datacenter.md b/content/riak/kv/latest/using/reference/v2-multi-datacenter.md new file mode 100644 index 0000000000..07c60aa528 --- /dev/null +++ b/content/riak/kv/latest/using/reference/v2-multi-datacenter.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/v2-multi-datacenter/" + - "/riakkv/latest/using/reference/v2-multi-datacenter/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/v2-multi-datacenter/architecture.md b/content/riak/kv/latest/using/reference/v2-multi-datacenter/architecture.md new file mode 100644 index 0000000000..e16f131717 --- /dev/null +++ b/content/riak/kv/latest/using/reference/v2-multi-datacenter/architecture.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/v2-multi-datacenter/architecture/" + - "/riakkv/latest/using/reference/v2-multi-datacenter/architecture/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/reference/v2-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/latest/using/reference/v2-multi-datacenter/scheduling-fullsync.md new file mode 100644 index 0000000000..b43185c5af --- /dev/null +++ b/content/riak/kv/latest/using/reference/v2-multi-datacenter/scheduling-fullsync.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/v2-multi-datacenter/scheduling-fullsync/" + - "/riakkv/latest/using/reference/v2-multi-datacenter/scheduling-fullsync/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/v3-multi-datacenter.md b/content/riak/kv/latest/using/reference/v3-multi-datacenter.md new file mode 100644 index 0000000000..7ec76b7fbf --- /dev/null +++ b/content/riak/kv/latest/using/reference/v3-multi-datacenter.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/v3-multi-datacenter/" + - "/riakkv/latest/using/reference/v3-multi-datacenter/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/v3-multi-datacenter/aae.md b/content/riak/kv/latest/using/reference/v3-multi-datacenter/aae.md new file mode 100644 index 0000000000..11074f463c --- /dev/null +++ b/content/riak/kv/latest/using/reference/v3-multi-datacenter/aae.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/v3-multi-datacenter/aae/" + - "/riakkv/latest/using/reference/v3-multi-datacenter/aae/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/v3-multi-datacenter/architecture.md b/content/riak/kv/latest/using/reference/v3-multi-datacenter/architecture.md new file mode 100644 index 0000000000..2024fdad81 --- /dev/null +++ b/content/riak/kv/latest/using/reference/v3-multi-datacenter/architecture.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/v3-multi-datacenter/architecture/" + - "/riakkv/latest/using/reference/v3-multi-datacenter/architecture/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/reference/v3-multi-datacenter/cascading-writes.md b/content/riak/kv/latest/using/reference/v3-multi-datacenter/cascading-writes.md new file mode 100644 index 0000000000..0534b8c095 --- /dev/null +++ b/content/riak/kv/latest/using/reference/v3-multi-datacenter/cascading-writes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/v3-multi-datacenter/cascading-writes/" + - "/riakkv/latest/using/reference/v3-multi-datacenter/cascading-writes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/reference/v3-multi-datacenter/scheduling-fullsync.md b/content/riak/kv/latest/using/reference/v3-multi-datacenter/scheduling-fullsync.md new file mode 100644 index 0000000000..7730ab10cb --- /dev/null +++ b/content/riak/kv/latest/using/reference/v3-multi-datacenter/scheduling-fullsync.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/reference/v3-multi-datacenter/scheduling-fullsync/" + - "/riakkv/latest/using/reference/v3-multi-datacenter/scheduling-fullsync/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/repair-recovery.md b/content/riak/kv/latest/using/repair-recovery.md new file mode 100644 index 0000000000..585914e906 --- /dev/null +++ b/content/riak/kv/latest/using/repair-recovery.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/repair-recovery/" + - "/riakkv/latest/using/repair-recovery/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/repair-recovery/errors.md b/content/riak/kv/latest/using/repair-recovery/errors.md new file mode 100644 index 0000000000..14e4b1e6e3 --- /dev/null +++ b/content/riak/kv/latest/using/repair-recovery/errors.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/repair-recovery/errors/" + - "/riakkv/latest/using/repair-recovery/errors/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/repair-recovery/failed-node.md b/content/riak/kv/latest/using/repair-recovery/failed-node.md new file mode 100644 index 0000000000..0eb45c8c36 --- /dev/null +++ b/content/riak/kv/latest/using/repair-recovery/failed-node.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/repair-recovery/failed-node/" + - "/riakkv/latest/using/repair-recovery/failed-node/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/repair-recovery/failure-recovery.md b/content/riak/kv/latest/using/repair-recovery/failure-recovery.md new file mode 100644 index 0000000000..f0b6aadd0d --- /dev/null +++ b/content/riak/kv/latest/using/repair-recovery/failure-recovery.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/repair-recovery/failure-recovery/" + - "/riakkv/latest/using/repair-recovery/failure-recovery/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/repair-recovery/repairs.md b/content/riak/kv/latest/using/repair-recovery/repairs.md new file mode 100644 index 0000000000..39703971db --- /dev/null +++ b/content/riak/kv/latest/using/repair-recovery/repairs.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/repair-recovery/repairs/" + - "/riakkv/latest/using/repair-recovery/repairs/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/repair-recovery/rolling-replaces.md b/content/riak/kv/latest/using/repair-recovery/rolling-replaces.md new file mode 100644 index 0000000000..08a1987bf4 --- /dev/null +++ b/content/riak/kv/latest/using/repair-recovery/rolling-replaces.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/repair-recovery/rolling-replaces/" + - "/riakkv/latest/using/repair-recovery/rolling-replaces/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/repair-recovery/rolling-restart.md b/content/riak/kv/latest/using/repair-recovery/rolling-restart.md new file mode 100644 index 0000000000..21cc48a418 --- /dev/null +++ b/content/riak/kv/latest/using/repair-recovery/rolling-restart.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/repair-recovery/rolling-restart/" + - "/riakkv/latest/using/repair-recovery/rolling-restart/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/repair-recovery/secondary-indexes.md b/content/riak/kv/latest/using/repair-recovery/secondary-indexes.md new file mode 100644 index 0000000000..ed3cc3af97 --- /dev/null +++ b/content/riak/kv/latest/using/repair-recovery/secondary-indexes.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/repair-recovery/secondary-indexes/" + - "/riakkv/latest/using/repair-recovery/secondary-indexes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/running-a-cluster.md b/content/riak/kv/latest/using/running-a-cluster.md new file mode 100644 index 0000000000..ac3cb558e6 --- /dev/null +++ b/content/riak/kv/latest/using/running-a-cluster.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/running-a-cluster/" + - "/riakkv/latest/using/running-a-cluster/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/security.md b/content/riak/kv/latest/using/security.md new file mode 100644 index 0000000000..afcae395c9 --- /dev/null +++ b/content/riak/kv/latest/using/security.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/security/" + - "/riakkv/latest/using/security/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/security/basics.md b/content/riak/kv/latest/using/security/basics.md new file mode 100644 index 0000000000..819147b30c --- /dev/null +++ b/content/riak/kv/latest/using/security/basics.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/security/basics/" + - "/riakkv/latest/using/security/basics/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/security/best-practices.md b/content/riak/kv/latest/using/security/best-practices.md new file mode 100644 index 0000000000..0f845684f7 --- /dev/null +++ b/content/riak/kv/latest/using/security/best-practices.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/security/best-practices/" + - "/riakkv/latest/using/security/best-practices/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/security/managing-sources.md b/content/riak/kv/latest/using/security/managing-sources.md new file mode 100644 index 0000000000..8dcc8ba43d --- /dev/null +++ b/content/riak/kv/latest/using/security/managing-sources.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/security/managing-sources/" + - "/riakkv/latest/using/security/managing-sources/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/security/v2-v3-ssl-ca.md b/content/riak/kv/latest/using/security/v2-v3-ssl-ca.md new file mode 100644 index 0000000000..999d89de25 --- /dev/null +++ b/content/riak/kv/latest/using/security/v2-v3-ssl-ca.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/security/v2-v3-ssl-ca/" + - "/riakkv/latest/using/security/v2-v3-ssl-ca/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + diff --git a/content/riak/kv/latest/using/troubleshooting.md b/content/riak/kv/latest/using/troubleshooting.md new file mode 100644 index 0000000000..9b1c8ba1f2 --- /dev/null +++ b/content/riak/kv/latest/using/troubleshooting.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/troubleshooting/" + - "/riakkv/latest/using/troubleshooting/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/kv/latest/using/troubleshooting/http-204.md b/content/riak/kv/latest/using/troubleshooting/http-204.md new file mode 100644 index 0000000000..66e835da95 --- /dev/null +++ b/content/riak/kv/latest/using/troubleshooting/http-204.md @@ -0,0 +1,18 @@ +--- +layout: latest_redirect +project: riak_kv +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riak/latest/using/troubleshooting/http-204/" + - "/riakkv/latest/using/troubleshooting/http-204/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/building-testing.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/building-testing.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/building-testing.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. 
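(Aside: the `latest_redirect` stubs above carry no explicit target; the layout they name computes one. That layout is defined elsewhere in the repo and is not part of this diff. As a rough sketch of the lookup it has to perform, assuming a `layouts/_default/latest_redirect.html` template and the `project_descriptions` map in the site config that the stub text describes; everything here is illustrative rather than the repo's actual code, and the `latest_text` parameter is omitted:

```
<!-- layouts/_default/latest_redirect.html (illustrative sketch only) -->
{{/* Look up the latest release for this page's project, e.g. the
     project_descriptions.riak_kv.latest value from the site config. */}}
{{ $project := index .Site.Params.project_descriptions .Params.project }}
{{/* Rewrite this page's own URL, swapping the replace_text segment
     ("latest") for the resolved version number. */}}
{{ $target := replace .RelPermalink .Params.replace_text $project.latest }}
<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="refresh" content="0; url={{ $target }}">
    <link rel="canonical" href="{{ $target }}">
  </head>
  <body>Redirecting to <a href="{{ $target }}">{{ $target }}</a></body>
</html>
```

With `project_descriptions.riak_kv.latest` set to, say, `2.2.0`, a request for `/riak/kv/latest/using/security/basics/` would refresh to `/riak/kv/2.2.0/using/security/basics/`.)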
diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/getting.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/getting.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/getting.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/quick-start.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/quick-start.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/quick-start.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/bulk-write.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/bulk-write.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/bulk-write.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/config-spark-context.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/config-spark-context.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/config-spark-context.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. 
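(The Spark-Riak connector stubs that begin above take the other approach: they pin an explicit `target` and use the plain `redirect` layout, since there is no per-version page to resolve. Again the template is outside this diff; a minimal sketch, assuming a `layouts/_default/redirect.html` template and illustrative only:

```
<!-- layouts/_default/redirect.html (illustrative sketch only) -->
{{/* Join the site root to the page's target, e.g. "riak/ts/1.0.0/". */}}
{{ $target := printf "%s%s" .Site.BaseURL .Params.target }}
<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="refresh" content="0; url={{ $target }}">
  </head>
</html>
```
)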
diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/dataframes.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/dataframes.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/dataframes.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/dates.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/dates.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/dates.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/range-query-partition.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/range-query-partition.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/range-query-partition.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/reading-data.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/reading-data.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/reading-data.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. 
diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/streaming-example.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/streaming-example.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/streaming-example.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled.
diff --git a/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/writing-data.md b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/writing-data.md new file mode 100644 index 0000000000..7aabd59043 --- /dev/null +++ b/content/riak/ts/1.0.0/add-ons/spark-riak-connector/usage/writing-data.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.0.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled.
diff --git a/content/riak/ts/1.0.0/installing.md b/content/riak/ts/1.0.0/installing.md index 8519a21cbf..c7210df920 100644 --- a/content/riak/ts/1.0.0/installing.md +++ b/content/riak/ts/1.0.0/installing.md @@ -19,7 +19,7 @@ aliases: --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [Centos]: rhel-centos/ [Debian]: debian-ubuntu/ [OSX]: mac-osx/
diff --git a/content/riak/ts/1.0.0/installing/debian-ubuntu.md b/content/riak/ts/1.0.0/installing/debian-ubuntu.md index 954b80464e..4ba47eca13 100644 --- a/content/riak/ts/1.0.0/installing/debian-ubuntu.md +++ b/content/riak/ts/1.0.0/installing/debian-ubuntu.md @@ -19,9 +19,9 @@ aliases: --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [planning]: ../../using/planning -[security basics]: /riak/kv/2.1.3/using/security/basics +[security basics]: {{< baseurl >}}riak/kv/2.1.3/using/security/basics Riak TS can be installed on Debian or Ubuntu-based systems using a binary
diff --git a/content/riak/ts/1.0.0/installing/mac-osx.md b/content/riak/ts/1.0.0/installing/mac-osx.md index f7480dc8c0..531cc9cd27 100644 --- a/content/riak/ts/1.0.0/installing/mac-osx.md +++ b/content/riak/ts/1.0.0/installing/mac-osx.md @@ -18,8 +18,8 @@ aliases: - /riakts/1.0.0/installing/mac-osx/ --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy +[perf open files]: {{< baseurl >}}riak/kv/2.1.3/using/performance/open-files-limit [planning]: ../../using/planning
diff --git a/content/riak/ts/1.0.0/installing/rhel-centos.md b/content/riak/ts/1.0.0/installing/rhel-centos.md index f6ac8934f3..6edfdcc5ad 100644 --- a/content/riak/ts/1.0.0/installing/rhel-centos.md +++ b/content/riak/ts/1.0.0/installing/rhel-centos.md @@ -18,7 +18,7 @@ aliases: - /riakts/1.0.0/installing/rhel-centos/ --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [planning]: ../../using/planning
diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled.
diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/building-testing.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/building-testing.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/building-testing.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled.
diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/getting.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/getting.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/getting.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled.
diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/quick-start.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/quick-start.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/quick-start.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled.
diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled.
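(The installing pages above show the other half of this commit: hard-coded absolute link definitions such as `/riak/kv/2.1.3/...`, and later in the diff full `http://docs.basho.com/...` URLs, are rewritten to go through a `baseurl` shortcode. The shortcode's definition is not part of this diff; a plausible minimal form, assuming it lives at `layouts/shortcodes/baseurl.html`, which is an assumption rather than something the diff confirms, is a one-liner:

```
<!-- layouts/shortcodes/baseurl.html (illustrative sketch only).
     Emits the site's configured baseURL; the trim markers strip
     surrounding whitespace so the value joins cleanly with the path
     that follows the shortcode in the markdown source. -->
{{- .Page.Site.BaseURL -}}
```

The payoff is that moving the docs to a new host or path prefix becomes a one-line config change instead of another mass edit of every page.)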
diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/bulk-write.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/bulk-write.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/bulk-write.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/config-spark-context.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/config-spark-context.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/config-spark-context.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/dataframes.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/dataframes.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/dataframes.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/dates.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/dates.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/dates.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. 
diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/range-query-partition.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/range-query-partition.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/range-query-partition.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/reading-data.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/reading-data.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/reading-data.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/streaming-example.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/streaming-example.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/streaming-example.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/writing-data.md b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/writing-data.md new file mode 100644 index 0000000000..744e19e0c3 --- /dev/null +++ b/content/riak/ts/1.1.0/add-ons/spark-riak-connector/usage/writing-data.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.1.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. 
diff --git a/content/riak/ts/1.1.0/installing.md b/content/riak/ts/1.1.0/installing.md index 8f3a4bfc2f..2d1842eb27 100644 --- a/content/riak/ts/1.1.0/installing.md +++ b/content/riak/ts/1.1.0/installing.md @@ -18,7 +18,7 @@ aliases: - /riakts/1.1.0/installing/installing/ --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [Centos]: rhel-centos/ [Debian]: debian-ubuntu/ [OSX]: mac-osx/
diff --git a/content/riak/ts/1.1.0/installing/debian-ubuntu.md b/content/riak/ts/1.1.0/installing/debian-ubuntu.md index e1bf8834c8..2020347c9d 100644 --- a/content/riak/ts/1.1.0/installing/debian-ubuntu.md +++ b/content/riak/ts/1.1.0/installing/debian-ubuntu.md @@ -18,9 +18,9 @@ aliases: - /riakts/1.1.0/installing/debian-ubuntu/ --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [planning]: ../../using/planning -[security basics]: /riak/kv/2.1.3/using/security/basics +[security basics]: {{< baseurl >}}riak/kv/2.1.3/using/security/basics Riak TS can be installed on Debian or Ubuntu-based systems using a binary
diff --git a/content/riak/ts/1.1.0/installing/mac-osx.md b/content/riak/ts/1.1.0/installing/mac-osx.md index d7362a4423..12289e6026 100644 --- a/content/riak/ts/1.1.0/installing/mac-osx.md +++ b/content/riak/ts/1.1.0/installing/mac-osx.md @@ -18,8 +18,8 @@ aliases: - /riakts/1.1.0/installing/mac-osx/ --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy +[perf open files]: {{< baseurl >}}riak/kv/2.1.3/using/performance/open-files-limit [planning]: ../../using/planning Riak TS can be installed on Mac OS X systems using a binary
diff --git a/content/riak/ts/1.1.0/installing/rhel-centos.md b/content/riak/ts/1.1.0/installing/rhel-centos.md index 9d20b9a1b5..c7f4c20fa8 100644 --- a/content/riak/ts/1.1.0/installing/rhel-centos.md +++ b/content/riak/ts/1.1.0/installing/rhel-centos.md @@ -18,7 +18,7 @@ aliases: - /riakts/1.1.0/installing/rhel-centos/ --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [planning]: ../../using/planning Riak TS can be installed on CentOS-based systems using a binary
diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled.
diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/building-testing.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/building-testing.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/building-testing.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/getting.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/getting.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/getting.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/quick-start.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/quick-start.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/quick-start.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/bulk-write.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/bulk-write.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/bulk-write.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. 
diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/config-spark-context.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/config-spark-context.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/config-spark-context.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/dataframes.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/dataframes.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/dataframes.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/dates.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/dates.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/dates.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/range-query-partition.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/range-query-partition.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/range-query-partition.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. 
diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/reading-data.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/reading-data.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/reading-data.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/streaming-example.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/streaming-example.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/streaming-example.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. diff --git a/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/writing-data.md b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/writing-data.md new file mode 100644 index 0000000000..421f40eb95 --- /dev/null +++ b/content/riak/ts/1.2.0/add-ons/spark-riak-connector/usage/writing-data.md @@ -0,0 +1,11 @@ +--- +layout: redirect +target: "riak/ts/1.2.0/" +--- + +This page exists solely to redirect from the generated URL to the above `target` + +We prefer to store these redirects as .html files in static/ but -- for reasons +yet divined -- Hugo replaces public/riak/index.html with an empty file some time +after it is sync'd from static/ but before the dynamic/ content is done being +compiled. 
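(One more moving part worth naming: the `latest_redirect` stubs and the existing TS pages in this diff carry `aliases` lists, and those are handled by Hugo itself rather than by a custom layout. For each alias, Hugo emits an extra HTML page at the old URL that refreshes to the new one; its built-in alias template produces output roughly like the following, paraphrased from memory of Hugo's embedded template, so the exact markup varies by Hugo version:

```
<!DOCTYPE html>
<html>
  <head>
    <title>{{ .Permalink }}</title>
    <link rel="canonical" href="{{ .Permalink }}">
    <meta http-equiv="refresh" content="0; url={{ .Permalink }}">
  </head>
</html>
```

So a hit on the old `/riakts/1.0.0/installing/mac-osx/` URL bounces to `/riak/ts/1.0.0/installing/mac-osx/` without any server-side rewrite rules.)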
diff --git a/content/riak/ts/1.2.0/index.md b/content/riak/ts/1.2.0/index.md index abb7b7e262..c45e31b112 100644 --- a/content/riak/ts/1.2.0/index.md +++ b/content/riak/ts/1.2.0/index.md @@ -14,7 +14,7 @@ aliases: - /riakts/1.2.0/ --- -[installing]: /riak/ts/1.2.0/installing/ +[installing]: {{< baseurl >}}riak/ts/1.2.0/installing/ [learnabout]: learn-about/
diff --git a/content/riak/ts/1.2.0/installing.md b/content/riak/ts/1.2.0/installing.md index 1004fbb6d7..2d88d9cfde 100644 --- a/content/riak/ts/1.2.0/installing.md +++ b/content/riak/ts/1.2.0/installing.md @@ -19,7 +19,7 @@ aliases: --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [Centos]: rhel-centos/ [Debian]: debian-ubuntu/ [OSX]: mac-osx/
diff --git a/content/riak/ts/1.2.0/installing/debian-ubuntu.md b/content/riak/ts/1.2.0/installing/debian-ubuntu.md index 63c7b4cf2b..9f1a64b55c 100644 --- a/content/riak/ts/1.2.0/installing/debian-ubuntu.md +++ b/content/riak/ts/1.2.0/installing/debian-ubuntu.md @@ -19,9 +19,9 @@ aliases: --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy -[planning]: /riak/ts/1.2.0/using/planning -[security basics]: /riak/kv/2.1.3/using/security/basics +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy +[planning]: {{< baseurl >}}riak/ts/1.2.0/using/planning +[security basics]: {{< baseurl >}}riak/kv/2.1.3/using/security/basics Riak TS can be installed on Debian or Ubuntu-based systems using a binary
diff --git a/content/riak/ts/1.2.0/installing/mac-osx.md b/content/riak/ts/1.2.0/installing/mac-osx.md index 6fc377efed..c9ec101aef 100644 --- a/content/riak/ts/1.2.0/installing/mac-osx.md +++ b/content/riak/ts/1.2.0/installing/mac-osx.md @@ -18,8 +18,8 @@ aliases: - /riakts/1.2.0/installing/mac-osx/ --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy -[perf open files]: /riak/kv/2.1.3/using/performance/open-files-limit +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy +[perf open files]: {{< baseurl >}}riak/kv/2.1.3/using/performance/open-files-limit [planning]: ../../using/planning Riak TS can be installed on Mac OS X systems using a binary
diff --git a/content/riak/ts/1.2.0/installing/rhel-centos.md b/content/riak/ts/1.2.0/installing/rhel-centos.md index 03a83811f5..d7d4940973 100644 --- a/content/riak/ts/1.2.0/installing/rhel-centos.md +++ b/content/riak/ts/1.2.0/installing/rhel-centos.md @@ -18,7 +18,7 @@ aliases: - /riakts/1.2.0/installing/rhel-centos/ --- -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [planning]: ../../using/planning Riak TS can be installed on CentOS-based systems using a binary
diff --git a/content/riak/ts/1.2.0/releasenotes.md b/content/riak/ts/1.2.0/releasenotes.md index fccdd3a4d9..a6da3a3698 100644 --- a/content/riak/ts/1.2.0/releasenotes.md +++ b/content/riak/ts/1.2.0/releasenotes.md @@ -24,7 +24,7 @@ Riak TS 1.2.0 introduces riak_shell, a shell that allows you to run SQL within R ## New Features -* riak_shell is a configurable, extendable shell for Riak that allows you to run SQL commands and logging in a single shell within Riak TS. You can find more information about riak_shell [here](http://docs.basho.com/riakts/1.2.0/using/riakshell/). +* riak_shell is a configurable, extendable shell for Riak that allows you to run SQL commands and logging in a single shell within Riak TS. You can find more information about riak_shell [here]({{< baseurl >}}riak/ts/1.2.0/using/riakshell/).
## Changes @@ -52,7 +52,7 @@ Riak TS is compatible with the following operating systems: ## Known Issues -* For security reasons, you should change the owner of the /etc/init.d/riak file to the root user after installation has completed. See our [product advisory](http://docs.basho.com/community/productadvisories/codeinjectioninitfiles/) for more information and further instruction. +* For security reasons, you should change the owner of the /etc/init.d/riak file to the root user after installation has completed. See our [product advisory]({{< baseurl >}}community/productadvisories/codeinjectioninitfiles/) for more information and further instruction. * Negation of an aggregate function returns an error. You can use negation by structuring any aggregate you'd like to negate as follows: `-1*COUNT(...)`. * Rolling upgrades are not supported. * AAE must be turned off.
diff --git a/content/riak/ts/1.3.0/add-ons/spark-riak-connector/building-testing.md b/content/riak/ts/1.3.0/add-ons/spark-riak-connector/building-testing.md index 0afacf4d5b..50ce7f38db 100644 --- a/content/riak/ts/1.3.0/add-ons/spark-riak-connector/building-testing.md +++ b/content/riak/ts/1.3.0/add-ons/spark-riak-connector/building-testing.md @@ -27,7 +27,7 @@ In order to build the Spark-Riak connector, you'll need to have the following in * [Java OpenJDK 8](http://openjdk.java.net/install/) * [Maven 3](https://maven.apache.org/download.cgi) * [Spark 1.6](http://spark.apache.org/docs/latest/#downloading) -* [Riak TS](http://docs.basho.com/riak/ts/latest/installing/) +* [Riak TS]({{< baseurl >}}riak/ts/latest/installing/) ## Download
diff --git a/content/riak/ts/1.3.0/add-ons/spark-riak-connector/quick-start.md b/content/riak/ts/1.3.0/add-ons/spark-riak-connector/quick-start.md index 6b516ed7f1..0f4ef36be1 100644 --- a/content/riak/ts/1.3.0/add-ons/spark-riak-connector/quick-start.md +++ b/content/riak/ts/1.3.0/add-ons/spark-riak-connector/quick-start.md @@ -18,8 +18,8 @@ This guide will run you through a quick example that uses the Spark-Riak connect ## Prerequisites - Update Homebrew with `brew update`. -- Install Riak TS OSX build. Instruction can be found [here](http://docs.basho.com/riak/ts/1.2.0/installing/mac-osx/) -- Set open file limits for Riak by following the guide [here](http://docs.basho.com/riak/latest/ops/tuning/open-files-limit/#Mac-OS-X). +- Install Riak TS OSX build. Instruction can be found [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/) +- Set open file limits for Riak by following the guide [here]({{< baseurl >}}riak/kv/latest/ops/tuning/open-files-limit/#Mac-OS-X). - Install Spark with `brew install apache-spark`. - Download the Spark-Riak connector uber jar (containing all dependencies) from here: https://github.com/basho/spark-riak-connector/releases/latest.
diff --git a/content/riak/ts/1.3.0/add-ons/spark-riak-connector/usage/range-query-partition.md b/content/riak/ts/1.3.0/add-ons/spark-riak-connector/usage/range-query-partition.md index 8e081a85a7..dd39ff6aed 100644 --- a/content/riak/ts/1.3.0/add-ons/spark-riak-connector/usage/range-query-partition.md +++ b/content/riak/ts/1.3.0/add-ons/spark-riak-connector/usage/range-query-partition.md @@ -15,7 +15,7 @@ version_history: in: "1.3.0+" --- -Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS](http://docs.basho.com/riakts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time.
+Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS]({{< baseurl >}}riak/ts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time. To use this functionality, you must provide the following options: diff --git a/content/riak/ts/1.3.0/add-ons/spark-riak-connector/usage/streaming-example.md b/content/riak/ts/1.3.0/add-ons/spark-riak-connector/usage/streaming-example.md index c1e0f3564a..acce8a53d2 100644 --- a/content/riak/ts/1.3.0/add-ons/spark-riak-connector/usage/streaming-example.md +++ b/content/riak/ts/1.3.0/add-ons/spark-riak-connector/usage/streaming-example.md @@ -29,7 +29,7 @@ path/to/kafka/bin/kafka-server-start.sh config/server.properties path/to/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic streaming ``` -We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instruction to do so [here](/riak/ts/1.2.0/installing/mac-osx/). +We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instructions to do so [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/). You will need to build the TS example as well. Please follow the instructions on [building the examples](https://github.com/basho/spark-riak-connector/tree/master/examples#building-and-running-examplesdemos). diff --git a/content/riak/ts/1.3.0/developing.md b/content/riak/ts/1.3.0/developing.md index 67d481f0f3..ffafbeb5b4 100644 --- a/content/riak/ts/1.3.0/developing.md +++ b/content/riak/ts/1.3.0/developing.md @@ -15,16 +15,16 @@ aliases: --- -[erlang]: /riak/ts/1.3.0/developing/erlang -[go]: /riak/ts/1.3.0/developing/golang -[http]: /riak/ts/1.3.0/developing/http -[java]: /riak/ts/1.3.0/developing/java -[ruby]: /riak/ts/1.3.0/developing/ruby -[python]: /riak/ts/1.3.0/developing/python -[csharp]: /riak/ts/1.3.0/developing/csharp -[nodejs]: /riak/ts/1.3.0/developing/nodejs -[erlang]: /riak/ts/1.3.0/developing/erlang -[php]: /riak/ts/1.3.0/developing/php +[erlang]: {{< baseurl >}}riak/ts/1.3.0/developing/erlang +[go]: {{< baseurl >}}riak/ts/1.3.0/developing/golang +[http]: {{< baseurl >}}riak/ts/1.3.0/developing/http +[java]: {{< baseurl >}}riak/ts/1.3.0/developing/java +[ruby]: {{< baseurl >}}riak/ts/1.3.0/developing/ruby +[python]: {{< baseurl >}}riak/ts/1.3.0/developing/python +[csharp]: {{< baseurl >}}riak/ts/1.3.0/developing/csharp +[nodejs]: {{< baseurl >}}riak/ts/1.3.0/developing/nodejs +[erlang]: {{< baseurl >}}riak/ts/1.3.0/developing/erlang +[php]: {{< baseurl >}}riak/ts/1.3.0/developing/php You can access Riak TS data over HTTP through the [API][http].
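The `developing.md` hunks above only re-point the client-library link table, so a concrete call may help orient readers reviewing this change. The following is a minimal sketch using the official `riak` Python client; the local node on protocol buffers port 8087 and the `GeoCheckin` table layout (`family`, `series`, `time`, `weather`, `temperature`) are assumptions borrowed from the docs' usual examples, not part of this changeset.

```python
# Minimal sketch, not from this changeset. Assumes `pip install riak`, a local
# TS node on pb port 8087, and a GeoCheckin table with columns
# (family, series, time, weather, temperature).
import datetime
from riak import RiakClient

client = RiakClient(host="127.0.0.1", pb_port=8087)
table = client.table("GeoCheckin")

now = datetime.datetime.utcnow()
table.new([["family1", "series1", now, "hot", 23.5]]).store()  # write one row

# Query the row back; TS timestamps are epoch milliseconds.
epoch_ms = int((now - datetime.datetime(1970, 1, 1)).total_seconds() * 1000)
query = ("SELECT * FROM GeoCheckin WHERE family = 'family1' "
         "AND series = 'series1' AND time >= {} AND time <= {}"
         .format(epoch_ms - 1000, epoch_ms + 1000))
print(client.ts_query("GeoCheckin", query).rows)
```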
diff --git a/content/riak/ts/1.3.0/installing.md b/content/riak/ts/1.3.0/installing.md index 07c818c7ce..f3ba8df2ff 100644 --- a/content/riak/ts/1.3.0/installing.md +++ b/content/riak/ts/1.3.0/installing.md @@ -20,7 +20,7 @@ aliases: [AWS]: aws/ -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [Centos]: rhel-centos/ [Debian]: debian-ubuntu/ [download]: ../downloads/ diff --git a/content/riak/ts/1.3.0/installing/aws.md b/content/riak/ts/1.3.0/installing/aws.md index 5e61500f7d..9ec4c23702 100644 --- a/content/riak/ts/1.3.0/installing/aws.md +++ b/content/riak/ts/1.3.0/installing/aws.md @@ -23,7 +23,7 @@ aliases: [AWS]: http://aws.amazon.com [download]: ../../downloads/ [ec2 guide]: http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html -[security basics]: /riak/kv/2.1.4/using/security/basics +[security basics]: {{< baseurl >}}riak/kv/2.1.4/using/security/basics Riak TS can be installed on AWS virtual machines (VMs) using a binary @@ -40,7 +40,7 @@ Get started by launching a Riak TS virtual machine via the AWS Marketplace. (You 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair. - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{< baseurl >}}images/aws-marketplace-settings.png) 4. Then click the **Accept Terms and Launch with 1-Click** button. @@ -65,7 +65,7 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{< baseurl >}}images/aws-marketplace-security-group.png) We also recommend that you read more about [Security and Firewalls][security basics].
diff --git a/content/riak/ts/1.3.0/installing/debian-ubuntu.md b/content/riak/ts/1.3.0/installing/debian-ubuntu.md index 53fbfe7777..b954f9aee8 100644 --- a/content/riak/ts/1.3.0/installing/debian-ubuntu.md +++ b/content/riak/ts/1.3.0/installing/debian-ubuntu.md @@ -19,9 +19,9 @@ aliases: --- [download]: ../../downloads/ -[openfileslimit]: /riak/kv/2.1.4/using/performance/open-files-limit +[openfileslimit]: {{< baseurl >}}riak/kv/2.1.4/using/performance/open-files-limit [planning]: ../../using/planning -[security basics]: /riak/kv/2.1.4/using/security/basics +[security basics]: {{< baseurl >}}riak/kv/2.1.4/using/security/basics Riak TS can be installed on Debian or Ubuntu-based systems using a binary diff --git a/content/riak/ts/1.3.0/installing/mac-osx.md b/content/riak/ts/1.3.0/installing/mac-osx.md index c980715f86..44943b0a3e 100644 --- a/content/riak/ts/1.3.0/installing/mac-osx.md +++ b/content/riak/ts/1.3.0/installing/mac-osx.md @@ -20,7 +20,7 @@ aliases: [download]: ../../downloads/ -[openfileslimit]: /riak/kv/2.1.4/using/performance/open-files-limit +[openfileslimit]: {{< baseurl >}}riak/kv/2.1.4/using/performance/open-files-limit [planning]: ../../using/planning Riak TS can be installed on Mac OS X systems using a binary diff --git a/content/riak/ts/1.3.0/installing/rhel-centos.md b/content/riak/ts/1.3.0/installing/rhel-centos.md index 156904b358..9cfea46640 100644 --- a/content/riak/ts/1.3.0/installing/rhel-centos.md +++ b/content/riak/ts/1.3.0/installing/rhel-centos.md @@ -19,7 +19,7 @@ aliases: --- [download]: ../../downloads/ -[openfileslimit]: /riak/kv/2.1.4/using/performance/open-files-limit +[openfileslimit]: {{< baseurl >}}riak/kv/2.1.4/using/performance/open-files-limit [planning]: ../../using/planning/ diff --git a/content/riak/ts/1.3.0/learn-about/bestpractices.md b/content/riak/ts/1.3.0/learn-about/bestpractices.md index f0980e37bd..a321598ee0 100644 --- a/content/riak/ts/1.3.0/learn-about/bestpractices.md +++ b/content/riak/ts/1.3.0/learn-about/bestpractices.md @@ -15,7 +15,7 @@ aliases: --- -[glossary bucket]: http://docs.basho.com/riak/kv/2.1.4/learn/glossary/#bucket +[glossary bucket]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary/#bucket [table arch]: ../tablearchitecture/ diff --git a/content/riak/ts/1.3.0/releasenotes.md b/content/riak/ts/1.3.0/releasenotes.md index 173155287c..7e07a468d2 100644 --- a/content/riak/ts/1.3.0/releasenotes.md +++ b/content/riak/ts/1.3.0/releasenotes.md @@ -19,7 +19,7 @@ Released May 4, 2016. Riak TS 1.3.0 is [open source](https://github.com/basho/riak/tree/riak_ts-1.3.0)! In addition to becoming OSS, version 1.3.0 introduces a broad range of new functionality including: an HTTP API, additional SQL commands, and relaxed key restrictions. It also includes Multi-Datacenter (MDC) replication for our Enterprise users. -We've also added AWS AMI support. You can find instructions for installing Riak TS on AWS [here](http://docs.basho.com/riak/ts/1.3.0/installing/aws/). +We've also added AWS AMI support. You can find instructions for installing Riak TS on AWS [here]({{< baseurl >}}riak/ts/1.3.0/installing/aws/). ## New Features @@ -41,7 +41,7 @@ We've also added AWS AMI support. You can find instructions for installing Riak * The relaxed key restrictions mean the family and series keys are no longer required, which makes TS table schemas more flexible, and makes customizing the data you store and how you store even easier.
* [[PR #1357](https://github.com/basho/riak_kv/pull/1357)] * [[riak_ql PR #108](https://github.com/basho/riak_ql/pull/108)] -* TS now supports MDC replication on TS tables. At this time, MDC support in TS does not include AAE fullsync. To use MDC, you will need to create your TS tables in your clusters and then [configure](http://http://docs.basho.com/riak/ts/1.3.0/using/mdc/) MDC. +* TS now supports MDC replication on TS tables. At this time, MDC support in TS does not include AAE fullsync. To use MDC, you will need to create your TS tables in your clusters and then [configure]({{< baseurl >}}riak/ts/1.3.0/using/mdc/) MDC. * [[riak_repl PR #738](https://github.com/basho/riak_repl/pull/738)] * [[PR #1381](https://github.com/basho/riak_kv/pull/1381)] * Riak TS now offers integration with PHP and .NET clients. @@ -56,7 +56,7 @@ We've also added AWS AMI support. You can find instructions for installing Riak ## Changes -* AWS AMI is now available. You can find instructions for installing Riak TS on AWS [here](http://docs.basho.com/riak/ts/1.3.0/installing/aws/). [[PR #89](https://github.com/basho/aws-ansible/pull/89)] +* AWS AMI is now available. You can find instructions for installing Riak TS on AWS [here]({{< baseurl >}}riak/ts/1.3.0/installing/aws/). [[PR #89](https://github.com/basho/aws-ansible/pull/89)] * riak shell has had several changes: the `command` record (which includes result) has been added, optional debugging has been added to exceptions, and SQL commands are allowed to span multiple lines. [[PR #23](https://github.com/basho/riak_shell/pull/23)] * Several changes have been made to facilitate rolling upgrades/downgrades in future releases: * The DDL compiler has been updated to facilitate rolling upgrade/downgrade functionality in future releases. [[riak_ql PR #115](https://github.com/basho/riak_ql/pull/115)] @@ -64,7 +64,7 @@ We've also added AWS AMI support. You can find instructions for installing Riak * The version that DDLs were compiled with will be stored in the dets table. [[PR #1377](https://github.com/basho/riak_kv/pull/1377)] * The DDL compiler's version is registered as a capability. [[PR #1377](https://github.com/basho/riak_kv/pull/1377)] * elevelDB has been updated to pull in levelDB version 2.0.15. [[eleveldb PR #184](https://github.com/basho/eleveldb/pull/184)] -* node_package has been updated to version 3.0.0 to address a [security issue](http://docs.basho.com/community/productadvisories/codeinjectioninitfiles/) in which arbitrary root access was possible for a local user that had direct access to the Riak account. [[PR #820](https://github.com/basho/riak/pull/820)] +* node_package has been updated to version 3.0.0 to address a [security issue]({{< baseurl >}}community/productadvisories/codeinjectioninitfiles/) in which arbitrary root access was possible for a local user that had direct access to the Riak account. * module_info calls have been removed from riak_core_coverage_fsm:init() to speed up small queries.
[[PR #829](https://github.com/basho/riak_core/pull/829)] diff --git a/content/riak/ts/1.3.0/using.md b/content/riak/ts/1.3.0/using.md index 42fec954c4..27ae4578a3 100644 --- a/content/riak/ts/1.3.0/using.md +++ b/content/riak/ts/1.3.0/using.md @@ -19,7 +19,7 @@ aliases: [aggregate]: aggregate-functions/ [arithmetic]: arithmetic-operations/ [configuring]: configuring/ -[download]: /riak/ts/1.3.0/downloads/ +[download]: {{< baseurl >}}riak/ts/1.3.0/downloads/ [installing]: ../installing/ [mdc]: mdc/ [planning]: planning/ diff --git a/content/riak/ts/1.3.0/using/mdc.md b/content/riak/ts/1.3.0/using/mdc.md index 77352ece15..d9a19262b8 100644 --- a/content/riak/ts/1.3.0/using/mdc.md +++ b/content/riak/ts/1.3.0/using/mdc.md @@ -20,10 +20,10 @@ aliases: --- -[activating]: /riak/ts/1.3.0/using/creating-activating -[cluster ops v3 mdc]: /riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter +[activating]: {{< baseurl >}}riak/ts/1.3.0/using/creating-activating +[cluster ops v3 mdc]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter [ee]: http://basho.com/contact/ -[install]: /riak/ts/1.3.0/installing +[install]: {{< baseurl >}}riak/ts/1.3.0/installing Multi-Datacenter (MDC) replication makes it possible to replicate time series data between Riak clusters. This document will walk through how to configure MDC to work with Riak TS. diff --git a/content/riak/ts/1.3.0/using/riakshell.md b/content/riak/ts/1.3.0/using/riakshell.md index 310dcf7515..4bb15a6741 100644 --- a/content/riak/ts/1.3.0/using/riakshell.md +++ b/content/riak/ts/1.3.0/using/riakshell.md @@ -16,9 +16,9 @@ aliases: - /riakts/1.3.0/using/riakshell/ --- -[nodename]: /riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/ -[creating]: /riak/ts/1.3.0/using/creating-activating -[writing]: /riak/ts/1.3.0/using/writingdata +[nodename]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/ +[creating]: {{< baseurl >}}riak/ts/1.3.0/using/creating-activating +[writing]: {{< baseurl >}}riak/ts/1.3.0/using/writingdata You can use riak shell within Riak TS to run SQL and logging commands from one place.
diff --git a/content/riak/ts/1.3.0/using/writingdata.md b/content/riak/ts/1.3.0/using/writingdata.md index d167e46e7c..2c91e4ced3 100644 --- a/content/riak/ts/1.3.0/using/writingdata.md +++ b/content/riak/ts/1.3.0/using/writingdata.md @@ -18,8 +18,8 @@ aliases: [activating]: ../creating-activating/ [planning]: ../planning/ [querying]: ../querying/ -[config reference]: /riak/kv/2.1.4/configuring/reference/#the-advanced-config-file -[MDC]: /riak/ts/1.3.0/using/mdc +[config reference]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference/#the-advanced-config-file +[MDC]: {{< baseurl >}}riak/ts/1.3.0/using/mdc [riakshell]: ../riakshell diff --git a/content/riak/ts/1.3.1/add-ons/spark-riak-connector/building-testing.md b/content/riak/ts/1.3.1/add-ons/spark-riak-connector/building-testing.md index 4d5d680c15..6cff0e1729 100644 --- a/content/riak/ts/1.3.1/add-ons/spark-riak-connector/building-testing.md +++ b/content/riak/ts/1.3.1/add-ons/spark-riak-connector/building-testing.md @@ -27,7 +27,7 @@ In order to build the Spark-Riak connector, you'll need to have the following in * [Java OpenJDK 8](http://openjdk.java.net/install/) * [Maven 3](https://maven.apache.org/download.cgi) * [Spark 1.6](http://spark.apache.org/docs/latest/#downloading) -* [Riak TS](http://docs.basho.com/riak/ts/latest/installing/) +* [Riak TS]({{< baseurl >}}riak/ts/latest/installing/) ## Download diff --git a/content/riak/ts/1.3.1/add-ons/spark-riak-connector/quick-start.md b/content/riak/ts/1.3.1/add-ons/spark-riak-connector/quick-start.md index 0beefae90e..4ba30b4a0b 100644 --- a/content/riak/ts/1.3.1/add-ons/spark-riak-connector/quick-start.md +++ b/content/riak/ts/1.3.1/add-ons/spark-riak-connector/quick-start.md @@ -18,8 +18,8 @@ This guide will run you through a quick example that uses the Spark-Riak connect ## Prerequisites - Update Homebrew with `brew update`. -- Install Riak TS OSX build. Instruction can be found [here](http://docs.basho.com/riak/ts/1.2.0/installing/mac-osx/) -- Set open file limits for Riak by following the guide [here](http://docs.basho.com/riak/latest/ops/tuning/open-files-limit/#Mac-OS-X). +- Install Riak TS OSX build. Instructions can be found [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/) +- Set open file limits for Riak by following the guide [here]({{< baseurl >}}riak/kv/latest/ops/tuning/open-files-limit/#Mac-OS-X). - Install Spark with `brew install apache-spark`. - Download the Spark-Riak connector uber jar (containing all dependencies) from here: https://github.com/basho/spark-riak-connector/releases/latest. diff --git a/content/riak/ts/1.3.1/add-ons/spark-riak-connector/usage/range-query-partition.md b/content/riak/ts/1.3.1/add-ons/spark-riak-connector/usage/range-query-partition.md index 0282fc39a3..44ffca1e7f 100644 --- a/content/riak/ts/1.3.1/add-ons/spark-riak-connector/usage/range-query-partition.md +++ b/content/riak/ts/1.3.1/add-ons/spark-riak-connector/usage/range-query-partition.md @@ -15,7 +15,7 @@ version_history: in: "1.3.0+" --- -Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS](http://docs.basho.com/riakts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time. +Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS]({{< baseurl >}}riak/ts/latest/using/querying/)).
To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time. To use this functionality, you must provide the following options: diff --git a/content/riak/ts/1.3.1/add-ons/spark-riak-connector/usage/streaming-example.md b/content/riak/ts/1.3.1/add-ons/spark-riak-connector/usage/streaming-example.md index df7eaeec0f..ea065baf9d 100644 --- a/content/riak/ts/1.3.1/add-ons/spark-riak-connector/usage/streaming-example.md +++ b/content/riak/ts/1.3.1/add-ons/spark-riak-connector/usage/streaming-example.md @@ -29,7 +29,7 @@ path/to/kafka/bin/kafka-server-start.sh config/server.properties path/to/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic streaming ``` -We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instruction to do so [here](/riak/ts/1.2.0/installing/mac-osx/). +We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instructions to do so [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/). You will need to build the TS example as well. Please follow the instructions on [building the examples](https://github.com/basho/spark-riak-connector/tree/master/examples#building-and-running-examplesdemos). diff --git a/content/riak/ts/1.3.1/developing.md b/content/riak/ts/1.3.1/developing.md index 4f66854d90..1fc64a040b 100644 --- a/content/riak/ts/1.3.1/developing.md +++ b/content/riak/ts/1.3.1/developing.md @@ -15,16 +15,16 @@ aliases: --- -[erlang]: /riak/ts/1.3.1/developing/erlang -[go]: /riak/ts/1.3.1/developing/golang -[http]: /riak/ts/1.3.1/developing/http -[java]: /riak/ts/1.3.1/developing/java -[ruby]: /riak/ts/1.3.1/developing/ruby -[python]: /riak/ts/1.3.1/developing/python -[csharp]: /riak/ts/1.3.1/developing/csharp -[nodejs]: /riak/ts/1.3.1/developing/nodejs -[erlang]: /riak/ts/1.3.1/developing/erlang -[php]: /riak/ts/1.3.1/developing/php +[erlang]: {{< baseurl >}}riak/ts/1.3.1/developing/erlang +[go]: {{< baseurl >}}riak/ts/1.3.1/developing/golang +[http]: {{< baseurl >}}riak/ts/1.3.1/developing/http +[java]: {{< baseurl >}}riak/ts/1.3.1/developing/java +[ruby]: {{< baseurl >}}riak/ts/1.3.1/developing/ruby +[python]: {{< baseurl >}}riak/ts/1.3.1/developing/python +[csharp]: {{< baseurl >}}riak/ts/1.3.1/developing/csharp +[nodejs]: {{< baseurl >}}riak/ts/1.3.1/developing/nodejs +[erlang]: {{< baseurl >}}riak/ts/1.3.1/developing/erlang +[php]: {{< baseurl >}}riak/ts/1.3.1/developing/php You can access Riak TS data over HTTP through the [API][http].
diff --git a/content/riak/ts/1.3.1/installing.md b/content/riak/ts/1.3.1/installing.md index af4f7c2096..c7787c2db1 100644 --- a/content/riak/ts/1.3.1/installing.md +++ b/content/riak/ts/1.3.1/installing.md @@ -20,7 +20,7 @@ aliases: [AWS]: aws/ -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [Centos]: rhel-centos/ [Debian]: debian-ubuntu/ [download]: ../downloads/ diff --git a/content/riak/ts/1.3.1/installing/aws.md b/content/riak/ts/1.3.1/installing/aws.md index da83c54f4d..be6965fd28 100644 --- a/content/riak/ts/1.3.1/installing/aws.md +++ b/content/riak/ts/1.3.1/installing/aws.md @@ -23,7 +23,7 @@ aliases: [AWS]: http://aws.amazon.com [download]: ../../downloads/ [ec2 guide]: http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html -[security basics]: /riak/kv/2.1.4/using/security/basics +[security basics]: {{< baseurl >}}riak/kv/2.1.4/using/security/basics Riak TS can be installed on AWS virtual machines (VMs) using a binary @@ -40,7 +40,7 @@ Get started by launching a Riak TS virtual machine via the AWS Marketplace. (You 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair. - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{< baseurl >}}images/aws-marketplace-settings.png) 4. Then click the **Accept Terms and Launch with 1-Click** button. @@ -65,7 +65,7 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{< baseurl >}}images/aws-marketplace-security-group.png) We also recommend that you read more about [Security and Firewalls][security basics].
diff --git a/content/riak/ts/1.3.1/installing/debian-ubuntu.md b/content/riak/ts/1.3.1/installing/debian-ubuntu.md index 67e80a0d58..fcfdb29b9d 100644 --- a/content/riak/ts/1.3.1/installing/debian-ubuntu.md +++ b/content/riak/ts/1.3.1/installing/debian-ubuntu.md @@ -19,9 +19,9 @@ aliases: --- [download]: ../../downloads/ -[openfileslimit]: /riak/kv/2.1.4/using/performance/open-files-limit +[openfileslimit]: {{< baseurl >}}riak/kv/2.1.4/using/performance/open-files-limit [planning]: ../../using/planning -[security basics]: /riak/kv/2.1.4/using/security/basics +[security basics]: {{< baseurl >}}riak/kv/2.1.4/using/security/basics Riak TS can be installed on Debian or Ubuntu-based systems using a binary diff --git a/content/riak/ts/1.3.1/installing/mac-osx.md b/content/riak/ts/1.3.1/installing/mac-osx.md index e4e753c05b..ddfe1d4a73 100644 --- a/content/riak/ts/1.3.1/installing/mac-osx.md +++ b/content/riak/ts/1.3.1/installing/mac-osx.md @@ -20,7 +20,7 @@ aliases: [download]: ../../downloads/ -[openfileslimit]: /riak/kv/2.1.4/using/performance/open-files-limit +[openfileslimit]: {{< baseurl >}}riak/kv/2.1.4/using/performance/open-files-limit [planning]: ../../using/planning Riak TS can be installed on Mac OS X systems using a binary diff --git a/content/riak/ts/1.3.1/installing/rhel-centos.md b/content/riak/ts/1.3.1/installing/rhel-centos.md index 5684315510..1eb21cb72c 100644 --- a/content/riak/ts/1.3.1/installing/rhel-centos.md +++ b/content/riak/ts/1.3.1/installing/rhel-centos.md @@ -19,7 +19,7 @@ aliases: --- [download]: ../../downloads/ -[openfileslimit]: /riak/kv/2.1.4/using/performance/open-files-limit +[openfileslimit]: {{< baseurl >}}riak/kv/2.1.4/using/performance/open-files-limit [planning]: ../../using/planning/ diff --git a/content/riak/ts/1.3.1/learn-about/bestpractices.md b/content/riak/ts/1.3.1/learn-about/bestpractices.md index 9908847608..ef1c78c1b3 100644 --- a/content/riak/ts/1.3.1/learn-about/bestpractices.md +++ b/content/riak/ts/1.3.1/learn-about/bestpractices.md @@ -15,7 +15,7 @@ aliases: --- -[glossary bucket]: http://docs.basho.com/riak/kv/2.1.4/learn/glossary/#bucket +[glossary bucket]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary/#bucket [table arch]: ../tablearchitecture/ diff --git a/content/riak/ts/1.3.1/releasenotes.md b/content/riak/ts/1.3.1/releasenotes.md index 556f6259c2..982cbf041a 100644 --- a/content/riak/ts/1.3.1/releasenotes.md +++ b/content/riak/ts/1.3.1/releasenotes.md @@ -17,13 +17,13 @@ aliases: Released July 5, 2016. -This is a bugfix release addressing the [Data Loss](http://docs.basho.com/community/productadvisories/130-dataloss/) bug in Riak TS 1.3.0. +This is a bugfix release addressing the [Data Loss]({{< baseurl >}}community/productadvisories/130-dataloss/) bug in Riak TS 1.3.0. ## Product Advisory The default configuration for handoff.ip caused vnodes marked for transfer during handoff to be removed without transferring data to their new destination nodes. A mandatory change to configuration (riak.conf) mitigates this issue for OSS TS 1.3.0 users. While not all users were impacted by this issue, we recommend that all 1.3.0 users upgrade to 1.3.1. -Please see the [product advisory](http://docs.basho.com/community/productadvisories/130-dataloss/) for more information. +Please see the [product advisory]({{< baseurl >}}community/productadvisories/130-dataloss/) for more information. ## Bugs Fixed @@ -38,7 +38,7 @@ Released May 4, 2016. Riak TS 1.3.0 is [open source](https://github.com/basho/riak/tree/riak_ts-1.3.0)!
In addition to becoming OSS, version 1.3.0 introduces a broad range of new functionality including: an HTTP API, additional SQL commands, and relaxed key restrictions. It also includes Multi-Datacenter (MDC) replication for our Enterprise users. -We've also added AWS AMI support. You can find instructions for installing Riak TS on AWS [here](http://docs.basho.com/riak/ts/1.3.0/installing/aws/). +We've also added AWS AMI support. You can find instructions for installing Riak TS on AWS [here]({{< baseurl >}}riak/ts/1.3.0/installing/aws/). ### New Features @@ -60,7 +60,7 @@ We've also added AWS AMI support. You can find instructions for installing Riak * The relaxed key restrictions mean the family and series keys are no longer required, which makes TS table schemas more flexible, and makes customizing the data you store and how you store even easier. * [[PR #1357](https://github.com/basho/riak_kv/pull/1357)] * [[riak_ql PR #108](https://github.com/basho/riak_ql/pull/108)] -* TS now supports MDC replication on TS tables. At this time, MDC support in TS does not include AAE fullsync. To use MDC, you will need to create your TS tables in your clusters and then [configure](http://docs.basho.com/riak/ts/1.3.1/using/mdc) MDC. +* TS now supports MDC replication on TS tables. At this time, MDC support in TS does not include AAE fullsync. To use MDC, you will need to create your TS tables in your clusters and then [configure]({{< baseurl >}}riak/ts/1.3.1/using/mdc) MDC. * [[riak_repl PR #738](https://github.com/basho/riak_repl/pull/738)] * [[PR #1381](https://github.com/basho/riak_kv/pull/1381)] * Riak TS now offers integration with PHP and .NET clients. @@ -75,7 +75,7 @@ We've also added AWS AMI support. You can find instructions for installing Riak ### Changes -* AWS AMI is now available. You can find instructions for installing Riak TS on AWS [here](http://docs.basho.com/riak/ts/1.3.0/installing/aws/). [[PR #89](https://github.com/basho/aws-ansible/pull/89)] +* AWS AMI is now available. You can find instructions for installing Riak TS on AWS [here]({{< baseurl >}}riak/ts/1.3.0/installing/aws/). [[PR #89](https://github.com/basho/aws-ansible/pull/89)] * riak shell has had several changes: the `command` record (which includes result) has been added, optional debugging has been added to exceptions, and SQL commands are allowed to span multiple lines. [[PR #23](https://github.com/basho/riak_shell/pull/23)] * Several changes have been made to facilitate rolling upgrades/downgrades in future releases: * The DDL compiler has been updated to facilitate rolling upgrade/downgrade functionality in future releases. [[riak_ql PR #115](https://github.com/basho/riak_ql/pull/115)] @@ -83,7 +83,7 @@ We've also added AWS AMI support. You can find instructions for installing Riak * The version that DDLs were compiled with will be stored in the dets table. [[PR #1377](https://github.com/basho/riak_kv/pull/1377)] * The DDL compiler's version is registered as a capability. [[PR #1377](https://github.com/basho/riak_kv/pull/1377)] * elevelDB has been updated to pull in levelDB version 2.0.15. [[eleveldb PR #184](https://github.com/basho/eleveldb/pull/184)] -* node_package has been updated to version 3.0.0 to address a [security issue](http://docs.basho.com/community/productadvisories/codeinjectioninitfiles/) in which arbitrary root access was possible for a local user that had direct access to the Riak account. 
[[PR #820](https://github.com/basho/riak/pull/820)] +* node_package has been updated to version 3.0.0 to address a [security issue]({{< baseurl >}}community/productadvisories/codeinjectioninitfiles/) in which arbitrary root access was possible for a local user that had direct access to the Riak account. [[PR #820](https://github.com/basho/riak/pull/820)] * module_info calls have been removed from riak_core_coverage_fsm:init() to speed up small queries. [[PR #829](https://github.com/basho/riak_core/pull/829)] diff --git a/content/riak/ts/1.3.1/using.md b/content/riak/ts/1.3.1/using.md index c4c772f20d..3239912dba 100644 --- a/content/riak/ts/1.3.1/using.md +++ b/content/riak/ts/1.3.1/using.md @@ -19,7 +19,7 @@ aliases: [aggregate]: aggregate-functions/ [arithmetic]: arithmetic-operations/ [configuring]: configuring/ -[download]: /riak/ts/1.3.1/downloads/ +[download]: {{< baseurl >}}riak/ts/1.3.1/downloads/ [installing]: ../installing/ [mdc]: mdc/ [planning]: planning/ diff --git a/content/riak/ts/1.3.1/using/mdc.md b/content/riak/ts/1.3.1/using/mdc.md index d8a78250e9..3188c43694 100644 --- a/content/riak/ts/1.3.1/using/mdc.md +++ b/content/riak/ts/1.3.1/using/mdc.md @@ -20,10 +20,10 @@ aliases: --- -[activating]: /riak/ts/1.3.1/using/creating-activating -[cluster ops v3 mdc]: /riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter +[activating]: {{< baseurl >}}riak/ts/1.3.1/using/creating-activating +[cluster ops v3 mdc]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter [ee]: http://basho.com/contact/ -[install]: /riak/ts/1.3.1/installing +[install]: {{< baseurl >}}riak/ts/1.3.1/installing Multi-Datacenter (MDC) replication makes it possible to replicate time series data between Riak clusters. This document will walk through how to configure MDC to work with Riak TS. diff --git a/content/riak/ts/1.3.1/using/riakshell.md b/content/riak/ts/1.3.1/using/riakshell.md index 9c931ba4e2..dcb3784c51 100644 --- a/content/riak/ts/1.3.1/using/riakshell.md +++ b/content/riak/ts/1.3.1/using/riakshell.md @@ -16,9 +16,9 @@ aliases: - /riakts/1.3.1/using/riakshell/ --- -[nodename]: /riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/ -[creating]: /riak/ts/1.3.1/using/creating-activating -[writing]: /riak/ts/1.3.1/using/writingdata +[nodename]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/ +[creating]: {{< baseurl >}}riak/ts/1.3.1/using/creating-activating +[writing]: {{< baseurl >}}riak/ts/1.3.1/using/writingdata You can use riak shell within Riak TS to run SQL and logging commands from one place.
diff --git a/content/riak/ts/1.3.1/using/writingdata.md b/content/riak/ts/1.3.1/using/writingdata.md index 8863cf5b23..876964812f 100644 --- a/content/riak/ts/1.3.1/using/writingdata.md +++ b/content/riak/ts/1.3.1/using/writingdata.md @@ -18,8 +18,8 @@ aliases: [activating]: ../creating-activating/ [planning]: ../planning/ [querying]: ../querying/ -[config reference]: /riak/kv/2.1.4/configuring/reference/#the-advanced-config-file -[MDC]: /riak/ts/1.3.1/using/mdc +[config reference]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference/#the-advanced-config-file +[MDC]: {{< baseurl >}}riak/ts/1.3.1/using/mdc [riakshell]: ../riakshell diff --git a/content/riak/ts/1.4.0/add-ons.md b/content/riak/ts/1.4.0/add-ons.md index 65c5a571ee..7dcd157c45 100644 --- a/content/riak/ts/1.4.0/add-ons.md +++ b/content/riak/ts/1.4.0/add-ons.md @@ -10,7 +10,6 @@ menu: weight: 450 pre: tools toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons" --- Here at Basho, we've developed integrations between Riak TS and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. diff --git a/content/riak/ts/1.4.0/add-ons/redis/developing-rra.md b/content/riak/ts/1.4.0/add-ons/redis/developing-rra.md index 8d79bfa57a..fb5d490d2c 100644 --- a/content/riak/ts/1.4.0/add-ons/redis/developing-rra.md +++ b/content/riak/ts/1.4.0/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.2.0/developing/api/http +[usage bucket types]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/bucket-types/ +[dev api http]: {{< baseurl >}}riak/kv/2.2.0/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.2.0/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context +[apps replication properties]: {{< baseurl >}}riak/kv/2.2.0/developing/app-guide/replication-properties +[usage commit hooks]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/commit-hooks/ +[concept causal context]: {{< baseurl >}}riak/kv/2.2.0/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations.
diff --git a/content/riak/ts/1.4.0/add-ons/redis/redis-add-on-features.md b/content/riak/ts/1.4.0/add-ons/redis/redis-add-on-features.md index 243bfef6d0..44a04c307f 100644 --- a/content/riak/ts/1.4.0/add-ons/redis/redis-add-on-features.md +++ b/content/riak/ts/1.4.0/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{< baseurl >}}images/redis/GET_seq.msc.png +[SET-sequence]: {{< baseurl >}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{< baseurl >}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{< baseurl >}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{< baseurl >}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{< baseurl >}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{< baseurl >}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{< baseurl >}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/ts/1.4.0/add-ons/redis/set-up-rra.md b/content/riak/ts/1.4.0/add-ons/redis/set-up-rra.md index 2e70cc26ed..aed5103e25 100644 --- a/content/riak/ts/1.4.0/add-ons/redis/set-up-rra.md +++ b/content/riak/ts/1.4.0/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/ts/1.4.0/setup/installing -[perf open files]: /riak/ts/1.4.0/setup/installing/rhel-centos/#ulimit +[install index]: {{< baseurl >}}riak/ts/1.4.0/setup/installing +[perf open files]: {{< baseurl >}}riak/ts/1.4.0/setup/installing/rhel-centos/#ulimit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA.
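The hunks above describe RRA's read-through GET and write-around SET/DEL strategies only in prose and sequence-diagram links, so a short sketch may help reviewers. It assumes the cache proxy is listening on port 22122 and that the `redis` Python package is installed; both details are assumptions for illustration, not facts from this diff.

```python
# Minimal sketch, assumptions flagged above. RRA speaks the Redis protocol,
# so a stock Redis client exercises the cache strategies transparently.
import redis

rra = redis.StrictRedis(host="127.0.0.1", port=22122)

rra.set("test:hello", "world")  # write-around: persisted to Riak, cache entry invalidated
print(rra.get("test:hello"))    # read-through: a miss fetches from Riak, then caches for CACHE_TTL
rra.delete("test:hello")        # write-around delete: removed from both Riak and the cache
```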
diff --git a/content/riak/ts/1.4.0/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/ts/1.4.0/add-ons/redis/set-up-rra/deployment-models.md index 4cddc3248f..5d8bc31d1a 100644 --- a/content/riak/ts/1.4.0/add-ons/redis/set-up-rra/deployment-models.md +++ b/content/riak/ts/1.4.0/add-ons/redis/set-up-rra/deployment-models.md @@ -13,9 +13,9 @@ toc: true commercial_offering: true --- -[Local-deployment]: /images/redis/rra_deployment_local.png -[Colocated-deployment]: /images/redis/rra_deployment_colocated.png -[Distributed-deployment]: /images/redis/rra_deployment_distributed.png +[Local-deployment]: {{< baseurl >}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{< baseurl >}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{< baseurl >}}images/redis/rra_deployment_distributed.png ## Deployment Models @@ -24,7 +24,7 @@ commercial_offering: true In a local cache deployment, the RRA and Redis are deployed to the application server. -![Local-deployment](/images/redis/rra_deployment_local.png) +![Local-deployment]({{< baseurl >}}images/redis/rra_deployment_local.png) Connections: @@ -65,7 +65,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so are not multiplied by the number of servers. -![Colocated-deployment](/images/redis/rra_deployment_colocated.png) +![Colocated-deployment]({{< baseurl >}}images/redis/rra_deployment_colocated.png) Connections: @@ -103,7 +103,7 @@ Disadvantages: In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes. -![Distributed-deployment](/images/redis/rra_deployment_distributed.png) +![Distributed-deployment]({{< baseurl >}}images/redis/rra_deployment_distributed.png) Connections: diff --git a/content/riak/ts/1.4.0/add-ons/redis/using-rra.md b/content/riak/ts/1.4.0/add-ons/redis/using-rra.md index 1599f2ec25..533b4f599e 100644 --- a/content/riak/ts/1.4.0/add-ons/redis/using-rra.md +++ b/content/riak/ts/1.4.0/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.2.0/developing/api/http/ +[dev api http]: {{< baseurl >}}riak/kv/2.2.0/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector.md index 225126089c..c77fd62cd8 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector.md @@ -10,7 +10,6 @@ menu: weight: 101 parent: "addons" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector" --- The Spark-Riak connector enables you to connect Spark applications to Riak TS with the Spark RDD and Spark DataFrames APIs. You can write your app in Scala, Python, and Java. The connector makes it easy to partition the data you get from Riak so multiple Spark workers can process the data in parallel, and it has support for failover if a Riak node goes down while your Spark job is running.
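The overview paragraph above is where the connector's DataFrames API first appears in this changeset, so a compact read sketch follows. It assumes Spark 1.6-era PySpark with the connector uber jar on the classpath, a TS node at `127.0.0.1:8087`, and a `GeoCheckin` table; the `spark.riak.connection.host` property and `org.apache.spark.sql.riak` data-source name follow the connector's documentation, but treat the specifics as illustrative.

```python
# Minimal sketch, assumptions flagged above: read a Riak TS table into a
# Spark DataFrame through the connector's data source.
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

conf = (SparkConf()
        .setAppName("ts-read-sketch")
        .set("spark.riak.connection.host", "127.0.0.1:8087"))
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

df = (sqlContext.read
      .format("org.apache.spark.sql.riak")
      .load("GeoCheckin")
      # TS needs a bounded range on the quantized key; filters are pushed down.
      .filter("time >= 1451606400000 AND time <= 1451610000000 "
              "AND family = 'f1' AND series = 's1'"))
df.show()
```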
diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/building-testing.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/building-testing.md index 0d549f31be..c92b1bc4a9 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/building-testing.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/building-testing.md @@ -11,7 +11,6 @@ menu: weight: 103 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/building-testing" --- If you want to download the source code of the Spark-Riak connector, build it, and install the results in your local repo, this is the document for you! Keep reading for instructions on downloading, building, and installing the connector. @@ -26,7 +25,7 @@ In order to build the Spark-Riak connector, you'll need to have the following in * [Java OpenJDK 8](http://openjdk.java.net/install/) * [Maven 3](https://maven.apache.org/download.cgi) * [Spark 1.6](http://spark.apache.org/docs/latest/#downloading) -* [Riak TS](http://docs.basho.com/riak/ts/latest/installing/) +* [Riak TS]({{< baseurl >}}riak/ts/latest/installing/) ## Download diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/getting.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/getting.md index b16f8b6240..05a6c4c4da 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/getting.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/getting.md @@ -10,7 +10,6 @@ menu: weight: 102 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/getting" --- > **Note:** diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/quick-start.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/quick-start.md index cab1a2843a..87d8e8ae43 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/quick-start.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/quick-start.md @@ -11,7 +11,6 @@ menu: weight: 101 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/quick-start" --- This guide will run you through a quick example that uses the Spark-Riak connector to read and write data using Java, Scala, and Python. We will assume you are running this guide on Mac OSX. @@ -19,8 +18,8 @@ This guide will run you through a quick example that uses the Spark-Riak connect ## Prerequisites - Update Homebrew with `brew update`. -- Install Riak TS OSX build. Instruction can be found [here](http://docs.basho.com/riak/ts/1.2.0/installing/mac-osx/) -- Set open file limits for Riak by following the guide [here](http://docs.basho.com/riak/latest/ops/tuning/open-files-limit/#Mac-OS-X). +- Install Riak TS OSX build. Instructions can be found [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/) +- Set open file limits for Riak by following the guide [here]({{< baseurl >}}riak/kv/latest/ops/tuning/open-files-limit/#Mac-OS-X). - Install Spark with `brew install apache-spark`. - Download the Spark-Riak connector uber jar (containing all dependencies) from here: https://github.com/basho/spark-riak-connector/releases/latest.
diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage.md index 245198948b..7a1fbe18f0 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage.md @@ -11,7 +11,6 @@ menu: weight: 104 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage" --- This section will walk you through setting up your application for development with the Spark-Riak connector. diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/bulk-write.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/bulk-write.md index b50060092d..c302ea6e00 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/bulk-write.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/bulk-write.md @@ -11,7 +11,6 @@ menu: weight: 107 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/bulk-write" --- To write into a Riak TS table, the Spark-Riak Connector splits the initial set of rows into smaller bulks and processes them in parallel. Bulk size can be configured using `spark.riakts.write.bulk-size` property. The default number is `100`. diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/config-spark-context.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/config-spark-context.md index 9a2bd284f3..eec524e3db 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/config-spark-context.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/config-spark-context.md @@ -11,7 +11,6 @@ menu: weight: 101 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/config-spark-context" --- The following `import` statements should be included at the top of your Spark application to enable the connector: diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/dataframes.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/dataframes.md index 51ae5567e7..4ad80c01e4 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/dataframes.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/dataframes.md @@ -11,7 +11,6 @@ menu: weight: 104 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/dataframes" --- ## Spark Dataframes With TS Table diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/dates.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/dates.md index 7da8a456a1..eee9bb6729 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/dates.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/dates.md @@ -11,7 +11,6 @@ menu: weight: 105 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/dates" --- Riak TS automatically stores all datetimes as a Long integer that represents milliseconds from the [beginning of the epoch](https://en.wikipedia.org/wiki/Unix_time). This is not very human friendly, so we have provided a Spark configuration option called `spark.riakts.bindings.timestamp`. This option is for use with Automatic Schema Discovery and allows for conversion from Riak TS datetimes, which are stored as Longs, to Timestamps.
The default value of this option is `useTimestamp` which converts Longs to Timestamps. If you would like to use the original Long value, you can use the option value of `useLong`. All conversion takes place during Automatic Schema Discovery when reading from Riak TS tables. diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/range-query-partition.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/range-query-partition.md index 731d3ef11c..9ecdea4cad 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/range-query-partition.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/range-query-partition.md @@ -11,10 +11,9 @@ menu: weight: 106 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/range-query-partition" --- -Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS](http://docs.basho.com/riakts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time. +Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS]({{< baseurl >}}riak/ts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time. To use this functionality, you must provide the following options: diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/reading-data.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/reading-data.md index ef685b5772..19f802e929 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/reading-data.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/reading-data.md @@ -11,7 +11,6 @@ menu: weight: 102 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/reading-data" --- ## Reading Data From TS Table diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/streaming-example.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/streaming-example.md index 4aef7bc33d..16fbe7ae1e 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/streaming-example.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/streaming-example.md @@ -11,7 +11,6 @@ menu: weight: 108 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/streaming-example" --- > **Note:** @@ -28,7 +27,7 @@ path/to/kafka/bin/kafka-server-start.sh config/server.properties path/to/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic streaming ``` -We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instruction to do so [here](/riak/ts/1.2.0/installing/mac-osx/). +We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instructions to do so [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/). You will need to build the TS example as well. Please follow the instructions on [building the examples](https://github.com/basho/spark-riak-connector/tree/master/examples#building-and-running-examplesdemos).
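Two tunables pass by only in prose in the hunks above: `spark.riakts.write.bulk-size` (stated default `100`) from `bulk-write.md`, and `spark.riakts.bindings.timestamp` (`useTimestamp` vs. `useLong`) from `dates.md`. Continuing the read sketch shown earlier (reusing its `df` and `sqlContext`), here is roughly how both options are applied; the option names come from the pages being edited, while the surrounding code is an illustrative assumption.

```python
# Minimal sketch, continuing from the read example above (df, sqlContext).
# Write back with a larger bulk size: rows are split into bulks of this
# size and stored in parallel (the docs' stated default is 100).
(df.write
   .format("org.apache.spark.sql.riak")
   .option("spark.riakts.write.bulk-size", "500")
   .mode("append")
   .save("GeoCheckin"))

# Re-read keeping timestamps as raw epoch-millisecond Longs instead of
# converting them to Timestamps during automatic schema discovery.
raw_df = (sqlContext.read
          .format("org.apache.spark.sql.riak")
          .option("spark.riakts.bindings.timestamp", "useLong")
          .load("GeoCheckin"))
```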
diff --git a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/writing-data.md b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/writing-data.md index e5de5f76bd..66a3fc1eb6 100644 --- a/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/writing-data.md +++ b/content/riak/ts/1.4.0/add-ons/spark-riak-connector/usage/writing-data.md @@ -11,7 +11,6 @@ menu: weight: 103 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/writing-data" --- ## Writing Data To TS Table diff --git a/content/riak/ts/1.4.0/developing.md b/content/riak/ts/1.4.0/developing.md index ed9fa7bb3e..22582d2283 100644 --- a/content/riak/ts/1.4.0/developing.md +++ b/content/riak/ts/1.4.0/developing.md @@ -12,20 +12,19 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/developing/developing/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing" --- -[erlang]: /riak/ts/1.4.0/developing/erlang -[go]: /riak/ts/1.4.0/developing/golang -[http]: /riak/ts/1.4.0/developing/http -[java]: /riak/ts/1.4.0/developing/java -[ruby]: /riak/ts/1.4.0/developing/ruby -[python]: /riak/ts/1.4.0/developing/python -[csharp]: /riak/ts/1.4.0/developing/csharp -[nodejs]: /riak/ts/1.4.0/developing/nodejs -[erlang]: /riak/ts/1.4.0/developing/erlang -[php]: /riak/ts/1.4.0/developing/php +[erlang]: {{< baseurl >}}riak/ts/1.4.0/developing/erlang +[go]: {{< baseurl >}}riak/ts/1.4.0/developing/golang +[http]: {{< baseurl >}}riak/ts/1.4.0/developing/http +[java]: {{< baseurl >}}riak/ts/1.4.0/developing/java +[ruby]: {{< baseurl >}}riak/ts/1.4.0/developing/ruby +[python]: {{< baseurl >}}riak/ts/1.4.0/developing/python +[csharp]: {{< baseurl >}}riak/ts/1.4.0/developing/csharp +[nodejs]: {{< baseurl >}}riak/ts/1.4.0/developing/nodejs +[erlang]: {{< baseurl >}}riak/ts/1.4.0/developing/erlang +[php]: {{< baseurl >}}riak/ts/1.4.0/developing/php You can access Riak TS data over HTTP through the [API][http].
diff --git a/content/riak/ts/1.4.0/developing/csharp.md b/content/riak/ts/1.4.0/developing/csharp.md index 50ad435c68..8f79476871 100644 --- a/content/riak/ts/1.4.0/developing/csharp.md +++ b/content/riak/ts/1.4.0/developing/csharp.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/developing/csharp/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/csharp" --- diff --git a/content/riak/ts/1.4.0/developing/erlang.md b/content/riak/ts/1.4.0/developing/erlang.md index 2dd036c097..b86527415a 100644 --- a/content/riak/ts/1.4.0/developing/erlang.md +++ b/content/riak/ts/1.4.0/developing/erlang.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/developing/erlang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/erlang" --- diff --git a/content/riak/ts/1.4.0/developing/golang.md b/content/riak/ts/1.4.0/developing/golang.md index 4883cabc11..0c7857ac02 100644 --- a/content/riak/ts/1.4.0/developing/golang.md +++ b/content/riak/ts/1.4.0/developing/golang.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/developing/golang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/golang" --- diff --git a/content/riak/ts/1.4.0/developing/http.md b/content/riak/ts/1.4.0/developing/http.md index 10ec6bdc8c..564ecbab48 100644 --- a/content/riak/ts/1.4.0/developing/http.md +++ b/content/riak/ts/1.4.0/developing/http.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/developing/http/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/http" --- diff --git a/content/riak/ts/1.4.0/developing/java.md b/content/riak/ts/1.4.0/developing/java.md index 53f7668b0c..e4aa04bdd0 100644 --- a/content/riak/ts/1.4.0/developing/java.md +++ b/content/riak/ts/1.4.0/developing/java.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/developing/java/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/java" --- diff --git a/content/riak/ts/1.4.0/developing/nodejs.md b/content/riak/ts/1.4.0/developing/nodejs.md index baff8f9391..d51f5f112d 100644 --- a/content/riak/ts/1.4.0/developing/nodejs.md +++ b/content/riak/ts/1.4.0/developing/nodejs.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/developing/nodejs/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/nodejs" --- diff --git a/content/riak/ts/1.4.0/developing/php.md b/content/riak/ts/1.4.0/developing/php.md index 5ddcc5f4bb..ad7f88dd6c 100644 --- a/content/riak/ts/1.4.0/developing/php.md +++ b/content/riak/ts/1.4.0/developing/php.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/developing/php/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/php" --- diff --git a/content/riak/ts/1.4.0/developing/python.md b/content/riak/ts/1.4.0/developing/python.md index 5cd33882cc..5a4d765968 100644 --- a/content/riak/ts/1.4.0/developing/python.md +++ b/content/riak/ts/1.4.0/developing/python.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/developing/python/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/python" --- diff --git a/content/riak/ts/1.4.0/developing/ruby.md b/content/riak/ts/1.4.0/developing/ruby.md index 0ac558e402..f54968396b 100644 --- a/content/riak/ts/1.4.0/developing/ruby.md +++ b/content/riak/ts/1.4.0/developing/ruby.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - 
/riakts/1.4.0/developing/ruby/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/ruby" --- You can develop applications and tools using Riak TS with the Riak Ruby client. diff --git a/content/riak/ts/1.4.0/downloads.md b/content/riak/ts/1.4.0/downloads.md index aa83ae2481..c44b56a373 100644 --- a/content/riak/ts/1.4.0/downloads.md +++ b/content/riak/ts/1.4.0/downloads.md @@ -18,7 +18,6 @@ listed_projects: install_instructions_set: "installing" aliases: - /riakts/1.4.0/downloads/ -canonical_link: "https://docs.basho.com/riak/ts/latest/downloads" --- diff --git a/content/riak/ts/1.4.0/index.md b/content/riak/ts/1.4.0/index.md index aab02a0874..ff5f47c89d 100644 --- a/content/riak/ts/1.4.0/index.md +++ b/content/riak/ts/1.4.0/index.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/ -canonical_link: "https://docs.basho.com/riak/ts/latest" --- diff --git a/content/riak/ts/1.4.0/learn-about.md b/content/riak/ts/1.4.0/learn-about.md index 3598bf9175..b219a76435 100644 --- a/content/riak/ts/1.4.0/learn-about.md +++ b/content/riak/ts/1.4.0/learn-about.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/learn-about/learn-about/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about" --- [table arch]: tablearchitecture/ diff --git a/content/riak/ts/1.4.0/learn-about/bestpractices.md b/content/riak/ts/1.4.0/learn-about/bestpractices.md index 9340a1770e..d2f345d05c 100644 --- a/content/riak/ts/1.4.0/learn-about/bestpractices.md +++ b/content/riak/ts/1.4.0/learn-about/bestpractices.md @@ -13,11 +13,10 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/learn-about/bestpractices/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/bestpractices" --- -[glossary bucket]: http://docs.basho.com/riak/kv/2.1.4/learn/glossary/#bucket +[glossary bucket]: {{< baseurl >}}riak/kv/2.1.4/learn/glossary/#bucket [planning column def]: ../../using/planning/#column-definitions [planning partition]: ../../using/planning/#partition-key [planning primary]: ../../using/planning/#primary-key diff --git a/content/riak/ts/1.4.0/learn-about/sqlriakts.md b/content/riak/ts/1.4.0/learn-about/sqlriakts.md index b86d1bd421..3932dcfd96 100644 --- a/content/riak/ts/1.4.0/learn-about/sqlriakts.md +++ b/content/riak/ts/1.4.0/learn-about/sqlriakts.md @@ -13,7 +13,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/learn-about/sqlriakts -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/sqlriakts" --- diff --git a/content/riak/ts/1.4.0/learn-about/tablearchitecture.md b/content/riak/ts/1.4.0/learn-about/tablearchitecture.md index 25da2ad425..994235acfe 100644 --- a/content/riak/ts/1.4.0/learn-about/tablearchitecture.md +++ b/content/riak/ts/1.4.0/learn-about/tablearchitecture.md @@ -13,7 +13,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/learn-about/advancedplanning/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/tablearchitecture" --- diff --git a/content/riak/ts/1.4.0/learn-about/timestamps.md b/content/riak/ts/1.4.0/learn-about/timestamps.md index 9d277816bc..3e69238c4c 100644 --- a/content/riak/ts/1.4.0/learn-about/timestamps.md +++ b/content/riak/ts/1.4.0/learn-about/timestamps.md @@ -13,7 +13,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/learn-about/timestamps/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/timestamps" --- [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 diff --git 
a/content/riak/ts/1.4.0/releasenotes.md b/content/riak/ts/1.4.0/releasenotes.md index 81de75ee7c..d1e095eb42 100644 --- a/content/riak/ts/1.4.0/releasenotes.md +++ b/content/riak/ts/1.4.0/releasenotes.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/releasenotes -canonical_link: "https://docs.basho.com/riak/ts/latest/releasenotes" --- @@ -30,38 +29,38 @@ Riak TS 1.4.0 delivers a broad range of new functionality and improvements, incl ## New Features -* The GROUP BY statement allows you to pick out and condense rows sharing the same value into a single row. You can read more about `GROUP BY` [here](/riak/ts/1.4.0/using/querying/select/group-by/). +* The GROUP BY statement allows you to pick out and condense rows sharing the same value into a single row. You can read more about `GROUP BY` [here]({{< baseurl >}}riak/ts/1.4.0/using/querying/select/group-by/). * [[PR #1445](https://github.com/basho/riak_kv/pull/1445)] * [[riak_core PR #848](https://github.com/basho/riak_core/pull/848)] * [[riak_ql PR #132](https://github.com/basho/riak_ql/pull/132)] -* [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) strings are now supported as timestamps for SELECT and INSERT statements in Riak TS. You can find out more about timestamps and ISO 8601 [here](/riak/ts/1.4.0/using/timerepresentations/). +* [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) strings are now supported as timestamps for SELECT and INSERT statements in Riak TS. You can find out more about timestamps and ISO 8601 [here]({{< baseurl >}}riak/ts/1.4.0/using/timerepresentations/). * [[PR #1444](https://github.com/basho/riak_kv/pull/1444)] * [[riak_core PR #847](https://github.com/basho/riak_core/pull/847)] * [[riak PR #862](https://github.com/basho/riak/pull/862)] * [[riak_ee PR #403](https://github.com/basho/riak_ee/pull/403)] * [[riak_shell PR #43](https://github.com/basho/riak_shell/pull/43)] -* You can now configure global object expiry (a.k.a. time to live - TTL) for your Riak TS data. Read more about data expiry [here](/riak/ts/1.4.0/using/global-object-expiration/). +* You can now configure global object expiry (a.k.a. time to live - TTL) for your Riak TS data. Read more about data expiry [here]({{< baseurl >}}riak/ts/1.4.0/using/global-object-expiration/). * [[eleveldb PR #210](https://github.com/basho/eleveldb/pull/210)] -* `SHOW TABLES`, which lists all the TS tables you've created, is now available. You can read more about the SHOW TABLES statement [here](/riak/ts/1.4.0/using/querying/show-tables/). +* `SHOW TABLES`, which lists all the TS tables you've created, is now available. You can read more about the SHOW TABLES statement [here]({{< baseurl >}}riak/ts/1.4.0/using/querying/show-tables/). * [[riak_ql PR #133](https://github.com/basho/riak_ql/pull/133)] * [[PR #1448](https://github.com/basho/riak_kv/pull/1448)] * [[riak_shell PR #44](https://github.com/basho/riak_shell/pull/44)] -* Riak TS 1.4.0 supports rolling upgrades from 1.3.1 and downgrades to 1.3.1. You can read about how to perform an upgrade or downgrade [here](/riak/ts/1.4.0/setup/). +* Riak TS 1.4.0 supports rolling upgrades from 1.3.1 and downgrades to 1.3.1. You can read about how to perform an upgrade or downgrade [here]({{< baseurl >}}riak/ts/1.4.0/setup/). {{% note title="Note on Downgrading" %}} -If you freshly installed TS 1.4.0 and did NOT upgrade from 1.3.1, and then you choose to downgrade to 1.3.1, you will need to change your riak.conf to preserve your configuration settings. Read more about that process [here](/riak/ts/1.4.0/setup/downgrading). 
+If you freshly installed TS 1.4.0 and did NOT upgrade from 1.3.1, and then you choose to downgrade to 1.3.1, you will need to change your riak.conf to preserve your configuration settings. Read more about that process [here]({{< baseurl >}}riak/ts/1.4.0/setup/downgrading). {{% /note %}} ## Additions -* New configuration settings for Riak TS have been added to riak.conf. You can read more about TS's configuration options [here](/riak/ts/1.4.0/using/configuring/). Additionally, the old configuration settings have been exposed and are being deprecated. +* New configuration settings for Riak TS have been added to riak.conf. You can read more about TS's configuration options [here]({{< baseurl >}}riak/ts/1.4.0/using/configuring/). Additionally, the old configuration settings have been exposed and are being deprecated. >If you were using the old configuration settings, please update riak.conf to use the new settings. The older settings are scheduled to be deprecated. ## Changes -* Riak TS now has some TS-specific security settings. You can read more about security topics in Riak TS [here](/riak/ts/1.4.0/using/security/). [[PR #1452](https://github.com/basho/riak_kv/pull/1452)] +* Riak TS now has some TS-specific security settings. You can read more about security topics in Riak TS [here]({{< baseurl >}}riak/ts/1.4.0/using/security/). [[PR #1452](https://github.com/basho/riak_kv/pull/1452)] * The DESCRIBE statement now returns additional information about the interval and unit of time in your TS table. [[PR #1438](https://github.com/basho/riak_kv/pull/1438)] * LevelDB now uses LZ4 as an internal compression mechanism. This change should provide a performance boost for LevelDB. [[PR #208](https://github.com/basho/eleveldb/pull/208)] diff --git a/content/riak/ts/1.4.0/setup.md b/content/riak/ts/1.4.0/setup.md index 6d63ea768b..0ef8f182d0 100644 --- a/content/riak/ts/1.4.0/setup.md +++ b/content/riak/ts/1.4.0/setup.md @@ -14,12 +14,11 @@ version_history: in: "1.4.0+" aliases: - /riakts/1.4.0/setup/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/" --- -[install]: /riak/ts/1.4.0/setup/installing -[upgrade]: /riak/ts/1.4.0/setup/upgrading -[downgrade]: /riak/ts/1.4.0/setup/downgrading +[install]: {{< baseurl >}}riak/ts/1.4.0/setup/installing +[upgrade]: {{< baseurl >}}riak/ts/1.4.0/setup/upgrading +[downgrade]: {{< baseurl >}}riak/ts/1.4.0/setup/downgrading ## In This Section diff --git a/content/riak/ts/1.4.0/setup/downgrading.md b/content/riak/ts/1.4.0/setup/downgrading.md index 5e010ce373..5d6fc04944 100644 --- a/content/riak/ts/1.4.0/setup/downgrading.md +++ b/content/riak/ts/1.4.0/setup/downgrading.md @@ -15,10 +15,9 @@ version_history: aliases: - /riakts/1.4.0/setup/downgrading/ - /riakts/1.4.0/downgrading/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/downgrading/" --- -[ts upgrade]: /riak/ts/1.4.0/setup/upgrading +[ts upgrade]: {{< baseurl >}}riak/ts/1.4.0/setup/upgrading [change riakconf]: #change-riak-conf-before-downgrade Downgrades of Riak TS are tested and supported for two feature release diff --git a/content/riak/ts/1.4.0/setup/installing.md b/content/riak/ts/1.4.0/setup/installing.md index cc753f2072..99309f0892 100644 --- a/content/riak/ts/1.4.0/setup/installing.md +++ b/content/riak/ts/1.4.0/setup/installing.md @@ -19,15 +19,14 @@ aliases: - /riakts/1.4.0/installing/installing/ - /riakts/1.4.0/setup/installing/ - /riak/ts/1.4.0/installing/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/" --- [AWS]: aws/ -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: 
{{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [Centos]: rhel-centos/ [Debian]: debian-ubuntu/ -[download]: /riak/ts/1.4.0/downloads/ +[download]: {{< baseurl >}}riak/ts/1.4.0/downloads/ [OSX]: mac-osx/ [source]: source/ [Ubuntu]: debian-ubuntu/ diff --git a/content/riak/ts/1.4.0/setup/installing/aws.md b/content/riak/ts/1.4.0/setup/installing/aws.md index e4bbbde5f4..433accd067 100644 --- a/content/riak/ts/1.4.0/setup/installing/aws.md +++ b/content/riak/ts/1.4.0/setup/installing/aws.md @@ -18,14 +18,13 @@ aliases: - /riakts/1.4.0/installing/aws/ - /riakts/1.4.0/setup/installing/aws/ - /riak/ts/1.4.0/installing/aws/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/aws/" --- [AWS]: http://aws.amazon.com -[download]: /riak/ts/1.4.0/downloads/ +[download]: {{< baseurl >}}riak/ts/1.4.0/downloads/ [ec2 guide]: http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html -[security basics]: /riak/ts/1.4.0/using/security/ +[security basics]: {{< baseurl >}}riak/ts/1.4.0/using/security/ Riak TS can be installed on AWS virtual machines (VMs) using a binary @@ -42,7 +41,7 @@ Get started by launching a Riak TS virtual machine via the AWS Marketplace. (You 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair. - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{< baseurl >}}images/aws-marketplace-settings.png) 4. Then click the **Accept Terms and Launch with 1-Click** button. @@ -67,7 +66,7 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{< baseurl >}}images/aws-marketplace-security-group.png) We also recommend that you read more about [Security in TS][security basics]. 
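Once the instance is up and the security group rules are in place, it is worth confirming the node is reachable over Protocol Buffers before going further. A minimal sketch using the official Riak Python client — the public IP is a placeholder, and 8087 is the default Protocol Buffers port opened above:

```python
import riak

# Placeholder public IP of the EC2 instance; 8087 is the default
# Protocol Buffers port allowed through the security group.
client = riak.RiakClient(host='203.0.113.10', pb_port=8087)

# ping() returns True when the node is reachable and responding.
print(client.ping())
```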
diff --git a/content/riak/ts/1.4.0/setup/installing/debian-ubuntu.md b/content/riak/ts/1.4.0/setup/installing/debian-ubuntu.md index e59d762998..27b1e4f920 100644 --- a/content/riak/ts/1.4.0/setup/installing/debian-ubuntu.md +++ b/content/riak/ts/1.4.0/setup/installing/debian-ubuntu.md @@ -18,13 +18,12 @@ aliases: - /riakts/1.4.0/installing/debian-ubuntu/ - /riakts/1.4.0/setup/installing/debian-ubuntu/ - /riak/ts/1.4.0/installing/debian-ubuntu/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/debian-ubuntu/" --- -[download]: /riak/ts/1.4.0/downloads/ -[openfileslimit]: /riak/kv/2.1.4/using/performance/open-files-limit -[planning]: /riak/ts/1.4.0/using/planning -[security basics pam]: /riak/ts/1.4.0/using/security/sources-management/#pam-based-authentication +[download]: {{< baseurl >}}riak/ts/1.4.0/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.1.4/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.4.0/using/planning +[security basics pam]: {{< baseurl >}}riak/ts/1.4.0/using/security/sources-management/#pam-based-authentication Riak TS can be installed on Debian or Ubuntu-based systems using a binary diff --git a/content/riak/ts/1.4.0/setup/installing/mac-osx.md b/content/riak/ts/1.4.0/setup/installing/mac-osx.md index 2cbed4c643..d247e246f0 100644 --- a/content/riak/ts/1.4.0/setup/installing/mac-osx.md +++ b/content/riak/ts/1.4.0/setup/installing/mac-osx.md @@ -18,13 +18,12 @@ aliases: - /riakts/1.4.0/installing/mac-osx/ - /riakts/1.4.0/setup/installing/mac-osx/ - /riak/ts/1.4.0/installing/mac-osx/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/mac-osx/" --- -[download]: /riak/ts/1.4.0/downloads/ -[openfileslimit]: /riak/kv/2.1.4/using/performance/open-files-limit -[planning]: /riak/ts/1.4.0/using/planning +[download]: {{< baseurl >}}riak/ts/1.4.0/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.1.4/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.4.0/using/planning Riak TS can be installed on Mac OS X systems using a binary package available [here][download]. 
diff --git a/content/riak/ts/1.4.0/setup/installing/rhel-centos.md b/content/riak/ts/1.4.0/setup/installing/rhel-centos.md index 244128f25b..bc94fa24f5 100644 --- a/content/riak/ts/1.4.0/setup/installing/rhel-centos.md +++ b/content/riak/ts/1.4.0/setup/installing/rhel-centos.md @@ -18,12 +18,11 @@ aliases: - /riakts/1.4.0/installing/rhel-centos/ - /riakts/1.4.0/setup/installing/rhel-centos/ - /riak/ts/1.4.0/installing/rhel-centos/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/rhel-centos/" --- -[download]: /riak/ts/1.4.0/downloads/ -[openfileslimit]: /riak/kv/2.1.4/using/performance/open-files-limit -[planning]: /riak/ts/1.4.0/using/planning/ +[download]: {{< baseurl >}}riak/ts/1.4.0/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.1.4/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.4.0/using/planning/ Riak TS can be installed on CentOS-based systems using a binary diff --git a/content/riak/ts/1.4.0/setup/installing/source.md b/content/riak/ts/1.4.0/setup/installing/source.md index 147b5c44b6..a3138d393d 100644 --- a/content/riak/ts/1.4.0/setup/installing/source.md +++ b/content/riak/ts/1.4.0/setup/installing/source.md @@ -18,16 +18,15 @@ aliases: - /riakts/1.4.0/installing/source/ - /riakts/1.4.0/setup/installing/source/ - /riak/ts/1.4.0/installing/source/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/" --- -[download]: /riak/ts/1.4.0/downloads/ +[download]: {{< baseurl >}}riak/ts/1.4.0/downloads/ [Erlang]: http://www.erlang.org/ [GCC]: https://gcc.gnu.org/ [Git]: https://git-scm.com/ -[install erlang]: /riak/ts/1.4.0/setup/installing/source/erlang -[planning]: /riak/ts/1.4.0/using/planning/ +[install erlang]: {{< baseurl >}}riak/ts/1.4.0/setup/installing/source/erlang +[planning]: {{< baseurl >}}riak/ts/1.4.0/using/planning/ [Riak TS GitHub repository]: https://github.com/basho/riak/tree/riak_ts-1.4.0 diff --git a/content/riak/ts/1.4.0/setup/installing/source/erlang.md b/content/riak/ts/1.4.0/setup/installing/source/erlang.md index 64c31b3ff3..7facc47d3a 100644 --- a/content/riak/ts/1.4.0/setup/installing/source/erlang.md +++ b/content/riak/ts/1.4.0/setup/installing/source/erlang.md @@ -18,7 +18,6 @@ aliases: - /riakts/1.4.0/installing/source/erlang/ - /riakts/1.4.0/setup/installing/source/erlang/ - /riak/ts/1.4.0/installing/source/erlang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/erlang/" --- @@ -34,7 +33,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/e [make]: http://www.gnu.org/software/make/ [ncurses]: http://www.gnu.org/software/ncurses/ [OpenSSL]: https://www.openssl.org/ -[source]: /riak/ts/1.4.0/setup/installing/source/ +[source]: {{< baseurl >}}riak/ts/1.4.0/setup/installing/source/ [XCode Developer Tools]: https://developer.apple.com/xcode/downloads/ diff --git a/content/riak/ts/1.4.0/setup/upgrading.md b/content/riak/ts/1.4.0/setup/upgrading.md index d2b41b9b71..7a91f82ae6 100644 --- a/content/riak/ts/1.4.0/setup/upgrading.md +++ b/content/riak/ts/1.4.0/setup/upgrading.md @@ -15,21 +15,20 @@ version_history: aliases: - /riakts/1.4.0/setup/upgrading/ - /riakts/1.4.0/upgrading/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/upgrading/" --- -[use admin commands]: /riak/kv/2.1.4/using/admin/commands -[use admin riak-admin]: /riak/kv/2.1.4/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.1.4/developing/usage/secondary-indexes +[use admin commands]: {{< baseurl >}}riak/kv/2.1.4/using/admin/commands +[use admin riak-admin]: {{< baseurl >}}riak/kv/2.1.4/using/admin/riak-admin +[usage 
secondary-indexes]: {{< baseurl >}}riak/kv/2.1.4/developing/usage/secondary-indexes [riak ts enterprise]: http://basho.com/products/riak-ts/ -[cluster ops mdc]: /riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.1.4/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.1.4/using/reference/jmx -[snmp]: /riak/kv/2.1.4/using/reference/snmp +[cluster ops mdc]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{< baseurl >}}riak/kv/2.1.4/configuring/v3-multi-datacenter +[jmx monitor]: {{< baseurl >}}riak/kv/2.1.4/using/reference/jmx +[snmp]: {{< baseurl >}}riak/kv/2.1.4/using/reference/snmp {{% note title="Note on upgrading Riak TS from older versions" %}} [contact]: http://basho.com/contact/ -[use admin riak control]: /riak/kv/2.1.4/using/admin/riak-control +[use admin riak control]: {{< baseurl >}}riak/kv/2.1.4/using/admin/riak-control Upgrading Riak TS is only supported for Riak TS 1.3.1 to 1.4.0. For assistance upgrading from earlier versions to 1.4.0 [contact Client Services][contact]. diff --git a/content/riak/ts/1.4.0/using.md b/content/riak/ts/1.4.0/using.md index 7a4d769546..1693f272e8 100644 --- a/content/riak/ts/1.4.0/using.md +++ b/content/riak/ts/1.4.0/using.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using" --- @@ -20,7 +19,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using" [aggregate]: querying/select/aggregate-functions/ [arithmetic]: querying/select/arithmetic-operations/ [configuring]: configuring/ -[download]: /riak/ts/1.4.0/downloads/ +[download]: {{< baseurl >}}riak/ts/1.4.0/downloads/ [installing]: ../setup/installing/ [mdc]: mdc/ [planning]: planning/ diff --git a/content/riak/ts/1.4.0/using/configuring.md b/content/riak/ts/1.4.0/using/configuring.md index f58c7966cd..8cec59357b 100644 --- a/content/riak/ts/1.4.0/using/configuring.md +++ b/content/riak/ts/1.4.0/using/configuring.md @@ -16,7 +16,6 @@ version_history: - ["<=1.4.0", "using/configuring"] aliases: - /riakts/1.4.0/using/configuring/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/configuring" --- @@ -72,7 +71,7 @@ riak_kv.query.timeseries.timeout = 10000 ### Maximum quanta {{% note %}} -Before you change this setting, we recommend you take a moment to determine whether requantizing your data would be a better option. If your data is not optimally quantized, upping the maximum quanta setting may make your queries less efficient. You can read more about best practices for quantizing your data [here](/riak/ts/1.4.0/learn-about/bestpractices/#quantum). +Before you change this setting, we recommend you take a moment to determine whether requantizing your data would be a better option. If your data is not optimally quantized, upping the maximum quanta setting may make your queries less efficient. You can read more about best practices for quantizing your data [here]({{< baseurl >}}riak/ts/1.4.0/learn-about/bestpractices/#quantum). 
{{% /note %}} diff --git a/content/riak/ts/1.4.0/using/core-fundamentals.md b/content/riak/ts/1.4.0/using/core-fundamentals.md index c5dd52e86e..7dec2c1a00 100644 --- a/content/riak/ts/1.4.0/using/core-fundamentals.md +++ b/content/riak/ts/1.4.0/using/core-fundamentals.md @@ -12,51 +12,50 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/core-fundamentals/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/core-fundamentals" --- -[Riak KV]: /riak/kv/2.1.4/ +[Riak KV]: {{< baseurl >}}riak/kv/2.1.4/ Riak TS shares the same core codebase as [Riak KV], which allows you to operate a TS cluster much the same as you would operate a KV cluster. Below are some links to help you get started. ### Configuration -Basic Configuration will help you set up your Riak core configuration. +Basic Configuration will help you set up your Riak core configuration. -Managing Configuration will show you how to retrieve your configuration, check your settings, and debug your configuration. +Managing Configuration will show you how to retrieve your configuration, check your settings, and debug your configuration. -Configuration Reference provides you with everything you need to know about configuring Riak core. +Configuration Reference provides you with everything you need to know about configuring Riak core. -Load Balancing will walk you through configuring a load balancer with your Riak cluster. +Load Balancing will walk you through configuring a load balancer with your Riak cluster. ### Cluster Operations -Running a Cluster gives you a basic walkthrough of how to run a Riak cluster. +Running a Cluster gives you a basic walkthrough of how to run a Riak cluster. -Cluster Administration provides a series of links to information on various ways to administer your cluster. +Cluster Administration provides a series of links to information on various ways to administer your cluster. -Adding & Removing Nodes walks you through the process of adding or removing nodes in your cluster. +Adding & Removing Nodes walks you through the process of adding or removing nodes in your cluster. -Changing Cluster Information will show you how to change various parts of your cluster. +Changing Cluster Information will show you how to change various parts of your cluster. -Replace a Node is a step-by-step guide for how to replace a node in your cluster. +Replace a Node is a step-by-step guide for how to replace a node in your cluster. -Inspect a Node shows you the steps and tools for inspecting nodes in your cluster. +Inspect a Node shows you the steps and tools for inspecting nodes in your cluster. -Logging will provide you the steps for enabling and disabling debug logging. +Logging will provide you the steps for enabling and disabling debug logging. -Backing Up is a how-to guide for backing up your data. +Backing Up is a how-to guide for backing up your data. -Handoff will tell you everything you need to know to enable and disable handoff. +Handoff will tell you everything you need to know to enable and disable handoff. ### Repair, Tuning, and Reference -Repair & Recovery will cover all of the important topics of what can go wrong and what you can do to fix it. +Repair & Recovery will cover all of the important topics of what can go wrong and what you can do to fix it. -Performance will give you all the information you need to tune your cluster configurations to optimize performance. +Performance will give you all the information you need to tune your cluster configurations to optimize performance. 
-Reference will provide you with explanations of various core functions, such as logging, handoff, and monitoring. +Reference will provide you with explanations of various core functions, such as logging, handoff, and monitoring. diff --git a/content/riak/ts/1.4.0/using/creating-activating.md b/content/riak/ts/1.4.0/using/creating-activating.md index 337c1495a1..f9676ffd25 100644 --- a/content/riak/ts/1.4.0/using/creating-activating.md +++ b/content/riak/ts/1.4.0/using/creating-activating.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/creating-activating/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/creating-activating" --- diff --git a/content/riak/ts/1.4.0/using/global-object-expiration.md b/content/riak/ts/1.4.0/using/global-object-expiration.md index 1fdb080423..2591a362dc 100644 --- a/content/riak/ts/1.4.0/using/global-object-expiration.md +++ b/content/riak/ts/1.4.0/using/global-object-expiration.md @@ -17,7 +17,6 @@ version_history: - ["<=1.4.0", "using/global-object-expiration"] aliases: - /riakts/1.4.0/using/global-object-expiration/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/global-object-expiration" --- [ttl]: https://en.wikipedia.org/wiki/Time_to_live diff --git a/content/riak/ts/1.4.0/using/mdc.md b/content/riak/ts/1.4.0/using/mdc.md index e8e366c993..a1f42f95e4 100644 --- a/content/riak/ts/1.4.0/using/mdc.md +++ b/content/riak/ts/1.4.0/using/mdc.md @@ -17,15 +17,14 @@ version_history: - ["<=1.4.0", "using/mdc"] aliases: - /riakts/1.4.0/using/mdc -canonical_link: "https://docs.basho.com/riak/ts/latest/using/mdc" --- -[activating]: /riak/ts/1.4.0/using/creating-activating -[cluster ops v3 mdc]: /riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter +[activating]: {{< baseurl >}}riak/ts/1.4.0/using/creating-activating +[cluster ops v3 mdc]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/v3-multi-datacenter [ee]: http://basho.com/contact/ [Enterprise]: http://basho.com/products/riak-ts/ -[install]: /riak/ts/1.4.0/setup/installing +[install]: {{< baseurl >}}riak/ts/1.4.0/setup/installing Multi-Datacenter (MDC) replication makes it possible to replicate your time series data between Riak clusters. This document will walk through how to configure MDC to work with Riak TS. 
diff --git a/content/riak/ts/1.4.0/using/planning.md b/content/riak/ts/1.4.0/using/planning.md index 56fdf8a779..daf83dc117 100644 --- a/content/riak/ts/1.4.0/using/planning.md +++ b/content/riak/ts/1.4.0/using/planning.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/planning/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/planning" --- diff --git a/content/riak/ts/1.4.0/using/querying.md b/content/riak/ts/1.4.0/using/querying.md index 61dc10d312..5b84b88191 100644 --- a/content/riak/ts/1.4.0/using/querying.md +++ b/content/riak/ts/1.4.0/using/querying.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/querying/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying" --- [activating]: ../creating-activating/ diff --git a/content/riak/ts/1.4.0/using/querying/describe.md b/content/riak/ts/1.4.0/using/querying/describe.md index a07f2ba79e..402bed0a23 100644 --- a/content/riak/ts/1.4.0/using/querying/describe.md +++ b/content/riak/ts/1.4.0/using/querying/describe.md @@ -12,10 +12,9 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/querying/describe -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/describe" --- -[riak shell]: /riak/ts/1.4.0/using/riakshell +[riak shell]: {{< baseurl >}}riak/ts/1.4.0/using/riakshell You can use the DESCRIBE statement to obtain the definition of your Riak TS table. This document will show you how to execute `DESCRIBE` in TS. diff --git a/content/riak/ts/1.4.0/using/querying/explain.md b/content/riak/ts/1.4.0/using/querying/explain.md index 6637900d4a..c12a17f571 100644 --- a/content/riak/ts/1.4.0/using/querying/explain.md +++ b/content/riak/ts/1.4.0/using/querying/explain.md @@ -13,13 +13,12 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/querying/explain -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/explain" --- -[creating-activating]: /riak/ts/1.4.0/using/creating-activating -[develop]: /riak/ts/1.4.0/developing -[planning]: /riak/ts/1.4.0/using/planning -[riak shell]: /riak/ts/1.4.0/using/riakshell +[creating-activating]: {{< baseurl >}}riak/ts/1.4.0/using/creating-activating +[develop]: {{< baseurl >}}riak/ts/1.4.0/developing +[planning]: {{< baseurl >}}riak/ts/1.4.0/using/planning +[riak shell]: {{< baseurl >}}riak/ts/1.4.0/using/riakshell You can use the EXPLAIN statement to better understand how a query you would like to run will be executed. This document will show you how to use `EXPLAIN` in Riak TS. diff --git a/content/riak/ts/1.4.0/using/querying/guidelines.md b/content/riak/ts/1.4.0/using/querying/guidelines.md index 0d6c8cc60b..a7fc65d4a3 100644 --- a/content/riak/ts/1.4.0/using/querying/guidelines.md +++ b/content/riak/ts/1.4.0/using/querying/guidelines.md @@ -13,7 +13,6 @@ toc: true aliases: - /riakts/1.4.0/using/querying/basic-querying - /riakts/1.4.0/using/querying/guidelines -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/guidelines" --- [table arch]: ../../../learn-about/tablearchitecture/#data-modeling @@ -21,7 +20,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/guidelines [writing]: ../../writingdata/ [planning]: ../../planning#column-definitions [iso8601]: ../../../timerepresentations/ -[SELECT]: /riak/ts/1.4.0/using/querying/SELECT#iso_8601 +[SELECT]: {{< baseurl >}}riak/ts/1.4.0/using/querying/SELECT#iso_8601 Riak TS supports several kinds of queries of your TS data. 
To create the most successful queries possible, there are some guidelines and limitations you should know. @@ -59,7 +58,7 @@ Any quantized field in your partition key must be included in the query as eithe * Invalid: `time > 1449864277000 or time < 1449864290000` {{% note title="A Note About `SELECT`" %}} -It is possible to use ISO 8601-compliant date/time strings rather than integer timestamps in SELECT statements. Please see [SELECT](/riak/ts/1.4.0/using/querying/select/#iso-8601) for an example or [Time Representations](/riak/ts/1.4.0/using/timerepresentations/) for more information. +It is possible to use ISO 8601-compliant date/time strings rather than integer timestamps in SELECT statements. Please see [SELECT]({{< baseurl >}}riak/ts/1.4.0/using/querying/select/#iso-8601) for an example or [Time Representations]({{< baseurl >}}riak/ts/1.4.0/using/timerepresentations/) for more information. {{% /note %}} diff --git a/content/riak/ts/1.4.0/using/querying/select.md b/content/riak/ts/1.4.0/using/querying/select.md index 929abcdf90..b6a79f25dd 100644 --- a/content/riak/ts/1.4.0/using/querying/select.md +++ b/content/riak/ts/1.4.0/using/querying/select.md @@ -12,18 +12,17 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/querying/select -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select" --- [aggregate functions]: aggregate-functions/ [arithmetic operations]: arithmetic-operations/ [GROUP BY]: group-by/ -[guidelines]: /riak/ts/1.4.0/using/querying/guidelines +[guidelines]: {{< baseurl >}}riak/ts/1.4.0/using/querying/guidelines [iso8601]: ../../timerepresentations/ -[iso8601 accuracy]: /riak/ts/1.4.0/using/timerepresentations/#reduced-accuracy +[iso8601 accuracy]: {{< baseurl >}}riak/ts/1.4.0/using/timerepresentations/#reduced-accuracy [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 -[learn timestamps accuracy]: /riak/ts/1.4.0/learn-about/timestamps/#reduced-accuracy +[learn timestamps accuracy]: {{< baseurl >}}riak/ts/1.4.0/learn-about/timestamps/#reduced-accuracy You can use the SELECT statement in Riak TS to query your TS dataset. This document will show you how to run various queries using `SELECT`. diff --git a/content/riak/ts/1.4.0/using/querying/select/aggregate-functions.md b/content/riak/ts/1.4.0/using/querying/select/aggregate-functions.md index 5d0ceb6560..ec9c6cc799 100644 --- a/content/riak/ts/1.4.0/using/querying/select/aggregate-functions.md +++ b/content/riak/ts/1.4.0/using/querying/select/aggregate-functions.md @@ -10,7 +10,6 @@ menu: project: "riak_ts" project_version: "1.4.0" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/aggregate-functions" version_history: present_from: "1.4.0+" moved: diff --git a/content/riak/ts/1.4.0/using/querying/select/arithmetic-operations.md b/content/riak/ts/1.4.0/using/querying/select/arithmetic-operations.md index b2e625fe6a..44bd2fa904 100644 --- a/content/riak/ts/1.4.0/using/querying/select/arithmetic-operations.md +++ b/content/riak/ts/1.4.0/using/querying/select/arithmetic-operations.md @@ -10,7 +10,6 @@ menu: project: "riak_ts" project_version: "1.4.0" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/arithmetic-operations" version_history: present_from: "1.4.0+" moved: @@ -22,7 +21,7 @@ aliases: --- -[querying select]: /riak/ts/1.4.0/using/querying/#select-query +[querying select]: {{< baseurl >}}riak/ts/1.4.0/using/querying/#select-query Riak TS supports arithmetic operations in the SELECT statement. 
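Those querying guidelines are easiest to see in one concrete query. A minimal sketch using the official Riak Python client and its `ts_query` call — the `GeoCheckin` table and its `myfamily`/`myseries`/`time` columns are illustrative assumptions. The WHERE clause bounds the quantized `time` field on both sides and pins the remaining partition-key fields with equality, as the guidelines require, and the projection includes a simple arithmetic operation:

```python
import riak

client = riak.RiakClient(host='127.0.0.1', pb_port=8087)

# Quantized field (time) bounded above and below; the other
# partition-key fields pinned with equality.
query = """
SELECT time, temperature / 1.8
FROM GeoCheckin
WHERE time > 1449864277000 AND time < 1449864290000
  AND myfamily = 'family1' AND myseries = 'series1'
"""

result = client.ts_query('GeoCheckin', query)
for row in result.rows:
    print(row)
```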
diff --git a/content/riak/ts/1.4.0/using/querying/select/group-by.md b/content/riak/ts/1.4.0/using/querying/select/group-by.md index 61ce3a4738..4e77762a5c 100644 --- a/content/riak/ts/1.4.0/using/querying/select/group-by.md +++ b/content/riak/ts/1.4.0/using/querying/select/group-by.md @@ -12,11 +12,10 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/querying/select/group-by -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/group-by" --- [aggregate function]: ../aggregate-functions -[guidelines]: /riak/ts/1.4.0/using/querying/guidelines +[guidelines]: {{< baseurl >}}riak/ts/1.4.0/using/querying/guidelines The GROUP BY statement is used with `SELECT` to pick out and condense rows sharing the same value and return a single row. `GROUP BY` is useful for aggregating an attribute of a device over a time period; for instance, you could use it to pull average values for every 30 minute period over the last 24 hours. diff --git a/content/riak/ts/1.4.0/using/querying/show-tables.md b/content/riak/ts/1.4.0/using/querying/show-tables.md index 257a358790..980c8ffc59 100644 --- a/content/riak/ts/1.4.0/using/querying/show-tables.md +++ b/content/riak/ts/1.4.0/using/querying/show-tables.md @@ -12,10 +12,9 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/querying/show-tables -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/show-tables" --- -[riak shell]: /riak/ts/1.4.0/using/riakshell +[riak shell]: {{< baseurl >}}riak/ts/1.4.0/using/riakshell You can use the SHOW TABLES statement to enumerate the Riak TS tables you have set up. This document will show you how to execute `SHOW TABLES` in TS. diff --git a/content/riak/ts/1.4.0/using/querying/single-key-fetch.md b/content/riak/ts/1.4.0/using/querying/single-key-fetch.md index d24a517035..7643d53291 100644 --- a/content/riak/ts/1.4.0/using/querying/single-key-fetch.md +++ b/content/riak/ts/1.4.0/using/querying/single-key-fetch.md @@ -12,7 +12,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/querying/single-key-fetch -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/single-key-fetch" --- You may find the need to fetch a single key from Riak TS. The below examples show you how to perform a single key fetch in each of our official clients that support Riak TS. 
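In the same vein, a single-key fetch must supply every field of the table's primary key. A hedged sketch using the Python client's `ts_get` — the table, key fields, and values are assumptions for illustration:

```python
import riak

client = riak.RiakClient(host='127.0.0.1', pb_port=8087)

# The key list supplies the full primary key in declaration order:
# family, series, then the timestamp in epoch milliseconds.
obj = client.ts_get('GeoCheckin', ['family1', 'series1', 1420113600000])
print(obj.rows)  # at most one row for a fully specified key
```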
diff --git a/content/riak/ts/1.4.0/using/riakshell.md b/content/riak/ts/1.4.0/using/riakshell.md index 6632d77c54..c2a81d1b6a 100644 --- a/content/riak/ts/1.4.0/using/riakshell.md +++ b/content/riak/ts/1.4.0/using/riakshell.md @@ -12,12 +12,11 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/riakshell/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/riakshell" --- -[nodename]: /riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/ -[creating]: /riak/ts/1.4.0/using/creating-activating -[writing]: /riak/ts/1.4.0/using/writingdata +[nodename]: {{< baseurl >}}riak/kv/2.1.4/using/cluster-operations/changing-cluster-info/ +[creating]: {{< baseurl >}}riak/ts/1.4.0/using/creating-activating +[writing]: {{< baseurl >}}riak/ts/1.4.0/using/writingdata [riak shell README]: https://github.com/basho/riak_shell/blob/develop/README.md diff --git a/content/riak/ts/1.4.0/using/security.md b/content/riak/ts/1.4.0/using/security.md index 167b6878f2..76c749d856 100644 --- a/content/riak/ts/1.4.0/using/security.md +++ b/content/riak/ts/1.4.0/using/security.md @@ -13,7 +13,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/security/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/" --- [security checklist]: ./checklist @@ -90,7 +89,7 @@ cluster on the following TCP ports: Protocol | Port :--------|:---- -Protocol Buffers | TCP port 8087 +Protocol Buffers | TCP port 8087 ## Best Practices diff --git a/content/riak/ts/1.4.0/using/security/checklist.md b/content/riak/ts/1.4.0/using/security/checklist.md index e421ab9fa5..576922e244 100644 --- a/content/riak/ts/1.4.0/using/security/checklist.md +++ b/content/riak/ts/1.4.0/using/security/checklist.md @@ -13,7 +13,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/security/checklist -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/checklist/" --- [enable ssl]: ../enable-disable/#enabling-ssl [security basics]: ../ [security users]: ../user-management [security sources]: ../sources-management [manage permissions]: ../user-management/#managing-permissions -[pbc]: /riak/kv/2.1.4/developing/api/protocol-buffers/ +[pbc]: {{< baseurl >}}riak/kv/2.1.4/developing/api/protocol-buffers/ [security enable disable]: ../enable-disable Before turning on Riak TS security there are key steps all applications need to take. Missing one of these steps may break your application, so make sure you have done each of the following BEFORE enabling security: diff --git a/content/riak/ts/1.4.0/using/security/enable-disable.md b/content/riak/ts/1.4.0/using/security/enable-disable.md index 759ecfa36a..ff17dadd2f 100644 --- a/content/riak/ts/1.4.0/using/security/enable-disable.md +++ b/content/riak/ts/1.4.0/using/security/enable-disable.md @@ -13,7 +13,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/security/enable-disable -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/enable-disable/" --- Riak TS security may be [checked](#checking-security-status), [enabled](#enabling-security), or [disabled](#disabling-security) through the command line, allowing an administrator to change security settings for the whole cluster without needing to go node-by-node. 
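Because checking, enabling, and disabling happen through `riak-admin` rather than a client API, cluster tooling typically shells out to it. A small sketch of such a wrapper — it assumes it runs on a cluster node with `riak-admin` on the PATH and uses the `riak-admin security status|enable|disable` commands described above:

```python
import subprocess

def security(action):
    """Run `riak-admin security <action>` and return its output.

    action: 'status', 'enable', or 'disable'.
    """
    out = subprocess.check_output(['riak-admin', 'security', action])
    return out.decode()

print(security('status'))
```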
diff --git a/content/riak/ts/1.4.0/using/security/notify-basho.md b/content/riak/ts/1.4.0/using/security/notify-basho.md index 6cd8437cd6..bb91c6bca9 100644 --- a/content/riak/ts/1.4.0/using/security/notify-basho.md +++ b/content/riak/ts/1.4.0/using/security/notify-basho.md @@ -13,7 +13,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/security/notify-basho -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/notify-basho/" --- Data security is an important and sensitive issue. A real-world approach to security allows us to balance appropriate levels of security and related overhead while creating a fast, scalable, and operationally straightforward database. diff --git a/content/riak/ts/1.4.0/using/security/sources-management.md b/content/riak/ts/1.4.0/using/security/sources-management.md index bf14e8baa8..f753f9ee15 100644 --- a/content/riak/ts/1.4.0/using/security/sources-management.md +++ b/content/riak/ts/1.4.0/using/security/sources-management.md @@ -13,7 +13,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/security/sources-management -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/sources-management/" --- [cidr]: http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing @@ -22,8 +21,8 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/sources-ma [security enabling]: ../enable-disable/#enabling-security [security add user]: ../user-management/#add-user [root cert]: http://en.wikipedia.org/wiki/Root_certificate -[rolling restart]: /riak/kv/2.1.4/using/repair-recovery/rolling-restart/ -[config ref security]: /riak/kv/2.1.4/configuring/reference/#security +[rolling restart]: {{< baseurl >}}riak/kv/2.1.4/using/repair-recovery/rolling-restart/ +[config ref security]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference/#security [xss]: http://en.wikipedia.org/wiki/Cross-site_scripting [request forgery]: http://en.wikipedia.org/wiki/Cross-site_request_forgery [http referer]: http://en.wikipedia.org/wiki/HTTP_referer diff --git a/content/riak/ts/1.4.0/using/security/user-management.md b/content/riak/ts/1.4.0/using/security/user-management.md index 013f194ea3..18f60901d6 100644 --- a/content/riak/ts/1.4.0/using/security/user-management.md +++ b/content/riak/ts/1.4.0/using/security/user-management.md @@ -13,7 +13,6 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/security/user-management -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/user-management/" --- Riak TS security lets you to control authorization by creating, modifying, and deleting user characteristics and granting users selective access to Riak TS functionality. 
Users can be assigned one or more of the following characteristics: diff --git a/content/riak/ts/1.4.0/using/timerepresentations.md b/content/riak/ts/1.4.0/using/timerepresentations.md index 165ef56aa9..5b7f98867f 100644 --- a/content/riak/ts/1.4.0/using/timerepresentations.md +++ b/content/riak/ts/1.4.0/using/timerepresentations.md @@ -12,15 +12,14 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/timerepresentations/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/timerepresentations" --- [activating]: ../creating-activating/ [planning]: ../planning/ [querying]: ../querying/ -[config reference]: /riak/kv/2.1.4/configuring/reference/#the-advanced-config-file -[MDC]: /riak/ts/1.4.0/using/mdc +[config reference]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference/#the-advanced-config-file +[MDC]: {{< baseurl >}}riak/ts/1.4.0/using/mdc [riak shell]: ../riakshell [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 [learn timestamps]: ../../learn-about/timestamps diff --git a/content/riak/ts/1.4.0/using/writingdata.md b/content/riak/ts/1.4.0/using/writingdata.md index c4dc6d8157..65973928b8 100644 --- a/content/riak/ts/1.4.0/using/writingdata.md +++ b/content/riak/ts/1.4.0/using/writingdata.md @@ -12,15 +12,14 @@ project_version: "1.4.0" toc: true aliases: - /riakts/1.4.0/using/writingdata/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/writingdata" --- [activating]: ../creating-activating/ [planning]: ../planning/ [querying]: ../querying/ -[config reference]: /riak/kv/2.1.4/configuring/reference/#the-advanced-config-file -[MDC]: /riak/ts/1.4.0/using/mdc +[config reference]: {{< baseurl >}}riak/kv/2.1.4/configuring/reference/#the-advanced-config-file +[MDC]: {{< baseurl >}}riak/ts/1.4.0/using/mdc [riakshell]: ../riakshell [iso8601]: ../timerepresentations/ [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 diff --git a/content/riak/ts/1.5.0/add-ons.md b/content/riak/ts/1.5.0/add-ons.md index 2883fc8b8a..057d0370f8 100644 --- a/content/riak/ts/1.5.0/add-ons.md +++ b/content/riak/ts/1.5.0/add-ons.md @@ -10,7 +10,6 @@ menu: weight: 450 pre: tools toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons" --- Here at Basho, we've developed integrations between Riak TS and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set. 
diff --git a/content/riak/ts/1.5.0/add-ons/redis/developing-rra.md b/content/riak/ts/1.5.0/add-ons/redis/developing-rra.md index 61822a6776..a45f11131b 100644 --- a/content/riak/ts/1.5.0/add-ons/redis/developing-rra.md +++ b/content/riak/ts/1.5.0/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.2.0/developing/api/http +[usage bucket types]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/bucket-types/ +[dev api http]: {{< baseurl >}}riak/kv/2.2.0/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.2.0/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context +[apps replication properties]: {{< baseurl >}}riak/kv/2.2.0/developing/app-guide/replication-properties +[usage commit hooks]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/commit-hooks/ +[concept causal context]: {{< baseurl >}}riak/kv/2.2.0/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/ts/1.5.0/add-ons/redis/redis-add-on-features.md b/content/riak/ts/1.5.0/add-ons/redis/redis-add-on-features.md index 85ac815536..ac18081b4a 100644 --- a/content/riak/ts/1.5.0/add-ons/redis/redis-add-on-features.md +++ b/content/riak/ts/1.5.0/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{< baseurl >}}images/redis/GET_seq.msc.png +[SET-sequence]: {{< baseurl >}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{< baseurl >}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{< baseurl >}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{< baseurl >}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits. 
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{< baseurl >}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{< baseurl >}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{< baseurl >}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/ts/1.5.0/add-ons/redis/set-up-rra.md b/content/riak/ts/1.5.0/add-ons/redis/set-up-rra.md index 5563597181..cf5e83e760 100644 --- a/content/riak/ts/1.5.0/add-ons/redis/set-up-rra.md +++ b/content/riak/ts/1.5.0/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/ts/1.5.0/setup/installing -[perf open files]: /riak/ts/1.5.0/setup/installing/rhel-centos/#ulimit +[install index]: {{< baseurl >}}riak/ts/1.5.0/setup/installing +[perf open files]: {{< baseurl >}}riak/ts/1.5.0/setup/installing/rhel-centos/#ulimit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. diff --git a/content/riak/ts/1.5.0/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/ts/1.5.0/add-ons/redis/set-up-rra/deployment-models.md index feacc5b094..17860603ed 100644 --- a/content/riak/ts/1.5.0/add-ons/redis/set-up-rra/deployment-models.md +++ b/content/riak/ts/1.5.0/add-ons/redis/set-up-rra/deployment-models.md @@ -13,9 +13,9 @@ toc: true commercial_offering: true --- -[Local-deployment]: /images/redis/rra_deployment_local.png -[Colocated-deployment]: /images/redis/rra_deployment_colocated.png -[Distributed-deployment]: /images/redis/rra_deployment_distributed.png +[Local-deployment]: {{< baseurl >}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{< baseurl >}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{< baseurl >}}images/redis/rra_deployment_distributed.png ## Deployment Models @@ -24,7 +24,7 @@ commercial_offering: true In a local cache deployment, the RRA and Redis are deployed to the application server. -![Local-deployment](/images/redis/rra_deployment_local.png) +![Local-deployment]({{< baseurl >}}images/redis/rra_deployment_local.png) Connections: @@ -65,7 +65,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so are not multiplied by the number of servers. -![Colocated-deployment](/images/redis/rra_deployment_colocated.png) +![Colocated-deployment]({{< baseurl >}}images/redis/rra_deployment_colocated.png) Connections: @@ -103,7 +103,7 @@ Disadvantages: In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes. 
-![Distributed-deployment](/images/redis/rra_deployment_distributed.png) +![Distributed-deployment]({{< baseurl >}}images/redis/rra_deployment_distributed.png) Connections: diff --git a/content/riak/ts/1.5.0/add-ons/redis/using-rra.md b/content/riak/ts/1.5.0/add-ons/redis/using-rra.md index 69adeec2f9..0330f3a210 100644 --- a/content/riak/ts/1.5.0/add-ons/redis/using-rra.md +++ b/content/riak/ts/1.5.0/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.2.0/developing/api/http/ +[dev api http]: {{< baseurl >}}riak/kv/2.2.0/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector.md index 7342d17ae2..3d6187f504 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector.md @@ -10,7 +10,6 @@ menu: weight: 101 parent: "addons" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector" --- The Spark-Riak connector enables you to connect Spark applications to Riak TS with the Spark RDD and Spark DataFrames APIs. You can write your app in Scala, Python, and Java. The connector makes it easy to partition the data you get from Riak so multiple Spark workers can process the data in parallel, and it has support for failover if a Riak node goes down while your Spark job is running. diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/building-testing.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/building-testing.md index 2109919c1a..e07352d13a 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/building-testing.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/building-testing.md @@ -11,7 +11,6 @@ menu: weight: 103 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/building-testing" --- If you want to download the source code of the Spark-Riak connector, build it, and install the results in your local repo, this is the document for you! Keep reading for instructions on downloading, building, and installing the connector. 
@@ -26,7 +25,7 @@ In order to build the Spark-Riak connector, you'll need to have the following in * [Java OpenJDK 8](http://openjdk.java.net/install/) * [Maven 3](https://maven.apache.org/download.cgi) * [Spark 1.6](http://spark.apache.org/docs/latest/#downloading) -* [Riak TS](http://docs.basho.com/riak/ts/latest/installing/) +* [Riak TS]({{< baseurl >}}riak/ts/latest/installing/) ## Download diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/getting.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/getting.md index 228f04da60..4d6a4f2a93 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/getting.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/getting.md @@ -10,7 +10,6 @@ menu: weight: 102 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/getting" --- > **Note:** diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/quick-start.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/quick-start.md index bb8fdd42f9..a0f5def06a 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/quick-start.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/quick-start.md @@ -11,7 +11,6 @@ menu: weight: 101 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/quick-start" --- This guide will run you through a quick example that uses the Spark-Riak connector to read and write data using Java, Scala, and Python. We will assume you are running this guide on Mac OSX. @@ -19,8 +18,8 @@ This guide will run you through a quick example that uses the Spark-Riak connect ## Prerequisites - Update Homebrew with `brew update`. -- Install Riak TS OSX build. Instruction can be found [here](http://docs.basho.com/riak/ts/1.2.0/installing/mac-osx/) -- Set open file limits for Riak by following the guide [here](http://docs.basho.com/riak/latest/ops/tuning/open-files-limit/#Mac-OS-X). +- Install Riak TS OSX build. Instruction can be found [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/) +- Set open file limits for Riak by following the guide [here]({{< baseurl >}}riak/kv/latest/ops/tuning/open-files-limit/#Mac-OS-X). - Install Spark with `brew install apache-spark`. - Download the Spark-Riak connector uber jar (containing all dependencies) from here: https://github.com/basho/spark-riak-connector/releases/latest. diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage.md index cd31f5d67a..86df799b4f 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage.md @@ -11,7 +11,6 @@ menu: weight: 104 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage" --- This section will walk you through setting up your application for development with the Spark-Riak connector. 
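As a concrete starting point for that setup, a PySpark session can be pointed at a TS node and a table read into a DataFrame. A minimal sketch — the host, table name, and column values are illustrative assumptions:

```python
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

# Point the connector at a Riak TS node (host:pb_port).
conf = SparkConf() \
    .setAppName("ts-example") \
    .set("spark.riak.connection.host", "127.0.0.1:8087")
sc = SparkContext(conf=conf)
sql_context = SQLContext(sc)

# Read a TS table through the connector's DataFrame data source.
df = sql_context.read \
    .format("org.apache.spark.sql.riak") \
    .load("GeoCheckin") \
    .filter("time >= 1420113600000 AND time < 1420115400000 AND "
            "myfamily = 'family1' AND myseries = 'series1'")
df.show()
```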
diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/bulk-write.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/bulk-write.md index 3c411ab8bb..588b4dd7c1 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/bulk-write.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/bulk-write.md @@ -11,7 +11,6 @@ menu: weight: 107 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/bulk-write" --- To write into a Riak TS table, the Spark-Riak Connector splits the initial set of rows into smaller bulks and processes them in parallel. Bulk size can be configured using `spark.riakts.write.bulk-size` property. The default number is `100`. diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/config-spark-context.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/config-spark-context.md index 034a119ff8..be1075d468 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/config-spark-context.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/config-spark-context.md @@ -11,7 +11,6 @@ menu: weight: 101 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/config-spark-context" --- The following `import` statements should be included at the top of your Spark application to enable the connector: diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/dataframes.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/dataframes.md index 1839153809..1320056814 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/dataframes.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/dataframes.md @@ -11,7 +11,6 @@ menu: weight: 104 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/dataframes" --- ## Spark Dataframes With TS Table diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/dates.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/dates.md index fe5a4d6c68..72528a627b 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/dates.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/dates.md @@ -11,7 +11,6 @@ menu: weight: 105 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/dates" --- Riak TS automatically stores all datetimes as a Long integer that represents milliseconds from the [beginning of the epoc](https://en.wikipedia.org/wiki/Unix_time). This is not very human friendly so we have provided a Spark configuration option called `spark.riakts.bindings.timestamp`. This option is for use with Automatic Schema Discovery and allows for conversion from Riak TS datetimes, which are stored as Longs, to Timestamps. The default value of this option is `useTimestamp` which converts Longs to Timestamps. If you would like to use the original Long value, you can use the option value of `useLong`. All conversion takes place during Automatic Schema Discovery when reading from Riak TS tables. 
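Both of the settings just described are ordinary connector options. Continuing the session from the previous sketch, a hedged example of raising the write bulk size and keeping timestamps as raw Longs — the table and values remain illustrative:

```python
# Continuing the SparkContext/SQLContext from the previous sketch.

# Write in bulks of 500 rows instead of the default 100.
df.write \
    .format("org.apache.spark.sql.riak") \
    .option("spark.riakts.write.bulk-size", "500") \
    .mode("append") \
    .save("GeoCheckin")

# Read datetimes back as raw Long epoch-milliseconds rather than
# letting Automatic Schema Discovery convert them to Timestamps.
df_long = sql_context.read \
    .format("org.apache.spark.sql.riak") \
    .option("spark.riakts.bindings.timestamp", "useLong") \
    .load("GeoCheckin")
```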
diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/range-query-partition.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/range-query-partition.md index 702faea058..8bd8c35876 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/range-query-partition.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/range-query-partition.md @@ -11,10 +11,9 @@ menu: weight: 106 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/range-query-partition" --- -Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS](http://docs.basho.com/riakts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time. +Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS]({{< baseurl >}}riak/ts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time. To use this functionality, you must provide the following options: diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/reading-data.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/reading-data.md index 6a7bcf3544..0740e1d240 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/reading-data.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/reading-data.md @@ -11,7 +11,6 @@ menu: weight: 102 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/reading-data" --- ## Reading Data From TS Table diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/streaming-example.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/streaming-example.md index 4845134ebf..e2dbb75b31 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/streaming-example.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/streaming-example.md @@ -11,7 +11,6 @@ menu: weight: 108 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/streaming-example" --- > **Note:** @@ -28,7 +27,7 @@ path/to/kafka/bin/kafka-server-start.sh config/server.properties path/to/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic streaming ``` -We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instruction to do so [here](/riak/ts/1.2.0/installing/mac-osx/). +We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instruction to do so [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/). You will need to build the TS example as well. Please follow the instructions on [building the examples](https://github.com/basho/spark-riak-connector/tree/master/examples#building-and-running-examplesdemos). 
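Returning to the range-query partitioning described above: the sub-range splitting is also driven by connector options. A sketch under the assumption that the relevant option names are `spark.riak.input.split.count` and `spark.riak.partitioning.ts-range-field-name` (check the connector documentation for the exact set your version requires):

```python
# Continuing the same session: split one large time range into 16
# sub-ranges so each Spark partition issues a query that stays
# under the 5-quanta limit.
df_range = sql_context.read \
    .format("org.apache.spark.sql.riak") \
    .option("spark.riak.input.split.count", "16") \
    .option("spark.riak.partitioning.ts-range-field-name", "time") \
    .load("GeoCheckin") \
    .filter("time >= 1420113600000 AND time < 1420372800000 AND "
            "myfamily = 'family1' AND myseries = 'series1'")
```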
diff --git a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/writing-data.md b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/writing-data.md index b09c4e7b01..41d513c3eb 100644 --- a/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/writing-data.md +++ b/content/riak/ts/1.5.0/add-ons/spark-riak-connector/usage/writing-data.md @@ -11,7 +11,6 @@ menu: weight: 103 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/writing-data" --- ## Writing Data To TS Table diff --git a/content/riak/ts/1.5.0/configuring.md b/content/riak/ts/1.5.0/configuring.md index aa6166f163..3b9eb2115b 100644 --- a/content/riak/ts/1.5.0/configuring.md +++ b/content/riak/ts/1.5.0/configuring.md @@ -16,15 +16,14 @@ version_history: - ["<=1.4.0", "using/configuring"] aliases: - /riakts/1.5.0/configuring/ -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring" --- -[riakconf]: /riak/ts/1.5.0/configuring/riakconf/ -[mdc]: /riak/ts/1.5.0/configuring/mdc/ -[global expiry]: /riak/ts/1.5.0/configuring/global-object-expiration/ -[kv config]: /riak/kv/2.2.0/configuring/reference -[WITH]: /riak/ts/1.5.0/using/creating-activating/#using-the-with-clause +[riakconf]: {{< baseurl >}}riak/ts/1.5.0/configuring/riakconf/ +[mdc]: {{< baseurl >}}riak/ts/1.5.0/configuring/mdc/ +[global expiry]: {{< baseurl >}}riak/ts/1.5.0/configuring/global-object-expiration/ +[kv config]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference +[WITH]: {{< baseurl >}}riak/ts/1.5.0/using/creating-activating/#using-the-with-clause Riak TS mostly relies on Riak KV's [default configuration settings][kv config]. However, there are a few TS-specific configurations you should know about: diff --git a/content/riak/ts/1.5.0/configuring/global-object-expiration.md b/content/riak/ts/1.5.0/configuring/global-object-expiration.md index 4ce271523e..3323082960 100644 --- a/content/riak/ts/1.5.0/configuring/global-object-expiration.md +++ b/content/riak/ts/1.5.0/configuring/global-object-expiration.md @@ -17,7 +17,6 @@ version_history: - ["<=1.4.0", "using/global-object-expiration"] aliases: - /riakts/1.5.0/configuring/global-object-expiration/ -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring/global-object-expiration" --- [ttl]: https://en.wikipedia.org/wiki/Time_to_live diff --git a/content/riak/ts/1.5.0/configuring/mdc.md b/content/riak/ts/1.5.0/configuring/mdc.md index ef4ed240c1..32b7b9eacf 100644 --- a/content/riak/ts/1.5.0/configuring/mdc.md +++ b/content/riak/ts/1.5.0/configuring/mdc.md @@ -18,15 +18,14 @@ version_history: - ["<=1.4.0", "using/mdc"] aliases: - /riakts/1.5.0/configuring/mdc -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring/mdc" --- -[activating]: /riak/ts/1.5.0/using/creating-activating -[cluster ops v3 mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[activating]: {{< baseurl >}}riak/ts/1.5.0/using/creating-activating +[cluster ops v3 mdc]: {{< baseurl >}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter [ee]: http://basho.com/contact/ [Enterprise]: http://basho.com/products/riak-ts/ -[install]: /riak/ts/1.5.0/setup/installing +[install]: {{< baseurl >}}riak/ts/1.5.0/setup/installing Multi-Datacenter (MDC) replication makes it possible to replicate your time series data between Riak clusters. This document will walk through how to configure MDC to work with Riak TS. @@ -72,7 +71,7 @@ data definition language (DDL) on each cluster to make certain they are equivalent before synchronization occurs.
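Since mismatched DDL is the failure mode the MDC paragraph above warns about, a pre-flight check is easy to script. A hedged sketch, assuming the official `riak` Python client and that `SHOW CREATE TABLE` (see the tip below) can be issued through `ts_query` like any other statement; the hostnames and table are placeholders.

```python
# Hedged sketch: verify a table's DDL matches on both clusters before
# enabling MDC sync. Assumes the official riak Python client; hosts and
# table name are placeholders.
import riak

def table_ddl(host, table):
    client = riak.RiakClient(host=host, pb_port=8087)
    # assumes SHOW CREATE TABLE can be issued via ts_query like other SQL
    return client.ts_query(table, "SHOW CREATE TABLE {0}".format(table)).rows

if table_ddl("cluster-a.example.com", "GeoCheckin") != \
   table_ddl("cluster-b.example.com", "GeoCheckin"):
    raise SystemExit("DDL differs between clusters; fix before synchronization")
```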
{{% note title="Tip" %}} -[`SHOW CREATE TABLE`](/riak/ts/1.5.0/using/querying/show-create-table/) makes this much easier. +[`SHOW CREATE TABLE`]({{}}riak/ts/1.5.0/using/querying/show-create-table/) makes this much easier. {{% /note %}} diff --git a/content/riak/ts/1.5.0/configuring/riakconf.md b/content/riak/ts/1.5.0/configuring/riakconf.md index 5d4736ce71..75b3c05b95 100644 --- a/content/riak/ts/1.5.0/configuring/riakconf.md +++ b/content/riak/ts/1.5.0/configuring/riakconf.md @@ -16,12 +16,11 @@ version_history: - [">=1.5.0", "using/configuring"] aliases: - /riakts/1.5.0/configuring/riakconf -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring/riakconf" --- [glossary quanta]: ../../learn/glossary/quanta -[Riak object settings]: /riak/kv/2.2.0/configuring/reference/#object-settings +[Riak object settings]: {{}}riak/kv/2.2.0/configuring/reference/#object-settings Riak TS exposes a few configuration settings in riak.conf. This document will walk you through the TS configurations. diff --git a/content/riak/ts/1.5.0/developing.md b/content/riak/ts/1.5.0/developing.md index 7d146309ce..91b9386280 100644 --- a/content/riak/ts/1.5.0/developing.md +++ b/content/riak/ts/1.5.0/developing.md @@ -12,20 +12,19 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/developing/developing/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing" --- -[erlang]: /riak/ts/1.5.0/developing/erlang -[go]: /riak/ts/1.5.0/developing/golang -[http]: /riak/ts/1.5.0/developing/http -[java]: /riak/ts/1.5.0/developing/java -[ruby]: /riak/ts/1.5.0/developing/ruby -[python]: /riak/ts/1.5.0/developing/python -[csharp]: /riak/ts/1.5.0/developing/csharp -[nodejs]: /riak/ts/1.5.0/developing/nodejs -[erlang]: /riak/ts/1.5.0/developing/erlang -[php]: /riak/ts/1.5.0/developing/php +[erlang]: {{}}riak/ts/1.5.0/developing/erlang +[go]: {{}}riak/ts/1.5.0/developing/golang +[http]: {{}}riak/ts/1.5.0/developing/http +[java]: {{}}riak/ts/1.5.0/developing/java +[ruby]: {{}}riak/ts/1.5.0/developing/ruby +[python]: {{}}riak/ts/1.5.0/developing/python +[csharp]: {{}}riak/ts/1.5.0/developing/csharp +[nodejs]: {{}}riak/ts/1.5.0/developing/nodejs +[erlang]: {{}}riak/ts/1.5.0/developing/erlang +[php]: {{}}riak/ts/1.5.0/developing/php You can access Riak TS data over HTTP through the [API][http]. 
diff --git a/content/riak/ts/1.5.0/developing/csharp.md b/content/riak/ts/1.5.0/developing/csharp.md index 1dc4d5e0b6..5d071d4191 100644 --- a/content/riak/ts/1.5.0/developing/csharp.md +++ b/content/riak/ts/1.5.0/developing/csharp.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/developing/csharp/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/csharp" --- diff --git a/content/riak/ts/1.5.0/developing/erlang.md b/content/riak/ts/1.5.0/developing/erlang.md index 12401714cf..bb89d542a7 100644 --- a/content/riak/ts/1.5.0/developing/erlang.md +++ b/content/riak/ts/1.5.0/developing/erlang.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/developing/erlang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/erlang" --- diff --git a/content/riak/ts/1.5.0/developing/golang.md b/content/riak/ts/1.5.0/developing/golang.md index 62a9924461..7429a982a7 100644 --- a/content/riak/ts/1.5.0/developing/golang.md +++ b/content/riak/ts/1.5.0/developing/golang.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/developing/golang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/golang" --- diff --git a/content/riak/ts/1.5.0/developing/http.md b/content/riak/ts/1.5.0/developing/http.md index c906b2696f..4f95210d91 100644 --- a/content/riak/ts/1.5.0/developing/http.md +++ b/content/riak/ts/1.5.0/developing/http.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/developing/http/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/http" --- diff --git a/content/riak/ts/1.5.0/developing/java.md b/content/riak/ts/1.5.0/developing/java.md index 09282f76ef..8770f80422 100644 --- a/content/riak/ts/1.5.0/developing/java.md +++ b/content/riak/ts/1.5.0/developing/java.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/developing/java/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/java" --- diff --git a/content/riak/ts/1.5.0/developing/nodejs.md b/content/riak/ts/1.5.0/developing/nodejs.md index d2a4625cba..7c22e7ef2f 100644 --- a/content/riak/ts/1.5.0/developing/nodejs.md +++ b/content/riak/ts/1.5.0/developing/nodejs.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/developing/nodejs/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/nodejs" --- diff --git a/content/riak/ts/1.5.0/developing/php.md b/content/riak/ts/1.5.0/developing/php.md index c03f03cda8..054ee671de 100644 --- a/content/riak/ts/1.5.0/developing/php.md +++ b/content/riak/ts/1.5.0/developing/php.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/developing/php/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/php" --- diff --git a/content/riak/ts/1.5.0/developing/python.md b/content/riak/ts/1.5.0/developing/python.md index 8fbf914ace..a92f5df72f 100644 --- a/content/riak/ts/1.5.0/developing/python.md +++ b/content/riak/ts/1.5.0/developing/python.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/developing/python/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/python" --- diff --git a/content/riak/ts/1.5.0/developing/ruby.md b/content/riak/ts/1.5.0/developing/ruby.md index cb710e19c0..504e9113e5 100644 --- a/content/riak/ts/1.5.0/developing/ruby.md +++ b/content/riak/ts/1.5.0/developing/ruby.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - 
/riakts/1.5.0/developing/ruby/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/ruby" --- You can develop applications and tools using Riak TS with the Riak Ruby client. diff --git a/content/riak/ts/1.5.0/downloads.md b/content/riak/ts/1.5.0/downloads.md index 4ffd8ba192..37ff60b48c 100644 --- a/content/riak/ts/1.5.0/downloads.md +++ b/content/riak/ts/1.5.0/downloads.md @@ -18,7 +18,6 @@ listed_projects: install_instructions_set: "installing" aliases: - /riakts/1.5.0/downloads/ -canonical_link: "https://docs.basho.com/riak/ts/latest/downloads" --- diff --git a/content/riak/ts/1.5.0/index.md b/content/riak/ts/1.5.0/index.md index 81377b9c2c..ace2ff2a74 100644 --- a/content/riak/ts/1.5.0/index.md +++ b/content/riak/ts/1.5.0/index.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/ -canonical_link: "https://docs.basho.com/riak/ts/latest" --- diff --git a/content/riak/ts/1.5.0/learn-about.md b/content/riak/ts/1.5.0/learn-about.md index 31ef4647aa..5ea0837092 100644 --- a/content/riak/ts/1.5.0/learn-about.md +++ b/content/riak/ts/1.5.0/learn-about.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/learn-about/learn-about/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about" --- [table arch]: tablearchitecture/ diff --git a/content/riak/ts/1.5.0/learn-about/bestpractices.md b/content/riak/ts/1.5.0/learn-about/bestpractices.md index 6b5a0b2b12..651b7f75bf 100644 --- a/content/riak/ts/1.5.0/learn-about/bestpractices.md +++ b/content/riak/ts/1.5.0/learn-about/bestpractices.md @@ -13,11 +13,10 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/learn-about/bestpractices/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/bestpractices" --- -[glossary bucket]: http://docs.basho.com/riak/kv/2.2.0/learn/glossary/#bucket +[glossary bucket]: {{< baseurl >}}riak/kv/2.2.0/learn/glossary/#bucket [planning column def]: ../../using/planning/#column-definitions [planning partition]: ../../using/planning/#partition-key [planning primary]: ../../using/planning/#primary-key diff --git a/content/riak/ts/1.5.0/learn-about/sqlriakts.md b/content/riak/ts/1.5.0/learn-about/sqlriakts.md index 9c6c15e27a..c3fa71132c 100644 --- a/content/riak/ts/1.5.0/learn-about/sqlriakts.md +++ b/content/riak/ts/1.5.0/learn-about/sqlriakts.md @@ -13,7 +13,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/learn-about/sqlriakts -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/sqlriakts" --- diff --git a/content/riak/ts/1.5.0/learn-about/tablearchitecture.md b/content/riak/ts/1.5.0/learn-about/tablearchitecture.md index 7d3aa48413..b29e941c91 100644 --- a/content/riak/ts/1.5.0/learn-about/tablearchitecture.md +++ b/content/riak/ts/1.5.0/learn-about/tablearchitecture.md @@ -13,7 +13,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/learn-about/advancedplanning/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/tablearchitecture" --- diff --git a/content/riak/ts/1.5.0/learn-about/timestamps.md b/content/riak/ts/1.5.0/learn-about/timestamps.md index b6b75c5474..8a98aca363 100644 --- a/content/riak/ts/1.5.0/learn-about/timestamps.md +++ b/content/riak/ts/1.5.0/learn-about/timestamps.md @@ -13,7 +13,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/learn-about/timestamps/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/timestamps" --- [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 diff --git 
a/content/riak/ts/1.5.0/releasenotes.md b/content/riak/ts/1.5.0/releasenotes.md index 94934060b3..0c4e8e4ea3 100644 --- a/content/riak/ts/1.5.0/releasenotes.md +++ b/content/riak/ts/1.5.0/releasenotes.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/releasenotes -canonical_link: "https://docs.basho.com/riak/ts/latest/releasenotes" --- Released December 20, 2016. @@ -30,7 +29,7 @@ Riak TS has significantly improved performance, thanks to streamlining of the on ## New Features -* `ASC` and `DESC` have been added to the CREATE TABLE statement. Adding the ASC/DESC keywords to your local key during `CREATE TABLE` means you can have your data pre-sorted in ascending or descending order as it's input into your TS table. You can read more about `ASC`/`DESC` in the local key [here](/riak/ts/1.5.0/using/planning). +* `ASC` and `DESC` have been added to the CREATE TABLE statement. Adding the ASC/DESC keywords to your local key during `CREATE TABLE` means you can have your data pre-sorted in ascending or descending order as it's written to your TS table. You can read more about `ASC`/`DESC` in the local key [here]({{< baseurl >}}riak/ts/1.5.0/using/planning). * [[PR 1427](https://github.com/basho/riak_kv/pull/1427)] * [[PR 1500](https://github.com/basho/riak_kv/pull/1500)] * [[PR 1558](https://github.com/basho/riak_kv/pull/1558)] @@ -40,17 +39,17 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * [[riak_test PR 1200](https://github.com/basho/riak_test/pull/1200)] * [[riak_test PR 1081](https://github.com/basho/riak_test/pull/1081)] * [[riak_test PR 1201](https://github.com/basho/riak_test/pull/1201)] -* The ORDER BY statement has been added to `SELECT`, allowing you to sort the results of your query in various ways, including: ascending or descending order, or nulls first or last. You can learn about `ORDER BY` [here](/riak/ts/1.5.0/using/querying/select/order-by). +* The ORDER BY statement has been added to `SELECT`, allowing you to sort the results of your query in various ways, including: ascending or descending order, or nulls first or last. You can learn about `ORDER BY` [here]({{< baseurl >}}riak/ts/1.5.0/using/querying/select/order-by). * [[PR 1479](https://github.com/basho/riak_kv/pull/1479)] * [[riak erlang client PR 321](https://github.com/basho/riak-erlang-client/pull/321)] * [[riak_pb PR 208](https://github.com/basho/riak_pb/pull/208)] * [[riak_test PR 1152](https://github.com/basho/riak_test/pull/1152)] -* `LIMIT` allows you to specify that you only want a specific number of records from your query, and it can be expanded by `OFFSET`. You can read about how to use the LIMIT statement [here](/riak/ts/1.5.0/using/querying/select/limit). +* `LIMIT` allows you to specify that you only want a specific number of records from your query, and it can be expanded by `OFFSET`. You can read about how to use the LIMIT statement [here]({{< baseurl >}}riak/ts/1.5.0/using/querying/select/limit). * [[PR 1479](https://github.com/basho/riak_kv/pull/1479)] * [[riak erlang client PR 321](https://github.com/basho/riak-erlang-client/pull/321)] * [[riak_pb PR 208](https://github.com/basho/riak_pb/pull/208)] * [[riak_test PR 1152](https://github.com/basho/riak_test/pull/1152)] -* You can now use `DELETE` from riak shell to remove a record from your TS table. Learn all about `DELETE` [here](/riak/ts/1.5.0/using/querying/delete). +* You can now use `DELETE` from riak shell to remove a record from your TS table.
Learn all about `DELETE` [here]({{< baseurl >}}riak/ts/1.5.0/using/querying/delete). * [[PR 1552](https://github.com/basho/riak_kv/pull/1552)] * [[riak_ql PR 145](https://github.com/basho/riak_ql/pull/145)] * [[riak_shell PR 23](https://github.com/basho/riak_shell/pull/23)] @@ -62,14 +61,14 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * [[riak_ql PR 144](https://github.com/basho/riak_ql/pull/144)] * [[riak_shell PR 56](https://github.com/basho/riak_shell/pull/56)] * [[riak_test PR 1169](https://github.com/basho/riak_test/pull/1169)] -* You can now run `SHOW CREATE TABLE` to review SQL definition and replication properties of existing Riak TS tables. You can read more about the SHOW CREATE TABLE statement [here](/riak/ts/1.5.0/using/querying/show-create-table). +* You can now run `SHOW CREATE TABLE` to review the SQL definition and replication properties of existing Riak TS tables. You can read more about the SHOW CREATE TABLE statement [here]({{< baseurl >}}riak/ts/1.5.0/using/querying/show-create-table). * [[PR 1536](https://github.com/basho/riak_kv/pull/1536)] * [[riak_ql 155](https://github.com/basho/riak_ql/pull/155)] * [[riak_ql 159](https://github.com/basho/riak_ql/pull/159)] * [[riak_shell PR 62](https://github.com/basho/riak_shell/pull/62)] * [[riak_test PR 1193](https://github.com/basho/riak_test/pull/1193)] * [[riak_test PR 1211](https://github.com/basho/riak_test/pull/1211)] -* A BLOB data type is now available. BLOB allows the storage of unstructured data, binary or opaque (JSON), in a Riak TS column. Learn about BLOB data type [here](/riak/ts/1.5.0/using/writingdata/#blob-data). +* A BLOB data type is now available. BLOB allows the storage of unstructured data, binary or opaque (JSON), in a Riak TS column. Learn about the BLOB data type [here]({{< baseurl >}}riak/ts/1.5.0/using/writingdata/#blob-data). * [[PR 1540](https://github.com/basho/riak_kv/pull/1540)] * [[riak_pb PR 211](https://github.com/basho/riak_pb/issues/211)] * [[riak_ql PR 156](https://github.com/basho/riak_ql/issues/156)] @@ -93,7 +92,7 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * The timestamp type can now be used as an argument in aggregate functions. [[riak_ql PR 146](https://github.com/basho/riak_ql/pull/146) & [riak_ql PR 147](https://github.com/basho/riak_ql/pull/147)] * You can now see the Status field of your TS table when you use `SHOW TABLES`. [[PR 1514](https://github.com/basho/riak_kv/pull/1514) and [PR 1176](https://github.com/basho/riak_test/pull/1176)] -* Introduced the following new parameters in riak.conf. See the [TS configuration docs](/riak/ts/1.5.0/configuring/riakconf) for details. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] +* Introduced the following new parameters in riak.conf. See the [TS configuration docs]({{< baseurl >}}riak/ts/1.5.0/configuring/riakconf) for details. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] * riak_kv.query.timeseries.max_returned_data_size * riak_kv.query.timeseries.max_running_fsms * riak_kv.query.timeseries.qbuf_root_path @@ -108,7 +107,7 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * Write-once conflict resolution has been changed to be more predictable. It is now based on timestamp rather than a SHA-1 hash of the value part. [[PR 1512](https://github.com/basho/riak_kv/pull/1512)] * LevelDB has been updated to version 2.0.33 [[eleveldb PR 231](https://github.com/basho/eleveldb/pull/231)] * LZ4 is now the default compression for LevelDB.
[[leveldb PR 164](https://github.com/basho/leveldb/pull/164) & [eleveldb PR 208](https://github.com/basho/eleveldb/pull/208)] -* Updated the default value for `riak_kv.query.timeseries.max_quanta_span`. See the [TS configuration docs](/riak/ts/1.5.0/configuring/riakconf) for details. **Note:** due to a bug in the code, the `max_quanta_span` is capped at 1000. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] +* Updated the default value for `riak_kv.query.timeseries.max_quanta_span`. See the [TS configuration docs]({{< baseurl >}}riak/ts/1.5.0/configuring/riakconf) for details. **Note:** due to a bug in the code, the `max_quanta_span` is capped at 1000. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] * The default value for `OFFSET` is `[ ]`. [[PR 1546](https://github.com/basho/riak_kv/pull/1546)] @@ -148,4 +147,4 @@ Riak TS is compatible with the following: * You cannot use Bitcask with Riak TS tables. * `riak_kv.query.timeseries.max_quanta_span` is capped at 1000 due to a bug. -You can see a table of KV and TS features [here](/riak/ts/1.5.0/using/core-fundamentals/). +You can see a table of KV and TS features [here]({{< baseurl >}}riak/ts/1.5.0/using/core-fundamentals/). diff --git a/content/riak/ts/1.5.0/setup.md b/content/riak/ts/1.5.0/setup.md index e6ad177d5f..d011f75473 100644 --- a/content/riak/ts/1.5.0/setup.md +++ b/content/riak/ts/1.5.0/setup.md @@ -14,12 +14,11 @@ version_history: in: "1.5.0+" aliases: - /riakts/1.5.0/setup/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/" --- -[install]: /riak/ts/1.5.0/setup/installing -[upgrade]: /riak/ts/1.5.0/setup/upgrading -[downgrade]: /riak/ts/1.5.0/setup/downgrading +[install]: {{< baseurl >}}riak/ts/1.5.0/setup/installing +[upgrade]: {{< baseurl >}}riak/ts/1.5.0/setup/upgrading +[downgrade]: {{< baseurl >}}riak/ts/1.5.0/setup/downgrading ## In This Section diff --git a/content/riak/ts/1.5.0/setup/downgrading.md b/content/riak/ts/1.5.0/setup/downgrading.md index 926be3eb62..f41112af00 100644 --- a/content/riak/ts/1.5.0/setup/downgrading.md +++ b/content/riak/ts/1.5.0/setup/downgrading.md @@ -15,7 +15,6 @@ version_history: aliases: - /riakts/1.5.0/setup/downgrading/ - /riakts/1.5.0/downgrading/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/downgrading/" --- ## Caution diff --git a/content/riak/ts/1.5.0/setup/installing.md b/content/riak/ts/1.5.0/setup/installing.md index c6d16be2b1..9a2d2ac351 100644 --- a/content/riak/ts/1.5.0/setup/installing.md +++ b/content/riak/ts/1.5.0/setup/installing.md @@ -19,15 +19,14 @@ aliases: - /riakts/1.5.0/installing/installing/ - /riakts/1.5.0/setup/installing/ - /riak/ts/1.5.0/installing/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/" --- [AWS]: aws/ -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [Centos]: rhel-centos/ [Debian]: debian-ubuntu/ -[download]: /riak/ts/1.5.0/downloads/ +[download]: {{< baseurl >}}riak/ts/1.5.0/downloads/ [OSX]: mac-osx/ [source]: source/ [Ubuntu]: debian-ubuntu/ diff --git a/content/riak/ts/1.5.0/setup/installing/aws.md b/content/riak/ts/1.5.0/setup/installing/aws.md index e8f5eca475..8f05f07357 100644 --- a/content/riak/ts/1.5.0/setup/installing/aws.md +++ b/content/riak/ts/1.5.0/setup/installing/aws.md @@ -18,14 +18,13 @@ aliases: - /riakts/1.5.0/installing/aws/ - /riakts/1.5.0/setup/installing/aws/ - /riak/ts/1.5.0/installing/aws/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/aws/" --- [AWS]: http://aws.amazon.com -[download]:
/riak/ts/1.5.0/downloads/ +[download]: {{< baseurl >}}riak/ts/1.5.0/downloads/ [ec2 guide]: http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html -[security basics]: /riak/ts/1.5.0/using/security/ +[security basics]: {{< baseurl >}}riak/ts/1.5.0/using/security/ Riak TS can be installed on AWS virtual machines (VMs) using a binary @@ -42,7 +41,7 @@ Get started by launching a Riak TS virtual machine via the AWS Marketplace. (You 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair. - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{< baseurl >}}images/aws-marketplace-settings.png) 4. Then click the **Accept Terms and Launch with 1-Click** button. @@ -67,7 +66,7 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{< baseurl >}}images/aws-marketplace-security-group.png) We also recommend that you read more about [Security in TS][security basics]. diff --git a/content/riak/ts/1.5.0/setup/installing/debian-ubuntu.md b/content/riak/ts/1.5.0/setup/installing/debian-ubuntu.md index abcd98c212..471e237d2e 100644 --- a/content/riak/ts/1.5.0/setup/installing/debian-ubuntu.md +++ b/content/riak/ts/1.5.0/setup/installing/debian-ubuntu.md @@ -18,13 +18,12 @@ aliases: - /riakts/1.5.0/installing/debian-ubuntu/ - /riakts/1.5.0/setup/installing/debian-ubuntu/ - /riak/ts/1.5.0/installing/debian-ubuntu/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/debian-ubuntu/" --- -[download]: /riak/ts/1.5.0/downloads/ -[openfileslimit]: /riak/kv/2.2.0/using/performance/open-files-limit -[planning]: /riak/ts/1.5.0/using/planning -[security basics pam]: /riak/ts/1.5.0/using/security/sources-management/#pam-based-authentication +[download]: {{< baseurl >}}riak/ts/1.5.0/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.2.0/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.5.0/using/planning +[security basics pam]: {{< baseurl >}}riak/ts/1.5.0/using/security/sources-management/#pam-based-authentication Riak TS can be installed on Debian or Ubuntu-based systems using a binary diff --git a/content/riak/ts/1.5.0/setup/installing/mac-osx.md b/content/riak/ts/1.5.0/setup/installing/mac-osx.md index 895068f635..7550145f40 100644 --- a/content/riak/ts/1.5.0/setup/installing/mac-osx.md +++ b/content/riak/ts/1.5.0/setup/installing/mac-osx.md @@ -18,13 +18,12 @@ aliases: - /riakts/1.5.0/installing/mac-osx/ - /riakts/1.5.0/setup/installing/mac-osx/ - /riak/ts/1.5.0/installing/mac-osx/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/mac-osx/" --- -[download]: /riak/ts/1.5.0/downloads/ -[openfileslimit]: /riak/kv/2.2.0/using/performance/open-files-limit -[planning]: /riak/ts/1.5.0/using/planning +[download]: {{< baseurl >}}riak/ts/1.5.0/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.2.0/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.5.0/using/planning Riak TS can be installed on Mac OS X systems using a binary package available [here][download].
diff --git a/content/riak/ts/1.5.0/setup/installing/rhel-centos.md b/content/riak/ts/1.5.0/setup/installing/rhel-centos.md index a6489e7224..51fa1ab8a0 100644 --- a/content/riak/ts/1.5.0/setup/installing/rhel-centos.md +++ b/content/riak/ts/1.5.0/setup/installing/rhel-centos.md @@ -18,12 +18,11 @@ aliases: - /riakts/1.5.0/installing/rhel-centos/ - /riakts/1.5.0/setup/installing/rhel-centos/ - /riak/ts/1.5.0/installing/rhel-centos/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/rhel-centos/" --- -[download]: /riak/ts/1.5.0/downloads/ -[openfileslimit]: /riak/kv/2.2.0/using/performance/open-files-limit -[planning]: /riak/ts/1.5.0/using/planning/ +[download]: {{< baseurl >}}riak/ts/1.5.0/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.2.0/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.5.0/using/planning/ Riak TS can be installed on CentOS-based systems using a binary diff --git a/content/riak/ts/1.5.0/setup/installing/source.md b/content/riak/ts/1.5.0/setup/installing/source.md index 5dd3129f2e..e579fbf4fa 100644 --- a/content/riak/ts/1.5.0/setup/installing/source.md +++ b/content/riak/ts/1.5.0/setup/installing/source.md @@ -18,16 +18,15 @@ aliases: - /riakts/1.5.0/installing/source/ - /riakts/1.5.0/setup/installing/source/ - /riak/ts/1.5.0/installing/source/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/" --- -[download]: /riak/ts/1.5.0/downloads/ +[download]: {{< baseurl >}}riak/ts/1.5.0/downloads/ [Erlang]: http://www.erlang.org/ [GCC]: https://gcc.gnu.org/ [Git]: https://git-scm.com/ -[install erlang]: /riak/ts/1.5.0/setup/installing/source/erlang -[planning]: /riak/ts/1.5.0/using/planning/ +[install erlang]: {{< baseurl >}}riak/ts/1.5.0/setup/installing/source/erlang +[planning]: {{< baseurl >}}riak/ts/1.5.0/using/planning/ [Riak TS GitHub repository]: https://github.com/basho/riak/tree/riak_ts-1.5.0 diff --git a/content/riak/ts/1.5.0/setup/installing/source/erlang.md b/content/riak/ts/1.5.0/setup/installing/source/erlang.md index bcc8938dbd..f0458da2e4 100644 --- a/content/riak/ts/1.5.0/setup/installing/source/erlang.md +++ b/content/riak/ts/1.5.0/setup/installing/source/erlang.md @@ -18,7 +18,6 @@ aliases: - /riakts/1.5.0/installing/source/erlang/ - /riakts/1.5.0/setup/installing/source/erlang/ - /riak/ts/1.5.0/installing/source/erlang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/erlang/" --- @@ -34,7 +33,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/e [make]: http://www.gnu.org/software/make/ [ncurses]: http://www.gnu.org/software/ncurses/ [OpenSSL]: https://www.openssl.org/ -[source]: /riak/ts/1.5.0/setup/installing/source/ +[source]: {{< baseurl >}}riak/ts/1.5.0/setup/installing/source/ [XCode Developer Tools]: https://developer.apple.com/xcode/downloads/ diff --git a/content/riak/ts/1.5.0/setup/upgrading.md b/content/riak/ts/1.5.0/setup/upgrading.md index be2f0a6138..cf71805184 100644 --- a/content/riak/ts/1.5.0/setup/upgrading.md +++ b/content/riak/ts/1.5.0/setup/upgrading.md @@ -15,17 +15,16 @@ version_history: aliases: - /riakts/1.5.0/setup/upgrading/ - /riakts/1.5.0/upgrading/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/upgrading/" --- -[use admin commands]: /riak/kv/2.2.0/using/admin/commands -[use admin riak-admin]: /riak/kv/2.2.0/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.2.0/developing/usage/secondary-indexes +[use admin commands]: {{< baseurl >}}riak/kv/2.2.0/using/admin/commands +[use admin riak-admin]: {{< baseurl >}}riak/kv/2.2.0/using/admin/riak-admin +[usage
secondary-indexes]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/secondary-indexes [riak ts enterprise]: http://basho.com/products/riak-ts/ -[cluster ops mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.2.0/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.2.0/using/reference/jmx -[snmp]: /riak/kv/2.2.0/using/reference/snmp +[cluster ops mdc]: {{< baseurl >}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{< baseurl >}}riak/kv/2.2.0/configuring/v3-multi-datacenter +[jmx monitor]: {{< baseurl >}}riak/kv/2.2.0/using/reference/jmx +[snmp]: {{< baseurl >}}riak/kv/2.2.0/using/reference/snmp {{% note title="**CAUTION**" %}} diff --git a/content/riak/ts/1.5.0/using.md b/content/riak/ts/1.5.0/using.md index a602bdd0db..927b6031ae 100644 --- a/content/riak/ts/1.5.0/using.md +++ b/content/riak/ts/1.5.0/using.md @@ -12,17 +12,16 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using" --- [activating]: creating-activating/ [aggregate]: querying/select/aggregate-functions/ [arithmetic]: querying/select/arithmetic-operations/ -[configuring]: /riak/ts/1.5.0/configuring/ -[download]: /riak/ts/1.5.0/downloads/ +[configuring]: {{< baseurl >}}riak/ts/1.5.0/configuring/ +[download]: {{< baseurl >}}riak/ts/1.5.0/downloads/ [installing]: ../setup/installing/ -[mdc]: /riak/ts/1.5.0/configuring/mdc/ +[mdc]: {{< baseurl >}}riak/ts/1.5.0/configuring/mdc/ [planning]: planning/ [querying]: querying/ [riakshell]: riakshell/ diff --git a/content/riak/ts/1.5.0/using/core-fundamentals.md b/content/riak/ts/1.5.0/using/core-fundamentals.md index 6b17ffaeff..c56f5d597b 100644 --- a/content/riak/ts/1.5.0/using/core-fundamentals.md +++ b/content/riak/ts/1.5.0/using/core-fundamentals.md @@ -12,11 +12,10 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/core-fundamentals/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/core-fundamentals" --- -[Riak KV]: /riak/kv/2.2.0/ +[Riak KV]: {{< baseurl >}}riak/kv/2.2.0/ Riak TS shares the same core codebase as [Riak KV], which allows you to operate a TS cluster much the same as you would operate a KV cluster. @@ -53,40 +52,40 @@ Below, you will find links to Riak KV documents that are applicable and helpful ### Configuration -Basic Configuration will help you set up your Riak core configuration. +Basic Configuration will help you set up your Riak core configuration. -Managing Configuration will show you how to retrieve your configuration, check your settings, and debug your configuration. +Managing Configuration will show you how to retrieve your configuration, check your settings, and debug your configuration. -Configuration Reference provides you with everything you need to know about configuring Riak core. +Configuration Reference provides you with everything you need to know about configuring Riak core. -Load Balancing will walk you through configuring a load balancer with your Riak cluster. +Load Balancing will walk you through configuring a load balancer with your Riak cluster. ### Cluster Operations -Running a Cluster gives you a basic walkthrough of how to run a Riak cluster. +Running a Cluster gives you a basic walkthrough of how to run a Riak cluster. -Cluster Administration provides a series of links to information on various ways to administer your cluster. +Cluster Administration provides a series of links to information on various ways to administer your cluster. -Adding & Removing Nodes walks you through the process of adding or removing nodes in your cluster.
+Adding & Removing Nodes walks you through the process of adding or removing nodes in your cluster. -Changing Cluster Information will show you how to change various parts of your cluster. +Changing Cluster Information will show you how to change various parts of your cluster. -Replace a Node is a step-by-step guide for how to replace a node in your cluster. +Replace a Node is a step-by-step guide for how to replace a node in your cluster. -Inspect a Node shows you the steps and tools for inspecting nodes in your cluster. +Inspect a Node shows you the steps and tools for inspecting nodes in your cluster. -Logging will provide you the steps for enabling and disabling debug logging. +Logging will provide you the steps for enabling and disabling debug logging. -Backing Up is a how-to guide for backing up your data. +Backing Up is a how-to guide for backing up your data. -Handoff will tell you everything you need to know to enable and disable handoff. +Handoff will tell you everything you need to know to enable and disable handoff. ### Repair, Tuning, and Reference -Repair & Recovery will cover all of the important topics of what can go wrong and what you can do to fix it. +Repair & Recovery will cover all of the important topics of what can go wrong and what you can do to fix it. -Performance will give you all the information you need to tune your cluster configurations to optimize performance. +Performance will give you all the information you need to tune your cluster configurations to optimize performance. -Reference will provide you with explanations of various core functions, such as logging and handoff. \ No newline at end of file +Reference will provide you with explanations of various core functions, such as logging and handoff. \ No newline at end of file diff --git a/content/riak/ts/1.5.0/using/creating-activating.md b/content/riak/ts/1.5.0/using/creating-activating.md index 1a8d6132da..4dfcc87e40 100644 --- a/content/riak/ts/1.5.0/using/creating-activating.md +++ b/content/riak/ts/1.5.0/using/creating-activating.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/creating-activating/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/creating-activating" --- @@ -26,7 +25,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/creating-activating [ruby]: ../../developing/ruby#sql-queries [planning]: ../planning/ [writing]: ../writingdata/ -[Riak bucket properties]: /riak/kv/2.2.0/configuring/reference/#default-bucket-properties +[Riak bucket properties]: {{}}riak/kv/2.2.0/configuring/reference/#default-bucket-properties Once you have [planned out your table][planning] you can create it by: diff --git a/content/riak/ts/1.5.0/using/deleting-data.md b/content/riak/ts/1.5.0/using/deleting-data.md index c87bbb26e6..04ba8526ba 100644 --- a/content/riak/ts/1.5.0/using/deleting-data.md +++ b/content/riak/ts/1.5.0/using/deleting-data.md @@ -14,11 +14,10 @@ version_history: in: "1.5.0+" aliases: - /riakts/1.5.0/using/deleting-data/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/deleting-data" --- -[delete]: /riak/ts/1.5.0/using/querying/delete -[expiry]: /riak/ts/1.5.0/configuring/global-object-expiration +[delete]: {{}}riak/ts/1.5.0/using/querying/delete +[expiry]: {{}}riak/ts/1.5.0/configuring/global-object-expiration Riak TS offers several ways to delete data: with clients, using the DELETE statement, and through global expiry. Global expiry is more efficient than other delete options but operates on all of your data. 
`DELETE` works per-row but takes more resources to run. diff --git a/content/riak/ts/1.5.0/using/planning.md b/content/riak/ts/1.5.0/using/planning.md index f58876d042..52edbb2b53 100644 --- a/content/riak/ts/1.5.0/using/planning.md +++ b/content/riak/ts/1.5.0/using/planning.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/planning/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/planning" --- @@ -23,7 +22,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/planning" [epoch]: https://en.wikipedia.org/wiki/Unix_time [installing]: ../../setup/installing/ [sql]: ../../learn-about/sqlriakts/ -[order by]: /riak/ts/1.5.0/using/querying/select/order-by +[order by]: {{< baseurl >}}riak/ts/1.5.0/using/querying/select/order-by You've [installed][installing] Riak TS, and you're ready to create a table. diff --git a/content/riak/ts/1.5.0/using/querying.md b/content/riak/ts/1.5.0/using/querying.md index 788e23f8fc..4f77d36273 100644 --- a/content/riak/ts/1.5.0/using/querying.md +++ b/content/riak/ts/1.5.0/using/querying.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/querying/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying" --- [activating]: ../creating-activating/ diff --git a/content/riak/ts/1.5.0/using/querying/delete.md b/content/riak/ts/1.5.0/using/querying/delete.md index f9bd5bf09f..a965f5a1c7 100644 --- a/content/riak/ts/1.5.0/using/querying/delete.md +++ b/content/riak/ts/1.5.0/using/querying/delete.md @@ -14,12 +14,11 @@ version_history: in: "1.5.0+" aliases: - /riakts/1.5.0/using/querying/delete -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/delete" --- -[query guidelines]: /riak/ts/1.5.0/using/querying/guidelines/ -[time rep]: /riak/ts/1.5.0/using/timerepresentations/ -[http delete]: /riak/ts/1.4.0/using/writingdata/#deleting-data +[query guidelines]: {{< baseurl >}}riak/ts/1.5.0/using/querying/guidelines/ +[time rep]: {{< baseurl >}}riak/ts/1.5.0/using/timerepresentations/ +[http delete]: {{< baseurl >}}riak/ts/1.4.0/using/writingdata/#deleting-data # DELETE diff --git a/content/riak/ts/1.5.0/using/querying/describe.md b/content/riak/ts/1.5.0/using/querying/describe.md index 4e164af88b..200b8336b7 100644 --- a/content/riak/ts/1.5.0/using/querying/describe.md +++ b/content/riak/ts/1.5.0/using/querying/describe.md @@ -12,10 +12,9 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/querying/describe -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/describe" --- -[riak shell]: /riak/ts/1.5.0/using/riakshell +[riak shell]: {{< baseurl >}}riak/ts/1.5.0/using/riakshell You can use the DESCRIBE statement to obtain the definition of your Riak TS table. This document will show you how to execute `DESCRIBE` in TS.
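Both `DESCRIBE` (this page) and the new `DELETE` (previous page) are plain SQL statements, so from a client they look just like a `SELECT`. A hedged sketch, reusing the Python client from the earlier examples; the table and key values are illustrative.

```python
# Hedged sketch: DESCRIBE and per-row DELETE issued through ts_query,
# reusing the client from the earlier sketch. Values are illustrative.
for row in client.ts_query("GeoCheckin", "DESCRIBE GeoCheckin").rows:
    print(row)  # column name, type, nullability, key participation

# DELETE targets a single row via the full primary key: region, state, time.
client.ts_query(
    "GeoCheckin",
    "DELETE FROM GeoCheckin WHERE region = 'South Atlantic' "
    "AND state = 'SC' AND time = 1420113600000")
```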
diff --git a/content/riak/ts/1.5.0/using/querying/explain.md b/content/riak/ts/1.5.0/using/querying/explain.md index 6bccec92cf..0a47a46f7a 100644 --- a/content/riak/ts/1.5.0/using/querying/explain.md +++ b/content/riak/ts/1.5.0/using/querying/explain.md @@ -12,13 +12,12 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/querying/explain -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/explain" --- -[creating-activating]: /riak/ts/1.5.0/using/creating-activating -[develop]: /riak/ts/1.5.0/developing -[planning]: /riak/ts/1.5.0/using/planning -[riak shell]: /riak/ts/1.5.0/using/riakshell +[creating-activating]: {{< baseurl >}}riak/ts/1.5.0/using/creating-activating +[develop]: {{< baseurl >}}riak/ts/1.5.0/developing +[planning]: {{< baseurl >}}riak/ts/1.5.0/using/planning +[riak shell]: {{< baseurl >}}riak/ts/1.5.0/using/riakshell You can use the EXPLAIN statement to better understand how a query you would like to run will be executed. This document will show you how to use `EXPLAIN` in Riak TS. diff --git a/content/riak/ts/1.5.0/using/querying/guidelines.md b/content/riak/ts/1.5.0/using/querying/guidelines.md index fd075abb24..741cd4778e 100644 --- a/content/riak/ts/1.5.0/using/querying/guidelines.md +++ b/content/riak/ts/1.5.0/using/querying/guidelines.md @@ -13,7 +13,6 @@ toc: true aliases: - /riakts/1.5.0/using/querying/basic-querying - /riakts/1.5.0/using/querying/guidelines -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/guidelines" --- [table arch]: ../../../learn-about/tablearchitecture/#data-modeling [writing]: ../../writingdata/ [planning]: ../../planning#column-definitions [iso8601]: ../../../timerepresentations/ -[SELECT]: /riak/ts/1.5.0/using/querying/SELECT#iso_8601 +[SELECT]: {{< baseurl >}}riak/ts/1.5.0/using/querying/SELECT#iso_8601 [configuring]: ../../../configuring/riakconf/ @@ -60,7 +59,7 @@ Any quantized field in your partition key must be included in the query as a bou * Invalid: `time > 1449864277000 or time < 1449864290000` {{% note title="A Note About `SELECT`" %}} -It is possible to use ISO 8601-compliant date/time strings rather than integer timestamps in SELECT statements. Please see [SELECT](/riak/ts/1.5.0/using/querying/select/#iso-8601) for an example or [Time Representations](/riak/ts/1.5.0/using/timerepresentations/) for more information. +It is possible to use ISO 8601-compliant date/time strings rather than integer timestamps in SELECT statements. Please see [SELECT]({{< baseurl >}}riak/ts/1.5.0/using/querying/select/#iso-8601) for an example or [Time Representations]({{< baseurl >}}riak/ts/1.5.0/using/timerepresentations/) for more information.
{{% /note %}} diff --git a/content/riak/ts/1.5.0/using/querying/reference.md b/content/riak/ts/1.5.0/using/querying/reference.md index 327d918516..a655aed096 100644 --- a/content/riak/ts/1.5.0/using/querying/reference.md +++ b/content/riak/ts/1.5.0/using/querying/reference.md @@ -14,21 +14,20 @@ version_history: in: "1.5.0+" aliases: - /riakts/1.5.0/using/querying/reference -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/reference" --- -[select]: /riak/ts/1.5.0/using/querying/select/ -[describe]: /riak/ts/1.5.0/using/querying/describe/ -[delete]: /riak/ts/1.5.0/using/querying/delete/ -[explain]: /riak/ts/1.5.0/using/querying/explain/ -[show tables]: /riak/ts/1.5.0/using/querying/show-tables/ -[create table]: /riak/ts/1.5.0/using/creating-activating/ -[group by]: /riak/ts/1.5.0/using/querying/select/group-by/ -[order by]: /riak/ts/1.5.0/using/querying/select/order-by/ -[limit]: /riak/ts/1.5.0/using/querying/select/limit/ -[offset]: /riak/ts/1.5.0/using/querying/select/ -[arithmetic]: /riak/ts/1.5.0/using/querying/select/arithmetic-operations/ -[aggregate]: /riak/ts/1.5.0/using/querying/select/aggregate-functions/ +[select]: {{< baseurl >}}riak/ts/1.5.0/using/querying/select/ +[describe]: {{< baseurl >}}riak/ts/1.5.0/using/querying/describe/ +[delete]: {{< baseurl >}}riak/ts/1.5.0/using/querying/delete/ +[explain]: {{< baseurl >}}riak/ts/1.5.0/using/querying/explain/ +[show tables]: {{< baseurl >}}riak/ts/1.5.0/using/querying/show-tables/ +[create table]: {{< baseurl >}}riak/ts/1.5.0/using/creating-activating/ +[group by]: {{< baseurl >}}riak/ts/1.5.0/using/querying/select/group-by/ +[order by]: {{< baseurl >}}riak/ts/1.5.0/using/querying/select/order-by/ +[limit]: {{< baseurl >}}riak/ts/1.5.0/using/querying/select/limit/ +[offset]: {{< baseurl >}}riak/ts/1.5.0/using/querying/select/ +[arithmetic]: {{< baseurl >}}riak/ts/1.5.0/using/querying/select/arithmetic-operations/ +[aggregate]: {{< baseurl >}}riak/ts/1.5.0/using/querying/select/aggregate-functions/ This document lists each SQL statement available in Riak TS. diff --git a/content/riak/ts/1.5.0/using/querying/select.md b/content/riak/ts/1.5.0/using/querying/select.md index 78241a7e80..d8367219b8 100644 --- a/content/riak/ts/1.5.0/using/querying/select.md +++ b/content/riak/ts/1.5.0/using/querying/select.md @@ -12,18 +12,17 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/querying/select -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select" --- [aggregate functions]: aggregate-functions/ [arithmetic operations]: arithmetic-operations/ [GROUP BY]: group-by/ -[guidelines]: /riak/ts/1.5.0/using/querying/guidelines +[guidelines]: {{< baseurl >}}riak/ts/1.5.0/using/querying/guidelines [iso8601]: ../../timerepresentations/ -[iso8601 accuracy]: /riak/ts/1.5.0/using/timerepresentations/#reduced-accuracy +[iso8601 accuracy]: {{< baseurl >}}riak/ts/1.5.0/using/timerepresentations/#reduced-accuracy [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 -[learn timestamps accuracy]: /riak/ts/1.5.0/learn-about/timestamps/#reduced-accuracy +[learn timestamps accuracy]: {{< baseurl >}}riak/ts/1.5.0/learn-about/timestamps/#reduced-accuracy You can use the SELECT statement in Riak TS to query your TS dataset. This document will show you how to run various queries using `SELECT`.
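Tying the guidelines above together: a valid `SELECT` bounds the quantized `time` column on both sides and pins the other partition-key columns, and with 1.5.0 it can also sort and cap its results. A sketch under the same assumptions as the earlier Python examples (official riak client, illustrative table and columns).

```python
# Sketch: a guideline-conforming SELECT -- time is bounded on both ends and
# the other partition-key columns are pinned -- plus the 1.5.0 ORDER BY and
# LIMIT additions. Client and table are from the earlier sketches.
query = ("SELECT time, weather, temperature FROM GeoCheckin "
         "WHERE time > 1449864277000 AND time < 1449864290000 "
         "AND region = 'South Atlantic' AND state = 'SC' "
         "ORDER BY time DESC "
         "LIMIT 10")
latest = client.ts_query("GeoCheckin", query)  # ten most recent matching rows
```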
diff --git a/content/riak/ts/1.5.0/using/querying/select/aggregate-functions.md b/content/riak/ts/1.5.0/using/querying/select/aggregate-functions.md index 26f0ec4380..bc674f6247 100644 --- a/content/riak/ts/1.5.0/using/querying/select/aggregate-functions.md +++ b/content/riak/ts/1.5.0/using/querying/select/aggregate-functions.md @@ -10,7 +10,6 @@ menu: project: "riak_ts" project_version: "1.5.0" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/aggregate-functions" version_history: present_from: "1.4.0+" moved: diff --git a/content/riak/ts/1.5.0/using/querying/select/arithmetic-operations.md b/content/riak/ts/1.5.0/using/querying/select/arithmetic-operations.md index c04323906a..bc56c57b9f 100644 --- a/content/riak/ts/1.5.0/using/querying/select/arithmetic-operations.md +++ b/content/riak/ts/1.5.0/using/querying/select/arithmetic-operations.md @@ -10,7 +10,6 @@ menu: project: "riak_ts" project_version: "1.5.0" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/arithmetic-operations" version_history: present_from: "1.4.0+" moved: @@ -22,7 +21,7 @@ aliases: --- -[querying select]: /riak/ts/1.5.0/using/querying/#select-query +[querying select]: {{< baseurl >}}riak/ts/1.5.0/using/querying/#select-query Riak TS supports arithmetic operations in the SELECT statement. diff --git a/content/riak/ts/1.5.0/using/querying/select/group-by.md b/content/riak/ts/1.5.0/using/querying/select/group-by.md index c60db0def4..a04ce175c1 100644 --- a/content/riak/ts/1.5.0/using/querying/select/group-by.md +++ b/content/riak/ts/1.5.0/using/querying/select/group-by.md @@ -12,11 +12,10 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/querying/select/group-by -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/group-by" --- [aggregate function]: ../aggregate-functions -[guidelines]: /riak/ts/1.5.0/using/querying/guidelines +[guidelines]: {{< baseurl >}}riak/ts/1.5.0/using/querying/guidelines The GROUP BY statement is used with `SELECT` to pick out and condense rows sharing the same value and return a single row. `GROUP BY` is useful for aggregating an attribute of a device over a time period; for instance, you could use it to pull average values for every 30-minute period over the last 24 hours. diff --git a/content/riak/ts/1.5.0/using/querying/select/limit.md b/content/riak/ts/1.5.0/using/querying/select/limit.md index b136a5be5e..1e86a06fe5 100644 --- a/content/riak/ts/1.5.0/using/querying/select/limit.md +++ b/content/riak/ts/1.5.0/using/querying/select/limit.md @@ -12,12 +12,11 @@ project_version: "1.5.0" toc: true version_history: in: "1.5.0+" -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/limit/" --- -[select]: /riak/ts/1.5.0/using/querying/select -[query guidelines]: /riak/ts/1.5.0/using/querying/guidelines/ -[configuring]: /riak/ts/1.5.0/configuring/riakconf/#maximum-returned-data-size +[select]: {{< baseurl >}}riak/ts/1.5.0/using/querying/select +[query guidelines]: {{< baseurl >}}riak/ts/1.5.0/using/querying/guidelines/ +[configuring]: {{< baseurl >}}riak/ts/1.5.0/configuring/riakconf/#maximum-returned-data-size The LIMIT statement is used with [`SELECT`][select] to return a limited number of results. @@ -27,7 +26,7 @@ This document shows how to run various queries using `LIMIT`. See the [guideline {{% note title="A Note on Latency" %}} `LIMIT` uses an on-disk query buffer to prevent overload, which adds some overhead and increases the query latency.
-You may adjust various parameters in [riak.conf](/riak/ts/1.5.0/configuring/riakconf/) depending on how much memory your riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for LIMIT statements; you can read more about that [here](/riak/ts/1.5.0/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition. +You may adjust various parameters in [riak.conf]({{< baseurl >}}riak/ts/1.5.0/configuring/riakconf/) depending on how much memory your riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for LIMIT statements; you can read more about that [here]({{< baseurl >}}riak/ts/1.5.0/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition. However, the most effective means of speeding up your `LIMIT` queries is to place the query buffer directory (`timeseries_query_buffers_root_path`) on fast storage or in a memory-backed /tmp directory. {{% /note %}} diff --git a/content/riak/ts/1.5.0/using/querying/select/order-by.md b/content/riak/ts/1.5.0/using/querying/select/order-by.md index d958e99a15..a904d1834d 100644 --- a/content/riak/ts/1.5.0/using/querying/select/order-by.md +++ b/content/riak/ts/1.5.0/using/querying/select/order-by.md @@ -12,12 +12,11 @@ project_version: "1.5.0" toc: true version_history: in: "1.5.0+" -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/order-by" --- -[select]: /riak/ts/1.5.0/using/querying/select -[query guidelines]: /riak/ts/1.5.0/using/querying/guidelines/ -[configuring]: /riak/ts/1.5.0/configuring/riakconf/#maximum-returned-data-size +[select]: {{< baseurl >}}riak/ts/1.5.0/using/querying/select +[query guidelines]: {{< baseurl >}}riak/ts/1.5.0/using/querying/guidelines/ +[configuring]: {{< baseurl >}}riak/ts/1.5.0/configuring/riakconf/#maximum-returned-data-size The ORDER BY statement is used with [`SELECT`][select] to sort results by one or more columns in ascending or descending order. `ORDER BY` is useful for operations such as returning the most recent results in a set. @@ -26,7 +25,7 @@ This document shows how to run various queries using `ORDER BY`. See the [guidel {{% note title="A Note on Latency" %}} `ORDER BY` uses an on-disk query buffer to prevent overload, which adds some overhead and increases the query latency.
+You may adjust various parameters in [riak.conf]({{< baseurl >}}riak/ts/1.5.0/configuring/riakconf/) depending on how much memory your riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for ORDER BY statements; you can read more about that [here]({{< baseurl >}}riak/ts/1.5.0/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition. However, the most effective means of speeding up your `ORDER BY` queries is to place the query buffer directory (`timeseries_query_buffers_root_path`) on fast storage or in a memory-backed /tmp directory. {{% /note %}} diff --git a/content/riak/ts/1.5.0/using/querying/show-create-table.md b/content/riak/ts/1.5.0/using/querying/show-create-table.md index 6cc56a74ba..9396c7ec9f 100644 --- a/content/riak/ts/1.5.0/using/querying/show-create-table.md +++ b/content/riak/ts/1.5.0/using/querying/show-create-table.md @@ -14,10 +14,9 @@ version_history: in: "1.5.0+" aliases: - /riakts/1.5.0/using/querying/show-create-table -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/show-create-table" --- -[riak shell]: /riak/ts/1.5.0/using/riakshell +[riak shell]: {{< baseurl >}}riak/ts/1.5.0/using/riakshell You can use the SHOW CREATE TABLE statement to obtain the SQL used to create your Riak TS table. This document will show you how to execute `SHOW CREATE TABLE` in TS. diff --git a/content/riak/ts/1.5.0/using/querying/show-tables.md b/content/riak/ts/1.5.0/using/querying/show-tables.md index 7acde99c9d..0cfdf5ed1c 100644 --- a/content/riak/ts/1.5.0/using/querying/show-tables.md +++ b/content/riak/ts/1.5.0/using/querying/show-tables.md @@ -12,10 +12,9 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/querying/show-tables -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/show-tables" --- -[riak shell]: /riak/ts/1.5.0/using/riakshell +[riak shell]: {{< baseurl >}}riak/ts/1.5.0/using/riakshell You can use the SHOW TABLES statement to enumerate the Riak TS tables you have set up. This document will show you how to execute `SHOW TABLES` in TS. diff --git a/content/riak/ts/1.5.0/using/querying/single-key-fetch.md b/content/riak/ts/1.5.0/using/querying/single-key-fetch.md index 12cd19854d..84fb88b7c2 100644 --- a/content/riak/ts/1.5.0/using/querying/single-key-fetch.md +++ b/content/riak/ts/1.5.0/using/querying/single-key-fetch.md @@ -12,7 +12,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/querying/single-key-fetch -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/single-key-fetch" --- You may find the need to fetch a single key from Riak TS. The below examples show you how to perform a single key fetch in each of our official clients that support Riak TS.
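For the single-key fetch just described, the clients expose a direct get rather than a query. A hedged sketch, assuming the Python client's `ts_get`, which takes the full primary key as a list; the key values are illustrative and the client is the one from the earlier sketches.

```python
# Hedged sketch: single-key fetch with the Python client's ts_get, which
# takes the full primary key (here region, state, time). Values illustrative.
fetched = client.ts_get("GeoCheckin", ["South Atlantic", "SC", 1420113600000])
print(fetched.rows)  # the matching record, if any
```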
diff --git a/content/riak/ts/1.5.0/using/riakshell.md b/content/riak/ts/1.5.0/using/riakshell.md index 7a9b53f87d..9acb96163a 100644 --- a/content/riak/ts/1.5.0/using/riakshell.md +++ b/content/riak/ts/1.5.0/using/riakshell.md @@ -12,12 +12,11 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/riakshell/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/riakshell" --- -[nodename]: /riak/kv/2.2.0/using/cluster-operations/changing-cluster-info/ -[creating]: /riak/ts/1.5.0/using/creating-activating -[writing]: /riak/ts/1.5.0/using/writingdata +[nodename]: {{< baseurl >}}riak/kv/2.2.0/using/cluster-operations/changing-cluster-info/ +[creating]: {{< baseurl >}}riak/ts/1.5.0/using/creating-activating +[writing]: {{< baseurl >}}riak/ts/1.5.0/using/writingdata [riak shell README]: https://github.com/basho/riak_shell/blob/develop/README.md @@ -195,7 +194,7 @@ An example of the second format is shown below: In both of these formats, multiple rows of data can be specified (3)>INSERT INTO mytable VALUES ('keyvalue', '2016-11-30 19:30:00', 123, 12.3, false), ('newvalue', '2016-11-30 19:31:04', 456, 45.6, true); -For more details please go to http://docs.basho.com/riak/ts +For more details please go to /riak/ts ``` diff --git a/content/riak/ts/1.5.0/using/security.md b/content/riak/ts/1.5.0/using/security.md index 0aae21807d..9eecc99f73 100644 --- a/content/riak/ts/1.5.0/using/security.md +++ b/content/riak/ts/1.5.0/using/security.md @@ -13,7 +13,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/security/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/" --- [security checklist]: ./checklist @@ -90,7 +89,7 @@ cluster on the following TCP ports: Protocol | Port :--------|:---- -Protocol Buffers | TCP port 8087 +Protocol Buffers | TCP port 8087 ## Best Practices diff --git a/content/riak/ts/1.5.0/using/security/checklist.md b/content/riak/ts/1.5.0/using/security/checklist.md index b2ea2f90e2..8a6152d381 100644 --- a/content/riak/ts/1.5.0/using/security/checklist.md +++ b/content/riak/ts/1.5.0/using/security/checklist.md @@ -13,7 +13,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/security/checklist -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/checklist/" --- [enable ssl]: ../enable-disable/#enabling-ssl [security users]: ../user-management [security sources]: ../sources-management [manage permissions]: ../user-management/#managing-permissions -[pbc]: /riak/kv/2.2.0/developing/api/protocol-buffers/ +[pbc]: {{< baseurl >}}riak/kv/2.2.0/developing/api/protocol-buffers/ [security enable disable]: ../enable-disable Before turning on Riak TS security there are key steps all applications need to take.
Missing one of these steps may break your application, so make sure you have done each of the following BEFORE enabling security: diff --git a/content/riak/ts/1.5.0/using/security/enable-disable.md b/content/riak/ts/1.5.0/using/security/enable-disable.md index ed24927cb3..74e18ddb14 100644 --- a/content/riak/ts/1.5.0/using/security/enable-disable.md +++ b/content/riak/ts/1.5.0/using/security/enable-disable.md @@ -13,7 +13,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/security/enable-disable -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/enable-disable/" --- Riak TS security may be [checked](#checking-security-status), [enabled](#enabling-security), or [disabled](#disabling-security) through the command line, allowing an administrator to change security settings for the whole cluster without needing to go node-by-node. diff --git a/content/riak/ts/1.5.0/using/security/notify-basho.md b/content/riak/ts/1.5.0/using/security/notify-basho.md index 1a498a249e..1883424532 100644 --- a/content/riak/ts/1.5.0/using/security/notify-basho.md +++ b/content/riak/ts/1.5.0/using/security/notify-basho.md @@ -13,7 +13,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/security/notify-basho -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/notify-basho/" --- Data security is an important and sensitive issue. A real-world approach to security allows us to balance appropriate levels of security and related overhead while creating a fast, scalable, and operationally straightforward database. diff --git a/content/riak/ts/1.5.0/using/security/sources-management.md b/content/riak/ts/1.5.0/using/security/sources-management.md index f78a75c28d..109e1ebac2 100644 --- a/content/riak/ts/1.5.0/using/security/sources-management.md +++ b/content/riak/ts/1.5.0/using/security/sources-management.md @@ -13,7 +13,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/security/sources-management -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/sources-management/" --- [cidr]: http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing @@ -22,8 +21,8 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/sources-ma [security enabling]: ../enable-disable/#enabling-security [security add user]: ../user-management/#add-user [root cert]: http://en.wikipedia.org/wiki/Root_certificate -[rolling restart]: /riak/kv/2.2.0/using/repair-recovery/rolling-restart/ -[config ref security]: /riak/kv/2.2.0/configuring/reference/#security +[rolling restart]: {{< baseurl >}}riak/kv/2.2.0/using/repair-recovery/rolling-restart/ +[config ref security]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference/#security [xss]: http://en.wikipedia.org/wiki/Cross-site_scripting [request forgery]: http://en.wikipedia.org/wiki/Cross-site_request_forgery [http referer]: http://en.wikipedia.org/wiki/HTTP_referer diff --git a/content/riak/ts/1.5.0/using/security/user-management.md b/content/riak/ts/1.5.0/using/security/user-management.md index e3a03085a3..c7eb719834 100644 --- a/content/riak/ts/1.5.0/using/security/user-management.md +++ b/content/riak/ts/1.5.0/using/security/user-management.md @@ -13,7 +13,6 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/security/user-management -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/user-management/" --- Riak TS security lets you control authorization by creating, modifying, and deleting user characteristics and granting users selective access to Riak
TS functionality. Users can be assigned one or more of the following characteristics: diff --git a/content/riak/ts/1.5.0/using/timerepresentations.md b/content/riak/ts/1.5.0/using/timerepresentations.md index d26d9e1d9d..0a31cb8d6c 100644 --- a/content/riak/ts/1.5.0/using/timerepresentations.md +++ b/content/riak/ts/1.5.0/using/timerepresentations.md @@ -12,15 +12,14 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/timerepresentations/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/timerepresentations" --- [activating]: ../creating-activating/ [planning]: ../planning/ [querying]: ../querying/ -[config reference]: /riak/kv/2.2.0/configuring/reference/#the-advanced-config-file -[MDC]: /riak/ts/1.5.0/using/mdc +[config reference]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference/#the-advanced-config-file +[MDC]: {{< baseurl >}}riak/ts/1.5.0/using/mdc [riak shell]: ../riakshell [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 [learn timestamps]: ../../learn-about/timestamps diff --git a/content/riak/ts/1.5.0/using/writingdata.md b/content/riak/ts/1.5.0/using/writingdata.md index a292e99cfd..bde6f830c8 100644 --- a/content/riak/ts/1.5.0/using/writingdata.md +++ b/content/riak/ts/1.5.0/using/writingdata.md @@ -12,16 +12,15 @@ project_version: "1.5.0" toc: true aliases: - /riakts/1.5.0/using/writingdata/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/writingdata" --- [activating]: ../creating-activating/ [planning]: ../planning/ [querying]: ../querying/ -[http]: /riak/ts/1.5.0/developing/http/ -[config reference]: /riak/kv/2.2.0/configuring/reference/#the-advanced-config-file -[MDC]: /riak/ts/1.5.0/configuring/mdc +[http]: {{< baseurl >}}riak/ts/1.5.0/developing/http/ +[config reference]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference/#the-advanced-config-file +[MDC]: {{< baseurl >}}riak/ts/1.5.0/configuring/mdc [riakshell]: ../riakshell [iso8601]: ../timerepresentations/ [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 diff --git a/content/riak/ts/1.5.1/add-ons.md b/content/riak/ts/1.5.1/add-ons.md index 8861bf71f4..d3e3230d6a 100644 --- a/content/riak/ts/1.5.1/add-ons.md +++ b/content/riak/ts/1.5.1/add-ons.md @@ -10,7 +10,6 @@ menu: weight: 450 pre: tools toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons" --- Here at Basho, we've developed integrations between Riak TS and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set.
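The enable-disable page above says security is toggled cluster-wide from the command line. As a minimal sketch, assuming the standard `riak-admin security` subcommands that ship with Riak, the workflow looks like this:

```bash
# Check whether security is currently on (run on any node; applies cluster-wide).
riak-admin security status

# Turn security on only after working through the checklist above.
riak-admin security enable

# Security can be turned back off the same way.
riak-admin security disable
```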
diff --git a/content/riak/ts/1.5.1/add-ons/redis/developing-rra.md b/content/riak/ts/1.5.1/add-ons/redis/developing-rra.md index 473f8ece48..0a0d5c76e8 100644 --- a/content/riak/ts/1.5.1/add-ons/redis/developing-rra.md +++ b/content/riak/ts/1.5.1/add-ons/redis/developing-rra.md @@ -14,12 +14,12 @@ commercial_offering: true --- [redis-clients]: http://redis.io/clients -[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types/ -[dev api http]: /riak/kv/2.2.0/developing/api/http +[usage bucket types]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/bucket-types/ +[dev api http]: {{< baseurl >}}riak/kv/2.2.0/developing/api/http [config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/ -[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties -[usage commit hooks]: /riak/kv/2.2.0/developing/usage/commit-hooks/ -[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context +[apps replication properties]: {{< baseurl >}}riak/kv/2.2.0/developing/app-guide/replication-properties +[usage commit hooks]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/commit-hooks/ +[concept causal context]: {{< baseurl >}}riak/kv/2.2.0/learn/concepts/causal-context [ee]: http://basho.com/contact/ This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations. diff --git a/content/riak/ts/1.5.1/add-ons/redis/redis-add-on-features.md b/content/riak/ts/1.5.1/add-ons/redis/redis-add-on-features.md index fbc4d2514a..429f44ca3f 100644 --- a/content/riak/ts/1.5.1/add-ons/redis/redis-add-on-features.md +++ b/content/riak/ts/1.5.1/add-ons/redis/redis-add-on-features.md @@ -14,10 +14,10 @@ commercial_offering: true --- [ee]: http://basho.com/contact/ -[GET-sequence]: /images/redis/GET_seq.msc.png -[SET-sequence]: /images/redis/SET_seq.msc.png -[DEL-sequence]: /images/redis/DEL_seq.msc.png -[Object-lifetime]: /images/redis/Object_lifetime.msc.png +[GET-sequence]: {{< baseurl >}}images/redis/GET_seq.msc.png +[SET-sequence]: {{< baseurl >}}images/redis/SET_seq.msc.png +[DEL-sequence]: {{< baseurl >}}images/redis/DEL_seq.msc.png +[Object-lifetime]: {{< baseurl >}}images/redis/Object_lifetime.msc.png [redis docs]: http://redis.io/commands [twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md @@ -69,7 +69,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i The read-through cache strategy of the GET command is represented by the following sequence diagram: -![GET command sequence diagram](/images/redis/GET_seq.msc.png) +![GET command sequence diagram]({{< baseurl >}}images/redis/GET_seq.msc.png) The `CACHE_TTL` configuration option establishes how long the cache takes to @@ -96,12 +96,12 @@ expected to be followed by several accurate cache hits.
The write-around cache strategy of the SET command is represented by the following sequence diagram: -![SET command sequence diagram](/images/redis/SET_seq.msc.png) +![SET command sequence diagram]({{< baseurl >}}images/redis/SET_seq.msc.png) The write-around cache strategy of the DEL command is represented by the following sequence diagram: -![DEL command sequence diagram](/images/redis/DEL_seq.msc.png) +![DEL command sequence diagram]({{< baseurl >}}images/redis/DEL_seq.msc.png) ## Commands @@ -128,4 +128,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram: -![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png) +![Object lifetime sequence diagram]({{< baseurl >}}images/redis/Object_lifetime.msc.png) diff --git a/content/riak/ts/1.5.1/add-ons/redis/set-up-rra.md b/content/riak/ts/1.5.1/add-ons/redis/set-up-rra.md index 0f8341bf1d..b7e6fd341c 100644 --- a/content/riak/ts/1.5.1/add-ons/redis/set-up-rra.md +++ b/content/riak/ts/1.5.1/add-ons/redis/set-up-rra.md @@ -16,8 +16,8 @@ commercial_offering: true [addon redis develop]: ../developing-rra/ [addon redis use]: ../using-rra [ee]: http://basho.com/contact/ -[install index]: /riak/ts/1.5.1/setup/installing -[perf open files]: /riak/ts/1.5.1/setup/installing/rhel-centos/#ulimit +[install index]: {{< baseurl >}}riak/ts/1.5.1/setup/installing +[perf open files]: {{< baseurl >}}riak/ts/1.5.1/setup/installing/rhel-centos/#ulimit [lab ansible]: https://github.com/paegun/ansible-cache-proxy This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA. diff --git a/content/riak/ts/1.5.1/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/ts/1.5.1/add-ons/redis/set-up-rra/deployment-models.md index a58c4d809d..8f1d851ede 100644 --- a/content/riak/ts/1.5.1/add-ons/redis/set-up-rra/deployment-models.md +++ b/content/riak/ts/1.5.1/add-ons/redis/set-up-rra/deployment-models.md @@ -13,9 +13,9 @@ toc: true commercial_offering: true --- -[Local-deployment]: /images/redis/rra_deployment_local.png -[Colocated-deployment]: /images/redis/rra_deployment_colocated.png -[Distributed-deployment]: /images/redis/rra_deployment_distributed.png +[Local-deployment]: {{< baseurl >}}images/redis/rra_deployment_local.png +[Colocated-deployment]: {{< baseurl >}}images/redis/rra_deployment_colocated.png +[Distributed-deployment]: {{< baseurl >}}images/redis/rra_deployment_distributed.png ## Deployment Models @@ -24,7 +24,7 @@ commercial_offering: true In a local cache deployment, the RRA and Redis are deployed to the application server. -![Local-deployment](/images/redis/rra_deployment_local.png) +![Local-deployment]({{< baseurl >}}images/redis/rra_deployment_local.png) Connections: @@ -65,7 +65,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so costs are not multiplied by the number of servers. -![Colocated-deployment](/images/redis/rra_deployment_colocated.png) +![Colocated-deployment]({{< baseurl >}}images/redis/rra_deployment_colocated.png) Connections: @@ -103,7 +103,7 @@ Disadvantages: In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes.
-![Distributed-deployment](/images/redis/rra_deployment_distributed.png) +![Distributed-deployment]({{< baseurl >}}images/redis/rra_deployment_distributed.png) Connections: diff --git a/content/riak/ts/1.5.1/add-ons/redis/using-rra.md b/content/riak/ts/1.5.1/add-ons/redis/using-rra.md index 97b01482e7..c10999323c 100644 --- a/content/riak/ts/1.5.1/add-ons/redis/using-rra.md +++ b/content/riak/ts/1.5.1/add-ons/redis/using-rra.md @@ -17,7 +17,7 @@ aliases: [addon redis develop]: ../developing-rra/ [addon redis setup]: ../set-up-rra/ -[dev api http]: /riak/kv/2.2.0/developing/api/http/ +[dev api http]: {{< baseurl >}}riak/kv/2.2.0/developing/api/http/ [ee]: http://basho.com/contact/ diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector.md index 79feaf07e2..3abfe3a5aa 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector.md @@ -10,7 +10,6 @@ menu: weight: 101 parent: "addons" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector" --- The Spark-Riak connector enables you to connect Spark applications to Riak TS with the Spark RDD and Spark DataFrames APIs. You can write your app in Scala, Python, and Java. The connector makes it easy to partition the data you get from Riak so multiple Spark workers can process the data in parallel, and it has support for failover if a Riak node goes down while your Spark job is running. diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/building-testing.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/building-testing.md index 33efe05224..96ec9138a9 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/building-testing.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/building-testing.md @@ -11,7 +11,6 @@ menu: weight: 103 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/building-testing" --- If you want to download the source code of the Spark-Riak connector, build it, and install the results in your local repo, this is the document for you! Keep reading for instructions on downloading, building, and installing the connector.
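The building-testing page above promises download/build/install instructions that this diff elides. As a rough sketch, with the Java 8 and Maven 3 prerequisites listed in the next hunk installed, a conventional Maven build would look like the following; the exact goals and flags this project expects are an assumption, so defer to its README.

```bash
# Fetch the connector source (repository URL as referenced elsewhere in this changeset).
git clone https://github.com/basho/spark-riak-connector.git
cd spark-riak-connector

# Standard Maven lifecycle: compile, test, and install into the local repo.
mvn clean install
```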
@@ -26,7 +25,7 @@ In order to build the Spark-Riak connector, you'll need to have the following in * [Java OpenJDK 8](http://openjdk.java.net/install/) * [Maven 3](https://maven.apache.org/download.cgi) * [Spark 1.6](http://spark.apache.org/docs/latest/#downloading) -* [Riak TS](http://docs.basho.com/riak/ts/latest/installing/) +* [Riak TS]({{< baseurl >}}riak/ts/latest/installing/) ## Download diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/getting.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/getting.md index 543e28993c..65d9c45559 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/getting.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/getting.md @@ -10,7 +10,6 @@ menu: weight: 102 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/getting" --- > **Note:** diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/quick-start.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/quick-start.md index 98154e06ba..bb582d5186 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/quick-start.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/quick-start.md @@ -11,7 +11,6 @@ menu: weight: 101 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/quick-start" --- This guide will run you through a quick example that uses the Spark-Riak connector to read and write data using Java, Scala, and Python. We will assume you are running this guide on Mac OSX. @@ -19,8 +18,8 @@ This guide will run you through a quick example that uses the Spark-Riak connect ## Prerequisites - Update Homebrew with `brew update`. -- Install Riak TS OSX build. Instruction can be found [here](http://docs.basho.com/riak/ts/1.2.0/installing/mac-osx/) -- Set open file limits for Riak by following the guide [here](http://docs.basho.com/riak/latest/ops/tuning/open-files-limit/#Mac-OS-X). +- Install Riak TS OSX build. Instructions can be found [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/) +- Set open file limits for Riak by following the guide [here]({{< baseurl >}}riak/kv/latest/ops/tuning/open-files-limit/#Mac-OS-X). - Install Spark with `brew install apache-spark`. - Download the Spark-Riak connector uber jar (containing all dependencies) from here: https://github.com/basho/spark-riak-connector/releases/latest. diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage.md index a796f9bace..4f4730c0e2 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage.md @@ -11,7 +11,6 @@ menu: weight: 104 parent: "addons_spark_riak" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage" --- This section will walk you through setting up your application for development with the Spark-Riak connector.
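The quick-start above has you download the connector's uber jar; a common way to try it out interactively is to hand that jar to `spark-shell`. The jar path below is a placeholder for the file from the releases page, and the `spark.riak.connection.host` property is an assumption based on the connector's documentation, pointed at the `127.0.0.1:8087` node used elsewhere in these pages.

```bash
# Launch an interactive Spark shell with the connector on the classpath.
spark-shell \
  --jars /path/to/spark-riak-connector-uber.jar \
  --conf spark.riak.connection.host=127.0.0.1:8087
```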
diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/bulk-write.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/bulk-write.md index b784c266de..92aef7f677 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/bulk-write.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/bulk-write.md @@ -11,7 +11,6 @@ menu: weight: 107 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/bulk-write" --- To write into a Riak TS table, the Spark-Riak Connector splits the initial set of rows into smaller bulks and processes them in parallel. Bulk size can be configured using the `spark.riakts.write.bulk-size` property. The default is `100`. diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/config-spark-context.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/config-spark-context.md index f072a7abd8..2d2492d48a 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/config-spark-context.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/config-spark-context.md @@ -11,7 +11,6 @@ menu: weight: 101 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/config-spark-context" --- The following `import` statements should be included at the top of your Spark application to enable the connector: diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/dataframes.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/dataframes.md index bdcd940623..dc3831df90 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/dataframes.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/dataframes.md @@ -11,7 +11,6 @@ menu: weight: 104 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/dataframes" --- ## Spark Dataframes With TS Table diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/dates.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/dates.md index 6ce6b26c80..b8c41389ca 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/dates.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/dates.md @@ -11,7 +11,6 @@ menu: weight: 105 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/dates" --- Riak TS automatically stores all datetimes as a Long integer that represents milliseconds from the [beginning of the epoch](https://en.wikipedia.org/wiki/Unix_time). This is not very human-friendly, so we have provided a Spark configuration option called `spark.riakts.bindings.timestamp`. This option is for use with Automatic Schema Discovery and allows for conversion from Riak TS datetimes, which are stored as Longs, to Timestamps. The default value of this option is `useTimestamp`, which converts Longs to Timestamps. If you would like to use the original Long value, you can use the option value of `useLong`. All conversion takes place during Automatic Schema Discovery when reading from Riak TS tables.
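The two properties above, `spark.riakts.write.bulk-size` and `spark.riakts.bindings.timestamp`, are plain Spark configuration options, so they can be set when building the context. A minimal PySpark sketch, assuming the connector jar is already on the classpath and that `spark.riak.connection.host` is the connector's host property:

```python
from pyspark import SparkConf, SparkContext

conf = (
    SparkConf()
    .setAppName("riak-ts-example")
    # Assumed connector property; point at a running TS node.
    .set("spark.riak.connection.host", "127.0.0.1:8087")
    # Write in bulks of 500 rows instead of the default 100.
    .set("spark.riakts.write.bulk-size", "500")
    # Keep raw Long datetimes rather than converting to Timestamps.
    .set("spark.riakts.bindings.timestamp", "useLong")
)

sc = SparkContext(conf=conf)
```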
diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/range-query-partition.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/range-query-partition.md index 5af2c1c528..e70ae36b23 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/range-query-partition.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/range-query-partition.md @@ -11,10 +11,9 @@ menu: weight: 106 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/range-query-partition" --- -Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS](http://docs.basho.com/riakts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time. +Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS]({{< baseurl >}}riak/ts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time. To use this functionality, you must provide the following options: diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/reading-data.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/reading-data.md index 45bfaa7144..9afd9d0180 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/reading-data.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/reading-data.md @@ -11,7 +11,6 @@ menu: weight: 102 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/reading-data" --- ## Reading Data From TS Table diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/streaming-example.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/streaming-example.md index 983becf13d..fa0153bca9 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/streaming-example.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/streaming-example.md @@ -11,7 +11,6 @@ menu: weight: 108 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/streaming-example" --- > **Note:** @@ -28,7 +27,7 @@ path/to/kafka/bin/kafka-server-start.sh config/server.properties path/to/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic streaming ``` -We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instruction to do so [here](/riak/ts/1.2.0/installing/mac-osx/). +We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instructions to do so [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/). You will need to build the TS example as well. Please follow the instructions on [building the examples](https://github.com/basho/spark-riak-connector/tree/master/examples#building-and-running-examplesdemos).
diff --git a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/writing-data.md b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/writing-data.md index 4f86b1f5f4..55ec63e4c8 100644 --- a/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/writing-data.md +++ b/content/riak/ts/1.5.1/add-ons/spark-riak-connector/usage/writing-data.md @@ -11,7 +11,6 @@ menu: weight: 103 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/writing-data" --- ## Writing Data To TS Table diff --git a/content/riak/ts/1.5.1/configuring.md b/content/riak/ts/1.5.1/configuring.md index 2e784a6f94..8084a2e17d 100644 --- a/content/riak/ts/1.5.1/configuring.md +++ b/content/riak/ts/1.5.1/configuring.md @@ -16,15 +16,14 @@ version_history: - ["<=1.4.0", "using/configuring"] aliases: - /riakts/1.5.1/configuring/ -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring" --- -[riakconf]: /riak/ts/1.5.1/configuring/riakconf/ -[mdc]: /riak/ts/1.5.1/configuring/mdc/ -[global expiry]: /riak/ts/1.5.1/configuring/global-object-expiration/ -[kv config]: /riak/kv/2.2.0/configuring/reference -[WITH]: /riak/ts/1.5.1/using/creating-activating/#using-the-with-clause +[riakconf]: {{< baseurl >}}riak/ts/1.5.1/configuring/riakconf/ +[mdc]: {{< baseurl >}}riak/ts/1.5.1/configuring/mdc/ +[global expiry]: {{< baseurl >}}riak/ts/1.5.1/configuring/global-object-expiration/ +[kv config]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference +[WITH]: {{< baseurl >}}riak/ts/1.5.1/using/creating-activating/#using-the-with-clause Riak TS mostly relies on Riak KV's [default configuration settings][kv config]. However, there are a few TS-specific configurations you should know about: diff --git a/content/riak/ts/1.5.1/configuring/global-object-expiration.md b/content/riak/ts/1.5.1/configuring/global-object-expiration.md index 3391a66081..bd262bf8cb 100644 --- a/content/riak/ts/1.5.1/configuring/global-object-expiration.md +++ b/content/riak/ts/1.5.1/configuring/global-object-expiration.md @@ -17,7 +17,6 @@ version_history: - ["<=1.4.0", "using/global-object-expiration"] aliases: - /riakts/1.5.1/configuring/global-object-expiration/ -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring/global-object-expiration" --- [ttl]: https://en.wikipedia.org/wiki/Time_to_live diff --git a/content/riak/ts/1.5.1/configuring/mdc.md b/content/riak/ts/1.5.1/configuring/mdc.md index dd452e8c06..4c6147d74f 100644 --- a/content/riak/ts/1.5.1/configuring/mdc.md +++ b/content/riak/ts/1.5.1/configuring/mdc.md @@ -18,15 +18,14 @@ version_history: - ["<=1.4.0", "using/mdc"] aliases: - /riakts/1.5.1/configuring/mdc -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring/mdc" --- -[activating]: /riak/ts/1.5.1/using/creating-activating -[cluster ops v3 mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[activating]: {{< baseurl >}}riak/ts/1.5.1/using/creating-activating +[cluster ops v3 mdc]: {{< baseurl >}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter [ee]: http://basho.com/contact/ [Enterprise]: http://basho.com/products/riak-ts/ -[install]: /riak/ts/1.5.1/setup/installing +[install]: {{< baseurl >}}riak/ts/1.5.1/setup/installing Multi-Datacenter (MDC) replication makes it possible to replicate your time series data between Riak clusters. This document will walk through how to configure MDC to work with Riak TS. @@ -72,7 +71,7 @@ data definition language (DDL) on each cluster to make certain they are equivalent before synchronization occurs.
{{% note title="Tip" %}} -[`SHOW CREATE TABLE`](/riak/ts/1.5.1/using/querying/show-create-table/) makes this much easier. +[`SHOW CREATE TABLE`]({{< baseurl >}}riak/ts/1.5.1/using/querying/show-create-table/) makes this much easier. {{% /note %}} diff --git a/content/riak/ts/1.5.1/configuring/riakconf.md b/content/riak/ts/1.5.1/configuring/riakconf.md index ca415b6529..b1cffba07c 100644 --- a/content/riak/ts/1.5.1/configuring/riakconf.md +++ b/content/riak/ts/1.5.1/configuring/riakconf.md @@ -16,12 +16,11 @@ version_history: - [">=1.5.1", "using/configuring"] aliases: - /riakts/1.5.1/configuring/riakconf -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring/riakconf" --- [glossary quanta]: ../../learn/glossary/quanta -[Riak object settings]: /riak/kv/2.2.0/configuring/reference/#object-settings +[Riak object settings]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference/#object-settings Riak TS exposes a few configuration settings in riak.conf. This document will walk you through the TS configurations. diff --git a/content/riak/ts/1.5.1/developing.md b/content/riak/ts/1.5.1/developing.md index 3d965a300f..8b65d6c076 100644 --- a/content/riak/ts/1.5.1/developing.md +++ b/content/riak/ts/1.5.1/developing.md @@ -12,20 +12,19 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/developing/developing/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing" --- -[erlang]: /riak/ts/1.5.1/developing/erlang -[go]: /riak/ts/1.5.1/developing/golang -[http]: /riak/ts/1.5.1/developing/http -[java]: /riak/ts/1.5.1/developing/java -[ruby]: /riak/ts/1.5.1/developing/ruby -[python]: /riak/ts/1.5.1/developing/python -[csharp]: /riak/ts/1.5.1/developing/csharp -[nodejs]: /riak/ts/1.5.1/developing/nodejs -[erlang]: /riak/ts/1.5.1/developing/erlang -[php]: /riak/ts/1.5.1/developing/php +[erlang]: {{< baseurl >}}riak/ts/1.5.1/developing/erlang +[go]: {{< baseurl >}}riak/ts/1.5.1/developing/golang +[http]: {{< baseurl >}}riak/ts/1.5.1/developing/http +[java]: {{< baseurl >}}riak/ts/1.5.1/developing/java +[ruby]: {{< baseurl >}}riak/ts/1.5.1/developing/ruby +[python]: {{< baseurl >}}riak/ts/1.5.1/developing/python +[csharp]: {{< baseurl >}}riak/ts/1.5.1/developing/csharp +[nodejs]: {{< baseurl >}}riak/ts/1.5.1/developing/nodejs +[erlang]: {{< baseurl >}}riak/ts/1.5.1/developing/erlang +[php]: {{< baseurl >}}riak/ts/1.5.1/developing/php You can access Riak TS data over HTTP through the [API][http].
diff --git a/content/riak/ts/1.5.1/developing/csharp.md b/content/riak/ts/1.5.1/developing/csharp.md index 621a1bee2b..b6e1c834d5 100644 --- a/content/riak/ts/1.5.1/developing/csharp.md +++ b/content/riak/ts/1.5.1/developing/csharp.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/developing/csharp/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/csharp" --- diff --git a/content/riak/ts/1.5.1/developing/erlang.md b/content/riak/ts/1.5.1/developing/erlang.md index 04f7432fde..1663499c60 100644 --- a/content/riak/ts/1.5.1/developing/erlang.md +++ b/content/riak/ts/1.5.1/developing/erlang.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/developing/erlang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/erlang" --- diff --git a/content/riak/ts/1.5.1/developing/golang.md b/content/riak/ts/1.5.1/developing/golang.md index a7b9dd03b9..4783dfdfa7 100644 --- a/content/riak/ts/1.5.1/developing/golang.md +++ b/content/riak/ts/1.5.1/developing/golang.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/developing/golang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/golang" --- diff --git a/content/riak/ts/1.5.1/developing/http.md b/content/riak/ts/1.5.1/developing/http.md index 15073fcac7..5e6a858808 100644 --- a/content/riak/ts/1.5.1/developing/http.md +++ b/content/riak/ts/1.5.1/developing/http.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/developing/http/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/http" --- diff --git a/content/riak/ts/1.5.1/developing/java.md b/content/riak/ts/1.5.1/developing/java.md index 681bf2cf48..711758b3a1 100644 --- a/content/riak/ts/1.5.1/developing/java.md +++ b/content/riak/ts/1.5.1/developing/java.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/developing/java/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/java" --- diff --git a/content/riak/ts/1.5.1/developing/nodejs.md b/content/riak/ts/1.5.1/developing/nodejs.md index 7471588551..b8e9142eda 100644 --- a/content/riak/ts/1.5.1/developing/nodejs.md +++ b/content/riak/ts/1.5.1/developing/nodejs.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/developing/nodejs/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/nodejs" --- diff --git a/content/riak/ts/1.5.1/developing/php.md b/content/riak/ts/1.5.1/developing/php.md index 68cafcd34d..3fbf60cdc3 100644 --- a/content/riak/ts/1.5.1/developing/php.md +++ b/content/riak/ts/1.5.1/developing/php.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/developing/php/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/php" --- diff --git a/content/riak/ts/1.5.1/developing/python.md b/content/riak/ts/1.5.1/developing/python.md index eb87e18ca6..2917e0a686 100644 --- a/content/riak/ts/1.5.1/developing/python.md +++ b/content/riak/ts/1.5.1/developing/python.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/developing/python/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/python" --- diff --git a/content/riak/ts/1.5.1/developing/ruby.md b/content/riak/ts/1.5.1/developing/ruby.md index 7241b38dab..3f8a6f36e3 100644 --- a/content/riak/ts/1.5.1/developing/ruby.md +++ b/content/riak/ts/1.5.1/developing/ruby.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - 
/riakts/1.5.1/developing/ruby/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/ruby" --- You can develop applications and tools using Riak TS with the Riak Ruby client. diff --git a/content/riak/ts/1.5.1/downloads.md b/content/riak/ts/1.5.1/downloads.md index ee5fba1d6e..e4216046c3 100644 --- a/content/riak/ts/1.5.1/downloads.md +++ b/content/riak/ts/1.5.1/downloads.md @@ -18,7 +18,6 @@ listed_projects: install_instructions_set: "installing" aliases: - /riakts/1.5.1/downloads/ -canonical_link: "https://docs.basho.com/riak/ts/latest/downloads" --- diff --git a/content/riak/ts/1.5.1/index.md b/content/riak/ts/1.5.1/index.md index 5729dc9ccc..f78a93f394 100644 --- a/content/riak/ts/1.5.1/index.md +++ b/content/riak/ts/1.5.1/index.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/ -canonical_link: "https://docs.basho.com/riak/ts/latest" --- diff --git a/content/riak/ts/1.5.1/learn-about.md b/content/riak/ts/1.5.1/learn-about.md index 2539d8b5cf..c1edd3911d 100644 --- a/content/riak/ts/1.5.1/learn-about.md +++ b/content/riak/ts/1.5.1/learn-about.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/learn-about/learn-about/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about" --- [table arch]: tablearchitecture/ diff --git a/content/riak/ts/1.5.1/learn-about/bestpractices.md b/content/riak/ts/1.5.1/learn-about/bestpractices.md index acb9b31cdd..fab41e55b0 100644 --- a/content/riak/ts/1.5.1/learn-about/bestpractices.md +++ b/content/riak/ts/1.5.1/learn-about/bestpractices.md @@ -13,11 +13,10 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/learn-about/bestpractices/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/bestpractices" --- -[glossary bucket]: http://docs.basho.com/riak/kv/2.2.0/learn/glossary/#bucket +[glossary bucket]: {{< baseurl >}}riak/kv/2.2.0/learn/glossary/#bucket [planning column def]: ../../using/planning/#column-definitions [planning partition]: ../../using/planning/#partition-key [planning primary]: ../../using/planning/#primary-key diff --git a/content/riak/ts/1.5.1/learn-about/sqlriakts.md b/content/riak/ts/1.5.1/learn-about/sqlriakts.md index e2ec662936..5055ce730a 100644 --- a/content/riak/ts/1.5.1/learn-about/sqlriakts.md +++ b/content/riak/ts/1.5.1/learn-about/sqlriakts.md @@ -13,7 +13,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/learn-about/sqlriakts -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/sqlriakts" --- diff --git a/content/riak/ts/1.5.1/learn-about/tablearchitecture.md b/content/riak/ts/1.5.1/learn-about/tablearchitecture.md index faf8bfbb22..89e8199701 100644 --- a/content/riak/ts/1.5.1/learn-about/tablearchitecture.md +++ b/content/riak/ts/1.5.1/learn-about/tablearchitecture.md @@ -13,7 +13,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/learn-about/advancedplanning/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/tablearchitecture" --- diff --git a/content/riak/ts/1.5.1/learn-about/timestamps.md b/content/riak/ts/1.5.1/learn-about/timestamps.md index 31fa9e6e1a..d518c20b9f 100644 --- a/content/riak/ts/1.5.1/learn-about/timestamps.md +++ b/content/riak/ts/1.5.1/learn-about/timestamps.md @@ -13,7 +13,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/learn-about/timestamps/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/timestamps" --- [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 diff --git 
a/content/riak/ts/1.5.1/releasenotes.md b/content/riak/ts/1.5.1/releasenotes.md index 632de3b444..f738257ec1 100644 --- a/content/riak/ts/1.5.1/releasenotes.md +++ b/content/riak/ts/1.5.1/releasenotes.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/releasenotes -canonical_link: "https://docs.basho.com/riak/ts/latest/releasenotes" --- @@ -53,7 +52,7 @@ Riak TS has significantly improved performance, thanks to streamlining of the on ## New Features -* `ASC` and `DESC` have been added to the CREATE TABLE statement. Adding the ASC/DESC keywords to your local key during `CREATE TABLE` means you can have your data pre-sorted in ascending or descending order as it's input into your TS table. You can read more about `ASC`/`DESC` in the local key [here](/riak/ts/1.5.1/using/planning). +* `ASC` and `DESC` have been added to the CREATE TABLE statement. Adding the ASC/DESC keywords to your local key during `CREATE TABLE` means you can have your data pre-sorted in ascending or descending order as it's input into your TS table. You can read more about `ASC`/`DESC` in the local key [here]({{< baseurl >}}riak/ts/1.5.1/using/planning). * [[PR 1427](https://github.com/basho/riak_kv/pull/1427)] * [[PR 1500](https://github.com/basho/riak_kv/pull/1500)] * [[PR 1558](https://github.com/basho/riak_kv/pull/1558 )] @@ -63,17 +62,17 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * [[riak_test PR 1200](https://github.com/basho/riak_test/pull/1200)] * [[riak_test PR 1081](https://github.com/basho/riak_test/pull/1081)] * [[riak_test PR 1201](https://github.com/basho/riak_test/pull/1201 )] -* The ORDER BY statement has been added to `SELECT`, allowing you to sort the results of your query in various ways, including: ascending or descending order, or nulls first or last. You can learn about `ORDER BY` [here](/riak/ts/1.5.1/using/querying/select/order-by). +* The ORDER BY statement has been added to `SELECT`, allowing you to sort the results of your query in various ways, including ascending or descending order and nulls first or last. You can learn about `ORDER BY` [here]({{< baseurl >}}riak/ts/1.5.1/using/querying/select/order-by). * [[PR 1479](https://github.com/basho/riak_kv/pull/1479)] * [[riak erlang client PR 321](https://github.com/basho/riak-erlang-client/pull/321)] * [[riak_pb PR 208](https://github.com/basho/riak_pb/pull/208)] * [[riak_test PR 1152](https://github.com/basho/riak_test/pull/1152)] -* `LIMIT` allows you to specify that you only want a specific number of records from your query, and it can be expanded by `OFFSET`. You can read about how to use the LIMIT statement [here](/riak/ts/1.5.1/using/querying/select/limit). +* `LIMIT` allows you to specify that you only want a specific number of records from your query, and it can be combined with `OFFSET`. You can read about how to use the LIMIT statement [here]({{< baseurl >}}riak/ts/1.5.1/using/querying/select/limit). * [[PR 1479](https://github.com/basho/riak_kv/pull/1479)] * [[riak erlang client PR 321](https://github.com/basho/riak-erlang-client/pull/321)] * [[riak_pb PR 208](https://github.com/basho/riak_pb/pull/208)] * [[riak_test PR 1152](https://github.com/basho/riak_test/pull/1152)] -* You can now use `DELETE` from riak shell to remove a record from your TS table. Learn all about `DELETE` [here](/riak/ts/1.5.1/using/querying/delete). +* You can now use `DELETE` from riak shell to remove a record from your TS table. Learn all about `DELETE` [here]({{< baseurl >}}riak/ts/1.5.1/using/querying/delete).
* [[PR 1552](https://github.com/basho/riak_kv/pull/1552)] * [[riak_ql PR 145](https://github.com/basho/riak_ql/pull/145)] * [[riak_shell PR 23](https://github.com/basho/riak_shell/pull/23)] @@ -85,14 +84,14 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * [[riak_ql PR 144](https://github.com/basho/riak_ql/pull/144)] * [[riak_shell PR 56](https://github.com/basho/riak_shell/pull/56)] * [[riak_test PR 1169](https://github.com/basho/riak_test/pull/1169)] -* You can now run `SHOW CREATE TABLE` to review SQL definition and replication properties of existing Riak TS tables. You can read more about the SHOW CREATE TABLE statement [here](/riak/ts/1.5.1/using/querying/show-create-table). +* You can now run `SHOW CREATE TABLE` to review the SQL definition and replication properties of existing Riak TS tables. You can read more about the SHOW CREATE TABLE statement [here]({{< baseurl >}}riak/ts/1.5.1/using/querying/show-create-table). * [[PR 1536](https://github.com/basho/riak_kv/pull/1536)] * [[riak_ql 155](https://github.com/basho/riak_ql/pull/155)] * [[riak_ql 159](https://github.com/basho/riak_ql/pull/159 )] * [[riak_shell PR 62](https://github.com/basho/riak_shell/pull/62)] * [[riak_test PR 1193](https://github.com/basho/riak_test/pull/1193)] * [[riak_test PR 1211](https://github.com/basho/riak_test/pull/1211)] -* A BLOB data type is now available. BLOB allows the storage of unstructured data, binary or opaque (JSON), in a Riak TS column. Learn about BLOB data type [here](/riak/ts/1.5.1/using/writingdata/#blob-data). +* A BLOB data type is now available. BLOB allows the storage of unstructured data, binary or opaque (JSON), in a Riak TS column. Learn about the BLOB data type [here]({{< baseurl >}}riak/ts/1.5.1/using/writingdata/#blob-data). * [[PR 1540](https://github.com/basho/riak_kv/pull/1540)] * [[riak_pb PR 211](https://github.com/basho/riak_pb/issues/211)] * [[riak_ql PR 156](https://github.com/basho/riak_ql/issues/156)] @@ -116,7 +115,7 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * The timestamp type is now able to be used as an argument in aggregate functions. [[riak_ql PR 146](https://github.com/basho/riak_ql/pull/146) & [riak_ql PR 147](https://github.com/basho/riak_ql/pull/147)] * You can now see the Status field of your TS table when you use `SHOW TABLES`. [[PR 1514](https://github.com/basho/riak_kv/pull/1514 ) and [PR 1176](https://github.com/basho/riak_test/pull/1176 )] -* Introduced the following new parameters in riak.conf. See the [TS configuration docs](/riak/ts/1.5.1/configuring/riakconf) for details. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] +* Introduced the following new parameters in riak.conf. See the [TS configuration docs]({{< baseurl >}}riak/ts/1.5.1/configuring/riakconf) for details. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] * riak_kv.query.timeseries.max_returned_data_size * riak_kv.query.timeseries.max_running_fsms * riak_kv.query.timeseries.qbuf_root_path @@ -131,7 +130,7 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * Write-once conflict resolution has been changed to be more predictable. It is now based on timestamp rather than a SHA-1 hash of the value part. [[PR 1512](https://github.com/basho/riak_kv/pull/1512)] * LevelDB has been updated to version 2.0.33 [[eleveldb PR 231](https://github.com/basho/eleveldb/pull/231)] * LZ4 is now the default compression for LevelDB.
[[leveldb PR 164](https://github.com/basho/leveldb/pull/164) & [eleveldb PR 208](https://github.com/basho/eleveldb/pull/208)] -* Updated the default value for `riak_kv.query.timeseries.max_quanta_span`. See the [TS configuration docs](/riak/ts/1.5.1/configuring/riakconf) for details. **Note:** due to a bug in the code, the `max_quanta_span` is capped at 1000. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] +* Updated the default value for `riak_kv.query.timeseries.max_quanta_span`. See the [TS configuration docs]({{< baseurl >}}riak/ts/1.5.1/configuring/riakconf) for details. **Note:** due to a bug in the code, the `max_quanta_span` is capped at 1000. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] * The default value for `OFFSET` is `[ ]`. [[PR 1546](https://github.com/basho/riak_kv/pull/1546)] @@ -171,4 +170,4 @@ Riak TS is compatible with the following: * You cannot use Bitcask with Riak TS tables. * `riak_kv.query.timeseries.max_quanta_span` is capped at 1000 due to a bug. -You can see a table of KV and TS features [here](/riak/ts/1.5.1/using/core-fundamentals/). +You can see a table of KV and TS features [here]({{< baseurl >}}riak/ts/1.5.1/using/core-fundamentals/). diff --git a/content/riak/ts/1.5.1/setup.md b/content/riak/ts/1.5.1/setup.md index 6573b03a4b..eb2cd333a3 100644 --- a/content/riak/ts/1.5.1/setup.md +++ b/content/riak/ts/1.5.1/setup.md @@ -14,12 +14,11 @@ version_history: in: "1.5.1+" aliases: - /riakts/1.5.1/setup/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/" --- -[install]: /riak/ts/1.5.1/setup/installing -[upgrade]: /riak/ts/1.5.1/setup/upgrading -[downgrade]: /riak/ts/1.5.1/setup/downgrading +[install]: {{< baseurl >}}riak/ts/1.5.1/setup/installing +[upgrade]: {{< baseurl >}}riak/ts/1.5.1/setup/upgrading +[downgrade]: {{< baseurl >}}riak/ts/1.5.1/setup/downgrading ## In This Section diff --git a/content/riak/ts/1.5.1/setup/downgrading.md b/content/riak/ts/1.5.1/setup/downgrading.md index 4429877b41..43cff7f0b1 100644 --- a/content/riak/ts/1.5.1/setup/downgrading.md +++ b/content/riak/ts/1.5.1/setup/downgrading.md @@ -15,7 +15,6 @@ version_history: aliases: - /riakts/1.5.1/setup/downgrading/ - /riakts/1.5.1/downgrading/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/downgrading/" --- ## Caution diff --git a/content/riak/ts/1.5.1/setup/installing.md b/content/riak/ts/1.5.1/setup/installing.md index 324d6b3af7..be8f7d12d4 100644 --- a/content/riak/ts/1.5.1/setup/installing.md +++ b/content/riak/ts/1.5.1/setup/installing.md @@ -19,15 +19,14 @@ aliases: - /riakts/1.5.1/installing/installing/ - /riakts/1.5.1/setup/installing/ - /riak/ts/1.5.1/installing/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/" --- [AWS]: aws/ -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [Centos]: rhel-centos/ [Debian]: debian-ubuntu/ -[download]: /riak/ts/1.5.1/downloads/ +[download]: {{< baseurl >}}riak/ts/1.5.1/downloads/ [OSX]: mac-osx/ [source]: source/ [Ubuntu]: debian-ubuntu/ diff --git a/content/riak/ts/1.5.1/setup/installing/aws.md b/content/riak/ts/1.5.1/setup/installing/aws.md index 1c34931a93..0672f4f533 100644 --- a/content/riak/ts/1.5.1/setup/installing/aws.md +++ b/content/riak/ts/1.5.1/setup/installing/aws.md @@ -18,14 +18,13 @@ aliases: - /riakts/1.5.1/installing/aws/ - /riakts/1.5.1/setup/installing/aws/ - /riak/ts/1.5.1/installing/aws/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/aws/" --- [AWS]: http://aws.amazon.com -[download]:
/riak/ts/1.5.1/downloads/ +[download]: {{< baseurl >}}riak/ts/1.5.1/downloads/ [ec2 guide]: http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html -[security basics]: /riak/ts/1.5.1/using/security/ +[security basics]: {{< baseurl >}}riak/ts/1.5.1/using/security/ Riak TS can be installed on AWS virtual machines (VMs) using a binary @@ -42,7 +41,7 @@ Get started by launching a Riak TS virtual machine via the AWS Marketplace. (You 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair. - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{< baseurl >}}images/aws-marketplace-settings.png) 4. Then click the **Accept Terms and Launch with 1-Click** button. @@ -67,7 +66,7 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{< baseurl >}}images/aws-marketplace-security-group.png) We also recommend that you read more about [Security in TS][security basics]. diff --git a/content/riak/ts/1.5.1/setup/installing/debian-ubuntu.md b/content/riak/ts/1.5.1/setup/installing/debian-ubuntu.md index 2a70dced6c..2b0d82fd1e 100644 --- a/content/riak/ts/1.5.1/setup/installing/debian-ubuntu.md +++ b/content/riak/ts/1.5.1/setup/installing/debian-ubuntu.md @@ -18,13 +18,12 @@ aliases: - /riakts/1.5.1/installing/debian-ubuntu/ - /riakts/1.5.1/setup/installing/debian-ubuntu/ - /riak/ts/1.5.1/installing/debian-ubuntu/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/debian-ubuntu/" --- -[download]: /riak/ts/1.5.1/downloads/ -[openfileslimit]: /riak/kv/2.2.0/using/performance/open-files-limit -[planning]: /riak/ts/1.5.1/using/planning -[security basics pam]: /riak/ts/1.5.1/using/security/sources-management/#pam-based-authentication +[download]: {{< baseurl >}}riak/ts/1.5.1/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.2.0/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.5.1/using/planning +[security basics pam]: {{< baseurl >}}riak/ts/1.5.1/using/security/sources-management/#pam-based-authentication Riak TS can be installed on Debian or Ubuntu-based systems using a binary diff --git a/content/riak/ts/1.5.1/setup/installing/mac-osx.md b/content/riak/ts/1.5.1/setup/installing/mac-osx.md index bb0f427282..32efc1f923 100644 --- a/content/riak/ts/1.5.1/setup/installing/mac-osx.md +++ b/content/riak/ts/1.5.1/setup/installing/mac-osx.md @@ -18,13 +18,12 @@ aliases: - /riakts/1.5.1/installing/mac-osx/ - /riakts/1.5.1/setup/installing/mac-osx/ - /riak/ts/1.5.1/installing/mac-osx/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/mac-osx/" --- -[download]: /riak/ts/1.5.1/downloads/ -[openfileslimit]: /riak/kv/2.2.0/using/performance/open-files-limit -[planning]: /riak/ts/1.5.1/using/planning +[download]: {{< baseurl >}}riak/ts/1.5.1/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.2.0/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.5.1/using/planning Riak TS can be installed on Mac OS X systems using a binary package available [here][download].
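Whichever package the installation pages above lead you to, it is worth confirming the node came up before moving on to the planning docs. A minimal check with the stock `riak` control script:

```bash
# Start the freshly installed node, then confirm it responds.
riak start
riak ping   # prints "pong" when the node is up
```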
diff --git a/content/riak/ts/1.5.1/setup/installing/rhel-centos.md b/content/riak/ts/1.5.1/setup/installing/rhel-centos.md index 46146d8116..c3147552c6 100644 --- a/content/riak/ts/1.5.1/setup/installing/rhel-centos.md +++ b/content/riak/ts/1.5.1/setup/installing/rhel-centos.md @@ -18,12 +18,11 @@ aliases: - /riakts/1.5.1/installing/rhel-centos/ - /riakts/1.5.1/setup/installing/rhel-centos/ - /riak/ts/1.5.1/installing/rhel-centos/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/rhel-centos/" --- -[download]: /riak/ts/1.5.1/downloads/ -[openfileslimit]: /riak/kv/2.2.0/using/performance/open-files-limit -[planning]: /riak/ts/1.5.1/using/planning/ +[download]: {{< baseurl >}}riak/ts/1.5.1/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.2.0/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.5.1/using/planning/ Riak TS can be installed on CentOS-based systems using a binary diff --git a/content/riak/ts/1.5.1/setup/installing/source.md b/content/riak/ts/1.5.1/setup/installing/source.md index ee6371ccb8..5934e43db1 100644 --- a/content/riak/ts/1.5.1/setup/installing/source.md +++ b/content/riak/ts/1.5.1/setup/installing/source.md @@ -18,16 +18,15 @@ aliases: - /riakts/1.5.1/installing/source/ - /riakts/1.5.1/setup/installing/source/ - /riak/ts/1.5.1/installing/source/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/" --- -[download]: /riak/ts/1.5.1/downloads/ +[download]: {{< baseurl >}}riak/ts/1.5.1/downloads/ [Erlang]: http://www.erlang.org/ [GCC]: https://gcc.gnu.org/ [Git]: https://git-scm.com/ -[install erlang]: /riak/ts/1.5.1/setup/installing/source/erlang -[planning]: /riak/ts/1.5.1/using/planning/ +[install erlang]: {{< baseurl >}}riak/ts/1.5.1/setup/installing/source/erlang +[planning]: {{< baseurl >}}riak/ts/1.5.1/using/planning/ [Riak TS GitHub repository]: https://github.com/basho/riak/tree/riak_ts-1.5.1 diff --git a/content/riak/ts/1.5.1/setup/installing/source/erlang.md b/content/riak/ts/1.5.1/setup/installing/source/erlang.md index 34c2028019..65509e5e18 100644 --- a/content/riak/ts/1.5.1/setup/installing/source/erlang.md +++ b/content/riak/ts/1.5.1/setup/installing/source/erlang.md @@ -18,7 +18,6 @@ aliases: - /riakts/1.5.1/installing/source/erlang/ - /riakts/1.5.1/setup/installing/source/erlang/ - /riak/ts/1.5.1/installing/source/erlang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/erlang/" --- @@ -34,7 +33,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/e [make]: http://www.gnu.org/software/make/ [ncurses]: http://www.gnu.org/software/ncurses/ [OpenSSL]: https://www.openssl.org/ -[source]: /riak/ts/1.5.1/setup/installing/source/ +[source]: {{< baseurl >}}riak/ts/1.5.1/setup/installing/source/ [XCode Developer Tools]: https://developer.apple.com/xcode/downloads/ diff --git a/content/riak/ts/1.5.1/setup/upgrading.md b/content/riak/ts/1.5.1/setup/upgrading.md index cd69858023..ca1919584c 100644 --- a/content/riak/ts/1.5.1/setup/upgrading.md +++ b/content/riak/ts/1.5.1/setup/upgrading.md @@ -15,17 +15,16 @@ version_history: aliases: - /riakts/1.5.1/setup/upgrading/ - /riakts/1.5.1/upgrading/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/upgrading/" --- -[use admin commands]: /riak/kv/2.2.0/using/admin/commands -[use admin riak-admin]: /riak/kv/2.2.0/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.2.0/developing/usage/secondary-indexes +[use admin commands]: {{< baseurl >}}riak/kv/2.2.0/using/admin/commands +[use admin riak-admin]: {{< baseurl >}}riak/kv/2.2.0/using/admin/riak-admin +[usage
secondary-indexes]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/secondary-indexes [riak ts enterprise]: http://basho.com/products/riak-ts/ -[cluster ops mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.2.0/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.2.0/using/reference/jmx -[snmp]: /riak/kv/2.2.0/using/reference/snmp +[cluster ops mdc]: {{< baseurl >}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{< baseurl >}}riak/kv/2.2.0/configuring/v3-multi-datacenter +[jmx monitor]: {{< baseurl >}}riak/kv/2.2.0/using/reference/jmx +[snmp]: {{< baseurl >}}riak/kv/2.2.0/using/reference/snmp {{% note title="**CAUTION**" %}} diff --git a/content/riak/ts/1.5.1/using.md b/content/riak/ts/1.5.1/using.md index 5ab1a96229..a657c89657 100644 --- a/content/riak/ts/1.5.1/using.md +++ b/content/riak/ts/1.5.1/using.md @@ -12,17 +12,16 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/using/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using" --- [activating]: creating-activating/ [aggregate]: querying/select/aggregate-functions/ [arithmetic]: querying/select/arithmetic-operations/ -[configuring]: /riak/ts/1.5.1/configuring/ -[download]: /riak/ts/1.5.1/downloads/ +[configuring]: {{< baseurl >}}riak/ts/1.5.1/configuring/ +[download]: {{< baseurl >}}riak/ts/1.5.1/downloads/ [installing]: ../setup/installing/ -[mdc]: /riak/ts/1.5.1/configuring/mdc/ +[mdc]: {{< baseurl >}}riak/ts/1.5.1/configuring/mdc/ [planning]: planning/ [querying]: querying/ [riakshell]: riakshell/ diff --git a/content/riak/ts/1.5.1/using/core-fundamentals.md b/content/riak/ts/1.5.1/using/core-fundamentals.md index 0accd1ed7f..520247ac9b 100644 --- a/content/riak/ts/1.5.1/using/core-fundamentals.md +++ b/content/riak/ts/1.5.1/using/core-fundamentals.md @@ -12,11 +12,10 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/using/core-fundamentals/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/core-fundamentals" --- -[Riak KV]: /riak/kv/2.2.0/ +[Riak KV]: {{< baseurl >}}riak/kv/2.2.0/ Riak TS shares the same core codebase as [Riak KV], which allows you to operate a TS cluster much the same as you would operate a KV cluster. @@ -53,40 +52,40 @@ Below, you will find links to Riak KV documents that are applicable and helpful ### Configuration -Basic Configuration will help you set up your Riak core configuration. +Basic Configuration will help you set up your Riak core configuration. -Managing Configuration will show you how to retrieve your configuration, check your settings, and debug your configuration. +Managing Configuration will show you how to retrieve your configuration, check your settings, and debug your configuration. -Configuration Reference provides you with everything you need to know about configuring Riak core. +Configuration Reference provides you with everything you need to know about configuring Riak core. -Load Balancing will walk you through configuring a load balancer with your Riak cluster. +Load Balancing will walk you through configuring a load balancer with your Riak cluster. ### Cluster Operations -Running a Cluster gives you a basic walkthrough of how to run a Riak cluster. +Running a Cluster gives you a basic walkthrough of how to run a Riak cluster. -Cluster Administration provides a series of links to information on various ways to administer your cluster. +Cluster Administration provides a series of links to information on various ways to administer your cluster. -Adding & Removing Nodes walks you through the process of adding or removing nodes in your cluster.
+Adding & Removing Nodes walks you through the process of adding or removing nodes in your cluster. -Changing Cluster Information will show you how to change various parts of your cluster. +Changing Cluster Information will show you how to change various parts of your cluster. -Replace a Node is a step-by-step guide for how to replace a node in your cluster. +Replace a Node is a step-by-step guide for how to replace a node in your cluster. -Inspect a Node shows you the steps and tools for inspecting nodes in your cluster. +Inspect a Node shows you the steps and tools for inspecting nodes in your cluster. -Logging will provide you the steps for enabling and disabling debug logging. +Logging will provide you the steps for enabling and disabling debug logging. -Backing Up is a how-to guide for backing up your data. +Backing Up is a how-to guide for backing up your data. -Handoff will tell you everything you need to know to enable and disable handoff. +Handoff will tell you everything you need to know to enable and disable handoff. ### Repair, Tuning, and Reference -Repair & Recovery will cover all of the important topics of what can go wrong and what you can do to fix it. +Repair & Recovery will cover all of the important topics of what can go wrong and what you can do to fix it. -Performance will give you all the information you need to tune your cluster configurations to optimize performance. +Performance will give you all the information you need to tune your cluster configurations to optimize performance. -Reference will provide you with explanations of various core functions, such as logging and handoff. \ No newline at end of file +Reference will provide you with explanations of various core functions, such as logging and handoff. \ No newline at end of file diff --git a/content/riak/ts/1.5.1/using/creating-activating.md b/content/riak/ts/1.5.1/using/creating-activating.md index fc34c79bca..c47fb4d394 100644 --- a/content/riak/ts/1.5.1/using/creating-activating.md +++ b/content/riak/ts/1.5.1/using/creating-activating.md @@ -12,7 +12,6 @@ project_version: "1.5.1" toc: true aliases: - /riakts/1.5.1/using/creating-activating/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/creating-activating" --- @@ -26,7 +25,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/creating-activating [ruby]: ../../developing/ruby#sql-queries [planning]: ../planning/ [writing]: ../writingdata/ -[Riak bucket properties]: /riak/kv/2.2.0/configuring/reference/#default-bucket-properties +[Riak bucket properties]: {{}}riak/kv/2.2.0/configuring/reference/#default-bucket-properties Once you have [planned out your table][planning] you can create it by: diff --git a/content/riak/ts/1.5.1/using/deleting-data.md b/content/riak/ts/1.5.1/using/deleting-data.md index f103c6ecc0..87ac4997de 100644 --- a/content/riak/ts/1.5.1/using/deleting-data.md +++ b/content/riak/ts/1.5.1/using/deleting-data.md @@ -14,11 +14,10 @@ version_history: in: "1.5.1+" aliases: - /riakts/1.5.1/using/deleting-data/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/deleting-data" --- -[delete]: /riak/ts/1.5.1/using/querying/delete -[expiry]: /riak/ts/1.5.1/configuring/global-object-expiration +[delete]: {{}}riak/ts/1.5.1/using/querying/delete +[expiry]: {{}}riak/ts/1.5.1/configuring/global-object-expiration Riak TS offers several ways to delete data: with clients, using the DELETE statement, and through global expiry. Global expiry is more efficient than other delete options but operates on all of your data. 
`DELETE` works per-row but takes more resources to run.
diff --git a/content/riak/ts/1.5.1/using/planning.md b/content/riak/ts/1.5.1/using/planning.md
index ed36d00e87..c8c075c5f1 100644
--- a/content/riak/ts/1.5.1/using/planning.md
+++ b/content/riak/ts/1.5.1/using/planning.md
@@ -12,7 +12,6 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/planning/
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/planning"
---
@@ -23,7 +22,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/planning"
[epoch]: https://en.wikipedia.org/wiki/Unix_time
[installing]: ../../setup/installing/
[sql]: ../../learn-about/sqlriakts/
-[order by]: /riak/ts/1.5.1/using/querying/select/order-by
+[order by]: {{< baseurl >}}riak/ts/1.5.1/using/querying/select/order-by
You've [installed][installing] Riak TS, and you're ready to create a table.
diff --git a/content/riak/ts/1.5.1/using/querying.md b/content/riak/ts/1.5.1/using/querying.md
index 75b0f745fd..cbaa82e21b 100644
--- a/content/riak/ts/1.5.1/using/querying.md
+++ b/content/riak/ts/1.5.1/using/querying.md
@@ -12,7 +12,6 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/querying/
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying"
---
[activating]: ../creating-activating/
diff --git a/content/riak/ts/1.5.1/using/querying/delete.md b/content/riak/ts/1.5.1/using/querying/delete.md
index a1aa72df0f..a64df133e1 100644
--- a/content/riak/ts/1.5.1/using/querying/delete.md
+++ b/content/riak/ts/1.5.1/using/querying/delete.md
@@ -14,12 +14,11 @@ version_history:
in: "1.5.1+"
aliases:
- /riakts/1.5.1/using/querying/delete
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/delete"
---
-[query guidelines]: /riak/ts/1.5.1/using/querying/guidelines/
-[time rep]: /riak/ts/1.5.1/using/timerepresentations/
-[http delete]: /riak/ts/1.4.0/using/writingdata/#deleting-data
+[query guidelines]: {{< baseurl >}}riak/ts/1.5.1/using/querying/guidelines/
+[time rep]: {{< baseurl >}}riak/ts/1.5.1/using/timerepresentations/
+[http delete]: {{< baseurl >}}riak/ts/1.4.0/using/writingdata/#deleting-data
# DELETE
diff --git a/content/riak/ts/1.5.1/using/querying/describe.md b/content/riak/ts/1.5.1/using/querying/describe.md
index 16b8893678..6949512975 100644
--- a/content/riak/ts/1.5.1/using/querying/describe.md
+++ b/content/riak/ts/1.5.1/using/querying/describe.md
@@ -12,10 +12,9 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/querying/describe
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/describe"
---
-[riak shell]: /riak/ts/1.5.1/using/riakshell
+[riak shell]: {{< baseurl >}}riak/ts/1.5.1/using/riakshell
You can use the DESCRIBE statement to obtain the definition of your Riak TS table. This document will show you how to execute `DESCRIBE` in TS.
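As a concrete illustration of the `DESCRIBE` statement covered by the `describe.md` page above, here is a minimal sketch using the official Riak Python client; the table name `GeoCheckin` and the local node address are assumptions for the example, not content from the page itself:

```python
# Minimal sketch: retrieve a TS table definition with DESCRIBE.
# Assumes a local node on the default Protocol Buffers port and an
# existing table named GeoCheckin (both illustrative).
from riak import RiakClient

client = RiakClient(host='127.0.0.1', pb_port=8087)
result = client.ts_query('GeoCheckin', 'DESCRIBE GeoCheckin')

# Each returned row describes one column of the table.
for row in result.rows:
    print(row)
```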
diff --git a/content/riak/ts/1.5.1/using/querying/explain.md b/content/riak/ts/1.5.1/using/querying/explain.md
index 5eb705a7e7..14c2cd7c16 100644
--- a/content/riak/ts/1.5.1/using/querying/explain.md
+++ b/content/riak/ts/1.5.1/using/querying/explain.md
@@ -12,13 +12,12 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/querying/explain
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/explain"
---
-[creating-activating]: /riak/ts/1.5.1/using/creating-activating
-[develop]: /riak/ts/1.5.1/developing
-[planning]: /riak/ts/1.5.1/using/planning
-[riak shell]: /riak/ts/1.5.1/using/riakshell
+[creating-activating]: {{< baseurl >}}riak/ts/1.5.1/using/creating-activating
+[develop]: {{< baseurl >}}riak/ts/1.5.1/developing
+[planning]: {{< baseurl >}}riak/ts/1.5.1/using/planning
+[riak shell]: {{< baseurl >}}riak/ts/1.5.1/using/riakshell
You can use the EXPLAIN statement to better understand how a query you would like to run will be executed. This document will show you how to use `EXPLAIN` in Riak TS.
diff --git a/content/riak/ts/1.5.1/using/querying/guidelines.md b/content/riak/ts/1.5.1/using/querying/guidelines.md
index bdc81ccf79..aac5bc04dd 100644
--- a/content/riak/ts/1.5.1/using/querying/guidelines.md
+++ b/content/riak/ts/1.5.1/using/querying/guidelines.md
@@ -13,7 +13,6 @@ toc: true
aliases:
- /riakts/1.5.1/using/querying/basic-querying
- /riakts/1.5.1/using/querying/guidelines
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/guidelines"
---
[table arch]: ../../../learn-about/tablearchitecture/#data-modeling
[writing]: ../../writingdata/
[planning]: ../../planning#column-definitions
[iso8601]: ../../../timerepresentations/
-[SELECT]: /riak/ts/1.5.1/using/querying/SELECT#iso_8601
+[SELECT]: {{< baseurl >}}riak/ts/1.5.1/using/querying/SELECT#iso_8601
[configuring]: ../../../configuring/riakconf/
@@ -60,7 +59,7 @@ Any quantized field in your partition key must be included in the query as a bounded range.
* Invalid: `time > 1449864277000 or time < 1449864290000`
{{% note title="A Note About `SELECT`" %}}
-It is possible to use ISO 8601-compliant date/time strings rather than integer timestamps in SELECT statements. Please see [SELECT](/riak/ts/1.5.1/using/querying/select/#iso-8601) for an example or [Time Representations](/riak/ts/1.5.1/using/timerepresentations/) for more information.
+It is possible to use ISO 8601-compliant date/time strings rather than integer timestamps in SELECT statements. Please see [SELECT]({{< baseurl >}}riak/ts/1.5.1/using/querying/select/#iso-8601) for an example or [Time Representations]({{< baseurl >}}riak/ts/1.5.1/using/timerepresentations/) for more information.
{{% /note %}}
diff --git a/content/riak/ts/1.5.1/using/querying/reference.md b/content/riak/ts/1.5.1/using/querying/reference.md
index b5dee88c02..922aa8579f 100644
--- a/content/riak/ts/1.5.1/using/querying/reference.md
+++ b/content/riak/ts/1.5.1/using/querying/reference.md
@@ -14,21 +14,20 @@ version_history:
in: "1.5.1+"
aliases:
- /riakts/1.5.1/using/querying/reference
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/reference"
---
-[select]: /riak/ts/1.5.1/using/querying/select/
-[describe]: /riak/ts/1.5.1/using/querying/describe/
-[delete]: /riak/ts/1.5.1/using/querying/delete/
-[explain]: /riak/ts/1.5.1/using/querying/explain/
-[show tables]: /riak/ts/1.5.1/using/querying/show-tables/
-[create table]: /riak/ts/1.5.1/using/creating-activating/
-[group by]: /riak/ts/1.5.1/using/querying/select/group-by/
-[order by]: /riak/ts/1.5.1/using/querying/select/order-by/
-[limit]: /riak/ts/1.5.1/using/querying/select/limit/
-[offset]: /riak/ts/1.5.1/using/querying/select/
-[arithmetic]: /riak/ts/1.5.1/using/querying/select/arithmetic-operations/
-[aggregate]: /riak/ts/1.5.1/using/querying/select/aggregate-functions/
+[select]: {{< baseurl >}}riak/ts/1.5.1/using/querying/select/
+[describe]: {{< baseurl >}}riak/ts/1.5.1/using/querying/describe/
+[delete]: {{< baseurl >}}riak/ts/1.5.1/using/querying/delete/
+[explain]: {{< baseurl >}}riak/ts/1.5.1/using/querying/explain/
+[show tables]: {{< baseurl >}}riak/ts/1.5.1/using/querying/show-tables/
+[create table]: {{< baseurl >}}riak/ts/1.5.1/using/creating-activating/
+[group by]: {{< baseurl >}}riak/ts/1.5.1/using/querying/select/group-by/
+[order by]: {{< baseurl >}}riak/ts/1.5.1/using/querying/select/order-by/
+[limit]: {{< baseurl >}}riak/ts/1.5.1/using/querying/select/limit/
+[offset]: {{< baseurl >}}riak/ts/1.5.1/using/querying/select/
+[arithmetic]: {{< baseurl >}}riak/ts/1.5.1/using/querying/select/arithmetic-operations/
+[aggregate]: {{< baseurl >}}riak/ts/1.5.1/using/querying/select/aggregate-functions/
This document lists each SQL statement available in Riak TS.
diff --git a/content/riak/ts/1.5.1/using/querying/select.md b/content/riak/ts/1.5.1/using/querying/select.md
index fe5ec61e39..26ea77a479 100644
--- a/content/riak/ts/1.5.1/using/querying/select.md
+++ b/content/riak/ts/1.5.1/using/querying/select.md
@@ -12,18 +12,17 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/querying/select
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select"
---
[aggregate functions]: aggregate-functions/
[arithmetic operations]: arithmetic-operations/
[GROUP BY]: group-by/
-[guidelines]: /riak/ts/1.5.1/using/querying/guidelines
+[guidelines]: {{< baseurl >}}riak/ts/1.5.1/using/querying/guidelines
[iso8601]: ../../timerepresentations/
-[iso8601 accuracy]: /riak/ts/1.5.1/using/timerepresentations/#reduced-accuracy
+[iso8601 accuracy]: {{< baseurl >}}riak/ts/1.5.1/using/timerepresentations/#reduced-accuracy
[ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601
-[learn timestamps accuracy]: /riak/ts/1.5.1/learn-about/timestamps/#reduced-accuracy
+[learn timestamps accuracy]: {{< baseurl >}}riak/ts/1.5.1/learn-about/timestamps/#reduced-accuracy
You can use the SELECT statement in Riak TS to query your TS dataset. This document will show you how to run various queries using `SELECT`.
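To make the ISO 8601 note above concrete, here is a minimal sketch of a `SELECT` that uses date/time strings instead of integer timestamps; the table, columns, and node address are assumptions for illustration:

```python
# Minimal sketch: SELECT over a bounded time range using ISO 8601
# strings rather than integer timestamps. Table and column names are
# illustrative; note that the quantized time field carries both an
# upper and a lower bound, as the querying guidelines require.
from riak import RiakClient

client = RiakClient(host='127.0.0.1', pb_port=8087)
query = """
SELECT weather, temperature FROM GeoCheckin
WHERE time > '2017-01-01 12:00:00' AND time < '2017-01-01 12:15:00'
  AND region = 'South Atlantic' AND state = 'South Carolina'
"""
result = client.ts_query('GeoCheckin', query)
print(result.columns)
print(result.rows)
```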
diff --git a/content/riak/ts/1.5.1/using/querying/select/aggregate-functions.md b/content/riak/ts/1.5.1/using/querying/select/aggregate-functions.md
index 697bb19d27..0efbf7557b 100644
--- a/content/riak/ts/1.5.1/using/querying/select/aggregate-functions.md
+++ b/content/riak/ts/1.5.1/using/querying/select/aggregate-functions.md
@@ -10,7 +10,6 @@ menu:
project: "riak_ts"
project_version: "1.5.1"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/aggregate-functions"
version_history:
present_from: "1.4.0+"
moved:
diff --git a/content/riak/ts/1.5.1/using/querying/select/arithmetic-operations.md b/content/riak/ts/1.5.1/using/querying/select/arithmetic-operations.md
index 3fb65e0bf4..9eb01d7072 100644
--- a/content/riak/ts/1.5.1/using/querying/select/arithmetic-operations.md
+++ b/content/riak/ts/1.5.1/using/querying/select/arithmetic-operations.md
@@ -10,7 +10,6 @@ menu:
project: "riak_ts"
project_version: "1.5.1"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/arithmetic-operations"
version_history:
present_from: "1.4.0+"
moved:
@@ -22,7 +21,7 @@ aliases:
---
-[querying select]: /riak/ts/1.5.1/using/querying/#select-query
+[querying select]: {{< baseurl >}}riak/ts/1.5.1/using/querying/#select-query
Riak TS supports arithmetic operations in the SELECT statement.
diff --git a/content/riak/ts/1.5.1/using/querying/select/group-by.md b/content/riak/ts/1.5.1/using/querying/select/group-by.md
index 891d98960a..dc96978f77 100644
--- a/content/riak/ts/1.5.1/using/querying/select/group-by.md
+++ b/content/riak/ts/1.5.1/using/querying/select/group-by.md
@@ -12,11 +12,10 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/querying/select/group-by
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/group-by"
---
[aggregate function]: ../aggregate-functions
-[guidelines]: /riak/ts/1.5.1/using/querying/guidelines
+[guidelines]: {{< baseurl >}}riak/ts/1.5.1/using/querying/guidelines
The GROUP BY statement is used with `SELECT` to pick out and condense rows sharing the same value and return a single row. `GROUP BY` is useful for aggregating an attribute of a device over a time period; for instance, you could use it to pull average values for every 30 minute period over the last 24 hours.
diff --git a/content/riak/ts/1.5.1/using/querying/select/limit.md b/content/riak/ts/1.5.1/using/querying/select/limit.md
index 7cdac48111..713dd88f66 100644
--- a/content/riak/ts/1.5.1/using/querying/select/limit.md
+++ b/content/riak/ts/1.5.1/using/querying/select/limit.md
@@ -12,12 +12,11 @@ project_version: "1.5.1"
toc: true
version_history:
in: "1.5.1+"
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/limit/"
---
-[select]: /riak/ts/1.5.1/using/querying/select
-[query guidelines]: /riak/ts/1.5.1/using/querying/guidelines/
-[configuring]: /riak/ts/1.5.1/configuring/riakconf/#maximum-returned-data-size
+[select]: {{< baseurl >}}riak/ts/1.5.1/using/querying/select
+[query guidelines]: {{< baseurl >}}riak/ts/1.5.1/using/querying/guidelines/
+[configuring]: {{< baseurl >}}riak/ts/1.5.1/configuring/riakconf/#maximum-returned-data-size
The LIMIT statement is used with [`SELECT`][select] to return a limited number of results.
@@ -26,7 +25,7 @@ This document shows how to run various queries using `LIMIT`. See the [guideline
{{% note title="A Note on Latency" %}}
`LIMIT` uses an on-disk query buffer to prevent overload, which adds some overhead and increases the query latency.
-You may adjust various parameters in [riak.conf](/riak/ts/1.5.1/configuring/riakconf/) depending on how much memory your riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for LIMIT statements; you can read more about that [here](/riak/ts/1.5.1/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition.
+You may adjust various parameters in [riak.conf]({{< baseurl >}}riak/ts/1.5.1/configuring/riakconf/) depending on how much memory your riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for LIMIT statements; you can read more about that [here]({{< baseurl >}}riak/ts/1.5.1/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition.
However, the most effective means of speeding up your `LIMIT` queries is to place the query buffer directory (`timeseries_query_buffers_root_path`) on fast storage or in a memory-backed /tmp directory.
{{% /note %}}
diff --git a/content/riak/ts/1.5.1/using/querying/select/order-by.md b/content/riak/ts/1.5.1/using/querying/select/order-by.md
index 13c4448760..105c09ec5d 100644
--- a/content/riak/ts/1.5.1/using/querying/select/order-by.md
+++ b/content/riak/ts/1.5.1/using/querying/select/order-by.md
@@ -12,12 +12,11 @@ project_version: "1.5.1"
toc: true
version_history:
in: "1.5.1+"
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/order-by"
---
-[select]: /riak/ts/1.5.1/using/querying/select
-[query guidelines]: /riak/ts/1.5.1/using/querying/guidelines/
-[configuring]: /riak/ts/1.5.1/configuring/riakconf/#maximum-returned-data-size
+[select]: {{< baseurl >}}riak/ts/1.5.1/using/querying/select
+[query guidelines]: {{< baseurl >}}riak/ts/1.5.1/using/querying/guidelines/
+[configuring]: {{< baseurl >}}riak/ts/1.5.1/configuring/riakconf/#maximum-returned-data-size
The ORDER BY statement is used with [`SELECT`][select] to sort results by one or more columns in ascending or descending order. `ORDER BY` is useful for operations such as returning the most recent results in a set.
@@ -26,7 +25,7 @@ This document shows how to run various queries using `ORDER BY`. See the [guidel
{{% note title="A Note on Latency" %}}
`ORDER BY` uses an on-disk query buffer to prevent overload, which adds some overhead and increases the query latency.
-You may adjust various parameters in [riak.conf](/riak/ts/1.5.1/configuring/riakconf/) depending on how much memory your riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for ORDER BY statements; you can read more about that [here](/riak/ts/1.5.1/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition.
+You may adjust various parameters in [riak.conf]({{< baseurl >}}riak/ts/1.5.1/configuring/riakconf/) depending on how much memory your riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for ORDER BY statements; you can read more about that [here]({{< baseurl >}}riak/ts/1.5.1/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition.
However, the most effective means of speeding up your `ORDER BY` queries is to place the query buffer directory (`timeseries_query_buffers_root_path`) on fast storage or in a memory-backed /tmp directory.
{{% /note %}}
diff --git a/content/riak/ts/1.5.1/using/querying/show-create-table.md b/content/riak/ts/1.5.1/using/querying/show-create-table.md
index dad8f57a16..a901e18f49 100644
--- a/content/riak/ts/1.5.1/using/querying/show-create-table.md
+++ b/content/riak/ts/1.5.1/using/querying/show-create-table.md
@@ -14,10 +14,9 @@ version_history:
in: "1.5.1+"
aliases:
- /riakts/1.5.1/using/querying/show-create-table
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/show-create-table"
---
-[riak shell]: /riak/ts/1.5.1/using/riakshell
+[riak shell]: {{< baseurl >}}riak/ts/1.5.1/using/riakshell
You can use the SHOW CREATE TABLE statement to obtain the SQL used to create your Riak TS table. This document will show you how to execute `SHOW CREATE TABLE` in TS.
diff --git a/content/riak/ts/1.5.1/using/querying/show-tables.md b/content/riak/ts/1.5.1/using/querying/show-tables.md
index d2385f70da..af2978521c 100644
--- a/content/riak/ts/1.5.1/using/querying/show-tables.md
+++ b/content/riak/ts/1.5.1/using/querying/show-tables.md
@@ -12,10 +12,9 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/querying/show-tables
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/show-tables"
---
-[riak shell]: /riak/ts/1.5.1/using/riakshell
+[riak shell]: {{< baseurl >}}riak/ts/1.5.1/using/riakshell
You can use the SHOW TABLES statement to enumerate the Riak TS tables you have set up. This document will show you how to execute `SHOW TABLES` in TS.
diff --git a/content/riak/ts/1.5.1/using/querying/single-key-fetch.md b/content/riak/ts/1.5.1/using/querying/single-key-fetch.md
index 7142ac3fbc..5907c34dba 100644
--- a/content/riak/ts/1.5.1/using/querying/single-key-fetch.md
+++ b/content/riak/ts/1.5.1/using/querying/single-key-fetch.md
@@ -12,7 +12,6 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/querying/single-key-fetch
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/single-key-fetch"
---
You may find the need to fetch a single key from Riak TS. The below examples show you how to perform a single key fetch in each of our official clients that support Riak TS.
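For reference, a single key fetch like the one described above looks roughly as follows in the Python client; the composite key values and table name are illustrative assumptions:

```python
# Minimal sketch: fetch one record by its full primary key. The key is
# the partition/local key values in declaration order; all names and
# values here are illustrative.
import datetime
from riak import RiakClient

client = RiakClient(host='127.0.0.1', pb_port=8087)
key = ['South Atlantic', 'South Carolina',
       datetime.datetime(2017, 1, 1, 12, 0, 0)]
ts_obj = client.ts_get('GeoCheckin', key)
print(ts_obj.rows)  # the matching row, if the key exists
```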
diff --git a/content/riak/ts/1.5.1/using/riakshell.md b/content/riak/ts/1.5.1/using/riakshell.md
index 89b53f0696..9474695008 100644
--- a/content/riak/ts/1.5.1/using/riakshell.md
+++ b/content/riak/ts/1.5.1/using/riakshell.md
@@ -12,12 +12,11 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/riakshell/
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/riakshell"
---
-[nodename]: /riak/kv/2.2.0/using/cluster-operations/changing-cluster-info/
-[creating]: /riak/ts/1.5.1/using/creating-activating
-[writing]: /riak/ts/1.5.1/using/writingdata
+[nodename]: {{< baseurl >}}riak/kv/2.2.0/using/cluster-operations/changing-cluster-info/
+[creating]: {{< baseurl >}}riak/ts/1.5.1/using/creating-activating
+[writing]: {{< baseurl >}}riak/ts/1.5.1/using/writingdata
[riak shell README]: https://github.com/basho/riak_shell/blob/develop/README.md
@@ -195,7 +194,7 @@ An example of the second format is shown below:
In both of these formats multiple rows of data can be specified
(3)>INSERT INTO mytable VALUES ('keyvalue', '2016-11-30 19:30:00', 123, 12.3, false), ('newvalue', '2016-11-30 19:31:04', 456, 45.6, true);
-For more details please go to http://docs.basho.com/riak/ts
+For more details please go to /riak/ts
```
diff --git a/content/riak/ts/1.5.1/using/security.md b/content/riak/ts/1.5.1/using/security.md
index b2bf57d452..cc3641b050 100644
--- a/content/riak/ts/1.5.1/using/security.md
+++ b/content/riak/ts/1.5.1/using/security.md
@@ -13,7 +13,6 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/security/
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/"
---
[security checklist]: ./checklist
@@ -90,7 +89,7 @@ cluster on the following TCP ports:
Protocol | Port
:--------|:----
-Protocol Buffers | TCP port 8087
+Protocol Buffers | TCP port 8087
## Best Practices
diff --git a/content/riak/ts/1.5.1/using/security/checklist.md b/content/riak/ts/1.5.1/using/security/checklist.md
index 7f78d963c8..9e42e35220 100644
--- a/content/riak/ts/1.5.1/using/security/checklist.md
+++ b/content/riak/ts/1.5.1/using/security/checklist.md
@@ -13,7 +13,6 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/security/checklist
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/checklist/"
---
[enable ssl]: ../enable-disable/#enabling-ssl
[security users]: ../user-management
[security sources]: ../sources-management
[manage permissions]: ../user-management/#managing-permissions
-[pbc]: /riak/kv/2.2.0/developing/api/protocol-buffers/
+[pbc]: {{< baseurl >}}riak/kv/2.2.0/developing/api/protocol-buffers/
[security enable disable]: ../enable-disable
Before turning on Riak TS security there are key steps all applications need to take. Missing one of these steps may break your application, so make sure you have done each of the following BEFORE enabling security:
diff --git a/content/riak/ts/1.5.1/using/security/enable-disable.md b/content/riak/ts/1.5.1/using/security/enable-disable.md
index 37415decd4..c03c7aeca9 100644
--- a/content/riak/ts/1.5.1/using/security/enable-disable.md
+++ b/content/riak/ts/1.5.1/using/security/enable-disable.md
@@ -13,7 +13,6 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/security/enable-disable
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/enable-disable/"
---
Riak TS security may be [checked](#checking-security-status), [enabled](#enabling-security), or [disabled](#disabling-security) through the command line, allowing an administrator to change security settings for the whole cluster without needing to go node-by-node.
diff --git a/content/riak/ts/1.5.1/using/security/notify-basho.md b/content/riak/ts/1.5.1/using/security/notify-basho.md
index 3dd36082b6..298cc0a035 100644
--- a/content/riak/ts/1.5.1/using/security/notify-basho.md
+++ b/content/riak/ts/1.5.1/using/security/notify-basho.md
@@ -13,7 +13,6 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/security/notify-basho
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/notify-basho/"
---
Data security is an important and sensitive issue. A real-world approach to security allows us to balance appropriate levels of security and related overhead while creating a fast, scalable, and operationally straightforward database.
diff --git a/content/riak/ts/1.5.1/using/security/sources-management.md b/content/riak/ts/1.5.1/using/security/sources-management.md
index 0db4c3d11b..73d91f2b4c 100644
--- a/content/riak/ts/1.5.1/using/security/sources-management.md
+++ b/content/riak/ts/1.5.1/using/security/sources-management.md
@@ -13,7 +13,6 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/security/sources-management
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/sources-management/"
---
[cidr]: http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing
@@ -22,8 +21,8 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/sources-ma
[security enabling]: ../enable-disable/#enabling-security
[security add user]: ../user-management/#add-user
[root cert]: http://en.wikipedia.org/wiki/Root_certificate
-[rolling restart]: /riak/kv/2.2.0/using/repair-recovery/rolling-restart/
-[config ref security]: /riak/kv/2.2.0/configuring/reference/#security
+[rolling restart]: {{< baseurl >}}riak/kv/2.2.0/using/repair-recovery/rolling-restart/
+[config ref security]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference/#security
[xss]: http://en.wikipedia.org/wiki/Cross-site_scripting
[request forgery]: http://en.wikipedia.org/wiki/Cross-site_request_forgery
[http referer]: http://en.wikipedia.org/wiki/HTTP_referer
diff --git a/content/riak/ts/1.5.1/using/security/user-management.md b/content/riak/ts/1.5.1/using/security/user-management.md
index 93f1e77e0d..8cc412dc75 100644
--- a/content/riak/ts/1.5.1/using/security/user-management.md
+++ b/content/riak/ts/1.5.1/using/security/user-management.md
@@ -13,7 +13,6 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/security/user-management
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/user-management/"
---
Riak TS security lets you control authorization by creating, modifying, and deleting user characteristics and granting users selective access to Riak TS functionality. Users can be assigned one or more of the following characteristics:
diff --git a/content/riak/ts/1.5.1/using/timerepresentations.md b/content/riak/ts/1.5.1/using/timerepresentations.md
index febd801b91..763fc27bd9 100644
--- a/content/riak/ts/1.5.1/using/timerepresentations.md
+++ b/content/riak/ts/1.5.1/using/timerepresentations.md
@@ -12,15 +12,14 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/timerepresentations/
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/timerepresentations"
---
[activating]: ../creating-activating/
[planning]: ../planning/
[querying]: ../querying/
-[config reference]: /riak/kv/2.2.0/configuring/reference/#the-advanced-config-file
-[MDC]: /riak/ts/1.5.1/using/mdc
+[config reference]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference/#the-advanced-config-file
+[MDC]: {{< baseurl >}}riak/ts/1.5.1/using/mdc
[riak shell]: ../riakshell
[ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601
[learn timestamps]: ../../learn-about/timestamps
diff --git a/content/riak/ts/1.5.1/using/writingdata.md b/content/riak/ts/1.5.1/using/writingdata.md
index 1d1a860171..9bcdc1644c 100644
--- a/content/riak/ts/1.5.1/using/writingdata.md
+++ b/content/riak/ts/1.5.1/using/writingdata.md
@@ -12,16 +12,15 @@ project_version: "1.5.1"
toc: true
aliases:
- /riakts/1.5.1/using/writingdata/
-canonical_link: "https://docs.basho.com/riak/ts/latest/using/writingdata"
---
[activating]: ../creating-activating/
[planning]: ../planning/
[querying]: ../querying/
-[http]: /riak/ts/1.5.1/developing/http/
-[config reference]: /riak/kv/2.2.0/configuring/reference/#the-advanced-config-file
-[MDC]: /riak/ts/1.5.1/configuring/mdc
+[http]: {{< baseurl >}}riak/ts/1.5.1/developing/http/
+[config reference]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference/#the-advanced-config-file
+[MDC]: {{< baseurl >}}riak/ts/1.5.1/configuring/mdc
[riakshell]: ../riakshell
[iso8601]: ../timerepresentations/
[ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601
diff --git a/content/riak/ts/1.5.2/add-ons.md b/content/riak/ts/1.5.2/add-ons.md
index 1d0e116fc3..0e701d0f66 100644
--- a/content/riak/ts/1.5.2/add-ons.md
+++ b/content/riak/ts/1.5.2/add-ons.md
@@ -10,7 +10,8 @@ menu:
weight: 450
pre: tools
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons"
+aliases:
+
---
Here at Basho, we've developed integrations between Riak TS and other best-of-breed components in your application stack. Each integration, called an add-on, is explained in this section, from installation to feature-set.
diff --git a/content/riak/ts/1.5.2/add-ons/redis.md b/content/riak/ts/1.5.2/add-ons/redis.md
index f6445e4139..5f92548367 100644
--- a/content/riak/ts/1.5.2/add-ons/redis.md
+++ b/content/riak/ts/1.5.2/add-ons/redis.md
@@ -11,6 +11,8 @@ menu:
parent: "addons"
toc: true
commercial_offering: true
+aliases:
+
---
diff --git a/content/riak/ts/1.5.2/add-ons/redis/developing-rra.md b/content/riak/ts/1.5.2/add-ons/redis/developing-rra.md
index 8b73b1a11b..53df83f17f 100644
--- a/content/riak/ts/1.5.2/add-ons/redis/developing-rra.md
+++ b/content/riak/ts/1.5.2/add-ons/redis/developing-rra.md
@@ -11,15 +11,17 @@ menu:
parent: "addons_redis"
toc: true
commercial_offering: true
+aliases:
+
---
[redis-clients]: http://redis.io/clients
-[usage bucket types]: /riak/kv/2.2.0/developing/usage/bucket-types/
-[dev api http]: /riak/kv/2.2.0/developing/api/http
+[usage bucket types]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/bucket-types/
+[dev api http]: {{< baseurl >}}riak/kv/2.2.0/developing/api/http
[config-behaviors]: http://basho.com/posts/technical/riaks-config-behaviors-part-4/
-[apps replication properties]: /riak/kv/2.2.0/developing/app-guide/replication-properties
-[usage commit hooks]: /riak/kv/2.2.0/developing/usage/commit-hooks/
-[concept causal context]: /riak/kv/2.2.0/learn/concepts/causal-context
+[apps replication properties]: {{< baseurl >}}riak/kv/2.2.0/developing/app-guide/replication-properties
+[usage commit hooks]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/commit-hooks/
+[concept causal context]: {{< baseurl >}}riak/kv/2.2.0/learn/concepts/causal-context
[ee]: http://basho.com/contact/
This page will walk you through setting up your environment for development with Riak Redis Add-on (RRA), as well as present examples and configuration parameters for basic development operations.
@@ -83,8 +85,8 @@ fi
The following is an example, using Riak TS's default HTTP port, of setting `allow_mult` to 'true' and `last_write_wins` to 'false':
```sh
-curl -XPUT -H 'Content-Type: application/json' \
- -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \
+curl -XPUT -H 'Content-Type: application/json' \
+ -d '{"props": {"allow_mult": true, "last_write_wins": false}}' \
'http://127.0.0.1:8098/types/rra/buckets/test/props'
```
diff --git a/content/riak/ts/1.5.2/add-ons/redis/redis-add-on-features.md b/content/riak/ts/1.5.2/add-ons/redis/redis-add-on-features.md
index d4f5c12116..0d4cc9862f 100644
--- a/content/riak/ts/1.5.2/add-ons/redis/redis-add-on-features.md
+++ b/content/riak/ts/1.5.2/add-ons/redis/redis-add-on-features.md
@@ -11,15 +11,17 @@ menu:
parent: "addons_redis"
toc: true
commercial_offering: true
+aliases:
+
---
[ee]: http://basho.com/contact/
-[GET-sequence]: /images/redis/GET_seq.msc.png
-[SET-sequence]: /images/redis/SET_seq.msc.png
-[DEL-sequence]: /images/redis/DEL_seq.msc.png
-[Object-lifetime]: /images/redis/Object_lifetime.msc.png
+[GET-sequence]: {{< baseurl >}}images/redis/GET_seq.msc.png
+[SET-sequence]: {{< baseurl >}}images/redis/SET_seq.msc.png
+[DEL-sequence]: {{< baseurl >}}images/redis/DEL_seq.msc.png
+[Object-lifetime]: {{< baseurl >}}images/redis/Object_lifetime.msc.png
[redis docs]: http://redis.io/commands
-[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md
+[twemproxy docs]: https://github.com/twitter/twemproxy/blob/master/notes/redis.md
## Overview
@@ -69,7 +71,7 @@ Implementing caching strategies in the cache proxy service reduces the cost of i
The read-through cache strategy of the GET command is represented by the following sequence diagram:
-![GET command sequence diagram](/images/redis/GET_seq.msc.png)
+![GET command sequence diagram]({{< baseurl >}}images/redis/GET_seq.msc.png)
The `CACHE_TTL` configuration option establishes how long the cache takes to
@@ -96,12 +98,12 @@ expected to be followed by several accurate cache hits.
The write-around cache strategy of the SET command is represented by the following sequence diagram:
-![SET command sequence diagram](/images/redis/SET_seq.msc.png)
+![SET command sequence diagram]({{< baseurl >}}images/redis/SET_seq.msc.png)
The write-around cache strategy of the DEL command is represented by the following sequence diagram:
-![DEL command sequence diagram](/images/redis/DEL_seq.msc.png)
+![DEL command sequence diagram]({{< baseurl >}}images/redis/DEL_seq.msc.png)
## Commands
@@ -128,4 +130,4 @@ With the combination of read-through and write-around cache strategies, the full object lifetime for a key-value is represented by the following sequence diagram:
-![Object lifetime sequence diagram](/images/redis/Object_lifetime.msc.png)
+![Object lifetime sequence diagram]({{< baseurl >}}images/redis/Object_lifetime.msc.png)
diff --git a/content/riak/ts/1.5.2/add-ons/redis/set-up-rra.md b/content/riak/ts/1.5.2/add-ons/redis/set-up-rra.md
index 23f7cfd536..2dd54f630e 100644
--- a/content/riak/ts/1.5.2/add-ons/redis/set-up-rra.md
+++ b/content/riak/ts/1.5.2/add-ons/redis/set-up-rra.md
@@ -11,13 +11,15 @@ menu:
parent: "addons_redis"
toc: true
commercial_offering: true
+aliases:
+
---
[addon redis develop]: ../developing-rra/
[addon redis use]: ../using-rra
[ee]: http://basho.com/contact/
-[install index]: /riak/ts/1.5.2/setup/installing
-[perf open files]: /riak/ts/1.5.2/setup/installing/rhel-centos/#ulimit
+[install index]: {{< baseurl >}}riak/ts/1.5.2/setup/installing
+[perf open files]: {{< baseurl >}}riak/ts/1.5.2/setup/installing/rhel-centos/#ulimit
[lab ansible]: https://github.com/paegun/ansible-cache-proxy
This page will walk you through the process of installing Riak Redis Add-on (RRA) and configuring it to run in your environment. Check the [prerequisites](#prerequisites) before you get started to make sure you have everything you need in order to successfully install and use RRA.
diff --git a/content/riak/ts/1.5.2/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/ts/1.5.2/add-ons/redis/set-up-rra/deployment-models.md
index 54083c3b19..079338c853 100644
--- a/content/riak/ts/1.5.2/add-ons/redis/set-up-rra/deployment-models.md
+++ b/content/riak/ts/1.5.2/add-ons/redis/set-up-rra/deployment-models.md
@@ -11,11 +11,13 @@ menu:
parent: "addons_redis_setup"
toc: true
commercial_offering: true
+aliases:
+
---
-[Local-deployment]: /images/redis/rra_deployment_local.png
-[Colocated-deployment]: /images/redis/rra_deployment_colocated.png
-[Distributed-deployment]: /images/redis/rra_deployment_distributed.png
+[Local-deployment]: {{< baseurl >}}images/redis/rra_deployment_local.png
+[Colocated-deployment]: {{< baseurl >}}images/redis/rra_deployment_colocated.png
+[Distributed-deployment]: {{< baseurl >}}images/redis/rra_deployment_distributed.png
## Deployment Models
@@ -24,7 +26,7 @@ commercial_offering: true
In a local cache deployment, the RRA and Redis are deployed to the application server.
-![Local-deployment](/images/redis/rra_deployment_local.png)
+![Local-deployment]({{< baseurl >}}images/redis/rra_deployment_local.png)
Connections:
@@ -65,7 +67,7 @@ connections from the application services while deployment costs remain a matter of pushing a service and its configuration. In either case, deployment should be automated, so they are not multiplied by the number of servers.
-![Colocated-deployment](/images/redis/rra_deployment_colocated.png)
+![Colocated-deployment]({{< baseurl >}}images/redis/rra_deployment_colocated.png)
Connections:
@@ -103,7 +105,7 @@ Disadvantages:
In a distributed cache deployment, the RRA is deployed to the application server and Redis is deployed to standalone servers, separate from Riak cluster nodes.
-![Distributed-deployment](/images/redis/rra_deployment_distributed.png)
+![Distributed-deployment]({{< baseurl >}}images/redis/rra_deployment_distributed.png)
Connections:
diff --git a/content/riak/ts/1.5.2/add-ons/redis/using-rra.md b/content/riak/ts/1.5.2/add-ons/redis/using-rra.md
index b3fb4199dd..9a439da54a 100644
--- a/content/riak/ts/1.5.2/add-ons/redis/using-rra.md
+++ b/content/riak/ts/1.5.2/add-ons/redis/using-rra.md
@@ -13,11 +13,12 @@ toc: true
commercial_offering: true
aliases:
- /riak/ts/1.5.2/add-ons/redis/get-started-with-rra
+
---
[addon redis develop]: ../developing-rra/
[addon redis setup]: ../set-up-rra/
-[dev api http]: /riak/kv/2.2.0/developing/api/http/
+[dev api http]: {{< baseurl >}}riak/kv/2.2.0/developing/api/http/
[ee]: http://basho.com/contact/
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector.md
index 87e840a12d..7d3edb6a54 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector.md
@@ -10,7 +10,8 @@ menu:
weight: 101
parent: "addons"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector"
+aliases:
+
---
The Spark-Riak connector enables you to connect Spark applications to Riak TS with the Spark RDD and Spark DataFrames APIs. You can write your app in Scala, Python, and Java. The connector makes it easy to partition the data you get from Riak so multiple Spark workers can process the data in parallel, and it has support for failover if a Riak node goes down while your Spark job is running.
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/building-testing.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/building-testing.md
index cdb88e2695..4960667c14 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/building-testing.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/building-testing.md
@@ -11,7 +11,8 @@ menu:
weight: 103
parent: "addons_spark_riak"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/building-testing"
+aliases:
+
---
If you want to download the source code of the Spark-Riak connector, build it, and install the results in your local repo, this is the document for you! Keep reading for instructions on downloading, building, and installing the connector.
@@ -26,7 +27,7 @@ In order to build the Spark-Riak connector, you'll need to have the following installed:
* [Java OpenJDK 8](http://openjdk.java.net/install/)
* [Maven 3](https://maven.apache.org/download.cgi)
* [Spark 1.6](http://spark.apache.org/docs/latest/#downloading)
-* [Riak TS](http://docs.basho.com/riak/ts/latest/installing/)
+* [Riak TS]({{< baseurl >}}riak/ts/latest/installing/)
## Download
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/getting.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/getting.md
index 4f5706db60..8f14b9ef13 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/getting.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/getting.md
@@ -10,7 +10,8 @@ menu:
weight: 102
parent: "addons_spark_riak"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/getting"
+aliases:
+
---
> **Note:**
@@ -62,16 +63,16 @@ You can download the Spark-Riak connector library by going to https://github.com
You can also download and install the library in your home directory by running:
```bash
-curl https://bintray.com/artifact/download/basho/data-platform/com/basho/riak/spark-riak-connector/»VERSION«/spark-riak-connector-»VERSION«-uber.jar \
+curl https://bintray.com/artifact/download/basho/data-platform/com/basho/riak/spark-riak-connector/»VERSION«/spark-riak-connector-»VERSION«-uber.jar \
-o ~/spark-riak-connector-»VERSION«-uber.jar
```
Once you've downloaded the connector, you can add it to the driver classpath when submitting your Python app to Spark, like this:
```bash
-/path/to/spark-submit \
- --master "local[*]" \
- --driver-class-path /path/to/spark-riak-connector-»VERSION«-uber.jar \
+/path/to/spark-submit \
+ --master "local[*]" \
+ --driver-class-path /path/to/spark-riak-connector-»VERSION«-uber.jar \
/path/to/your-python-script.py
```
@@ -82,23 +83,23 @@ As an alternative, you can have Spark automatically download it when starting up
For Scala, Python, & Java:
```
-$SPARK_HOME/bin/spark-submit \
---repositories https://dl.bintray.com/basho/data-platform \
+$SPARK_HOME/bin/spark-submit \
+--repositories https://dl.bintray.com/basho/data-platform \
--packages com.basho.riak:spark-riak-connector:»VERSION«
```
Alternative for Scala:
```
-$SPARK_HOME/bin/spark-shell \
---repositories https://dl.bintray.com/basho/data-platform \
+$SPARK_HOME/bin/spark-shell \
+--repositories https://dl.bintray.com/basho/data-platform \
--packages com.basho.riak:spark-riak-connector:»VERSION«
```
Alternative for Python:
```
-$SPARK_HOME/bin/pyspark \
---repositories https://dl.bintray.com/basho/data-platform \
+$SPARK_HOME/bin/pyspark \
+--repositories https://dl.bintray.com/basho/data-platform \
--packages com.basho.riak:spark-riak-connector:»VERSION«
```
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/quick-start.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/quick-start.md
index 7152896b9c..6cb245c417 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/quick-start.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/quick-start.md
@@ -11,7 +11,8 @@ menu:
weight: 101
parent: "addons_spark_riak"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/quick-start"
+aliases:
+
---
This guide will run you through a quick example that uses the Spark-Riak connector to read and write data using Java, Scala, and Python. We will assume you are running this guide on Mac OSX.
@@ -19,8 +20,8 @@ This guide will run you through a quick example that uses the Spark-Riak connect
## Prerequisites
- Update Homebrew with `brew update`.
-- Install Riak TS OSX build. Instruction can be found [here](http://docs.basho.com/riak/ts/1.2.0/installing/mac-osx/)
-- Set open file limits for Riak by following the guide [here](http://docs.basho.com/riak/latest/ops/tuning/open-files-limit/#Mac-OS-X).
+- Install Riak TS OSX build. Instructions can be found [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/)
+- Set open file limits for Riak by following the guide [here]({{< baseurl >}}riak/kv/latest/ops/tuning/open-files-limit/#Mac-OS-X).
- Install Spark with `brew install apache-spark`.
- Download the Spark-Riak connector uber jar (containing all dependencies) from here: https://github.com/basho/spark-riak-connector/releases/latest.
@@ -38,8 +39,8 @@ In this quick start guide we will run you through an example usage of the Spark-
Start Spark Scala REPL with:
```
-path/to/spark-shell \
---conf spark.riak.connection.host=127.0.0.1:8087 \
+path/to/spark-shell \
+--conf spark.riak.connection.host=127.0.0.1:8087 \
--driver-class-path /path/to/spark-riak-connector-»VERSION«-uber.jar
```
@@ -123,8 +124,8 @@ In this quick start guide we will run through some examples usages of the Spark-
Start `pyspark` with:
```
-/path/to/bin/pyspark \
---conf spark.riak.connection.host=127.0.0.1:8087 \
+/path/to/bin/pyspark \
+--conf spark.riak.connection.host=127.0.0.1:8087 \
--driver-class-path /path/to/spark-riak-connector-{{version}}-uber.jar
```
@@ -242,10 +243,10 @@ Write the DataFrame to the TS table:
```python
-df.write \
- .format('org.apache.spark.sql.riak') \
- .option('spark.riak.connection.host', hostAndPort) \
- .mode('Append') \
+df.write \
+ .format('org.apache.spark.sql.riak') \
+ .option('spark.riak.connection.host', hostAndPort) \
+ .mode('Append') \
.save(table_name)
```
@@ -253,11 +254,11 @@ Let's check that the write was successful by reading the TS table into a new Dat
```python
sqlContext = SQLContext(sc)
-df2 = sqlContext.read\
- .format("org.apache.spark.sql.riak")\
- .option("spark.riak.connection.host", hostAndPort)\
- .option("spark.riakts.bindings.timestamp", "useLong")\
- .load(table_name)\
+df2 = sqlContext.read\
+ .format("org.apache.spark.sql.riak")\
+ .option("spark.riak.connection.host", hostAndPort)\
+ .option("spark.riakts.bindings.timestamp", "useLong")\
+ .load(table_name)\
.filter("""measurementDate > %(start_date)s
AND measurementDate < %(end_date)s
AND site = '%(site)s'
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage.md
index 197be6d5bd..aa3937a9e7 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage.md
@@ -11,7 +11,8 @@ menu:
weight: 104
parent: "addons_spark_riak"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage"
+aliases:
+
---
This section will walk you through setting up your application for development with the Spark-Riak connector.
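Pulling the options shown in these hunks together, a minimal PySpark setup might look like the sketch below. The node address and table name are illustrative, and the connector jar is assumed to already be on the driver classpath:

```python
# Minimal sketch (PySpark, Spark 1.6 era): configure the connector and
# read a TS table as a DataFrame. spark.riak.connection.host,
# org.apache.spark.sql.riak, and spark.riakts.bindings.timestamp are
# the connector options used elsewhere on these pages.
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

conf = (SparkConf()
        .setAppName('riak-ts-usage-sketch')
        .set('spark.riak.connection.host', '127.0.0.1:8087'))
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

df = (sqlContext.read
      .format('org.apache.spark.sql.riak')
      .option('spark.riakts.bindings.timestamp', 'useLong')
      .load('GeoCheckin'))  # table name is illustrative
df.printSchema()
```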
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/bulk-write.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/bulk-write.md
index 00c1355eea..541a9aad34 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/bulk-write.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/bulk-write.md
@@ -11,7 +11,8 @@ menu:
weight: 107
parent: "spark_riak_usage"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/bulk-write"
+aliases:
+
---
To write into a Riak TS table, the Spark-Riak Connector splits the initial set of rows into smaller bulks and processes them in parallel. Bulk size can be configured using the `spark.riakts.write.bulk-size` property. The default number is `100`.
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/config-spark-context.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/config-spark-context.md
index 8ddd2b5a9d..c6cd173794 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/config-spark-context.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/config-spark-context.md
@@ -11,7 +11,8 @@ menu:
weight: 101
parent: "spark_riak_usage"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/config-spark-context"
+aliases:
+
---
The following `import` statements should be included at the top of your Spark application to enable the connector:
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/dataframes.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/dataframes.md
index 305e82c829..bc469dc647 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/dataframes.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/dataframes.md
@@ -11,7 +11,8 @@ menu:
weight: 104
parent: "spark_riak_usage"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/dataframes"
+aliases:
+
---
## Spark Dataframes With TS Table
@@ -43,11 +44,11 @@ val df = sqlContext.read
```
```python
-df = sqlContext.read \
- .option("spark.riak.connection.hosts","riak_host_ip:10017") \
- .format("org.apache.spark.sql.riak") \
- .load(ts_table_name) \
- .select(“time”, “col1”, “col2”) \
+df = sqlContext.read \
+ .option("spark.riak.connection.hosts","riak_host_ip:10017") \
+ .format("org.apache.spark.sql.riak") \
+ .load(ts_table_name) \
+ .select("time", "col1", "col2") \
.filter(s"time >= CAST($from AS TIMESTAMP) AND time <= CAST($to AS TIMESTAMP) AND col1= $value1")
```
@@ -71,10 +72,10 @@ inputDF.write
```
```python
-inputDF.write \
- .option("spark.riak.connection.hosts","riak_host_ip:10017") \
- .format("org.apache.spark.sql.riak") \
- .mode(SaveMode.Append) \
+inputDF.write \
+ .option("spark.riak.connection.hosts","riak_host_ip:10017") \
+ .format("org.apache.spark.sql.riak") \
+ .mode(SaveMode.Append) \
.save(ts_table_name)
```
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/dates.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/dates.md
index a3a430dd92..3a3bae3bbb 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/dates.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/dates.md
@@ -11,7 +11,8 @@ menu:
weight: 105
parent: "spark_riak_usage"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/dates"
+aliases:
+
---
Riak TS automatically stores all datetimes as a Long integer that
represents milliseconds from the [beginning of the epoch](https://en.wikipedia.org/wiki/Unix_time). This is not very human-friendly, so we have provided a Spark configuration option called `spark.riakts.bindings.timestamp`. This option is for use with Automatic Schema Discovery and allows for conversion from Riak TS datetimes, which are stored as Longs, to Timestamps. The default value of this option is `useTimestamp` which converts Longs to Timestamps. If you would like to use the original Long value, you can use the option value of `useLong`. All conversion takes place during Automatic Schema Discovery when reading from Riak TS tables.
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/range-query-partition.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/range-query-partition.md
index 030b87bf7e..405b78569e 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/range-query-partition.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/range-query-partition.md
@@ -11,10 +11,11 @@ menu:
weight: 106
parent: "spark_riak_usage"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/range-query-partition"
+aliases:
+
---
-Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS](http://docs.basho.com/riakts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time.
+Riak TS range queries are limited to a maximum of 5 quanta (see [Querying Data in Riak TS]({{< baseurl >}}riak/ts/latest/using/querying/)). To work around this limitation or simply achieve higher read performance, large ranges can be split into smaller sub-ranges at partitioning time.
To use this functionality, you must provide the following options:
@@ -34,12 +35,12 @@ For example:
```
```python
-df = sqlContext.read \
- .option("spark.riak.input.split.count", "5") \
- .option("spark.riak.partitioning.ts-range-field-name", "time") \
- .format("org.apache.spark.sql.riak") \
- .schema(schema) \
- .load(ts_table_name) \
+df = sqlContext.read \
+ .option("spark.riak.input.split.count", "5") \
+ .option("spark.riak.partitioning.ts-range-field-name", "time") \
+ .format("org.apache.spark.sql.riak") \
+ .schema(schema) \
+ .load(ts_table_name) \
.filter(s"time >= CAST(111111 AS TIMESTAMP) AND time <= CAST(555555 AS TIMESTAMP) AND col1 = 'val1'")
```
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/reading-data.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/reading-data.md
index 76c73bfd0c..426e3c73ff 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/reading-data.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/reading-data.md
@@ -11,7 +11,8 @@ menu:
weight: 102
parent: "spark_riak_usage"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/reading-data"
+aliases:
+
---
## Reading Data From TS Table
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/streaming-example.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/streaming-example.md
index 736165e652..d86f35df63 100644
--- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/streaming-example.md
+++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/streaming-example.md
@@ -11,7 +11,8 @@ menu:
weight: 108
parent: "spark_riak_usage"
toc: true
-canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/streaming-example"
+aliases:
+
---
> **Note:**
@@ -28,7 +29,7 @@ path/to/kafka/bin/kafka-server-start.sh config/server.properties
path/to/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic streaming
```
-We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instruction to do so [here](/riak/ts/1.2.0/installing/mac-osx/).
+We also assume Riak TS is installed and there is a Riak TS node running at `127.0.0.1:8087`. You can find instructions to do so [here]({{< baseurl >}}riak/ts/1.2.0/installing/mac-osx/).
You will need to build the TS example as well. Please follow the instructions on [building the examples](https://github.com/basho/spark-riak-connector/tree/master/examples#building-and-running-examplesdemos).
diff --git a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/writing-data.md b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/writing-data.md index c474f6d0c6..8055876b5d 100644 --- a/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/writing-data.md +++ b/content/riak/ts/1.5.2/add-ons/spark-riak-connector/usage/writing-data.md @@ -11,7 +11,8 @@ menu: weight: 103 parent: "spark_riak_usage" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/add-ons/spark-riak-connector/usage/writing-data" +aliases: + --- ## Writing Data To TS Table diff --git a/content/riak/ts/1.5.2/configuring.md b/content/riak/ts/1.5.2/configuring.md index fe712ff960..f12c15b1cf 100644 --- a/content/riak/ts/1.5.2/configuring.md +++ b/content/riak/ts/1.5.2/configuring.md @@ -15,16 +15,16 @@ version_history: - [">=1.5.2", "configuring"] - ["<=1.4.0", "using/configuring"] aliases: - - /riakts/1.5.2/configuring/ -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring" + - /riakts/1.5.2/configuring/ + - /riak/ts/latest/using/configuring/ --- -[riakconf]: /riak/ts/1.5.2/configuring/riakconf/ -[mdc]: /riak/ts/1.5.2/configuring/mdc/ -[global expiry]: /riak/ts/1.5.2/configuring/global-object-expiration/ -[kv config]: /riak/kv/2.2.0/configuring/reference -[WITH]: /riak/ts/1.5.2/using/creating-activating/#using-the-with-clause +[riakconf]: {{}}riak/ts/1.5.2/configuring/riakconf/ +[mdc]: {{}}riak/ts/1.5.2/configuring/mdc/ +[global expiry]: {{}}riak/ts/1.5.2/configuring/global-object-expiration/ +[kv config]: {{}}riak/kv/2.2.0/configuring/reference +[WITH]: {{}}riak/ts/1.5.2/using/creating-activating/#using-the-with-clause Riak TS mostly relies on Riak KV's [default configuration settings][kv config]. However, there are a few TS-specific configurations you should know about: diff --git a/content/riak/ts/1.5.2/configuring/global-object-expiration.md b/content/riak/ts/1.5.2/configuring/global-object-expiration.md index b2b2d88c84..2e5642ddd3 100644 --- a/content/riak/ts/1.5.2/configuring/global-object-expiration.md +++ b/content/riak/ts/1.5.2/configuring/global-object-expiration.md @@ -16,8 +16,8 @@ version_history: - [">=1.5.2", "configuring/global-object-expiration"] - ["<=1.4.0", "using/global-object-expiration"] aliases: - - /riakts/1.5.2/configuring/global-object-expiration/ -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring/global-object-expiration" + - /riakts/1.5.2/configuring/global-object-expiration/ + --- [ttl]: https://en.wikipedia.org/wiki/Time_to_live diff --git a/content/riak/ts/1.5.2/configuring/mdc.md b/content/riak/ts/1.5.2/configuring/mdc.md index 877e00e2a8..010965c749 100644 --- a/content/riak/ts/1.5.2/configuring/mdc.md +++ b/content/riak/ts/1.5.2/configuring/mdc.md @@ -17,16 +17,16 @@ version_history: - [">=1.5.2", "configuring/mdc"] - ["<=1.4.0", "using/mdc"] aliases: - - /riakts/1.5.2/configuring/mdc -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring/mdc" + - /riakts/1.5.2/configuring/mdc + - /riak/ts/latest/using/mdc --- -[activating]: /riak/ts/1.5.2/using/creating-activating -[cluster ops v3 mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[activating]: {{}}riak/ts/1.5.2/using/creating-activating +[cluster ops v3 mdc]: {{}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter [ee]: http://basho.com/contact/ [Enterprise]: http://basho.com/products/riak-ts/ -[install]: /riak/ts/1.5.2/setup/installing +[install]: {{}}riak/ts/1.5.2/setup/installing Multi-Datacenter (MDC) replication makes it possible to 
replicate your time series data between Riak clusters. This document will walk through how to configure MDC to work with Riak TS. @@ -72,7 +72,7 @@ data definition language (DDL) on each cluster to make certain they are equivalent before synchronization occurs. {{% note title="Tip" %}} -[`SHOW CREATE TABLE`](/riak/ts/1.5.2/using/querying/show-create-table/) makes this much easier. +[`SHOW CREATE TABLE`]({{< baseurl >}}riak/ts/1.5.2/using/querying/show-create-table/) makes this much easier. {{% /note %}} diff --git a/content/riak/ts/1.5.2/configuring/riakconf.md b/content/riak/ts/1.5.2/configuring/riakconf.md index 51bb4dd5ef..275ce7d58a 100644 --- a/content/riak/ts/1.5.2/configuring/riakconf.md +++ b/content/riak/ts/1.5.2/configuring/riakconf.md @@ -15,13 +15,13 @@ version_history: locations: - [">=1.5.2", "using/configuring"] aliases: - - /riakts/1.5.2/configuring/riakconf -canonical_link: "https://docs.basho.com/riak/ts/latest/configuring/riakconf" + - /riakts/1.5.2/configuring/riakconf + --- [glossary quanta]: ../../learn/glossary/quanta -[Riak object settings]: /riak/kv/2.2.0/configuring/reference/#object-settings +[Riak object settings]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference/#object-settings Riak TS exposes a few configuration settings in riak.conf. This document will walk you through the TS configurations. @@ -183,4 +183,4 @@ The following settings show up in riak.conf but should not be changed: * `riak_kv.query.timeseries.qbuf_soft_watermark` * `riak_kv.query.timeseries.qbuf_hard_watermark` * `riak_kv.query.timeseries.qbuf_expire_ms` -* `riak_kv.query.timeseries.qbuf_incomplete_release_ms` \ No newline at end of file +* `riak_kv.query.timeseries.qbuf_incomplete_release_ms` diff --git a/content/riak/ts/1.5.2/developing.md b/content/riak/ts/1.5.2/developing.md index 18e055eab4..ca71ff4070 100644 --- a/content/riak/ts/1.5.2/developing.md +++ b/content/riak/ts/1.5.2/developing.md @@ -11,21 +11,21 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/developing/developing/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing" + - /riakts/1.5.2/developing/developing/ + --- -[erlang]: /riak/ts/1.5.2/developing/erlang -[go]: /riak/ts/1.5.2/developing/golang -[http]: /riak/ts/1.5.2/developing/http -[java]: /riak/ts/1.5.2/developing/java -[ruby]: /riak/ts/1.5.2/developing/ruby -[python]: /riak/ts/1.5.2/developing/python -[csharp]: /riak/ts/1.5.2/developing/csharp -[nodejs]: /riak/ts/1.5.2/developing/nodejs -[erlang]: /riak/ts/1.5.2/developing/erlang -[php]: /riak/ts/1.5.2/developing/php +[erlang]: {{< baseurl >}}riak/ts/1.5.2/developing/erlang +[go]: {{< baseurl >}}riak/ts/1.5.2/developing/golang +[http]: {{< baseurl >}}riak/ts/1.5.2/developing/http +[java]: {{< baseurl >}}riak/ts/1.5.2/developing/java +[ruby]: {{< baseurl >}}riak/ts/1.5.2/developing/ruby +[python]: {{< baseurl >}}riak/ts/1.5.2/developing/python +[csharp]: {{< baseurl >}}riak/ts/1.5.2/developing/csharp +[nodejs]: {{< baseurl >}}riak/ts/1.5.2/developing/nodejs +[erlang]: {{< baseurl >}}riak/ts/1.5.2/developing/erlang +[php]: {{< baseurl >}}riak/ts/1.5.2/developing/php You can access Riak TS data over HTTP through the [API][http].
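Since the MDC walkthrough above hinges on both clusters having equivalent DDLs, a quick way to compare them is to run the statement from that tip on each cluster and diff the output. A minimal sketch, using the hypothetical table name `GeoCheckin`:

```sql
-- Run on each cluster; the emitted DDL and replication properties should match
SHOW CREATE TABLE GeoCheckin;
```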
diff --git a/content/riak/ts/1.5.2/developing/csharp.md b/content/riak/ts/1.5.2/developing/csharp.md index 02c42b574b..b6be07905b 100644 --- a/content/riak/ts/1.5.2/developing/csharp.md +++ b/content/riak/ts/1.5.2/developing/csharp.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/developing/csharp/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/csharp" + - /riakts/1.5.2/developing/csharp/ + --- diff --git a/content/riak/ts/1.5.2/developing/erlang.md b/content/riak/ts/1.5.2/developing/erlang.md index dede9818b8..7803a0482f 100644 --- a/content/riak/ts/1.5.2/developing/erlang.md +++ b/content/riak/ts/1.5.2/developing/erlang.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/developing/erlang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/erlang" + - /riakts/1.5.2/developing/erlang/ + --- diff --git a/content/riak/ts/1.5.2/developing/golang.md b/content/riak/ts/1.5.2/developing/golang.md index a8dc81ac27..86bb72f891 100644 --- a/content/riak/ts/1.5.2/developing/golang.md +++ b/content/riak/ts/1.5.2/developing/golang.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/developing/golang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/golang" + - /riakts/1.5.2/developing/golang/ + --- diff --git a/content/riak/ts/1.5.2/developing/http.md b/content/riak/ts/1.5.2/developing/http.md index 727d3e3f4e..979fbfdc59 100644 --- a/content/riak/ts/1.5.2/developing/http.md +++ b/content/riak/ts/1.5.2/developing/http.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/developing/http/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/http" + - /riakts/1.5.2/developing/http/ + --- diff --git a/content/riak/ts/1.5.2/developing/java.md b/content/riak/ts/1.5.2/developing/java.md index ea5e5c9850..7ea5a0dbfd 100644 --- a/content/riak/ts/1.5.2/developing/java.md +++ b/content/riak/ts/1.5.2/developing/java.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/developing/java/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/java" + - /riakts/1.5.2/developing/java/ + --- diff --git a/content/riak/ts/1.5.2/developing/nodejs.md b/content/riak/ts/1.5.2/developing/nodejs.md index 96ced38916..20f12767d2 100644 --- a/content/riak/ts/1.5.2/developing/nodejs.md +++ b/content/riak/ts/1.5.2/developing/nodejs.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/developing/nodejs/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/nodejs" + - /riakts/1.5.2/developing/nodejs/ + --- @@ -207,8 +207,8 @@ var cb = function (err, rslt) { // 'rows' - row matching the Query request }; -var query = "select * from TimeSeriesData \ - where time > 0 and time < 10 and \ +var query = "select * from TimeSeriesData \ + where time > 0 and time < 10 and \ region = 'South Atlantic' and state = 'South Carolina'"; var cmd = new Riak.Commands.TS.Query.Builder() diff --git a/content/riak/ts/1.5.2/developing/php.md b/content/riak/ts/1.5.2/developing/php.md index 8d1735a6cc..9127ae438d 100644 --- a/content/riak/ts/1.5.2/developing/php.md +++ b/content/riak/ts/1.5.2/developing/php.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/developing/php/ -canonical_link:
"https://docs.basho.com/riak/ts/latest/developing/php" + - /riakts/1.5.2/developing/php/ + --- @@ -26,7 +26,7 @@ Language | Source | Documentation | :--------|:-------|:--------------| PHP | [riak-php-client](https://github.com/basho/riak-php-client) | [apigen](http://basho.github.io/riak-php-client) -TS support within the PHP client is implemented through the following command builders, all beginning Basho\Riak\Command\Builder\TimeSeries: +TS support within the PHP client is implemented through the following command builders, all beginning Basho/Riak/Command/Builder/TimeSeries: * StoreRows * FetchRow @@ -96,7 +96,7 @@ All command classes have a `Builder` class to create and build each command. Each command is created through a `Builder` class. This pattern ensures the commands are created as correctly as possible. To create the command from the builder, call the `.build()` method. -To execute any command, you must have an instance of a `\Basho\Riak` object. You then pass the Riak object as a parameter into the constructor of the command builder. +To execute any command, you must have an instance of a `/Basho/Riak` object. You then pass the Riak object as a parameter into the constructor of the command builder. @@ -106,7 +106,7 @@ Deletes a single row by it's key values. ```php # delete a row -$response = (new Command\Builder\TimeSeries\DeleteRow($riak)) +$response = (new Command/Builder/TimeSeries/DeleteRow($riak)) ->atKey([ (new Cell("region"))->setValue("South Atlantic"), (new Cell("state"))->setValue("South Carolina"), @@ -128,8 +128,8 @@ if (!$response->isSuccess()) { Fetches a single row by it's key values. ```php -/** @var Command\TimeSeries\Response $response */ -$response = (new Command\Builder\TimeSeries\FetchRow($riak)) +/** @var Command/TimeSeries/Response $response */ +$response = (new Command/Builder/TimeSeries/FetchRow($riak)) ->atKey([ (new Cell("region"))->setValue("South Atlantic"), (new Cell("state"))->setValue("South Carolina"), @@ -147,20 +147,20 @@ if (!$response->isSuccess()) { # output row data foreach ($response->getRow() as $index => $column) { switch ($column->getType()) { - case Riak\TimeSeries\Cell::INT_TYPE: - printf("Column %d: %s is an integer equal to %d\n", $index, $column->getName(), $column->getValue()); + case Riak/TimeSeries/Cell::INT_TYPE: + printf("Column %d: %s is an integer equal to %d/n", $index, $column->getName(), $column->getValue()); break; - case Riak\TimeSeries\Cell::DOUBLE_TYPE: - printf("Column %d: %s is a double equal to %d\n", $index, $column->getName(), $column->getValue()); + case Riak/TimeSeries/Cell::DOUBLE_TYPE: + printf("Column %d: %s is a double equal to %d/n", $index, $column->getName(), $column->getValue()); break; - case Riak\TimeSeries\Cell::BOOL_TYPE: - printf("Column %d: %s is a boolean equal to %s\n", $index, $column->getName(), $column->getValue()); + case Riak/TimeSeries/Cell::BOOL_TYPE: + printf("Column %d: %s is a boolean equal to %s/n", $index, $column->getName(), $column->getValue()); break; - case Riak\TimeSeries\Cell::TIMESTAMP_TYPE: - printf("Column %d: %s is a timestamp equal to %d\n", $index, $column->getName(), $column->getValue()); + case Riak/TimeSeries/Cell::TIMESTAMP_TYPE: + printf("Column %d: %s is a timestamp equal to %d/n", $index, $column->getName(), $column->getValue()); break; default: - printf("Column %d: %s is a string equal to %s\n", $index, $column->getName(), $column->getValue()); + printf("Column %d: %s is a string equal to %s/n", $index, $column->getName(), $column->getValue()); break; } } @@ 
-172,7 +172,7 @@ foreach ($response->getRow() as $index => $column) { Allows you to query a Riak TS table with the given query string. ```php -$response = (new Command\Builder\TimeSeries\Query($riak)) +$response = (new Command\Builder\TimeSeries\Query($riak)) ->withQuery("select * from GeoCheckins where region = 'South Atlantic' and state = 'South Carolina' and (time > 1420113500 and time < 1420116000)") ->build() ->execute(); @@ -181,20 +181,20 @@ $response = (new Command\Builder\TimeSeries\Query($riak)) foreach ($response->getResults() as $row_index => $row) { foreach ($row as $column_index => $column) { switch ($column->getType()) { - case Riak\TimeSeries\Cell::INT_TYPE: - printf("Column %d: %s is an integer equal to %d\n", $index, $column->getName(), $column->getValue()); + case Riak\TimeSeries\Cell::INT_TYPE: + printf("Column %d: %s is an integer equal to %d\n", $index, $column->getName(), $column->getValue()); break; - case Riak\TimeSeries\Cell::DOUBLE_TYPE: - printf("Column %d: %s is a double equal to %d\n", $index, $column->getName(), $column->getValue()); + case Riak\TimeSeries\Cell::DOUBLE_TYPE: + printf("Column %d: %s is a double equal to %d\n", $index, $column->getName(), $column->getValue()); break; - case Riak\TimeSeries\Cell::BOOL_TYPE: - printf("Column %d: %s is a boolean equal to %s\n", $index, $column->getName(), $column->getValue()); + case Riak\TimeSeries\Cell::BOOL_TYPE: + printf("Column %d: %s is a boolean equal to %s\n", $index, $column->getName(), $column->getValue()); break; - case Riak\TimeSeries\Cell::TIMESTAMP_TYPE: - printf("Column %d: %s is a timestamp equal to %d\n", $index, $column->getName(), $column->getValue()); + case Riak\TimeSeries\Cell::TIMESTAMP_TYPE: + printf("Column %d: %s is a timestamp equal to %d\n", $index, $column->getName(), $column->getValue()); break; default: - printf("Column %d: %s is a string equal to %s\n", $index, $column->getName(), $column->getValue()); + printf("Column %d: %s is a string equal to %s\n", $index, $column->getName(), $column->getValue()); break; } } @@ -208,7 +208,7 @@ Stores data in the Riak TS table.
```php # store a row -$response = (new Command\Builder\TimeSeries\StoreRows($riak)) +$response = (new Command\Builder\TimeSeries\StoreRows($riak)) ->inTable('GeoCheckins') ->withRow([ (new Cell("region"))->setValue("South Atlantic"), @@ -227,7 +227,7 @@ if (!$response->isSuccess()) { # store rows -$response = (new Command\Builder\TimeSeries\StoreRows($riak)) +$response = (new Command\Builder\TimeSeries\StoreRows($riak)) ->inTable('GeoCheckins') ->withRows([ [ diff --git a/content/riak/ts/1.5.2/developing/python.md b/content/riak/ts/1.5.2/developing/python.md index 41a9af8a61..4462f8cad3 100644 --- a/content/riak/ts/1.5.2/developing/python.md +++ b/content/riak/ts/1.5.2/developing/python.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/developing/python/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/python" + - /riakts/1.5.2/developing/python/ + --- diff --git a/content/riak/ts/1.5.2/developing/ruby.md b/content/riak/ts/1.5.2/developing/ruby.md index cb699c130c..0653c66323 100644 --- a/content/riak/ts/1.5.2/developing/ruby.md +++ b/content/riak/ts/1.5.2/developing/ruby.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/developing/ruby/ -canonical_link: "https://docs.basho.com/riak/ts/latest/developing/ruby" + - /riakts/1.5.2/developing/ruby/ + --- You can develop applications and tools using Riak TS with the Riak Ruby client. @@ -230,4 +230,4 @@ the cells are in the table's DDL. #### Instance Method -The `write!` method writes the data to Riak TS. \ No newline at end of file +The `write!` method writes the data to Riak TS. diff --git a/content/riak/ts/1.5.2/downloads.md b/content/riak/ts/1.5.2/downloads.md index a6a3f4db12..f7c0af4115 100644 --- a/content/riak/ts/1.5.2/downloads.md +++ b/content/riak/ts/1.5.2/downloads.md @@ -17,8 +17,7 @@ listed_projects: title: "Riak TS" install_instructions_set: "installing" aliases: - - /riakts/1.5.2/downloads/ -canonical_link: "https://docs.basho.com/riak/ts/latest/downloads" + - /riakts/1.5.2/downloads/ --- diff --git a/content/riak/ts/1.5.2/index.md b/content/riak/ts/1.5.2/index.md index 0fd386ce57..dd47d416fd 100644 --- a/content/riak/ts/1.5.2/index.md +++ b/content/riak/ts/1.5.2/index.md @@ -11,11 +11,9 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/ -canonical_link: "https://docs.basho.com/riak/ts/latest" + - /riakts/1.5.2/ --- - [download]: downloads/ [installing]: setup/installing/ [learnabout]: learn-about/ diff --git a/content/riak/ts/1.5.2/learn-about.md b/content/riak/ts/1.5.2/learn-about.md index 7f116fbdd0..c22207cbcd 100644 --- a/content/riak/ts/1.5.2/learn-about.md +++ b/content/riak/ts/1.5.2/learn-about.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/learn-about/learn-about/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about" + - /riakts/1.5.2/learn-about/learn-about/ + --- [table arch]: tablearchitecture/ diff --git a/content/riak/ts/1.5.2/learn-about/bestpractices.md b/content/riak/ts/1.5.2/learn-about/bestpractices.md index 15d3c48b31..fa9c98c246 100644 --- a/content/riak/ts/1.5.2/learn-about/bestpractices.md +++ b/content/riak/ts/1.5.2/learn-about/bestpractices.md @@ -12,12 +12,12 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/learn-about/bestpractices/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/bestpractices" + -
/riakts/1.5.2/learn-about/bestpractices/ + --- -[glossary bucket]: http://docs.basho.com/riak/kv/2.2.0/learn/glossary/#bucket +[glossary bucket]: {{< baseurl >}}riak/kv/2.2.0/learn/glossary/#bucket [planning column def]: ../../using/planning/#column-definitions [planning partition]: ../../using/planning/#partition-key [planning primary]: ../../using/planning/#primary-key diff --git a/content/riak/ts/1.5.2/learn-about/sqlriakts.md b/content/riak/ts/1.5.2/learn-about/sqlriakts.md index 50554e2668..d2921d571e 100644 --- a/content/riak/ts/1.5.2/learn-about/sqlriakts.md +++ b/content/riak/ts/1.5.2/learn-about/sqlriakts.md @@ -12,8 +12,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/learn-about/sqlriakts -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/sqlriakts" + - /riakts/1.5.2/learn-about/sqlriakts + --- diff --git a/content/riak/ts/1.5.2/learn-about/tablearchitecture.md b/content/riak/ts/1.5.2/learn-about/tablearchitecture.md index 65e1e8047d..63c33c22fb 100644 --- a/content/riak/ts/1.5.2/learn-about/tablearchitecture.md +++ b/content/riak/ts/1.5.2/learn-about/tablearchitecture.md @@ -12,8 +12,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/learn-about/advancedplanning/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/tablearchitecture" + - /riakts/1.5.2/learn-about/advancedplanning/ + --- diff --git a/content/riak/ts/1.5.2/learn-about/timestamps.md b/content/riak/ts/1.5.2/learn-about/timestamps.md index 2f9757d6e2..1ade1f884f 100644 --- a/content/riak/ts/1.5.2/learn-about/timestamps.md +++ b/content/riak/ts/1.5.2/learn-about/timestamps.md @@ -12,8 +12,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/learn-about/timestamps/ -canonical_link: "https://docs.basho.com/riak/ts/latest/learn-about/timestamps" + - /riakts/1.5.2/learn-about/timestamps/ + --- [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 @@ -62,7 +62,7 @@ database. For querying data, there are two important exceptions to the simple expand and convert sequence above. -1\. Strictly greater than (>) and greater-than/equal-to (>=) queries involve +1\. Strictly greater than (>) and greater-than/equal-to (>=) queries involve incrementing the original reduced accuracy representation before expansion. @@ -75,7 +75,7 @@ Thus, querying for timestamp values greater than `'1970-12-18 21:00'` will ignore any values which fall between 9pm and 9:01pm, while using a fully-specified string `'1970-12-18 21:00:00'` will include them. -2\. Fractional times are not considered reduced accuracy, so selecting for +2\. Fractional times are not considered reduced accuracy, so selecting for timestamps greater than `2016-08-03 15:00` will give different results than `2016-08-01 15.0` (or `2016-08-01 15:00:00`). diff --git a/content/riak/ts/1.5.2/releasenotes.md b/content/riak/ts/1.5.2/releasenotes.md index 0880823de9..7d15b6fc46 100644 --- a/content/riak/ts/1.5.2/releasenotes.md +++ b/content/riak/ts/1.5.2/releasenotes.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/releasenotes -canonical_link: "https://docs.basho.com/riak/ts/latest/releasenotes" + - /riakts/1.5.2/releasenotes + --- @@ -71,7 +71,7 @@ Riak TS has significantly improved performance, thanks to streamlining of the on ## New Features -* `ASC` and `DESC` have been added to the CREATE TABLE statement.
Adding the ASC/DESC keywords to your local key during `CREATE TABLE` means you can have your data pre-sorted in ascending or descending order as it's input into your TS table. You can read more about `ASC`/`DESC` in the local key [here](/riak/ts/1.5.2/using/planning). +* `ASC` and `DESC` have been added to the CREATE TABLE statement. Adding the ASC/DESC keywords to your local key during `CREATE TABLE` means you can have your data pre-sorted in ascending or descending order as it's input into your TS table. You can read more about `ASC`/`DESC` in the local key [here]({{< baseurl >}}riak/ts/1.5.2/using/planning). * [[PR 1427](https://github.com/basho/riak_kv/pull/1427)] * [[PR 1500](https://github.com/basho/riak_kv/pull/1500)] * [[PR 1558](https://github.com/basho/riak_kv/pull/1558 )] @@ -81,17 +81,17 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * [[riak_test PR 1200](https://github.com/basho/riak_test/pull/1200)] * [[riak_test PR 1081](https://github.com/basho/riak_test/pull/1081)] * [[riak_test PR 1201](https://github.com/basho/riak_test/pull/1201 )] -* The ORDER BY statement has been added to `SELECT`, allowing you to sort the results of your query in various ways, including: ascending or descending order, or nulls first or last. You can learn about `ORDER BY` [here](/riak/ts/1.5.2/using/querying/select/order-by). +* The ORDER BY statement has been added to `SELECT`, allowing you to sort the results of your query in various ways, including: ascending or descending order, or nulls first or last. You can learn about `ORDER BY` [here]({{< baseurl >}}riak/ts/1.5.2/using/querying/select/order-by). * [[PR 1479](https://github.com/basho/riak_kv/pull/1479)] * [[riak erlang client PR 321](https://github.com/basho/riak-erlang-client/pull/321)] * [[riak_pb PR 208](https://github.com/basho/riak_pb/pull/208)] * [[riak_test PR 1152](https://github.com/basho/riak_test/pull/1152)] -* `LIMIT` allows you to specify that you only want a specific number of records from your query, and it can be expanded by `OFFSET`. You can read about how to use the LIMIT statement [here](/riak/ts/1.5.2/using/querying/select/limit). +* `LIMIT` allows you to specify that you only want a specific number of records from your query, and it can be expanded by `OFFSET`. You can read about how to use the LIMIT statement [here]({{< baseurl >}}riak/ts/1.5.2/using/querying/select/limit). * [[PR 1479](https://github.com/basho/riak_kv/pull/1479)] * [[riak erlang client PR 321](https://github.com/basho/riak-erlang-client/pull/321)] * [[riak_pb PR 208](https://github.com/basho/riak_pb/pull/208)] * [[riak_test PR 1152](https://github.com/basho/riak_test/pull/1152)] -* You can now use `DELETE` from riak shell to remove a record from your TS table. Learn all about `DELETE` [here](/riak/ts/1.5.2/using/querying/delete). +* You can now use `DELETE` from riak shell to remove a record from your TS table. Learn all about `DELETE` [here]({{< baseurl >}}riak/ts/1.5.2/using/querying/delete).
* [[PR 1552](https://github.com/basho/riak_kv/pull/1552)] * [[riak_ql PR 145](https://github.com/basho/riak_ql/pull/145)] * [[riak_shell PR 23](https://github.com/basho/riak_shell/pull/23)] @@ -103,14 +103,14 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * [[riak_ql PR 144](https://github.com/basho/riak_ql/pull/144)] * [[riak_shell PR 56](https://github.com/basho/riak_shell/pull/56)] * [[riak_test PR 1169](https://github.com/basho/riak_test/pull/1169)] -* You can now run `SHOW CREATE TABLE` to review SQL definition and replication properties of existing Riak TS tables. You can read more about the SHOW CREATE TABLE statement [here](/riak/ts/1.5.2/using/querying/show-create-table). +* You can now run `SHOW CREATE TABLE` to review SQL definition and replication properties of existing Riak TS tables. You can read more about the SHOW CREATE TABLE statement [here]({{< baseurl >}}riak/ts/1.5.2/using/querying/show-create-table). * [[PR 1536](https://github.com/basho/riak_kv/pull/1536) * [[riak_ql 155](https://github.com/basho/riak_ql/pull/155)] * [[riak_ql 159](https://github.com/basho/riak_ql/pull/159 )] * [[riak_shell PR 62](https://github.com/basho/riak_shell/pull/62)] * [[riak_test PR 1193](https://github.com/basho/riak_test/pull/1193)] * [[riak_test PR 1211](https://github.com/basho/riak_test/pull/1211)] -* A BLOB data type is now available. BLOB allows the storage of unstructured data, binary or opaque (JSON), in a Riak TS column. Learn about BLOB data type [here](/riak/ts/1.5.2/using/writingdata/#blob-data). +* A BLOB data type is now available. BLOB allows the storage of unstructured data, binary or opaque (JSON), in a Riak TS column. Learn about BLOB data type [here]({{< baseurl >}}riak/ts/1.5.2/using/writingdata/#blob-data). * [[PR 1540](https://github.com/basho/riak_kv/pull/1540)] * [[riak_pb PR 211](https://github.com/basho/riak_pb/issues/211)] * [[riak_ql PR 156](https://github.com/basho/riak_ql/issues/156)] @@ -134,7 +134,7 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * The timestamp type is now able to be used as an argument in aggregate functions. [[riak_ql PR 146](https://github.com/basho/riak_ql/pull/146) & [riak_ql PR 147](https://github.com/basho/riak_ql/pull/147)] * You can now see the Status field of your TS table when you use `SHOW TABLES`. [[PR 1514](https://github.com/basho/riak_kv/pull/1514 ) and [PR 1176](https://github.com/basho/riak_test/pull/1176 )] -* Introduced the following new parameters in riak.conf. See the [TS configuration docs](/riak/ts/1.5.2/configuring/riakconf) for details. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] +* Introduced the following new parameters in riak.conf. See the [TS configuration docs]({{< baseurl >}}riak/ts/1.5.2/configuring/riakconf) for details. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] * riak_kv.query.timeseries.max_returned_data_size * riak_kv.query.timeseries.max_running_fsms * riak_kv.query.timeseries.qbuf_root_path @@ -149,7 +149,7 @@ Riak TS has significantly improved performance, thanks to streamlining of the on * Write-once conflict resolution has been changed to be more predictable. It is now based on timestamp rather than SHA-1 hash on value part. [[PR 1512](https://github.com/basho/riak_kv/pull/1512)] * LevelDB has been updated to version 2.0.33 [[eleveldb PR 231](https://github.com/basho/eleveldb/pull/231)] * LZ4 is now the default compression for LevelDB.
[[leveldb PR 164](https://github.com/basho/leveldb/pull/164) & [eleveldb PR 208](https://github.com/basho/eleveldb/pull/208)] -* Updated the default value for `riak_kv.query.timeseries.max_quanta_span`. See the [TS configuration docs](/riak/ts/1.5.2/configuring/riakconf) for details. **Note:** due to a bug in the code, the `max_quanta_span` is capped at 1000. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] +* Updated the default value for `riak_kv.query.timeseries.max_quanta_span`. See the [TS configuration docs]({{< baseurl >}}riak/ts/1.5.2/configuring/riakconf) for details. **Note:** due to a bug in the code, the `max_quanta_span` is capped at 1000. [[PR 1505](https://github.com/basho/riak_kv/pull/1505)] * The default value for `OFFSET` is `[ ]`. [[PR 1546](https://github.com/basho/riak_kv/pull/1546)] @@ -189,4 +189,4 @@ Riak TS is compatible with the following: * You cannot use Bitcask with Riak TS tables. * `riak_kv.query.timeseries.max_quanta_span` is capped at 1000 due to a bug. -You can see a table of KV and TS features [here](/riak/ts/1.5.2/using/core-fundamentals/). \ No newline at end of file +You can see a table of KV and TS features [here]({{< baseurl >}}riak/ts/1.5.2/using/core-fundamentals/). diff --git a/content/riak/ts/1.5.2/setup.md b/content/riak/ts/1.5.2/setup.md index 39871dcf6d..9d3ae77c80 100644 --- a/content/riak/ts/1.5.2/setup.md +++ b/content/riak/ts/1.5.2/setup.md @@ -13,13 +13,13 @@ toc: true version_history: in: "1.5.2+" aliases: - - /riakts/1.5.2/setup/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/" + - /riakts/1.5.2/setup/ + --- -[install]: /riak/ts/1.5.2/setup/installing -[upgrade]: /riak/ts/1.5.2/setup/upgrading -[downgrade]: /riak/ts/1.5.2/setup/downgrading +[install]: {{< baseurl >}}riak/ts/1.5.2/setup/installing +[upgrade]: {{< baseurl >}}riak/ts/1.5.2/setup/upgrading +[downgrade]: {{< baseurl >}}riak/ts/1.5.2/setup/downgrading ## In This Section diff --git a/content/riak/ts/1.5.2/setup/downgrading.md b/content/riak/ts/1.5.2/setup/downgrading.md index 1448242060..fa1ef56af9 100644 --- a/content/riak/ts/1.5.2/setup/downgrading.md +++ b/content/riak/ts/1.5.2/setup/downgrading.md @@ -13,9 +13,9 @@ toc: true version_history: in: "1.5.2+" aliases: - - /riakts/1.5.2/setup/downgrading/ - - /riakts/1.5.2/downgrading/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/downgrading/" + - /riakts/1.5.2/setup/downgrading/ + - /riakts/1.5.2/downgrading/ + --- ## Caution diff --git a/content/riak/ts/1.5.2/setup/installing.md b/content/riak/ts/1.5.2/setup/installing.md index c8e9ac7e76..1f78f6e9ba 100644 --- a/content/riak/ts/1.5.2/setup/installing.md +++ b/content/riak/ts/1.5.2/setup/installing.md @@ -16,18 +16,18 @@ version_history: - ["1.0.0-1.3.1", "installing"] - ["1.4.0+", "setup/installing"] aliases: - - /riakts/1.5.2/installing/installing/ - - /riakts/1.5.2/setup/installing/ - - /riak/ts/1.5.2/installing/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/" + - /riakts/1.5.2/installing/installing/ + - /riakts/1.5.2/setup/installing/ + - /riak/ts/1.5.2/installing/ + - /riak/ts/latest/installing/ --- [AWS]: aws/ -[concept aae]: /riak/kv/2.1.3/learn/concepts/active-anti-entropy +[concept aae]: {{< baseurl >}}riak/kv/2.1.3/learn/concepts/active-anti-entropy [Centos]: rhel-centos/ [Debian]: debian-ubuntu/ -[download]: /riak/ts/1.5.2/downloads/ +[download]: {{< baseurl >}}riak/ts/1.5.2/downloads/ [OSX]: mac-osx/ [source]: source/ [Ubuntu]: debian-ubuntu/ diff --git a/content/riak/ts/1.5.2/setup/installing/aws.md b/content/riak/ts/1.5.2/setup/installing/aws.md index 84562da984..6b40e742cf
100644 --- a/content/riak/ts/1.5.2/setup/installing/aws.md +++ b/content/riak/ts/1.5.2/setup/installing/aws.md @@ -15,17 +15,17 @@ version_history: - ["1.0.0-1.3.1", "installing/aws"] - ["1.4.0+", "setup/installing/aws"] aliases: - - /riakts/1.5.2/installing/aws/ - - /riakts/1.5.2/setup/installing/aws/ - - /riak/ts/1.5.2/installing/aws/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/aws/" + - /riakts/1.5.2/installing/aws/ + - /riakts/1.5.2/setup/installing/aws/ + - /riak/ts/1.5.2/installing/aws/ + - /riak/ts/latest/installing/aws/ --- [AWS]: http://aws.amazon.com -[download]: /riak/ts/1.5.2/downloads/ +[download]: {{< baseurl >}}riak/ts/1.5.2/downloads/ [ec2 guide]: http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AccessingInstances.html -[security basics]: /riak/ts/1.5.2/using/security/ +[security basics]: {{< baseurl >}}riak/ts/1.5.2/using/security/ Riak TS can be installed on AWS virtual machines (VMs) using a binary @@ -42,7 +42,7 @@ Get started by launching a Riak TS virtual machine via the AWS Marketplace. (You 3. Set your desired AWS region, EC2 instance type, firewall settings, and key pair. - ![AWS Marketplace Instance Settings](/images/aws-marketplace-settings.png) + ![AWS Marketplace Instance Settings]({{< baseurl >}}images/aws-marketplace-settings.png) 4. Then click the **Accept Terms and Launch with 1-Click** button. @@ -67,7 +67,7 @@ Once the virtual machine is created, you should verify that your selected EC2 se 4. When complete, your security group should contain all of the rules listed below. If you are missing any rules, add them in the lower panel and then click the **Apply Rule Changes** button. - ![EC2 Security Group Settings](/images/aws-marketplace-security-group.png) + ![EC2 Security Group Settings]({{< baseurl >}}images/aws-marketplace-security-group.png) We also recommend that you read more about [Security in TS][security basics].
diff --git a/content/riak/ts/1.5.2/setup/installing/debian-ubuntu.md b/content/riak/ts/1.5.2/setup/installing/debian-ubuntu.md index b6b1a5e1bb..f4276c4cff 100644 --- a/content/riak/ts/1.5.2/setup/installing/debian-ubuntu.md +++ b/content/riak/ts/1.5.2/setup/installing/debian-ubuntu.md @@ -15,16 +15,16 @@ version_history: - ["1.0.0-1.3.1", "installing/debian-ubuntu"] - ["1.4.0+", "setup/installing/debian-ubuntu"] aliases: - - /riakts/1.5.2/installing/debian-ubuntu/ - - /riakts/1.5.2/setup/installing/debian-ubuntu/ - - /riak/ts/1.5.2/installing/debian-ubuntu/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/debian-ubuntu/" + - /riakts/1.5.2/installing/debian-ubuntu/ + - /riakts/1.5.2/setup/installing/debian-ubuntu/ + - /riak/ts/1.5.2/installing/debian-ubuntu/ + - /riak/ts/latest/installing/debian-ubuntu/ --- -[download]: /riak/ts/1.5.2/downloads/ -[openfileslimit]: /riak/kv/2.2.0/using/performance/open-files-limit -[planning]: /riak/ts/1.5.2/using/planning -[security basics pam]: /riak/ts/1.5.2/using/security/sources-management/#pam-based-authentication +[download]: {{< baseurl >}}riak/ts/1.5.2/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.2.0/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.5.2/using/planning +[security basics pam]: {{< baseurl >}}riak/ts/1.5.2/using/security/sources-management/#pam-based-authentication Riak TS can be installed on Debian or Ubuntu-based systems using a binary diff --git a/content/riak/ts/1.5.2/setup/installing/mac-osx.md b/content/riak/ts/1.5.2/setup/installing/mac-osx.md index 1d2a12af00..375af581cb 100644 --- a/content/riak/ts/1.5.2/setup/installing/mac-osx.md +++ b/content/riak/ts/1.5.2/setup/installing/mac-osx.md @@ -15,16 +15,16 @@ version_history: - ["1.0.0-1.3.1", "installing/mac-osx"] - ["1.4.0+", "setup/installing/mac-osx"] aliases: - - /riakts/1.5.2/installing/mac-osx/ - - /riakts/1.5.2/setup/installing/mac-osx/ - - /riak/ts/1.5.2/installing/mac-osx/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/mac-osx/" + - /riakts/1.5.2/installing/mac-osx/ + - /riakts/1.5.2/setup/installing/mac-osx/ + - /riak/ts/1.5.2/installing/mac-osx/ + - /riak/ts/latest/installing/mac-osx/ --- -[download]: /riak/ts/1.5.2/downloads/ -[openfileslimit]: /riak/kv/2.2.0/using/performance/open-files-limit -[planning]: /riak/ts/1.5.2/using/planning +[download]: {{< baseurl >}}riak/ts/1.5.2/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.2.0/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.5.2/using/planning Riak TS can be installed on Mac OS X systems using a binary package available [here][download].
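These installation pages all point at the same next step: planning a table. As a quick post-install smoke test, the GeoCheckin example table used throughout these docs can be created from riak shell. A minimal sketch (the table name and columns are just the docs' running example, not required names):

```sql
CREATE TABLE GeoCheckin
(
  region      VARCHAR   NOT NULL,
  state       VARCHAR   NOT NULL,
  time        TIMESTAMP NOT NULL,
  weather     VARCHAR   NOT NULL,
  temperature DOUBLE,
  -- partition on region/state plus 15-minute quanta of time
  PRIMARY KEY ((region, state, QUANTUM(time, 15, 'm')), region, state, time)
);
```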
diff --git a/content/riak/ts/1.5.2/setup/installing/rhel-centos.md b/content/riak/ts/1.5.2/setup/installing/rhel-centos.md index 0148aa422a..eda76594f6 100644 --- a/content/riak/ts/1.5.2/setup/installing/rhel-centos.md +++ b/content/riak/ts/1.5.2/setup/installing/rhel-centos.md @@ -15,15 +15,15 @@ version_history: - ["1.0.0-1.3.1", "installing/rhel-centos"] - ["1.4.0+", "setup/installing/rhel-centos"] aliases: - - /riakts/1.5.2/installing/rhel-centos/ - - /riakts/1.5.2/setup/installing/rhel-centos/ - - /riak/ts/1.5.2/installing/rhel-centos/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/rhel-centos/" + - /riakts/1.5.2/installing/rhel-centos/ + - /riakts/1.5.2/setup/installing/rhel-centos/ + - /riak/ts/1.5.2/installing/rhel-centos/ + - /riak/ts/latest/installing/rhel-centos/ --- -[download]: /riak/ts/1.5.2/downloads/ -[openfileslimit]: /riak/kv/2.2.0/using/performance/open-files-limit -[planning]: /riak/ts/1.5.2/using/planning/ +[download]: {{< baseurl >}}riak/ts/1.5.2/downloads/ +[openfileslimit]: {{< baseurl >}}riak/kv/2.2.0/using/performance/open-files-limit +[planning]: {{< baseurl >}}riak/ts/1.5.2/using/planning/ Riak TS can be installed on CentOS-based systems using a binary diff --git a/content/riak/ts/1.5.2/setup/installing/source.md b/content/riak/ts/1.5.2/setup/installing/source.md index c475cbef08..5755870b31 100644 --- a/content/riak/ts/1.5.2/setup/installing/source.md +++ b/content/riak/ts/1.5.2/setup/installing/source.md @@ -15,19 +15,19 @@ version_history: - ["1.0.0-1.3.1", "installing/source"] - ["1.4.0+", "setup/installing/source"] aliases: - - /riakts/1.5.2/installing/source/ - - /riakts/1.5.2/setup/installing/source/ - - /riak/ts/1.5.2/installing/source/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/" + - /riakts/1.5.2/installing/source/ + - /riakts/1.5.2/setup/installing/source/ + - /riak/ts/1.5.2/installing/source/ + - /riak/ts/latest/installing/source/ --- -[download]: /riak/ts/1.5.2/downloads/ +[download]: {{< baseurl >}}riak/ts/1.5.2/downloads/ [Erlang]: http://www.erlang.org/ [GCC]: https://gcc.gnu.org/ [Git]: https://git-scm.com/ -[install erlang]: /riak/ts/1.5.2/setup/installing/source/erlang -[planning]: /riak/ts/1.5.2/using/planning/ +[install erlang]: {{< baseurl >}}riak/ts/1.5.2/setup/installing/source/erlang +[planning]: {{< baseurl >}}riak/ts/1.5.2/using/planning/ [Riak TS GitHub repository]: https://github.com/basho/riak/tree/riak_ts-1.5.2 diff --git a/content/riak/ts/1.5.2/setup/installing/source/erlang.md b/content/riak/ts/1.5.2/setup/installing/source/erlang.md index 0ce3a95675..128974e95a 100644 --- a/content/riak/ts/1.5.2/setup/installing/source/erlang.md +++ b/content/riak/ts/1.5.2/setup/installing/source/erlang.md @@ -15,10 +15,10 @@ version_history: - ["1.0.0-1.3.1", "installing/source/erlang"] - ["1.4.0+", "setup/installing/source/erlang"] aliases: - - /riakts/1.5.2/installing/source/erlang/ - - /riakts/1.5.2/setup/installing/source/erlang/ - - /riak/ts/1.5.2/installing/source/erlang/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/erlang/" + - /riakts/1.5.2/installing/source/erlang/ + - /riakts/1.5.2/setup/installing/source/erlang/ + - /riak/ts/1.5.2/installing/source/erlang/ + - /riak/ts/latest/installing/source/erlang/ --- @@ -34,7 +34,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/setup/installing/source/e [make]: http://www.gnu.org/software/make/ [ncurses]: http://www.gnu.org/software/ncurses/ [OpenSSL]: https://www.openssl.org/ -[source]: /riak/ts/1.5.2/setup/installing/source/ +[source]:
{{< baseurl >}}riak/ts/1.5.2/setup/installing/source/ [XCode Developer Tools]: https://developer.apple.com/xcode/downloads/ @@ -349,15 +349,15 @@ If you're on Mavericks (OS X 10.9), Mountain Lion (OS X 10.8), or Lion Using LLVM: ```bash -CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \ +CFLAGS=-O0 ./configure --disable-hipe --enable-smp-support --enable-threads \ --enable-kernel-poll --enable-darwin-64bit ``` Or if you prefer GCC: ```bash -CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \ -./configure --disable-hipe --enable-smp-support --enable-threads \ +CC=gcc-4.2 CPPFLAGS='-DNDEBUG' MAKEFLAGS='-j 3' \ +./configure --disable-hipe --enable-smp-support --enable-threads \ --enable-kernel-poll --enable-darwin-64bit ``` @@ -367,7 +367,7 @@ If you're on Snow Leopard (OS X 10.6) or Leopard (OS X 10.5) with an Intel processor: ```bash -./configure --disable-hipe --enable-smp-support --enable-threads \ +./configure --disable-hipe --enable-smp-support --enable-threads \ --enable-kernel-poll --enable-darwin-64bit ``` @@ -376,7 +376,7 @@ Intel processor: If you're on a non-Intel processor or older version of OS X: ```bash -./configure --disable-hipe --enable-smp-support --enable-threads \ +./configure --disable-hipe --enable-smp-support --enable-threads \ --enable-kernel-poll ``` diff --git a/content/riak/ts/1.5.2/setup/upgrading.md b/content/riak/ts/1.5.2/setup/upgrading.md index 828743db4c..5d5dd1b6bf 100644 --- a/content/riak/ts/1.5.2/setup/upgrading.md +++ b/content/riak/ts/1.5.2/setup/upgrading.md @@ -13,19 +13,19 @@ toc: true version_history: present_from: "1.4.0+" aliases: - - /riakts/1.5.2/setup/upgrading/ - - /riakts/1.5.2/upgrading/ -canonical_link: "https://docs.basho.com/riak/ts/latest/setup/upgrading/" + - /riakts/1.5.2/setup/upgrading/ + - /riakts/1.5.2/upgrading/ + --- -[use admin commands]: /riak/kv/2.2.0/using/admin/commands -[use admin riak-admin]: /riak/kv/2.2.0/using/admin/riak-admin -[usage secondary-indexes]: /riak/kv/2.2.0/developing/usage/secondary-indexes +[use admin commands]: {{< baseurl >}}riak/kv/2.2.0/using/admin/commands +[use admin riak-admin]: {{< baseurl >}}riak/kv/2.2.0/using/admin/riak-admin +[usage secondary-indexes]: {{< baseurl >}}riak/kv/2.2.0/developing/usage/secondary-indexes [riak ts enterprise]: http://basho.com/products/riak-ts/ -[cluster ops mdc]: /riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter -[config v3 mdc]: /riak/kv/2.2.0/configuring/v3-multi-datacenter -[jmx monitor]: /riak/kv/2.2.0/using/reference/jmx -[snmp]: /riak/kv/2.2.0/using/reference/snmp +[cluster ops mdc]: {{< baseurl >}}riak/kv/2.2.0/using/cluster-operations/v3-multi-datacenter +[config v3 mdc]: {{< baseurl >}}riak/kv/2.2.0/configuring/v3-multi-datacenter +[jmx monitor]: {{< baseurl >}}riak/kv/2.2.0/using/reference/jmx +[snmp]: {{< baseurl >}}riak/kv/2.2.0/using/reference/snmp {{% note title="**CAUTION**" %}} @@ -45,25 +45,25 @@ operating modes. This allows clusters containing mixed-versions of Riak TS to pr The following example demonstrates upgrading a Riak TS node that has been installed with the Debian/Ubuntu packages provided by Basho. -1\. Stop Riak TS: +1\. Stop Riak TS: ```bash riak stop ``` -2\. Back up your Riak TS node's /etc and /data directories: +2\. Back up your Riak TS node's /etc and /data directories: ```bash sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak ``` -3\. Upgrade Riak TS: +3\. Upgrade Riak TS: ```bash sudo dpkg -i »riakts_package_name«.deb ``` -4\. Restart Riak TS: +4\.
Restart Riak TS: {{% note %}} Before restarting Riak TS, check your riak.conf file and verify that your settings are configured as expected. @@ -73,13 +73,13 @@ Before restarting Riak TS, check your riak.conf file and verify that your settin riak start ``` -5\. Verify Riak TS is running the new version: +5\. Verify Riak TS is running the new version: ```bash riak version ``` -6\. Wait for the `riak_kv` service to start: +6\. Wait for the `riak_kv` service to start: ```bash riak-admin wait-for-service riak_kv »target node« @@ -88,7 +88,7 @@ riak-admin wait-for-service riak_kv »target node« * `»target node«` is the node which you have just upgraded (e.g. `riak@192.168.1.11`) -7\. Wait for any hinted handoff transfers to complete: +7\. Wait for any hinted handoff transfers to complete: ```bash riak-admin transfers @@ -98,32 +98,32 @@ riak-admin transfers While the node was offline, other nodes may have accepted writes on its behalf. This data is transferred to the node when it becomes available. {{% /note %}} -8\. Repeat the process for the remaining nodes in the cluster. +8\. Repeat the process for the remaining nodes in the cluster. ## RHEL/CentOS The following example demonstrates upgrading a Riak TS node that has been installed with the RHEL/CentOS packages provided by Basho. -1\. Stop Riak TS: +1\. Stop Riak TS: ```bash riak stop ``` -2\. Back up your Riak TS node's /etc and /data directories: +2\. Back up your Riak TS node's /etc and /data directories: ```bash sudo tar -czf riak_backup.tar.gz /var/lib/riak /etc/riak ``` -3\. Upgrade Riak TS: +3\. Upgrade Riak TS: ```bash sudo rpm -Uvh »riakts_package_name«.rpm ``` -4\. Restart Riak TS: +4\. Restart Riak TS: {{% note %}} Before restarting Riak TS, check your riak.conf file and verify that your settings are configured as expected. @@ -133,13 +133,13 @@ Before restarting Riak TS, check your riak.conf file and verify that your settin riak start ``` -5\. Verify that Riak TS is running the new version: +5\. Verify that Riak TS is running the new version: ```bash riak version ``` -6\. Wait for the `riak_kv` service to start: +6\. Wait for the `riak_kv` service to start: ```bash riak-admin wait-for-service riak_kv »target node« @@ -148,7 +148,7 @@ riak-admin wait-for-service riak_kv »target node« * `»target node«` is the node which you have just upgraded (e.g. riak@192.168.1.11) -7\. Wait for any hinted handoff transfers to complete: +7\. Wait for any hinted handoff transfers to complete: ```bash riak-admin transfers @@ -159,7 +159,7 @@ While the node was offline, other nodes may have accepted writes on its behalf. This data is transferred to the node when it becomes available. {{% /note %}} -8\. Repeat the process for the remaining nodes in the cluster. +8\. Repeat the process for the remaining nodes in the cluster.
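Once every node has been upgraded, an optional sanity check (not part of the documented procedure above) is to open riak shell and confirm the cluster still reports all of your tables with their expected status:

```sql
-- Lists each TS table along with its Status field
SHOW TABLES;
```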
## Rolling Upgrade to Enterprise diff --git a/content/riak/ts/1.5.2/using.md b/content/riak/ts/1.5.2/using.md index f767510fb1..51d62579f8 100644 --- a/content/riak/ts/1.5.2/using.md +++ b/content/riak/ts/1.5.2/using.md @@ -11,18 +11,18 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using" + - /riakts/1.5.2/using/ + --- [activating]: creating-activating/ [aggregate]: querying/select/aggregate-functions/ [arithmetic]: querying/select/arithmetic-operations/ -[configuring]: /riak/ts/1.5.2/configuring/ -[download]: /riak/ts/1.5.2/downloads/ +[configuring]: {{< baseurl >}}riak/ts/1.5.2/configuring/ +[download]: {{< baseurl >}}riak/ts/1.5.2/downloads/ [installing]: ../setup/installing/ -[mdc]: /riak/ts/1.5.2/configuring/mdc/ +[mdc]: {{< baseurl >}}riak/ts/1.5.2/configuring/mdc/ [planning]: planning/ [querying]: querying/ [riakshell]: riakshell/ diff --git a/content/riak/ts/1.5.2/using/core-fundamentals.md b/content/riak/ts/1.5.2/using/core-fundamentals.md index 5c9f802122..4cba1adf7d 100644 --- a/content/riak/ts/1.5.2/using/core-fundamentals.md +++ b/content/riak/ts/1.5.2/using/core-fundamentals.md @@ -11,12 +11,12 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/core-fundamentals/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/core-fundamentals" + - /riakts/1.5.2/using/core-fundamentals/ + --- -[Riak KV]: /riak/kv/2.2.0/ +[Riak KV]: {{< baseurl >}}riak/kv/2.2.0/ Riak TS shares the same core codebase as [Riak KV], which allows you to operate a TS cluster much the same as you would operate a KV cluster. @@ -53,40 +53,40 @@ Below, you will find links to Riak KV documents that are applicable and helpful ### Configuration -Basic Configuration will help you set up your Riak core configuration. +Basic Configuration will help you set up your Riak core configuration. -Managing Configuration will show you how to retrieve your configuration, check your settings, and debug your configuration. +Managing Configuration will show you how to retrieve your configuration, check your settings, and debug your configuration. -Configuration Reference provides you with everything you need to know about configuring Riak core. +Configuration Reference provides you with everything you need to know about configuring Riak core. -Load Balancing will walk you through configuring a load balancer with your Riak cluster. +Load Balancing will walk you through configuring a load balancer with your Riak cluster. ### Cluster Operations -Running a Cluster gives you a basic walkthrough of how to run a Riak cluster. +Running a Cluster gives you a basic walkthrough of how to run a Riak cluster. -Cluster Administration provides a series of links to information on various ways to administer your cluster. +Cluster Administration provides a series of links to information on various ways to administer your cluster. -Adding & Removing Nodes walks you through the process of adding or removing nodes in your cluster. +Adding & Removing Nodes walks you through the process of adding or removing nodes in your cluster. -Changing Cluster Information will show you how to change various parts of your cluster. +Changing Cluster Information will show you how to change various parts of your cluster. -Replace a Node is a step-by-step guide for how to replace a node in your cluster. +Replace a Node is a step-by-step guide for how to replace a node in your cluster. -Inspect a Node shows you the steps and tools for inspecting nodes in your cluster.
+Inspect a Node shows you the steps and tools for inspecting nodes in your cluster. -Logging will provide you the steps for enabling and disabling debug logging. +Logging will provide you the steps for enabling and disabling debug logging. -Backing Up is a how-to guide for backing up your data. +Backing Up is a how-to guide for backing up your data. -Handoff will tell you everything you need to know to enable and disable handoff. +Handoff will tell you everything you need to know to enable and disable handoff. ### Repair, Tuning, and Reference -Repair & Recovery will cover all of the important topics of what can go wrong and what you can do to fix it. +Repair & Recovery will cover all of the important topics of what can go wrong and what you can do to fix it. -Performance will give you all the information you need to tune your cluster configurations to optimize performance. +Performance will give you all the information you need to tune your cluster configurations to optimize performance. -Reference will provide you with explanations of various core functions, such as logging and handoff. \ No newline at end of file +Reference will provide you with explanations of various core functions, such as logging and handoff. diff --git a/content/riak/ts/1.5.2/using/creating-activating.md b/content/riak/ts/1.5.2/using/creating-activating.md index bd05ee8c1d..a2e455fab3 100644 --- a/content/riak/ts/1.5.2/using/creating-activating.md +++ b/content/riak/ts/1.5.2/using/creating-activating.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/creating-activating/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/creating-activating" + - /riakts/1.5.2/using/creating-activating/ + --- @@ -26,7 +26,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/creating-activating [ruby]: ../../developing/ruby#sql-queries [planning]: ../planning/ [writing]: ../writingdata/ -[Riak bucket properties]: /riak/kv/2.2.0/configuring/reference/#default-bucket-properties +[Riak bucket properties]: {{< baseurl >}}riak/kv/2.2.0/configuring/reference/#default-bucket-properties Once you have [planned out your table][planning] you can create it by: @@ -147,16 +147,16 @@ client.execute(cmd); ```php require __DIR__ . '/../vendor/autoload.php'; -use Basho\Riak; -use Basho\Riak\Command; -use Basho\Riak\Node; +use Basho\Riak; +use Basho\Riak\Command; +use Basho\Riak\Node; -$node = (new Node\Builder) +$node = (new Node\Builder) ->atHost('riak-test') ->onPort(8087) ->build(); -$riak = new Riak([$node], [], new Riak\Api\Pb()); +$riak = new Riak([$node], [], new Riak\Api\Pb()); # create table @@ -170,7 +170,7 @@ $table_definition = " PRIMARY KEY((region, state, quantum(time, 15, 'm')), region, state, time) )"; -$command = (new Command\Builder\TimeSeries\Query($riak)) +$command = (new Command\Builder\TimeSeries\Query($riak)) ->withQuery(sprintf($table_definition, "GeoCheckins")) ->build(); @@ -238,7 +238,7 @@ Any property with any string or numeric value can be associated with a table, in Please note the following when using `WITH`: -* The property values can be of numeric or string types (parseable as `sint64`, `double` or `varchar`, correspondingly).
String values should be quoted with a `'`; literal single quote characters appearing in the string should be doubled (and not escaped with a `\`). +* The property values can be of numeric or string types (parseable as `sint64`, `double` or `varchar`, correspondingly). String values should be quoted with a `'`; literal single quote characters appearing in the string should be doubled (and not escaped with a `\`). * Values from the WITH clause will override those specified outside the query statement. * The default `n_val` (the number of distinct copies of each record kept in your cluster for safety and availability) is 3. This default cannot be changed; instead, each time a table is created the WITH clause can be used to configure that table's `n_val`. diff --git a/content/riak/ts/1.5.2/using/deleting-data.md b/content/riak/ts/1.5.2/using/deleting-data.md index 7174655d5e..51285c2784 100644 --- a/content/riak/ts/1.5.2/using/deleting-data.md +++ b/content/riak/ts/1.5.2/using/deleting-data.md @@ -13,12 +13,12 @@ toc: true version_history: in: "1.5.2+" aliases: - - /riakts/1.5.2/using/deleting-data/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/deleting-data" + - /riakts/1.5.2/using/deleting-data/ + --- -[delete]: /riak/ts/1.5.2/using/querying/delete -[expiry]: /riak/ts/1.5.2/configuring/global-object-expiration +[delete]: {{< baseurl >}}riak/ts/1.5.2/using/querying/delete +[expiry]: {{< baseurl >}}riak/ts/1.5.2/configuring/global-object-expiration Riak TS offers several ways to delete data: with clients, using the DELETE statement, and through global expiry. Global expiry is more efficient than other delete options but operates on all of your data. `DELETE` works per-row but takes more resources to run. @@ -89,7 +89,7 @@ $key = [ (new Cell("time"))->setTimestampValue(1420113600), ]; -$response = (new Command\Builder\TimeSeries\DeleteRow($riak)) +$response = (new Command\Builder\TimeSeries\DeleteRow($riak)) ->atKey($key) ->inTable('GeoCheckins') ->build() diff --git a/content/riak/ts/1.5.2/using/planning.md b/content/riak/ts/1.5.2/using/planning.md index dca5ba990e..b532e39876 100644 --- a/content/riak/ts/1.5.2/using/planning.md +++ b/content/riak/ts/1.5.2/using/planning.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/planning/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/planning" + - /riakts/1.5.2/using/planning/ + --- @@ -23,7 +23,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/planning" [epoch]: https://en.wikipedia.org/wiki/Unix_time [installing]: ../../setup/installing/ [sql]: ../../learn-about/sqlriakts/ -[order by]: /riak/ts/1.5.2/using/querying/select/order-by +[order by]: {{< baseurl >}}riak/ts/1.5.2/using/querying/select/order-by You've [installed][installing] Riak TS, and you're ready to create a table. diff --git a/content/riak/ts/1.5.2/using/querying.md b/content/riak/ts/1.5.2/using/querying.md index 4b32cbcd47..b654c906a1 100644 --- a/content/riak/ts/1.5.2/using/querying.md +++ b/content/riak/ts/1.5.2/using/querying.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/querying/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying" + - /riakts/1.5.2/using/querying/ + --- [activating]: ../creating-activating/ @@ -40,4 +40,4 @@ You can also take a look at the [guidelines] to get an idea of the rules and bes When querying, you must ensure the node issuing the query has adequate memory to receive the response. Queries will return rows based on the timespan (quanta) specified, if the returning rows do not fit into the memory of the requesting node, the node is likely to fail. Any given query consists of subqueries.
If a single subquery loads a result that does not fit into memory, an out of memory error will occur on the subquery node and the requesting node will return a timeout error as it waits for the subquery to return. -{{% /note %}} \ No newline at end of file +{{% /note %}} diff --git a/content/riak/ts/1.5.2/using/querying/delete.md b/content/riak/ts/1.5.2/using/querying/delete.md index e92c31672f..0841d5697c 100644 --- a/content/riak/ts/1.5.2/using/querying/delete.md +++ b/content/riak/ts/1.5.2/using/querying/delete.md @@ -13,13 +13,13 @@ toc: true version_history: in: "1.5.2+" aliases: - - /riakts/1.5.2/using/querying/delete -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/delete" + - /riakts/1.5.2/using/querying/delete + --- -[query guidelines]: /riak/ts/1.5.2/using/querying/guidelines/ -[time rep]: /riak/ts/1.5.2/using/timerepresentations/ -[http delete]: /riak/ts/1.4.0/using/writingdata/#deleting-data +[query guidelines]: {{< baseurl >}}riak/ts/1.5.2/using/querying/guidelines/ +[time rep]: {{< baseurl >}}riak/ts/1.5.2/using/timerepresentations/ +[http delete]: {{< baseurl >}}riak/ts/1.4.0/using/writingdata/#deleting-data # DELETE diff --git a/content/riak/ts/1.5.2/using/querying/describe.md b/content/riak/ts/1.5.2/using/querying/describe.md index 2af66380ba..bd97f84d98 100644 --- a/content/riak/ts/1.5.2/using/querying/describe.md +++ b/content/riak/ts/1.5.2/using/querying/describe.md @@ -11,11 +11,11 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/querying/describe -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/describe" + - /riakts/1.5.2/using/querying/describe + --- -[riak shell]: /riak/ts/1.5.2/using/riakshell +[riak shell]: {{< baseurl >}}riak/ts/1.5.2/using/riakshell You can use the DESCRIBE statement to obtain the definition of your Riak TS table. This document will show you how to execute `DESCRIBE` in TS. diff --git a/content/riak/ts/1.5.2/using/querying/explain.md b/content/riak/ts/1.5.2/using/querying/explain.md index cf50b79121..36fcd2e77a 100644 --- a/content/riak/ts/1.5.2/using/querying/explain.md +++ b/content/riak/ts/1.5.2/using/querying/explain.md @@ -11,14 +11,14 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/querying/explain -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/explain" + - /riakts/1.5.2/using/querying/explain + --- -[creating-activating]: /riak/ts/1.5.2/using/creating-activating -[develop]: /riak/ts/1.5.2/developing -[planning]: /riak/ts/1.5.2/using/planning -[riak shell]: /riak/ts/1.5.2/using/riakshell +[creating-activating]: {{< baseurl >}}riak/ts/1.5.2/using/creating-activating +[develop]: {{< baseurl >}}riak/ts/1.5.2/developing +[planning]: {{< baseurl >}}riak/ts/1.5.2/using/planning +[riak shell]: {{< baseurl >}}riak/ts/1.5.2/using/riakshell You can use the EXPLAIN statement to better understand how a query you would like to run will be executed. This document will show you how to use `EXPLAIN` in Riak TS.
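The DESCRIBE and EXPLAIN pages introduced above pair naturally: one shows a table's definition, the other shows how a query against it will be planned and broken into subqueries. A minimal riak shell sketch (GeoCheckin and its key columns are the docs' running example):

```sql
-- Show the table definition
DESCRIBE GeoCheckin;

-- Show how this query will be planned and split into subqueries
EXPLAIN SELECT * FROM GeoCheckin
WHERE time > 1420113500 AND time < 1420116000
  AND region = 'South Atlantic' AND state = 'South Carolina';
```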
diff --git a/content/riak/ts/1.5.2/using/querying/guidelines.md b/content/riak/ts/1.5.2/using/querying/guidelines.md index ba6423a77f..52aaa367d3 100644 --- a/content/riak/ts/1.5.2/using/querying/guidelines.md +++ b/content/riak/ts/1.5.2/using/querying/guidelines.md @@ -11,9 +11,9 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/querying/basic-querying - - /riakts/1.5.2/using/querying/guidelines -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/guidelines" + - /riakts/1.5.2/using/querying/basic-querying + - /riakts/1.5.2/using/querying/guidelines + --- [table arch]: ../../../learn-about/tablearchitecture/#data-modeling @@ -21,7 +21,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/guidelines [writing]: ../../writingdata/ [planning]: ../../planning#column-definitions [iso8601]: ../../../timerepresentations/ -[SELECT]: /riak/ts/1.5.2/using/querying/SELECT#iso_8601 +[SELECT]: {{}}riak/ts/1.5.2/using/querying/SELECT#iso_8601 [configuring]: ../../../configuring/riakconf/ @@ -60,7 +60,7 @@ Any quantized field in your partition key must be included in the query as a bou * Invalid: `time > 1449864277000 or time < 1449864290000` {{% note title="A Note About `SELECT`" %}} -It is possible to use ISO 8601-compliant date/time strings rather than integer timestamps in SELECT statements. Please see [SELECT](/riak/ts/1.5.2/using/querying/select/#iso-8601) for an example or [Time Representations](/riak/ts/1.5.2/using/timerepresentations/) for more information. +It is possible to use ISO 8601-compliant date/time strings rather than integer timestamps in SELECT statements. Please see [SELECT]({{}}riak/ts/1.5.2/using/querying/select/#iso-8601) for an example or [Time Representations]({{}}riak/ts/1.5.2/using/timerepresentations/) for more information. {{% /note %}} @@ -176,4 +176,4 @@ CREATE TABLE GeoCheckin With the above quantum and with the default `max_quanta_span` of 5000, the maximum timeframe we can query at a time is going to be 5000 minutes provided that the data returned from the query wouldn’t exceed the limits set in `max_returned_data_size`. -See the Data Modeling section in [Table Architecture][table arch] for more information on selecting your quanta and setting parameters. \ No newline at end of file +See the Data Modeling section in [Table Architecture][table arch] for more information on selecting your quanta and setting parameters. 
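As a worked instance of the bounded-interval rule above (a sketch assuming the `GeoCheckin` schema; the bounds reuse the valid example's timestamps), a query must pin every non-quantized key column with equality and bracket the quantized column with `AND`:

```sql
-- Valid: time is bounded on both sides with AND, and the other
-- partition-key columns (region, state) are fixed with equality.
SELECT weather, temperature
FROM GeoCheckin
WHERE region = 'South Atlantic'
  AND state = 'South Carolina'
  AND time > 1449864277000
  AND time < 1449864290000;
```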
diff --git a/content/riak/ts/1.5.2/using/querying/reference.md b/content/riak/ts/1.5.2/using/querying/reference.md index c6095b9553..75a30bc1ce 100644 --- a/content/riak/ts/1.5.2/using/querying/reference.md +++ b/content/riak/ts/1.5.2/using/querying/reference.md @@ -13,22 +13,22 @@ toc: true version_history: in: "1.5.2+" aliases: - - /riakts/1.5.2/using/querying/reference -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/reference" + - /riakts/1.5.2/using/querying/reference + --- -[select]: /riak/ts/1.5.2/using/querying/select/ -[describe]: /riak/ts/1.5.2/using/querying/describe/ -[delete]: /riak/ts/1.5.2/using/querying/delete/ -[explain]: /riak/ts/1.5.2/using/querying/explain/ -[show tables]: /riak/ts/1.5.2/using/querying/show-tables/ -[create table]: /riak/ts/1.5.2/using/creating-activating/ -[group by]: /riak/ts/1.5.2/using/querying/select/group-by/ -[order by]: /riak/ts/1.5.2/using/querying/select/order-by/ -[limit]: /riak/ts/1.5.2/using/querying/select/limit/ -[offset]: /riak/ts/1.5.2/using/querying/select/ -[arithmetic]: /riak/ts/1.5.2/using/querying/select/arithmetic-operations/ -[aggregate]: /riak/ts/1.5.2/using/querying/select/aggregate-functions/ +[select]: {{}}riak/ts/1.5.2/using/querying/select/ +[describe]: {{}}riak/ts/1.5.2/using/querying/describe/ +[delete]: {{}}riak/ts/1.5.2/using/querying/delete/ +[explain]: {{}}riak/ts/1.5.2/using/querying/explain/ +[show tables]: {{}}riak/ts/1.5.2/using/querying/show-tables/ +[create table]: {{}}riak/ts/1.5.2/using/creating-activating/ +[group by]: {{}}riak/ts/1.5.2/using/querying/select/group-by/ +[order by]: {{}}riak/ts/1.5.2/using/querying/select/order-by/ +[limit]: {{}}riak/ts/1.5.2/using/querying/select/limit/ +[offset]: {{}}riak/ts/1.5.2/using/querying/select/ +[arithmetic]: {{}}riak/ts/1.5.2/using/querying/select/arithmetic-operations/ +[aggregate]: {{}}riak/ts/1.5.2/using/querying/select/aggregate-functions/ This document lists each SQL statement available in Riak TS. diff --git a/content/riak/ts/1.5.2/using/querying/select.md b/content/riak/ts/1.5.2/using/querying/select.md index 3cb8fa5a97..7cf9863b5b 100644 --- a/content/riak/ts/1.5.2/using/querying/select.md +++ b/content/riak/ts/1.5.2/using/querying/select.md @@ -11,19 +11,19 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/querying/select -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select" + - /riakts/1.5.2/using/querying/select + --- [aggregate functions]: aggregate-functions/ [arithmetic operations]: arithmetic-operations/ [GROUP BY]: group-by/ -[guidelines]: /riak/ts/1.5.2/using/querying/guidelines +[guidelines]: {{}}riak/ts/1.5.2/using/querying/guidelines [iso8601]: ../../timerepresentations/ -[iso8601 accuracy]: /riak/ts/1.5.2/using/timerepresentations/#reduced-accuracy +[iso8601 accuracy]: {{}}riak/ts/1.5.2/using/timerepresentations/#reduced-accuracy [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 -[learn timestamps accuracy]: /riak/ts/1.5.2/learn-about/timestamps/#reduced-accuracy +[learn timestamps accuracy]: {{}}riak/ts/1.5.2/learn-about/timestamps/#reduced-accuracy You can use the SELECT statement in Riak TS to query your TS dataset. This document will show you how to run various queries using `SELECT`. 
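For orientation before the client examples that follow, a small sketch assuming the `GeoCheckin` example table; per the [iso8601] links above, the time bounds may be written as ISO 8601 strings instead of millisecond integers:

```sql
-- The same bounded-range query, expressed with ISO 8601
-- date/time strings rather than integer timestamps.
SELECT weather, temperature
FROM GeoCheckin
WHERE region = 'South Atlantic'
  AND state = 'South Carolina'
  AND time > '2015-01-01 12:00:00'
  AND time < '2015-01-01 13:00:00';
```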
@@ -131,7 +131,7 @@ riakc_ts:query(Pid, "select weather, temperature from GeoCheckin where time > 12 ``` ```php -$response = (new Command\Builder\TimeSeries\Query($riak)) +$response = (new Command\Builder\TimeSeries\Query($riak)) ->withQuery("select weather, temperature from GeoCheckin where region = 'South Atlantic' and state = 'state1' and time > 1234560 and time < 1234569") ->build() ->execute(); @@ -224,7 +224,7 @@ riakc_ts:query(Pid, "select weather, temperature from GeoCheckin where time > 12 ``` ```php -$response = (new Command\Builder\TimeSeries\Query($riak)) +$response = (new Command\Builder\TimeSeries\Query($riak)) ->withQuery("select weather, temperature from GeoCheckin where region = 'South Atlantic' and state = 'state1' and time > 1234560 and time < 1234569 and temperature > 27.0") ->build() ->execute(); @@ -418,7 +418,7 @@ riakc_ts:query(Pid, "SELECT region, temperature FROM GeoCheckin WHERE time > 123 ``` ```php -$response = (new Command\Builder\TimeSeries\Query($riak)) +$response = (new Command\Builder\TimeSeries\Query($riak)) ->withQuery("SELECT region, temperature FROM GeoCheckin WHERE time > 1234560 AND time < 1234569 AND region = 'South Atlantic' AND state = 'South Carolina' AND temperature IS NULL") ->build() ->execute(); diff --git a/content/riak/ts/1.5.2/using/querying/select/aggregate-functions.md b/content/riak/ts/1.5.2/using/querying/select/aggregate-functions.md index 00e1fcb1f6..19ac8ee03b 100644 --- a/content/riak/ts/1.5.2/using/querying/select/aggregate-functions.md +++ b/content/riak/ts/1.5.2/using/querying/select/aggregate-functions.md @@ -10,15 +10,15 @@ menu: project: "riak_ts" project_version: "1.5.2" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/aggregate-functions" version_history: present_from: "1.4.0+" moved: - ["1.1.0+", "using/aggregate-functions"] aliases: - - /riakts/1.5.2/using/aggregate-functions/ - - /riak/ts/1.5.2/using/aggregate-functions/ - - /riakts/1.5.2/using/querying/select/aggregate-functions/ + - /riakts/1.5.2/using/aggregate-functions/ + - /riak/ts/latest/using/aggregate-functions/ + - /riak/ts/1.5.2/using/aggregate-functions/ + - /riakts/1.5.2/using/querying/select/aggregate-functions/ --- diff --git a/content/riak/ts/1.5.2/using/querying/select/arithmetic-operations.md b/content/riak/ts/1.5.2/using/querying/select/arithmetic-operations.md index 214081cbfd..3a0486bd27 100644 --- a/content/riak/ts/1.5.2/using/querying/select/arithmetic-operations.md +++ b/content/riak/ts/1.5.2/using/querying/select/arithmetic-operations.md @@ -10,19 +10,18 @@ menu: project: "riak_ts" project_version: "1.5.2" toc: true -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/arithmetic-operations" version_history: present_from: "1.4.0+" moved: - ["1.1.0+", "using/arithmetic-operations"] aliases: - - /riak/ts/1.5.2/using/arithmetic-operations - - /riakts/1.5.2/using/arithmetic-operations - - /riakts/1.5.2/using/querying/select/arithmetic-operations + - /riak/ts/latest/using/arithmetic-operations + - /riak/ts/1.5.2/using/arithmetic-operations + - /riakts/1.5.2/using/arithmetic-operations --- -[querying select]: /riak/ts/1.5.2/using/querying/#select-query +[querying select]: {{}}riak/ts/1.5.2/using/querying/#select-query Riak TS supports arithmetic operations in the SELECT statement.
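The hunks that follow only touch table formatting, so for context, here is a sketch of the operations being documented (assuming the `GeoCheckin` table; the operators mirror the examples on this page):

```sql
-- Addition, subtraction, multiplication, division, negation, and
-- parentheses may all appear in the SELECT list alongside columns.
SELECT temperature,
       (temperature + 1),
       (temperature * 2),
       (temperature / 2),
       -temperature
FROM GeoCheckin
WHERE region = 'South Atlantic'
  AND state = 'South Carolina'
  AND time > 1452252523182
  AND time < 1452252543182;
```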
@@ -63,7 +62,7 @@ WHERE time > 1452252523182 AND time < 1452252543182 AND region = 'South Atlantic Returns: -| 555\ | 1.1\ | 10.0\ | 0.01123\ | +| 555\ | 1.1\ | 10.0\ | 0.01123\ | |---------------|---------------|----------------|-----------------| | 555 | 1.1 | 10.0 | 0.01123 | @@ -77,7 +76,7 @@ WHERE time > 1452252523182 AND time < 1452252543182 AND region = 'South Atlantic Returns: -| temperature\ | (temperature\+1)\ | (temperature\-1)\ | +| temperature\ | (temperature\+1)\ | (temperature\-1)\ | |-----------------------|----------------------------|-------------------------| | 27.1 | 28.1 | 26.1 | @@ -91,7 +90,7 @@ WHERE time > 1452252523182 AND time < 1452252543182 AND region = 'South Atlantic Returns: -| temperature\ | (temperature\*2)\ | (temperature/2)\ | +| temperature\ | (temperature\*2)\ | (temperature/2)\ | |-----------------------|----------------------------|-------------------------| | 27.1 | 54.2 | 13.55 | @@ -105,7 +104,7 @@ WHERE time > 1452252523182 AND time < 1452252543182 AND region = 'South Atlantic Returns: -| temperature\ | -temperature\ | +| temperature\ | -temperature\ | |-----------------------|----------------------| | 27.1 | -27.1 | @@ -119,7 +118,7 @@ WHERE time > 1452252523182 AND time < 1452252543182 AND region = 'South Atlantic Returns: -| (temperature+(2\*3))\ | ((temperature\+2)\*3)\ | +| (temperature+(2\*3))\ | ((temperature\+2)\*3)\ | |--------------------------------|-----------------------------| | 33.1 | 87.30000000000001 | diff --git a/content/riak/ts/1.5.2/using/querying/select/group-by.md b/content/riak/ts/1.5.2/using/querying/select/group-by.md index 4105a32377..c08c8ed5ba 100644 --- a/content/riak/ts/1.5.2/using/querying/select/group-by.md +++ b/content/riak/ts/1.5.2/using/querying/select/group-by.md @@ -11,12 +11,12 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/querying/select/group-by -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/group-by" + - /riakts/1.5.2/using/querying/select/group-by + --- [aggregate function]: ../aggregate-functions -[guidelines]: /riak/ts/1.5.2/using/querying/guidelines +[guidelines]: {{}}riak/ts/1.5.2/using/querying/guidelines The GROUP BY statement is used with `SELECT` to pick out and condense rows sharing the same value and return a single row. `GROUP BY` is useful for aggregating an attribute of a device over a time period; for instance, you could use it to pull average values for every 30 minute period over the last 24 hours. @@ -140,4 +140,4 @@ GROUP BY userid; The result set would only have the group 'roddy' because it is required by the WHERE clause. -If, however, we combine two column names from the partition key in the group using `SUM` without specifying `userid`, `GROUP BY` will return multiple result rows for the `userid` 'roddy' with one column per visit. \ No newline at end of file +If, however, we combine two column names from the partition key in the group using `SUM` without specifying `userid`, `GROUP BY` will return multiple result rows for the `userid` 'roddy' with one column per visit.
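A compact sketch of the grouping behavior described above, assuming a hypothetical `visits` table whose partition key includes `userid` and a hypothetical `duration` column (neither appears in the diff itself):

```sql
-- Collapse all rows sharing a userid into a single result row;
-- aggregate functions summarize the non-grouped columns.
SELECT userid, COUNT(*), SUM(duration)
FROM visits
WHERE time > 1420113600000
  AND time < 1420200000000
  AND userid = 'roddy'
GROUP BY userid;
```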
diff --git a/content/riak/ts/1.5.2/using/querying/select/limit.md b/content/riak/ts/1.5.2/using/querying/select/limit.md index dd47498f6e..ec371469dc 100644 --- a/content/riak/ts/1.5.2/using/querying/select/limit.md +++ b/content/riak/ts/1.5.2/using/querying/select/limit.md @@ -12,12 +12,13 @@ project_version: "1.5.2" toc: true version_history: in: "1.5.2+" -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/limit/" +aliases: + --- -[select]: /riak/ts/1.5.2/using/querying/select -[query guidelines]: /riak/ts/1.5.2/using/querying/guidelines/ -[configuring]: /riak/ts/1.5.2/configuring/riakconf/#maximum-returned-data-size +[select]: {{}}riak/ts/1.5.2/using/querying/select +[query guidelines]: {{}}riak/ts/1.5.2/using/querying/guidelines/ +[configuring]: {{}}riak/ts/1.5.2/configuring/riakconf/#maximum-returned-data-size The LIMIT statement is used with [`SELECT`][select] to return a limited number of results. @@ -26,7 +27,7 @@ This document shows how to run various queries using `LIMIT`. See the [guideline {{% note title="A Note on Latency" %}} `LIMIT` uses an on-disk query buffer to prevent overload, which adds some overhead and increases the query latency. -You may adjust various parameters in [riak.conf](/riak/ts/1.5.2/configuring/riakconf/) depending on how much memory your riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for LIMIT statements; you can read more about that [here](/riak/ts/1.5.2/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition. +You may adjust various parameters in [riak.conf]({{}}riak/ts/1.5.2/configuring/riakconf/) depending on how much memory your Riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for LIMIT statements; you can read more about that [here]({{}}riak/ts/1.5.2/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition. However, the most effective means of speeding up your `LIMIT` queries is to place the query buffer directory (`timeseries_query_buffers_root_path`) on fast storage or in a memory-backed /tmp directory.
{{% /note %}} diff --git a/content/riak/ts/1.5.2/using/querying/select/order-by.md b/content/riak/ts/1.5.2/using/querying/select/order-by.md index e33e8e645f..0895e915a7 100644 --- a/content/riak/ts/1.5.2/using/querying/select/order-by.md +++ b/content/riak/ts/1.5.2/using/querying/select/order-by.md @@ -12,12 +12,13 @@ project_version: "1.5.2" toc: true version_history: in: "1.5.2+" -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/select/order-by" +aliases: + --- -[select]: /riak/ts/1.5.2/using/querying/select -[query guidelines]: /riak/ts/1.5.2/using/querying/guidelines/ -[configuring]: /riak/ts/1.5.2/configuring/riakconf/#maximum-returned-data-size +[select]: {{}}riak/ts/1.5.2/using/querying/select +[query guidelines]: {{}}riak/ts/1.5.2/using/querying/guidelines/ +[configuring]: {{}}riak/ts/1.5.2/configuring/riakconf/#maximum-returned-data-size The ORDER BY statement is used with [`SELECT`][select] to sort results by one or more columns in ascending or descending order. `ORDER BY` is useful for operations such as returning the most recent results in a set. @@ -26,7 +27,7 @@ This document shows how to run various queries using `ORDER BY`. See the [guidel {{% note title="A Note on Latency" %}} `ORDER BY` uses an on-disk query buffer to prevent overload, which adds some overhead and increases the query latency. -You may adjust various parameters in [riak.conf](/riak/ts/1.5.2/configuring/riakconf/) depending on how much memory your riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for ORDER BY statements; you can read more about that [here](/riak/ts/1.5.2/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition. +You may adjust various parameters in [riak.conf]({{}}riak/ts/1.5.2/configuring/riakconf/) depending on how much memory your Riak nodes will have, including `max_running_fsms`, `max_quanta_span`, `max_concurrent_queries`. It is also worth noting that `max_returned_data_size` is calculated differently for ORDER BY statements; you can read more about that [here]({{}}riak/ts/1.5.2/configuring/riakconf/#maximum-returned-data-size). All of these settings impact the maximum size of data you can retrieve at one time, and it is important to understand your environmental limitations or you run the risk of an out-of-memory condition. However, the most effective means of speeding up your `ORDER BY` queries is to place the query buffer directory (`timeseries_query_buffers_root_path`) on fast storage or in a memory-backed /tmp directory.
{{% /note %}} diff --git a/content/riak/ts/1.5.2/using/querying/show-create-table.md b/content/riak/ts/1.5.2/using/querying/show-create-table.md index e52a710ad3..40100bf2f1 100644 --- a/content/riak/ts/1.5.2/using/querying/show-create-table.md +++ b/content/riak/ts/1.5.2/using/querying/show-create-table.md @@ -13,11 +13,11 @@ toc: true version_history: in: "1.5.2+" aliases: - - /riakts/1.5.2/using/querying/show-create-table -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/show-create-table" + - /riakts/1.5.2/using/querying/show-create-table + --- -[riak shell]: /riak/ts/1.5.2/using/riakshell +[riak shell]: {{}}riak/ts/1.5.2/using/riakshell You can use the SHOW CREATE TABLE statement to obtain the SQL used to create your Riak TS table. This document will show you how to execute `SHOW CREATE TABLE` in TS. diff --git a/content/riak/ts/1.5.2/using/querying/show-tables.md b/content/riak/ts/1.5.2/using/querying/show-tables.md index 4a57ddf900..cd719e962c 100644 --- a/content/riak/ts/1.5.2/using/querying/show-tables.md +++ b/content/riak/ts/1.5.2/using/querying/show-tables.md @@ -11,11 +11,11 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/querying/show-tables -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/show-tables" + - /riakts/1.5.2/using/querying/show-tables + --- -[riak shell]: /riak/ts/1.5.2/using/riakshell +[riak shell]: {{}}riak/ts/1.5.2/using/riakshell You can use the SHOW TABLES statement to enumerate the Riak TS tables you have set up. This document will show you how to execute `SHOW TABLES` in TS. diff --git a/content/riak/ts/1.5.2/using/querying/single-key-fetch.md b/content/riak/ts/1.5.2/using/querying/single-key-fetch.md index 38e867c254..efba8cf083 100644 --- a/content/riak/ts/1.5.2/using/querying/single-key-fetch.md +++ b/content/riak/ts/1.5.2/using/querying/single-key-fetch.md @@ -11,8 +11,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/querying/single-key-fetch -canonical_link: "https://docs.basho.com/riak/ts/latest/using/querying/single-key-fetch" + - /riakts/1.5.2/using/querying/single-key-fetch + --- You may find the need to fetch a single key from Riak TS. The below examples show you how to perform a single key fetch in each of our official clients that support Riak TS. 
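Before the client examples, a rough riak shell equivalent for orientation (a sketch assuming the `GeoCheckin` schema, not part of the diff): collapsing the time bounds around one known timestamp, with equality on the remaining key columns, isolates a single record:

```sql
-- Pin region and state, and narrow the time range to a single
-- millisecond, so at most one row can match.
SELECT *
FROM GeoCheckin
WHERE region = 'South Atlantic'
  AND state = 'South Carolina'
  AND time > 1420113599999
  AND time < 1420113600001;
```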
@@ -76,7 +76,7 @@ riakc_ts:get(Pid, <<"GeoCheckin">>, [<<"South Atlantic">>, <<"South Carolina">>, ``` ```php -$response = (new Command\Builder\TimeSeries\FetchRow($riak)) +$response = (new Command\Builder\TimeSeries\FetchRow($riak)) ->atKey([ (new Cell("region"))->setValue("South Atlantic"), (new Cell("state"))->setValue("South Carolina"), @@ -103,4 +103,4 @@ if err != nil { } err = cluster.Execute(cmd) -``` \ No newline at end of file +``` diff --git a/content/riak/ts/1.5.2/using/riakshell.md b/content/riak/ts/1.5.2/using/riakshell.md index 87548507a0..4cbe1f32d8 100644 --- a/content/riak/ts/1.5.2/using/riakshell.md +++ b/content/riak/ts/1.5.2/using/riakshell.md @@ -11,14 +11,14 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/riakshell/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/riakshell" + - /riakts/1.5.2/using/riakshell/ + --- -[nodename]: /riak/kv/2.2.0/using/cluster-operations/changing-cluster-info/ -[creating]: /riak/ts/1.5.2/using/creating-activating -[writing]: /riak/ts/1.5.2/using/writingdata -[riak shell README]: https://github.com/basho/riak_shell/blob/develop/README.md +[nodename]: {{}}riak/kv/2.2.0/using/cluster-operations/changing-cluster-info/ +[creating]: {{}}riak/ts/1.5.2/using/creating-activating +[writing]: {{}}riak/ts/1.5.2/using/writingdata +[riak shell README]: https://github.com/basho/riak_shell/blob/develop/README.md You can use riak shell within Riak TS to run SQL and logging commands from one place. @@ -49,13 +49,13 @@ The shell is also trivially extendable for developer use. To get started using riak shell: -1\. Upon installing Riak TS from a package (.deb or .rpm), your riak shell should be configured to attach to your local node. You can verify this by running: +1\. Upon installing Riak TS from a package (.deb or .rpm), your riak shell should be configured to attach to your local node. You can verify this by running: ``` > sudo riak-shell ``` -1a\. You can attach to any node in your cluster from riak shell. To do this, locate your riak_shell.config file. On most systems, it will be in the `/etc/riak` directory with the other Riak TS configuration files. On Mac OS X, the configuration files are in the `~/riak-ts-1.5.2/etc` directory. Open riak_shell.config, and add the nodename and IP addresses you wish to connect to to nodes: +1a\. You can attach to any node in your cluster from riak shell. To do this, locate your riak_shell.config file. On most systems, it will be in the `/etc/riak` directory with the other Riak TS configuration files. On Mac OS X, the configuration files are in the `~/riak-ts-1.5.2/etc` directory. Open riak_shell.config, and add the nodename and IP addresses you wish to connect to under `nodes`: ``` [ @@ -71,7 +71,7 @@ To get started using riak shell: ]. ``` -2\. Open riak shell (if you have updated riak_shell.config, you will need to navigate back to your Riak TS directory): +2\.
Open riak shell (if you have updated riak_shell.config, you will need to navigate back to your Riak TS directory): ```bash riak-shell @@ -195,7 +195,7 @@ An example of the second format is shown below: In both of these formats multiple rows of data can be specified (3)>INSERT INTO mytable VALUES ('keyvalue', '2016-11-30 19:30:00', 123, 12.3, false), ('newvalue', '2016-11-30 19:31:04' 456, 45.6, true); -For more details please go to http://docs.basho.com/riak/ts +For more details please go to /riak/ts ``` diff --git a/content/riak/ts/1.5.2/using/security.md b/content/riak/ts/1.5.2/using/security.md index 61c7aefd93..23e2e6cada 100644 --- a/content/riak/ts/1.5.2/using/security.md +++ b/content/riak/ts/1.5.2/using/security.md @@ -12,8 +12,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/security/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/" + - /riakts/1.5.2/using/security/ + --- [security checklist]: ./checklist @@ -90,7 +90,7 @@ cluster on the following TCP ports: Protocol | Port :--------|:---- -Protocol Buffers | TCP port 8087 +Protocol Buffers | TCP port 8087 ## Best Practices diff --git a/content/riak/ts/1.5.2/using/security/checklist.md b/content/riak/ts/1.5.2/using/security/checklist.md index 632e7a0bac..b8dd2e6878 100644 --- a/content/riak/ts/1.5.2/using/security/checklist.md +++ b/content/riak/ts/1.5.2/using/security/checklist.md @@ -12,8 +12,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/security/checklist -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/checklist/" + - /riakts/1.5.2/using/security/checklist + --- [enable ssl]: ../enable-disable/#enabling-ssl @@ -21,7 +21,7 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/checklist/ [security users]: ../user-management [security sources]: ../sources-management [manage permissions]: ../user-management/#managing-permissions -[pbc]: /riak/kv/2.2.0/developing/api/protocol-buffers/ +[pbc]: {{}}riak/kv/2.2.0/developing/api/protocol-buffers/ [security enable disable]: ../enable-disable Before turning on Riak TS security there are key steps all applications need to take. Missing one of these steps may break your application, so make sure you have done each of the following BEFORE enabling security: diff --git a/content/riak/ts/1.5.2/using/security/enable-disable.md b/content/riak/ts/1.5.2/using/security/enable-disable.md index e8c09352b0..467f38dd8f 100644 --- a/content/riak/ts/1.5.2/using/security/enable-disable.md +++ b/content/riak/ts/1.5.2/using/security/enable-disable.md @@ -12,8 +12,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/security/enable-disable -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/enable-disable/" + - /riakts/1.5.2/using/security/enable-disable + --- Riak TS security may be [checked](#checking-security-status), [enabled](#enabling-security), or [disabled](#disabling-security) through the command line, allowing an administrator to change security settings for the whole cluster without needing to go node-by-node. 
diff --git a/content/riak/ts/1.5.2/using/security/notify-basho.md b/content/riak/ts/1.5.2/using/security/notify-basho.md index 82cedc7a9b..2460924cd5 100644 --- a/content/riak/ts/1.5.2/using/security/notify-basho.md +++ b/content/riak/ts/1.5.2/using/security/notify-basho.md @@ -12,8 +12,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/security/notify-basho -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/notify-basho/" + - /riakts/1.5.2/using/security/notify-basho + --- Data security is an important and sensitive issue. A real-world approach to security allows us to balance appropriate levels of security and related overhead while creating a fast, scalable, and operationally straightforward database. diff --git a/content/riak/ts/1.5.2/using/security/sources-management.md b/content/riak/ts/1.5.2/using/security/sources-management.md index f70b48c128..584cde23cf 100644 --- a/content/riak/ts/1.5.2/using/security/sources-management.md +++ b/content/riak/ts/1.5.2/using/security/sources-management.md @@ -12,8 +12,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/security/sources-management -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/sources-management/" + - /riakts/1.5.2/using/security/sources-management + --- [cidr]: http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing @@ -22,8 +22,8 @@ canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/sources-ma [security enabling]: ../enable-disable/#enabling-security [security add user]: ../user-management/#add-user [root cert]: http://en.wikipedia.org/wiki/Root_certificate -[rolling restart]: /riak/kv/2.2.0/using/repair-recovery/rolling-restart/ -[config ref security]: /riak/kv/2.2.0/configuring/reference/#security +[rolling restart]: {{}}riak/kv/2.2.0/using/repair-recovery/rolling-restart/ +[config ref security]: {{}}riak/kv/2.2.0/configuring/reference/#security [xss]: http://en.wikipedia.org/wiki/Cross-site_scripting [request forgery]: http://en.wikipedia.org/wiki/Cross-site_request_forgery [http referer]: http://en.wikipedia.org/wiki/HTTP_referer diff --git a/content/riak/ts/1.5.2/using/security/user-management.md b/content/riak/ts/1.5.2/using/security/user-management.md index fa112ec7c7..048ca0e0bc 100644 --- a/content/riak/ts/1.5.2/using/security/user-management.md +++ b/content/riak/ts/1.5.2/using/security/user-management.md @@ -12,8 +12,8 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/security/user-management -canonical_link: "https://docs.basho.com/riak/ts/latest/using/security/user-management/" + - /riakts/1.5.2/using/security/user-management + --- Riak TS security lets you control authorization by creating, modifying, and deleting user characteristics and granting users selective access to Riak TS functionality.
Users can be assigned one or more of the following characteristics: diff --git a/content/riak/ts/1.5.2/using/timerepresentations.md b/content/riak/ts/1.5.2/using/timerepresentations.md index cce6b43794..7af1e5ff92 100644 --- a/content/riak/ts/1.5.2/using/timerepresentations.md +++ b/content/riak/ts/1.5.2/using/timerepresentations.md @@ -11,16 +11,16 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/timerepresentations/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/timerepresentations" + - /riakts/1.5.2/using/timerepresentations/ + --- [activating]: ../creating-activating/ [planning]: ../planning/ [querying]: ../querying/ -[config reference]: /riak/kv/2.2.0/configuring/reference/#the-advanced-config-file -[MDC]: /riak/ts/1.5.2/using/mdc +[config reference]: {{}}riak/kv/2.2.0/configuring/reference/#the-advanced-config-file +[MDC]: {{}}riak/ts/1.5.2/using/mdc [riak shell]: ../riakshell [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 [learn timestamps]: ../../learn-about/timestamps @@ -125,4 +125,4 @@ Effectively, there is no way in the UNIX time scheme to differentiate an event t Similarly, Riak TS would treat `915148800` as the start of a new time quantum, and any data points which a client added for that second would be considered to be in the first time quantum in 1999. -The data is not lost, but a query against 1998 time quanta will not produce those data points despite the fact that some of the events flagged as `915148800` technically occurred in 1998. \ No newline at end of file +The data is not lost, but a query against 1998 time quanta will not produce those data points despite the fact that some of the events flagged as `915148800` technically occurred in 1998. diff --git a/content/riak/ts/1.5.2/using/writingdata.md b/content/riak/ts/1.5.2/using/writingdata.md index df5d458643..8c27bbc76a 100644 --- a/content/riak/ts/1.5.2/using/writingdata.md +++ b/content/riak/ts/1.5.2/using/writingdata.md @@ -11,17 +11,17 @@ project: "riak_ts" project_version: "1.5.2" toc: true aliases: - - /riakts/1.5.2/using/writingdata/ -canonical_link: "https://docs.basho.com/riak/ts/latest/using/writingdata" + - /riakts/1.5.2/using/writingdata/ + --- [activating]: ../creating-activating/ [planning]: ../planning/ [querying]: ../querying/ -[http]: /riak/ts/1.5.2/developing/http/ -[config reference]: /riak/kv/2.2.0/configuring/reference/#the-advanced-config-file -[MDC]: /riak/ts/1.5.2/configuring/mdc +[http]: {{}}riak/ts/1.5.2/developing/http/ +[config reference]: {{}}riak/kv/2.2.0/configuring/reference/#the-advanced-config-file +[MDC]: {{}}riak/ts/1.5.2/configuring/mdc [riakshell]: ../riakshell [iso8601]: ../timerepresentations/ [ISO 8601]: https://en.wikipedia.org/wiki/ISO_8601 @@ -209,18 +209,18 @@ riakc_ts:put(Pid, "GeoCheckin", [{1, <<"South Atlantic">>, <<"Florida">>, 145160 require __DIR__ . 
'/../vendor/autoload.php'; -use Basho\Riak; -use Basho\Riak\Command; -use Basho\Riak\Node; +use Basho\Riak; +use Basho\Riak\Command; +use Basho\Riak\Node; -$node = (new Node\Builder) +$node = (new Node\Builder) ->atHost('myriakdb.host') ->onPort(8087) ->build(); -$riak = new Riak([$node], [], new Riak\Api\Pb()); +$riak = new Riak([$node], [], new Riak\Api\Pb()); -$response = (new Command\Builder\TimeSeries\StoreRows($riak)) +$response = (new Command\Builder\TimeSeries\StoreRows($riak)) ->inTable('GeoCheckins') ->withRow([ (new Cell("region"))->setValue("South Atlantic"), diff --git a/content/riak/ts/latest/add-ons.md b/content/riak/ts/latest/add-ons.md new file mode 100644 index 0000000000..5997f09d1d --- /dev/null +++ b/content/riak/ts/latest/add-ons.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/redis.md b/content/riak/ts/latest/add-ons/redis.md new file mode 100644 index 0000000000..f21cab9188 --- /dev/null +++ b/content/riak/ts/latest/add-ons/redis.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/redis/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/redis/developing-rra.md b/content/riak/ts/latest/add-ons/redis/developing-rra.md new file mode 100644 index 0000000000..4f85eaeb65 --- /dev/null +++ b/content/riak/ts/latest/add-ons/redis/developing-rra.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/redis/developing-rra/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/redis/redis-add-on-features.md b/content/riak/ts/latest/add-ons/redis/redis-add-on-features.md new file mode 100644 index 0000000000..7684b70dc4 --- /dev/null +++ b/content/riak/ts/latest/add-ons/redis/redis-add-on-features.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/redis/redis-add-on-features/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url.
+ + + + diff --git a/content/riak/ts/latest/add-ons/redis/set-up-rra.md b/content/riak/ts/latest/add-ons/redis/set-up-rra.md new file mode 100644 index 0000000000..b4e1ffb44e --- /dev/null +++ b/content/riak/ts/latest/add-ons/redis/set-up-rra.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/redis/set-up-rra/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/redis/set-up-rra/deployment-models.md b/content/riak/ts/latest/add-ons/redis/set-up-rra/deployment-models.md new file mode 100644 index 0000000000..4ae3da864f --- /dev/null +++ b/content/riak/ts/latest/add-ons/redis/set-up-rra/deployment-models.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/redis/set-up-rra/deployment-models/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/redis/using-rra.md b/content/riak/ts/latest/add-ons/redis/using-rra.md new file mode 100644 index 0000000000..af6cb77442 --- /dev/null +++ b/content/riak/ts/latest/add-ons/redis/using-rra.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/redis/using-rra/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector.md b/content/riak/ts/latest/add-ons/spark-riak-connector.md new file mode 100644 index 0000000000..362eceb6c2 --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/building-testing.md b/content/riak/ts/latest/add-ons/spark-riak-connector/building-testing.md new file mode 100644 index 0000000000..f2d3679b0b --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/building-testing.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/building-testing/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/getting.md b/content/riak/ts/latest/add-ons/spark-riak-connector/getting.md new file mode 100644 index 0000000000..ed919c9b95 --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/getting.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/getting/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/quick-start.md b/content/riak/ts/latest/add-ons/spark-riak-connector/quick-start.md new file mode 100644 index 0000000000..eaea34b4cd --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/quick-start.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/quick-start/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/usage.md b/content/riak/ts/latest/add-ons/spark-riak-connector/usage.md new file mode 100644 index 0000000000..2f4043df14 --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/usage.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/usage/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/usage/bulk-write.md b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/bulk-write.md new file mode 100644 index 0000000000..7ceae0f04f --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/bulk-write.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/usage/bulk-write/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/usage/config-spark-context.md b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/config-spark-context.md new file mode 100644 index 0000000000..f822987750 --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/config-spark-context.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/usage/config-spark-context/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/usage/dataframes.md b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/dataframes.md new file mode 100644 index 0000000000..32ae2aaec7 --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/dataframes.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/usage/dataframes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/usage/dates.md b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/dates.md new file mode 100644 index 0000000000..7e277ff939 --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/dates.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/usage/dates/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/usage/range-query-partition.md b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/range-query-partition.md new file mode 100644 index 0000000000..d298e81975 --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/range-query-partition.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/usage/range-query-partition/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/usage/reading-data.md b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/reading-data.md new file mode 100644 index 0000000000..f0ecaed7f5 --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/reading-data.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/usage/reading-data/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/usage/streaming-example.md b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/streaming-example.md new file mode 100644 index 0000000000..00e4df4759 --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/streaming-example.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/usage/streaming-example/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/add-ons/spark-riak-connector/usage/writing-data.md b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/writing-data.md new file mode 100644 index 0000000000..568e6e73e4 --- /dev/null +++ b/content/riak/ts/latest/add-ons/spark-riak-connector/usage/writing-data.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/add-ons/spark-riak-connector/usage/writing-data/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/configuring.md b/content/riak/ts/latest/configuring.md new file mode 100644 index 0000000000..e91a8e2cea --- /dev/null +++ b/content/riak/ts/latest/configuring.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/configuring/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/configuring/global-object-expiration.md b/content/riak/ts/latest/configuring/global-object-expiration.md new file mode 100644 index 0000000000..6aa19b77fb --- /dev/null +++ b/content/riak/ts/latest/configuring/global-object-expiration.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/configuring/global-object-expiration/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/configuring/mdc.md b/content/riak/ts/latest/configuring/mdc.md new file mode 100644 index 0000000000..57075666ea --- /dev/null +++ b/content/riak/ts/latest/configuring/mdc.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/configuring/mdc/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/configuring/riakconf.md b/content/riak/ts/latest/configuring/riakconf.md new file mode 100644 index 0000000000..9eb6fe95f8 --- /dev/null +++ b/content/riak/ts/latest/configuring/riakconf.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/configuring/riakconf/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/developing.md b/content/riak/ts/latest/developing.md new file mode 100644 index 0000000000..f33c64aa9d --- /dev/null +++ b/content/riak/ts/latest/developing.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/developing/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/developing/csharp.md b/content/riak/ts/latest/developing/csharp.md new file mode 100644 index 0000000000..f4a9cbc0a7 --- /dev/null +++ b/content/riak/ts/latest/developing/csharp.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/developing/csharp/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/developing/erlang.md b/content/riak/ts/latest/developing/erlang.md new file mode 100644 index 0000000000..256a449d0b --- /dev/null +++ b/content/riak/ts/latest/developing/erlang.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/developing/erlang/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/developing/golang.md b/content/riak/ts/latest/developing/golang.md new file mode 100644 index 0000000000..8f15a0c8e9 --- /dev/null +++ b/content/riak/ts/latest/developing/golang.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/developing/golang/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/developing/http.md b/content/riak/ts/latest/developing/http.md new file mode 100644 index 0000000000..c339d71d20 --- /dev/null +++ b/content/riak/ts/latest/developing/http.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/developing/http/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/developing/java.md b/content/riak/ts/latest/developing/java.md new file mode 100644 index 0000000000..dbd87ae8cc --- /dev/null +++ b/content/riak/ts/latest/developing/java.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/developing/java/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/developing/nodejs.md b/content/riak/ts/latest/developing/nodejs.md new file mode 100644 index 0000000000..3c21e40c72 --- /dev/null +++ b/content/riak/ts/latest/developing/nodejs.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/developing/nodejs/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/developing/php.md b/content/riak/ts/latest/developing/php.md new file mode 100644 index 0000000000..70d7c72985 --- /dev/null +++ b/content/riak/ts/latest/developing/php.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/developing/php/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/developing/python.md b/content/riak/ts/latest/developing/python.md new file mode 100644 index 0000000000..6d571b14e1 --- /dev/null +++ b/content/riak/ts/latest/developing/python.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/developing/python/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/developing/ruby.md b/content/riak/ts/latest/developing/ruby.md new file mode 100644 index 0000000000..e6cb5b1713 --- /dev/null +++ b/content/riak/ts/latest/developing/ruby.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/developing/ruby/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/downloads.md b/content/riak/ts/latest/downloads.md new file mode 100644 index 0000000000..2b5b645a32 --- /dev/null +++ b/content/riak/ts/latest/downloads.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/downloads/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/index.md b/content/riak/ts/latest/index.md new file mode 100644 index 0000000000..7769bc646e --- /dev/null +++ b/content/riak/ts/latest/index.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/index/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/learn-about.md b/content/riak/ts/latest/learn-about.md new file mode 100644 index 0000000000..728d8d1f67 --- /dev/null +++ b/content/riak/ts/latest/learn-about.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/learn-about/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/learn-about/bestpractices.md b/content/riak/ts/latest/learn-about/bestpractices.md new file mode 100644 index 0000000000..700013a29a --- /dev/null +++ b/content/riak/ts/latest/learn-about/bestpractices.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/learn-about/bestpractices/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/learn-about/sqlriakts.md b/content/riak/ts/latest/learn-about/sqlriakts.md new file mode 100644 index 0000000000..796f4e76ed --- /dev/null +++ b/content/riak/ts/latest/learn-about/sqlriakts.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/learn-about/sqlriakts/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/learn-about/tablearchitecture.md b/content/riak/ts/latest/learn-about/tablearchitecture.md new file mode 100644 index 0000000000..f534949c05 --- /dev/null +++ b/content/riak/ts/latest/learn-about/tablearchitecture.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/learn-about/tablearchitecture/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/learn-about/timestamps.md b/content/riak/ts/latest/learn-about/timestamps.md new file mode 100644 index 0000000000..465bb7364b --- /dev/null +++ b/content/riak/ts/latest/learn-about/timestamps.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/learn-about/timestamps/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/releasenotes.md b/content/riak/ts/latest/releasenotes.md new file mode 100644 index 0000000000..f7294ff50e --- /dev/null +++ b/content/riak/ts/latest/releasenotes.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/releasenotes/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/setup.md b/content/riak/ts/latest/setup.md new file mode 100644 index 0000000000..e4548a91e0 --- /dev/null +++ b/content/riak/ts/latest/setup.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/setup/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/setup/downgrading.md b/content/riak/ts/latest/setup/downgrading.md new file mode 100644 index 0000000000..2a252ac286 --- /dev/null +++ b/content/riak/ts/latest/setup/downgrading.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/setup/downgrading/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/setup/installing.md b/content/riak/ts/latest/setup/installing.md new file mode 100644 index 0000000000..336d35f860 --- /dev/null +++ b/content/riak/ts/latest/setup/installing.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/setup/installing/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/setup/installing/aws.md b/content/riak/ts/latest/setup/installing/aws.md new file mode 100644 index 0000000000..8eb52aab3b --- /dev/null +++ b/content/riak/ts/latest/setup/installing/aws.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/setup/installing/aws/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/setup/installing/debian-ubuntu.md b/content/riak/ts/latest/setup/installing/debian-ubuntu.md new file mode 100644 index 0000000000..294d316a67 --- /dev/null +++ b/content/riak/ts/latest/setup/installing/debian-ubuntu.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/setup/installing/debian-ubuntu/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/setup/installing/mac-osx.md b/content/riak/ts/latest/setup/installing/mac-osx.md new file mode 100644 index 0000000000..bb303ee435 --- /dev/null +++ b/content/riak/ts/latest/setup/installing/mac-osx.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/setup/installing/mac-osx/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/setup/installing/rhel-centos.md b/content/riak/ts/latest/setup/installing/rhel-centos.md new file mode 100644 index 0000000000..8bd62b84eb --- /dev/null +++ b/content/riak/ts/latest/setup/installing/rhel-centos.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/setup/installing/rhel-centos/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/setup/installing/source.md b/content/riak/ts/latest/setup/installing/source.md new file mode 100644 index 0000000000..d4fa8aa49c --- /dev/null +++ b/content/riak/ts/latest/setup/installing/source.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/setup/installing/source/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. 
+ +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/setup/installing/source/erlang.md b/content/riak/ts/latest/setup/installing/source/erlang.md new file mode 100644 index 0000000000..ece4489f39 --- /dev/null +++ b/content/riak/ts/latest/setup/installing/source/erlang.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/setup/installing/source/erlang/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/setup/upgrading.md b/content/riak/ts/latest/setup/upgrading.md new file mode 100644 index 0000000000..19eba8c85f --- /dev/null +++ b/content/riak/ts/latest/setup/upgrading.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/setup/upgrading/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using.md b/content/riak/ts/latest/using.md new file mode 100644 index 0000000000..7ec799ab64 --- /dev/null +++ b/content/riak/ts/latest/using.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/core-fundamentals.md b/content/riak/ts/latest/using/core-fundamentals.md new file mode 100644 index 0000000000..dcc3efe636 --- /dev/null +++ b/content/riak/ts/latest/using/core-fundamentals.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/core-fundamentals/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/creating-activating.md b/content/riak/ts/latest/using/creating-activating.md new file mode 100644 index 0000000000..4d8d4b4d06 --- /dev/null +++ b/content/riak/ts/latest/using/creating-activating.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/creating-activating/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. 
+ +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/deleting-data.md b/content/riak/ts/latest/using/deleting-data.md new file mode 100644 index 0000000000..cf902a64a7 --- /dev/null +++ b/content/riak/ts/latest/using/deleting-data.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/deleting-data/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/planning.md b/content/riak/ts/latest/using/planning.md new file mode 100644 index 0000000000..3bff7c8edb --- /dev/null +++ b/content/riak/ts/latest/using/planning.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/planning/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying.md b/content/riak/ts/latest/using/querying.md new file mode 100644 index 0000000000..3e4f663b68 --- /dev/null +++ b/content/riak/ts/latest/using/querying.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/delete.md b/content/riak/ts/latest/using/querying/delete.md new file mode 100644 index 0000000000..131de5055e --- /dev/null +++ b/content/riak/ts/latest/using/querying/delete.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/delete/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/describe.md b/content/riak/ts/latest/using/querying/describe.md new file mode 100644 index 0000000000..e87aad517a --- /dev/null +++ b/content/riak/ts/latest/using/querying/describe.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/describe/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. 
+ +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/explain.md b/content/riak/ts/latest/using/querying/explain.md new file mode 100644 index 0000000000..bee89e2f15 --- /dev/null +++ b/content/riak/ts/latest/using/querying/explain.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/explain/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/guidelines.md b/content/riak/ts/latest/using/querying/guidelines.md new file mode 100644 index 0000000000..e72bc9d648 --- /dev/null +++ b/content/riak/ts/latest/using/querying/guidelines.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/guidelines/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/reference.md b/content/riak/ts/latest/using/querying/reference.md new file mode 100644 index 0000000000..e38fa79c9a --- /dev/null +++ b/content/riak/ts/latest/using/querying/reference.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/reference/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/select.md b/content/riak/ts/latest/using/querying/select.md new file mode 100644 index 0000000000..2ce6b75cf5 --- /dev/null +++ b/content/riak/ts/latest/using/querying/select.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/select/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/using/querying/select/aggregate-functions.md b/content/riak/ts/latest/using/querying/select/aggregate-functions.md new file mode 100644 index 0000000000..838d65f47e --- /dev/null +++ b/content/riak/ts/latest/using/querying/select/aggregate-functions.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/select/aggregate-functions/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/select/arithmetic-operations.md b/content/riak/ts/latest/using/querying/select/arithmetic-operations.md new file mode 100644 index 0000000000..7d6de44fd7 --- /dev/null +++ b/content/riak/ts/latest/using/querying/select/arithmetic-operations.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/select/arithmetic-operations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/select/group-by.md b/content/riak/ts/latest/using/querying/select/group-by.md new file mode 100644 index 0000000000..5c87650c16 --- /dev/null +++ b/content/riak/ts/latest/using/querying/select/group-by.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/select/group-by/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/select/limit.md b/content/riak/ts/latest/using/querying/select/limit.md new file mode 100644 index 0000000000..42d4f78ab0 --- /dev/null +++ b/content/riak/ts/latest/using/querying/select/limit.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/select/limit/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/using/querying/select/order-by.md b/content/riak/ts/latest/using/querying/select/order-by.md new file mode 100644 index 0000000000..438828f0c6 --- /dev/null +++ b/content/riak/ts/latest/using/querying/select/order-by.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/select/order-by/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/show-create-table.md b/content/riak/ts/latest/using/querying/show-create-table.md new file mode 100644 index 0000000000..51fed60d0a --- /dev/null +++ b/content/riak/ts/latest/using/querying/show-create-table.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/show-create-table/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/show-tables.md b/content/riak/ts/latest/using/querying/show-tables.md new file mode 100644 index 0000000000..852694a277 --- /dev/null +++ b/content/riak/ts/latest/using/querying/show-tables.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/show-tables/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/querying/single-key-fetch.md b/content/riak/ts/latest/using/querying/single-key-fetch.md new file mode 100644 index 0000000000..0513be54f6 --- /dev/null +++ b/content/riak/ts/latest/using/querying/single-key-fetch.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/querying/single-key-fetch/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
+ + + + diff --git a/content/riak/ts/latest/using/riakshell.md b/content/riak/ts/latest/using/riakshell.md new file mode 100644 index 0000000000..a109acfe98 --- /dev/null +++ b/content/riak/ts/latest/using/riakshell.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/riakshell/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/security.md b/content/riak/ts/latest/using/security.md new file mode 100644 index 0000000000..64f1c881ff --- /dev/null +++ b/content/riak/ts/latest/using/security.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/security/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/security/checklist.md b/content/riak/ts/latest/using/security/checklist.md new file mode 100644 index 0000000000..85749c39af --- /dev/null +++ b/content/riak/ts/latest/using/security/checklist.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/security/checklist/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/security/enable-disable.md b/content/riak/ts/latest/using/security/enable-disable.md new file mode 100644 index 0000000000..c7149ed8e2 --- /dev/null +++ b/content/riak/ts/latest/using/security/enable-disable.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/security/enable-disable/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/security/notify-basho.md b/content/riak/ts/latest/using/security/notify-basho.md new file mode 100644 index 0000000000..36474bf541 --- /dev/null +++ b/content/riak/ts/latest/using/security/notify-basho.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/security/notify-basho/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. 
+ +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/security/sources-management.md b/content/riak/ts/latest/using/security/sources-management.md new file mode 100644 index 0000000000..8dc8f4a3cb --- /dev/null +++ b/content/riak/ts/latest/using/security/sources-management.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/security/sources-management/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/security/user-management.md b/content/riak/ts/latest/using/security/user-management.md new file mode 100644 index 0000000000..000e3633da --- /dev/null +++ b/content/riak/ts/latest/using/security/user-management.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/security/user-management/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/timerepresentations.md b/content/riak/ts/latest/using/timerepresentations.md new file mode 100644 index 0000000000..1ff35b0528 --- /dev/null +++ b/content/riak/ts/latest/using/timerepresentations.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/timerepresentations/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. + + + + diff --git a/content/riak/ts/latest/using/writingdata.md b/content/riak/ts/latest/using/writingdata.md new file mode 100644 index 0000000000..50d031eba5 --- /dev/null +++ b/content/riak/ts/latest/using/writingdata.md @@ -0,0 +1,19 @@ +--- +layout: latest_redirect +project: riak_ts +replace_text: latest +latest_text: "{latest}" +aliases: + - "/riakts/latest/using/writingdata/" +--- + +Redirects `someroot/latest/somepath` to `someroot/{latest}/somepath` +where `{latest}` is the `project_descriptions.{project}.latest` value +and `{project}` is the metadata value of `project` above. + +This page exists solely to redirect from the generated URL to the latest version of +that url. 
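Every stub page above delegates the actual redirect work to the `latest_redirect` layout named in its front matter. The layout itself is not part of this diff, but a minimal sketch of how such a layout could work is below — assuming `project_descriptions` is reachable as Hugo site data and noting that the repo's real `layouts/latest_redirect.html` may differ:

```
{{/* Hypothetical sketch of layouts/latest_redirect.html -- not the repo's actual layout. */}}
{{/* Look up the project's latest version, e.g. project_descriptions.riak_ts.latest. */}}
{{ $project := .Params.project }}
{{ $latest  := index .Site.Data.project_descriptions $project "latest" }}
{{/* Swap the replace_text segment ("latest") in this page's URL for that version. */}}
{{ $target  := replace .RelPermalink .Params.replace_text $latest }}
<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="refresh" content="0; url={{ $target }}">
    <link rel="canonical" href="{{ $target }}">
  </head>
  <body>
    <p>Redirecting to <a href="{{ $target }}">{{ $target }}</a>.</p>
  </body>
</html>
```

So a page generated at `/riak/ts/latest/using/querying/` with `project: riak_ts` would refresh to `/riak/ts/{latest}/using/querying/`, where `{latest}` is whatever version `project_descriptions.riak_ts.latest` currently names.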
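The `data/download_info.yaml` change below rehosts every download from `s3.amazonaws.com/downloads.basho.com` to `files.tiot.jp` and quotes each `file_size` as a string. The nesting is regular — project → release → a list of `os` entries, each carrying either `file_info` directly (the `source` tarball) or a `versions` → `architectures` → `file_info` tree — so a template can walk it mechanically. A hypothetical sketch (the repo's real download partial may differ):

```
{{/* Hypothetical partial: render download links for one riak_kv release. */}}
{{ $release := index .Site.Data.download_info "riak_kv" "2.0.0" }}
<ul>
{{ range $release }}
  {{ if .versions }}
    {{ range .versions }}
      {{ range .architectures }}
        <li><a href="{{ .file_info.file_href }}">{{ .file_info.file_name }}</a></li>
      {{ end }}
    {{ end }}
  {{ else }}
    {{/* The os: source entry carries file_info directly, with no versions list. */}}
    <li><a href="{{ .file_info.file_href }}">{{ .file_info.file_name }}</a></li>
  {{ end }}
{{ end }}
</ul>
```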
+ + + + diff --git a/data/download_info.yaml b/data/download_info.yaml index 680102ef9b..ddcdc450d9 100644 --- a/data/download_info.yaml +++ b/data/download_info.yaml @@ -4,8 +4,8 @@ riak_kv: - os: source file_info: file_name: riak-2.0.0.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/riak-2.0.0.tar.gz - file_size: 16504321 + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/riak-2.0.0.tar.gz + file_size: '16504321' - os: debian versions: - version: '6' @@ -13,17 +13,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/debian/6/riak_2.0.0-1_amd64.deb - file_size: 60340618 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/debian/6/riak_2.0.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/debian/6/riak_2.0.0-1_amd64.deb + file_size: '60340618' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/debian/6/riak_2.0.0-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak_2.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/debian/7/riak_2.0.0-1_amd64.deb - file_size: 60345648 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/debian/7/riak_2.0.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/debian/7/riak_2.0.0-1_amd64.deb + file_size: '60345648' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/debian/7/riak_2.0.0-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -31,15 +31,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.0.0-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/fedora/19/riak-2.0.0-1.fc19.src.rpm - file_size: 16510939 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/fedora/19/riak-2.0.0-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/fedora/19/riak-2.0.0-1.fc19.src.rpm + file_size: '16510939' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/fedora/19/riak-2.0.0-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.0-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/fedora/19/riak-2.0.0-1.fc19.x86_64.rpm - file_size: 58001888 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/fedora/19/riak-2.0.0-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/fedora/19/riak-2.0.0-1.fc19.x86_64.rpm + file_size: '58001888' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/fedora/19/riak-2.0.0-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -47,17 +47,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.0.0.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/freebsd/10/riak-2.0.0.txz - file_size: 61452100 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/freebsd/10/riak-2.0.0.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/freebsd/10/riak-2.0.0.txz + file_size: '61452100' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/freebsd/10/riak-2.0.0.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.0.0-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/freebsd/9.2/riak-2.0.0-FreeBSD-amd64.tbz - file_size: 67087746 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/freebsd/9.2/riak-2.0.0-FreeBSD-amd64.tbz.sha + file_href: 
https://files.tiot.jp/riak/kv/2.0/2.0.0/freebsd/9.2/riak-2.0.0-FreeBSD-amd64.tbz + file_size: '67087746' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/freebsd/9.2/riak-2.0.0-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -65,9 +65,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.0-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/osx/10.8/riak-2.0.0-OSX-x86_64.tar.gz - file_size: 60856838 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/osx/10.8/riak-2.0.0-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/osx/10.8/riak-2.0.0-OSX-x86_64.tar.gz + file_size: '60856838' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/osx/10.8/riak-2.0.0-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -75,43 +75,43 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.0-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/5/riak-2.0.0-1.el5.x86_64.rpm - file_size: 61693471 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/5/riak-2.0.0-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/5/riak-2.0.0-1.el5.x86_64.rpm + file_size: '61693471' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/5/riak-2.0.0-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.0-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/5/riak-2.0.0-1.src.rpm - file_size: 16502962 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/5/riak-2.0.0-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/5/riak-2.0.0-1.src.rpm + file_size: '16502962' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/5/riak-2.0.0-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.0.0-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/6/riak-2.0.0-1.el6.src.rpm - file_size: 16473631 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/6/riak-2.0.0-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/6/riak-2.0.0-1.el6.src.rpm + file_size: '16473631' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/6/riak-2.0.0-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.0-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/6/riak-2.0.0-1.el6.x86_64.rpm - file_size: 58763712 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/6/riak-2.0.0-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/6/riak-2.0.0-1.el6.x86_64.rpm + file_size: '58763712' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/6/riak-2.0.0-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-2.0.0-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/7/riak-2.0.0-1.el7.centos.src.rpm - file_size: 16446360 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/7/riak-2.0.0-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/7/riak-2.0.0-1.el7.centos.src.rpm + file_size: '16446360' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/7/riak-2.0.0-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.0-1.el7.centos.x86_64.rpm - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/7/riak-2.0.0-1.el7.centos.x86_64.rpm - file_size: 58325400 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/rhel/7/riak-2.0.0-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/7/riak-2.0.0-1.el7.centos.x86_64.rpm + file_size: '58325400' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/rhel/7/riak-2.0.0-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -119,15 +119,15 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.0-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/sles/11/riak-2.0.0-1.SLES11.x86_64.rpm - file_size: 62131097 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/sles/11/riak-2.0.0-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/sles/11/riak-2.0.0-1.SLES11.x86_64.rpm + file_size: '62131097' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/sles/11/riak-2.0.0-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.0-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/sles/11/riak-2.0.0-1.src.rpm - file_size: 16496009 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/sles/11/riak-2.0.0-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/sles/11/riak-2.0.0-1.src.rpm + file_size: '16496009' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/sles/11/riak-2.0.0-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -135,17 +135,17 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.0-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/smartos/1.8/riak-2.0.0-SmartOS-x86_64.tgz - file_size: 69828372 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/smartos/1.8/riak-2.0.0-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/smartos/1.8/riak-2.0.0-SmartOS-x86_64.tgz + file_size: '69828372' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/smartos/1.8/riak-2.0.0-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: riak-2.0.0-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/smartos/13.1/riak-2.0.0-SmartOS-x86_64.tgz - file_size: 69808919 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/smartos/13.1/riak-2.0.0-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/smartos/13.1/riak-2.0.0-SmartOS-x86_64.tgz + file_size: '69808919' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/smartos/13.1/riak-2.0.0-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -153,9 +153,9 @@ riak_kv: - arch: x86_64 file_info: file_name: BASHOriak-2.0.0-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/solaris/10/BASHOriak-2.0.0-Solaris10-x86_64.pkg.gz - file_size: 66965477 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/solaris/10/BASHOriak-2.0.0-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/solaris/10/BASHOriak-2.0.0-Solaris10-x86_64.pkg.gz + file_size: '66965477' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/solaris/10/BASHOriak-2.0.0-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -163,31 +163,31 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.0-1_amd64.deb - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/ubuntu/lucid/riak_2.0.0-1_amd64.deb - file_size: 60324214 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/ubuntu/lucid/riak_2.0.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/ubuntu/lucid/riak_2.0.0-1_amd64.deb + file_size: '60324214' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/ubuntu/lucid/riak_2.0.0-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak_2.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/ubuntu/precise/riak_2.0.0-1_amd64.deb - file_size: 60290794 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/ubuntu/precise/riak_2.0.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/ubuntu/precise/riak_2.0.0-1_amd64.deb + file_size: '60290794' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/ubuntu/precise/riak_2.0.0-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/ubuntu/trusty/riak_2.0.0-1_amd64.deb - file_size: 54990894 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.0/ubuntu/trusty/riak_2.0.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/ubuntu/trusty/riak_2.0.0-1_amd64.deb + file_size: '54990894' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.0/ubuntu/trusty/riak_2.0.0-1_amd64.deb.sha 2.0.1: - os: source file_info: file_name: riak-2.0.1.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/riak-2.0.1.tar.gz - file_size: 16501073 + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/riak-2.0.1.tar.gz + file_size: '16501073' - os: debian versions: - version: '6' @@ -195,17 +195,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/debian/6/riak_2.0.1-1_amd64.deb - file_size: 60346724 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/debian/6/riak_2.0.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/debian/6/riak_2.0.1-1_amd64.deb + file_size: '60346724' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/debian/6/riak_2.0.1-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak_2.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/debian/7/riak_2.0.1-1_amd64.deb - file_size: 60353586 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/debian/7/riak_2.0.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/debian/7/riak_2.0.1-1_amd64.deb + file_size: '60353586' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/debian/7/riak_2.0.1-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -213,15 +213,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.0.1-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/fedora/19/riak-2.0.1-1.fc19.src.rpm - file_size: 16508590 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/fedora/19/riak-2.0.1-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/fedora/19/riak-2.0.1-1.fc19.src.rpm + file_size: '16508590' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/fedora/19/riak-2.0.1-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.1-1.fc19.x86_64.rpm - 
file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/fedora/19/riak-2.0.1-1.fc19.x86_64.rpm - file_size: 58005416 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/fedora/19/riak-2.0.1-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/fedora/19/riak-2.0.1-1.fc19.x86_64.rpm + file_size: '58005416' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/fedora/19/riak-2.0.1-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -229,17 +229,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.0.1.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/freebsd/10/riak-2.0.1.txz - file_size: 61460384 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/freebsd/10/riak-2.0.1.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/freebsd/10/riak-2.0.1.txz + file_size: '61460384' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/freebsd/10/riak-2.0.1.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.0.1-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/freebsd/9.2/riak-2.0.1-FreeBSD-amd64.tbz - file_size: 67088862 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/freebsd/9.2/riak-2.0.1-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/freebsd/9.2/riak-2.0.1-FreeBSD-amd64.tbz + file_size: '67088862' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/freebsd/9.2/riak-2.0.1-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -247,9 +247,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.1-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/osx/10.8/riak-2.0.1-OSX-x86_64.tar.gz - file_size: 60862616 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/osx/10.8/riak-2.0.1-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/osx/10.8/riak-2.0.1-OSX-x86_64.tar.gz + file_size: '60862616' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/osx/10.8/riak-2.0.1-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -257,43 +257,43 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.1-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/5/riak-2.0.1-1.el5.x86_64.rpm - file_size: 61696945 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/5/riak-2.0.1-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/5/riak-2.0.1-1.el5.x86_64.rpm + file_size: '61696945' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/5/riak-2.0.1-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.1-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/5/riak-2.0.1-1.src.rpm - file_size: 16508458 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/5/riak-2.0.1-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/5/riak-2.0.1-1.src.rpm + file_size: '16508458' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/5/riak-2.0.1-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.0.1-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/6/riak-2.0.1-1.el6.src.rpm - file_size: 16474418 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/6/riak-2.0.1-1.el6.src.rpm.sha + file_href: 
https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/6/riak-2.0.1-1.el6.src.rpm + file_size: '16474418' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/6/riak-2.0.1-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.1-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/6/riak-2.0.1-1.el6.x86_64.rpm - file_size: 58768292 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/6/riak-2.0.1-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/6/riak-2.0.1-1.el6.x86_64.rpm + file_size: '58768292' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/6/riak-2.0.1-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-2.0.1-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/7/riak-2.0.1-1.el7.centos.src.rpm - file_size: 16459798 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/7/riak-2.0.1-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/7/riak-2.0.1-1.el7.centos.src.rpm + file_size: '16459798' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/7/riak-2.0.1-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.1-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/7/riak-2.0.1-1.el7.centos.x86_64.rpm - file_size: 58329556 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/rhel/7/riak-2.0.1-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/7/riak-2.0.1-1.el7.centos.x86_64.rpm + file_size: '58329556' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/rhel/7/riak-2.0.1-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -301,15 +301,15 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.1-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/sles/11/riak-2.0.1-1.SLES11.x86_64.rpm - file_size: 62135276 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/sles/11/riak-2.0.1-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/sles/11/riak-2.0.1-1.SLES11.x86_64.rpm + file_size: '62135276' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/sles/11/riak-2.0.1-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.1-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/sles/11/riak-2.0.1-1.src.rpm - file_size: 16500823 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/sles/11/riak-2.0.1-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/sles/11/riak-2.0.1-1.src.rpm + file_size: '16500823' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/sles/11/riak-2.0.1-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -317,17 +317,17 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.1-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/smartos/1.8/riak-2.0.1-SmartOS-x86_64.tgz - file_size: 69819937 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/smartos/1.8/riak-2.0.1-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/smartos/1.8/riak-2.0.1-SmartOS-x86_64.tgz + file_size: '69819937' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/smartos/1.8/riak-2.0.1-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: 
x86_64 file_info: file_name: riak-2.0.1-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/smartos/13.1/riak-2.0.1-SmartOS-x86_64.tgz - file_size: 69819747 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/smartos/13.1/riak-2.0.1-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/smartos/13.1/riak-2.0.1-SmartOS-x86_64.tgz + file_size: '69819747' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/smartos/13.1/riak-2.0.1-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -335,9 +335,9 @@ riak_kv: - arch: x86_64 file_info: file_name: BASHOriak-2.0.1-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/solaris/10/BASHOriak-2.0.1-Solaris10-x86_64.pkg.gz - file_size: 66970252 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/solaris/10/BASHOriak-2.0.1-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/solaris/10/BASHOriak-2.0.1-Solaris10-x86_64.pkg.gz + file_size: '66970252' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/solaris/10/BASHOriak-2.0.1-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -345,31 +345,31 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/ubuntu/lucid/riak_2.0.1-1_amd64.deb - file_size: 60336690 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/ubuntu/lucid/riak_2.0.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/ubuntu/lucid/riak_2.0.1-1_amd64.deb + file_size: '60336690' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/ubuntu/lucid/riak_2.0.1-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak_2.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/ubuntu/precise/riak_2.0.1-1_amd64.deb - file_size: 60301588 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/ubuntu/precise/riak_2.0.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/ubuntu/precise/riak_2.0.1-1_amd64.deb + file_size: '60301588' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/ubuntu/precise/riak_2.0.1-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/ubuntu/trusty/riak_2.0.1-1_amd64.deb - file_size: 54989732 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.1/ubuntu/trusty/riak_2.0.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/ubuntu/trusty/riak_2.0.1-1_amd64.deb + file_size: '54989732' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.1/ubuntu/trusty/riak_2.0.1-1_amd64.deb.sha 2.0.2: - os: source file_info: file_name: riak-2.0.2.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/riak-2.0.2.tar.gz - file_size: 16511741 + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/riak-2.0.2.tar.gz + file_size: '16511741' - os: debian versions: - version: '6' @@ -377,17 +377,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/debian/6/riak_2.0.2-1_amd64.deb - file_size: 60348796 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/debian/6/riak_2.0.2-1_amd64.deb.sha + file_href: 
https://files.tiot.jp/riak/kv/2.0/2.0.2/debian/6/riak_2.0.2-1_amd64.deb + file_size: '60348796' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/debian/6/riak_2.0.2-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak_2.0.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/debian/7/riak_2.0.2-1_amd64.deb - file_size: 60355850 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/debian/7/riak_2.0.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/debian/7/riak_2.0.2-1_amd64.deb + file_size: '60355850' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/debian/7/riak_2.0.2-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -395,15 +395,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.0.2-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/fedora/19/riak-2.0.2-1.fc19.src.rpm - file_size: 16518698 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/fedora/19/riak-2.0.2-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/fedora/19/riak-2.0.2-1.fc19.src.rpm + file_size: '16518698' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/fedora/19/riak-2.0.2-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.2-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/fedora/19/riak-2.0.2-1.fc19.x86_64.rpm - file_size: 58011940 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/fedora/19/riak-2.0.2-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/fedora/19/riak-2.0.2-1.fc19.x86_64.rpm + file_size: '58011940' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/fedora/19/riak-2.0.2-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -411,17 +411,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.0.2.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/freebsd/10/riak-2.0.2.txz - file_size: 61465124 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/freebsd/10/riak-2.0.2.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/freebsd/10/riak-2.0.2.txz + file_size: '61465124' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/freebsd/10/riak-2.0.2.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.0.2-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/freebsd/9.2/riak-2.0.2-FreeBSD-amd64.tbz - file_size: 67089772 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/freebsd/9.2/riak-2.0.2-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/freebsd/9.2/riak-2.0.2-FreeBSD-amd64.tbz + file_size: '67089772' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/freebsd/9.2/riak-2.0.2-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -429,9 +429,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.2-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/osx/10.8/riak-2.0.2-OSX-x86_64.tar.gz - file_size: 60869054 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/osx/10.8/riak-2.0.2-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/osx/10.8/riak-2.0.2-OSX-x86_64.tar.gz + file_size: '60869054' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/osx/10.8/riak-2.0.2-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: 
'5' @@ -439,43 +439,43 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.2-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/5/riak-2.0.2-1.el5.x86_64.rpm - file_size: 61708991 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/5/riak-2.0.2-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/5/riak-2.0.2-1.el5.x86_64.rpm + file_size: '61708991' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/5/riak-2.0.2-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.2-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/5/riak-2.0.2-1.src.rpm - file_size: 16510788 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/5/riak-2.0.2-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/5/riak-2.0.2-1.src.rpm + file_size: '16510788' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/5/riak-2.0.2-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.0.2-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/6/riak-2.0.2-1.el6.src.rpm - file_size: 16478170 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/6/riak-2.0.2-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/6/riak-2.0.2-1.el6.src.rpm + file_size: '16478170' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/6/riak-2.0.2-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.2-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/6/riak-2.0.2-1.el6.x86_64.rpm - file_size: 58773788 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/6/riak-2.0.2-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/6/riak-2.0.2-1.el6.x86_64.rpm + file_size: '58773788' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/6/riak-2.0.2-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-2.0.2-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/7/riak-2.0.2-1.el7.centos.src.rpm - file_size: 16460701 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/7/riak-2.0.2-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/7/riak-2.0.2-1.el7.centos.src.rpm + file_size: '16460701' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/7/riak-2.0.2-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.2-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/7/riak-2.0.2-1.el7.centos.x86_64.rpm - file_size: 58335040 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/rhel/7/riak-2.0.2-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/7/riak-2.0.2-1.el7.centos.x86_64.rpm + file_size: '58335040' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/rhel/7/riak-2.0.2-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -483,15 +483,15 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.2-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/sles/11/riak-2.0.2-1.SLES11.x86_64.rpm - file_size: 62141277 - chksum_href: 
http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/sles/11/riak-2.0.2-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/sles/11/riak-2.0.2-1.SLES11.x86_64.rpm + file_size: '62141277' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/sles/11/riak-2.0.2-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.2-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/sles/11/riak-2.0.2-1.src.rpm - file_size: 16505807 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/sles/11/riak-2.0.2-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/sles/11/riak-2.0.2-1.src.rpm + file_size: '16505807' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/sles/11/riak-2.0.2-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -499,17 +499,17 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.2-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/smartos/1.8/riak-2.0.2-SmartOS-x86_64.tgz - file_size: 69818737 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/smartos/1.8/riak-2.0.2-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/smartos/1.8/riak-2.0.2-SmartOS-x86_64.tgz + file_size: '69818737' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/smartos/1.8/riak-2.0.2-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: riak-2.0.2-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/smartos/13.1/riak-2.0.2-SmartOS-x86_64.tgz - file_size: 69817430 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/smartos/13.1/riak-2.0.2-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/smartos/13.1/riak-2.0.2-SmartOS-x86_64.tgz + file_size: '69817430' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/smartos/13.1/riak-2.0.2-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -517,9 +517,9 @@ riak_kv: - arch: x86_64 file_info: file_name: BASHOriak-2.0.2-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/solaris/10/BASHOriak-2.0.2-Solaris10-x86_64.pkg.gz - file_size: 66976265 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/solaris/10/BASHOriak-2.0.2-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/solaris/10/BASHOriak-2.0.2-Solaris10-x86_64.pkg.gz + file_size: '66976265' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/solaris/10/BASHOriak-2.0.2-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -527,31 +527,31 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/ubuntu/lucid/riak_2.0.2-1_amd64.deb - file_size: 60339918 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/ubuntu/lucid/riak_2.0.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/ubuntu/lucid/riak_2.0.2-1_amd64.deb + file_size: '60339918' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/ubuntu/lucid/riak_2.0.2-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak_2.0.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/ubuntu/precise/riak_2.0.2-1_amd64.deb - file_size: 60310544 - chksum_href: 
http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/ubuntu/precise/riak_2.0.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/ubuntu/precise/riak_2.0.2-1_amd64.deb + file_size: '60310544' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/ubuntu/precise/riak_2.0.2-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.0.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/ubuntu/trusty/riak_2.0.2-1_amd64.deb - file_size: 54976614 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.2/ubuntu/trusty/riak_2.0.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/ubuntu/trusty/riak_2.0.2-1_amd64.deb + file_size: '54976614' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.2/ubuntu/trusty/riak_2.0.2-1_amd64.deb.sha 2.0.4: - os: source file_info: file_name: riak-2.0.4.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/riak-2.0.4.tar.gz - file_size: 17797611 + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/riak-2.0.4.tar.gz + file_size: '17797611' - os: debian versions: - version: '6' @@ -559,17 +559,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.4-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/debian/6/riak_2.0.4-1_amd64.deb - file_size: 60871828 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/debian/6/riak_2.0.4-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/debian/6/riak_2.0.4-1_amd64.deb + file_size: '60871828' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/debian/6/riak_2.0.4-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak_2.0.4-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/debian/7/riak_2.0.4-1_amd64.deb - file_size: 60883288 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/debian/7/riak_2.0.4-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/debian/7/riak_2.0.4-1_amd64.deb + file_size: '60883288' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/debian/7/riak_2.0.4-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -577,15 +577,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.0.4-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/fedora/19/riak-2.0.4-1.fc19.src.rpm - file_size: 17804750 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/fedora/19/riak-2.0.4-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/fedora/19/riak-2.0.4-1.fc19.src.rpm + file_size: '17804750' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/fedora/19/riak-2.0.4-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.4-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/fedora/19/riak-2.0.4-1.fc19.x86_64.rpm - file_size: 58818164 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/fedora/19/riak-2.0.4-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/fedora/19/riak-2.0.4-1.fc19.x86_64.rpm + file_size: '58818164' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/fedora/19/riak-2.0.4-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -593,17 +593,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.0.4.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/freebsd/10/riak-2.0.4.txz - 
file_size: 60100044 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/freebsd/10/riak-2.0.4.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/freebsd/10/riak-2.0.4.txz + file_size: '60100044' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/freebsd/10/riak-2.0.4.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.0.4-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/freebsd/9.2/riak-2.0.4-FreeBSD-amd64.tbz - file_size: 67654356 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/freebsd/9.2/riak-2.0.4-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/freebsd/9.2/riak-2.0.4-FreeBSD-amd64.tbz + file_size: '67654356' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/freebsd/9.2/riak-2.0.4-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -611,9 +611,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.4-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/osx/10.8/riak-2.0.4-OSX-x86_64.tar.gz - file_size: 61392008 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/osx/10.8/riak-2.0.4-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/osx/10.8/riak-2.0.4-OSX-x86_64.tar.gz + file_size: '61392008' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/osx/10.8/riak-2.0.4-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -621,43 +621,43 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.4-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/5/riak-2.0.4-1.el5.x86_64.rpm - file_size: 62234868 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/5/riak-2.0.4-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/5/riak-2.0.4-1.el5.x86_64.rpm + file_size: '62234868' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/5/riak-2.0.4-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.4-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/5/riak-2.0.4-1.src.rpm - file_size: 17801721 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/5/riak-2.0.4-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/5/riak-2.0.4-1.src.rpm + file_size: '17801721' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/5/riak-2.0.4-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.0.4-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/6/riak-2.0.4-1.el6.src.rpm - file_size: 17772655 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/6/riak-2.0.4-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/6/riak-2.0.4-1.el6.src.rpm + file_size: '17772655' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/6/riak-2.0.4-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.4-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/6/riak-2.0.4-1.el6.x86_64.rpm - file_size: 59248776 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/6/riak-2.0.4-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/6/riak-2.0.4-1.el6.x86_64.rpm + file_size: '59248776' + chksum_href: 
https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/6/riak-2.0.4-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-2.0.4-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/7/riak-2.0.4-1.el7.centos.src.rpm - file_size: 17732205 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/7/riak-2.0.4-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/7/riak-2.0.4-1.el7.centos.src.rpm + file_size: '17732205' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/7/riak-2.0.4-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.4-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/7/riak-2.0.4-1.el7.centos.x86_64.rpm - file_size: 58816188 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/rhel/7/riak-2.0.4-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/7/riak-2.0.4-1.el7.centos.x86_64.rpm + file_size: '58816188' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/rhel/7/riak-2.0.4-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -665,15 +665,15 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.4-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/sles/11/riak-2.0.4-1.SLES11.x86_64.rpm - file_size: 62557110 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/sles/11/riak-2.0.4-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/sles/11/riak-2.0.4-1.SLES11.x86_64.rpm + file_size: '62557110' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/sles/11/riak-2.0.4-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.4-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/sles/11/riak-2.0.4-1.src.rpm - file_size: 17788607 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/sles/11/riak-2.0.4-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/sles/11/riak-2.0.4-1.src.rpm + file_size: '17788607' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/sles/11/riak-2.0.4-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -681,17 +681,17 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.4-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/smartos/1.8/riak-2.0.4-SmartOS-x86_64.tgz - file_size: 70192677 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/smartos/1.8/riak-2.0.4-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/smartos/1.8/riak-2.0.4-SmartOS-x86_64.tgz + file_size: '70192677' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/smartos/1.8/riak-2.0.4-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: riak-2.0.4-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/smartos/13.1/riak-2.0.4-SmartOS-x86_64.tgz - file_size: 70201167 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/smartos/13.1/riak-2.0.4-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/smartos/13.1/riak-2.0.4-SmartOS-x86_64.tgz + file_size: '70201167' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/smartos/13.1/riak-2.0.4-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -699,9 +699,9 @@ riak_kv: - arch: x86_64 
file_info: file_name: BASHOriak-2.0.4-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/solaris/10/BASHOriak-2.0.4-Solaris10-x86_64.pkg.gz - file_size: 66886675 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/solaris/10/BASHOriak-2.0.4-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/solaris/10/BASHOriak-2.0.4-Solaris10-x86_64.pkg.gz + file_size: '66886675' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/solaris/10/BASHOriak-2.0.4-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -709,31 +709,31 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.4-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/ubuntu/lucid/riak_2.0.4-1_amd64.deb - file_size: 60850718 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/ubuntu/lucid/riak_2.0.4-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/ubuntu/lucid/riak_2.0.4-1_amd64.deb + file_size: '60850718' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/ubuntu/lucid/riak_2.0.4-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak_2.0.4-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/ubuntu/precise/riak_2.0.4-1_amd64.deb - file_size: 60830960 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/ubuntu/precise/riak_2.0.4-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/ubuntu/precise/riak_2.0.4-1_amd64.deb + file_size: '60830960' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/ubuntu/precise/riak_2.0.4-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.0.4-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/ubuntu/trusty/riak_2.0.4-1_amd64.deb - file_size: 55438444 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.4/ubuntu/trusty/riak_2.0.4-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/ubuntu/trusty/riak_2.0.4-1_amd64.deb + file_size: '55438444' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.4/ubuntu/trusty/riak_2.0.4-1_amd64.deb.sha 2.0.5: - os: source file_info: file_name: riak-2.0.5.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/riak-2.0.5.tar.gz - file_size: 17805547 + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/riak-2.0.5.tar.gz + file_size: '17805547' - os: debian versions: - version: '6' @@ -741,17 +741,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.5-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/debian/6/riak_2.0.5-1_amd64.deb - file_size: 60893600 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/debian/6/riak_2.0.5-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/debian/6/riak_2.0.5-1_amd64.deb + file_size: '60893600' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/debian/6/riak_2.0.5-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak_2.0.5-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/debian/7/riak_2.0.5-1_amd64.deb - file_size: 60903632 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/debian/7/riak_2.0.5-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/debian/7/riak_2.0.5-1_amd64.deb + file_size: '60903632' + chksum_href: 
https://files.tiot.jp/riak/kv/2.0/2.0.5/debian/7/riak_2.0.5-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -759,15 +759,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.0.5-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/fedora/19/riak-2.0.5-1.fc19.src.rpm - file_size: 17812232 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/fedora/19/riak-2.0.5-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/fedora/19/riak-2.0.5-1.fc19.src.rpm + file_size: '17812232' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/fedora/19/riak-2.0.5-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.5-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/fedora/19/riak-2.0.5-1.fc19.x86_64.rpm - file_size: 58837144 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/fedora/19/riak-2.0.5-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/fedora/19/riak-2.0.5-1.fc19.x86_64.rpm + file_size: '58837144' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/fedora/19/riak-2.0.5-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -775,17 +775,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.0.5.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/freebsd/10/riak-2.0.5.txz - file_size: 60138372 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/freebsd/10/riak-2.0.5.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/freebsd/10/riak-2.0.5.txz + file_size: '60138372' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/freebsd/10/riak-2.0.5.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.0.5-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/freebsd/9.2/riak-2.0.5-FreeBSD-amd64.tbz - file_size: 67676108 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/freebsd/9.2/riak-2.0.5-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/freebsd/9.2/riak-2.0.5-FreeBSD-amd64.tbz + file_size: '67676108' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/freebsd/9.2/riak-2.0.5-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -793,9 +793,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.5-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/osx/10.8/riak-2.0.5-OSX-x86_64.tar.gz - file_size: 61409701 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/osx/10.8/riak-2.0.5-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/osx/10.8/riak-2.0.5-OSX-x86_64.tar.gz + file_size: '61409701' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/osx/10.8/riak-2.0.5-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -803,43 +803,43 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.5-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/5/riak-2.0.5-1.el5.x86_64.rpm - file_size: 62256172 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/5/riak-2.0.5-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/5/riak-2.0.5-1.el5.x86_64.rpm + file_size: '62256172' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/5/riak-2.0.5-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.5-1.src.rpm - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/5/riak-2.0.5-1.src.rpm - file_size: 17813995 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/5/riak-2.0.5-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/5/riak-2.0.5-1.src.rpm + file_size: '17813995' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/5/riak-2.0.5-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.0.5-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/6/riak-2.0.5-1.el6.src.rpm - file_size: 17782309 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/6/riak-2.0.5-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/6/riak-2.0.5-1.el6.src.rpm + file_size: '17782309' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/6/riak-2.0.5-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.5-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/6/riak-2.0.5-1.el6.x86_64.rpm - file_size: 59269904 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/6/riak-2.0.5-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/6/riak-2.0.5-1.el6.x86_64.rpm + file_size: '59269904' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/6/riak-2.0.5-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-2.0.5-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/7/riak-2.0.5-1.el7.centos.src.rpm - file_size: 17750514 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/7/riak-2.0.5-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/7/riak-2.0.5-1.el7.centos.src.rpm + file_size: '17750514' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/7/riak-2.0.5-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.5-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/7/riak-2.0.5-1.el7.centos.x86_64.rpm - file_size: 58835528 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/rhel/7/riak-2.0.5-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/7/riak-2.0.5-1.el7.centos.x86_64.rpm + file_size: '58835528' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/rhel/7/riak-2.0.5-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -847,15 +847,15 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.5-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/sles/11/riak-2.0.5-1.SLES11.x86_64.rpm - file_size: 62577576 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/sles/11/riak-2.0.5-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/sles/11/riak-2.0.5-1.SLES11.x86_64.rpm + file_size: '62577576' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/sles/11/riak-2.0.5-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.5-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/sles/11/riak-2.0.5-1.src.rpm - file_size: 17796840 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/sles/11/riak-2.0.5-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/sles/11/riak-2.0.5-1.src.rpm + file_size: '17796840' + 
chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/sles/11/riak-2.0.5-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -863,17 +863,17 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.5-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/smartos/1.8/riak-2.0.5-SmartOS-x86_64.tgz - file_size: 70219883 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/smartos/1.8/riak-2.0.5-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/smartos/1.8/riak-2.0.5-SmartOS-x86_64.tgz + file_size: '70219883' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/smartos/1.8/riak-2.0.5-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: riak-2.0.5-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/smartos/13.1/riak-2.0.5-SmartOS-x86_64.tgz - file_size: 70218188 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/smartos/13.1/riak-2.0.5-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/smartos/13.1/riak-2.0.5-SmartOS-x86_64.tgz + file_size: '70218188' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/smartos/13.1/riak-2.0.5-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -881,9 +881,9 @@ riak_kv: - arch: x86_64 file_info: file_name: BASHOriak-2.0.5-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/solaris/10/BASHOriak-2.0.5-Solaris10-x86_64.pkg.gz - file_size: 66917538 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/solaris/10/BASHOriak-2.0.5-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/solaris/10/BASHOriak-2.0.5-Solaris10-x86_64.pkg.gz + file_size: '66917538' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/solaris/10/BASHOriak-2.0.5-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -891,31 +891,31 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.5-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/ubuntu/lucid/riak_2.0.5-1_amd64.deb - file_size: 60879946 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/ubuntu/lucid/riak_2.0.5-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/ubuntu/lucid/riak_2.0.5-1_amd64.deb + file_size: '60879946' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/ubuntu/lucid/riak_2.0.5-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak_2.0.5-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/ubuntu/precise/riak_2.0.5-1_amd64.deb - file_size: 60858548 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/ubuntu/precise/riak_2.0.5-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/ubuntu/precise/riak_2.0.5-1_amd64.deb + file_size: '60858548' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/ubuntu/precise/riak_2.0.5-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.0.5-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/ubuntu/trusty/riak_2.0.5-1_amd64.deb - file_size: 55442534 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.5/ubuntu/trusty/riak_2.0.5-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.5/ubuntu/trusty/riak_2.0.5-1_amd64.deb + file_size: '55442534' + chksum_href: 
https://files.tiot.jp/riak/kv/2.0/2.0.5/ubuntu/trusty/riak_2.0.5-1_amd64.deb.sha 2.0.6: - os: source file_info: file_name: riak-2.0.6.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/riak-2.0.6.tar.gz - file_size: 17924138 + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/riak-2.0.6.tar.gz + file_size: '17924138' - os: debian versions: - version: '6' @@ -923,17 +923,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.6-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/debian/6/riak_2.0.6-1_amd64.deb - file_size: 60896854 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/debian/6/riak_2.0.6-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/debian/6/riak_2.0.6-1_amd64.deb + file_size: '60896854' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/debian/6/riak_2.0.6-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak_2.0.6-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/debian/7/riak_2.0.6-1_amd64.deb - file_size: 60919112 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/debian/7/riak_2.0.6-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/debian/7/riak_2.0.6-1_amd64.deb + file_size: '60919112' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/debian/7/riak_2.0.6-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -941,15 +941,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.0.6-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/fedora/19/riak-2.0.6-1.fc19.src.rpm - file_size: 17930086 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/fedora/19/riak-2.0.6-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/fedora/19/riak-2.0.6-1.fc19.src.rpm + file_size: '17930086' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/fedora/19/riak-2.0.6-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.6-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/fedora/19/riak-2.0.6-1.fc19.x86_64.rpm - file_size: 58848816 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/fedora/19/riak-2.0.6-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/fedora/19/riak-2.0.6-1.fc19.x86_64.rpm + file_size: '58848816' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/fedora/19/riak-2.0.6-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -957,17 +957,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.0.6.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/freebsd/10/riak-2.0.6.txz - file_size: 60156284 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/freebsd/10/riak-2.0.6.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/freebsd/10/riak-2.0.6.txz + file_size: '60156284' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/freebsd/10/riak-2.0.6.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.0.6-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/freebsd/9.2/riak-2.0.6-FreeBSD-amd64.tbz - file_size: 67689140 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/freebsd/9.2/riak-2.0.6-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/freebsd/9.2/riak-2.0.6-FreeBSD-amd64.tbz + file_size: '67689140' + 
chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/freebsd/9.2/riak-2.0.6-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -975,9 +975,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.6-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/osx/10.8/riak-2.0.6-OSX-x86_64.tar.gz - file_size: 61419492 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/osx/10.8/riak-2.0.6-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/osx/10.8/riak-2.0.6-OSX-x86_64.tar.gz + file_size: '61419492' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/osx/10.8/riak-2.0.6-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -985,43 +985,43 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.6-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/5/riak-2.0.6-1.el5.x86_64.rpm - file_size: 62270796 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/5/riak-2.0.6-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/5/riak-2.0.6-1.el5.x86_64.rpm + file_size: '62270796' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/5/riak-2.0.6-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.6-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/5/riak-2.0.6-1.src.rpm - file_size: 17928144 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/5/riak-2.0.6-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/5/riak-2.0.6-1.src.rpm + file_size: '17928144' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/5/riak-2.0.6-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.0.6-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/6/riak-2.0.6-1.el6.src.rpm - file_size: 17899977 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/6/riak-2.0.6-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/6/riak-2.0.6-1.el6.src.rpm + file_size: '17899977' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/6/riak-2.0.6-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.6-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/6/riak-2.0.6-1.el6.x86_64.rpm - file_size: 59279536 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/6/riak-2.0.6-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/6/riak-2.0.6-1.el6.x86_64.rpm + file_size: '59279536' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/6/riak-2.0.6-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-2.0.6-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/7/riak-2.0.6-1.el7.centos.src.rpm - file_size: 17867736 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/7/riak-2.0.6-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/7/riak-2.0.6-1.el7.centos.src.rpm + file_size: '17867736' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/7/riak-2.0.6-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.6-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/7/riak-2.0.6-1.el7.centos.x86_64.rpm - file_size: 
58846248 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/rhel/7/riak-2.0.6-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/7/riak-2.0.6-1.el7.centos.x86_64.rpm + file_size: '58846248' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/rhel/7/riak-2.0.6-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -1029,15 +1029,15 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.6-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/sles/11/riak-2.0.6-1.SLES11.x86_64.rpm - file_size: 62594978 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/sles/11/riak-2.0.6-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/sles/11/riak-2.0.6-1.SLES11.x86_64.rpm + file_size: '62594978' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/sles/11/riak-2.0.6-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.6-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/sles/11/riak-2.0.6-1.src.rpm - file_size: 17916658 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/sles/11/riak-2.0.6-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/sles/11/riak-2.0.6-1.src.rpm + file_size: '17916658' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/sles/11/riak-2.0.6-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -1045,17 +1045,17 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.6-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/smartos/1.8/riak-2.0.6-SmartOS-x86_64.tgz - file_size: 69491818 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/smartos/1.8/riak-2.0.6-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/smartos/1.8/riak-2.0.6-SmartOS-x86_64.tgz + file_size: '69491818' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/smartos/1.8/riak-2.0.6-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: riak-2.0.6-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/smartos/13.1/riak-2.0.6-SmartOS-x86_64.tgz - file_size: 69469677 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/smartos/13.1/riak-2.0.6-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/smartos/13.1/riak-2.0.6-SmartOS-x86_64.tgz + file_size: '69469677' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/smartos/13.1/riak-2.0.6-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -1063,9 +1063,9 @@ riak_kv: - arch: x86_64 file_info: file_name: BASHOriak-2.0.6-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/solaris/10/BASHOriak-2.0.6-Solaris10-x86_64.pkg.gz - file_size: 66509803 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/solaris/10/BASHOriak-2.0.6-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/solaris/10/BASHOriak-2.0.6-Solaris10-x86_64.pkg.gz + file_size: '66509803' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/solaris/10/BASHOriak-2.0.6-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -1073,31 +1073,31 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.6-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/ubuntu/lucid/riak_2.0.6-1_amd64.deb - file_size: 60882948 - 
chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/ubuntu/lucid/riak_2.0.6-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/ubuntu/lucid/riak_2.0.6-1_amd64.deb + file_size: '60882948' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/ubuntu/lucid/riak_2.0.6-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak_2.0.6-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/ubuntu/precise/riak_2.0.6-1_amd64.deb - file_size: 60865074 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/ubuntu/precise/riak_2.0.6-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/ubuntu/precise/riak_2.0.6-1_amd64.deb + file_size: '60865074' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/ubuntu/precise/riak_2.0.6-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.0.6-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/ubuntu/trusty/riak_2.0.6-1_amd64.deb - file_size: 55469868 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.6/ubuntu/trusty/riak_2.0.6-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/ubuntu/trusty/riak_2.0.6-1_amd64.deb + file_size: '55469868' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.6/ubuntu/trusty/riak_2.0.6-1_amd64.deb.sha 2.0.7: - os: source file_info: file_name: riak-2.0.7.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/riak-2.0.7.tar.gz - file_size: 18164228 + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/riak-2.0.7.tar.gz + file_size: '18164228' - os: debian versions: - version: '6' @@ -1105,17 +1105,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.7-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/debian/6/riak_2.0.7-1_amd64.deb - file_size: 61921180 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/debian/6/riak_2.0.7-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/debian/6/riak_2.0.7-1_amd64.deb + file_size: '61921180' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/debian/6/riak_2.0.7-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak_2.0.7-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/debian/7/riak_2.0.7-1_amd64.deb - file_size: 61947312 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/debian/7/riak_2.0.7-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/debian/7/riak_2.0.7-1_amd64.deb + file_size: '61947312' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/debian/7/riak_2.0.7-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -1123,15 +1123,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.0.7-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/fedora/19/riak-2.0.7-1.fc19.src.rpm - file_size: 18171058 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/fedora/19/riak-2.0.7-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/fedora/19/riak-2.0.7-1.fc19.src.rpm + file_size: '18171058' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/fedora/19/riak-2.0.7-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.7-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/fedora/19/riak-2.0.7-1.fc19.x86_64.rpm - 
file_size: 59516864 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/fedora/19/riak-2.0.7-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/fedora/19/riak-2.0.7-1.fc19.x86_64.rpm + file_size: '59516864' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/fedora/19/riak-2.0.7-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -1139,17 +1139,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.0.7.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/freebsd/10/riak-2.0.7.txz - file_size: 61342760 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/freebsd/10/riak-2.0.7.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/freebsd/10/riak-2.0.7.txz + file_size: '61342760' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/freebsd/10/riak-2.0.7.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.0.7-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/freebsd/9.2/riak-2.0.7-FreeBSD-amd64.tbz - file_size: 73416975 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/freebsd/9.2/riak-2.0.7-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/freebsd/9.2/riak-2.0.7-FreeBSD-amd64.tbz + file_size: '73416975' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/freebsd/9.2/riak-2.0.7-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -1157,9 +1157,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.7-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/osx/10.8/riak-2.0.7-OSX-x86_64.tar.gz - file_size: 62578226 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/osx/10.8/riak-2.0.7-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/osx/10.8/riak-2.0.7-OSX-x86_64.tar.gz + file_size: '62578226' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/osx/10.8/riak-2.0.7-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -1167,43 +1167,43 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.7-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/5/riak-2.0.7-1.el5.x86_64.rpm - file_size: 63432268 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/5/riak-2.0.7-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/5/riak-2.0.7-1.el5.x86_64.rpm + file_size: '63432268' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/5/riak-2.0.7-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.7-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/5/riak-2.0.7-1.src.rpm - file_size: 18169764 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/5/riak-2.0.7-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/5/riak-2.0.7-1.src.rpm + file_size: '18169764' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/5/riak-2.0.7-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.0.7-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/6/riak-2.0.7-1.el6.src.rpm - file_size: 18168761 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/6/riak-2.0.7-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/6/riak-2.0.7-1.el6.src.rpm + file_size: '18168761' + 
chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/6/riak-2.0.7-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.7-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/6/riak-2.0.7-1.el6.x86_64.rpm - file_size: 61818076 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/6/riak-2.0.7-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/6/riak-2.0.7-1.el6.x86_64.rpm + file_size: '61818076' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/6/riak-2.0.7-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-2.0.7-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/7/riak-2.0.7-1.el7.centos.src.rpm - file_size: 18107564 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/7/riak-2.0.7-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/7/riak-2.0.7-1.el7.centos.src.rpm + file_size: '18107564' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/7/riak-2.0.7-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.7-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/7/riak-2.0.7-1.el7.centos.x86_64.rpm - file_size: 61386544 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/rhel/7/riak-2.0.7-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/7/riak-2.0.7-1.el7.centos.x86_64.rpm + file_size: '61386544' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/rhel/7/riak-2.0.7-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -1211,15 +1211,15 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.7-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/sles/11/riak-2.0.7-1.SLES11.x86_64.rpm - file_size: 66392890 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/sles/11/riak-2.0.7-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/sles/11/riak-2.0.7-1.SLES11.x86_64.rpm + file_size: '66392890' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/sles/11/riak-2.0.7-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.7-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/sles/11/riak-2.0.7-1.src.rpm - file_size: 18152585 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/sles/11/riak-2.0.7-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/sles/11/riak-2.0.7-1.src.rpm + file_size: '18152585' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/sles/11/riak-2.0.7-1.src.rpm.sha - os: smartos versions: - version: '13.1' @@ -1227,9 +1227,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.7-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/smartos/13.1/riak-2.0.7-SmartOS-x86_64.tgz - file_size: 78514350 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/smartos/13.1/riak-2.0.7-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/smartos/13.1/riak-2.0.7-SmartOS-x86_64.tgz + file_size: '78514350' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/smartos/13.1/riak-2.0.7-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -1237,9 +1237,9 @@ riak_kv: - arch: x86_64 file_info: file_name: 
BASHOriak-2.0.7-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/solaris/10/BASHOriak-2.0.7-Solaris10-x86_64.pkg.gz - file_size: 71924972 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/solaris/10/BASHOriak-2.0.7-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/solaris/10/BASHOriak-2.0.7-Solaris10-x86_64.pkg.gz + file_size: '71924972' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/solaris/10/BASHOriak-2.0.7-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -1247,31 +1247,31 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.7-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/ubuntu/lucid/riak_2.0.7-1_amd64.deb - file_size: 61914084 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/ubuntu/lucid/riak_2.0.7-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/ubuntu/lucid/riak_2.0.7-1_amd64.deb + file_size: '61914084' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/ubuntu/lucid/riak_2.0.7-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak_2.0.7-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/ubuntu/precise/riak_2.0.7-1_amd64.deb - file_size: 64022626 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/ubuntu/precise/riak_2.0.7-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/ubuntu/precise/riak_2.0.7-1_amd64.deb + file_size: '64022626' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/ubuntu/precise/riak_2.0.7-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.0.7-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/ubuntu/trusty/riak_2.0.7-1_amd64.deb - file_size: 57501416 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.7/ubuntu/trusty/riak_2.0.7-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/ubuntu/trusty/riak_2.0.7-1_amd64.deb + file_size: '57501416' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.7/ubuntu/trusty/riak_2.0.7-1_amd64.deb.sha 2.0.8: - os: source file_info: file_name: riak-2.0.8.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/riak-2.0.8.tar.gz - file_size: 18204351 + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/riak-2.0.8.tar.gz + file_size: '18204351' - os: debian versions: - version: jessie @@ -1279,17 +1279,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.8-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/debian/jessie/riak_2.0.8-1_amd64.deb - file_size: 57578490 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/debian/jessie/riak_2.0.8-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/debian/jessie/riak_2.0.8-1_amd64.deb + file_size: '57578490' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/debian/jessie/riak_2.0.8-1_amd64.deb.sha - version: wheezy architectures: - arch: amd64 file_info: file_name: riak_2.0.8-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/debian/wheezy/riak_2.0.8-1_amd64.deb - file_size: 63937940 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/debian/wheezy/riak_2.0.8-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/debian/wheezy/riak_2.0.8-1_amd64.deb + file_size: '63937940' + 
chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/debian/wheezy/riak_2.0.8-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -1297,15 +1297,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.0.8-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/fedora/19/riak-2.0.8-1.fc19.src.rpm - file_size: 18212621 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/fedora/19/riak-2.0.8-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/fedora/19/riak-2.0.8-1.fc19.src.rpm + file_size: '18212621' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/fedora/19/riak-2.0.8-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.8-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/fedora/19/riak-2.0.8-1.fc19.x86_64.rpm - file_size: 59641640 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/fedora/19/riak-2.0.8-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/fedora/19/riak-2.0.8-1.fc19.x86_64.rpm + file_size: '59641640' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/fedora/19/riak-2.0.8-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -1313,17 +1313,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.0.8.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/freebsd/10/riak-2.0.8.txz - file_size: 61229004 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/freebsd/10/riak-2.0.8.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/freebsd/10/riak-2.0.8.txz + file_size: '61229004' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/freebsd/10/riak-2.0.8.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.0.8-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/freebsd/9.2/riak-2.0.8-FreeBSD-amd64.tbz - file_size: 73127078 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/freebsd/9.2/riak-2.0.8-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/freebsd/9.2/riak-2.0.8-FreeBSD-amd64.tbz + file_size: '73127078' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/freebsd/9.2/riak-2.0.8-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -1331,9 +1331,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.8-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/osx/10.8/riak-2.0.8-OSX-x86_64.tar.gz - file_size: 63516129 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/osx/10.8/riak-2.0.8-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/osx/10.8/riak-2.0.8-OSX-x86_64.tar.gz + file_size: '63516129' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/osx/10.8/riak-2.0.8-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -1341,43 +1341,43 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.8-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/5/riak-2.0.8-1.el5.x86_64.rpm - file_size: 63382339 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/5/riak-2.0.8-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/5/riak-2.0.8-1.el5.x86_64.rpm + file_size: '63382339' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/5/riak-2.0.8-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.8-1.src.rpm - 
file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/5/riak-2.0.8-1.src.rpm - file_size: 18211518 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/5/riak-2.0.8-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/5/riak-2.0.8-1.src.rpm + file_size: '18211518' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/5/riak-2.0.8-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.0.8-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/6/riak-2.0.8-1.el6.src.rpm - file_size: 18204160 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/6/riak-2.0.8-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/6/riak-2.0.8-1.el6.src.rpm + file_size: '18204160' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/6/riak-2.0.8-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.8-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/6/riak-2.0.8-1.el6.x86_64.rpm - file_size: 61936612 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/6/riak-2.0.8-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/6/riak-2.0.8-1.el6.x86_64.rpm + file_size: '61936612' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/6/riak-2.0.8-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-2.0.8-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/7/riak-2.0.8-1.el7.centos.src.rpm - file_size: 18145006 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/7/riak-2.0.8-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/7/riak-2.0.8-1.el7.centos.src.rpm + file_size: '18145006' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/7/riak-2.0.8-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.8-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/7/riak-2.0.8-1.el7.centos.x86_64.rpm - file_size: 61513540 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/rhel/7/riak-2.0.8-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/7/riak-2.0.8-1.el7.centos.x86_64.rpm + file_size: '61513540' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/rhel/7/riak-2.0.8-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -1385,15 +1385,15 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.8-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/sles/11/riak-2.0.8-1.SLES11.x86_64.rpm - file_size: 65814676 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/sles/11/riak-2.0.8-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/sles/11/riak-2.0.8-1.SLES11.x86_64.rpm + file_size: '65814676' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/sles/11/riak-2.0.8-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.8-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/sles/11/riak-2.0.8-1.src.rpm - file_size: 18193137 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/sles/11/riak-2.0.8-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/sles/11/riak-2.0.8-1.src.rpm + file_size: 
'18193137' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/sles/11/riak-2.0.8-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -1401,17 +1401,17 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.8-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/smartos/1.8/riak-2.0.8-SmartOS-x86_64.tgz - file_size: 78075400 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/smartos/1.8/riak-2.0.8-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/smartos/1.8/riak-2.0.8-SmartOS-x86_64.tgz + file_size: '78075400' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/smartos/1.8/riak-2.0.8-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: riak-2.0.8-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/smartos/13.1/riak-2.0.8-SmartOS-x86_64.tgz - file_size: 78015927 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/smartos/13.1/riak-2.0.8-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/smartos/13.1/riak-2.0.8-SmartOS-x86_64.tgz + file_size: '78015927' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/smartos/13.1/riak-2.0.8-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -1419,9 +1419,9 @@ riak_kv: - arch: x86_64 file_info: file_name: BASHOriak-2.0.8-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/solaris/10/BASHOriak-2.0.8-Solaris10-x86_64.pkg.gz - file_size: 71288567 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/solaris/10/BASHOriak-2.0.8-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/solaris/10/BASHOriak-2.0.8-Solaris10-x86_64.pkg.gz + file_size: '71288567' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/solaris/10/BASHOriak-2.0.8-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: precise @@ -1429,31 +1429,31 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.8-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/ubuntu/precise/riak_2.0.8-1_amd64.deb - file_size: 63859294 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/ubuntu/precise/riak_2.0.8-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/ubuntu/precise/riak_2.0.8-1_amd64.deb + file_size: '63859294' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/ubuntu/precise/riak_2.0.8-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.0.8-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/ubuntu/trusty/riak_2.0.8-1_amd64.deb - file_size: 57498422 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/ubuntu/trusty/riak_2.0.8-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/ubuntu/trusty/riak_2.0.8-1_amd64.deb + file_size: '57498422' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/ubuntu/trusty/riak_2.0.8-1_amd64.deb.sha - version: xenial architectures: - arch: amd64 file_info: file_name: riak_2.0.8-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/ubuntu/xenial/riak_2.0.8-1_amd64.deb - file_size: 57608136 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.8/ubuntu/xenial/riak_2.0.8-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/ubuntu/xenial/riak_2.0.8-1_amd64.deb + file_size: '57608136' + 
chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.8/ubuntu/xenial/riak_2.0.8-1_amd64.deb.sha 2.0.9: - os: source file_info: file_name: riak-2.0.9.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/riak-2.0.9.tar.gz - file_size: 16395522 + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/riak-2.0.9.tar.gz + file_size: '16395522' - os: debian versions: - version: jessie @@ -1461,17 +1461,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.0.9-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/debian/jessie/riak_2.0.9-1_amd64.deb - file_size: 57578190 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/debian/jessie/riak_2.0.9-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/debian/jessie/riak_2.0.9-1_amd64.deb + file_size: '57578190' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/debian/jessie/riak_2.0.9-1_amd64.deb.sha - version: wheezy architectures: - arch: amd64 file_info: file_name: riak_2.0.9-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/debian/wheezy/riak_2.0.9-1_amd64.deb - file_size: 63936446 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/debian/wheezy/riak_2.0.9-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/debian/wheezy/riak_2.0.9-1_amd64.deb + file_size: '63936446' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/debian/wheezy/riak_2.0.9-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -1479,15 +1479,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.0.9-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/fedora/19/riak-2.0.9-1.fc19.src.rpm - file_size: 18211267 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/fedora/19/riak-2.0.9-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/fedora/19/riak-2.0.9-1.fc19.src.rpm + file_size: '18211267' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/fedora/19/riak-2.0.9-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.9-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/fedora/19/riak-2.0.9-1.fc19.x86_64.rpm - file_size: 59641648 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/fedora/19/riak-2.0.9-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/fedora/19/riak-2.0.9-1.fc19.x86_64.rpm + file_size: '59641648' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/fedora/19/riak-2.0.9-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -1495,17 +1495,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.0.9.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/freebsd/10/riak-2.0.9.txz - file_size: 61224760 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/freebsd/10/riak-2.0.9.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/freebsd/10/riak-2.0.9.txz + file_size: '61224760' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/freebsd/10/riak-2.0.9.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.0.9-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/freebsd/9.2/riak-2.0.9-FreeBSD-amd64.tbz - file_size: 73126114 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/freebsd/9.2/riak-2.0.9-FreeBSD-amd64.tbz.sha + file_href: 
https://files.tiot.jp/riak/kv/2.0/2.0.9/freebsd/9.2/riak-2.0.9-FreeBSD-amd64.tbz + file_size: '73126114' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/freebsd/9.2/riak-2.0.9-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -1513,9 +1513,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.9-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/osx/10.8/riak-2.0.9-OSX-x86_64.tar.gz - file_size: 62396124 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/osx/10.8/riak-2.0.9-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/osx/10.8/riak-2.0.9-OSX-x86_64.tar.gz + file_size: '62396124' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/osx/10.8/riak-2.0.9-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -1523,37 +1523,42 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.9-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/rhel/5/riak-2.0.9-1.el5.x86_64.rpm - file_size: 63382006 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/rhel/5/riak-2.0.9-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/5/riak-2.0.9-1.el5.x86_64.rpm + file_size: '63382006' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/5/riak-2.0.9-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.9-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/rhel/5/riak-2.0.9-1.src.rpm - file_size: 18211132 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/rhel/5/riak-2.0.9-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/5/riak-2.0.9-1.src.rpm + file_size: '18211132' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/5/riak-2.0.9-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.0.9-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/rhel/6/riak-2.0.9-1.el6.src.rpm - file_size: 18203399 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/rhel/6/riak-2.0.9-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/6/riak-2.0.9-1.el6.src.rpm + file_size: '18203399' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/6/riak-2.0.9-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.0.9-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/6/riak-2.0.9-1.el6.x86_64.rpm + file_size: '61936396' - version: '7' architectures: - arch: source file_info: file_name: riak-2.0.9-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/rhel/7/riak-2.0.9-1.el7.centos.src.rpm - file_size: 18148415 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/rhel/7/riak-2.0.9-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/7/riak-2.0.9-1.el7.centos.src.rpm + file_size: '18148415' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/7/riak-2.0.9-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.0.9-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/rhel/7/riak-2.0.9-1.el7.centos.x86_64.rpm - file_size: 61513384 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/rhel/7/riak-2.0.9-1.el7.centos.x86_64.rpm.sha + file_href: 
https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/7/riak-2.0.9-1.el7.centos.x86_64.rpm + file_size: '61513384' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/rhel/7/riak-2.0.9-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -1561,15 +1566,15 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.9-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/sles/11/riak-2.0.9-1.SLES11.x86_64.rpm - file_size: 65814029 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/sles/11/riak-2.0.9-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/sles/11/riak-2.0.9-1.SLES11.x86_64.rpm + file_size: '65814029' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/sles/11/riak-2.0.9-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.0.9-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/sles/11/riak-2.0.9-1.src.rpm - file_size: 18192528 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/sles/11/riak-2.0.9-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/sles/11/riak-2.0.9-1.src.rpm + file_size: '18192528' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/sles/11/riak-2.0.9-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -1577,17 +1582,17 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.0.9-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/smartos/1.8/riak-2.0.9-SmartOS-x86_64.tgz - file_size: 78064454 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/smartos/1.8/riak-2.0.9-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/smartos/1.8/riak-2.0.9-SmartOS-x86_64.tgz + file_size: '78064454' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/smartos/1.8/riak-2.0.9-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: riak-2.0.9-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/smartos/13.1/riak-2.0.9-SmartOS-x86_64.tgz - file_size: 78018333 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/smartos/13.1/riak-2.0.9-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/smartos/13.1/riak-2.0.9-SmartOS-x86_64.tgz + file_size: '78018333' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/smartos/13.1/riak-2.0.9-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -1595,41 +1600,49 @@ riak_kv: - arch: x86_64 file_info: file_name: BASHOriak-2.0.9-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/solaris/10/BASHOriak-2.0.9-Solaris10-x86_64.pkg.gz - file_size: 71290411 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/solaris/10/BASHOriak-2.0.9-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/solaris/10/BASHOriak-2.0.9-Solaris10-x86_64.pkg.gz + file_size: '71290411' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/solaris/10/BASHOriak-2.0.9-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: + - version: bionic + architectures: + - arch: amd64 + file_info: + file_name: riak_2.0.9-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/ubuntu/bionic/riak_2.0.9-1_amd64.deb + file_size: '59596960' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/ubuntu/bionic/riak_2.0.9-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: 
riak_2.0.9-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/ubuntu/precise/riak_2.0.9-1_amd64.deb - file_size: 63858982 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/ubuntu/precise/riak_2.0.9-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/ubuntu/precise/riak_2.0.9-1_amd64.deb + file_size: '63858982' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/ubuntu/precise/riak_2.0.9-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.0.9-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/ubuntu/trusty/riak_2.0.9-1_amd64.deb - file_size: 57527576 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/ubuntu/trusty/riak_2.0.9-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/ubuntu/trusty/riak_2.0.9-1_amd64.deb + file_size: '57527576' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/ubuntu/trusty/riak_2.0.9-1_amd64.deb.sha - version: xenial architectures: - arch: amd64 file_info: file_name: riak_2.0.9-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/ubuntu/xenial/riak_2.0.9-1_amd64.deb - file_size: 57610004 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.0/2.0.9/ubuntu/xenial/riak_2.0.9-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/ubuntu/xenial/riak_2.0.9-1_amd64.deb + file_size: '57610004' + chksum_href: https://files.tiot.jp/riak/kv/2.0/2.0.9/ubuntu/xenial/riak_2.0.9-1_amd64.deb.sha 2.1.1: - os: source file_info: file_name: riak-2.1.1.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/riak-2.1.1.tar.gz - file_size: 18771429 + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/riak-2.1.1.tar.gz + file_size: '18771429' - os: debian versions: - version: '6' @@ -1637,17 +1650,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.1.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/debian/6/riak_2.1.1-1_amd64.deb - file_size: 61055476 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/debian/6/riak_2.1.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/debian/6/riak_2.1.1-1_amd64.deb + file_size: '61055476' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/debian/6/riak_2.1.1-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak_2.1.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/debian/7/riak_2.1.1-1_amd64.deb - file_size: 61075780 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/debian/7/riak_2.1.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/debian/7/riak_2.1.1-1_amd64.deb + file_size: '61075780' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/debian/7/riak_2.1.1-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -1655,15 +1668,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.1.1-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/fedora/19/riak-2.1.1-1.fc19.src.rpm - file_size: 18778682 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/fedora/19/riak-2.1.1-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/fedora/19/riak-2.1.1-1.fc19.src.rpm + file_size: '18778682' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/fedora/19/riak-2.1.1-1.fc19.src.rpm.sha - arch: x86_64 file_info: 
file_name: riak-2.1.1-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/fedora/19/riak-2.1.1-1.fc19.x86_64.rpm - file_size: 58992960 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/fedora/19/riak-2.1.1-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/fedora/19/riak-2.1.1-1.fc19.x86_64.rpm + file_size: '58992960' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/fedora/19/riak-2.1.1-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -1671,17 +1684,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.1.1.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/freebsd/10/riak-2.1.1.txz - file_size: 60301916 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/freebsd/10/riak-2.1.1.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/freebsd/10/riak-2.1.1.txz + file_size: '60301916' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/freebsd/10/riak-2.1.1.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.1.1-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/freebsd/9.2/riak-2.1.1-FreeBSD-amd64.tbz - file_size: 67848851 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/freebsd/9.2/riak-2.1.1-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/freebsd/9.2/riak-2.1.1-FreeBSD-amd64.tbz + file_size: '67848851' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/freebsd/9.2/riak-2.1.1-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -1689,9 +1702,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.1.1-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/osx/10.8/riak-2.1.1-OSX-x86_64.tar.gz - file_size: 61571906 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/osx/10.8/riak-2.1.1-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/osx/10.8/riak-2.1.1-OSX-x86_64.tar.gz + file_size: '61571906' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/osx/10.8/riak-2.1.1-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -1699,43 +1712,37 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.1.1-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/5/riak-2.1.1-1.el5.x86_64.rpm - file_size: 62428622 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/5/riak-2.1.1-1.el5.x86_64.rpm.sha - - arch: source - file_info: - file_name: riak-2.1.1-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/5/riak-2.1.1-1.src.rpm - file_size: 18773351 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/5/riak-2.1.1-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/rhel/5/riak-2.1.1-1.el5.x86_64.rpm + file_size: '62428622' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/rhel/5/riak-2.1.1-1.el5.x86_64.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.1.1-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/6/riak-2.1.1-1.el6.src.rpm - file_size: 18745943 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/6/riak-2.1.1-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/rhel/6/riak-2.1.1-1.el6.src.rpm + file_size: '18745943' + chksum_href: 
https://files.tiot.jp/riak/kv/2.1/2.1.1/rhel/6/riak-2.1.1-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.1.1-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/6/riak-2.1.1-1.el6.x86_64.rpm - file_size: 59427072 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/6/riak-2.1.1-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/rhel/6/riak-2.1.1-1.el6.x86_64.rpm + file_size: '59427072' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/rhel/6/riak-2.1.1-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-2.1.1-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/7/riak-2.1.1-1.el7.centos.src.rpm - file_size: 18710400 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/7/riak-2.1.1-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/rhel/7/riak-2.1.1-1.el7.centos.src.rpm + file_size: '18710400' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/rhel/7/riak-2.1.1-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.1.1-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/7/riak-2.1.1-1.el7.centos.x86_64.rpm - file_size: 58987996 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/rhel/7/riak-2.1.1-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/rhel/7/riak-2.1.1-1.el7.centos.x86_64.rpm + file_size: '58987996' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/rhel/7/riak-2.1.1-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -1743,15 +1750,32 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.1.1-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/sles/11/riak-2.1.1-1.SLES11.x86_64.rpm - file_size: 62737857 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/sles/11/riak-2.1.1-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/sles/11/riak-2.1.1-1.SLES11.x86_64.rpm + file_size: '62737857' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/sles/11/riak-2.1.1-1.SLES11.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-2.1.1-1.fc19.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/sles/11/riak-2.1.1-1.fc19.src.rpm + file_size: '18778682' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/sles/11/riak-2.1.1-1.fc19.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.1.1-1.fc19.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/sles/11/riak-2.1.1-1.fc19.x86_64.rpm + file_size: '58992960' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/sles/11/riak-2.1.1-1.fc19.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.1.1-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/sles/11/riak-2.1.1-1.src.rpm - file_size: 18764645 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/sles/11/riak-2.1.1-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/sles/11/riak-2.1.1-1.src.rpm + file_size: '18773351' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/sles/11/riak-2.1.1-1.src.rpm.sha + - arch: source + file_info: + file_name: riak-2.1.1-1.src.rpm.1 + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/sles/11/riak-2.1.1-1.src.rpm.1 + file_size: '18764645' - os: smartos versions: - version: '1.8' 
@@ -1759,17 +1783,17 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.1.1-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/smartos/1.8/riak-2.1.1-SmartOS-x86_64.tgz - file_size: 69641090 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/smartos/1.8/riak-2.1.1-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/smartos/1.8/riak-2.1.1-SmartOS-x86_64.tgz + file_size: '69641090' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/smartos/1.8/riak-2.1.1-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: riak-2.1.1-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/smartos/13.1/riak-2.1.1-SmartOS-x86_64.tgz - file_size: 69640840 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/smartos/13.1/riak-2.1.1-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/smartos/13.1/riak-2.1.1-SmartOS-x86_64.tgz + file_size: '69640840' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/smartos/13.1/riak-2.1.1-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -1777,9 +1801,9 @@ riak_kv: - arch: x86_64 file_info: file_name: BASHOriak-2.1.1-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/solaris/10/BASHOriak-2.1.1-Solaris10-x86_64.pkg.gz - file_size: 66674361 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/solaris/10/BASHOriak-2.1.1-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/solaris/10/BASHOriak-2.1.1-Solaris10-x86_64.pkg.gz + file_size: '66674361' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/solaris/10/BASHOriak-2.1.1-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -1787,31 +1811,31 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.1.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/ubuntu/lucid/riak_2.1.1-1_amd64.deb - file_size: 61046542 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/ubuntu/lucid/riak_2.1.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/ubuntu/lucid/riak_2.1.1-1_amd64.deb + file_size: '61046542' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/ubuntu/lucid/riak_2.1.1-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak_2.1.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/ubuntu/precise/riak_2.1.1-1_amd64.deb - file_size: 61019696 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/ubuntu/precise/riak_2.1.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/ubuntu/precise/riak_2.1.1-1_amd64.deb + file_size: '61019696' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/ubuntu/precise/riak_2.1.1-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.1.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/ubuntu/trusty/riak_2.1.1-1_amd64.deb - file_size: 55594822 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.1/ubuntu/trusty/riak_2.1.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/ubuntu/trusty/riak_2.1.1-1_amd64.deb + file_size: '55594822' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.1/ubuntu/trusty/riak_2.1.1-1_amd64.deb.sha 2.1.2: - os: source file_info: file_name: 
riak-2.1.2.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/riak-2.1.2.tar.gz - file_size: 18962306 + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/riak-2.1.2.tar.gz + file_size: '18962306' - os: debian versions: - version: '6' @@ -1819,17 +1843,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.1.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/debian/6/riak_2.1.2-1_amd64.deb - file_size: 63146194 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/debian/6/riak_2.1.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/debian/6/riak_2.1.2-1_amd64.deb + file_size: '63146194' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/debian/6/riak_2.1.2-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak_2.1.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/debian/7/riak_2.1.2-1_amd64.deb - file_size: 63184258 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/debian/7/riak_2.1.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/debian/7/riak_2.1.2-1_amd64.deb + file_size: '63184258' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/debian/7/riak_2.1.2-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -1837,15 +1861,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.1.2-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/fedora/19/riak-2.1.2-1.fc19.src.rpm - file_size: 18968911 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/fedora/19/riak-2.1.2-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/fedora/19/riak-2.1.2-1.fc19.src.rpm + file_size: '18968911' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/fedora/19/riak-2.1.2-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.1.2-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/fedora/19/riak-2.1.2-1.fc19.x86_64.rpm - file_size: 61017984 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/fedora/19/riak-2.1.2-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/fedora/19/riak-2.1.2-1.fc19.x86_64.rpm + file_size: '61017984' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/fedora/19/riak-2.1.2-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -1853,17 +1877,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.1.2.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/freebsd/10/riak-2.1.2.txz - file_size: 62198488 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/freebsd/10/riak-2.1.2.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/freebsd/10/riak-2.1.2.txz + file_size: '62198488' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/freebsd/10/riak-2.1.2.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.1.2-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/freebsd/9.2/riak-2.1.2-FreeBSD-amd64.tbz - file_size: 70018633 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/freebsd/9.2/riak-2.1.2-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/freebsd/9.2/riak-2.1.2-FreeBSD-amd64.tbz + file_size: '70018633' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/freebsd/9.2/riak-2.1.2-FreeBSD-amd64.tbz.sha - os: osx 
versions: - version: '10.8' @@ -1871,9 +1895,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.1.2-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/osx/10.8/riak-2.1.2-OSX-x86_64.tar.gz - file_size: 63672411 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/osx/10.8/riak-2.1.2-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/osx/10.8/riak-2.1.2-OSX-x86_64.tar.gz + file_size: '63672411' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/osx/10.8/riak-2.1.2-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -1881,43 +1905,43 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.1.2-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/5/riak-2.1.2-1.el5.x86_64.rpm - file_size: 64601678 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/5/riak-2.1.2-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/5/riak-2.1.2-1.el5.x86_64.rpm + file_size: '64601678' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/5/riak-2.1.2-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.1.2-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/5/riak-2.1.2-1.src.rpm - file_size: 18964293 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/5/riak-2.1.2-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/5/riak-2.1.2-1.src.rpm + file_size: '18964293' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/5/riak-2.1.2-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.1.2-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/6/riak-2.1.2-1.el6.src.rpm - file_size: 18932446 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/6/riak-2.1.2-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/6/riak-2.1.2-1.el6.src.rpm + file_size: '18932446' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/6/riak-2.1.2-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.1.2-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/6/riak-2.1.2-1.el6.x86_64.rpm - file_size: 61442916 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/6/riak-2.1.2-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/6/riak-2.1.2-1.el6.x86_64.rpm + file_size: '61442916' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/6/riak-2.1.2-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-2.1.2-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/7/riak-2.1.2-1.el7.centos.src.rpm - file_size: 18897992 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/7/riak-2.1.2-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/7/riak-2.1.2-1.el7.centos.src.rpm + file_size: '18897992' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/7/riak-2.1.2-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.1.2-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/7/riak-2.1.2-1.el7.centos.x86_64.rpm - file_size: 61015552 - chksum_href: 
http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/rhel/7/riak-2.1.2-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/7/riak-2.1.2-1.el7.centos.x86_64.rpm + file_size: '61015552' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/rhel/7/riak-2.1.2-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -1925,15 +1949,15 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.1.2-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/sles/11/riak-2.1.2-1.SLES11.x86_64.rpm - file_size: 64726788 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/sles/11/riak-2.1.2-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/sles/11/riak-2.1.2-1.SLES11.x86_64.rpm + file_size: '64726788' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/sles/11/riak-2.1.2-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: riak-2.1.2-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/sles/11/riak-2.1.2-1.src.rpm - file_size: 18951089 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/sles/11/riak-2.1.2-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/sles/11/riak-2.1.2-1.src.rpm + file_size: '18951089' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/sles/11/riak-2.1.2-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -1941,17 +1965,17 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.1.2-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/smartos/1.8/riak-2.1.2-SmartOS-x86_64.tgz - file_size: 71864509 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/smartos/1.8/riak-2.1.2-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/smartos/1.8/riak-2.1.2-SmartOS-x86_64.tgz + file_size: '71864509' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/smartos/1.8/riak-2.1.2-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: riak-2.1.2-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/smartos/13.1/riak-2.1.2-SmartOS-x86_64.tgz - file_size: 71851231 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/smartos/13.1/riak-2.1.2-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/smartos/13.1/riak-2.1.2-SmartOS-x86_64.tgz + file_size: '71851231' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/smartos/13.1/riak-2.1.2-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -1959,9 +1983,9 @@ riak_kv: - arch: x86_64 file_info: file_name: BASHOriak-2.1.2-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/solaris/10/BASHOriak-2.1.2-Solaris10-x86_64.pkg.gz - file_size: 68829828 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/solaris/10/BASHOriak-2.1.2-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/solaris/10/BASHOriak-2.1.2-Solaris10-x86_64.pkg.gz + file_size: '68829828' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/solaris/10/BASHOriak-2.1.2-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -1969,31 +1993,31 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.1.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/ubuntu/lucid/riak_2.1.2-1_amd64.deb - file_size: 63130670 - chksum_href: 
http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/ubuntu/lucid/riak_2.1.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/ubuntu/lucid/riak_2.1.2-1_amd64.deb + file_size: '63130670' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/ubuntu/lucid/riak_2.1.2-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak_2.1.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/ubuntu/precise/riak_2.1.2-1_amd64.deb - file_size: 63119860 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/ubuntu/precise/riak_2.1.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/ubuntu/precise/riak_2.1.2-1_amd64.deb + file_size: '63119860' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/ubuntu/precise/riak_2.1.2-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak_2.1.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/ubuntu/trusty/riak_2.1.2-1_amd64.deb - file_size: 57470564 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.2/ubuntu/trusty/riak_2.1.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/ubuntu/trusty/riak_2.1.2-1_amd64.deb + file_size: '57470564' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.2/ubuntu/trusty/riak_2.1.2-1_amd64.deb.sha 2.1.3: - os: source file_info: file_name: riak-2.1.3.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/riak-2.1.3.tar.gz - file_size: 18962967 + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/riak-2.1.3.tar.gz + file_size: '18962967' - os: debian versions: - version: '6' @@ -2001,17 +2025,17 @@ riak_kv: - arch: amd64 file_info: file_name: riak_2.1.3-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/debian/6/riak_2.1.3-1_amd64.deb - file_size: 63146820 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/debian/6/riak_2.1.3-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/debian/6/riak_2.1.3-1_amd64.deb + file_size: '63146820' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/debian/6/riak_2.1.3-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak_2.1.3-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/debian/7/riak_2.1.3-1_amd64.deb - file_size: 63177214 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/debian/7/riak_2.1.3-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/debian/7/riak_2.1.3-1_amd64.deb + file_size: '63177214' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/debian/7/riak_2.1.3-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -2019,15 +2043,15 @@ riak_kv: - arch: source file_info: file_name: riak-2.1.3-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/fedora/19/riak-2.1.3-1.fc19.src.rpm - file_size: 18970601 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/fedora/19/riak-2.1.3-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/fedora/19/riak-2.1.3-1.fc19.src.rpm + file_size: '18970601' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/fedora/19/riak-2.1.3-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.1.3-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/fedora/19/riak-2.1.3-1.fc19.x86_64.rpm - file_size: 61020488 - 
chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/fedora/19/riak-2.1.3-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/fedora/19/riak-2.1.3-1.fc19.x86_64.rpm + file_size: '61020488' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/fedora/19/riak-2.1.3-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -2035,17 +2059,17 @@ riak_kv: - arch: txz file_info: file_name: riak-2.1.3.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/freebsd/10/riak-2.1.3.txz - file_size: 62205548 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/freebsd/10/riak-2.1.3.txz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/freebsd/10/riak-2.1.3.txz + file_size: '62205548' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/freebsd/10/riak-2.1.3.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: riak-2.1.3-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/freebsd/9.2/riak-2.1.3-FreeBSD-amd64.tbz - file_size: 70012757 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/freebsd/9.2/riak-2.1.3-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/freebsd/9.2/riak-2.1.3-FreeBSD-amd64.tbz + file_size: '70012757' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/freebsd/9.2/riak-2.1.3-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -2053,9 +2077,9 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.1.3-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/osx/10.8/riak-2.1.3-OSX-x86_64.tar.gz - file_size: 63671695 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/osx/10.8/riak-2.1.3-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/osx/10.8/riak-2.1.3-OSX-x86_64.tar.gz + file_size: '63671695' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/osx/10.8/riak-2.1.3-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -2063,43 +2087,37 @@ riak_kv: - arch: x86_64 file_info: file_name: riak-2.1.3-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/5/riak-2.1.3-1.el5.x86_64.rpm - file_size: 64602471 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/5/riak-2.1.3-1.el5.x86_64.rpm.sha - - arch: source - file_info: - file_name: riak-2.1.3-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/5/riak-2.1.3-1.src.rpm - file_size: 18968375 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/5/riak-2.1.3-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/rhel/5/riak-2.1.3-1.el5.x86_64.rpm + file_size: '64602471' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/rhel/5/riak-2.1.3-1.el5.x86_64.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-2.1.3-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/6/riak-2.1.3-1.el6.src.rpm - file_size: 18939098 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/6/riak-2.1.3-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/rhel/6/riak-2.1.3-1.el6.src.rpm + file_size: '18939098' + chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/rhel/6/riak-2.1.3-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-2.1.3-1.el6.x86_64.rpm - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/6/riak-2.1.3-1.el6.x86_64.rpm
-          file_size: 61444644
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/6/riak-2.1.3-1.el6.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/rhel/6/riak-2.1.3-1.el6.x86_64.rpm
+          file_size: '61444644'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/rhel/6/riak-2.1.3-1.el6.x86_64.rpm.sha
     - version: '7'
       architectures:
       - arch: source
         file_info:
           file_name: riak-2.1.3-1.el7.centos.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/7/riak-2.1.3-1.el7.centos.src.rpm
-          file_size: 18893135
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/7/riak-2.1.3-1.el7.centos.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/rhel/7/riak-2.1.3-1.el7.centos.src.rpm
+          file_size: '18893135'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/rhel/7/riak-2.1.3-1.el7.centos.src.rpm.sha
       - arch: x86_64
         file_info:
           file_name: riak-2.1.3-1.el7.centos.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/7/riak-2.1.3-1.el7.centos.x86_64.rpm
-          file_size: 61017536
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/rhel/7/riak-2.1.3-1.el7.centos.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/rhel/7/riak-2.1.3-1.el7.centos.x86_64.rpm
+          file_size: '61017536'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/rhel/7/riak-2.1.3-1.el7.centos.x86_64.rpm.sha
   - os: sles
     versions:
     - version: '11'
@@ -2107,15 +2125,32 @@ riak_kv:
       - arch: x86_64
         file_info:
           file_name: riak-2.1.3-1.SLES11.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/sles/11/riak-2.1.3-1.SLES11.x86_64.rpm
-          file_size: 64729668
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/sles/11/riak-2.1.3-1.SLES11.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/sles/11/riak-2.1.3-1.SLES11.x86_64.rpm
+          file_size: '64729668'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/sles/11/riak-2.1.3-1.SLES11.x86_64.rpm.sha
+      - arch: source
+        file_info:
+          file_name: riak-2.1.3-1.fc19.src.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/sles/11/riak-2.1.3-1.fc19.src.rpm
+          file_size: '18970601'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/sles/11/riak-2.1.3-1.fc19.src.rpm.sha
+      - arch: x86_64
+        file_info:
+          file_name: riak-2.1.3-1.fc19.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/sles/11/riak-2.1.3-1.fc19.x86_64.rpm
+          file_size: '61020488'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/sles/11/riak-2.1.3-1.fc19.x86_64.rpm.sha
       - arch: source
         file_info:
           file_name: riak-2.1.3-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/sles/11/riak-2.1.3-1.src.rpm
-          file_size: 18953155
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/sles/11/riak-2.1.3-1.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/sles/11/riak-2.1.3-1.src.rpm
+          file_size: '18968375'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/sles/11/riak-2.1.3-1.src.rpm.sha
+      - arch: source
+        file_info:
+          file_name: riak-2.1.3-1.src.rpm.1
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/sles/11/riak-2.1.3-1.src.rpm.1
+          file_size: '18953155'
   - os: smartos
     versions:
     - version: '1.8'
@@ -2123,17 +2158,17 @@ riak_kv:
       - arch: x86_64
         file_info:
           file_name: riak-2.1.3-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/smartos/1.8/riak-2.1.3-SmartOS-x86_64.tgz
-          file_size: 71852022
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/smartos/1.8/riak-2.1.3-SmartOS-x86_64.tgz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/smartos/1.8/riak-2.1.3-SmartOS-x86_64.tgz
+          file_size: '71852022'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/smartos/1.8/riak-2.1.3-SmartOS-x86_64.tgz.sha
     - version: '13.1'
       architectures:
       - arch: x86_64
         file_info:
           file_name: riak-2.1.3-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/smartos/13.1/riak-2.1.3-SmartOS-x86_64.tgz
-          file_size: 71847142
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/smartos/13.1/riak-2.1.3-SmartOS-x86_64.tgz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/smartos/13.1/riak-2.1.3-SmartOS-x86_64.tgz
+          file_size: '71847142'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/smartos/13.1/riak-2.1.3-SmartOS-x86_64.tgz.sha
   - os: solaris
     versions:
    - version: '10'
@@ -2141,9 +2176,9 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: BASHOriak-2.1.3-Solaris10-x86_64.pkg.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/solaris/10/BASHOriak-2.1.3-Solaris10-x86_64.pkg.gz
-          file_size: 68838010
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/solaris/10/BASHOriak-2.1.3-Solaris10-x86_64.pkg.gz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/solaris/10/BASHOriak-2.1.3-Solaris10-x86_64.pkg.gz
+          file_size: '68838010'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/solaris/10/BASHOriak-2.1.3-Solaris10-x86_64.pkg.gz.sha
   - os: ubuntu
     versions:
     - version: lucid
@@ -2151,31 +2186,31 @@ riak_kv:
      - arch: amd64
        file_info:
          file_name: riak_2.1.3-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/ubuntu/lucid/riak_2.1.3-1_amd64.deb
-          file_size: 63136488
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/ubuntu/lucid/riak_2.1.3-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/ubuntu/lucid/riak_2.1.3-1_amd64.deb
+          file_size: '63136488'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/ubuntu/lucid/riak_2.1.3-1_amd64.deb.sha
     - version: precise
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.1.3-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/ubuntu/precise/riak_2.1.3-1_amd64.deb
-          file_size: 63116588
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/ubuntu/precise/riak_2.1.3-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/ubuntu/precise/riak_2.1.3-1_amd64.deb
+          file_size: '63116588'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/ubuntu/precise/riak_2.1.3-1_amd64.deb.sha
     - version: trusty
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.1.3-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/ubuntu/trusty/riak_2.1.3-1_amd64.deb
-          file_size: 57483354
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.3/ubuntu/trusty/riak_2.1.3-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/ubuntu/trusty/riak_2.1.3-1_amd64.deb
+          file_size: '57483354'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.3/ubuntu/trusty/riak_2.1.3-1_amd64.deb.sha
   2.1.4:
   - os: source
     file_info:
       file_name: riak-2.1.4.tar.gz
-      file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/riak-2.1.4.tar.gz
-      file_size: 18971981
+      file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/riak-2.1.4.tar.gz
+      file_size: '18971981'
   - os: debian
     versions:
     - version: '6'
@@ -2183,17 +2218,17 @@ riak_kv:
      - arch: amd64
        file_info:
          file_name: riak_2.1.4-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/debian/6/riak_2.1.4-1_amd64.deb
-          file_size: 63946634
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/debian/6/riak_2.1.4-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/debian/6/riak_2.1.4-1_amd64.deb
+          file_size: '63946634'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/debian/6/riak_2.1.4-1_amd64.deb.sha
     - version: '7'
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.1.4-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/debian/7/riak_2.1.4-1_amd64.deb
-          file_size: 63977222
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/debian/7/riak_2.1.4-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/debian/7/riak_2.1.4-1_amd64.deb
+          file_size: '63977222'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/debian/7/riak_2.1.4-1_amd64.deb.sha
   - os: fedora
     versions:
     - version: '19'
@@ -2201,15 +2236,15 @@ riak_kv:
      - arch: source
        file_info:
          file_name: riak-2.1.4-1.fc19.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/fedora/19/riak-2.1.4-1.fc19.src.rpm
-          file_size: 18979318
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/fedora/19/riak-2.1.4-1.fc19.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/fedora/19/riak-2.1.4-1.fc19.src.rpm
+          file_size: '18979318'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/fedora/19/riak-2.1.4-1.fc19.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.1.4-1.fc19.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/fedora/19/riak-2.1.4-1.fc19.x86_64.rpm
-          file_size: 61376808
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/fedora/19/riak-2.1.4-1.fc19.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/fedora/19/riak-2.1.4-1.fc19.x86_64.rpm
+          file_size: '61376808'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/fedora/19/riak-2.1.4-1.fc19.x86_64.rpm.sha
   - os: freebsd
     versions:
     - version: '10'
@@ -2217,17 +2252,17 @@ riak_kv:
      - arch: txz
        file_info:
          file_name: riak-2.1.4.txz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/freebsd/10/riak-2.1.4.txz
-          file_size: 63117152
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/freebsd/10/riak-2.1.4.txz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/freebsd/10/riak-2.1.4.txz
+          file_size: '63117152'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/freebsd/10/riak-2.1.4.txz.sha
     - version: '9.2'
       architectures:
      - arch: amd64
        file_info:
          file_name: riak-2.1.4-FreeBSD-amd64.tbz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/freebsd/9.2/riak-2.1.4-FreeBSD-amd64.tbz
-          file_size: 75438863
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/freebsd/9.2/riak-2.1.4-FreeBSD-amd64.tbz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/freebsd/9.2/riak-2.1.4-FreeBSD-amd64.tbz
+          file_size: '75438863'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/freebsd/9.2/riak-2.1.4-FreeBSD-amd64.tbz.sha
   - os: osx
     versions:
     - version: '10.8'
@@ -2235,9 +2270,9 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.1.4-OSX-x86_64.tar.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/osx/10.8/riak-2.1.4-OSX-x86_64.tar.gz
-          file_size: 64601632
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/osx/10.8/riak-2.1.4-OSX-x86_64.tar.gz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/osx/10.8/riak-2.1.4-OSX-x86_64.tar.gz
+          file_size: '64601632'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/osx/10.8/riak-2.1.4-OSX-x86_64.tar.gz.sha
   - os: rhel
     versions:
     - version: '5'
@@ -2245,43 +2280,37 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.1.4-1.el5.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/5/riak-2.1.4-1.el5.x86_64.rpm
-          file_size: 65472200
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/5/riak-2.1.4-1.el5.x86_64.rpm.sha
-      - arch: source
-        file_info:
-          file_name: riak-2.1.4-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/5/riak-2.1.4-1.src.rpm
-          file_size: 18976626
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/5/riak-2.1.4-1.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/rhel/5/riak-2.1.4-1.el5.x86_64.rpm
+          file_size: '65472200'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/rhel/5/riak-2.1.4-1.el5.x86_64.rpm.sha
     - version: '6'
       architectures:
      - arch: source
        file_info:
          file_name: riak-2.1.4-1.el6.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/6/riak-2.1.4-1.el6.src.rpm
-          file_size: 18943511
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/6/riak-2.1.4-1.el6.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/rhel/6/riak-2.1.4-1.el6.src.rpm
+          file_size: '18943511'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/rhel/6/riak-2.1.4-1.el6.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.1.4-1.el6.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/6/riak-2.1.4-1.el6.x86_64.rpm
-          file_size: 61757540
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/6/riak-2.1.4-1.el6.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/rhel/6/riak-2.1.4-1.el6.x86_64.rpm
+          file_size: '61757540'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/rhel/6/riak-2.1.4-1.el6.x86_64.rpm.sha
     - version: '7'
       architectures:
      - arch: source
        file_info:
          file_name: riak-2.1.4-1.el7.centos.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/7/riak-2.1.4-1.el7.centos.src.rpm
-          file_size: 18977548
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/7/riak-2.1.4-1.el7.centos.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/rhel/7/riak-2.1.4-1.el7.centos.src.rpm
+          file_size: '18977548'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/rhel/7/riak-2.1.4-1.el7.centos.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.1.4-1.el7.centos.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/7/riak-2.1.4-1.el7.centos.x86_64.rpm
-          file_size: 63249548
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/rhel/7/riak-2.1.4-1.el7.centos.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/rhel/7/riak-2.1.4-1.el7.centos.x86_64.rpm
+          file_size: '63249548'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/rhel/7/riak-2.1.4-1.el7.centos.x86_64.rpm.sha
   - os: sles
     versions:
     - version: '11'
@@ -2289,15 +2318,32 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.1.4-1.SLES11.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/sles/11/riak-2.1.4-1.SLES11.x86_64.rpm
-          file_size: 68247770
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/sles/11/riak-2.1.4-1.SLES11.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/sles/11/riak-2.1.4-1.SLES11.x86_64.rpm
+          file_size: '68247770'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/sles/11/riak-2.1.4-1.SLES11.x86_64.rpm.sha
+      - arch: source
+        file_info:
+          file_name: riak-2.1.4-1.fc19.src.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/sles/11/riak-2.1.4-1.fc19.src.rpm
+          file_size: '18979318'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/sles/11/riak-2.1.4-1.fc19.src.rpm.sha
+      - arch: x86_64
+        file_info:
+          file_name: riak-2.1.4-1.fc19.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/sles/11/riak-2.1.4-1.fc19.x86_64.rpm
+          file_size: '61376808'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/sles/11/riak-2.1.4-1.fc19.x86_64.rpm.sha
      - arch: source
        file_info:
          file_name: riak-2.1.4-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/sles/11/riak-2.1.4-1.src.rpm
-          file_size: 18965164
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/sles/11/riak-2.1.4-1.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/sles/11/riak-2.1.4-1.src.rpm
+          file_size: '18976626'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/sles/11/riak-2.1.4-1.src.rpm.sha
+      - arch: source
+        file_info:
+          file_name: riak-2.1.4-1.src.rpm.1
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/sles/11/riak-2.1.4-1.src.rpm.1
+          file_size: '18965164'
   - os: smartos
     versions:
     - version: '1.8'
@@ -2305,17 +2351,17 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.1.4-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz
-          file_size: 80602144
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz
+          file_size: '80602144'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/1.8/riak-2.1.4-SmartOS-x86_64.tgz.sha
     - version: '13.1'
       architectures:
      - arch: x86_64
        file_info:
          file_name: riak-2.1.4-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/smartos/13.1/riak-2.1.4-SmartOS-x86_64.tgz
-          file_size: 80554489
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/smartos/13.1/riak-2.1.4-SmartOS-x86_64.tgz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/13.1/riak-2.1.4-SmartOS-x86_64.tgz
+          file_size: '80554489'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/smartos/13.1/riak-2.1.4-SmartOS-x86_64.tgz.sha
   - os: solaris
     versions:
     - version: '10'
@@ -2323,9 +2369,9 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: BASHOriak-2.1.4-Solaris10-x86_64.pkg.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/solaris/10/BASHOriak-2.1.4-Solaris10-x86_64.pkg.gz
-          file_size: 73937786
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/solaris/10/BASHOriak-2.1.4-Solaris10-x86_64.pkg.gz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/solaris/10/BASHOriak-2.1.4-Solaris10-x86_64.pkg.gz
+          file_size: '73937786'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/solaris/10/BASHOriak-2.1.4-Solaris10-x86_64.pkg.gz.sha
   - os: ubuntu
     versions:
     - version: lucid
@@ -2333,54 +2379,54 @@ riak_kv:
      - arch: amd64
        file_info:
          file_name: riak_2.1.4-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/ubuntu/lucid/riak_2.1.4-1_amd64.deb
-          file_size: 63932008
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/ubuntu/lucid/riak_2.1.4-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/ubuntu/lucid/riak_2.1.4-1_amd64.deb
+          file_size: '63932008'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/ubuntu/lucid/riak_2.1.4-1_amd64.deb.sha
     - version: precise
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.1.4-2_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/ubuntu/precise/riak_2.1.4-2_amd64.deb
-          file_size: 66028586
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/ubuntu/precise/riak_2.1.4-2_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/ubuntu/precise/riak_2.1.4-2_amd64.deb
+          file_size: '66028586'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/ubuntu/precise/riak_2.1.4-2_amd64.deb.sha
     - version: trusty
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.1.4-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/ubuntu/trusty/riak_2.1.4-1_amd64.deb
-          file_size: 59328294
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.1/2.1.4/ubuntu/trusty/riak_2.1.4-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/ubuntu/trusty/riak_2.1.4-1_amd64.deb
+          file_size: '59328294'
+          chksum_href: https://files.tiot.jp/riak/kv/2.1/2.1.4/ubuntu/trusty/riak_2.1.4-1_amd64.deb.sha
   2.2.0:
+  - os: source
+    file_info:
+      file_name: riak-2.2.0-devrel.tar.gz
+      file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/riak-2.2.0-devrel.tar.gz
+      file_size: '595692529'
   - os: source
     file_info:
       file_name: riak-2.2.0.tar.gz
-      file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/riak-2.2.0.tar.gz
-      file_size: 20926448
+      file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/riak-2.2.0.tar.gz
+      file_size: '20926448'
   - os: debian
     versions:
     - version: jessie
       architectures:
-      - arch: unknown
-        file_info:
-          file_name: riak-2.2.0-devrel.tar.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/debian/jessie/riak-2.2.0-devrel.tar.gz
-          file_size: 595692529
      - arch: amd64
        file_info:
          file_name: riak_2.2.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/debian/jessie/riak_2.2.0-1_amd64.deb
-          file_size: 60612854
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/debian/jessie/riak_2.2.0-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/debian/jessie/riak_2.2.0-1_amd64.deb
+          file_size: '60612854'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/debian/jessie/riak_2.2.0-1_amd64.deb.sha
     - version: wheezy
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/debian/wheezy/riak_2.2.0-1_amd64.deb
-          file_size: 67348270
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/debian/wheezy/riak_2.2.0-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/debian/wheezy/riak_2.2.0-1_amd64.deb
+          file_size: '67348270'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/debian/wheezy/riak_2.2.0-1_amd64.deb.sha
   - os: osx
     versions:
     - version: '10.8'
@@ -2388,9 +2434,9 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.2.0-OSX-x86_64.tar.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/osx/10.8/riak-2.2.0-OSX-x86_64.tar.gz
-          file_size: 65823227
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/osx/10.8/riak-2.2.0-OSX-x86_64.tar.gz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/osx/10.8/riak-2.2.0-OSX-x86_64.tar.gz
+          file_size: '65823227'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/osx/10.8/riak-2.2.0-OSX-x86_64.tar.gz.sha
   - os: rhel
     versions:
     - version: '6'
@@ -2398,29 +2444,29 @@ riak_kv:
      - arch: source
        file_info:
          file_name: riak-2.2.0-1.el6.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/rhel/6/riak-2.2.0-1.el6.src.rpm
-          file_size: 20925552
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/rhel/6/riak-2.2.0-1.el6.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/rhel/6/riak-2.2.0-1.el6.src.rpm
+          file_size: '20925552'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/rhel/6/riak-2.2.0-1.el6.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.2.0-1.el6.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/rhel/6/riak-2.2.0-1.el6.x86_64.rpm
-          file_size: 65153400
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/rhel/6/riak-2.2.0-1.el6.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/rhel/6/riak-2.2.0-1.el6.x86_64.rpm
+          file_size: '65153400'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/rhel/6/riak-2.2.0-1.el6.x86_64.rpm.sha
     - version: '7'
       architectures:
      - arch: source
        file_info:
          file_name: riak-2.2.0-1.el7.centos.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/rhel/7/riak-2.2.0-1.el7.centos.src.rpm
-          file_size: 20858211
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/rhel/7/riak-2.2.0-1.el7.centos.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/rhel/7/riak-2.2.0-1.el7.centos.src.rpm
+          file_size: '20858211'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/rhel/7/riak-2.2.0-1.el7.centos.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.2.0-1.el7.centos.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/rhel/7/riak-2.2.0-1.el7.centos.x86_64.rpm
-          file_size: 64718424
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/rhel/7/riak-2.2.0-1.el7.centos.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/rhel/7/riak-2.2.0-1.el7.centos.x86_64.rpm
+          file_size: '64718424'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/rhel/7/riak-2.2.0-1.el7.centos.x86_64.rpm.sha
   - os: sles
     versions:
     - version: '11'
@@ -2428,15 +2474,15 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.2.0-1.SLES11.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/sles/11/riak-2.2.0-1.SLES11.x86_64.rpm
-          file_size: 69022207
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/sles/11/riak-2.2.0-1.SLES11.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/sles/11/riak-2.2.0-1.SLES11.x86_64.rpm
+          file_size: '69022207'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/sles/11/riak-2.2.0-1.SLES11.x86_64.rpm.sha
      - arch: source
        file_info:
          file_name: riak-2.2.0-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/sles/11/riak-2.2.0-1.src.rpm
-          file_size: 20913618
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/sles/11/riak-2.2.0-1.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/sles/11/riak-2.2.0-1.src.rpm
+          file_size: '20913618'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/sles/11/riak-2.2.0-1.src.rpm.sha
   - os: solaris
     versions:
     - version: '10'
@@ -2444,9 +2490,9 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: BASHOriak-2.2.0-Solaris10-x86_64.pkg.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/solaris/10/BASHOriak-2.2.0-Solaris10-x86_64.pkg.gz
-          file_size: 74713413
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/solaris/10/BASHOriak-2.2.0-Solaris10-x86_64.pkg.gz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/solaris/10/BASHOriak-2.2.0-Solaris10-x86_64.pkg.gz
+          file_size: '74713413'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/solaris/10/BASHOriak-2.2.0-Solaris10-x86_64.pkg.gz.sha
   - os: ubuntu
     versions:
     - version: precise
@@ -2454,31 +2500,31 @@ riak_kv:
      - arch: amd64
        file_info:
          file_name: riak_2.2.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/ubuntu/precise/riak_2.2.0-1_amd64.deb
-          file_size: 67285650
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/ubuntu/precise/riak_2.2.0-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/ubuntu/precise/riak_2.2.0-1_amd64.deb
+          file_size: '67285650'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/ubuntu/precise/riak_2.2.0-1_amd64.deb.sha
     - version: trusty
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/ubuntu/trusty/riak_2.2.0-1_amd64.deb
-          file_size: 60630540
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/ubuntu/trusty/riak_2.2.0-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/ubuntu/trusty/riak_2.2.0-1_amd64.deb
+          file_size: '60630540'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/ubuntu/trusty/riak_2.2.0-1_amd64.deb.sha
     - version: xenial
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/ubuntu/xenial/riak_2.2.0-1_amd64.deb
-          file_size: 60694588
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.0/ubuntu/xenial/riak_2.2.0-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/ubuntu/xenial/riak_2.2.0-1_amd64.deb
+          file_size: '60694588'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.0/ubuntu/xenial/riak_2.2.0-1_amd64.deb.sha
   2.2.1:
   - os: source
     file_info:
       file_name: riak-2.2.1.tar.gz
-      file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/riak-2.2.1.tar.gz
-      file_size: 21215456
+      file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/riak-2.2.1.tar.gz
+      file_size: '21215456'
   - os: debian
     versions:
     - version: jessie
@@ -2486,17 +2532,17 @@ riak_kv:
      - arch: amd64
        file_info:
          file_name: riak_2.2.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/debian/jessie/riak_2.2.1-1_amd64.deb
-          file_size: 60631484
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/debian/jessie/riak_2.2.1-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/debian/jessie/riak_2.2.1-1_amd64.deb
+          file_size: '60631484'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/debian/jessie/riak_2.2.1-1_amd64.deb.sha
     - version: wheezy
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/debian/wheezy/riak_2.2.1-1_amd64.deb
-          file_size: 67358984
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/debian/wheezy/riak_2.2.1-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/debian/wheezy/riak_2.2.1-1_amd64.deb
+          file_size: '67358984'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/debian/wheezy/riak_2.2.1-1_amd64.deb.sha
   - os: freebsd
     versions:
     - version: '10'
@@ -2504,9 +2550,9 @@ riak_kv:
      - arch: txz
        file_info:
          file_name: riak-2.2.1.txz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/freebsd/10/riak-2.2.1.txz
-          file_size: 64340688
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/freebsd/10/riak-2.2.1.txz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/freebsd/10/riak-2.2.1.txz
+          file_size: '64340688'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/freebsd/10/riak-2.2.1.txz.sha
   - os: osx
     versions:
     - version: '10.8'
@@ -2514,9 +2560,9 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.2.1-OSX-x86_64.tar.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/osx/10.8/riak-2.2.1-OSX-x86_64.tar.gz
-          file_size: 66974395
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/osx/10.8/riak-2.2.1-OSX-x86_64.tar.gz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/osx/10.8/riak-2.2.1-OSX-x86_64.tar.gz
+          file_size: '66974395'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/osx/10.8/riak-2.2.1-OSX-x86_64.tar.gz.sha
   - os: rhel
     versions:
     - version: '6'
@@ -2524,29 +2570,29 @@ riak_kv:
      - arch: source
        file_info:
          file_name: riak-2.2.1-1.el6.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/rhel/6/riak-2.2.1-1.el6.src.rpm
-          file_size: 21217076
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/rhel/6/riak-2.2.1-1.el6.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/rhel/6/riak-2.2.1-1.el6.src.rpm
+          file_size: '21217076'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/rhel/6/riak-2.2.1-1.el6.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.2.1-1.el6.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/rhel/6/riak-2.2.1-1.el6.x86_64.rpm
-          file_size: 65156852
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/rhel/6/riak-2.2.1-1.el6.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/rhel/6/riak-2.2.1-1.el6.x86_64.rpm
+          file_size: '65156852'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/rhel/6/riak-2.2.1-1.el6.x86_64.rpm.sha
     - version: '7'
       architectures:
      - arch: source
        file_info:
          file_name: riak-2.2.1-1.el7.centos.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/rhel/7/riak-2.2.1-1.el7.centos.src.rpm
-          file_size: 21146771
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/rhel/7/riak-2.2.1-1.el7.centos.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/rhel/7/riak-2.2.1-1.el7.centos.src.rpm
+          file_size: '21146771'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/rhel/7/riak-2.2.1-1.el7.centos.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.2.1-1.el7.centos.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/rhel/7/riak-2.2.1-1.el7.centos.x86_64.rpm
-          file_size: 64698968
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/rhel/7/riak-2.2.1-1.el7.centos.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/rhel/7/riak-2.2.1-1.el7.centos.x86_64.rpm
+          file_size: '64698968'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/rhel/7/riak-2.2.1-1.el7.centos.x86_64.rpm.sha
   - os: sles
     versions:
     - version: '11'
@@ -2554,15 +2600,15 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.2.1-1.SLES11.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/sles/11/riak-2.2.1-1.SLES11.x86_64.rpm
-          file_size: 68988567
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/sles/11/riak-2.2.1-1.SLES11.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/sles/11/riak-2.2.1-1.SLES11.x86_64.rpm
+          file_size: '68988567'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/sles/11/riak-2.2.1-1.SLES11.x86_64.rpm.sha
      - arch: source
        file_info:
          file_name: riak-2.2.1-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/sles/11/riak-2.2.1-1.src.rpm
-          file_size: 21204202
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/sles/11/riak-2.2.1-1.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/sles/11/riak-2.2.1-1.src.rpm
+          file_size: '21204202'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/sles/11/riak-2.2.1-1.src.rpm.sha
   - os: ubuntu
     versions:
     - version: precise
@@ -2570,31 +2616,31 @@ riak_kv:
      - arch: amd64
        file_info:
          file_name: riak_2.2.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/ubuntu/precise/riak_2.2.1-1_amd64.deb
-          file_size: 67295178
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/ubuntu/precise/riak_2.2.1-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/ubuntu/precise/riak_2.2.1-1_amd64.deb
+          file_size: '67295178'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/ubuntu/precise/riak_2.2.1-1_amd64.deb.sha
     - version: trusty
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/ubuntu/trusty/riak_2.2.1-1_amd64.deb
-          file_size: 60662210
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/ubuntu/trusty/riak_2.2.1-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/ubuntu/trusty/riak_2.2.1-1_amd64.deb
+          file_size: '60662210'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/ubuntu/trusty/riak_2.2.1-1_amd64.deb.sha
     - version: xenial
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/ubuntu/xenial/riak_2.2.1-1_amd64.deb
-          file_size: 60707736
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.1/ubuntu/xenial/riak_2.2.1-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/ubuntu/xenial/riak_2.2.1-1_amd64.deb
+          file_size: '60707736'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.1/ubuntu/xenial/riak_2.2.1-1_amd64.deb.sha
   2.2.2:
   - os: source
     file_info:
       file_name: riak-2.2.2.tar.gz
-      file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/riak-2.2.2.tar.gz
-      file_size: 16458565
+      file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/riak-2.2.2.tar.gz
+      file_size: '16458565'
   - os: debian
     versions:
     - version: jessie
@@ -2602,17 +2648,17 @@ riak_kv:
      - arch: amd64
        file_info:
          file_name: riak_2.2.2-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/debian/jessie/riak_2.2.2-1_amd64.deb
-          file_size: 60638660
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/debian/jessie/riak_2.2.2-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/debian/jessie/riak_2.2.2-1_amd64.deb
+          file_size: '60638660'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/debian/jessie/riak_2.2.2-1_amd64.deb.sha
     - version: wheezy
       architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.2-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/debian/wheezy/riak_2.2.2-1_amd64.deb
-          file_size: 67365406
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/debian/wheezy/riak_2.2.2-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/debian/wheezy/riak_2.2.2-1_amd64.deb
+          file_size: '67365406'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/debian/wheezy/riak_2.2.2-1_amd64.deb.sha
  - os: freebsd
    versions:
    - version: '10'
@@ -2620,9 +2666,9 @@ riak_kv:
      - arch: txz
        file_info:
          file_name: riak-2.2.2.txz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/freebsd/10/riak-2.2.2.txz
-          file_size: 64346632
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/freebsd/10/riak-2.2.2.txz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/freebsd/10/riak-2.2.2.txz
+          file_size: '64346632'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/freebsd/10/riak-2.2.2.txz.sha
  - os: osx
    versions:
    - version: '10.8'
@@ -2630,9 +2676,9 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.2.2-OSX-x86_64.tar.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/osx/10.8/riak-2.2.2-OSX-x86_64.tar.gz
-          file_size: 71304579
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/osx/10.8/riak-2.2.2-OSX-x86_64.tar.gz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/osx/10.8/riak-2.2.2-OSX-x86_64.tar.gz
+          file_size: '71304579'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/osx/10.8/riak-2.2.2-OSX-x86_64.tar.gz.sha
  - os: rhel
    versions:
    - version: '6'
@@ -2640,29 +2686,29 @@ riak_kv:
      - arch: source
        file_info:
          file_name: riak-2.2.2-1.el6.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/rhel/6/riak-2.2.2-1.el6.src.rpm
-          file_size: 21215764
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/rhel/6/riak-2.2.2-1.el6.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/rhel/6/riak-2.2.2-1.el6.src.rpm
+          file_size: '21215764'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/rhel/6/riak-2.2.2-1.el6.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.2.2-1.el6.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/rhel/6/riak-2.2.2-1.el6.x86_64.rpm
-          file_size: 65157468
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/rhel/6/riak-2.2.2-1.el6.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/rhel/6/riak-2.2.2-1.el6.x86_64.rpm
+          file_size: '65157468'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/rhel/6/riak-2.2.2-1.el6.x86_64.rpm.sha
    - version: '7'
      architectures:
      - arch: source
        file_info:
          file_name: riak-2.2.2-1.el7.centos.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/rhel/7/riak-2.2.2-1.el7.centos.src.rpm
-          file_size: 21149654
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/rhel/7/riak-2.2.2-1.el7.centos.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/rhel/7/riak-2.2.2-1.el7.centos.src.rpm
+          file_size: '21149654'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/rhel/7/riak-2.2.2-1.el7.centos.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.2.2-1.el7.centos.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/rhel/7/riak-2.2.2-1.el7.centos.x86_64.rpm
-          file_size: 64699904
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/rhel/7/riak-2.2.2-1.el7.centos.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/rhel/7/riak-2.2.2-1.el7.centos.x86_64.rpm
+          file_size: '64699904'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/rhel/7/riak-2.2.2-1.el7.centos.x86_64.rpm.sha
  - os: sles
    versions:
    - version: '11'
@@ -2670,15 +2716,15 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.2.2-1.SLES11.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/sles/11/riak-2.2.2-1.SLES11.x86_64.rpm
-          file_size: 68989249
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/sles/11/riak-2.2.2-1.SLES11.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/sles/11/riak-2.2.2-1.SLES11.x86_64.rpm
+          file_size: '68989249'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/sles/11/riak-2.2.2-1.SLES11.x86_64.rpm.sha
      - arch: source
        file_info:
          file_name: riak-2.2.2-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/sles/11/riak-2.2.2-1.src.rpm
-          file_size: 21204577
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/sles/11/riak-2.2.2-1.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/sles/11/riak-2.2.2-1.src.rpm
+          file_size: '21204577'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/sles/11/riak-2.2.2-1.src.rpm.sha
  - os: ubuntu
    versions:
    - version: precise
@@ -2686,31 +2732,31 @@ riak_kv:
      - arch: amd64
        file_info:
          file_name: riak_2.2.2-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/ubuntu/precise/riak_2.2.2-1_amd64.deb
-          file_size: 67293006
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/ubuntu/precise/riak_2.2.2-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/ubuntu/precise/riak_2.2.2-1_amd64.deb
+          file_size: '67293006'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/ubuntu/precise/riak_2.2.2-1_amd64.deb.sha
    - version: trusty
      architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.2-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/ubuntu/trusty/riak_2.2.2-1_amd64.deb
-          file_size: 60623998
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/ubuntu/trusty/riak_2.2.2-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/ubuntu/trusty/riak_2.2.2-1_amd64.deb
+          file_size: '60623998'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/ubuntu/trusty/riak_2.2.2-1_amd64.deb.sha
    - version: xenial
      architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.2-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/ubuntu/xenial/riak_2.2.2-1_amd64.deb
-          file_size: 60693774
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.2/ubuntu/xenial/riak_2.2.2-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/ubuntu/xenial/riak_2.2.2-1_amd64.deb
+          file_size: '60693774'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.2/ubuntu/xenial/riak_2.2.2-1_amd64.deb.sha
  2.2.3:
  - os: source
    file_info:
      file_name: riak-2.2.3.tar.gz
-      file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/riak-2.2.3.tar.gz
-      file_size: 16466667
+      file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/riak-2.2.3.tar.gz
+      file_size: '16466667'
  - os: debian
    versions:
    - version: jessie
@@ -2718,27 +2764,27 @@ riak_kv:
      - arch: amd64
        file_info:
          file_name: riak_2.2.3-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/debian/jessie/riak_2.2.3-1_amd64.deb
-          file_size: 60638408
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/debian/jessie/riak_2.2.3-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/debian/jessie/riak_2.2.3-1_amd64.deb
+          file_size: '60638408'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/debian/jessie/riak_2.2.3-1_amd64.deb.sha
    - version: wheezy
      architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.3-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/debian/wheezy/riak_2.2.3-1_amd64.deb
-          file_size: 67366054
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/debian/wheezy/riak_2.2.3-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/debian/wheezy/riak_2.2.3-1_amd64.deb
+          file_size: '67366054'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/debian/wheezy/riak_2.2.3-1_amd64.deb.sha
  - os: freebsd
    versions:
    - version: '10'
      architectures:
      - arch: txz
        file_info:
-          file_name: riak-ee-2.2.3.txz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/freebsd/10/riak-ee-2.2.3.txz
-          file_size: 65529736
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/freebsd/10/riak-ee-2.2.3.txz.sha
+          file_name: riak-2.2.3.txz
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/freebsd/10/riak-2.2.3.txz
+          file_size: '64340836'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/freebsd/10/riak-2.2.3.txz.sha
  - os: osx
    versions:
    - version: '10.8'
@@ -2746,9 +2792,9 @@ riak_kv:
      - arch: x86_64
        file_info:
          file_name: riak-2.2.3-OSX-x86_64.tar.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/osx/10.8/riak-2.2.3-OSX-x86_64.tar.gz
-          file_size: 71304761
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/osx/10.8/riak-2.2.3-OSX-x86_64.tar.gz.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/osx/10.8/riak-2.2.3-OSX-x86_64.tar.gz
+          file_size: '71304761'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/osx/10.8/riak-2.2.3-OSX-x86_64.tar.gz.sha
  - os: rhel
    versions:
    - version: '6'
@@ -2756,55 +2802,55 @@ riak_kv:
      - arch: source
        file_info:
          file_name: riak-2.2.3-1.el6.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/rhel/6/riak-2.2.3-1.el6.src.rpm
-          file_size: 21215991
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/rhel/6/riak-2.2.3-1.el6.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/rhel/6/riak-2.2.3-1.el6.src.rpm
+          file_size: '21215991'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/rhel/6/riak-2.2.3-1.el6.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.2.3-1.el6.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/rhel/6/riak-2.2.3-1.el6.x86_64.rpm
-          file_size: 65157824
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/rhel/6/riak-2.2.3-1.el6.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/rhel/6/riak-2.2.3-1.el6.x86_64.rpm
+          file_size: '65157824'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/rhel/6/riak-2.2.3-1.el6.x86_64.rpm.sha
    - version: '7'
      architectures:
      - arch: source
        file_info:
          file_name: riak-2.2.3-1.el7.centos.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/rhel/7/riak-2.2.3-1.el7.centos.src.rpm
-          file_size: 21147850
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/rhel/7/riak-2.2.3-1.el7.centos.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/rhel/7/riak-2.2.3-1.el7.centos.src.rpm
+          file_size: '21147850'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/rhel/7/riak-2.2.3-1.el7.centos.src.rpm.sha
      - arch: x86_64
        file_info:
          file_name: riak-2.2.3-1.el7.centos.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/rhel/7/riak-2.2.3-1.el7.centos.x86_64.rpm
-          file_size: 64700084
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/rhel/7/riak-2.2.3-1.el7.centos.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/rhel/7/riak-2.2.3-1.el7.centos.x86_64.rpm
+          file_size: '64700084'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/rhel/7/riak-2.2.3-1.el7.centos.x86_64.rpm.sha
  - os: sles
    versions:
    - version: '11'
      architectures:
      - arch: x86_64
        file_info:
-          file_name: riak-ee-2.2.3-1.SLES11.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/sles/11/riak-ee-2.2.3-1.SLES11.x86_64.rpm
-          file_size: 70294167
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/sles/11/riak-ee-2.2.3-1.SLES11.x86_64.rpm.sha
+          file_name: riak-2.2.3-1.SLES11.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm
+          file_size: '68989825'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.SLES11.x86_64.rpm.sha
      - arch: source
        file_info:
-          file_name: riak-ee-2.2.3-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/sles/11/riak-ee-2.2.3-1.src.rpm
-          file_size: 22434416
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/sles/11/riak-ee-2.2.3-1.src.rpm.sha
+          file_name: riak-2.2.3-1.src.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.src.rpm
+          file_size: '21202300'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/sles/11/riak-2.2.3-1.src.rpm.sha
  - os: solaris
    versions:
    - version: '10'
      architectures:
      - arch: x86_64
        file_info:
-          file_name: BASHOriak-ee-2.2.3-Solaris10-x86_64.pkg.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/solaris/10/BASHOriak-ee-2.2.3-Solaris10-x86_64.pkg.gz
-          file_size: 76092645
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/solaris/10/BASHOriak-ee-2.2.3-Solaris10-x86_64.pkg.gz.sha
+          file_name: BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz
+          file_size: '74598036'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/solaris/10/BASHOriak-2.2.3-Solaris10-x86_64.pkg.gz.sha
  - os: ubuntu
    versions:
    - version: precise
@@ -2812,502 +2858,7641 @@ riak_kv:
      - arch: amd64
        file_info:
          file_name: riak_2.2.3-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/ubuntu/precise/riak_2.2.3-1_amd64.deb
-          file_size: 67293986
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/ubuntu/precise/riak_2.2.3-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/ubuntu/precise/riak_2.2.3-1_amd64.deb
+          file_size: '67293986'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/ubuntu/precise/riak_2.2.3-1_amd64.deb.sha
    - version: trusty
      architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.3-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/ubuntu/trusty/riak_2.2.3-1_amd64.deb
-          file_size: 60625116
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/ubuntu/trusty/riak_2.2.3-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/ubuntu/trusty/riak_2.2.3-1_amd64.deb
+          file_size: '60625116'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/ubuntu/trusty/riak_2.2.3-1_amd64.deb.sha
    - version: xenial
      architectures:
      - arch: amd64
        file_info:
          file_name: riak_2.2.3-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/ubuntu/xenial/riak_2.2.3-1_amd64.deb
-          file_size: 60694612
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak/2.2/2.2.3/ubuntu/xenial/riak_2.2.3-1_amd64.deb.sha
-riak_cs:
-  2.0.0:
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/ubuntu/xenial/riak_2.2.3-1_amd64.deb
+          file_size: '60694612'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.3/ubuntu/xenial/riak_2.2.3-1_amd64.deb.sha
+  2.2.5:
  - os: source
    file_info:
-      file_name: riak-cs-2.0.0.tar.gz
-      file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/riak-cs-2.0.0.tar.gz
-      file_size: 9709182
+      file_name: riak-2.2.5.tar.gz
+      file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/riak-2.2.5.tar.gz
+      file_size: '21496392'
  - os: debian
    versions:
-    - version: '6'
-      architectures:
-      - arch: amd64
-        file_info:
-          file_name: riak-cs_2.0.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/debian/6/riak-cs_2.0.0-1_amd64.deb
-          file_size: 24144566
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/debian/6/riak-cs_2.0.0-1_amd64.deb.sha
    - version: '7'
      architectures:
      - arch: amd64
        file_info:
-          file_name: riak-cs_2.0.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/debian/7/riak-cs_2.0.0-1_amd64.deb
-          file_size: 24189380
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/debian/7/riak-cs_2.0.0-1_amd64.deb.sha
-    - os: fedora
-      versions:
-    - version: '19'
+          file_name: riak_2.2.5-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/debian/7/riak_2.2.5-1_amd64.deb
+          file_size: '65851494'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/debian/7/riak_2.2.5-1_amd64.deb.sha
+    - version: '8'
      architectures:
-      - arch: source
-        file_info:
-          file_name: riak-cs-2.0.0-1.fc19.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/fedora/19/riak-cs-2.0.0-1.fc19.src.rpm
-          file_size: 9687523
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/fedora/19/riak-cs-2.0.0-1.fc19.src.rpm.sha
-      - arch: x86_64
+      - arch: amd64
        file_info:
-          file_name: riak-cs-2.0.0-1.fc19.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/fedora/19/riak-cs-2.0.0-1.fc19.x86_64.rpm
-          file_size: 21815512
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/fedora/19/riak-cs-2.0.0-1.fc19.x86_64.rpm.sha
+          file_name: riak_2.2.5-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/debian/8/riak_2.2.5-1_amd64.deb
+          file_size: '59487524'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/debian/8/riak_2.2.5-1_amd64.deb.sha
  - os: freebsd
    versions:
-    - version: '10'
+    - version: '10.4'
      architectures:
      - arch: txz
        file_info:
-          file_name: riak-cs-2.0.0.txz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/freebsd/10/riak-cs-2.0.0.txz
-          file_size: 23688492
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/freebsd/10/riak-cs-2.0.0.txz.sha
-    - version: '9.2'
+          file_name: riak-2.2.5.txz
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/freebsd/10.4/riak-2.2.5.txz
+          file_size: '66248676'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/freebsd/10.4/riak-2.2.5.txz.sha
+    - version: '11.1'
      architectures:
-      - arch: amd64
+      - arch: txz
        file_info:
-          file_name: riak-cs-2.0.0-FreeBSD-amd64.tbz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/freebsd/9.2/riak-cs-2.0.0-FreeBSD-amd64.tbz
-          file_size: 29615523
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/freebsd/9.2/riak-cs-2.0.0-FreeBSD-amd64.tbz.sha
+          file_name: riak-2.2.5.txz
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/freebsd/11.1/riak-2.2.5.txz
+          file_size: '66902652'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/freebsd/11.1/riak-2.2.5.txz.sha
  - os: osx
    versions:
-    - version: '10.8'
+    - version: '10.11'
      architectures:
      - arch: x86_64
        file_info:
-          file_name: riak-cs-2.0.0-OSX-x86_64.tar.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/osx/10.8/riak-cs-2.0.0-OSX-x86_64.tar.gz
-          file_size: 24642728
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/osx/10.8/riak-cs-2.0.0-OSX-x86_64.tar.gz.sha
+          file_name: riak-2.2.5-OSX-x86_64.tar.gz
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/osx/10.11/riak-2.2.5-OSX-x86_64.tar.gz
+          file_size: '68798804'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/osx/10.11/riak-2.2.5-OSX-x86_64.tar.gz.sha
  - os: rhel
    versions:
-    - version: '5'
-      architectures:
-      - arch: x86_64
-        file_info:
-          file_name: riak-cs-2.0.0-1.el5.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/5/riak-cs-2.0.0-1.el5.x86_64.rpm
-          file_size: 24362997
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/5/riak-cs-2.0.0-1.el5.x86_64.rpm.sha
-      - arch: source
-        file_info:
-          file_name: riak-cs-2.0.0-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/5/riak-cs-2.0.0-1.src.rpm
-          file_size: 9687080
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/5/riak-cs-2.0.0-1.src.rpm.sha
    - version: '6'
      architectures:
      - arch: source
        file_info:
-          file_name: riak-cs-2.0.0-1.el6.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/6/riak-cs-2.0.0-1.el6.src.rpm
-          file_size: 9670530
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/6/riak-cs-2.0.0-1.el6.src.rpm.sha
+          file_name: riak-2.2.5-1.el6.src.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/rhel/6/riak-2.2.5-1.el6.src.rpm
+          file_size: '21503838'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/rhel/6/riak-2.2.5-1.el6.src.rpm.sha
      - arch: x86_64
        file_info:
-          file_name: riak-cs-2.0.0-1.el6.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/6/riak-cs-2.0.0-1.el6.x86_64.rpm
-          file_size: 21892916
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/6/riak-cs-2.0.0-1.el6.x86_64.rpm.sha
+          file_name: riak-2.2.5-1.el6.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/rhel/6/riak-2.2.5-1.el6.x86_64.rpm
+          file_size: '63794944'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/rhel/6/riak-2.2.5-1.el6.x86_64.rpm.sha
    - version: '7'
      architectures:
      - arch: source
        file_info:
-          file_name: riak-cs-2.0.0-1.el7.centos.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/7/riak-cs-2.0.0-1.el7.centos.src.rpm
-          file_size: 9618891
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/7/riak-cs-2.0.0-1.el7.centos.src.rpm.sha
+          file_name: riak-2.2.5-1.el7.centos.src.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/rhel/7/riak-2.2.5-1.el7.centos.src.rpm
+          file_size: '21465340'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/rhel/7/riak-2.2.5-1.el7.centos.src.rpm.sha
      - arch: x86_64
        file_info:
-          file_name: riak-cs-2.0.0-1.el7.centos.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/7/riak-cs-2.0.0-1.el7.centos.x86_64.rpm
-          file_size: 21812920
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/rhel/7/riak-cs-2.0.0-1.el7.centos.x86_64.rpm.sha
-    - os: sles
+          file_name: riak-2.2.5-1.el7.centos.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/rhel/7/riak-2.2.5-1.el7.centos.x86_64.rpm
+          file_size: '63373620'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/rhel/7/riak-2.2.5-1.el7.centos.x86_64.rpm.sha
+  - os: ubuntu
    versions:
-    - version: '11'
+    - version: artful64
      architectures:
-      - arch: x86_64
+      - arch: amd64
        file_info:
-          file_name: riak-cs-2.0.0-1.SLES11.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/sles/11/riak-cs-2.0.0-1.SLES11.x86_64.rpm
-          file_size: 25261860
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/sles/11/riak-cs-2.0.0-1.SLES11.x86_64.rpm.sha
-      - arch: source
+          file_name: riak_2.2.5-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/ubuntu/artful64/riak_2.2.5-1_amd64.deb
+          file_size: '59596992'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/ubuntu/artful64/riak_2.2.5-1_amd64.deb.sha
+    - version: precise64
+      architectures:
+      - arch: amd64
        file_info:
-          file_name: riak-cs-2.0.0-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/sles/11/riak-cs-2.0.0-1.src.rpm
-          file_size: 9671122
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/sles/11/riak-cs-2.0.0-1.src.rpm.sha
-    - os: smartos
-      versions:
-      - version: '1.8'
+          file_name: riak_2.2.5-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/ubuntu/precise64/riak_2.2.5-1_amd64.deb
+          file_size: '65799852'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/ubuntu/precise64/riak_2.2.5-1_amd64.deb.sha
+    - version: trusty64
      architectures:
-      - arch: x86_64
+      - arch: amd64
        file_info:
-          file_name: riak_cs-2.0.0-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/smartos/1.8/riak_cs-2.0.0-SmartOS-x86_64.tgz
-          file_size: 31991701
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/smartos/1.8/riak_cs-2.0.0-SmartOS-x86_64.tgz.sha
-    - version: '13.1'
+          file_name: riak_2.2.5-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/ubuntu/trusty64/riak_2.2.5-1_amd64.deb
+          file_size: '59477052'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/ubuntu/trusty64/riak_2.2.5-1_amd64.deb.sha
+    - version: xenial64
      architectures:
-      - arch: x86_64
+      - arch: amd64
        file_info:
-          file_name: riak_cs-2.0.0-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/smartos/13.1/riak_cs-2.0.0-SmartOS-x86_64.tgz
-          file_size: 31989822
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/smartos/13.1/riak_cs-2.0.0-SmartOS-x86_64.tgz.sha
-    - os: solaris
+          file_name: riak_2.2.5-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/ubuntu/xenial64/riak_2.2.5-1_amd64.deb
+          file_size: '59434020'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.5/ubuntu/xenial64/riak_2.2.5-1_amd64.deb.sha
+  2.2.6:
+  - os: source
+    file_info:
+      file_name: riak-2.2.6.tar.gz
+      file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/riak-2.2.6.tar.gz
+      file_size: '21522145'
+      chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/riak-2.2.6.tar.gz.sha
+  - os: amazon
    versions:
-    - version: '10'
+    - version: '2'
      architectures:
+      - arch: source
+        file_info:
+          file_name: riak-2.2.6-1.amzn2.src.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2/riak-2.2.6-1.amzn2.src.rpm
+          file_size: '21458761'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2/riak-2.2.6-1.amzn2.src.rpm.sha
      - arch: x86_64
        file_info:
-          file_name: BASHOriak-cs-2.0.0-Solaris10-x86_64.pkg.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/solaris/10/BASHOriak-cs-2.0.0-Solaris10-x86_64.pkg.gz
-          file_size: 28945489
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/solaris/10/BASHOriak-cs-2.0.0-Solaris10-x86_64.pkg.gz.sha
-    - os: ubuntu
-      versions:
-      - version: lucid
+          file_name: riak-2.2.6-1.amzn2x86_64.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2/riak-2.2.6-1.amzn2x86_64.rpm
+          file_size: '63776736'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2/riak-2.2.6-1.amzn2x86_64.rpm.sha
+    - version: '2016.09'
      architectures:
-      - arch: amd64
+      - arch: source
        file_info:
-          file_name: riak-cs_2.0.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/ubuntu/lucid/riak-cs_2.0.0-1_amd64.deb
-          file_size: 24146078
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/ubuntu/lucid/riak-cs_2.0.0-1_amd64.deb.sha
-    - version: precise
+          file_name: riak-2.2.6-1.amzn1.src.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2016.09/riak-2.2.6-1.amzn1.src.rpm
+          file_size: '21518355'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2016.09/riak-2.2.6-1.amzn1.src.rpm.sha
+      - arch: x86_64
+        file_info:
+          file_name: riak-2.2.6-1.amzn1x86_64.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2016.09/riak-2.2.6-1.amzn1x86_64.rpm
+          file_size: '66552839'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/amazon/2016.09/riak-2.2.6-1.amzn1x86_64.rpm.sha
+  - os: debian
+    versions:
+    - version: '10'
      architectures:
      - arch: amd64
        file_info:
-          file_name: riak-cs_2.0.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/ubuntu/precise/riak-cs_2.0.0-1_amd64.deb
-          file_size: 24119194
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/ubuntu/precise/riak-cs_2.0.0-1_amd64.deb.sha
-    - version: trusty
-      architectures:
+          file_name: riak-dbgsym_2.2.6-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/10/riak-dbgsym_2.2.6-1_amd64.deb
+          file_size: '9945612'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/10/riak-dbgsym_2.2.6-1_amd64.deb.sha
      - arch: amd64
        file_info:
-          file_name: riak-cs_2.0.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/ubuntu/trusty/riak-cs_2.0.0-1_amd64.deb
-          file_size: 20021826
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.0/ubuntu/trusty/riak-cs_2.0.0-1_amd64.deb.sha
-  2.0.1:
-  - os: source
-    file_info:
-      file_name: riak-cs-2.0.1.tar.gz
-      file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/riak-cs-2.0.1.tar.gz
-      file_size: 9734438
-  - os: debian
-    versions:
-    - version: '6'
+          file_name: riak_2.2.6-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/10/riak_2.2.6-1_amd64.deb
+          file_size: '59711016'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/10/riak_2.2.6-1_amd64.deb.sha
+    - version: '7'
      architectures:
      - arch: amd64
        file_info:
-          file_name: riak-cs_2.0.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/debian/6/riak-cs_2.0.1-1_amd64.deb
-          file_size: 24209698
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/debian/6/riak-cs_2.0.1-1_amd64.deb.sha
-    - version: '7'
+          file_name: riak_2.2.6-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/7/riak_2.2.6-1_amd64.deb
+          file_size: '65851496'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/7/riak_2.2.6-1_amd64.deb.sha
+    - version: '8'
      architectures:
      - arch: amd64
        file_info:
-          file_name: riak-cs_2.0.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/debian/7/riak-cs_2.0.1-1_amd64.deb
-          file_size: 24253352
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/debian/7/riak-cs_2.0.1-1_amd64.deb.sha
-    - os: fedora
-      versions:
-      - version: '19'
+          file_name: riak_2.2.6-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/8/riak_2.2.6-1_amd64.deb
+          file_size: '59484850'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/8/riak_2.2.6-1_amd64.deb.sha
+    - version: '9'
      architectures:
      - arch: amd64
        file_info:
+          file_name: riak-dbgsym_2.2.6-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/9/riak-dbgsym_2.2.6-1_amd64.deb
+          file_size: '7481272'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/9/riak-dbgsym_2.2.6-1_amd64.deb.sha
+      - arch: amd64
+        file_info:
+          file_name: riak_2.2.6-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/9/riak_2.2.6-1_amd64.deb
+          file_size: '59535974'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/debian/9/riak_2.2.6-1_amd64.deb.sha
  - os: freebsd
    versions:
-    - version: '10'
+    - version: '10.4'
      architectures:
      - arch: txz
        file_info:
-          file_name: riak-cs-2.0.1.txz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/freebsd/10/riak-cs-2.0.1.txz
-          file_size: 23760880
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/freebsd/10/riak-cs-2.0.1.txz.sha
-    - version: '9.2'
+          file_name: riak-2.2.6.txz
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/freebsd/10.4/riak-2.2.6.txz
+          file_size: '66244652'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/freebsd/10.4/riak-2.2.6.txz.sha
    - version: '11.1'
      architectures:
      - arch: txz
        file_info:
-          file_name: riak-cs-2.0.1-FreeBSD-amd64.tbz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/freebsd/9.2/riak-cs-2.0.1-FreeBSD-amd64.tbz
-          file_size: 29679739
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/freebsd/9.2/riak-cs-2.0.1-FreeBSD-amd64.tbz.sha
+          file_name: riak-2.2.6.txz
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/freebsd/11.1/riak-2.2.6.txz
+          file_size: '66903000'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/freebsd/11.1/riak-2.2.6.txz.sha
  - os: osx
    versions:
-    - version: '10.8'
+    - version: '10.11'
      architectures:
      - arch: x86_64
        file_info:
-          file_name: riak-cs-2.0.1-OSX-x86_64.tar.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/osx/10.8/riak-cs-2.0.1-OSX-x86_64.tar.gz
-          file_size: 24710129
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/osx/10.8/riak-cs-2.0.1-OSX-x86_64.tar.gz.sha
+          file_name: riak-2.2.6-OSX-x86_64.tar.gz
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/osx/10.11/riak-2.2.6-OSX-x86_64.tar.gz
+          file_size: '68788737'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/osx/10.11/riak-2.2.6-OSX-x86_64.tar.gz.sha
  - os: rhel
    versions:
-    - version: '5'
-      architectures:
-      - arch: x86_64
-        file_info:
-          file_name: riak-cs-2.0.1-1.el5.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/5/riak-cs-2.0.1-1.el5.x86_64.rpm
-          file_size: 24421926
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/5/riak-cs-2.0.1-1.el5.x86_64.rpm.sha
-      - arch: source
-        file_info:
-          file_name: riak-cs-2.0.1-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/5/riak-cs-2.0.1-1.src.rpm
-          file_size: 9709943
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/5/riak-cs-2.0.1-1.src.rpm.sha
    - version: '6'
      architectures:
      - arch: source
        file_info:
-          file_name: riak-cs-2.0.1-1.el6.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/6/riak-cs-2.0.1-1.el6.src.rpm
-          file_size: 9694943
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/6/riak-cs-2.0.1-1.el6.src.rpm.sha
+          file_name: riak-2.2.6-1.el6.src.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/6/riak-2.2.6-1.el6.src.rpm
+          file_size: '21504538'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/6/riak-2.2.6-1.el6.src.rpm.sha
      - arch: x86_64
        file_info:
-          file_name: riak-cs-2.0.1-1.el6.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/6/riak-cs-2.0.1-1.el6.x86_64.rpm
-          file_size: 21953756
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/6/riak-cs-2.0.1-1.el6.x86_64.rpm.sha
+          file_name: riak-2.2.6-1.el6.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/6/riak-2.2.6-1.el6.x86_64.rpm
+          file_size: '63794768'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/6/riak-2.2.6-1.el6.x86_64.rpm.sha
    - version: '7'
      architectures:
      - arch: source
        file_info:
-          file_name: riak-cs-2.0.1-1.el7.centos.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/7/riak-cs-2.0.1-1.el7.centos.src.rpm
-          file_size: 9649994
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/7/riak-cs-2.0.1-1.el7.centos.src.rpm.sha
+          file_name: riak-2.2.6-1.el7.src.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/7/riak-2.2.6-1.el7.src.rpm
+          file_size: '21466334'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/7/riak-2.2.6-1.el7.src.rpm.sha
      - arch: x86_64
        file_info:
-          file_name: riak-cs-2.0.1-1.el7.centos.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/7/riak-cs-2.0.1-1.el7.centos.x86_64.rpm
-          file_size: 21871700
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/rhel/7/riak-cs-2.0.1-1.el7.centos.x86_64.rpm.sha
-    - os: sles
-      versions:
-      - version: '11'
+          file_name: riak-2.2.6-1.el7.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/7/riak-2.2.6-1.el7.x86_64.rpm
+          file_size: '63369960'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/7/riak-2.2.6-1.el7.x86_64.rpm.sha
+    - version: '8'
      architectures:
-      - arch: x86_64
-        file_info:
-          file_name: riak-cs-2.0.1-1.SLES11.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/sles/11/riak-cs-2.0.1-1.SLES11.x86_64.rpm
-          file_size: 25327783
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/sles/11/riak-cs-2.0.1-1.SLES11.x86_64.rpm.sha
      - arch: source
        file_info:
-          file_name: riak-cs-2.0.1-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/sles/11/riak-cs-2.0.1-1.src.rpm
-          file_size: 9682213
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/sles/11/riak-cs-2.0.1-1.src.rpm.sha
-    - os: smartos
-      versions:
-      - version: '1.8'
-      architectures:
+          file_name: riak-2.2.6-1.el8.src.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/8/riak-2.2.6-1.el8.src.rpm
+          file_size: '21464757'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/8/riak-2.2.6-1.el8.src.rpm.sha
      - arch: x86_64
        file_info:
-          file_name: riak_cs-2.0.1-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/smartos/1.8/riak_cs-2.0.1-SmartOS-x86_64.tgz
-          file_size: 31308976
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/smartos/1.8/riak_cs-2.0.1-SmartOS-x86_64.tgz.sha
-    - version: '13.1'
+          file_name: riak-2.2.6-1.el8.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/8/riak-2.2.6-1.el8.x86_64.rpm
+          file_size: '64390480'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/rhel/8/riak-2.2.6-1.el8.x86_64.rpm.sha
+  - os: ubuntu
+    versions:
+    - version: artful64
      architectures:
-      - arch: x86_64
+      - arch: amd64
        file_info:
-          file_name: riak_cs-2.0.1-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/smartos/13.1/riak_cs-2.0.1-SmartOS-x86_64.tgz
-          file_size: 31292984
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/smartos/13.1/riak_cs-2.0.1-SmartOS-x86_64.tgz.sha
-    - os: solaris
-      versions:
-      - version: '10'
+          file_name: riak_2.2.6-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/artful64/riak_2.2.6-1_amd64.deb
+          file_size: '59597270'
+          chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/artful64/riak_2.2.6-1_amd64.deb.sha
+    - version: bionic64
      architectures:
-      - arch: x86_64
+      - arch: amd64
        file_info:
-          file_name: BASHOriak-cs-2.0.1-Solaris10-x86_64.pkg.gz
-          file_href:
http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/solaris/10/BASHOriak-cs-2.0.1-Solaris10-x86_64.pkg.gz - file_size: 28585773 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/solaris/10/BASHOriak-cs-2.0.1-Solaris10-x86_64.pkg.gz.sha - - os: ubuntu - versions: - - version: lucid + file_name: riak_2.2.6-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/bionic64/riak_2.2.6-1_amd64.deb + file_size: '59595616' + chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/bionic64/riak_2.2.6-1_amd64.deb.sha + - version: precise64 architectures: - arch: amd64 file_info: - file_name: riak-cs_2.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/ubuntu/lucid/riak-cs_2.0.1-1_amd64.deb - file_size: 24207464 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/ubuntu/lucid/riak-cs_2.0.1-1_amd64.deb.sha - - version: precise + file_name: riak_2.2.6-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/precise64/riak_2.2.6-1_amd64.deb + file_size: '65800528' + chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/precise64/riak_2.2.6-1_amd64.deb.sha + - version: trusty64 architectures: - arch: amd64 file_info: - file_name: riak-cs_2.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/ubuntu/precise/riak-cs_2.0.1-1_amd64.deb - file_size: 24189948 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/ubuntu/precise/riak-cs_2.0.1-1_amd64.deb.sha - - version: trusty + file_name: riak_2.2.6-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/trusty64/riak_2.2.6-1_amd64.deb + file_size: '59478910' + chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/trusty64/riak_2.2.6-1_amd64.deb.sha + - version: xenial64 architectures: - arch: amd64 file_info: - file_name: riak-cs_2.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/ubuntu/trusty/riak-cs_2.0.1-1_amd64.deb - file_size: 20058620 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.0/2.0.1/ubuntu/trusty/riak-cs_2.0.1-1_amd64.deb.sha - 2.1.0: + file_name: riak_2.2.6-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/xenial64/riak_2.2.6-1_amd64.deb + file_size: '59431826' + chksum_href: https://files.tiot.jp/riak/kv/2.2/2.2.6/ubuntu/xenial64/riak_2.2.6-1_amd64.deb.sha + 2.9.0: - os: source file_info: - file_name: riak-cs-2.1.0.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/riak-cs-2.1.0.tar.gz - file_size: 10670941 + file_name: riak-2.9.0.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/riak-2.9.0.tar.gz + file_size: '36945412' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/riak-2.9.0.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/amazon/2/riak-2.9.0-1.amzn2.src.rpm + file_size: '36893408' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/amazon/2/riak-2.9.0-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/amazon/2/riak-2.9.0-1.amzn2.x86_64.rpm + file_size: '64715760' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/amazon/2/riak-2.9.0-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: 
riak-2.9.0-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/amazon/2016.09/riak-2.9.0-1.amzn1.src.rpm + file_size: '36971980' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/amazon/2016.09/riak-2.9.0-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/amazon/2016.09/riak-2.9.0-1.amzn1.x86_64.rpm + file_size: '69449787' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/amazon/2016.09/riak-2.9.0-1.amzn1.x86_64.rpm.sha - os: debian versions: - - version: '6' + - version: '7' architectures: - arch: amd64 file_info: - file_name: riak-cs_2.1.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/debian/6/riak-cs_2.1.0-1_amd64.deb - file_size: 26657738 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/debian/6/riak-cs_2.1.0-1_amd64.deb.sha - - version: '7' + file_name: riak_2.9.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/debian/7/riak_2.9.0-1_amd64.deb + file_size: '66892916' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/debian/7/riak_2.9.0-1_amd64.deb.sha + - version: '8' architectures: - arch: amd64 file_info: - file_name: riak-cs_2.1.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/debian/7/riak-cs_2.1.0-1_amd64.deb - file_size: 26703798 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/debian/7/riak-cs_2.1.0-1_amd64.deb.sha - - os: fedora - versions: - - version: '19' + file_name: riak_2.9.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/debian/8/riak_2.9.0-1_amd64.deb + file_size: '60350278' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/debian/8/riak_2.9.0-1_amd64.deb.sha + - version: '9' architectures: - - arch: source + - arch: amd64 file_info: - file_name: riak-cs-2.1.0-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/fedora/19/riak-cs-2.1.0-1.fc19.src.rpm - file_size: 10655944 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/fedora/19/riak-cs-2.1.0-1.fc19.src.rpm.sha - - arch: x86_64 + file_name: riak-dbgsym_2.9.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/debian/9/riak-dbgsym_2.9.0-1_amd64.deb + file_size: '7944376' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/debian/9/riak-dbgsym_2.9.0-1_amd64.deb.sha + - arch: amd64 file_info: - file_name: riak-cs-2.1.0-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/fedora/19/riak-cs-2.1.0-1.fc19.x86_64.rpm - file_size: 24117812 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/fedora/19/riak-cs-2.1.0-1.fc19.x86_64.rpm.sha + file_name: riak_2.9.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/debian/9/riak_2.9.0-1_amd64.deb + file_size: '60357674' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/debian/9/riak_2.9.0-1_amd64.deb.sha - os: freebsd versions: - - version: '10' + - version: '10.4' architectures: - arch: txz file_info: - file_name: riak-cs-2.1.0.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/freebsd/10/riak-cs-2.1.0.txz - file_size: 25878088 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/freebsd/10/riak-cs-2.1.0.txz.sha - - version: '9.2' + file_name: riak-2.9.0.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/freebsd/10.4/riak-2.9.0.txz + file_size: '67190668' + 
chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/freebsd/10.4/riak-2.9.0.txz.sha + - version: '11.1' architectures: - - arch: amd64 + - arch: txz file_info: - file_name: riak-cs-2.1.0-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/freebsd/9.2/riak-cs-2.1.0-FreeBSD-amd64.tbz - file_size: 32135218 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/freebsd/9.2/riak-cs-2.1.0-FreeBSD-amd64.tbz.sha + file_name: riak-2.9.0.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/freebsd/11.1/riak-2.9.0.txz + file_size: '67890956' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/freebsd/11.1/riak-2.9.0.txz.sha - os: osx versions: - - version: '10.8' + - version: '10.11' architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/osx/10.11/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/osx/10.11/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/osx/10.11/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/osx/10.11/libssl.1.0.0.dylib.sha - arch: x86_64 file_info: - file_name: riak-cs-2.1.0-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/osx/10.8/riak-cs-2.1.0-OSX-x86_64.tar.gz - file_size: 27157716 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/osx/10.8/riak-cs-2.1.0-OSX-x86_64.tar.gz.sha + file_name: riak-2.9.0-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/osx/10.11/riak-2.9.0-OSX-x86_64.tar.gz + file_size: '69123743' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/osx/10.11/riak-2.9.0-OSX-x86_64.tar.gz.sha - os: rhel versions: - - version: '5' + - version: '6' architectures: - - arch: x86_64 - file_info: - file_name: riak-cs-2.1.0-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/5/riak-cs-2.1.0-1.el5.x86_64.rpm - file_size: 26899774 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/5/riak-cs-2.1.0-1.el5.x86_64.rpm.sha - arch: source file_info: - file_name: riak-cs-2.1.0-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/5/riak-cs-2.1.0-1.src.rpm - file_size: 10645942 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/5/riak-cs-2.1.0-1.src.rpm.sha - - version: '6' + file_name: riak-2.9.0-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/6/riak-2.9.0-1.el6.src.rpm + file_size: '36955967' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/6/riak-2.9.0-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/6/riak-2.9.0-1.el6.x86_64.rpm + file_size: '64815512' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/6/riak-2.9.0-1.el6.x86_64.rpm.sha + - version: '7' architectures: - arch: source file_info: - file_name: riak-cs-2.1.0-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/6/riak-cs-2.1.0-1.el6.src.rpm - file_size: 10632114 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/6/riak-cs-2.1.0-1.el6.src.rpm.sha + file_name: riak-2.9.0-1.el7.src.rpm + file_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/7/riak-2.9.0-1.el7.src.rpm + file_size: '36895119' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/7/riak-2.9.0-1.el7.src.rpm.sha - arch: x86_64 file_info: - file_name: riak-cs-2.1.0-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/6/riak-cs-2.1.0-1.el6.x86_64.rpm - file_size: 24209960 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/6/riak-cs-2.1.0-1.el6.x86_64.rpm.sha - - version: '7' + file_name: riak-2.9.0-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/7/riak-2.9.0-1.el7.x86_64.rpm + file_size: '64351328' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/7/riak-2.9.0-1.el7.x86_64.rpm.sha + - version: '8' architectures: - arch: source file_info: - file_name: riak-cs-2.1.0-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/7/riak-cs-2.1.0-1.el7.centos.src.rpm - file_size: 10606405 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/7/riak-cs-2.1.0-1.el7.centos.src.rpm.sha + file_name: riak-2.9.0-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/8/riak-2.9.0-1.el8.src.rpm + file_size: '36900027' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/8/riak-2.9.0-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/8/riak-2.9.0-1.el8.x86_64.rpm + file_size: '65465352' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/rhel/8/riak-2.9.0-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/ubuntu/bionic64/riak_2.9.0-1_amd64.deb + file_size: '60420888' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/ubuntu/bionic64/riak_2.9.0-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/ubuntu/precise64/riak_2.9.0-1_amd64.deb + file_size: '66838378' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/ubuntu/precise64/riak_2.9.0-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/ubuntu/trusty64/riak_2.9.0-1_amd64.deb + file_size: '60279238' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/ubuntu/trusty64/riak_2.9.0-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/ubuntu/xenial64/riak_2.9.0-1_amd64.deb + file_size: '60330220' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0/ubuntu/xenial64/riak_2.9.0-1_amd64.deb.sha + 2.9.0p1: + - os: source + file_info: + file_name: riak-2.9.0p1.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/riak-2.9.0p1.tar.gz + file_size: '36942121' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/riak-2.9.0p1.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p1-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/amazon/2/riak-2.9.0p1-1.amzn2.src.rpm + file_size: '36898897' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.0p1/amazon/2/riak-2.9.0p1-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p1-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/amazon/2/riak-2.9.0p1-1.amzn2x86_64.rpm + file_size: '64777276' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/amazon/2/riak-2.9.0p1-1.amzn2x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p1-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/amazon/2016.09/riak-2.9.0p1-1.amzn1.src.rpm + file_size: '36974526' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/amazon/2016.09/riak-2.9.0p1-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p1-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/amazon/2016.09/riak-2.9.0p1-1.amzn1x86_64.rpm + file_size: '67610183' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/amazon/2016.09/riak-2.9.0p1-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/debian/7/riak_2.9.0p1-1_amd64.deb + file_size: '66896176' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/debian/7/riak_2.9.0p1-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/debian/8/riak_2.9.0p1-1_amd64.deb + file_size: '60354818' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/debian/8/riak_2.9.0p1-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.0p1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/debian/9/riak-dbgsym_2.9.0p1-1_amd64.deb + file_size: '7944236' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/debian/9/riak-dbgsym_2.9.0p1-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.0p1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/debian/9/riak_2.9.0p1-1_amd64.deb + file_size: '60359920' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/debian/9/riak_2.9.0p1-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.0p1.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/freebsd/10.4/riak-2.9.0p1.txz + file_size: '67191468' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/freebsd/10.4/riak-2.9.0p1.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.0p1.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/freebsd/11.1/riak-2.9.0p1.txz + file_size: '67890324' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/freebsd/11.1/riak-2.9.0p1.txz.sha + - os: osx + versions: + - version: '10.11' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.11/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.11/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.11/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.11/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p1-OSX-x86_64.tar.gz + 
file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.11/riak-2.9.0p1-OSX-x86_64.tar.gz + file_size: '69113636' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.11/riak-2.9.0p1-OSX-x86_64.tar.gz.sha + - version: '10.14' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p1-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.14/riak-2.9.0p1-OSX-x86_64.tar.gz + file_size: '69539864' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/osx/10.14/riak-2.9.0p1-OSX-x86_64.tar.gz.sha + - os: rhel + versions: + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p1-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/rhel/6/riak-2.9.0p1-1.el6.src.rpm + file_size: '36953212' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/rhel/6/riak-2.9.0p1-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p1-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/rhel/6/riak-2.9.0p1-1.el6.x86_64.rpm + file_size: '64819208' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/rhel/6/riak-2.9.0p1-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p1-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/rhel/7/riak-2.9.0p1-1.el7.src.rpm + file_size: '36897911' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/rhel/7/riak-2.9.0p1-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p1-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/rhel/7/riak-2.9.0p1-1.el7.x86_64.rpm + file_size: '64352512' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/rhel/7/riak-2.9.0p1-1.el7.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/ubuntu/bionic64/riak_2.9.0p1-1_amd64.deb + file_size: '60417636' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/ubuntu/bionic64/riak_2.9.0p1-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/ubuntu/precise64/riak_2.9.0p1-1_amd64.deb + file_size: '66840158' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/ubuntu/precise64/riak_2.9.0p1-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/ubuntu/trusty64/riak_2.9.0p1-1_amd64.deb + file_size: '60275666' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/ubuntu/trusty64/riak_2.9.0p1-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/ubuntu/xenial64/riak_2.9.0p1-1_amd64.deb + file_size: '60331128' + 
chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p1/ubuntu/xenial64/riak_2.9.0p1-1_amd64.deb.sha + 2.9.0p2: + - os: source + file_info: + file_name: riak-2.9.0p2.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/riak-2.9.0p2.tar.gz + file_size: '36944411' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/riak-2.9.0p2.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p2-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/amazon/2/riak-2.9.0p2-1.amzn2.src.rpm + file_size: '36900790' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/amazon/2/riak-2.9.0p2-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p2-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/amazon/2/riak-2.9.0p2-1.amzn2x86_64.rpm + file_size: '64779888' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/amazon/2/riak-2.9.0p2-1.amzn2x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p2-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/amazon/2016.09/riak-2.9.0p2-1.amzn1.src.rpm + file_size: '36977463' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/amazon/2016.09/riak-2.9.0p2-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p2-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/amazon/2016.09/riak-2.9.0p2-1.amzn1x86_64.rpm + file_size: '67617039' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/amazon/2016.09/riak-2.9.0p2-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/debian/7/riak_2.9.0p2-1_amd64.deb + file_size: '66899982' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/debian/7/riak_2.9.0p2-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/debian/8/riak_2.9.0p2-1_amd64.deb + file_size: '60383208' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/debian/8/riak_2.9.0p2-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.0p2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/debian/9/riak-dbgsym_2.9.0p2-1_amd64.deb + file_size: '7943932' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/debian/9/riak-dbgsym_2.9.0p2-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.0p2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/debian/9/riak_2.9.0p2-1_amd64.deb + file_size: '60362020' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/debian/9/riak_2.9.0p2-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.0p2.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/freebsd/10.4/riak-2.9.0p2.txz + file_size: '67199916' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/freebsd/10.4/riak-2.9.0p2.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.0p2.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/freebsd/11.1/riak-2.9.0p2.txz + file_size: '67894052' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/freebsd/11.1/riak-2.9.0p2.txz.sha + - os: osx + versions: + - version: '10.11' + 
architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.11/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.11/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.11/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.11/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p2-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.11/riak-2.9.0p2-OSX-x86_64.tar.gz + file_size: '69117361' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.11/riak-2.9.0p2-OSX-x86_64.tar.gz.sha + - version: '10.14' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p2-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.14/riak-2.9.0p2-OSX-x86_64.tar.gz + file_size: '69549304' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/osx/10.14/riak-2.9.0p2-OSX-x86_64.tar.gz.sha + - os: rhel + versions: + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p2-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/rhel/6/riak-2.9.0p2-1.el6.src.rpm + file_size: '36955361' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/rhel/6/riak-2.9.0p2-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p2-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/rhel/6/riak-2.9.0p2-1.el6.x86_64.rpm + file_size: '64820284' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/rhel/6/riak-2.9.0p2-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p2-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/rhel/7/riak-2.9.0p2-1.el7.src.rpm + file_size: '36900129' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/rhel/7/riak-2.9.0p2-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p2-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/rhel/7/riak-2.9.0p2-1.el7.x86_64.rpm + file_size: '64356588' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/rhel/7/riak-2.9.0p2-1.el7.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/ubuntu/bionic64/riak_2.9.0p2-1_amd64.deb + file_size: '60422872' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/ubuntu/bionic64/riak_2.9.0p2-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/ubuntu/precise64/riak_2.9.0p2-1_amd64.deb + file_size: '66844534' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.0p2/ubuntu/precise64/riak_2.9.0p2-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/ubuntu/trusty64/riak_2.9.0p2-1_amd64.deb + file_size: '60282094' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/ubuntu/trusty64/riak_2.9.0p2-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/ubuntu/xenial64/riak_2.9.0p2-1_amd64.deb + file_size: '60333832' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p2/ubuntu/xenial64/riak_2.9.0p2-1_amd64.deb.sha + 2.9.0p3: + - os: source + file_info: + file_name: riak-2.9.0p3.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/riak-2.9.0p3.tar.gz + file_size: '36949487' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/riak-2.9.0p3.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/amazon/2/riak-2.9.0p3-1.amzn2.src.rpm + file_size: '36895726' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/amazon/2/riak-2.9.0p3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/amazon/2/riak-2.9.0p3-1.amzn2x86_64.rpm + file_size: '64787588' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/amazon/2/riak-2.9.0p3-1.amzn2x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/amazon/2016.09/riak-2.9.0p3-1.amzn1.src.rpm + file_size: '36979925' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/amazon/2016.09/riak-2.9.0p3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/amazon/2016.09/riak-2.9.0p3-1.amzn1x86_64.rpm + file_size: '67619634' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/amazon/2016.09/riak-2.9.0p3-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/debian/7/riak_2.9.0p3-1_amd64.deb + file_size: '66900526' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/debian/7/riak_2.9.0p3-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/debian/8/riak_2.9.0p3-1_amd64.deb + file_size: '60366394' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/debian/8/riak_2.9.0p3-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.0p3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/debian/9/riak-dbgsym_2.9.0p3-1_amd64.deb + file_size: '7943584' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/debian/9/riak-dbgsym_2.9.0p3-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.0p3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/debian/9/riak_2.9.0p3-1_amd64.deb + file_size: '60367988' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/debian/9/riak_2.9.0p3-1_amd64.deb.sha + - os: freebsd + 
versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.0p3.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/freebsd/10.4/riak-2.9.0p3.txz + file_size: '67201256' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/freebsd/10.4/riak-2.9.0p3.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.0p3.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/freebsd/11.1/riak-2.9.0p3.txz + file_size: '67895140' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/freebsd/11.1/riak-2.9.0p3.txz.sha + - os: osx + versions: + - version: '10.11' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.11/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.11/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.11/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.11/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p3-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.11/riak-2.9.0p3-OSX-x86_64.tar.gz + file_size: '69119968' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.11/riak-2.9.0p3-OSX-x86_64.tar.gz.sha + - version: '10.14' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p3-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.14/riak-2.9.0p3-OSX-x86_64.tar.gz + file_size: '69559679' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/osx/10.14/riak-2.9.0p3-OSX-x86_64.tar.gz.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_2.9.0p3-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/raspbian/buster/riak-dbgsym_2.9.0p3-1_armhf.deb + file_size: '9996548' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/raspbian/buster/riak-dbgsym_2.9.0p3-1_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_2.9.0p3-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/raspbian/buster/riak_2.9.0p3-1_armhf.deb + file_size: '59894448' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/raspbian/buster/riak_2.9.0p3-1_armhf.deb.sha + - os: rhel + versions: + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p3-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/rhel/6/riak-2.9.0p3-1.el6.src.rpm + file_size: '36960560' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/rhel/6/riak-2.9.0p3-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p3-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/rhel/6/riak-2.9.0p3-1.el6.x86_64.rpm + file_size: '64829328' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.0p3/rhel/6/riak-2.9.0p3-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/rhel/7/riak-2.9.0p3-1.el7.src.rpm + file_size: '36896623' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/rhel/7/riak-2.9.0p3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/rhel/7/riak-2.9.0p3-1.el7.x86_64.rpm + file_size: '64363596' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/rhel/7/riak-2.9.0p3-1.el7.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/ubuntu/bionic64/riak_2.9.0p3-1_amd64.deb + file_size: '60429456' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/ubuntu/bionic64/riak_2.9.0p3-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/ubuntu/precise64/riak_2.9.0p3-1_amd64.deb + file_size: '66847606' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/ubuntu/precise64/riak_2.9.0p3-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/ubuntu/trusty64/riak_2.9.0p3-1_amd64.deb + file_size: '60288746' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/ubuntu/trusty64/riak_2.9.0p3-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/ubuntu/xenial64/riak_2.9.0p3-1_amd64.deb + file_size: '60336486' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p3/ubuntu/xenial64/riak_2.9.0p3-1_amd64.deb.sha + 2.9.0p4: + - os: source + file_info: + file_name: riak-2.9.0p4.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/riak-2.9.0p4.tar.gz + file_size: '36949091' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/riak-2.9.0p4.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p4-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/amazon/2/riak-2.9.0p4-1.amzn2.src.rpm + file_size: '36912483' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/amazon/2/riak-2.9.0p4-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p4-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/amazon/2/riak-2.9.0p4-1.amzn2x86_64.rpm + file_size: '64798088' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/amazon/2/riak-2.9.0p4-1.amzn2x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p4-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/amazon/2016.09/riak-2.9.0p4-1.amzn1.src.rpm + file_size: '36979241' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/amazon/2016.09/riak-2.9.0p4-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p4-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/amazon/2016.09/riak-2.9.0p4-1.amzn1x86_64.rpm + file_size: '67634707' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.0p4/amazon/2016.09/riak-2.9.0p4-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/debian/7/riak_2.9.0p4-1_amd64.deb + file_size: '66912520' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/debian/7/riak_2.9.0p4-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/debian/8/riak_2.9.0p4-1_amd64.deb + file_size: '60368172' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/debian/8/riak_2.9.0p4-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.0p4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/debian/9/riak-dbgsym_2.9.0p4-1_amd64.deb + file_size: '7944554' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/debian/9/riak-dbgsym_2.9.0p4-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.0p4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/debian/9/riak_2.9.0p4-1_amd64.deb + file_size: '60376186' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/debian/9/riak_2.9.0p4-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.0p4.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/freebsd/10.4/riak-2.9.0p4.txz + file_size: '67207516' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/freebsd/10.4/riak-2.9.0p4.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.0p4.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/freebsd/11.1/riak-2.9.0p4.txz + file_size: '67911920' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/freebsd/11.1/riak-2.9.0p4.txz.sha + - os: osx + versions: + - version: '10.11' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.11/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.11/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.11/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.11/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p4-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.11/riak-2.9.0p4-OSX-x86_64.tar.gz + file_size: '69128976' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.11/riak-2.9.0p4-OSX-x86_64.tar.gz.sha + - version: '10.14' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p4-OSX-x86_64.tar.gz + file_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.14/riak-2.9.0p4-OSX-x86_64.tar.gz + file_size: '69558441' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/osx/10.14/riak-2.9.0p4-OSX-x86_64.tar.gz.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_2.9.0p4-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/raspbian/buster/riak-dbgsym_2.9.0p4-1_armhf.deb + file_size: '9994920' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/raspbian/buster/riak-dbgsym_2.9.0p4-1_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_2.9.0p4-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/raspbian/buster/riak_2.9.0p4-1_armhf.deb + file_size: '59908788' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/raspbian/buster/riak_2.9.0p4-1_armhf.deb.sha + - os: rhel + versions: + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p4-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/rhel/6/riak-2.9.0p4-1.el6.src.rpm + file_size: '36952899' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/rhel/6/riak-2.9.0p4-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p4-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/rhel/6/riak-2.9.0p4-1.el6.x86_64.rpm + file_size: '64837168' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/rhel/6/riak-2.9.0p4-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p4-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/rhel/7/riak-2.9.0p4-1.el7.src.rpm + file_size: '36907077' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/rhel/7/riak-2.9.0p4-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p4-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/rhel/7/riak-2.9.0p4-1.el7.x86_64.rpm + file_size: '64372896' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/rhel/7/riak-2.9.0p4-1.el7.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/ubuntu/bionic64/riak_2.9.0p4-1_amd64.deb + file_size: '60437928' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/ubuntu/bionic64/riak_2.9.0p4-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/ubuntu/precise64/riak_2.9.0p4-1_amd64.deb + file_size: '66863670' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/ubuntu/precise64/riak_2.9.0p4-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/ubuntu/trusty64/riak_2.9.0p4-1_amd64.deb + file_size: '60297008' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/ubuntu/trusty64/riak_2.9.0p4-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/ubuntu/xenial64/riak_2.9.0p4-1_amd64.deb + file_size: '60346698' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p4/ubuntu/xenial64/riak_2.9.0p4-1_amd64.deb.sha + 2.9.0p5: + - os: source + file_info: + file_name: riak-2.9.0p5.tar.gz + file_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.0p5/riak-2.9.0p5.tar.gz + file_size: '38464557' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/riak-2.9.0p5.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p5-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2/riak-2.9.0p5-1.amzn2.src.rpm + file_size: '38424333' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2/riak-2.9.0p5-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p5-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2/riak-2.9.0p5-1.amzn2.x86_64.rpm + file_size: '64759840' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2/riak-2.9.0p5-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p5-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2016.09/riak-2.9.0p5-1.amzn1.src.rpm + file_size: '38492666' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2016.09/riak-2.9.0p5-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p5-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2016.09/riak-2.9.0p5-1.amzn1.x86_64.rpm + file_size: '67589895' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/amazon/2016.09/riak-2.9.0p5-1.amzn1.x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.0p5-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/10/riak-dbgsym_2.9.0p5-1_amd64.deb + file_size: '10578532' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/10/riak-dbgsym_2.9.0p5-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.0p5-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/10/riak_2.9.0p5-1_amd64.deb + file_size: '60526384' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/10/riak_2.9.0p5-1_amd64.deb.sha + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p5-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/7/riak_2.9.0p5-1_amd64.deb + file_size: '66879890' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/7/riak_2.9.0p5-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p5-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/8/riak_2.9.0p5-1_amd64.deb + file_size: '60333454' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/8/riak_2.9.0p5-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.0p5-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/9/riak-dbgsym_2.9.0p5-1_amd64.deb + file_size: '7943352' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/9/riak-dbgsym_2.9.0p5-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.0p5-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/9/riak_2.9.0p5-1_amd64.deb + file_size: '60360086' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/debian/9/riak_2.9.0p5-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.0p5.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/freebsd/10.4/riak-2.9.0p5.txz + file_size: '67170304' + 
chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/freebsd/10.4/riak-2.9.0p5.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.0p5.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/freebsd/11.1/riak-2.9.0p5.txz + file_size: '67883328' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/freebsd/11.1/riak-2.9.0p5.txz.sha + - os: osx + versions: + - version: '10.11' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.11/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.11/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.11/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.11/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p5-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.11/riak-2.9.0p5-OSX-x86_64.tar.gz + file_size: '69088174' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.11/riak-2.9.0p5-OSX-x86_64.tar.gz.sha + - version: '10.14' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p5-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.14/riak-2.9.0p5-OSX-x86_64.tar.gz + file_size: '69517579' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/osx/10.14/riak-2.9.0p5-OSX-x86_64.tar.gz.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_2.9.0p5-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/raspbian/buster/riak-dbgsym_2.9.0p5-1_armhf.deb + file_size: '10012644' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/raspbian/buster/riak-dbgsym_2.9.0p5-1_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_2.9.0p5-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/raspbian/buster/riak_2.9.0p5-1_armhf.deb + file_size: '59885096' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/raspbian/buster/riak_2.9.0p5-1_armhf.deb.sha + - os: rhel + versions: + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p5-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/6/riak-2.9.0p5-1.el6.src.rpm + file_size: '38469089' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/6/riak-2.9.0p5-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p5-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/6/riak-2.9.0p5-1.el6.x86_64.rpm + file_size: '64801144' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/6/riak-2.9.0p5-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p5-1.el7.src.rpm + file_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/7/riak-2.9.0p5-1.el7.src.rpm + file_size: '38424216' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/7/riak-2.9.0p5-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p5-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/7/riak-2.9.0p5-1.el7.x86_64.rpm + file_size: '64335368' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/7/riak-2.9.0p5-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.0p5-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/8/riak-2.9.0p5-1.el8.src.rpm + file_size: '38428422' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/8/riak-2.9.0p5-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.0p5-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/8/riak-2.9.0p5-1.el8.x86_64.rpm + file_size: '65404920' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/rhel/8/riak-2.9.0p5-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p5-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/bionic64/riak_2.9.0p5-1_amd64.deb + file_size: '60419288' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/bionic64/riak_2.9.0p5-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p5-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/precise64/riak_2.9.0p5-1_amd64.deb + file_size: '66830184' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/precise64/riak_2.9.0p5-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p5-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/trusty64/riak_2.9.0p5-1_amd64.deb + file_size: '60265716' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/trusty64/riak_2.9.0p5-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.0p5-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/xenial64/riak_2.9.0p5-1_amd64.deb + file_size: '60320790' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.0p5/ubuntu/xenial64/riak_2.9.0p5-1_amd64.deb.sha + 2.9.1: + - os: source + file_info: + file_name: riak-2.9.1.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/riak-2.9.1.tar.gz + file_size: '38562707' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/riak-2.9.1.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.1-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2/riak-2.9.1-1.amzn2.src.rpm + file_size: '38515921' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2/riak-2.9.1-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.1-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2/riak-2.9.1-1.amzn2.x86_64.rpm + file_size: '64990252' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2/riak-2.9.1-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.1-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2016.09/riak-2.9.1-1.amzn1.src.rpm + file_size: '38583616' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2016.09/riak-2.9.1-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.1-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2016.09/riak-2.9.1-1.amzn1.x86_64.rpm + file_size: '67840023' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/amazon/2016.09/riak-2.9.1-1.amzn1.x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/10/riak-dbgsym_2.9.1-1_amd64.deb + file_size: '10599760' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/10/riak-dbgsym_2.9.1-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/10/riak_2.9.1-1_amd64.deb + file_size: '60745272' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/10/riak_2.9.1-1_amd64.deb.sha + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/7/riak_2.9.1-1_amd64.deb + file_size: '67144238' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/7/riak_2.9.1-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/8/riak_2.9.1-1_amd64.deb + file_size: '60580146' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/8/riak_2.9.1-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/9/riak-dbgsym_2.9.1-1_amd64.deb + file_size: '7944192' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/9/riak-dbgsym_2.9.1-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/9/riak_2.9.1-1_amd64.deb + file_size: '60565458' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/debian/9/riak_2.9.1-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.1.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/freebsd/10.4/riak-2.9.1.txz + file_size: '67401988' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/freebsd/10.4/riak-2.9.1.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.1.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/freebsd/11.1/riak-2.9.1.txz + file_size: '68108760' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/freebsd/11.1/riak-2.9.1.txz.sha + - version: '12.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.1.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/freebsd/12.1/riak-2.9.1.txz + file_size: '69769328' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/freebsd/12.1/riak-2.9.1.txz.sha + - os: osx + versions: + - version: '10.11' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.11/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.11/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.11/libssl.1.0.0.dylib + file_size: 
'377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.11/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.1-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.11/riak-2.9.1-OSX-x86_64.tar.gz + file_size: '69329946' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.11/riak-2.9.1-OSX-x86_64.tar.gz.sha + - version: '10.14' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.1-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.14/riak-2.9.1-OSX-x86_64.tar.gz + file_size: '69762041' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/osx/10.14/riak-2.9.1-OSX-x86_64.tar.gz.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_2.9.1-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/raspbian/buster/riak-dbgsym_2.9.1-1_armhf.deb + file_size: '9994640' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/raspbian/buster/riak-dbgsym_2.9.1-1_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_2.9.1-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/raspbian/buster/riak_2.9.1-1_armhf.deb + file_size: '60108872' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/raspbian/buster/riak_2.9.1-1_armhf.deb.sha + - os: rhel + versions: + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-2.9.1-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/6/riak-2.9.1-1.el6.src.rpm + file_size: '38574124' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/6/riak-2.9.1-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.1-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/6/riak-2.9.1-1.el6.x86_64.rpm + file_size: '65031492' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/6/riak-2.9.1-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.1-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/7/riak-2.9.1-1.el7.src.rpm + file_size: '38516064' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/7/riak-2.9.1-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.1-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/7/riak-2.9.1-1.el7.x86_64.rpm + file_size: '64562472' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/7/riak-2.9.1-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.1-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/8/riak-2.9.1-1.el8.src.rpm + file_size: '38521259' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/8/riak-2.9.1-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.1-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/8/riak-2.9.1-1.el8.x86_64.rpm + file_size: '65635276' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.1/rhel/8/riak-2.9.1-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/bionic64/riak_2.9.1-1_amd64.deb + file_size: '60633172' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/bionic64/riak_2.9.1-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/precise64/riak_2.9.1-1_amd64.deb + file_size: '67084142' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/precise64/riak_2.9.1-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/trusty64/riak_2.9.1-1_amd64.deb + file_size: '60465162' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/trusty64/riak_2.9.1-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/xenial64/riak_2.9.1-1_amd64.deb + file_size: '60542840' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.1/ubuntu/xenial64/riak_2.9.1-1_amd64.deb.sha + 2.9.10: + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.10-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2/riak-2.9.10-1.amzn2.src.rpm + file_size: '38540228' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2/riak-2.9.10-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.10-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2/riak-2.9.10-1.amzn2.x86_64.rpm + file_size: '64997172' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2/riak-2.9.10-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.10-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2016.09/riak-2.9.10-1.amzn1.src.rpm + file_size: '38611648' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2016.09/riak-2.9.10-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.10-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2016.09/riak-2.9.10-1.amzn1.x86_64.rpm + file_size: '69754296' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/amazon/2016.09/riak-2.9.10-1.amzn1.x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.10-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/10/riak-dbgsym_2.9.10-1_amd64.deb + file_size: '10579908' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/10/riak-dbgsym_2.9.10-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.10-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/10/riak_2.9.10-1_amd64.deb + file_size: '60808772' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/10/riak_2.9.10-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.10-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/8/riak_2.9.10-1_amd64.deb + file_size: '60685298' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/8/riak_2.9.10-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.10-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/9/riak-dbgsym_2.9.10-1_amd64.deb + file_size: '7942902' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/9/riak-dbgsym_2.9.10-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.10-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/9/riak_2.9.10-1_amd64.deb + file_size: '60629264' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/debian/9/riak_2.9.10-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.10.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/freebsd/10.4/riak-2.9.10.txz + file_size: '67675832' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/freebsd/10.4/riak-2.9.10.txz.sha + - version: '11.1' + architectures: + - arch: unknown + file_info: + file_name: riak-2.9.10.pkg + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/freebsd/11.1/riak-2.9.10.pkg + file_size: '69531912' + - version: '12.1' + architectures: + - arch: unknown + file_info: + file_name: riak-2.9.10.pkg + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/freebsd/12.1/riak-2.9.10.pkg + file_size: '69881748' + - os: oracle + versions: + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.10-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/oracle/8/riak-2.9.10-1.el8.src.rpm + file_size: '38544843' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/oracle/8/riak-2.9.10-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.10-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/oracle/8/riak-2.9.10-1.el8.x86_64.rpm + file_size: '65745960' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/oracle/8/riak-2.9.10-1.el8.x86_64.rpm.sha + - os: osx + versions: + - version: '10.14' + architectures: + - arch: x86_64 + file_info: + file_name: riak-2.9.10-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/osx/10.14/riak-2.9.10-OSX-x86_64.tar.gz + file_size: '139678172' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/osx/10.14/riak-2.9.10-OSX-x86_64.tar.gz.sha + - os: rhel + versions: + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.10-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/7/riak-2.9.10-1.el7.src.rpm + file_size: '38540186' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/7/riak-2.9.10-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.10-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/7/riak-2.9.10-1.el7.x86_64.rpm + file_size: '64631652' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/7/riak-2.9.10-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.10-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/8/riak-2.9.10-1.el8.src.rpm + file_size: '38544878' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/8/riak-2.9.10-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.10-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/8/riak-2.9.10-1.el8.x86_64.rpm + file_size: '65745636' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.10/rhel/8/riak-2.9.10-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.10-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/bionic64/riak_2.9.10-1_amd64.deb + file_size: '60690716' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/bionic64/riak_2.9.10-1_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.10-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/focal64/riak_2.9.10-1_amd64.deb + file_size: '60856484' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/focal64/riak_2.9.10-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.10-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/trusty64/riak_2.9.10-1_amd64.deb + file_size: '60593070' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/trusty64/riak_2.9.10-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.10-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/xenial64/riak_2.9.10-1_amd64.deb + file_size: '60642470' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.10/ubuntu/xenial64/riak_2.9.10-1_amd64.deb.sha + 2.9.2: + - os: source + file_info: + file_name: riak-2.9.2.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/riak-2.9.2.tar.gz + file_size: '38576663' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/riak-2.9.2.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.2-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2/riak-2.9.2-1.amzn2.src.rpm + file_size: '38526323' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2/riak-2.9.2-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.2-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2/riak-2.9.2-1.amzn2.x86_64.rpm + file_size: '65017748' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2/riak-2.9.2-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.2-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2016.09/riak-2.9.2-1.amzn1.src.rpm + file_size: '38595730' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2016.09/riak-2.9.2-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.2-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2016.09/riak-2.9.2-1.amzn1.x86_64.rpm + file_size: '67873081' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/amazon/2016.09/riak-2.9.2-1.amzn1.x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/10/riak-dbgsym_2.9.2-1_amd64.deb + file_size: '10577404' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/10/riak-dbgsym_2.9.2-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/10/riak_2.9.2-1_amd64.deb + file_size: '60765304' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/10/riak_2.9.2-1_amd64.deb.sha + - version: '7' + architectures: + - arch: 
amd64 + file_info: + file_name: riak_2.9.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/7/riak_2.9.2-1_amd64.deb + file_size: '67168618' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/7/riak_2.9.2-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/8/riak_2.9.2-1_amd64.deb + file_size: '60600878' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/8/riak_2.9.2-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/9/riak-dbgsym_2.9.2-1_amd64.deb + file_size: '7944482' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/9/riak-dbgsym_2.9.2-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/9/riak_2.9.2-1_amd64.deb + file_size: '60588294' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/debian/9/riak_2.9.2-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.2.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/freebsd/10.4/riak-2.9.2.txz + file_size: '67420880' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/freebsd/10.4/riak-2.9.2.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.2.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/freebsd/11.1/riak-2.9.2.txz + file_size: '68118116' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/freebsd/11.1/riak-2.9.2.txz.sha + - version: '12.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.2.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/freebsd/12.1/riak-2.9.2.txz + file_size: '69781912' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/freebsd/12.1/riak-2.9.2.txz.sha + - os: osx + versions: + - version: '10.11' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.11/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.11/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.11/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.11/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.2-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.11/riak-2.9.2-OSX-x86_64.tar.gz + file_size: '69361406' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.11/riak-2.9.2-OSX-x86_64.tar.gz.sha + - version: '10.14' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.2-OSX-x86_64.tar.gz + file_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.14/riak-2.9.2-OSX-x86_64.tar.gz + file_size: '69790481' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/osx/10.14/riak-2.9.2-OSX-x86_64.tar.gz.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_2.9.2-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/raspbian/buster/riak-dbgsym_2.9.2-1_armhf.deb + file_size: '10016172' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/raspbian/buster/riak-dbgsym_2.9.2-1_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_2.9.2-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/raspbian/buster/riak_2.9.2-1_armhf.deb + file_size: '60128992' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/raspbian/buster/riak_2.9.2-1_armhf.deb.sha + - os: rhel + versions: + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-2.9.2-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/6/riak-2.9.2-1.el6.src.rpm + file_size: '38588057' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/6/riak-2.9.2-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.2-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/6/riak-2.9.2-1.el6.x86_64.rpm + file_size: '65060684' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/6/riak-2.9.2-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.2-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/7/riak-2.9.2-1.el7.src.rpm + file_size: '38526798' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/7/riak-2.9.2-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.2-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/7/riak-2.9.2-1.el7.x86_64.rpm + file_size: '64591052' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/7/riak-2.9.2-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.2-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/8/riak-2.9.2-1.el8.src.rpm + file_size: '38524870' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/8/riak-2.9.2-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.2-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/8/riak-2.9.2-1.el8.x86_64.rpm + file_size: '65660328' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/rhel/8/riak-2.9.2-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/bionic64/riak_2.9.2-1_amd64.deb + file_size: '60188376' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/bionic64/riak_2.9.2-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/precise64/riak_2.9.2-1_amd64.deb + file_size: '67116004' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/precise64/riak_2.9.2-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/trusty64/riak_2.9.2-1_amd64.deb + file_size: '60501996' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/trusty64/riak_2.9.2-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/xenial64/riak_2.9.2-1_amd64.deb + file_size: '60562430' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.2/ubuntu/xenial64/riak_2.9.2-1_amd64.deb.sha + 2.9.3: + - os: source + file_info: + file_name: riak-2.9.3.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/riak-2.9.3.tar.gz + file_size: '38577364' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/riak-2.9.3.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/amazon/2/riak-2.9.3-1.amzn2.src.rpm + file_size: '38523481' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/amazon/2/riak-2.9.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/amazon/2/riak-2.9.3-1.amzn2.x86_64.rpm + file_size: '65020276' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/amazon/2/riak-2.9.3-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/amazon/2016.09/riak-2.9.3-1.amzn1.src.rpm + file_size: '38596305' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/amazon/2016.09/riak-2.9.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/amazon/2016.09/riak-2.9.3-1.amzn1.x86_64.rpm + file_size: '67879525' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/amazon/2016.09/riak-2.9.3-1.amzn1.x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/10/riak-dbgsym_2.9.3-1_amd64.deb + file_size: '10578320' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/10/riak-dbgsym_2.9.3-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/10/riak_2.9.3-1_amd64.deb + file_size: '60769656' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/10/riak_2.9.3-1_amd64.deb.sha + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/7/riak_2.9.3-1_amd64.deb + file_size: '67168300' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/7/riak_2.9.3-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/8/riak_2.9.3-1_amd64.deb + file_size: '60605234' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/8/riak_2.9.3-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/9/riak-dbgsym_2.9.3-1_amd64.deb + file_size: '7943480' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/9/riak-dbgsym_2.9.3-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.3-1_amd64.deb + file_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/9/riak_2.9.3-1_amd64.deb + file_size: '60593748' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/debian/9/riak_2.9.3-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.3.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/freebsd/10.4/riak-2.9.3.txz + file_size: '67420584' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/freebsd/10.4/riak-2.9.3.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.3.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/freebsd/11.1/riak-2.9.3.txz + file_size: '68119668' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/freebsd/11.1/riak-2.9.3.txz.sha + - version: '12.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.3.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/freebsd/12.1/riak-2.9.3.txz + file_size: '69786660' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/freebsd/12.1/riak-2.9.3.txz.sha + - os: osx + versions: + - version: '10.14' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.3-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/osx/10.14/riak-2.9.3-OSX-x86_64.tar.gz + file_size: '69798388' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/osx/10.14/riak-2.9.3-OSX-x86_64.tar.gz.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_2.9.3-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/raspbian/buster/riak-dbgsym_2.9.3-1_armhf.deb + file_size: '10013584' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/raspbian/buster/riak-dbgsym_2.9.3-1_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_2.9.3-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/raspbian/buster/riak_2.9.3-1_armhf.deb + file_size: '60125228' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/raspbian/buster/riak_2.9.3-1_armhf.deb.sha + - os: rhel + versions: + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-2.9.3-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/6/riak-2.9.3-1.el6.src.rpm + file_size: '38581597' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/6/riak-2.9.3-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.3-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/6/riak-2.9.3-1.el6.x86_64.rpm + file_size: '65064056' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/6/riak-2.9.3-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/7/riak-2.9.3-1.el7.src.rpm + file_size: '38525186' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/7/riak-2.9.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: 
riak-2.9.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/7/riak-2.9.3-1.el7.x86_64.rpm + file_size: '64594084' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/7/riak-2.9.3-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/8/riak-2.9.3-1.el8.src.rpm + file_size: '38528586' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/8/riak-2.9.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/8/riak-2.9.3-1.el8.x86_64.rpm + file_size: '65663600' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/rhel/8/riak-2.9.3-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/ubuntu/bionic64/riak_2.9.3-1_amd64.deb + file_size: '60660972' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/ubuntu/bionic64/riak_2.9.3-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/ubuntu/precise64/riak_2.9.3-1_amd64.deb + file_size: '67114726' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/ubuntu/precise64/riak_2.9.3-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/ubuntu/trusty64/riak_2.9.3-1_amd64.deb + file_size: '60504410' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/ubuntu/trusty64/riak_2.9.3-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.3-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/ubuntu/xenial64/riak_2.9.3-1_amd64.deb + file_size: '60589688' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.3/ubuntu/xenial64/riak_2.9.3-1_amd64.deb.sha + 2.9.4: + - os: source + file_info: + file_name: riak-2.9.4.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/riak-2.9.4.tar.gz + file_size: '38582451' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/riak-2.9.4.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.4-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2/riak-2.9.4-1.amzn2.src.rpm + file_size: '38526389' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2/riak-2.9.4-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.4-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2/riak-2.9.4-1.amzn2x86_64.rpm + file_size: '64958932' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2/riak-2.9.4-1.amzn2x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.4-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2016.09/riak-2.9.4-1.amzn1.src.rpm + file_size: '38595596' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2016.09/riak-2.9.4-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.4-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2016.09/riak-2.9.4-1.amzn1x86_64.rpm + file_size: '69711638' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.4/amazon/2016.09/riak-2.9.4-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/10/riak-dbgsym_2.9.4-1_amd64.deb + file_size: '10581436' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/10/riak-dbgsym_2.9.4-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/10/riak_2.9.4-1_amd64.deb + file_size: '60771144' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/10/riak_2.9.4-1_amd64.deb.sha + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/7/riak_2.9.4-1_amd64.deb + file_size: '67182608' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/7/riak_2.9.4-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/8/riak_2.9.4-1_amd64.deb + file_size: '60647192' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/8/riak_2.9.4-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/9/riak-dbgsym_2.9.4-1_amd64.deb + file_size: '7924500' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/9/riak-dbgsym_2.9.4-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/9/riak_2.9.4-1_amd64.deb + file_size: '60596756' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/debian/9/riak_2.9.4-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.4.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/freebsd/10.4/riak-2.9.4.txz + file_size: '67637220' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/freebsd/10.4/riak-2.9.4.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.4.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/freebsd/11.1/riak-2.9.4.txz + file_size: '69496092' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/freebsd/11.1/riak-2.9.4.txz.sha + - version: '12.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.4.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/freebsd/12.1/riak-2.9.4.txz + file_size: '69845536' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/freebsd/12.1/riak-2.9.4.txz.sha + - os: osx + versions: + - version: '10.14' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.4-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/osx/10.14/riak-2.9.4-OSX-x86_64.tar.gz + file_size: '69798827' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.4/osx/10.14/riak-2.9.4-OSX-x86_64.tar.gz.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_2.9.4-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/raspbian/buster/riak-dbgsym_2.9.4-1_armhf.deb + file_size: '9995680' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/raspbian/buster/riak-dbgsym_2.9.4-1_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_2.9.4-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/raspbian/buster/riak_2.9.4-1_armhf.deb + file_size: '60133336' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/raspbian/buster/riak_2.9.4-1_armhf.deb.sha + - os: rhel + versions: + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-2.9.4-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/6/riak-2.9.4-1.el6.src.rpm + file_size: '38594390' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/6/riak-2.9.4-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.4-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/6/riak-2.9.4-1.el6.x86_64.rpm + file_size: '65063636' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/6/riak-2.9.4-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.4-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/7/riak-2.9.4-1.el7.src.rpm + file_size: '38526469' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/7/riak-2.9.4-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.4-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/7/riak-2.9.4-1.el7.x86_64.rpm + file_size: '64594060' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/7/riak-2.9.4-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.4-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/8/riak-2.9.4-1.el8.src.rpm + file_size: '38528005' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/8/riak-2.9.4-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.4-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/8/riak-2.9.4-1.el8.x86_64.rpm + file_size: '65709928' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/rhel/8/riak-2.9.4-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/bionic64/riak_2.9.4-1_amd64.deb + file_size: '60657408' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/bionic64/riak_2.9.4-1_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/focal64/riak_2.9.4-1_amd64.deb + file_size: '60822916' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/focal64/riak_2.9.4-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/precise64/riak_2.9.4-1_amd64.deb + file_size: '67105642' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/precise64/riak_2.9.4-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: 
riak_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/trusty64/riak_2.9.4-1_amd64.deb + file_size: '60540294' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/trusty64/riak_2.9.4-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.4-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/xenial64/riak_2.9.4-1_amd64.deb + file_size: '60608838' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.4/ubuntu/xenial64/riak_2.9.4-1_amd64.deb.sha + 2.9.7: + - os: source + file_info: + file_name: riak-2.9.7.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/riak-2.9.7.tar.gz + file_size: '38592668' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/riak-2.9.7.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.7-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2/riak-2.9.7-1.amzn2.src.rpm + file_size: '38535130' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2/riak-2.9.7-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.7-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2/riak-2.9.7-1.amzn2x86_64.rpm + file_size: '64976776' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2/riak-2.9.7-1.amzn2x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.7-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2016.09/riak-2.9.7-1.amzn1.src.rpm + file_size: '38602500' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2016.09/riak-2.9.7-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.7-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2016.09/riak-2.9.7-1.amzn1x86_64.rpm + file_size: '69727076' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/amazon/2016.09/riak-2.9.7-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/10/riak-dbgsym_2.9.7-1_amd64.deb + file_size: '10578092' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/10/riak-dbgsym_2.9.7-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/10/riak_2.9.7-1_amd64.deb + file_size: '60788240' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/10/riak_2.9.7-1_amd64.deb.sha + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/7/riak_2.9.7-1_amd64.deb + file_size: '67197856' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/7/riak_2.9.7-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/8/riak_2.9.7-1_amd64.deb + file_size: '60679036' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/8/riak_2.9.7-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/9/riak-dbgsym_2.9.7-1_amd64.deb + file_size: '7924188' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/9/riak-dbgsym_2.9.7-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/9/riak_2.9.7-1_amd64.deb + file_size: '60604826' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/debian/9/riak_2.9.7-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.7.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/freebsd/10.4/riak-2.9.7.txz + file_size: '67654200' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/freebsd/10.4/riak-2.9.7.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.7.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/freebsd/11.1/riak-2.9.7.txz + file_size: '69514060' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/freebsd/11.1/riak-2.9.7.txz.sha + - version: '12.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.7.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/freebsd/12.1/riak-2.9.7.txz + file_size: '69870976' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/freebsd/12.1/riak-2.9.7.txz.sha + - os: osx + versions: + - version: '10.14' + architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.7-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/osx/10.14/riak-2.9.7-OSX-x86_64.tar.gz + file_size: '69819249' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/osx/10.14/riak-2.9.7-OSX-x86_64.tar.gz.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_2.9.7-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/raspbian/buster/riak-dbgsym_2.9.7-1_armhf.deb + file_size: '9996712' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/raspbian/buster/riak-dbgsym_2.9.7-1_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_2.9.7-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/raspbian/buster/riak_2.9.7-1_armhf.deb + file_size: '60149272' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/raspbian/buster/riak_2.9.7-1_armhf.deb.sha + - os: rhel + versions: + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-2.9.7-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/6/riak-2.9.7-1.el6.src.rpm + file_size: '38604524' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/6/riak-2.9.7-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.7-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/6/riak-2.9.7-1.el6.x86_64.rpm + file_size: '65080928' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/6/riak-2.9.7-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.7-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/7/riak-2.9.7-1.el7.src.rpm + file_size: '38535381' 
+ chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/7/riak-2.9.7-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.7-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/7/riak-2.9.7-1.el7.x86_64.rpm + file_size: '64613460' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/7/riak-2.9.7-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.7-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/8/riak-2.9.7-1.el8.src.rpm + file_size: '38528830' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/8/riak-2.9.7-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.7-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/8/riak-2.9.7-1.el8.x86_64.rpm + file_size: '65726120' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/rhel/8/riak-2.9.7-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/bionic64/riak_2.9.7-1_amd64.deb + file_size: '60676796' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/bionic64/riak_2.9.7-1_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/focal64/riak_2.9.7-1_amd64.deb + file_size: '60840280' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/focal64/riak_2.9.7-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/precise64/riak_2.9.7-1_amd64.deb + file_size: '67125878' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/precise64/riak_2.9.7-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/trusty64/riak_2.9.7-1_amd64.deb + file_size: '60554658' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/trusty64/riak_2.9.7-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.7-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/xenial64/riak_2.9.7-1_amd64.deb + file_size: '60624210' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.7/ubuntu/xenial64/riak_2.9.7-1_amd64.deb.sha + 2.9.8: + - os: source + file_info: + file_name: riak-2.9.8.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/riak-2.9.8.tar.gz + file_size: '38601129' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/riak-2.9.8.tar.gz.sha + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.8-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2/riak-2.9.8-1.amzn2.src.rpm + file_size: '38528771' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2/riak-2.9.8-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.8-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2/riak-2.9.8-1.amzn2.x86_64.rpm + file_size: '64993748' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2/riak-2.9.8-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.8-1.amzn1.src.rpm + file_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2016.09/riak-2.9.8-1.amzn1.src.rpm + file_size: '38613198' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2016.09/riak-2.9.8-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.8-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2016.09/riak-2.9.8-1.amzn1.x86_64.rpm + file_size: '69745843' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/amazon/2016.09/riak-2.9.8-1.amzn1.x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.8-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/10/riak-dbgsym_2.9.8-1_amd64.deb + file_size: '10580572' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/10/riak-dbgsym_2.9.8-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.8-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/10/riak_2.9.8-1_amd64.deb + file_size: '60805240' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/10/riak_2.9.8-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.8-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/8/riak_2.9.8-1_amd64.deb + file_size: '60675194' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/8/riak_2.9.8-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.8-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/9/riak-dbgsym_2.9.8-1_amd64.deb + file_size: '7923704' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/9/riak-dbgsym_2.9.8-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.8-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/9/riak_2.9.8-1_amd64.deb + file_size: '60627968' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/debian/9/riak_2.9.8-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.8.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/freebsd/10.4/riak-2.9.8.txz + file_size: '67673004' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/freebsd/10.4/riak-2.9.8.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.8.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/freebsd/11.1/riak-2.9.8.txz + file_size: '69531492' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/freebsd/11.1/riak-2.9.8.txz.sha + - version: '12.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.8.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/freebsd/12.1/riak-2.9.8.txz + file_size: '69878996' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/freebsd/12.1/riak-2.9.8.txz.sha + - os: oracle + versions: + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.8-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/oracle/8/riak-2.9.8-1.el8.src.rpm + file_size: '38541664' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/oracle/8/riak-2.9.8-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.8-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/oracle/8/riak-2.9.8-1.el8.x86_64.rpm + file_size: '65742948' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/oracle/8/riak-2.9.8-1.el8.x86_64.rpm.sha + - os: osx + versions: + - version: '10.14' + 
architectures: + - arch: unknown + file_info: + file_name: libcrypto.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/osx/10.14/libcrypto.1.0.0.dylib + file_size: '1871160' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/osx/10.14/libcrypto.1.0.0.dylib.sha + - arch: unknown + file_info: + file_name: libssl.1.0.0.dylib + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/osx/10.14/libssl.1.0.0.dylib + file_size: '377416' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/osx/10.14/libssl.1.0.0.dylib.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.8-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/osx/10.14/riak-2.9.8-OSX-x86_64.tar.gz + file_size: '69838001' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/osx/10.14/riak-2.9.8-OSX-x86_64.tar.gz.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_2.9.8-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/raspbian/buster/riak-dbgsym_2.9.8-1_armhf.deb + file_size: '9996520' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/raspbian/buster/riak-dbgsym_2.9.8-1_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_2.9.8-1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/raspbian/buster/riak_2.9.8-1_armhf.deb + file_size: '60162508' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/raspbian/buster/riak_2.9.8-1_armhf.deb.sha + - os: rhel + versions: + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.8-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/7/riak-2.9.8-1.el7.src.rpm + file_size: '38539342' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/7/riak-2.9.8-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.8-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/7/riak-2.9.8-1.el7.x86_64.rpm + file_size: '64628312' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/7/riak-2.9.8-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.8-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/8/riak-2.9.8-1.el8.src.rpm + file_size: '38545313' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/8/riak-2.9.8-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.8-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/8/riak-2.9.8-1.el8.x86_64.rpm + file_size: '65742484' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/rhel/8/riak-2.9.8-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.8-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/bionic64/riak_2.9.8-1_amd64.deb + file_size: '60680624' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/bionic64/riak_2.9.8-1_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.8-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/focal64/riak_2.9.8-1_amd64.deb + file_size: '60853640' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/focal64/riak_2.9.8-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.8-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/precise64/riak_2.9.8-1_amd64.deb + file_size: '67145210' + chksum_href: 
https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/precise64/riak_2.9.8-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.8-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/trusty64/riak_2.9.8-1_amd64.deb + file_size: '60574792' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/trusty64/riak_2.9.8-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.8-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/xenial64/riak_2.9.8-1_amd64.deb + file_size: '60641920' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.8/ubuntu/xenial64/riak_2.9.8-1_amd64.deb.sha + 2.9.9: + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-2.9.9-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2/riak-2.9.9-1.amzn2.src.rpm + file_size: '38530559' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2/riak-2.9.9-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.9-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2/riak-2.9.9-1.amzn2.x86_64.rpm + file_size: '64998088' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2/riak-2.9.9-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-2.9.9-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2016.09/riak-2.9.9-1.amzn1.src.rpm + file_size: '38610873' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2016.09/riak-2.9.9-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.9-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2016.09/riak-2.9.9-1.amzn1.x86_64.rpm + file_size: '69755781' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/amazon/2016.09/riak-2.9.9-1.amzn1.x86_64.rpm.sha + - os: debian + versions: + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.9-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/debian/8/riak_2.9.9-1_amd64.deb + file_size: '60684056' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/debian/8/riak_2.9.9-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_2.9.9-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/debian/9/riak-dbgsym_2.9.9-1_amd64.deb + file_size: '7926656' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/debian/9/riak-dbgsym_2.9.9-1_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_2.9.9-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/debian/9/riak_2.9.9-1_amd64.deb + file_size: '60631742' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/debian/9/riak_2.9.9-1_amd64.deb.sha + - os: freebsd + versions: + - version: '10.4' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.9.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/freebsd/10.4/riak-2.9.9.txz + file_size: '67673892' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/freebsd/10.4/riak-2.9.9.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: riak-2.9.9.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/freebsd/11.1/riak-2.9.9.txz + file_size: '69534600' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/freebsd/11.1/riak-2.9.9.txz.sha + - version: '12.1' + architectures: + - arch: txz + file_info: 
+ file_name: riak-2.9.9.txz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/freebsd/12.1/riak-2.9.9.txz + file_size: '69886296' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/freebsd/12.1/riak-2.9.9.txz.sha + - os: oracle + versions: + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.9-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/oracle/8/riak-2.9.9-1.el8.src.rpm + file_size: '38544092' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/oracle/8/riak-2.9.9-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.9-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/oracle/8/riak-2.9.9-1.el8.x86_64.rpm + file_size: '65742888' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/oracle/8/riak-2.9.9-1.el8.x86_64.rpm.sha + - os: osx + versions: + - version: '10.14' + architectures: + - arch: x86_64 + file_info: + file_name: riak-2.9.9-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/osx/10.14/riak-2.9.9-OSX-x86_64.tar.gz + file_size: '69837318' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/osx/10.14/riak-2.9.9-OSX-x86_64.tar.gz.sha + - os: rhel + versions: + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-2.9.9-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/7/riak-2.9.9-1.el7.src.rpm + file_size: '38540704' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/7/riak-2.9.9-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.9-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/7/riak-2.9.9-1.el7.x86_64.rpm + file_size: '64633156' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/7/riak-2.9.9-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-2.9.9-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/8/riak-2.9.9-1.el8.src.rpm + file_size: '38525910' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/8/riak-2.9.9-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-2.9.9-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/8/riak-2.9.9-1.el8.x86_64.rpm + file_size: '65747796' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/rhel/8/riak-2.9.9-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.9-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/bionic64/riak_2.9.9-1_amd64.deb + file_size: '60691448' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/bionic64/riak_2.9.9-1_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.9-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/focal64/riak_2.9.9-1_amd64.deb + file_size: '60857472' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/focal64/riak_2.9.9-1_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.9-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/trusty64/riak_2.9.9-1_amd64.deb + file_size: '60574978' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/trusty64/riak_2.9.9-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_2.9.9-1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/xenial64/riak_2.9.9-1_amd64.deb + file_size: 
'60619562' + chksum_href: https://files.tiot.jp/riak/kv/2.9/2.9.9/ubuntu/xenial64/riak_2.9.9-1_amd64.deb.sha + 3.0.1: + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-3.0.1-OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1-OTP20.3-1.amzn2.src.rpm + file_size: '1095813' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1-OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1-OTP20.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1-OTP20.3-1.amzn2x86_64.rpm + file_size: '39708684' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1-OTP20.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1-OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1-OTP22.3-1.amzn2.src.rpm + file_size: '1095813' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1-OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1-OTP22.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1-OTP22.3-1.amzn2x86_64.rpm + file_size: '41016668' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1-OTP22.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1.yokozuna.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1.yokozuna.OTP20.3-1.amzn2.src.rpm + file_size: '1095570' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1.yokozuna.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1.yokozuna.OTP20.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1.yokozuna.OTP20.3-1.amzn2x86_64.rpm + file_size: '95852392' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1.yokozuna.OTP20.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1.yokozuna.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1.yokozuna.OTP22.3-1.amzn2.src.rpm + file_size: '1095648' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1.yokozuna.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1.yokozuna.OTP22.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1.yokozuna.OTP22.3-1.amzn2x86_64.rpm + file_size: '97157588' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2/riak-3.0.1.yokozuna.OTP22.3-1.amzn2x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-3.0.1-OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1-OTP20.3-1.amzn1.src.rpm + file_size: '1095733' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1-OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1-OTP20.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1-OTP20.3-1.amzn1x86_64.rpm + file_size: '45395988' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1-OTP20.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1-OTP22.3-1.amzn1.src.rpm + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1-OTP22.3-1.amzn1.src.rpm + file_size: '1095732' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1-OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1-OTP22.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1-OTP22.3-1.amzn1x86_64.rpm + file_size: '46824197' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1-OTP22.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1.yokozuna.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1.yokozuna.OTP20.3-1.amzn1.src.rpm + file_size: '1095323' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1.yokozuna.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1.yokozuna.OTP20.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1.yokozuna.OTP20.3-1.amzn1x86_64.rpm + file_size: '102803063' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1.yokozuna.OTP20.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1.yokozuna.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1.yokozuna.OTP22.3-1.amzn1.src.rpm + file_size: '1095351' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1.yokozuna.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1.yokozuna.OTP22.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1.yokozuna.OTP22.3-1.amzn1x86_64.rpm + file_size: '104237051' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/amazon/2016.09/riak-3.0.1.yokozuna.OTP22.3-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.1-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak-dbgsym_3.0.1-OTP20.3_amd64.deb + file_size: '17072432' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak-dbgsym_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak-dbgsym_3.0.1-OTP22.3_amd64.deb + file_size: '18019604' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak-dbgsym_3.0.1-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak_3.0.1-OTP20.3_amd64.deb + file_size: '35869320' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak_3.0.1-OTP22.3_amd64.deb + file_size: '37176888' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak_3.0.1-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_size: '91695164' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak_3.0.1-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP22.3_amd64.deb 
+ file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_size: '93004900' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/10/riak_3.0.1-yokozuna-OTP22.3_amd64.deb.sha + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/7/riak_3.0.1-OTP20.3_amd64.deb + file_size: '42724162' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/7/riak_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/7/riak_3.0.1-OTP22.3_amd64.deb + file_size: '44189814' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/7/riak_3.0.1-OTP22.3_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/8/riak_3.0.1-OTP20.3_amd64.deb + file_size: '34704772' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/8/riak_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/8/riak_3.0.1-OTP22.3_amd64.deb + file_size: '35981008' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/8/riak_3.0.1-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/8/riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_size: '90575162' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/8/riak_3.0.1-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/8/riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_size: '91862650' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/8/riak_3.0.1-yokozuna-OTP22.3_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.1-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak-dbgsym_3.0.1-OTP20.3_amd64.deb + file_size: '13043154' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak-dbgsym_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak-dbgsym_3.0.1-OTP22.3_amd64.deb + file_size: '13720988' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak-dbgsym_3.0.1-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak_3.0.1-OTP20.3_amd64.deb + file_size: '35061488' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak_3.0.1-OTP22.3_amd64.deb + file_size: '36289660' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak_3.0.1-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_size: '90958434' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak_3.0.1-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_size: '92135034' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/debian/9/riak_3.0.1-yokozuna-OTP22.3_amd64.deb.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_3.0.1-OTP20.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak-dbgsym_3.0.1-OTP20.3_armhf.deb + file_size: '16061248' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak-dbgsym_3.0.1-OTP20.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak-dbgsym_3.0.1-yokozuna-OTP20.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak-dbgsym_3.0.1-yokozuna-OTP20.3_armhf.deb + file_size: '16054664' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak-dbgsym_3.0.1-yokozuna-OTP20.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak-dbgsym_3.0.1-yokozuna-OTP22.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak-dbgsym_3.0.1-yokozuna-OTP22.3_armhf.deb + file_size: '16921104' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak-dbgsym_3.0.1-yokozuna-OTP22.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak-dbgsym_3.0.1_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak-dbgsym_3.0.1_armhf.deb + file_size: '16916732' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak-dbgsym_3.0.1_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_3.0.1-OTP20.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak_3.0.1-OTP20.3_armhf.deb + file_size: '35142860' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak_3.0.1-OTP20.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_3.0.1-OTP22.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak_3.0.1-OTP22.3_armhf.deb + file_size: '36476072' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak_3.0.1-OTP22.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_3.0.1-yokozuna-OTP20.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak_3.0.1-yokozuna-OTP20.3_armhf.deb + file_size: '90993744' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak_3.0.1-yokozuna-OTP20.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_3.0.1-yokozuna-OTP22.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak_3.0.1-yokozuna-OTP22.3_armhf.deb + file_size: '92362892' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/raspbian/buster/riak_3.0.1-yokozuna-OTP22.3_armhf.deb.sha + - os: rhel + versions: + - version: 6-broken-builds-do-not-use + architectures: + - arch: source + file_info: + file_name: riak-3.0.1-OTP20.3-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/6-broken-builds-do-not-use/riak-3.0.1-OTP20.3-1.el6.src.rpm + file_size: '1095813' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/6-broken-builds-do-not-use/riak-3.0.1-OTP20.3-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1-OTP20.3-1.el6x86_64.rpm + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/6-broken-builds-do-not-use/riak-3.0.1-OTP20.3-1.el6x86_64.rpm + file_size: '39503596' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/6-broken-builds-do-not-use/riak-3.0.1-OTP20.3-1.el6x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1-OTP22.3-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/6-broken-builds-do-not-use/riak-3.0.1-OTP22.3-1.el6.src.rpm + file_size: '1095813' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/6-broken-builds-do-not-use/riak-3.0.1-OTP22.3-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1-OTP22.3-1.el6x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/6-broken-builds-do-not-use/riak-3.0.1-OTP22.3-1.el6x86_64.rpm + file_size: '40792564' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/6-broken-builds-do-not-use/riak-3.0.1-OTP22.3-1.el6x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-3.0.1-OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1-OTP20.3-1.el7.src.rpm + file_size: '1095813' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1-OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1-OTP20.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1-OTP20.3-1.el7x86_64.rpm + file_size: '38806148' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1-OTP20.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1-OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1-OTP22.3-1.el7.src.rpm + file_size: '1095812' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1-OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1-OTP22.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1-OTP22.3-1.el7x86_64.rpm + file_size: '40100124' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1-OTP22.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1_yokozuna-OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1_yokozuna-OTP22.3-1.el7.src.rpm + file_size: '1095654' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1_yokozuna-OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1_yokozuna-OTP22.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1_yokozuna-OTP22.3-1.el7x86_64.rpm + file_size: '96240304' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/7/riak-3.0.1_yokozuna-OTP22.3-1.el7x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.1-OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1-OTP20.3-1.el8.src.rpm + file_size: '1100171' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1-OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1-OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1-OTP20.3-1.el8x86_64.rpm + file_size: '41131248' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1-OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1-OTP22.3-1.el8.src.rpm + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1-OTP22.3-1.el8.src.rpm + file_size: '1100171' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1-OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1-OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1-OTP22.3-1.el8x86_64.rpm + file_size: '42453060' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1-OTP22.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1_yokozuna-OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1_yokozuna-OTP20.3-1.el8.src.rpm + file_size: '1100127' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1_yokozuna-OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1_yokozuna-OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1_yokozuna-OTP20.3-1.el8x86_64.rpm + file_size: '97503404' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1_yokozuna-OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.1_yokozuna-OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1_yokozuna-OTP22.3-1.el8.src.rpm + file_size: '1100127' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1_yokozuna-OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.1_yokozuna-OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1_yokozuna-OTP22.3-1.el8x86_64.rpm + file_size: '98820840' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/rhel/8/riak-3.0.1_yokozuna-OTP22.3-1.el8x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1-OTP20.3_amd64.deb + file_size: '35234816' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1-OTP22.3_amd64.deb + file_size: '36509996' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_size: '91094636' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_size: '92309940' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1_amd64.deb + file_size: '35236780' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/bionic64/riak_3.0.1_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP20.3_amd64.deb + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/focal64/riak_3.0.1-OTP20.3_amd64.deb + file_size: '35922300' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/focal64/riak_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/focal64/riak_3.0.1-OTP22.3_amd64.deb + file_size: '37218432' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/focal64/riak_3.0.1-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/focal64/riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_size: '91742996' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/focal64/riak_3.0.1-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/focal64/riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_size: '93030488' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/focal64/riak_3.0.1-yokozuna-OTP22.3_amd64.deb.sha + - version: precise64-broken-builds-do-not-use + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/precise64-broken-builds-do-not-use/riak_3.0.1-OTP20.3_amd64.deb + file_size: '42616882' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/precise64-broken-builds-do-not-use/riak_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/precise64-broken-builds-do-not-use/riak_3.0.1-OTP22.3_amd64.deb + file_size: '44058204' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/precise64-broken-builds-do-not-use/riak_3.0.1-OTP22.3_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/trusty64/riak_3.0.1-OTP20.3_amd64.deb + file_size: '34463466' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/trusty64/riak_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/trusty64/riak_3.0.1-OTP22.3_amd64.deb + file_size: '35735324' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/trusty64/riak_3.0.1-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/trusty64/riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_size: '90329096' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/trusty64/riak_3.0.1-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/trusty64/riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_size: '91589666' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/trusty64/riak_3.0.1-yokozuna-OTP22.3_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.1-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/xenial64/riak_3.0.1-OTP20.3_amd64.deb + file_size: '34651634' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/xenial64/riak_3.0.1-OTP20.3_amd64.deb.sha + - arch: amd64 + 
file_info: + file_name: riak_3.0.1-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/xenial64/riak_3.0.1-OTP22.3_amd64.deb + file_size: '35937828' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/xenial64/riak_3.0.1-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/xenial64/riak_3.0.1-yokozuna-OTP20.3_amd64.deb + file_size: '90493066' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/xenial64/riak_3.0.1-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/xenial64/riak_3.0.1-yokozuna-OTP22.3_amd64.deb + file_size: '91770388' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.1/ubuntu/xenial64/riak_3.0.1-yokozuna-OTP22.3_amd64.deb.sha + 3.0.2: + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-3.0.2.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.OTP20.3-1.amzn2.src.rpm + file_size: '1097843' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.OTP20.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.OTP20.3-1.amzn2x86_64.rpm + file_size: '40842844' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.OTP20.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.OTP22.3-1.amzn2.src.rpm + file_size: '1097843' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.OTP22.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.OTP22.3-1.amzn2x86_64.rpm + file_size: '42166268' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.OTP22.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2.yokozuna.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.yokozuna.OTP20.3-1.amzn2.src.rpm + file_size: '1098250' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.yokozuna.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.yokozuna.OTP20.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.yokozuna.OTP20.3-1.amzn2x86_64.rpm + file_size: '96979800' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.yokozuna.OTP20.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2.yokozuna.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.yokozuna.OTP22.3-1.amzn2.src.rpm + file_size: '1098250' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.yokozuna.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.yokozuna.OTP22.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.yokozuna.OTP22.3-1.amzn2x86_64.rpm + file_size: '98297292' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2/riak-3.0.2.yokozuna.OTP22.3-1.amzn2x86_64.rpm.sha + - version: 
'2016.09' + architectures: + - arch: source + file_info: + file_name: riak-3.0.2.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.OTP20.3-1.amzn1.src.rpm + file_size: '1097764' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.OTP20.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.OTP20.3-1.amzn1x86_64.rpm + file_size: '46613430' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.OTP20.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.OTP22.3-1.amzn1.src.rpm + file_size: '1097763' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.OTP22.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.OTP22.3-1.amzn1x86_64.rpm + file_size: '48071539' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.OTP22.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2.yokozuna.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.yokozuna.OTP20.3-1.amzn1.src.rpm + file_size: '1097814' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.yokozuna.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.yokozuna.OTP20.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.yokozuna.OTP20.3-1.amzn1x86_64.rpm + file_size: '104018639' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.yokozuna.OTP20.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2.yokozuna.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.yokozuna.OTP22.3-1.amzn1.src.rpm + file_size: '1097814' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.yokozuna.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.yokozuna.OTP22.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.yokozuna.OTP22.3-1.amzn1x86_64.rpm + file_size: '105486618' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/amazon/2016.09/riak-3.0.2.yokozuna.OTP22.3-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.2-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak-dbgsym_3.0.2-OTP20.3_amd64.deb + file_size: '17072108' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak-dbgsym_3.0.2-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.2-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak-dbgsym_3.0.2-OTP22.3_amd64.deb + file_size: '18023432' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak-dbgsym_3.0.2-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak-dbgsym_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: 
'17047744' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak-dbgsym_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak-dbgsym_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '17997180' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak-dbgsym_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak_3.0.2-OTP20.3_amd64.deb + file_size: '36945308' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak_3.0.2-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak_3.0.2-OTP22.3_amd64.deb + file_size: '38264356' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak_3.0.2-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: '92761020' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '94118696' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/10/riak_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/8/riak_3.0.2-OTP20.3_amd64.deb + file_size: '35776562' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/8/riak_3.0.2-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/8/riak_3.0.2-OTP22.3_amd64.deb + file_size: '37028850' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/8/riak_3.0.2-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/8/riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: '91656786' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/8/riak_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/8/riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '92912608' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/8/riak_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.2-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak-dbgsym_3.0.2-OTP20.3_amd64.deb + file_size: '13049006' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak-dbgsym_3.0.2-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.2-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak-dbgsym_3.0.2-OTP22.3_amd64.deb + file_size: '13714634' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak-dbgsym_3.0.2-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + 
file_name: riak-dbgsym_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak-dbgsym_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: '13043842' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak-dbgsym_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak-dbgsym_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '13718158' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak-dbgsym_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak_3.0.2-OTP20.3_amd64.deb + file_size: '36141706' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak_3.0.2-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak_3.0.2-OTP22.3_amd64.deb + file_size: '37382362' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak_3.0.2-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: '91983954' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '93183806' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/debian/9/riak_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + - os: rhel + versions: + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-3.0.2-OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2-OTP20.3-1.el7.src.rpm + file_size: '1097844' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2-OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2-OTP20.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2-OTP20.3-1.el7x86_64.rpm + file_size: '39941536' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2-OTP20.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2-OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2-OTP22.3-1.el7.src.rpm + file_size: '1097844' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2-OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2-OTP22.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2-OTP22.3-1.el7x86_64.rpm + file_size: '41247588' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2-OTP22.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2.yokozuna.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2.yokozuna.OTP20.3-1.el7.src.rpm + file_size: '1098187' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2.yokozuna.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.yokozuna.OTP20.3-1.el7x86_64.rpm + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2.yokozuna.OTP20.3-1.el7x86_64.rpm + file_size: '96075948' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2.yokozuna.OTP20.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2.yokozuna.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2.yokozuna.OTP22.3-1.el7.src.rpm + file_size: '1098187' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2.yokozuna.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.yokozuna.OTP22.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2.yokozuna.OTP22.3-1.el7x86_64.rpm + file_size: '97383544' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/7/riak-3.0.2.yokozuna.OTP22.3-1.el7x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.2-OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2-OTP20.3-1.el8.src.rpm + file_size: '1102206' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2-OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2-OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2-OTP20.3-1.el8x86_64.rpm + file_size: '42268684' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2-OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2-OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2-OTP22.3-1.el8.src.rpm + file_size: '1102206' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2-OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2-OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2-OTP22.3-1.el8x86_64.rpm + file_size: '43600936' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2-OTP22.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1102514' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.yokozuna.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2.yokozuna.OTP20.3-1.el8x86_64.rpm + file_size: '98642632' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2.yokozuna.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.2.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1102515' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.2.yokozuna.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2.yokozuna.OTP22.3-1.el8x86_64.rpm + file_size: '99975440' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/rhel/8/riak-3.0.2.yokozuna.OTP22.3-1.el8x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP20.3_amd64.deb + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/bionic64/riak_3.0.2-OTP20.3_amd64.deb + file_size: '36323504' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/bionic64/riak_3.0.2-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/bionic64/riak_3.0.2-OTP22.3_amd64.deb + file_size: '37602316' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/bionic64/riak_3.0.2-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/bionic64/riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: '92159848' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/bionic64/riak_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/bionic64/riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '93385572' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/bionic64/riak_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/focal64/riak_3.0.2-OTP20.3_amd64.deb + file_size: '36991720' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/focal64/riak_3.0.2-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/focal64/riak_3.0.2-OTP22.3_amd64.deb + file_size: '38302744' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/focal64/riak_3.0.2-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/focal64/riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: '92802776' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/focal64/riak_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/focal64/riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '94142616' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/focal64/riak_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/trusty64/riak_3.0.2-OTP20.3_amd64.deb + file_size: '35525930' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/trusty64/riak_3.0.2-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/trusty64/riak_3.0.2-OTP22.3_amd64.deb + file_size: '36808750' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/trusty64/riak_3.0.2-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/trusty64/riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: '91383462' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/trusty64/riak_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/trusty64/riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '92679050' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/trusty64/riak_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/xenial64/riak_3.0.2-OTP20.3_amd64.deb + file_size: '35723380' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/xenial64/riak_3.0.2-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/xenial64/riak_3.0.2-OTP22.3_amd64.deb + file_size: '37018428' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/xenial64/riak_3.0.2-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/xenial64/riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: '91545234' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/xenial64/riak_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/xenial64/riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '92846884' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.2/ubuntu/xenial64/riak_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + 3.0.3: + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-3.0.3.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.OTP20.3-1.amzn2.src.rpm + file_size: '1098307' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.OTP20.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.OTP20.3-1.amzn2x86_64.rpm + file_size: '40842912' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.OTP20.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.OTP22.3-1.amzn2.src.rpm + file_size: '1098311' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.OTP22.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.OTP22.3-1.amzn2x86_64.rpm + file_size: '42166800' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.OTP22.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.yokozuna.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.yokozuna.OTP20.3-1.amzn2.src.rpm + file_size: '1098346' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.yokozuna.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.yokozuna.OTP20.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.yokozuna.OTP20.3-1.amzn2x86_64.rpm + file_size: '96977156' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.yokozuna.OTP20.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.yokozuna.OTP22.3-1.amzn2.src.rpm + 
file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.yokozuna.OTP22.3-1.amzn2.src.rpm + file_size: '1098347' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.yokozuna.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.yokozuna.OTP22.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.yokozuna.OTP22.3-1.amzn2x86_64.rpm + file_size: '98297180' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2/riak-3.0.3.yokozuna.OTP22.3-1.amzn2x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-3.0.3.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.OTP20.3-1.amzn1.src.rpm + file_size: '1098230' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.OTP20.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.OTP20.3-1.amzn1x86_64.rpm + file_size: '46611093' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.OTP20.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.OTP22.3-1.amzn1.src.rpm + file_size: '1098231' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.OTP22.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.OTP22.3-1.amzn1x86_64.rpm + file_size: '48071581' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.OTP22.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.yokozuna.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.yokozuna.OTP20.3-1.amzn1.src.rpm + file_size: '1098416' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.yokozuna.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.yokozuna.OTP20.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.yokozuna.OTP20.3-1.amzn1x86_64.rpm + file_size: '104016124' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.yokozuna.OTP20.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.yokozuna.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.yokozuna.OTP22.3-1.amzn1.src.rpm + file_size: '1098413' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.yokozuna.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.yokozuna.OTP22.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.yokozuna.OTP22.3-1.amzn1x86_64.rpm + file_size: '105479331' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/amazon/2016.09/riak-3.0.3.yokozuna.OTP22.3-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak-dbgsym_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: '17047900' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak-dbgsym_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak-dbgsym_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '17997108' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak-dbgsym_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.3-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak-dbgsym_3.0.3-OTP20.3_amd64.deb + file_size: '17074316' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak-dbgsym_3.0.3-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.3-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak-dbgsym_3.0.3-OTP22.3_amd64.deb + file_size: '18020512' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak-dbgsym_3.0.3-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak_3.0.2-yokozuna-OTP20.3_amd64.deb + file_size: '92757824' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak_3.0.2-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak_3.0.2-yokozuna-OTP22.3_amd64.deb + file_size: '94121072' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak_3.0.2-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak_3.0.3-OTP20.3_amd64.deb + file_size: '36950372' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak_3.0.3-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak_3.0.3-OTP22.3_amd64.deb + file_size: '38265940' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/10/riak_3.0.3-OTP22.3_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/8/riak_3.0.3-OTP20.3_amd64.deb + file_size: '35759278' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/8/riak_3.0.3-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/8/riak_3.0.3-OTP22.3_amd64.deb + file_size: '37046542' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/8/riak_3.0.3-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/8/riak_3.0.3-yokozuna-OTP20.3_amd64.deb + file_size: '91626566' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/8/riak_3.0.3-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/8/riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_size: '92926750' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/8/riak_3.0.3-yokozuna-OTP22.3_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: 
riak-dbgsym_3.0.3-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak-dbgsym_3.0.3-OTP20.3_amd64.deb + file_size: '13045004' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak-dbgsym_3.0.3-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.3-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak-dbgsym_3.0.3-OTP22.3_amd64.deb + file_size: '13720892' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak-dbgsym_3.0.3-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.3-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak-dbgsym_3.0.3-yokozuna-OTP20.3_amd64.deb + file_size: '13041334' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak-dbgsym_3.0.3-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.3-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak-dbgsym_3.0.3-yokozuna-OTP22.3_amd64.deb + file_size: '13716108' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak-dbgsym_3.0.3-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak_3.0.3-OTP20.3_amd64.deb + file_size: '36139918' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak_3.0.3-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak_3.0.3-OTP22.3_amd64.deb + file_size: '37379854' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak_3.0.3-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak_3.0.3-yokozuna-OTP20.3_amd64.deb + file_size: '91988338' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak_3.0.3-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_size: '93183304' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/debian/9/riak_3.0.3-yokozuna-OTP22.3_amd64.deb.sha + - os: oracle + versions: + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.3.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.OTP20.3-1.el8.src.rpm + file_size: '1102686' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.OTP20.3-1.el8x86_64.rpm + file_size: '42794084' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.OTP22.3-1.el8.src.rpm + file_size: '1102687' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.OTP22.3-1.el8x86_64.rpm + file_size: 
'43831120' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.OTP22.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1102819' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.yokozuna.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.yokozuna.OTP20.3-1.el8x86_64.rpm + file_size: '99169552' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.yokozuna.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1102818' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.yokozuna.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.yokozuna.OTP22.3-1.el8x86_64.rpm + file_size: '100204080' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/oracle/8/riak-3.0.3.yokozuna.OTP22.3-1.el8x86_64.rpm.sha + - os: rhel + versions: + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-3.0.3.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.OTP20.3-1.el7.src.rpm + file_size: '1098306' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.OTP20.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.OTP20.3-1.el7x86_64.rpm + file_size: '39938288' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.OTP20.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.OTP22.3-1.el7.src.rpm + file_size: '1098309' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.OTP22.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.OTP22.3-1.el7x86_64.rpm + file_size: '41248028' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.OTP22.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.yokozuna.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.yokozuna.OTP20.3-1.el7.src.rpm + file_size: '1098482' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.yokozuna.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.yokozuna.OTP20.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.yokozuna.OTP20.3-1.el7x86_64.rpm + file_size: '96074792' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.yokozuna.OTP20.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.yokozuna.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.yokozuna.OTP22.3-1.el7.src.rpm + file_size: '1098482' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.yokozuna.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.yokozuna.OTP22.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.yokozuna.OTP22.3-1.el7x86_64.rpm + file_size: '97383496' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/7/riak-3.0.3.yokozuna.OTP22.3-1.el7x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.3.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.OTP20.3-1.el8.src.rpm + file_size: '1102665' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.OTP20.3-1.el8x86_64.rpm + file_size: '42266028' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.OTP22.3-1.el8.src.rpm + file_size: '1102667' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.OTP22.3-1.el8x86_64.rpm + file_size: '43601900' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.OTP22.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1102759' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.yokozuna.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.yokozuna.OTP20.3-1.el8x86_64.rpm + file_size: '98640692' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.yokozuna.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.3.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1102759' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.3.yokozuna.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.yokozuna.OTP22.3-1.el8x86_64.rpm + file_size: '99975636' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/rhel/8/riak-3.0.3.yokozuna.OTP22.3-1.el8x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/bionic64/riak_3.0.3-OTP20.3_amd64.deb + file_size: '36321832' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/bionic64/riak_3.0.3-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/bionic64/riak_3.0.3-OTP22.3_amd64.deb + file_size: '37600804' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/bionic64/riak_3.0.3-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/bionic64/riak_3.0.3-yokozuna-OTP20.3_amd64.deb + file_size: '92158936' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/bionic64/riak_3.0.3-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/bionic64/riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_size: '93388708' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/bionic64/riak_3.0.3-yokozuna-OTP22.3_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/focal64/riak_3.0.3-OTP20.3_amd64.deb + file_size: '36988344' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/focal64/riak_3.0.3-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/focal64/riak_3.0.3-OTP22.3_amd64.deb + file_size: '38298168' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/focal64/riak_3.0.3-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/focal64/riak_3.0.3-yokozuna-OTP20.3_amd64.deb + file_size: '92805392' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/focal64/riak_3.0.3-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/focal64/riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_size: '94147556' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/focal64/riak_3.0.3-yokozuna-OTP22.3_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/xenial64/riak_3.0.3-OTP20.3_amd64.deb + file_size: '35724532' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/xenial64/riak_3.0.3-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/xenial64/riak_3.0.3-OTP22.3_amd64.deb + file_size: '37034782' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/xenial64/riak_3.0.3-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/xenial64/riak_3.0.3-yokozuna-OTP20.3_amd64.deb + file_size: '91556482' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/xenial64/riak_3.0.3-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/xenial64/riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_size: '92870036' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.3/ubuntu/xenial64/riak_3.0.3-yokozuna-OTP22.3_amd64.deb.sha + 3.0.4: + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-3.0.4.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.OTP20.3-1.amzn2.src.rpm + file_size: 
'1098582' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.OTP20.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.OTP20.3-1.amzn2x86_64.rpm + file_size: '40944412' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.OTP20.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.OTP22.3-1.amzn2.src.rpm + file_size: '1098582' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.OTP22.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.OTP22.3-1.amzn2x86_64.rpm + file_size: '42267684' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.OTP22.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.yokozuna.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.yokozuna.OTP20.3-1.amzn2.src.rpm + file_size: '1098561' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.yokozuna.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.yokozuna.OTP20.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.yokozuna.OTP20.3-1.amzn2x86_64.rpm + file_size: '97079516' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.yokozuna.OTP20.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.yokozuna.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.yokozuna.OTP22.3-1.amzn2.src.rpm + file_size: '1098562' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.yokozuna.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.yokozuna.OTP22.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.yokozuna.OTP22.3-1.amzn2x86_64.rpm + file_size: '98398556' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2/riak-3.0.4.yokozuna.OTP22.3-1.amzn2x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-3.0.4.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.OTP20.3-1.amzn1.src.rpm + file_size: '1098501' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.OTP20.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.OTP20.3-1.amzn1x86_64.rpm + file_size: '46800476' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.OTP20.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.OTP22.3-1.amzn1.src.rpm + file_size: '1098502' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.OTP22.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.OTP22.3-1.amzn1x86_64.rpm + file_size: '48261247' + 
chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.OTP22.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.yokozuna.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.yokozuna.OTP20.3-1.amzn1.src.rpm + file_size: '1098314' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.yokozuna.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.yokozuna.OTP20.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.yokozuna.OTP20.3-1.amzn1x86_64.rpm + file_size: '104202142' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.yokozuna.OTP20.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.yokozuna.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.yokozuna.OTP22.3-1.amzn1.src.rpm + file_size: '1098314' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.yokozuna.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.yokozuna.OTP22.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.yokozuna.OTP22.3-1.amzn1x86_64.rpm + file_size: '105671977' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/amazon/2016.09/riak-3.0.4.yokozuna.OTP22.3-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.4-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak-dbgsym_3.0.4-OTP20.3_amd64.deb + file_size: '17108816' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak-dbgsym_3.0.4-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.4-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak-dbgsym_3.0.4-OTP22.3_amd64.deb + file_size: '18054424' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak-dbgsym_3.0.4-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.4-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak-dbgsym_3.0.4-yokozuna-OTP20.3_amd64.deb + file_size: '17111068' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak-dbgsym_3.0.4-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.4-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak-dbgsym_3.0.4-yokozuna-OTP22.3_amd64.deb + file_size: '18035040' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak-dbgsym_3.0.4-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak_3.0.4-OTP20.3_amd64.deb + file_size: '37037888' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak_3.0.4-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak_3.0.4-OTP22.3_amd64.deb + file_size: '38380472' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak_3.0.4-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_size: '92839432' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak_3.0.4-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_size: '94201472' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/10/riak_3.0.4-yokozuna-OTP22.3_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/8/riak_3.0.4-OTP20.3_amd64.deb + file_size: '35848424' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/8/riak_3.0.4-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/8/riak_3.0.4-OTP22.3_amd64.deb + file_size: '37119850' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/8/riak_3.0.4-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/8/riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_size: '91743178' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/8/riak_3.0.4-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/8/riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_size: '92985160' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/8/riak_3.0.4-yokozuna-OTP22.3_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.4-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak-dbgsym_3.0.4-OTP20.3_amd64.deb + file_size: '13070410' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak-dbgsym_3.0.4-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.4-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak-dbgsym_3.0.4-OTP22.3_amd64.deb + file_size: '13745752' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak-dbgsym_3.0.4-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.4-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak-dbgsym_3.0.4-yokozuna-OTP20.3_amd64.deb + file_size: '13072696' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak-dbgsym_3.0.4-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.4-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak-dbgsym_3.0.4-yokozuna-OTP22.3_amd64.deb + file_size: '13744780' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak-dbgsym_3.0.4-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak_3.0.4-OTP20.3_amd64.deb + file_size: '36223862' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak_3.0.4-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak_3.0.4-OTP22.3_amd64.deb + file_size: '37471708' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak_3.0.4-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_size: '92053102' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak_3.0.4-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_size: '93262786' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/debian/9/riak_3.0.4-yokozuna-OTP22.3_amd64.deb.sha + - os: oracle + versions: + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.4.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.OTP20.3-1.el8.src.rpm + file_size: '1102963' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.OTP20.3-1.el8x86_64.rpm + file_size: '42900420' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.OTP22.3-1.el8.src.rpm + file_size: '1102963' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.OTP22.3-1.el8x86_64.rpm + file_size: '43939064' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.OTP22.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1102858' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.yokozuna.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.yokozuna.OTP20.3-1.el8x86_64.rpm + file_size: '99278488' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.yokozuna.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1102858' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.yokozuna.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.yokozuna.OTP22.3-1.el8x86_64.rpm + file_size: '100309256' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/oracle/8/riak-3.0.4.yokozuna.OTP22.3-1.el8x86_64.rpm.sha + - os: rhel + versions: + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-3.0.4.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.OTP20.3-1.el7.src.rpm + file_size: '1098582' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.OTP20.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.OTP20.3-1.el7x86_64.rpm + file_size: '40039548' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.OTP20.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.OTP22.3-1.el7.src.rpm + file_size: '1098582' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.OTP22.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.OTP22.3-1.el7x86_64.rpm + file_size: '41347832' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.OTP22.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.yokozuna.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.yokozuna.OTP20.3-1.el7.src.rpm + file_size: '1098487' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.yokozuna.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.yokozuna.OTP20.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.yokozuna.OTP20.3-1.el7x86_64.rpm + file_size: '96174988' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.yokozuna.OTP20.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.yokozuna.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.yokozuna.OTP22.3-1.el7.src.rpm + file_size: '1098487' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.yokozuna.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.yokozuna.OTP22.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.yokozuna.OTP22.3-1.el7x86_64.rpm + file_size: '97480908' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/7/riak-3.0.4.yokozuna.OTP22.3-1.el7x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.4.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.OTP20.3-1.el8.src.rpm + file_size: '1102943' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.OTP20.3-1.el8x86_64.rpm + file_size: '42373972' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.OTP22.3-1.el8.src.rpm + file_size: '1102943' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.OTP22.3-1.el8x86_64.rpm + file_size: '43707600' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.OTP22.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.yokozuna.OTP20.3-1.el8.src.rpm 
+ file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1102790' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.yokozuna.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.yokozuna.OTP20.3-1.el8x86_64.rpm + file_size: '98746852' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.yokozuna.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.4.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1102790' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.4.yokozuna.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.yokozuna.OTP22.3-1.el8x86_64.rpm + file_size: '100080124' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/rhel/8/riak-3.0.4.yokozuna.OTP22.3-1.el8x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/bionic64/riak_3.0.4-OTP20.3_amd64.deb + file_size: '36409560' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/bionic64/riak_3.0.4-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/bionic64/riak_3.0.4-OTP22.3_amd64.deb + file_size: '37680304' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/bionic64/riak_3.0.4-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/bionic64/riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_size: '92240000' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/bionic64/riak_3.0.4-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/bionic64/riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_size: '93475696' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/bionic64/riak_3.0.4-yokozuna-OTP22.3_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/focal64/riak_3.0.4-OTP20.3_amd64.deb + file_size: '37083140' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/focal64/riak_3.0.4-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/focal64/riak_3.0.4-OTP22.3_amd64.deb + file_size: '38404860' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/focal64/riak_3.0.4-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/focal64/riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_size: '92880820' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/focal64/riak_3.0.4-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/focal64/riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_size: '94239156' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/focal64/riak_3.0.4-yokozuna-OTP22.3_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/trusty64/riak_3.0.4-OTP22.3_amd64.deb + file_size: '36911500' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/trusty64/riak_3.0.4-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/trusty64/riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_size: '91503594' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/trusty64/riak_3.0.4-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/trusty64/riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_size: '92774442' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/trusty64/riak_3.0.4-yokozuna-OTP22.3_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.3-yokozuna-OTP22.3_amd64.deb + file_size: '92870036' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.3-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.4-OTP20.3_amd64.deb + file_size: '35809072' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.4-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.4-OTP22.3_amd64.deb + file_size: '37121286' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.4-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.4-yokozuna-OTP20.3_amd64.deb + file_size: '91646528' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.4-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_size: '92941018' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.4/ubuntu/xenial64/riak_3.0.4-yokozuna-OTP22.3_amd64.deb.sha + 3.0.6: + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-3.0.6.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.OTP20.3-1.amzn2.src.rpm + file_size: '1098850' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.OTP20.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.OTP20.3-1.amzn2x86_64.rpm + file_size: '40955696' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.OTP20.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: 
riak-3.0.6.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.OTP22.3-1.amzn2.src.rpm + file_size: '1098850' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.OTP22.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.OTP22.3-1.amzn2x86_64.rpm + file_size: '42277988' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.OTP22.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.yokozuna.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.yokozuna.OTP20.3-1.amzn2.src.rpm + file_size: '1098684' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.yokozuna.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.yokozuna.OTP20.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.yokozuna.OTP20.3-1.amzn2x86_64.rpm + file_size: '97092056' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.yokozuna.OTP20.3-1.amzn2x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.yokozuna.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.yokozuna.OTP22.3-1.amzn2.src.rpm + file_size: '1098681' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.yokozuna.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.yokozuna.OTP22.3-1.amzn2x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.yokozuna.OTP22.3-1.amzn2x86_64.rpm + file_size: '98410492' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2/riak-3.0.6.yokozuna.OTP22.3-1.amzn2x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-3.0.6.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.OTP20.3-1.amzn1.src.rpm + file_size: '1098770' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.OTP20.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.OTP20.3-1.amzn1x86_64.rpm + file_size: '46811407' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.OTP20.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.OTP22.3-1.amzn1.src.rpm + file_size: '1098770' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.OTP22.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.OTP22.3-1.amzn1x86_64.rpm + file_size: '48270629' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.OTP22.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.yokozuna.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.yokozuna.OTP20.3-1.amzn1.src.rpm + file_size: '1098556' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.yokozuna.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 
+ file_info: + file_name: riak-3.0.6.yokozuna.OTP20.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.yokozuna.OTP20.3-1.amzn1x86_64.rpm + file_size: '104216173' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.yokozuna.OTP20.3-1.amzn1x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.yokozuna.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.yokozuna.OTP22.3-1.amzn1.src.rpm + file_size: '1098556' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.yokozuna.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.yokozuna.OTP22.3-1.amzn1x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.yokozuna.OTP22.3-1.amzn1x86_64.rpm + file_size: '105674499' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/amazon/2016.09/riak-3.0.6.yokozuna.OTP22.3-1.amzn1x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.6-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak-dbgsym_3.0.6-OTP20.3_amd64.deb + file_size: '17108424' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak-dbgsym_3.0.6-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.6-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak-dbgsym_3.0.6-OTP22.3_amd64.deb + file_size: '18058212' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak-dbgsym_3.0.6-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.6-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak-dbgsym_3.0.6-yokozuna-OTP20.3_amd64.deb + file_size: '17086572' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak-dbgsym_3.0.6-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.6-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak-dbgsym_3.0.6-yokozuna-OTP22.3_amd64.deb + file_size: '18059752' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak-dbgsym_3.0.6-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak_3.0.6-OTP20.3_amd64.deb + file_size: '37052300' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak_3.0.6-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak_3.0.6-OTP22.3_amd64.deb + file_size: '38389580' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak_3.0.6-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_size: '92850380' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak_3.0.6-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_size: '94217724' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/10/riak_3.0.6-yokozuna-OTP22.3_amd64.deb.sha + 
- version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/8/riak_3.0.6-OTP20.3_amd64.deb + file_size: '35863872' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/8/riak_3.0.6-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/8/riak_3.0.6-OTP22.3_amd64.deb + file_size: '37120700' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/8/riak_3.0.6-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/8/riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_size: '91747004' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/8/riak_3.0.6-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/8/riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_size: '92998168' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/8/riak_3.0.6-yokozuna-OTP22.3_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.6-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak-dbgsym_3.0.6-OTP20.3_amd64.deb + file_size: '13068032' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak-dbgsym_3.0.6-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.6-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak-dbgsym_3.0.6-OTP22.3_amd64.deb + file_size: '13746054' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak-dbgsym_3.0.6-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.6-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak-dbgsym_3.0.6-yokozuna-OTP20.3_amd64.deb + file_size: '13071234' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak-dbgsym_3.0.6-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.6-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak-dbgsym_3.0.6-yokozuna-OTP22.3_amd64.deb + file_size: '13744714' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak-dbgsym_3.0.6-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak_3.0.6-OTP20.3_amd64.deb + file_size: '36227028' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak_3.0.6-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak_3.0.6-OTP22.3_amd64.deb + file_size: '37482112' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak_3.0.6-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_size: '92068446' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak_3.0.6-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_size: '93283972' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/debian/9/riak_3.0.6-yokozuna-OTP22.3_amd64.deb.sha + - os: oracle + versions: + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.6.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.OTP20.3-1.el8.src.rpm + file_size: '1103232' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.OTP20.3-1.el8x86_64.rpm + file_size: '42914048' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.OTP22.3-1.el8.src.rpm + file_size: '1103232' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.OTP22.3-1.el8x86_64.rpm + file_size: '43951276' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.OTP22.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1102924' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.yokozuna.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.yokozuna.OTP20.3-1.el8x86_64.rpm + file_size: '99291144' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.yokozuna.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1102924' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.yokozuna.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.yokozuna.OTP22.3-1.el8x86_64.rpm + file_size: '100321612' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/oracle/8/riak-3.0.6.yokozuna.OTP22.3-1.el8x86_64.rpm.sha + - os: rhel + versions: + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-3.0.6.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.OTP20.3-1.el7.src.rpm + file_size: '1098850' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.OTP20.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.OTP20.3-1.el7x86_64.rpm + file_size: '40051388' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.OTP20.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.OTP22.3-1.el7.src.rpm + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.OTP22.3-1.el7.src.rpm + file_size: '1098850' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.OTP22.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.OTP22.3-1.el7x86_64.rpm + file_size: '41359740' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.OTP22.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.yokozuna.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.yokozuna.OTP20.3-1.el7.src.rpm + file_size: '1098693' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.yokozuna.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.yokozuna.OTP20.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.yokozuna.OTP20.3-1.el7x86_64.rpm + file_size: '96187352' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.yokozuna.OTP20.3-1.el7x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.yokozuna.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.yokozuna.OTP22.3-1.el7.src.rpm + file_size: '1098693' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.yokozuna.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.yokozuna.OTP22.3-1.el7x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.yokozuna.OTP22.3-1.el7x86_64.rpm + file_size: '97491644' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/7/riak-3.0.6.yokozuna.OTP22.3-1.el7x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.6.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.OTP20.3-1.el8.src.rpm + file_size: '1103212' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.OTP20.3-1.el8x86_64.rpm + file_size: '42384260' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.OTP22.3-1.el8.src.rpm + file_size: '1103212' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.OTP22.3-1.el8x86_64.rpm + file_size: '43720976' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.OTP22.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1103102' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.yokozuna.OTP20.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.yokozuna.OTP20.3-1.el8x86_64.rpm + file_size: '98760252' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.yokozuna.OTP20.3-1.el8x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.6.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1103102' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.6.yokozuna.OTP22.3-1.el8x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.yokozuna.OTP22.3-1.el8x86_64.rpm + file_size: '100091916' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/rhel/8/riak-3.0.6.yokozuna.OTP22.3-1.el8x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/bionic64/riak_3.0.6-OTP20.3_amd64.deb + file_size: '36424864' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/bionic64/riak_3.0.6-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/bionic64/riak_3.0.6-OTP22.3_amd64.deb + file_size: '37697732' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/bionic64/riak_3.0.6-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/bionic64/riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_size: '92252188' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/bionic64/riak_3.0.6-yokozuna-OTP20.3_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/focal64/riak_3.0.6-OTP20.3_amd64.deb + file_size: '37092228' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/focal64/riak_3.0.6-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/focal64/riak_3.0.6-OTP22.3_amd64.deb + file_size: '38411968' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/focal64/riak_3.0.6-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/focal64/riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_size: '92890844' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/focal64/riak_3.0.6-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/focal64/riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_size: '94245652' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/focal64/riak_3.0.6-yokozuna-OTP22.3_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/xenial64/riak_3.0.4-yokozuna-OTP22.3_amd64.deb + file_size: '92941018' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/xenial64/riak_3.0.4-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/xenial64/riak_3.0.6-OTP20.3_amd64.deb + file_size: '35866934' + 
chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/xenial64/riak_3.0.6-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/xenial64/riak_3.0.6-OTP22.3_amd64.deb + file_size: '37126298' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/xenial64/riak_3.0.6-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/xenial64/riak_3.0.6-yokozuna-OTP20.3_amd64.deb + file_size: '91697632' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/xenial64/riak_3.0.6-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/xenial64/riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_size: '92966920' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.6/ubuntu/xenial64/riak_3.0.6-yokozuna-OTP22.3_amd64.deb.sha + 3.0.7: + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-3.0.7.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.OTP20.3-1.amzn2.src.rpm + file_size: '1099162' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.OTP20.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.OTP20.3-1.amzn2.x86_64.rpm + file_size: '40996348' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.OTP20.3-1.amzn2.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.OTP22.3-1.amzn2.src.rpm + file_size: '1099161' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.OTP22.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.OTP22.3-1.amzn2.x86_64.rpm + file_size: '42317496' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.OTP22.3-1.amzn2.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.yokozuna.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.yokozuna.OTP20.3-1.amzn2.src.rpm + file_size: '1099591' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.yokozuna.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.yokozuna.OTP20.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.yokozuna.OTP20.3-1.amzn2.x86_64.rpm + file_size: '97129440' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.yokozuna.OTP20.3-1.amzn2.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.yokozuna.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.yokozuna.OTP22.3-1.amzn2.src.rpm + file_size: '1099591' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.yokozuna.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.yokozuna.OTP22.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.yokozuna.OTP22.3-1.amzn2.x86_64.rpm + file_size: 
'98444884' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2/riak-3.0.7.yokozuna.OTP22.3-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-3.0.7.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.OTP20.3-1.amzn1.src.rpm + file_size: '1099081' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.OTP20.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.OTP20.3-1.amzn1.x86_64.rpm + file_size: '46852792' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.OTP20.3-1.amzn1.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.OTP22.3-1.amzn1.src.rpm + file_size: '1099081' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.OTP22.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.OTP22.3-1.amzn1.x86_64.rpm + file_size: '48311418' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.OTP22.3-1.amzn1.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.yokozuna.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.yokozuna.OTP20.3-1.amzn1.src.rpm + file_size: '1099544' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.yokozuna.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.yokozuna.OTP20.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.yokozuna.OTP20.3-1.amzn1.x86_64.rpm + file_size: '104255403' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.yokozuna.OTP20.3-1.amzn1.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.yokozuna.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.yokozuna.OTP22.3-1.amzn1.src.rpm + file_size: '1099544' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.yokozuna.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.yokozuna.OTP22.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.yokozuna.OTP22.3-1.amzn1.x86_64.rpm + file_size: '105711956' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/amazon/2016.09/riak-3.0.7.yokozuna.OTP22.3-1.amzn1.x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.7-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak-dbgsym_3.0.7-OTP20.3_amd64.deb + file_size: '17102292' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak-dbgsym_3.0.7-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.7-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak-dbgsym_3.0.7-OTP22.3_amd64.deb + file_size: '18060132' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak-dbgsym_3.0.7-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: 
riak-dbgsym_3.0.7-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak-dbgsym_3.0.7-yokozuna-OTP20.3_amd64.deb + file_size: '17108888' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak-dbgsym_3.0.7-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.7-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak-dbgsym_3.0.7-yokozuna-OTP22.3_amd64.deb + file_size: '18057476' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak-dbgsym_3.0.7-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak_3.0.7-OTP20.3_amd64.deb + file_size: '37084696' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak_3.0.7-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak_3.0.7-OTP22.3_amd64.deb + file_size: '38411356' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak_3.0.7-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak_3.0.7-yokozuna-OTP20.3_amd64.deb + file_size: '92887524' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak_3.0.7-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_size: '94262544' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/10/riak_3.0.7-yokozuna-OTP22.3_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/8/riak_3.0.7-OTP20.3_amd64.deb + file_size: '35896960' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/8/riak_3.0.7-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/8/riak_3.0.7-OTP22.3_amd64.deb + file_size: '37153412' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/8/riak_3.0.7-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/8/riak_3.0.7-yokozuna-OTP20.3_amd64.deb + file_size: '91766190' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/8/riak_3.0.7-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/8/riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_size: '93014852' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/8/riak_3.0.7-yokozuna-OTP22.3_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.7-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak-dbgsym_3.0.7-OTP20.3_amd64.deb + file_size: '13068354' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak-dbgsym_3.0.7-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.7-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak-dbgsym_3.0.7-OTP22.3_amd64.deb + 
file_size: '13743744' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak-dbgsym_3.0.7-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.7-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak-dbgsym_3.0.7-yokozuna-OTP20.3_amd64.deb + file_size: '13071342' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak-dbgsym_3.0.7-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.7-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak-dbgsym_3.0.7-yokozuna-OTP22.3_amd64.deb + file_size: '13746390' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak-dbgsym_3.0.7-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak_3.0.7-OTP20.3_amd64.deb + file_size: '36263990' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak_3.0.7-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak_3.0.7-OTP22.3_amd64.deb + file_size: '37517098' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak_3.0.7-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak_3.0.7-yokozuna-OTP20.3_amd64.deb + file_size: '92090382' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak_3.0.7-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_size: '93311722' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/debian/9/riak_3.0.7-yokozuna-OTP22.3_amd64.deb.sha + - os: oracle + versions: + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.7.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.OTP20.3-1.el8.src.rpm + file_size: '1103544' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.OTP20.3-1.el8.x86_64.rpm + file_size: '42951344' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.OTP22.3-1.el8.src.rpm + file_size: '1103544' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.OTP22.3-1.el8.x86_64.rpm + file_size: '43988560' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.OTP22.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1103968' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_size: '99325480' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.yokozuna.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1103968' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_size: '100356996' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/oracle/8/riak-3.0.7.yokozuna.OTP22.3-1.el8.x86_64.rpm.sha + - os: rhel + versions: + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-3.0.7.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.OTP20.3-1.el7.src.rpm + file_size: '1099160' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.OTP20.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.OTP20.3-1.el7.x86_64.rpm + file_size: '40091016' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.OTP20.3-1.el7.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.OTP22.3-1.el7.src.rpm + file_size: '1099160' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.OTP22.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.OTP22.3-1.el7.x86_64.rpm + file_size: '41397788' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.OTP22.3-1.el7.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.yokozuna.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.yokozuna.OTP20.3-1.el7.src.rpm + file_size: '1099226' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.yokozuna.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.yokozuna.OTP20.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.yokozuna.OTP20.3-1.el7.x86_64.rpm + file_size: '96220540' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.yokozuna.OTP20.3-1.el7.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.yokozuna.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.yokozuna.OTP22.3-1.el7.src.rpm + file_size: '1099226' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.yokozuna.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.yokozuna.OTP22.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.yokozuna.OTP22.3-1.el7.x86_64.rpm + file_size: '97528172' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/7/riak-3.0.7.yokozuna.OTP22.3-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.7.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.OTP20.3-1.el8.src.rpm + file_size: '1103525' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.OTP20.3-1.el8.x86_64.rpm + file_size: '42424456' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.OTP22.3-1.el8.src.rpm + file_size: '1103525' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.OTP22.3-1.el8.x86_64.rpm + file_size: '43757904' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.OTP22.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1103615' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_size: '98798564' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.yokozuna.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.7.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1103615' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.7.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_size: '100126876' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/rhel/8/riak-3.0.7.yokozuna.OTP22.3-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/bionic64/riak_3.0.7-OTP20.3_amd64.deb + file_size: '36454080' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/bionic64/riak_3.0.7-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/bionic64/riak_3.0.7-OTP22.3_amd64.deb + file_size: '37723720' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/bionic64/riak_3.0.7-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/bionic64/riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_size: '93521804' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/bionic64/riak_3.0.7-yokozuna-OTP22.3_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/focal64/riak_3.0.7-OTP20.3_amd64.deb + file_size: '37123932' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/focal64/riak_3.0.7-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/focal64/riak_3.0.7-OTP22.3_amd64.deb + file_size: '38441356' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/focal64/riak_3.0.7-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/focal64/riak_3.0.7-yokozuna-OTP20.3_amd64.deb + file_size: '92929976' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/focal64/riak_3.0.7-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/focal64/riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_size: '94278596' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/focal64/riak_3.0.7-yokozuna-OTP22.3_amd64.deb.sha + - version: trusty64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/trusty64/riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_size: '92808002' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/trusty64/riak_3.0.7-yokozuna-OTP22.3_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/xenial64/riak_3.0.6-yokozuna-OTP22.3_amd64.deb + file_size: '92966920' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/xenial64/riak_3.0.6-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/xenial64/riak_3.0.7-OTP20.3_amd64.deb + file_size: '35872536' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/xenial64/riak_3.0.7-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/xenial64/riak_3.0.7-OTP22.3_amd64.deb + file_size: '37143934' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/xenial64/riak_3.0.7-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/xenial64/riak_3.0.7-yokozuna-OTP20.3_amd64.deb + file_size: '91693682' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/xenial64/riak_3.0.7-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/xenial64/riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_size: '92969810' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.7/ubuntu/xenial64/riak_3.0.7-yokozuna-OTP22.3_amd64.deb.sha + 3.0.8: + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-3.0.8.OTP20.3-1.amzn2.src.rpm + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.OTP20.3-1.amzn2.src.rpm + file_size: '1099968' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.OTP20.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.OTP20.3-1.amzn2.x86_64.rpm + file_size: '41036064' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.OTP20.3-1.amzn2.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.OTP22.3-1.amzn2.src.rpm + file_size: '1099968' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.OTP22.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.OTP22.3-1.amzn2.x86_64.rpm + file_size: '42360412' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.OTP22.3-1.amzn2.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.yokozuna.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.yokozuna.OTP20.3-1.amzn2.src.rpm + file_size: '1099988' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.yokozuna.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.yokozuna.OTP20.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.yokozuna.OTP20.3-1.amzn2.x86_64.rpm + file_size: '97170276' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.yokozuna.OTP20.3-1.amzn2.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.yokozuna.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.yokozuna.OTP22.3-1.amzn2.src.rpm + file_size: '1099988' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.yokozuna.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.yokozuna.OTP22.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.yokozuna.OTP22.3-1.amzn2.x86_64.rpm + file_size: '98487468' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2/riak-3.0.8.yokozuna.OTP22.3-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-3.0.8.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.OTP20.3-1.amzn1.src.rpm + file_size: '1099887' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.OTP20.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.OTP20.3-1.amzn1.x86_64.rpm + file_size: '46899332' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.OTP20.3-1.amzn1.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.OTP22.3-1.amzn1.src.rpm + file_size: '1099887' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.OTP22.3-1.amzn1.x86_64.rpm + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.OTP22.3-1.amzn1.x86_64.rpm + file_size: '48359372' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.OTP22.3-1.amzn1.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.yokozuna.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.yokozuna.OTP20.3-1.amzn1.src.rpm + file_size: '1100182' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.yokozuna.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.yokozuna.OTP20.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.yokozuna.OTP20.3-1.amzn1.x86_64.rpm + file_size: '104294784' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.yokozuna.OTP20.3-1.amzn1.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.yokozuna.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.yokozuna.OTP22.3-1.amzn1.src.rpm + file_size: '1100182' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.yokozuna.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.yokozuna.OTP22.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.yokozuna.OTP22.3-1.amzn1.x86_64.rpm + file_size: '105760203' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/amazon/2016.09/riak-3.0.8.yokozuna.OTP22.3-1.amzn1.x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.8-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak-dbgsym_3.0.8-OTP20.3_amd64.deb + file_size: '17105072' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak-dbgsym_3.0.8-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.8-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak-dbgsym_3.0.8-OTP22.3_amd64.deb + file_size: '18034872' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak-dbgsym_3.0.8-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.8-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak-dbgsym_3.0.8-yokozuna-OTP20.3_amd64.deb + file_size: '17112184' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak-dbgsym_3.0.8-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.8-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak-dbgsym_3.0.8-yokozuna-OTP22.3_amd64.deb + file_size: '18061500' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak-dbgsym_3.0.8-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak_3.0.8-OTP20.3_amd64.deb + file_size: '37121196' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak_3.0.8-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak_3.0.8-OTP22.3_amd64.deb + file_size: '38461384' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak_3.0.8-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + 
file_name: riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_size: '92929160' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak_3.0.8-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_size: '94291108' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/10/riak_3.0.8-yokozuna-OTP22.3_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/8/riak_3.0.8-OTP20.3_amd64.deb + file_size: '35956912' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/8/riak_3.0.8-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/8/riak_3.0.8-OTP22.3_amd64.deb + file_size: '37221918' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/8/riak_3.0.8-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/8/riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_size: '91824726' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/8/riak_3.0.8-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/8/riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_size: '93080166' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/8/riak_3.0.8-yokozuna-OTP22.3_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.8-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak-dbgsym_3.0.8-OTP20.3_amd64.deb + file_size: '13072336' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak-dbgsym_3.0.8-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.8-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak-dbgsym_3.0.8-OTP22.3_amd64.deb + file_size: '13748012' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak-dbgsym_3.0.8-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.8-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak-dbgsym_3.0.8-yokozuna-OTP20.3_amd64.deb + file_size: '13071598' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak-dbgsym_3.0.8-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.8-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak-dbgsym_3.0.8-yokozuna-OTP22.3_amd64.deb + file_size: '13748960' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak-dbgsym_3.0.8-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak_3.0.8-OTP20.3_amd64.deb + file_size: '36301714' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak_3.0.8-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP22.3_amd64.deb + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak_3.0.8-OTP22.3_amd64.deb + file_size: '37561542' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak_3.0.8-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_size: '92139678' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak_3.0.8-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_size: '93350012' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/debian/9/riak_3.0.8-yokozuna-OTP22.3_amd64.deb.sha + - os: oracle + versions: + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.8.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.OTP20.3-1.el8.src.rpm + file_size: '1104350' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.OTP20.3-1.el8.x86_64.rpm + file_size: '42995908' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.OTP22.3-1.el8.src.rpm + file_size: '1104350' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.OTP22.3-1.el8.x86_64.rpm + file_size: '44032424' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.OTP22.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1104709' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_size: '99364208' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.yokozuna.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1104709' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_size: '100397916' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/oracle/8/riak-3.0.8.yokozuna.OTP22.3-1.el8.x86_64.rpm.sha + - os: rhel + versions: + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-3.0.8.OTP20.3-1.el7.src.rpm + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.OTP20.3-1.el7.src.rpm + file_size: '1099969' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.OTP20.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.OTP20.3-1.el7.x86_64.rpm + file_size: '40134412' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.OTP20.3-1.el7.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.OTP22.3-1.el7.src.rpm + file_size: '1099969' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.OTP22.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.OTP22.3-1.el7.x86_64.rpm + file_size: '41440512' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.OTP22.3-1.el7.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.yokozuna.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.yokozuna.OTP20.3-1.el7.src.rpm + file_size: '1100351' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.yokozuna.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.yokozuna.OTP20.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.yokozuna.OTP20.3-1.el7.x86_64.rpm + file_size: '96265056' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.yokozuna.OTP20.3-1.el7.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.yokozuna.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.yokozuna.OTP22.3-1.el7.src.rpm + file_size: '1100350' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.yokozuna.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.yokozuna.OTP22.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.yokozuna.OTP22.3-1.el7.x86_64.rpm + file_size: '97570324' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/7/riak-3.0.8.yokozuna.OTP22.3-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.8.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.OTP20.3-1.el8.src.rpm + file_size: '1104330' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.OTP20.3-1.el8.x86_64.rpm + file_size: '42464788' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.OTP22.3-1.el8.src.rpm + file_size: '1104330' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.OTP22.3-1.el8.x86_64.rpm + file_size: '43799344' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.OTP22.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1104716' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_size: '98840132' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.yokozuna.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.8.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1104716' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.8.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_size: '100170720' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/rhel/8/riak-3.0.8.yokozuna.OTP22.3-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/bionic64/riak_3.0.8-OTP20.3_amd64.deb + file_size: '36500340' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/bionic64/riak_3.0.8-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/bionic64/riak_3.0.8-OTP22.3_amd64.deb + file_size: '37760372' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/bionic64/riak_3.0.8-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/bionic64/riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_size: '92326392' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/bionic64/riak_3.0.8-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/bionic64/riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_size: '93565812' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/bionic64/riak_3.0.8-yokozuna-OTP22.3_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/focal64/riak_3.0.8-OTP20.3_amd64.deb + file_size: '37161640' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/focal64/riak_3.0.8-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/focal64/riak_3.0.8-OTP22.3_amd64.deb + file_size: '38486756' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/focal64/riak_3.0.8-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/focal64/riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_size: '92975920' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/focal64/riak_3.0.8-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/focal64/riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_size: '94317544' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/focal64/riak_3.0.8-yokozuna-OTP22.3_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/xenial64/riak_3.0.7-yokozuna-OTP22.3_amd64.deb + file_size: '92969810' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/xenial64/riak_3.0.7-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/xenial64/riak_3.0.8-OTP20.3_amd64.deb + file_size: '35888958' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/xenial64/riak_3.0.8-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/xenial64/riak_3.0.8-OTP22.3_amd64.deb + file_size: '37189768' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/xenial64/riak_3.0.8-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/xenial64/riak_3.0.8-yokozuna-OTP20.3_amd64.deb + file_size: '91720244' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/xenial64/riak_3.0.8-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/xenial64/riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_size: '93019108' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.8/ubuntu/xenial64/riak_3.0.8-yokozuna-OTP22.3_amd64.deb.sha + 3.0.9: + - os: amazon + versions: + - version: '2' + architectures: + - arch: source + file_info: + file_name: riak-3.0.9.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.OTP20.3-1.amzn2.src.rpm + file_size: '1100642' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.OTP20.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.OTP20.3-1.amzn2.x86_64.rpm + file_size: '41059912' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.OTP20.3-1.amzn2.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.OTP22.3-1.amzn2.src.rpm + file_size: '1100642' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.OTP22.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.OTP22.3-1.amzn2.x86_64.rpm + file_size: '42381708' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.OTP22.3-1.amzn2.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.yokozuna.OTP20.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.yokozuna.OTP20.3-1.amzn2.src.rpm + file_size: '1100457' + chksum_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.yokozuna.OTP20.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.yokozuna.OTP20.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.yokozuna.OTP20.3-1.amzn2.x86_64.rpm + file_size: '97193716' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.yokozuna.OTP20.3-1.amzn2.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.yokozuna.OTP22.3-1.amzn2.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.yokozuna.OTP22.3-1.amzn2.src.rpm + file_size: '1100457' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.yokozuna.OTP22.3-1.amzn2.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.yokozuna.OTP22.3-1.amzn2.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.yokozuna.OTP22.3-1.amzn2.x86_64.rpm + file_size: '98509528' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2/riak-3.0.9.yokozuna.OTP22.3-1.amzn2.x86_64.rpm.sha + - version: '2016.09' + architectures: + - arch: source + file_info: + file_name: riak-3.0.9.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.OTP20.3-1.amzn1.src.rpm + file_size: '1100560' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.OTP20.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.OTP20.3-1.amzn1.x86_64.rpm + file_size: '46926066' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.OTP20.3-1.amzn1.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.OTP22.3-1.amzn1.src.rpm + file_size: '1100560' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.OTP22.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.OTP22.3-1.amzn1.x86_64.rpm + file_size: '48383680' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.OTP22.3-1.amzn1.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.yokozuna.OTP20.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.yokozuna.OTP20.3-1.amzn1.src.rpm + file_size: '1100658' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.yokozuna.OTP20.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.yokozuna.OTP20.3-1.amzn1.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.yokozuna.OTP20.3-1.amzn1.x86_64.rpm + file_size: '104322419' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.yokozuna.OTP20.3-1.amzn1.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.yokozuna.OTP22.3-1.amzn1.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.yokozuna.OTP22.3-1.amzn1.src.rpm + file_size: '1100658' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.yokozuna.OTP22.3-1.amzn1.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.yokozuna.OTP22.3-1.amzn1.x86_64.rpm + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.yokozuna.OTP22.3-1.amzn1.x86_64.rpm + file_size: '105783567' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/amazon/2016.09/riak-3.0.9.yokozuna.OTP22.3-1.amzn1.x86_64.rpm.sha + - os: debian + versions: + - version: '10' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.9-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak-dbgsym_3.0.9-OTP20.3_amd64.deb + file_size: '17104488' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak-dbgsym_3.0.9-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.9-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak-dbgsym_3.0.9-OTP22.3_amd64.deb + file_size: '18035948' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak-dbgsym_3.0.9-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.9-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak-dbgsym_3.0.9-yokozuna-OTP20.3_amd64.deb + file_size: '17085616' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak-dbgsym_3.0.9-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.9-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak-dbgsym_3.0.9-yokozuna-OTP22.3_amd64.deb + file_size: '18061340' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak-dbgsym_3.0.9-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak_3.0.9-OTP20.3_amd64.deb + file_size: '37146312' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak_3.0.9-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak_3.0.9-OTP22.3_amd64.deb + file_size: '38482340' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak_3.0.9-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_size: '92950944' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak_3.0.9-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak_3.0.9-yokozuna-OTP22.3_amd64.deb + file_size: '94321292' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/10/riak_3.0.9-yokozuna-OTP22.3_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/8/riak_3.0.9-OTP20.3_amd64.deb + file_size: '35964348' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/8/riak_3.0.9-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/8/riak_3.0.9-OTP22.3_amd64.deb + file_size: '37238850' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/8/riak_3.0.9-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/8/riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_size: '91834256' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/8/riak_3.0.9-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/8/riak_3.0.9-yokozuna-OTP22.3_amd64.deb + file_size: '93113962' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/8/riak_3.0.9-yokozuna-OTP22.3_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.9-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak-dbgsym_3.0.9-OTP20.3_amd64.deb + file_size: '13071874' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak-dbgsym_3.0.9-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.9-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak-dbgsym_3.0.9-OTP22.3_amd64.deb + file_size: '13746522' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak-dbgsym_3.0.9-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.9-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak-dbgsym_3.0.9-yokozuna-OTP20.3_amd64.deb + file_size: '13069334' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak-dbgsym_3.0.9-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak-dbgsym_3.0.9-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak-dbgsym_3.0.9-yokozuna-OTP22.3_amd64.deb + file_size: '13744262' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak-dbgsym_3.0.9-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak_3.0.9-OTP20.3_amd64.deb + file_size: '36325470' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak_3.0.9-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak_3.0.9-OTP22.3_amd64.deb + file_size: '37582024' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak_3.0.9-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_size: '92152682' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak_3.0.9-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak_3.0.9-yokozuna-OTP22.3_amd64.deb + file_size: '93371710' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/debian/9/riak_3.0.9-yokozuna-OTP22.3_amd64.deb.sha + - os: oracle + versions: + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.9.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.OTP20.3-1.el8.src.rpm + file_size: '1105021' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.OTP20.3-1.el8.x86_64.rpm + 
file_size: '43015912' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.OTP22.3-1.el8.src.rpm + file_size: '1105022' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.OTP22.3-1.el8.x86_64.rpm + file_size: '44054412' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.OTP22.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1105243' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_size: '99390352' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.yokozuna.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1105243' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_size: '100419016' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/oracle/8/riak-3.0.9.yokozuna.OTP22.3-1.el8.x86_64.rpm.sha + - os: raspbian + versions: + - version: buster + architectures: + - arch: arm32 + file_info: + file_name: riak-dbgsym_3.0.9-OTP20.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak-dbgsym_3.0.9-OTP20.3_armhf.deb + file_size: '16080824' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak-dbgsym_3.0.9-OTP20.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak-dbgsym_3.0.9-OTP22.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak-dbgsym_3.0.9-OTP22.3_armhf.deb + file_size: '16950728' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak-dbgsym_3.0.9-OTP22.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak-dbgsym_3.0.9-yokozuna-OTP20.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak-dbgsym_3.0.9-yokozuna-OTP20.3_armhf.deb + file_size: '16081276' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak-dbgsym_3.0.9-yokozuna-OTP20.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak-dbgsym_3.0.9-yokozuna-OTP22.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak-dbgsym_3.0.9-yokozuna-OTP22.3_armhf.deb + file_size: '16960160' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak-dbgsym_3.0.9-yokozuna-OTP22.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_3.0.9-OTP20.3_armhf.deb + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak_3.0.9-OTP20.3_armhf.deb + file_size: '36411584' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak_3.0.9-OTP20.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_3.0.9-OTP22.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak_3.0.9-OTP22.3_armhf.deb + file_size: '37756200' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak_3.0.9-OTP22.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_3.0.9-yokozuna-OTP20.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak_3.0.9-yokozuna-OTP20.3_armhf.deb + file_size: '92272836' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak_3.0.9-yokozuna-OTP20.3_armhf.deb.sha + - arch: arm32 + file_info: + file_name: riak_3.0.9-yokozuna-OTP22.3_armhf.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak_3.0.9-yokozuna-OTP22.3_armhf.deb + file_size: '93595860' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/raspbian/buster/riak_3.0.9-yokozuna-OTP22.3_armhf.deb.sha + - os: rhel + versions: + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-3.0.9.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.OTP20.3-1.el7.src.rpm + file_size: '1100640' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.OTP20.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.OTP20.3-1.el7.x86_64.rpm + file_size: '40155056' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.OTP20.3-1.el7.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.OTP22.3-1.el7.src.rpm + file_size: '1100643' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.OTP22.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.OTP22.3-1.el7.x86_64.rpm + file_size: '41462512' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.OTP22.3-1.el7.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.yokozuna.OTP20.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.yokozuna.OTP20.3-1.el7.src.rpm + file_size: '1100647' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.yokozuna.OTP20.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.yokozuna.OTP20.3-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.yokozuna.OTP20.3-1.el7.x86_64.rpm + file_size: '96285380' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.yokozuna.OTP20.3-1.el7.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.yokozuna.OTP22.3-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.yokozuna.OTP22.3-1.el7.src.rpm + file_size: '1100647' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.yokozuna.OTP22.3-1.el7.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.yokozuna.OTP22.3-1.el7.x86_64.rpm + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.yokozuna.OTP22.3-1.el7.x86_64.rpm + file_size: '97591116' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/7/riak-3.0.9.yokozuna.OTP22.3-1.el7.x86_64.rpm.sha + - version: '8' + architectures: + - arch: source + file_info: + file_name: riak-3.0.9.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.OTP20.3-1.el8.src.rpm + file_size: '1105003' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.OTP20.3-1.el8.x86_64.rpm + file_size: '42487660' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.OTP22.3-1.el8.src.rpm + file_size: '1105003' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.OTP22.3-1.el8.x86_64.rpm + file_size: '43822488' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.OTP22.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.yokozuna.OTP20.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.yokozuna.OTP20.3-1.el8.src.rpm + file_size: '1105256' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.yokozuna.OTP20.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.yokozuna.OTP20.3-1.el8.x86_64.rpm + file_size: '98863764' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.yokozuna.OTP20.3-1.el8.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-3.0.9.yokozuna.OTP22.3-1.el8.src.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.yokozuna.OTP22.3-1.el8.src.rpm + file_size: '1105256' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.yokozuna.OTP22.3-1.el8.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-3.0.9.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.yokozuna.OTP22.3-1.el8.x86_64.rpm + file_size: '100189388' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/rhel/8/riak-3.0.9.yokozuna.OTP22.3-1.el8.x86_64.rpm.sha + - os: ubuntu + versions: + - version: bionic64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/bionic64/riak_3.0.9-OTP20.3_amd64.deb + file_size: '36519084' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/bionic64/riak_3.0.9-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/bionic64/riak_3.0.9-OTP22.3_amd64.deb + file_size: '37794300' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/bionic64/riak_3.0.9-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_href: 
https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/bionic64/riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_size: '92343272' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/bionic64/riak_3.0.9-yokozuna-OTP20.3_amd64.deb.sha + - version: focal64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/focal64/riak_3.0.9-OTP20.3_amd64.deb + file_size: '37182476' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/focal64/riak_3.0.9-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/focal64/riak_3.0.9-OTP22.3_amd64.deb + file_size: '38504520' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/focal64/riak_3.0.9-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/focal64/riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_size: '92984568' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/focal64/riak_3.0.9-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/focal64/riak_3.0.9-yokozuna-OTP22.3_amd64.deb + file_size: '94327412' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/focal64/riak_3.0.9-yokozuna-OTP22.3_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/xenial64/riak_3.0.8-yokozuna-OTP22.3_amd64.deb + file_size: '93019108' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/xenial64/riak_3.0.8-yokozuna-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/xenial64/riak_3.0.9-OTP20.3_amd64.deb + file_size: '35906964' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/xenial64/riak_3.0.9-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/xenial64/riak_3.0.9-OTP22.3_amd64.deb + file_size: '37210326' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/xenial64/riak_3.0.9-OTP22.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/xenial64/riak_3.0.9-yokozuna-OTP20.3_amd64.deb + file_size: '91740592' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/xenial64/riak_3.0.9-yokozuna-OTP20.3_amd64.deb.sha + - arch: amd64 + file_info: + file_name: riak_3.0.9-yokozuna-OTP22.3_amd64.deb + file_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/xenial64/riak_3.0.9-yokozuna-OTP22.3_amd64.deb + file_size: '93046580' + chksum_href: https://files.tiot.jp/riak/kv/3.0/3.0.9/ubuntu/xenial64/riak_3.0.9-yokozuna-OTP22.3_amd64.deb.sha +riak_cs: + 2.0.0: + - os: source + file_info: + file_name: riak-cs-2.0.0.tar.gz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/riak-cs-2.0.0.tar.gz + file_size: '9709182' + - os: debian + versions: + - version: '6' + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.0.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/debian/6/riak-cs_2.0.0-1_amd64.deb + file_size: '24144566' + chksum_href: 
https://files.tiot.jp/riak/cs/2.0/2.0.0/debian/6/riak-cs_2.0.0-1_amd64.deb.sha + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.0.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/debian/7/riak-cs_2.0.0-1_amd64.deb + file_size: '24189380' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/debian/7/riak-cs_2.0.0-1_amd64.deb.sha + - os: fedora + versions: + - version: '19' + architectures: + - arch: source + file_info: + file_name: riak-cs-2.0.0-1.fc19.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/fedora/19/riak-cs-2.0.0-1.fc19.src.rpm + file_size: '9687523' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/fedora/19/riak-cs-2.0.0-1.fc19.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.0-1.fc19.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/fedora/19/riak-cs-2.0.0-1.fc19.x86_64.rpm + file_size: '21815512' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/fedora/19/riak-cs-2.0.0-1.fc19.x86_64.rpm.sha + - os: freebsd + versions: + - version: '10' + architectures: + - arch: txz + file_info: + file_name: riak-cs-2.0.0.txz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/freebsd/10/riak-cs-2.0.0.txz + file_size: '23688492' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/freebsd/10/riak-cs-2.0.0.txz.sha + - version: '9.2' + architectures: + - arch: amd64 + file_info: + file_name: riak-cs-2.0.0-FreeBSD-amd64.tbz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/freebsd/9.2/riak-cs-2.0.0-FreeBSD-amd64.tbz + file_size: '29615523' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/freebsd/9.2/riak-cs-2.0.0-FreeBSD-amd64.tbz.sha + - os: osx + versions: + - version: '10.8' + architectures: + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.0-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/osx/10.8/riak-cs-2.0.0-OSX-x86_64.tar.gz + file_size: '24642728' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/osx/10.8/riak-cs-2.0.0-OSX-x86_64.tar.gz.sha + - os: rhel + versions: + - version: '5' + architectures: + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.0-1.el5.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/5/riak-cs-2.0.0-1.el5.x86_64.rpm + file_size: '24362997' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/5/riak-cs-2.0.0-1.el5.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-cs-2.0.0-1.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/5/riak-cs-2.0.0-1.src.rpm + file_size: '9687080' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/5/riak-cs-2.0.0-1.src.rpm.sha + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-cs-2.0.0-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/6/riak-cs-2.0.0-1.el6.src.rpm + file_size: '9670530' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/6/riak-cs-2.0.0-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.0-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/6/riak-cs-2.0.0-1.el6.x86_64.rpm + file_size: '21892916' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/6/riak-cs-2.0.0-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-cs-2.0.0-1.el7.centos.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/7/riak-cs-2.0.0-1.el7.centos.src.rpm + file_size: '9618891' + chksum_href: 
https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/7/riak-cs-2.0.0-1.el7.centos.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.0-1.el7.centos.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/7/riak-cs-2.0.0-1.el7.centos.x86_64.rpm + file_size: '21812920' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/rhel/7/riak-cs-2.0.0-1.el7.centos.x86_64.rpm.sha + - os: sles + versions: + - version: '11' + architectures: + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.0-1.SLES11.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/sles/11/riak-cs-2.0.0-1.SLES11.x86_64.rpm + file_size: '25261860' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/sles/11/riak-cs-2.0.0-1.SLES11.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-cs-2.0.0-1.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/sles/11/riak-cs-2.0.0-1.src.rpm + file_size: '9671122' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/sles/11/riak-cs-2.0.0-1.src.rpm.sha + - os: smartos + versions: + - version: '1.8' + architectures: + - arch: x86_64 + file_info: + file_name: riak_cs-2.0.0-SmartOS-x86_64.tgz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/smartos/1.8/riak_cs-2.0.0-SmartOS-x86_64.tgz + file_size: '31991701' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/smartos/1.8/riak_cs-2.0.0-SmartOS-x86_64.tgz.sha + - version: '13.1' + architectures: + - arch: x86_64 + file_info: + file_name: riak_cs-2.0.0-SmartOS-x86_64.tgz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/smartos/13.1/riak_cs-2.0.0-SmartOS-x86_64.tgz + file_size: '31989822' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/smartos/13.1/riak_cs-2.0.0-SmartOS-x86_64.tgz.sha + - os: solaris + versions: + - version: '10' + architectures: + - arch: x86_64 + file_info: + file_name: BASHOriak-cs-2.0.0-Solaris10-x86_64.pkg.gz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/solaris/10/BASHOriak-cs-2.0.0-Solaris10-x86_64.pkg.gz + file_size: '28945489' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/solaris/10/BASHOriak-cs-2.0.0-Solaris10-x86_64.pkg.gz.sha + - os: ubuntu + versions: + - version: lucid + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.0.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/ubuntu/lucid/riak-cs_2.0.0-1_amd64.deb + file_size: '24146078' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/ubuntu/lucid/riak-cs_2.0.0-1_amd64.deb.sha + - version: precise + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.0.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/ubuntu/precise/riak-cs_2.0.0-1_amd64.deb + file_size: '24119194' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/ubuntu/precise/riak-cs_2.0.0-1_amd64.deb.sha + - version: trusty + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.0.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/ubuntu/trusty/riak-cs_2.0.0-1_amd64.deb + file_size: '20021826' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.0/ubuntu/trusty/riak-cs_2.0.0-1_amd64.deb.sha + 2.0.1: + - os: source + file_info: + file_name: riak-cs-2.0.1.tar.gz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/riak-cs-2.0.1.tar.gz + file_size: '9734438' + - os: debian + versions: + - version: '6' + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.0.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/debian/6/riak-cs_2.0.1-1_amd64.deb + file_size: '24209698' + chksum_href: 
https://files.tiot.jp/riak/cs/2.0/2.0.1/debian/6/riak-cs_2.0.1-1_amd64.deb.sha + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.0.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/debian/7/riak-cs_2.0.1-1_amd64.deb + file_size: '24253352' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/debian/7/riak-cs_2.0.1-1_amd64.deb.sha + - os: fedora + versions: + - version: '19' + architectures: + - arch: source + file_info: + file_name: riak-cs-2.0.1-1.fc19.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/fedora/19/riak-cs-2.0.1-1.fc19.src.rpm + file_size: '9714696' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/fedora/19/riak-cs-2.0.1-1.fc19.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.1-1.fc19.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/fedora/19/riak-cs-2.0.1-1.fc19.x86_64.rpm + file_size: '21874336' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/fedora/19/riak-cs-2.0.1-1.fc19.x86_64.rpm.sha + - os: freebsd + versions: + - version: '10' + architectures: + - arch: txz + file_info: + file_name: riak-cs-2.0.1.txz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/freebsd/10/riak-cs-2.0.1.txz + file_size: '23760880' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/freebsd/10/riak-cs-2.0.1.txz.sha + - version: '9.2' + architectures: + - arch: amd64 + file_info: + file_name: riak-cs-2.0.1-FreeBSD-amd64.tbz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/freebsd/9.2/riak-cs-2.0.1-FreeBSD-amd64.tbz + file_size: '29679739' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/freebsd/9.2/riak-cs-2.0.1-FreeBSD-amd64.tbz.sha + - os: osx + versions: + - version: '10.8' + architectures: + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.1-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/osx/10.8/riak-cs-2.0.1-OSX-x86_64.tar.gz + file_size: '24710129' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/osx/10.8/riak-cs-2.0.1-OSX-x86_64.tar.gz.sha + - os: rhel + versions: + - version: '5' + architectures: + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.1-1.el5.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/5/riak-cs-2.0.1-1.el5.x86_64.rpm + file_size: '24421926' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/5/riak-cs-2.0.1-1.el5.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-cs-2.0.1-1.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/5/riak-cs-2.0.1-1.src.rpm + file_size: '9709943' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/5/riak-cs-2.0.1-1.src.rpm.sha + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-cs-2.0.1-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/6/riak-cs-2.0.1-1.el6.src.rpm + file_size: '9694943' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/6/riak-cs-2.0.1-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.1-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/6/riak-cs-2.0.1-1.el6.x86_64.rpm + file_size: '21953756' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/6/riak-cs-2.0.1-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-cs-2.0.1-1.el7.centos.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/7/riak-cs-2.0.1-1.el7.centos.src.rpm + file_size: '9649994' + chksum_href: 
https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/7/riak-cs-2.0.1-1.el7.centos.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.1-1.el7.centos.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/7/riak-cs-2.0.1-1.el7.centos.x86_64.rpm + file_size: '21871700' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/rhel/7/riak-cs-2.0.1-1.el7.centos.x86_64.rpm.sha + - os: sles + versions: + - version: '11' + architectures: + - arch: x86_64 + file_info: + file_name: riak-cs-2.0.1-1.SLES11.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/sles/11/riak-cs-2.0.1-1.SLES11.x86_64.rpm + file_size: '25327783' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/sles/11/riak-cs-2.0.1-1.SLES11.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-cs-2.0.1-1.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/sles/11/riak-cs-2.0.1-1.src.rpm + file_size: '9682213' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/sles/11/riak-cs-2.0.1-1.src.rpm.sha + - os: smartos + versions: + - version: '1.8' + architectures: + - arch: x86_64 + file_info: + file_name: riak_cs-2.0.1-SmartOS-x86_64.tgz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/smartos/1.8/riak_cs-2.0.1-SmartOS-x86_64.tgz + file_size: '31308976' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/smartos/1.8/riak_cs-2.0.1-SmartOS-x86_64.tgz.sha + - version: '13.1' + architectures: + - arch: x86_64 + file_info: + file_name: riak_cs-2.0.1-SmartOS-x86_64.tgz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/smartos/13.1/riak_cs-2.0.1-SmartOS-x86_64.tgz + file_size: '31292984' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/smartos/13.1/riak_cs-2.0.1-SmartOS-x86_64.tgz.sha + - os: solaris + versions: + - version: '10' + architectures: + - arch: x86_64 + file_info: + file_name: BASHOriak-cs-2.0.1-Solaris10-x86_64.pkg.gz + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/solaris/10/BASHOriak-cs-2.0.1-Solaris10-x86_64.pkg.gz + file_size: '28585773' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/solaris/10/BASHOriak-cs-2.0.1-Solaris10-x86_64.pkg.gz.sha + - os: ubuntu + versions: + - version: lucid + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.0.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/ubuntu/lucid/riak-cs_2.0.1-1_amd64.deb + file_size: '24207464' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/ubuntu/lucid/riak-cs_2.0.1-1_amd64.deb.sha + - version: precise + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.0.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/ubuntu/precise/riak-cs_2.0.1-1_amd64.deb + file_size: '24189948' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/ubuntu/precise/riak-cs_2.0.1-1_amd64.deb.sha + - version: trusty + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.0.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/ubuntu/trusty/riak-cs_2.0.1-1_amd64.deb + file_size: '20058620' + chksum_href: https://files.tiot.jp/riak/cs/2.0/2.0.1/ubuntu/trusty/riak-cs_2.0.1-1_amd64.deb.sha + 2.1.0: + - os: source + file_info: + file_name: riak-cs-2.1.0.tar.gz + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/riak-cs-2.1.0.tar.gz + file_size: '10670941' + - os: debian + versions: + - version: '6' + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.1.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/debian/6/riak-cs_2.1.0-1_amd64.deb + file_size: '26657738' + chksum_href: 
https://files.tiot.jp/riak/cs/2.1/2.1.0/debian/6/riak-cs_2.1.0-1_amd64.deb.sha + - version: '7' + architectures: + - arch: amd64 + file_info: + file_name: riak-cs_2.1.0-1_amd64.deb + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/debian/7/riak-cs_2.1.0-1_amd64.deb + file_size: '26703798' + chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/debian/7/riak-cs_2.1.0-1_amd64.deb.sha + - os: fedora + versions: + - version: '19' + architectures: + - arch: source + file_info: + file_name: riak-cs-2.1.0-1.fc19.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/fedora/19/riak-cs-2.1.0-1.fc19.src.rpm + file_size: '10655944' + chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/fedora/19/riak-cs-2.1.0-1.fc19.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-cs-2.1.0-1.fc19.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/fedora/19/riak-cs-2.1.0-1.fc19.x86_64.rpm + file_size: '24117812' + chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/fedora/19/riak-cs-2.1.0-1.fc19.x86_64.rpm.sha + - os: freebsd + versions: + - version: '10' + architectures: + - arch: txz + file_info: + file_name: riak-cs-2.1.0.txz + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/freebsd/10/riak-cs-2.1.0.txz + file_size: '25878088' + chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/freebsd/10/riak-cs-2.1.0.txz.sha + - version: '9.2' + architectures: + - arch: amd64 + file_info: + file_name: riak-cs-2.1.0-FreeBSD-amd64.tbz + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/freebsd/9.2/riak-cs-2.1.0-FreeBSD-amd64.tbz + file_size: '32135218' + chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/freebsd/9.2/riak-cs-2.1.0-FreeBSD-amd64.tbz.sha + - os: osx + versions: + - version: '10.8' + architectures: + - arch: x86_64 + file_info: + file_name: riak-cs-2.1.0-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/osx/10.8/riak-cs-2.1.0-OSX-x86_64.tar.gz + file_size: '27157716' + chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/osx/10.8/riak-cs-2.1.0-OSX-x86_64.tar.gz.sha + - os: rhel + versions: + - version: '5' + architectures: + - arch: x86_64 + file_info: + file_name: riak-cs-2.1.0-1.el5.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/5/riak-cs-2.1.0-1.el5.x86_64.rpm + file_size: '26899774' + chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/5/riak-cs-2.1.0-1.el5.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-cs-2.1.0-1.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/5/riak-cs-2.1.0-1.src.rpm + file_size: '10645942' + chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/5/riak-cs-2.1.0-1.src.rpm.sha + - version: '6' + architectures: + - arch: source + file_info: + file_name: riak-cs-2.1.0-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/6/riak-cs-2.1.0-1.el6.src.rpm + file_size: '10632114' + chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/6/riak-cs-2.1.0-1.el6.src.rpm.sha + - arch: x86_64 + file_info: + file_name: riak-cs-2.1.0-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/6/riak-cs-2.1.0-1.el6.x86_64.rpm + file_size: '24209960' + chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/6/riak-cs-2.1.0-1.el6.x86_64.rpm.sha + - version: '7' + architectures: + - arch: source + file_info: + file_name: riak-cs-2.1.0-1.el7.centos.src.rpm + file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/7/riak-cs-2.1.0-1.el7.centos.src.rpm + file_size: '10606405' + chksum_href: 
https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/7/riak-cs-2.1.0-1.el7.centos.src.rpm.sha
       - arch: x86_64
         file_info:
           file_name: riak-cs-2.1.0-1.el7.centos.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/7/riak-cs-2.1.0-1.el7.centos.x86_64.rpm
-          file_size: 24115092
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/rhel/7/riak-cs-2.1.0-1.el7.centos.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/7/riak-cs-2.1.0-1.el7.centos.x86_64.rpm
+          file_size: '24115092'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/rhel/7/riak-cs-2.1.0-1.el7.centos.x86_64.rpm.sha
   - os: sles
     versions:
     - version: '11'
@@ -3315,15 +10500,15 @@ riak_cs:
       - arch: x86_64
         file_info:
           file_name: riak-cs-2.1.0-1.SLES11.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/sles/11/riak-cs-2.1.0-1.SLES11.x86_64.rpm
-          file_size: 27569003
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/sles/11/riak-cs-2.1.0-1.SLES11.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/sles/11/riak-cs-2.1.0-1.SLES11.x86_64.rpm
+          file_size: '27569003'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/sles/11/riak-cs-2.1.0-1.SLES11.x86_64.rpm.sha
       - arch: source
         file_info:
           file_name: riak-cs-2.1.0-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/sles/11/riak-cs-2.1.0-1.src.rpm
-          file_size: 10634680
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/sles/11/riak-cs-2.1.0-1.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/sles/11/riak-cs-2.1.0-1.src.rpm
+          file_size: '10634680'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/sles/11/riak-cs-2.1.0-1.src.rpm.sha
   - os: smartos
     versions:
     - version: '1.8'
@@ -3331,17 +10516,17 @@ riak_cs:
       - arch: x86_64
         file_info:
           file_name: riak_cs-2.1.0-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/smartos/1.8/riak_cs-2.1.0-SmartOS-x86_64.tgz
-          file_size: 33766345
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/smartos/1.8/riak_cs-2.1.0-SmartOS-x86_64.tgz.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/smartos/1.8/riak_cs-2.1.0-SmartOS-x86_64.tgz
+          file_size: '33766345'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/smartos/1.8/riak_cs-2.1.0-SmartOS-x86_64.tgz.sha
     - version: '13.1'
       architectures:
       - arch: x86_64
         file_info:
           file_name: riak_cs-2.1.0-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/smartos/13.1/riak_cs-2.1.0-SmartOS-x86_64.tgz
-          file_size: 33759251
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/smartos/13.1/riak_cs-2.1.0-SmartOS-x86_64.tgz.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/smartos/13.1/riak_cs-2.1.0-SmartOS-x86_64.tgz
+          file_size: '33759251'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/smartos/13.1/riak_cs-2.1.0-SmartOS-x86_64.tgz.sha
   - os: solaris
     versions:
     - version: '10'
@@ -3349,9 +10534,9 @@ riak_cs:
       - arch: x86_64
         file_info:
           file_name: BASHOriak-cs-2.1.0-Solaris10-x86_64.pkg.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/solaris/10/BASHOriak-cs-2.1.0-Solaris10-x86_64.pkg.gz
-          file_size: 31048140
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/solaris/10/BASHOriak-cs-2.1.0-Solaris10-x86_64.pkg.gz.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/solaris/10/BASHOriak-cs-2.1.0-Solaris10-x86_64.pkg.gz
+          file_size: '31048140'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/solaris/10/BASHOriak-cs-2.1.0-Solaris10-x86_64.pkg.gz.sha
   - os: ubuntu
     versions:
     - version: lucid
@@ -3359,31 +10544,31 @@ riak_cs:
       - arch: amd64
         file_info:
           file_name: riak-cs_2.1.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/ubuntu/lucid/riak-cs_2.1.0-1_amd64.deb
-          file_size: 26655520
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/ubuntu/lucid/riak-cs_2.1.0-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/ubuntu/lucid/riak-cs_2.1.0-1_amd64.deb
+          file_size: '26655520'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/ubuntu/lucid/riak-cs_2.1.0-1_amd64.deb.sha
     - version: precise
       architectures:
       - arch: amd64
         file_info:
           file_name: riak-cs_2.1.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/ubuntu/precise/riak-cs_2.1.0-1_amd64.deb
-          file_size: 26633874
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/ubuntu/precise/riak-cs_2.1.0-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/ubuntu/precise/riak-cs_2.1.0-1_amd64.deb
+          file_size: '26633874'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/ubuntu/precise/riak-cs_2.1.0-1_amd64.deb.sha
     - version: trusty
       architectures:
       - arch: amd64
         file_info:
           file_name: riak-cs_2.1.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/ubuntu/trusty/riak-cs_2.1.0-1_amd64.deb
-          file_size: 22182822
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.0/ubuntu/trusty/riak-cs_2.1.0-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/ubuntu/trusty/riak-cs_2.1.0-1_amd64.deb
+          file_size: '22182822'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.0/ubuntu/trusty/riak-cs_2.1.0-1_amd64.deb.sha
   2.1.1:
   - os: source
     file_info:
       file_name: riak-cs-2.1.1.tar.gz
-      file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/riak-cs-2.1.1.tar.gz
-      file_size: 10680566
+      file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/riak-cs-2.1.1.tar.gz
+      file_size: '10680566'
   - os: debian
     versions:
     - version: '6'
@@ -3391,17 +10576,17 @@ riak_cs:
       - arch: amd64
         file_info:
           file_name: riak-cs_2.1.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/debian/6/riak-cs_2.1.1-1_amd64.deb
-          file_size: 26650836
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/debian/6/riak-cs_2.1.1-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/debian/6/riak-cs_2.1.1-1_amd64.deb
+          file_size: '26650836'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/debian/6/riak-cs_2.1.1-1_amd64.deb.sha
     - version: '7'
       architectures:
      - arch: amd64
         file_info:
           file_name: riak-cs_2.1.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/debian/7/riak-cs_2.1.1-1_amd64.deb
-          file_size: 26696268
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/debian/7/riak-cs_2.1.1-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/debian/7/riak-cs_2.1.1-1_amd64.deb
+          file_size: '26696268'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/debian/7/riak-cs_2.1.1-1_amd64.deb.sha
   - os: freebsd
     versions:
     - version: '10'
@@ -3409,17 +10594,17 @@ riak_cs:
       - arch: txz
         file_info:
           file_name: riak-cs-2.1.1.txz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/freebsd/10/riak-cs-2.1.1.txz
-          file_size: 25875800
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/freebsd/10/riak-cs-2.1.1.txz.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/freebsd/10/riak-cs-2.1.1.txz
+          file_size: '25875800'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/freebsd/10/riak-cs-2.1.1.txz.sha
     - version: '9.2'
       architectures:
       - arch: amd64
         file_info:
           file_name: riak-cs-2.1.1-FreeBSD-amd64.tbz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/freebsd/9.2/riak-cs-2.1.1-FreeBSD-amd64.tbz
-          file_size: 32131289
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/freebsd/9.2/riak-cs-2.1.1-FreeBSD-amd64.tbz.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/freebsd/9.2/riak-cs-2.1.1-FreeBSD-amd64.tbz
+          file_size: '32131289'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/freebsd/9.2/riak-cs-2.1.1-FreeBSD-amd64.tbz.sha
   - os: osx
     versions:
     - version: '10.8'
@@ -3427,9 +10612,9 @@ riak_cs:
       - arch: x86_64
         file_info:
           file_name: riak-cs-2.1.1-OSX-x86_64.tar.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/osx/10.8/riak-cs-2.1.1-OSX-x86_64.tar.gz
-          file_size: 27152204
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/osx/10.8/riak-cs-2.1.1-OSX-x86_64.tar.gz.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/osx/10.8/riak-cs-2.1.1-OSX-x86_64.tar.gz
+          file_size: '27152204'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/osx/10.8/riak-cs-2.1.1-OSX-x86_64.tar.gz.sha
   - os: rhel
     versions:
     - version: '5'
@@ -3437,43 +10622,43 @@ riak_cs:
       - arch: x86_64
         file_info:
           file_name: riak-cs-2.1.1-1.el5.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/5/riak-cs-2.1.1-1.el5.x86_64.rpm
-          file_size: 26889470
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/5/riak-cs-2.1.1-1.el5.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/5/riak-cs-2.1.1-1.el5.x86_64.rpm
+          file_size: '26889470'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/5/riak-cs-2.1.1-1.el5.x86_64.rpm.sha
       - arch: source
         file_info:
           file_name: riak-cs-2.1.1-1.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/5/riak-cs-2.1.1-1.src.rpm
-          file_size: 10673951
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/5/riak-cs-2.1.1-1.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/5/riak-cs-2.1.1-1.src.rpm
+          file_size: '10673951'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/5/riak-cs-2.1.1-1.src.rpm.sha
     - version: '6'
       architectures:
       - arch: source
         file_info:
           file_name: riak-cs-2.1.1-1.el6.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/6/riak-cs-2.1.1-1.el6.src.rpm
-          file_size: 10644531
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/6/riak-cs-2.1.1-1.el6.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/6/riak-cs-2.1.1-1.el6.src.rpm
+          file_size: '10644531'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/6/riak-cs-2.1.1-1.el6.src.rpm.sha
       - arch: x86_64
         file_info:
           file_name: riak-cs-2.1.1-1.el6.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/6/riak-cs-2.1.1-1.el6.x86_64.rpm
-          file_size: 24205024
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/6/riak-cs-2.1.1-1.el6.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/6/riak-cs-2.1.1-1.el6.x86_64.rpm
+          file_size: '24205024'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/6/riak-cs-2.1.1-1.el6.x86_64.rpm.sha
     - version: '7'
       architectures:
       - arch: source
         file_info:
           file_name: riak-cs-2.1.1-1.el7.centos.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/7/riak-cs-2.1.1-1.el7.centos.src.rpm
-          file_size: 10619493
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/7/riak-cs-2.1.1-1.el7.centos.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/7/riak-cs-2.1.1-1.el7.centos.src.rpm
+          file_size: '10619493'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/7/riak-cs-2.1.1-1.el7.centos.src.rpm.sha
       - arch: x86_64
         file_info:
           file_name: riak-cs-2.1.1-1.el7.centos.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/7/riak-cs-2.1.1-1.el7.centos.x86_64.rpm
-          file_size: 24109976
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/rhel/7/riak-cs-2.1.1-1.el7.centos.x86_64.rpm.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/7/riak-cs-2.1.1-1.el7.centos.x86_64.rpm
+          file_size: '24109976'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/rhel/7/riak-cs-2.1.1-1.el7.centos.x86_64.rpm.sha
   - os: smartos
     versions:
     - version: '1.8'
@@ -3481,17 +10666,17 @@ riak_cs:
       - arch: x86_64
         file_info:
           file_name: riak_cs-2.1.1-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/smartos/1.8/riak_cs-2.1.1-SmartOS-x86_64.tgz
-          file_size: 33771692
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/smartos/1.8/riak_cs-2.1.1-SmartOS-x86_64.tgz.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/smartos/1.8/riak_cs-2.1.1-SmartOS-x86_64.tgz
+          file_size: '33771692'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/smartos/1.8/riak_cs-2.1.1-SmartOS-x86_64.tgz.sha
     - version: '13.1'
       architectures:
       - arch: x86_64
         file_info:
           file_name: riak_cs-2.1.1-SmartOS-x86_64.tgz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/smartos/13.1/riak_cs-2.1.1-SmartOS-x86_64.tgz
-          file_size: 33770780
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/smartos/13.1/riak_cs-2.1.1-SmartOS-x86_64.tgz.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/smartos/13.1/riak_cs-2.1.1-SmartOS-x86_64.tgz
+          file_size: '33770780'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/smartos/13.1/riak_cs-2.1.1-SmartOS-x86_64.tgz.sha
   - os: solaris
     versions:
     - version: '10'
@@ -3499,9 +10684,9 @@ riak_cs:
       - arch: x86_64
         file_info:
           file_name: BASHOriak-cs-2.1.1-Solaris10-x86_64.pkg.gz
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/solaris/10/BASHOriak-cs-2.1.1-Solaris10-x86_64.pkg.gz
-          file_size: 31067113
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/solaris/10/BASHOriak-cs-2.1.1-Solaris10-x86_64.pkg.gz.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/solaris/10/BASHOriak-cs-2.1.1-Solaris10-x86_64.pkg.gz
+          file_size: '31067113'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/solaris/10/BASHOriak-cs-2.1.1-Solaris10-x86_64.pkg.gz.sha
   - os: ubuntu
     versions:
     - version: lucid
@@ -3509,32 +10694,204 @@ riak_cs:
       - arch: amd64
         file_info:
           file_name: riak-cs_2.1.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/ubuntu/lucid/riak-cs_2.1.1-1_amd64.deb
-          file_size: 26650472
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/ubuntu/lucid/riak-cs_2.1.1-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/ubuntu/lucid/riak-cs_2.1.1-1_amd64.deb
+          file_size: '26650472'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/ubuntu/lucid/riak-cs_2.1.1-1_amd64.deb.sha
     - version: precise
       architectures:
       - arch: amd64
         file_info:
           file_name: riak-cs_2.1.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/ubuntu/precise/riak-cs_2.1.1-1_amd64.deb
-          file_size: 26623250
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/ubuntu/precise/riak-cs_2.1.1-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/ubuntu/precise/riak-cs_2.1.1-1_amd64.deb
+          file_size: '26623250'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/ubuntu/precise/riak-cs_2.1.1-1_amd64.deb.sha
     - version: trusty
       architectures:
       - arch: amd64
         file_info:
           file_name: riak-cs_2.1.1-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/ubuntu/trusty/riak-cs_2.1.1-1_amd64.deb
-          file_size: 22174054
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs/2.1/2.1.1/ubuntu/trusty/riak-cs_2.1.1-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/ubuntu/trusty/riak-cs_2.1.1-1_amd64.deb
+          file_size: '22174054'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.1/ubuntu/trusty/riak-cs_2.1.1-1_amd64.deb.sha
+  2.1.2:
+  - os: source
+    file_info:
+      file_name: riak-cs-2.1.2.tar.gz
+      file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/riak-cs-2.1.2.tar.gz
+      file_size: '11035083'
+  - os: debian
+    versions:
+    - version: '10'
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: riak-cs-dbgsym_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/10/riak-cs-dbgsym_2.1.2-1_amd64.deb
+          file_size: '115176'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/10/riak-cs-dbgsym_2.1.2-1_amd64.deb.sha
+      - arch: amd64
+        file_info:
+          file_name: riak-cs_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/10/riak-cs_2.1.2-1_amd64.deb
+          file_size: '22398708'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/10/riak-cs_2.1.2-1_amd64.deb.sha
+    - version: '7'
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: riak-cs_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/7/riak-cs_2.1.2-1_amd64.deb
+          file_size: '26695722'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/7/riak-cs_2.1.2-1_amd64.deb.sha
+    - version: '8'
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: riak-cs_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/8/riak-cs_2.1.2-1_amd64.deb
+          file_size: '22249450'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/8/riak-cs_2.1.2-1_amd64.deb.sha
+    - version: '9'
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: riak-cs-dbgsym_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/9/riak-cs-dbgsym_2.1.2-1_amd64.deb
+          file_size: '103378'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/9/riak-cs-dbgsym_2.1.2-1_amd64.deb.sha
+      - arch: amd64
+        file_info:
+          file_name: riak-cs_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/9/riak-cs_2.1.2-1_amd64.deb
+          file_size: '22224626'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/debian/9/riak-cs_2.1.2-1_amd64.deb.sha
+  - os: freebsd
+    versions:
+    - version: '10.4'
+      architectures:
+      - arch: txz
+        file_info:
+          file_name: riak-cs-2.1.2.txz
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/freebsd/10.4/riak-cs-2.1.2.txz
+          file_size: '27425904'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/freebsd/10.4/riak-cs-2.1.2.txz.sha
+    - version: '11.1'
+      architectures:
+      - arch: txz
+        file_info:
+          file_name: riak-cs-2.1.2.txz
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/freebsd/11.1/riak-cs-2.1.2.txz
+          file_size: '27438820'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/freebsd/11.1/riak-cs-2.1.2.txz.sha
+    - version: '12.1'
+      architectures:
+      - arch: txz
+        file_info:
+          file_name: riak-cs-2.1.2.txz
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/freebsd/12.1/riak-cs-2.1.2.txz
+          file_size: '28996916'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/freebsd/12.1/riak-cs-2.1.2.txz.sha
+  - os: osx
+    versions:
+    - version: '10.11'
+      architectures:
+      - arch: x86_64
+        file_info:
+          file_name: riak-cs-2.1.2-OSX-x86_64.tar.gz
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/osx/10.11/riak-cs-2.1.2-OSX-x86_64.tar.gz
+          file_size: '28767353'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/osx/10.11/riak-cs-2.1.2-OSX-x86_64.tar.gz.sha
+    - version: '10.14'
+      architectures:
+      - arch: x86_64
+        file_info:
+          file_name: riak-cs-2.1.2-OSX-x86_64.tar.gz
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/osx/10.14/riak-cs-2.1.2-OSX-x86_64.tar.gz
+          file_size: '29188420'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/osx/10.14/riak-cs-2.1.2-OSX-x86_64.tar.gz.sha
+  - os: raspbian
+    versions: []
+  - os: rhel
+    versions:
+    - version: '6'
+      architectures:
+      - arch: source
+        file_info:
+          file_name: riak-cs-2.1.2-1.el6.src.rpm
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/rhel/6/riak-cs-2.1.2-1.el6.src.rpm
+          file_size: '10992846'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/rhel/6/riak-cs-2.1.2-1.el6.src.rpm.sha
+      - arch: x86_64
+        file_info:
+          file_name: riak-cs-2.1.2-1.el6.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/rhel/6/riak-cs-2.1.2-1.el6.x86_64.rpm
+          file_size: '24210940'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/rhel/6/riak-cs-2.1.2-1.el6.x86_64.rpm.sha
+    - version: '7'
+      architectures:
+      - arch: source
+        file_info:
+          file_name: riak-cs-2.1.2-1.el7.src.rpm
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/rhel/7/riak-cs-2.1.2-1.el7.src.rpm
+          file_size: '10955906'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/rhel/7/riak-cs-2.1.2-1.el7.src.rpm.sha
+      - arch: x86_64
+        file_info:
+          file_name: riak-cs-2.1.2-1.el7.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/rhel/7/riak-cs-2.1.2-1.el7.x86_64.rpm
+          file_size: '24102544'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/rhel/7/riak-cs-2.1.2-1.el7.x86_64.rpm.sha
+    - version: '8'
+      architectures: []
+  - os: ubuntu
+    versions:
+    - version: bionic64
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: riak-cs_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/ubuntu/bionic64/riak-cs_2.1.2-1_amd64.deb
+          file_size: '22274224'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/ubuntu/bionic64/riak-cs_2.1.2-1_amd64.deb.sha
+    - version: focal64
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: riak-cs_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/ubuntu/focal64/riak-cs_2.1.2-1_amd64.deb
+          file_size: '22414056'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/ubuntu/focal64/riak-cs_2.1.2-1_amd64.deb.sha
+    - version: precise64
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: riak-cs_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/ubuntu/precise64/riak-cs_2.1.2-1_amd64.deb
+          file_size: '26646328'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/ubuntu/precise64/riak-cs_2.1.2-1_amd64.deb.sha
+    - version: trusty64
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: riak-cs_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/ubuntu/trusty64/riak-cs_2.1.2-1_amd64.deb
+          file_size: '22207506'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/ubuntu/trusty64/riak-cs_2.1.2-1_amd64.deb.sha
+    - version: xenial64
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: riak-cs_2.1.2-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/ubuntu/xenial64/riak-cs_2.1.2-1_amd64.deb
+          file_size: '22282494'
+          chksum_href: https://files.tiot.jp/riak/cs/2.1/2.1.2/ubuntu/xenial64/riak-cs_2.1.2-1_amd64.deb.sha
 stanchion:
   2.0.0:
   - os: source
     file_info:
       file_name: stanchion-2.0.0.tar.gz
-      file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/stanchion-2.0.0.tar.gz
-      file_size: 4109581
+      file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/stanchion-2.0.0.tar.gz
+      file_size: '4109581'
   - os: debian
     versions:
     - version: '6'
@@ -3542,17 +10899,17 @@ stanchion:
       - arch: amd64
         file_info:
           file_name: stanchion_2.0.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/debian/6/stanchion_2.0.0-1_amd64.deb
-          file_size: 22324786
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/debian/6/stanchion_2.0.0-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/debian/6/stanchion_2.0.0-1_amd64.deb
+          file_size: '22324786'
+          chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/debian/6/stanchion_2.0.0-1_amd64.deb.sha
     - version: '7'
       architectures:
       - arch: amd64
         file_info:
           file_name: stanchion_2.0.0-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/debian/7/stanchion_2.0.0-1_amd64.deb
-          file_size: 22359578
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/debian/7/stanchion_2.0.0-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/debian/7/stanchion_2.0.0-1_amd64.deb
+          file_size: '22359578'
+          chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/debian/7/stanchion_2.0.0-1_amd64.deb.sha
   - os: fedora
     versions:
    - version: '19'
@@ -3560,15 +10917,15 @@ stanchion:
       - arch: source
         file_info:
           file_name: stanchion-2.0.0-1.fc19.src.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/fedora/19/stanchion-2.0.0-1.fc19.src.rpm
-          file_size: 4117813
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/fedora/19/stanchion-2.0.0-1.fc19.src.rpm.sha
+          file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/fedora/19/stanchion-2.0.0-1.fc19.src.rpm
+          file_size: '4117813'
+          chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/fedora/19/stanchion-2.0.0-1.fc19.src.rpm.sha
       - arch: x86_64
         file_info:
           file_name: stanchion-2.0.0-1.fc19.x86_64.rpm
-          file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/fedora/19/stanchion-2.0.0-1.fc19.x86_64.rpm
-          file_size: 20161528
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/fedora/19/stanchion-2.0.0-1.fc19.x86_64.rpm.sha
+ file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/fedora/19/stanchion-2.0.0-1.fc19.x86_64.rpm + file_size: '20161528' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/fedora/19/stanchion-2.0.0-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -3576,17 +10933,17 @@ stanchion: - arch: txz file_info: file_name: stanchion-2.0.0.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/freebsd/10/stanchion-2.0.0.txz - file_size: 22119860 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/freebsd/10/stanchion-2.0.0.txz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/freebsd/10/stanchion-2.0.0.txz + file_size: '22119860' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/freebsd/10/stanchion-2.0.0.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: stanchion-2.0.0-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/freebsd/9.2/stanchion-2.0.0-FreeBSD-amd64.tbz - file_size: 27804394 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/freebsd/9.2/stanchion-2.0.0-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/freebsd/9.2/stanchion-2.0.0-FreeBSD-amd64.tbz + file_size: '27804394' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/freebsd/9.2/stanchion-2.0.0-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -3594,9 +10951,9 @@ stanchion: - arch: x86_64 file_info: file_name: stanchion-2.0.0-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/osx/10.8/stanchion-2.0.0-OSX-x86_64.tar.gz - file_size: 22819693 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/osx/10.8/stanchion-2.0.0-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/osx/10.8/stanchion-2.0.0-OSX-x86_64.tar.gz + file_size: '22819693' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/osx/10.8/stanchion-2.0.0-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -3604,43 +10961,43 @@ stanchion: - arch: x86_64 file_info: file_name: stanchion-2.0.0-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/5/stanchion-2.0.0-1.el5.x86_64.rpm - file_size: 22519345 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/5/stanchion-2.0.0-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/5/stanchion-2.0.0-1.el5.x86_64.rpm + file_size: '22519345' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/5/stanchion-2.0.0-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: stanchion-2.0.0-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/5/stanchion-2.0.0-1.src.rpm - file_size: 4126718 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/5/stanchion-2.0.0-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/5/stanchion-2.0.0-1.src.rpm + file_size: '4126718' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/5/stanchion-2.0.0-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: stanchion-2.0.0-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/6/stanchion-2.0.0-1.el6.src.rpm - file_size: 4112760 - chksum_href: 
http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/6/stanchion-2.0.0-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/6/stanchion-2.0.0-1.el6.src.rpm + file_size: '4112760' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/6/stanchion-2.0.0-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: stanchion-2.0.0-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/6/stanchion-2.0.0-1.el6.x86_64.rpm - file_size: 20215564 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/6/stanchion-2.0.0-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/6/stanchion-2.0.0-1.el6.x86_64.rpm + file_size: '20215564' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/6/stanchion-2.0.0-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: stanchion-2.0.0-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/7/stanchion-2.0.0-1.el7.centos.src.rpm - file_size: 4104594 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/7/stanchion-2.0.0-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/7/stanchion-2.0.0-1.el7.centos.src.rpm + file_size: '4104594' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/7/stanchion-2.0.0-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: stanchion-2.0.0-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/7/stanchion-2.0.0-1.el7.centos.x86_64.rpm - file_size: 20159668 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/rhel/7/stanchion-2.0.0-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/7/stanchion-2.0.0-1.el7.centos.x86_64.rpm + file_size: '20159668' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/rhel/7/stanchion-2.0.0-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -3648,15 +11005,15 @@ stanchion: - arch: x86_64 file_info: file_name: stanchion-2.0.0-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/sles/11/stanchion-2.0.0-1.SLES11.x86_64.rpm - file_size: 23605345 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/sles/11/stanchion-2.0.0-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/sles/11/stanchion-2.0.0-1.SLES11.x86_64.rpm + file_size: '23605345' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/sles/11/stanchion-2.0.0-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: stanchion-2.0.0-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/sles/11/stanchion-2.0.0-1.src.rpm - file_size: 4104179 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/sles/11/stanchion-2.0.0-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/sles/11/stanchion-2.0.0-1.src.rpm + file_size: '4104179' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/sles/11/stanchion-2.0.0-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -3664,27 +11021,17 @@ stanchion: - arch: x86_64 file_info: file_name: stanchion-2.0.0-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/smartos/1.8/stanchion-2.0.0-SmartOS-x86_64.tgz - 
file_size: 30137498 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/smartos/1.8/stanchion-2.0.0-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/smartos/1.8/stanchion-2.0.0-SmartOS-x86_64.tgz + file_size: '30137498' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/smartos/1.8/stanchion-2.0.0-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: stanchion-2.0.0-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/smartos/13.1/stanchion-2.0.0-SmartOS-x86_64.tgz - file_size: 30143460 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/smartos/13.1/stanchion-2.0.0-SmartOS-x86_64.tgz.sha - - os: solaris - versions: - - version: '10' - architectures: - - arch: x86_64 - file_info: - file_name: BASHOstanchion-2.0.0-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/solaris/10/BASHOstanchion-2.0.0-Solaris10-x86_64.pkg.gz - file_size: 27108159 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/solaris/10/BASHOstanchion-2.0.0-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/smartos/13.1/stanchion-2.0.0-SmartOS-x86_64.tgz + file_size: '30143460' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/smartos/13.1/stanchion-2.0.0-SmartOS-x86_64.tgz.sha - os: ubuntu versions: - version: lucid @@ -3692,31 +11039,31 @@ stanchion: - arch: amd64 file_info: file_name: stanchion_2.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/ubuntu/lucid/stanchion_2.0.0-1_amd64.deb - file_size: 22316612 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/ubuntu/lucid/stanchion_2.0.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/ubuntu/lucid/stanchion_2.0.0-1_amd64.deb + file_size: '22316612' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/ubuntu/lucid/stanchion_2.0.0-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: stanchion_2.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/ubuntu/precise/stanchion_2.0.0-1_amd64.deb - file_size: 22295082 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/ubuntu/precise/stanchion_2.0.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/ubuntu/precise/stanchion_2.0.0-1_amd64.deb + file_size: '22295082' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/ubuntu/precise/stanchion_2.0.0-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: stanchion_2.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/ubuntu/trusty/stanchion_2.0.0-1_amd64.deb - file_size: 18429888 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.0/2.0.0/ubuntu/trusty/stanchion_2.0.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/ubuntu/trusty/stanchion_2.0.0-1_amd64.deb + file_size: '18429888' + chksum_href: https://files.tiot.jp/riak/stanchion/2.0/2.0.0/ubuntu/trusty/stanchion_2.0.0-1_amd64.deb.sha 2.1.0: - os: source file_info: file_name: stanchion-2.1.0.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/stanchion-2.1.0.tar.gz - file_size: 5567264 + file_href: 
https://files.tiot.jp/riak/stanchion/2.1/2.1.0/stanchion-2.1.0.tar.gz + file_size: '5567264' - os: debian versions: - version: '6' @@ -3724,17 +11071,17 @@ stanchion: - arch: amd64 file_info: file_name: stanchion_2.1.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/debian/6/stanchion_2.1.0-1_amd64.deb - file_size: 24910232 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/debian/6/stanchion_2.1.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/debian/6/stanchion_2.1.0-1_amd64.deb + file_size: '24910232' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/debian/6/stanchion_2.1.0-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: stanchion_2.1.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/debian/7/stanchion_2.1.0-1_amd64.deb - file_size: 24948340 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/debian/7/stanchion_2.1.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/debian/7/stanchion_2.1.0-1_amd64.deb + file_size: '24948340' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/debian/7/stanchion_2.1.0-1_amd64.deb.sha - os: fedora versions: - version: '19' @@ -3742,15 +11089,15 @@ stanchion: - arch: source file_info: file_name: stanchion-2.1.0-1.fc19.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/fedora/19/stanchion-2.1.0-1.fc19.src.rpm - file_size: 5593127 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/fedora/19/stanchion-2.1.0-1.fc19.src.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/fedora/19/stanchion-2.1.0-1.fc19.src.rpm + file_size: '5593127' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/fedora/19/stanchion-2.1.0-1.fc19.src.rpm.sha - arch: x86_64 file_info: file_name: stanchion-2.1.0-1.fc19.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/fedora/19/stanchion-2.1.0-1.fc19.x86_64.rpm - file_size: 22543152 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/fedora/19/stanchion-2.1.0-1.fc19.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/fedora/19/stanchion-2.1.0-1.fc19.x86_64.rpm + file_size: '22543152' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/fedora/19/stanchion-2.1.0-1.fc19.x86_64.rpm.sha - os: freebsd versions: - version: '10' @@ -3758,17 +11105,17 @@ stanchion: - arch: txz file_info: file_name: stanchion-2.1.0.txz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/freebsd/10/stanchion-2.1.0.txz - file_size: 24380468 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/freebsd/10/stanchion-2.1.0.txz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/freebsd/10/stanchion-2.1.0.txz + file_size: '24380468' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/freebsd/10/stanchion-2.1.0.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: stanchion-2.1.0-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/freebsd/9.2/stanchion-2.1.0-FreeBSD-amd64.tbz - file_size: 30412844 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/freebsd/9.2/stanchion-2.1.0-FreeBSD-amd64.tbz.sha + file_href: 
https://files.tiot.jp/riak/stanchion/2.1/2.1.0/freebsd/9.2/stanchion-2.1.0-FreeBSD-amd64.tbz + file_size: '30412844' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/freebsd/9.2/stanchion-2.1.0-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -3776,9 +11123,9 @@ stanchion: - arch: x86_64 file_info: file_name: stanchion-2.1.0-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/osx/10.8/stanchion-2.1.0-OSX-x86_64.tar.gz - file_size: 25403571 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/osx/10.8/stanchion-2.1.0-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/osx/10.8/stanchion-2.1.0-OSX-x86_64.tar.gz + file_size: '25403571' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/osx/10.8/stanchion-2.1.0-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '6' @@ -3786,29 +11133,29 @@ stanchion: - arch: source file_info: file_name: stanchion-2.1.0-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/rhel/6/stanchion-2.1.0-1.el6.src.rpm - file_size: 5572806 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/rhel/6/stanchion-2.1.0-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/rhel/6/stanchion-2.1.0-1.el6.src.rpm + file_size: '5572806' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/rhel/6/stanchion-2.1.0-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: stanchion-2.1.0-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/rhel/6/stanchion-2.1.0-1.el6.x86_64.rpm - file_size: 22608248 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/rhel/6/stanchion-2.1.0-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/rhel/6/stanchion-2.1.0-1.el6.x86_64.rpm + file_size: '22608248' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/rhel/6/stanchion-2.1.0-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: stanchion-2.1.0-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/rhel/7/stanchion-2.1.0-1.el7.centos.src.rpm - file_size: 5581986 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/rhel/7/stanchion-2.1.0-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/rhel/7/stanchion-2.1.0-1.el7.centos.src.rpm + file_size: '5581986' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/rhel/7/stanchion-2.1.0-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: stanchion-2.1.0-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/rhel/7/stanchion-2.1.0-1.el7.centos.x86_64.rpm - file_size: 22540380 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/rhel/7/stanchion-2.1.0-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/rhel/7/stanchion-2.1.0-1.el7.centos.x86_64.rpm + file_size: '22540380' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/rhel/7/stanchion-2.1.0-1.el7.centos.x86_64.rpm.sha - os: sles versions: - version: '11' @@ -3816,15 +11163,15 @@ stanchion: - arch: x86_64 file_info: file_name: stanchion-2.1.0-1.SLES11.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/sles/11/stanchion-2.1.0-1.SLES11.x86_64.rpm - file_size: 25984279 - 
chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/sles/11/stanchion-2.1.0-1.SLES11.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/sles/11/stanchion-2.1.0-1.SLES11.x86_64.rpm + file_size: '25984279' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/sles/11/stanchion-2.1.0-1.SLES11.x86_64.rpm.sha - arch: source file_info: file_name: stanchion-2.1.0-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/sles/11/stanchion-2.1.0-1.src.rpm - file_size: 5573977 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/sles/11/stanchion-2.1.0-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/sles/11/stanchion-2.1.0-1.src.rpm + file_size: '5573977' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/sles/11/stanchion-2.1.0-1.src.rpm.sha - os: smartos versions: - version: '1.8' @@ -3832,17 +11179,17 @@ stanchion: - arch: x86_64 file_info: file_name: stanchion-2.1.0-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/smartos/1.8/stanchion-2.1.0-SmartOS-x86_64.tgz - file_size: 32010442 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/smartos/1.8/stanchion-2.1.0-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/smartos/1.8/stanchion-2.1.0-SmartOS-x86_64.tgz + file_size: '32010442' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/smartos/1.8/stanchion-2.1.0-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: stanchion-2.1.0-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/smartos/13.1/stanchion-2.1.0-SmartOS-x86_64.tgz - file_size: 31993497 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/smartos/13.1/stanchion-2.1.0-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/smartos/13.1/stanchion-2.1.0-SmartOS-x86_64.tgz + file_size: '31993497' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/smartos/13.1/stanchion-2.1.0-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -3850,9 +11197,9 @@ stanchion: - arch: x86_64 file_info: file_name: BASHOstanchion-2.1.0-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/solaris/10/BASHOstanchion-2.1.0-Solaris10-x86_64.pkg.gz - file_size: 29301318 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/solaris/10/BASHOstanchion-2.1.0-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/solaris/10/BASHOstanchion-2.1.0-Solaris10-x86_64.pkg.gz + file_size: '29301318' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/solaris/10/BASHOstanchion-2.1.0-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -3860,31 +11207,31 @@ stanchion: - arch: amd64 file_info: file_name: stanchion_2.1.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/ubuntu/lucid/stanchion_2.1.0-1_amd64.deb - file_size: 24904868 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/ubuntu/lucid/stanchion_2.1.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/ubuntu/lucid/stanchion_2.1.0-1_amd64.deb + file_size: '24904868' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/ubuntu/lucid/stanchion_2.1.0-1_amd64.deb.sha - version: 
precise architectures: - arch: amd64 file_info: file_name: stanchion_2.1.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/ubuntu/precise/stanchion_2.1.0-1_amd64.deb - file_size: 24876504 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/ubuntu/precise/stanchion_2.1.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/ubuntu/precise/stanchion_2.1.0-1_amd64.deb + file_size: '24876504' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/ubuntu/precise/stanchion_2.1.0-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: stanchion_2.1.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/ubuntu/trusty/stanchion_2.1.0-1_amd64.deb - file_size: 20673762 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.0/ubuntu/trusty/stanchion_2.1.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/ubuntu/trusty/stanchion_2.1.0-1_amd64.deb + file_size: '20673762' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.0/ubuntu/trusty/stanchion_2.1.0-1_amd64.deb.sha 2.1.1: - os: source file_info: file_name: stanchion-2.1.1.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/stanchion-2.1.1.tar.gz - file_size: 5567943 + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/stanchion-2.1.1.tar.gz + file_size: '5567943' - os: debian versions: - version: '6' @@ -3892,17 +11239,33 @@ stanchion: - arch: amd64 file_info: file_name: stanchion_2.1.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/debian/6/stanchion_2.1.1-1_amd64.deb - file_size: 24912694 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/debian/6/stanchion_2.1.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/debian/6/stanchion_2.1.1-1_amd64.deb + file_size: '24912694' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/debian/6/stanchion_2.1.1-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: stanchion_2.1.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/debian/7/stanchion_2.1.1-1_amd64.deb - file_size: 24953316 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/debian/7/stanchion_2.1.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/debian/7/stanchion_2.1.1-1_amd64.deb + file_size: '24953316' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/debian/7/stanchion_2.1.1-1_amd64.deb.sha + - version: '8' + architectures: + - arch: amd64 + file_info: + file_name: stanchion_2.1.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/debian/8/stanchion_2.1.1-1_amd64.deb + file_size: '20710946' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/debian/8/stanchion_2.1.1-1_amd64.deb.sha + - version: '9' + architectures: + - arch: amd64 + file_info: + file_name: stanchion_2.1.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/debian/9/stanchion_2.1.1-1_amd64.deb + file_size: '20689812' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/debian/9/stanchion_2.1.1-1_amd64.deb.sha - os: freebsd versions: - version: '10' @@ -3910,17 +11273,25 @@ stanchion: - arch: txz file_info: file_name: stanchion-2.1.1.txz - file_href: 
http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/freebsd/10/stanchion-2.1.1.txz - file_size: 24385364 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/freebsd/10/stanchion-2.1.1.txz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/freebsd/10/stanchion-2.1.1.txz + file_size: '24385364' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/freebsd/10/stanchion-2.1.1.txz.sha + - version: '11.1' + architectures: + - arch: txz + file_info: + file_name: stanchion-2.1.1.txz + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/freebsd/11.1/stanchion-2.1.1.txz + file_size: '25904240' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/freebsd/11.1/stanchion-2.1.1.txz.sha - version: '9.2' architectures: - arch: amd64 file_info: file_name: stanchion-2.1.1-FreeBSD-amd64.tbz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/freebsd/9.2/stanchion-2.1.1-FreeBSD-amd64.tbz - file_size: 30421132 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/freebsd/9.2/stanchion-2.1.1-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/freebsd/9.2/stanchion-2.1.1-FreeBSD-amd64.tbz + file_size: '30421132' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/freebsd/9.2/stanchion-2.1.1-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -3928,9 +11299,9 @@ stanchion: - arch: x86_64 file_info: file_name: stanchion-2.1.1-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/osx/10.8/stanchion-2.1.1-OSX-x86_64.tar.gz - file_size: 25407929 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/osx/10.8/stanchion-2.1.1-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/osx/10.8/stanchion-2.1.1-OSX-x86_64.tar.gz + file_size: '25407929' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/osx/10.8/stanchion-2.1.1-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -3938,43 +11309,43 @@ stanchion: - arch: x86_64 file_info: file_name: stanchion-2.1.1-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/5/stanchion-2.1.1-1.el5.x86_64.rpm - file_size: 25133904 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/5/stanchion-2.1.1-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/5/stanchion-2.1.1-1.el5.x86_64.rpm + file_size: '25133904' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/5/stanchion-2.1.1-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: stanchion-2.1.1-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/5/stanchion-2.1.1-1.src.rpm - file_size: 5602961 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/5/stanchion-2.1.1-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/5/stanchion-2.1.1-1.src.rpm + file_size: '5602961' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/5/stanchion-2.1.1-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: stanchion-2.1.1-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/6/stanchion-2.1.1-1.el6.src.rpm - file_size: 5573905 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/6/stanchion-2.1.1-1.el6.src.rpm.sha + file_href: 
https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/6/stanchion-2.1.1-1.el6.src.rpm + file_size: '5573905' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/6/stanchion-2.1.1-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: stanchion-2.1.1-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/6/stanchion-2.1.1-1.el6.x86_64.rpm - file_size: 22611972 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/6/stanchion-2.1.1-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/6/stanchion-2.1.1-1.el6.x86_64.rpm + file_size: '22611972' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/6/stanchion-2.1.1-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: stanchion-2.1.1-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/7/stanchion-2.1.1-1.el7.centos.src.rpm - file_size: 5582434 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/7/stanchion-2.1.1-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/7/stanchion-2.1.1-1.el7.centos.src.rpm + file_size: '5582434' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/7/stanchion-2.1.1-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: stanchion-2.1.1-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/7/stanchion-2.1.1-1.el7.centos.x86_64.rpm - file_size: 22544492 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/rhel/7/stanchion-2.1.1-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/7/stanchion-2.1.1-1.el7.centos.x86_64.rpm + file_size: '22544492' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/rhel/7/stanchion-2.1.1-1.el7.centos.x86_64.rpm.sha - os: smartos versions: - version: '1.8' @@ -3982,17 +11353,17 @@ stanchion: - arch: x86_64 file_info: file_name: stanchion-2.1.1-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/smartos/1.8/stanchion-2.1.1-SmartOS-x86_64.tgz - file_size: 32006982 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/smartos/1.8/stanchion-2.1.1-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/smartos/1.8/stanchion-2.1.1-SmartOS-x86_64.tgz + file_size: '32006982' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/smartos/1.8/stanchion-2.1.1-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: stanchion-2.1.1-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/smartos/13.1/stanchion-2.1.1-SmartOS-x86_64.tgz - file_size: 32007596 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/smartos/13.1/stanchion-2.1.1-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/smartos/13.1/stanchion-2.1.1-SmartOS-x86_64.tgz + file_size: '32007596' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/smartos/13.1/stanchion-2.1.1-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -4000,315 +11371,229 @@ stanchion: - arch: x86_64 file_info: file_name: BASHOstanchion-2.1.1-Solaris10-x86_64.pkg.gz - file_href: 
http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/solaris/10/BASHOstanchion-2.1.1-Solaris10-x86_64.pkg.gz - file_size: 29306244 + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/solaris/10/BASHOstanchion-2.1.1-Solaris10-x86_64.pkg.gz + file_size: '29306244' - os: ubuntu versions: - - version: lucid + - version: bionic architectures: - arch: amd64 file_info: file_name: stanchion_2.1.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/ubuntu/lucid/stanchion_2.1.1-1_amd64.deb - file_size: 24906382 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/ubuntu/lucid/stanchion_2.1.1-1_amd64.deb.sha - - version: precise + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/ubuntu/bionic/stanchion_2.1.1-1_amd64.deb + file_size: '20737788' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/ubuntu/bionic/stanchion_2.1.1-1_amd64.deb.sha + - version: lucid architectures: - arch: amd64 file_info: file_name: stanchion_2.1.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/ubuntu/precise/stanchion_2.1.1-1_amd64.deb - file_size: 24880918 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/ubuntu/precise/stanchion_2.1.1-1_amd64.deb.sha - - version: trusty + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/ubuntu/lucid/stanchion_2.1.1-1_amd64.deb + file_size: '24906382' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/ubuntu/lucid/stanchion_2.1.1-1_amd64.deb.sha + - version: precise architectures: - arch: amd64 file_info: file_name: stanchion_2.1.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/ubuntu/trusty/stanchion_2.1.1-1_amd64.deb - file_size: 20677002 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/stanchion/2.1/2.1.1/ubuntu/trusty/stanchion_2.1.1-1_amd64.deb.sha -riak_cs_control: - 1.0.0: - - os: source - file_info: - file_name: riak-cs-control-1.0.0.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/riak-cs-control-1.0.0.tar.gz - file_size: 3286655 - - os: debian - versions: - - version: '6' + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/ubuntu/precise/stanchion_2.1.1-1_amd64.deb + file_size: '24880918' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/ubuntu/precise/stanchion_2.1.1-1_amd64.deb.sha + - version: trusty architectures: - arch: amd64 file_info: - file_name: riak-cs-control_1.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/debian/6/riak-cs-control_1.0.0-1_amd64.deb - file_size: 22920376 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/debian/6/riak-cs-control_1.0.0-1_amd64.deb.sha - - os: fedora - versions: - - version: '17' - architectures: - - arch: x86_64 - file_info: - file_name: riak-cs-control-1.0.0-1.fc17.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/fedora/17/riak-cs-control-1.0.0-1.fc17.x86_64.rpm - file_size: 20336977 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/fedora/17/riak-cs-control-1.0.0-1.fc17.x86_64.rpm.sha - - os: freebsd - versions: - - version: '9' + file_name: stanchion_2.1.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/ubuntu/trusty/stanchion_2.1.1-1_amd64.deb + file_size: '20677002' + chksum_href: 
https://files.tiot.jp/riak/stanchion/2.1/2.1.1/ubuntu/trusty/stanchion_2.1.1-1_amd64.deb.sha + - version: xenial architectures: - arch: amd64 file_info: - file_name: riak-cs-control-1.0.0-FreeBSD-amd64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/freebsd/9/riak-cs-control-1.0.0-FreeBSD-amd64.tgz - file_size: 27871969 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/freebsd/9/riak-cs-control-1.0.0-FreeBSD-amd64.tgz.sha - - os: osx - versions: - - version: '10.6' - architectures: - - arch: i386 - file_info: - file_name: riak-cs-control-1.0.0-OSX-i386.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/osx/10.6/riak-cs-control-1.0.0-OSX-i386.tar.gz - file_size: 23584728 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/osx/10.6/riak-cs-control-1.0.0-OSX-i386.tar.gz.sha - - arch: x86_64 - file_info: - file_name: riak-cs-control-1.0.0-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/osx/10.6/riak-cs-control-1.0.0-OSX-x86_64.tar.gz - file_size: 23214508 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/osx/10.6/riak-cs-control-1.0.0-OSX-x86_64.tar.gz.sha - - os: rhel - versions: - - version: '5' - architectures: - - arch: x86_64 - file_info: - file_name: riak-cs-control-1.0.0-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/rhel/5/riak-cs-control-1.0.0-1.el5.x86_64.rpm - file_size: 23117284 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/rhel/5/riak-cs-control-1.0.0-1.el5.x86_64.rpm.sha - - version: '6' - architectures: - - arch: x86_64 - file_info: - file_name: riak-cs-control-1.0.0-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/rhel/6/riak-cs-control-1.0.0-1.el6.x86_64.rpm - file_size: 20659768 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/rhel/6/riak-cs-control-1.0.0-1.el6.x86_64.rpm.sha - - os: smartos - versions: - - version: '1.8' - architectures: - - arch: i386 - file_info: - file_name: riak_cs_control-1.0.0-SmartOS-i386.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/smartos/1.8/riak_cs_control-1.0.0-SmartOS-i386.tgz - file_size: 29619074 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/smartos/1.8/riak_cs_control-1.0.0-SmartOS-i386.tgz.sha - - os: solaris + file_name: stanchion_2.1.1-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/ubuntu/xenial/stanchion_2.1.1-1_amd64.deb + file_size: '20727066' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.1/ubuntu/xenial/stanchion_2.1.1-1_amd64.deb.sha + 2.1.2: + - os: source + file_info: + file_name: stanchion-2.1.2.tar.gz + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/stanchion-2.1.2.tar.gz + file_size: '5565184' + - os: debian versions: - version: '10' - architectures: - - arch: i386 - file_info: - file_name: BASHOriak-cs-control-1.0.0-Solaris10-i386.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/solaris/10/BASHOriak-cs-control-1.0.0-Solaris10-i386.pkg.gz - file_size: 27163925 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/solaris/10/BASHOriak-cs-control-1.0.0-Solaris10-i386.pkg.gz.sha - - os: ubuntu - versions: - - version: lucid 
architectures: - arch: amd64 file_info: - file_name: riak-cs-control_1.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/ubuntu/lucid/riak-cs-control_1.0.0-1_amd64.deb - file_size: 22914466 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/ubuntu/lucid/riak-cs-control_1.0.0-1_amd64.deb.sha - - arch: i386 + file_name: stanchion-dbgsym_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/10/stanchion-dbgsym_2.1.2-1_amd64.deb + file_size: '110940' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/10/stanchion-dbgsym_2.1.2-1_amd64.deb.sha + - arch: amd64 file_info: - file_name: riak-cs-control_1.0.0-1_i386.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/ubuntu/lucid/riak-cs-control_1.0.0-1_i386.deb - file_size: 22682424 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/ubuntu/lucid/riak-cs-control_1.0.0-1_i386.deb.sha - - version: natty + file_name: stanchion_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/10/stanchion_2.1.2-1_amd64.deb + file_size: '20860492' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/10/stanchion_2.1.2-1_amd64.deb.sha + - version: '7' architectures: - arch: amd64 file_info: - file_name: riak-cs-control_1.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/ubuntu/natty/riak-cs-control_1.0.0-1_amd64.deb - file_size: 22934268 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/ubuntu/natty/riak-cs-control_1.0.0-1_amd64.deb.sha - - version: precise + file_name: stanchion_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/7/stanchion_2.1.2-1_amd64.deb + file_size: '24900962' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/7/stanchion_2.1.2-1_amd64.deb.sha + - version: '8' architectures: - arch: amd64 file_info: - file_name: riak-cs-control_1.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/ubuntu/precise/riak-cs-control_1.0.0-1_amd64.deb - file_size: 22900364 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.0/ubuntu/precise/riak-cs-control_1.0.0-1_amd64.deb.sha - 1.0.1: - - os: source - file_info: - file_name: riak-cs-control-1.0.1.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/riak-cs-control-1.0.1.tar.gz - file_size: 3301379 - - os: debian - versions: - - version: '6' + file_name: stanchion_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/8/stanchion_2.1.2-1_amd64.deb + file_size: '20712656' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/8/stanchion_2.1.2-1_amd64.deb.sha + - version: '9' architectures: - arch: amd64 file_info: - file_name: riak-cs-control_1.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/debian/6/riak-cs-control_1.0.1-1_amd64.deb - file_size: 22933148 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/debian/6/riak-cs-control_1.0.1-1_amd64.deb.sha - - version: '7' - architectures: + file_name: stanchion-dbgsym_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/9/stanchion-dbgsym_2.1.2-1_amd64.deb + file_size: '99452' + chksum_href: 
https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/9/stanchion-dbgsym_2.1.2-1_amd64.deb.sha - arch: amd64 file_info: - file_name: riak-cs-control_1.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/debian/7/riak-cs-control_1.0.1-1_amd64.deb - file_size: 22949714 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/debian/7/riak-cs-control_1.0.1-1_amd64.deb.sha - - os: fedora + file_name: stanchion_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/9/stanchion_2.1.2-1_amd64.deb + file_size: '20689602' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/debian/9/stanchion_2.1.2-1_amd64.deb.sha + - os: freebsd versions: - - version: '17' + - version: '10.4' architectures: - - arch: source + - arch: txz file_info: - file_name: riak-cs-control-1.0.1-1.fc17.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/fedora/17/riak-cs-control-1.0.1-1.fc17.src.rpm - file_size: 3305040 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/fedora/17/riak-cs-control-1.0.1-1.fc17.src.rpm.sha - - arch: x86_64 + file_name: stanchion-2.1.2.txz + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/freebsd/10.4/stanchion-2.1.2.txz + file_size: '25892288' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/freebsd/10.4/stanchion-2.1.2.txz.sha + - version: '11.1' + architectures: + - arch: txz file_info: - file_name: riak-cs-control-1.0.1-1.fc17.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/fedora/17/riak-cs-control-1.0.1-1.fc17.x86_64.rpm - file_size: 20356557 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/fedora/17/riak-cs-control-1.0.1-1.fc17.x86_64.rpm.sha - - os: freebsd - versions: - - version: '9' + file_name: stanchion-2.1.2.txz + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/freebsd/11.1/stanchion-2.1.2.txz + file_size: '25901548' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/freebsd/11.1/stanchion-2.1.2.txz.sha + - version: '12.1' architectures: - - arch: amd64 + - arch: txz file_info: - file_name: riak-cs-control-1.0.1-FreeBSD-amd64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/freebsd/9/riak-cs-control-1.0.1-FreeBSD-amd64.tgz - file_size: 27879967 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/freebsd/9/riak-cs-control-1.0.1-FreeBSD-amd64.tgz.sha + file_name: stanchion-2.1.2.txz + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/freebsd/12.1/stanchion-2.1.2.txz + file_size: '27460984' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/freebsd/12.1/stanchion-2.1.2.txz.sha - os: osx versions: - - version: '10.8' + - version: '10.11' architectures: - arch: x86_64 file_info: - file_name: riak-cs-control-1.0.1-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/osx/10.8/riak-cs-control-1.0.1-OSX-x86_64.tar.gz - file_size: 23068365 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/osx/10.8/riak-cs-control-1.0.1-OSX-x86_64.tar.gz.sha - - os: rhel - versions: - - version: '5' + file_name: stanchion-2.1.2-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/osx/10.11/stanchion-2.1.2-OSX-x86_64.tar.gz + file_size: '53943486' + chksum_href: 
https://files.tiot.jp/riak/stanchion/2.1/2.1.2/osx/10.11/stanchion-2.1.2-OSX-x86_64.tar.gz.sha + - version: '10.14' architectures: - arch: x86_64 file_info: - file_name: riak-cs-control-1.0.1-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/rhel/5/riak-cs-control-1.0.1-1.el5.x86_64.rpm - file_size: 23134862 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/rhel/5/riak-cs-control-1.0.1-1.el5.x86_64.rpm.sha - - arch: source - file_info: - file_name: riak-cs-control-1.0.1-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/rhel/5/riak-cs-control-1.0.1-1.src.rpm - file_size: 3300753 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/rhel/5/riak-cs-control-1.0.1-1.src.rpm.sha + file_name: stanchion-2.1.2-OSX-x86_64.tar.gz + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/osx/10.14/stanchion-2.1.2-OSX-x86_64.tar.gz + file_size: '27392225' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/osx/10.14/stanchion-2.1.2-OSX-x86_64.tar.gz.sha + - os: raspbian + versions: [] + - os: rhel + versions: - version: '6' architectures: - arch: source file_info: - file_name: riak-cs-control-1.0.1-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/rhel/6/riak-cs-control-1.0.1-1.el6.src.rpm - file_size: 3305321 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/rhel/6/riak-cs-control-1.0.1-1.el6.src.rpm.sha - - arch: x86_64 - file_info: - file_name: riak-cs-control-1.0.1-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/rhel/6/riak-cs-control-1.0.1-1.el6.x86_64.rpm - file_size: 20671352 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/rhel/6/riak-cs-control-1.0.1-1.el6.x86_64.rpm.sha - - os: smartos - versions: - - version: '1.6' - architectures: - - arch: x86_64 - file_info: - file_name: riak_cs_control-1.0.1-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/smartos/1.6/riak_cs_control-1.0.1-SmartOS-x86_64.tgz - file_size: 28520926 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/smartos/1.6/riak_cs_control-1.0.1-SmartOS-x86_64.tgz.sha - - version: '1.8' - architectures: - - arch: x86_64 - file_info: - file_name: riak_cs_control-1.0.1-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/smartos/1.8/riak_cs_control-1.0.1-SmartOS-x86_64.tgz - file_size: 29525330 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/smartos/1.8/riak_cs_control-1.0.1-SmartOS-x86_64.tgz.sha - - version: '13.1' - architectures: - - arch: x86_64 - file_info: - file_name: riak_cs_control-1.0.1-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/smartos/13.1/riak_cs_control-1.0.1-SmartOS-x86_64.tgz - file_size: 29514155 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/smartos/13.1/riak_cs_control-1.0.1-SmartOS-x86_64.tgz.sha - - os: solaris - versions: - - version: '10' + file_name: stanchion-2.1.2-1.el6.src.rpm + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/rhel/6/stanchion-2.1.2-1.el6.src.rpm + file_size: '5570458' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/rhel/6/stanchion-2.1.2-1.el6.src.rpm.sha + - arch: x86_64 
+ file_info: + file_name: stanchion-2.1.2-1.el6.x86_64.rpm + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/rhel/6/stanchion-2.1.2-1.el6.x86_64.rpm + file_size: '22560608' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/rhel/6/stanchion-2.1.2-1.el6.x86_64.rpm.sha + - version: '7' architectures: + - arch: source + file_info: + file_name: stanchion-2.1.2-1.el7.src.rpm + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/rhel/7/stanchion-2.1.2-1.el7.src.rpm + file_size: '5584556' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/rhel/7/stanchion-2.1.2-1.el7.src.rpm.sha - arch: x86_64 file_info: - file_name: BASHOriak-cs-control-1.0.1-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/solaris/10/BASHOriak-cs-control-1.0.1-Solaris10-x86_64.pkg.gz - file_size: 27480986 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/solaris/10/BASHOriak-cs-control-1.0.1-Solaris10-x86_64.pkg.gz.sha + file_name: stanchion-2.1.2-1.el7.x86_64.rpm + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/rhel/7/stanchion-2.1.2-1.el7.x86_64.rpm + file_size: '22484488' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/rhel/7/stanchion-2.1.2-1.el7.x86_64.rpm.sha + - version: '8' + architectures: [] - os: ubuntu versions: - - version: lucid + - version: bionic64 architectures: - arch: amd64 file_info: - file_name: riak-cs-control_1.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/ubuntu/lucid/riak-cs-control_1.0.1-1_amd64.deb - file_size: 22933772 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/ubuntu/lucid/riak-cs-control_1.0.1-1_amd64.deb.sha - - version: natty + file_name: stanchion_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/ubuntu/bionic64/stanchion_2.1.2-1_amd64.deb + file_size: '20737784' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/ubuntu/bionic64/stanchion_2.1.2-1_amd64.deb.sha + - version: focal64 architectures: - arch: amd64 file_info: - file_name: riak-cs-control_1.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/ubuntu/natty/riak-cs-control_1.0.1-1_amd64.deb - file_size: 22950048 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/ubuntu/natty/riak-cs-control_1.0.1-1_amd64.deb.sha - - version: precise + file_name: stanchion_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/ubuntu/focal64/stanchion_2.1.2-1_amd64.deb + file_size: '20879136' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/ubuntu/focal64/stanchion_2.1.2-1_amd64.deb.sha + - version: precise64 + architectures: + - arch: amd64 + file_info: + file_name: stanchion_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/ubuntu/precise64/stanchion_2.1.2-1_amd64.deb + file_size: '24854460' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/ubuntu/precise64/stanchion_2.1.2-1_amd64.deb.sha + - version: trusty64 architectures: - arch: amd64 file_info: - file_name: riak-cs-control_1.0.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/ubuntu/precise/riak-cs-control_1.0.1-1_amd64.deb - file_size: 22910350 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.1/ubuntu/precise/riak-cs-control_1.0.1-1_amd64.deb.sha + file_name: 
stanchion_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/ubuntu/trusty64/stanchion_2.1.2-1_amd64.deb + file_size: '20660236' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/ubuntu/trusty64/stanchion_2.1.2-1_amd64.deb.sha + - version: xenial64 + architectures: + - arch: amd64 + file_info: + file_name: stanchion_2.1.2-1_amd64.deb + file_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/ubuntu/xenial64/stanchion_2.1.2-1_amd64.deb + file_size: '20732522' + chksum_href: https://files.tiot.jp/riak/stanchion/2.1/2.1.2/ubuntu/xenial64/stanchion_2.1.2-1_amd64.deb.sha +riak_cs_control: 1.0.2: - os: source file_info: file_name: riak-cs-control-1.0.2.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/riak-cs-control-1.0.2.tar.gz - file_size: 3301442 + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/riak-cs-control-1.0.2.tar.gz + file_size: '3301442' - os: debian versions: - version: '6' @@ -4316,33 +11601,27 @@ riak_cs_control: - arch: amd64 file_info: file_name: riak-cs-control_1.0.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/debian/6/riak-cs-control_1.0.2-1_amd64.deb - file_size: 22932742 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/debian/6/riak-cs-control_1.0.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/debian/6/riak-cs-control_1.0.2-1_amd64.deb + file_size: '22932742' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/debian/6/riak-cs-control_1.0.2-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak-cs-control_1.0.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/debian/7/riak-cs-control_1.0.2-1_amd64.deb - file_size: 22950528 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/debian/7/riak-cs-control_1.0.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/debian/7/riak-cs-control_1.0.2-1_amd64.deb + file_size: '22950528' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/debian/7/riak-cs-control_1.0.2-1_amd64.deb.sha - os: fedora versions: - version: '17' architectures: - - arch: source - file_info: - file_name: riak-cs-control-1.0.2-1.fc17.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/fedora/17/riak-cs-control-1.0.2-1.fc17.src.rpm - file_size: 3305226 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/fedora/17/riak-cs-control-1.0.2-1.fc17.src.rpm.sha - arch: x86_64 file_info: file_name: riak-cs-control-1.0.2-1.fc17.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/fedora/17/riak-cs-control-1.0.2-1.fc17.x86_64.rpm - file_size: 20356597 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/fedora/17/riak-cs-control-1.0.2-1.fc17.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/fedora/17/riak-cs-control-1.0.2-1.fc17.x86_64.rpm + file_size: '20356597' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/fedora/17/riak-cs-control-1.0.2-1.fc17.x86_64.rpm.sha - os: freebsd versions: - version: '9' @@ -4350,9 +11629,9 @@ riak_cs_control: - arch: amd64 file_info: file_name: riak-cs-control-1.0.2-FreeBSD-amd64.tbz - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/freebsd/9/riak-cs-control-1.0.2-FreeBSD-amd64.tbz - file_size: 27879968 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/freebsd/9/riak-cs-control-1.0.2-FreeBSD-amd64.tbz.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/freebsd/9/riak-cs-control-1.0.2-FreeBSD-amd64.tbz + file_size: '27879968' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/freebsd/9/riak-cs-control-1.0.2-FreeBSD-amd64.tbz.sha - os: osx versions: - version: '10.8' @@ -4360,9 +11639,9 @@ riak_cs_control: - arch: x86_64 file_info: file_name: riak-cs-control-1.0.2-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/osx/10.8/riak-cs-control-1.0.2-OSX-x86_64.tar.gz - file_size: 23068447 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/osx/10.8/riak-cs-control-1.0.2-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/osx/10.8/riak-cs-control-1.0.2-OSX-x86_64.tar.gz + file_size: '23068447' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/osx/10.8/riak-cs-control-1.0.2-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -4370,29 +11649,35 @@ riak_cs_control: - arch: x86_64 file_info: file_name: riak-cs-control-1.0.2-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/rhel/5/riak-cs-control-1.0.2-1.el5.x86_64.rpm - file_size: 23134983 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/rhel/5/riak-cs-control-1.0.2-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/rhel/5/riak-cs-control-1.0.2-1.el5.x86_64.rpm + file_size: '23134983' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/rhel/5/riak-cs-control-1.0.2-1.el5.x86_64.rpm.sha + - arch: source + file_info: + file_name: riak-cs-control-1.0.2-1.fc17.src.rpm + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/rhel/5/riak-cs-control-1.0.2-1.fc17.src.rpm + file_size: '3305226' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/rhel/5/riak-cs-control-1.0.2-1.fc17.src.rpm.sha - arch: source file_info: file_name: riak-cs-control-1.0.2-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/rhel/5/riak-cs-control-1.0.2-1.src.rpm - file_size: 3300694 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/rhel/5/riak-cs-control-1.0.2-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/rhel/5/riak-cs-control-1.0.2-1.src.rpm + file_size: '3300694' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/rhel/5/riak-cs-control-1.0.2-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-cs-control-1.0.2-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/rhel/6/riak-cs-control-1.0.2-1.el6.src.rpm - file_size: 3305482 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/rhel/6/riak-cs-control-1.0.2-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/rhel/6/riak-cs-control-1.0.2-1.el6.src.rpm + file_size: '3305482' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/rhel/6/riak-cs-control-1.0.2-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-cs-control-1.0.2-1.el6.x86_64.rpm - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/rhel/6/riak-cs-control-1.0.2-1.el6.x86_64.rpm - file_size: 20671348 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/rhel/6/riak-cs-control-1.0.2-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/rhel/6/riak-cs-control-1.0.2-1.el6.x86_64.rpm + file_size: '20671348' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/rhel/6/riak-cs-control-1.0.2-1.el6.x86_64.rpm.sha - os: smartos versions: - version: '1.6' @@ -4400,25 +11685,25 @@ riak_cs_control: - arch: x86_64 file_info: file_name: riak_cs_control-1.0.2-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/smartos/1.6/riak_cs_control-1.0.2-SmartOS-x86_64.tgz - file_size: 28515415 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/smartos/1.6/riak_cs_control-1.0.2-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/smartos/1.6/riak_cs_control-1.0.2-SmartOS-x86_64.tgz + file_size: '29506684' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/smartos/1.6/riak_cs_control-1.0.2-SmartOS-x86_64.tgz.sha - version: '1.8' architectures: - arch: x86_64 file_info: file_name: riak_cs_control-1.0.2-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/smartos/1.8/riak_cs_control-1.0.2-SmartOS-x86_64.tgz - file_size: 29506684 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/smartos/1.8/riak_cs_control-1.0.2-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/smartos/1.8/riak_cs_control-1.0.2-SmartOS-x86_64.tgz + file_size: '28515415' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/smartos/1.8/riak_cs_control-1.0.2-SmartOS-x86_64.tgz.sha - version: '13.1' architectures: - arch: x86_64 file_info: file_name: riak_cs_control-1.0.2-SmartOS-x86_64.tgz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/smartos/13.1/riak_cs_control-1.0.2-SmartOS-x86_64.tgz - file_size: 29518325 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/smartos/13.1/riak_cs_control-1.0.2-SmartOS-x86_64.tgz.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/smartos/13.1/riak_cs_control-1.0.2-SmartOS-x86_64.tgz + file_size: '29518325' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/smartos/13.1/riak_cs_control-1.0.2-SmartOS-x86_64.tgz.sha - os: solaris versions: - version: '10' @@ -4426,9 +11711,9 @@ riak_cs_control: - arch: x86_64 file_info: file_name: BASHOriak-cs-control-1.0.2-Solaris10-x86_64.pkg.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/solaris/10/BASHOriak-cs-control-1.0.2-Solaris10-x86_64.pkg.gz - file_size: 27466402 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/solaris/10/BASHOriak-cs-control-1.0.2-Solaris10-x86_64.pkg.gz.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/solaris/10/BASHOriak-cs-control-1.0.2-Solaris10-x86_64.pkg.gz + file_size: '27466402' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/solaris/10/BASHOriak-cs-control-1.0.2-Solaris10-x86_64.pkg.gz.sha - os: ubuntu versions: - version: lucid @@ -4436,221 +11721,38 @@ riak_cs_control: - arch: amd64 file_info: file_name: riak-cs-control_1.0.2-1_amd64.deb - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/ubuntu/lucid/riak-cs-control_1.0.2-1_amd64.deb - file_size: 22937312 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/ubuntu/lucid/riak-cs-control_1.0.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/ubuntu/lucid/riak-cs-control_1.0.2-1_amd64.deb + file_size: '22937312' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/ubuntu/lucid/riak-cs-control_1.0.2-1_amd64.deb.sha - version: natty architectures: - arch: amd64 file_info: file_name: riak-cs-control_1.0.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/ubuntu/natty/riak-cs-control_1.0.2-1_amd64.deb - file_size: 22945122 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/ubuntu/natty/riak-cs-control_1.0.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/ubuntu/natty/riak-cs-control_1.0.2-1_amd64.deb + file_size: '22945122' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/ubuntu/natty/riak-cs-control_1.0.2-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak-cs-control_1.0.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/ubuntu/precise/riak-cs-control_1.0.2-1_amd64.deb - file_size: 22911536 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak-cs-control/1.0/1.0.2/ubuntu/precise/riak-cs-control_1.0.2-1_amd64.deb.sha -dataplatform: - 1.0.0: - - os: debian - versions: - - version: '7' - architectures: - - arch: amd64 - file_info: - file_name: data-platform_1.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/debian/7/data-platform_1.0.0-1_amd64.deb - file_size: 63615830 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/debian/7/data-platform_1.0.0-1_amd64.deb.sha - - os: osx - versions: - - version: '10.10' - architectures: - - arch: x86_64 - file_info: - file_name: data-platform-1.0.0-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/osx/10.10/data-platform-1.0.0-OSX-x86_64.tar.gz - file_size: 62611411 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/osx/10.10/data-platform-1.0.0-OSX-x86_64.tar.gz.sha - - os: rhel - versions: - - version: '6' - architectures: - - arch: source - file_info: - file_name: data-platform-1.0.0-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/6/data-platform-1.0.0-1.el6.src.rpm - file_size: 20370019 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/6/data-platform-1.0.0-1.el6.src.rpm.sha - - arch: x86_64 - file_info: - file_name: data-platform-1.0.0-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/6/data-platform-1.0.0-1.el6.x86_64.rpm - file_size: 63275021 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/6/data-platform-1.0.0-1.el6.x86_64.rpm.sha - - version: '7' - architectures: - - arch: source - file_info: - file_name: data-platform-1.0.0-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/7/data-platform-1.0.0-1.el7.centos.src.rpm - file_size: 20327743 - chksum_href: 
http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/7/data-platform-1.0.0-1.el7.centos.src.rpm.sha - - arch: x86_64 - file_info: - file_name: data-platform-1.0.0-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/7/data-platform-1.0.0-1.el7.centos.x86_64.rpm - file_size: 60319092 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/7/data-platform-1.0.0-1.el7.centos.x86_64.rpm.sha - - version: amazon - architectures: - - arch: source - file_info: - file_name: data-platform-1.0.0-1.el7.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/amazon/data-platform-1.0.0-1.el7.src.rpm - file_size: 20327847 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/amazon/data-platform-1.0.0-1.el7.src.rpm.sha - - arch: x86_64 - file_info: - file_name: data-platform-1.0.0-1.el7.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/amazon/data-platform-1.0.0-1.el7.x86_64.rpm - file_size: 59297808 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/rhel/amazon/data-platform-1.0.0-1.el7.x86_64.rpm.sha - - os: ubuntu - versions: - - version: precise - architectures: - - arch: amd64 - file_info: - file_name: data-platform_1.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/ubuntu/precise/data-platform_1.0.0-1_amd64.deb - file_size: 63554640 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/ubuntu/precise/data-platform_1.0.0-1_amd64.deb.sha - - version: trusty - architectures: - - arch: amd64 - file_info: - file_name: data-platform_1.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/ubuntu/trusty/data-platform_1.0.0-1_amd64.deb - file_size: 57636716 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.0/1.0.0/ubuntu/trusty/data-platform_1.0.0-1_amd64.deb.sha - 1.1.0: - - os: osx - versions: - - version: '10.10' - architectures: - - arch: x86_64 - file_info: - file_name: data-platform-1.1.0-d9846e00-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.1/1.1.0/osx/10.10/data-platform-1.1.0-d9846e00-OSX-x86_64.tar.gz - file_size: 128342763 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform/1.1/1.1.0/osx/10.10/data-platform-1.1.0-d9846e00-OSX-x86_64.tar.gz.sha -dataplatform_extras: - 1.0.0: - - os: debian - versions: - - version: '7' - architectures: - - arch: amd64 - file_info: - file_name: data-platform-extras_1.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/debian/7/data-platform-extras_1.0.0-1_amd64.deb - file_size: 504514076 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/debian/7/data-platform-extras_1.0.0-1_amd64.deb.sha - - os: osx - versions: - - version: '10.10' - architectures: - - arch: x86_64 - file_info: - file_name: data-platform-extras-1.0.0-OSX-x86_64.pkg - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/osx/10.10/data-platform-extras-1.0.0-OSX-x86_64.pkg - file_size: 505098997 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/osx/10.10/data-platform-extras-1.0.0-OSX-x86_64.pkg.sha - - os: rhel - versions: - - version: '6' - architectures: - - arch: x86_64 - 
file_info: - file_name: data-platform-extras-1.0.0-1.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/rhel/6/data-platform-extras-1.0.0-1.x86_64.rpm - file_size: 504874320 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/rhel/6/data-platform-extras-1.0.0-1.x86_64.rpm.sha - - version: '7' - architectures: - - arch: x86_64 - file_info: - file_name: data-platform-extras-1.0.0-1.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/rhel/7/data-platform-extras-1.0.0-1.x86_64.rpm - file_size: 504862060 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/rhel/7/data-platform-extras-1.0.0-1.x86_64.rpm.sha - - version: amazon - architectures: - - arch: x86_64 - file_info: - file_name: data-platform-extras-1.0.0-1.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/rhel/amazon/data-platform-extras-1.0.0-1.x86_64.rpm - file_size: 504858831 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/rhel/amazon/data-platform-extras-1.0.0-1.x86_64.rpm.sha - - os: ubuntu - versions: - - version: precise - architectures: - - arch: amd64 - file_info: - file_name: data-platform-extras_1.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/ubuntu/precise/data-platform-extras_1.0.0-1_amd64.deb - file_size: 504514204 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/ubuntu/precise/data-platform-extras_1.0.0-1_amd64.deb.sha - - version: trusty - architectures: - - arch: amd64 - file_info: - file_name: data-platform-extras_1.0.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/ubuntu/trusty/data-platform-extras_1.0.0-1_amd64.deb - file_size: 504509114 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.0.0/ubuntu/trusty/data-platform-extras_1.0.0-1_amd64.deb.sha - 1.1.0: - - os: centos-6 - versions: - - version: packages - architectures: - - arch: x86_64 - file_info: - file_name: data-platform-extras-1.1.0rc3-1.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.1.0/centos-6/packages/data-platform-extras-1.1.0rc3-1.x86_64.rpm - file_size: 571440803 - - os: centos-7 - versions: - - version: packages - architectures: - - arch: x86_64 - file_info: - file_name: data-platform-extras-1.1.0rc3-1.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.1.0/centos-7/packages/data-platform-extras-1.1.0rc3-1.x86_64.rpm - file_size: 570129715 - - os: debian-7 - versions: - - version: packages - architectures: - - arch: amd64 - file_info: - file_name: data-platform-extras_1.1.0rc3-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/data-platform-extras/1.0/1.1.0/debian-7/packages/data-platform-extras_1.1.0rc3-1_amd64.deb - file_size: 569955376 + file_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/ubuntu/precise/riak-cs-control_1.0.2-1_amd64.deb + file_size: '22911536' + chksum_href: https://files.tiot.jp/riak/cs-control/1.0/1.0.2/ubuntu/precise/riak-cs-control_1.0.2-1_amd64.deb.sha riak_ts: + 1.2.0: + - os: source + file_info: + file_name: downloads + file_href: https://files.tiot.jp/riak/ts/1.2/1.2.0/downloads + file_size: '17356' 1.3.0: - os: source file_info: file_name: riak_ts-1.3.0.tar.gz - 
file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/riak_ts-1.3.0.tar.gz - file_size: 20361705 + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/riak_ts-1.3.0.tar.gz + file_size: '20361705' - os: debian versions: - version: '6' @@ -4658,17 +11760,17 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.3.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/debian/6/riak-ts_1.3.0-1_amd64.deb - file_size: 65073572 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/debian/6/riak-ts_1.3.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/debian/6/riak-ts_1.3.0-1_amd64.deb + file_size: '65073572' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/debian/6/riak-ts_1.3.0-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak-ts_1.3.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/debian/7/riak-ts_1.3.0-1_amd64.deb - file_size: 65107750 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/debian/7/riak-ts_1.3.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/debian/7/riak-ts_1.3.0-1_amd64.deb + file_size: '65107750' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/debian/7/riak-ts_1.3.0-1_amd64.deb.sha - os: osx versions: - version: '10.8' @@ -4676,9 +11778,9 @@ riak_ts: - arch: x86_64 file_info: file_name: riak-ts-1.3.0-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/osx/10.8/riak-ts-1.3.0-OSX-x86_64.tar.gz - file_size: 65739233 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/osx/10.8/riak-ts-1.3.0-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/osx/10.8/riak-ts-1.3.0-OSX-x86_64.tar.gz + file_size: '65739233' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/osx/10.8/riak-ts-1.3.0-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' @@ -4686,43 +11788,37 @@ riak_ts: - arch: x86_64 file_info: file_name: riak-ts-1.3.0-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/5/riak-ts-1.3.0-1.el5.x86_64.rpm - file_size: 67011868 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/5/riak-ts-1.3.0-1.el5.x86_64.rpm.sha - - arch: source - file_info: - file_name: riak-ts-1.3.0-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/5/riak-ts-1.3.0-1.src.rpm - file_size: 20369333 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/5/riak-ts-1.3.0-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/rhel/5/riak-ts-1.3.0-1.el5.x86_64.rpm + file_size: '67011868' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/rhel/5/riak-ts-1.3.0-1.el5.x86_64.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-ts-1.3.0-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/6/riak-ts-1.3.0-1.el6.src.rpm - file_size: 20357503 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/6/riak-ts-1.3.0-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/rhel/6/riak-ts-1.3.0-1.el6.src.rpm + file_size: '20357503' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/rhel/6/riak-ts-1.3.0-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.3.0-1.el6.x86_64.rpm - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/6/riak-ts-1.3.0-1.el6.x86_64.rpm - file_size: 65056748 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/6/riak-ts-1.3.0-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/rhel/6/riak-ts-1.3.0-1.el6.x86_64.rpm + file_size: '65056748' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/rhel/6/riak-ts-1.3.0-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-ts-1.3.0-1.el7.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/7/riak-ts-1.3.0-1.el7.src.rpm - file_size: 20294904 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/7/riak-ts-1.3.0-1.el7.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/rhel/7/riak-ts-1.3.0-1.el7.src.rpm + file_size: '20294904' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/rhel/7/riak-ts-1.3.0-1.el7.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.3.0-1.el7.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/7/riak-ts-1.3.0-1.el7.x86_64.rpm - file_size: 64747788 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/rhel/7/riak-ts-1.3.0-1.el7.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/rhel/7/riak-ts-1.3.0-1.el7.x86_64.rpm + file_size: '64747788' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/rhel/7/riak-ts-1.3.0-1.el7.x86_64.rpm.sha - os: ubuntu versions: - version: lucid @@ -4730,31 +11826,31 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.3.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/ubuntu/lucid/riak-ts_1.3.0-1_amd64.deb - file_size: 65067134 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/ubuntu/lucid/riak-ts_1.3.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/ubuntu/lucid/riak-ts_1.3.0-1_amd64.deb + file_size: '65067134' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/ubuntu/lucid/riak-ts_1.3.0-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak-ts_1.3.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/ubuntu/precise/riak-ts_1.3.0-1_amd64.deb - file_size: 67149990 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/ubuntu/precise/riak-ts_1.3.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/ubuntu/precise/riak-ts_1.3.0-1_amd64.deb + file_size: '67149990' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/ubuntu/precise/riak-ts_1.3.0-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak-ts_1.3.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/ubuntu/trusty/riak-ts_1.3.0-1_amd64.deb - file_size: 60306036 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.0/ubuntu/trusty/riak-ts_1.3.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/ubuntu/trusty/riak-ts_1.3.0-1_amd64.deb + file_size: '60306036' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.0/ubuntu/trusty/riak-ts_1.3.0-1_amd64.deb.sha 1.3.1: - os: source file_info: file_name: riak_ts-1.3.1.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/riak_ts-1.3.1.tar.gz - file_size: 20325521 + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/riak_ts-1.3.1.tar.gz + file_size: 
'20325521' - os: debian versions: - version: '6' @@ -4762,17 +11858,17 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.3.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/debian/6/riak-ts_1.3.1-1_amd64.deb - file_size: 65076402 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/debian/6/riak-ts_1.3.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/debian/6/riak-ts_1.3.1-1_amd64.deb + file_size: '65076402' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/debian/6/riak-ts_1.3.1-1_amd64.deb.sha - version: '7' architectures: - arch: amd64 file_info: file_name: riak-ts_1.3.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/debian/7/riak-ts_1.3.1-1_amd64.deb - file_size: 65112256 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/debian/7/riak-ts_1.3.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/debian/7/riak-ts_1.3.1-1_amd64.deb + file_size: '65112256' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/debian/7/riak-ts_1.3.1-1_amd64.deb.sha - os: osx versions: - version: '10.8' @@ -4780,53 +11876,59 @@ riak_ts: - arch: x86_64 file_info: file_name: riak-ts-1.3.1-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/osx/10.8/riak-ts-1.3.1-OSX-x86_64.tar.gz - file_size: 65744867 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/osx/10.8/riak-ts-1.3.1-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/osx/10.8/riak-ts-1.3.1-OSX-x86_64.tar.gz + file_size: '65744867' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/osx/10.8/riak-ts-1.3.1-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '5' architectures: + - arch: source + file_info: + file_name: riak-ts-1.3.0-1.src.rpm + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/5/riak-ts-1.3.0-1.src.rpm + file_size: '20369333' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/5/riak-ts-1.3.0-1.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.3.1-1.el5.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/5/riak-ts-1.3.1-1.el5.x86_64.rpm - file_size: 67011971 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/5/riak-ts-1.3.1-1.el5.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/5/riak-ts-1.3.1-1.el5.x86_64.rpm + file_size: '67011971' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/5/riak-ts-1.3.1-1.el5.x86_64.rpm.sha - arch: source file_info: file_name: riak-ts-1.3.1-1.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/5/riak-ts-1.3.1-1.src.rpm - file_size: 20331069 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/5/riak-ts-1.3.1-1.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/5/riak-ts-1.3.1-1.src.rpm + file_size: '20331069' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/5/riak-ts-1.3.1-1.src.rpm.sha - version: '6' architectures: - arch: source file_info: file_name: riak-ts-1.3.1-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/6/riak-ts-1.3.1-1.el6.src.rpm - file_size: 20324537 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/6/riak-ts-1.3.1-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/6/riak-ts-1.3.1-1.el6.src.rpm + 
file_size: '20324537' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/6/riak-ts-1.3.1-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.3.1-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/6/riak-ts-1.3.1-1.el6.x86_64.rpm - file_size: 65062460 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/6/riak-ts-1.3.1-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/6/riak-ts-1.3.1-1.el6.x86_64.rpm + file_size: '65062460' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/6/riak-ts-1.3.1-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-ts-1.3.1-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/7/riak-ts-1.3.1-1.el7.centos.src.rpm - file_size: 20257292 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/7/riak-ts-1.3.1-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/7/riak-ts-1.3.1-1.el7.centos.src.rpm + file_size: '20257292' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/7/riak-ts-1.3.1-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.3.1-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/7/riak-ts-1.3.1-1.el7.centos.x86_64.rpm - file_size: 64751956 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/rhel/7/riak-ts-1.3.1-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/7/riak-ts-1.3.1-1.el7.centos.x86_64.rpm + file_size: '64751956' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/rhel/7/riak-ts-1.3.1-1.el7.centos.x86_64.rpm.sha - os: ubuntu versions: - version: lucid @@ -4834,31 +11936,31 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.3.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/ubuntu/lucid/riak-ts_1.3.1-1_amd64.deb - file_size: 65070022 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/ubuntu/lucid/riak-ts_1.3.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/ubuntu/lucid/riak-ts_1.3.1-1_amd64.deb + file_size: '65070022' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/ubuntu/lucid/riak-ts_1.3.1-1_amd64.deb.sha - version: precise architectures: - arch: amd64 file_info: file_name: riak-ts_1.3.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/ubuntu/precise/riak-ts_1.3.1-1_amd64.deb - file_size: 67174476 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/ubuntu/precise/riak-ts_1.3.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/ubuntu/precise/riak-ts_1.3.1-1_amd64.deb + file_size: '67174476' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/ubuntu/precise/riak-ts_1.3.1-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak-ts_1.3.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/ubuntu/trusty/riak-ts_1.3.1-1_amd64.deb - file_size: 60236828 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.3/1.3.1/ubuntu/trusty/riak-ts_1.3.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/ubuntu/trusty/riak-ts_1.3.1-1_amd64.deb + file_size: '60236828' + chksum_href: https://files.tiot.jp/riak/ts/1.3/1.3.1/ubuntu/trusty/riak-ts_1.3.1-1_amd64.deb.sha 
1.4.0: - os: source file_info: file_name: riak_ts-1.4.0.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/riak_ts-1.4.0.tar.gz - file_size: 21185823 + file_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/riak_ts-1.4.0.tar.gz + file_size: '21185823' - os: debian versions: - version: '7' @@ -4866,17 +11968,17 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.4.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/debian/7/riak-ts_1.4.0-1_amd64.deb - file_size: 67201146 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/debian/7/riak-ts_1.4.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/debian/7/riak-ts_1.4.0-1_amd64.deb + file_size: '67201146' + chksum_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/debian/7/riak-ts_1.4.0-1_amd64.deb.sha - version: '8' architectures: - arch: amd64 file_info: file_name: riak-ts_1.4.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/debian/8/riak-ts_1.4.0-1_amd64.deb - file_size: 60386480 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/debian/8/riak-ts_1.4.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/debian/8/riak-ts_1.4.0-1_amd64.deb + file_size: '60386480' + chksum_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/debian/8/riak-ts_1.4.0-1_amd64.deb.sha - os: osx versions: - version: '10.8' @@ -4884,9 +11986,9 @@ riak_ts: - arch: x86_64 file_info: file_name: riak-ts-1.4.0-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/osx/10.8/riak-ts-1.4.0-OSX-x86_64.tar.gz - file_size: 65676896 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/osx/10.8/riak-ts-1.4.0-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/osx/10.8/riak-ts-1.4.0-OSX-x86_64.tar.gz + file_size: '65676896' + chksum_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/osx/10.8/riak-ts-1.4.0-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '6' @@ -4894,29 +11996,29 @@ riak_ts: - arch: source file_info: file_name: riak-ts-1.4.0-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/rhel/6/riak-ts-1.4.0-1.el6.src.rpm - file_size: 21189014 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/rhel/6/riak-ts-1.4.0-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/rhel/6/riak-ts-1.4.0-1.el6.src.rpm + file_size: '21189014' + chksum_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/rhel/6/riak-ts-1.4.0-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.4.0-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/rhel/6/riak-ts-1.4.0-1.el6.x86_64.rpm - file_size: 65238360 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/rhel/6/riak-ts-1.4.0-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/rhel/6/riak-ts-1.4.0-1.el6.x86_64.rpm + file_size: '65238360' + chksum_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/rhel/6/riak-ts-1.4.0-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-ts-1.4.0-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/rhel/7/riak-ts-1.4.0-1.el7.centos.src.rpm - file_size: 21123185 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/rhel/7/riak-ts-1.4.0-1.el7.centos.src.rpm.sha + file_href: 
https://files.tiot.jp/riak/ts/1.4/1.4.0/rhel/7/riak-ts-1.4.0-1.el7.centos.src.rpm + file_size: '21123185' + chksum_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/rhel/7/riak-ts-1.4.0-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.4.0-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/rhel/7/riak-ts-1.4.0-1.el7.centos.x86_64.rpm - file_size: 64921812 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/rhel/7/riak-ts-1.4.0-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/rhel/7/riak-ts-1.4.0-1.el7.centos.x86_64.rpm + file_size: '64921812' + chksum_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/rhel/7/riak-ts-1.4.0-1.el7.centos.x86_64.rpm.sha - os: ubuntu versions: - version: precise @@ -4924,23 +12026,23 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.4.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/ubuntu/precise/riak-ts_1.4.0-1_amd64.deb - file_size: 67119336 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/ubuntu/precise/riak-ts_1.4.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/ubuntu/precise/riak-ts_1.4.0-1_amd64.deb + file_size: '67119336' + chksum_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/ubuntu/precise/riak-ts_1.4.0-1_amd64.deb.sha - version: trusty architectures: - arch: amd64 file_info: file_name: riak-ts_1.4.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/ubuntu/trusty/riak-ts_1.4.0-1_amd64.deb - file_size: 60343884 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.4/1.4.0/ubuntu/trusty/riak-ts_1.4.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/ubuntu/trusty/riak-ts_1.4.0-1_amd64.deb + file_size: '60343884' + chksum_href: https://files.tiot.jp/riak/ts/1.4/1.4.0/ubuntu/trusty/riak-ts_1.4.0-1_amd64.deb.sha 1.5.0: - os: source file_info: file_name: riak_ts-1.5.0.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/riak_ts-1.5.0.tar.gz - file_size: 22864344 + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/riak_ts-1.5.0.tar.gz + file_size: '22864344' - os: debian versions: - version: jessie @@ -4948,17 +12050,17 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.5.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/debian/jessie/riak-ts_1.5.0-1_amd64.deb - file_size: 61811692 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/debian/jessie/riak-ts_1.5.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/debian/jessie/riak-ts_1.5.0-1_amd64.deb + file_size: '61811692' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/debian/jessie/riak-ts_1.5.0-1_amd64.deb.sha - version: wheezy architectures: - arch: amd64 file_info: file_name: riak-ts_1.5.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/debian/wheezy/riak-ts_1.5.0-1_amd64.deb - file_size: 68690856 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/debian/wheezy/riak-ts_1.5.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/debian/wheezy/riak-ts_1.5.0-1_amd64.deb + file_size: '68690856' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/debian/wheezy/riak-ts_1.5.0-1_amd64.deb.sha - os: osx versions: - version: '10.8' @@ -4966,9 +12068,9 @@ riak_ts: - arch: x86_64 file_info: file_name: 
riak-ts-1.5.0-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/osx/10.8/riak-ts-1.5.0-OSX-x86_64.tar.gz - file_size: 67168714 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/osx/10.8/riak-ts-1.5.0-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/osx/10.8/riak-ts-1.5.0-OSX-x86_64.tar.gz + file_size: '67168714' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/osx/10.8/riak-ts-1.5.0-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '6' @@ -4976,29 +12078,29 @@ riak_ts: - arch: source file_info: file_name: riak-ts-1.5.0-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/rhel/6/riak-ts-1.5.0-1.el6.src.rpm - file_size: 22864183 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/rhel/6/riak-ts-1.5.0-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/rhel/6/riak-ts-1.5.0-1.el6.src.rpm + file_size: '22864183' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/rhel/6/riak-ts-1.5.0-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.5.0-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/rhel/6/riak-ts-1.5.0-1.el6.x86_64.rpm - file_size: 66826612 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/rhel/6/riak-ts-1.5.0-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/rhel/6/riak-ts-1.5.0-1.el6.x86_64.rpm + file_size: '66826612' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/rhel/6/riak-ts-1.5.0-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-ts-1.5.0-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/rhel/7/riak-ts-1.5.0-1.el7.centos.src.rpm - file_size: 22792959 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/rhel/7/riak-ts-1.5.0-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/rhel/7/riak-ts-1.5.0-1.el7.centos.src.rpm + file_size: '22792959' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/rhel/7/riak-ts-1.5.0-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.5.0-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/rhel/7/riak-ts-1.5.0-1.el7.centos.x86_64.rpm - file_size: 66556356 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/rhel/7/riak-ts-1.5.0-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/rhel/7/riak-ts-1.5.0-1.el7.centos.x86_64.rpm + file_size: '66556356' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/rhel/7/riak-ts-1.5.0-1.el7.centos.x86_64.rpm.sha - os: ubuntu versions: - version: trusty @@ -5006,23 +12108,23 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.5.0-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/ubuntu/trusty/riak-ts_1.5.0-1_amd64.deb - file_size: 61765634 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/ubuntu/trusty/riak-ts_1.5.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/ubuntu/trusty/riak-ts_1.5.0-1_amd64.deb + file_size: '61765634' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/ubuntu/trusty/riak-ts_1.5.0-1_amd64.deb.sha - version: xenial architectures: - arch: amd64 file_info: file_name: riak-ts_1.5.0-1_amd64.deb - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/ubuntu/xenial/riak-ts_1.5.0-1_amd64.deb - file_size: 61801444 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.0/ubuntu/xenial/riak-ts_1.5.0-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/ubuntu/xenial/riak-ts_1.5.0-1_amd64.deb + file_size: '61801444' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.0/ubuntu/xenial/riak-ts_1.5.0-1_amd64.deb.sha 1.5.1: - os: source file_info: file_name: riak_ts-1.5.1.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/riak_ts-1.5.1.tar.gz - file_size: 22868265 + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/riak_ts-1.5.1.tar.gz + file_size: '22868265' - os: debian versions: - version: jessie @@ -5030,17 +12132,17 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.5.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/debian/jessie/riak-ts_1.5.1-1_amd64.deb - file_size: 61748608 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/debian/jessie/riak-ts_1.5.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/debian/jessie/riak-ts_1.5.1-1_amd64.deb + file_size: '61748608' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/debian/jessie/riak-ts_1.5.1-1_amd64.deb.sha - version: wheezy architectures: - arch: amd64 file_info: file_name: riak-ts_1.5.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/debian/wheezy/riak-ts_1.5.1-1_amd64.deb - file_size: 68688578 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/debian/wheezy/riak-ts_1.5.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/debian/wheezy/riak-ts_1.5.1-1_amd64.deb + file_size: '68688578' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/debian/wheezy/riak-ts_1.5.1-1_amd64.deb.sha - os: osx versions: - version: '10.8' @@ -5048,9 +12150,9 @@ riak_ts: - arch: x86_64 file_info: file_name: riak-ts-1.5.1-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/osx/10.8/riak-ts-1.5.1-OSX-x86_64.tar.gz - file_size: 67176510 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/osx/10.8/riak-ts-1.5.1-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/osx/10.8/riak-ts-1.5.1-OSX-x86_64.tar.gz + file_size: '67176510' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/osx/10.8/riak-ts-1.5.1-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '6' @@ -5058,29 +12160,29 @@ riak_ts: - arch: source file_info: file_name: riak-ts-1.5.1-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/rhel/6/riak-ts-1.5.1-1.el6.src.rpm - file_size: 22859030 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/rhel/6/riak-ts-1.5.1-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/rhel/6/riak-ts-1.5.1-1.el6.src.rpm + file_size: '22859030' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/rhel/6/riak-ts-1.5.1-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.5.1-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/rhel/6/riak-ts-1.5.1-1.el6.x86_64.rpm - file_size: 66829632 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/rhel/6/riak-ts-1.5.1-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/rhel/6/riak-ts-1.5.1-1.el6.x86_64.rpm + file_size: 
'66829632' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/rhel/6/riak-ts-1.5.1-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-ts-1.5.1-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/rhel/7/riak-ts-1.5.1-1.el7.centos.src.rpm - file_size: 22795306 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/rhel/7/riak-ts-1.5.1-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/rhel/7/riak-ts-1.5.1-1.el7.centos.src.rpm + file_size: '22795306' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/rhel/7/riak-ts-1.5.1-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.5.1-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/rhel/7/riak-ts-1.5.1-1.el7.centos.x86_64.rpm - file_size: 66560360 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/rhel/7/riak-ts-1.5.1-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/rhel/7/riak-ts-1.5.1-1.el7.centos.x86_64.rpm + file_size: '66560360' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/rhel/7/riak-ts-1.5.1-1.el7.centos.x86_64.rpm.sha - os: ubuntu versions: - version: trusty @@ -5088,23 +12190,24 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.5.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/ubuntu/trusty/riak-ts_1.5.1-1_amd64.deb - file_size: 61703700 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/ubuntu/trusty/riak-ts_1.5.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/ubuntu/trusty/riak-ts_1.5.1-1_amd64.deb + file_size: '61703700' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/ubuntu/trusty/riak-ts_1.5.1-1_amd64.deb.sha - version: xenial architectures: - arch: amd64 file_info: file_name: riak-ts_1.5.1-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/ubuntu/xenial/riak-ts_1.5.1-1_amd64.deb - file_size: 61789088 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.1/ubuntu/xenial/riak-ts_1.5.1-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/ubuntu/xenial/riak-ts_1.5.1-1_amd64.deb + file_size: '61789088' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.1/ubuntu/xenial/riak-ts_1.5.1-1_amd64.deb.sha 1.5.2: - os: source file_info: file_name: riak_ts-1.5.2.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/riak_ts-1.5.2.tar.gz - file_size: 22872640 + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/riak_ts-1.5.2.tar.gz + file_size: '22872640' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/riak_ts-1.5.2.tar.gz.sha - os: debian versions: - version: jessie @@ -5112,17 +12215,17 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.5.2-1_amd64.deb - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/debian/jessie/riak-ts_1.5.2-1_amd64.deb - file_size: 61762276 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/debian/jessie/riak-ts_1.5.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/debian/jessie/riak-ts_1.5.2-1_amd64.deb + file_size: '61762276' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/debian/jessie/riak-ts_1.5.2-1_amd64.deb.sha - version: wheezy architectures: - arch: amd64 file_info: file_name: riak-ts_1.5.2-1_amd64.deb - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/debian/wheezy/riak-ts_1.5.2-1_amd64.deb - file_size: 68691826 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/debian/wheezy/riak-ts_1.5.2-1_amd64.deb.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/debian/wheezy/riak-ts_1.5.2-1_amd64.deb + file_size: '68691826' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/debian/wheezy/riak-ts_1.5.2-1_amd64.deb.sha - os: osx versions: - version: '10.8' @@ -5130,9 +12233,9 @@ riak_ts: - arch: x86_64 file_info: file_name: riak-ts-1.5.2-OSX-x86_64.tar.gz - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/osx/10.8/riak-ts-1.5.2-OSX-x86_64.tar.gz - file_size: 67185698 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/osx/10.8/riak-ts-1.5.2-OSX-x86_64.tar.gz.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/osx/10.8/riak-ts-1.5.2-OSX-x86_64.tar.gz + file_size: '67185698' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/osx/10.8/riak-ts-1.5.2-OSX-x86_64.tar.gz.sha - os: rhel versions: - version: '6' @@ -5140,29 +12243,29 @@ riak_ts: - arch: source file_info: file_name: riak-ts-1.5.2-1.el6.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/rhel/6/riak-ts-1.5.2-1.el6.src.rpm - file_size: 22861250 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/rhel/6/riak-ts-1.5.2-1.el6.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/rhel/6/riak-ts-1.5.2-1.el6.src.rpm + file_size: '22861250' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/rhel/6/riak-ts-1.5.2-1.el6.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.5.2-1.el6.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/rhel/6/riak-ts-1.5.2-1.el6.x86_64.rpm - file_size: 66831236 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/rhel/6/riak-ts-1.5.2-1.el6.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/rhel/6/riak-ts-1.5.2-1.el6.x86_64.rpm + file_size: '66831236' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/rhel/6/riak-ts-1.5.2-1.el6.x86_64.rpm.sha - version: '7' architectures: - arch: source file_info: file_name: riak-ts-1.5.2-1.el7.centos.src.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/rhel/7/riak-ts-1.5.2-1.el7.centos.src.rpm - file_size: 22793357 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/rhel/7/riak-ts-1.5.2-1.el7.centos.src.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/rhel/7/riak-ts-1.5.2-1.el7.centos.src.rpm + file_size: '22793357' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/rhel/7/riak-ts-1.5.2-1.el7.centos.src.rpm.sha - arch: x86_64 file_info: file_name: riak-ts-1.5.2-1.el7.centos.x86_64.rpm - file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/rhel/7/riak-ts-1.5.2-1.el7.centos.x86_64.rpm - file_size: 66561356 - chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/rhel/7/riak-ts-1.5.2-1.el7.centos.x86_64.rpm.sha + file_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/rhel/7/riak-ts-1.5.2-1.el7.centos.x86_64.rpm + file_size: '66561356' + chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/rhel/7/riak-ts-1.5.2-1.el7.centos.x86_64.rpm.sha - os: ubuntu versions: - version: trusty @@ -5170,14 +12273,178 @@ riak_ts: - arch: amd64 file_info: file_name: riak-ts_1.5.2-1_amd64.deb - file_href: 
http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/ubuntu/trusty/riak-ts_1.5.2-1_amd64.deb
-          file_size: 61672790
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/ubuntu/trusty/riak-ts_1.5.2-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/ubuntu/trusty/riak-ts_1.5.2-1_amd64.deb
+          file_size: '61672790'
+          chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/ubuntu/trusty/riak-ts_1.5.2-1_amd64.deb.sha
     - version: xenial
       architectures:
       - arch: amd64
         file_info:
           file_name: riak-ts_1.5.2-1_amd64.deb
-          file_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/ubuntu/xenial/riak-ts_1.5.2-1_amd64.deb
-          file_size: 61799590
-          chksum_href: http://s3.amazonaws.com/downloads.basho.com/riak_ts/1.5/1.5.2/ubuntu/xenial/riak-ts_1.5.2-1_amd64.deb.sha
+          file_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/ubuntu/xenial/riak-ts_1.5.2-1_amd64.deb
+          file_size: '61799590'
+          chksum_href: https://files.tiot.jp/riak/ts/1.5/1.5.2/ubuntu/xenial/riak-ts_1.5.2-1_amd64.deb.sha
+dataplatform:
+  1.0.0:
+  - os: debian
+    versions:
+    - version: '7'
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: data-platform_1.0.0-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/debian/7/data-platform_1.0.0-1_amd64.deb
+          file_size: '63615830'
+          chksum_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/debian/7/data-platform_1.0.0-1_amd64.deb.sha
+    - version: wheezy
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: data-platform_1.0.0-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/debian/wheezy/data-platform_1.0.0-1_amd64.deb
+          file_size: '63615830'
+  - os: osx
+    versions:
+    - version: '10.10'
+      architectures:
+      - arch: x86_64
+        file_info:
+          file_name: data-platform-1.0.0-OSX-x86_64.tar.gz
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/osx/10.10/data-platform-1.0.0-OSX-x86_64.tar.gz
+          file_size: '62611411'
+          chksum_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/osx/10.10/data-platform-1.0.0-OSX-x86_64.tar.gz.sha
+  - os: rhel
+    versions:
+    - version: '6'
+      architectures:
+      - arch: source
+        file_info:
+          file_name: data-platform-1.0.0-1.el6.src.rpm
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/6/data-platform-1.0.0-1.el6.src.rpm
+          file_size: '20370019'
+          chksum_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/6/data-platform-1.0.0-1.el6.src.rpm.sha
+      - arch: x86_64
+        file_info:
+          file_name: data-platform-1.0.0-1.el6.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/6/data-platform-1.0.0-1.el6.x86_64.rpm
+          file_size: '63275021'
+          chksum_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/6/data-platform-1.0.0-1.el6.x86_64.rpm.sha
+    - version: '7'
+      architectures:
+      - arch: source
+        file_info:
+          file_name: data-platform-1.0.0-1.el7.centos.src.rpm
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/7/data-platform-1.0.0-1.el7.centos.src.rpm
+          file_size: '20327743'
+          chksum_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/7/data-platform-1.0.0-1.el7.centos.src.rpm.sha
+      - arch: x86_64
+        file_info:
+          file_name: data-platform-1.0.0-1.el7.centos.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/7/data-platform-1.0.0-1.el7.centos.x86_64.rpm
+          file_size: '60319092'
+          chksum_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/7/data-platform-1.0.0-1.el7.centos.x86_64.rpm.sha
+    - version: amazon
+      architectures:
+      - arch: source
+        file_info:
+          file_name: data-platform-1.0.0-1.el7.src.rpm
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/amazon/data-platform-1.0.0-1.el7.src.rpm
+          file_size: '20327847'
+          chksum_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/amazon/data-platform-1.0.0-1.el7.src.rpm.sha
+      - arch: x86_64
+        file_info:
+          file_name: data-platform-1.0.0-1.el7.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/amazon/data-platform-1.0.0-1.el7.x86_64.rpm
+          file_size: '59297808'
+          chksum_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/rhel/amazon/data-platform-1.0.0-1.el7.x86_64.rpm.sha
+  - os: ubuntu
+    versions:
+    - version: precise
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: data-platform_1.0.0-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/ubuntu/precise/data-platform_1.0.0-1_amd64.deb
+          file_size: '63554640'
+          chksum_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/ubuntu/precise/data-platform_1.0.0-1_amd64.deb.sha
+    - version: trusty
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: data-platform_1.0.0-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/ubuntu/trusty/data-platform_1.0.0-1_amd64.deb
+          file_size: '57636716'
+          chksum_href: https://files.tiot.jp/riak/data-platform/1.0/1.0.0/ubuntu/trusty/data-platform_1.0.0-1_amd64.deb.sha
+dataplatform_extras:
+  1.0.0:
+  - os: debian
+    versions:
+    - version: '7'
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: data-platform-extras_1.0.0-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/debian/7/data-platform-extras_1.0.0-1_amd64.deb
+          file_size: '504514076'
+          chksum_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/debian/7/data-platform-extras_1.0.0-1_amd64.deb.sha
+    - version: wheezy
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: data-platform-extras_1.0.0-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/debian/wheezy/data-platform-extras_1.0.0-1_amd64.deb
+          file_size: '504514076'
+  - os: osx
+    versions:
+    - version: '10.10'
+      architectures:
+      - arch: x86_64
+        file_info:
+          file_name: data-platform-extras-1.0.0-OSX-x86_64.pkg
+          file_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/osx/10.10/data-platform-extras-1.0.0-OSX-x86_64.pkg
+          file_size: '505098997'
+          chksum_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/osx/10.10/data-platform-extras-1.0.0-OSX-x86_64.pkg.sha
+  - os: rhel
+    versions:
+    - version: '6'
+      architectures:
+      - arch: x86_64
+        file_info:
+          file_name: data-platform-extras-1.0.0-1.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/rhel/6/data-platform-extras-1.0.0-1.x86_64.rpm
+          file_size: '504874320'
+          chksum_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/rhel/6/data-platform-extras-1.0.0-1.x86_64.rpm.sha
+    - version: '7'
+      architectures:
+      - arch: x86_64
+        file_info:
+          file_name: data-platform-extras-1.0.0-1.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/rhel/7/data-platform-extras-1.0.0-1.x86_64.rpm
+          file_size: '504862060'
+          chksum_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/rhel/7/data-platform-extras-1.0.0-1.x86_64.rpm.sha
+    - version: amazon
+      architectures:
+      - arch: x86_64
+        file_info:
+          file_name: data-platform-extras-1.0.0-1.x86_64.rpm
+          file_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/rhel/amazon/data-platform-extras-1.0.0-1.x86_64.rpm
+          file_size: '504858831'
+          chksum_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/rhel/amazon/data-platform-extras-1.0.0-1.x86_64.rpm.sha
+  - os: ubuntu
+    versions:
+    - version: precise
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: data-platform-extras_1.0.0-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/ubuntu/precise/data-platform-extras_1.0.0-1_amd64.deb
+          file_size: '504514204'
+          chksum_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/ubuntu/precise/data-platform-extras_1.0.0-1_amd64.deb.sha
+    - version: trusty
+      architectures:
+      - arch: amd64
+        file_info:
+          file_name: data-platform-extras_1.0.0-1_amd64.deb
+          file_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/ubuntu/trusty/data-platform-extras_1.0.0-1_amd64.deb
+          file_size: '504509114'
+          chksum_href: https://files.tiot.jp/riak/data-platform-extras/1.0/1.0.0/ubuntu/trusty/data-platform-extras_1.0.0-1_amd64.deb.sha
diff --git a/data/os_metadata.yaml b/data/os_metadata.yaml
index e50dee84fd..9e93c1fca9 100644
--- a/data/os_metadata.yaml
+++ b/data/os_metadata.yaml
@@ -7,42 +7,54 @@
 #   image_src: <>        // Absolute path to a .png image
 #   installing_path: <>  // (optional) Relative URI to install instructions
 #
+amazon:
+  title: "Amazon Linux"
+  image_src: "images/shared/operating_system_branding/amazon.png"
+#  installing_instructions_page: "amazon/"
 debian:
   title: "Debian"
-  image_src: "/images/shared/operating_system_branding/debian.png"
+  image_src: "images/shared/operating_system_branding/debian.png"
   installing_instructions_page: "debian-ubuntu/"
 fedora:
   title: "Fedora"
-  image_src: "/images/shared/operating_system_branding/fedora.png"
+  image_src: "images/shared/operating_system_branding/fedora.png"
   installing_instructions_page: "rhel-centos/"
 freebsd:
   title: "FreeBSD"
-  image_src: "/images/shared/operating_system_branding/freebsd.png"
+  image_src: "images/shared/operating_system_branding/freebsd.png"
   installing_instructions_page: "freebsd/"
 osx:
   title: "Mac OS X"
-  image_src: "/images/shared/operating_system_branding/mac.png"
+  image_src: "images/shared/operating_system_branding/mac.png"
   installing_instructions_page: "mac-osx/"
+raspbian:
+  title: "Raspbian"
+  image_src: "images/shared/operating_system_branding/raspbian.png"
+#  installing_instructions_page: "raspbian/"
 rhel:
   title: "RHEL & CentOS"
-  image_src: "/images/shared/operating_system_branding/redhat.png"
+  image_src: "images/shared/operating_system_branding/redhat.png"
   installing_instructions_page: "rhel-centos/"
+oracle:
+  title: "Oracle Linux"
+  image_src: "images/shared/operating_system_branding/oracle.png"
+  installing_instructions_page: "oracle/"
 sles:
   title: "SuSE Linux Enterprise Server"
-  image_src: "/images/shared/operating_system_branding/suse.png"
+  image_src: "images/shared/operating_system_branding/suse.png"
   installing_instructions_page: "suse/"
 smartos:
   title: "Smart OS"
-  image_src: "/images/shared/operating_system_branding/smartos.png"
+  image_src: "images/shared/operating_system_branding/smartos.png"
   installing_instructions_page: "smartos/"
 solaris:
   title: "Solaris"
-  image_src: "/images/shared/operating_system_branding/solaris.png"
+  image_src: "images/shared/operating_system_branding/solaris.png"
   installing_instructions_page: "solaris/"
 ubuntu:
   title: "Ubuntu"
-  image_src: "/images/shared/operating_system_branding/ubuntu.png"
+  image_src: "images/shared/operating_system_branding/ubuntu.png"
"images/shared/operating_system_branding/ubuntu.png" installing_instructions_page: "debian-ubuntu/" source: - image_src: "/images/shared/operating_system_branding/targz.png" + image_src: "images/shared/operating_system_branding/targz.png" installing_instructions_page: "source/" diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000000..e6c6839bc2 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,57 @@ +FROM ruby:alpine + +LABEL description="Alpine + Ruby + Hugo Docker container." +LABEL maintainer="Peter Clark " + +# taken from jojomi/docker-hugo at https://github.com/jojomi/docker-hugo/blob/0.25.1/Dockerfile +COPY ./run.sh /run.sh +RUN chmod u+x /run.sh + +ENV HUGO_VERSION=0.25.1 +#ENV HUGO_VERSION=0.29 +#ENV HUGO_VERSION=0.39 +#ENV HUGO_VERSION=0.49 +#ENV HUGO_VERSION=0.59.1 +#ENV HUGO_VERSION=0.69.0 +#ENV HUGO_VERSION=0.79.1 +#ENV HUGO_VERSION=0.85 +ENV HUGO_PORT=1314 +ADD https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/hugo_${HUGO_VERSION}_Linux-64bit.tar.gz /tmp +RUN tar -xf /tmp/hugo_${HUGO_VERSION}_Linux-64bit.tar.gz -C /tmp \ + && mkdir -p /usr/local/sbin \ + && mv /tmp/hugo /usr/local/sbin/hugo \ + && rm -rf /tmp/hugo_${HUGO_VERSION}_linux_amd64 +RUN apk add --no-cache ca-certificates + +# add stuff to allow ruby to compile native extentions +RUN apk -U add --no-cache gcc musl-dev make g++ git build-base clang \ + && rm -rf /var/cache/apk/* +ENV CXX=clang++ + +# add stuff to make mini_racer work +RUN gem install 'specific_install' +#RUN gem specific_install -l "https://github.com/sagarjauhari/mini_racer" + +# pre-install commonnly used items +#RUN gem install sass +#RUN gem install autoprefixer-rails +#RUN gem install sprockets -v '~>3.7' +#RUN gem install coffee-script +#RUN gem install uglifier +#RUN gem install rake +#RUN gem install guard +#RUN gem install guard-rake + +#RUN gem install pry +#RUN gem install pry-byebug +#RUN gem install net-sftp + +#RUN gem install mini_racer + +VOLUME /src +VOLUME /output + +WORKDIR /src +CMD ["/run.sh"] + +EXPOSE ${HUGO_PORT} diff --git a/docker/docker-build-image.titokyo.sh b/docker/docker-build-image.titokyo.sh new file mode 100644 index 0000000000..fce730454a --- /dev/null +++ b/docker/docker-build-image.titokyo.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +if test -f ./docker/Dockerfile; then + cd ./docker +fi + +docker build -t titokyo/riak_docs_generator --no-cache . 
diff --git a/docker/docker-compose.generate-riak-docs-beta.yaml b/docker/docker-compose.generate-riak-docs-beta.yaml
new file mode 100644
index 0000000000..5d65f92611
--- /dev/null
+++ b/docker/docker-compose.generate-riak-docs-beta.yaml
@@ -0,0 +1,22 @@
+---
+version: "2.1"
+services:
+  riakdocs:
+    image: titokyo/riak_docs_generator
+    container_name: riakdocs
+    environment:
+      - PUID=1000
+      - PGID=1000
+      - TZ=Asia/Tokyo
+      - HUGO_WATCH=false
+      - HUGO_REFRESH_TIME=-1
+      - RAKE_DEBUG=false
+      - RAKE_GEN_DOWNS=false
+      - RAKE_GEN_PROJECTS=false
+      - HUGO_BASEURL=/riak-docs-beta/
+      - HUGO_PORT=80
+    volumes:
+      - ..:/src
+      - ../output:/output
+    restart: "no"
+
diff --git a/docker/docker-compose.generate-riak-docs.yaml b/docker/docker-compose.generate-riak-docs.yaml
new file mode 100644
index 0000000000..83db7f3d3b
--- /dev/null
+++ b/docker/docker-compose.generate-riak-docs.yaml
@@ -0,0 +1,22 @@
+---
+version: "2.1"
+services:
+  riakdocs:
+    image: titokyo/riak_docs_generator
+    container_name: riakdocs
+    environment:
+      - PUID=1000
+      - PGID=1000
+      - TZ=Asia/Tokyo
+      - HUGO_WATCH=false
+      - HUGO_REFRESH_TIME=-1
+      - RAKE_DEBUG=false
+      - RAKE_GEN_DOWNS=false
+      - RAKE_GEN_PROJECTS=false
+      - HUGO_BASEURL=/riak-docs/
+      - HUGO_PORT=80
+    volumes:
+      - ..:/src
+      - ../output:/output
+    restart: "no"
+
diff --git a/docker/docker-compose.localhost-preview.yaml b/docker/docker-compose.localhost-preview.yaml
new file mode 100644
index 0000000000..5f0741a552
--- /dev/null
+++ b/docker/docker-compose.localhost-preview.yaml
@@ -0,0 +1,28 @@
+---
+version: "2.1"
+networks:
+  bridge:
+services:
+  riakdocs:
+    image: titokyo/riak_docs_generator
+    container_name: riakdocs
+    environment:
+      - PUID=1000
+      - PGID=1000
+      - TZ=Asia/Tokyo
+      - HUGO_WATCH=true
+      - HUGO_REFRESH_TIME=5
+      - RAKE_DEBUG=true
+      - RAKE_GEN_DOWNS=false
+      - RAKE_GEN_PROJECTS=true
+      - HUGO_BASEURL=http://localhost/riak-docs/
+      - HUGO_PORT=1314
+    ports:
+      - 1314:1314 # $HUGO_PORT
+    volumes:
+      - ..:/src
+      - ../output:/output
+    restart: unless-stopped
+    networks:
+      - bridge
+
diff --git a/docker/run.sh b/docker/run.sh
new file mode 100644
index 0000000000..0667cfdc76
--- /dev/null
+++ b/docker/run.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env sh
+
+# Based on https://github.com/jojomi/docker-hugo/blob/0.25.1/run.sh
+
+RAKE_DEBUG="${RAKE_DEBUG:=false}"
+RAKE_GEN_DOWNS="${RAKE_GEN_DOWNS:=false}"
+RAKE_GEN_PROJECTS="${RAKE_GEN_PROJECTS:=true}"
+WATCH="${HUGO_WATCH:=false}"
+SLEEP="${HUGO_REFRESH_TIME:=-1}"
+HUGO_PORT="${HUGO_PORT:=1314}"
+
+echo "RAKE_DEBUG:" $RAKE_DEBUG
+echo "RAKE_GEN_DOWNS:" $RAKE_GEN_DOWNS
+echo "RAKE_GEN_PROJECTS:" $RAKE_GEN_PROJECTS
+echo "HUGO_WATCH:" $WATCH
+echo "HUGO_REFRESH_TIME:" $HUGO_REFRESH_TIME
+echo "HUGO_THEME:" $HUGO_THEME
+echo "HUGO_BASEURL:" $HUGO_BASEURL
+echo "HUGO_PORT:" $HUGO_PORT
+echo "ARGS" $@
+
+bundle update --bundler
+bundle install
+
+if [[ "$RAKE_GEN_DOWNS" != "false" ]]; then
+    rake metadata:generate_downloads
+fi
+
+HUGO=/usr/local/sbin/hugo
+echo "Hugo path: $HUGO"
+
+while [ true ]
+do
+    if [[ "$RAKE_DEBUG" != "false" ]]; then
+        rake build:debug
+    else
+        rake build
+    fi
+    if [[ "$RAKE_GEN_PROJECTS" != "false" ]]; then
+        rake metadata:generate_projects
+    fi
+
+    if [[ $HUGO_WATCH != 'false' ]]; then
+        echo "Watching..."
+        # add for logging: --log --logFile "/output/log.txt" --verboseLog
+        rake watch
+        $HUGO server --watch=true --source="/src" --theme="$HUGO_THEME" --destination="/output" --baseURL="$HUGO_BASEURL" --bind="0.0.0.0" "$@" -p $HUGO_PORT || exit 1
+    else
+        echo "Building one time..."
+        $HUGO --source="/src" --theme="$HUGO_THEME" --destination="/output" --baseURL="$HUGO_BASEURL" "$@" || exit 1
+    fi
+
+    if [[ $HUGO_REFRESH_TIME == -1 ]]; then
+        exit 0
+    fi
+    echo "Sleeping for $HUGO_REFRESH_TIME seconds..."
+    sleep $SLEEP
+done
+
diff --git a/dynamic/css/components/_selectors.scss b/dynamic/css/components/_selectors.scss
index e69fac36a2..725feb994f 100644
--- a/dynamic/css/components/_selectors.scss
+++ b/dynamic/css/components/_selectors.scss
@@ -53,6 +53,34 @@
     }
 }
 
+// 2022-05-27: Added for "other" button used to show other release series that are hidden
+.other__btn {
+    border : none; /* 1 */
+
+    @include rounded-corners(0.25em);
+    width       : 100%;
+    line-height : 1;    /* 2 */
+    padding     : 0.5em 0.5em; /* 3 */
+
+    @extend %sans-bold;
+    font-size  : 100%;
+    color      : $brand-primary-light;
+    background : $brand-gray;
+
+
+    transition : color      500ms ease,
+                 background 500ms ease;
+
+    @include on-event {
+        color      : $light-font-em;
+        background : $brand-primary-lightest;
+    }
+
+    @include when-inside('.js') {
+        cursor : pointer;
+    }
+}
+
 /**
  * Selector; Downward Arrow
  * An example of the vertical-arrow mixin; downward-facing orange arrow used to
@@ -198,6 +226,11 @@
         padding-left : 0;
     }
 }
+
+.release-is-archived-and-hidden {
+    display: none !important;
+}
+
 .selector-list__scroll-box {
     height : 100%; /* 2 */
@@ -315,7 +348,15 @@
 /**
  * Selector list Elements -- Archived and Disabled Colors
  */
-.selector-list__element--archived {
+ .selector-list__element--archived {
+    color      : $version-selector-text-dark;
+    background : $version-color-inactive;
+    @include on-event {
+        color      : $version-selector-text;
+        background : $version-color-inactive-dark;
+    }
+}
+.selector-list__element--other {
     color      : $version-selector-text-dark;
     background : $version-color-inactive;
     @include on-event {
diff --git a/dynamic/css/pages/index.scss b/dynamic/css/pages/index.scss
index 17b459e297..64753183d5 100644
--- a/dynamic/css/pages/index.scss
+++ b/dynamic/css/pages/index.scss
@@ -25,7 +25,7 @@
 }
 .welcome {
     background-color : $brand-primary-light;
-    background       : url('/images/index/home-banner.jpg') no-repeat bottom / cover;
+    background       : url('../images/index/home-banner.jpg') no-repeat bottom / cover;
 
     padding-top    : 3rem;
     padding-bottom : 3rem;
@@ -251,7 +251,7 @@
     font-size : 1.2rem;
 
     @include wide-from('lg') {
-        background : url('/images/index/highlights-bg-split.jpg') left/cover;
+        background : url('../images/index/highlights-bg-split.jpg') left/cover;
     }
 }
 .highlight__container {
@@ -311,7 +311,7 @@
     color            : $light-background-warm;
     background-color : $cyan-darker;
-    background       : url('/images/index/highlights-bg-dark.jpg');
+    background       : url('../images/index/highlights-bg-dark.jpg');
 
     .highlight__link {
         color : $light-background-warm;
@@ -342,7 +342,7 @@
     color            : $cyan-darker;
     background-color : $light-background-warm;
-    background       : url('/images/index/highlights-bg-light.jpg') repeat;
+    background       : url('../images/index/highlights-bg-light.jpg') repeat;
 
     .highlight__link {
         color : $cyan-darker;
diff --git a/dynamic/css/shame/_safari-text-flicker-fix.scss b/dynamic/css/shame/_safari-text-flicker-fix.scss
index 5d6bb8fcbf..6adcec9f35 100644
--- a/dynamic/css/shame/_safari-text-flicker-fix.scss
+++ b/dynamic/css/shame/_safari-text-flicker-fix.scss
@@ -16,4 +16,5 @@
  */
 .main-article {
     -webkit-transform: translate3d(0, 0, 0);
+    transform: translate3d(0, 0, 0);
 }
diff --git a/dynamic/js/basho/selectors.coffee b/dynamic/js/basho/selectors.coffee
index a6abf8d8f3..43f0d06ade 100644
--- a/dynamic/js/basho/selectors.coffee
+++ b/dynamic/js/basho/selectors.coffee @@ -49,20 +49,23 @@ contentOfMeta = (name) -> # enabled, and stay orange if we're never going to fetch the JSON... generateVersionLists = () -> # Pull project/pathing information from the meta tags set up by Hugo. - project = contentOfMeta("project") # ex; riak_kv - current_version = contentOfMeta("version") # ex; 2.1.4 - project_relative_path = contentOfMeta("project_relative_path") # ex; installing/ -or- undefined - project_relative_path = "" unless project_relative_path # ex; installing/ -or- "" + project = contentOfMeta("project") # ex; riak_kv + current_version = contentOfMeta("version") # ex; 2.1.4 + project_relative_path = contentOfMeta("project_relative_path") # ex; installing/ -or- undefined + project_relative_path = "" unless project_relative_path # ex; installing/ -or- "" + docs_root_url = contentOfMeta("docs_root_url") # ex; http://docs.riak.com/ -or- undefined + docs_root_url = "" unless docs_root_url # ex; http://docs.riak.com/ -or- "" + project_descriptions_path = docs_root_url + 'data/project_descriptions.json' # ex; http://docs.riak.com/data/project_descriptions.json # The version_history tags will only exist if the front matter of the # given content .md page contains them, so these may be `undefined`. - meta_version_hisotry_in = contentOfMeta("version_history_in") + meta_version_history_in = contentOfMeta("version_history_in") meta_version_history_locations = contentOfMeta("version_history_locations") version_range = undefined versions_locations = [] - if meta_version_hisotry_in - version_range = SemVer.parseRange(meta_version_hisotry_in) + if meta_version_history_in + version_range = SemVer.parseRange(meta_version_history_in) if meta_version_history_locations locations_json = JSON.parse(meta_version_history_locations) @@ -72,19 +75,20 @@ generateVersionLists = () -> # Fetch the Project Descriptions from the server, and do all the heavy lifting # inside the `success` callback. if project and project != "community" && project != "404" - then $.getJSON('/data/project_descriptions.json', + then $.getJSON(project_descriptions_path, (data) -> project_data = data[project] - project_path = project_data.path # ex; /riak/kv - latest_rel = project_data.latest # ex; 2.1.4 - lts_series = project_data['lts'] # ex; 2.0 -or- undefined - archived_url = project_data['archived_url'] # ex; http://.. -or- undefined + project_path = docs_root_url.replace(/\/+$/, '') + project_data.path # ex; http://docs.riak.com/riak/kv + latest_rel = project_data.latest # ex; 2.1.4 + lts_series = project_data['lts'] # ex; [2.9, 3.0] -or- undefined + archive_below = project_data['archive_below'] # ex; 2.2 -or- undefined + archived_url = project_data['archived_url'] # ex; http://.. -or- undefined # Aggregator for the resulting HTML. To be added into the # div.selector-pane--versions version_selector_list_html = '' - + has_archived = false # Loop over each release set. for release_set, set_index in project_data.releases.reverse() @@ -99,7 +103,16 @@ generateVersionLists = () -> # to ensure scrollbars are always intractable. Without this explicit # z-index, the scrollbar of a __sizing-box may be partially covered # by the padding of a selector-list immediately to its right. - version_selector_list_html += '
' + archived_class = '' + # Make hidden if archive_below is set, the series is below that level, and the current_version is + # more than the highest in the series (i.e. current_version = 2.0.9 should make all 2.0.x visible). + if archive_below and release_set[0] < archive_below and release_set[release_set.length-1] < current_version + archived_class = ' release-is-archived-and-hidden' + has_archived = true + + version_selector_list_html += '
' arrow_str = '' version_selector_list_html += '
' + arrow_str + '
' @@ -117,7 +130,13 @@ generateVersionLists = () -> # of `release_version` string matches the `lts_series` string), add an # LTS tag to the top of the list. # NB. There may be no LTS series set on a give product. - if lts_series and release_set[0].match("^"+lts_series) + is_lts = false + if lts_series + for lts_name in lts_series + if release_set[0].match("^"+lts_name) + is_lts = true + + if is_lts class_list = ["selector-list__element", "selector-list__element--"+list_depth, "selector-list__element--lts-flag"] @@ -163,7 +182,11 @@ generateVersionLists = () -> if SemVer.isInRange(release_sem_ver, range) relative_path = url break - anchor = project_path+"/"+release_version+"/"+relative_path + # relative_path can start with a slash or not + # so we ensure it does + unless relative_path.startsWith("/") then relative_path = "/"+relative_path + + anchor = project_path+"/"+release_version+relative_path anchor_tag = '' # Build the full list element and add it to the html aggregator. @@ -197,11 +220,25 @@ generateVersionLists = () -> '
  • older
  • ' version_selector_list_html += '
    ' + if has_archived + class_list = ["selector-list__element", + "selector-list__element--other"] + + # We can skip the Edge Fader here, b/c we know there's only ever going + # to be one "Older" element. + #NB. See above note re: whitespace. + version_selector_list_html += '
    ' + version_selector_list_html += '
    ' + + version_selector_list_html += '
      \n' + version_selector_list_html += + '
    • ' + + version_selector_list_html += '
    ' # What we've all been waiting for; make the DOM modification. $('.selector-pane--versions').html(version_selector_list_html) - # With the lists added to the DOM, we can capture the height of the # tallest one, and set the height of the selector-pane's __shadow-box # and __sizing-box. @@ -219,7 +256,6 @@ generateVersionLists = () -> $version_pane__shadow.css('height', tallest_list + (2).rem()) $version_pane__sizing.css('height', tallest_list + (2).rem()) - ## HACK: # Because these dynamic elements were added after the JQuery.ready() # clause that defied 'scroll' and 'click' event listeners on all other @@ -237,6 +273,16 @@ generateVersionLists = () -> EdgeFader.onClickCallback ) + # When the other button is clicked, hide the button, show the hidden series and fix + # the height + # TODO: fix height + $('.selector-other-releases').on('click', + (event) -> + $('.release-is-archived-and-hidden').toggleClass('release-is-archived-and-hidden'); + $('.selector-other-releases').toggleClass('release-is-archived-and-hidden'); + window.dispatchEvent(new Event('resize')); + ) + ) # *end getJSON callback* diff --git a/dynamic/js/basho/table-of-contents.coffee b/dynamic/js/basho/table-of-contents.coffee index dfb85bf5e2..b61faa91ec 100644 --- a/dynamic/js/basho/table-of-contents.coffee +++ b/dynamic/js/basho/table-of-contents.coffee @@ -23,12 +23,12 @@ return if not toc.length # Finda all h2 elements and early out if there are fewer than 3 of them. h2s = $('main > h2') -return if h2s.length < 3 +return if h2s.length < 2 # Build DOM elements in JQuery, to be appended later. toc_title = $('

    Contents

    ') toc_wrapper = $('
    ') -toc_items = $('
      ').appendTo(toc_wrapper) +toc_items = $('
        ').appendTo(toc_wrapper) toc_wrapper.addClass("table-of-contents__wrapper--multi") if h2s.length >= 6 @@ -39,7 +39,7 @@ h2s.each -> # from the text of the header before using it as a local link. toc_items.append($("
      1. ", { class : "table-of-contents__item", - html : "#{$that.text()}" + html : "#{$that.text()}" })) diff --git a/layouts/404.html b/layouts/404.html index 6dd46478ae..d0b149d3c6 100644 --- a/layouts/404.html +++ b/layouts/404.html @@ -1,5 +1,4 @@ - - - - {{ partial "head.html" $HugoNode }} - - - @@ -86,18 +80,15 @@ {{/*

        */}} {{ (printf "## %s %s" $target_title $target_version) | markdownify }}

        You should be able to find your operating system in the choices below.

        - - - {{ range $os_map := (sort (where $target_download_metadata "os" "!=" "source") "os") }} - {{/*

        */}} {{ (printf "### ![](%s) %s for %s{#%s}" $image_source $target_title $os_title (replace (lower $os_title) " " "-" )) | markdownify }} @@ -123,8 +114,12 @@ {{$arch_map.arch}} {{$arch_map.file_info.file_name}} - {{ (div (div $arch_map.file_info.file_size 1024) 1024) }} MB + {{ (div (div (int $arch_map.file_info.file_size) 1024) 1024) }} MB + {{ if $arch_map.file_info.chksum_href }} hash + {{ else }} + + {{ end }} {{ end}} @@ -135,7 +130,8 @@ ===================================================== --> {{ if ( and $install_instructions_set $installing_page ) }} - {{ $installing_uri := (printf "%s/%s/%s" $version_path $install_instructions_set $installing_page) }} + {{ $installing_uri := replace (printf "%s%s/%s/%s" $HugoNode.Site.BaseURL $version_path $install_instructions_set $installing_page) "//" "/" }} + {{ $installing_uri := replace $installing_uri "http:/localhost" "http://localhost" }} @@ -150,17 +146,15 @@ {{end}} - {{ range $source_map := (where $target_download_metadata "os" "==" "source") }} - {{/*

        */}} {{ (printf "### ![](%s) Source Download for %s{#source}" $image_source $target_title) | markdownify }} @@ -170,15 +164,20 @@ - + + {{ if $source_map.file_info.chksum_href }} + {{ else }} + + {{ end }} {{ if ( and $install_instructions_set $installing_page ) }} - {{ $installing_uri := (printf "%s/%s/%s" $version_path $install_instructions_set $installing_page) }} + {{ $installing_uri := replace (printf "%s%s/%s/%s" $HugoNode.Site.BaseURL $version_path $install_instructions_set $installing_page) "//" "/" }} + {{ $installing_uri := replace $installing_uri "http:/localhost" "http://localhost" }}
        {{ (div (div $source_map.file_info.file_size 1024) 1024) }} MB{{ (div (div (int $source_map.file_info.file_size) 1024) 1024) }} MBhash
        @@ -197,7 +196,6 @@ {{ if not ( eq $index (sub 1 (len $listed_projects)) ) }}
        {{ end }} - {{ end }} @@ -213,7 +211,7 @@ - + diff --git a/layouts/_default/latest_redirect.html b/layouts/_default/latest_redirect.html new file mode 100644 index 0000000000..2329266b0b --- /dev/null +++ b/layouts/_default/latest_redirect.html @@ -0,0 +1,16 @@ + + + + + + + + diff --git a/layouts/_default/redirect.html b/layouts/_default/redirect.html index 08255cf94e..1f6c8a0766 100644 --- a/layouts/_default/redirect.html +++ b/layouts/_default/redirect.html @@ -1,8 +1,8 @@ - + - + diff --git a/layouts/_default/single.html b/layouts/_default/single.html index cdd848d9e2..05c027ef51 100644 --- a/layouts/_default/single.html +++ b/layouts/_default/single.html @@ -50,10 +50,10 @@ {{ $HugoNode := . }} -{{ $project := $HugoNode.Scratch.Get "project" }} -{{ $version := $HugoNode.Scratch.Get "version" }} -{{ $is_latest := $HugoNode.Scratch.Get "is_latest" }} -{{ $is_lts := $HugoNode.Scratch.Get "is_lts" }} +{{ $project := $HugoNode.Scratch.Get "project" }} +{{ $version := $HugoNode.Scratch.Get "version" }} +{{ $is_latest := $HugoNode.Scratch.Get "is_latest" }} +{{ $is_lts := $HugoNode.Scratch.Get "is_lts" }} {{ $menu_title := (printf "%s-%s" $project $version) }} {{/* ex; "riak_kv-2.1.4" */}} --> @@ -103,7 +103,7 @@ - + diff --git a/layouts/community/single.html b/layouts/community/single.html index d6ee6c4ff6..57a4ebd416 100644 --- a/layouts/community/single.html +++ b/layouts/community/single.html @@ -99,7 +99,7 @@ - + diff --git a/layouts/index.html b/layouts/index.html index f1288e18a6..f2be2389b4 100644 --- a/layouts/index.html +++ b/layouts/index.html @@ -18,11 +18,11 @@ - + Basho Documentation - + @@ -44,7 +44,7 @@
        -

        Welcome to the
        Basho Docs

        +

        Welcome to the
        Riak Docs

        @@ -65,11 +65,11 @@

        Welcome to the
        Basho Docs

        @@ -159,7 +115,7 @@

        Welcome to the
        Basho Docs

        - + diff --git a/layouts/partials/banner.html b/layouts/partials/banner.html index c01ea2f8fc..e073d8378f 100644 --- a/layouts/partials/banner.html +++ b/layouts/partials/banner.html @@ -21,7 +21,7 @@ @@ -43,18 +43,17 @@ the list of links. Simple, right? --> + {{/* Don't display the intra-site navigation if we're on the splash page; it already is an intra-site navigation page, so the banner becomes redundant. @@ -66,9 +65,9 @@ {{ end }} diff --git a/layouts/partials/common-variables.html b/layouts/partials/common-variables.html index 0c36ac6d25..7ca2ef079f 100644 --- a/layouts/partials/common-variables.html +++ b/layouts/partials/common-variables.html @@ -12,17 +12,19 @@ Notes: * We use `(replace "")` to remove `sub_str` from `str` - * .Permalink == e.g. https://docs.basho.com/riak/kv/2.1.4/introduction/ + * .Permalink == e.g. {{ .Site.BaseURL }}riak/kv/2.1.4/introduction/ * .Site.BaseURL == e.g. https://docs.basho.com/ -{{ $title_supertext := .Params.title_supertext }} {{/* ex; Small Title Text */}} -{{ $title := .Title }} {{/* ex; Primary Title Text */}} -{{ $description := .Params.description }} {{/* ex; Rich-preview summary */}} -{{ $project := .Params.project }} {{/* ex; riak_kv */}} -{{ $version := .Params.project_version }} {{/* ex; 2.1.4 */}} -{{ $version_history_in := .Params.version_history.in }} {{/* See CONTRIBUTING.md */}} -{{ $version_history_locations := .Params.version_history.locations }} {{/* See CONTRIBUTING.md */}} +{{ $title_supertext := .Params.title_supertext }} {{/* ex; Small Title Text */}} +{{ $title := .Title }} {{/* ex; Primary Title Text */}} +{{ $description := .Params.description }} {{/* ex; Rich-preview summary */}} +{{ $project := .Params.project }} {{/* ex; riak_kv */}} +{{ $version := .Params.project_version }} {{/* ex; 2.9.0p5 */}} +{{ $series := (replaceRE "^([0-9]+\\.[0-9]+).*$" "$1" $version) }} {{/* e.g. 
"3.0.4" -> "3.0" */}} +{{ $version_history_in := .Params.version_history.in }} {{/* See CONTRIBUTING.md */}} +{{ $version_history_locations := .Params.version_history.locations }} {{/* See CONTRIBUTING.md */}} + {{ $display_toc := (index .Params "toc") | default true }} {{ $commercial_offering := (index .Params "commercial_offering") | default false }} @@ -31,21 +33,24 @@ {{ $project_description := (index .Site.Params.project_descriptions $project) }} -{{ $project_name := $project_description.project_name }} {{/* ex; "Riak KV" */}} -{{ $project_name_html := $project_description.project_name_html }} {{/* ex; RiakTS */}} -{{ $project_path := $project_description.path }} {{/* ex; /riak/kv */}} -{{ $version_path := (printf "%s/%s" $project_path $version) }} {{/* ex; /riak/kv/2.1.4 */}} -{{ $releases := $project_description.releases }} {{/* ex; [[2.0.0, 2.0.1, ...], [2.1.0, 2.1.2, ...]] */}} -{{ $latest_version := $project_description.latest }} {{/* ex; 2.1.4 */}} -{{ $is_latest := (eq $version $latest_version) }} {{/* ex; true */}} -{{ $lts_version := $project_description.lts }} {{/* ex; 2.0.6 */}} -{{ $is_lts := (eq $version $lts_version) }} {{/* ex; false */}} -{{ $archived_url := $project_description.archived_url }} {{/* ex; http://docs.basho.com/riak/1.4.12/ */}} -{{ $archived_path := $project_description.archived_path }} {{/* ex; riak */}} +{{ $project_name := $project_description.project_name }} {{/* ex; "Riak KV" */}} +{{ $project_name_html := $project_description.project_name_html }} {{/* ex; RiakTS */}} +{{ $project_path := $project_description.path }} {{/* ex; /riak/kv */}} +{{ $version_path := (printf "%s/%s" $project_path $version) }} {{/* ex; /riak/kv/2.9.0p5 */}} +{{ $releases := $project_description.releases }} {{/* ex; [[2.0.0, 2.0.1, ...], [2.1.0, 2.1.2, ...]] */}} +{{ $latest_version := $project_description.latest }} {{/* ex; 2.1.4 */}} +{{ $is_latest := (eq $version $latest_version) }} {{/* ex; true */}} +{{ $lts_versions := $project_description.lts }} {{/* ex; [2.9.10, 3.0.9] */}} +{{ $is_lts := (in $lts_versions $series) }} {{/* ex; false */}} +{{ $archived_url := $project_description.archived_url }} {{/* ex; {{ .Site.BaseURL }}riak/kv/2.9.0p5/ */}} +{{ $archived_path := $project_description.archived_path }} {{/* ex; riak */}} + +{{ $github_path := $project_description.github_path | default "https://github.com/TI-Tokyo/riak-docs-fork/edit/master/content/" }} {{/* ex; https://github.com/TI-Tokyo/riak-docs-fork/edit/master/content */}} -{{ $site_relative_path := (printf "/%s" (replace .Permalink .Site.BaseURL "")) }} {{/* ex; /riak/kv/2.1.4/introduction/ */}} -{{ $project_relative_path := (replace $site_relative_path (printf "%s/" $version_path) "") }} {{/* ex; introduction/ */}} +{{ $site_root_path := .Site.BaseURL }} {{/* ex; http://docs.riak.com/ */}} +{{ $site_relative_path := (printf "/%s" (replace .Permalink .Site.BaseURL "")) }} {{/* ex; /riak/kv/2.9.0p5/introduction/ */}} +{{ $project_relative_path := (replace $site_relative_path (printf "%s/" $version_path) "") }} {{/* ex; introduction/ */}} @@ -66,7 +71,8 @@ {{ .Scratch.Add "releases" $releases }} {{ .Scratch.Add "latest_version" $latest_version }} {{ .Scratch.Add "is_latest" $is_latest }} -{{ .Scratch.Add "lts_version" $lts_version }} +{{ .Scratch.Add "lts_versions" $lts_versions }} +{{ .Scratch.Add "series" $series }} {{ .Scratch.Add "is_lts" $is_lts }} {{ .Scratch.Add "archived_url" $archived_url }} {{ .Scratch.Add "archived_path" $archived_path }} @@ -82,10 +88,10 @@ leading and trailing '/' from the 
$site_relative_path. {{ if $project_relative_path }} - {{ $gh_edit_path := (printf "https://github.com/basho/basho_docs/edit/master/content/%s.md" (substr $site_relative_path 1 -1)) }} + {{ $gh_edit_path := (printf "%s%s.md" $github_path (substr $site_relative_path 1 -1)) }} {{ .Scratch.Add "gh_edit_path" $gh_edit_path }} {{ else }} - {{ $gh_edit_path := (printf "https://github.com/basho/basho_docs/edit/master/content/%s/index.md" (substr $site_relative_path 1 -1)) }} + {{ $gh_edit_path := (printf "%s%s/index.md" $github_path (substr $site_relative_path 1 -1)) }} {{ .Scratch.Add "gh_edit_path" $gh_edit_path }} {{ end }} diff --git a/layouts/partials/content-navigation.html b/layouts/partials/content-navigation.html index 1a47057150..a1e78e679a 100644 --- a/layouts/partials/content-navigation.html +++ b/layouts/partials/content-navigation.html @@ -6,6 +6,7 @@ "community") {{ $HugoNode := .node }} +{{ $BaseURL := $HugoNode.Site.BaseURL }} {{ $menu_title := .menu_title }} {{ $menu_list := (index $HugoNode.Site.Menus $menu_title) }} {{/* GoLang List of Menu Objects */}} @@ -38,8 +39,8 @@ diff --git a/layouts/partials/footer.html b/layouts/partials/footer.html index 6329d60504..4dda62fd67 100644 --- a/layouts/partials/footer.html +++ b/layouts/partials/footer.html @@ -11,7 +11,8 @@

        - © 2011-{{.Now.Year}} Basho Technologies, Inc. + Parts © 2011-2017 Basho Technologies, Inc.
        + Parts © 2018-{{ now.Year }} KK TI Tokyo

        diff --git a/layouts/partials/google-search.html b/layouts/partials/google-search.html index be5269965a..7bb237df09 100644 --- a/layouts/partials/google-search.html +++ b/layouts/partials/google-search.html @@ -1,6 +1,6 @@ \ No newline at end of file diff --git a/layouts/partials/head.html b/layouts/partials/head.html index 9c5e7cd0b8..724e0a80e0 100644 --- a/layouts/partials/head.html +++ b/layouts/partials/head.html @@ -11,17 +11,18 @@ {{ $project_relative_path := $HugoNode.Scratch.Get "project_relative_path" }} {{ $version_history_in := $HugoNode.Scratch.Get "version_history_in" }} {{ $version_history_locations := $HugoNode.Scratch.Get "version_history_locations" }} +{{ $series := $HugoNode.Scratch.Get "series" }} --> - + {{ if $title_supertext }}{{ $title_supertext }} {{ end }}{{ $title }} - + {{- if $canonical_link }} {{end}} @@ -36,9 +37,12 @@ {{- if $project_relative_path }} {{end}} -{{- if $version_history_in }} - {{end}} + + + {{- if $version_history_in }} +{{end}} {{- if $version_history_locations }} {{end}} - + + \ No newline at end of file diff --git a/layouts/shortcodes/baseurl.html b/layouts/shortcodes/baseurl.html new file mode 100644 index 0000000000..f0670f1927 --- /dev/null +++ b/layouts/shortcodes/baseurl.html @@ -0,0 +1 @@ +{{- .Page.Site.BaseURL | relURL -}}{{- "/" -}} \ No newline at end of file diff --git a/rake_libs/downloads_metadata_generator_sftp.rb b/rake_libs/downloads_metadata_generator_sftp.rb new file mode 100644 index 0000000000..b31c5909ef --- /dev/null +++ b/rake_libs/downloads_metadata_generator_sftp.rb @@ -0,0 +1,256 @@ +################################## +# Downloads Package List Generator +# +# The idea behind this file is to provide a one-stop-shop for updating the +# dynamically-retrieved package URIs, and for correctly exposing that data in a +# form Hugo will automatically detect and load. +# At the end of the day, the generated YAML will look something like, +# +# ---- +# riak_kv: +# 2.1.3: +# - os: debian +# versions: +# - version: '6' +# architectures: +# - arch: amd64 +# file_info: +# file_name: riak_2.1.3-1_amd64.deb +# file_href: http://s3.amazonaws.com/downloads.basho. . . +# file_size: 63146820 +# chksum_href: http://s3.amazonaws.com/downloads.bas. . . +# - version: '7' +# architectures: +# . . . +# - os: fedora +# versions: +# ... +# +# Or, more generically, +# +# ---- +# <> +# <> +# - os: <> +# versions: +# - version: <> +# architectures: +# - arch: <> +# file_info: +# file_name: <> +# file_href: <> +# file_size: <> +# chksum_href: <> +# ---- +# +# NOTABLE EXCEPTION: Source tarballs don't belong to any one OS, so they're +# grouped at the same level as OSs, and are exposed in the form, +# +# ---- +# <> +# <> +# - os: source +# file_info: +# file_name: <> +# file_href: <> +# file_size: <> +# +# Note that we have a list of the `{os:"", versions:[]}` objects, as well as a +# lists of the `{version:"", architectures:[]}` and `{arch:"", file_info:{}}` +# objects. This will allow us to sort the lists based on the relevant +# `os`/`version`/`arch` string, and iterate over the contents of each of those +# objects appropriately. 
+# The project names and version numbers are pulled from the directory listings
+# on the SFTP file server (they were previously read from the index.json files
+# on s3.amazonaws.com/downloads.basho.com/ generated by Basho's release process).
+
+require 'uri'
+require 'net/sftp'
+require 'json'
+require 'yaml'
+
+#####
+# File-Wide Constants
+#####################
+BASE_HTTP_URL = "https://files.tiot.jp/riak"
+BASE_SFTP_URL = "sftp://sftp.tiot.jp/riak"
+BASE_SFTP_URI = URI(BASE_SFTP_URL)
+
+# List of projects to track, plus metadata
+# Keys are the Project Designations used in Hugo. Values are maps containing the
+# root paths used in the file server, and the lowest "major" version number to
+# pull information for.
+PROJECTS_TO_TRACK = {
+  "riak_kv"             =>{ "file_root"=>"kv",                   "min_maj_ver"=>2.0 },
+  "riak_cs"             =>{ "file_root"=>"cs",                   "min_maj_ver"=>2.0 },
+  "stanchion"           =>{ "file_root"=>"stanchion",            "min_maj_ver"=>2.0 },
+  "riak_cs_control"     =>{ "file_root"=>"cs-control",           "min_maj_ver"=>1.0 },
+  "riak_ts"             =>{ "file_root"=>"ts",                   "min_maj_ver"=>1.2 },
+  "dataplatform"        =>{ "file_root"=>"data-platform",        "min_maj_ver"=>1.0 },
+  "dataplatform_extras" =>{ "file_root"=>"data-platform-extras", "min_maj_ver"=>1.0 },
+}
+
+# NOTE: All Hashes in this file use strings for keys, so we can correctly write
+# them to a YAML file after all data has been pulled from the file server.
+
+
+#####
+# Rake Task Call
+################
+def generate_downloads_metadata_sftp()
+  # The Hash we'll eventually write.
+  download_info_hash = {}
+
+  Net::SFTP.start(BASE_SFTP_URI.host, 'publicfiles', :password => 'anonymous') do |sftp|
+
+    PROJECTS_TO_TRACK.each do |project_designation, project_meta|
+      download_info_hash["#{project_designation}"] = version_hash = {}
+      file_root = project_meta["file_root"]
+
+      # For every project, pull the list of major version numbers (e.g. '2.0',
+      # '2.1', etc.), and filter it based on that project's `min_maj_ver`.
+      major_versions = fetch_index_sftp(sftp, "#{file_root}")
+                         .select { |k,v| v["type"] == "dir" }
+                         .select { |k,v| k != "CURRENT" }
+                         .select { |k,v| k =~ /^\d+\.\d+$/ }
+                         .select { |k,v| k.to_f >= project_meta["min_maj_ver"] }
+                         .keys()
+      major_versions.each do |major_version|
+        # For every major version, pull the list of full version numbers.
+        versions = fetch_index_sftp(sftp, "#{file_root}/#{major_version}")
+                     .select { |k,v| v["type"] == "dir" }
+                     .select { |k,v| k != "CURRENT" }
+                     .select { |k,v| k =~ /^\d+\.\d+\.\d+(p\d+)?+$/ }
+                     .keys()
+        versions.each do |version|
+          version_hash["#{version}"] = os_list = []
+          # Every full version directory will contain one directory per operating
+          # system, and a source tarball. We want to first record information
+          # regarding the source archive, then continue to iterate through the
+          # operating systems.
+          version_index_json = fetch_index_sftp(sftp, "#{file_root}/#{major_version}/#{version}")
+
+          # Grab the source file list, and add it to download_info_hash (by way
+          # of appending the source entry to the os_list).
+          source_maps = version_index_json.select { |k, v| v["type"] == "file" && k !~ /\.sha/}
+          source_maps.each do |k, v|
+            file_info = {
+              "file_name"=>k,
+              "file_href"=>v["staticLink"],
+              "file_size"=>v["size"]
+            }
+            if version_index_json.has_key?("#{k}.sha")
+              file_info["chksum_href"] = version_index_json["#{k}.sha"]["staticLink"]
+            end
+            os_list.push({"os"=>"source", "file_info"=>file_info})
+          end
+
+          # Move on to per-OS entries.
+          operating_systems = version_index_json.select { |k,v| v["type"] == "dir" }
+                                                .select { |k,v| k != "CURRENT" }
+                                                .keys()
+          operating_systems.each do |os|
+            os_version_list = []
+            os_list.push({"os"=>os, "versions"=>os_version_list})
+
+            os_versions = fetch_index_sftp(sftp, "#{file_root}/#{major_version}/#{version}/#{os}")
+                            .select { |k,v| v["type"] == "dir" }
+                            .select { |k,v| k != "CURRENT" }
+                            .keys()
+            os_versions.each do |os_version|
+              arch_list = []
+              os_version_list.push({"version"=>os_version, "architectures"=>arch_list})
+
+              package_maps = fetch_index_sftp(sftp, "#{file_root}/#{major_version}/#{version}/#{os}/#{os_version}")
+                               .select { |k,v| v["type"] == "file" }
+              # Filter out .sha files, and add each package to download_info_hash
+              # (by way of appending the package to the arch_list).
+              package_maps.select { |k, v| k !~ /\.sha/ } .each do |k, v|
+                file_info = {
+                  "file_name"=>k,
+                  "file_href"=>v["staticLink"],
+                  "file_size"=>v["size"]
+                }
+                if package_maps.has_key?("#{k}.sha")
+                  file_info["chksum_href"] = package_maps["#{k}.sha"]["staticLink"]
+                end
+
+                # attempt to extract the architecture from `k` (the file name)
+                # (default to 'unknown')
+                package_arch = 'unknown'
+                if k =~ /amd64/
+                  package_arch = 'amd64'
+                elsif k =~ /x86_64/
+                  package_arch = 'x86_64'
+                elsif k =~ /i386_64/
+                  package_arch = 'i386_64'
+                elsif k =~ /i386/
+                  package_arch = 'i386'
+                elsif k =~ /armhf/
+                  package_arch = 'arm32'
+                elsif k =~ /arm64/
+                  package_arch = 'arm64'
+                elsif k =~ /src/
+                  package_arch = 'source'
+                elsif k =~ /\.txz/
+                  package_arch = 'txz'
+                end
+
+                arch_list.push({"arch"=>package_arch, "file_info"=>file_info})
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+
+  puts "Opening \"data/download_info.yaml\" for writing"
+  File.open('data/download_info.yaml', 'w') do |f|
+    f.write(download_info_hash.to_yaml)
+  end
+#  puts download_info_hash.to_yaml
+
+  puts "Download data generation complete!"
+  puts ""
+end
+
+
+####
+# Helper Functions
+##################
+
+# Lists the directory at `relative_path` (relative to BASE_SFTP_URI) over the
+# given SFTP session, and returns a name-sorted Hash mapping each entry name to
+# its type ("dir" or "file"), name, size, and static HTTP download link.
+def fetch_index_sftp(sftp, relative_path)
+  #puts "Using \"#{BASE_SFTP_URI}\"..."
+  puts "Indexing \"#{relative_path}\"..."
+  results = Hash.new
+  #puts "Opened SFTP to \"#{BASE_SFTP_URI.host}\"..."
+  #puts "Listing \"#{BASE_SFTP_URI.path}/#{relative_path}\"..."
+  sftp.dir.foreach("#{BASE_SFTP_URI.path}/#{relative_path}") do |entry|
+    unless entry.name == "." || entry.name == ".." || entry.symlink?
+      if entry.directory?
+        type = "dir"
+      else
+        type = "file"
+      end
+      properties = Hash["type" => type,
+                        "name" => entry.name,
+                        "staticLink" => "#{BASE_HTTP_URL}/#{relative_path}/#{entry.name}",
+                        "size" => "#{entry.attributes.size}"
+                       ]
+      results.store(entry.name, properties)
+    end
+  end
+  #puts results
+  results = results.sort.to_h
+  return results
+end
+
+
+# If this file is being run directly, go ahead and generate the download data.
+if __FILE__ == $0 + generate_downloads_metadata_sftp() +end diff --git a/rake_libs/projects_metadata_generator.rb b/rake_libs/projects_metadata_generator.rb index 636904adb6..a5369389aa 100644 --- a/rake_libs/projects_metadata_generator.rb +++ b/rake_libs/projects_metadata_generator.rb @@ -44,10 +44,12 @@ def generate_projects_metadata() project_hash[:project_name] = description["project_name"] project_hash[:path] = description["path"] + project_hash[:github_path] = description["github_path"] project_hash[:archived_path] = description["archived_path"] project_hash[:releases] = description["releases"] project_hash[:latest] = description["latest"] project_hash[:lts] = description["lts"] if description["lts"] + project_hash[:archive_below] = description["archive_below"] if description["archive_below"] project_hash[:archived_url] = description["archived_url"] if description["archived_url"] end diff --git a/static/css/main.css b/static/css/main.css index 25b8e31066..b737ab7a75 100644 --- a/static/css/main.css +++ b/static/css/main.css @@ -3,4 +3,4 @@ * Copyright 2011-2016 The Bootstrap Authors * Copyright 2011-2016 Twitter, Inc. * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) - *//*! normalize.css v4.0.0 | MIT License | github.com/necolas/normalize.css */@import url("https://fonts.googleapis.com/css?family=Oswald:400");@import url("https://fonts.googleapis.com/css?family=Muli:400,700");@import url("https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700");html{font-family:sans-serif;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}body{margin:0}article,aside,details,figcaption,figure,footer,header,main,menu,nav,section,summary{display:block}audio,canvas,progress,video{display:inline-block}audio:not([controls]){display:none;height:0}progress{vertical-align:baseline}template,[hidden]{display:none}a{background-color:transparent}a:active,a:hover{outline-width:0}abbr[title]{border-bottom:none;text-decoration:underline;text-decoration:underline dotted}b,strong{font-weight:inherit}b,strong{font-weight:bolder}dfn{font-style:italic}h1{font-size:2em;margin:0.67em 0}mark{background-color:#ff0;color:#000}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-0.25em}sup{top:-0.5em}img{border-style:none}svg:not(:root){overflow:hidden}code,kbd,pre,samp{font-family:monospace, monospace;font-size:1em}figure{margin:1em 40px}hr{-webkit-box-sizing:content-box;box-sizing:content-box;height:0;overflow:visible}button,input,select,textarea{font:inherit}optgroup{font-weight:bold}button,input,select{overflow:visible}button,input,select,textarea{margin:0}button,select{text-transform:none}button,[type="button"],[type="reset"],[type="submit"]{cursor:pointer}[disabled]{cursor:default}button,html [type="button"],[type="reset"],[type="submit"]{-webkit-appearance:button}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}button:-moz-focusring,input:-moz-focusring{outline:1px dotted ButtonText}fieldset{border:1px solid #c0c0c0;margin:0 2px;padding:0.35em 0.625em 
0.75em}legend{-webkit-box-sizing:border-box;box-sizing:border-box;color:inherit;display:table;max-width:100%;padding:0;white-space:normal}textarea{overflow:auto}[type="checkbox"],[type="radio"]{-webkit-box-sizing:border-box;box-sizing:border-box;padding:0}[type="number"]::-webkit-inner-spin-button,[type="number"]::-webkit-outer-spin-button{height:auto}[type="search"]{-webkit-appearance:textfield}[type="search"]::-webkit-search-cancel-button,[type="search"]::-webkit-search-decoration{-webkit-appearance:none}html{-webkit-box-sizing:border-box;box-sizing:border-box}*,*::before,*::after{-webkit-box-sizing:inherit;box-sizing:inherit}@-ms-viewport{width:device-width}html{font-size:16px;-ms-overflow-style:scrollbar;-webkit-tap-highlight-color:transparent}body{font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif;font-size:1rem;line-height:1.5;color:red;background-color:#fff}[tabindex="-1"]:focus{outline:none !important}h1,h2,h3,h4,h5,h6{margin-top:0;margin-bottom:.5rem}p{margin-top:0;margin-bottom:1rem}abbr[title],abbr[data-original-title]{cursor:help;border-bottom:1px dotted red}address{margin-bottom:1rem;font-style:normal;line-height:inherit}ol,ul,dl{margin-top:0;margin-bottom:1rem}ol ol,ul ul,ol ul,ul ol{margin-bottom:0}dt{font-weight:bold}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 1rem}a{color:#f99d53;text-decoration:none}a:focus,a:hover{color:#f67309;text-decoration:underline}a:focus{outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}a:not([href]):not([tabindex]){color:inherit;text-decoration:none}a:not([href]):not([tabindex]):focus,a:not([href]):not([tabindex]):hover{color:inherit;text-decoration:none}a:not([href]):not([tabindex]):focus{outline:none}pre{margin-top:0;margin-bottom:1rem;overflow:auto}figure{margin:0 0 1rem}img{vertical-align:middle}[role="button"]{cursor:pointer}a,area,button,[role="button"],input,label,select,summary,textarea{-ms-touch-action:manipulation;touch-action:manipulation}table{border-collapse:collapse;background-color:transparent}caption{padding-top:.75rem;padding-bottom:.75rem;color:red;text-align:left;caption-side:bottom}th{text-align:left}label{display:inline-block;margin-bottom:.5rem}button:focus{outline:1px dotted;outline:5px auto -webkit-focus-ring-color}input,button,select,textarea{margin:0;line-height:inherit;border-radius:0}input[type="radio"]:disabled,input[type="checkbox"]:disabled{cursor:not-allowed}input[type="date"],input[type="time"],input[type="datetime-local"],input[type="month"]{-webkit-appearance:listbox}textarea{resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{display:block;width:100%;padding:0;margin-bottom:.5rem;font-size:1.5rem;line-height:inherit}input[type="search"]{-webkit-appearance:none}output{display:inline-block}[hidden]{display:none !important}h1,h2,h3,h4,h5,h6,.h1,.h2,.h3,.h4,.h5,.h6{margin-bottom:.5rem;font-family:inherit;font-weight:500;line-height:1.1;color:inherit}h1,.h1{font-size:2.5rem}h2,.h2{font-size:2rem}h3,.h3{font-size:1.75rem}h4,.h4{font-size:1.5rem}h5,.h5{font-size:1.25rem}h6,.h6{font-size:1rem}.lead{font-size:1.25rem;font-weight:300}.display-1{font-size:6rem;font-weight:300}.display-2{font-size:5.5rem;font-weight:300}.display-3{font-size:4.5rem;font-weight:300}.display-4{font-size:3.5rem;font-weight:300}hr{margin-top:1rem;margin-bottom:1rem;border:0;border-top:1px solid 
rgba(0,0,0,0.1)}small,.small{font-size:80%;font-weight:normal}mark,.mark{padding:.2em;background-color:#fcf8e3}.list-unstyled{padding-left:0;list-style:none}.list-inline{padding-left:0;list-style:none}.list-inline-item{display:inline-block}.list-inline-item:not(:last-child){margin-right:5px}.initialism{font-size:90%;text-transform:uppercase}.blockquote{padding:.5rem 1rem;margin-bottom:1rem;font-size:1.25rem;border-left:.25rem solid red}.blockquote-footer{display:block;font-size:80%;color:red}.blockquote-footer::before{content:"\2014 \00A0"}.blockquote-reverse{padding-right:1rem;padding-left:0;text-align:right;border-right:.25rem solid red;border-left:0}.blockquote-reverse .blockquote-footer::before{content:""}.blockquote-reverse .blockquote-footer::after{content:"\00A0 \2014"}dl.row>dd+dt{clear:left}.img-fluid,.carousel-inner>.carousel-item>img,.carousel-inner>.carousel-item>a>img{display:block;max-width:100%;height:auto}.img-rounded{border-radius:.3rem}.img-thumbnail{padding:.25rem;background-color:#fff;border:1px solid #ddd;border-radius:.25rem;-webkit-transition:all .2s ease-in-out;-o-transition:all .2s ease-in-out;transition:all .2s ease-in-out;-webkit-box-shadow:0 1px 2px rgba(0,0,0,0.075);box-shadow:0 1px 2px rgba(0,0,0,0.075);display:inline-block;max-width:100%;height:auto}.img-circle{border-radius:50%}.figure{display:inline-block}.figure-img{margin-bottom:.5rem;line-height:1}.figure-caption{font-size:90%;color:red}code,kbd,pre,samp{font-family:Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace}code{padding:.2rem .4rem;font-size:90%;color:#bd4147;background-color:#f7f7f9;border-radius:.25rem}kbd{padding:.2rem .4rem;font-size:90%;color:#fff;background-color:#333;border-radius:.2rem;-webkit-box-shadow:inset 0 -0.1rem 0 rgba(0,0,0,0.25);box-shadow:inset 0 -0.1rem 0 rgba(0,0,0,0.25)}kbd kbd{padding:0;font-size:100%;font-weight:bold;-webkit-box-shadow:none;box-shadow:none}pre{display:block;margin-top:0;margin-bottom:1rem;font-size:90%;color:red}pre code{padding:0;font-size:inherit;color:inherit;background-color:transparent;border-radius:0}.pre-scrollable{max-height:340px;overflow-y:scroll}.container{margin-left:auto;margin-right:auto;padding-left:15px;padding-right:15px}.container::after{content:"";display:table;clear:both}@media (min-width: 30rem){.container{max-width:32rem}}@media (min-width: 48rem){.container{max-width:45rem}}@media (min-width: 66.5rem){.container{max-width:58.75rem}}@media (min-width: 86rem){.container{max-width:71.25rem}}.container-fluid{margin-left:auto;margin-right:auto;padding-left:15px;padding-right:15px}.container-fluid::after{content:"";display:table;clear:both}.row{margin-left:-15px;margin-right:-15px}.row::after{content:"";display:table;clear:both}.col-xs-1,.col-xs-2,.col-xs-3,.col-xs-4,.col-xs-5,.col-xs-6,.col-xs-7,.col-xs-8,.col-xs-9,.col-xs-10,.col-xs-11,.col-xs-12,.col-sm-1,.col-sm-2,.col-sm-3,.col-sm-4,.col-sm-5,.col-sm-6,.col-sm-7,.col-sm-8,.col-sm-9,.col-sm-10,.col-sm-11,.col-sm-12,.col-md-1,.col-md-2,.col-md-3,.col-md-4,.col-md-5,.col-md-6,.col-md-7,.col-md-8,.col-md-9,.col-md-10,.col-md-11,.col-md-12,.col-lg-1,.col-lg-2,.col-lg-3,.col-lg-4,.col-lg-5,.col-lg-6,.col-lg-7,.col-lg-8,.col-lg-9,.col-lg-10,.col-lg-11,.col-lg-12,.col-xl-1,.col-xl-2,.col-xl-3,.col-xl-4,.col-xl-5,.col-xl-6,.col-xl-7,.col-xl-8,.col-xl-9,.col-xl-10,.col-xl-11,.col-xl-12{position:relative;min-height:1px;padding-right:15px;padding-left:15px}@media (min-width: 
20rem){.col-xs-1{float:left;width:8.33333%}.col-xs-2{float:left;width:16.66667%}.col-xs-3{float:left;width:25%}.col-xs-4{float:left;width:33.33333%}.col-xs-5{float:left;width:41.66667%}.col-xs-6{float:left;width:50%}.col-xs-7{float:left;width:58.33333%}.col-xs-8{float:left;width:66.66667%}.col-xs-9{float:left;width:75%}.col-xs-10{float:left;width:83.33333%}.col-xs-11{float:left;width:91.66667%}.col-xs-12{float:left;width:100%}.pull-xs-0{right:auto}.pull-xs-1{right:8.33333%}.pull-xs-2{right:16.66667%}.pull-xs-3{right:25%}.pull-xs-4{right:33.33333%}.pull-xs-5{right:41.66667%}.pull-xs-6{right:50%}.pull-xs-7{right:58.33333%}.pull-xs-8{right:66.66667%}.pull-xs-9{right:75%}.pull-xs-10{right:83.33333%}.pull-xs-11{right:91.66667%}.pull-xs-12{right:100%}.push-xs-0{left:auto}.push-xs-1{left:8.33333%}.push-xs-2{left:16.66667%}.push-xs-3{left:25%}.push-xs-4{left:33.33333%}.push-xs-5{left:41.66667%}.push-xs-6{left:50%}.push-xs-7{left:58.33333%}.push-xs-8{left:66.66667%}.push-xs-9{left:75%}.push-xs-10{left:83.33333%}.push-xs-11{left:91.66667%}.push-xs-12{left:100%}.offset-xs-1{margin-left:8.33333%}.offset-xs-2{margin-left:16.66667%}.offset-xs-3{margin-left:25%}.offset-xs-4{margin-left:33.33333%}.offset-xs-5{margin-left:41.66667%}.offset-xs-6{margin-left:50%}.offset-xs-7{margin-left:58.33333%}.offset-xs-8{margin-left:66.66667%}.offset-xs-9{margin-left:75%}.offset-xs-10{margin-left:83.33333%}.offset-xs-11{margin-left:91.66667%}}@media (min-width: 30rem){.col-sm-1{float:left;width:8.33333%}.col-sm-2{float:left;width:16.66667%}.col-sm-3{float:left;width:25%}.col-sm-4{float:left;width:33.33333%}.col-sm-5{float:left;width:41.66667%}.col-sm-6{float:left;width:50%}.col-sm-7{float:left;width:58.33333%}.col-sm-8{float:left;width:66.66667%}.col-sm-9{float:left;width:75%}.col-sm-10{float:left;width:83.33333%}.col-sm-11{float:left;width:91.66667%}.col-sm-12{float:left;width:100%}.pull-sm-0{right:auto}.pull-sm-1{right:8.33333%}.pull-sm-2{right:16.66667%}.pull-sm-3{right:25%}.pull-sm-4{right:33.33333%}.pull-sm-5{right:41.66667%}.pull-sm-6{right:50%}.pull-sm-7{right:58.33333%}.pull-sm-8{right:66.66667%}.pull-sm-9{right:75%}.pull-sm-10{right:83.33333%}.pull-sm-11{right:91.66667%}.pull-sm-12{right:100%}.push-sm-0{left:auto}.push-sm-1{left:8.33333%}.push-sm-2{left:16.66667%}.push-sm-3{left:25%}.push-sm-4{left:33.33333%}.push-sm-5{left:41.66667%}.push-sm-6{left:50%}.push-sm-7{left:58.33333%}.push-sm-8{left:66.66667%}.push-sm-9{left:75%}.push-sm-10{left:83.33333%}.push-sm-11{left:91.66667%}.push-sm-12{left:100%}.offset-sm-0{margin-left:0%}.offset-sm-1{margin-left:8.33333%}.offset-sm-2{margin-left:16.66667%}.offset-sm-3{margin-left:25%}.offset-sm-4{margin-left:33.33333%}.offset-sm-5{margin-left:41.66667%}.offset-sm-6{margin-left:50%}.offset-sm-7{margin-left:58.33333%}.offset-sm-8{margin-left:66.66667%}.offset-sm-9{margin-left:75%}.offset-sm-10{margin-left:83.33333%}.offset-sm-11{margin-left:91.66667%}}@media (min-width: 
48rem){.col-md-1{float:left;width:8.33333%}.col-md-2{float:left;width:16.66667%}.col-md-3{float:left;width:25%}.col-md-4{float:left;width:33.33333%}.col-md-5{float:left;width:41.66667%}.col-md-6{float:left;width:50%}.col-md-7{float:left;width:58.33333%}.col-md-8{float:left;width:66.66667%}.col-md-9{float:left;width:75%}.col-md-10{float:left;width:83.33333%}.col-md-11{float:left;width:91.66667%}.col-md-12{float:left;width:100%}.pull-md-0{right:auto}.pull-md-1{right:8.33333%}.pull-md-2{right:16.66667%}.pull-md-3{right:25%}.pull-md-4{right:33.33333%}.pull-md-5{right:41.66667%}.pull-md-6{right:50%}.pull-md-7{right:58.33333%}.pull-md-8{right:66.66667%}.pull-md-9{right:75%}.pull-md-10{right:83.33333%}.pull-md-11{right:91.66667%}.pull-md-12{right:100%}.push-md-0{left:auto}.push-md-1{left:8.33333%}.push-md-2{left:16.66667%}.push-md-3{left:25%}.push-md-4{left:33.33333%}.push-md-5{left:41.66667%}.push-md-6{left:50%}.push-md-7{left:58.33333%}.push-md-8{left:66.66667%}.push-md-9{left:75%}.push-md-10{left:83.33333%}.push-md-11{left:91.66667%}.push-md-12{left:100%}.offset-md-0{margin-left:0%}.offset-md-1{margin-left:8.33333%}.offset-md-2{margin-left:16.66667%}.offset-md-3{margin-left:25%}.offset-md-4{margin-left:33.33333%}.offset-md-5{margin-left:41.66667%}.offset-md-6{margin-left:50%}.offset-md-7{margin-left:58.33333%}.offset-md-8{margin-left:66.66667%}.offset-md-9{margin-left:75%}.offset-md-10{margin-left:83.33333%}.offset-md-11{margin-left:91.66667%}}@media (min-width: 66.5rem){.col-lg-1{float:left;width:8.33333%}.col-lg-2{float:left;width:16.66667%}.col-lg-3{float:left;width:25%}.col-lg-4{float:left;width:33.33333%}.col-lg-5{float:left;width:41.66667%}.col-lg-6{float:left;width:50%}.col-lg-7{float:left;width:58.33333%}.col-lg-8{float:left;width:66.66667%}.col-lg-9{float:left;width:75%}.col-lg-10{float:left;width:83.33333%}.col-lg-11{float:left;width:91.66667%}.col-lg-12{float:left;width:100%}.pull-lg-0{right:auto}.pull-lg-1{right:8.33333%}.pull-lg-2{right:16.66667%}.pull-lg-3{right:25%}.pull-lg-4{right:33.33333%}.pull-lg-5{right:41.66667%}.pull-lg-6{right:50%}.pull-lg-7{right:58.33333%}.pull-lg-8{right:66.66667%}.pull-lg-9{right:75%}.pull-lg-10{right:83.33333%}.pull-lg-11{right:91.66667%}.pull-lg-12{right:100%}.push-lg-0{left:auto}.push-lg-1{left:8.33333%}.push-lg-2{left:16.66667%}.push-lg-3{left:25%}.push-lg-4{left:33.33333%}.push-lg-5{left:41.66667%}.push-lg-6{left:50%}.push-lg-7{left:58.33333%}.push-lg-8{left:66.66667%}.push-lg-9{left:75%}.push-lg-10{left:83.33333%}.push-lg-11{left:91.66667%}.push-lg-12{left:100%}.offset-lg-0{margin-left:0%}.offset-lg-1{margin-left:8.33333%}.offset-lg-2{margin-left:16.66667%}.offset-lg-3{margin-left:25%}.offset-lg-4{margin-left:33.33333%}.offset-lg-5{margin-left:41.66667%}.offset-lg-6{margin-left:50%}.offset-lg-7{margin-left:58.33333%}.offset-lg-8{margin-left:66.66667%}.offset-lg-9{margin-left:75%}.offset-lg-10{margin-left:83.33333%}.offset-lg-11{margin-left:91.66667%}}@media (min-width: 
86rem){.col-xl-1{float:left;width:8.33333%}.col-xl-2{float:left;width:16.66667%}.col-xl-3{float:left;width:25%}.col-xl-4{float:left;width:33.33333%}.col-xl-5{float:left;width:41.66667%}.col-xl-6{float:left;width:50%}.col-xl-7{float:left;width:58.33333%}.col-xl-8{float:left;width:66.66667%}.col-xl-9{float:left;width:75%}.col-xl-10{float:left;width:83.33333%}.col-xl-11{float:left;width:91.66667%}.col-xl-12{float:left;width:100%}.pull-xl-0{right:auto}.pull-xl-1{right:8.33333%}.pull-xl-2{right:16.66667%}.pull-xl-3{right:25%}.pull-xl-4{right:33.33333%}.pull-xl-5{right:41.66667%}.pull-xl-6{right:50%}.pull-xl-7{right:58.33333%}.pull-xl-8{right:66.66667%}.pull-xl-9{right:75%}.pull-xl-10{right:83.33333%}.pull-xl-11{right:91.66667%}.pull-xl-12{right:100%}.push-xl-0{left:auto}.push-xl-1{left:8.33333%}.push-xl-2{left:16.66667%}.push-xl-3{left:25%}.push-xl-4{left:33.33333%}.push-xl-5{left:41.66667%}.push-xl-6{left:50%}.push-xl-7{left:58.33333%}.push-xl-8{left:66.66667%}.push-xl-9{left:75%}.push-xl-10{left:83.33333%}.push-xl-11{left:91.66667%}.push-xl-12{left:100%}.offset-xl-0{margin-left:0%}.offset-xl-1{margin-left:8.33333%}.offset-xl-2{margin-left:16.66667%}.offset-xl-3{margin-left:25%}.offset-xl-4{margin-left:33.33333%}.offset-xl-5{margin-left:41.66667%}.offset-xl-6{margin-left:50%}.offset-xl-7{margin-left:58.33333%}.offset-xl-8{margin-left:66.66667%}.offset-xl-9{margin-left:75%}.offset-xl-10{margin-left:83.33333%}.offset-xl-11{margin-left:91.66667%}}.table{width:100%;max-width:100%;margin-bottom:1rem}.table th,.table td{padding:.75rem;vertical-align:top;border-top:1px solid red}.table thead th{vertical-align:bottom;border-bottom:2px solid red}.table tbody+tbody{border-top:2px solid red}.table .table{background-color:#fff}.table-sm th,.table-sm td{padding:.3rem}.table-bordered{border:1px solid red}.table-bordered th,.table-bordered td{border:1px solid red}.table-bordered thead th,.table-bordered thead td{border-bottom-width:2px}.table-striped tbody tr:nth-of-type(odd){background-color:rgba(0,0,0,0.05)}.table-hover tbody tr:hover{background-color:rgba(0,0,0,0.075)}.table-active,.table-active>th,.table-active>td{background-color:rgba(0,0,0,0.075)}.table-hover .table-active:hover{background-color:rgba(0,0,0,0.075)}.table-hover .table-active:hover>td,.table-hover .table-active:hover>th{background-color:rgba(0,0,0,0.075)}.table-success,.table-success>th,.table-success>td{background-color:#dff0d8}.table-hover .table-success:hover{background-color:#d0e9c6}.table-hover .table-success:hover>td,.table-hover .table-success:hover>th{background-color:#d0e9c6}.table-info,.table-info>th,.table-info>td{background-color:#d9edf7}.table-hover .table-info:hover{background-color:#c4e3f3}.table-hover .table-info:hover>td,.table-hover .table-info:hover>th{background-color:#c4e3f3}.table-warning,.table-warning>th,.table-warning>td{background-color:#fcf8e3}.table-hover .table-warning:hover{background-color:#faf2cc}.table-hover .table-warning:hover>td,.table-hover .table-warning:hover>th{background-color:#faf2cc}.table-danger,.table-danger>th,.table-danger>td{background-color:#f2dede}.table-hover .table-danger:hover{background-color:#ebcccc}.table-hover .table-danger:hover>td,.table-hover .table-danger:hover>th{background-color:#ebcccc}.thead-inverse th{color:#fff;background-color:red}.thead-default th{color:red;background-color:red}.table-inverse{color:red;background-color:red}.table-inverse th,.table-inverse td,.table-inverse thead 
th{border-color:red}.table-inverse.table-bordered{border:0}.table-responsive{display:block;width:100%;min-height:.01%;overflow-x:auto}.table-reflow thead{float:left}.table-reflow tbody{display:block;white-space:nowrap}.table-reflow th,.table-reflow td{border-top:1px solid red;border-left:1px solid red}.table-reflow th:last-child,.table-reflow td:last-child{border-right:1px solid red}.table-reflow thead:last-child tr:last-child th,.table-reflow thead:last-child tr:last-child td,.table-reflow tbody:last-child tr:last-child th,.table-reflow tbody:last-child tr:last-child td,.table-reflow tfoot:last-child tr:last-child th,.table-reflow tfoot:last-child tr:last-child td{border-bottom:1px solid red}.table-reflow tr{float:left}.table-reflow tr th,.table-reflow tr td{display:block !important;border:1px solid red}.form-control{display:block;width:100%;padding:.5rem .75rem;font-size:1rem;line-height:1.25;color:red;background-color:#fff;background-image:none;-webkit-background-clip:padding-box;background-clip:padding-box;border:1px solid rgba(0,0,0,0.15);border-radius:.25rem;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-webkit-transition:border-color ease-in-out 0.15s,-webkit-box-shadow ease-in-out 0.15s;transition:border-color ease-in-out 0.15s,-webkit-box-shadow ease-in-out 0.15s;-o-transition:border-color ease-in-out 0.15s,box-shadow ease-in-out 0.15s;transition:border-color ease-in-out 0.15s,box-shadow ease-in-out 0.15s;transition:border-color ease-in-out 0.15s,box-shadow ease-in-out 0.15s,-webkit-box-shadow ease-in-out 0.15s}.form-control::-ms-expand{background-color:transparent;border:0}.form-control:focus{color:red;background-color:#fff;border-color:#66afe9;outline:none;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(102,175,233,0.6);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(102,175,233,0.6)}.form-control::-webkit-input-placeholder{color:#999;opacity:1}.form-control::-moz-placeholder{color:#999;opacity:1}.form-control:-ms-input-placeholder{color:#999;opacity:1}.form-control::placeholder{color:#999;opacity:1}.form-control:disabled,.form-control[readonly]{background-color:red;opacity:1}.form-control:disabled{cursor:not-allowed}select.form-control:not([size]):not([multiple]){height:2.5rem}select.form-control:focus::-ms-value{color:red;background-color:#fff}.form-control-file,.form-control-range{display:block}.col-form-label{padding-top:.5rem;padding-bottom:.5rem;margin-bottom:0}.col-form-label-lg{padding-top:.75rem;padding-bottom:.75rem;font-size:1.25rem}.col-form-label-sm{padding-top:.25rem;padding-bottom:.25rem;font-size:.875rem}.col-form-legend{padding-top:.5rem;padding-bottom:.5rem;margin-bottom:0;font-size:1rem}.form-control-static{min-height:2.5rem;padding-top:.5rem;padding-bottom:.5rem;margin-bottom:0}.form-control-static.form-control-sm,.input-group-sm>.form-control-static.form-control,.input-group-sm>.form-control-static.input-group-addon,.input-group-sm>.input-group-btn>.form-control-static.btn,.form-control-static.form-control-lg,.input-group-lg>.form-control-static.form-control,.input-group-lg>.form-control-static.input-group-addon,.input-group-lg>.input-group-btn>.form-control-static.btn{padding-right:0;padding-left:0}.form-control-sm,.input-group-sm>.form-control,.input-group-sm>.input-group-addon,.input-group-sm>.input-group-btn>.btn{padding:.25rem 
.5rem;font-size:.875rem;border-radius:.2rem}select.form-control-sm:not([size]):not([multiple]),.input-group-sm>select.form-control:not([size]):not([multiple]),.input-group-sm>select.input-group-addon:not([size]):not([multiple]),.input-group-sm>.input-group-btn>select.btn:not([size]):not([multiple]){height:1.8125rem}.form-control-lg,.input-group-lg>.form-control,.input-group-lg>.input-group-addon,.input-group-lg>.input-group-btn>.btn{padding:.75rem 1.5rem;font-size:1.25rem;border-radius:.3rem}select.form-control-lg:not([size]):not([multiple]),.input-group-lg>select.form-control:not([size]):not([multiple]),.input-group-lg>select.input-group-addon:not([size]):not([multiple]),.input-group-lg>.input-group-btn>select.btn:not([size]):not([multiple]){height:3.16667rem}.form-group{margin-bottom:1rem}.form-text{display:block;margin-top:.25rem}.form-check{position:relative;display:block;margin-bottom:.75rem}.form-check+.form-check{margin-top:-.25rem}.form-check.disabled .form-check-label{color:red;cursor:not-allowed}.form-check-label{padding-left:1.25rem;margin-bottom:0;cursor:pointer}.form-check-input{position:absolute;margin-top:.25rem;margin-left:-1.25rem}.form-check-input:only-child{position:static}.form-check-inline{position:relative;display:inline-block;padding-left:1.25rem;margin-bottom:0;vertical-align:middle;cursor:pointer}.form-check-inline+.form-check-inline{margin-left:.75rem}.form-check-inline.disabled{cursor:not-allowed}.form-control-feedback{margin-top:.25rem}.form-control-success,.form-control-warning,.form-control-danger{padding-right:2.25rem;background-repeat:no-repeat;background-position:center right .625rem;-webkit-background-size:1.25rem 1.25rem;background-size:1.25rem 1.25rem}.has-success .form-control-feedback,.has-success .form-control-label,.has-success .radio,.has-success .checkbox,.has-success .radio-inline,.has-success .checkbox-inline,.has-success.radio label,.has-success.checkbox label,.has-success.radio-inline label,.has-success.checkbox-inline label,.has-success .custom-control{color:red}.has-success .form-control{border-color:red}.has-success .input-group-addon{color:red;border-color:red;background-color:#fcc}.has-success .form-control-feedback{color:red}.has-success .form-control-success{background-image:url("data:image/svg+xml;charset=utf8,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 8 8'%3E%3Cpath fill='%235cb85c' d='M2.3 6.73L.6 4.53c-.4-1.04.46-1.4 1.1-.8l1.1 1.4 3.4-3.8c.6-.63 1.6-.27 1.2.7l-4 4.6c-.43.5-.8.4-1.1.1z'/%3E%3C/svg%3E")}.has-warning .form-control-feedback,.has-warning .form-control-label,.has-warning .radio,.has-warning .checkbox,.has-warning .radio-inline,.has-warning .checkbox-inline,.has-warning.radio label,.has-warning.checkbox label,.has-warning.radio-inline label,.has-warning.checkbox-inline label,.has-warning .custom-control{color:red}.has-warning .form-control{border-color:red}.has-warning .input-group-addon{color:red;border-color:red;background-color:#fcc}.has-warning .form-control-feedback{color:red}.has-warning .form-control-warning{background-image:url("data:image/svg+xml;charset=utf8,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 8 8'%3E%3Cpath fill='%23f0ad4e' d='M4.4 5.324h-.8v-2.46h.8zm0 1.42h-.8V5.89h.8zM3.76.63L.04 7.075c-.115.2.016.425.26.426h7.397c.242 0 .372-.226.258-.426C6.726 4.924 5.47 2.79 4.253.63c-.113-.174-.39-.174-.494 0z'/%3E%3C/svg%3E")}.has-danger .form-control-feedback,.has-danger .form-control-label,.has-danger .radio,.has-danger .checkbox,.has-danger .radio-inline,.has-danger 
.checkbox-inline,.has-danger.radio label,.has-danger.checkbox label,.has-danger.radio-inline label,.has-danger.checkbox-inline label,.has-danger .custom-control{color:red}.has-danger .form-control{border-color:red}.has-danger .input-group-addon{color:red;border-color:red;background-color:#fcc}.has-danger .form-control-feedback{color:red}.has-danger .form-control-danger{background-image:url("data:image/svg+xml;charset=utf8,%3Csvg xmlns='http://www.w3.org/2000/svg' fill='%23d9534f' viewBox='-2 -2 7 7'%3E%3Cpath stroke='%23d9534f' d='M0 0l3 3m0-3L0 3'/%3E%3Ccircle r='.5'/%3E%3Ccircle cx='3' r='.5'/%3E%3Ccircle cy='3' r='.5'/%3E%3Ccircle cx='3' cy='3' r='.5'/%3E%3C/svg%3E")}@media (min-width: 30rem){.form-inline .form-group{display:inline-block;margin-bottom:0;vertical-align:middle}.form-inline .form-control{display:inline-block;width:auto;vertical-align:middle}.form-inline .form-control-static{display:inline-block}.form-inline .input-group{display:inline-table;vertical-align:middle}.form-inline .input-group .input-group-addon,.form-inline .input-group .input-group-btn,.form-inline .input-group .form-control{width:auto}.form-inline .input-group>.form-control{width:100%}.form-inline .form-control-label{margin-bottom:0;vertical-align:middle}.form-inline .form-check{display:inline-block;margin-top:0;margin-bottom:0;vertical-align:middle}.form-inline .form-check-label{padding-left:0}.form-inline .form-check-input{position:relative;margin-left:0}.form-inline .has-feedback .form-control-feedback{top:0}}.btn{display:inline-block;font-weight:normal;line-height:1.25;text-align:center;white-space:nowrap;vertical-align:middle;cursor:pointer;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;border:1px solid transparent;padding:.5rem 1rem;font-size:1rem;border-radius:.25rem;-webkit-transition:all 0.2s ease-in-out;-o-transition:all 0.2s ease-in-out;transition:all 0.2s ease-in-out}.btn:focus,.btn.focus,.btn:active:focus,.btn:active.focus,.btn.active:focus,.btn.active.focus{outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.btn:focus,.btn:hover{text-decoration:none}.btn.focus{text-decoration:none}.btn:active,.btn.active{background-image:none;outline:0;-webkit-box-shadow:inset 0 3px 5px rgba(0,0,0,0.125);box-shadow:inset 0 3px 5px rgba(0,0,0,0.125)}.btn.disabled,.btn:disabled{cursor:not-allowed;opacity:.65;-webkit-box-shadow:none;box-shadow:none}a.btn.disabled,fieldset[disabled] a.btn{pointer-events:none}.btn-primary{color:#fff;background-color:#f99d53;border-color:#f99d53;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075)}.btn-primary:hover{color:#fff;background-color:#f78122;border-color:#f77b18}.btn-primary:focus,.btn-primary.focus{color:#fff;background-color:#f78122;border-color:#f77b18}.btn-primary:active,.btn-primary.active,.open>.btn-primary.dropdown-toggle{color:#fff;background-color:#f78122;border-color:#f77b18;background-image:none;-webkit-box-shadow:inset 0 3px 5px rgba(0,0,0,0.125);box-shadow:inset 0 3px 5px 
rgba(0,0,0,0.125)}.btn-primary:active:hover,.btn-primary:active:focus,.btn-primary:active.focus,.btn-primary.active:hover,.btn-primary.active:focus,.btn-primary.active.focus,.open>.btn-primary.dropdown-toggle:hover,.open>.btn-primary.dropdown-toggle:focus,.open>.btn-primary.dropdown-toggle.focus{color:#fff;background-color:#ed6e08;border-color:#c65c07}.btn-primary.disabled:focus,.btn-primary.disabled.focus,.btn-primary:disabled:focus,.btn-primary:disabled.focus{background-color:#f99d53;border-color:#f99d53}.btn-primary.disabled:hover,.btn-primary:disabled:hover{background-color:#f99d53;border-color:#f99d53}.btn-secondary{color:red;background-color:#fff;border-color:#ccc;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075)}.btn-secondary:hover{color:red;background-color:#e6e6e6;border-color:#adadad}.btn-secondary:focus,.btn-secondary.focus{color:red;background-color:#e6e6e6;border-color:#adadad}.btn-secondary:active,.btn-secondary.active,.open>.btn-secondary.dropdown-toggle{color:red;background-color:#e6e6e6;border-color:#adadad;background-image:none;-webkit-box-shadow:inset 0 3px 5px rgba(0,0,0,0.125);box-shadow:inset 0 3px 5px rgba(0,0,0,0.125)}.btn-secondary:active:hover,.btn-secondary:active:focus,.btn-secondary:active.focus,.btn-secondary.active:hover,.btn-secondary.active:focus,.btn-secondary.active.focus,.open>.btn-secondary.dropdown-toggle:hover,.open>.btn-secondary.dropdown-toggle:focus,.open>.btn-secondary.dropdown-toggle.focus{color:red;background-color:#d4d4d4;border-color:#8c8c8c}.btn-secondary.disabled:focus,.btn-secondary.disabled.focus,.btn-secondary:disabled:focus,.btn-secondary:disabled.focus{background-color:#fff;border-color:#ccc}.btn-secondary.disabled:hover,.btn-secondary:disabled:hover{background-color:#fff;border-color:#ccc}.btn-info{color:#fff;background-color:red;border-color:red;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075)}.btn-info:hover{color:#fff;background-color:#c00;border-color:#c20000}.btn-info:focus,.btn-info.focus{color:#fff;background-color:#c00;border-color:#c20000}.btn-info:active,.btn-info.active,.open>.btn-info.dropdown-toggle{color:#fff;background-color:#c00;border-color:#c20000;background-image:none;-webkit-box-shadow:inset 0 3px 5px rgba(0,0,0,0.125);box-shadow:inset 0 3px 5px rgba(0,0,0,0.125)}.btn-info:active:hover,.btn-info:active:focus,.btn-info:active.focus,.btn-info.active:hover,.btn-info.active:focus,.btn-info.active.focus,.open>.btn-info.dropdown-toggle:hover,.open>.btn-info.dropdown-toggle:focus,.open>.btn-info.dropdown-toggle.focus{color:#fff;background-color:#a80000;border-color:maroon}.btn-info.disabled:focus,.btn-info.disabled.focus,.btn-info:disabled:focus,.btn-info:disabled.focus{background-color:red;border-color:red}.btn-info.disabled:hover,.btn-info:disabled:hover{background-color:red;border-color:red}.btn-success{color:#fff;background-color:red;border-color:red;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px 
rgba(0,0,0,0.075)}.btn-success:hover{color:#fff;background-color:#c00;border-color:#c20000}.btn-success:focus,.btn-success.focus{color:#fff;background-color:#c00;border-color:#c20000}.btn-success:active,.btn-success.active,.open>.btn-success.dropdown-toggle{color:#fff;background-color:#c00;border-color:#c20000;background-image:none;-webkit-box-shadow:inset 0 3px 5px rgba(0,0,0,0.125);box-shadow:inset 0 3px 5px rgba(0,0,0,0.125)}.btn-success:active:hover,.btn-success:active:focus,.btn-success:active.focus,.btn-success.active:hover,.btn-success.active:focus,.btn-success.active.focus,.open>.btn-success.dropdown-toggle:hover,.open>.btn-success.dropdown-toggle:focus,.open>.btn-success.dropdown-toggle.focus{color:#fff;background-color:#a80000;border-color:maroon}.btn-success.disabled:focus,.btn-success.disabled.focus,.btn-success:disabled:focus,.btn-success:disabled.focus{background-color:red;border-color:red}.btn-success.disabled:hover,.btn-success:disabled:hover{background-color:red;border-color:red}.btn-warning{color:#fff;background-color:red;border-color:red;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075)}.btn-warning:hover{color:#fff;background-color:#c00;border-color:#c20000}.btn-warning:focus,.btn-warning.focus{color:#fff;background-color:#c00;border-color:#c20000}.btn-warning:active,.btn-warning.active,.open>.btn-warning.dropdown-toggle{color:#fff;background-color:#c00;border-color:#c20000;background-image:none;-webkit-box-shadow:inset 0 3px 5px rgba(0,0,0,0.125);box-shadow:inset 0 3px 5px rgba(0,0,0,0.125)}.btn-warning:active:hover,.btn-warning:active:focus,.btn-warning:active.focus,.btn-warning.active:hover,.btn-warning.active:focus,.btn-warning.active.focus,.open>.btn-warning.dropdown-toggle:hover,.open>.btn-warning.dropdown-toggle:focus,.open>.btn-warning.dropdown-toggle.focus{color:#fff;background-color:#a80000;border-color:maroon}.btn-warning.disabled:focus,.btn-warning.disabled.focus,.btn-warning:disabled:focus,.btn-warning:disabled.focus{background-color:red;border-color:red}.btn-warning.disabled:hover,.btn-warning:disabled:hover{background-color:red;border-color:red}.btn-danger{color:#fff;background-color:red;border-color:red;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.15),0 1px 1px rgba(0,0,0,0.075)}.btn-danger:hover{color:#fff;background-color:#c00;border-color:#c20000}.btn-danger:focus,.btn-danger.focus{color:#fff;background-color:#c00;border-color:#c20000}.btn-danger:active,.btn-danger.active,.open>.btn-danger.dropdown-toggle{color:#fff;background-color:#c00;border-color:#c20000;background-image:none;-webkit-box-shadow:inset 0 3px 5px rgba(0,0,0,0.125);box-shadow:inset 0 3px 5px 
rgba(0,0,0,0.125)}.btn-danger:active:hover,.btn-danger:active:focus,.btn-danger:active.focus,.btn-danger.active:hover,.btn-danger.active:focus,.btn-danger.active.focus,.open>.btn-danger.dropdown-toggle:hover,.open>.btn-danger.dropdown-toggle:focus,.open>.btn-danger.dropdown-toggle.focus{color:#fff;background-color:#a80000;border-color:maroon}.btn-danger.disabled:focus,.btn-danger.disabled.focus,.btn-danger:disabled:focus,.btn-danger:disabled.focus{background-color:red;border-color:red}.btn-danger.disabled:hover,.btn-danger:disabled:hover{background-color:red;border-color:red}.btn-outline-primary{color:#f99d53;background-image:none;background-color:transparent;border-color:#f99d53}.btn-outline-primary:hover{color:#fff;background-color:#f99d53;border-color:#f99d53}.btn-outline-primary:focus,.btn-outline-primary.focus{color:#fff;background-color:#f99d53;border-color:#f99d53}.btn-outline-primary:active,.btn-outline-primary.active,.open>.btn-outline-primary.dropdown-toggle{color:#fff;background-color:#f99d53;border-color:#f99d53}.btn-outline-primary:active:hover,.btn-outline-primary:active:focus,.btn-outline-primary:active.focus,.btn-outline-primary.active:hover,.btn-outline-primary.active:focus,.btn-outline-primary.active.focus,.open>.btn-outline-primary.dropdown-toggle:hover,.open>.btn-outline-primary.dropdown-toggle:focus,.open>.btn-outline-primary.dropdown-toggle.focus{color:#fff;background-color:#ed6e08;border-color:#c65c07}.btn-outline-primary.disabled:focus,.btn-outline-primary.disabled.focus,.btn-outline-primary:disabled:focus,.btn-outline-primary:disabled.focus{border-color:#fcd5b6}.btn-outline-primary.disabled:hover,.btn-outline-primary:disabled:hover{border-color:#fcd5b6}.btn-outline-secondary{color:#ccc;background-image:none;background-color:transparent;border-color:#ccc}.btn-outline-secondary:hover{color:#fff;background-color:#ccc;border-color:#ccc}.btn-outline-secondary:focus,.btn-outline-secondary.focus{color:#fff;background-color:#ccc;border-color:#ccc}.btn-outline-secondary:active,.btn-outline-secondary.active,.open>.btn-outline-secondary.dropdown-toggle{color:#fff;background-color:#ccc;border-color:#ccc}.btn-outline-secondary:active:hover,.btn-outline-secondary:active:focus,.btn-outline-secondary:active.focus,.btn-outline-secondary.active:hover,.btn-outline-secondary.active:focus,.btn-outline-secondary.active.focus,.open>.btn-outline-secondary.dropdown-toggle:hover,.open>.btn-outline-secondary.dropdown-toggle:focus,.open>.btn-outline-secondary.dropdown-toggle.focus{color:#fff;background-color:#a1a1a1;border-color:#8c8c8c}.btn-outline-secondary.disabled:focus,.btn-outline-secondary.disabled.focus,.btn-outline-secondary:disabled:focus,.btn-outline-secondary:disabled.focus{border-color:#fff}.btn-outline-secondary.disabled:hover,.btn-outline-secondary:disabled:hover{border-color:#fff}.btn-outline-info{color:red;background-image:none;background-color:transparent;border-color:red}.btn-outline-info:hover{color:#fff;background-color:red;border-color:red}.btn-outline-info:focus,.btn-outline-info.focus{color:#fff;background-color:red;border-color:red}.btn-outline-info:active,.btn-outline-info.active,.open>.btn-outline-info.dropdown-toggle{color:#fff;background-color:red;border-color:red}.btn-outline-info:active:hover,.btn-outline-info:active:focus,.btn-outline-info:active.focus,.btn-outline-info.active:hover,.btn-outline-info.active:focus,.btn-outline-info.active.focus,.open>.btn-outline-info.dropdown-toggle:hover,.open>.btn-outline-info.dropdown-toggle:focus,
.open>.btn-outline-info.dropdown-toggle.focus{color:#fff;background-color:#a80000;border-color:maroon}.btn-outline-info.disabled:focus,.btn-outline-info.disabled.focus,.btn-outline-info:disabled:focus,.btn-outline-info:disabled.focus{border-color:#f66}.btn-outline-info.disabled:hover,.btn-outline-info:disabled:hover{border-color:#f66}.btn-outline-success{color:red;background-image:none;background-color:transparent;border-color:red}.btn-outline-success:hover{color:#fff;background-color:red;border-color:red}.btn-outline-success:focus,.btn-outline-success.focus{color:#fff;background-color:red;border-color:red}.btn-outline-success:active,.btn-outline-success.active,.open>.btn-outline-success.dropdown-toggle{color:#fff;background-color:red;border-color:red}.btn-outline-success:active:hover,.btn-outline-success:active:focus,.btn-outline-success:active.focus,.btn-outline-success.active:hover,.btn-outline-success.active:focus,.btn-outline-success.active.focus,.open>.btn-outline-success.dropdown-toggle:hover,.open>.btn-outline-success.dropdown-toggle:focus,.open>.btn-outline-success.dropdown-toggle.focus{color:#fff;background-color:#a80000;border-color:maroon}.btn-outline-success.disabled:focus,.btn-outline-success.disabled.focus,.btn-outline-success:disabled:focus,.btn-outline-success:disabled.focus{border-color:#f66}.btn-outline-success.disabled:hover,.btn-outline-success:disabled:hover{border-color:#f66}.btn-outline-warning{color:red;background-image:none;background-color:transparent;border-color:red}.btn-outline-warning:hover{color:#fff;background-color:red;border-color:red}.btn-outline-warning:focus,.btn-outline-warning.focus{color:#fff;background-color:red;border-color:red}.btn-outline-warning:active,.btn-outline-warning.active,.open>.btn-outline-warning.dropdown-toggle{color:#fff;background-color:red;border-color:red}.btn-outline-warning:active:hover,.btn-outline-warning:active:focus,.btn-outline-warning:active.focus,.btn-outline-warning.active:hover,.btn-outline-warning.active:focus,.btn-outline-warning.active.focus,.open>.btn-outline-warning.dropdown-toggle:hover,.open>.btn-outline-warning.dropdown-toggle:focus,.open>.btn-outline-warning.dropdown-toggle.focus{color:#fff;background-color:#a80000;border-color:maroon}.btn-outline-warning.disabled:focus,.btn-outline-warning.disabled.focus,.btn-outline-warning:disabled:focus,.btn-outline-warning:disabled.focus{border-color:#f66}.btn-outline-warning.disabled:hover,.btn-outline-warning:disabled:hover{border-color:#f66}.btn-outline-danger{color:red;background-image:none;background-color:transparent;border-color:red}.btn-outline-danger:hover{color:#fff;background-color:red;border-color:red}.btn-outline-danger:focus,.btn-outline-danger.focus{color:#fff;background-color:red;border-color:red}.btn-outline-danger:active,.btn-outline-danger.active,.open>.btn-outline-danger.dropdown-toggle{color:#fff;background-color:red;border-color:red}.btn-outline-danger:active:hover,.btn-outline-danger:active:focus,.btn-outline-danger:active.focus,.btn-outline-danger.active:hover,.btn-outline-danger.active:focus,.btn-outline-danger.active.focus,.open>.btn-outline-danger.dropdown-toggle:hover,.open>.btn-outline-danger.dropdown-toggle:focus,.open>.btn-outline-danger.dropdown-toggle.focus{color:#fff;background-color:#a80000;border-color:maroon}.btn-outline-danger.disabled:focus,.btn-outline-danger.disabled.focus,.btn-outline-danger:disabled:focus,.btn-outline-danger:disabled.focus{border-color:#f66}.btn-outline-danger.disabled:hover,.btn-outline-danger:disabled:hover{border-color:#f66}
.btn-link{font-weight:normal;color:#f99d53;border-radius:0}.btn-link,.btn-link:active,.btn-link.active,.btn-link:disabled{background-color:transparent;-webkit-box-shadow:none;box-shadow:none}.btn-link,.btn-link:focus,.btn-link:active{border-color:transparent}.btn-link:hover{border-color:transparent}.btn-link:focus,.btn-link:hover{color:#f67309;text-decoration:underline;background-color:transparent}.btn-link:disabled:focus,.btn-link:disabled:hover{color:red;text-decoration:none}.btn-lg,.btn-group-lg>.btn{padding:.75rem 1.5rem;font-size:1.25rem;border-radius:.3rem}.btn-sm,.btn-group-sm>.btn{padding:.25rem .5rem;font-size:.875rem;border-radius:.2rem}.btn-block{display:block;width:100%}.btn-block+.btn-block{margin-top:.5rem}input[type="submit"].btn-block,input[type="reset"].btn-block,input[type="button"].btn-block{width:100%}.fade{opacity:0;-webkit-transition:opacity .15s linear;-o-transition:opacity .15s linear;transition:opacity .15s linear}.fade.in{opacity:1}.collapse{display:none}.collapse.in{display:block}.collapsing{position:relative;height:0;overflow:hidden;-webkit-transition-timing-function:ease;-o-transition-timing-function:ease;transition-timing-function:ease;-webkit-transition-duration:.35s;-o-transition-duration:.35s;transition-duration:.35s;-webkit-transition-property:height;-o-transition-property:height;transition-property:height}.dropup,.dropdown{position:relative}.dropdown-toggle::after{display:inline-block;width:0;height:0;margin-left:.3em;vertical-align:middle;content:"";border-top:.3em solid;border-right:.3em solid transparent;border-left:.3em solid transparent}.dropdown-toggle:focus{outline:0}.dropup .dropdown-toggle::after{border-top:0;border-bottom:.3em solid}.dropdown-menu{position:absolute;top:100%;left:0;z-index:1000;display:none;float:left;min-width:160px;padding:5px 0;margin:2px 0 0;font-size:1rem;color:red;text-align:left;list-style:none;background-color:#fff;-webkit-background-clip:padding-box;background-clip:padding-box;border:1px solid rgba(0,0,0,0.15);border-radius:.25rem;-webkit-box-shadow:0 6px 12px rgba(0,0,0,0.175);box-shadow:0 6px 12px rgba(0,0,0,0.175)}.dropdown-divider{height:1px;margin:.5rem 0;overflow:hidden;background-color:#e5e5e5}.dropdown-item{display:block;width:100%;padding:3px 20px;clear:both;font-weight:normal;color:red;text-align:inherit;white-space:nowrap;background:none;border:0}.dropdown-item:focus,.dropdown-item:hover{color:#e60000;text-decoration:none;background-color:#f5f5f5}.dropdown-item.active,.dropdown-item.active:focus,.dropdown-item.active:hover{color:#fff;text-decoration:none;background-color:#f99d53;outline:0}.dropdown-item.disabled,.dropdown-item.disabled:focus,.dropdown-item.disabled:hover{color:red}.dropdown-item.disabled:focus,.dropdown-item.disabled:hover{text-decoration:none;cursor:not-allowed;background-color:transparent;background-image:none;filter:"progid:DXImageTransform.Microsoft.gradient(enabled = false)"}.open>.dropdown-menu{display:block}.open>a{outline:0}.dropdown-menu-right{right:0;left:auto}.dropdown-menu-left{right:auto;left:0}.dropdown-header{display:block;padding:5px 20px;font-size:.875rem;color:red;white-space:nowrap}.dropdown-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:990}.dropup .caret,.navbar-fixed-bottom .dropdown .caret{content:"";border-top:0;border-bottom:.3em solid}.dropup .dropdown-menu,.navbar-fixed-bottom .dropdown 
.dropdown-menu{top:auto;bottom:100%;margin-bottom:2px}.btn-group,.btn-group-vertical{position:relative;display:inline-block;vertical-align:middle}.btn-group>.btn,.btn-group-vertical>.btn{position:relative;float:left}.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active,.btn-group-vertical>.btn:focus,.btn-group-vertical>.btn:active,.btn-group-vertical>.btn.active{z-index:2}.btn-group>.btn:hover,.btn-group-vertical>.btn:hover{z-index:2}.btn-group .btn+.btn,.btn-group .btn+.btn-group,.btn-group .btn-group+.btn,.btn-group .btn-group+.btn-group{margin-left:-1px}.btn-toolbar{margin-left:-0.5rem}.btn-toolbar::after{content:"";display:table;clear:both}.btn-toolbar .btn-group,.btn-toolbar .input-group{float:left}.btn-toolbar>.btn,.btn-toolbar>.btn-group,.btn-toolbar>.input-group{margin-left:.5rem}.btn-group>.btn:not(:first-child):not(:last-child):not(.dropdown-toggle){border-radius:0}.btn-group>.btn:first-child{margin-left:0}.btn-group>.btn:first-child:not(:last-child):not(.dropdown-toggle){border-bottom-right-radius:0;border-top-right-radius:0}.btn-group>.btn:last-child:not(:first-child),.btn-group>.dropdown-toggle:not(:first-child){border-bottom-left-radius:0;border-top-left-radius:0}.btn-group>.btn-group{float:left}.btn-group>.btn-group:not(:first-child):not(:last-child)>.btn{border-radius:0}.btn-group>.btn-group:first-child:not(:last-child)>.btn:last-child,.btn-group>.btn-group:first-child:not(:last-child)>.dropdown-toggle{border-bottom-right-radius:0;border-top-right-radius:0}.btn-group>.btn-group:last-child:not(:first-child)>.btn:first-child{border-bottom-left-radius:0;border-top-left-radius:0}.btn-group .dropdown-toggle:active,.btn-group.open .dropdown-toggle{outline:0}.btn+.dropdown-toggle-split{padding-right:.75rem;padding-left:.75rem}.btn+.dropdown-toggle-split::after{margin-left:0}.btn-sm+.dropdown-toggle-split,.btn-group-sm>.btn+.dropdown-toggle-split{padding-right:.375rem;padding-left:.375rem}.btn-lg+.dropdown-toggle-split,.btn-group-lg>.btn+.dropdown-toggle-split{padding-right:1.125rem;padding-left:1.125rem}.btn-group.open .dropdown-toggle{-webkit-box-shadow:inset 0 3px 5px rgba(0,0,0,0.125);box-shadow:inset 0 3px 5px rgba(0,0,0,0.125)}.btn-group.open .dropdown-toggle.btn-link{-webkit-box-shadow:none;box-shadow:none}.btn .caret{margin-left:0}.btn-lg .caret,.btn-group-lg>.btn .caret{border-width:.3em .3em 0;border-bottom-width:0}.dropup .btn-lg .caret,.dropup .btn-group-lg>.btn .caret{border-width:0 .3em 
.3em}.btn-group-vertical>.btn,.btn-group-vertical>.btn-group,.btn-group-vertical>.btn-group>.btn{display:block;float:none;width:100%;max-width:100%}.btn-group-vertical>.btn-group::after{content:"";display:table;clear:both}.btn-group-vertical>.btn-group>.btn{float:none}.btn-group-vertical>.btn+.btn,.btn-group-vertical>.btn+.btn-group,.btn-group-vertical>.btn-group+.btn,.btn-group-vertical>.btn-group+.btn-group{margin-top:-1px;margin-left:0}.btn-group-vertical>.btn:not(:first-child):not(:last-child){border-radius:0}.btn-group-vertical>.btn:first-child:not(:last-child){border-bottom-right-radius:0;border-bottom-left-radius:0}.btn-group-vertical>.btn:last-child:not(:first-child){border-top-right-radius:0;border-top-left-radius:0}.btn-group-vertical>.btn-group:not(:first-child):not(:last-child)>.btn{border-radius:0}.btn-group-vertical>.btn-group:first-child:not(:last-child)>.btn:last-child,.btn-group-vertical>.btn-group:first-child:not(:last-child)>.dropdown-toggle{border-bottom-right-radius:0;border-bottom-left-radius:0}.btn-group-vertical>.btn-group:last-child:not(:first-child)>.btn:first-child{border-top-right-radius:0;border-top-left-radius:0}[data-toggle="buttons"]>.btn input[type="radio"],[data-toggle="buttons"]>.btn input[type="checkbox"],[data-toggle="buttons"]>.btn-group>.btn input[type="radio"],[data-toggle="buttons"]>.btn-group>.btn input[type="checkbox"]{position:absolute;clip:rect(0, 0, 0, 0);pointer-events:none}.input-group{position:relative;width:100%;display:table;border-collapse:separate}.input-group .form-control{position:relative;z-index:2;float:left;width:100%;margin-bottom:0}.input-group .form-control:focus,.input-group .form-control:active,.input-group .form-control:hover{z-index:3}.input-group-addon,.input-group-btn,.input-group .form-control{display:table-cell}.input-group-addon:not(:first-child):not(:last-child),.input-group-btn:not(:first-child):not(:last-child),.input-group .form-control:not(:first-child):not(:last-child){border-radius:0}.input-group-addon,.input-group-btn{width:1%;white-space:nowrap;vertical-align:middle}.input-group-addon{padding:.5rem .75rem;margin-bottom:0;font-size:1rem;font-weight:normal;line-height:1.25;color:red;text-align:center;background-color:red;border:1px solid rgba(0,0,0,0.15);border-radius:.25rem}.input-group-addon.form-control-sm,.input-group-sm>.input-group-addon,.input-group-sm>.input-group-btn>.input-group-addon.btn{padding:.25rem .5rem;font-size:.875rem;border-radius:.2rem}.input-group-addon.form-control-lg,.input-group-lg>.input-group-addon,.input-group-lg>.input-group-btn>.input-group-addon.btn{padding:.75rem 1.5rem;font-size:1.25rem;border-radius:.3rem}.input-group-addon input[type="radio"],.input-group-addon input[type="checkbox"]{margin-top:0}.input-group .form-control:not(:last-child),.input-group-addon:not(:last-child),.input-group-btn:not(:last-child)>.btn,.input-group-btn:not(:last-child)>.btn-group>.btn,.input-group-btn:not(:last-child)>.dropdown-toggle,.input-group-btn:not(:first-child)>.btn:not(:last-child):not(.dropdown-toggle),.input-group-btn:not(:first-child)>.btn-group:not(:last-child)>.btn{border-bottom-right-radius:0;border-top-right-radius:0}.input-group-addon:not(:last-child){border-right:0}.input-group 
.form-control:not(:first-child),.input-group-addon:not(:first-child),.input-group-btn:not(:first-child)>.btn,.input-group-btn:not(:first-child)>.btn-group>.btn,.input-group-btn:not(:first-child)>.dropdown-toggle,.input-group-btn:not(:last-child)>.btn:not(:first-child),.input-group-btn:not(:last-child)>.btn-group:not(:first-child)>.btn{border-bottom-left-radius:0;border-top-left-radius:0}.form-control+.input-group-addon:not(:first-child){border-left:0}.input-group-btn{position:relative;font-size:0;white-space:nowrap}.input-group-btn>.btn{position:relative}.input-group-btn>.btn+.btn{margin-left:-1px}.input-group-btn>.btn:focus,.input-group-btn>.btn:active,.input-group-btn>.btn:hover{z-index:3}.input-group-btn:not(:last-child)>.btn,.input-group-btn:not(:last-child)>.btn-group{margin-right:-1px}.input-group-btn:not(:first-child)>.btn,.input-group-btn:not(:first-child)>.btn-group{z-index:2;margin-left:-1px}.input-group-btn:not(:first-child)>.btn:focus,.input-group-btn:not(:first-child)>.btn:active,.input-group-btn:not(:first-child)>.btn:hover,.input-group-btn:not(:first-child)>.btn-group:focus,.input-group-btn:not(:first-child)>.btn-group:active,.input-group-btn:not(:first-child)>.btn-group:hover{z-index:3}.custom-control{position:relative;display:inline;padding-left:1.5rem;cursor:pointer}.custom-control+.custom-control{margin-left:1rem}.custom-control-input{position:absolute;z-index:-1;opacity:0}.custom-control-input:checked ~ .custom-control-indicator{color:#fff;background-color:#0074d9;-webkit-box-shadow:none;box-shadow:none}.custom-control-input:focus ~ .custom-control-indicator{-webkit-box-shadow:0 0 0 0.075rem #fff,0 0 0 0.2rem #0074d9;box-shadow:0 0 0 0.075rem #fff,0 0 0 0.2rem #0074d9}.custom-control-input:active ~ .custom-control-indicator{color:#fff;background-color:#84c6ff;-webkit-box-shadow:none;box-shadow:none}.custom-control-input:disabled ~ .custom-control-indicator{cursor:not-allowed;background-color:#eee}.custom-control-input:disabled ~ .custom-control-description{color:#767676;cursor:not-allowed}.custom-control-indicator{position:absolute;top:.0625rem;left:0;display:block;width:1rem;height:1rem;pointer-events:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;background-color:#ddd;background-repeat:no-repeat;background-position:center center;-webkit-background-size:50% 50%;background-size:50% 50%;-webkit-box-shadow:inset 0 0.25rem 0.25rem rgba(0,0,0,0.1);box-shadow:inset 0 0.25rem 0.25rem rgba(0,0,0,0.1)}.custom-checkbox .custom-control-indicator{border-radius:.25rem}.custom-checkbox .custom-control-input:checked ~ .custom-control-indicator{background-image:url("data:image/svg+xml;charset=utf8,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 8 8'%3E%3Cpath fill='%23fff' d='M6.564.75l-3.59 3.612-1.538-1.55L0 4.26 2.974 7.25 8 2.193z'/%3E%3C/svg%3E")}.custom-checkbox .custom-control-input:indeterminate ~ .custom-control-indicator{background-color:#0074d9;background-image:url("data:image/svg+xml;charset=utf8,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 4 4'%3E%3Cpath stroke='%23fff' d='M0 2h4'/%3E%3C/svg%3E");-webkit-box-shadow:none;box-shadow:none}.custom-radio .custom-control-indicator{border-radius:50%}.custom-radio .custom-control-input:checked ~ .custom-control-indicator{background-image:url("data:image/svg+xml;charset=utf8,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3E%3Ccircle r='3' fill='%23fff'/%3E%3C/svg%3E")}.custom-controls-stacked .custom-control{display:inline}.custom-controls-stacked 
.custom-control::after{display:block;margin-bottom:.25rem;content:""}.custom-controls-stacked .custom-control+.custom-control{margin-left:0}.custom-select{display:inline-block;max-width:100%;padding:.375rem 1.75rem .375rem .75rem;padding-right:.75rem \9;color:red;vertical-align:middle;background:#fff url("data:image/svg+xml;charset=utf8,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 4 5'%3E%3Cpath fill='%23333' d='M2 0L0 2h4zm0 5L0 3h4z'/%3E%3C/svg%3E") no-repeat right .75rem center;background-image:none \9;-webkit-background-size:8px 10px;background-size:8px 10px;border:1px solid rgba(0,0,0,0.15);border-radius:.25rem;-moz-appearance:none;-webkit-appearance:none}.custom-select:focus{border-color:#51a7e8;outline:none;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.075),0 0 5px rgba(81,167,232,0.5);box-shadow:inset 0 1px 2px rgba(0,0,0,0.075),0 0 5px rgba(81,167,232,0.5)}.custom-select:focus::-ms-value{color:red;background-color:#fff}.custom-select:disabled{color:red;cursor:not-allowed;background-color:red}.custom-select::-ms-expand{opacity:0}.custom-select-sm{padding-top:.375rem;padding-bottom:.375rem;font-size:75%}.custom-file{position:relative;display:inline-block;max-width:100%;height:2.5rem;cursor:pointer}.custom-file-input{min-width:14rem;max-width:100%;margin:0;filter:alpha(opacity=0);opacity:0}.custom-file-input:focus ~ .custom-file-control{-webkit-box-shadow:0 0 0 0.075rem #fff,0 0 0 0.2rem #0074d9;box-shadow:0 0 0 0.075rem #fff,0 0 0 0.2rem #0074d9}.custom-file-control{position:absolute;top:0;right:0;left:0;z-index:5;height:2.5rem;padding:.5rem 1rem;line-height:1.5;color:#555;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;background-color:#fff;border:1px solid #ddd;border-radius:.25rem;-webkit-box-shadow:inset 0 0.2rem 0.4rem rgba(0,0,0,0.05);box-shadow:inset 0 0.2rem 0.4rem rgba(0,0,0,0.05)}.custom-file-control:lang(en)::after{content:"Choose file..."}.custom-file-control::before{position:absolute;top:-1px;right:-1px;bottom:-1px;z-index:6;display:block;height:2.5rem;padding:.5rem 1rem;line-height:1.5;color:#555;background-color:#eee;border:1px solid #ddd;border-radius:0 .25rem .25rem 0}.custom-file-control:lang(en)::before{content:"Browse"}.nav{padding-left:0;margin-bottom:0;list-style:none}.nav-link{display:inline-block}.nav-link:focus,.nav-link:hover{text-decoration:none}.nav-link.disabled{color:red}.nav-link.disabled,.nav-link.disabled:focus,.nav-link.disabled:hover{color:red;cursor:not-allowed;background-color:transparent}.nav-inline .nav-item{display:inline-block}.nav-inline .nav-item+.nav-item,.nav-inline .nav-link+.nav-link{margin-left:1rem}.nav-tabs{border-bottom:1px solid #ddd}.nav-tabs::after{content:"";display:table;clear:both}.nav-tabs .nav-item{float:left;margin-bottom:-1px}.nav-tabs .nav-item+.nav-item{margin-left:.2rem}.nav-tabs .nav-link{display:block;padding:0.5em 1em;border:1px solid transparent;border-top-right-radius:.25rem;border-top-left-radius:.25rem}.nav-tabs .nav-link:focus,.nav-tabs .nav-link:hover{border-color:red red #ddd}.nav-tabs .nav-link.disabled,.nav-tabs .nav-link.disabled:focus,.nav-tabs .nav-link.disabled:hover{color:red;background-color:transparent;border-color:transparent}.nav-tabs .nav-link.active,.nav-tabs .nav-link.active:focus,.nav-tabs .nav-link.active:hover,.nav-tabs .nav-item.open .nav-link,.nav-tabs .nav-item.open .nav-link:focus,.nav-tabs .nav-item.open .nav-link:hover{color:red;background-color:#fff;border-color:#ddd #ddd transparent}.nav-tabs 
.dropdown-menu{margin-top:-1px;border-top-right-radius:0;border-top-left-radius:0}.nav-pills::after{content:"";display:table;clear:both}.nav-pills .nav-item{float:left}.nav-pills .nav-item+.nav-item{margin-left:.2rem}.nav-pills .nav-link{display:block;padding:0.5em 1em;border-radius:.25rem}.nav-pills .nav-link.active,.nav-pills .nav-link.active:focus,.nav-pills .nav-link.active:hover,.nav-pills .nav-item.open .nav-link,.nav-pills .nav-item.open .nav-link:focus,.nav-pills .nav-item.open .nav-link:hover{color:#fff;cursor:default;background-color:#f99d53}.nav-stacked .nav-item{display:block;float:none}.nav-stacked .nav-item+.nav-item{margin-top:.2rem;margin-left:0}.tab-content>.tab-pane{display:none}.tab-content>.active{display:block}.navbar{position:relative;padding:.5rem 1rem}.navbar::after{content:"";display:table;clear:both}@media (min-width: 30rem){.navbar{border-radius:.25rem}}.navbar-full{z-index:1000}@media (min-width: 30rem){.navbar-full{border-radius:0}}.navbar-fixed-top,.navbar-fixed-bottom{position:fixed;right:0;left:0;z-index:1030}@media (min-width: 30rem){.navbar-fixed-top,.navbar-fixed-bottom{border-radius:0}}.navbar-fixed-top{top:0}.navbar-fixed-bottom{bottom:0}.navbar-sticky-top{position:-webkit-sticky;position:sticky;top:0;z-index:1030;width:100%}@media (min-width: 30rem){.navbar-sticky-top{border-radius:0}}.navbar-brand{float:left;padding-top:.25rem;padding-bottom:.25rem;margin-right:1rem;font-size:1.25rem}.navbar-brand:focus,.navbar-brand:hover{text-decoration:none}.navbar-brand>img{display:block}.navbar-divider{float:left;width:1px;padding-top:.425rem;padding-bottom:.425rem;margin-right:1rem;margin-left:1rem;overflow:hidden}.navbar-divider::before{content:"\00a0"}.navbar-toggler{padding:.5rem .75rem;font-size:1.25rem;line-height:1;background:none;border:1px solid transparent;border-radius:.25rem}.navbar-toggler:focus,.navbar-toggler:hover{text-decoration:none}.navbar-nav .nav-item{float:left}.navbar-nav .nav-link{display:block;padding-top:.425rem;padding-bottom:.425rem}.navbar-nav .nav-link+.nav-link{margin-left:1rem}.navbar-nav .nav-item+.nav-item{margin-left:1rem}.navbar-light .navbar-brand{color:rgba(0,0,0,0.8)}.navbar-light .navbar-brand:focus,.navbar-light .navbar-brand:hover{color:rgba(0,0,0,0.8)}.navbar-light .navbar-nav .nav-link{color:rgba(0,0,0,0.3)}.navbar-light .navbar-nav .nav-link:focus,.navbar-light .navbar-nav .nav-link:hover{color:rgba(0,0,0,0.6)}.navbar-light .navbar-nav .open>.nav-link,.navbar-light .navbar-nav .open>.nav-link:focus,.navbar-light .navbar-nav .open>.nav-link:hover,.navbar-light .navbar-nav .active>.nav-link,.navbar-light .navbar-nav .active>.nav-link:focus,.navbar-light .navbar-nav .active>.nav-link:hover,.navbar-light .navbar-nav .nav-link.open,.navbar-light .navbar-nav .nav-link.open:focus,.navbar-light .navbar-nav .nav-link.open:hover,.navbar-light .navbar-nav .nav-link.active,.navbar-light .navbar-nav .nav-link.active:focus,.navbar-light .navbar-nav .nav-link.active:hover{color:rgba(0,0,0,0.8)}.navbar-light .navbar-divider{background-color:rgba(0,0,0,0.075)}.navbar-dark .navbar-brand{color:#fff}.navbar-dark .navbar-brand:focus,.navbar-dark .navbar-brand:hover{color:#fff}.navbar-dark .navbar-nav .nav-link{color:rgba(255,255,255,0.5)}.navbar-dark .navbar-nav .nav-link:focus,.navbar-dark .navbar-nav .nav-link:hover{color:rgba(255,255,255,0.75)}.navbar-dark .navbar-nav .open>.nav-link,.navbar-dark .navbar-nav .open>.nav-link:focus,.navbar-dark .navbar-nav .open>.nav-link:hover,.navbar-dark .navbar-nav .active>.nav-link,.navbar-dark 
.navbar-nav .active>.nav-link:focus,.navbar-dark .navbar-nav .active>.nav-link:hover,.navbar-dark .navbar-nav .nav-link.open,.navbar-dark .navbar-nav .nav-link.open:focus,.navbar-dark .navbar-nav .nav-link.open:hover,.navbar-dark .navbar-nav .nav-link.active,.navbar-dark .navbar-nav .nav-link.active:focus,.navbar-dark .navbar-nav .nav-link.active:hover{color:#fff}.navbar-dark .navbar-divider{background-color:rgba(255,255,255,0.075)}.navbar-toggleable-xs::after{content:"";display:table;clear:both}@media (max-width: 29.99rem){.navbar-toggleable-xs .navbar-nav .nav-item{float:none;margin-left:0}}@media (min-width: 30rem){.navbar-toggleable-xs{display:block !important}}.navbar-toggleable-sm::after{content:"";display:table;clear:both}@media (max-width: 47.99rem){.navbar-toggleable-sm .navbar-nav .nav-item{float:none;margin-left:0}}@media (min-width: 48rem){.navbar-toggleable-sm{display:block !important}}.navbar-toggleable-md::after{content:"";display:table;clear:both}@media (max-width: 66.49rem){.navbar-toggleable-md .navbar-nav .nav-item{float:none;margin-left:0}}@media (min-width: 66.5rem){.navbar-toggleable-md{display:block !important}}.card{position:relative;display:block;margin-bottom:.75rem;background-color:#fff;border-radius:.25rem;border:1px solid rgba(0,0,0,0.125)}.card-block{padding:1.25rem}.card-block::after{content:"";display:table;clear:both}.card-title{margin-bottom:.75rem}.card-subtitle{margin-top:-0.375rem;margin-bottom:0}.card-text:last-child{margin-bottom:0}.card-link:hover{text-decoration:none}.card-link+.card-link{margin-left:1.25rem}.card>.list-group:first-child .list-group-item:first-child{border-top-right-radius:.25rem;border-top-left-radius:.25rem}.card>.list-group:last-child .list-group-item:last-child{border-bottom-right-radius:.25rem;border-bottom-left-radius:.25rem}.card-header{padding:.75rem 1.25rem;background-color:#f5f5f5;border-bottom:1px solid rgba(0,0,0,0.125)}.card-header::after{content:"";display:table;clear:both}.card-header:first-child{border-radius:.25rem .25rem 0 0}.card-footer{padding:.75rem 1.25rem;background-color:#f5f5f5;border-top:1px solid rgba(0,0,0,0.125)}.card-footer::after{content:"";display:table;clear:both}.card-footer:last-child{border-radius:0 0 .25rem .25rem}.card-header-tabs{margin-right:-0.625rem;margin-bottom:-0.75rem;margin-left:-0.625rem;border-bottom:0}.card-header-pills{margin-right:-0.625rem;margin-left:-0.625rem}.card-primary{background-color:#f99d53;border-color:#f99d53}.card-primary .card-header,.card-primary .card-footer{background-color:transparent}.card-success{background-color:red;border-color:red}.card-success .card-header,.card-success .card-footer{background-color:transparent}.card-info{background-color:red;border-color:red}.card-info .card-header,.card-info .card-footer{background-color:transparent}.card-warning{background-color:red;border-color:red}.card-warning .card-header,.card-warning .card-footer{background-color:transparent}.card-danger{background-color:red;border-color:red}.card-danger .card-header,.card-danger .card-footer{background-color:transparent}.card-outline-primary{background-color:transparent;border-color:#f99d53}.card-outline-secondary{background-color:transparent;border-color:#ccc}.card-outline-info{background-color:transparent;border-color:red}.card-outline-success{background-color:transparent;border-color:red}.card-outline-warning{background-color:transparent;border-color:red}.card-outline-danger{background-color:transparent;border-color:red}.card-inverse .card-header,.card-inverse 
.card-footer{border-color:rgba(255,255,255,0.2)}.card-inverse .card-header,.card-inverse .card-footer,.card-inverse .card-title,.card-inverse .card-blockquote{color:#fff}.card-inverse .card-link,.card-inverse .card-text,.card-inverse .card-subtitle,.card-inverse .card-blockquote .blockquote-footer{color:rgba(255,255,255,0.65)}.card-inverse .card-link:focus,.card-inverse .card-link:hover{color:#fff}.card-blockquote{padding:0;margin-bottom:0;border-left:0}.card-img{border-radius:.25rem}.card-img-overlay{position:absolute;top:0;right:0;bottom:0;left:0;padding:1.25rem}.card-img-top{border-top-right-radius:.25rem;border-top-left-radius:.25rem}.card-img-bottom{border-bottom-right-radius:.25rem;border-bottom-left-radius:.25rem}@media (min-width: 30rem){.card-deck{display:table;width:100%;margin-bottom:.75rem;table-layout:fixed;border-spacing:1.25rem 0}.card-deck .card{display:table-cell;margin-bottom:0;vertical-align:top}.card-deck-wrapper{margin-right:-1.25rem;margin-left:-1.25rem}}@media (min-width: 30rem){.card-group{display:table;width:100%;table-layout:fixed}.card-group .card{display:table-cell;vertical-align:top}.card-group .card+.card{margin-left:0;border-left:0}.card-group .card:first-child{border-bottom-right-radius:0;border-top-right-radius:0}.card-group .card:first-child .card-img-top{border-top-right-radius:0}.card-group .card:first-child .card-img-bottom{border-bottom-right-radius:0}.card-group .card:last-child{border-bottom-left-radius:0;border-top-left-radius:0}.card-group .card:last-child .card-img-top{border-top-left-radius:0}.card-group .card:last-child .card-img-bottom{border-bottom-left-radius:0}.card-group .card:not(:first-child):not(:last-child){border-radius:0}.card-group .card:not(:first-child):not(:last-child) .card-img-top,.card-group .card:not(:first-child):not(:last-child) .card-img-bottom{border-radius:0}}@media (min-width: 30rem){.card-columns{-webkit-column-count:3;-moz-column-count:3;column-count:3;-webkit-column-gap:1.25rem;-moz-column-gap:1.25rem;column-gap:1.25rem}.card-columns .card{display:inline-block;width:100%}}.breadcrumb{padding:.75rem 1rem;margin-bottom:1rem;list-style:none;background-color:red;border-radius:.25rem}.breadcrumb::after{content:"";display:table;clear:both}.breadcrumb-item{float:left}.breadcrumb-item+.breadcrumb-item::before{display:inline-block;padding-right:.5rem;padding-left:.5rem;color:red;content:"/"}.breadcrumb-item+.breadcrumb-item:hover::before{text-decoration:underline}.breadcrumb-item+.breadcrumb-item:hover::before{text-decoration:none}.breadcrumb-item.active{color:red}.pagination{display:inline-block;padding-left:0;margin-top:1rem;margin-bottom:1rem;border-radius:.25rem}.page-item{display:inline}.page-item:first-child .page-link{margin-left:0;border-bottom-left-radius:.25rem;border-top-left-radius:.25rem}.page-item:last-child .page-link{border-bottom-right-radius:.25rem;border-top-right-radius:.25rem}.page-item.active .page-link,.page-item.active .page-link:focus,.page-item.active .page-link:hover{z-index:2;color:#fff;cursor:default;background-color:#f99d53;border-color:#f99d53}.page-item.disabled .page-link,.page-item.disabled .page-link:focus,.page-item.disabled .page-link:hover{color:red;pointer-events:none;cursor:not-allowed;background-color:#fff;border-color:#ddd}.page-link{position:relative;float:left;padding:.5rem .75rem;margin-left:-1px;color:#f99d53;text-decoration:none;background-color:#fff;border:1px solid #ddd}.page-link:focus,.page-link:hover{color:#f67309;background-color:red;border-color:#ddd}.pagination-lg 
.page-link{padding:.75rem 1.5rem;font-size:1.25rem}.pagination-lg .page-item:first-child .page-link{border-bottom-left-radius:.3rem;border-top-left-radius:.3rem}.pagination-lg .page-item:last-child .page-link{border-bottom-right-radius:.3rem;border-top-right-radius:.3rem}.pagination-sm .page-link{padding:.275rem .75rem;font-size:.875rem}.pagination-sm .page-item:first-child .page-link{border-bottom-left-radius:.2rem;border-top-left-radius:.2rem}.pagination-sm .page-item:last-child .page-link{border-bottom-right-radius:.2rem;border-top-right-radius:.2rem}.tag{display:inline-block;padding:.25em .4em;font-size:75%;font-weight:bold;line-height:1;color:#fff;text-align:center;white-space:nowrap;vertical-align:baseline;border-radius:.25rem}.tag:empty{display:none}.btn .tag{position:relative;top:-1px}a.tag:focus,a.tag:hover{color:#fff;text-decoration:none;cursor:pointer}.tag-pill{padding-right:.6em;padding-left:.6em;border-radius:10rem}.tag-default{background-color:red}.tag-default[href]:focus,.tag-default[href]:hover{background-color:#c00}.tag-primary{background-color:#f99d53}.tag-primary[href]:focus,.tag-primary[href]:hover{background-color:#f78122}.tag-success{background-color:red}.tag-success[href]:focus,.tag-success[href]:hover{background-color:#c00}.tag-info{background-color:red}.tag-info[href]:focus,.tag-info[href]:hover{background-color:#c00}.tag-warning{background-color:red}.tag-warning[href]:focus,.tag-warning[href]:hover{background-color:#c00}.tag-danger{background-color:red}.tag-danger[href]:focus,.tag-danger[href]:hover{background-color:#c00}.jumbotron{padding:2rem 1rem;margin-bottom:2rem;background-color:red;border-radius:.3rem}@media (min-width: 30rem){.jumbotron{padding:4rem 2rem}}.jumbotron-hr{border-top-color:#c00}.jumbotron-fluid{padding-right:0;padding-left:0;border-radius:0}.alert{padding:15px;margin-bottom:1rem;border:1px solid transparent;border-radius:.25rem}.alert-heading{color:inherit}.alert-link{font-weight:bold}.alert-dismissible{padding-right:35px}.alert-dismissible .close{position:relative;top:-2px;right:-21px;color:inherit}.alert-success{background-color:#dff0d8;border-color:#d0e9c6;color:#3c763d}.alert-success hr{border-top-color:#c1e2b3}.alert-success .alert-link{color:#2b542c}.alert-info{background-color:#d9edf7;border-color:#bcdff1;color:#31708f}.alert-info hr{border-top-color:#a6d5ec}.alert-info .alert-link{color:#245269}.alert-warning{background-color:#fcf8e3;border-color:#faf2cc;color:#8a6d3b}.alert-warning hr{border-top-color:#f7ecb5}.alert-warning .alert-link{color:#66512c}.alert-danger{background-color:#f2dede;border-color:#ebcccc;color:#a94442}.alert-danger hr{border-top-color:#e4b9b9}.alert-danger .alert-link{color:#843534}@-webkit-keyframes progress-bar-stripes{from{background-position:1rem 0}to{background-position:0 0}}@-o-keyframes progress-bar-stripes{from{background-position:1rem 0}to{background-position:0 0}}@keyframes progress-bar-stripes{from{background-position:1rem 0}to{background-position:0 
0}}.progress{display:block;width:100%;height:1rem;margin-bottom:1rem}.progress[value]{background-color:#eee;border:0;-webkit-appearance:none;-moz-appearance:none;appearance:none;border-radius:.25rem}.progress[value]::-ms-fill{background-color:#0074d9;border:0}.progress[value]::-moz-progress-bar{background-color:#0074d9;border-bottom-left-radius:.25rem;border-top-left-radius:.25rem}.progress[value]::-webkit-progress-value{background-color:#0074d9;border-bottom-left-radius:.25rem;border-top-left-radius:.25rem}.progress[value="100"]::-moz-progress-bar{border-bottom-right-radius:.25rem;border-top-right-radius:.25rem}.progress[value="100"]::-webkit-progress-value{border-bottom-right-radius:.25rem;border-top-right-radius:.25rem}.progress[value]::-webkit-progress-bar{background-color:#eee;border-radius:.25rem;-webkit-box-shadow:inset 0 0.1rem 0.1rem rgba(0,0,0,0.1);box-shadow:inset 0 0.1rem 0.1rem rgba(0,0,0,0.1)}base::-moz-progress-bar,.progress[value]{background-color:#eee;border-radius:.25rem;box-shadow:inset 0 0.1rem 0.1rem rgba(0,0,0,0.1)}@media screen and (min-width: 0\0){.progress{background-color:#eee;border-radius:.25rem;-webkit-box-shadow:inset 0 0.1rem 0.1rem rgba(0,0,0,0.1);box-shadow:inset 0 0.1rem 0.1rem rgba(0,0,0,0.1)}.progress-bar{display:inline-block;height:1rem;text-indent:-999rem;background-color:#0074d9;border-bottom-left-radius:.25rem;border-top-left-radius:.25rem}.progress[width="100%"]{border-bottom-right-radius:.25rem;border-top-right-radius:.25rem}}.progress-striped[value]::-webkit-progress-value{background-image:-webkit-linear-gradient(45deg, rgba(255,255,255,0.15) 25%, transparent 25%, transparent 50%, rgba(255,255,255,0.15) 50%, rgba(255,255,255,0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255,255,255,0.15) 25%, transparent 25%, transparent 50%, rgba(255,255,255,0.15) 50%, rgba(255,255,255,0.15) 75%, transparent 75%, transparent);-webkit-background-size:1rem 1rem;background-size:1rem 1rem}.progress-striped[value]::-moz-progress-bar{background-image:linear-gradient(45deg, rgba(255,255,255,0.15) 25%, transparent 25%, transparent 50%, rgba(255,255,255,0.15) 50%, rgba(255,255,255,0.15) 75%, transparent 75%, transparent);background-size:1rem 1rem}.progress-striped[value]::-ms-fill{background-image:linear-gradient(45deg, rgba(255,255,255,0.15) 25%, transparent 25%, transparent 50%, rgba(255,255,255,0.15) 50%, rgba(255,255,255,0.15) 75%, transparent 75%, transparent);background-size:1rem 1rem}@media screen and (min-width: 0\0){.progress-bar-striped{background-image:-webkit-linear-gradient(45deg, rgba(255,255,255,0.15) 25%, transparent 25%, transparent 50%, rgba(255,255,255,0.15) 50%, rgba(255,255,255,0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255,255,255,0.15) 25%, transparent 25%, transparent 50%, rgba(255,255,255,0.15) 50%, rgba(255,255,255,0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255,255,255,0.15) 25%, transparent 25%, transparent 50%, rgba(255,255,255,0.15) 50%, rgba(255,255,255,0.15) 75%, transparent 75%, transparent);-webkit-background-size:1rem 1rem;background-size:1rem 1rem}}.progress-animated[value]::-webkit-progress-value{-webkit-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite}.progress-animated[value]::-moz-progress-bar{animation:progress-bar-stripes 2s linear infinite}@media screen and (min-width: 0\0){.progress-animated .progress-bar-striped{-webkit-animation:progress-bar-stripes 
2s linear infinite;-o-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite}}.progress-success[value]::-webkit-progress-value{background-color:red}.progress-success[value]::-moz-progress-bar{background-color:red}.progress-success[value]::-ms-fill{background-color:red}@media screen and (min-width: 0\0){.progress-success .progress-bar{background-color:red}}.progress-info[value]::-webkit-progress-value{background-color:red}.progress-info[value]::-moz-progress-bar{background-color:red}.progress-info[value]::-ms-fill{background-color:red}@media screen and (min-width: 0\0){.progress-info .progress-bar{background-color:red}}.progress-warning[value]::-webkit-progress-value{background-color:red}.progress-warning[value]::-moz-progress-bar{background-color:red}.progress-warning[value]::-ms-fill{background-color:red}@media screen and (min-width: 0\0){.progress-warning .progress-bar{background-color:red}}.progress-danger[value]::-webkit-progress-value{background-color:red}.progress-danger[value]::-moz-progress-bar{background-color:red}.progress-danger[value]::-ms-fill{background-color:red}@media screen and (min-width: 0\0){.progress-danger .progress-bar{background-color:red}}.media{margin-top:15px}.media:first-child{margin-top:0}.media,.media-body{overflow:hidden}.media-body{width:10000px}.media-left,.media-right,.media-body{display:table-cell;vertical-align:top}.media-middle{vertical-align:middle}.media-bottom{vertical-align:bottom}.media-object{display:block}.media-object.img-thumbnail{max-width:none}.media-right{padding-left:10px}.media-left{padding-right:10px}.media-heading{margin-top:0;margin-bottom:5px}.media-list{padding-left:0;list-style:none}.list-group{padding-left:0;margin-bottom:0}.list-group-item{position:relative;display:block;padding:.75rem 1.25rem;margin-bottom:-1px;background-color:#fff;border:1px solid #ddd}.list-group-item:first-child{border-top-right-radius:.25rem;border-top-left-radius:.25rem}.list-group-item:last-child{margin-bottom:0;border-bottom-right-radius:.25rem;border-bottom-left-radius:.25rem}.list-group-item.disabled,.list-group-item.disabled:focus,.list-group-item.disabled:hover{color:red;cursor:not-allowed;background-color:red}.list-group-item.disabled .list-group-item-heading,.list-group-item.disabled:focus .list-group-item-heading,.list-group-item.disabled:hover .list-group-item-heading{color:inherit}.list-group-item.disabled .list-group-item-text,.list-group-item.disabled:focus .list-group-item-text,.list-group-item.disabled:hover .list-group-item-text{color:red}.list-group-item.active,.list-group-item.active:focus,.list-group-item.active:hover{z-index:2;color:#fff;text-decoration:none;background-color:#f99d53;border-color:#f99d53}.list-group-item.active .list-group-item-heading,.list-group-item.active .list-group-item-heading>small,.list-group-item.active .list-group-item-heading>.small,.list-group-item.active:focus .list-group-item-heading,.list-group-item.active:focus .list-group-item-heading>small,.list-group-item.active:focus .list-group-item-heading>.small,.list-group-item.active:hover .list-group-item-heading,.list-group-item.active:hover .list-group-item-heading>small,.list-group-item.active:hover .list-group-item-heading>.small{color:inherit}.list-group-item.active .list-group-item-text,.list-group-item.active:focus .list-group-item-text,.list-group-item.active:hover .list-group-item-text{color:#fff}.list-group-flush 
.list-group-item{border-radius:0}.list-group-item-action{width:100%;color:#555;text-align:inherit}.list-group-item-action .list-group-item-heading{color:#333}.list-group-item-action:focus,.list-group-item-action:hover{color:#555;text-decoration:none;background-color:#f5f5f5}.list-group-item-success{color:#3c763d;background-color:#dff0d8}a.list-group-item-success,button.list-group-item-success{color:#3c763d}a.list-group-item-success .list-group-item-heading,button.list-group-item-success .list-group-item-heading{color:inherit}a.list-group-item-success:focus,a.list-group-item-success:hover,button.list-group-item-success:focus,button.list-group-item-success:hover{color:#3c763d;background-color:#d0e9c6}a.list-group-item-success.active,a.list-group-item-success.active:focus,a.list-group-item-success.active:hover,button.list-group-item-success.active,button.list-group-item-success.active:focus,button.list-group-item-success.active:hover{color:#fff;background-color:#3c763d;border-color:#3c763d}.list-group-item-info{color:#31708f;background-color:#d9edf7}a.list-group-item-info,button.list-group-item-info{color:#31708f}a.list-group-item-info .list-group-item-heading,button.list-group-item-info .list-group-item-heading{color:inherit}a.list-group-item-info:focus,a.list-group-item-info:hover,button.list-group-item-info:focus,button.list-group-item-info:hover{color:#31708f;background-color:#c4e3f3}a.list-group-item-info.active,a.list-group-item-info.active:focus,a.list-group-item-info.active:hover,button.list-group-item-info.active,button.list-group-item-info.active:focus,button.list-group-item-info.active:hover{color:#fff;background-color:#31708f;border-color:#31708f}.list-group-item-warning{color:#8a6d3b;background-color:#fcf8e3}a.list-group-item-warning,button.list-group-item-warning{color:#8a6d3b}a.list-group-item-warning .list-group-item-heading,button.list-group-item-warning .list-group-item-heading{color:inherit}a.list-group-item-warning:focus,a.list-group-item-warning:hover,button.list-group-item-warning:focus,button.list-group-item-warning:hover{color:#8a6d3b;background-color:#faf2cc}a.list-group-item-warning.active,a.list-group-item-warning.active:focus,a.list-group-item-warning.active:hover,button.list-group-item-warning.active,button.list-group-item-warning.active:focus,button.list-group-item-warning.active:hover{color:#fff;background-color:#8a6d3b;border-color:#8a6d3b}.list-group-item-danger{color:#a94442;background-color:#f2dede}a.list-group-item-danger,button.list-group-item-danger{color:#a94442}a.list-group-item-danger .list-group-item-heading,button.list-group-item-danger .list-group-item-heading{color:inherit}a.list-group-item-danger:focus,a.list-group-item-danger:hover,button.list-group-item-danger:focus,button.list-group-item-danger:hover{color:#a94442;background-color:#ebcccc}a.list-group-item-danger.active,a.list-group-item-danger.active:focus,a.list-group-item-danger.active:hover,button.list-group-item-danger.active,button.list-group-item-danger.active:focus,button.list-group-item-danger.active:hover{color:#fff;background-color:#a94442;border-color:#a94442}.list-group-item-heading{margin-top:0;margin-bottom:5px}.list-group-item-text{margin-bottom:0;line-height:1.3}.embed-responsive{position:relative;display:block;height:0;padding:0;overflow:hidden}.embed-responsive .embed-responsive-item,.embed-responsive iframe,.embed-responsive embed,.embed-responsive object,.embed-responsive 
video{position:absolute;top:0;bottom:0;left:0;width:100%;height:100%;border:0}.embed-responsive-21by9{padding-bottom:42.85714%}.embed-responsive-16by9{padding-bottom:56.25%}.embed-responsive-4by3{padding-bottom:75%}.embed-responsive-1by1{padding-bottom:100%}.close{float:right;font-size:1.5rem;font-weight:bold;line-height:1;color:#000;text-shadow:0 1px 0 #fff;opacity:.2}.close:focus,.close:hover{color:#000;text-decoration:none;cursor:pointer;opacity:.5}button.close{padding:0;cursor:pointer;background:transparent;border:0;-webkit-appearance:none}.modal-open{overflow:hidden}.modal{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1050;display:none;overflow:hidden;outline:0;-webkit-overflow-scrolling:touch}.modal.fade .modal-dialog{-webkit-transition:-webkit-transform .3s ease-out;transition:-webkit-transform .3s ease-out;-o-transition:transform .3s ease-out, -o-transform .3s ease-out;transition:transform .3s ease-out;transition:transform .3s ease-out, -webkit-transform .3s ease-out, -o-transform .3s ease-out;-webkit-transform:translate(0, -25%);-ms-transform:translate(0, -25%);-o-transform:translate(0, -25%);transform:translate(0, -25%)}.modal.in .modal-dialog{-webkit-transform:translate(0, 0);-ms-transform:translate(0, 0);-o-transform:translate(0, 0);transform:translate(0, 0)}.modal-open .modal{overflow-x:hidden;overflow-y:auto}.modal-dialog{position:relative;width:auto;margin:10px}.modal-content{position:relative;background-color:#fff;-webkit-background-clip:padding-box;background-clip:padding-box;border:1px solid rgba(0,0,0,0.2);border-radius:.3rem;-webkit-box-shadow:0 3px 9px rgba(0,0,0,0.5);box-shadow:0 3px 9px rgba(0,0,0,0.5);outline:0}.modal-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1040;background-color:#000}.modal-backdrop.fade{opacity:0}.modal-backdrop.in{opacity:.5}.modal-header{padding:15px;border-bottom:1px solid #e5e5e5}.modal-header::after{content:"";display:table;clear:both}.modal-header .close{margin-top:-2px}.modal-title{margin:0;line-height:1.5}.modal-body{position:relative;padding:15px}.modal-footer{padding:15px;text-align:right;border-top:1px solid #e5e5e5}.modal-footer::after{content:"";display:table;clear:both}.modal-scrollbar-measure{position:absolute;top:-9999px;width:50px;height:50px;overflow:scroll}@media (min-width: 30rem){.modal-dialog{max-width:600px;margin:30px auto}.modal-content{-webkit-box-shadow:0 5px 15px rgba(0,0,0,0.5);box-shadow:0 5px 15px rgba(0,0,0,0.5)}.modal-sm{max-width:300px}}@media (min-width: 66.5rem){.modal-lg{max-width:900px}}.tooltip{position:absolute;z-index:1070;display:block;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif;font-style:normal;font-weight:normal;letter-spacing:normal;line-break:auto;line-height:1.5;text-align:left;text-align:start;text-decoration:none;text-shadow:none;text-transform:none;white-space:normal;word-break:normal;word-spacing:normal;font-size:.875rem;word-wrap:break-word;opacity:0}.tooltip.in{opacity:.9}.tooltip.tooltip-top,.tooltip.bs-tether-element-attached-bottom{padding:5px 0;margin-top:-3px}.tooltip.tooltip-top .tooltip-arrow,.tooltip.bs-tether-element-attached-bottom .tooltip-arrow{bottom:0;left:50%;margin-left:-5px;border-width:5px 5px 0;border-top-color:#000}.tooltip.tooltip-right,.tooltip.bs-tether-element-attached-left{padding:0 5px;margin-left:3px}.tooltip.tooltip-right .tooltip-arrow,.tooltip.bs-tether-element-attached-left .tooltip-arrow{top:50%;left:0;margin-top:-5px;border-width:5px 5px 5px 
0;border-right-color:#000}.tooltip.tooltip-bottom,.tooltip.bs-tether-element-attached-top{padding:5px 0;margin-top:3px}.tooltip.tooltip-bottom .tooltip-arrow,.tooltip.bs-tether-element-attached-top .tooltip-arrow{top:0;left:50%;margin-left:-5px;border-width:0 5px 5px;border-bottom-color:#000}.tooltip.tooltip-left,.tooltip.bs-tether-element-attached-right{padding:0 5px;margin-left:-3px}.tooltip.tooltip-left .tooltip-arrow,.tooltip.bs-tether-element-attached-right .tooltip-arrow{top:50%;right:0;margin-top:-5px;border-width:5px 0 5px 5px;border-left-color:#000}.tooltip-inner{max-width:200px;padding:3px 8px;color:#fff;text-align:center;background-color:#000;border-radius:.25rem}.tooltip-arrow{position:absolute;width:0;height:0;border-color:transparent;border-style:solid}.popover{position:absolute;top:0;left:0;z-index:1060;display:block;max-width:276px;padding:1px;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif;font-style:normal;font-weight:normal;letter-spacing:normal;line-break:auto;line-height:1.5;text-align:left;text-align:start;text-decoration:none;text-shadow:none;text-transform:none;white-space:normal;word-break:normal;word-spacing:normal;font-size:.875rem;word-wrap:break-word;background-color:#fff;-webkit-background-clip:padding-box;background-clip:padding-box;border:1px solid rgba(0,0,0,0.2);border-radius:.3rem;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2)}.popover.popover-top,.popover.bs-tether-element-attached-bottom{margin-top:-10px}.popover.popover-top .popover-arrow,.popover.bs-tether-element-attached-bottom .popover-arrow{bottom:-11px;left:50%;margin-left:-11px;border-top-color:rgba(0,0,0,0.25);border-bottom-width:0}.popover.popover-top .popover-arrow::after,.popover.bs-tether-element-attached-bottom .popover-arrow::after{bottom:1px;margin-left:-10px;content:"";border-top-color:#fff;border-bottom-width:0}.popover.popover-right,.popover.bs-tether-element-attached-left{margin-left:10px}.popover.popover-right .popover-arrow,.popover.bs-tether-element-attached-left .popover-arrow{top:50%;left:-11px;margin-top:-11px;border-right-color:rgba(0,0,0,0.25);border-left-width:0}.popover.popover-right .popover-arrow::after,.popover.bs-tether-element-attached-left .popover-arrow::after{bottom:-10px;left:1px;content:"";border-right-color:#fff;border-left-width:0}.popover.popover-bottom,.popover.bs-tether-element-attached-top{margin-top:10px}.popover.popover-bottom .popover-arrow,.popover.bs-tether-element-attached-top .popover-arrow{top:-11px;left:50%;margin-left:-11px;border-top-width:0;border-bottom-color:rgba(0,0,0,0.25)}.popover.popover-bottom .popover-arrow::after,.popover.bs-tether-element-attached-top .popover-arrow::after{top:1px;margin-left:-10px;content:"";border-top-width:0;border-bottom-color:#fff}.popover.popover-left,.popover.bs-tether-element-attached-right{margin-left:-10px}.popover.popover-left .popover-arrow,.popover.bs-tether-element-attached-right .popover-arrow{top:50%;right:-11px;margin-top:-11px;border-right-width:0;border-left-color:rgba(0,0,0,0.25)}.popover.popover-left .popover-arrow::after,.popover.bs-tether-element-attached-right .popover-arrow::after{right:1px;bottom:-10px;content:"";border-right-width:0;border-left-color:#fff}.popover-title{padding:8px 14px;margin:0;font-size:1rem;background-color:#f7f7f7;border-bottom:1px solid #ebebeb;border-radius:.2375rem .2375rem 0 0}.popover-title:empty{display:none}.popover-content{padding:9px 
14px}.popover-arrow,.popover-arrow::after{position:absolute;display:block;width:0;height:0;border-color:transparent;border-style:solid}.popover-arrow{border-width:11px}.popover-arrow::after{content:"";border-width:10px}.carousel{position:relative}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel-inner>.carousel-item{position:relative;display:none;-webkit-transition:.6s ease-in-out left;-o-transition:.6s ease-in-out left;transition:.6s ease-in-out left}.carousel-inner>.carousel-item>img,.carousel-inner>.carousel-item>a>img{line-height:1}@media all and (transform-3d), (-webkit-transform-3d){.carousel-inner>.carousel-item{-webkit-transition:-webkit-transform .6s ease-in-out;transition:-webkit-transform .6s ease-in-out;-o-transition:transform .6s ease-in-out, -o-transform .6s ease-in-out;transition:transform .6s ease-in-out;transition:transform .6s ease-in-out, -webkit-transform .6s ease-in-out, -o-transform .6s ease-in-out;-webkit-backface-visibility:hidden;backface-visibility:hidden;-webkit-perspective:1000px;perspective:1000px}.carousel-inner>.carousel-item.next,.carousel-inner>.carousel-item.active.right{left:0;-webkit-transform:translate3d(100%, 0, 0);transform:translate3d(100%, 0, 0)}.carousel-inner>.carousel-item.prev,.carousel-inner>.carousel-item.active.left{left:0;-webkit-transform:translate3d(-100%, 0, 0);transform:translate3d(-100%, 0, 0)}.carousel-inner>.carousel-item.next.left,.carousel-inner>.carousel-item.prev.right,.carousel-inner>.carousel-item.active{left:0;-webkit-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0)}}.carousel-inner>.active,.carousel-inner>.next,.carousel-inner>.prev{display:block}.carousel-inner>.active{left:0}.carousel-inner>.next,.carousel-inner>.prev{position:absolute;top:0;width:100%}.carousel-inner>.next{left:100%}.carousel-inner>.prev{left:-100%}.carousel-inner>.next.left,.carousel-inner>.prev.right{left:0}.carousel-inner>.active.left{left:-100%}.carousel-inner>.active.right{left:100%}.carousel-control{position:absolute;top:0;bottom:0;left:0;width:15%;font-size:20px;color:#fff;text-align:center;text-shadow:0 1px 2px rgba(0,0,0,0.6);opacity:.5}.carousel-control.left{background-image:-webkit-gradient(linear, left top, right top, from(rgba(0,0,0,0.5)), to(rgba(0,0,0,0.0001)));background-image:-webkit-linear-gradient(left, rgba(0,0,0,0.5) 0%, rgba(0,0,0,0.0001) 100%);background-image:-o-linear-gradient(left, rgba(0,0,0,0.5) 0%, rgba(0,0,0,0.0001) 100%);background-image:linear-gradient(to right, rgba(0,0,0,0.5) 0%, rgba(0,0,0,0.0001) 100%);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#80000000', endColorstr='#00000000', GradientType=1)}.carousel-control.right{right:0;left:auto;background-image:-webkit-gradient(linear, left top, right top, from(rgba(0,0,0,0.0001)), to(rgba(0,0,0,0.5)));background-image:-webkit-linear-gradient(left, rgba(0,0,0,0.0001) 0%, rgba(0,0,0,0.5) 100%);background-image:-o-linear-gradient(left, rgba(0,0,0,0.0001) 0%, rgba(0,0,0,0.5) 100%);background-image:linear-gradient(to right, rgba(0,0,0,0.0001) 0%, rgba(0,0,0,0.5) 100%);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#00000000', endColorstr='#80000000', GradientType=1)}.carousel-control:focus,.carousel-control:hover{color:#fff;text-decoration:none;outline:0;opacity:.9}.carousel-control .icon-prev,.carousel-control 
.icon-next{position:absolute;top:50%;z-index:5;display:inline-block;width:20px;height:20px;margin-top:-10px;font-family:serif;line-height:1}.carousel-control .icon-prev{left:50%;margin-left:-10px}.carousel-control .icon-next{right:50%;margin-right:-10px}.carousel-control .icon-prev::before{content:"\2039"}.carousel-control .icon-next::before{content:"\203a"}.carousel-indicators{position:absolute;bottom:10px;left:50%;z-index:15;width:60%;padding-left:0;margin-left:-30%;text-align:center;list-style:none}.carousel-indicators li{display:inline-block;width:10px;height:10px;margin:1px;text-indent:-999px;cursor:pointer;background-color:transparent;border:1px solid #fff;border-radius:10px}.carousel-indicators .active{width:12px;height:12px;margin:0;background-color:#fff}.carousel-caption{position:absolute;right:15%;bottom:20px;left:15%;z-index:10;padding-top:20px;padding-bottom:20px;color:#fff;text-align:center;text-shadow:0 1px 2px rgba(0,0,0,0.6)}.carousel-caption .btn{text-shadow:none}@media (min-width: 30rem){.carousel-control .icon-prev,.carousel-control .icon-next{width:30px;height:30px;margin-top:-15px;font-size:30px}.carousel-control .icon-prev{margin-left:-15px}.carousel-control .icon-next{margin-right:-15px}.carousel-caption{right:20%;left:20%;padding-bottom:30px}.carousel-indicators{bottom:20px}}.bg-inverse{background-color:red}.bg-faded{background-color:red}.bg-primary{color:#fff !important;background-color:#f99d53 !important}a.bg-primary:focus,a.bg-primary:hover{background-color:#f78122 !important}.bg-success{color:#fff !important;background-color:red !important}a.bg-success:focus,a.bg-success:hover{background-color:#c00 !important}.bg-info{color:#fff !important;background-color:red !important}a.bg-info:focus,a.bg-info:hover{background-color:#c00 !important}.bg-warning{color:#fff !important;background-color:red !important}a.bg-warning:focus,a.bg-warning:hover{background-color:#c00 !important}.bg-danger{color:#fff !important;background-color:red !important}a.bg-danger:focus,a.bg-danger:hover{background-color:#c00 !important}.clearfix::after{content:"";display:table;clear:both}.d-block{display:block !important}.d-inline-block{display:inline-block !important}.d-inline{display:inline !important}@media (min-width: 20rem){.pull-xs-left{float:left !important}.pull-xs-right{float:right !important}.pull-xs-none{float:none !important}}@media (min-width: 30rem){.pull-sm-left{float:left !important}.pull-sm-right{float:right !important}.pull-sm-none{float:none !important}}@media (min-width: 48rem){.pull-md-left{float:left !important}.pull-md-right{float:right !important}.pull-md-none{float:none !important}}@media (min-width: 66.5rem){.pull-lg-left{float:left !important}.pull-lg-right{float:right !important}.pull-lg-none{float:none !important}}@media (min-width: 86rem){.pull-xl-left{float:left !important}.pull-xl-right{float:right !important}.pull-xl-none{float:none !important}}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0, 0, 0, 0);border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;margin:0;overflow:visible;clip:auto}.w-100{width:100% !important}.m-x-auto{margin-right:auto !important;margin-left:auto !important}.m-a-0{margin:0 0 !important}.m-t-0{margin-top:0 !important}.m-r-0{margin-right:0 !important}.m-b-0{margin-bottom:0 !important}.m-l-0{margin-left:0 !important}.m-x-0{margin-right:0 !important;margin-left:0 !important}.m-y-0{margin-top:0 !important;margin-bottom:0 !important}.m-a-1{margin:1rem 
1rem !important}.m-t-1{margin-top:1rem !important}.m-r-1{margin-right:1rem !important}.m-b-1{margin-bottom:1rem !important}.m-l-1{margin-left:1rem !important}.m-x-1{margin-right:1rem !important;margin-left:1rem !important}.m-y-1{margin-top:1rem !important;margin-bottom:1rem !important}.m-a-2{margin:1.5rem 1.5rem !important}.m-t-2{margin-top:1.5rem !important}.m-r-2{margin-right:1.5rem !important}.m-b-2{margin-bottom:1.5rem !important}.m-l-2{margin-left:1.5rem !important}.m-x-2{margin-right:1.5rem !important;margin-left:1.5rem !important}.m-y-2{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.m-a-3{margin:3rem 3rem !important}.m-t-3{margin-top:3rem !important}.m-r-3{margin-right:3rem !important}.m-b-3{margin-bottom:3rem !important}.m-l-3{margin-left:3rem !important}.m-x-3{margin-right:3rem !important;margin-left:3rem !important}.m-y-3{margin-top:3rem !important;margin-bottom:3rem !important}.p-a-0{padding:0 0 !important}.p-t-0{padding-top:0 !important}.p-r-0{padding-right:0 !important}.p-b-0{padding-bottom:0 !important}.p-l-0{padding-left:0 !important}.p-x-0{padding-right:0 !important;padding-left:0 !important}.p-y-0{padding-top:0 !important;padding-bottom:0 !important}.p-a-1{padding:1rem 1rem !important}.p-t-1{padding-top:1rem !important}.p-r-1{padding-right:1rem !important}.p-b-1{padding-bottom:1rem !important}.p-l-1{padding-left:1rem !important}.p-x-1{padding-right:1rem !important;padding-left:1rem !important}.p-y-1{padding-top:1rem !important;padding-bottom:1rem !important}.p-a-2{padding:1.5rem 1.5rem !important}.p-t-2{padding-top:1.5rem !important}.p-r-2{padding-right:1.5rem !important}.p-b-2{padding-bottom:1.5rem !important}.p-l-2{padding-left:1.5rem !important}.p-x-2{padding-right:1.5rem !important;padding-left:1.5rem !important}.p-y-2{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.p-a-3{padding:3rem 3rem !important}.p-t-3{padding-top:3rem !important}.p-r-3{padding-right:3rem !important}.p-b-3{padding-bottom:3rem !important}.p-l-3{padding-left:3rem !important}.p-x-3{padding-right:3rem !important;padding-left:3rem !important}.p-y-3{padding-top:3rem !important;padding-bottom:3rem !important}.pos-f-t{position:fixed;top:0;right:0;left:0;z-index:1030}.text-justify{text-align:justify !important}.text-nowrap{white-space:nowrap !important}.text-truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}@media (min-width: 20rem){.text-xs-left{text-align:left !important}.text-xs-right{text-align:right !important}.text-xs-center{text-align:center !important}}@media (min-width: 30rem){.text-sm-left{text-align:left !important}.text-sm-right{text-align:right !important}.text-sm-center{text-align:center !important}}@media (min-width: 48rem){.text-md-left{text-align:left !important}.text-md-right{text-align:right !important}.text-md-center{text-align:center !important}}@media (min-width: 66.5rem){.text-lg-left{text-align:left !important}.text-lg-right{text-align:right !important}.text-lg-center{text-align:center !important}}@media (min-width: 86rem){.text-xl-left{text-align:left !important}.text-xl-right{text-align:right !important}.text-xl-center{text-align:center !important}}.text-lowercase{text-transform:lowercase !important}.text-uppercase{text-transform:uppercase !important}.text-capitalize{text-transform:capitalize !important}.font-weight-normal{font-weight:normal}.font-weight-bold{font-weight:bold}.font-italic{font-style:italic}.text-muted{color:red !important}a.text-muted:focus,a.text-muted:hover{color:#c00}.text-primary{color:#f99d53 
!important}a.text-primary:focus,a.text-primary:hover{color:#f78122}.text-success{color:red !important}a.text-success:focus,a.text-success:hover{color:#c00}.text-info{color:red !important}a.text-info:focus,a.text-info:hover{color:#c00}.text-warning{color:red !important}a.text-warning:focus,a.text-warning:hover{color:#c00}.text-danger{color:red !important}a.text-danger:focus,a.text-danger:hover{color:#c00}.text-hide{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0}.invisible{visibility:hidden !important}@media (min-width: 20rem){.hidden-xs-up{display:none !important}}@media (max-width: 29.99rem){.hidden-xs-down{display:none !important}}@media (min-width: 30rem){.hidden-sm-up{display:none !important}}@media (max-width: 47.99rem){.hidden-sm-down{display:none !important}}@media (min-width: 48rem){.hidden-md-up{display:none !important}}@media (max-width: 66.49rem){.hidden-md-down{display:none !important}}@media (min-width: 66.5rem){.hidden-lg-up{display:none !important}}@media (max-width: 85.99rem){.hidden-lg-down{display:none !important}}@media (min-width: 86rem){.hidden-xl-up{display:none !important}}.hidden-xl-down{display:none !important}.visible-print-block{display:none !important}@media print{.visible-print-block{display:block !important}}.visible-print-inline{display:none !important}@media print{.visible-print-inline{display:inline !important}}.visible-print-inline-block{display:none !important}@media print{.visible-print-inline-block{display:inline-block !important}}@media print{.hidden-print{display:none !important}}code{font-family:"Source Code Pro", monospace}.version-picker .product__title,.index .product__card .product__title{font-family:"Muli", sans-serif}.banner__navigation-pane,.index .welcome h1,.index .highlight__container .highlight__title,.index .highlight__container .highlight__link{font-family:"Oswald", sans-serif}body,.index .welcome,.index .product__card{font-family:"GandhiSerif", serif;font-weight:400;font-style:normal}h3,h4,h5,h6,.blocknote .blocknote__title,.content-nav,.code-block__tab-set .code-block__tab a,.code-block__title,.selector__title,.table-of-contents__items{font-family:"OpenSans", verdana, arial, sans-serif;font-weight:400;font-style:normal}h1,h2,.front-matter__title{font-family:"OpenSans", verdana, arial, sans-serif;font-weight:300;font-style:normal}.selector__btn,.selector-list__element a{font-family:"OpenSans", verdana, arial, sans-serif;font-weight:700;font-style:normal}@font-face{font-family:"GandhiSerif";font-weight:400;font-style:normal;src:local("Gandhi Serif Regular"),local("GandhiSerif-Regular"),url("../fonts/gandhiserif-regular.woff") format("woff")}@font-face{font-family:"GandhiSerif";font-weight:400;font-style:italic;src:local("Gandhi Serif Italic"),local("GandhiSerif-Italic"),url("../fonts/gandhiserif-italic.woff") format("woff")}@font-face{font-family:"GandhiSerif";font-weight:700;font-style:normal;src:local("Gandhi Serif Bold"),local("GandhiSerif-Bold"),url("../fonts/gandhiserif-bold.woff") format("woff")}@font-face{font-family:"GandhiSerif";font-weight:700;font-style:italic;src:local("Gandhi Serif Bold Italic"),local("GandhiSerif-BoldItalic"),url("../fonts/gandhiserif-bolditalic.woff") format("woff")}@font-face{font-family:"OpenSans";font-weight:300;font-style:normal;src:local("Open Sans Light"),local("OpenSans-Light"),url("../fonts/opensans-light.woff") format("woff")}@font-face{font-family:"OpenSans";font-weight:400;font-style:normal;src:local("Open Sans Regular"),local("OpenSans"),url("../fonts/opensans-regular.woff") format("woff")}@font-face{font-family:"OpenSans";font-weight:400;font-style:italic;src:local("Open Sans Italic"),local("OpenSans-Italic"),url("../fonts/opensans-italic.woff") format("woff")}@font-face{font-family:"OpenSans";font-weight:700;font-style:normal;src:local("Open Sans Bold"),local("OpenSans-Bold"),url("../fonts/opensans-bold.woff") format("woff")}@font-face{font-family:"OpenSans";font-weight:700;font-style:italic;src:local("Open Sans Bold Italic"),local("OpenSans-BoldItalic"),url("../fonts/opensans-bolditalic.woff") format("woff")}@font-face{font-family:"DocsFontIcons";font-weight:normal;font-style:normal;src:local("DocsFontIcons Regular"),local("DocsFontIcons"),url("../fonts/docsfonticons.woff") format("woff")}.docs-icon--lambda:before,.docs-icon--lambda-bold:before,.docs-icon--beaker:before,.docs-icon--cog:before,.docs-icon--github-alt:before,.docs-icon--comments:before,.docs-icon--bolt:before,.docs-icon--question-sign:before,.docs-icon--tools:before,.docs-icon--search:before,.docs-icon--download-alt:before,.docs-icon--github:before,.docs-icon--reorder:before,.docs-icon--more:before,.docs-icon--riak:before,.docs-icon--database:before,.docs-icon--install:before,.docs-icon--cloud:before,.docs-icon--configure:before,.docs-icon--references:before,.docs-icon--time:before{font-family:'DocsFontIcons';font-style:normal;font-weight:normal;font-variant:normal;speak:none;text-transform:none;line-height:1}.docs-icon--lambda:before{content:"\e005"}.docs-icon--lambda-bold:before{content:"\e00b"}.docs-icon--beaker:before{content:"\f0c3"}.docs-icon--cog:before{content:"\f013"}.docs-icon--github-alt:before{content:"\e004"}.docs-icon--comments:before{content:"\e000"}.docs-icon--bolt:before{content:"\f0e7"}.docs-icon--question-sign:before{content:"\f059"}.docs-icon--tools:before{content:"\e001"}.docs-icon--search:before{content:"\f002"}.docs-icon--download-alt:before{content:"\f019"}.docs-icon--github:before{content:"\e009"}.docs-icon--reorder:before{content:"\f0c9"}.docs-icon--more:before{content:"\e002"}.docs-icon--riak:before{content:"\e003"}.docs-icon--database:before{content:"\e006"}.docs-icon--install:before{content:"\e007"}.docs-icon--cloud:before{content:"\e008"}.docs-icon--configure:before{content:"\e00a"}.docs-icon--references:before{content:"\f02d"}.docs-icon--time:before{content:"\f017"}.inline{display:inline !important}.block{display:block !important}.inline-block{display:inline-block !important}.table{display:table !important}.inline-table{display:inline-table !important}.table-row{display:table-row !important}.table-cell{display:table-cell !important}.hidden{display:none !important}.float-left{float:left}.float-right{float:right}.clear{clear:both}.clear-left{clear:left}.clear-right{clear:right}.overflow{overflow:auto}.overflow-hidden{overflow:hidden}.overflow-visible{overflow:visible}.overflow-scroll{overflow:scroll}.overflow-x{overflow-x:auto}.overflow-x-hidden{overflow-x:hidden}.overflow-x-visible{overflow-x:visible}.overflow-x-scroll{overflow-x:scroll}.overflow-y{overflow-y:auto}.overflow-y-hidden{overflow-y:hidden}.overflow-y-visible{overflow-y:visible}.overflow-y-scroll{overflow-y:scroll}.inline-only-xs{display:none !important}.block-only-xs{display:none !important}.inline-block-only-xs{display:none !important}@media (min-width: 20rem) and (max-width: 29.99rem){.inline-only-xs{display:inline !important}.block-only-xs{display:block !important}.inline-block-only-xs{display:inline-block
!important}}.inline-only-sm{display:none !important}.block-only-sm{display:none !important}.inline-block-only-sm{display:none !important}@media (min-width: 30rem) and (max-width: 47.99rem){.inline-only-sm{display:inline !important}.block-only-sm{display:block !important}.inline-block-only-sm{display:inline-block !important}}.inline-only-md{display:none !important}.block-only-md{display:none !important}.inline-block-only-md{display:none !important}@media (min-width: 48rem) and (max-width: 66.49rem){.inline-only-md{display:inline !important}.block-only-md{display:block !important}.inline-block-only-md{display:inline-block !important}}.inline-only-lg{display:none !important}.block-only-lg{display:none !important}.inline-block-only-lg{display:none !important}@media (min-width: 66.5rem) and (max-width: 85.99rem){.inline-only-lg{display:inline !important}.block-only-lg{display:block !important}.inline-block-only-lg{display:inline-block !important}}.inline-only-xl{display:none !important}.block-only-xl{display:none !important}.inline-block-only-xl{display:none !important}@media (min-width: 86rem){.inline-only-xl{display:inline !important}.block-only-xl{display:block !important}.inline-block-only-xl{display:inline-block !important}}@media (min-width: 20rem){.float-left-xs-up{float:left}.float-right-xs-up{float:right}.float-none-xs-up{float:none}}@media (min-width: 30rem){.float-left-sm-up{float:left}.float-right-sm-up{float:right}.float-none-sm-up{float:none}}@media (min-width: 48rem){.float-left-md-up{float:left}.float-right-md-up{float:right}.float-none-md-up{float:none}}@media (min-width: 66.5rem){.float-left-lg-up{float:left}.float-right-lg-up{float:right}.float-none-lg-up{float:none}}@media (min-width: 86rem){.float-left-xl-up{float:left}.float-right-xl-up{float:right}.float-none-xl-up{float:none}}@media (max-width: 29.99rem){.float-left-xs-down{float:left}.float-right-xs-down{float:right}.float-none-xs-down{float:none}}@media (max-width: 47.99rem){.float-left-sm-down{float:left}.float-right-sm-down{float:right}.float-none-sm-down{float:none}}@media (max-width: 66.49rem){.float-left-md-down{float:left}.float-right-md-down{float:right}.float-none-md-down{float:none}}@media (max-width: 85.99rem){.float-left-lg-down{float:left}.float-right-lg-down{float:right}.float-none-lg-down{float:none}}.float-left-xl-down{float:left}.float-right-xl-down{float:right}.float-none-xl-down{float:none}.hide-text{overflow:hidden;padding:0;text-indent:101%;white-space:nowrap}.visually-hidden{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.hljs{background:#f7f7f7;color:#454d54}.hljs-comment,.hljs-quote{color:#9199a1;font-style:italic}.hljs-keyword,.hljs-selector-tag,.hljs-addition{color:#6c8997;font-weight:bold}.hljs-number,.hljs-string,.hljs-params,.hljs-meta .hljs-meta-string,.hljs-literal,.hljs-doctag,.hljs-regexp{color:#dc7d00}.hljs-title,.hljs-section,.hljs-name,.hljs-selector-id,.hljs-selector-class{color:#79aeb6}.hljs-attribute,.hljs-attr,.hljs-variable,.hljs-template-variable,.hljs-class .hljs-title,.hljs-type{color:#597471}.hljs-symbol,.hljs-bullet,.hljs-subst,.hljs-meta,.hljs-meta .hljs-keyword,.hljs-selector-attr,.hljs-selector-pseudo,.hljs-link{color:#f99d53}.hljs-built_in,.hljs-deletion{color:#ee6201}.hljs-formula{background:#ebebeb}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:bold}html{font-size:initial}body{color:#454d54}main{font-size:110%}code{color:#454d54;background:#e3e6e8;border-radius:.25rem;padding:0.1rem 
0.25rem;-webkit-transition:color 150ms ease-in;-o-transition:color 150ms ease-in;transition:color 150ms ease-in}a{color:#337ab7;text-decoration:underline;cursor:pointer;-webkit-transition:color 150ms ease-in;-o-transition:color 150ms ease-in;transition:color 150ms ease-in}a:not([href]):hover,a:not([href]):active,a:not([href]):focus{cursor:default}a code{color:#337ab7}a:hover,a:active,a:focus{color:#e78505;text-decoration:underline}a:hover code,a:active code,a:focus code{color:#e78505}.main-article a[href^="http://"],.main-article a[href^="https://"]{display:inline-block}.main-article a[href^="http://"]::after,.main-article a[href^="https://"]::after{content:"";display:inline-block;text-align:left;width:.75rem;height:.625rem;margin-right:-.0625rem;background:url("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAUklEQVR42m3O0QkAMQgD0JvWNZzDndwpRz5SepcIQWof6APgk+5GiKOZwa8EHTMGqwp3ONzdDFUXYufbYES6WVCfHB7EuW5mF7R1goqgVYQhBl+awCunCZHg2gAAAABJRU5ErkJggg==") no-repeat right}.main-article a[href$=".pdf"],.main-article a[href$=".PDF"]{display:inline-block}.main-article a[href$=".pdf"]::after,.main-article a[href$=".PDF"]::after{content:"";display:inline-block;width:1rem;height:1rem;background:url("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA4AAAAQCAYAAAAmlE46AAAEJGlDQ1BJQ0MgUHJvZmlsZQAAOBGFVd9v21QUPolvUqQWPyBYR4eKxa9VU1u5GxqtxgZJk6XtShal6dgqJOQ6N4mpGwfb6baqT3uBNwb8AUDZAw9IPCENBmJ72fbAtElThyqqSUh76MQPISbtBVXhu3ZiJ1PEXPX6yznfOec7517bRD1fabWaGVWIlquunc8klZOnFpSeTYrSs9RLA9Sr6U4tkcvNEi7BFffO6+EdigjL7ZHu/k72I796i9zRiSJPwG4VHX0Z+AxRzNRrtksUvwf7+Gm3BtzzHPDTNgQCqwKXfZwSeNHHJz1OIT8JjtAq6xWtCLwGPLzYZi+3YV8DGMiT4VVuG7oiZpGzrZJhcs/hL49xtzH/Dy6bdfTsXYNY+5yluWO4D4neK/ZUvok/17X0HPBLsF+vuUlhfwX4j/rSfAJ4H1H0qZJ9dN7nR19frRTeBt4Fe9FwpwtN+2p1MXscGLHR9SXrmMgjONd1ZxKzpBeA71b4tNhj6JGoyFNp4GHgwUp9qplfmnFW5oTdy7NamcwCI49kv6fN5IAHgD+0rbyoBc3SOjczohbyS1drbq6pQdqumllRC/0ymTtej8gpbbuVwpQfyw66dqEZyxZKxtHpJn+tZnpnEdrYBbueF9qQn93S7HQGGHnYP7w6L+YGHNtd1FJitqPAR+hERCNOFi1i1alKO6RQnjKUxL1GNjwlMsiEhcPLYTEiT9ISbN15OY/jx4SMshe9LaJRpTvHr3C/ybFYP1PZAfwfYrPsMBtnE6SwN9ib7AhLwTrBDgUKcm06FSrTfSj187xPdVQWOk5Q8vxAfSiIUc7Z7xr6zY/+hpqwSyv0I0/QMTRb7RMgBxNodTfSPqdraz/sDjzKBrv4zu2+a2t0/HHzjd2Lbcc2sG7GtsL42K+xLfxtUgI7YHqKlqHK8HbCCXgjHT1cAdMlDetv4FnQ2lLasaOl6vmB0CMmwT/IPszSueHQqv6i/qluqF+oF9TfO2qEGTumJH0qfSv9KH0nfS/9TIp0Wboi/SRdlb6RLgU5u++9nyXYe69fYRPdil1o1WufNSdTTsp75BfllPy8/LI8G7AUuV8ek6fkvfDsCfbNDP0dvRh0CrNqTbV7LfEEGDQPJQadBtfGVMWEq3QWWdufk6ZSNsjG2PQjp3ZcnOWWing6noonSInvi0/Ex+IzAreevPhe+CawpgP1/pMTMDo64G0sTCXIM+KdOnFWRfQKdJvQzV1+Bt8OokmrdtY2yhVX2a+qrykJfMq4Ml3VR4cVzTQVz+UoNne4vcKLoyS+gyKO6EHe+75Fdt0Mbe5bRIf/wjvrVmhbqBN97RD1vxrahvBOfOYzoosH9bq94uejSOQGkVM6sN/7HelL4t10t9F4gPdVzydEOx83Gv+uNxo7XyL/FtFl8z9ZAHF4bBsrEwAAAAlwSFlzAAALEwAACxMBAJqcGAAAAlVJREFUKBWNUUtrE1EU/uaRkIehWAilWtJUBdPiLi7EjUJdxIWLbnSThX9A/BGCVGs24k4qNNSdaDa6kqJ14cKQohAtahJJwce0eZFkkjuv6zk36kYEL5yZO3fO9zjfhfy/5f1qu0pvUJma63qyXq+h027DMAzYoxGEEOj1evB9X9VgMJDD4VDL5/NIJpPLALbAh++rVekIIR3H+auIRIk1Go2gUCjIZrPJ3yd1QkPXdfhBAAIrNWKHbduqeM8OqFnLZDIolUpotVq7CshgTdPgep6yxnuNyEaVHRzcvQfxw0IqnUYul0MikQA5g8lz8FwBK9KBxiw0K8ZjiO1tOLUGrEoFsWgEU5Ho5D+1/FFkoAqD3i5Zc0jRXszA+/gJAZELIg0CXzljbp2bKSB1wHsmcKnJpdjDqRT0E8chj85C0MxBIHkmxk0UJTUzgIuDUHsikaEwhg+LsB9swOt2MfZc9Dpd9V+nGFSiDGBFJnG5+n045TKmVm8hRA7kkxI0chYQWND8pk++OXperMRADkejhGWtDufCMvSFBXhXLuPL/U3MTifQrn6AqRD0oHtSQNowA4TrwrEs6Ku34dsjmNeuI76UgfF8C7FwGCYDTFIImaYC8v2pa3n0GOE3O9AvXcT4/DmI6cNIzs3hGxGeWVmZAJt7e8hms5gnS/16HdaNm/CquzhSXIdMz8MhxVg8ptI/2N+HQSJmNBpVB3fW1uAfiuPsq9cwyu/w+fQpfC1uwOp0oJH1306SMzOIRCJqNnJLiy6JV+vpM/n22KL8/uKl+v7X4yd/uu5KwdHdQAAAAABJRU5ErkJggg==") no-repeat right}h1{color:red;line-height:initial;font-size:initial;margin-top:initial;margin-bottom:initial}h2{color:#636567;line-height:1.2;font-size:2.30rem;margin-top:3.00rem;padding-bottom:1.00rem}h3{color:#636567;line-height:1.2;font-size:1.70rem;margin-top:1.75rem;padding-bottom:0.50rem}h4{color:#636567;line-height:1.2;font-size:1.50rem;margin-top:1.25rem;padding-bottom:0.50rem}h5{color:#636567;line-height:1.2;font-size:1.30rem;margin-top:0.00rem;padding-bottom:0.50rem}h6{color:#636567;line-height:1.2;font-size:1.25rem;margin-top:0.00rem;padding-bottom:0.50rem}h1,h2,h3,h4,h5,h6{margin-bottom:0}h1 a,h2 a,h3 a,h4 a,h5 a,h6 a{color:#636567;text-decoration:none}h1 a code,h2 a code,h3 a code,h4 a code,h5 a code,h6 a code{color:#636567}h1 a:hover,h1 a:active,h1 a:focus,h2 a:hover,h2 a:active,h2 a:focus,h3 a:hover,h3 a:active,h3 a:focus,h4 a:hover,h4 a:active,h4 a:focus,h5 a:hover,h5 a:active,h5 a:focus,h6 a:hover,h6 a:active,h6 a:focus{color:#636567;text-decoration:none}h1 a:hover code,h1 a:active code,h1 a:focus code,h2 a:hover code,h2 a:active code,h2 a:focus code,h3 a:hover code,h3 a:active code,h3 a:focus code,h4 a:hover code,h4 a:active code,h4 a:focus code,h5 a:hover code,h5 a:active code,h5 a:focus code,h6 a:hover code,h6 a:active code,h6 a:focus code{color:#636567}h1 a a:hover,h1 a a:active,h1 a a:focus,h2 a a:hover,h2 a a:active,h2 a a:focus,h3 a a:hover,h3 a a:active,h3 a a:focus,h4 a a:hover,h4 a a:active,h4 a a:focus,h5 a a:hover,h5 a a:active,h5 a a:focus,h6 a a:hover,h6 a a:active,h6 a a:focus{color:#e78505}hr{border-top:1px dashed #d4d9de}blockquote{font-size:90%;margin-left:2rem;border-left:0.25rem solid #b8bfc7;padding-left:1rem;margin-top:1.5rem;margin-right:1rem}.blocknote{font-size:90%;background:#e1eef4;border-radius:.5rem;border:1px solid #bcd6d0;padding:1rem 1rem 1rem 1rem;margin:1.5rem}.blocknote p:last-of-type{margin-bottom:0}.blocknote .blocknote__title{font-size:110%;padding-bottom:0.25rem}@media (min-width: 48rem){.blocknote{margin-right:1rem}}pre code{display:block;overflow-x:auto;border-radius:.3rem;font-size:90%;color:#454d54;background:#f7f7f7;border:1px solid #b8bfc7;padding:0.5rem 1rem}pre+pre{display:none}ol,ul,dt{margin-bottom:1rem;padding-left:2.5rem}table{margin-top:1rem;margin-bottom:1rem}table code{background:transparent;padding:0;font-size:95%}table caption{color:#454d54}thead{border-bottom:1px solid #454d54}th{padding:0.25rem 0.5rem}td{padding:0.5rem;vertical-align:top}.main-article img{max-width:100%}::-moz-selection{background:#feca87}::selection{background:#feca87}.content-nav{position:fixed;z-index:200;width:100%;top:3.5rem;bottom:0;left:-200%;-webkit-transition:left 200ms ease-in-out 50ms;-o-transition:left 200ms ease-in-out 50ms;transition:left 200ms ease-in-out 50ms}@media (max-width: 47.99rem){.content-nav.content-nav--fullscreen{left:0}}@media (min-width: 48rem){.content-nav{top:4.5rem;bottom:2rem;left:1rem;width:13rem}}@media (min-width: 66.5rem){.content-nav{width:15rem}}.banner{position:fixed;z-index:400;top:0;left:0;right:0;height:3.5rem}@media (min-width: 48rem){.banner{left:0;height:4.5rem}}.content-well{width:100%;padding-top:3.5rem}@media (min-width: 48rem){.content-well{padding-top:4.5rem;padding-right:0.5rem;margin-right:auto;padding-left:16rem;max-width:62.5rem}}@media (min-width: 66.5rem){.content-well{padding-left:19rem;max-width:65.5rem}}@media (min-width: 86rem){.content-well{padding-left:20.5rem;max-width:78rem}}@media (max-width:
47.99rem){.content-well.content-well--immobile{position:fixed;overflow:hidden}}.main-article{padding:.5rem 0.5rem 0 0.5rem}@media (min-width: 48rem){.main-article{padding-top:.5rem}}@media (max-width: 47.99rem){.ERROR404 .main-article{text-align:center}}.main-article main{position:relative;z-index:100}.main-article main h1,.main-article main h2,.main-article main h3,.main-article main h4,.main-article main h5,.main-article main h6{position:relative}.main-article main h1:before,.main-article main h2:before,.main-article main h3:before,.main-article main h4:before,.main-article main h5:before,.main-article main h6:before{display:block;visibility:hidden;content:" ";height:0;margin-top:-4rem;padding-top:4rem}@media (min-width: 48rem){.main-article main h1:before,.main-article main h2:before,.main-article main h3:before,.main-article main h4:before,.main-article main h5:before,.main-article main h6:before{margin-top:-5rem;padding-top:5rem}}.main-article main h1{z-index:-1}.main-article main h2{z-index:-2}.main-article main h3{z-index:-3}.main-article main h4{z-index:-4}.main-article main h5{z-index:-5}.main-article main h6{z-index:-6}.banner{padding:0 1rem 0 1.25rem;background:#f7f7f7;-webkit-box-shadow:0 0 1.5rem 0 rgba(0,0,0,0.4);box-shadow:0 0 1.5rem 0 rgba(0,0,0,0.4)}@media (min-width: 48rem){.banner{background:#ededf0;padding:0 0 0 1.5rem}}.banner__docs-logo{position:relative;top:50%;-webkit-transform:translateY(-50%);-ms-transform:translateY(-50%);-o-transform:translateY(-50%);transform:translateY(-50%)}.banner__docs-logo .docs-logo__image{width:9.375rem;height:auto}@media (min-width: 48rem){.banner__docs-logo{position:static;top:initial;-webkit-transform:none;-ms-transform:none;-o-transform:none;transform:none;padding-top:1.25rem}.banner__docs-logo .docs-logo__image{width:10.1875rem;height:auto}}.banner__menu-bars{height:100%}.banner__menu-bars .menu-bars{font-size:2.5rem;position:relative;top:50%;-webkit-transform:translateY(-50%);-ms-transform:translateY(-50%);-o-transform:translateY(-50%);transform:translateY(-50%)}.banner__navigation-pane{text-transform:uppercase;height:100%}.banner__navigation-pane a{text-decoration:none}.banner__intra-brand__width-wrapper{position:relative;width:100%;margin-bottom:.4rem;padding-left:7rem}@media (min-width: 66.5rem){.banner__intra-brand__width-wrapper{padding-left:11rem;max-width:53.8125rem}}@media (min-width: 86rem){.banner__intra-brand__width-wrapper{padding-left:13.5rem;max-width:66.3125rem}}.index .banner__intra-brand__width-wrapper{max-width:none}.banner__intra-brand__width-wrapper::after{display:block;content:"";position:absolute;left:100%;height:2rem;width:2000rem;background:#6b737a}.banner__intra-brand{background:#6b737a;width:100%;height:2rem}.banner__intra-brand::before{display:inline-block;content:"";border:0px solid transparent;border-bottom-width:2rem;border-right-width:1.5rem;border-bottom-color:#ededf0}.banner__intra-brand .banner__brand-link{font-size:90%;color:#e3e6e8;height:2rem;padding-top:0.35rem;padding-right:1rem;padding-left:1rem}.banner__intra-brand .banner__brand-link:first-of-type{margin-left:3rem}.banner__intra-brand .banner__brand-link:last-of-type{margin-right:1.25rem}.banner__intra-brand .banner__brand-link:hover,.banner__intra-brand .banner__brand-link:active,.banner__intra-brand .banner__brand-link:focus{color:#bcd6d0}.banner__intra-site__width-wrapper{width:100%}@media (min-width: 66.5rem){.banner__intra-site__width-wrapper{max-width:53.8125rem}}@media (min-width: 
86rem){.banner__intra-site__width-wrapper{max-width:66.3125rem}}.banner__intra-site{height:2.1rem}.banner__intra-site .banner__brand-link{font-size:110%;color:#e78505;height:100%;padding-left:1.25rem;padding-right:1.25rem;margin-right:0.5rem;-webkit-transition:color 200ms ease-in, background 200ms ease-in;-o-transition:color 200ms ease-in, background 200ms ease-in;transition:color 200ms ease-in, background 200ms ease-in}.banner__intra-site .banner__brand-link:last-of-type{margin-right:1rem}.banner__intra-site .banner__brand-link:hover,.banner__intra-site .banner__brand-link:active,.banner__intra-site .banner__brand-link:focus{color:#636567;background:#e0e0e3}.banner__intra-site .banner__brand-link.banner__brand-link--current{color:#636567;background:#e0e0e3}.content-nav{color:#788087}@media (min-width: 20rem) and (max-width: 29.99rem){.content-nav{font-size:100%}}@media (min-width: 30rem) and (max-width: 47.99rem){.content-nav{font-size:100%}}@media (min-width: 48rem) and (max-width: 66.49rem){.content-nav{font-size:80%}}@media (min-width: 66.5rem) and (max-width: 85.99rem){.content-nav{font-size:90%}}@media (min-width: 86rem){.content-nav{font-size:90%}}.no-js .content-nav{display:none !important}.content-nav__fixed-top{position:fixed;width:inherit;padding-top:.5rem;background:#f7f7f7;-webkit-box-shadow:-0.125rem -0.125rem 2rem -0.25rem rgba(0,0,0,0.8);box-shadow:-0.125rem -0.125rem 2rem -0.25rem rgba(0,0,0,0.8);-webkit-transition:height 500ms ease;-o-transition:height 500ms ease;transition:height 500ms ease}.content-nav--top-size-full .content-nav__fixed-top{height:8rem}.content-nav--top-size-half .content-nav__fixed-top{height:4rem}@media (min-width: 48rem){.content-nav__fixed-top{z-index:1;width:12.75rem;padding-left:0.5rem;padding-right:0.5rem;padding-top:1rem;-webkit-box-shadow:-0.125rem -0.125rem 2rem -0.25rem rgba(0,0,0,0.2);box-shadow:-0.125rem -0.125rem 2rem -0.25rem rgba(0,0,0,0.2)}.content-nav--top-size-full .content-nav__fixed-top{height:7.5rem}.content-nav--top-size-half .content-nav__fixed-top{height:4.5rem}}@media (min-width: 66.5rem){.content-nav__fixed-top{width:14.75rem;padding-top:1rem}.content-nav--top-size-full .content-nav__fixed-top{height:7.75rem}.content-nav--top-size-half .content-nav__fixed-top{height:4.5rem}}.version-picker{margin:0.5rem 0.75rem 0.5rem 0.75rem}@media (min-width: 48rem){.version-picker{margin-left:0.25rem;margin-right:0.25rem}}.version-picker .product__title{font-size:2.20em;font-weight:700;line-height:1;margin-top:0.3em;color:#788087;text-transform:lowercase;text-decoration:none;-webkit-transition:color 500ms ease;-o-transition:color 500ms ease;transition:color 500ms ease}@media (min-width: 48rem){.version-picker .product__title{margin-top:0.35em}}@media (min-width: 66.5rem){.version-picker .product__title{font-size:2.40em;margin-top:0.25em}}.version-picker .product__title .product__logo{height:0.60em;width:0.60em;margin-bottom:0.10em}.version-picker .product__title .product__title-highlight{color:#fcc074;text-transform:uppercase;font-size:70.5%;font-weight:400;-webkit-transition:color 500ms ease;-o-transition:color 500ms ease;transition:color 500ms ease}.version-picker .product__title:hover,.version-picker .product__title:active,.version-picker .product__title:focus{color:#636567}.version-picker .product__title:hover .product__title-highlight,.version-picker .product__title:active .product__title-highlight,.version-picker .product__title:focus 
.product__title-highlight{color:#f99d53}.content-nav__primary__sizing-box{position:relative;height:100%;-webkit-transition:top 500ms ease, padding-bottom 500ms ease;-o-transition:top 500ms ease, padding-bottom 500ms ease;transition:top 500ms ease, padding-bottom 500ms ease}.content-nav--top-size-full .content-nav__primary__sizing-box{top:8.5rem;padding-bottom:8.5rem}.content-nav--top-size-half .content-nav__primary__sizing-box{top:4.5rem;padding-bottom:4.5rem}@media (min-width: 48rem){.content-nav--top-size-full .content-nav__primary__sizing-box{top:8.5rem;padding-bottom:8.5rem}.content-nav--top-size-half .content-nav__primary__sizing-box{top:5.5rem;padding-bottom:5.5rem}}@media (min-width: 66.5rem){.content-nav--top-size-full .content-nav__primary__sizing-box{top:8.75rem;padding-bottom:8.75rem}.content-nav--top-size-half .content-nav__primary__sizing-box{top:5.5rem;padding-bottom:5.5rem}}.content-nav__primary__shadow-box{-webkit-box-shadow:-0.25rem -0.25rem 4rem -0.5rem rgba(0,0,0,0.8);box-shadow:-0.25rem -0.25rem 4rem -0.5rem rgba(0,0,0,0.8);height:100%}@media (min-width: 48rem){.content-nav__primary__shadow-box{-webkit-box-shadow:-0.25rem -0.25rem 4rem -0.5rem rgba(0,0,0,0.2);box-shadow:-0.25rem -0.25rem 4rem -0.5rem rgba(0,0,0,0.2)}}.content-nav__primary{background:#f7f7f7;height:100%}.content-nav__menu-container{min-height:100%;padding-bottom:8.0625rem}.content-nav__fixed-bottom{height:8.0625rem;margin-top:-8.0625rem;font-size:90%;line-height:1.1rem;background:#9199a1}.footer{padding:1rem .25rem 0rem .25rem;margin-top:2rem;font-size:80%;line-height:1.2;text-align:center;color:#9199a1;border-top:1px solid #d4d9de}@media (min-width: 48rem){.footer{padding:2rem 1rem 1rem 1rem;margin-top:0}}.footer a{color:#9199a1}.footer a:hover,.footer a:active,.footer a:focus{color:#79aeb6}.index .footer{position:relative;padding-top:2rem;margin-top:0}@media (min-width: 66.5rem){.index .footer{text-align:center}}@media (min-width: 48rem){.footer__attributions{text-align:center}}.footer__social{margin:0 0.5rem 1.5rem 0.5rem}@media (min-width: 48rem){.footer__social{margin-bottom:0}}@media (min-width: 
66.5rem){.footer__social{position:absolute;right:0;top:0;margin-top:2.2rem;margin-right:2.0rem}}@font-face{font-family:"anchorjs-icons";src:url(data:n/a;base64,AAEAAAALAIAAAwAwT1MvMg8yG2cAAAE4AAAAYGNtYXDp3gC3AAABpAAAAExnYXNwAAAAEAAAA9wAAAAIZ2x5ZlQCcfwAAAH4AAABCGhlYWQHFvHyAAAAvAAAADZoaGVhBnACFwAAAPQAAAAkaG10eASAADEAAAGYAAAADGxvY2EACACEAAAB8AAAAAhtYXhwAAYAVwAAARgAAAAgbmFtZQGOH9cAAAMAAAAAunBvc3QAAwAAAAADvAAAACAAAQAAAAEAAHzE2p9fDzz1AAkEAAAAAADRecUWAAAAANQA6R8AAAAAAoACwAAAAAgAAgAAAAAAAAABAAADwP/AAAACgAAA/9MCrQABAAAAAAAAAAAAAAAAAAAAAwABAAAAAwBVAAIAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAMCQAGQAAUAAAKZAswAAACPApkCzAAAAesAMwEJAAAAAAAAAAAAAAAAAAAAARAAAAAAAAAAAAAAAAAAAAAAQAAg//0DwP/AAEADwABAAAAAAQAAAAAAAAAAAAAAIAAAAAAAAAIAAAACgAAxAAAAAwAAAAMAAAAcAAEAAwAAABwAAwABAAAAHAAEADAAAAAIAAgAAgAAACDpy//9//8AAAAg6cv//f///+EWNwADAAEAAAAAAAAAAAAAAAAACACEAAEAAAAAAAAAAAAAAAAxAAACAAQARAKAAsAAKwBUAAABIiYnJjQ3NzY2MzIWFxYUBwcGIicmNDc3NjQnJiYjIgYHBwYUFxYUBwYGIwciJicmNDc3NjIXFhQHBwYUFxYWMzI2Nzc2NCcmNDc2MhcWFAcHBgYjARQGDAUtLXoWOR8fORYtLTgKGwoKCjgaGg0gEhIgDXoaGgkJBQwHdR85Fi0tOAobCgoKOBoaDSASEiANehoaCQkKGwotLXoWOR8BMwUFLYEuehYXFxYugC44CQkKGwo4GkoaDQ0NDXoaShoKGwoFBe8XFi6ALjgJCQobCjgaShoNDQ0NehpKGgobCgoKLYEuehYXAAAADACWAAEAAAAAAAEACAAAAAEAAAAAAAIAAwAIAAEAAAAAAAMACAAAAAEAAAAAAAQACAAAAAEAAAAAAAUAAQALAAEAAAAAAAYACAAAAAMAAQQJAAEAEAAMAAMAAQQJAAIABgAcAAMAAQQJAAMAEAAMAAMAAQQJAAQAEAAMAAMAAQQJAAUAAgAiAAMAAQQJAAYAEAAMYW5jaG9yanM0MDBAAGEAbgBjAGgAbwByAGoAcwA0ADAAMABAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAH//wAP) format("truetype")}.anchor-icon::after{-webkit-transition:opacity 100ms ease-in;-o-transition:opacity 100ms ease-in;transition:opacity 100ms ease-in;opacity:0;font-family:'anchorjs-icons';font-style:normal;font-weight:normal;font-variant:normal;text-transform:none;line-height:1;content:"\e9cb";padding-left:0.2em;vertical-align:bottom;font-size:1.2em}.anchor-icon:hover::after,.anchor-icon:active::after,.anchor-icon:focus::after{opacity:1}.code-block__tab-set-wrapper{position:relative}.code-block__tab-set-wrapper .edge-fader{font-size:1rem;width:2rem;height:100%}.code-block__tab-set{white-space:nowrap;list-style:none;padding:0 2rem;margin:0 0 0.5rem 0}.code-block__tab-set .code-block__tab a{text-transform:uppercase;font-size:90%;color:#f99d53;text-decoration:none;border-radius:.25rem;padding:0.5rem 0.5rem;margin-right:0.25rem;-webkit-transition:background-color 200ms ease 50ms, color 200ms ease 50ms;-o-transition:background-color 200ms ease 50ms, color 200ms ease 50ms;transition:background-color 200ms ease 50ms, color 200ms ease 50ms}.code-block__tab-set .code-block__tab a:hover,.code-block__tab-set .code-block__tab a:active,.code-block__tab-set .code-block__tab a:focus{background:#f7f7f7;color:#6c8997;text-decoration:none}.code-block__tab-set .code-block__tab--active a{color:#636567;cursor:default;background:#f7f7f7}.code-block__tab-set .code-block__tab--active a:hover,.code-block__tab-set .code-block__tab--active a:active,.code-block__tab-set .code-block__tab--active a:focus{color:#636567}.code-block__title{text-transform:uppercase;font-size:90%;color:#636567;border-radius:.25rem;padding:0.5rem 0.75rem;margin:0 0 0.25rem 0.75rem;background:#f7f7f7}.content-menu{display:none;margin:0;padding-left:0;list-style:none;margin-top:-0.3rem;padding-top:0.3rem}.content-menu.content-menu--depth-0{margin-top:0;padding-top:0}.content-menu.content-menu--open{display:block}.content-menu.content-menu--depth-0>li{-webkit-box-shadow:0 -0.2rem 0.7rem -0.2rem rgba(0,0,0,0.1);box-shadow:0 -0.2rem 0.7rem -0.2rem 
rgba(0,0,0,0.1)}.content-menu__link--depth-1{padding-left:1.75rem}@media (min-width: 48rem){.content-menu__link--depth-1{padding-left:1.125rem}}.content-menu__link--depth-2{padding-left:2.50rem}@media (min-width: 48rem){.content-menu__link--depth-2{padding-left:1.625rem}}.content-menu__link--depth-3{padding-left:3.25rem}@media (min-width: 48rem){.content-menu__link--depth-3{padding-left:2.125rem}}.content-menu__link--depth-4{padding-left:4.00rem}@media (min-width: 48rem){.content-menu__link--depth-4{padding-left:2.625rem}}.content-menu__link--depth-5{padding-left:4.75rem}@media (min-width: 48rem){.content-menu__link--depth-5{padding-left:3.125rem}}.content-menu__download-entry{background:#ffd195;-webkit-transition:background 500ms ease;-o-transition:background 500ms ease;transition:background 500ms ease}.content-menu__download-entry:hover,.content-menu__download-entry:active,.content-menu__download-entry:focus{background:#fcc074}.content-menu__blank-entry{margin-top:0.25rem;height:1rem;margin-bottom:-1rem}.content-menu__item{margin-left:0;margin-right:0;color:#788087}.content-menu__item:hover,.content-menu__item:active,.content-menu__item:focus{color:#454d54}.content-menu__item a{padding-top:.6rem;padding-bottom:0.65rem;color:inherit;text-decoration:none}.content-menu__item>.content-menu__menu-toggle+a>.content-menu__item__right-border{width:100%;padding-right:0.5rem;border-right:1px solid #e3e6e8}.content-menu__item--selected{background:#79aeb6;color:#f0f2f5;-webkit-box-shadow:0 0 1rem -0.2rem rgba(0,0,0,0.4);box-shadow:0 0 1rem -0.2rem rgba(0,0,0,0.4)}.content-menu__item--selected:hover,.content-menu__item--selected:active,.content-menu__item--selected:focus{color:#f0f2f5}.content-menu__item--selected>.content-menu__menu-toggle+a{border-right-color:#f0f2f5}.content-menu__icon-container{text-align:right;margin-left:1rem;margin-right:0.25rem;padding-top:.6rem;padding-bottom:0.2rem;color:inherit;-webkit-transition:color 500ms ease;-o-transition:color 500ms ease;transition:color 500ms ease}.content-menu__icon-container:before{display:inline-block;width:1.25rem;padding-right:0.5rem}.content-menu__menu-toggle{width:3.0rem;padding-left:0.4rem;text-align:center;padding-top:.6rem;padding-bottom:0.2rem;cursor:pointer}@media (min-width: 48rem){.content-menu__menu-toggle{width:1.85rem;padding-left:0.20rem}}.content-menu__menu-toggle:after{display:inline-block;content:"";-webkit-transition:border-color 500ms ease;-o-transition:border-color 500ms ease;transition:border-color 500ms ease;width:.875rem;height:.875rem;border:.4375rem solid transparent;border-top:.4375rem solid #9199a1;border-bottom:0 none #1f262e;-webkit-transform:translate(0%, 25%);-ms-transform:translate(0%, 25%);-o-transform:translate(0%, 25%);transform:translate(0%, 25%)}.content-menu__item--selected .content-menu__menu-toggle:after{width:.875rem;height:.875rem;border:.4375rem solid transparent;border-top:.4375rem solid #f0f2f5;border-bottom:0 none #1f262e;-webkit-transform:translate(0%, 25%);-ms-transform:translate(0%, 25%);-o-transform:translate(0%, 25%);transform:translate(0%, 25%)}.content-menu__menu-toggle.content-menu__menu-toggle--open:after{width:.875rem;height:.875rem;border:.4375rem solid transparent;border-bottom:.4375rem solid #9199a1;border-top:0 none #1f262e;-webkit-transform:translate(0%, -25%);-ms-transform:translate(0%, -25%);-o-transform:translate(0%, -25%);transform:translate(0%, -25%)}.content-menu__item--selected 
.content-menu__menu-toggle.content-menu__menu-toggle--open:after{width:.875rem;height:.875rem;border:.4375rem solid transparent;border-bottom:.4375rem solid #f0f2f5;border-top:0 none #1f262e;-webkit-transform:translate(0%, -25%);-ms-transform:translate(0%, -25%);-o-transform:translate(0%, -25%);transform:translate(0%, -25%)}@media (min-width: 48rem){.content-menu__menu-toggle:after{width:.625rem;height:.625rem;border:.3125rem solid transparent;border-top:.3125rem solid #9199a1;border-bottom:0 none #1f262e;-webkit-transform:translate(0%, 25%);-ms-transform:translate(0%, 25%);-o-transform:translate(0%, 25%);transform:translate(0%, 25%)}.content-menu__item--selected .content-menu__menu-toggle:after{width:.625rem;height:.625rem;border:.3125rem solid transparent;border-top:.3125rem solid #f0f2f5;border-bottom:0 none #1f262e;-webkit-transform:translate(0%, 25%);-ms-transform:translate(0%, 25%);-o-transform:translate(0%, 25%);transform:translate(0%, 25%)}.content-menu__menu-toggle.content-menu__menu-toggle--open:after{width:.625rem;height:.625rem;border:.3125rem solid transparent;border-bottom:.3125rem solid #9199a1;border-top:0 none #1f262e;-webkit-transform:translate(0%, -25%);-ms-transform:translate(0%, -25%);-o-transform:translate(0%, -25%);transform:translate(0%, -25%)}.content-menu__item--selected .content-menu__menu-toggle.content-menu__menu-toggle--open:after{width:.625rem;height:.625rem;border:.3125rem solid transparent;border-bottom:.3125rem solid #f0f2f5;border-top:0 none #1f262e;-webkit-transform:translate(0%, -25%);-ms-transform:translate(0%, -25%);-o-transform:translate(0%, -25%);transform:translate(0%, -25%)}}.content-nav__fixed-bottom .content-menu{display:block;padding-top:0;margin-top:0}.content-nav__fixed-bottom .content-menu>li{-webkit-box-shadow:0 -0.2rem 0.7rem -0.2rem rgba(0,0,0,0.2);box-shadow:0 -0.2rem 0.7rem -0.2rem rgba(0,0,0,0.2)}.content-nav__fixed-bottom .content-menu__item{color:#e3e6e8}.content-nav__fixed-bottom .content-menu__item:hover,.content-nav__fixed-bottom .content-menu__item:active,.content-nav__fixed-bottom .content-menu__item:focus{color:#f0f2f5}.content-nav__fixed-bottom .content-menu__item a{padding-top:0;padding-bottom:0.5rem}.content-nav__fixed-bottom .content-menu__icon-container{margin-left:0.5rem}.edge-fader{width:0%;height:0%;font-size:1rem;position:absolute}.edge-fader--left{left:0;background:-webkit-gradient(linear, right top, left top, from(rgba(255,255,255,0.8)), color-stop(80%, #fff));background:-webkit-linear-gradient(right, rgba(255,255,255,0.8) 0%, #fff 80%);background:-o-linear-gradient(right, rgba(255,255,255,0.8) 0%, #fff 80%);background:linear-gradient(to left, rgba(255,255,255,0.8) 0%, #fff 80%);filter:progid:DXImageTransform.Microsoft.gradient( startColorstr='#ffffff', endColorstr='#ccffffff', GradientType=1 )}.edge-fader--left .edge-fader__arrow{position:absolute;left:0;top:50%;width:1em;height:1em;border:.5em solid transparent;border-right:.5em solid #feca87;border-left:0 none transparent;-webkit-transform:translate(-25%, -50%);-ms-transform:translate(-25%, -50%);-o-transform:translate(-25%, -50%);transform:translate(-25%, -50%);cursor:pointer;-webkit-transition:border-color 500ms ease 20ms;-o-transition:border-color 500ms ease 20ms;transition:border-color 500ms ease 20ms}.edge-fader--left .edge-fader__arrow:hover,.edge-fader--left .edge-fader__arrow:active,.edge-fader--left .edge-fader__arrow:focus{width:1em;height:1em;border:.5em solid transparent;border-right:.5em solid #f99d53;border-left:0 none 
transparent;-webkit-transform:translate(-25%, -50%);-ms-transform:translate(-25%, -50%);-o-transform:translate(-25%, -50%);transform:translate(-25%, -50%)}.edge-fader--left .edge-fader__arrow.edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-right:.5em solid #e1eef4;border-left:0 none transparent;-webkit-transform:translate(-25%, -50%);-ms-transform:translate(-25%, -50%);-o-transform:translate(-25%, -50%);transform:translate(-25%, -50%);cursor:default}.edge-fader--left .edge-fader__arrow.edge-fader__arrow--invisible{width:1em;height:1em;border:.5em solid transparent;border-right:.5em solid transparent;border-left:0 none transparent;-webkit-transform:translate(-25%, -50%);-ms-transform:translate(-25%, -50%);-o-transform:translate(-25%, -50%);transform:translate(-25%, -50%);cursor:default}.edge-fader--right{right:0;background:-webkit-gradient(linear, left top, right top, from(rgba(255,255,255,0.8)), color-stop(80%, #fff));background:-webkit-linear-gradient(left, rgba(255,255,255,0.8) 0%, #fff 80%);background:-o-linear-gradient(left, rgba(255,255,255,0.8) 0%, #fff 80%);background:linear-gradient(to right, rgba(255,255,255,0.8) 0%, #fff 80%);filter:progid:DXImageTransform.Microsoft.gradient( startColorstr='#ccffffff', endColorstr='#ffffff', GradientType=1 )}.edge-fader--right .edge-fader__arrow{position:absolute;right:0;top:50%;width:1em;height:1em;border:.5em solid transparent;border-left:.5em solid #feca87;border-right:0 none transparent;-webkit-transform:translate(25%, -50%);-ms-transform:translate(25%, -50%);-o-transform:translate(25%, -50%);transform:translate(25%, -50%);cursor:pointer;-webkit-transition:border-color 500ms ease 20ms;-o-transition:border-color 500ms ease 20ms;transition:border-color 500ms ease 20ms}.edge-fader--right .edge-fader__arrow:hover,.edge-fader--right .edge-fader__arrow:active,.edge-fader--right .edge-fader__arrow:focus{width:1em;height:1em;border:.5em solid transparent;border-left:.5em solid #f99d53;border-right:0 none transparent;-webkit-transform:translate(25%, -50%);-ms-transform:translate(25%, -50%);-o-transform:translate(25%, -50%);transform:translate(25%, -50%)}.edge-fader--right .edge-fader__arrow.edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-left:.5em solid #e1eef4;border-right:0 none transparent;-webkit-transform:translate(25%, -50%);-ms-transform:translate(25%, -50%);-o-transform:translate(25%, -50%);transform:translate(25%, -50%);cursor:default}.edge-fader--right .edge-fader__arrow.edge-fader__arrow--invisible{width:1em;height:1em;border:.5em solid transparent;border-left:.5em solid transparent;border-right:0 none transparent;-webkit-transform:translate(25%, -50%);-ms-transform:translate(25%, -50%);-o-transform:translate(25%, -50%);transform:translate(25%, -50%);cursor:default}.edge-fader--top{top:0;background:-webkit-gradient(linear, left bottom, left top, from(rgba(255,255,255,0.8)), color-stop(80%, #fff));background:-webkit-linear-gradient(bottom, rgba(255,255,255,0.8) 0%, #fff 80%);background:-o-linear-gradient(bottom, rgba(255,255,255,0.8) 0%, #fff 80%);background:linear-gradient(to top, rgba(255,255,255,0.8) 0%, #fff 80%);filter:progid:DXImageTransform.Microsoft.gradient( startColorstr='#ffffff', endColorstr='#ccffffff', GradientType=0 )}.edge-fader--top .edge-fader__arrow{position:absolute;top:0;left:50%;width:1em;height:1em;border:.5em solid transparent;border-bottom:.5em solid #feca87;border-top:0 none transparent;-webkit-transform:translate(-50%, 
-25%);-ms-transform:translate(-50%, -25%);-o-transform:translate(-50%, -25%);transform:translate(-50%, -25%);cursor:pointer;-webkit-transition:border-color 500ms ease 20ms;-o-transition:border-color 500ms ease 20ms;transition:border-color 500ms ease 20ms}.edge-fader--top .edge-fader__arrow:hover,.edge-fader--top .edge-fader__arrow:active,.edge-fader--top .edge-fader__arrow:focus{width:1em;height:1em;border:.5em solid transparent;border-bottom:.5em solid #f99d53;border-top:0 none transparent;-webkit-transform:translate(-50%, -25%);-ms-transform:translate(-50%, -25%);-o-transform:translate(-50%, -25%);transform:translate(-50%, -25%)}.edge-fader--top .edge-fader__arrow.edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-bottom:.5em solid #e1eef4;border-top:0 none transparent;-webkit-transform:translate(-50%, -25%);-ms-transform:translate(-50%, -25%);-o-transform:translate(-50%, -25%);transform:translate(-50%, -25%);cursor:default}.edge-fader--top .edge-fader__arrow.edge-fader__arrow--invisible{width:1em;height:1em;border:.5em solid transparent;border-bottom:.5em solid transparent;border-top:0 none transparent;-webkit-transform:translate(-50%, -25%);-ms-transform:translate(-50%, -25%);-o-transform:translate(-50%, -25%);transform:translate(-50%, -25%);cursor:default}.edge-fader--bottom{bottom:0;background:-webkit-gradient(linear, left top, left bottom, from(rgba(255,255,255,0.8)), color-stop(80%, #fff));background:-webkit-linear-gradient(top, rgba(255,255,255,0.8) 0%, #fff 80%);background:-o-linear-gradient(top, rgba(255,255,255,0.8) 0%, #fff 80%);background:linear-gradient(to bottom, rgba(255,255,255,0.8) 0%, #fff 80%);filter:progid:DXImageTransform.Microsoft.gradient( startColorstr='#ccffffff', endColorstr='#ffffff', GradientType=0 )}.edge-fader--bottom .edge-fader__arrow{position:absolute;bottom:0;left:50%;width:1em;height:1em;border:.5em solid transparent;border-top:.5em solid #feca87;border-bottom:0 none transparent;-webkit-transform:translate(-50%, 25%);-ms-transform:translate(-50%, 25%);-o-transform:translate(-50%, 25%);transform:translate(-50%, 25%);cursor:pointer;-webkit-transition:border-color 500ms ease 20ms;-o-transition:border-color 500ms ease 20ms;transition:border-color 500ms ease 20ms}.edge-fader--bottom .edge-fader__arrow:hover,.edge-fader--bottom .edge-fader__arrow:active,.edge-fader--bottom .edge-fader__arrow:focus{width:1em;height:1em;border:.5em solid transparent;border-top:.5em solid #f99d53;border-bottom:0 none transparent;-webkit-transform:translate(-50%, 25%);-ms-transform:translate(-50%, 25%);-o-transform:translate(-50%, 25%);transform:translate(-50%, 25%)}.edge-fader--bottom .edge-fader__arrow.edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-top:.5em solid #e1eef4;border-bottom:0 none transparent;-webkit-transform:translate(-50%, 25%);-ms-transform:translate(-50%, 25%);-o-transform:translate(-50%, 25%);transform:translate(-50%, 25%);cursor:default}.edge-fader--bottom .edge-fader__arrow.edge-fader__arrow--invisible{width:1em;height:1em;border:.5em solid transparent;border-top:.5em solid transparent;border-bottom:0 none transparent;-webkit-transform:translate(-50%, 25%);-ms-transform:translate(-50%, 25%);-o-transform:translate(-50%, 25%);transform:translate(-50%, 25%);cursor:default}@media (min-width: 20rem){.front-matter{margin-bottom:3rem}}@media (min-width: 48rem){.front-matter{margin-bottom:5rem}}.front-matter__title{color:#636567;line-height:1.2;font-size:3rem;margin-top:0}@media (min-width: 
20rem){.front-matter__title{margin-bottom:2rem}}@media (min-width: 48rem){.front-matter__title{margin-bottom:3rem}}.front-matter__title .front-matter__title--supertext{font-size:70%}.menu-bars{width:.8em;height:.625em;cursor:pointer}.menu-bars:before{content:"";width:inherit;height:.125em;position:absolute;background:#636567;-webkit-box-shadow:0 .25em 0 0 #636567,0 .5em 0 0 #636567;box-shadow:0 .25em 0 0 #636567,0 .5em 0 0 #636567}.index .menu-bars{display:none}.search{border-radius:.25rem;background:#9199a1;color:#f0f2f5;padding:0.25rem 0.25rem 0.25rem 0;margin:0.25rem}.search__icon-container{text-align:right;padding:0.25rem 0;margin-left:0.75rem}.search__icon-container:before{display:inline-block;width:1.25rem;padding-right:0.5rem}.search__input{border:none;background:transparent;width:100%;border-radius:.25rem;padding:0.25rem 0 0.25rem 0.25rem;-webkit-transition:background 200ms ease-in, color 200ms ease-in;-o-transition:background 200ms ease-in, color 200ms ease-in;transition:background 200ms ease-in, color 200ms ease-in}.search__input:focus{background:#636567;color:#f0f2f5}.search__input::-webkit-input-placeholder{color:#d4d9de}.search__input::-moz-placeholder{color:#d4d9de}.search__input:-ms-input-placeholder{color:#d4d9de}.search__input::placeholder{color:#d4d9de}.selector__title{font-size:60%;letter-spacing:.125rem;text-transform:uppercase;text-align:center;line-height:1;margin-bottom:0.3em;padding-left:.0625rem}.selector__btn{border:none;border-radius:.25em;width:100%;line-height:1;padding:0.5em 0.9em;font-size:120%;color:#788087;background:#ffd195;-webkit-transition:color 500ms ease, background 500ms ease;-o-transition:color 500ms ease, background 500ms ease;transition:color 500ms ease, background 500ms ease}.selector__btn:hover,.selector__btn:active,.selector__btn:focus{color:#454d54;background:#fcc074}.js .selector__btn{cursor:pointer}.selector__arrow{position:relative;margin:0 0.2em 0 auto;-webkit-transition:border-color 500ms ease;-o-transition:border-color 500ms ease;transition:border-color 500ms ease;width:.75em;height:.75em;border:.375em solid transparent;border-top:.375em solid #ffd195;border-bottom:0 none #6c8997;-webkit-transform:translate(0%, 25%);-ms-transform:translate(0%, 25%);-o-transform:translate(0%, 25%);transform:translate(0%, 25%)}.selector--open .selector__arrow{width:.75em;height:.75em;border:.375em solid transparent;border-bottom:.375em solid #ffd195;border-top:0 none #6c8997;-webkit-transform:translate(0%, -25%);-ms-transform:translate(0%, -25%);-o-transform:translate(0%, -25%);transform:translate(0%, -25%)}.selector-pane__sizing-box{position:absolute;top:7.75rem;height:0;max-height:0;width:100%;z-index:100;-webkit-transition:height 500ms ease, max-height 500ms ease;-o-transition:height 500ms ease, max-height 500ms ease;transition:height 500ms ease, max-height 500ms ease}@media (min-width: 48rem){.selector-pane__sizing-box{left:0.25rem;top:7.15rem;width:auto}}@media (min-width: 66.5rem){.selector-pane__sizing-box{left:0.45rem;top:7.4rem}}.selector-pane__shadow-box{height:1000px;max-height:1000px;-webkit-transition:height 500ms ease, max-height 500ms ease;-o-transition:height 500ms ease, max-height 500ms ease;transition:height 500ms ease, max-height 500ms ease;-webkit-box-shadow:-0.25rem -0.25rem 2rem -0.25rem rgba(0,0,0,0.2);box-shadow:-0.25rem -0.25rem 2rem -0.25rem 
rgba(0,0,0,0.2)}.selector-pane__primary{height:100%;background:#ebebeb;color:#f0f2f5;font-size:105%;padding-left:1rem;padding-right:1rem;white-space:nowrap;overflow-y:hidden}.selector-list__sizing-box{position:relative;height:100%;vertical-align:bottom;padding-left:0.75rem}.selector-list__sizing-box:first-of-type{padding-left:0}.selector-list__scroll-box{height:100%;padding-top:1rem;padding-bottom:1rem;padding-left:0.6rem;padding-right:0.4rem;margin-left:-0.6rem;margin-right:-0.4rem}.selector-list{margin:0;padding:0;list-style:none;-webkit-box-shadow:-0.125rem -0.125rem 1rem -0.2rem rgba(0,0,0,0.2);box-shadow:-0.125rem -0.125rem 1rem -0.2rem rgba(0,0,0,0.2)}.selector-list__element{background:rgba(255,0,0,0.7);color:#f0f2f5;text-align:center;min-width:3.5rem;border-top:.0625rem solid #d4d9de;-webkit-transition:background 400ms ease;-o-transition:background 400ms ease;transition:background 400ms ease}.selector-list__element a{padding:0.6rem 0.6rem;color:inherit;text-decoration:none}.selector-list__element:first-of-type{border-top-left-radius:.15rem;border-top-right-radius:.15rem;border-top:none}.selector-list__element:last-of-type{border-bottom-left-radius:.15rem;border-bottom-right-radius:.15rem}.selector-list__element:only-of-type{border-radius:.15rem;border-top:none}.selector-list__element--1{background:#f99d53}.selector-list__element--1.selector-list__element--current{background:#fbb984}.selector-list__element--1:hover,.selector-list__element--1:active,.selector-list__element--1:focus{background:#fbb984}.selector-list__element--1.selector-list__element--lts-flag:hover,.selector-list__element--1.selector-list__element--lts-flag:active,.selector-list__element--1.selector-list__element--lts-flag:focus{background:#f99d53}.selector-list__element--2{background:#79aeb6}.selector-list__element--2.selector-list__element--current{background:#9ac2c8}.selector-list__element--2:hover,.selector-list__element--2:active,.selector-list__element--2:focus{background:#9ac2c8}.selector-list__element--2.selector-list__element--lts-flag:hover,.selector-list__element--2.selector-list__element--lts-flag:active,.selector-list__element--2.selector-list__element--lts-flag:focus{background:#79aeb6}.selector-list__element--3{background:#88b3a8}.selector-list__element--3.selector-list__element--current{background:#a7c7bf}.selector-list__element--3:hover,.selector-list__element--3:active,.selector-list__element--3:focus{background:#a7c7bf}.selector-list__element--3.selector-list__element--lts-flag:hover,.selector-list__element--3.selector-list__element--lts-flag:active,.selector-list__element--3.selector-list__element--lts-flag:focus{background:#88b3a8}.selector-list__element--4{background:#6f8a86}.selector-list__element--4.selector-list__element--current{background:#8ba19e}.selector-list__element--4:hover,.selector-list__element--4:active,.selector-list__element--4:focus{background:#8ba19e}.selector-list__element--4.selector-list__element--lts-flag:hover,.selector-list__element--4.selector-list__element--lts-flag:active,.selector-list__element--4.selector-list__element--lts-flag:focus{background:#6f8a86}.selector-list__element--5{background:#6c8997}.selector-list__element--5.selector-list__element--current{background:#8aa1ac}.selector-list__element--5:hover,.selector-list__element--5:active,.selector-list__element--5:focus{background:#8aa1ac}.selector-list__element--5.selector-list__element--lts-flag:hover,.selector-list__element--5.selector-list__element--lts-flag:active,
.selector-list__element--5.selector-list__element--lts-flag:focus{background:#6c8997}.selector-list__element--6{background:#3b4a5d}.selector-list__element--6.selector-list__element--current{background:#4f637c}.selector-list__element--6:hover,.selector-list__element--6:active,.selector-list__element--6:focus{background:#4f637c}.selector-list__element--6.selector-list__element--lts-flag:hover,.selector-list__element--6.selector-list__element--lts-flag:active,.selector-list__element--6.selector-list__element--lts-flag:focus{background:#3b4a5d}.selector-list__element--archived{color:#d4d9de;background:#f0f2f5}.selector-list__element--archived:hover,.selector-list__element--archived:active,.selector-list__element--archived:focus{color:#f0f2f5;background:#d4d9de}.selector-list__element--disabled{background:#f0f2f5;border-top:.0625rem solid #f7f8f9;color:#d4d9de}.selector-list__element--current a,.selector-list__element--disabled a{pointer-events:none;cursor:default}.selector-list__element--lts-flag{color:#fff}.selector-list__element--lts-flag:hover,.selector-list__element--lts-flag:active,.selector-list__element--lts-flag:focus{background:inherit}.selector-list__element--lts-flag a{padding-top:0.4rem;padding-bottom:0.2rem;letter-spacing:.1875rem;font-size:80%}.selector-pane__shadow-box .edge-fader--left{font-size:0.8rem;width:0.75rem;margin-left:0.2rem;height:1.25rem;max-height:100%;top:50%;-webkit-transform:translateY(-50%);-ms-transform:translateY(-50%);-o-transform:translateY(-50%);transform:translateY(-50%);z-index:1;background:transparent;-webkit-filter:none;filter:none}.selector-pane__shadow-box .edge-fader--left .edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-right:.5em solid transparent;border-left:0 none transparent;-webkit-transform:translate(-25%, -50%);-ms-transform:translate(-25%, -50%);-o-transform:translate(-25%, -50%);transform:translate(-25%, -50%)}.selector-pane__shadow-box .edge-fader--right{font-size:0.8rem;width:0.75rem;margin-right:0.2rem;height:1.25rem;max-height:100%;top:50%;-webkit-transform:translateY(-50%);-ms-transform:translateY(-50%);-o-transform:translateY(-50%);transform:translateY(-50%);z-index:1;background:transparent;-webkit-filter:none;filter:none}.selector-pane__shadow-box .edge-fader--right .edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-left:.5em solid transparent;border-right:0 none transparent;-webkit-transform:translate(25%, -50%);-ms-transform:translate(25%, -50%);-o-transform:translate(25%, -50%);transform:translate(25%, -50%)}.selector-list__sizing-box .edge-fader--top{font-size:0.8rem;top:0;left:1.4rem;width:1.25rem;height:0.75rem;margin-top:0.1rem;max-height:100%;background:transparent;-webkit-filter:none;filter:none}.selector-list__sizing-box .edge-fader--top .edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-bottom:.5em solid transparent;border-top:0 none transparent;-webkit-transform:translate(-50%, -25%);-ms-transform:translate(-50%, -25%);-o-transform:translate(-50%, -25%);transform:translate(-50%, -25%)}.selector-list__sizing-box .edge-fader--bottom{font-size:0.8rem;top:0;left:0.6rem;width:1.25rem;height:0.75rem;margin-top:0.1rem;max-height:100%;background:transparent;-webkit-filter:none;filter:none}.selector-list__sizing-box .edge-fader--bottom .edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-top:.5em solid transparent;border-bottom:0 none transparent;-webkit-transform:translate(-50%, 25%);-ms-transform:translate(-50%, 
25%);-o-transform:translate(-50%, 25%);transform:translate(-50%, 25%)}.social-button{position:relative;height:42px;width:42px;margin-right:8px}.social-button:last-of-type{margin-right:0}.social-button .social-button__primary{position:absolute;left:0;height:42px;width:42px;background-image:url("/images/index/social-sprite.png");background-repeat:no-repeat;background-position-y:0;opacity:1;-webkit-transition:opacity 200ms ease-in;-o-transition:opacity 200ms ease-in;transition:opacity 200ms ease-in}.social-button .social-button__inverted{position:absolute;left:0;height:42px;width:42px;background-image:url("/images/index/social-sprite.png");background-repeat:no-repeat;background-position-y:-42px;opacity:0;-webkit-transition:opacity 200ms ease-in;-o-transition:opacity 200ms ease-in;transition:opacity 200ms ease-in}.social-button:hover .social-button__primary,.social-button:active .social-button__primary,.social-button:focus .social-button__primary{opacity:0}.social-button:hover .social-button__inverted,.social-button:active .social-button__inverted,.social-button:focus .social-button__inverted{opacity:1}.social-button--facebook .social-button__primary{background-position-x:0}.social-button--facebook .social-button__inverted{background-position-x:0}.social-button--twitter .social-button__primary{background-position-x:-50px}.social-button--twitter .social-button__inverted{background-position-x:-50px}.social-button--youtube .social-button__primary{background-position-x:-100px}.social-button--youtube .social-button__inverted{background-position-x:-100px}.social-button--linkedin .social-button__primary{background-position-x:-150px}.social-button--linkedin .social-button__inverted{background-position-x:-150px}.social-button--github .social-button__primary{background-position-x:-200px}.social-button--github .social-button__inverted{background-position-x:-200px}.table-of-contents{margin-bottom:3rem}.table-of-contents__title{margin-top:0;margin-bottom:0.5rem}@media (min-width: 30rem){.table-of-contents__wrapper--multi{-webkit-column-count:2;-moz-column-count:2;column-count:2}}@media (min-width: 86rem){.table-of-contents__wrapper--multi{-webkit-column-count:3;-moz-column-count:3;column-count:3}}.table-of-contents__items{color:#9199a1;margin-bottom:0}.table-of-contents__items a{color:#636567}.table-of-contents__items a:hover,.table-of-contents__items a:active,.table-of-contents__items a:focus{color:#e78505}.table-of-contents__item{font-size:110%;padding-top:.25rem}@media (min-width: 48rem) and (max-width: 66.49rem){.table-of-contents__item{font-size:100%}}.index .welcome-content{padding-top:3.5rem}@media (min-width: 48rem){.index .welcome-content{padding-top:4.5rem}}.index .welcome{background-color:#fcc074;background:url("/images/index/home-banner.jpg") no-repeat bottom/cover;padding-top:3rem;padding-bottom:3rem;color:#636567;font-size:1.7rem;line-height:1;text-align:center;margin-bottom:0}@media (min-width: 48rem){.index .welcome{padding-bottom:5rem}}.index .welcome h1{letter-spacing:-1px;color:#636567;text-transform:uppercase;font-size:2.5rem;margin-bottom:1rem}@media (min-width: 66.5rem){.index .welcome h1{font-size:3rem}}.index .product-content{-webkit-box-shadow:0 0 1.5rem 0 rgba(0,0,0,0.2);box-shadow:0 0 1.5rem 0 rgba(0,0,0,0.2)}.index .product-content .product-callout{text-align:center;color:#788087;font-size:1.2rem;padding-top:1rem;margin-bottom:0}@media (min-width: 48rem){.index .product-content .product-callout{font-size:1.5rem}}.index .product-picker{padding-top:2rem;padding-bottom:5rem}@media 
(min-width: 66.5rem){.index .product-picker{max-width:71rem;padding-top:5rem;padding-bottom:6rem}}.index .product__wrapper{padding-bottom:2rem;padding-left:1rem;padding-right:1rem}.index .product__wrapper:last-of-type{padding-bottom:0}.index .product__card{border-radius:.3rem;padding:1.5rem 1rem;color:#636567;line-height:1.3;font-size:1.2rem;text-align:center;text-decoration:none;background:#f5f3f5;border:1px solid #ebebeb;-webkit-box-shadow:0 -0.25rem 4rem -0.5rem rgba(0,0,0,0.1);box-shadow:0 -0.25rem 4rem -0.5rem rgba(0,0,0,0.1);-webkit-transition:color 200ms ease-in, background 200ms ease-in;-o-transition:color 200ms ease-in, background 200ms ease-in;transition:color 200ms ease-in, background 200ms ease-in}.index .product__card .product__title{font-size:3.75rem;font-weight:700;line-height:1;margin-bottom:1.25rem;color:#636567;text-transform:lowercase;-webkit-transition:color 200ms ease-in;-o-transition:color 200ms ease-in;transition:color 200ms ease-in}.index .product__card .product__title .product__logo{vertical-align:middle;height:0.60em;width:0.60em;margin-bottom:0.10em}.index .product__card .product__title .product__logo .logo{height:100%;width:100%}.index .product__card .product__title .product__logo--invertible{position:relative}.index .product__card .product__title .product__logo--invertible .logo{position:absolute;left:0;-webkit-transition:opacity 200ms ease-in;-o-transition:opacity 200ms ease-in;transition:opacity 200ms ease-in}.index .product__card .product__title .product__logo--invertible .logo--primary{opacity:1}.index .product__card .product__title .product__logo--invertible .logo--inverted{opacity:0}.index .product__card .product__title .product__title-highlight{color:#f99d53;text-transform:uppercase;font-size:70.5%;font-weight:400;-webkit-transition:color 200ms ease-in;-o-transition:color 200ms ease-in;transition:color 200ms ease-in}@media (min-width: 66.5rem){.index .product__card .product__description{min-height:3.9em}}.index .product__card:hover,.index .product__card:active,.index .product__card:focus{text-decoration:none;color:#f0f2f5;background:#6c8997}.index .product__card:hover .product__title,.index .product__card:active .product__title,.index .product__card:focus .product__title{color:#f0f2f5}.index .product__card:hover .product__title .product__logo--invertible .logo--primary,.index .product__card:active .product__title .product__logo--invertible .logo--primary,.index .product__card:focus .product__title .product__logo--invertible .logo--primary{opacity:0}.index .product__card:hover .product__title .product__logo--invertible .logo--inverted,.index .product__card:active .product__title .product__logo--invertible .logo--inverted,.index .product__card:focus .product__title .product__logo--invertible .logo--inverted{opacity:1}.index .highlight__wrapper{-webkit-box-shadow:0 -0.15rem 3rem -0.5rem rgba(0,0,0,0.2);box-shadow:0 -0.15rem 3rem -0.5rem rgba(0,0,0,0.2);font-size:1.2rem}@media (min-width: 66.5rem){.index .highlight__wrapper{background:url("/images/index/highlights-bg-split.jpg") left/cover}}@media (max-width: 66.49rem){.index .highlight__container{padding:0;margin:0;max-width:none}.index .highlight__container .row{margin:0}}.index .highlight__container .highlight__title{letter-spacing:-1px;font-size:1.8rem;text-align:left;padding-bottom:0.25rem}.highlight__left .index .highlight__container .highlight__title{padding-bottom:0.5rem}.index .highlight__container .highlight__link-wrapper{text-align:center}.index .highlight__container 
.highlight__link{border-radius:.15rem;padding:0.8rem 2.5rem;font-size:85%;text-decoration:none;text-transform:uppercase;word-spacing:-1px;-webkit-transition:color 500ms ease, background 500ms ease;-o-transition:color 500ms ease, background 500ms ease;transition:color 500ms ease, background 500ms ease}.index .highlight__container .highlight__left{padding:3rem 1rem 2rem 1rem;color:#f7f7f7;background-color:#3b4a5d;background:url("/images/index/highlights-bg-dark.jpg")}@media (min-width: 66.5rem){.index .highlight__container .highlight__left{padding-top:2rem;min-height:16rem}.index .highlight__container .highlight__left p{min-height:5.4rem}}@media (min-width: 86rem){.index .highlight__container .highlight__left{padding-top:3rem}}.index .highlight__container .highlight__left .highlight__link{color:#f7f7f7;background:#f99d53}.index .highlight__container .highlight__left .highlight__link:hover,.index .highlight__container .highlight__left .highlight__link:active,.index .highlight__container .highlight__left .highlight__link:focus{color:#f7f7f7;background:#e78505}.index .highlight__container .highlight__right{padding:2rem 1rem 2rem 1rem;color:#3b4a5d;background-color:#f7f7f7;background:url("/images/index/highlights-bg-light.jpg") repeat}@media (min-width: 66.5rem){.index .highlight__container .highlight__right{padding-top:2rem;padding-left:3rem;min-height:16rem}.index .highlight__container .highlight__right p{min-height:5.4rem}}@media (min-width: 86rem){.index .highlight__container .highlight__right{padding-top:3rem}}.index .highlight__container .highlight__right .highlight__link{color:#3b4a5d;background:#ffd195}.index .highlight__container .highlight__right .highlight__link:hover,.index .highlight__container .highlight__right .highlight__link:active,.index .highlight__container .highlight__right .highlight__link:focus{color:#3b4a5d;background:#fcc074}.client-library-logos{list-style:none}.client-library-logo{border-radius:.25rem;margin:0.5rem;padding:0.4rem;-webkit-box-shadow:0 -0.25rem 2rem -0.5rem rgba(0,0,0,0.2);box-shadow:0 -0.25rem 2rem -0.5rem rgba(0,0,0,0.2);background:#f7f7f7;-webkit-transition:background 300ms ease-in, -webkit-box-shadow 300ms ease-in;transition:background 300ms ease-in, -webkit-box-shadow 300ms ease-in;-o-transition:background 300ms ease-in, box-shadow 300ms ease-in;transition:background 300ms ease-in, box-shadow 300ms ease-in;transition:background 300ms ease-in, box-shadow 300ms ease-in, -webkit-box-shadow 300ms ease-in}.client-library-logo:hover,.client-library-logo:active,.client-library-logo:focus{background:#e3e6e8;-webkit-box-shadow:0 -0.25rem 2rem -0.5rem rgba(0,0,0,0.4);box-shadow:0 -0.25rem 2rem -0.5rem rgba(0,0,0,0.4)}table.use-cases__image-links{width:100%}table.use-cases__image-links td:first-child{width:30%}table.use-cases__image-links td:first-child img{width:100%}table.use-cases__image-links td:last-child{width:70%}.main-article{-webkit-transform:translate3d(0, 0, 0)}
*//*! 
normalize.css v4.0.0 | MIT License | github.com/necolas/normalize.css */@import url("https://fonts.googleapis.com/css?family=Oswald:400");@import url("https://fonts.googleapis.com/css?family=Muli:400,700");@import url("https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700");html{font-family:sans-serif;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}body{margin:0}article,aside,details,figcaption,figure,footer,header,main,menu,nav,section,summary{display:block}audio,canvas,progress,video{display:inline-block}audio:not([controls]){display:none;height:0}progress{vertical-align:baseline}template,[hidden]{display:none}a{background-color:transparent}a:active,a:hover{outline-width:0}abbr[title]{border-bottom:none;text-decoration:underline;text-decoration:underline dotted}b,strong{font-weight:inherit}b,strong{font-weight:bolder}dfn{font-style:italic}h1{font-size:2em;margin:0.67em 0}mark{background-color:#ff0;color:#000}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-0.25em}sup{top:-0.5em}img{border-style:none}svg:not(:root){overflow:hidden}code,kbd,pre,samp{font-family:monospace, monospace;font-size:1em}figure{margin:1em 40px}hr{-webkit-box-sizing:content-box;box-sizing:content-box;height:0;overflow:visible}button,input,select,textarea{font:inherit}optgroup{font-weight:bold}button,input,select{overflow:visible}button,input,select,textarea{margin:0}button,select{text-transform:none}button,[type="button"],[type="reset"],[type="submit"]{cursor:pointer}[disabled]{cursor:default}button,html [type="button"],[type="reset"],[type="submit"]{-webkit-appearance:button}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}button:-moz-focusring,input:-moz-focusring{outline:1px dotted ButtonText}fieldset{border:1px solid #c0c0c0;margin:0 2px;padding:0.35em 0.625em 0.75em}legend{-webkit-box-sizing:border-box;box-sizing:border-box;color:inherit;display:table;max-width:100%;padding:0;white-space:normal}textarea{overflow:auto}[type="checkbox"],[type="radio"]{-webkit-box-sizing:border-box;box-sizing:border-box;padding:0}[type="number"]::-webkit-inner-spin-button,[type="number"]::-webkit-outer-spin-button{height:auto}[type="search"]{-webkit-appearance:textfield}[type="search"]::-webkit-search-cancel-button,[type="search"]::-webkit-search-decoration{-webkit-appearance:none}html{-webkit-box-sizing:border-box;box-sizing:border-box}*,*::before,*::after{-webkit-box-sizing:inherit;box-sizing:inherit}@-ms-viewport{width:device-width}html{font-size:16px;-ms-overflow-style:scrollbar;-webkit-tap-highlight-color:transparent}body{font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif;font-size:1rem;line-height:1.5;color:red;background-color:#fff}[tabindex="-1"]:focus{outline:none !important}h1,h2,h3,h4,h5,h6{margin-top:0;margin-bottom:.5rem}p{margin-top:0;margin-bottom:1rem}abbr[title],abbr[data-original-title]{cursor:help;border-bottom:1px dotted red}address{margin-bottom:1rem;font-style:normal;line-height:inherit}ol,ul,dl{margin-top:0;margin-bottom:1rem}ol ol,ul ul,ol ul,ul ol{margin-bottom:0}dt{font-weight:bold}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 1rem}a{color:#f99d53;text-decoration:none}a:focus,a:hover{color:#f67309;text-decoration:underline}a:focus{outline:5px auto 
-webkit-focus-ring-color;outline-offset:-2px}a:not([href]):not([tabindex]){color:inherit;text-decoration:none}a:not([href]):not([tabindex]):focus,a:not([href]):not([tabindex]):hover{color:inherit;text-decoration:none}a:not([href]):not([tabindex]):focus{outline:none}pre{margin-top:0;margin-bottom:1rem;overflow:auto}figure{margin:0 0 1rem}img{vertical-align:middle}[role="button"]{cursor:pointer}a,area,button,[role="button"],input,label,select,summary,textarea{-ms-touch-action:manipulation;touch-action:manipulation}table{border-collapse:collapse;background-color:transparent}caption{padding-top:.75rem;padding-bottom:.75rem;color:red;text-align:left;caption-side:bottom}th{text-align:left}label{display:inline-block;margin-bottom:.5rem}button:focus{outline:1px dotted;outline:5px auto -webkit-focus-ring-color}input,button,select,textarea{margin:0;line-height:inherit;border-radius:0}input[type="radio"]:disabled,input[type="checkbox"]:disabled{cursor:not-allowed}input[type="date"],input[type="time"],input[type="datetime-local"],input[type="month"]{-webkit-appearance:listbox}textarea{resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{display:block;width:100%;padding:0;margin-bottom:.5rem;font-size:1.5rem;line-height:inherit}input[type="search"]{-webkit-appearance:none}output{display:inline-block}[hidden]{display:none !important}h1,h2,h3,h4,h5,h6,.h1,.h2,.h3,.h4,.h5,.h6{margin-bottom:.5rem;font-family:inherit;font-weight:500;line-height:1.1;color:inherit}h1,.h1{font-size:2.5rem}h2,.h2{font-size:2rem}h3,.h3{font-size:1.75rem}h4,.h4{font-size:1.5rem}h5,.h5{font-size:1.25rem}h6,.h6{font-size:1rem}.lead{font-size:1.25rem;font-weight:300}.display-1{font-size:6rem;font-weight:300}.display-2{font-size:5.5rem;font-weight:300}.display-3{font-size:4.5rem;font-weight:300}.display-4{font-size:3.5rem;font-weight:300}hr{margin-top:1rem;margin-bottom:1rem;border:0;border-top:1px solid rgba(0,0,0,0.1)}small,.small{font-size:80%;font-weight:normal}mark,.mark{padding:.2em;background-color:#fcf8e3}.list-unstyled{padding-left:0;list-style:none}.list-inline{padding-left:0;list-style:none}.list-inline-item{display:inline-block}.list-inline-item:not(:last-child){margin-right:5px}.initialism{font-size:90%;text-transform:uppercase}.blockquote{padding:.5rem 1rem;margin-bottom:1rem;font-size:1.25rem;border-left:.25rem solid red}.blockquote-footer{display:block;font-size:80%;color:red}.blockquote-footer::before{content:"\2014 \00A0"}.blockquote-reverse{padding-right:1rem;padding-left:0;text-align:right;border-right:.25rem solid red;border-left:0}.blockquote-reverse .blockquote-footer::before{content:""}.blockquote-reverse .blockquote-footer::after{content:"\00A0 \2014"}dl.row>dd+dt{clear:left}.img-fluid,.carousel-inner>.carousel-item>img,.carousel-inner>.carousel-item>a>img{display:block;max-width:100%;height:auto}.img-rounded{border-radius:.3rem}.img-thumbnail{padding:.25rem;background-color:#fff;border:1px solid #ddd;border-radius:.25rem;-webkit-transition:all .2s ease-in-out;-o-transition:all .2s ease-in-out;transition:all .2s ease-in-out;-webkit-box-shadow:0 1px 2px rgba(0,0,0,0.075);box-shadow:0 1px 2px rgba(0,0,0,0.075);display:inline-block;max-width:100%;height:auto}.img-circle{border-radius:50%}.figure{display:inline-block}.figure-img{margin-bottom:.5rem;line-height:1}.figure-caption{font-size:90%;color:red}code,kbd,pre,samp{font-family:Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace}code{padding:.2rem 
.4rem;font-size:90%;color:#bd4147;background-color:#f7f7f9;border-radius:.25rem}kbd{padding:.2rem .4rem;font-size:90%;color:#fff;background-color:#333;border-radius:.2rem;-webkit-box-shadow:inset 0 -0.1rem 0 rgba(0,0,0,0.25);box-shadow:inset 0 -0.1rem 0 rgba(0,0,0,0.25)}kbd kbd{padding:0;font-size:100%;font-weight:bold;-webkit-box-shadow:none;box-shadow:none}pre{display:block;margin-top:0;margin-bottom:1rem;font-size:90%;color:red}pre code{padding:0;font-size:inherit;color:inherit;background-color:transparent;border-radius:0}.pre-scrollable{max-height:340px;overflow-y:scroll}.container{margin-left:auto;margin-right:auto;padding-left:15px;padding-right:15px}.container::after{content:"";display:table;clear:both}@media (min-width: 30rem){.container{max-width:32rem}}@media (min-width: 48rem){.container{max-width:45rem}}@media (min-width: 66.5rem){.container{max-width:58.75rem}}@media (min-width: 86rem){.container{max-width:71.25rem}}.container-fluid{margin-left:auto;margin-right:auto;padding-left:15px;padding-right:15px}.container-fluid::after{content:"";display:table;clear:both}.row{margin-left:-15px;margin-right:-15px}.row::after{content:"";display:table;clear:both}.col-xs-1,.col-xs-2,.col-xs-3,.col-xs-4,.col-xs-5,.col-xs-6,.col-xs-7,.col-xs-8,.col-xs-9,.col-xs-10,.col-xs-11,.col-xs-12,.col-sm-1,.col-sm-2,.col-sm-3,.col-sm-4,.col-sm-5,.col-sm-6,.col-sm-7,.col-sm-8,.col-sm-9,.col-sm-10,.col-sm-11,.col-sm-12,.col-md-1,.col-md-2,.col-md-3,.col-md-4,.col-md-5,.col-md-6,.col-md-7,.col-md-8,.col-md-9,.col-md-10,.col-md-11,.col-md-12,.col-lg-1,.col-lg-2,.col-lg-3,.col-lg-4,.col-lg-5,.col-lg-6,.col-lg-7,.col-lg-8,.col-lg-9,.col-lg-10,.col-lg-11,.col-lg-12,.col-xl-1,.col-xl-2,.col-xl-3,.col-xl-4,.col-xl-5,.col-xl-6,.col-xl-7,.col-xl-8,.col-xl-9,.col-xl-10,.col-xl-11,.col-xl-12{position:relative;min-height:1px;padding-right:15px;padding-left:15px}@media (min-width: 20rem){.col-xs-1{float:left;width:8.33333%}.col-xs-2{float:left;width:16.66667%}.col-xs-3{float:left;width:25%}.col-xs-4{float:left;width:33.33333%}.col-xs-5{float:left;width:41.66667%}.col-xs-6{float:left;width:50%}.col-xs-7{float:left;width:58.33333%}.col-xs-8{float:left;width:66.66667%}.col-xs-9{float:left;width:75%}.col-xs-10{float:left;width:83.33333%}.col-xs-11{float:left;width:91.66667%}.col-xs-12{float:left;width:100%}.pull-xs-0{right:auto}.pull-xs-1{right:8.33333%}.pull-xs-2{right:16.66667%}.pull-xs-3{right:25%}.pull-xs-4{right:33.33333%}.pull-xs-5{right:41.66667%}.pull-xs-6{right:50%}.pull-xs-7{right:58.33333%}.pull-xs-8{right:66.66667%}.pull-xs-9{right:75%}.pull-xs-10{right:83.33333%}.pull-xs-11{right:91.66667%}.pull-xs-12{right:100%}.push-xs-0{left:auto}.push-xs-1{left:8.33333%}.push-xs-2{left:16.66667%}.push-xs-3{left:25%}.push-xs-4{left:33.33333%}.push-xs-5{left:41.66667%}.push-xs-6{left:50%}.push-xs-7{left:58.33333%}.push-xs-8{left:66.66667%}.push-xs-9{left:75%}.push-xs-10{left:83.33333%}.push-xs-11{left:91.66667%}.push-xs-12{left:100%}.offset-xs-1{margin-left:8.33333%}.offset-xs-2{margin-left:16.66667%}.offset-xs-3{margin-left:25%}.offset-xs-4{margin-left:33.33333%}.offset-xs-5{margin-left:41.66667%}.offset-xs-6{margin-left:50%}.offset-xs-7{margin-left:58.33333%}.offset-xs-8{margin-left:66.66667%}.offset-xs-9{margin-left:75%}.offset-xs-10{margin-left:83.33333%}.offset-xs-11{margin-left:91.66667%}}@media (min-width: 
30rem){.col-sm-1{float:left;width:8.33333%}.col-sm-2{float:left;width:16.66667%}.col-sm-3{float:left;width:25%}.col-sm-4{float:left;width:33.33333%}.col-sm-5{float:left;width:41.66667%}.col-sm-6{float:left;width:50%}.col-sm-7{float:left;width:58.33333%}.col-sm-8{float:left;width:66.66667%}.col-sm-9{float:left;width:75%}.col-sm-10{float:left;width:83.33333%}.col-sm-11{float:left;width:91.66667%}.col-sm-12{float:left;width:100%}.pull-sm-0{right:auto}.pull-sm-1{right:8.33333%}.pull-sm-2{right:16.66667%}.pull-sm-3{right:25%}.pull-sm-4{right:33.33333%}.pull-sm-5{right:41.66667%}.pull-sm-6{right:50%}.pull-sm-7{right:58.33333%}.pull-sm-8{right:66.66667%}.pull-sm-9{right:75%}.pull-sm-10{right:83.33333%}.pull-sm-11{right:91.66667%}.pull-sm-12{right:100%}.push-sm-0{left:auto}.push-sm-1{left:8.33333%}.push-sm-2{left:16.66667%}.push-sm-3{left:25%}.push-sm-4{left:33.33333%}.push-sm-5{left:41.66667%}.push-sm-6{left:50%}.push-sm-7{left:58.33333%}.push-sm-8{left:66.66667%}.push-sm-9{left:75%}.push-sm-10{left:83.33333%}.push-sm-11{left:91.66667%}.push-sm-12{left:100%}.offset-sm-0{margin-left:0%}.offset-sm-1{margin-left:8.33333%}.offset-sm-2{margin-left:16.66667%}.offset-sm-3{margin-left:25%}.offset-sm-4{margin-left:33.33333%}.offset-sm-5{margin-left:41.66667%}.offset-sm-6{margin-left:50%}.offset-sm-7{margin-left:58.33333%}.offset-sm-8{margin-left:66.66667%}.offset-sm-9{margin-left:75%}.offset-sm-10{margin-left:83.33333%}.offset-sm-11{margin-left:91.66667%}}@media (min-width: 48rem){.col-md-1{float:left;width:8.33333%}.col-md-2{float:left;width:16.66667%}.col-md-3{float:left;width:25%}.col-md-4{float:left;width:33.33333%}.col-md-5{float:left;width:41.66667%}.col-md-6{float:left;width:50%}.col-md-7{float:left;width:58.33333%}.col-md-8{float:left;width:66.66667%}.col-md-9{float:left;width:75%}.col-md-10{float:left;width:83.33333%}.col-md-11{float:left;width:91.66667%}.col-md-12{float:left;width:100%}.pull-md-0{right:auto}.pull-md-1{right:8.33333%}.pull-md-2{right:16.66667%}.pull-md-3{right:25%}.pull-md-4{right:33.33333%}.pull-md-5{right:41.66667%}.pull-md-6{right:50%}.pull-md-7{right:58.33333%}.pull-md-8{right:66.66667%}.pull-md-9{right:75%}.pull-md-10{right:83.33333%}.pull-md-11{right:91.66667%}.pull-md-12{right:100%}.push-md-0{left:auto}.push-md-1{left:8.33333%}.push-md-2{left:16.66667%}.push-md-3{left:25%}.push-md-4{left:33.33333%}.push-md-5{left:41.66667%}.push-md-6{left:50%}.push-md-7{left:58.33333%}.push-md-8{left:66.66667%}.push-md-9{left:75%}.push-md-10{left:83.33333%}.push-md-11{left:91.66667%}.push-md-12{left:100%}.offset-md-0{margin-left:0%}.offset-md-1{margin-left:8.33333%}.offset-md-2{margin-left:16.66667%}.offset-md-3{margin-left:25%}.offset-md-4{margin-left:33.33333%}.offset-md-5{margin-left:41.66667%}.offset-md-6{margin-left:50%}.offset-md-7{margin-left:58.33333%}.offset-md-8{margin-left:66.66667%}.offset-md-9{margin-left:75%}.offset-md-10{margin-left:83.33333%}.offset-md-11{margin-left:91.66667%}}@media (min-width: 
66.5rem){.col-lg-1{float:left;width:8.33333%}.col-lg-2{float:left;width:16.66667%}.col-lg-3{float:left;width:25%}.col-lg-4{float:left;width:33.33333%}.col-lg-5{float:left;width:41.66667%}.col-lg-6{float:left;width:50%}.col-lg-7{float:left;width:58.33333%}.col-lg-8{float:left;width:66.66667%}.col-lg-9{float:left;width:75%}.col-lg-10{float:left;width:83.33333%}.col-lg-11{float:left;width:91.66667%}.col-lg-12{float:left;width:100%}.pull-lg-0{right:auto}.pull-lg-1{right:8.33333%}.pull-lg-2{right:16.66667%}.pull-lg-3{right:25%}.pull-lg-4{right:33.33333%}.pull-lg-5{right:41.66667%}.pull-lg-6{right:50%}.pull-lg-7{right:58.33333%}.pull-lg-8{right:66.66667%}.pull-lg-9{right:75%}.pull-lg-10{right:83.33333%}.pull-lg-11{right:91.66667%}.pull-lg-12{right:100%}.push-lg-0{left:auto}.push-lg-1{left:8.33333%}.push-lg-2{left:16.66667%}.push-lg-3{left:25%}.push-lg-4{left:33.33333%}.push-lg-5{left:41.66667%}.push-lg-6{left:50%}.push-lg-7{left:58.33333%}.push-lg-8{left:66.66667%}.push-lg-9{left:75%}.push-lg-10{left:83.33333%}.push-lg-11{left:91.66667%}.push-lg-12{left:100%}.offset-lg-0{margin-left:0%}.offset-lg-1{margin-left:8.33333%}.offset-lg-2{margin-left:16.66667%}.offset-lg-3{margin-left:25%}.offset-lg-4{margin-left:33.33333%}.offset-lg-5{margin-left:41.66667%}.offset-lg-6{margin-left:50%}.offset-lg-7{margin-left:58.33333%}.offset-lg-8{margin-left:66.66667%}.offset-lg-9{margin-left:75%}.offset-lg-10{margin-left:83.33333%}.offset-lg-11{margin-left:91.66667%}}@media (min-width: 86rem){.col-xl-1{float:left;width:8.33333%}.col-xl-2{float:left;width:16.66667%}.col-xl-3{float:left;width:25%}.col-xl-4{float:left;width:33.33333%}.col-xl-5{float:left;width:41.66667%}.col-xl-6{float:left;width:50%}.col-xl-7{float:left;width:58.33333%}.col-xl-8{float:left;width:66.66667%}.col-xl-9{float:left;width:75%}.col-xl-10{float:left;width:83.33333%}.col-xl-11{float:left;width:91.66667%}.col-xl-12{float:left;width:100%}.pull-xl-0{right:auto}.pull-xl-1{right:8.33333%}.pull-xl-2{right:16.66667%}.pull-xl-3{right:25%}.pull-xl-4{right:33.33333%}.pull-xl-5{right:41.66667%}.pull-xl-6{right:50%}.pull-xl-7{right:58.33333%}.pull-xl-8{right:66.66667%}.pull-xl-9{right:75%}.pull-xl-10{right:83.33333%}.pull-xl-11{right:91.66667%}.pull-xl-12{right:100%}.push-xl-0{left:auto}.push-xl-1{left:8.33333%}.push-xl-2{left:16.66667%}.push-xl-3{left:25%}.push-xl-4{left:33.33333%}.push-xl-5{left:41.66667%}.push-xl-6{left:50%}.push-xl-7{left:58.33333%}.push-xl-8{left:66.66667%}.push-xl-9{left:75%}.push-xl-10{left:83.33333%}.push-xl-11{left:91.66667%}.push-xl-12{left:100%}.offset-xl-0{margin-left:0%}.offset-xl-1{margin-left:8.33333%}.offset-xl-2{margin-left:16.66667%}.offset-xl-3{margin-left:25%}.offset-xl-4{margin-left:33.33333%}.offset-xl-5{margin-left:41.66667%}.offset-xl-6{margin-left:50%}.offset-xl-7{margin-left:58.33333%}.offset-xl-8{margin-left:66.66667%}.offset-xl-9{margin-left:75%}.offset-xl-10{margin-left:83.33333%}.offset-xl-11{margin-left:91.66667%}}.table{width:100%;max-width:100%;margin-bottom:1rem}.table th,.table td{padding:.75rem;vertical-align:top;border-top:1px solid red}.table thead th{vertical-align:bottom;border-bottom:2px solid red}.table tbody+tbody{border-top:2px solid red}.table .table{background-color:#fff}.table-sm th,.table-sm td{padding:.3rem}.table-bordered{border:1px solid red}.table-bordered th,.table-bordered td{border:1px solid red}.table-bordered thead th,.table-bordered thead td{border-bottom-width:2px}.table-striped tbody tr:nth-of-type(odd){background-color:rgba(0,0,0,0.05)}.table-hover tbody 
tr:hover{background-color:rgba(0,0,0,0.075)}.table-active,.table-active>th,.table-active>td{background-color:rgba(0,0,0,0.075)}.table-hover .table-active:hover{background-color:rgba(0,0,0,0.075)}.table-hover .table-active:hover>td,.table-hover .table-active:hover>th{background-color:rgba(0,0,0,0.075)}.table-success,.table-success>th,.table-success>td{background-color:#dff0d8}.table-hover .table-success:hover{background-color:#d0e9c6}.table-hover .table-success:hover>td,.table-hover .table-success:hover>th{background-color:#d0e9c6}.table-info,.table-info>th,.table-info>td{background-color:#d9edf7}.table-hover .table-info:hover{background-color:#c4e3f3}.table-hover .table-info:hover>td,.table-hover .table-info:hover>th{background-color:#c4e3f3}.table-warning,.table-warning>th,.table-warning>td{background-color:#fcf8e3}.table-hover .table-warning:hover{background-color:#faf2cc}.table-hover .table-warning:hover>td,.table-hover .table-warning:hover>th{background-color:#faf2cc}.table-danger,.table-danger>th,.table-danger>td{background-color:#f2dede}.table-hover .table-danger:hover{background-color:#ebcccc}.table-hover .table-danger:hover>td,.table-hover .table-danger:hover>th{background-color:#ebcccc}.thead-inverse th{color:#fff;background-color:red}.thead-default th{color:red;background-color:red}.table-inverse{color:red;background-color:red}.table-inverse th,.table-inverse td,.table-inverse thead th{border-color:red}.table-inverse.table-bordered{border:0}.table-responsive{display:block;width:100%;min-height:.01%;overflow-x:auto}.table-reflow thead{float:left}.table-reflow tbody{display:block;white-space:nowrap}.table-reflow th,.table-reflow td{border-top:1px solid red;border-left:1px solid red}.table-reflow th:last-child,.table-reflow td:last-child{border-right:1px solid red}.table-reflow thead:last-child tr:last-child th,.table-reflow thead:last-child tr:last-child td,.table-reflow tbody:last-child tr:last-child th,.table-reflow tbody:last-child tr:last-child td,.table-reflow tfoot:last-child tr:last-child th,.table-reflow tfoot:last-child tr:last-child td{border-bottom:1px solid red}.table-reflow tr{float:left}.table-reflow tr th,.table-reflow tr td{display:block !important;border:1px solid red}.form-control{display:block;width:100%;padding:.5rem .75rem;font-size:1rem;line-height:1.25;color:red;background-color:#fff;background-image:none;-webkit-background-clip:padding-box;background-clip:padding-box;border:1px solid rgba(0,0,0,0.15);border-radius:.25rem;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-webkit-transition:border-color ease-in-out 0.15s,-webkit-box-shadow ease-in-out 0.15s;transition:border-color ease-in-out 0.15s,-webkit-box-shadow ease-in-out 0.15s;-o-transition:border-color ease-in-out 0.15s,box-shadow ease-in-out 0.15s;transition:border-color ease-in-out 0.15s,box-shadow ease-in-out 0.15s;transition:border-color ease-in-out 0.15s,box-shadow ease-in-out 0.15s,-webkit-box-shadow ease-in-out 0.15s}.form-control::-ms-expand{background-color:transparent;border:0}.form-control:focus{color:red;background-color:#fff;border-color:#66afe9;outline:none;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(102,175,233,0.6);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px 
[Compiled stylesheet, continued — machine-generated, minified Bootstrap-derived CSS, hard-wrapped in this extraction so that selectors and values are split mid-token. The remainder of this hunk covers: form-control placeholder, disabled, and size variants; validation feedback (.has-success/.has-warning/.has-danger with inline-SVG feedback icons); buttons (.btn-primary through .btn-danger, the .btn-outline-* variants, and .btn-link); fade/collapse utilities; dropdowns; button groups and toolbars; input groups; custom checkboxes, radios, selects, and file inputs; navs, tabs, and pills; navbars (light/dark variants and toggleable breakpoints); cards and their decks, groups, and columns; breadcrumbs; pagination; tags; the jumbotron; alerts; progress bars (striped, animated, and contextual variants); media objects; list groups; responsive embeds; the close button; modals; tooltips; popovers; and the carousel, truncated mid-rule at the end of this section. The brand primary color is #f99d53 (orange); nearly every other contextual color compiles to the literal `red`, which reads as a placeholder palette rather than a finished one.]
.icon-next{position:absolute;top:50%;z-index:5;display:inline-block;width:20px;height:20px;margin-top:-10px;font-family:serif;line-height:1}.carousel-control .icon-prev{left:50%;margin-left:-10px}.carousel-control .icon-next{right:50%;margin-right:-10px}.carousel-control .icon-prev::before{content:"\2039"}.carousel-control .icon-next::before{content:"\203a"}.carousel-indicators{position:absolute;bottom:10px;left:50%;z-index:15;width:60%;padding-left:0;margin-left:-30%;text-align:center;list-style:none}.carousel-indicators li{display:inline-block;width:10px;height:10px;margin:1px;text-indent:-999px;cursor:pointer;background-color:transparent;border:1px solid #fff;border-radius:10px}.carousel-indicators .active{width:12px;height:12px;margin:0;background-color:#fff}.carousel-caption{position:absolute;right:15%;bottom:20px;left:15%;z-index:10;padding-top:20px;padding-bottom:20px;color:#fff;text-align:center;text-shadow:0 1px 2px rgba(0,0,0,0.6)}.carousel-caption .btn{text-shadow:none}@media (min-width: 30rem){.carousel-control .icon-prev,.carousel-control .icon-next{width:30px;height:30px;margin-top:-15px;font-size:30px}.carousel-control .icon-prev{margin-left:-15px}.carousel-control .icon-next{margin-right:-15px}.carousel-caption{right:20%;left:20%;padding-bottom:30px}.carousel-indicators{bottom:20px}}.bg-inverse{background-color:red}.bg-faded{background-color:red}.bg-primary{color:#fff !important;background-color:#f99d53 !important}a.bg-primary:focus,a.bg-primary:hover{background-color:#f78122 !important}.bg-success{color:#fff !important;background-color:red !important}a.bg-success:focus,a.bg-success:hover{background-color:#c00 !important}.bg-info{color:#fff !important;background-color:red !important}a.bg-info:focus,a.bg-info:hover{background-color:#c00 !important}.bg-warning{color:#fff !important;background-color:red !important}a.bg-warning:focus,a.bg-warning:hover{background-color:#c00 !important}.bg-danger{color:#fff !important;background-color:red !important}a.bg-danger:focus,a.bg-danger:hover{background-color:#c00 !important}.clearfix::after{content:"";display:table;clear:both}.d-block{display:block !important}.d-inline-block{display:inline-block !important}.d-inline{display:inline !important}@media (min-width: 20rem){.pull-xs-left{float:left !important}.pull-xs-right{float:right !important}.pull-xs-none{float:none !important}}@media (min-width: 30rem){.pull-sm-left{float:left !important}.pull-sm-right{float:right !important}.pull-sm-none{float:none !important}}@media (min-width: 48rem){.pull-md-left{float:left !important}.pull-md-right{float:right !important}.pull-md-none{float:none !important}}@media (min-width: 66.5rem){.pull-lg-left{float:left !important}.pull-lg-right{float:right !important}.pull-lg-none{float:none !important}}@media (min-width: 86rem){.pull-xl-left{float:left !important}.pull-xl-right{float:right !important}.pull-xl-none{float:none !important}}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0, 0, 0, 0);border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;margin:0;overflow:visible;clip:auto}.w-100{width:100% !important}.m-x-auto{margin-right:auto !important;margin-left:auto !important}.m-a-0{margin:0 0 !important}.m-t-0{margin-top:0 !important}.m-r-0{margin-right:0 !important}.m-b-0{margin-bottom:0 !important}.m-l-0{margin-left:0 !important}.m-x-0{margin-right:0 !important;margin-left:0 !important}.m-y-0{margin-top:0 !important;margin-bottom:0 !important}.m-a-1{margin:1rem 
1rem !important}.m-t-1{margin-top:1rem !important}.m-r-1{margin-right:1rem !important}.m-b-1{margin-bottom:1rem !important}.m-l-1{margin-left:1rem !important}.m-x-1{margin-right:1rem !important;margin-left:1rem !important}.m-y-1{margin-top:1rem !important;margin-bottom:1rem !important}.m-a-2{margin:1.5rem 1.5rem !important}.m-t-2{margin-top:1.5rem !important}.m-r-2{margin-right:1.5rem !important}.m-b-2{margin-bottom:1.5rem !important}.m-l-2{margin-left:1.5rem !important}.m-x-2{margin-right:1.5rem !important;margin-left:1.5rem !important}.m-y-2{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.m-a-3{margin:3rem 3rem !important}.m-t-3{margin-top:3rem !important}.m-r-3{margin-right:3rem !important}.m-b-3{margin-bottom:3rem !important}.m-l-3{margin-left:3rem !important}.m-x-3{margin-right:3rem !important;margin-left:3rem !important}.m-y-3{margin-top:3rem !important;margin-bottom:3rem !important}.p-a-0{padding:0 0 !important}.p-t-0{padding-top:0 !important}.p-r-0{padding-right:0 !important}.p-b-0{padding-bottom:0 !important}.p-l-0{padding-left:0 !important}.p-x-0{padding-right:0 !important;padding-left:0 !important}.p-y-0{padding-top:0 !important;padding-bottom:0 !important}.p-a-1{padding:1rem 1rem !important}.p-t-1{padding-top:1rem !important}.p-r-1{padding-right:1rem !important}.p-b-1{padding-bottom:1rem !important}.p-l-1{padding-left:1rem !important}.p-x-1{padding-right:1rem !important;padding-left:1rem !important}.p-y-1{padding-top:1rem !important;padding-bottom:1rem !important}.p-a-2{padding:1.5rem 1.5rem !important}.p-t-2{padding-top:1.5rem !important}.p-r-2{padding-right:1.5rem !important}.p-b-2{padding-bottom:1.5rem !important}.p-l-2{padding-left:1.5rem !important}.p-x-2{padding-right:1.5rem !important;padding-left:1.5rem !important}.p-y-2{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.p-a-3{padding:3rem 3rem !important}.p-t-3{padding-top:3rem !important}.p-r-3{padding-right:3rem !important}.p-b-3{padding-bottom:3rem !important}.p-l-3{padding-left:3rem !important}.p-x-3{padding-right:3rem !important;padding-left:3rem !important}.p-y-3{padding-top:3rem !important;padding-bottom:3rem !important}.pos-f-t{position:fixed;top:0;right:0;left:0;z-index:1030}.text-justify{text-align:justify !important}.text-nowrap{white-space:nowrap !important}.text-truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}@media (min-width: 20rem){.text-xs-left{text-align:left !important}.text-xs-right{text-align:right !important}.text-xs-center{text-align:center !important}}@media (min-width: 30rem){.text-sm-left{text-align:left !important}.text-sm-right{text-align:right !important}.text-sm-center{text-align:center !important}}@media (min-width: 48rem){.text-md-left{text-align:left !important}.text-md-right{text-align:right !important}.text-md-center{text-align:center !important}}@media (min-width: 66.5rem){.text-lg-left{text-align:left !important}.text-lg-right{text-align:right !important}.text-lg-center{text-align:center !important}}@media (min-width: 86rem){.text-xl-left{text-align:left !important}.text-xl-right{text-align:right !important}.text-xl-center{text-align:center !important}}.text-lowercase{text-transform:lowercase !important}.text-uppercase{text-transform:uppercase !important}.text-capitalize{text-transform:capitalize !important}.font-weight-normal{font-weight:normal}.font-weight-bold{font-weight:bold}.font-italic{font-style:italic}.text-muted{color:red !important}a.text-muted:focus,a.text-muted:hover{color:#c00}.text-primary{color:#f99d53 
!important}a.text-primary:focus,a.text-primary:hover{color:#f78122}.text-success{color:red !important}a.text-success:focus,a.text-success:hover{color:#c00}.text-info{color:red !important}a.text-info:focus,a.text-info:hover{color:#c00}.text-warning{color:red !important}a.text-warning:focus,a.text-warning:hover{color:#c00}.text-danger{color:red !important}a.text-danger:focus,a.text-danger:hover{color:#c00}.text-hide{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0}.invisible{visibility:hidden !important}@media (min-width: 20rem){.hidden-xs-up{display:none !important}}@media (max-width: 29.99rem){.hidden-xs-down{display:none !important}}@media (min-width: 30rem){.hidden-sm-up{display:none !important}}@media (max-width: 47.99rem){.hidden-sm-down{display:none !important}}@media (min-width: 48rem){.hidden-md-up{display:none !important}}@media (max-width: 66.49rem){.hidden-md-down{display:none !important}}@media (min-width: 66.5rem){.hidden-lg-up{display:none !important}}@media (max-width: 85.99rem){.hidden-lg-down{display:none !important}}@media (min-width: 86rem){.hidden-xl-up{display:none !important}}.hidden-xl-down{display:none !important}.visible-print-block{display:none !important}@media print{.visible-print-block{display:block !important}}.visible-print-inline{display:none !important}@media print{.visible-print-inline{display:inline !important}}.visible-print-inline-block{display:none !important}@media print{.visible-print-inline-block{display:inline-block !important}}@media print{.hidden-print{display:none !important}}code{font-family:"Source Code Pro", monospace}.version-picker .product__title,.index .product__card .product__title{font-family:"Muli", sans-serif}.banner__navigation-pane,.index .welcome h1,.index .highlight__container .highlight__title,.index .highlight__container .highlight__link{font-family:"Oswald", sans-serif}body,.index .welcome,.index .product__card{font-family:"GandhiSerif", serif;font-weight:400;font-style:normal}h3,h4,h5,h6,.blocknote .blocknote__title,.content-nav,.code-block__tab-set .code-block__tab a,.code-block__title,.selector__title,.table-of-contents__items{font-family:"OpenSans", verdana, arial, sans-serif;font-weight:400;font-style:normal}h1,h2,.front-matter__title{font-family:"OpenSans", verdana, arial, sans-serif;font-weight:300;font-style:normal}.selector__btn,.other__btn,.selector-list__element a{font-family:"OpenSans", verdana, arial, sans-serif;font-weight:700;font-style:normal}@font-face{font-family:"GandhiSerif";font-weight:400;font-style:normal;src:local("Gandhi Serif Regular"),local("GandhiSerif-Regular"),url("../fonts/gandhiserif-regular.woff") format("woff")}@font-face{font-family:"GandhiSerif";font-weight:400;font-style:italic;src:local("Gandhi Serif Italic"),local("GandhiSerif-Italic"),url("../fonts/gandhiserif-italic.woff") format("woff")}@font-face{font-family:"GandhiSerif";font-weight:700;font-style:normal;src:local("Gandhi Serif Bold"),local("GandhiSerif-Bold"),url("../fonts/gandhiserif-bold.woff") format("woff")}@font-face{font-family:"GandhiSerif";font-weight:700;font-style:italic;src:local("Gandhi Serif Bold Italic"),local("GandhiSerif-BoldItalic"),url("../fonts/gandhiserif-bolditalic.woff") format("woff")}@font-face{font-family:"OpenSans";font-weight:300;font-style:normal;src:local("Open Sans Light"),local("OpenSans-Light"),url("../fonts/opensans-light.woff") format("woff")}@font-face{font-family:"OpenSans";font-weight:400;font-style:normal;src:local("Open Sans 
Regular"),local("OpenSans"),url("../fonts/opensans-regular.woff") format("woff")}@font-face{font-family:"OpenSans";font-weight:400;font-style:italic;src:local("Open Sans Italic"),local("OpenSans-Italic"),url("../fonts/opensans-italic.woff") format("woff")}@font-face{font-family:"OpenSans";font-weight:700;font-style:normal;src:local("Open Sans Bold"),local("OpenSans-Bold"),url("../fonts/opensans-bold.woff") format("woff")}@font-face{font-family:"OpenSans";font-weight:700;font-style:italic;src:local("Open Sans Bold Italic"),local("OpenSans-BoldItalic"),url("../fonts/opensans-bolditalic.woff") format("woff")}@font-face{font-family:"DocsFontIcons";font-weight:normal;font-style:normal;src:local("DocsFontIcons Regular"),local("DocsFontIcons"),url("../fonts/docsfonticons.woff") format("woff")}.docs-icon--lambda:before,.docs-icon--lambda-bold:before,.docs-icon--beaker:before,.docs-icon--cog:before,.docs-icon--github-alt:before,.docs-icon--comments:before,.docs-icon--bolt:before,.docs-icon--question-sign:before,.docs-icon--tools:before,.docs-icon--search:before,.docs-icon--download-alt:before,.docs-icon--github:before,.docs-icon--reorder:before,.docs-icon--more:before,.docs-icon--riak:before,.docs-icon--database:before,.docs-icon--install:before,.docs-icon--cloud:before,.docs-icon--configure:before,.docs-icon--references:before,.docs-icon--time:before{font-family:'DocsFontIcons';font-style:normal;font-weight:normal;font-variant:normal;speak:none;text-transform:none;line-height:1}.docs-icon--lambda:before{content:"\e005"}.docs-icon--lambda-bold:before{content:"\e00b"}.docs-icon--beaker:before{content:"\f0c3"}.docs-icon--cog:before{content:"\f013"}.docs-icon--github-alt:before{content:"\e004"}.docs-icon--comments:before{content:"\e000"}.docs-icon--bolt:before{content:"\f0e7"}.docs-icon--question-sign:before{content:"\f059"}.docs-icon--tools:before{content:"\e001"}.docs-icon--search:before{content:"\f002"}.docs-icon--download-alt:before{content:"\f019"}.docs-icon--github:before{content:"\e009"}.docs-icon--reorder:before{content:"\f0c9"}.docs-icon--more:before{content:"\e002"}.docs-icon--riak:before{content:"\e003"}.docs-icon--database:before{content:"\e006"}.docs-icon--install:before{content:"\e007"}.docs-icon--cloud:before{content:"\e008"}.docs-icon--configure:before{content:"\e00a"}.docs-icon--references:before{content:"\f02d"}.docs-icon--time:before{content:"\f017"}.inline{display:inline !important}.block{display:block !important}.inline-block{display:inline-block !important}.table{display:table !important}.inline-table{display:inline-table !important}.table-row{display:table-row !important}.table-cell{display:table-cell !important}.hidden{display:none !important}.float-left{float:left}.float-right{float:right}.clear{clear:both}.clear-left{clear:left}.clear-right{clear:right}.overflow{overflow:auto}.overflow-hidden{overflow:hidden}.overflow-visible{overflow:visible}.overflow-scroll{overflow:scroll}.overflow-x{overflow-x:auto}.overflow-x-hidden{overflow-x:hidden}.overflow-x-visible{overflow-x:visible}.overflow-x-scroll{overflow-x:scroll}.overflow-y{overflow-y:auto}.overflow-y-hidden{overflow-y:hidden}.overflow-y-visible{overflow-y:visible}.overflow-y-scroll{overflow-y:scroll}.inline-only-xs{display:none !important}.block-only-xs{display:none !important}.inline-block-only-xs{display:none !important}@media (min-width: 20rem) and (max-width: 29.99rem){.inline-only-xs{display:inline !important}.block-only-xs{display:block !important}.inline-block-only-xs{display:inline-block 
!important}}.inline-only-sm{display:none !important}.block-only-sm{display:none !important}.inline-block-only-sm{display:none !important}@media (min-width: 30rem) and (max-width: 47.99rem){.inline-only-sm{display:inline !important}.block-only-sm{display:block !important}.inline-block-only-sm{display:inline-block !important}}.inline-only-md{display:none !important}.block-only-md{display:none !important}.inline-block-only-md{display:none !important}@media (min-width: 48rem) and (max-width: 66.49rem){.inline-only-md{display:inline !important}.block-only-md{display:block !important}.inline-block-only-md{display:inline-block !important}}.inline-only-lg{display:none !important}.block-only-lg{display:none !important}.inline-block-only-lg{display:none !important}@media (min-width: 66.5rem) and (max-width: 85.99rem){.inline-only-lg{display:inline !important}.block-only-lg{display:block !important}.inline-block-only-lg{display:inline-block !important}}.inline-only-xl{display:none !important}.block-only-xl{display:none !important}.inline-block-only-xl{display:none !important}@media (min-width: 86rem){.inline-only-xl{display:inline !important}.block-only-xl{display:block !important}.inline-block-only-xl{display:inline-block !important}}@media (min-width: 20rem){.float-left-xs-up{float:left}.float-right-xs-up{float:right}.float-none-xs-up{float:none}}@media (min-width: 30rem){.float-left-sm-up{float:left}.float-right-sm-up{float:right}.float-none-sm-up{float:none}}@media (min-width: 48rem){.float-left-md-up{float:left}.float-right-md-up{float:right}.float-none-md-up{float:none}}@media (min-width: 66.5rem){.float-left-lg-up{float:left}.float-right-lg-up{float:right}.float-none-lg-up{float:none}}@media (min-width: 86rem){.float-left-xl-up{float:left}.float-right-xl-up{float:right}.float-none-xl-up{float:none}}@media (max-width: 29.99rem){.float-left-xs-down{float:left}.float-right-xs-down{float:right}.float-none-xs-down{float:none}}@media (max-width: 47.99rem){.float-left-sm-down{float:left}.float-right-sm-down{float:right}.float-none-sm-down{float:none}}@media (max-width: 66.49rem){.float-left-md-down{float:left}.float-right-md-down{float:right}.float-none-md-down{float:none}}@media (max-width: 85.99rem){.float-left-lg-down{float:left}.float-right-lg-down{float:right}.float-none-lg-down{float:none}}.float-left-xl-down{float:left}.float-right-xl-down{float:right}.float-none-xl-down{float:none}.hide-text{overflow:hidden;padding:0;text-indent:101%;white-space:nowrap}.visually-hidden{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.hljs{background:#f7f7f7;color:#454d54}.hljs-comment,.hljs-quote{color:#9199a1;font-style:italic}.hljs-keyword,.hljs-selector-tag,.hljs-addition{color:#6c8997;font-weight:bold}.hljs-number,.hljs-string,.hljs-params,.hljs-meta .hljs-meta-string,.hljs-literal,.hljs-doctag,.hljs-regexp{color:#dc7d00}.hljs-title,.hljs-section,.hljs-name,.hljs-selector-id,.hljs-selector-class{color:#79aeb6}.hljs-attribute,.hljs-attr,.hljs-variable,.hljs-template-variable,.hljs-class .hljs-title,.hljs-type{color:#597471}.hljs-symbol,.hljs-bullet,.hljs-subst,.hljs-meta,.hljs-meta .hljs-keyword,.hljs-selector-attr,.hljs-selector-pseudo,.hljs-link{color:#f99d53}.hljs-built_in,.hljs-deletion{color:#ee6201}.hljs-formula{background:#ebebeb}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:bold}html{font-size:initial}body{color:#454d54}main{font-size:110%}code{color:#454d54;background:#e3e6e8;border-radius:.25rem;padding:0.1rem 
0.25rem;-webkit-transition:color 150ms ease-in;-o-transition:color 150ms ease-in;transition:color 150ms ease-in}a{color:#337ab7;text-decoration:underline;cursor:pointer;-webkit-transition:color 150ms ease-in;-o-transition:color 150ms ease-in;transition:color 150ms ease-in}a:not([href]):hover,a:not([href]):active,a:not([href]):focus{cursor:default}a code{color:#337ab7}a:hover,a:active,a:focus{color:#e78505;text-decoration:underline}a:hover code,a:active code,a:focus code{color:#e78505}.main-article a[href^="http://"],.main-article a[href^="https://"]{display:inline-block}.main-article a[href^="http://"]::after,.main-article a[href^="https://"]::after{content:"";display:inline-block;text-align:left;width:.75rem;height:.625rem;margin-right:-.0625rem;background:url("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAUklEQVR42m3O0QkAMQgD0JvWNZzDndwpRz5SepcIQWof6APgk+5GiKOZwa8EHTMGqwp3ONzdDFUXYufbYES6WVCfHB7EuW5mF7R1goqgVYQhBl+awCunCZHg2gAAAABJRU5ErkJggg==") no-repeat right}.main-article a[href$=".pdf"],.main-article a[href$=".PDF"]{display:inline-block}.main-article a[href$=".pdf"]::after,.main-article a[href$=".PDF"]::after{content:"";display:inline-block;width:1rem;height:1rem;background:url("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA4AAAAQCAYAAAAmlE46AAAEJGlDQ1BJQ0MgUHJvZmlsZQAAOBGFVd9v21QUPolvUqQWPyBYR4eKxa9VU1u5GxqtxgZJk6XtShal6dgqJOQ6N4mpGwfb6baqT3uBNwb8AUDZAw9IPCENBmJ72fbAtElThyqqSUh76MQPISbtBVXhu3ZiJ1PEXPX6yznfOec7517bRD1fabWaGVWIlquunc8klZOnFpSeTYrSs9RLA9Sr6U4tkcvNEi7BFffO6+EdigjL7ZHu/k72I796i9zRiSJPwG4VHX0Z+AxRzNRrtksUvwf7+Gm3BtzzHPDTNgQCqwKXfZwSeNHHJz1OIT8JjtAq6xWtCLwGPLzYZi+3YV8DGMiT4VVuG7oiZpGzrZJhcs/hL49xtzH/Dy6bdfTsXYNY+5yluWO4D4neK/ZUvok/17X0HPBLsF+vuUlhfwX4j/rSfAJ4H1H0qZJ9dN7nR19frRTeBt4Fe9FwpwtN+2p1MXscGLHR9SXrmMgjONd1ZxKzpBeA71b4tNhj6JGoyFNp4GHgwUp9qplfmnFW5oTdy7NamcwCI49kv6fN5IAHgD+0rbyoBc3SOjczohbyS1drbq6pQdqumllRC/0ymTtej8gpbbuVwpQfyw66dqEZyxZKxtHpJn+tZnpnEdrYBbueF9qQn93S7HQGGHnYP7w6L+YGHNtd1FJitqPAR+hERCNOFi1i1alKO6RQnjKUxL1GNjwlMsiEhcPLYTEiT9ISbN15OY/jx4SMshe9LaJRpTvHr3C/ybFYP1PZAfwfYrPsMBtnE6SwN9ib7AhLwTrBDgUKcm06FSrTfSj187xPdVQWOk5Q8vxAfSiIUc7Z7xr6zY/+hpqwSyv0I0/QMTRb7RMgBxNodTfSPqdraz/sDjzKBrv4zu2+a2t0/HHzjd2Lbcc2sG7GtsL42K+xLfxtUgI7YHqKlqHK8HbCCXgjHT1cAdMlDetv4FnQ2lLasaOl6vmB0CMmwT/IPszSueHQqv6i/qluqF+oF9TfO2qEGTumJH0qfSv9KH0nfS/9TIp0Wboi/SRdlb6RLgU5u++9nyXYe69fYRPdil1o1WufNSdTTsp75BfllPy8/LI8G7AUuV8ek6fkvfDsCfbNDP0dvRh0CrNqTbV7LfEEGDQPJQadBtfGVMWEq3QWWdufk6ZSNsjG2PQjp3ZcnOWWing6noonSInvi0/Ex+IzAreevPhe+CawpgP1/pMTMDo64G0sTCXIM+KdOnFWRfQKdJvQzV1+Bt8OokmrdtY2yhVX2a+qrykJfMq4Ml3VR4cVzTQVz+UoNne4vcKLoyS+gyKO6EHe+75Fdt0Mbe5bRIf/wjvrVmhbqBN97RD1vxrahvBOfOYzoosH9bq94uejSOQGkVM6sN/7HelL4t10t9F4gPdVzydEOx83Gv+uNxo7XyL/FtFl8z9ZAHF4bBsrEwAAAAlwSFlzAAALEwAACxMBAJqcGAAAAlVJREFUKBWNUUtrE1EU/uaRkIehWAilWtJUBdPiLi7EjUJdxIWLbnSThX9A/BGCVGs24k4qNNSdaDa6kqJ14cKQohAtahJJwce0eZFkkjuv6zk36kYEL5yZO3fO9zjfhfy/5f1qu0pvUJma63qyXq+h027DMAzYoxGEEOj1evB9X9VgMJDD4VDL5/NIJpPLALbAh++rVekIIR3H+auIRIk1Go2gUCjIZrPJ3yd1QkPXdfhBAAIrNWKHbduqeM8OqFnLZDIolUpotVq7CshgTdPgep6yxnuNyEaVHRzcvQfxw0IqnUYul0MikQA5g8lz8FwBK9KBxiw0K8ZjiO1tOLUGrEoFsWgEU5Ho5D+1/FFkoAqD3i5Zc0jRXszA+/gJAZELIg0CXzljbp2bKSB1wHsmcKnJpdjDqRT0E8chj85C0MxBIHkmxk0UJTUzgIuDUHsikaEwhg+LsB9swOt2MfZc9Dpd9V+nGFSiDGBFJnG5+n045TKmVm8hRA7kkxI0chYQWND8pk++OXperMRADkejhGWtDufCMvSFBXhXLuPL/U3MTifQrn6AqRD0oHtSQNowA4TrwrEs6Ku34dsjmNeuI76UgfF8C7FwGCYDTFIImaYC8v2pa3n0GOE3O9AvXcT4/DmI6cNIzs3hGxGeWVmZAJt7e8hms5gnS/16HdaNm/CquzhSXIdMz8MhxVg8ptI/2N+HQSJmNBpVB3fW1uAfiuPsq9cwyu/w+fQpfC1uwOp0oJH1306SMzOIRCJqNnJLiy6JV+vpM/n22KL8/uKl+v7X4yd/u
u5KwdHdQAAAAABJRU5ErkJggg==") no-repeat right}h1{color:red;line-height:initial;font-size:initial;margin-top:initial;margin-bottom:initial}h2{color:#636567;line-height:1.2;font-size:2.30rem;margin-top:3.00rem;padding-bottom:1.00rem}h3{color:#636567;line-height:1.2;font-size:1.70rem;margin-top:1.75rem;padding-bottom:0.50rem}h4{color:#636567;line-height:1.2;font-size:1.50rem;margin-top:1.25rem;padding-bottom:0.50rem}h5{color:#636567;line-height:1.2;font-size:1.30rem;margin-top:0.00rem;padding-bottom:0.50rem}h6{color:#636567;line-height:1.2;font-size:1.25rem;margin-top:0.00rem;padding-bottom:0.50rem}h1,h2,h3,h4,h5,h6{margin-bottom:0}h1 a,h2 a,h3 a,h4 a,h5 a,h6 a{color:#636567;text-decoration:none}h1 a code,h2 a code,h3 a code,h4 a code,h5 a code,h6 a code{color:#636567}h1 a:hover,h1 a:active,h1 a:focus,h2 a:hover,h2 a:active,h2 a:focus,h3 a:hover,h3 a:active,h3 a:focus,h4 a:hover,h4 a:active,h4 a:focus,h5 a:hover,h5 a:active,h5 a:focus,h6 a:hover,h6 a:active,h6 a:focus{color:#636567;text-decoration:none}h1 a:hover code,h1 a:active code,h1 a:focus code,h2 a:hover code,h2 a:active code,h2 a:focus code,h3 a:hover code,h3 a:active code,h3 a:focus code,h4 a:hover code,h4 a:active code,h4 a:focus code,h5 a:hover code,h5 a:active code,h5 a:focus code,h6 a:hover code,h6 a:active code,h6 a:focus code{color:#636567}h1 a a:hover,h1 a a:active,h1 a a:focus,h2 a a:hover,h2 a a:active,h2 a a:focus,h3 a a:hover,h3 a a:active,h3 a a:focus,h4 a a:hover,h4 a a:active,h4 a a:focus,h5 a a:hover,h5 a a:active,h5 a a:focus,h6 a a:hover,h6 a a:active,h6 a a:focus{color:#e78505}hr{border-top:1px dashed #d4d9de}blockquote{font-size:90%;margin-left:2rem;border-left:0.25rem solid #b8bfc7;padding-left:1rem;margin-top:1.5rem;margin-right:1rem}.blocknote{font-size:90%;background:#e1eef4;border-radius:.5rem;border:1px solid #bcd6d0;padding:1rem 1rem 1rem 1rem;margin:1.5rem}.blocknote p:last-of-type{margin-bottom:0}.blocknote .blocknote__title{font-size:110%;padding-bottom:0.25rem}@media (min-width: 48rem){.blocknote{margin-right:1rem}}pre code{display:block;overflow-x:auto;border-radius:.3rem;font-size:90%;color:#454d54;background:#f7f7f7;border:1px solid #b8bfc7;padding:0.5rem 1rem}pre+pre{display:none}ol,ul,dt{margin-bottom:1rem;padding-left:2.5rem}table{margin-top:1rem;margin-bottom:1rem}table code{background:transparent;padding:0;font-size:95%}table caption{color:#454d54}thead{border-bottom:1px solid #454d54}th{padding:0.25rem 0.5rem}td{padding:0.5rem;vertical-align:top}.main-article img{max-width:100%}::-moz-selection{background:#feca87}::selection{background:#feca87}.content-nav{position:fixed;z-index:200;width:100%;top:3.5rem;bottom:0;left:-200%;-webkit-transition:left 200ms ease-in-out 50ms;-o-transition:left 200ms ease-in-out 50ms;transition:left 200ms ease-in-out 50ms}@media (max-width: 47.99rem){.content-nav.content-nav--fullscreen{left:0}}@media (min-width: 48rem){.content-nav{top:4.5rem;bottom:2rem;left:1rem;width:13rem}}@media (min-width: 66.5rem){.content-nav{width:15rem}}.banner{position:fixed;z-index:400;top:0;left:0;right:0;height:3.5rem}@media (min-width: 48rem){.banner{left:0;height:4.5rem}}.content-well{width:100%;padding-top:3.5rem}@media (min-width: 48rem){.content-well{padding-top:4.5rem;padding-right:0.5rem;margin-right:auto;padding-left:16rem;max-width:62.5rem}}@media (min-width: 66.5rem){.content-well{padding-left:19rem;max-width:65.5rem}}@media (min-width: 86rem){.content-well{padding-left:20.5rem;max-width:78rem}}@media (max-width: 
47.99rem){.content-well.content-well--immobile{position:fixed;overflow:hidden}}.main-article{padding:.5rem 0.5rem 0 0.5rem}@media (min-width: 48rem){.main-article{padding-top:.5rem}}@media (max-width: 47.99rem){.ERROR404 .main-article{text-align:center}}.main-article main{position:relative;z-index:100}.main-article main h1,.main-article main h2,.main-article main h3,.main-article main h4,.main-article main h5,.main-article main h6{position:relative}.main-article main h1:before,.main-article main h2:before,.main-article main h3:before,.main-article main h4:before,.main-article main h5:before,.main-article main h6:before{display:block;visibility:hidden;content:" ";height:0;margin-top:-4rem;padding-top:4rem}@media (min-width: 48rem){.main-article main h1:before,.main-article main h2:before,.main-article main h3:before,.main-article main h4:before,.main-article main h5:before,.main-article main h6:before{margin-top:-5rem;padding-top:5rem}}.main-article main h1{z-index:-1}.main-article main h2{z-index:-2}.main-article main h3{z-index:-3}.main-article main h4{z-index:-4}.main-article main h5{z-index:-5}.main-article main h6{z-index:-6}.banner{padding:0 1rem 0 1.25rem;background:#f7f7f7;-webkit-box-shadow:0 0 1.5rem 0 rgba(0,0,0,0.4);box-shadow:0 0 1.5rem 0 rgba(0,0,0,0.4)}@media (min-width: 48rem){.banner{background:#ededf0;padding:0 0 0 1.5rem}}.banner__docs-logo{position:relative;top:50%;-webkit-transform:translateY(-50%);-ms-transform:translateY(-50%);-o-transform:translateY(-50%);transform:translateY(-50%)}.banner__docs-logo .docs-logo__image{width:9.375rem;height:auto}@media (min-width: 48rem){.banner__docs-logo{position:static;top:initial;-webkit-transform:none;-ms-transform:none;-o-transform:none;transform:none;padding-top:1.25rem}.banner__docs-logo .docs-logo__image{width:10.1875rem;height:auto}}.banner__menu-bars{height:100%}.banner__menu-bars .menu-bars{font-size:2.5rem;position:relative;top:50%;-webkit-transform:translateY(-50%);-ms-transform:translateY(-50%);-o-transform:translateY(-50%);transform:translateY(-50%)}.banner__navigation-pane{text-transform:uppercase;height:100%}.banner__navigation-pane a{text-decoration:none}.banner__intra-brand__width-wrapper{position:relative;width:100%;margin-bottom:.4rem;padding-left:7rem}@media (min-width: 66.5rem){.banner__intra-brand__width-wrapper{padding-left:11rem;max-width:53.8125rem}}@media (min-width: 86rem){.banner__intra-brand__width-wrapper{padding-left:13.5rem;max-width:66.3125rem}}.index .banner__intra-brand__width-wrapper{max-width:none}.banner__intra-brand__width-wrapper::after{display:block;content:"";position:absolute;left:100%;height:2rem;width:2000rem;background:#6b737a}.banner__intra-brand{background:#6b737a;width:100%;height:2rem}.banner__intra-brand::before{display:inline-block;content:"";border:0px solid transparent;border-bottom-width:2rem;border-right-width:1.5rem;border-bottom-color:#ededf0}.banner__intra-brand .banner__brand-link{font-size:90%;color:#e3e6e8;height:2rem;padding-top:0.35rem;padding-right:1rem;padding-left:1rem}.banner__intra-brand .banner__brand-link:first-of-type{margin-left:3rem}.banner__intra-brand .banner__brand-link:last-of-type{margin-right:1.25rem}.banner__intra-brand .banner__brand-link:hover,.banner__intra-brand .banner__brand-link:active,.banner__intra-brand .banner__brand-link:focus{color:#bcd6d0}.banner__intra-site__width-wrapper{width:100%}@media (min-width: 66.5rem){.banner__intra-site__width-wrapper{max-width:53.8125rem}}@media (min-width: 
86rem){.banner__intra-site__width-wrapper{max-width:66.3125rem}}.banner__intra-site{height:2.1rem}.banner__intra-site .banner__brand-link{font-size:110%;color:#e78505;height:100%;padding-left:1.25rem;padding-right:1.25rem;margin-right:0.5rem;-webkit-transition:color 200ms ease-in, background 200ms ease-in;-o-transition:color 200ms ease-in, background 200ms ease-in;transition:color 200ms ease-in, background 200ms ease-in}.banner__intra-site .banner__brand-link:last-of-type{margin-right:1rem}.banner__intra-site .banner__brand-link:hover,.banner__intra-site .banner__brand-link:active,.banner__intra-site .banner__brand-link:focus{color:#636567;background:#e0e0e3}.banner__intra-site .banner__brand-link.banner__brand-link--current{color:#636567;background:#e0e0e3}.content-nav{color:#788087}@media (min-width: 20rem) and (max-width: 29.99rem){.content-nav{font-size:100%}}@media (min-width: 30rem) and (max-width: 47.99rem){.content-nav{font-size:100%}}@media (min-width: 48rem) and (max-width: 66.49rem){.content-nav{font-size:80%}}@media (min-width: 66.5rem) and (max-width: 85.99rem){.content-nav{font-size:90%}}@media (min-width: 86rem){.content-nav{font-size:90%}}.no-js .content-nav{display:none !important}.content-nav__fixed-top{position:fixed;width:inherit;padding-top:.5rem;background:#f7f7f7;-webkit-box-shadow:-0.125rem -0.125rem 2rem -0.25rem rgba(0,0,0,0.8);box-shadow:-0.125rem -0.125rem 2rem -0.25rem rgba(0,0,0,0.8);-webkit-transition:height 500ms ease;-o-transition:height 500ms ease;transition:height 500ms ease}.content-nav--top-size-full .content-nav__fixed-top{height:8rem}.content-nav--top-size-half .content-nav__fixed-top{height:4rem}@media (min-width: 48rem){.content-nav__fixed-top{z-index:1;width:12.75rem;padding-left:0.5rem;padding-right:0.5rem;padding-top:1rem;-webkit-box-shadow:-0.125rem -0.125rem 2rem -0.25rem rgba(0,0,0,0.2);box-shadow:-0.125rem -0.125rem 2rem -0.25rem rgba(0,0,0,0.2)}.content-nav--top-size-full .content-nav__fixed-top{height:7.5rem}.content-nav--top-size-half .content-nav__fixed-top{height:4.5rem}}@media (min-width: 66.5rem){.content-nav__fixed-top{width:14.75rem;padding-top:1rem}.content-nav--top-size-full .content-nav__fixed-top{height:7.75rem}.content-nav--top-size-half .content-nav__fixed-top{height:4.5rem}}.version-picker{margin:0.5rem 0.75rem 0.5rem 0.75rem}@media (min-width: 48rem){.version-picker{margin-left:0.25rem;margin-right:0.25rem}}.version-picker .product__title{font-size:2.20em;font-weight:700;line-height:1;margin-top:0.3em;color:#788087;text-transform:lowercase;text-decoration:none;-webkit-transition:color 500ms ease;-o-transition:color 500ms ease;transition:color 500ms ease}@media (min-width: 48rem){.version-picker .product__title{margin-top:0.35em}}@media (min-width: 66.5rem){.version-picker .product__title{font-size:2.40em;margin-top:0.25em}}.version-picker .product__title .product__logo{height:0.60em;width:0.60em;margin-bottom:0.10em}.version-picker .product__title .product__title-highlight{color:#fcc074;text-transform:uppercase;font-size:70.5%;font-weight:400;-webkit-transition:color 500ms ease;-o-transition:color 500ms ease;transition:color 500ms ease}.version-picker .product__title:hover,.version-picker .product__title:active,.version-picker .product__title:focus{color:#636567}.version-picker .product__title:hover .product__title-highlight,.version-picker .product__title:active .product__title-highlight,.version-picker .product__title:focus 
.product__title-highlight{color:#f99d53}.content-nav__primary__sizing-box{position:relative;height:100%;-webkit-transition:top 500ms ease, padding-bottom 500ms ease;-o-transition:top 500ms ease, padding-bottom 500ms ease;transition:top 500ms ease, padding-bottom 500ms ease}.content-nav--top-size-full .content-nav__primary__sizing-box{top:8.5rem;padding-bottom:8.5rem}.content-nav--top-size-half .content-nav__primary__sizing-box{top:4.5rem;padding-bottom:4.5rem}@media (min-width: 48rem){.content-nav--top-size-full .content-nav__primary__sizing-box{top:8.5rem;padding-bottom:8.5rem}.content-nav--top-size-half .content-nav__primary__sizing-box{top:5.5rem;padding-bottom:5.5rem}}@media (min-width: 66.5rem){.content-nav--top-size-full .content-nav__primary__sizing-box{top:8.75rem;padding-bottom:8.75rem}.content-nav--top-size-half .content-nav__primary__sizing-box{top:5.5rem;padding-bottom:5.5rem}}.content-nav__primary__shadow-box{-webkit-box-shadow:-0.25rem -0.25rem 4rem -0.5rem rgba(0,0,0,0.8);box-shadow:-0.25rem -0.25rem 4rem -0.5rem rgba(0,0,0,0.8);height:100%}@media (min-width: 48rem){.content-nav__primary__shadow-box{-webkit-box-shadow:-0.25rem -0.25rem 4rem -0.5rem rgba(0,0,0,0.2);box-shadow:-0.25rem -0.25rem 4rem -0.5rem rgba(0,0,0,0.2)}}.content-nav__primary{background:#f7f7f7;height:100%}.content-nav__menu-container{min-height:100%;padding-bottom:8.0625rem}.content-nav__fixed-bottom{height:8.0625rem;margin-top:-8.0625rem;font-size:90%;line-height:1.1rem;background:#9199a1}.footer{padding:1rem .25rem 0rem .25rem;margin-top:2rem;font-size:80%;line-height:1.2;text-align:center;color:#9199a1;border-top:1px solid #d4d9de}@media (min-width: 48rem){.footer{padding:2rem 1rem 1rem 1rem;margin-top:0}}.footer a{color:#9199a1}.footer a:hover,.footer a:active,.footer a:focus{color:#79aeb6}.index .footer{position:relative;padding-top:2rem;margin-top:0}@media (min-width: 66.5rem){.index .footer{text-align:center}}@media (min-width: 48rem){.footer__attributions{text-align:center}}.footer__social{margin:0 0.5rem 1.5rem 0.5rem}@media (min-width: 48rem){.footer__social{margin-bottom:0}}@media (min-width: 
66.5rem){.footer__social{position:absolute;right:0;top:0;margin-top:2.2rem;margin-right:2.0rem}}@font-face{font-family:"anchorjs-icons";src:url(data:n/a;base64,AAEAAAALAIAAAwAwT1MvMg8yG2cAAAE4AAAAYGNtYXDp3gC3AAABpAAAAExnYXNwAAAAEAAAA9wAAAAIZ2x5ZlQCcfwAAAH4AAABCGhlYWQHFvHyAAAAvAAAADZoaGVhBnACFwAAAPQAAAAkaG10eASAADEAAAGYAAAADGxvY2EACACEAAAB8AAAAAhtYXhwAAYAVwAAARgAAAAgbmFtZQGOH9cAAAMAAAAAunBvc3QAAwAAAAADvAAAACAAAQAAAAEAAHzE2p9fDzz1AAkEAAAAAADRecUWAAAAANQA6R8AAAAAAoACwAAAAAgAAgAAAAAAAAABAAADwP/AAAACgAAA/9MCrQABAAAAAAAAAAAAAAAAAAAAAwABAAAAAwBVAAIAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAMCQAGQAAUAAAKZAswAAACPApkCzAAAAesAMwEJAAAAAAAAAAAAAAAAAAAAARAAAAAAAAAAAAAAAAAAAAAAQAAg//0DwP/AAEADwABAAAAAAQAAAAAAAAAAAAAAIAAAAAAAAAIAAAACgAAxAAAAAwAAAAMAAAAcAAEAAwAAABwAAwABAAAAHAAEADAAAAAIAAgAAgAAACDpy//9//8AAAAg6cv//f///+EWNwADAAEAAAAAAAAAAAAAAAAACACEAAEAAAAAAAAAAAAAAAAxAAACAAQARAKAAsAAKwBUAAABIiYnJjQ3NzY2MzIWFxYUBwcGIicmNDc3NjQnJiYjIgYHBwYUFxYUBwYGIwciJicmNDc3NjIXFhQHBwYUFxYWMzI2Nzc2NCcmNDc2MhcWFAcHBgYjARQGDAUtLXoWOR8fORYtLTgKGwoKCjgaGg0gEhIgDXoaGgkJBQwHdR85Fi0tOAobCgoKOBoaDSASEiANehoaCQkKGwotLXoWOR8BMwUFLYEuehYXFxYugC44CQkKGwo4GkoaDQ0NDXoaShoKGwoFBe8XFi6ALjgJCQobCjgaShoNDQ0NehpKGgobCgoKLYEuehYXAAAADACWAAEAAAAAAAEACAAAAAEAAAAAAAIAAwAIAAEAAAAAAAMACAAAAAEAAAAAAAQACAAAAAEAAAAAAAUAAQALAAEAAAAAAAYACAAAAAMAAQQJAAEAEAAMAAMAAQQJAAIABgAcAAMAAQQJAAMAEAAMAAMAAQQJAAQAEAAMAAMAAQQJAAUAAgAiAAMAAQQJAAYAEAAMYW5jaG9yanM0MDBAAGEAbgBjAGgAbwByAGoAcwA0ADAAMABAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAH//wAP) format("truetype")}.anchor-icon::after{-webkit-transition:opacity 100ms ease-in;-o-transition:opacity 100ms ease-in;transition:opacity 100ms ease-in;opacity:0;font-family:'anchorjs-icons';font-style:normal;font-weight:normal;font-variant:normal;text-transform:none;line-height:1;content:"\e9cb";padding-left:0.2em;vertical-align:bottom;font-size:1.2em}.anchor-icon:hover::after,.anchor-icon:active::after,.anchor-icon:focus::after{opacity:1}.code-block__tab-set-wrapper{position:relative}.code-block__tab-set-wrapper .edge-fader{font-size:1rem;width:2rem;height:100%}.code-block__tab-set{white-space:nowrap;list-style:none;padding:0 2rem;margin:0 0 0.5rem 0}.code-block__tab-set .code-block__tab a{text-transform:uppercase;font-size:90%;color:#f99d53;text-decoration:none;border-radius:.25rem;padding:0.5rem 0.5rem;margin-right:0.25rem;-webkit-transition:background-color 200ms ease 50ms, color 200ms ease 50ms;-o-transition:background-color 200ms ease 50ms, color 200ms ease 50ms;transition:background-color 200ms ease 50ms, color 200ms ease 50ms}.code-block__tab-set .code-block__tab a:hover,.code-block__tab-set .code-block__tab a:active,.code-block__tab-set .code-block__tab a:focus{background:#f7f7f7;color:#6c8997;text-decoration:none}.code-block__tab-set .code-block__tab--active a{color:#636567;cursor:default;background:#f7f7f7}.code-block__tab-set .code-block__tab--active a:hover,.code-block__tab-set .code-block__tab--active a:active,.code-block__tab-set .code-block__tab--active a:focus{color:#636567}.code-block__title{text-transform:uppercase;font-size:90%;color:#636567;border-radius:.25rem;padding:0.5rem 0.75rem;margin:0 0 0.25rem 0.75rem;background:#f7f7f7}.content-menu{display:none;margin:0;padding-left:0;list-style:none;margin-top:-0.3rem;padding-top:0.3rem}.content-menu.content-menu--depth-0{margin-top:0;padding-top:0}.content-menu.content-menu--open{display:block}.content-menu.content-menu--depth-0>li{-webkit-box-shadow:0 -0.2rem 0.7rem -0.2rem rgba(0,0,0,0.1);box-shadow:0 -0.2rem 0.7rem -0.2rem 
rgba(0,0,0,0.1)}.content-menu__link--depth-1{padding-left:1.75rem}@media (min-width: 48rem){.content-menu__link--depth-1{padding-left:1.125rem}}.content-menu__link--depth-2{padding-left:2.50rem}@media (min-width: 48rem){.content-menu__link--depth-2{padding-left:1.625rem}}.content-menu__link--depth-3{padding-left:3.25rem}@media (min-width: 48rem){.content-menu__link--depth-3{padding-left:2.125rem}}.content-menu__link--depth-4{padding-left:4.00rem}@media (min-width: 48rem){.content-menu__link--depth-4{padding-left:2.625rem}}.content-menu__link--depth-5{padding-left:4.75rem}@media (min-width: 48rem){.content-menu__link--depth-5{padding-left:3.125rem}}.content-menu__download-entry{background:#ffd195;-webkit-transition:background 500ms ease;-o-transition:background 500ms ease;transition:background 500ms ease}.content-menu__download-entry:hover,.content-menu__download-entry:active,.content-menu__download-entry:focus{background:#fcc074}.content-menu__blank-entry{margin-top:0.25rem;height:1rem;margin-bottom:-1rem}.content-menu__item{margin-left:0;margin-right:0;color:#788087}.content-menu__item:hover,.content-menu__item:active,.content-menu__item:focus{color:#454d54}.content-menu__item a{padding-top:.6rem;padding-bottom:0.65rem;color:inherit;text-decoration:none}.content-menu__item>.content-menu__menu-toggle+a>.content-menu__item__right-border{width:100%;padding-right:0.5rem;border-right:1px solid #e3e6e8}.content-menu__item--selected{background:#79aeb6;color:#f0f2f5;-webkit-box-shadow:0 0 1rem -0.2rem rgba(0,0,0,0.4);box-shadow:0 0 1rem -0.2rem rgba(0,0,0,0.4)}.content-menu__item--selected:hover,.content-menu__item--selected:active,.content-menu__item--selected:focus{color:#f0f2f5}.content-menu__item--selected>.content-menu__menu-toggle+a{border-right-color:#f0f2f5}.content-menu__icon-container{text-align:right;margin-left:1rem;margin-right:0.25rem;padding-top:.6rem;padding-bottom:0.2rem;color:inherit;-webkit-transition:color 500ms ease;-o-transition:color 500ms ease;transition:color 500ms ease}.content-menu__icon-container:before{display:inline-block;width:1.25rem;padding-right:0.5rem}.content-menu__menu-toggle{width:3.0rem;padding-left:0.4rem;text-align:center;padding-top:.6rem;padding-bottom:0.2rem;cursor:pointer}@media (min-width: 48rem){.content-menu__menu-toggle{width:1.85rem;padding-left:0.20rem}}.content-menu__menu-toggle:after{display:inline-block;content:"";-webkit-transition:border-color 500ms ease;-o-transition:border-color 500ms ease;transition:border-color 500ms ease;width:.875rem;height:.875rem;border:.4375rem solid transparent;border-top:.4375rem solid #9199a1;border-bottom:0 none #1f262e;-webkit-transform:translate(0%, 25%);-ms-transform:translate(0%, 25%);-o-transform:translate(0%, 25%);transform:translate(0%, 25%)}.content-menu__item--selected .content-menu__menu-toggle:after{width:.875rem;height:.875rem;border:.4375rem solid transparent;border-top:.4375rem solid #f0f2f5;border-bottom:0 none #1f262e;-webkit-transform:translate(0%, 25%);-ms-transform:translate(0%, 25%);-o-transform:translate(0%, 25%);transform:translate(0%, 25%)}.content-menu__menu-toggle.content-menu__menu-toggle--open:after{width:.875rem;height:.875rem;border:.4375rem solid transparent;border-bottom:.4375rem solid #9199a1;border-top:0 none #1f262e;-webkit-transform:translate(0%, -25%);-ms-transform:translate(0%, -25%);-o-transform:translate(0%, -25%);transform:translate(0%, -25%)}.content-menu__item--selected 
.content-menu__menu-toggle.content-menu__menu-toggle--open:after{width:.875rem;height:.875rem;border:.4375rem solid transparent;border-bottom:.4375rem solid #f0f2f5;border-top:0 none #1f262e;-webkit-transform:translate(0%, -25%);-ms-transform:translate(0%, -25%);-o-transform:translate(0%, -25%);transform:translate(0%, -25%)}@media (min-width: 48rem){.content-menu__menu-toggle:after{width:.625rem;height:.625rem;border:.3125rem solid transparent;border-top:.3125rem solid #9199a1;border-bottom:0 none #1f262e;-webkit-transform:translate(0%, 25%);-ms-transform:translate(0%, 25%);-o-transform:translate(0%, 25%);transform:translate(0%, 25%)}.content-menu__item--selected .content-menu__menu-toggle:after{width:.625rem;height:.625rem;border:.3125rem solid transparent;border-top:.3125rem solid #f0f2f5;border-bottom:0 none #1f262e;-webkit-transform:translate(0%, 25%);-ms-transform:translate(0%, 25%);-o-transform:translate(0%, 25%);transform:translate(0%, 25%)}.content-menu__menu-toggle.content-menu__menu-toggle--open:after{width:.625rem;height:.625rem;border:.3125rem solid transparent;border-bottom:.3125rem solid #9199a1;border-top:0 none #1f262e;-webkit-transform:translate(0%, -25%);-ms-transform:translate(0%, -25%);-o-transform:translate(0%, -25%);transform:translate(0%, -25%)}.content-menu__item--selected .content-menu__menu-toggle.content-menu__menu-toggle--open:after{width:.625rem;height:.625rem;border:.3125rem solid transparent;border-bottom:.3125rem solid #f0f2f5;border-top:0 none #1f262e;-webkit-transform:translate(0%, -25%);-ms-transform:translate(0%, -25%);-o-transform:translate(0%, -25%);transform:translate(0%, -25%)}}.content-nav__fixed-bottom .content-menu{display:block;padding-top:0;margin-top:0}.content-nav__fixed-bottom .content-menu>li{-webkit-box-shadow:0 -0.2rem 0.7rem -0.2rem rgba(0,0,0,0.2);box-shadow:0 -0.2rem 0.7rem -0.2rem rgba(0,0,0,0.2)}.content-nav__fixed-bottom .content-menu__item{color:#e3e6e8}.content-nav__fixed-bottom .content-menu__item:hover,.content-nav__fixed-bottom .content-menu__item:active,.content-nav__fixed-bottom .content-menu__item:focus{color:#f0f2f5}.content-nav__fixed-bottom .content-menu__item a{padding-top:0;padding-bottom:0.5rem}.content-nav__fixed-bottom .content-menu__icon-container{margin-left:0.5rem}.edge-fader{width:0%;height:0%;font-size:1rem;position:absolute}.edge-fader--left{left:0;background:-webkit-gradient(linear, right top, left top, from(rgba(255,255,255,0.8)), color-stop(80%, #fff));background:-webkit-linear-gradient(right, rgba(255,255,255,0.8) 0%, #fff 80%);background:-o-linear-gradient(right, rgba(255,255,255,0.8) 0%, #fff 80%);background:linear-gradient(to left, rgba(255,255,255,0.8) 0%, #fff 80%);filter:progid:DXImageTransform.Microsoft.gradient( startColorstr='#ffffff', endColorstr='#ccffffff', GradientType=1 )}.edge-fader--left .edge-fader__arrow{position:absolute;left:0;top:50%;width:1em;height:1em;border:.5em solid transparent;border-right:.5em solid #feca87;border-left:0 none transparent;-webkit-transform:translate(-25%, -50%);-ms-transform:translate(-25%, -50%);-o-transform:translate(-25%, -50%);transform:translate(-25%, -50%);cursor:pointer;-webkit-transition:border-color 500ms ease 20ms;-o-transition:border-color 500ms ease 20ms;transition:border-color 500ms ease 20ms}.edge-fader--left .edge-fader__arrow:hover,.edge-fader--left .edge-fader__arrow:active,.edge-fader--left .edge-fader__arrow:focus{width:1em;height:1em;border:.5em solid transparent;border-right:.5em solid #f99d53;border-left:0 none 
transparent;-webkit-transform:translate(-25%, -50%);-ms-transform:translate(-25%, -50%);-o-transform:translate(-25%, -50%);transform:translate(-25%, -50%)}.edge-fader--left .edge-fader__arrow.edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-right:.5em solid #e1eef4;border-left:0 none transparent;-webkit-transform:translate(-25%, -50%);-ms-transform:translate(-25%, -50%);-o-transform:translate(-25%, -50%);transform:translate(-25%, -50%);cursor:default}.edge-fader--left .edge-fader__arrow.edge-fader__arrow--invisible{width:1em;height:1em;border:.5em solid transparent;border-right:.5em solid transparent;border-left:0 none transparent;-webkit-transform:translate(-25%, -50%);-ms-transform:translate(-25%, -50%);-o-transform:translate(-25%, -50%);transform:translate(-25%, -50%);cursor:default}.edge-fader--right{right:0;background:-webkit-gradient(linear, left top, right top, from(rgba(255,255,255,0.8)), color-stop(80%, #fff));background:-webkit-linear-gradient(left, rgba(255,255,255,0.8) 0%, #fff 80%);background:-o-linear-gradient(left, rgba(255,255,255,0.8) 0%, #fff 80%);background:linear-gradient(to right, rgba(255,255,255,0.8) 0%, #fff 80%);filter:progid:DXImageTransform.Microsoft.gradient( startColorstr='#ccffffff', endColorstr='#ffffff', GradientType=1 )}.edge-fader--right .edge-fader__arrow{position:absolute;right:0;top:50%;width:1em;height:1em;border:.5em solid transparent;border-left:.5em solid #feca87;border-right:0 none transparent;-webkit-transform:translate(25%, -50%);-ms-transform:translate(25%, -50%);-o-transform:translate(25%, -50%);transform:translate(25%, -50%);cursor:pointer;-webkit-transition:border-color 500ms ease 20ms;-o-transition:border-color 500ms ease 20ms;transition:border-color 500ms ease 20ms}.edge-fader--right .edge-fader__arrow:hover,.edge-fader--right .edge-fader__arrow:active,.edge-fader--right .edge-fader__arrow:focus{width:1em;height:1em;border:.5em solid transparent;border-left:.5em solid #f99d53;border-right:0 none transparent;-webkit-transform:translate(25%, -50%);-ms-transform:translate(25%, -50%);-o-transform:translate(25%, -50%);transform:translate(25%, -50%)}.edge-fader--right .edge-fader__arrow.edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-left:.5em solid #e1eef4;border-right:0 none transparent;-webkit-transform:translate(25%, -50%);-ms-transform:translate(25%, -50%);-o-transform:translate(25%, -50%);transform:translate(25%, -50%);cursor:default}.edge-fader--right .edge-fader__arrow.edge-fader__arrow--invisible{width:1em;height:1em;border:.5em solid transparent;border-left:.5em solid transparent;border-right:0 none transparent;-webkit-transform:translate(25%, -50%);-ms-transform:translate(25%, -50%);-o-transform:translate(25%, -50%);transform:translate(25%, -50%);cursor:default}.edge-fader--top{top:0;background:-webkit-gradient(linear, left bottom, left top, from(rgba(255,255,255,0.8)), color-stop(80%, #fff));background:-webkit-linear-gradient(bottom, rgba(255,255,255,0.8) 0%, #fff 80%);background:-o-linear-gradient(bottom, rgba(255,255,255,0.8) 0%, #fff 80%);background:linear-gradient(to top, rgba(255,255,255,0.8) 0%, #fff 80%);filter:progid:DXImageTransform.Microsoft.gradient( startColorstr='#ffffff', endColorstr='#ccffffff', GradientType=0 )}.edge-fader--top .edge-fader__arrow{position:absolute;top:0;left:50%;width:1em;height:1em;border:.5em solid transparent;border-bottom:.5em solid #feca87;border-top:0 none transparent;-webkit-transform:translate(-50%, 
-25%);-ms-transform:translate(-50%, -25%);-o-transform:translate(-50%, -25%);transform:translate(-50%, -25%);cursor:pointer;-webkit-transition:border-color 500ms ease 20ms;-o-transition:border-color 500ms ease 20ms;transition:border-color 500ms ease 20ms}.edge-fader--top .edge-fader__arrow:hover,.edge-fader--top .edge-fader__arrow:active,.edge-fader--top .edge-fader__arrow:focus{width:1em;height:1em;border:.5em solid transparent;border-bottom:.5em solid #f99d53;border-top:0 none transparent;-webkit-transform:translate(-50%, -25%);-ms-transform:translate(-50%, -25%);-o-transform:translate(-50%, -25%);transform:translate(-50%, -25%)}.edge-fader--top .edge-fader__arrow.edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-bottom:.5em solid #e1eef4;border-top:0 none transparent;-webkit-transform:translate(-50%, -25%);-ms-transform:translate(-50%, -25%);-o-transform:translate(-50%, -25%);transform:translate(-50%, -25%);cursor:default}.edge-fader--top .edge-fader__arrow.edge-fader__arrow--invisible{width:1em;height:1em;border:.5em solid transparent;border-bottom:.5em solid transparent;border-top:0 none transparent;-webkit-transform:translate(-50%, -25%);-ms-transform:translate(-50%, -25%);-o-transform:translate(-50%, -25%);transform:translate(-50%, -25%);cursor:default}.edge-fader--bottom{bottom:0;background:-webkit-gradient(linear, left top, left bottom, from(rgba(255,255,255,0.8)), color-stop(80%, #fff));background:-webkit-linear-gradient(top, rgba(255,255,255,0.8) 0%, #fff 80%);background:-o-linear-gradient(top, rgba(255,255,255,0.8) 0%, #fff 80%);background:linear-gradient(to bottom, rgba(255,255,255,0.8) 0%, #fff 80%);filter:progid:DXImageTransform.Microsoft.gradient( startColorstr='#ccffffff', endColorstr='#ffffff', GradientType=0 )}.edge-fader--bottom .edge-fader__arrow{position:absolute;bottom:0;left:50%;width:1em;height:1em;border:.5em solid transparent;border-top:.5em solid #feca87;border-bottom:0 none transparent;-webkit-transform:translate(-50%, 25%);-ms-transform:translate(-50%, 25%);-o-transform:translate(-50%, 25%);transform:translate(-50%, 25%);cursor:pointer;-webkit-transition:border-color 500ms ease 20ms;-o-transition:border-color 500ms ease 20ms;transition:border-color 500ms ease 20ms}.edge-fader--bottom .edge-fader__arrow:hover,.edge-fader--bottom .edge-fader__arrow:active,.edge-fader--bottom .edge-fader__arrow:focus{width:1em;height:1em;border:.5em solid transparent;border-top:.5em solid #f99d53;border-bottom:0 none transparent;-webkit-transform:translate(-50%, 25%);-ms-transform:translate(-50%, 25%);-o-transform:translate(-50%, 25%);transform:translate(-50%, 25%)}.edge-fader--bottom .edge-fader__arrow.edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-top:.5em solid #e1eef4;border-bottom:0 none transparent;-webkit-transform:translate(-50%, 25%);-ms-transform:translate(-50%, 25%);-o-transform:translate(-50%, 25%);transform:translate(-50%, 25%);cursor:default}.edge-fader--bottom .edge-fader__arrow.edge-fader__arrow--invisible{width:1em;height:1em;border:.5em solid transparent;border-top:.5em solid transparent;border-bottom:0 none transparent;-webkit-transform:translate(-50%, 25%);-ms-transform:translate(-50%, 25%);-o-transform:translate(-50%, 25%);transform:translate(-50%, 25%);cursor:default}@media (min-width: 20rem){.front-matter{margin-bottom:3rem}}@media (min-width: 48rem){.front-matter{margin-bottom:5rem}}.front-matter__title{color:#636567;line-height:1.2;font-size:3rem;margin-top:0}@media (min-width: 
20rem){.front-matter__title{margin-bottom:2rem}}@media (min-width: 48rem){.front-matter__title{margin-bottom:3rem}}.front-matter__title .front-matter__title--supertext{font-size:70%}.menu-bars{width:.8em;height:.625em;cursor:pointer}.menu-bars:before{content:"";width:inherit;height:.125em;position:absolute;background:#636567;-webkit-box-shadow:0 .25em 0 0 #636567,0 .5em 0 0 #636567;box-shadow:0 .25em 0 0 #636567,0 .5em 0 0 #636567}.index .menu-bars{display:none}.search{border-radius:.25rem;background:#9199a1;color:#f0f2f5;padding:0.25rem 0.25rem 0.25rem 0;margin:0.25rem}.search__icon-container{text-align:right;padding:0.25rem 0;margin-left:0.75rem}.search__icon-container:before{display:inline-block;width:1.25rem;padding-right:0.5rem}.search__input{border:none;background:transparent;width:100%;border-radius:.25rem;padding:0.25rem 0 0.25rem 0.25rem;-webkit-transition:background 200ms ease-in, color 200ms ease-in;-o-transition:background 200ms ease-in, color 200ms ease-in;transition:background 200ms ease-in, color 200ms ease-in}.search__input:focus{background:#636567;color:#f0f2f5}.search__input::-webkit-input-placeholder{color:#d4d9de}.search__input::-moz-placeholder{color:#d4d9de}.search__input:-ms-input-placeholder{color:#d4d9de}.search__input::placeholder{color:#d4d9de}.selector__title{font-size:60%;letter-spacing:.125rem;text-transform:uppercase;text-align:center;line-height:1;margin-bottom:0.3em;padding-left:.0625rem}.selector__btn{border:none;border-radius:.25em;width:100%;line-height:1;padding:0.5em 0.9em;font-size:120%;color:#788087;background:#ffd195;-webkit-transition:color 500ms ease, background 500ms ease;-o-transition:color 500ms ease, background 500ms ease;transition:color 500ms ease, background 500ms ease}.selector__btn:hover,.selector__btn:active,.selector__btn:focus{color:#454d54;background:#fcc074}.js .selector__btn{cursor:pointer}.other__btn{border:none;border-radius:.25em;width:100%;line-height:1;padding:0.5em 0.5em;font-size:100%;color:#fcc074;background:#636567;-webkit-transition:color 500ms ease, background 500ms ease;-o-transition:color 500ms ease, background 500ms ease;transition:color 500ms ease, background 500ms ease}.other__btn:hover,.other__btn:active,.other__btn:focus{color:#454d54;background:#ffd195}.js .other__btn{cursor:pointer}.selector__arrow{position:relative;margin:0 0.2em 0 auto;-webkit-transition:border-color 500ms ease;-o-transition:border-color 500ms ease;transition:border-color 500ms ease;width:.75em;height:.75em;border:.375em solid transparent;border-top:.375em solid #ffd195;border-bottom:0 none #6c8997;-webkit-transform:translate(0%, 25%);-ms-transform:translate(0%, 25%);-o-transform:translate(0%, 25%);transform:translate(0%, 25%)}.selector--open .selector__arrow{width:.75em;height:.75em;border:.375em solid transparent;border-bottom:.375em solid #ffd195;border-top:0 none #6c8997;-webkit-transform:translate(0%, -25%);-ms-transform:translate(0%, -25%);-o-transform:translate(0%, -25%);transform:translate(0%, -25%)}.selector-pane__sizing-box{position:absolute;top:7.75rem;height:0;max-height:0;width:100%;z-index:100;-webkit-transition:height 500ms ease, max-height 500ms ease;-o-transition:height 500ms ease, max-height 500ms ease;transition:height 500ms ease, max-height 500ms ease}@media (min-width: 48rem){.selector-pane__sizing-box{left:0.25rem;top:7.15rem;width:auto}}@media (min-width: 66.5rem){.selector-pane__sizing-box{left:0.45rem;top:7.4rem}}.selector-pane__shadow-box{height:1000px;max-height:1000px;-webkit-transition:height 500ms ease, max-height 
500ms ease;-o-transition:height 500ms ease, max-height 500ms ease;transition:height 500ms ease, max-height 500ms ease;-webkit-box-shadow:-0.25rem -0.25rem 2rem -0.25rem rgba(0,0,0,0.2);box-shadow:-0.25rem -0.25rem 2rem -0.25rem rgba(0,0,0,0.2)}.selector-pane__primary{height:100%;background:#ebebeb;color:#f0f2f5;font-size:105%;padding-left:1rem;padding-right:1rem;white-space:nowrap;overflow-y:hidden}.selector-list__sizing-box{position:relative;height:100%;vertical-align:bottom;padding-left:0.75rem}.selector-list__sizing-box:first-of-type{padding-left:0}.release-is-archived-and-hidden{display:none !important}.selector-list__scroll-box{height:100%;padding-top:1rem;padding-bottom:1rem;padding-left:0.6rem;padding-right:0.4rem;margin-left:-0.6rem;margin-right:-0.4rem}.selector-list{margin:0;padding:0;list-style:none;-webkit-box-shadow:-0.125rem -0.125rem 1rem -0.2rem rgba(0,0,0,0.2);box-shadow:-0.125rem -0.125rem 1rem -0.2rem rgba(0,0,0,0.2)}.selector-list__element{background:rgba(255,0,0,0.7);color:#f0f2f5;text-align:center;min-width:3.5rem;border-top:.0625rem solid #d4d9de;-webkit-transition:background 400ms ease;-o-transition:background 400ms ease;transition:background 400ms ease}.selector-list__element a{padding:0.6rem 0.6rem;color:inherit;text-decoration:none}.selector-list__element:first-of-type{border-top-left-radius:.15rem;border-top-right-radius:.15rem;border-top:none}.selector-list__element:last-of-type{border-bottom-left-radius:.15rem;border-bottom-right-radius:.15rem}.selector-list__element:only-of-type{border-radius:.15rem;border-top:none}.selector-list__element--1{background:#f99d53}.selector-list__element--1.selector-list__element--current{background:#fbb984}.selector-list__element--1:hover,.selector-list__element--1:active,.selector-list__element--1:focus{background:#fbb984}.selector-list__element--1.selector-list__element--lts-flag:hover,.selector-list__element--1.selector-list__element--lts-flag:active,.selector-list__element--1.selector-list__element--lts-flag:focus{background:#f99d53}.selector-list__element--2{background:#79aeb6}.selector-list__element--2.selector-list__element--current{background:#9ac2c8}.selector-list__element--2:hover,.selector-list__element--2:active,.selector-list__element--2:focus{background:#9ac2c8}.selector-list__element--2.selector-list__element--lts-flag:hover,.selector-list__element--2.selector-list__element--lts-flag:active,.selector-list__element--2.selector-list__element--lts-flag:focus{background:#79aeb6}.selector-list__element--3{background:#88b3a8}.selector-list__element--3.selector-list__element--current{background:#a7c7bf}.selector-list__element--3:hover,.selector-list__element--3:active,.selector-list__element--3:focus{background:#a7c7bf}.selector-list__element--3.selector-list__element--lts-flag:hover,.selector-list__element--3.selector-list__element--lts-flag:active,.selector-list__element--3.selector-list__element--lts-flag:focus{background:#88b3a8}.selector-list__element--4{background:#6f8a86}.selector-list__element--4.selector-list__element--current{background:#8ba19e}.selector-list__element--4:hover,.selector-list__element--4:active,.selector-list__element--4:focus{background:#8ba19e}.selector-list__element--4.selector-list__element--lts-flag:hover,.selector-list__element--4.selector-list__element--lts-flag:active,.selector-list__element--4.selector-list__element--lts-flag:focus{background:#6f8a86}.selector-list__element--5{background:#6c8997}.selector-list__element--5.selector-list__element--current{background:#8aa1ac}.selector-list_
_element--5:hover,.selector-list__element--5:active,.selector-list__element--5:focus{background:#8aa1ac}.selector-list__element--5.selector-list__element--lts-flag:hover,.selector-list__element--5.selector-list__element--lts-flag:active,.selector-list__element--5.selector-list__element--lts-flag:focus{background:#6c8997}.selector-list__element--6{background:#3b4a5d}.selector-list__element--6.selector-list__element--current{background:#4f637c}.selector-list__element--6:hover,.selector-list__element--6:active,.selector-list__element--6:focus{background:#4f637c}.selector-list__element--6.selector-list__element--lts-flag:hover,.selector-list__element--6.selector-list__element--lts-flag:active,.selector-list__element--6.selector-list__element--lts-flag:focus{background:#3b4a5d}.selector-list__element--archived{color:#d4d9de;background:#f0f2f5}.selector-list__element--archived:hover,.selector-list__element--archived:active,.selector-list__element--archived:focus{color:#f0f2f5;background:#d4d9de}.selector-list__element--other{color:#d4d9de;background:#f0f2f5}.selector-list__element--other:hover,.selector-list__element--other:active,.selector-list__element--other:focus{color:#f0f2f5;background:#d4d9de}.selector-list__element--disabled{background:#f0f2f5;border-top:.0625rem solid #f7f8f9;color:#d4d9de}.selector-list__element--current a,.selector-list__element--disabled a{pointer-events:none;cursor:default}.selector-list__element--lts-flag{color:#fff}.selector-list__element--lts-flag:hover,.selector-list__element--lts-flag:active,.selector-list__element--lts-flag:focus{background:inherit}.selector-list__element--lts-flag a{padding-top:0.4rem;padding-bottom:0.2rem;letter-spacing:.1875rem;font-size:80%}.selector-pane__shadow-box .edge-fader--left{font-size:0.8rem;width:0.75rem;margin-left:0.2rem;height:1.25rem;max-height:100%;top:50%;-webkit-transform:translateY(-50%);-ms-transform:translateY(-50%);-o-transform:translateY(-50%);transform:translateY(-50%);z-index:1;background:transparent;-webkit-filter:none;filter:none}.selector-pane__shadow-box .edge-fader--left .edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-right:.5em solid transparent;border-left:0 none transparent;-webkit-transform:translate(-25%, -50%);-ms-transform:translate(-25%, -50%);-o-transform:translate(-25%, -50%);transform:translate(-25%, -50%)}.selector-pane__shadow-box .edge-fader--right{font-size:0.8rem;width:0.75rem;margin-right:0.2rem;height:1.25rem;max-height:100%;top:50%;-webkit-transform:translateY(-50%);-ms-transform:translateY(-50%);-o-transform:translateY(-50%);transform:translateY(-50%);z-index:1;background:transparent;-webkit-filter:none;filter:none}.selector-pane__shadow-box .edge-fader--right .edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-left:.5em solid transparent;border-right:0 none transparent;-webkit-transform:translate(25%, -50%);-ms-transform:translate(25%, -50%);-o-transform:translate(25%, -50%);transform:translate(25%, -50%)}.selector-list__sizing-box .edge-fader--top{font-size:0.8rem;top:0;left:1.4rem;width:1.25rem;height:0.75rem;margin-top:0.1rem;max-height:100%;background:transparent;-webkit-filter:none;filter:none}.selector-list__sizing-box .edge-fader--top .edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-bottom:.5em solid transparent;border-top:0 none transparent;-webkit-transform:translate(-50%, -25%);-ms-transform:translate(-50%, -25%);-o-transform:translate(-50%, 
-25%);transform:translate(-50%, -25%)}.selector-list__sizing-box .edge-fader--bottom{font-size:0.8rem;top:0;left:0.6rem;width:1.25rem;height:0.75rem;margin-top:0.1rem;max-height:100%;background:transparent;-webkit-filter:none;filter:none}.selector-list__sizing-box .edge-fader--bottom .edge-fader__arrow--inactive{width:1em;height:1em;border:.5em solid transparent;border-top:.5em solid transparent;border-bottom:0 none transparent;-webkit-transform:translate(-50%, 25%);-ms-transform:translate(-50%, 25%);-o-transform:translate(-50%, 25%);transform:translate(-50%, 25%)}.social-button{position:relative;height:42px;width:42px;margin-right:8px}.social-button:last-of-type{margin-right:0}.social-button .social-button__primary{position:absolute;left:0;height:42px;width:42px;background-image:url("/images/index/social-sprite.png");background-repeat:no-repeat;background-position-y:0;opacity:1;-webkit-transition:opacity 200ms ease-in;-o-transition:opacity 200ms ease-in;transition:opacity 200ms ease-in}.social-button .social-button__inverted{position:absolute;left:0;height:42px;width:42px;background-image:url("/images/index/social-sprite.png");background-repeat:no-repeat;background-position-y:-42px;opacity:0;-webkit-transition:opacity 200ms ease-in;-o-transition:opacity 200ms ease-in;transition:opacity 200ms ease-in}.social-button:hover .social-button__primary,.social-button:active .social-button__primary,.social-button:focus .social-button__primary{opacity:0}.social-button:hover .social-button__inverted,.social-button:active .social-button__inverted,.social-button:focus .social-button__inverted{opacity:1}.social-button--facebook .social-button__primary{background-position-x:0}.social-button--facebook .social-button__inverted{background-position-x:0}.social-button--twitter .social-button__primary{background-position-x:-50px}.social-button--twitter .social-button__inverted{background-position-x:-50px}.social-button--youtube .social-button__primary{background-position-x:-100px}.social-button--youtube .social-button__inverted{background-position-x:-100px}.social-button--linkedin .social-button__primary{background-position-x:-150px}.social-button--linkedin .social-button__inverted{background-position-x:-150px}.social-button--github .social-button__primary{background-position-x:-200px}.social-button--github .social-button__inverted{background-position-x:-200px}.table-of-contents{margin-bottom:3rem}.table-of-contents__title{margin-top:0;margin-bottom:0.5rem}@media (min-width: 30rem){.table-of-contents__wrapper--multi{-webkit-column-count:2;-moz-column-count:2;column-count:2}}@media (min-width: 86rem){.table-of-contents__wrapper--multi{-webkit-column-count:3;-moz-column-count:3;column-count:3}}.table-of-contents__items{color:#9199a1;margin-bottom:0}.table-of-contents__items a{color:#636567}.table-of-contents__items a:hover,.table-of-contents__items a:active,.table-of-contents__items a:focus{color:#e78505}.table-of-contents__item{font-size:110%;padding-top:.25rem}@media (min-width: 48rem) and (max-width: 66.49rem){.table-of-contents__item{font-size:100%}}.index .welcome-content{padding-top:3.5rem}@media (min-width: 48rem){.index .welcome-content{padding-top:4.5rem}}.index .welcome{background-color:#fcc074;background:url("../images/index/home-banner.jpg") no-repeat bottom/cover;padding-top:3rem;padding-bottom:3rem;color:#636567;font-size:1.7rem;line-height:1;text-align:center;margin-bottom:0}@media (min-width: 48rem){.index .welcome{padding-bottom:5rem}}.index .welcome 
h1{letter-spacing:-1px;color:#636567;text-transform:uppercase;font-size:2.5rem;margin-bottom:1rem}@media (min-width: 66.5rem){.index .welcome h1{font-size:3rem}}.index .product-content{-webkit-box-shadow:0 0 1.5rem 0 rgba(0,0,0,0.2);box-shadow:0 0 1.5rem 0 rgba(0,0,0,0.2)}.index .product-content .product-callout{text-align:center;color:#788087;font-size:1.2rem;padding-top:1rem;margin-bottom:0}@media (min-width: 48rem){.index .product-content .product-callout{font-size:1.5rem}}.index .product-picker{padding-top:2rem;padding-bottom:5rem}@media (min-width: 66.5rem){.index .product-picker{max-width:71rem;padding-top:5rem;padding-bottom:6rem}}.index .product__wrapper{padding-bottom:2rem;padding-left:1rem;padding-right:1rem}.index .product__wrapper:last-of-type{padding-bottom:0}.index .product__card{border-radius:.3rem;padding:1.5rem 1rem;color:#636567;line-height:1.3;font-size:1.2rem;text-align:center;text-decoration:none;background:#f5f3f5;border:1px solid #ebebeb;-webkit-box-shadow:0 -0.25rem 4rem -0.5rem rgba(0,0,0,0.1);box-shadow:0 -0.25rem 4rem -0.5rem rgba(0,0,0,0.1);-webkit-transition:color 200ms ease-in, background 200ms ease-in;-o-transition:color 200ms ease-in, background 200ms ease-in;transition:color 200ms ease-in, background 200ms ease-in}.index .product__card .product__title{font-size:3.75rem;font-weight:700;line-height:1;margin-bottom:1.25rem;color:#636567;text-transform:lowercase;-webkit-transition:color 200ms ease-in;-o-transition:color 200ms ease-in;transition:color 200ms ease-in}.index .product__card .product__title .product__logo{vertical-align:middle;height:0.60em;width:0.60em;margin-bottom:0.10em}.index .product__card .product__title .product__logo .logo{height:100%;width:100%}.index .product__card .product__title .product__logo--invertible{position:relative}.index .product__card .product__title .product__logo--invertible .logo{position:absolute;left:0;-webkit-transition:opacity 200ms ease-in;-o-transition:opacity 200ms ease-in;transition:opacity 200ms ease-in}.index .product__card .product__title .product__logo--invertible .logo--primary{opacity:1}.index .product__card .product__title .product__logo--invertible .logo--inverted{opacity:0}.index .product__card .product__title .product__title-highlight{color:#f99d53;text-transform:uppercase;font-size:70.5%;font-weight:400;-webkit-transition:color 200ms ease-in;-o-transition:color 200ms ease-in;transition:color 200ms ease-in}@media (min-width: 66.5rem){.index .product__card .product__description{min-height:3.9em}}.index .product__card:hover,.index .product__card:active,.index .product__card:focus{text-decoration:none;color:#f0f2f5;background:#6c8997}.index .product__card:hover .product__title,.index .product__card:active .product__title,.index .product__card:focus .product__title{color:#f0f2f5}.index .product__card:hover .product__title .product__logo--invertible .logo--primary,.index .product__card:active .product__title .product__logo--invertible .logo--primary,.index .product__card:focus .product__title .product__logo--invertible .logo--primary{opacity:0}.index .product__card:hover .product__title .product__logo--invertible .logo--inverted,.index .product__card:active .product__title .product__logo--invertible .logo--inverted,.index .product__card:focus .product__title .product__logo--invertible .logo--inverted{opacity:1}.index .highlight__wrapper{-webkit-box-shadow:0 -0.15rem 3rem -0.5rem rgba(0,0,0,0.2);box-shadow:0 -0.15rem 3rem -0.5rem rgba(0,0,0,0.2);font-size:1.2rem}@media (min-width: 66.5rem){.index 
.highlight__wrapper{background:url("../images/index/highlights-bg-split.jpg") left/cover}}@media (max-width: 66.49rem){.index .highlight__container{padding:0;margin:0;max-width:none}.index .highlight__container .row{margin:0}}.index .highlight__container .highlight__title{letter-spacing:-1px;font-size:1.8rem;text-align:left;padding-bottom:0.25rem}.highlight__left .index .highlight__container .highlight__title{padding-bottom:0.5rem}.index .highlight__container .highlight__link-wrapper{text-align:center}.index .highlight__container .highlight__link{border-radius:.15rem;padding:0.8rem 2.5rem;font-size:85%;text-decoration:none;text-transform:uppercase;word-spacing:-1px;-webkit-transition:color 500ms ease, background 500ms ease;-o-transition:color 500ms ease, background 500ms ease;transition:color 500ms ease, background 500ms ease}.index .highlight__container .highlight__left{padding:3rem 1rem 2rem 1rem;color:#f7f7f7;background-color:#3b4a5d;background:url("../images/index/highlights-bg-dark.jpg")}@media (min-width: 66.5rem){.index .highlight__container .highlight__left{padding-top:2rem;min-height:16rem}.index .highlight__container .highlight__left p{min-height:5.4rem}}@media (min-width: 86rem){.index .highlight__container .highlight__left{padding-top:3rem}}.index .highlight__container .highlight__left .highlight__link{color:#f7f7f7;background:#f99d53}.index .highlight__container .highlight__left .highlight__link:hover,.index .highlight__container .highlight__left .highlight__link:active,.index .highlight__container .highlight__left .highlight__link:focus{color:#f7f7f7;background:#e78505}.index .highlight__container .highlight__right{padding:2rem 1rem 2rem 1rem;color:#3b4a5d;background-color:#f7f7f7;background:url("../images/index/highlights-bg-light.jpg") repeat}@media (min-width: 66.5rem){.index .highlight__container .highlight__right{padding-top:2rem;padding-left:3rem;min-height:16rem}.index .highlight__container .highlight__right p{min-height:5.4rem}}@media (min-width: 86rem){.index .highlight__container .highlight__right{padding-top:3rem}}.index .highlight__container .highlight__right .highlight__link{color:#3b4a5d;background:#ffd195}.index .highlight__container .highlight__right .highlight__link:hover,.index .highlight__container .highlight__right .highlight__link:active,.index .highlight__container .highlight__right .highlight__link:focus{color:#3b4a5d;background:#fcc074}.client-library-logos{list-style:none}.client-library-logo{border-radius:.25rem;margin:0.5rem;padding:0.4rem;-webkit-box-shadow:0 -0.25rem 2rem -0.5rem rgba(0,0,0,0.2);box-shadow:0 -0.25rem 2rem -0.5rem rgba(0,0,0,0.2);background:#f7f7f7;-webkit-transition:background 300ms ease-in, -webkit-box-shadow 300ms ease-in;transition:background 300ms ease-in, -webkit-box-shadow 300ms ease-in;-o-transition:background 300ms ease-in, box-shadow 300ms ease-in;transition:background 300ms ease-in, box-shadow 300ms ease-in;transition:background 300ms ease-in, box-shadow 300ms ease-in, -webkit-box-shadow 300ms ease-in}.client-library-logo:hover,.client-library-logo:active,.client-library-logo:focus{background:#e3e6e8;-webkit-box-shadow:0 -0.25rem 2rem -0.5rem rgba(0,0,0,0.4);box-shadow:0 -0.25rem 2rem -0.5rem rgba(0,0,0,0.4)}table.use-cases__image-links{width:100%}table.use-cases__image-links td:first-child{width:30%}table.use-cases__image-links td:first-child img{width:100%}table.use-cases__image-links td:last-child{width:70%}.main-article{-webkit-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0)} diff --git 
a/static/data/project_descriptions.json b/static/data/project_descriptions.json index fa8b14f81e..1dbf825653 100644 --- a/static/data/project_descriptions.json +++ b/static/data/project_descriptions.json @@ -1 +1 @@ -{"__comment":"This file was automatically generated using `rake generate_projects_metadata`. See the Project Descriptions entry in config.yaml for more information.","riak_kv":{"project_name":"Riak KV","path":"/riak/kv","archived_path":"/riak","releases":[["2.0.0","2.0.1","2.0.2","2.0.4","2.0.5","2.0.6","2.0.7","2.0.8","2.0.9"],["2.1.1","2.1.3","2.1.4"],["2.2.0","2.2.1","2.2.2","2.2.3"]],"latest":"2.2.3","lts":"2.0","archived_url":"http://docs.basho.com/riak/1.4.12/"},"riak_cs":{"project_name":"Riak CS","path":"/riak/cs","archived_path":"/riakcs","releases":[["2.0.0","2.0.1"],["2.1.0","2.1.1"]],"latest":"2.1.1","lts":"2.0","archived_url":"http://docs.basho.com/riakcs/1.5.4/"},"riak_ts":{"project_name":"Riak TS","path":"/riak/ts","archived_path":"/riakts","releases":[["1.0.0"],["1.1.0"],["1.2.0"],["1.3.0","1.3.1"],["1.4.0"],["1.5.0","1.5.1","1.5.2"]],"latest":"1.5.2"},"dataplatform":{"project_name":"DataPlatform","path":"/dataplatform","archived_path":"/dataplatform","releases":[["1.0.0"]],"latest":"1.0.0"}} \ No newline at end of file +{"__comment":"This file was automatically generated using `rake generate_projects_metadata`. See the Project Descriptions entry in config.yaml for more information.","riak_kv":{"project_name":"Riak KV","path":"/riak/kv","github_path":"https://github.com/TI-Tokyo/riak-docs-fork/tree/master/content/","archived_path":"/riak","releases":[["2.0.0","2.0.1","2.0.2","2.0.4","2.0.5","2.0.6","2.0.7","2.0.8","2.0.9"],["2.1.1","2.1.3","2.1.4"],["2.2.0","2.2.1","2.2.2","2.2.3","2.2.6"],["2.9.0p5","2.9.1","2.9.2","2.9.4","2.9.7","2.9.8","2.9.9","2.9.10"],["3.0.1","3.0.2","3.0.3","3.0.4"]],"latest":"3.0.4","lts":["2.9","3.0"],"archive_below":"2.2"},"riak_cs":{"project_name":"Riak CS","path":"/riak/cs","github_path":"https://github.com/TI-Tokyo/riak-docs-fork/tree/master/content/","archived_path":"/riakcs","releases":[["2.0.0","2.0.1"],["2.1.0","2.1.1","2.1.2"]],"latest":"2.1.2","lts":["2.1"]},"riak_ts":{"project_name":"Riak TS","path":"/riak/ts","github_path":"https://github.com/TI-Tokyo/riak-docs-fork/tree/master/content/","archived_path":"/riakts","releases":[["1.0.0"],["1.1.0"],["1.2.0"],["1.3.0","1.3.1"],["1.4.0"],["1.5.0","1.5.1","1.5.2"]],"latest":"1.5.2","lts":["1.5"],"archive_below":"1.3"},"dataplatform":{"project_name":"DataPlatform","path":"/dataplatform","github_path":null,"archived_path":"/dataplatform","releases":[["1.0.0"]],"latest":"1.0.0"}} \ No newline at end of file diff --git a/static/images/branding/riak-docs.png b/static/images/branding/riak-docs.png new file mode 100644 index 0000000000..487024e850 Binary files /dev/null and b/static/images/branding/riak-docs.png differ diff --git a/static/images/shared/operating_system_branding/amazon.png b/static/images/shared/operating_system_branding/amazon.png new file mode 100644 index 0000000000..5526f7a554 Binary files /dev/null and b/static/images/shared/operating_system_branding/amazon.png differ diff --git a/static/images/shared/operating_system_branding/oracle.png b/static/images/shared/operating_system_branding/oracle.png new file mode 100644 index 0000000000..6af23fdfa5 Binary files /dev/null and b/static/images/shared/operating_system_branding/oracle.png differ diff --git a/static/images/shared/operating_system_branding/raspbian.png 
b/static/images/shared/operating_system_branding/raspbian.png new file mode 100644 index 0000000000..1e6ced9927 Binary files /dev/null and b/static/images/shared/operating_system_branding/raspbian.png differ diff --git a/static/images/shared/riak-ring.png b/static/images/shared/riak-ring.png new file mode 100644 index 0000000000..6a879479dc Binary files /dev/null and b/static/images/shared/riak-ring.png differ diff --git a/static/js/main.js b/static/js/main.js index 53e5c3e113..dc5b5725cb 100644 --- a/static/js/main.js +++ b/static/js/main.js @@ -1,6 +1,6 @@ -function format_number(e){e+="",x=e.split("."),x1=x[0],x2=x.length>1?"."+x[1]:"";for(var t=/(\d+)(\d{3})/;t.test(x1);)x1=x1.replace(t,"$1,$2");return x1+x2}function format_bytes(e){var t=["bytes","KiB","MiB","GiB","TiB","PiB","EiB","ZiB","YiB"];if(0==e)return"";if(1==e)return"1 byte";var n=parseInt(Math.floor(Math.log(e)/Math.log(1024)));return(0==n?e/Math.pow(1024,n):(e/Math.pow(1024,n)).toFixed(1))+" "+t[n]}function abbreviate_number(e){var t=["","thousand","million","billion","trillion","quadrillion","quintillion","sextillion","septillion"];if(e<1e3)return e;var n=parseInt(Math.floor(Math.log(e)/Math.log(1e3)));return(0==n?e/Math.pow(1e3,n):(e/Math.pow(1e3,n)).toFixed(1))+" "+t[n]}function update_calculations(){return NumEntries()>1e26?void $("#recomend").text("You have more keys than sub-atomic particles in all known universes. That's too many."):Bucket()<1?void $("#recomend").text("You'll need to have a non-zero bucket size."):Key()<1?void $("#recomend").text("You'll need to have a non-zero key size."):Value()<1?void $("#recomend").text("You'll need to have a non-zero value size."):RAM()<1?void $("#recomend").text("You'll need to allocate a non-zero amount of RAM to data storage."):(N_Val()<3&&$("#recomend").text("You'll want to deploy at least 3 Riak nodes, 4 would be even better as a starting point."),n=estimate_nodes(),d=estimate_storage(),r=estimate_keydir(),void $("#recommend").html("
To manage your estimated "+abbreviate_number(NumEntries())+" key/bucket pairs where bucket names are ~"+format_bytes(Bucket())+", keys are ~"+format_bytes(Key())+", values are ~"+format_bytes(Value())+" and you are setting aside "+format_bytes(RAM())+" of RAM per-node for in-memory data management within a cluster that is configured to maintain "+N_Val()+" replicas per key (N = "+N_Val()+") then Riak, using the Bitcask storage engine, will require at least: • "+n+" nodes • "+format_bytes(r/n)+" of RAM per node ("+format_bytes(r)+" total across all nodes) • "+format_bytes(d/n)+" of storage space per node ("+format_bytes(d)+" total storage space used across all nodes)
        "))}!function(e,t,n){function r(e,t){return typeof e===t}function i(){var e,t,n,i,a,o,s;for(var l in _)if(_.hasOwnProperty(l)){if(e=[],t=_[l],t.name&&(e.push(t.name.toLowerCase()),t.options&&t.options.aliases&&t.options.aliases.length))for(n=0;n",r.insertBefore(n.lastChild,r.firstChild)}function r(){var e=y.elements;return"string"==typeof e?e.split(" "):e}function i(e,t){var n=y.elements;"string"!=typeof n&&(n=n.join(" ")),"string"!=typeof e&&(e=e.join(" ")),y.elements=n+" "+e,c(t)}function a(e){var t=v[e[m]];return t||(t={},b++,e[m]=b,v[b]=t),t}function o(e,n,r){if(n||(n=t),d)return n.createElement(e);r||(r=a(n));var i;return i=r.cache[e]?r.cache[e].cloneNode():g.test(e)?(r.cache[e]=r.createElem(e)).cloneNode():r.createElem(e),!i.canHaveChildren||h.test(e)||i.tagUrn?i:r.frag.appendChild(i)}function s(e,n){if(e||(e=t),d)return e.createDocumentFragment();n=n||a(e);for(var i=n.frag.cloneNode(),o=0,s=r(),l=s.length;o",u="hidden"in e,d=1==e.childNodes.length||function(){t.createElement("a");var e=t.createDocumentFragment();return"undefined"==typeof e.cloneNode||"undefined"==typeof e.createDocumentFragment||"undefined"==typeof e.createElement}()}catch(e){u=!0,d=!0}}();var y={elements:p.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:f,shivCSS:p.shivCSS!==!1,supportsUnknownElements:d,shivMethods:p.shivMethods!==!1,type:"default",shivDocument:c,createElement:o,createDocumentFragment:s,addElements:i};e.html5=y,c(t),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof e?e:this,t);var E="Moz O ms Webkit",S=w._config.usePrefixes?E.toLowerCase().split(" "):[];w._domPrefixes=S;var M;!function(){var e={}.hasOwnProperty;M=r(e,"undefined")||r(e.call,"undefined")?function(e,t){return t in e&&r(e.constructor.prototype[t],"undefined")}:function(t,n){return e.call(t,n)}}(),w._l={},w.on=function(e,t){this._l[e]||(this._l[e]=[]),this._l[e].push(t),x.hasOwnProperty(e)&&setTimeout(function(){x._trigger(e,x[e])},0)},w._trigger=function(e,t){if(this._l[e]){var n=this._l[e];setTimeout(function(){var e,r;for(e=0;e7)}),x.addTest("audio",function(){var e=s("audio"),t=!1;try{(t=!!e.canPlayType)&&(t=new Boolean(t),t.ogg=e.canPlayType('audio/ogg; codecs="vorbis"').replace(/^no$/,""),t.mp3=e.canPlayType('audio/mpeg; codecs="mp3"').replace(/^no$/,""),t.opus=e.canPlayType('audio/ogg; codecs="opus"')||e.canPlayType('audio/webm; codecs="opus"').replace(/^no$/,""),t.wav=e.canPlayType('audio/wav; codecs="1"').replace(/^no$/,""),t.m4a=(e.canPlayType("audio/x-m4a;")||e.canPlayType("audio/aac;")).replace(/^no$/,""))}catch(e){}return t}),x.addTest("canvas",function(){var e=s("canvas");return!(!e.getContext||!e.getContext("2d"))}),x.addTest("canvastext",function(){return x.canvas!==!1&&"function"==typeof s("canvas").getContext("2d").fillText}),x.addTest("video",function(){var e=s("video"),t=!1;try{(t=!!e.canPlayType)&&(t=new Boolean(t),t.ogg=e.canPlayType('video/ogg; codecs="theora"').replace(/^no$/,""),t.h264=e.canPlayType('video/mp4; codecs="avc1.42E01E"').replace(/^no$/,""),t.webm=e.canPlayType('video/webm; codecs="vp8, vorbis"').replace(/^no$/,""),t.vp9=e.canPlayType('video/webm; codecs="vp9"').replace(/^no$/,""),t.hls=e.canPlayType('application/x-mpegURL; codecs="avc1.42E01E"').replace(/^no$/,""))}catch(e){}return t}),x.addTest("webgl",function(){var t=s("canvas"),n="probablySupportsContext"in 
t?"probablySupportsContext":"supportsContext";return n in t?t[n]("webgl")||t[n]("experimental-webgl"):"WebGLRenderingContext"in e}),x.addTest("cssgradients",function(){for(var e,t="background-image:",n="gradient(linear,left top,right bottom,from(#9f9),to(white));",r="",i=0,a=N.length-1;i-1}),x.addTest("multiplebgs",function(){var e=s("a").style;return e.cssText="background:url(https://),url(https://),red url(https://)",/(url\s*\(.*?){3}/.test(e.background)}),x.addTest("opacity",function(){var e=s("a").style;return e.cssText=N.join("opacity:.55;"),/^0.55$/.test(e.opacity)}),x.addTest("rgba",function(){var e=s("a").style;return e.cssText="background-color:rgba(150,255,150,.5)",(""+e.backgroundColor).indexOf("rgba")>-1}),x.addTest("inlinesvg",function(){var e=s("div");return e.innerHTML="","http://www.w3.org/2000/svg"==("undefined"!=typeof SVGRect&&e.firstChild&&e.firstChild.namespaceURI)});var $=s("input"),j="autocomplete autofocus list placeholder max min multiple pattern required step".split(" "),L={};x.input=function(t){for(var n=0,r=t.length;n=9,i=t<533&&e.match(/android/gi);return n||i||r}();F?x.addTest("fontface",!1):H('@font-face {font-family:"font";src:url("https://")}',function(e,n){var r=t.getElementById("smodernizr"),i=r.sheet||r.styleSheet,a=i?i.cssRules&&i.cssRules[0]?i.cssRules[0].cssText:i.cssText||"":"",o=/src/i.test(a)&&0===a.indexOf(n.split(" ")[0]);x.addTest("fontface",o)}),H('#modernizr{font:0/0 a}#modernizr:after{content:":)";visibility:hidden;font:7px/1 a}',function(e){x.addTest("generatedcontent",e.offsetHeight>=7)});var O=w._config.usePrefixes?E.split(" "):[];w._cssomPrefixes=O;var P=function(t){var r,i=N.length,a=e.CSSRule;if("undefined"==typeof a)return n;if(!t)return!1;if(t=t.replace(/^@/,""),r=t.replace(/-/g,"_").toUpperCase()+"_RULE",r in a)return"@"+t;for(var o=0;o0&&t-1 in e)}function r(e,t,n){if(ae.isFunction(t))return ae.grep(e,function(e,r){return!!t.call(e,r,e)!==n});if(t.nodeType)return ae.grep(e,function(e){return e===t!==n});if("string"==typeof t){if(ge.test(t))return ae.filter(t,e,n);t=ae.filter(t,e)}return ae.grep(e,function(e){return J.call(t,e)>-1!==n})}function i(e,t){for(;(e=e[t])&&1!==e.nodeType;);return e}function a(e){var t={};return ae.each(e.match(we)||[],function(e,n){t[n]=!0}),t}function o(){Q.removeEventListener("DOMContentLoaded",o),e.removeEventListener("load",o),ae.ready()}function s(){this.expando=ae.expando+s.uid++}function l(e,t,n){var r;if(void 0===n&&1===e.nodeType)if(r="data-"+t.replace(Se,"-$&").toLowerCase(),n=e.getAttribute(r),"string"==typeof n){try{n="true"===n||"false"!==n&&("null"===n?null:+n+""===n?+n:Ee.test(n)?ae.parseJSON(n):n)}catch(e){}Te.set(e,t,n)}else n=void 0;return n}function c(e,t,n,r){var i,a=1,o=20,s=r?function(){return r.cur()}:function(){return ae.css(e,t,"")},l=s(),c=n&&n[3]||(ae.cssNumber[t]?"":"px"),u=(ae.cssNumber[t]||"px"!==c&&+l)&&Ae.exec(ae.css(e,t));if(u&&u[3]!==c){c=c||u[3],n=n||[],u=+l||1;do a=a||".5",u/=a,ae.style(e,t,u+c);while(a!==(a=s()/l)&&1!==a&&--o)}return n&&(u=+u||+l||0,i=n[1]?u+(n[1]+1)*n[2]:+n[2],r&&(r.unit=c,r.start=u,r.end=i)),i}function u(e,t){var n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[];return void 0===t||t&&ae.nodeName(e,t)?ae.merge([e],n):n}function d(e,t){for(var n=0,r=e.length;n-1)i&&i.push(a);else if(c=ae.contains(a.ownerDocument,a),o=u(p.appendChild(a),"script"),c&&d(o),n)for(f=0;a=o[f++];)ze.test(a.type||"")&&n.push(a);return p}function p(){return!0}function 
h(){return!1}function g(){try{return Q.activeElement}catch(e){}}function m(e,t,n,r,i,a){var o,s;if("object"==typeof t){"string"!=typeof n&&(r=r||n,n=void 0);for(s in t)m(e,s,n,r,t[s],a);return e}if(null==r&&null==i?(i=n,r=n=void 0):null==i&&("string"==typeof n?(i=r,r=void 0):(i=r,r=n,n=void 0)),i===!1)i=h;else if(!i)return e;return 1===a&&(o=i,i=function(e){return ae().off(e),o.apply(this,arguments)},i.guid=o.guid||(o.guid=ae.guid++)),e.each(function(){ae.event.add(this,t,i,r,n)})}function b(e,t){return ae.nodeName(e,"table")&&ae.nodeName(11!==t.nodeType?t:t.firstChild,"tr")?e.getElementsByTagName("tbody")[0]||e.appendChild(e.ownerDocument.createElement("tbody")):e}function v(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function y(e){var t=We.exec(e.type);return t?e.type=t[1]:e.removeAttribute("type"),e}function _(e,t){var n,r,i,a,o,s,l,c;if(1===t.nodeType){if(Ce.hasData(e)&&(a=Ce.access(e),o=Ce.set(t,a),c=a.events)){delete o.handle,o.events={};for(i in c)for(n=0,r=c[i].length;n1&&"string"==typeof g&&!re.checkClone&&Pe.test(g))return e.each(function(i){var a=e.eq(i);m&&(t[0]=g.call(this,i,a.html())),x(a,t,n,r)});if(p&&(i=f(t,e[0].ownerDocument,!1,e,r),a=i.firstChild,1===i.childNodes.length&&(i=a),a||r)){for(o=ae.map(u(i,"script"),v),s=o.length;d")).appendTo(t.documentElement),t=Ke[0].contentDocument,t.write(),t.close(),n=N(e,t),Ke.detach()),Ze[e]=n),n}function T(e,t,n){var r,i,a,o,s=e.style;return n=n||Xe(e),o=n?n.getPropertyValue(t)||n[t]:void 0,""!==o&&void 0!==o||ae.contains(e.ownerDocument,e)||(o=ae.style(e,t)),n&&!re.pixelMarginRight()&&Qe.test(o)&&Ve.test(t)&&(r=s.width,i=s.minWidth,a=s.maxWidth,s.minWidth=s.maxWidth=s.width=o,o=n.width,s.width=r,s.minWidth=i,s.maxWidth=a),void 0!==o?o+"":o}function E(e,t){return{get:function(){return e()?void delete this.get:(this.get=t).apply(this,arguments)}}}function S(e){if(e in rt)return e;for(var t=e[0].toUpperCase()+e.slice(1),n=nt.length;n--;)if(e=nt[n]+t,e in rt)return e}function M(e,t,n){var r=Ae.exec(t);return r?Math.max(0,r[2]-(n||0))+(r[3]||"px"):t}function A(e,t,n,r,i){for(var a=n===(r?"border":"content")?4:"width"===t?1:0,o=0;a<4;a+=2)"margin"===n&&(o+=ae.css(e,n+$e[a],!0,i)),r?("content"===n&&(o-=ae.css(e,"padding"+$e[a],!0,i)),"margin"!==n&&(o-=ae.css(e,"border"+$e[a]+"Width",!0,i))):(o+=ae.css(e,"padding"+$e[a],!0,i),"padding"!==n&&(o+=ae.css(e,"border"+$e[a]+"Width",!0,i)));return o}function $(e,t,n){var r=!0,i="width"===t?e.offsetWidth:e.offsetHeight,a=Xe(e),o="border-box"===ae.css(e,"boxSizing",!1,a);if(i<=0||null==i){if(i=T(e,t,a),(i<0||null==i)&&(i=e.style[t]),Qe.test(i))return i;r=o&&(re.boxSizingReliable()||i===e.style[t]),i=parseFloat(i)||0}return i+A(e,t,n||(o?"border":"content"),r,a)+"px"}function j(e,t){for(var n,r,i,a=[],o=0,s=e.length;o=0&&n=0},isPlainObject:function(e){var t;if("object"!==ae.type(e)||e.nodeType||ae.isWindow(e))return!1;if(e.constructor&&!ne.call(e,"constructor")&&!ne.call(e.constructor.prototype||{},"isPrototypeOf"))return!1;for(t in e);return void 0===t||ne.call(e,t)},isEmptyObject:function(e){var t;for(t in e)return!1;return!0},type:function(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?ee[te.call(e)]||"object":typeof e},globalEval:function(e){var t,n=eval;e=ae.trim(e),e&&(1===e.indexOf("use strict")?(t=Q.createElement("script"),t.text=e,Q.head.appendChild(t).parentNode.removeChild(t)):n(e))},camelCase:function(e){return e.replace(se,"ms-").replace(le,ce)},nodeName:function(e,t){return e.nodeName&&e.nodeName.toLowerCase()===t.toLowerCase()},each:function(e,t){var 
r,i=0;if(n(e))for(r=e.length;ix.cacheLength&&delete e[t.shift()],e[n+" "]=r}var t=[];return e}function r(e){return e[D]=!0,e}function i(e){var t=j.createElement("div");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function a(e,t){for(var n=e.split("|"),r=n.length;r--;)x.attrHandle[n[r]]=t}function o(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&(~t.sourceIndex||Z)-(~e.sourceIndex||Z);if(r)return r;if(n)for(;n=n.nextSibling;)if(n===t)return-1;return e?1:-1}function s(e){return function(t){var n=t.nodeName.toLowerCase();return"input"===n&&t.type===e}}function l(e){return function(t){var n=t.nodeName.toLowerCase();return("input"===n||"button"===n)&&t.type===e}}function c(e){return r(function(t){return t=+t,r(function(n,r){for(var i,a=e([],n.length,t),o=a.length;o--;)n[i=a[o]]&&(n[i]=!(r[i]=n[i]))})})}function u(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}function d(){}function f(e){for(var t=0,n=e.length,r="";t1?function(t,n,r){for(var i=e.length;i--;)if(!e[i](t,n,r))return!1;return!0}:e[0]}function g(e,n,r){for(var i=0,a=n.length;i-1&&(r[c]=!(o[c]=d))}}else y=m(y===o?y.splice(h,y.length):y),a?a(null,o,y,l):Y.apply(o,y)})}function v(e){for(var t,n,r,i=e.length,a=x.relative[e[0].type],o=a||x.relative[" "],s=a?1:0,l=p(function(e){return e===t},o,!0),c=p(function(e){return ee(t,e)>-1},o,!0),u=[function(e,n,r){var i=!a&&(r||n!==S)||((t=n).nodeType?l(e,n,r):c(e,n,r));return t=null,i}];s1&&h(u),s>1&&f(e.slice(0,s-1).concat({value:" "===e[s-2].type?"*":""})).replace(se,"$1"),n,s0,a=e.length>0,o=function(r,o,s,l,c){var u,d,f,p=0,h="0",g=r&&[],b=[],v=S,y=r||a&&x.find.TAG("*",c),_=F+=null==v?1:Math.random()||.1,w=y.length;for(c&&(S=o===j||o||c);h!==w&&null!=(u=y[h]);h++){if(a&&u){for(d=0,o||u.ownerDocument===j||($(u),s=!B);f=e[d++];)if(f(u,o||j,s)){l.push(u);break}c&&(F=_)}i&&((u=!f&&u)&&p--,r&&g.push(u))}if(p+=h,i&&h!==p){for(d=0;f=n[d++];)f(g,b,o,s);if(r){if(p>0)for(;h--;)g[h]||b[h]||(b[h]=X.call(l));b=m(b)}Y.apply(l,b),c&&!r&&b.length>0&&p+n.length>1&&t.uniqueSort(l)}return c&&(F=_,S=v),g};return i?r(o):o}var _,w,x,k,N,C,T,E,S,M,A,$,j,L,B,z,R,q,I,D="sizzle"+1*new Date,H=e.document,F=0,O=0,P=n(),W=n(),U=n(),K=function(e,t){return e===t&&(A=!0),0},Z=1<<31,V={}.hasOwnProperty,Q=[],X=Q.pop,G=Q.push,Y=Q.push,J=Q.slice,ee=function(e,t){for(var n=0,r=e.length;n+~]|"+ne+")"+ne+"*"),ue=new RegExp("="+ne+"*([^\\]'\"]*?)"+ne+"*\\]","g"),de=new RegExp(ae),fe=new RegExp("^"+re+"$"),pe={ID:new RegExp("^#("+re+")"),CLASS:new RegExp("^\\.("+re+")"),TAG:new RegExp("^("+re+"|[*])"),ATTR:new RegExp("^"+ie),PSEUDO:new RegExp("^"+ae),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+ne+"*(even|odd|(([+-]|)(\\d*)n|)"+ne+"*(?:([+-]|)"+ne+"*(\\d+)|))"+ne+"*\\)|)","i"),bool:new RegExp("^(?:"+te+")$","i"),needsContext:new RegExp("^"+ne+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+ne+"*((?:-\\d)?\\d*)"+ne+"*\\)|)(?=[^-]|$)","i")},he=/^(?:input|select|textarea|button)$/i,ge=/^h\d$/i,me=/^[^{]+\{\s*\[native \w/,be=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ve=/[+~]/,ye=/'|\\/g,_e=new RegExp("\\\\([\\da-f]{1,6}"+ne+"?|("+ne+")|.)","ig"),we=function(e,t,n){var r="0x"+t-65536;return r!==r||n?t:r<0?String.fromCharCode(r+65536):String.fromCharCode(r>>10|55296,1023&r|56320)},xe=function(){$()};try{Y.apply(Q=J.call(H.childNodes),H.childNodes),Q[H.childNodes.length].nodeType}catch(e){Y={apply:Q.length?function(e,t){G.apply(e,J.call(t))}:function(e,t){for(var n=e.length,r=0;e[n++]=t[r++];);e.length=n-1}}}w=t.support={},N=t.isXML=function(e){var 
t=e&&(e.ownerDocument||e).documentElement;return!!t&&"HTML"!==t.nodeName},$=t.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:H;return r!==j&&9===r.nodeType&&r.documentElement?(j=r,L=j.documentElement,B=!N(j),(n=j.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",xe,!1):n.attachEvent&&n.attachEvent("onunload",xe)),w.attributes=i(function(e){return e.className="i",!e.getAttribute("className")}),w.getElementsByTagName=i(function(e){return e.appendChild(j.createComment("")),!e.getElementsByTagName("*").length}),w.getElementsByClassName=me.test(j.getElementsByClassName),w.getById=i(function(e){return L.appendChild(e).id=D,!j.getElementsByName||!j.getElementsByName(D).length}),w.getById?(x.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&B){var n=t.getElementById(e);return n?[n]:[]}},x.filter.ID=function(e){var t=e.replace(_e,we);return function(e){return e.getAttribute("id")===t}}):(delete x.find.ID,x.filter.ID=function(e){var t=e.replace(_e,we);return function(e){var n="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return n&&n.value===t}}),x.find.TAG=w.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):w.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,a=t.getElementsByTagName(e);if("*"===e){for(;n=a[i++];)1===n.nodeType&&r.push(n);return r}return a},x.find.CLASS=w.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&B)return t.getElementsByClassName(e)},R=[],z=[],(w.qsa=me.test(j.querySelectorAll))&&(i(function(e){L.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&z.push("[*^$]="+ne+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||z.push("\\["+ne+"*(?:value|"+te+")"),e.querySelectorAll("[id~="+D+"-]").length||z.push("~="),e.querySelectorAll(":checked").length||z.push(":checked"),e.querySelectorAll("a#"+D+"+*").length||z.push(".#.+[+~]")}),i(function(e){var t=j.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&z.push("name"+ne+"*[*^$|!~]?="),e.querySelectorAll(":enabled").length||z.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),z.push(",.*:")})),(w.matchesSelector=me.test(q=L.matches||L.webkitMatchesSelector||L.mozMatchesSelector||L.oMatchesSelector||L.msMatchesSelector))&&i(function(e){w.disconnectedMatch=q.call(e,"div"),q.call(e,"[s!='']:x"),R.push("!=",ae)}),z=z.length&&new RegExp(z.join("|")),R=R.length&&new RegExp(R.join("|")),t=me.test(L.compareDocumentPosition),I=t||me.test(L.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)for(;t=t.parentNode;)if(t===e)return!0;return!1},K=t?function(e,t){if(e===t)return A=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n?n:(n=(e.ownerDocument||e)===(t.ownerDocument||t)?e.compareDocumentPosition(t):1,1&n||!w.sortDetached&&t.compareDocumentPosition(e)===n?e===j||e.ownerDocument===H&&I(H,e)?-1:t===j||t.ownerDocument===H&&I(H,t)?1:M?ee(M,e)-ee(M,t):0:4&n?-1:1)}:function(e,t){if(e===t)return A=!0,0;var n,r=0,i=e.parentNode,a=t.parentNode,s=[e],l=[t];if(!i||!a)return e===j?-1:t===j?1:i?-1:a?1:M?ee(M,e)-ee(M,t):0;if(i===a)return o(e,t);for(n=e;n=n.parentNode;)s.unshift(n);for(n=t;n=n.parentNode;)l.unshift(n);for(;s[r]===l[r];)r++;return 
r?o(s[r],l[r]):s[r]===H?-1:l[r]===H?1:0},j):j},t.matches=function(e,n){return t(e,null,null,n)},t.matchesSelector=function(e,n){if((e.ownerDocument||e)!==j&&$(e),n=n.replace(ue,"='$1']"),w.matchesSelector&&B&&!U[n+" "]&&(!R||!R.test(n))&&(!z||!z.test(n)))try{var r=q.call(e,n);if(r||w.disconnectedMatch||e.document&&11!==e.document.nodeType)return r}catch(e){}return t(n,j,null,[e]).length>0},t.contains=function(e,t){return(e.ownerDocument||e)!==j&&$(e),I(e,t)},t.attr=function(e,t){(e.ownerDocument||e)!==j&&$(e);var n=x.attrHandle[t.toLowerCase()],r=n&&V.call(x.attrHandle,t.toLowerCase())?n(e,t,!B):void 0;return void 0!==r?r:w.attributes||!B?e.getAttribute(t):(r=e.getAttributeNode(t))&&r.specified?r.value:null},t.error=function(e){throw new Error("Syntax error, unrecognized expression: "+e)},t.uniqueSort=function(e){var t,n=[],r=0,i=0;if(A=!w.detectDuplicates,M=!w.sortStable&&e.slice(0),e.sort(K),A){for(;t=e[i++];)t===e[i]&&(r=n.push(i));for(;r--;)e.splice(n[r],1)}return M=null,e},k=t.getText=function(e){var t,n="",r=0,i=e.nodeType;if(i){if(1===i||9===i||11===i){if("string"==typeof e.textContent)return e.textContent;for(e=e.firstChild;e;e=e.nextSibling)n+=k(e)}else if(3===i||4===i)return e.nodeValue}else for(;t=e[r++];)n+=k(t);return n},x=t.selectors={cacheLength:50,createPseudo:r,match:pe,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(_e,we),e[3]=(e[3]||e[4]||e[5]||"").replace(_e,we),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||t.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&t.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return pe.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&de.test(n)&&(t=C(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(_e,we).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=P[e+" "];return t||(t=new RegExp("(^|"+ne+")"+e+"("+ne+"|$)"))&&P(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(e,n,r){return function(i){var a=t.attr(i,e);return null==a?"!="===n:!n||(a+="","="===n?a===r:"!="===n?a!==r:"^="===n?r&&0===a.indexOf(r):"*="===n?r&&a.indexOf(r)>-1:"$="===n?r&&a.slice(-r.length)===r:"~="===n?(" "+a.replace(oe," ")+" ").indexOf(r)>-1:"|="===n&&(a===r||a.slice(0,r.length+1)===r+"-"))}},CHILD:function(e,t,n,r,i){var a="nth"!==e.slice(0,3),o="last"!==e.slice(-4),s="of-type"===t;return 1===r&&0===i?function(e){return!!e.parentNode}:function(t,n,l){var c,u,d,f,p,h,g=a!==o?"nextSibling":"previousSibling",m=t.parentNode,b=s&&t.nodeName.toLowerCase(),v=!l&&!s,y=!1;if(m){if(a){for(;g;){for(f=t;f=f[g];)if(s?f.nodeName.toLowerCase()===b:1===f.nodeType)return!1;h=g="only"===e&&!h&&"nextSibling"}return!0}if(h=[o?m.firstChild:m.lastChild],o&&v){for(f=m,d=f[D]||(f[D]={}),u=d[f.uniqueID]||(d[f.uniqueID]={}),c=u[e]||[],p=c[0]===F&&c[1],y=p&&c[2],f=p&&m.childNodes[p];f=++p&&f&&f[g]||(y=p=0)||h.pop();)if(1===f.nodeType&&++y&&f===t){u[e]=[F,p,y];break}}else 
if(v&&(f=t,d=f[D]||(f[D]={}),u=d[f.uniqueID]||(d[f.uniqueID]={}),c=u[e]||[],p=c[0]===F&&c[1],y=p),y===!1)for(;(f=++p&&f&&f[g]||(y=p=0)||h.pop())&&((s?f.nodeName.toLowerCase()!==b:1!==f.nodeType)||!++y||(v&&(d=f[D]||(f[D]={}),u=d[f.uniqueID]||(d[f.uniqueID]={}),u[e]=[F,y]),f!==t)););return y-=i,y===r||y%r===0&&y/r>=0}}},PSEUDO:function(e,n){var i,a=x.pseudos[e]||x.setFilters[e.toLowerCase()]||t.error("unsupported pseudo: "+e);return a[D]?a(n):a.length>1?(i=[e,e,"",n],x.setFilters.hasOwnProperty(e.toLowerCase())?r(function(e,t){for(var r,i=a(e,n),o=i.length;o--;)r=ee(e,i[o]),e[r]=!(t[r]=i[o])}):function(e){return a(e,0,i)}):a}},pseudos:{not:r(function(e){var t=[],n=[],i=T(e.replace(se,"$1"));return i[D]?r(function(e,t,n,r){for(var a,o=i(e,null,r,[]),s=e.length;s--;)(a=o[s])&&(e[s]=!(t[s]=a))}):function(e,r,a){return t[0]=e,i(t,null,a,n),t[0]=null,!n.pop()}}),has:r(function(e){return function(n){return t(e,n).length>0}}),contains:r(function(e){return e=e.replace(_e,we),function(t){return(t.textContent||t.innerText||k(t)).indexOf(e)>-1}}),lang:r(function(e){return fe.test(e||"")||t.error("unsupported lang: "+e),e=e.replace(_e,we).toLowerCase(),function(t){var n;do if(n=B?t.lang:t.getAttribute("xml:lang")||t.getAttribute("lang"))return n=n.toLowerCase(),n===e||0===n.indexOf(e+"-");while((t=t.parentNode)&&1===t.nodeType);return!1}}),target:function(t){var n=e.location&&e.location.hash;return n&&n.slice(1)===t.id},root:function(e){return e===L},focus:function(e){return e===j.activeElement&&(!j.hasFocus||j.hasFocus())&&!!(e.type||e.href||~e.tabIndex)},enabled:function(e){return e.disabled===!1},disabled:function(e){return e.disabled===!0},checked:function(e){var t=e.nodeName.toLowerCase();return"input"===t&&!!e.checked||"option"===t&&!!e.selected},selected:function(e){return e.parentNode&&e.parentNode.selectedIndex,e.selected===!0},empty:function(e){for(e=e.firstChild;e;e=e.nextSibling)if(e.nodeType<6)return!1;return!0},parent:function(e){return!x.pseudos.empty(e)},header:function(e){return ge.test(e.nodeName)},input:function(e){return he.test(e.nodeName)},button:function(e){var t=e.nodeName.toLowerCase();return"input"===t&&"button"===e.type||"button"===t},text:function(e){var t;return"input"===e.nodeName.toLowerCase()&&"text"===e.type&&(null==(t=e.getAttribute("type"))||"text"===t.toLowerCase())},first:c(function(){return[0]}),last:c(function(e,t){return[t-1]}),eq:c(function(e,t,n){return[n<0?n+t:n]}),even:c(function(e,t){for(var n=0;n=0;)e.push(r);return e}),gt:c(function(e,t,n){for(var r=n<0?n+t:n;++r2&&"ID"===(o=a[0]).type&&w.getById&&9===t.nodeType&&B&&x.relative[a[1].type]){if(t=(x.find.ID(o.matches[0].replace(_e,we),t)||[])[0],!t)return n;c&&(t=t.parentNode),e=e.slice(a.shift().value.length)}for(i=pe.needsContext.test(e)?0:a.length;i--&&(o=a[i],!x.relative[s=o.type]);)if((l=x.find[s])&&(r=l(o.matches[0].replace(_e,we),ve.test(a[0].type)&&u(t.parentNode)||t))){if(a.splice(i,1),e=r.length&&f(a),!e)return Y.apply(n,r),n;break}}return(c||T(e,d))(r,t,!B,n,!t||ve.test(e)&&u(t.parentNode)||t),n},w.sortStable=D.split("").sort(K).join("")===D,w.detectDuplicates=!!A,$(),w.sortDetached=i(function(e){return 1&e.compareDocumentPosition(j.createElement("div"))}),i(function(e){return e.innerHTML="","#"===e.firstChild.getAttribute("href")})||a("type|href|height|width",function(e,t,n){if(!n)return e.getAttribute(t,"type"===t.toLowerCase()?1:2)}),w.attributes&&i(function(e){return 
e.innerHTML="",e.firstChild.setAttribute("value",""),""===e.firstChild.getAttribute("value")})||a("value",function(e,t,n){if(!n&&"input"===e.nodeName.toLowerCase())return e.defaultValue}),i(function(e){return null==e.getAttribute("disabled")})||a(te,function(e,t,n){var r;if(!n)return e[t]===!0?t.toLowerCase():(r=e.getAttributeNode(t))&&r.specified?r.value:null}),t}(e);ae.find=ue,ae.expr=ue.selectors,ae.expr[":"]=ae.expr.pseudos,ae.uniqueSort=ae.unique=ue.uniqueSort,ae.text=ue.getText,ae.isXMLDoc=ue.isXML,ae.contains=ue.contains;var de=function(e,t,n){for(var r=[],i=void 0!==n;(e=e[t])&&9!==e.nodeType;)if(1===e.nodeType){if(i&&ae(e).is(n))break;r.push(e)}return r},fe=function(e,t){for(var n=[];e;e=e.nextSibling)1===e.nodeType&&e!==t&&n.push(e);return n},pe=ae.expr.match.needsContext,he=/^<([\w-]+)\s*\/?>(?:<\/\1>|)$/,ge=/^.[^:#\[\.,]*$/;ae.filter=function(e,t,n){var r=t[0];return n&&(e=":not("+e+")"),1===t.length&&1===r.nodeType?ae.find.matchesSelector(r,e)?[r]:[]:ae.find.matches(e,ae.grep(t,function(e){return 1===e.nodeType}))},ae.fn.extend({find:function(e){var t,n=this.length,r=[],i=this;if("string"!=typeof e)return this.pushStack(ae(e).filter(function(){for(t=0;t1?ae.unique(r):r),r.selector=this.selector?this.selector+" "+e:e,r},filter:function(e){return this.pushStack(r(this,e||[],!1))},not:function(e){return this.pushStack(r(this,e||[],!0))},is:function(e){return!!r(this,"string"==typeof e&&pe.test(e)?ae(e):e||[],!1).length}});var me,be=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]*))$/,ve=ae.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||me,"string"==typeof e){if(r="<"===e[0]&&">"===e[e.length-1]&&e.length>=3?[null,e,null]:be.exec(e),!r||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof ae?t[0]:t,ae.merge(this,ae.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:Q,!0)),he.test(r[1])&&ae.isPlainObject(t))for(r in t)ae.isFunction(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return i=Q.getElementById(r[2]),i&&i.parentNode&&(this.length=1,this[0]=i),this.context=Q,this.selector=e,this}return e.nodeType?(this.context=this[0]=e,this.length=1,this):ae.isFunction(e)?void 0!==n.ready?n.ready(e):e(ae):(void 0!==e.selector&&(this.selector=e.selector,this.context=e.context),ae.makeArray(e,this))};ve.prototype=ae.fn,me=ae(Q);var ye=/^(?:parents|prev(?:Until|All))/,_e={children:!0,contents:!0,next:!0,prev:!0};ae.fn.extend({has:function(e){var t=ae(e,this),n=t.length;return this.filter(function(){for(var e=0;e-1:1===n.nodeType&&ae.find.matchesSelector(n,e))){a.push(n);break}return this.pushStack(a.length>1?ae.uniqueSort(a):a)},index:function(e){return e?"string"==typeof e?J.call(ae(e),this[0]):J.call(this,e.jquery?e[0]:e):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(e,t){return this.pushStack(ae.uniqueSort(ae.merge(this.get(),ae(e,t))))},addBack:function(e){return this.add(null==e?this.prevObject:this.prevObject.filter(e))}}),ae.each({parent:function(e){var t=e.parentNode;return t&&11!==t.nodeType?t:null},parents:function(e){return de(e,"parentNode")},parentsUntil:function(e,t,n){return de(e,"parentNode",n)},next:function(e){return i(e,"nextSibling")},prev:function(e){return i(e,"previousSibling")},nextAll:function(e){return de(e,"nextSibling")},prevAll:function(e){return de(e,"previousSibling")},nextUntil:function(e,t,n){return de(e,"nextSibling",n)},prevUntil:function(e,t,n){return de(e,"previousSibling",n)},siblings:function(e){return fe((e.parentNode||{}).firstChild,e)},children:function(e){return 
fe(e.firstChild)},contents:function(e){return e.contentDocument||ae.merge([],e.childNodes)}},function(e,t){ae.fn[e]=function(n,r){var i=ae.map(this,t,n);return"Until"!==e.slice(-5)&&(r=n),r&&"string"==typeof r&&(i=ae.filter(r,i)),this.length>1&&(_e[e]||ae.uniqueSort(i),ye.test(e)&&i.reverse()),this.pushStack(i)}});var we=/\S+/g;ae.Callbacks=function(e){e="string"==typeof e?a(e):ae.extend({},e);var t,n,r,i,o=[],s=[],l=-1,c=function(){for(i=e.once,r=t=!0;s.length;l=-1)for(n=s.shift();++l-1;)o.splice(n,1),n<=l&&l--}),this},has:function(e){return e?ae.inArray(e,o)>-1:o.length>0},empty:function(){return o&&(o=[]),this},disable:function(){return i=s=[],o=n="",this},disabled:function(){return!o},lock:function(){return i=s=[],n||(o=n=""),this},locked:function(){return!!i},fireWith:function(e,n){return i||(n=n||[],n=[e,n.slice?n.slice():n],s.push(n),t||c()),this},fire:function(){return u.fireWith(this,arguments),this},fired:function(){return!!r}};return u},ae.extend({Deferred:function(e){var t=[["resolve","done",ae.Callbacks("once memory"),"resolved"],["reject","fail",ae.Callbacks("once memory"),"rejected"],["notify","progress",ae.Callbacks("memory")]],n="pending",r={state:function(){return n},always:function(){return i.done(arguments).fail(arguments),this},then:function(){var e=arguments;return ae.Deferred(function(n){ae.each(t,function(t,a){var o=ae.isFunction(e[t])&&e[t];i[a[1]](function(){var e=o&&o.apply(this,arguments);e&&ae.isFunction(e.promise)?e.promise().progress(n.notify).done(n.resolve).fail(n.reject):n[a[0]+"With"](this===r?n.promise():this,o?[e]:arguments)})}),e=null}).promise()},promise:function(e){return null!=e?ae.extend(e,r):r}},i={};return r.pipe=r.then,ae.each(t,function(e,a){var o=a[2],s=a[3];r[a[1]]=o.add,s&&o.add(function(){n=s},t[1^e][2].disable,t[2][2].lock),i[a[0]]=function(){return i[a[0]+"With"](this===i?r:this,arguments),this},i[a[0]+"With"]=o.fireWith}),r.promise(i),e&&e.call(i,i),i},when:function(e){var t,n,r,i=0,a=X.call(arguments),o=a.length,s=1!==o||e&&ae.isFunction(e.promise)?o:0,l=1===s?e:ae.Deferred(),c=function(e,n,r){return function(i){n[e]=this,r[e]=arguments.length>1?X.call(arguments):i,r===t?l.notifyWith(n,r):--s||l.resolveWith(n,r)}};if(o>1)for(t=new Array(o),n=new Array(o),r=new Array(o);i0||(xe.resolveWith(Q,[ae]),ae.fn.triggerHandler&&(ae(Q).triggerHandler("ready"),ae(Q).off("ready"))))}}),ae.ready.promise=function(t){return xe||(xe=ae.Deferred(),"complete"===Q.readyState||"loading"!==Q.readyState&&!Q.documentElement.doScroll?e.setTimeout(ae.ready):(Q.addEventListener("DOMContentLoaded",o),e.addEventListener("load",o))),xe.promise(t)},ae.ready.promise();var ke=function(e,t,n,r,i,a,o){var s=0,l=e.length,c=null==n;if("object"===ae.type(n)){i=!0;for(s in n)ke(e,t,s,n[s],!0,a,o)}else if(void 0!==r&&(i=!0,ae.isFunction(r)||(o=!0),c&&(o?(t.call(e,r),t=null):(c=t,t=function(e,t,n){return c.call(ae(e),n)})),t))for(;s-1&&void 0!==n&&Te.set(this,e,t)})},null,t,arguments.length>1,null,!0)},removeData:function(e){return this.each(function(){Te.remove(this,e)})}}),ae.extend({queue:function(e,t,n){var r;if(e)return t=(t||"fx")+"queue",r=Ce.get(e,t),n&&(!r||ae.isArray(n)?r=Ce.access(e,t,ae.makeArray(n)):r.push(n)),r||[]},dequeue:function(e,t){t=t||"fx";var n=ae.queue(e,t),r=n.length,i=n.shift(),a=ae._queueHooks(e,t),o=function(){ae.dequeue(e,t)};"inprogress"===i&&(i=n.shift(),r--),i&&("fx"===t&&n.unshift("inprogress"),delete a.stop,i.call(e,o,a)),!r&&a&&a.empty.fire()},_queueHooks:function(e,t){var n=t+"queueHooks";return 
Ce.get(e,n)||Ce.access(e,n,{empty:ae.Callbacks("once memory").add(function(){Ce.remove(e,[t+"queue",n])})})}}),ae.fn.extend({queue:function(e,t){var n=2;return"string"!=typeof e&&(t=e,e="fx",n--),arguments.length",""],thead:[1,"","
        "],col:[2,"","
        "],tr:[2,"","
        "],td:[3,"","
        "],_default:[0,"",""]};Re.optgroup=Re.option,Re.tbody=Re.tfoot=Re.colgroup=Re.caption=Re.thead,Re.th=Re.td;var qe=/<|&#?\w+;/;!function(){var e=Q.createDocumentFragment(),t=e.appendChild(Q.createElement("div")),n=Q.createElement("input");n.setAttribute("type","radio"),n.setAttribute("checked","checked"),n.setAttribute("name","t"),t.appendChild(n),re.checkClone=t.cloneNode(!0).cloneNode(!0).lastChild.checked,t.innerHTML="",re.noCloneChecked=!!t.cloneNode(!0).lastChild.defaultValue}();var Ie=/^key/,De=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,He=/^([^.]*)(?:\.(.+)|)/;ae.event={global:{},add:function(e,t,n,r,i){var a,o,s,l,c,u,d,f,p,h,g,m=Ce.get(e);if(m)for(n.handler&&(a=n,n=a.handler,i=a.selector),n.guid||(n.guid=ae.guid++),(l=m.events)||(l=m.events={}),(o=m.handle)||(o=m.handle=function(t){return"undefined"!=typeof ae&&ae.event.triggered!==t.type?ae.event.dispatch.apply(e,arguments):void 0}),t=(t||"").match(we)||[""],c=t.length;c--;)s=He.exec(t[c])||[],p=g=s[1],h=(s[2]||"").split(".").sort(),p&&(d=ae.event.special[p]||{},p=(i?d.delegateType:d.bindType)||p,d=ae.event.special[p]||{},u=ae.extend({type:p,origType:g,data:r,handler:n,guid:n.guid,selector:i,needsContext:i&&ae.expr.match.needsContext.test(i),namespace:h.join(".")},a),(f=l[p])||(f=l[p]=[],f.delegateCount=0,d.setup&&d.setup.call(e,r,h,o)!==!1||e.addEventListener&&e.addEventListener(p,o)),d.add&&(d.add.call(e,u),u.handler.guid||(u.handler.guid=n.guid)),i?f.splice(f.delegateCount++,0,u):f.push(u),ae.event.global[p]=!0)},remove:function(e,t,n,r,i){var a,o,s,l,c,u,d,f,p,h,g,m=Ce.hasData(e)&&Ce.get(e);if(m&&(l=m.events)){for(t=(t||"").match(we)||[""],c=t.length;c--;)if(s=He.exec(t[c])||[], -p=g=s[1],h=(s[2]||"").split(".").sort(),p){for(d=ae.event.special[p]||{},p=(r?d.delegateType:d.bindType)||p,f=l[p]||[],s=s[2]&&new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"),o=a=f.length;a--;)u=f[a],!i&&g!==u.origType||n&&n.guid!==u.guid||s&&!s.test(u.namespace)||r&&r!==u.selector&&("**"!==r||!u.selector)||(f.splice(a,1),u.selector&&f.delegateCount--,d.remove&&d.remove.call(e,u));o&&!f.length&&(d.teardown&&d.teardown.call(e,h,m.handle)!==!1||ae.removeEvent(e,p,m.handle),delete l[p])}else for(p in l)ae.event.remove(e,p+t[c],n,r,!0);ae.isEmptyObject(l)&&Ce.remove(e,"handle events")}},dispatch:function(e){e=ae.event.fix(e);var t,n,r,i,a,o=[],s=X.call(arguments),l=(Ce.get(this,"events")||{})[e.type]||[],c=ae.event.special[e.type]||{};if(s[0]=e,e.delegateTarget=this,!c.preDispatch||c.preDispatch.call(this,e)!==!1){for(o=ae.event.handlers.call(this,e,l),t=0;(i=o[t++])&&!e.isPropagationStopped();)for(e.currentTarget=i.elem,n=0;(a=i.handlers[n++])&&!e.isImmediatePropagationStopped();)e.rnamespace&&!e.rnamespace.test(a.namespace)||(e.handleObj=a,e.data=a.data,r=((ae.event.special[a.origType]||{}).handle||a.handler).apply(i.elem,s),void 0!==r&&(e.result=r)===!1&&(e.preventDefault(),e.stopPropagation()));return c.postDispatch&&c.postDispatch.call(this,e),e.result}},handlers:function(e,t){var n,r,i,a,o=[],s=t.delegateCount,l=e.target;if(s&&l.nodeType&&("click"!==e.type||isNaN(e.button)||e.button<1))for(;l!==this;l=l.parentNode||this)if(1===l.nodeType&&(l.disabled!==!0||"click"!==e.type)){for(r=[],n=0;n-1:ae.find(i,this,null,[l]).length),r[i]&&r.push(a);r.length&&o.push({elem:l,handlers:r})}return s]*)\/>/gi,Oe=/\s*$/g;ae.extend({htmlPrefilter:function(e){return e.replace(Fe,"<$1>")},clone:function(e,t,n){var 
r,i,a,o,s=e.cloneNode(!0),l=ae.contains(e.ownerDocument,e);if(!(re.noCloneChecked||1!==e.nodeType&&11!==e.nodeType||ae.isXMLDoc(e)))for(o=u(s),a=u(e),r=0,i=a.length;r0&&d(o,!l&&u(e,"script")),s},cleanData:function(e){for(var t,n,r,i=ae.event.special,a=0;void 0!==(n=e[a]);a++)if(Ne(n)){if(t=n[Ce.expando]){if(t.events)for(r in t.events)i[r]?ae.event.remove(n,r):ae.removeEvent(n,r,t.handle);n[Ce.expando]=void 0}n[Te.expando]&&(n[Te.expando]=void 0)}}}),ae.fn.extend({domManip:x,detach:function(e){return k(this,e,!0)},remove:function(e){return k(this,e)},text:function(e){return ke(this,function(e){return void 0===e?ae.text(this):this.empty().each(function(){1!==this.nodeType&&11!==this.nodeType&&9!==this.nodeType||(this.textContent=e)})},null,e,arguments.length)},append:function(){return x(this,arguments,function(e){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var t=b(this,e);t.appendChild(e)}})},prepend:function(){return x(this,arguments,function(e){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var t=b(this,e);t.insertBefore(e,t.firstChild)}})},before:function(){return x(this,arguments,function(e){this.parentNode&&this.parentNode.insertBefore(e,this)})},after:function(){return x(this,arguments,function(e){this.parentNode&&this.parentNode.insertBefore(e,this.nextSibling)})},empty:function(){for(var e,t=0;null!=(e=this[t]);t++)1===e.nodeType&&(ae.cleanData(u(e,!1)),e.textContent="");return this},clone:function(e,t){return e=null!=e&&e,t=null==t?e:t,this.map(function(){return ae.clone(this,e,t)})},html:function(e){return ke(this,function(e){var t=this[0]||{},n=0,r=this.length;if(void 0===e&&1===t.nodeType)return t.innerHTML;if("string"==typeof e&&!Oe.test(e)&&!Re[(Be.exec(e)||["",""])[1].toLowerCase()]){e=ae.htmlPrefilter(e);try{for(;n1)},show:function(){return j(this,!0)},hide:function(){return j(this)},toggle:function(e){return"boolean"==typeof e?e?this.show():this.hide():this.each(function(){je(this)?ae(this).show():ae(this).hide()})}}),ae.Tween=L,L.prototype={constructor:L,init:function(e,t,n,r,i,a){this.elem=e,this.prop=n,this.easing=i||ae.easing._default,this.options=t,this.start=this.now=this.cur(),this.end=r,this.unit=a||(ae.cssNumber[n]?"":"px")},cur:function(){var e=L.propHooks[this.prop];return e&&e.get?e.get(this):L.propHooks._default.get(this)},run:function(e){var t,n=L.propHooks[this.prop];return this.options.duration?this.pos=t=ae.easing[this.easing](e,this.options.duration*e,0,1,this.options.duration):this.pos=t=e,this.now=(this.end-this.start)*t+this.start,this.options.step&&this.options.step.call(this.elem,this.now,this),n&&n.set?n.set(this):L.propHooks._default.set(this),this}},L.prototype.init.prototype=L.prototype,L.propHooks={_default:{get:function(e){var t;return 1!==e.elem.nodeType||null!=e.elem[e.prop]&&null==e.elem.style[e.prop]?e.elem[e.prop]:(t=ae.css(e.elem,e.prop,""),t&&"auto"!==t?t:0)},set:function(e){ae.fx.step[e.prop]?ae.fx.step[e.prop](e):1!==e.elem.nodeType||null==e.elem.style[ae.cssProps[e.prop]]&&!ae.cssHooks[e.prop]?e.elem[e.prop]=e.now:ae.style(e.elem,e.prop,e.now+e.unit)}}},L.propHooks.scrollTop=L.propHooks.scrollLeft={set:function(e){e.elem.nodeType&&e.elem.parentNode&&(e.elem[e.prop]=e.now)}},ae.easing={linear:function(e){return e},swing:function(e){return.5-Math.cos(e*Math.PI)/2},_default:"swing"},ae.fx=L.prototype.init,ae.fx.step={};var it,at,ot=/^(?:toggle|show|hide)$/,st=/queueHooks$/;ae.Animation=ae.extend(D,{tweeners:{"*":[function(e,t){var n=this.createTween(e,t);return 
c(n.elem,e,Ae.exec(t),n),n}]},tweener:function(e,t){ae.isFunction(e)?(t=e,e=["*"]):e=e.match(we);for(var n,r=0,i=e.length;r1)},removeAttr:function(e){return this.each(function(){ae.removeAttr(this,e)})}}),ae.extend({attr:function(e,t,n){var r,i,a=e.nodeType;if(3!==a&&8!==a&&2!==a)return"undefined"==typeof e.getAttribute?ae.prop(e,t,n):(1===a&&ae.isXMLDoc(e)||(t=t.toLowerCase(),i=ae.attrHooks[t]||(ae.expr.match.bool.test(t)?lt:void 0)),void 0!==n?null===n?void ae.removeAttr(e,t):i&&"set"in i&&void 0!==(r=i.set(e,n,t))?r:(e.setAttribute(t,n+""),n):i&&"get"in i&&null!==(r=i.get(e,t))?r:(r=ae.find.attr(e,t),null==r?void 0:r))},attrHooks:{type:{set:function(e,t){if(!re.radioValue&&"radio"===t&&ae.nodeName(e,"input")){var n=e.value;return e.setAttribute("type",t),n&&(e.value=n),t}}}},removeAttr:function(e,t){var n,r,i=0,a=t&&t.match(we);if(a&&1===e.nodeType)for(;n=a[i++];)r=ae.propFix[n]||n,ae.expr.match.bool.test(n)&&(e[r]=!1),e.removeAttribute(n)}}),lt={set:function(e,t,n){return t===!1?ae.removeAttr(e,n):e.setAttribute(n,n),n}},ae.each(ae.expr.match.bool.source.match(/\w+/g),function(e,t){var n=ct[t]||ae.find.attr;ct[t]=function(e,t,r){var i,a;return r||(a=ct[t],ct[t]=i,i=null!=n(e,t,r)?t.toLowerCase():null,ct[t]=a),i}});var ut=/^(?:input|select|textarea|button)$/i,dt=/^(?:a|area)$/i;ae.fn.extend({prop:function(e,t){return ke(this,ae.prop,e,t,arguments.length>1)},removeProp:function(e){return this.each(function(){delete this[ae.propFix[e]||e]})}}),ae.extend({prop:function(e,t,n){var r,i,a=e.nodeType;if(3!==a&&8!==a&&2!==a)return 1===a&&ae.isXMLDoc(e)||(t=ae.propFix[t]||t,i=ae.propHooks[t]),void 0!==n?i&&"set"in i&&void 0!==(r=i.set(e,n,t))?r:e[t]=n:i&&"get"in i&&null!==(r=i.get(e,t))?r:e[t]},propHooks:{tabIndex:{get:function(e){var t=ae.find.attr(e,"tabindex");return t?parseInt(t,10):ut.test(e.nodeName)||dt.test(e.nodeName)&&e.href?0:-1}}},propFix:{"for":"htmlFor","class":"className"}}),re.optSelected||(ae.propHooks.selected={get:function(e){var t=e.parentNode;return t&&t.parentNode&&t.parentNode.selectedIndex,null},set:function(e){var t=e.parentNode;t&&(t.selectedIndex,t.parentNode&&t.parentNode.selectedIndex)}}),ae.each(["tabIndex","readOnly","maxLength","cellSpacing","cellPadding","rowSpan","colSpan","useMap","frameBorder","contentEditable"],function(){ae.propFix[this.toLowerCase()]=this});var ft=/[\t\r\n\f]/g;ae.fn.extend({addClass:function(e){var t,n,r,i,a,o,s,l=0;if(ae.isFunction(e))return this.each(function(t){ae(this).addClass(e.call(this,t,H(this)))});if("string"==typeof e&&e)for(t=e.match(we)||[];n=this[l++];)if(i=H(n),r=1===n.nodeType&&(" "+i+" ").replace(ft," ")){for(o=0;a=t[o++];)r.indexOf(" "+a+" ")<0&&(r+=a+" ");s=ae.trim(r),i!==s&&n.setAttribute("class",s)}return this},removeClass:function(e){var t,n,r,i,a,o,s,l=0;if(ae.isFunction(e))return this.each(function(t){ae(this).removeClass(e.call(this,t,H(this)))});if(!arguments.length)return this.attr("class","");if("string"==typeof e&&e)for(t=e.match(we)||[];n=this[l++];)if(i=H(n),r=1===n.nodeType&&(" "+i+" ").replace(ft," ")){for(o=0;a=t[o++];)for(;r.indexOf(" "+a+" ")>-1;)r=r.replace(" "+a+" "," ");s=ae.trim(r),i!==s&&n.setAttribute("class",s)}return this},toggleClass:function(e,t){var n=typeof e;return"boolean"==typeof t&&"string"===n?t?this.addClass(e):this.removeClass(e):ae.isFunction(e)?this.each(function(n){ae(this).toggleClass(e.call(this,n,H(this),t),t)}):this.each(function(){var t,r,i,a;if("string"===n)for(r=0,i=ae(this),a=e.match(we)||[];t=a[r++];)i.hasClass(t)?i.removeClass(t):i.addClass(t);else void 
0!==e&&"boolean"!==n||(t=H(this),t&&Ce.set(this,"__className__",t),this.setAttribute&&this.setAttribute("class",t||e===!1?"":Ce.get(this,"__className__")||""))})},hasClass:function(e){var t,n,r=0;for(t=" "+e+" ";n=this[r++];)if(1===n.nodeType&&(" "+H(n)+" ").replace(ft," ").indexOf(t)>-1)return!0;return!1}});var pt=/\r/g,ht=/[\x20\t\r\n\f]+/g;ae.fn.extend({val:function(e){var t,n,r,i=this[0];{if(arguments.length)return r=ae.isFunction(e),this.each(function(n){var i;1===this.nodeType&&(i=r?e.call(this,n,ae(this).val()):e,null==i?i="":"number"==typeof i?i+="":ae.isArray(i)&&(i=ae.map(i,function(e){return null==e?"":e+""})),t=ae.valHooks[this.type]||ae.valHooks[this.nodeName.toLowerCase()],t&&"set"in t&&void 0!==t.set(this,i,"value")||(this.value=i))});if(i)return t=ae.valHooks[i.type]||ae.valHooks[i.nodeName.toLowerCase()],t&&"get"in t&&void 0!==(n=t.get(i,"value"))?n:(n=i.value,"string"==typeof n?n.replace(pt,""):null==n?"":n)}}}),ae.extend({valHooks:{option:{get:function(e){var t=ae.find.attr(e,"value");return null!=t?t:ae.trim(ae.text(e)).replace(ht," ")}},select:{get:function(e){for(var t,n,r=e.options,i=e.selectedIndex,a="select-one"===e.type||i<0,o=a?null:[],s=a?i+1:r.length,l=i<0?s:a?i:0;l-1)&&(n=!0);return n||(e.selectedIndex=-1),a}}}}),ae.each(["radio","checkbox"],function(){ae.valHooks[this]={set:function(e,t){if(ae.isArray(t))return e.checked=ae.inArray(ae(e).val(),t)>-1}},re.checkOn||(ae.valHooks[this].get=function(e){return null===e.getAttribute("value")?"on":e.value})});var gt=/^(?:focusinfocus|focusoutblur)$/;ae.extend(ae.event,{trigger:function(t,n,r,i){var a,o,s,l,c,u,d,f=[r||Q],p=ne.call(t,"type")?t.type:t,h=ne.call(t,"namespace")?t.namespace.split("."):[];if(o=s=r=r||Q,3!==r.nodeType&&8!==r.nodeType&&!gt.test(p+ae.event.triggered)&&(p.indexOf(".")>-1&&(h=p.split("."),p=h.shift(),h.sort()),c=p.indexOf(":")<0&&"on"+p,t=t[ae.expando]?t:new ae.Event(p,"object"==typeof t&&t),t.isTrigger=i?2:3,t.namespace=h.join("."),t.rnamespace=t.namespace?new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"):null,t.result=void 0,t.target||(t.target=r),n=null==n?[t]:ae.makeArray(n,[t]),d=ae.event.special[p]||{},i||!d.trigger||d.trigger.apply(r,n)!==!1)){if(!i&&!d.noBubble&&!ae.isWindow(r)){for(l=d.delegateType||p,gt.test(l+p)||(o=o.parentNode);o;o=o.parentNode)f.push(o),s=o;s===(r.ownerDocument||Q)&&f.push(s.defaultView||s.parentWindow||e)}for(a=0;(o=f[a++])&&!t.isPropagationStopped();)t.type=a>1?l:d.bindType||p,u=(Ce.get(o,"events")||{})[t.type]&&Ce.get(o,"handle"),u&&u.apply(o,n),u=c&&o[c],u&&u.apply&&Ne(o)&&(t.result=u.apply(o,n),t.result===!1&&t.preventDefault());return t.type=p,i||t.isDefaultPrevented()||d._default&&d._default.apply(f.pop(),n)!==!1||!Ne(r)||c&&ae.isFunction(r[p])&&!ae.isWindow(r)&&(s=r[c],s&&(r[c]=null),ae.event.triggered=p,r[p](),ae.event.triggered=void 0,s&&(r[c]=s)),t.result}},simulate:function(e,t,n){var r=ae.extend(new ae.Event,n,{type:e,isSimulated:!0});ae.event.trigger(r,null,t)}}),ae.fn.extend({trigger:function(e,t){return this.each(function(){ae.event.trigger(e,t,this)})},triggerHandler:function(e,t){var n=this[0];if(n)return ae.event.trigger(e,t,n,!0)}}),ae.each("blur focus focusin focusout load resize scroll unload click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup error contextmenu".split(" "),function(e,t){ae.fn[t]=function(e,n){return arguments.length>0?this.on(t,null,e,n):this.trigger(t)}}),ae.fn.extend({hover:function(e,t){return 
this.mouseenter(e).mouseleave(t||e)}}),re.focusin="onfocusin"in e,re.focusin||ae.each({focus:"focusin",blur:"focusout"},function(e,t){var n=function(e){ae.event.simulate(t,e.target,ae.event.fix(e))};ae.event.special[t]={setup:function(){var r=this.ownerDocument||this,i=Ce.access(r,t);i||r.addEventListener(e,n,!0),Ce.access(r,t,(i||0)+1)},teardown:function(){var r=this.ownerDocument||this,i=Ce.access(r,t)-1;i?Ce.access(r,t,i):(r.removeEventListener(e,n,!0),Ce.remove(r,t))}}});var mt=e.location,bt=ae.now(),vt=/\?/;ae.parseJSON=function(e){return JSON.parse(e+"")},ae.parseXML=function(t){var n;if(!t||"string"!=typeof t)return null;try{n=(new e.DOMParser).parseFromString(t,"text/xml")}catch(e){n=void 0}return n&&!n.getElementsByTagName("parsererror").length||ae.error("Invalid XML: "+t),n};var yt=/#.*$/,_t=/([?&])_=[^&]*/,wt=/^(.*?):[ \t]*([^\r\n]*)$/gm,xt=/^(?:about|app|app-storage|.+-extension|file|res|widget):$/,kt=/^(?:GET|HEAD)$/,Nt=/^\/\//,Ct={},Tt={},Et="*/".concat("*"),St=Q.createElement("a");St.href=mt.href,ae.extend({active:0,lastModified:{},etag:{},ajaxSettings:{url:mt.href,type:"GET",isLocal:xt.test(mt.protocol),global:!0,processData:!0,async:!0,contentType:"application/x-www-form-urlencoded; charset=UTF-8",accepts:{"*":Et,text:"text/plain",html:"text/html",xml:"application/xml, text/xml",json:"application/json, text/javascript"},contents:{xml:/\bxml\b/,html:/\bhtml/,json:/\bjson\b/},responseFields:{xml:"responseXML",text:"responseText",json:"responseJSON"},converters:{"* text":String,"text html":!0,"text json":ae.parseJSON,"text xml":ae.parseXML},flatOptions:{url:!0,context:!0}},ajaxSetup:function(e,t){return t?P(P(e,ae.ajaxSettings),t):P(ae.ajaxSettings,e)},ajaxPrefilter:F(Ct),ajaxTransport:F(Tt),ajax:function(t,n){function r(t,n,r,s){var c,d,v,y,w,k=n;2!==_&&(_=2,l&&e.clearTimeout(l),i=void 0,o=s||"",x.readyState=t>0?4:0,c=t>=200&&t<300||304===t,r&&(y=W(f,x,r)),y=U(f,y,x,c),c?(f.ifModified&&(w=x.getResponseHeader("Last-Modified"),w&&(ae.lastModified[a]=w),w=x.getResponseHeader("etag"),w&&(ae.etag[a]=w)),204===t||"HEAD"===f.type?k="nocontent":304===t?k="notmodified":(k=y.state,d=y.data,v=y.error,c=!v)):(v=k,!t&&k||(k="error",t<0&&(t=0))),x.status=t,x.statusText=(n||k)+"",c?g.resolveWith(p,[d,k,x]):g.rejectWith(p,[x,k,v]),x.statusCode(b),b=void 0,u&&h.trigger(c?"ajaxSuccess":"ajaxError",[x,f,c?d:v]),m.fireWith(p,[x,k]),u&&(h.trigger("ajaxComplete",[x,f]),--ae.active||ae.event.trigger("ajaxStop")))}"object"==typeof t&&(n=t,t=void 0),n=n||{};var i,a,o,s,l,c,u,d,f=ae.ajaxSetup({},n),p=f.context||f,h=f.context&&(p.nodeType||p.jquery)?ae(p):ae.event,g=ae.Deferred(),m=ae.Callbacks("once memory"),b=f.statusCode||{},v={},y={},_=0,w="canceled",x={readyState:0,getResponseHeader:function(e){var t;if(2===_){if(!s)for(s={};t=wt.exec(o);)s[t[1].toLowerCase()]=t[2];t=s[e.toLowerCase()]}return null==t?null:t},getAllResponseHeaders:function(){return 2===_?o:null},setRequestHeader:function(e,t){var n=e.toLowerCase();return _||(e=y[n]=y[n]||e,v[e]=t),this},overrideMimeType:function(e){return _||(f.mimeType=e),this},statusCode:function(e){var t;if(e)if(_<2)for(t in e)b[t]=[b[t],e[t]];else x.always(e[x.status]);return this},abort:function(e){var t=e||w;return 
i&&i.abort(t),r(0,t),this}};if(g.promise(x).complete=m.add,x.success=x.done,x.error=x.fail,f.url=((t||f.url||mt.href)+"").replace(yt,"").replace(Nt,mt.protocol+"//"),f.type=n.method||n.type||f.method||f.type,f.dataTypes=ae.trim(f.dataType||"*").toLowerCase().match(we)||[""],null==f.crossDomain){c=Q.createElement("a");try{c.href=f.url,c.href=c.href,f.crossDomain=St.protocol+"//"+St.host!=c.protocol+"//"+c.host}catch(e){f.crossDomain=!0}}if(f.data&&f.processData&&"string"!=typeof f.data&&(f.data=ae.param(f.data,f.traditional)),O(Ct,f,n,x),2===_)return x;u=ae.event&&f.global,u&&0===ae.active++&&ae.event.trigger("ajaxStart"),f.type=f.type.toUpperCase(),f.hasContent=!kt.test(f.type),a=f.url,f.hasContent||(f.data&&(a=f.url+=(vt.test(a)?"&":"?")+f.data,delete f.data),f.cache===!1&&(f.url=_t.test(a)?a.replace(_t,"$1_="+bt++):a+(vt.test(a)?"&":"?")+"_="+bt++)),f.ifModified&&(ae.lastModified[a]&&x.setRequestHeader("If-Modified-Since",ae.lastModified[a]),ae.etag[a]&&x.setRequestHeader("If-None-Match",ae.etag[a])),(f.data&&f.hasContent&&f.contentType!==!1||n.contentType)&&x.setRequestHeader("Content-Type",f.contentType),x.setRequestHeader("Accept",f.dataTypes[0]&&f.accepts[f.dataTypes[0]]?f.accepts[f.dataTypes[0]]+("*"!==f.dataTypes[0]?", "+Et+"; q=0.01":""):f.accepts["*"]);for(d in f.headers)x.setRequestHeader(d,f.headers[d]);if(f.beforeSend&&(f.beforeSend.call(p,x,f)===!1||2===_))return x.abort();w="abort";for(d in{success:1,error:1,complete:1})x[d](f[d]);if(i=O(Tt,f,n,x)){if(x.readyState=1,u&&h.trigger("ajaxSend",[x,f]),2===_)return x;f.async&&f.timeout>0&&(l=e.setTimeout(function(){x.abort("timeout")},f.timeout));try{_=1,i.send(v,r)}catch(e){if(!(_<2))throw e;r(-1,e)}}else r(-1,"No Transport");return x},getJSON:function(e,t,n){return ae.get(e,t,n,"json")},getScript:function(e,t){return ae.get(e,void 0,t,"script")}}),ae.each(["get","post"],function(e,t){ae[t]=function(e,n,r,i){return ae.isFunction(n)&&(i=i||r,r=n,n=void 0),ae.ajax(ae.extend({url:e,type:t,dataType:i,data:n,success:r},ae.isPlainObject(e)&&e))}}),ae._evalUrl=function(e){return ae.ajax({url:e,type:"GET",dataType:"script",async:!1,global:!1,"throws":!0})},ae.fn.extend({wrapAll:function(e){var t;return ae.isFunction(e)?this.each(function(t){ae(this).wrapAll(e.call(this,t))}):(this[0]&&(t=ae(e,this[0].ownerDocument).eq(0).clone(!0),this[0].parentNode&&t.insertBefore(this[0]),t.map(function(){for(var e=this;e.firstElementChild;)e=e.firstElementChild;return e}).append(this)),this)},wrapInner:function(e){return ae.isFunction(e)?this.each(function(t){ae(this).wrapInner(e.call(this,t))}):this.each(function(){var t=ae(this),n=t.contents();n.length?n.wrapAll(e):t.append(e)})},wrap:function(e){var t=ae.isFunction(e);return this.each(function(n){ae(this).wrapAll(t?e.call(this,n):e)})},unwrap:function(){return this.parent().each(function(){ae.nodeName(this,"body")||ae(this).replaceWith(this.childNodes)}).end()}}),ae.expr.filters.hidden=function(e){return!ae.expr.filters.visible(e)},ae.expr.filters.visible=function(e){return e.offsetWidth>0||e.offsetHeight>0||e.getClientRects().length>0};var Mt=/%20/g,At=/\[\]$/,$t=/\r?\n/g,jt=/^(?:submit|button|image|reset|file)$/i,Lt=/^(?:input|select|textarea|keygen)/i;ae.param=function(e,t){var n,r=[],i=function(e,t){t=ae.isFunction(t)?t():null==t?"":t,r[r.length]=encodeURIComponent(e)+"="+encodeURIComponent(t)};if(void 0===t&&(t=ae.ajaxSettings&&ae.ajaxSettings.traditional),ae.isArray(e)||e.jquery&&!ae.isPlainObject(e))ae.each(e,function(){i(this.name,this.value)});else for(n in e)K(n,e[n],t,i);return 
r.join("&").replace(Mt,"+")},ae.fn.extend({serialize:function(){return ae.param(this.serializeArray())},serializeArray:function(){return this.map(function(){var e=ae.prop(this,"elements");return e?ae.makeArray(e):this}).filter(function(){var e=this.type;return this.name&&!ae(this).is(":disabled")&&Lt.test(this.nodeName)&&!jt.test(e)&&(this.checked||!Le.test(e))}).map(function(e,t){var n=ae(this).val();return null==n?null:ae.isArray(n)?ae.map(n,function(e){return{name:t.name,value:e.replace($t,"\r\n")}}):{name:t.name,value:n.replace($t,"\r\n")}}).get()}}),ae.ajaxSettings.xhr=function(){try{return new e.XMLHttpRequest}catch(e){}};var Bt={0:200,1223:204},zt=ae.ajaxSettings.xhr();re.cors=!!zt&&"withCredentials"in zt,re.ajax=zt=!!zt,ae.ajaxTransport(function(t){var n,r;if(re.cors||zt&&!t.crossDomain)return{send:function(i,a){var o,s=t.xhr();if(s.open(t.type,t.url,t.async,t.username,t.password),t.xhrFields)for(o in t.xhrFields)s[o]=t.xhrFields[o];t.mimeType&&s.overrideMimeType&&s.overrideMimeType(t.mimeType),t.crossDomain||i["X-Requested-With"]||(i["X-Requested-With"]="XMLHttpRequest");for(o in i)s.setRequestHeader(o,i[o]);n=function(e){return function(){n&&(n=r=s.onload=s.onerror=s.onabort=s.onreadystatechange=null,"abort"===e?s.abort():"error"===e?"number"!=typeof s.status?a(0,"error"):a(s.status,s.statusText):a(Bt[s.status]||s.status,s.statusText,"text"!==(s.responseType||"text")||"string"!=typeof s.responseText?{binary:s.response}:{text:s.responseText},s.getAllResponseHeaders())); -}},s.onload=n(),r=s.onerror=n("error"),void 0!==s.onabort?s.onabort=r:s.onreadystatechange=function(){4===s.readyState&&e.setTimeout(function(){n&&r()})},n=n("abort");try{s.send(t.hasContent&&t.data||null)}catch(e){if(n)throw e}},abort:function(){n&&n()}}}),ae.ajaxSetup({accepts:{script:"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"},contents:{script:/\b(?:java|ecma)script\b/},converters:{"text script":function(e){return ae.globalEval(e),e}}}),ae.ajaxPrefilter("script",function(e){void 0===e.cache&&(e.cache=!1),e.crossDomain&&(e.type="GET")}),ae.ajaxTransport("script",function(e){if(e.crossDomain){var t,n;return{send:function(r,i){t=ae("",rE:!0,sL:["actionscript","javascript","handlebars","xml"]}},{cN:"meta",v:[{b:/<\?xml/,e:/\?>/,r:10},{b:/<\?\w+/,e:/\?>/}]},{cN:"tag",b:"",c:[{cN:"name",b:/[^\/><\s]+/,r:0},n]}]}}),hljs.registerLanguage("markdown",function(){return{aliases:["md","mkdown","mkd"],c:[{cN:"section",v:[{b:"^#{1,6}",e:"$"},{b:"^.+?\\n[=-]{2,}$"}]},{b:"<",e:">",sL:"xml",r:0},{cN:"bullet",b:"^([*+-]|(\\d+\\.))\\s+"},{cN:"strong",b:"[*_]{2}.+?[*_]{2}"},{cN:"emphasis",v:[{b:"\\*.+?\\*"},{b:"_.+?_",r:0}]},{cN:"quote",b:"^>\\s+",e:"$"},{cN:"code",v:[{b:"^```w*s*$",e:"^```s*$"},{b:"`.+?`"},{b:"^( {4}| )",e:"$",r:0}]},{b:"^[-\\*]{3,}",e:"$"},{b:"\\[.+?\\][\\(\\[].*?[\\)\\]]",rB:!0,c:[{cN:"string",b:"\\[",e:"\\]",eB:!0,rE:!0,r:0},{cN:"link",b:"\\]\\(",e:"\\)",eB:!0,eE:!0},{cN:"symbol",b:"\\]\\[",e:"\\]",eB:!0,eE:!0}],r:10},{b:/^\[[^\n]+\]:/,rB:!0,c:[{cN:"symbol",b:/\[/,e:/\]/,eB:!0,eE:!0},{cN:"link",b:/:\s*/,e:/$/,eB:!0}]}]}}),hljs.registerLanguage("python",function(e){var t={cN:"meta",b:/^(>>>|\.\.\.) 
/},n={cN:"string",c:[e.BE],v:[{b:/(u|b)?r?'''/,e:/'''/,c:[t],r:10},{b:/(u|b)?r?"""/,e:/"""/,c:[t],r:10},{b:/(u|r|ur)'/,e:/'/,r:10},{b:/(u|r|ur)"/,e:/"/,r:10},{b:/(b|br)'/,e:/'/},{b:/(b|br)"/,e:/"/},e.ASM,e.QSM]},r={cN:"number",r:0,v:[{b:e.BNR+"[lLjJ]?"},{b:"\\b(0o[0-7]+)[lLjJ]?"},{b:e.CNR+"[lLjJ]?"}]},i={cN:"params",b:/\(/,e:/\)/,c:["self",t,r,n]};return{aliases:["py","gyp"],k:{keyword:"and elif is global as in if from raise for except finally print import pass return exec else break not with class assert yield try while continue del or def lambda async await nonlocal|10 None True False",built_in:"Ellipsis NotImplemented"},i:/(<\/|->|\?)/,c:[t,r,n,e.HCM,{v:[{cN:"function",bK:"def",r:10},{cN:"class",bK:"class"}],e:/:/,i:/[${=;\n,]/,c:[e.UTM,i,{b:/->/,eW:!0,k:"None"}]},{cN:"meta",b:/^[\t ]*@/,e:/$/},{b:/\b(print|exec)\(/}]}}),hljs.registerLanguage("makefile",function(e){var t={cN:"variable",b:/\$\(/,e:/\)/,c:[e.BE]};return{aliases:["mk","mak"],c:[e.HCM,{b:/^\w+\s*\W*=/,rB:!0,r:0,starts:{e:/\s*\W*=/,eE:!0,starts:{e:/$/,r:0,c:[t]}}},{cN:"section",b:/^[\w]+:\s*$/},{cN:"meta",b:/^\.PHONY:/,e:/$/,k:{"meta-keyword":".PHONY"},l:/[\.\w]+/},{b:/^\t+/,e:/$/,r:0,c:[e.QSM,t]}]}}),hljs.registerLanguage("css",function(e){var t="[a-zA-Z-][a-zA-Z0-9_-]*",n={b:/[A-Z\_\.\-]+\s*:/,rB:!0,e:";",eW:!0,c:[{cN:"attribute",b:/\S/,e:":",eE:!0,starts:{eW:!0,eE:!0,c:[{b:/[\w-]+\(/,rB:!0,c:[{cN:"built_in",b:/[\w-]+/},{b:/\(/,e:/\)/,c:[e.ASM,e.QSM]}]},e.CSSNM,e.QSM,e.ASM,e.CBCM,{cN:"number",b:"#[0-9A-Fa-f]+"},{cN:"meta",b:"!important"}]}}]};return{cI:!0,i:/[=\/|'\$]/,c:[e.CBCM,{cN:"selector-id",b:/#[A-Za-z0-9_-]+/},{cN:"selector-class",b:/\.[A-Za-z0-9_-]+/},{cN:"selector-attr",b:/\[/,e:/\]/,i:"$"},{cN:"selector-pseudo",b:/:(:)?[a-zA-Z0-9\_\-\+\(\)"'.]+/},{b:"@(font-face|page)",l:"[a-z-]+",k:"font-face page"},{b:"@",e:"[{;]",i:/:/,c:[{cN:"keyword",b:/\w+/},{b:/\s/,eW:!0,eE:!0,r:0,c:[e.ASM,e.QSM,e.CSSNM]}]},{cN:"selector-tag",b:t,r:0},{b:"{",e:"}",i:/\S/,c:[e.CBCM,n]}]}}),hljs.registerLanguage("go",function(e){var t={keyword:"break default func interface select case map struct chan else goto package switch const fallthrough if range type continue for import return var go defer bool byte complex64 complex128 float32 float64 int8 int16 int32 int64 string uint8 uint16 uint32 uint64 int uint uintptr rune",literal:"true false iota nil",built_in:"append cap close complex copy imag len make new panic print println real recover delete"};return{aliases:["golang"],k:t,i:"|<-"}]}}),hljs.registerLanguage("erlang",function(e){var t="[a-z'][a-zA-Z0-9_']*",n="("+t+":"+t+"|"+t+")",r={keyword:"after and andalso|10 band begin bnot bor bsl bzr bxor case catch cond div end fun if let not of orelse|10 query receive rem try when xor",literal:"false true"},i=e.C("%","$"),a={cN:"number",b:"\\b(\\d+#[a-fA-F0-9]+|\\d+(\\.\\d+)?([eE][-+]?\\d+)?)",r:0},o={b:"fun\\s+"+t+"/\\d+"},s={b:n+"\\(",e:"\\)",rB:!0,r:0,c:[{b:n,r:0},{b:"\\(",e:"\\)",eW:!0,rE:!0,r:0}]},l={b:"{",e:"}",r:0},c={b:"\\b_([A-Z][A-Za-z0-9_]*)?",r:0},u={b:"[A-Z][a-zA-Z0-9_]*",r:0},d={b:"#"+e.UIR,r:0,rB:!0,c:[{b:"#"+e.UIR,r:0},{b:"{",e:"}",r:0}]},f={bK:"fun receive if try case",e:"end",k:r};f.c=[i,o,e.inherit(e.ASM,{cN:""}),f,s,e.QSM,a,l,c,u,d];var p=[i,o,f,s,e.QSM,a,l,c,u,d];s.c[1].c=p,l.c=p,d.c[1].c=p;var h={cN:"params",b:"\\(",e:"\\)",c:p};return{aliases:["erl"],k:r,i:"(",rB:!0,i:"\\(|#|//|/\\*|\\\\|:|;",c:[h,e.inherit(e.TM,{b:t})],starts:{e:";|\\.",k:r,c:p}},i,{b:"^-",e:"\\.",r:0,eE:!0,rB:!0,l:"-"+e.IR,k:"-module -record -undef -export -ifdef -ifndef -author -copyright -doc -vsn 
-import -include -include_lib -compile -define -else -endif -file -behaviour -behavior -spec",c:[h]},a,e.QSM,d,c,u,l,{b:/\.$/}]}}),hljs.registerLanguage("ruby",function(e){var t="[a-zA-Z_]\\w*[!?=]?|[-+~]\\@|<<|>>|=~|===?|<=>|[<>]=?|\\*\\*|[-/+%^&*~`|]|\\[\\]=?",n={keyword:"and then defined module in return redo if BEGIN retry end for self when next until do begin unless END rescue else break undef not super class case require yield alias while ensure elsif or include attr_reader attr_writer attr_accessor",literal:"true false nil"},r={cN:"doctag",b:"@[A-Za-z]+"},i={b:"#<",e:">"},a=[e.C("#","$",{c:[r]}),e.C("^\\=begin","^\\=end",{c:[r],r:10}),e.C("^__END__","\\n$")],o={cN:"subst",b:"#\\{",e:"}",k:n},s={cN:"string",c:[e.BE,o],v:[{b:/'/,e:/'/},{b:/"/,e:/"/},{b:/`/,e:/`/},{b:"%[qQwWx]?\\(",e:"\\)"},{b:"%[qQwWx]?\\[",e:"\\]"},{b:"%[qQwWx]?{",e:"}"},{b:"%[qQwWx]?<",e:">"},{b:"%[qQwWx]?/",e:"/"},{b:"%[qQwWx]?%",e:"%"},{b:"%[qQwWx]?-",e:"-"},{b:"%[qQwWx]?\\|",e:"\\|"},{b:/\B\?(\\\d{1,3}|\\x[A-Fa-f0-9]{1,2}|\\u[A-Fa-f0-9]{4}|\\?\S)\b/},{b:/<<(-?)\w+$/,e:/^\s*\w+$/}]},l={cN:"params",b:"\\(",e:"\\)",endsParent:!0,k:n},c=[s,i,{cN:"class",bK:"class module",e:"$|;",i:/=/,c:[e.inherit(e.TM,{b:"[A-Za-z_]\\w*(::\\w+)*(\\?|\\!)?"}),{b:"<\\s*",c:[{b:"("+e.IR+"::)?"+e.IR}]}].concat(a)},{cN:"function",bK:"def",e:"$|;",c:[e.inherit(e.TM,{b:t}),l].concat(a)},{b:e.IR+"::"},{cN:"symbol",b:e.UIR+"(\\!|\\?)?:",r:0},{cN:"symbol",b:":(?!\\s)",c:[s,{b:t}],r:0},{cN:"number",b:"(\\b0[0-7_]+)|(\\b0x[0-9a-fA-F_]+)|(\\b[1-9][0-9_]*(\\.[0-9_]+)?)|[0_]\\b",r:0},{b:"(\\$\\W)|((\\$|\\@\\@?)(\\w+))"},{cN:"params",b:/\|/,e:/\|/,k:n},{b:"("+e.RSR+")\\s*",c:[i,{cN:"regexp",c:[e.BE,o],i:/\n/,v:[{b:"/",e:"/[a-z]*"},{b:"%r{",e:"}[a-z]*"},{b:"%r\\(",e:"\\)[a-z]*"},{b:"%r!",e:"![a-z]*"},{b:"%r\\[",e:"\\][a-z]*"}]}].concat(a),r:0}].concat(a);o.c=c,l.c=c;var u="[>?]>",d="[\\w#]+\\(\\w+\\):\\d+:\\d+>",f="(\\w+-)?\\d+\\.\\d+\\.\\d(p\\d+)?[^>]+>",p=[{b:/^\s*=>/,starts:{e:"$",c:c}},{cN:"meta",b:"^("+u+"|"+d+"|"+f+")",starts:{e:"$",c:c}}];return{aliases:["rb","gemspec","podspec","thor","irb"],k:n,i:/\/\*/,c:a.concat(p).concat(c)}}),hljs.registerLanguage("cpp",function(e){var t={cN:"keyword",b:"\\b[a-z\\d_]*_t\\b"},n={cN:"string",v:[{b:'(u8?|U)?L?"',e:'"',i:"\\n",c:[e.BE]},{b:'(u8?|U)?R"',e:'"',c:[e.BE]},{b:"'\\\\?.",e:"'",i:"."}]},r={cN:"number",v:[{b:"\\b(0b[01']+)"},{b:"\\b([\\d']+(\\.[\\d']*)?|\\.[\\d']+)(u|U|l|L|ul|UL|f|F|b|B)"},{b:"(-?)(\\b0[xX][a-fA-F0-9']+|(\\b[\\d']+(\\.[\\d']*)?|\\.[\\d']+)([eE][-+]?[\\d']+)?)"}],r:0},i={cN:"meta",b:/#\s*[a-z]+\b/,e:/$/,k:{"meta-keyword":"if else elif endif define undef warning error line pragma ifdef ifndef include"},c:[{b:/\\\n/,r:0},e.inherit(n,{cN:"meta-string"}),{cN:"meta-string",b:"<",e:">",i:"\\n"},e.CLCM,e.CBCM]},a=e.IR+"\\s*\\(",o={keyword:"int float while private char catch import module export virtual operator sizeof dynamic_cast|10 typedef const_cast|10 const struct for static_cast|10 union namespace unsigned long volatile static protected bool template mutable if public friend do goto auto void enum else break extern using class asm case typeid short reinterpret_cast|10 default double register explicit signed typename try this switch continue inline delete alignof constexpr decltype noexcept static_assert thread_local restrict _Bool complex _Complex _Imaginary atomic_bool atomic_char atomic_schar atomic_uchar atomic_short atomic_ushort atomic_int atomic_uint atomic_long atomic_ulong atomic_llong atomic_ullong new throw return",built_in:"std string cin cout cerr clog stdin stdout stderr 
stringstream istringstream ostringstream auto_ptr deque list queue stack vector map set bitset multiset multimap unordered_set unordered_map unordered_multiset unordered_multimap array shared_ptr abort abs acos asin atan2 atan calloc ceil cosh cos exit exp fabs floor fmod fprintf fputs free frexp fscanf isalnum isalpha iscntrl isdigit isgraph islower isprint ispunct isspace isupper isxdigit tolower toupper labs ldexp log10 log malloc realloc memchr memcmp memcpy memset modf pow printf putchar puts scanf sinh sin snprintf sprintf sqrt sscanf strcat strchr strcmp strcpy strcspn strlen strncat strncmp strncpy strpbrk strrchr strspn strstr tanh tan vfprintf vprintf vsprintf endl initializer_list unique_ptr",literal:"true false nullptr NULL"},s=[t,e.CLCM,e.CBCM,r,n];return{aliases:["c","cc","h","c++","h++","hpp"],k:o,i:"",k:o,c:["self",t]},{b:e.IR+"::",k:o},{v:[{b:/=/,e:/;/},{b:/\(/,e:/\)/},{bK:"new throw return else",e:/;/}],k:o,c:s.concat([{b:/\(/,e:/\)/,k:o,c:s.concat(["self"]),r:0}]),r:0},{cN:"function",b:"("+e.IR+"[\\*&\\s]+)+"+a,rB:!0,e:/[{;=]/,eE:!0,k:o,i:/[^\w\s\*&]/,c:[{b:a,rB:!0,c:[e.TM],r:0},{cN:"params",b:/\(/,e:/\)/,k:o,r:0,c:[e.CLCM,e.CBCM,n,r,t]},e.CLCM,e.CBCM,i]}]),exports:{preprocessor:i,strings:n,k:o}}}),hljs.registerLanguage("bash",function(e){var t={cN:"variable",v:[{b:/\$[\w\d#@][\w\d_]*/},{b:/\$\{(.*?)}/}]},n={cN:"string",b:/"/,e:/"/,c:[e.BE,t,{cN:"variable",b:/\$\(/,e:/\)/,c:[e.BE]}]},r={cN:"string",b:/'/,e:/'/};return{aliases:["sh","zsh"],l:/-?[a-z\._]+/,k:{keyword:"if then else elif fi for while in do done case esac function",literal:"true false",built_in:"break cd continue eval exec exit export getopts hash pwd readonly return shift test times trap umask unset alias bind builtin caller command declare echo enable help let local logout mapfile printf read readarray source type typeset ulimit unalias set shopt autoload bg bindkey bye cap chdir clone comparguments compcall compctl compdescribe compfiles compgroups compquote comptags comptry compvalues dirs disable disown echotc echoti emulate fc fg float functions getcap getln history integer jobs kill limit log noglob popd print pushd pushln rehash sched setcap setopt stat suspend ttyctl unfunction unhash unlimit unsetopt vared wait whence where which zcompile zformat zftp zle zmodload zparseopts zprof zpty zregexparse zsocket zstyle ztcp",_:"-ne -eq -lt -gt -f -d -e -s -l -a"},c:[{cN:"meta",b:/^#![^\n]+sh\s*$/,r:10},{cN:"function",b:/\w[\w\d_]*\s*\(\s*\)\s*\{/,rB:!0,c:[e.inherit(e.TM,{b:/\w[\w\d_]*/})],r:0},e.HCM,n,r,t]}}),hljs.registerLanguage("json",function(e){var t={literal:"true false null"},n=[e.QSM,e.CNM],r={e:",",eW:!0,eE:!0,c:n,k:t},i={b:"{",e:"}",c:[{cN:"attr",b:/"/,e:/"/,c:[e.BE],i:"\\n"},e.inherit(r,{b:/:/})],i:"\\S"},a={b:"\\[",e:"\\]",c:[e.inherit(r)],i:"\\S"};return n.splice(n.length,0,i,a),{c:n,k:t,i:"\\S"}}),hljs.registerLanguage("php",function(e){var t={b:"\\$+[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*"},n={cN:"meta",b:/<\?(php)?|\?>/},r={cN:"string",c:[e.BE,n],v:[{b:'b"',e:'"'},{b:"b'",e:"'"},e.inherit(e.ASM,{i:null}),e.inherit(e.QSM,{i:null})]},i={v:[e.BNM,e.CNM]};return{aliases:["php3","php4","php5","php6"],cI:!0,k:"and include_once list abstract global private echo interface as static endswitch array null if endwhile or const for endforeach self var while isset public protected exit foreach throw elseif include __FILE__ empty require_once do xor return parent clone use __CLASS__ __LINE__ else break print eval new catch __METHOD__ case exception default die require __FUNCTION__ enddeclare final try 
switch continue endfor endif declare unset true false trait goto instanceof insteadof __DIR__ __NAMESPACE__ yield finally",c:[e.HCM,e.C("//","$",{c:[n]}),e.C("/\\*","\\*/",{c:[{cN:"doctag",b:"@[A-Za-z]+"}]}),e.C("__halt_compiler.+?;",!1,{eW:!0,k:"__halt_compiler",l:e.UIR}),{cN:"string",b:/<<<['"]?\w+['"]?$/,e:/^\w+;?$/,c:[e.BE,{cN:"subst",v:[{b:/\$\w+/},{b:/\{\$/,e:/\}/}]}]},n,{cN:"keyword",b:/\$this\b/},t,{b:/(::|->)+[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/},{cN:"function",bK:"function",e:/[;{]/,eE:!0,i:"\\$|\\[|%",c:[e.UTM,{cN:"params",b:"\\(",e:"\\)",c:["self",t,e.CBCM,r,i]}]},{cN:"class",bK:"class interface",e:"{",eE:!0,i:/[:\(\$"]/,c:[{bK:"extends implements"},e.UTM]},{bK:"namespace",e:";",i:/[\.']/,c:[e.UTM]},{bK:"use",e:";",c:[e.UTM]},{b:"=>"},r,i]}}),hljs.registerLanguage("dart",function(e){var t={cN:"subst",b:"\\$\\{",e:"}",k:"true false null this is new super"},n={cN:"string",v:[{b:"r'''",e:"'''"},{b:'r"""',e:'"""'},{b:"r'",e:"'",i:"\\n"},{b:'r"',e:'"',i:"\\n"},{b:"'''",e:"'''",c:[e.BE,t]},{b:'"""',e:'"""',c:[e.BE,t]},{b:"'",e:"'",i:"\\n",c:[e.BE,t]},{b:'"',e:'"',i:"\\n",c:[e.BE,t]}]};t.c=[e.CNM,n];var r={keyword:"assert async await break case catch class const continue default do else enum extends false final finally for if in is new null rethrow return super switch sync this throw true try var void while with yield abstract as dynamic export external factory get implements import library operator part set static typedef",built_in:"print Comparable DateTime Duration Function Iterable Iterator List Map Match Null Object Pattern RegExp Set Stopwatch String StringBuffer StringSink Symbol Type Uri bool double int num document window querySelector querySelectorAll Element ElementList"};return{k:r,c:[n,e.C("/\\*\\*","\\*/",{sL:"markdown"}),e.C("///","$",{sL:"markdown"}),e.CLCM,e.CBCM,{cN:"class",bK:"class interface",e:"{",eE:!0,c:[{bK:"extends implements"},e.UTM]},e.CNM,{cN:"meta",b:"@[A-Za-z]+"},{b:"=>"}]}}),hljs.registerLanguage("ini",function(e){var t={cN:"string",c:[e.BE],v:[{b:"'''",e:"'''",r:10},{b:'"""',e:'"""',r:10},{b:'"',e:'"'},{b:"'",e:"'"}]};return{aliases:["toml"],cI:!0,i:/\S/,c:[e.C(";","$"),e.HCM,{cN:"section",b:/^\s*\[+/,e:/\]+/},{b:/^[a-z0-9\[\]_-]+\s*=\s*/,e:"$",rB:!0,c:[{cN:"attr",b:/[a-z0-9\[\]_-]+/},{b:/=/,eW:!0,r:0,c:[{cN:"literal",b:/\bon|off|true|false|yes|no\b/},{cN:"variable",v:[{b:/\$[\w\d"][\w\d_]*/},{b:/\$\{(.*?)}/}]},t,{cN:"number",b:/([\+\-]+)?[\d]+_[\d_]+/},e.NM]}]}]}}),hljs.registerLanguage("java",function(e){var t=e.UIR+"(<"+e.UIR+"(\\s*,\\s*"+e.UIR+")*>)?",n="false synchronized int abstract float private char boolean static null if const for true while long strictfp finally protected import native final void enum else break transient catch instanceof byte super volatile case assert short package default double public try this switch continue throws protected public private module requires exports",r="\\b(0[bB]([01]+[01_]+[01]+|[01]+)|0[xX]([a-fA-F0-9]+[a-fA-F0-9_]+[a-fA-F0-9]+|[a-fA-F0-9]+)|(([\\d]+[\\d_]+[\\d]+|[\\d]+)(\\.([\\d]+[\\d_]+[\\d]+|[\\d]+))?|\\.([\\d]+[\\d_]+[\\d]+|[\\d]+))([eE][-+]?\\d+)?)[lLfF]?",i={cN:"number",b:r,r:0};return{aliases:["jsp"],k:n,i:/<\/|#/,c:[e.C("/\\*\\*","\\*/",{r:0,c:[{b:/\w+@/,r:0},{cN:"doctag",b:"@[A-Za-z]+"}]}),e.CLCM,e.CBCM,e.ASM,e.QSM,{cN:"class",bK:"class interface",e:/[{;=]/,eE:!0,k:"class interface",i:/[:"\[\]]/,c:[{bK:"extends implements"},e.UTM]},{bK:"new throw return 
else",r:0},{cN:"function",b:"("+t+"\\s+)+"+e.UIR+"\\s*\\(",rB:!0,e:/[{;=]/,eE:!0,k:n,c:[{b:e.UIR+"\\s*\\(",rB:!0,r:0,c:[e.UTM]},{cN:"params",b:/\(/,e:/\)/,k:n,r:0,c:[e.ASM,e.QSM,e.CNM,e.CBCM]},e.CLCM,e.CBCM]},i,{cN:"meta",b:"@[A-Za-z]+"}]}}),hljs.registerLanguage("matlab",function(e){var t=[e.CNM,{cN:"string",b:"'",e:"'",c:[e.BE,{b:"''"}]}],n={r:0,c:[{b:/'['\.]*/}]};return{k:{keyword:"break case catch classdef continue else elseif end enumerated events for function global if methods otherwise parfor persistent properties return spmd switch try while",built_in:"sin sind sinh asin asind asinh cos cosd cosh acos acosd acosh tan tand tanh atan atand atan2 atanh sec secd sech asec asecd asech csc cscd csch acsc acscd acsch cot cotd coth acot acotd acoth hypot exp expm1 log log1p log10 log2 pow2 realpow reallog realsqrt sqrt nthroot nextpow2 abs angle complex conj imag real unwrap isreal cplxpair fix floor ceil round mod rem sign airy besselj bessely besselh besseli besselk beta betainc betaln ellipj ellipke erf erfc erfcx erfinv expint gamma gammainc gammaln psi legendre cross dot factor isprime primes gcd lcm rat rats perms nchoosek factorial cart2sph cart2pol pol2cart sph2cart hsv2rgb rgb2hsv zeros ones eye repmat rand randn linspace logspace freqspace meshgrid accumarray size length ndims numel disp isempty isequal isequalwithequalnans cat reshape diag blkdiag tril triu fliplr flipud flipdim rot90 find sub2ind ind2sub bsxfun ndgrid permute ipermute shiftdim circshift squeeze isscalar isvector ans eps realmax realmin pi i inf nan isnan isinf isfinite j why compan gallery hadamard hankel hilb invhilb magic pascal rosser toeplitz vander wilkinson"},i:'(//|"|#|/\\*|\\s+/\\w+)',c:[{cN:"function",bK:"function",e:"$",c:[e.UTM,{cN:"params",v:[{b:"\\(",e:"\\)"},{b:"\\[",e:"\\]"}]}]},{b:/[a-zA-Z_][a-zA-Z_0-9]*'['\.]*/,rB:!0,r:0,c:[{b:/[a-zA-Z_][a-zA-Z_0-9]*/,r:0},n.c[0]]},{b:"\\[",e:"\\]",c:t,r:0,starts:n},{b:"\\{",e:/}/,c:t,r:0,starts:n},{b:/\)/,r:0,starts:n},e.C("^\\s*\\%\\{\\s*$","^\\s*\\%\\}\\s*$"),e.C("\\%","$")].concat(t)}}),hljs.registerLanguage("erlang-repl",function(e){ +function format_number(e){e+="",x=e.split("."),x1=x[0],x2=x.length>1?"."+x[1]:"";for(var t=/(\d+)(\d{3})/;t.test(x1);)x1=x1.replace(t,"$1,$2");return x1+x2}function format_bytes(e){var t=["bytes","KiB","MiB","GiB","TiB","PiB","EiB","ZiB","YiB"];if(0==e)return"";if(1==e)return"1 byte";var n=parseInt(Math.floor(Math.log(e)/Math.log(1024)));return(0==n?e/Math.pow(1024,n):(e/Math.pow(1024,n)).toFixed(1))+" "+t[n]}function abbreviate_number(e){var t=["","thousand","million","billion","trillion","quadrillion","quintillion","sextillion","septillion"];if(e<1e3)return e;var n=parseInt(Math.floor(Math.log(e)/Math.log(1e3)));return(0==n?e/Math.pow(1e3,n):(e/Math.pow(1e3,n)).toFixed(1))+" "+t[n]}function update_calculations(){return NumEntries()>1e26?void $("#recomend").text("You have more keys than sub-atomic particles in all known universes. That's too many."):Bucket()<1?void $("#recomend").text("You'll need to have a non-zero bucket size."):Key()<1?void $("#recomend").text("You'll need to have a non-zero key size."):Value()<1?void $("#recomend").text("You'll need to have a non-zero value size."):RAM()<1?void $("#recomend").text("You'll need to allocate a non-zero amount of RAM to data storage."):(N_Val()<3&&$("#recomend").text("You'll want to deploy at least 3 Riak nodes, 4 would be even better as a starting point."),n=estimate_nodes(),d=estimate_storage(),r=estimate_keydir(),void $("#recommend").html("
    // The markup around this summary was stripped when the diff was captured;
    // the text and the three bullet items below are what survives.
    "To manage your estimated " + abbreviate_number(NumEntries()) +
    " key/bucket pairs where bucket names are ~" + format_bytes(Bucket()) +
    ", keys are ~" + format_bytes(Key()) +
    ", values are ~" + format_bytes(Value()) +
    " and you are setting aside " + format_bytes(RAM()) +
    " of RAM per-node for in-memory data management within a cluster that is" +
    " configured to maintain " + N_Val() + " replicas per key (N = " + N_Val() + ")" +
    " then Riak, using the Bitcask storage engine, will require at least:" +
    " • " + n + " nodes" +
    " • " + format_bytes(r / n) + " of RAM per node (" + format_bytes(r) + " total across all nodes)" +
    " • " + format_bytes(d / n) + " of storage space per node (" + format_bytes(d) + " total storage space used across all nodes)"
        "))}!function(e,t,n){function r(e,t){return typeof e===t}function i(){var e,t,n,i,a,o,s;for(var l in _)if(_.hasOwnProperty(l)){if(e=[],t=_[l],t.name&&(e.push(t.name.toLowerCase()),t.options&&t.options.aliases&&t.options.aliases.length))for(n=0;n",r.insertBefore(n.lastChild,r.firstChild)}function r(){var e=y.elements;return"string"==typeof e?e.split(" "):e}function i(e,t){var n=y.elements;"string"!=typeof n&&(n=n.join(" ")),"string"!=typeof e&&(e=e.join(" ")),y.elements=n+" "+e,c(t)}function a(e){var t=v[e[m]];return t||(t={},b++,e[m]=b,v[b]=t),t}function o(e,n,r){if(n||(n=t),d)return n.createElement(e);r||(r=a(n));var i;return i=r.cache[e]?r.cache[e].cloneNode():g.test(e)?(r.cache[e]=r.createElem(e)).cloneNode():r.createElem(e),!i.canHaveChildren||h.test(e)||i.tagUrn?i:r.frag.appendChild(i)}function s(e,n){if(e||(e=t),d)return e.createDocumentFragment();n=n||a(e);for(var i=n.frag.cloneNode(),o=0,s=r(),l=s.length;o",u="hidden"in e,d=1==e.childNodes.length||function(){t.createElement("a");var e=t.createDocumentFragment();return"undefined"==typeof e.cloneNode||"undefined"==typeof e.createDocumentFragment||"undefined"==typeof e.createElement}()}catch(e){u=!0,d=!0}}();var y={elements:p.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:f,shivCSS:p.shivCSS!==!1,supportsUnknownElements:d,shivMethods:p.shivMethods!==!1,type:"default",shivDocument:c,createElement:o,createDocumentFragment:s,addElements:i};e.html5=y,c(t),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof e?e:this,t);var E="Moz O ms Webkit",S=w._config.usePrefixes?E.toLowerCase().split(" "):[];w._domPrefixes=S;var M;!function(){var e={}.hasOwnProperty;M=r(e,"undefined")||r(e.call,"undefined")?function(e,t){return t in e&&r(e.constructor.prototype[t],"undefined")}:function(t,n){return e.call(t,n)}}(),w._l={},w.on=function(e,t){this._l[e]||(this._l[e]=[]),this._l[e].push(t),x.hasOwnProperty(e)&&setTimeout(function(){x._trigger(e,x[e])},0)},w._trigger=function(e,t){if(this._l[e]){var n=this._l[e];setTimeout(function(){var e,r;for(e=0;e7)}),x.addTest("audio",function(){var e=s("audio"),t=!1;try{(t=!!e.canPlayType)&&(t=new Boolean(t),t.ogg=e.canPlayType('audio/ogg; codecs="vorbis"').replace(/^no$/,""),t.mp3=e.canPlayType('audio/mpeg; codecs="mp3"').replace(/^no$/,""),t.opus=e.canPlayType('audio/ogg; codecs="opus"')||e.canPlayType('audio/webm; codecs="opus"').replace(/^no$/,""),t.wav=e.canPlayType('audio/wav; codecs="1"').replace(/^no$/,""),t.m4a=(e.canPlayType("audio/x-m4a;")||e.canPlayType("audio/aac;")).replace(/^no$/,""))}catch(e){}return t}),x.addTest("canvas",function(){var e=s("canvas");return!(!e.getContext||!e.getContext("2d"))}),x.addTest("canvastext",function(){return x.canvas!==!1&&"function"==typeof s("canvas").getContext("2d").fillText}),x.addTest("video",function(){var e=s("video"),t=!1;try{(t=!!e.canPlayType)&&(t=new Boolean(t),t.ogg=e.canPlayType('video/ogg; codecs="theora"').replace(/^no$/,""),t.h264=e.canPlayType('video/mp4; codecs="avc1.42E01E"').replace(/^no$/,""),t.webm=e.canPlayType('video/webm; codecs="vp8, vorbis"').replace(/^no$/,""),t.vp9=e.canPlayType('video/webm; codecs="vp9"').replace(/^no$/,""),t.hls=e.canPlayType('application/x-mpegURL; codecs="avc1.42E01E"').replace(/^no$/,""))}catch(e){}return t}),x.addTest("webgl",function(){var t=s("canvas"),n="probablySupportsContext"in 
t?"probablySupportsContext":"supportsContext";return n in t?t[n]("webgl")||t[n]("experimental-webgl"):"WebGLRenderingContext"in e}),x.addTest("cssgradients",function(){for(var e,t="background-image:",n="gradient(linear,left top,right bottom,from(#9f9),to(white));",r="",i=0,a=N.length-1;i-1}),x.addTest("multiplebgs",function(){var e=s("a").style;return e.cssText="background:url(https://),url(https://),red url(https://)",/(url\s*\(.*?){3}/.test(e.background)}),x.addTest("opacity",function(){var e=s("a").style;return e.cssText=N.join("opacity:.55;"),/^0.55$/.test(e.opacity)}),x.addTest("rgba",function(){var e=s("a").style;return e.cssText="background-color:rgba(150,255,150,.5)",(""+e.backgroundColor).indexOf("rgba")>-1}),x.addTest("inlinesvg",function(){var e=s("div");return e.innerHTML="","http://www.w3.org/2000/svg"==("undefined"!=typeof SVGRect&&e.firstChild&&e.firstChild.namespaceURI)});var A=s("input"),j="autocomplete autofocus list placeholder max min multiple pattern required step".split(" "),L={};x.input=function(t){for(var n=0,r=t.length;n=9,i=t<533&&e.match(/android/gi);return n||i||r}();F?x.addTest("fontface",!1):H('@font-face {font-family:"font";src:url("https://")}',function(e,n){var r=t.getElementById("smodernizr"),i=r.sheet||r.styleSheet,a=i?i.cssRules&&i.cssRules[0]?i.cssRules[0].cssText:i.cssText||"":"",o=/src/i.test(a)&&0===a.indexOf(n.split(" ")[0]);x.addTest("fontface",o)}),H('#modernizr{font:0/0 a}#modernizr:after{content:":)";visibility:hidden;font:7px/1 a}',function(e){x.addTest("generatedcontent",e.offsetHeight>=7)});var O=w._config.usePrefixes?E.split(" "):[];w._cssomPrefixes=O;var P=function(t){var r,i=N.length,a=e.CSSRule;if("undefined"==typeof a)return n;if(!t)return!1;if(t=t.replace(/^@/,""),r=t.replace(/-/g,"_").toUpperCase()+"_RULE",r in a)return"@"+t;for(var o=0;o0&&t-1 in e)}function r(e,t,n){if(ae.isFunction(t))return ae.grep(e,function(e,r){return!!t.call(e,r,e)!==n});if(t.nodeType)return ae.grep(e,function(e){return e===t!==n});if("string"==typeof t){if(ge.test(t))return ae.filter(t,e,n);t=ae.filter(t,e)}return ae.grep(e,function(e){return J.call(t,e)>-1!==n})}function i(e,t){for(;(e=e[t])&&1!==e.nodeType;);return e}function a(e){var t={};return ae.each(e.match(we)||[],function(e,n){t[n]=!0}),t}function o(){Q.removeEventListener("DOMContentLoaded",o),e.removeEventListener("load",o),ae.ready()}function s(){this.expando=ae.expando+s.uid++}function l(e,t,n){var r;if(void 0===n&&1===e.nodeType)if(r="data-"+t.replace(Se,"-$&").toLowerCase(),n=e.getAttribute(r),"string"==typeof n){try{n="true"===n||"false"!==n&&("null"===n?null:+n+""===n?+n:Ee.test(n)?ae.parseJSON(n):n)}catch(e){}Te.set(e,t,n)}else n=void 0;return n}function c(e,t,n,r){var i,a=1,o=20,s=r?function(){return r.cur()}:function(){return ae.css(e,t,"")},l=s(),c=n&&n[3]||(ae.cssNumber[t]?"":"px"),u=(ae.cssNumber[t]||"px"!==c&&+l)&&$e.exec(ae.css(e,t));if(u&&u[3]!==c){c=c||u[3],n=n||[],u=+l||1;do a=a||".5",u/=a,ae.style(e,t,u+c);while(a!==(a=s()/l)&&1!==a&&--o)}return n&&(u=+u||+l||0,i=n[1]?u+(n[1]+1)*n[2]:+n[2],r&&(r.unit=c,r.start=u,r.end=i)),i}function u(e,t){var n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[];return void 0===t||t&&ae.nodeName(e,t)?ae.merge([e],n):n}function d(e,t){for(var n=0,r=e.length;n-1)i&&i.push(a);else if(c=ae.contains(a.ownerDocument,a),o=u(p.appendChild(a),"script"),c&&d(o),n)for(f=0;a=o[f++];)ze.test(a.type||"")&&n.push(a);return p}function p(){return!0}function 
h(){return!1}function g(){try{return Q.activeElement}catch(e){}}function m(e,t,n,r,i,a){var o,s;if("object"==typeof t){"string"!=typeof n&&(r=r||n,n=void 0);for(s in t)m(e,s,n,r,t[s],a);return e}if(null==r&&null==i?(i=n,r=n=void 0):null==i&&("string"==typeof n?(i=r,r=void 0):(i=r,r=n,n=void 0)),i===!1)i=h;else if(!i)return e;return 1===a&&(o=i,i=function(e){return ae().off(e),o.apply(this,arguments)},i.guid=o.guid||(o.guid=ae.guid++)),e.each(function(){ae.event.add(this,t,i,r,n)})}function b(e,t){return ae.nodeName(e,"table")&&ae.nodeName(11!==t.nodeType?t:t.firstChild,"tr")?e.getElementsByTagName("tbody")[0]||e.appendChild(e.ownerDocument.createElement("tbody")):e}function v(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function y(e){var t=We.exec(e.type);return t?e.type=t[1]:e.removeAttribute("type"),e}function _(e,t){var n,r,i,a,o,s,l,c;if(1===t.nodeType){if(Ce.hasData(e)&&(a=Ce.access(e),o=Ce.set(t,a),c=a.events)){delete o.handle,o.events={};for(i in c)for(n=0,r=c[i].length;n1&&"string"==typeof g&&!re.checkClone&&Pe.test(g))return e.each(function(i){var a=e.eq(i);m&&(t[0]=g.call(this,i,a.html())),x(a,t,n,r)});if(p&&(i=f(t,e[0].ownerDocument,!1,e,r),a=i.firstChild,1===i.childNodes.length&&(i=a),a||r)){for(o=ae.map(u(i,"script"),v),s=o.length;d")).appendTo(t.documentElement),t=Ke[0].contentDocument,t.write(),t.close(),n=N(e,t),Ke.detach()),Ze[e]=n),n}function T(e,t,n){var r,i,a,o,s=e.style;return n=n||Xe(e),o=n?n.getPropertyValue(t)||n[t]:void 0,""!==o&&void 0!==o||ae.contains(e.ownerDocument,e)||(o=ae.style(e,t)),n&&!re.pixelMarginRight()&&Qe.test(o)&&Ve.test(t)&&(r=s.width,i=s.minWidth,a=s.maxWidth,s.minWidth=s.maxWidth=s.width=o,o=n.width,s.width=r,s.minWidth=i,s.maxWidth=a),void 0!==o?o+"":o}function E(e,t){return{get:function(){return e()?void delete this.get:(this.get=t).apply(this,arguments)}}}function S(e){if(e in rt)return e;for(var t=e[0].toUpperCase()+e.slice(1),n=nt.length;n--;)if(e=nt[n]+t,e in rt)return e}function M(e,t,n){var r=$e.exec(t);return r?Math.max(0,r[2]-(n||0))+(r[3]||"px"):t}function $(e,t,n,r,i){for(var a=n===(r?"border":"content")?4:"width"===t?1:0,o=0;a<4;a+=2)"margin"===n&&(o+=ae.css(e,n+Ae[a],!0,i)),r?("content"===n&&(o-=ae.css(e,"padding"+Ae[a],!0,i)),"margin"!==n&&(o-=ae.css(e,"border"+Ae[a]+"Width",!0,i))):(o+=ae.css(e,"padding"+Ae[a],!0,i),"padding"!==n&&(o+=ae.css(e,"border"+Ae[a]+"Width",!0,i)));return o}function A(e,t,n){var r=!0,i="width"===t?e.offsetWidth:e.offsetHeight,a=Xe(e),o="border-box"===ae.css(e,"boxSizing",!1,a);if(i<=0||null==i){if(i=T(e,t,a),(i<0||null==i)&&(i=e.style[t]),Qe.test(i))return i;r=o&&(re.boxSizingReliable()||i===e.style[t]),i=parseFloat(i)||0}return i+$(e,t,n||(o?"border":"content"),r,a)+"px"}function j(e,t){for(var n,r,i,a=[],o=0,s=e.length;o=0&&n=0},isPlainObject:function(e){var t;if("object"!==ae.type(e)||e.nodeType||ae.isWindow(e))return!1;if(e.constructor&&!ne.call(e,"constructor")&&!ne.call(e.constructor.prototype||{},"isPrototypeOf"))return!1;for(t in e);return void 0===t||ne.call(e,t)},isEmptyObject:function(e){var t;for(t in e)return!1;return!0},type:function(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?ee[te.call(e)]||"object":typeof e},globalEval:function(e){var t,n=eval;e=ae.trim(e),e&&(1===e.indexOf("use strict")?(t=Q.createElement("script"),t.text=e,Q.head.appendChild(t).parentNode.removeChild(t)):n(e))},camelCase:function(e){return e.replace(se,"ms-").replace(le,ce)},nodeName:function(e,t){return e.nodeName&&e.nodeName.toLowerCase()===t.toLowerCase()},each:function(e,t){var 
r,i=0;if(n(e))for(r=e.length;ix.cacheLength&&delete e[t.shift()],e[n+" "]=r}var t=[];return e}function r(e){return e[D]=!0,e}function i(e){var t=j.createElement("div");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function a(e,t){for(var n=e.split("|"),r=n.length;r--;)x.attrHandle[n[r]]=t}function o(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&(~t.sourceIndex||Z)-(~e.sourceIndex||Z);if(r)return r;if(n)for(;n=n.nextSibling;)if(n===t)return-1;return e?1:-1}function s(e){return function(t){var n=t.nodeName.toLowerCase();return"input"===n&&t.type===e}}function l(e){return function(t){var n=t.nodeName.toLowerCase();return("input"===n||"button"===n)&&t.type===e}}function c(e){return r(function(t){return t=+t,r(function(n,r){for(var i,a=e([],n.length,t),o=a.length;o--;)n[i=a[o]]&&(n[i]=!(r[i]=n[i]))})})}function u(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}function d(){}function f(e){for(var t=0,n=e.length,r="";t1?function(t,n,r){for(var i=e.length;i--;)if(!e[i](t,n,r))return!1;return!0}:e[0]}function g(e,n,r){for(var i=0,a=n.length;i-1&&(r[c]=!(o[c]=d))}}else y=m(y===o?y.splice(h,y.length):y),a?a(null,o,y,l):Y.apply(o,y)})}function v(e){for(var t,n,r,i=e.length,a=x.relative[e[0].type],o=a||x.relative[" "],s=a?1:0,l=p(function(e){return e===t},o,!0),c=p(function(e){return ee(t,e)>-1},o,!0),u=[function(e,n,r){var i=!a&&(r||n!==S)||((t=n).nodeType?l(e,n,r):c(e,n,r));return t=null,i}];s1&&h(u),s>1&&f(e.slice(0,s-1).concat({value:" "===e[s-2].type?"*":""})).replace(se,"$1"),n,s0,a=e.length>0,o=function(r,o,s,l,c){var u,d,f,p=0,h="0",g=r&&[],b=[],v=S,y=r||a&&x.find.TAG("*",c),_=F+=null==v?1:Math.random()||.1,w=y.length;for(c&&(S=o===j||o||c);h!==w&&null!=(u=y[h]);h++){if(a&&u){for(d=0,o||u.ownerDocument===j||(A(u),s=!B);f=e[d++];)if(f(u,o||j,s)){l.push(u);break}c&&(F=_)}i&&((u=!f&&u)&&p--,r&&g.push(u))}if(p+=h,i&&h!==p){for(d=0;f=n[d++];)f(g,b,o,s);if(r){if(p>0)for(;h--;)g[h]||b[h]||(b[h]=X.call(l));b=m(b)}Y.apply(l,b),c&&!r&&b.length>0&&p+n.length>1&&t.uniqueSort(l)}return c&&(F=_,S=v),g};return i?r(o):o}var _,w,x,k,N,C,T,E,S,M,$,A,j,L,B,z,R,q,I,D="sizzle"+1*new Date,H=e.document,F=0,O=0,P=n(),W=n(),U=n(),K=function(e,t){return e===t&&($=!0),0},Z=1<<31,V={}.hasOwnProperty,Q=[],X=Q.pop,G=Q.push,Y=Q.push,J=Q.slice,ee=function(e,t){for(var n=0,r=e.length;n+~]|"+ne+")"+ne+"*"),ue=new RegExp("="+ne+"*([^\\]'\"]*?)"+ne+"*\\]","g"),de=new RegExp(ae),fe=new RegExp("^"+re+"$"),pe={ID:new RegExp("^#("+re+")"),CLASS:new RegExp("^\\.("+re+")"),TAG:new RegExp("^("+re+"|[*])"),ATTR:new RegExp("^"+ie),PSEUDO:new RegExp("^"+ae),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+ne+"*(even|odd|(([+-]|)(\\d*)n|)"+ne+"*(?:([+-]|)"+ne+"*(\\d+)|))"+ne+"*\\)|)","i"),bool:new RegExp("^(?:"+te+")$","i"),needsContext:new RegExp("^"+ne+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+ne+"*((?:-\\d)?\\d*)"+ne+"*\\)|)(?=[^-]|$)","i")},he=/^(?:input|select|textarea|button)$/i,ge=/^h\d$/i,me=/^[^{]+\{\s*\[native \w/,be=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ve=/[+~]/,ye=/'|\\/g,_e=new RegExp("\\\\([\\da-f]{1,6}"+ne+"?|("+ne+")|.)","ig"),we=function(e,t,n){var r="0x"+t-65536;return r!==r||n?t:r<0?String.fromCharCode(r+65536):String.fromCharCode(r>>10|55296,1023&r|56320)},xe=function(){A()};try{Y.apply(Q=J.call(H.childNodes),H.childNodes),Q[H.childNodes.length].nodeType}catch(e){Y={apply:Q.length?function(e,t){G.apply(e,J.call(t))}:function(e,t){for(var n=e.length,r=0;e[n++]=t[r++];);e.length=n-1}}}w=t.support={},N=t.isXML=function(e){var 
t=e&&(e.ownerDocument||e).documentElement;return!!t&&"HTML"!==t.nodeName},A=t.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:H;return r!==j&&9===r.nodeType&&r.documentElement?(j=r,L=j.documentElement,B=!N(j),(n=j.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",xe,!1):n.attachEvent&&n.attachEvent("onunload",xe)),w.attributes=i(function(e){return e.className="i",!e.getAttribute("className")}),w.getElementsByTagName=i(function(e){return e.appendChild(j.createComment("")),!e.getElementsByTagName("*").length}),w.getElementsByClassName=me.test(j.getElementsByClassName),w.getById=i(function(e){return L.appendChild(e).id=D,!j.getElementsByName||!j.getElementsByName(D).length}),w.getById?(x.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&B){var n=t.getElementById(e);return n?[n]:[]}},x.filter.ID=function(e){var t=e.replace(_e,we);return function(e){return e.getAttribute("id")===t}}):(delete x.find.ID,x.filter.ID=function(e){var t=e.replace(_e,we);return function(e){var n="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return n&&n.value===t}}),x.find.TAG=w.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):w.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,a=t.getElementsByTagName(e);if("*"===e){for(;n=a[i++];)1===n.nodeType&&r.push(n);return r}return a},x.find.CLASS=w.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&B)return t.getElementsByClassName(e)},R=[],z=[],(w.qsa=me.test(j.querySelectorAll))&&(i(function(e){L.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&z.push("[*^$]="+ne+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||z.push("\\["+ne+"*(?:value|"+te+")"),e.querySelectorAll("[id~="+D+"-]").length||z.push("~="),e.querySelectorAll(":checked").length||z.push(":checked"),e.querySelectorAll("a#"+D+"+*").length||z.push(".#.+[+~]")}),i(function(e){var t=j.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&z.push("name"+ne+"*[*^$|!~]?="),e.querySelectorAll(":enabled").length||z.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),z.push(",.*:")})),(w.matchesSelector=me.test(q=L.matches||L.webkitMatchesSelector||L.mozMatchesSelector||L.oMatchesSelector||L.msMatchesSelector))&&i(function(e){w.disconnectedMatch=q.call(e,"div"),q.call(e,"[s!='']:x"),R.push("!=",ae)}),z=z.length&&new RegExp(z.join("|")),R=R.length&&new RegExp(R.join("|")),t=me.test(L.compareDocumentPosition),I=t||me.test(L.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)for(;t=t.parentNode;)if(t===e)return!0;return!1},K=t?function(e,t){if(e===t)return $=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n?n:(n=(e.ownerDocument||e)===(t.ownerDocument||t)?e.compareDocumentPosition(t):1,1&n||!w.sortDetached&&t.compareDocumentPosition(e)===n?e===j||e.ownerDocument===H&&I(H,e)?-1:t===j||t.ownerDocument===H&&I(H,t)?1:M?ee(M,e)-ee(M,t):0:4&n?-1:1)}:function(e,t){if(e===t)return $=!0,0;var n,r=0,i=e.parentNode,a=t.parentNode,s=[e],l=[t];if(!i||!a)return e===j?-1:t===j?1:i?-1:a?1:M?ee(M,e)-ee(M,t):0;if(i===a)return o(e,t);for(n=e;n=n.parentNode;)s.unshift(n);for(n=t;n=n.parentNode;)l.unshift(n);for(;s[r]===l[r];)r++;return 
r?o(s[r],l[r]):s[r]===H?-1:l[r]===H?1:0},j):j},t.matches=function(e,n){return t(e,null,null,n)},t.matchesSelector=function(e,n){if((e.ownerDocument||e)!==j&&A(e),n=n.replace(ue,"='$1']"),w.matchesSelector&&B&&!U[n+" "]&&(!R||!R.test(n))&&(!z||!z.test(n)))try{var r=q.call(e,n);if(r||w.disconnectedMatch||e.document&&11!==e.document.nodeType)return r}catch(e){}return t(n,j,null,[e]).length>0},t.contains=function(e,t){return(e.ownerDocument||e)!==j&&A(e),I(e,t)},t.attr=function(e,t){(e.ownerDocument||e)!==j&&A(e);var n=x.attrHandle[t.toLowerCase()],r=n&&V.call(x.attrHandle,t.toLowerCase())?n(e,t,!B):void 0;return void 0!==r?r:w.attributes||!B?e.getAttribute(t):(r=e.getAttributeNode(t))&&r.specified?r.value:null},t.error=function(e){throw new Error("Syntax error, unrecognized expression: "+e)},t.uniqueSort=function(e){var t,n=[],r=0,i=0;if($=!w.detectDuplicates,M=!w.sortStable&&e.slice(0),e.sort(K),$){for(;t=e[i++];)t===e[i]&&(r=n.push(i));for(;r--;)e.splice(n[r],1)}return M=null,e},k=t.getText=function(e){var t,n="",r=0,i=e.nodeType;if(i){if(1===i||9===i||11===i){if("string"==typeof e.textContent)return e.textContent;for(e=e.firstChild;e;e=e.nextSibling)n+=k(e)}else if(3===i||4===i)return e.nodeValue}else for(;t=e[r++];)n+=k(t);return n},x=t.selectors={cacheLength:50,createPseudo:r,match:pe,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(_e,we),e[3]=(e[3]||e[4]||e[5]||"").replace(_e,we),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||t.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&t.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return pe.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&de.test(n)&&(t=C(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(_e,we).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=P[e+" "];return t||(t=new RegExp("(^|"+ne+")"+e+"("+ne+"|$)"))&&P(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(e,n,r){return function(i){var a=t.attr(i,e);return null==a?"!="===n:!n||(a+="","="===n?a===r:"!="===n?a!==r:"^="===n?r&&0===a.indexOf(r):"*="===n?r&&a.indexOf(r)>-1:"$="===n?r&&a.slice(-r.length)===r:"~="===n?(" "+a.replace(oe," ")+" ").indexOf(r)>-1:"|="===n&&(a===r||a.slice(0,r.length+1)===r+"-"))}},CHILD:function(e,t,n,r,i){var a="nth"!==e.slice(0,3),o="last"!==e.slice(-4),s="of-type"===t;return 1===r&&0===i?function(e){return!!e.parentNode}:function(t,n,l){var c,u,d,f,p,h,g=a!==o?"nextSibling":"previousSibling",m=t.parentNode,b=s&&t.nodeName.toLowerCase(),v=!l&&!s,y=!1;if(m){if(a){for(;g;){for(f=t;f=f[g];)if(s?f.nodeName.toLowerCase()===b:1===f.nodeType)return!1;h=g="only"===e&&!h&&"nextSibling"}return!0}if(h=[o?m.firstChild:m.lastChild],o&&v){for(f=m,d=f[D]||(f[D]={}),u=d[f.uniqueID]||(d[f.uniqueID]={}),c=u[e]||[],p=c[0]===F&&c[1],y=p&&c[2],f=p&&m.childNodes[p];f=++p&&f&&f[g]||(y=p=0)||h.pop();)if(1===f.nodeType&&++y&&f===t){u[e]=[F,p,y];break}}else 
if(v&&(f=t,d=f[D]||(f[D]={}),u=d[f.uniqueID]||(d[f.uniqueID]={}),c=u[e]||[],p=c[0]===F&&c[1],y=p),y===!1)for(;(f=++p&&f&&f[g]||(y=p=0)||h.pop())&&((s?f.nodeName.toLowerCase()!==b:1!==f.nodeType)||!++y||(v&&(d=f[D]||(f[D]={}),u=d[f.uniqueID]||(d[f.uniqueID]={}),u[e]=[F,y]),f!==t)););return y-=i,y===r||y%r===0&&y/r>=0}}},PSEUDO:function(e,n){var i,a=x.pseudos[e]||x.setFilters[e.toLowerCase()]||t.error("unsupported pseudo: "+e);return a[D]?a(n):a.length>1?(i=[e,e,"",n],x.setFilters.hasOwnProperty(e.toLowerCase())?r(function(e,t){for(var r,i=a(e,n),o=i.length;o--;)r=ee(e,i[o]),e[r]=!(t[r]=i[o])}):function(e){return a(e,0,i)}):a}},pseudos:{not:r(function(e){var t=[],n=[],i=T(e.replace(se,"$1"));return i[D]?r(function(e,t,n,r){for(var a,o=i(e,null,r,[]),s=e.length;s--;)(a=o[s])&&(e[s]=!(t[s]=a))}):function(e,r,a){return t[0]=e,i(t,null,a,n),t[0]=null,!n.pop()}}),has:r(function(e){return function(n){return t(e,n).length>0}}),contains:r(function(e){return e=e.replace(_e,we),function(t){return(t.textContent||t.innerText||k(t)).indexOf(e)>-1}}),lang:r(function(e){return fe.test(e||"")||t.error("unsupported lang: "+e),e=e.replace(_e,we).toLowerCase(),function(t){var n;do if(n=B?t.lang:t.getAttribute("xml:lang")||t.getAttribute("lang"))return n=n.toLowerCase(),n===e||0===n.indexOf(e+"-");while((t=t.parentNode)&&1===t.nodeType);return!1}}),target:function(t){var n=e.location&&e.location.hash;return n&&n.slice(1)===t.id},root:function(e){return e===L},focus:function(e){return e===j.activeElement&&(!j.hasFocus||j.hasFocus())&&!!(e.type||e.href||~e.tabIndex)},enabled:function(e){return e.disabled===!1},disabled:function(e){return e.disabled===!0},checked:function(e){var t=e.nodeName.toLowerCase();return"input"===t&&!!e.checked||"option"===t&&!!e.selected},selected:function(e){return e.parentNode&&e.parentNode.selectedIndex,e.selected===!0},empty:function(e){for(e=e.firstChild;e;e=e.nextSibling)if(e.nodeType<6)return!1;return!0},parent:function(e){return!x.pseudos.empty(e)},header:function(e){return ge.test(e.nodeName)},input:function(e){return he.test(e.nodeName)},button:function(e){var t=e.nodeName.toLowerCase();return"input"===t&&"button"===e.type||"button"===t},text:function(e){var t;return"input"===e.nodeName.toLowerCase()&&"text"===e.type&&(null==(t=e.getAttribute("type"))||"text"===t.toLowerCase())},first:c(function(){return[0]}),last:c(function(e,t){return[t-1]}),eq:c(function(e,t,n){return[n<0?n+t:n]}),even:c(function(e,t){for(var n=0;n=0;)e.push(r);return e}),gt:c(function(e,t,n){for(var r=n<0?n+t:n;++r2&&"ID"===(o=a[0]).type&&w.getById&&9===t.nodeType&&B&&x.relative[a[1].type]){if(t=(x.find.ID(o.matches[0].replace(_e,we),t)||[])[0],!t)return n;c&&(t=t.parentNode),e=e.slice(a.shift().value.length)}for(i=pe.needsContext.test(e)?0:a.length;i--&&(o=a[i],!x.relative[s=o.type]);)if((l=x.find[s])&&(r=l(o.matches[0].replace(_e,we),ve.test(a[0].type)&&u(t.parentNode)||t))){if(a.splice(i,1),e=r.length&&f(a),!e)return Y.apply(n,r),n;break}}return(c||T(e,d))(r,t,!B,n,!t||ve.test(e)&&u(t.parentNode)||t),n},w.sortStable=D.split("").sort(K).join("")===D,w.detectDuplicates=!!$,A(),w.sortDetached=i(function(e){return 1&e.compareDocumentPosition(j.createElement("div"))}),i(function(e){return e.innerHTML="","#"===e.firstChild.getAttribute("href")})||a("type|href|height|width",function(e,t,n){if(!n)return e.getAttribute(t,"type"===t.toLowerCase()?1:2)}),w.attributes&&i(function(e){return 
e.innerHTML="",e.firstChild.setAttribute("value",""),""===e.firstChild.getAttribute("value")})||a("value",function(e,t,n){if(!n&&"input"===e.nodeName.toLowerCase())return e.defaultValue}),i(function(e){return null==e.getAttribute("disabled")})||a(te,function(e,t,n){var r;if(!n)return e[t]===!0?t.toLowerCase():(r=e.getAttributeNode(t))&&r.specified?r.value:null}),t}(e);ae.find=ue,ae.expr=ue.selectors,ae.expr[":"]=ae.expr.pseudos,ae.uniqueSort=ae.unique=ue.uniqueSort,ae.text=ue.getText,ae.isXMLDoc=ue.isXML,ae.contains=ue.contains;var de=function(e,t,n){for(var r=[],i=void 0!==n;(e=e[t])&&9!==e.nodeType;)if(1===e.nodeType){if(i&&ae(e).is(n))break;r.push(e)}return r},fe=function(e,t){for(var n=[];e;e=e.nextSibling)1===e.nodeType&&e!==t&&n.push(e);return n},pe=ae.expr.match.needsContext,he=/^<([\w-]+)\s*\/?>(?:<\/\1>|)$/,ge=/^.[^:#\[\.,]*$/;ae.filter=function(e,t,n){var r=t[0];return n&&(e=":not("+e+")"),1===t.length&&1===r.nodeType?ae.find.matchesSelector(r,e)?[r]:[]:ae.find.matches(e,ae.grep(t,function(e){return 1===e.nodeType}))},ae.fn.extend({find:function(e){var t,n=this.length,r=[],i=this;if("string"!=typeof e)return this.pushStack(ae(e).filter(function(){for(t=0;t1?ae.unique(r):r),r.selector=this.selector?this.selector+" "+e:e,r},filter:function(e){return this.pushStack(r(this,e||[],!1))},not:function(e){return this.pushStack(r(this,e||[],!0))},is:function(e){return!!r(this,"string"==typeof e&&pe.test(e)?ae(e):e||[],!1).length}});var me,be=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]*))$/,ve=ae.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||me,"string"==typeof e){if(r="<"===e[0]&&">"===e[e.length-1]&&e.length>=3?[null,e,null]:be.exec(e),!r||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof ae?t[0]:t,ae.merge(this,ae.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:Q,!0)),he.test(r[1])&&ae.isPlainObject(t))for(r in t)ae.isFunction(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return i=Q.getElementById(r[2]),i&&i.parentNode&&(this.length=1,this[0]=i),this.context=Q,this.selector=e,this}return e.nodeType?(this.context=this[0]=e,this.length=1,this):ae.isFunction(e)?void 0!==n.ready?n.ready(e):e(ae):(void 0!==e.selector&&(this.selector=e.selector,this.context=e.context),ae.makeArray(e,this))};ve.prototype=ae.fn,me=ae(Q);var ye=/^(?:parents|prev(?:Until|All))/,_e={children:!0,contents:!0,next:!0,prev:!0};ae.fn.extend({has:function(e){var t=ae(e,this),n=t.length;return this.filter(function(){for(var e=0;e-1:1===n.nodeType&&ae.find.matchesSelector(n,e))){a.push(n);break}return this.pushStack(a.length>1?ae.uniqueSort(a):a)},index:function(e){return e?"string"==typeof e?J.call(ae(e),this[0]):J.call(this,e.jquery?e[0]:e):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(e,t){return this.pushStack(ae.uniqueSort(ae.merge(this.get(),ae(e,t))))},addBack:function(e){return this.add(null==e?this.prevObject:this.prevObject.filter(e))}}),ae.each({parent:function(e){var t=e.parentNode;return t&&11!==t.nodeType?t:null},parents:function(e){return de(e,"parentNode")},parentsUntil:function(e,t,n){return de(e,"parentNode",n)},next:function(e){return i(e,"nextSibling")},prev:function(e){return i(e,"previousSibling")},nextAll:function(e){return de(e,"nextSibling")},prevAll:function(e){return de(e,"previousSibling")},nextUntil:function(e,t,n){return de(e,"nextSibling",n)},prevUntil:function(e,t,n){return de(e,"previousSibling",n)},siblings:function(e){return fe((e.parentNode||{}).firstChild,e)},children:function(e){return 
fe(e.firstChild)},contents:function(e){return e.contentDocument||ae.merge([],e.childNodes)}},function(e,t){ae.fn[e]=function(n,r){var i=ae.map(this,t,n);return"Until"!==e.slice(-5)&&(r=n),r&&"string"==typeof r&&(i=ae.filter(r,i)),this.length>1&&(_e[e]||ae.uniqueSort(i),ye.test(e)&&i.reverse()),this.pushStack(i)}});var we=/\S+/g;ae.Callbacks=function(e){e="string"==typeof e?a(e):ae.extend({},e);var t,n,r,i,o=[],s=[],l=-1,c=function(){for(i=e.once,r=t=!0;s.length;l=-1)for(n=s.shift();++l-1;)o.splice(n,1),n<=l&&l--}),this},has:function(e){return e?ae.inArray(e,o)>-1:o.length>0},empty:function(){return o&&(o=[]),this},disable:function(){return i=s=[],o=n="",this},disabled:function(){return!o},lock:function(){return i=s=[],n||(o=n=""),this},locked:function(){return!!i},fireWith:function(e,n){return i||(n=n||[],n=[e,n.slice?n.slice():n],s.push(n),t||c()),this},fire:function(){return u.fireWith(this,arguments),this},fired:function(){return!!r}};return u},ae.extend({Deferred:function(e){var t=[["resolve","done",ae.Callbacks("once memory"),"resolved"],["reject","fail",ae.Callbacks("once memory"),"rejected"],["notify","progress",ae.Callbacks("memory")]],n="pending",r={state:function(){return n},always:function(){return i.done(arguments).fail(arguments),this},then:function(){var e=arguments;return ae.Deferred(function(n){ae.each(t,function(t,a){var o=ae.isFunction(e[t])&&e[t];i[a[1]](function(){var e=o&&o.apply(this,arguments);e&&ae.isFunction(e.promise)?e.promise().progress(n.notify).done(n.resolve).fail(n.reject):n[a[0]+"With"](this===r?n.promise():this,o?[e]:arguments)})}),e=null}).promise()},promise:function(e){return null!=e?ae.extend(e,r):r}},i={};return r.pipe=r.then,ae.each(t,function(e,a){var o=a[2],s=a[3];r[a[1]]=o.add,s&&o.add(function(){n=s},t[1^e][2].disable,t[2][2].lock),i[a[0]]=function(){return i[a[0]+"With"](this===i?r:this,arguments),this},i[a[0]+"With"]=o.fireWith}),r.promise(i),e&&e.call(i,i),i},when:function(e){var t,n,r,i=0,a=X.call(arguments),o=a.length,s=1!==o||e&&ae.isFunction(e.promise)?o:0,l=1===s?e:ae.Deferred(),c=function(e,n,r){return function(i){n[e]=this,r[e]=arguments.length>1?X.call(arguments):i,r===t?l.notifyWith(n,r):--s||l.resolveWith(n,r)}};if(o>1)for(t=new Array(o),n=new Array(o),r=new Array(o);i0||(xe.resolveWith(Q,[ae]),ae.fn.triggerHandler&&(ae(Q).triggerHandler("ready"),ae(Q).off("ready"))))}}),ae.ready.promise=function(t){return xe||(xe=ae.Deferred(),"complete"===Q.readyState||"loading"!==Q.readyState&&!Q.documentElement.doScroll?e.setTimeout(ae.ready):(Q.addEventListener("DOMContentLoaded",o),e.addEventListener("load",o))),xe.promise(t)},ae.ready.promise();var ke=function(e,t,n,r,i,a,o){var s=0,l=e.length,c=null==n;if("object"===ae.type(n)){i=!0;for(s in n)ke(e,t,s,n[s],!0,a,o)}else if(void 0!==r&&(i=!0,ae.isFunction(r)||(o=!0),c&&(o?(t.call(e,r),t=null):(c=t,t=function(e,t,n){return c.call(ae(e),n)})),t))for(;s-1&&void 0!==n&&Te.set(this,e,t)})},null,t,arguments.length>1,null,!0)},removeData:function(e){return this.each(function(){Te.remove(this,e)})}}),ae.extend({queue:function(e,t,n){var r;if(e)return t=(t||"fx")+"queue",r=Ce.get(e,t),n&&(!r||ae.isArray(n)?r=Ce.access(e,t,ae.makeArray(n)):r.push(n)),r||[]},dequeue:function(e,t){t=t||"fx";var n=ae.queue(e,t),r=n.length,i=n.shift(),a=ae._queueHooks(e,t),o=function(){ae.dequeue(e,t)};"inprogress"===i&&(i=n.shift(),r--),i&&("fx"===t&&n.unshift("inprogress"),delete a.stop,i.call(e,o,a)),!r&&a&&a.empty.fire()},_queueHooks:function(e,t){var n=t+"queueHooks";return 
Ce.get(e,n)||Ce.access(e,n,{empty:ae.Callbacks("once memory").add(function(){Ce.remove(e,[t+"queue",n])})})}}),ae.fn.extend({queue:function(e,t){var n=2;return"string"!=typeof e&&(t=e,e="fx",n--),arguments.length",""],thead:[1,"","
        "],col:[2,"","
        "],tr:[2,"","
        "],td:[3,"","
        "],_default:[0,"",""]};Re.optgroup=Re.option,Re.tbody=Re.tfoot=Re.colgroup=Re.caption=Re.thead,Re.th=Re.td;var qe=/<|&#?\w+;/;!function(){var e=Q.createDocumentFragment(),t=e.appendChild(Q.createElement("div")),n=Q.createElement("input");n.setAttribute("type","radio"),n.setAttribute("checked","checked"),n.setAttribute("name","t"),t.appendChild(n),re.checkClone=t.cloneNode(!0).cloneNode(!0).lastChild.checked,t.innerHTML="",re.noCloneChecked=!!t.cloneNode(!0).lastChild.defaultValue}();var Ie=/^key/,De=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,He=/^([^.]*)(?:\.(.+)|)/;ae.event={global:{},add:function(e,t,n,r,i){var a,o,s,l,c,u,d,f,p,h,g,m=Ce.get(e);if(m)for(n.handler&&(a=n,n=a.handler,i=a.selector),n.guid||(n.guid=ae.guid++),(l=m.events)||(l=m.events={}),(o=m.handle)||(o=m.handle=function(t){return"undefined"!=typeof ae&&ae.event.triggered!==t.type?ae.event.dispatch.apply(e,arguments):void 0}),t=(t||"").match(we)||[""],c=t.length;c--;)s=He.exec(t[c])||[],p=g=s[1],h=(s[2]||"").split(".").sort(),p&&(d=ae.event.special[p]||{},p=(i?d.delegateType:d.bindType)||p,d=ae.event.special[p]||{},u=ae.extend({type:p,origType:g,data:r,handler:n,guid:n.guid,selector:i,needsContext:i&&ae.expr.match.needsContext.test(i),namespace:h.join(".")},a),(f=l[p])||(f=l[p]=[],f.delegateCount=0,d.setup&&d.setup.call(e,r,h,o)!==!1||e.addEventListener&&e.addEventListener(p,o)),d.add&&(d.add.call(e,u),u.handler.guid||(u.handler.guid=n.guid)),i?f.splice(f.delegateCount++,0,u):f.push(u),ae.event.global[p]=!0)},remove:function(e,t,n,r,i){var a,o,s,l,c,u,d,f,p,h,g,m=Ce.hasData(e)&&Ce.get(e);if(m&&(l=m.events)){for(t=(t||"").match(we)||[""],c=t.length;c--;)if(s=He.exec(t[c])||[], +p=g=s[1],h=(s[2]||"").split(".").sort(),p){for(d=ae.event.special[p]||{},p=(r?d.delegateType:d.bindType)||p,f=l[p]||[],s=s[2]&&new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"),o=a=f.length;a--;)u=f[a],!i&&g!==u.origType||n&&n.guid!==u.guid||s&&!s.test(u.namespace)||r&&r!==u.selector&&("**"!==r||!u.selector)||(f.splice(a,1),u.selector&&f.delegateCount--,d.remove&&d.remove.call(e,u));o&&!f.length&&(d.teardown&&d.teardown.call(e,h,m.handle)!==!1||ae.removeEvent(e,p,m.handle),delete l[p])}else for(p in l)ae.event.remove(e,p+t[c],n,r,!0);ae.isEmptyObject(l)&&Ce.remove(e,"handle events")}},dispatch:function(e){e=ae.event.fix(e);var t,n,r,i,a,o=[],s=X.call(arguments),l=(Ce.get(this,"events")||{})[e.type]||[],c=ae.event.special[e.type]||{};if(s[0]=e,e.delegateTarget=this,!c.preDispatch||c.preDispatch.call(this,e)!==!1){for(o=ae.event.handlers.call(this,e,l),t=0;(i=o[t++])&&!e.isPropagationStopped();)for(e.currentTarget=i.elem,n=0;(a=i.handlers[n++])&&!e.isImmediatePropagationStopped();)e.rnamespace&&!e.rnamespace.test(a.namespace)||(e.handleObj=a,e.data=a.data,r=((ae.event.special[a.origType]||{}).handle||a.handler).apply(i.elem,s),void 0!==r&&(e.result=r)===!1&&(e.preventDefault(),e.stopPropagation()));return c.postDispatch&&c.postDispatch.call(this,e),e.result}},handlers:function(e,t){var n,r,i,a,o=[],s=t.delegateCount,l=e.target;if(s&&l.nodeType&&("click"!==e.type||isNaN(e.button)||e.button<1))for(;l!==this;l=l.parentNode||this)if(1===l.nodeType&&(l.disabled!==!0||"click"!==e.type)){for(r=[],n=0;n-1:ae.find(i,this,null,[l]).length),r[i]&&r.push(a);r.length&&o.push({elem:l,handlers:r})}return s]*)\/>/gi,Oe=/\s*$/g;ae.extend({htmlPrefilter:function(e){return e.replace(Fe,"<$1>")},clone:function(e,t,n){var 
r,i,a,o,s=e.cloneNode(!0),l=ae.contains(e.ownerDocument,e);if(!(re.noCloneChecked||1!==e.nodeType&&11!==e.nodeType||ae.isXMLDoc(e)))for(o=u(s),a=u(e),r=0,i=a.length;r0&&d(o,!l&&u(e,"script")),s},cleanData:function(e){for(var t,n,r,i=ae.event.special,a=0;void 0!==(n=e[a]);a++)if(Ne(n)){if(t=n[Ce.expando]){if(t.events)for(r in t.events)i[r]?ae.event.remove(n,r):ae.removeEvent(n,r,t.handle);n[Ce.expando]=void 0}n[Te.expando]&&(n[Te.expando]=void 0)}}}),ae.fn.extend({domManip:x,detach:function(e){return k(this,e,!0)},remove:function(e){return k(this,e)},text:function(e){return ke(this,function(e){return void 0===e?ae.text(this):this.empty().each(function(){1!==this.nodeType&&11!==this.nodeType&&9!==this.nodeType||(this.textContent=e)})},null,e,arguments.length)},append:function(){return x(this,arguments,function(e){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var t=b(this,e);t.appendChild(e)}})},prepend:function(){return x(this,arguments,function(e){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var t=b(this,e);t.insertBefore(e,t.firstChild)}})},before:function(){return x(this,arguments,function(e){this.parentNode&&this.parentNode.insertBefore(e,this)})},after:function(){return x(this,arguments,function(e){this.parentNode&&this.parentNode.insertBefore(e,this.nextSibling)})},empty:function(){for(var e,t=0;null!=(e=this[t]);t++)1===e.nodeType&&(ae.cleanData(u(e,!1)),e.textContent="");return this},clone:function(e,t){return e=null!=e&&e,t=null==t?e:t,this.map(function(){return ae.clone(this,e,t)})},html:function(e){return ke(this,function(e){var t=this[0]||{},n=0,r=this.length;if(void 0===e&&1===t.nodeType)return t.innerHTML;if("string"==typeof e&&!Oe.test(e)&&!Re[(Be.exec(e)||["",""])[1].toLowerCase()]){e=ae.htmlPrefilter(e);try{for(;n1)},show:function(){return j(this,!0)},hide:function(){return j(this)},toggle:function(e){return"boolean"==typeof e?e?this.show():this.hide():this.each(function(){je(this)?ae(this).show():ae(this).hide()})}}),ae.Tween=L,L.prototype={constructor:L,init:function(e,t,n,r,i,a){this.elem=e,this.prop=n,this.easing=i||ae.easing._default,this.options=t,this.start=this.now=this.cur(),this.end=r,this.unit=a||(ae.cssNumber[n]?"":"px")},cur:function(){var e=L.propHooks[this.prop];return e&&e.get?e.get(this):L.propHooks._default.get(this)},run:function(e){var t,n=L.propHooks[this.prop];return this.options.duration?this.pos=t=ae.easing[this.easing](e,this.options.duration*e,0,1,this.options.duration):this.pos=t=e,this.now=(this.end-this.start)*t+this.start,this.options.step&&this.options.step.call(this.elem,this.now,this),n&&n.set?n.set(this):L.propHooks._default.set(this),this}},L.prototype.init.prototype=L.prototype,L.propHooks={_default:{get:function(e){var t;return 1!==e.elem.nodeType||null!=e.elem[e.prop]&&null==e.elem.style[e.prop]?e.elem[e.prop]:(t=ae.css(e.elem,e.prop,""),t&&"auto"!==t?t:0)},set:function(e){ae.fx.step[e.prop]?ae.fx.step[e.prop](e):1!==e.elem.nodeType||null==e.elem.style[ae.cssProps[e.prop]]&&!ae.cssHooks[e.prop]?e.elem[e.prop]=e.now:ae.style(e.elem,e.prop,e.now+e.unit)}}},L.propHooks.scrollTop=L.propHooks.scrollLeft={set:function(e){e.elem.nodeType&&e.elem.parentNode&&(e.elem[e.prop]=e.now)}},ae.easing={linear:function(e){return e},swing:function(e){return.5-Math.cos(e*Math.PI)/2},_default:"swing"},ae.fx=L.prototype.init,ae.fx.step={};var it,at,ot=/^(?:toggle|show|hide)$/,st=/queueHooks$/;ae.Animation=ae.extend(D,{tweeners:{"*":[function(e,t){var n=this.createTween(e,t);return 
c(n.elem,e,$e.exec(t),n),n}]},tweener:function(e,t){ae.isFunction(e)?(t=e,e=["*"]):e=e.match(we);for(var n,r=0,i=e.length;r1)},removeAttr:function(e){return this.each(function(){ae.removeAttr(this,e)})}}),ae.extend({attr:function(e,t,n){var r,i,a=e.nodeType;if(3!==a&&8!==a&&2!==a)return"undefined"==typeof e.getAttribute?ae.prop(e,t,n):(1===a&&ae.isXMLDoc(e)||(t=t.toLowerCase(),i=ae.attrHooks[t]||(ae.expr.match.bool.test(t)?lt:void 0)),void 0!==n?null===n?void ae.removeAttr(e,t):i&&"set"in i&&void 0!==(r=i.set(e,n,t))?r:(e.setAttribute(t,n+""),n):i&&"get"in i&&null!==(r=i.get(e,t))?r:(r=ae.find.attr(e,t),null==r?void 0:r))},attrHooks:{type:{set:function(e,t){if(!re.radioValue&&"radio"===t&&ae.nodeName(e,"input")){var n=e.value;return e.setAttribute("type",t),n&&(e.value=n),t}}}},removeAttr:function(e,t){var n,r,i=0,a=t&&t.match(we);if(a&&1===e.nodeType)for(;n=a[i++];)r=ae.propFix[n]||n,ae.expr.match.bool.test(n)&&(e[r]=!1),e.removeAttribute(n)}}),lt={set:function(e,t,n){return t===!1?ae.removeAttr(e,n):e.setAttribute(n,n),n}},ae.each(ae.expr.match.bool.source.match(/\w+/g),function(e,t){var n=ct[t]||ae.find.attr;ct[t]=function(e,t,r){var i,a;return r||(a=ct[t],ct[t]=i,i=null!=n(e,t,r)?t.toLowerCase():null,ct[t]=a),i}});var ut=/^(?:input|select|textarea|button)$/i,dt=/^(?:a|area)$/i;ae.fn.extend({prop:function(e,t){return ke(this,ae.prop,e,t,arguments.length>1)},removeProp:function(e){return this.each(function(){delete this[ae.propFix[e]||e]})}}),ae.extend({prop:function(e,t,n){var r,i,a=e.nodeType;if(3!==a&&8!==a&&2!==a)return 1===a&&ae.isXMLDoc(e)||(t=ae.propFix[t]||t,i=ae.propHooks[t]),void 0!==n?i&&"set"in i&&void 0!==(r=i.set(e,n,t))?r:e[t]=n:i&&"get"in i&&null!==(r=i.get(e,t))?r:e[t]},propHooks:{tabIndex:{get:function(e){var t=ae.find.attr(e,"tabindex");return t?parseInt(t,10):ut.test(e.nodeName)||dt.test(e.nodeName)&&e.href?0:-1}}},propFix:{"for":"htmlFor","class":"className"}}),re.optSelected||(ae.propHooks.selected={get:function(e){var t=e.parentNode;return t&&t.parentNode&&t.parentNode.selectedIndex,null},set:function(e){var t=e.parentNode;t&&(t.selectedIndex,t.parentNode&&t.parentNode.selectedIndex)}}),ae.each(["tabIndex","readOnly","maxLength","cellSpacing","cellPadding","rowSpan","colSpan","useMap","frameBorder","contentEditable"],function(){ae.propFix[this.toLowerCase()]=this});var ft=/[\t\r\n\f]/g;ae.fn.extend({addClass:function(e){var t,n,r,i,a,o,s,l=0;if(ae.isFunction(e))return this.each(function(t){ae(this).addClass(e.call(this,t,H(this)))});if("string"==typeof e&&e)for(t=e.match(we)||[];n=this[l++];)if(i=H(n),r=1===n.nodeType&&(" "+i+" ").replace(ft," ")){for(o=0;a=t[o++];)r.indexOf(" "+a+" ")<0&&(r+=a+" ");s=ae.trim(r),i!==s&&n.setAttribute("class",s)}return this},removeClass:function(e){var t,n,r,i,a,o,s,l=0;if(ae.isFunction(e))return this.each(function(t){ae(this).removeClass(e.call(this,t,H(this)))});if(!arguments.length)return this.attr("class","");if("string"==typeof e&&e)for(t=e.match(we)||[];n=this[l++];)if(i=H(n),r=1===n.nodeType&&(" "+i+" ").replace(ft," ")){for(o=0;a=t[o++];)for(;r.indexOf(" "+a+" ")>-1;)r=r.replace(" "+a+" "," ");s=ae.trim(r),i!==s&&n.setAttribute("class",s)}return this},toggleClass:function(e,t){var n=typeof e;return"boolean"==typeof t&&"string"===n?t?this.addClass(e):this.removeClass(e):ae.isFunction(e)?this.each(function(n){ae(this).toggleClass(e.call(this,n,H(this),t),t)}):this.each(function(){var t,r,i,a;if("string"===n)for(r=0,i=ae(this),a=e.match(we)||[];t=a[r++];)i.hasClass(t)?i.removeClass(t):i.addClass(t);else void 
0!==e&&"boolean"!==n||(t=H(this),t&&Ce.set(this,"__className__",t),this.setAttribute&&this.setAttribute("class",t||e===!1?"":Ce.get(this,"__className__")||""))})},hasClass:function(e){var t,n,r=0;for(t=" "+e+" ";n=this[r++];)if(1===n.nodeType&&(" "+H(n)+" ").replace(ft," ").indexOf(t)>-1)return!0;return!1}});var pt=/\r/g,ht=/[\x20\t\r\n\f]+/g;ae.fn.extend({val:function(e){var t,n,r,i=this[0];{if(arguments.length)return r=ae.isFunction(e),this.each(function(n){var i;1===this.nodeType&&(i=r?e.call(this,n,ae(this).val()):e,null==i?i="":"number"==typeof i?i+="":ae.isArray(i)&&(i=ae.map(i,function(e){return null==e?"":e+""})),t=ae.valHooks[this.type]||ae.valHooks[this.nodeName.toLowerCase()],t&&"set"in t&&void 0!==t.set(this,i,"value")||(this.value=i))});if(i)return t=ae.valHooks[i.type]||ae.valHooks[i.nodeName.toLowerCase()],t&&"get"in t&&void 0!==(n=t.get(i,"value"))?n:(n=i.value,"string"==typeof n?n.replace(pt,""):null==n?"":n)}}}),ae.extend({valHooks:{option:{get:function(e){var t=ae.find.attr(e,"value");return null!=t?t:ae.trim(ae.text(e)).replace(ht," ")}},select:{get:function(e){for(var t,n,r=e.options,i=e.selectedIndex,a="select-one"===e.type||i<0,o=a?null:[],s=a?i+1:r.length,l=i<0?s:a?i:0;l-1)&&(n=!0);return n||(e.selectedIndex=-1),a}}}}),ae.each(["radio","checkbox"],function(){ae.valHooks[this]={set:function(e,t){if(ae.isArray(t))return e.checked=ae.inArray(ae(e).val(),t)>-1}},re.checkOn||(ae.valHooks[this].get=function(e){return null===e.getAttribute("value")?"on":e.value})});var gt=/^(?:focusinfocus|focusoutblur)$/;ae.extend(ae.event,{trigger:function(t,n,r,i){var a,o,s,l,c,u,d,f=[r||Q],p=ne.call(t,"type")?t.type:t,h=ne.call(t,"namespace")?t.namespace.split("."):[];if(o=s=r=r||Q,3!==r.nodeType&&8!==r.nodeType&&!gt.test(p+ae.event.triggered)&&(p.indexOf(".")>-1&&(h=p.split("."),p=h.shift(),h.sort()),c=p.indexOf(":")<0&&"on"+p,t=t[ae.expando]?t:new ae.Event(p,"object"==typeof t&&t),t.isTrigger=i?2:3,t.namespace=h.join("."),t.rnamespace=t.namespace?new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"):null,t.result=void 0,t.target||(t.target=r),n=null==n?[t]:ae.makeArray(n,[t]),d=ae.event.special[p]||{},i||!d.trigger||d.trigger.apply(r,n)!==!1)){if(!i&&!d.noBubble&&!ae.isWindow(r)){for(l=d.delegateType||p,gt.test(l+p)||(o=o.parentNode);o;o=o.parentNode)f.push(o),s=o;s===(r.ownerDocument||Q)&&f.push(s.defaultView||s.parentWindow||e)}for(a=0;(o=f[a++])&&!t.isPropagationStopped();)t.type=a>1?l:d.bindType||p,u=(Ce.get(o,"events")||{})[t.type]&&Ce.get(o,"handle"),u&&u.apply(o,n),u=c&&o[c],u&&u.apply&&Ne(o)&&(t.result=u.apply(o,n),t.result===!1&&t.preventDefault());return t.type=p,i||t.isDefaultPrevented()||d._default&&d._default.apply(f.pop(),n)!==!1||!Ne(r)||c&&ae.isFunction(r[p])&&!ae.isWindow(r)&&(s=r[c],s&&(r[c]=null),ae.event.triggered=p,r[p](),ae.event.triggered=void 0,s&&(r[c]=s)),t.result}},simulate:function(e,t,n){var r=ae.extend(new ae.Event,n,{type:e,isSimulated:!0});ae.event.trigger(r,null,t)}}),ae.fn.extend({trigger:function(e,t){return this.each(function(){ae.event.trigger(e,t,this)})},triggerHandler:function(e,t){var n=this[0];if(n)return ae.event.trigger(e,t,n,!0)}}),ae.each("blur focus focusin focusout load resize scroll unload click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup error contextmenu".split(" "),function(e,t){ae.fn[t]=function(e,n){return arguments.length>0?this.on(t,null,e,n):this.trigger(t)}}),ae.fn.extend({hover:function(e,t){return 
this.mouseenter(e).mouseleave(t||e)}}),re.focusin="onfocusin"in e,re.focusin||ae.each({focus:"focusin",blur:"focusout"},function(e,t){var n=function(e){ae.event.simulate(t,e.target,ae.event.fix(e))};ae.event.special[t]={setup:function(){var r=this.ownerDocument||this,i=Ce.access(r,t);i||r.addEventListener(e,n,!0),Ce.access(r,t,(i||0)+1)},teardown:function(){var r=this.ownerDocument||this,i=Ce.access(r,t)-1;i?Ce.access(r,t,i):(r.removeEventListener(e,n,!0),Ce.remove(r,t))}}});var mt=e.location,bt=ae.now(),vt=/\?/;ae.parseJSON=function(e){return JSON.parse(e+"")},ae.parseXML=function(t){var n;if(!t||"string"!=typeof t)return null;try{n=(new e.DOMParser).parseFromString(t,"text/xml")}catch(e){n=void 0}return n&&!n.getElementsByTagName("parsererror").length||ae.error("Invalid XML: "+t),n};var yt=/#.*$/,_t=/([?&])_=[^&]*/,wt=/^(.*?):[ \t]*([^\r\n]*)$/gm,xt=/^(?:about|app|app-storage|.+-extension|file|res|widget):$/,kt=/^(?:GET|HEAD)$/,Nt=/^\/\//,Ct={},Tt={},Et="*/".concat("*"),St=Q.createElement("a");St.href=mt.href,ae.extend({active:0,lastModified:{},etag:{},ajaxSettings:{url:mt.href,type:"GET",isLocal:xt.test(mt.protocol),global:!0,processData:!0,async:!0,contentType:"application/x-www-form-urlencoded; charset=UTF-8",accepts:{"*":Et,text:"text/plain",html:"text/html",xml:"application/xml, text/xml",json:"application/json, text/javascript"},contents:{xml:/\bxml\b/,html:/\bhtml/,json:/\bjson\b/},responseFields:{xml:"responseXML",text:"responseText",json:"responseJSON"},converters:{"* text":String,"text html":!0,"text json":ae.parseJSON,"text xml":ae.parseXML},flatOptions:{url:!0,context:!0}},ajaxSetup:function(e,t){return t?P(P(e,ae.ajaxSettings),t):P(ae.ajaxSettings,e)},ajaxPrefilter:F(Ct),ajaxTransport:F(Tt),ajax:function(t,n){function r(t,n,r,s){var c,d,v,y,w,k=n;2!==_&&(_=2,l&&e.clearTimeout(l),i=void 0,o=s||"",x.readyState=t>0?4:0,c=t>=200&&t<300||304===t,r&&(y=W(f,x,r)),y=U(f,y,x,c),c?(f.ifModified&&(w=x.getResponseHeader("Last-Modified"),w&&(ae.lastModified[a]=w),w=x.getResponseHeader("etag"),w&&(ae.etag[a]=w)),204===t||"HEAD"===f.type?k="nocontent":304===t?k="notmodified":(k=y.state,d=y.data,v=y.error,c=!v)):(v=k,!t&&k||(k="error",t<0&&(t=0))),x.status=t,x.statusText=(n||k)+"",c?g.resolveWith(p,[d,k,x]):g.rejectWith(p,[x,k,v]),x.statusCode(b),b=void 0,u&&h.trigger(c?"ajaxSuccess":"ajaxError",[x,f,c?d:v]),m.fireWith(p,[x,k]),u&&(h.trigger("ajaxComplete",[x,f]),--ae.active||ae.event.trigger("ajaxStop")))}"object"==typeof t&&(n=t,t=void 0),n=n||{};var i,a,o,s,l,c,u,d,f=ae.ajaxSetup({},n),p=f.context||f,h=f.context&&(p.nodeType||p.jquery)?ae(p):ae.event,g=ae.Deferred(),m=ae.Callbacks("once memory"),b=f.statusCode||{},v={},y={},_=0,w="canceled",x={readyState:0,getResponseHeader:function(e){var t;if(2===_){if(!s)for(s={};t=wt.exec(o);)s[t[1].toLowerCase()]=t[2];t=s[e.toLowerCase()]}return null==t?null:t},getAllResponseHeaders:function(){return 2===_?o:null},setRequestHeader:function(e,t){var n=e.toLowerCase();return _||(e=y[n]=y[n]||e,v[e]=t),this},overrideMimeType:function(e){return _||(f.mimeType=e),this},statusCode:function(e){var t;if(e)if(_<2)for(t in e)b[t]=[b[t],e[t]];else x.always(e[x.status]);return this},abort:function(e){var t=e||w;return 
i&&i.abort(t),r(0,t),this}};if(g.promise(x).complete=m.add,x.success=x.done,x.error=x.fail,f.url=((t||f.url||mt.href)+"").replace(yt,"").replace(Nt,mt.protocol+"//"),f.type=n.method||n.type||f.method||f.type,f.dataTypes=ae.trim(f.dataType||"*").toLowerCase().match(we)||[""],null==f.crossDomain){c=Q.createElement("a");try{c.href=f.url,c.href=c.href,f.crossDomain=St.protocol+"//"+St.host!=c.protocol+"//"+c.host}catch(e){f.crossDomain=!0}}if(f.data&&f.processData&&"string"!=typeof f.data&&(f.data=ae.param(f.data,f.traditional)),O(Ct,f,n,x),2===_)return x;u=ae.event&&f.global,u&&0===ae.active++&&ae.event.trigger("ajaxStart"),f.type=f.type.toUpperCase(),f.hasContent=!kt.test(f.type),a=f.url,f.hasContent||(f.data&&(a=f.url+=(vt.test(a)?"&":"?")+f.data,delete f.data),f.cache===!1&&(f.url=_t.test(a)?a.replace(_t,"$1_="+bt++):a+(vt.test(a)?"&":"?")+"_="+bt++)),f.ifModified&&(ae.lastModified[a]&&x.setRequestHeader("If-Modified-Since",ae.lastModified[a]),ae.etag[a]&&x.setRequestHeader("If-None-Match",ae.etag[a])),(f.data&&f.hasContent&&f.contentType!==!1||n.contentType)&&x.setRequestHeader("Content-Type",f.contentType),x.setRequestHeader("Accept",f.dataTypes[0]&&f.accepts[f.dataTypes[0]]?f.accepts[f.dataTypes[0]]+("*"!==f.dataTypes[0]?", "+Et+"; q=0.01":""):f.accepts["*"]);for(d in f.headers)x.setRequestHeader(d,f.headers[d]);if(f.beforeSend&&(f.beforeSend.call(p,x,f)===!1||2===_))return x.abort();w="abort";for(d in{success:1,error:1,complete:1})x[d](f[d]);if(i=O(Tt,f,n,x)){if(x.readyState=1,u&&h.trigger("ajaxSend",[x,f]),2===_)return x;f.async&&f.timeout>0&&(l=e.setTimeout(function(){x.abort("timeout")},f.timeout));try{_=1,i.send(v,r)}catch(e){if(!(_<2))throw e;r(-1,e)}}else r(-1,"No Transport");return x},getJSON:function(e,t,n){return ae.get(e,t,n,"json")},getScript:function(e,t){return ae.get(e,void 0,t,"script")}}),ae.each(["get","post"],function(e,t){ae[t]=function(e,n,r,i){return ae.isFunction(n)&&(i=i||r,r=n,n=void 0),ae.ajax(ae.extend({url:e,type:t,dataType:i,data:n,success:r},ae.isPlainObject(e)&&e))}}),ae._evalUrl=function(e){return ae.ajax({url:e,type:"GET",dataType:"script",async:!1,global:!1,"throws":!0})},ae.fn.extend({wrapAll:function(e){var t;return ae.isFunction(e)?this.each(function(t){ae(this).wrapAll(e.call(this,t))}):(this[0]&&(t=ae(e,this[0].ownerDocument).eq(0).clone(!0),this[0].parentNode&&t.insertBefore(this[0]),t.map(function(){for(var e=this;e.firstElementChild;)e=e.firstElementChild;return e}).append(this)),this)},wrapInner:function(e){return ae.isFunction(e)?this.each(function(t){ae(this).wrapInner(e.call(this,t))}):this.each(function(){var t=ae(this),n=t.contents();n.length?n.wrapAll(e):t.append(e)})},wrap:function(e){var t=ae.isFunction(e);return this.each(function(n){ae(this).wrapAll(t?e.call(this,n):e)})},unwrap:function(){return this.parent().each(function(){ae.nodeName(this,"body")||ae(this).replaceWith(this.childNodes)}).end()}}),ae.expr.filters.hidden=function(e){return!ae.expr.filters.visible(e)},ae.expr.filters.visible=function(e){return e.offsetWidth>0||e.offsetHeight>0||e.getClientRects().length>0};var Mt=/%20/g,$t=/\[\]$/,At=/\r?\n/g,jt=/^(?:submit|button|image|reset|file)$/i,Lt=/^(?:input|select|textarea|keygen)/i;ae.param=function(e,t){var n,r=[],i=function(e,t){t=ae.isFunction(t)?t():null==t?"":t,r[r.length]=encodeURIComponent(e)+"="+encodeURIComponent(t)};if(void 0===t&&(t=ae.ajaxSettings&&ae.ajaxSettings.traditional),ae.isArray(e)||e.jquery&&!ae.isPlainObject(e))ae.each(e,function(){i(this.name,this.value)});else for(n in e)K(n,e[n],t,i);return 
r.join("&").replace(Mt,"+")},ae.fn.extend({serialize:function(){return ae.param(this.serializeArray())},serializeArray:function(){return this.map(function(){var e=ae.prop(this,"elements");return e?ae.makeArray(e):this}).filter(function(){var e=this.type;return this.name&&!ae(this).is(":disabled")&&Lt.test(this.nodeName)&&!jt.test(e)&&(this.checked||!Le.test(e))}).map(function(e,t){var n=ae(this).val();return null==n?null:ae.isArray(n)?ae.map(n,function(e){return{name:t.name,value:e.replace(At,"\r\n")}}):{name:t.name,value:n.replace(At,"\r\n")}}).get()}}),ae.ajaxSettings.xhr=function(){try{return new e.XMLHttpRequest}catch(e){}};var Bt={0:200,1223:204},zt=ae.ajaxSettings.xhr();re.cors=!!zt&&"withCredentials"in zt,re.ajax=zt=!!zt,ae.ajaxTransport(function(t){var n,r;if(re.cors||zt&&!t.crossDomain)return{send:function(i,a){var o,s=t.xhr();if(s.open(t.type,t.url,t.async,t.username,t.password),t.xhrFields)for(o in t.xhrFields)s[o]=t.xhrFields[o];t.mimeType&&s.overrideMimeType&&s.overrideMimeType(t.mimeType),t.crossDomain||i["X-Requested-With"]||(i["X-Requested-With"]="XMLHttpRequest");for(o in i)s.setRequestHeader(o,i[o]);n=function(e){return function(){n&&(n=r=s.onload=s.onerror=s.onabort=s.onreadystatechange=null,"abort"===e?s.abort():"error"===e?"number"!=typeof s.status?a(0,"error"):a(s.status,s.statusText):a(Bt[s.status]||s.status,s.statusText,"text"!==(s.responseType||"text")||"string"!=typeof s.responseText?{binary:s.response}:{text:s.responseText},s.getAllResponseHeaders())); +}},s.onload=n(),r=s.onerror=n("error"),void 0!==s.onabort?s.onabort=r:s.onreadystatechange=function(){4===s.readyState&&e.setTimeout(function(){n&&r()})},n=n("abort");try{s.send(t.hasContent&&t.data||null)}catch(e){if(n)throw e}},abort:function(){n&&n()}}}),ae.ajaxSetup({accepts:{script:"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"},contents:{script:/\b(?:java|ecma)script\b/},converters:{"text script":function(e){return ae.globalEval(e),e}}}),ae.ajaxPrefilter("script",function(e){void 0===e.cache&&(e.cache=!1),e.crossDomain&&(e.type="GET")}),ae.ajaxTransport("script",function(e){if(e.crossDomain){var t,n;return{send:function(r,i){t=ae("",rE:!0,sL:["actionscript","javascript","handlebars","xml"]}},{cN:"meta",v:[{b:/<\?xml/,e:/\?>/,r:10},{b:/<\?\w+/,e:/\?>/}]},{cN:"tag",b:"",c:[{cN:"name",b:/[^\/><\s]+/,r:0},n]}]}}),hljs.registerLanguage("markdown",function(){return{aliases:["md","mkdown","mkd"],c:[{cN:"section",v:[{b:"^#{1,6}",e:"$"},{b:"^.+?\\n[=-]{2,}$"}]},{b:"<",e:">",sL:"xml",r:0},{cN:"bullet",b:"^([*+-]|(\\d+\\.))\\s+"},{cN:"strong",b:"[*_]{2}.+?[*_]{2}"},{cN:"emphasis",v:[{b:"\\*.+?\\*"},{b:"_.+?_",r:0}]},{cN:"quote",b:"^>\\s+",e:"$"},{cN:"code",v:[{b:"^```w*s*$",e:"^```s*$"},{b:"`.+?`"},{b:"^( {4}| )",e:"$",r:0}]},{b:"^[-\\*]{3,}",e:"$"},{b:"\\[.+?\\][\\(\\[].*?[\\)\\]]",rB:!0,c:[{cN:"string",b:"\\[",e:"\\]",eB:!0,rE:!0,r:0},{cN:"link",b:"\\]\\(",e:"\\)",eB:!0,eE:!0},{cN:"symbol",b:"\\]\\[",e:"\\]",eB:!0,eE:!0}],r:10},{b:/^\[[^\n]+\]:/,rB:!0,c:[{cN:"symbol",b:/\[/,e:/\]/,eB:!0,eE:!0},{cN:"link",b:/:\s*/,e:/$/,eB:!0}]}]}}),hljs.registerLanguage("python",function(e){var t={cN:"meta",b:/^(>>>|\.\.\.) 
/},n={cN:"string",c:[e.BE],v:[{b:/(u|b)?r?'''/,e:/'''/,c:[t],r:10},{b:/(u|b)?r?"""/,e:/"""/,c:[t],r:10},{b:/(u|r|ur)'/,e:/'/,r:10},{b:/(u|r|ur)"/,e:/"/,r:10},{b:/(b|br)'/,e:/'/},{b:/(b|br)"/,e:/"/},e.ASM,e.QSM]},r={cN:"number",r:0,v:[{b:e.BNR+"[lLjJ]?"},{b:"\\b(0o[0-7]+)[lLjJ]?"},{b:e.CNR+"[lLjJ]?"}]},i={cN:"params",b:/\(/,e:/\)/,c:["self",t,r,n]};return{aliases:["py","gyp"],k:{keyword:"and elif is global as in if from raise for except finally print import pass return exec else break not with class assert yield try while continue del or def lambda async await nonlocal|10 None True False",built_in:"Ellipsis NotImplemented"},i:/(<\/|->|\?)/,c:[t,r,n,e.HCM,{v:[{cN:"function",bK:"def",r:10},{cN:"class",bK:"class"}],e:/:/,i:/[${=;\n,]/,c:[e.UTM,i,{b:/->/,eW:!0,k:"None"}]},{cN:"meta",b:/^[\t ]*@/,e:/$/},{b:/\b(print|exec)\(/}]}}),hljs.registerLanguage("makefile",function(e){var t={cN:"variable",b:/\$\(/,e:/\)/,c:[e.BE]};return{aliases:["mk","mak"],c:[e.HCM,{b:/^\w+\s*\W*=/,rB:!0,r:0,starts:{e:/\s*\W*=/,eE:!0,starts:{e:/$/,r:0,c:[t]}}},{cN:"section",b:/^[\w]+:\s*$/},{cN:"meta",b:/^\.PHONY:/,e:/$/,k:{"meta-keyword":".PHONY"},l:/[\.\w]+/},{b:/^\t+/,e:/$/,r:0,c:[e.QSM,t]}]}}),hljs.registerLanguage("css",function(e){var t="[a-zA-Z-][a-zA-Z0-9_-]*",n={b:/[A-Z\_\.\-]+\s*:/,rB:!0,e:";",eW:!0,c:[{cN:"attribute",b:/\S/,e:":",eE:!0,starts:{eW:!0,eE:!0,c:[{b:/[\w-]+\(/,rB:!0,c:[{cN:"built_in",b:/[\w-]+/},{b:/\(/,e:/\)/,c:[e.ASM,e.QSM]}]},e.CSSNM,e.QSM,e.ASM,e.CBCM,{cN:"number",b:"#[0-9A-Fa-f]+"},{cN:"meta",b:"!important"}]}}]};return{cI:!0,i:/[=\/|'\$]/,c:[e.CBCM,{cN:"selector-id",b:/#[A-Za-z0-9_-]+/},{cN:"selector-class",b:/\.[A-Za-z0-9_-]+/},{cN:"selector-attr",b:/\[/,e:/\]/,i:"$"},{cN:"selector-pseudo",b:/:(:)?[a-zA-Z0-9\_\-\+\(\)"'.]+/},{b:"@(font-face|page)",l:"[a-z-]+",k:"font-face page"},{b:"@",e:"[{;]",i:/:/,c:[{cN:"keyword",b:/\w+/},{b:/\s/,eW:!0,eE:!0,r:0,c:[e.ASM,e.QSM,e.CSSNM]}]},{cN:"selector-tag",b:t,r:0},{b:"{",e:"}",i:/\S/,c:[e.CBCM,n]}]}}),hljs.registerLanguage("go",function(e){var t={keyword:"break default func interface select case map struct chan else goto package switch const fallthrough if range type continue for import return var go defer bool byte complex64 complex128 float32 float64 int8 int16 int32 int64 string uint8 uint16 uint32 uint64 int uint uintptr rune",literal:"true false iota nil",built_in:"append cap close complex copy imag len make new panic print println real recover delete"};return{aliases:["golang"],k:t,i:"|<-"}]}}),hljs.registerLanguage("erlang",function(e){var t="[a-z'][a-zA-Z0-9_']*",n="("+t+":"+t+"|"+t+")",r={keyword:"after and andalso|10 band begin bnot bor bsl bzr bxor case catch cond div end fun if let not of orelse|10 query receive rem try when xor",literal:"false true"},i=e.C("%","$"),a={cN:"number",b:"\\b(\\d+#[a-fA-F0-9]+|\\d+(\\.\\d+)?([eE][-+]?\\d+)?)",r:0},o={b:"fun\\s+"+t+"/\\d+"},s={b:n+"\\(",e:"\\)",rB:!0,r:0,c:[{b:n,r:0},{b:"\\(",e:"\\)",eW:!0,rE:!0,r:0}]},l={b:"{",e:"}",r:0},c={b:"\\b_([A-Z][A-Za-z0-9_]*)?",r:0},u={b:"[A-Z][a-zA-Z0-9_]*",r:0},d={b:"#"+e.UIR,r:0,rB:!0,c:[{b:"#"+e.UIR,r:0},{b:"{",e:"}",r:0}]},f={bK:"fun receive if try case",e:"end",k:r};f.c=[i,o,e.inherit(e.ASM,{cN:""}),f,s,e.QSM,a,l,c,u,d];var p=[i,o,f,s,e.QSM,a,l,c,u,d];s.c[1].c=p,l.c=p,d.c[1].c=p;var h={cN:"params",b:"\\(",e:"\\)",c:p};return{aliases:["erl"],k:r,i:"(",rB:!0,i:"\\(|#|//|/\\*|\\\\|:|;",c:[h,e.inherit(e.TM,{b:t})],starts:{e:";|\\.",k:r,c:p}},i,{b:"^-",e:"\\.",r:0,eE:!0,rB:!0,l:"-"+e.IR,k:"-module -record -undef -export -ifdef -ifndef -author -copyright -doc -vsn 
-import -include -include_lib -compile -define -else -endif -file -behaviour -behavior -spec",c:[h]},a,e.QSM,d,c,u,l,{b:/\.$/}]}}),hljs.registerLanguage("ruby",function(e){var t="[a-zA-Z_]\\w*[!?=]?|[-+~]\\@|<<|>>|=~|===?|<=>|[<>]=?|\\*\\*|[-/+%^&*~`|]|\\[\\]=?",n={keyword:"and then defined module in return redo if BEGIN retry end for self when next until do begin unless END rescue else break undef not super class case require yield alias while ensure elsif or include attr_reader attr_writer attr_accessor",literal:"true false nil"},r={cN:"doctag",b:"@[A-Za-z]+"},i={b:"#<",e:">"},a=[e.C("#","$",{c:[r]}),e.C("^\\=begin","^\\=end",{c:[r],r:10}),e.C("^__END__","\\n$")],o={cN:"subst",b:"#\\{",e:"}",k:n},s={cN:"string",c:[e.BE,o],v:[{b:/'/,e:/'/},{b:/"/,e:/"/},{b:/`/,e:/`/},{b:"%[qQwWx]?\\(",e:"\\)"},{b:"%[qQwWx]?\\[",e:"\\]"},{b:"%[qQwWx]?{",e:"}"},{b:"%[qQwWx]?<",e:">"},{b:"%[qQwWx]?/",e:"/"},{b:"%[qQwWx]?%",e:"%"},{b:"%[qQwWx]?-",e:"-"},{b:"%[qQwWx]?\\|",e:"\\|"},{b:/\B\?(\\\d{1,3}|\\x[A-Fa-f0-9]{1,2}|\\u[A-Fa-f0-9]{4}|\\?\S)\b/},{b:/<<(-?)\w+$/,e:/^\s*\w+$/}]},l={cN:"params",b:"\\(",e:"\\)",endsParent:!0,k:n},c=[s,i,{cN:"class",bK:"class module",e:"$|;",i:/=/,c:[e.inherit(e.TM,{b:"[A-Za-z_]\\w*(::\\w+)*(\\?|\\!)?"}),{b:"<\\s*",c:[{b:"("+e.IR+"::)?"+e.IR}]}].concat(a)},{cN:"function",bK:"def",e:"$|;",c:[e.inherit(e.TM,{b:t}),l].concat(a)},{b:e.IR+"::"},{cN:"symbol",b:e.UIR+"(\\!|\\?)?:",r:0},{cN:"symbol",b:":(?!\\s)",c:[s,{b:t}],r:0},{cN:"number",b:"(\\b0[0-7_]+)|(\\b0x[0-9a-fA-F_]+)|(\\b[1-9][0-9_]*(\\.[0-9_]+)?)|[0_]\\b",r:0},{b:"(\\$\\W)|((\\$|\\@\\@?)(\\w+))"},{cN:"params",b:/\|/,e:/\|/,k:n},{b:"("+e.RSR+")\\s*",c:[i,{cN:"regexp",c:[e.BE,o],i:/\n/,v:[{b:"/",e:"/[a-z]*"},{b:"%r{",e:"}[a-z]*"},{b:"%r\\(",e:"\\)[a-z]*"},{b:"%r!",e:"![a-z]*"},{b:"%r\\[",e:"\\][a-z]*"}]}].concat(a),r:0}].concat(a);o.c=c,l.c=c;var u="[>?]>",d="[\\w#]+\\(\\w+\\):\\d+:\\d+>",f="(\\w+-)?\\d+\\.\\d+\\.\\d(p\\d+)?[^>]+>",p=[{b:/^\s*=>/,starts:{e:"$",c:c}},{cN:"meta",b:"^("+u+"|"+d+"|"+f+")",starts:{e:"$",c:c}}];return{aliases:["rb","gemspec","podspec","thor","irb"],k:n,i:/\/\*/,c:a.concat(p).concat(c)}}),hljs.registerLanguage("cpp",function(e){var t={cN:"keyword",b:"\\b[a-z\\d_]*_t\\b"},n={cN:"string",v:[{b:'(u8?|U)?L?"',e:'"',i:"\\n",c:[e.BE]},{b:'(u8?|U)?R"',e:'"',c:[e.BE]},{b:"'\\\\?.",e:"'",i:"."}]},r={cN:"number",v:[{b:"\\b(0b[01']+)"},{b:"\\b([\\d']+(\\.[\\d']*)?|\\.[\\d']+)(u|U|l|L|ul|UL|f|F|b|B)"},{b:"(-?)(\\b0[xX][a-fA-F0-9']+|(\\b[\\d']+(\\.[\\d']*)?|\\.[\\d']+)([eE][-+]?[\\d']+)?)"}],r:0},i={cN:"meta",b:/#\s*[a-z]+\b/,e:/$/,k:{"meta-keyword":"if else elif endif define undef warning error line pragma ifdef ifndef include"},c:[{b:/\\\n/,r:0},e.inherit(n,{cN:"meta-string"}),{cN:"meta-string",b:"<",e:">",i:"\\n"},e.CLCM,e.CBCM]},a=e.IR+"\\s*\\(",o={keyword:"int float while private char catch import module export virtual operator sizeof dynamic_cast|10 typedef const_cast|10 const struct for static_cast|10 union namespace unsigned long volatile static protected bool template mutable if public friend do goto auto void enum else break extern using class asm case typeid short reinterpret_cast|10 default double register explicit signed typename try this switch continue inline delete alignof constexpr decltype noexcept static_assert thread_local restrict _Bool complex _Complex _Imaginary atomic_bool atomic_char atomic_schar atomic_uchar atomic_short atomic_ushort atomic_int atomic_uint atomic_long atomic_ulong atomic_llong atomic_ullong new throw return",built_in:"std string cin cout cerr clog stdin stdout stderr 
stringstream istringstream ostringstream auto_ptr deque list queue stack vector map set bitset multiset multimap unordered_set unordered_map unordered_multiset unordered_multimap array shared_ptr abort abs acos asin atan2 atan calloc ceil cosh cos exit exp fabs floor fmod fprintf fputs free frexp fscanf isalnum isalpha iscntrl isdigit isgraph islower isprint ispunct isspace isupper isxdigit tolower toupper labs ldexp log10 log malloc realloc memchr memcmp memcpy memset modf pow printf putchar puts scanf sinh sin snprintf sprintf sqrt sscanf strcat strchr strcmp strcpy strcspn strlen strncat strncmp strncpy strpbrk strrchr strspn strstr tanh tan vfprintf vprintf vsprintf endl initializer_list unique_ptr",literal:"true false nullptr NULL"},s=[t,e.CLCM,e.CBCM,r,n];return{aliases:["c","cc","h","c++","h++","hpp"],k:o,i:"",k:o,c:["self",t]},{b:e.IR+"::",k:o},{v:[{b:/=/,e:/;/},{b:/\(/,e:/\)/},{bK:"new throw return else",e:/;/}],k:o,c:s.concat([{b:/\(/,e:/\)/,k:o,c:s.concat(["self"]),r:0}]),r:0},{cN:"function",b:"("+e.IR+"[\\*&\\s]+)+"+a,rB:!0,e:/[{;=]/,eE:!0,k:o,i:/[^\w\s\*&]/,c:[{b:a,rB:!0,c:[e.TM],r:0},{cN:"params",b:/\(/,e:/\)/,k:o,r:0,c:[e.CLCM,e.CBCM,n,r,t]},e.CLCM,e.CBCM,i]}]),exports:{preprocessor:i,strings:n,k:o}}}),hljs.registerLanguage("bash",function(e){var t={cN:"variable",v:[{b:/\$[\w\d#@][\w\d_]*/},{b:/\$\{(.*?)}/}]},n={cN:"string",b:/"/,e:/"/,c:[e.BE,t,{cN:"variable",b:/\$\(/,e:/\)/,c:[e.BE]}]},r={cN:"string",b:/'/,e:/'/};return{aliases:["sh","zsh"],l:/-?[a-z\._]+/,k:{keyword:"if then else elif fi for while in do done case esac function",literal:"true false",built_in:"break cd continue eval exec exit export getopts hash pwd readonly return shift test times trap umask unset alias bind builtin caller command declare echo enable help let local logout mapfile printf read readarray source type typeset ulimit unalias set shopt autoload bg bindkey bye cap chdir clone comparguments compcall compctl compdescribe compfiles compgroups compquote comptags comptry compvalues dirs disable disown echotc echoti emulate fc fg float functions getcap getln history integer jobs kill limit log noglob popd print pushd pushln rehash sched setcap setopt stat suspend ttyctl unfunction unhash unlimit unsetopt vared wait whence where which zcompile zformat zftp zle zmodload zparseopts zprof zpty zregexparse zsocket zstyle ztcp",_:"-ne -eq -lt -gt -f -d -e -s -l -a"},c:[{cN:"meta",b:/^#![^\n]+sh\s*$/,r:10},{cN:"function",b:/\w[\w\d_]*\s*\(\s*\)\s*\{/,rB:!0,c:[e.inherit(e.TM,{b:/\w[\w\d_]*/})],r:0},e.HCM,n,r,t]}}),hljs.registerLanguage("json",function(e){var t={literal:"true false null"},n=[e.QSM,e.CNM],r={e:",",eW:!0,eE:!0,c:n,k:t},i={b:"{",e:"}",c:[{cN:"attr",b:/"/,e:/"/,c:[e.BE],i:"\\n"},e.inherit(r,{b:/:/})],i:"\\S"},a={b:"\\[",e:"\\]",c:[e.inherit(r)],i:"\\S"};return n.splice(n.length,0,i,a),{c:n,k:t,i:"\\S"}}),hljs.registerLanguage("php",function(e){var t={b:"\\$+[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*"},n={cN:"meta",b:/<\?(php)?|\?>/},r={cN:"string",c:[e.BE,n],v:[{b:'b"',e:'"'},{b:"b'",e:"'"},e.inherit(e.ASM,{i:null}),e.inherit(e.QSM,{i:null})]},i={v:[e.BNM,e.CNM]};return{aliases:["php3","php4","php5","php6"],cI:!0,k:"and include_once list abstract global private echo interface as static endswitch array null if endwhile or const for endforeach self var while isset public protected exit foreach throw elseif include __FILE__ empty require_once do xor return parent clone use __CLASS__ __LINE__ else break print eval new catch __METHOD__ case exception default die require __FUNCTION__ enddeclare final try 
switch continue endfor endif declare unset true false trait goto instanceof insteadof __DIR__ __NAMESPACE__ yield finally",c:[e.HCM,e.C("//","$",{c:[n]}),e.C("/\\*","\\*/",{c:[{cN:"doctag",b:"@[A-Za-z]+"}]}),e.C("__halt_compiler.+?;",!1,{eW:!0,k:"__halt_compiler",l:e.UIR}),{cN:"string",b:/<<<['"]?\w+['"]?$/,e:/^\w+;?$/,c:[e.BE,{cN:"subst",v:[{b:/\$\w+/},{b:/\{\$/,e:/\}/}]}]},n,{cN:"keyword",b:/\$this\b/},t,{b:/(::|->)+[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/},{cN:"function",bK:"function",e:/[;{]/,eE:!0,i:"\\$|\\[|%",c:[e.UTM,{cN:"params",b:"\\(",e:"\\)",c:["self",t,e.CBCM,r,i]}]},{cN:"class",bK:"class interface",e:"{",eE:!0,i:/[:\(\$"]/,c:[{bK:"extends implements"},e.UTM]},{bK:"namespace",e:";",i:/[\.']/,c:[e.UTM]},{bK:"use",e:";",c:[e.UTM]},{b:"=>"},r,i]}}),hljs.registerLanguage("dart",function(e){var t={cN:"subst",b:"\\$\\{",e:"}",k:"true false null this is new super"},n={cN:"string",v:[{b:"r'''",e:"'''"},{b:'r"""',e:'"""'},{b:"r'",e:"'",i:"\\n"},{b:'r"',e:'"',i:"\\n"},{b:"'''",e:"'''",c:[e.BE,t]},{b:'"""',e:'"""',c:[e.BE,t]},{b:"'",e:"'",i:"\\n",c:[e.BE,t]},{b:'"',e:'"',i:"\\n",c:[e.BE,t]}]};t.c=[e.CNM,n];var r={keyword:"assert async await break case catch class const continue default do else enum extends false final finally for if in is new null rethrow return super switch sync this throw true try var void while with yield abstract as dynamic export external factory get implements import library operator part set static typedef",built_in:"print Comparable DateTime Duration Function Iterable Iterator List Map Match Null Object Pattern RegExp Set Stopwatch String StringBuffer StringSink Symbol Type Uri bool double int num document window querySelector querySelectorAll Element ElementList"};return{k:r,c:[n,e.C("/\\*\\*","\\*/",{sL:"markdown"}),e.C("///","$",{sL:"markdown"}),e.CLCM,e.CBCM,{cN:"class",bK:"class interface",e:"{",eE:!0,c:[{bK:"extends implements"},e.UTM]},e.CNM,{cN:"meta",b:"@[A-Za-z]+"},{b:"=>"}]}}),hljs.registerLanguage("ini",function(e){var t={cN:"string",c:[e.BE],v:[{b:"'''",e:"'''",r:10},{b:'"""',e:'"""',r:10},{b:'"',e:'"'},{b:"'",e:"'"}]};return{aliases:["toml"],cI:!0,i:/\S/,c:[e.C(";","$"),e.HCM,{cN:"section",b:/^\s*\[+/,e:/\]+/},{b:/^[a-z0-9\[\]_-]+\s*=\s*/,e:"$",rB:!0,c:[{cN:"attr",b:/[a-z0-9\[\]_-]+/},{b:/=/,eW:!0,r:0,c:[{cN:"literal",b:/\bon|off|true|false|yes|no\b/},{cN:"variable",v:[{b:/\$[\w\d"][\w\d_]*/},{b:/\$\{(.*?)}/}]},t,{cN:"number",b:/([\+\-]+)?[\d]+_[\d_]+/},e.NM]}]}]}}),hljs.registerLanguage("java",function(e){var t=e.UIR+"(<"+e.UIR+"(\\s*,\\s*"+e.UIR+")*>)?",n="false synchronized int abstract float private char boolean static null if const for true while long strictfp finally protected import native final void enum else break transient catch instanceof byte super volatile case assert short package default double public try this switch continue throws protected public private module requires exports",r="\\b(0[bB]([01]+[01_]+[01]+|[01]+)|0[xX]([a-fA-F0-9]+[a-fA-F0-9_]+[a-fA-F0-9]+|[a-fA-F0-9]+)|(([\\d]+[\\d_]+[\\d]+|[\\d]+)(\\.([\\d]+[\\d_]+[\\d]+|[\\d]+))?|\\.([\\d]+[\\d_]+[\\d]+|[\\d]+))([eE][-+]?\\d+)?)[lLfF]?",i={cN:"number",b:r,r:0};return{aliases:["jsp"],k:n,i:/<\/|#/,c:[e.C("/\\*\\*","\\*/",{r:0,c:[{b:/\w+@/,r:0},{cN:"doctag",b:"@[A-Za-z]+"}]}),e.CLCM,e.CBCM,e.ASM,e.QSM,{cN:"class",bK:"class interface",e:/[{;=]/,eE:!0,k:"class interface",i:/[:"\[\]]/,c:[{bK:"extends implements"},e.UTM]},{bK:"new throw return 
else",r:0},{cN:"function",b:"("+t+"\\s+)+"+e.UIR+"\\s*\\(",rB:!0,e:/[{;=]/,eE:!0,k:n,c:[{b:e.UIR+"\\s*\\(",rB:!0,r:0,c:[e.UTM]},{cN:"params",b:/\(/,e:/\)/,k:n,r:0,c:[e.ASM,e.QSM,e.CNM,e.CBCM]},e.CLCM,e.CBCM]},i,{cN:"meta",b:"@[A-Za-z]+"}]}}),hljs.registerLanguage("matlab",function(e){var t=[e.CNM,{cN:"string",b:"'",e:"'",c:[e.BE,{b:"''"}]}],n={r:0,c:[{b:/'['\.]*/}]};return{k:{keyword:"break case catch classdef continue else elseif end enumerated events for function global if methods otherwise parfor persistent properties return spmd switch try while",built_in:"sin sind sinh asin asind asinh cos cosd cosh acos acosd acosh tan tand tanh atan atand atan2 atanh sec secd sech asec asecd asech csc cscd csch acsc acscd acsch cot cotd coth acot acotd acoth hypot exp expm1 log log1p log10 log2 pow2 realpow reallog realsqrt sqrt nthroot nextpow2 abs angle complex conj imag real unwrap isreal cplxpair fix floor ceil round mod rem sign airy besselj bessely besselh besseli besselk beta betainc betaln ellipj ellipke erf erfc erfcx erfinv expint gamma gammainc gammaln psi legendre cross dot factor isprime primes gcd lcm rat rats perms nchoosek factorial cart2sph cart2pol pol2cart sph2cart hsv2rgb rgb2hsv zeros ones eye repmat rand randn linspace logspace freqspace meshgrid accumarray size length ndims numel disp isempty isequal isequalwithequalnans cat reshape diag blkdiag tril triu fliplr flipud flipdim rot90 find sub2ind ind2sub bsxfun ndgrid permute ipermute shiftdim circshift squeeze isscalar isvector ans eps realmax realmin pi i inf nan isnan isinf isfinite j why compan gallery hadamard hankel hilb invhilb magic pascal rosser toeplitz vander wilkinson"},i:'(//|"|#|/\\*|\\s+/\\w+)',c:[{cN:"function",bK:"function",e:"$",c:[e.UTM,{cN:"params",v:[{b:"\\(",e:"\\)"},{b:"\\[",e:"\\]"}]}]},{b:/[a-zA-Z_][a-zA-Z_0-9]*'['\.]*/,rB:!0,r:0,c:[{b:/[a-zA-Z_][a-zA-Z_0-9]*/,r:0},n.c[0]]},{b:"\\[",e:"\\]",c:t,r:0,starts:n},{b:"\\{",e:/}/,c:t,r:0,starts:n},{b:/\)/,r:0,starts:n},e.C("^\\s*\\%\\{\\s*$","^\\s*\\%\\}\\s*$"),e.C("\\%","$")].concat(t)}}),hljs.registerLanguage("erlang-repl",function(e){ return{k:{built_in:"spawn spawn_link self",keyword:"after and andalso|10 band begin bnot bor bsl bsr bxor case catch cond div end fun if let not of or orelse|10 query receive rem try when xor"},c:[{cN:"meta",b:"^[0-9]+> ",r:10},e.C("%","$"),{cN:"number",b:"\\b(\\d+#[a-fA-F0-9]+|\\d+(\\.\\d+)?([eE][-+]?\\d+)?)",r:0},e.ASM,e.QSM,{b:"\\?(::)?([A-Z]\\w*(::)?)+"},{b:"->"},{b:"ok"},{b:"!"},{b:"(\\b[a-z'][a-zA-Z0-9_']*:[a-z'][a-zA-Z0-9_']*)|(\\b[a-z'][a-zA-Z0-9_']*)",r:0},{b:"[A-Z][a-zA-Z0-9_']*",r:0}]}}),hljs.registerLanguage("perl",function(e){var t="getpwent getservent quotemeta msgrcv scalar kill dbmclose undef lc ma syswrite tr send umask sysopen shmwrite vec qx utime local oct semctl localtime readpipe do return format read sprintf dbmopen pop getpgrp not getpwnam rewinddir qqfileno qw endprotoent wait sethostent bless s|0 opendir continue each sleep endgrent shutdown dump chomp connect getsockname die socketpair close flock exists index shmgetsub for endpwent redo lstat msgctl setpgrp abs exit select print ref gethostbyaddr unshift fcntl syscall goto getnetbyaddr join gmtime symlink semget splice x|0 getpeername recv log setsockopt cos last reverse gethostbyname getgrnam study formline endhostent times chop length gethostent getnetent pack getprotoent getservbyname rand mkdir pos chmod y|0 substr endnetent printf next open msgsnd readdir use unlink getsockopt getpriority rindex wantarray hex system getservbyport endservent int 
[diff continues: deleted side of a regenerated, minified JavaScript bundle. The extraction stripped every HTML fragment embedded in its string literals, so the minified source cannot be reproduced faithfully; what follows is the recoverable outline. This stretch carries the stock highlight.js grammar registrations (perl, elixir, protobuf, cs, apache, scala, nginx, coffeescript, diff, sql, http, dockerfile, javascript, scss, objectivec) and the site's utility layer: window.padNumber, px/rem conversion helpers, underscore-style throttle and debounce, a small window.SemVer parser used by the version selector, and a pass that wraps every heading in `main` in an anchor link.]
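The SemVer helper is one of the few pieces that survives extraction nearly intact. A de-minified sketch follows: identifier names are my own, the parse regex and branch logic are lifted from the bundle, and the `<` and explicit-range branches of `parseRange` are garbled in the extracted text, so they are elided here.

```javascript
// De-minified sketch of the bundle's window.SemVer helper.
// Names are guesses; parse() and the surviving parseRange() branches
// follow the minified source above.
window.SemVer = (function () {
  var SemVer = {};
  var MIN = { major: 0, minor: 0, patch: 0 };
  var MAX = { major: Infinity, minor: Infinity, patch: Infinity };

  // "2.1.4", ">=2.1", "<3", ... missing fields default to 0.
  SemVer.parse = function (str) {
    var m = /^[<>]?[=]?(\d+)\.?(\d+)?\.?(\d+)?/.exec(str);
    return {
      major: parseInt(m[1]) || 0,
      minor: parseInt(m[2]) || 0,
      patch: parseInt(m[3]) || 0
    };
  };

  // Ranges come back as an inclusive [low, high] pair.
  SemVer.parseRange = function (range) {
    if (range.match(/\+$/) || range.match(/^>=/)) return [SemVer.parse(range), MAX];
    if (range.match(/^>/)) {
      var low = SemVer.parse(range);
      low.patch++;                    // ">2.0.0" excludes 2.0.0 itself
      return [low, MAX];
    }
    if (range.match(/^<=/)) return [MIN, SemVer.parse(range)];
    // (the "<" and "a-b" branches are unreadable in the extracted source)
    return [MIN, MAX];
  };

  return SemVer;
})();
```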
[deleted side, continued — the page scripts:
- a table-of-contents builder: renders only when a page has three or more `main > h2` headings, and adds a `--multi` wrapper class at six or more entries;
- the fullscreen content-nav toggle and the slide-open content-menu handlers;
- the version selector: reads the `project`, `version`, `project_relative_path`, `version_history_in`, and `version_history_locations` meta tags, groups releases into per-series columns, flags the LTS series by matching release strings against a single version prefix, and appends an "archived docs" entry;
- the EdgeFader module, which shows, hides, and activates the scroll arrows around overflowing selector lists;
- the code-block setup: a lookup table mapping custom fence names to display names and real highlight.js grammars (for example, `language-riakconf` displays as "riak.conf" and highlights as matlab, `language-curl` displays as "CURL" and highlights as bash, `language-vmargs` displays as "vm.args" and highlights as ini), plus the wrapping and tab wiring for titled and tabbed code blocks.]
        • ")}),r.find(".code-block__tab").first().addClass("code-block__tab--active"),a.find(".code-block__code").first().addClass("code-block__code--active"),a.prepend(i),EdgeFader.showOrHideArrows(r)}),$(function(){var e;e=$(".code-block--tabbed"),$(".code-block__tab").on("click.code-block-tab","a",function(t){var n,r,i,a,o,s,l,c;if(t.preventDefault(),i=$(this),s=i.data("language"),!i.parent().hasClass("code-block__tab--active")){for(c=this.getBoundingClientRect().top,o=0,l=e.length;o"))}),$("td code").each(function(){$(this).html($(this).html().replace(/([^^])\//g,"$1/"))}); \ No newline at end of file +i:"[^\\s]"},{b:"\\b(whitespace|wait|w-resize|visible|vertical-text|vertical-ideographic|uppercase|upper-roman|upper-alpha|underline|transparent|top|thin|thick|text|text-top|text-bottom|tb-rl|table-header-group|table-footer-group|sw-resize|super|strict|static|square|solid|small-caps|separate|se-resize|scroll|s-resize|rtl|row-resize|ridge|right|repeat|repeat-y|repeat-x|relative|progress|pointer|overline|outside|outset|oblique|nowrap|not-allowed|normal|none|nw-resize|no-repeat|no-drop|newspaper|ne-resize|n-resize|move|middle|medium|ltr|lr-tb|lowercase|lower-roman|lower-alpha|loose|list-item|line|line-through|line-edge|lighter|left|keep-all|justify|italic|inter-word|inter-ideograph|inside|inset|inline|inline-block|inherit|inactive|ideograph-space|ideograph-parenthesis|ideograph-numeric|ideograph-alpha|horizontal|hidden|help|hand|groove|fixed|ellipsis|e-resize|double|dotted|distribute|distribute-space|distribute-letter|distribute-all-lines|disc|disabled|default|decimal|dashed|crosshair|collapse|col-resize|circle|char|center|capitalize|break-word|break-all|bottom|both|bolder|bold|block|bidi-override|below|baseline|auto|always|all-scroll|absolute|table|table-cell)\\b"},{b:":",e:";",c:[n,r,e.CSSNM,e.QSM,e.ASM,{cN:"meta",b:"!important"}]},{b:"@",e:"[{;]",k:"mixin include extend for if else each while charset import debug media page content font-face namespace warn",c:[n,e.QSM,e.ASM,r,e.CSSNM,{b:"\\s[A-Za-z0-9_.-]+",r:0}]}]}}),hljs.registerLanguage("objectivec",function(e){var t={cN:"built_in",b:"\\b(AV|CA|CF|CG|CI|CL|CM|CN|CT|MK|MP|MTK|MTL|NS|SCN|SK|UI|WK|XC)\\w+"},n={keyword:"int float while char export sizeof typedef const struct for union unsigned long volatile static bool mutable if do return goto void enum else break extern asm case short default double register explicit signed typename this switch continue wchar_t inline readonly assign readwrite self @synchronized id typeof nonatomic super unichar IBOutlet IBAction strong weak copy in out inout bycopy byref oneway __strong __weak __block __autoreleasing @private @protected @public @try @property @end @throw @catch @finally @autoreleasepool @synthesize @dynamic @selector @optional @required @encode @package @import @defs @compatibility_alias __bridge __bridge_transfer __bridge_retained __bridge_retain __covariant __contravariant __kindof _Nonnull _Nullable _Null_unspecified __FUNCTION__ __PRETTY_FUNCTION__ __attribute__ getter setter retain unsafe_unretained nonnull nullable null_unspecified null_resettable class instancetype NS_DESIGNATED_INITIALIZER NS_UNAVAILABLE NS_REQUIRES_SUPER NS_RETURNS_INNER_POINTER NS_INLINE NS_AVAILABLE NS_DEPRECATED NS_ENUM NS_OPTIONS NS_SWIFT_UNAVAILABLE NS_ASSUME_NONNULL_BEGIN NS_ASSUME_NONNULL_END NS_REFINED_FOR_SWIFT NS_SWIFT_NAME NS_SWIFT_NOTHROW NS_DURING NS_HANDLER NS_ENDHANDLER NS_VALUERETURN NS_VOIDRETURN",literal:"false true FALSE TRUE nil YES NO NULL",built_in:"BOOL dispatch_once_t dispatch_queue_t 
dispatch_sync dispatch_async dispatch_once"},r=/[a-zA-Z@][a-zA-Z0-9_]*/,i="@interface @class @protocol @implementation";return{aliases:["mm","objc","obj-c"],k:n,l:r,i:""}]}]},{cN:"class",b:"("+i.split(" ").join("|")+")\\b",e:"({|$)",eE:!0,k:i,l:r,c:[e.UTM]},{b:"\\."+e.UIR,r:0}]}}),function(){"use strict";window.padNumber=function(e,t,n){var r,i,a;return null==n&&(n="0"),r=Math.abs(e),a=Math.max(0,t-r.toString().length),i=Math.pow(10,a).toString().substr(1),"0"!==n&&i.replace(/^0+/,function(e){return e.replace(/0/g,n)}),e<0&&(i="-"+i),i+r},window.rem=function(e){return null==e&&(e=1),e*parseInt($("html").css("font-size"))},Number.prototype.rem=function(){return this*parseInt($("html").css("font-size"))},String.prototype.toInt=function(){return parseInt(this)},$.fn.extend({maxScrollLeft:function(){return this[0].scrollWidth-this[0].clientWidth}}),$.fn.extend({maxScrollTop:function(){return this[0].scrollHeight-this.outerHeight()}}),window.delay=function(e,t){var n=Array.prototype.slice.call(arguments,2);return setTimeout(function(){return e.apply(null,n)},t)},window.throttle=function(e,t,n){var r,i,a,o=null,s=0;n||(n={});var l=function(){s=n.leading===!1?0:Date.now(),o=null,a=e.apply(r,i),o||(r=i=null)};return function(){var c=Date.now();s||n.leading!==!1||(s=c);var u=t-(c-s);return r=this,i=arguments,u<=0||u>t?(o&&(clearTimeout(o),o=null),s=c,a=e.apply(r,i),o||(r=i=null)):o||n.trailing===!1||(o=setTimeout(l,u)),a}},window.debounce=function(e,t,n){var r,i,a,o,s,l=function(){var c=Date.now()-o;c=0?r=setTimeout(l,t-c):(r=null,n||(s=e.apply(a,i),r||(a=i=null)))};return function(){a=this,i=arguments,o=Date.now();var c=n&&!r;return r||(r=setTimeout(l,t)),c&&(s=e.apply(a,i),a=i=null),s}}}.call(this),function(){"use strict";var e,t,n;e=window.SemVer={},n={major:0,minor:0,patch:0},t={major:Infinity,minor:Infinity,patch:Infinity},e.parse=function(e){var t;return t=/^[<>]?[=]?(\d+)\.?(\d+)?\.?(\d+)?/.exec(e),{major:parseInt(t[1])||0,minor:parseInt(t[2])||0,patch:parseInt(t[3])||0}},e.parseRange=function(r){var i,a;return r.match(/\+$/)||r.match(/^\>\=/)?[e.parse(r),t]:r.match(/^\>/)?(i=e.parse(r),i.patch++,[i,t]):r.match(/^\<\=/)?[n,e.parse(r)]:r.match(/\-$/)||r.match(/^\0||e.compare(t,r)<0)}}.call(this),function(){$("main").each(function(){var e,t;return t=$(this),e=t.find("h1").add(t.find("h2")).add(t.find("h3")).add(t.find("h4")).add(t.find("h5")).add(t.find("h6")),e.each(function(){var e,t,n,r;return r=$(this),n=r.attr("id"),t=r.html(),e="",r.wrapInner(e)})})}.call(this),function(){var e,t,n,r,i;t=$(".table-of-contents"),t.length&&(e=$("main > h2"),e.length<2||(r=$('

          Contents

          '),i=$('
          '),n=$('
            ').appendTo(i),e.length>=6&&i.addClass("table-of-contents__wrapper--multi"),e.each(function(){var e;return e=$(this),n.append($("
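A de-minified sketch of that guard, assuming my reading of the minified condition (`e.length<2||(...)`, formerly `e.length<3||(...)`) is right:

```javascript
// De-minified sketch of the table-of-contents guard. The only functional
// change on the added side is the threshold: the old bundle required
// three h2 headings before rendering a TOC, the new one requires two.
var toc = $(".table-of-contents");
if (toc.length) {
  var headings = $("main > h2");
  if (headings.length >= 2) {        // was: >= 3
    // ...build the list: one table-of-contents__item per h2, linked to
    // its id; a "--multi" wrapper class is added at six or more entries.
  }
}
```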
          1. ",{"class":"table-of-contents__item",html:""+e.text()+""}))}),r.appendTo(t),i.appendTo(t)))}.call(this),function(){$(function(){var e,t,n,r;r=$(".content-nav"),t=$(".content-well"),e=$(".js_toggle-content-nav"),n=$(".content-menu__item"),e.on("click",function(){var e,n;return n=$(window).scrollTop(),e=t.offset().top,r.toggleClass("content-nav--fullscreen"),t.toggleClass("content-well--immobile"),t.hasClass("content-well--immobile")?t.css("top",-1*n):($(window).scrollTop(-1*e),t.css("top",""))}),n.on("click",".content-menu__menu-toggle",function(){var e;return e=$(this.parentNode.nextElementSibling),e.hasClass("content-menu--open")?e.slideUp("fast"):e.slideDown("fast"),e.toggleClass("content-menu--open"),$(this).toggleClass("content-menu__menu-toggle--open")})})}.call(this),function(){"use strict";var e,t;e=function(e){return $("meta[name="+e+"]").attr("content")},t=function(){var t,n,r,i,a,o,s,l,c,u,d,f;if(s=e("project"),t=e("version"),c=e("project_relative_path"),c||(c=""),n=e("docs_root_url"),n||(n=""),l=n+"data/project_descriptions.json",i=e("version_history_in"),a=e("version_history_locations"),d=void 0,f=[],i&&(d=SemVer.parseRange(i)),a&&(r=JSON.parse(a),f=function(){var e,t,n,i;for(i=[],e=0,t=r.length;e',m='',Z+='
            '+m+"
            ",Z+='
            '+m+"
            ",Z+='
            ',Z+='
              ',j=Math.min(6,W+1),x=!1,B)for(k=0,S=B.length;kLTS'),I=O.reverse(),w=N=0,M=I.length;N',_&&P!==t){for(H=c,C=0,A=f.length;C'}Z+='
            • '+l+P+"
            • "}Z+="
          "}return g&&(b=["selector-list__element","selector-list__element--archived"],Z+='
          ',Z+='
          ',Z+='
          "),v&&(b=["selector-list__element","selector-list__element--other"],Z+='
          ',Z+='
          ',Z+='
            \n',Z+='
          • ',Z+="
          "),$(".selector-pane--versions").html(Z),r=$(".selector-pane--versions"),i=r.parent(),a=i.parent(),U=Math.max.apply(Math,r.find(".selector-list").map(function(){return $(this).outerHeight()})),i.css("height",U+2..rem()),a.css("height",U+2..rem()),$(".selector-pane--versions").find(".js_edge-fader--target").on("scroll.selector-fader-target",throttle(function(){return EdgeFader.verifyArrowState($(this))},250)),$(".edge-fader__arrow").on("click.selector-fader-arrow",EdgeFader.onClickCallback),$(".selector-other-releases").on("click",function(){return $(".release-is-archived-and-hidden").toggleClass("release-is-archived-and-hidden"),$(".selector-other-releases").toggleClass("release-is-archived-and-hidden"),window.dispatchEvent(new Event("resize"))})})},$(function(){var e,n,r,i,a,o,s;return t(),e=$(".content-nav"),n=$(".content-nav__primary"),o=$(".selector--version"),r=$(".selector-pane--versions"),i=r.parent(),a=i.parent(),s=debounce(function(){if(0===r.closest(".selector-pane__sizing-box--hidden").length)return r.find(".selector-list__scroll-box").each(function(){return EdgeFader.showOrHideArrows($(this))})},500),o.on("click.toggle_selector",".selector__btn",function(){var e;return e=$(this).parent(),e.toggleClass("selector--open"),a.toggleClass("selector-pane__sizing-box--hidden"),e.hasClass("selector--open")?(i.css("max-height",n.outerHeight()+.75.rem()),a.css("max-height",n.outerHeight()+.75.rem()),EdgeFader.showOrHideArrows(r),s()):(a.css("max-height",""),EdgeFader.hideArrows(r),r.find(".selector-list__scroll-box").each(function(){return EdgeFader.hideArrows($(this))}))}),$(window).on("resize.toggled_selector_reize",debounce(function(){var e;return e=Math.max.apply(Math,r.find(".selector-list").map(function(){return $(this).outerHeight()})),i.css("height",e+2..rem()),a.css("height",e+2..rem()),o.hasClass("selector--open")&&(i.css("max-height",n.outerHeight()+.75.rem()),a.css("max-height",n.outerHeight()+.75.rem()),EdgeFader.showOrHideArrows(r),s()),!0},250)),$(document).on("click.selector_close",function(e){return $(e.target).closest(".selector-pane__sizing-box").length>0||(!!$(e.target).hasClass("selector__btn")||($(".selector--open").removeClass("selector--open"),a.addClass("selector-pane__sizing-box--hidden"),a.css("max-height",""),EdgeFader.hideArrows(r),r.find(".selector-list__scroll-box").each(function(){return EdgeFader.hideArrows($(this))})))})})}.call(this),function(){var e;e=window.EdgeFader={},e.verifyArrowState=function(e){var t,n,r,i,a,o,s,l,c;l=e.maxScrollLeft(),o=e.scrollLeft(),c=e.maxScrollTop(),s=e.scrollTop(),0===l&&0===c||(a=e.parent(),n=a.children(".edge-fader--left").children(".edge-fader__arrow"),r=a.children(".edge-fader--right").children(".edge-fader__arrow"),i=a.children(".edge-fader--top").children(".edge-fader__arrow"),t=a.children(".edge-fader--bottom").children(".edge-fader__arrow"),l>3&&(o>3?n.removeClass("edge-fader__arrow--inactive"):n.addClass("edge-fader__arrow--inactive"),o3&&(s>3?i.removeClass("edge-fader__arrow--inactive"):i.addClass("edge-fader__arrow--inactive"),s0?(r.removeClass("edge-fader__arrow--invisible"),i.removeClass("edge-fader__arrow--invisible")):(r.addClass("edge-fader__arrow--invisible"),i.addClass("edge-fader__arrow--invisible")),l>0?(a.removeClass("edge-fader__arrow--invisible"),n.removeClass("edge-fader__arrow--invisible")):(a.addClass("edge-fader__arrow--invisible"),n.addClass("edge-fader__arrow--invisible")),(s>0||l>0)&&e.verifyArrowState(t)},e.showArrows=function(e){var t,n,r,i,a;return 
[added side, continued: EdgeFader's showArrows/hideArrows/onClickCallback, the fence-name lookup table, and the titled/tabbed code-block wiring all appear to match the deleted side character for character.]
          • ")}),r.find(".code-block__tab").first().addClass("code-block__tab--active"),a.find(".code-block__code").first().addClass("code-block__code--active"),a.prepend(i),EdgeFader.showOrHideArrows(r)}),$(function(){var e;e=$(".code-block--tabbed"),$(".code-block__tab").on("click.code-block-tab","a",function(t){var n,r,i,a,o,s,l,c;if(t.preventDefault(),i=$(this),s=i.data("language"),!i.parent().hasClass("code-block__tab--active")){for(c=this.getBoundingClientRect().top,o=0,l=e.length;o"))}),$("td code").each(function(){$(this).html($(this).html().replace(/([^^])\//g,"$1/"))}); \ No newline at end of file diff --git a/static/old-riak/riak/cs/index.html b/static/old-riak/riak/cs/index.html new file mode 100644 index 0000000000..e4f5659106 --- /dev/null +++ b/static/old-riak/riak/cs/index.html @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/static/old-riak/riak/kv/index.html b/static/old-riak/riak/kv/index.html new file mode 100644 index 0000000000..981e36e232 --- /dev/null +++ b/static/old-riak/riak/kv/index.html @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/static/old-riak/riak/ts/index.html b/static/old-riak/riak/ts/index.html new file mode 100644 index 0000000000..a9bd486e24 --- /dev/null +++ b/static/old-riak/riak/ts/index.html @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/static/old-riak/riakcs/index.html b/static/old-riak/riakcs/index.html new file mode 100644 index 0000000000..bfa59fcebf --- /dev/null +++ b/static/old-riak/riakcs/index.html @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/static/old-riak/riakts/index.html b/static/old-riak/riakts/index.html new file mode 100644 index 0000000000..d53dadb886 --- /dev/null +++ b/static/old-riak/riakts/index.html @@ -0,0 +1,8 @@ + + + + + + + +